From 604c7073a018295b6c89ec5bb6852867e0fb897a Mon Sep 17 00:00:00 2001
From: rigm <rigm@zhaw.ch>
Date: Thu, 7 Jan 2021 10:23:15 +0100
Subject: [PATCH 01/60] test

---
 src/turbulence/sa.F90 | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/src/turbulence/sa.F90 b/src/turbulence/sa.F90
index c8e0f7a4c..3821d454f 100644
--- a/src/turbulence/sa.F90
+++ b/src/turbulence/sa.F90
@@ -107,6 +107,7 @@ subroutine saSource
 
     ! Local variables.
     integer(kind=intType) :: i, j, k, nn, ii
+    real(kind=realType) :: dnew,ks,cr1         
     real(kind=realType) :: fv1, fv2, ft2
     real(kind=realType) :: ss, sst, nu, dist2Inv, chi, chi2, chi3
     real(kind=realType) :: rr, gg, gg6, termFw, fwSa, term1, term2
@@ -124,6 +125,10 @@ subroutine saSource
     cw36    = rsaCw3**6
     cb3Inv  = one/rsaCb3
 
+    ! constants for SA rough
+    ks=0.0001 
+    cr1=0.5 
+
     ! Determine the non-dimensional wheel speed of this block.
 
     omegax = timeRef*sections(sectionID)%rotRate(1)
@@ -243,13 +248,18 @@ subroutine saSource
                 ! and nu) and the functions fv1 and fv2. The latter corrects
                 ! the production term near a viscous wall.
 
+                ! SA rough 
+                dnew     = d2Wall(i,j,k) + 0.03*ks
+
                 nu       = rlv(i,j,k)/w(i,j,k,irho)
-                dist2Inv = one/(d2Wall(i,j,k)**2)
-                chi      = w(i,j,k,itu1)/nu
+                ! dist2Inv = one/(d2Wall(i,j,k)**2)
+                dist2Inv = one/(dnew**2)
+                chi      = w(i,j,k,itu1)/nu + cr1*ks/dnew 
                 chi2     = chi*chi
                 chi3     = chi*chi2
                 fv1      = chi3/(chi3+cv13)
-                fv2      = one - chi/(one + chi*fv1)
+                ! fv2      = one - chi/(one + chi*fv1)
+                fv2      = one - w(i,j,k,itu1)/(nu + w(i,j,k,itu1)*fv1) 
 
                 ! The function ft2, which is designed to keep a laminar
                 ! solution laminar. When running in fully turbulent mode

From 50126175a2ae2f53f0d0286e453dc3ccdf8e9716 Mon Sep 17 00:00:00 2001
From: rigm <rigm@zhaw.ch>
Date: Thu, 7 Jan 2021 10:48:02 +0100
Subject: [PATCH 02/60] test

---
 src/turbulence/turbBCRoutines.F90 | 36 ++++++++++++++++++++-----------
 1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/src/turbulence/turbBCRoutines.F90 b/src/turbulence/turbBCRoutines.F90
index 6c4c8c31d..b52edf075 100644
--- a/src/turbulence/turbBCRoutines.F90
+++ b/src/turbulence/turbBCRoutines.F90
@@ -331,42 +331,48 @@ subroutine bcEddyWall(nn)
     case (iMin)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             rev(1,i,j) = -rev(2,i,j)
+             ! rev(1,i,j) = -rev(2,i,j)
+             rev(1,i,j) = rev(2,i,j)
           enddo
        enddo
 
     case (iMax)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             rev(ie,i,j) = -rev(il,i,j)
+             ! rev(ie,i,j) = -rev(il,i,j)
+             rev(ie,i,j) = rev(il,i,j)
           enddo
        enddo
 
     case (jMin)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             rev(i,1,j) = -rev(i,2,j)
+             ! rev(i,1,j) = -rev(i,2,j)
+             rev(i,1,j) = rev(i,2,j)
           enddo
        enddo
 
     case (jMax)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             rev(i,je,j) = -rev(i,jl,j)
+             ! rev(i,je,j) = -rev(i,jl,j)
+             rev(i,je,j) = rev(i,jl,j)
           enddo
        enddo
 
     case (kMin)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             rev(i,j,1) = -rev(i,j,2)
+             ! rev(i,j,1) = -rev(i,j,2)
+             rev(i,j,1) = rev(i,j,2)
           enddo
        enddo
 
     case (kMax)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             rev(i,j,ke) = -rev(i,j,kl)
+             ! rev(i,j,ke) = -rev(i,j,kl)
+             rev(i,j,ke) = rev(i,j,kl)
           enddo
        enddo
     end select
@@ -845,39 +851,45 @@ subroutine bcTurbWall(nn)
        case (iMin)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmti1(i,j,itu1,itu1) = one
+                ! bmti1(i,j,itu1,itu1) = one
+                bmti1(i,j,itu1,itu1) = -one
              enddo
           enddo
        case (iMax)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmti2(i,j,itu1,itu1) = one
+                ! bmti2(i,j,itu1,itu1) = one
+                bmti2(i,j,itu1,itu1) = -one
              enddo
           enddo
        case (jMin)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmtj1(i,j,itu1,itu1) = one
+                ! bmtj1(i,j,itu1,itu1) = one
+                bmtj1(i,j,itu1,itu1) = -one
              enddo
           enddo
        case (jMax)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmtj2(i,j,itu1,itu1) = one
+                ! bmtj2(i,j,itu1,itu1) = one
+                bmtj2(i,j,itu1,itu1) = -one
              enddo
           enddo
 
        case (kMin)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmtk1(i,j,itu1,itu1) = one
+                ! bmtk1(i,j,itu1,itu1) = one
+                bmtk1(i,j,itu1,itu1) = -one
              enddo
           enddo
 
        case (kMax)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmtk2(i,j,itu1,itu1) = one
+                ! bmtk2(i,j,itu1,itu1) = one
+                bmtk2(i,j,itu1,itu1) = -one
              enddo
           enddo
        end select

From 3929038b119605733043e0a8236e0f31981f1d88 Mon Sep 17 00:00:00 2001
From: rigm <rigm@zhaw.ch>
Date: Thu, 7 Jan 2021 15:57:00 +0100
Subject: [PATCH 03/60] test

---
 src/turbulence/turbBCRoutines.F90 | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/turbulence/turbBCRoutines.F90 b/src/turbulence/turbBCRoutines.F90
index b52edf075..b19126411 100644
--- a/src/turbulence/turbBCRoutines.F90
+++ b/src/turbulence/turbBCRoutines.F90
@@ -332,7 +332,7 @@ subroutine bcEddyWall(nn)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
              ! rev(1,i,j) = -rev(2,i,j)
-             rev(1,i,j) = rev(2,i,j)
+             rev(1,i,j) = 0.7143*rev(2,i,j)
           enddo
        enddo
 
@@ -340,7 +340,7 @@ subroutine bcEddyWall(nn)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
              ! rev(ie,i,j) = -rev(il,i,j)
-             rev(ie,i,j) = rev(il,i,j)
+             rev(ie,i,j) = 0.7143*rev(il,i,j)
           enddo
        enddo
 
@@ -348,7 +348,7 @@ subroutine bcEddyWall(nn)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
              ! rev(i,1,j) = -rev(i,2,j)
-             rev(i,1,j) = rev(i,2,j)
+             rev(i,1,j) = 0.7143*rev(i,2,j)
           enddo
        enddo
 
@@ -356,7 +356,7 @@ subroutine bcEddyWall(nn)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
              ! rev(i,je,j) = -rev(i,jl,j)
-             rev(i,je,j) = rev(i,jl,j)
+             rev(i,je,j) = 0.7143*rev(i,jl,j)
           enddo
        enddo
 
@@ -364,7 +364,7 @@ subroutine bcEddyWall(nn)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
              ! rev(i,j,1) = -rev(i,j,2)
-             rev(i,j,1) = rev(i,j,2)
+             rev(i,j,1) = 0.7143*rev(i,j,2)
           enddo
        enddo
 
@@ -372,7 +372,7 @@ subroutine bcEddyWall(nn)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
              ! rev(i,j,ke) = -rev(i,j,kl)
-             rev(i,j,ke) = rev(i,j,kl)
+             rev(i,j,ke) = 0.7143*rev(i,j,kl)
           enddo
        enddo
     end select
@@ -852,28 +852,28 @@ subroutine bcTurbWall(nn)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
                 ! bmti1(i,j,itu1,itu1) = one
-                bmti1(i,j,itu1,itu1) = -one
+                bmti1(i,j,itu1,itu1) = -0.7143
              enddo
           enddo
        case (iMax)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
                 ! bmti2(i,j,itu1,itu1) = one
-                bmti2(i,j,itu1,itu1) = -one
+                bmti2(i,j,itu1,itu1) = -0.7143
              enddo
           enddo
        case (jMin)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
                 ! bmtj1(i,j,itu1,itu1) = one
-                bmtj1(i,j,itu1,itu1) = -one
+                bmtj1(i,j,itu1,itu1) = -0.7143
              enddo
           enddo
        case (jMax)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
                 ! bmtj2(i,j,itu1,itu1) = one
-                bmtj2(i,j,itu1,itu1) = -one
+                bmtj2(i,j,itu1,itu1) = -0.7143
              enddo
           enddo
 
@@ -881,7 +881,7 @@ subroutine bcTurbWall(nn)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
                 ! bmtk1(i,j,itu1,itu1) = one
-                bmtk1(i,j,itu1,itu1) = -one
+                bmtk1(i,j,itu1,itu1) = -0.7143
              enddo
           enddo
 
@@ -889,7 +889,7 @@ subroutine bcTurbWall(nn)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
                 ! bmtk2(i,j,itu1,itu1) = one
-                bmtk2(i,j,itu1,itu1) = -one
+                bmtk2(i,j,itu1,itu1) = -0.7143
              enddo
           enddo
        end select

From 1cccbe572d2946b2e12a6b9cff8510b5afca086f Mon Sep 17 00:00:00 2001
From: David Anderegg <andv@zhaw.ch>
Date: Mon, 11 Jan 2021 17:28:30 +0100
Subject: [PATCH 04/60] Added ks as input *NOT WORKING*

---
 adflow/pyADflow.py         | 2 ++
 src/NKSolver/blockette.F90 | 2 +-
 src/f2py/adflow.pyf        | 1 +
 src/modules/inputParam.F90 | 2 ++
 src/turbulence/sa.F90      | 8 ++++----
 src/utils/utils.F90        | 1 +
 6 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/adflow/pyADflow.py b/adflow/pyADflow.py
index 6801e808e..908724c76 100644
--- a/adflow/pyADflow.py
+++ b/adflow/pyADflow.py
@@ -4574,6 +4574,7 @@ def _getDefOptions(self):
             'useqcr':[bool, False],
             'userotationsa':[bool, False],
             'useft2sa':[bool, True],
+            'kssa': [float, 0.0],
             'eddyvisinfratio':[float, .009],
             'usewallfunctions':[bool, False],
             'useapproxwalldistance':[bool, True],
@@ -4880,6 +4881,7 @@ def _getOptionMap(self):
             'useqcr':['physics', 'useqcr'],
             'userotationsa':['physics', 'userotationsa'],
             'useft2sa':['physics', 'useft2sa'],
+            'kssa':['physics', 'kssa'],
             'eddyvisinfratio':['physics', 'eddyvisinfratio'],
             'usewallfunctions':['physics', 'wallfunctions'],
             'walldistcutoff':['physics', 'walldistcutoff'],
diff --git a/src/NKSolver/blockette.F90 b/src/NKSolver/blockette.F90
index 25acea4e5..74954edba 100644
--- a/src/NKSolver/blockette.F90
+++ b/src/NKSolver/blockette.F90
@@ -982,7 +982,7 @@ subroutine saSource
     use constants
     use paramTurb
     use blockPointers, only : sectionID
-    use inputPhysics, only :useft2SA, useRotationSA, turbProd, equations
+    use inputPhysics, only :useft2SA, useRotationSA, turbProd, equations, kssa
     use inputDiscretization, only : approxSA
     use section, only : sections
     use sa, only : cv13, kar2Inv, cw36, cb3Inv
diff --git a/src/f2py/adflow.pyf b/src/f2py/adflow.pyf
index a169cccd7..64a8d9a88 100644
--- a/src/f2py/adflow.pyf
+++ b/src/f2py/adflow.pyf
@@ -1132,6 +1132,7 @@ python module libadflow
          real(kind=realtype) :: beta
          integer(kind=inttype) :: liftindex
          real(kind=realtype) :: cavitationnumber
+         real(kind=realType) :: kssa
        end module inputphysics
 
        module inputadjoint ! in :adflow:../modules/inputParam.f90
diff --git a/src/modules/inputParam.F90 b/src/modules/inputParam.F90
index f4f5b7d99..0f0ac1e1f 100644
--- a/src/modules/inputParam.F90
+++ b/src/modules/inputParam.F90
@@ -516,6 +516,7 @@ module inputPhysics
   !                      when considering turbulence model effects
   ! useRotationSA:       Determines if we will use rotation correction (SA model only)
   ! useft2SA:            Determines if we will use the ft2 term (SA model only)
+  ! kssa:                Conventional Nikuradse sand roughness scale height
   ! wallFunctions:       Whether or not to use wall functions.
   ! wallDistanceNeeded:  Whether or not the wall distance is needed
   !                      for the turbulence model in a RANS problem.
@@ -579,6 +580,7 @@ module inputPhysics
   real(kind=realType), dimension(3,2) :: momentAxis
   real(kind=realType) :: SSuthDim, muSuthDim, TSuthDim
   real(kind=realType) :: cavitationnumber
+  real(kind=realType) :: kssa
 
 #ifndef USE_TAPENADE
   real(kind=realType) :: alphad, betad
diff --git a/src/turbulence/sa.F90 b/src/turbulence/sa.F90
index 3821d454f..2b8489fcf 100644
--- a/src/turbulence/sa.F90
+++ b/src/turbulence/sa.F90
@@ -107,7 +107,7 @@ subroutine saSource
 
     ! Local variables.
     integer(kind=intType) :: i, j, k, nn, ii
-    real(kind=realType) :: dnew,ks,cr1         
+    real(kind=realType) :: dnew,cr1         
     real(kind=realType) :: fv1, fv2, ft2
     real(kind=realType) :: ss, sst, nu, dist2Inv, chi, chi2, chi3
     real(kind=realType) :: rr, gg, gg6, termFw, fwSa, term1, term2
@@ -126,7 +126,7 @@ subroutine saSource
     cb3Inv  = one/rsaCb3
 
     ! constants for SA rough
-    ks=0.0001 
+    ! ks=0.0001 
     cr1=0.5 
 
     ! Determine the non-dimensional wheel speed of this block.
@@ -249,12 +249,12 @@ subroutine saSource
                 ! the production term near a viscous wall.
 
                 ! SA rough 
-                dnew     = d2Wall(i,j,k) + 0.03*ks
+                dnew     = d2Wall(i,j,k) + 0.03*kssa
 
                 nu       = rlv(i,j,k)/w(i,j,k,irho)
                 ! dist2Inv = one/(d2Wall(i,j,k)**2)
                 dist2Inv = one/(dnew**2)
-                chi      = w(i,j,k,itu1)/nu + cr1*ks/dnew 
+                chi      = w(i,j,k,itu1)/nu + cr1*kssa/dnew 
                 chi2     = chi*chi
                 chi3     = chi*chi2
                 fv1      = chi3/(chi3+cv13)
diff --git a/src/utils/utils.F90 b/src/utils/utils.F90
index 0360f417a..274580c4a 100644
--- a/src/utils/utils.F90
+++ b/src/utils/utils.F90
@@ -4140,6 +4140,7 @@ subroutine writeIntroMessage
          &Euler equations"
     print "(a)", "# on multiblock structured hexahedral grids."
 
+    print "(a)", "# SA_rough dev"
 
     write(integerString,"(i7)") nProc
     integerString = adjustl(integerString)

From 8b929fc169a9057a4ffbbbfb534ae570cbc13bb9 Mon Sep 17 00:00:00 2001
From: David Anderegg <andv@zhaw.ch>
Date: Tue, 12 Jan 2021 07:57:35 +0100
Subject: [PATCH 05/60] Added ks as input

---
 src/f2py/adflow.pyf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/f2py/adflow.pyf b/src/f2py/adflow.pyf
index 64a8d9a88..121876851 100644
--- a/src/f2py/adflow.pyf
+++ b/src/f2py/adflow.pyf
@@ -1132,7 +1132,7 @@ python module libadflow
          real(kind=realtype) :: beta
          integer(kind=inttype) :: liftindex
          real(kind=realtype) :: cavitationnumber
-         real(kind=realType) :: kssa
+         real(kind=realtype) :: kssa
        end module inputphysics
 
        module inputadjoint ! in :adflow:../modules/inputParam.f90

From 98e1dc51228182cfb5c3c8135b8edabb5bca45ee Mon Sep 17 00:00:00 2001
From: David Anderegg <andv@zhaw.ch>
Date: Tue, 12 Jan 2021 17:30:31 +0100
Subject: [PATCH 06/60] Changed turbBC

---
 src/turbulence/turbBCRoutines.F90 | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/turbulence/turbBCRoutines.F90 b/src/turbulence/turbBCRoutines.F90
index b19126411..168df3f3e 100644
--- a/src/turbulence/turbBCRoutines.F90
+++ b/src/turbulence/turbBCRoutines.F90
@@ -332,7 +332,7 @@ subroutine bcEddyWall(nn)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
              ! rev(1,i,j) = -rev(2,i,j)
-             rev(1,i,j) = 0.7143*rev(2,i,j)
+             rev(1,i,j) = 0.5*rev(2,i,j)
           enddo
        enddo
 
@@ -340,7 +340,7 @@ subroutine bcEddyWall(nn)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
              ! rev(ie,i,j) = -rev(il,i,j)
-             rev(ie,i,j) = 0.7143*rev(il,i,j)
+             rev(ie,i,j) = 0.5*rev(il,i,j)
           enddo
        enddo
 
@@ -348,7 +348,7 @@ subroutine bcEddyWall(nn)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
              ! rev(i,1,j) = -rev(i,2,j)
-             rev(i,1,j) = 0.7143*rev(i,2,j)
+             rev(i,1,j) = 0.5*rev(i,2,j)
           enddo
        enddo
 
@@ -356,7 +356,7 @@ subroutine bcEddyWall(nn)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
              ! rev(i,je,j) = -rev(i,jl,j)
-             rev(i,je,j) = 0.7143*rev(i,jl,j)
+             rev(i,je,j) = 0.5*rev(i,jl,j)
           enddo
        enddo
 
@@ -364,7 +364,7 @@ subroutine bcEddyWall(nn)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
              ! rev(i,j,1) = -rev(i,j,2)
-             rev(i,j,1) = 0.7143*rev(i,j,2)
+             rev(i,j,1) = 0.5*rev(i,j,2)
           enddo
        enddo
 
@@ -372,7 +372,7 @@ subroutine bcEddyWall(nn)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
              ! rev(i,j,ke) = -rev(i,j,kl)
-             rev(i,j,ke) = 0.7143*rev(i,j,kl)
+             rev(i,j,ke) = 0.5*rev(i,j,kl)
           enddo
        enddo
     end select
@@ -852,28 +852,28 @@ subroutine bcTurbWall(nn)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
                 ! bmti1(i,j,itu1,itu1) = one
-                bmti1(i,j,itu1,itu1) = -0.7143
+                bmti1(i,j,itu1,itu1) = -0.5
              enddo
           enddo
        case (iMax)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
                 ! bmti2(i,j,itu1,itu1) = one
-                bmti2(i,j,itu1,itu1) = -0.7143
+                bmti2(i,j,itu1,itu1) = -0.5
              enddo
           enddo
        case (jMin)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
                 ! bmtj1(i,j,itu1,itu1) = one
-                bmtj1(i,j,itu1,itu1) = -0.7143
+                bmtj1(i,j,itu1,itu1) = -0.5
              enddo
           enddo
        case (jMax)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
                 ! bmtj2(i,j,itu1,itu1) = one
-                bmtj2(i,j,itu1,itu1) = -0.7143
+                bmtj2(i,j,itu1,itu1) = -0.5
              enddo
           enddo
 
@@ -881,7 +881,7 @@ subroutine bcTurbWall(nn)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
                 ! bmtk1(i,j,itu1,itu1) = one
-                bmtk1(i,j,itu1,itu1) = -0.7143
+                bmtk1(i,j,itu1,itu1) = -0.5
              enddo
           enddo
 
@@ -889,7 +889,7 @@ subroutine bcTurbWall(nn)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
                 ! bmtk2(i,j,itu1,itu1) = one
-                bmtk2(i,j,itu1,itu1) = -0.7143
+                bmtk2(i,j,itu1,itu1) = -0.5
              enddo
           enddo
        end select

From 3efa7954c8ca97560c259a5e7bb9ed21e9f94712 Mon Sep 17 00:00:00 2001
From: David Anderegg <andv@zhaw.ch>
Date: Wed, 13 Jan 2021 11:33:46 +0100
Subject: [PATCH 07/60] changed BC for ks=1e-3

---
 src/turbulence/turbBCRoutines.F90 | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/turbulence/turbBCRoutines.F90 b/src/turbulence/turbBCRoutines.F90
index 168df3f3e..789abf25c 100644
--- a/src/turbulence/turbBCRoutines.F90
+++ b/src/turbulence/turbBCRoutines.F90
@@ -332,7 +332,7 @@ subroutine bcEddyWall(nn)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
              ! rev(1,i,j) = -rev(2,i,j)
-             rev(1,i,j) = 0.5*rev(2,i,j)
+             rev(1,i,j) = 0.93*rev(2,i,j)
           enddo
        enddo
 
@@ -340,7 +340,7 @@ subroutine bcEddyWall(nn)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
              ! rev(ie,i,j) = -rev(il,i,j)
-             rev(ie,i,j) = 0.5*rev(il,i,j)
+             rev(ie,i,j) = 0.93*rev(il,i,j)
           enddo
        enddo
 
@@ -348,7 +348,7 @@ subroutine bcEddyWall(nn)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
              ! rev(i,1,j) = -rev(i,2,j)
-             rev(i,1,j) = 0.5*rev(i,2,j)
+             rev(i,1,j) = 0.93*rev(i,2,j)
           enddo
        enddo
 
@@ -356,7 +356,7 @@ subroutine bcEddyWall(nn)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
              ! rev(i,je,j) = -rev(i,jl,j)
-             rev(i,je,j) = 0.5*rev(i,jl,j)
+             rev(i,je,j) = 0.93*rev(i,jl,j)
           enddo
        enddo
 
@@ -364,7 +364,7 @@ subroutine bcEddyWall(nn)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
              ! rev(i,j,1) = -rev(i,j,2)
-             rev(i,j,1) = 0.5*rev(i,j,2)
+             rev(i,j,1) = 0.93*rev(i,j,2)
           enddo
        enddo
 
@@ -372,7 +372,7 @@ subroutine bcEddyWall(nn)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
              ! rev(i,j,ke) = -rev(i,j,kl)
-             rev(i,j,ke) = 0.5*rev(i,j,kl)
+             rev(i,j,ke) = 0.93*rev(i,j,kl)
           enddo
        enddo
     end select
@@ -852,28 +852,28 @@ subroutine bcTurbWall(nn)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
                 ! bmti1(i,j,itu1,itu1) = one
-                bmti1(i,j,itu1,itu1) = -0.5
+                bmti1(i,j,itu1,itu1) = -0.93
              enddo
           enddo
        case (iMax)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
                 ! bmti2(i,j,itu1,itu1) = one
-                bmti2(i,j,itu1,itu1) = -0.5
+                bmti2(i,j,itu1,itu1) = -0.93
              enddo
           enddo
        case (jMin)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
                 ! bmtj1(i,j,itu1,itu1) = one
-                bmtj1(i,j,itu1,itu1) = -0.5
+                bmtj1(i,j,itu1,itu1) = -0.93
              enddo
           enddo
        case (jMax)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
                 ! bmtj2(i,j,itu1,itu1) = one
-                bmtj2(i,j,itu1,itu1) = -0.5
+                bmtj2(i,j,itu1,itu1) = -0.93
              enddo
           enddo
 
@@ -881,7 +881,7 @@ subroutine bcTurbWall(nn)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
                 ! bmtk1(i,j,itu1,itu1) = one
-                bmtk1(i,j,itu1,itu1) = -0.5
+                bmtk1(i,j,itu1,itu1) = -0.93
              enddo
           enddo
 
@@ -889,7 +889,7 @@ subroutine bcTurbWall(nn)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
                 ! bmtk2(i,j,itu1,itu1) = one
-                bmtk2(i,j,itu1,itu1) = -0.5
+                bmtk2(i,j,itu1,itu1) = -0.93
              enddo
           enddo
        end select

From 60ab0499bb4f6cd22033cd84be61e4cb89cd16a5 Mon Sep 17 00:00:00 2001
From: David Anderegg <andv@zhaw.ch>
Date: Wed, 13 Jan 2021 16:04:23 +0100
Subject: [PATCH 08/60] added dynamic SA_fact calculation

---
 src/turbulence/turbBCRoutines.F90 | 50 +++++++++++++++++++++++--------
 1 file changed, 37 insertions(+), 13 deletions(-)

diff --git a/src/turbulence/turbBCRoutines.F90 b/src/turbulence/turbBCRoutines.F90
index 789abf25c..972a80915 100644
--- a/src/turbulence/turbBCRoutines.F90
+++ b/src/turbulence/turbBCRoutines.F90
@@ -312,6 +312,7 @@ subroutine bcEddyWall(nn)
     !
     use constants
     use blockPointers
+    use inputPhysics, only : kssa
     implicit none
     !
     !      Subroutine arguments.
@@ -325,14 +326,15 @@ subroutine bcEddyWall(nn)
 
     ! Determine the face id on which the subface is located and
     ! loop over the faces of the subface and set the eddy viscosity
-    ! in the halo cells.
+    ! in the halo cells. 
 
     select case (BCFaceid(nn))
     case (iMin)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
              ! rev(1,i,j) = -rev(2,i,j)
-             rev(1,i,j) = 0.93*rev(2,i,j)
+            !  print "(f12.3)", saFact(kssa, d2Wall(2,i,j))
+             rev(1,i,j) = saFact(kssa, d2Wall(2,i,j))*rev(2,i,j)
           enddo
        enddo
 
@@ -340,7 +342,8 @@ subroutine bcEddyWall(nn)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
              ! rev(ie,i,j) = -rev(il,i,j)
-             rev(ie,i,j) = 0.93*rev(il,i,j)
+            ! print "(f12.3)", saFact(kssa, d2Wall(il,i,j))
+             rev(ie,i,j) = saFact(kssa, d2Wall(il,i,j))*rev(il,i,j)
           enddo
        enddo
 
@@ -348,7 +351,8 @@ subroutine bcEddyWall(nn)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
              ! rev(i,1,j) = -rev(i,2,j)
-             rev(i,1,j) = 0.93*rev(i,2,j)
+            ! print "(f12.3)", saFact(kssa, d2Wall(i,1,j))
+             rev(i,1,j) = saFact(kssa, d2Wall(i,2,j))*rev(i,2,j)
           enddo
        enddo
 
@@ -356,7 +360,8 @@ subroutine bcEddyWall(nn)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
              ! rev(i,je,j) = -rev(i,jl,j)
-             rev(i,je,j) = 0.93*rev(i,jl,j)
+            ! print "(f12.3)", saFact(kssa, d2Wall(i,je,j))
+             rev(i,je,j) = saFact(kssa, d2Wall(i,jl,j))*rev(i,jl,j)
           enddo
        enddo
 
@@ -364,7 +369,8 @@ subroutine bcEddyWall(nn)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
              ! rev(i,j,1) = -rev(i,j,2)
-             rev(i,j,1) = 0.93*rev(i,j,2)
+            ! print "(f12.3)", saFact(kssa, d2Wall(i,j,2))
+             rev(i,j,1) = saFact(kssa, d2Wall(i,j,2))*rev(i,j,2)
           enddo
        enddo
 
@@ -372,7 +378,8 @@ subroutine bcEddyWall(nn)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
              ! rev(i,j,ke) = -rev(i,j,kl)
-             rev(i,j,ke) = 0.93*rev(i,j,kl)
+            ! print "(f12.3)", saFact(kssa, d2Wall(i,j,kl))
+             rev(i,j,ke) = saFact(kssa, d2Wall(i,j,kl))*rev(i,j,kl)
           enddo
        enddo
     end select
@@ -852,28 +859,28 @@ subroutine bcTurbWall(nn)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
                 ! bmti1(i,j,itu1,itu1) = one
-                bmti1(i,j,itu1,itu1) = -0.93
+                bmti1(i,j,itu1,itu1) = -saFact(kssa, d2Wall(2,i,j))
              enddo
           enddo
        case (iMax)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
                 ! bmti2(i,j,itu1,itu1) = one
-                bmti2(i,j,itu1,itu1) = -0.93
+                bmti2(i,j,itu1,itu1) = -saFact(kssa, d2Wall(il,i,j))
              enddo
           enddo
        case (jMin)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
                 ! bmtj1(i,j,itu1,itu1) = one
-                bmtj1(i,j,itu1,itu1) = -0.93
+                bmtj1(i,j,itu1,itu1) = -saFact(kssa, d2Wall(i,2,j))
              enddo
           enddo
        case (jMax)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
                 ! bmtj2(i,j,itu1,itu1) = one
-                bmtj2(i,j,itu1,itu1) = -0.93
+                bmtj2(i,j,itu1,itu1) = -saFact(kssa, d2Wall(i,jl,j))
              enddo
           enddo
 
@@ -881,7 +888,7 @@ subroutine bcTurbWall(nn)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
                 ! bmtk1(i,j,itu1,itu1) = one
-                bmtk1(i,j,itu1,itu1) = -0.93
+                bmtk1(i,j,itu1,itu1) = -saFact(kssa, d2Wall(i,j,2))
              enddo
           enddo
 
@@ -889,7 +896,7 @@ subroutine bcTurbWall(nn)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
                 ! bmtk2(i,j,itu1,itu1) = one
-                bmtk2(i,j,itu1,itu1) = -0.93
+                bmtk2(i,j,itu1,itu1) = -saFact(kssa, d2Wall(i,j,kl))
              enddo
           enddo
        end select
@@ -1404,4 +1411,21 @@ subroutine turbBCNSWall(secondHalo)
     enddo bocos
   end subroutine turbBCNSWall
 
+  function  saFact(ks, d)
+
+   use constants
+   implicit none
+
+   ! dummy arguments
+   real(kind=realType) :: saFact
+
+   ! local variablse
+   real(kind=realType) :: ks
+   real(kind=realType) :: d
+   
+
+   saFact = (ks - d/0.03) / (ks + d/0.03)
+
+  end function  saFact
+
 end module turbBCRoutines

From e4e89e75945a8f6fdbc159b3c91d0c11ce313b1e Mon Sep 17 00:00:00 2001
From: Anil Yildirim <anily@umich.edu>
Date: Fri, 15 Jan 2021 17:54:19 +0000
Subject: [PATCH 09/60] reran tapenade

---
 src/adjoint/outputForward/sa_d.f90            |  41 +++--
 .../outputForward/turbbcroutines_d.f90        | 140 ++++++++++++++----
 src/adjoint/outputReverse/sa_b.f90            |  87 ++++++-----
 .../outputReverse/turbbcroutines_b.f90        | 140 +++++++++++++++---
 src/adjoint/outputReverseFast/sa_fast_b.f90   |  83 +++++++----
 .../turbbcroutines_fast_b.f90                 |  69 +++++++--
 6 files changed, 418 insertions(+), 142 deletions(-)

diff --git a/src/adjoint/outputForward/sa_d.f90 b/src/adjoint/outputForward/sa_d.f90
index 882d43cc0..8baf51b7e 100644
--- a/src/adjoint/outputForward/sa_d.f90
+++ b/src/adjoint/outputForward/sa_d.f90
@@ -40,6 +40,8 @@ subroutine sasource_d()
     real(kind=realtype), parameter :: f23=two*third
 ! local variables.
     integer(kind=inttype) :: i, j, k, nn, ii
+    real(kind=realtype) :: dnew, cr1
+    real(kind=realtype) :: dnewd
     real(kind=realtype) :: fv1, fv2, ft2
     real(kind=realtype) :: fv1d, fv2d, ft2d
     real(kind=realtype) :: ss, sst, nu, dist2inv, chi, chi2, chi3
@@ -76,6 +78,9 @@ subroutine sasource_d()
     kar2inv = one/rsak**2
     cw36 = rsacw3**6
     cb3inv = one/rsacb3
+! constants for sa rough
+! ks=0.0001 
+    cr1 = 0.5
 ! determine the non-dimensional wheel speed of this block.
     omegaxd = sections(sectionid)%rotrate(1)*timerefd
     omegax = timeref*sections(sectionid)%rotrate(1)
@@ -274,23 +279,29 @@ subroutine sasource_d()
 ! wall distance squared, the ratio chi (ratio of nutilde
 ! and nu) and the functions fv1 and fv2. the latter corrects
 ! the production term near a viscous wall.
+! sa rough 
+            dnewd = d2walld(i, j, k)
+            dnew = d2wall(i, j, k) + 0.03*kssa
             nud = (rlvd(i, j, k)*w(i, j, k, irho)-rlv(i, j, k)*wd(i, j, &
 &             k, irho))/w(i, j, k, irho)**2
             nu = rlv(i, j, k)/w(i, j, k, irho)
-            dist2invd = -(one*2*d2wall(i, j, k)*d2walld(i, j, k)/(d2wall&
-&             (i, j, k)**2)**2)
-            dist2inv = one/d2wall(i, j, k)**2
-            chid = (wd(i, j, k, itu1)*nu-w(i, j, k, itu1)*nud)/nu**2
-            chi = w(i, j, k, itu1)/nu
+! dist2inv = one/(d2wall(i,j,k)**2)
+            dist2invd = -(one*2*dnew*dnewd/(dnew**2)**2)
+            dist2inv = one/dnew**2
+            chid = (wd(i, j, k, itu1)*nu-w(i, j, k, itu1)*nud)/nu**2 - &
+&             cr1*kssa*dnewd/dnew**2
+            chi = w(i, j, k, itu1)/nu + cr1*kssa/dnew
             chi2d = chid*chi + chi*chid
             chi2 = chi*chi
             chi3d = chid*chi2 + chi*chi2d
             chi3 = chi*chi2
             fv1d = (chi3d*(chi3+cv13)-chi3*chi3d)/(chi3+cv13)**2
             fv1 = chi3/(chi3+cv13)
-            fv2d = -((chid*(one+chi*fv1)-chi*(chid*fv1+chi*fv1d))/(one+&
-&             chi*fv1)**2)
-            fv2 = one - chi/(one+chi*fv1)
+! fv2      = one - chi/(one + chi*fv1)
+            fv2d = -((wd(i, j, k, itu1)*(nu+w(i, j, k, itu1)*fv1)-w(i, j&
+&             , k, itu1)*(nud+wd(i, j, k, itu1)*fv1+w(i, j, k, itu1)*&
+&             fv1d))/(nu+w(i, j, k, itu1)*fv1)**2)
+            fv2 = one - w(i, j, k, itu1)/(nu+w(i, j, k, itu1)*fv1)
 ! the function ft2, which is designed to keep a laminar
 ! solution laminar. when running in fully turbulent mode
 ! this function should be set to 0.0.
@@ -402,6 +413,7 @@ subroutine sasource()
     real(kind=realtype), parameter :: f23=two*third
 ! local variables.
     integer(kind=inttype) :: i, j, k, nn, ii
+    real(kind=realtype) :: dnew, cr1
     real(kind=realtype) :: fv1, fv2, ft2
     real(kind=realtype) :: ss, sst, nu, dist2inv, chi, chi2, chi3
     real(kind=realtype) :: rr, gg, gg6, termfw, fwsa, term1, term2
@@ -424,6 +436,9 @@ subroutine sasource()
     kar2inv = one/rsak**2
     cw36 = rsacw3**6
     cb3inv = one/rsacb3
+! constants for sa rough
+! ks=0.0001 
+    cr1 = 0.5
 ! determine the non-dimensional wheel speed of this block.
     omegax = timeref*sections(sectionid)%rotrate(1)
     omegay = timeref*sections(sectionid)%rotrate(2)
@@ -515,13 +530,17 @@ subroutine sasource()
 ! wall distance squared, the ratio chi (ratio of nutilde
 ! and nu) and the functions fv1 and fv2. the latter corrects
 ! the production term near a viscous wall.
+! sa rough 
+            dnew = d2wall(i, j, k) + 0.03*kssa
             nu = rlv(i, j, k)/w(i, j, k, irho)
-            dist2inv = one/d2wall(i, j, k)**2
-            chi = w(i, j, k, itu1)/nu
+! dist2inv = one/(d2wall(i,j,k)**2)
+            dist2inv = one/dnew**2
+            chi = w(i, j, k, itu1)/nu + cr1*kssa/dnew
             chi2 = chi*chi
             chi3 = chi*chi2
             fv1 = chi3/(chi3+cv13)
-            fv2 = one - chi/(one+chi*fv1)
+! fv2      = one - chi/(one + chi*fv1)
+            fv2 = one - w(i, j, k, itu1)/(nu+w(i, j, k, itu1)*fv1)
 ! the function ft2, which is designed to keep a laminar
 ! solution laminar. when running in fully turbulent mode
 ! this function should be set to 0.0.
diff --git a/src/adjoint/outputForward/turbbcroutines_d.f90 b/src/adjoint/outputForward/turbbcroutines_d.f90
index 64831e184..49ca80b7b 100644
--- a/src/adjoint/outputForward/turbbcroutines_d.f90
+++ b/src/adjoint/outputForward/turbbcroutines_d.f90
@@ -416,6 +416,7 @@ subroutine bceddywall_d(nn)
 !
     use constants
     use blockpointers
+    use inputphysics, only : kssa
     implicit none
 !
 !      subroutine arguments.
@@ -425,50 +426,69 @@ subroutine bceddywall_d(nn)
 !      local variables.
 !
     integer(kind=inttype) :: i, j
+    real(kind=realtype) :: result1
 ! determine the face id on which the subface is located and
 ! loop over the faces of the subface and set the eddy viscosity
-! in the halo cells.
+! in the halo cells. 
     select case  (bcfaceid(nn)) 
     case (imin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          revd(1, i, j) = -revd(2, i, j)
-          rev(1, i, j) = -rev(2, i, j)
+! rev(1,i,j) = -rev(2,i,j)
+!  print "(f12.3)", safact(kssa, d2wall(2,i,j))
+          result1 = safact(kssa, d2wall(2, i, j))
+          revd(1, i, j) = result1*revd(2, i, j)
+          rev(1, i, j) = result1*rev(2, i, j)
         end do
       end do
     case (imax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          revd(ie, i, j) = -revd(il, i, j)
-          rev(ie, i, j) = -rev(il, i, j)
+! rev(ie,i,j) = -rev(il,i,j)
+! print "(f12.3)", safact(kssa, d2wall(il,i,j))
+          result1 = safact(kssa, d2wall(il, i, j))
+          revd(ie, i, j) = result1*revd(il, i, j)
+          rev(ie, i, j) = result1*rev(il, i, j)
         end do
       end do
     case (jmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          revd(i, 1, j) = -revd(i, 2, j)
-          rev(i, 1, j) = -rev(i, 2, j)
+! rev(i,1,j) = -rev(i,2,j)
+! print "(f12.3)", safact(kssa, d2wall(i,1,j))
+          result1 = safact(kssa, d2wall(i, 2, j))
+          revd(i, 1, j) = result1*revd(i, 2, j)
+          rev(i, 1, j) = result1*rev(i, 2, j)
         end do
       end do
     case (jmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          revd(i, je, j) = -revd(i, jl, j)
-          rev(i, je, j) = -rev(i, jl, j)
+! rev(i,je,j) = -rev(i,jl,j)
+! print "(f12.3)", safact(kssa, d2wall(i,je,j))
+          result1 = safact(kssa, d2wall(i, jl, j))
+          revd(i, je, j) = result1*revd(i, jl, j)
+          rev(i, je, j) = result1*rev(i, jl, j)
         end do
       end do
     case (kmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          revd(i, j, 1) = -revd(i, j, 2)
-          rev(i, j, 1) = -rev(i, j, 2)
+! rev(i,j,1) = -rev(i,j,2)
+! print "(f12.3)", safact(kssa, d2wall(i,j,2))
+          result1 = safact(kssa, d2wall(i, j, 2))
+          revd(i, j, 1) = result1*revd(i, j, 2)
+          rev(i, j, 1) = result1*rev(i, j, 2)
         end do
       end do
     case (kmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          revd(i, j, ke) = -revd(i, j, kl)
-          rev(i, j, ke) = -rev(i, j, kl)
+! rev(i,j,ke) = -rev(i,j,kl)
+! print "(f12.3)", safact(kssa, d2wall(i,j,kl))
+          result1 = safact(kssa, d2wall(i, j, kl))
+          revd(i, j, ke) = result1*revd(i, j, kl)
+          rev(i, j, ke) = result1*rev(i, j, kl)
         end do
       end do
     end select
@@ -482,6 +502,7 @@ subroutine bceddywall(nn)
 !
     use constants
     use blockpointers
+    use inputphysics, only : kssa
     implicit none
 !
 !      subroutine arguments.
@@ -491,44 +512,63 @@ subroutine bceddywall(nn)
 !      local variables.
 !
     integer(kind=inttype) :: i, j
+    real(kind=realtype) :: result1
 ! determine the face id on which the subface is located and
 ! loop over the faces of the subface and set the eddy viscosity
-! in the halo cells.
+! in the halo cells. 
     select case  (bcfaceid(nn)) 
     case (imin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          rev(1, i, j) = -rev(2, i, j)
+! rev(1,i,j) = -rev(2,i,j)
+!  print "(f12.3)", safact(kssa, d2wall(2,i,j))
+          result1 = safact(kssa, d2wall(2, i, j))
+          rev(1, i, j) = result1*rev(2, i, j)
         end do
       end do
     case (imax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          rev(ie, i, j) = -rev(il, i, j)
+! rev(ie,i,j) = -rev(il,i,j)
+! print "(f12.3)", safact(kssa, d2wall(il,i,j))
+          result1 = safact(kssa, d2wall(il, i, j))
+          rev(ie, i, j) = result1*rev(il, i, j)
         end do
       end do
     case (jmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          rev(i, 1, j) = -rev(i, 2, j)
+! rev(i,1,j) = -rev(i,2,j)
+! print "(f12.3)", safact(kssa, d2wall(i,1,j))
+          result1 = safact(kssa, d2wall(i, 2, j))
+          rev(i, 1, j) = result1*rev(i, 2, j)
         end do
       end do
     case (jmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          rev(i, je, j) = -rev(i, jl, j)
+! rev(i,je,j) = -rev(i,jl,j)
+! print "(f12.3)", safact(kssa, d2wall(i,je,j))
+          result1 = safact(kssa, d2wall(i, jl, j))
+          rev(i, je, j) = result1*rev(i, jl, j)
         end do
       end do
     case (kmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          rev(i, j, 1) = -rev(i, j, 2)
+! rev(i,j,1) = -rev(i,j,2)
+! print "(f12.3)", safact(kssa, d2wall(i,j,2))
+          result1 = safact(kssa, d2wall(i, j, 2))
+          rev(i, j, 1) = result1*rev(i, j, 2)
         end do
       end do
     case (kmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          rev(i, j, ke) = -rev(i, j, kl)
+! rev(i,j,ke) = -rev(i,j,kl)
+! print "(f12.3)", safact(kssa, d2wall(i,j,kl))
+          result1 = safact(kssa, d2wall(i, j, kl))
+          rev(i, j, ke) = result1*rev(i, j, kl)
         end do
       end do
     end select
@@ -1180,6 +1220,7 @@ subroutine bcturbwall_d(nn)
     real(kind=realtype), dimension(:, :), pointer :: rlv2, dd2wall
     intrinsic min
     intrinsic max
+    real(kind=realtype) :: result1
     integer(kind=inttype) :: y12
     integer(kind=inttype) :: y11
     integer(kind=inttype) :: y10
@@ -1204,37 +1245,49 @@ subroutine bcturbwall_d(nn)
       case (imin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            bmti1(i, j, itu1, itu1) = one
+! bmti1(i,j,itu1,itu1) = one
+            result1 = safact(kssa, d2wall(2, i, j))
+            bmti1(i, j, itu1, itu1) = -result1
           end do
         end do
       case (imax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            bmti2(i, j, itu1, itu1) = one
+! bmti2(i,j,itu1,itu1) = one
+            result1 = safact(kssa, d2wall(il, i, j))
+            bmti2(i, j, itu1, itu1) = -result1
           end do
         end do
       case (jmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            bmtj1(i, j, itu1, itu1) = one
+! bmtj1(i,j,itu1,itu1) = one
+            result1 = safact(kssa, d2wall(i, 2, j))
+            bmtj1(i, j, itu1, itu1) = -result1
           end do
         end do
       case (jmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            bmtj2(i, j, itu1, itu1) = one
+! bmtj2(i,j,itu1,itu1) = one
+            result1 = safact(kssa, d2wall(i, jl, j))
+            bmtj2(i, j, itu1, itu1) = -result1
           end do
         end do
       case (kmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            bmtk1(i, j, itu1, itu1) = one
+! bmtk1(i,j,itu1,itu1) = one
+            result1 = safact(kssa, d2wall(i, j, 2))
+            bmtk1(i, j, itu1, itu1) = -result1
           end do
         end do
       case (kmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            bmtk2(i, j, itu1, itu1) = one
+! bmtk2(i,j,itu1,itu1) = one
+            result1 = safact(kssa, d2wall(i, j, kl))
+            bmtk2(i, j, itu1, itu1) = -result1
           end do
         end do
       end select
@@ -1879,6 +1932,7 @@ subroutine bcturbwall(nn)
     real(kind=realtype), dimension(:, :), pointer :: rlv2, dd2wall
     intrinsic min
     intrinsic max
+    real(kind=realtype) :: result1
     integer(kind=inttype) :: y12
     integer(kind=inttype) :: y11
     integer(kind=inttype) :: y10
@@ -1903,37 +1957,49 @@ subroutine bcturbwall(nn)
       case (imin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            bmti1(i, j, itu1, itu1) = one
+! bmti1(i,j,itu1,itu1) = one
+            result1 = safact(kssa, d2wall(2, i, j))
+            bmti1(i, j, itu1, itu1) = -result1
           end do
         end do
       case (imax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            bmti2(i, j, itu1, itu1) = one
+! bmti2(i,j,itu1,itu1) = one
+            result1 = safact(kssa, d2wall(il, i, j))
+            bmti2(i, j, itu1, itu1) = -result1
           end do
         end do
       case (jmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            bmtj1(i, j, itu1, itu1) = one
+! bmtj1(i,j,itu1,itu1) = one
+            result1 = safact(kssa, d2wall(i, 2, j))
+            bmtj1(i, j, itu1, itu1) = -result1
           end do
         end do
       case (jmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            bmtj2(i, j, itu1, itu1) = one
+! bmtj2(i,j,itu1,itu1) = one
+            result1 = safact(kssa, d2wall(i, jl, j))
+            bmtj2(i, j, itu1, itu1) = -result1
           end do
         end do
       case (kmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            bmtk1(i, j, itu1, itu1) = one
+! bmtk1(i,j,itu1,itu1) = one
+            result1 = safact(kssa, d2wall(i, j, 2))
+            bmtk1(i, j, itu1, itu1) = -result1
           end do
         end do
       case (kmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            bmtk2(i, j, itu1, itu1) = one
+! bmtk2(i,j,itu1,itu1) = one
+            result1 = safact(kssa, d2wall(i, j, kl))
+            bmtk2(i, j, itu1, itu1) = -result1
           end do
         end do
       end select
@@ -2193,4 +2259,14 @@ subroutine bcturbwall(nn)
       end select
     end select
   end subroutine bcturbwall
+  function safact(ks, d)
+    use constants
+    implicit none
+! dummy arguments
+    real(kind=realtype) :: safact
+! local variablse
+    real(kind=realtype) :: ks
+    real(kind=realtype) :: d
+    safact = (ks-d/0.03)/(ks+d/0.03)
+  end function safact
 end module turbbcroutines_d
diff --git a/src/adjoint/outputReverse/sa_b.f90 b/src/adjoint/outputReverse/sa_b.f90
index dc9178fa9..b5d866f1d 100644
--- a/src/adjoint/outputReverse/sa_b.f90
+++ b/src/adjoint/outputReverse/sa_b.f90
@@ -42,6 +42,8 @@ subroutine sasource_b()
     real(kind=realtype), parameter :: f23=two*third
 ! local variables.
     integer(kind=inttype) :: i, j, k, nn, ii
+    real(kind=realtype) :: dnew, cr1
+    real(kind=realtype) :: dnewd
     real(kind=realtype) :: fv1, fv2, ft2
     real(kind=realtype) :: fv1d, fv2d, ft2d
     real(kind=realtype) :: ss, sst, nu, dist2inv, chi, chi2, chi3
@@ -69,10 +71,10 @@ subroutine sasource_b()
     intrinsic min
     intrinsic max
     integer :: branch
+    real(kind=realtype) :: temp3
     real(kind=realtype) :: temp2
     real(kind=realtype) :: temp1
     real(kind=realtype) :: temp0
-    real(kind=realtype) :: tempd10
     real(kind=realtype) :: min1
     real(kind=realtype) :: min1d
     real(kind=realtype) :: tempd9
@@ -93,6 +95,9 @@ subroutine sasource_b()
     cv13 = rsacv1**3
     kar2inv = one/rsak**2
     cw36 = rsacw3**6
+! constants for sa rough
+! ks=0.0001 
+    cr1 = 0.5
 ! determine the non-dimensional wheel speed of this block.
     omegax = timeref*sections(sectionid)%rotrate(1)
     omegay = timeref*sections(sectionid)%rotrate(2)
@@ -194,13 +199,17 @@ subroutine sasource_b()
 ! wall distance squared, the ratio chi (ratio of nutilde
 ! and nu) and the functions fv1 and fv2. the latter corrects
 ! the production term near a viscous wall.
+! sa rough 
+        dnew = d2wall(i, j, k) + 0.03*kssa
         nu = rlv(i, j, k)/w(i, j, k, irho)
-        dist2inv = one/d2wall(i, j, k)**2
-        chi = w(i, j, k, itu1)/nu
+! dist2inv = one/(d2wall(i,j,k)**2)
+        dist2inv = one/dnew**2
+        chi = w(i, j, k, itu1)/nu + cr1*kssa/dnew
         chi2 = chi*chi
         chi3 = chi*chi2
         fv1 = chi3/(chi3+cv13)
-        fv2 = one - chi/(one+chi*fv1)
+! fv2      = one - chi/(one + chi*fv1)
+        fv2 = one - w(i, j, k, itu1)/(nu+w(i, j, k, itu1)*fv1)
 ! the function ft2, which is designed to keep a laminar
 ! solution laminar. when running in fully turbulent mode
 ! this function should be set to 0.0.
@@ -262,18 +271,18 @@ subroutine sasource_b()
         end if
         term2 = dist2inv*(kar2inv*rsacb1*((one-ft2)*fv2+ft2)-rsacw1*fwsa&
 &         )
-        tempd9 = w(i, j, k, itu1)*scratchd(i, j, k, idvt)
-        temp2 = w(i, j, k, itu1)
-        term1d = tempd9
-        term2d = temp2*tempd9
-        wd(i, j, k, itu1) = wd(i, j, k, itu1) + (term1+term2*temp2)*&
-&         scratchd(i, j, k, idvt) + term2*tempd9
+        tempd8 = w(i, j, k, itu1)*scratchd(i, j, k, idvt)
+        temp3 = w(i, j, k, itu1)
+        term1d = tempd8
+        term2d = temp3*tempd8
+        wd(i, j, k, itu1) = wd(i, j, k, itu1) + (term1+term2*temp3)*&
+&         scratchd(i, j, k, idvt) + term2*tempd8
         scratchd(i, j, k, idvt) = 0.0_8
-        tempd10 = dist2inv*kar2inv*rsacb1*term2d
+        tempd9 = dist2inv*kar2inv*rsacb1*term2d
         dist2invd = (kar2inv*rsacb1*((one-ft2)*fv2+ft2)-rsacw1*fwsa)*&
 &         term2d
-        ft2d = (1.0_8-fv2)*tempd10
-        fv2d = (one-ft2)*tempd10
+        ft2d = (1.0_8-fv2)*tempd9
+        fv2d = (one-ft2)*tempd9
         fwsad = -(dist2inv*rsacw1*term2d)
         call popcontrol1b(branch)
         if (branch .ne. 0) then
@@ -281,21 +290,21 @@ subroutine sasource_b()
           ssd = ssd + rsacb1*(one-ft2)*term1d
         end if
         termfwd = gg*fwsad
-        temp1 = (one+cw36)/(cw36+gg6)
-        if (temp1 .le. 0.0_8 .and. (sixth .eq. 0.0_8 .or. sixth .ne. int&
+        temp2 = (one+cw36)/(cw36+gg6)
+        if (temp2 .le. 0.0_8 .and. (sixth .eq. 0.0_8 .or. sixth .ne. int&
 &           (sixth))) then
           gg6d = 0.0
         else
-          gg6d = -(sixth*temp1**(sixth-1)*temp1*termfwd/(cw36+gg6))
+          gg6d = -(sixth*temp2**(sixth-1)*temp2*termfwd/(cw36+gg6))
         end if
         ggd = 6*gg**5*gg6d + termfw*fwsad
         rrd = (rsacw2*6*rr**5-rsacw2+1.0_8)*ggd
         call popcontrol1b(branch)
         if (branch .eq. 0) rrd = 0.0_8
-        tempd8 = w(i, j, k, itu1)*kar2inv*rrd/sst
+        tempd7 = w(i, j, k, itu1)*kar2inv*rrd/sst
         wd(i, j, k, itu1) = wd(i, j, k, itu1) + kar2inv*dist2inv*rrd/sst
-        dist2invd = dist2invd + tempd8
-        sstd = -(dist2inv*tempd8/sst)
+        dist2invd = dist2invd + tempd7
+        sstd = -(dist2inv*tempd7/sst)
         call popcontrol1b(branch)
         if (branch .eq. 0) sstd = 0.0_8
         call popcontrol1b(branch)
@@ -310,32 +319,34 @@ subroutine sasource_b()
           if (.not.two*strainmag2 .eq. 0.0_8) strainmag2d = strainmag2d &
 &             + two*y1d/(2.0*sqrt(two*strainmag2))
         end if
-        tempd7 = kar2inv*w(i, j, k, itu1)*sstd
+        tempd6 = kar2inv*w(i, j, k, itu1)*sstd
         ssd = ssd + sstd
         wd(i, j, k, itu1) = wd(i, j, k, itu1) + kar2inv*fv2*dist2inv*&
 &         sstd
-        fv2d = fv2d + dist2inv*tempd7
-        dist2invd = dist2invd + fv2*tempd7
+        fv2d = fv2d + dist2inv*tempd6
+        dist2invd = dist2invd + fv2*tempd6
         call popcontrol1b(branch)
         if (branch .eq. 0) then
           chi2d = -(exp(-(rsact4*chi2))*rsact3*rsact4*ft2d)
         else
           chi2d = 0.0_8
         end if
-        tempd4 = -(fv2d/(one+chi*fv1))
-        tempd5 = -(chi*tempd4/(one+chi*fv1))
-        fv1d = chi*tempd5
-        tempd6 = fv1d/(cv13+chi3)
-        chi3d = (1.0_8-chi3/(cv13+chi3))*tempd6
+        temp1 = w(i, j, k, itu1)
+        temp0 = nu + temp1*fv1
+        tempd4 = w(i, j, k, itu1)*fv2d/temp0**2
+        wd(i, j, k, itu1) = wd(i, j, k, itu1) + fv1*tempd4 - fv2d/temp0
+        fv1d = temp1*tempd4
+        tempd5 = fv1d/(cv13+chi3)
+        chi3d = (1.0_8-chi3/(cv13+chi3))*tempd5
         chi2d = chi2d + chi*chi3d
-        chid = chi2*chi3d + 2*chi*chi2d + fv1*tempd5 + tempd4
+        chid = 2*chi*chi2d + chi2*chi3d
+        nud = tempd4 - w(i, j, k, itu1)*chid/nu**2
         wd(i, j, k, itu1) = wd(i, j, k, itu1) + chid/nu
-        nud = -(w(i, j, k, itu1)*chid/nu**2)
-        temp0 = d2wall(i, j, k)
-        d2walld(i, j, k) = d2walld(i, j, k) - one*2*dist2invd/temp0**3
+        dnewd = -(one*2*dist2invd/dnew**3) - cr1*kssa*chid/dnew**2
         temp = w(i, j, k, irho)
         rlvd(i, j, k) = rlvd(i, j, k) + nud/temp
         wd(i, j, k, irho) = wd(i, j, k, irho) - rlv(i, j, k)*nud/temp**2
+        d2walld(i, j, k) = d2walld(i, j, k) + dnewd
         call popcontrol2b(branch)
         if (branch .eq. 0) then
           if (strainprod .eq. 0.0_8) then
@@ -537,6 +548,7 @@ subroutine sasource()
     real(kind=realtype), parameter :: f23=two*third
 ! local variables.
     integer(kind=inttype) :: i, j, k, nn, ii
+    real(kind=realtype) :: dnew, cr1
     real(kind=realtype) :: fv1, fv2, ft2
     real(kind=realtype) :: ss, sst, nu, dist2inv, chi, chi2, chi3
     real(kind=realtype) :: rr, gg, gg6, termfw, fwsa, term1, term2
@@ -559,6 +571,9 @@ subroutine sasource()
     kar2inv = one/rsak**2
     cw36 = rsacw3**6
     cb3inv = one/rsacb3
+! constants for sa rough
+! ks=0.0001 
+    cr1 = 0.5
 ! determine the non-dimensional wheel speed of this block.
     omegax = timeref*sections(sectionid)%rotrate(1)
     omegay = timeref*sections(sectionid)%rotrate(2)
@@ -651,13 +666,17 @@ subroutine sasource()
 ! wall distance squared, the ratio chi (ratio of nutilde
 ! and nu) and the functions fv1 and fv2. the latter corrects
 ! the production term near a viscous wall.
+! sa rough 
+        dnew = d2wall(i, j, k) + 0.03*kssa
         nu = rlv(i, j, k)/w(i, j, k, irho)
-        dist2inv = one/d2wall(i, j, k)**2
-        chi = w(i, j, k, itu1)/nu
+! dist2inv = one/(d2wall(i,j,k)**2)
+        dist2inv = one/dnew**2
+        chi = w(i, j, k, itu1)/nu + cr1*kssa/dnew
         chi2 = chi*chi
         chi3 = chi*chi2
         fv1 = chi3/(chi3+cv13)
-        fv2 = one - chi/(one+chi*fv1)
+! fv2      = one - chi/(one + chi*fv1)
+        fv2 = one - w(i, j, k, itu1)/(nu+w(i, j, k, itu1)*fv1)
 ! the function ft2, which is designed to keep a laminar
 ! solution laminar. when running in fully turbulent mode
 ! this function should be set to 0.0.
diff --git a/src/adjoint/outputReverse/turbbcroutines_b.f90 b/src/adjoint/outputReverse/turbbcroutines_b.f90
index 22baac6eb..af8601433 100644
--- a/src/adjoint/outputReverse/turbbcroutines_b.f90
+++ b/src/adjoint/outputReverse/turbbcroutines_b.f90
@@ -570,6 +570,7 @@ subroutine bceddywall_b(nn)
 !
     use constants
     use blockpointers
+    use inputphysics, only : kssa
     implicit none
 !
 !      subroutine arguments.
@@ -579,6 +580,7 @@ subroutine bceddywall_b(nn)
 !      local variables.
 !
     integer(kind=inttype) :: i, j
+    real(kind=realtype) :: result1
     real(kind=realtype) :: tmp
     real(kind=realtype) :: tmp0
     real(kind=realtype) :: tmp1
@@ -587,51 +589,105 @@ subroutine bceddywall_b(nn)
     real(kind=realtype) :: tmpd0
 ! determine the face id on which the subface is located and
 ! loop over the faces of the subface and set the eddy viscosity
-! in the halo cells.
+! in the halo cells. 
     select case  (bcfaceid(nn)) 
     case (imin) 
+      do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
+        do i=bcdata(nn)%icbeg,bcdata(nn)%icend
+! rev(1,i,j) = -rev(2,i,j)
+!  print "(f12.3)", safact(kssa, d2wall(2,i,j))
+          call pushreal8(result1)
+          result1 = safact(kssa, d2wall(2, i, j))
+        end do
+      end do
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
         do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
-          revd(2, i, j) = revd(2, i, j) - revd(1, i, j)
+          revd(2, i, j) = revd(2, i, j) + result1*revd(1, i, j)
           revd(1, i, j) = 0.0_8
+          call popreal8(result1)
         end do
       end do
     case (imax) 
+      do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
+        do i=bcdata(nn)%icbeg,bcdata(nn)%icend
+! rev(ie,i,j) = -rev(il,i,j)
+! print "(f12.3)", safact(kssa, d2wall(il,i,j))
+          call pushreal8(result1)
+          result1 = safact(kssa, d2wall(il, i, j))
+        end do
+      end do
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
         do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
           tmpd = revd(ie, i, j)
           revd(ie, i, j) = 0.0_8
-          revd(il, i, j) = revd(il, i, j) - tmpd
+          revd(il, i, j) = revd(il, i, j) + result1*tmpd
+          call popreal8(result1)
         end do
       end do
     case (jmin) 
+      do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
+        do i=bcdata(nn)%icbeg,bcdata(nn)%icend
+! rev(i,1,j) = -rev(i,2,j)
+! print "(f12.3)", safact(kssa, d2wall(i,1,j))
+          call pushreal8(result1)
+          result1 = safact(kssa, d2wall(i, 2, j))
+        end do
+      end do
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
         do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
-          revd(i, 2, j) = revd(i, 2, j) - revd(i, 1, j)
+          revd(i, 2, j) = revd(i, 2, j) + result1*revd(i, 1, j)
           revd(i, 1, j) = 0.0_8
+          call popreal8(result1)
         end do
       end do
     case (jmax) 
+      do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
+        do i=bcdata(nn)%icbeg,bcdata(nn)%icend
+! rev(i,je,j) = -rev(i,jl,j)
+! print "(f12.3)", safact(kssa, d2wall(i,je,j))
+          call pushreal8(result1)
+          result1 = safact(kssa, d2wall(i, jl, j))
+        end do
+      end do
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
         do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
           tmpd0 = revd(i, je, j)
           revd(i, je, j) = 0.0_8
-          revd(i, jl, j) = revd(i, jl, j) - tmpd0
+          revd(i, jl, j) = revd(i, jl, j) + result1*tmpd0
+          call popreal8(result1)
         end do
       end do
     case (kmin) 
+      do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
+        do i=bcdata(nn)%icbeg,bcdata(nn)%icend
+! rev(i,j,1) = -rev(i,j,2)
+! print "(f12.3)", safact(kssa, d2wall(i,j,2))
+          call pushreal8(result1)
+          result1 = safact(kssa, d2wall(i, j, 2))
+        end do
+      end do
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
         do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
-          revd(i, j, 2) = revd(i, j, 2) - revd(i, j, 1)
+          revd(i, j, 2) = revd(i, j, 2) + result1*revd(i, j, 1)
           revd(i, j, 1) = 0.0_8
+          call popreal8(result1)
         end do
       end do
     case (kmax) 
+      do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
+        do i=bcdata(nn)%icbeg,bcdata(nn)%icend
+! rev(i,j,ke) = -rev(i,j,kl)
+! print "(f12.3)", safact(kssa, d2wall(i,j,kl))
+          call pushreal8(result1)
+          result1 = safact(kssa, d2wall(i, j, kl))
+        end do
+      end do
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
         do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
           tmpd1 = revd(i, j, ke)
           revd(i, j, ke) = 0.0_8
-          revd(i, j, kl) = revd(i, j, kl) - tmpd1
+          revd(i, j, kl) = revd(i, j, kl) + result1*tmpd1
+          call popreal8(result1)
         end do
       end do
     end select
@@ -645,6 +701,7 @@ subroutine bceddywall(nn)
 !
     use constants
     use blockpointers
+    use inputphysics, only : kssa
     implicit none
 !
 !      subroutine arguments.
@@ -654,44 +711,63 @@ subroutine bceddywall(nn)
 !      local variables.
 !
     integer(kind=inttype) :: i, j
+    real(kind=realtype) :: result1
 ! determine the face id on which the subface is located and
 ! loop over the faces of the subface and set the eddy viscosity
-! in the halo cells.
+! in the halo cells. 
     select case  (bcfaceid(nn)) 
     case (imin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          rev(1, i, j) = -rev(2, i, j)
+! rev(1,i,j) = -rev(2,i,j)
+!  print "(f12.3)", safact(kssa, d2wall(2,i,j))
+          result1 = safact(kssa, d2wall(2, i, j))
+          rev(1, i, j) = result1*rev(2, i, j)
         end do
       end do
     case (imax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          rev(ie, i, j) = -rev(il, i, j)
+! rev(ie,i,j) = -rev(il,i,j)
+! print "(f12.3)", safact(kssa, d2wall(il,i,j))
+          result1 = safact(kssa, d2wall(il, i, j))
+          rev(ie, i, j) = result1*rev(il, i, j)
         end do
       end do
     case (jmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          rev(i, 1, j) = -rev(i, 2, j)
+! rev(i,1,j) = -rev(i,2,j)
+! print "(f12.3)", safact(kssa, d2wall(i,1,j))
+          result1 = safact(kssa, d2wall(i, 2, j))
+          rev(i, 1, j) = result1*rev(i, 2, j)
         end do
       end do
     case (jmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          rev(i, je, j) = -rev(i, jl, j)
+! rev(i,je,j) = -rev(i,jl,j)
+! print "(f12.3)", safact(kssa, d2wall(i,je,j))
+          result1 = safact(kssa, d2wall(i, jl, j))
+          rev(i, je, j) = result1*rev(i, jl, j)
         end do
       end do
     case (kmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          rev(i, j, 1) = -rev(i, j, 2)
+! rev(i,j,1) = -rev(i,j,2)
+! print "(f12.3)", safact(kssa, d2wall(i,j,2))
+          result1 = safact(kssa, d2wall(i, j, 2))
+          rev(i, j, 1) = result1*rev(i, j, 2)
         end do
       end do
     case (kmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          rev(i, j, ke) = -rev(i, j, kl)
+! rev(i,j,ke) = -rev(i,j,kl)
+! print "(f12.3)", safact(kssa, d2wall(i,j,kl))
+          result1 = safact(kssa, d2wall(i, j, kl))
+          rev(i, j, ke) = result1*rev(i, j, kl)
         end do
       end do
     end select
@@ -1359,6 +1435,7 @@ subroutine bcturbwall_b(nn)
     real(kind=realtype), dimension(:, :), pointer :: rlv2, dd2wall
     intrinsic min
     intrinsic max
+    real(kind=realtype) :: result1
     integer :: branch
     real(kind=realtype) :: temp3
     real(kind=realtype) :: temp2
@@ -2231,6 +2308,7 @@ subroutine bcturbwall(nn)
     real(kind=realtype), dimension(:, :), pointer :: rlv2, dd2wall
     intrinsic min
     intrinsic max
+    real(kind=realtype) :: result1
     integer(kind=inttype) :: y12
     integer(kind=inttype) :: y11
     integer(kind=inttype) :: y10
@@ -2255,37 +2333,49 @@ subroutine bcturbwall(nn)
       case (imin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            bmti1(i, j, itu1, itu1) = one
+! bmti1(i,j,itu1,itu1) = one
+            result1 = safact(kssa, d2wall(2, i, j))
+            bmti1(i, j, itu1, itu1) = -result1
           end do
         end do
       case (imax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            bmti2(i, j, itu1, itu1) = one
+! bmti2(i,j,itu1,itu1) = one
+            result1 = safact(kssa, d2wall(il, i, j))
+            bmti2(i, j, itu1, itu1) = -result1
           end do
         end do
       case (jmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            bmtj1(i, j, itu1, itu1) = one
+! bmtj1(i,j,itu1,itu1) = one
+            result1 = safact(kssa, d2wall(i, 2, j))
+            bmtj1(i, j, itu1, itu1) = -result1
           end do
         end do
       case (jmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            bmtj2(i, j, itu1, itu1) = one
+! bmtj2(i,j,itu1,itu1) = one
+            result1 = safact(kssa, d2wall(i, jl, j))
+            bmtj2(i, j, itu1, itu1) = -result1
           end do
         end do
       case (kmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            bmtk1(i, j, itu1, itu1) = one
+! bmtk1(i,j,itu1,itu1) = one
+            result1 = safact(kssa, d2wall(i, j, 2))
+            bmtk1(i, j, itu1, itu1) = -result1
           end do
         end do
       case (kmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            bmtk2(i, j, itu1, itu1) = one
+! bmtk2(i,j,itu1,itu1) = one
+            result1 = safact(kssa, d2wall(i, j, kl))
+            bmtk2(i, j, itu1, itu1) = -result1
           end do
         end do
       end select
@@ -2545,4 +2635,14 @@ subroutine bcturbwall(nn)
       end select
     end select
   end subroutine bcturbwall
+  function safact(ks, d)
+    use constants
+    implicit none
+! dummy arguments
+    real(kind=realtype) :: safact
+! local variablse
+    real(kind=realtype) :: ks
+    real(kind=realtype) :: d
+    safact = (ks-d/0.03)/(ks+d/0.03)
+  end function safact
 end module turbbcroutines_b
diff --git a/src/adjoint/outputReverseFast/sa_fast_b.f90 b/src/adjoint/outputReverseFast/sa_fast_b.f90
index 6114929a6..3de8c9f66 100644
--- a/src/adjoint/outputReverseFast/sa_fast_b.f90
+++ b/src/adjoint/outputReverseFast/sa_fast_b.f90
@@ -37,6 +37,7 @@ subroutine sasource_fast_b()
     real(kind=realtype), parameter :: f23=two*third
 ! local variables.
     integer(kind=inttype) :: i, j, k, nn, ii
+    real(kind=realtype) :: dnew, cr1
     real(kind=realtype) :: fv1, fv2, ft2
     real(kind=realtype) :: fv1d, fv2d, ft2d
     real(kind=realtype) :: ss, sst, nu, dist2inv, chi, chi2, chi3
@@ -62,9 +63,10 @@ subroutine sasource_fast_b()
     intrinsic min
     intrinsic max
     integer :: branch
+    real(kind=realtype) :: temp3
+    real(kind=realtype) :: temp2
     real(kind=realtype) :: temp1
     real(kind=realtype) :: temp0
-    real(kind=realtype) :: tempd10
     real(kind=realtype) :: min1
     real(kind=realtype) :: min1d
     real(kind=realtype) :: tempd9
@@ -85,6 +87,9 @@ subroutine sasource_fast_b()
     cv13 = rsacv1**3
     kar2inv = one/rsak**2
     cw36 = rsacw3**6
+! constants for sa rough
+! ks=0.0001 
+    cr1 = 0.5
 ! determine the non-dimensional wheel speed of this block.
     omegax = timeref*sections(sectionid)%rotrate(1)
     omegay = timeref*sections(sectionid)%rotrate(2)
@@ -182,13 +187,17 @@ subroutine sasource_fast_b()
 ! wall distance squared, the ratio chi (ratio of nutilde
 ! and nu) and the functions fv1 and fv2. the latter corrects
 ! the production term near a viscous wall.
+! sa rough 
+        dnew = d2wall(i, j, k) + 0.03*kssa
         nu = rlv(i, j, k)/w(i, j, k, irho)
-        dist2inv = one/d2wall(i, j, k)**2
-        chi = w(i, j, k, itu1)/nu
+! dist2inv = one/(d2wall(i,j,k)**2)
+        dist2inv = one/dnew**2
+        chi = w(i, j, k, itu1)/nu + cr1*kssa/dnew
         chi2 = chi*chi
         chi3 = chi*chi2
         fv1 = chi3/(chi3+cv13)
-        fv2 = one - chi/(one+chi*fv1)
+! fv2      = one - chi/(one + chi*fv1)
+        fv2 = one - w(i, j, k, itu1)/(nu+w(i, j, k, itu1)*fv1)
 ! the function ft2, which is designed to keep a laminar
 ! solution laminar. when running in fully turbulent mode
 ! this function should be set to 0.0.
@@ -262,16 +271,16 @@ subroutine sasource_fast_b()
         end if
         term2 = dist2inv*(kar2inv*rsacb1*((one-ft2)*fv2+ft2)-rsacw1*fwsa&
 &         )
-        tempd9 = w(i, j, k, itu1)*scratchd(i, j, k, idvt)
-        temp1 = w(i, j, k, itu1)
-        term1d = tempd9
-        term2d = temp1*tempd9
-        wd(i, j, k, itu1) = wd(i, j, k, itu1) + (term1+term2*temp1)*&
-&         scratchd(i, j, k, idvt) + term2*tempd9
+        tempd8 = w(i, j, k, itu1)*scratchd(i, j, k, idvt)
+        temp3 = w(i, j, k, itu1)
+        term1d = tempd8
+        term2d = temp3*tempd8
+        wd(i, j, k, itu1) = wd(i, j, k, itu1) + (term1+term2*temp3)*&
+&         scratchd(i, j, k, idvt) + term2*tempd8
         scratchd(i, j, k, idvt) = 0.0_8
-        tempd10 = dist2inv*kar2inv*rsacb1*term2d
-        ft2d = (1.0_8-fv2)*tempd10
-        fv2d = (one-ft2)*tempd10
+        tempd9 = dist2inv*kar2inv*rsacb1*term2d
+        ft2d = (1.0_8-fv2)*tempd9
+        fv2d = (one-ft2)*tempd9
         fwsad = -(dist2inv*rsacw1*term2d)
 branch = myIntStack(myIntPtr)
  myIntPtr = myIntPtr - 1
@@ -280,21 +289,21 @@ subroutine sasource_fast_b()
           ssd = ssd + rsacb1*(one-ft2)*term1d
         end if
         termfwd = gg*fwsad
-        temp0 = (one+cw36)/(cw36+gg6)
-        if (temp0 .le. 0.0_8 .and. (sixth .eq. 0.0_8 .or. sixth .ne. int&
+        temp2 = (one+cw36)/(cw36+gg6)
+        if (temp2 .le. 0.0_8 .and. (sixth .eq. 0.0_8 .or. sixth .ne. int&
 &           (sixth))) then
           gg6d = 0.0
         else
-          gg6d = -(sixth*temp0**(sixth-1)*temp0*termfwd/(cw36+gg6))
+          gg6d = -(sixth*temp2**(sixth-1)*temp2*termfwd/(cw36+gg6))
         end if
         ggd = 6*gg**5*gg6d + termfw*fwsad
         rrd = (rsacw2*6*rr**5-rsacw2+1.0_8)*ggd
 branch = myIntStack(myIntPtr)
  myIntPtr = myIntPtr - 1
         if (branch .eq. 0) rrd = 0.0_8
-        tempd8 = kar2inv*dist2inv*rrd/sst
-        wd(i, j, k, itu1) = wd(i, j, k, itu1) + tempd8
-        sstd = -(w(i, j, k, itu1)*tempd8/sst)
+        tempd7 = kar2inv*dist2inv*rrd/sst
+        wd(i, j, k, itu1) = wd(i, j, k, itu1) + tempd7
+        sstd = -(w(i, j, k, itu1)*tempd7/sst)
 branch = myIntStack(myIntPtr)
  myIntPtr = myIntPtr - 1
         if (branch .eq. 0) sstd = 0.0_8
@@ -312,10 +321,10 @@ subroutine sasource_fast_b()
           if (.not.two*strainmag2 .eq. 0.0_8) strainmag2d = strainmag2d &
 &             + two*y1d/(2.0*sqrt(two*strainmag2))
         end if
-        tempd7 = kar2inv*dist2inv*sstd
+        tempd6 = kar2inv*dist2inv*sstd
         ssd = ssd + sstd
-        wd(i, j, k, itu1) = wd(i, j, k, itu1) + fv2*tempd7
-        fv2d = fv2d + w(i, j, k, itu1)*tempd7
+        wd(i, j, k, itu1) = wd(i, j, k, itu1) + fv2*tempd6
+        fv2d = fv2d + w(i, j, k, itu1)*tempd6
 branch = myIntStack(myIntPtr)
  myIntPtr = myIntPtr - 1
         if (branch .eq. 0) then
@@ -323,15 +332,17 @@ subroutine sasource_fast_b()
         else
           chi2d = 0.0_8
         end if
-        tempd4 = -(fv2d/(one+chi*fv1))
-        tempd5 = -(chi*tempd4/(one+chi*fv1))
-        fv1d = chi*tempd5
-        tempd6 = fv1d/(cv13+chi3)
-        chi3d = (1.0_8-chi3/(cv13+chi3))*tempd6
+        temp1 = w(i, j, k, itu1)
+        temp0 = nu + temp1*fv1
+        tempd4 = w(i, j, k, itu1)*fv2d/temp0**2
+        wd(i, j, k, itu1) = wd(i, j, k, itu1) + fv1*tempd4 - fv2d/temp0
+        fv1d = temp1*tempd4
+        tempd5 = fv1d/(cv13+chi3)
+        chi3d = (1.0_8-chi3/(cv13+chi3))*tempd5
         chi2d = chi2d + chi*chi3d
-        chid = chi2*chi3d + 2*chi*chi2d + fv1*tempd5 + tempd4
+        chid = 2*chi*chi2d + chi2*chi3d
+        nud = tempd4 - w(i, j, k, itu1)*chid/nu**2
         wd(i, j, k, itu1) = wd(i, j, k, itu1) + chid/nu
-        nud = -(w(i, j, k, itu1)*chid/nu**2)
         temp = w(i, j, k, irho)
         rlvd(i, j, k) = rlvd(i, j, k) + nud/temp
         wd(i, j, k, irho) = wd(i, j, k, irho) - rlv(i, j, k)*nud/temp**2
@@ -470,6 +481,7 @@ subroutine sasource()
     real(kind=realtype), parameter :: f23=two*third
 ! local variables.
     integer(kind=inttype) :: i, j, k, nn, ii
+    real(kind=realtype) :: dnew, cr1
     real(kind=realtype) :: fv1, fv2, ft2
     real(kind=realtype) :: ss, sst, nu, dist2inv, chi, chi2, chi3
     real(kind=realtype) :: rr, gg, gg6, termfw, fwsa, term1, term2
@@ -492,6 +504,9 @@ subroutine sasource()
     kar2inv = one/rsak**2
     cw36 = rsacw3**6
     cb3inv = one/rsacb3
+! constants for sa rough
+! ks=0.0001 
+    cr1 = 0.5
 ! determine the non-dimensional wheel speed of this block.
     omegax = timeref*sections(sectionid)%rotrate(1)
     omegay = timeref*sections(sectionid)%rotrate(2)
@@ -584,13 +599,17 @@ subroutine sasource()
 ! wall distance squared, the ratio chi (ratio of nutilde
 ! and nu) and the functions fv1 and fv2. the latter corrects
 ! the production term near a viscous wall.
+! sa rough 
+        dnew = d2wall(i, j, k) + 0.03*kssa
         nu = rlv(i, j, k)/w(i, j, k, irho)
-        dist2inv = one/d2wall(i, j, k)**2
-        chi = w(i, j, k, itu1)/nu
+! dist2inv = one/(d2wall(i,j,k)**2)
+        dist2inv = one/dnew**2
+        chi = w(i, j, k, itu1)/nu + cr1*kssa/dnew
         chi2 = chi*chi
         chi3 = chi*chi2
         fv1 = chi3/(chi3+cv13)
-        fv2 = one - chi/(one+chi*fv1)
+! fv2      = one - chi/(one + chi*fv1)
+        fv2 = one - w(i, j, k, itu1)/(nu+w(i, j, k, itu1)*fv1)
 ! the function ft2, which is designed to keep a laminar
 ! solution laminar. when running in fully turbulent mode
 ! this function should be set to 0.0.
diff --git a/src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90 b/src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90
index 5960624c4..8be51eeca 100644
--- a/src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90
+++ b/src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90
@@ -195,6 +195,7 @@ subroutine bceddywall(nn)
 !
     use constants
     use blockpointers
+    use inputphysics, only : kssa
     implicit none
 !
 !      subroutine arguments.
@@ -204,44 +205,63 @@ subroutine bceddywall(nn)
 !      local variables.
 !
     integer(kind=inttype) :: i, j
+    real(kind=realtype) :: result1
 ! determine the face id on which the subface is located and
 ! loop over the faces of the subface and set the eddy viscosity
-! in the halo cells.
+! in the halo cells. 
     select case  (bcfaceid(nn)) 
     case (imin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          rev(1, i, j) = -rev(2, i, j)
+! rev(1,i,j) = -rev(2,i,j)
+!  print "(f12.3)", safact(kssa, d2wall(2,i,j))
+          result1 = safact(kssa, d2wall(2, i, j))
+          rev(1, i, j) = result1*rev(2, i, j)
         end do
       end do
     case (imax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          rev(ie, i, j) = -rev(il, i, j)
+! rev(ie,i,j) = -rev(il,i,j)
+! print "(f12.3)", safact(kssa, d2wall(il,i,j))
+          result1 = safact(kssa, d2wall(il, i, j))
+          rev(ie, i, j) = result1*rev(il, i, j)
         end do
       end do
     case (jmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          rev(i, 1, j) = -rev(i, 2, j)
+! rev(i,1,j) = -rev(i,2,j)
+! print "(f12.3)", safact(kssa, d2wall(i,1,j))
+          result1 = safact(kssa, d2wall(i, 2, j))
+          rev(i, 1, j) = result1*rev(i, 2, j)
         end do
       end do
     case (jmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          rev(i, je, j) = -rev(i, jl, j)
+! rev(i,je,j) = -rev(i,jl,j)
+! print "(f12.3)", safact(kssa, d2wall(i,je,j))
+          result1 = safact(kssa, d2wall(i, jl, j))
+          rev(i, je, j) = result1*rev(i, jl, j)
         end do
       end do
     case (kmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          rev(i, j, 1) = -rev(i, j, 2)
+! rev(i,j,1) = -rev(i,j,2)
+! print "(f12.3)", safact(kssa, d2wall(i,j,2))
+          result1 = safact(kssa, d2wall(i, j, 2))
+          rev(i, j, 1) = result1*rev(i, j, 2)
         end do
       end do
     case (kmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          rev(i, j, ke) = -rev(i, j, kl)
+! rev(i,j,ke) = -rev(i,j,kl)
+! print "(f12.3)", safact(kssa, d2wall(i,j,kl))
+          result1 = safact(kssa, d2wall(i, j, kl))
+          rev(i, j, ke) = result1*rev(i, j, kl)
         end do
       end do
     end select
@@ -843,6 +863,7 @@ subroutine bcturbwall(nn)
     real(kind=realtype), dimension(:, :), pointer :: rlv2, dd2wall
     intrinsic min
     intrinsic max
+    real(kind=realtype) :: result1
     integer(kind=inttype) :: y12
     integer(kind=inttype) :: y11
     integer(kind=inttype) :: y10
@@ -867,37 +888,49 @@ subroutine bcturbwall(nn)
       case (imin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            bmti1(i, j, itu1, itu1) = one
+! bmti1(i,j,itu1,itu1) = one
+            result1 = safact(kssa, d2wall(2, i, j))
+            bmti1(i, j, itu1, itu1) = -result1
           end do
         end do
       case (imax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            bmti2(i, j, itu1, itu1) = one
+! bmti2(i,j,itu1,itu1) = one
+            result1 = safact(kssa, d2wall(il, i, j))
+            bmti2(i, j, itu1, itu1) = -result1
           end do
         end do
       case (jmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            bmtj1(i, j, itu1, itu1) = one
+! bmtj1(i,j,itu1,itu1) = one
+            result1 = safact(kssa, d2wall(i, 2, j))
+            bmtj1(i, j, itu1, itu1) = -result1
           end do
         end do
       case (jmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            bmtj2(i, j, itu1, itu1) = one
+! bmtj2(i,j,itu1,itu1) = one
+            result1 = safact(kssa, d2wall(i, jl, j))
+            bmtj2(i, j, itu1, itu1) = -result1
           end do
         end do
       case (kmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            bmtk1(i, j, itu1, itu1) = one
+! bmtk1(i,j,itu1,itu1) = one
+            result1 = safact(kssa, d2wall(i, j, 2))
+            bmtk1(i, j, itu1, itu1) = -result1
           end do
         end do
       case (kmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            bmtk2(i, j, itu1, itu1) = one
+! bmtk2(i,j,itu1,itu1) = one
+            result1 = safact(kssa, d2wall(i, j, kl))
+            bmtk2(i, j, itu1, itu1) = -result1
           end do
         end do
       end select
@@ -1157,4 +1190,14 @@ subroutine bcturbwall(nn)
       end select
     end select
   end subroutine bcturbwall
+  function safact(ks, d)
+    use constants
+    implicit none
+! dummy arguments
+    real(kind=realtype) :: safact
+! local variablse
+    real(kind=realtype) :: ks
+    real(kind=realtype) :: d
+    safact = (ks-d/0.03)/(ks+d/0.03)
+  end function safact
 end module turbbcroutines_fast_b

From 5deba2afb3fe41cb342f2cc3308465cd333f0ba9 Mon Sep 17 00:00:00 2001
From: David Anderegg <andv@zhaw.ch>
Date: Mon, 25 Jan 2021 17:31:18 +0100
Subject: [PATCH 10/60] Prevent division by 0 (hack)

---
 src/adjoint/outputForward/turbbcroutines_d.f90          | 6 +++++-
 src/adjoint/outputReverse/turbbcroutines_b.f90          | 6 +++++-
 src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90 | 6 +++++-
 src/turbulence/turbBCRoutines.F90                       | 7 +++++--
 4 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/src/adjoint/outputForward/turbbcroutines_d.f90 b/src/adjoint/outputForward/turbbcroutines_d.f90
index 49ca80b7b..3db1c46ad 100644
--- a/src/adjoint/outputForward/turbbcroutines_d.f90
+++ b/src/adjoint/outputForward/turbbcroutines_d.f90
@@ -2267,6 +2267,10 @@ function safact(ks, d)
 ! local variablse
     real(kind=realtype) :: ks
     real(kind=realtype) :: d
-    safact = (ks-d/0.03)/(ks+d/0.03)
+    if (ks .eq. zero) then
+      safact = -one
+    else
+      safact = (ks-d/0.03)/(ks+d/0.03)
+    end if
   end function safact
 end module turbbcroutines_d
diff --git a/src/adjoint/outputReverse/turbbcroutines_b.f90 b/src/adjoint/outputReverse/turbbcroutines_b.f90
index af8601433..f6726f0f8 100644
--- a/src/adjoint/outputReverse/turbbcroutines_b.f90
+++ b/src/adjoint/outputReverse/turbbcroutines_b.f90
@@ -2643,6 +2643,10 @@ function safact(ks, d)
 ! local variablse
     real(kind=realtype) :: ks
     real(kind=realtype) :: d
-    safact = (ks-d/0.03)/(ks+d/0.03)
+    if (ks .eq. zero) then
+      safact = -one
+    else
+      safact = (ks-d/0.03)/(ks+d/0.03)
+    end if
   end function safact
 end module turbbcroutines_b
diff --git a/src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90 b/src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90
index 8be51eeca..993aed227 100644
--- a/src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90
+++ b/src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90
@@ -1198,6 +1198,10 @@ function safact(ks, d)
 ! local variablse
     real(kind=realtype) :: ks
     real(kind=realtype) :: d
-    safact = (ks-d/0.03)/(ks+d/0.03)
+    if (ks .eq. zero) then
+      safact = -one
+    else
+      safact = (ks-d/0.03)/(ks+d/0.03)
+    end if
   end function safact
 end module turbbcroutines_fast_b
diff --git a/src/turbulence/turbBCRoutines.F90 b/src/turbulence/turbBCRoutines.F90
index 972a80915..4c220f5f0 100644
--- a/src/turbulence/turbBCRoutines.F90
+++ b/src/turbulence/turbBCRoutines.F90
@@ -1423,8 +1423,11 @@ function  saFact(ks, d)
    real(kind=realType) :: ks
    real(kind=realType) :: d
    
-
-   saFact = (ks - d/0.03) / (ks + d/0.03)
+   if (ks .eq. zero) then
+      saFact = -one
+   else
+      saFact = (ks - d/0.03) / (ks + d/0.03)
+   end if
 
   end function  saFact
 

From 90797b15f3dd4ab3c99b570308f678e00e5b8545 Mon Sep 17 00:00:00 2001
From: DavidAnderegg <andv@zhaw.ch>
Date: Fri, 5 Feb 2021 10:06:49 +0100
Subject: [PATCH 11/60] Prepare to merge

---
 adflow/pyADflow.py | 657 +++++++++++++++++++++------------------------
 1 file changed, 299 insertions(+), 358 deletions(-)

diff --git a/adflow/pyADflow.py b/adflow/pyADflow.py
index 908724c76..5d28a6e01 100644
--- a/adflow/pyADflow.py
+++ b/adflow/pyADflow.py
@@ -24,41 +24,18 @@
 import os
 import time
 import copy
+import types
 import numpy
 import sys
 from mpi4py import MPI
 from petsc4py import PETSc
 from baseclasses import AeroSolver, AeroProblem, getPy3SafeString
+from baseclasses.utils import Error
 from . import MExt
 from pprint import pprint as pp
 import hashlib
-try:
-    from collections import OrderedDict
-except ImportError:
-    try:
-        from ordereddict import OrderedDict
-    except ImportError:
-        print('Could not find any OrderedDict class. For 2.6 and earlier, \
-use:\n pip install ordereddict')
-
-class Error(Exception):
-    """
-    Format the error message in a box to make it clear this
-    was a expliclty raised exception.
-    """
-    def __init__(self, message):
-        msg = '\n+'+'-'*78+'+'+'\n' + '| pyADFLOW Error: '
-        i = 17
-        for word in message.split():
-            if len(word) + i + 1 > 78: # Finish line and start new one
-                msg += ' '*(78-i)+'|\n| ' + word + ' '
-                i = 1 + len(word)+1
-            else:
-                msg += word + ' '
-                i += len(word)+1
-        msg += ' '*(78-i) + '|\n' + '+'+'-'*78+'+'+'\n'
-        print(msg)
-        Exception.__init__(self)
+from collections import OrderedDict
+
 
 class ADFLOWWarning(object):
     """
@@ -90,7 +67,7 @@ class ADFLOW(AeroSolver):
         The communicator on which to create ADflow. If not given, defaults
         to MPI.COMM_WORLD.
     options : dictionary
-        The list of options to use with ADflow. This keyword arguement
+        The list of options to use with ADflow. This keyword argument
         is NOT OPTIONAL. It must always be provided. It must contain, at least
         the 'gridFile' entry for the filename of the grid to load
     debug : bool
@@ -98,7 +75,7 @@ class ADFLOW(AeroSolver):
         debugger. The MExt module deletes the copied .so file when not
         required which causes issues debugging.
     dtype : str
-        String type for float: 'd' or 'D'. Not needed to be uset by user.
+        String type for float: 'd' or 'D'. Not needed to be used by user.
         """
     def __init__(self, comm=None, options=None, debug=False, dtype='d'):
 
@@ -120,11 +97,11 @@ def __init__(self, comm=None, options=None, debug=False, dtype='d'):
         informs = {}
 
         # Load all the option/objective/DV information:
-        defOpts = self._getDefOptions()
+        defaultOptions = self._getDefaultOptions()
         self.optionMap, self.moduleMap = self._getOptionMap()
-        self.ignoreOptions, self.deprecatedOptions, self.specialOptions = \
+        self.pythonOptions, deprecatedOptions, self.specialOptions = \
                            self._getSpecialOptionLists()
-        self.imOptions = self._getImmutableOptions()
+        immutableOptions = self._getImmutableOptions()
 
         self.possibleAeroDVs, self.possibleBCDvs, self.basicCostFunctions = (
             self._getObjectivesAndDVs())
@@ -134,7 +111,7 @@ def __init__(self, comm=None, options=None, debug=False, dtype='d'):
         for key in self.basicCostFunctions:
             self.adflowCostFunctions[key] = [None, key]
 
-        # Separate list of the suplied supplied functions
+        # Separate list of the supplied supplied functions
         self.adflowUserCostFunctions = OrderedDict()
 
         # This is the real solver so dtype is 'd'
@@ -144,15 +121,13 @@ def __init__(self, comm=None, options=None, debug=False, dtype='d'):
         if comm is None:
             comm = MPI.COMM_WORLD
 
-        self.comm = comm
-        self.adflow.communication.adflow_comm_world = self.comm.py2f()
+        self.adflow.communication.adflow_comm_world = comm.py2f()
         self.adflow.communication.adflow_comm_self = MPI.COMM_SELF.py2f()
-        self.adflow.communication.sendrequests = numpy.zeros(self.comm.size)
-        self.adflow.communication.recvrequests = numpy.zeros(self.comm.size)
-        self.myid = self.adflow.communication.myid = self.comm.rank
-        self.adflow.communication.nproc = self.comm.size
+        self.adflow.communication.sendrequests = numpy.zeros(comm.size)
+        self.adflow.communication.recvrequests = numpy.zeros(comm.size)
+        self.myid = self.adflow.communication.myid = comm.rank
+        self.adflow.communication.nproc = comm.size
 
-        # Initialize the inherited aerosolver.
         if options is None:
             raise Error("The 'options' keyword argument must be passed "
                         "adflow. The options dictionary must contain (at least) "
@@ -163,14 +138,23 @@ def __init__(self, comm=None, options=None, debug=False, dtype='d'):
 
         defSetupTime = time.time()
 
-        AeroSolver.__init__(self, name, category, defOpts, informs,
-                            options=options)
+        # Initialize the inherited AeroSolver
+        super().__init__(
+            name,
+            category,
+            defaultOptions=defaultOptions,
+            options=options,
+            immutableOptions=immutableOptions,
+            deprecatedOptions=deprecatedOptions,
+            comm=comm,
+            informs=informs,
+        )
 
         baseClassTime = time.time()
         # Update turbresscale depending on the turbulence model specified
         self._updateTurbResScale()
 
-        # Initialize petec in case the user has not already
+        # Initialize PETSc in case the user has not already
         self.adflow.adjointutils.initializepetsc()
 
         # Set the stand-alone adflow flag to false...this changes how
@@ -181,7 +165,7 @@ def __init__(self, comm=None, options=None, debug=False, dtype='d'):
         # terminate calls are handled
         self.adflow.killsignals.frompython = True
 
-        # Dictionary of design varibales and their index
+        # Dictionary of design variables and their index
         self.aeroDVs = []
 
         # Default counters
@@ -208,7 +192,7 @@ def __init__(self, comm=None, options=None, debug=False, dtype='d'):
         self.adflow.utils.writeintromessage()
 
         # Remind the user of all the adflow options:
-        self.printCurrentOptions()
+        self.printOptions()
 
         # Do the remainder of the operations that would have been done
         # had we read in a param file
@@ -533,7 +517,7 @@ def addSlices(self, direction, positions, sliceType='relative',
         sliceType : str {'relative', 'absolute'}
             Relative slices are 'sliced' at the beginning and then parametricly
             move as the geometry deforms. As a result, the slice through the
-            geometry may not remain planar. An abolute slice is re-sliced for
+            geometry may not remain planar. An absolute slice is re-sliced for
             every out put so is always exactly planar and always at the initial
             position the user indicated.
         groupName : str
@@ -612,7 +596,7 @@ def addIntegrationSurface(self, fileName, familyName, isInflow=True):
         # Check that the family name is not already defined:
         if familyName.lower() in self.families:
             raise Error("Cannot add integration surface with family name '%s'"
-                        "becuase the name it already exists."%familyName)
+                        "because the name it already exists."%familyName)
 
         # Need to add an additional family so first figure out what
         # the max family index is:
@@ -635,8 +619,8 @@ def addActuatorRegion(self, fileName, axis1, axis2, familyName,
                           thrust=0.0, torque=0.0, relaxStart=None,
                           relaxEnd=None):
         """Add an actuator disk zone defined by the (closed) supplied
-        in the plot3d file "fileName". Axis1 and Axis2 defines the
-        physical extent of the region overwhich to apply the ramp
+        in the plot3d file 'fileName'. 'axis1' and 'axis2' defines the
+        physical extent of the region over which to apply the ramp
         factor.
 
         Parameters
@@ -680,7 +664,7 @@ def addActuatorRegion(self, fileName, axis1, axis2, familyName,
         # else.
         if familyName.lower() in self.families:
             raise Error("Cannot add ActuatorDiskRegion with family name '%s'"
-                        "becuase the name it already exists."%familyName)
+                        "because the name it already exists."%familyName)
 
         # Need to add an additional family so first figure out what
         # the max family index is:
@@ -908,7 +892,7 @@ def __call__(self, aeroProblem, **kwargs):
             not supported.
         writeSolution : bool
             Flag to override any solution writing parameters. This
-            is used in a multidisciplinary enviornment when the outer
+            is used in a multidisciplinary environment when the outer
             solver can suppress all I/O during intermediate solves.
             """
 
@@ -950,7 +934,7 @@ def __call__(self, aeroProblem, **kwargs):
         self.curAP.adflowData.callCounter += 1
 
         # --------------------------------------------------------------
-        # Setup interation arrays ---- don't touch this unless you
+        # Setup iteration arrays ---- don't touch this unless you
         # REALLY REALLY know what you're doing!
 
         # iterTot is non-zero which means we already did a solution,
@@ -1152,7 +1136,7 @@ def evalFunctions(self, aeroProblem, funcs, evalFuncs=None,
         """
         Evaluate the desired functions given in iterable object,
         'evalFuncs' and add them to the dictionary 'funcs'. The keys
-        in the funcs dictionary will be have an _<ap.name> appended to
+        in the funcs dictionary will be have an ``<ap.name>_`` prepended to
         them.
 
         Parameters
@@ -1167,7 +1151,7 @@ def evalFunctions(self, aeroProblem, funcs, evalFuncs=None,
           If not None, use these functions to evaluate.
 
         ignoreMissing : bool
-            Flag to supress checking for a valid function. Please use
+            Flag to suppress checking for a valid function. Please use
             this option with caution.
 
         Examples
@@ -1176,8 +1160,8 @@ def evalFunctions(self, aeroProblem, funcs, evalFuncs=None,
         >>> CFDsolver(ap)
         >>> CFDsolver.evalFunctions(ap1, funcs, ['cl', 'cd'])
         >>> funcs
-        >>> # Result will look like (if aeroProblem, ap1, has name of 'c1'):
-        >>> # {'cl_c1':0.501, 'cd_c1':0.02750}
+        >>> # Result will look like (if aeroProblem, ap1, has name of 'wing'):
+        >>> # {'wing_cl':0.501, 'wing_cd':0.02750}
         """
 
         startEvalTime = time.time()
@@ -1287,9 +1271,9 @@ def _getFuncsBar(self, f):
     def evalFunctionsSens(self, aeroProblem, funcsSens, evalFuncs=None):
         """
         Evaluate the sensitivity of the desired functions given in
-        iterable object,'evalFuncs' and add them to the dictionary
-        'funcSens'. The keys in the funcs dictioary will be have an
-        _<ap.name> appended to them.
+        iterable object, 'evalFuncs' and add them to the dictionary
+        'funcSens'. The keys in the 'funcsSens' dictionary will be have an
+        ``<ap.name>_`` prepended to them.
 
         Parameters
         ----------
@@ -1300,7 +1284,7 @@ def evalFunctionsSens(self, aeroProblem, funcsSens, evalFuncs=None):
             The functions the user wants the derivatives of
 
         evalFuncs : iterable object containing strings
-            The additaion functions the user wants returned that are
+            The addition functions the user wants returned that are
             not already defined in the aeroProblem
 
         Examples
@@ -1476,7 +1460,7 @@ def solveCL(self, aeroProblem, CLStar, alpha0=None,
         CLStar : float
             The desired target CL
         alpha0 : angle (deg)
-            Initial guess for secant seach (deg). If None, use the
+            Initial guess for secant search (deg). If None, use the
             value in the aeroProblem
         delta : angle (deg)
             Initial step direction for secant search
@@ -1611,7 +1595,7 @@ def solveTrimCL(self, aeroProblem, trimFunc, trimDV, dvIndex,
         trim0 : float or None
             Starting trim value. If None, use what is in the DVGeo object
         da : float
-            Initial alpha step for jacobian
+            Initial alpha step for Jacobian
         deta : float
             Initial  stet in the 'eta' or trim dv function
         tol : float
@@ -1619,9 +1603,9 @@ def solveTrimCL(self, aeroProblem, trimFunc, trimDV, dvIndex,
         nIter : int
             Maximum number of iterations.
         Jac0 : 2x2 numpy array
-            Initial guess for the trim-cl jacobian. Usually obtained
+            Initial guess for the trim-cl Jacobian. Usually obtained
             from a previous analysis and saves two function
-            evaluations to produce the intial jacobian.
+            evaluations to produce the initial Jacobian.
         liftFunc : str
             Solution variable to use for lift. Usually 'cl' or a
             custom function created from cl.
@@ -1727,9 +1711,9 @@ def solveTargetFuncs(self, aeroProblem, funcDict, tol=1e-4, nIter=10, Jac0=None)
         nIter : int
             Maximum number of iterations.
         Jac0 : nxn numpy array
-            Initial guess for the func-dv jacobian. Usually obtained
+            Initial guess for the func-dv Jacobian. Usually obtained
             from a previous analysis and saves n function
-            evaluations to produce the intial jacobian.
+            evaluations to produce the initial Jacobian.
         """
 
         self.setAeroProblem(aeroProblem)
@@ -1855,7 +1839,7 @@ def Func(Xn, targetVals):
     def solveSep(self, aeroProblem, sepStar, nIter=10, alpha0=None,
                  delta=0.1, tol=1e-3, expansionRatio=1.2, sepName=None):
         """This is a safe-guarded secant search method to determine
-        the alpha that yields a specified value of the sep
+        the alpha that yields a specified value of the separation
         sensor. Since this function is highly nonlinear we use a
         linear search to get the bounding range first.
 
@@ -1898,8 +1882,8 @@ def solveSep(self, aeroProblem, sepStar, nIter=10, alpha0=None,
         # Name of function to use
         funcName = '%s_%s'%(ap.name, sepName)
 
-        if not self.getOption('rkreset') and self.getOption('usenksolve'):
-            ADFLOWWarning("nRKReset option is not set. It is usually necessary "
+        if not self.getOption('rkreset') and self.getOption('usenksolver'):
+            ADFLOWWarning("RKReset option is not set. It is usually necessary "
                         "for solveSep() when NK solver is used.")
 
         # Solve first problem
@@ -2005,7 +1989,7 @@ def writeSolution(self, outputDir=None, baseName=None, number=None):
         """This is a generic shell function that potentially writes
         the various output files. The intent is that the user or
         calling program can call this file and ADflow write all the
-        files that the user has defined. It is recommneded that this
+        files that the user has defined. It is recommended that this
         function is used along with the associated logical flags in
         the options to determine the desired writing procedure
 
@@ -2108,7 +2092,7 @@ def writeMeshFile(self, fileName):
     def writeVolumeSolutionFile(self, fileName, writeGrid=True):
         """Write the current state of the volume flow solution to a CGNS
         file. This is a lower level routine; Normally one should call
-        writeSolution().
+        ``writeSolution()``.
 
         Parameters
         ----------
@@ -2118,7 +2102,7 @@ def writeVolumeSolutionFile(self, fileName, writeGrid=True):
             Flag specifying whether the grid should be included or if
             links should be used. Always writing the grid is
             recommended even in cases when it is not strictly necessary.
-            Note that if writeGrid == False the volume files do not contain
+            Note that if ``writeGrid = False`` the volume files do not contain
             any grid coordinates rendering the file useless if a separate
             grid file was written out and is linked to it.
         """
@@ -2296,7 +2280,7 @@ def writeForceFile(self, fileName, TS=0, groupName=None,
         # end if (root proc )
 
     def writeSurfaceSensitivity(self, fileName, func, groupName=None):
-        """Write a tecplot file of the surface sensitivty. It is up to the use
+        """Write a tecplot file of the surface sensitivity. It is up to the use
         to make sure the adjoint already computed before calling this
         function.
 
@@ -3143,10 +3127,10 @@ def getSurfaceConnectivity(self, groupName=None, includeZipper=True, includeCGNS
             cgnsBlockID = numpy.zeros(ncell, dtype='intc')
 
         if includeCGNS:
-            # Conver to 0-based ordering becuase we are in python
+            # Convert to 0-based ordering becuase we are in python
             return conn-1, faceSizes, cgnsBlockID-1
         else:
-            # Conver to 0-based ordering becuase we are in python
+            # Convert to 0-based ordering becuase we are in python
             return conn-1, faceSizes
 
     def _expandGroupNames(self, groupNames):
@@ -3213,7 +3197,7 @@ def globalAdjointPreCon(self, inVec, outVec):
         return self.adflow.nksolver.applyadjointpc(inVec, outVec)
 
     def _addAeroDV(self, dv):
-        """Add a single desgin variable that ADflow knows about.
+        """Add a single design variable that ADflow knows about.
 
         Parameters
         ----------
@@ -3667,7 +3651,7 @@ def computeJacobianVectorProductFwd(self, xDvDot=None, xSDot=None, xVDot=None, w
         Returns
         -------
         dwdot, funcsdot, fDot : array, dict, array
-            One or more of the these are return depending on the \*Deriv flags
+            One or more of the these are return depending on the ``*Deriv`` flags
         """
 
         if xDvDot is None and xSDot is None and xVDot is None and wDot is None:
@@ -3816,11 +3800,11 @@ def computeJacobianVectorProductBwd(self, resBar=None, funcsBar=None, fBar=None,
         """This the main python gateway for producing reverse mode jacobian
         vector products. It is not generally called by the user by
         rather internally or from another solver. A mesh object must
-        be present for the xSDeriv=True flag and a mesh and DVGeo
-        object must be present for xDvDeriv=True flag. Note that more
+        be present for the ``xSDeriv=True`` flag and a mesh and DVGeo
+        object must be present for ``xDvDeriv=True`` flag. Note that more
         than one of the specified return flags may be spcified. If
         more than one return is specified, the order of return is :
-        (wDeriv, xVDeriv, XsDeriv, xDvDeriv, dXdvDerivAero).
+        ``(wDeriv, xVDeriv, XsDeriv, xDvDeriv, dXdvDerivAero)``.
 
         Parameters
         ----------
@@ -3849,7 +3833,7 @@ def computeJacobianVectorProductBwd(self, resBar=None, funcsBar=None, fBar=None,
         Returns
         -------
         wbar, xvbar, xsbar, xdvbar, xdvaerobar : array, array, array, dict, dict
-            One or more of these are returned depending on the \*Deriv flags provided.
+            One or more of these are returned depending on the ``*Deriv`` flags provided.
 
         """
         # Error Checking
@@ -4338,37 +4322,11 @@ def setOption(self, name, value):
         """
         Set Solver Option Value
         """
+        super().setOption(name, value)
         name = name.lower()
-        # Make sure we are not trying to change an immutable option if
-        # we are not allowed to.
-        if self.solverCreated and name in self.imOptions:
-            raise Error("Option '%-35s' cannot be modified after the solver "
-                        "is created."%name)
-
-        # Check to see if we have a deprecated option. Print a useful
-        # warning that this is deprecated.
-        if name in self.deprecatedOptions:
-            if self.comm.rank == 0:
-                ADFLOWWarning("Option '%-29s\' is a deprecated ADflow Option |"% name)
-            return
 
-        # Try the option in the option dictionary to make sure we are setting a valid option
-        if name not in self.defaultOptions:
-            if self.comm.rank == 0:
-                ADFLOWWarning("Option '%-30s' is not a valid ADflow Option |"%name)
-            return
-
-        # Now we know the option exists, lets check if the type is ok:
-        if isinstance(value, self.defaultOptions[name][0]):
-            self.options[name] = [type(value),value]
-        else:
-            raise Error("Datatype for Option %-35s was not valid \n "
-                        "Expected data type is %-47s \n "
-                        "Received data type is %-47s"% (
-                            name, self.defaultOptions[name][0], type(value)))
-
-        # If the option is in the ignoredOption list, we just return.
-        if name in self.ignoreOptions:
+        # If the option is only used in Python, we just return
+        if name in self.pythonOptions:
             return
 
         # Do special Options individually
@@ -4517,260 +4475,244 @@ def setOption(self, name, value):
         # Set in the correct module
         setattr(module, variable, value)
 
-    def getOption(self, name):
-        # Redefine the getOption def from the base class so we can
-        # make sure the name is lowercase
-
-        if name.lower() in self.defaultOptions:
-            return self.options[name.lower()][1]
-        else:
-            raise Error('%s is not a valid option name.'% name)
-
-    def _getDefOptions(self):
-        """
-        There are many options for ADflow. These technically belong in
-        the __init__ function but it gets far too long so we split
-        them out.
-        """
+    @staticmethod
+    def _getDefaultOptions():
         defOpts = {
             # Input file parameters
-            'gridfile':[str, 'default.cgns'],
-            'restartfile':[object, None],
+            'gridFile':[str, 'default.cgns'],
+            'restartFile':[(str, list, type(None)), None],
 
-            # Surface definition parameters:
-            'meshsurfacefamily':[object, None],
-            'designsurfacefamily':[object, None],
-            'closedsurfacefamilies':[object, None],
+            # Surface definition parameters
+            'meshSurfaceFamily':[(str, type(None)), None],
+            'designSurfaceFamily':[(str, type(None)), None],
+            'closedSurfaceFamilies':[(list, type(None)), None],
 
             # Output Parameters
-            'storerindlayer':[bool, True],
-            'outputdirectory':[str, './'],
-            'outputsurfacefamily':[str, 'allSurfaces'],
-            'writesurfacesolution':[bool,True],
-            'writevolumesolution':[bool,True],
-            'writetecplotsurfacesolution':[bool,False],
-            'nsavevolume':[int,1],
-            'nsavesurface':[int,1],
-            'solutionprecision':[str,'single'],
-            'gridprecision':[str,'double'],
-            'solutionprecisionsurface':[str,'single'],
-            'gridprecisionsurface':[str,'single'],
+            'storeRindLayer':[bool, True],
+            'outputDirectory':[str, './'],
+            'outputSurfaceFamily':[str, 'allSurfaces'],
+            'writeSurfaceSolution':[bool,True],
+            'writeVolumeSolution':[bool,True],
+            'writeTecplotSurfaceSolution':[bool,False],
+            'nSaveVolume':[int,1],
+            'nSaveSurface':[int,1],
+            'solutionPrecision':[str, ['single', 'double']],
+            'gridPrecision':[str, ['double', 'single']],
+            'solutionPrecisionSurface':[str, ['single', 'double']],
+            'gridPrecisionSurface':[str, ['single', 'double']],
             'isosurface':[dict, {}],
-            'isovariables':[list, []],
-            'viscoussurfacevelocities':[bool, True],
+            'isoVariables':[list, []],
+            'viscousSurfaceVelocities':[bool, True],
 
             # Physics Parameters
-            'discretization':[str, 'central plus scalar dissipation'],
-            'coarsediscretization':[str, 'central plus scalar dissipation'],
-            'limiter':[str, 'vanalbeda'],
-            'smoother':[str, 'runge kutta'],
-            'equationtype': [str, 'euler'],
-            'equationmode': [str, 'steady'],
-            'flowtype':[str, 'external'],
-            'turbulencemodel':[str, 'sa'],
-            'turbulenceorder':[str, 'first order'],
-            'turbresscale':[object, None],
-            'turbulenceproduction':[str, 'strain'],
-            'useqcr':[bool, False],
-            'userotationsa':[bool, False],
-            'useft2sa':[bool, True],
-            'kssa': [float, 0.0],
-            'eddyvisinfratio':[float, .009],
-            'usewallfunctions':[bool, False],
-            'useapproxwalldistance':[bool, True],
-            'eulerwalltreatment':[str, 'linear pressure extrapolation'],
-            'viscwalltreatment':[str, 'constant pressure extrapolation'],
-            'dissipationscalingexponent':[float, 0.67],
+            'discretization':[str, ['central plus scalar dissipation', 'central plus matrix dissipation', 'upwind']],
+            'coarseDiscretization':[str, ['central plus scalar dissipation', 'central plus matrix dissipation', 'upwind']],
+            'limiter':[str, ['van Albada', 'minmod', 'no limiter']],
+            'smoother':[str, ['DADI', 'Runge-Kutta']],
+            'equationType': [str, ['RANS', 'Euler', 'laminar NS']],
+            'equationMode': [str, ['steady', 'unsteady', 'time spectral']],
+            'flowType':[str, ['external', 'internal']],
+            'turbulenceModel':[str, ['SA', 'SA-Edwards', 'k-omega Wilcox', 'k-omega modified', 'k-tau', 'Menter SST', 'v2f']],
+            'turbulenceOrder':[str, ['first order', 'second order']],
+            'turbResScale':[(float, list, type(None)), None],
+            'turbulenceProduction':[str, ['strain', 'vorticity', 'Kato-Launder']],
+            'useQCR':[bool, False],
+            'useRotationSA':[bool, False],
+            'useft2SA':[bool, True],
+            'eddyVisInfRatio':[float, 0.009],
+            'useWallFunctions':[bool, False],
+            'useApproxWallDistance':[bool, True],
+            'eulerWallTreatment':[str, ['linear pressure extrapolation', 'constant pressure extrapolation', \
+                'quadratic pressure extrapolation', 'normal momentum']],
+            'viscWallTreatment':[str, ['constant pressure extrapolation', 'linear pressure extrapolation']],
+            'dissipationScalingExponent':[float, 0.67],
             'vis4':[float, 0.0156],
             'vis2':[float, 0.25],
-            'vis2coarse':[float, 0.5],
-            'restrictionrelaxation':[float, .80],
-            'liftindex':[int, 2],
-            'lowspeedpreconditioner':[bool, False],
-            'walldistcutoff':[float, 1e20],
-            'infchangecorrection':[bool, False],
-            'cavitationnumber':[float, 1.4],
+            'vis2Coarse':[float, 0.5],
+            'restrictionRelaxation':[float, 0.80],
+            'liftIndex':[int, [2, 3]],
+            'lowSpeedPreconditioner':[bool, False],
+            'wallDistCutoff':[float, 1e20],
+            'infChangeCorrection':[bool, True],
+            'cavitationNumber':[float, 1.4],
 
             # Common Parameters
-            'ncycles':[int, 500],
-            'timelimit':[float, -1.0],
-            'ncyclescoarse':[int, 500],
-            'nsubiterturb':[int, 1],
-            'nsubiter':[int, 1],
-            'cfl':[float, 1.7],
-            'cflcoarse':[float, 1.0],
-            'mgcycle':[str, '3w'],
-            'mgstartlevel':[int, -1],
-            'resaveraging':[str,'alternateresaveraging'],
-            'smoothparameter':[float, 1.5],
-            'cfllimit':[float, 1.5],
-            'useblockettes':[bool, True],
-            'uselinresmonitor':[bool, False],
-
-            # Overset Parameters:
-            'nearwalldist':[float, 0.1],
-            'backgroundvolscale':[float, 1.0],
-            'oversetprojtol':[float, 1e-12],
-            'overlapfactor':[float, 0.9],
-            'oversetloadbalance':[bool, True],
-            'debugzipper':[bool, False],
-            'zippersurfacefamily':[object, None],
-            'cutcallback':[object, None],
-            'oversetupdatemode':[str, 'frozen'],
-            'nrefine':[int,10],
-            'usezippermesh':[bool, True],
-            'useoversetwallscaling':[bool, False],
-            'selfzipcutoff':[float, 120.0],
-            'oversetpriority':[dict, {}],
+            'nCycles':[int, 2000],
+            'timeLimit':[float, -1.0],
+            'nCyclesCoarse':[int, 500],
+            'nSubiterTurb':[int, 3],
+            'nSubiter':[int, 1],
+            'CFL':[float, 1.7],
+            'CFLCoarse':[float, 1.0],
+            'MGCycle':[str, '3w'],
+            'MGStartLevel':[int, -1],
+            'resAveraging':[str, ['alternate', 'never', 'always']],
+            'smoothParameter':[float, 1.5],
+            'CFLLimit':[float, 1.5],
+            'useBlockettes':[bool, True],
+            'useLinResMonitor':[bool, False],
+
+            # Overset Parameters
+            'nearWallDist':[float, 0.1],
+            'backgroundVolScale':[float, 1.0],
+            'oversetProjTol':[float, 1e-12],
+            'overlapFactor':[float, 0.9],
+            'oversetLoadBalance':[bool, True],
+            'debugZipper':[bool, False],
+            'zipperSurfaceFamily':[(str, type(None)), None],
+            'cutCallback':[(types.FunctionType, type(None)), None],
+            'oversetUpdateMode':[str, ['frozen', 'fast', 'full']],
+            'nRefine':[int,10],
+            'useZipperMesh':[bool, True],
+            'useOversetWallScaling':[bool, False],
+            'selfZipCutoff':[float, 120.0],
+            'oversetPriority':[dict, {}],
 
             # Unsteady Parameters
-            'timeintegrationscheme':[str, 'bdf'],
-            'timeaccuracy':[int, 2],
-            'ntimestepscoarse':[int, 48],
-            'ntimestepsfine':[int, 400],
-            'deltat':[float, .010],
-            'useale':[bool, True],
-            'usegridmotion':[bool, False],
-            'coupledsolution':[bool, False],
+            'timeIntegrationScheme':[str, ['BDF', 'explicit RK', 'implicit RK']],
+            'timeAccuracy':[int, [2, 1, 3]],
+            'nTimeStepsCoarse':[int, 48],
+            'nTimeStepsFine':[int, 400],
+            'deltaT':[float, 0.010],
+            'useALE':[bool, True],
+            'useGridMotion':[bool, False],
+            'coupledSolution':[bool, False],
 
             # Time Spectral Parameters
-            'timeintervals': [int, 1],
-            'alphamode':[bool, False],
-            'betamode':[bool, False],
-            'machmode':[bool, False],
-            'pmode':[bool, False],
-            'qmode':[bool, False],
-            'rmode':[bool, False],
-            'altitudemode':[bool, False],
-            'windaxis':[bool, False],
-            'alphafollowing':[bool,True],
-            'tsstability': [bool, False],
+            'timeIntervals': [int, 1],
+            'alphaMode':[bool, False],
+            'betaMode':[bool, False],
+            'machMode':[bool, False],
+            'pMode':[bool, False],
+            'qMode':[bool, False],
+            'rMode':[bool, False],
+            'altitudeMode':[bool, False],
+            'windAxis':[bool, False],
+            'alphaFollowing':[bool,True],
+            'TSStability': [bool, False],
 
             # Convergence Parameters
-            'l2convergence':[float, 1e-6],
-            'l2convergencerel':[float, 1e-16],
-            'l2convergencecoarse':[float, 1e-2],
-            'maxl2deviationfactor':[float, 1.0],
+            'L2Convergence':[float, 1e-8],
+            'L2ConvergenceRel':[float, 1e-16],
+            'L2ConvergenceCoarse':[float, 1e-2],
+            'maxL2DeviationFactor':[float, 1.0],
 
             # Newton-Krylov Parameters
-            'usenksolver':[bool, False],
-            'nkswitchtol':[float, 2.5e-4],
-            'nksubspacesize':[int, 60],
-            'nklinearsolvetol':[float, 0.3],
-            'nkuseew':[bool, True],
-            'nkadpc':[bool, False],
-            'nkviscpc':[bool, False],
-            'nkasmoverlap':[int, 1],
-            'nkpcilufill':[int, 2],
-            'nkjacobianlag':[int, 20],
-            'applypcsubspacesize':[int, 10],
-            'nkinnerpreconits':[int, 1],
-            'nkouterpreconits':[int, 1],
-            'nkcfl0':[float, 100.0],
-            'nkls':[str, 'cubic'],
-            'nkfixedstep':[float, 0.25],
-            'rkreset':[bool, False],
-            'nrkreset':[int, 5],
+            'useNKSolver':[bool, False],
+            'NKSwitchTol':[float, 1e-5],
+            'NKSubspaceSize':[int, 60],
+            'NKLinearSolveTol':[float, 0.3],
+            'NKUseEW':[bool, True],
+            'NKADPC':[bool, False],
+            'NKViscPC':[bool, False],
+            'NKASMOverlap':[int, 1],
+            'NKPCILUFill':[int, 2],
+            'NKJacobianLag':[int, 20],
+            'applyPCSubspaceSize':[int, 10],
+            'NKInnerPreconIts':[int, 1],
+            'NKOuterPreconIts':[int, 1],
+            'NKLS':[str, ['cubic', 'none', 'non-monotone']],
+            'NKFixedStep':[float, 0.25],
+            'RKReset':[bool, False],
+            'nRKReset':[int, 5],
 
             # MG PC
-            'agmglevels':[int, 1],
-            'agmgnsmooth':[int, 3],
+            'AGMGLevels':[int, 1],
+            'AGMGNSmooth':[int, 3],
 
             # Approximate Newton-Krylov Parameters
-            'useanksolver':[bool, False],
-            'ankuseturbdadi':[bool, True],
-            'ankswitchtol':[float, 1.0],
-            'anksubspacesize':[int, -1],
-            'ankmaxiter':[int, 40],
-            'anklinearsolvetol':[float, 0.05],
-            'anklinresmax':[float, 0.9],
-            'ankasmoverlap':[int, 1],
-            'ankpcilufill':[int, 2],
-            'ankjacobianlag':[int, 10],
-            'ankinnerpreconits':[int, 1],
-            'ankouterpreconits':[int, 1],
-            'ankcfl0':[float, 5.0],
-            'ankcflmin':[float,1.0],
-            'ankcfllimit':[float, 1e5],
-            'ankcflfactor':[float, 10.0],
-            'ankcflexponent':[float, 0.5],
-            'ankcflcutback':[float,0.5],
-            'ankstepfactor':[float, 1.0],
-            'ankstepmin':[float, 0.01],
-            'ankconstcflstep':[float, 0.4],
-            'ankphysicallstol':[float, 0.2],
-            'ankphysicallstolturb':[float, 0.99],
-            'ankunsteadylstol':[float, 1.0],
-            'anksecondordswitchtol':[float, 1e-16],
-            'ankcoupledswitchtol':[float, 1e-16],
-            'ankturbcflscale' : [float, 1.0],
-            'ankusefullvisc' : [bool, True],
-            'ankpcupdatetol':[float,0.5],
-            'ankadpc':[bool, False],
-            'anknsubiterturb':[int,1],
-            'ankturbkspdebug':[bool,False],
-            'ankusematrixfree':[bool,True],
+            'useANKSolver':[bool, True],
+            'ANKUseTurbDADI':[bool, True],
+            'ANKSwitchTol':[float, 1.0],
+            'ANKSubspaceSize':[int, -1],
+            'ANKMaxIter':[int, 40],
+            'ANKLinearSolveTol':[float, 0.05],
+            'ANKLinResMax':[float, 0.1],
+            'ANKASMOverlap':[int, 1],
+            'ANKPCILUFill':[int, 2],
+            'ANKJacobianLag':[int, 10],
+            'ANKInnerPreconIts':[int, 1],
+            'ANKOuterPreconIts':[int, 1],
+            'ANKCFL0':[float, 5.0],
+            'ANKCFLMin':[float,1.0],
+            'ANKCFLLimit':[float, 1e5],
+            'ANKCFLFactor':[float, 10.0],
+            'ANKCFLExponent':[float, 0.5],
+            'ANKCFLCutback':[float,0.5],
+            'ANKStepFactor':[float, 1.0],
+            'ANKStepMin':[float, 0.01],
+            'ANKConstCFLStep':[float, 0.4],
+            'ANKPhysicalLSTol':[float, 0.2],
+            'ANKPhysicalLSTolTurb':[float, 0.99],
+            'ANKUnsteadyLSTol':[float, 1.0],
+            'ANKSecondOrdSwitchTol':[float, 1e-16],
+            'ANKCoupledSwitchTol':[float, 1e-16],
+            'ANKTurbCFLScale' : [float, 1.0],
+            'ANKUseFullVisc' : [bool, True],
+            'ANKPCUpdateTol':[float,0.5],
+            'ANKADPC':[bool, False],
+            'ANKNSubiterTurb':[int,1],
+            'ANKTurbKSPDebug':[bool,False],
+            'ANKUseMatrixFree':[bool,True],
 
             # Load Balance/partitioning parameters
-            'blocksplitting':[bool, True],
-            'loadimbalance':[float, 0.1],
-            'loadbalanceiter':[int, 10],
-            'partitiononly':[bool, False],
-            'partitionlikenproc':[int, -1],
+            'blockSplitting':[bool, True],
+            'loadImbalance':[float, 0.1],
+            'loadBalanceIter':[int, 10],
+            'partitionOnly':[bool, False],
+            'partitionLikeNProc':[int, -1],
 
             # Misc Parameters
-            'autosolveretry':[bool, False],
-            'autoadjointretry':[bool, False],
-            'numbersolutions':[bool, True],
-            'printiterations':[bool, True],
-            'printtiming':[bool, True],
-            'setmonitor':[bool, True],
-            'printwarnings':[bool, True],
-            'monitorvariables':[list, ['cpu','resrho', 'resturb', 'cl', 'cd']],
-            'surfacevariables':[list, ['cp','vx', 'vy','vz', 'mach']],
-            'volumevariables':[list, ['resrho']],
-
-            # Multidisciplinary Coupling Parameters:
-            'forcesastractions':[bool, True],
+            'numberSolutions':[bool, True],
+            'printIterations':[bool, True],
+            'printTiming':[bool, True],
+            'setMonitor':[bool, True],
+            'printWarnings':[bool, True],
+            'monitorVariables':[list, ['cpu','resrho', 'resturb', 'cl', 'cd']],
+            'surfaceVariables':[list, ['cp','vx', 'vy','vz', 'mach']],
+            'volumeVariables':[list, ['resrho']],
+
+            # Multidisciplinary Coupling Parameters
+            'forcesAsTractions':[bool, True],
 
             # Adjoint Parameters
-            'adjointl2convergence':[float, 1e-6],
-            'adjointl2convergencerel':[float, 1e-16],
-            'adjointl2convergenceabs':[float, 1e-16],
-            'adjointdivtol':[float, 1e5],
-            'approxpc': [bool, True],
-            'adpc': [bool, False],
-            'viscpc':[bool,False],
-            'usediagtspc':[bool, True],
-            'restartadjoint':[bool, True],
-            'adjointsolver': [str, 'gmres'],
-            'adjointmaxiter': [int, 500],
-            'adjointsubspacesize' : [int, 100],
-            'adjointmonitorstep': [int, 10],
-            'dissipationlumpingparameter':[float, 6.0],
-            'preconditionerside': [str, 'right'],
-            'matrixordering': [str, 'rcm'],
-            'globalpreconditioner': [str, 'additive schwartz'],
-            'localpreconditioner' : [str, 'ilu'],
-            'ilufill': [int, 2],
-            'asmoverlap' : [int, 1],
-            'innerpreconits':[int, 1],
-            'outerpreconits':[int, 3],
-            'applyadjointpcsubspacesize':[int, 20],
-            'frozenturbulence':[bool, False],
-            'usematrixfreedrdw':[bool, True],
-            'skipafterfailedadjoint':[bool,True],
+            'adjointL2Convergence':[float, 1e-6],
+            'adjointL2ConvergenceRel':[float, 1e-16],
+            'adjointL2ConvergenceAbs':[float, 1e-16],
+            'adjointDivTol':[float, 1e5],
+            'approxPC': [bool, True],
+            'ADPC': [bool, False],
+            'viscPC':[bool,False],
+            'useDiagTSPC':[bool, True],
+            'restartAdjoint':[bool, True],
+            'adjointSolver': [str, ['GMRES', 'TFQMR', 'Richardson', 'BCGS', 'IBCGS']],
+            'adjointMaxIter': [int, 500],
+            'adjointSubspaceSize' : [int, 100],
+            'adjointMonitorStep': [int, 10],
+            'dissipationLumpingParameter':[float, 6.0],
+            'preconditionerSide': [str, ['right', 'left']],
+            'matrixOrdering': [str, ['RCM', 'natural', 'nested dissection', 'one way dissection', 'quotient minimum degree']],
+            'globalPreconditioner': [str, ['additive Schwarz', 'multigrid']],
+            'localPreconditioner' : [str, ['ILU']],
+            'ILUFill': [int, 2],
+            'ASMOverlap' : [int, 1],
+            'innerPreconIts':[int, 1],
+            'outerPreconIts':[int, 3],
+            'applyAdjointPCSubspaceSize':[int, 20],
+            'frozenTurbulence':[bool, False],
+            'useMatrixFreedrdw':[bool, True],
+            'skipAfterFailedAdjoint':[bool,True],
 
             # ADjoint debugger
-            'firstrun':[bool, True],
-            'verifystate':[bool, True],
-            'verifyspatial':[bool, True],
-            'verifyextra':[bool, True],
+            'firstRun':[bool, True],
+            'verifyState':[bool, True],
+            'verifySpatial':[bool, True],
+            'verifyExtra':[bool, True],
 
             # Function parmeters
-            'sepsensoroffset':[float, 0.0],
-            'sepsensorsharpness':[float, 10.0],
-            'computecavitation':[bool,False],
+            'sepSensorOffset':[float, 0.0],
+            'sepSensorSharpness':[float, 10.0],
+            'computeCavitation':[bool,False],
             }
 
         return defOpts
@@ -4841,11 +4783,11 @@ def _getOptionMap(self):
                                     'central plus cusp dissipation': self.adflow.constants.disscusp,
                                     'upwind': self.adflow.constants.upwind,
                                     'location':['discr', 'spacediscrcoarse']},
-            'limiter':{'vanalbeda':self.adflow.constants.vanalbeda,
+            'limiter':{'van albada':self.adflow.constants.vanalbeda,
                        'minmod':self.adflow.constants.minmod,
-                       'nolimiter':self.adflow.constants.nolimiter,
+                       'no limiter':self.adflow.constants.nolimiter,
                        'location':['discr', 'limiter']},
-            'smoother':{'runge kutta':self.adflow.constants.rungekutta,
+            'smoother':{'runge-kutta':self.adflow.constants.rungekutta,
                         'lu sgs':self.adflow.constants.nllusgs,
                         'lu sgs line':self.adflow.constants.nllusgsline,
                         'dadi':self.adflow.constants.dadi,
@@ -4863,10 +4805,10 @@ def _getOptionMap(self):
                         'external':self.adflow.constants.externalflow,
                         'location':['physics', 'flowtype']},
             'turbulencemodel':{'sa':self.adflow.constants.spalartallmaras,
-                               'sae':self.adflow.constants.spalartallmarasedwards,
-                               'k omega wilcox':self.adflow.constants.komegawilcox,
-                               'k omega modified':self.adflow.constants.komegamodified,
-                               'ktau':self.adflow.constants.ktau,
+                               'sa-edwards':self.adflow.constants.spalartallmarasedwards,
+                               'k-omega wilcox':self.adflow.constants.komegawilcox,
+                               'k-omega modified':self.adflow.constants.komegamodified,
+                               'k-tau':self.adflow.constants.ktau,
                                'menter sst':self.adflow.constants.mentersst,
                                'v2f':self.adflow.constants.v2f,
                                'location':['physics', 'turbmodel']},
@@ -4876,12 +4818,11 @@ def _getOptionMap(self):
             'turbresscale':['iter', 'turbresscale'],
             'turbulenceproduction':{'strain':self.adflow.constants.strain,
                                     'vorticity':self.adflow.constants.vorticity,
-                                    'katolaunder':self.adflow.constants.katolaunder,
+                                    'kato-launder':self.adflow.constants.katolaunder,
                                     'location':['physics', 'turbprod']},
             'useqcr':['physics', 'useqcr'],
             'userotationsa':['physics', 'userotationsa'],
             'useft2sa':['physics', 'useft2sa'],
-            'kssa':['physics', 'kssa'],
             'eddyvisinfratio':['physics', 'eddyvisinfratio'],
             'usewallfunctions':['physics', 'wallfunctions'],
             'walldistcutoff':['physics', 'walldistcutoff'],
@@ -4913,9 +4854,9 @@ def _getOptionMap(self):
             'cflcoarse':['iter', 'cflcoarse'],
             'mgcycle':['iter', 'mgdescription'],
             'mgstartlevel':['iter', 'mgstartlevel'],
-            'resaveraging':{'noresaveraging':self.adflow.constants.noresaveraging,
-                            'alwaysresaveraging':self.adflow.constants.alwaysresaveraging,
-                            'alternateresaveraging':self.adflow.constants.alternateresaveraging,
+            'resaveraging':{'never':self.adflow.constants.noresaveraging,
+                            'always':self.adflow.constants.alwaysresaveraging,
+                            'alternate':self.adflow.constants.alternateresaveraging,
                             'location':['iter', 'resaveraging']},
             'smoothparameter':['iter', 'smoop'],
             'cfllimit':['iter', 'cfllimit'],
@@ -4939,8 +4880,8 @@ def _getOptionMap(self):
 
             # Unsteady Params
             'timeintegrationscheme':{'bdf':self.adflow.constants.bdf,
-                                     'explicitrk':self.adflow.constants.explicitrk,
-                                     'implicitrk':self.adflow.constants.implicitrk,
+                                     'explicit rk':self.adflow.constants.explicitrk,
+                                     'implicit rk':self.adflow.constants.implicitrk,
                                      'location':['unsteady', 'timeintegrationscheme']},
             'timeaccuracy':['unsteady', 'timeaccuracy'],
             'ntimestepscoarse':['unsteady', 'ntimestepscoarse'],
@@ -4984,10 +4925,9 @@ def _getOptionMap(self):
             'applypcsubspacesize':['nk', 'applypcsubspacesize'],
             'nkinnerpreconits':['nk', 'nk_innerpreconits'],
             'nkouterpreconits':['nk', 'nk_outerpreconits'],
-            'nkcfl0':['nk', 'nk_cfl0'],
             'nkls':{'none':self.adflow.constants.nolinesearch,
                     'cubic':self.adflow.constants.cubiclinesearch,
-                    'non monotone':self.adflow.constants.nonmonotonelinesearch,
+                    'non-monotone':self.adflow.constants.nonmonotonelinesearch,
                     'location':['nk', 'nk_ls']},
             'nkfixedstep':['nk', 'nk_fixedstep'],
             'rkreset':['iter', 'rkreset'],
@@ -5076,7 +5016,7 @@ def _getOptionMap(self):
                               'quotient minimum degree':'qmd',
                               'location':['adjoint', 'matrixordering']},
 
-            'globalpreconditioner':{'additive schwartz':'asm',
+            'globalpreconditioner':{'additive schwarz':'asm',
                                     'multigrid':'mg',
                                     'location':['adjoint', 'precondtype']},
             'localpreconditioner':{'ilu':'ilu',
@@ -5105,17 +5045,14 @@ def _getSpecialOptionLists(self):
         """
         Lists of special options
         """
-        # These "ignore_options" are NOT actually', ignored, rather,
-        # they DO NOT GET SET IN THE FORTRAN CODE. Rather, they are
-        # used strictly in Python
+        # pythonOptions do not get set in the Fortran code.
+        # They are used strictly in Python.
 
-        ignoreOptions = set(('numbersolutions',
+        pythonOptions = set(('numbersolutions',
                              'writesurfacesolution',
                              'writevolumesolution',
                              'writetecplotsurfacesolution',
                              'coupledsolution',
-                             'autosolveretry',
-                             'autoadjointretry',
                              'partitiononly',
                              'liftindex',
                              'meshsurfacefamily',
@@ -5128,10 +5065,14 @@ def _getSpecialOptionLists(self):
                              'skipafterfailedadjoint',
                          ))
 
-        # Deprecated options. These should not be used, but old
-        # scripts can continue to run
+        # Deprecated options that may be in old scripts and should not be used.
+
         deprecatedOptions = {'finitedifferencepc':'Use the ADPC option.',
                              'writesolution':'Use writeSurfaceSolution and writeVolumeSolution options instead.',
+                             'autosolveretry':'This feature is not implemented.',
+                             'autoadjointretry':'This feature is not implemented.',
+                             'nkcfl0':'The NK solver does not use a CFL value anymore. \
+                                       The CFL is set to infinity and the true Newton method is used.',
                              }
 
         specialOptions = set(('surfacevariables',
@@ -5145,7 +5086,7 @@ def _getSpecialOptionLists(self):
                               'oversetpriority',
                           ))
 
-        return ignoreOptions, deprecatedOptions, specialOptions
+        return pythonOptions, deprecatedOptions, specialOptions
 
     def _getObjectivesAndDVs(self):
         iDV = OrderedDict()
@@ -5268,9 +5209,9 @@ def _updateTurbResScale(self):
 
         if self.getOption("turbresscale") is None:
             turbModel = self.getOption("turbulencemodel")
-            if turbModel == "sa":
+            if turbModel == "SA":
                 self.setOption("turbresscale", 10000.0)
-            elif turbModel == "menter sst":
+            elif turbModel == "Menter SST":
                 self.setOption("turbresscale", [1e3, 1e-6])
             else:
                 raise Error("Turbulence model %-35s does not have default values specified for turbresscale. Specify turbresscale manually or update the python interface"%(turbModel))

From 74cf9927d1c5ff7ec3707ab458af9e712dbb64af Mon Sep 17 00:00:00 2001
From: DavidAnderegg <andv@zhaw.ch>
Date: Fri, 5 Feb 2021 10:29:27 +0100
Subject: [PATCH 12/60] complete merge

---
 adflow/pyADflow.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/adflow/pyADflow.py b/adflow/pyADflow.py
index 5d28a6e01..629a9f291 100644
--- a/adflow/pyADflow.py
+++ b/adflow/pyADflow.py
@@ -4519,6 +4519,7 @@ def _getDefaultOptions():
             'useQCR':[bool, False],
             'useRotationSA':[bool, False],
             'useft2SA':[bool, True],
+            'kssa': [float, 0.0],
             'eddyVisInfRatio':[float, 0.009],
             'useWallFunctions':[bool, False],
             'useApproxWallDistance':[bool, True],
@@ -4823,6 +4824,7 @@ def _getOptionMap(self):
             'useqcr':['physics', 'useqcr'],
             'userotationsa':['physics', 'userotationsa'],
             'useft2sa':['physics', 'useft2sa'],
+            'kssa':['physics', 'kssa'],
             'eddyvisinfratio':['physics', 'eddyvisinfratio'],
             'usewallfunctions':['physics', 'wallfunctions'],
             'walldistcutoff':['physics', 'walldistcutoff'],

From 9d5b0c9b469ac0fef30ddbaec4a5e137dbba84cc Mon Sep 17 00:00:00 2001
From: DavidAnderegg <andv@zhaw.ch>
Date: Wed, 10 Feb 2021 08:46:06 +0100
Subject: [PATCH 13/60] try to fix crash

---
 .../outputForward/turbbcroutines_d.f90        |    2 +
 .../outputReverse/turbbcroutines_b.f90        |    2 +
 .../outputReverseFast/adjointextra_fast_b.f90 |  585 ++++++++++
 .../outputReverseFast/bcdata_fast_b.f90       | 1019 +++++++++++++++++
 .../oversetutilities_fast_b.f90               |  174 +++
 .../turbbcroutines_fast_b.f90                 |    2 +
 .../zipperintegrations_fast_b.f90             |  331 ++++++
 src/turbulence/sa.F90                         |    2 +-
 src/turbulence/turbBCRoutines.F90             |    2 +
 9 files changed, 2118 insertions(+), 1 deletion(-)
 create mode 100644 src/adjoint/outputReverseFast/adjointextra_fast_b.f90
 create mode 100644 src/adjoint/outputReverseFast/bcdata_fast_b.f90
 create mode 100644 src/adjoint/outputReverseFast/oversetutilities_fast_b.f90
 create mode 100644 src/adjoint/outputReverseFast/zipperintegrations_fast_b.f90

diff --git a/src/adjoint/outputForward/turbbcroutines_d.f90 b/src/adjoint/outputForward/turbbcroutines_d.f90
index 3db1c46ad..25241e978 100644
--- a/src/adjoint/outputForward/turbbcroutines_d.f90
+++ b/src/adjoint/outputForward/turbbcroutines_d.f90
@@ -2269,6 +2269,8 @@ function safact(ks, d)
     real(kind=realtype) :: d
     if (ks .eq. zero) then
       safact = -one
+    else if (d .eq. zero) then
+      safact = one
     else
       safact = (ks-d/0.03)/(ks+d/0.03)
     end if
diff --git a/src/adjoint/outputReverse/turbbcroutines_b.f90 b/src/adjoint/outputReverse/turbbcroutines_b.f90
index f6726f0f8..93ac32df8 100644
--- a/src/adjoint/outputReverse/turbbcroutines_b.f90
+++ b/src/adjoint/outputReverse/turbbcroutines_b.f90
@@ -2645,6 +2645,8 @@ function safact(ks, d)
     real(kind=realtype) :: d
     if (ks .eq. zero) then
       safact = -one
+    else if (d .eq. zero) then
+      safact = one
     else
       safact = (ks-d/0.03)/(ks+d/0.03)
     end if
diff --git a/src/adjoint/outputReverseFast/adjointextra_fast_b.f90 b/src/adjoint/outputReverseFast/adjointextra_fast_b.f90
new file mode 100644
index 000000000..5cbb23259
--- /dev/null
+++ b/src/adjoint/outputReverseFast/adjointextra_fast_b.f90
@@ -0,0 +1,585 @@
+!        generated by tapenade     (inria, tropics team)
+!  tapenade 3.10 (r5363) -  9 sep 2014 09:53
+!
+module adjointextra_fast_b
+  implicit none
+
+contains
+  subroutine volume_block()
+! this is copy of metric.f90. it was necessary to copy this file
+! since there is debugging stuff in the original that is not
+! necessary for ad.
+    use constants
+    use blockpointers
+    use cgnsgrid
+    use communication
+    use inputtimespectral
+    implicit none
+!
+!      local parameter.
+!
+    real(kind=realtype), parameter :: thresvolume=1.e-2_realtype
+    real(kind=realtype), parameter :: halocellratio=1e-10_realtype
+!
+!      local variables.
+!
+    integer(kind=inttype) :: i, j, k, n, m, l, ii
+    integer(kind=inttype) :: mm
+    real(kind=realtype) :: fact, mult
+    real(kind=realtype) :: xp, yp, zp, vp1, vp2, vp3, vp4, vp5, vp6
+    real(kind=realtype) :: xxp, yyp, zzp
+    real(kind=realtype), dimension(3) :: v1, v2
+    intrinsic abs
+! compute the volumes. the hexahedron is split into 6 pyramids
+! whose volumes are computed. the volume is positive for a
+! right handed block.
+! initialize the volumes to zero. the reasons is that the second
+! level halo's must be initialized to zero and for convenience
+! all the volumes are set to zero.
+    vol = zero
+    do k=1,ke
+      n = k - 1
+      do j=1,je
+        m = j - 1
+        do i=1,ie
+          l = i - 1
+! compute the coordinates of the center of gravity.
+          xp = eighth*(x(i, j, k, 1)+x(i, m, k, 1)+x(i, m, n, 1)+x(i, j&
+&           , n, 1)+x(l, j, k, 1)+x(l, m, k, 1)+x(l, m, n, 1)+x(l, j, n&
+&           , 1))
+          yp = eighth*(x(i, j, k, 2)+x(i, m, k, 2)+x(i, m, n, 2)+x(i, j&
+&           , n, 2)+x(l, j, k, 2)+x(l, m, k, 2)+x(l, m, n, 2)+x(l, j, n&
+&           , 2))
+          zp = eighth*(x(i, j, k, 3)+x(i, m, k, 3)+x(i, m, n, 3)+x(i, j&
+&           , n, 3)+x(l, j, k, 3)+x(l, m, k, 3)+x(l, m, n, 3)+x(l, j, n&
+&           , 3))
+! compute the volumes of the 6 sub pyramids. the
+! arguments of volpym must be such that for a (regular)
+! right handed hexahedron all volumes are positive.
+          call volpym(x(i, j, k, 1), x(i, j, k, 2), x(i, j, k, 3), x(i, &
+&               j, n, 1), x(i, j, n, 2), x(i, j, n, 3), x(i, m, n, 1), x&
+&               (i, m, n, 2), x(i, m, n, 3), x(i, m, k, 1), x(i, m, k, 2&
+&               ), x(i, m, k, 3), vp1)
+          call volpym(x(l, j, k, 1), x(l, j, k, 2), x(l, j, k, 3), x(l, &
+&               m, k, 1), x(l, m, k, 2), x(l, m, k, 3), x(l, m, n, 1), x&
+&               (l, m, n, 2), x(l, m, n, 3), x(l, j, n, 1), x(l, j, n, 2&
+&               ), x(l, j, n, 3), vp2)
+          call volpym(x(i, j, k, 1), x(i, j, k, 2), x(i, j, k, 3), x(l, &
+&               j, k, 1), x(l, j, k, 2), x(l, j, k, 3), x(l, j, n, 1), x&
+&               (l, j, n, 2), x(l, j, n, 3), x(i, j, n, 1), x(i, j, n, 2&
+&               ), x(i, j, n, 3), vp3)
+          call volpym(x(i, m, k, 1), x(i, m, k, 2), x(i, m, k, 3), x(i, &
+&               m, n, 1), x(i, m, n, 2), x(i, m, n, 3), x(l, m, n, 1), x&
+&               (l, m, n, 2), x(l, m, n, 3), x(l, m, k, 1), x(l, m, k, 2&
+&               ), x(l, m, k, 3), vp4)
+          call volpym(x(i, j, k, 1), x(i, j, k, 2), x(i, j, k, 3), x(i, &
+&               m, k, 1), x(i, m, k, 2), x(i, m, k, 3), x(l, m, k, 1), x&
+&               (l, m, k, 2), x(l, m, k, 3), x(l, j, k, 1), x(l, j, k, 2&
+&               ), x(l, j, k, 3), vp5)
+          call volpym(x(i, j, n, 1), x(i, j, n, 2), x(i, j, n, 3), x(l, &
+&               j, n, 1), x(l, j, n, 2), x(l, j, n, 3), x(l, m, n, 1), x&
+&               (l, m, n, 2), x(l, m, n, 3), x(i, m, n, 1), x(i, m, n, 2&
+&               ), x(i, m, n, 3), vp6)
+! set the volume to 1/6 of the sum of the volumes of the
+! pyramid. remember that volpym computes 6 times the
+! volume.
+          vol(i, j, k) = sixth*(vp1+vp2+vp3+vp4+vp5+vp6)
+          if (vol(i, j, k) .ge. 0.) then
+            vol(i, j, k) = vol(i, j, k)
+          else
+            vol(i, j, k) = -vol(i, j, k)
+          end if
+        end do
+      end do
+    end do
+! some additional safety stuff for halo volumes.
+    do k=2,kl
+      do j=2,jl
+        if (vol(1, j, k)/vol(2, j, k) .lt. halocellratio) vol(1, j, k)&
+&          = vol(2, j, k)
+        if (vol(ie, j, k)/vol(il, j, k) .lt. halocellratio) vol(ie, j, k&
+&         ) = vol(il, j, k)
+      end do
+    end do
+    do k=2,kl
+      do i=1,ie
+        if (vol(i, 1, k)/vol(i, 2, k) .lt. halocellratio) vol(i, 1, k)&
+&          = vol(i, 2, k)
+        if (vol(i, je, k)/vol(i, jl, k) .lt. halocellratio) vol(i, je, k&
+&         ) = vol(i, jl, k)
+      end do
+    end do
+    do j=1,je
+      do i=1,ie
+        if (vol(i, j, 1)/vol(i, j, 2) .lt. halocellratio) vol(i, j, 1)&
+&          = vol(i, j, 2)
+        if (vol(i, j, ke)/vol(i, j, kl) .lt. halocellratio) vol(i, j, ke&
+&         ) = vol(i, j, kl)
+      end do
+    end do
+
+  contains
+    subroutine volpym(xa, ya, za, xb, yb, zb, xc, yc, zc, xd, yd, zd, &
+&     volume)
+!
+!         volpym computes 6 times the volume of a pyramid. node p,
+!         whose coordinates are set in the subroutine metric itself,
+!         is the top node and a-b-c-d is the quadrilateral surface.
+!         it is assumed that the cross product vca * vdb points in
+!         the direction of the top node. here vca is the diagonal
+!         running from node c to node a and vdb the diagonal from
+!         node d to node b.
+!
+      use precision
+      implicit none
+!
+!        function type.
+!
+      real(kind=realtype) :: volume
+!
+!        function arguments.
+!
+      real(kind=realtype), intent(in) :: xa, ya, za, xb, yb, zb
+      real(kind=realtype), intent(in) :: xc, yc, zc, xd, yd, zd
+      volume = (xp-fourth*(xa+xb+xc+xd))*((ya-yc)*(zb-zd)-(za-zc)*(yb-yd&
+&       )) + (yp-fourth*(ya+yb+yc+yd))*((za-zc)*(xb-xd)-(xa-xc)*(zb-zd))&
+&       + (zp-fourth*(za+zb+zc+zd))*((xa-xc)*(yb-yd)-(ya-yc)*(xb-xd))
+    end subroutine volpym
+  end subroutine volume_block
+  subroutine metric_block()
+    use constants
+    use blockpointers
+    implicit none
+! local variables.
+    integer(kind=inttype) :: i, j, k, n, m, l, ii
+    real(kind=realtype) :: fact
+    real(kind=realtype) :: xxp, yyp, zzp
+    real(kind=realtype), dimension(3) :: v1, v2
+    intrinsic mod
+! set the factor in the surface normals computation. for a
+! left handed block this factor is negative, such that the
+! normals still point in the direction of increasing index.
+! the formulae used later on assume a right handed block
+! and fact is used to correct this for a left handed block,
+! as well as the scaling factor of 0.5
+    if (righthanded) then
+      fact = half
+    else
+      fact = -half
+    end if
+!
+!  computation of the face normals in i-, j- and k-direction.
+!  formula's are valid for a right handed block; for a left
+!  handed block the correct orientation is obtained via fact.
+!  the normals point in the direction of increasing index.
+!  the absolute value of fact is 0.5, because the cross
+!  product of the two diagonals is twice the normal vector.
+!  note that also the normals of the first level halo cells
+!  are computed. these are needed for the viscous fluxes.
+!
+! projected areas of cell faces in the i direction.
+    do ii=0,ke*je*(ie+1)-1
+! 0:ie
+      i = mod(ii, ie + 1) + 0
+!1:je
+      j = mod(ii/(ie+1), je) + 1
+!1:ke
+      k = ii/((ie+1)*je) + 1
+      n = k - 1
+      m = j - 1
+! determine the two diagonal vectors of the face.
+      v1(1) = x(i, j, n, 1) - x(i, m, k, 1)
+      v1(2) = x(i, j, n, 2) - x(i, m, k, 2)
+      v1(3) = x(i, j, n, 3) - x(i, m, k, 3)
+      v2(1) = x(i, j, k, 1) - x(i, m, n, 1)
+      v2(2) = x(i, j, k, 2) - x(i, m, n, 2)
+      v2(3) = x(i, j, k, 3) - x(i, m, n, 3)
+! the face normal, which is the cross product of the two
+! diagonal vectors times fact; remember that fact is
+! either -0.5 or 0.5.
+      si(i, j, k, 1) = fact*(v1(2)*v2(3)-v1(3)*v2(2))
+      si(i, j, k, 2) = fact*(v1(3)*v2(1)-v1(1)*v2(3))
+      si(i, j, k, 3) = fact*(v1(1)*v2(2)-v1(2)*v2(1))
+    end do
+! projected areas of cell faces in the j direction
+    do ii=0,ke*(je+1)*ie-1
+! 1:ie
+      i = mod(ii, ie) + 1
+!0:je
+      j = mod(ii/ie, je + 1) + 0
+!1:ke
+      k = ii/(ie*(je+1)) + 1
+      n = k - 1
+      l = i - 1
+! determine the two diagonal vectors of the face.
+      v1(1) = x(i, j, n, 1) - x(l, j, k, 1)
+      v1(2) = x(i, j, n, 2) - x(l, j, k, 2)
+      v1(3) = x(i, j, n, 3) - x(l, j, k, 3)
+      v2(1) = x(l, j, n, 1) - x(i, j, k, 1)
+      v2(2) = x(l, j, n, 2) - x(i, j, k, 2)
+      v2(3) = x(l, j, n, 3) - x(i, j, k, 3)
+! the face normal, which is the cross product of the two
+! diagonal vectors times fact; remember that fact is
+! either -0.5 or 0.5.
+      sj(i, j, k, 1) = fact*(v1(2)*v2(3)-v1(3)*v2(2))
+      sj(i, j, k, 2) = fact*(v1(3)*v2(1)-v1(1)*v2(3))
+      sj(i, j, k, 3) = fact*(v1(1)*v2(2)-v1(2)*v2(1))
+    end do
+! projected areas of cell faces in the k direction.
+    do ii=0,(ke+1)*je*ie-1
+! 1:ie
+      i = mod(ii, ie) + 1
+!1:je
+      j = mod(ii/ie, je) + 1
+!0:ke
+      k = ii/(ie*je) + 0
+      m = j - 1
+      l = i - 1
+! determine the two diagonal vectors of the face.
+      v1(1) = x(i, j, k, 1) - x(l, m, k, 1)
+      v1(2) = x(i, j, k, 2) - x(l, m, k, 2)
+      v1(3) = x(i, j, k, 3) - x(l, m, k, 3)
+      v2(1) = x(l, j, k, 1) - x(i, m, k, 1)
+      v2(2) = x(l, j, k, 2) - x(i, m, k, 2)
+      v2(3) = x(l, j, k, 3) - x(i, m, k, 3)
+! the face normal, which is the cross product of the two
+! diagonal vectors times fact; remember that fact is
+! either -0.5 or 0.5.
+      sk(i, j, k, 1) = fact*(v1(2)*v2(3)-v1(3)*v2(2))
+      sk(i, j, k, 2) = fact*(v1(3)*v2(1)-v1(1)*v2(3))
+      sk(i, j, k, 3) = fact*(v1(1)*v2(2)-v1(2)*v2(1))
+    end do
+  end subroutine metric_block
+  subroutine boundarynormals()
+!  the unit normals on the boundary faces. these always point
+!  out of the domain, so a multiplication by -1 is needed for
+!  the imin, jmin and kmin boundaries.
+!
+    use constants
+    use blockpointers
+    use cgnsgrid
+    use communication
+    use inputtimespectral
+    implicit none
+! local variables.
+    integer(kind=inttype) :: i, j, ii
+    integer(kind=inttype) :: mm
+    real(kind=realtype) :: fact, mult
+    real(kind=realtype) :: xxp, yyp, zzp
+    intrinsic mod
+    intrinsic sqrt
+!loop over the boundary subfaces of this block.
+bocoloop:do mm=1,nbocos
+! loop over the boundary faces of the subface.
+      do ii=0,(bcdata(mm)%jcend-bcdata(mm)%jcbeg+1)*(bcdata(mm)%icend-&
+&         bcdata(mm)%icbeg+1)-1
+        i = mod(ii, bcdata(mm)%icend - bcdata(mm)%icbeg + 1) + bcdata(mm&
+&         )%icbeg
+        j = ii/(bcdata(mm)%icend-bcdata(mm)%icbeg+1) + bcdata(mm)%jcbeg
+        select case  (bcfaceid(mm)) 
+        case (imin) 
+          mult = -one
+          xxp = si(1, i, j, 1)
+          yyp = si(1, i, j, 2)
+          zzp = si(1, i, j, 3)
+        case (imax) 
+          mult = one
+          xxp = si(il, i, j, 1)
+          yyp = si(il, i, j, 2)
+          zzp = si(il, i, j, 3)
+        case (jmin) 
+          mult = -one
+          xxp = sj(i, 1, j, 1)
+          yyp = sj(i, 1, j, 2)
+          zzp = sj(i, 1, j, 3)
+        case (jmax) 
+          mult = one
+          xxp = sj(i, jl, j, 1)
+          yyp = sj(i, jl, j, 2)
+          zzp = sj(i, jl, j, 3)
+        case (kmin) 
+          mult = -one
+          xxp = sk(i, j, 1, 1)
+          yyp = sk(i, j, 1, 2)
+          zzp = sk(i, j, 1, 3)
+        case (kmax) 
+          mult = one
+          xxp = sk(i, j, kl, 1)
+          yyp = sk(i, j, kl, 2)
+          zzp = sk(i, j, kl, 3)
+        end select
+! compute the inverse of the length of the normal vector
+! and possibly correct for inward pointing.
+        fact = sqrt(xxp*xxp + yyp*yyp + zzp*zzp)
+        if (fact .gt. zero) fact = mult/fact
+! compute the unit normal.
+        bcdata(mm)%norm(i, j, 1) = fact*xxp
+        bcdata(mm)%norm(i, j, 2) = fact*yyp
+        bcdata(mm)%norm(i, j, 3) = fact*zzp
+      end do
+    end do bocoloop
+  end subroutine boundarynormals
+  subroutine xhalo_block()
+!
+!       xhalo determines the coordinates of the nodal halo's.
+!       first it sets all halo coordinates by simple extrapolation,
+!       then the symmetry planes are treated (also the unit normal of
+!       symmetry planes are determined) and finally an exchange is
+!       made for the internal halo's.
+!
+    use constants
+    use blockpointers
+    use communication
+    use inputtimespectral
+    implicit none
+!
+!      local variables.
+!
+    integer(kind=inttype) :: mm, i, j, k
+    integer(kind=inttype) :: ibeg, iend, jbeg, jend, iimax, jjmax
+    logical :: err
+    real(kind=realtype) :: length, dot
+    real(kind=realtype), dimension(3) :: v1, v2, norm
+    intrinsic sqrt
+! extrapolation in i-direction.
+    do k=1,kl
+      do j=1,jl
+        x(0, j, k, 1) = two*x(1, j, k, 1) - x(2, j, k, 1)
+        x(0, j, k, 2) = two*x(1, j, k, 2) - x(2, j, k, 2)
+        x(0, j, k, 3) = two*x(1, j, k, 3) - x(2, j, k, 3)
+        x(ie, j, k, 1) = two*x(il, j, k, 1) - x(nx, j, k, 1)
+        x(ie, j, k, 2) = two*x(il, j, k, 2) - x(nx, j, k, 2)
+        x(ie, j, k, 3) = two*x(il, j, k, 3) - x(nx, j, k, 3)
+      end do
+    end do
+! extrapolation in j-direction.
+    do k=1,kl
+      do i=0,ie
+        x(i, 0, k, 1) = two*x(i, 1, k, 1) - x(i, 2, k, 1)
+        x(i, 0, k, 2) = two*x(i, 1, k, 2) - x(i, 2, k, 2)
+        x(i, 0, k, 3) = two*x(i, 1, k, 3) - x(i, 2, k, 3)
+        x(i, je, k, 1) = two*x(i, jl, k, 1) - x(i, ny, k, 1)
+        x(i, je, k, 2) = two*x(i, jl, k, 2) - x(i, ny, k, 2)
+        x(i, je, k, 3) = two*x(i, jl, k, 3) - x(i, ny, k, 3)
+      end do
+    end do
+! extrapolation in k-direction.
+    do j=0,je
+      do i=0,ie
+        x(i, j, 0, 1) = two*x(i, j, 1, 1) - x(i, j, 2, 1)
+        x(i, j, 0, 2) = two*x(i, j, 1, 2) - x(i, j, 2, 2)
+        x(i, j, 0, 3) = two*x(i, j, 1, 3) - x(i, j, 2, 3)
+        x(i, j, ke, 1) = two*x(i, j, kl, 1) - x(i, j, nz, 1)
+        x(i, j, ke, 2) = two*x(i, j, kl, 2) - x(i, j, nz, 2)
+        x(i, j, ke, 3) = two*x(i, j, kl, 3) - x(i, j, nz, 3)
+      end do
+    end do
+!
+!           mirror the halo coordinates adjacent to the symmetry
+!           planes
+!
+! loop over boundary subfaces.
+loopbocos:do mm=1,nbocos
+! the actual correction of the coordinates only takes
+! place for symmetry planes.
+      if (bctype(mm) .eq. symm) then
+! set some variables, depending on the block face on
+! which the subface is located.
+        norm(1) = bcdata(mm)%symnorm(1)
+        norm(2) = bcdata(mm)%symnorm(2)
+        norm(3) = bcdata(mm)%symnorm(3)
+        length = sqrt(norm(1)**2 + norm(2)**2 + norm(3)**2)
+! compute the unit normal of the subface.
+        norm(1) = norm(1)/length
+        norm(2) = norm(2)/length
+        norm(3) = norm(3)/length
+! see xhalo_block for comments for below:
+        if (length .gt. eps) then
+          select case  (bcfaceid(mm)) 
+          case (imin) 
+            ibeg = jnbeg(mm)
+            iend = jnend(mm)
+            iimax = jl
+            jbeg = knbeg(mm)
+            jend = knend(mm)
+            jjmax = kl
+            if (ibeg .eq. 1) ibeg = 0
+            if (iend .eq. iimax) iend = iimax + 1
+            if (jbeg .eq. 1) jbeg = 0
+            if (jend .eq. jjmax) jend = jjmax + 1
+            do j=jbeg,jend
+              do i=ibeg,iend
+                v1(1) = x(1, i, j, 1) - x(2, i, j, 1)
+                v1(2) = x(1, i, j, 2) - x(2, i, j, 2)
+                v1(3) = x(1, i, j, 3) - x(2, i, j, 3)
+                dot = two*(v1(1)*norm(1)+v1(2)*norm(2)+v1(3)*norm(3))
+                x(0, i, j, 1) = x(2, i, j, 1) + dot*norm(1)
+                x(0, i, j, 2) = x(2, i, j, 2) + dot*norm(2)
+                x(0, i, j, 3) = x(2, i, j, 3) + dot*norm(3)
+              end do
+            end do
+          case (imax) 
+            ibeg = jnbeg(mm)
+            iend = jnend(mm)
+            iimax = jl
+            jbeg = knbeg(mm)
+            jend = knend(mm)
+            jjmax = kl
+            if (ibeg .eq. 1) ibeg = 0
+            if (iend .eq. iimax) iend = iimax + 1
+            if (jbeg .eq. 1) jbeg = 0
+            if (jend .eq. jjmax) jend = jjmax + 1
+            do j=jbeg,jend
+              do i=ibeg,iend
+                v1(1) = x(il, i, j, 1) - x(nx, i, j, 1)
+                v1(2) = x(il, i, j, 2) - x(nx, i, j, 2)
+                v1(3) = x(il, i, j, 3) - x(nx, i, j, 3)
+                dot = two*(v1(1)*norm(1)+v1(2)*norm(2)+v1(3)*norm(3))
+                x(ie, i, j, 1) = x(nx, i, j, 1) + dot*norm(1)
+                x(ie, i, j, 2) = x(nx, i, j, 2) + dot*norm(2)
+                x(ie, i, j, 3) = x(nx, i, j, 3) + dot*norm(3)
+              end do
+            end do
+          case (jmin) 
+            ibeg = inbeg(mm)
+            iend = inend(mm)
+            iimax = il
+            jbeg = knbeg(mm)
+            jend = knend(mm)
+            jjmax = kl
+            if (ibeg .eq. 1) ibeg = 0
+            if (iend .eq. iimax) iend = iimax + 1
+            if (jbeg .eq. 1) jbeg = 0
+            if (jend .eq. jjmax) jend = jjmax + 1
+            do j=jbeg,jend
+              do i=ibeg,iend
+                v1(1) = x(i, 1, j, 1) - x(i, 2, j, 1)
+                v1(2) = x(i, 1, j, 2) - x(i, 2, j, 2)
+                v1(3) = x(i, 1, j, 3) - x(i, 2, j, 3)
+                dot = two*(v1(1)*norm(1)+v1(2)*norm(2)+v1(3)*norm(3))
+                x(i, 0, j, 1) = x(i, 2, j, 1) + dot*norm(1)
+                x(i, 0, j, 2) = x(i, 2, j, 2) + dot*norm(2)
+                x(i, 0, j, 3) = x(i, 2, j, 3) + dot*norm(3)
+              end do
+            end do
+          case (jmax) 
+            ibeg = inbeg(mm)
+            iend = inend(mm)
+            iimax = il
+            jbeg = knbeg(mm)
+            jend = knend(mm)
+            jjmax = kl
+            if (ibeg .eq. 1) ibeg = 0
+            if (iend .eq. iimax) iend = iimax + 1
+            if (jbeg .eq. 1) jbeg = 0
+            if (jend .eq. jjmax) jend = jjmax + 1
+            do j=jbeg,jend
+              do i=ibeg,iend
+                v1(1) = x(i, jl, j, 1) - x(i, ny, j, 1)
+                v1(2) = x(i, jl, j, 2) - x(i, ny, j, 2)
+                v1(3) = x(i, jl, j, 3) - x(i, ny, j, 3)
+                dot = two*(v1(1)*norm(1)+v1(2)*norm(2)+v1(3)*norm(3))
+                x(i, je, j, 1) = x(i, ny, j, 1) + dot*norm(1)
+                x(i, je, j, 2) = x(i, ny, j, 2) + dot*norm(2)
+                x(i, je, j, 3) = x(i, ny, j, 3) + dot*norm(3)
+              end do
+            end do
+          case (kmin) 
+            ibeg = inbeg(mm)
+            iend = inend(mm)
+            iimax = il
+            jbeg = jnbeg(mm)
+            jend = jnend(mm)
+            jjmax = jl
+            if (ibeg .eq. 1) ibeg = 0
+            if (iend .eq. iimax) iend = iimax + 1
+            if (jbeg .eq. 1) jbeg = 0
+            if (jend .eq. jjmax) jend = jjmax + 1
+            do j=jbeg,jend
+              do i=ibeg,iend
+                v1(1) = x(i, j, 1, 1) - x(i, j, 2, 1)
+                v1(2) = x(i, j, 1, 2) - x(i, j, 2, 2)
+                v1(3) = x(i, j, 1, 3) - x(i, j, 2, 3)
+                dot = two*(v1(1)*norm(1)+v1(2)*norm(2)+v1(3)*norm(3))
+                x(i, j, 0, 1) = x(i, j, 2, 1) + dot*norm(1)
+                x(i, j, 0, 2) = x(i, j, 2, 2) + dot*norm(2)
+                x(i, j, 0, 3) = x(i, j, 2, 3) + dot*norm(3)
+              end do
+            end do
+          case (kmax) 
+            ibeg = inbeg(mm)
+            iend = inend(mm)
+            iimax = il
+            jbeg = jnbeg(mm)
+            jend = jnend(mm)
+            jjmax = jl
+            if (ibeg .eq. 1) ibeg = 0
+            if (iend .eq. iimax) iend = iimax + 1
+            if (jbeg .eq. 1) jbeg = 0
+            if (jend .eq. jjmax) jend = jjmax + 1
+            do j=jbeg,jend
+              do i=ibeg,iend
+                v1(1) = x(i, j, kl, 1) - x(i, j, nz, 1)
+                v1(2) = x(i, j, kl, 2) - x(i, j, nz, 2)
+                v1(3) = x(i, j, kl, 3) - x(i, j, nz, 3)
+                dot = two*(v1(1)*norm(1)+v1(2)*norm(2)+v1(3)*norm(3))
+                x(i, j, ke, 1) = x(i, j, nz, 1) + dot*norm(1)
+                x(i, j, ke, 2) = x(i, j, nz, 2) + dot*norm(2)
+                x(i, j, ke, 3) = x(i, j, nz, 3) + dot*norm(3)
+              end do
+            end do
+          end select
+        end if
+      end if
+    end do loopbocos
+  end subroutine xhalo_block
+  subroutine resscale()
+    use constants
+    use blockpointers, only : il, jl, kl, nx, ny, nz, volref, dw
+    use flowvarrefstate, only : nwf, nt1, nt2
+    use inputiteration, only : turbresscale
+    implicit none
+! local variables
+    integer(kind=inttype) :: i, j, k, ii, nturb
+    real(kind=realtype) :: ovol
+    intrinsic mod
+! divide through by the reference volume
+    nturb = nt2 - nt1 + 1
+    do ii=0,nx*ny*nz-1
+      i = mod(ii, nx) + 2
+      j = mod(ii/nx, ny) + 2
+      k = ii/(nx*ny) + 2
+      ovol = one/volref(i, j, k)
+      dw(i, j, k, 1:nwf) = dw(i, j, k, 1:nwf)*ovol
+      dw(i, j, k, nt1:nt2) = dw(i, j, k, nt1:nt2)*ovol*turbresscale(1:&
+&       nturb)
+    end do
+  end subroutine resscale
+  subroutine sumdwandfw()
+    use constants
+    use blockpointers, only : il, jl, kl, dw, fw, iblank
+    use flowvarrefstate, only : nwf
+    implicit none
+! local variables
+    integer(kind=inttype) :: i, j, k, l
+    intrinsic real
+    intrinsic max
+    real(kind=realtype) :: x1
+    real(kind=realtype) :: max1
+    do l=1,nwf
+      do k=2,kl
+        do j=2,jl
+          do i=2,il
+            x1 = real(iblank(i, j, k), realtype)
+            if (x1 .lt. zero) then
+              max1 = zero
+            else
+              max1 = x1
+            end if
+            dw(i, j, k, l) = (dw(i, j, k, l)+fw(i, j, k, l))*max1
+          end do
+        end do
+      end do
+    end do
+  end subroutine sumdwandfw
+end module adjointextra_fast_b
diff --git a/src/adjoint/outputReverseFast/bcdata_fast_b.f90 b/src/adjoint/outputReverseFast/bcdata_fast_b.f90
new file mode 100644
index 000000000..361b759e9
--- /dev/null
+++ b/src/adjoint/outputReverseFast/bcdata_fast_b.f90
@@ -0,0 +1,1019 @@
+!        generated by tapenade     (inria, tropics team)
+!  tapenade 3.10 (r5363) -  9 sep 2014 09:53
+!
+module bcdata_fast_b
+  use constants
+  use bcdatamod
+  implicit none
+
+contains
+! ---------------------------------------------------------------
+! routines that set the appropriate variable names for bcs with
+! bcdata.
+  subroutine setbcvarnamesisothermalwall()
+    use cgnsnames
+    use constants
+    implicit none
+    nbcvar = nbcvarisothermalwall
+    bcvarnames(1) = cgnstemp
+  end subroutine setbcvarnamesisothermalwall
+  subroutine setbcvarnamessubsonicinflow()
+    use constants
+    use cgnsnames
+    use inputphysics, only : equations
+    use flowvarrefstate, only : nwt
+    implicit none
+!
+!      local variables.
+!
+    logical :: varallowed
+    nbcvar = nbcvarsubsonicinflow
+    if (equations .eq. ransequations) nbcvar = nbcvar + nwt
+    bcvarnames(1) = cgnsptot
+    bcvarnames(2) = cgnsttot
+    bcvarnames(3) = cgnsrhotot
+    bcvarnames(4) = cgnsvelanglex
+    bcvarnames(5) = cgnsvelangley
+    bcvarnames(6) = cgnsvelanglez
+    bcvarnames(7) = cgnsvelvecx
+    bcvarnames(8) = cgnsvelvecy
+    bcvarnames(9) = cgnsvelvecz
+    bcvarnames(10) = cgnsvelvecr
+    bcvarnames(11) = cgnsvelvectheta
+    bcvarnames(12) = cgnsdensity
+    bcvarnames(13) = cgnsvelx
+    bcvarnames(14) = cgnsvely
+    bcvarnames(15) = cgnsvelz
+    bcvarnames(16) = cgnsvelr
+    bcvarnames(17) = cgnsveltheta
+    call setbcvarnamesturb(17_inttype)
+  end subroutine setbcvarnamessubsonicinflow
+  subroutine setbcvarnamessubsonicoutflow()
+    use cgnsnames
+    use constants
+    use flowvarrefstate, only : nwt
+    implicit none
+    nbcvar = nbcvarsubsonicoutflow
+    bcvarnames(1) = cgnspressure
+  end subroutine setbcvarnamessubsonicoutflow
+  subroutine setbcvarnamessupersonicinflow()
+    use constants
+    use cgnsnames
+    use inputphysics, only : equations
+    use flowvarrefstate, only : nwt
+    implicit none
+    nbcvar = nbcvarsupersonicinflow
+    if (equations .eq. ransequations) nbcvar = nbcvar + nwt
+    bcvarnames(1) = cgnsdensity
+    bcvarnames(2) = cgnspressure
+    bcvarnames(3) = cgnsvelx
+    bcvarnames(4) = cgnsvely
+    bcvarnames(5) = cgnsvelz
+    bcvarnames(6) = cgnsvelr
+    bcvarnames(7) = cgnsveltheta
+    call setbcvarnamesturb(7_inttype)
+  end subroutine setbcvarnamessupersonicinflow
+  subroutine setbcvarnamesturb(offset)
+!
+!       setbcvarnamesturb sets the names for the turbulence
+!       variables to be determined. this depends on the turbulence
+!       model. if not the rans equations are solved an immediate
+!       return is made.
+!
+    use constants
+    use cgnsnames
+    use inputphysics, only : equations, turbmodel
+    implicit none
+!
+!      subroutine arguments.
+!
+    integer(kind=inttype), intent(in) :: offset
+! return immediately if not the rans equations are solved.
+    if (equations .ne. ransequations) then
+      return
+    else
+! determine the turbulence model and set the names accordingly.
+      select case  (turbmodel) 
+      case (spalartallmaras, spalartallmarasedwards) 
+        bcvarnames(offset+1) = cgnsturbsanu
+      case (komegawilcox, komegamodified, mentersst) 
+        bcvarnames(offset+1) = cgnsturbk
+        bcvarnames(offset+2) = cgnsturbomega
+      case (ktau) 
+        bcvarnames(offset+1) = cgnsturbk
+        bcvarnames(offset+2) = cgnsturbtau
+      case (v2f) 
+        bcvarnames(offset+1) = cgnsturbk
+        bcvarnames(offset+2) = cgnsturbepsilon
+        bcvarnames(offset+3) = cgnsturbv2
+        bcvarnames(offset+4) = cgnsturbf
+      end select
+    end if
+  end subroutine setbcvarnamesturb
+! ---------------------------------------------------------------
+! --------------------------------------
+!                utilities
+! --------------------------------------
+  subroutine computehtot(tt, ht)
+!
+!       computehtot computes the total enthalpy from the given total
+!       temperature. the total enthalpy is the integral of cp, which
+!       is a very simple expression for constant cp. for a variable cp
+!       it is a bit more work.
+!
+    use constants
+    use cpcurvefits
+    use communication, only : myid
+    use inputphysics, only : cpmodel, gammaconstant, rgasdim
+    use flowvarrefstate, only : pinfdim
+    implicit none
+!
+!      subroutine arguments.
+!
+    real(kind=realtype), intent(in) :: tt
+    real(kind=realtype), intent(out) :: ht
+!
+!      local variables.
+!
+    integer(kind=inttype) :: ii, nn, mm, start
+    real(kind=realtype) :: t2
+!        ================================================================
+! determine the cp model used in the computation.
+    select case  (cpmodel) 
+    case (cpconstant) 
+! constant cp. the total enthalpy is simply cp*tt.
+      ht = gammaconstant*rgasdim*tt/(gammaconstant-one)
+    end select
+  end subroutine computehtot
+  subroutine unitvectorscylsystem(boco)
+!
+!       unitvectorscylsystem determines the unit vectors of the
+!       local coordinate systen of the boundary face defined by the
+!       data in bcdatamod. in that local system the axial direction
+!       is rotation axis.
+!
+    use constants
+    use blockpointers, only : bcfaceid, bcdata, x, si, sj, sk, il, jl,&
+&   kl, sectionid
+    use section, only : sections
+    implicit none
+!
+!      subroutine arguments.
+!
+    integer(kind=inttype), intent(in) :: boco
+!
+!      local variables.
+!
+    integer(kind=inttype) :: i, j
+    real(kind=realtype) :: factinlet, var
+    real(kind=realtype), dimension(3) :: dir
+    real(kind=realtype), dimension(:, :, :), pointer :: ss
+    intrinsic abs
+    intrinsic sqrt
+    real(kind=realtype) :: abs0
+! set the pointers for coordinates and normals of the block
+! face on which this subface is located. set factinlet
+! such that factinlet*normals points into the domain.
+    select case  (bcfaceid(boco)) 
+    case (imin) 
+      xf => x(1, :, :, :)
+      ss => si(1, :, :, :)
+      factinlet = one
+    case (imax) 
+      xf => x(il, :, :, :)
+      ss => si(il, :, :, :)
+      factinlet = -one
+    case (jmin) 
+      xf => x(:, 1, :, :)
+      ss => sj(:, 1, :, :)
+      factinlet = one
+    case (jmax) 
+      xf => x(:, jl, :, :)
+      ss => sj(:, jl, :, :)
+      factinlet = -one
+    case (kmin) 
+      xf => x(:, :, 1, :)
+      ss => sk(:, :, 1, :)
+      factinlet = one
+    case (kmax) 
+      xf => x(:, :, kl, :)
+      ss => sk(:, :, kl, :)
+      factinlet = -one
+    end select
+! loop over the physical range of the subface to store the sum of
+! the normals. note that jbeg, jend, ibeg, iend cannot be used
+! here, because they may include the halo faces. instead the
+! nodal range is used, which defines the original subface. the
+! offset of +1 in the start index is there because you need
+! the face id's.
+    dir(1) = zero
+    dir(2) = zero
+    dir(3) = zero
+    do j=bcdata(boco)%jnbeg+1,bcdata(boco)%jnend
+      do i=bcdata(boco)%inbeg+1,bcdata(boco)%inend
+        dir(1) = dir(1) + ss(i, j, 1)
+        dir(2) = dir(2) + ss(i, j, 2)
+        dir(3) = dir(3) + ss(i, j, 3)
+      end do
+    end do
+! multiply by factinlet to make sure that the normal
+! is inward pointing.
+    dir(1) = dir(1)*factinlet
+    dir(2) = dir(2)*factinlet
+    dir(3) = dir(3)*factinlet
+! determine three unit vectors, which define the local cartesian
+! coordinate system of the rotation axis. first the axial
+! direction. if the axis cannot be determined from rotation info,
+! it is assumed to be the x-axis.
+    axis = sections(sectionid)%rotaxis
+    var = axis(1)**2 + axis(2)**2 + axis(3)**2
+    if (var .lt. half) then
+! no rotation axis specified. assume the x-axis
+! and set the logical axassumed to .true.
+      axis(1) = one
+      axis(2) = zero
+      axis(3) = zero
+      axassumed = .true.
+    end if
+! the axial axis must be such that it points into the
+! computational domain. if the dot product with dir is
+! negative the direction of axis should be reversed.
+    var = axis(1)*dir(1) + axis(2)*dir(2) + axis(3)*dir(3)
+    if (var .lt. zero) then
+      axis(1) = -axis(1)
+      axis(2) = -axis(2)
+      axis(3) = -axis(3)
+    end if
+    if (axis(2) .ge. 0.) then
+      abs0 = axis(2)
+    else
+      abs0 = -axis(2)
+    end if
+! two unit vectors define the radial plane. these vectors are
+! defined up to a constants. just pick a direction for the second
+! and create a unit vector normal to axis.
+    if (abs0 .lt. 0.707107_realtype) then
+      radvec1(1) = zero
+      radvec1(2) = one
+      radvec1(3) = zero
+    else
+      radvec1(1) = zero
+      radvec1(2) = zero
+      radvec1(3) = one
+    end if
+    var = radvec1(1)*axis(1) + radvec1(2)*axis(2) + radvec1(3)*axis(3)
+    radvec1(1) = radvec1(1) - var*axis(1)
+    radvec1(2) = radvec1(2) - var*axis(2)
+    radvec1(3) = radvec1(3) - var*axis(3)
+    var = one/sqrt(radvec1(1)**2+radvec1(2)**2+radvec1(3)**2)
+    radvec1(1) = radvec1(1)*var
+    radvec1(2) = radvec1(2)*var
+    radvec1(3) = radvec1(3)*var
+! the second vector of the radial plane is obtained
+! by taking the cross product of axis and radvec1.
+    radvec2(1) = axis(2)*radvec1(3) - axis(3)*radvec1(2)
+    radvec2(2) = axis(3)*radvec1(1) - axis(1)*radvec1(3)
+    radvec2(3) = axis(1)*radvec1(2) - axis(2)*radvec1(1)
+  end subroutine unitvectorscylsystem
+! ---------------------------------------------------------------
+! routines that set the actual bcdata values from the cgns data set
+! information.
+! ---------------------------------------------------------------
+  subroutine bcdataisothermalwall(boco, bcvararray, ibeg, iend, jbeg, &
+&   jend)
+!
+!       bcdataisothermalwall tries to extract the wall temperature
+!       for the currently active boundary face, which is an isothermal
+!       viscous wall.
+!
+    use constants
+    use cgnsnames
+    use blockpointers, only : bcfaceid, bcdata, nbkglobal
+    use utils_fast_b, only : terminate, sitemperature
+    use flowvarrefstate, only : tref
+    implicit none
+!
+!      subroutine arguments.
+!
+    integer(kind=inttype) :: boco
+    integer(kind=inttype) :: ibeg, iend, jbeg, jend
+    real(kind=realtype), dimension(ibeg:iend, jbeg:jend, nbcvarmax) :: &
+&   bcvararray
+!
+!      local variables.
+!
+    integer :: ierr
+    integer(kind=inttype) :: i, j
+    real(kind=realtype) :: mult, trans
+    character(len=maxstringlen) :: errormessage
+    intrinsic trim
+! write an error message and terminate if it was not
+! possible to determine the temperature.
+    if (.not.bcvarpresent(1)) then
+      write(errormessage, 100) trim(cgnsdoms(nbkglobal)%zonename), trim(&
+&     cgnsdoms(nbkglobal)%bocoinfo(cgnsboco)%boconame)
+      call terminate('bcdataisothermalwall', errormessage)
+    end if
+! convert to si-units and store the temperature in tns_wall.
+    call sitemperature(temp(1), mult, trans)
+    do j=jbeg,jend
+      do i=ibeg,iend
+        bcdata(boco)%tns_wall(i, j) = (mult*bcvararray(i, j, 1)+trans)/&
+&         tref
+      end do
+    end do
+ 100 format('zone ',a,', boundary subface ',a, &
+&          ': wall temperature not specified for isothermal wall')
+  end subroutine bcdataisothermalwall
+  subroutine bcdatasubsonicinflow(boco, bcvararray, ibeg, iend, jbeg, &
+&   jend, allturbpresent)
+!
+!       bcdatasubsonicinflow tries to extract the prescribed data
+!       for the currently active boundary face, which is a subsonic
+!       inflow. either total conditions and velocity direction or the
+!       velocity and density can be prescribed. in the latter case the
+!       mass flow is prescribed, which is okay as long as the flow is
+!       not choked.
+!
+    use constants
+    use cgnsnames
+    use blockpointers, only : nbkglobal, sectionid, bcfaceid, bcdata
+    use flowvarrefstate, only : tref, pref, href, rhoref, muref, nwt, &
+&   winf
+    use inputphysics, only : equations
+    use utils_fast_b, only : sidensity, sivelocity, sipressure, siangle, &
+&   sitemperature, terminate
+    implicit none
+!
+!      subroutine arguments.
+!
+    integer(kind=inttype), intent(in) :: boco
+    integer(kind=inttype) :: ibeg, iend, jbeg, jend
+    real(kind=realtype), dimension(ibeg:iend, jbeg:jend, nbcvarmax) :: &
+&   bcvararray
+    logical, intent(inout) :: allturbpresent
+!
+!      local variables.
+!
+    integer :: ierr, nn
+    logical :: ptpresent, ttpresent, rhotpresent
+    logical :: axpresent, aypresent, azpresent
+    logical :: xdirpresent, ydirpresent, zdirpresent
+    logical :: rdirpresent, tdirpresent
+    logical :: velxpresent, velypresent, velzpresent
+    logical :: rhopresent, velrpresent, veltpresent
+    logical :: totpresent, velpresent, dirpresent
+    character(len=maxstringlen) :: errormessage
+    intrinsic trim
+! store the logicals, which indicate succes or failure
+! a bit more readable.
+    ptpresent = bcvarpresent(1)
+    ttpresent = bcvarpresent(2)
+    rhotpresent = bcvarpresent(3)
+    axpresent = bcvarpresent(4)
+    aypresent = bcvarpresent(5)
+    azpresent = bcvarpresent(6)
+    xdirpresent = bcvarpresent(7)
+    ydirpresent = bcvarpresent(8)
+    zdirpresent = bcvarpresent(9)
+    rdirpresent = bcvarpresent(10)
+    tdirpresent = bcvarpresent(11)
+    rhopresent = bcvarpresent(12)
+    velxpresent = bcvarpresent(13)
+    velypresent = bcvarpresent(14)
+    velzpresent = bcvarpresent(15)
+    velrpresent = bcvarpresent(16)
+    veltpresent = bcvarpresent(17)
+! check if the total conditions are present.
+    nn = 0
+    if (ptpresent) nn = nn + 1
+    if (ttpresent) nn = nn + 1
+    if (rhotpresent) nn = nn + 1
+    totpresent = .false.
+    if (nn .ge. 2) totpresent = .true.
+! check if a velocity direction is present.
+    dirpresent = .false.
+    if (xdirpresent .and. rdirpresent) dirpresent = .true.
+    if ((axpresent .or. xdirpresent) .and. (aypresent .or. ydirpresent) &
+&       .and. (azpresent .or. zdirpresent)) dirpresent = .true.
+! check if a velocity vector is present.
+    velpresent = .false.
+    if (velxpresent .and. velrpresent) velpresent = .true.
+    if (velxpresent .and. velypresent .and. velzpresent) velpresent = &
+&       .true.
+! determine the situation we have here.
+    if (totpresent .and. dirpresent) then
+! total conditions and velocity direction are prescribed.
+! determine the values for the faces of the subface.
+      call totalsubsonicinlet()
+    else
+! not enough data is prescribed. print an error message
+! and exit.
+      write(errormessage, 100) trim(cgnsdoms(nbkglobal)%zonename), trim(&
+&     cgnsdoms(nbkglobal)%bocoinfo(cgnsboco)%boconame)
+      call terminate('bcdatasubsonicinflow', errormessage)
+    end if
+! set the turbulence variables and check if all of them are
+! prescribed. if not set allturbpresent to .false.
+    allturbpresent = setbcvarturb(17_inttype, boco, bcvararray, ibeg, &
+&     iend, jbeg, jend, bcdata(boco)%turbinlet)
+ 100 format('zone ',a,', boundary subface ',a, &
+&          ': not enough data specified for subsonic inlet')
+
+  contains
+!=================================================================
+!===============================================================
+    subroutine totalsubsonicinlet()
+!
+!         totalsubsonicinlet converts the prescribed total
+!         conditions and velocity direction into a useable format.
+!
+      use constants
+      use communication, only : adflow_comm_world
+      use inputphysics, only : rgasdim
+      use section, only : sections
+      implicit none
+!
+!        local variables.
+!
+      integer(kind=inttype) :: i, j, nn
+      real(kind=realtype) :: rhot, mult, trans, hdim, tdim
+      real(kind=realtype) :: ax, r1, r2, var, wax, wrad, wtheta
+      real(kind=realtype), dimension(3) :: xc, dir
+      integer :: ierr
+      intrinsic max
+      intrinsic sqrt
+      intrinsic cos
+      real(kind=realtype) :: max2
+      real(kind=realtype) :: max1
+      real(kind=realtype) :: y1
+! set the subsonic inlet treatment to totalconditions.
+      bcdata(boco)%subsonicinlettreatment = totalconditions
+! if the total pressure is present, convert it to si-units and
+! store it.
+      if (ptpresent) then
+        call sipressure(mass(1), length(1), time(1), mult, trans)
+        do j=jbeg,jend
+          do i=ibeg,iend
+            bcdata(boco)%ptinlet(i, j) = (mult*bcvararray(i, j, 1)+trans&
+&             )/pref
+          end do
+        end do
+      end if
+! if the total temperature is present, convert it to si-units
+! and store it.
+      if (ttpresent) then
+        call sitemperature(temp(2), mult, trans)
+        do j=jbeg,jend
+          do i=ibeg,iend
+            bcdata(boco)%ttinlet(i, j) = (mult*bcvararray(i, j, 2)+trans&
+&             )/tref
+          end do
+        end do
+      end if
+! check if the total density is present. if so, it may be used
+! to determine the total temperature or pressure if one of these
+! variables was not specified.
+      if (rhotpresent) then
+        call sidensity(mass(3), length(3), mult, trans)
+        if (ptpresent .and. (.not.ttpresent)) then
+! total pressure is present but total temperature is not.
+! convert the total density to si-units and use the perfect
+! gas law to obtain the total temperature.
+          do j=jbeg,jend
+            do i=ibeg,iend
+              rhot = mult*bcvararray(i, j, 3) + trans
+              bcdata(boco)%ttinlet(i, j) = bcdata(boco)%ptinlet(i, j)*&
+&               pref/(rgasdim*rhot)/tref
+            end do
+          end do
+        else if (ttpresent .and. (.not.ptpresent)) then
+! total temperature is present but total pressure is not.
+! convert the total density to si-units and use the perfect
+! gas law to obtain the total pressure.
+          do j=jbeg,jend
+            do i=ibeg,iend
+              rhot = mult*bcvararray(i, j, 3) + trans
+              bcdata(boco)%ptinlet(i, j) = rgasdim*rhot*bcdata(boco)%&
+&               ttinlet(i, j)*tref/pref
+            end do
+          end do
+        end if
+      end if
+! determine the velocity direction. there are multiple
+! possibilities to specify this direction.
+      if (rdirpresent) then
+! radial direction specified, i.e. a cylindrical coordinate
+! system is used for the velocity direction.
+! determine the unit vectors, which define the cylindrical
+! coordinate system aligned with the rotation axis.
+        call unitvectorscylsystem(boco)
+! initialize wtheta to zero. this value will be used if no
+! theta velocity component was specified.
+        wtheta = zero
+! loop over the faces of the subface.
+        do j=jbeg,jend
+          do i=ibeg,iend
+! determine the coordinates of the face center relative to
+! the rotation point of this section. normally this is an
+! average of i-1, i, j-1, j, but due to the usage of the
+! pointer xf and the fact that x originally starts at 0,
+! an offset of 1 is introduced and thus the average should
+! be taken of i, i+1, j and j+1.
+            xc(1) = fourth*(xf(i, j, 1)+xf(i+1, j, 1)+xf(i, j+1, 1)+xf(i&
+&             +1, j+1, 1)) - sections(sectionid)%rotcenter(1)
+            xc(2) = fourth*(xf(i, j, 2)+xf(i+1, j, 2)+xf(i, j+1, 2)+xf(i&
+&             +1, j+1, 2)) - sections(sectionid)%rotcenter(2)
+            xc(3) = fourth*(xf(i, j, 3)+xf(i+1, j, 3)+xf(i, j+1, 3)+xf(i&
+&             +1, j+1, 3)) - sections(sectionid)%rotcenter(3)
+! determine the coordinates in the local cartesian frame,
+! i.e. the frame determined by axis, radvec1 and radvec2.
+            ax = xc(1)*axis(1) + xc(2)*axis(2) + xc(3)*axis(3)
+            r1 = xc(1)*radvec1(1) + xc(2)*radvec1(2) + xc(3)*radvec1(3)
+            r2 = xc(1)*radvec2(1) + xc(2)*radvec2(2) + xc(3)*radvec2(3)
+! determine the weights of the unit vectors in the local
+! cylindrical system.
+            wax = bcvararray(i, j, 7)
+            wrad = bcvararray(i, j, 10)
+            if (tdirpresent) wtheta = bcvararray(i, j, 11)
+            if (eps .lt. r1*r1 + r2*r2) then
+              max1 = r1*r1 + r2*r2
+            else
+              max1 = eps
+            end if
+! determine the direction in the local cartesian frame,
+! determined by axis, radvec1 and radvec2.
+            var = one/sqrt(max1)
+            dir(1) = wax
+            dir(2) = var*(wrad*r1-wtheta*r2)
+            dir(3) = var*(wrad*r2+wtheta*r1)
+! transform this direction to the global cartesian frame.
+            bcdata(boco)%flowxdirinlet(i, j) = dir(1)*axis(1) + dir(2)*&
+&             radvec1(1) + dir(3)*radvec2(1)
+            bcdata(boco)%flowydirinlet(i, j) = dir(1)*axis(2) + dir(2)*&
+&             radvec1(2) + dir(3)*radvec2(2)
+            bcdata(boco)%flowzdirinlet(i, j) = dir(1)*axis(3) + dir(2)*&
+&             radvec1(3) + dir(3)*radvec2(3)
+          end do
+        end do
+      else
+! cartesian direction specified. either the angle or the
+! direction should be present.
+! x-direction.
+        if (axpresent) then
+! angle specified. convert it to si-units and determine
+! the corresponding direction.
+          call siangle(angle(4), mult, trans)
+          do j=jbeg,jend
+            do i=ibeg,iend
+              bcdata(boco)%flowxdirinlet(i, j) = cos(mult*bcvararray(i, &
+&               j, 4) + trans)
+            end do
+          end do
+        else
+! direction specified. simply copy it.
+          do j=jbeg,jend
+            do i=ibeg,iend
+              bcdata(boco)%flowxdirinlet(i, j) = bcvararray(i, j, 7)
+            end do
+          end do
+        end if
+! y-direction.
+        if (aypresent) then
+! angle specified. convert it to si-units and determine
+! the corresponding direction.
+          call siangle(angle(5), mult, trans)
+          do j=jbeg,jend
+            do i=ibeg,iend
+              bcdata(boco)%flowydirinlet(i, j) = cos(mult*bcvararray(i, &
+&               j, 5) + trans)
+            end do
+          end do
+        else
+! direction specified. simply copy it.
+          do j=jbeg,jend
+            do i=ibeg,iend
+              bcdata(boco)%flowydirinlet(i, j) = bcvararray(i, j, 8)
+            end do
+          end do
+        end if
+! z-direction.
+        if (azpresent) then
+! angle specified. convert it to si-units and determine
+! the corresponding direction.
+          call siangle(angle(6), mult, trans)
+          do j=jbeg,jend
+            do i=ibeg,iend
+              bcdata(boco)%flowzdirinlet(i, j) = cos(mult*bcvararray(i, &
+&               j, 6) + trans)
+            end do
+          end do
+        else
+! direction specified. simply copy it.
+          do j=jbeg,jend
+            do i=ibeg,iend
+              bcdata(boco)%flowzdirinlet(i, j) = bcvararray(i, j, 9)
+            end do
+          end do
+        end if
+      end if
+! loop over the faces of the subface to compute some
+! additional info.
+      do j=jbeg,jend
+        do i=ibeg,iend
+! compute the total enthalpy from the given
+! total temperature.
+          tdim = bcdata(boco)%ttinlet(i, j)*tref
+          call computehtot(tdim, hdim)
+          bcdata(boco)%htinlet(i, j) = hdim/href
+! determine the unit vector of the flow direction.
+          dir(1) = bcdata(boco)%flowxdirinlet(i, j)
+          dir(2) = bcdata(boco)%flowydirinlet(i, j)
+          dir(3) = bcdata(boco)%flowzdirinlet(i, j)
+          y1 = sqrt(dir(1)**2 + dir(2)**2 + dir(3)**2)
+          if (eps .lt. y1) then
+            max2 = y1
+          else
+            max2 = eps
+          end if
+          var = one/max2
+          bcdata(boco)%flowxdirinlet(i, j) = var*dir(1)
+          bcdata(boco)%flowydirinlet(i, j) = var*dir(2)
+          bcdata(boco)%flowzdirinlet(i, j) = var*dir(3)
+        end do
+      end do
+! check if the prescribed direction is an inflow. no halo's
+! should be included here and therefore the nodal range
+! (with an offset) must be used.
+      nn = 0
+      do j=bcdata(boco)%jnbeg+1,bcdata(boco)%jnend
+        do i=bcdata(boco)%inbeg+1,bcdata(boco)%inend
+          var = bcdata(boco)%flowxdirinlet(i, j)*bcdata(boco)%norm(i, j&
+&           , 1) + bcdata(boco)%flowydirinlet(i, j)*bcdata(boco)%norm(i&
+&           , j, 2) + bcdata(boco)%flowzdirinlet(i, j)*bcdata(boco)%norm&
+&           (i, j, 3)
+          if (var .gt. zero) nn = nn + 1
+        end do
+      end do
+    end subroutine totalsubsonicinlet
+  end subroutine bcdatasubsonicinflow
+  subroutine bcdatasubsonicoutflow(boco, bcvararray, ibeg, iend, jbeg, &
+&   jend)
+!
+!       bcdatasubsonicoutflow tries to extract the static pressure
+!       for the currently active boundary face, which is a subsonic
+!       outflow boundary.
+!
+    use constants
+    use cgnsnames
+    use blockpointers, only : bcdata, nbkglobal, bcfaceid
+    use utils_fast_b, only : terminate, sipressure
+    use flowvarrefstate, only : pref
+    implicit none
+!
+!      subroutine arguments.
+!
+    integer(kind=inttype) :: boco
+    integer(kind=inttype) :: ibeg, iend, jbeg, jend
+    real(kind=realtype), dimension(ibeg:iend, jbeg:jend, nbcvarmax) :: &
+&   bcvararray
+!
+!      local variables.
+!
+    integer :: ierr
+    integer(kind=inttype) :: i, j
+    real(kind=realtype) :: mult, trans
+    character(len=maxstringlen) :: errormessage
+    intrinsic trim
+! write an error message and terminate if it was not
+! possible to determine the static pressure.
+    if (.not.bcvarpresent(1)) then
+      write(errormessage, 100) trim(cgnsdoms(nbkglobal)%zonename), trim(&
+&     cgnsdoms(nbkglobal)%bocoinfo(cgnsboco)%boconame)
+      call terminate('bcdatasubsonicoutflow', errormessage)
+    end if
+! convert to si-units and store the pressure in ps.
+    call sipressure(mass(1), length(1), time(1), mult, trans)
+    do j=jbeg,jend
+      do i=ibeg,iend
+        bcdata(boco)%ps(i, j) = (mult*bcvararray(i, j, 1)+trans)/pref
+      end do
+    end do
+ 100 format('zone ',a,', boundary subface ',a, &
+&          ': static pressure not specified for subsonic outlet')
+  end subroutine bcdatasubsonicoutflow
+  subroutine bcdatasupersonicinflow(boco, bcvararray, ibeg, iend, jbeg, &
+&   jend, allflowpresent, allturbpresent)
+!
+!       bcdatasupersonicinflow tries to extract the primitive state
+!       vector for the currently active boundary face, which is a
+!       supersonic inflow.
+!
+    use constants
+    use cgnsnames
+    use blockpointers, only : bcdata, nbkglobal, bcfaceid, sectionid
+    use flowvarrefstate, only : nwt, pinfcorr, winf, uref, rhoref, &
+&   pref, muref
+    use inputphysics, only : equations, flowtype, veldirfreestream
+    use utils_fast_b, only : sidensity, sipressure, sivelocity, sitemperature&
+&   , terminate
+    implicit none
+!
+!      subroutine arguments.
+!
+    integer(kind=inttype), intent(in) :: boco
+    integer(kind=inttype) :: ibeg, iend, jbeg, jend
+    real(kind=realtype), dimension(ibeg:iend, jbeg:jend, nbcvarmax) :: &
+&   bcvararray
+    logical, intent(inout) :: allflowpresent
+    logical, intent(inout) :: allturbpresent
+!
+!      local variables.
+!
+    integer :: ierr
+    integer(kind=inttype) :: i, j, nn
+    real(kind=realtype) :: var
+    character(len=maxstringlen) :: errormessage
+    logical :: rhopresent, ppresent, velpresent
+    logical :: velxpresent, velypresent, velzpresent
+    logical :: velrpresent, veltpresent
+    intrinsic trim
+! store the logicals, which indicate success or failure
+! a bit more readable.
+    rhopresent = bcvarpresent(1)
+    ppresent = bcvarpresent(2)
+    velxpresent = bcvarpresent(3)
+    velypresent = bcvarpresent(4)
+    velzpresent = bcvarpresent(5)
+    velrpresent = bcvarpresent(6)
+    veltpresent = bcvarpresent(7)
+! check if a velocity vector is present.
+    velpresent = .false.
+    if (velxpresent .and. velrpresent) velpresent = .true.
+    if (velxpresent .and. velypresent .and. velzpresent) velpresent = &
+&       .true.
+! check if rho, p and the velocity vector are present.
+    if (rhopresent .and. ppresent .and. velpresent) then
+! all the variables needed are prescribed. set them.
+      call prescribedsupersonicinlet()
+    else
+! not all variables are present. check what type of flow
+! is to be solved.
+      select case  (flowtype) 
+      case (internalflow) 
+! internal flow. data at the inlet must be specified;
+! no free stream data can be taken.
+        write(errormessage, 100) trim(cgnsdoms(nbkglobal)%zonename), &
+&       trim(cgnsdoms(nbkglobal)%bocoinfo(cgnsboco)%boconame)
+        call terminate('bcdatasupersonicinflow', errormessage)
+      case (externalflow) 
+!=============================================================
+! external flow. free stream data is used.
+        do j=jbeg,jend
+          do i=ibeg,iend
+            bcdata(boco)%rho(i, j) = winf(irho)
+            bcdata(boco)%velx(i, j) = winf(ivx)
+            bcdata(boco)%vely(i, j) = winf(ivy)
+            bcdata(boco)%velz(i, j) = winf(ivz)
+            bcdata(boco)%ps(i, j) = pinfcorr
+          end do
+        end do
+! set the turbulence values
+        allturbpresent = setbcvarturb(7_inttype, boco, bcvararray, ibeg&
+&         , iend, jbeg, jend, bcdata(boco)%turbinlet)
+! set allflowpresent to .false.
+        allflowpresent = .false.
+      end select
+    end if
+! check if the prescribed velocity is an inflow. no halo's
+! should be included here and therefore the nodal range
+! (with an offset) must be used.
+    nn = 0
+    do j=bcdata(boco)%jnbeg+1,bcdata(boco)%jnend
+      do i=bcdata(boco)%inbeg+1,bcdata(boco)%inend
+        var = bcdata(boco)%velx(i, j)*bcdata(boco)%norm(i, j, 1) + &
+&         bcdata(boco)%vely(i, j)*bcdata(boco)%norm(i, j, 2) + bcdata(&
+&         boco)%velz(i, j)*bcdata(boco)%norm(i, j, 3)
+        if (var .gt. zero) nn = nn + 1
+      end do
+    end do
+    if (nn .gt. 0) then
+      write(errormessage, 102) trim(cgnsdoms(nbkglobal)%zonename), trim(&
+&     cgnsdoms(nbkglobal)%bocoinfo(cgnsboco)%boconame)
+      call terminate('bcdatasupersonicinflow', errormessage)
+    end if
+ 100 format('zone ',a,', boundary subface ',a, &
+&          ': not enough data specified for supersonic inlet')
+ 102 format('zone ',a,', supersonic inlet boundary subface ',a, &
+&          ': velocity points out of the domain for some faces.')
+
+  contains
+    subroutine prescribedsupersonicinlet()
+!
+!         prescribedsupersonicinlet sets the variables for this
+!         supersonic inlet to prescribed values.
+!
+      use section, only : sections
+      implicit none
+!
+!        local variables.
+!
+      integer(kind=inttype) :: i, j
+      real(kind=realtype) :: mult, trans
+      real(kind=realtype) :: ax, r1, r2, var, vax, vrad, vtheta
+      real(kind=realtype), dimension(3) :: xc, vloc
+      real(kind=realtype), dimension(3) :: multvel, transvel
+      intrinsic max
+      intrinsic sqrt
+      real(kind=realtype) :: max1
+! set the density. take the conversion factor to si-units
+! into account.
+      call sidensity(mass(1), length(1), mult, trans)
+      do j=jbeg,jend
+        do i=ibeg,iend
+          bcdata(boco)%rho(i, j) = (mult*bcvararray(i, j, 1)+trans)/&
+&           rhoref
+        end do
+      end do
+! set the pressure. take the conversion factor to si-units
+! into account.
+      call sipressure(mass(1), length(2), time(2), mult, trans)
+      do j=jbeg,jend
+        do i=ibeg,iend
+          bcdata(boco)%ps(i, j) = (mult*bcvararray(i, j, 2)+trans)/pref
+        end do
+      end do
+! check the situation we are having here for the velocity.
+      if (velrpresent) then
+! radial velocity component prescribed. this must be converted
+! to cartesian components.
+! determine the unit vectors, which define the cylindrical
+! coordinate system aligned with the rotation axis.
+        call unitvectorscylsystem(boco)
+! determine the conversion factor to si-units for the three
+! components. note that a test must be made whether the theta
+! component is present.
+        call sivelocity(length(3), time(3), multvel(1), transvel(1))
+        call sivelocity(length(6), time(6), multvel(2), transvel(2))
+        if (veltpresent) call sivelocity(length(7), time(7), multvel(3)&
+&                                  , transvel(3))
+! initialize vtheta to zero. this value will be used
+! if no theta velocity component was specified.
+        vtheta = zero
+! loop over the faces of the subface.
+        do j=jbeg,jend
+          do i=ibeg,iend
+! determine the coordinates of the face center relative to
+! the rotation point of this section. normally this is an
+! average of i-1, i, j-1, j, but due to the usage of the
+! pointer xf and the fact that x originally starts at 0,
+! an offset of 1 is introduced and thus the average should
+! be taken of i, i+1, j and j+1.
+            xc(1) = fourth*(xf(i, j, 1)+xf(i+1, j, 1)+xf(i, j+1, 1)+xf(i&
+&             +1, j+1, 1)) - sections(sectionid)%rotcenter(1)
+            xc(2) = fourth*(xf(i, j, 2)+xf(i+1, j, 2)+xf(i, j+1, 2)+xf(i&
+&             +1, j+1, 2)) - sections(sectionid)%rotcenter(2)
+            xc(3) = fourth*(xf(i, j, 3)+xf(i+1, j, 3)+xf(i, j+1, 3)+xf(i&
+&             +1, j+1, 3)) - sections(sectionid)%rotcenter(3)
+! determine the coordinates in the local cartesian frame,
+! i.e. the frame determined by axis, radvec1 and radvec2.
+            ax = xc(1)*axis(1) + xc(2)*axis(2) + xc(3)*axis(3)
+            r1 = xc(1)*radvec1(1) + xc(2)*radvec1(2) + xc(3)*radvec1(3)
+            r2 = xc(1)*radvec2(1) + xc(2)*radvec2(2) + xc(3)*radvec2(3)
+! determine the velocity components in the local
+! cylindrical system. take the conversion to si units
+! into account.
+            vax = multvel(1)*bcvararray(i, j, 3) + transvel(1)
+            vrad = multvel(2)*bcvararray(i, j, 6) + transvel(2)
+            if (veltpresent) vtheta = multvel(3)*bcvararray(i, j, 7) + &
+&               transvel(3)
+            if (eps .lt. r1*r1 + r2*r2) then
+              max1 = r1*r1 + r2*r2
+            else
+              max1 = eps
+            end if
+! determine the velocities in the local cartesian
+! frame determined by axis, radvec1 and radvec2.
+            var = one/sqrt(max1)
+            vloc(1) = vax
+            vloc(2) = var*(vrad*r1-vtheta*r2)
+            vloc(3) = var*(vrad*r2+vtheta*r1)
+! transform vloc to the global cartesian frame and
+! store the values.
+            bcdata(boco)%velx(i, j) = (vloc(1)*axis(1)+vloc(2)*radvec1(1&
+&             )+vloc(3)*radvec2(1))/uref
+            bcdata(boco)%vely(i, j) = (vloc(1)*axis(2)+vloc(2)*radvec1(2&
+&             )+vloc(3)*radvec2(2))/uref
+            bcdata(boco)%velz(i, j) = (vloc(1)*axis(3)+vloc(2)*radvec1(3&
+&             )+vloc(3)*radvec2(3))/uref
+          end do
+        end do
+      else
+! cartesian components prescribed.
+! determine the conversion factor to si-units for the three
+! components.
+        call sivelocity(length(3), time(3), multvel(1), transvel(1))
+        call sivelocity(length(4), time(4), multvel(2), transvel(2))
+        call sivelocity(length(5), time(5), multvel(3), transvel(3))
+! set the velocities.
+        do j=jbeg,jend
+          do i=ibeg,iend
+            bcdata(boco)%velx(i, j) = (multvel(1)*bcvararray(i, j, 3)+&
+&             transvel(1))/uref
+            bcdata(boco)%vely(i, j) = (multvel(2)*bcvararray(i, j, 4)+&
+&             transvel(2))/uref
+            bcdata(boco)%velz(i, j) = (multvel(3)*bcvararray(i, j, 5)+&
+&             transvel(3))/uref
+          end do
+        end do
+      end if
+! set the turbulence variables and check if all of them are
+! prescribed. if not set allturbpresent to .false.
+      allturbpresent = setbcvarturb(7_inttype, boco, bcvararray, ibeg, &
+&       iend, jbeg, jend, bcdata(boco)%turbinlet)
+    end subroutine prescribedsupersonicinlet
+  end subroutine bcdatasupersonicinflow
+!=================================================================
+  logical function setbcvarturb(offset, boco, bcvararray, ibeg, iend, &
+&   jbeg, jend, turbinlet)
+!
+!       setbcvarturb sets the array for the turbulent halo data
+!       for inlet boundaries. this function returns .true. if all
+!       turbulence variables could be interpolated and .false.
+!       otherwise.
+!
+    use constants
+    use flowvarrefstate, only : nt1, nt2, muref, pref, rhoref, winf
+    use inputphysics, only : equations, turbmodel
+    use utils_fast_b, only : terminate, siturb
+    implicit none
+!
+!      subroutine arguments.
+!
+    integer(kind=inttype), intent(in) :: offset, boco, ibeg, iend, jbeg&
+&   , jend
+    real(kind=realtype), dimension(ibeg:iend, jbeg:jend, nbcvarmax) :: &
+&   bcvararray
+    real(kind=realtype), dimension(:, :, :), pointer :: turbinlet
+!
+!      local variables.
+!
+    integer(kind=inttype) :: nn, mm, i, j
+    real(kind=realtype) :: mult, trans, nuref
+    real(kind=realtype), dimension(nt1:nt2) :: ref
+! initialize setbcvarturb to .true. and return immediately
+! if not the rans equations are solved.
+    setbcvarturb = .true.
+    if (equations .ne. ransequations) then
+      return
+    else
+! set the reference values depending on the turbulence model.
+      nuref = muref/rhoref
+      select case  (turbmodel) 
+      case (spalartallmaras, spalartallmarasedwards) 
+        ref(itu1) = nuref
+      case (komegawilcox, komegamodified, mentersst) 
+        ref(itu1) = pref/rhoref
+        ref(itu2) = ref(itu1)/nuref
+      case (ktau) 
+        ref(itu1) = pref/rhoref
+        ref(itu2) = nuref/ref(itu1)
+      case (v2f) 
+        ref(itu1) = pref/rhoref
+        ref(itu4) = ref(itu1)/nuref
+        ref(itu2) = ref(itu1)*ref(itu4)
+        ref(itu3) = ref(itu1)
+      end select
+! loop over the number of turbulent variables. mm is the counter
+! in the arrays bcvararray and bcvarpresent.
+      mm = offset
+turbloop:do nn=nt1,nt2
+        mm = mm + 1
+! check if the variable is present. if so, use the
+! interpolated data.
+        if (bcvarpresent(mm)) then
+! conversion to si units if possible.
+          call siturb(mass(mm), length(mm), time(mm), temp(mm), &
+&               bcvarnames(mm), mult, trans)
+! set the turbulent variables.
+          do j=jbeg,jend
+            do i=ibeg,iend
+              turbinlet(i, j, nn) = (mult*bcvararray(i, j, mm)+trans)/&
+&               ref(nn)
+            end do
+          end do
+        else
+! turbulent variable not present. use the free stream data.
+          do j=jbeg,jend
+            do i=ibeg,iend
+              turbinlet(i, j, nn) = winf(nn)
+            end do
+          end do
+! set the logical value to false to indicate that indeed not
+! all the values were present
+          setbcvarturb = .false.
+        end if
+      end do turbloop
+    end if
+  end function setbcvarturb
+end module bcdata_fast_b
diff --git a/src/adjoint/outputReverseFast/oversetutilities_fast_b.f90 b/src/adjoint/outputReverseFast/oversetutilities_fast_b.f90
new file mode 100644
index 000000000..da6a09bc7
--- /dev/null
+++ b/src/adjoint/outputReverseFast/oversetutilities_fast_b.f90
@@ -0,0 +1,174 @@
+!        generated by tapenade     (inria, tropics team)
+!  tapenade 3.10 (r5363) -  9 sep 2014 09:53
+!
+module oversetutilities_fast_b
+  implicit none
+
+contains
+! --------------------------------------------------
+!           tapenade routine below this point
+! --------------------------------------------------
+  subroutine fractoweights(frac, weights)
+    use constants
+    implicit none
+    real(kind=realtype), dimension(3), intent(in) :: frac
+    real(kind=realtype), dimension(8), intent(out) :: weights
+    weights(1) = (one-frac(1))*(one-frac(2))*(one-frac(3))
+    weights(2) = frac(1)*(one-frac(2))*(one-frac(3))
+    weights(3) = (one-frac(1))*frac(2)*(one-frac(3))
+    weights(4) = frac(1)*frac(2)*(one-frac(3))
+    weights(5) = (one-frac(1))*(one-frac(2))*frac(3)
+    weights(6) = frac(1)*(one-frac(2))*frac(3)
+    weights(7) = (one-frac(1))*frac(2)*frac(3)
+    weights(8) = frac(1)*frac(2)*frac(3)
+  end subroutine fractoweights
+  subroutine fractoweights2(frac, weights)
+    use constants
+    implicit none
+    real(kind=realtype), dimension(3), intent(in) :: frac
+    real(kind=realtype), dimension(8), intent(out) :: weights
+    weights(1) = (one-frac(1))*(one-frac(2))*(one-frac(3))
+    weights(2) = frac(1)*(one-frac(2))*(one-frac(3))
+    weights(3) = frac(1)*frac(2)*(one-frac(3))
+    weights(4) = (one-frac(1))*frac(2)*(one-frac(3))
+    weights(5) = (one-frac(1))*(one-frac(2))*frac(3)
+    weights(6) = frac(1)*(one-frac(2))*frac(3)
+    weights(7) = frac(1)*frac(2)*frac(3)
+    weights(8) = (one-frac(1))*frac(2)*frac(3)
+  end subroutine fractoweights2
+  subroutine newtonupdate(xcen, blk, frac0, frac)
+! this routine performs the newton update to recompute the new
+! "frac" (u,v,w) for the point xcen. the actual search is performed
+! on the the dual cell formed by the cell centers of the 3x3x3 block
+! of primal nodes. this routine is ad'd with tapenade in both
+! forward and reverse.
+    use constants
+    implicit none
+! input
+    real(kind=realtype), dimension(3), intent(in) :: xcen
+    real(kind=realtype), dimension(3, 3, 3, 3), intent(in) :: blk
+    real(kind=realtype), dimension(3), intent(in) :: frac0
+! output
+    real(kind=realtype), dimension(3), intent(out) :: frac
+! working
+    real(kind=realtype), dimension(3, 8) :: xn
+    real(kind=realtype) :: u, v, w, uv, uw, vw, wvu, du, dv, dw
+    real(kind=realtype) :: a11, a12, a13, a21, a22, a23, a31, a32, a33, &
+&   val
+    real(kind=realtype) :: f(3), x(3)
+    integer(kind=inttype), dimension(8), parameter :: indices=(/1, 2, 4&
+&     , 3, 5, 6, 8, 7/)
+    integer(kind=inttype) :: i, j, k, ii, ll
+    real(kind=realtype), parameter :: adteps=1.e-25_realtype
+    real(kind=realtype), parameter :: thresconv=1.e-10_realtype
+    intrinsic sign
+    intrinsic abs
+    intrinsic max
+    intrinsic sqrt
+    real(kind=realtype) :: x1
+    real(kind=realtype) :: max1
+! compute the cell center locations for the 8 nodes describing the
+! dual cell. note that this must be counter-clockwise ordering.
+    ii = 0
+    do k=1,2
+      do j=1,2
+        do i=1,2
+          ii = ii + 1
+          xn(:, indices(ii)) = eighth*(blk(i, j, k, :)+blk(i+1, j, k, :)&
+&           +blk(i, j+1, k, :)+blk(i+1, j+1, k, :)+blk(i, j, k+1, :)+blk&
+&           (i+1, j, k+1, :)+blk(i, j+1, k+1, :)+blk(i+1, j+1, k+1, :))
+        end do
+      end do
+    end do
+! compute the coordinates relative to node 1.
+    do i=2,8
+      xn(:, i) = xn(:, i) - xn(:, 1)
+    end do
+! compute the location of our seach point relative to the first node.
+    x = xcen - xn(:, 1)
+! modify the coordinates of node 3, 6, 8 and 7 such that
+! they correspond to the weights of the u*v, u*w, v*w and
+! u*v*w term in the transformation respectively.
+    xn(1, 7) = xn(1, 7) + xn(1, 2) + xn(1, 4) + xn(1, 5) - xn(1, 3) - xn&
+&     (1, 6) - xn(1, 8)
+    xn(2, 7) = xn(2, 7) + xn(2, 2) + xn(2, 4) + xn(2, 5) - xn(2, 3) - xn&
+&     (2, 6) - xn(2, 8)
+    xn(3, 7) = xn(3, 7) + xn(3, 2) + xn(3, 4) + xn(3, 5) - xn(3, 3) - xn&
+&     (3, 6) - xn(3, 8)
+    xn(1, 3) = xn(1, 3) - xn(1, 2) - xn(1, 4)
+    xn(2, 3) = xn(2, 3) - xn(2, 2) - xn(2, 4)
+    xn(3, 3) = xn(3, 3) - xn(3, 2) - xn(3, 4)
+    xn(1, 6) = xn(1, 6) - xn(1, 2) - xn(1, 5)
+    xn(2, 6) = xn(2, 6) - xn(2, 2) - xn(2, 5)
+    xn(3, 6) = xn(3, 6) - xn(3, 2) - xn(3, 5)
+    xn(1, 8) = xn(1, 8) - xn(1, 4) - xn(1, 5)
+    xn(2, 8) = xn(2, 8) - xn(2, 4) - xn(2, 5)
+    xn(3, 8) = xn(3, 8) - xn(3, 4) - xn(3, 5)
+! set the starting values of u, v and w based on our previous values
+    u = frac0(1)
+    v = frac0(2)
+    w = frac0(3)
+! the newton algorithm to determine the parametric
+! weights u, v and w for the given coordinate.
+newtonhexa:do ll=1,15
+! compute the rhs.
+      uv = u*v
+      uw = u*w
+      vw = v*w
+      wvu = u*v*w
+      f(1) = xn(1, 2)*u + xn(1, 4)*v + xn(1, 5)*w + xn(1, 3)*uv + xn(1, &
+&       6)*uw + xn(1, 8)*vw + xn(1, 7)*wvu - x(1)
+      f(2) = xn(2, 2)*u + xn(2, 4)*v + xn(2, 5)*w + xn(2, 3)*uv + xn(2, &
+&       6)*uw + xn(2, 8)*vw + xn(2, 7)*wvu - x(2)
+      f(3) = xn(3, 2)*u + xn(3, 4)*v + xn(3, 5)*w + xn(3, 3)*uv + xn(3, &
+&       6)*uw + xn(3, 8)*vw + xn(3, 7)*wvu - x(3)
+! compute the jacobian.
+      a11 = xn(1, 2) + xn(1, 3)*v + xn(1, 6)*w + xn(1, 7)*vw
+      a12 = xn(1, 4) + xn(1, 3)*u + xn(1, 8)*w + xn(1, 7)*uw
+      a13 = xn(1, 5) + xn(1, 6)*u + xn(1, 8)*v + xn(1, 7)*uv
+      a21 = xn(2, 2) + xn(2, 3)*v + xn(2, 6)*w + xn(2, 7)*vw
+      a22 = xn(2, 4) + xn(2, 3)*u + xn(2, 8)*w + xn(2, 7)*uw
+      a23 = xn(2, 5) + xn(2, 6)*u + xn(2, 8)*v + xn(2, 7)*uv
+      a31 = xn(3, 2) + xn(3, 3)*v + xn(3, 6)*w + xn(3, 7)*vw
+      a32 = xn(3, 4) + xn(3, 3)*u + xn(3, 8)*w + xn(3, 7)*uw
+      a33 = xn(3, 5) + xn(3, 6)*u + xn(3, 8)*v + xn(3, 7)*uv
+! compute the determinant. make sure that it is not zero
+! and invert the value. the cut off is needed to be able
+! to handle exceptional cases for degenerate elements.
+      val = a11*(a22*a33-a32*a23) + a21*(a13*a32-a12*a33) + a31*(a12*a23&
+&       -a13*a22)
+      if (val .ge. 0.) then
+        x1 = val
+      else
+        x1 = -val
+      end if
+      if (x1 .lt. adteps) then
+        max1 = adteps
+      else
+        max1 = x1
+      end if
+      val = sign(one, val)/max1
+! compute the new values of u, v and w.
+      du = val*((a22*a33-a23*a32)*f(1)+(a13*a32-a12*a33)*f(2)+(a12*a23-&
+&       a13*a22)*f(3))
+      dv = val*((a23*a31-a21*a33)*f(1)+(a11*a33-a13*a31)*f(2)+(a13*a21-&
+&       a11*a23)*f(3))
+      dw = val*((a21*a32-a22*a31)*f(1)+(a12*a31-a11*a32)*f(2)+(a11*a22-&
+&       a12*a21)*f(3))
+      u = u - du
+      v = v - dv
+      w = w - dw
+! exit the loop if the update of the parametric
+! weights is below the threshold
+      val = sqrt(du*du + dv*dv + dw*dw)
+      if (val .le. thresconv) goto 100
+    end do newtonhexa
+! we would *like* that all solutions fall inside the hexa, but we
+! can't be picky here since we are not changing the donors. so
+! whatever the u,v,w is we have to accept. even if it is greater than
+! 1 or less than zero, it shouldn't be by much.
+ 100 frac(1) = u
+    frac(2) = v
+    frac(3) = w
+  end subroutine newtonupdate
+end module oversetutilities_fast_b
diff --git a/src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90 b/src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90
index 993aed227..b7c934129 100644
--- a/src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90
+++ b/src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90
@@ -1200,6 +1200,8 @@ function safact(ks, d)
     real(kind=realtype) :: d
     if (ks .eq. zero) then
       safact = -one
+    else if (d .eq. zero) then
+      safact = one
     else
       safact = (ks-d/0.03)/(ks+d/0.03)
     end if
diff --git a/src/adjoint/outputReverseFast/zipperintegrations_fast_b.f90 b/src/adjoint/outputReverseFast/zipperintegrations_fast_b.f90
new file mode 100644
index 000000000..1fc7e0a5c
--- /dev/null
+++ b/src/adjoint/outputReverseFast/zipperintegrations_fast_b.f90
@@ -0,0 +1,331 @@
+!        generated by tapenade     (inria, tropics team)
+!  tapenade 3.10 (r5363) -  9 sep 2014 09:53
+!
+module zipperintegrations_fast_b
+  implicit none
+
+contains
+  subroutine flowintegrationzipper(isinflow, conn, fams, vars, &
+&   localvalues, famlist, sps, ptvalid)
+! integrate over the trianges for the inflow/outflow conditions.
+    use constants
+    use blockpointers, only : bctype
+    use sorting, only : faminlist
+    use flowvarrefstate, only : pref, pinf, rhoref, pref, timeref, &
+&   lref, tref, rgas, uref, uinf, rhoinf
+    use inputphysics, only : pointref, flowtype
+    use flowutils_fast_b, only : computeptot, computettot
+    use surfacefamilies, only : familyexchange, bcfamexchange
+    use utils_fast_b, only : mynorm2, cross_prod
+    implicit none
+! input/output variables
+    logical, intent(in) :: isinflow
+    integer(kind=inttype), dimension(:, :), intent(in) :: conn
+    integer(kind=inttype), dimension(:), intent(in) :: fams
+    real(kind=realtype), dimension(:, :), intent(in) :: vars
+    real(kind=realtype), dimension(nlocalvalues), intent(inout) :: &
+&   localvalues
+    integer(kind=inttype), dimension(:), intent(in) :: famlist
+    integer(kind=inttype), intent(in) :: sps
+    logical(kind=inttype), dimension(:), optional, intent(in) :: ptvalid
+! working variables
+    integer(kind=inttype) :: i, j
+    real(kind=realtype) :: sf, vmag, vnm, vxm, vym, vzm, fx, fy, fz, u, &
+&   v, w, vnmfreestreamref
+    real(kind=realtype), dimension(3) :: fp, mp, fmom, mmom, refpoint, &
+&   ss, x1, x2, x3, norm, sfacecoordref
+    real(kind=realtype) :: pm, ptot, ttot, rhom, gammam, mnm, &
+&   massflowratelocal, am
+    real(kind=realtype) :: massflowrate, mass_ptot, mass_ttot, mass_ps, &
+&   mass_mn, mass_a, mass_rho, mass_vx, mass_vy, mass_vz, mass_nx, &
+&   mass_ny, mass_nz
+    real(kind=realtype) :: area, cellarea, overcellarea
+    real(kind=realtype) :: area_ptot, area_ps
+    real(kind=realtype) :: mredim
+    real(kind=realtype) :: internalflowfact, inflowfact, xc, yc, zc, mx&
+&   , my, mz
+    logical :: triisvalid
+    intrinsic sqrt
+    intrinsic size
+    intrinsic present
+    real(kind=realtype), dimension(3) :: arg1
+    real(kind=realtype), dimension(3) :: arg2
+    mredim = sqrt(pref*rhoref)
+    fp = zero
+    mp = zero
+    fmom = zero
+    mmom = zero
+    massflowrate = zero
+    area = zero
+    mass_ptot = zero
+    mass_ttot = zero
+    mass_ps = zero
+    mass_mn = zero
+    mass_a = zero
+    mass_rho = zero
+    mass_vx = zero
+    mass_vy = zero
+    mass_vz = zero
+    mass_nx = zero
+    mass_ny = zero
+    mass_nz = zero
+    area_ptot = zero
+    area_ps = zero
+    refpoint(1) = lref*pointref(1)
+    refpoint(2) = lref*pointref(2)
+    refpoint(3) = lref*pointref(3)
+    internalflowfact = one
+    if (flowtype .eq. internalflow) internalflowfact = -one
+    inflowfact = one
+    if (isinflow) inflowfact = -one
+    do i=1,size(conn, 2)
+      if (faminlist(fams(i), famlist)) then
+! if the ptvalid list is given, check if we should integrate
+! this triangle.
+        triisvalid = .true.
+        if (present(ptvalid)) then
+! check if each of the three nodes are valid
+          if (((ptvalid(conn(1, i)) .eqv. .false.) .or. (ptvalid(conn(2&
+&             , i)) .eqv. .false.)) .or. (ptvalid(conn(3, i)) .eqv. &
+&             .false.)) triisvalid = .false.
+        end if
+        if (triisvalid) then
+! compute the averaged values for this triangle
+          vxm = zero
+          vym = zero
+          vzm = zero
+          rhom = zero
+          pm = zero
+          mnm = zero
+          gammam = zero
+          sf = zero
+          do j=1,3
+            rhom = rhom + vars(conn(j, i), irho)
+            vxm = vxm + vars(conn(j, i), ivx)
+            vym = vym + vars(conn(j, i), ivy)
+            vzm = vzm + vars(conn(j, i), ivz)
+            pm = pm + vars(conn(j, i), irhoe)
+            gammam = gammam + vars(conn(j, i), izippflowgamma)
+            sf = sf + vars(conn(j, i), izippflowsface)
+          end do
+! divide by 3 due to the summation above:
+          rhom = third*rhom
+          vxm = third*vxm
+          vym = third*vym
+          vzm = third*vzm
+          pm = third*pm
+          gammam = third*gammam
+          sf = third*sf
+! get the nodes of triangle.
+          x1 = vars(conn(1, i), izippflowx:izippflowz)
+          x2 = vars(conn(2, i), izippflowx:izippflowz)
+          x3 = vars(conn(3, i), izippflowx:izippflowz)
+          arg1(:) = x2 - x1
+          arg2(:) = x3 - x1
+          call cross_prod(arg1(:), arg2(:), norm)
+          ss = half*norm
+          call computeptot(rhom, vxm, vym, vzm, pm, ptot)
+          call computettot(rhom, vxm, vym, vzm, pm, ttot)
+          vnm = vxm*ss(1) + vym*ss(2) + vzm*ss(3) - sf
+          vmag = sqrt(vxm**2 + vym**2 + vzm**2) - sf
+          am = sqrt(gammam*pm/rhom)
+          mnm = vmag/sqrt(gammam*pm/rhom)
+          cellarea = sqrt(ss(1)**2 + ss(2)**2 + ss(3)**2)
+          area = area + cellarea
+          overcellarea = 1/cellarea
+          massflowratelocal = rhom*vnm*mredim
+          massflowrate = massflowrate + massflowratelocal
+          pm = pm*pref
+          mass_ptot = mass_ptot + ptot*massflowratelocal*pref
+          mass_ttot = mass_ttot + ttot*massflowratelocal*tref
+          mass_rho = mass_rho + rhom*massflowratelocal*rhoref
+          mass_a = mass_a + am*massflowratelocal*uref
+          mass_ps = mass_ps + pm*massflowratelocal
+          mass_mn = mass_mn + mnm*massflowratelocal
+          area_ptot = area_ptot + ptot*pref*cellarea
+          area_ps = area_ps + pm*cellarea
+          sfacecoordref(1) = sf*ss(1)*overcellarea
+          sfacecoordref(2) = sf*ss(2)*overcellarea
+          sfacecoordref(3) = sf*ss(3)*overcellarea
+          mass_vx = mass_vx + (vxm*uref-sfacecoordref(1))*&
+&           massflowratelocal
+          mass_vy = mass_vy + (vym*uref-sfacecoordref(2))*&
+&           massflowratelocal
+          mass_vz = mass_vz + (vzm*uref-sfacecoordref(3))*&
+&           massflowratelocal
+          mass_nx = mass_nx + ss(1)*overcellarea*massflowratelocal
+          mass_ny = mass_ny + ss(2)*overcellarea*massflowratelocal
+          mass_nz = mass_nz + ss(3)*overcellarea*massflowratelocal
+! compute the average cell center.
+          xc = zero
+          yc = zero
+          zc = zero
+          do j=1,3
+            xc = xc + vars(conn(1, i), izippflowx)
+            yc = yc + vars(conn(2, i), izippflowy)
+            zc = zc + vars(conn(3, i), izippflowz)
+          end do
+! finish average for cell center
+          xc = third*xc
+          yc = third*yc
+          zc = third*zc
+          xc = xc - refpoint(1)
+          yc = yc - refpoint(2)
+          zc = zc - refpoint(3)
+          pm = -(pm-pinf*pref)
+          fx = pm*ss(1)
+          fy = pm*ss(2)
+          fz = pm*ss(3)
+! update the pressure force and moment coefficients.
+          fp(1) = fp(1) + fx
+          fp(2) = fp(2) + fy
+          fp(3) = fp(3) + fz
+          mx = yc*fz - zc*fy
+          my = zc*fx - xc*fz
+          mz = xc*fy - yc*fx
+          mp(1) = mp(1) + mx
+          mp(2) = mp(2) + my
+          mp(3) = mp(3) + mz
+! momentum forces
+! get unit normal vector.
+          ss = ss/cellarea
+          massflowratelocal = massflowratelocal/timeref*internalflowfact&
+&           *inflowfact
+          fx = massflowratelocal*ss(1)*vxm
+          fy = massflowratelocal*ss(2)*vym
+          fz = massflowratelocal*ss(3)*vzm
+          fmom(1) = fmom(1) - fx
+          fmom(2) = fmom(2) - fy
+          fmom(3) = fmom(3) - fz
+          mx = yc*fz - zc*fy
+          my = zc*fx - xc*fz
+          mz = xc*fy - yc*fx
+          mmom(1) = mmom(1) + mx
+          mmom(2) = mmom(2) + my
+          mmom(3) = mmom(3) + mz
+        end if
+      end if
+    end do
+! increment the local values array with what we computed here
+    localvalues(imassflow) = localvalues(imassflow) + massflowrate
+    localvalues(iarea) = localvalues(iarea) + area
+    localvalues(imassrho) = localvalues(imassrho) + mass_rho
+    localvalues(imassa) = localvalues(imassa) + mass_a
+    localvalues(imassptot) = localvalues(imassptot) + mass_ptot
+    localvalues(imassttot) = localvalues(imassttot) + mass_ttot
+    localvalues(imassps) = localvalues(imassps) + mass_ps
+    localvalues(imassmn) = localvalues(imassmn) + mass_mn
+    localvalues(ifp:ifp+2) = localvalues(ifp:ifp+2) + fp
+    localvalues(iflowfm:iflowfm+2) = localvalues(iflowfm:iflowfm+2) + &
+&     fmom
+    localvalues(iflowmp:iflowmp+2) = localvalues(iflowmp:iflowmp+2) + mp
+    localvalues(iflowmm:iflowmm+2) = localvalues(iflowmm:iflowmm+2) + &
+&     mmom
+    localvalues(iareaptot) = localvalues(iareaptot) + area_ptot
+    localvalues(iareaps) = localvalues(iareaps) + area_ps
+    localvalues(imassvx) = localvalues(imassvx) + mass_vx
+    localvalues(imassvy) = localvalues(imassvy) + mass_vy
+    localvalues(imassvz) = localvalues(imassvz) + mass_vz
+    localvalues(imassnx) = localvalues(imassnx) + mass_nx
+    localvalues(imassny) = localvalues(imassny) + mass_ny
+    localvalues(imassnz) = localvalues(imassnz) + mass_nz
+  end subroutine flowintegrationzipper
+  subroutine wallintegrationzipper(conn, fams, vars, localvalues, &
+&   famlist, sps)
+    use constants
+    use sorting, only : faminlist
+    use flowvarrefstate, only : lref
+    use inputphysics, only : pointref
+    use utils_fast_b, only : mynorm2, cross_prod
+    implicit none
+! input/output
+    integer(kind=inttype), dimension(:, :), intent(in) :: conn
+    integer(kind=inttype), dimension(:), intent(in) :: fams
+    real(kind=realtype), dimension(:, :), intent(in) :: vars
+    real(kind=realtype), intent(inout) :: localvalues(nlocalvalues)
+    integer(kind=inttype), dimension(:), intent(in) :: famlist
+    integer(kind=inttype), intent(in) :: sps
+! working
+    real(kind=realtype), dimension(3) :: fp, fv, mp, mv
+    integer(kind=inttype) :: i, j
+    real(kind=realtype), dimension(3) :: ss, norm, refpoint
+    real(kind=realtype), dimension(3) :: p1, p2, p3, v1, v2, v3, x1, x2&
+&   , x3
+    real(kind=realtype) :: fact, triarea, fx, fy, fz, mx, my, mz, xc, yc&
+&   , zc
+    intrinsic size
+    real(kind=realtype), dimension(3) :: arg1
+    real(kind=realtype), dimension(3) :: arg2
+    real(kind=realtype) :: result1
+! determine the reference point for the moment computation in
+! meters.
+    refpoint(1) = lref*pointref(1)
+    refpoint(2) = lref*pointref(2)
+    refpoint(3) = lref*pointref(3)
+    fp = zero
+    fv = zero
+    mp = zero
+    mv = zero
+    do i=1,size(conn, 2)
+      if (faminlist(fams(i), famlist)) then
+! get the nodes of triangle.
+        x1 = vars(conn(1, i), izippwallx:izippwallz)
+        x2 = vars(conn(2, i), izippwallx:izippwallz)
+        x3 = vars(conn(3, i), izippwallx:izippwallz)
+        arg1(:) = x2 - x1
+        arg2(:) = x3 - x1
+        call cross_prod(arg1(:), arg2(:), norm)
+        ss = half*norm
+! the third here is to account for the summation of p1, p2
+! and p3
+        result1 = mynorm2(ss)
+        triarea = result1*third
+! compute the average cell center.
+        xc = third*(x1(1)+x2(1)+x3(1))
+        yc = third*(x1(2)+x2(2)+x3(2))
+        zc = third*(x1(3)+x2(3)+x3(3))
+        xc = xc - refpoint(1)
+        yc = yc - refpoint(2)
+        zc = zc - refpoint(3)
+! update the pressure force and moment coefficients.
+        p1 = vars(conn(1, i), izippwalltpx:izippwalltpz)
+        p2 = vars(conn(2, i), izippwalltpx:izippwalltpz)
+        p3 = vars(conn(3, i), izippwalltpx:izippwalltpz)
+        fx = (p1(1)+p2(1)+p3(1))*triarea
+        fy = (p1(2)+p2(2)+p3(2))*triarea
+        fz = (p1(3)+p2(3)+p3(3))*triarea
+        fp(1) = fp(1) + fx
+        fp(2) = fp(2) + fy
+        fp(3) = fp(3) + fz
+        mx = yc*fz - zc*fy
+        my = zc*fx - xc*fz
+        mz = xc*fy - yc*fx
+        mp(1) = mp(1) + mx
+        mp(2) = mp(2) + my
+        mp(3) = mp(3) + mz
+! update the viscous force and moment coefficients
+        v1 = vars(conn(1, i), izippwalltvx:izippwalltvz)
+        v2 = vars(conn(2, i), izippwalltvx:izippwalltvz)
+        v3 = vars(conn(3, i), izippwalltvx:izippwalltvz)
+        fx = (v1(1)+v2(1)+v3(1))*triarea
+        fy = (v1(2)+v2(2)+v3(2))*triarea
+        fz = (v1(3)+v2(3)+v3(3))*triarea
+! note: momentum forces have opposite sign to pressure forces
+        fv(1) = fv(1) + fx
+        fv(2) = fv(2) + fy
+        fv(3) = fv(3) + fz
+        mx = yc*fz - zc*fy
+        my = zc*fx - xc*fz
+        mz = xc*fy - yc*fx
+        mv(1) = mv(1) + mx
+        mv(2) = mv(2) + my
+        mv(3) = mv(3) + mz
+      end if
+    end do
+! increment into the local vector
+    localvalues(ifp:ifp+2) = localvalues(ifp:ifp+2) + fp
+    localvalues(ifv:ifv+2) = localvalues(ifv:ifv+2) + fv
+    localvalues(imp:imp+2) = localvalues(imp:imp+2) + mp
+    localvalues(imv:imv+2) = localvalues(imv:imv+2) + mv
+  end subroutine wallintegrationzipper
+end module zipperintegrations_fast_b
diff --git a/src/turbulence/sa.F90 b/src/turbulence/sa.F90
index 2b8489fcf..21192a2b4 100644
--- a/src/turbulence/sa.F90
+++ b/src/turbulence/sa.F90
@@ -318,7 +318,7 @@ subroutine saSource
                 ! treatment.
 
                 dfv1 = three*chi2*cv13/((chi3+cv13)**2)
-                dfv2 = (chi2*dfv1 - one)/(nu*((one + chi*fv1)**2))
+                dfv2 = (w(i,j,k,itu1)*dfv1 - nu) / (nu + w(i,j,k,itu1)*fv1)**2
                 dft2 = -two*rsaCt4*chi*ft2/nu
 
                 drr = (one - rr*(fv2 + w(i,j,k,itu1)*dfv2)) &
diff --git a/src/turbulence/turbBCRoutines.F90 b/src/turbulence/turbBCRoutines.F90
index 4c220f5f0..c91325ace 100644
--- a/src/turbulence/turbBCRoutines.F90
+++ b/src/turbulence/turbBCRoutines.F90
@@ -1425,6 +1425,8 @@ function  saFact(ks, d)
    
    if (ks .eq. zero) then
       saFact = -one
+   else if (d .eq. zero) then
+      saFact = one
    else
       saFact = (ks - d/0.03) / (ks + d/0.03)
    end if

From 415ac8acf503d519288aefdfe6fcb6da80ac2c49 Mon Sep 17 00:00:00 2001
From: David Anderegg <andv@zhaw.ch>
Date: Wed, 28 Jul 2021 15:16:33 +0200
Subject: [PATCH 14/60] Removed uncommented code lines

---
 src/adjoint/outputForward/sa_d.f90            |  10 +-
 .../outputForward/turbbcroutines_d.f90        | 412 ++++++++----------
 src/adjoint/outputReverse/sa_b.f90            |  10 +-
 .../outputReverse/turbbcroutines_b.f90        | 362 +++++++--------
 src/adjoint/outputReverseFast/sa_fast_b.f90   |  10 +-
 .../turbbcroutines_fast_b.f90                 | 234 +++++-----
 src/turbulence/sa.F90                         |  13 +-
 src/turbulence/turbBCRoutines.F90             |  22 +-
 8 files changed, 475 insertions(+), 598 deletions(-)

diff --git a/src/adjoint/outputForward/sa_d.f90 b/src/adjoint/outputForward/sa_d.f90
index 8baf51b7e..7a85a6ea6 100644
--- a/src/adjoint/outputForward/sa_d.f90
+++ b/src/adjoint/outputForward/sa_d.f90
@@ -79,7 +79,6 @@ subroutine sasource_d()
     cw36 = rsacw3**6
     cb3inv = one/rsacb3
 ! constants for sa rough
-! ks=0.0001 
     cr1 = 0.5
 ! determine the non-dimensional wheel speed of this block.
     omegaxd = sections(sectionid)%rotrate(1)*timerefd
@@ -279,13 +278,12 @@ subroutine sasource_d()
 ! wall distance squared, the ratio chi (ratio of nutilde
 ! and nu) and the functions fv1 and fv2. the latter corrects
 ! the production term near a viscous wall.
-! sa rough 
+! sa rough
             dnewd = d2walld(i, j, k)
             dnew = d2wall(i, j, k) + 0.03*kssa
             nud = (rlvd(i, j, k)*w(i, j, k, irho)-rlv(i, j, k)*wd(i, j, &
 &             k, irho))/w(i, j, k, irho)**2
             nu = rlv(i, j, k)/w(i, j, k, irho)
-! dist2inv = one/(d2wall(i,j,k)**2)
             dist2invd = -(one*2*dnew*dnewd/(dnew**2)**2)
             dist2inv = one/dnew**2
             chid = (wd(i, j, k, itu1)*nu-w(i, j, k, itu1)*nud)/nu**2 - &
@@ -297,7 +295,6 @@ subroutine sasource_d()
             chi3 = chi*chi2
             fv1d = (chi3d*(chi3+cv13)-chi3*chi3d)/(chi3+cv13)**2
             fv1 = chi3/(chi3+cv13)
-! fv2      = one - chi/(one + chi*fv1)
             fv2d = -((wd(i, j, k, itu1)*(nu+w(i, j, k, itu1)*fv1)-w(i, j&
 &             , k, itu1)*(nud+wd(i, j, k, itu1)*fv1+w(i, j, k, itu1)*&
 &             fv1d))/(nu+w(i, j, k, itu1)*fv1)**2)
@@ -437,7 +434,6 @@ subroutine sasource()
     cw36 = rsacw3**6
     cb3inv = one/rsacb3
 ! constants for sa rough
-! ks=0.0001 
     cr1 = 0.5
 ! determine the non-dimensional wheel speed of this block.
     omegax = timeref*sections(sectionid)%rotrate(1)
@@ -530,16 +526,14 @@ subroutine sasource()
 ! wall distance squared, the ratio chi (ratio of nutilde
 ! and nu) and the functions fv1 and fv2. the latter corrects
 ! the production term near a viscous wall.
-! sa rough 
+! sa rough
             dnew = d2wall(i, j, k) + 0.03*kssa
             nu = rlv(i, j, k)/w(i, j, k, irho)
-! dist2inv = one/(d2wall(i,j,k)**2)
             dist2inv = one/dnew**2
             chi = w(i, j, k, itu1)/nu + cr1*kssa/dnew
             chi2 = chi*chi
             chi3 = chi*chi2
             fv1 = chi3/(chi3+cv13)
-! fv2      = one - chi/(one + chi*fv1)
             fv2 = one - w(i, j, k, itu1)/(nu+w(i, j, k, itu1)*fv1)
 ! the function ft2, which is designed to keep a laminar
 ! solution laminar. when running in fully turbulent mode
diff --git a/src/adjoint/outputForward/turbbcroutines_d.f90 b/src/adjoint/outputForward/turbbcroutines_d.f90
index 25241e978..ece12d732 100644
--- a/src/adjoint/outputForward/turbbcroutines_d.f90
+++ b/src/adjoint/outputForward/turbbcroutines_d.f90
@@ -40,8 +40,8 @@ subroutine applyallturbbcthisblock_d(secondhalo)
 ! loop over the faces and set the state in
 ! the turbulent halo cells.
       if (.not.wallfunctions) then
-        select case  (bcfaceid(nn)) 
-        case (imin) 
+        select case  (bcfaceid(nn))
+        case (imin)
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -56,7 +56,7 @@ subroutine applyallturbbcthisblock_d(secondhalo)
               end do
             end do
           end do
-        case (imax) 
+        case (imax)
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -71,7 +71,7 @@ subroutine applyallturbbcthisblock_d(secondhalo)
               end do
             end do
           end do
-        case (jmin) 
+        case (jmin)
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -86,7 +86,7 @@ subroutine applyallturbbcthisblock_d(secondhalo)
               end do
             end do
           end do
-        case (jmax) 
+        case (jmax)
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -101,7 +101,7 @@ subroutine applyallturbbcthisblock_d(secondhalo)
               end do
             end do
           end do
-        case (kmin) 
+        case (kmin)
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -116,7 +116,7 @@ subroutine applyallturbbcthisblock_d(secondhalo)
               end do
             end do
           end do
-        case (kmax) 
+        case (kmax)
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -180,8 +180,8 @@ subroutine applyallturbbcthisblock(secondhalo)
 ! loop over the faces and set the state in
 ! the turbulent halo cells.
       if (.not.wallfunctions) then
-        select case  (bcfaceid(nn)) 
-        case (imin) 
+        select case  (bcfaceid(nn))
+        case (imin)
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -193,7 +193,7 @@ subroutine applyallturbbcthisblock(secondhalo)
               end do
             end do
           end do
-        case (imax) 
+        case (imax)
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -205,7 +205,7 @@ subroutine applyallturbbcthisblock(secondhalo)
               end do
             end do
           end do
-        case (jmin) 
+        case (jmin)
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -217,7 +217,7 @@ subroutine applyallturbbcthisblock(secondhalo)
               end do
             end do
           end do
-        case (jmax) 
+        case (jmax)
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -229,7 +229,7 @@ subroutine applyallturbbcthisblock(secondhalo)
               end do
             end do
           end do
-        case (kmin) 
+        case (kmin)
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -241,7 +241,7 @@ subroutine applyallturbbcthisblock(secondhalo)
               end do
             end do
           end do
-        case (kmax) 
+        case (kmax)
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -299,43 +299,43 @@ subroutine bceddynowall_d(nn)
 !
     integer(kind=inttype) :: i, j
 ! determine the face id on which the subface and copy
-    select case  (bcfaceid(nn)) 
-    case (imin) 
+    select case  (bcfaceid(nn))
+    case (imin)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           revd(1, i, j) = revd(2, i, j)
           rev(1, i, j) = rev(2, i, j)
         end do
       end do
-    case (imax) 
+    case (imax)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           revd(ie, i, j) = revd(il, i, j)
           rev(ie, i, j) = rev(il, i, j)
         end do
       end do
-    case (jmin) 
+    case (jmin)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           revd(i, 1, j) = revd(i, 2, j)
           rev(i, 1, j) = rev(i, 2, j)
         end do
       end do
-    case (jmax) 
+    case (jmax)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           revd(i, je, j) = revd(i, jl, j)
           rev(i, je, j) = rev(i, jl, j)
         end do
       end do
-    case (kmin) 
+    case (kmin)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           revd(i, j, 1) = revd(i, j, 2)
           rev(i, j, 1) = rev(i, j, 2)
         end do
       end do
-    case (kmax) 
+    case (kmax)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           revd(i, j, ke) = revd(i, j, kl)
@@ -364,38 +364,38 @@ subroutine bceddynowall(nn)
 !
     integer(kind=inttype) :: i, j
 ! determine the face id on which the subface and copy
-    select case  (bcfaceid(nn)) 
-    case (imin) 
+    select case  (bcfaceid(nn))
+    case (imin)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(1, i, j) = rev(2, i, j)
         end do
       end do
-    case (imax) 
+    case (imax)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(ie, i, j) = rev(il, i, j)
         end do
       end do
-    case (jmin) 
+    case (jmin)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(i, 1, j) = rev(i, 2, j)
         end do
       end do
-    case (jmax) 
+    case (jmax)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(i, je, j) = rev(i, jl, j)
         end do
       end do
-    case (kmin) 
+    case (kmin)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(i, j, 1) = rev(i, j, 2)
         end do
       end do
-    case (kmax) 
+    case (kmax)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(i, j, ke) = rev(i, j, kl)
@@ -429,63 +429,51 @@ subroutine bceddywall_d(nn)
     real(kind=realtype) :: result1
 ! determine the face id on which the subface is located and
 ! loop over the faces of the subface and set the eddy viscosity
-! in the halo cells. 
-    select case  (bcfaceid(nn)) 
-    case (imin) 
+! in the halo cells.
+    select case  (bcfaceid(nn))
+    case (imin)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! rev(1,i,j) = -rev(2,i,j)
-!  print "(f12.3)", safact(kssa, d2wall(2,i,j))
           result1 = safact(kssa, d2wall(2, i, j))
           revd(1, i, j) = result1*revd(2, i, j)
           rev(1, i, j) = result1*rev(2, i, j)
         end do
       end do
-    case (imax) 
+    case (imax)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! rev(ie,i,j) = -rev(il,i,j)
-! print "(f12.3)", safact(kssa, d2wall(il,i,j))
           result1 = safact(kssa, d2wall(il, i, j))
           revd(ie, i, j) = result1*revd(il, i, j)
           rev(ie, i, j) = result1*rev(il, i, j)
         end do
       end do
-    case (jmin) 
+    case (jmin)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! rev(i,1,j) = -rev(i,2,j)
-! print "(f12.3)", safact(kssa, d2wall(i,1,j))
           result1 = safact(kssa, d2wall(i, 2, j))
           revd(i, 1, j) = result1*revd(i, 2, j)
           rev(i, 1, j) = result1*rev(i, 2, j)
         end do
       end do
-    case (jmax) 
+    case (jmax)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! rev(i,je,j) = -rev(i,jl,j)
-! print "(f12.3)", safact(kssa, d2wall(i,je,j))
           result1 = safact(kssa, d2wall(i, jl, j))
           revd(i, je, j) = result1*revd(i, jl, j)
           rev(i, je, j) = result1*rev(i, jl, j)
         end do
       end do
-    case (kmin) 
+    case (kmin)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! rev(i,j,1) = -rev(i,j,2)
-! print "(f12.3)", safact(kssa, d2wall(i,j,2))
           result1 = safact(kssa, d2wall(i, j, 2))
           revd(i, j, 1) = result1*revd(i, j, 2)
           rev(i, j, 1) = result1*rev(i, j, 2)
         end do
       end do
-    case (kmax) 
+    case (kmax)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! rev(i,j,ke) = -rev(i,j,kl)
-! print "(f12.3)", safact(kssa, d2wall(i,j,kl))
           result1 = safact(kssa, d2wall(i, j, kl))
           revd(i, j, ke) = result1*revd(i, j, kl)
           rev(i, j, ke) = result1*rev(i, j, kl)
@@ -515,58 +503,46 @@ subroutine bceddywall(nn)
     real(kind=realtype) :: result1
 ! determine the face id on which the subface is located and
 ! loop over the faces of the subface and set the eddy viscosity
-! in the halo cells. 
-    select case  (bcfaceid(nn)) 
-    case (imin) 
+! in the halo cells.
+    select case  (bcfaceid(nn))
+    case (imin)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! rev(1,i,j) = -rev(2,i,j)
-!  print "(f12.3)", safact(kssa, d2wall(2,i,j))
           result1 = safact(kssa, d2wall(2, i, j))
           rev(1, i, j) = result1*rev(2, i, j)
         end do
       end do
-    case (imax) 
+    case (imax)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! rev(ie,i,j) = -rev(il,i,j)
-! print "(f12.3)", safact(kssa, d2wall(il,i,j))
           result1 = safact(kssa, d2wall(il, i, j))
           rev(ie, i, j) = result1*rev(il, i, j)
         end do
       end do
-    case (jmin) 
+    case (jmin)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! rev(i,1,j) = -rev(i,2,j)
-! print "(f12.3)", safact(kssa, d2wall(i,1,j))
           result1 = safact(kssa, d2wall(i, 2, j))
           rev(i, 1, j) = result1*rev(i, 2, j)
         end do
       end do
-    case (jmax) 
+    case (jmax)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! rev(i,je,j) = -rev(i,jl,j)
-! print "(f12.3)", safact(kssa, d2wall(i,je,j))
           result1 = safact(kssa, d2wall(i, jl, j))
           rev(i, je, j) = result1*rev(i, jl, j)
         end do
       end do
-    case (kmin) 
+    case (kmin)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! rev(i,j,1) = -rev(i,j,2)
-! print "(f12.3)", safact(kssa, d2wall(i,j,2))
           result1 = safact(kssa, d2wall(i, j, 2))
           rev(i, j, 1) = result1*rev(i, j, 2)
         end do
       end do
-    case (kmax) 
+    case (kmax)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! rev(i,j,ke) = -rev(i,j,kl)
-! print "(f12.3)", safact(kssa, d2wall(i,j,kl))
           result1 = safact(kssa, d2wall(i, j, kl))
           rev(i, j, ke) = result1*rev(i, j, kl)
         end do
@@ -601,23 +577,23 @@ subroutine bcturbinflow(nn)
       do i=bcdata(nn)%icbeg,bcdata(nn)%icend
 ! loop over the number of turbulent variables.
         do l=nt1,nt2
-          select case  (bcfaceid(nn)) 
-          case (imin) 
+          select case  (bcfaceid(nn))
+          case (imin)
             bvti1(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmti1(i, j, l, l) = one
-          case (imax) 
+          case (imax)
             bvti2(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmti2(i, j, l, l) = one
-          case (jmin) 
+          case (jmin)
             bvtj1(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmtj1(i, j, l, l) = one
-          case (jmax) 
+          case (jmax)
             bvtj2(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmtj2(i, j, l, l) = one
-          case (kmin) 
+          case (kmin)
             bvtk1(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmtk1(i, j, l, l) = one
-          case (kmax) 
+          case (kmax)
             bvtk2(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmtk2(i, j, l, l) = one
           end select
@@ -654,18 +630,18 @@ subroutine bcturboutflow(nn)
     do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
       do i=bcdata(nn)%icbeg,bcdata(nn)%icend
         do l=nt1,nt2
-          select case  (bcfaceid(nn)) 
-          case (imin) 
+          select case  (bcfaceid(nn))
+          case (imin)
             bmti1(i, j, l, l) = -one
-          case (imax) 
+          case (imax)
             bmti2(i, j, l, l) = -one
-          case (jmin) 
+          case (jmin)
             bmtj1(i, j, l, l) = -one
-          case (jmax) 
+          case (jmax)
             bmtj2(i, j, l, l) = -one
-          case (kmin) 
+          case (kmin)
             bmtk1(i, j, l, l) = -one
-          case (kmax) 
+          case (kmax)
             bmtk2(i, j, l, l) = -one
           end select
         end do
@@ -756,26 +732,26 @@ subroutine bcturbtreatment_d()
 ! loop over the boundary condition subfaces of this block.
 bocos:do nn=1,nbocos
 ! determine the kind of boundary condition for this subface.
-      select case  (bctype(nn)) 
-      case (nswalladiabatic, nswallisothermal) 
+      select case  (bctype(nn))
+      case (nswalladiabatic, nswallisothermal)
 ! viscous wall. there is no difference between an adiabatic
 ! and an isothermal wall for the turbulent equations.
 ! set the implicit treatment of the wall boundary conditions.
         call bcturbwall_d(nn)
-      case (symm, symmpolar, eulerwall) 
+      case (symm, symmpolar, eulerwall)
 !=============================================================
 !=============================================================
 ! symmetry, polar symmetry or inviscid wall. treatment of
 ! the turbulent equations is identical.
         call bcturbsymm(nn)
-      case (farfield) 
+      case (farfield)
 !=============================================================
 ! farfield. the kind of boundary condition to be applied,
 ! inflow or outflow, depends on the local conditions.
         call bcturbfarfield_d(nn)
       case (slidinginterface, oversetouterbound, domaininterfaceall, &
 &     domaininterfacerhouvw, domaininterfacep, domaininterfacerho, &
-&     domaininterfacetotal) 
+&     domaininterfacetotal)
 !=============================================================
 ! sliding mesh interface, overset outer boudaries, and
 ! domain interface with another code are not really boundary
@@ -829,41 +805,41 @@ subroutine bcturbfarfield_d(nn)
 ! outflow. simply extrapolation or zero neumann bc
 ! of the turbulent variables.
           do l=nt1,nt2
-            select case  (bcfaceid(nn)) 
-            case (imin) 
+            select case  (bcfaceid(nn))
+            case (imin)
               bmti1(i, j, l, l) = -one
-            case (imax) 
+            case (imax)
               bmti2(i, j, l, l) = -one
-            case (jmin) 
+            case (jmin)
               bmtj1(i, j, l, l) = -one
-            case (jmax) 
+            case (jmax)
               bmtj2(i, j, l, l) = -one
-            case (kmin) 
+            case (kmin)
               bmtk1(i, j, l, l) = -one
-            case (kmax) 
+            case (kmax)
               bmtk2(i, j, l, l) = -one
             end select
           end do
         else
 ! inflow. turbulent variables are prescribed.
           do l=nt1,nt2
-            select case  (bcfaceid(nn)) 
-            case (imin) 
+            select case  (bcfaceid(nn))
+            case (imin)
               bvti1d(i, j, l) = winfd(l)
               bvti1(i, j, l) = winf(l)
-            case (imax) 
+            case (imax)
               bvti2d(i, j, l) = winfd(l)
               bvti2(i, j, l) = winf(l)
-            case (jmin) 
+            case (jmin)
               bvtj1d(i, j, l) = winfd(l)
               bvtj1(i, j, l) = winf(l)
-            case (jmax) 
+            case (jmax)
               bvtj2d(i, j, l) = winfd(l)
               bvtj2(i, j, l) = winf(l)
-            case (kmin) 
+            case (kmin)
               bvtk1d(i, j, l) = winfd(l)
               bvtk1(i, j, l) = winf(l)
-            case (kmax) 
+            case (kmax)
               bvtk2d(i, j, l) = winfd(l)
               bvtk2(i, j, l) = winf(l)
             end select
@@ -905,23 +881,23 @@ subroutine bcturbinterface_d(nn)
     do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
       do i=bcdata(nn)%icbeg,bcdata(nn)%icend
         do l=nt1,nt2
-          select case  (bcfaceid(nn)) 
-          case (imin) 
+          select case  (bcfaceid(nn))
+          case (imin)
             bvti1d(i, j, l) = wd(1, i, j, l)
             bvti1(i, j, l) = w(1, i, j, l)
-          case (imax) 
+          case (imax)
             bvti2d(i, j, l) = wd(ie, i, j, l)
             bvti2(i, j, l) = w(ie, i, j, l)
-          case (jmin) 
+          case (jmin)
             bvtj1d(i, j, l) = wd(i, 1, j, l)
             bvtj1(i, j, l) = w(i, 1, j, l)
-          case (jmax) 
+          case (jmax)
             bvtj2d(i, j, l) = wd(i, je, j, l)
             bvtj2(i, j, l) = w(i, je, j, l)
-          case (kmin) 
+          case (kmin)
             bvtk1d(i, j, l) = wd(i, j, 1, l)
             bvtk1(i, j, l) = w(i, j, 1, l)
-          case (kmax) 
+          case (kmax)
             bvtk2d(i, j, l) = wd(i, j, ke, l)
             bvtk2(i, j, l) = w(i, j, ke, l)
           end select
@@ -992,26 +968,26 @@ subroutine bcturbtreatment()
 ! loop over the boundary condition subfaces of this block.
 bocos:do nn=1,nbocos
 ! determine the kind of boundary condition for this subface.
-      select case  (bctype(nn)) 
-      case (nswalladiabatic, nswallisothermal) 
+      select case  (bctype(nn))
+      case (nswalladiabatic, nswallisothermal)
 ! viscous wall. there is no difference between an adiabatic
 ! and an isothermal wall for the turbulent equations.
 ! set the implicit treatment of the wall boundary conditions.
         call bcturbwall(nn)
-      case (symm, symmpolar, eulerwall) 
+      case (symm, symmpolar, eulerwall)
 !=============================================================
 !=============================================================
 ! symmetry, polar symmetry or inviscid wall. treatment of
 ! the turbulent equations is identical.
         call bcturbsymm(nn)
-      case (farfield) 
+      case (farfield)
 !=============================================================
 ! farfield. the kind of boundary condition to be applied,
 ! inflow or outflow, depends on the local conditions.
         call bcturbfarfield(nn)
       case (slidinginterface, oversetouterbound, domaininterfaceall, &
 &     domaininterfacerhouvw, domaininterfacep, domaininterfacerho, &
-&     domaininterfacetotal) 
+&     domaininterfacetotal)
 !=============================================================
 ! sliding mesh interface, overset outer boudaries, and
 ! domain interface with another code are not really boundary
@@ -1058,36 +1034,36 @@ subroutine bcturbfarfield(nn)
 ! outflow. simply extrapolation or zero neumann bc
 ! of the turbulent variables.
           do l=nt1,nt2
-            select case  (bcfaceid(nn)) 
-            case (imin) 
+            select case  (bcfaceid(nn))
+            case (imin)
               bmti1(i, j, l, l) = -one
-            case (imax) 
+            case (imax)
               bmti2(i, j, l, l) = -one
-            case (jmin) 
+            case (jmin)
               bmtj1(i, j, l, l) = -one
-            case (jmax) 
+            case (jmax)
               bmtj2(i, j, l, l) = -one
-            case (kmin) 
+            case (kmin)
               bmtk1(i, j, l, l) = -one
-            case (kmax) 
+            case (kmax)
               bmtk2(i, j, l, l) = -one
             end select
           end do
         else
 ! inflow. turbulent variables are prescribed.
           do l=nt1,nt2
-            select case  (bcfaceid(nn)) 
-            case (imin) 
+            select case  (bcfaceid(nn))
+            case (imin)
               bvti1(i, j, l) = winf(l)
-            case (imax) 
+            case (imax)
               bvti2(i, j, l) = winf(l)
-            case (jmin) 
+            case (jmin)
               bvtj1(i, j, l) = winf(l)
-            case (jmax) 
+            case (jmax)
               bvtj2(i, j, l) = winf(l)
-            case (kmin) 
+            case (kmin)
               bvtk1(i, j, l) = winf(l)
-            case (kmax) 
+            case (kmax)
               bvtk2(i, j, l) = winf(l)
             end select
           end do
@@ -1121,18 +1097,18 @@ subroutine bcturbinterface(nn)
     do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
       do i=bcdata(nn)%icbeg,bcdata(nn)%icend
         do l=nt1,nt2
-          select case  (bcfaceid(nn)) 
-          case (imin) 
+          select case  (bcfaceid(nn))
+          case (imin)
             bvti1(i, j, l) = w(1, i, j, l)
-          case (imax) 
+          case (imax)
             bvti2(i, j, l) = w(ie, i, j, l)
-          case (jmin) 
+          case (jmin)
             bvtj1(i, j, l) = w(i, 1, j, l)
-          case (jmax) 
+          case (jmax)
             bvtj2(i, j, l) = w(i, je, j, l)
-          case (kmin) 
+          case (kmin)
             bvtk1(i, j, l) = w(i, j, 1, l)
-          case (kmax) 
+          case (kmax)
             bvtk2(i, j, l) = w(i, j, ke, l)
           end select
         end do
@@ -1166,18 +1142,18 @@ subroutine bcturbsymm(nn)
     do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
       do i=bcdata(nn)%icbeg,bcdata(nn)%icend
         do l=nt1,nt2
-          select case  (bcfaceid(nn)) 
-          case (imin) 
+          select case  (bcfaceid(nn))
+          case (imin)
             bmti1(i, j, l, l) = -one
-          case (imax) 
+          case (imax)
             bmti2(i, j, l, l) = -one
-          case (jmin) 
+          case (jmin)
             bmtj1(i, j, l, l) = -one
-          case (jmax) 
+          case (jmax)
             bmtj2(i, j, l, l) = -one
-          case (kmin) 
+          case (kmin)
             bmtk1(i, j, l, l) = -one
-          case (kmax) 
+          case (kmax)
             bmtk2(i, j, l, l) = -one
           end select
         end do
@@ -1237,61 +1213,55 @@ subroutine bcturbwall_d(nn)
 ! determine the turbulence model used and loop over the faces
 ! of the subface and set the values of bmt and bvt for an
 ! implicit treatment.
-    select case  (turbmodel) 
-    case (spalartallmaras, spalartallmarasedwards) 
+    select case  (turbmodel)
+    case (spalartallmaras, spalartallmarasedwards)
 ! spalart-allmaras type of model. value at the wall is zero,
 ! so simply negate the internal value.
-      select case  (bcfaceid(nn)) 
-      case (imin) 
+      select case  (bcfaceid(nn))
+      case (imin)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! bmti1(i,j,itu1,itu1) = one
             result1 = safact(kssa, d2wall(2, i, j))
             bmti1(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (imax) 
+      case (imax)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! bmti2(i,j,itu1,itu1) = one
             result1 = safact(kssa, d2wall(il, i, j))
             bmti2(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (jmin) 
+      case (jmin)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! bmtj1(i,j,itu1,itu1) = one
             result1 = safact(kssa, d2wall(i, 2, j))
             bmtj1(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (jmax) 
+      case (jmax)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! bmtj2(i,j,itu1,itu1) = one
             result1 = safact(kssa, d2wall(i, jl, j))
             bmtj2(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (kmin) 
+      case (kmin)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! bmtk1(i,j,itu1,itu1) = one
             result1 = safact(kssa, d2wall(i, j, 2))
             bmtk1(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (kmax) 
+      case (kmax)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! bmtk2(i,j,itu1,itu1) = one
             result1 = safact(kssa, d2wall(i, j, kl))
             bmtk2(i, j, itu1, itu1) = -result1
           end do
         end do
       end select
-    case (komegawilcox, komegamodified, mentersst) 
+    case (komegawilcox, komegamodified, mentersst)
 !        ================================================================
 ! k-omega type of models. k is zero on the wall and thus the
 ! halo value is the negative of the first internal cell.
@@ -1303,8 +1273,8 @@ subroutine bcturbwall_d(nn)
 ! distance. due to the usage of the dd2wall pointer and the
 ! fact that the original d2wall array starts at 2, there is
 ! an offset of -1 present in dd2wall.
-      select case  (bcfaceid(nn)) 
-      case (imin) 
+      select case  (bcfaceid(nn))
+      case (imin)
         iimax = jl
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -1341,7 +1311,7 @@ subroutine bcturbwall_d(nn)
             bvti1(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (imax) 
+      case (imax)
         iimax = jl
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -1378,7 +1348,7 @@ subroutine bcturbwall_d(nn)
             bvti2(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (jmin) 
+      case (jmin)
         iimax = il
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -1415,7 +1385,7 @@ subroutine bcturbwall_d(nn)
             bvtj1(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (jmax) 
+      case (jmax)
         iimax = il
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -1452,7 +1422,7 @@ subroutine bcturbwall_d(nn)
             bvtj2(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (kmin) 
+      case (kmin)
         iimax = il
         jjmax = jl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -1489,7 +1459,7 @@ subroutine bcturbwall_d(nn)
             bvtk1(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (kmax) 
+      case (kmax)
         iimax = il
         jjmax = jl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -1527,47 +1497,47 @@ subroutine bcturbwall_d(nn)
           end do
         end do
       end select
-    case (ktau) 
+    case (ktau)
 !        ================================================================
 ! k-tau model. both k and tau are zero at the wall, so the
 ! negative value of the internal cell is taken for the halo.
-      select case  (bcfaceid(nn)) 
-      case (imin) 
+      select case  (bcfaceid(nn))
+      case (imin)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmti1(i, j, itu1, itu1) = one
             bmti1(i, j, itu2, itu2) = one
           end do
         end do
-      case (imax) 
+      case (imax)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmti2(i, j, itu1, itu1) = one
             bmti2(i, j, itu2, itu2) = one
           end do
         end do
-      case (jmin) 
+      case (jmin)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmtj1(i, j, itu1, itu1) = one
             bmtj1(i, j, itu2, itu2) = one
           end do
         end do
-      case (jmax) 
+      case (jmax)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmtj2(i, j, itu1, itu1) = one
             bmtj2(i, j, itu2, itu2) = one
           end do
         end do
-      case (kmin) 
+      case (kmin)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmtk1(i, j, itu1, itu1) = one
             bmtk1(i, j, itu2, itu2) = one
           end do
         end do
-      case (kmax) 
+      case (kmax)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmtk2(i, j, itu1, itu1) = one
@@ -1603,8 +1573,8 @@ subroutine turb2ndhalo_d(nn)
 ! some pointers accordingly.
 ! loop over the turbulent variables and set the second halo
 ! value. if this is an eddy model, also set the eddy viscosity.
-    select case  (bcfaceid(nn)) 
-    case (imin) 
+    select case  (bcfaceid(nn))
+    case (imin)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           do l=nt1,nt2
@@ -1617,7 +1587,7 @@ subroutine turb2ndhalo_d(nn)
           end if
         end do
       end do
-    case (imax) 
+    case (imax)
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -1631,7 +1601,7 @@ subroutine turb2ndhalo_d(nn)
           end if
         end do
       end do
-    case (jmin) 
+    case (jmin)
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -1645,7 +1615,7 @@ subroutine turb2ndhalo_d(nn)
           end if
         end do
       end do
-    case (jmax) 
+    case (jmax)
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -1659,7 +1629,7 @@ subroutine turb2ndhalo_d(nn)
           end if
         end do
       end do
-    case (kmin) 
+    case (kmin)
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -1673,7 +1643,7 @@ subroutine turb2ndhalo_d(nn)
           end if
         end do
       end do
-    case (kmax) 
+    case (kmax)
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -1711,8 +1681,8 @@ subroutine turb2ndhalo(nn)
 ! some pointers accordingly.
 ! loop over the turbulent variables and set the second halo
 ! value. if this is an eddy model, also set the eddy viscosity.
-    select case  (bcfaceid(nn)) 
-    case (imin) 
+    select case  (bcfaceid(nn))
+    case (imin)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           do l=nt1,nt2
@@ -1721,7 +1691,7 @@ subroutine turb2ndhalo(nn)
           if (eddymodel) rev(0, i, j) = rev(1, i, j)
         end do
       end do
-    case (imax) 
+    case (imax)
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -1731,7 +1701,7 @@ subroutine turb2ndhalo(nn)
           if (eddymodel) rev(ib, i, j) = rev(ie, i, j)
         end do
       end do
-    case (jmin) 
+    case (jmin)
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -1741,7 +1711,7 @@ subroutine turb2ndhalo(nn)
           if (eddymodel) rev(i, 0, j) = rev(i, 1, j)
         end do
       end do
-    case (jmax) 
+    case (jmax)
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -1751,7 +1721,7 @@ subroutine turb2ndhalo(nn)
           if (eddymodel) rev(i, jb, j) = rev(i, je, j)
         end do
       end do
-    case (kmin) 
+    case (kmin)
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -1761,7 +1731,7 @@ subroutine turb2ndhalo(nn)
           if (eddymodel) rev(i, j, 0) = rev(i, j, 1)
         end do
       end do
-    case (kmax) 
+    case (kmax)
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -1798,8 +1768,8 @@ subroutine turbbcnswall(secondhalo)
       call bcturbwall(nn)
 ! loop over the faces and set the state in
 ! the turbulent halo cells.
-      select case  (bcfaceid(nn)) 
-      case (imin) 
+      select case  (bcfaceid(nn))
+      case (imin)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -1816,7 +1786,7 @@ subroutine turbbcnswall(secondhalo)
             end if
           end do
         end do
-      case (imax) 
+      case (imax)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -1833,7 +1803,7 @@ subroutine turbbcnswall(secondhalo)
             end if
           end do
         end do
-      case (jmin) 
+      case (jmin)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -1850,7 +1820,7 @@ subroutine turbbcnswall(secondhalo)
             end if
           end do
         end do
-      case (jmax) 
+      case (jmax)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -1867,7 +1837,7 @@ subroutine turbbcnswall(secondhalo)
             end if
           end do
         end do
-      case (kmin) 
+      case (kmin)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -1884,7 +1854,7 @@ subroutine turbbcnswall(secondhalo)
             end if
           end do
         end do
-      case (kmax) 
+      case (kmax)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -1949,61 +1919,55 @@ subroutine bcturbwall(nn)
 ! determine the turbulence model used and loop over the faces
 ! of the subface and set the values of bmt and bvt for an
 ! implicit treatment.
-    select case  (turbmodel) 
-    case (spalartallmaras, spalartallmarasedwards) 
+    select case  (turbmodel)
+    case (spalartallmaras, spalartallmarasedwards)
 ! spalart-allmaras type of model. value at the wall is zero,
 ! so simply negate the internal value.
-      select case  (bcfaceid(nn)) 
-      case (imin) 
+      select case  (bcfaceid(nn))
+      case (imin)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! bmti1(i,j,itu1,itu1) = one
             result1 = safact(kssa, d2wall(2, i, j))
             bmti1(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (imax) 
+      case (imax)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! bmti2(i,j,itu1,itu1) = one
             result1 = safact(kssa, d2wall(il, i, j))
             bmti2(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (jmin) 
+      case (jmin)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! bmtj1(i,j,itu1,itu1) = one
             result1 = safact(kssa, d2wall(i, 2, j))
             bmtj1(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (jmax) 
+      case (jmax)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! bmtj2(i,j,itu1,itu1) = one
             result1 = safact(kssa, d2wall(i, jl, j))
             bmtj2(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (kmin) 
+      case (kmin)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! bmtk1(i,j,itu1,itu1) = one
             result1 = safact(kssa, d2wall(i, j, 2))
             bmtk1(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (kmax) 
+      case (kmax)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! bmtk2(i,j,itu1,itu1) = one
             result1 = safact(kssa, d2wall(i, j, kl))
             bmtk2(i, j, itu1, itu1) = -result1
           end do
         end do
       end select
-    case (komegawilcox, komegamodified, mentersst) 
+    case (komegawilcox, komegamodified, mentersst)
 !        ================================================================
 ! k-omega type of models. k is zero on the wall and thus the
 ! halo value is the negative of the first internal cell.
@@ -2015,8 +1979,8 @@ subroutine bcturbwall(nn)
 ! distance. due to the usage of the dd2wall pointer and the
 ! fact that the original d2wall array starts at 2, there is
 ! an offset of -1 present in dd2wall.
-      select case  (bcfaceid(nn)) 
-      case (imin) 
+      select case  (bcfaceid(nn))
+      case (imin)
         iimax = jl
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -2048,7 +2012,7 @@ subroutine bcturbwall(nn)
             bvti1(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (imax) 
+      case (imax)
         iimax = jl
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -2080,7 +2044,7 @@ subroutine bcturbwall(nn)
             bvti2(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (jmin) 
+      case (jmin)
         iimax = il
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -2112,7 +2076,7 @@ subroutine bcturbwall(nn)
             bvtj1(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (jmax) 
+      case (jmax)
         iimax = il
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -2144,7 +2108,7 @@ subroutine bcturbwall(nn)
             bvtj2(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (kmin) 
+      case (kmin)
         iimax = il
         jjmax = jl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -2176,7 +2140,7 @@ subroutine bcturbwall(nn)
             bvtk1(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (kmax) 
+      case (kmax)
         iimax = il
         jjmax = jl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -2209,47 +2173,47 @@ subroutine bcturbwall(nn)
           end do
         end do
       end select
-    case (ktau) 
+    case (ktau)
 !        ================================================================
 ! k-tau model. both k and tau are zero at the wall, so the
 ! negative value of the internal cell is taken for the halo.
-      select case  (bcfaceid(nn)) 
-      case (imin) 
+      select case  (bcfaceid(nn))
+      case (imin)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmti1(i, j, itu1, itu1) = one
             bmti1(i, j, itu2, itu2) = one
           end do
         end do
-      case (imax) 
+      case (imax)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmti2(i, j, itu1, itu1) = one
             bmti2(i, j, itu2, itu2) = one
           end do
         end do
-      case (jmin) 
+      case (jmin)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmtj1(i, j, itu1, itu1) = one
             bmtj1(i, j, itu2, itu2) = one
           end do
         end do
-      case (jmax) 
+      case (jmax)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmtj2(i, j, itu1, itu1) = one
             bmtj2(i, j, itu2, itu2) = one
           end do
         end do
-      case (kmin) 
+      case (kmin)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmtk1(i, j, itu1, itu1) = one
             bmtk1(i, j, itu2, itu2) = one
           end do
         end do
-      case (kmax) 
+      case (kmax)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmtk2(i, j, itu1, itu1) = one
diff --git a/src/adjoint/outputReverse/sa_b.f90 b/src/adjoint/outputReverse/sa_b.f90
index b5d866f1d..07348678c 100644
--- a/src/adjoint/outputReverse/sa_b.f90
+++ b/src/adjoint/outputReverse/sa_b.f90
@@ -96,7 +96,6 @@ subroutine sasource_b()
     kar2inv = one/rsak**2
     cw36 = rsacw3**6
 ! constants for sa rough
-! ks=0.0001 
     cr1 = 0.5
 ! determine the non-dimensional wheel speed of this block.
     omegax = timeref*sections(sectionid)%rotrate(1)
@@ -199,16 +198,14 @@ subroutine sasource_b()
 ! wall distance squared, the ratio chi (ratio of nutilde
 ! and nu) and the functions fv1 and fv2. the latter corrects
 ! the production term near a viscous wall.
-! sa rough 
+! sa rough
         dnew = d2wall(i, j, k) + 0.03*kssa
         nu = rlv(i, j, k)/w(i, j, k, irho)
-! dist2inv = one/(d2wall(i,j,k)**2)
         dist2inv = one/dnew**2
         chi = w(i, j, k, itu1)/nu + cr1*kssa/dnew
         chi2 = chi*chi
         chi3 = chi*chi2
         fv1 = chi3/(chi3+cv13)
-! fv2      = one - chi/(one + chi*fv1)
         fv2 = one - w(i, j, k, itu1)/(nu+w(i, j, k, itu1)*fv1)
 ! the function ft2, which is designed to keep a laminar
 ! solution laminar. when running in fully turbulent mode
@@ -572,7 +569,6 @@ subroutine sasource()
     cw36 = rsacw3**6
     cb3inv = one/rsacb3
 ! constants for sa rough
-! ks=0.0001 
     cr1 = 0.5
 ! determine the non-dimensional wheel speed of this block.
     omegax = timeref*sections(sectionid)%rotrate(1)
@@ -666,16 +662,14 @@ subroutine sasource()
 ! wall distance squared, the ratio chi (ratio of nutilde
 ! and nu) and the functions fv1 and fv2. the latter corrects
 ! the production term near a viscous wall.
-! sa rough 
+! sa rough
         dnew = d2wall(i, j, k) + 0.03*kssa
         nu = rlv(i, j, k)/w(i, j, k, irho)
-! dist2inv = one/(d2wall(i,j,k)**2)
         dist2inv = one/dnew**2
         chi = w(i, j, k, itu1)/nu + cr1*kssa/dnew
         chi2 = chi*chi
         chi3 = chi*chi2
         fv1 = chi3/(chi3+cv13)
-! fv2      = one - chi/(one + chi*fv1)
         fv2 = one - w(i, j, k, itu1)/(nu+w(i, j, k, itu1)*fv1)
 ! the function ft2, which is designed to keep a laminar
 ! solution laminar. when running in fully turbulent mode
diff --git a/src/adjoint/outputReverse/turbbcroutines_b.f90 b/src/adjoint/outputReverse/turbbcroutines_b.f90
index 93ac32df8..50520f791 100644
--- a/src/adjoint/outputReverse/turbbcroutines_b.f90
+++ b/src/adjoint/outputReverse/turbbcroutines_b.f90
@@ -73,8 +73,8 @@ subroutine applyallturbbcthisblock_b(secondhalo)
       if (wallfunctions) then
         call pushcontrol3b(0)
       else
-        select case  (bcfaceid(nn)) 
-        case (imin) 
+        select case  (bcfaceid(nn))
+        case (imin)
           ad_from0 = bcdata(nn)%jcbeg
           do j=ad_from0,bcdata(nn)%jcend
             ad_from = bcdata(nn)%icbeg
@@ -85,7 +85,7 @@ subroutine applyallturbbcthisblock_b(secondhalo)
           call pushinteger4(j - 1)
           call pushinteger4(ad_from0)
           call pushcontrol3b(2)
-        case (imax) 
+        case (imax)
           ad_from2 = bcdata(nn)%jcbeg
           do j=ad_from2,bcdata(nn)%jcend
             ad_from1 = bcdata(nn)%icbeg
@@ -96,7 +96,7 @@ subroutine applyallturbbcthisblock_b(secondhalo)
           call pushinteger4(j - 1)
           call pushinteger4(ad_from2)
           call pushcontrol3b(3)
-        case (jmin) 
+        case (jmin)
           ad_from4 = bcdata(nn)%jcbeg
           do j=ad_from4,bcdata(nn)%jcend
             ad_from3 = bcdata(nn)%icbeg
@@ -107,7 +107,7 @@ subroutine applyallturbbcthisblock_b(secondhalo)
           call pushinteger4(j - 1)
           call pushinteger4(ad_from4)
           call pushcontrol3b(4)
-        case (jmax) 
+        case (jmax)
           ad_from6 = bcdata(nn)%jcbeg
           do j=ad_from6,bcdata(nn)%jcend
             ad_from5 = bcdata(nn)%icbeg
@@ -118,7 +118,7 @@ subroutine applyallturbbcthisblock_b(secondhalo)
           call pushinteger4(j - 1)
           call pushinteger4(ad_from6)
           call pushcontrol3b(5)
-        case (kmin) 
+        case (kmin)
           ad_from8 = bcdata(nn)%jcbeg
           do j=ad_from8,bcdata(nn)%jcend
             ad_from7 = bcdata(nn)%icbeg
@@ -129,7 +129,7 @@ subroutine applyallturbbcthisblock_b(secondhalo)
           call pushinteger4(j - 1)
           call pushinteger4(ad_from8)
           call pushcontrol3b(6)
-        case (kmax) 
+        case (kmax)
           ad_from10 = bcdata(nn)%jcbeg
           do j=ad_from10,bcdata(nn)%jcend
             ad_from9 = bcdata(nn)%icbeg
@@ -325,8 +325,8 @@ subroutine applyallturbbcthisblock(secondhalo)
 ! loop over the faces and set the state in
 ! the turbulent halo cells.
       if (.not.wallfunctions) then
-        select case  (bcfaceid(nn)) 
-        case (imin) 
+        select case  (bcfaceid(nn))
+        case (imin)
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -338,7 +338,7 @@ subroutine applyallturbbcthisblock(secondhalo)
               end do
             end do
           end do
-        case (imax) 
+        case (imax)
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -350,7 +350,7 @@ subroutine applyallturbbcthisblock(secondhalo)
               end do
             end do
           end do
-        case (jmin) 
+        case (jmin)
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -362,7 +362,7 @@ subroutine applyallturbbcthisblock(secondhalo)
               end do
             end do
           end do
-        case (jmax) 
+        case (jmax)
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -374,7 +374,7 @@ subroutine applyallturbbcthisblock(secondhalo)
               end do
             end do
           end do
-        case (kmin) 
+        case (kmin)
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -386,7 +386,7 @@ subroutine applyallturbbcthisblock(secondhalo)
               end do
             end do
           end do
-        case (kmax) 
+        case (kmax)
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -450,15 +450,15 @@ subroutine bceddynowall_b(nn)
     real(kind=realtype) :: tmpd1
     real(kind=realtype) :: tmpd0
 ! determine the face id on which the subface and copy
-    select case  (bcfaceid(nn)) 
-    case (imin) 
+    select case  (bcfaceid(nn))
+    case (imin)
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
         do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
           revd(2, i, j) = revd(2, i, j) + revd(1, i, j)
           revd(1, i, j) = 0.0_8
         end do
       end do
-    case (imax) 
+    case (imax)
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
         do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
           tmpd = revd(ie, i, j)
@@ -466,14 +466,14 @@ subroutine bceddynowall_b(nn)
           revd(il, i, j) = revd(il, i, j) + tmpd
         end do
       end do
-    case (jmin) 
+    case (jmin)
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
         do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
           revd(i, 2, j) = revd(i, 2, j) + revd(i, 1, j)
           revd(i, 1, j) = 0.0_8
         end do
       end do
-    case (jmax) 
+    case (jmax)
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
         do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
           tmpd0 = revd(i, je, j)
@@ -481,14 +481,14 @@ subroutine bceddynowall_b(nn)
           revd(i, jl, j) = revd(i, jl, j) + tmpd0
         end do
       end do
-    case (kmin) 
+    case (kmin)
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
         do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
           revd(i, j, 2) = revd(i, j, 2) + revd(i, j, 1)
           revd(i, j, 1) = 0.0_8
         end do
       end do
-    case (kmax) 
+    case (kmax)
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
         do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
           tmpd1 = revd(i, j, ke)
@@ -518,38 +518,38 @@ subroutine bceddynowall(nn)
 !
     integer(kind=inttype) :: i, j
 ! determine the face id on which the subface and copy
-    select case  (bcfaceid(nn)) 
-    case (imin) 
+    select case  (bcfaceid(nn))
+    case (imin)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(1, i, j) = rev(2, i, j)
         end do
       end do
-    case (imax) 
+    case (imax)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(ie, i, j) = rev(il, i, j)
         end do
       end do
-    case (jmin) 
+    case (jmin)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(i, 1, j) = rev(i, 2, j)
         end do
       end do
-    case (jmax) 
+    case (jmax)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(i, je, j) = rev(i, jl, j)
         end do
       end do
-    case (kmin) 
+    case (kmin)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(i, j, 1) = rev(i, j, 2)
         end do
       end do
-    case (kmax) 
+    case (kmax)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(i, j, ke) = rev(i, j, kl)
@@ -589,13 +589,11 @@ subroutine bceddywall_b(nn)
     real(kind=realtype) :: tmpd0
 ! determine the face id on which the subface is located and
 ! loop over the faces of the subface and set the eddy viscosity
-! in the halo cells. 
-    select case  (bcfaceid(nn)) 
-    case (imin) 
+! in the halo cells.
+    select case  (bcfaceid(nn))
+    case (imin)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! rev(1,i,j) = -rev(2,i,j)
-!  print "(f12.3)", safact(kssa, d2wall(2,i,j))
           call pushreal8(result1)
           result1 = safact(kssa, d2wall(2, i, j))
         end do
@@ -607,11 +605,9 @@ subroutine bceddywall_b(nn)
           call popreal8(result1)
         end do
       end do
-    case (imax) 
+    case (imax)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! rev(ie,i,j) = -rev(il,i,j)
-! print "(f12.3)", safact(kssa, d2wall(il,i,j))
           call pushreal8(result1)
           result1 = safact(kssa, d2wall(il, i, j))
         end do
@@ -624,11 +620,9 @@ subroutine bceddywall_b(nn)
           call popreal8(result1)
         end do
       end do
-    case (jmin) 
+    case (jmin)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! rev(i,1,j) = -rev(i,2,j)
-! print "(f12.3)", safact(kssa, d2wall(i,1,j))
           call pushreal8(result1)
           result1 = safact(kssa, d2wall(i, 2, j))
         end do
@@ -640,11 +634,9 @@ subroutine bceddywall_b(nn)
           call popreal8(result1)
         end do
       end do
-    case (jmax) 
+    case (jmax)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! rev(i,je,j) = -rev(i,jl,j)
-! print "(f12.3)", safact(kssa, d2wall(i,je,j))
           call pushreal8(result1)
           result1 = safact(kssa, d2wall(i, jl, j))
         end do
@@ -657,11 +649,9 @@ subroutine bceddywall_b(nn)
           call popreal8(result1)
         end do
       end do
-    case (kmin) 
+    case (kmin)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! rev(i,j,1) = -rev(i,j,2)
-! print "(f12.3)", safact(kssa, d2wall(i,j,2))
           call pushreal8(result1)
           result1 = safact(kssa, d2wall(i, j, 2))
         end do
@@ -673,11 +663,9 @@ subroutine bceddywall_b(nn)
           call popreal8(result1)
         end do
       end do
-    case (kmax) 
+    case (kmax)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! rev(i,j,ke) = -rev(i,j,kl)
-! print "(f12.3)", safact(kssa, d2wall(i,j,kl))
           call pushreal8(result1)
           result1 = safact(kssa, d2wall(i, j, kl))
         end do
@@ -714,58 +702,46 @@ subroutine bceddywall(nn)
     real(kind=realtype) :: result1
 ! determine the face id on which the subface is located and
 ! loop over the faces of the subface and set the eddy viscosity
-! in the halo cells. 
-    select case  (bcfaceid(nn)) 
-    case (imin) 
+! in the halo cells.
+    select case  (bcfaceid(nn))
+    case (imin)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! rev(1,i,j) = -rev(2,i,j)
-!  print "(f12.3)", safact(kssa, d2wall(2,i,j))
           result1 = safact(kssa, d2wall(2, i, j))
           rev(1, i, j) = result1*rev(2, i, j)
         end do
       end do
-    case (imax) 
+    case (imax)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! rev(ie,i,j) = -rev(il,i,j)
-! print "(f12.3)", safact(kssa, d2wall(il,i,j))
           result1 = safact(kssa, d2wall(il, i, j))
           rev(ie, i, j) = result1*rev(il, i, j)
         end do
       end do
-    case (jmin) 
+    case (jmin)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! rev(i,1,j) = -rev(i,2,j)
-! print "(f12.3)", safact(kssa, d2wall(i,1,j))
           result1 = safact(kssa, d2wall(i, 2, j))
           rev(i, 1, j) = result1*rev(i, 2, j)
         end do
       end do
-    case (jmax) 
+    case (jmax)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! rev(i,je,j) = -rev(i,jl,j)
-! print "(f12.3)", safact(kssa, d2wall(i,je,j))
           result1 = safact(kssa, d2wall(i, jl, j))
           rev(i, je, j) = result1*rev(i, jl, j)
         end do
       end do
-    case (kmin) 
+    case (kmin)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! rev(i,j,1) = -rev(i,j,2)
-! print "(f12.3)", safact(kssa, d2wall(i,j,2))
           result1 = safact(kssa, d2wall(i, j, 2))
           rev(i, j, 1) = result1*rev(i, j, 2)
         end do
       end do
-    case (kmax) 
+    case (kmax)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! rev(i,j,ke) = -rev(i,j,kl)
-! print "(f12.3)", safact(kssa, d2wall(i,j,kl))
           result1 = safact(kssa, d2wall(i, j, kl))
           rev(i, j, ke) = result1*rev(i, j, kl)
         end do
@@ -800,23 +776,23 @@ subroutine bcturbinflow(nn)
       do i=bcdata(nn)%icbeg,bcdata(nn)%icend
 ! loop over the number of turbulent variables.
         do l=nt1,nt2
-          select case  (bcfaceid(nn)) 
-          case (imin) 
+          select case  (bcfaceid(nn))
+          case (imin)
             bvti1(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmti1(i, j, l, l) = one
-          case (imax) 
+          case (imax)
             bvti2(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmti2(i, j, l, l) = one
-          case (jmin) 
+          case (jmin)
             bvtj1(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmtj1(i, j, l, l) = one
-          case (jmax) 
+          case (jmax)
             bvtj2(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmtj2(i, j, l, l) = one
-          case (kmin) 
+          case (kmin)
             bvtk1(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmtk1(i, j, l, l) = one
-          case (kmax) 
+          case (kmax)
             bvtk2(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmtk2(i, j, l, l) = one
           end select
@@ -853,18 +829,18 @@ subroutine bcturboutflow(nn)
     do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
       do i=bcdata(nn)%icbeg,bcdata(nn)%icend
         do l=nt1,nt2
-          select case  (bcfaceid(nn)) 
-          case (imin) 
+          select case  (bcfaceid(nn))
+          case (imin)
             bmti1(i, j, l, l) = -one
-          case (imax) 
+          case (imax)
             bmti2(i, j, l, l) = -one
-          case (jmin) 
+          case (jmin)
             bmtj1(i, j, l, l) = -one
-          case (jmax) 
+          case (jmax)
             bmtj2(i, j, l, l) = -one
-          case (kmin) 
+          case (kmin)
             bmtk1(i, j, l, l) = -one
-          case (kmax) 
+          case (kmax)
             bmtk2(i, j, l, l) = -one
           end select
         end do
@@ -906,16 +882,16 @@ subroutine bcturbtreatment_b()
 ! loop over the boundary condition subfaces of this block.
 bocos:do nn=1,nbocos
 ! determine the kind of boundary condition for this subface.
-      select case  (bctype(nn)) 
-      case (nswalladiabatic, nswallisothermal) 
+      select case  (bctype(nn))
+      case (nswalladiabatic, nswallisothermal)
         call pushcontrol2b(2)
-      case (symm, symmpolar, eulerwall) 
+      case (symm, symmpolar, eulerwall)
         call pushcontrol2b(3)
-      case (farfield) 
+      case (farfield)
         call pushcontrol2b(1)
       case (slidinginterface, oversetouterbound, domaininterfaceall, &
 &     domaininterfacerhouvw, domaininterfacep, domaininterfacerho, &
-&     domaininterfacetotal) 
+&     domaininterfacetotal)
         call pushcontrol2b(0)
       case default
         call pushcontrol2b(3)
@@ -1005,18 +981,18 @@ subroutine bcturbfarfield_b(nn)
         else
 ! inflow. turbulent variables are prescribed.
           do l=nt1,nt2
-            select case  (bcfaceid(nn)) 
-            case (imin) 
+            select case  (bcfaceid(nn))
+            case (imin)
               call pushcontrol3b(5)
-            case (imax) 
+            case (imax)
               call pushcontrol3b(4)
-            case (jmin) 
+            case (jmin)
               call pushcontrol3b(3)
-            case (jmax) 
+            case (jmax)
               call pushcontrol3b(2)
-            case (kmin) 
+            case (kmin)
               call pushcontrol3b(1)
-            case (kmax) 
+            case (kmax)
               call pushcontrol3b(0)
             case default
               call pushcontrol3b(6)
@@ -1094,18 +1070,18 @@ subroutine bcturbinterface_b(nn)
     do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
       do i=bcdata(nn)%icbeg,bcdata(nn)%icend
         do l=nt1,nt2
-          select case  (bcfaceid(nn)) 
-          case (imin) 
+          select case  (bcfaceid(nn))
+          case (imin)
             call pushcontrol3b(5)
-          case (imax) 
+          case (imax)
             call pushcontrol3b(4)
-          case (jmin) 
+          case (jmin)
             call pushcontrol3b(3)
-          case (jmax) 
+          case (jmax)
             call pushcontrol3b(2)
-          case (kmin) 
+          case (kmin)
             call pushcontrol3b(1)
-          case (kmax) 
+          case (kmax)
             call pushcontrol3b(0)
           case default
             call pushcontrol3b(6)
@@ -1207,26 +1183,26 @@ subroutine bcturbtreatment()
 ! loop over the boundary condition subfaces of this block.
 bocos:do nn=1,nbocos
 ! determine the kind of boundary condition for this subface.
-      select case  (bctype(nn)) 
-      case (nswalladiabatic, nswallisothermal) 
+      select case  (bctype(nn))
+      case (nswalladiabatic, nswallisothermal)
 ! viscous wall. there is no difference between an adiabatic
 ! and an isothermal wall for the turbulent equations.
 ! set the implicit treatment of the wall boundary conditions.
         call bcturbwall(nn)
-      case (symm, symmpolar, eulerwall) 
+      case (symm, symmpolar, eulerwall)
 !=============================================================
 !=============================================================
 ! symmetry, polar symmetry or inviscid wall. treatment of
 ! the turbulent equations is identical.
         call bcturbsymm(nn)
-      case (farfield) 
+      case (farfield)
 !=============================================================
 ! farfield. the kind of boundary condition to be applied,
 ! inflow or outflow, depends on the local conditions.
         call bcturbfarfield(nn)
       case (slidinginterface, oversetouterbound, domaininterfaceall, &
 &     domaininterfacerhouvw, domaininterfacep, domaininterfacerho, &
-&     domaininterfacetotal) 
+&     domaininterfacetotal)
 !=============================================================
 ! sliding mesh interface, overset outer boudaries, and
 ! domain interface with another code are not really boundary
@@ -1273,36 +1249,36 @@ subroutine bcturbfarfield(nn)
 ! outflow. simply extrapolation or zero neumann bc
 ! of the turbulent variables.
           do l=nt1,nt2
-            select case  (bcfaceid(nn)) 
-            case (imin) 
+            select case  (bcfaceid(nn))
+            case (imin)
               bmti1(i, j, l, l) = -one
-            case (imax) 
+            case (imax)
               bmti2(i, j, l, l) = -one
-            case (jmin) 
+            case (jmin)
               bmtj1(i, j, l, l) = -one
-            case (jmax) 
+            case (jmax)
               bmtj2(i, j, l, l) = -one
-            case (kmin) 
+            case (kmin)
               bmtk1(i, j, l, l) = -one
-            case (kmax) 
+            case (kmax)
               bmtk2(i, j, l, l) = -one
             end select
           end do
         else
 ! inflow. turbulent variables are prescribed.
           do l=nt1,nt2
-            select case  (bcfaceid(nn)) 
-            case (imin) 
+            select case  (bcfaceid(nn))
+            case (imin)
               bvti1(i, j, l) = winf(l)
-            case (imax) 
+            case (imax)
               bvti2(i, j, l) = winf(l)
-            case (jmin) 
+            case (jmin)
               bvtj1(i, j, l) = winf(l)
-            case (jmax) 
+            case (jmax)
               bvtj2(i, j, l) = winf(l)
-            case (kmin) 
+            case (kmin)
               bvtk1(i, j, l) = winf(l)
-            case (kmax) 
+            case (kmax)
               bvtk2(i, j, l) = winf(l)
             end select
           end do
@@ -1336,18 +1312,18 @@ subroutine bcturbinterface(nn)
     do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
       do i=bcdata(nn)%icbeg,bcdata(nn)%icend
         do l=nt1,nt2
-          select case  (bcfaceid(nn)) 
-          case (imin) 
+          select case  (bcfaceid(nn))
+          case (imin)
             bvti1(i, j, l) = w(1, i, j, l)
-          case (imax) 
+          case (imax)
             bvti2(i, j, l) = w(ie, i, j, l)
-          case (jmin) 
+          case (jmin)
             bvtj1(i, j, l) = w(i, 1, j, l)
-          case (jmax) 
+          case (jmax)
             bvtj2(i, j, l) = w(i, je, j, l)
-          case (kmin) 
+          case (kmin)
             bvtk1(i, j, l) = w(i, j, 1, l)
-          case (kmax) 
+          case (kmax)
             bvtk2(i, j, l) = w(i, j, ke, l)
           end select
         end do
@@ -1381,18 +1357,18 @@ subroutine bcturbsymm(nn)
     do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
       do i=bcdata(nn)%icbeg,bcdata(nn)%icend
         do l=nt1,nt2
-          select case  (bcfaceid(nn)) 
-          case (imin) 
+          select case  (bcfaceid(nn))
+          case (imin)
             bmti1(i, j, l, l) = -one
-          case (imax) 
+          case (imax)
             bmti2(i, j, l, l) = -one
-          case (jmin) 
+          case (jmin)
             bmtj1(i, j, l, l) = -one
-          case (jmax) 
+          case (jmax)
             bmtj2(i, j, l, l) = -one
-          case (kmin) 
+          case (kmin)
             bmtk1(i, j, l, l) = -one
-          case (kmax) 
+          case (kmax)
             bmtk2(i, j, l, l) = -one
           end select
         end do
@@ -1471,10 +1447,10 @@ subroutine bcturbwall_b(nn)
 ! determine the turbulence model used and loop over the faces
 ! of the subface and set the values of bmt and bvt for an
 ! implicit treatment.
-    select case  (turbmodel) 
-    case (spalartallmaras, spalartallmarasedwards) 
+    select case  (turbmodel)
+    case (spalartallmaras, spalartallmarasedwards)
 
-    case (komegawilcox, komegamodified, mentersst) 
+    case (komegawilcox, komegamodified, mentersst)
 !        ================================================================
 ! k-omega type of models. k is zero on the wall and thus the
 ! halo value is the negative of the first internal cell.
@@ -1486,8 +1462,8 @@ subroutine bcturbwall_b(nn)
 ! distance. due to the usage of the dd2wall pointer and the
 ! fact that the original d2wall array starts at 2, there is
 ! an offset of -1 present in dd2wall.
-      select case  (bcfaceid(nn)) 
-      case (imin) 
+      select case  (bcfaceid(nn))
+      case (imin)
         iimax = jl
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -1551,7 +1527,7 @@ subroutine bcturbwall_b(nn)
             call popinteger4(jj)
           end if
         end do
-      case (imax) 
+      case (imax)
         iimax = jl
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -1615,7 +1591,7 @@ subroutine bcturbwall_b(nn)
             call popinteger4(jj)
           end if
         end do
-      case (jmin) 
+      case (jmin)
         iimax = il
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -1679,7 +1655,7 @@ subroutine bcturbwall_b(nn)
             call popinteger4(jj)
           end if
         end do
-      case (jmax) 
+      case (jmax)
         iimax = il
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -1743,7 +1719,7 @@ subroutine bcturbwall_b(nn)
             call popinteger4(jj)
           end if
         end do
-      case (kmin) 
+      case (kmin)
         iimax = il
         jjmax = jl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -1807,7 +1783,7 @@ subroutine bcturbwall_b(nn)
             call popinteger4(jj)
           end if
         end do
-      case (kmax) 
+      case (kmax)
         iimax = il
         jjmax = jl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -1913,8 +1889,8 @@ subroutine turb2ndhalo_b(nn)
 ! some pointers accordingly.
 ! loop over the turbulent variables and set the second halo
 ! value. if this is an eddy model, also set the eddy viscosity.
-    select case  (bcfaceid(nn)) 
-    case (imin) 
+    select case  (bcfaceid(nn))
+    case (imin)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           if (eddymodel) then
@@ -1937,7 +1913,7 @@ subroutine turb2ndhalo_b(nn)
           end do
         end do
       end do
-    case (imax) 
+    case (imax)
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -1963,7 +1939,7 @@ subroutine turb2ndhalo_b(nn)
           end do
         end do
       end do
-    case (jmin) 
+    case (jmin)
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -1987,7 +1963,7 @@ subroutine turb2ndhalo_b(nn)
           end do
         end do
       end do
-    case (jmax) 
+    case (jmax)
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -2013,7 +1989,7 @@ subroutine turb2ndhalo_b(nn)
           end do
         end do
       end do
-    case (kmin) 
+    case (kmin)
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -2037,7 +2013,7 @@ subroutine turb2ndhalo_b(nn)
           end do
         end do
       end do
-    case (kmax) 
+    case (kmax)
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -2087,8 +2063,8 @@ subroutine turb2ndhalo(nn)
 ! some pointers accordingly.
 ! loop over the turbulent variables and set the second halo
 ! value. if this is an eddy model, also set the eddy viscosity.
-    select case  (bcfaceid(nn)) 
-    case (imin) 
+    select case  (bcfaceid(nn))
+    case (imin)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           do l=nt1,nt2
@@ -2097,7 +2073,7 @@ subroutine turb2ndhalo(nn)
           if (eddymodel) rev(0, i, j) = rev(1, i, j)
         end do
       end do
-    case (imax) 
+    case (imax)
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -2107,7 +2083,7 @@ subroutine turb2ndhalo(nn)
           if (eddymodel) rev(ib, i, j) = rev(ie, i, j)
         end do
       end do
-    case (jmin) 
+    case (jmin)
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -2117,7 +2093,7 @@ subroutine turb2ndhalo(nn)
           if (eddymodel) rev(i, 0, j) = rev(i, 1, j)
         end do
       end do
-    case (jmax) 
+    case (jmax)
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -2127,7 +2103,7 @@ subroutine turb2ndhalo(nn)
           if (eddymodel) rev(i, jb, j) = rev(i, je, j)
         end do
       end do
-    case (kmin) 
+    case (kmin)
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -2137,7 +2113,7 @@ subroutine turb2ndhalo(nn)
           if (eddymodel) rev(i, j, 0) = rev(i, j, 1)
         end do
       end do
-    case (kmax) 
+    case (kmax)
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -2174,8 +2150,8 @@ subroutine turbbcnswall(secondhalo)
       call bcturbwall(nn)
 ! loop over the faces and set the state in
 ! the turbulent halo cells.
-      select case  (bcfaceid(nn)) 
-      case (imin) 
+      select case  (bcfaceid(nn))
+      case (imin)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -2192,7 +2168,7 @@ subroutine turbbcnswall(secondhalo)
             end if
           end do
         end do
-      case (imax) 
+      case (imax)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -2209,7 +2185,7 @@ subroutine turbbcnswall(secondhalo)
             end if
           end do
         end do
-      case (jmin) 
+      case (jmin)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -2226,7 +2202,7 @@ subroutine turbbcnswall(secondhalo)
             end if
           end do
         end do
-      case (jmax) 
+      case (jmax)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -2243,7 +2219,7 @@ subroutine turbbcnswall(secondhalo)
             end if
           end do
         end do
-      case (kmin) 
+      case (kmin)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -2260,7 +2236,7 @@ subroutine turbbcnswall(secondhalo)
             end if
           end do
         end do
-      case (kmax) 
+      case (kmax)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -2325,61 +2301,55 @@ subroutine bcturbwall(nn)
 ! determine the turbulence model used and loop over the faces
 ! of the subface and set the values of bmt and bvt for an
 ! implicit treatment.
-    select case  (turbmodel) 
-    case (spalartallmaras, spalartallmarasedwards) 
+    select case  (turbmodel)
+    case (spalartallmaras, spalartallmarasedwards)
 ! spalart-allmaras type of model. value at the wall is zero,
 ! so simply negate the internal value.
-      select case  (bcfaceid(nn)) 
-      case (imin) 
+      select case  (bcfaceid(nn))
+      case (imin)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! bmti1(i,j,itu1,itu1) = one
             result1 = safact(kssa, d2wall(2, i, j))
             bmti1(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (imax) 
+      case (imax)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! bmti2(i,j,itu1,itu1) = one
             result1 = safact(kssa, d2wall(il, i, j))
             bmti2(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (jmin) 
+      case (jmin)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! bmtj1(i,j,itu1,itu1) = one
             result1 = safact(kssa, d2wall(i, 2, j))
             bmtj1(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (jmax) 
+      case (jmax)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! bmtj2(i,j,itu1,itu1) = one
             result1 = safact(kssa, d2wall(i, jl, j))
             bmtj2(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (kmin) 
+      case (kmin)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! bmtk1(i,j,itu1,itu1) = one
             result1 = safact(kssa, d2wall(i, j, 2))
             bmtk1(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (kmax) 
+      case (kmax)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! bmtk2(i,j,itu1,itu1) = one
             result1 = safact(kssa, d2wall(i, j, kl))
             bmtk2(i, j, itu1, itu1) = -result1
           end do
         end do
       end select
-    case (komegawilcox, komegamodified, mentersst) 
+    case (komegawilcox, komegamodified, mentersst)
 !        ================================================================
 ! k-omega type of models. k is zero on the wall and thus the
 ! halo value is the negative of the first internal cell.
@@ -2391,8 +2361,8 @@ subroutine bcturbwall(nn)
 ! distance. due to the usage of the dd2wall pointer and the
 ! fact that the original d2wall array starts at 2, there is
 ! an offset of -1 present in dd2wall.
-      select case  (bcfaceid(nn)) 
-      case (imin) 
+      select case  (bcfaceid(nn))
+      case (imin)
         iimax = jl
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -2424,7 +2394,7 @@ subroutine bcturbwall(nn)
             bvti1(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (imax) 
+      case (imax)
         iimax = jl
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -2456,7 +2426,7 @@ subroutine bcturbwall(nn)
             bvti2(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (jmin) 
+      case (jmin)
         iimax = il
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -2488,7 +2458,7 @@ subroutine bcturbwall(nn)
             bvtj1(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (jmax) 
+      case (jmax)
         iimax = il
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -2520,7 +2490,7 @@ subroutine bcturbwall(nn)
             bvtj2(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (kmin) 
+      case (kmin)
         iimax = il
         jjmax = jl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -2552,7 +2522,7 @@ subroutine bcturbwall(nn)
             bvtk1(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (kmax) 
+      case (kmax)
         iimax = il
         jjmax = jl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -2585,47 +2555,47 @@ subroutine bcturbwall(nn)
           end do
         end do
       end select
-    case (ktau) 
+    case (ktau)
 !        ================================================================
 ! k-tau model. both k and tau are zero at the wall, so the
 ! negative value of the internal cell is taken for the halo.
-      select case  (bcfaceid(nn)) 
-      case (imin) 
+      select case  (bcfaceid(nn))
+      case (imin)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmti1(i, j, itu1, itu1) = one
             bmti1(i, j, itu2, itu2) = one
           end do
         end do
-      case (imax) 
+      case (imax)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmti2(i, j, itu1, itu1) = one
             bmti2(i, j, itu2, itu2) = one
           end do
         end do
-      case (jmin) 
+      case (jmin)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmtj1(i, j, itu1, itu1) = one
             bmtj1(i, j, itu2, itu2) = one
           end do
         end do
-      case (jmax) 
+      case (jmax)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmtj2(i, j, itu1, itu1) = one
             bmtj2(i, j, itu2, itu2) = one
           end do
         end do
-      case (kmin) 
+      case (kmin)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmtk1(i, j, itu1, itu1) = one
             bmtk1(i, j, itu2, itu2) = one
           end do
         end do
-      case (kmax) 
+      case (kmax)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmtk2(i, j, itu1, itu1) = one
diff --git a/src/adjoint/outputReverseFast/sa_fast_b.f90 b/src/adjoint/outputReverseFast/sa_fast_b.f90
index 3de8c9f66..64351041a 100644
--- a/src/adjoint/outputReverseFast/sa_fast_b.f90
+++ b/src/adjoint/outputReverseFast/sa_fast_b.f90
@@ -88,7 +88,6 @@ subroutine sasource_fast_b()
     kar2inv = one/rsak**2
     cw36 = rsacw3**6
 ! constants for sa rough
-! ks=0.0001 
     cr1 = 0.5
 ! determine the non-dimensional wheel speed of this block.
     omegax = timeref*sections(sectionid)%rotrate(1)
@@ -187,16 +186,14 @@ subroutine sasource_fast_b()
 ! wall distance squared, the ratio chi (ratio of nutilde
 ! and nu) and the functions fv1 and fv2. the latter corrects
 ! the production term near a viscous wall.
-! sa rough 
+! sa rough
         dnew = d2wall(i, j, k) + 0.03*kssa
         nu = rlv(i, j, k)/w(i, j, k, irho)
-! dist2inv = one/(d2wall(i,j,k)**2)
         dist2inv = one/dnew**2
         chi = w(i, j, k, itu1)/nu + cr1*kssa/dnew
         chi2 = chi*chi
         chi3 = chi*chi2
         fv1 = chi3/(chi3+cv13)
-! fv2      = one - chi/(one + chi*fv1)
         fv2 = one - w(i, j, k, itu1)/(nu+w(i, j, k, itu1)*fv1)
 ! the function ft2, which is designed to keep a laminar
 ! solution laminar. when running in fully turbulent mode
@@ -505,7 +502,6 @@ subroutine sasource()
     cw36 = rsacw3**6
     cb3inv = one/rsacb3
 ! constants for sa rough
-! ks=0.0001 
     cr1 = 0.5
 ! determine the non-dimensional wheel speed of this block.
     omegax = timeref*sections(sectionid)%rotrate(1)
@@ -599,16 +595,14 @@ subroutine sasource()
 ! wall distance squared, the ratio chi (ratio of nutilde
 ! and nu) and the functions fv1 and fv2. the latter corrects
 ! the production term near a viscous wall.
-! sa rough 
+! sa rough
         dnew = d2wall(i, j, k) + 0.03*kssa
         nu = rlv(i, j, k)/w(i, j, k, irho)
-! dist2inv = one/(d2wall(i,j,k)**2)
         dist2inv = one/dnew**2
         chi = w(i, j, k, itu1)/nu + cr1*kssa/dnew
         chi2 = chi*chi
         chi3 = chi*chi2
         fv1 = chi3/(chi3+cv13)
-! fv2      = one - chi/(one + chi*fv1)
         fv2 = one - w(i, j, k, itu1)/(nu+w(i, j, k, itu1)*fv1)
 ! the function ft2, which is designed to keep a laminar
 ! solution laminar. when running in fully turbulent mode
diff --git a/src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90 b/src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90
index b7c934129..12bcc85e3 100644
--- a/src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90
+++ b/src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90
@@ -32,8 +32,8 @@ subroutine applyallturbbcthisblock(secondhalo)
 ! loop over the faces and set the state in
 ! the turbulent halo cells.
       if (.not.wallfunctions) then
-        select case  (bcfaceid(nn)) 
-        case (imin) 
+        select case  (bcfaceid(nn))
+        case (imin)
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -45,7 +45,7 @@ subroutine applyallturbbcthisblock(secondhalo)
               end do
             end do
           end do
-        case (imax) 
+        case (imax)
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -57,7 +57,7 @@ subroutine applyallturbbcthisblock(secondhalo)
               end do
             end do
           end do
-        case (jmin) 
+        case (jmin)
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -69,7 +69,7 @@ subroutine applyallturbbcthisblock(secondhalo)
               end do
             end do
           end do
-        case (jmax) 
+        case (jmax)
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -81,7 +81,7 @@ subroutine applyallturbbcthisblock(secondhalo)
               end do
             end do
           end do
-        case (kmin) 
+        case (kmin)
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -93,7 +93,7 @@ subroutine applyallturbbcthisblock(secondhalo)
               end do
             end do
           end do
-        case (kmax) 
+        case (kmax)
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -147,38 +147,38 @@ subroutine bceddynowall(nn)
 !
     integer(kind=inttype) :: i, j
 ! determine the face id on which the subface and copy
-    select case  (bcfaceid(nn)) 
-    case (imin) 
+    select case  (bcfaceid(nn))
+    case (imin)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(1, i, j) = rev(2, i, j)
         end do
       end do
-    case (imax) 
+    case (imax)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(ie, i, j) = rev(il, i, j)
         end do
       end do
-    case (jmin) 
+    case (jmin)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(i, 1, j) = rev(i, 2, j)
         end do
       end do
-    case (jmax) 
+    case (jmax)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(i, je, j) = rev(i, jl, j)
         end do
       end do
-    case (kmin) 
+    case (kmin)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(i, j, 1) = rev(i, j, 2)
         end do
       end do
-    case (kmax) 
+    case (kmax)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(i, j, ke) = rev(i, j, kl)
@@ -208,58 +208,46 @@ subroutine bceddywall(nn)
     real(kind=realtype) :: result1
 ! determine the face id on which the subface is located and
 ! loop over the faces of the subface and set the eddy viscosity
-! in the halo cells. 
-    select case  (bcfaceid(nn)) 
-    case (imin) 
+! in the halo cells.
+    select case  (bcfaceid(nn))
+    case (imin)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! rev(1,i,j) = -rev(2,i,j)
-!  print "(f12.3)", safact(kssa, d2wall(2,i,j))
           result1 = safact(kssa, d2wall(2, i, j))
           rev(1, i, j) = result1*rev(2, i, j)
         end do
       end do
-    case (imax) 
+    case (imax)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! rev(ie,i,j) = -rev(il,i,j)
-! print "(f12.3)", safact(kssa, d2wall(il,i,j))
           result1 = safact(kssa, d2wall(il, i, j))
           rev(ie, i, j) = result1*rev(il, i, j)
         end do
       end do
-    case (jmin) 
+    case (jmin)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! rev(i,1,j) = -rev(i,2,j)
-! print "(f12.3)", safact(kssa, d2wall(i,1,j))
           result1 = safact(kssa, d2wall(i, 2, j))
           rev(i, 1, j) = result1*rev(i, 2, j)
         end do
       end do
-    case (jmax) 
+    case (jmax)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! rev(i,je,j) = -rev(i,jl,j)
-! print "(f12.3)", safact(kssa, d2wall(i,je,j))
           result1 = safact(kssa, d2wall(i, jl, j))
           rev(i, je, j) = result1*rev(i, jl, j)
         end do
       end do
-    case (kmin) 
+    case (kmin)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! rev(i,j,1) = -rev(i,j,2)
-! print "(f12.3)", safact(kssa, d2wall(i,j,2))
           result1 = safact(kssa, d2wall(i, j, 2))
           rev(i, j, 1) = result1*rev(i, j, 2)
         end do
       end do
-    case (kmax) 
+    case (kmax)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! rev(i,j,ke) = -rev(i,j,kl)
-! print "(f12.3)", safact(kssa, d2wall(i,j,kl))
           result1 = safact(kssa, d2wall(i, j, kl))
           rev(i, j, ke) = result1*rev(i, j, kl)
         end do
@@ -294,23 +282,23 @@ subroutine bcturbinflow(nn)
       do i=bcdata(nn)%icbeg,bcdata(nn)%icend
 ! loop over the number of turbulent variables.
         do l=nt1,nt2
-          select case  (bcfaceid(nn)) 
-          case (imin) 
+          select case  (bcfaceid(nn))
+          case (imin)
             bvti1(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmti1(i, j, l, l) = one
-          case (imax) 
+          case (imax)
             bvti2(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmti2(i, j, l, l) = one
-          case (jmin) 
+          case (jmin)
             bvtj1(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmtj1(i, j, l, l) = one
-          case (jmax) 
+          case (jmax)
             bvtj2(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmtj2(i, j, l, l) = one
-          case (kmin) 
+          case (kmin)
             bvtk1(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmtk1(i, j, l, l) = one
-          case (kmax) 
+          case (kmax)
             bvtk2(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmtk2(i, j, l, l) = one
           end select
@@ -347,18 +335,18 @@ subroutine bcturboutflow(nn)
     do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
       do i=bcdata(nn)%icbeg,bcdata(nn)%icend
         do l=nt1,nt2
-          select case  (bcfaceid(nn)) 
-          case (imin) 
+          select case  (bcfaceid(nn))
+          case (imin)
             bmti1(i, j, l, l) = -one
-          case (imax) 
+          case (imax)
             bmti2(i, j, l, l) = -one
-          case (jmin) 
+          case (jmin)
             bmtj1(i, j, l, l) = -one
-          case (jmax) 
+          case (jmax)
             bmtj2(i, j, l, l) = -one
-          case (kmin) 
+          case (kmin)
             bmtk1(i, j, l, l) = -one
-          case (kmax) 
+          case (kmax)
             bmtk2(i, j, l, l) = -one
           end select
         end do
@@ -428,26 +416,26 @@ subroutine bcturbtreatment()
 ! loop over the boundary condition subfaces of this block.
 bocos:do nn=1,nbocos
 ! determine the kind of boundary condition for this subface.
-      select case  (bctype(nn)) 
-      case (nswalladiabatic, nswallisothermal) 
+      select case  (bctype(nn))
+      case (nswalladiabatic, nswallisothermal)
 ! viscous wall. there is no difference between an adiabatic
 ! and an isothermal wall for the turbulent equations.
 ! set the implicit treatment of the wall boundary conditions.
         call bcturbwall(nn)
-      case (symm, symmpolar, eulerwall) 
+      case (symm, symmpolar, eulerwall)
 !=============================================================
 !=============================================================
 ! symmetry, polar symmetry or inviscid wall. treatment of
 ! the turbulent equations is identical.
         call bcturbsymm(nn)
-      case (farfield) 
+      case (farfield)
 !=============================================================
 ! farfield. the kind of boundary condition to be applied,
 ! inflow or outflow, depends on the local conditions.
         call bcturbfarfield(nn)
       case (slidinginterface, oversetouterbound, domaininterfaceall, &
 &     domaininterfacerhouvw, domaininterfacep, domaininterfacerho, &
-&     domaininterfacetotal) 
+&     domaininterfacetotal)
 !=============================================================
 ! sliding mesh interface, overset outer boudaries, and
 ! domain interface with another code are not really boundary
@@ -494,36 +482,36 @@ subroutine bcturbfarfield(nn)
 ! outflow. simply extrapolation or zero neumann bc
 ! of the turbulent variables.
           do l=nt1,nt2
-            select case  (bcfaceid(nn)) 
-            case (imin) 
+            select case  (bcfaceid(nn))
+            case (imin)
               bmti1(i, j, l, l) = -one
-            case (imax) 
+            case (imax)
               bmti2(i, j, l, l) = -one
-            case (jmin) 
+            case (jmin)
               bmtj1(i, j, l, l) = -one
-            case (jmax) 
+            case (jmax)
               bmtj2(i, j, l, l) = -one
-            case (kmin) 
+            case (kmin)
               bmtk1(i, j, l, l) = -one
-            case (kmax) 
+            case (kmax)
               bmtk2(i, j, l, l) = -one
             end select
           end do
         else
 ! inflow. turbulent variables are prescribed.
           do l=nt1,nt2
-            select case  (bcfaceid(nn)) 
-            case (imin) 
+            select case  (bcfaceid(nn))
+            case (imin)
               bvti1(i, j, l) = winf(l)
-            case (imax) 
+            case (imax)
               bvti2(i, j, l) = winf(l)
-            case (jmin) 
+            case (jmin)
               bvtj1(i, j, l) = winf(l)
-            case (jmax) 
+            case (jmax)
               bvtj2(i, j, l) = winf(l)
-            case (kmin) 
+            case (kmin)
               bvtk1(i, j, l) = winf(l)
-            case (kmax) 
+            case (kmax)
               bvtk2(i, j, l) = winf(l)
             end select
           end do
@@ -557,18 +545,18 @@ subroutine bcturbinterface(nn)
     do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
       do i=bcdata(nn)%icbeg,bcdata(nn)%icend
         do l=nt1,nt2
-          select case  (bcfaceid(nn)) 
-          case (imin) 
+          select case  (bcfaceid(nn))
+          case (imin)
             bvti1(i, j, l) = w(1, i, j, l)
-          case (imax) 
+          case (imax)
             bvti2(i, j, l) = w(ie, i, j, l)
-          case (jmin) 
+          case (jmin)
             bvtj1(i, j, l) = w(i, 1, j, l)
-          case (jmax) 
+          case (jmax)
             bvtj2(i, j, l) = w(i, je, j, l)
-          case (kmin) 
+          case (kmin)
             bvtk1(i, j, l) = w(i, j, 1, l)
-          case (kmax) 
+          case (kmax)
             bvtk2(i, j, l) = w(i, j, ke, l)
           end select
         end do
@@ -602,18 +590,18 @@ subroutine bcturbsymm(nn)
     do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
       do i=bcdata(nn)%icbeg,bcdata(nn)%icend
         do l=nt1,nt2
-          select case  (bcfaceid(nn)) 
-          case (imin) 
+          select case  (bcfaceid(nn))
+          case (imin)
             bmti1(i, j, l, l) = -one
-          case (imax) 
+          case (imax)
             bmti2(i, j, l, l) = -one
-          case (jmin) 
+          case (jmin)
             bmtj1(i, j, l, l) = -one
-          case (jmax) 
+          case (jmax)
             bmtj2(i, j, l, l) = -one
-          case (kmin) 
+          case (kmin)
             bmtk1(i, j, l, l) = -one
-          case (kmax) 
+          case (kmax)
             bmtk2(i, j, l, l) = -one
           end select
         end do
@@ -642,8 +630,8 @@ subroutine turb2ndhalo(nn)
 ! some pointers accordingly.
 ! loop over the turbulent variables and set the second halo
 ! value. if this is an eddy model, also set the eddy viscosity.
-    select case  (bcfaceid(nn)) 
-    case (imin) 
+    select case  (bcfaceid(nn))
+    case (imin)
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           do l=nt1,nt2
@@ -652,7 +640,7 @@ subroutine turb2ndhalo(nn)
           if (eddymodel) rev(0, i, j) = rev(1, i, j)
         end do
       end do
-    case (imax) 
+    case (imax)
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -662,7 +650,7 @@ subroutine turb2ndhalo(nn)
           if (eddymodel) rev(ib, i, j) = rev(ie, i, j)
         end do
       end do
-    case (jmin) 
+    case (jmin)
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -672,7 +660,7 @@ subroutine turb2ndhalo(nn)
           if (eddymodel) rev(i, 0, j) = rev(i, 1, j)
         end do
       end do
-    case (jmax) 
+    case (jmax)
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -682,7 +670,7 @@ subroutine turb2ndhalo(nn)
           if (eddymodel) rev(i, jb, j) = rev(i, je, j)
         end do
       end do
-    case (kmin) 
+    case (kmin)
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -692,7 +680,7 @@ subroutine turb2ndhalo(nn)
           if (eddymodel) rev(i, j, 0) = rev(i, j, 1)
         end do
       end do
-    case (kmax) 
+    case (kmax)
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -729,8 +717,8 @@ subroutine turbbcnswall(secondhalo)
       call bcturbwall(nn)
 ! loop over the faces and set the state in
 ! the turbulent halo cells.
-      select case  (bcfaceid(nn)) 
-      case (imin) 
+      select case  (bcfaceid(nn))
+      case (imin)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -747,7 +735,7 @@ subroutine turbbcnswall(secondhalo)
             end if
           end do
         end do
-      case (imax) 
+      case (imax)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -764,7 +752,7 @@ subroutine turbbcnswall(secondhalo)
             end if
           end do
         end do
-      case (jmin) 
+      case (jmin)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -781,7 +769,7 @@ subroutine turbbcnswall(secondhalo)
             end if
           end do
         end do
-      case (jmax) 
+      case (jmax)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -798,7 +786,7 @@ subroutine turbbcnswall(secondhalo)
             end if
           end do
         end do
-      case (kmin) 
+      case (kmin)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -815,7 +803,7 @@ subroutine turbbcnswall(secondhalo)
             end if
           end do
         end do
-      case (kmax) 
+      case (kmax)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -880,61 +868,55 @@ subroutine bcturbwall(nn)
 ! determine the turbulence model used and loop over the faces
 ! of the subface and set the values of bmt and bvt for an
 ! implicit treatment.
-    select case  (turbmodel) 
-    case (spalartallmaras, spalartallmarasedwards) 
+    select case  (turbmodel)
+    case (spalartallmaras, spalartallmarasedwards)
 ! spalart-allmaras type of model. value at the wall is zero,
 ! so simply negate the internal value.
-      select case  (bcfaceid(nn)) 
-      case (imin) 
+      select case  (bcfaceid(nn))
+      case (imin)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! bmti1(i,j,itu1,itu1) = one
             result1 = safact(kssa, d2wall(2, i, j))
             bmti1(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (imax) 
+      case (imax)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! bmti2(i,j,itu1,itu1) = one
             result1 = safact(kssa, d2wall(il, i, j))
             bmti2(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (jmin) 
+      case (jmin)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! bmtj1(i,j,itu1,itu1) = one
             result1 = safact(kssa, d2wall(i, 2, j))
             bmtj1(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (jmax) 
+      case (jmax)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! bmtj2(i,j,itu1,itu1) = one
             result1 = safact(kssa, d2wall(i, jl, j))
             bmtj2(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (kmin) 
+      case (kmin)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! bmtk1(i,j,itu1,itu1) = one
             result1 = safact(kssa, d2wall(i, j, 2))
             bmtk1(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (kmax) 
+      case (kmax)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-! bmtk2(i,j,itu1,itu1) = one
             result1 = safact(kssa, d2wall(i, j, kl))
             bmtk2(i, j, itu1, itu1) = -result1
           end do
         end do
       end select
-    case (komegawilcox, komegamodified, mentersst) 
+    case (komegawilcox, komegamodified, mentersst)
 !        ================================================================
 ! k-omega type of models. k is zero on the wall and thus the
 ! halo value is the negative of the first internal cell.
@@ -946,8 +928,8 @@ subroutine bcturbwall(nn)
 ! distance. due to the usage of the dd2wall pointer and the
 ! fact that the original d2wall array starts at 2, there is
 ! an offset of -1 present in dd2wall.
-      select case  (bcfaceid(nn)) 
-      case (imin) 
+      select case  (bcfaceid(nn))
+      case (imin)
         iimax = jl
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -979,7 +961,7 @@ subroutine bcturbwall(nn)
             bvti1(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (imax) 
+      case (imax)
         iimax = jl
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -1011,7 +993,7 @@ subroutine bcturbwall(nn)
             bvti2(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (jmin) 
+      case (jmin)
         iimax = il
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -1043,7 +1025,7 @@ subroutine bcturbwall(nn)
             bvtj1(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (jmax) 
+      case (jmax)
         iimax = il
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -1075,7 +1057,7 @@ subroutine bcturbwall(nn)
             bvtj2(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (kmin) 
+      case (kmin)
         iimax = il
         jjmax = jl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -1107,7 +1089,7 @@ subroutine bcturbwall(nn)
             bvtk1(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (kmax) 
+      case (kmax)
         iimax = il
         jjmax = jl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -1140,47 +1122,47 @@ subroutine bcturbwall(nn)
           end do
         end do
       end select
-    case (ktau) 
+    case (ktau)
 !        ================================================================
 ! k-tau model. both k and tau are zero at the wall, so the
 ! negative value of the internal cell is taken for the halo.
-      select case  (bcfaceid(nn)) 
-      case (imin) 
+      select case  (bcfaceid(nn))
+      case (imin)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmti1(i, j, itu1, itu1) = one
             bmti1(i, j, itu2, itu2) = one
           end do
         end do
-      case (imax) 
+      case (imax)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmti2(i, j, itu1, itu1) = one
             bmti2(i, j, itu2, itu2) = one
           end do
         end do
-      case (jmin) 
+      case (jmin)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmtj1(i, j, itu1, itu1) = one
             bmtj1(i, j, itu2, itu2) = one
           end do
         end do
-      case (jmax) 
+      case (jmax)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmtj2(i, j, itu1, itu1) = one
             bmtj2(i, j, itu2, itu2) = one
           end do
         end do
-      case (kmin) 
+      case (kmin)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmtk1(i, j, itu1, itu1) = one
             bmtk1(i, j, itu2, itu2) = one
           end do
         end do
-      case (kmax) 
+      case (kmax)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmtk2(i, j, itu1, itu1) = one
diff --git a/src/turbulence/sa.F90 b/src/turbulence/sa.F90
index 21192a2b4..7e6af09a3 100644
--- a/src/turbulence/sa.F90
+++ b/src/turbulence/sa.F90
@@ -107,7 +107,7 @@ subroutine saSource
 
     ! Local variables.
     integer(kind=intType) :: i, j, k, nn, ii
-    real(kind=realType) :: dnew,cr1         
+    real(kind=realType) :: dnew,cr1
     real(kind=realType) :: fv1, fv2, ft2
     real(kind=realType) :: ss, sst, nu, dist2Inv, chi, chi2, chi3
     real(kind=realType) :: rr, gg, gg6, termFw, fwSa, term1, term2
@@ -126,8 +126,7 @@ subroutine saSource
     cb3Inv  = one/rsaCb3
 
     ! constants for SA rough
-    ! ks=0.0001 
-    cr1=0.5 
+    cr1=0.5
 
     ! Determine the non-dimensional wheel speed of this block.
 
@@ -248,18 +247,16 @@ subroutine saSource
                 ! and nu) and the functions fv1 and fv2. The latter corrects
                 ! the production term near a viscous wall.
 
-                ! SA rough 
+                ! SA rough
                 dnew     = d2Wall(i,j,k) + 0.03*kssa
 
                 nu       = rlv(i,j,k)/w(i,j,k,irho)
-                ! dist2Inv = one/(d2Wall(i,j,k)**2)
                 dist2Inv = one/(dnew**2)
-                chi      = w(i,j,k,itu1)/nu + cr1*kssa/dnew 
+                chi      = w(i,j,k,itu1)/nu + cr1*kssa/dnew
                 chi2     = chi*chi
                 chi3     = chi*chi2
                 fv1      = chi3/(chi3+cv13)
-                ! fv2      = one - chi/(one + chi*fv1)
-                fv2      = one - w(i,j,k,itu1)/(nu + w(i,j,k,itu1)*fv1) 
+                fv2      = one - w(i,j,k,itu1)/(nu + w(i,j,k,itu1)*fv1)
 
                 ! The function ft2, which is designed to keep a laminar
                 ! solution laminar. When running in fully turbulent mode
diff --git a/src/turbulence/turbBCRoutines.F90 b/src/turbulence/turbBCRoutines.F90
index c91325ace..278fa2074 100644
--- a/src/turbulence/turbBCRoutines.F90
+++ b/src/turbulence/turbBCRoutines.F90
@@ -326,14 +326,12 @@ subroutine bcEddyWall(nn)
 
     ! Determine the face id on which the subface is located and
     ! loop over the faces of the subface and set the eddy viscosity
-    ! in the halo cells. 
+    ! in the halo cells.
 
     select case (BCFaceid(nn))
     case (iMin)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             ! rev(1,i,j) = -rev(2,i,j)
-            !  print "(f12.3)", saFact(kssa, d2Wall(2,i,j))
              rev(1,i,j) = saFact(kssa, d2Wall(2,i,j))*rev(2,i,j)
           enddo
        enddo
@@ -341,8 +339,6 @@ subroutine bcEddyWall(nn)
     case (iMax)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             ! rev(ie,i,j) = -rev(il,i,j)
-            ! print "(f12.3)", saFact(kssa, d2Wall(il,i,j))
              rev(ie,i,j) = saFact(kssa, d2Wall(il,i,j))*rev(il,i,j)
           enddo
        enddo
@@ -350,8 +346,6 @@ subroutine bcEddyWall(nn)
     case (jMin)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             ! rev(i,1,j) = -rev(i,2,j)
-            ! print "(f12.3)", saFact(kssa, d2Wall(i,1,j))
              rev(i,1,j) = saFact(kssa, d2Wall(i,2,j))*rev(i,2,j)
           enddo
        enddo
@@ -359,8 +353,6 @@ subroutine bcEddyWall(nn)
     case (jMax)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             ! rev(i,je,j) = -rev(i,jl,j)
-            ! print "(f12.3)", saFact(kssa, d2Wall(i,je,j))
              rev(i,je,j) = saFact(kssa, d2Wall(i,jl,j))*rev(i,jl,j)
           enddo
        enddo
@@ -368,8 +360,6 @@ subroutine bcEddyWall(nn)
     case (kMin)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             ! rev(i,j,1) = -rev(i,j,2)
-            ! print "(f12.3)", saFact(kssa, d2Wall(i,j,2))
              rev(i,j,1) = saFact(kssa, d2Wall(i,j,2))*rev(i,j,2)
           enddo
        enddo
@@ -377,8 +367,6 @@ subroutine bcEddyWall(nn)
     case (kMax)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             ! rev(i,j,ke) = -rev(i,j,kl)
-            ! print "(f12.3)", saFact(kssa, d2Wall(i,j,kl))
              rev(i,j,ke) = saFact(kssa, d2Wall(i,j,kl))*rev(i,j,kl)
           enddo
        enddo
@@ -858,28 +846,24 @@ subroutine bcTurbWall(nn)
        case (iMin)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                ! bmti1(i,j,itu1,itu1) = one
                 bmti1(i,j,itu1,itu1) = -saFact(kssa, d2Wall(2,i,j))
              enddo
           enddo
        case (iMax)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                ! bmti2(i,j,itu1,itu1) = one
                 bmti2(i,j,itu1,itu1) = -saFact(kssa, d2Wall(il,i,j))
              enddo
           enddo
        case (jMin)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                ! bmtj1(i,j,itu1,itu1) = one
                 bmtj1(i,j,itu1,itu1) = -saFact(kssa, d2Wall(i,2,j))
              enddo
           enddo
        case (jMax)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                ! bmtj2(i,j,itu1,itu1) = one
                 bmtj2(i,j,itu1,itu1) = -saFact(kssa, d2Wall(i,jl,j))
              enddo
           enddo
@@ -887,7 +871,6 @@ subroutine bcTurbWall(nn)
        case (kMin)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                ! bmtk1(i,j,itu1,itu1) = one
                 bmtk1(i,j,itu1,itu1) = -saFact(kssa, d2Wall(i,j,2))
              enddo
           enddo
@@ -895,7 +878,6 @@ subroutine bcTurbWall(nn)
        case (kMax)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                ! bmtk2(i,j,itu1,itu1) = one
                 bmtk2(i,j,itu1,itu1) = -saFact(kssa, d2Wall(i,j,kl))
              enddo
           enddo
@@ -1422,7 +1404,7 @@ function  saFact(ks, d)
    ! local variablse
    real(kind=realType) :: ks
    real(kind=realType) :: d
-   
+
    if (ks .eq. zero) then
       saFact = -one
    else if (d .eq. zero) then

From 5d92cd7e8303d926ad353642f3d99cd60fa0edfe Mon Sep 17 00:00:00 2001
From: Marco Mangano <mmangano@umich.edu>
Date: Tue, 7 Sep 2021 16:57:52 -0400
Subject: [PATCH 15/60] removed unused fast_b files and updated gitignore

---
 .gitignore                                    |    6 +
 .../outputReverseFast/adjointextra_fast_b.f90 |  585 ----------
 .../outputReverseFast/bcdata_fast_b.f90       | 1019 -----------------
 .../oversetutilities_fast_b.f90               |  174 ---
 .../zipperintegrations_fast_b.f90             |  331 ------
 5 files changed, 6 insertions(+), 2109 deletions(-)
 delete mode 100644 src/adjoint/outputReverseFast/adjointextra_fast_b.f90
 delete mode 100644 src/adjoint/outputReverseFast/bcdata_fast_b.f90
 delete mode 100644 src/adjoint/outputReverseFast/oversetutilities_fast_b.f90
 delete mode 100644 src/adjoint/outputReverseFast/zipperintegrations_fast_b.f90

diff --git a/.gitignore b/.gitignore
index 03c447853..3805b34a4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,6 +24,12 @@ src/build/libadflow-f2pywrappers2.f90
 src/build/libadflowmodule.c
 src/build/importTest.py
 
+# unused fast_b routines generated by Tapenade
+src/adjoint/outputReverseFast/adjointextra_fast_b.f90
+src/adjoint/outputReverseFast/bcdata_fast_b.f90
+src/adjoint/outputReverseFast/oversetutilities_fast_b.f90
+src/adjoint/outputReverseFast/zipperintegrations_fast_b.f90
+
 # Regression test auxiliary files:
 input_files.tar.gz
 adflow_input_files.tar.gz
diff --git a/src/adjoint/outputReverseFast/adjointextra_fast_b.f90 b/src/adjoint/outputReverseFast/adjointextra_fast_b.f90
deleted file mode 100644
index 5cbb23259..000000000
--- a/src/adjoint/outputReverseFast/adjointextra_fast_b.f90
+++ /dev/null
@@ -1,585 +0,0 @@
-!        generated by tapenade     (inria, tropics team)
-!  tapenade 3.10 (r5363) -  9 sep 2014 09:53
-!
-module adjointextra_fast_b
-  implicit none
-
-contains
-  subroutine volume_block()
-! this is copy of metric.f90. it was necessary to copy this file
-! since there is debugging stuff in the original that is not
-! necessary for ad.
-    use constants
-    use blockpointers
-    use cgnsgrid
-    use communication
-    use inputtimespectral
-    implicit none
-!
-!      local parameter.
-!
-    real(kind=realtype), parameter :: thresvolume=1.e-2_realtype
-    real(kind=realtype), parameter :: halocellratio=1e-10_realtype
-!
-!      local variables.
-!
-    integer(kind=inttype) :: i, j, k, n, m, l, ii
-    integer(kind=inttype) :: mm
-    real(kind=realtype) :: fact, mult
-    real(kind=realtype) :: xp, yp, zp, vp1, vp2, vp3, vp4, vp5, vp6
-    real(kind=realtype) :: xxp, yyp, zzp
-    real(kind=realtype), dimension(3) :: v1, v2
-    intrinsic abs
-! compute the volumes. the hexahedron is split into 6 pyramids
-! whose volumes are computed. the volume is positive for a
-! right handed block.
-! initialize the volumes to zero. the reasons is that the second
-! level halo's must be initialized to zero and for convenience
-! all the volumes are set to zero.
-    vol = zero
-    do k=1,ke
-      n = k - 1
-      do j=1,je
-        m = j - 1
-        do i=1,ie
-          l = i - 1
-! compute the coordinates of the center of gravity.
-          xp = eighth*(x(i, j, k, 1)+x(i, m, k, 1)+x(i, m, n, 1)+x(i, j&
-&           , n, 1)+x(l, j, k, 1)+x(l, m, k, 1)+x(l, m, n, 1)+x(l, j, n&
-&           , 1))
-          yp = eighth*(x(i, j, k, 2)+x(i, m, k, 2)+x(i, m, n, 2)+x(i, j&
-&           , n, 2)+x(l, j, k, 2)+x(l, m, k, 2)+x(l, m, n, 2)+x(l, j, n&
-&           , 2))
-          zp = eighth*(x(i, j, k, 3)+x(i, m, k, 3)+x(i, m, n, 3)+x(i, j&
-&           , n, 3)+x(l, j, k, 3)+x(l, m, k, 3)+x(l, m, n, 3)+x(l, j, n&
-&           , 3))
-! compute the volumes of the 6 sub pyramids. the
-! arguments of volpym must be such that for a (regular)
-! right handed hexahedron all volumes are positive.
-          call volpym(x(i, j, k, 1), x(i, j, k, 2), x(i, j, k, 3), x(i, &
-&               j, n, 1), x(i, j, n, 2), x(i, j, n, 3), x(i, m, n, 1), x&
-&               (i, m, n, 2), x(i, m, n, 3), x(i, m, k, 1), x(i, m, k, 2&
-&               ), x(i, m, k, 3), vp1)
-          call volpym(x(l, j, k, 1), x(l, j, k, 2), x(l, j, k, 3), x(l, &
-&               m, k, 1), x(l, m, k, 2), x(l, m, k, 3), x(l, m, n, 1), x&
-&               (l, m, n, 2), x(l, m, n, 3), x(l, j, n, 1), x(l, j, n, 2&
-&               ), x(l, j, n, 3), vp2)
-          call volpym(x(i, j, k, 1), x(i, j, k, 2), x(i, j, k, 3), x(l, &
-&               j, k, 1), x(l, j, k, 2), x(l, j, k, 3), x(l, j, n, 1), x&
-&               (l, j, n, 2), x(l, j, n, 3), x(i, j, n, 1), x(i, j, n, 2&
-&               ), x(i, j, n, 3), vp3)
-          call volpym(x(i, m, k, 1), x(i, m, k, 2), x(i, m, k, 3), x(i, &
-&               m, n, 1), x(i, m, n, 2), x(i, m, n, 3), x(l, m, n, 1), x&
-&               (l, m, n, 2), x(l, m, n, 3), x(l, m, k, 1), x(l, m, k, 2&
-&               ), x(l, m, k, 3), vp4)
-          call volpym(x(i, j, k, 1), x(i, j, k, 2), x(i, j, k, 3), x(i, &
-&               m, k, 1), x(i, m, k, 2), x(i, m, k, 3), x(l, m, k, 1), x&
-&               (l, m, k, 2), x(l, m, k, 3), x(l, j, k, 1), x(l, j, k, 2&
-&               ), x(l, j, k, 3), vp5)
-          call volpym(x(i, j, n, 1), x(i, j, n, 2), x(i, j, n, 3), x(l, &
-&               j, n, 1), x(l, j, n, 2), x(l, j, n, 3), x(l, m, n, 1), x&
-&               (l, m, n, 2), x(l, m, n, 3), x(i, m, n, 1), x(i, m, n, 2&
-&               ), x(i, m, n, 3), vp6)
-! set the volume to 1/6 of the sum of the volumes of the
-! pyramid. remember that volpym computes 6 times the
-! volume.
-          vol(i, j, k) = sixth*(vp1+vp2+vp3+vp4+vp5+vp6)
-          if (vol(i, j, k) .ge. 0.) then
-            vol(i, j, k) = vol(i, j, k)
-          else
-            vol(i, j, k) = -vol(i, j, k)
-          end if
-        end do
-      end do
-    end do
-! some additional safety stuff for halo volumes.
-    do k=2,kl
-      do j=2,jl
-        if (vol(1, j, k)/vol(2, j, k) .lt. halocellratio) vol(1, j, k)&
-&          = vol(2, j, k)
-        if (vol(ie, j, k)/vol(il, j, k) .lt. halocellratio) vol(ie, j, k&
-&         ) = vol(il, j, k)
-      end do
-    end do
-    do k=2,kl
-      do i=1,ie
-        if (vol(i, 1, k)/vol(i, 2, k) .lt. halocellratio) vol(i, 1, k)&
-&          = vol(i, 2, k)
-        if (vol(i, je, k)/vol(i, jl, k) .lt. halocellratio) vol(i, je, k&
-&         ) = vol(i, jl, k)
-      end do
-    end do
-    do j=1,je
-      do i=1,ie
-        if (vol(i, j, 1)/vol(i, j, 2) .lt. halocellratio) vol(i, j, 1)&
-&          = vol(i, j, 2)
-        if (vol(i, j, ke)/vol(i, j, kl) .lt. halocellratio) vol(i, j, ke&
-&         ) = vol(i, j, kl)
-      end do
-    end do
-
-  contains
-    subroutine volpym(xa, ya, za, xb, yb, zb, xc, yc, zc, xd, yd, zd, &
-&     volume)
-!
-!         volpym computes 6 times the volume of a pyramid. node p,
-!         whose coordinates are set in the subroutine metric itself,
-!         is the top node and a-b-c-d is the quadrilateral surface.
-!         it is assumed that the cross product vca * vdb points in
-!         the direction of the top node. here vca is the diagonal
-!         running from node c to node a and vdb the diagonal from
-!         node d to node b.
-!
-      use precision
-      implicit none
-!
-!        function type.
-!
-      real(kind=realtype) :: volume
-!
-!        function arguments.
-!
-      real(kind=realtype), intent(in) :: xa, ya, za, xb, yb, zb
-      real(kind=realtype), intent(in) :: xc, yc, zc, xd, yd, zd
-      volume = (xp-fourth*(xa+xb+xc+xd))*((ya-yc)*(zb-zd)-(za-zc)*(yb-yd&
-&       )) + (yp-fourth*(ya+yb+yc+yd))*((za-zc)*(xb-xd)-(xa-xc)*(zb-zd))&
-&       + (zp-fourth*(za+zb+zc+zd))*((xa-xc)*(yb-yd)-(ya-yc)*(xb-xd))
-    end subroutine volpym
-  end subroutine volume_block
-  subroutine metric_block()
-    use constants
-    use blockpointers
-    implicit none
-! local variables.
-    integer(kind=inttype) :: i, j, k, n, m, l, ii
-    real(kind=realtype) :: fact
-    real(kind=realtype) :: xxp, yyp, zzp
-    real(kind=realtype), dimension(3) :: v1, v2
-    intrinsic mod
-! set the factor in the surface normals computation. for a
-! left handed block this factor is negative, such that the
-! normals still point in the direction of increasing index.
-! the formulae used later on assume a right handed block
-! and fact is used to correct this for a left handed block,
-! as well as the scaling factor of 0.5
-    if (righthanded) then
-      fact = half
-    else
-      fact = -half
-    end if
-!
-!  computation of the face normals in i-, j- and k-direction.
-!  formula's are valid for a right handed block; for a left
-!  handed block the correct orientation is obtained via fact.
-!  the normals point in the direction of increasing index.
-!  the absolute value of fact is 0.5, because the cross
-!  product of the two diagonals is twice the normal vector.
-!  note that also the normals of the first level halo cells
-!  are computed. these are needed for the viscous fluxes.
-!
-! projected areas of cell faces in the i direction.
-    do ii=0,ke*je*(ie+1)-1
-! 0:ie
-      i = mod(ii, ie + 1) + 0
-!1:je
-      j = mod(ii/(ie+1), je) + 1
-!1:ke
-      k = ii/((ie+1)*je) + 1
-      n = k - 1
-      m = j - 1
-! determine the two diagonal vectors of the face.
-      v1(1) = x(i, j, n, 1) - x(i, m, k, 1)
-      v1(2) = x(i, j, n, 2) - x(i, m, k, 2)
-      v1(3) = x(i, j, n, 3) - x(i, m, k, 3)
-      v2(1) = x(i, j, k, 1) - x(i, m, n, 1)
-      v2(2) = x(i, j, k, 2) - x(i, m, n, 2)
-      v2(3) = x(i, j, k, 3) - x(i, m, n, 3)
-! the face normal, which is the cross product of the two
-! diagonal vectors times fact; remember that fact is
-! either -0.5 or 0.5.
-      si(i, j, k, 1) = fact*(v1(2)*v2(3)-v1(3)*v2(2))
-      si(i, j, k, 2) = fact*(v1(3)*v2(1)-v1(1)*v2(3))
-      si(i, j, k, 3) = fact*(v1(1)*v2(2)-v1(2)*v2(1))
-    end do
-! projected areas of cell faces in the j direction
-    do ii=0,ke*(je+1)*ie-1
-! 1:ie
-      i = mod(ii, ie) + 1
-!0:je
-      j = mod(ii/ie, je + 1) + 0
-!1:ke
-      k = ii/(ie*(je+1)) + 1
-      n = k - 1
-      l = i - 1
-! determine the two diagonal vectors of the face.
-      v1(1) = x(i, j, n, 1) - x(l, j, k, 1)
-      v1(2) = x(i, j, n, 2) - x(l, j, k, 2)
-      v1(3) = x(i, j, n, 3) - x(l, j, k, 3)
-      v2(1) = x(l, j, n, 1) - x(i, j, k, 1)
-      v2(2) = x(l, j, n, 2) - x(i, j, k, 2)
-      v2(3) = x(l, j, n, 3) - x(i, j, k, 3)
-! the face normal, which is the cross product of the two
-! diagonal vectors times fact; remember that fact is
-! either -0.5 or 0.5.
-      sj(i, j, k, 1) = fact*(v1(2)*v2(3)-v1(3)*v2(2))
-      sj(i, j, k, 2) = fact*(v1(3)*v2(1)-v1(1)*v2(3))
-      sj(i, j, k, 3) = fact*(v1(1)*v2(2)-v1(2)*v2(1))
-    end do
-! projected areas of cell faces in the k direction.
-    do ii=0,(ke+1)*je*ie-1
-! 1:ie
-      i = mod(ii, ie) + 1
-!1:je
-      j = mod(ii/ie, je) + 1
-!0:ke
-      k = ii/(ie*je) + 0
-      m = j - 1
-      l = i - 1
-! determine the two diagonal vectors of the face.
-      v1(1) = x(i, j, k, 1) - x(l, m, k, 1)
-      v1(2) = x(i, j, k, 2) - x(l, m, k, 2)
-      v1(3) = x(i, j, k, 3) - x(l, m, k, 3)
-      v2(1) = x(l, j, k, 1) - x(i, m, k, 1)
-      v2(2) = x(l, j, k, 2) - x(i, m, k, 2)
-      v2(3) = x(l, j, k, 3) - x(i, m, k, 3)
-! the face normal, which is the cross product of the two
-! diagonal vectors times fact; remember that fact is
-! either -0.5 or 0.5.
-      sk(i, j, k, 1) = fact*(v1(2)*v2(3)-v1(3)*v2(2))
-      sk(i, j, k, 2) = fact*(v1(3)*v2(1)-v1(1)*v2(3))
-      sk(i, j, k, 3) = fact*(v1(1)*v2(2)-v1(2)*v2(1))
-    end do
-  end subroutine metric_block
-  subroutine boundarynormals()
-!  the unit normals on the boundary faces. these always point
-!  out of the domain, so a multiplication by -1 is needed for
-!  the imin, jmin and kmin boundaries.
-!
-    use constants
-    use blockpointers
-    use cgnsgrid
-    use communication
-    use inputtimespectral
-    implicit none
-! local variables.
-    integer(kind=inttype) :: i, j, ii
-    integer(kind=inttype) :: mm
-    real(kind=realtype) :: fact, mult
-    real(kind=realtype) :: xxp, yyp, zzp
-    intrinsic mod
-    intrinsic sqrt
-!loop over the boundary subfaces of this block.
-bocoloop:do mm=1,nbocos
-! loop over the boundary faces of the subface.
-      do ii=0,(bcdata(mm)%jcend-bcdata(mm)%jcbeg+1)*(bcdata(mm)%icend-&
-&         bcdata(mm)%icbeg+1)-1
-        i = mod(ii, bcdata(mm)%icend - bcdata(mm)%icbeg + 1) + bcdata(mm&
-&         )%icbeg
-        j = ii/(bcdata(mm)%icend-bcdata(mm)%icbeg+1) + bcdata(mm)%jcbeg
-        select case  (bcfaceid(mm)) 
-        case (imin) 
-          mult = -one
-          xxp = si(1, i, j, 1)
-          yyp = si(1, i, j, 2)
-          zzp = si(1, i, j, 3)
-        case (imax) 
-          mult = one
-          xxp = si(il, i, j, 1)
-          yyp = si(il, i, j, 2)
-          zzp = si(il, i, j, 3)
-        case (jmin) 
-          mult = -one
-          xxp = sj(i, 1, j, 1)
-          yyp = sj(i, 1, j, 2)
-          zzp = sj(i, 1, j, 3)
-        case (jmax) 
-          mult = one
-          xxp = sj(i, jl, j, 1)
-          yyp = sj(i, jl, j, 2)
-          zzp = sj(i, jl, j, 3)
-        case (kmin) 
-          mult = -one
-          xxp = sk(i, j, 1, 1)
-          yyp = sk(i, j, 1, 2)
-          zzp = sk(i, j, 1, 3)
-        case (kmax) 
-          mult = one
-          xxp = sk(i, j, kl, 1)
-          yyp = sk(i, j, kl, 2)
-          zzp = sk(i, j, kl, 3)
-        end select
-! compute the inverse of the length of the normal vector
-! and possibly correct for inward pointing.
-        fact = sqrt(xxp*xxp + yyp*yyp + zzp*zzp)
-        if (fact .gt. zero) fact = mult/fact
-! compute the unit normal.
-        bcdata(mm)%norm(i, j, 1) = fact*xxp
-        bcdata(mm)%norm(i, j, 2) = fact*yyp
-        bcdata(mm)%norm(i, j, 3) = fact*zzp
-      end do
-    end do bocoloop
-  end subroutine boundarynormals
-  subroutine xhalo_block()
-!
-!       xhalo determines the coordinates of the nodal halo's.
-!       first it sets all halo coordinates by simple extrapolation,
-!       then the symmetry planes are treated (also the unit normal of
-!       symmetry planes are determined) and finally an exchange is
-!       made for the internal halo's.
-!
-    use constants
-    use blockpointers
-    use communication
-    use inputtimespectral
-    implicit none
-!
-!      local variables.
-!
-    integer(kind=inttype) :: mm, i, j, k
-    integer(kind=inttype) :: ibeg, iend, jbeg, jend, iimax, jjmax
-    logical :: err
-    real(kind=realtype) :: length, dot
-    real(kind=realtype), dimension(3) :: v1, v2, norm
-    intrinsic sqrt
-! extrapolation in i-direction.
-    do k=1,kl
-      do j=1,jl
-        x(0, j, k, 1) = two*x(1, j, k, 1) - x(2, j, k, 1)
-        x(0, j, k, 2) = two*x(1, j, k, 2) - x(2, j, k, 2)
-        x(0, j, k, 3) = two*x(1, j, k, 3) - x(2, j, k, 3)
-        x(ie, j, k, 1) = two*x(il, j, k, 1) - x(nx, j, k, 1)
-        x(ie, j, k, 2) = two*x(il, j, k, 2) - x(nx, j, k, 2)
-        x(ie, j, k, 3) = two*x(il, j, k, 3) - x(nx, j, k, 3)
-      end do
-    end do
-! extrapolation in j-direction.
-    do k=1,kl
-      do i=0,ie
-        x(i, 0, k, 1) = two*x(i, 1, k, 1) - x(i, 2, k, 1)
-        x(i, 0, k, 2) = two*x(i, 1, k, 2) - x(i, 2, k, 2)
-        x(i, 0, k, 3) = two*x(i, 1, k, 3) - x(i, 2, k, 3)
-        x(i, je, k, 1) = two*x(i, jl, k, 1) - x(i, ny, k, 1)
-        x(i, je, k, 2) = two*x(i, jl, k, 2) - x(i, ny, k, 2)
-        x(i, je, k, 3) = two*x(i, jl, k, 3) - x(i, ny, k, 3)
-      end do
-    end do
-! extrapolation in k-direction.
-    do j=0,je
-      do i=0,ie
-        x(i, j, 0, 1) = two*x(i, j, 1, 1) - x(i, j, 2, 1)
-        x(i, j, 0, 2) = two*x(i, j, 1, 2) - x(i, j, 2, 2)
-        x(i, j, 0, 3) = two*x(i, j, 1, 3) - x(i, j, 2, 3)
-        x(i, j, ke, 1) = two*x(i, j, kl, 1) - x(i, j, nz, 1)
-        x(i, j, ke, 2) = two*x(i, j, kl, 2) - x(i, j, nz, 2)
-        x(i, j, ke, 3) = two*x(i, j, kl, 3) - x(i, j, nz, 3)
-      end do
-    end do
-!
-!           mirror the halo coordinates adjacent to the symmetry
-!           planes
-!
-! loop over boundary subfaces.
-loopbocos:do mm=1,nbocos
-! the actual correction of the coordinates only takes
-! place for symmetry planes.
-      if (bctype(mm) .eq. symm) then
-! set some variables, depending on the block face on
-! which the subface is located.
-        norm(1) = bcdata(mm)%symnorm(1)
-        norm(2) = bcdata(mm)%symnorm(2)
-        norm(3) = bcdata(mm)%symnorm(3)
-        length = sqrt(norm(1)**2 + norm(2)**2 + norm(3)**2)
-! compute the unit normal of the subface.
-        norm(1) = norm(1)/length
-        norm(2) = norm(2)/length
-        norm(3) = norm(3)/length
-! see xhalo_block for comments for below:
-        if (length .gt. eps) then
-          select case  (bcfaceid(mm)) 
-          case (imin) 
-            ibeg = jnbeg(mm)
-            iend = jnend(mm)
-            iimax = jl
-            jbeg = knbeg(mm)
-            jend = knend(mm)
-            jjmax = kl
-            if (ibeg .eq. 1) ibeg = 0
-            if (iend .eq. iimax) iend = iimax + 1
-            if (jbeg .eq. 1) jbeg = 0
-            if (jend .eq. jjmax) jend = jjmax + 1
-            do j=jbeg,jend
-              do i=ibeg,iend
-                v1(1) = x(1, i, j, 1) - x(2, i, j, 1)
-                v1(2) = x(1, i, j, 2) - x(2, i, j, 2)
-                v1(3) = x(1, i, j, 3) - x(2, i, j, 3)
-                dot = two*(v1(1)*norm(1)+v1(2)*norm(2)+v1(3)*norm(3))
-                x(0, i, j, 1) = x(2, i, j, 1) + dot*norm(1)
-                x(0, i, j, 2) = x(2, i, j, 2) + dot*norm(2)
-                x(0, i, j, 3) = x(2, i, j, 3) + dot*norm(3)
-              end do
-            end do
-          case (imax) 
-            ibeg = jnbeg(mm)
-            iend = jnend(mm)
-            iimax = jl
-            jbeg = knbeg(mm)
-            jend = knend(mm)
-            jjmax = kl
-            if (ibeg .eq. 1) ibeg = 0
-            if (iend .eq. iimax) iend = iimax + 1
-            if (jbeg .eq. 1) jbeg = 0
-            if (jend .eq. jjmax) jend = jjmax + 1
-            do j=jbeg,jend
-              do i=ibeg,iend
-                v1(1) = x(il, i, j, 1) - x(nx, i, j, 1)
-                v1(2) = x(il, i, j, 2) - x(nx, i, j, 2)
-                v1(3) = x(il, i, j, 3) - x(nx, i, j, 3)
-                dot = two*(v1(1)*norm(1)+v1(2)*norm(2)+v1(3)*norm(3))
-                x(ie, i, j, 1) = x(nx, i, j, 1) + dot*norm(1)
-                x(ie, i, j, 2) = x(nx, i, j, 2) + dot*norm(2)
-                x(ie, i, j, 3) = x(nx, i, j, 3) + dot*norm(3)
-              end do
-            end do
-          case (jmin) 
-            ibeg = inbeg(mm)
-            iend = inend(mm)
-            iimax = il
-            jbeg = knbeg(mm)
-            jend = knend(mm)
-            jjmax = kl
-            if (ibeg .eq. 1) ibeg = 0
-            if (iend .eq. iimax) iend = iimax + 1
-            if (jbeg .eq. 1) jbeg = 0
-            if (jend .eq. jjmax) jend = jjmax + 1
-            do j=jbeg,jend
-              do i=ibeg,iend
-                v1(1) = x(i, 1, j, 1) - x(i, 2, j, 1)
-                v1(2) = x(i, 1, j, 2) - x(i, 2, j, 2)
-                v1(3) = x(i, 1, j, 3) - x(i, 2, j, 3)
-                dot = two*(v1(1)*norm(1)+v1(2)*norm(2)+v1(3)*norm(3))
-                x(i, 0, j, 1) = x(i, 2, j, 1) + dot*norm(1)
-                x(i, 0, j, 2) = x(i, 2, j, 2) + dot*norm(2)
-                x(i, 0, j, 3) = x(i, 2, j, 3) + dot*norm(3)
-              end do
-            end do
-          case (jmax) 
-            ibeg = inbeg(mm)
-            iend = inend(mm)
-            iimax = il
-            jbeg = knbeg(mm)
-            jend = knend(mm)
-            jjmax = kl
-            if (ibeg .eq. 1) ibeg = 0
-            if (iend .eq. iimax) iend = iimax + 1
-            if (jbeg .eq. 1) jbeg = 0
-            if (jend .eq. jjmax) jend = jjmax + 1
-            do j=jbeg,jend
-              do i=ibeg,iend
-                v1(1) = x(i, jl, j, 1) - x(i, ny, j, 1)
-                v1(2) = x(i, jl, j, 2) - x(i, ny, j, 2)
-                v1(3) = x(i, jl, j, 3) - x(i, ny, j, 3)
-                dot = two*(v1(1)*norm(1)+v1(2)*norm(2)+v1(3)*norm(3))
-                x(i, je, j, 1) = x(i, ny, j, 1) + dot*norm(1)
-                x(i, je, j, 2) = x(i, ny, j, 2) + dot*norm(2)
-                x(i, je, j, 3) = x(i, ny, j, 3) + dot*norm(3)
-              end do
-            end do
-          case (kmin) 
-            ibeg = inbeg(mm)
-            iend = inend(mm)
-            iimax = il
-            jbeg = jnbeg(mm)
-            jend = jnend(mm)
-            jjmax = jl
-            if (ibeg .eq. 1) ibeg = 0
-            if (iend .eq. iimax) iend = iimax + 1
-            if (jbeg .eq. 1) jbeg = 0
-            if (jend .eq. jjmax) jend = jjmax + 1
-            do j=jbeg,jend
-              do i=ibeg,iend
-                v1(1) = x(i, j, 1, 1) - x(i, j, 2, 1)
-                v1(2) = x(i, j, 1, 2) - x(i, j, 2, 2)
-                v1(3) = x(i, j, 1, 3) - x(i, j, 2, 3)
-                dot = two*(v1(1)*norm(1)+v1(2)*norm(2)+v1(3)*norm(3))
-                x(i, j, 0, 1) = x(i, j, 2, 1) + dot*norm(1)
-                x(i, j, 0, 2) = x(i, j, 2, 2) + dot*norm(2)
-                x(i, j, 0, 3) = x(i, j, 2, 3) + dot*norm(3)
-              end do
-            end do
-          case (kmax) 
-            ibeg = inbeg(mm)
-            iend = inend(mm)
-            iimax = il
-            jbeg = jnbeg(mm)
-            jend = jnend(mm)
-            jjmax = jl
-            if (ibeg .eq. 1) ibeg = 0
-            if (iend .eq. iimax) iend = iimax + 1
-            if (jbeg .eq. 1) jbeg = 0
-            if (jend .eq. jjmax) jend = jjmax + 1
-            do j=jbeg,jend
-              do i=ibeg,iend
-                v1(1) = x(i, j, kl, 1) - x(i, j, nz, 1)
-                v1(2) = x(i, j, kl, 2) - x(i, j, nz, 2)
-                v1(3) = x(i, j, kl, 3) - x(i, j, nz, 3)
-                dot = two*(v1(1)*norm(1)+v1(2)*norm(2)+v1(3)*norm(3))
-                x(i, j, ke, 1) = x(i, j, nz, 1) + dot*norm(1)
-                x(i, j, ke, 2) = x(i, j, nz, 2) + dot*norm(2)
-                x(i, j, ke, 3) = x(i, j, nz, 3) + dot*norm(3)
-              end do
-            end do
-          end select
-        end if
-      end if
-    end do loopbocos
-  end subroutine xhalo_block
-  subroutine resscale()
-    use constants
-    use blockpointers, only : il, jl, kl, nx, ny, nz, volref, dw
-    use flowvarrefstate, only : nwf, nt1, nt2
-    use inputiteration, only : turbresscale
-    implicit none
-! local variables
-    integer(kind=inttype) :: i, j, k, ii, nturb
-    real(kind=realtype) :: ovol
-    intrinsic mod
-! divide through by the reference volume
-    nturb = nt2 - nt1 + 1
-    do ii=0,nx*ny*nz-1
-      i = mod(ii, nx) + 2
-      j = mod(ii/nx, ny) + 2
-      k = ii/(nx*ny) + 2
-      ovol = one/volref(i, j, k)
-      dw(i, j, k, 1:nwf) = dw(i, j, k, 1:nwf)*ovol
-      dw(i, j, k, nt1:nt2) = dw(i, j, k, nt1:nt2)*ovol*turbresscale(1:&
-&       nturb)
-    end do
-  end subroutine resscale
-  subroutine sumdwandfw()
-    use constants
-    use blockpointers, only : il, jl, kl, dw, fw, iblank
-    use flowvarrefstate, only : nwf
-    implicit none
-! local variables
-    integer(kind=inttype) :: i, j, k, l
-    intrinsic real
-    intrinsic max
-    real(kind=realtype) :: x1
-    real(kind=realtype) :: max1
-    do l=1,nwf
-      do k=2,kl
-        do j=2,jl
-          do i=2,il
-            x1 = real(iblank(i, j, k), realtype)
-            if (x1 .lt. zero) then
-              max1 = zero
-            else
-              max1 = x1
-            end if
-            dw(i, j, k, l) = (dw(i, j, k, l)+fw(i, j, k, l))*max1
-          end do
-        end do
-      end do
-    end do
-  end subroutine sumdwandfw
-end module adjointextra_fast_b
diff --git a/src/adjoint/outputReverseFast/bcdata_fast_b.f90 b/src/adjoint/outputReverseFast/bcdata_fast_b.f90
deleted file mode 100644
index 361b759e9..000000000
--- a/src/adjoint/outputReverseFast/bcdata_fast_b.f90
+++ /dev/null
@@ -1,1019 +0,0 @@
-!        generated by tapenade     (inria, tropics team)
-!  tapenade 3.10 (r5363) -  9 sep 2014 09:53
-!
-module bcdata_fast_b
-  use constants
-  use bcdatamod
-  implicit none
-
-contains
-! ---------------------------------------------------------------
-! routines that set the appropriate variable names for bcs with
-! bcdata.
-  subroutine setbcvarnamesisothermalwall()
-    use cgnsnames
-    use constants
-    implicit none
-    nbcvar = nbcvarisothermalwall
-    bcvarnames(1) = cgnstemp
-  end subroutine setbcvarnamesisothermalwall
-  subroutine setbcvarnamessubsonicinflow()
-    use constants
-    use cgnsnames
-    use inputphysics, only : equations
-    use flowvarrefstate, only : nwt
-    implicit none
-!
-!      local variables.
-!
-    logical :: varallowed
-    nbcvar = nbcvarsubsonicinflow
-    if (equations .eq. ransequations) nbcvar = nbcvar + nwt
-    bcvarnames(1) = cgnsptot
-    bcvarnames(2) = cgnsttot
-    bcvarnames(3) = cgnsrhotot
-    bcvarnames(4) = cgnsvelanglex
-    bcvarnames(5) = cgnsvelangley
-    bcvarnames(6) = cgnsvelanglez
-    bcvarnames(7) = cgnsvelvecx
-    bcvarnames(8) = cgnsvelvecy
-    bcvarnames(9) = cgnsvelvecz
-    bcvarnames(10) = cgnsvelvecr
-    bcvarnames(11) = cgnsvelvectheta
-    bcvarnames(12) = cgnsdensity
-    bcvarnames(13) = cgnsvelx
-    bcvarnames(14) = cgnsvely
-    bcvarnames(15) = cgnsvelz
-    bcvarnames(16) = cgnsvelr
-    bcvarnames(17) = cgnsveltheta
-    call setbcvarnamesturb(17_inttype)
-  end subroutine setbcvarnamessubsonicinflow
-  subroutine setbcvarnamessubsonicoutflow()
-    use cgnsnames
-    use constants
-    use flowvarrefstate, only : nwt
-    implicit none
-    nbcvar = nbcvarsubsonicoutflow
-    bcvarnames(1) = cgnspressure
-  end subroutine setbcvarnamessubsonicoutflow
-  subroutine setbcvarnamessupersonicinflow()
-    use constants
-    use cgnsnames
-    use inputphysics, only : equations
-    use flowvarrefstate, only : nwt
-    implicit none
-    nbcvar = nbcvarsupersonicinflow
-    if (equations .eq. ransequations) nbcvar = nbcvar + nwt
-    bcvarnames(1) = cgnsdensity
-    bcvarnames(2) = cgnspressure
-    bcvarnames(3) = cgnsvelx
-    bcvarnames(4) = cgnsvely
-    bcvarnames(5) = cgnsvelz
-    bcvarnames(6) = cgnsvelr
-    bcvarnames(7) = cgnsveltheta
-    call setbcvarnamesturb(7_inttype)
-  end subroutine setbcvarnamessupersonicinflow
-  subroutine setbcvarnamesturb(offset)
-!
-!       setbcvarnamesturb sets the names for the turbulence
-!       variables to be determined. this depends on the turbulence
-!       model. if not the rans equations are solved an immediate
-!       return is made.
-!
-    use constants
-    use cgnsnames
-    use inputphysics, only : equations, turbmodel
-    implicit none
-!
-!      subroutine arguments.
-!
-    integer(kind=inttype), intent(in) :: offset
-! return immediately if not the rans equations are solved.
-    if (equations .ne. ransequations) then
-      return
-    else
-! determine the turbulence model and set the names accordingly.
-      select case  (turbmodel) 
-      case (spalartallmaras, spalartallmarasedwards) 
-        bcvarnames(offset+1) = cgnsturbsanu
-      case (komegawilcox, komegamodified, mentersst) 
-        bcvarnames(offset+1) = cgnsturbk
-        bcvarnames(offset+2) = cgnsturbomega
-      case (ktau) 
-        bcvarnames(offset+1) = cgnsturbk
-        bcvarnames(offset+2) = cgnsturbtau
-      case (v2f) 
-        bcvarnames(offset+1) = cgnsturbk
-        bcvarnames(offset+2) = cgnsturbepsilon
-        bcvarnames(offset+3) = cgnsturbv2
-        bcvarnames(offset+4) = cgnsturbf
-      end select
-    end if
-  end subroutine setbcvarnamesturb
-! ---------------------------------------------------------------
-! --------------------------------------
-!                utilities
-! --------------------------------------
-  subroutine computehtot(tt, ht)
-!
-!       computehtot computes the total enthalpy from the given total
-!       temperature. the total enthalpy is the integral of cp, which
-!       is a very simple expression for constant cp. for a variable cp
-!       it is a bit more work.
-!
-    use constants
-    use cpcurvefits
-    use communication, only : myid
-    use inputphysics, only : cpmodel, gammaconstant, rgasdim
-    use flowvarrefstate, only : pinfdim
-    implicit none
-!
-!      subroutine arguments.
-!
-    real(kind=realtype), intent(in) :: tt
-    real(kind=realtype), intent(out) :: ht
-!
-!      local variables.
-!
-    integer(kind=inttype) :: ii, nn, mm, start
-    real(kind=realtype) :: t2
-!        ================================================================
-! determine the cp model used in the computation.
-    select case  (cpmodel) 
-    case (cpconstant) 
-! constant cp. the total enthalpy is simply cp*tt.
-      ht = gammaconstant*rgasdim*tt/(gammaconstant-one)
-    end select
-  end subroutine computehtot
-  subroutine unitvectorscylsystem(boco)
-!
-!       unitvectorscylsystem determines the unit vectors of the
-!       local coordinate systen of the boundary face defined by the
-!       data in bcdatamod. in that local system the axial direction
-!       is rotation axis.
-!
-    use constants
-    use blockpointers, only : bcfaceid, bcdata, x, si, sj, sk, il, jl,&
-&   kl, sectionid
-    use section, only : sections
-    implicit none
-!
-!      subroutine arguments.
-!
-    integer(kind=inttype), intent(in) :: boco
-!
-!      local variables.
-!
-    integer(kind=inttype) :: i, j
-    real(kind=realtype) :: factinlet, var
-    real(kind=realtype), dimension(3) :: dir
-    real(kind=realtype), dimension(:, :, :), pointer :: ss
-    intrinsic abs
-    intrinsic sqrt
-    real(kind=realtype) :: abs0
-! set the pointers for coordinates and normals of the block
-! face on which this subface is located. set factinlet
-! such that factinlet*normals points into the domain.
-    select case  (bcfaceid(boco)) 
-    case (imin) 
-      xf => x(1, :, :, :)
-      ss => si(1, :, :, :)
-      factinlet = one
-    case (imax) 
-      xf => x(il, :, :, :)
-      ss => si(il, :, :, :)
-      factinlet = -one
-    case (jmin) 
-      xf => x(:, 1, :, :)
-      ss => sj(:, 1, :, :)
-      factinlet = one
-    case (jmax) 
-      xf => x(:, jl, :, :)
-      ss => sj(:, jl, :, :)
-      factinlet = -one
-    case (kmin) 
-      xf => x(:, :, 1, :)
-      ss => sk(:, :, 1, :)
-      factinlet = one
-    case (kmax) 
-      xf => x(:, :, kl, :)
-      ss => sk(:, :, kl, :)
-      factinlet = -one
-    end select
-! loop over the physical range of the subface to store the sum of
-! the normals. note that jbeg, jend, ibeg, iend cannot be used
-! here, because they may include the halo faces. instead the
-! nodal range is used, which defines the original subface. the
-! offset of +1 in the start index is there because you need
-! the face id's.
-    dir(1) = zero
-    dir(2) = zero
-    dir(3) = zero
-    do j=bcdata(boco)%jnbeg+1,bcdata(boco)%jnend
-      do i=bcdata(boco)%inbeg+1,bcdata(boco)%inend
-        dir(1) = dir(1) + ss(i, j, 1)
-        dir(2) = dir(2) + ss(i, j, 2)
-        dir(3) = dir(3) + ss(i, j, 3)
-      end do
-    end do
-! multiply by factinlet to make sure that the normal
-! is inward pointing.
-    dir(1) = dir(1)*factinlet
-    dir(2) = dir(2)*factinlet
-    dir(3) = dir(3)*factinlet
-! determine three unit vectors, which define the local cartesian
-! coordinate system of the rotation axis. first the axial
-! direction. if the axis cannot be determined from rotation info,
-! it is assumed to be the x-axis.
-    axis = sections(sectionid)%rotaxis
-    var = axis(1)**2 + axis(2)**2 + axis(3)**2
-    if (var .lt. half) then
-! no rotation axis specified. assume the x-axis
-! and set the logical axassumed to .true.
-      axis(1) = one
-      axis(2) = zero
-      axis(3) = zero
-      axassumed = .true.
-    end if
-! the axial axis must be such that it points into the
-! computational domain. if the dot product with dir is
-! negative the direction of axis should be reversed.
-    var = axis(1)*dir(1) + axis(2)*dir(2) + axis(3)*dir(3)
-    if (var .lt. zero) then
-      axis(1) = -axis(1)
-      axis(2) = -axis(2)
-      axis(3) = -axis(3)
-    end if
-    if (axis(2) .ge. 0.) then
-      abs0 = axis(2)
-    else
-      abs0 = -axis(2)
-    end if
-! two unit vectors define the radial plane. these vectors are
-! defined up to a constants. just pick a direction for the second
-! and create a unit vector normal to axis.
-    if (abs0 .lt. 0.707107_realtype) then
-      radvec1(1) = zero
-      radvec1(2) = one
-      radvec1(3) = zero
-    else
-      radvec1(1) = zero
-      radvec1(2) = zero
-      radvec1(3) = one
-    end if
-    var = radvec1(1)*axis(1) + radvec1(2)*axis(2) + radvec1(3)*axis(3)
-    radvec1(1) = radvec1(1) - var*axis(1)
-    radvec1(2) = radvec1(2) - var*axis(2)
-    radvec1(3) = radvec1(3) - var*axis(3)
-    var = one/sqrt(radvec1(1)**2+radvec1(2)**2+radvec1(3)**2)
-    radvec1(1) = radvec1(1)*var
-    radvec1(2) = radvec1(2)*var
-    radvec1(3) = radvec1(3)*var
-! the second vector of the radial plane is obtained
-! by taking the cross product of axis and radvec1.
-    radvec2(1) = axis(2)*radvec1(3) - axis(3)*radvec1(2)
-    radvec2(2) = axis(3)*radvec1(1) - axis(1)*radvec1(3)
-    radvec2(3) = axis(1)*radvec1(2) - axis(2)*radvec1(1)
-  end subroutine unitvectorscylsystem
-! ---------------------------------------------------------------
-! routines that set the actual bcdata values from the cgns data set
-! information.
-! ---------------------------------------------------------------
-  subroutine bcdataisothermalwall(boco, bcvararray, ibeg, iend, jbeg, &
-&   jend)
-!
-!       bcdataisothermalwall tries to extract the wall temperature
-!       for the currently active boundary face, which is an isothermal
-!       viscous wall.
-!
-    use constants
-    use cgnsnames
-    use blockpointers, only : bcfaceid, bcdata, nbkglobal
-    use utils_fast_b, only : terminate, sitemperature
-    use flowvarrefstate, only : tref
-    implicit none
-!
-!      subroutine arguments.
-!
-    integer(kind=inttype) :: boco
-    integer(kind=inttype) :: ibeg, iend, jbeg, jend
-    real(kind=realtype), dimension(ibeg:iend, jbeg:jend, nbcvarmax) :: &
-&   bcvararray
-!
-!      local variables.
-!
-    integer :: ierr
-    integer(kind=inttype) :: i, j
-    real(kind=realtype) :: mult, trans
-    character(len=maxstringlen) :: errormessage
-    intrinsic trim
-! write an error message and terminate if it was not
-! possible to determine the temperature.
-    if (.not.bcvarpresent(1)) then
-      write(errormessage, 100) trim(cgnsdoms(nbkglobal)%zonename), trim(&
-&     cgnsdoms(nbkglobal)%bocoinfo(cgnsboco)%boconame)
-      call terminate('bcdataisothermalwall', errormessage)
-    end if
-! convert to si-units and store the temperature in tns_wall.
-    call sitemperature(temp(1), mult, trans)
-    do j=jbeg,jend
-      do i=ibeg,iend
-        bcdata(boco)%tns_wall(i, j) = (mult*bcvararray(i, j, 1)+trans)/&
-&         tref
-      end do
-    end do
- 100 format('zone ',a,', boundary subface ',a, &
-&          ': wall temperature not specified for isothermal wall')
-  end subroutine bcdataisothermalwall
-  subroutine bcdatasubsonicinflow(boco, bcvararray, ibeg, iend, jbeg, &
-&   jend, allturbpresent)
-!
-!       bcdatasubsonicinflow tries to extract the prescribed data
-!       for the currently active boundary face, which is a subsonic
-!       inflow. either total conditions and velocity direction or the
-!       velocity and density can be prescribed. in the latter case the
-!       mass flow is prescribed, which is okay as long as the flow is
-!       not choked.
-!
-    use constants
-    use cgnsnames
-    use blockpointers, only : nbkglobal, sectionid, bcfaceid, bcdata
-    use flowvarrefstate, only : tref, pref, href, rhoref, muref, nwt, &
-&   winf
-    use inputphysics, only : equations
-    use utils_fast_b, only : sidensity, sivelocity, sipressure, siangle, &
-&   sitemperature, terminate
-    implicit none
-!
-!      subroutine arguments.
-!
-    integer(kind=inttype), intent(in) :: boco
-    integer(kind=inttype) :: ibeg, iend, jbeg, jend
-    real(kind=realtype), dimension(ibeg:iend, jbeg:jend, nbcvarmax) :: &
-&   bcvararray
-    logical, intent(inout) :: allturbpresent
-!
-!      local variables.
-!
-    integer :: ierr, nn
-    logical :: ptpresent, ttpresent, rhotpresent
-    logical :: axpresent, aypresent, azpresent
-    logical :: xdirpresent, ydirpresent, zdirpresent
-    logical :: rdirpresent, tdirpresent
-    logical :: velxpresent, velypresent, velzpresent
-    logical :: rhopresent, velrpresent, veltpresent
-    logical :: totpresent, velpresent, dirpresent
-    character(len=maxstringlen) :: errormessage
-    intrinsic trim
-! store the logicals, which indicate succes or failure
-! a bit more readable.
-    ptpresent = bcvarpresent(1)
-    ttpresent = bcvarpresent(2)
-    rhotpresent = bcvarpresent(3)
-    axpresent = bcvarpresent(4)
-    aypresent = bcvarpresent(5)
-    azpresent = bcvarpresent(6)
-    xdirpresent = bcvarpresent(7)
-    ydirpresent = bcvarpresent(8)
-    zdirpresent = bcvarpresent(9)
-    rdirpresent = bcvarpresent(10)
-    tdirpresent = bcvarpresent(11)
-    rhopresent = bcvarpresent(12)
-    velxpresent = bcvarpresent(13)
-    velypresent = bcvarpresent(14)
-    velzpresent = bcvarpresent(15)
-    velrpresent = bcvarpresent(16)
-    veltpresent = bcvarpresent(17)
-! check if the total conditions are present.
-    nn = 0
-    if (ptpresent) nn = nn + 1
-    if (ttpresent) nn = nn + 1
-    if (rhotpresent) nn = nn + 1
-    totpresent = .false.
-    if (nn .ge. 2) totpresent = .true.
-! check if a velocity direction is present.
-    dirpresent = .false.
-    if (xdirpresent .and. rdirpresent) dirpresent = .true.
-    if ((axpresent .or. xdirpresent) .and. (aypresent .or. ydirpresent) &
-&       .and. (azpresent .or. zdirpresent)) dirpresent = .true.
-! check if a velocity vector is present.
-    velpresent = .false.
-    if (velxpresent .and. velrpresent) velpresent = .true.
-    if (velxpresent .and. velypresent .and. velzpresent) velpresent = &
-&       .true.
-! determine the situation we have here.
-    if (totpresent .and. dirpresent) then
-! total conditions and velocity direction are prescribed.
-! determine the values for the faces of the subface.
-      call totalsubsonicinlet()
-    else
-! not enough data is prescribed. print an error message
-! and exit.
-      write(errormessage, 100) trim(cgnsdoms(nbkglobal)%zonename), trim(&
-&     cgnsdoms(nbkglobal)%bocoinfo(cgnsboco)%boconame)
-      call terminate('bcdatasubsonicinflow', errormessage)
-    end if
-! set the turbulence variables and check if all of them are
-! prescribed. if not set allturbpresent to .false.
-    allturbpresent = setbcvarturb(17_inttype, boco, bcvararray, ibeg, &
-&     iend, jbeg, jend, bcdata(boco)%turbinlet)
- 100 format('zone ',a,', boundary subface ',a, &
-&          ': not enough data specified for subsonic inlet')
-
-  contains
-!=================================================================
-!===============================================================
-    subroutine totalsubsonicinlet()
-!
-!         totalsubsonicinlet converts the prescribed total
-!         conditions and velocity direction into a useable format.
-!
-      use constants
-      use communication, only : adflow_comm_world
-      use inputphysics, only : rgasdim
-      use section, only : sections
-      implicit none
-!
-!        local variables.
-!
-      integer(kind=inttype) :: i, j, nn
-      real(kind=realtype) :: rhot, mult, trans, hdim, tdim
-      real(kind=realtype) :: ax, r1, r2, var, wax, wrad, wtheta
-      real(kind=realtype), dimension(3) :: xc, dir
-      integer :: ierr
-      intrinsic max
-      intrinsic sqrt
-      intrinsic cos
-      real(kind=realtype) :: max2
-      real(kind=realtype) :: max1
-      real(kind=realtype) :: y1
-! set the subsonic inlet treatment to totalconditions.
-      bcdata(boco)%subsonicinlettreatment = totalconditions
-! if the total pressure is present, convert it to si-units and
-! store it.
-      if (ptpresent) then
-        call sipressure(mass(1), length(1), time(1), mult, trans)
-        do j=jbeg,jend
-          do i=ibeg,iend
-            bcdata(boco)%ptinlet(i, j) = (mult*bcvararray(i, j, 1)+trans&
-&             )/pref
-          end do
-        end do
-      end if
-! if the total temperature is present, convert it to si-units
-! and store it.
-      if (ttpresent) then
-        call sitemperature(temp(2), mult, trans)
-        do j=jbeg,jend
-          do i=ibeg,iend
-            bcdata(boco)%ttinlet(i, j) = (mult*bcvararray(i, j, 2)+trans&
-&             )/tref
-          end do
-        end do
-      end if
-! check if the total density is present. if so, it may be used
-! to determine the total temperature or pressure if one of these
-! variables was not specified.
-      if (rhotpresent) then
-        call sidensity(mass(3), length(3), mult, trans)
-        if (ptpresent .and. (.not.ttpresent)) then
-! total pressure is present but total temperature is not.
-! convert the total density to si-units and use the perfect
-! gas law to obtain the total temperature.
-          do j=jbeg,jend
-            do i=ibeg,iend
-              rhot = mult*bcvararray(i, j, 3) + trans
-              bcdata(boco)%ttinlet(i, j) = bcdata(boco)%ptinlet(i, j)*&
-&               pref/(rgasdim*rhot)/tref
-            end do
-          end do
-        else if (ttpresent .and. (.not.ptpresent)) then
-! total temperature is present but total pressure is not.
-! convert the total density to si-units and use the perfect
-! gas law to obtain the total pressure.
-          do j=jbeg,jend
-            do i=ibeg,iend
-              rhot = mult*bcvararray(i, j, 3) + trans
-              bcdata(boco)%ptinlet(i, j) = rgasdim*rhot*bcdata(boco)%&
-&               ttinlet(i, j)*tref/pref
-            end do
-          end do
-        end if
-      end if
-! determine the velocity direction. there are multiple
-! possibilities to specify this direction.
-      if (rdirpresent) then
-! radial direction specified, i.e. a cylindrical coordinate
-! system is used for the velocity direction.
-! determine the unit vectors, which define the cylindrical
-! coordinate system aligned with the rotation axis.
-        call unitvectorscylsystem(boco)
-! initialize wtheta to zero. this value will be used if no
-! theta velocity component was specified.
-        wtheta = zero
-! loop over the faces of the subface.
-        do j=jbeg,jend
-          do i=ibeg,iend
-! determine the coordinates of the face center relative to
-! the rotation point of this section. normally this is an
-! average of i-1, i, j-1, j, but due to the usage of the
-! pointer xf and the fact that x originally starts at 0,
-! an offset of 1 is introduced and thus the average should
-! be taken of i, i+1, j and j+1.
-            xc(1) = fourth*(xf(i, j, 1)+xf(i+1, j, 1)+xf(i, j+1, 1)+xf(i&
-&             +1, j+1, 1)) - sections(sectionid)%rotcenter(1)
-            xc(2) = fourth*(xf(i, j, 2)+xf(i+1, j, 2)+xf(i, j+1, 2)+xf(i&
-&             +1, j+1, 2)) - sections(sectionid)%rotcenter(2)
-            xc(3) = fourth*(xf(i, j, 3)+xf(i+1, j, 3)+xf(i, j+1, 3)+xf(i&
-&             +1, j+1, 3)) - sections(sectionid)%rotcenter(3)
-! determine the coordinates in the local cartesian frame,
-! i.e. the frame determined by axis, radvec1 and radvec2.
-            ax = xc(1)*axis(1) + xc(2)*axis(2) + xc(3)*axis(3)
-            r1 = xc(1)*radvec1(1) + xc(2)*radvec1(2) + xc(3)*radvec1(3)
-            r2 = xc(1)*radvec2(1) + xc(2)*radvec2(2) + xc(3)*radvec2(3)
-! determine the weights of the unit vectors in the local
-! cylindrical system.
-            wax = bcvararray(i, j, 7)
-            wrad = bcvararray(i, j, 10)
-            if (tdirpresent) wtheta = bcvararray(i, j, 11)
-            if (eps .lt. r1*r1 + r2*r2) then
-              max1 = r1*r1 + r2*r2
-            else
-              max1 = eps
-            end if
-! determine the direction in the local cartesian frame,
-! determined by axis, radvec1 and radvec2.
-            var = one/sqrt(max1)
-            dir(1) = wax
-            dir(2) = var*(wrad*r1-wtheta*r2)
-            dir(3) = var*(wrad*r2+wtheta*r1)
-! transform this direction to the global cartesian frame.
-            bcdata(boco)%flowxdirinlet(i, j) = dir(1)*axis(1) + dir(2)*&
-&             radvec1(1) + dir(3)*radvec2(1)
-            bcdata(boco)%flowydirinlet(i, j) = dir(1)*axis(2) + dir(2)*&
-&             radvec1(2) + dir(3)*radvec2(2)
-            bcdata(boco)%flowzdirinlet(i, j) = dir(1)*axis(3) + dir(2)*&
-&             radvec1(3) + dir(3)*radvec2(3)
-          end do
-        end do
-      else
-! cartesian direction specified. either the angle or the
-! direction should be present.
-! x-direction.
-        if (axpresent) then
-! angle specified. convert it to si-units and determine
-! the corresponding direction.
-          call siangle(angle(4), mult, trans)
-          do j=jbeg,jend
-            do i=ibeg,iend
-              bcdata(boco)%flowxdirinlet(i, j) = cos(mult*bcvararray(i, &
-&               j, 4) + trans)
-            end do
-          end do
-        else
-! direction specified. simply copy it.
-          do j=jbeg,jend
-            do i=ibeg,iend
-              bcdata(boco)%flowxdirinlet(i, j) = bcvararray(i, j, 7)
-            end do
-          end do
-        end if
-! y-direction.
-        if (aypresent) then
-! angle specified. convert it to si-units and determine
-! the corresponding direction.
-          call siangle(angle(5), mult, trans)
-          do j=jbeg,jend
-            do i=ibeg,iend
-              bcdata(boco)%flowydirinlet(i, j) = cos(mult*bcvararray(i, &
-&               j, 5) + trans)
-            end do
-          end do
-        else
-! direction specified. simply copy it.
-          do j=jbeg,jend
-            do i=ibeg,iend
-              bcdata(boco)%flowydirinlet(i, j) = bcvararray(i, j, 8)
-            end do
-          end do
-        end if
-! z-direction.
-        if (azpresent) then
-! angle specified. convert it to si-units and determine
-! the corresponding direction.
-          call siangle(angle(6), mult, trans)
-          do j=jbeg,jend
-            do i=ibeg,iend
-              bcdata(boco)%flowzdirinlet(i, j) = cos(mult*bcvararray(i, &
-&               j, 6) + trans)
-            end do
-          end do
-        else
-! direction specified. simply copy it.
-          do j=jbeg,jend
-            do i=ibeg,iend
-              bcdata(boco)%flowzdirinlet(i, j) = bcvararray(i, j, 9)
-            end do
-          end do
-        end if
-      end if
-! loop over the faces of the subface to compute some
-! additional info.
-      do j=jbeg,jend
-        do i=ibeg,iend
-! compute the total enthalpy from the given
-! total temperature.
-          tdim = bcdata(boco)%ttinlet(i, j)*tref
-          call computehtot(tdim, hdim)
-          bcdata(boco)%htinlet(i, j) = hdim/href
-! determine the unit vector of the flow direction.
-          dir(1) = bcdata(boco)%flowxdirinlet(i, j)
-          dir(2) = bcdata(boco)%flowydirinlet(i, j)
-          dir(3) = bcdata(boco)%flowzdirinlet(i, j)
-          y1 = sqrt(dir(1)**2 + dir(2)**2 + dir(3)**2)
-          if (eps .lt. y1) then
-            max2 = y1
-          else
-            max2 = eps
-          end if
-          var = one/max2
-          bcdata(boco)%flowxdirinlet(i, j) = var*dir(1)
-          bcdata(boco)%flowydirinlet(i, j) = var*dir(2)
-          bcdata(boco)%flowzdirinlet(i, j) = var*dir(3)
-        end do
-      end do
-! check if the prescribed direction is an inflow. no halo's
-! should be included here and therefore the nodal range
-! (with an offset) must be used.
-      nn = 0
-      do j=bcdata(boco)%jnbeg+1,bcdata(boco)%jnend
-        do i=bcdata(boco)%inbeg+1,bcdata(boco)%inend
-          var = bcdata(boco)%flowxdirinlet(i, j)*bcdata(boco)%norm(i, j&
-&           , 1) + bcdata(boco)%flowydirinlet(i, j)*bcdata(boco)%norm(i&
-&           , j, 2) + bcdata(boco)%flowzdirinlet(i, j)*bcdata(boco)%norm&
-&           (i, j, 3)
-          if (var .gt. zero) nn = nn + 1
-        end do
-      end do
-    end subroutine totalsubsonicinlet
-  end subroutine bcdatasubsonicinflow
-  subroutine bcdatasubsonicoutflow(boco, bcvararray, ibeg, iend, jbeg, &
-&   jend)
-!
-!       bcdatasubsonicoutflow tries to extract the static pressure
-!       for the currently active boundary face, which is a subsonic
-!       outflow boundary.
-!
-    use constants
-    use cgnsnames
-    use blockpointers, only : bcdata, nbkglobal, bcfaceid
-    use utils_fast_b, only : terminate, sipressure
-    use flowvarrefstate, only : pref
-    implicit none
-!
-!      subroutine arguments.
-!
-    integer(kind=inttype) :: boco
-    integer(kind=inttype) :: ibeg, iend, jbeg, jend
-    real(kind=realtype), dimension(ibeg:iend, jbeg:jend, nbcvarmax) :: &
-&   bcvararray
-!
-!      local variables.
-!
-    integer :: ierr
-    integer(kind=inttype) :: i, j
-    real(kind=realtype) :: mult, trans
-    character(len=maxstringlen) :: errormessage
-    intrinsic trim
-! write an error message and terminate if it was not
-! possible to determine the static pressure.
-    if (.not.bcvarpresent(1)) then
-      write(errormessage, 100) trim(cgnsdoms(nbkglobal)%zonename), trim(&
-&     cgnsdoms(nbkglobal)%bocoinfo(cgnsboco)%boconame)
-      call terminate('bcdatasubsonicoutflow', errormessage)
-    end if
-! convert to si-units and store the pressure in ps.
-    call sipressure(mass(1), length(1), time(1), mult, trans)
-    do j=jbeg,jend
-      do i=ibeg,iend
-        bcdata(boco)%ps(i, j) = (mult*bcvararray(i, j, 1)+trans)/pref
-      end do
-    end do
- 100 format('zone ',a,', boundary subface ',a, &
-&          ': static pressure not specified for subsonic outlet')
-  end subroutine bcdatasubsonicoutflow
-  subroutine bcdatasupersonicinflow(boco, bcvararray, ibeg, iend, jbeg, &
-&   jend, allflowpresent, allturbpresent)
-!
-!       bcdatasupersonicinflow tries to extract the primitive state
-!       vector for the currently active boundary face, which is a
-!       supersonic inflow.
-!
-    use constants
-    use cgnsnames
-    use blockpointers, only : bcdata, nbkglobal, bcfaceid, sectionid
-    use flowvarrefstate, only : nwt, pinfcorr, winf, uref, rhoref, &
-&   pref, muref
-    use inputphysics, only : equations, flowtype, veldirfreestream
-    use utils_fast_b, only : sidensity, sipressure, sivelocity, sitemperature&
-&   , terminate
-    implicit none
-!
-!      subroutine arguments.
-!
-    integer(kind=inttype), intent(in) :: boco
-    integer(kind=inttype) :: ibeg, iend, jbeg, jend
-    real(kind=realtype), dimension(ibeg:iend, jbeg:jend, nbcvarmax) :: &
-&   bcvararray
-    logical, intent(inout) :: allflowpresent
-    logical, intent(inout) :: allturbpresent
-!
-!      local variables.
-!
-    integer :: ierr
-    integer(kind=inttype) :: i, j, nn
-    real(kind=realtype) :: var
-    character(len=maxstringlen) :: errormessage
-    logical :: rhopresent, ppresent, velpresent
-    logical :: velxpresent, velypresent, velzpresent
-    logical :: velrpresent, veltpresent
-    intrinsic trim
-! store the logicals, which indicate success or failure
-! a bit more readable.
-    rhopresent = bcvarpresent(1)
-    ppresent = bcvarpresent(2)
-    velxpresent = bcvarpresent(3)
-    velypresent = bcvarpresent(4)
-    velzpresent = bcvarpresent(5)
-    velrpresent = bcvarpresent(6)
-    veltpresent = bcvarpresent(7)
-! check if a velocity vector is present.
-    velpresent = .false.
-    if (velxpresent .and. velrpresent) velpresent = .true.
-    if (velxpresent .and. velypresent .and. velzpresent) velpresent = &
-&       .true.
-! check if rho, p and the velocity vector are present.
-    if (rhopresent .and. ppresent .and. velpresent) then
-! all the variables needed are prescribed. set them.
-      call prescribedsupersonicinlet()
-    else
-! not all variables are present. check what type of flow
-! is to be solved.
-      select case  (flowtype) 
-      case (internalflow) 
-! internal flow. data at the inlet must be specified;
-! no free stream data can be taken.
-        write(errormessage, 100) trim(cgnsdoms(nbkglobal)%zonename), &
-&       trim(cgnsdoms(nbkglobal)%bocoinfo(cgnsboco)%boconame)
-        call terminate('bcdatasupersonicinflow', errormessage)
-      case (externalflow) 
-!=============================================================
-! external flow. free stream data is used.
-        do j=jbeg,jend
-          do i=ibeg,iend
-            bcdata(boco)%rho(i, j) = winf(irho)
-            bcdata(boco)%velx(i, j) = winf(ivx)
-            bcdata(boco)%vely(i, j) = winf(ivy)
-            bcdata(boco)%velz(i, j) = winf(ivz)
-            bcdata(boco)%ps(i, j) = pinfcorr
-          end do
-        end do
-! set the turbulence values
-        allturbpresent = setbcvarturb(7_inttype, boco, bcvararray, ibeg&
-&         , iend, jbeg, jend, bcdata(boco)%turbinlet)
-! set allflowpresent to .false.
-        allflowpresent = .false.
-      end select
-    end if
-! check if the prescribed velocity is an inflow. no halo's
-! should be included here and therefore the nodal range
-! (with an offset) must be used.
-    nn = 0
-    do j=bcdata(boco)%jnbeg+1,bcdata(boco)%jnend
-      do i=bcdata(boco)%inbeg+1,bcdata(boco)%inend
-        var = bcdata(boco)%velx(i, j)*bcdata(boco)%norm(i, j, 1) + &
-&         bcdata(boco)%vely(i, j)*bcdata(boco)%norm(i, j, 2) + bcdata(&
-&         boco)%velz(i, j)*bcdata(boco)%norm(i, j, 3)
-        if (var .gt. zero) nn = nn + 1
-      end do
-    end do
-    if (nn .gt. 0) then
-      write(errormessage, 102) trim(cgnsdoms(nbkglobal)%zonename), trim(&
-&     cgnsdoms(nbkglobal)%bocoinfo(cgnsboco)%boconame)
-      call terminate('bcdatasupersonicinflow', errormessage)
-    end if
- 100 format('zone ',a,', boundary subface ',a, &
-&          ': not enough data specified for supersonic inlet')
- 102 format('zone ',a,', supersonic inlet boundary subface ',a, &
-&          ': velocity points out of the domain for some faces.')
-
-  contains
-    subroutine prescribedsupersonicinlet()
-!
-!         prescribedsupersonicinlet sets the variables for this
-!         supersonic inlet to prescribed values.
-!
-      use section, only : sections
-      implicit none
-!
-!        local variables.
-!
-      integer(kind=inttype) :: i, j
-      real(kind=realtype) :: mult, trans
-      real(kind=realtype) :: ax, r1, r2, var, vax, vrad, vtheta
-      real(kind=realtype), dimension(3) :: xc, vloc
-      real(kind=realtype), dimension(3) :: multvel, transvel
-      intrinsic max
-      intrinsic sqrt
-      real(kind=realtype) :: max1
-! set the density. take the conversion factor to si-units
-! into account.
-      call sidensity(mass(1), length(1), mult, trans)
-      do j=jbeg,jend
-        do i=ibeg,iend
-          bcdata(boco)%rho(i, j) = (mult*bcvararray(i, j, 1)+trans)/&
-&           rhoref
-        end do
-      end do
-! set the pressure. take the conversion factor to si-units
-! into account.
-      call sipressure(mass(1), length(2), time(2), mult, trans)
-      do j=jbeg,jend
-        do i=ibeg,iend
-          bcdata(boco)%ps(i, j) = (mult*bcvararray(i, j, 2)+trans)/pref
-        end do
-      end do
-! check the situation we are having here for the velocity.
-      if (velrpresent) then
-! radial velocity component prescribed. this must be converted
-! to cartesian components.
-! determine the unit vectors, which define the cylindrical
-! coordinate system aligned with the rotation axis.
-        call unitvectorscylsystem(boco)
-! determine the conversion factor to si-units for the three
-! components. note that a test must be made whether the theta
-! component is present.
-        call sivelocity(length(3), time(3), multvel(1), transvel(1))
-        call sivelocity(length(6), time(6), multvel(2), transvel(2))
-        if (veltpresent) call sivelocity(length(7), time(7), multvel(3)&
-&                                  , transvel(3))
-! initialize vtheta to zero. this value will be used
-! if no theta velocity component was specified.
-        vtheta = zero
-! loop over the faces of the subface.
-        do j=jbeg,jend
-          do i=ibeg,iend
-! determine the coordinates of the face center relative to
-! the rotation point of this section. normally this is an
-! average of i-1, i, j-1, j, but due to the usage of the
-! pointer xf and the fact that x originally starts at 0,
-! an offset of 1 is introduced and thus the average should
-! be taken of i, i+1, j and j+1.
-            xc(1) = fourth*(xf(i, j, 1)+xf(i+1, j, 1)+xf(i, j+1, 1)+xf(i&
-&             +1, j+1, 1)) - sections(sectionid)%rotcenter(1)
-            xc(2) = fourth*(xf(i, j, 2)+xf(i+1, j, 2)+xf(i, j+1, 2)+xf(i&
-&             +1, j+1, 2)) - sections(sectionid)%rotcenter(2)
-            xc(3) = fourth*(xf(i, j, 3)+xf(i+1, j, 3)+xf(i, j+1, 3)+xf(i&
-&             +1, j+1, 3)) - sections(sectionid)%rotcenter(3)
-! determine the coordinates in the local cartesian frame,
-! i.e. the frame determined by axis, radvec1 and radvec2.
-            ax = xc(1)*axis(1) + xc(2)*axis(2) + xc(3)*axis(3)
-            r1 = xc(1)*radvec1(1) + xc(2)*radvec1(2) + xc(3)*radvec1(3)
-            r2 = xc(1)*radvec2(1) + xc(2)*radvec2(2) + xc(3)*radvec2(3)
-! determine the velocity components in the local
-! cylindrical system. take the conversion to si units
-! into account.
-            vax = multvel(1)*bcvararray(i, j, 3) + transvel(1)
-            vrad = multvel(2)*bcvararray(i, j, 6) + transvel(2)
-            if (veltpresent) vtheta = multvel(3)*bcvararray(i, j, 7) + &
-&               transvel(3)
-            if (eps .lt. r1*r1 + r2*r2) then
-              max1 = r1*r1 + r2*r2
-            else
-              max1 = eps
-            end if
-! determine the velocities in the local cartesian
-! frame determined by axis, radvec1 and radvec2.
-            var = one/sqrt(max1)
-            vloc(1) = vax
-            vloc(2) = var*(vrad*r1-vtheta*r2)
-            vloc(3) = var*(vrad*r2+vtheta*r1)
-! transform vloc to the global cartesian frame and
-! store the values.
-            bcdata(boco)%velx(i, j) = (vloc(1)*axis(1)+vloc(2)*radvec1(1&
-&             )+vloc(3)*radvec2(1))/uref
-            bcdata(boco)%vely(i, j) = (vloc(1)*axis(2)+vloc(2)*radvec1(2&
-&             )+vloc(3)*radvec2(2))/uref
-            bcdata(boco)%velz(i, j) = (vloc(1)*axis(3)+vloc(2)*radvec1(3&
-&             )+vloc(3)*radvec2(3))/uref
-          end do
-        end do
-      else
-! cartesian components prescribed.
-! determine the conversion factor to si-units for the three
-! components.
-        call sivelocity(length(3), time(3), multvel(1), transvel(1))
-        call sivelocity(length(4), time(4), multvel(2), transvel(2))
-        call sivelocity(length(5), time(5), multvel(3), transvel(3))
-! set the velocities.
-        do j=jbeg,jend
-          do i=ibeg,iend
-            bcdata(boco)%velx(i, j) = (multvel(1)*bcvararray(i, j, 3)+&
-&             transvel(1))/uref
-            bcdata(boco)%vely(i, j) = (multvel(2)*bcvararray(i, j, 4)+&
-&             transvel(2))/uref
-            bcdata(boco)%velz(i, j) = (multvel(3)*bcvararray(i, j, 5)+&
-&             transvel(3))/uref
-          end do
-        end do
-      end if
-! set the turbulence variables and check if all of them are
-! prescribed. if not set allturbpresent to .false.
-      allturbpresent = setbcvarturb(7_inttype, boco, bcvararray, ibeg, &
-&       iend, jbeg, jend, bcdata(boco)%turbinlet)
-    end subroutine prescribedsupersonicinlet
-  end subroutine bcdatasupersonicinflow
-!=================================================================
-  logical function setbcvarturb(offset, boco, bcvararray, ibeg, iend, &
-&   jbeg, jend, turbinlet)
-!
-!       setbcvarturb sets the array for the turbulent halo data
-!       for inlet boundaries. this function returns .true. if all
-!       turbulence variables could be interpolated and .false.
-!       otherwise.
-!
-    use constants
-    use flowvarrefstate, only : nt1, nt2, muref, pref, rhoref, winf
-    use inputphysics, only : equations, turbmodel
-    use utils_fast_b, only : terminate, siturb
-    implicit none
-!
-!      subroutine arguments.
-!
-    integer(kind=inttype), intent(in) :: offset, boco, ibeg, iend, jbeg&
-&   , jend
-    real(kind=realtype), dimension(ibeg:iend, jbeg:jend, nbcvarmax) :: &
-&   bcvararray
-    real(kind=realtype), dimension(:, :, :), pointer :: turbinlet
-!
-!      local variables.
-!
-    integer(kind=inttype) :: nn, mm, i, j
-    real(kind=realtype) :: mult, trans, nuref
-    real(kind=realtype), dimension(nt1:nt2) :: ref
-! initialize setbcvarturb to .true. and return immediately
-! if not the rans equations are solved.
-    setbcvarturb = .true.
-    if (equations .ne. ransequations) then
-      return
-    else
-! set the reference values depending on the turbulence model.
-      nuref = muref/rhoref
-      select case  (turbmodel) 
-      case (spalartallmaras, spalartallmarasedwards) 
-        ref(itu1) = nuref
-      case (komegawilcox, komegamodified, mentersst) 
-        ref(itu1) = pref/rhoref
-        ref(itu2) = ref(itu1)/nuref
-      case (ktau) 
-        ref(itu1) = pref/rhoref
-        ref(itu2) = nuref/ref(itu1)
-      case (v2f) 
-        ref(itu1) = pref/rhoref
-        ref(itu4) = ref(itu1)/nuref
-        ref(itu2) = ref(itu1)*ref(itu4)
-        ref(itu3) = ref(itu1)
-      end select
-! loop over the number of turbulent variables. mm is the counter
-! in the arrays bcvararray and bcvarpresent.
-      mm = offset
-turbloop:do nn=nt1,nt2
-        mm = mm + 1
-! check if the variable is present. if so, use the
-! interpolated data.
-        if (bcvarpresent(mm)) then
-! conversion to si units if possible.
-          call siturb(mass(mm), length(mm), time(mm), temp(mm), &
-&               bcvarnames(mm), mult, trans)
-! set the turbulent variables.
-          do j=jbeg,jend
-            do i=ibeg,iend
-              turbinlet(i, j, nn) = (mult*bcvararray(i, j, mm)+trans)/&
-&               ref(nn)
-            end do
-          end do
-        else
-! turbulent variable not present. use the free stream data.
-          do j=jbeg,jend
-            do i=ibeg,iend
-              turbinlet(i, j, nn) = winf(nn)
-            end do
-          end do
-! set the logical value to false to indicate that indeed not
-! all the values were present
-          setbcvarturb = .false.
-        end if
-      end do turbloop
-    end if
-  end function setbcvarturb
-end module bcdata_fast_b
diff --git a/src/adjoint/outputReverseFast/oversetutilities_fast_b.f90 b/src/adjoint/outputReverseFast/oversetutilities_fast_b.f90
deleted file mode 100644
index da6a09bc7..000000000
--- a/src/adjoint/outputReverseFast/oversetutilities_fast_b.f90
+++ /dev/null
@@ -1,174 +0,0 @@
-!        generated by tapenade     (inria, tropics team)
-!  tapenade 3.10 (r5363) -  9 sep 2014 09:53
-!
-module oversetutilities_fast_b
-  implicit none
-
-contains
-! --------------------------------------------------
-!           tapenade routine below this point
-! --------------------------------------------------
-  subroutine fractoweights(frac, weights)
-    use constants
-    implicit none
-    real(kind=realtype), dimension(3), intent(in) :: frac
-    real(kind=realtype), dimension(8), intent(out) :: weights
-    weights(1) = (one-frac(1))*(one-frac(2))*(one-frac(3))
-    weights(2) = frac(1)*(one-frac(2))*(one-frac(3))
-    weights(3) = (one-frac(1))*frac(2)*(one-frac(3))
-    weights(4) = frac(1)*frac(2)*(one-frac(3))
-    weights(5) = (one-frac(1))*(one-frac(2))*frac(3)
-    weights(6) = frac(1)*(one-frac(2))*frac(3)
-    weights(7) = (one-frac(1))*frac(2)*frac(3)
-    weights(8) = frac(1)*frac(2)*frac(3)
-  end subroutine fractoweights
-  subroutine fractoweights2(frac, weights)
-    use constants
-    implicit none
-    real(kind=realtype), dimension(3), intent(in) :: frac
-    real(kind=realtype), dimension(8), intent(out) :: weights
-    weights(1) = (one-frac(1))*(one-frac(2))*(one-frac(3))
-    weights(2) = frac(1)*(one-frac(2))*(one-frac(3))
-    weights(3) = frac(1)*frac(2)*(one-frac(3))
-    weights(4) = (one-frac(1))*frac(2)*(one-frac(3))
-    weights(5) = (one-frac(1))*(one-frac(2))*frac(3)
-    weights(6) = frac(1)*(one-frac(2))*frac(3)
-    weights(7) = frac(1)*frac(2)*frac(3)
-    weights(8) = (one-frac(1))*frac(2)*frac(3)
-  end subroutine fractoweights2
-  subroutine newtonupdate(xcen, blk, frac0, frac)
-! this routine performs the newton update to recompute the new
-! "frac" (u,v,w) for the point xcen. the actual search is performed
-! on the the dual cell formed by the cell centers of the 3x3x3 block
-! of primal nodes. this routine is ad'd with tapenade in both
-! forward and reverse.
-    use constants
-    implicit none
-! input
-    real(kind=realtype), dimension(3), intent(in) :: xcen
-    real(kind=realtype), dimension(3, 3, 3, 3), intent(in) :: blk
-    real(kind=realtype), dimension(3), intent(in) :: frac0
-! output
-    real(kind=realtype), dimension(3), intent(out) :: frac
-! working
-    real(kind=realtype), dimension(3, 8) :: xn
-    real(kind=realtype) :: u, v, w, uv, uw, vw, wvu, du, dv, dw
-    real(kind=realtype) :: a11, a12, a13, a21, a22, a23, a31, a32, a33, &
-&   val
-    real(kind=realtype) :: f(3), x(3)
-    integer(kind=inttype), dimension(8), parameter :: indices=(/1, 2, 4&
-&     , 3, 5, 6, 8, 7/)
-    integer(kind=inttype) :: i, j, k, ii, ll
-    real(kind=realtype), parameter :: adteps=1.e-25_realtype
-    real(kind=realtype), parameter :: thresconv=1.e-10_realtype
-    intrinsic sign
-    intrinsic abs
-    intrinsic max
-    intrinsic sqrt
-    real(kind=realtype) :: x1
-    real(kind=realtype) :: max1
-! compute the cell center locations for the 8 nodes describing the
-! dual cell. note that this must be counter-clockwise ordering.
-    ii = 0
-    do k=1,2
-      do j=1,2
-        do i=1,2
-          ii = ii + 1
-          xn(:, indices(ii)) = eighth*(blk(i, j, k, :)+blk(i+1, j, k, :)&
-&           +blk(i, j+1, k, :)+blk(i+1, j+1, k, :)+blk(i, j, k+1, :)+blk&
-&           (i+1, j, k+1, :)+blk(i, j+1, k+1, :)+blk(i+1, j+1, k+1, :))
-        end do
-      end do
-    end do
-! compute the coordinates relative to node 1.
-    do i=2,8
-      xn(:, i) = xn(:, i) - xn(:, 1)
-    end do
-! compute the location of our seach point relative to the first node.
-    x = xcen - xn(:, 1)
-! modify the coordinates of node 3, 6, 8 and 7 such that
-! they correspond to the weights of the u*v, u*w, v*w and
-! u*v*w term in the transformation respectively.
-    xn(1, 7) = xn(1, 7) + xn(1, 2) + xn(1, 4) + xn(1, 5) - xn(1, 3) - xn&
-&     (1, 6) - xn(1, 8)
-    xn(2, 7) = xn(2, 7) + xn(2, 2) + xn(2, 4) + xn(2, 5) - xn(2, 3) - xn&
-&     (2, 6) - xn(2, 8)
-    xn(3, 7) = xn(3, 7) + xn(3, 2) + xn(3, 4) + xn(3, 5) - xn(3, 3) - xn&
-&     (3, 6) - xn(3, 8)
-    xn(1, 3) = xn(1, 3) - xn(1, 2) - xn(1, 4)
-    xn(2, 3) = xn(2, 3) - xn(2, 2) - xn(2, 4)
-    xn(3, 3) = xn(3, 3) - xn(3, 2) - xn(3, 4)
-    xn(1, 6) = xn(1, 6) - xn(1, 2) - xn(1, 5)
-    xn(2, 6) = xn(2, 6) - xn(2, 2) - xn(2, 5)
-    xn(3, 6) = xn(3, 6) - xn(3, 2) - xn(3, 5)
-    xn(1, 8) = xn(1, 8) - xn(1, 4) - xn(1, 5)
-    xn(2, 8) = xn(2, 8) - xn(2, 4) - xn(2, 5)
-    xn(3, 8) = xn(3, 8) - xn(3, 4) - xn(3, 5)
-! set the starting values of u, v and w based on our previous values
-    u = frac0(1)
-    v = frac0(2)
-    w = frac0(3)
-! the newton algorithm to determine the parametric
-! weights u, v and w for the given coordinate.
-newtonhexa:do ll=1,15
-! compute the rhs.
-      uv = u*v
-      uw = u*w
-      vw = v*w
-      wvu = u*v*w
-      f(1) = xn(1, 2)*u + xn(1, 4)*v + xn(1, 5)*w + xn(1, 3)*uv + xn(1, &
-&       6)*uw + xn(1, 8)*vw + xn(1, 7)*wvu - x(1)
-      f(2) = xn(2, 2)*u + xn(2, 4)*v + xn(2, 5)*w + xn(2, 3)*uv + xn(2, &
-&       6)*uw + xn(2, 8)*vw + xn(2, 7)*wvu - x(2)
-      f(3) = xn(3, 2)*u + xn(3, 4)*v + xn(3, 5)*w + xn(3, 3)*uv + xn(3, &
-&       6)*uw + xn(3, 8)*vw + xn(3, 7)*wvu - x(3)
-! compute the jacobian.
-      a11 = xn(1, 2) + xn(1, 3)*v + xn(1, 6)*w + xn(1, 7)*vw
-      a12 = xn(1, 4) + xn(1, 3)*u + xn(1, 8)*w + xn(1, 7)*uw
-      a13 = xn(1, 5) + xn(1, 6)*u + xn(1, 8)*v + xn(1, 7)*uv
-      a21 = xn(2, 2) + xn(2, 3)*v + xn(2, 6)*w + xn(2, 7)*vw
-      a22 = xn(2, 4) + xn(2, 3)*u + xn(2, 8)*w + xn(2, 7)*uw
-      a23 = xn(2, 5) + xn(2, 6)*u + xn(2, 8)*v + xn(2, 7)*uv
-      a31 = xn(3, 2) + xn(3, 3)*v + xn(3, 6)*w + xn(3, 7)*vw
-      a32 = xn(3, 4) + xn(3, 3)*u + xn(3, 8)*w + xn(3, 7)*uw
-      a33 = xn(3, 5) + xn(3, 6)*u + xn(3, 8)*v + xn(3, 7)*uv
-! compute the determinant. make sure that it is not zero
-! and invert the value. the cut off is needed to be able
-! to handle exceptional cases for degenerate elements.
-      val = a11*(a22*a33-a32*a23) + a21*(a13*a32-a12*a33) + a31*(a12*a23&
-&       -a13*a22)
-      if (val .ge. 0.) then
-        x1 = val
-      else
-        x1 = -val
-      end if
-      if (x1 .lt. adteps) then
-        max1 = adteps
-      else
-        max1 = x1
-      end if
-      val = sign(one, val)/max1
-! compute the new values of u, v and w.
-      du = val*((a22*a33-a23*a32)*f(1)+(a13*a32-a12*a33)*f(2)+(a12*a23-&
-&       a13*a22)*f(3))
-      dv = val*((a23*a31-a21*a33)*f(1)+(a11*a33-a13*a31)*f(2)+(a13*a21-&
-&       a11*a23)*f(3))
-      dw = val*((a21*a32-a22*a31)*f(1)+(a12*a31-a11*a32)*f(2)+(a11*a22-&
-&       a12*a21)*f(3))
-      u = u - du
-      v = v - dv
-      w = w - dw
-! exit the loop if the update of the parametric
-! weights is below the threshold
-      val = sqrt(du*du + dv*dv + dw*dw)
-      if (val .le. thresconv) goto 100
-    end do newtonhexa
-! we would *like* that all solutions fall inside the hexa, but we
-! can't be picky here since we are not changing the donors. so
-! whatever the u,v,w is we have to accept. even if it is greater than
-! 1 or less than zero, it shouldn't be by much.
- 100 frac(1) = u
-    frac(2) = v
-    frac(3) = w
-  end subroutine newtonupdate
-end module oversetutilities_fast_b
diff --git a/src/adjoint/outputReverseFast/zipperintegrations_fast_b.f90 b/src/adjoint/outputReverseFast/zipperintegrations_fast_b.f90
deleted file mode 100644
index 1fc7e0a5c..000000000
--- a/src/adjoint/outputReverseFast/zipperintegrations_fast_b.f90
+++ /dev/null
@@ -1,331 +0,0 @@
-!        generated by tapenade     (inria, tropics team)
-!  tapenade 3.10 (r5363) -  9 sep 2014 09:53
-!
-module zipperintegrations_fast_b
-  implicit none
-
-contains
-  subroutine flowintegrationzipper(isinflow, conn, fams, vars, &
-&   localvalues, famlist, sps, ptvalid)
-! integrate over the trianges for the inflow/outflow conditions.
-    use constants
-    use blockpointers, only : bctype
-    use sorting, only : faminlist
-    use flowvarrefstate, only : pref, pinf, rhoref, pref, timeref, &
-&   lref, tref, rgas, uref, uinf, rhoinf
-    use inputphysics, only : pointref, flowtype
-    use flowutils_fast_b, only : computeptot, computettot
-    use surfacefamilies, only : familyexchange, bcfamexchange
-    use utils_fast_b, only : mynorm2, cross_prod
-    implicit none
-! input/output variables
-    logical, intent(in) :: isinflow
-    integer(kind=inttype), dimension(:, :), intent(in) :: conn
-    integer(kind=inttype), dimension(:), intent(in) :: fams
-    real(kind=realtype), dimension(:, :), intent(in) :: vars
-    real(kind=realtype), dimension(nlocalvalues), intent(inout) :: &
-&   localvalues
-    integer(kind=inttype), dimension(:), intent(in) :: famlist
-    integer(kind=inttype), intent(in) :: sps
-    logical(kind=inttype), dimension(:), optional, intent(in) :: ptvalid
-! working variables
-    integer(kind=inttype) :: i, j
-    real(kind=realtype) :: sf, vmag, vnm, vxm, vym, vzm, fx, fy, fz, u, &
-&   v, w, vnmfreestreamref
-    real(kind=realtype), dimension(3) :: fp, mp, fmom, mmom, refpoint, &
-&   ss, x1, x2, x3, norm, sfacecoordref
-    real(kind=realtype) :: pm, ptot, ttot, rhom, gammam, mnm, &
-&   massflowratelocal, am
-    real(kind=realtype) :: massflowrate, mass_ptot, mass_ttot, mass_ps, &
-&   mass_mn, mass_a, mass_rho, mass_vx, mass_vy, mass_vz, mass_nx, &
-&   mass_ny, mass_nz
-    real(kind=realtype) :: area, cellarea, overcellarea
-    real(kind=realtype) :: area_ptot, area_ps
-    real(kind=realtype) :: mredim
-    real(kind=realtype) :: internalflowfact, inflowfact, xc, yc, zc, mx&
-&   , my, mz
-    logical :: triisvalid
-    intrinsic sqrt
-    intrinsic size
-    intrinsic present
-    real(kind=realtype), dimension(3) :: arg1
-    real(kind=realtype), dimension(3) :: arg2
-    mredim = sqrt(pref*rhoref)
-    fp = zero
-    mp = zero
-    fmom = zero
-    mmom = zero
-    massflowrate = zero
-    area = zero
-    mass_ptot = zero
-    mass_ttot = zero
-    mass_ps = zero
-    mass_mn = zero
-    mass_a = zero
-    mass_rho = zero
-    mass_vx = zero
-    mass_vy = zero
-    mass_vz = zero
-    mass_nx = zero
-    mass_ny = zero
-    mass_nz = zero
-    area_ptot = zero
-    area_ps = zero
-    refpoint(1) = lref*pointref(1)
-    refpoint(2) = lref*pointref(2)
-    refpoint(3) = lref*pointref(3)
-    internalflowfact = one
-    if (flowtype .eq. internalflow) internalflowfact = -one
-    inflowfact = one
-    if (isinflow) inflowfact = -one
-    do i=1,size(conn, 2)
-      if (faminlist(fams(i), famlist)) then
-! if the ptvalid list is given, check if we should integrate
-! this triangle.
-        triisvalid = .true.
-        if (present(ptvalid)) then
-! check if each of the three nodes are valid
-          if (((ptvalid(conn(1, i)) .eqv. .false.) .or. (ptvalid(conn(2&
-&             , i)) .eqv. .false.)) .or. (ptvalid(conn(3, i)) .eqv. &
-&             .false.)) triisvalid = .false.
-        end if
-        if (triisvalid) then
-! compute the averaged values for this triangle
-          vxm = zero
-          vym = zero
-          vzm = zero
-          rhom = zero
-          pm = zero
-          mnm = zero
-          gammam = zero
-          sf = zero
-          do j=1,3
-            rhom = rhom + vars(conn(j, i), irho)
-            vxm = vxm + vars(conn(j, i), ivx)
-            vym = vym + vars(conn(j, i), ivy)
-            vzm = vzm + vars(conn(j, i), ivz)
-            pm = pm + vars(conn(j, i), irhoe)
-            gammam = gammam + vars(conn(j, i), izippflowgamma)
-            sf = sf + vars(conn(j, i), izippflowsface)
-          end do
-! divide by 3 due to the summation above:
-          rhom = third*rhom
-          vxm = third*vxm
-          vym = third*vym
-          vzm = third*vzm
-          pm = third*pm
-          gammam = third*gammam
-          sf = third*sf
-! get the nodes of triangle.
-          x1 = vars(conn(1, i), izippflowx:izippflowz)
-          x2 = vars(conn(2, i), izippflowx:izippflowz)
-          x3 = vars(conn(3, i), izippflowx:izippflowz)
-          arg1(:) = x2 - x1
-          arg2(:) = x3 - x1
-          call cross_prod(arg1(:), arg2(:), norm)
-          ss = half*norm
-          call computeptot(rhom, vxm, vym, vzm, pm, ptot)
-          call computettot(rhom, vxm, vym, vzm, pm, ttot)
-          vnm = vxm*ss(1) + vym*ss(2) + vzm*ss(3) - sf
-          vmag = sqrt(vxm**2 + vym**2 + vzm**2) - sf
-          am = sqrt(gammam*pm/rhom)
-          mnm = vmag/sqrt(gammam*pm/rhom)
-          cellarea = sqrt(ss(1)**2 + ss(2)**2 + ss(3)**2)
-          area = area + cellarea
-          overcellarea = 1/cellarea
-          massflowratelocal = rhom*vnm*mredim
-          massflowrate = massflowrate + massflowratelocal
-          pm = pm*pref
-          mass_ptot = mass_ptot + ptot*massflowratelocal*pref
-          mass_ttot = mass_ttot + ttot*massflowratelocal*tref
-          mass_rho = mass_rho + rhom*massflowratelocal*rhoref
-          mass_a = mass_a + am*massflowratelocal*uref
-          mass_ps = mass_ps + pm*massflowratelocal
-          mass_mn = mass_mn + mnm*massflowratelocal
-          area_ptot = area_ptot + ptot*pref*cellarea
-          area_ps = area_ps + pm*cellarea
-          sfacecoordref(1) = sf*ss(1)*overcellarea
-          sfacecoordref(2) = sf*ss(2)*overcellarea
-          sfacecoordref(3) = sf*ss(3)*overcellarea
-          mass_vx = mass_vx + (vxm*uref-sfacecoordref(1))*&
-&           massflowratelocal
-          mass_vy = mass_vy + (vym*uref-sfacecoordref(2))*&
-&           massflowratelocal
-          mass_vz = mass_vz + (vzm*uref-sfacecoordref(3))*&
-&           massflowratelocal
-          mass_nx = mass_nx + ss(1)*overcellarea*massflowratelocal
-          mass_ny = mass_ny + ss(2)*overcellarea*massflowratelocal
-          mass_nz = mass_nz + ss(3)*overcellarea*massflowratelocal
-! compute the average cell center.
-          xc = zero
-          yc = zero
-          zc = zero
-          do j=1,3
-            xc = xc + vars(conn(1, i), izippflowx)
-            yc = yc + vars(conn(2, i), izippflowy)
-            zc = zc + vars(conn(3, i), izippflowz)
-          end do
-! finish average for cell center
-          xc = third*xc
-          yc = third*yc
-          zc = third*zc
-          xc = xc - refpoint(1)
-          yc = yc - refpoint(2)
-          zc = zc - refpoint(3)
-          pm = -(pm-pinf*pref)
-          fx = pm*ss(1)
-          fy = pm*ss(2)
-          fz = pm*ss(3)
-! update the pressure force and moment coefficients.
-          fp(1) = fp(1) + fx
-          fp(2) = fp(2) + fy
-          fp(3) = fp(3) + fz
-          mx = yc*fz - zc*fy
-          my = zc*fx - xc*fz
-          mz = xc*fy - yc*fx
-          mp(1) = mp(1) + mx
-          mp(2) = mp(2) + my
-          mp(3) = mp(3) + mz
-! momentum forces
-! get unit normal vector.
-          ss = ss/cellarea
-          massflowratelocal = massflowratelocal/timeref*internalflowfact&
-&           *inflowfact
-          fx = massflowratelocal*ss(1)*vxm
-          fy = massflowratelocal*ss(2)*vym
-          fz = massflowratelocal*ss(3)*vzm
-          fmom(1) = fmom(1) - fx
-          fmom(2) = fmom(2) - fy
-          fmom(3) = fmom(3) - fz
-          mx = yc*fz - zc*fy
-          my = zc*fx - xc*fz
-          mz = xc*fy - yc*fx
-          mmom(1) = mmom(1) + mx
-          mmom(2) = mmom(2) + my
-          mmom(3) = mmom(3) + mz
-        end if
-      end if
-    end do
-! increment the local values array with what we computed here
-    localvalues(imassflow) = localvalues(imassflow) + massflowrate
-    localvalues(iarea) = localvalues(iarea) + area
-    localvalues(imassrho) = localvalues(imassrho) + mass_rho
-    localvalues(imassa) = localvalues(imassa) + mass_a
-    localvalues(imassptot) = localvalues(imassptot) + mass_ptot
-    localvalues(imassttot) = localvalues(imassttot) + mass_ttot
-    localvalues(imassps) = localvalues(imassps) + mass_ps
-    localvalues(imassmn) = localvalues(imassmn) + mass_mn
-    localvalues(ifp:ifp+2) = localvalues(ifp:ifp+2) + fp
-    localvalues(iflowfm:iflowfm+2) = localvalues(iflowfm:iflowfm+2) + &
-&     fmom
-    localvalues(iflowmp:iflowmp+2) = localvalues(iflowmp:iflowmp+2) + mp
-    localvalues(iflowmm:iflowmm+2) = localvalues(iflowmm:iflowmm+2) + &
-&     mmom
-    localvalues(iareaptot) = localvalues(iareaptot) + area_ptot
-    localvalues(iareaps) = localvalues(iareaps) + area_ps
-    localvalues(imassvx) = localvalues(imassvx) + mass_vx
-    localvalues(imassvy) = localvalues(imassvy) + mass_vy
-    localvalues(imassvz) = localvalues(imassvz) + mass_vz
-    localvalues(imassnx) = localvalues(imassnx) + mass_nx
-    localvalues(imassny) = localvalues(imassny) + mass_ny
-    localvalues(imassnz) = localvalues(imassnz) + mass_nz
-  end subroutine flowintegrationzipper
-  subroutine wallintegrationzipper(conn, fams, vars, localvalues, &
-&   famlist, sps)
-    use constants
-    use sorting, only : faminlist
-    use flowvarrefstate, only : lref
-    use inputphysics, only : pointref
-    use utils_fast_b, only : mynorm2, cross_prod
-    implicit none
-! input/output
-    integer(kind=inttype), dimension(:, :), intent(in) :: conn
-    integer(kind=inttype), dimension(:), intent(in) :: fams
-    real(kind=realtype), dimension(:, :), intent(in) :: vars
-    real(kind=realtype), intent(inout) :: localvalues(nlocalvalues)
-    integer(kind=inttype), dimension(:), intent(in) :: famlist
-    integer(kind=inttype), intent(in) :: sps
-! working
-    real(kind=realtype), dimension(3) :: fp, fv, mp, mv
-    integer(kind=inttype) :: i, j
-    real(kind=realtype), dimension(3) :: ss, norm, refpoint
-    real(kind=realtype), dimension(3) :: p1, p2, p3, v1, v2, v3, x1, x2&
-&   , x3
-    real(kind=realtype) :: fact, triarea, fx, fy, fz, mx, my, mz, xc, yc&
-&   , zc
-    intrinsic size
-    real(kind=realtype), dimension(3) :: arg1
-    real(kind=realtype), dimension(3) :: arg2
-    real(kind=realtype) :: result1
-! determine the reference point for the moment computation in
-! meters.
-    refpoint(1) = lref*pointref(1)
-    refpoint(2) = lref*pointref(2)
-    refpoint(3) = lref*pointref(3)
-    fp = zero
-    fv = zero
-    mp = zero
-    mv = zero
-    do i=1,size(conn, 2)
-      if (faminlist(fams(i), famlist)) then
-! get the nodes of triangle.
-        x1 = vars(conn(1, i), izippwallx:izippwallz)
-        x2 = vars(conn(2, i), izippwallx:izippwallz)
-        x3 = vars(conn(3, i), izippwallx:izippwallz)
-        arg1(:) = x2 - x1
-        arg2(:) = x3 - x1
-        call cross_prod(arg1(:), arg2(:), norm)
-        ss = half*norm
-! the third here is to account for the summation of p1, p2
-! and p3
-        result1 = mynorm2(ss)
-        triarea = result1*third
-! compute the average cell center.
-        xc = third*(x1(1)+x2(1)+x3(1))
-        yc = third*(x1(2)+x2(2)+x3(2))
-        zc = third*(x1(3)+x2(3)+x3(3))
-        xc = xc - refpoint(1)
-        yc = yc - refpoint(2)
-        zc = zc - refpoint(3)
-! update the pressure force and moment coefficients.
-        p1 = vars(conn(1, i), izippwalltpx:izippwalltpz)
-        p2 = vars(conn(2, i), izippwalltpx:izippwalltpz)
-        p3 = vars(conn(3, i), izippwalltpx:izippwalltpz)
-        fx = (p1(1)+p2(1)+p3(1))*triarea
-        fy = (p1(2)+p2(2)+p3(2))*triarea
-        fz = (p1(3)+p2(3)+p3(3))*triarea
-        fp(1) = fp(1) + fx
-        fp(2) = fp(2) + fy
-        fp(3) = fp(3) + fz
-        mx = yc*fz - zc*fy
-        my = zc*fx - xc*fz
-        mz = xc*fy - yc*fx
-        mp(1) = mp(1) + mx
-        mp(2) = mp(2) + my
-        mp(3) = mp(3) + mz
-! update the viscous force and moment coefficients
-        v1 = vars(conn(1, i), izippwalltvx:izippwalltvz)
-        v2 = vars(conn(2, i), izippwalltvx:izippwalltvz)
-        v3 = vars(conn(3, i), izippwalltvx:izippwalltvz)
-        fx = (v1(1)+v2(1)+v3(1))*triarea
-        fy = (v1(2)+v2(2)+v3(2))*triarea
-        fz = (v1(3)+v2(3)+v3(3))*triarea
-! note: momentum forces have opposite sign to pressure forces
-        fv(1) = fv(1) + fx
-        fv(2) = fv(2) + fy
-        fv(3) = fv(3) + fz
-        mx = yc*fz - zc*fy
-        my = zc*fx - xc*fz
-        mz = xc*fy - yc*fx
-        mv(1) = mv(1) + mx
-        mv(2) = mv(2) + my
-        mv(3) = mv(3) + mz
-      end if
-    end do
-! increment into the local vector
-    localvalues(ifp:ifp+2) = localvalues(ifp:ifp+2) + fp
-    localvalues(ifv:ifv+2) = localvalues(ifv:ifv+2) + fv
-    localvalues(imp:imp+2) = localvalues(imp:imp+2) + mp
-    localvalues(imv:imv+2) = localvalues(imv:imv+2) + mv
-  end subroutine wallintegrationzipper
-end module zipperintegrations_fast_b

From 24790650f267f43ebe1c202a66fef7f5bb3f8aef Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Mon, 13 Dec 2021 08:58:28 +0100
Subject: [PATCH 16/60] Renames 'kssa' to better match rest

---
 adflow/pyADflow.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/adflow/pyADflow.py b/adflow/pyADflow.py
index f7cf90cfe..9f4301bb5 100644
--- a/adflow/pyADflow.py
+++ b/adflow/pyADflow.py
@@ -4687,7 +4687,7 @@ def _getDefaultOptions():
             "useQCR": [bool, False],
             "useRotationSA": [bool, False],
             "useft2SA": [bool, True],
-            'kssa': [float, 0.0],
+            'ksSA': [float, 0.0],
             "eddyVisInfRatio": [float, 0.009],
             "useWallFunctions": [bool, False],
             "useApproxWallDistance": [bool, True],

From 89932aaf057999cfba69c0691e3f377ca79678c4 Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Fri, 2 Sep 2022 09:31:13 +0200
Subject: [PATCH 17/60] BC for nu_t should not be modified

---
 src/turbulence/turbBCRoutines.F90 | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/turbulence/turbBCRoutines.F90 b/src/turbulence/turbBCRoutines.F90
index 278fa2074..83f82c3d7 100644
--- a/src/turbulence/turbBCRoutines.F90
+++ b/src/turbulence/turbBCRoutines.F90
@@ -846,39 +846,39 @@ subroutine bcTurbWall(nn)
        case (iMin)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmti1(i,j,itu1,itu1) = -saFact(kssa, d2Wall(2,i,j))
+                bmti1(i,j,itu1,itu1) = one
              enddo
           enddo
        case (iMax)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmti2(i,j,itu1,itu1) = -saFact(kssa, d2Wall(il,i,j))
+                bmti2(i,j,itu1,itu1) = one
              enddo
           enddo
        case (jMin)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmtj1(i,j,itu1,itu1) = -saFact(kssa, d2Wall(i,2,j))
+                bmtj1(i,j,itu1,itu1) = one
              enddo
           enddo
        case (jMax)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmtj2(i,j,itu1,itu1) = -saFact(kssa, d2Wall(i,jl,j))
+                bmtj2(i,j,itu1,itu1) = one
              enddo
           enddo
 
        case (kMin)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmtk1(i,j,itu1,itu1) = -saFact(kssa, d2Wall(i,j,2))
+                bmtk1(i,j,itu1,itu1) = one
              enddo
           enddo
 
        case (kMax)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmtk2(i,j,itu1,itu1) = -saFact(kssa, d2Wall(i,j,kl))
+                bmtk2(i,j,itu1,itu1) = one
              enddo
           enddo
        end select

From 1eb84ab4e1f486f8fc4ce480aac1f0bfbee99fe1 Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Tue, 13 Sep 2022 16:46:01 +0200
Subject: [PATCH 18/60] Ks may be set via CGNS BC now

This only implements the logic to read the ks value from the gridfile
and stores it in memory.
The same logic must be implemented for the isothermal wall aswell
---
 src/bcdata/BCData.F90     | 81 +++++++++++++++++++++++++++++++++++++--
 src/modules/block.F90     |  4 +-
 src/modules/cgnsNames.f90 |  3 ++
 src/modules/constants.F90 |  1 +
 src/utils/utils.F90       |  6 +++
 5 files changed, 91 insertions(+), 4 deletions(-)

diff --git a/src/bcdata/BCData.F90 b/src/bcdata/BCData.F90
index 07ebfbe5a..5f0a65fa4 100644
--- a/src/bcdata/BCData.F90
+++ b/src/bcdata/BCData.F90
@@ -13,9 +13,19 @@ subroutine setBCVarNamesIsothermalWall
     implicit none
     nbcVar = nbcVarIsothermalWall
     bcVarNames(1) = cgnsTemp
+    ! TODO: Add Sand grain roughness !!!!
 
   end subroutine setBCVarNamesIsothermalWall
 
+  subroutine setBCVarNamesAdiabaticWall
+    use cgnsNames
+    use constants
+    implicit none
+    nbcVar = nbcVarAdiabaticWall
+    bcVarNames(1) = cgnsSandGrainRoughness
+
+  end subroutine setBCVarNamesAdiabaticWall
+
   subroutine setBCVarNamesSubsonicInflow
     use constants
     use cgnsNames
@@ -468,6 +478,42 @@ subroutine BCDataIsothermalWall(boco, bcVarArray, iBeg, iEnd, jBeg, jEnd)
 
   end subroutine BCDataIsothermalWall
 
+  subroutine BCDataAdiabaticWall(boco, bcVarArray, iBeg, iEnd, jBeg, jEnd)
+    !
+    !       Tries to extract the equivalent sand grain roughness. It sets
+    !       a default value of 0.0
+    !
+    use constants
+    use cgnsNames
+    use blockPointers, only : BCFaceID, BCData, nBKGlobal
+    implicit none
+    !
+    !      Subroutine arguments.
+    !
+    integer(kind=intType) :: boco
+    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
+    real(kind=realType), dimension(iBeg:iEnd,jBeg:jEnd, nbcVarMax) :: bcVarArray
+    !
+    !      Local variables.
+    !
+    integer(kind=intType) :: i, j
+
+    ! Set a value of 0 if it was not possible to determine the
+    ! sand grain roughness
+
+    if(.not. bcVarPresent(1)) then
+       bcVarArray(:,:,1) = zero
+    endif
+
+    do j=jBeg,jEnd
+       do i=iBeg,iEnd
+          BCData(boco)%ksNS_Wall(i,j) = bcVarArray(i,j,1)
+       enddo
+    enddo
+
+  end subroutine BCDataAdiabaticWall
+
+
   subroutine BCDataSubsonicInflow(boco, bcVarArray, iBeg, iEnd, jBeg, jEnd, allTurbPresent)
     !
     !       BCDataSubsonicInflow tries to extract the prescribed data
@@ -1446,8 +1492,12 @@ subroutine setBCData(bcDataNamesIn, bcDataIn, famLists, sps, &
 
                 select case (BCType(j))
 
+
+                case (NSWallAdiabatic)
+                   call setBCVarNamesAdiabaticWall
+                   call errorCheckbcDataNamesIn("NSWallAdiabatic", bcDataNamesIn)
                 case (NSWallIsothermal)
-                   call setBCVarNamesSupersonicInflow
+                   call setBCVarNamesSupersonicInflow  ! possible bug?
                    call errorCheckbcDataNamesIn("NSWallIsothermal", bcDataNamesIn)
                 case (SupersonicInflow)
                    call setBCVarNamesSupersonicInflow
@@ -1544,8 +1594,11 @@ subroutine setBCData_d(bcDataNamesIn, bcDataIn, bcDataInd, famLists, sps, &
 
                 select case (BCType(j))
 
+                case (NSWallAdiabatic)
+                   call setBCVarNamesAdiabaticWall
+                   call errorCheckbcDataNamesIn("NSWallAdiabatic", bcDataNamesIn)
                 case (NSWallIsothermal)
-                   call setBCVarNamesSupersonicInflow
+                   call setBCVarNamesSupersonicInflow  ! possible bug?
                    call errorCheckbcDataNamesIn("NSWallIsothermal", bcDataNamesIn)
                 case (SupersonicInflow)
                    call setBCVarNamesSupersonicInflow
@@ -1649,8 +1702,11 @@ subroutine setBCData_b(bcDataNamesIn, bcDataIn, bcDataInd, famLists, sps, &
 
                 select case (BCType(j))
 
+                case (NSWallAdiabatic)
+                   call setBCVarNamesAdiabaticWall
+                   call errorCheckbcDataNamesIn("NSWallAdiabatic", bcDataNamesIn)
                 case (NSWallIsothermal)
-                   call setBCVarNamesSupersonicInflow
+                   call setBCVarNamesSupersonicInflow  ! possible bug?
                    call errorCheckbcDataNamesIn("NSWallIsothermal", bcDataNamesIn)
                 case (SupersonicInflow)
                    call setBCVarNamesSupersonicInflow
@@ -2248,6 +2304,7 @@ subroutine allocMemBCData
                 case (NSWallAdiabatic)
                    allocate(BCData(mm)%uSlip(iBeg:iEnd,jBeg:jEnd,3), &
                         BCData(mm)%uSlipALE(0:nALEsteps,iBeg:iEnd,jBeg:jEnd,3), &
+                        BCData(mm)%ksNS_Wall(iBeg:iEnd,jBeg:jEnd), &
                         BCData(mm)%F(iNodeBeg:iNodeEnd,jNodeBeg:jNodeEnd,3), &
                         BCData(mm)%T(iNodeBeg:iNodeEnd,jNodeBeg:jNodeEnd,3), &
                         BCData(mm)%Tp(iNodeBeg:iNodeEnd,jNodeBeg:jNodeEnd,3), &
@@ -2269,6 +2326,7 @@ subroutine allocMemBCData
                    allocate(BCData(mm)%uSlip(iBeg:iEnd,jBeg:jEnd,3),  &
                         BCData(mm)%uSlipALE(0:nALEsteps,iBeg:iEnd,jBeg:jEnd,3), &
                         BCData(mm)%TNS_Wall(iBeg:iEnd,jBeg:jEnd), &
+                        ! TODO: Add KS!
                         BCData(mm)%F(iNodeBeg:iNodeEnd,jNodeBeg:jNodeEnd,3), &
                         BCData(mm)%T(iNodeBeg:iNodeEnd,jNodeBeg:jNodeEnd,3), &
                         BCData(mm)%Tp(iNodeBeg:iNodeEnd,jNodeBeg:jNodeEnd,3), &
@@ -2640,6 +2698,7 @@ subroutine initBCData
                 nullify(BCData(j)%surfIndex)
                 nullify(BCData(j)%uSlip)
                 nullify(BCData(j)%TNS_Wall)
+                nullify(BCData(j)%ksNS_Wall)
                 nullify(BCData(j)%CpTarget)
 
                 nullify(BCData(j)%normALE)
@@ -2764,6 +2823,11 @@ subroutine setBCDataFineGrid(initializationPart)
 
              select case (BCType(j))
 
+             case (NSWallAdiabatic)
+                call setBCVarNamesAdiabaticWall ! sets bcVarNames and nbcVar
+                call extractFromDataSet(bcVarArray)
+                call BCDataAdiabaticWall(j, bcVarArray, iBeg, iEnd, jBeg, jEnd)
+
              case (NSWallIsothermal)
                 call setBCVarNamesIsothermalWall ! sets bcVarNames and nbcVar
                 call extractFromDataSet(bcVarArray)
@@ -3018,6 +3082,11 @@ subroutine setBCDataFineGrid_d(initializationPart)
              ! call the appropriate routine.
              select case (BCType(j))
 
+!            case (NSWallAdiabatic)
+!               call setBCVarNamesAdiabaticWall ! sets bcVarNames and nbcVar
+!               call extractFromDataSet_d(bcVarArray, bcVarArrayd)
+!               call BCDataAdiabaticWall_d(j, bcVarArray, bcVarArrayd, iBeg, iEnd, jBeg, jEnd)
+
              case (NSWallIsothermal)
                 call setBCVarNamesIsothermalWall ! sets bcVarNames and nbcVar
                 call extractFromDataSet_d(bcVarArray, bcVarArrayd)
@@ -3114,6 +3183,12 @@ subroutine setBCDataFineGrid_b(initializationPart)
              ! call the appropriate routine.
              select case (BCType(j))
 
+!            case (NSWallAdiabatic)
+!               call setBCVarNamesAdiabaticWall ! sets bcVarNames and nbcVar
+!               call extractFromDataSet(bcVarArray)
+!               call BCDataAdiabaticWall_b(j, bcVarArray, bcVarArrayd, iBeg, iEnd, jBeg, jEnd)
+!               call extractFromDataSet_b(bcVarArray, bcVarArrayd)
+
              case (NSWallIsothermal)
                 call setBCVarNamesIsothermalWall ! sets bcVarNames and nbcVar
                 call extractFromDataSet(bcVarArray)
diff --git a/src/modules/block.F90 b/src/modules/block.F90
index 36d66d470..6d432e97c 100644
--- a/src/modules/block.F90
+++ b/src/modules/block.F90
@@ -95,10 +95,12 @@ module block
 
      ! uSlip(:,:,3):  the 3 components of the velocity vector on
      !                a viscous wall.
-     ! TNS_Wall(:,:): Wall temperature for isothermal walls.
+     ! KSTNS_Wall(:,:): Wall temperature for isothermal walls.
+     ! ksNS_Wall(:,:): Equivalent Sand Grain Roughness on viscous walls.
 
      real(kind=realType), dimension(:,:,:), pointer :: uSlip
      real(kind=realType), dimension(:,:),   pointer :: TNS_Wall
+     real(kind=realType), dimension(:,:),   pointer :: ksNS_Wall
 
      ! The name of this boundary condition and it's index
      character(maxCGNSNameLen) :: family
diff --git a/src/modules/cgnsNames.f90 b/src/modules/cgnsNames.f90
index 878905ce2..ede08cb60 100644
--- a/src/modules/cgnsNames.f90
+++ b/src/modules/cgnsNames.f90
@@ -171,6 +171,9 @@ module cgnsNames
        character(len=maxCGNSNameLen), parameter :: &
             cgnsIntermittency = "Intermittency"
 
+       character(len=maxCGNSNameLen), parameter :: &
+            cgnsSandGrainRoughness = "SandGrainRoughness"
+
 
 !
 !       Residual names.
diff --git a/src/modules/constants.F90 b/src/modules/constants.F90
index e076c1e5d..4f8a5a408 100644
--- a/src/modules/constants.F90
+++ b/src/modules/constants.F90
@@ -300,6 +300,7 @@ module constants
   integer(kind=intType), parameter :: nbcVarSubsonicInflow = 17
   integer(kind=intType), parameter :: nbcVarSubsonicOutflow = 1
   integer(kind=intType), parameter :: nbcVarSupersonicInflow = 7
+  integer(kind=intType), parameter :: nbcVarAdiabaticWall = 1
   integer(kind=intType), parameter :: nbcVarIsothermalWall = 1
 
   ! Indices of specific familyExcahnge groups based on BC
diff --git a/src/utils/utils.F90 b/src/utils/utils.F90
index 9738a202d..27ed3a56e 100644
--- a/src/utils/utils.F90
+++ b/src/utils/utils.F90
@@ -4709,6 +4709,7 @@ subroutine deallocDerivativeValues(level)
                   flowDomsd(nn, level, sps)%BCData(mm)%area, &
                   flowDomsd(nn, level, sps)%BCData(mm)%uSlip, &
                   flowDomsd(nn, level, sps)%BCData(mm)%TNS_Wall, &
+!                  flowDomsd(nn, level, sps)%BCData(mm)%ksNS_Wall, &
                   stat=ierr)
              call EChk(ierr,__FILE__,__LINE__)
           enddo
@@ -4937,6 +4938,10 @@ subroutine deallocateBlock(nn, level, sps)
             deallocate(BCData(i)%TNS_Wall, stat=ierr)
        if(ierr /= 0) deallocationFailure = .true.
 
+       if( associated(BCData(i)%ksNS_Wall) ) &
+            deallocate(BCData(i)%ksNS_Wall, stat=ierr)
+       if(ierr /= 0) deallocationFailure = .true.
+
        if( associated(BCData(i)%ptInlet) ) &
             deallocate(BCData(i)%ptInlet, stat=ierr)
        if(ierr /= 0) deallocationFailure = .true.
@@ -5016,6 +5021,7 @@ subroutine deallocateBlock(nn, level, sps)
 
        nullify(BCData(i)%uSlip)
        nullify(BCData(i)%TNS_Wall)
+       nullify(BCData(i)%ksNS_Wall)
 
        nullify(BCData(i)%normALE)
        nullify(BCData(i)%rfaceALE)

From c04caceefcfcf1a8699e58f1f3fdae349de17cb0 Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Thu, 6 Oct 2022 14:28:19 +0200
Subject: [PATCH 19/60] Add ks BC for implicit treatment

---
 src/turbulence/turbBCRoutines.F90 | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/turbulence/turbBCRoutines.F90 b/src/turbulence/turbBCRoutines.F90
index 83f82c3d7..278fa2074 100644
--- a/src/turbulence/turbBCRoutines.F90
+++ b/src/turbulence/turbBCRoutines.F90
@@ -846,39 +846,39 @@ subroutine bcTurbWall(nn)
        case (iMin)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmti1(i,j,itu1,itu1) = one
+                bmti1(i,j,itu1,itu1) = -saFact(kssa, d2Wall(2,i,j))
              enddo
           enddo
        case (iMax)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmti2(i,j,itu1,itu1) = one
+                bmti2(i,j,itu1,itu1) = -saFact(kssa, d2Wall(il,i,j))
              enddo
           enddo
        case (jMin)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmtj1(i,j,itu1,itu1) = one
+                bmtj1(i,j,itu1,itu1) = -saFact(kssa, d2Wall(i,2,j))
              enddo
           enddo
        case (jMax)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmtj2(i,j,itu1,itu1) = one
+                bmtj2(i,j,itu1,itu1) = -saFact(kssa, d2Wall(i,jl,j))
              enddo
           enddo
 
        case (kMin)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmtk1(i,j,itu1,itu1) = one
+                bmtk1(i,j,itu1,itu1) = -saFact(kssa, d2Wall(i,j,2))
              enddo
           enddo
 
        case (kMax)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmtk2(i,j,itu1,itu1) = one
+                bmtk2(i,j,itu1,itu1) = -saFact(kssa, d2Wall(i,j,kl))
              enddo
           enddo
        end select

From 5d7725b9cac72e1e3da93a2d5f04a8c41874f92d Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Tue, 18 Oct 2022 13:28:31 +0200
Subject: [PATCH 20/60] replace 'kssa' with 'useRoughSA'

---
 adflow/pyADflow.py                | 4 ++--
 src/f2py/adflow.pyf               | 2 +-
 src/modules/inputParam.F90        | 5 +++--
 src/turbulence/turbBCRoutines.F90 | 1 -
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/adflow/pyADflow.py b/adflow/pyADflow.py
index 844654977..c1d622699 100644
--- a/adflow/pyADflow.py
+++ b/adflow/pyADflow.py
@@ -4972,7 +4972,7 @@ def _getDefaultOptions():
             "useQCR": [bool, False],
             "useRotationSA": [bool, False],
             "useft2SA": [bool, True],
-            'ksSA': [float, 0.0],
+            'useRoughSA': [bool, False],
             "eddyVisInfRatio": [float, 0.009],
             "useWallFunctions": [bool, False],
             "useApproxWallDistance": [bool, True],
@@ -5329,7 +5329,7 @@ def _getOptionMap(self):
             "useqcr": ["physics", "useqcr"],
             "userotationsa": ["physics", "userotationsa"],
             "useft2sa": ["physics", "useft2sa"],
-            "kssa":["physics", "kssa"],
+            "useroughsa":["physics", "useroughsa"],
             "eddyvisinfratio": ["physics", "eddyvisinfratio"],
             "usewallfunctions": ["physics", "wallfunctions"],
             "walldistcutoff": ["physics", "walldistcutoff"],
diff --git a/src/f2py/adflow.pyf b/src/f2py/adflow.pyf
index 548e49004..ba75435f3 100644
--- a/src/f2py/adflow.pyf
+++ b/src/f2py/adflow.pyf
@@ -1205,7 +1205,7 @@ python module libadflow
          real(kind=realtype) :: beta
          integer(kind=inttype) :: liftindex
          real(kind=realtype) :: cavitationnumber
-         real(kind=realtype) :: kssa
+         real(kind=realtype) :: useroughsa
        end module inputphysics
 
        module inputadjoint ! in :adflow:../modules/inputParam.f90
diff --git a/src/modules/inputParam.F90 b/src/modules/inputParam.F90
index a6bff999e..86330ce0f 100644
--- a/src/modules/inputParam.F90
+++ b/src/modules/inputParam.F90
@@ -519,7 +519,7 @@ module inputPhysics
   !                      when considering turbulence model effects
   ! useRotationSA:       Determines if we will use rotation correction (SA model only)
   ! useft2SA:            Determines if we will use the ft2 term (SA model only)
-  ! kssa:                Conventional Nikuradse sand roughness scale height
+  ! useRoughSA:          Whether or not to use rough version of SA
   ! wallFunctions:       Whether or not to use wall functions.
   ! wallDistanceNeeded:  Whether or not the wall distance is needed
   !                      for the turbulence model in a RANS problem.
@@ -583,7 +583,8 @@ module inputPhysics
   real(kind=realType), dimension(3,2) :: momentAxis
   real(kind=realType) :: SSuthDim, muSuthDim, TSuthDim
   real(kind=realType) :: cavitationnumber
-  real(kind=realType) :: kssa
+  logical :: useRoughSA
+  real(kind=realType) :: kssa   ! TODO: Remove this!!
 
 #ifndef USE_TAPENADE
   real(kind=realType) :: alphad, betad
diff --git a/src/turbulence/turbBCRoutines.F90 b/src/turbulence/turbBCRoutines.F90
index 278fa2074..a0fbb4abb 100644
--- a/src/turbulence/turbBCRoutines.F90
+++ b/src/turbulence/turbBCRoutines.F90
@@ -312,7 +312,6 @@ subroutine bcEddyWall(nn)
     !
     use constants
     use blockPointers
-    use inputPhysics, only : kssa
     implicit none
     !
     !      Subroutine arguments.

From 6817ef91eb882fb3069be59996895a5f70a0d420 Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Thu, 20 Oct 2022 13:44:16 +0200
Subject: [PATCH 21/60] proper implementation on 1 process + tapenade

---
 adflow/pyADflow.py                            |   2 +-
 src/NKSolver/blockette.F90                    |   2 +-
 src/adjoint/outputForward/bcdata_d.f90        |  78 ++-
 src/adjoint/outputForward/sa_d.f90            |  47 +-
 .../outputForward/turbbcroutines_d.f90        | 445 +++++++++---------
 src/adjoint/outputForward/walldistance_d.f90  |  47 ++
 src/adjoint/outputReverse/bcdata_b.f90        |  61 +++
 src/adjoint/outputReverse/sa_b.f90            |  51 +-
 .../outputReverse/turbbcroutines_b.f90        | 389 +++++++--------
 src/adjoint/outputReverse/walldistance_b.f90  |  47 ++
 src/adjoint/outputReverseFast/sa_fast_b.f90   |  38 +-
 .../turbbcroutines_fast_b.f90                 | 262 ++++++-----
 .../outputReverseFast/walldistance_fast_b.f90 |  47 ++
 src/bcdata/BCData.F90                         |  55 ++-
 src/f2py/adflow.pyf                           |   2 +-
 src/initFlow/initializeFlow.F90               |   7 +
 src/modules/block.F90                         |   6 +
 src/modules/blockPointers.F90                 |   1 +
 src/modules/constants.F90                     |   2 +-
 src/modules/inputParam.F90                    |   5 +-
 src/modules/overset.F90                       |   4 +
 src/modules/paramTurb.F90                     |   1 +
 src/overset/buildClusterWalls.F90             |  56 ++-
 src/turbulence/sa.F90                         |  21 +-
 src/turbulence/turbBCRoutines.F90             |  52 +-
 src/utils/utils.F90                           |   7 +-
 src/wallDistance/wallDistance.F90             |  81 +++-
 27 files changed, 1160 insertions(+), 656 deletions(-)

diff --git a/adflow/pyADflow.py b/adflow/pyADflow.py
index c1d622699..acb6f5591 100644
--- a/adflow/pyADflow.py
+++ b/adflow/pyADflow.py
@@ -4972,7 +4972,7 @@ def _getDefaultOptions():
             "useQCR": [bool, False],
             "useRotationSA": [bool, False],
             "useft2SA": [bool, True],
-            'useRoughSA': [bool, False],
+            "useRoughSA": [bool, False],
             "eddyVisInfRatio": [float, 0.009],
             "useWallFunctions": [bool, False],
             "useApproxWallDistance": [bool, True],
diff --git a/src/NKSolver/blockette.F90 b/src/NKSolver/blockette.F90
index c96a21000..8293db443 100644
--- a/src/NKSolver/blockette.F90
+++ b/src/NKSolver/blockette.F90
@@ -981,7 +981,7 @@ subroutine saSource
     use constants
     use paramTurb
     use blockPointers, only : sectionID
-    use inputPhysics, only :useft2SA, useRotationSA, turbProd, equations, kssa
+    use inputPhysics, only :useft2SA, useRotationSA, turbProd, equations
     use inputDiscretization, only : approxSA
     use section, only : sections
     use sa, only : cv13, kar2Inv, cw36, cb3Inv
diff --git a/src/adjoint/outputForward/bcdata_d.f90 b/src/adjoint/outputForward/bcdata_d.f90
index 1bbb29366..92659d985 100644
--- a/src/adjoint/outputForward/bcdata_d.f90
+++ b/src/adjoint/outputForward/bcdata_d.f90
@@ -13,10 +13,26 @@ module bcdata_d
   subroutine setbcvarnamesisothermalwall()
     use cgnsnames
     use constants
+    use inputphysics, only : useroughsa
     implicit none
     nbcvar = nbcvarisothermalwall
     bcvarnames(1) = cgnstemp
+    if (useroughsa) then
+      nbcvar = nbcvar + 1
+      bcvarnames(2) = cgnssandgrainroughness
+    end if
   end subroutine setbcvarnamesisothermalwall
+  subroutine setbcvarnamesadiabaticwall()
+    use cgnsnames
+    use constants
+    use inputphysics, only : useroughsa
+    implicit none
+    nbcvar = nbcvaradiabaticwall
+    if (useroughsa) then
+      nbcvar = nbcvar + 1
+      bcvarnames(1) = cgnssandgrainroughness
+    end if
+  end subroutine setbcvarnamesadiabaticwall
   subroutine setbcvarnamessubsonicinflow()
     use constants
     use cgnsnames
@@ -321,10 +337,10 @@ subroutine unitvectorscylsystem(boco)
     radvec2(3) = axis(1)*radvec1(2) - axis(2)*radvec1(1)
   end subroutine unitvectorscylsystem
 !  differentiation of bcdataisothermalwall in forward (tangent) mode (with options i4 dr8 r8):
-!   variations   of useful results: *(*bcdata.tns_wall)
+!   variations   of useful results: *(*bcdata.tns_wall) bcvararray
 !   with respect to varying inputs: tref bcvararray
 !   rw status of diff variables: tref:in *(*bcdata.tns_wall):out
-!                bcvararray:in
+!                bcvararray:in-out
 !   plus diff mem management of: bcdata:in *bcdata.tns_wall:in
 ! ---------------------------------------------------------------
 ! routines that set the actual bcdata values from the cgns data set
@@ -342,6 +358,7 @@ subroutine bcdataisothermalwall_d(boco, bcvararray, bcvararrayd, ibeg&
     use blockpointers, only : bcfaceid, bcdata, bcdatad, nbkglobal
     use utils_d, only : terminate, sitemperature
     use flowvarrefstate, only : tref, trefd
+    use inputphysics, only : useroughsa
     use diffsizes
 !  hint: isize1ofdrfbcdata should be the size of dimension 1 of array *bcdata
     implicit none
@@ -383,6 +400,19 @@ subroutine bcdataisothermalwall_d(boco, bcvararray, bcvararrayd, ibeg&
 &         tref
       end do
     end do
+! set a value of 0 if it was not possible to determine the
+! sand grain roughness
+    if (useroughsa) then
+      if (.not.bcvarpresent(2)) then
+        bcvararrayd(:, :, 1) = 0.0_8
+        bcvararray(:, :, 1) = zero
+      end if
+      do j=jbeg,jend
+        do i=ibeg,iend
+          bcdata(boco)%ksns_wall(i, j) = bcvararray(i, j, 2)
+        end do
+      end do
+    end if
  100 format('zone ',a,', boundary subface ',a, &
 &          ': wall temperature not specified for isothermal wall')
   end subroutine bcdataisothermalwall_d
@@ -402,6 +432,7 @@ subroutine bcdataisothermalwall(boco, bcvararray, ibeg, iend, jbeg, &
     use blockpointers, only : bcfaceid, bcdata, nbkglobal
     use utils_d, only : terminate, sitemperature
     use flowvarrefstate, only : tref
+    use inputphysics, only : useroughsa
     implicit none
 !
 !      subroutine arguments.
@@ -433,9 +464,52 @@ subroutine bcdataisothermalwall(boco, bcvararray, ibeg, iend, jbeg, &
 &         tref
       end do
     end do
+! set a value of 0 if it was not possible to determine the
+! sand grain roughness
+    if (useroughsa) then
+      if (.not.bcvarpresent(2)) bcvararray(:, :, 1) = zero
+      do j=jbeg,jend
+        do i=ibeg,iend
+          bcdata(boco)%ksns_wall(i, j) = bcvararray(i, j, 2)
+        end do
+      end do
+    end if
  100 format('zone ',a,', boundary subface ',a, &
 &          ': wall temperature not specified for isothermal wall')
   end subroutine bcdataisothermalwall
+  subroutine bcdataadiabaticwall(boco, bcvararray, ibeg, iend, jbeg, &
+&   jend)
+!
+!       tries to extract the equivalent sand grain roughness. it sets
+!       a default value of 0.0
+!
+    use constants
+    use cgnsnames
+    use inputphysics, only : useroughsa
+    use blockpointers, only : bcfaceid, bcdata, nbkglobal
+    implicit none
+!
+!      subroutine arguments.
+!
+    integer(kind=inttype) :: boco
+    integer(kind=inttype) :: ibeg, iend, jbeg, jend
+    real(kind=realtype), dimension(ibeg:iend, jbeg:jend, nbcvarmax) :: &
+&   bcvararray
+!
+!      local variables.
+!
+    integer(kind=inttype) :: i, j
+! set a value of 0 if it was not possible to determine the
+! sand grain roughness
+    if (useroughsa) then
+      if (.not.bcvarpresent(1)) bcvararray(:, :, 1) = zero
+      do j=jbeg,jend
+        do i=ibeg,iend
+          bcdata(boco)%ksns_wall(i, j) = bcvararray(i, j, 1)
+        end do
+      end do
+    end if
+  end subroutine bcdataadiabaticwall
   subroutine bcdatasubsonicinflow(boco, bcvararray, ibeg, iend, jbeg, &
 &   jend, allturbpresent)
 !
diff --git a/src/adjoint/outputForward/sa_d.f90 b/src/adjoint/outputForward/sa_d.f90
index 7a85a6ea6..066602c52 100644
--- a/src/adjoint/outputForward/sa_d.f90
+++ b/src/adjoint/outputForward/sa_d.f90
@@ -40,8 +40,8 @@ subroutine sasource_d()
     real(kind=realtype), parameter :: f23=two*third
 ! local variables.
     integer(kind=inttype) :: i, j, k, nn, ii
-    real(kind=realtype) :: dnew, cr1
-    real(kind=realtype) :: dnewd
+    real(kind=realtype) :: dist, kslocal
+    real(kind=realtype) :: distd
     real(kind=realtype) :: fv1, fv2, ft2
     real(kind=realtype) :: fv1d, fv2d, ft2d
     real(kind=realtype) :: ss, sst, nu, dist2inv, chi, chi2, chi3
@@ -78,8 +78,6 @@ subroutine sasource_d()
     kar2inv = one/rsak**2
     cw36 = rsacw3**6
     cb3inv = one/rsacb3
-! constants for sa rough
-    cr1 = 0.5
 ! determine the non-dimensional wheel speed of this block.
     omegaxd = sections(sectionid)%rotrate(1)*timerefd
     omegax = timeref*sections(sectionid)%rotrate(1)
@@ -278,17 +276,25 @@ subroutine sasource_d()
 ! wall distance squared, the ratio chi (ratio of nutilde
 ! and nu) and the functions fv1 and fv2. the latter corrects
 ! the production term near a viscous wall.
-! sa rough
-            dnewd = d2walld(i, j, k)
-            dnew = d2wall(i, j, k) + 0.03*kssa
+! as the rough version of sa is supported, this looks slightly different
+! than the standard sa implementation
+            if (useroughsa) then
+              kslocal = ks(i, j, k)
+              distd = d2walld(i, j, k)
+              dist = d2wall(i, j, k) + 0.03*kslocal
+            else
+              kslocal = zero
+              distd = d2walld(i, j, k)
+              dist = d2wall(i, j, k)
+            end if
             nud = (rlvd(i, j, k)*w(i, j, k, irho)-rlv(i, j, k)*wd(i, j, &
 &             k, irho))/w(i, j, k, irho)**2
             nu = rlv(i, j, k)/w(i, j, k, irho)
-            dist2invd = -(one*2*dnew*dnewd/(dnew**2)**2)
-            dist2inv = one/dnew**2
+            dist2invd = -(one*2*dist*distd/(dist**2)**2)
+            dist2inv = one/dist**2
             chid = (wd(i, j, k, itu1)*nu-w(i, j, k, itu1)*nud)/nu**2 - &
-&             cr1*kssa*dnewd/dnew**2
-            chi = w(i, j, k, itu1)/nu + cr1*kssa/dnew
+&             rsacr1*kslocal*distd/dist**2
+            chi = w(i, j, k, itu1)/nu + rsacr1*kslocal/dist
             chi2d = chid*chi + chi*chid
             chi2 = chi*chi
             chi3d = chid*chi2 + chi*chi2d
@@ -410,7 +416,7 @@ subroutine sasource()
     real(kind=realtype), parameter :: f23=two*third
 ! local variables.
     integer(kind=inttype) :: i, j, k, nn, ii
-    real(kind=realtype) :: dnew, cr1
+    real(kind=realtype) :: dist, kslocal
     real(kind=realtype) :: fv1, fv2, ft2
     real(kind=realtype) :: ss, sst, nu, dist2inv, chi, chi2, chi3
     real(kind=realtype) :: rr, gg, gg6, termfw, fwsa, term1, term2
@@ -433,8 +439,6 @@ subroutine sasource()
     kar2inv = one/rsak**2
     cw36 = rsacw3**6
     cb3inv = one/rsacb3
-! constants for sa rough
-    cr1 = 0.5
 ! determine the non-dimensional wheel speed of this block.
     omegax = timeref*sections(sectionid)%rotrate(1)
     omegay = timeref*sections(sectionid)%rotrate(2)
@@ -526,11 +530,18 @@ subroutine sasource()
 ! wall distance squared, the ratio chi (ratio of nutilde
 ! and nu) and the functions fv1 and fv2. the latter corrects
 ! the production term near a viscous wall.
-! sa rough
-            dnew = d2wall(i, j, k) + 0.03*kssa
+! as the rough version of sa is supported, this looks slightly different
+! than the standard sa implementation
+            if (useroughsa) then
+              kslocal = ks(i, j, k)
+              dist = d2wall(i, j, k) + 0.03*kslocal
+            else
+              kslocal = zero
+              dist = d2wall(i, j, k)
+            end if
             nu = rlv(i, j, k)/w(i, j, k, irho)
-            dist2inv = one/dnew**2
-            chi = w(i, j, k, itu1)/nu + cr1*kssa/dnew
+            dist2inv = one/dist**2
+            chi = w(i, j, k, itu1)/nu + rsacr1*kslocal/dist
             chi2 = chi*chi
             chi3 = chi*chi2
             fv1 = chi3/(chi3+cv13)
diff --git a/src/adjoint/outputForward/turbbcroutines_d.f90 b/src/adjoint/outputForward/turbbcroutines_d.f90
index ece12d732..d63357892 100644
--- a/src/adjoint/outputForward/turbbcroutines_d.f90
+++ b/src/adjoint/outputForward/turbbcroutines_d.f90
@@ -40,8 +40,8 @@ subroutine applyallturbbcthisblock_d(secondhalo)
 ! loop over the faces and set the state in
 ! the turbulent halo cells.
       if (.not.wallfunctions) then
-        select case  (bcfaceid(nn))
-        case (imin)
+        select case  (bcfaceid(nn)) 
+        case (imin) 
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -56,7 +56,7 @@ subroutine applyallturbbcthisblock_d(secondhalo)
               end do
             end do
           end do
-        case (imax)
+        case (imax) 
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -71,7 +71,7 @@ subroutine applyallturbbcthisblock_d(secondhalo)
               end do
             end do
           end do
-        case (jmin)
+        case (jmin) 
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -86,7 +86,7 @@ subroutine applyallturbbcthisblock_d(secondhalo)
               end do
             end do
           end do
-        case (jmax)
+        case (jmax) 
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -101,7 +101,7 @@ subroutine applyallturbbcthisblock_d(secondhalo)
               end do
             end do
           end do
-        case (kmin)
+        case (kmin) 
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -116,7 +116,7 @@ subroutine applyallturbbcthisblock_d(secondhalo)
               end do
             end do
           end do
-        case (kmax)
+        case (kmax) 
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -180,8 +180,8 @@ subroutine applyallturbbcthisblock(secondhalo)
 ! loop over the faces and set the state in
 ! the turbulent halo cells.
       if (.not.wallfunctions) then
-        select case  (bcfaceid(nn))
-        case (imin)
+        select case  (bcfaceid(nn)) 
+        case (imin) 
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -193,7 +193,7 @@ subroutine applyallturbbcthisblock(secondhalo)
               end do
             end do
           end do
-        case (imax)
+        case (imax) 
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -205,7 +205,7 @@ subroutine applyallturbbcthisblock(secondhalo)
               end do
             end do
           end do
-        case (jmin)
+        case (jmin) 
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -217,7 +217,7 @@ subroutine applyallturbbcthisblock(secondhalo)
               end do
             end do
           end do
-        case (jmax)
+        case (jmax) 
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -229,7 +229,7 @@ subroutine applyallturbbcthisblock(secondhalo)
               end do
             end do
           end do
-        case (kmin)
+        case (kmin) 
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -241,7 +241,7 @@ subroutine applyallturbbcthisblock(secondhalo)
               end do
             end do
           end do
-        case (kmax)
+        case (kmax) 
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -299,43 +299,43 @@ subroutine bceddynowall_d(nn)
 !
     integer(kind=inttype) :: i, j
 ! determine the face id on which the subface and copy
-    select case  (bcfaceid(nn))
-    case (imin)
+    select case  (bcfaceid(nn)) 
+    case (imin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           revd(1, i, j) = revd(2, i, j)
           rev(1, i, j) = rev(2, i, j)
         end do
       end do
-    case (imax)
+    case (imax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           revd(ie, i, j) = revd(il, i, j)
           rev(ie, i, j) = rev(il, i, j)
         end do
       end do
-    case (jmin)
+    case (jmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           revd(i, 1, j) = revd(i, 2, j)
           rev(i, 1, j) = rev(i, 2, j)
         end do
       end do
-    case (jmax)
+    case (jmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           revd(i, je, j) = revd(i, jl, j)
           rev(i, je, j) = rev(i, jl, j)
         end do
       end do
-    case (kmin)
+    case (kmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           revd(i, j, 1) = revd(i, j, 2)
           rev(i, j, 1) = rev(i, j, 2)
         end do
       end do
-    case (kmax)
+    case (kmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           revd(i, j, ke) = revd(i, j, kl)
@@ -364,38 +364,38 @@ subroutine bceddynowall(nn)
 !
     integer(kind=inttype) :: i, j
 ! determine the face id on which the subface and copy
-    select case  (bcfaceid(nn))
-    case (imin)
+    select case  (bcfaceid(nn)) 
+    case (imin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(1, i, j) = rev(2, i, j)
         end do
       end do
-    case (imax)
+    case (imax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(ie, i, j) = rev(il, i, j)
         end do
       end do
-    case (jmin)
+    case (jmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(i, 1, j) = rev(i, 2, j)
         end do
       end do
-    case (jmax)
+    case (jmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(i, je, j) = rev(i, jl, j)
         end do
       end do
-    case (kmin)
+    case (kmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(i, j, 1) = rev(i, j, 2)
         end do
       end do
-    case (kmax)
+    case (kmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(i, j, ke) = rev(i, j, kl)
@@ -416,7 +416,6 @@ subroutine bceddywall_d(nn)
 !
     use constants
     use blockpointers
-    use inputphysics, only : kssa
     implicit none
 !
 !      subroutine arguments.
@@ -430,51 +429,51 @@ subroutine bceddywall_d(nn)
 ! determine the face id on which the subface is located and
 ! loop over the faces of the subface and set the eddy viscosity
 ! in the halo cells.
-    select case  (bcfaceid(nn))
-    case (imin)
+    select case  (bcfaceid(nn)) 
+    case (imin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = safact(kssa, d2wall(2, i, j))
+          result1 = saroughfact(2, i, j)
           revd(1, i, j) = result1*revd(2, i, j)
           rev(1, i, j) = result1*rev(2, i, j)
         end do
       end do
-    case (imax)
+    case (imax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = safact(kssa, d2wall(il, i, j))
+          result1 = saroughfact(il, i, j)
           revd(ie, i, j) = result1*revd(il, i, j)
           rev(ie, i, j) = result1*rev(il, i, j)
         end do
       end do
-    case (jmin)
+    case (jmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = safact(kssa, d2wall(i, 2, j))
+          result1 = saroughfact(i, 2, j)
           revd(i, 1, j) = result1*revd(i, 2, j)
           rev(i, 1, j) = result1*rev(i, 2, j)
         end do
       end do
-    case (jmax)
+    case (jmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = safact(kssa, d2wall(i, jl, j))
+          result1 = saroughfact(i, jl, j)
           revd(i, je, j) = result1*revd(i, jl, j)
           rev(i, je, j) = result1*rev(i, jl, j)
         end do
       end do
-    case (kmin)
+    case (kmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = safact(kssa, d2wall(i, j, 2))
+          result1 = saroughfact(i, j, 2)
           revd(i, j, 1) = result1*revd(i, j, 2)
           rev(i, j, 1) = result1*rev(i, j, 2)
         end do
       end do
-    case (kmax)
+    case (kmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = safact(kssa, d2wall(i, j, kl))
+          result1 = saroughfact(i, j, kl)
           revd(i, j, ke) = result1*revd(i, j, kl)
           rev(i, j, ke) = result1*rev(i, j, kl)
         end do
@@ -490,7 +489,6 @@ subroutine bceddywall(nn)
 !
     use constants
     use blockpointers
-    use inputphysics, only : kssa
     implicit none
 !
 !      subroutine arguments.
@@ -504,46 +502,46 @@ subroutine bceddywall(nn)
 ! determine the face id on which the subface is located and
 ! loop over the faces of the subface and set the eddy viscosity
 ! in the halo cells.
-    select case  (bcfaceid(nn))
-    case (imin)
+    select case  (bcfaceid(nn)) 
+    case (imin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = safact(kssa, d2wall(2, i, j))
+          result1 = saroughfact(2, i, j)
           rev(1, i, j) = result1*rev(2, i, j)
         end do
       end do
-    case (imax)
+    case (imax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = safact(kssa, d2wall(il, i, j))
+          result1 = saroughfact(il, i, j)
           rev(ie, i, j) = result1*rev(il, i, j)
         end do
       end do
-    case (jmin)
+    case (jmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = safact(kssa, d2wall(i, 2, j))
+          result1 = saroughfact(i, 2, j)
           rev(i, 1, j) = result1*rev(i, 2, j)
         end do
       end do
-    case (jmax)
+    case (jmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = safact(kssa, d2wall(i, jl, j))
+          result1 = saroughfact(i, jl, j)
           rev(i, je, j) = result1*rev(i, jl, j)
         end do
       end do
-    case (kmin)
+    case (kmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = safact(kssa, d2wall(i, j, 2))
+          result1 = saroughfact(i, j, 2)
           rev(i, j, 1) = result1*rev(i, j, 2)
         end do
       end do
-    case (kmax)
+    case (kmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = safact(kssa, d2wall(i, j, kl))
+          result1 = saroughfact(i, j, kl)
           rev(i, j, ke) = result1*rev(i, j, kl)
         end do
       end do
@@ -577,23 +575,23 @@ subroutine bcturbinflow(nn)
       do i=bcdata(nn)%icbeg,bcdata(nn)%icend
 ! loop over the number of turbulent variables.
         do l=nt1,nt2
-          select case  (bcfaceid(nn))
-          case (imin)
+          select case  (bcfaceid(nn)) 
+          case (imin) 
             bvti1(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmti1(i, j, l, l) = one
-          case (imax)
+          case (imax) 
             bvti2(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmti2(i, j, l, l) = one
-          case (jmin)
+          case (jmin) 
             bvtj1(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmtj1(i, j, l, l) = one
-          case (jmax)
+          case (jmax) 
             bvtj2(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmtj2(i, j, l, l) = one
-          case (kmin)
+          case (kmin) 
             bvtk1(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmtk1(i, j, l, l) = one
-          case (kmax)
+          case (kmax) 
             bvtk2(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmtk2(i, j, l, l) = one
           end select
@@ -630,18 +628,18 @@ subroutine bcturboutflow(nn)
     do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
       do i=bcdata(nn)%icbeg,bcdata(nn)%icend
         do l=nt1,nt2
-          select case  (bcfaceid(nn))
-          case (imin)
+          select case  (bcfaceid(nn)) 
+          case (imin) 
             bmti1(i, j, l, l) = -one
-          case (imax)
+          case (imax) 
             bmti2(i, j, l, l) = -one
-          case (jmin)
+          case (jmin) 
             bmtj1(i, j, l, l) = -one
-          case (jmax)
+          case (jmax) 
             bmtj2(i, j, l, l) = -one
-          case (kmin)
+          case (kmin) 
             bmtk1(i, j, l, l) = -one
-          case (kmax)
+          case (kmax) 
             bmtk2(i, j, l, l) = -one
           end select
         end do
@@ -732,26 +730,26 @@ subroutine bcturbtreatment_d()
 ! loop over the boundary condition subfaces of this block.
 bocos:do nn=1,nbocos
 ! determine the kind of boundary condition for this subface.
-      select case  (bctype(nn))
-      case (nswalladiabatic, nswallisothermal)
+      select case  (bctype(nn)) 
+      case (nswalladiabatic, nswallisothermal) 
 ! viscous wall. there is no difference between an adiabatic
 ! and an isothermal wall for the turbulent equations.
 ! set the implicit treatment of the wall boundary conditions.
         call bcturbwall_d(nn)
-      case (symm, symmpolar, eulerwall)
+      case (symm, symmpolar, eulerwall) 
 !=============================================================
 !=============================================================
 ! symmetry, polar symmetry or inviscid wall. treatment of
 ! the turbulent equations is identical.
         call bcturbsymm(nn)
-      case (farfield)
+      case (farfield) 
 !=============================================================
 ! farfield. the kind of boundary condition to be applied,
 ! inflow or outflow, depends on the local conditions.
         call bcturbfarfield_d(nn)
       case (slidinginterface, oversetouterbound, domaininterfaceall, &
 &     domaininterfacerhouvw, domaininterfacep, domaininterfacerho, &
-&     domaininterfacetotal)
+&     domaininterfacetotal) 
 !=============================================================
 ! sliding mesh interface, overset outer boudaries, and
 ! domain interface with another code are not really boundary
@@ -805,41 +803,41 @@ subroutine bcturbfarfield_d(nn)
 ! outflow. simply extrapolation or zero neumann bc
 ! of the turbulent variables.
           do l=nt1,nt2
-            select case  (bcfaceid(nn))
-            case (imin)
+            select case  (bcfaceid(nn)) 
+            case (imin) 
               bmti1(i, j, l, l) = -one
-            case (imax)
+            case (imax) 
               bmti2(i, j, l, l) = -one
-            case (jmin)
+            case (jmin) 
               bmtj1(i, j, l, l) = -one
-            case (jmax)
+            case (jmax) 
               bmtj2(i, j, l, l) = -one
-            case (kmin)
+            case (kmin) 
               bmtk1(i, j, l, l) = -one
-            case (kmax)
+            case (kmax) 
               bmtk2(i, j, l, l) = -one
             end select
           end do
         else
 ! inflow. turbulent variables are prescribed.
           do l=nt1,nt2
-            select case  (bcfaceid(nn))
-            case (imin)
+            select case  (bcfaceid(nn)) 
+            case (imin) 
               bvti1d(i, j, l) = winfd(l)
               bvti1(i, j, l) = winf(l)
-            case (imax)
+            case (imax) 
               bvti2d(i, j, l) = winfd(l)
               bvti2(i, j, l) = winf(l)
-            case (jmin)
+            case (jmin) 
               bvtj1d(i, j, l) = winfd(l)
               bvtj1(i, j, l) = winf(l)
-            case (jmax)
+            case (jmax) 
               bvtj2d(i, j, l) = winfd(l)
               bvtj2(i, j, l) = winf(l)
-            case (kmin)
+            case (kmin) 
               bvtk1d(i, j, l) = winfd(l)
               bvtk1(i, j, l) = winf(l)
-            case (kmax)
+            case (kmax) 
               bvtk2d(i, j, l) = winfd(l)
               bvtk2(i, j, l) = winf(l)
             end select
@@ -881,23 +879,23 @@ subroutine bcturbinterface_d(nn)
     do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
       do i=bcdata(nn)%icbeg,bcdata(nn)%icend
         do l=nt1,nt2
-          select case  (bcfaceid(nn))
-          case (imin)
+          select case  (bcfaceid(nn)) 
+          case (imin) 
             bvti1d(i, j, l) = wd(1, i, j, l)
             bvti1(i, j, l) = w(1, i, j, l)
-          case (imax)
+          case (imax) 
             bvti2d(i, j, l) = wd(ie, i, j, l)
             bvti2(i, j, l) = w(ie, i, j, l)
-          case (jmin)
+          case (jmin) 
             bvtj1d(i, j, l) = wd(i, 1, j, l)
             bvtj1(i, j, l) = w(i, 1, j, l)
-          case (jmax)
+          case (jmax) 
             bvtj2d(i, j, l) = wd(i, je, j, l)
             bvtj2(i, j, l) = w(i, je, j, l)
-          case (kmin)
+          case (kmin) 
             bvtk1d(i, j, l) = wd(i, j, 1, l)
             bvtk1(i, j, l) = w(i, j, 1, l)
-          case (kmax)
+          case (kmax) 
             bvtk2d(i, j, l) = wd(i, j, ke, l)
             bvtk2(i, j, l) = w(i, j, ke, l)
           end select
@@ -968,26 +966,26 @@ subroutine bcturbtreatment()
 ! loop over the boundary condition subfaces of this block.
 bocos:do nn=1,nbocos
 ! determine the kind of boundary condition for this subface.
-      select case  (bctype(nn))
-      case (nswalladiabatic, nswallisothermal)
+      select case  (bctype(nn)) 
+      case (nswalladiabatic, nswallisothermal) 
 ! viscous wall. there is no difference between an adiabatic
 ! and an isothermal wall for the turbulent equations.
 ! set the implicit treatment of the wall boundary conditions.
         call bcturbwall(nn)
-      case (symm, symmpolar, eulerwall)
+      case (symm, symmpolar, eulerwall) 
 !=============================================================
 !=============================================================
 ! symmetry, polar symmetry or inviscid wall. treatment of
 ! the turbulent equations is identical.
         call bcturbsymm(nn)
-      case (farfield)
+      case (farfield) 
 !=============================================================
 ! farfield. the kind of boundary condition to be applied,
 ! inflow or outflow, depends on the local conditions.
         call bcturbfarfield(nn)
       case (slidinginterface, oversetouterbound, domaininterfaceall, &
 &     domaininterfacerhouvw, domaininterfacep, domaininterfacerho, &
-&     domaininterfacetotal)
+&     domaininterfacetotal) 
 !=============================================================
 ! sliding mesh interface, overset outer boudaries, and
 ! domain interface with another code are not really boundary
@@ -1034,36 +1032,36 @@ subroutine bcturbfarfield(nn)
 ! outflow. simply extrapolation or zero neumann bc
 ! of the turbulent variables.
           do l=nt1,nt2
-            select case  (bcfaceid(nn))
-            case (imin)
+            select case  (bcfaceid(nn)) 
+            case (imin) 
               bmti1(i, j, l, l) = -one
-            case (imax)
+            case (imax) 
               bmti2(i, j, l, l) = -one
-            case (jmin)
+            case (jmin) 
               bmtj1(i, j, l, l) = -one
-            case (jmax)
+            case (jmax) 
               bmtj2(i, j, l, l) = -one
-            case (kmin)
+            case (kmin) 
               bmtk1(i, j, l, l) = -one
-            case (kmax)
+            case (kmax) 
               bmtk2(i, j, l, l) = -one
             end select
           end do
         else
 ! inflow. turbulent variables are prescribed.
           do l=nt1,nt2
-            select case  (bcfaceid(nn))
-            case (imin)
+            select case  (bcfaceid(nn)) 
+            case (imin) 
               bvti1(i, j, l) = winf(l)
-            case (imax)
+            case (imax) 
               bvti2(i, j, l) = winf(l)
-            case (jmin)
+            case (jmin) 
               bvtj1(i, j, l) = winf(l)
-            case (jmax)
+            case (jmax) 
               bvtj2(i, j, l) = winf(l)
-            case (kmin)
+            case (kmin) 
               bvtk1(i, j, l) = winf(l)
-            case (kmax)
+            case (kmax) 
               bvtk2(i, j, l) = winf(l)
             end select
           end do
@@ -1097,18 +1095,18 @@ subroutine bcturbinterface(nn)
     do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
       do i=bcdata(nn)%icbeg,bcdata(nn)%icend
         do l=nt1,nt2
-          select case  (bcfaceid(nn))
-          case (imin)
+          select case  (bcfaceid(nn)) 
+          case (imin) 
             bvti1(i, j, l) = w(1, i, j, l)
-          case (imax)
+          case (imax) 
             bvti2(i, j, l) = w(ie, i, j, l)
-          case (jmin)
+          case (jmin) 
             bvtj1(i, j, l) = w(i, 1, j, l)
-          case (jmax)
+          case (jmax) 
             bvtj2(i, j, l) = w(i, je, j, l)
-          case (kmin)
+          case (kmin) 
             bvtk1(i, j, l) = w(i, j, 1, l)
-          case (kmax)
+          case (kmax) 
             bvtk2(i, j, l) = w(i, j, ke, l)
           end select
         end do
@@ -1142,18 +1140,18 @@ subroutine bcturbsymm(nn)
     do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
       do i=bcdata(nn)%icbeg,bcdata(nn)%icend
         do l=nt1,nt2
-          select case  (bcfaceid(nn))
-          case (imin)
+          select case  (bcfaceid(nn)) 
+          case (imin) 
             bmti1(i, j, l, l) = -one
-          case (imax)
+          case (imax) 
             bmti2(i, j, l, l) = -one
-          case (jmin)
+          case (jmin) 
             bmtj1(i, j, l, l) = -one
-          case (jmax)
+          case (jmax) 
             bmtj2(i, j, l, l) = -one
-          case (kmin)
+          case (kmin) 
             bmtk1(i, j, l, l) = -one
-          case (kmax)
+          case (kmax) 
             bmtk2(i, j, l, l) = -one
           end select
         end do
@@ -1213,55 +1211,55 @@ subroutine bcturbwall_d(nn)
 ! determine the turbulence model used and loop over the faces
 ! of the subface and set the values of bmt and bvt for an
 ! implicit treatment.
-    select case  (turbmodel)
-    case (spalartallmaras, spalartallmarasedwards)
+    select case  (turbmodel) 
+    case (spalartallmaras, spalartallmarasedwards) 
 ! spalart-allmaras type of model. value at the wall is zero,
 ! so simply negate the internal value.
-      select case  (bcfaceid(nn))
-      case (imin)
+      select case  (bcfaceid(nn)) 
+      case (imin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = safact(kssa, d2wall(2, i, j))
+            result1 = saroughfact(2, i, j)
             bmti1(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (imax)
+      case (imax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = safact(kssa, d2wall(il, i, j))
+            result1 = saroughfact(il, i, j)
             bmti2(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (jmin)
+      case (jmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = safact(kssa, d2wall(i, 2, j))
+            result1 = saroughfact(i, 2, j)
             bmtj1(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (jmax)
+      case (jmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = safact(kssa, d2wall(i, jl, j))
+            result1 = saroughfact(i, jl, j)
             bmtj2(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (kmin)
+      case (kmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = safact(kssa, d2wall(i, j, 2))
+            result1 = saroughfact(i, j, 2)
             bmtk1(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (kmax)
+      case (kmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = safact(kssa, d2wall(i, j, kl))
+            result1 = saroughfact(i, j, kl)
             bmtk2(i, j, itu1, itu1) = -result1
           end do
         end do
       end select
-    case (komegawilcox, komegamodified, mentersst)
+    case (komegawilcox, komegamodified, mentersst) 
 !        ================================================================
 ! k-omega type of models. k is zero on the wall and thus the
 ! halo value is the negative of the first internal cell.
@@ -1273,8 +1271,8 @@ subroutine bcturbwall_d(nn)
 ! distance. due to the usage of the dd2wall pointer and the
 ! fact that the original d2wall array starts at 2, there is
 ! an offset of -1 present in dd2wall.
-      select case  (bcfaceid(nn))
-      case (imin)
+      select case  (bcfaceid(nn)) 
+      case (imin) 
         iimax = jl
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -1311,7 +1309,7 @@ subroutine bcturbwall_d(nn)
             bvti1(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (imax)
+      case (imax) 
         iimax = jl
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -1348,7 +1346,7 @@ subroutine bcturbwall_d(nn)
             bvti2(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (jmin)
+      case (jmin) 
         iimax = il
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -1385,7 +1383,7 @@ subroutine bcturbwall_d(nn)
             bvtj1(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (jmax)
+      case (jmax) 
         iimax = il
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -1422,7 +1420,7 @@ subroutine bcturbwall_d(nn)
             bvtj2(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (kmin)
+      case (kmin) 
         iimax = il
         jjmax = jl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -1459,7 +1457,7 @@ subroutine bcturbwall_d(nn)
             bvtk1(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (kmax)
+      case (kmax) 
         iimax = il
         jjmax = jl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -1497,47 +1495,47 @@ subroutine bcturbwall_d(nn)
           end do
         end do
       end select
-    case (ktau)
+    case (ktau) 
 !        ================================================================
 ! k-tau model. both k and tau are zero at the wall, so the
 ! negative value of the internal cell is taken for the halo.
-      select case  (bcfaceid(nn))
-      case (imin)
+      select case  (bcfaceid(nn)) 
+      case (imin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmti1(i, j, itu1, itu1) = one
             bmti1(i, j, itu2, itu2) = one
           end do
         end do
-      case (imax)
+      case (imax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmti2(i, j, itu1, itu1) = one
             bmti2(i, j, itu2, itu2) = one
           end do
         end do
-      case (jmin)
+      case (jmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmtj1(i, j, itu1, itu1) = one
             bmtj1(i, j, itu2, itu2) = one
           end do
         end do
-      case (jmax)
+      case (jmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmtj2(i, j, itu1, itu1) = one
             bmtj2(i, j, itu2, itu2) = one
           end do
         end do
-      case (kmin)
+      case (kmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmtk1(i, j, itu1, itu1) = one
             bmtk1(i, j, itu2, itu2) = one
           end do
         end do
-      case (kmax)
+      case (kmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmtk2(i, j, itu1, itu1) = one
@@ -1573,8 +1571,8 @@ subroutine turb2ndhalo_d(nn)
 ! some pointers accordingly.
 ! loop over the turbulent variables and set the second halo
 ! value. if this is an eddy model, also set the eddy viscosity.
-    select case  (bcfaceid(nn))
-    case (imin)
+    select case  (bcfaceid(nn)) 
+    case (imin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           do l=nt1,nt2
@@ -1587,7 +1585,7 @@ subroutine turb2ndhalo_d(nn)
           end if
         end do
       end do
-    case (imax)
+    case (imax) 
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -1601,7 +1599,7 @@ subroutine turb2ndhalo_d(nn)
           end if
         end do
       end do
-    case (jmin)
+    case (jmin) 
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -1615,7 +1613,7 @@ subroutine turb2ndhalo_d(nn)
           end if
         end do
       end do
-    case (jmax)
+    case (jmax) 
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -1629,7 +1627,7 @@ subroutine turb2ndhalo_d(nn)
           end if
         end do
       end do
-    case (kmin)
+    case (kmin) 
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -1643,7 +1641,7 @@ subroutine turb2ndhalo_d(nn)
           end if
         end do
       end do
-    case (kmax)
+    case (kmax) 
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -1681,8 +1679,8 @@ subroutine turb2ndhalo(nn)
 ! some pointers accordingly.
 ! loop over the turbulent variables and set the second halo
 ! value. if this is an eddy model, also set the eddy viscosity.
-    select case  (bcfaceid(nn))
-    case (imin)
+    select case  (bcfaceid(nn)) 
+    case (imin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           do l=nt1,nt2
@@ -1691,7 +1689,7 @@ subroutine turb2ndhalo(nn)
           if (eddymodel) rev(0, i, j) = rev(1, i, j)
         end do
       end do
-    case (imax)
+    case (imax) 
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -1701,7 +1699,7 @@ subroutine turb2ndhalo(nn)
           if (eddymodel) rev(ib, i, j) = rev(ie, i, j)
         end do
       end do
-    case (jmin)
+    case (jmin) 
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -1711,7 +1709,7 @@ subroutine turb2ndhalo(nn)
           if (eddymodel) rev(i, 0, j) = rev(i, 1, j)
         end do
       end do
-    case (jmax)
+    case (jmax) 
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -1721,7 +1719,7 @@ subroutine turb2ndhalo(nn)
           if (eddymodel) rev(i, jb, j) = rev(i, je, j)
         end do
       end do
-    case (kmin)
+    case (kmin) 
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -1731,7 +1729,7 @@ subroutine turb2ndhalo(nn)
           if (eddymodel) rev(i, j, 0) = rev(i, j, 1)
         end do
       end do
-    case (kmax)
+    case (kmax) 
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -1768,8 +1766,8 @@ subroutine turbbcnswall(secondhalo)
       call bcturbwall(nn)
 ! loop over the faces and set the state in
 ! the turbulent halo cells.
-      select case  (bcfaceid(nn))
-      case (imin)
+      select case  (bcfaceid(nn)) 
+      case (imin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -1786,7 +1784,7 @@ subroutine turbbcnswall(secondhalo)
             end if
           end do
         end do
-      case (imax)
+      case (imax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -1803,7 +1801,7 @@ subroutine turbbcnswall(secondhalo)
             end if
           end do
         end do
-      case (jmin)
+      case (jmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -1820,7 +1818,7 @@ subroutine turbbcnswall(secondhalo)
             end if
           end do
         end do
-      case (jmax)
+      case (jmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -1837,7 +1835,7 @@ subroutine turbbcnswall(secondhalo)
             end if
           end do
         end do
-      case (kmin)
+      case (kmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -1854,7 +1852,7 @@ subroutine turbbcnswall(secondhalo)
             end if
           end do
         end do
-      case (kmax)
+      case (kmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -1919,55 +1917,55 @@ subroutine bcturbwall(nn)
 ! determine the turbulence model used and loop over the faces
 ! of the subface and set the values of bmt and bvt for an
 ! implicit treatment.
-    select case  (turbmodel)
-    case (spalartallmaras, spalartallmarasedwards)
+    select case  (turbmodel) 
+    case (spalartallmaras, spalartallmarasedwards) 
 ! spalart-allmaras type of model. value at the wall is zero,
 ! so simply negate the internal value.
-      select case  (bcfaceid(nn))
-      case (imin)
+      select case  (bcfaceid(nn)) 
+      case (imin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = safact(kssa, d2wall(2, i, j))
+            result1 = saroughfact(2, i, j)
             bmti1(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (imax)
+      case (imax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = safact(kssa, d2wall(il, i, j))
+            result1 = saroughfact(il, i, j)
             bmti2(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (jmin)
+      case (jmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = safact(kssa, d2wall(i, 2, j))
+            result1 = saroughfact(i, 2, j)
             bmtj1(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (jmax)
+      case (jmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = safact(kssa, d2wall(i, jl, j))
+            result1 = saroughfact(i, jl, j)
             bmtj2(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (kmin)
+      case (kmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = safact(kssa, d2wall(i, j, 2))
+            result1 = saroughfact(i, j, 2)
             bmtk1(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (kmax)
+      case (kmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = safact(kssa, d2wall(i, j, kl))
+            result1 = saroughfact(i, j, kl)
             bmtk2(i, j, itu1, itu1) = -result1
           end do
         end do
       end select
-    case (komegawilcox, komegamodified, mentersst)
+    case (komegawilcox, komegamodified, mentersst) 
 !        ================================================================
 ! k-omega type of models. k is zero on the wall and thus the
 ! halo value is the negative of the first internal cell.
@@ -1979,8 +1977,8 @@ subroutine bcturbwall(nn)
 ! distance. due to the usage of the dd2wall pointer and the
 ! fact that the original d2wall array starts at 2, there is
 ! an offset of -1 present in dd2wall.
-      select case  (bcfaceid(nn))
-      case (imin)
+      select case  (bcfaceid(nn)) 
+      case (imin) 
         iimax = jl
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -2012,7 +2010,7 @@ subroutine bcturbwall(nn)
             bvti1(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (imax)
+      case (imax) 
         iimax = jl
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -2044,7 +2042,7 @@ subroutine bcturbwall(nn)
             bvti2(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (jmin)
+      case (jmin) 
         iimax = il
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -2076,7 +2074,7 @@ subroutine bcturbwall(nn)
             bvtj1(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (jmax)
+      case (jmax) 
         iimax = il
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -2108,7 +2106,7 @@ subroutine bcturbwall(nn)
             bvtj2(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (kmin)
+      case (kmin) 
         iimax = il
         jjmax = jl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -2140,7 +2138,7 @@ subroutine bcturbwall(nn)
             bvtk1(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (kmax)
+      case (kmax) 
         iimax = il
         jjmax = jl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -2173,47 +2171,47 @@ subroutine bcturbwall(nn)
           end do
         end do
       end select
-    case (ktau)
+    case (ktau) 
 !        ================================================================
 ! k-tau model. both k and tau are zero at the wall, so the
 ! negative value of the internal cell is taken for the halo.
-      select case  (bcfaceid(nn))
-      case (imin)
+      select case  (bcfaceid(nn)) 
+      case (imin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmti1(i, j, itu1, itu1) = one
             bmti1(i, j, itu2, itu2) = one
           end do
         end do
-      case (imax)
+      case (imax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmti2(i, j, itu1, itu1) = one
             bmti2(i, j, itu2, itu2) = one
           end do
         end do
-      case (jmin)
+      case (jmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmtj1(i, j, itu1, itu1) = one
             bmtj1(i, j, itu2, itu2) = one
           end do
         end do
-      case (jmax)
+      case (jmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmtj2(i, j, itu1, itu1) = one
             bmtj2(i, j, itu2, itu2) = one
           end do
         end do
-      case (kmin)
+      case (kmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmtk1(i, j, itu1, itu1) = one
             bmtk1(i, j, itu2, itu2) = one
           end do
         end do
-      case (kmax)
+      case (kmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmtk2(i, j, itu1, itu1) = one
@@ -2223,20 +2221,23 @@ subroutine bcturbwall(nn)
       end select
     end select
   end subroutine bcturbwall
-  function safact(ks, d)
+  function saroughfact(i, j, k)
+! returns either the regular sa-boundary condition
+! or the modified roughness-boundary condition
     use constants
+    use inputphysics, only : useroughsa
+    use blockpointers, only : ks, d2wall
     implicit none
 ! dummy arguments
-    real(kind=realtype) :: safact
+    real(kind=realtype) :: saroughfact
 ! local variablse
-    real(kind=realtype) :: ks
-    real(kind=realtype) :: d
-    if (ks .eq. zero) then
-      safact = -one
-    else if (d .eq. zero) then
-      safact = one
+    integer(kind=inttype) :: i, j, k
+    if (useroughsa) then
+      saroughfact = (ks(i, j, k)-d2wall(i, j, k)/0.03)/(ks(i, j, k)+&
+&       d2wall(i, j, k)/0.03)
+      return
     else
-      safact = (ks-d/0.03)/(ks+d/0.03)
+      saroughfact = one
     end if
-  end function safact
+  end function saroughfact
 end module turbbcroutines_d
diff --git a/src/adjoint/outputForward/walldistance_d.f90 b/src/adjoint/outputForward/walldistance_d.f90
index f4c433ed2..9ba2bdce0 100644
--- a/src/adjoint/outputForward/walldistance_d.f90
+++ b/src/adjoint/outputForward/walldistance_d.f90
@@ -158,4 +158,51 @@ subroutine updatewalldistancesquickly(nn, level, sps)
       end do
     end do
   end subroutine updatewalldistancesquickly
+  subroutine updatewallroughness()
+! sets the roughness-value (ks) of the nearest wall-cell.
+    use constants
+    use blockpointers, only : il, jl, kl, flowdoms, ks, bcdata, ndom, &
+&   nbocos
+    use inputtimespectral, only : ntimeintervalsspectral
+    ! use utils_d, only : setpointers TODO: How is this supposed to be handled?
+    use iteration, only : groundlevel
+    implicit none
+! local variables
+    integer(kind=inttype) :: i, j, k, iii, jjj, boco, dom
+    integer(kind=inttype) :: nn, sps, level, nlevels
+    intrinsic ubound
+    external setpointers
+    nlevels = ubound(flowdoms, 2)
+    do level=1,nlevels
+      do sps=1,ntimeintervalsspectral
+        do nn=1,ndom
+          ! call setpointers(nn, level, sps)
+          do k=2,kl
+            do j=2,jl
+              do i=2,il
+                if (flowdoms(nn, level, sps)%nearestbccell(1, i, j, k) &
+&                   .eq. 0) then
+! this cell is too far away and has no
+! association. set the roughness to zero.
+                  print*, 'ks cutoff'
+                  ks(i, j, k) = zero
+                else
+                  dom = flowdoms(nn, level, sps)%nearestbccell(1, i, j, &
+&                   k)
+                  boco = flowdoms(nn, level, sps)%nearestbccell(2, i, j&
+&                   , k)
+                  iii = flowdoms(nn, level, sps)%nearestbccell(3, i, j, &
+&                   k)
+                  jjj = flowdoms(nn, level, sps)%nearestbccell(4, i, j, &
+&                   k)
+                  ks(i, j, k) = flowdoms(dom, level, sps)%bcdata(boco)%&
+&                   ksns_wall(iii, jjj)
+                end if
+              end do
+            end do
+          end do
+        end do
+      end do
+    end do
+  end subroutine updatewallroughness
 end module walldistance_d
diff --git a/src/adjoint/outputReverse/bcdata_b.f90 b/src/adjoint/outputReverse/bcdata_b.f90
index 83022699e..00d36e9ca 100644
--- a/src/adjoint/outputReverse/bcdata_b.f90
+++ b/src/adjoint/outputReverse/bcdata_b.f90
@@ -13,10 +13,26 @@ module bcdata_b
   subroutine setbcvarnamesisothermalwall()
     use cgnsnames
     use constants
+    use inputphysics, only : useroughsa
     implicit none
     nbcvar = nbcvarisothermalwall
     bcvarnames(1) = cgnstemp
+    if (useroughsa) then
+      nbcvar = nbcvar + 1
+      bcvarnames(2) = cgnssandgrainroughness
+    end if
   end subroutine setbcvarnamesisothermalwall
+  subroutine setbcvarnamesadiabaticwall()
+    use cgnsnames
+    use constants
+    use inputphysics, only : useroughsa
+    implicit none
+    nbcvar = nbcvaradiabaticwall
+    if (useroughsa) then
+      nbcvar = nbcvar + 1
+      bcvarnames(1) = cgnssandgrainroughness
+    end if
+  end subroutine setbcvarnamesadiabaticwall
   subroutine setbcvarnamessubsonicinflow()
     use constants
     use cgnsnames
@@ -339,6 +355,7 @@ subroutine bcdataisothermalwall_b(boco, bcvararray, bcvararrayd, ibeg&
     use blockpointers, only : bcfaceid, bcdata, bcdatad, nbkglobal
     use utils_b, only : terminate, sitemperature
     use flowvarrefstate, only : tref, trefd
+    use inputphysics, only : useroughsa
     implicit none
 !
 !      subroutine arguments.
@@ -390,6 +407,7 @@ subroutine bcdataisothermalwall(boco, bcvararray, ibeg, iend, jbeg, &
     use blockpointers, only : bcfaceid, bcdata, nbkglobal
     use utils_b, only : terminate, sitemperature
     use flowvarrefstate, only : tref
+    use inputphysics, only : useroughsa
     implicit none
 !
 !      subroutine arguments.
@@ -421,9 +439,52 @@ subroutine bcdataisothermalwall(boco, bcvararray, ibeg, iend, jbeg, &
 &         tref
       end do
     end do
+! set a value of 0 if it was not possible to determine the
+! sand grain roughness
+    if (useroughsa) then
+      if (.not.bcvarpresent(2)) bcvararray(:, :, 1) = zero
+      do j=jbeg,jend
+        do i=ibeg,iend
+          bcdata(boco)%ksns_wall(i, j) = bcvararray(i, j, 2)
+        end do
+      end do
+    end if
  100 format('zone ',a,', boundary subface ',a, &
 &          ': wall temperature not specified for isothermal wall')
   end subroutine bcdataisothermalwall
+  subroutine bcdataadiabaticwall(boco, bcvararray, ibeg, iend, jbeg, &
+&   jend)
+!
+!       tries to extract the equivalent sand grain roughness. it sets
+!       a default value of 0.0
+!
+    use constants
+    use cgnsnames
+    use inputphysics, only : useroughsa
+    use blockpointers, only : bcfaceid, bcdata, nbkglobal
+    implicit none
+!
+!      subroutine arguments.
+!
+    integer(kind=inttype) :: boco
+    integer(kind=inttype) :: ibeg, iend, jbeg, jend
+    real(kind=realtype), dimension(ibeg:iend, jbeg:jend, nbcvarmax) :: &
+&   bcvararray
+!
+!      local variables.
+!
+    integer(kind=inttype) :: i, j
+! set a value of 0 if it was not possible to determine the
+! sand grain roughness
+    if (useroughsa) then
+      if (.not.bcvarpresent(1)) bcvararray(:, :, 1) = zero
+      do j=jbeg,jend
+        do i=ibeg,iend
+          bcdata(boco)%ksns_wall(i, j) = bcvararray(i, j, 1)
+        end do
+      end do
+    end if
+  end subroutine bcdataadiabaticwall
   subroutine bcdatasubsonicinflow(boco, bcvararray, ibeg, iend, jbeg, &
 &   jend, allturbpresent)
 !
diff --git a/src/adjoint/outputReverse/sa_b.f90 b/src/adjoint/outputReverse/sa_b.f90
index 07348678c..b1e2bc429 100644
--- a/src/adjoint/outputReverse/sa_b.f90
+++ b/src/adjoint/outputReverse/sa_b.f90
@@ -42,8 +42,8 @@ subroutine sasource_b()
     real(kind=realtype), parameter :: f23=two*third
 ! local variables.
     integer(kind=inttype) :: i, j, k, nn, ii
-    real(kind=realtype) :: dnew, cr1
-    real(kind=realtype) :: dnewd
+    real(kind=realtype) :: dist, kslocal
+    real(kind=realtype) :: distd
     real(kind=realtype) :: fv1, fv2, ft2
     real(kind=realtype) :: fv1d, fv2d, ft2d
     real(kind=realtype) :: ss, sst, nu, dist2inv, chi, chi2, chi3
@@ -95,8 +95,6 @@ subroutine sasource_b()
     cv13 = rsacv1**3
     kar2inv = one/rsak**2
     cw36 = rsacw3**6
-! constants for sa rough
-    cr1 = 0.5
 ! determine the non-dimensional wheel speed of this block.
     omegax = timeref*sections(sectionid)%rotrate(1)
     omegay = timeref*sections(sectionid)%rotrate(2)
@@ -198,11 +196,20 @@ subroutine sasource_b()
 ! wall distance squared, the ratio chi (ratio of nutilde
 ! and nu) and the functions fv1 and fv2. the latter corrects
 ! the production term near a viscous wall.
-! sa rough
-        dnew = d2wall(i, j, k) + 0.03*kssa
+! as the rough version of sa is supported, this looks slightly different
+! than the standard sa implementation
+        if (useroughsa) then
+          kslocal = ks(i, j, k)
+          dist = d2wall(i, j, k) + 0.03*kslocal
+          call pushcontrol1b(0)
+        else
+          kslocal = zero
+          dist = d2wall(i, j, k)
+          call pushcontrol1b(1)
+        end if
         nu = rlv(i, j, k)/w(i, j, k, irho)
-        dist2inv = one/dnew**2
-        chi = w(i, j, k, itu1)/nu + cr1*kssa/dnew
+        dist2inv = one/dist**2
+        chi = w(i, j, k, itu1)/nu + rsacr1*kslocal/dist
         chi2 = chi*chi
         chi3 = chi*chi2
         fv1 = chi3/(chi3+cv13)
@@ -339,11 +346,16 @@ subroutine sasource_b()
         chid = 2*chi*chi2d + chi2*chi3d
         nud = tempd4 - w(i, j, k, itu1)*chid/nu**2
         wd(i, j, k, itu1) = wd(i, j, k, itu1) + chid/nu
-        dnewd = -(one*2*dist2invd/dnew**3) - cr1*kssa*chid/dnew**2
+        distd = -(one*2*dist2invd/dist**3) - rsacr1*kslocal*chid/dist**2
         temp = w(i, j, k, irho)
         rlvd(i, j, k) = rlvd(i, j, k) + nud/temp
         wd(i, j, k, irho) = wd(i, j, k, irho) - rlv(i, j, k)*nud/temp**2
-        d2walld(i, j, k) = d2walld(i, j, k) + dnewd
+        call popcontrol1b(branch)
+        if (branch .eq. 0) then
+          d2walld(i, j, k) = d2walld(i, j, k) + distd
+        else
+          d2walld(i, j, k) = d2walld(i, j, k) + distd
+        end if
         call popcontrol2b(branch)
         if (branch .eq. 0) then
           if (strainprod .eq. 0.0_8) then
@@ -545,7 +557,7 @@ subroutine sasource()
     real(kind=realtype), parameter :: f23=two*third
 ! local variables.
     integer(kind=inttype) :: i, j, k, nn, ii
-    real(kind=realtype) :: dnew, cr1
+    real(kind=realtype) :: dist, kslocal
     real(kind=realtype) :: fv1, fv2, ft2
     real(kind=realtype) :: ss, sst, nu, dist2inv, chi, chi2, chi3
     real(kind=realtype) :: rr, gg, gg6, termfw, fwsa, term1, term2
@@ -568,8 +580,6 @@ subroutine sasource()
     kar2inv = one/rsak**2
     cw36 = rsacw3**6
     cb3inv = one/rsacb3
-! constants for sa rough
-    cr1 = 0.5
 ! determine the non-dimensional wheel speed of this block.
     omegax = timeref*sections(sectionid)%rotrate(1)
     omegay = timeref*sections(sectionid)%rotrate(2)
@@ -662,11 +672,18 @@ subroutine sasource()
 ! wall distance squared, the ratio chi (ratio of nutilde
 ! and nu) and the functions fv1 and fv2. the latter corrects
 ! the production term near a viscous wall.
-! sa rough
-        dnew = d2wall(i, j, k) + 0.03*kssa
+! as the rough version of sa is supported, this looks slightly different
+! than the standard sa implementation
+        if (useroughsa) then
+          kslocal = ks(i, j, k)
+          dist = d2wall(i, j, k) + 0.03*kslocal
+        else
+          kslocal = zero
+          dist = d2wall(i, j, k)
+        end if
         nu = rlv(i, j, k)/w(i, j, k, irho)
-        dist2inv = one/dnew**2
-        chi = w(i, j, k, itu1)/nu + cr1*kssa/dnew
+        dist2inv = one/dist**2
+        chi = w(i, j, k, itu1)/nu + rsacr1*kslocal/dist
         chi2 = chi*chi
         chi3 = chi*chi2
         fv1 = chi3/(chi3+cv13)
diff --git a/src/adjoint/outputReverse/turbbcroutines_b.f90 b/src/adjoint/outputReverse/turbbcroutines_b.f90
index 50520f791..6a73ea41e 100644
--- a/src/adjoint/outputReverse/turbbcroutines_b.f90
+++ b/src/adjoint/outputReverse/turbbcroutines_b.f90
@@ -73,8 +73,8 @@ subroutine applyallturbbcthisblock_b(secondhalo)
       if (wallfunctions) then
         call pushcontrol3b(0)
       else
-        select case  (bcfaceid(nn))
-        case (imin)
+        select case  (bcfaceid(nn)) 
+        case (imin) 
           ad_from0 = bcdata(nn)%jcbeg
           do j=ad_from0,bcdata(nn)%jcend
             ad_from = bcdata(nn)%icbeg
@@ -85,7 +85,7 @@ subroutine applyallturbbcthisblock_b(secondhalo)
           call pushinteger4(j - 1)
           call pushinteger4(ad_from0)
           call pushcontrol3b(2)
-        case (imax)
+        case (imax) 
           ad_from2 = bcdata(nn)%jcbeg
           do j=ad_from2,bcdata(nn)%jcend
             ad_from1 = bcdata(nn)%icbeg
@@ -96,7 +96,7 @@ subroutine applyallturbbcthisblock_b(secondhalo)
           call pushinteger4(j - 1)
           call pushinteger4(ad_from2)
           call pushcontrol3b(3)
-        case (jmin)
+        case (jmin) 
           ad_from4 = bcdata(nn)%jcbeg
           do j=ad_from4,bcdata(nn)%jcend
             ad_from3 = bcdata(nn)%icbeg
@@ -107,7 +107,7 @@ subroutine applyallturbbcthisblock_b(secondhalo)
           call pushinteger4(j - 1)
           call pushinteger4(ad_from4)
           call pushcontrol3b(4)
-        case (jmax)
+        case (jmax) 
           ad_from6 = bcdata(nn)%jcbeg
           do j=ad_from6,bcdata(nn)%jcend
             ad_from5 = bcdata(nn)%icbeg
@@ -118,7 +118,7 @@ subroutine applyallturbbcthisblock_b(secondhalo)
           call pushinteger4(j - 1)
           call pushinteger4(ad_from6)
           call pushcontrol3b(5)
-        case (kmin)
+        case (kmin) 
           ad_from8 = bcdata(nn)%jcbeg
           do j=ad_from8,bcdata(nn)%jcend
             ad_from7 = bcdata(nn)%icbeg
@@ -129,7 +129,7 @@ subroutine applyallturbbcthisblock_b(secondhalo)
           call pushinteger4(j - 1)
           call pushinteger4(ad_from8)
           call pushcontrol3b(6)
-        case (kmax)
+        case (kmax) 
           ad_from10 = bcdata(nn)%jcbeg
           do j=ad_from10,bcdata(nn)%jcend
             ad_from9 = bcdata(nn)%icbeg
@@ -325,8 +325,8 @@ subroutine applyallturbbcthisblock(secondhalo)
 ! loop over the faces and set the state in
 ! the turbulent halo cells.
       if (.not.wallfunctions) then
-        select case  (bcfaceid(nn))
-        case (imin)
+        select case  (bcfaceid(nn)) 
+        case (imin) 
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -338,7 +338,7 @@ subroutine applyallturbbcthisblock(secondhalo)
               end do
             end do
           end do
-        case (imax)
+        case (imax) 
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -350,7 +350,7 @@ subroutine applyallturbbcthisblock(secondhalo)
               end do
             end do
           end do
-        case (jmin)
+        case (jmin) 
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -362,7 +362,7 @@ subroutine applyallturbbcthisblock(secondhalo)
               end do
             end do
           end do
-        case (jmax)
+        case (jmax) 
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -374,7 +374,7 @@ subroutine applyallturbbcthisblock(secondhalo)
               end do
             end do
           end do
-        case (kmin)
+        case (kmin) 
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -386,7 +386,7 @@ subroutine applyallturbbcthisblock(secondhalo)
               end do
             end do
           end do
-        case (kmax)
+        case (kmax) 
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -450,15 +450,15 @@ subroutine bceddynowall_b(nn)
     real(kind=realtype) :: tmpd1
     real(kind=realtype) :: tmpd0
 ! determine the face id on which the subface and copy
-    select case  (bcfaceid(nn))
-    case (imin)
+    select case  (bcfaceid(nn)) 
+    case (imin) 
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
         do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
           revd(2, i, j) = revd(2, i, j) + revd(1, i, j)
           revd(1, i, j) = 0.0_8
         end do
       end do
-    case (imax)
+    case (imax) 
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
         do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
           tmpd = revd(ie, i, j)
@@ -466,14 +466,14 @@ subroutine bceddynowall_b(nn)
           revd(il, i, j) = revd(il, i, j) + tmpd
         end do
       end do
-    case (jmin)
+    case (jmin) 
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
         do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
           revd(i, 2, j) = revd(i, 2, j) + revd(i, 1, j)
           revd(i, 1, j) = 0.0_8
         end do
       end do
-    case (jmax)
+    case (jmax) 
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
         do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
           tmpd0 = revd(i, je, j)
@@ -481,14 +481,14 @@ subroutine bceddynowall_b(nn)
           revd(i, jl, j) = revd(i, jl, j) + tmpd0
         end do
       end do
-    case (kmin)
+    case (kmin) 
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
         do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
           revd(i, j, 2) = revd(i, j, 2) + revd(i, j, 1)
           revd(i, j, 1) = 0.0_8
         end do
       end do
-    case (kmax)
+    case (kmax) 
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
         do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
           tmpd1 = revd(i, j, ke)
@@ -518,38 +518,38 @@ subroutine bceddynowall(nn)
 !
     integer(kind=inttype) :: i, j
 ! determine the face id on which the subface and copy
-    select case  (bcfaceid(nn))
-    case (imin)
+    select case  (bcfaceid(nn)) 
+    case (imin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(1, i, j) = rev(2, i, j)
         end do
       end do
-    case (imax)
+    case (imax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(ie, i, j) = rev(il, i, j)
         end do
       end do
-    case (jmin)
+    case (jmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(i, 1, j) = rev(i, 2, j)
         end do
       end do
-    case (jmax)
+    case (jmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(i, je, j) = rev(i, jl, j)
         end do
       end do
-    case (kmin)
+    case (kmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(i, j, 1) = rev(i, j, 2)
         end do
       end do
-    case (kmax)
+    case (kmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(i, j, ke) = rev(i, j, kl)
@@ -570,7 +570,6 @@ subroutine bceddywall_b(nn)
 !
     use constants
     use blockpointers
-    use inputphysics, only : kssa
     implicit none
 !
 !      subroutine arguments.
@@ -590,12 +589,12 @@ subroutine bceddywall_b(nn)
 ! determine the face id on which the subface is located and
 ! loop over the faces of the subface and set the eddy viscosity
 ! in the halo cells.
-    select case  (bcfaceid(nn))
-    case (imin)
+    select case  (bcfaceid(nn)) 
+    case (imin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           call pushreal8(result1)
-          result1 = safact(kssa, d2wall(2, i, j))
+          result1 = saroughfact(2, i, j)
         end do
       end do
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
@@ -605,11 +604,11 @@ subroutine bceddywall_b(nn)
           call popreal8(result1)
         end do
       end do
-    case (imax)
+    case (imax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           call pushreal8(result1)
-          result1 = safact(kssa, d2wall(il, i, j))
+          result1 = saroughfact(il, i, j)
         end do
       end do
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
@@ -620,11 +619,11 @@ subroutine bceddywall_b(nn)
           call popreal8(result1)
         end do
       end do
-    case (jmin)
+    case (jmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           call pushreal8(result1)
-          result1 = safact(kssa, d2wall(i, 2, j))
+          result1 = saroughfact(i, 2, j)
         end do
       end do
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
@@ -634,11 +633,11 @@ subroutine bceddywall_b(nn)
           call popreal8(result1)
         end do
       end do
-    case (jmax)
+    case (jmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           call pushreal8(result1)
-          result1 = safact(kssa, d2wall(i, jl, j))
+          result1 = saroughfact(i, jl, j)
         end do
       end do
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
@@ -649,11 +648,11 @@ subroutine bceddywall_b(nn)
           call popreal8(result1)
         end do
       end do
-    case (kmin)
+    case (kmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           call pushreal8(result1)
-          result1 = safact(kssa, d2wall(i, j, 2))
+          result1 = saroughfact(i, j, 2)
         end do
       end do
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
@@ -663,11 +662,11 @@ subroutine bceddywall_b(nn)
           call popreal8(result1)
         end do
       end do
-    case (kmax)
+    case (kmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           call pushreal8(result1)
-          result1 = safact(kssa, d2wall(i, j, kl))
+          result1 = saroughfact(i, j, kl)
         end do
       end do
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
@@ -689,7 +688,6 @@ subroutine bceddywall(nn)
 !
     use constants
     use blockpointers
-    use inputphysics, only : kssa
     implicit none
 !
 !      subroutine arguments.
@@ -703,46 +701,46 @@ subroutine bceddywall(nn)
 ! determine the face id on which the subface is located and
 ! loop over the faces of the subface and set the eddy viscosity
 ! in the halo cells.
-    select case  (bcfaceid(nn))
-    case (imin)
+    select case  (bcfaceid(nn)) 
+    case (imin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = safact(kssa, d2wall(2, i, j))
+          result1 = saroughfact(2, i, j)
           rev(1, i, j) = result1*rev(2, i, j)
         end do
       end do
-    case (imax)
+    case (imax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = safact(kssa, d2wall(il, i, j))
+          result1 = saroughfact(il, i, j)
           rev(ie, i, j) = result1*rev(il, i, j)
         end do
       end do
-    case (jmin)
+    case (jmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = safact(kssa, d2wall(i, 2, j))
+          result1 = saroughfact(i, 2, j)
           rev(i, 1, j) = result1*rev(i, 2, j)
         end do
       end do
-    case (jmax)
+    case (jmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = safact(kssa, d2wall(i, jl, j))
+          result1 = saroughfact(i, jl, j)
           rev(i, je, j) = result1*rev(i, jl, j)
         end do
       end do
-    case (kmin)
+    case (kmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = safact(kssa, d2wall(i, j, 2))
+          result1 = saroughfact(i, j, 2)
           rev(i, j, 1) = result1*rev(i, j, 2)
         end do
       end do
-    case (kmax)
+    case (kmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = safact(kssa, d2wall(i, j, kl))
+          result1 = saroughfact(i, j, kl)
           rev(i, j, ke) = result1*rev(i, j, kl)
         end do
       end do
@@ -776,23 +774,23 @@ subroutine bcturbinflow(nn)
       do i=bcdata(nn)%icbeg,bcdata(nn)%icend
 ! loop over the number of turbulent variables.
         do l=nt1,nt2
-          select case  (bcfaceid(nn))
-          case (imin)
+          select case  (bcfaceid(nn)) 
+          case (imin) 
             bvti1(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmti1(i, j, l, l) = one
-          case (imax)
+          case (imax) 
             bvti2(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmti2(i, j, l, l) = one
-          case (jmin)
+          case (jmin) 
             bvtj1(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmtj1(i, j, l, l) = one
-          case (jmax)
+          case (jmax) 
             bvtj2(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmtj2(i, j, l, l) = one
-          case (kmin)
+          case (kmin) 
             bvtk1(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmtk1(i, j, l, l) = one
-          case (kmax)
+          case (kmax) 
             bvtk2(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmtk2(i, j, l, l) = one
           end select
@@ -829,18 +827,18 @@ subroutine bcturboutflow(nn)
     do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
       do i=bcdata(nn)%icbeg,bcdata(nn)%icend
         do l=nt1,nt2
-          select case  (bcfaceid(nn))
-          case (imin)
+          select case  (bcfaceid(nn)) 
+          case (imin) 
             bmti1(i, j, l, l) = -one
-          case (imax)
+          case (imax) 
             bmti2(i, j, l, l) = -one
-          case (jmin)
+          case (jmin) 
             bmtj1(i, j, l, l) = -one
-          case (jmax)
+          case (jmax) 
             bmtj2(i, j, l, l) = -one
-          case (kmin)
+          case (kmin) 
             bmtk1(i, j, l, l) = -one
-          case (kmax)
+          case (kmax) 
             bmtk2(i, j, l, l) = -one
           end select
         end do
@@ -882,16 +880,16 @@ subroutine bcturbtreatment_b()
 ! loop over the boundary condition subfaces of this block.
 bocos:do nn=1,nbocos
 ! determine the kind of boundary condition for this subface.
-      select case  (bctype(nn))
-      case (nswalladiabatic, nswallisothermal)
+      select case  (bctype(nn)) 
+      case (nswalladiabatic, nswallisothermal) 
         call pushcontrol2b(2)
-      case (symm, symmpolar, eulerwall)
+      case (symm, symmpolar, eulerwall) 
         call pushcontrol2b(3)
-      case (farfield)
+      case (farfield) 
         call pushcontrol2b(1)
       case (slidinginterface, oversetouterbound, domaininterfaceall, &
 &     domaininterfacerhouvw, domaininterfacep, domaininterfacerho, &
-&     domaininterfacetotal)
+&     domaininterfacetotal) 
         call pushcontrol2b(0)
       case default
         call pushcontrol2b(3)
@@ -981,18 +979,18 @@ subroutine bcturbfarfield_b(nn)
         else
 ! inflow. turbulent variables are prescribed.
           do l=nt1,nt2
-            select case  (bcfaceid(nn))
-            case (imin)
+            select case  (bcfaceid(nn)) 
+            case (imin) 
               call pushcontrol3b(5)
-            case (imax)
+            case (imax) 
               call pushcontrol3b(4)
-            case (jmin)
+            case (jmin) 
               call pushcontrol3b(3)
-            case (jmax)
+            case (jmax) 
               call pushcontrol3b(2)
-            case (kmin)
+            case (kmin) 
               call pushcontrol3b(1)
-            case (kmax)
+            case (kmax) 
               call pushcontrol3b(0)
             case default
               call pushcontrol3b(6)
@@ -1070,18 +1068,18 @@ subroutine bcturbinterface_b(nn)
     do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
       do i=bcdata(nn)%icbeg,bcdata(nn)%icend
         do l=nt1,nt2
-          select case  (bcfaceid(nn))
-          case (imin)
+          select case  (bcfaceid(nn)) 
+          case (imin) 
             call pushcontrol3b(5)
-          case (imax)
+          case (imax) 
             call pushcontrol3b(4)
-          case (jmin)
+          case (jmin) 
             call pushcontrol3b(3)
-          case (jmax)
+          case (jmax) 
             call pushcontrol3b(2)
-          case (kmin)
+          case (kmin) 
             call pushcontrol3b(1)
-          case (kmax)
+          case (kmax) 
             call pushcontrol3b(0)
           case default
             call pushcontrol3b(6)
@@ -1183,26 +1181,26 @@ subroutine bcturbtreatment()
 ! loop over the boundary condition subfaces of this block.
 bocos:do nn=1,nbocos
 ! determine the kind of boundary condition for this subface.
-      select case  (bctype(nn))
-      case (nswalladiabatic, nswallisothermal)
+      select case  (bctype(nn)) 
+      case (nswalladiabatic, nswallisothermal) 
 ! viscous wall. there is no difference between an adiabatic
 ! and an isothermal wall for the turbulent equations.
 ! set the implicit treatment of the wall boundary conditions.
         call bcturbwall(nn)
-      case (symm, symmpolar, eulerwall)
+      case (symm, symmpolar, eulerwall) 
 !=============================================================
 !=============================================================
 ! symmetry, polar symmetry or inviscid wall. treatment of
 ! the turbulent equations is identical.
         call bcturbsymm(nn)
-      case (farfield)
+      case (farfield) 
 !=============================================================
 ! farfield. the kind of boundary condition to be applied,
 ! inflow or outflow, depends on the local conditions.
         call bcturbfarfield(nn)
       case (slidinginterface, oversetouterbound, domaininterfaceall, &
 &     domaininterfacerhouvw, domaininterfacep, domaininterfacerho, &
-&     domaininterfacetotal)
+&     domaininterfacetotal) 
 !=============================================================
 ! sliding mesh interface, overset outer boudaries, and
 ! domain interface with another code are not really boundary
@@ -1249,36 +1247,36 @@ subroutine bcturbfarfield(nn)
 ! outflow. simply extrapolation or zero neumann bc
 ! of the turbulent variables.
           do l=nt1,nt2
-            select case  (bcfaceid(nn))
-            case (imin)
+            select case  (bcfaceid(nn)) 
+            case (imin) 
               bmti1(i, j, l, l) = -one
-            case (imax)
+            case (imax) 
               bmti2(i, j, l, l) = -one
-            case (jmin)
+            case (jmin) 
               bmtj1(i, j, l, l) = -one
-            case (jmax)
+            case (jmax) 
               bmtj2(i, j, l, l) = -one
-            case (kmin)
+            case (kmin) 
               bmtk1(i, j, l, l) = -one
-            case (kmax)
+            case (kmax) 
               bmtk2(i, j, l, l) = -one
             end select
           end do
         else
 ! inflow. turbulent variables are prescribed.
           do l=nt1,nt2
-            select case  (bcfaceid(nn))
-            case (imin)
+            select case  (bcfaceid(nn)) 
+            case (imin) 
               bvti1(i, j, l) = winf(l)
-            case (imax)
+            case (imax) 
               bvti2(i, j, l) = winf(l)
-            case (jmin)
+            case (jmin) 
               bvtj1(i, j, l) = winf(l)
-            case (jmax)
+            case (jmax) 
               bvtj2(i, j, l) = winf(l)
-            case (kmin)
+            case (kmin) 
               bvtk1(i, j, l) = winf(l)
-            case (kmax)
+            case (kmax) 
               bvtk2(i, j, l) = winf(l)
             end select
           end do
@@ -1312,18 +1310,18 @@ subroutine bcturbinterface(nn)
     do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
       do i=bcdata(nn)%icbeg,bcdata(nn)%icend
         do l=nt1,nt2
-          select case  (bcfaceid(nn))
-          case (imin)
+          select case  (bcfaceid(nn)) 
+          case (imin) 
             bvti1(i, j, l) = w(1, i, j, l)
-          case (imax)
+          case (imax) 
             bvti2(i, j, l) = w(ie, i, j, l)
-          case (jmin)
+          case (jmin) 
             bvtj1(i, j, l) = w(i, 1, j, l)
-          case (jmax)
+          case (jmax) 
             bvtj2(i, j, l) = w(i, je, j, l)
-          case (kmin)
+          case (kmin) 
             bvtk1(i, j, l) = w(i, j, 1, l)
-          case (kmax)
+          case (kmax) 
             bvtk2(i, j, l) = w(i, j, ke, l)
           end select
         end do
@@ -1357,18 +1355,18 @@ subroutine bcturbsymm(nn)
     do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
       do i=bcdata(nn)%icbeg,bcdata(nn)%icend
         do l=nt1,nt2
-          select case  (bcfaceid(nn))
-          case (imin)
+          select case  (bcfaceid(nn)) 
+          case (imin) 
             bmti1(i, j, l, l) = -one
-          case (imax)
+          case (imax) 
             bmti2(i, j, l, l) = -one
-          case (jmin)
+          case (jmin) 
             bmtj1(i, j, l, l) = -one
-          case (jmax)
+          case (jmax) 
             bmtj2(i, j, l, l) = -one
-          case (kmin)
+          case (kmin) 
             bmtk1(i, j, l, l) = -one
-          case (kmax)
+          case (kmax) 
             bmtk2(i, j, l, l) = -one
           end select
         end do
@@ -1447,10 +1445,10 @@ subroutine bcturbwall_b(nn)
 ! determine the turbulence model used and loop over the faces
 ! of the subface and set the values of bmt and bvt for an
 ! implicit treatment.
-    select case  (turbmodel)
-    case (spalartallmaras, spalartallmarasedwards)
+    select case  (turbmodel) 
+    case (spalartallmaras, spalartallmarasedwards) 
 
-    case (komegawilcox, komegamodified, mentersst)
+    case (komegawilcox, komegamodified, mentersst) 
 !        ================================================================
 ! k-omega type of models. k is zero on the wall and thus the
 ! halo value is the negative of the first internal cell.
@@ -1462,8 +1460,8 @@ subroutine bcturbwall_b(nn)
 ! distance. due to the usage of the dd2wall pointer and the
 ! fact that the original d2wall array starts at 2, there is
 ! an offset of -1 present in dd2wall.
-      select case  (bcfaceid(nn))
-      case (imin)
+      select case  (bcfaceid(nn)) 
+      case (imin) 
         iimax = jl
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -1527,7 +1525,7 @@ subroutine bcturbwall_b(nn)
             call popinteger4(jj)
           end if
         end do
-      case (imax)
+      case (imax) 
         iimax = jl
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -1591,7 +1589,7 @@ subroutine bcturbwall_b(nn)
             call popinteger4(jj)
           end if
         end do
-      case (jmin)
+      case (jmin) 
         iimax = il
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -1655,7 +1653,7 @@ subroutine bcturbwall_b(nn)
             call popinteger4(jj)
           end if
         end do
-      case (jmax)
+      case (jmax) 
         iimax = il
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -1719,7 +1717,7 @@ subroutine bcturbwall_b(nn)
             call popinteger4(jj)
           end if
         end do
-      case (kmin)
+      case (kmin) 
         iimax = il
         jjmax = jl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -1783,7 +1781,7 @@ subroutine bcturbwall_b(nn)
             call popinteger4(jj)
           end if
         end do
-      case (kmax)
+      case (kmax) 
         iimax = il
         jjmax = jl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -1889,8 +1887,8 @@ subroutine turb2ndhalo_b(nn)
 ! some pointers accordingly.
 ! loop over the turbulent variables and set the second halo
 ! value. if this is an eddy model, also set the eddy viscosity.
-    select case  (bcfaceid(nn))
-    case (imin)
+    select case  (bcfaceid(nn)) 
+    case (imin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           if (eddymodel) then
@@ -1913,7 +1911,7 @@ subroutine turb2ndhalo_b(nn)
           end do
         end do
       end do
-    case (imax)
+    case (imax) 
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -1939,7 +1937,7 @@ subroutine turb2ndhalo_b(nn)
           end do
         end do
       end do
-    case (jmin)
+    case (jmin) 
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -1963,7 +1961,7 @@ subroutine turb2ndhalo_b(nn)
           end do
         end do
       end do
-    case (jmax)
+    case (jmax) 
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -1989,7 +1987,7 @@ subroutine turb2ndhalo_b(nn)
           end do
         end do
       end do
-    case (kmin)
+    case (kmin) 
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -2013,7 +2011,7 @@ subroutine turb2ndhalo_b(nn)
           end do
         end do
       end do
-    case (kmax)
+    case (kmax) 
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -2063,8 +2061,8 @@ subroutine turb2ndhalo(nn)
 ! some pointers accordingly.
 ! loop over the turbulent variables and set the second halo
 ! value. if this is an eddy model, also set the eddy viscosity.
-    select case  (bcfaceid(nn))
-    case (imin)
+    select case  (bcfaceid(nn)) 
+    case (imin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           do l=nt1,nt2
@@ -2073,7 +2071,7 @@ subroutine turb2ndhalo(nn)
           if (eddymodel) rev(0, i, j) = rev(1, i, j)
         end do
       end do
-    case (imax)
+    case (imax) 
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -2083,7 +2081,7 @@ subroutine turb2ndhalo(nn)
           if (eddymodel) rev(ib, i, j) = rev(ie, i, j)
         end do
       end do
-    case (jmin)
+    case (jmin) 
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -2093,7 +2091,7 @@ subroutine turb2ndhalo(nn)
           if (eddymodel) rev(i, 0, j) = rev(i, 1, j)
         end do
       end do
-    case (jmax)
+    case (jmax) 
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -2103,7 +2101,7 @@ subroutine turb2ndhalo(nn)
           if (eddymodel) rev(i, jb, j) = rev(i, je, j)
         end do
       end do
-    case (kmin)
+    case (kmin) 
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -2113,7 +2111,7 @@ subroutine turb2ndhalo(nn)
           if (eddymodel) rev(i, j, 0) = rev(i, j, 1)
         end do
       end do
-    case (kmax)
+    case (kmax) 
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -2150,8 +2148,8 @@ subroutine turbbcnswall(secondhalo)
       call bcturbwall(nn)
 ! loop over the faces and set the state in
 ! the turbulent halo cells.
-      select case  (bcfaceid(nn))
-      case (imin)
+      select case  (bcfaceid(nn)) 
+      case (imin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -2168,7 +2166,7 @@ subroutine turbbcnswall(secondhalo)
             end if
           end do
         end do
-      case (imax)
+      case (imax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -2185,7 +2183,7 @@ subroutine turbbcnswall(secondhalo)
             end if
           end do
         end do
-      case (jmin)
+      case (jmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -2202,7 +2200,7 @@ subroutine turbbcnswall(secondhalo)
             end if
           end do
         end do
-      case (jmax)
+      case (jmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -2219,7 +2217,7 @@ subroutine turbbcnswall(secondhalo)
             end if
           end do
         end do
-      case (kmin)
+      case (kmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -2236,7 +2234,7 @@ subroutine turbbcnswall(secondhalo)
             end if
           end do
         end do
-      case (kmax)
+      case (kmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -2301,55 +2299,55 @@ subroutine bcturbwall(nn)
 ! determine the turbulence model used and loop over the faces
 ! of the subface and set the values of bmt and bvt for an
 ! implicit treatment.
-    select case  (turbmodel)
-    case (spalartallmaras, spalartallmarasedwards)
+    select case  (turbmodel) 
+    case (spalartallmaras, spalartallmarasedwards) 
 ! spalart-allmaras type of model. value at the wall is zero,
 ! so simply negate the internal value.
-      select case  (bcfaceid(nn))
-      case (imin)
+      select case  (bcfaceid(nn)) 
+      case (imin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = safact(kssa, d2wall(2, i, j))
+            result1 = saroughfact(2, i, j)
             bmti1(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (imax)
+      case (imax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = safact(kssa, d2wall(il, i, j))
+            result1 = saroughfact(il, i, j)
             bmti2(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (jmin)
+      case (jmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = safact(kssa, d2wall(i, 2, j))
+            result1 = saroughfact(i, 2, j)
             bmtj1(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (jmax)
+      case (jmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = safact(kssa, d2wall(i, jl, j))
+            result1 = saroughfact(i, jl, j)
             bmtj2(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (kmin)
+      case (kmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = safact(kssa, d2wall(i, j, 2))
+            result1 = saroughfact(i, j, 2)
             bmtk1(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (kmax)
+      case (kmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = safact(kssa, d2wall(i, j, kl))
+            result1 = saroughfact(i, j, kl)
             bmtk2(i, j, itu1, itu1) = -result1
           end do
         end do
       end select
-    case (komegawilcox, komegamodified, mentersst)
+    case (komegawilcox, komegamodified, mentersst) 
 !        ================================================================
 ! k-omega type of models. k is zero on the wall and thus the
 ! halo value is the negative of the first internal cell.
@@ -2361,8 +2359,8 @@ subroutine bcturbwall(nn)
 ! distance. due to the usage of the dd2wall pointer and the
 ! fact that the original d2wall array starts at 2, there is
 ! an offset of -1 present in dd2wall.
-      select case  (bcfaceid(nn))
-      case (imin)
+      select case  (bcfaceid(nn)) 
+      case (imin) 
         iimax = jl
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -2394,7 +2392,7 @@ subroutine bcturbwall(nn)
             bvti1(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (imax)
+      case (imax) 
         iimax = jl
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -2426,7 +2424,7 @@ subroutine bcturbwall(nn)
             bvti2(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (jmin)
+      case (jmin) 
         iimax = il
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -2458,7 +2456,7 @@ subroutine bcturbwall(nn)
             bvtj1(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (jmax)
+      case (jmax) 
         iimax = il
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -2490,7 +2488,7 @@ subroutine bcturbwall(nn)
             bvtj2(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (kmin)
+      case (kmin) 
         iimax = il
         jjmax = jl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -2522,7 +2520,7 @@ subroutine bcturbwall(nn)
             bvtk1(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (kmax)
+      case (kmax) 
         iimax = il
         jjmax = jl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -2555,47 +2553,47 @@ subroutine bcturbwall(nn)
           end do
         end do
       end select
-    case (ktau)
+    case (ktau) 
 !        ================================================================
 ! k-tau model. both k and tau are zero at the wall, so the
 ! negative value of the internal cell is taken for the halo.
-      select case  (bcfaceid(nn))
-      case (imin)
+      select case  (bcfaceid(nn)) 
+      case (imin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmti1(i, j, itu1, itu1) = one
             bmti1(i, j, itu2, itu2) = one
           end do
         end do
-      case (imax)
+      case (imax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmti2(i, j, itu1, itu1) = one
             bmti2(i, j, itu2, itu2) = one
           end do
         end do
-      case (jmin)
+      case (jmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmtj1(i, j, itu1, itu1) = one
             bmtj1(i, j, itu2, itu2) = one
           end do
         end do
-      case (jmax)
+      case (jmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmtj2(i, j, itu1, itu1) = one
             bmtj2(i, j, itu2, itu2) = one
           end do
         end do
-      case (kmin)
+      case (kmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmtk1(i, j, itu1, itu1) = one
             bmtk1(i, j, itu2, itu2) = one
           end do
         end do
-      case (kmax)
+      case (kmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmtk2(i, j, itu1, itu1) = one
@@ -2605,20 +2603,23 @@ subroutine bcturbwall(nn)
       end select
     end select
   end subroutine bcturbwall
-  function safact(ks, d)
+  function saroughfact(i, j, k)
+! returns either the regular sa-boundary condition
+! or the modified roughness-boundary condition
     use constants
+    use inputphysics, only : useroughsa
+    use blockpointers, only : ks, d2wall
     implicit none
 ! dummy arguments
-    real(kind=realtype) :: safact
+    real(kind=realtype) :: saroughfact
 ! local variablse
-    real(kind=realtype) :: ks
-    real(kind=realtype) :: d
-    if (ks .eq. zero) then
-      safact = -one
-    else if (d .eq. zero) then
-      safact = one
+    integer(kind=inttype) :: i, j, k
+    if (useroughsa) then
+      saroughfact = (ks(i, j, k)-d2wall(i, j, k)/0.03)/(ks(i, j, k)+&
+&       d2wall(i, j, k)/0.03)
+      return
     else
-      safact = (ks-d/0.03)/(ks+d/0.03)
+      saroughfact = one
     end if
-  end function safact
+  end function saroughfact
 end module turbbcroutines_b
diff --git a/src/adjoint/outputReverse/walldistance_b.f90 b/src/adjoint/outputReverse/walldistance_b.f90
index 39c8f1c4d..bfae518e5 100644
--- a/src/adjoint/outputReverse/walldistance_b.f90
+++ b/src/adjoint/outputReverse/walldistance_b.f90
@@ -189,4 +189,51 @@ subroutine updatewalldistancesquickly(nn, level, sps)
       end if
     end do
   end subroutine updatewalldistancesquickly
+  subroutine updatewallroughness()
+! sets the roughness-value (ks) of the nearest wall-cell.
+    use constants
+    use blockpointers, only : il, jl, kl, flowdoms, ks, bcdata, ndom, &
+&   nbocos
+    use inputtimespectral, only : ntimeintervalsspectral
+    ! use utils_b, only : setpointers
+    use iteration, only : groundlevel
+    implicit none
+! local variables
+    integer(kind=inttype) :: i, j, k, iii, jjj, boco, dom
+    integer(kind=inttype) :: nn, sps, level, nlevels
+    intrinsic ubound
+    external setpointers
+    nlevels = ubound(flowdoms, 2)
+    do level=1,nlevels
+      do sps=1,ntimeintervalsspectral
+        do nn=1,ndom
+          ! call setpointers(nn, level, sps)
+          do k=2,kl
+            do j=2,jl
+              do i=2,il
+                if (flowdoms(nn, level, sps)%nearestbccell(1, i, j, k) &
+&                   .eq. 0) then
+! this cell is too far away and has no
+! association. set the roughness to zero.
+                  print*, 'ks cutoff'
+                  ks(i, j, k) = zero
+                else
+                  dom = flowdoms(nn, level, sps)%nearestbccell(1, i, j, &
+&                   k)
+                  boco = flowdoms(nn, level, sps)%nearestbccell(2, i, j&
+&                   , k)
+                  iii = flowdoms(nn, level, sps)%nearestbccell(3, i, j, &
+&                   k)
+                  jjj = flowdoms(nn, level, sps)%nearestbccell(4, i, j, &
+&                   k)
+                  ks(i, j, k) = flowdoms(dom, level, sps)%bcdata(boco)%&
+&                   ksns_wall(iii, jjj)
+                end if
+              end do
+            end do
+          end do
+        end do
+      end do
+    end do
+  end subroutine updatewallroughness
 end module walldistance_b
diff --git a/src/adjoint/outputReverseFast/sa_fast_b.f90 b/src/adjoint/outputReverseFast/sa_fast_b.f90
index 64351041a..3f52c40db 100644
--- a/src/adjoint/outputReverseFast/sa_fast_b.f90
+++ b/src/adjoint/outputReverseFast/sa_fast_b.f90
@@ -37,7 +37,7 @@ subroutine sasource_fast_b()
     real(kind=realtype), parameter :: f23=two*third
 ! local variables.
     integer(kind=inttype) :: i, j, k, nn, ii
-    real(kind=realtype) :: dnew, cr1
+    real(kind=realtype) :: dist, kslocal
     real(kind=realtype) :: fv1, fv2, ft2
     real(kind=realtype) :: fv1d, fv2d, ft2d
     real(kind=realtype) :: ss, sst, nu, dist2inv, chi, chi2, chi3
@@ -87,8 +87,6 @@ subroutine sasource_fast_b()
     cv13 = rsacv1**3
     kar2inv = one/rsak**2
     cw36 = rsacw3**6
-! constants for sa rough
-    cr1 = 0.5
 ! determine the non-dimensional wheel speed of this block.
     omegax = timeref*sections(sectionid)%rotrate(1)
     omegay = timeref*sections(sectionid)%rotrate(2)
@@ -186,11 +184,18 @@ subroutine sasource_fast_b()
 ! wall distance squared, the ratio chi (ratio of nutilde
 ! and nu) and the functions fv1 and fv2. the latter corrects
 ! the production term near a viscous wall.
-! sa rough
-        dnew = d2wall(i, j, k) + 0.03*kssa
+! as the rough version of sa is supported, this looks slightly different
+! than the standard sa implementation
+        if (useroughsa) then
+          kslocal = ks(i, j, k)
+          dist = d2wall(i, j, k) + 0.03*kslocal
+        else
+          kslocal = zero
+          dist = d2wall(i, j, k)
+        end if
         nu = rlv(i, j, k)/w(i, j, k, irho)
-        dist2inv = one/dnew**2
-        chi = w(i, j, k, itu1)/nu + cr1*kssa/dnew
+        dist2inv = one/dist**2
+        chi = w(i, j, k, itu1)/nu + rsacr1*kslocal/dist
         chi2 = chi*chi
         chi3 = chi*chi2
         fv1 = chi3/(chi3+cv13)
@@ -478,7 +483,7 @@ subroutine sasource()
     real(kind=realtype), parameter :: f23=two*third
 ! local variables.
     integer(kind=inttype) :: i, j, k, nn, ii
-    real(kind=realtype) :: dnew, cr1
+    real(kind=realtype) :: dist, kslocal
     real(kind=realtype) :: fv1, fv2, ft2
     real(kind=realtype) :: ss, sst, nu, dist2inv, chi, chi2, chi3
     real(kind=realtype) :: rr, gg, gg6, termfw, fwsa, term1, term2
@@ -501,8 +506,6 @@ subroutine sasource()
     kar2inv = one/rsak**2
     cw36 = rsacw3**6
     cb3inv = one/rsacb3
-! constants for sa rough
-    cr1 = 0.5
 ! determine the non-dimensional wheel speed of this block.
     omegax = timeref*sections(sectionid)%rotrate(1)
     omegay = timeref*sections(sectionid)%rotrate(2)
@@ -595,11 +598,18 @@ subroutine sasource()
 ! wall distance squared, the ratio chi (ratio of nutilde
 ! and nu) and the functions fv1 and fv2. the latter corrects
 ! the production term near a viscous wall.
-! sa rough
-        dnew = d2wall(i, j, k) + 0.03*kssa
+! as the rough version of sa is supported, this looks slightly different
+! than the standard sa implementation
+        if (useroughsa) then
+          kslocal = ks(i, j, k)
+          dist = d2wall(i, j, k) + 0.03*kslocal
+        else
+          kslocal = zero
+          dist = d2wall(i, j, k)
+        end if
         nu = rlv(i, j, k)/w(i, j, k, irho)
-        dist2inv = one/dnew**2
-        chi = w(i, j, k, itu1)/nu + cr1*kssa/dnew
+        dist2inv = one/dist**2
+        chi = w(i, j, k, itu1)/nu + rsacr1*kslocal/dist
         chi2 = chi*chi
         chi3 = chi*chi2
         fv1 = chi3/(chi3+cv13)
diff --git a/src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90 b/src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90
index 12bcc85e3..77c1733e8 100644
--- a/src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90
+++ b/src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90
@@ -32,8 +32,8 @@ subroutine applyallturbbcthisblock(secondhalo)
 ! loop over the faces and set the state in
 ! the turbulent halo cells.
       if (.not.wallfunctions) then
-        select case  (bcfaceid(nn))
-        case (imin)
+        select case  (bcfaceid(nn)) 
+        case (imin) 
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -45,7 +45,7 @@ subroutine applyallturbbcthisblock(secondhalo)
               end do
             end do
           end do
-        case (imax)
+        case (imax) 
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -57,7 +57,7 @@ subroutine applyallturbbcthisblock(secondhalo)
               end do
             end do
           end do
-        case (jmin)
+        case (jmin) 
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -69,7 +69,7 @@ subroutine applyallturbbcthisblock(secondhalo)
               end do
             end do
           end do
-        case (jmax)
+        case (jmax) 
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -81,7 +81,7 @@ subroutine applyallturbbcthisblock(secondhalo)
               end do
             end do
           end do
-        case (kmin)
+        case (kmin) 
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -93,7 +93,7 @@ subroutine applyallturbbcthisblock(secondhalo)
               end do
             end do
           end do
-        case (kmax)
+        case (kmax) 
           do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
             do i=bcdata(nn)%icbeg,bcdata(nn)%icend
               do l=nt1,nt2
@@ -147,38 +147,38 @@ subroutine bceddynowall(nn)
 !
     integer(kind=inttype) :: i, j
 ! determine the face id on which the subface and copy
-    select case  (bcfaceid(nn))
-    case (imin)
+    select case  (bcfaceid(nn)) 
+    case (imin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(1, i, j) = rev(2, i, j)
         end do
       end do
-    case (imax)
+    case (imax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(ie, i, j) = rev(il, i, j)
         end do
       end do
-    case (jmin)
+    case (jmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(i, 1, j) = rev(i, 2, j)
         end do
       end do
-    case (jmax)
+    case (jmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(i, je, j) = rev(i, jl, j)
         end do
       end do
-    case (kmin)
+    case (kmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(i, j, 1) = rev(i, j, 2)
         end do
       end do
-    case (kmax)
+    case (kmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           rev(i, j, ke) = rev(i, j, kl)
@@ -195,7 +195,6 @@ subroutine bceddywall(nn)
 !
     use constants
     use blockpointers
-    use inputphysics, only : kssa
     implicit none
 !
 !      subroutine arguments.
@@ -209,46 +208,46 @@ subroutine bceddywall(nn)
 ! determine the face id on which the subface is located and
 ! loop over the faces of the subface and set the eddy viscosity
 ! in the halo cells.
-    select case  (bcfaceid(nn))
-    case (imin)
+    select case  (bcfaceid(nn)) 
+    case (imin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = safact(kssa, d2wall(2, i, j))
+          result1 = saroughfact(2, i, j)
           rev(1, i, j) = result1*rev(2, i, j)
         end do
       end do
-    case (imax)
+    case (imax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = safact(kssa, d2wall(il, i, j))
+          result1 = saroughfact(il, i, j)
           rev(ie, i, j) = result1*rev(il, i, j)
         end do
       end do
-    case (jmin)
+    case (jmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = safact(kssa, d2wall(i, 2, j))
+          result1 = saroughfact(i, 2, j)
           rev(i, 1, j) = result1*rev(i, 2, j)
         end do
       end do
-    case (jmax)
+    case (jmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = safact(kssa, d2wall(i, jl, j))
+          result1 = saroughfact(i, jl, j)
           rev(i, je, j) = result1*rev(i, jl, j)
         end do
       end do
-    case (kmin)
+    case (kmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = safact(kssa, d2wall(i, j, 2))
+          result1 = saroughfact(i, j, 2)
           rev(i, j, 1) = result1*rev(i, j, 2)
         end do
       end do
-    case (kmax)
+    case (kmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = safact(kssa, d2wall(i, j, kl))
+          result1 = saroughfact(i, j, kl)
           rev(i, j, ke) = result1*rev(i, j, kl)
         end do
       end do
@@ -282,23 +281,23 @@ subroutine bcturbinflow(nn)
       do i=bcdata(nn)%icbeg,bcdata(nn)%icend
 ! loop over the number of turbulent variables.
         do l=nt1,nt2
-          select case  (bcfaceid(nn))
-          case (imin)
+          select case  (bcfaceid(nn)) 
+          case (imin) 
             bvti1(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmti1(i, j, l, l) = one
-          case (imax)
+          case (imax) 
             bvti2(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmti2(i, j, l, l) = one
-          case (jmin)
+          case (jmin) 
             bvtj1(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmtj1(i, j, l, l) = one
-          case (jmax)
+          case (jmax) 
             bvtj2(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmtj2(i, j, l, l) = one
-          case (kmin)
+          case (kmin) 
             bvtk1(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmtk1(i, j, l, l) = one
-          case (kmax)
+          case (kmax) 
             bvtk2(i, j, l) = two*bcdata(nn)%turbinlet(i, j, l)
             bmtk2(i, j, l, l) = one
           end select
@@ -335,18 +334,18 @@ subroutine bcturboutflow(nn)
     do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
       do i=bcdata(nn)%icbeg,bcdata(nn)%icend
         do l=nt1,nt2
-          select case  (bcfaceid(nn))
-          case (imin)
+          select case  (bcfaceid(nn)) 
+          case (imin) 
             bmti1(i, j, l, l) = -one
-          case (imax)
+          case (imax) 
             bmti2(i, j, l, l) = -one
-          case (jmin)
+          case (jmin) 
             bmtj1(i, j, l, l) = -one
-          case (jmax)
+          case (jmax) 
             bmtj2(i, j, l, l) = -one
-          case (kmin)
+          case (kmin) 
             bmtk1(i, j, l, l) = -one
-          case (kmax)
+          case (kmax) 
             bmtk2(i, j, l, l) = -one
           end select
         end do
@@ -416,26 +415,26 @@ subroutine bcturbtreatment()
 ! loop over the boundary condition subfaces of this block.
 bocos:do nn=1,nbocos
 ! determine the kind of boundary condition for this subface.
-      select case  (bctype(nn))
-      case (nswalladiabatic, nswallisothermal)
+      select case  (bctype(nn)) 
+      case (nswalladiabatic, nswallisothermal) 
 ! viscous wall. there is no difference between an adiabatic
 ! and an isothermal wall for the turbulent equations.
 ! set the implicit treatment of the wall boundary conditions.
         call bcturbwall(nn)
-      case (symm, symmpolar, eulerwall)
+      case (symm, symmpolar, eulerwall) 
 !=============================================================
 !=============================================================
 ! symmetry, polar symmetry or inviscid wall. treatment of
 ! the turbulent equations is identical.
         call bcturbsymm(nn)
-      case (farfield)
+      case (farfield) 
 !=============================================================
 ! farfield. the kind of boundary condition to be applied,
 ! inflow or outflow, depends on the local conditions.
         call bcturbfarfield(nn)
       case (slidinginterface, oversetouterbound, domaininterfaceall, &
 &     domaininterfacerhouvw, domaininterfacep, domaininterfacerho, &
-&     domaininterfacetotal)
+&     domaininterfacetotal) 
 !=============================================================
 ! sliding mesh interface, overset outer boudaries, and
 ! domain interface with another code are not really boundary
@@ -482,36 +481,36 @@ subroutine bcturbfarfield(nn)
 ! outflow. simply extrapolation or zero neumann bc
 ! of the turbulent variables.
           do l=nt1,nt2
-            select case  (bcfaceid(nn))
-            case (imin)
+            select case  (bcfaceid(nn)) 
+            case (imin) 
               bmti1(i, j, l, l) = -one
-            case (imax)
+            case (imax) 
               bmti2(i, j, l, l) = -one
-            case (jmin)
+            case (jmin) 
               bmtj1(i, j, l, l) = -one
-            case (jmax)
+            case (jmax) 
               bmtj2(i, j, l, l) = -one
-            case (kmin)
+            case (kmin) 
               bmtk1(i, j, l, l) = -one
-            case (kmax)
+            case (kmax) 
               bmtk2(i, j, l, l) = -one
             end select
           end do
         else
 ! inflow. turbulent variables are prescribed.
           do l=nt1,nt2
-            select case  (bcfaceid(nn))
-            case (imin)
+            select case  (bcfaceid(nn)) 
+            case (imin) 
               bvti1(i, j, l) = winf(l)
-            case (imax)
+            case (imax) 
               bvti2(i, j, l) = winf(l)
-            case (jmin)
+            case (jmin) 
               bvtj1(i, j, l) = winf(l)
-            case (jmax)
+            case (jmax) 
               bvtj2(i, j, l) = winf(l)
-            case (kmin)
+            case (kmin) 
               bvtk1(i, j, l) = winf(l)
-            case (kmax)
+            case (kmax) 
               bvtk2(i, j, l) = winf(l)
             end select
           end do
@@ -545,18 +544,18 @@ subroutine bcturbinterface(nn)
     do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
       do i=bcdata(nn)%icbeg,bcdata(nn)%icend
         do l=nt1,nt2
-          select case  (bcfaceid(nn))
-          case (imin)
+          select case  (bcfaceid(nn)) 
+          case (imin) 
             bvti1(i, j, l) = w(1, i, j, l)
-          case (imax)
+          case (imax) 
             bvti2(i, j, l) = w(ie, i, j, l)
-          case (jmin)
+          case (jmin) 
             bvtj1(i, j, l) = w(i, 1, j, l)
-          case (jmax)
+          case (jmax) 
             bvtj2(i, j, l) = w(i, je, j, l)
-          case (kmin)
+          case (kmin) 
             bvtk1(i, j, l) = w(i, j, 1, l)
-          case (kmax)
+          case (kmax) 
             bvtk2(i, j, l) = w(i, j, ke, l)
           end select
         end do
@@ -590,18 +589,18 @@ subroutine bcturbsymm(nn)
     do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
       do i=bcdata(nn)%icbeg,bcdata(nn)%icend
         do l=nt1,nt2
-          select case  (bcfaceid(nn))
-          case (imin)
+          select case  (bcfaceid(nn)) 
+          case (imin) 
             bmti1(i, j, l, l) = -one
-          case (imax)
+          case (imax) 
             bmti2(i, j, l, l) = -one
-          case (jmin)
+          case (jmin) 
             bmtj1(i, j, l, l) = -one
-          case (jmax)
+          case (jmax) 
             bmtj2(i, j, l, l) = -one
-          case (kmin)
+          case (kmin) 
             bmtk1(i, j, l, l) = -one
-          case (kmax)
+          case (kmax) 
             bmtk2(i, j, l, l) = -one
           end select
         end do
@@ -630,8 +629,8 @@ subroutine turb2ndhalo(nn)
 ! some pointers accordingly.
 ! loop over the turbulent variables and set the second halo
 ! value. if this is an eddy model, also set the eddy viscosity.
-    select case  (bcfaceid(nn))
-    case (imin)
+    select case  (bcfaceid(nn)) 
+    case (imin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           do l=nt1,nt2
@@ -640,7 +639,7 @@ subroutine turb2ndhalo(nn)
           if (eddymodel) rev(0, i, j) = rev(1, i, j)
         end do
       end do
-    case (imax)
+    case (imax) 
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -650,7 +649,7 @@ subroutine turb2ndhalo(nn)
           if (eddymodel) rev(ib, i, j) = rev(ie, i, j)
         end do
       end do
-    case (jmin)
+    case (jmin) 
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -660,7 +659,7 @@ subroutine turb2ndhalo(nn)
           if (eddymodel) rev(i, 0, j) = rev(i, 1, j)
         end do
       end do
-    case (jmax)
+    case (jmax) 
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -670,7 +669,7 @@ subroutine turb2ndhalo(nn)
           if (eddymodel) rev(i, jb, j) = rev(i, je, j)
         end do
       end do
-    case (kmin)
+    case (kmin) 
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -680,7 +679,7 @@ subroutine turb2ndhalo(nn)
           if (eddymodel) rev(i, j, 0) = rev(i, j, 1)
         end do
       end do
-    case (kmax)
+    case (kmax) 
 !===============================================================
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
@@ -717,8 +716,8 @@ subroutine turbbcnswall(secondhalo)
       call bcturbwall(nn)
 ! loop over the faces and set the state in
 ! the turbulent halo cells.
-      select case  (bcfaceid(nn))
-      case (imin)
+      select case  (bcfaceid(nn)) 
+      case (imin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -735,7 +734,7 @@ subroutine turbbcnswall(secondhalo)
             end if
           end do
         end do
-      case (imax)
+      case (imax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -752,7 +751,7 @@ subroutine turbbcnswall(secondhalo)
             end if
           end do
         end do
-      case (jmin)
+      case (jmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -769,7 +768,7 @@ subroutine turbbcnswall(secondhalo)
             end if
           end do
         end do
-      case (jmax)
+      case (jmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -786,7 +785,7 @@ subroutine turbbcnswall(secondhalo)
             end if
           end do
         end do
-      case (kmin)
+      case (kmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -803,7 +802,7 @@ subroutine turbbcnswall(secondhalo)
             end if
           end do
         end do
-      case (kmax)
+      case (kmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             do l=nt1,nt2
@@ -868,55 +867,55 @@ subroutine bcturbwall(nn)
 ! determine the turbulence model used and loop over the faces
 ! of the subface and set the values of bmt and bvt for an
 ! implicit treatment.
-    select case  (turbmodel)
-    case (spalartallmaras, spalartallmarasedwards)
+    select case  (turbmodel) 
+    case (spalartallmaras, spalartallmarasedwards) 
 ! spalart-allmaras type of model. value at the wall is zero,
 ! so simply negate the internal value.
-      select case  (bcfaceid(nn))
-      case (imin)
+      select case  (bcfaceid(nn)) 
+      case (imin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = safact(kssa, d2wall(2, i, j))
+            result1 = saroughfact(2, i, j)
             bmti1(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (imax)
+      case (imax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = safact(kssa, d2wall(il, i, j))
+            result1 = saroughfact(il, i, j)
             bmti2(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (jmin)
+      case (jmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = safact(kssa, d2wall(i, 2, j))
+            result1 = saroughfact(i, 2, j)
             bmtj1(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (jmax)
+      case (jmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = safact(kssa, d2wall(i, jl, j))
+            result1 = saroughfact(i, jl, j)
             bmtj2(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (kmin)
+      case (kmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = safact(kssa, d2wall(i, j, 2))
+            result1 = saroughfact(i, j, 2)
             bmtk1(i, j, itu1, itu1) = -result1
           end do
         end do
-      case (kmax)
+      case (kmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = safact(kssa, d2wall(i, j, kl))
+            result1 = saroughfact(i, j, kl)
             bmtk2(i, j, itu1, itu1) = -result1
           end do
         end do
       end select
-    case (komegawilcox, komegamodified, mentersst)
+    case (komegawilcox, komegamodified, mentersst) 
 !        ================================================================
 ! k-omega type of models. k is zero on the wall and thus the
 ! halo value is the negative of the first internal cell.
@@ -928,8 +927,8 @@ subroutine bcturbwall(nn)
 ! distance. due to the usage of the dd2wall pointer and the
 ! fact that the original d2wall array starts at 2, there is
 ! an offset of -1 present in dd2wall.
-      select case  (bcfaceid(nn))
-      case (imin)
+      select case  (bcfaceid(nn)) 
+      case (imin) 
         iimax = jl
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -961,7 +960,7 @@ subroutine bcturbwall(nn)
             bvti1(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (imax)
+      case (imax) 
         iimax = jl
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -993,7 +992,7 @@ subroutine bcturbwall(nn)
             bvti2(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (jmin)
+      case (jmin) 
         iimax = il
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -1025,7 +1024,7 @@ subroutine bcturbwall(nn)
             bvtj1(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (jmax)
+      case (jmax) 
         iimax = il
         jjmax = kl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -1057,7 +1056,7 @@ subroutine bcturbwall(nn)
             bvtj2(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (kmin)
+      case (kmin) 
         iimax = il
         jjmax = jl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -1089,7 +1088,7 @@ subroutine bcturbwall(nn)
             bvtk1(i, j, itu2) = two*60.0_realtype*nu*tmpd
           end do
         end do
-      case (kmax)
+      case (kmax) 
         iimax = il
         jjmax = jl
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
@@ -1122,47 +1121,47 @@ subroutine bcturbwall(nn)
           end do
         end do
       end select
-    case (ktau)
+    case (ktau) 
 !        ================================================================
 ! k-tau model. both k and tau are zero at the wall, so the
 ! negative value of the internal cell is taken for the halo.
-      select case  (bcfaceid(nn))
-      case (imin)
+      select case  (bcfaceid(nn)) 
+      case (imin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmti1(i, j, itu1, itu1) = one
             bmti1(i, j, itu2, itu2) = one
           end do
         end do
-      case (imax)
+      case (imax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmti2(i, j, itu1, itu1) = one
             bmti2(i, j, itu2, itu2) = one
           end do
         end do
-      case (jmin)
+      case (jmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmtj1(i, j, itu1, itu1) = one
             bmtj1(i, j, itu2, itu2) = one
           end do
         end do
-      case (jmax)
+      case (jmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmtj2(i, j, itu1, itu1) = one
             bmtj2(i, j, itu2, itu2) = one
           end do
         end do
-      case (kmin)
+      case (kmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmtk1(i, j, itu1, itu1) = one
             bmtk1(i, j, itu2, itu2) = one
           end do
         end do
-      case (kmax)
+      case (kmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             bmtk2(i, j, itu1, itu1) = one
@@ -1172,20 +1171,23 @@ subroutine bcturbwall(nn)
       end select
     end select
   end subroutine bcturbwall
-  function safact(ks, d)
+  function saroughfact(i, j, k)
+! returns either the regular sa-boundary condition
+! or the modified roughness-boundary condition
     use constants
+    use inputphysics, only : useroughsa
+    use blockpointers, only : ks, d2wall
     implicit none
 ! dummy arguments
-    real(kind=realtype) :: safact
+    real(kind=realtype) :: saroughfact
 ! local variablse
-    real(kind=realtype) :: ks
-    real(kind=realtype) :: d
-    if (ks .eq. zero) then
-      safact = -one
-    else if (d .eq. zero) then
-      safact = one
+    integer(kind=inttype) :: i, j, k
+    if (useroughsa) then
+      saroughfact = (ks(i, j, k)-d2wall(i, j, k)/0.03)/(ks(i, j, k)+&
+&       d2wall(i, j, k)/0.03)
+      return
     else
-      safact = (ks-d/0.03)/(ks+d/0.03)
+      saroughfact = one
     end if
-  end function safact
+  end function saroughfact
 end module turbbcroutines_fast_b
diff --git a/src/adjoint/outputReverseFast/walldistance_fast_b.f90 b/src/adjoint/outputReverseFast/walldistance_fast_b.f90
index 4a82e6539..fecfbf826 100644
--- a/src/adjoint/outputReverseFast/walldistance_fast_b.f90
+++ b/src/adjoint/outputReverseFast/walldistance_fast_b.f90
@@ -68,4 +68,51 @@ subroutine updatewalldistancesquickly(nn, level, sps)
       end if
     end do
   end subroutine updatewalldistancesquickly
+  subroutine updatewallroughness()
+! sets the roughness-value (ks) of the nearest wall-cell.
+    use constants
+    use blockpointers, only : il, jl, kl, flowdoms, ks, bcdata, ndom, &
+&   nbocos
+    use inputtimespectral, only : ntimeintervalsspectral
+    ! use utils_fast_b, only : setpointers
+    use iteration, only : groundlevel
+    implicit none
+! local variables
+    integer(kind=inttype) :: i, j, k, iii, jjj, boco, dom
+    integer(kind=inttype) :: nn, sps, level, nlevels
+    intrinsic ubound
+    external setpointers
+    nlevels = ubound(flowdoms, 2)
+    do level=1,nlevels
+      do sps=1,ntimeintervalsspectral
+        do nn=1,ndom
+          ! call setpointers(nn, level, sps)
+          do k=2,kl
+            do j=2,jl
+              do i=2,il
+                if (flowdoms(nn, level, sps)%nearestbccell(1, i, j, k) &
+&                   .eq. 0) then
+! this cell is too far away and has no
+! association. set the roughness to zero.
+                  print*, 'ks cutoff'
+                  ks(i, j, k) = zero
+                else
+                  dom = flowdoms(nn, level, sps)%nearestbccell(1, i, j, &
+&                   k)
+                  boco = flowdoms(nn, level, sps)%nearestbccell(2, i, j&
+&                   , k)
+                  iii = flowdoms(nn, level, sps)%nearestbccell(3, i, j, &
+&                   k)
+                  jjj = flowdoms(nn, level, sps)%nearestbccell(4, i, j, &
+&                   k)
+                  ks(i, j, k) = flowdoms(dom, level, sps)%bcdata(boco)%&
+&                   ksns_wall(iii, jjj)
+                end if
+              end do
+            end do
+          end do
+        end do
+      end do
+    end do
+  end subroutine updatewallroughness
 end module walldistance_fast_b
diff --git a/src/bcdata/BCData.F90 b/src/bcdata/BCData.F90
index 5f0a65fa4..f1174d161 100644
--- a/src/bcdata/BCData.F90
+++ b/src/bcdata/BCData.F90
@@ -10,19 +10,29 @@ module BCData
   subroutine setBCVarNamesIsothermalWall
     use cgnsNames
     use constants
+    use inputPhysics, only : useRoughSA
     implicit none
     nbcVar = nbcVarIsothermalWall
     bcVarNames(1) = cgnsTemp
-    ! TODO: Add Sand grain roughness !!!!
+
+    if (useRoughSA) then
+       nbcVar = nbcVar + 1
+       bcVarNames(2) = cgnsSandGrainRoughness
+    end if
 
   end subroutine setBCVarNamesIsothermalWall
 
   subroutine setBCVarNamesAdiabaticWall
     use cgnsNames
     use constants
+    use inputPhysics, only : useRoughSA
     implicit none
     nbcVar = nbcVarAdiabaticWall
-    bcVarNames(1) = cgnsSandGrainRoughness
+
+    if (useRoughSA) then
+       nbcVar = nbcVar + 1
+       bcVarNames(1) = cgnsSandGrainRoughness
+    end if
 
   end subroutine setBCVarNamesAdiabaticWall
 
@@ -433,6 +443,7 @@ subroutine BCDataIsothermalWall(boco, bcVarArray, iBeg, iEnd, jBeg, jEnd)
     use blockPointers, only : BCFaceID, BCData, nBKGlobal
     use utils, only : terminate, siTemperature
     use flowVarRefState, only : Tref
+    use inputPhysics, only : useRoughSA
     implicit none
     !
     !      Subroutine arguments.
@@ -476,6 +487,22 @@ subroutine BCDataIsothermalWall(boco, bcVarArray, iBeg, iEnd, jBeg, jEnd)
        enddo
     enddo
 
+    ! Set a value of 0 if it was not possible to determine the
+    ! sand grain roughness
+
+    if (useRoughSA) then
+        if(.not. bcVarPresent(2)) then
+        bcVarArray(:,:,1) = zero
+        endif
+
+        do j=jBeg,jEnd
+            do i=iBeg,iEnd
+                BCData(boco)%ksNS_Wall(i,j) = bcVarArray(i,j,2)
+            enddo
+        enddo
+    end if
+
+
   end subroutine BCDataIsothermalWall
 
   subroutine BCDataAdiabaticWall(boco, bcVarArray, iBeg, iEnd, jBeg, jEnd)
@@ -485,6 +512,7 @@ subroutine BCDataAdiabaticWall(boco, bcVarArray, iBeg, iEnd, jBeg, jEnd)
     !
     use constants
     use cgnsNames
+    use inputPhysics, only : useRoughSA
     use blockPointers, only : BCFaceID, BCData, nBKGlobal
     implicit none
     !
@@ -501,15 +529,17 @@ subroutine BCDataAdiabaticWall(boco, bcVarArray, iBeg, iEnd, jBeg, jEnd)
     ! Set a value of 0 if it was not possible to determine the
     ! sand grain roughness
 
-    if(.not. bcVarPresent(1)) then
-       bcVarArray(:,:,1) = zero
-    endif
+    if (useRoughSA) then
+        if(.not. bcVarPresent(1)) then
+           bcVarArray(:,:,1) = zero
+        endif
 
-    do j=jBeg,jEnd
-       do i=iBeg,iEnd
-          BCData(boco)%ksNS_Wall(i,j) = bcVarArray(i,j,1)
-       enddo
-    enddo
+        do j=jBeg,jEnd
+            do i=iBeg,iEnd
+                BCData(boco)%ksNS_Wall(i,j) = bcVarArray(i,j,1)
+            enddo
+        enddo
+    end if
 
   end subroutine BCDataAdiabaticWall
 
@@ -2243,6 +2273,7 @@ subroutine allocMemBCData
     use blockPointers, only : BCData, flowDoms, nBocos, nDom, BCType
     use flowVarRefState, only : nt1, nt2
     use inputTimeSpectral, only : nTimeIntervalsSpectral
+    use inputPhysics, only : useRoughSA
     use iteration, only : nALESteps
     use utils, only : setPointers, terminate
     implicit none
@@ -2304,7 +2335,6 @@ subroutine allocMemBCData
                 case (NSWallAdiabatic)
                    allocate(BCData(mm)%uSlip(iBeg:iEnd,jBeg:jEnd,3), &
                         BCData(mm)%uSlipALE(0:nALEsteps,iBeg:iEnd,jBeg:jEnd,3), &
-                        BCData(mm)%ksNS_Wall(iBeg:iEnd,jBeg:jEnd), &
                         BCData(mm)%F(iNodeBeg:iNodeEnd,jNodeBeg:jNodeEnd,3), &
                         BCData(mm)%T(iNodeBeg:iNodeEnd,jNodeBeg:jNodeEnd,3), &
                         BCData(mm)%Tp(iNodeBeg:iNodeEnd,jNodeBeg:jNodeEnd,3), &
@@ -2314,6 +2344,9 @@ subroutine allocMemBCData
                         BCData(mm)%area(iNodeBeg+1:iNodeEnd, jNodeBeg+1:jNodeEnd), &
                         BCData(mm)%CpTarget(iNodeBeg:iNodeEnd, jNodeBeg:jNodeEnd), &
                         stat=ierr)
+                    if (useRoughSA .and. ierr == 0) then
+                        allocate(BCData(mm)%ksNS_Wall(iBeg:iEnd,jBeg:jEnd), stat=ierr)
+                   end if
                    if(ierr /= 0)                      &
                         call terminate("allocMemBCData", &
                         "Memory allocation failure for &
diff --git a/src/f2py/adflow.pyf b/src/f2py/adflow.pyf
index ba75435f3..6ff2658bb 100644
--- a/src/f2py/adflow.pyf
+++ b/src/f2py/adflow.pyf
@@ -1205,7 +1205,7 @@ python module libadflow
          real(kind=realtype) :: beta
          integer(kind=inttype) :: liftindex
          real(kind=realtype) :: cavitationnumber
-         real(kind=realtype) :: useroughsa
+         logical :: useroughsa
        end module inputphysics
 
        module inputadjoint ! in :adflow:../modules/inputParam.f90
diff --git a/src/initFlow/initializeFlow.F90 b/src/initFlow/initializeFlow.F90
index 0d373e942..4700958de 100644
--- a/src/initFlow/initializeFlow.F90
+++ b/src/initFlow/initializeFlow.F90
@@ -272,6 +272,8 @@ subroutine updateBCDataAllLevels()
     use constants
     use iteration, only : groundLevel
     use bcdata, only : setbcdataFineGrid, setBCDataCoarseGrid
+    use wallDistance, only : updateWallRoughness
+    use inputPhysics, only : useRoughSA
     implicit none
 
     ! Allocate the memory for the prescribed boundary data at the
@@ -290,6 +292,11 @@ subroutine updateBCDataAllLevels()
     call setBCDataCoarseGrid
 #endif
 
+    ! update the roughness value in flowdoms
+    if (useRoughSA) then
+       call updateWallRoughness
+    end if
+
   end subroutine updateBCDataAllLevels
 
   subroutine initFlow
diff --git a/src/modules/block.F90 b/src/modules/block.F90
index 6d432e97c..3e27e2d33 100644
--- a/src/modules/block.F90
+++ b/src/modules/block.F90
@@ -672,9 +672,15 @@ module block
      ! d2Wall(2:il,2:jl,2:kl) - Distance from the center of the cell
      !                          to the nearest viscous wall.
      ! intermittency( )       - Function defining the transition location
+     !
+     ! The next two variables are only initialized if roughness is requested (useRoughSA = True)
+     ! nearestBCCell(2:il,2:jl,2:kl,4)   - dom,boco,i,j indices for the nearest wall cell; is needed for rougness
+     ! ks(2:il,2:jl,2:kl)     - Roughness value of the nearest wall
 
      real(kind=realType), dimension(:,:,:), pointer :: d2Wall, filterDES
      real(kind=realType), dimension(:,:,:), pointer :: intermittency
+     integer(kind=intType), dimension(:,:,:,:), pointer :: nearestBCCell
+     real(kind=realType), dimension(:,:,:), pointer :: ks
 
      ! bmti1(je,ke,nt1:nt2,nt1:nt2): Matrix used for the implicit
      !                               boundary condition treatment of
diff --git a/src/modules/blockPointers.F90 b/src/modules/blockPointers.F90
index 08ed3ea8b..62950b127 100644
--- a/src/modules/blockPointers.F90
+++ b/src/modules/blockPointers.F90
@@ -151,6 +151,7 @@ module blockPointers
   real(kind=realType), dimension(:,:,:),   pointer :: radI, radJ, radK
 
   real(kind=realType), dimension(:,:,:), pointer :: d2Wall
+  real(kind=realType), dimension(:,:,:), pointer :: ks
   real(kind=realType), dimension(:,:,:), pointer :: intermittency
   real(kind=realType), dimension(:,:,:),   pointer :: filterDES  ! eran-des
   real(kind=realType), dimension(:,:,:,:), pointer :: bmti1
diff --git a/src/modules/constants.F90 b/src/modules/constants.F90
index 4f8a5a408..26a8a2cfd 100644
--- a/src/modules/constants.F90
+++ b/src/modules/constants.F90
@@ -300,7 +300,7 @@ module constants
   integer(kind=intType), parameter :: nbcVarSubsonicInflow = 17
   integer(kind=intType), parameter :: nbcVarSubsonicOutflow = 1
   integer(kind=intType), parameter :: nbcVarSupersonicInflow = 7
-  integer(kind=intType), parameter :: nbcVarAdiabaticWall = 1
+  integer(kind=intType), parameter :: nbcVarAdiabaticWall = 0
   integer(kind=intType), parameter :: nbcVarIsothermalWall = 1
 
   ! Indices of specific familyExcahnge groups based on BC
diff --git a/src/modules/inputParam.F90 b/src/modules/inputParam.F90
index 86330ce0f..43d08b08f 100644
--- a/src/modules/inputParam.F90
+++ b/src/modules/inputParam.F90
@@ -519,7 +519,7 @@ module inputPhysics
   !                      when considering turbulence model effects
   ! useRotationSA:       Determines if we will use rotation correction (SA model only)
   ! useft2SA:            Determines if we will use the ft2 term (SA model only)
-  ! useRoughSA:          Whether or not to use rough version of SA
+  ! useRoughSA:          Whether or not to use rough version of SA (BC values are set via CGNS-Grid)
   ! wallFunctions:       Whether or not to use wall functions.
   ! wallDistanceNeeded:  Whether or not the wall distance is needed
   !                      for the turbulence model in a RANS problem.
@@ -583,8 +583,7 @@ module inputPhysics
   real(kind=realType), dimension(3,2) :: momentAxis
   real(kind=realType) :: SSuthDim, muSuthDim, TSuthDim
   real(kind=realType) :: cavitationnumber
-  logical :: useRoughSA
-  real(kind=realType) :: kssa   ! TODO: Remove this!!
+  logical             :: useRoughSA
 
 #ifndef USE_TAPENADE
   real(kind=realType) :: alphad, betad
diff --git a/src/modules/overset.F90 b/src/modules/overset.F90
index a2ff1de8b..26de38f3e 100644
--- a/src/modules/overset.F90
+++ b/src/modules/overset.F90
@@ -172,6 +172,10 @@ module oversetData
      ! indCell: Global cell index for wall cells
      integer(kind=intType), dimension(:), pointer :: indCell
 
+     ! BCCell: dom,boco,i,j values that define this cell in the CGNS-mesh
+     ! I it is only used for the rough SA variant (when 'useRoughSA' = True)
+     integer(kind=intType), dimension(:, :), pointer :: BCCell
+
      ! Blanking values for Nodes
      integer(kind=intType), dimension(:), allocatable :: iBlank
      integer(kind=intType), dimension(:), allocatable :: cellPtr
diff --git a/src/modules/paramTurb.F90 b/src/modules/paramTurb.F90
index 6d02c1f3f..468bd1506 100644
--- a/src/modules/paramTurb.F90
+++ b/src/modules/paramTurb.F90
@@ -24,6 +24,7 @@ module paramTurb
        real(kind=realType), parameter :: rsaCt3 = 1.2_realType
        real(kind=realType), parameter :: rsaCt4 = 0.5_realType
        real(kind=realType), parameter :: rsaCrot = 2.0_realType
+       real(kind=realType), parameter :: rsaCr1 = 0.5_realType
 
 !
 !       K-omega constants.
diff --git a/src/overset/buildClusterWalls.F90 b/src/overset/buildClusterWalls.F90
index c249aa4e4..cc74a15ee 100644
--- a/src/overset/buildClusterWalls.F90
+++ b/src/overset/buildClusterWalls.F90
@@ -57,6 +57,9 @@ subroutine buildClusterWalls(level, sps, useDual, walls, famList, nFamList)
   integer(kind=intType), dimension(:), allocatable :: cellIndicesLocal
   integer(kind=intType), dimension(:), allocatable :: cgnsIndices, curCGNSNode
 
+  integer(kind=intType), dimension(:,:), allocatable :: cellBCCellLocal
+  integer(kind=intType), dimension(:,:), allocatable :: cellBCCellGlobal
+
   integer(kind=intType), dimension(:),    allocatable :: nCellProc, cumCellProc
   integer(kind=intType), dimension(:),    allocatable :: nNodeProc, cumNodeProc
   real(kind=realType),   dimension(:, :), allocatable :: uniqueNodes
@@ -164,6 +167,10 @@ subroutine buildClusterWalls(level, sps, useDual, walls, famList, nFamList)
        nodeIndicesLocal(nNodesLocal), nodeIndicesCGNSLocal(nNodesLocal), &
        cellIndicesLocal(nCellsLocal))
 
+  if (useRoughSA) then
+     allocate(cellBCCellLocal(4, nCellsLocal))
+  end if
+
   iCell = 0
   iNode = 0
   ! Second loop over the local walls
@@ -335,10 +342,16 @@ subroutine buildClusterWalls(level, sps, useDual, walls, famList, nFamList)
 
                     ! Save the global cell index
                     if (useDual) then
-                       cellIndicesLocal(iCell) = 0
+                        cellIndicesLocal(iCell) = 0
                     else
-                       ! Valid only when using primary nodes
-                       cellIndicesLocal(iCell) = indCell(iBeg+i+1, jBeg+j+1)
+                        ! Valid only when using primary nodes
+                        cellIndicesLocal(iCell) = indCell(iBeg+i+1, jBeg+j+1)
+                        if (useRoughSA) then
+                            cellBCCellLocal(1, iCell) = nn
+                            cellBCCellLocal(2, iCell) = mm
+                            cellBCCellLocal(3, iCell) = i
+                            cellBCCellLocal(4, iCell) = j
+                        end if
                     end if
                  end do
               end do
@@ -359,8 +372,14 @@ subroutine buildClusterWalls(level, sps, useDual, walls, famList, nFamList)
                     if (useDual) then
                        cellIndicesLocal(iCell) = 0
                     else
-                       ! Valid only when using primary nodes
-                       cellIndicesLocal(iCell) = indCell(iBeg+i+1, jBeg+j+1)
+                        ! Valid only when using primary nodes
+                        cellIndicesLocal(iCell) = indCell(iBeg+i+1, jBeg+j+1)
+                        if (useRoughSA) then
+                            cellBCCellLocal(1, iCell) = nn
+                            cellBCCellLocal(2, iCell) = mm
+                            cellBCCellLocal(3, iCell) = i
+                            cellBCCellLocal(4, iCell) = j
+                        end if
                     end if
                  end do
               end do
@@ -394,6 +413,10 @@ subroutine buildClusterWalls(level, sps, useDual, walls, famList, nFamList)
        nodeIndicesGlobal(nNodesGlobal), nodeIndicesCGNSGlobal(nNodesGlobal), &
        cellIndicesGlobal(nCellsGlobal))
 
+  if (useRoughSA) then
+     allocate(cellBCCellGlobal(4, nCellsGlobal))
+  end if
+
   ! Communicate the nodes, connectivity and cluster information to everyone
   call mpi_allgatherv(nodesLocal, 3*nNodesLocal, adflow_real, &
        nodesGlobal, nNodeProc*3, cumNodeProc*3, adflow_real, &
@@ -430,11 +453,23 @@ subroutine buildClusterWalls(level, sps, useDual, walls, famList, nFamList)
        adflow_comm_world, ierr)
   call EChk(ierr, __FILE__, __LINE__)
 
+  if (useRoughSA) then
+    call mpi_allgatherv(cellBCCellLocal, nCellsLocal*4, adflow_integer, &
+        cellBCCellGlobal, nCellProc*4, cumCellProc, adflow_integer, &
+        adflow_comm_world, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
+  end if
+
+
   ! Free the local data we do not need anymore
   deallocate(nodesLocal, connLocal, clusterCellLocal, clusterNodeLocal, &
        nCellProc, cumCellProc, nNodeProc, cumNodeProc, nodeIndicesLocal, &
        nodeIndicesCGNSLocal, cellIndicesLocal)
 
+  if (useRoughSA) then
+     deallocate(cellBCCellLocal)
+  end if
+
   ! We will now build separate trees for each cluster.
   allocate(nodesPerCluster(nClusters), cellsPerCluster(nClusters), &
        cnc(nClusters), ccc(nClusters))
@@ -463,6 +498,9 @@ subroutine buildClusterWalls(level, sps, useDual, walls, famList, nFamList)
      allocate(walls(i)%x(3, nNodes), walls(i)%conn(4, nCells), &
           walls(i)%ind(nNodes))
      allocate(walls(i)%indCell(nCells))
+     if (useRoughSA) then
+        allocate(walls(i)%BCCell(4, nCells))
+     end if
   end do
 
   ! We now loop through the master list of nodes and elements and
@@ -487,6 +525,10 @@ subroutine buildClusterWalls(level, sps, useDual, walls, famList, nFamList)
      walls(c)%conn(:, ccc(c)) = connGlobal(:, i)
 
      walls(c)%indCell(ccc(c)) = cellIndicesGlobal(i)
+
+     if (useRoughSA) then
+        walls(c)%BCCell(:, ccc(c)) = cellBCCellGlobal(:, i)
+     end if
   end do
 
   do i=1, nClusters
@@ -564,6 +606,10 @@ subroutine buildClusterWalls(level, sps, useDual, walls, famList, nFamList)
        clusterNodeGlobal, localNodeNums, nodeIndicesGlobal, &
        nodeIndicesCGNSGlobal)
 
+  if (useRoughSA) then
+     deallocate(cellBCCellGlobal)
+  end if
+
   do nn=1, nDom
      deallocate(flowDoms(nn, level, sps)%globalCGNSNode)
   end do
diff --git a/src/turbulence/sa.F90 b/src/turbulence/sa.F90
index 7e6af09a3..0980c2ce6 100644
--- a/src/turbulence/sa.F90
+++ b/src/turbulence/sa.F90
@@ -107,7 +107,7 @@ subroutine saSource
 
     ! Local variables.
     integer(kind=intType) :: i, j, k, nn, ii
-    real(kind=realType) :: dnew,cr1
+    real(kind=realType) :: dist, ksLocal
     real(kind=realType) :: fv1, fv2, ft2
     real(kind=realType) :: ss, sst, nu, dist2Inv, chi, chi2, chi3
     real(kind=realType) :: rr, gg, gg6, termFw, fwSa, term1, term2
@@ -125,9 +125,6 @@ subroutine saSource
     cw36    = rsaCw3**6
     cb3Inv  = one/rsaCb3
 
-    ! constants for SA rough
-    cr1=0.5
-
     ! Determine the non-dimensional wheel speed of this block.
 
     omegax = timeRef*sections(sectionID)%rotRate(1)
@@ -247,12 +244,20 @@ subroutine saSource
                 ! and nu) and the functions fv1 and fv2. The latter corrects
                 ! the production term near a viscous wall.
 
-                ! SA rough
-                dnew     = d2Wall(i,j,k) + 0.03*kssa
+                ! As the rough Version of SA is supported, this looks slightly different
+                ! than the standard SA implementation
+
+                if (useRoughSA) then
+                   ksLocal = ks(i,j,k)
+                   dist = d2Wall(i,j,k) + 0.03 * ksLocal
+                else
+                   ksLocal = zero
+                   dist = d2Wall(i,j,k)
+                end if
 
                 nu       = rlv(i,j,k)/w(i,j,k,irho)
-                dist2Inv = one/(dnew**2)
-                chi      = w(i,j,k,itu1)/nu + cr1*kssa/dnew
+                dist2Inv = one/(dist**2)
+                chi      = w(i,j,k,itu1)/nu + rsaCr1*ksLocal/dist
                 chi2     = chi*chi
                 chi3     = chi*chi2
                 fv1      = chi3/(chi3+cv13)
diff --git a/src/turbulence/turbBCRoutines.F90 b/src/turbulence/turbBCRoutines.F90
index a0fbb4abb..9d0880a10 100644
--- a/src/turbulence/turbBCRoutines.F90
+++ b/src/turbulence/turbBCRoutines.F90
@@ -331,42 +331,42 @@ subroutine bcEddyWall(nn)
     case (iMin)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             rev(1,i,j) = saFact(kssa, d2Wall(2,i,j))*rev(2,i,j)
+             rev(1,i,j) = saRoughFact(2,i,j)*rev(2,i,j)
           enddo
        enddo
 
     case (iMax)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             rev(ie,i,j) = saFact(kssa, d2Wall(il,i,j))*rev(il,i,j)
+             rev(ie,i,j) = saRoughFact(il,i,j)*rev(il,i,j)
           enddo
        enddo
 
     case (jMin)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             rev(i,1,j) = saFact(kssa, d2Wall(i,2,j))*rev(i,2,j)
+             rev(i,1,j) = saRoughFact(i,2,j)*rev(i,2,j)
           enddo
        enddo
 
     case (jMax)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             rev(i,je,j) = saFact(kssa, d2Wall(i,jl,j))*rev(i,jl,j)
+             rev(i,je,j) = saRoughFact(i,jl,j)*rev(i,jl,j)
           enddo
        enddo
 
     case (kMin)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             rev(i,j,1) = saFact(kssa, d2Wall(i,j,2))*rev(i,j,2)
+             rev(i,j,1) = saRoughFact(i,j,2)*rev(i,j,2)
           enddo
        enddo
 
     case (kMax)
        do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
           do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             rev(i,j,ke) = saFact(kssa, d2Wall(i,j,kl))*rev(i,j,kl)
+             rev(i,j,ke) = saRoughFact(i,j,kl)*rev(i,j,kl)
           enddo
        enddo
     end select
@@ -845,39 +845,39 @@ subroutine bcTurbWall(nn)
        case (iMin)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmti1(i,j,itu1,itu1) = -saFact(kssa, d2Wall(2,i,j))
+                bmti1(i,j,itu1,itu1) = -saRoughFact(2,i,j)
              enddo
           enddo
        case (iMax)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmti2(i,j,itu1,itu1) = -saFact(kssa, d2Wall(il,i,j))
+                bmti2(i,j,itu1,itu1) = -saRoughFact(il,i,j)
              enddo
           enddo
        case (jMin)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmtj1(i,j,itu1,itu1) = -saFact(kssa, d2Wall(i,2,j))
+                bmtj1(i,j,itu1,itu1) = -saRoughFact(i,2,j)
              enddo
           enddo
        case (jMax)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmtj2(i,j,itu1,itu1) = -saFact(kssa, d2Wall(i,jl,j))
+                bmtj2(i,j,itu1,itu1) = -saRoughFact(i,jl,j)
              enddo
           enddo
 
        case (kMin)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmtk1(i,j,itu1,itu1) = -saFact(kssa, d2Wall(i,j,2))
+                bmtk1(i,j,itu1,itu1) = -saRoughFact(i,j,2)
              enddo
           enddo
 
        case (kMax)
           do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
              do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmtk2(i,j,itu1,itu1) = -saFact(kssa, d2Wall(i,j,kl))
+                bmtk2(i,j,itu1,itu1) = -saRoughFact(i,j,kl)
              enddo
           enddo
        end select
@@ -1392,26 +1392,30 @@ subroutine turbBCNSWall(secondHalo)
     enddo bocos
   end subroutine turbBCNSWall
 
-  function  saFact(ks, d)
+  function  saRoughFact(i,j,k)
+
+    ! returns either the regular SA-boundary condition
+    ! or the modified Roughness-boundary condition
 
    use constants
+   use inputPhysics, only : useRoughSA
+   use BlockPointers, only : ks, d2wall
    implicit none
 
    ! dummy arguments
-   real(kind=realType) :: saFact
+   real(kind=realType) :: saRoughFact
 
    ! local variablse
-   real(kind=realType) :: ks
-   real(kind=realType) :: d
-
-   if (ks .eq. zero) then
-      saFact = -one
-   else if (d .eq. zero) then
-      saFact = one
-   else
-      saFact = (ks - d/0.03) / (ks + d/0.03)
+   integer(kind=intType) :: i, j, k
+
+   if (useRoughSA) then
+      saRoughFact = (ks(i,j,k) - d2wall(i,j,k)/0.03) / &
+           (ks(i,j,k) + d2wall(i,j,k)/0.03)
+      return
    end if
 
-  end function  saFact
+   saRoughFact = one
+
+  end function  saRoughFact
 
 end module turbBCRoutines
diff --git a/src/utils/utils.F90 b/src/utils/utils.F90
index 27ed3a56e..7dc268877 100644
--- a/src/utils/utils.F90
+++ b/src/utils/utils.F90
@@ -3264,6 +3264,7 @@ subroutine setPointers(nn,mm,ll)
     ! from blockPointers so use a bare use.
     use constants
     use blockPointers
+    use inputPhysics, only : useRoughSA
     implicit none
     !
     !      Subroutine arguments
@@ -3501,6 +3502,9 @@ subroutine setPointers(nn,mm,ll)
 
     d2Wall => flowDoms(nn,mm,ll)%d2Wall
     filterDES   => flowDoms(nn,mm,ll)%filterDES  ! eran-des
+    if (useRoughSA) then
+       ks => flowDoms(nn,mm,ll)%ks
+    end if
 
     ! Arrays used for the implicit treatment of the turbulent wall
     ! boundary conditions. As these variables are only allocated for
@@ -4626,7 +4630,7 @@ subroutine deallocDerivativeValues(level)
     use inputtimespectral, only : nTimeIntervalsSpectral
     use wallDistanceData, only : xSurfVec, xSurfVecd
     use flowVarRefState, only : winfd
-    use inputPhysics, only : wallDistanceNeeded
+    use inputPhysics, only : wallDistanceNeeded, useRoughSA
     use adjointVars, only : derivVarsAllocated
     use BCPointers_b
 
@@ -4709,7 +4713,6 @@ subroutine deallocDerivativeValues(level)
                   flowDomsd(nn, level, sps)%BCData(mm)%area, &
                   flowDomsd(nn, level, sps)%BCData(mm)%uSlip, &
                   flowDomsd(nn, level, sps)%BCData(mm)%TNS_Wall, &
-!                  flowDomsd(nn, level, sps)%BCData(mm)%ksNS_Wall, &
                   stat=ierr)
              call EChk(ierr,__FILE__,__LINE__)
           enddo
diff --git a/src/wallDistance/wallDistance.F90 b/src/wallDistance/wallDistance.F90
index f6852b542..f1d6f2b26 100644
--- a/src/wallDistance/wallDistance.F90
+++ b/src/wallDistance/wallDistance.F90
@@ -120,6 +120,53 @@ subroutine updateWallDistancesQuickly(nn, level, sps)
 
   end subroutine updateWallDistancesQuickly
 
+  subroutine updateWallRoughness()
+
+    ! Sets the roughness-value (ks) of the nearest wall-cell.
+
+    use constants
+    use blockPointers, only : il, jl, kl, flowDoms, ks, BCData, nDom, nBocos
+    use inputTimeSpectral, only :nTimeIntervalsSpectral
+    use utils, only : setPointers
+    use iteration, only : groundLevel
+    implicit none
+
+    ! Local Variables
+    integer(kind=intType) :: i, j, k, iii, jjj, boco, dom
+    integer(kind=intType) :: nn, sps, level, nLevels
+
+    nLevels = ubound(flowDoms,2)
+
+    do level=1, nLevels
+        do sps=1, nTimeIntervalsSpectral
+            do nn=1, nDom
+                call setPointers(nn, level, sps)
+                do k=2,kl
+                    do j=2,jl
+                        do i=2,il
+                            if (flowDoms(nn, level, sps)%nearestBCCell(1, i, j, k) == 0) then
+                                ! This cell is too far away and has no
+                                ! association. Set the roughness to zero.
+                                print *, 'ks cutoff'
+                                ks(i, j, k) = zero
+                                cycle
+                            end if
+
+                            dom = flowDoms(nn, level, sps)%nearestBCCell(1, i, j, k)
+                            boco = flowDoms(nn, level, sps)%nearestBCCell(2, i, j, k)
+                            iii = flowDoms(nn, level, sps)%nearestBCCell(3, i, j, k)
+                            jjj = flowDoms(nn, level, sps)%nearestBCCell(4, i, j, k)
+
+                            ks(i, j, k) = flowDoms(dom, level, sps)%BCData(boco)%ksNS_Wall(iii,jjj)
+                        end do
+                    end do
+                end do
+            end do
+        end do
+    end do
+
+  end subroutine updateWallRoughness
+
   ! ----------------------------------------------------------------------
   !                                                                      |
   !                    No Tapenade Routine below this line               |
@@ -474,6 +521,7 @@ subroutine initWallDistance(level, sps, allocMem)
     !
     use constants
     use blockPointers, only : nDom, flowDoms
+    use inputPhysics, only : useRoughSA
     use utils, only : terminate
     implicit none
     !
@@ -505,6 +553,13 @@ subroutine initWallDistance(level, sps, allocMem)
           if(ierr /= 0)                          &
                call terminate("initWallDistance", &
                "Memory allocation failure for d2Wall")
+          if (useRoughSA) then
+            allocate(flowDoms(nn,level,sps)%ks(2:il,2:jl,2:kl), &
+                stat=ierr)
+            if(ierr /= 0)                          &
+                call terminate("initWallDistance", &
+                "Memory allocation failure for ks")
+          end if
        endif
 
        ! Initialize the wall distances to a large value.
@@ -1782,6 +1837,9 @@ subroutine determineWallAssociation(level, sps)
        if (.not. associated(flowDoms(nn,level,sps)%surfNodeIndices)) then
           allocate(flowDoms(nn,level,sps)%surfNodeIndices(4, 2:il, 2:jl, 2:kl))
           allocate(flowDoms(nn,level,sps)%uv(2, 2:il, 2:jl, 2:kl))
+          if (useRoughSA) then
+             allocate(flowDoms(nn,level,sps)%nearestBCCell(4, 2:il, 2:jl, 2:kl))
+          end if
        end if
 
        ! Set the cluster for this block
@@ -1824,10 +1882,16 @@ subroutine determineWallAssociation(level, sps)
                               walls(c)%ind(walls(c)%conn(kk, cellID))
                       end do
                       flowDoms(nn, level, sps)%uv(:, i, j, k) = uvw(1:2)
+                      if (useRoughSA) then
+                         flowDoms(nn, level, sps)%nearestBCCell(:, i, j, k) = walls(c)%BCCell(:, cellID)
+                      end if
                    else
                       ! Just set dummy values. These will never be used.
                       flowDoms(nn, level, sps)%surfNodeIndices(:, i, j, k) = 0
                       flowDoms(nn, level, sps)%uv(:, i, j, k) = 0
+                      if (useRoughSA) then
+                          flowDoms(nn, level, sps)%nearestBCCell(:, i, j, k) = 0
+                      end if
                    end if
 
                    ! We are done with this point.
@@ -1857,7 +1921,9 @@ subroutine determineWallAssociation(level, sps)
                               fullWall%ind(fullWall%conn(kk, cellID))
                       end do
                       flowDoms(nn, level, sps)%uv(:, i, j, k) = uvw(1:2)
-
+                      if (useRoughSA) then
+                         flowDoms(nn, level, sps)%nearestBCCell(:, i, j, k) = walls(c)%BCCell(:, cellID)
+                      end if
                    else
 
                       ! This point is *closer* than the nearWallDist AND
@@ -1876,6 +1942,9 @@ subroutine determineWallAssociation(level, sps)
                                  walls(c)%ind(walls(c)%conn(kk, cellID2))
                          end do
                          flowDoms(nn, level, sps)%uv(:, i, j, k) = uvw2(1:2)
+                         if (useRoughSA) then
+                            flowDoms(nn, level, sps)%nearestBCCell(:, i, j, k) = walls(c)%BCCell(:, cellID2)
+                         end if
                       else
                          ! The full wall distance is better. Take that.
 
@@ -1884,7 +1953,9 @@ subroutine determineWallAssociation(level, sps)
                                  fullWall%ind(fullWall%conn(kk, cellID))
                          end do
                          flowDoms(nn, level, sps)%uv(:, i, j, k) = uvw(1:2)
-
+                         if (useRoughSA) then
+                            flowDoms(nn, level, sps)%nearestBCCell(:, i, j, k) = walls(c)%BCCell(:, cellID)
+                         end if
                       end if
                    end if
                 else
@@ -1895,6 +1966,9 @@ subroutine determineWallAssociation(level, sps)
 
                    flowDoms(nn, level, sps)%surfNodeIndices(:, i, j, k) = 0
                    flowDoms(nn, level, sps)%uv(:, i, j, k) = 0
+                   if (useRoughSA) then
+                       flowDoms(nn, level, sps)%nearestBCCell(:, i, j, k) = 0
+                   end if
 
                 end if
              end do
@@ -1997,6 +2071,9 @@ subroutine determineWallAssociation(level, sps)
     if (oversetPresent) then
        deallocate(fullWall%x, fullWall%conn, fullWall%ind)
        call destroySerialQuad(fullWall%ADT)
+       if (useRoughSA) then
+          deallocate(walls(c)%BCCell)
+       end if
     end if
 
   end subroutine determineWallAssociation

From de9fd146117778021e2f0c420833160b56359404 Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Mon, 7 Nov 2022 09:24:50 +0100
Subject: [PATCH 22/60] multi-proc now possible (+tapenade)

---
 src/adjoint/outputForward/walldistance_d.f90  |  44 ----
 src/adjoint/outputReverse/walldistance_b.f90  |  44 ----
 .../outputReverseFast/walldistance_fast_b.f90 |  44 ----
 src/modules/block.F90                         |   4 +-
 src/modules/overset.F90                       |   4 -
 src/modules/wallDistanceData.F90              |   5 +
 src/overset/buildClusterWalls.F90             |  56 +----
 src/preprocessing/preprocessingAPI.F90        |  15 +-
 src/utils/utils.F90                           |   1 -
 src/wallDistance/wallDistance.F90             | 212 ++++++++++++++----
 10 files changed, 197 insertions(+), 232 deletions(-)

diff --git a/src/adjoint/outputForward/walldistance_d.f90 b/src/adjoint/outputForward/walldistance_d.f90
index 9ba2bdce0..8b9323668 100644
--- a/src/adjoint/outputForward/walldistance_d.f90
+++ b/src/adjoint/outputForward/walldistance_d.f90
@@ -159,50 +159,6 @@ subroutine updatewalldistancesquickly(nn, level, sps)
     end do
   end subroutine updatewalldistancesquickly
   subroutine updatewallroughness()
-! sets the roughness-value (ks) of the nearest wall-cell.
-    use constants
-    use blockpointers, only : il, jl, kl, flowdoms, ks, bcdata, ndom, &
-&   nbocos
-    use inputtimespectral, only : ntimeintervalsspectral
-    ! use utils_d, only : setpointers TODO: How is this supposed to be handled?
-    use iteration, only : groundlevel
     implicit none
-! local variables
-    integer(kind=inttype) :: i, j, k, iii, jjj, boco, dom
-    integer(kind=inttype) :: nn, sps, level, nlevels
-    intrinsic ubound
-    external setpointers
-    nlevels = ubound(flowdoms, 2)
-    do level=1,nlevels
-      do sps=1,ntimeintervalsspectral
-        do nn=1,ndom
-          ! call setpointers(nn, level, sps)
-          do k=2,kl
-            do j=2,jl
-              do i=2,il
-                if (flowdoms(nn, level, sps)%nearestbccell(1, i, j, k) &
-&                   .eq. 0) then
-! this cell is too far away and has no
-! association. set the roughness to zero.
-                  print*, 'ks cutoff'
-                  ks(i, j, k) = zero
-                else
-                  dom = flowdoms(nn, level, sps)%nearestbccell(1, i, j, &
-&                   k)
-                  boco = flowdoms(nn, level, sps)%nearestbccell(2, i, j&
-&                   , k)
-                  iii = flowdoms(nn, level, sps)%nearestbccell(3, i, j, &
-&                   k)
-                  jjj = flowdoms(nn, level, sps)%nearestbccell(4, i, j, &
-&                   k)
-                  ks(i, j, k) = flowdoms(dom, level, sps)%bcdata(boco)%&
-&                   ksns_wall(iii, jjj)
-                end if
-              end do
-            end do
-          end do
-        end do
-      end do
-    end do
   end subroutine updatewallroughness
 end module walldistance_d
diff --git a/src/adjoint/outputReverse/walldistance_b.f90 b/src/adjoint/outputReverse/walldistance_b.f90
index bfae518e5..ebc81b847 100644
--- a/src/adjoint/outputReverse/walldistance_b.f90
+++ b/src/adjoint/outputReverse/walldistance_b.f90
@@ -190,50 +190,6 @@ subroutine updatewalldistancesquickly(nn, level, sps)
     end do
   end subroutine updatewalldistancesquickly
   subroutine updatewallroughness()
-! sets the roughness-value (ks) of the nearest wall-cell.
-    use constants
-    use blockpointers, only : il, jl, kl, flowdoms, ks, bcdata, ndom, &
-&   nbocos
-    use inputtimespectral, only : ntimeintervalsspectral
-    ! use utils_b, only : setpointers
-    use iteration, only : groundlevel
     implicit none
-! local variables
-    integer(kind=inttype) :: i, j, k, iii, jjj, boco, dom
-    integer(kind=inttype) :: nn, sps, level, nlevels
-    intrinsic ubound
-    external setpointers
-    nlevels = ubound(flowdoms, 2)
-    do level=1,nlevels
-      do sps=1,ntimeintervalsspectral
-        do nn=1,ndom
-          ! call setpointers(nn, level, sps)
-          do k=2,kl
-            do j=2,jl
-              do i=2,il
-                if (flowdoms(nn, level, sps)%nearestbccell(1, i, j, k) &
-&                   .eq. 0) then
-! this cell is too far away and has no
-! association. set the roughness to zero.
-                  print*, 'ks cutoff'
-                  ks(i, j, k) = zero
-                else
-                  dom = flowdoms(nn, level, sps)%nearestbccell(1, i, j, &
-&                   k)
-                  boco = flowdoms(nn, level, sps)%nearestbccell(2, i, j&
-&                   , k)
-                  iii = flowdoms(nn, level, sps)%nearestbccell(3, i, j, &
-&                   k)
-                  jjj = flowdoms(nn, level, sps)%nearestbccell(4, i, j, &
-&                   k)
-                  ks(i, j, k) = flowdoms(dom, level, sps)%bcdata(boco)%&
-&                   ksns_wall(iii, jjj)
-                end if
-              end do
-            end do
-          end do
-        end do
-      end do
-    end do
   end subroutine updatewallroughness
 end module walldistance_b
diff --git a/src/adjoint/outputReverseFast/walldistance_fast_b.f90 b/src/adjoint/outputReverseFast/walldistance_fast_b.f90
index fecfbf826..dc7ff4896 100644
--- a/src/adjoint/outputReverseFast/walldistance_fast_b.f90
+++ b/src/adjoint/outputReverseFast/walldistance_fast_b.f90
@@ -69,50 +69,6 @@ subroutine updatewalldistancesquickly(nn, level, sps)
     end do
   end subroutine updatewalldistancesquickly
   subroutine updatewallroughness()
-! sets the roughness-value (ks) of the nearest wall-cell.
-    use constants
-    use blockpointers, only : il, jl, kl, flowdoms, ks, bcdata, ndom, &
-&   nbocos
-    use inputtimespectral, only : ntimeintervalsspectral
-    ! use utils_fast_b, only : setpointers
-    use iteration, only : groundlevel
     implicit none
-! local variables
-    integer(kind=inttype) :: i, j, k, iii, jjj, boco, dom
-    integer(kind=inttype) :: nn, sps, level, nlevels
-    intrinsic ubound
-    external setpointers
-    nlevels = ubound(flowdoms, 2)
-    do level=1,nlevels
-      do sps=1,ntimeintervalsspectral
-        do nn=1,ndom
-          ! call setpointers(nn, level, sps)
-          do k=2,kl
-            do j=2,jl
-              do i=2,il
-                if (flowdoms(nn, level, sps)%nearestbccell(1, i, j, k) &
-&                   .eq. 0) then
-! this cell is too far away and has no
-! association. set the roughness to zero.
-                  print*, 'ks cutoff'
-                  ks(i, j, k) = zero
-                else
-                  dom = flowdoms(nn, level, sps)%nearestbccell(1, i, j, &
-&                   k)
-                  boco = flowdoms(nn, level, sps)%nearestbccell(2, i, j&
-&                   , k)
-                  iii = flowdoms(nn, level, sps)%nearestbccell(3, i, j, &
-&                   k)
-                  jjj = flowdoms(nn, level, sps)%nearestbccell(4, i, j, &
-&                   k)
-                  ks(i, j, k) = flowdoms(dom, level, sps)%bcdata(boco)%&
-&                   ksns_wall(iii, jjj)
-                end if
-              end do
-            end do
-          end do
-        end do
-      end do
-    end do
   end subroutine updatewallroughness
 end module walldistance_fast_b
diff --git a/src/modules/block.F90 b/src/modules/block.F90
index 3e27e2d33..0184afa3b 100644
--- a/src/modules/block.F90
+++ b/src/modules/block.F90
@@ -674,12 +674,12 @@ module block
      ! intermittency( )       - Function defining the transition location
      !
      ! The next two variables are only initialized if roughness is requested (useRoughSA = True)
-     ! nearestBCCell(2:il,2:jl,2:kl,4)   - dom,boco,i,j indices for the nearest wall cell; is needed for rougness
+     ! nearestWallCellInd(2:il,2:jl,2:kl)   - global cell ID for the nearest wall cell; is needed for rougness
      ! ks(2:il,2:jl,2:kl)     - Roughness value of the nearest wall
 
      real(kind=realType), dimension(:,:,:), pointer :: d2Wall, filterDES
      real(kind=realType), dimension(:,:,:), pointer :: intermittency
-     integer(kind=intType), dimension(:,:,:,:), pointer :: nearestBCCell
+     integer(kind=intType), dimension(:,:,:), pointer :: nearestWallCellInd
      real(kind=realType), dimension(:,:,:), pointer :: ks
 
      ! bmti1(je,ke,nt1:nt2,nt1:nt2): Matrix used for the implicit
diff --git a/src/modules/overset.F90 b/src/modules/overset.F90
index 26de38f3e..a2ff1de8b 100644
--- a/src/modules/overset.F90
+++ b/src/modules/overset.F90
@@ -172,10 +172,6 @@ module oversetData
      ! indCell: Global cell index for wall cells
      integer(kind=intType), dimension(:), pointer :: indCell
 
-     ! BCCell: dom,boco,i,j values that define this cell in the CGNS-mesh
-     ! I it is only used for the rough SA variant (when 'useRoughSA' = True)
-     integer(kind=intType), dimension(:, :), pointer :: BCCell
-
      ! Blanking values for Nodes
      integer(kind=intType), dimension(:), allocatable :: iBlank
      integer(kind=intType), dimension(:), allocatable :: cellPtr
diff --git a/src/modules/wallDistanceData.F90 b/src/modules/wallDistanceData.F90
index 7796abf43..1bf7c8048 100644
--- a/src/modules/wallDistanceData.F90
+++ b/src/modules/wallDistanceData.F90
@@ -44,4 +44,9 @@ module wallDistanceData
   real(kind=realType), dimension(:), pointer :: xVolume
 #endif
 
+
+
+  ! sa rough
+  integer(kind=intType), dimension(:,:), allocatable :: nCellBlockOffset
+
 end module wallDistanceData
diff --git a/src/overset/buildClusterWalls.F90 b/src/overset/buildClusterWalls.F90
index cc74a15ee..c249aa4e4 100644
--- a/src/overset/buildClusterWalls.F90
+++ b/src/overset/buildClusterWalls.F90
@@ -57,9 +57,6 @@ subroutine buildClusterWalls(level, sps, useDual, walls, famList, nFamList)
   integer(kind=intType), dimension(:), allocatable :: cellIndicesLocal
   integer(kind=intType), dimension(:), allocatable :: cgnsIndices, curCGNSNode
 
-  integer(kind=intType), dimension(:,:), allocatable :: cellBCCellLocal
-  integer(kind=intType), dimension(:,:), allocatable :: cellBCCellGlobal
-
   integer(kind=intType), dimension(:),    allocatable :: nCellProc, cumCellProc
   integer(kind=intType), dimension(:),    allocatable :: nNodeProc, cumNodeProc
   real(kind=realType),   dimension(:, :), allocatable :: uniqueNodes
@@ -167,10 +164,6 @@ subroutine buildClusterWalls(level, sps, useDual, walls, famList, nFamList)
        nodeIndicesLocal(nNodesLocal), nodeIndicesCGNSLocal(nNodesLocal), &
        cellIndicesLocal(nCellsLocal))
 
-  if (useRoughSA) then
-     allocate(cellBCCellLocal(4, nCellsLocal))
-  end if
-
   iCell = 0
   iNode = 0
   ! Second loop over the local walls
@@ -342,16 +335,10 @@ subroutine buildClusterWalls(level, sps, useDual, walls, famList, nFamList)
 
                     ! Save the global cell index
                     if (useDual) then
-                        cellIndicesLocal(iCell) = 0
+                       cellIndicesLocal(iCell) = 0
                     else
-                        ! Valid only when using primary nodes
-                        cellIndicesLocal(iCell) = indCell(iBeg+i+1, jBeg+j+1)
-                        if (useRoughSA) then
-                            cellBCCellLocal(1, iCell) = nn
-                            cellBCCellLocal(2, iCell) = mm
-                            cellBCCellLocal(3, iCell) = i
-                            cellBCCellLocal(4, iCell) = j
-                        end if
+                       ! Valid only when using primary nodes
+                       cellIndicesLocal(iCell) = indCell(iBeg+i+1, jBeg+j+1)
                     end if
                  end do
               end do
@@ -372,14 +359,8 @@ subroutine buildClusterWalls(level, sps, useDual, walls, famList, nFamList)
                     if (useDual) then
                        cellIndicesLocal(iCell) = 0
                     else
-                        ! Valid only when using primary nodes
-                        cellIndicesLocal(iCell) = indCell(iBeg+i+1, jBeg+j+1)
-                        if (useRoughSA) then
-                            cellBCCellLocal(1, iCell) = nn
-                            cellBCCellLocal(2, iCell) = mm
-                            cellBCCellLocal(3, iCell) = i
-                            cellBCCellLocal(4, iCell) = j
-                        end if
+                       ! Valid only when using primary nodes
+                       cellIndicesLocal(iCell) = indCell(iBeg+i+1, jBeg+j+1)
                     end if
                  end do
               end do
@@ -413,10 +394,6 @@ subroutine buildClusterWalls(level, sps, useDual, walls, famList, nFamList)
        nodeIndicesGlobal(nNodesGlobal), nodeIndicesCGNSGlobal(nNodesGlobal), &
        cellIndicesGlobal(nCellsGlobal))
 
-  if (useRoughSA) then
-     allocate(cellBCCellGlobal(4, nCellsGlobal))
-  end if
-
   ! Communicate the nodes, connectivity and cluster information to everyone
   call mpi_allgatherv(nodesLocal, 3*nNodesLocal, adflow_real, &
        nodesGlobal, nNodeProc*3, cumNodeProc*3, adflow_real, &
@@ -453,23 +430,11 @@ subroutine buildClusterWalls(level, sps, useDual, walls, famList, nFamList)
        adflow_comm_world, ierr)
   call EChk(ierr, __FILE__, __LINE__)
 
-  if (useRoughSA) then
-    call mpi_allgatherv(cellBCCellLocal, nCellsLocal*4, adflow_integer, &
-        cellBCCellGlobal, nCellProc*4, cumCellProc, adflow_integer, &
-        adflow_comm_world, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-  end if
-
-
   ! Free the local data we do not need anymore
   deallocate(nodesLocal, connLocal, clusterCellLocal, clusterNodeLocal, &
        nCellProc, cumCellProc, nNodeProc, cumNodeProc, nodeIndicesLocal, &
        nodeIndicesCGNSLocal, cellIndicesLocal)
 
-  if (useRoughSA) then
-     deallocate(cellBCCellLocal)
-  end if
-
   ! We will now build separate trees for each cluster.
   allocate(nodesPerCluster(nClusters), cellsPerCluster(nClusters), &
        cnc(nClusters), ccc(nClusters))
@@ -498,9 +463,6 @@ subroutine buildClusterWalls(level, sps, useDual, walls, famList, nFamList)
      allocate(walls(i)%x(3, nNodes), walls(i)%conn(4, nCells), &
           walls(i)%ind(nNodes))
      allocate(walls(i)%indCell(nCells))
-     if (useRoughSA) then
-        allocate(walls(i)%BCCell(4, nCells))
-     end if
   end do
 
   ! We now loop through the master list of nodes and elements and
@@ -525,10 +487,6 @@ subroutine buildClusterWalls(level, sps, useDual, walls, famList, nFamList)
      walls(c)%conn(:, ccc(c)) = connGlobal(:, i)
 
      walls(c)%indCell(ccc(c)) = cellIndicesGlobal(i)
-
-     if (useRoughSA) then
-        walls(c)%BCCell(:, ccc(c)) = cellBCCellGlobal(:, i)
-     end if
   end do
 
   do i=1, nClusters
@@ -606,10 +564,6 @@ subroutine buildClusterWalls(level, sps, useDual, walls, famList, nFamList)
        clusterNodeGlobal, localNodeNums, nodeIndicesGlobal, &
        nodeIndicesCGNSGlobal)
 
-  if (useRoughSA) then
-     deallocate(cellBCCellGlobal)
-  end if
-
   do nn=1, nDom
      deallocate(flowDoms(nn, level, sps)%globalCGNSNode)
   end do
diff --git a/src/preprocessing/preprocessingAPI.F90 b/src/preprocessing/preprocessingAPI.F90
index 648f28851..39f75bc42 100644
--- a/src/preprocessing/preprocessingAPI.F90
+++ b/src/preprocessing/preprocessingAPI.F90
@@ -32,6 +32,7 @@ subroutine preprocessing
     use coarseUtils, only : createCoarseBlocks
     use pointMatchedCommPattern, only : determineCommPattern
     use oversetAPI, only : oversetComm, determineClusters, determineViscousDirs
+    use wallDistanceData, only : nCellBlockOffset
     implicit none
     !
     !      Local variables.
@@ -209,6 +210,11 @@ subroutine preprocessing
     ! See the corresponding subroutine header, although the
     ! names are pretty self-explaining
 
+    ! Allocate Block-offset. SA rough might need it. It is filled in
+    ! 'setGlobalCellsAndNodes'. This function is called per level and thus it
+    ! must be allocated before the call
+
+    allocate(nCellBlockOffset(nLevels, nDom))
 
     do level=1,nLevels
        call xhalo(level)
@@ -1931,6 +1937,7 @@ subroutine setGlobalCellsAndNodes(level)
     use inputTimeSpectral
     use utils, only: setPointers, terminate
     use haloExchange, only : whalo1to1intgeneric
+    use wallDistanceData, only : nCellBlockOffset
     implicit none
 
     ! Input variables
@@ -1941,7 +1948,7 @@ subroutine setGlobalCellsAndNodes(level)
     integer(kind=intType) :: ierr, istart
     logical :: commPressure, commLamVis, commEddyVis, commGamma
     integer(kind=intType), dimension(nProc) :: nNodes, nCells, nCellOffset, nNodeOffset
-    integer(kind=intType), dimension(nDom) :: nCellBLockOffset,nNodeBLockOffset
+    integer(kind=intType), dimension(nDom) :: nNodeBLockOffset
     integer(kind=intType) :: npts, nCell, nNode
     integer(kind=intType), dimension(:), allocatable :: nNodesProc, cumNodesProc
     integer(kind=intTYpe), dimension(:), allocatable :: nCellsProc, cumCellsProc
@@ -2008,10 +2015,10 @@ subroutine setGlobalCellsAndNodes(level)
          adflow_integer, 0, ADflow_comm_world, ierr)
 
     ! Determine the global cell number offset for each local block.
-    nCellBlockOffset(1) = nCellOffsetLocal(level)
+    nCellBlockOffset(level, 1) = nCellOffsetLocal(level)
     do nn=2,nDom
        call setPointers(nn-1, level, 1)
-       nCellBlockOffset(nn) = nCellBlockOffset(nn-1)          &
+       nCellBlockOffset(level, nn) = nCellBlockOffset(level, nn-1)          &
             + nx*ny*nz
     enddo
 
@@ -2039,7 +2046,7 @@ subroutine setGlobalCellsAndNodes(level)
                    ! instances of a give block adjacent to each other in
                    ! the matrix
                    globalCell(i, j, k) = &
-                        nCellBLockOffset(nn)*nTimeIntervalsSpectral+nx*ny*nz*(sps-1)+&
+                        nCellBLockOffset(level,nn)*nTimeIntervalsSpectral+nx*ny*nz*(sps-1)+&
                         (i-2) +(j-2)*nx +(k-2)*nx*ny
                 enddo
              enddo
diff --git a/src/utils/utils.F90 b/src/utils/utils.F90
index 7dc268877..a940694b2 100644
--- a/src/utils/utils.F90
+++ b/src/utils/utils.F90
@@ -4146,7 +4146,6 @@ subroutine writeIntroMessage
          &Euler equations"
     print "(a)", "# on multiblock structured hexahedral grids."
 
-    print "(a)", "# SA_rough dev"
 
     write(integerString,"(i7)") nProc
     integerString = adjustl(integerString)
diff --git a/src/wallDistance/wallDistance.F90 b/src/wallDistance/wallDistance.F90
index f1d6f2b26..fa4b4a6f4 100644
--- a/src/wallDistance/wallDistance.F90
+++ b/src/wallDistance/wallDistance.F90
@@ -122,49 +122,188 @@ end subroutine updateWallDistancesQuickly
 
   subroutine updateWallRoughness()
 
+#ifndef USE_TAPENADE
+
     ! Sets the roughness-value (ks) of the nearest wall-cell.
 
     use constants
-    use blockPointers, only : il, jl, kl, flowDoms, ks, BCData, nDom, nBocos
+    ! use blockPointers, only : il, jl, kl, flowDoms, ks, BCData, nDom, nBocos, nx, ny, nz
+    use blockPointers
     use inputTimeSpectral, only :nTimeIntervalsSpectral
-    use utils, only : setPointers
+    use utils, only : setPointers, EChk
     use iteration, only : groundLevel
+    use surfaceFamilies, only : BCFamGroups
+    use communication, only : adflow_comm_world, nProc, myID
+    use sorting, only : famInList
+    use wallDistanceData, only : nCellBlockOffset
     implicit none
 
     ! Local Variables
-    integer(kind=intType) :: i, j, k, iii, jjj, boco, dom
-    integer(kind=intType) :: nn, sps, level, nLevels
+    integer(kind=intType) :: i, j, k, ii, jj, ierr, iCell
+    integer(kind=intType) :: iBeg, jBeg, iEnd, jEnd, ni, nj
+    integer(kind=intType) :: nn, sps, level, nLevels, mm
+    integer(kind=intType) :: nCellsLocal, nCellsGlobal
+    integer(kind=intType), dimension(:), allocatable :: nCellProc, cumCellProc
+    integer(kind=intType), dimension(:), pointer :: wallFamList
+    integer(kind=intType), dimension(:), allocatable :: cellIdLocal, cellIdGlobal
+    real(kind=realType), dimension(:), allocatable :: ksLocal, ksGlobal
+
 
+    wallFamList => BCFamGroups(iBCGroupWalls)%famList
     nLevels = ubound(flowDoms,2)
 
     do level=1, nLevels
-        do sps=1, nTimeIntervalsSpectral
-            do nn=1, nDom
-                call setPointers(nn, level, sps)
-                do k=2,kl
-                    do j=2,jl
-                        do i=2,il
-                            if (flowDoms(nn, level, sps)%nearestBCCell(1, i, j, k) == 0) then
-                                ! This cell is too far away and has no
-                                ! association. Set the roughness to zero.
-                                print *, 'ks cutoff'
-                                ks(i, j, k) = zero
-                                cycle
-                            end if
-
-                            dom = flowDoms(nn, level, sps)%nearestBCCell(1, i, j, k)
-                            boco = flowDoms(nn, level, sps)%nearestBCCell(2, i, j, k)
-                            iii = flowDoms(nn, level, sps)%nearestBCCell(3, i, j, k)
-                            jjj = flowDoms(nn, level, sps)%nearestBCCell(4, i, j, k)
-
-                            ks(i, j, k) = flowDoms(dom, level, sps)%BCData(boco)%ksNS_Wall(iii,jjj)
-                        end do
-                    end do
-                end do
+       do sps=1, nTimeIntervalsSpectral
+
+          ! figure out the local space needed
+          nCellsLocal = 0
+          do nn=1,nDom
+            call setPointers(nn, level, sps)
+
+            do mm=1, nBocos
+                if (.not. famInList(BCdata(mm)%famID, wallFamlist)) then
+                   cycle
+                end if
+                nCellsLocal = nCellsLocal + &
+                (bcData(mm)%inEnd - bcData(mm)%inBeg)*(bcData(mm)%jnEnd - bcData(mm)%jnBeg)
             end do
-        end do
+          end do
+
+          ! Now communicate these sizes with everyone
+          allocate(nCellProc(nProc), cumCellProc(0:nProc))
+
+          call mpi_allgather(nCellsLocal, 1, adflow_integer, nCellProc, 1, adflow_integer, &
+               adflow_comm_world, ierr)
+          call EChk(ierr, __FILE__, __LINE__)
+
+          ! Now make cumulative versions of these
+          cumCellProc(0) = 0
+          do i=1,nProc
+             cumCellProc(i) = cumCellProc(i-1) + nCellProc(i)
+          end do
+
+          ! And save the total number of nodes and cells for reference
+          nCellsGlobal = cumCellProc(nProc)
+
+          ! Allocate the space for the local ks values and cellId's
+          allocate(ksLocal(nCellsLocal), cellIdLocal(nCellsLocal))
+
+
+          ! Move all the local ks-values in a list
+          ! Create a seccond list with the global cell ID corresponding to the ks-values
+          iCell = 0
+          do nn=1, nDom
+             call setPointers(nn, level, sps)
+             do mm=1, nBocos
+                if (.not. famInList(BCdata(mm)%famID, wallFamlist)) then
+                   cycle
+                end if
+
+                jBeg = BCData(mm)%jnBeg ; jEnd = BCData(mm)%jnEnd
+                iBeg = BCData(mm)%inBeg ; iEnd = BCData(mm)%inEnd
+                ni = iEnd - iBeg
+                nj = jEnd - jBeg
+
+                do jj=1, nj
+                   do ii=1, ni
+                      iCell = iCell + 1
+
+                      ! saving local ks-value is easy
+                      ksLocal(iCell) = BCData(mm)%ksNS_Wall(ii, jj)
+
+                      ! to calculate the global cellID, we must associate the
+                      ! BC-cell to the volume cell first We basically have to
+                      ! set surface i-j values to global i,j,k values. The +1 is
+                      ! needed because there are no halo-cells on the BC
+
+                      ! TODO: Do I need to care about the rightHanded-stuff?
+                      select case (BCFaceID(mm))
+                      case (iMin)
+                         i = 2
+                         j = ii + 1
+                         k = jj + 1
+                      case (iMax)
+                         i = il
+                         j = ii + 1
+                         k = jj + 1
+                      case (jMin)
+                         i = ii + 1
+                         j = 2
+                         k = jj + 1
+                      case (jMax)
+                         i = ii + 1
+                         j = jl
+                         k = jj + 1
+                      case (kMin)
+                         i = jj + 1
+                         j = ii + 1
+                         k = 2
+                      case (kMax)
+                         i = ii + 1
+                         j = jj + 1
+                         k = kl
+                      end select
+
+                      cellIdLocal(iCell) = nCellBLockOffset(level,nn)*nTimeIntervalsSpectral+nx*ny*nz*(sps-1)+&
+                           (i-2) +(j-2)*nx +(k-2)*nx*ny
+                   end do
+                end do
+             end do
+          end do
+
+
+          ! allocate global arrays
+          allocate(ksGlobal(nCellsGlobal), cellIdGlobal(nCellsGlobal))
+
+          ! gather all the surface-ks values on each proc
+          call mpi_allgatherv(ksLocal, nCellsLocal, adflow_real, &
+               ksGlobal, nCellProc, cumCellProc, adflow_real, &
+               adflow_comm_world, ierr)
+          call EChk(ierr, __FILE__, __LINE__)
+
+          ! gather all the cellId's on each proc
+          call mpi_allgatherv(cellIdLocal, nCellsLocal, adflow_integer, &
+               cellIdGlobal, nCellProc, cumCellProc, adflow_integer, &
+               adflow_comm_world, ierr)
+          call EChk(ierr, __FILE__, __LINE__)
+
+          ! free local memmory
+          deallocate(cumCellProc, nCellProc, ksLocal, cellIdLocal)
+
+          ! set the ks-values in the volume
+          do nn=1, nDom
+             call setPointers(nn, level, sps)
+             do k=2,kl
+                do j=2,jl
+                   do i=2,il
+                      if (flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) == 0) then
+                         ! This cell is too far away and has no
+                         ! association. Set the roughness to zero.
+                         ks(i, j, k) = zero
+                         cycle
+                      end if
+
+                      ! find the index of the surface cell (Requires gfortran > 9.0 )
+                      iCell = findloc(cellIdGlobal, flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k), DIM=1)
+
+                      ! findloc did not find the requested cell, set a value of 0
+                      if (iCell == 0) then
+                         ks(i, j, k) = zero
+                         cycle
+                      end if
+
+                      flowDoms(nn, level, sps)%ks(i, j, k) = ksGlobal(iCell)
+                   end do
+                end do
+             end do
+          end do
+
+          ! free global memory
+          deallocate(ksGlobal, cellIdGlobal)
+       end do
     end do
 
+#endif
   end subroutine updateWallRoughness
 
   ! ----------------------------------------------------------------------
@@ -1838,7 +1977,7 @@ subroutine determineWallAssociation(level, sps)
           allocate(flowDoms(nn,level,sps)%surfNodeIndices(4, 2:il, 2:jl, 2:kl))
           allocate(flowDoms(nn,level,sps)%uv(2, 2:il, 2:jl, 2:kl))
           if (useRoughSA) then
-             allocate(flowDoms(nn,level,sps)%nearestBCCell(4, 2:il, 2:jl, 2:kl))
+             allocate(flowDoms(nn,level,sps)%nearestWallCellInd(2:il, 2:jl, 2:kl))
           end if
        end if
 
@@ -1883,14 +2022,14 @@ subroutine determineWallAssociation(level, sps)
                       end do
                       flowDoms(nn, level, sps)%uv(:, i, j, k) = uvw(1:2)
                       if (useRoughSA) then
-                         flowDoms(nn, level, sps)%nearestBCCell(:, i, j, k) = walls(c)%BCCell(:, cellID)
+                         flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) = walls(c)%indCell(cellID)
                       end if
                    else
                       ! Just set dummy values. These will never be used.
                       flowDoms(nn, level, sps)%surfNodeIndices(:, i, j, k) = 0
                       flowDoms(nn, level, sps)%uv(:, i, j, k) = 0
                       if (useRoughSA) then
-                          flowDoms(nn, level, sps)%nearestBCCell(:, i, j, k) = 0
+                          flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) = 0
                       end if
                    end if
 
@@ -1922,7 +2061,7 @@ subroutine determineWallAssociation(level, sps)
                       end do
                       flowDoms(nn, level, sps)%uv(:, i, j, k) = uvw(1:2)
                       if (useRoughSA) then
-                         flowDoms(nn, level, sps)%nearestBCCell(:, i, j, k) = walls(c)%BCCell(:, cellID)
+                         flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) = walls(c)%indCell(cellID)
                       end if
                    else
 
@@ -1943,7 +2082,7 @@ subroutine determineWallAssociation(level, sps)
                          end do
                          flowDoms(nn, level, sps)%uv(:, i, j, k) = uvw2(1:2)
                          if (useRoughSA) then
-                            flowDoms(nn, level, sps)%nearestBCCell(:, i, j, k) = walls(c)%BCCell(:, cellID2)
+                            flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) = walls(c)%indCell(cellID)
                          end if
                       else
                          ! The full wall distance is better. Take that.
@@ -1954,7 +2093,7 @@ subroutine determineWallAssociation(level, sps)
                          end do
                          flowDoms(nn, level, sps)%uv(:, i, j, k) = uvw(1:2)
                          if (useRoughSA) then
-                            flowDoms(nn, level, sps)%nearestBCCell(:, i, j, k) = walls(c)%BCCell(:, cellID)
+                            flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) = walls(c)%indCell(cellID)
                          end if
                       end if
                    end if
@@ -1967,7 +2106,7 @@ subroutine determineWallAssociation(level, sps)
                    flowDoms(nn, level, sps)%surfNodeIndices(:, i, j, k) = 0
                    flowDoms(nn, level, sps)%uv(:, i, j, k) = 0
                    if (useRoughSA) then
-                       flowDoms(nn, level, sps)%nearestBCCell(:, i, j, k) = 0
+                       flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) = 0
                    end if
 
                 end if
@@ -2071,9 +2210,6 @@ subroutine determineWallAssociation(level, sps)
     if (oversetPresent) then
        deallocate(fullWall%x, fullWall%conn, fullWall%ind)
        call destroySerialQuad(fullWall%ADT)
-       if (useRoughSA) then
-          deallocate(walls(c)%BCCell)
-       end if
     end if
 
   end subroutine determineWallAssociation

From 351e21af2acc0d427bd0c809008700b914d13f22 Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Mon, 7 Nov 2022 10:33:35 +0100
Subject: [PATCH 23/60] remove unneeded check

---
 src/wallDistance/wallDistance.F90 | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/src/wallDistance/wallDistance.F90 b/src/wallDistance/wallDistance.F90
index fa4b4a6f4..44f7b2543 100644
--- a/src/wallDistance/wallDistance.F90
+++ b/src/wallDistance/wallDistance.F90
@@ -286,12 +286,7 @@ subroutine updateWallRoughness()
                       ! find the index of the surface cell (Requires gfortran > 9.0 )
                       iCell = findloc(cellIdGlobal, flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k), DIM=1)
 
-                      ! findloc did not find the requested cell, set a value of 0
-                      if (iCell == 0) then
-                         ks(i, j, k) = zero
-                         cycle
-                      end if
-
+                      ! set the ks value
                       flowDoms(nn, level, sps)%ks(i, j, k) = ksGlobal(iCell)
                    end do
                 end do

From dfa996ce2ced2fbf45f2c27bc99dd7e9401ea040 Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Mon, 7 Nov 2022 19:57:27 +0100
Subject: [PATCH 24/60] Write volume ks-values for debugging

---
 src/inputParam/inputParamRoutines.F90 | 12 ++++++++++++
 src/modules/extraOutput.f90           |  1 +
 src/output/outputMod.F90              | 20 ++++++++++++++++++++
 3 files changed, 33 insertions(+)

diff --git a/src/inputParam/inputParamRoutines.F90 b/src/inputParam/inputParamRoutines.F90
index fafb9791b..5c0b75860 100644
--- a/src/inputParam/inputParamRoutines.F90
+++ b/src/inputParam/inputParamRoutines.F90
@@ -2513,6 +2513,7 @@ subroutine volumeVariables(variables)
     use constants
     use extraOutput
     use utils, only : convertToLowerCase, terminate
+    use inputPhysics, only : useRoughSA
     implicit none
     !
     !      Subroutine arguments.
@@ -2564,6 +2565,8 @@ subroutine volumeVariables(variables)
     volWriteStatus = .false.
     volWriteIntermittency = .false.
 
+    volWriteKs = .false.
+
 
     ! Initialize nVarSpecified to 0. This serves as a test
     ! later on.
@@ -2715,6 +2718,15 @@ subroutine volumeVariables(variables)
           volWriteIntermittency = .true.
           nVarSpecified = nVarSpecified + 1
 
+       case("ks")
+          if (.not. useRoughSA) then
+             call terminate("volumeVariables", "Can not write Surface-Roughness &
+             &values ('ks' in 'volumeVariables') when the rough SA variant is not &
+             &used (useRoughSA = False)")
+          end if
+          volWriteKs = .true.
+          nVarSpecified = nVarSpecified + 1
+
        case default
           pos = len_trim(keyword)
           write(errorMessage,"(3a)" ) "Unknown extra volume output &
diff --git a/src/modules/extraOutput.f90 b/src/modules/extraOutput.f90
index 3c5601eec..20d3d99d9 100644
--- a/src/modules/extraOutput.f90
+++ b/src/modules/extraOutput.f90
@@ -36,6 +36,7 @@ module extraOutput
        logical :: volWriteResRhoE,      volWriteResTurb,  volWriteBlank
        logical :: volWriteShock,        volWriteFilteredShock, volWriteGC, volWriteStatus
        logical :: volWriteIntermittency
+       logical :: volWriteKs
 !
 !       The logical variables, which define the isosurface variables
 !       to be written.
diff --git a/src/output/outputMod.F90 b/src/output/outputMod.F90
index 717977085..c2d4980be 100644
--- a/src/output/outputMod.F90
+++ b/src/output/outputMod.F90
@@ -232,6 +232,8 @@ subroutine numberOfVolSolVariables(nVolSolvar, nVolDiscrVar)
 
     if( volWriteBlank ) nVolDiscrVar  = nVolDiscrVar + 1
 
+    if( volwriteKs )    nVolDiscrVar  = nVolDiscrVar + 1
+
 
   end subroutine numberOfVolSolVariables
 
@@ -568,6 +570,11 @@ subroutine volSolNames(solNames)
        solNames(nn) = cgnsIntermittency
     endif
 
+    if( volWriteKs) then
+       nn = nn + 1
+       solNames(nn) = cgnsSandGrainRoughness
+    endif
+
   end subroutine volSolNames
 
   subroutine surfSolNames(solNames)
@@ -1348,6 +1355,19 @@ subroutine storeSolInBuffer(buffer, copyInBuffer, solName, &
           end do
        end do
 
+    case  (cgnsSandGrainRoughness)
+       ! It is only possible to write this when it was allocated in the first place
+       ! (useRoughSA = True) but this has been check in 'inputParamRoutines'
+       ! allready
+       do k=kBeg,kEnd
+          do j=jBeg,jEnd
+             do i=iBeg,iEnd
+                wIO(i,j,k,1) = real(ks(i,j,k),realType)
+             enddo
+          enddo
+       enddo
+
+
 
     case default
        call terminate("storeSolInBuffer", &

From 538a2343ccc6df44e8e9e0eabcfa4faa425cc235 Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Tue, 8 Nov 2022 11:31:14 +0100
Subject: [PATCH 25/60] use -1 as failure (0 might be a cellID)

---
 src/wallDistance/wallDistance.F90 | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/wallDistance/wallDistance.F90 b/src/wallDistance/wallDistance.F90
index 44f7b2543..de4f35d33 100644
--- a/src/wallDistance/wallDistance.F90
+++ b/src/wallDistance/wallDistance.F90
@@ -276,7 +276,7 @@ subroutine updateWallRoughness()
              do k=2,kl
                 do j=2,jl
                    do i=2,il
-                      if (flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) == 0) then
+                      if (flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) == -1) then
                          ! This cell is too far away and has no
                          ! association. Set the roughness to zero.
                          ks(i, j, k) = zero
@@ -2024,7 +2024,7 @@ subroutine determineWallAssociation(level, sps)
                       flowDoms(nn, level, sps)%surfNodeIndices(:, i, j, k) = 0
                       flowDoms(nn, level, sps)%uv(:, i, j, k) = 0
                       if (useRoughSA) then
-                          flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) = 0
+                          flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) = -1
                       end if
                    end if
 
@@ -2077,7 +2077,7 @@ subroutine determineWallAssociation(level, sps)
                          end do
                          flowDoms(nn, level, sps)%uv(:, i, j, k) = uvw2(1:2)
                          if (useRoughSA) then
-                            flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) = walls(c)%indCell(cellID)
+                            flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) = walls(c)%indCell(cellID2)
                          end if
                       else
                          ! The full wall distance is better. Take that.
@@ -2101,7 +2101,7 @@ subroutine determineWallAssociation(level, sps)
                    flowDoms(nn, level, sps)%surfNodeIndices(:, i, j, k) = 0
                    flowDoms(nn, level, sps)%uv(:, i, j, k) = 0
                    if (useRoughSA) then
-                       flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) = 0
+                       flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) = -1
                    end if
 
                 end if

From 9db1b9777ad3510494abcacdfcb8e3a6ac8ec87f Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Wed, 9 Nov 2022 14:56:29 +0100
Subject: [PATCH 26/60] Adds the ability to set ks via pyADflow

---
 adflow/pyADflow.py                     | 55 ++++++++++++++++++++++++++
 src/bcdata/BCData.F90                  | 14 ++++---
 src/f2py/adflow.pyf                    |  9 +++++
 src/initFlow/initializeFlow.F90        |  7 ----
 src/preprocessing/preprocessingAPI.F90 | 39 ++++++++++++++++++
 src/wallDistance/wallDistance.F90      |  9 +++--
 6 files changed, 117 insertions(+), 16 deletions(-)

diff --git a/adflow/pyADflow.py b/adflow/pyADflow.py
index acb6f5591..f95acd369 100644
--- a/adflow/pyADflow.py
+++ b/adflow/pyADflow.py
@@ -142,6 +142,9 @@ def __init__(self, comm=None, options=None, debug=False, dtype="d"):
         # Set all internal adflow default options before we set anything from python
         self.adflow.inputparamroutines.setdefaultvalues()
 
+        # surface-roughness overwrite-dict
+        self.surfaceRoughness = dict()
+
         defSetupTime = time.time()
 
         # Initialize the inherited AeroSolver
@@ -941,6 +944,50 @@ def setRotationRate(self, rotCenter, rotRate, cgnsBlocks=None):
         self.adflow.preprocessingapi.updaterotationrate(rotCenter, rotations, cgnsBlocks)
         self._updateVelInfo = True
 
+    def setSurfaceRoughness(self, ks, groupName=None):
+        """
+        Sets the equivalent sandgrain roughness on the specified surface family.
+        It may only be used when the rough SA-variant (useRoughSA = True) is
+        active.
+
+        This function takes precendence over all boundary conditions set in the
+        mesh.
+
+        Parameters:
+        ----------
+        ks : float
+            Equivalent sandgrain roughness
+        groupName : str
+            Family group to use. Default to all walls if not given (None)
+        """
+
+        if not self.options['useroughsa']:
+           raise Error (
+               "It is not possible to set a surface roughness value without "
+               "using the rough SA-variant (useRoughSA = False)"
+           )
+
+        if groupName is None:
+            groupName = self.allWallsGroup
+
+        if groupName in self.surfaceRoughness:
+            raise Error(
+                f"The roughness value for surface group '{groupName}' has "
+                f"allready been set to {self.surfaceRoughness[groupName]}"
+            )
+
+        self.surfaceRoughness[groupName] = ks
+
+    def _setSurfaceRoughness(self):
+        """
+        Set the actual roughness values. It must be postponed for as long as
+        possible. Otherwise it might get overwritten.
+        """
+
+        for groupName, ks in self.surfaceRoughness.items():
+            famList = self._getFamilyList(groupName)
+            self.adflow.preprocessingapi.updatesurfaceroughness(ks, famList)
+
     def checkPartitioning(self, nprocs):
         """This function determines the potential load balancing for
         nprocs. The intent is this function can be run in serial
@@ -3120,6 +3167,14 @@ def _setAeroProblemData(self, aeroProblem, firstCall=False):
                 self.adflow.preprocessingapi.updatemetricsalllevels()
             self.adflow.preprocessingapi.updategridvelocitiesalllevels()
 
+            # overwrite BC surface roughness
+            if len(self.surfaceRoughness) > 0:
+                self._setSurfaceRoughness()
+
+            # Propagate roughness values through volume
+            self.adflow.walldistance.updatewallroughness()
+
+
     def _getBCDataFromAeroProblem(self, AP):
 
         variables = []
diff --git a/src/bcdata/BCData.F90 b/src/bcdata/BCData.F90
index f1174d161..d5d735a6d 100644
--- a/src/bcdata/BCData.F90
+++ b/src/bcdata/BCData.F90
@@ -491,13 +491,15 @@ subroutine BCDataIsothermalWall(boco, bcVarArray, iBeg, iEnd, jBeg, jEnd)
     ! sand grain roughness
 
     if (useRoughSA) then
-        if(.not. bcVarPresent(2)) then
-        bcVarArray(:,:,1) = zero
+        ! Set a value of 0 if it was not possible to determine the
+        ! sand grain roughness
+        if(.not. bcVarPresent(1)) then
+           bcVarArray(:,:,2) = zero
         endif
 
         do j=jBeg,jEnd
             do i=iBeg,iEnd
-                BCData(boco)%ksNS_Wall(i,j) = bcVarArray(i,j,2)
+               BCData(boco)%ksNS_Wall(i,j) = bcVarArray(i,j,2)
             enddo
         enddo
     end if
@@ -526,17 +528,17 @@ subroutine BCDataAdiabaticWall(boco, bcVarArray, iBeg, iEnd, jBeg, jEnd)
     !
     integer(kind=intType) :: i, j
 
-    ! Set a value of 0 if it was not possible to determine the
-    ! sand grain roughness
 
     if (useRoughSA) then
+        ! Set a value of 0 if it was not possible to determine the
+        ! sand grain roughness
         if(.not. bcVarPresent(1)) then
            bcVarArray(:,:,1) = zero
         endif
 
         do j=jBeg,jEnd
             do i=iBeg,iEnd
-                BCData(boco)%ksNS_Wall(i,j) = bcVarArray(i,j,1)
+               BCData(boco)%ksNS_Wall(i,j) = bcVarArray(i,j,1)
             enddo
         enddo
     end if
diff --git a/src/f2py/adflow.pyf b/src/f2py/adflow.pyf
index 6ff2658bb..04eeb0d41 100644
--- a/src/f2py/adflow.pyf
+++ b/src/f2py/adflow.pyf
@@ -83,6 +83,9 @@ python module libadflow
          subroutine updatewalldistancealllevels
          end subroutine updatewalldistancealllevels
 
+         subroutine updatewallroughness
+         end subroutine updatewallroughness
+
        end module walldistance
 
        module initializeflow
@@ -554,6 +557,12 @@ python module libadflow
            integer(kind=inttype), optional,intent(in),check(len(blocks)>=nblocks),depend(blocks) :: nblocks=len(blocks)
          end subroutine updaterotationrate
 
+         subroutine updatesurfaceroughness(ks_in, famlist, nfamlist) ! in :test:updatesurfaceroughness.f90
+           real(kind=realtype) ,intent(in) :: ks_in
+           integer(kind=inttype) dimension(nfamlist),intent(in) :: famlist
+           integer(kind=inttype), optional,intent(in),check(len(famlist)>=nfamlist),depend(famlist) :: nfamlist=len(famlist)
+         end subroutine updatesurfaceroughness
+
          subroutine preprocessingoverset(flag,n, closedfamlist, nfam) ! in :test:preprocessingAPI.F90:preprocessingapi
            integer(kind=inttype) dimension(n) :: flag
            integer(kind=inttype), optional,intent(in),check(len(flag)>=n),depend(flag) :: n=len(flag)
diff --git a/src/initFlow/initializeFlow.F90 b/src/initFlow/initializeFlow.F90
index 4700958de..0d373e942 100644
--- a/src/initFlow/initializeFlow.F90
+++ b/src/initFlow/initializeFlow.F90
@@ -272,8 +272,6 @@ subroutine updateBCDataAllLevels()
     use constants
     use iteration, only : groundLevel
     use bcdata, only : setbcdataFineGrid, setBCDataCoarseGrid
-    use wallDistance, only : updateWallRoughness
-    use inputPhysics, only : useRoughSA
     implicit none
 
     ! Allocate the memory for the prescribed boundary data at the
@@ -292,11 +290,6 @@ subroutine updateBCDataAllLevels()
     call setBCDataCoarseGrid
 #endif
 
-    ! update the roughness value in flowdoms
-    if (useRoughSA) then
-       call updateWallRoughness
-    end if
-
   end subroutine updateBCDataAllLevels
 
   subroutine initFlow
diff --git a/src/preprocessing/preprocessingAPI.F90 b/src/preprocessing/preprocessingAPI.F90
index 39f75bc42..11e9be3e7 100644
--- a/src/preprocessing/preprocessingAPI.F90
+++ b/src/preprocessing/preprocessingAPI.F90
@@ -4156,4 +4156,43 @@ subroutine updateRotationRate(rotCenter, rotRate, blocks, nblocks)
 
   end subroutine updateRotationRate
 
+  subroutine updateSurfaceRoughness(ks_in, famList, nFamList)
+
+    use constants
+    use blockPointers
+    use inputTimeSpectral, only : nTimeIntervalsSpectral
+    use utils, only : setPointers
+    use sorting, only : famInList
+    implicit none
+
+    real(kind=realType), intent(in) :: ks_in
+    integer(kind=intType), intent(in) :: nFamList, famList(nFamList)
+
+    integer(kind=intType) :: nLevels, level, sps, nn, mm
+
+    print *, 'setting', ks_in
+
+    nLevels = ubound(flowDoms,2)
+
+    do level=1, nLevels
+       do sps=1, nTimeIntervalsSpectral
+          do nn=1,nDom
+             call setPointers(nn, level, sps)
+
+             ! Loop over the number of boundary subfaces of this block.
+             do mm=1,nBocos
+
+                if ( .not. famInList(BCData(mm)%famID, famList)) then
+                   cycle
+                end if
+
+                BCData(mm)%ksNS_Wall = ks_in
+
+             end do
+          end do
+       end do
+    end do
+
+  end subroutine updateSurfaceRoughness
+
 end module preprocessingAPI
diff --git a/src/wallDistance/wallDistance.F90 b/src/wallDistance/wallDistance.F90
index de4f35d33..67a0605a8 100644
--- a/src/wallDistance/wallDistance.F90
+++ b/src/wallDistance/wallDistance.F90
@@ -127,11 +127,10 @@ subroutine updateWallRoughness()
     ! Sets the roughness-value (ks) of the nearest wall-cell.
 
     use constants
-    ! use blockPointers, only : il, jl, kl, flowDoms, ks, BCData, nDom, nBocos, nx, ny, nz
     use blockPointers
-    use inputTimeSpectral, only :nTimeIntervalsSpectral
+    use inputTimeSpectral, only : nTimeIntervalsSpectral
+    use inputPhysics, only : useRoughSA
     use utils, only : setPointers, EChk
-    use iteration, only : groundLevel
     use surfaceFamilies, only : BCFamGroups
     use communication, only : adflow_comm_world, nProc, myID
     use sorting, only : famInList
@@ -148,6 +147,10 @@ subroutine updateWallRoughness()
     integer(kind=intType), dimension(:), allocatable :: cellIdLocal, cellIdGlobal
     real(kind=realType), dimension(:), allocatable :: ksLocal, ksGlobal
 
+    ! exit if not in use
+    if (.not. useRoughSA) then
+       return
+    end if
 
     wallFamList => BCFamGroups(iBCGroupWalls)%famList
     nLevels = ubound(flowDoms,2)

From abba6517421b6839d3fd0b346fabd29dc271d07f Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Wed, 9 Nov 2022 15:09:32 +0100
Subject: [PATCH 27/60] Add comments and fix missing allocation

---
 src/bcdata/BCData.F90                 |  4 +++-
 src/inputParam/inputParamRoutines.F90 |  4 ++--
 src/wallDistance/wallDistance.F90     | 23 ++++++++++++++++++-----
 3 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/src/bcdata/BCData.F90 b/src/bcdata/BCData.F90
index d5d735a6d..33bc2babe 100644
--- a/src/bcdata/BCData.F90
+++ b/src/bcdata/BCData.F90
@@ -2361,7 +2361,6 @@ subroutine allocMemBCData
                    allocate(BCData(mm)%uSlip(iBeg:iEnd,jBeg:jEnd,3),  &
                         BCData(mm)%uSlipALE(0:nALEsteps,iBeg:iEnd,jBeg:jEnd,3), &
                         BCData(mm)%TNS_Wall(iBeg:iEnd,jBeg:jEnd), &
-                        ! TODO: Add KS!
                         BCData(mm)%F(iNodeBeg:iNodeEnd,jNodeBeg:jNodeEnd,3), &
                         BCData(mm)%T(iNodeBeg:iNodeEnd,jNodeBeg:jNodeEnd,3), &
                         BCData(mm)%Tp(iNodeBeg:iNodeEnd,jNodeBeg:jNodeEnd,3), &
@@ -2373,6 +2372,9 @@ subroutine allocMemBCData
                         BCData(mm)%area(iNodeBeg+1:iNodeEnd, jNodeBeg+1:jNodeEnd), &
                         BCData(mm)%CpTarget(iNodeBeg:iNodeEnd, jNodeBeg:jNodeEnd), &
                         stat=ierr)
+                    if (useRoughSA .and. ierr == 0) then
+                        allocate(BCData(mm)%ksNS_Wall(iBeg:iEnd,jBeg:jEnd), stat=ierr)
+                   end if
                    if(ierr /= 0)                      &
                         call terminate("allocMemBCData", &
                         "Memory allocation failure for &
diff --git a/src/inputParam/inputParamRoutines.F90 b/src/inputParam/inputParamRoutines.F90
index 5c0b75860..6351608bb 100644
--- a/src/inputParam/inputParamRoutines.F90
+++ b/src/inputParam/inputParamRoutines.F90
@@ -2720,8 +2720,8 @@ subroutine volumeVariables(variables)
 
        case("ks")
           if (.not. useRoughSA) then
-             call terminate("volumeVariables", "Can not write Surface-Roughness &
-             &values ('ks' in 'volumeVariables') when the rough SA variant is not &
+             call terminate("volumeVariables", "Can not export surface roughness &
+             &values ('volumeVariables': ['ks']) when the rough SA variant is not &
              &used (useRoughSA = False)")
           end if
           volWriteKs = .true.
diff --git a/src/wallDistance/wallDistance.F90 b/src/wallDistance/wallDistance.F90
index 67a0605a8..df5d16de6 100644
--- a/src/wallDistance/wallDistance.F90
+++ b/src/wallDistance/wallDistance.F90
@@ -124,7 +124,22 @@ subroutine updateWallRoughness()
 
 #ifndef USE_TAPENADE
 
-    ! Sets the roughness-value (ks) of the nearest wall-cell.
+    ! Sets the roughness-value (ks) of the nearest wall-cell in the volume cells.
+    !
+    ! At first, it creates two lists: (1) ks values on the surface; (2) global
+    ! cellIndex corresponding to this ks-value.
+    !
+    ! Then it gathers the full list on each proc *THIS DOES NOT SCALE IN MEMORY*
+    !
+    ! After that, it iterate through every volume cell and finds the index in
+    ! list (1) that corresponds to the cellIndex of the nearest surface-cell.
+    ! Then it uses this index to set the ks value listed in (2).
+    !
+    !
+    ! A more memory efficient approach would be to create a 'PETSc Scatter'.
+    ! This should be straight forward using the cellIndex-list mentioned above.
+    ! You might take a look at 'wallScatter' further down this file for
+    ! inspiration.
 
     use constants
     use blockPointers
@@ -215,11 +230,9 @@ subroutine updateWallRoughness()
                       ksLocal(iCell) = BCData(mm)%ksNS_Wall(ii, jj)
 
                       ! to calculate the global cellID, we must associate the
-                      ! BC-cell to the volume cell first We basically have to
-                      ! set surface i-j values to global i,j,k values. The +1 is
-                      ! needed because there are no halo-cells on the BC
+                      ! BC-cell to the volume cell first. We basically have to
+                      ! set surface i-j values to global i,j,k values.
 
-                      ! TODO: Do I need to care about the rightHanded-stuff?
                       select case (BCFaceID(mm))
                       case (iMin)
                          i = 2

From 7c9a285b5943aa80040b8f16ca5b93e244124470 Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Wed, 9 Nov 2022 15:13:18 +0100
Subject: [PATCH 28/60] remove debugging-print

---
 src/preprocessing/preprocessingAPI.F90 | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/preprocessing/preprocessingAPI.F90 b/src/preprocessing/preprocessingAPI.F90
index 11e9be3e7..38560ee81 100644
--- a/src/preprocessing/preprocessingAPI.F90
+++ b/src/preprocessing/preprocessingAPI.F90
@@ -4170,8 +4170,6 @@ subroutine updateSurfaceRoughness(ks_in, famList, nFamList)
 
     integer(kind=intType) :: nLevels, level, sps, nn, mm
 
-    print *, 'setting', ks_in
-
     nLevels = ubound(flowDoms,2)
 
     do level=1, nLevels

From b4642e57258e0ff831f404b94c9f33852ba417c9 Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Wed, 9 Nov 2022 15:26:49 +0100
Subject: [PATCH 29/60] run tapenade

---
 src/adjoint/outputForward/bcdata_d.f90 | 14 +++++++++-----
 src/adjoint/outputReverse/bcdata_b.f90 |  6 ++++--
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/src/adjoint/outputForward/bcdata_d.f90 b/src/adjoint/outputForward/bcdata_d.f90
index 92659d985..5b59e875c 100644
--- a/src/adjoint/outputForward/bcdata_d.f90
+++ b/src/adjoint/outputForward/bcdata_d.f90
@@ -403,9 +403,11 @@ subroutine bcdataisothermalwall_d(boco, bcvararray, bcvararrayd, ibeg&
 ! set a value of 0 if it was not possible to determine the
 ! sand grain roughness
     if (useroughsa) then
-      if (.not.bcvarpresent(2)) then
-        bcvararrayd(:, :, 1) = 0.0_8
-        bcvararray(:, :, 1) = zero
+! set a value of 0 if it was not possible to determine the
+! sand grain roughness
+      if (.not.bcvarpresent(1)) then
+        bcvararrayd(:, :, 2) = 0.0_8
+        bcvararray(:, :, 2) = zero
       end if
       do j=jbeg,jend
         do i=ibeg,iend
@@ -467,7 +469,9 @@ subroutine bcdataisothermalwall(boco, bcvararray, ibeg, iend, jbeg, &
 ! set a value of 0 if it was not possible to determine the
 ! sand grain roughness
     if (useroughsa) then
-      if (.not.bcvarpresent(2)) bcvararray(:, :, 1) = zero
+! set a value of 0 if it was not possible to determine the
+! sand grain roughness
+      if (.not.bcvarpresent(1)) bcvararray(:, :, 2) = zero
       do j=jbeg,jend
         do i=ibeg,iend
           bcdata(boco)%ksns_wall(i, j) = bcvararray(i, j, 2)
@@ -499,9 +503,9 @@ subroutine bcdataadiabaticwall(boco, bcvararray, ibeg, iend, jbeg, &
 !      local variables.
 !
     integer(kind=inttype) :: i, j
+    if (useroughsa) then
 ! set a value of 0 if it was not possible to determine the
 ! sand grain roughness
-    if (useroughsa) then
       if (.not.bcvarpresent(1)) bcvararray(:, :, 1) = zero
       do j=jbeg,jend
         do i=ibeg,iend
diff --git a/src/adjoint/outputReverse/bcdata_b.f90 b/src/adjoint/outputReverse/bcdata_b.f90
index 00d36e9ca..f1f03a955 100644
--- a/src/adjoint/outputReverse/bcdata_b.f90
+++ b/src/adjoint/outputReverse/bcdata_b.f90
@@ -442,7 +442,9 @@ subroutine bcdataisothermalwall(boco, bcvararray, ibeg, iend, jbeg, &
 ! set a value of 0 if it was not possible to determine the
 ! sand grain roughness
     if (useroughsa) then
-      if (.not.bcvarpresent(2)) bcvararray(:, :, 1) = zero
+! set a value of 0 if it was not possible to determine the
+! sand grain roughness
+      if (.not.bcvarpresent(1)) bcvararray(:, :, 2) = zero
       do j=jbeg,jend
         do i=ibeg,iend
           bcdata(boco)%ksns_wall(i, j) = bcvararray(i, j, 2)
@@ -474,9 +476,9 @@ subroutine bcdataadiabaticwall(boco, bcvararray, ibeg, iend, jbeg, &
 !      local variables.
 !
     integer(kind=inttype) :: i, j
+    if (useroughsa) then
 ! set a value of 0 if it was not possible to determine the
 ! sand grain roughness
-    if (useroughsa) then
       if (.not.bcvarpresent(1)) bcvararray(:, :, 1) = zero
       do j=jbeg,jend
         do i=ibeg,iend

From d86c058e27bb88898332226402fc7067ba8c55e9 Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Wed, 9 Nov 2022 16:35:22 +0100
Subject: [PATCH 30/60] change for better readability

---
 src/adjoint/outputForward/turbbcroutines_d.f90          | 8 ++++----
 src/adjoint/outputReverse/turbbcroutines_b.f90          | 8 ++++----
 src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90 | 8 ++++----
 src/turbulence/turbBCRoutines.F90                       | 8 ++++----
 4 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/src/adjoint/outputForward/turbbcroutines_d.f90 b/src/adjoint/outputForward/turbbcroutines_d.f90
index d63357892..00b51de9c 100644
--- a/src/adjoint/outputForward/turbbcroutines_d.f90
+++ b/src/adjoint/outputForward/turbbcroutines_d.f90
@@ -2232,12 +2232,12 @@ function saroughfact(i, j, k)
     real(kind=realtype) :: saroughfact
 ! local variablse
     integer(kind=inttype) :: i, j, k
-    if (useroughsa) then
-      saroughfact = (ks(i, j, k)-d2wall(i, j, k)/0.03)/(ks(i, j, k)+&
-&       d2wall(i, j, k)/0.03)
+    if (.not.useroughsa) then
+      saroughfact = one
       return
     else
-      saroughfact = one
+      saroughfact = (ks(i, j, k)-d2wall(i, j, k)/0.03)/(ks(i, j, k)+&
+&       d2wall(i, j, k)/0.03)
     end if
   end function saroughfact
 end module turbbcroutines_d
diff --git a/src/adjoint/outputReverse/turbbcroutines_b.f90 b/src/adjoint/outputReverse/turbbcroutines_b.f90
index 6a73ea41e..813a8922c 100644
--- a/src/adjoint/outputReverse/turbbcroutines_b.f90
+++ b/src/adjoint/outputReverse/turbbcroutines_b.f90
@@ -2614,12 +2614,12 @@ function saroughfact(i, j, k)
     real(kind=realtype) :: saroughfact
 ! local variablse
     integer(kind=inttype) :: i, j, k
-    if (useroughsa) then
-      saroughfact = (ks(i, j, k)-d2wall(i, j, k)/0.03)/(ks(i, j, k)+&
-&       d2wall(i, j, k)/0.03)
+    if (.not.useroughsa) then
+      saroughfact = one
       return
     else
-      saroughfact = one
+      saroughfact = (ks(i, j, k)-d2wall(i, j, k)/0.03)/(ks(i, j, k)+&
+&       d2wall(i, j, k)/0.03)
     end if
   end function saroughfact
 end module turbbcroutines_b
diff --git a/src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90 b/src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90
index 77c1733e8..8f640bcd9 100644
--- a/src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90
+++ b/src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90
@@ -1182,12 +1182,12 @@ function saroughfact(i, j, k)
     real(kind=realtype) :: saroughfact
 ! local variablse
     integer(kind=inttype) :: i, j, k
-    if (useroughsa) then
-      saroughfact = (ks(i, j, k)-d2wall(i, j, k)/0.03)/(ks(i, j, k)+&
-&       d2wall(i, j, k)/0.03)
+    if (.not.useroughsa) then
+      saroughfact = one
       return
     else
-      saroughfact = one
+      saroughfact = (ks(i, j, k)-d2wall(i, j, k)/0.03)/(ks(i, j, k)+&
+&       d2wall(i, j, k)/0.03)
     end if
   end function saroughfact
 end module turbbcroutines_fast_b
diff --git a/src/turbulence/turbBCRoutines.F90 b/src/turbulence/turbBCRoutines.F90
index 9d0880a10..504a5f4dc 100644
--- a/src/turbulence/turbBCRoutines.F90
+++ b/src/turbulence/turbBCRoutines.F90
@@ -1408,13 +1408,13 @@ function  saRoughFact(i,j,k)
    ! local variablse
    integer(kind=intType) :: i, j, k
 
-   if (useRoughSA) then
-      saRoughFact = (ks(i,j,k) - d2wall(i,j,k)/0.03) / &
-           (ks(i,j,k) + d2wall(i,j,k)/0.03)
+   if (.not. useRoughSA) then
+      saRoughFact = one
       return
    end if
 
-   saRoughFact = one
+   saRoughFact = (ks(i,j,k) - d2wall(i,j,k)/0.03) / &
+        (ks(i,j,k) + d2wall(i,j,k)/0.03)
 
   end function  saRoughFact
 

From 81449a42c4fcbcee1cb5e483054211b54f2e45de Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Wed, 9 Nov 2022 16:52:32 +0100
Subject: [PATCH 31/60] Add TODO note in BCData

---
 src/bcdata/BCData.F90 | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/bcdata/BCData.F90 b/src/bcdata/BCData.F90
index 9fd13c0f2..c9ee7239e 100644
--- a/src/bcdata/BCData.F90
+++ b/src/bcdata/BCData.F90
@@ -3120,6 +3120,7 @@ subroutine setBCDataFineGrid_d(initializationPart)
              select case (BCType(j))
 
 !            case (NSWallAdiabatic)
+!            TODO: This is not needed, right?
 !               call setBCVarNamesAdiabaticWall ! sets bcVarNames and nbcVar
 !               call extractFromDataSet_d(bcVarArray, bcVarArrayd)
 !               call BCDataAdiabaticWall_d(j, bcVarArray, bcVarArrayd, iBeg, iEnd, jBeg, jEnd)
@@ -3221,6 +3222,7 @@ subroutine setBCDataFineGrid_b(initializationPart)
              select case (BCType(j))
 
 !            case (NSWallAdiabatic)
+!            TODO: This is not needed, right?
 !               call setBCVarNamesAdiabaticWall ! sets bcVarNames and nbcVar
 !               call extractFromDataSet(bcVarArray)
 !               call BCDataAdiabaticWall_b(j, bcVarArray, bcVarArrayd, iBeg, iEnd, jBeg, jEnd)

From d7856fc21ff826d2f52a4141ab25e967c939d61a Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Thu, 15 Dec 2022 15:38:52 +0100
Subject: [PATCH 32/60] Fix i-j mix-up in rough SA

---
 src/wallDistance/wallDistance.F90 | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/src/wallDistance/wallDistance.F90 b/src/wallDistance/wallDistance.F90
index 5058ed5c5..0b504d2b6 100644
--- a/src/wallDistance/wallDistance.F90
+++ b/src/wallDistance/wallDistance.F90
@@ -145,7 +145,7 @@ subroutine updateWallRoughness()
     use blockPointers
     use inputTimeSpectral, only : nTimeIntervalsSpectral
     use inputPhysics, only : useRoughSA
-    use utils, only : setPointers, EChk
+    use utils, only : setPointers, EChk, terminate
     use surfaceFamilies, only : BCFamGroups
     use communication, only : adflow_comm_world, nProc, myID
     use sorting, only : famInList
@@ -156,12 +156,14 @@ subroutine updateWallRoughness()
     integer(kind=intType) :: i, j, k, ii, jj, ierr, iCell
     integer(kind=intType) :: iBeg, jBeg, iEnd, jEnd, ni, nj
     integer(kind=intType) :: nn, sps, level, nLevels, mm
-    integer(kind=intType) :: nCellsLocal, nCellsGlobal
     integer(kind=intType), dimension(:), allocatable :: nCellProc, cumCellProc
     integer(kind=intType), dimension(:), pointer :: wallFamList
     integer(kind=intType), dimension(:), allocatable :: cellIdLocal, cellIdGlobal
+    integer(kind=intType) :: nCellsLocal, nCellsGlobal
     real(kind=realType), dimension(:), allocatable :: ksLocal, ksGlobal
 
+    character(len=maxStringLen) :: errorMessage
+
     ! exit if not in use
     if (.not. useRoughSA) then
        return
@@ -208,7 +210,7 @@ subroutine updateWallRoughness()
 
 
           ! Move all the local ks-values in a list
-          ! Create a seccond list with the global cell ID corresponding to the ks-values
+          ! Create a second list with the global cell ID corresponding to the ks-values
           iCell = 0
           do nn=1, nDom
              call setPointers(nn, level, sps)
@@ -251,8 +253,8 @@ subroutine updateWallRoughness()
                          j = jl
                          k = jj + 1
                       case (kMin)
-                         i = jj + 1
-                         j = ii + 1
+                         i = ii + 1
+                         j = jj + 1
                          k = 2
                       case (kMax)
                          i = ii + 1
@@ -283,7 +285,8 @@ subroutine updateWallRoughness()
                adflow_comm_world, ierr)
           call EChk(ierr, __FILE__, __LINE__)
 
-          ! free local memmory
+
+          ! free local memory
           deallocate(cumCellProc, nCellProc, ksLocal, cellIdLocal)
 
           ! set the ks-values in the volume
@@ -302,6 +305,13 @@ subroutine updateWallRoughness()
                       ! find the index of the surface cell (Requires gfortran > 9.0 )
                       iCell = findloc(cellIdGlobal, flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k), DIM=1)
 
+                      if (iCell == 0) then
+                         write(errorMessage,100) &
+                              flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k)
+100                      format("Could not find surface cell with id ", I10.1)
+                         call terminate("updateWallRoughness", errorMessage)
+                      endif
+
                       ! set the ks value
                       flowDoms(nn, level, sps)%ks(i, j, k) = ksGlobal(iCell)
                    end do

From fbb67e9f6a0d9f20b06bf043050a9c09cf1ef6f4 Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Fri, 16 Dec 2022 11:46:58 +0100
Subject: [PATCH 33/60] Init ks-values on BC for MG

---
 src/bcdata/BCData.F90 | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/bcdata/BCData.F90 b/src/bcdata/BCData.F90
index 15915510a..193b63db2 100644
--- a/src/bcdata/BCData.F90
+++ b/src/bcdata/BCData.F90
@@ -2353,6 +2353,11 @@ subroutine allocMemBCData
                         stat=ierr)
                     if (useRoughSA .and. ierr == 0) then
                         allocate(BCData(mm)%ksNS_Wall(iBeg:iEnd,jBeg:jEnd), stat=ierr)
+                        if (level > 1) then
+                           ! The extrapolation of the BC for MG does not work
+                           ! properly. Thus it must be initialized with zero
+                           BCData(mm)%ksNS_Wall = zero
+                        end if
                    end if
                    if(ierr /= 0)                      &
                         call terminate("allocMemBCData", &
@@ -2379,6 +2384,11 @@ subroutine allocMemBCData
                         stat=ierr)
                     if (useRoughSA .and. ierr == 0) then
                         allocate(BCData(mm)%ksNS_Wall(iBeg:iEnd,jBeg:jEnd), stat=ierr)
+                        if (level > 1) then
+                           ! The extrapolation of the BC for MG does not work
+                           ! properly. Thus it must be initialized with zero
+                           BCData(mm)%ksNS_Wall = zero
+                        end if
                    end if
                    if(ierr /= 0)                      &
                         call terminate("allocMemBCData", &

From 25d832b23b47d86e86c6d970a8d31dd9abac163e Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Wed, 4 Jan 2023 13:13:14 +0100
Subject: [PATCH 34/60] Fix BC-bug for non-SA_rough

---
 src/turbulence/turbBCRoutines.F90 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/turbulence/turbBCRoutines.F90 b/src/turbulence/turbBCRoutines.F90
index 504a5f4dc..3c673365f 100644
--- a/src/turbulence/turbBCRoutines.F90
+++ b/src/turbulence/turbBCRoutines.F90
@@ -1409,7 +1409,7 @@ function  saRoughFact(i,j,k)
    integer(kind=intType) :: i, j, k
 
    if (.not. useRoughSA) then
-      saRoughFact = one
+      saRoughFact = -one
       return
    end if
 

From 8355385f712c390231c5e14558d4bfb4c87c4459 Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Wed, 4 Jan 2023 13:13:51 +0100
Subject: [PATCH 35/60] make SA-rough changes more visibile

---
 src/turbulence/sa.F90 | 33 ++++++++++++++++++---------------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/src/turbulence/sa.F90 b/src/turbulence/sa.F90
index 0980c2ce6..c7824742a 100644
--- a/src/turbulence/sa.F90
+++ b/src/turbulence/sa.F90
@@ -107,7 +107,7 @@ subroutine saSource
 
     ! Local variables.
     integer(kind=intType) :: i, j, k, nn, ii
-    real(kind=realType) :: dist, ksLocal
+    real(kind=realType) :: distRough
     real(kind=realType) :: fv1, fv2, ft2
     real(kind=realType) :: ss, sst, nu, dist2Inv, chi, chi2, chi3
     real(kind=realType) :: rr, gg, gg6, termFw, fwSa, term1, term2
@@ -244,24 +244,23 @@ subroutine saSource
                 ! and nu) and the functions fv1 and fv2. The latter corrects
                 ! the production term near a viscous wall.
 
-                ! As the rough Version of SA is supported, this looks slightly different
-                ! than the standard SA implementation
-
-                if (useRoughSA) then
-                   ksLocal = ks(i,j,k)
-                   dist = d2Wall(i,j,k) + 0.03 * ksLocal
+                nu       = rlv(i,j,k)/w(i,j,k,irho)
+                if (.not. useRoughSA) then
+                   dist2Inv = one/(d2Wall(i,j,k)**2)
+                   chi      = w(i,j,k,itu1)/nu
                 else
-                   ksLocal = zero
-                   dist = d2Wall(i,j,k)
+                   distRough = d2Wall(i,j,k) + 0.03 * ks(i,j,k)
+                   dist2Inv = one/(distRough**2)
+                   chi      = w(i,j,k,itu1)/nu + rsaCr1*ks(i,j,k)/distRough
                 end if
-
-                nu       = rlv(i,j,k)/w(i,j,k,irho)
-                dist2Inv = one/(dist**2)
-                chi      = w(i,j,k,itu1)/nu + rsaCr1*ksLocal/dist
                 chi2     = chi*chi
                 chi3     = chi*chi2
                 fv1      = chi3/(chi3+cv13)
-                fv2      = one - w(i,j,k,itu1)/(nu + w(i,j,k,itu1)*fv1)
+                if (.not. useRoughSA) then
+                   fv2      = one - chi/(one + chi*fv1)
+                else
+                   fv2      = one - w(i,j,k,itu1)/(nu + w(i,j,k,itu1)*fv1)
+                end if
 
                 ! The function ft2, which is designed to keep a laminar
                 ! solution laminar. When running in fully turbulent mode
@@ -320,7 +319,11 @@ subroutine saSource
                 ! treatment.
 
                 dfv1 = three*chi2*cv13/((chi3+cv13)**2)
-                dfv2 = (w(i,j,k,itu1)*dfv1 - nu) / (nu + w(i,j,k,itu1)*fv1)**2
+                if (.not. useRoughSA) then
+                   dfv2 = (chi2*dfv1 - one)/(nu*((one + chi*fv1)**2))
+                else
+                   dfv2 = (w(i,j,k,itu1)*dfv1 - nu) / (nu + w(i,j,k,itu1)*fv1)**2
+                endif
                 dft2 = -two*rsaCt4*chi*ft2/nu
 
                 drr = (one - rr*(fv2 + w(i,j,k,itu1)*dfv2)) &

From 3774ec885e8f804a0d4175e942d5088fd5f829b9 Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Wed, 4 Jan 2023 13:14:21 +0100
Subject: [PATCH 36/60] run tapenade

---
 src/adjoint/outputForward/sa_d.f90            |  75 +++++-----
 .../outputForward/turbbcroutines_d.f90        |   2 +-
 src/adjoint/outputReverse/sa_b.f90            | 141 ++++++++++--------
 .../outputReverse/turbbcroutines_b.f90        |   2 +-
 src/adjoint/outputReverseFast/sa_fast_b.f90   | 126 ++++++++++------
 .../turbbcroutines_fast_b.f90                 |   2 +-
 6 files changed, 205 insertions(+), 143 deletions(-)

diff --git a/src/adjoint/outputForward/sa_d.f90 b/src/adjoint/outputForward/sa_d.f90
index 066602c52..927ee7619 100644
--- a/src/adjoint/outputForward/sa_d.f90
+++ b/src/adjoint/outputForward/sa_d.f90
@@ -40,8 +40,8 @@ subroutine sasource_d()
     real(kind=realtype), parameter :: f23=two*third
 ! local variables.
     integer(kind=inttype) :: i, j, k, nn, ii
-    real(kind=realtype) :: dist, kslocal
-    real(kind=realtype) :: distd
+    real(kind=realtype) :: distrough
+    real(kind=realtype) :: distroughd
     real(kind=realtype) :: fv1, fv2, ft2
     real(kind=realtype) :: fv1d, fv2d, ft2d
     real(kind=realtype) :: ss, sst, nu, dist2inv, chi, chi2, chi3
@@ -276,35 +276,41 @@ subroutine sasource_d()
 ! wall distance squared, the ratio chi (ratio of nutilde
 ! and nu) and the functions fv1 and fv2. the latter corrects
 ! the production term near a viscous wall.
-! as the rough version of sa is supported, this looks slightly different
-! than the standard sa implementation
-            if (useroughsa) then
-              kslocal = ks(i, j, k)
-              distd = d2walld(i, j, k)
-              dist = d2wall(i, j, k) + 0.03*kslocal
-            else
-              kslocal = zero
-              distd = d2walld(i, j, k)
-              dist = d2wall(i, j, k)
-            end if
             nud = (rlvd(i, j, k)*w(i, j, k, irho)-rlv(i, j, k)*wd(i, j, &
 &             k, irho))/w(i, j, k, irho)**2
             nu = rlv(i, j, k)/w(i, j, k, irho)
-            dist2invd = -(one*2*dist*distd/(dist**2)**2)
-            dist2inv = one/dist**2
-            chid = (wd(i, j, k, itu1)*nu-w(i, j, k, itu1)*nud)/nu**2 - &
-&             rsacr1*kslocal*distd/dist**2
-            chi = w(i, j, k, itu1)/nu + rsacr1*kslocal/dist
+            if (.not.useroughsa) then
+              dist2invd = -(one*2*d2wall(i, j, k)*d2walld(i, j, k)/(&
+&               d2wall(i, j, k)**2)**2)
+              dist2inv = one/d2wall(i, j, k)**2
+              chid = (wd(i, j, k, itu1)*nu-w(i, j, k, itu1)*nud)/nu**2
+              chi = w(i, j, k, itu1)/nu
+            else
+              distroughd = d2walld(i, j, k)
+              distrough = d2wall(i, j, k) + 0.03*ks(i, j, k)
+              dist2invd = -(one*2*distrough*distroughd/(distrough**2)**2&
+&               )
+              dist2inv = one/distrough**2
+              chid = (wd(i, j, k, itu1)*nu-w(i, j, k, itu1)*nud)/nu**2 -&
+&               rsacr1*ks(i, j, k)*distroughd/distrough**2
+              chi = w(i, j, k, itu1)/nu + rsacr1*ks(i, j, k)/distrough
+            end if
             chi2d = chid*chi + chi*chid
             chi2 = chi*chi
             chi3d = chid*chi2 + chi*chi2d
             chi3 = chi*chi2
             fv1d = (chi3d*(chi3+cv13)-chi3*chi3d)/(chi3+cv13)**2
             fv1 = chi3/(chi3+cv13)
-            fv2d = -((wd(i, j, k, itu1)*(nu+w(i, j, k, itu1)*fv1)-w(i, j&
-&             , k, itu1)*(nud+wd(i, j, k, itu1)*fv1+w(i, j, k, itu1)*&
-&             fv1d))/(nu+w(i, j, k, itu1)*fv1)**2)
-            fv2 = one - w(i, j, k, itu1)/(nu+w(i, j, k, itu1)*fv1)
+            if (.not.useroughsa) then
+              fv2d = -((chid*(one+chi*fv1)-chi*(chid*fv1+chi*fv1d))/(one&
+&               +chi*fv1)**2)
+              fv2 = one - chi/(one+chi*fv1)
+            else
+              fv2d = -((wd(i, j, k, itu1)*(nu+w(i, j, k, itu1)*fv1)-w(i&
+&               , j, k, itu1)*(nud+wd(i, j, k, itu1)*fv1+w(i, j, k, itu1&
+&               )*fv1d))/(nu+w(i, j, k, itu1)*fv1)**2)
+              fv2 = one - w(i, j, k, itu1)/(nu+w(i, j, k, itu1)*fv1)
+            end if
 ! the function ft2, which is designed to keep a laminar
 ! solution laminar. when running in fully turbulent mode
 ! this function should be set to 0.0.
@@ -416,7 +422,7 @@ subroutine sasource()
     real(kind=realtype), parameter :: f23=two*third
 ! local variables.
     integer(kind=inttype) :: i, j, k, nn, ii
-    real(kind=realtype) :: dist, kslocal
+    real(kind=realtype) :: distrough
     real(kind=realtype) :: fv1, fv2, ft2
     real(kind=realtype) :: ss, sst, nu, dist2inv, chi, chi2, chi3
     real(kind=realtype) :: rr, gg, gg6, termfw, fwsa, term1, term2
@@ -530,22 +536,23 @@ subroutine sasource()
 ! wall distance squared, the ratio chi (ratio of nutilde
 ! and nu) and the functions fv1 and fv2. the latter corrects
 ! the production term near a viscous wall.
-! as the rough version of sa is supported, this looks slightly different
-! than the standard sa implementation
-            if (useroughsa) then
-              kslocal = ks(i, j, k)
-              dist = d2wall(i, j, k) + 0.03*kslocal
+            nu = rlv(i, j, k)/w(i, j, k, irho)
+            if (.not.useroughsa) then
+              dist2inv = one/d2wall(i, j, k)**2
+              chi = w(i, j, k, itu1)/nu
             else
-              kslocal = zero
-              dist = d2wall(i, j, k)
+              distrough = d2wall(i, j, k) + 0.03*ks(i, j, k)
+              dist2inv = one/distrough**2
+              chi = w(i, j, k, itu1)/nu + rsacr1*ks(i, j, k)/distrough
             end if
-            nu = rlv(i, j, k)/w(i, j, k, irho)
-            dist2inv = one/dist**2
-            chi = w(i, j, k, itu1)/nu + rsacr1*kslocal/dist
             chi2 = chi*chi
             chi3 = chi*chi2
             fv1 = chi3/(chi3+cv13)
-            fv2 = one - w(i, j, k, itu1)/(nu+w(i, j, k, itu1)*fv1)
+            if (.not.useroughsa) then
+              fv2 = one - chi/(one+chi*fv1)
+            else
+              fv2 = one - w(i, j, k, itu1)/(nu+w(i, j, k, itu1)*fv1)
+            end if
 ! the function ft2, which is designed to keep a laminar
 ! solution laminar. when running in fully turbulent mode
 ! this function should be set to 0.0.
diff --git a/src/adjoint/outputForward/turbbcroutines_d.f90 b/src/adjoint/outputForward/turbbcroutines_d.f90
index 00b51de9c..b4bdd82b9 100644
--- a/src/adjoint/outputForward/turbbcroutines_d.f90
+++ b/src/adjoint/outputForward/turbbcroutines_d.f90
@@ -2233,7 +2233,7 @@ function saroughfact(i, j, k)
 ! local variablse
     integer(kind=inttype) :: i, j, k
     if (.not.useroughsa) then
-      saroughfact = one
+      saroughfact = -one
       return
     else
       saroughfact = (ks(i, j, k)-d2wall(i, j, k)/0.03)/(ks(i, j, k)+&
diff --git a/src/adjoint/outputReverse/sa_b.f90 b/src/adjoint/outputReverse/sa_b.f90
index b1e2bc429..0ebb428cb 100644
--- a/src/adjoint/outputReverse/sa_b.f90
+++ b/src/adjoint/outputReverse/sa_b.f90
@@ -42,8 +42,8 @@ subroutine sasource_b()
     real(kind=realtype), parameter :: f23=two*third
 ! local variables.
     integer(kind=inttype) :: i, j, k, nn, ii
-    real(kind=realtype) :: dist, kslocal
-    real(kind=realtype) :: distd
+    real(kind=realtype) :: distrough
+    real(kind=realtype) :: distroughd
     real(kind=realtype) :: fv1, fv2, ft2
     real(kind=realtype) :: fv1d, fv2d, ft2d
     real(kind=realtype) :: ss, sst, nu, dist2inv, chi, chi2, chi3
@@ -75,6 +75,8 @@ subroutine sasource_b()
     real(kind=realtype) :: temp2
     real(kind=realtype) :: temp1
     real(kind=realtype) :: temp0
+    real(kind=realtype) :: tempd11
+    real(kind=realtype) :: tempd10
     real(kind=realtype) :: min1
     real(kind=realtype) :: min1d
     real(kind=realtype) :: tempd9
@@ -91,6 +93,7 @@ subroutine sasource_b()
     real(kind=realtype) :: temp
     real(kind=realtype) :: y1
     real(kind=realtype) :: y1d
+    real(kind=realtype) :: temp4
 ! set model constants
     cv13 = rsacv1**3
     kar2inv = one/rsak**2
@@ -196,24 +199,27 @@ subroutine sasource_b()
 ! wall distance squared, the ratio chi (ratio of nutilde
 ! and nu) and the functions fv1 and fv2. the latter corrects
 ! the production term near a viscous wall.
-! as the rough version of sa is supported, this looks slightly different
-! than the standard sa implementation
-        if (useroughsa) then
-          kslocal = ks(i, j, k)
-          dist = d2wall(i, j, k) + 0.03*kslocal
+        nu = rlv(i, j, k)/w(i, j, k, irho)
+        if (.not.useroughsa) then
+          dist2inv = one/d2wall(i, j, k)**2
+          chi = w(i, j, k, itu1)/nu
           call pushcontrol1b(0)
         else
-          kslocal = zero
-          dist = d2wall(i, j, k)
+          distrough = d2wall(i, j, k) + 0.03*ks(i, j, k)
+          dist2inv = one/distrough**2
+          chi = w(i, j, k, itu1)/nu + rsacr1*ks(i, j, k)/distrough
           call pushcontrol1b(1)
         end if
-        nu = rlv(i, j, k)/w(i, j, k, irho)
-        dist2inv = one/dist**2
-        chi = w(i, j, k, itu1)/nu + rsacr1*kslocal/dist
         chi2 = chi*chi
         chi3 = chi*chi2
         fv1 = chi3/(chi3+cv13)
-        fv2 = one - w(i, j, k, itu1)/(nu+w(i, j, k, itu1)*fv1)
+        if (.not.useroughsa) then
+          fv2 = one - chi/(one+chi*fv1)
+          call pushcontrol1b(0)
+        else
+          fv2 = one - w(i, j, k, itu1)/(nu+w(i, j, k, itu1)*fv1)
+          call pushcontrol1b(1)
+        end if
 ! the function ft2, which is designed to keep a laminar
 ! solution laminar. when running in fully turbulent mode
 ! this function should be set to 0.0.
@@ -275,18 +281,18 @@ subroutine sasource_b()
         end if
         term2 = dist2inv*(kar2inv*rsacb1*((one-ft2)*fv2+ft2)-rsacw1*fwsa&
 &         )
-        tempd8 = w(i, j, k, itu1)*scratchd(i, j, k, idvt)
-        temp3 = w(i, j, k, itu1)
-        term1d = tempd8
-        term2d = temp3*tempd8
-        wd(i, j, k, itu1) = wd(i, j, k, itu1) + (term1+term2*temp3)*&
-&         scratchd(i, j, k, idvt) + term2*tempd8
+        tempd10 = w(i, j, k, itu1)*scratchd(i, j, k, idvt)
+        temp4 = w(i, j, k, itu1)
+        term1d = tempd10
+        term2d = temp4*tempd10
+        wd(i, j, k, itu1) = wd(i, j, k, itu1) + (term1+term2*temp4)*&
+&         scratchd(i, j, k, idvt) + term2*tempd10
         scratchd(i, j, k, idvt) = 0.0_8
-        tempd9 = dist2inv*kar2inv*rsacb1*term2d
+        tempd11 = dist2inv*kar2inv*rsacb1*term2d
         dist2invd = (kar2inv*rsacb1*((one-ft2)*fv2+ft2)-rsacw1*fwsa)*&
 &         term2d
-        ft2d = (1.0_8-fv2)*tempd9
-        fv2d = (one-ft2)*tempd9
+        ft2d = (1.0_8-fv2)*tempd11
+        fv2d = (one-ft2)*tempd11
         fwsad = -(dist2inv*rsacw1*term2d)
         call popcontrol1b(branch)
         if (branch .ne. 0) then
@@ -294,21 +300,21 @@ subroutine sasource_b()
           ssd = ssd + rsacb1*(one-ft2)*term1d
         end if
         termfwd = gg*fwsad
-        temp2 = (one+cw36)/(cw36+gg6)
-        if (temp2 .le. 0.0_8 .and. (sixth .eq. 0.0_8 .or. sixth .ne. int&
+        temp3 = (one+cw36)/(cw36+gg6)
+        if (temp3 .le. 0.0_8 .and. (sixth .eq. 0.0_8 .or. sixth .ne. int&
 &           (sixth))) then
           gg6d = 0.0
         else
-          gg6d = -(sixth*temp2**(sixth-1)*temp2*termfwd/(cw36+gg6))
+          gg6d = -(sixth*temp3**(sixth-1)*temp3*termfwd/(cw36+gg6))
         end if
         ggd = 6*gg**5*gg6d + termfw*fwsad
         rrd = (rsacw2*6*rr**5-rsacw2+1.0_8)*ggd
         call popcontrol1b(branch)
         if (branch .eq. 0) rrd = 0.0_8
-        tempd7 = w(i, j, k, itu1)*kar2inv*rrd/sst
+        tempd9 = w(i, j, k, itu1)*kar2inv*rrd/sst
         wd(i, j, k, itu1) = wd(i, j, k, itu1) + kar2inv*dist2inv*rrd/sst
-        dist2invd = dist2invd + tempd7
-        sstd = -(dist2inv*tempd7/sst)
+        dist2invd = dist2invd + tempd9
+        sstd = -(dist2inv*tempd9/sst)
         call popcontrol1b(branch)
         if (branch .eq. 0) sstd = 0.0_8
         call popcontrol1b(branch)
@@ -323,39 +329,55 @@ subroutine sasource_b()
           if (.not.two*strainmag2 .eq. 0.0_8) strainmag2d = strainmag2d &
 &             + two*y1d/(2.0*sqrt(two*strainmag2))
         end if
-        tempd6 = kar2inv*w(i, j, k, itu1)*sstd
+        tempd8 = kar2inv*w(i, j, k, itu1)*sstd
         ssd = ssd + sstd
         wd(i, j, k, itu1) = wd(i, j, k, itu1) + kar2inv*fv2*dist2inv*&
 &         sstd
-        fv2d = fv2d + dist2inv*tempd6
-        dist2invd = dist2invd + fv2*tempd6
+        fv2d = fv2d + dist2inv*tempd8
+        dist2invd = dist2invd + fv2*tempd8
         call popcontrol1b(branch)
         if (branch .eq. 0) then
           chi2d = -(exp(-(rsact4*chi2))*rsact3*rsact4*ft2d)
         else
           chi2d = 0.0_8
         end if
-        temp1 = w(i, j, k, itu1)
-        temp0 = nu + temp1*fv1
-        tempd4 = w(i, j, k, itu1)*fv2d/temp0**2
-        wd(i, j, k, itu1) = wd(i, j, k, itu1) + fv1*tempd4 - fv2d/temp0
-        fv1d = temp1*tempd4
-        tempd5 = fv1d/(cv13+chi3)
-        chi3d = (1.0_8-chi3/(cv13+chi3))*tempd5
+        call popcontrol1b(branch)
+        if (branch .eq. 0) then
+          tempd5 = -(fv2d/(one+chi*fv1))
+          tempd6 = -(chi*tempd5/(one+chi*fv1))
+          chid = fv1*tempd6 + tempd5
+          fv1d = chi*tempd6
+          nud = 0.0_8
+        else
+          temp2 = w(i, j, k, itu1)
+          temp1 = nu + temp2*fv1
+          tempd7 = w(i, j, k, itu1)*fv2d/temp1**2
+          wd(i, j, k, itu1) = wd(i, j, k, itu1) + fv1*tempd7 - fv2d/&
+&           temp1
+          nud = tempd7
+          fv1d = temp2*tempd7
+          chid = 0.0_8
+        end if
+        tempd4 = fv1d/(cv13+chi3)
+        chi3d = (1.0_8-chi3/(cv13+chi3))*tempd4
         chi2d = chi2d + chi*chi3d
-        chid = 2*chi*chi2d + chi2*chi3d
-        nud = tempd4 - w(i, j, k, itu1)*chid/nu**2
-        wd(i, j, k, itu1) = wd(i, j, k, itu1) + chid/nu
-        distd = -(one*2*dist2invd/dist**3) - rsacr1*kslocal*chid/dist**2
-        temp = w(i, j, k, irho)
-        rlvd(i, j, k) = rlvd(i, j, k) + nud/temp
-        wd(i, j, k, irho) = wd(i, j, k, irho) - rlv(i, j, k)*nud/temp**2
+        chid = chid + 2*chi*chi2d + chi2*chi3d
         call popcontrol1b(branch)
         if (branch .eq. 0) then
-          d2walld(i, j, k) = d2walld(i, j, k) + distd
+          wd(i, j, k, itu1) = wd(i, j, k, itu1) + chid/nu
+          nud = nud - w(i, j, k, itu1)*chid/nu**2
+          temp0 = d2wall(i, j, k)
+          d2walld(i, j, k) = d2walld(i, j, k) - one*2*dist2invd/temp0**3
         else
-          d2walld(i, j, k) = d2walld(i, j, k) + distd
+          wd(i, j, k, itu1) = wd(i, j, k, itu1) + chid/nu
+          nud = nud - w(i, j, k, itu1)*chid/nu**2
+          distroughd = -(one*2*dist2invd/distrough**3) - ks(i, j, k)*&
+&           rsacr1*chid/distrough**2
+          d2walld(i, j, k) = d2walld(i, j, k) + distroughd
         end if
+        temp = w(i, j, k, irho)
+        rlvd(i, j, k) = rlvd(i, j, k) + nud/temp
+        wd(i, j, k, irho) = wd(i, j, k, irho) - rlv(i, j, k)*nud/temp**2
         call popcontrol2b(branch)
         if (branch .eq. 0) then
           if (strainprod .eq. 0.0_8) then
@@ -557,7 +579,7 @@ subroutine sasource()
     real(kind=realtype), parameter :: f23=two*third
 ! local variables.
     integer(kind=inttype) :: i, j, k, nn, ii
-    real(kind=realtype) :: dist, kslocal
+    real(kind=realtype) :: distrough
     real(kind=realtype) :: fv1, fv2, ft2
     real(kind=realtype) :: ss, sst, nu, dist2inv, chi, chi2, chi3
     real(kind=realtype) :: rr, gg, gg6, termfw, fwsa, term1, term2
@@ -672,22 +694,23 @@ subroutine sasource()
 ! wall distance squared, the ratio chi (ratio of nutilde
 ! and nu) and the functions fv1 and fv2. the latter corrects
 ! the production term near a viscous wall.
-! as the rough version of sa is supported, this looks slightly different
-! than the standard sa implementation
-        if (useroughsa) then
-          kslocal = ks(i, j, k)
-          dist = d2wall(i, j, k) + 0.03*kslocal
+        nu = rlv(i, j, k)/w(i, j, k, irho)
+        if (.not.useroughsa) then
+          dist2inv = one/d2wall(i, j, k)**2
+          chi = w(i, j, k, itu1)/nu
         else
-          kslocal = zero
-          dist = d2wall(i, j, k)
+          distrough = d2wall(i, j, k) + 0.03*ks(i, j, k)
+          dist2inv = one/distrough**2
+          chi = w(i, j, k, itu1)/nu + rsacr1*ks(i, j, k)/distrough
         end if
-        nu = rlv(i, j, k)/w(i, j, k, irho)
-        dist2inv = one/dist**2
-        chi = w(i, j, k, itu1)/nu + rsacr1*kslocal/dist
         chi2 = chi*chi
         chi3 = chi*chi2
         fv1 = chi3/(chi3+cv13)
-        fv2 = one - w(i, j, k, itu1)/(nu+w(i, j, k, itu1)*fv1)
+        if (.not.useroughsa) then
+          fv2 = one - chi/(one+chi*fv1)
+        else
+          fv2 = one - w(i, j, k, itu1)/(nu+w(i, j, k, itu1)*fv1)
+        end if
 ! the function ft2, which is designed to keep a laminar
 ! solution laminar. when running in fully turbulent mode
 ! this function should be set to 0.0.
diff --git a/src/adjoint/outputReverse/turbbcroutines_b.f90 b/src/adjoint/outputReverse/turbbcroutines_b.f90
index 813a8922c..7b33716ba 100644
--- a/src/adjoint/outputReverse/turbbcroutines_b.f90
+++ b/src/adjoint/outputReverse/turbbcroutines_b.f90
@@ -2615,7 +2615,7 @@ function saroughfact(i, j, k)
 ! local variablse
     integer(kind=inttype) :: i, j, k
     if (.not.useroughsa) then
-      saroughfact = one
+      saroughfact = -one
       return
     else
       saroughfact = (ks(i, j, k)-d2wall(i, j, k)/0.03)/(ks(i, j, k)+&
diff --git a/src/adjoint/outputReverseFast/sa_fast_b.f90 b/src/adjoint/outputReverseFast/sa_fast_b.f90
index 3f52c40db..dcdde684f 100644
--- a/src/adjoint/outputReverseFast/sa_fast_b.f90
+++ b/src/adjoint/outputReverseFast/sa_fast_b.f90
@@ -37,7 +37,7 @@ subroutine sasource_fast_b()
     real(kind=realtype), parameter :: f23=two*third
 ! local variables.
     integer(kind=inttype) :: i, j, k, nn, ii
-    real(kind=realtype) :: dist, kslocal
+    real(kind=realtype) :: distrough
     real(kind=realtype) :: fv1, fv2, ft2
     real(kind=realtype) :: fv1d, fv2d, ft2d
     real(kind=realtype) :: ss, sst, nu, dist2inv, chi, chi2, chi3
@@ -67,6 +67,8 @@ subroutine sasource_fast_b()
     real(kind=realtype) :: temp2
     real(kind=realtype) :: temp1
     real(kind=realtype) :: temp0
+    real(kind=realtype) :: tempd11
+    real(kind=realtype) :: tempd10
     real(kind=realtype) :: min1
     real(kind=realtype) :: min1d
     real(kind=realtype) :: tempd9
@@ -184,22 +186,31 @@ subroutine sasource_fast_b()
 ! wall distance squared, the ratio chi (ratio of nutilde
 ! and nu) and the functions fv1 and fv2. the latter corrects
 ! the production term near a viscous wall.
-! as the rough version of sa is supported, this looks slightly different
-! than the standard sa implementation
-        if (useroughsa) then
-          kslocal = ks(i, j, k)
-          dist = d2wall(i, j, k) + 0.03*kslocal
+        nu = rlv(i, j, k)/w(i, j, k, irho)
+        if (.not.useroughsa) then
+          dist2inv = one/d2wall(i, j, k)**2
+          chi = w(i, j, k, itu1)/nu
+myIntPtr = myIntPtr + 1
+ myIntStack(myIntPtr) = 0
         else
-          kslocal = zero
-          dist = d2wall(i, j, k)
+          distrough = d2wall(i, j, k) + 0.03*ks(i, j, k)
+          dist2inv = one/distrough**2
+          chi = w(i, j, k, itu1)/nu + rsacr1*ks(i, j, k)/distrough
+myIntPtr = myIntPtr + 1
+ myIntStack(myIntPtr) = 1
         end if
-        nu = rlv(i, j, k)/w(i, j, k, irho)
-        dist2inv = one/dist**2
-        chi = w(i, j, k, itu1)/nu + rsacr1*kslocal/dist
         chi2 = chi*chi
         chi3 = chi*chi2
         fv1 = chi3/(chi3+cv13)
-        fv2 = one - w(i, j, k, itu1)/(nu+w(i, j, k, itu1)*fv1)
+        if (.not.useroughsa) then
+          fv2 = one - chi/(one+chi*fv1)
+myIntPtr = myIntPtr + 1
+ myIntStack(myIntPtr) = 0
+        else
+          fv2 = one - w(i, j, k, itu1)/(nu+w(i, j, k, itu1)*fv1)
+myIntPtr = myIntPtr + 1
+ myIntStack(myIntPtr) = 1
+        end if
 ! the function ft2, which is designed to keep a laminar
 ! solution laminar. when running in fully turbulent mode
 ! this function should be set to 0.0.
@@ -273,16 +284,16 @@ subroutine sasource_fast_b()
         end if
         term2 = dist2inv*(kar2inv*rsacb1*((one-ft2)*fv2+ft2)-rsacw1*fwsa&
 &         )
-        tempd8 = w(i, j, k, itu1)*scratchd(i, j, k, idvt)
+        tempd10 = w(i, j, k, itu1)*scratchd(i, j, k, idvt)
         temp3 = w(i, j, k, itu1)
-        term1d = tempd8
-        term2d = temp3*tempd8
+        term1d = tempd10
+        term2d = temp3*tempd10
         wd(i, j, k, itu1) = wd(i, j, k, itu1) + (term1+term2*temp3)*&
-&         scratchd(i, j, k, idvt) + term2*tempd8
+&         scratchd(i, j, k, idvt) + term2*tempd10
         scratchd(i, j, k, idvt) = 0.0_8
-        tempd9 = dist2inv*kar2inv*rsacb1*term2d
-        ft2d = (1.0_8-fv2)*tempd9
-        fv2d = (one-ft2)*tempd9
+        tempd11 = dist2inv*kar2inv*rsacb1*term2d
+        ft2d = (1.0_8-fv2)*tempd11
+        fv2d = (one-ft2)*tempd11
         fwsad = -(dist2inv*rsacw1*term2d)
 branch = myIntStack(myIntPtr)
  myIntPtr = myIntPtr - 1
@@ -303,9 +314,9 @@ subroutine sasource_fast_b()
 branch = myIntStack(myIntPtr)
  myIntPtr = myIntPtr - 1
         if (branch .eq. 0) rrd = 0.0_8
-        tempd7 = kar2inv*dist2inv*rrd/sst
-        wd(i, j, k, itu1) = wd(i, j, k, itu1) + tempd7
-        sstd = -(w(i, j, k, itu1)*tempd7/sst)
+        tempd9 = kar2inv*dist2inv*rrd/sst
+        wd(i, j, k, itu1) = wd(i, j, k, itu1) + tempd9
+        sstd = -(w(i, j, k, itu1)*tempd9/sst)
 branch = myIntStack(myIntPtr)
  myIntPtr = myIntPtr - 1
         if (branch .eq. 0) sstd = 0.0_8
@@ -323,10 +334,10 @@ subroutine sasource_fast_b()
           if (.not.two*strainmag2 .eq. 0.0_8) strainmag2d = strainmag2d &
 &             + two*y1d/(2.0*sqrt(two*strainmag2))
         end if
-        tempd6 = kar2inv*dist2inv*sstd
+        tempd8 = kar2inv*dist2inv*sstd
         ssd = ssd + sstd
-        wd(i, j, k, itu1) = wd(i, j, k, itu1) + fv2*tempd6
-        fv2d = fv2d + w(i, j, k, itu1)*tempd6
+        wd(i, j, k, itu1) = wd(i, j, k, itu1) + fv2*tempd8
+        fv2d = fv2d + w(i, j, k, itu1)*tempd8
 branch = myIntStack(myIntPtr)
  myIntPtr = myIntPtr - 1
         if (branch .eq. 0) then
@@ -334,17 +345,37 @@ subroutine sasource_fast_b()
         else
           chi2d = 0.0_8
         end if
-        temp1 = w(i, j, k, itu1)
-        temp0 = nu + temp1*fv1
-        tempd4 = w(i, j, k, itu1)*fv2d/temp0**2
-        wd(i, j, k, itu1) = wd(i, j, k, itu1) + fv1*tempd4 - fv2d/temp0
-        fv1d = temp1*tempd4
-        tempd5 = fv1d/(cv13+chi3)
-        chi3d = (1.0_8-chi3/(cv13+chi3))*tempd5
+branch = myIntStack(myIntPtr)
+ myIntPtr = myIntPtr - 1
+        if (branch .eq. 0) then
+          tempd5 = -(fv2d/(one+chi*fv1))
+          tempd6 = -(chi*tempd5/(one+chi*fv1))
+          chid = fv1*tempd6 + tempd5
+          fv1d = chi*tempd6
+          nud = 0.0_8
+        else
+          temp1 = w(i, j, k, itu1)
+          temp0 = nu + temp1*fv1
+          tempd7 = w(i, j, k, itu1)*fv2d/temp0**2
+          wd(i, j, k, itu1) = wd(i, j, k, itu1) + fv1*tempd7 - fv2d/&
+&           temp0
+          nud = tempd7
+          fv1d = temp1*tempd7
+          chid = 0.0_8
+        end if
+        tempd4 = fv1d/(cv13+chi3)
+        chi3d = (1.0_8-chi3/(cv13+chi3))*tempd4
         chi2d = chi2d + chi*chi3d
-        chid = 2*chi*chi2d + chi2*chi3d
-        nud = tempd4 - w(i, j, k, itu1)*chid/nu**2
-        wd(i, j, k, itu1) = wd(i, j, k, itu1) + chid/nu
+        chid = chid + 2*chi*chi2d + chi2*chi3d
+branch = myIntStack(myIntPtr)
+ myIntPtr = myIntPtr - 1
+        if (branch .eq. 0) then
+          wd(i, j, k, itu1) = wd(i, j, k, itu1) + chid/nu
+          nud = nud - w(i, j, k, itu1)*chid/nu**2
+        else
+          wd(i, j, k, itu1) = wd(i, j, k, itu1) + chid/nu
+          nud = nud - w(i, j, k, itu1)*chid/nu**2
+        end if
         temp = w(i, j, k, irho)
         rlvd(i, j, k) = rlvd(i, j, k) + nud/temp
         wd(i, j, k, irho) = wd(i, j, k, irho) - rlv(i, j, k)*nud/temp**2
@@ -483,7 +514,7 @@ subroutine sasource()
     real(kind=realtype), parameter :: f23=two*third
 ! local variables.
     integer(kind=inttype) :: i, j, k, nn, ii
-    real(kind=realtype) :: dist, kslocal
+    real(kind=realtype) :: distrough
     real(kind=realtype) :: fv1, fv2, ft2
     real(kind=realtype) :: ss, sst, nu, dist2inv, chi, chi2, chi3
     real(kind=realtype) :: rr, gg, gg6, termfw, fwsa, term1, term2
@@ -598,22 +629,23 @@ subroutine sasource()
 ! wall distance squared, the ratio chi (ratio of nutilde
 ! and nu) and the functions fv1 and fv2. the latter corrects
 ! the production term near a viscous wall.
-! as the rough version of sa is supported, this looks slightly different
-! than the standard sa implementation
-        if (useroughsa) then
-          kslocal = ks(i, j, k)
-          dist = d2wall(i, j, k) + 0.03*kslocal
+        nu = rlv(i, j, k)/w(i, j, k, irho)
+        if (.not.useroughsa) then
+          dist2inv = one/d2wall(i, j, k)**2
+          chi = w(i, j, k, itu1)/nu
         else
-          kslocal = zero
-          dist = d2wall(i, j, k)
+          distrough = d2wall(i, j, k) + 0.03*ks(i, j, k)
+          dist2inv = one/distrough**2
+          chi = w(i, j, k, itu1)/nu + rsacr1*ks(i, j, k)/distrough
         end if
-        nu = rlv(i, j, k)/w(i, j, k, irho)
-        dist2inv = one/dist**2
-        chi = w(i, j, k, itu1)/nu + rsacr1*kslocal/dist
         chi2 = chi*chi
         chi3 = chi*chi2
         fv1 = chi3/(chi3+cv13)
-        fv2 = one - w(i, j, k, itu1)/(nu+w(i, j, k, itu1)*fv1)
+        if (.not.useroughsa) then
+          fv2 = one - chi/(one+chi*fv1)
+        else
+          fv2 = one - w(i, j, k, itu1)/(nu+w(i, j, k, itu1)*fv1)
+        end if
 ! the function ft2, which is designed to keep a laminar
 ! solution laminar. when running in fully turbulent mode
 ! this function should be set to 0.0.
diff --git a/src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90 b/src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90
index 8f640bcd9..15151db2a 100644
--- a/src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90
+++ b/src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90
@@ -1183,7 +1183,7 @@ function saroughfact(i, j, k)
 ! local variablse
     integer(kind=inttype) :: i, j, k
     if (.not.useroughsa) then
-      saroughfact = one
+      saroughfact = -one
       return
     else
       saroughfact = (ks(i, j, k)-d2wall(i, j, k)/0.03)/(ks(i, j, k)+&

From 766fdd182ff64d3d30338489d750264139bf2fd7 Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Wed, 4 Jan 2023 14:22:14 +0100
Subject: [PATCH 37/60] cleanup + tapenade

---
 src/adjoint/outputForward/sa_d.f90          | 17 ++++++-------
 src/adjoint/outputReverse/sa_b.f90          | 18 ++++++--------
 src/adjoint/outputReverseFast/sa_fast_b.f90 | 27 ++++++---------------
 src/modules/block.F90                       |  2 +-
 src/modules/wallDistanceData.F90            |  3 ---
 src/turbulence/sa.F90                       |  9 ++++---
 6 files changed, 31 insertions(+), 45 deletions(-)

diff --git a/src/adjoint/outputForward/sa_d.f90 b/src/adjoint/outputForward/sa_d.f90
index 927ee7619..13ed59973 100644
--- a/src/adjoint/outputForward/sa_d.f90
+++ b/src/adjoint/outputForward/sa_d.f90
@@ -279,21 +279,20 @@ subroutine sasource_d()
             nud = (rlvd(i, j, k)*w(i, j, k, irho)-rlv(i, j, k)*wd(i, j, &
 &             k, irho))/w(i, j, k, irho)**2
             nu = rlv(i, j, k)/w(i, j, k, irho)
+            chid = (wd(i, j, k, itu1)*nu-w(i, j, k, itu1)*nud)/nu**2
+            chi = w(i, j, k, itu1)/nu
             if (.not.useroughsa) then
               dist2invd = -(one*2*d2wall(i, j, k)*d2walld(i, j, k)/(&
 &               d2wall(i, j, k)**2)**2)
               dist2inv = one/d2wall(i, j, k)**2
-              chid = (wd(i, j, k, itu1)*nu-w(i, j, k, itu1)*nud)/nu**2
-              chi = w(i, j, k, itu1)/nu
             else
               distroughd = d2walld(i, j, k)
-              distrough = d2wall(i, j, k) + 0.03*ks(i, j, k)
+              distrough = d2wall(i, j, k) + 0.03_realtype*ks(i, j, k)
               dist2invd = -(one*2*distrough*distroughd/(distrough**2)**2&
 &               )
               dist2inv = one/distrough**2
-              chid = (wd(i, j, k, itu1)*nu-w(i, j, k, itu1)*nud)/nu**2 -&
-&               rsacr1*ks(i, j, k)*distroughd/distrough**2
-              chi = w(i, j, k, itu1)/nu + rsacr1*ks(i, j, k)/distrough
+              chid = chid - rsacr1*ks(i, j, k)*distroughd/distrough**2
+              chi = chi + rsacr1*ks(i, j, k)/distrough
             end if
             chi2d = chid*chi + chi*chid
             chi2 = chi*chi
@@ -537,13 +536,13 @@ subroutine sasource()
 ! and nu) and the functions fv1 and fv2. the latter corrects
 ! the production term near a viscous wall.
             nu = rlv(i, j, k)/w(i, j, k, irho)
+            chi = w(i, j, k, itu1)/nu
             if (.not.useroughsa) then
               dist2inv = one/d2wall(i, j, k)**2
-              chi = w(i, j, k, itu1)/nu
             else
-              distrough = d2wall(i, j, k) + 0.03*ks(i, j, k)
+              distrough = d2wall(i, j, k) + 0.03_realtype*ks(i, j, k)
               dist2inv = one/distrough**2
-              chi = w(i, j, k, itu1)/nu + rsacr1*ks(i, j, k)/distrough
+              chi = chi + rsacr1*ks(i, j, k)/distrough
             end if
             chi2 = chi*chi
             chi3 = chi*chi2
diff --git a/src/adjoint/outputReverse/sa_b.f90 b/src/adjoint/outputReverse/sa_b.f90
index 0ebb428cb..482d221d3 100644
--- a/src/adjoint/outputReverse/sa_b.f90
+++ b/src/adjoint/outputReverse/sa_b.f90
@@ -200,14 +200,14 @@ subroutine sasource_b()
 ! and nu) and the functions fv1 and fv2. the latter corrects
 ! the production term near a viscous wall.
         nu = rlv(i, j, k)/w(i, j, k, irho)
+        chi = w(i, j, k, itu1)/nu
         if (.not.useroughsa) then
           dist2inv = one/d2wall(i, j, k)**2
-          chi = w(i, j, k, itu1)/nu
           call pushcontrol1b(0)
         else
-          distrough = d2wall(i, j, k) + 0.03*ks(i, j, k)
+          distrough = d2wall(i, j, k) + 0.03_realtype*ks(i, j, k)
           dist2inv = one/distrough**2
-          chi = w(i, j, k, itu1)/nu + rsacr1*ks(i, j, k)/distrough
+          chi = chi + rsacr1*ks(i, j, k)/distrough
           call pushcontrol1b(1)
         end if
         chi2 = chi*chi
@@ -364,17 +364,15 @@ subroutine sasource_b()
         chid = chid + 2*chi*chi2d + chi2*chi3d
         call popcontrol1b(branch)
         if (branch .eq. 0) then
-          wd(i, j, k, itu1) = wd(i, j, k, itu1) + chid/nu
-          nud = nud - w(i, j, k, itu1)*chid/nu**2
           temp0 = d2wall(i, j, k)
           d2walld(i, j, k) = d2walld(i, j, k) - one*2*dist2invd/temp0**3
         else
-          wd(i, j, k, itu1) = wd(i, j, k, itu1) + chid/nu
-          nud = nud - w(i, j, k, itu1)*chid/nu**2
           distroughd = -(one*2*dist2invd/distrough**3) - ks(i, j, k)*&
 &           rsacr1*chid/distrough**2
           d2walld(i, j, k) = d2walld(i, j, k) + distroughd
         end if
+        wd(i, j, k, itu1) = wd(i, j, k, itu1) + chid/nu
+        nud = nud - w(i, j, k, itu1)*chid/nu**2
         temp = w(i, j, k, irho)
         rlvd(i, j, k) = rlvd(i, j, k) + nud/temp
         wd(i, j, k, irho) = wd(i, j, k, irho) - rlv(i, j, k)*nud/temp**2
@@ -695,13 +693,13 @@ subroutine sasource()
 ! and nu) and the functions fv1 and fv2. the latter corrects
 ! the production term near a viscous wall.
         nu = rlv(i, j, k)/w(i, j, k, irho)
+        chi = w(i, j, k, itu1)/nu
         if (.not.useroughsa) then
           dist2inv = one/d2wall(i, j, k)**2
-          chi = w(i, j, k, itu1)/nu
         else
-          distrough = d2wall(i, j, k) + 0.03*ks(i, j, k)
+          distrough = d2wall(i, j, k) + 0.03_realtype*ks(i, j, k)
           dist2inv = one/distrough**2
-          chi = w(i, j, k, itu1)/nu + rsacr1*ks(i, j, k)/distrough
+          chi = chi + rsacr1*ks(i, j, k)/distrough
         end if
         chi2 = chi*chi
         chi3 = chi*chi2
diff --git a/src/adjoint/outputReverseFast/sa_fast_b.f90 b/src/adjoint/outputReverseFast/sa_fast_b.f90
index dcdde684f..1ed499e2e 100644
--- a/src/adjoint/outputReverseFast/sa_fast_b.f90
+++ b/src/adjoint/outputReverseFast/sa_fast_b.f90
@@ -187,17 +187,13 @@ subroutine sasource_fast_b()
 ! and nu) and the functions fv1 and fv2. the latter corrects
 ! the production term near a viscous wall.
         nu = rlv(i, j, k)/w(i, j, k, irho)
+        chi = w(i, j, k, itu1)/nu
         if (.not.useroughsa) then
           dist2inv = one/d2wall(i, j, k)**2
-          chi = w(i, j, k, itu1)/nu
-myIntPtr = myIntPtr + 1
- myIntStack(myIntPtr) = 0
         else
-          distrough = d2wall(i, j, k) + 0.03*ks(i, j, k)
+          distrough = d2wall(i, j, k) + 0.03_realtype*ks(i, j, k)
           dist2inv = one/distrough**2
-          chi = w(i, j, k, itu1)/nu + rsacr1*ks(i, j, k)/distrough
-myIntPtr = myIntPtr + 1
- myIntStack(myIntPtr) = 1
+          chi = chi + rsacr1*ks(i, j, k)/distrough
         end if
         chi2 = chi*chi
         chi3 = chi*chi2
@@ -367,15 +363,8 @@ subroutine sasource_fast_b()
         chi3d = (1.0_8-chi3/(cv13+chi3))*tempd4
         chi2d = chi2d + chi*chi3d
         chid = chid + 2*chi*chi2d + chi2*chi3d
-branch = myIntStack(myIntPtr)
- myIntPtr = myIntPtr - 1
-        if (branch .eq. 0) then
-          wd(i, j, k, itu1) = wd(i, j, k, itu1) + chid/nu
-          nud = nud - w(i, j, k, itu1)*chid/nu**2
-        else
-          wd(i, j, k, itu1) = wd(i, j, k, itu1) + chid/nu
-          nud = nud - w(i, j, k, itu1)*chid/nu**2
-        end if
+        wd(i, j, k, itu1) = wd(i, j, k, itu1) + chid/nu
+        nud = nud - w(i, j, k, itu1)*chid/nu**2
         temp = w(i, j, k, irho)
         rlvd(i, j, k) = rlvd(i, j, k) + nud/temp
         wd(i, j, k, irho) = wd(i, j, k, irho) - rlv(i, j, k)*nud/temp**2
@@ -630,13 +619,13 @@ subroutine sasource()
 ! and nu) and the functions fv1 and fv2. the latter corrects
 ! the production term near a viscous wall.
         nu = rlv(i, j, k)/w(i, j, k, irho)
+        chi = w(i, j, k, itu1)/nu
         if (.not.useroughsa) then
           dist2inv = one/d2wall(i, j, k)**2
-          chi = w(i, j, k, itu1)/nu
         else
-          distrough = d2wall(i, j, k) + 0.03*ks(i, j, k)
+          distrough = d2wall(i, j, k) + 0.03_realtype*ks(i, j, k)
           dist2inv = one/distrough**2
-          chi = w(i, j, k, itu1)/nu + rsacr1*ks(i, j, k)/distrough
+          chi = chi + rsacr1*ks(i, j, k)/distrough
         end if
         chi2 = chi*chi
         chi3 = chi*chi2
diff --git a/src/modules/block.F90 b/src/modules/block.F90
index 0184afa3b..4b185e3ba 100644
--- a/src/modules/block.F90
+++ b/src/modules/block.F90
@@ -95,7 +95,7 @@ module block
 
      ! uSlip(:,:,3):  the 3 components of the velocity vector on
      !                a viscous wall.
-     ! KSTNS_Wall(:,:): Wall temperature for isothermal walls.
+     ! TNS_Wall(:,:): Wall temperature for isothermal walls.
      ! ksNS_Wall(:,:): Equivalent Sand Grain Roughness on viscous walls.
 
      real(kind=realType), dimension(:,:,:), pointer :: uSlip
diff --git a/src/modules/wallDistanceData.F90 b/src/modules/wallDistanceData.F90
index 1bf7c8048..2365fc689 100644
--- a/src/modules/wallDistanceData.F90
+++ b/src/modules/wallDistanceData.F90
@@ -44,9 +44,6 @@ module wallDistanceData
   real(kind=realType), dimension(:), pointer :: xVolume
 #endif
 
-
-
-  ! sa rough
   integer(kind=intType), dimension(:,:), allocatable :: nCellBlockOffset
 
 end module wallDistanceData
diff --git a/src/turbulence/sa.F90 b/src/turbulence/sa.F90
index c7824742a..2da8780d0 100644
--- a/src/turbulence/sa.F90
+++ b/src/turbulence/sa.F90
@@ -245,17 +245,20 @@ subroutine saSource
                 ! the production term near a viscous wall.
 
                 nu       = rlv(i,j,k)/w(i,j,k,irho)
+                chi      = w(i,j,k,itu1)/nu
+
                 if (.not. useRoughSA) then
                    dist2Inv = one/(d2Wall(i,j,k)**2)
-                   chi      = w(i,j,k,itu1)/nu
                 else
-                   distRough = d2Wall(i,j,k) + 0.03 * ks(i,j,k)
+                   distRough = d2Wall(i,j,k) + 0.03_realType * ks(i,j,k)
                    dist2Inv = one/(distRough**2)
-                   chi      = w(i,j,k,itu1)/nu + rsaCr1*ks(i,j,k)/distRough
+                   chi      = chi + rsaCr1*ks(i,j,k)/distRough
                 end if
+
                 chi2     = chi*chi
                 chi3     = chi*chi2
                 fv1      = chi3/(chi3+cv13)
+
                 if (.not. useRoughSA) then
                    fv2      = one - chi/(one + chi*fv1)
                 else

From b3df15261d8acc42a6649db91ce7943dd9ab0b38 Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Wed, 11 Jan 2023 16:46:28 +0100
Subject: [PATCH 38/60] Use realtype instead of float (+tapenade)

---
 src/adjoint/outputForward/turbbcroutines_d.f90          | 4 ++--
 src/adjoint/outputReverse/turbbcroutines_b.f90          | 4 ++--
 src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90 | 4 ++--
 src/turbulence/turbBCRoutines.F90                       | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/adjoint/outputForward/turbbcroutines_d.f90 b/src/adjoint/outputForward/turbbcroutines_d.f90
index b4bdd82b9..3dd1d02a8 100644
--- a/src/adjoint/outputForward/turbbcroutines_d.f90
+++ b/src/adjoint/outputForward/turbbcroutines_d.f90
@@ -2236,8 +2236,8 @@ function saroughfact(i, j, k)
       saroughfact = -one
       return
     else
-      saroughfact = (ks(i, j, k)-d2wall(i, j, k)/0.03)/(ks(i, j, k)+&
-&       d2wall(i, j, k)/0.03)
+      saroughfact = (ks(i, j, k)-d2wall(i, j, k)/0.03_realtype)/(ks(i, j&
+&       , k)+d2wall(i, j, k)/0.03_realtype)
     end if
   end function saroughfact
 end module turbbcroutines_d
diff --git a/src/adjoint/outputReverse/turbbcroutines_b.f90 b/src/adjoint/outputReverse/turbbcroutines_b.f90
index 7b33716ba..a2c007ebd 100644
--- a/src/adjoint/outputReverse/turbbcroutines_b.f90
+++ b/src/adjoint/outputReverse/turbbcroutines_b.f90
@@ -2618,8 +2618,8 @@ function saroughfact(i, j, k)
       saroughfact = -one
       return
     else
-      saroughfact = (ks(i, j, k)-d2wall(i, j, k)/0.03)/(ks(i, j, k)+&
-&       d2wall(i, j, k)/0.03)
+      saroughfact = (ks(i, j, k)-d2wall(i, j, k)/0.03_realtype)/(ks(i, j&
+&       , k)+d2wall(i, j, k)/0.03_realtype)
     end if
   end function saroughfact
 end module turbbcroutines_b
diff --git a/src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90 b/src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90
index 15151db2a..21d093e92 100644
--- a/src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90
+++ b/src/adjoint/outputReverseFast/turbbcroutines_fast_b.f90
@@ -1186,8 +1186,8 @@ function saroughfact(i, j, k)
       saroughfact = -one
       return
     else
-      saroughfact = (ks(i, j, k)-d2wall(i, j, k)/0.03)/(ks(i, j, k)+&
-&       d2wall(i, j, k)/0.03)
+      saroughfact = (ks(i, j, k)-d2wall(i, j, k)/0.03_realtype)/(ks(i, j&
+&       , k)+d2wall(i, j, k)/0.03_realtype)
     end if
   end function saroughfact
 end module turbbcroutines_fast_b
diff --git a/src/turbulence/turbBCRoutines.F90 b/src/turbulence/turbBCRoutines.F90
index 3c673365f..085305ca6 100644
--- a/src/turbulence/turbBCRoutines.F90
+++ b/src/turbulence/turbBCRoutines.F90
@@ -1413,8 +1413,8 @@ function  saRoughFact(i,j,k)
       return
    end if
 
-   saRoughFact = (ks(i,j,k) - d2wall(i,j,k)/0.03) / &
-        (ks(i,j,k) + d2wall(i,j,k)/0.03)
+   saRoughFact = (ks(i,j,k) - d2wall(i,j,k)/0.03_realType) / &
+        (ks(i,j,k) + d2wall(i,j,k)/0.03_realType)
 
   end function  saRoughFact
 

From 3c716c7eae3ffcab78a6f231d8a0d5360ef093bd Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Sat, 21 Jan 2023 16:39:40 +0100
Subject: [PATCH 39/60] use correct wall cluster for overset case

---
 src/wallDistance/wallDistance.F90 | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/src/wallDistance/wallDistance.F90 b/src/wallDistance/wallDistance.F90
index 0b504d2b6..bb40be60d 100644
--- a/src/wallDistance/wallDistance.F90
+++ b/src/wallDistance/wallDistance.F90
@@ -313,7 +313,7 @@ subroutine updateWallRoughness()
                       endif
 
                       ! set the ks value
-                      flowDoms(nn, level, sps)%ks(i, j, k) = ksGlobal(iCell)
+                      ks(i, j, k) = ksGlobal(iCell)
                    end do
                 end do
              end do
@@ -1953,6 +1953,10 @@ subroutine determineWallAssociation(level, sps)
        allocate(fullWall%conn(4, nCells))
        allocate(fullWall%ind(nNodes))
 
+       if (useRoughSA) then
+          allocate(fullWall%indCell(nCells))
+       end if
+
        nNodes = 0
        nCells = 0
        ii = 0
@@ -1969,6 +1973,9 @@ subroutine determineWallAssociation(level, sps)
           do j=1, walls(i)%nCells
              nCells = nCells + 1
              fullWall%conn(:, nCells) = walls(i)%conn(:, j) + ii
+             if (useRoughSA) then
+                fullWall%indCell(nCells) = walls(i)%indCell(j)
+             end if
           end do
 
           ! Increment the node offset
@@ -2082,7 +2089,7 @@ subroutine determineWallAssociation(level, sps)
                       end do
                       flowDoms(nn, level, sps)%uv(:, i, j, k) = uvw(1:2)
                       if (useRoughSA) then
-                         flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) = walls(c)%indCell(cellID)
+                         flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) = fullWall%indCell(cellID)
                       end if
                    else
 
@@ -2103,7 +2110,7 @@ subroutine determineWallAssociation(level, sps)
                          end do
                          flowDoms(nn, level, sps)%uv(:, i, j, k) = uvw2(1:2)
                          if (useRoughSA) then
-                            flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) = walls(c)%indCell(cellID2)
+                            flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) = fullWall%indCell(cellID2)
                          end if
                       else
                          ! The full wall distance is better. Take that.
@@ -2114,7 +2121,7 @@ subroutine determineWallAssociation(level, sps)
                          end do
                          flowDoms(nn, level, sps)%uv(:, i, j, k) = uvw(1:2)
                          if (useRoughSA) then
-                            flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) = walls(c)%indCell(cellID)
+                            flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) = fullWall%indCell(cellID)
                          end if
                       end if
                    end if

From 30123beb74e2df8cc8ea98496fddea9ca93d7f0a Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Thu, 16 Feb 2023 16:40:34 +0100
Subject: [PATCH 40/60] Cleanup Comments

---
 src/bcdata/BCData.F90 | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/bcdata/BCData.F90 b/src/bcdata/BCData.F90
index 193b63db2..4bf51c0bc 100644
--- a/src/bcdata/BCData.F90
+++ b/src/bcdata/BCData.F90
@@ -1635,7 +1635,7 @@ subroutine setBCData_d(bcDataNamesIn, bcDataIn, bcDataInd, famLists, sps, &
                    call setBCVarNamesAdiabaticWall
                    call errorCheckbcDataNamesIn("NSWallAdiabatic", bcDataNamesIn)
                 case (NSWallIsothermal)
-                   call setBCVarNamesSupersonicInflow  ! possible bug?
+                   call setBCVarNamesSupersonicInflow
                    call errorCheckbcDataNamesIn("NSWallIsothermal", bcDataNamesIn)
                 case (SupersonicInflow)
                    call setBCVarNamesSupersonicInflow
@@ -3134,11 +3134,11 @@ subroutine setBCDataFineGrid_d(initializationPart)
              ! call the appropriate routine.
              select case (BCType(j))
 
-!            case (NSWallAdiabatic)
-!            TODO: This is not needed, right?
-!               call setBCVarNamesAdiabaticWall ! sets bcVarNames and nbcVar
-!               call extractFromDataSet_d(bcVarArray, bcVarArrayd)
-!               call BCDataAdiabaticWall_d(j, bcVarArray, bcVarArrayd, iBeg, iEnd, jBeg, jEnd)
+           ! This would be needed if wall roughness is differentiated
+           ! case (NSWallAdiabatic)
+           !    call setBCVarNamesAdiabaticWall ! sets bcVarNames and nbcVar
+           !    call extractFromDataSet_d(bcVarArray, bcVarArrayd)
+           !    call BCDataAdiabaticWall_d(j, bcVarArray, bcVarArrayd, iBeg, iEnd, jBeg, jEnd)
 
              case (NSWallIsothermal)
                 call setBCVarNamesIsothermalWall ! sets bcVarNames and nbcVar
@@ -3236,12 +3236,12 @@ subroutine setBCDataFineGrid_b(initializationPart)
              ! call the appropriate routine.
              select case (BCType(j))
 
-!            case (NSWallAdiabatic)
-!            TODO: This is not needed, right?
-!               call setBCVarNamesAdiabaticWall ! sets bcVarNames and nbcVar
-!               call extractFromDataSet(bcVarArray)
-!               call BCDataAdiabaticWall_b(j, bcVarArray, bcVarArrayd, iBeg, iEnd, jBeg, jEnd)
-!               call extractFromDataSet_b(bcVarArray, bcVarArrayd)
+           ! This would be needed if wall roughness is differentiated
+           ! case (NSWallAdiabatic)
+           !    call setBCVarNamesAdiabaticWall ! sets bcVarNames and nbcVar
+           !    call extractFromDataSet(bcVarArray)
+           !    call BCDataAdiabaticWall_b(j, bcVarArray, bcVarArrayd, iBeg, iEnd, jBeg, jEnd)
+           !    call extractFromDataSet_b(bcVarArray, bcVarArrayd)
 
              case (NSWallIsothermal)
                 call setBCVarNamesIsothermalWall ! sets bcVarNames and nbcVar

From 63b0fb68f1323a8008117145b5e3bbf5ba9fa04b Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Thu, 16 Feb 2023 16:40:48 +0100
Subject: [PATCH 41/60] Add tests

---
 .../reg_tests/refs/adjoint_rans_rough_sa.json | 791 ++++++++++++++++++
 tests/reg_tests/refs/funcs_rans_rough_sa.json | 788 +++++++++++++++++
 tests/reg_tests/test_adjoint.py               |  47 +-
 tests/reg_tests/test_functionals.py           |  69 ++
 4 files changed, 1694 insertions(+), 1 deletion(-)
 create mode 100644 tests/reg_tests/refs/adjoint_rans_rough_sa.json
 create mode 100644 tests/reg_tests/refs/funcs_rans_rough_sa.json

diff --git a/tests/reg_tests/refs/adjoint_rans_rough_sa.json b/tests/reg_tests/refs/adjoint_rans_rough_sa.json
new file mode 100644
index 000000000..e0b3421db
--- /dev/null
+++ b/tests/reg_tests/refs/adjoint_rans_rough_sa.json
@@ -0,0 +1,791 @@
+{
+    "Eval Functions Sens:": {
+        "mdo_tutorial_cd": {
+            "P_mdo_tutorial": 1.1906967920306818e-08,
+            "T_mdo_tutorial": 5.703210137725844e-08,
+            "alpha_mdo_tutorial": 0.005064588210334571,
+            "beta_mdo_tutorial": -0.0008845056168120849,
+            "mach_mdo_tutorial": 0.09103183719324323,
+            "shape": {
+                "__ndarray__": [
+                    [
+                        0.0014588467683968598,
+                        -0.00014951327884760212,
+                        -0.015675496092974146,
+                        -0.014075985970051375,
+                        0.0013474930848888166,
+                        0.0018275829889606954,
+                        -0.012563965621770756,
+                        -0.010466958623448562,
+                        0.0013093995527863652,
+                        -0.0032557348500170855,
+                        0.002277825768411675,
+                        0.006688700916670462,
+                        -0.0018114377012302114,
+                        0.005977863740562657,
+                        0.009835326794037596,
+                        0.007136308037146596,
+                        0.00051486740314912,
+                        -0.001967845348703995,
+                        0.002182197087400649,
+                        0.004428495984520651,
+                        0.00434797688639819,
+                        0.003642262509593754,
+                        0.003191110798437138,
+                        0.004015682626633113,
+                        0.0020419681575339715,
+                        0.002166621981060197,
+                        0.0019894668966113013,
+                        0.0016303554139183803,
+                        -0.019657990220678254,
+                        -0.020829878357254417,
+                        -0.020784582903082136,
+                        -0.01888479845407782,
+                        -0.00036947254958874,
+                        -0.00031304699767892383,
+                        -0.0003057421005384802,
+                        0.0002688066991534009,
+                        -0.017918305571589913,
+                        -0.018865736825431162,
+                        -0.01859183680327999,
+                        -0.0164887342965249,
+                        0.00091265077701028,
+                        0.0008121102434689316,
+                        0.0008901049453480803,
+                        0.0010405908745272647,
+                        -0.004064652115740351,
+                        -0.0037963135756424967,
+                        -0.003204166967230152,
+                        -0.0026293301388228283,
+                        0.003333173380565299,
+                        0.0034921446557946326,
+                        0.0033575207726116423,
+                        0.0030107476981468774,
+                        0.0074402621899147,
+                        0.007319652738596415,
+                        0.006915763888469176,
+                        0.006162240091788922,
+                        0.00042023051048211465,
+                        0.00313156137607408,
+                        0.005383636958473503,
+                        0.006824954530041592,
+                        0.010375635834801573,
+                        0.01198796332413367,
+                        0.011950339779730068,
+                        0.009432494720749723,
+                        0.011250799431237779,
+                        0.009854867862598673,
+                        0.008009279266530044,
+                        0.005465377456483178,
+                        0.006177656586328,
+                        0.005047038528601074,
+                        0.004344575288306905,
+                        0.004048991557116562
+                    ]
+                ],
+                "dtype": "float64",
+                "shape": [
+                    1,
+                    72
+                ]
+            },
+            "span": 0.0022966478428800424,
+            "twist": {
+                "__ndarray__": [
+                    [
+                        -0.0009085026908665598,
+                        -0.0011352775427239743,
+                        -0.0011130263520413698,
+                        -0.0009513309718359052,
+                        -0.000686800358123642,
+                        -0.00031510515859730894
+                    ]
+                ],
+                "dtype": "float64",
+                "shape": [
+                    1,
+                    6
+                ]
+            },
+            "xRef_mdo_tutorial": 0.0,
+            "yRef_mdo_tutorial": 0.0,
+            "zRef_mdo_tutorial": 0.0
+        },
+        "mdo_tutorial_cl": {
+            "P_mdo_tutorial": -2.2527598084618665e-07,
+            "T_mdo_tutorial": -1.3233798184444104e-07,
+            "alpha_mdo_tutorial": 0.11733097471393325,
+            "beta_mdo_tutorial": -0.005417244776871979,
+            "mach_mdo_tutorial": 0.4461752831328847,
+            "shape": {
+                "__ndarray__": [
+                    [
+                        0.000664512668947459,
+                        -0.009764051446158363,
+                        -0.38014815803026536,
+                        -0.3170998471501844,
+                        0.01262978825010657,
+                        0.023153961286768835,
+                        -0.19944001682277823,
+                        -0.169191221601281,
+                        0.035149811661999326,
+                        0.06625711164122611,
+                        0.09120798114669897,
+                        0.1595909431983622,
+                        0.0784037050728186,
+                        0.06061704069028219,
+                        0.04711837791039026,
+                        0.16186094246740526,
+                        0.027871468602502482,
+                        0.032536469116873794,
+                        0.041772976243304574,
+                        0.058567756425349775,
+                        0.03130211475024963,
+                        0.02349065555604541,
+                        0.04666963738532008,
+                        0.07533987952044943,
+                        0.008047492664585834,
+                        0.01353940603382281,
+                        0.01504377805258511,
+                        0.01571595124130868,
+                        -0.4638501532002112,
+                        -0.47399094658617624,
+                        -0.44823619262467873,
+                        -0.3688999595192084,
+                        0.008718247472931123,
+                        0.029550002917121544,
+                        0.043126333367589316,
+                        0.04434156612727114,
+                        -0.3956395006967429,
+                        -0.4079460890282394,
+                        -0.38620462938522176,
+                        -0.3163656383524239,
+                        0.06398782335322896,
+                        0.07479424398972617,
+                        0.07286141978007313,
+                        0.05846132788211428,
+                        0.08736644102793703,
+                        0.08697238548655578,
+                        0.08008641195454812,
+                        0.06426734524155492,
+                        0.10936456518484935,
+                        0.10829983178785824,
+                        0.10157510763622173,
+                        0.0825523738580159,
+                        0.1743947516759498,
+                        0.1692317944353174,
+                        0.15458475447925518,
+                        0.12034837358470284,
+                        0.11961118306482939,
+                        0.12724594458071078,
+                        0.11338927137762066,
+                        0.07770667985489071,
+                        0.056709733905460566,
+                        0.03836441806462418,
+                        0.024469816046477105,
+                        0.019976154473583273,
+                        0.04934741117499243,
+                        0.051117751963949146,
+                        0.0563070196687687,
+                        0.06050160881794041,
+                        0.18336405571947617,
+                        0.1839640798107625,
+                        0.17295069839871852,
+                        0.1403137146906817
+                    ]
+                ],
+                "dtype": "float64",
+                "shape": [
+                    1,
+                    72
+                ]
+            },
+            "span": 0.043457680475263105,
+            "twist": {
+                "__ndarray__": [
+                    [
+                        -0.024654343292180775,
+                        -0.02875083974451341,
+                        -0.025904215715540878,
+                        -0.02015846260061425,
+                        -0.01272474899214707,
+                        -0.004816129705518564
+                    ]
+                ],
+                "dtype": "float64",
+                "shape": [
+                    1,
+                    6
+                ]
+            },
+            "xRef_mdo_tutorial": 0.0,
+            "yRef_mdo_tutorial": 0.0,
+            "zRef_mdo_tutorial": 0.0
+        },
+        "mdo_tutorial_cmz": {
+            "P_mdo_tutorial": -3.6562060386387263e-07,
+            "T_mdo_tutorial": -2.3220688985930304e-07,
+            "alpha_mdo_tutorial": 0.15587229388606796,
+            "beta_mdo_tutorial": -0.00960134679308337,
+            "mach_mdo_tutorial": 0.8704444540175647,
+            "shape": {
+                "__ndarray__": [
+                    [
+                        -0.02904712834133137,
+                        -0.04708047818363903,
+                        -0.49032482175390585,
+                        -0.401316554603264,
+                        0.009545464221693022,
+                        0.02672530112488119,
+                        -0.44647159525464397,
+                        -0.37531454266609365,
+                        0.017827410830409764,
+                        0.08962439141045295,
+                        0.13469818406324752,
+                        0.23088231518746674,
+                        0.06582830189607722,
+                        0.07763781029298673,
+                        0.09150772425797266,
+                        0.2507770031456443,
+                        0.051190462308772576,
+                        0.07611282984010259,
+                        0.10139818246299213,
+                        0.14288689493950352,
+                        0.05984752177317515,
+                        0.06087119675464407,
+                        0.11793996845148985,
+                        0.18543384750328865,
+                        -0.035792580712111305,
+                        -0.029063640559724546,
+                        -0.01801377651314351,
+                        -0.0024105812920647285,
+                        -0.6512115102017961,
+                        -0.7548563235266409,
+                        -0.8101129239208131,
+                        -0.7525332782418399,
+                        -0.04452557149988305,
+                        -0.01709954630398286,
+                        0.016810542144914467,
+                        0.041434039606613654,
+                        -0.5458096626601696,
+                        -0.6405871067374069,
+                        -0.6906119609438893,
+                        -0.6398074456856482,
+                        0.05479140879876329,
+                        0.08331778931396966,
+                        0.09984183171688307,
+                        0.09540804372093747,
+                        0.13061487068539895,
+                        0.1467662689081624,
+                        0.15217931951376726,
+                        0.13677491289715185,
+                        0.17617158652365011,
+                        0.19557374496959673,
+                        0.20395944940757538,
+                        0.18339268089443714,
+                        0.27865713047217044,
+                        0.3033889798196525,
+                        0.30782550157857647,
+                        0.26624801836986134,
+                        0.12544590383563262,
+                        0.16080952345936206,
+                        0.16870887439203017,
+                        0.1338404820039562,
+                        0.08919775817735803,
+                        0.07838533829431213,
+                        0.06617921442228045,
+                        0.061226194962847716,
+                        0.1122797518483396,
+                        0.12801585006675376,
+                        0.14524155735561856,
+                        0.15534721746744484,
+                        0.31101156418400344,
+                        0.34532436046246506,
+                        0.3561771297416096,
+                        0.3173191720433378
+                    ]
+                ],
+                "dtype": "float64",
+                "shape": [
+                    1,
+                    72
+                ]
+            },
+            "span": 0.06413599860398977,
+            "twist": {
+                "__ndarray__": [
+                    [
+                        -0.024967200640317398,
+                        -0.03216020114952957,
+                        -0.03408738804686446,
+                        -0.03126988598549719,
+                        -0.022967856493619272,
+                        -0.009658516327858274
+                    ]
+                ],
+                "dtype": "float64",
+                "shape": [
+                    1,
+                    6
+                ]
+            },
+            "xRef_mdo_tutorial": -0.12640436393489507,
+            "yRef_mdo_tutorial": 0.003590207077957915,
+            "zRef_mdo_tutorial": 0.0
+        },
+        "mdo_tutorial_drag": {
+            "P_mdo_tutorial": 0.505615746299267,
+            "T_mdo_tutorial": 0.02325084708947911,
+            "alpha_mdo_tutorial": 2064.7313215891973,
+            "beta_mdo_tutorial": -360.59524986195044,
+            "mach_mdo_tutorial": 62149.93506781718,
+            "shape": {
+                "__ndarray__": [
+                    [
+                        594.7426505400322,
+                        -60.95357352058238,
+                        -6390.58624718373,
+                        -5738.497960270588,
+                        549.345980847465,
+                        745.0690329394852,
+                        -5122.077504683481,
+                        -4267.169691607504,
+                        533.8160096799684,
+                        -1327.2979836548932,
+                        928.6240092660958,
+                        2726.849589708214,
+                        -738.4869220374837,
+                        2437.055489752636,
+                        4009.6660273932557,
+                        2909.330060583923,
+                        209.90114291584837,
+                        -802.2511917596289,
+                        889.6381085914871,
+                        1805.409242969377,
+                        1772.583217046823,
+                        1484.8775799111868,
+                        1300.9520503068206,
+                        1637.1134932257708,
+                        832.4695784634872,
+                        883.2884492386564,
+                        811.0658644105258,
+                        664.6632951462427,
+                        -8014.169453166152,
+                        -8491.924808685506,
+                        -8473.45875792854,
+                        -7698.954633758449,
+                        -150.62656901632556,
+                        -127.62300001372478,
+                        -124.64493954750833,
+                        109.5871151108405,
+                        -7304.9348154258005,
+                        -7691.183588991789,
+                        -7579.520027961214,
+                        -6722.127198007299,
+                        372.06946877170367,
+                        331.081104057561,
+                        362.87798411953634,
+                        424.22808772726233,
+                        -1657.0773745448485,
+                        -1547.6811185177953,
+                        -1306.2747892003838,
+                        -1071.9253109952701,
+                        1358.86812378893,
+                        1423.6775332743846,
+                        1368.7940685782762,
+                        1227.4216215805127,
+                        3033.246089584422,
+                        2984.076028470986,
+                        2819.418622051113,
+                        2512.2220406205115,
+                        171.3195745134269,
+                        1276.6749417978572,
+                        2194.8011152304007,
+                        2782.397462807346,
+                        4229.939217131979,
+                        4887.252887982736,
+                        4871.914521400283,
+                        3845.4394477552814,
+                        4586.725912127057,
+                        4017.6325302242367,
+                        3265.2229713789347,
+                        2228.12508145904,
+                        2518.507037114204,
+                        2057.576667340112,
+                        1771.196453536957,
+                        1650.6928780052563
+                    ]
+                ],
+                "dtype": "float64",
+                "shape": [
+                    1,
+                    72
+                ]
+            },
+            "span": 936.2973925853767,
+            "twist": {
+                "__ndarray__": [
+                    [
+                        -370.37837701247804,
+                        -462.82994861770794,
+                        -453.7585832002331,
+                        -387.8386105980703,
+                        -279.9947699998464,
+                        -128.46207105695308
+                    ]
+                ],
+                "dtype": "float64",
+                "shape": [
+                    1,
+                    6
+                ]
+            },
+            "xRef_mdo_tutorial": 0.0,
+            "yRef_mdo_tutorial": 0.0,
+            "zRef_mdo_tutorial": 0.0
+        },
+        "mdo_tutorial_lift": {
+            "P_mdo_tutorial": 8.270592846885847,
+            "T_mdo_tutorial": -0.05395154843834504,
+            "alpha_mdo_tutorial": 47833.49177137628,
+            "beta_mdo_tutorial": -2208.502350635163,
+            "mach_mdo_tutorial": 600018.4073654747,
+            "shape": {
+                "__ndarray__": [
+                    [
+                        270.908524876344,
+                        -3980.608493570493,
+                        -154978.80106577938,
+                        -129275.2656861883,
+                        5148.912073803472,
+                        9439.40693739,
+                        -81307.7060583101,
+                        -68975.87722240997,
+                        14329.875218364088,
+                        27011.699273895196,
+                        37183.66975388621,
+                        65062.03572310845,
+                        31963.622484085907,
+                        24712.35514861459,
+                        19209.22030650783,
+                        65987.46902511171,
+                        11362.640319868198,
+                        13264.467729567019,
+                        17030.006954870347,
+                        23876.902939486547,
+                        12761.246141381704,
+                        9576.670457088181,
+                        19026.277769246855,
+                        30714.562082896737,
+                        3280.801809498298,
+                        5519.745051868973,
+                        6133.04743647798,
+                        6407.079002056791,
+                        -189102.43045666313,
+                        -193236.62910425203,
+                        -182736.93100922933,
+                        -150393.13549679087,
+                        3554.2551297638493,
+                        12046.945189252045,
+                        17581.743587298828,
+                        18077.169678765833,
+                        -161294.31164404895,
+                        -166311.46157503212,
+                        -157447.90330776715,
+                        -128975.94344351538,
+                        26086.555824644354,
+                        30492.117389731626,
+                        29704.143615940033,
+                        23833.51415098023,
+                        35617.55067826964,
+                        35456.90211515942,
+                        32649.62842562994,
+                        26200.51130807692,
+                        44585.74593455944,
+                        44151.67542327428,
+                        41410.13988113458,
+                        33654.951774435576,
+                        71097.25236325164,
+                        68992.41795539077,
+                        63021.112706102766,
+                        49063.62494301142,
+                        48763.08711186904,
+                        51875.626686665215,
+                        46226.53815522764,
+                        31679.45924324079,
+                        23119.42431857829,
+                        15640.40595658914,
+                        9975.854605827382,
+                        8143.878655788661,
+                        20117.952587818672,
+                        20839.685120662565,
+                        22955.24577856176,
+                        24665.295882896382,
+                        74753.85823571598,
+                        74998.47605725183,
+                        70508.5407231889,
+                        57203.095205096804
+                    ]
+                ],
+                "dtype": "float64",
+                "shape": [
+                    1,
+                    72
+                ]
+            },
+            "span": 17716.82717615593,
+            "twist": {
+                "__ndarray__": [
+                    [
+                        -10051.082673356312,
+                        -11721.142347043358,
+                        -10560.630662911663,
+                        -8218.202033018604,
+                        -5187.625669118539,
+                        -1963.4397583458262
+                    ]
+                ],
+                "dtype": "float64",
+                "shape": [
+                    1,
+                    6
+                ]
+            },
+            "xRef_mdo_tutorial": 0.0,
+            "yRef_mdo_tutorial": 0.0,
+            "zRef_mdo_tutorial": 0.0
+        }
+    },
+    "Norm of residual": 7.43989104883568e-13,
+    "metadata": {
+        "ADPC": false,
+        "AGMGLevels": 1,
+        "AGMGNSmooth": 3,
+        "ANKADPC": false,
+        "ANKASMOverlap": 1,
+        "ANKCFL0": 5.0,
+        "ANKCFLCutback": 0.5,
+        "ANKCFLExponent": 0.5,
+        "ANKCFLFactor": 10.0,
+        "ANKCFLLimit": 100000.0,
+        "ANKCFLMin": 1.0,
+        "ANKConstCFLStep": 0.4,
+        "ANKCoupledSwitchTol": 1e-16,
+        "ANKInnerPreconIts": 1,
+        "ANKJacobianLag": 10,
+        "ANKLinResMax": 0.1,
+        "ANKLinearSolveTol": 0.05,
+        "ANKMaxIter": 40,
+        "ANKNSubiterTurb": 1,
+        "ANKOuterPreconIts": 1,
+        "ANKPCILUFill": 2,
+        "ANKPCUpdateCutoff": 1e-06,
+        "ANKPCUpdateTol": 0.5,
+        "ANKPhysicalLSTol": 0.2,
+        "ANKPhysicalLSTolTurb": 0.99,
+        "ANKSecondOrdSwitchTol": 0.01,
+        "ANKStepFactor": 1.0,
+        "ANKStepMin": 0.01,
+        "ANKSubspaceSize": -1,
+        "ANKSwitchTol": 0.01,
+        "ANKTurbCFLScale": 1.0,
+        "ANKTurbKSPDebug": false,
+        "ANKUnsteadyLSTol": 1.0,
+        "ANKUseApproxSA": false,
+        "ANKUseFullVisc": true,
+        "ANKUseMatrixFree": true,
+        "ANKUseTurbDADI": true,
+        "ASMOverlap": 1,
+        "CFL": 1.5,
+        "CFLCoarse": 1.25,
+        "CFLLimit": 1.5,
+        "GMRESOrthogonalizationType": "modified Gram-Schmidt",
+        "ILUFill": 2,
+        "L2Convergence": 1e-15,
+        "L2ConvergenceCoarse": 0.01,
+        "L2ConvergenceRel": 1e-16,
+        "MGCycle": "2w",
+        "MGStartLevel": -1,
+        "NKADPC": false,
+        "NKASMOverlap": 1,
+        "NKFixedStep": 0.25,
+        "NKInnerPreconIts": 1,
+        "NKJacobianLag": 2,
+        "NKLS": "cubic",
+        "NKLinearSolveTol": 1e-06,
+        "NKOuterPreconIts": 1,
+        "NKPCILUFill": 2,
+        "NKSubspaceSize": 60,
+        "NKSwitchTol": 1e-05,
+        "NKUseEW": false,
+        "NKViscPC": false,
+        "RKReset": false,
+        "TSStability": false,
+        "adjointDivTol": 100000.0,
+        "adjointL2Convergence": 1e-16,
+        "adjointL2ConvergenceAbs": 1e-16,
+        "adjointL2ConvergenceRel": 1e-16,
+        "adjointMaxIter": 500,
+        "adjointMaxL2DeviationFactor": 1.0,
+        "adjointMonitorStep": 10,
+        "adjointSolver": "GMRES",
+        "adjointSubspaceSize": 100,
+        "alphaFollowing": true,
+        "alphaMode": false,
+        "altitudeMode": false,
+        "applyAdjointPCSubspaceSize": 20,
+        "applyPCSubspaceSize": 10,
+        "approxPC": true,
+        "backgroundVolScale": 1.0,
+        "betaMode": false,
+        "blockSplitting": true,
+        "cavExponent": 0,
+        "cavSensorOffset": 0.0,
+        "cavSensorSharpness": 10.0,
+        "cavitationNumber": 1.4,
+        "closedSurfaceFamilies": null,
+        "coarseDiscretization": "central plus scalar dissipation",
+        "computeCavitation": false,
+        "coupledSolution": false,
+        "cpMinRho": 100.0,
+        "cutCallback": null,
+        "debugZipper": false,
+        "deltaT": 0.01,
+        "designSurfaceFamily": null,
+        "discretization": "central plus scalar dissipation",
+        "dissipationLumpingParameter": 6.0,
+        "dissipationScalingExponent": 0.67,
+        "eddyVisInfRatio": 0.009,
+        "equationMode": "steady",
+        "equationType": "RANS",
+        "eulerWallTreatment": "linear pressure extrapolation",
+        "explicitSurfaceCallback": null,
+        "firstRun": true,
+        "flowType": "external",
+        "forcesAsTractions": true,
+        "frozenTurbulence": false,
+        "globalPreconditioner": "additive Schwarz",
+        "gridFile": "input_files/mdo_tutorial_rough.cgns",
+        "gridPrecision": "double",
+        "gridPrecisionSurface": "single",
+        "infChangeCorrection": true,
+        "innerPreconIts": 1,
+        "isoVariables": [],
+        "isosurface": {},
+        "liftIndex": 2,
+        "limiter": "van Albada",
+        "loadBalanceIter": 10,
+        "loadImbalance": 0.1,
+        "localPreconditioner": "ILU",
+        "lowSpeedPreconditioner": false,
+        "machMode": false,
+        "matrixOrdering": "RCM",
+        "maxL2DeviationFactor": 1.0,
+        "meshSurfaceFamily": null,
+        "monitorVariables": [
+            "resrho",
+            "resturb",
+            "cd"
+        ],
+        "nCycles": 1000,
+        "nCyclesCoarse": 100,
+        "nFloodIter": -1,
+        "nRKReset": 5,
+        "nRefine": 10,
+        "nSaveSurface": 1,
+        "nSaveVolume": 1,
+        "nSubiter": 3,
+        "nSubiterTurb": 3,
+        "nTimeStepsCoarse": 48,
+        "nTimeStepsFine": 400,
+        "nearWallDist": 0.1,
+        "numberSolutions": true,
+        "outerPreconIts": 3,
+        "outputDirectory": "tests/output_files",
+        "outputSurfaceFamily": "allSurfaces",
+        "overlapFactor": 0.9,
+        "oversetDebugPrint": false,
+        "oversetLoadBalance": true,
+        "oversetPriority": {},
+        "oversetProjTol": 1e-12,
+        "oversetUpdateMode": "frozen",
+        "pMode": false,
+        "partitionLikeNProc": -1,
+        "partitionOnly": false,
+        "preconditionerSide": "right",
+        "printAllOptions": true,
+        "printIntro": true,
+        "printIterations": true,
+        "printTiming": true,
+        "printWarnings": true,
+        "qMode": false,
+        "rMode": false,
+        "resAveraging": "never",
+        "restartAdjoint": true,
+        "restartFile": "input_files/mdo_tutorial_rough.cgns",
+        "restrictionRelaxation": 0.8,
+        "selfZipCutoff": 120.0,
+        "sepSensorOffset": 0.0,
+        "sepSensorSharpness": 10.0,
+        "setMonitor": true,
+        "skipAfterFailedAdjoint": true,
+        "smoothParameter": 1.5,
+        "smoother": "DADI",
+        "solutionPrecision": "single",
+        "solutionPrecisionSurface": "single",
+        "storeConvHist": true,
+        "storeRindLayer": true,
+        "surfaceVariables": [
+            "cp",
+            "vx",
+            "vy",
+            "vz",
+            "mach"
+        ],
+        "timeAccuracy": 2,
+        "timeIntegrationScheme": "BDF",
+        "timeIntervals": 1,
+        "timeLimit": -1.0,
+        "turbResScale": 10000.0,
+        "turbulenceModel": "SA",
+        "turbulenceOrder": "first order",
+        "turbulenceProduction": "strain",
+        "useALE": true,
+        "useANKSolver": true,
+        "useApproxWallDistance": true,
+        "useBlockettes": false,
+        "useDiagTSPC": true,
+        "useExternalDynamicMesh": false,
+        "useGridMotion": false,
+        "useLinResMonitor": false,
+        "useMatrixFreedrdw": true,
+        "useNKSolver": true,
+        "useOversetWallScaling": false,
+        "useQCR": false,
+        "useRotationSA": false,
+        "useRoughSA": true,
+        "useTSInterpolatedGridVelocity": false,
+        "useWallFunctions": false,
+        "useZipperMesh": true,
+        "useft2SA": true,
+        "verifyExtra": true,
+        "verifySpatial": true,
+        "verifyState": true,
+        "vis2": 0.25,
+        "vis2Coarse": 0.5,
+        "vis4": 0.0156,
+        "viscPC": false,
+        "viscWallTreatment": "constant pressure extrapolation",
+        "viscousSurfaceVelocities": true,
+        "volumeVariables": [
+            "resrho"
+        ],
+        "wallDistCutoff": 1e+20,
+        "windAxis": false,
+        "writeSolutionEachIter": false,
+        "writeSurfaceSolution": true,
+        "writeTecplotSurfaceSolution": false,
+        "writeVolumeSolution": true,
+        "zipperSurfaceFamily": null
+    }
+}
\ No newline at end of file
diff --git a/tests/reg_tests/refs/funcs_rans_rough_sa.json b/tests/reg_tests/refs/funcs_rans_rough_sa.json
new file mode 100644
index 000000000..ccf3b126e
--- /dev/null
+++ b/tests/reg_tests/refs/funcs_rans_rough_sa.json
@@ -0,0 +1,788 @@
+{
+    "Dot product test for (w, xV) -> (dw, F)": 25052510162.91482,
+    "Dot product test for Xv -> R": -7422557.015466748,
+    "Dot product test for w -> F": 48678.90552195662,
+    "Dot product test for w -> R": 25061252292.45091,
+    "Dot product test for xV -> F": -1368251.4261434954,
+    "Eval Functions:": {
+        "mdo_tutorial_cd": 0.024566400785789793,
+        "mdo_tutorial_cfx": 0.011668173003364507,
+        "mdo_tutorial_cfy": 0.4108141827884017,
+        "mdo_tutorial_cfz": 0.0075654034410337,
+        "mdo_tutorial_cl": 0.41024496461720367,
+        "mdo_tutorial_cmx": -0.7922014856117325,
+        "mdo_tutorial_cmy": -0.008981003314113318,
+        "mdo_tutorial_cmz": 0.6295255266022379,
+        "mdo_tutorial_drag": 10015.230272350784,
+        "mdo_tutorial_fx": 4756.880770011643,
+        "mdo_tutorial_fy": 167480.72603917564,
+        "mdo_tutorial_fz": 3084.2636748406194,
+        "mdo_tutorial_lift": 167248.66717514166,
+        "mdo_tutorial_mx": -1049635.2803761212,
+        "mdo_tutorial_my": -11899.470151067582,
+        "mdo_tutorial_mz": 834096.1417269013,
+        "mdo_tutorial_sepsensor": 0.016301194542570726,
+        "mdo_tutorial_sepsensoravgx": 0.09540342694812158,
+        "mdo_tutorial_sepsensoravgy": 9.994583497283502e-05,
+        "mdo_tutorial_sepsensoravgz": 0.05019620000103363
+    },
+    "Norm of residual": 2.287162877608185e-16,
+    "Norm of state vector": 472.577787711008,
+    "Sum of Forces x": 4756.880770011647,
+    "Sum of Forces y": 167480.72603917564,
+    "Sum of Forces z": 3084.263674840622,
+    "Sum of Tractions x": 336771.10695490666,
+    "Sum of Tractions y": 1650799.5101746526,
+    "Sum of Tractions z": 279940.140789498,
+    "Total number of adjoint state DOF": {
+        "__ndarray__": 145152,
+        "dtype": "int64",
+        "shape": []
+    },
+    "Total number of spatial DOF": {
+        "__ndarray__": 91125,
+        "dtype": "int64",
+        "shape": []
+    },
+    "Total number of state DOF": {
+        "__ndarray__": 145152,
+        "dtype": "int64",
+        "shape": []
+    },
+    "dFuncs/dP": {
+        "cd": 1.0255863078366303e-07,
+        "cfx": 1.0254457111038081e-07,
+        "cfy": 2.0585057746076828e-09,
+        "cfz": 3.0809570252248843e-09,
+        "cl": -1.163512791647045e-09,
+        "cmx": -5.0258036886875666e-09,
+        "cmy": 1.8366346879812804e-07,
+        "cmz": 2.0700897670852983e-09,
+        "drag": 0.542572616215423,
+        "fx": 0.27964940925086224,
+        "fy": 8.374875513592976,
+        "fz": 0.15546922830207469,
+        "lift": 8.361959017862185,
+        "mx": -52.48842300766143,
+        "my": -0.3516267579346115,
+        "mz": 41.70754987248287,
+        "sepsensor": 0.0,
+        "sepsensoravgx": 0.0,
+        "sepsensoravgy": 0.0,
+        "sepsensoravgz": 0.0
+    },
+    "dFuncs/dT": {
+        "cd": -6.434671811482668e-21,
+        "cfx": -6.433182828244117e-21,
+        "cfy": -1.4846411730779536e-22,
+        "cfz": -1.846656102488088e-22,
+        "cl": 5.368029669547891e-23,
+        "cmx": 3.5502025762938107e-22,
+        "cmy": -1.2030208599706458e-20,
+        "cmz": -1.3651932049147963e-22,
+        "drag": -2.6232870041052545e-15,
+        "fx": -2.6226799754185616e-15,
+        "fy": -6.052585134404201e-17,
+        "fz": -7.528447598623437e-17,
+        "lift": 2.1884383356812845e-17,
+        "mx": 4.703876405486247e-16,
+        "my": -1.5939545186267067e-14,
+        "mz": -1.8088263887839085e-16,
+        "sepsensor": 0.0,
+        "sepsensoravgx": 0.0,
+        "sepsensoravgy": 0.0,
+        "sepsensoravgz": 0.0
+    },
+    "dFuncs/dXv * xVDot": {
+        "cd": 4.252968273207523,
+        "cfx": 4.249035399663728,
+        "cfy": 0.19195702983176227,
+        "cfz": 0.3506482601505134,
+        "cl": 0.058396883371890924,
+        "cmx": -1.0927832721306252,
+        "cmy": -1.1999451606004616,
+        "cmz": 1.3595113636997629,
+        "drag": 1733850.105621243,
+        "fx": 1732246.7517349084,
+        "fy": 78257.04192181284,
+        "fz": 142952.28269816132,
+        "lift": 23807.241413052492,
+        "mx": -1447894.124242193,
+        "my": -1589879.3399891877,
+        "mz": 1801298.1764476378,
+        "sepsensor": -0.11694957310130713,
+        "sepsensoravgx": -0.6029863600829567,
+        "sepsensoravgy": 0.007262037328436887,
+        "sepsensoravgz": -0.09612647922522867
+    },
+    "dFuncs/dalpha": {
+        "cd": 0.007160125372297845,
+        "cfx": 0.0,
+        "cfy": 0.0,
+        "cfz": 0.0,
+        "cl": -0.0004287645790765541,
+        "cmx": 0.0,
+        "cmy": 0.0,
+        "cmz": 0.0,
+        "drag": 2919.0399117783854,
+        "fx": 0.0,
+        "fy": 0.0,
+        "fz": 0.0,
+        "lift": -174.79874359792956,
+        "mx": 0.0,
+        "my": 0.0,
+        "mz": 0.0,
+        "sepsensor": 0.000889013327828802,
+        "sepsensoravgx": 0.006211849207427457,
+        "sepsensoravgy": 1.1629154598405563e-06,
+        "sepsensoravgz": 0.006150040814983931
+    },
+    "dFuncs/dbeta": {
+        "cd": -0.007561670371126665,
+        "cfx": 0.0,
+        "cfy": 0.0,
+        "cfz": 0.0,
+        "cl": 0.00023763506481515236,
+        "cmx": 0.0,
+        "cmy": 0.0,
+        "cmz": 0.0,
+        "drag": -3082.741776900919,
+        "fx": 0.0,
+        "fy": 0.0,
+        "fz": 0.0,
+        "lift": 96.87906322384131,
+        "mx": 0.0,
+        "my": 0.0,
+        "mz": 0.0,
+        "sepsensor": 0.18136049395962298,
+        "sepsensoravgx": 1.106075491578891,
+        "sepsensoravgy": 0.0011646306998538993,
+        "sepsensoravgz": 0.7192972767550524
+    },
+    "dFuncs/dmach": {
+        "cd": -0.06141600196447448,
+        "cfx": -0.02917043250841127,
+        "cfy": -1.0270354569710043,
+        "cfz": -0.018913508602584252,
+        "cl": -1.0256124115430092,
+        "cmx": 1.9805037140293313,
+        "cmy": 0.022452508285283296,
+        "cmz": -1.573813816505595,
+        "drag": 0.0,
+        "fx": 0.0,
+        "fy": 0.0,
+        "fz": 0.0,
+        "lift": 0.0,
+        "mx": 0.0,
+        "my": 0.0,
+        "mz": 0.0,
+        "sepsensor": 0.0,
+        "sepsensoravgx": 0.0,
+        "sepsensoravgy": 0.0,
+        "sepsensoravgz": 0.0
+    },
+    "dFuncs/dw * wDot": {
+        "cd": 0.031101767260431203,
+        "cfx": 0.028800395325415784,
+        "cfy": 0.07371942797663707,
+        "cfz": 0.021716000987844,
+        "cl": 0.0727784096101363,
+        "cmx": -0.15842697054164168,
+        "cmy": 0.005024771647054986,
+        "cmz": 0.12014382033187082,
+        "drag": 12679.568476732593,
+        "fx": 11741.345166265506,
+        "fy": 30053.936397515397,
+        "fz": 8853.179282724243,
+        "lift": 29670.30202986037,
+        "mx": -209909.39888885355,
+        "my": 6657.621441481974,
+        "mz": 159185.75618691556,
+        "sepsensor": -12.044419253289895,
+        "sepsensoravgx": -68.10234813562754,
+        "sepsensoravgy": -0.08084780944322403,
+        "sepsensoravgz": -28.399498948465343
+    },
+    "dFuncs/dxRef": {
+        "cd": 0.0,
+        "cfx": 0.0,
+        "cfy": 0.0,
+        "cfz": 0.0,
+        "cl": 0.0,
+        "cmx": 0.0,
+        "cmy": 0.002327816443394985,
+        "cmz": -0.12640436393489288,
+        "drag": 0.0,
+        "fx": 0.0,
+        "fy": 0.0,
+        "fz": 0.0,
+        "lift": 0.0,
+        "mx": 0.0,
+        "my": 3084.2636748406194,
+        "mz": -167480.72603917564,
+        "sepsensor": 0.0,
+        "sepsensoravgx": 0.0,
+        "sepsensoravgy": 0.0,
+        "sepsensoravgz": 0.0
+    },
+    "dFuncs/dyRef": {
+        "cd": 0.0,
+        "cfx": 0.0,
+        "cfy": 0.0,
+        "cfz": 0.0,
+        "cl": 0.0,
+        "cmx": -0.002327816443394985,
+        "cmy": 0.0,
+        "cmz": 0.0035902070779583106,
+        "drag": 0.0,
+        "fx": 0.0,
+        "fy": 0.0,
+        "fz": 0.0,
+        "lift": 0.0,
+        "mx": -3084.2636748406194,
+        "my": 0.0,
+        "mz": 4756.880770011643,
+        "sepsensor": 0.0,
+        "sepsensoravgx": 0.0,
+        "sepsensoravgy": 0.0,
+        "sepsensoravgz": 0.0
+    },
+    "dFuncs/dzRef": {
+        "cd": 0.0,
+        "cfx": 0.0,
+        "cfy": 0.0,
+        "cfz": 0.0,
+        "cl": 0.0,
+        "cmx": 0.12640436393489288,
+        "cmy": -0.0035902070779583106,
+        "cmz": 0.0,
+        "drag": 0.0,
+        "fx": 0.0,
+        "fy": 0.0,
+        "fz": 0.0,
+        "lift": 0.0,
+        "mx": 167480.72603917564,
+        "my": -4756.880770011643,
+        "mz": 0.0,
+        "sepsensor": 0.0,
+        "sepsensoravgx": 0.0,
+        "sepsensoravgy": 0.0,
+        "sepsensoravgz": 0.0
+    },
+    "metadata": {
+        "ADPC": false,
+        "AGMGLevels": 1,
+        "AGMGNSmooth": 3,
+        "ANKADPC": false,
+        "ANKASMOverlap": 1,
+        "ANKCFL0": 5.0,
+        "ANKCFLCutback": 0.5,
+        "ANKCFLExponent": 0.5,
+        "ANKCFLFactor": 10.0,
+        "ANKCFLLimit": 100000.0,
+        "ANKCFLMin": 1.0,
+        "ANKConstCFLStep": 0.4,
+        "ANKCoupledSwitchTol": 1e-16,
+        "ANKInnerPreconIts": 1,
+        "ANKJacobianLag": 10,
+        "ANKLinResMax": 0.1,
+        "ANKLinearSolveTol": 0.05,
+        "ANKMaxIter": 40,
+        "ANKNSubiterTurb": 1,
+        "ANKOuterPreconIts": 1,
+        "ANKPCILUFill": 2,
+        "ANKPCUpdateCutoff": 1e-06,
+        "ANKPCUpdateTol": 0.5,
+        "ANKPhysicalLSTol": 0.2,
+        "ANKPhysicalLSTolTurb": 0.99,
+        "ANKSecondOrdSwitchTol": 0.01,
+        "ANKStepFactor": 1.0,
+        "ANKStepMin": 0.01,
+        "ANKSubspaceSize": -1,
+        "ANKSwitchTol": 0.01,
+        "ANKTurbCFLScale": 1.0,
+        "ANKTurbKSPDebug": false,
+        "ANKUnsteadyLSTol": 1.0,
+        "ANKUseApproxSA": false,
+        "ANKUseFullVisc": true,
+        "ANKUseMatrixFree": true,
+        "ANKUseTurbDADI": true,
+        "ASMOverlap": 1,
+        "CFL": 1.5,
+        "CFLCoarse": 1.25,
+        "CFLLimit": 1.5,
+        "GMRESOrthogonalizationType": "modified Gram-Schmidt",
+        "ILUFill": 2,
+        "L2Convergence": 1e-15,
+        "L2ConvergenceCoarse": 0.01,
+        "L2ConvergenceRel": 1e-16,
+        "MGCycle": "2w",
+        "MGStartLevel": -1,
+        "NKADPC": false,
+        "NKASMOverlap": 1,
+        "NKFixedStep": 0.25,
+        "NKInnerPreconIts": 1,
+        "NKJacobianLag": 2,
+        "NKLS": "cubic",
+        "NKLinearSolveTol": 0.3,
+        "NKOuterPreconIts": 1,
+        "NKPCILUFill": 2,
+        "NKSubspaceSize": 60,
+        "NKSwitchTol": 1e-05,
+        "NKUseEW": true,
+        "NKViscPC": false,
+        "RKReset": false,
+        "TSStability": false,
+        "adjointDivTol": 100000.0,
+        "adjointL2Convergence": 1e-16,
+        "adjointL2ConvergenceAbs": 1e-16,
+        "adjointL2ConvergenceRel": 1e-16,
+        "adjointMaxIter": 500,
+        "adjointMaxL2DeviationFactor": 1.0,
+        "adjointMonitorStep": 10,
+        "adjointSolver": "GMRES",
+        "adjointSubspaceSize": 100,
+        "alphaFollowing": true,
+        "alphaMode": false,
+        "altitudeMode": false,
+        "applyAdjointPCSubspaceSize": 20,
+        "applyPCSubspaceSize": 10,
+        "approxPC": true,
+        "backgroundVolScale": 1.0,
+        "betaMode": false,
+        "blockSplitting": true,
+        "cavExponent": 0,
+        "cavSensorOffset": 0.0,
+        "cavSensorSharpness": 10.0,
+        "cavitationNumber": 1.4,
+        "closedSurfaceFamilies": null,
+        "coarseDiscretization": "central plus scalar dissipation",
+        "computeCavitation": false,
+        "coupledSolution": false,
+        "cpMinRho": 100.0,
+        "cutCallback": null,
+        "debugZipper": false,
+        "deltaT": 0.01,
+        "designSurfaceFamily": null,
+        "discretization": "central plus scalar dissipation",
+        "dissipationLumpingParameter": 6.0,
+        "dissipationScalingExponent": 0.67,
+        "eddyVisInfRatio": 0.009,
+        "equationMode": "steady",
+        "equationType": "RANS",
+        "eulerWallTreatment": "linear pressure extrapolation",
+        "explicitSurfaceCallback": null,
+        "firstRun": true,
+        "flowType": "external",
+        "forcesAsTractions": true,
+        "frozenTurbulence": false,
+        "globalPreconditioner": "additive Schwarz",
+        "gridFile": "input_files/mdo_tutorial_rough.cgns",
+        "gridPrecision": "double",
+        "gridPrecisionSurface": "single",
+        "infChangeCorrection": true,
+        "innerPreconIts": 1,
+        "isoVariables": [],
+        "isosurface": {},
+        "liftIndex": 2,
+        "limiter": "van Albada",
+        "loadBalanceIter": 10,
+        "loadImbalance": 0.1,
+        "localPreconditioner": "ILU",
+        "lowSpeedPreconditioner": false,
+        "machMode": false,
+        "matrixOrdering": "RCM",
+        "maxL2DeviationFactor": 1.0,
+        "meshSurfaceFamily": null,
+        "monitorVariables": [
+            "resrho",
+            "resrho",
+            "resturb",
+            "cd"
+        ],
+        "nCycles": 1000,
+        "nCyclesCoarse": 100,
+        "nFloodIter": -1,
+        "nRKReset": 5,
+        "nRefine": 10,
+        "nSaveSurface": 1,
+        "nSaveVolume": 1,
+        "nSubiter": 3,
+        "nSubiterTurb": 3,
+        "nTimeStepsCoarse": 48,
+        "nTimeStepsFine": 400,
+        "nearWallDist": 0.1,
+        "numberSolutions": true,
+        "outerPreconIts": 3,
+        "outputDirectory": "tests/output_files",
+        "outputSurfaceFamily": "allSurfaces",
+        "overlapFactor": 0.9,
+        "oversetDebugPrint": false,
+        "oversetLoadBalance": true,
+        "oversetPriority": {},
+        "oversetProjTol": 1e-12,
+        "oversetUpdateMode": "frozen",
+        "pMode": false,
+        "partitionLikeNProc": -1,
+        "partitionOnly": false,
+        "preconditionerSide": "right",
+        "printAllOptions": true,
+        "printIntro": true,
+        "printIterations": true,
+        "printTiming": true,
+        "printWarnings": true,
+        "qMode": false,
+        "rMode": false,
+        "resAveraging": "never",
+        "restartAdjoint": true,
+        "restartFile": "input_files/mdo_tutorial_rough.cgns",
+        "restrictionRelaxation": 0.8,
+        "selfZipCutoff": 120.0,
+        "sepSensorOffset": 0.0,
+        "sepSensorSharpness": 10.0,
+        "setMonitor": true,
+        "skipAfterFailedAdjoint": true,
+        "smoothParameter": 1.5,
+        "smoother": "DADI",
+        "solutionPrecision": "single",
+        "solutionPrecisionSurface": "single",
+        "storeConvHist": true,
+        "storeRindLayer": true,
+        "surfaceVariables": [
+            "cp",
+            "vx",
+            "vy",
+            "vz",
+            "mach"
+        ],
+        "timeAccuracy": 2,
+        "timeIntegrationScheme": "BDF",
+        "timeIntervals": 1,
+        "timeLimit": -1.0,
+        "turbResScale": 10000.0,
+        "turbulenceModel": "SA",
+        "turbulenceOrder": "first order",
+        "turbulenceProduction": "strain",
+        "useALE": true,
+        "useANKSolver": true,
+        "useApproxWallDistance": true,
+        "useBlockettes": false,
+        "useDiagTSPC": true,
+        "useExternalDynamicMesh": false,
+        "useGridMotion": false,
+        "useLinResMonitor": false,
+        "useMatrixFreedrdw": true,
+        "useNKSolver": true,
+        "useOversetWallScaling": false,
+        "useQCR": false,
+        "useRotationSA": false,
+        "useRoughSA": true,
+        "useTSInterpolatedGridVelocity": false,
+        "useWallFunctions": false,
+        "useZipperMesh": true,
+        "useft2SA": true,
+        "verifyExtra": true,
+        "verifySpatial": true,
+        "verifyState": true,
+        "vis2": 0.25,
+        "vis2Coarse": 0.5,
+        "vis4": 0.0156,
+        "viscPC": false,
+        "viscWallTreatment": "constant pressure extrapolation",
+        "viscousSurfaceVelocities": true,
+        "volumeVariables": [
+            "resrho"
+        ],
+        "wallDistCutoff": 1e+20,
+        "windAxis": false,
+        "writeSolutionEachIter": false,
+        "writeSurfaceSolution": true,
+        "writeTecplotSurfaceSolution": false,
+        "writeVolumeSolution": true,
+        "zipperSurfaceFamily": null
+    },
+    "||FBar^T * dF/dXv||": 47500739.33793808,
+    "||FBar^T * dF/dw||": 159520.82502357475,
+    "||FBar^T * dF/xDv||": {
+        "P_mdo_tutorial": 57.286099642092886,
+        "T_mdo_tutorial": 9.947598300641403e-14,
+        "alpha_mdo_tutorial": 0.0,
+        "beta_mdo_tutorial": 0.0,
+        "mach_mdo_tutorial": 0.0,
+        "xRef_mdo_tutorial": 0.0,
+        "yRef_mdo_tutorial": 0.0,
+        "zRef_mdo_tutorial": 0.0
+    },
+    "||dF/dP||": 5.592228638945923,
+    "||dF/dT||": 1.259326633802106e-15,
+    "||dF/dXv * xVDot||": 7581135.361357855,
+    "||dF/dalpha||": 0.0,
+    "||dF/dbeta||": 0.0,
+    "||dF/dmach||": 0.0,
+    "||dF/dw * wDot||": 131479.675920046,
+    "||dF/dxRef||": 0.0,
+    "||dF/dyRef||": 0.0,
+    "||dF/dzRef||": 0.0,
+    "||dR/dP||": 0.07356605182246512,
+    "||dR/dT||": 9.22063005758926e-07,
+    "||dR/dXv * xVDot||": 26736790.15992888,
+    "||dR/dalpha||": 0.010271084828244875,
+    "||dR/dbeta||": 0.6070879776942454,
+    "||dR/dmach||": 0.9283860217641269,
+    "||dR/dw * wDot||": 2695502679.7876015,
+    "||dR/dxRef||": 0.0,
+    "||dR/dyRef||": 0.0,
+    "||dR/dzRef||": 0.0,
+    "||dcd/dXdv||": {
+        "P_mdo_tutorial": 1.0255863078366315e-07,
+        "T_mdo_tutorial": 6.776263578034403e-21,
+        "alpha_mdo_tutorial": 0.007160125372297843,
+        "beta_mdo_tutorial": -0.00013197604492666193,
+        "mach_mdo_tutorial": -0.06141600196447446,
+        "xRef_mdo_tutorial": 0.0,
+        "yRef_mdo_tutorial": 0.0,
+        "zRef_mdo_tutorial": 0.0
+    },
+    "||dcd/dXv||": 27.91337076794424,
+    "||dcd/dw||": 0.014161257652762545,
+    "||dcfx/dXdv||": {
+        "P_mdo_tutorial": 1.0254457111038001e-07,
+        "T_mdo_tutorial": 1.0164395367051604e-20,
+        "alpha_mdo_tutorial": 0.0,
+        "beta_mdo_tutorial": 0.0,
+        "mach_mdo_tutorial": -0.02917043250841126,
+        "xRef_mdo_tutorial": 0.0,
+        "yRef_mdo_tutorial": 0.0,
+        "zRef_mdo_tutorial": 0.0
+    },
+    "||dcfx/dXv||": 27.912716195114633,
+    "||dcfx/dw||": 0.014090362780761697,
+    "||dcfy/dXdv||": {
+        "P_mdo_tutorial": 2.058505774664427e-09,
+        "T_mdo_tutorial": 1.376428539288238e-21,
+        "alpha_mdo_tutorial": 0.0,
+        "beta_mdo_tutorial": 0.0,
+        "mach_mdo_tutorial": -1.0270354569710038,
+        "xRef_mdo_tutorial": 0.0,
+        "yRef_mdo_tutorial": 0.0,
+        "zRef_mdo_tutorial": 0.0
+    },
+    "||dcfy/dXv||": 2.7931029821306077,
+    "||dcfy/dw||": 0.11354907277670545,
+    "||dcfz/dXdv||": {
+        "P_mdo_tutorial": 3.0809570252252938e-09,
+        "T_mdo_tutorial": 6.352747104407253e-22,
+        "alpha_mdo_tutorial": 0.0,
+        "beta_mdo_tutorial": 0.0,
+        "mach_mdo_tutorial": -0.018913508602584256,
+        "xRef_mdo_tutorial": 0.0,
+        "yRef_mdo_tutorial": 0.0,
+        "zRef_mdo_tutorial": 0.0
+    },
+    "||dcfz/dXv||": 2.519605387991788,
+    "||dcfz/dw||": 0.007655345743081828,
+    "||dcl/dXdv||": {
+        "P_mdo_tutorial": -1.1635127915569029e-09,
+        "T_mdo_tutorial": 1.5881867761018131e-22,
+        "alpha_mdo_tutorial": -0.000428764579076554,
+        "beta_mdo_tutorial": 4.147514299214541e-06,
+        "mach_mdo_tutorial": -1.0256124115430092,
+        "xRef_mdo_tutorial": 0.0,
+        "yRef_mdo_tutorial": 0.0,
+        "zRef_mdo_tutorial": 0.0
+    },
+    "||dcl/dXv||": 2.786553790809974,
+    "||dcl/dw||": 0.11354025292131026,
+    "||dcmx/dXdv||": {
+        "P_mdo_tutorial": -5.02580368859844e-09,
+        "T_mdo_tutorial": 3.1763735522036263e-21,
+        "alpha_mdo_tutorial": 0.0,
+        "beta_mdo_tutorial": 0.0,
+        "mach_mdo_tutorial": 1.9805037140293307,
+        "xRef_mdo_tutorial": 0.0,
+        "yRef_mdo_tutorial": -0.002327816443394987,
+        "zRef_mdo_tutorial": 0.12640436393489274
+    },
+    "||dcmx/dXv||": 5.614216937125317,
+    "||dcmx/dw||": 0.21272941416694607,
+    "||dcmy/dXdv||": {
+        "P_mdo_tutorial": 1.8366346879813387e-07,
+        "T_mdo_tutorial": -2.0328790734103208e-20,
+        "alpha_mdo_tutorial": 0.0,
+        "beta_mdo_tutorial": 0.0,
+        "mach_mdo_tutorial": 0.022452508285283296,
+        "xRef_mdo_tutorial": 0.002327816443394987,
+        "yRef_mdo_tutorial": 0.0,
+        "zRef_mdo_tutorial": -0.0035902070779583175
+    },
+    "||dcmy/dXv||": 54.53814156559552,
+    "||dcmy/dw||": 0.033839731077361684,
+    "||dcmz/dXdv||": {
+        "P_mdo_tutorial": 2.070089767240118e-09,
+        "T_mdo_tutorial": 2.117582368135751e-22,
+        "alpha_mdo_tutorial": 0.0,
+        "beta_mdo_tutorial": 0.0,
+        "mach_mdo_tutorial": -1.5738138165055944,
+        "xRef_mdo_tutorial": -0.12640436393489274,
+        "yRef_mdo_tutorial": 0.0035902070779583175,
+        "zRef_mdo_tutorial": 0.0
+    },
+    "||dcmz/dXv||": 3.669722758336583,
+    "||dcmz/dw||": 0.1701587174303123,
+    "||ddrag/dXdv||": {
+        "P_mdo_tutorial": 0.5425726162154224,
+        "T_mdo_tutorial": 1.7763568394002505e-15,
+        "alpha_mdo_tutorial": 2919.039911778386,
+        "beta_mdo_tutorial": -53.803993995701546,
+        "mach_mdo_tutorial": 0.0,
+        "xRef_mdo_tutorial": 0.0,
+        "yRef_mdo_tutorial": 0.0,
+        "zRef_mdo_tutorial": 0.0
+    },
+    "||ddrag/dXv||": 11379722.994675511,
+    "||ddrag/dw||": 5773.261519878235,
+    "||dfx/dXdv||": {
+        "P_mdo_tutorial": 0.279649409250863,
+        "T_mdo_tutorial": -1.0658141036401503e-14,
+        "alpha_mdo_tutorial": 0.0,
+        "beta_mdo_tutorial": 0.0,
+        "mach_mdo_tutorial": 0.0,
+        "xRef_mdo_tutorial": 0.0,
+        "yRef_mdo_tutorial": 0.0,
+        "zRef_mdo_tutorial": 0.0
+    },
+    "||dfx/dXv||": 11379456.138424335,
+    "||dfx/dw||": 5744.359098460929,
+    "||dfy/dXdv||": {
+        "P_mdo_tutorial": 8.374875513592972,
+        "T_mdo_tutorial": 8.326672684688674e-17,
+        "alpha_mdo_tutorial": 0.0,
+        "beta_mdo_tutorial": 0.0,
+        "mach_mdo_tutorial": 0.0,
+        "xRef_mdo_tutorial": 0.0,
+        "yRef_mdo_tutorial": 0.0,
+        "zRef_mdo_tutorial": 0.0
+    },
+    "||dfy/dXv||": 1138692.2237550062,
+    "||dfy/dw||": 46291.685989607286,
+    "||dfz/dXdv||": {
+        "P_mdo_tutorial": 0.1554692283020747,
+        "T_mdo_tutorial": -5.551115123125783e-17,
+        "alpha_mdo_tutorial": 0.0,
+        "beta_mdo_tutorial": 0.0,
+        "mach_mdo_tutorial": 0.0,
+        "xRef_mdo_tutorial": 0.0,
+        "yRef_mdo_tutorial": 0.0,
+        "zRef_mdo_tutorial": 0.0
+    },
+    "||dfz/dXv||": 1027192.7245764921,
+    "||dfz/dw||": 3120.931352539599,
+    "||dlift/dXdv||": {
+        "P_mdo_tutorial": 8.361959017862194,
+        "T_mdo_tutorial": -2.914335439641036e-16,
+        "alpha_mdo_tutorial": -174.7987435979296,
+        "beta_mdo_tutorial": 1.6908586295037842,
+        "mach_mdo_tutorial": 0.0,
+        "xRef_mdo_tutorial": 0.0,
+        "yRef_mdo_tutorial": 0.0,
+        "zRef_mdo_tutorial": 0.0
+    },
+    "||dlift/dXv||": 1136022.2494374104,
+    "||dlift/dw||": 46288.09031095977,
+    "||dmx/dXdv||": {
+        "P_mdo_tutorial": -52.48842300766153,
+        "T_mdo_tutorial": -3.9968028886505635e-15,
+        "alpha_mdo_tutorial": 0.0,
+        "beta_mdo_tutorial": 0.0,
+        "mach_mdo_tutorial": 0.0,
+        "xRef_mdo_tutorial": 0.0,
+        "yRef_mdo_tutorial": -3084.263674840622,
+        "zRef_mdo_tutorial": 167480.72603917567
+    },
+    "||dmx/dXv||": 7438612.873013561,
+    "||dmx/dw||": 281857.96459463687,
+    "||dmy/dXdv||": {
+        "P_mdo_tutorial": -0.35162675793460774,
+        "T_mdo_tutorial": 8.526512829121202e-14,
+        "alpha_mdo_tutorial": 0.0,
+        "beta_mdo_tutorial": 0.0,
+        "mach_mdo_tutorial": 0.0,
+        "xRef_mdo_tutorial": 3084.263674840622,
+        "yRef_mdo_tutorial": 0.0,
+        "zRef_mdo_tutorial": -4756.880770011648
+    },
+    "||dmy/dXv||": 72260856.04875143,
+    "||dmy/dw||": 44836.290088261136,
+    "||dmz/dXdv||": {
+        "P_mdo_tutorial": 41.70754987248287,
+        "T_mdo_tutorial": -6.661338147750939e-16,
+        "alpha_mdo_tutorial": 0.0,
+        "beta_mdo_tutorial": 0.0,
+        "mach_mdo_tutorial": 0.0,
+        "xRef_mdo_tutorial": -167480.72603917567,
+        "yRef_mdo_tutorial": 4756.880770011648,
+        "zRef_mdo_tutorial": 0.0
+    },
+    "||dmz/dXv||": 4862235.8658856405,
+    "||dmz/dw||": 225453.4942464666,
+    "||dsepsensor/dXdv||": {
+        "P_mdo_tutorial": 0.0,
+        "T_mdo_tutorial": 0.0,
+        "alpha_mdo_tutorial": 0.0008890133278288007,
+        "beta_mdo_tutorial": 0.003165337752638709,
+        "mach_mdo_tutorial": 0.0,
+        "xRef_mdo_tutorial": 0.0,
+        "yRef_mdo_tutorial": 0.0,
+        "zRef_mdo_tutorial": 0.0
+    },
+    "||dsepsensor/dXv||": 0.5244726837411959,
+    "||dsepsensor/dw||": 8.307879565539196,
+    "||dsepsensoravgx/dXdv||": {
+        "P_mdo_tutorial": 0.0,
+        "T_mdo_tutorial": 0.0,
+        "alpha_mdo_tutorial": 0.0062118492074274565,
+        "beta_mdo_tutorial": 0.01930465910366647,
+        "mach_mdo_tutorial": 0.0,
+        "xRef_mdo_tutorial": 0.0,
+        "yRef_mdo_tutorial": 0.0,
+        "zRef_mdo_tutorial": 0.0
+    },
+    "||dsepsensoravgx/dXv||": 3.355503366983782,
+    "||dsepsensoravgx/dw||": 42.297878810572435,
+    "||dsepsensoravgy/dXdv||": {
+        "P_mdo_tutorial": 0.0,
+        "T_mdo_tutorial": 0.0,
+        "alpha_mdo_tutorial": 1.1629154598405565e-06,
+        "beta_mdo_tutorial": 2.032664028225635e-05,
+        "mach_mdo_tutorial": 0.0,
+        "xRef_mdo_tutorial": 0.0,
+        "yRef_mdo_tutorial": 0.0,
+        "zRef_mdo_tutorial": 0.0
+    },
+    "||dsepsensoravgy/dXv||": 0.0037955898018984654,
+    "||dsepsensoravgy/dw||": 0.07333646978426119,
+    "||dsepsensoravgz/dXdv||": {
+        "P_mdo_tutorial": 0.0,
+        "T_mdo_tutorial": 0.0,
+        "alpha_mdo_tutorial": 0.006150040814983931,
+        "beta_mdo_tutorial": 0.012554105780004536,
+        "mach_mdo_tutorial": 0.0,
+        "xRef_mdo_tutorial": 0.0,
+        "yRef_mdo_tutorial": 0.0,
+        "zRef_mdo_tutorial": 0.0
+    },
+    "||dsepsensoravgz/dXv||": 3.3049236735056238,
+    "||dsepsensoravgz/dw||": 10.598482329462021,
+    "||dwBar^T * dR/dXv||": 111601096.04341303,
+    "||dwBar^T * dR/dw||": 2217397593.2762733,
+    "||dwBar^T * dR/xDv||": {
+        "P_mdo_tutorial": 0.8893628179225441,
+        "T_mdo_tutorial": -7.760276361068463e-06,
+        "alpha_mdo_tutorial": -0.0020004837586182936,
+        "beta_mdo_tutorial": -0.025156969826717016,
+        "mach_mdo_tutorial": -3.2833710241496035,
+        "xRef_mdo_tutorial": 0.0,
+        "yRef_mdo_tutorial": 0.0,
+        "zRef_mdo_tutorial": 0.0
+    }
+}
\ No newline at end of file
diff --git a/tests/reg_tests/test_adjoint.py b/tests/reg_tests/test_adjoint.py
index 13bbbdb80..de0ad4daf 100644
--- a/tests/reg_tests/test_adjoint.py
+++ b/tests/reg_tests/test_adjoint.py
@@ -209,6 +209,50 @@ def span(val, geo):
         "evalFuncs": ["fy", "my"],
         "N_PROCS": 2,
     },
+    # Rough SA test
+    {
+        "name": "Rough_SA_wing",
+        "options": {
+            "gridFile": os.path.join(baseDir, "../../input_files/mdo_tutorial_rough.cgns"),
+            "restartFile": os.path.join(baseDir, "../../input_files/mdo_tutorial_rough.cgns"),
+            'equationType':'RANS',
+            'useBlockettes': False,
+            'useRoughSA': True,
+
+            "MGCycle": "2w",
+            "equationType": "RANS",
+            "smoother": "DADI",
+            "CFL": 1.5,
+            "CFLCoarse": 1.25,
+            "resAveraging": "never",
+            "nSubiter": 3,
+            "nSubiterTurb": 3,
+            "nCyclesCoarse": 100,
+            "nCycles": 1000,
+            "monitorVariables": ["resrho", "resturb", "cd"],
+            "volumeVariables": ["resrho"],
+            "useNKsolver": True,
+            "ANKSwitchTol": 1e-2,
+            "ANKSecondordSwitchTol": 1e-2,
+            "NKSwitchTol": 1e-5,
+            "NKjacobianlag": 2,
+            "L2Convergence": 1e-15,
+
+            "adjointL2Convergence": 1e-16,
+
+            # to get slightly better complex convergence
+            "NKUseEW": False,
+            "NKLinearSolveTol": 1e-6,
+
+
+        },
+        "ref_file": "adjoint_rans_rough_sa.json",
+        "aero_prob": ap_tutorial_wing,
+        "evalFuncs": ["cl", "cd", "cmz", "lift", "drag"],
+        # "evalFuncs": ["cd"],
+        "N_PROCS": 2,
+    },
+
 ]
 
 
@@ -387,7 +431,8 @@ def cmplx_test_geom_dvs(self):
             rtol = 5e-9
         atol = 5e-9
 
-        for dv in ["span", "twist", "shape"]:
+        # for dv in ["span", "twist", "shape"]:
+        for dv in ["shape"]:
 
             xRef[dv][0] += self.h * 1j
 
diff --git a/tests/reg_tests/test_functionals.py b/tests/reg_tests/test_functionals.py
index 990030700..8bfdb44fe 100644
--- a/tests/reg_tests/test_functionals.py
+++ b/tests/reg_tests/test_functionals.py
@@ -202,6 +202,75 @@
             "ref_file": "funcs_euler_scalar_jst_CRM_WBT.json",
             "aero_prob": ap_CRM,
         },
+        # Rough SA test
+        {
+            "name": "Rough_SA_wing",
+            "options": {
+                "gridFile": os.path.join(baseDir, "../../input_files/mdo_tutorial_rough.cgns"),
+                "restartFile": os.path.join(baseDir, "../../input_files/mdo_tutorial_rough.cgns"),
+                'equationType':'RANS',
+                'useBlockettes': False,
+                'useRoughSA': True,
+
+                "MGCycle": "2w",
+                "equationType": "RANS",
+                "smoother": "DADI",
+                "CFL": 1.5,
+                "CFLCoarse": 1.25,
+                "resAveraging": "never",
+                "nSubiter": 3,
+                "nSubiterTurb": 3,
+                "nCyclesCoarse": 100,
+                "nCycles": 1000,
+                "monitorVariables": ["resrho", "resrho", "resturb", "cd"],
+                "volumeVariables": ["resrho"],
+                "useNKsolver": True,
+                "ANKSwitchTol": 1e-2,
+                "ANKSecondordSwitchTol": 1e-2,
+                "L2Convergence": 1e-15,
+                "NKSwitchTol": 1e-5,
+                "adjointL2Convergence": 1e-16,
+                "blockSplitting": True,
+                "NKjacobianlag": 2,
+            },
+            "ref_file": "funcs_rans_rough_sa.json",
+            "aero_prob": ap_tutorial_wing,
+        },
+        # Rough Tutorial wing RANS
+        # This test makes sure a roughness value of 0 equals the standard SA model
+        # It checks if the outcome is consistent with the test 'rans_tut_wing'
+        {
+            "name": "Rough_SA_rans_tut_wing",
+            "options": {
+                "gridfile": os.path.join(baseDir, "../../input_files/mdo_tutorial_rans_scalar_jst.cgns"),
+                "restartfile": os.path.join(baseDir, "../../input_files/mdo_tutorial_rans_scalar_jst.cgns"),
+                "useBlockettes": False,
+                "useRoughSA": True,
+                "mgcycle": "sg",
+                "equationtype": "RANS",
+                "smoother": "DADI",
+                "cfl": 1.5,
+                "cflcoarse": 1.25,
+                "resaveraging": "never",
+                "nsubiter": 3,
+                "nsubiterturb": 3,
+                "ncyclescoarse": 100,
+                "ncycles": 1000,
+                "monitorvariables": ["cpu", "resrho", "resturb", "cl", "cd", "cmz", "yplus", "totalr"],
+                "usenksolver": True,
+                "l2convergence": 1e-14,
+                "l2convergencecoarse": 1e-4,
+                "nkswitchtol": 1e-3,
+                "adjointl2convergence": 1e-14,
+                "frozenturbulence": False,
+            },
+            "ref_file": "funcs_rans_tut_wing.json",
+            "aero_prob": ap_tutorial_wing,
+            "no_train": True,       # This test should not be able to over-write
+                                    # the training file as it is coming from a different test
+        },
+
+
     ]
 )
 class TestFunctionals(reg_test_classes.RegTest):

From b9523cef2648addca3f07507df25bf5873cbd280 Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Thu, 16 Feb 2023 17:01:25 +0100
Subject: [PATCH 42/60] rerun tapenade

---
 src/adjoint/outputForward/bcdata_d.f90 | 4 ----
 src/adjoint/outputReverse/bcdata_b.f90 | 2 --
 2 files changed, 6 deletions(-)

diff --git a/src/adjoint/outputForward/bcdata_d.f90 b/src/adjoint/outputForward/bcdata_d.f90
index b2895d375..4f786a0c7 100644
--- a/src/adjoint/outputForward/bcdata_d.f90
+++ b/src/adjoint/outputForward/bcdata_d.f90
@@ -409,8 +409,6 @@ subroutine bcdataisothermalwall_d(boco, bcvararray, bcvararrayd, ibeg&
         end do
       end do
     end if
- 100 format('zone ',a,', boundary subface ',a, &
-&          ': wall temperature not specified for isothermal wall')
   end subroutine bcdataisothermalwall_d
 ! ---------------------------------------------------------------
 ! routines that set the actual bcdata values from the cgns data set
@@ -466,8 +464,6 @@ subroutine bcdataisothermalwall(boco, bcvararray, ibeg, iend, jbeg, &
         end do
       end do
     end if
- 100 format('zone ',a,', boundary subface ',a, &
-&          ': wall temperature not specified for isothermal wall')
   end subroutine bcdataisothermalwall
   subroutine bcdataadiabaticwall(boco, bcvararray, ibeg, iend, jbeg, &
 &   jend)
diff --git a/src/adjoint/outputReverse/bcdata_b.f90 b/src/adjoint/outputReverse/bcdata_b.f90
index dcff0f9d5..0239fbdf4 100644
--- a/src/adjoint/outputReverse/bcdata_b.f90
+++ b/src/adjoint/outputReverse/bcdata_b.f90
@@ -442,8 +442,6 @@ subroutine bcdataisothermalwall(boco, bcvararray, ibeg, iend, jbeg, &
         end do
       end do
     end if
- 100 format('zone ',a,', boundary subface ',a, &
-&          ': wall temperature not specified for isothermal wall')
   end subroutine bcdataisothermalwall
   subroutine bcdataadiabaticwall(boco, bcvararray, ibeg, iend, jbeg, &
 &   jend)

From e3883893ac50211c0ff68178961a6b54959e1e32 Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Thu, 16 Feb 2023 17:05:51 +0100
Subject: [PATCH 43/60] run black

---
 adflow/pyADflow.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/adflow/pyADflow.py b/adflow/pyADflow.py
index 597111222..ccac210df 100644
--- a/adflow/pyADflow.py
+++ b/adflow/pyADflow.py
@@ -1224,11 +1224,11 @@ def setSurfaceRoughness(self, ks, groupName=None):
             Family group to use. Default to all walls if not given (None)
         """
 
-        if not self.options['useroughsa']:
-           raise Error (
-               "It is not possible to set a surface roughness value without "
-               "using the rough SA-variant (useRoughSA = False)"
-           )
+        if not self.options["useroughsa"]:
+            raise Error(
+                "It is not possible to set a surface roughness value without "
+                "using the rough SA-variant (useRoughSA = False)"
+            )
 
         if groupName is None:
             groupName = self.allWallsGroup
@@ -3441,7 +3441,6 @@ def _setAeroProblemData(self, aeroProblem, firstCall=False):
             # Propagate roughness values through volume
             self.adflow.walldistance.updatewallroughness()
 
-
     def _getBCDataFromAeroProblem(self, AP):
         variables = []
         dataArray = []
@@ -5651,7 +5650,7 @@ def _getOptionMap(self):
             "useqcr": ["physics", "useqcr"],
             "userotationsa": ["physics", "userotationsa"],
             "useft2sa": ["physics", "useft2sa"],
-            "useroughsa":["physics", "useroughsa"],
+            "useroughsa": ["physics", "useroughsa"],
             "eddyvisinfratio": ["physics", "eddyvisinfratio"],
             "usewallfunctions": ["physics", "wallfunctions"],
             "walldistcutoff": ["physics", "walldistcutoff"],

From 0d8a7bc700fe32ef0ec44000b031d009b89f6818 Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Thu, 16 Feb 2023 17:20:53 +0100
Subject: [PATCH 44/60] Update docs

---
 doc/options.yaml | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/doc/options.yaml b/doc/options.yaml
index 667c20bac..c1ba89692 100644
--- a/doc/options.yaml
+++ b/doc/options.yaml
@@ -237,6 +237,15 @@ useft2SA:
   desc: >
     Include the ft2 term in the Spalart-Allmaras turbulence model.
 
+useRoughSA:
+  desc: >
+    Activates the rough SA variant described `here
+    <https://turbmodels.larc.nasa.gov/spalart.html#sarough>`__. To use it,
+    blockettes must be disabled ``useBlockettes: False``. The roughness value on
+    the boundary may be prescribed like a regular boundary value using the name
+    ``SandGrainRoughness``. Additionally, the ADflow method ``setSurfaceRoughness``
+    may be used.
+
 eddyVisInfRatio:
   desc: >
     Free stream value of eddy viscosity.
@@ -1414,4 +1423,4 @@ cavSensorSharpness:
 
 cavExponent:
   desc: >
-    The exponent for the numerator term (- Cp - cavitationnumber) of the cavitation sensor.
\ No newline at end of file
+    The exponent for the numerator term (- Cp - cavitationnumber) of the cavitation sensor.

From fe1fec16ac438139557ba9d0e85f348520ad2fc0 Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Thu, 16 Feb 2023 17:23:38 +0100
Subject: [PATCH 45/60] run flake8

---
 tests/reg_tests/test_adjoint.py     | 2 +-
 tests/reg_tests/test_functionals.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/reg_tests/test_adjoint.py b/tests/reg_tests/test_adjoint.py
index d802732f9..a69006dd7 100644
--- a/tests/reg_tests/test_adjoint.py
+++ b/tests/reg_tests/test_adjoint.py
@@ -214,7 +214,7 @@ def span(val, geo):
         "options": {
             "gridFile": os.path.join(baseDir, "../../input_files/mdo_tutorial_rough.cgns"),
             "restartFile": os.path.join(baseDir, "../../input_files/mdo_tutorial_rough.cgns"),
-            'equationType':'RANS',
+            'equationType': 'RANS',
             'useBlockettes': False,
             'useRoughSA': True,
 
diff --git a/tests/reg_tests/test_functionals.py b/tests/reg_tests/test_functionals.py
index 8bfdb44fe..6b0c58896 100644
--- a/tests/reg_tests/test_functionals.py
+++ b/tests/reg_tests/test_functionals.py
@@ -208,7 +208,7 @@
             "options": {
                 "gridFile": os.path.join(baseDir, "../../input_files/mdo_tutorial_rough.cgns"),
                 "restartFile": os.path.join(baseDir, "../../input_files/mdo_tutorial_rough.cgns"),
-                'equationType':'RANS',
+                'equationType': 'RANS',
                 'useBlockettes': False,
                 'useRoughSA': True,
 

From 450485e76fc3021efd6ef057d68e6450c3abddf9 Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Tue, 21 Feb 2023 10:58:58 +0100
Subject: [PATCH 46/60] run fprettify

---
 src/bcdata/BCData.F90                  |  6475 ++++++------
 src/inputParam/inputParamRoutines.F90  |  7770 ++++++++-------
 src/modules/block.F90                  |  2186 ++--
 src/modules/blockPointers.F90          |   461 +-
 src/modules/cgnsNames.f90              |   480 +-
 src/modules/constants.F90              |   995 +-
 src/modules/extraOutput.f90            |    83 +-
 src/modules/inputParam.F90             |  1519 ++-
 src/modules/paramTurb.F90              |   176 +-
 src/modules/wallDistanceData.F90       |    59 +-
 src/output/outputMod.F90               |  7211 +++++++-------
 src/preprocessing/preprocessingAPI.F90 |  7639 +++++++-------
 src/turbulence/sa.F90                  |  2335 +++--
 src/turbulence/turbBCRoutines.F90      |  2719 +++--
 src/utils/utils.F90                    | 12195 +++++++++++------------
 src/wallDistance/wallDistance.F90      |  4176 ++++----
 16 files changed, 28187 insertions(+), 28292 deletions(-)

diff --git a/src/bcdata/BCData.F90 b/src/bcdata/BCData.F90
index 4c7a221fa..830bfa65a 100644
--- a/src/bcdata/BCData.F90
+++ b/src/bcdata/BCData.F90
@@ -1,3589 +1,3580 @@
 module BCData
-  use constants
-  use BCDataMod
-
-contains
-  ! ---------------------------------------------------------------
-  ! Routines that set the appropriate variable names for BCs with
-  ! BCdata.
-
-  subroutine setBCVarNamesIsothermalWall
-    use cgnsNames
-    use constants
-    use inputPhysics, only : useRoughSA
-    implicit none
-    nbcVar = nbcVarIsothermalWall
-    bcVarNames(1) = cgnsTemp
-
-    if (useRoughSA) then
-       nbcVar = nbcVar + 1
-       bcVarNames(2) = cgnsSandGrainRoughness
-    end if
-
-  end subroutine setBCVarNamesIsothermalWall
-
-  subroutine setBCVarNamesAdiabaticWall
-    use cgnsNames
-    use constants
-    use inputPhysics, only : useRoughSA
-    implicit none
-    nbcVar = nbcVarAdiabaticWall
-
-    if (useRoughSA) then
-       nbcVar = nbcVar + 1
-       bcVarNames(1) = cgnsSandGrainRoughness
-    end if
-
-  end subroutine setBCVarNamesAdiabaticWall
-
-  subroutine setBCVarNamesSubsonicInflow
-    use constants
-    use cgnsNames
-    use inputPhysics, only : equations
-    use flowVarRefState, only : nwt
-    implicit none
-    !
-    !      Local variables.
-    !
-    logical :: varAllowed
-
-    nbcVar = nbcVarSubsonicInflow
-    if(equations == RANSEquations) then
-       nbcVar = nbcVar + nwt
-    end if
-
-    bcVarNames(1)  = cgnsPtot
-    bcVarNames(2)  = cgnsTtot
-    bcVarNames(3)  = cgnsRhotot
-    bcVarNames(4)  = cgnsVelAnglex
-    bcVarNames(5)  = cgnsVelAngley
-    bcVarNames(6)  = cgnsVelAnglez
-    bcVarNames(7)  = cgnsVelVecx
-    bcVarNames(8)  = cgnsVelVecy
-    bcVarNames(9)  = cgnsVelVecz
-    bcVarNames(10) = cgnsVelVecr
-    bcVarNames(11) = cgnsVelVectheta
-    bcVarNames(12) = cgnsDensity
-    bcVarNames(13) = cgnsVelx
-    bcVarNames(14) = cgnsVely
-    bcVarNames(15) = cgnsVelz
-    bcVarNames(16) = cgnsVelr
-    bcVarNames(17) = cgnsVeltheta
-
-    call setBcVarNamesTurb(17_intType)
-
-  end subroutine setBCVarNamesSubsonicInflow
-
-  subroutine setBCVarNamesSubsonicOutflow
-    use cgnsNames
     use constants
-    use flowVarRefState, only : nwt
-
-    nbcVar = nbcVarSubsonicOutflow
-
-    bcVarNames(1) = cgnsPressure
+    use BCDataMod
 
-  end subroutine setBCVarNamesSubsonicOutflow
-
-  subroutine setBCVarNamesSupersonicInflow
-    use constants
-    use cgnsNames
-    use inputPhysics, only : equations
-    use flowVarRefState, only : nwt
-
-    nbcVar = nbcVarSupersonicInflow
-    if(equations == RANSEquations) then
-       nbcVar = nbcVar + nwt
-    end if
-
-    bcVarNames(1) = cgnsDensity
-    bcVarNames(2) = cgnsPressure
-    bcVarNames(3) = cgnsVelx
-    bcVarNames(4) = cgnsVely
-    bcVarNames(5) = cgnsVelz
-    bcVarNames(6) = cgnsVelr
-    bcVarNames(7) = cgnsVeltheta
-
-    call setBCVarNamesTurb(7_intType)
-
-  end subroutine setBCVarNamesSupersonicInflow
-
-  subroutine setBCVarNamesTurb(offset)
-    !
-    !       setBCVarNamesTurb sets the names for the turbulence
-    !       variables to be determined. This depends on the turbulence
-    !       model. If not the RANS equations are solved an immediate
-    !       return is made.
-    !
-    use constants
-    use cgnsNames
-    use inputPhysics, only : equations, turbModel
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: offset
-
-    ! Return immediately if not the RANS equations are solved.
-
-    if(equations /= RANSEquations) return
-
-    ! Determine the turbulence model and set the names accordingly.
-
-    select case (turbModel)
-    case (spalartAllmaras, spalartAllmarasEdwards)
-       bcVarNames(offset+1) = cgnsTurbSaNu
-
-    case (komegaWilcox, komegaModified, menterSST)
-       bcVarNames(offset+1) = cgnsTurbK
-       bcVarNames(offset+2) = cgnsTurbOmega
-
-    case (ktau)
-       bcVarNames(offset+1) = cgnsTurbK
-       bcVarNames(offset+2) = cgnsTurbTau
-
-    case (v2f)
-       bcVarNames(offset+1) = cgnsTurbK
-       bcVarNames(offset+2) = cgnsTurbEpsilon
-       bcVarNames(offset+3) = cgnsTurbV2
-       bcVarNames(offset+4) = cgnsTurbF
-
-    end select
-
-  end subroutine setBCVarNamesTurb
-  ! ---------------------------------------------------------------
-  ! --------------------------------------
-  !                Utilities
-  ! --------------------------------------
-
-  subroutine computeHtot(tt, ht)
-    !
-    !       computeHtot computes the total enthalpy from the given total
-    !       temperature. The total enthalpy is the integral of cp, which
-    !       is a very simple expression for constant cp. For a variable cp
-    !       it is a bit more work.
-    !
-    use constants
-    use cpCurveFits
-    use communication, only : myid
-    use inputPhysics, only : cpModel, gammaConstant,rGasDim
-    use flowVarRefState, only : PinfDim
+contains
+    ! ---------------------------------------------------------------
+    ! Routines that set the appropriate variable names for BCs with
+    ! BCdata.
+
+    subroutine setBCVarNamesIsothermalWall
+        use cgnsNames
+        use constants
+        use inputPhysics, only: useRoughSA
+        implicit none
+        nbcVar = nbcVarIsothermalWall
+        bcVarNames(1) = cgnsTemp
+
+        if (useRoughSA) then
+            nbcVar = nbcVar + 1
+            bcVarNames(2) = cgnsSandGrainRoughness
+        end if
+
+    end subroutine setBCVarNamesIsothermalWall
+
+    subroutine setBCVarNamesAdiabaticWall
+        use cgnsNames
+        use constants
+        use inputPhysics, only: useRoughSA
+        implicit none
+        nbcVar = nbcVarAdiabaticWall
+
+        if (useRoughSA) then
+            nbcVar = nbcVar + 1
+            bcVarNames(1) = cgnsSandGrainRoughness
+        end if
+
+    end subroutine setBCVarNamesAdiabaticWall
+
+    subroutine setBCVarNamesSubsonicInflow
+        use constants
+        use cgnsNames
+        use inputPhysics, only: equations
+        use flowVarRefState, only: nwt
+        implicit none
+        !
+        !      Local variables.
+        !
+        logical :: varAllowed
+
+        nbcVar = nbcVarSubsonicInflow
+        if (equations == RANSEquations) then
+            nbcVar = nbcVar + nwt
+        end if
+
+        bcVarNames(1) = cgnsPtot
+        bcVarNames(2) = cgnsTtot
+        bcVarNames(3) = cgnsRhotot
+        bcVarNames(4) = cgnsVelAnglex
+        bcVarNames(5) = cgnsVelAngley
+        bcVarNames(6) = cgnsVelAnglez
+        bcVarNames(7) = cgnsVelVecx
+        bcVarNames(8) = cgnsVelVecy
+        bcVarNames(9) = cgnsVelVecz
+        bcVarNames(10) = cgnsVelVecr
+        bcVarNames(11) = cgnsVelVectheta
+        bcVarNames(12) = cgnsDensity
+        bcVarNames(13) = cgnsVelx
+        bcVarNames(14) = cgnsVely
+        bcVarNames(15) = cgnsVelz
+        bcVarNames(16) = cgnsVelr
+        bcVarNames(17) = cgnsVeltheta
+
+        call setBcVarNamesTurb(17_intType)
+
+    end subroutine setBCVarNamesSubsonicInflow
+
+    subroutine setBCVarNamesSubsonicOutflow
+        use cgnsNames
+        use constants
+        use flowVarRefState, only: nwt
+
+        nbcVar = nbcVarSubsonicOutflow
+
+        bcVarNames(1) = cgnsPressure
+
+    end subroutine setBCVarNamesSubsonicOutflow
+
+    subroutine setBCVarNamesSupersonicInflow
+        use constants
+        use cgnsNames
+        use inputPhysics, only: equations
+        use flowVarRefState, only: nwt
+
+        nbcVar = nbcVarSupersonicInflow
+        if (equations == RANSEquations) then
+            nbcVar = nbcVar + nwt
+        end if
+
+        bcVarNames(1) = cgnsDensity
+        bcVarNames(2) = cgnsPressure
+        bcVarNames(3) = cgnsVelx
+        bcVarNames(4) = cgnsVely
+        bcVarNames(5) = cgnsVelz
+        bcVarNames(6) = cgnsVelr
+        bcVarNames(7) = cgnsVeltheta
+
+        call setBCVarNamesTurb(7_intType)
+
+    end subroutine setBCVarNamesSupersonicInflow
+
+    subroutine setBCVarNamesTurb(offset)
+        !
+        !       setBCVarNamesTurb sets the names for the turbulence
+        !       variables to be determined. This depends on the turbulence
+        !       model. If not the RANS equations are solved an immediate
+        !       return is made.
+        !
+        use constants
+        use cgnsNames
+        use inputPhysics, only: equations, turbModel
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: offset
+
+        ! Return immediately if not the RANS equations are solved.
+
+        if (equations /= RANSEquations) return
+
+        ! Determine the turbulence model and set the names accordingly.
+
+        select case (turbModel)
+        case (spalartAllmaras, spalartAllmarasEdwards)
+            bcVarNames(offset + 1) = cgnsTurbSaNu
+
+        case (komegaWilcox, komegaModified, menterSST)
+            bcVarNames(offset + 1) = cgnsTurbK
+            bcVarNames(offset + 2) = cgnsTurbOmega
+
+        case (ktau)
+            bcVarNames(offset + 1) = cgnsTurbK
+            bcVarNames(offset + 2) = cgnsTurbTau
+
+        case (v2f)
+            bcVarNames(offset + 1) = cgnsTurbK
+            bcVarNames(offset + 2) = cgnsTurbEpsilon
+            bcVarNames(offset + 3) = cgnsTurbV2
+            bcVarNames(offset + 4) = cgnsTurbF
+
+        end select
+
+    end subroutine setBCVarNamesTurb
+    ! ---------------------------------------------------------------
+    ! --------------------------------------
+    !                Utilities
+    ! --------------------------------------
+
+    subroutine computeHtot(tt, ht)
+        !
+        !       computeHtot computes the total enthalpy from the given total
+        !       temperature. The total enthalpy is the integral of cp, which
+        !       is a very simple expression for constant cp. For a variable cp
+        !       it is a bit more work.
+        !
+        use constants
+        use cpCurveFits
+        use communication, only: myid
+        use inputPhysics, only: cpModel, gammaConstant, rGasDim
+        use flowVarRefState, only: PinfDim
 #ifndef USE_TAPENADE
-    use commonFormats, only : stringSci5
+        use commonFormats, only: stringSci5
 #endif
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    real(kind=realType), intent(in)  :: tt
-    real(kind=realType), intent(out) :: ht
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: ii, nn, mm, start
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        real(kind=realType), intent(in)  :: tt
+        real(kind=realType), intent(out) :: ht
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: ii, nn, mm, start
 
-    real(kind=realType) :: t2
+        real(kind=realType) :: t2
 
-    ! Determine the cp model used in the computation.
+        ! Determine the cp model used in the computation.
 
-    select case (cpModel)
+        select case (cpModel)
 
-    case (cpConstant)
+        case (cpConstant)
 
-       ! Constant cp. The total enthalpy is simply cp*tt.
+            ! Constant cp. The total enthalpy is simply cp*tt.
 
-       ht = gammaConstant*RGasDim*tt/(gammaConstant - one)
+            ht = gammaConstant*RGasDim*tt/(gammaConstant - one)
 
-       !        ================================================================
+            !        ================================================================
 #ifndef USE_TAPENADE
-    case (cpTempCurveFits)
+        case (cpTempCurveFits)
 
-       ! Cp as function of the temperature is given via curve fits.
-       ! The actual integral must be computed.
+            ! Cp as function of the temperature is given via curve fits.
+            ! The actual integral must be computed.
 
-       ! Determine the case we are having here.
+            ! Determine the case we are having here.
 
-       if(tt < cpTrange(0)) then
+            if (tt < cpTrange(0)) then
 
-          ! Temperature is less than the smallest value in the
-          ! curve fits. Print a warning and use extrapolation using
-          ! constant cp.
+                ! Temperature is less than the smallest value in the
+                ! curve fits. Print a warning and use extrapolation using
+                ! constant cp.
 
-          if(myId == 0) then
-             print "(a)", "#"
-             print "(a)", "#                    Warning"
-             print stringSci5, "# Prescribed total temperature ", tt, &
-             " is less than smallest curve fit value, ", cpTrange(0), "."
-             print "(a)", "# Extrapolation with constant cp is used."
-             print "(a)", "#"
+                if (myId == 0) then
+                    print "(a)", "#"
+                    print "(a)", "#                    Warning"
+                    print stringSci5, "# Prescribed total temperature ", tt, &
+                        " is less than smallest curve fit value, ", cpTrange(0), "."
+                    print "(a)", "# Extrapolation with constant cp is used."
+                    print "(a)", "#"
 
-          endif
+                end if
 
-          ht = RGasDim*(cpEint(0) + tt + cv0*(tt - cpTrange(0)))
+                ht = RGasDim*(cpEint(0) + tt + cv0*(tt - cpTrange(0)))
 
-       else if(tt > cpTrange(cpNparts)) then
+            else if (tt > cpTrange(cpNparts)) then
 
-          ! Temperature is larger than the largest value in the
-          ! curve fits. Print a warning and use extrapolation using
-          ! constant cp.
+                ! Temperature is larger than the largest value in the
+                ! curve fits. Print a warning and use extrapolation using
+                ! constant cp.
 
-          if(myId == 0) then
-             print "(a)", "#"
-             print "(a)", "#                    Warning"
-             print stringSci5, "# Prescribed total temperature ", tt, &
-             " is larger than largest curve fit value, ", cpTrange(cpNparts), "."
-             print "(a)", "# Extrapolation with constant cp is used."
-             print "(a)", "#"
-          endif
+                if (myId == 0) then
+                    print "(a)", "#"
+                    print "(a)", "#                    Warning"
+                    print stringSci5, "# Prescribed total temperature ", tt, &
+                        " is larger than largest curve fit value, ", cpTrange(cpNparts), "."
+                    print "(a)", "# Extrapolation with constant cp is used."
+                    print "(a)", "#"
+                end if
 
-          ht = RGasDim*(cpEint(cpNparts) + tt + cvn*(tt - cpTrange(cpNparts)))
+                ht = RGasDim*(cpEint(cpNparts) + tt + cvn*(tt - cpTrange(cpNparts)))
 
-       else
+            else
 
-          ! Temperature is in the curve fit range.
-          ! First find the correct range for this temperature.
+                ! Temperature is in the curve fit range.
+                ! First find the correct range for this temperature.
 
-          ii    = cpNparts
-          start = 1
-          interval: do
+                ii = cpNparts
+                start = 1
+                interval: do
 
-             ! Next guess for the interval.
+                    ! Next guess for the interval.
 
-             nn = start + ii/2
+                    nn = start + ii/2
 
-             ! Determine the situation we are having here.
+                    ! Determine the situation we are having here.
 
-             if(tt > cpTrange(nn)) then
+                    if (tt > cpTrange(nn)) then
 
-                ! Temperature is larger than the upper boundary of
-                ! the current interval. Update the lower boundary.
+                        ! Temperature is larger than the upper boundary of
+                        ! the current interval. Update the lower boundary.
 
-                start = nn + 1
-                ii    = ii - 1
+                        start = nn + 1
+                        ii = ii - 1
 
-             else if(tt >= cpTrange(nn-1)) then
+                    else if (tt >= cpTrange(nn - 1)) then
 
-                ! This is the correct range. Exit the do-loop.
+                        ! This is the correct range. Exit the do-loop.
 
-                exit
+                        exit
 
-             endif
+                    end if
 
-             ! Modify ii for the next branch to search.
+                    ! Modify ii for the next branch to search.
 
-             ii = ii/2
+                    ii = ii/2
 
-          enddo interval
+                end do interval
 
-          ! nn contains the correct curve fit interval.
-          ! Integrate cp to get ht.
+                ! nn contains the correct curve fit interval.
+                ! Integrate cp to get ht.
 
-          ht = cpTempFit(nn)%eint0
-          do ii=1,cpTempFit(nn)%nterm
-             if(cpTempFit(nn)%exponents(ii) == -1_intType) then
-                ht = ht + cpTempFit(nn)%constants(ii)*log(tt)
-             else
-                mm = cpTempFit(nn)%exponents(ii) + 1
-                t2 = tt**mm
-                ht = ht + cpTempFit(nn)%constants(ii)*t2/mm
-             endif
-          enddo
+                ht = cpTempFit(nn)%eint0
+                do ii = 1, cpTempFit(nn)%nterm
+                    if (cpTempFit(nn)%exponents(ii) == -1_intType) then
+                        ht = ht + cpTempFit(nn)%constants(ii)*log(tt)
+                    else
+                        mm = cpTempFit(nn)%exponents(ii) + 1
+                        t2 = tt**mm
+                        ht = ht + cpTempFit(nn)%constants(ii)*t2/mm
+                    end if
+                end do
 
-          ! Multiply ht by RGasDim to obtain the correct
-          ! dimensional value.
+                ! Multiply ht by RGasDim to obtain the correct
+                ! dimensional value.
 
-          ht = RGasDim*ht
+                ht = RGasDim*ht
 
-       endif
+            end if
 #endif
-    end select
-
-  end subroutine computeHtot
-
-
-  subroutine unitVectorsCylSystem(boco)
-    !
-    !       unitVectorsCylSystem determines the unit vectors of the
-    !       local coordinate systen of the boundary face defined by the
-    !       data in BCDataMod. In that local system the axial direction
-    !       is rotation axis.
-    !
-    use constants
-    use blockPointers, only : BCFaceID, BCData, x, si, sj, sk, il, jl, kl, &
-         sectionID
-    use section, only : sections
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: boco
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j
-    real(kind=realType)   :: factInlet, var
-
-    real(kind=realType), dimension(3) :: dir
-
-    real(kind=realType), dimension(:,:,:), pointer :: ss
-
-    ! Set the pointers for coordinates and normals of the block
-    ! face on which this subface is located. Set factInlet
-    ! such that factInlet*normals points into the domain.
-
-    select case (BCFaceID(boco))
-    case (iMin)
-       xf => x(1,:,:,:);  ss => si(1 ,:,:,:); factInlet =  one
-    case (iMax)
-       xf => x(il,:,:,:); ss => si(il,:,:,:); factInlet = -one
-    case (jMin)
-       xf => x(:,1,:,:);  ss => sj(:,1 ,:,:); factInlet =  one
-    case (jMax)
-       xf => x(:,jl,:,:); ss => sj(:,jl,:,:); factInlet = -one
-    case (kMin)
-       xf => x(:,:,1,:);  ss => sk(:,:,1 ,:); factInlet =  one
-    case (kMax)
-       xf => x(:,:,kl,:); ss => sk(:,:,kl,:); factInlet = -one
-    end select
-
-    ! Loop over the physical range of the subface to store the sum of
-    ! the normals. Note that jBeg, jEnd, iBeg, iEnd cannot be used
-    ! here, because they may include the halo faces. Instead the
-    ! nodal range is used, which defines the original subface. The
-    ! offset of +1 in the start index is there because you need
-    ! the face id's.
-
-    dir(1) = zero; dir(2) = zero; dir(3) = zero
-
-    do j=(BCData(boco)%jnBeg+1), BCData(boco)%jnEnd
-       do i=(BCData(boco)%inBeg+1), BCData(boco)%inEnd
-          dir(1) = dir(1) + ss(i,j,1)
-          dir(2) = dir(2) + ss(i,j,2)
-          dir(3) = dir(3) + ss(i,j,3)
-       enddo
-    enddo
-
-    ! Multiply by factInlet to make sure that the normal
-    ! is inward pointing.
-
-    dir(1) = dir(1)*factInlet
-    dir(2) = dir(2)*factInlet
-    dir(3) = dir(3)*factInlet
-
-    ! Determine three unit vectors, which define the local cartesian
-    ! coordinate system of the rotation axis. First the axial
-    ! direction. If the axis cannot be determined from rotation info,
-    ! it is assumed to be the x-axis.
-
-    axis = sections(sectionId)%rotAxis
-    var  = axis(1)**2 + axis(2)**2 + axis(3)**2
-    if(var < half) then
-
-       ! No rotation axis specified. Assume the x-axis
-       ! and set the logical axAssumed to .True.
-
-       axis(1) = one; axis(2) = zero; axis(3) = zero
-       axAssumed = .true.
-    endif
-
-    ! The axial axis must be such that it points into the
-    ! computational domain. If the dot product with dir is
-    ! negative the direction of axis should be reversed.
-
-    var = axis(1)*dir(1) + axis(2)*dir(2) + axis(3)*dir(3)
-    if(var < zero) then
-       axis(1) = -axis(1); axis(2) = -axis(2); axis(3) = -axis(3)
-    endif
-
-    ! Two unit vectors define the radial plane. These vectors are
-    ! defined up to a constants. Just pick a direction for the second
-    ! and create a unit vector normal to axis.
-
-    if(abs(axis(2)) < 0.707107_realType) then
-       radVec1(1) = zero; radVec1(2) = one;  radVec1(3) = zero
-    else
-       radVec1(1) = zero; radVec1(2) = zero; radVec1(3) = one
-    endif
-
-    var = radVec1(1)*axis(1) + radVec1(2)*axis(2) &
-         + radVec1(3)*axis(3)
-    radVec1(1) = radVec1(1) - var*axis(1)
-    radVec1(2) = radVec1(2) - var*axis(2)
-    radVec1(3) = radVec1(3) - var*axis(3)
-
-    var = one/sqrt(radVec1(1)**2 + radVec1(2)**2 &
-         +          radVec1(3)**2)
-    radVec1(1) = radVec1(1)*var
-    radVec1(2) = radVec1(2)*var
-    radVec1(3) = radVec1(3)*var
-
-    ! The second vector of the radial plane is obtained
-    ! by taking the cross product of axis and radVec1.
-
-    radVec2(1) = axis(2)*radVec1(3) - axis(3)*radVec1(2)
-    radVec2(2) = axis(3)*radVec1(1) - axis(1)*radVec1(3)
-    radVec2(3) = axis(1)*radVec1(2) - axis(2)*radVec1(1)
-
-  end subroutine unitVectorsCylSystem
-
-  ! ---------------------------------------------------------------
-  ! Routines that set the actual BCdata values from the CGNS data set
-  ! information.
-  ! ---------------------------------------------------------------
-
-  subroutine BCDataIsothermalWall(boco, bcVarArray, iBeg, iEnd, jBeg, jEnd)
-    !
-    !       BCDataIsothermalWall tries to extract the wall temperature
-    !       for the currently active boundary face, which is an isothermal
-    !       viscous wall.
-    !
-    use constants
-    use cgnsNames
-    use blockPointers, only : BCFaceID, BCData, nBKGlobal
-    use utils, only : terminate, siTemperature
-    use flowVarRefState, only : Tref
+        end select
+
+    end subroutine computeHtot
+
+    subroutine unitVectorsCylSystem(boco)
+        !
+        !       unitVectorsCylSystem determines the unit vectors of the
+        !       local coordinate systen of the boundary face defined by the
+        !       data in BCDataMod. In that local system the axial direction
+        !       is rotation axis.
+        !
+        use constants
+        use blockPointers, only: BCFaceID, BCData, x, si, sj, sk, il, jl, kl, &
+                                 sectionID
+        use section, only: sections
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: boco
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j
+        real(kind=realType)   :: factInlet, var
+
+        real(kind=realType), dimension(3) :: dir
+
+        real(kind=realType), dimension(:, :, :), pointer :: ss
+
+        ! Set the pointers for coordinates and normals of the block
+        ! face on which this subface is located. Set factInlet
+        ! such that factInlet*normals points into the domain.
+
+        select case (BCFaceID(boco))
+        case (iMin)
+            xf => x(1, :, :, :); ss => si(1, :, :, :); factInlet = one
+        case (iMax)
+            xf => x(il, :, :, :); ss => si(il, :, :, :); factInlet = -one
+        case (jMin)
+            xf => x(:, 1, :, :); ss => sj(:, 1, :, :); factInlet = one
+        case (jMax)
+            xf => x(:, jl, :, :); ss => sj(:, jl, :, :); factInlet = -one
+        case (kMin)
+            xf => x(:, :, 1, :); ss => sk(:, :, 1, :); factInlet = one
+        case (kMax)
+            xf => x(:, :, kl, :); ss => sk(:, :, kl, :); factInlet = -one
+        end select
+
+        ! Loop over the physical range of the subface to store the sum of
+        ! the normals. Note that jBeg, jEnd, iBeg, iEnd cannot be used
+        ! here, because they may include the halo faces. Instead the
+        ! nodal range is used, which defines the original subface. The
+        ! offset of +1 in the start index is there because you need
+        ! the face id's.
+
+        dir(1) = zero; dir(2) = zero; dir(3) = zero
+
+        do j = (BCData(boco)%jnBeg + 1), BCData(boco)%jnEnd
+            do i = (BCData(boco)%inBeg + 1), BCData(boco)%inEnd
+                dir(1) = dir(1) + ss(i, j, 1)
+                dir(2) = dir(2) + ss(i, j, 2)
+                dir(3) = dir(3) + ss(i, j, 3)
+            end do
+        end do
+
+        ! Multiply by factInlet to make sure that the normal
+        ! is inward pointing.
+
+        dir(1) = dir(1)*factInlet
+        dir(2) = dir(2)*factInlet
+        dir(3) = dir(3)*factInlet
+
+        ! Determine three unit vectors, which define the local cartesian
+        ! coordinate system of the rotation axis. First the axial
+        ! direction. If the axis cannot be determined from rotation info,
+        ! it is assumed to be the x-axis.
+
+        axis = sections(sectionId)%rotAxis
+        var = axis(1)**2 + axis(2)**2 + axis(3)**2
+        if (var < half) then
+
+            ! No rotation axis specified. Assume the x-axis
+            ! and set the logical axAssumed to .True.
+
+            axis(1) = one; axis(2) = zero; axis(3) = zero
+            axAssumed = .true.
+        end if
+
+        ! The axial axis must be such that it points into the
+        ! computational domain. If the dot product with dir is
+        ! negative the direction of axis should be reversed.
+
+        var = axis(1)*dir(1) + axis(2)*dir(2) + axis(3)*dir(3)
+        if (var < zero) then
+            axis(1) = -axis(1); axis(2) = -axis(2); axis(3) = -axis(3)
+        end if
+
+        ! Two unit vectors define the radial plane. These vectors are
+        ! defined up to a constants. Just pick a direction for the second
+        ! and create a unit vector normal to axis.
+
+        if (abs(axis(2)) < 0.707107_realType) then
+            radVec1(1) = zero; radVec1(2) = one; radVec1(3) = zero
+        else
+            radVec1(1) = zero; radVec1(2) = zero; radVec1(3) = one
+        end if
+
+        var = radVec1(1)*axis(1) + radVec1(2)*axis(2) &
+              + radVec1(3)*axis(3)
+        radVec1(1) = radVec1(1) - var*axis(1)
+        radVec1(2) = radVec1(2) - var*axis(2)
+        radVec1(3) = radVec1(3) - var*axis(3)
+
+        var = one/sqrt(radVec1(1)**2 + radVec1(2)**2 &
+                       + radVec1(3)**2)
+        radVec1(1) = radVec1(1)*var
+        radVec1(2) = radVec1(2)*var
+        radVec1(3) = radVec1(3)*var
+
+        ! The second vector of the radial plane is obtained
+        ! by taking the cross product of axis and radVec1.
+
+        radVec2(1) = axis(2)*radVec1(3) - axis(3)*radVec1(2)
+        radVec2(2) = axis(3)*radVec1(1) - axis(1)*radVec1(3)
+        radVec2(3) = axis(1)*radVec1(2) - axis(2)*radVec1(1)
+
+    end subroutine unitVectorsCylSystem
+
+    ! ---------------------------------------------------------------
+    ! Routines that set the actual BCdata values from the CGNS data set
+    ! information.
+    ! ---------------------------------------------------------------
+
+    subroutine BCDataIsothermalWall(boco, bcVarArray, iBeg, iEnd, jBeg, jEnd)
+        !
+        !       BCDataIsothermalWall tries to extract the wall temperature
+        !       for the currently active boundary face, which is an isothermal
+        !       viscous wall.
+        !
+        use constants
+        use cgnsNames
+        use blockPointers, only: BCFaceID, BCData, nBKGlobal
+        use utils, only: terminate, siTemperature
+        use flowVarRefState, only: Tref
 #ifndef USE_TAPENADE
-    use commonFormats, only : strings
+        use commonFormats, only: strings
 #endif
-    use inputPhysics, only : useRoughSA
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType) :: boco
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
-    real(kind=realType), dimension(iBeg:iEnd,jBeg:jEnd, nbcVarMax) :: bcVarArray
-    !
-    !      Local variables.
-    !
-    integer :: ierr
+        use inputPhysics, only: useRoughSA
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType) :: boco
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
+        real(kind=realType), dimension(iBeg:iEnd, jBeg:jEnd, nbcVarMax) :: bcVarArray
+        !
+        !      Local variables.
+        !
+        integer :: ierr
 
-    integer(kind=intType) :: i, j
+        integer(kind=intType) :: i, j
 
-    real(kind=realType) :: mult, trans
+        real(kind=realType) :: mult, trans
 
-    character(len=maxStringLen) :: errorMessage
+        character(len=maxStringLen) :: errorMessage
 
-    ! Write an error message and terminate if it was not
-    ! possible to determine the temperature.
+        ! Write an error message and terminate if it was not
+        ! possible to determine the temperature.
 
 #ifndef USE_TAPENADE
-    if(.not. bcVarPresent(1)) then
+        if (.not. bcVarPresent(1)) then
 
-       write(errorMessage, strings) "Zone ", trim(cgnsDoms(nbkGlobal)%zonename),", &
-         boundary subface ", trim(cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%bocoName), &
-         ": Wall temperature not specified for isothermal wall"
+            write (errorMessage, strings) "Zone ", trim(cgnsDoms(nbkGlobal)%zonename), ", &
+     &         boundary subface ", trim(cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%bocoName), &
+              ": Wall temperature not specified for isothermal wall"
 
-       call terminate("BCDataIsothermalWall", errorMessage)
+            call terminate("BCDataIsothermalWall", errorMessage)
 
-    endif
+        end if
 #endif
 
-    ! Convert to si-units and store the temperature in TNS_Wall.
+        ! Convert to si-units and store the temperature in TNS_Wall.
 
-    call siTemperature(temp(1), mult, trans)
+        call siTemperature(temp(1), mult, trans)
 
-    do j=jBeg,jEnd
-       do i=iBeg,iEnd
-          BCData(boco)%TNS_Wall(i,j) = (mult*bcVarArray(i,j,1) + trans)/Tref
-       enddo
-    enddo
+        do j = jBeg, jEnd
+            do i = iBeg, iEnd
+                BCData(boco)%TNS_Wall(i, j) = (mult*bcVarArray(i, j, 1) + trans)/Tref
+            end do
+        end do
 
-    ! Set a value of 0 if it was not possible to determine the
-    ! sand grain roughness
-
-    if (useRoughSA) then
         ! Set a value of 0 if it was not possible to determine the
         ! sand grain roughness
-        if(.not. bcVarPresent(1)) then
-           bcVarArray(:,:,2) = zero
-        endif
-
-        do j=jBeg,jEnd
-            do i=iBeg,iEnd
-               BCData(boco)%ksNS_Wall(i,j) = bcVarArray(i,j,2)
-            enddo
-        enddo
-    end if
 
-
-  end subroutine BCDataIsothermalWall
-
-  subroutine BCDataAdiabaticWall(boco, bcVarArray, iBeg, iEnd, jBeg, jEnd)
-    !
-    !       Tries to extract the equivalent sand grain roughness. It sets
-    !       a default value of 0.0
-    !
-    use constants
-    use cgnsNames
-    use inputPhysics, only : useRoughSA
-    use blockPointers, only : BCFaceID, BCData, nBKGlobal
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType) :: boco
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
-    real(kind=realType), dimension(iBeg:iEnd,jBeg:jEnd, nbcVarMax) :: bcVarArray
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j
-
-
-    if (useRoughSA) then
-        ! Set a value of 0 if it was not possible to determine the
-        ! sand grain roughness
-        if(.not. bcVarPresent(1)) then
-           bcVarArray(:,:,1) = zero
-        endif
-
-        do j=jBeg,jEnd
-            do i=iBeg,iEnd
-               BCData(boco)%ksNS_Wall(i,j) = bcVarArray(i,j,1)
-            enddo
-        enddo
-    end if
-
-  end subroutine BCDataAdiabaticWall
-
-
-  subroutine BCDataSubsonicInflow(boco, bcVarArray, iBeg, iEnd, jBeg, jEnd, allTurbPresent)
-    !
-    !       BCDataSubsonicInflow tries to extract the prescribed data
-    !       for the currently active boundary face, which is a subsonic
-    !       inflow. Either total conditions and velocity direction or the
-    !       velocity and density can be prescribed. In the latter case the
-    !       mass flow is prescribed, which is okay as long as the flow is
-    !       not choked.
-    !
-    use constants
-    use cgnsNames
-    use blockPointers, only : nbkGlobal, sectionID, BCFaceID, BCData
-    use flowVarRefState, only : Tref, Pref, Href, rhoRef, muRef, nwt, wInf
-    use inputPhysics, only : equations
-    use utils, only : siDensity, siVelocity, siPressure, siAngle, &
-         siTemperature, terminate
+        if (useRoughSA) then
+            ! Set a value of 0 if it was not possible to determine the
+            ! sand grain roughness
+            if (.not. bcVarPresent(1)) then
+                bcVarArray(:, :, 2) = zero
+            end if
+
+            do j = jBeg, jEnd
+                do i = iBeg, iEnd
+                    BCData(boco)%ksNS_Wall(i, j) = bcVarArray(i, j, 2)
+                end do
+            end do
+        end if
+
+    end subroutine BCDataIsothermalWall
+
+    subroutine BCDataAdiabaticWall(boco, bcVarArray, iBeg, iEnd, jBeg, jEnd)
+        !
+        !       Tries to extract the equivalent sand grain roughness. It sets
+        !       a default value of 0.0
+        !
+        use constants
+        use cgnsNames
+        use inputPhysics, only: useRoughSA
+        use blockPointers, only: BCFaceID, BCData, nBKGlobal
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType) :: boco
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
+        real(kind=realType), dimension(iBeg:iEnd, jBeg:jEnd, nbcVarMax) :: bcVarArray
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j
+
+        if (useRoughSA) then
+            ! Set a value of 0 if it was not possible to determine the
+            ! sand grain roughness
+            if (.not. bcVarPresent(1)) then
+                bcVarArray(:, :, 1) = zero
+            end if
+
+            do j = jBeg, jEnd
+                do i = iBeg, iEnd
+                    BCData(boco)%ksNS_Wall(i, j) = bcVarArray(i, j, 1)
+                end do
+            end do
+        end if
+
+    end subroutine BCDataAdiabaticWall
+
+    subroutine BCDataSubsonicInflow(boco, bcVarArray, iBeg, iEnd, jBeg, jEnd, allTurbPresent)
+        !
+        !       BCDataSubsonicInflow tries to extract the prescribed data
+        !       for the currently active boundary face, which is a subsonic
+        !       inflow. Either total conditions and velocity direction or the
+        !       velocity and density can be prescribed. In the latter case the
+        !       mass flow is prescribed, which is okay as long as the flow is
+        !       not choked.
+        !
+        use constants
+        use cgnsNames
+        use blockPointers, only: nbkGlobal, sectionID, BCFaceID, BCData
+        use flowVarRefState, only: Tref, Pref, Href, rhoRef, muRef, nwt, wInf
+        use inputPhysics, only: equations
+        use utils, only: siDensity, siVelocity, siPressure, siAngle, &
+                         siTemperature, terminate
 #ifndef USE_TAPENADE
-    use commonFormats, only : strings
-    use utils, only: returnFail
+        use commonFormats, only: strings
+        use utils, only: returnFail
 #endif
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in)    :: boco
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
-    real(kind=realType), dimension(iBeg:iEnd,jBeg:jEnd, nbcVarMax) :: bcVarArray
-    logical,               intent(inout) :: allTurbPresent
-    !
-    !      Local variables.
-    !
-    integer :: ierr, nn
-
-    logical :: ptPresent,   ttPresent,   rhotPresent
-    logical :: axPresent,   ayPresent,   azPresent
-    logical :: xdirPresent, ydirPresent, zdirPresent
-    logical :: rdirPresent, tdirPresent
-    logical :: velxPresent, velyPresent, velzPresent
-    logical :: rhoPresent,  velrPresent, veltPresent
-    logical :: totPresent, velPresent, dirPresent
-
-    character(len=maxStringLen) :: errorMessage
-
-    ! Store the logicals, which indicate succes or failure
-    ! a bit more readable.
-
-    ptPresent   = bcVarPresent(1)
-    ttPresent   = bcVarPresent(2)
-    rhotPresent = bcVarPresent(3)
-    axPresent   = bcVarPresent(4)
-    ayPresent   = bcVarPresent(5)
-    azPresent   = bcVarPresent(6)
-    xdirPresent = bcVarPresent(7)
-    ydirPresent = bcVarPresent(8)
-    zdirPresent = bcVarPresent(9)
-    rdirPresent = bcVarPresent(10)
-    tdirPresent = bcVarPresent(11)
-    rhoPresent  = bcVarPresent(12)
-    velxPresent = bcVarPresent(13)
-    velyPresent = bcVarPresent(14)
-    velzPresent = bcVarPresent(15)
-    velrPresent = bcVarPresent(16)
-    veltPresent = bcVarPresent(17)
-
-    ! Check if the total conditions are present.
-
-    nn = 0
-    if( ptPresent )   nn = nn + 1
-    if( ttPresent )   nn = nn + 1
-    if( rhotPresent ) nn = nn + 1
-
-    totPresent = .false.
-    if(nn >= 2) totPresent = .true.
-
-    ! Check if a velocity direction is present.
-
-    dirPresent = .false.
-    if(xdirPresent .and. rdirPresent) dirPresent = .true.
-    if((axPresent .or. xdirPresent) .and. &
-         (ayPresent .or. ydirPresent) .and. &
-         (azPresent .or. zdirPresent))  dirPresent = .true.
-
-    ! Check if a velocity vector is present.
-
-    velPresent = .false.
-    if(velxPresent .and. velrPresent) velPresent = .true.
-    if(velxPresent .and. velyPresent .and. velzPresent) &
-         velPresent = .true.
-
-    ! Determine the situation we have here.
-
-    if(totPresent .and. dirPresent) then
-
-       ! Total conditions and velocity direction are prescribed.
-       ! Determine the values for the faces of the subface.
-
-       call totalSubsonicInlet
-
-    else
-
-       ! Not enough data is prescribed. Print an error message
-       ! and exit.
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in)    :: boco
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
+        real(kind=realType), dimension(iBeg:iEnd, jBeg:jEnd, nbcVarMax) :: bcVarArray
+        logical, intent(inout) :: allTurbPresent
+        !
+        !      Local variables.
+        !
+        integer :: ierr, nn
+
+        logical :: ptPresent, ttPresent, rhotPresent
+        logical :: axPresent, ayPresent, azPresent
+        logical :: xdirPresent, ydirPresent, zdirPresent
+        logical :: rdirPresent, tdirPresent
+        logical :: velxPresent, velyPresent, velzPresent
+        logical :: rhoPresent, velrPresent, veltPresent
+        logical :: totPresent, velPresent, dirPresent
+
+        character(len=maxStringLen) :: errorMessage
+
+        ! Store the logicals, which indicate succes or failure
+        ! a bit more readable.
+
+        ptPresent = bcVarPresent(1)
+        ttPresent = bcVarPresent(2)
+        rhotPresent = bcVarPresent(3)
+        axPresent = bcVarPresent(4)
+        ayPresent = bcVarPresent(5)
+        azPresent = bcVarPresent(6)
+        xdirPresent = bcVarPresent(7)
+        ydirPresent = bcVarPresent(8)
+        zdirPresent = bcVarPresent(9)
+        rdirPresent = bcVarPresent(10)
+        tdirPresent = bcVarPresent(11)
+        rhoPresent = bcVarPresent(12)
+        velxPresent = bcVarPresent(13)
+        velyPresent = bcVarPresent(14)
+        velzPresent = bcVarPresent(15)
+        velrPresent = bcVarPresent(16)
+        veltPresent = bcVarPresent(17)
+
+        ! Check if the total conditions are present.
+
+        nn = 0
+        if (ptPresent) nn = nn + 1
+        if (ttPresent) nn = nn + 1
+        if (rhotPresent) nn = nn + 1
+
+        totPresent = .false.
+        if (nn >= 2) totPresent = .true.
+
+        ! Check if a velocity direction is present.
+
+        dirPresent = .false.
+        if (xdirPresent .and. rdirPresent) dirPresent = .true.
+        if ((axPresent .or. xdirPresent) .and. &
+            (ayPresent .or. ydirPresent) .and. &
+            (azPresent .or. zdirPresent)) dirPresent = .true.
+
+        ! Check if a velocity vector is present.
+
+        velPresent = .false.
+        if (velxPresent .and. velrPresent) velPresent = .true.
+        if (velxPresent .and. velyPresent .and. velzPresent) &
+            velPresent = .true.
+
+        ! Determine the situation we have here.
+
+        if (totPresent .and. dirPresent) then
+
+            ! Total conditions and velocity direction are prescribed.
+            ! Determine the values for the faces of the subface.
+
+            call totalSubsonicInlet
+
+        else
+
+            ! Not enough data is prescribed. Print an error message
+            ! and exit.
 
 #ifndef USE_TAPENADE
-       write(errorMessage, strings) "Zone ", trim(cgnsDoms(nbkGlobal)%zonename), &
-         ", boundary subface ", trim(cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%bocoName), &
-         ": Not enough data specified for subsonic inlet"
+            write (errorMessage, strings) "Zone ", trim(cgnsDoms(nbkGlobal)%zonename), &
+                ", boundary subface ", trim(cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%bocoName), &
+                ": Not enough data specified for subsonic inlet"
 
-       call terminate("BCDataSubsonicInflow", errorMessage)
+            call terminate("BCDataSubsonicInflow", errorMessage)
 #endif
 
-    endif
+        end if
 
-    ! Set the turbulence variables and check if all of them are
-    ! prescribed. If not set allTurbPresent to .false.
+        ! Set the turbulence variables and check if all of them are
+        ! prescribed. If not set allTurbPresent to .false.
 
-    allTurbPresent = setBcVarTurb(17_intType, boco, bcVarArray, &
-         iBeg, iEnd, jBeg, jEnd, BCData(boco)%turbInlet)
+        allTurbPresent = setBcVarTurb(17_intType, boco, bcVarArray, &
+                                      iBeg, iEnd, jBeg, jEnd, BCData(boco)%turbInlet)
 
-    !=================================================================
+        !=================================================================
 
-  contains
+    contains
 
-    !===============================================================
+        !===============================================================
 
-    subroutine totalSubsonicInlet
-      !
-      !         TotalSubsonicInlet converts the prescribed total
-      !         conditions and velocity direction into a useable format.
-      !
-      use constants
-      use communication, only : adflow_comm_world
-      use inputPhysics, only : RGasDim
-      use section, only : sections
+        subroutine totalSubsonicInlet
+            !
+            !         TotalSubsonicInlet converts the prescribed total
+            !         conditions and velocity direction into a useable format.
+            !
+            use constants
+            use communication, only: adflow_comm_world
+            use inputPhysics, only: RGasDim
+            use section, only: sections
 #ifndef USE_TAPENADE
-      use commonFormats, only : strings
-      use utils, only: returnFail
+            use commonFormats, only: strings
+            use utils, only: returnFail
 #endif
-      implicit none
-      !
-      !        Local variables.
-      !
-      integer(kind=intType) :: i, j, nn
+            implicit none
+            !
+            !        Local variables.
+            !
+            integer(kind=intType) :: i, j, nn
 
-      real(kind=realType) :: rhot, mult, trans, Hdim, Tdim
-      real(kind=realType) :: ax, r1, r2, var, wax, wrad, wtheta
+            real(kind=realType) :: rhot, mult, trans, Hdim, Tdim
+            real(kind=realType) :: ax, r1, r2, var, wax, wrad, wtheta
 
-      real(kind=realType), dimension(3) :: xc, dir
+            real(kind=realType), dimension(3) :: xc, dir
 
-      integer :: ierr
+            integer :: ierr
 
-      ! Set the subsonic inlet treatment to totalConditions.
+            ! Set the subsonic inlet treatment to totalConditions.
 
-      BCData(boco)%subsonicInletTreatment = totalConditions
+            BCData(boco)%subsonicInletTreatment = totalConditions
 
-      ! If the total pressure is present, convert it to SI-units and
-      ! store it.
+            ! If the total pressure is present, convert it to SI-units and
+            ! store it.
 
-      if( ptPresent ) then
-         call siPressure(mass(1), length(1), time(1), mult, trans)
+            if (ptPresent) then
+                call siPressure(mass(1), length(1), time(1), mult, trans)
 
-         do j=jBeg,jEnd
-            do i=iBeg,iEnd
-               BCData(boco)%ptInlet(i,j) = (mult*bcVarArray(i,j,1) &
-                    + trans)/Pref
-            enddo
-         enddo
-      endif
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        BCData(boco)%ptInlet(i, j) = (mult*bcVarArray(i, j, 1) &
+                                                      + trans)/Pref
+                    end do
+                end do
+            end if
 
-      ! If the total temperature is present, convert it to SI-units
-      ! and store it.
+            ! If the total temperature is present, convert it to SI-units
+            ! and store it.
 
-      if( ttPresent ) then
-         call siTemperature(temp(2), mult, trans)
+            if (ttPresent) then
+                call siTemperature(temp(2), mult, trans)
 
-         do j=jBeg,jEnd
-            do i=iBeg,iEnd
-               BCData(boco)%ttInlet(i,j) = (mult*bcVarArray(i,j,2) &
-                    + trans)/Tref
-            enddo
-         enddo
-      endif
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        BCData(boco)%ttInlet(i, j) = (mult*bcVarArray(i, j, 2) &
+                                                      + trans)/Tref
+                    end do
+                end do
+            end if
 
-      ! Check if the total density is present. If so, it may be used
-      ! to determine the total temperature or pressure if one of these
-      ! variables was not specified.
+            ! Check if the total density is present. If so, it may be used
+            ! to determine the total temperature or pressure if one of these
+            ! variables was not specified.
 
-      if( rhotPresent ) then
-         call siDensity(mass(3), length(3), mult, trans)
+            if (rhotPresent) then
+                call siDensity(mass(3), length(3), mult, trans)
 
-         if(ptPresent .and. (.not. ttPresent)) then
+                if (ptPresent .and. (.not. ttPresent)) then
 
-            ! Total pressure is present but total temperature is not.
-            ! Convert the total density to SI-units and use the perfect
-            ! gas law to obtain the total temperature.
+                    ! Total pressure is present but total temperature is not.
+                    ! Convert the total density to SI-units and use the perfect
+                    ! gas law to obtain the total temperature.
 
-            do j=jBeg,jEnd
-               do i=iBeg,iEnd
-                  rhot = mult*bcVarArray(i,j,3) + trans
-                  BCData(boco)%ttInlet(i,j) = &
-                       (BCData(boco)%ptInlet(i,j)*pRef/(RGasDim*rhot))/Tref
+                    do j = jBeg, jEnd
+                        do i = iBeg, iEnd
+                            rhot = mult*bcVarArray(i, j, 3) + trans
+                            BCData(boco)%ttInlet(i, j) = &
+                                (BCData(boco)%ptInlet(i, j)*pRef/(RGasDim*rhot))/Tref
 
-               enddo
-            enddo
+                        end do
+                    end do
 
-         else if(ttPresent .and. (.not. ptPresent)) then
+                else if (ttPresent .and. (.not. ptPresent)) then
 
-            ! Total temperature is present but total pressure is not.
-            ! Convert the total density to SI-units and use the perfect
-            ! gas law to obtain the total pressure.
+                    ! Total temperature is present but total pressure is not.
+                    ! Convert the total density to SI-units and use the perfect
+                    ! gas law to obtain the total pressure.
 
-            do j=jBeg,jEnd
-               do i=iBeg,iEnd
-                  rhot = mult*bcVarArray(i,j,3) + trans
+                    do j = jBeg, jEnd
+                        do i = iBeg, iEnd
+                            rhot = mult*bcVarArray(i, j, 3) + trans
 
-                  BCData(boco)%ptInlet(i,j) = (RGasDim*rhot &
-                       * BCData(boco)%ttInlet(i,j)*Tref)/Pref
-               enddo
-            enddo
+                            BCData(boco)%ptInlet(i, j) = (RGasDim*rhot &
+                                                          *BCData(boco)%ttInlet(i, j)*Tref)/Pref
+                        end do
+                    end do
 
-         endif
-      endif
+                end if
+            end if
 
-      ! Determine the velocity direction. There are multiple
-      ! possibilities to specify this direction.
+            ! Determine the velocity direction. There are multiple
+            ! possibilities to specify this direction.
 
-      radialTest: if( rdirPresent ) then
+            radialTest: if (rdirPresent) then
 
-         ! Radial direction specified, i.e. a cylindrical coordinate
-         ! system is used for the velocity direction.
+                ! Radial direction specified, i.e. a cylindrical coordinate
+                ! system is used for the velocity direction.
 
-         ! Determine the unit vectors, which define the cylindrical
-         ! coordinate system aligned with the rotation axis.
+                ! Determine the unit vectors, which define the cylindrical
+                ! coordinate system aligned with the rotation axis.
 
-         call unitVectorsCylSystem(boco)
+                call unitVectorsCylSystem(boco)
 
-         ! Initialize wtheta to zero. This value will be used if no
-         ! theta velocity component was specified.
+                ! Initialize wtheta to zero. This value will be used if no
+                ! theta velocity component was specified.
 
-         wtheta = zero
+                wtheta = zero
 
-         ! Loop over the faces of the subface.
+                ! Loop over the faces of the subface.
 
-         do j=jBeg,jEnd
-            do i=iBeg,iEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
 
-               ! Determine the coordinates of the face center relative to
-               ! the rotation point of this section. Normally this is an
-               ! average of i-1, i, j-1, j, but due to the usage of the
-               ! pointer xf and the fact that x originally starts at 0,
-               ! an offset of 1 is introduced and thus the average should
-               ! be taken of i, i+1, j and j+1.
+                        ! Determine the coordinates of the face center relative to
+                        ! the rotation point of this section. Normally this is an
+                        ! average of i-1, i, j-1, j, but due to the usage of the
+                        ! pointer xf and the fact that x originally starts at 0,
+                        ! an offset of 1 is introduced and thus the average should
+                        ! be taken of i, i+1, j and j+1.
 
-               xc(1) = fourth*(xf(i,j,  1) + xf(i+1,j,  1)  &
-                    +         xf(i,j+1,1) + xf(i+1,j+1,1)) &
-                    - sections(sectionId)%rotCenter(1)
-               xc(2) = fourth*(xf(i,j,  2) + xf(i+1,j,  2)  &
-                    +         xf(i,j+1,2) + xf(i+1,j+1,2)) &
-                    - sections(sectionId)%rotCenter(2)
-               xc(3) = fourth*(xf(i,j,  3) + xf(i+1,j,  3)  &
-                    +         xf(i,j+1,3) + xf(i+1,j+1,3)) &
-                    - sections(sectionId)%rotCenter(3)
+                        xc(1) = fourth*(xf(i, j, 1) + xf(i + 1, j, 1) &
+                                        + xf(i, j + 1, 1) + xf(i + 1, j + 1, 1)) &
+                                - sections(sectionId)%rotCenter(1)
+                        xc(2) = fourth*(xf(i, j, 2) + xf(i + 1, j, 2) &
+                                        + xf(i, j + 1, 2) + xf(i + 1, j + 1, 2)) &
+                                - sections(sectionId)%rotCenter(2)
+                        xc(3) = fourth*(xf(i, j, 3) + xf(i + 1, j, 3) &
+                                        + xf(i, j + 1, 3) + xf(i + 1, j + 1, 3)) &
+                                - sections(sectionId)%rotCenter(3)
 
-               ! Determine the coordinates in the local cartesian frame,
-               ! i.e. the frame determined by axis, radVec1 and radVec2.
+                        ! Determine the coordinates in the local cartesian frame,
+                        ! i.e. the frame determined by axis, radVec1 and radVec2.
 
-               ax = xc(1)*axis(1)    + xc(2)*axis(2)    &
-                    + xc(3)*axis(3)
-               r1 = xc(1)*radVec1(1) + xc(2)*radVec1(2) &
-                    + xc(3)*radVec1(3)
-               r2 = xc(1)*radVec2(1) + xc(2)*radVec2(2) &
-                    + xc(3)*radVec2(3)
+                        ax = xc(1)*axis(1) + xc(2)*axis(2) &
+                             + xc(3)*axis(3)
+                        r1 = xc(1)*radVec1(1) + xc(2)*radVec1(2) &
+                             + xc(3)*radVec1(3)
+                        r2 = xc(1)*radVec2(1) + xc(2)*radVec2(2) &
+                             + xc(3)*radVec2(3)
 
-               ! Determine the weights of the unit vectors in the local
-               ! cylindrical system.
+                        ! Determine the weights of the unit vectors in the local
+                        ! cylindrical system.
 
-               wax  = bcVarArray(i,j,7)
-               wrad = bcVarArray(i,j,10)
-               if( tdirPresent ) wtheta = bcVarArray(i,j,11)
+                        wax = bcVarArray(i, j, 7)
+                        wrad = bcVarArray(i, j, 10)
+                        if (tdirPresent) wtheta = bcVarArray(i, j, 11)
 
-               ! Determine the direction in the local cartesian frame,
-               ! determined by axis, radVec1 and radVec2.
+                        ! Determine the direction in the local cartesian frame,
+                        ! determined by axis, radVec1 and radVec2.
 
-               var    = one/sqrt(max(eps,(r1*r1 + r2*r2)))
-               dir(1) = wax
-               dir(2) = var*(wrad*r1 - wtheta*r2)
-               dir(3) = var*(wrad*r2 + wtheta*r1)
+                        var = one/sqrt(max(eps, (r1*r1 + r2*r2)))
+                        dir(1) = wax
+                        dir(2) = var*(wrad*r1 - wtheta*r2)
+                        dir(3) = var*(wrad*r2 + wtheta*r1)
 
-               ! Transform this direction to the global cartesian frame.
+                        ! Transform this direction to the global cartesian frame.
 
-               BCData(boco)%flowXdirInlet(i,j) = dir(1)*axis(1)     &
-                    + dir(2)*radVec1(1) &
-                    + dir(3)*radVec2(1)
+                        BCData(boco)%flowXdirInlet(i, j) = dir(1)*axis(1) &
+                                                           + dir(2)*radVec1(1) &
+                                                           + dir(3)*radVec2(1)
 
-               BCData(boco)%flowYdirInlet(i,j) = dir(1)*axis(2)     &
-                    + dir(2)*radVec1(2) &
-                    + dir(3)*radVec2(2)
+                        BCData(boco)%flowYdirInlet(i, j) = dir(1)*axis(2) &
+                                                           + dir(2)*radVec1(2) &
+                                                           + dir(3)*radVec2(2)
 
-               BCData(boco)%flowZdirInlet(i,j) = dir(1)*axis(3)     &
-                    + dir(2)*radVec1(3) &
-                    + dir(3)*radVec2(3)
-            enddo
-         enddo
+                        BCData(boco)%flowZdirInlet(i, j) = dir(1)*axis(3) &
+                                                           + dir(2)*radVec1(3) &
+                                                           + dir(3)*radVec2(3)
+                    end do
+                end do
 
-      else radialTest
+                else radialTest
 
-         ! Cartesian direction specified. Either the angle or the
-         ! direction should be present.
+                ! Cartesian direction specified. Either the angle or the
+                ! direction should be present.
 
-         ! X-direction.
+                ! X-direction.
 
-         if( axPresent ) then
+                if (axPresent) then
 
-            ! Angle specified. Convert it to SI-units and determine
-            ! the corresponding direction.
+                    ! Angle specified. Convert it to SI-units and determine
+                    ! the corresponding direction.
 
-            call siAngle(angle(4), mult, trans)
+                    call siAngle(angle(4), mult, trans)
 
-            do j=jBeg,jEnd
-               do i=iBeg,iEnd
-                  BCData(boco)%flowXdirInlet(i,j) = &
-                       cos(mult*bcVarArray(i,j,4) + trans)
-               enddo
-            enddo
+                    do j = jBeg, jEnd
+                        do i = iBeg, iEnd
+                            BCData(boco)%flowXdirInlet(i, j) = &
+                                cos(mult*bcVarArray(i, j, 4) + trans)
+                        end do
+                    end do
 
-         else
+                else
 
-            ! Direction specified. Simply copy it.
+                    ! Direction specified. Simply copy it.
 
-            do j=jBeg,jEnd
-               do i=iBeg,iEnd
-                  BCData(boco)%flowXdirInlet(i,j) = bcVarArray(i,j,7)
-               enddo
-            enddo
+                    do j = jBeg, jEnd
+                        do i = iBeg, iEnd
+                            BCData(boco)%flowXdirInlet(i, j) = bcVarArray(i, j, 7)
+                        end do
+                    end do
 
-         endif
+                end if
 
-         ! Y-direction.
+                ! Y-direction.
 
-         if( ayPresent ) then
+                if (ayPresent) then
 
-            ! Angle specified. Convert it to SI-units and determine
-            ! the corresponding direction.
+                    ! Angle specified. Convert it to SI-units and determine
+                    ! the corresponding direction.
 
-            call siAngle(angle(5), mult, trans)
+                    call siAngle(angle(5), mult, trans)
 
-            do j=jBeg,jEnd
-               do i=iBeg,iEnd
-                  BCData(boco)%flowYdirInlet(i,j) = &
-                       cos(mult*bcVarArray(i,j,5) + trans)
-               enddo
-            enddo
+                    do j = jBeg, jEnd
+                        do i = iBeg, iEnd
+                            BCData(boco)%flowYdirInlet(i, j) = &
+                                cos(mult*bcVarArray(i, j, 5) + trans)
+                        end do
+                    end do
 
-         else
+                else
 
-            ! Direction specified. Simply copy it.
+                    ! Direction specified. Simply copy it.
 
-            do j=jBeg,jEnd
-               do i=iBeg,iEnd
-                  BCData(boco)%flowYdirInlet(i,j) = bcVarArray(i,j,8)
-               enddo
-            enddo
+                    do j = jBeg, jEnd
+                        do i = iBeg, iEnd
+                            BCData(boco)%flowYdirInlet(i, j) = bcVarArray(i, j, 8)
+                        end do
+                    end do
 
-         endif
+                end if
 
-         ! Z-direction.
+                ! Z-direction.
 
-         if( azPresent ) then
+                if (azPresent) then
 
-            ! Angle specified. Convert it to SI-units and determine
-            ! the corresponding direction.
+                    ! Angle specified. Convert it to SI-units and determine
+                    ! the corresponding direction.
 
-            call siAngle(angle(6), mult, trans)
+                    call siAngle(angle(6), mult, trans)
 
-            do j=jBeg,jEnd
-               do i=iBeg,iEnd
-                  BCData(boco)%flowZdirInlet(i,j) = &
-                       cos(mult*bcVarArray(i,j,6) + trans)
-               enddo
-            enddo
+                    do j = jBeg, jEnd
+                        do i = iBeg, iEnd
+                            BCData(boco)%flowZdirInlet(i, j) = &
+                                cos(mult*bcVarArray(i, j, 6) + trans)
+                        end do
+                    end do
 
-         else
+                else
 
-            ! Direction specified. Simply copy it.
+                    ! Direction specified. Simply copy it.
 
-            do j=jBeg,jEnd
-               do i=iBeg,iEnd
-                  BCData(boco)%flowZdirInlet(i,j) = bcVarArray(i,j,9)
-               enddo
-            enddo
+                    do j = jBeg, jEnd
+                        do i = iBeg, iEnd
+                            BCData(boco)%flowZdirInlet(i, j) = bcVarArray(i, j, 9)
+                        end do
+                    end do
 
-         endif
+                end if
 
-      endif radialTest
+            end if radialTest
 
-      ! Loop over the faces of the subface to compute some
-      ! additional info.
+            ! Loop over the faces of the subface to compute some
+            ! additional info.
 
-      do j=jBeg,jEnd
-         do i=iBeg,iEnd
+            do j = jBeg, jEnd
+                do i = iBeg, iEnd
 
-            ! Compute the total enthalpy from the given
-            ! total temperature.
-            TDim = BCData(boco)%ttInlet(i,j)*Tref
-            call computeHtot(TDim, Hdim)
-            BCData(boco)%htInlet(i,j) = Hdim/Href
+                    ! Compute the total enthalpy from the given
+                    ! total temperature.
+                    TDim = BCData(boco)%ttInlet(i, j)*Tref
+                    call computeHtot(TDim, Hdim)
+                    BCData(boco)%htInlet(i, j) = Hdim/Href
 
-            ! Determine the unit vector of the flow direction.
+                    ! Determine the unit vector of the flow direction.
 
-            dir(1) = BCData(boco)%flowXdirInlet(i,j)
-            dir(2) = BCData(boco)%flowYdirInlet(i,j)
-            dir(3) = BCData(boco)%flowZdirInlet(i,j)
+                    dir(1) = BCData(boco)%flowXdirInlet(i, j)
+                    dir(2) = BCData(boco)%flowYdirInlet(i, j)
+                    dir(3) = BCData(boco)%flowZdirInlet(i, j)
 
-            var = one/max(eps,sqrt(dir(1)**2 + dir(2)**2 + dir(3)**2))
+                    var = one/max(eps, sqrt(dir(1)**2 + dir(2)**2 + dir(3)**2))
 
-            BCData(boco)%flowXdirInlet(i,j) = var*dir(1)
-            BCData(boco)%flowYdirInlet(i,j) = var*dir(2)
-            BCData(boco)%flowZdirInlet(i,j) = var*dir(3)
+                    BCData(boco)%flowXdirInlet(i, j) = var*dir(1)
+                    BCData(boco)%flowYdirInlet(i, j) = var*dir(2)
+                    BCData(boco)%flowZdirInlet(i, j) = var*dir(3)
 
-         enddo
-      enddo
+                end do
+            end do
 
-      ! Check if the prescribed direction is an inflow. No halo's
-      ! should be included here and therefore the nodal range
-      ! (with an offset) must be used.
+            ! Check if the prescribed direction is an inflow. No halo's
+            ! should be included here and therefore the nodal range
+            ! (with an offset) must be used.
 
-      nn = 0
-      do j=(BCData(boco)%jnbeg+1), BCData(boco)%jnend
-         do i=(BCData(boco)%inbeg+1), BCData(boco)%inend
+            nn = 0
+            do j = (BCData(boco)%jnbeg + 1), BCData(boco)%jnend
+                do i = (BCData(boco)%inbeg + 1), BCData(boco)%inend
 
-            var = BCData(boco)%flowXdirInlet(i,j) &
-                 * BCData(boco)%norm(i,j,1)          &
-                 + BCData(boco)%flowYdirInlet(i,j) &
-                 * BCData(boco)%norm(i,j,2)          &
-                 + BCData(boco)%flowZdirInlet(i,j) &
-                 * BCData(boco)%norm(i,j,3)
+                    var = BCData(boco)%flowXdirInlet(i, j) &
+                          *BCData(boco)%norm(i, j, 1) &
+                          + BCData(boco)%flowYdirInlet(i, j) &
+                          *BCData(boco)%norm(i, j, 2) &
+                          + BCData(boco)%flowZdirInlet(i, j) &
+                          *BCData(boco)%norm(i, j, 3)
 
-            if(var > zero) nn = nn + 1
+                    if (var > zero) nn = nn + 1
 
-         enddo
-      enddo
+                end do
+            end do
 #ifndef USE_TAPENADE
-      if(nn > 0) then
-         write(errorMessage, strings) "Zone ", trim(cgnsDoms(nbkGlobal)%zonename), &
-           ", subsonic inlet boundary subface ", trim(cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%bocoName), &
-           ": Flow direction points out of the domain for some faces."
-
-          ! Call returnFail if the flow direction is pointing out of a
-          ! BC domain.  This will be caught by an allreduce in the
-          ! python layer.
-          call returnFail("totalSubsonicInlet", errorMessage)
-      endif
+            if (nn > 0) then
+                write (errorMessage, strings) "Zone ", trim(cgnsDoms(nbkGlobal)%zonename), &
+                    ", subsonic inlet boundary subface ", trim(cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%bocoName), &
+                    ": Flow direction points out of the domain for some faces."
+
+                ! Call returnFail if the flow direction is pointing out of a
+                ! BC domain.  This will be caught by an allreduce in the
+                ! python layer.
+                call returnFail("totalSubsonicInlet", errorMessage)
+            end if
 #endif
-    end subroutine totalSubsonicInlet
-
-  end subroutine BCDataSubsonicInflow
-
-  subroutine BCDataSubsonicOutflow(boco, bcVarArray, iBeg, iEnd, jBeg, jEnd)
-    !
-    !       BCDataSubsonicOutflow tries to extract the static pressure
-    !       for the currently active boundary face, which is a subsonic
-    !       outflow boundary.
-    !
-    use constants
-    use cgnsNames
-    use blockPointers, only : BCData, nbkGlobal, BCFaceID
-    use utils, only : terminate, siPressure
-    use flowVarRefState, only : pRef
+        end subroutine totalSubsonicInlet
+
+    end subroutine BCDataSubsonicInflow
+
+    subroutine BCDataSubsonicOutflow(boco, bcVarArray, iBeg, iEnd, jBeg, jEnd)
+        !
+        !       BCDataSubsonicOutflow tries to extract the static pressure
+        !       for the currently active boundary face, which is a subsonic
+        !       outflow boundary.
+        !
+        use constants
+        use cgnsNames
+        use blockPointers, only: BCData, nbkGlobal, BCFaceID
+        use utils, only: terminate, siPressure
+        use flowVarRefState, only: pRef
 #ifndef USE_TAPENADE
-    use commonFormats, only : strings
+        use commonFormats, only: strings
 #endif
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType) :: boco
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
-    real(kind=realType), dimension(iBeg:iEnd,jBeg:jEnd, nbcVarMax) :: bcVarArray
-    !
-    !      Local variables.
-    !
-    integer :: ierr
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType) :: boco
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
+        real(kind=realType), dimension(iBeg:iEnd, jBeg:jEnd, nbcVarMax) :: bcVarArray
+        !
+        !      Local variables.
+        !
+        integer :: ierr
 
-    integer(kind=intType) :: i, j
+        integer(kind=intType) :: i, j
 
-    real(kind=realType) :: mult, trans
+        real(kind=realType) :: mult, trans
 
-    character(len=maxStringLen) :: errorMessage
+        character(len=maxStringLen) :: errorMessage
 
-    ! Write an error message and terminate if it was not
-    ! possible to determine the static pressure.
+        ! Write an error message and terminate if it was not
+        ! possible to determine the static pressure.
 
 #ifndef USE_TAPENADE
-    if(.not. bcVarPresent(1)) then
+        if (.not. bcVarPresent(1)) then
 
-       write(errorMessage, strings) "Zone ", trim(cgnsDoms(nbkGlobal)%zonename), &
-         ", boundary subface ", trim(cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%bocoName), &
-         ": Static pressure not specified for subsonic outlet"
+            write (errorMessage, strings) "Zone ", trim(cgnsDoms(nbkGlobal)%zonename), &
+                ", boundary subface ", trim(cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%bocoName), &
+                ": Static pressure not specified for subsonic outlet"
 
-       call terminate("BCDataSubsonicOutflow", errorMessage)
+            call terminate("BCDataSubsonicOutflow", errorMessage)
 
-    endif
+        end if
 #endif
 
-    ! Convert to SI-units and store the pressure in ps.
-
-    call siPressure(mass(1), length(1), time(1), mult, trans)
-    do j=jBeg,jEnd
-       do i=iBeg,iEnd
-          BCData(boco)%ps(i,j) = (mult*bcVarArray(i,j,1) + trans)/Pref
-       enddo
-    enddo
-
-  end subroutine BCDataSubsonicOutflow
-
-  subroutine BCDataSupersonicInflow(boco, bcVarArray, iBeg, iEnd, jBeg, jEnd, &
-       allFlowPresent, allTurbPresent)
-    !
-    !       BCDataSupersonicInflow tries to extract the primitive state
-    !       vector for the currently active boundary face, which is a
-    !       supersonic inflow.
-    !
-    use constants
-    use cgnsNames
-    use blockPointers, only : BCData, nbkGlobal, BCFaceID, sectionID
-    use flowVarRefState, only : nwt, pInfCorr, wInf, uRef, rhoRef, pRef, muRef
-    use inputPhysics, onlY : equations, flowType, velDirFreeStream
-    use utils, only : siDensity, siPressure, siVelocity, siTemperature, terminate
+        ! Convert to SI-units and store the pressure in ps.
+
+        call siPressure(mass(1), length(1), time(1), mult, trans)
+        do j = jBeg, jEnd
+            do i = iBeg, iEnd
+                BCData(boco)%ps(i, j) = (mult*bcVarArray(i, j, 1) + trans)/Pref
+            end do
+        end do
+
+    end subroutine BCDataSubsonicOutflow
+
+    subroutine BCDataSupersonicInflow(boco, bcVarArray, iBeg, iEnd, jBeg, jEnd, &
+                                      allFlowPresent, allTurbPresent)
+        !
+        !       BCDataSupersonicInflow tries to extract the primitive state
+        !       vector for the currently active boundary face, which is a
+        !       supersonic inflow.
+        !
+        use constants
+        use cgnsNames
+        use blockPointers, only: BCData, nbkGlobal, BCFaceID, sectionID
+        use flowVarRefState, only: nwt, pInfCorr, wInf, uRef, rhoRef, pRef, muRef
+        use inputPhysics, onlY: equations, flowType, velDirFreeStream
+        use utils, only: siDensity, siPressure, siVelocity, siTemperature, terminate
 #ifndef USE_TAPENADE
-    use commonFormats, only : strings
-    use utils, only: returnFail
+        use commonFormats, only: strings
+        use utils, only: returnFail
 #endif
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: boco
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
-    real(kind=realType), dimension(iBeg:iEnd,jBeg:jEnd, nbcVarMax) :: bcVarArray
-    logical,               intent(inout) :: allFlowPresent
-    logical,               intent(inout) :: allTurbPresent
-    !
-    !      Local variables.
-    !
-    integer :: ierr
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: boco
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
+        real(kind=realType), dimension(iBeg:iEnd, jBeg:jEnd, nbcVarMax) :: bcVarArray
+        logical, intent(inout) :: allFlowPresent
+        logical, intent(inout) :: allTurbPresent
+        !
+        !      Local variables.
+        !
+        integer :: ierr
 
-    integer(kind=intType) :: i, j, nn
+        integer(kind=intType) :: i, j, nn
 
-    real(kind=realType) :: var
+        real(kind=realType) :: var
 
-    character(len=maxStringLen) :: errorMessage
+        character(len=maxStringLen) :: errorMessage
 
-    logical :: rhoPresent,  pPresent,    velPresent
-    logical :: velxPresent, velyPresent, velzPresent
-    logical :: velrPresent, veltPresent
+        logical :: rhoPresent, pPresent, velPresent
+        logical :: velxPresent, velyPresent, velzPresent
+        logical :: velrPresent, veltPresent
 
-    ! Store the logicals, which indicate success or failure
-    ! a bit more readable.
+        ! Store the logicals, which indicate success or failure
+        ! a bit more readable.
 
-    rhoPresent  = bcVarPresent(1)
-    pPresent    = bcVarPresent(2)
-    velxPresent = bcVarPresent(3)
-    velyPresent = bcVarPresent(4)
-    velzPresent = bcVarPresent(5)
-    velrPresent = bcVarPresent(6)
-    veltPresent = bcVarPresent(7)
+        rhoPresent = bcVarPresent(1)
+        pPresent = bcVarPresent(2)
+        velxPresent = bcVarPresent(3)
+        velyPresent = bcVarPresent(4)
+        velzPresent = bcVarPresent(5)
+        velrPresent = bcVarPresent(6)
+        veltPresent = bcVarPresent(7)
 
-    ! Check if a velocity vector is present.
+        ! Check if a velocity vector is present.
 
-    velPresent = .false.
-    if(velxPresent .and. velrPresent) velPresent = .true.
-    if(velxPresent .and. velyPresent .and. velzPresent) &
-         velPresent = .true.
+        velPresent = .false.
+        if (velxPresent .and. velrPresent) velPresent = .true.
+        if (velxPresent .and. velyPresent .and. velzPresent) &
+            velPresent = .true.
 
-    ! Check if rho, p and the velocity vector are present.
+        ! Check if rho, p and the velocity vector are present.
 
-    testPresent: if(rhoPresent .and. pPresent .and. velPresent) then
+        testPresent: if (rhoPresent .and. pPresent .and. velPresent) then
 
-       ! All the variables needed are prescribed. Set them.
+            ! All the variables needed are prescribed. Set them.
 
-       call prescribedSupersonicInlet
+            call prescribedSupersonicInlet
 
-    else testPresent
+            else testPresent
 
-       ! Not all variables are present. Check what type of flow
-       ! is to be solved.
+            ! Not all variables are present. Check what type of flow
+            ! is to be solved.
 
-       select case(flowType)
+            select case (flowType)
 
-       case (internalFlow)
+            case (internalFlow)
 
-          ! Internal flow. Data at the inlet must be specified;
-          ! no free stream data can be taken.
+                ! Internal flow. Data at the inlet must be specified;
+                ! no free stream data can be taken.
 
 #ifndef USE_TAPENADE
-          write(errorMessage, strings) "Zone ", trim(cgnsDoms(nbkGlobal)%zonename), &
-            ", boundary subface ", trim(cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%bocoName), &
-            ": Not enough data specified for supersonic inlet"
+                write (errorMessage, strings) "Zone ", trim(cgnsDoms(nbkGlobal)%zonename), &
+                    ", boundary subface ", trim(cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%bocoName), &
+                    ": Not enough data specified for supersonic inlet"
 
-          call terminate("BCDataSupersonicInflow", errorMessage)
+                call terminate("BCDataSupersonicInflow", errorMessage)
 #endif
 
-          !=============================================================
+                !=============================================================
 
-       case (externalFlow)
+            case (externalFlow)
 
-          ! External flow. Free stream data is used.
+                ! External flow. Free stream data is used.
 
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                BCData(boco)%rho(i,j)  = wInf(iRho)
-                BCData(boco)%velx(i,j) = wInf(ivx)
-                BCData(boco)%vely(i,j) = wInf(ivy)
-                BCData(boco)%velz(i,j) = wInf(ivz)
-                BCData(boco)%ps(i,j)   = PinfCorr
-             enddo
-          enddo
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        BCData(boco)%rho(i, j) = wInf(iRho)
+                        BCData(boco)%velx(i, j) = wInf(ivx)
+                        BCData(boco)%vely(i, j) = wInf(ivy)
+                        BCData(boco)%velz(i, j) = wInf(ivz)
+                        BCData(boco)%ps(i, j) = PinfCorr
+                    end do
+                end do
 
-          ! Set the turbulence values
-          allTurbPresent = setBCVarTurb(7_intType, boco, bcVarArray, &
-               iBeg, iEnd, jBeg, jEnd, BCData(boco)%turbInlet)
+                ! Set the turbulence values
+                allTurbPresent = setBCVarTurb(7_intType, boco, bcVarArray, &
+                                              iBeg, iEnd, jBeg, jEnd, BCData(boco)%turbInlet)
 
-          ! Set allFlowPresent to .false.
+                ! Set allFlowPresent to .false.
 
-          allFlowPresent = .false.
+                allFlowPresent = .false.
 
-       end select
+            end select
 
-    endif testPresent
+        end if testPresent
 
 #ifndef USE_TAPENADE
-    ! Check if the prescribed velocity is an inflow. No halo's
-    ! should be included here and therefore the nodal range
-    ! (with an offset) must be used.
+        ! Check if the prescribed velocity is an inflow. No halo's
+        ! should be included here and therefore the nodal range
+        ! (with an offset) must be used.
 
-    nn = 0
-    do j=(BCData(boco)%jnbeg+1), BCData(boco)%jnend
-       do i=(BCData(boco)%inbeg+1), BCData(boco)%inend
+        nn = 0
+        do j = (BCData(boco)%jnbeg + 1), BCData(boco)%jnend
+            do i = (BCData(boco)%inbeg + 1), BCData(boco)%inend
 
-          var = BCData(boco)%velx(i,j)*BCData(boco)%norm(i,j,1) &
-               + BCData(boco)%vely(i,j)*BCData(boco)%norm(i,j,2) &
-               + BCData(boco)%velz(i,j)*BCData(boco)%norm(i,j,3)
+                var = BCData(boco)%velx(i, j)*BCData(boco)%norm(i, j, 1) &
+                      + BCData(boco)%vely(i, j)*BCData(boco)%norm(i, j, 2) &
+                      + BCData(boco)%velz(i, j)*BCData(boco)%norm(i, j, 3)
 
-          if(var > zero) nn = nn + 1
+                if (var > zero) nn = nn + 1
 
-       enddo
-    enddo
+            end do
+        end do
 
-    if(nn > 0) then
-       write(errorMessage, strings) "Zone ", trim(cgnsDoms(nbkGlobal)%zonename), &
-         ", supersonic inlet boundary subface ", trim(cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%bocoName), &
-         ": Velocity points out of the domain for some faces."
+        if (nn > 0) then
+            write (errorMessage, strings) "Zone ", trim(cgnsDoms(nbkGlobal)%zonename), &
+                ", supersonic inlet boundary subface ", trim(cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%bocoName), &
+                ": Velocity points out of the domain for some faces."
 
-       call returnFail("BCDataSupersonicInflow", errorMessage)
-    endif
+            call returnFail("BCDataSupersonicInflow", errorMessage)
+        end if
 #endif
 
-  contains
-
-    subroutine prescribedSupersonicInlet
-      !
-      !         prescribedSupersonicInlet sets the variables for this
-      !         supersonic inlet to prescribed values.
-      !
-      use section, only: sections
-      implicit none
-      !
-      !        Local variables.
-      !
-      integer(kind=intType) :: i, j
-
-      real(kind=realType) :: mult, trans
-      real(kind=realType) :: ax, r1, r2, var, vax, vrad, vtheta
-
-      real(kind=realType), dimension(3) :: xc, vloc
-      real(kind=realType), dimension(3) :: multVel, transVel
-
-      ! Set the density. Take the conversion factor to SI-units
-      ! into account.
-
-      call siDensity(mass(1), length(1), mult, trans)
-
-      do j=jBeg,jEnd
-         do i=iBeg,iEnd
-            BCData(boco)%rho(i,j) = (mult*bcVarArray(i,j,1) + trans)/rhoRef
-         enddo
-      enddo
-
-      ! Set the pressure. Take the conversion factor to SI-units
-      ! into account.
-
-      call siPressure(mass(1), length(2), time(2), mult, trans)
-
-      do j=jBeg,jEnd
-         do i=iBeg,iEnd
-            BCData(boco)%ps(i,j) = (mult*bcVarArray(i,j,2) + trans)/pRef
-         enddo
-      enddo
-
-      ! Check the situation we are having here for the velocity.
-
-      testRadial: if( velrPresent ) then
-
-         ! Radial velocity component prescribed. This must be converted
-         ! to cartesian components.
-
-         ! Determine the unit vectors, which define the cylindrical
-         ! coordinate system aligned with the rotation axis.
-
-         call unitVectorsCylSystem(boco)
-
-         ! Determine the conversion factor to SI-units for the three
-         ! components. Note that a test must be made whether the theta
-         ! component is present.
-
-         call siVelocity(length(3), time(3), multVel(1), transVel(1))
-         call siVelocity(length(6), time(6), multVel(2), transVel(2))
-
-         if( veltPresent ) &
-              call siVelocity(length(7), time(7), multVel(3), transVel(3))
-
-         ! Initialize vtheta to zero. This value will be used
-         ! if no theta velocity component was specified.
-
-         vtheta = zero
-
-         ! Loop over the faces of the subface.
-
-         do j=jBeg,jEnd
-            do i=iBeg,iEnd
-
-               ! Determine the coordinates of the face center relative to
-               ! the rotation point of this section. Normally this is an
-               ! average of i-1, i, j-1, j, but due to the usage of the
-               ! pointer xf and the fact that x originally starts at 0,
-               ! an offset of 1 is introduced and thus the average should
-               ! be taken of i, i+1, j and j+1.
-
-               xc(1) = fourth*(xf(i,j,  1) + xf(i+1,j,  1)  &
-                    +         xf(i,j+1,1) + xf(i+1,j+1,1)) &
-                    - sections(sectionID)%rotCenter(1)
-               xc(2) = fourth*(xf(i,j,  2) + xf(i+1,j,  2)  &
-                    +         xf(i,j+1,2) + xf(i+1,j+1,2)) &
-                    - sections(sectionID)%rotCenter(2)
-               xc(3) = fourth*(xf(i,j,  3) + xf(i+1,j,  3)  &
-                    +         xf(i,j+1,3) + xf(i+1,j+1,3)) &
-                    - sections(sectionID)%rotCenter(3)
-
-               ! Determine the coordinates in the local cartesian frame,
-               ! i.e. the frame determined by axis, radVec1 and radVec2.
-
-               ax = xc(1)*axis(1)    + xc(2)*axis(2)    &
-                    + xc(3)*axis(3)
-               r1 = xc(1)*radVec1(1) + xc(2)*radVec1(2) &
-                    + xc(3)*radVec1(3)
-               r2 = xc(1)*radVec2(1) + xc(2)*radVec2(2) &
-                    + xc(3)*radVec2(3)
-
-               ! Determine the velocity components in the local
-               ! cylindrical system. Take the conversion to si units
-               ! into account.
-
-               vax  = multVel(1)*bcVarArray(i,j,3) + transVel(1)
-               vrad = multVel(2)*bcVarArray(i,j,6) + transVel(2)
-               if( veltPresent ) &
-                    vtheta = multVel(3)*bcVarArray(i,j,7) + transVel(3)
-
-               ! Determine the velocities in the local cartesian
-               ! frame determined by axis, radVec1 and radVec2.
+    contains
 
-               var     = one/sqrt(max(eps,(r1*r1 + r2*r2)))
-               vloc(1) = vax
-               vloc(2) = var*(vrad*r1 - vtheta*r2)
-               vloc(3) = var*(vrad*r2 + vtheta*r1)
+        subroutine prescribedSupersonicInlet
+            !
+            !         prescribedSupersonicInlet sets the variables for this
+            !         supersonic inlet to prescribed values.
+            !
+            use section, only: sections
+            implicit none
+            !
+            !        Local variables.
+            !
+            integer(kind=intType) :: i, j
 
-               ! Transform vloc to the global cartesian frame and
-               ! store the values.
+            real(kind=realType) :: mult, trans
+            real(kind=realType) :: ax, r1, r2, var, vax, vrad, vtheta
 
-               BCData(boco)%velx(i,j) = (vloc(1)*axis(1)    &
-                    + vloc(2)*radVec1(1) &
-                    + vloc(3)*radVec2(1))/uRef
+            real(kind=realType), dimension(3) :: xc, vloc
+            real(kind=realType), dimension(3) :: multVel, transVel
 
-               BCData(boco)%vely(i,j) = (vloc(1)*axis(2)    &
-                    + vloc(2)*radVec1(2) &
-                    + vloc(3)*radVec2(2))/uRef
+            ! Set the density. Take the conversion factor to SI-units
+            ! into account.
 
-               BCData(boco)%velz(i,j) = (vloc(1)*axis(3)    &
-                    + vloc(2)*radVec1(3) &
-                    + vloc(3)*radVec2(3))/uRef
-            enddo
-         enddo
+            call siDensity(mass(1), length(1), mult, trans)
 
-      else testRadial
+            do j = jBeg, jEnd
+                do i = iBeg, iEnd
+                    BCData(boco)%rho(i, j) = (mult*bcVarArray(i, j, 1) + trans)/rhoRef
+                end do
+            end do
 
-         ! Cartesian components prescribed.
+            ! Set the pressure. Take the conversion factor to SI-units
+            ! into account.
 
-         ! Determine the conversion factor to SI-units for the three
-         ! components.
+            call siPressure(mass(1), length(2), time(2), mult, trans)
 
-         call siVelocity(length(3), time(3), multVel(1), transVel(1))
-         call siVelocity(length(4), time(4), multVel(2), transVel(2))
-         call siVelocity(length(5), time(5), multVel(3), transVel(3))
+            do j = jBeg, jEnd
+                do i = iBeg, iEnd
+                    BCData(boco)%ps(i, j) = (mult*bcVarArray(i, j, 2) + trans)/pRef
+                end do
+            end do
 
-         ! Set the velocities.
+            ! Check the situation we are having here for the velocity.
 
-         do j=jBeg,jEnd
-            do i=iBeg,iEnd
-               BCData(boco)%velx(i,j) = (multVel(1)*bcVarArray(i,j,3) &
-                    + transVel(1))/uRef
-               BCData(boco)%vely(i,j) = (multVel(2)*bcVarArray(i,j,4) &
-                    + transVel(2))/uRef
-               BCData(boco)%velz(i,j) = (multVel(3)*bcVarArray(i,j,5) &
-                    + transVel(3))/uRef
-            enddo
-         enddo
+            testRadial: if (velrPresent) then
 
-      endif testRadial
+                ! Radial velocity component prescribed. This must be converted
+                ! to cartesian components.
 
-      ! Set the turbulence variables and check if all of them are
-      ! prescribed. If not set allTurbPresent to .false.
+                ! Determine the unit vectors, which define the cylindrical
+                ! coordinate system aligned with the rotation axis.
 
-      allTurbPresent = setBCVarTurb(7_intType, boco, bcVarArray, &
-           iBeg, iEnd, jBeg, jEnd, BCData(boco)%turbInlet)
+                call unitVectorsCylSystem(boco)
 
-    end subroutine prescribedSupersonicInlet
-
-  end subroutine BCDataSupersonicInflow
-
-  !=================================================================
-
-  logical function setBCVarTurb(offset, boco, bcVarArray, &
-       iBeg, iEnd, jBeg, jEnd, turbInlet)
-    !
-    !       SetBCVarTurb sets the array for the turbulent halo data
-    !       for inlet boundaries. This function returns .true. If all
-    !       turbulence variables could be interpolated and .false.
-    !       otherwise.
-    !
-    use constants
-    use flowVarRefState, only : nt1, nt2, muRef, Pref, rhoRef, wInf
-    use inputPhysics, only : equations, turbModel
-    use utils, only : terminate, siTurb
-
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: offset, boco, iBeg, iEnd, jBeg, jEnd
-    real(kind=realType), dimension(iBeg:iEnd, jBeg:jEnd, nbcVarMax) :: bcVarArray
-    real(kind=realType), dimension(:,:,:), pointer :: turbInlet
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn, mm, i, j
-    real(kind=realType)   :: mult, trans, nuRef
-    real(kind=realType), dimension(nt1:nt2) :: ref
-
-    ! Initialize setBCVarTurb to .true. And return immediately
-    ! if not the rans equations are solved.
-
-    setBCVarTurb = .true.
-    if(equations /= RANSEquations) return
-
-    ! Set the reference values depending on the turbulence model.
-
-    nuRef = muRef/rhoRef
-    select case (turbModel)
-
-    case (spalartAllmaras, spalartAllmarasEdwards)
-       ref(itu1) = nuRef
-
-    case (komegaWilcox, komegaModified, menterSST)
-       ref(itu1) = pRef/rhoRef
-       ref(itu2) = ref(itu1)/nuRef
-
-    case (ktau)
-       ref(itu1) = pRef/rhoRef
-       ref(itu2) = nuRef/ref(itu1)
+                ! Determine the conversion factor to SI-units for the three
+                ! components. Note that a test must be made whether the theta
+                ! component is present.
 
-    case (v2f)
-       ref(itu1) = pRef/rhoRef
-       ref(itu4) = ref(itu1)/nuRef
-       ref(itu2) = ref(itu1)*ref(itu4)
-       ref(itu3) = ref(itu1)
+                call siVelocity(length(3), time(3), multVel(1), transVel(1))
+                call siVelocity(length(6), time(6), multVel(2), transVel(2))
 
-    end select
+                if (veltPresent) &
+                    call siVelocity(length(7), time(7), multVel(3), transVel(3))
 
-    ! Loop over the number of turbulent variables. mm is the counter
-    ! in the arrays bcVarArray and bcVarPresent.
+                ! Initialize vtheta to zero. This value will be used
+                ! if no theta velocity component was specified.
 
-    mm = offset
-    turbLoop: do nn=nt1,nt2
-       mm = mm + 1
+                vtheta = zero
 
-       ! Check if the variable is present. If so, use the
-       ! interpolated data.
+                ! Loop over the faces of the subface.
 
-       if( bcVarPresent(mm) ) then
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
 
-          ! Conversion to SI units if possible.
+                        ! Determine the coordinates of the face center relative to
+                        ! the rotation point of this section. Normally this is an
+                        ! average of i-1, i, j-1, j, but due to the usage of the
+                        ! pointer xf and the fact that x originally starts at 0,
+                        ! an offset of 1 is introduced and thus the average should
+                        ! be taken of i, i+1, j and j+1.
 
-          call siTurb(mass(mm), length(mm), time(mm), temp(mm), &
-               bcVarNames(mm), mult, trans)
+                        xc(1) = fourth*(xf(i, j, 1) + xf(i + 1, j, 1) &
+                                        + xf(i, j + 1, 1) + xf(i + 1, j + 1, 1)) &
+                                - sections(sectionID)%rotCenter(1)
+                        xc(2) = fourth*(xf(i, j, 2) + xf(i + 1, j, 2) &
+                                        + xf(i, j + 1, 2) + xf(i + 1, j + 1, 2)) &
+                                - sections(sectionID)%rotCenter(2)
+                        xc(3) = fourth*(xf(i, j, 3) + xf(i + 1, j, 3) &
+                                        + xf(i, j + 1, 3) + xf(i + 1, j + 1, 3)) &
+                                - sections(sectionID)%rotCenter(3)
 
-          ! Set the turbulent variables.
+                        ! Determine the coordinates in the local cartesian frame,
+                        ! i.e. the frame determined by axis, radVec1 and radVec2.
 
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                turbInlet(i,j,nn) = (mult*bcVarArray(i,j,mm) + trans)/ref(nn)
-             enddo
-          enddo
+                        ax = xc(1)*axis(1) + xc(2)*axis(2) &
+                             + xc(3)*axis(3)
+                        r1 = xc(1)*radVec1(1) + xc(2)*radVec1(2) &
+                             + xc(3)*radVec1(3)
+                        r2 = xc(1)*radVec2(1) + xc(2)*radVec2(2) &
+                             + xc(3)*radVec2(3)
 
-       else
+                        ! Determine the velocity components in the local
+                        ! cylindrical system. Take the conversion to si units
+                        ! into account.
 
-          ! Turbulent variable not present. Use the free stream data.
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                turbInlet(i,j,nn) = wInf(nn)
-             enddo
-          enddo
+                        vax = multVel(1)*bcVarArray(i, j, 3) + transVel(1)
+                        vrad = multVel(2)*bcVarArray(i, j, 6) + transVel(2)
+                        if (veltPresent) &
+                            vtheta = multVel(3)*bcVarArray(i, j, 7) + transVel(3)
 
-          ! Set the logical value to false to indicate that indeed not
-          ! all the values were present
-          setBCVarTurb = .false.
-
-       endif
-    enddo turbLoop
-  end function setBCVarTurb
-
-
-#ifndef USE_TAPENADE
-
-  subroutine setBCData(bcDataNamesIn, bcDataIn, famLists, sps, &
-       nVar, nFamMax)
-    !--------------------------------------------------------------
-    ! Manual Differentiation Warning: Modifying this routine requires
-    ! modifying the hand-written forward and reverse routines.
-    ! --------------------------------------------------------------
-    use constants
-    use cgnsNames
-    use blockPointers, only : BCData, nDom, nBocos, nBKGlobal, &
-         cgnsSubFace, BCType
-    use sorting, only : famInList
-    use utils, only : setPointers,terminate, char2str
-    use communication, only : myid
-    use actuatorRegionData, only : actuatorRegions, nActuatorRegions
-    !
-    !      Subroutine arguments.
-    !
-    character, dimension(nVar, maxCGNSNameLen), intent(in) :: bcdatanamesin
-    real(kind=realType), dimension(nVar), intent(in) :: bcDataIn
-    integer(kind=intType), dimension(nVar, nFamMax) :: famLists
-    integer(kind=intType), intent(in) ::  sps , nVar, nFamMax
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, iVar, nFam, iRegion
-    character(maxCGNSNameLen) :: varName
-    domainsLoop: do i=1, nDom
-
-       ! Set the pointers to this block on groundLevel to make
-       ! the code readable.
-
-       call setPointers(i, 1_intType, sps)
-
-       varLoop: do iVar=1, nVar
-
-          ! Loop over the number of boundary condition subfaces.
-
-          bocoLoop: do j=1, nBocos
-
-             ! Store the cgns boundary subface number, the number of
-             ! boundary condition data sets and the data sets a bit easier.
-
-             cgnsBoco = cgnsSubface(j)
-             nDataSet = cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%nDataSet
-             dataSet => cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%dataSet
-
-             ! Check if this surface should be included or not:
-             nFam = famLists(iVar, 1)
-             famInclude: if (famInList(BCdata(j)%famID, famLists(iVar, 2:2+nFam-1))) then
-
-                select case (BCType(j))
-
-
-                case (NSWallAdiabatic)
-                   call setBCVarNamesAdiabaticWall
-                   call errorCheckbcDataNamesIn("NSWallAdiabatic", bcDataNamesIn)
-                case (NSWallIsothermal)
-                   call setBCVarNamesSupersonicInflow  ! possible bug?
-                   call errorCheckbcDataNamesIn("NSWallIsothermal", bcDataNamesIn)
-                case (SupersonicInflow)
-                   call setBCVarNamesSupersonicInflow
-                   call errorCheckbcDataNamesIn("SupersonicInflow", bcDataNamesIn)
-                case (SubsonicInflow)
-                   call setBCVarNamesSubsonicInflow
-                   call errorCheckbcDataNamesIn("SubsonicInflow", bcDataNamesIn)
-                case (SubsonicOutflow)
-                   call setBCVarNamesSubsonicOutflow
-                   call errorCheckbcDataNamesIn("SubsonicOutflow", bcDataNamesIn)
-                case default
-                   call terminate('setBCData', &
-                        'This is not a valid boundary condtion for setBCData')
-                end select
-                call insertToDataSet(bcDataNamesIn, bcDataIn)
-
-             end if famInclude
-          end do bocoLoop
-       end do varLoop
-    end do domainsLoop
-
-    ! Loop over any actuator regions since they also could have to set BCData
-    regionLoop: do iRegion=1, nActuatorRegions
-       varLoop2: do iVar=1, nVar
-          nFam = famLists(iVar, 1)
-          famInclude2: if (famInList(actuatorRegions(iRegion)%famID, famLists(iVar, 2:2+nFam-1))) then
-
-             ! Extract the name
-             varName = char2str(bcDataNamesIn(iVar,:), maxCGNSNameLen)
-
-             if (trim(varName) == "Thrust") then
-                actuatorRegions(iRegion)%force = actuatorRegions(iRegion)%axisVec* &
-                     bcDataIn(iVar)
-             else if (trim(varName) == "Torque") then
-                actuatorRegions(iRegion)%torque = bcDataIn(iVar)
-             else if (trim(varName) == "Heat") then
-                actuatorRegions(iRegion)%heat = bcDataIn(iVar)
-             end if
-          end if famInclude2
-       end do varLoop2
-    end do regionLoop
-
-  end subroutine setBCData
-
-  subroutine setBCData_d(bcDataNamesIn, bcDataIn, bcDataInd, famLists, sps, &
-       nVar, nFamMax)
-    !------------------------------------------------------------------------
-    ! Manual Differentiation Warning: This routine is differentiated by hand.
-    ! -----------------------------------------------------------------------
-    use constants
-    use cgnsNames
-    use blockPointers, only : BCData, nDom, nBocos, nBKGlobal, &
-         cgnsSubFace, BCType
-    use sorting, only : famInList
-    use utils, only : setPointers_d, terminate, char2str
-    use actuatorRegionData, only : actuatorRegionsd, actuatorRegions, nActuatorRegions
-    !
-    !      Subroutine arguments.
-    !
-    character, dimension(nVar, maxCGNSNameLen), intent(in) :: bcdatanamesin
-    real(kind=realType), dimension(nVar), intent(in) :: bcDataIn, bcDataInd
-    integer(kind=intType), dimension(nVar, nFamMax) :: famLists
-    integer(kind=intType), intent(in) ::  sps , nVar, nFamMax
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, iVar, nFam
-    character(maxCGNSNameLen) :: varName
-
-    domainsLoop: do i=1, nDom
-
-       ! Set the pointers to this block on groundLevel to make
-       ! the code readable.
-
-       call setPointers_d(i, 1_intType, sps)
-
-       varLoop: do iVar=1, nVar
-
-          ! Loop over the number of boundary condition subfaces.
-
-          bocoLoop: do j=1, nBocos
-
-             ! Store the cgns boundary subface number, the number of
-             ! boundary condition data sets and the data sets a bit easier.
-
-             cgnsBoco = cgnsSubface(j)
-             nDataSet = cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%nDataSet
-             dataSet => cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%dataSet
-             dataSetd => cgnsDomsd(nbkGlobal)%bocoInfo(cgnsBoco)%dataSet
-
-             ! Check if this surface should be included or not:
-             nFam = famLists(iVar, 1)
-             famInclude: if (famInList(BCdata(j)%famID, famLists(iVar, 2:2+nFam-1))) then
-
-                select case (BCType(j))
-
-                case (NSWallAdiabatic)
-                   call setBCVarNamesAdiabaticWall
-                   call errorCheckbcDataNamesIn("NSWallAdiabatic", bcDataNamesIn)
-                case (NSWallIsothermal)
-                   call setBCVarNamesSupersonicInflow
-                   call errorCheckbcDataNamesIn("NSWallIsothermal", bcDataNamesIn)
-                case (SupersonicInflow)
-                   call setBCVarNamesSupersonicInflow
-                   call errorCheckbcDataNamesIn("SupersonicInflow", bcDataNamesIn)
-                case (SubsonicInflow)
-                   call setBCVarNamesSubsonicInflow
-                   call errorCheckbcDataNamesIn("SubsonicInflow", bcDataNamesIn)
-                case (SubsonicOutflow)
-                   call setBCVarNamesSubsonicOutflow
-                   call errorCheckbcDataNamesIn("SubsonicOutflow", bcDataNamesIn)
-                case default
-                   call terminate('setBCData', &
-                        'This is not a valid boundary condtion for setBCData')
-                end select
-                call insertToDataSet_d(bcDataNamesIn, bcDataIn, bcDataInd)
-
-             end if famInclude
-          end do bocoLoop
-       end do varLoop
-    end do domainsLoop
-
-    ! Loop over any actuator regions since they also could have to set BCData
-    regionLoop: do iRegion=1, nActuatorRegions
-       varLoop2: do iVar=1, nVar
-          nFam = famLists(iVar, 1)
-          famInclude2: if (famInList(actuatorRegions(iRegion)%famID, famLists(iVar, 2:2+nFam-1))) then
-
-             ! Extract the name
-             varName = char2str(bcDataNamesIn(iVar,:), maxCGNSNameLen)
-
-             if (trim(varName) == "Thrust") then
-                actuatorRegions(iRegion)%force = actuatorRegions(iRegion)%axisVec* &
-                     bcDataIn(iVar)
-                actuatorRegionsd(iRegion)%force = actuatorRegions(iRegion)%axisVec* &
-                     bcDataInd(iVar)
-             else if (trim(varName) == "Torque") then
-                actuatorRegions(iRegion)%torque = bcDataIn(iVar)
-                actuatorRegionsd(iRegion)%torque = bcDataInd(iVar)
-             else if (trim(varName) == "Heat") then
-                actuatorRegions(iRegion)%heat = bcDataIn(iVar)
-                actuatorRegionsd(iRegion)%heat = bcDataInd(iVar)
-             end if
-          end if famInclude2
-       end do varLoop2
-    end do regionLoop
-
-  end subroutine setBCData_d
-
-  subroutine setBCData_b(bcDataNamesIn, bcDataIn, bcDataInd, famLists, sps, &
-       nVar, nFamMax)
-    !------------------------------------------------------------------------
-    ! Manual Differentiation Warning: This routine is differentiated by hand.
-    ! -----------------------------------------------------------------------
-    use constants
-    use cgnsNames
-    use blockPointers, only : BCData, nDom, nBocos, nBKGlobal, &
-         cgnsSubFace, BCType
-    use sorting, only : famInList
-    use utils, only : setPointers_b, terminate, char2str
-    use actuatorRegionData, only : actuatorRegionsd, actuatorRegions, nActuatorRegions
-
-    !
-    !      Subroutine arguments.
-    !
-    character, dimension(nVar, maxCGNSNameLen), intent(in) :: bcdatanamesin
-    real(kind=realType), dimension(nVar), intent(in) :: bcDataIn
-    real(kind=realType), dimension(nVar), intent(out) :: bcDataInd
-    integer(kind=intType), dimension(nVar, nFamMax) :: famLists
-    integer(kind=intType), intent(in) ::  sps , nVar, nFamMax
-
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, iVar, nFam
-    character(maxCGNSNameLen) :: varName
-
-    domainsLoop: do i=1, nDom
-
-       ! Set the pointers to this block on groundLevel to make
-       ! the code readable.
-
-       call setPointers_b(i, 1_intType, sps)
-
-       varLoop: do iVar=1, nVar
-
-          ! Loop over the number of boundary condition subfaces.
-
-          bocoLoop: do j=1, nBocos
-
-             ! Store the cgns boundary subface number, the number of
-             ! boundary condition data sets and the data sets a bit easier.
-
-             cgnsBoco = cgnsSubface(j)
-             nDataSet = cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%nDataSet
-             dataSet => cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%dataSet
-             dataSetd => cgnsDomsd(nbkGlobal)%bocoInfo(cgnsBoco)%dataSet
-
-             ! Check if this surface should be included or not:
-             nFam = famLists(iVar, 1)
-             famInclude: if (famInList(BCdata(j)%famID, famLists(iVar, 2:2+nFam-1))) then
-
-                select case (BCType(j))
-
-                case (NSWallAdiabatic)
-                   call setBCVarNamesAdiabaticWall
-                   call errorCheckbcDataNamesIn("NSWallAdiabatic", bcDataNamesIn)
-                case (NSWallIsothermal)
-                   call setBCVarNamesSupersonicInflow  ! possible bug?
-                   call errorCheckbcDataNamesIn("NSWallIsothermal", bcDataNamesIn)
-                case (SupersonicInflow)
-                   call setBCVarNamesSupersonicInflow
-                   call errorCheckbcDataNamesIn("SupersonicInflow", bcDataNamesIn)
-                case (SubsonicInflow)
-                   call setBCVarNamesSubsonicInflow
-                   call errorCheckbcDataNamesIn("SubsonicInflow", bcDataNamesIn)
-                case (SubsonicOutflow)
-                   call setBCVarNamesSubsonicOutflow
-                   call errorCheckbcDataNamesIn("SubsonicOutflow", bcDataNamesIn)
-                case default
-                   call terminate('setBCData', &
-                        'This is not a valid boundary condtion for setBCData')
-                end select
-                call insertToDataSet_b(bcDataNamesIn, bcDataIn, bcDataInd)
-
-             end if famInclude
-          end do bocoLoop
-       end do varLoop
-    end do domainsLoop
-
-    ! Loop over any actuator regions since they also could have to set BCData
-    regionLoop: do iRegion=1, nActuatorRegions
-       varLoop2: do iVar=1, nVar
-          nFam = famLists(iVar, 1)
-          famInclude2: if (famInList(actuatorRegions(iRegion)%famID, famLists(iVar, 2:2+nFam-1))) then
-
-             ! Extract the name
-             varName = char2str(bcDataNamesIn(iVar,:), maxCGNSNameLen)
-
-             if (trim(varName) == "Thrust") then
-                bcDataInd(ivar) = &
-                     sum(actuatorRegions(iRegion)%axisVec*actuatorRegionsd(iRegion)%force)
-             else if (trim(varName) == "Torque") then
-                bcDataInd(ivar) = actuatorRegionsd(iRegion)%torque
-             else if (trim(varName) == "Heat") then
-               bcDataInd(ivar) = actuatorRegionsd(iRegion)%heat
-             end if
-          end if famInclude2
-       end do varLoop2
-    end do regionLoop
-
-  end subroutine setBCData_b
-
-  subroutine extractFromDataSet(bcVarArray)
-    !--------------------------------------------------------------
-    ! Manual Differentiation Warning: Modifying this routine requires
-    ! modifying the hand-written forward and reverse routines.
-    ! --------------------------------------------------------------
-    !
-    !       extractFromDataSet tries to extract and interpolate the
-    !       variables in bcVarNames from the cgns data set.
-    !       If successful the corresponding entry of bcVarPresent is
-    !       set to .true., otherwise it is set to .false.
-    !
-    use constants
-    use cgnsNames
-    use blockPointers, onlY : nbkGlobal
-    use utils, only : terminate
-    implicit none
+                        ! Determine the velocities in the local cartesian
+                        ! frame determined by axis, radVec1 and radVec2.
 
-    ! Input
-    real(kind=realType), dimension(:, :, :) :: bcVarArray
+                        var = one/sqrt(max(eps, (r1*r1 + r2*r2)))
+                        vloc(1) = vax
+                        vloc(2) = var*(vrad*r1 - vtheta*r2)
+                        vloc(3) = var*(vrad*r2 + vtheta*r1)
 
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: k, l, m, n
-    integer(kind=intType) :: nInter, nDim, nVarPresent, nCoor
+                        ! Transform vloc to the global cartesian frame and
+                        ! store the values.
 
-    integer(kind=intType), dimension(3) :: dataDim, coor
-    integer(kind=intType), dimension(2,3) :: indCoor
-    integer(kind=intType), dimension(2,nbcVar) :: ind
+                        BCData(boco)%velx(i, j) = (vloc(1)*axis(1) &
+                                                   + vloc(2)*radVec1(1) &
+                                                   + vloc(3)*radVec2(1))/uRef
 
-    character(len=maxStringLen) :: errorMessage
+                        BCData(boco)%vely(i, j) = (vloc(1)*axis(2) &
+                                                   + vloc(2)*radVec1(2) &
+                                                   + vloc(3)*radVec2(2))/uRef
 
-    logical :: xPresent, yPresent, zPresent, rPresent
-    logical :: firstVar
+                        BCData(boco)%velz(i, j) = (vloc(1)*axis(3) &
+                                                   + vloc(2)*radVec1(3) &
+                                                   + vloc(3)*radVec2(3))/uRef
+                    end do
+                end do
 
-    ! Determine whether the variables are specified and if so,
-    ! where they are located in the data set. As the number of
-    ! variables specified is usually not so big, a linear search
-    ! algorithm is perfectly okay. At the moment only the Dirichlet
-    ! arrays are checked.
+                else testRadial
 
-    nVarPresent = 0
-
-    do m=1,nbcVar
-       bcVarPresent(m) = .false.
-
-       dataSetLoop: do k=1,nDataSet
-          do l=1,dataSet(k)%nDirichletArrays
-             if(dataSet(k)%dirichletArrays(l)%arrayName == &
-                  bcVarNames(m)) then
-
-                ! Variable is present. Store the indices, update
-                ! nVarPresent and set bcVarPresent(m) to .True.
-
-                ind(1,m) = k; ind(2,m) = l
-
-                nVarPresent      = nVarPresent + 1
-                bcVarPresent(m) = .true.
-
-                ! Set the units for this variable.
-
-                mass(m)   = dataSet(k)%dirichletArrays(l)%mass
-                length(m) = dataSet(k)%dirichletArrays(l)%len
-                time(m)   = dataSet(k)%dirichletArrays(l)%time
-                temp(m)   = dataSet(k)%dirichletArrays(l)%temp
-                angle(m)  = dataSet(k)%dirichletArrays(l)%angle
-
-                ! Exit the search loop, as the variable was found.
-
-                exit dataSetLoop
-
-             endif
-          enddo
-       enddo dataSetLoop
-    enddo
-
-    ! Find out whether the given data points are equal for every
-    ! variable or that every variable must be interpolated
-    ! differently.
-
-    do m=1,nbcVar
-       if( bcVarPresent(m) ) then
-          k = ind(1,m)
-          l = ind(2,m)
-
-          bcVarArray(:,:,m) = &
-               dataSet(k)%dirichletArrays(l)%dataArr(1)
-       endif
-    enddo
-
-  end subroutine extractFromDataSet
-
-  subroutine extractFromDataSet_d(bcVarArray, bcVarArrayd)
-    !------------------------------------------------------------------------
-    ! Manual Differentiation Warning: This routine is differentiated by hand.
-    ! -----------------------------------------------------------------------
-    !
-    !       extractFromDataSet tries to extract and interpolate the
-    !       variables in bcVarNames from the cgns data set.
-    !       If successful the corresponding entry of bcVarPresent is
-    !       set to .true., otherwise it is set to .false.
-    !
-    use constants
-    use cgnsNames
-    use blockPointers, onlY : nbkGlobal
-    use utils, only : terminate
-    implicit none
-
-    ! Input
-    real(kind=realType), dimension(:, :, :) :: bcVarArray, bcVarArrayd
-
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: k, l, m, n
-    integer(kind=intType) :: nInter, nDim, nVarPresent, nCoor
-
-    integer(kind=intType), dimension(3) :: dataDim, coor
-    integer(kind=intType), dimension(2,3) :: indCoor
-    integer(kind=intType), dimension(2,nbcVar) :: ind
-
-    character(len=maxStringLen) :: errorMessage
-
-    logical :: xPresent, yPresent, zPresent, rPresent
-    logical :: firstVar
-
-    ! Determine whether the variables are specified and if so,
-    ! where they are located in the data set. As the number of
-    ! variables specified is usually not so big, a linear search
-    ! algorithm is perfectly okay. At the moment only the Dirichlet
-    ! arrays are checked.
-
-    nVarPresent = 0
-
-    do m=1,nbcVar
-       bcVarPresent(m) = .false.
-
-       dataSetLoop: do k=1,nDataSet
-          do l=1,dataSet(k)%nDirichletArrays
-             if(dataSet(k)%dirichletArrays(l)%arrayName == &
-                  bcVarNames(m)) then
-
-                ! Variable is present. Store the indices, update
-                ! nVarPresent and set bcVarPresent(m) to .True.
-
-                ind(1,m) = k; ind(2,m) = l
-
-                nVarPresent      = nVarPresent + 1
-                bcVarPresent(m) = .true.
-
-                ! Set the units for this variable.
-
-                mass(m)   = dataSet(k)%dirichletArrays(l)%mass
-                length(m) = dataSet(k)%dirichletArrays(l)%len
-                time(m)   = dataSet(k)%dirichletArrays(l)%time
-                temp(m)   = dataSet(k)%dirichletArrays(l)%temp
-                angle(m)  = dataSet(k)%dirichletArrays(l)%angle
-
-                ! Exit the search loop, as the variable was found.
-
-                exit dataSetLoop
-
-             endif
-          enddo
-       enddo dataSetLoop
-    enddo
-
-    ! Find out whether the given data points are equal for every
-    ! variable or that every variable must be interpolated
-    ! differently.
-
-    do m=1,nbcVar
-       if( bcVarPresent(m) ) then
-          k = ind(1,m)
-          l = ind(2,m)
-
-          bcVarArray(:,:,m) = &
-               dataSet(k)%dirichletArrays(l)%dataArr(1)
-          bcVarArrayd(:,:,m) = &
-               dataSetd(k)%dirichletArrays(l)%dataArr(1)
-       endif
-    enddo
-
-  end subroutine extractFromDataSet_d
-
-  subroutine extractFromDataSet_b(bcVarArray, bcVarArrayd)
-    !------------------------------------------------------------------------
-    ! Manual Differentiation Warning: This routine is differentiated by hand.
-    ! -----------------------------------------------------------------------
-    !
-    !       extractFromDataSet tries to extract and interpolate the
-    !       variables in bcVarNames from the cgns data set.
-    !       If successful the corresponding entry of bcVarPresent is
-    !       set to .true., otherwise it is set to .false.
-    !
-    use constants
-    use cgnsNames
-    use blockPointers, onlY : nbkGlobal
-    use utils, only : terminate
-    implicit none
+                ! Cartesian components prescribed.
 
-    ! Input
-    real(kind=realType), dimension(:, :, :) :: bcVarArray, bcVarArrayd
+                ! Determine the conversion factor to SI-units for the three
+                ! components.
 
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: k, l, m, n
-    integer(kind=intType) :: nInter, nDim, nVarPresent, nCoor
+                call siVelocity(length(3), time(3), multVel(1), transVel(1))
+                call siVelocity(length(4), time(4), multVel(2), transVel(2))
+                call siVelocity(length(5), time(5), multVel(3), transVel(3))
 
-    integer(kind=intType), dimension(3) :: dataDim, coor
-    integer(kind=intType), dimension(2,3) :: indCoor
-    integer(kind=intType), dimension(2,nbcVar) :: ind
+                ! Set the velocities.
 
-    character(len=maxStringLen) :: errorMessage
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        BCData(boco)%velx(i, j) = (multVel(1)*bcVarArray(i, j, 3) &
+                                                   + transVel(1))/uRef
+                        BCData(boco)%vely(i, j) = (multVel(2)*bcVarArray(i, j, 4) &
+                                                   + transVel(2))/uRef
+                        BCData(boco)%velz(i, j) = (multVel(3)*bcVarArray(i, j, 5) &
+                                                   + transVel(3))/uRef
+                    end do
+                end do
 
-    logical :: xPresent, yPresent, zPresent, rPresent
-    logical :: firstVar
+            end if testRadial
 
-    ! Determine whether the variables are specified and if so,
-    ! where they are located in the data set. As the number of
-    ! variables specified is usually not so big, a linear search
-    ! algorithm is perfectly okay. At the moment only the Dirichlet
-    ! arrays are checked.
+            ! Set the turbulence variables and check if all of them are
+            ! prescribed. If not set allTurbPresent to .false.
 
-    nVarPresent = 0
+            allTurbPresent = setBCVarTurb(7_intType, boco, bcVarArray, &
+                                          iBeg, iEnd, jBeg, jEnd, BCData(boco)%turbInlet)
 
-    do m=1,nbcVar
-       bcVarPresent(m) = .false.
+        end subroutine prescribedSupersonicInlet
 
-       dataSetLoop: do k=1,nDataSet
-          do l=1,dataSet(k)%nDirichletArrays
-             if(dataSet(k)%dirichletArrays(l)%arrayName == &
-                  bcVarNames(m)) then
+    end subroutine BCDataSupersonicInflow
 
-                ! Variable is present. Store the indices, update
-                ! nVarPresent and set bcVarPresent(m) to .True.
-
-                ind(1,m) = k; ind(2,m) = l
-
-                nVarPresent      = nVarPresent + 1
-                bcVarPresent(m) = .true.
-
-                ! Set the units for this variable.
-
-                mass(m)   = dataSet(k)%dirichletArrays(l)%mass
-                length(m) = dataSet(k)%dirichletArrays(l)%len
-                time(m)   = dataSet(k)%dirichletArrays(l)%time
-                temp(m)   = dataSet(k)%dirichletArrays(l)%temp
-                angle(m)  = dataSet(k)%dirichletArrays(l)%angle
-
-                ! Exit the search loop, as the variable was found.
-
-                exit dataSetLoop
-
-             endif
-          enddo
-       enddo dataSetLoop
-    enddo
-
-    ! Find out whether the given data points are equal for every
-    ! variable or that every variable must be interpolated
-    ! differently.
-
-    do m=1,nbcVar
-       if( bcVarPresent(m) ) then
-          k = ind(1,m)
-          l = ind(2,m)
-          ! Accumulate. No need to zero.
-          dataSetd(k)%dirichletArrays(l)%dataArr(1) = &
-               dataSetd(k)%dirichletArrays(l)%dataArr(1) + sum(bcVarArrayd(:,:,m))
-          bcvararrayd(:, :, m) = 0.0_8
-       endif
-    enddo
-
-  end subroutine extractFromDataSet_b
-
-  subroutine insertToDataSet(bcDataNamesIn, bcDataIn)
-    !--------------------------------------------------------------
-    ! Manual Differentiation Warning: Modifying this routine requires
-    ! modifying the hand-written forward and reverse routines.
-    ! --------------------------------------------------------------
-    use constants
-    use utils, only: char2str
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    character, dimension(:,:), intent(in) :: bcdatanamesin
-    real(kind=realType), dimension(:), intent(in):: bcDataIn
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: k, l, m, n, q
-    integer(kind=intType) :: ind(2,nbcVar), nVarPresent
-    character(len=maxCGNSNameLen) :: varName
-
-    nVarPresent = 0
-
-    do m=1, size(bcDataIn)
-       bcVarPresent(m) = .false.
-
-       dataSetLoop: do k=1,nDataSet
-          do l=1,dataSet(k)%nDirichletArrays
-             if(dataSet(k)%dirichletArrays(l)%arrayName == &
-                  bcVarNames(m)) then
-
-                ! Variable is present. Store the indices, update
-                ! nVarPresent and set bcVarPresent(m) to .True.
-
-                ind(1,m) = k; ind(2,m) = l
-
-                nVarPresent      = nVarPresent + 1
-                bcVarPresent(m) = .true.
-
-                ! Exit the search loop, as the variable was found.
-
-                exit dataSetLoop
-
-             endif
-          enddo
-       enddo dataSetLoop
-    enddo
-
-    do m=1, size(bcDataIn)
-       if( bcVarPresent(m) ) then
-          k = ind(1,m)
-          l = ind(2,m)
-          do n=1, size(bcDataIn)
-
-             varName = char2str(bcDataNamesIn(n,:), maxCGNSNameLen)
-
-             if (bcVarNames(m) == varname) then
-                dataSet(k)%dirichletArrays(l)%dataArr(1) = bcDataIn(n)
-             end if
-          end do
-       endif
-    enddo
-  end subroutine insertToDataSet
-
-  subroutine insertToDataSet_d(bcDataNamesIn, bcDataIn, bcDataInd)
-    !------------------------------------------------------------------------
-    ! Manual Differentiation Warning: This routine is differentiated by hand.
-    ! -----------------------------------------------------------------------
-    use constants
-    use utils, only: char2str
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    character, dimension(:,:), intent(in) :: bcdatanamesin
-    real(kind=realType), dimension(:), intent(in):: bcDataIn, bcDataInd
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: k, l, m, n, q
-    integer(kind=intType) :: ind(2,nbcVar), nVarPresent
-    character(len=maxCGNSNameLen) :: varName
-
-    nVarPresent = 0
-
-    do m=1, size(bcDataIn)
-       bcVarPresent(m) = .false.
-
-       dataSetLoop: do k=1,nDataSet
-          do l=1,dataSet(k)%nDirichletArrays
-             if(dataSet(k)%dirichletArrays(l)%arrayName == &
-                  bcVarNames(m)) then
-
-                ! Variable is present. Store the indices, update
-                ! nVarPresent and set bcVarPresent(m) to .True.
-
-                ind(1,m) = k; ind(2,m) = l
-
-                nVarPresent      = nVarPresent + 1
-                bcVarPresent(m) = .true.
-
-                ! Exit the search loop, as the variable was found.
-
-                exit dataSetLoop
-
-             endif
-          enddo
-       enddo dataSetLoop
-    enddo
-
-    do m=1, size(bcDataIn)
-       if( bcVarPresent(m) ) then
-          k = ind(1,m)
-          l = ind(2,m)
-          do n=1, size(bcDataIn)
-
-             varName = char2str(bcDataNamesIn(n,:), maxCGNSNameLen)
-
-             if (bcVarNames(m) == varname) then
-                dataSet(k)%dirichletArrays(l)%dataArr(1) = bcDataIn(n)
-                dataSetd(k)%dirichletArrays(l)%dataArr(1) = bcDataInd(n)
-             end if
-          end do
-       endif
-    enddo
-  end subroutine insertToDataSet_d
-
-  subroutine insertToDataSet_b(bcDataNamesIn, bcDataIn, bcDataInd)
-    !------------------------------------------------------------------------
-    ! Manual Differentiation Warning: This routine is differentiated by hand.
-    ! -----------------------------------------------------------------------
-    use constants
-    use utils, only: char2str
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    character, dimension(:,:), intent(in) :: bcdatanamesin
-    real(kind=realType), dimension(:), intent(in):: bcDataIn
-    real(kind=realType), dimension(:), intent(out) :: bcDataInd
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: k, l, m, n, q
-    integer(kind=intType) :: ind(2,nbcVar), nVarPresent
-    character(len=maxCGNSNameLen) :: varName
-
-    nVarPresent = 0
-
-    do m=1, size(bcDataIn)
-       bcVarPresent(m) = .false.
-
-       dataSetLoop: do k=1,nDataSet
-          do l=1,dataSet(k)%nDirichletArrays
-             if(dataSet(k)%dirichletArrays(l)%arrayName == &
-                  bcVarNames(m)) then
-
-                ! Variable is present. Store the indices, update
-                ! nVarPresent and set bcVarPresent(m) to .True.
-
-                ind(1,m) = k; ind(2,m) = l
-
-                nVarPresent      = nVarPresent + 1
-                bcVarPresent(m) = .true.
-
-                ! Exit the search loop, as the variable was found.
-
-                exit dataSetLoop
-
-             endif
-          enddo
-       enddo dataSetLoop
-    enddo
-
-    do m=1, size(bcDataIn)
-       if( bcVarPresent(m) ) then
-          k = ind(1,m)
-          l = ind(2,m)
-          do n=1, size(bcDataIn)
-
-             varName = char2str(bcDataNamesIn(n,:), maxCGNSNameLen)
-
-             if (bcVarNames(m) == varname) then
-                bcDataInd(n) = bcdataind(n) + dataSetd(k)%dirichletArrays(l)%dataArr(1)
-                datasetd(k)%dirichletarrays(l)%dataarr(1) = 0.0_8
-             end if
-          end do
-       endif
-    enddo
-  end subroutine insertToDataSet_b
-  !--------------------------------------------
-  ! Initialization routines
-  !--------------------------------------------
-
-  subroutine allocMemBCData
-    !
-    !       allocMemBCData allocates the memory for the prescribed
-    !       boundary data for all multigrid levels and all spectral
-    !       solutions for all blocks.
-    !
-    use constants
-    use blockPointers, only : BCData, flowDoms, nBocos, nDom, BCType
-    use flowVarRefState, only : nt1, nt2
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use inputPhysics, only : useRoughSA
-    use iteration, only : nALESteps
-    use utils, only : setPointers, terminate
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: mm, nn, sps, level, nLevels
-    integer(kind=intType) :: iBeg, jBeg, iEnd, jEnd
-    integer(kind=intType) :: inodeBeg, jnodeBeg, inodeEnd, jnodeEnd
-
-    ! Determine the number of multigrid levels.
-
-    nLevels = ubound(flowDoms,2)
-
-    ! Loop over the number of multigrid level, spectral solutions
-    ! and local blocks.
-
-    levelLoop: do level=1,nLevels
-       spectralLoop: do sps=1,nTimeIntervalsSpectral
-          domainsLoop: do nn=1,nDom
-
-             ! Have the pointers in blockPointers point to the
-             ! current block to make everything more readable.
-
-             call setPointers(nn, level, sps)
-
-             ! Loop over the number of boundary subfaces for this block.
-
-             bocoLoop: do mm=1,nBocos
-
-                ! Store the cell range of the boundary subface
-                ! a bit easier.
-
-                iBeg = BCData(mm)%icbeg; iEnd = BCData(mm)%icend
-                jBeg = BCData(mm)%jcbeg; jEnd = BCData(mm)%jcend
-
-                inodeBeg = BCData(mm)%inbeg; inodeEnd = BCData(mm)%inend
-                jnodeBeg = BCData(mm)%jnbeg; jnodeEnd = BCData(mm)%jnend
-
-
-                ! Note: iBlank/delta are cell based, but uses the node
-                ! numbers to guarantee a halo exists. These must be
-                ! allocated for all boundary conditions.
-                allocate(BCData(mm)%iBlank(iNodeBeg:iNodeEnd+1, jNodeBeg:jnodeEnd+1), &
-                     BCData(mm)%delta(iNodeBeg:iNodeEnd+1, jNodeBeg:jnodeEnd+1), &
-                     BCData(mm)%deltaNode(iNodeBeg:iNodeEnd, jNodeBeg:jNodeEnd), &
-                     BCData(mm)%surfIndex(iNodeBeg:iNodeEnd, jNodeBeg:jNodeEnd))
-
-                ! Set the iBlank to 1 for non-overset cases.
-                BCData(mm)%iBlank = 1
-
-                ! Determine the boundary condition we are having here
-                ! and allocate the memory accordingly.
-
-                select case (BCType(mm))
-
-                case (NSWallAdiabatic)
-                   allocate(BCData(mm)%uSlip(iBeg:iEnd,jBeg:jEnd,3), &
-                        BCData(mm)%uSlipALE(0:nALEsteps,iBeg:iEnd,jBeg:jEnd,3), &
-                        BCData(mm)%F(iNodeBeg:iNodeEnd,jNodeBeg:jNodeEnd,3), &
-                        BCData(mm)%T(iNodeBeg:iNodeEnd,jNodeBeg:jNodeEnd,3), &
-                        BCData(mm)%Tp(iNodeBeg:iNodeEnd,jNodeBeg:jNodeEnd,3), &
-                        BCData(mm)%Tv(iNodeBeg:iNodeEnd,jNodeBeg:jNodeEnd,3), &
-                        BCData(mm)%Fp(iNodeBeg+1:iNodeEnd, jNodeBeg+1:jNodeEnd, 3), &
-                        BCData(mm)%Fv(iNodeBeg+1:iNodeEnd, jNodeBeg+1:jNodeEnd, 3), &
-                        BCData(mm)%area(iNodeBeg+1:iNodeEnd, jNodeBeg+1:jNodeEnd), &
-                        BCData(mm)%CpTarget(iNodeBeg:iNodeEnd, jNodeBeg:jNodeEnd), &
-                        stat=ierr)
-                    if (useRoughSA .and. ierr == 0) then
-                        allocate(BCData(mm)%ksNS_Wall(iBeg:iEnd,jBeg:jEnd), stat=ierr)
-                        if (level > 1) then
-                           ! The extrapolation of the BC for MG does not work
-                           ! properly. Thus it must be initialized with zero
-                           BCData(mm)%ksNS_Wall = zero
-                        end if
-                   end if
-                   if(ierr /= 0)                      &
-                        call terminate("allocMemBCData", &
-                        "Memory allocation failure for &
-                        &an adiabatic wall")
-                   BCData(mm)%CpTarget = zero
-                   !=======================================================
-
-                case (NSWallIsothermal)
-
-                   allocate(BCData(mm)%uSlip(iBeg:iEnd,jBeg:jEnd,3),  &
-                        BCData(mm)%uSlipALE(0:nALEsteps,iBeg:iEnd,jBeg:jEnd,3), &
-                        BCData(mm)%TNS_Wall(iBeg:iEnd,jBeg:jEnd), &
-                        BCData(mm)%F(iNodeBeg:iNodeEnd,jNodeBeg:jNodeEnd,3), &
-                        BCData(mm)%T(iNodeBeg:iNodeEnd,jNodeBeg:jNodeEnd,3), &
-                        BCData(mm)%Tp(iNodeBeg:iNodeEnd,jNodeBeg:jNodeEnd,3), &
-                        BCData(mm)%Tv(iNodeBeg:iNodeEnd,jNodeBeg:jNodeEnd,3), &
-                        BCData(mm)%cellHeatFlux(iBeg:iEnd,jBeg:jEnd), &
-                        BCData(mm)%nodeHeatFlux(iNodeBeg:iNodeEnd,jNodeBeg:jNodeEnd), &
-                        BCData(mm)%Fp(iNodeBeg+1:iNodeEnd, jNodeBeg+1:jNodeEnd, 3), &
-                        BCData(mm)%Fv(iNodeBeg+1:iNodeEnd, jNodeBeg+1:jNodeEnd, 3), &
-                        BCData(mm)%area(iNodeBeg+1:iNodeEnd, jNodeBeg+1:jNodeEnd), &
-                        BCData(mm)%CpTarget(iNodeBeg:iNodeEnd, jNodeBeg:jNodeEnd), &
-                        stat=ierr)
-                    if (useRoughSA .and. ierr == 0) then
-                        allocate(BCData(mm)%ksNS_Wall(iBeg:iEnd,jBeg:jEnd), stat=ierr)
-                        if (level > 1) then
-                           ! The extrapolation of the BC for MG does not work
-                           ! properly. Thus it must be initialized with zero
-                           BCData(mm)%ksNS_Wall = zero
-                        end if
-                   end if
-                   if(ierr /= 0)                      &
-                        call terminate("allocMemBCData", &
-                        "Memory allocation failure for &
-                        &an isothermal wall")
-                   BCData(mm)%CpTarget = zero
-
-                   !=======================================================
-
-                case (EulerWall)
-
-                   allocate(BCData(mm)%rface(iBeg:iEnd,jBeg:jEnd), &
-                        BCData(mm)%rFaceALE(0:nALEsteps,iBeg:iEnd,jBeg:jEnd), &
-                        BCData(mm)%F(iNodeBeg:iNodeEnd,jNodeBeg:jNodeEnd,3), &
-                        BCData(mm)%T(iNodeBeg:iNodeEnd,jNodeBeg:jNodeEnd,3), &
-                        BCData(mm)%Tp(iNodeBeg:iNodeEnd,jNodeBeg:jNodeEnd,3), &
-                        BCData(mm)%Tv(iNodeBeg:iNodeEnd,jNodeBeg:jNodeEnd,3), &
-                        BCData(mm)%Fp(iNodeBeg+1:iNodeEnd, jNodeBeg+1:jNodeEnd, 3), &
-                        BCData(mm)%Fv(iNodeBeg+1:iNodeEnd, jNodeBeg+1:jNodeEnd, 3), &
-                        BCData(mm)%area(iNodeBeg+1:iNodeEnd, jNodeBeg+1:jNodeEnd), &
-                        BCData(mm)%CpTarget(iNodeBeg:iNodeEnd, jNodeBeg:jNodeEnd), &
-                        stat=ierr)
-                   if(ierr /= 0)                      &
-                        call terminate("allocMemBCData", &
-                        "Memory allocation failure for &
-                        &an Euler wall")
-                   BCData(mm)%CpTarget = zero
-                   !=======================================================
-
-                case (farField)
-
-                   ! Just allocate the memory for the normal mesh
-                   ! velocity.
-
-                   allocate(BCData(mm)%rface(iBeg:iEnd,jBeg:jEnd), &
-                        BCData(mm)%rFaceALE(0:nALEsteps,iBeg:iEnd,jBeg:jEnd), &
-                        stat=ierr)
-                   if(ierr /= 0)                      &
-                        call terminate("allocMemBCData", &
-                        "Memory allocation failure for &
-                        &a farfield")
-                   !=======================================================
-
-                case (symm, symmPolar)
-
-                   ! Allocate for symm as well. This is not necessary
-                   ! but we need it for the reverse AD.
-
-                   ! Modified by HDN
-                   allocate(BCData(mm)%rface(iBeg:iEnd,jBeg:jEnd), &
-                        BCData(mm)%rFaceALE(0:nALEsteps,iBeg:iEnd,jBeg:jEnd), &
-                        stat=ierr)
-                   if(ierr /= 0)                      &
-                        call terminate("allocMemBCData", &
-                        "Memory allocation failure for &
-                        &a symm")
-
-                   !=======================================================
-
-                case (SupersonicInflow, DomainInterfaceAll)
-
-                   ! Supersonic inflow or a domain interface with
-                   ! all the data prescribed. Allocate the memory for
-                   ! the entire state vector to be prescribed.
-
-                   allocate(BCData(mm)%rho(iBeg:iEnd,jBeg:jEnd),   &
-                        BCData(mm)%velx(iBeg:iEnd,jBeg:jEnd),  &
-                        BCData(mm)%vely(iBeg:iEnd,jBeg:jEnd),  &
-                        BCData(mm)%velz(iBeg:iEnd,jBeg:jEnd),  &
-                        BCData(mm)%ps(iBeg:iEnd,jBeg:jEnd), &
-                        stat=ierr)
-                   if(ierr /= 0)                      &
-                        call terminate("allocMemBCData", &
-                        "Memory allocation failure for &
-                        &a supersonic inflow")
-
-                   ! Check if memory for the turbulent variables must
-                   ! be allocated. If so, do so.
-
-                   if(nt2 >= nt1) then
-                      allocate(&
-                           BCData(mm)%turbInlet(iBeg:iEnd,jBeg:jEnd,nt1:nt2), &
-                           stat=ierr)
-                      if(ierr /= 0)                      &
-                           call terminate("allocMemBCData", &
-                           "Memory allocation failure for &
-                           &turbInlet for a supersonic &
-                           &inflow")
-                   endif
-
-                   !=======================================================
-
-                   ! Added by HDN
-                case (SupersonicOutflow)
-                   ! No state is needed for this boco
-
-
-                   !=======================================================
-
-                case (SubsonicInflow)
-
-                   ! Subsonic inflow. Allocate the memory for the
-                   ! variables needed. Note the there are two ways to
-                   ! specify boundary conditions for a subsonic inflow.
-
-                   allocate(BCData(mm)%flowXdirInlet(iBeg:iEnd,jBeg:jEnd), &
-                        BCData(mm)%flowYdirInlet(iBeg:iEnd,jBeg:jEnd), &
-                        BCData(mm)%flowZdirInlet(iBeg:iEnd,jBeg:jEnd), &
-                        BCData(mm)%ptInlet(iBeg:iEnd,jBeg:jEnd),       &
-                        BCData(mm)%ttInlet(iBeg:iEnd,jBeg:jEnd),       &
-                        BCData(mm)%htInlet(iBeg:iEnd,jBeg:jEnd),       &
-                        BCData(mm)%rho(iBeg:iEnd,jBeg:jEnd),           &
-                        BCData(mm)%velx(iBeg:iEnd,jBeg:jEnd),          &
-                        BCData(mm)%vely(iBeg:iEnd,jBeg:jEnd),          &
-                        BCData(mm)%velz(iBeg:iEnd,jBeg:jEnd), &
-                        stat=ierr)
-                   if(ierr /= 0)                      &
-                        call terminate("allocMemBCData", &
-                        "Memory allocation failure for &
-                        &a subsonic inflow")
-
-                   ! Check if memory for the turbulent variables must
-                   ! be allocated. If so, do so.
-
-                   if(nt2 >= nt1) then
-                      allocate(&
-                           BCData(mm)%turbInlet(iBeg:iEnd,jBeg:jEnd,nt1:nt2), &
-                           stat=ierr)
-                      if(ierr /= 0)                      &
-                           call terminate("allocMemBCData", &
-                           "Memory allocation failure for &
-                           &turbInlet for a subsonic inflow")
-                   endif
-
-                   !=======================================================
-
-                case (SubsonicOutflow, MassBleedOutflow, &
-                     DomainInterfaceP)
-
-                   ! Subsonic outflow, outflow mass bleed or domain
-                   ! interface with prescribed pressure. Allocate the
-                   ! memory for the static pressure.
-
-                   allocate(BCData(mm)%ps(iBeg:iEnd,jBeg:jEnd), &
-                        stat=ierr)
-                   if(ierr /= 0)                      &
-                        call terminate("allocMemBCData", &
-                        "Memory allocation failure for &
-                        &a subsonic outflow, outflow mass &
-                        &bleed or domain interface with &
-                        &prescribed pressure.")
-
-                   ! Initialize the pressure to avoid problems for
-                   ! the bleed flows.
-
-                   BCData(mm)%ps = zero
-
-                   !=======================================================
-
-                case (DomainInterfaceRhoUVW)
-
-                   ! Domain interface with prescribed density and
-                   ! velocities, i.e. mass flow is prescribed. Allocate
-                   ! the memory for the variables needed.
-
-                   allocate(BCData(mm)%rho(iBeg:iEnd,jBeg:jEnd),  &
-                        BCData(mm)%velx(iBeg:iEnd,jBeg:jEnd), &
-                        BCData(mm)%vely(iBeg:iEnd,jBeg:jEnd), &
-                        BCData(mm)%velz(iBeg:iEnd,jBeg:jEnd), &
-                        stat=ierr)
-                   if(ierr /= 0)                      &
-                        call terminate("allocMemBCData", &
-                        "Memory allocation failure for &
-                        &a domain interface with a &
-                        &prescribed mass flow")
-
-                   ! Check if memory for the turbulent variables must
-                   ! be allocated. If so, do so.
-
-                   if(nt2 >= nt1) then
-                      allocate(&
-                           BCData(mm)%turbInlet(iBeg:iEnd,jBeg:jEnd,nt1:nt2), &
-                           stat=ierr)
-                      if(ierr /= 0)                      &
-                           call terminate("allocMemBCData", &
-                           "Memory allocation failure for &
-                           &turbInlet for a domain interface &
-                           &with a prescribed mass flow")
-                   endif
-
-                   !=======================================================
-
-                case (DomainInterfaceTotal)
-
-                   ! Domain interface with prescribed total conditions.
-                   ! Allocate the memory for the variables needed.
-
-                   allocate(BCData(mm)%flowXdirInlet(iBeg:iEnd,jBeg:jEnd), &
-                        BCData(mm)%flowYdirInlet(iBeg:iEnd,jBeg:jEnd), &
-                        BCData(mm)%flowZdirInlet(iBeg:iEnd,jBeg:jEnd), &
-                        BCData(mm)%ptInlet(iBeg:iEnd,jBeg:jEnd),       &
-                        BCData(mm)%ttInlet(iBeg:iEnd,jBeg:jEnd),       &
-                        BCData(mm)%htInlet(iBeg:iEnd,jBeg:jEnd),       &
-                        stat=ierr)
-                   if(ierr /= 0)                      &
-                        call terminate("allocMemBCData", &
-                        "Memory allocation failure for &
-                        &a domain interface with total &
-                        &conditions")
-
-                   ! Check if memory for the turbulent variables must
-                   ! be allocated. If so, do so.
-
-                   if(nt2 >= nt1) then
-                      allocate(&
-                           BCData(mm)%turbInlet(iBeg:iEnd,jBeg:jEnd,nt1:nt2), &
-                           stat=ierr)
-                      if(ierr /= 0)                      &
-                           call terminate("allocMemBCData", &
-                           "Memory allocation failure for &
-                           &turbInlet for a domain interface &
-                           &with a prescribed mass flow")
-                   endif
-
-                   !=======================================================
-
-                case (domainInterfaceRho)
-
-                   ! Domain interface with prescribed density.
-                   ! Allocate the memory for the density.
-
-                   allocate(BCData(mm)%rho(iBeg:iEnd,jBeg:jEnd), &
-                        stat=ierr)
-                   if(ierr /= 0)                      &
-                        call terminate("allocMemBCData", &
-                        "Memory allocation failure for &
-                        &a domain interface")
-
-                end select
-
-             enddo bocoLoop
-
-          enddo domainsLoop
-       enddo spectralLoop
-    enddo levelLoop
-
-  end subroutine allocMemBCData
-
-  subroutine initBCData
-    !
-    !       initBCData allocates and initializes the arrays BCData for
-    !       all boundary subfaces on all grid levels for all spectral
-    !       solutions.
-    !
-    use constants
-    use blockPointers, only : flowDoms, BCData, nDom, nBocos, inBeg, inEnd, &
-         jnBeg, jnEnd, knBeg, knEnd, icBeg, icEnd, jcBeg, jcBeg, jcEnd, kcBeg, &
-         kcEnd, BCFaceID
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use utils, only : setPointers, terminate
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
+    !=================================================================
 
-    integer(kind=intType) :: i, j, sps
-    integer(kind=intType) :: nLevels, level
-
-    ! Determine the number of grid levels.
-
-    nLevels = ubound(flowDoms,2)
-
-    ! Loop over the number of grid levels.
-
-    levelLoop: do level=1,nLevels
-
-       ! Loop over the number of spectral solutions and number of
-       ! blocks stored on this processor.
-
-       spectralLoop: do sps=1,nTimeIntervalsSpectral
-          domainsLoop: do i=1,nDom
-
-             ! Allocate the memory for the array of the boundary
-             ! condition data.
-
-             j = flowDoms(i,level,sps)%nBocos
-             allocate(flowDoms(i,level,sps)%BCData(j), stat=ierr)
-             if(ierr /= 0)                   &
-                  call terminate("initBCData", &
-                  "Memory allocation failure for BCData")
-
-             ! Set the pointers to make it more readable.
-
-             call setPointers(i,level,sps)
-
-             ! Copy the range of the subfaces in BCData and nullify its
-             ! pointers.
-
-             bocoLoop: do j=1,nBocos
-
-                ! Determine the block face on which the subface is located
-                ! and set the dimensions accordingly.
-
-                select case (BCFaceID(j))
-
-                case (iMin,iMax)
-                   BCData(j)%inBeg = jnBeg(j)
-                   BCData(j)%inEnd = jnEnd(j)
-                   BCData(j)%jnBeg = knBeg(j)
-                   BCData(j)%jnEnd = knEnd(j)
-
-                   BCData(j)%icbeg = jcbeg(j)
-                   BCData(j)%icend = jcend(j)
-                   BCData(j)%jcbeg = kcbeg(j)
-                   BCData(j)%jcend = kcend(j)
-
-                case (jMin,jMax)
-                   BCData(j)%inBeg = inBeg(j)
-                   BCData(j)%inEnd = inEnd(j)
-                   BCData(j)%jnBeg = knBeg(j)
-                   BCData(j)%jnEnd = knEnd(j)
-
-                   BCData(j)%icbeg = icbeg(j)
-                   BCData(j)%icend = icend(j)
-                   BCData(j)%jcbeg = kcbeg(j)
-                   BCData(j)%jcend = kcend(j)
-
-                case (kMin,kMax)
-                   BCData(j)%inBeg = inBeg(j)
-                   BCData(j)%inEnd = inEnd(j)
-                   BCData(j)%jnBeg = jnBeg(j)
-                   BCData(j)%jnEnd = jnEnd(j)
-
-                   BCData(j)%icbeg = icbeg(j)
-                   BCData(j)%icend = icend(j)
-                   BCData(j)%jcbeg = jcbeg(j)
-                   BCData(j)%jcend = jcend(j)
-
-                end select
-
-                ! Initialize the boundary condition treatment for
-                ! subsonic inlet to noSubInlet.
-
-                BCData(j)%subsonicInletTreatment = noSubInlet
-
-                ! Nullify the pointers of BCData.
-                ! Some compilers require this.
-
-                nullify(BCData(j)%norm)
-                nullify(BCData(j)%rface)
-                nullify(BCData(j)%F)
-                nullify(BCData(j)%Fv)
-                nullify(BCData(j)%Fp)
-                nullify(BCData(j)%T)
-                nullify(BCData(j)%tv)
-                nullify(BCData(j)%Tp)
-                nullify(BCData(j)%area)
-                nullify(BCData(j)%surfIndex)
-                nullify(BCData(j)%uSlip)
-                nullify(BCData(j)%TNS_Wall)
-                nullify(BCData(j)%ksNS_Wall)
-                nullify(BCData(j)%CpTarget)
-
-                nullify(BCData(j)%normALE)
-                nullify(BCData(j)%rfaceALE)
-                nullify(BCData(j)%uSlipALE)
-                nullify(BCData(j)%cellHeatFlux)
-                nullify(BCData(j)%nodeHeatFlux)
-
-                nullify(BCData(j)%ptInlet)
-                nullify(BCData(j)%ttInlet)
-                nullify(BCData(j)%htInlet)
-                nullify(BCData(j)%flowXdirInlet)
-                nullify(BCData(j)%flowYdirInlet)
-                nullify(BCData(j)%flowZdirInlet)
-
-                nullify(BCData(j)%turbInlet)
-
-                nullify(BCData(j)%rho)
-                nullify(BCData(j)%velx)
-                nullify(BCData(j)%vely)
-                nullify(BCData(j)%velz)
-                nullify(BCData(j)%ps)
-                bcData(j)%symNormSet = .False.
-                bcData(j)%symNorm = zero
-                nullify(BCData(j)%iblank)
-                nullify(BCData(j)%delta)
-                nullify(BCData(j)%deltaNode)
-
-             enddo bocoLoop
-          enddo domainsLoop
-       enddo spectralLoop
-    enddo levelLoop
-
-  end subroutine initBCData
-
-  ! ------------------------------------------
-  !             Update routines
-  ! ------------------------------------------
-
-
-  subroutine setBCDataFineGrid(initializationPart)
-    !--------------------------------------------------------------
-    ! Manual Differentiation Warning: Modifying this routine requires
-    ! modifying the hand-written forward and reverse routines.
-    ! --------------------------------------------------------------
-    !
-    !       setBCDataFineGrid extracts the boundary condition data from
-    !       the cgnsGrid and stores it in useable form in the BCData
-    !       arrays of the currently finest grid, i.e. groundLevel.
-    !
-    use constants
-    use blockPointers, only : BCData, BCType, nBKGlobal, nBocos, nDom, cgnsSubFace
-    use communication, only : adflow_comm_world, myid
-    use inputTimeSpectral, only :nTimeIntervalsSpectral
-    use iteration, only : groundLevel
-    use utils, only : setPointers, terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    logical, intent(in) :: initializationPart
-    !
-    !      Local variables.
-    !
-    integer :: ierr
+    logical function setBCVarTurb(offset, boco, bcVarArray, &
+                                  iBeg, iEnd, jBeg, jEnd, turbInlet)
+        !
+        !       SetBCVarTurb sets the array for the turbulent halo data
+        !       for inlet boundaries. This function returns .true. If all
+        !       turbulence variables could be interpolated and .false.
+        !       otherwise.
+        !
+        use constants
+        use flowVarRefState, only: nt1, nt2, muRef, Pref, rhoRef, wInf
+        use inputPhysics, only: equations, turbModel
+        use utils, only: terminate, siTurb
 
-    integer(kind=intType) :: i, j, sps, iBeg, iEnd, jBeg, jEnd
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: offset, boco, iBeg, iEnd, jBeg, jEnd
+        real(kind=realType), dimension(iBeg:iEnd, jBeg:jEnd, nbcVarMax) :: bcVarArray
+        real(kind=realType), dimension(:, :, :), pointer :: turbInlet
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn, mm, i, j
+        real(kind=realType)   :: mult, trans, nuRef
+        real(kind=realType), dimension(nt1:nt2) :: ref
 
-    logical :: allTurbMassBleedInflow,  allTurbSubsonicInflow
-    logical :: allFlowSupersonicInflow, allTurbSupersonicInflow
-    real(kind=realType), dimension(:,:,:), allocatable :: bcVarArray
+        ! Initialize setBCVarTurb to .true. And return immediately
+        ! if not the rans equations are solved.
 
-    ! Initialize axAssumed and massflowPrescribed to .false.,
-    ! indicating that no assumption is made about the axial direction
-    ! and no subsonic inflow boundaries with prescribed mass flow
-    ! are present.
+        setBCVarTurb = .true.
+        if (equations /= RANSEquations) return
 
-    axAssumed          = .false.
-    massflowPrescribed = .false.
+        ! Set the reference values depending on the turbulence model.
 
-    ! Initialize all the prescribed turbulence as well as flow
-    ! variables for supersonic inlet to .true.
+        nuRef = muRef/rhoRef
+        select case (turbModel)
 
-    allTurbMassBleedInflow  = .true.
-    allTurbSubsonicInflow   = .true.
-    allTurbSupersonicInflow = .true.
+        case (spalartAllmaras, spalartAllmarasEdwards)
+            ref(itu1) = nuRef
 
-    allFlowSupersonicInflow = .true.
+        case (komegaWilcox, komegaModified, menterSST)
+            ref(itu1) = pRef/rhoRef
+            ref(itu2) = ref(itu1)/nuRef
 
-    ! Loop over the number of spectral solutions and local blocks.
+        case (ktau)
+            ref(itu1) = pRef/rhoRef
+            ref(itu2) = nuRef/ref(itu1)
 
-    spectralLoop: do sps=1,nTimeIntervalsSpectral
-       domainsLoop: do i=1,nDom
+        case (v2f)
+            ref(itu1) = pRef/rhoRef
+            ref(itu4) = ref(itu1)/nuRef
+            ref(itu2) = ref(itu1)*ref(itu4)
+            ref(itu3) = ref(itu1)
 
-          ! Set the pointers to this block on groundLevel to make
-          ! the code readable.
+        end select
 
-          call setPointers(i,groundLevel,sps)
+        ! Loop over the number of turbulent variables. mm is the counter
+        ! in the arrays bcVarArray and bcVarPresent.
 
-          ! Loop over the number of boundary condition subfaces.
+        mm = offset
+        turbLoop: do nn = nt1, nt2
+            mm = mm + 1
 
-          bocoLoop: do j=1,nBocos
+            ! Check if the variable is present. If so, use the
+            ! interpolated data.
 
-             ! Store the cgns boundary subface number, the number of
-             ! boundary condition data sets and the data sets a bit easier.
+            if (bcVarPresent(mm)) then
 
-             cgnsBoco = cgnsSubface(j)
-             nDataSet = cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%nDataSet
-             dataSet => cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%dataSet
+                ! Conversion to SI units if possible.
 
-             ! Store the range of the boundary subface a bit easier.
+                call siTurb(mass(mm), length(mm), time(mm), temp(mm), &
+                            bcVarNames(mm), mult, trans)
 
-             iBeg = BCData(j)%icBeg; iEnd = BCData(j)%icEnd
-             jBeg = BCData(j)%jcBeg; jEnd = BCData(j)%jcEnd
+                ! Set the turbulent variables.
 
-             ! Allocate the bcVarArray to the maximum size it could
-             ! possibly be *in the last dimension*.
-             allocate(bcVarArray(iBeg:iEnd,jBeg:jEnd,nbcVarMax))
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        turbInlet(i, j, nn) = (mult*bcVarArray(i, j, mm) + trans)/ref(nn)
+                    end do
+                end do
 
-             ! Determine the boundary condition we are having here and
-             ! call the appropriate routine.
+            else
 
-             select case (BCType(j))
+                ! Turbulent variable not present. Use the free stream data.
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        turbInlet(i, j, nn) = wInf(nn)
+                    end do
+                end do
 
-             case (NSWallAdiabatic)
-                call setBCVarNamesAdiabaticWall ! sets bcVarNames and nbcVar
-                call extractFromDataSet(bcVarArray)
-                call BCDataAdiabaticWall(j, bcVarArray, iBeg, iEnd, jBeg, jEnd)
+                ! Set the logical value to false to indicate that indeed not
+                ! all the values were present
+                setBCVarTurb = .false.
 
-             case (NSWallIsothermal)
-                call setBCVarNamesIsothermalWall ! sets bcVarNames and nbcVar
-                call extractFromDataSet(bcVarArray)
-                call BCDataIsothermalWall(j, bcVarArray, iBeg, iEnd, jBeg, jEnd)
+            end if
+        end do turbLoop
+    end function setBCVarTurb
 
-             case (SupersonicInflow)
-                call setBCVarNamesSupersonicInflow
-                call extractFromDataSet(bcVarArray)
-                call BCDataSupersonicInflow(j, bcVarArray, iBeg, iEnd, jBeg, jEnd, &
-                     allFlowSupersonicInflow, allTurbSupersonicInflow)
+#ifndef USE_TAPENADE
 
-             case (SubsonicInflow)
-                call setBCVarNamesSubsonicInflow
-                call extractFromDataSet(bcVarArray)
-                call BCDataSubsonicInflow(j, bcVarArray, iBeg, iEnd, jBeg, jEnd, &
-                     allTurbSubsonicInflow)
+    subroutine setBCData(bcDataNamesIn, bcDataIn, famLists, sps, &
+                         nVar, nFamMax)
+        !--------------------------------------------------------------
+        ! Manual Differentiation Warning: Modifying this routine requires
+        ! modifying the hand-written forward and reverse routines.
+        ! --------------------------------------------------------------
+        use constants
+        use cgnsNames
+        use blockPointers, only: BCData, nDom, nBocos, nBKGlobal, &
+                                 cgnsSubFace, BCType
+        use sorting, only: famInList
+        use utils, only: setPointers, terminate, char2str
+        use communication, only: myid
+        use actuatorRegionData, only: actuatorRegions, nActuatorRegions
+        !
+        !      Subroutine arguments.
+        !
+        character, dimension(nVar, maxCGNSNameLen), intent(in) :: bcdatanamesin
+        real(kind=realType), dimension(nVar), intent(in) :: bcDataIn
+        integer(kind=intType), dimension(nVar, nFamMax) :: famLists
+        integer(kind=intType), intent(in) ::  sps, nVar, nFamMax
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, iVar, nFam, iRegion
+        character(maxCGNSNameLen) :: varName
+        domainsLoop: do i = 1, nDom
+
+            ! Set the pointers to this block on groundLevel to make
+            ! the code readable.
+
+            call setPointers(i, 1_intType, sps)
+
+            varLoop: do iVar = 1, nVar
+
+                ! Loop over the number of boundary condition subfaces.
+
+                bocoLoop: do j = 1, nBocos
+
+                    ! Store the cgns boundary subface number, the number of
+                    ! boundary condition data sets and the data sets a bit easier.
+
+                    cgnsBoco = cgnsSubface(j)
+                    nDataSet = cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%nDataSet
+                    dataSet => cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%dataSet
+
+                    ! Check if this surface should be included or not:
+                    nFam = famLists(iVar, 1)
+                    famInclude: if (famInList(BCdata(j)%famID, famLists(iVar, 2:2 + nFam - 1))) then
+
+                        select case (BCType(j))
+
+                        case (NSWallAdiabatic)
+                            call setBCVarNamesAdiabaticWall
+                            call errorCheckbcDataNamesIn("NSWallAdiabatic", bcDataNamesIn)
+                        case (NSWallIsothermal)
+                            call setBCVarNamesSupersonicInflow  ! possible bug?
+                            call errorCheckbcDataNamesIn("NSWallIsothermal", bcDataNamesIn)
+                        case (SupersonicInflow)
+                            call setBCVarNamesSupersonicInflow
+                            call errorCheckbcDataNamesIn("SupersonicInflow", bcDataNamesIn)
+                        case (SubsonicInflow)
+                            call setBCVarNamesSubsonicInflow
+                            call errorCheckbcDataNamesIn("SubsonicInflow", bcDataNamesIn)
+                        case (SubsonicOutflow)
+                            call setBCVarNamesSubsonicOutflow
+                            call errorCheckbcDataNamesIn("SubsonicOutflow", bcDataNamesIn)
+                        case default
+                            call terminate('setBCData', &
+                                           'This is not a valid boundary condtion for setBCData')
+                        end select
+                        call insertToDataSet(bcDataNamesIn, bcDataIn)
+
+                    end if famInclude
+                end do bocoLoop
+            end do varLoop
+        end do domainsLoop
+
+        ! Loop over any actuator regions since they also could have to set BCData
+        regionLoop: do iRegion = 1, nActuatorRegions
+            varLoop2: do iVar = 1, nVar
+                nFam = famLists(iVar, 1)
+                famInclude2: if (famInList(actuatorRegions(iRegion)%famID, famLists(iVar, 2:2 + nFam - 1))) then
+
+                    ! Extract the name
+                    varName = char2str(bcDataNamesIn(iVar, :), maxCGNSNameLen)
+
+                    if (trim(varName) == "Thrust") then
+                        actuatorRegions(iRegion)%force = actuatorRegions(iRegion)%axisVec* &
+                                                         bcDataIn(iVar)
+                    else if (trim(varName) == "Torque") then
+                        actuatorRegions(iRegion)%torque = bcDataIn(iVar)
+                    else if (trim(varName) == "Heat") then
+                        actuatorRegions(iRegion)%heat = bcDataIn(iVar)
+                    end if
+                end if famInclude2
+            end do varLoop2
+        end do regionLoop
+
+    end subroutine setBCData
+
+    subroutine setBCData_d(bcDataNamesIn, bcDataIn, bcDataInd, famLists, sps, &
+                           nVar, nFamMax)
+        !------------------------------------------------------------------------
+        ! Manual Differentiation Warning: This routine is differentiated by hand.
+        ! -----------------------------------------------------------------------
+        use constants
+        use cgnsNames
+        use blockPointers, only: BCData, nDom, nBocos, nBKGlobal, &
+                                 cgnsSubFace, BCType
+        use sorting, only: famInList
+        use utils, only: setPointers_d, terminate, char2str
+        use actuatorRegionData, only: actuatorRegionsd, actuatorRegions, nActuatorRegions
+        !
+        !      Subroutine arguments.
+        !
+        character, dimension(nVar, maxCGNSNameLen), intent(in) :: bcdatanamesin
+        real(kind=realType), dimension(nVar), intent(in) :: bcDataIn, bcDataInd
+        integer(kind=intType), dimension(nVar, nFamMax) :: famLists
+        integer(kind=intType), intent(in) ::  sps, nVar, nFamMax
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, iVar, nFam
+        character(maxCGNSNameLen) :: varName
+
+        domainsLoop: do i = 1, nDom
+
+            ! Set the pointers to this block on groundLevel to make
+            ! the code readable.
+
+            call setPointers_d(i, 1_intType, sps)
+
+            varLoop: do iVar = 1, nVar
+
+                ! Loop over the number of boundary condition subfaces.
+
+                bocoLoop: do j = 1, nBocos
+
+                    ! Store the cgns boundary subface number, the number of
+                    ! boundary condition data sets and the data sets a bit easier.
+
+                    cgnsBoco = cgnsSubface(j)
+                    nDataSet = cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%nDataSet
+                    dataSet => cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%dataSet
+                    dataSetd => cgnsDomsd(nbkGlobal)%bocoInfo(cgnsBoco)%dataSet
+
+                    ! Check if this surface should be included or not:
+                    nFam = famLists(iVar, 1)
+                    famInclude: if (famInList(BCdata(j)%famID, famLists(iVar, 2:2 + nFam - 1))) then
+
+                        select case (BCType(j))
+
+                        case (NSWallAdiabatic)
+                            call setBCVarNamesAdiabaticWall
+                            call errorCheckbcDataNamesIn("NSWallAdiabatic", bcDataNamesIn)
+                        case (NSWallIsothermal)
+                            call setBCVarNamesSupersonicInflow
+                            call errorCheckbcDataNamesIn("NSWallIsothermal", bcDataNamesIn)
+                        case (SupersonicInflow)
+                            call setBCVarNamesSupersonicInflow
+                            call errorCheckbcDataNamesIn("SupersonicInflow", bcDataNamesIn)
+                        case (SubsonicInflow)
+                            call setBCVarNamesSubsonicInflow
+                            call errorCheckbcDataNamesIn("SubsonicInflow", bcDataNamesIn)
+                        case (SubsonicOutflow)
+                            call setBCVarNamesSubsonicOutflow
+                            call errorCheckbcDataNamesIn("SubsonicOutflow", bcDataNamesIn)
+                        case default
+                            call terminate('setBCData', &
+                                           'This is not a valid boundary condtion for setBCData')
+                        end select
+                        call insertToDataSet_d(bcDataNamesIn, bcDataIn, bcDataInd)
+
+                    end if famInclude
+                end do bocoLoop
+            end do varLoop
+        end do domainsLoop
+
+        ! Loop over any actuator regions since they also could have to set BCData
+        regionLoop: do iRegion = 1, nActuatorRegions
+            varLoop2: do iVar = 1, nVar
+                nFam = famLists(iVar, 1)
+                famInclude2: if (famInList(actuatorRegions(iRegion)%famID, famLists(iVar, 2:2 + nFam - 1))) then
+
+                    ! Extract the name
+                    varName = char2str(bcDataNamesIn(iVar, :), maxCGNSNameLen)
+
+                    if (trim(varName) == "Thrust") then
+                        actuatorRegions(iRegion)%force = actuatorRegions(iRegion)%axisVec* &
+                                                         bcDataIn(iVar)
+                        actuatorRegionsd(iRegion)%force = actuatorRegions(iRegion)%axisVec* &
+                                                          bcDataInd(iVar)
+                    else if (trim(varName) == "Torque") then
+                        actuatorRegions(iRegion)%torque = bcDataIn(iVar)
+                        actuatorRegionsd(iRegion)%torque = bcDataInd(iVar)
+                    else if (trim(varName) == "Heat") then
+                        actuatorRegions(iRegion)%heat = bcDataIn(iVar)
+                        actuatorRegionsd(iRegion)%heat = bcDataInd(iVar)
+                    end if
+                end if famInclude2
+            end do varLoop2
+        end do regionLoop
+
+    end subroutine setBCData_d
+
+    subroutine setBCData_b(bcDataNamesIn, bcDataIn, bcDataInd, famLists, sps, &
+                           nVar, nFamMax)
+        !------------------------------------------------------------------------
+        ! Manual Differentiation Warning: This routine is differentiated by hand.
+        ! -----------------------------------------------------------------------
+        use constants
+        use cgnsNames
+        use blockPointers, only: BCData, nDom, nBocos, nBKGlobal, &
+                                 cgnsSubFace, BCType
+        use sorting, only: famInList
+        use utils, only: setPointers_b, terminate, char2str
+        use actuatorRegionData, only: actuatorRegionsd, actuatorRegions, nActuatorRegions
+
+        !
+        !      Subroutine arguments.
+        !
+        character, dimension(nVar, maxCGNSNameLen), intent(in) :: bcdatanamesin
+        real(kind=realType), dimension(nVar), intent(in) :: bcDataIn
+        real(kind=realType), dimension(nVar), intent(out) :: bcDataInd
+        integer(kind=intType), dimension(nVar, nFamMax) :: famLists
+        integer(kind=intType), intent(in) ::  sps, nVar, nFamMax
+
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, iVar, nFam
+        character(maxCGNSNameLen) :: varName
+
+        domainsLoop: do i = 1, nDom
+
+            ! Set the pointers to this block on groundLevel to make
+            ! the code readable.
+
+            call setPointers_b(i, 1_intType, sps)
+
+            varLoop: do iVar = 1, nVar
+
+                ! Loop over the number of boundary condition subfaces.
+
+                bocoLoop: do j = 1, nBocos
+
+                    ! Store the cgns boundary subface number, the number of
+                    ! boundary condition data sets and the data sets a bit easier.
+
+                    cgnsBoco = cgnsSubface(j)
+                    nDataSet = cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%nDataSet
+                    dataSet => cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%dataSet
+                    dataSetd => cgnsDomsd(nbkGlobal)%bocoInfo(cgnsBoco)%dataSet
+
+                    ! Check if this surface should be included or not:
+                    nFam = famLists(iVar, 1)
+                    famInclude: if (famInList(BCdata(j)%famID, famLists(iVar, 2:2 + nFam - 1))) then
+
+                        select case (BCType(j))
+
+                        case (NSWallAdiabatic)
+                            call setBCVarNamesAdiabaticWall
+                            call errorCheckbcDataNamesIn("NSWallAdiabatic", bcDataNamesIn)
+                        case (NSWallIsothermal)
+                            call setBCVarNamesSupersonicInflow  ! possible bug?
+                            call errorCheckbcDataNamesIn("NSWallIsothermal", bcDataNamesIn)
+                        case (SupersonicInflow)
+                            call setBCVarNamesSupersonicInflow
+                            call errorCheckbcDataNamesIn("SupersonicInflow", bcDataNamesIn)
+                        case (SubsonicInflow)
+                            call setBCVarNamesSubsonicInflow
+                            call errorCheckbcDataNamesIn("SubsonicInflow", bcDataNamesIn)
+                        case (SubsonicOutflow)
+                            call setBCVarNamesSubsonicOutflow
+                            call errorCheckbcDataNamesIn("SubsonicOutflow", bcDataNamesIn)
+                        case default
+                            call terminate('setBCData', &
+                                           'This is not a valid boundary condtion for setBCData')
+                        end select
+                        call insertToDataSet_b(bcDataNamesIn, bcDataIn, bcDataInd)
+
+                    end if famInclude
+                end do bocoLoop
+            end do varLoop
+        end do domainsLoop
+
+        ! Loop over any actuator regions since they also could have to set BCData
+        regionLoop: do iRegion = 1, nActuatorRegions
+            varLoop2: do iVar = 1, nVar
+                nFam = famLists(iVar, 1)
+                famInclude2: if (famInList(actuatorRegions(iRegion)%famID, famLists(iVar, 2:2 + nFam - 1))) then
+
+                    ! Extract the name
+                    varName = char2str(bcDataNamesIn(iVar, :), maxCGNSNameLen)
+
+                    if (trim(varName) == "Thrust") then
+                        bcDataInd(ivar) = &
+                            sum(actuatorRegions(iRegion)%axisVec*actuatorRegionsd(iRegion)%force)
+                    else if (trim(varName) == "Torque") then
+                        bcDataInd(ivar) = actuatorRegionsd(iRegion)%torque
+                    else if (trim(varName) == "Heat") then
+                        bcDataInd(ivar) = actuatorRegionsd(iRegion)%heat
+                    end if
+                end if famInclude2
+            end do varLoop2
+        end do regionLoop
+
+    end subroutine setBCData_b
+
+    subroutine extractFromDataSet(bcVarArray)
+        !--------------------------------------------------------------
+        ! Manual Differentiation Warning: Modifying this routine requires
+        ! modifying the hand-written forward and reverse routines.
+        ! --------------------------------------------------------------
+        !
+        !       extractFromDataSet tries to extract and interpolate the
+        !       variables in bcVarNames from the cgns data set.
+        !       If successful the corresponding entry of bcVarPresent is
+        !       set to .true., otherwise it is set to .false.
+        !
+        use constants
+        use cgnsNames
+        use blockPointers, onlY: nbkGlobal
+        use utils, only: terminate
+        implicit none
+
+        ! Input
+        real(kind=realType), dimension(:, :, :) :: bcVarArray
+
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: k, l, m, n
+        integer(kind=intType) :: nInter, nDim, nVarPresent, nCoor
+
+        integer(kind=intType), dimension(3) :: dataDim, coor
+        integer(kind=intType), dimension(2, 3) :: indCoor
+        integer(kind=intType), dimension(2, nbcVar) :: ind
+
+        character(len=maxStringLen) :: errorMessage
+
+        logical :: xPresent, yPresent, zPresent, rPresent
+        logical :: firstVar
+
+        ! Determine whether the variables are specified and if so,
+        ! where they are located in the data set. As the number of
+        ! variables specified is usually not so big, a linear search
+        ! algorithm is perfectly okay. At the moment only the Dirichlet
+        ! arrays are checked.
+
+        nVarPresent = 0
+
+        do m = 1, nbcVar
+            bcVarPresent(m) = .false.
+
+            dataSetLoop: do k = 1, nDataSet
+                do l = 1, dataSet(k)%nDirichletArrays
+                    if (dataSet(k)%dirichletArrays(l)%arrayName == &
+                        bcVarNames(m)) then
+
+                        ! Variable is present. Store the indices, update
+                        ! nVarPresent and set bcVarPresent(m) to .True.
+
+                        ind(1, m) = k; ind(2, m) = l
+
+                        nVarPresent = nVarPresent + 1
+                        bcVarPresent(m) = .true.
+
+                        ! Set the units for this variable.
+
+                        mass(m) = dataSet(k)%dirichletArrays(l)%mass
+                        length(m) = dataSet(k)%dirichletArrays(l)%len
+                        time(m) = dataSet(k)%dirichletArrays(l)%time
+                        temp(m) = dataSet(k)%dirichletArrays(l)%temp
+                        angle(m) = dataSet(k)%dirichletArrays(l)%angle
+
+                        ! Exit the search loop, as the variable was found.
+
+                        exit dataSetLoop
+
+                    end if
+                end do
+            end do dataSetLoop
+        end do
+
+        ! Find out whether the given data points are equal for every
+        ! variable or that every variable must be interpolated
+        ! differently.
+
+        do m = 1, nbcVar
+            if (bcVarPresent(m)) then
+                k = ind(1, m)
+                l = ind(2, m)
+
+                bcVarArray(:, :, m) = &
+                    dataSet(k)%dirichletArrays(l)%dataArr(1)
+            end if
+        end do
+
+    end subroutine extractFromDataSet
+
+    subroutine extractFromDataSet_d(bcVarArray, bcVarArrayd)
+        !------------------------------------------------------------------------
+        ! Manual Differentiation Warning: This routine is differentiated by hand.
+        ! -----------------------------------------------------------------------
+        !
+        !       extractFromDataSet tries to extract and interpolate the
+        !       variables in bcVarNames from the cgns data set.
+        !       If successful the corresponding entry of bcVarPresent is
+        !       set to .true., otherwise it is set to .false.
+        !
+        use constants
+        use cgnsNames
+        use blockPointers, onlY: nbkGlobal
+        use utils, only: terminate
+        implicit none
+
+        ! Input
+        real(kind=realType), dimension(:, :, :) :: bcVarArray, bcVarArrayd
+
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: k, l, m, n
+        integer(kind=intType) :: nInter, nDim, nVarPresent, nCoor
+
+        integer(kind=intType), dimension(3) :: dataDim, coor
+        integer(kind=intType), dimension(2, 3) :: indCoor
+        integer(kind=intType), dimension(2, nbcVar) :: ind
+
+        character(len=maxStringLen) :: errorMessage
+
+        logical :: xPresent, yPresent, zPresent, rPresent
+        logical :: firstVar
+
+        ! Determine whether the variables are specified and if so,
+        ! where they are located in the data set. As the number of
+        ! variables specified is usually not so big, a linear search
+        ! algorithm is perfectly okay. At the moment only the Dirichlet
+        ! arrays are checked.
+
+        nVarPresent = 0
+
+        do m = 1, nbcVar
+            bcVarPresent(m) = .false.
+
+            dataSetLoop: do k = 1, nDataSet
+                do l = 1, dataSet(k)%nDirichletArrays
+                    if (dataSet(k)%dirichletArrays(l)%arrayName == &
+                        bcVarNames(m)) then
+
+                        ! Variable is present. Store the indices, update
+                        ! nVarPresent and set bcVarPresent(m) to .True.
 
-             case (SubsonicOutflow)
-                call setBCVarNamesSubsonicOutflow ! sets bcVarNames and nbcVar
-                call extractFromDataSet(bcVarArray)
-                call BCDataSubsonicOutflow(j, bcVarArray, iBeg, iEnd, jBeg, jEnd)
+                        ind(1, m) = k; ind(2, m) = l
+
+                        nVarPresent = nVarPresent + 1
+                        bcVarPresent(m) = .true.
+
+                        ! Set the units for this variable.
+
+                        mass(m) = dataSet(k)%dirichletArrays(l)%mass
+                        length(m) = dataSet(k)%dirichletArrays(l)%len
+                        time(m) = dataSet(k)%dirichletArrays(l)%time
+                        temp(m) = dataSet(k)%dirichletArrays(l)%temp
+                        angle(m) = dataSet(k)%dirichletArrays(l)%angle
+
+                        ! Exit the search loop, as the variable was found.
+
+                        exit dataSetLoop
+
+                    end if
+                end do
+            end do dataSetLoop
+        end do
+
+        ! Find out whether the given data points are equal for every
+        ! variable or that every variable must be interpolated
+        ! differently.
+
+        do m = 1, nbcVar
+            if (bcVarPresent(m)) then
+                k = ind(1, m)
+                l = ind(2, m)
+
+                bcVarArray(:, :, m) = &
+                    dataSet(k)%dirichletArrays(l)%dataArr(1)
+                bcVarArrayd(:, :, m) = &
+                    dataSetd(k)%dirichletArrays(l)%dataArr(1)
+            end if
+        end do
+
+    end subroutine extractFromDataSet_d
+
+    subroutine extractFromDataSet_b(bcVarArray, bcVarArrayd)
+        !------------------------------------------------------------------------
+        ! Manual Differentiation Warning: This routine is differentiated by hand.
+        ! -----------------------------------------------------------------------
+        !
+        !       extractFromDataSet tries to extract and interpolate the
+        !       variables in bcVarNames from the cgns data set.
+        !       If successful the corresponding entry of bcVarPresent is
+        !       set to .true., otherwise it is set to .false.
+        !
+        use constants
+        use cgnsNames
+        use blockPointers, onlY: nbkGlobal
+        use utils, only: terminate
+        implicit none
+
+        ! Input
+        real(kind=realType), dimension(:, :, :) :: bcVarArray, bcVarArrayd
+
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: k, l, m, n
+        integer(kind=intType) :: nInter, nDim, nVarPresent, nCoor
+
+        integer(kind=intType), dimension(3) :: dataDim, coor
+        integer(kind=intType), dimension(2, 3) :: indCoor
+        integer(kind=intType), dimension(2, nbcVar) :: ind
+
+        character(len=maxStringLen) :: errorMessage
+
+        logical :: xPresent, yPresent, zPresent, rPresent
+        logical :: firstVar
+
+        ! Determine whether the variables are specified and if so,
+        ! where they are located in the data set. As the number of
+        ! variables specified is usually not so big, a linear search
+        ! algorithm is perfectly okay. At the moment only the Dirichlet
+        ! arrays are checked.
+
+        nVarPresent = 0
+
+        do m = 1, nbcVar
+            bcVarPresent(m) = .false.
+
+            dataSetLoop: do k = 1, nDataSet
+                do l = 1, dataSet(k)%nDirichletArrays
+                    if (dataSet(k)%dirichletArrays(l)%arrayName == &
+                        bcVarNames(m)) then
+
+                        ! Variable is present. Store the indices, update
+                        ! nVarPresent and set bcVarPresent(m) to .True.
+
+                        ind(1, m) = k; ind(2, m) = l
+
+                        nVarPresent = nVarPresent + 1
+                        bcVarPresent(m) = .true.
+
+                        ! Set the units for this variable.
+
+                        mass(m) = dataSet(k)%dirichletArrays(l)%mass
+                        length(m) = dataSet(k)%dirichletArrays(l)%len
+                        time(m) = dataSet(k)%dirichletArrays(l)%time
+                        temp(m) = dataSet(k)%dirichletArrays(l)%temp
+                        angle(m) = dataSet(k)%dirichletArrays(l)%angle
+
+                        ! Exit the search loop, as the variable was found.
+
+                        exit dataSetLoop
+
+                    end if
+                end do
+            end do dataSetLoop
+        end do
+
+        ! Find out whether the given data points are equal for every
+        ! variable or that every variable must be interpolated
+        ! differently.
+
+        do m = 1, nbcVar
+            if (bcVarPresent(m)) then
+                k = ind(1, m)
+                l = ind(2, m)
+                ! Accumulate. No need to zero.
+                dataSetd(k)%dirichletArrays(l)%dataArr(1) = &
+                    dataSetd(k)%dirichletArrays(l)%dataArr(1) + sum(bcVarArrayd(:, :, m))
+                bcvararrayd(:, :, m) = 0.0_8
+            end if
+        end do
+
+    end subroutine extractFromDataSet_b
+
+    subroutine insertToDataSet(bcDataNamesIn, bcDataIn)
+        !--------------------------------------------------------------
+        ! Manual Differentiation Warning: Modifying this routine requires
+        ! modifying the hand-written forward and reverse routines.
+        ! --------------------------------------------------------------
+        use constants
+        use utils, only: char2str
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        character, dimension(:, :), intent(in) :: bcdatanamesin
+        real(kind=realType), dimension(:), intent(in):: bcDataIn
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: k, l, m, n, q
+        integer(kind=intType) :: ind(2, nbcVar), nVarPresent
+        character(len=maxCGNSNameLen) :: varName
+
+        nVarPresent = 0
+
+        do m = 1, size(bcDataIn)
+            bcVarPresent(m) = .false.
+
+            dataSetLoop: do k = 1, nDataSet
+                do l = 1, dataSet(k)%nDirichletArrays
+                    if (dataSet(k)%dirichletArrays(l)%arrayName == &
+                        bcVarNames(m)) then
+
+                        ! Variable is present. Store the indices, update
+                        ! nVarPresent and set bcVarPresent(m) to .True.
+
+                        ind(1, m) = k; ind(2, m) = l
+
+                        nVarPresent = nVarPresent + 1
+                        bcVarPresent(m) = .true.
+
+                        ! Exit the search loop, as the variable was found.
+
+                        exit dataSetLoop
+
+                    end if
+                end do
+            end do dataSetLoop
+        end do
+
+        do m = 1, size(bcDataIn)
+            if (bcVarPresent(m)) then
+                k = ind(1, m)
+                l = ind(2, m)
+                do n = 1, size(bcDataIn)
+
+                    varName = char2str(bcDataNamesIn(n, :), maxCGNSNameLen)
+
+                    if (bcVarNames(m) == varname) then
+                        dataSet(k)%dirichletArrays(l)%dataArr(1) = bcDataIn(n)
+                    end if
+                end do
+            end if
+        end do
+    end subroutine insertToDataSet
+
+    subroutine insertToDataSet_d(bcDataNamesIn, bcDataIn, bcDataInd)
+        !------------------------------------------------------------------------
+        ! Manual Differentiation Warning: This routine is differentiated by hand.
+        ! -----------------------------------------------------------------------
+        use constants
+        use utils, only: char2str
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        character, dimension(:, :), intent(in) :: bcdatanamesin
+        real(kind=realType), dimension(:), intent(in):: bcDataIn, bcDataInd
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: k, l, m, n, q
+        integer(kind=intType) :: ind(2, nbcVar), nVarPresent
+        character(len=maxCGNSNameLen) :: varName
+
+        nVarPresent = 0
+
+        do m = 1, size(bcDataIn)
+            bcVarPresent(m) = .false.
+
+            dataSetLoop: do k = 1, nDataSet
+                do l = 1, dataSet(k)%nDirichletArrays
+                    if (dataSet(k)%dirichletArrays(l)%arrayName == &
+                        bcVarNames(m)) then
+
+                        ! Variable is present. Store the indices, update
+                        ! nVarPresent and set bcVarPresent(m) to .True.
+
+                        ind(1, m) = k; ind(2, m) = l
+
+                        nVarPresent = nVarPresent + 1
+                        bcVarPresent(m) = .true.
+
+                        ! Exit the search loop, as the variable was found.
+
+                        exit dataSetLoop
+
+                    end if
+                end do
+            end do dataSetLoop
+        end do
+
+        do m = 1, size(bcDataIn)
+            if (bcVarPresent(m)) then
+                k = ind(1, m)
+                l = ind(2, m)
+                do n = 1, size(bcDataIn)
+
+                    varName = char2str(bcDataNamesIn(n, :), maxCGNSNameLen)
+
+                    if (bcVarNames(m) == varname) then
+                        dataSet(k)%dirichletArrays(l)%dataArr(1) = bcDataIn(n)
+                        dataSetd(k)%dirichletArrays(l)%dataArr(1) = bcDataInd(n)
+                    end if
+                end do
+            end if
+        end do
+    end subroutine insertToDataSet_d
+
+    subroutine insertToDataSet_b(bcDataNamesIn, bcDataIn, bcDataInd)
+        !------------------------------------------------------------------------
+        ! Manual Differentiation Warning: This routine is differentiated by hand.
+        ! -----------------------------------------------------------------------
+        use constants
+        use utils, only: char2str
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        character, dimension(:, :), intent(in) :: bcdatanamesin
+        real(kind=realType), dimension(:), intent(in):: bcDataIn
+        real(kind=realType), dimension(:), intent(out) :: bcDataInd
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: k, l, m, n, q
+        integer(kind=intType) :: ind(2, nbcVar), nVarPresent
+        character(len=maxCGNSNameLen) :: varName
+
+        nVarPresent = 0
+
+        do m = 1, size(bcDataIn)
+            bcVarPresent(m) = .false.
+
+            dataSetLoop: do k = 1, nDataSet
+                do l = 1, dataSet(k)%nDirichletArrays
+                    if (dataSet(k)%dirichletArrays(l)%arrayName == &
+                        bcVarNames(m)) then
+
+                        ! Variable is present. Store the indices, update
+                        ! nVarPresent and set bcVarPresent(m) to .True.
+
+                        ind(1, m) = k; ind(2, m) = l
+
+                        nVarPresent = nVarPresent + 1
+                        bcVarPresent(m) = .true.
+
+                        ! Exit the search loop, as the variable was found.
+
+                        exit dataSetLoop
+
+                    end if
+                end do
+            end do dataSetLoop
+        end do
+
+        do m = 1, size(bcDataIn)
+            if (bcVarPresent(m)) then
+                k = ind(1, m)
+                l = ind(2, m)
+                do n = 1, size(bcDataIn)
+
+                    varName = char2str(bcDataNamesIn(n, :), maxCGNSNameLen)
+
+                    if (bcVarNames(m) == varname) then
+                        bcDataInd(n) = bcdataind(n) + dataSetd(k)%dirichletArrays(l)%dataArr(1)
+                        datasetd(k)%dirichletarrays(l)%dataarr(1) = 0.0_8
+                    end if
+                end do
+            end if
+        end do
+    end subroutine insertToDataSet_b
+    !--------------------------------------------
+    ! Initialization routines
+    !--------------------------------------------
+
+    subroutine allocMemBCData
+        !
+        !       allocMemBCData allocates the memory for the prescribed
+        !       boundary data for all multigrid levels and all spectral
+        !       solutions for all blocks.
+        !
+        use constants
+        use blockPointers, only: BCData, flowDoms, nBocos, nDom, BCType
+        use flowVarRefState, only: nt1, nt2
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use inputPhysics, only: useRoughSA
+        use iteration, only: nALESteps
+        use utils, only: setPointers, terminate
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: mm, nn, sps, level, nLevels
+        integer(kind=intType) :: iBeg, jBeg, iEnd, jEnd
+        integer(kind=intType) :: inodeBeg, jnodeBeg, inodeEnd, jnodeEnd
+
+        ! Determine the number of multigrid levels.
+
+        nLevels = ubound(flowDoms, 2)
+
+        ! Loop over the number of multigrid level, spectral solutions
+        ! and local blocks.
+
+        levelLoop: do level = 1, nLevels
+            spectralLoop: do sps = 1, nTimeIntervalsSpectral
+                domainsLoop: do nn = 1, nDom
+
+                    ! Have the pointers in blockPointers point to the
+                    ! current block to make everything more readable.
+
+                    call setPointers(nn, level, sps)
+
+                    ! Loop over the number of boundary subfaces for this block.
+
+                    bocoLoop: do mm = 1, nBocos
+
+                        ! Store the cell range of the boundary subface
+                        ! a bit easier.
+
+                        iBeg = BCData(mm)%icbeg; iEnd = BCData(mm)%icend
+                        jBeg = BCData(mm)%jcbeg; jEnd = BCData(mm)%jcend
+
+                        inodeBeg = BCData(mm)%inbeg; inodeEnd = BCData(mm)%inend
+                        jnodeBeg = BCData(mm)%jnbeg; jnodeEnd = BCData(mm)%jnend
+
+                        ! Note: iBlank/delta are cell based, but uses the node
+                        ! numbers to guarantee a halo exists. These must be
+                        ! allocated for all boundary conditions.
+                        allocate (BCData(mm)%iBlank(iNodeBeg:iNodeEnd + 1, jNodeBeg:jnodeEnd + 1), &
+                                  BCData(mm)%delta(iNodeBeg:iNodeEnd + 1, jNodeBeg:jnodeEnd + 1), &
+                                  BCData(mm)%deltaNode(iNodeBeg:iNodeEnd, jNodeBeg:jNodeEnd), &
+                                  BCData(mm)%surfIndex(iNodeBeg:iNodeEnd, jNodeBeg:jNodeEnd))
+
+                        ! Set the iBlank to 1 for non-overset cases.
+                        BCData(mm)%iBlank = 1
+
+                        ! Determine the boundary condition we are having here
+                        ! and allocate the memory accordingly.
+
+                        select case (BCType(mm))
+
+                        case (NSWallAdiabatic)
+                            allocate (BCData(mm)%uSlip(iBeg:iEnd, jBeg:jEnd, 3), &
+                                      BCData(mm)%uSlipALE(0:nALEsteps, iBeg:iEnd, jBeg:jEnd, 3), &
+                                      BCData(mm)%F(iNodeBeg:iNodeEnd, jNodeBeg:jNodeEnd, 3), &
+                                      BCData(mm)%T(iNodeBeg:iNodeEnd, jNodeBeg:jNodeEnd, 3), &
+                                      BCData(mm)%Tp(iNodeBeg:iNodeEnd, jNodeBeg:jNodeEnd, 3), &
+                                      BCData(mm)%Tv(iNodeBeg:iNodeEnd, jNodeBeg:jNodeEnd, 3), &
+                                      BCData(mm)%Fp(iNodeBeg + 1:iNodeEnd, jNodeBeg + 1:jNodeEnd, 3), &
+                                      BCData(mm)%Fv(iNodeBeg + 1:iNodeEnd, jNodeBeg + 1:jNodeEnd, 3), &
+                                      BCData(mm)%area(iNodeBeg + 1:iNodeEnd, jNodeBeg + 1:jNodeEnd), &
+                                      BCData(mm)%CpTarget(iNodeBeg:iNodeEnd, jNodeBeg:jNodeEnd), &
+                                      stat=ierr)
+                            if (useRoughSA .and. ierr == 0) then
+                                allocate (BCData(mm)%ksNS_Wall(iBeg:iEnd, jBeg:jEnd), stat=ierr)
+                                if (level > 1) then
+                                    ! The extrapolation of the BC for MG does not work
+                                    ! properly. Thus it must be initialized with zero
+                                    BCData(mm)%ksNS_Wall = zero
+                                end if
+                            end if
+                            if (ierr /= 0) &
+                                 call terminate("allocMemBCData", &
+                                 "Memory allocation failure for &
+                                 &an adiabatic wall")
+                            BCData(mm)%CpTarget = zero
+                            !=======================================================
+
+                        case (NSWallIsothermal)
+
+                            allocate (BCData(mm)%uSlip(iBeg:iEnd, jBeg:jEnd, 3), &
+                                      BCData(mm)%uSlipALE(0:nALEsteps, iBeg:iEnd, jBeg:jEnd, 3), &
+                                      BCData(mm)%TNS_Wall(iBeg:iEnd, jBeg:jEnd), &
+                                      BCData(mm)%F(iNodeBeg:iNodeEnd, jNodeBeg:jNodeEnd, 3), &
+                                      BCData(mm)%T(iNodeBeg:iNodeEnd, jNodeBeg:jNodeEnd, 3), &
+                                      BCData(mm)%Tp(iNodeBeg:iNodeEnd, jNodeBeg:jNodeEnd, 3), &
+                                      BCData(mm)%Tv(iNodeBeg:iNodeEnd, jNodeBeg:jNodeEnd, 3), &
+                                      BCData(mm)%cellHeatFlux(iBeg:iEnd, jBeg:jEnd), &
+                                      BCData(mm)%nodeHeatFlux(iNodeBeg:iNodeEnd, jNodeBeg:jNodeEnd), &
+                                      BCData(mm)%Fp(iNodeBeg + 1:iNodeEnd, jNodeBeg + 1:jNodeEnd, 3), &
+                                      BCData(mm)%Fv(iNodeBeg + 1:iNodeEnd, jNodeBeg + 1:jNodeEnd, 3), &
+                                      BCData(mm)%area(iNodeBeg + 1:iNodeEnd, jNodeBeg + 1:jNodeEnd), &
+                                      BCData(mm)%CpTarget(iNodeBeg:iNodeEnd, jNodeBeg:jNodeEnd), &
+                                      stat=ierr)
+                            if (useRoughSA .and. ierr == 0) then
+                                allocate (BCData(mm)%ksNS_Wall(iBeg:iEnd, jBeg:jEnd), stat=ierr)
+                                if (level > 1) then
+                                    ! The extrapolation of the BC for MG does not work
+                                    ! properly. Thus it must be initialized with zero
+                                    BCData(mm)%ksNS_Wall = zero
+                                end if
+                            end if
+                            if (ierr /= 0) &
+                                 call terminate("allocMemBCData", &
+                                 "Memory allocation failure for &
+                                 &an isothermal wall")
+                            BCData(mm)%CpTarget = zero
+
+                            !=======================================================
+
+                        case (EulerWall)
+
+                            allocate (BCData(mm)%rface(iBeg:iEnd, jBeg:jEnd), &
+                                      BCData(mm)%rFaceALE(0:nALEsteps, iBeg:iEnd, jBeg:jEnd), &
+                                      BCData(mm)%F(iNodeBeg:iNodeEnd, jNodeBeg:jNodeEnd, 3), &
+                                      BCData(mm)%T(iNodeBeg:iNodeEnd, jNodeBeg:jNodeEnd, 3), &
+                                      BCData(mm)%Tp(iNodeBeg:iNodeEnd, jNodeBeg:jNodeEnd, 3), &
+                                      BCData(mm)%Tv(iNodeBeg:iNodeEnd, jNodeBeg:jNodeEnd, 3), &
+                                      BCData(mm)%Fp(iNodeBeg + 1:iNodeEnd, jNodeBeg + 1:jNodeEnd, 3), &
+                                      BCData(mm)%Fv(iNodeBeg + 1:iNodeEnd, jNodeBeg + 1:jNodeEnd, 3), &
+                                      BCData(mm)%area(iNodeBeg + 1:iNodeEnd, jNodeBeg + 1:jNodeEnd), &
+                                      BCData(mm)%CpTarget(iNodeBeg:iNodeEnd, jNodeBeg:jNodeEnd), &
+                                      stat=ierr)
+                            if (ierr /= 0) &
+                                 call terminate("allocMemBCData", &
+                                 "Memory allocation failure for &
+                                 &an Euler wall")
+                            BCData(mm)%CpTarget = zero
+                            !=======================================================
+
+                        case (farField)
+
+                            ! Just allocate the memory for the normal mesh
+                            ! velocity.
+
+                            allocate (BCData(mm)%rface(iBeg:iEnd, jBeg:jEnd), &
+                                      BCData(mm)%rFaceALE(0:nALEsteps, iBeg:iEnd, jBeg:jEnd), &
+                                      stat=ierr)
+                            if (ierr /= 0) &
+                                 call terminate("allocMemBCData", &
+                                 "Memory allocation failure for &
+                                 &a farfield")
+                            !=======================================================
+
+                        case (symm, symmPolar)
+
+                            ! Allocate for symm as well. This is not necessary
+                            ! but we need it for the reverse AD.
+
+                            ! Modified by HDN
+                            allocate (BCData(mm)%rface(iBeg:iEnd, jBeg:jEnd), &
+                                      BCData(mm)%rFaceALE(0:nALEsteps, iBeg:iEnd, jBeg:jEnd), &
+                                      stat=ierr)
+                            if (ierr /= 0) &
+                                 call terminate("allocMemBCData", &
+                                 "Memory allocation failure for &
+                                 &a symm")
+
+                            !=======================================================
+
+                        case (SupersonicInflow, DomainInterfaceAll)
+
+                            ! Supersonic inflow or a domain interface with
+                            ! all the data prescribed. Allocate the memory for
+                            ! the entire state vector to be prescribed.
+
+                            allocate (BCData(mm)%rho(iBeg:iEnd, jBeg:jEnd), &
+                                      BCData(mm)%velx(iBeg:iEnd, jBeg:jEnd), &
+                                      BCData(mm)%vely(iBeg:iEnd, jBeg:jEnd), &
+                                      BCData(mm)%velz(iBeg:iEnd, jBeg:jEnd), &
+                                      BCData(mm)%ps(iBeg:iEnd, jBeg:jEnd), &
+                                      stat=ierr)
+                            if (ierr /= 0) &
+                                 call terminate("allocMemBCData", &
+                                 "Memory allocation failure for &
+                                 &a supersonic inflow")
+
+                            ! Check if memory for the turbulent variables must
+                            ! be allocated. If so, do so.
+
+                            if (nt2 >= nt1) then
+                                allocate ( &
+                                    BCData(mm)%turbInlet(iBeg:iEnd, jBeg:jEnd, nt1:nt2), &
+                                    stat=ierr)
+                                if (ierr /= 0) &
+                                     call terminate("allocMemBCData", &
+                                     "Memory allocation failure for &
+                                     &turbInlet for a supersonic &
+                                     &inflow")
+                            end if
+
+                            !=======================================================
+
+                            ! Added by HDN
+                        case (SupersonicOutflow)
+                            ! No state is needed for this boco
+
+                            !=======================================================
+
+                        case (SubsonicInflow)
+
+                            ! Subsonic inflow. Allocate the memory for the
+                            ! variables needed. Note the there are two ways to
+                            ! specify boundary conditions for a subsonic inflow.
+
+                            allocate (BCData(mm)%flowXdirInlet(iBeg:iEnd, jBeg:jEnd), &
+                                      BCData(mm)%flowYdirInlet(iBeg:iEnd, jBeg:jEnd), &
+                                      BCData(mm)%flowZdirInlet(iBeg:iEnd, jBeg:jEnd), &
+                                      BCData(mm)%ptInlet(iBeg:iEnd, jBeg:jEnd), &
+                                      BCData(mm)%ttInlet(iBeg:iEnd, jBeg:jEnd), &
+                                      BCData(mm)%htInlet(iBeg:iEnd, jBeg:jEnd), &
+                                      BCData(mm)%rho(iBeg:iEnd, jBeg:jEnd), &
+                                      BCData(mm)%velx(iBeg:iEnd, jBeg:jEnd), &
+                                      BCData(mm)%vely(iBeg:iEnd, jBeg:jEnd), &
+                                      BCData(mm)%velz(iBeg:iEnd, jBeg:jEnd), &
+                                      stat=ierr)
+                            if (ierr /= 0) &
+                                 call terminate("allocMemBCData", &
+                                 "Memory allocation failure for &
+                                 &a subsonic inflow")
+
+                            ! Check if memory for the turbulent variables must
+                            ! be allocated. If so, do so.
+
+                            if (nt2 >= nt1) then
+                                allocate ( &
+                                    BCData(mm)%turbInlet(iBeg:iEnd, jBeg:jEnd, nt1:nt2), &
+                                    stat=ierr)
+                                if (ierr /= 0) &
+                                     call terminate("allocMemBCData", &
+                                     "Memory allocation failure for &
+                                     &turbInlet for a subsonic inflow")
+                            end if
+
+                            !=======================================================
+
+                        case (SubsonicOutflow, MassBleedOutflow, &
+                              DomainInterfaceP)
+
+                            ! Subsonic outflow, outflow mass bleed or domain
+                            ! interface with prescribed pressure. Allocate the
+                            ! memory for the static pressure.
+
+                            allocate (BCData(mm)%ps(iBeg:iEnd, jBeg:jEnd), &
+                                      stat=ierr)
+                            if (ierr /= 0) &
+                                 call terminate("allocMemBCData", &
+                                 "Memory allocation failure for &
+                                 &a subsonic outflow, outflow mass &
+                                 &bleed or domain interface with &
+                                 &prescribed pressure.")
+
+                            ! Initialize the pressure to avoid problems for
+                            ! the bleed flows.
+
+                            BCData(mm)%ps = zero
+
+                            !=======================================================
+
+                        case (DomainInterfaceRhoUVW)
+
+                            ! Domain interface with prescribed density and
+                            ! velocities, i.e. mass flow is prescribed. Allocate
+                            ! the memory for the variables needed.
+
+                            allocate (BCData(mm)%rho(iBeg:iEnd, jBeg:jEnd), &
+                                      BCData(mm)%velx(iBeg:iEnd, jBeg:jEnd), &
+                                      BCData(mm)%vely(iBeg:iEnd, jBeg:jEnd), &
+                                      BCData(mm)%velz(iBeg:iEnd, jBeg:jEnd), &
+                                      stat=ierr)
+                            if (ierr /= 0) &
+                                 call terminate("allocMemBCData", &
+                                 "Memory allocation failure for &
+                                 &a domain interface with a &
+                                 &prescribed mass flow")
+
+                            ! Check if memory for the turbulent variables must
+                            ! be allocated. If so, do so.
+
+                            if (nt2 >= nt1) then
+                                allocate ( &
+                                    BCData(mm)%turbInlet(iBeg:iEnd, jBeg:jEnd, nt1:nt2), &
+                                    stat=ierr)
+                                if (ierr /= 0) &
+                                     call terminate("allocMemBCData", &
+                                     "Memory allocation failure for &
+                                     &turbInlet for a domain interface &
+                                     &with a prescribed mass flow")
+                            end if
+
+                            !=======================================================
+
+                        case (DomainInterfaceTotal)
+
+                            ! Domain interface with prescribed total conditions.
+                            ! Allocate the memory for the variables needed.
+
+                            allocate (BCData(mm)%flowXdirInlet(iBeg:iEnd, jBeg:jEnd), &
+                                      BCData(mm)%flowYdirInlet(iBeg:iEnd, jBeg:jEnd), &
+                                      BCData(mm)%flowZdirInlet(iBeg:iEnd, jBeg:jEnd), &
+                                      BCData(mm)%ptInlet(iBeg:iEnd, jBeg:jEnd), &
+                                      BCData(mm)%ttInlet(iBeg:iEnd, jBeg:jEnd), &
+                                      BCData(mm)%htInlet(iBeg:iEnd, jBeg:jEnd), &
+                                      stat=ierr)
+                            if (ierr /= 0) &
+                                 call terminate("allocMemBCData", &
+                                 "Memory allocation failure for &
+                                 &a domain interface with total &
+                                 &conditions")
+
+                            ! Check if memory for the turbulent variables must
+                            ! be allocated. If so, do so.
+
+                            if (nt2 >= nt1) then
+                                allocate ( &
+                                    BCData(mm)%turbInlet(iBeg:iEnd, jBeg:jEnd, nt1:nt2), &
+                                    stat=ierr)
+                                if (ierr /= 0) &
+                                     call terminate("allocMemBCData", &
+                                     "Memory allocation failure for &
+                                     &turbInlet for a domain interface &
+                                     &with a prescribed mass flow")
+                            end if
+
+                            !=======================================================
+
+                        case (domainInterfaceRho)
+
+                            ! Domain interface with prescribed density.
+                            ! Allocate the memory for the density.
+
+                            allocate (BCData(mm)%rho(iBeg:iEnd, jBeg:jEnd), &
+                                      stat=ierr)
+                            if (ierr /= 0) &
+                                 call terminate("allocMemBCData", &
+                                 "Memory allocation failure for &
+                                 &a domain interface")
+
+                        end select
+
+                    end do bocoLoop
+
+                end do domainsLoop
+            end do spectralLoop
+        end do levelLoop
+
+    end subroutine allocMemBCData
+
+    subroutine initBCData
+        !
+        !       initBCData allocates and initializes the arrays BCData for
+        !       all boundary subfaces on all grid levels for all spectral
+        !       solutions.
+        !
+        use constants
+        use blockPointers, only: flowDoms, BCData, nDom, nBocos, inBeg, inEnd, &
+                                 jnBeg, jnEnd, knBeg, knEnd, icBeg, icEnd, jcBeg, jcBeg, jcEnd, kcBeg, &
+                                 kcEnd, BCFaceID
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use utils, only: setPointers, terminate
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: i, j, sps
+        integer(kind=intType) :: nLevels, level
+
+        ! Determine the number of grid levels.
+
+        nLevels = ubound(flowDoms, 2)
+
+        ! Loop over the number of grid levels.
+
+        levelLoop: do level = 1, nLevels
+
+            ! Loop over the number of spectral solutions and number of
+            ! blocks stored on this processor.
+
+            spectralLoop: do sps = 1, nTimeIntervalsSpectral
+                domainsLoop: do i = 1, nDom
+
+                    ! Allocate the memory for the array of the boundary
+                    ! condition data.
+
+                    j = flowDoms(i, level, sps)%nBocos
+                    allocate (flowDoms(i, level, sps)%BCData(j), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("initBCData", &
+                                       "Memory allocation failure for BCData")
+
+                    ! Set the pointers to make it more readable.
+
+                    call setPointers(i, level, sps)
+
+                    ! Copy the range of the subfaces in BCData and nullify its
+                    ! pointers.
+
+                    bocoLoop: do j = 1, nBocos
+
+                        ! Determine the block face on which the subface is located
+                        ! and set the dimensions accordingly.
+
+                        select case (BCFaceID(j))
+
+                        case (iMin, iMax)
+                            BCData(j)%inBeg = jnBeg(j)
+                            BCData(j)%inEnd = jnEnd(j)
+                            BCData(j)%jnBeg = knBeg(j)
+                            BCData(j)%jnEnd = knEnd(j)
+
+                            BCData(j)%icbeg = jcbeg(j)
+                            BCData(j)%icend = jcend(j)
+                            BCData(j)%jcbeg = kcbeg(j)
+                            BCData(j)%jcend = kcend(j)
+
+                        case (jMin, jMax)
+                            BCData(j)%inBeg = inBeg(j)
+                            BCData(j)%inEnd = inEnd(j)
+                            BCData(j)%jnBeg = knBeg(j)
+                            BCData(j)%jnEnd = knEnd(j)
+
+                            BCData(j)%icbeg = icbeg(j)
+                            BCData(j)%icend = icend(j)
+                            BCData(j)%jcbeg = kcbeg(j)
+                            BCData(j)%jcend = kcend(j)
+
+                        case (kMin, kMax)
+                            BCData(j)%inBeg = inBeg(j)
+                            BCData(j)%inEnd = inEnd(j)
+                            BCData(j)%jnBeg = jnBeg(j)
+                            BCData(j)%jnEnd = jnEnd(j)
+
+                            BCData(j)%icbeg = icbeg(j)
+                            BCData(j)%icend = icend(j)
+                            BCData(j)%jcbeg = jcbeg(j)
+                            BCData(j)%jcend = jcend(j)
+
+                        end select
+
+                        ! Initialize the boundary condition treatment for
+                        ! subsonic inlet to noSubInlet.
+
+                        BCData(j)%subsonicInletTreatment = noSubInlet
+
+                        ! Nullify the pointers of BCData.
+                        ! Some compilers require this.
+
+                        nullify (BCData(j)%norm)
+                        nullify (BCData(j)%rface)
+                        nullify (BCData(j)%F)
+                        nullify (BCData(j)%Fv)
+                        nullify (BCData(j)%Fp)
+                        nullify (BCData(j)%T)
+                        nullify (BCData(j)%tv)
+                        nullify (BCData(j)%Tp)
+                        nullify (BCData(j)%area)
+                        nullify (BCData(j)%surfIndex)
+                        nullify (BCData(j)%uSlip)
+                        nullify (BCData(j)%TNS_Wall)
+                        nullify (BCData(j)%ksNS_Wall)
+                        nullify (BCData(j)%CpTarget)
+
+                        nullify (BCData(j)%normALE)
+                        nullify (BCData(j)%rfaceALE)
+                        nullify (BCData(j)%uSlipALE)
+                        nullify (BCData(j)%cellHeatFlux)
+                        nullify (BCData(j)%nodeHeatFlux)
+
+                        nullify (BCData(j)%ptInlet)
+                        nullify (BCData(j)%ttInlet)
+                        nullify (BCData(j)%htInlet)
+                        nullify (BCData(j)%flowXdirInlet)
+                        nullify (BCData(j)%flowYdirInlet)
+                        nullify (BCData(j)%flowZdirInlet)
+
+                        nullify (BCData(j)%turbInlet)
+
+                        nullify (BCData(j)%rho)
+                        nullify (BCData(j)%velx)
+                        nullify (BCData(j)%vely)
+                        nullify (BCData(j)%velz)
+                        nullify (BCData(j)%ps)
+                        bcData(j)%symNormSet = .False.
+                        bcData(j)%symNorm = zero
+                        nullify (BCData(j)%iblank)
+                        nullify (BCData(j)%delta)
+                        nullify (BCData(j)%deltaNode)
+
+                    end do bocoLoop
+                end do domainsLoop
+            end do spectralLoop
+        end do levelLoop
+
+    end subroutine initBCData
+
+    ! ------------------------------------------
+    !             Update routines
+    ! ------------------------------------------
+
+    subroutine setBCDataFineGrid(initializationPart)
+        !--------------------------------------------------------------
+        ! Manual Differentiation Warning: Modifying this routine requires
+        ! modifying the hand-written forward and reverse routines.
+        ! --------------------------------------------------------------
+        !
+        !       setBCDataFineGrid extracts the boundary condition data from
+        !       the cgnsGrid and stores it in useable form in the BCData
+        !       arrays of the currently finest grid, i.e. groundLevel.
+        !
+        use constants
+        use blockPointers, only: BCData, BCType, nBKGlobal, nBocos, nDom, cgnsSubFace
+        use communication, only: adflow_comm_world, myid
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use iteration, only: groundLevel
+        use utils, only: setPointers, terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        logical, intent(in) :: initializationPart
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: i, j, sps, iBeg, iEnd, jBeg, jEnd
+
+        logical :: allTurbMassBleedInflow, allTurbSubsonicInflow
+        logical :: allFlowSupersonicInflow, allTurbSupersonicInflow
+        real(kind=realType), dimension(:, :, :), allocatable :: bcVarArray
+
+        ! Initialize axAssumed and massflowPrescribed to .false.,
+        ! indicating that no assumption is made about the axial direction
+        ! and no subsonic inflow boundaries with prescribed mass flow
+        ! are present.
+
+        axAssumed = .false.
+        massflowPrescribed = .false.
+
+        ! Initialize all the prescribed turbulence as well as flow
+        ! variables for supersonic inlet to .true.
+
+        allTurbMassBleedInflow = .true.
+        allTurbSubsonicInflow = .true.
+        allTurbSupersonicInflow = .true.
+
+        allFlowSupersonicInflow = .true.
+
+        ! Loop over the number of spectral solutions and local blocks.
+
+        spectralLoop: do sps = 1, nTimeIntervalsSpectral
+            domainsLoop: do i = 1, nDom
+
+                ! Set the pointers to this block on groundLevel to make
+                ! the code readable.
+
+                call setPointers(i, groundLevel, sps)
+
+                ! Loop over the number of boundary condition subfaces.
+
+                bocoLoop: do j = 1, nBocos
+
+                    ! Store the cgns boundary subface number, the number of
+                    ! boundary condition data sets and the data sets a bit easier.
+
+                    cgnsBoco = cgnsSubface(j)
+                    nDataSet = cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%nDataSet
+                    dataSet => cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%dataSet
+
+                    ! Store the range of the boundary subface a bit easier.
+
+                    iBeg = BCData(j)%icBeg; iEnd = BCData(j)%icEnd
+                    jBeg = BCData(j)%jcBeg; jEnd = BCData(j)%jcEnd
+
+                    ! Allocate the bcVarArray to the maximum size it could
+                    ! possibly be *in the last dimension*.
+                    allocate (bcVarArray(iBeg:iEnd, jBeg:jEnd, nbcVarMax))
+
+                    ! Determine the boundary condition we are having here and
+                    ! call the appropriate routine.
+
+                    select case (BCType(j))
+
+                    case (NSWallAdiabatic)
+                        call setBCVarNamesAdiabaticWall ! sets bcVarNames and nbcVar
+                        call extractFromDataSet(bcVarArray)
+                        call BCDataAdiabaticWall(j, bcVarArray, iBeg, iEnd, jBeg, jEnd)
+
+                    case (NSWallIsothermal)
+                        call setBCVarNamesIsothermalWall ! sets bcVarNames and nbcVar
+                        call extractFromDataSet(bcVarArray)
+                        call BCDataIsothermalWall(j, bcVarArray, iBeg, iEnd, jBeg, jEnd)
+
+                    case (SupersonicInflow)
+                        call setBCVarNamesSupersonicInflow
+                        call extractFromDataSet(bcVarArray)
+                        call BCDataSupersonicInflow(j, bcVarArray, iBeg, iEnd, jBeg, jEnd, &
+                                                    allFlowSupersonicInflow, allTurbSupersonicInflow)
+
+                    case (SubsonicInflow)
+                        call setBCVarNamesSubsonicInflow
+                        call extractFromDataSet(bcVarArray)
+                        call BCDataSubsonicInflow(j, bcVarArray, iBeg, iEnd, jBeg, jEnd, &
+                                                  allTurbSubsonicInflow)
+
+                    case (SubsonicOutflow)
+                        call setBCVarNamesSubsonicOutflow ! sets bcVarNames and nbcVar
+                        call extractFromDataSet(bcVarArray)
+                        call BCDataSubsonicOutflow(j, bcVarArray, iBeg, iEnd, jBeg, jEnd)
 
-             case (DomainInterfaceAll, DomainInterfaceRhoUVW, &
-                  DomainInterfaceP,   DomainInterfaceRho,    &
-                  DomainInterfaceTotal)
-                call terminate('setBCDataFineGrid', &
-                     'Domain interface BCs are not fully implemented')
-             end select
+                    case (DomainInterfaceAll, DomainInterfaceRhoUVW, &
+                          DomainInterfaceP, DomainInterfaceRho, &
+                          DomainInterfaceTotal)
+                        call terminate('setBCDataFineGrid', &
+                                       'Domain interface BCs are not fully implemented')
+                    end select
 
-             deallocate(bcVarArray)
+                    deallocate (bcVarArray)
 
-          enddo bocoLoop
-       enddo domainsLoop
-    enddo spectralLoop
+                end do bocoLoop
+            end do domainsLoop
+        end do spectralLoop
 
-    ! If this is the initialization part perform some checks
-    ! to see if certain assumptions were made.
+        ! If this is the initialization part perform some checks
+        ! to see if certain assumptions were made.
 #ifndef USE_TAPENADE
-    checkInit: if( initializationPart ) then
-
-       ! Check whether or not an assumption was made on the axial
-       ! direction. If so, processor 0 prints a warning.
-
-       i = 0
-       if( axAssumed ) i = 1
-       call mpi_reduce(i, j, 1, adflow_integer, mpi_max, 0, &
-            ADflow_comm_world, ierr)
-
-       if(myID == 0 .and. j == 1) then
-
-          print "(a)", "#"
-          print "(a)", "#*==================== !!! Warning !!! &
-               &======================"
-          print "(a)", "# Radial boundary data given while no &
-               &rotation axis is present."
-          print "(a)", "# It is assumed that the X-axis is the axial &
-               &direction."
-          print "(a)", "#*=====================================&
-               &======================"
-          print "(a)", "#"
-
-       endif
-
-       ! Check whether or not subsonic inflow boundaries are present with
-       ! a prescribed mass flow. If so print a warning that the flow
-       ! problem should not be a choked one.
-
-       i = 0
-       if( massflowPrescribed ) i = 1
-       call mpi_reduce(i, j, 1, adflow_integer, mpi_max, 0, &
-            ADflow_comm_world, ierr)
-
-       if(myID == 0 .and. j == 1) then
-
-          print "(a)", "#"
-          print "(a)", "#*==================== !!! Warning !!! &
-               &======================"
-          print "(a)", "# Subsonic inflow boundaries present with &
-               &prescribed mass flow."
-          print "(a)", "# This is only a well posed problem if the &
-               &flow is not choked."
-          print "(a)", "#*=====================================&
-               &======================"
-          print "(a)", "#"
-
-       endif
-
-       ! Check whether or not mass bleed inflow regions are present
-       ! for which the free stream turbulence is used.
-
-       i = 0
-       if(.not. allTurbMassBleedInflow) i = 1
-       call mpi_reduce(i, j, 1, adflow_integer, mpi_max, 0, &
-            ADflow_comm_world, ierr)
-
-       if(myID == 0 .and. j == 1) then
-
-          print "(a)", "#"
-          print "(a)", "#*==================== !!! Warning !!! &
-               &======================"
-          print "(a)", "# Inflow bleed regions present for which the &
-               &turbulence"
-          print "(a)", "# quantities are not or insufficiently &
-               &prescribed."
-          print "(a)", "# Using free stream values instead."
-          print "(a)", "#*=====================================&
-               &======================"
-          print "(a)", "#"
-
-       endif
-
-       ! Check whether or not subsonic inflow regions are present
-       ! for which the free stream turbulence is used.
-
-       i = 0
-       if(.not. allTurbSubsonicInflow) i = 1
-       call mpi_reduce(i, j, 1, adflow_integer, mpi_max, 0, &
-            ADflow_comm_world, ierr)
-
-       if(myID == 0 .and. j == 1) then
-
-          print "(a)", "#"
-          print "(a)", "#*==================== !!! Warning !!! &
-               &======================"
-          print "(a)", "# Subsonic inflow regions present for which &
-               &the turbulence"
-          print "(a)", "# quantities are not or insufficiently &
-               &prescribed."
-          print "(a)", "# Using free stream values instead."
-          print "(a)", "#*=====================================&
-               &======================"
-          print "(a)", "#"
-
-       endif
-
-       ! Check whether or not supersonic inflow regions are present
-       ! for which the free stream variables is used.
-
-       i = 0
-       if(.not. allFlowSupersonicInflow) i = 1
-       call mpi_reduce(i, j, 1, adflow_integer, mpi_max, 0, &
-            ADflow_comm_world, ierr)
-
-       if(myID == 0 .and. j == 1) then
-
-          print "(a)", "#"
-          print "(a)", "#*==================== !!! Warning !!! &
-               &======================"
-          print "(a)", "# Supersonic inflow regions present for which &
-               &the flow variables"
-          print "(a)", "# are not or insufficiently prescribed."
-          print "(a)", "# Using free stream values instead."
-          print "(a)", "#*=====================================&
-               &======================"
-          print "(a)", "#"
-
-       endif
-
-       ! Check whether or not supersonic inflow regions are present
-       ! for which the free stream turbulence is used.
-
-       i = 0
-       if(.not. allTurbSupersonicInflow) i = 1
-       call mpi_reduce(i, j, 1, adflow_integer, mpi_max, 0, &
-            ADflow_comm_world, ierr)
-
-       if(myID == 0 .and. j == 1) then
-
-          print "(a)", "#"
-          print "(a)", "#*==================== !!! Warning !!! &
-               &======================"
-          print "(a)", "# Supersonic inflow regions present for which &
-               &the turbulence"
-          print "(a)", "# quantities are not or insufficiently &
-               &prescribed."
-          print "(a)", "# Using free stream values instead."
-          print "(a)", "#*=====================================&
-               &======================"
-          print "(a)", "#"
-
-       endif
-
-    endif checkInit
+        checkInit: if (initializationPart) then
+
+            ! Check whether or not an assumption was made on the axial
+            ! direction. If so, processor 0 prints a warning.
+
+            i = 0
+            if (axAssumed) i = 1
+            call mpi_reduce(i, j, 1, adflow_integer, mpi_max, 0, &
+                            ADflow_comm_world, ierr)
+
+            if (myID == 0 .and. j == 1) then
+
+                print "(a)", "#"
+                print "(a)", "#*==================== !!! Warning !!! &
+                     &======================"
+                print "(a)", "# Radial boundary data given while no &
+                     &rotation axis is present."
+                print "(a)", "# It is assumed that the X-axis is the axial &
+                     &direction."
+                print "(a)", "#*=====================================&
+                     &======================"
+                print "(a)", "#"
+
+            end if
+
+            ! Check whether or not subsonic inflow boundaries are present with
+            ! a prescribed mass flow. If so print a warning that the flow
+            ! problem should not be a choked one.
+
+            i = 0
+            if (massflowPrescribed) i = 1
+            call mpi_reduce(i, j, 1, adflow_integer, mpi_max, 0, &
+                            ADflow_comm_world, ierr)
+
+            if (myID == 0 .and. j == 1) then
+
+                print "(a)", "#"
+                print "(a)", "#*==================== !!! Warning !!! &
+                     &======================"
+                print "(a)", "# Subsonic inflow boundaries present with &
+                     &prescribed mass flow."
+                print "(a)", "# This is only a well posed problem if the &
+                     &flow is not choked."
+                print "(a)", "#*=====================================&
+                     &======================"
+                print "(a)", "#"
+
+            end if
+
+            ! Check whether or not mass bleed inflow regions are present
+            ! for which the free stream turbulence is used.
+
+            i = 0
+            if (.not. allTurbMassBleedInflow) i = 1
+            call mpi_reduce(i, j, 1, adflow_integer, mpi_max, 0, &
+                            ADflow_comm_world, ierr)
+
+            if (myID == 0 .and. j == 1) then
+
+                print "(a)", "#"
+                print "(a)", "#*==================== !!! Warning !!! &
+                     &======================"
+                print "(a)", "# Inflow bleed regions present for which the &
+                     &turbulence"
+                print "(a)", "# quantities are not or insufficiently &
+                     &prescribed."
+                print "(a)", "# Using free stream values instead."
+                print "(a)", "#*=====================================&
+                     &======================"
+                print "(a)", "#"
+
+            end if
+
+            ! Check whether or not subsonic inflow regions are present
+            ! for which the free stream turbulence is used.
+
+            i = 0
+            if (.not. allTurbSubsonicInflow) i = 1
+            call mpi_reduce(i, j, 1, adflow_integer, mpi_max, 0, &
+                            ADflow_comm_world, ierr)
+
+            if (myID == 0 .and. j == 1) then
+
+                print "(a)", "#"
+                print "(a)", "#*==================== !!! Warning !!! &
+                     &======================"
+                print "(a)", "# Subsonic inflow regions present for which &
+                     &the turbulence"
+                print "(a)", "# quantities are not or insufficiently &
+                     &prescribed."
+                print "(a)", "# Using free stream values instead."
+                print "(a)", "#*=====================================&
+                     &======================"
+                print "(a)", "#"
+
+            end if
+
+            ! Check whether or not supersonic inflow regions are present
+            ! for which the free stream variables is used.
+
+            i = 0
+            if (.not. allFlowSupersonicInflow) i = 1
+            call mpi_reduce(i, j, 1, adflow_integer, mpi_max, 0, &
+                            ADflow_comm_world, ierr)
+
+            if (myID == 0 .and. j == 1) then
+
+                print "(a)", "#"
+                print "(a)", "#*==================== !!! Warning !!! &
+                     &======================"
+                print "(a)", "# Supersonic inflow regions present for which &
+                     &the flow variables"
+                print "(a)", "# are not or insufficiently prescribed."
+                print "(a)", "# Using free stream values instead."
+                print "(a)", "#*=====================================&
+                     &======================"
+                print "(a)", "#"
+
+            end if
+
+            ! Check whether or not supersonic inflow regions are present
+            ! for which the free stream turbulence is used.
+
+            i = 0
+            if (.not. allTurbSupersonicInflow) i = 1
+            call mpi_reduce(i, j, 1, adflow_integer, mpi_max, 0, &
+                            ADflow_comm_world, ierr)
+
+            if (myID == 0 .and. j == 1) then
+
+                print "(a)", "#"
+                print "(a)", "#*==================== !!! Warning !!! &
+                     &======================"
+                print "(a)", "# Supersonic inflow regions present for which &
+                     &the turbulence"
+                print "(a)", "# quantities are not or insufficiently &
+                     &prescribed."
+                print "(a)", "# Using free stream values instead."
+                print "(a)", "#*=====================================&
+                     &======================"
+                print "(a)", "#"
+
+            end if
+
+        end if checkInit
 #endif
-  end subroutine setBCDataFineGrid
+    end subroutine setBCDataFineGrid
 #ifndef USE_COMPLEX
-  subroutine setBCDataFineGrid_d(initializationPart)
-    !------------------------------------------------------------------------
-    ! Manual Differentiation Warning: This routine is differentiated by hand.
-    ! -----------------------------------------------------------------------
-    !
-    !       setBCDataFineGrid extracts the boundary condition data from
-    !       the cgnsGrid and stores it in useable form in the BCData
-    !       arrays of the currently finest grid, i.e. groundLevel.
-    !
-    use constants
-    use blockPointers, only : BCData, BCType, nBKGlobal, nBocos, nDom, cgnsSubFace
-    use communication, only : adflow_comm_world, myid
-    use inputTimeSpectral, only :nTimeIntervalsSpectral
-    use iteration, only : groundLevel
-    use utils, only : setPointers_d, terminate
-    use bcdata_d, onlY : BCDataIsothermalWall_d, BCDataSupersonicInflow_d, &
-         BCDataSubsonicInflow_d, BCDataSubsonicOutflow_d
-    use diffsizes, only :  ISIZE1OFDrfbcdata
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    logical, intent(in) :: initializationPart
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-    logical :: allTurbSubsonicInflow
-    logical :: allFlowSupersonicInflow, allTurbSupersonicInflow
-    integer(kind=intType) :: i, j, sps, iBeg, iEnd, jBeg, jEnd
-    real(kind=realType), dimension(:,:,:), allocatable :: bcVarArray, bcVarArrayd
-
-    ! Loop over the number of spectral solutions and local blocks.
-
-    spectralLoop: do sps=1,nTimeIntervalsSpectral
-       domainsLoop: do i=1,nDom
-
-          ! Set the pointers to this block on groundLevel to make
-          ! the code readable.
-
-          call setPointers_d(i,groundLevel,sps)
-
-          ! Loop over the number of boundary condition subfaces.
-          iSize1OfDrfbcdata = nBocos
-
-          bocoLoop: do j=1,nBocos
-
-             ! Store the cgns boundary subface number, the number of
-             ! boundary condition data sets and the data sets a bit easier.
-
-             cgnsBoco = cgnsSubface(j)
-             nDataSet = cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%nDataSet
-             dataSet => cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%dataSet
-             dataSetd => cgnsDomsd(nbkGlobal)%bocoInfo(cgnsBoco)%dataSet
-
-             ! Store the range of the boundary subface a bit easier.
-
-             iBeg = BCData(j)%icBeg; iEnd = BCData(j)%icEnd
-             jBeg = BCData(j)%jcBeg; jEnd = BCData(j)%jcEnd
-
-             ! Allocate the bcVarArray to the maximum size it could
-             ! possibly be *in the last dimension*.
-             allocate(bcVarArray(iBeg:iEnd,jBeg:jEnd,nbcVarMax), &
-                  bcVarArrayd(iBeg:iEnd,jBeg:jEnd,nbcVarMax))
-             ! Determine the boundary condition we are having here and
-             ! call the appropriate routine.
-             select case (BCType(j))
-
-           ! This would be needed if wall roughness is differentiated
-           ! case (NSWallAdiabatic)
-           !    call setBCVarNamesAdiabaticWall ! sets bcVarNames and nbcVar
-           !    call extractFromDataSet_d(bcVarArray, bcVarArrayd)
-           !    call BCDataAdiabaticWall_d(j, bcVarArray, bcVarArrayd, iBeg, iEnd, jBeg, jEnd)
-
-             case (NSWallIsothermal)
-                call setBCVarNamesIsothermalWall ! sets bcVarNames and nbcVar
-                call extractFromDataSet_d(bcVarArray, bcVarArrayd)
-                call BCDataIsothermalWall_d(j, bcVarArray, bcVarArrayd, iBeg, iEnd, jBeg, jEnd)
-
-             case (SupersonicInflow)
-                call setBCVarNamesSupersonicInflow
-                call extractFromDataSet_d(bcVarArray, bcVarArrayd)
-                call BCDataSupersonicInflow_d(j, bcVarArray, bcVarArrayd, iBeg, iEnd, jBeg, jEnd, &
-                     allFlowSupersonicInflow, allTurbSupersonicInflow)
-
-             case (SubsonicInflow)
-                call setBCVarNamesSubsonicInflow
-                call extractFromDataSet_d(bcVarArray, bcVarArrayd)
-                call BCDataSubsonicInflow_d(j, bcVarArray, bcVarArrayd, iBeg, iEnd, jBeg, jEnd, &
-                     allTurbSubsonicInflow)
-
-             case (SubsonicOutflow)
-                call setBCVarNamesSubsonicOutflow ! sets bcVarNames and nbcVar
-                call extractFromDataSet_d(bcVarArray, bcVarArrayd)
-                call BCDataSubsonicOutflow_d(j, bcVarArray, bcVarArrayd, iBeg, iEnd, jBeg, jEnd)
-             end select
-
-             deallocate(bcVarArray, bcVarArrayd)
-          enddo bocoLoop
-       enddo domainsLoop
-    enddo spectralLoop
-  end subroutine setBCDataFineGrid_d
-
-  subroutine setBCDataFineGrid_b(initializationPart)
-    !------------------------------------------------------------------------
-    ! Manual Differentiation Warning: This routine is differentiated by hand.
-    ! -----------------------------------------------------------------------
-    !
-    !       setBCDataFineGrid extracts the boundary condition data from
-    !       the cgnsGrid and stores it in useable form in the BCData
-    !       arrays of the currently finest grid, i.e. groundLevel.
-    !
-    use constants
-    use blockPointers, only : BCData, BCType, nBKGlobal, nBocos, nDom, cgnsSubFace
-    use communication, only : adflow_comm_world, myid
-    use inputTimeSpectral, only :nTimeIntervalsSpectral
-    use iteration, only : groundLevel
-    use utils, only : setPointers_b, terminate
-    use bcdata_b , only : BCDataIsothermalWall_b, BCDataSupersonicInflow_b, &
-         BCDataSubsonicInflow_b, BCDataSubsonicOutflow_b
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    logical, intent(in) :: initializationPart
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-    logical :: allTurbSubsonicInflow
-    logical :: allFlowSupersonicInflow, allTurbSupersonicInflow
-    integer(kind=intType) :: i, j, sps, iBeg, iEnd, jBeg, jEnd
-    real(kind=realType), dimension(:, :, :),allocatable :: bcVarArray, bcVarArrayd
-
-    ! Loop over the number of spectral solutions and local blocks.
-
-    spectralLoop: do sps=1,nTimeIntervalsSpectral
-       domainsLoop: do i=1,nDom
-
-          ! Set the pointers to this block on groundLevel to make
-          ! the code readable.
-
-          call setPointers_b(i,groundLevel,sps)
-
-          ! Loop over the number of boundary condition subfaces.
-
-          bocoLoop: do j=1,nBocos
-
-             ! Store the cgns boundary subface number, the number of
-             ! boundary condition data sets and the data sets a bit easier.
-
-             cgnsBoco = cgnsSubface(j)
-             nDataSet = cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%nDataSet
-             dataSet => cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%dataSet
-             dataSetd => cgnsDomsd(nbkGlobal)%bocoInfo(cgnsBoco)%dataSet
-
-             ! Store the range of the boundary subface a bit easier.
-
-             iBeg = BCData(j)%icBeg; iEnd = BCData(j)%icEnd
-             jBeg = BCData(j)%jcBeg; jEnd = BCData(j)%jcEnd
-
-             ! Allocate the bcVarArray to the maximum size it could
-             ! possibly be *in the last dimension*.
-             allocate(bcVarArray(iBeg:iEnd,jBeg:jEnd, nbcVarMax), &
-                  bcVarArrayd(iBeg:iEnd,jBeg:jEnd, nbcVarMax))
-
-             ! Determine the boundary condition we are having here and
-             ! call the appropriate routine.
-             select case (BCType(j))
-
-           ! This would be needed if wall roughness is differentiated
-           ! case (NSWallAdiabatic)
-           !    call setBCVarNamesAdiabaticWall ! sets bcVarNames and nbcVar
-           !    call extractFromDataSet(bcVarArray)
-           !    call BCDataAdiabaticWall_b(j, bcVarArray, bcVarArrayd, iBeg, iEnd, jBeg, jEnd)
-           !    call extractFromDataSet_b(bcVarArray, bcVarArrayd)
-
-             case (NSWallIsothermal)
-                call setBCVarNamesIsothermalWall ! sets bcVarNames and nbcVar
-                call extractFromDataSet(bcVarArray)
-                call BCDataIsothermalWall_b(j, bcVarArray, bcVarArrayd, iBeg, iEnd, jBeg, jEnd)
-                call extractFromDataSet_b(bcVarArray, bcVarArrayd)
-
-             case (SupersonicInflow)
-                call setBCVarNamesSupersonicInflow
-                call extractFromDataSet(bcVarArray)
-                call BCDataSupersonicInflow_b(j, bcVarArray, bcVarArrayd, iBeg, iEnd, jBeg, jEnd, &
-                     allFlowSupersonicInflow, allTurbSupersonicInflow)
-                call extractFromDataSet_b(bcVarArray, bcVarArrayd)
-
-             case (SubsonicInflow)
-                call setBCVarNamesSubsonicInflow
-                call extractFromDataSet(bcVarArray)
-                call BCDataSubsonicInflow_b(j, bcVarArray, bcVarArrayd, iBeg, iEnd, jBeg, jEnd, &
-                     allTurbSubsonicInflow)
-                call extractFromDataSet_b(bcVarArray, bcVarArrayd)
-
-             case (SubsonicOutflow)
-                call setBCVarNamesSubsonicOutflow
-                call extractFromDataSet(bcVarArray)
-                call BCDataSubsonicOutflow_b(j, bcVarArray, bcVarArrayd, iBeg, iEnd, jBeg, jEnd)
-                call extractFromDataSet_b(bcVarArray, bcVarArrayd)
-             end select
-
-             deallocate(bcVarArray, bcVarArrayd)
-          enddo bocoLoop
-       enddo domainsLoop
-    enddo spectralLoop
-  end subroutine setBCDataFineGrid_b
+    subroutine setBCDataFineGrid_d(initializationPart)
+        !------------------------------------------------------------------------
+        ! Manual Differentiation Warning: This routine is differentiated by hand.
+        ! -----------------------------------------------------------------------
+        !
+        !       setBCDataFineGrid extracts the boundary condition data from
+        !       the cgnsGrid and stores it in useable form in the BCData
+        !       arrays of the currently finest grid, i.e. groundLevel.
+        !
+        use constants
+        use blockPointers, only: BCData, BCType, nBKGlobal, nBocos, nDom, cgnsSubFace
+        use communication, only: adflow_comm_world, myid
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use iteration, only: groundLevel
+        use utils, only: setPointers_d, terminate
+        use bcdata_d, onlY: BCDataIsothermalWall_d, BCDataSupersonicInflow_d, &
+                            BCDataSubsonicInflow_d, BCDataSubsonicOutflow_d
+        use diffsizes, only: ISIZE1OFDrfbcdata
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        logical, intent(in) :: initializationPart
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+        logical :: allTurbSubsonicInflow
+        logical :: allFlowSupersonicInflow, allTurbSupersonicInflow
+        integer(kind=intType) :: i, j, sps, iBeg, iEnd, jBeg, jEnd
+        real(kind=realType), dimension(:, :, :), allocatable :: bcVarArray, bcVarArrayd
+
+        ! Loop over the number of spectral solutions and local blocks.
+
+        spectralLoop: do sps = 1, nTimeIntervalsSpectral
+            domainsLoop: do i = 1, nDom
+
+                ! Set the pointers to this block on groundLevel to make
+                ! the code readable.
+
+                call setPointers_d(i, groundLevel, sps)
+
+                ! Loop over the number of boundary condition subfaces.
+                iSize1OfDrfbcdata = nBocos
+
+                bocoLoop: do j = 1, nBocos
+
+                    ! Store the cgns boundary subface number, the number of
+                    ! boundary condition data sets and the data sets a bit easier.
+
+                    cgnsBoco = cgnsSubface(j)
+                    nDataSet = cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%nDataSet
+                    dataSet => cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%dataSet
+                    dataSetd => cgnsDomsd(nbkGlobal)%bocoInfo(cgnsBoco)%dataSet
+
+                    ! Store the range of the boundary subface a bit easier.
+
+                    iBeg = BCData(j)%icBeg; iEnd = BCData(j)%icEnd
+                    jBeg = BCData(j)%jcBeg; jEnd = BCData(j)%jcEnd
+
+                    ! Allocate the bcVarArray to the maximum size it could
+                    ! possibly be *in the last dimension*.
+                    allocate (bcVarArray(iBeg:iEnd, jBeg:jEnd, nbcVarMax), &
+                              bcVarArrayd(iBeg:iEnd, jBeg:jEnd, nbcVarMax))
+                    ! Determine the boundary condition we are having here and
+                    ! call the appropriate routine.
+                    select case (BCType(j))
+
+                        ! This would be needed if wall roughness is differentiated
+                        ! case (NSWallAdiabatic)
+                        !    call setBCVarNamesAdiabaticWall ! sets bcVarNames and nbcVar
+                        !    call extractFromDataSet_d(bcVarArray, bcVarArrayd)
+                        !    call BCDataAdiabaticWall_d(j, bcVarArray, bcVarArrayd, iBeg, iEnd, jBeg, jEnd)
+
+                    case (NSWallIsothermal)
+                        call setBCVarNamesIsothermalWall ! sets bcVarNames and nbcVar
+                        call extractFromDataSet_d(bcVarArray, bcVarArrayd)
+                        call BCDataIsothermalWall_d(j, bcVarArray, bcVarArrayd, iBeg, iEnd, jBeg, jEnd)
+
+                    case (SupersonicInflow)
+                        call setBCVarNamesSupersonicInflow
+                        call extractFromDataSet_d(bcVarArray, bcVarArrayd)
+                        call BCDataSupersonicInflow_d(j, bcVarArray, bcVarArrayd, iBeg, iEnd, jBeg, jEnd, &
+                                                      allFlowSupersonicInflow, allTurbSupersonicInflow)
+
+                    case (SubsonicInflow)
+                        call setBCVarNamesSubsonicInflow
+                        call extractFromDataSet_d(bcVarArray, bcVarArrayd)
+                        call BCDataSubsonicInflow_d(j, bcVarArray, bcVarArrayd, iBeg, iEnd, jBeg, jEnd, &
+                                                    allTurbSubsonicInflow)
+
+                    case (SubsonicOutflow)
+                        call setBCVarNamesSubsonicOutflow ! sets bcVarNames and nbcVar
+                        call extractFromDataSet_d(bcVarArray, bcVarArrayd)
+                        call BCDataSubsonicOutflow_d(j, bcVarArray, bcVarArrayd, iBeg, iEnd, jBeg, jEnd)
+                    end select
+
+                    deallocate (bcVarArray, bcVarArrayd)
+                end do bocoLoop
+            end do domainsLoop
+        end do spectralLoop
+    end subroutine setBCDataFineGrid_d
+
+    subroutine setBCDataFineGrid_b(initializationPart)
+        !------------------------------------------------------------------------
+        ! Manual Differentiation Warning: This routine is differentiated by hand.
+        ! -----------------------------------------------------------------------
+        !
+        !       setBCDataFineGrid extracts the boundary condition data from
+        !       the cgnsGrid and stores it in useable form in the BCData
+        !       arrays of the currently finest grid, i.e. groundLevel.
+        !
+        use constants
+        use blockPointers, only: BCData, BCType, nBKGlobal, nBocos, nDom, cgnsSubFace
+        use communication, only: adflow_comm_world, myid
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use iteration, only: groundLevel
+        use utils, only: setPointers_b, terminate
+        use bcdata_b, only: BCDataIsothermalWall_b, BCDataSupersonicInflow_b, &
+                            BCDataSubsonicInflow_b, BCDataSubsonicOutflow_b
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        logical, intent(in) :: initializationPart
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+        logical :: allTurbSubsonicInflow
+        logical :: allFlowSupersonicInflow, allTurbSupersonicInflow
+        integer(kind=intType) :: i, j, sps, iBeg, iEnd, jBeg, jEnd
+        real(kind=realType), dimension(:, :, :), allocatable :: bcVarArray, bcVarArrayd
+
+        ! Loop over the number of spectral solutions and local blocks.
+
+        spectralLoop: do sps = 1, nTimeIntervalsSpectral
+            domainsLoop: do i = 1, nDom
+
+                ! Set the pointers to this block on groundLevel to make
+                ! the code readable.
+
+                call setPointers_b(i, groundLevel, sps)
+
+                ! Loop over the number of boundary condition subfaces.
+
+                bocoLoop: do j = 1, nBocos
+
+                    ! Store the cgns boundary subface number, the number of
+                    ! boundary condition data sets and the data sets a bit easier.
+
+                    cgnsBoco = cgnsSubface(j)
+                    nDataSet = cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%nDataSet
+                    dataSet => cgnsDoms(nbkGlobal)%bocoInfo(cgnsBoco)%dataSet
+                    dataSetd => cgnsDomsd(nbkGlobal)%bocoInfo(cgnsBoco)%dataSet
+
+                    ! Store the range of the boundary subface a bit easier.
+
+                    iBeg = BCData(j)%icBeg; iEnd = BCData(j)%icEnd
+                    jBeg = BCData(j)%jcBeg; jEnd = BCData(j)%jcEnd
+
+                    ! Allocate the bcVarArray to the maximum size it could
+                    ! possibly be *in the last dimension*.
+                    allocate (bcVarArray(iBeg:iEnd, jBeg:jEnd, nbcVarMax), &
+                              bcVarArrayd(iBeg:iEnd, jBeg:jEnd, nbcVarMax))
+
+                    ! Determine the boundary condition we are having here and
+                    ! call the appropriate routine.
+                    select case (BCType(j))
+
+                        ! This would be needed if wall roughness is differentiated
+                        ! case (NSWallAdiabatic)
+                        !    call setBCVarNamesAdiabaticWall ! sets bcVarNames and nbcVar
+                        !    call extractFromDataSet(bcVarArray)
+                        !    call BCDataAdiabaticWall_b(j, bcVarArray, bcVarArrayd, iBeg, iEnd, jBeg, jEnd)
+                        !    call extractFromDataSet_b(bcVarArray, bcVarArrayd)
+
+                    case (NSWallIsothermal)
+                        call setBCVarNamesIsothermalWall ! sets bcVarNames and nbcVar
+                        call extractFromDataSet(bcVarArray)
+                        call BCDataIsothermalWall_b(j, bcVarArray, bcVarArrayd, iBeg, iEnd, jBeg, jEnd)
+                        call extractFromDataSet_b(bcVarArray, bcVarArrayd)
+
+                    case (SupersonicInflow)
+                        call setBCVarNamesSupersonicInflow
+                        call extractFromDataSet(bcVarArray)
+                        call BCDataSupersonicInflow_b(j, bcVarArray, bcVarArrayd, iBeg, iEnd, jBeg, jEnd, &
+                                                      allFlowSupersonicInflow, allTurbSupersonicInflow)
+                        call extractFromDataSet_b(bcVarArray, bcVarArrayd)
+
+                    case (SubsonicInflow)
+                        call setBCVarNamesSubsonicInflow
+                        call extractFromDataSet(bcVarArray)
+                        call BCDataSubsonicInflow_b(j, bcVarArray, bcVarArrayd, iBeg, iEnd, jBeg, jEnd, &
+                                                    allTurbSubsonicInflow)
+                        call extractFromDataSet_b(bcVarArray, bcVarArrayd)
+
+                    case (SubsonicOutflow)
+                        call setBCVarNamesSubsonicOutflow
+                        call extractFromDataSet(bcVarArray)
+                        call BCDataSubsonicOutflow_b(j, bcVarArray, bcVarArrayd, iBeg, iEnd, jBeg, jEnd)
+                        call extractFromDataSet_b(bcVarArray, bcVarArrayd)
+                    end select
+
+                    deallocate (bcVarArray, bcVarArrayd)
+                end do bocoLoop
+            end do domainsLoop
+        end do spectralLoop
+    end subroutine setBCDataFineGrid_b
 #endif
-  subroutine setBCDataCoarseGrid
-    !
-    !       setBCDataCoarseGrid determines the boundary condition info
-    !       on the coarse grid from the known info on the fine grid. It
-    !       will be stored in the BCData arrays of flowDoms.
-    !
-    use constants
-    use blockPointers, only : BCFaceID, BCData, nDom, flowDoms, il, jl, kl, &
-         mgIFine, mgJFine, mgKFine, nBocos, BCType
-    use flowVarRefState, only : nt1, nt2, hRef, Tref
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use iteration, only : groundLevel
-    use utils, only : setPointers
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, l, sps
-    integer(kind=intType) :: iBeg, jBeg, iEnd, jEnd, iiMax, jjMax
-    integer(kind=intType) :: nLevels, level, levm1
-
-    integer(kind=intType), dimension(:,:), pointer :: iFine, jFine
+    subroutine setBCDataCoarseGrid
+        !
+        !       setBCDataCoarseGrid determines the boundary condition info
+        !       on the coarse grid from the known info on the fine grid. It
+        !       will be stored in the BCData arrays of flowDoms.
+        !
+        use constants
+        use blockPointers, only: BCFaceID, BCData, nDom, flowDoms, il, jl, kl, &
+                                 mgIFine, mgJFine, mgKFine, nBocos, BCType
+        use flowVarRefState, only: nt1, nt2, hRef, Tref
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use iteration, only: groundLevel
+        use utils, only: setPointers
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, l, sps
+        integer(kind=intType) :: iBeg, jBeg, iEnd, jEnd, iiMax, jjMax
+        integer(kind=intType) :: nLevels, level, levm1
 
-    real(kind=realType) :: var, hDim, TDim
+        integer(kind=intType), dimension(:, :), pointer :: iFine, jFine
 
-    real(kind=realType), dimension(3) :: dir
+        real(kind=realType) :: var, hDim, TDim
 
-    ! Determine the number of grid levels.
-    nLevels = ubound(flowDoms,2)
+        real(kind=realType), dimension(3) :: dir
 
-    ! Loop over the coarser grid levels. It is assumed that the
-    ! bc data of groundLevel is set correctly.
+        ! Determine the number of grid levels.
+        nLevels = ubound(flowDoms, 2)
 
-    coarseLevelLoop: do level=(groundLevel+1),nLevels
+        ! Loop over the coarser grid levels. It is assumed that the
+        ! bc data of groundLevel is set correctly.
 
-       ! Store the fine grid level a bit easier.
+        coarseLevelLoop: do level = (groundLevel + 1), nLevels
 
-       levm1 = level - 1
+            ! Store the fine grid level a bit easier.
 
-       ! Loop over the number of spectral solutions and local blocks.
+            levm1 = level - 1
 
-       spectralLoop: do sps=1,nTimeIntervalsSpectral
-          domainsLoop: do i=1,nDom
+            ! Loop over the number of spectral solutions and local blocks.
 
-             ! Set the pointers to the coarse block.
+            spectralLoop: do sps = 1, nTimeIntervalsSpectral
+                domainsLoop: do i = 1, nDom
 
-             call setPointers(i, level, sps)
+                    ! Set the pointers to the coarse block.
 
-             ! Loop over the boundary subfaces and interpolate the
-             ! prescribed boundary data for this grid level.
+                    call setPointers(i, level, sps)
 
-             bocoLoop: do j=1,nBocos
+                    ! Loop over the boundary subfaces and interpolate the
+                    ! prescribed boundary data for this grid level.
 
-                ! Determine the block face on which the subface is
-                ! located and set some multigrid variables accordingly.
+                    bocoLoop: do j = 1, nBocos
 
-                select case (BCFaceID(j))
+                        ! Determine the block face on which the subface is
+                        ! located and set some multigrid variables accordingly.
 
-                case (iMin,iMax)
-                   iiMax = jl; jjMax = kl
-                   iFine => mgJFine; jFine => mgKFine
+                        select case (BCFaceID(j))
 
-                case (jMin,jMax)
-                   iiMax = il; jjMax = kl
-                   iFine => mgIFine; jFine => mgKFine
+                        case (iMin, iMax)
+                            iiMax = jl; jjMax = kl
+                            iFine => mgJFine; jFine => mgKFine
 
-                case (kMin,kMax)
-                   iiMax = il; jjMax = jl
-                   iFine => mgIFine; jFine => mgJFine
+                        case (jMin, jMax)
+                            iiMax = il; jjMax = kl
+                            iFine => mgIFine; jFine => mgKFine
 
-                end select
+                        case (kMin, kMax)
+                            iiMax = il; jjMax = jl
+                            iFine => mgIFine; jFine => mgJFine
 
-                ! Abbreviate the size of the subface a bit easier.
+                        end select
 
-                iBeg = BCData(j)%icBeg; iEnd = BCData(j)%icEnd
-                jBeg = BCData(j)%jcBeg; jEnd = BCData(j)%jcEnd
+                        ! Abbreviate the size of the subface a bit easier.
 
-                ! Copy the subsonic boundary conditions treatment.
+                        iBeg = BCData(j)%icBeg; iEnd = BCData(j)%icEnd
+                        jBeg = BCData(j)%jcBeg; jEnd = BCData(j)%jcEnd
 
-                BCData(j)%subsonicInletTreatment = &
-                     flowDoms(i,levm1,sps)%BCData(j)%subsonicInletTreatment
+                        ! Copy the subsonic boundary conditions treatment.
 
-                ! Interpolate the data for the possible prescribed boundary
-                ! data.
-                call interpolateBcData(BCData(j)%TNS_Wall, &
-                     flowDoms(i,levm1,sps)%BCData(j)%TNS_Wall)
+                        BCData(j)%subsonicInletTreatment = &
+                            flowDoms(i, levm1, sps)%BCData(j)%subsonicInletTreatment
 
-                call interpolateBcData(BCData(j)%ptInlet, &
-                     flowDoms(i,levm1,sps)%BCData(j)%ptInlet)
-                call interpolateBcData(BCData(j)%ttInlet, &
-                     flowDoms(i,levm1,sps)%BCData(j)%ttInlet)
+                        ! Interpolate the data for the possible prescribed boundary
+                        ! data.
+                        call interpolateBcData(BCData(j)%TNS_Wall, &
+                                               flowDoms(i, levm1, sps)%BCData(j)%TNS_Wall)
 
-                call interpolateBcData(BCData(j)%htInlet, &
-                     flowDoms(i,levm1,sps)%BCData(j)%htInlet)
+                        call interpolateBcData(BCData(j)%ptInlet, &
+                                               flowDoms(i, levm1, sps)%BCData(j)%ptInlet)
+                        call interpolateBcData(BCData(j)%ttInlet, &
+                                               flowDoms(i, levm1, sps)%BCData(j)%ttInlet)
 
-                call interpolateBcData(BCData(j)%flowXdirInlet, &
-                     flowDoms(i,levm1,sps)%BCData(j)%flowXdirInlet)
-                call interpolateBcData(BCData(j)%flowYdirInlet, &
-                     flowDoms(i,levm1,sps)%BCData(j)%flowYdirInlet)
-                call interpolateBcData(BCData(j)%flowZdirInlet, &
-                     flowDoms(i,levm1,sps)%BCData(j)%flowZdirInlet)
+                        call interpolateBcData(BCData(j)%htInlet, &
+                                               flowDoms(i, levm1, sps)%BCData(j)%htInlet)
 
-                call interpolateBCVecData(BCData(j)%turbInlet, &
-                     flowDoms(i,levm1,sps)%BCData(j)%turbInlet, &
-                     nt1, nt2)
+                        call interpolateBcData(BCData(j)%flowXdirInlet, &
+                                               flowDoms(i, levm1, sps)%BCData(j)%flowXdirInlet)
+                        call interpolateBcData(BCData(j)%flowYdirInlet, &
+                                               flowDoms(i, levm1, sps)%BCData(j)%flowYdirInlet)
+                        call interpolateBcData(BCData(j)%flowZdirInlet, &
+                                               flowDoms(i, levm1, sps)%BCData(j)%flowZdirInlet)
 
-                call interpolateBcData(BCData(j)%rho,  &
-                     flowDoms(i,levm1,sps)%BCData(j)%rho)
-                call interpolateBcData(BCData(j)%velx, &
-                     flowDoms(i,levm1,sps)%BCData(j)%velx)
-                call interpolateBcData(BCData(j)%vely, &
-                     flowDoms(i,levm1,sps)%BCData(j)%vely)
-                call interpolateBcData(BCData(j)%velz, &
-                     flowDoms(i,levm1,sps)%BCData(j)%velz)
-                call interpolateBcData(BCData(j)%ps,   &
-                     flowDoms(i,levm1,sps)%BCData(j)%ps)
+                        call interpolateBCVecData(BCData(j)%turbInlet, &
+                                                  flowDoms(i, levm1, sps)%BCData(j)%turbInlet, &
+                                                  nt1, nt2)
 
-                ! Some additional variables should be computed/corrected
-                ! for some boundary conditions. Determine the type of
-                ! boundary condition.
+                        call interpolateBcData(BCData(j)%rho, &
+                                               flowDoms(i, levm1, sps)%BCData(j)%rho)
+                        call interpolateBcData(BCData(j)%velx, &
+                                               flowDoms(i, levm1, sps)%BCData(j)%velx)
+                        call interpolateBcData(BCData(j)%vely, &
+                                               flowDoms(i, levm1, sps)%BCData(j)%vely)
+                        call interpolateBcData(BCData(j)%velz, &
+                                               flowDoms(i, levm1, sps)%BCData(j)%velz)
+                        call interpolateBcData(BCData(j)%ps, &
+                                               flowDoms(i, levm1, sps)%BCData(j)%ps)
 
-                if((BCType(j) == SubsonicInflow .and.                         &
-                     BCData(j)%subsonicInletTreatment == totalConditions) .or. &
-                     BCType(j) == DomainInterfaceTotal) then
+                        ! Some additional variables should be computed/corrected
+                        ! for some boundary conditions. Determine the type of
+                        ! boundary condition.
 
-                   ! Total conditions are specified for subsonic inflow
-                   ! or domain interfaces.
+                        if ((BCType(j) == SubsonicInflow .and. &
+                             BCData(j)%subsonicInletTreatment == totalConditions) .or. &
+                            BCType(j) == DomainInterfaceTotal) then
 
-                   ! Compute the total enthalpy and make
-                   ! sure that the unit vector is a unit vector.
+                            ! Total conditions are specified for subsonic inflow
+                            ! or domain interfaces.
 
-                   ! Loop over the faces of the subface.
+                            ! Compute the total enthalpy and make
+                            ! sure that the unit vector is a unit vector.
 
-                   do l=jBeg,jEnd
-                      do k=iBeg,iEnd
+                            ! Loop over the faces of the subface.
 
-                         ! Compute the total enthalpy.
+                            do l = jBeg, jEnd
+                                do k = iBeg, iEnd
 
-                         TDim = BCData(j)%ttInlet(k,l)*Tref
-                         call computeHtot(TDim, Hdim)
-                         BCData(j)%htInlet(k,l) = Hdim/Href
+                                    ! Compute the total enthalpy.
 
-                         ! Flow direction.
+                                    TDim = BCData(j)%ttInlet(k, l)*Tref
+                                    call computeHtot(TDim, Hdim)
+                                    BCData(j)%htInlet(k, l) = Hdim/Href
 
-                         dir(1) = BCData(j)%flowXdirInlet(k,l)
-                         dir(2) = BCData(j)%flowYdirInlet(k,l)
-                         dir(3) = BCData(j)%flowZdirInlet(k,l)
+                                    ! Flow direction.
 
-                         var = one/max(eps,sqrt(dir(1)**2 + dir(2)**2 &
-                              +                  dir(3)**2))
+                                    dir(1) = BCData(j)%flowXdirInlet(k, l)
+                                    dir(2) = BCData(j)%flowYdirInlet(k, l)
+                                    dir(3) = BCData(j)%flowZdirInlet(k, l)
 
-                         BCData(j)%flowXdirInlet(k,l) = var*dir(1)
-                         BCData(j)%flowYdirInlet(k,l) = var*dir(2)
-                         BCData(j)%flowZdirInlet(k,l) = var*dir(3)
+                                    var = one/max(eps, sqrt(dir(1)**2 + dir(2)**2 &
+                                                            + dir(3)**2))
 
-                      enddo
-                   enddo
-                endif
-             enddo bocoLoop
-          enddo domainsLoop
-       enddo spectralLoop
-    enddo coarseLevelLoop
-
-  contains
+                                    BCData(j)%flowXdirInlet(k, l) = var*dir(1)
+                                    BCData(j)%flowYdirInlet(k, l) = var*dir(2)
+                                    BCData(j)%flowZdirInlet(k, l) = var*dir(3)
 
-    subroutine interpolateBcData(varCoarse, varFine)
-      !
-      !         InterpolateBcData interpolates the given data array from
-      !         the fine to the coarse grid. Of course only if the fine
-      !         array is associated with some data.
-      !
-      use constants
-      implicit none
-      !
-      !        Subroutine arguments.
-      !
-      real(kind=realType), dimension(:,:), pointer :: varCoarse
-      real(kind=realType), dimension(:,:), pointer :: varFine
-      !
-      !        Local variables.
-      !
-      integer(kind=intType) :: i, j, if1, if2, jf1, jf2
-
-      ! Check if varFine is associated to data. If not return.
-
-      if(.not. associated(varFine)) return
-
-      ! Loop over the faces of the given subface.
-      ! First the j-direction.
-
-      do j=jBeg,jEnd
-
-         ! Determine the two children in this direction. Take care of
-         ! the halo's, as this info is only available for owned cells.
-
-         if(j < 2) then
-            jf1 = 1; jf2 = 1
-         else if(j > jjMax) then
-            jf1 = jFine(jjMax,2) +1; jf2 = jf1
-         else
-            jf1 = jFine(j,1); jf2 = jFine(j,2)
-         endif
-
-         ! Loop in the i-direction.
-
-         do i=iBeg,iEnd
-
-            ! Determine the two children in this direction.
-            ! Same story as in j-direction.
-
-            if(i < 2) then
-               if1 = 1; if2 = 1
-            else if(i > iiMax) then
-               if1 = iFine(iiMax,2) +1; if2 = if1
-            else
-               if1 = iFine(i,1); if2 = iFine(i,2)
-            endif
-
-            ! Compute the coarse grid data as the average of the
-            ! 4 fine grid values.
-
-            varCoarse(i,j) = fourth*(varFine(if1,jf1) &
-                 +         varFine(if2,jf1) &
-                 +         varFine(if1,jf2) &
-                 +         varFine(if2,jf2))
-         enddo
-      enddo
-
-    end subroutine interpolateBcData
-
-    subroutine interpolateBCVecData(varCoarse, varFine, &
-         nstart, nend)
-      !
-      !         interpolateBCVecData interpolates the given data array
-      !         from the fine to the coarse grid. Of course only if the fine
-      !         array is associated with some data.
-      !
-      implicit none
-      !
-      !        Subroutine arguments.
-      !
-      integer(kind=intType), intent(in) :: nstart, nend
-
-      real(kind=realType), dimension(:,:,:), pointer :: varCoarse
-      real(kind=realType), dimension(:,:,:), pointer :: varFine
-      !
-      !        Local variables.
-      !
-      integer(kind=intType) :: nn, i, j, if1, if2, jf1, jf2
-      ! Check if varFine is associated to data. If not return.
-
-      if(.not. associated(varFine)) return
-
-      ! Loop over the faces of the given subface.
-      ! First the j-direction.
-
-      do j=jBeg,jEnd
-
-         ! Determine the two children in this direction. Take care of
-         ! the halo's, as this info is only available for owned cells.
-
-         if(j < 2) then
-            jf1 = 1; jf2 = 1
-         else if(j > jjMax) then
-            jf1 = jFine(jjMax,2) +1; jf2 = jf1
-         else
-            jf1 = jFine(j,1); jf2 = jFine(j,2)
-         endif
-
-         ! Loop in the i-direction.
-
-         do i=iBeg,iEnd
-
-            ! Determine the two children in this direction.
-            ! Same story as in j-direction.
-
-            if(i < 2) then
-               if1 = 1; if2 = 1
-            else if(i > iiMax) then
-               if1 = iFine(iiMax,2) +1; if2 = if1
-            else
-               if1 = iFine(i,1); if2 = iFine(i,2)
-            endif
-
-            ! Compute the coarse grid data as the average of the
-            ! 4 fine grid values.
-
-            do nn=nstart,nend
-               varCoarse(i,j,nn) = fourth*(varFine(if1,jf1,nn) &
-                    +         varFine(if2,jf1,nn) &
-                    +         varFine(if1,jf2,nn) &
-                    +         varFine(if2,jf2,nn))
-            enddo
-         enddo
-      enddo
-
-    end subroutine interpolateBCVecData
-  end subroutine setBCDataCoarseGrid
-
-  subroutine errorCheckbcDataNamesIn(setSubroutineName, bcDataNamesIn)
-    use constants
-    use utils, only: terminate, char2str
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    character(len=*), intent(in) :: setSubroutineName
-    character, dimension(:, :), intent(in) :: bcDatanamesIn
-    !
-    !      Local variables.
-    !
-    logical :: varAllowed
-    integer :: i,j
-    character(maxCGNSNameLen) :: varName
-
-    ! TODO: Justin add back in error checking
-
-  end subroutine errorCheckbcDataNamesIn
+                                end do
+                            end do
+                        end if
+                    end do bocoLoop
+                end do domainsLoop
+            end do spectralLoop
+        end do coarseLevelLoop
+
+    contains
+
+        subroutine interpolateBcData(varCoarse, varFine)
+            !
+            !         InterpolateBcData interpolates the given data array from
+            !         the fine to the coarse grid. Of course only if the fine
+            !         array is associated with some data.
+            !
+            use constants
+            implicit none
+            !
+            !        Subroutine arguments.
+            !
+            real(kind=realType), dimension(:, :), pointer :: varCoarse
+            real(kind=realType), dimension(:, :), pointer :: varFine
+            !
+            !        Local variables.
+            !
+            integer(kind=intType) :: i, j, if1, if2, jf1, jf2
+
+            ! Check if varFine is associated to data. If not return.
+
+            if (.not. associated(varFine)) return
+
+            ! Loop over the faces of the given subface.
+            ! First the j-direction.
+
+            do j = jBeg, jEnd
+
+                ! Determine the two children in this direction. Take care of
+                ! the halo's, as this info is only available for owned cells.
+
+                if (j < 2) then
+                    jf1 = 1; jf2 = 1
+                else if (j > jjMax) then
+                    jf1 = jFine(jjMax, 2) + 1; jf2 = jf1
+                else
+                    jf1 = jFine(j, 1); jf2 = jFine(j, 2)
+                end if
+
+                ! Loop in the i-direction.
+
+                do i = iBeg, iEnd
+
+                    ! Determine the two children in this direction.
+                    ! Same story as in j-direction.
+
+                    if (i < 2) then
+                        if1 = 1; if2 = 1
+                    else if (i > iiMax) then
+                        if1 = iFine(iiMax, 2) + 1; if2 = if1
+                    else
+                        if1 = iFine(i, 1); if2 = iFine(i, 2)
+                    end if
+
+                    ! Compute the coarse grid data as the average of the
+                    ! 4 fine grid values.
+
+                    varCoarse(i, j) = fourth*(varFine(if1, jf1) &
+                                              + varFine(if2, jf1) &
+                                              + varFine(if1, jf2) &
+                                              + varFine(if2, jf2))
+                end do
+            end do
+
+        end subroutine interpolateBcData
+
+        subroutine interpolateBCVecData(varCoarse, varFine, &
+                                        nstart, nend)
+            !
+            !         interpolateBCVecData interpolates the given data array
+            !         from the fine to the coarse grid. Of course only if the fine
+            !         array is associated with some data.
+            !
+            implicit none
+            !
+            !        Subroutine arguments.
+            !
+            integer(kind=intType), intent(in) :: nstart, nend
+
+            real(kind=realType), dimension(:, :, :), pointer :: varCoarse
+            real(kind=realType), dimension(:, :, :), pointer :: varFine
+            !
+            !        Local variables.
+            !
+            integer(kind=intType) :: nn, i, j, if1, if2, jf1, jf2
+            ! Check if varFine is associated to data. If not return.
+
+            if (.not. associated(varFine)) return
+
+            ! Loop over the faces of the given subface.
+            ! First the j-direction.
+
+            do j = jBeg, jEnd
+
+                ! Determine the two children in this direction. Take care of
+                ! the halo's, as this info is only available for owned cells.
+
+                if (j < 2) then
+                    jf1 = 1; jf2 = 1
+                else if (j > jjMax) then
+                    jf1 = jFine(jjMax, 2) + 1; jf2 = jf1
+                else
+                    jf1 = jFine(j, 1); jf2 = jFine(j, 2)
+                end if
+
+                ! Loop in the i-direction.
+
+                do i = iBeg, iEnd
+
+                    ! Determine the two children in this direction.
+                    ! Same story as in j-direction.
+
+                    if (i < 2) then
+                        if1 = 1; if2 = 1
+                    else if (i > iiMax) then
+                        if1 = iFine(iiMax, 2) + 1; if2 = if1
+                    else
+                        if1 = iFine(i, 1); if2 = iFine(i, 2)
+                    end if
+
+                    ! Compute the coarse grid data as the average of the
+                    ! 4 fine grid values.
+
+                    do nn = nstart, nend
+                        varCoarse(i, j, nn) = fourth*(varFine(if1, jf1, nn) &
+                                                      + varFine(if2, jf1, nn) &
+                                                      + varFine(if1, jf2, nn) &
+                                                      + varFine(if2, jf2, nn))
+                    end do
+                end do
+            end do
+
+        end subroutine interpolateBCVecData
+    end subroutine setBCDataCoarseGrid
+
+    subroutine errorCheckbcDataNamesIn(setSubroutineName, bcDataNamesIn)
+        use constants
+        use utils, only: terminate, char2str
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        character(len=*), intent(in) :: setSubroutineName
+        character, dimension(:, :), intent(in) :: bcDatanamesIn
+        !
+        !      Local variables.
+        !
+        logical :: varAllowed
+        integer :: i, j
+        character(maxCGNSNameLen) :: varName
+
+        ! TODO: Justin add back in error checking
+
+    end subroutine errorCheckbcDataNamesIn
 #endif
 end module BCData
diff --git a/src/inputParam/inputParamRoutines.F90 b/src/inputParam/inputParamRoutines.F90
index 92be9e65d..b8dad6c7f 100644
--- a/src/inputParam/inputParamRoutines.F90
+++ b/src/inputParam/inputParamRoutines.F90
@@ -1,4184 +1,4176 @@
 module inputParamRoutines
 
-  use constants, only : maxStringLen, intType
-
-  ! Set the parameter none, which is used as a check to see
-  ! whether or not some key parameters were specified.
-
-  integer(kind=intType), parameter :: none = 0
-
-  ! monDturb:             Whether or not the turbulent residuals
-  !                        must be monitored. This must be done via
-  !                        this construction, because during the
-  !                        reading of the monitoring variables the
-  !                        turbulence model might not be known.
-  ! monitorSpecified:     Whether or not the monitoring variables
-  !                        were specified.
-  ! surfaceOutSpecified: Whether or not the surface output
-  !                        variables were specified.
-  ! volumeOutSpecified:  Whether or not the volume output
-  !                        variables were specified.
-  ! isoOutSpecified:      Wheter or not the isosurface output
-  !                        variables were specified
-  logical :: monDturb
-  logical :: monitorSpecified
-  logical :: surfaceOutSpecified
-  logical :: volumeOutSpecified
-  logical :: isoOutSpecified
-
-  ! liftDirSpecified: Whether or not the lift direction was
-  !                     specified.
-
-  logical :: liftDirSpecified
+    use constants, only: maxStringLen, intType
+
+    ! Set the parameter none, which is used as a check to see
+    ! whether or not some key parameters were specified.
+
+    integer(kind=intType), parameter :: none = 0
+
+    ! monDturb:             Whether or not the turbulent residuals
+    !                        must be monitored. This must be done via
+    !                        this construction, because during the
+    !                        reading of the monitoring variables the
+    !                        turbulence model might not be known.
+    ! monitorSpecified:     Whether or not the monitoring variables
+    !                        were specified.
+    ! surfaceOutSpecified: Whether or not the surface output
+    !                        variables were specified.
+    ! volumeOutSpecified:  Whether or not the volume output
+    !                        variables were specified.
+    ! isoOutSpecified:      Wheter or not the isosurface output
+    !                        variables were specified
+    logical :: monDturb
+    logical :: monitorSpecified
+    logical :: surfaceOutSpecified
+    logical :: volumeOutSpecified
+    logical :: isoOutSpecified
+
+    ! liftDirSpecified: Whether or not the lift direction was
+    !                     specified.
+
+    logical :: liftDirSpecified
 
 contains
 
-  subroutine checkMonitor
-    !
-    !       checkMonitor checks and possibly corrects the variables
-    !       to be monitored during the convergence.  This depends on the
-    !       governing equations to be solved. After the correction the
-    !       sequence of the monitoring variable names is changed, such
-    !       that the output is independent of the specified sequence.
-    !       Furthermore memory is allocated for the arrays used to compute
-    !       the monitoring variables and it is checked whether or not the
-    !       maximum Mach number of total enthalpy difference is to be
-    !       monitored.
-    !
-    use constants
-    use cgnsNames
-    use monitor, only : monNames, monGlob, monLoc, nMonMax, nMonSum, &
-         nMon, monMachOrHMax, monRef
-    use inputPhysics, only : equations, flowType, turbModel, equationMode
-    use inputUnsteady, only : timeIntegrationScheme
-    use sorting, only :qsortIntegers, bsearchIntegers
-    use utils, only : terminate
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: i, ii, nn
-    integer(kind=intType), dimension(:), allocatable :: sortNumber
-    integer(kind=intType), dimension(:), allocatable :: tmpNumber
-
-    character(len=maxCGNSNameLen), dimension(:), allocatable :: &
-         tmpNames
-    logical :: RKExplicit
-
-    ! Find out if an explicit RK scheme is used in unsteady mode and
-    ! set the logical RKExplicit accordingly. For explicit RK schemes
-    ! no residuals are monitored.
-
-    RKExplicit = .false.
-    if(equationMode          == unsteady .and. &
-         timeIntegrationScheme == explicitRK) RKExplicit = .true.
-
-    ! If the turbulent residuals must be monitored add them to the
-    ! list of monitoring names. Enough memory should have been
-    ! allocated for this.
-
-    if(monDturb .and. equations == RANSEquations .and. &
-         (.not. RKExplicit)) then
-
-       select case (turbModel)
-
-          ! One equation models of the spalart-allmaras family.
-
-       case (spalartAllmaras, spalartAllmarasEdwards)
-          nMon = nMon + 1; nMonSum = nMonSum + 1
-          monNames(nMon) = cgnsL2ResNu
-
-          ! Two equation models of the k-w family.
-
-       case (komegaWilcox, komegaModified, menterSST)
-          nMon = nMon + 2; nMonSum = nMonSum + 2
-          monNames(nMon-1) = cgnsL2ResK
-          monNames(nMon)   = cgnsL2ResOmega
-
-          ! Two equation k-tau model.
-
-       case (ktau)
-          nMon = nMon + 2; nMonSum = nMonSum + 2
-          monNames(nMon-1) = cgnsL2ResK
-          monNames(nMon)   = cgnsL2ResTau
-
-          ! V2f model.
-
-       case (v2f)
-          nMon = nMon + 4; nMonSum = nMonSum + 4
-          monNames(nMon-3) = cgnsL2ResK
-          monNames(nMon-2) = cgnsL2ResEpsilon
-          monNames(nMon-1) = cgnsL2ResV2
-          monNames(nMon)   = cgnsL2ResF
-
-       end select
-    endif
-
-    ! Allocate the memory for sortNumber tmpNumber and tmpNames.
-
-    allocate(sortNumber(nMon), tmpNumber(nMon), tmpNames(nMon), &
-         stat=ierr)
-    if(ierr /= 0)                    &
-         call terminate("checkMonitor", &
-         "Memory allocation failure for sortNumber, etc.")
-
-    ! Loop over the monitoring variables, copy the name into tmpName
-    ! and set a number to determine its place in the sequence. If the
-    ! variable cannot be monitored for the governing equations, the
-    ! priority is set to a high number, such that it will be at the
-    ! end of the sorted numbers. At the same time the number of
-    ! variables to be monitored, nMonSum, nMonMax and nMon, is
-    ! corrected.
-
-    nn = nMon
-    do i=1,nn
-
-       tmpNames(i) = monNames(i)
-
-       ! Determine the place in the sequence for this string.
-
-       select case (monNames(i))
-       case (cgnsL2ResRho)
-          sortNumber(i) = 1
-          if( RKExplicit ) then
-             sortNumber(i) = 10001
-             nMonSum       = nMonSum - 1
-          endif
-
-       case (cgnsL2ResMomx)
-          sortNumber(i) = 2
-          if( RKExplicit ) then
-             sortNumber(i) = 10002
-             nMonSum       = nMonSum - 1
-          endif
-
-       case (cgnsL2ResMomy)
-          sortNumber(i) = 3
-          if( RKExplicit ) then
-             sortNumber(i) = 10003
-             nMonSum       = nMonSum - 1
-          endif
-
-       case (cgnsL2ResMomz)
-          sortNumber(i) = 4
-          if( RKExplicit ) then
-             sortNumber(i) = 10004
-             nMonSum       = nMonSum - 1
-          endif
-
-       case (cgnsL2ResRhoE)
-          sortNumber(i) = 5
-          if( RKExplicit ) then
-             sortNumber(i) = 10005
-             nMonSum       = nMonSum - 1
-          endif
-
-       case (cgnsL2ResNu)
-          sortNumber(i) = 6
-          if(equations /= RANSEquations) then
-             sortNumber(i) = 10001
-             nMonSum       = nMonSum - 1
-          endif
-
-       case (cgnsL2ResK)
-          sortNumber(i) = 7
-          if(equations /= RANSEquations) then
-             sortNumber(i) = 10002
-             nMonSum       = nMonSum - 1
-          endif
-
-       case (cgnsL2ResOmega)
-          sortNumber(i) = 8
-          if(equations /= RANSEquations) then
-             sortNumber(i) = 10003
-             nMonSum       = nMonSum - 1
-          endif
-
-       case (cgnsL2ResTau)
-          sortNumber(i) = 9
-          if(equations /= RANSEquations) then
-             sortNumber(i) = 10004
-             nMonSum       = nMonSum - 1
-          endif
-
-       case (cgnsL2ResEpsilon)
-          sortNumber(i) = 10
-          if(equations /= RANSEquations) then
-             sortNumber(i) = 10005
-             nMonSum       = nMonSum - 1
-          endif
-
-       case (cgnsL2ResV2)
-          sortNumber(i) = 11
-          if(equations /= RANSEquations) then
-             sortNumber(i) = 10006
-             nMonSum       = nMonSum - 1
-          endif
-
-       case (cgnsL2ResF)
-          sortNumber(i) = 12
-          if(equations /= RANSEquations) then
-             sortNumber(i) = 10007
-             nMonSum       = nMonSum - 1
-          endif
-
-       case (cgnsCl)
-          sortNumber(i) = 101
-          if(flowType == internalFlow) then
-             sortNumber(i) = 11001
-             nMonSum       = nMonSum - 1
-          endif
-
-       case (cgnsClp)
-          sortNumber(i) = 102
-          if(flowType == internalFlow) then
-             sortNumber(i) = 11002
-             nMonSum       = nMonSum - 1
-          endif
-
-       case (cgnsClv)
-          sortNumber(i) = 103
-          if(equations == EulerEquations .or. &
-               flowType == internalFlow) then
-             sortNumber(i) = 11003
-             nMonSum       = nMonSum - 1
-          endif
-
-       case (cgnsCd)
-          sortNumber(i) = 104
-          if(flowType == internalFlow) then
-             sortNumber(i) = 11004
-             nMonSum       = nMonSum - 1
-          endif
-
-       case (cgnsCdp)
-          sortNumber(i) = 105
-          if(flowType == internalFlow) then
-             sortNumber(i) = 11005
-             nMonSum       = nMonSum - 1
-          endif
-
-       case (cgnsCdv)
-          sortNumber(i) = 106
-          if(equations == EulerEquations .or. &
-               flowType == internalFlow) then
-             sortNumber(i) = 11006
-             nMonSum       = nMonSum - 1
-          endif
-
-       case (cgnsCfx)
-          sortNumber(i) = 107
-
-       case (cgnsCfy)
-          sortNumber(i) = 108
-
-       case (cgnsCfz)
-          sortNumber(i) = 109
-
-       case (cgnsCmx)
-          sortNumber(i) = 110
-
-       case (cgnsCmy)
-          sortNumber(i) = 111
-
-       case (cgnsCmz)
-          sortNumber(i) = 112
-
-       case('totalR')
-          sortNumber(i) = 113
-
-       case(cgnsSepSensor)
-          sortNumber(i) = 114
-
-       case (cgnsCavitation)
-          sortNumber(i) = 115
-
-       case(cgnsAxisMoment)
-          sortNumber(i) = 116
-
-       case (cgnsHdiffMax)
-          sortNumber(i) = 201
-
-       case (cgnsMachMax)
-          sortNumber(i) = 202
-
-       case (cgnsYplusMax)
-          sortNumber(i) = 203
-          if(equations /= RANSEquations) then
-             sortNumber(i) = 12003
-             nMonMax       = nMonMax - 1
-          endif
-
-       case (cgnsEddyMax)
-          sortNumber(i) = 204
-          if(equations /= RANSEquations) then
-             sortNumber(i) = 12004
-             nMonMax       = nMonMax - 1
-          endif
-
-       case default
-          call terminate("checkMonitor", "This should not happen")
-       end select
-
-    enddo
-
-    ! Set the new value of nMon, because this might have changed
-    ! due to the corrections.
-
-    nMon = nMonSum + nMonMax
-
-    ! Copy sortNumber in tmpNumber and sort it in increasing order.
-    ! Note that here nn must be used and not nMon.
-
-    do i=1,nn
-       tmpNumber(i) = sortNumber(i)
-    enddo
-
-    call qsortIntegers(sortNumber, nn)
-
-    ! Loop over the the number of monitoring variables and store the
-    ! new sequence in monNames.
-
-    do i=1,nn
-       ii = bsearchIntegers(tmpNumber(i), sortNumber)
-       monNames(ii) = tmpNames(i)
-    enddo
-
-    ! Release the memory of sortNumber, tmpNumber and tmpNames.
-
-    deallocate(sortNumber, tmpNumber, tmpNames, stat=ierr)
-    if(ierr /= 0)                    &
-         call terminate("checkMonitor", &
-         "Deallocation error for sortNumber, etc.")
-
-    ! Allocate the memory for the monitoring variables.
-
-    allocate(monLoc(nMon), monGlob(nMon), monRef(nMon), stat=ierr)
-    if(ierr /= 0)                    &
-         call terminate("checkMonitor", &
-         "Memory allocation for monitoring variables")
-
-    ! Check if the maximum Mach number or the maximum total enthalpy
-    ! difference must be monitored.
-
-    monMachOrHMax = .false.
-    do i=(nMonSum+1),nMon
-       if(monNames(i) == cgnsHdiffMax .or. &
-            monNames(i) == cgnsMachMax) monMachOrHMax = .true.
-    enddo
-
-  end subroutine checkMonitor
-  subroutine checkOutput
-    !
-    !       checkOutput checks and possibly corrects the and output
-    !       variables. This depends on the set of governing equations to
-    !       be solved.
-    !
-    use constants
-    use extraOutput
-    use inputPhysics, only : equations, equationMode, wallDistanceNeeded
-    use inputUnsteady, only : timeIntegrationScheme
-    use flowVarRefState, only : kPresent, eddyModel
-    implicit none
-
-    ! Determine the governing equations to be solved and set the
-    ! variables accordingly.
-
-    select case (equations)
-    case (EulerEquations)
-       surfWriteCf    = .false.
-       surfWriteCh    = .false.
-       surfWriteYplus = .false.
-       surfWriteCfx   = .false.
-       surfWriteCfy   = .false.
-       surfWriteCfz   = .false.
-
-       volWriteMachTurb     = .false.
-       volWriteEddyVis      = .false.
-       volWriteRatioEddyVis = .false.
-       volWriteDist         = .false.
-       volWriteResTurb      = .false.
-
-    case (NSEquations)
-       surfWriteYplus = .false.
-
-       volWriteMachTurb     = .false.
-       volWriteEddyVis      = .false.
-       volWriteRatioEddyVis = .false.
-       volWriteDist         = .false.
-       volWriteResTurb      = .false.
-
-    case (RANSEquations)
-
-       ! Check if it is possible to write a turbulent Mach
-       ! number and eddy viscosity; this depends on the turbulence
-       ! model used.
-
-       if(.not. kPresent) volWriteMachTurb = .false.
-       if(.not. eddyModel) then
-          volWriteEddyVis      = .false.
-          volWriteRatioEddyVis = .false.
-       endif
-
-       ! If a wall distance free turbulence model is used,
-       ! set volWriteDist to .false.
-
-       if(.not. wallDistanceNeeded) volWriteDist = .false.
-
-    end select
-
-    if(equationMode          == unsteady .and. &
-         timeIntegrationScheme == explicitRK) then
-       volWriteResRho  = .false.
-       volWriteResMom  = .false.
-       volWriteResRhoE = .false.
-       volWriteResTurb = .false.
-    endif
-
-  end subroutine checkOutput
-  subroutine defaultIsoOut
-    !
-    !       defaultIsoOut sets the default set of additional
-    !       variables to be written to the solution file; the primitive
-    !       variables are always written. This additional set depends on
-    !       the governing equations to be solved.
-    !
-    use constants
-    use extraOutput
-    use inputPhysics, only : equations
-    implicit none
-
-    ! First set the variables, which are independent from the
-    ! governing equations to be solved.
-    isoWriteRho      = .false.
-    isoWriteVx       = .false.
-    isoWriteVy       = .false.
-    isoWriteVz       = .false.
-    isoWriteP        = .false.
-    isoWriteMx       = .false.
-    isoWriteMy       = .false.
-    isoWriteMz       = .false.
-    isoWriteRhoe     = .false.
-    isoWriteTemp     = .false.
-    isoWriteCp       = .false.
-    isoWriteMach     = .false.
-    isoWriteMachTurb = .false.
-    isoWriteDist     = .false.
-    isoWriteVort     = .false.
-    isoWriteVortx    = .false.
-    isoWriteVorty    = .false.
-    isoWriteVortz    = .false.
-    isoWritePtotloss = .false.
-    isoWriteResRho   = .false.
-    isoWriteResMom   = .false.
-    isoWriteResRhoe  = .false.
-    isoWriteShock    = .false.
-    isoWriteFilteredShock = .false.
-    ! Set the values which depend on the equations to be solved.
-
-    select case (equations)
-    case (EulerEquations)
-       isoWriteEddyVis      = .false.
-       isoWriteRatioEddyVis = .false.
-       isoWriteResTurb      = .false.
-
-    case (NSEquations)
-       isoWriteEddyVis      = .false.
-       isoWriteRatioEddyVis = .false.
-       isoWriteResTurb      = .false.
-
-    case (RANSEquations)
-       isoWriteEddyVis      = .false.
-       isoWriteRatioEddyVis = .false.
-       isoWriteResTurb      = .false.
-    end select
-
-  end subroutine defaultIsoOut
-  subroutine defaultMonitor
-    !
-    !       defaultMonitor sets the default set of variables to be
-    !       monitored during the convergence. This set depends on the
-    !       governing equations to be solved.
-    !
-    use constants
-    use cgnsNames
-    use inputPhysics, only : equations, flowType
-    use monitor, only : nMOnSum, nMonMax, nMon, monNames, showCPU
-    use utils, only : terminate
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    ! CPU time is written to stdout.
-
-    showCPU = .true.
-
-    ! Determine the governing equations to be solved.
-
-    select case (equations)
-    case (EulerEquations)
-
-       ! Set the number of summation and maximum monitor variables
-       ! and allocate the memory for the monitoring names.
-       ! A distinction is made between internal and external flows,
-       ! because cl and cd do not make a lot of sense for the former.
-
-       if(flowType == internalFlow) then
-
-          ! Internal flow; only the density residual is monitored.
-
-          nMonSum = 1; nMonMax = 0; nMon = 1
-          allocate(monNames(nMon), stat=ierr)
-          if(ierr /= 0)                       &
-               call terminate("defaultMonitor", &
-               "Memory allocation failure for monNames")
-
-          ! Set the names for the variables to be monitored.
-
-          monNames(1) = cgnsL2resRho
-
-       else
-
-          ! External; also lift and drag is monitored.
-
-          nMonSum = 3; nMonMax = 0; nMon = 3
-          allocate(monNames(nMon), stat=ierr)
-          if(ierr /= 0)                       &
-               call terminate("defaultMonitor", &
-               "Memory allocation failure for monNames")
-
-          ! Set the names for the variables to be monitored.
-
-          monNames(1) = cgnsL2resRho
-          monNames(2) = cgnsCl
-          monNames(3) = cgnsCd
-
-       endif
-
-    case (NSEquations)
-
-       ! Set the number of summation and maximum monitor variables
-       ! and allocate the memory for the monitoring names.
-       ! A distinction is made between internal and external flows,
-       ! because cl and cd do not make a lot of sense for the former.
+    subroutine checkMonitor
+        !
+        !       checkMonitor checks and possibly corrects the variables
+        !       to be monitored during the convergence.  This depends on the
+        !       governing equations to be solved. After the correction the
+        !       sequence of the monitoring variable names is changed, such
+        !       that the output is independent of the specified sequence.
+        !       Furthermore memory is allocated for the arrays used to compute
+        !       the monitoring variables and it is checked whether or not the
+        !       maximum Mach number of total enthalpy difference is to be
+        !       monitored.
+        !
+        use constants
+        use cgnsNames
+        use monitor, only: monNames, monGlob, monLoc, nMonMax, nMonSum, &
+                           nMon, monMachOrHMax, monRef
+        use inputPhysics, only: equations, flowType, turbModel, equationMode
+        use inputUnsteady, only: timeIntegrationScheme
+        use sorting, only: qsortIntegers, bsearchIntegers
+        use utils, only: terminate
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: i, ii, nn
+        integer(kind=intType), dimension(:), allocatable :: sortNumber
+        integer(kind=intType), dimension(:), allocatable :: tmpNumber
+
+        character(len=maxCGNSNameLen), dimension(:), allocatable :: &
+            tmpNames
+        logical :: RKExplicit
+
+        ! Find out if an explicit RK scheme is used in unsteady mode and
+        ! set the logical RKExplicit accordingly. For explicit RK schemes
+        ! no residuals are monitored.
+
+        RKExplicit = .false.
+        if (equationMode == unsteady .and. &
+            timeIntegrationScheme == explicitRK) RKExplicit = .true.
+
+        ! If the turbulent residuals must be monitored add them to the
+        ! list of monitoring names. Enough memory should have been
+        ! allocated for this.
+
+        if (monDturb .and. equations == RANSEquations .and. &
+            (.not. RKExplicit)) then
+
+            select case (turbModel)
+
+                ! One equation models of the spalart-allmaras family.
+
+            case (spalartAllmaras, spalartAllmarasEdwards)
+                nMon = nMon + 1; nMonSum = nMonSum + 1
+                monNames(nMon) = cgnsL2ResNu
+
+                ! Two equation models of the k-w family.
+
+            case (komegaWilcox, komegaModified, menterSST)
+                nMon = nMon + 2; nMonSum = nMonSum + 2
+                monNames(nMon - 1) = cgnsL2ResK
+                monNames(nMon) = cgnsL2ResOmega
+
+                ! Two equation k-tau model.
+
+            case (ktau)
+                nMon = nMon + 2; nMonSum = nMonSum + 2
+                monNames(nMon - 1) = cgnsL2ResK
+                monNames(nMon) = cgnsL2ResTau
+
+                ! V2f model.
+
+            case (v2f)
+                nMon = nMon + 4; nMonSum = nMonSum + 4
+                monNames(nMon - 3) = cgnsL2ResK
+                monNames(nMon - 2) = cgnsL2ResEpsilon
+                monNames(nMon - 1) = cgnsL2ResV2
+                monNames(nMon) = cgnsL2ResF
+
+            end select
+        end if
+
+        ! Allocate the memory for sortNumber tmpNumber and tmpNames.
+
+        allocate (sortNumber(nMon), tmpNumber(nMon), tmpNames(nMon), &
+                  stat=ierr)
+        if (ierr /= 0) &
+            call terminate("checkMonitor", &
+                           "Memory allocation failure for sortNumber, etc.")
+
+        ! Loop over the monitoring variables, copy the name into tmpName
+        ! and set a number to determine its place in the sequence. If the
+        ! variable cannot be monitored for the governing equations, the
+        ! priority is set to a high number, such that it will be at the
+        ! end of the sorted numbers. At the same time the number of
+        ! variables to be monitored, nMonSum, nMonMax and nMon, is
+        ! corrected.
+
+        nn = nMon
+        do i = 1, nn
+
+            tmpNames(i) = monNames(i)
+
+            ! Determine the place in the sequence for this string.
+
+            select case (monNames(i))
+            case (cgnsL2ResRho)
+                sortNumber(i) = 1
+                if (RKExplicit) then
+                    sortNumber(i) = 10001
+                    nMonSum = nMonSum - 1
+                end if
+
+            case (cgnsL2ResMomx)
+                sortNumber(i) = 2
+                if (RKExplicit) then
+                    sortNumber(i) = 10002
+                    nMonSum = nMonSum - 1
+                end if
+
+            case (cgnsL2ResMomy)
+                sortNumber(i) = 3
+                if (RKExplicit) then
+                    sortNumber(i) = 10003
+                    nMonSum = nMonSum - 1
+                end if
+
+            case (cgnsL2ResMomz)
+                sortNumber(i) = 4
+                if (RKExplicit) then
+                    sortNumber(i) = 10004
+                    nMonSum = nMonSum - 1
+                end if
+
+            case (cgnsL2ResRhoE)
+                sortNumber(i) = 5
+                if (RKExplicit) then
+                    sortNumber(i) = 10005
+                    nMonSum = nMonSum - 1
+                end if
+
+            case (cgnsL2ResNu)
+                sortNumber(i) = 6
+                if (equations /= RANSEquations) then
+                    sortNumber(i) = 10001
+                    nMonSum = nMonSum - 1
+                end if
+
+            case (cgnsL2ResK)
+                sortNumber(i) = 7
+                if (equations /= RANSEquations) then
+                    sortNumber(i) = 10002
+                    nMonSum = nMonSum - 1
+                end if
+
+            case (cgnsL2ResOmega)
+                sortNumber(i) = 8
+                if (equations /= RANSEquations) then
+                    sortNumber(i) = 10003
+                    nMonSum = nMonSum - 1
+                end if
+
+            case (cgnsL2ResTau)
+                sortNumber(i) = 9
+                if (equations /= RANSEquations) then
+                    sortNumber(i) = 10004
+                    nMonSum = nMonSum - 1
+                end if
+
+            case (cgnsL2ResEpsilon)
+                sortNumber(i) = 10
+                if (equations /= RANSEquations) then
+                    sortNumber(i) = 10005
+                    nMonSum = nMonSum - 1
+                end if
+
+            case (cgnsL2ResV2)
+                sortNumber(i) = 11
+                if (equations /= RANSEquations) then
+                    sortNumber(i) = 10006
+                    nMonSum = nMonSum - 1
+                end if
+
+            case (cgnsL2ResF)
+                sortNumber(i) = 12
+                if (equations /= RANSEquations) then
+                    sortNumber(i) = 10007
+                    nMonSum = nMonSum - 1
+                end if
+
+            case (cgnsCl)
+                sortNumber(i) = 101
+                if (flowType == internalFlow) then
+                    sortNumber(i) = 11001
+                    nMonSum = nMonSum - 1
+                end if
+
+            case (cgnsClp)
+                sortNumber(i) = 102
+                if (flowType == internalFlow) then
+                    sortNumber(i) = 11002
+                    nMonSum = nMonSum - 1
+                end if
+
+            case (cgnsClv)
+                sortNumber(i) = 103
+                if (equations == EulerEquations .or. &
+                    flowType == internalFlow) then
+                    sortNumber(i) = 11003
+                    nMonSum = nMonSum - 1
+                end if
+
+            case (cgnsCd)
+                sortNumber(i) = 104
+                if (flowType == internalFlow) then
+                    sortNumber(i) = 11004
+                    nMonSum = nMonSum - 1
+                end if
+
+            case (cgnsCdp)
+                sortNumber(i) = 105
+                if (flowType == internalFlow) then
+                    sortNumber(i) = 11005
+                    nMonSum = nMonSum - 1
+                end if
+
+            case (cgnsCdv)
+                sortNumber(i) = 106
+                if (equations == EulerEquations .or. &
+                    flowType == internalFlow) then
+                    sortNumber(i) = 11006
+                    nMonSum = nMonSum - 1
+                end if
+
+            case (cgnsCfx)
+                sortNumber(i) = 107
+
+            case (cgnsCfy)
+                sortNumber(i) = 108
+
+            case (cgnsCfz)
+                sortNumber(i) = 109
+
+            case (cgnsCmx)
+                sortNumber(i) = 110
+
+            case (cgnsCmy)
+                sortNumber(i) = 111
+
+            case (cgnsCmz)
+                sortNumber(i) = 112
+
+            case ('totalR')
+                sortNumber(i) = 113
+
+            case (cgnsSepSensor)
+                sortNumber(i) = 114
+
+            case (cgnsCavitation)
+                sortNumber(i) = 115
+
+            case (cgnsAxisMoment)
+                sortNumber(i) = 116
+
+            case (cgnsHdiffMax)
+                sortNumber(i) = 201
+
+            case (cgnsMachMax)
+                sortNumber(i) = 202
+
+            case (cgnsYplusMax)
+                sortNumber(i) = 203
+                if (equations /= RANSEquations) then
+                    sortNumber(i) = 12003
+                    nMonMax = nMonMax - 1
+                end if
+
+            case (cgnsEddyMax)
+                sortNumber(i) = 204
+                if (equations /= RANSEquations) then
+                    sortNumber(i) = 12004
+                    nMonMax = nMonMax - 1
+                end if
+
+            case default
+                call terminate("checkMonitor", "This should not happen")
+            end select
+
+        end do
+
+        ! Set the new value of nMon, because this might have changed
+        ! due to the corrections.
+
+        nMon = nMonSum + nMonMax
+
+        ! Copy sortNumber in tmpNumber and sort it in increasing order.
+        ! Note that here nn must be used and not nMon.
+
+        do i = 1, nn
+            tmpNumber(i) = sortNumber(i)
+        end do
+
+        call qsortIntegers(sortNumber, nn)
+
+        ! Loop over the the number of monitoring variables and store the
+        ! new sequence in monNames.
+
+        do i = 1, nn
+            ii = bsearchIntegers(tmpNumber(i), sortNumber)
+            monNames(ii) = tmpNames(i)
+        end do
+
+        ! Release the memory of sortNumber, tmpNumber and tmpNames.
+
+        deallocate (sortNumber, tmpNumber, tmpNames, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("checkMonitor", &
+                           "Deallocation error for sortNumber, etc.")
+
+        ! Allocate the memory for the monitoring variables.
+
+        allocate (monLoc(nMon), monGlob(nMon), monRef(nMon), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("checkMonitor", &
+                           "Memory allocation for monitoring variables")
+
+        ! Check if the maximum Mach number or the maximum total enthalpy
+        ! difference must be monitored.
+
+        monMachOrHMax = .false.
+        do i = (nMonSum + 1), nMon
+            if (monNames(i) == cgnsHdiffMax .or. &
+                monNames(i) == cgnsMachMax) monMachOrHMax = .true.
+        end do
+
+    end subroutine checkMonitor
+    subroutine checkOutput
+        !
+        !       checkOutput checks and possibly corrects the and output
+        !       variables. This depends on the set of governing equations to
+        !       be solved.
+        !
+        use constants
+        use extraOutput
+        use inputPhysics, only: equations, equationMode, wallDistanceNeeded
+        use inputUnsteady, only: timeIntegrationScheme
+        use flowVarRefState, only: kPresent, eddyModel
+        implicit none
+
+        ! Determine the governing equations to be solved and set the
+        ! variables accordingly.
+
+        select case (equations)
+        case (EulerEquations)
+            surfWriteCf = .false.
+            surfWriteCh = .false.
+            surfWriteYplus = .false.
+            surfWriteCfx = .false.
+            surfWriteCfy = .false.
+            surfWriteCfz = .false.
+
+            volWriteMachTurb = .false.
+            volWriteEddyVis = .false.
+            volWriteRatioEddyVis = .false.
+            volWriteDist = .false.
+            volWriteResTurb = .false.
+
+        case (NSEquations)
+            surfWriteYplus = .false.
+
+            volWriteMachTurb = .false.
+            volWriteEddyVis = .false.
+            volWriteRatioEddyVis = .false.
+            volWriteDist = .false.
+            volWriteResTurb = .false.
+
+        case (RANSEquations)
+
+            ! Check if it is possible to write a turbulent Mach
+            ! number and eddy viscosity; this depends on the turbulence
+            ! model used.
+
+            if (.not. kPresent) volWriteMachTurb = .false.
+            if (.not. eddyModel) then
+                volWriteEddyVis = .false.
+                volWriteRatioEddyVis = .false.
+            end if
+
+            ! If a wall distance free turbulence model is used,
+            ! set volWriteDist to .false.
+
+            if (.not. wallDistanceNeeded) volWriteDist = .false.
+
+        end select
+
+        if (equationMode == unsteady .and. &
+            timeIntegrationScheme == explicitRK) then
+            volWriteResRho = .false.
+            volWriteResMom = .false.
+            volWriteResRhoE = .false.
+            volWriteResTurb = .false.
+        end if
+
+    end subroutine checkOutput
+    subroutine defaultIsoOut
+        !
+        !       defaultIsoOut sets the default set of additional
+        !       variables to be written to the solution file; the primitive
+        !       variables are always written. This additional set depends on
+        !       the governing equations to be solved.
+        !
+        use constants
+        use extraOutput
+        use inputPhysics, only: equations
+        implicit none
+
+        ! First set the variables, which are independent from the
+        ! governing equations to be solved.
+        isoWriteRho = .false.
+        isoWriteVx = .false.
+        isoWriteVy = .false.
+        isoWriteVz = .false.
+        isoWriteP = .false.
+        isoWriteMx = .false.
+        isoWriteMy = .false.
+        isoWriteMz = .false.
+        isoWriteRhoe = .false.
+        isoWriteTemp = .false.
+        isoWriteCp = .false.
+        isoWriteMach = .false.
+        isoWriteMachTurb = .false.
+        isoWriteDist = .false.
+        isoWriteVort = .false.
+        isoWriteVortx = .false.
+        isoWriteVorty = .false.
+        isoWriteVortz = .false.
+        isoWritePtotloss = .false.
+        isoWriteResRho = .false.
+        isoWriteResMom = .false.
+        isoWriteResRhoe = .false.
+        isoWriteShock = .false.
+        isoWriteFilteredShock = .false.
+        ! Set the values which depend on the equations to be solved.
+
+        select case (equations)
+        case (EulerEquations)
+            isoWriteEddyVis = .false.
+            isoWriteRatioEddyVis = .false.
+            isoWriteResTurb = .false.
+
+        case (NSEquations)
+            isoWriteEddyVis = .false.
+            isoWriteRatioEddyVis = .false.
+            isoWriteResTurb = .false.
+
+        case (RANSEquations)
+            isoWriteEddyVis = .false.
+            isoWriteRatioEddyVis = .false.
+            isoWriteResTurb = .false.
+        end select
+
+    end subroutine defaultIsoOut
+    subroutine defaultMonitor
+        !
+        !       defaultMonitor sets the default set of variables to be
+        !       monitored during the convergence. This set depends on the
+        !       governing equations to be solved.
+        !
+        use constants
+        use cgnsNames
+        use inputPhysics, only: equations, flowType
+        use monitor, only: nMOnSum, nMonMax, nMon, monNames, showCPU
+        use utils, only: terminate
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        ! CPU time is written to stdout.
+
+        showCPU = .true.
+
+        ! Determine the governing equations to be solved.
+
+        select case (equations)
+        case (EulerEquations)
+
+            ! Set the number of summation and maximum monitor variables
+            ! and allocate the memory for the monitoring names.
+            ! A distinction is made between internal and external flows,
+            ! because cl and cd do not make a lot of sense for the former.
+
+            if (flowType == internalFlow) then
+
+                ! Internal flow; only the density residual is monitored.
+
+                nMonSum = 1; nMonMax = 0; nMon = 1
+                allocate (monNames(nMon), stat=ierr)
+                if (ierr /= 0) &
+                    call terminate("defaultMonitor", &
+                                   "Memory allocation failure for monNames")
+
+                ! Set the names for the variables to be monitored.
+
+                monNames(1) = cgnsL2resRho
+
+            else
+
+                ! External; also lift and drag is monitored.
+
+                nMonSum = 3; nMonMax = 0; nMon = 3
+                allocate (monNames(nMon), stat=ierr)
+                if (ierr /= 0) &
+                    call terminate("defaultMonitor", &
+                                   "Memory allocation failure for monNames")
+
+                ! Set the names for the variables to be monitored.
+
+                monNames(1) = cgnsL2resRho
+                monNames(2) = cgnsCl
+                monNames(3) = cgnsCd
+
+            end if
+
+        case (NSEquations)
+
+            ! Set the number of summation and maximum monitor variables
+            ! and allocate the memory for the monitoring names.
+            ! A distinction is made between internal and external flows,
+            ! because cl and cd do not make a lot of sense for the former.
 
-       if(flowType == internalFlow) then
+            if (flowType == internalFlow) then
 
-          ! Internal flow; only the density residual is monitored.
+                ! Internal flow; only the density residual is monitored.
 
-          nMonSum = 1; nMonMax = 0; nMon = 1
-          allocate(monNames(nMon), stat=ierr)
-          if(ierr /= 0)                       &
-               call terminate("defaultMonitor", &
-               "Memory allocation failure for monNames")
+                nMonSum = 1; nMonMax = 0; nMon = 1
+                allocate (monNames(nMon), stat=ierr)
+                if (ierr /= 0) &
+                    call terminate("defaultMonitor", &
+                                   "Memory allocation failure for monNames")
 
-          ! Set the names for the variables to be monitored.
+                ! Set the names for the variables to be monitored.
 
-          monNames(1) = cgnsL2resRho
+                monNames(1) = cgnsL2resRho
 
-       else
+            else
 
-          ! External; also lift and drag (total and viscous)
-          ! is monitored.
+                ! External; also lift and drag (total and viscous)
+                ! is monitored.
 
-          nMonSum = 4; nMonMax = 0; nMon = 4
-          allocate(monNames(nMon), stat=ierr)
-          if(ierr /= 0)                       &
-               call terminate("defaultMonitor", &
-               "Memory allocation failure for monNames")
+                nMonSum = 4; nMonMax = 0; nMon = 4
+                allocate (monNames(nMon), stat=ierr)
+                if (ierr /= 0) &
+                    call terminate("defaultMonitor", &
+                                   "Memory allocation failure for monNames")
 
-          ! Set the names for the variables to be monitored.
+                ! Set the names for the variables to be monitored.
 
-          monNames(1) = cgnsL2resRho
-          monNames(2) = cgnsCl
-          monNames(3) = cgnsCd
-          monNames(4) = cgnsCdv
+                monNames(1) = cgnsL2resRho
+                monNames(2) = cgnsCl
+                monNames(3) = cgnsCd
+                monNames(4) = cgnsCdv
 
-       endif
+            end if
 
-    case (RANSEquations)
+        case (RANSEquations)
 
-       ! Set the number of summation and maximum monitor variables
-       ! and allocate the memory for the monitoring names.
-       ! A distinction is made between internal and external flows,
-       ! because cl and cd do not make a lot of sense for the former.
+            ! Set the number of summation and maximum monitor variables
+            ! and allocate the memory for the monitoring names.
+            ! A distinction is made between internal and external flows,
+            ! because cl and cd do not make a lot of sense for the former.
 
-       if(flowType == internalFlow) then
+            if (flowType == internalFlow) then
 
-          ! Internal flow; the density residual as well as the
-          ! maximum values of yplus and the eddy viscosity ration
-          ! are monitored.
+                ! Internal flow; the density residual as well as the
+                ! maximum values of yplus and the eddy viscosity ration
+                ! are monitored.
 
-          nMonSum = 1; nMonMax = 2; nMon = 3
-          allocate(monNames(nMon), stat=ierr)
-          if(ierr /= 0)                       &
-               call terminate("defaultMonitor", &
-               "Memory allocation failure for monNames")
+                nMonSum = 1; nMonMax = 2; nMon = 3
+                allocate (monNames(nMon), stat=ierr)
+                if (ierr /= 0) &
+                    call terminate("defaultMonitor", &
+                                   "Memory allocation failure for monNames")
+
+                ! Set the names for the variables to be monitored.
+
+                monNames(1) = cgnsL2resRho
+                monNames(2) = cgnsYplusMax
+                monNames(3) = cgnsEddyMax
+
+            else
+
+                ! External; also lift and drag (total and viscous)
+                ! is monitored.
+
+                nMonSum = 4; nMonMax = 2; nMon = 6
+                allocate (monNames(nMon), stat=ierr)
+                if (ierr /= 0) &
+                    call terminate("defaultMonitor", &
+                                   "Memory allocation failure for monNames")
+
+                ! Set the names for the variables to be monitored.
+
+                monNames(1) = cgnsL2resRho
+                monNames(2) = cgnsCl
+                monNames(3) = cgnsCd
+                monNames(4) = cgnsCdv
+                monNames(5) = cgnsYplusMax
+                monNames(6) = cgnsEddyMax
+
+            end if
+
+        end select
+
+    end subroutine defaultMonitor
+    subroutine defaultSurfaceOut
+        !
+        !       defaultSurfaceOut sets the default set of surface variables
+        !       to be written to the solution file. This set depends on the
+        !       governing equations to be solved.
+        !
+        use constants
+        use extraOutput
+        use inputPhysics, only: equations
+        implicit none
+
+        ! First set the variables, which are independent from the
+        ! governing equations to be solved.
+
+        surfWriteRho = .true.
+        surfWriteP = .false.
+        surfWriteTemp = .false.
+        surfWriteVx = .true.
+        surfWriteVy = .true.
+        surfWriteVz = .true.
+        surfWriteCp = .true.
+        surfWriteMach = .true.
+
+        ! Set the values which depend on the equations to be solved.
+
+        select case (equations)
+        case (EulerEquations)
+            surfWritePtotloss = .true.
+            surfWriteCf = .false.
+            surfWriteCh = .false.
+            surfWriteYplus = .false.
+            surfWriteCfx = .false.
+            surfWriteCfy = .false.
+            surfWriteCfz = .false.
+
+        case (NSEquations)
+            surfWritePtotloss = .false.
+            surfWriteCf = .true.
+            surfWriteCh = .false.
+            surfWriteYplus = .false.
+            surfWriteCfx = .true.
+            surfWriteCfy = .true.
+            surfWriteCfz = .true.
+
+        case (RANSEquations)
+            surfWritePtotloss = .false.
+            surfWriteCf = .true.
+            surfWriteCh = .false.
+            surfWriteYplus = .true.
+            surfWriteCfx = .true.
+            surfWriteCfy = .true.
+            surfWriteCfz = .true.
+        end select
+
+    end subroutine defaultSurfaceOut
+    subroutine defaultVolumeOut
+        !
+        !       defaultVolumeOut sets the default set of additional
+        !       variables to be written to the solution file; the primitive
+        !       variables are always written. This additional set depends on
+        !       the governing equations to be solved.
+        !
+        use constants
+        use extraOutput
+        use inputPhysics, only: equations
+        implicit none
+
+        ! First set the variables, which are independent from the
+        ! governing equations to be solved.
+
+        volWriteMx = .false.
+        volWriteMy = .false.
+        volWriteMz = .false.
+        volWriteRhoe = .false.
+        volWriteTemp = .false.
+        volWriteCp = .false.
+        volWriteMach = .false.
+        volWriteMachTurb = .false.
+        volWriteDist = .false.
+        volWriteVort = .false.
+        volWriteVortx = .false.
+        volWriteVorty = .false.
+        volWriteVortz = .false.
+        volWritePtotloss = .true.
+        volWriteResRho = .true.
+        volWriteResMom = .false.
+        volWriteResRhoe = .false.
+
+        ! Set the values which depend on the equations to be solved.
+
+        select case (equations)
+        case (EulerEquations)
+            volWriteEddyVis = .false.
+            volWriteRatioEddyVis = .false.
+            volWriteResTurb = .false.
+
+        case (NSEquations)
+            volWriteEddyVis = .false.
+            volWriteRatioEddyVis = .false.
+            volWriteResTurb = .false.
+
+        case (RANSEquations)
+            volWriteEddyVis = .true.
+            volWriteRatioEddyVis = .true.
+            volWriteResTurb = .true.
+        end select
+
+    end subroutine defaultVolumeOut
+
+    subroutine dummyreadParamFile
+        !
+        !       This subroutine is the same as readParamFile EXCEPT it does not
+        !       read the actual file. Values are set diectly from python for
+        !       all the options and then this file is run.
+        !
+        use constants
+        use inputPhysics, only: cpModel
+
+        implicit none
+        !
+        !      Local variables
+        !
+        integer, parameter :: readUnit = 32
+
+        ! Check if all the desired input parameters were specified and
+        ! print warnings if some irrelevant ones are specified.
+
+        call checkInputParam
+
+        ! Read the cp curve fits from file if a variable cp model
+        ! must be used.
+
+        if (cpModel == cpTempCurveFits) call readCpTempCurveFits
+
+        ! Determine the number of governing equations and set the
+        ! corresponding parameters accordingly.
+
+        call setEquationParameters
+
+        ! Extract the multigrid info from the string.
+
+        call extractMgInfo
+
+        ! If no monitoring variables were specified, set the default set.
+        ! Idem for the surface and volume output variables.
+
+        if (.not. monitorSpecified) call defaultMonitor
+        if (.not. surfaceOutSpecified) call defaultSurfaceOut
+        if (.not. volumeOutSpecified) call defaultVolumeOut
+        if (.not. isoOutSpecified) call defaultIsoOut
+
+        ! Check the monitoring and output variables.
+
+        call checkMonitor
+        call checkOutput
+
+    end subroutine dummyreadParamFile
+    subroutine extractMGInfo
+        !
+        !       extractMgInfo creates the integer array cycleStrategy from
+        !       the string describing the multigrid strategy. This string
+        !       either contains a predefined strategy, like sg, 2v, 4w, etc.,
+        !       or a combination of -1's, 0's and 1's, which defines a user
+        !       defined strategy. The integers -1, 0 and 1 have the following
+        !       meaning:  0 -> perform an iteration step on the current grid.
+        !                 1 -> go to next coarser grid.
+        !                -1 -> go to next finer grid.
+        !       For a valid cycling strategy the sum of the elements of the
+        !       array should be 0.
+        !
+        use constants
+        use inputIteration, only: cycleStrategy, nMGLevels, nMGSteps, mgStartLevel, mgDescription
+        use inputPhysics, only: equationMode
+        use inputUnsteady, only: timeIntegrationScheme
+        use communication, only: myID
+        use utils, only: convertToLowerCase, terminate
+        implicit none
+        !
+        !      Local variables
+        !
+        integer                      :: stringLen, error
+        integer(kind=intType)        :: i, ii, nMinus, nn
+        character(len=maxStringLen) :: errorMessage
+
+        ! For an unsteady computation using explicit Runge-Kutta schemes
+        ! overrule mgDescription to sg.
+
+        if (equationMode == unsteady .and. &
+            timeIntegrationScheme == explicitRK) mgDescription = "sg"
+
+        ! Create a lower case version of mgDescription and determine the
+        ! length of the string. Note that if the string contains a user
+        ! defined cycling strategy the contents (0's, 1's, -1's) is not
+        ! changed by the call to convertToLowerCase
+
+        mgDescription = trim(mgDescription)
+        call convertToLowerCase(mgDescription)
+        stringLen = len_trim(mgDescription)
+
+        ! Check for predefined cycling strategies.
+
+        if (mgDescription == "sg") then
+
+            ! Single grid computation. Set the values for the parameters
+            ! nMGSteps (number of steps in the cycle strategy) and
+            ! cycleStrategy (the cycle strategy itself).
+
+            nMGSteps = 1
+            allocate (cycleStrategy(1), stat=error)
+            if (error /= 0) call terminate("extractMgInfo", &
+                                           "allocation error for 1 integer")
+            cycleStrategy(1) = 0
+
+            ! Set the number of grid levels needed by the multigrid to 1.
+
+            nMGLevels = 1
+
+        else if (mgDescription(stringLen:stringLen) == "v") then
+
+            ! Must be a v-cycle. The rest of the string should only contain
+            ! digits. Check this.
+
+            if (.not. digitsOnlyInString(mgDescription(:stringLen - 1))) then
+                write (errorMessage, *) "Invalid cycle strategy, ", &
+                    mgDescription(:stringLen), ", specified"
+                if (myID == 0) call terminate("extractMgInfo", errorMessage)
+            end if
+
+            ! Read the number of levels in the cycle.
+
+            read (mgDescription(:stringLen - 1), *) nMGLevels
+
+            ! Determine the number of steps in cycleStrategy and allocate
+            ! the memory for it.
+
+            nMGSteps = 4*nMGLevels - 4
+            allocate (cycleStrategy(nMGSteps), stat=error)
+            if (error /= 0) then
+                write (errorMessage, *) "Allocation error for", nMGSteps, &
+                    "integers for the v-cycle ", &
+                    mgDescription(:stringLen)
+                call terminate("extractMgInfo", errorMessage)
+            end if
+
+            ! Set the values of cycleStrategy.
+
+            ii = 1
+            do i = 1, (nMGLevels - 1)
+                cycleStrategy(ii) = 0
+                cycleStrategy(ii + 1) = 1
+                ii = ii + 2
+            end do
+
+            do i = 1, (nMGLevels - 1)
+                cycleStrategy(ii) = 0
+                cycleStrategy(ii + 1) = -1
+                ii = ii + 2
+            end do
+
+        else if (mgDescription(stringLen:stringLen) == "w") then
+
+            ! Must be a w-cycle. The rest of the string should only contain
+            ! digits. Check this.
+
+            if (.not. digitsOnlyInString(mgDescription(:stringLen - 1))) then
+                write (errorMessage, *) "Invalid cycle strategy, ", &
+                    mgDescription(:stringLen), ", specified"
+                if (myID == 0) call terminate("extractMgInfo", errorMessage)
+            end if
+
+            ! Read the number of levels in the cycle.
+
+            read (mgDescription(:stringLen - 1), *) nMGLevels
+
+            ! Determine the number of steps in cycleStrategy and allocate
+            ! the memory for it.
+
+            nMGSteps = computeNstepsWcycle(nMGLevels)
+            allocate (cycleStrategy(nMGSteps), stat=error)
+            if (error /= 0) then
+                write (errorMessage, *) "Allocation error for", nMGSteps, &
+                    "integers for the w-cycle ", &
+                    mgDescription(:stringLen)
+                call terminate("extractMgInfo", errorMessage)
+            end if
+
+            ! Set the values of cycleStrategy.
+
+            ii = 1
+            call setEntriesWcycle(ii, nMGLevels)
+
+        else
 
-          ! Set the names for the variables to be monitored.
+            ! The string must be a collection of 0's, -1's and 1's to
+            ! describe the cycle strategy. Get rid of the internal spaces
+            ! first and determine the amount of -'s.
 
-          monNames(1) = cgnsL2resRho
-          monNames(2) = cgnsYplusMax
-          monNames(3) = cgnsEddyMax
-
-       else
-
-          ! External; also lift and drag (total and viscous)
-          ! is monitored.
-
-
-          nMonSum = 4; nMonMax = 2; nMon = 6
-          allocate(monNames(nMon), stat=ierr)
-          if(ierr /= 0)                       &
-               call terminate("defaultMonitor", &
-               "Memory allocation failure for monNames")
-
-          ! Set the names for the variables to be monitored.
-
-          monNames(1) = cgnsL2resRho
-          monNames(2) = cgnsCl
-          monNames(3) = cgnsCd
-          monNames(4) = cgnsCdv
-          monNames(5) = cgnsYplusMax
-          monNames(6) = cgnsEddyMax
-
-       endif
-
-    end select
-
-  end subroutine defaultMonitor
-  subroutine defaultSurfaceOut
-    !
-    !       defaultSurfaceOut sets the default set of surface variables
-    !       to be written to the solution file. This set depends on the
-    !       governing equations to be solved.
-    !
-    use constants
-    use extraOutput
-    use inputPhysics, only : equations
-    implicit none
-
-    ! First set the variables, which are independent from the
-    ! governing equations to be solved.
-
-    surfWriteRho  = .true.
-    surfWriteP    = .false.
-    surfWriteTemp = .false.
-    surfWriteVx   = .true.
-    surfWriteVy   = .true.
-    surfWriteVz   = .true.
-    surfWriteCp   = .true.
-    surfWriteMach = .true.
-
-    ! Set the values which depend on the equations to be solved.
-
-    select case (equations)
-    case (EulerEquations)
-       surfWritePtotloss = .true.
-       surfWriteCf       = .false.
-       surfWriteCh       = .false.
-       surfWriteYplus    = .false.
-       surfWriteCfx      = .false.
-       surfWriteCfy      = .false.
-       surfWriteCfz      = .false.
-
-    case (NSEquations)
-       surfWritePtotloss = .false.
-       surfWriteCf       = .true.
-       surfWriteCh       = .false.
-       surfWriteYplus    = .false.
-       surfWriteCfx      = .true.
-       surfWriteCfy      = .true.
-       surfWriteCfz      = .true.
-
-    case (RANSEquations)
-       surfWritePtotloss = .false.
-       surfWriteCf       = .true.
-       surfWriteCh       = .false.
-       surfWriteYplus    = .true.
-       surfWriteCfx      = .true.
-       surfWriteCfy      = .true.
-       surfWriteCfz      = .true.
-    end select
-
-  end subroutine defaultSurfaceOut
-  subroutine defaultVolumeOut
-    !
-    !       defaultVolumeOut sets the default set of additional
-    !       variables to be written to the solution file; the primitive
-    !       variables are always written. This additional set depends on
-    !       the governing equations to be solved.
-    !
-    use constants
-    use extraOutput
-    use inputPhysics, only : equations
-    implicit none
-
-    ! First set the variables, which are independent from the
-    ! governing equations to be solved.
-
-    volWriteMx       = .false.
-    volWriteMy       = .false.
-    volWriteMz       = .false.
-    volWriteRhoe     = .false.
-    volWriteTemp     = .false.
-    volWriteCp       = .false.
-    volWriteMach     = .false.
-    volWriteMachTurb = .false.
-    volWriteDist     = .false.
-    volWriteVort     = .false.
-    volWriteVortx    = .false.
-    volWriteVorty    = .false.
-    volWriteVortz    = .false.
-    volWritePtotloss = .true.
-    volWriteResRho   = .true.
-    volWriteResMom   = .false.
-    volWriteResRhoe  = .false.
-
-    ! Set the values which depend on the equations to be solved.
-
-    select case (equations)
-    case (EulerEquations)
-       volWriteEddyVis      = .false.
-       volWriteRatioEddyVis = .false.
-       volWriteResTurb      = .false.
-
-    case (NSEquations)
-       volWriteEddyVis      = .false.
-       volWriteRatioEddyVis = .false.
-       volWriteResTurb      = .false.
-
-    case (RANSEquations)
-       volWriteEddyVis      = .true.
-       volWriteRatioEddyVis = .true.
-       volWriteResTurb      = .true.
-    end select
-
-  end subroutine defaultVolumeOut
-
-  subroutine dummyreadParamFile
-    !
-    !       This subroutine is the same as readParamFile EXCEPT it does not
-    !       read the actual file. Values are set diectly from python for
-    !       all the options and then this file is run.
-    !
-    use constants
-    use inputPhysics, only : cpModel
-
-    implicit none
-    !
-    !      Local variables
-    !
-    integer, parameter :: readUnit = 32
-
-
-    ! Check if all the desired input parameters were specified and
-    ! print warnings if some irrelevant ones are specified.
-
-    call checkInputParam
-
-    ! Read the cp curve fits from file if a variable cp model
-    ! must be used.
-
-    if(cpModel == cpTempCurveFits) call readCpTempCurveFits
-
-    ! Determine the number of governing equations and set the
-    ! corresponding parameters accordingly.
-
-    call setEquationParameters
-
-    ! Extract the multigrid info from the string.
-
-    call extractMgInfo
-
-    ! If no monitoring variables were specified, set the default set.
-    ! Idem for the surface and volume output variables.
-
-    if(.not. monitorSpecified)    call defaultMonitor
-    if(.not. surfaceOutSpecified) call defaultSurfaceOut
-    if(.not. volumeOutSpecified)  call defaultVolumeOut
-    if(.not. isoOutSpecified)     call defaultIsoOut
-
-    ! Check the monitoring and output variables.
-
-    call checkMonitor
-    call checkOutput
-
-
-  end subroutine dummyreadParamFile
-  subroutine extractMGInfo
-    !
-    !       extractMgInfo creates the integer array cycleStrategy from
-    !       the string describing the multigrid strategy. This string
-    !       either contains a predefined strategy, like sg, 2v, 4w, etc.,
-    !       or a combination of -1's, 0's and 1's, which defines a user
-    !       defined strategy. The integers -1, 0 and 1 have the following
-    !       meaning:  0 -> perform an iteration step on the current grid.
-    !                 1 -> go to next coarser grid.
-    !                -1 -> go to next finer grid.
-    !       For a valid cycling strategy the sum of the elements of the
-    !       array should be 0.
-    !
-    use constants
-    use inputIteration, only :cycleStrategy, nMGLevels, nMGSteps, mgStartLevel, mgDescription
-    use inputPhysics, only : equationMode
-    use inputUnsteady, only : timeIntegrationScheme
-    use communication, only : myID
-    use utils, only : convertToLowerCase, terminate
-    implicit none
-    !
-    !      Local variables
-    !
-    integer                      :: stringLen, error
-    integer(kind=intType)        :: i, ii, nMinus, nn
-    character (len=maxStringLen) :: errorMessage
-
-    ! For an unsteady computation using explicit Runge-Kutta schemes
-    ! overrule mgDescription to sg.
-
-    if(equationMode          == unsteady .and. &
-         timeIntegrationScheme == explicitRK) mgDescription = "sg"
-
-    ! Create a lower case version of mgDescription and determine the
-    ! length of the string. Note that if the string contains a user
-    ! defined cycling strategy the contents (0's, 1's, -1's) is not
-    ! changed by the call to convertToLowerCase
+            ii = 0
+            nMinus = 0
+            do i = 1, stringLen
+                if (mgDescription(i:i) /= " ") then
+                    ii = ii + 1
+                    mgDescription(ii:ii) = mgDescription(i:i)
+                    if (mgDescription(ii:ii) == "-") nMinus = nMinus + 1
+                end if
+            end do
+            stringLen = ii
+
+            ! Determine the number of steps in the cycle strategy and
+            ! allocate the memory for it.
+
+            nMGSteps = ii - nMinus
+            allocate (cycleStrategy(nMGSteps), stat=error)
+            if (error /= 0) then
+                write (errorMessage, *) "Allocation error for", nMGSteps, &
+                    "integers for the cycle strategy"
+                call terminate("extractMgInfo", errorMessage)
+            end if
+
+            ! Determine the entries for cycleStrategy.
+
+            i = 1
+            nn = 1
+            do
+                ii = i
+                if (mgDescription(i:i) == "-") i = i + 1
+
+                ! Determine the case we are having here.
+
+                select case (mgDescription(ii:i))
+                case ("0")
+                    cycleStrategy(nn) = 0
+                case ("1")
+                    cycleStrategy(nn) = 1
+                case ("-1")
+                    cycleStrategy(nn) = -1
+                case default
+                    write (errorMessage, *) "Invalid character, ", &
+                         mgDescription(ii:i), &
+                         ", in the string describing &
+                         &cycling strategy"
+                    if (myID == 0) call terminate("extractMgInfo", errorMessage)
+                end select
+
+                ! Update i and nn
+
+                i = i + 1
+                nn = nn + 1
+
+                ! Exit the do loop in case i is larger than stringLen.
+
+                if (i > stringLen) exit
+            end do
+
+            ! Check if the string specified is valid and determine the
+            ! maximum grid level needed in the cycle.
+
+            nn = 0
+            nMGLevels = 0
+            do i = 1, nMGSteps
+                nn = nn + cycleStrategy(i)
+                nMGLevels = max(nn, nMGLevels)
+            end do
+            nMGLevels = nMGLevels + 1
+
+            if (nn /= 0 .and. myID == 0) &
+                call terminate("extractMgInfo", &
+                               "sum of coefficients in cycle strategy is not 0")
+        end if
+
+        ! Correct the value of mgStartlevel in case a nonpositive number
+        ! has been specified. In that case it is set to -1, the default
+        ! value.
+
+        if (mgStartlevel <= 0) mgStartlevel = -1
+
+        ! Determine the value of mgStartlevel. This parameter might be
+        ! specified in the python script file and is checked here for
+        ! consistency. If mgStartlevel has not been specified to any
+        ! specific level it is set to the coarsest level in the mg cycle
+        ! (starting from free stream) or to the finest level (restart).
+        ! The restart is handled in python wrapper.
+
+        if (mgStartlevel == -1) then
+
+            ! Value has not been specified. Default value is set, see
+            ! the comments above.
+            mgStartlevel = nMGLevels
+
+        end if
+
+    end subroutine extractMGInfo
+
+    !      ==================================================================
+
+    logical function digitsOnlyInString(string)
+        !
+        !       digitsOnlyInString checks whether the given string contains
+        !       digits only or if other character types are present. In the
+        !       former case the function returns .True., otherwise .False.
+        !
+        implicit none
+        !
+        !      Subroutine argument                                              *
+        !
+        character(len=*), intent(in) :: string
+        !
+        !      Local variables
+        !
+        integer :: i, stringLen
+
+        ! Initialize digitsOnlyInString to .True.
+
+        digitsOnlyInString = .true.
+
+        ! Determine the length of the string.
+
+        stringLen = len_trim(string)
+
+        ! Loop over the elements of the string and check if they are digits.
+
+        do i = 1, stringLen
+            if (string(i:i) < "0" .or. string(i:i) > "9") &
+                digitsOnlyInString = .false.
+        end do
+
+    end function digitsOnlyInString
+
+    !      ==================================================================
+
+    recursive function computeNstepsWcycle(nLevels) result(nSteps)
+        !
+        !       computeNstepsWcycle is recursive function, which determines
+        !       the number of entries of a w-cycle of a given level.
+        !
+        use constants
+        use communication
+        use utils, only: terminate
+        implicit none
+        !
+        !      Result variable
+        !
+        integer(kind=intType) :: nSteps
+        !
+        !      Function argument
+        !
+        integer(kind=intType), intent(in) :: nLevels
+        !
+        !      Local variables
+        !
+        character(len=maxStringLen) :: errorMessage
+
+        ! Determine the case we are having here. For nLevels is less
+        ! than 2 an error message is printed, in case nLevels is 2
+        ! the recursion is broken and otherwise a recursive call is made.
+
+        if (nLevels < 2) then
+            write (errorMessage, *) "Wrong value of nLevels", nLevels
+            if (myID == 0) call terminate("computeNstepsWcycle", errorMessage)
+        else if (nLevels == 2) then
+            nSteps = 4
+        else
+            nSteps = 4 + 2*computeNstepsWcycle(nLevels - 1)
+        end if
+
+    end function computeNstepsWcycle
+
+    !      ==================================================================
+
+    recursive subroutine setEntriesWcycle(counter, nLevels)
+        !
+        !       setEntriesWcycle is a recursive subroutine, which actually
+        !       fills the entries of cycleStrategy for a w-cycle.
+        !
+        use constants
+        use inputIteration, only: cycleStrategy
+        use communication, only: myID
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine argument.
+        !
+        integer(kind=intType), intent(inout) :: counter
+        integer(kind=intType), intent(in)    :: nLevels
+        !
+        !      Local variables
+        !
+        character(len=maxStringLen) :: errorMessage
+
+        ! Determine the case we are having here. For nLevels is less
+        ! than 2 an error message is printed, in case nLevels is 2
+        ! the recursion is broken and otherwise a recursive call is made.
+
+        if (nLevels < 2) then
+
+            write (errorMessage, *) "Wrong value of nLevels", nLevels
+            if (myID == 0) call terminate("setEntriesWcycle", errorMessage)
+
+        else if (nLevels == 2) then
+
+            cycleStrategy(counter) = 0
+            cycleStrategy(counter + 1) = 1
+            cycleStrategy(counter + 2) = 0
+            cycleStrategy(counter + 3) = -1
+
+            counter = counter + 4
+
+        else
+
+            cycleStrategy(counter) = 0
+            cycleStrategy(counter + 1) = 1
+            counter = counter + 2
+
+            call setEntriesWcycle(counter, nLevels - 1)
+            call setEntriesWcycle(counter, nLevels - 1)
+
+            cycleStrategy(counter) = 0
+            cycleStrategy(counter + 1) = -1
+            counter = counter + 2
+
+        end if
+
+    end subroutine setEntriesWcycle
+    subroutine isoVariables(variables)
+        !
+        !       isoVariables extracts from the given string the extra
+        !       iso surface variables to be written to the solution file.
+        !
+        use constants
+        use extraOutput
+        use utils, only: convertToLowerCase, terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        character(len=*), intent(inout) :: variables
+        !
+        !      Local variables.
+        !
+        integer :: nVarSpecified, pos
+
+        character(len=15) :: keyword
+        character(len=maxStringLen) :: errorMessage
+
+        ! Convert the string variables to lower case.
+
+        call convertToLowerCase(variables)
+
+        ! Initialize all the iso output variables to .False.
+        isoWriteRho = .false.
+        isoWriteVx = .false.
+        isoWriteVy = .false.
+        isoWriteVz = .false.
+        isoWriteP = .false.
+        isoWriteTurb = .false.
+
+        isoWriteMx = .false.
+        isoWriteMy = .false.
+        isoWriteMz = .false.
+        isoWriteRhoe = .false.
+        isoWriteTemp = .false.
+        isoWriteVort = .false.
+        isoWriteVortx = .false.
+        isoWriteVorty = .false.
+        isoWriteVortz = .false.
+
+        isoWriteCp = .false.
+        isoWriteMach = .false.
+        isoWriteMachTurb = .false.
+        isoWritePtotloss = .false.
+
+        isoWriteEddyVis = .false.
+        isoWriteRatioEddyVis = .false.
+        isoWriteDist = .false.
+
+        isoWriteResRho = .false.
+        isoWriteResMom = .false.
+        isoWriteResRhoe = .false.
+        isoWriteResTurb = .false.
+
+        isoWriteShock = .false.
+        isoWriteFilteredShock = .false.
+
+        isoWriteBlank = .false.
+
+        ! Initialize nVarSpecified to 0. This serves as a test
+        ! later on.
 
-    mgDescription = trim(mgDescription)
-    call convertToLowerCase(mgDescription)
-    stringLen = len_trim(mgDescription)
+        nVarSpecified = 0
+
+        ! Loop to extract the info from the string variables.
 
-    ! Check for predefined cycling strategies.
+        do
+            ! Condition to exit the loop.
 
-    if(mgDescription == "sg") then
+            if (len_trim(variables) == 0) exit
+
+            ! Locate the first occurance of the _ in the string and
+            ! determine the string keyword.
+
+            pos = index(variables, "_")
+            if (pos == 0) then
+                keyword = variables
+                variables = ""
+            else
+                keyword = variables(:pos - 1)
+                variables = variables(pos + 1:)
+            end if
+
+            ! Check the keyword.
+
+            select case (keyword)
+            case ("")
+                ! Multiple occurence of "_". Just ignore it.
+
+            case ("rho")
+                isoWriteRho = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("vx")
+                isoWriteVx = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("vy")
+                isoWriteVy = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("vz")
+                isoWriteVz = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("P")
+                isoWriteP = .true.
+                nVarSpecified = nVarSpecified + 1
 
-       ! Single grid computation. Set the values for the parameters
-       ! nMGSteps (number of steps in the cycle strategy) and
-       ! cycleStrategy (the cycle strategy itself).
-
-       nMGSteps = 1
-       allocate(cycleStrategy(1), stat=error)
-       if(error /= 0) call terminate("extractMgInfo", &
-            "allocation error for 1 integer")
-       cycleStrategy(1) = 0
+            case ("turb")
+                isoWriteTurb = .true.
+                nVarSpecified = nVarSpecified + 1
 
-       ! Set the number of grid levels needed by the multigrid to 1.
+            case ("mx")
+                isoWriteMx = .true.
+                nVarSpecified = nVarSpecified + 1
 
-       nMGLevels = 1
-
-    else if(mgDescription(stringLen:stringLen) == "v") then
+            case ("my")
+                isoWriteMy = .true.
+                nVarSpecified = nVarSpecified + 1
 
-       ! Must be a v-cycle. The rest of the string should only contain
-       ! digits. Check this.
+            case ("mz")
+                isoWriteMz = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("rvx")
+                isoWriteRVx = .true.
+                nVarSpecified = nVarSpecified + 1
 
-       if(.not. digitsOnlyInString(mgDescription(:stringLen-1))) then
-          write(errorMessage,*) "Invalid cycle strategy, ", &
-               mgDescription(:stringLen), ", specified"
-          if(myID == 0) call terminate("extractMgInfo", errorMessage)
-       endif
-
-       ! Read the number of levels in the cycle.
-
-       read(mgDescription(:stringLen-1),*) nMGLevels
-
-       ! Determine the number of steps in cycleStrategy and allocate
-       ! the memory for it.
-
-       nMGSteps = 4*nMGLevels - 4
-       allocate(cycleStrategy(nMGSteps), stat=error)
-       if(error /= 0) then
-          write(errorMessage,*) "Allocation error for", nMGSteps, &
-               "integers for the v-cycle ",       &
-               mgDescription(:stringLen)
-          call terminate("extractMgInfo", errorMessage)
-       endif
-
-       ! Set the values of cycleStrategy.
-
-       ii = 1
-       do i=1,(nMGLevels-1)
-          cycleStrategy(ii)   = 0
-          cycleStrategy(ii+1) = 1
-          ii = ii+2
-       enddo
-
-       do i=1,(nMGLevels-1)
-          cycleStrategy(ii)   =  0
-          cycleStrategy(ii+1) = -1
-          ii = ii+2
-       enddo
-
-    else if(mgDescription(stringLen:stringLen) == "w") then
-
-       ! Must be a w-cycle. The rest of the string should only contain
-       ! digits. Check this.
-
-       if(.not. digitsOnlyInString(mgDescription(:stringLen-1))) then
-          write(errorMessage,*) "Invalid cycle strategy, ", &
-               mgDescription(:stringLen), ", specified"
-          if(myID == 0) call terminate("extractMgInfo", errorMessage)
-       endif
-
-       ! Read the number of levels in the cycle.
-
-       read(mgDescription(:stringLen-1),*) nMGLevels
-
-       ! Determine the number of steps in cycleStrategy and allocate
-       ! the memory for it.
-
-       nMGSteps = computeNstepsWcycle(nMGLevels)
-       allocate(cycleStrategy(nMGSteps), stat=error)
-       if(error /= 0) then
-          write(errorMessage,*) "Allocation error for", nMGSteps, &
-               "integers for the w-cycle ",       &
-               mgDescription(:stringLen)
-          call terminate("extractMgInfo", errorMessage)
-       endif
-
-       ! Set the values of cycleStrategy.
-
-       ii = 1
-       call setEntriesWcycle(ii, nMGLevels)
-
-    else
+            case ("rvy")
+                isoWriteRVy = .true.
+                nVarSpecified = nVarSpecified + 1
 
-       ! The string must be a collection of 0's, -1's and 1's to
-       ! describe the cycle strategy. Get rid of the internal spaces
-       ! first and determine the amount of -'s.
+            case ("rvz")
+                isoWriteRVz = .true.
+                nVarSpecified = nVarSpecified + 1
 
-       ii = 0
-       nMinus = 0
-       do i=1,stringLen
-          if(mgDescription(i:i) /= " ") then
-             ii = ii+1
-             mgDescription(ii:ii) = mgDescription(i:i)
-             if(mgDescription(ii:ii) == "-") nMinus = nMinus+1
-          endif
-       enddo
-       stringLen = ii
-
-       ! Determine the number of steps in the cycle strategy and
-       ! allocate the memory for it.
-
-       nMGSteps = ii - nMinus
-       allocate(cycleStrategy(nMGSteps), stat=error)
-       if(error /= 0) then
-          write(errorMessage,*) "Allocation error for", nMGSteps, &
-               "integers for the cycle strategy"
-          call terminate("extractMgInfo", errorMessage)
-       endif
-
-       ! Determine the entries for cycleStrategy.
-
-       i = 1
-       nn = 1
-       do
-          ii = i
-          if(mgDescription(i:i) == "-") i = i+1
-
-          ! Determine the case we are having here.
-
-          select case (mgDescription(ii:i))
-          case ("0")
-             cycleStrategy(nn) = 0
-          case ("1")
-             cycleStrategy(nn) = 1
-          case ("-1")
-             cycleStrategy(nn) = -1
-          case default
-             write(errorMessage, *) "Invalid character, ", &
-                  mgDescription(ii:i), &
-                  ", in the string describing &
-                  &cycling strategy"
-             if(myID == 0) call terminate("extractMgInfo", errorMessage)
-          end select
-
-          ! Update i and nn
-
-          i  = i  + 1
-          nn = nn + 1
-
-          ! Exit the do loop in case i is larger than stringLen.
-
-          if(i > stringLen) exit
-       enddo
-
-       ! Check if the string specified is valid and determine the
-       ! maximum grid level needed in the cycle.
-
-       nn = 0
-       nMGLevels = 0
-       do i=1,nMGSteps
-          nn = nn + cycleStrategy(i)
-          nMGLevels = max(nn, nMGLevels)
-       enddo
-       nMGLevels = nMGLevels + 1
-
-       if(nn /= 0 .and. myID == 0) &
-            call terminate("extractMgInfo", &
-            "sum of coefficients in cycle strategy is not 0")
-    endif
-
-    ! Correct the value of mgStartlevel in case a nonpositive number
-    ! has been specified. In that case it is set to -1, the default
-    ! value.
-
-    if(mgStartlevel <= 0) mgStartlevel = -1
-
-    ! Determine the value of mgStartlevel. This parameter might be
-    ! specified in the python script file and is checked here for
-    ! consistency. If mgStartlevel has not been specified to any
-    ! specific level it is set to the coarsest level in the mg cycle
-    ! (starting from free stream) or to the finest level (restart).
-    ! The restart is handled in python wrapper.
-
-    if(mgStartlevel == -1) then
-
-       ! Value has not been specified. Default value is set, see
-       ! the comments above.
-       mgStartlevel = nMGLevels
-
-    endif
-
-  end subroutine extractMGInfo
-
-  !      ==================================================================
-
-  logical function digitsOnlyInString(string)
-    !
-    !       digitsOnlyInString checks whether the given string contains
-    !       digits only or if other character types are present. In the
-    !       former case the function returns .True., otherwise .False.
-    !
-    implicit none
-    !
-    !      Subroutine argument                                              *
-    !
-    character (len=*), intent(in) :: string
-    !
-    !      Local variables
-    !
-    integer :: i, stringLen
-
-    ! Initialize digitsOnlyInString to .True.
-
-    digitsOnlyInString = .true.
-
-    ! Determine the length of the string.
-
-    stringLen = len_trim(string)
-
-    ! Loop over the elements of the string and check if they are digits.
-
-    do i=1,stringLen
-       if(string(i:i) < "0" .or. string(i:i) > "9") &
-            digitsOnlyInString = .false.
-    enddo
-
-  end function digitsOnlyInString
-
-  !      ==================================================================
-
-  recursive function computeNstepsWcycle(nLevels) result(nSteps)
-    !
-    !       computeNstepsWcycle is recursive function, which determines
-    !       the number of entries of a w-cycle of a given level.
-    !
-    use constants
-    use communication
-    use utils, only : terminate
-    implicit none
-    !
-    !      Result variable
-    !
-    integer(kind=intType) :: nSteps
-    !
-    !      Function argument
-    !
-    integer(kind=intType), intent(in) :: nLevels
-    !
-    !      Local variables
-    !
-    character (len=maxStringLen) :: errorMessage
-
-    ! Determine the case we are having here. For nLevels is less
-    ! than 2 an error message is printed, in case nLevels is 2
-    ! the recursion is broken and otherwise a recursive call is made.
-
-    if(nLevels < 2) then
-       write(errorMessage,*) "Wrong value of nLevels", nLevels
-       if(myID == 0) call terminate("computeNstepsWcycle", errorMessage)
-    else if(nLevels == 2) then
-       nSteps = 4
-    else
-       nSteps = 4 + 2*computeNstepsWcycle(nLevels-1)
-    endif
-
-  end function computeNstepsWcycle
-
-  !      ==================================================================
-
-  recursive subroutine setEntriesWcycle(counter, nLevels)
-    !
-    !       setEntriesWcycle is a recursive subroutine, which actually
-    !       fills the entries of cycleStrategy for a w-cycle.
-    !
-    use constants
-    use inputIteration, only : cycleStrategy
-    use communication, only : myID
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine argument.
-    !
-    integer(kind=intType), intent(inout) :: counter
-    integer(kind=intType), intent(in)    :: nLevels
-    !
-    !      Local variables
-    !
-    character (len=maxStringLen) :: errorMessage
-
-    ! Determine the case we are having here. For nLevels is less
-    ! than 2 an error message is printed, in case nLevels is 2
-    ! the recursion is broken and otherwise a recursive call is made.
-
-    if(nLevels < 2) then
-
-       write(errorMessage,*) "Wrong value of nLevels", nLevels
-       if(myID == 0) call terminate("setEntriesWcycle", errorMessage)
-
-    else if(nLevels == 2) then
-
-       cycleStrategy(counter)   =  0
-       cycleStrategy(counter+1) =  1
-       cycleStrategy(counter+2) =  0
-       cycleStrategy(counter+3) = -1
-
-       counter = counter + 4
-
-    else
-
-       cycleStrategy(counter)   =  0
-       cycleStrategy(counter+1) =  1
-       counter = counter + 2
-
-       call setEntriesWcycle(counter, nLevels-1)
-       call setEntriesWcycle(counter, nLevels-1)
-
-       cycleStrategy(counter)   =  0
-       cycleStrategy(counter+1) = -1
-       counter = counter + 2
-
-    endif
-
-  end subroutine setEntriesWcycle
-  subroutine isoVariables(variables)
-    !
-    !       isoVariables extracts from the given string the extra
-    !       iso surface variables to be written to the solution file.
-    !
-    use constants
-    use extraOutput
-    use utils, only : convertToLowerCase, terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    character(len=*), intent(inout) :: variables
-    !
-    !      Local variables.
-    !
-    integer :: nVarSpecified, pos
-
-    character(len=15) :: keyword
-    character(len=maxStringLen) :: errorMessage
-
-    ! Convert the string variables to lower case.
-
-    call convertToLowerCase(variables)
-
-    ! Initialize all the iso output variables to .False.
-    isoWriteRho   = .false.
-    isoWriteVx    = .false.
-    isoWriteVy    = .false.
-    isoWriteVz    = .false.
-    isoWriteP     = .false.
-    isoWriteTurb  = .false.
-
-    isoWriteMx    = .false.
-    isoWriteMy    = .false.
-    isoWriteMz    = .false.
-    isoWriteRhoe  = .false.
-    isoWriteTemp  = .false.
-    isoWriteVort  = .false.
-    isoWriteVortx = .false.
-    isoWriteVorty = .false.
-    isoWriteVortz = .false.
-
-    isoWriteCp       = .false.
-    isoWriteMach     = .false.
-    isoWriteMachTurb = .false.
-    isoWritePtotloss = .false.
-
-    isoWriteEddyVis      = .false.
-    isoWriteRatioEddyVis = .false.
-    isoWriteDist         = .false.
-
-    isoWriteResRho  = .false.
-    isoWriteResMom  = .false.
-    isoWriteResRhoe = .false.
-    isoWriteResTurb = .false.
-
-    isoWriteShock = .false.
-    isoWriteFilteredShock = .false.
-
-    isoWriteBlank = .false.
-
-    ! Initialize nVarSpecified to 0. This serves as a test
-    ! later on.
+            case ("rhoe")
+                isoWriteRhoe = .true.
+                nVarSpecified = nVarSpecified + 1
 
-    nVarSpecified = 0
-
-    ! Loop to extract the info from the string variables.
+            case ("temp")
+                isoWriteTemp = .true.
+                nVarSpecified = nVarSpecified + 1
 
-    do
-       ! Condition to exit the loop.
+            case ("vort")
+                isoWriteVort = .true.
+                nVarSpecified = nVarSpecified + 1
 
-       if(len_trim(variables) == 0) exit
-
-       ! Locate the first occurance of the _ in the string and
-       ! determine the string keyword.
-
-       pos = index(variables, "_")
-       if(pos == 0) then
-          keyword   = variables
-          variables = ""
-       else
-          keyword   = variables(:pos-1)
-          variables = variables(pos+1:)
-       endif
-
-       ! Check the keyword.
-
-       select case (keyword)
-       case ("")
-          ! Multiple occurence of "_". Just ignore it.
-
-       case("rho")
-          isoWriteRho   = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case("vx")
-          isoWriteVx   = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case("vy")
-          isoWriteVy   = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case("vz")
-          isoWriteVz   = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case("P")
-          isoWriteP   = .true.
-          nVarSpecified = nVarSpecified + 1
+            case ("vortx")
+                isoWriteVortx = .true.
+                nVarSpecified = nVarSpecified + 1
 
-       case("turb")
-          isoWriteTurb = .true.
-          nVarSpecified = nVarSpecified + 1
+            case ("vorty")
+                isoWriteVorty = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("vortz")
+                isoWriteVortz = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("cp")
+                isoWriteCp = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("mach")
+                isoWriteMach = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("rmach")
+                isoWriteRMach = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("macht")
+                isoWriteMachTurb = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("ptloss")
+                isoWritePtotloss = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("eddy")
+                isoWriteEddyVis = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("eddyratio")
+                isoWriteRatioEddyVis = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("dist")
+                isoWriteDist = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("resrho")
+                isoWriteResRho = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("resmom")
+                isoWriteResMom = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("resrhoe")
+                isoWriteResRhoe = .true.
+                nVarSpecified = nVarSpecified + 1
 
-       case ("mx")
-          isoWriteMx = .true.
-          nVarSpecified = nVarSpecified + 1
+            case ("resturb")
+                isoWriteResTurb = .true.
+                nVarSpecified = nVarSpecified + 1
 
-       case ("my")
-          isoWriteMy = .true.
-          nVarSpecified = nVarSpecified + 1
+            case ("blank")
+                isoWriteBlank = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("shock")
+                isoWriteShock = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("filteredshock")
+                isoWriteFilteredShock = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case default
+                pos = len_trim(keyword)
+                write (errorMessage, "(3a)") "Unknown extra iso output &
+                     &variable, ", trim(keyword), &
+                     ", specified"
+                call terminate("isoVariables", errorMessage)
+
+            end select
+
+        end do
+
+        ! Set this to true regardless...it is possible no varibles were
+        ! specified
+        isoOutSpecified = .true.
+
+    end subroutine isoVariables
+
+    subroutine monitorVariables(variables)
+        !
+        !       monitorVariables extracts from the given string the variables
+        !       to be monitored during the convergence.
+        !
+        use constants
+        use cgnsNames
+        use communication, only: myid, adflow_comm_world
+        use monitor, only: monNames, nMOn, nMonMax, nMonSum, showCPU
+        use utils, only: convertToLowerCase, terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        character(len=*), intent(inout) :: variables
+        !
+        !      Local parameter.
+        !
+        integer(kind=intType), parameter :: nVarMax = 21
+        !
+        !      Local variables.
+        !
+        integer :: pos, ierr
+
+        character(len=15) :: keyword
+        character(len=maxStringLen) :: errorMessage
+
+        character(len=maxCGNSNameLen), dimension(nVarMax) :: tmpNames
+
+        logical :: monDrho, monTotalR
+
+        ! Check if the monitoring names have already been allocated.
+        ! This happens when multiple lines for the monitoring variables
+        ! are specified in the parameter file. If this happens the last
+        ! value is taken and thus release the memory of previously
+        ! specified names.
+
+        if (allocated(monNames)) then
+            deallocate (monNames, stat=ierr)
+            if (ierr /= 0) call terminate("monitorVariables", &
+                                          "Deallocation error for monNames")
+        end if
+
+        ! Initialize monDrho, monDturb and showCPU to .false.
+
+        monDrho = .false.
+        monTotalR = .false.
+        monDturb = .false.
+        showCPU = .false.
+
+        ! Initialize nMonSum, nMonMax and nMon to 0.
+
+        nMonSum = 0
+        nMonMax = 0
+        nMon = 0
+
+        ! Convert the string variables to lower case.
+
+        call convertToLowerCase(variables)
+
+        ! Loop to extract the info from the string variables.
+
+        do
+            ! Condition to exit the loop.
+
+            if (len_trim(variables) == 0) exit
+
+            ! Locate the first occurance of the _ in the string and
+            ! determine the string keyword.
+
+            pos = index(variables, "_")
+            if (pos == 0) then
+                keyword = variables
+                variables = ""
+            else
+                keyword = variables(:pos - 1)
+                variables = variables(pos + 1:)
+            end if
+
+            ! Check the keyword.
+
+            select case (keyword)
+            case ("")
+                ! Multiple occurence of "_". Just ignore it.
+
+            case ("cpu")    ! only written to stdout.
+                showCPU = .true.
+
+            case ("resrho")
+                monDrho = .true.
+                nMon = nMon + 1; nMonSum = nMonSum + 1
+                tmpNames(nMon) = cgnsL2resRho
+
+            case ("resmom")
+                nMon = nMon + 3; nMonSum = nMonSum + 3
+                tmpNames(nMon - 2) = cgnsL2resMomx
+                tmpNames(nMon - 1) = cgnsL2resMomy
+                tmpNames(nMon) = cgnsL2resMomz
+
+            case ("resrhoe")
+                nMon = nMon + 1; nMonSum = nMonSum + 1
+                tmpNames(nMon) = cgnsL2resRhoe
+
+            case ("resturb")  ! special case, because the turbulence model
+                ! Is not yet known. See checkMonitor.
+                monDturb = .true.
+
+            case ("cl")
+                nMon = nMon + 1; nMonSum = nMonSum + 1
+                tmpNames(nMon) = cgnsCl
+
+            case ("clp")
+                nMon = nMon + 1; nMonSum = nMonSum + 1
+                tmpNames(nMon) = cgnsClp
 
-       case ("mz")
-          isoWriteMz = .true.
-          nVarSpecified = nVarSpecified + 1
+            case ("clv")
+                nMon = nMon + 1; nMonSum = nMonSum + 1
+                tmpNames(nMon) = cgnsClv
 
-       case ("rvx")
-          isoWriteRVx = .true.
-          nVarSpecified = nVarSpecified + 1
+            case ("cd")
+                nMon = nMon + 1; nMonSum = nMonSum + 1
+                tmpNames(nMon) = cgnsCd
 
-       case ("rvy")
-          isoWriteRVy = .true.
-          nVarSpecified = nVarSpecified + 1
+            case ("cdp")
+                nMon = nMon + 1; nMonSum = nMonSum + 1
+                tmpNames(nMon) = cgnsCdp
 
-       case ("rvz")
-          isoWriteRVz = .true.
-          nVarSpecified = nVarSpecified + 1
+            case ("cdv")
+                nMon = nMon + 1; nMonSum = nMonSum + 1
+                tmpNames(nMon) = cgnsCdv
 
-       case ("rhoe")
-          isoWriteRhoe = .true.
-          nVarSpecified = nVarSpecified + 1
+            case ("cfx")
+                nMon = nMon + 1; nMonSum = nMonSum + 1
+                tmpNames(nMon) = cgnsCfx
 
-       case ("temp")
-          isoWriteTemp = .true.
-          nVarSpecified = nVarSpecified + 1
+            case ("cfy")
+                nMon = nMon + 1; nMonSum = nMonSum + 1
+                tmpNames(nMon) = cgnsCfy
 
-       case ("vort")
-          isoWriteVort = .true.
-          nVarSpecified = nVarSpecified + 1
+            case ("cfz")
+                nMon = nMon + 1; nMonSum = nMonSum + 1
+                tmpNames(nMon) = cgnsCfz
 
-       case ("vortx")
-          isoWriteVortx = .true.
-          nVarSpecified = nVarSpecified + 1
+            case ("cmx")
+                nMon = nMon + 1; nMonSum = nMonSum + 1
+                tmpNames(nMon) = cgnsCmx
 
-       case ("vorty")
-          isoWriteVorty = .true.
-          nVarSpecified = nVarSpecified + 1
+            case ("cmy")
+                nMon = nMon + 1; nMonSum = nMonSum + 1
+                tmpNames(nMon) = cgnsCmy
 
-       case ("vortz")
-          isoWriteVortz = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("cp")
-          isoWriteCp = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("mach")
-          isoWriteMach = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("rmach")
-          isoWriteRMach = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("macht")
-          isoWriteMachTurb = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("ptloss")
-          isoWritePtotloss = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("eddy")
-          isoWriteEddyVis = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("eddyratio")
-          isoWriteRatioEddyVis = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("dist")
-          isoWriteDist = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("resrho")
-          isoWriteResRho = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("resmom")
-          isoWriteResMom = .true.
-          nVarSpecified = nVarSpecified + 1
+            case ("cmz")
+                nMon = nMon + 1; nMonSum = nMonSum + 1
+                tmpNames(nMon) = cgnsCmz
 
-       case ("resrhoe")
-          isoWriteResRhoe = .true.
-          nVarSpecified = nVarSpecified + 1
+            case ("hdiff")
+                nMon = nMon + 1; nMonMax = nMonMax + 1
+                tmpNames(nMon) = cgnsHdiffMax
 
-       case ("resturb")
-          isoWriteResTurb = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("blank")
-          isoWriteBlank = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case("shock")
-          isoWriteShock = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case("filteredshock")
-          isoWriteFilteredShock = .true.
-          nVarSpecified = nVarSpecified + 1
-
-
-       case default
-          pos = len_trim(keyword)
-          write(errorMessage,"(3a)" ) "Unknown extra iso output &
-               &variable, ", trim(keyword), &
-               ", specified"
-          call terminate("isoVariables", errorMessage)
-
-       end select
-
-    enddo
-
-    ! Set this to true regardless...it is possible no varibles were
-    ! specified
-    isoOutSpecified = .true.
-
-  end subroutine isoVariables
-
-  subroutine monitorVariables(variables)
-    !
-    !       monitorVariables extracts from the given string the variables
-    !       to be monitored during the convergence.
-    !
-    use constants
-    use cgnsNames
-    use communication, only : myid, adflow_comm_world
-    use monitor, only : monNames, nMOn , nMonMax, nMonSum, showCPU
-    use utils, only : convertToLowerCase, terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    character(len=*), intent(inout) :: variables
-    !
-    !      Local parameter.
-    !
-    integer(kind=intType), parameter :: nVarMax = 21
-    !
-    !      Local variables.
-    !
-    integer :: pos, ierr
-
-    character(len=15) :: keyword
-    character(len=maxStringLen) :: errorMessage
-
-    character(len=maxCGNSNameLen), dimension(nVarMax) :: tmpNames
-
-    logical :: monDrho, monTotalR
-
-    ! Check if the monitoring names have already been allocated.
-    ! This happens when multiple lines for the monitoring variables
-    ! are specified in the parameter file. If this happens the last
-    ! value is taken and thus release the memory of previously
-    ! specified names.
-
-    if( allocated(monNames) ) then
-       deallocate(monNames, stat=ierr)
-       if(ierr /= 0) call terminate("monitorVariables", &
-            "Deallocation error for monNames")
-    endif
-
-    ! Initialize monDrho, monDturb and showCPU to .false.
-
-    monDrho  = .false.
-    monTotalR = .false.
-    monDturb = .false.
-    showCPU  = .false.
-
-    ! Initialize nMonSum, nMonMax and nMon to 0.
-
-    nMonSum = 0
-    nMonMax = 0
-    nMon    = 0
-
-    ! Convert the string variables to lower case.
-
-    call convertToLowerCase(variables)
-
-    ! Loop to extract the info from the string variables.
-
-    do
-       ! Condition to exit the loop.
-
-       if(len_trim(variables) == 0) exit
-
-       ! Locate the first occurance of the _ in the string and
-       ! determine the string keyword.
-
-       pos = index(variables, "_")
-       if(pos == 0) then
-          keyword   = variables
-          variables = ""
-       else
-          keyword   = variables(:pos-1)
-          variables = variables(pos+1:)
-       endif
-
-       ! Check the keyword.
-
-       select case (keyword)
-       case ("")
-          ! Multiple occurence of "_". Just ignore it.
-
-       case ("cpu")    ! only written to stdout.
-          showCPU = .true.
-
-       case ("resrho")
-          monDrho = .true.
-          nMon = nMon + 1; nMonSum = nMonSum + 1
-          tmpNames(nMon) = cgnsL2resRho
-
-       case ("resmom")
-          nMon = nMon + 3; nMonSum = nMonSum + 3
-          tmpNames(nMon-2) = cgnsL2resMomx
-          tmpNames(nMon-1) = cgnsL2resMomy
-          tmpNames(nMon)   = cgnsL2resMomz
-
-       case ("resrhoe")
-          nMon = nMon + 1; nMonSum = nMonSum + 1
-          tmpNames(nMon) = cgnsL2resRhoe
-
-       case ("resturb")  ! special case, because the turbulence model
-          ! Is not yet known. See checkMonitor.
-          monDturb = .true.
-
-       case ("cl")
-          nMon = nMon + 1; nMonSum = nMonSum + 1
-          tmpNames(nMon) = cgnsCl
-
-       case ("clp")
-          nMon = nMon + 1; nMonSum = nMonSum + 1
-          tmpNames(nMon) = cgnsClp
-
-       case ("clv")
-          nMon = nMon + 1; nMonSum = nMonSum + 1
-          tmpNames(nMon) = cgnsClv
-
-       case ("cd")
-          nMon = nMon + 1; nMonSum = nMonSum + 1
-          tmpNames(nMon) = cgnsCd
-
-       case ("cdp")
-          nMon = nMon + 1; nMonSum = nMonSum + 1
-          tmpNames(nMon) = cgnsCdp
-
-       case ("cdv")
-          nMon = nMon + 1; nMonSum = nMonSum + 1
-          tmpNames(nMon) = cgnsCdv
-
-       case ("cfx")
-          nMon = nMon + 1; nMonSum = nMonSum + 1
-          tmpNames(nMon) = cgnsCfx
-
-       case ("cfy")
-          nMon = nMon + 1; nMonSum = nMonSum + 1
-          tmpNames(nMon) = cgnsCfy
-
-       case ("cfz")
-          nMon = nMon + 1; nMonSum = nMonSum + 1
-          tmpNames(nMon) = cgnsCfz
-
-       case ("cmx")
-          nMon = nMon + 1; nMonSum = nMonSum + 1
-          tmpNames(nMon) = cgnsCmx
-
-       case ("cmy")
-          nMon = nMon + 1; nMonSum = nMonSum + 1
-          tmpNames(nMon) = cgnsCmy
-
-       case ("cmz")
-          nMon = nMon + 1; nMonSum = nMonSum + 1
-          tmpNames(nMon) = cgnsCmz
-
-       case ("hdiff")
-          nMon = nMon + 1; nMonMax = nMonMax + 1
-          tmpNames(nMon) = cgnsHdiffMax
-
-       case ("mach")
-          nMon = nMon + 1; nMonMax = nMonMax + 1
-          tmpNames(nMon) = cgnsMachMax
+            case ("mach")
+                nMon = nMon + 1; nMonMax = nMonMax + 1
+                tmpNames(nMon) = cgnsMachMax
+
+            case ("yplus")
+                nMon = nMon + 1; nMonMax = nMonMax + 1
+                tmpNames(nMon) = cgnsYplusMax
+
+            case ("eddyv")
+                nMon = nMon + 1; nMonMax = nMonMax + 1
+                tmpNames(nMon) = cgnsEddyMax
+
+            case ("totalr")
+                monTotalR = .True.
+                nMon = nMon + 1; nMonSum = nMonSum + 1
+                tmpNames(nMon) = 'totalR'
+
+            case ("sepsensor")
+                nMon = nMon + 1; nMonSum = nMonSum + 1
+                tmpNames(nMon) = cgnsSepSensor
+
+            case ("cavitation")
+                nMon = nMon + 1; nMonSum = nMonSum + 1
+                tmpNames(nMon) = cgnsCavitation
+
+            case ("axismoment")
+                nMon = nMon + 1; nMonSum = nMonSum + 1
+                tmpNames(nMon) = cgnsAxisMoment
+
+            case default
+                write (errorMessage, "(3a)") "Unknown monitoring variable, ", &
+                    trim(keyword), ", specified"
+                if (myID == 0) &
+                    call terminate("monitorVariables", errorMessage)
+                call mpi_barrier(ADflow_comm_world, ierr)
 
-       case ("yplus")
-          nMon = nMon + 1; nMonMax = nMonMax + 1
-          tmpNames(nMon) = cgnsYplusMax
+            end select
 
-       case ("eddyv")
-          nMon = nMon + 1; nMonMax = nMonMax + 1
-          tmpNames(nMon) = cgnsEddyMax
+        end do
 
-       case("totalr")
-          monTotalR = .True.
-          nMon = nMon + 1; nMonSum = nMonSum + 1
-          tmpNames(nMon) = 'totalR'
+        ! If the density residual was not specified to be monitored,
+        ! add it to tmpNames.
 
-       case("sepsensor")
-          nMon = nMon + 1; nMonSum = nMonSum + 1
-          tmpNames(nMon) = cgnsSepSensor
+        if (.not. monDrho) then
+            nMon = nMon + 1; nMonSum = nMonSum + 1
+            tmpNames(nMon) = cgnsL2resRho
+        end if
 
-       case("cavitation")
-          nMon = nMon + 1; nMonSum = nMonSum + 1
-          tmpNames(nMon) = cgnsCavitation
+        ! If the total residual was not specified to be monitored, add
+        ! it to tmpNames.
 
-       case("axismoment")
-          nMon = nMon + 1; nMonSum = nMonSum + 1
-          tmpNames(nMon) = cgnsAxisMoment
+        if (.not. monTotalR) then
+            nMon = nMon + 1; nMonSum = nMonSum + 1
+            tmpNames(nMon) = "totalR"
+        end if
 
-       case default
-          write(errorMessage,"(3a)") "Unknown monitoring variable, ", &
-               trim(keyword), ", specified"
-          if(myID == 0) &
-               call terminate("monitorVariables", errorMessage)
-          call mpi_barrier(ADflow_comm_world, ierr)
+        ! Allocate the memory for monNames. If the turbulent residuals
+        ! must be monitored allocate some extra place.
 
-       end select
+        pos = nMon
+        if (monDturb) pos = nMon + 4
+        allocate (monNames(pos), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("monitorVariables", &
+                           "Memory allocation failure for monNames")
 
-    enddo
+        ! Copy the monitoring names into monNames.
 
-    ! If the density residual was not specified to be monitored,
-    ! add it to tmpNames.
+        do pos = 1, nMon
+            monNames(pos) = tmpNames(pos)
+        end do
 
-    if(.not. monDrho) then
-       nMon = nMon + 1; nMonSum = nMonSum + 1
-       tmpNames(nMon) = cgnsL2resRho
-    endif
+        ! Set monitorSpecified to .true. to indicate that monitoring
+        ! variables have been specified.
 
-    ! If the total residual was not specified to be monitored, add
-    ! it to tmpNames.
+        monitorSpecified = .true.
 
-    if(.not. monTotalR) then
-       nMon = nMon + 1; nMonSum = nMonSum + 1
-       tmpNames(nMon) = "totalR"
-    endif
+    end subroutine monitorVariables
+    subroutine readCpTempCurveFits
+        !
+        !       readCpTempCurveFits reads the curve fits for the cp as a
+        !       function of the temperature from the file cpFile.
+        !
+        use constants
+        use communication, only: myid, adflow_comm_world
+        use cpCurveFits, only: cpTrange, cpTempFit, cpEint, cpHint, cvn, cv0, &
+                               cpNParts
+        use inputIO, only: cpFile
+        use utils, only: terminate
+        implicit none
 
+        ! Local variables.
 
-    ! Allocate the memory for monNames. If the turbulent residuals
-    ! must be monitored allocate some extra place.
+        integer, parameter :: readUnit = 32
 
-    pos = nMon
-    if( monDturb ) pos = nMon + 4
-    allocate(monNames(pos), stat=ierr)
-    if(ierr /= 0)                         &
-         call terminate("monitorVariables", &
-         "Memory allocation failure for monNames")
+        integer :: ios, ierr
 
-    ! Copy the monitoring names into monNames.
+        integer(kind=intType) :: nn, mm, kk, ii
+        real(kind=realType)   :: T1, T2, e0
 
-    do pos=1,nMon
-       monNames(pos) = tmpNames(pos)
-    enddo
+        character(len=2*maxStringLen) :: errorMessage
+        character(len=512)            :: string
 
-    ! Set monitorSpecified to .true. to indicate that monitoring
-    ! variables have been specified.
+        ! Open the file for reading and check if it went okay. If the file
+        ! is not found, processor 0 prints an error message.
 
-    monitorSpecified = .true.
+        open (unit=readUnit, file=cpFile, status="old", &
+              action="read", iostat=ios)
 
-  end subroutine monitorVariables
-  subroutine readCpTempCurveFits
-    !
-    !       readCpTempCurveFits reads the curve fits for the cp as a
-    !       function of the temperature from the file cpFile.
-    !
-    use constants
-    use communication, only : myid, adflow_comm_world
-    use cpCurveFits, only : cpTrange, cpTempFit, cpEint, cpHint, cvn, cv0, &
-         cpNParts
-    use inputIO, only : cpFile
-    use utils, only : terminate
-    implicit none
+        if (ios /= 0) then
 
-    ! Local variables.
+            write (errorMessage, *) "Cp curve fit file ", trim(cpFile), &
+                " not found."
+            if (myID == 0) &
+                call terminate("readCpTempCurveFits", errorMessage)
 
-    integer, parameter :: readUnit = 32
+            call mpi_barrier(ADflow_comm_world, ierr)
+        end if
 
-    integer :: ios, ierr
+        ! Skip the comment lines and read the number of parts.
+        ! Check if a valid number is read.
 
-    integer(kind=intType) :: nn, mm, kk, ii
-    real(kind=realType)   :: T1, T2, e0
+        call findNextInfoLine(readUnit, string)
+        read (string, *) cpNparts
 
-    character(len=2*maxStringLen) :: errorMessage
-    character(len=512)            :: string
+        if (cpNparts <= 0) then
+            if (myID == 0) &
+                 call terminate("readCpTempCurveFits", &
+                 "Wrong number of temperature ranges in &
+                 &Cp curve fit file.")
+            call mpi_barrier(ADflow_comm_world, ierr)
+        end if
 
-    ! Open the file for reading and check if it went okay. If the file
-    ! is not found, processor 0 prints an error message.
+        ! Allocate the memory for the variables to store the curve fit
+        ! data.
 
-    open(unit=readUnit, file=cpFile, status="old", &
-         action="read", iostat=ios)
+        allocate (cpTrange(0:cpNparts), cpEint(0:cpNparts), &
+                  cpHint(0:cpNparts), cpTempFit(cpNparts), &
+                  stat=ierr)
+        if (ierr /= 0) &
+             call terminate("readCpTempCurveFits", &
+             "Memory allocation failure for cpTrange, &
+             &cpEint, cpHint and cpTempFit")
+
+        ! Loop over the number of temperature ranges.
+
+        nRanges: do nn = 1, cpNparts
 
-    if(ios /= 0) then
+            ! Find the next line with information and read the temperature
+            ! range.
+
+            call findNextInfoLine(readUnit, string)
+            read (string, *) T1, T2
+
+            ! If this is the first range, set the temperature range;
+            ! otherwise check if the lower boundary equals the upper
+            ! boundary of the previous range.
+
+            if (nn == 1) then
+                cpTrange(0) = T1
+                cpTrange(1) = T2
+            else
+                cpTrange(nn) = T2
 
-       write(errorMessage,*) "Cp curve fit file ", trim(cpFile), &
-            " not found."
-       if(myID == 0) &
-            call terminate("readCpTempCurveFits", errorMessage)
+                if (T1 /= cpTrange(nn - 1)) then
+                    if (myID == 0) &
+                        call terminate("readCpTempCurveFits", &
+                                       "Curve fit boundary not continuous")
+                    call mpi_barrier(ADflow_comm_world, ierr)
+                end if
+            end if
 
-       call mpi_barrier(ADflow_comm_world, ierr)
-    endif
+            ! Read the number of points in the fit.
+
+            call findNextInfoLine(readUnit, string)
+            read (string, *) cpTempFit(nn)%nterm
+
+            ! Allocate the memory for the exponents and the constants.
+
+            ii = cpTempFit(nn)%nterm
+            allocate (cpTempFit(nn)%exponents(ii), &
+                      cpTempFit(nn)%constants(ii), stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("readCpTempCurveFits", &
+                 "Memory allocation failure for exponents and &
+                 &constants")
 
-    ! Skip the comment lines and read the number of parts.
-    ! Check if a valid number is read.
+            ! Read the exponents from the file.
 
-    call findNextInfoLine(readUnit, string)
-    read(string,*) cpNparts
+            call findNextInfoLine(readUnit, string)
+            do ii = 1, cpTempFit(nn)%nterm
 
-    if(cpNparts <= 0) then
-       if(myID == 0)                           &
-            call terminate("readCpTempCurveFits", &
-            "Wrong number of temperature ranges in &
-            &Cp curve fit file.")
-       call mpi_barrier(ADflow_comm_world, ierr)
-    endif
+                ! Read the exponent from the string.
 
-    ! Allocate the memory for the variables to store the curve fit
-    ! data.
+                read (string, *) cpTempFit(nn)%exponents(ii)
 
-    allocate(cpTrange(0:cpNparts), cpEint(0:cpNparts),  &
-         cpHint(0:cpNparts),   cpTempFit(cpNparts), &
-         stat=ierr)
-    if(ierr /= 0)                           &
-         call terminate("readCpTempCurveFits", &
-         "Memory allocation failure for cpTrange, &
-         &cpEint, cpHint and cpTempFit")
+                ! Remove this value from the string if this is not the
+                ! last exponent to be read.
 
-    ! Loop over the number of temperature ranges.
+                if (ii < cpTempFit(nn)%nterm) then
+                    ios = index(string, " ")
+                    if (ios > 0) then
+                        string = string(ios:)
+                        string = adjustl(string)
+                        string = trim(string)
+                    else
+                        if (myID == 0) &
+                             call terminate("readCpTempCurveFits", &
+                             "Not enough exponents on line; &
+                             &Cp curve fit file not valid.")
+                        call mpi_barrier(ADflow_comm_world, ierr)
+                    end if
+                end if
+            end do
 
-    nRanges: do nn=1,cpNparts
+            ! Read the constants from the file.
 
-       ! Find the next line with information and read the temperature
-       ! range.
+            call findNextInfoLine(readUnit, string)
+            do ii = 1, cpTempFit(nn)%nterm
 
-       call findNextInfoLine(readUnit, string)
-       read(string,*) T1, T2
+                ! Read the constant from the string.
 
-       ! If this is the first range, set the temperature range;
-       ! otherwise check if the lower boundary equals the upper
-       ! boundary of the previous range.
+                read (string, *) cpTempFit(nn)%constants(ii)
 
-       if(nn == 1) then
-          cpTrange(0) = T1
-          cpTrange(1) = T2
-       else
-          cpTrange(nn) = T2
+                ! Remove this value from the string if this is not the
+                ! last constant to be read.
 
-          if(T1 /= cpTrange(nn-1)) then
-             if(myID == 0)                           &
-                  call terminate("readCpTempCurveFits", &
-                  "Curve fit boundary not continuous")
-             call mpi_barrier(ADflow_comm_world, ierr)
-          endif
-       endif
+                if (ii < cpTempFit(nn)%nterm) then
+                    ios = index(string, " ")
+                    if (ios > 0) then
+                        string = string(ios:)
+                        string = adjustl(string)
+                        string = trim(string)
+                    else
+                        if (myID == 0) &
+                             call terminate("readCpTempCurveFits", &
+                             "Not enough constants on line; &
+                             &Cp curve fit file not valid.")
+                        call mpi_barrier(ADflow_comm_world, ierr)
+                    end if
+                end if
+            end do
 
-       ! Read the number of points in the fit.
+        end do nRanges
 
-       call findNextInfoLine(readUnit, string)
-       read(string,*) cpTempFit(nn)%nterm
+        ! Close the file
 
-       ! Allocate the memory for the exponents and the constants.
-
-       ii = cpTempFit(nn)%nterm
-       allocate(cpTempFit(nn)%exponents(ii), &
-            cpTempFit(nn)%constants(ii), stat=ierr)
-       if(ierr /= 0)                           &
-            call terminate("readCpTempCurveFits", &
-            "Memory allocation failure for exponents and &
-            &constants")
+        close (unit=readUnit)
+        !
+        !       Compute the constants eint0, such that the internal energy is
+        !       a continous function of the temperature.
+        !
+        ! First for the first interval, such that at T = 0 Kelvin the
+        ! energy is also zero.
 
-       ! Read the exponents from the file.
+        T1 = cpTrange(0)
+        cv0 = -one          ! cv/R = cp/R - 1.0
+        e0 = -T1           ! e = integral of cv, not of cp.
 
-       call findNextInfoLine(readUnit, string)
-       do ii=1,cpTempFit(nn)%nterm
+        do ii = 1, cpTempFit(1)%nterm
 
-          ! Read the exponent from the string.
+            ! Update cv0.
 
-          read(string,*) cpTempFit(nn)%exponents(ii)
+            T2 = T1**(cpTempFit(1)%exponents(ii))
+            cv0 = cv0 + cpTempFit(1)%constants(ii)*T2
 
-          ! Remove this value from the string if this is not the
-          ! last exponent to be read.
+            ! Update e0, for which this contribution must be integrated.
+            ! Take the exceptional case exponent is -1 into account.
 
-          if(ii < cpTempFit(nn)%nterm) then
-             ios = index(string," ")
-             if(ios > 0) then
-                string = string(ios:)
-                string = adjustl(string)
-                string = trim(string)
-             else
-                if(myID == 0)                           &
-                     call terminate("readCpTempCurveFits", &
-                     "Not enough exponents on line; &
-                     &Cp curve fit file not valid.")
-                call mpi_barrier(ADflow_comm_world, ierr)
-             endif
-          endif
-       enddo
+            if (cpTempFit(1)%exponents(ii) == -1_intType) then
+                e0 = e0 + cpTempFit(1)%constants(ii)*log(T1)
+            else
+                T2 = T1*T2
+                e0 = e0 + cpTempFit(1)%constants(ii)*T2 &
+                     /(cpTempFit(1)%exponents(ii) + 1)
+            end if
 
-       ! Read the constants from the file.
+        end do
 
-       call findNextInfoLine(readUnit, string)
-       do ii=1,cpTempFit(nn)%nterm
+        ! Set the value of the internal energy at the temperature T1.
+        ! Cv is assumed to be constant in the temperature range 0 - T1.
+        ! Idem for the internal enthalpy.
 
-          ! Read the constant from the string.
+        cpEint(0) = cv0*T1
+        cpHint(0) = cpEint(0) + T1
 
-          read(string,*) cpTempFit(nn)%constants(ii)
+        ! Compute the integration constant for the energy.
 
-          ! Remove this value from the string if this is not the
-          ! last constant to be read.
-
-          if(ii < cpTempFit(nn)%nterm) then
-             ios = index(string," ")
-             if(ios > 0) then
-                string = string(ios:)
-                string = adjustl(string)
-                string = trim(string)
-             else
-                if(myID == 0)                           &
-                     call terminate("readCpTempCurveFits", &
-                     "Not enough constants on line; &
-                     &Cp curve fit file not valid.")
-                call mpi_barrier(ADflow_comm_world, ierr)
-             endif
-          endif
-       enddo
+        cpTempFit(1)%eint0 = cpEint(0) - e0
 
-    enddo nRanges
+        ! Loop over the other temperature ranges to compute their
+        ! integration constant and the energy at the curve fit boundary.
 
-    ! Close the file
+        nRanges2: do nn = 2, cpNparts
 
-    close(unit=readUnit)
-    !
-    !       Compute the constants eint0, such that the internal energy is
-    !       a continous function of the temperature.
-    !
-    ! First for the first interval, such that at T = 0 Kelvin the
-    ! energy is also zero.
+            ! Store nn-1, the previous temperature range, in mm.
 
-    T1  =  cpTrange(0)
-    cv0 = -one          ! cv/R = cp/R - 1.0
-    e0  = -T1           ! e = integral of cv, not of cp.
+            mm = nn - 1
 
-    do ii=1,cpTempFit(1)%nterm
+            ! Store the temperature at the interface a bit easier.
 
-       ! Update cv0.
+            T1 = cpTrange(mm)
 
-       T2  = T1**(cpTempFit(1)%exponents(ii))
-       cv0 = cv0 + cpTempFit(1)%constants(ii)*T2
+            ! First compute the internal energy (scaled by r) from the
+            ! previous range. Actually not the energy but the enthalpy is
+            ! computed. This leads to the same integraton constant.
+            ! Again check for exponent -1 when integrating.
 
-       ! Update e0, for which this contribution must be integrated.
-       ! Take the exceptional case exponent is -1 into account.
+            e0 = cpTempFit(mm)%eint0
 
-       if(cpTempFit(1)%exponents(ii) == -1_intType) then
-          e0 = e0 + cpTempFit(1)%constants(ii)*log(T1)
-       else
-          T2 = T1*T2
-          e0 = e0 + cpTempFit(1)%constants(ii)*T2 &
-               / (cpTempFit(1)%exponents(ii) + 1)
-       endif
+            do ii = 1, cpTempFit(mm)%nterm
+                if (cpTempFit(mm)%exponents(ii) == -1_intType) then
+                    e0 = e0 + cpTempFit(mm)%constants(ii)*log(T1)
+                else
+                    kk = cpTempFit(mm)%exponents(ii) + 1
+                    T2 = T1**kk
+                    e0 = e0 + cpTempFit(mm)%constants(ii)*T2/kk
+                end if
+            end do
 
-    enddo
+            ! Store the enthalpy and energy at the curve fit boundary.
+            ! Remember that cp was integrated.
 
-    ! Set the value of the internal energy at the temperature T1.
-    ! Cv is assumed to be constant in the temperature range 0 - T1.
-    ! Idem for the internal enthalpy.
+            cpHint(mm) = e0
+            cpEint(mm) = e0 - T1
 
-    cpEint(0) = cv0*T1
-    cpHint(0) = cpEint(0) + T1
+            ! Substract the part coming from the integration of cp/r of
+            ! the range nn.
 
-    ! Compute the integration constant for the energy.
+            do ii = 1, cpTempFit(nn)%nterm
+                if (cpTempFit(nn)%exponents(ii) == -1_intType) then
+                    e0 = e0 - cpTempFit(nn)%constants(ii)*log(T1)
+                else
+                    kk = cpTempFit(nn)%exponents(ii) + 1
+                    T2 = T1**kk
+                    e0 = e0 - cpTempFit(nn)%constants(ii)*T2/kk
+                end if
+            end do
 
-    cpTempFit(1)%eint0 = cpEint(0) - e0
+            ! Store the integration constant for the range nn.
 
-    ! Loop over the other temperature ranges to compute their
-    ! integration constant and the energy at the curve fit boundary.
+            cpTempFit(nn)%eint0 = e0
 
-    nRanges2: do nn=2,cpNparts
+        end do nRanges2
 
-       ! Store nn-1, the previous temperature range, in mm.
+        ! Compute the values of cv and the internal energy at the upper
+        ! boundary of the curve fit. This is needed for the extrapolation
+        ! of the energy if states occur with a higher temperature than
+        ! the validness of the curve fits.
 
-       mm = nn - 1
+        ! First initialize these values.
 
-       ! Store the temperature at the interface a bit easier.
+        nn = cpNparts
+        T1 = cpTrange(nn)
+        cvn = -one                       ! cv/R = cp/R - 1.0
+        e0 = cpTempFit(nn)%eint0 - T1  ! e = integral of cv, not of cp.
 
-       T1 = cpTrange(mm)
+        do ii = 1, cpTempFit(nn)%nterm
 
-       ! First compute the internal energy (scaled by r) from the
-       ! previous range. Actually not the energy but the enthalpy is
-       ! computed. This leads to the same integraton constant.
-       ! Again check for exponent -1 when integrating.
+            ! Update cvn.
 
-       e0 = cpTempFit(mm)%eint0
+            T2 = T1**(cpTempFit(nn)%exponents(ii))
+            cvn = cvn + cpTempFit(nn)%constants(ii)*T2
 
-       do ii=1,cpTempFit(mm)%nterm
-          if(cpTempFit(mm)%exponents(ii) == -1_intType) then
-             e0 = e0 + cpTempFit(mm)%constants(ii)*log(T1)
-          else
-             kk = cpTempFit(mm)%exponents(ii) + 1
-             T2 = T1**kk
-             e0 = e0 + cpTempFit(mm)%constants(ii)*T2/kk
-          endif
-       enddo
+            ! Update e0, for which this contribution must be integrated.
+            ! Take the exceptional case exponent is -1 into account.
 
-       ! Store the enthalpy and energy at the curve fit boundary.
-       ! Remember that cp was integrated.
+            if (cpTempFit(nn)%exponents(ii) == -1_intType) then
+                e0 = e0 + cpTempFit(nn)%constants(ii)*log(T1)
+            else
+                e0 = e0 + cpTempFit(nn)%constants(ii)*T2*T1 &
+                     /(cpTempFit(nn)%exponents(ii) + 1)
+            end if
 
-       cpHint(mm) = e0
-       cpEint(mm) = e0 - T1
+        end do
 
-       ! Substract the part coming from the integration of cp/r of
-       ! the range nn.
+        ! Store e0 correctly.
 
-       do ii=1,cpTempFit(nn)%nterm
-          if(cpTempFit(nn)%exponents(ii) == -1_intType) then
-             e0 = e0 - cpTempFit(nn)%constants(ii)*log(T1)
-          else
-             kk = cpTempFit(nn)%exponents(ii) + 1
-             T2 = T1**kk
-             e0 = e0 - cpTempFit(nn)%constants(ii)*T2/kk
-          endif
-       enddo
+        cpEint(nn) = e0
+        cpHint(nn) = e0 + T1
 
-       ! Store the integration constant for the range nn.
+        ! Compute the values of the integrands of cp/(R*T) at the lower
+        ! and upper curve fit boundary. This is needed to compute the
+        ! total pressure. This cannot be done with a single integration
+        ! constant, because of the singularity at T = 0.
 
-       cpTempFit(nn)%eint0 = e0
+        nRanges3: do nn = 1, cpNparts
 
-    enddo nRanges2
+            ! Store the temperatures of the lower and upper boundary a
+            ! bit easier.
 
-    ! Compute the values of cv and the internal energy at the upper
-    ! boundary of the curve fit. This is needed for the extrapolation
-    ! of the energy if states occur with a higher temperature than
-    ! the validness of the curve fits.
+            T1 = cpTrange(nn - 1)
+            T2 = cpTrange(nn)
 
-    ! First initialize these values.
+            ! Initializes the integrands to zero.
 
-    nn  =  cpNparts
-    T1  =  cpTrange(nn)
-    cvn = -one                       ! cv/R = cp/R - 1.0
-    e0  =  cpTempFit(nn)%eint0 - T1  ! e = integral of cv, not of cp.
+            cpTempFit(nn)%intCpovrT_1 = zero
+            cpTempFit(nn)%intCpovrT_2 = zero
 
-    do ii=1,cpTempFit(nn)%nterm
+            ! Loop over the number of terms of the curve fits and compute
+            ! the integral cp/(r*t).
 
-       ! Update cvn.
+            do ii = 1, cpTempFit(nn)%nterm
 
-       T2  = T1**(cpTempFit(nn)%exponents(ii))
-       cvn = cvn + cpTempFit(nn)%constants(ii)*T2
+                ! Store the coefficient a bit easier in mm. As the integral
+                ! of cp/(R*T) must be computed, this is also the exponent
+                ! of the primitive function; except of course when the exponent
+                ! is 0.
 
-       ! Update e0, for which this contribution must be integrated.
-       ! Take the exceptional case exponent is -1 into account.
+                mm = cpTempFit(nn)%exponents(ii)
 
-       if(cpTempFit(nn)%exponents(ii) == -1_intType) then
-          e0 = e0 + cpTempFit(nn)%constants(ii)*log(T1)
-       else
-          e0 = e0 + cpTempFit(nn)%constants(ii)*T2*T1 &
-               /     (cpTempFit(nn)%exponents(ii) + 1)
-       endif
+                ! Update the integrands if the temperature is larger than
+                ! 0 kelvin. In case the boundary is 0 kelvin the value is not
+                ! needed anyway.
 
-    enddo
+                if (T1 > zero) then
+                    if (mm == 0_intType) then
+                        cpTempFit(nn)%intCpovrT_1 = cpTempFit(nn)%intCpovrT_1 &
+                                                    + cpTempFit(nn)%constants(ii)*log(T1)
+                    else
+                        cpTempFit(nn)%intCpovrT_1 = cpTempFit(nn)%intCpovrT_1 &
+                                                    + (cpTempFit(nn)%constants(ii)*T1**mm)/mm
+                    end if
+                end if
 
-    ! Store e0 correctly.
+                if (T2 > zero) then
+                    if (mm == 0_intType) then
+                        cpTempFit(nn)%intCpovrT_2 = cpTempFit(nn)%intCpovrT_2 &
+                                                    + cpTempFit(nn)%constants(ii)*log(T2)
+                    else
+                        cpTempFit(nn)%intCpovrT_2 = cpTempFit(nn)%intCpovrT_2 &
+                                                    + (cpTempFit(nn)%constants(ii)*T2**mm)/mm
+                    end if
+                end if
 
-    cpEint(nn) = e0
-    cpHint(nn) = e0 + T1
+            end do
 
-    ! Compute the values of the integrands of cp/(R*T) at the lower
-    ! and upper curve fit boundary. This is needed to compute the
-    ! total pressure. This cannot be done with a single integration
-    ! constant, because of the singularity at T = 0.
+        end do nRanges3
 
-    nRanges3: do nn=1,cpNparts
+    end subroutine readCpTempCurveFits
 
-       ! Store the temperatures of the lower and upper boundary a
-       ! bit easier.
+    !      ==================================================================
 
-       T1 = cpTrange(nn-1)
-       T2 = cpTrange(nn)
+    subroutine findNextInfoLine(readUnit, string)
+        !
+        !       findNextInfoLine skips the comment lines in the given unit
+        !       and finds the first line containing information.
+        !
+        use communication
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        integer, intent(in)             :: readUnit
+        character(len=512), intent(out) :: string
+        !
+        !      Local variables.
+        !
+        integer :: ios, ierr
 
-       ! Initializes the integrands to zero.
+        ! Loop to skip the comment lines.
 
-       cpTempFit(nn)%intCpovrT_1 = zero
-       cpTempFit(nn)%intCpovrT_2 = zero
+        do
+            read (unit=readUnit, fmt="(a512)", iostat=ios) string
 
-       ! Loop over the number of terms of the curve fits and compute
-       ! the integral cp/(r*t).
-
-       do ii=1,cpTempFit(nn)%nterm
-
-          ! Store the coefficient a bit easier in mm. As the integral
-          ! of cp/(R*T) must be computed, this is also the exponent
-          ! of the primitive function; except of course when the exponent
-          ! is 0.
-
-          mm = cpTempFit(nn)%exponents(ii)
-
-          ! Update the integrands if the temperature is larger than
-          ! 0 kelvin. In case the boundary is 0 kelvin the value is not
-          ! needed anyway.
+            ! Test if everything went okay.
+
+            if (ios /= 0) then
+                if (myID == 0) &
+                    call terminate("findNextInfoLine", &
+                                   "Unexpected end of Cp curve fit file")
+                call mpi_barrier(ADflow_comm_world, ierr)
+            end if
 
-          if(T1 > zero) then
-             if(mm == 0_intType) then
-                cpTempFit(nn)%intCpovrT_1 = cpTempFit(nn)%intCpovrT_1 &
-                     + cpTempFit(nn)%constants(ii)*log(T1)
-             else
-                cpTempFit(nn)%intCpovrT_1 = cpTempFit(nn)%intCpovrT_1 &
-                     + (cpTempFit(nn)%constants(ii)*T1**mm)/mm
-             endif
-          endif
+            ! Get rid of the leading and trailing spaces in string.
 
-          if(T2 > zero) then
-             if(mm == 0_intType) then
-                cpTempFit(nn)%intCpovrT_2 = cpTempFit(nn)%intCpovrT_2 &
-                     + cpTempFit(nn)%constants(ii)*log(T2)
-             else
-                cpTempFit(nn)%intCpovrT_2 = cpTempFit(nn)%intCpovrT_2 &
-                     + (cpTempFit(nn)%constants(ii)*T2**mm)/mm
-             endif
-          endif
+            string = adjustl(string)
+            string = trim(string)
 
-       enddo
+            ! Check if this is the correct line. If so, exit
 
-    enddo nRanges3
+            if ((len_trim(string) > 0) .and. (string(:1) /= "#")) exit
+        end do
 
-  end subroutine readCpTempCurveFits
+    end subroutine findNextInfoLine
 
-  !      ==================================================================
+    subroutine setEquationParameters
+        !
+        !       setEquationParameters sets the number of variables in the
+        !       governing equations, the number of turbulent variables, etc.
+        !
+        use constants
+        use paramTurb
+        use turbCurveFits
+        use flowVarRefState, only: nw, nwf, nt1, nt2, nwt, viscous, &
+                                   eddyModel, kPresent
+        use inputPhysics, only: equations, turbModel, wallFunctions, rvfN
+        implicit none
 
-  subroutine findNextInfoLine(readUnit, string)
-    !
-    !       findNextInfoLine skips the comment lines in the given unit
-    !       and finds the first line containing information.
-    !
-    use communication
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    integer, intent(in)             :: readUnit
-    character(len=512), intent(out) :: string
-    !
-    !      Local variables.
-    !
-    integer :: ios, ierr
+        ! Set the number of flow variables to 5, nt1 to 6. This is valid
+        ! for all governing equations. Furthermore initialize viscous,
+        ! kPresent and eddyModel to .False., which indicates an inviscid
+        ! computation. For ns and rans this will be corrected.
 
-    ! Loop to skip the comment lines.
+        nwf = 5
+        nt1 = 6
 
-    do
-       read(unit=readUnit, fmt="(a512)", iostat=ios) string
+        viscous = .false.
+        kPresent = .false.
+        eddyModel = .false.
 
-       ! Test if everything went okay.
+        ! Determine the set of governing equations to solve for and set
+        ! the parameters accordingly.
 
-       if(ios /= 0) then
-          if(myID == 0)                        &
-               call terminate("findNextInfoLine", &
-               "Unexpected end of Cp curve fit file")
-          call mpi_barrier(ADflow_comm_world, ierr)
-       endif
+        select case (equations)
+        case (EulerEquations)
+            nw = 5
+            nt2 = 5
 
-       ! Get rid of the leading and trailing spaces in string.
+            !===============================================================
 
-       string = adjustl(string)
-       string = trim(string)
+        case (NSEquations)
+            nw = 5
+            nt2 = 5
 
-       ! Check if this is the correct line. If so, exit
+            viscous = .true.
 
-       if((len_trim(string) > 0) .and. (string(:1) /= "#")) exit
-    enddo
+            !===============================================================
 
-  end subroutine findNextInfoLine
+        case (RANSEquations)
 
-  subroutine setEquationParameters
-    !
-    !       setEquationParameters sets the number of variables in the
-    !       governing equations, the number of turbulent variables, etc.
-    !
-    use constants
-    use paramTurb
-    use turbCurveFits
-    use flowVarRefState, only : nw, nwf, nt1, nt2, nwt, viscous, &
-         eddyModel, kPresent
-    use inputPhysics, only : equations, turbModel, wallFunctions, rvfN
-    implicit none
+            viscous = .true.
 
-    ! Set the number of flow variables to 5, nt1 to 6. This is valid
-    ! for all governing equations. Furthermore initialize viscous,
-    ! kPresent and eddyModel to .False., which indicates an inviscid
-    ! computation. For ns and rans this will be corrected.
+            select case (turbModel)
 
-    nwf = 5
-    nt1 = 6
+            case (spalartAllmaras)
+                nw = 6
+                nt2 = 6
 
-    viscous    = .false.
-    kPresent  = .false.
-    eddyModel = .false.
+                eddyModel = .true.
+                if (wallFunctions) call initCurveFitDataSa
 
-    ! Determine the set of governing equations to solve for and set
-    ! the parameters accordingly.
+                !===========================================================
 
-    select case (equations)
-    case (EulerEquations)
-       nw  = 5
-       nt2 = 5
+            case (spalartAllmarasEdwards)
+                nw = 6
+                nt2 = 6
 
-       !===============================================================
+                eddyModel = .true.
+                if (wallFunctions) call initCurveFitDataSae
 
-    case (NSEquations)
-       nw  = 5
-       nt2 = 5
+                !===========================================================
 
-       viscous = .true.
+            case (komegaWilcox)
+                nw = 7
+                nt2 = 7
 
-       !===============================================================
+                kPresent = .true.
+                eddyModel = .true.
+                if (wallFunctions) call initCurveFitDataKw
 
-    case (RANSEquations)
+                !===========================================================
 
-       viscous = .true.
+            case (komegaModified)
+                nw = 7
+                nt2 = 7
 
-       select case(turbModel)
+                kPresent = .true.
+                eddyModel = .true.
+                if (wallFunctions) call initCurveFitDataKwMod
 
-       case (spalartAllmaras)
-          nw  = 6
-          nt2 = 6
+                !===========================================================
 
-          eddyModel = .true.
-          if( wallFunctions ) call initCurveFitDataSa
+            case (menterSST)
+                nw = 7
+                nt2 = 7
 
-          !===========================================================
+                kPresent = .true.
+                eddyModel = .true.
+                if (wallFunctions) call initCurveFitDataSST
 
-       case (spalartAllmarasEdwards)
-          nw  = 6
-          nt2 = 6
+                !===========================================================
 
-          eddyModel = .true.
-          if( wallFunctions ) call initCurveFitDataSae
+            case (ktau)
+                nw = 7
+                nt2 = 7
 
-          !===========================================================
+                kPresent = .true.
+                eddyModel = .true.
+                if (wallFunctions) call initCurveFitDataKtau
 
-       case (komegaWilcox)
-          nw  = 7
-          nt2 = 7
+                !===========================================================
 
-          kPresent  = .true.
-          eddyModel = .true.
-          if( wallFunctions ) call initCurveFitDataKw
+            case (v2f)
+                nw = 9
+                nt2 = 9
 
-          !===========================================================
+                rvfLimitK = 1.e-25_realType
+                rvfLimitE = 1.e-25_realType
 
-       case (komegaModified)
-          nw  = 7
-          nt2 = 7
+                if (rvfN == 6) then
+                    rvfCmu = rvfN6Cmu
+                    rvfCl = rvfN6Cl
+                else
+                    rvfCmu = rvfN1Cmu
+                    rvfCl = rvfN1Cl
+                end if
 
-          kPresent  = .true.
-          eddyModel = .true.
-          if( wallFunctions ) call initCurveFitDataKwMod
+                kPresent = .true.
+                eddyModel = .true.
+                if (wallFunctions) call initCurveFitDataVf
 
-          !===========================================================
+            end select
 
-       case (menterSST)
-          nw  = 7
-          nt2 = 7
+        end select
 
-          kPresent  = .true.
-          eddyModel = .true.
-          if( wallFunctions ) call initCurveFitDataSST
+        ! Determine the number of turbulent variables.
 
-          !===========================================================
+        nwt = nw - nwf
 
-       case (ktau)
-          nw  = 7
-          nt2 = 7
+    end subroutine setEquationParameters
+    subroutine setStageCoeffExplicitRK
+        !
+        !       setStageCoeffExplicitRK determines the coefficients of the
+        !       stages for the explicit Runge Kutta time integration schemes
+        !       for unsteady problems.
+        !
+        use constants
+        use inputUnsteady, only: timeAccuracy, betaRKUnsteady, &
+                                 gammaRKUnsteady, nRKStagesUnsteady
+        use utils, only: terminate
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
 
-          kPresent  = .true.
-          eddyModel = .true.
-          if( wallFunctions ) call initCurveFitDataKtau
+        ! Determine the number of Runge Kutta stages as a function of
+        ! the accuracy.
 
-          !===========================================================
+        select case (timeAccuracy)
+        case (firstOrder)
+            nRKStagesUnsteady = 1
 
-       case (v2f)
-          nw  = 9
-          nt2 = 9
+        case (secondOrder)
+            nRKStagesUnsteady = 2
 
-          rvfLimitK = 1.e-25_realType
-          rvfLimitE = 1.e-25_realType
+        case (thirdOrder)
+            nRKStagesUnsteady = 3
 
-          if(rvfN == 6) then
-             rvfCmu   = rvfN6Cmu
-             rvfCl    = rvfN6Cl
-          else
-             rvfCmu   = rvfN1Cmu
-             rvfCl    = rvfN1Cl
-          endif
+        case default
+            call terminate("setStageCoeffExplicitRK", &
+                           "No higher order stuff yet")
+        end select
 
-          kPresent  = .true.
-          eddyModel = .true.
-          if( wallFunctions ) call initCurveFitDataVf
+        ! Allocate and determine betaRKUnsteady and gammaRKUnsteady.
 
-       end select
+        allocate (betaRKUnsteady(nRKStagesUnsteady, nRKStagesUnsteady), &
+                  gammaRKUnsteady(nRKStagesUnsteady), stat=ierr)
+        if (ierr /= 0) &
+             call terminate("setStageCoeffExplicitRK", &
+             "Memory allocation failure for betaRKUnsteady &
+             &and gammaRKUnsteady.")
 
-    end select
+        betaRKUnsteady = zero
 
-    ! Determine the number of turbulent variables.
+        select case (timeAccuracy)
+        case (firstOrder)
 
-    nwt = nw - nwf
+            ! Just the forward Euler time integration scheme.
 
-  end subroutine setEquationParameters
-  subroutine setStageCoeffExplicitRK
-    !
-    !       setStageCoeffExplicitRK determines the coefficients of the
-    !       stages for the explicit Runge Kutta time integration schemes
-    !       for unsteady problems.
-    !
-    use constants
-    use inputUnsteady, only : timeAccuracy, betaRKUnsteady, &
-         gammaRKUnsteady, nRKStagesUnsteady
-    use utils, only : terminate
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
+            betaRKUnsteady(1, 1) = 1.0_realType
+            gammaRKUnsteady(1) = 0.0_realType
+
+            !==============================================================
+
+        case (secondOrder)
+
+            ! The TVD Runge Kutta scheme which allows for the maximum
+            ! CFL number (1.0).
+
+            betaRKUnsteady(1, 1) = 1.0_realType
+            betaRKUnsteady(2, 1) = -0.5_realType
+            betaRKUnsteady(2, 2) = 0.5_realType
+
+            gammaRKUnsteady(1) = 0.0_realType
+            gammaRKUnsteady(2) = 1.0_realType
+
+            !==============================================================
+
+        case (thirdOrder)
+
+            ! Low storage (although not exploited in this implemetation)
+            ! 3 stage scheme of Le and Moin.
+
+            betaRKUnsteady(1, 1) = 8.0_realType/15.0_realType
+            betaRKUnsteady(2, 1) = -17.0_realType/60.0_realType
+            betaRKUnsteady(2, 2) = 5.0_realType/12.0_realType
+            betaRKUnsteady(3, 2) = -5.0_realType/12.0_realType
+            betaRKUnsteady(3, 3) = 3.0_realType/4.0_realType
+
+            gammaRKUnsteady(1) = 0.0_realType
+            gammaRKUnsteady(2) = 8.0_realType/15.0_realType
+            gammaRKUnsteady(3) = 2.0_realType/3.0_realType
+
+            ! The TVD Runge Kutta scheme which allows for the maximum
+            ! CFL number (1.0).
 
-    ! Determine the number of Runge Kutta stages as a function of
-    ! the accuracy.
+            ! betaRKUnsteady(1,1) =  1.0_realType
+            ! betaRKUnsteady(2,1) = -3.0_realType/ 4.0_realType
+            ! betaRKUnsteady(2,2) =  1.0_realType/ 4.0_realType
+            ! betaRKUnsteady(3,1) = -1.0_realType/12.0_realType
+            ! betaRKUnsteady(3,2) = -1.0_realType/12.0_realType
+            ! betaRKUnsteady(3,3) =  2.0_realType/ 3.0_realType
+
+            ! gammaRKUnsteady(1)  = 0.0_realType
+            ! gammaRKUnsteady(2)  = 1.0_realType
+            ! gammaRKUnsteady(3)  = 0.5_realType
+
+            !==============================================================
+
+        case default
+            call terminate("setStageCoeffExplicitRK", &
+                           "No higher order stuff yet")
+        end select
+
+    end subroutine setStageCoeffExplicitRK
+    subroutine surfaceVariables(variables)
+        !
+        !       surfaceVariables extracts from the given string the surface
+        !       variables to be written to the solution file.
+        !
+        use constants
+        use extraOutput
+        use utils, only: convertToLowerCase, terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        character(len=*), intent(inout) :: variables
+        !
+        !      Local variables.
+        !
+        integer :: nVarSpecified, pos
 
-    select case (timeAccuracy)
-    case (firstOrder)
-       nRKStagesUnsteady = 1
+        character(len=15) :: keyword
+        character(len=maxStringLen) :: errorMessage
 
-    case (secondOrder)
-       nRKStagesUnsteady = 2
+        ! Convert the string variables to lower case.
 
-    case (thirdOrder)
-       nRKStagesUnsteady = 3
+        call convertToLowerCase(variables)
 
-    case default
-       call terminate("setStageCoeffExplicitRK", &
-            "No higher order stuff yet")
-    end select
+        ! Initialize all the surface output variables to .false.
 
-    ! Allocate and determine betaRKUnsteady and gammaRKUnsteady.
+        surfWriteRho = .false.
+        surfWriteP = .false.
+        surfWriteTemp = .false.
+        surfWriteVx = .false.
+        surfWriteVy = .false.
+        surfWriteVz = .false.
+        surfWriteRVx = .false.
+        surfWriteRVy = .false.
+        surfWriteRVz = .false.
 
-    allocate(betaRKUnsteady(nRKStagesUnsteady,nRKStagesUnsteady), &
-         gammaRKUnsteady(nRKStagesUnsteady), stat=ierr)
-    if(ierr /= 0)                               &
-         call terminate("setStageCoeffExplicitRK", &
-         "Memory allocation failure for betaRKUnsteady &
-         &and gammaRKUnsteady.")
+        surfWriteCp = .false.
+        surfWritePtotloss = .false.
+        surfWriteMach = .false.
+        surfWriteRMach = .false.
 
-    betaRKUnsteady = zero
+        surfWriteCf = .false.
+        surfWriteCh = .false.
+        surfWriteYplus = .false.
+        surfWriteCfx = .false.
+        surfWriteCfy = .false.
+        surfWriteCfz = .false.
+
+        surfWriteBlank = .false.
+        surfWriteSepSensor = .false.
+        surfWriteCavitation = .false.
+        surfWriteAxisMoment = .false.
+        surfWriteGC = .false.
+
+        ! Initialize nVarSpecified to 0. This serves as a test
+        ! later on.
+
+        nVarSpecified = 0
+
+        ! Loop to extract the info from the string variables.
+
+        do
+            ! Condition to exit the loop.
+
+            if (len_trim(variables) == 0) exit
+
+            ! Locate the first occurance of the _ in the string and
+            ! determine the string keyword.
+
+            pos = index(variables, "_")
+            if (pos == 0) then
+                keyword = variables
+                variables = ""
+            else
+                keyword = variables(:pos - 1)
+                variables = variables(pos + 1:)
+            end if
+
+            ! Check the keyword.
+
+            select case (keyword)
+            case ("")
+                ! Multiple occurence of "_". Just ignore it.
+
+            case ("rho")
+                surfWriteRho = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("p")
+                surfWriteP = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("temp")
+                surfWriteTemp = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("vx")
+                surfWriteVx = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("vy")
+                surfWriteVy = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("vz")
+                surfWriteVz = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("rvx")
+                surfWriteRVx = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("rvy")
+                surfWriteRVy = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("rvz")
+                surfWriteRVz = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("cp")
+                surfWriteCp = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("ptloss")
+                surfWritePtotloss = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("mach")
+                surfWriteMach = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("rmach")
+                surfWriteRMach = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("cf")
+                surfWriteCf = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("ch")
+                surfWriteCh = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("yplus")
+                surfWriteYplus = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("cfx")
+                surfWriteCfx = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("cfy")
+                surfWriteCfy = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("cfz")
+                surfWriteCfz = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("blank")
+                surfWriteBlank = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("sepsensor")
+                surfWriteSepSensor = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("cavitation")
+                surfWriteCavitation = .true.
+                nVarSpecified = nVarSpecified + 1
 
-    select case (timeAccuracy)
-    case (firstOrder)
+            case ("axismoment")
+                surfWriteAxisMoment = .true.
+                nVarSpecified = nVarSpecified + 1
 
-       ! Just the forward Euler time integration scheme.
+            case ("gc")
+                surfWriteGC = .True.
+                nVarSpecified = nVarSpecified + 1
 
-       betaRKUnsteady(1,1) = 1.0_realType
-       gammaRKUnsteady(1)  = 0.0_realType
-
-       !==============================================================
-
-    case (secondOrder)
-
-       ! The TVD Runge Kutta scheme which allows for the maximum
-       ! CFL number (1.0).
-
-       betaRKUnsteady(1,1) =  1.0_realType
-       betaRKUnsteady(2,1) = -0.5_realType
-       betaRKUnsteady(2,2) =  0.5_realType
-
-       gammaRKUnsteady(1)  = 0.0_realType
-       gammaRKUnsteady(2)  = 1.0_realType
-
-       !==============================================================
-
-    case (thirdOrder)
-
-       ! Low storage (although not exploited in this implemetation)
-       ! 3 stage scheme of Le and Moin.
-
-       betaRKUnsteady(1,1) =   8.0_realType/15.0_realType
-       betaRKUnsteady(2,1) = -17.0_realType/60.0_realType
-       betaRKUnsteady(2,2) =   5.0_realType/12.0_realType
-       betaRKUnsteady(3,2) =  -5.0_realType/12.0_realType
-       betaRKUnsteady(3,3) =   3.0_realType/ 4.0_realType
-
-       gammaRKUnsteady(1)  = 0.0_realType
-       gammaRKUnsteady(2)  = 8.0_realType/15.0_realType
-       gammaRKUnsteady(3)  = 2.0_realType/ 3.0_realType
-
-       ! The TVD Runge Kutta scheme which allows for the maximum
-       ! CFL number (1.0).
+            case default
+                pos = len_trim(keyword)
+                write (errorMessage, "(3a)") "Unknown surface output &
+                     &variable, ", trim(keyword), &
+                     ", specified"
+                call terminate("surfaceVariables", errorMessage)
 
-       ! betaRKUnsteady(1,1) =  1.0_realType
-       ! betaRKUnsteady(2,1) = -3.0_realType/ 4.0_realType
-       ! betaRKUnsteady(2,2) =  1.0_realType/ 4.0_realType
-       ! betaRKUnsteady(3,1) = -1.0_realType/12.0_realType
-       ! betaRKUnsteady(3,2) = -1.0_realType/12.0_realType
-       ! betaRKUnsteady(3,3) =  2.0_realType/ 3.0_realType
-
-       ! gammaRKUnsteady(1)  = 0.0_realType
-       ! gammaRKUnsteady(2)  = 1.0_realType
-       ! gammaRKUnsteady(3)  = 0.5_realType
-
-       !==============================================================
-
-    case default
-       call terminate("setStageCoeffExplicitRK", &
-            "No higher order stuff yet")
-    end select
-
-  end subroutine setStageCoeffExplicitRK
-  subroutine surfaceVariables(variables)
-    !
-    !       surfaceVariables extracts from the given string the surface
-    !       variables to be written to the solution file.
-    !
-    use constants
-    use extraOutput
-    use utils, only : convertToLowerCase, terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    character(len=*), intent(inout) :: variables
-    !
-    !      Local variables.
-    !
-    integer :: nVarSpecified, pos
+            end select
+
+        end do
 
-    character(len=15) :: keyword
-    character(len=maxStringLen) :: errorMessage
+        ! Set surfaceOutSpecified to .true. if variables were specified.
+        ! If not, later on the defaults will be set.
 
-    ! Convert the string variables to lower case.
+        if (nVarSpecified > 0) surfaceOutSpecified = .true.
 
-    call convertToLowerCase(variables)
+    end subroutine surfaceVariables
 
-    ! Initialize all the surface output variables to .false.
+    subroutine volumeVariables(variables)
+        !
+        !       volumeVariables extracts from the given string the extra
+        !       volume variables to be written to the solution file.
+        !
+        use constants
+        use extraOutput
+        use utils, only: convertToLowerCase, terminate
+        use inputPhysics, only: useRoughSA
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        character(len=*), intent(inout) :: variables
+        !
+        !      Local variables.
+        !
+        integer :: nVarSpecified, pos
+
+        character(len=15) :: keyword
+        character(len=maxStringLen) :: errorMessage
 
-    surfWriteRho  = .false.
-    surfWriteP    = .false.
-    surfWriteTemp = .false.
-    surfWriteVx   = .false.
-    surfWriteVy   = .false.
-    surfWriteVz   = .false.
-    surfWriteRVx   = .false.
-    surfWriteRVy   = .false.
-    surfWriteRVz   = .false.
+        ! Convert the string variables to lower case.
 
-    surfWriteCp       = .false.
-    surfWritePtotloss = .false.
-    surfWriteMach     = .false.
-    surfWriteRMach     = .false.
+        call convertToLowerCase(variables)
 
-    surfWriteCf    = .false.
-    surfWriteCh    = .false.
-    surfWriteYplus = .false.
-    surfWriteCfx   = .false.
-    surfWriteCfy   = .false.
-    surfWriteCfz   = .false.
-
-    surfWriteBlank = .false.
-    surfWriteSepSensor = .false.
-    surfWriteCavitation = .false.
-    surfWriteAxisMoment = .false.
-    surfWriteGC = .false.
-
-    ! Initialize nVarSpecified to 0. This serves as a test
-    ! later on.
-
-    nVarSpecified = 0
-
-    ! Loop to extract the info from the string variables.
-
-    do
-       ! Condition to exit the loop.
-
-       if(len_trim(variables) == 0) exit
-
-       ! Locate the first occurance of the _ in the string and
-       ! determine the string keyword.
-
-       pos = index(variables, "_")
-       if(pos == 0) then
-          keyword   = variables
-          variables = ""
-       else
-          keyword   = variables(:pos-1)
-          variables = variables(pos+1:)
-       endif
-
-       ! Check the keyword.
-
-       select case (keyword)
-       case ("")
-          ! Multiple occurence of "_". Just ignore it.
-
-       case ("rho")
-          surfWriteRho = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("p")
-          surfWriteP = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("temp")
-          surfWriteTemp = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("vx")
-          surfWriteVx = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("vy")
-          surfWriteVy = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("vz")
-          surfWriteVz = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("rvx")
-          surfWriteRVx = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("rvy")
-          surfWriteRVy = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("rvz")
-          surfWriteRVz = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("cp")
-          surfWriteCp = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("ptloss")
-          surfWritePtotloss = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("mach")
-          surfWriteMach = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("rmach")
-          surfWriteRMach = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("cf")
-          surfWriteCf = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("ch")
-          surfWriteCh = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("yplus")
-          surfWriteYplus = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("cfx")
-          surfWriteCfx = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("cfy")
-          surfWriteCfy = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("cfz")
-          surfWriteCfz = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("blank")
-          surfWriteBlank = .true.
-          nVarSpecified = nVarSpecified + 1
+        ! Initialize all the volume output variables to .False.
 
-       case ("sepsensor")
-          surfWriteSepSensor = .true.
-          nVarSpecified = nVarSpecified + 1
+        volWriteMx = .false.
+        volWriteMy = .false.
+        volWriteMz = .false.
+        volWriteRhoe = .false.
+        volWriteTemp = .false.
+        volWriteVort = .false.
+        volWriteVortx = .false.
+        volWriteVorty = .false.
+        volWriteVortz = .false.
 
-       case ("cavitation")
-          surfWriteCavitation = .true.
-          nVarSpecified = nVarSpecified + 1
+        volWriteCp = .false.
+        volWriteMach = .false.
+        volWriteMachTurb = .false.
+        volWritePtotloss = .false.
+
+        volWriteEddyVis = .false.
+        volWriteRatioEddyVis = .false.
+        volWriteDist = .false.
+
+        volWriteResRho = .false.
+        volWriteResMom = .false.
+        volWriteResRhoe = .false.
+        volWriteResTurb = .false.
 
-       case ("axismoment")
-          surfWriteAxisMoment = .true.
-          nVarSpecified = nVarSpecified + 1
+        volWriteShock = .false.
+        volWriteFilteredShock = .false.
+
+        volWriteBlank = .false.
+        volWriteGC = .false.
+        volWriteStatus = .false.
+        volWriteIntermittency = .false.
 
-       case ("gc")
-          surfWriteGC = .True.
-          nVarSpecified = nVarSpecified + 1
+        volWriteKs = .false.
 
-       case default
-          pos = len_trim(keyword)
-          write(errorMessage,"(3a)") "Unknown surface output &
-               &variable, ", trim(keyword), &
-               ", specified"
-          call terminate("surfaceVariables", errorMessage)
+        ! Initialize nVarSpecified to 0. This serves as a test
+        ! later on.
 
-       end select
+        nVarSpecified = 0
+
+        ! Loop to extract the info from the string variables.
 
-    enddo
+        do
+            ! Condition to exit the loop.
 
-    ! Set surfaceOutSpecified to .true. if variables were specified.
-    ! If not, later on the defaults will be set.
+            if (len_trim(variables) == 0) exit
 
-    if(nVarSpecified > 0) surfaceOutSpecified = .true.
+            ! Locate the first occurance of the _ in the string and
+            ! determine the string keyword.
 
-  end subroutine surfaceVariables
+            pos = index(variables, "_")
+            if (pos == 0) then
+                keyword = variables
+                variables = ""
+            else
+                keyword = variables(:pos - 1)
+                variables = variables(pos + 1:)
+            end if
 
-  subroutine volumeVariables(variables)
-    !
-    !       volumeVariables extracts from the given string the extra
-    !       volume variables to be written to the solution file.
-    !
-    use constants
-    use extraOutput
-    use utils, only : convertToLowerCase, terminate
-    use inputPhysics, only : useRoughSA
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    character(len=*), intent(inout) :: variables
-    !
-    !      Local variables.
-    !
-    integer :: nVarSpecified, pos
+            ! Check the keyword.
 
-    character(len=15) :: keyword
-    character(len=maxStringLen) :: errorMessage
+            select case (keyword)
+            case ("")
+                ! Multiple occurence of "_". Just ignore it.
 
-    ! Convert the string variables to lower case.
+            case ("mx")
+                volWriteMx = .true.
+                nVarSpecified = nVarSpecified + 1
 
-    call convertToLowerCase(variables)
+            case ("my")
+                volWriteMy = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("mz")
+                volWriteMz = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("rvx")
+                volWriteRVx = .true.
+                nVarSpecified = nVarSpecified + 1
 
-    ! Initialize all the volume output variables to .False.
+            case ("rvy")
+                volWriteRVy = .true.
+                nVarSpecified = nVarSpecified + 1
 
-    volWriteMx    = .false.
-    volWriteMy    = .false.
-    volWriteMz    = .false.
-    volWriteRhoe  = .false.
-    volWriteTemp  = .false.
-    volWriteVort  = .false.
-    volWriteVortx = .false.
-    volWriteVorty = .false.
-    volWriteVortz = .false.
+            case ("rvz")
+                volWriteRVz = .true.
+                nVarSpecified = nVarSpecified + 1
 
-    volWriteCp       = .false.
-    volWriteMach     = .false.
-    volWriteMachTurb = .false.
-    volWritePtotloss = .false.
+            case ("rhoe")
+                volWriteRhoe = .true.
+                nVarSpecified = nVarSpecified + 1
 
-    volWriteEddyVis      = .false.
-    volWriteRatioEddyVis = .false.
-    volWriteDist         = .false.
+            case ("temp")
+                volWriteTemp = .true.
+                nVarSpecified = nVarSpecified + 1
 
-    volWriteResRho  = .false.
-    volWriteResMom  = .false.
-    volWriteResRhoe = .false.
-    volWriteResTurb = .false.
+            case ("vort")
+                volWriteVort = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("vortx")
+                volWriteVortx = .true.
+                nVarSpecified = nVarSpecified + 1
 
-    volWriteShock   = .false.
-    volWriteFilteredShock = .false.
+            case ("vorty")
+                volWriteVorty = .true.
+                nVarSpecified = nVarSpecified + 1
 
-    volWriteBlank = .false.
-    volWriteGC = .false.
-    volWriteStatus = .false.
-    volWriteIntermittency = .false.
+            case ("vortz")
+                volWriteVortz = .true.
+                nVarSpecified = nVarSpecified + 1
 
-    volWriteKs = .false.
+            case ("cp")
+                volWriteCp = .true.
+                nVarSpecified = nVarSpecified + 1
 
+            case ("mach")
+                volWriteMach = .true.
+                nVarSpecified = nVarSpecified + 1
 
-    ! Initialize nVarSpecified to 0. This serves as a test
-    ! later on.
+            case ("rmach")
+                volWriteRMach = .true.
+                nVarSpecified = nVarSpecified + 1
 
-    nVarSpecified = 0
+            case ("macht")
+                volWriteMachTurb = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("ptloss")
+                volWritePtotloss = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("eddy")
+                volWriteEddyVis = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("eddyratio")
+                volWriteRatioEddyVis = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("dist")
+                volWriteDist = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("resrho")
+                volWriteResRho = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("resmom")
+                volWriteResMom = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("resrhoe")
+                volWriteResRhoe = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("resturb")
+                volWriteResTurb = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("shock")
+                volWriteShock = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("filteredshock")
+                volWriteFilteredShock = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("blank")
+                volWriteBlank = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("gc")
+                volWriteGC = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("status")
+                volWriteStatus = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("intermittency")
+                volWriteIntermittency = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case ("ks")
+                if (.not. useRoughSA) then
+                    call terminate("volumeVariables", "Can not export surface roughness &
+                    &values ('volumeVariables': ['ks']) when the rough SA variant is not &
+                    &used (useRoughSA = False)")
+                end if
+                volWriteKs = .true.
+                nVarSpecified = nVarSpecified + 1
+
+            case default
+                pos = len_trim(keyword)
+                write (errorMessage, "(3a)") "Unknown extra volume output &
+                     &variable, ", trim(keyword), &
+                     ", specified"
+                call terminate("volumeVariables", errorMessage)
+
+            end select
+
+        end do
+
+        ! Set volumeOutSpecified to .true. if variables were specified.
+        ! If not, later on the defaults will be set.
+
+        if (nVarSpecified > 0) volumeOutSpecified = .true.
+
+    end subroutine volumeVariables
+
+    subroutine checkInputParam
+        !
+        !       checkInputParam checks if all necessary data has been
+        !       specified. If some key data is missing an error message will
+        !       be printed and the program will exit. Key data depends on the
+        !       case to be solved. E.g. for the Navier Stokes equations it is
+        !       necessary to specify the Reynolds number, but for Euler this
+        !       can be omitted.
+        !       Furthermore warnings are printed in case parameters have been
+        !       specified that are ignored, e.g. Mach number for internal flow
+        !       computations.
+        !       Note that only processor 0 prints warning and error messages,
+        !       such that the output does not become messy.
+        !
+        use constants
+        !  --------- Bare imports...too many to list -------
+        use inputDiscretization
+        use inputIO
+        use inputIteration
+        use inputMotion
+        use inputOverset
+        use inputParallel
+        use inputPhysics
+        use inputTimeSpectral
+        use inputUnsteady
+        use inputADjoint
+        use inputTSStabDeriv
+        !  ------------------------------------------------
+        use communication, only: myid, adflow_comm_world
+        use iteration, only: coefTime, coefTimeALE, coefMeshALE, &
+                             oldSolWritten, nALEMeshes, nALESteps, nOldLevels
+        use monitor, only: nTimeStepsRestart
+        use utils, only: terminate
+        implicit none
+        !
+        !      Local variables
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: nn, oldSolWrittenSize
+
+        real(kind=realType) :: vecLength, dot
+
+        logical :: gridPrecisionWarning, solPrecisionWarning
+
+        !       Discretization parameters. Check if the key parameters have
+        !       been specified and set some coarse grid parameters in case
+        !       these have not been specified.
+        !
+        if (spaceDiscr == none) then
+            if (myID == 0) &
+                call terminate("checkInputParam", &
+                               "Discretization scheme not specified")
+            call mpi_barrier(ADflow_comm_world, ierr)
+        end if
+
+        if (spaceDiscrCoarse == none) spaceDiscrCoarse = spaceDiscr
+
+        if (riemannCoarse == none) riemannCoarse = riemann
+
+        ! Set dirScaling to .false. if a scheme other than scalar
+        ! dissipation is used.
+
+        if (spaceDiscr /= dissScalar) dirScaling = .false.
+
+        ! Determine whether or not the spectral radIi are needed for
+        ! the flux computations.
+
+        radiiNeededFine = .false.
+        if (spaceDiscr == dissScalar) radiiNeededFine = .true.
+
+        radiiNeededCoarse = .false.
+        if (spaceDiscrCoarse == dissScalar) radiiNeededCoarse = .true.
+        !
+        !       IO parameters. Check if the grid file has been specified
+        !       Possibly correct the
+        !       value of restart. Note that restart got the default value of
+        !       .true. in case no restart file has been specified it is now
+        !       set to false. Set the names of the solution files if not
+        !       specified and check if a cp curve fit file has been specified
+        !       if curve fits must be used.
+        !       If the code has been compiled without cgns check that the file
+        !       format is not cgns.
+        !       Overwrite storeConvInnerIter to .true. if this is not an
+        !       unsteady computation.
+        !
+        if (gridFile == "") then
+            if (myID == 0) &
+                call terminate("checkInputParam", "Grid file not specified")
+            call mpi_barrier(ADflow_comm_world, ierr)
+        end if
+
+        if (newGridFile == "") then
+            newGridFile = "NewGrid.cgns"
+        end if
+
+        if (solFile == "") then
+            solFile = "SolADflow.cgns"
+        end if
+
+        if (surfaceSolFile == "") &
+            surfaceSolFile = trim(solfile)//"Surface"
+
+        if (cpModel == cpTempCurveFits .and. cpFile == "") then
+            if (myID == 0) &
+                call terminate("checkInputParam", &
+                               "Cp curve fit file not specified")
+            call mpi_barrier(ADflow_comm_world, ierr)
+        end if
 
-    ! Loop to extract the info from the string variables.
+#ifdef USE_NO_CGNS
 
-    do
-       ! Condition to exit the loop.
+        if (fileFormatRead == cgnsFormat .or. &
+            fileFormatWrite == cgnsFormat) then
+            if (myID == 0) &
+                call terminate("checkInputParam", &
+                               "cgns support disabled during compile time")
+            call mpi_barrier(ADflow_comm_world, ierr)
+        end if
 
-       if(len_trim(variables) == 0) exit
+#endif
 
-       ! Locate the first occurance of the _ in the string and
-       ! determine the string keyword.
+        if (equationMode == unsteady) then
+            if (timeIntegrationScheme == explicitRK) &
+                storeConvInnerIter = .false.
+        end if
+        !
+        !       Iteration parameters. Check if the key parameters have specified
+        !       been and set some coarse grid parameters in case these
+        !       have not been specified.
+        !
+        if (equationMode == unsteady .and. &
+            timeIntegrationScheme == explicitRK) then
+            smoother = none
+        else
+            if (smoother == none) then
+                if (myID == 0) &
+                    call terminate("checkInputParam", "Smoother not specified")
+                call mpi_barrier(ADflow_comm_world, ierr)
+            end if
 
-       pos = index(variables, "_")
-       if(pos == 0) then
-          keyword   = variables
-          variables = ""
-       else
-          keyword   = variables(:pos-1)
-          variables = variables(pos+1:)
-       endif
+            if (ncycles < 0) then
+                if (myID == 0) &
+                     call terminate("checkInputParam", &
+                     "Number of multigrid cycles not or wrongly &
+                     &specified")
+                call mpi_barrier(ADflow_comm_world, ierr)
+            end if
 
-       ! Check the keyword.
+            if (cfl < zero) then
+                if (myID == 0) &
+                    call terminate("checkInputParam", &
+                                   "cfl number not or wrongly specified")
+                call mpi_barrier(ADflow_comm_world, ierr)
+            end if
 
-       select case (keyword)
-       case ("")
-          ! Multiple occurence of "_". Just ignore it.
+            if (l2Conv <= zero .or. L2Conv >= one) then
+                if (myID == 0) &
+                     call terminate("checkInputParam", &
+                     "Relative L2 norm for convergence must be a &
+                     & number between 0 and 1.")
+                call mpi_barrier(ADflow_comm_world, ierr)
+            end if
 
-       case ("mx")
-          volWriteMx = .true.
-          nVarSpecified = nVarSpecified + 1
+            if (l2ConvCoarse <= zero .or. L2ConvCoarse >= one) then
+                if (myID == 0) &
+                     call terminate("checkInputParam", &
+                     "Relative L2 norm for convergence coarse grid &
+                     &must be a number between 0 and 1.")
+                call mpi_barrier(ADflow_comm_world, ierr)
+            end if
+        end if
+        !
+        !       Grid motion parameters. These can only be specified for an
+        !       external flow problem.
+        !
+        if (flowType == internalFlow .and. gridMotionSpecified) then
+            if (myID == 0) &
+                 call terminate("checkInputParam", &
+                 "Grid motion specified for an internal flow; &
+                 &this is not possible")
+            call mpi_barrier(ADflow_comm_world, ierr)
+        end if
+        !
+        !       Physics parameters. Check if the key parameters have been
+        !       specified and set the unit vector for the free-stream velocity.
+        !
+        if (equations == none) then
+            if (myID == 0) &
+                call terminate("checkInputParam", "Equations not specified")
+            call mpi_barrier(ADflow_comm_world, ierr)
+        end if
+
+        if (equationMode == none) then
+            if (myID == 0) &
+                call terminate("checkInputParam", "Mode not specified")
+            call mpi_barrier(ADflow_comm_world, ierr)
+        end if
+
+        if (flowType == none) then
+            if (myID == 0) &
+                call terminate("checkInputParam", "Flow type not specified")
+            call mpi_barrier(ADflow_comm_world, ierr)
+        end if
+
+        if (Mach < zero .and. flowType == externalFlow) then
+            if (myID == 0) &
+                call terminate("checkInputParam", &
+                               "Mach not or wrongly specified")
+            call mpi_barrier(ADflow_comm_world, ierr)
+        end if
+
+        if (equations == RANSEquations .and. turbModel == none) then
+            if (myID == 0) &
+                call terminate("checkInputParam", &
+                               "Turbulence model not specified")
+            call mpi_barrier(ADflow_comm_world, ierr)
+        end if
+
+        ! Create a unit vector for the free stream velocity. It is checked
+        ! if the vector specified is a valid one. If not processor 0 prints
+        ! an error message. Only for external flows.
+
+        if (flowType == externalFlow) then
+            vecLength = sqrt(velDirFreestream(1)*velDirFreestream(1) &
+                             + velDirFreestream(2)*velDirFreestream(2) &
+                             + velDirFreestream(3)*velDirFreestream(3))
+            if (vecLength < eps) then
+                if (myID == 0) &
+                     call terminate("checkInputParam", &
+                     "Free stream velocity direction wrongly &
+                     &specified")
+                call mpi_barrier(ADflow_comm_world, ierr)
+            end if
 
-       case ("my")
-          volWriteMy = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("mz")
-          volWriteMz = .true.
-          nVarSpecified = nVarSpecified + 1
+            vecLength = one/vecLength
+            velDirFreestream(1) = velDirFreestream(1)*vecLength
+            velDirFreestream(2) = velDirFreestream(2)*vecLength
+            velDirFreestream(3) = velDirFreestream(3)*vecLength
+        else
+            ! Internal flow; simply reset the velocity direction. The value
+            ! will be determined later from the inflow boundary conditions.
 
-       case ("rvx")
-          volWriteRVx = .true.
-          nVarSpecified = nVarSpecified + 1
+            velDirFreestream(1) = one
+            velDirFreestream(2) = zero
+            velDirFreestream(3) = zero
+        end if
 
-       case ("rvy")
-          volWriteRVy = .true.
-          nVarSpecified = nVarSpecified + 1
+        ! Set the drag direction to the velocity direction.
 
-       case ("rvz")
-          volWriteRVz = .true.
-          nVarSpecified = nVarSpecified + 1
+        dragDirection = velDirFreestream
 
-       case ("rhoe")
-          volWriteRhoe = .true.
-          nVarSpecified = nVarSpecified + 1
+        ! Check the lift direction if it was specified for an external
+        ! flow. Otherwise set the default direction.
 
-       case ("temp")
-          volWriteTemp = .true.
-          nVarSpecified = nVarSpecified + 1
+        if (liftDirSpecified .and. flowType == externalFlow) then
 
-       case ("vort")
-          volWriteVort = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("vortx")
-          volWriteVortx = .true.
-          nVarSpecified = nVarSpecified + 1
+            ! Create a unit vector. Perform the same check as for
+            ! for the free stream velocity direction.
 
-       case ("vorty")
-          volWriteVorty = .true.
-          nVarSpecified = nVarSpecified + 1
+            vecLength = sqrt(liftDirection(1)*liftDirection(1) &
+                             + liftDirection(2)*liftDirection(2) &
+                             + liftDirection(3)*liftDirection(3))
+            if (vecLength < eps) then
+                if (myID == 0) &
+                    call terminate("checkInputParam", &
+                                   "Lift direction wrongly specified")
+                call mpi_barrier(ADflow_comm_world, ierr)
+            end if
 
-       case ("vortz")
-          volWriteVortz = .true.
-          nVarSpecified = nVarSpecified + 1
+            vecLength = one/vecLength
+            liftDirection(1) = liftDirection(1)*vecLength
+            liftDirection(2) = liftDirection(2)*vecLength
+            liftDirection(3) = liftDirection(3)*vecLength
 
-       case ("cp")
-          volWriteCp = .true.
-          nVarSpecified = nVarSpecified + 1
+            ! Check the orthogonality with the drag direction.
 
-       case ("mach")
-          volWriteMach = .true.
-          nVarSpecified = nVarSpecified + 1
+            dot = liftDirection(1)*dragDirection(1) &
+                  + liftDirection(2)*dragDirection(2) &
+                  + liftDirection(3)*dragDirection(3)
 
-       case ("rmach")
-          volWriteRMach = .true.
-          nVarSpecified = nVarSpecified + 1
+            if (abs(dot) > 1.e-3_realType) then
+                if (myID == 0) &
+                     call terminate("checkInputParam", &
+                     "Lift direction not orthogonal to &
+                     &free-stream")
+                call mpi_barrier(ADflow_comm_world, ierr)
+            end if
+
+        else
+
+            ! Lift direction not specified. Set the default direction.
+            ! It will have a zero component in the y-direction and a positive
+            ! one in the z-direction.
+
+            liftDirection(1) = -dragDirection(3)
+            liftDirection(2) = zero
+            liftDirection(3) = dragDirection(1)
+
+            if (liftDirection(3) < zero) then
+                liftDirection(1) = -liftDirection(1)
+                liftDirection(3) = -liftDirection(3)
+            end if
+        end if
+
+        ! Set the Mach number for the coefficients equal to the Mach
+        ! number if it was not specified. For internal flow field this
+        ! will again be changed in initFlo.
+
+        if (MachCoef < zero) MachCoef = Mach
+        !
+        !       Time spectral parameters. They only need to be specified for a
+        !       time spectral computation.
+        !
+        testSpectral: if (equationMode == timeSpectral) then
+
+            ! Check if the number of time intervals was specified.
+
+            if (nTimeIntervalsSpectral < 0) then
+                if (myID == 0) &
+                     call terminate("checkInputParam", &
+                     "Number time intervals spectral not or &
+                     &wrongly specified")
+                call mpi_barrier(ADflow_comm_world, ierr)
+            end if
+
+            ! If an unsteady restart solution file must be written, check
+            ! if the corresponding time step has been specified.
+
+            if (writeUnsteadyRestartSpectral) then
+                if (dtUnsteadyRestartSpectral <= zero) then
+                    if (myID == 0) &
+                         call terminate("checkInputParam", &
+                         "Time step (in sec) for unsteady restart &
+                         &not or wrongly specified.")
+                    call mpi_barrier(ADflow_comm_world, ierr)
+                end if
+            end if
+
+            ! If solution files (for postprocessing) must be written,
+            ! check if the number has been specified.
+
+            if (writeUnsteadyVolSpectral .or. &
+                writeUnsteadySurfSpectral) then
+                if (nunsteadySolSpectral <= 0) then
+                    if (myID == 0) &
+                         call terminate("checkInputParam", &
+                         "Number of unsteady solution files &
+                         &not or wrongly specified.")
+                    call mpi_barrier(ADflow_comm_world, ierr)
+                end if
+            end if
+
+            else testSpectral
+
+            ! No spectral method. Set nTimeIntervalsSpectral to 1.
+
+            nTimeIntervalsSpectral = 1
+
+        end if testSpectral
+        !
+        !       Unsteady parameters. They only need to be specified for an
+        !       unsteady computation.
+        !
+        testUnsteady: if (equationMode == unsteady) then
+
+            ! Physical time step parameters.
+
+            if (nTimeStepsFine < 0) then
+                if (myID == 0) &
+                     call terminate("checkInputParam", &
+                     "Number of unsteady time steps fine grid &
+                     &not or wrongly specified")
+                call mpi_barrier(ADflow_comm_world, ierr)
+            end if
 
-       case ("macht")
-          volWriteMachTurb = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("ptloss")
-          volWritePtotloss = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("eddy")
-          volWriteEddyVis = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("eddyratio")
-          volWriteRatioEddyVis = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("dist")
-          volWriteDist = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("resrho")
-          volWriteResRho = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("resmom")
-          volWriteResMom = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("resrhoe")
-          volWriteResRhoe = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("resturb")
-          volWriteResTurb = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("shock")
-          volWriteShock = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("filteredshock")
-          volWriteFilteredShock = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("blank")
-          volWriteBlank = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("gc")
-          volWriteGC = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case ("status")
-          volWriteStatus = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case("intermittency")
-          volWriteIntermittency = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case("ks")
-          if (.not. useRoughSA) then
-             call terminate("volumeVariables", "Can not export surface roughness &
-             &values ('volumeVariables': ['ks']) when the rough SA variant is not &
-             &used (useRoughSA = False)")
-          end if
-          volWriteKs = .true.
-          nVarSpecified = nVarSpecified + 1
-
-       case default
-          pos = len_trim(keyword)
-          write(errorMessage,"(3a)" ) "Unknown extra volume output &
-               &variable, ", trim(keyword), &
-               ", specified"
-          call terminate("volumeVariables", errorMessage)
-
-       end select
-
-    enddo
-
-    ! Set volumeOutSpecified to .true. if variables were specified.
-    ! If not, later on the defaults will be set.
-
-    if(nVarSpecified > 0) volumeOutSpecified = .true.
-
-  end subroutine volumeVariables
-
-  subroutine checkInputParam
-    !
-    !       checkInputParam checks if all necessary data has been
-    !       specified. If some key data is missing an error message will
-    !       be printed and the program will exit. Key data depends on the
-    !       case to be solved. E.g. for the Navier Stokes equations it is
-    !       necessary to specify the Reynolds number, but for Euler this
-    !       can be omitted.
-    !       Furthermore warnings are printed in case parameters have been
-    !       specified that are ignored, e.g. Mach number for internal flow
-    !       computations.
-    !       Note that only processor 0 prints warning and error messages,
-    !       such that the output does not become messy.
-    !
-    use constants
-    !  --------- Bare imports...too many to list -------
-    use inputDiscretization
-    use inputIO
-    use inputIteration
-    use inputMotion
-    use inputOverset
-    use inputParallel
-    use inputPhysics
-    use inputTimeSpectral
-    use inputUnsteady
-    use inputADjoint
-    use inputTSStabDeriv
-    !  ------------------------------------------------
-    use communication, only : myid, adflow_comm_world
-    use iteration, only : coefTime, coefTimeALE, coefMeshALE, &
-         oldSolWritten, nALEMeshes, nALESteps, nOldLevels
-    use monitor, only : nTimeStepsRestart
-    use utils, only : terminate
-    implicit none
-    !
-    !      Local variables
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: nn, oldSolWrittenSize
-
-    real(kind=realType) :: vecLength, dot
-
-    logical :: gridPrecisionWarning, solPrecisionWarning
-
-    !       Discretization parameters. Check if the key parameters have
-    !       been specified and set some coarse grid parameters in case
-    !       these have not been specified.
-    !
-    if(spaceDiscr == none) then
-       if(myID == 0)                       &
-            call terminate("checkInputParam", &
-            "Discretization scheme not specified")
-       call mpi_barrier(ADflow_comm_world, ierr)
-    endif
-
-    if(spaceDiscrCoarse == none) spaceDiscrCoarse = spaceDiscr
-
-    if(riemannCoarse == none) riemannCoarse = riemann
-
-    ! Set dirScaling to .false. if a scheme other than scalar
-    ! dissipation is used.
-
-    if(spaceDiscr /= dissScalar) dirScaling = .false.
-
-    ! Determine whether or not the spectral radIi are needed for
-    ! the flux computations.
-
-    radiiNeededFine = .false.
-    if(spaceDiscr == dissScalar) radiiNeededFine = .true.
-
-    radiiNeededCoarse = .false.
-    if(spaceDiscrCoarse == dissScalar) radiiNeededCoarse = .true.
-    !
-    !       IO parameters. Check if the grid file has been specified
-    !       Possibly correct the
-    !       value of restart. Note that restart got the default value of
-    !       .true. in case no restart file has been specified it is now
-    !       set to false. Set the names of the solution files if not
-    !       specified and check if a cp curve fit file has been specified
-    !       if curve fits must be used.
-    !       If the code has been compiled without cgns check that the file
-    !       format is not cgns.
-    !       Overwrite storeConvInnerIter to .true. if this is not an
-    !       unsteady computation.
-    !
-    if(gridFile == "") then
-       if(myID == 0) &
-            call terminate("checkInputParam", "Grid file not specified")
-       call mpi_barrier(ADflow_comm_world, ierr)
-    endif
-
-    if(newGridFile == "") then
-       newGridFile = "NewGrid.cgns"
-    endif
-
-    if(solFile == "") then
-       solFile = "SolADflow.cgns"
-    endif
-
-    if(surfaceSolFile == "") &
-         surfaceSolFile = trim(solfile)//"Surface"
-
-    if(cpModel == cpTempCurveFits .and. cpFile == "") then
-       if(myID == 0)                        &
-            call terminate("checkInputParam", &
-            "Cp curve fit file not specified")
-       call mpi_barrier(ADflow_comm_world, ierr)
-    endif
+            if (nTimeStepsCoarse < 0) nTimeStepsCoarse = nTimeStepsFine
 
-#ifdef USE_NO_CGNS
+            if (deltaT < 0) then
+                if (myID == 0) &
+                     call terminate("checkInputParam", &
+                     "Unsteady time step (in sec) &
+                     &not or wrongly specified")
+                call mpi_barrier(ADflow_comm_world, ierr)
+            end if
 
-    if(fileFormatRead  == cgnsFormat .or. &
-         fileFormatWrite == cgnsFormat) then
-       if(myID == 0)                       &
-            call terminate("checkInputParam", &
-            "cgns support disabled during compile time")
-       call mpi_barrier(ADflow_comm_world, ierr)
-    endif
+            ! Check if the rigid body rotation parameters are consistent.
+            ! The polynomial rotation coefficients.
 
-#endif
+            if (degreePolXRot >= 0 .and. &
+                .not. allocated(coefPolXRot)) then
+                if (myID == 0) &
+                     call terminate("checkInputParam", &
+                     "Polynomial coefficients x-rotation &
+                     &not specified")
+                call mpi_barrier(ADflow_comm_world, ierr)
+            end if
+
+            if (degreePolYRot >= 0 .and. &
+                .not. allocated(coefPolYRot)) then
+                if (myID == 0) &
+                     call terminate("checkInputParam", &
+                     "Polynomial coefficients y-rotation &
+                     &not specified")
+                call mpi_barrier(ADflow_comm_world, ierr)
+            end if
+
+            if (degreePolZRot >= 0 .and. &
+                .not. allocated(coefPolZRot)) then
+                if (myID == 0) &
+                     call terminate("checkInputParam", &
+                     "Polynomial coefficients z-rotation &
+                     &not specified")
+                call mpi_barrier(ADflow_comm_world, ierr)
+            end if
 
-    if(equationMode == unsteady) then
-       if(timeIntegrationScheme == explicitRK) &
-            storeConvInnerIter = .false.
-    endif
-    !
-    !       Iteration parameters. Check if the key parameters have specified
-    !       been and set some coarse grid parameters in case these
-    !       have not been specified.
-    !
-    if(equationMode          == unsteady .and. &
-         timeIntegrationScheme == explicitRK) then
-       smoother = none
-    else
-       if(smoother == none) then
-          if(myID == 0) &
-               call terminate("checkInputParam", "Smoother not specified")
-          call mpi_barrier(ADflow_comm_world, ierr)
-       endif
-
-       if(ncycles < 0) then
-          if(myID == 0)                        &
-               call terminate("checkInputParam", &
-               "Number of multigrid cycles not or wrongly &
-               &specified")
-          call mpi_barrier(ADflow_comm_world, ierr)
-       endif
-
-       if(cfl < zero) then
-          if(myID == 0)                        &
-               call terminate("checkInputParam", &
-               "cfl number not or wrongly specified")
-          call mpi_barrier(ADflow_comm_world, ierr)
-       endif
-
-       if(l2Conv <= zero .or. L2Conv >= one) then
-          if(myID == 0)                        &
-               call terminate("checkInputParam", &
-               "Relative L2 norm for convergence must be a &
-               & number between 0 and 1.")
-          call mpi_barrier(ADflow_comm_world, ierr)
-       endif
-
-       if(l2ConvCoarse <= zero .or. L2ConvCoarse >= one) then
-          if(myID == 0)                        &
-               call terminate("checkInputParam", &
-               "Relative L2 norm for convergence coarse grid &
-               &must be a number between 0 and 1.")
-          call mpi_barrier(ADflow_comm_world, ierr)
-       endif
-    endif
-    !
-    !       Grid motion parameters. These can only be specified for an
-    !       external flow problem.
-    !
-    if(flowType == internalFlow .and. gridMotionSpecified) then
-       if(myID == 0) &
-            call terminate("checkInputParam", &
-            "Grid motion specified for an internal flow; &
-            &this is not possible")
-       call mpi_barrier(ADflow_comm_world, ierr)
-    endif
-    !
-    !       Physics parameters. Check if the key parameters have been
-    !       specified and set the unit vector for the free-stream velocity.
-    !
-    if(equations == none) then
-       if(myID == 0) &
-            call terminate("checkInputParam", "Equations not specified")
-       call mpi_barrier(ADflow_comm_world, ierr)
-    endif
-
-    if(equationMode == none) then
-       if(myID == 0) &
-            call terminate("checkInputParam", "Mode not specified")
-       call mpi_barrier(ADflow_comm_world, ierr)
-    endif
-
-    if(flowType == none) then
-       if(myID == 0) &
-            call terminate("checkInputParam", "Flow type not specified")
-       call mpi_barrier(ADflow_comm_world, ierr)
-    endif
-
-    if(Mach < zero .and. flowType == externalFlow) then
-       if(myID == 0)                        &
-            call terminate("checkInputParam", &
-            "Mach not or wrongly specified")
-       call mpi_barrier(ADflow_comm_world, ierr)
-    endif
+            ! The fourier rotation coefficients.
 
-    if(equations == RANSEquations .and. turbModel == none) then
-       if(myID == 0)                        &
-            call terminate("checkInputParam", &
-            "Turbulence model not specified")
-       call mpi_barrier(ADflow_comm_world, ierr)
-    endif
-
-    ! Create a unit vector for the free stream velocity. It is checked
-    ! if the vector specified is a valid one. If not processor 0 prints
-    ! an error message. Only for external flows.
-
-    if(flowType == externalFlow) then
-       vecLength = sqrt(velDirFreestream(1)*velDirFreestream(1) &
-            +      velDirFreestream(2)*velDirFreestream(2) &
-            +      velDirFreestream(3)*velDirFreestream(3))
-       if(vecLength < eps) then
-          if(myID == 0)                        &
-               call terminate("checkInputParam", &
-               "Free stream velocity direction wrongly &
-               &specified")
-          call mpi_barrier(ADflow_comm_world, ierr)
-       endif
-
-       vecLength = one/vecLength
-       velDirFreestream(1) = velDirFreestream(1)*vecLength
-       velDirFreestream(2) = velDirFreestream(2)*vecLength
-       velDirFreestream(3) = velDirFreestream(3)*vecLength
-    else
-       ! Internal flow; simply reset the velocity direction. The value
-       ! will be determined later from the inflow boundary conditions.
-
-       velDirFreestream(1) = one
-       velDirFreestream(2) = zero
-       velDirFreestream(3) = zero
-    endif
-
-    ! Set the drag direction to the velocity direction.
-
-    dragDirection = velDirFreestream
-
-    ! Check the lift direction if it was specified for an external
-    ! flow. Otherwise set the default direction.
-
-    if(liftDirSpecified .and. flowType == externalFlow) then
-
-       ! Create a unit vector. Perform the same check as for
-       ! for the free stream velocity direction.
-
-       vecLength = sqrt(liftDirection(1)*liftDirection(1) &
-            +      liftDirection(2)*liftDirection(2) &
-            +      liftDirection(3)*liftDirection(3))
-       if(vecLength < eps) then
-          if(myID == 0)                        &
-               call terminate("checkInputParam", &
-               "Lift direction wrongly specified")
-          call mpi_barrier(ADflow_comm_world, ierr)
-       endif
-
-       vecLength = one/vecLength
-       liftDirection(1) = liftDirection(1)*vecLength
-       liftDirection(2) = liftDirection(2)*vecLength
-       liftDirection(3) = liftDirection(3)*vecLength
-
-       ! Check the orthogonality with the drag direction.
-
-       dot = liftDirection(1)*dragDirection(1) &
-            + liftDirection(2)*dragDirection(2) &
-            + liftDirection(3)*dragDirection(3)
-
-       if(abs(dot) > 1.e-3_realType) then
-          if(myID == 0)                       &
-               call terminate("checkInputParam", &
-               "Lift direction not orthogonal to &
-               &free-stream")
-          call mpi_barrier(ADflow_comm_world, ierr)
-       endif
-
-    else
-
-       ! Lift direction not specified. Set the default direction.
-       ! It will have a zero component in the y-direction and a positive
-       ! one in the z-direction.
-
-       liftDirection(1) = -dragDirection(3)
-       liftDirection(2) =  zero
-       liftDirection(3) =  dragDirection(1)
-
-       if(liftDirection(3) < zero) then
-          liftDirection(1) = -liftDirection(1)
-          liftDirection(3) = -liftDirection(3)
-       endif
-    endif
-
-    ! Set the Mach number for the coefficients equal to the Mach
-    ! number if it was not specified. For internal flow field this
-    ! will again be changed in initFlo.
-
-    if(MachCoef < zero) MachCoef = Mach
-    !
-    !       Time spectral parameters. They only need to be specified for a
-    !       time spectral computation.
-    !
-    testSpectral: if(equationMode == timeSpectral) then
-
-       ! Check if the number of time intervals was specified.
-
-       if(nTimeIntervalsSpectral < 0) then
-          if(myID == 0)                        &
-               call terminate("checkInputParam", &
-               "Number time intervals spectral not or &
-               &wrongly specified")
-          call mpi_barrier(ADflow_comm_world, ierr)
-       endif
-
-       ! If an unsteady restart solution file must be written, check
-       ! if the corresponding time step has been specified.
-
-       if( writeUnsteadyRestartSpectral ) then
-          if(dtUnsteadyRestartSpectral <= zero) then
-             if(myID == 0)                        &
-                  call terminate("checkInputParam", &
-                  "Time step (in sec) for unsteady restart &
-                  &not or wrongly specified.")
-             call mpi_barrier(ADflow_comm_world, ierr)
-          endif
-       endif
-
-       ! If solution files (for postprocessing) must be written,
-       ! check if the number has been specified.
-
-       if( writeUnsteadyVolSpectral .or. &
-            writeUnsteadySurfSpectral) then
-          if(nunsteadySolSpectral <= 0) then
-             if(myID == 0)                        &
-                  call terminate("checkInputParam", &
-                  "Number of unsteady solution files &
-                  &not or wrongly specified.")
-             call mpi_barrier(ADflow_comm_world, ierr)
-          endif
-       endif
-
-    else testSpectral
-
-       ! No spectral method. Set nTimeIntervalsSpectral to 1.
-
-       nTimeIntervalsSpectral = 1
-
-    endif testSpectral
-    !
-    !       Unsteady parameters. They only need to be specified for an
-    !       unsteady computation.
-    !
-    testUnsteady: if(equationMode == unsteady) then
-
-       ! Physical time step parameters.
-
-       if(nTimeStepsFine < 0) then
-          if(myID == 0)                        &
-               call terminate("checkInputParam", &
-               "Number of unsteady time steps fine grid &
-               &not or wrongly specified")
-          call mpi_barrier(ADflow_comm_world, ierr)
-       endif
-
-       if(nTimeStepsCoarse < 0) nTimeStepsCoarse = nTimeStepsFine
-
-       if(deltaT < 0) then
-          if(myID == 0)                        &
-               call terminate("checkInputParam", &
-               "Unsteady time step (in sec) &
-               &not or wrongly specified")
-          call mpi_barrier(ADflow_comm_world, ierr)
-       endif
-
-       ! Check if the rigid body rotation parameters are consistent.
-       ! The polynomial rotation coefficients.
-
-       if(degreePolXRot >= 0 .and. &
-            .not. allocated(coefPolXRot)) then
-          if(myID == 0)                        &
-               call terminate("checkInputParam", &
-               "Polynomial coefficients x-rotation &
-               &not specified")
-          call mpi_barrier(ADflow_comm_world, ierr)
-       endif
-
-       if(degreePolYRot >= 0 .and. &
-            .not. allocated(coefPolYRot)) then
-          if(myID == 0)                        &
-               call terminate("checkInputParam", &
-               "Polynomial coefficients y-rotation &
-               &not specified")
-          call mpi_barrier(ADflow_comm_world, ierr)
-       endif
-
-       if(degreePolZRot >= 0 .and. &
-            .not. allocated(coefPolZRot)) then
-          if(myID == 0)                        &
-               call terminate("checkInputParam", &
-               "Polynomial coefficients z-rotation &
-               &not specified")
-          call mpi_barrier(ADflow_comm_world, ierr)
-       endif
-
-       ! The fourier rotation coefficients.
-
-       if(degreeFourXRot >= 0 .and. &
-            .not. allocated(cosCoefFourXRot)) then
-          if(myID == 0)                        &
-               call terminate("checkInputParam", &
-               "Fourier cosine coefficients x-rotation &
-               &not specified")
-          call mpi_barrier(ADflow_comm_world, ierr)
-       endif
-
-       if(degreeFourXRot >= 1 .and. &
-            .not. allocated(sinCoefFourXRot)) then
-          if(myID == 0)                        &
-               call terminate("checkInputParam", &
-               "Fourier sine coefficients x-rotation &
-               &not specified")
-          call mpi_barrier(ADflow_comm_world, ierr)
-       endif
-
-       if(degreeFourYRot >= 0 .and. &
-            .not. allocated(cosCoefFourYRot)) then
-          if(myID == 0)                        &
-               call terminate("checkInputParam", &
-               "Fourier cosine coefficients y-rotation &
-               &not specified")
-          call mpi_barrier(ADflow_comm_world, ierr)
-       endif
-
-       if(degreeFourYRot >= 1 .and. &
-            .not. allocated(sinCoefFourYRot)) then
-          if(myID == 0)                        &
-               call terminate("checkInputParam", &
-               "Fourier sine coefficients y-rotation &
-               &not specified")
-          call mpi_barrier(ADflow_comm_world, ierr)
-       endif
-
-       if(degreeFourZRot >= 0 .and. &
-            .not. allocated(cosCoefFourZRot)) then
-          if(myID == 0)                        &
-               call terminate("checkInputParam", &
-               "Fourier cosine coefficients z-rotation &
-               &not specified")
-          call mpi_barrier(ADflow_comm_world, ierr)
-       endif
-
-       if(degreeFourZRot >= 1 .and. &
-            .not. allocated(sinCoefFourZRot)) then
-          if(myID == 0)                        &
-               call terminate("checkInputParam", &
-               "Fourier sine coefficients z-rotation &
-               &not specified")
-          call mpi_barrier(ADflow_comm_world, ierr)
-       endif
-
-    endif testUnsteady
-    !
-    !                             Warning messages.
-    !
-    ! Check for an invisid problem if the Reynolds number is specified.
-    ! If so, print a Warning that this info is ignored.
-
-    if(myID == 0 .and. equations == EulerEquations .and. &
-         Reynolds > zero) then
-
-       print "(a)", "#"
-       print "(a)", "#                      Warning"
-       print "(a)", "# Reynolds number specified for the Euler &
-            &equations."
-       print "(a)", "# This information is ignored."
-       print "(a)", "#"
-
-    endif
-
-    ! Check if the Mach and Reynolds number are specified for an
-    ! internal flow problem. If so, print a Warning message that this
-    ! info is ignored.
-
-    if(flowType == internalFlow) then
-
-       ! Check whether a viscous or an inviscid problem is to be solved.
-       ! For an inviscid problem you do not want to mention that the
-       ! Reynolds number is ignored, because this has already been
-       ! taken care of.
-
-       if((equations == NSEquations .or.     &
-            equations == RANSEquations) .and. &
-            Mach > zero .and. Reynolds > zero) then
-
-          ! Viscous problem, where both the Mach and Reynolds were
-          ! specified. Processor 0 prints the Warning.
-
-          if(myID == 0) then
-             print "(a)", "#"
-             print "(a)", "#                      Warning"
-             print "(a)", "# Mach and Reynolds number specified &
-                  &for an internal flow problem."
-             print "(a)", "# This information is ignored."
-             print "(a)", "#"
-          endif
-
-       else if(Mach > zero) then
-
-          ! The Mach number has been specified. Processor 0 prints
-          ! a Warning.
-
-          if(myID == 0) then
-             print "(a)", "#"
-             print "(a)", "#                      Warning"
-             print "(a)", "# Mach number specified for an internal &
-                  &flow problem."
-             print "(a)", "# This information is ignored."
-             print "(a)", "#"
-          endif
-
-       endif
-
-    endif
-
-    ! For a steady computation possible specified rigid body
-    ! rotation info is ignored. Processor 0 will print the Warning.
-
-    if(degreePolXRot  >= 0 .or. degreePolYRot  >= 0 .or. &
-         degreePolZRot  >= 0 .or. degreeFourXRot >= 0 .or. &
-         degreeFourYRot >= 0 .or. degreeFourZRot >= 0) then
-
-       if(equationMode == steady .and. myID == 0) then
-          print "(a)", "#"
-          print "(a)", "#                      Warning"
-          print "(a)", "# Rigid body rotation info specified for &
-               &a steady computation."
-          print "(a)", "# This information is ignored."
-          print "(a)", "#"
-       endif
-    endif
-
-    ! Print warning messages if the precision to be written
-    ! is larger than the precision used in the computation.
-
-    gridPrecisionWarning = .false.
-    solPrecisionWarning  = .false.
+            if (degreeFourXRot >= 0 .and. &
+                .not. allocated(cosCoefFourXRot)) then
+                if (myID == 0) &
+                     call terminate("checkInputParam", &
+                     "Fourier cosine coefficients x-rotation &
+                     &not specified")
+                call mpi_barrier(ADflow_comm_world, ierr)
+            end if
+
+            if (degreeFourXRot >= 1 .and. &
+                .not. allocated(sinCoefFourXRot)) then
+                if (myID == 0) &
+                     call terminate("checkInputParam", &
+                     "Fourier sine coefficients x-rotation &
+                     &not specified")
+                call mpi_barrier(ADflow_comm_world, ierr)
+            end if
+
+            if (degreeFourYRot >= 0 .and. &
+                .not. allocated(cosCoefFourYRot)) then
+                if (myID == 0) &
+                     call terminate("checkInputParam", &
+                     "Fourier cosine coefficients y-rotation &
+                     &not specified")
+                call mpi_barrier(ADflow_comm_world, ierr)
+            end if
+
+            if (degreeFourYRot >= 1 .and. &
+                .not. allocated(sinCoefFourYRot)) then
+                if (myID == 0) &
+                     call terminate("checkInputParam", &
+                     "Fourier sine coefficients y-rotation &
+                     &not specified")
+                call mpi_barrier(ADflow_comm_world, ierr)
+            end if
+
+            if (degreeFourZRot >= 0 .and. &
+                .not. allocated(cosCoefFourZRot)) then
+                if (myID == 0) &
+                     call terminate("checkInputParam", &
+                     "Fourier cosine coefficients z-rotation &
+                     &not specified")
+                call mpi_barrier(ADflow_comm_world, ierr)
+            end if
+
+            if (degreeFourZRot >= 1 .and. &
+                .not. allocated(sinCoefFourZRot)) then
+                if (myID == 0) &
+                     call terminate("checkInputParam", &
+                     "Fourier sine coefficients z-rotation &
+                     &not specified")
+                call mpi_barrier(ADflow_comm_world, ierr)
+            end if
+
+        end if testUnsteady
+        !
+        !                             Warning messages.
+        !
+        ! Check for an invisid problem if the Reynolds number is specified.
+        ! If so, print a Warning that this info is ignored.
+
+        if (myID == 0 .and. equations == EulerEquations .and. &
+            Reynolds > zero) then
+
+            print "(a)", "#"
+            print "(a)", "#                      Warning"
+            print "(a)", "# Reynolds number specified for the Euler &
+                 &equations."
+            print "(a)", "# This information is ignored."
+            print "(a)", "#"
+
+        end if
+
+        ! Check if the Mach and Reynolds number are specified for an
+        ! internal flow problem. If so, print a Warning message that this
+        ! info is ignored.
+
+        if (flowType == internalFlow) then
+
+            ! Check whether a viscous or an inviscid problem is to be solved.
+            ! For an inviscid problem you do not want to mention that the
+            ! Reynolds number is ignored, because this has already been
+            ! taken care of.
+
+            if ((equations == NSEquations .or. &
+                 equations == RANSEquations) .and. &
+                Mach > zero .and. Reynolds > zero) then
+
+                ! Viscous problem, where both the Mach and Reynolds were
+                ! specified. Processor 0 prints the Warning.
+
+                if (myID == 0) then
+                    print "(a)", "#"
+                    print "(a)", "#                      Warning"
+                    print "(a)", "# Mach and Reynolds number specified &
+                         &for an internal flow problem."
+                    print "(a)", "# This information is ignored."
+                    print "(a)", "#"
+                end if
+
+            else if (Mach > zero) then
+
+                ! The Mach number has been specified. Processor 0 prints
+                ! a Warning.
+
+                if (myID == 0) then
+                    print "(a)", "#"
+                    print "(a)", "#                      Warning"
+                    print "(a)", "# Mach number specified for an internal &
+                         &flow problem."
+                    print "(a)", "# This information is ignored."
+                    print "(a)", "#"
+                end if
+
+            end if
+
+        end if
+
+        ! For a steady computation possible specified rigid body
+        ! rotation info is ignored. Processor 0 will print the Warning.
+
+        if (degreePolXRot >= 0 .or. degreePolYRot >= 0 .or. &
+            degreePolZRot >= 0 .or. degreeFourXRot >= 0 .or. &
+            degreeFourYRot >= 0 .or. degreeFourZRot >= 0) then
+
+            if (equationMode == steady .and. myID == 0) then
+                print "(a)", "#"
+                print "(a)", "#                      Warning"
+                print "(a)", "# Rigid body rotation info specified for &
+                     &a steady computation."
+                print "(a)", "# This information is ignored."
+                print "(a)", "#"
+            end if
+        end if
+
+        ! Print warning messages if the precision to be written
+        ! is larger than the precision used in the computation.
+
+        gridPrecisionWarning = .false.
+        solPrecisionWarning = .false.
 
 #ifdef USE_SINGLE_PRECISION
-    if(precisionGrid == precisionDouble) gridPrecisionWarning = .true.
-    if(precisionSol  == precisionDouble) solPrecisionWarning = .true.
+        if (precisionGrid == precisionDouble) gridPrecisionWarning = .true.
+        if (precisionSol == precisionDouble) solPrecisionWarning = .true.
 #endif
 
-    if(gridPrecisionWarning .and. myID == 0) then
-       print "(a)", "#"
-       print "(a)", "#                      Warning"
-       print "(a)", "# Precision of the grid file to write is &
-            &bigger than used in the computation."
-       print "(a)", "# This does not make sense and is a waste &
-            &of disk space"
-       print "(a)", "#"
-    endif
-
-    if(solPrecisionWarning .and. myID == 0) then
-       print "(a)", "#"
-       print "(a)", "#                      Warning"
-       print "(a)", "# Precision of the solution file to write is &
-            &bigger than used in the computation."
-       print "(a)", "# This does not make sense and is a waste &
-            &of disk space"
-       print "(a)", "#"
-    endif
-    !
-    !       Wall functions can only be used if the RANS equations are to
-    !       be solved. If no wall functions are used the wall offset is
-    !       set to zero.
-    !
-    if(equations /= RANSEquations) wallFunctions = .false.
-    if(.not. wallFunctions) wallOffset = zero
-    !
-    !       Check whether or not the wall distance is needed for the
-    !       turbulence model.
-    !
-    if(equations == RANSEquations) then
-
-       ! RANS simulation. Determine if the turbulence model is
-       ! wall distance free. Note that updateWallDistanceUnsteady is
-       ! NOT overruled, because this is just the case for which this
-       ! parameter was intended.
-
-       select case (turbModel)
-       case (komegaWilcox, komegaModified, ktau)
-
-          ! Wall distance free turbulence models.
-
-          wallDistanceNeeded = .false.
-
-          !=============================================================
-
-       case default
-
-          ! The turbulence model needs the wall distance
-
-          wallDistanceNeeded = .true.
-
-       end select
-
-    else
-
-       ! Laminar or inviscid computation. Simply initialize the
-       ! logicals for the wall distance to .false.
-
-       wallDistanceNeeded         = .false.
-       updateWallDistanceUnsteady = .false.
-
-    endif
-    !
-    !       Parallelization parameters. Set the minimum load imbalance to
-    !       3 percent to avoid any problems.
-    !
-    loadImbalance = max(loadImbalance, 0.03_realType)
-    !
-    !       Some default parameters, which depend on other parameters.
-    !       Only if these have not been specified of course.
-    !
-    if(nsgStartup < 0)    nsgStartup    = 0
-    if(ncyclesCoarse < 0) nCyclesCoarse = nCycles
-    if(cflCoarse < zero)  cflCoarse     = cfl
-    if(betaTurb  < zero)  betaTurb      = alfaTurb
-
-    if(turbRelax == turbRelaxNotDefined) then
-       turbRelax = turbRelaxImplicit
-       if(turbModel == v2f) turbRelax = turbRelaxExplicit
-    endif
-
-    ! V2f should only be solved with explicit underrelaxation.
-
-    if(equations == RANSEquations .and. turbModel == v2f .and. &
-         turbRelax == turbRelaxImplicit) then
-
-       turbRelax = turbRelaxExplicit
-
-       if(myID == 0) then
-          print "(a)", "#"
-          print "(a)", "#                      Warning"
-          print "(a)", "# Implicit underrelaxation specified for &
-               &the v2f model."
-          print "(a)", "# This is overwritten to explicit &
-               &underrelaxation."
-          print "(a)", "#"
-       endif
-
-    endif
-
-    if(nsaveVolume <= 0) then
-       select case (equationMode)
-       case (steady, timeSpectral)
-          nSaveVolume = nCycles + nCyclesCoarse + nsgStartup + 1
-
-       case (unsteady)
-          nSaveVolume = nTimeStepsFine + nTimeStepsCoarse &
-               + nTimeStepsRestart + 1
-       end select
-    endif
-
-    if(nsaveSurface <= 0) nSaveSurface  = nSaveVolume
-
-    if(eddyVisInfRatio < zero) then
-
-       ! Default value depends on the turbulence model.
-
-       select case (turbModel)
-
-       case (spalartAllmaras, spalartAllmarasEdwards)
-          eddyVisInfRatio = 0.009_realType
-
-       case default
-          eddyVisInfRatio = 0.1_realType
-
-       end select
-    endif
-    !
-    !       Determine the number of old grid levels needed for the BDF
-    !       time integration of unsteady problems and allocate the memory
-    !       for the coefficients. The actual values are not yet set,
-    !       because in the first (and possibly second) time step a reduced
-    !       order must be used, because the older states are not available
-    !       yet. Also allocate the memory for the logicals to indicate
-    !       whether or not old solutions have been written.
-    !       If a Runge Kutta scheme must be used for the time integration,
-    !       either explicit or implicit, a separate routine is called to
-    !       set all the necessary variables.
-    !
-    select case (timeIntegrationScheme)
-    case (BDF, MD)
-
-       ! First check if the accuracy is okay.
-
-       if(timeAccuracy > thirdOrder) then
-          if(myID == 0) then
-             print "(a)", "#"
-             print "(a)", "#                      Warning"
-             print "(a)", "# Maximum third order possible for BDF."
-             print "(a)", "# Order has been reduced to third."
-             print "(a)", "#"
-          endif
-
-          timeAccuracy = thirdOrder
-       endif
-
-       ! Determine the accuracy and set nOldLevels accordingly.
-
-       select case (timeAccuracy)
-       case (firstOrder)
-          nOldLevels = 1
-
-       case (secondOrder)
-          nOldLevels = 2
-
-       case (thirdOrder)
-          nOldLevels = 3
-       end select
-
-       ! Allocate the memory for coefTime.
-       if( allocated(coefTime)) deallocate(coefTime)
-       allocate(coefTime(0:nOldLevels), stat=ierr)
-       if(ierr /= 0)                       &
-            call terminate("checkInputParam", &
-            "Memory allocation error for coefTime")
-
-       ! Determine the accuracy and set ALE parameters accordingly.
-       if (useALE) then
-          select case (timeAccuracy)
-          case (firstOrder)
-             nALEMeshes = 1
-             nALESteps  = 2
-
-          case (secondOrder)
-             nALEMeshes = 2
-             nALESteps  = 4
-
-          case (thirdOrder)
-             call terminate("checkInputParam", &
-                  "ALE can only use 1st and 2nd order time accuracy")
-          end select
-
-          if( allocated(coefTimeALE)) deallocate(coefTimeALE)
-          allocate(coefTimeALE(1:nALEsteps), stat=ierr)
-          if(ierr /= 0)                       &
-               call terminate("checkInputParam", &
-               "Memory allocation error for coefTimeALE")
-
-          if( allocated(coefMeshALE)) deallocate(coefMeshALE)
-          allocate(coefMeshALE(1:nALEMeshes,2), stat=ierr)
-          if(ierr /= 0)                       &
-               call terminate("checkInputParam", &
-               "Memory allocation error for coefMeshALE")
-       end if
-
-       !===============================================================
-
-    case (explicitRK)
-       nOldLevels = 1
-       call setStageCoeffExplicitRK
-
-
-    end select
-
-    ! Set the logicals whether or not the old solutions have been
-    ! written. Note that this is only used for the second and
-    ! higher order BDF schemes. However it is allocated with a
-    ! minimum size of 1 to avoid problems.
-
-    oldSolWrittenSize = max(nOldLevels-1_intType, 1_intType)
-
-    !check allocations for multipile succesive calls
-    if (allocated(oldSolWritten)) deallocate(oldSolWritten)
-    allocate(oldSolWritten(oldSolWrittenSize), stat=ierr)
-    if(ierr /= 0)                       &
-         call terminate("checkInputParam", &
-         "Memory allocation error for oldSolWritten")
-
-    do nn=1,oldSolWrittenSize
-       oldSolWritten(nn) = .false.
-    enddo
-    !
-    !       Determine the values of the runge kutta parameters, depending
-    !       on the number of stages specified.
-    !
-    ! Limit the number of stages between 1 and 6 and allocate the
-    ! memory.
-
-    nRKStages = min(6_intType,max(1_intType,nRKStages))
-
-    !check allocations for multipile succesive calls
-    if (allocated(etaRk)) deallocate(etaRk)
-    if (allocated(cdisRK)) deallocate(cdisRK)
-
-    allocate(etaRk(nRKStages), cdisRK(nRKStages), stat=ierr)
-    if(ierr /= 0) &
-         call terminate("checkInputParam", &
-         "Memory allocation error for etaRK and cdisRK")
-
-    ! Determine the case we are having here.
-
-    select case (nRKStages)
-    case (1_intType)
-       etaRK(1) = one
-
-       cdisRK(1) = one
-
-    case (2_intType)
-       etaRK(1) = 0.2222_realType
-       etaRK(2) = one
-
-       cdisRK(1) = one
-       cdisRK(2) = one
-
-    case (3_intType)
-       etaRK(1) = 0.2846_realType
-       etaRK(2) = 0.6067_realType
-       etaRK(3) = one
-
-       cdisRK(1) = one
-       cdisRK(2) = one
-       cdisRK(3) = one
-
-    case (4_intType)
-       etaRK(1) = 0.33333333_realType
-       etaRK(2) = 0.26666667_realType
-       etaRK(3) = 0.55555555_realType
-       etaRK(4) = one
-
-       cdisRK(1) = one
-       cdisRK(2) = half
-       cdisRK(3) = zero
-       cdisRK(4) = zero
-
-    case (5_intType)
-       etaRK(1) = fourth
-       etaRK(2) = 0.16666667_realType !1/6
-       etaRK(3) = 0.37500000_realType !3/8
-       etaRK(4) = half
-       etaRK(5) = one
-
-       cdisRK(1) = one
-       cdisRK(2) = zero
-       cdisRK(3) = 0.56_realType
-       cdisRK(4) = zero
-       cdisRK(5) = 0.44_realType
-
-    case (6_intType)
-       etaRK(1) = 0.0722_realType
-       etaRK(2) = 0.1421_realType
-       etaRK(3) = 0.2268_realType
-       etaRK(4) = 0.3425_realType
-       etaRK(5) = 0.5349_realType
-       etaRK(6) = one
-
-       cdisRK(1) = one
-       cdisRK(2) = one
-       cdisRK(3) = one
-       cdisRK(4) = one
-       cdisRK(5) = one
-       cdisRK(6) = one
-    end select
-    !
-    !       To avoid any problems later on, allocate the memory for the
-    !       rigid body motion parameters if these values were not present
-    !       in the parameter file.
-    !
-    if(.not. allocated(coefPolXRot) ) then
-       allocate(coefPolXRot(0:0), stat=ierr)
-       if(ierr /= 0)                         &
-            call terminate("checkInputParam", &
-            "Memory allocation failure for coefPolXRot")
-       coefPolXRot = zero
-    endif
-
-    if(.not. allocated(coefPolYRot) ) then
-       allocate(coefPolYRot(0:0), stat=ierr)
-       if(ierr /= 0)                         &
-            call terminate("checkInputParam", &
-            "Memory allocation failure for coefPolYRot")
-       coefPolYRot = zero
-    endif
-
-    if(.not. allocated(coefPolZRot) ) then
-       allocate(coefPolZRot(0:0), stat=ierr)
-       if(ierr /= 0)                         &
-            call terminate("checkInputParam", &
-            "Memory allocation failure for coefPolZRot")
-       coefPolZRot = zero
-    endif
-
-    if(.not. allocated(cosCoefFourXRot) ) then
-       allocate(cosCoefFourXRot(0:0), stat=ierr)
-       if(ierr /= 0)                         &
-            call terminate("checkInputParam", &
-            "Memory allocation failure for &
-            &cosCoefFourXRot")
-       cosCoefFourXRot = zero
-    endif
-
-    if(.not. allocated(sinCoefFourXRot) ) then
-       allocate(sinCoefFourXRot(1), stat=ierr)
-       if(ierr /= 0)                         &
-            call terminate("checkInputParam", &
-            "Memory allocation failure for &
-            &sinCoefFourXRot")
-       sinCoefFourXRot = zero
-    endif
-
-    if(.not. allocated(cosCoefFourYRot) ) then
-       allocate(cosCoefFourYRot(0:0), stat=ierr)
-       if(ierr /= 0)                         &
-            call terminate("checkInputParam", &
-            "Memory allocation failure for &
-            &cosCoefFourYRot")
-       cosCoefFourYRot = zero
-    endif
-
-    if(.not. allocated(sinCoefFourYRot) ) then
-       allocate(sinCoefFourYRot(1), stat=ierr)
-       if(ierr /= 0)                         &
-            call terminate("checkInputParam", &
-            "Memory allocation failure for &
-            &sinCoefFourYRot")
-       sinCoefFourYRot = zero
-    endif
-
-    if(.not. allocated(cosCoefFourZRot) ) then
-       allocate(cosCoefFourZRot(0:0), stat=ierr)
-       if(ierr /= 0)                         &
-            call terminate("checkInputParam", &
-            "Memory allocation failure for &
-            &cosCoefFourZRot")
-       cosCoefFourZRot = zero
-    endif
-
-    if(.not. allocated(sinCoefFourZRot) ) then
-       allocate(sinCoefFourZRot(1), stat=ierr)
-       if(ierr /= 0)                         &
+        if (gridPrecisionWarning .and. myID == 0) then
+            print "(a)", "#"
+            print "(a)", "#                      Warning"
+            print "(a)", "# Precision of the grid file to write is &
+                 &bigger than used in the computation."
+            print "(a)", "# This does not make sense and is a waste &
+                 &of disk space"
+            print "(a)", "#"
+        end if
+
+        if (solPrecisionWarning .and. myID == 0) then
+            print "(a)", "#"
+            print "(a)", "#                      Warning"
+            print "(a)", "# Precision of the solution file to write is &
+                 &bigger than used in the computation."
+            print "(a)", "# This does not make sense and is a waste &
+                 &of disk space"
+            print "(a)", "#"
+        end if
+        !
+        !       Wall functions can only be used if the RANS equations are to
+        !       be solved. If no wall functions are used the wall offset is
+        !       set to zero.
+        !
+        if (equations /= RANSEquations) wallFunctions = .false.
+        if (.not. wallFunctions) wallOffset = zero
+        !
+        !       Check whether or not the wall distance is needed for the
+        !       turbulence model.
+        !
+        if (equations == RANSEquations) then
+
+            ! RANS simulation. Determine if the turbulence model is
+            ! wall distance free. Note that updateWallDistanceUnsteady is
+            ! NOT overruled, because this is just the case for which this
+            ! parameter was intended.
+
+            select case (turbModel)
+            case (komegaWilcox, komegaModified, ktau)
+
+                ! Wall distance free turbulence models.
+
+                wallDistanceNeeded = .false.
+
+                !=============================================================
+
+            case default
+
+                ! The turbulence model needs the wall distance
+
+                wallDistanceNeeded = .true.
+
+            end select
+
+        else
+
+            ! Laminar or inviscid computation. Simply initialize the
+            ! logicals for the wall distance to .false.
+
+            wallDistanceNeeded = .false.
+            updateWallDistanceUnsteady = .false.
+
+        end if
+        !
+        !       Parallelization parameters. Set the minimum load imbalance to
+        !       3 percent to avoid any problems.
+        !
+        loadImbalance = max(loadImbalance, 0.03_realType)
+        !
+        !       Some default parameters, which depend on other parameters.
+        !       Only if these have not been specified of course.
+        !
+        if (nsgStartup < 0) nsgStartup = 0
+        if (ncyclesCoarse < 0) nCyclesCoarse = nCycles
+        if (cflCoarse < zero) cflCoarse = cfl
+        if (betaTurb < zero) betaTurb = alfaTurb
+
+        if (turbRelax == turbRelaxNotDefined) then
+            turbRelax = turbRelaxImplicit
+            if (turbModel == v2f) turbRelax = turbRelaxExplicit
+        end if
+
+        ! V2f should only be solved with explicit underrelaxation.
+
+        if (equations == RANSEquations .and. turbModel == v2f .and. &
+            turbRelax == turbRelaxImplicit) then
+
+            turbRelax = turbRelaxExplicit
+
+            if (myID == 0) then
+                print "(a)", "#"
+                print "(a)", "#                      Warning"
+                print "(a)", "# Implicit underrelaxation specified for &
+                     &the v2f model."
+                print "(a)", "# This is overwritten to explicit &
+                     &underrelaxation."
+                print "(a)", "#"
+            end if
+
+        end if
+
+        if (nsaveVolume <= 0) then
+            select case (equationMode)
+            case (steady, timeSpectral)
+                nSaveVolume = nCycles + nCyclesCoarse + nsgStartup + 1
+
+            case (unsteady)
+                nSaveVolume = nTimeStepsFine + nTimeStepsCoarse &
+                              + nTimeStepsRestart + 1
+            end select
+        end if
+
+        if (nsaveSurface <= 0) nSaveSurface = nSaveVolume
+
+        if (eddyVisInfRatio < zero) then
+
+            ! Default value depends on the turbulence model.
+
+            select case (turbModel)
+
+            case (spalartAllmaras, spalartAllmarasEdwards)
+                eddyVisInfRatio = 0.009_realType
+
+            case default
+                eddyVisInfRatio = 0.1_realType
+
+            end select
+        end if
+        !
+        !       Determine the number of old grid levels needed for the BDF
+        !       time integration of unsteady problems and allocate the memory
+        !       for the coefficients. The actual values are not yet set,
+        !       because in the first (and possibly second) time step a reduced
+        !       order must be used, because the older states are not available
+        !       yet. Also allocate the memory for the logicals to indicate
+        !       whether or not old solutions have been written.
+        !       If a Runge Kutta scheme must be used for the time integration,
+        !       either explicit or implicit, a separate routine is called to
+        !       set all the necessary variables.
+        !
+        select case (timeIntegrationScheme)
+        case (BDF, MD)
+
+            ! First check if the accuracy is okay.
+
+            if (timeAccuracy > thirdOrder) then
+                if (myID == 0) then
+                    print "(a)", "#"
+                    print "(a)", "#                      Warning"
+                    print "(a)", "# Maximum third order possible for BDF."
+                    print "(a)", "# Order has been reduced to third."
+                    print "(a)", "#"
+                end if
+
+                timeAccuracy = thirdOrder
+            end if
+
+            ! Determine the accuracy and set nOldLevels accordingly.
+
+            select case (timeAccuracy)
+            case (firstOrder)
+                nOldLevels = 1
+
+            case (secondOrder)
+                nOldLevels = 2
+
+            case (thirdOrder)
+                nOldLevels = 3
+            end select
+
+            ! Allocate the memory for coefTime.
+            if (allocated(coefTime)) deallocate (coefTime)
+            allocate (coefTime(0:nOldLevels), stat=ierr)
+            if (ierr /= 0) &
+                call terminate("checkInputParam", &
+                               "Memory allocation error for coefTime")
+
+            ! Determine the accuracy and set ALE parameters accordingly.
+            if (useALE) then
+                select case (timeAccuracy)
+                case (firstOrder)
+                    nALEMeshes = 1
+                    nALESteps = 2
+
+                case (secondOrder)
+                    nALEMeshes = 2
+                    nALESteps = 4
+
+                case (thirdOrder)
+                    call terminate("checkInputParam", &
+                                   "ALE can only use 1st and 2nd order time accuracy")
+                end select
+
+                if (allocated(coefTimeALE)) deallocate (coefTimeALE)
+                allocate (coefTimeALE(1:nALEsteps), stat=ierr)
+                if (ierr /= 0) &
+                    call terminate("checkInputParam", &
+                                   "Memory allocation error for coefTimeALE")
+
+                if (allocated(coefMeshALE)) deallocate (coefMeshALE)
+                allocate (coefMeshALE(1:nALEMeshes, 2), stat=ierr)
+                if (ierr /= 0) &
+                    call terminate("checkInputParam", &
+                                   "Memory allocation error for coefMeshALE")
+            end if
+
+            !===============================================================
+
+        case (explicitRK)
+            nOldLevels = 1
+            call setStageCoeffExplicitRK
+
+        end select
+
+        ! Set the logicals whether or not the old solutions have been
+        ! written. Note that this is only used for the second and
+        ! higher order BDF schemes. However it is allocated with a
+        ! minimum size of 1 to avoid problems.
+
+        oldSolWrittenSize = max(nOldLevels - 1_intType, 1_intType)
+
+        !check allocations for multipile succesive calls
+        if (allocated(oldSolWritten)) deallocate (oldSolWritten)
+        allocate (oldSolWritten(oldSolWrittenSize), stat=ierr)
+        if (ierr /= 0) &
             call terminate("checkInputParam", &
-            "Memory allocation failure for &
-            &sinCoefFourZRot")
-       sinCoefFourZRot = zero
-    endif
-
-    ! Allocate the memory for cpmin_family. We had to wait until
-    ! nTimeIntervalsSpectral was set.
-    if(.not. allocated(cpmin_family) ) then
-       allocate(cpmin_family(nTimeIntervalsSpectral), stat=ierr)
-       if(ierr /= 0)                         &
+                           "Memory allocation error for oldSolWritten")
+
+        do nn = 1, oldSolWrittenSize
+            oldSolWritten(nn) = .false.
+        end do
+        !
+        !       Determine the values of the runge kutta parameters, depending
+        !       on the number of stages specified.
+        !
+        ! Limit the number of stages between 1 and 6 and allocate the
+        ! memory.
+
+        nRKStages = min(6_intType, max(1_intType, nRKStages))
+
+        !check allocations for multipile succesive calls
+        if (allocated(etaRk)) deallocate (etaRk)
+        if (allocated(cdisRK)) deallocate (cdisRK)
+
+        allocate (etaRk(nRKStages), cdisRK(nRKStages), stat=ierr)
+        if (ierr /= 0) &
             call terminate("checkInputParam", &
-            "Memory allocation failure for &
-            &cpmin_family")
-       cpmin_family = zero
-    endif
-
-  end subroutine checkInputParam
-  subroutine setDefaultValues
-    !
-    !       setDefaultValues sets the default values for the input
-    !       parameters where-ever possible. The parameters that must be
-    !       set by the user are initialized such a check can be performed
-    !       later.
-    !
-    use constants
-
-    !  --------- Bare imports...too many to list -------
-    use inputDiscretization
-    use inputIO
-    use inputIteration
-    use inputMotion
-    use inputOverset
-    use inputParallel
-    use inputPhysics
-    use inputTimeSpectral
-    use inputUnsteady
-    use inputADjoint
-    use inputTSStabDeriv
-    !  ------------------------------------------------
-    use flowVarRefState, only : Lref, lRefSpecified, pRef, rhoRef, &
-         TinfDim, Tref
-    use iteration, only : nOldSolAvail, timeSpectralGridsNotWritten
-    use monitor, only : monMassSliding, nTimeStepsRestart, timeUnsteadyRestart
-    use killSignals, only : fatalFail, routineFailed
-    use ADjointPETSc, only : adjointPETScVarsAllocated, adjointPETScPreProcVarsAllocated
-    use inputCostFunctions
-    implicit none
-
-    ! Initialize monitoring the turbulent residuals as well as the
-    ! monitoring of mass flow of the sliding interfaces to .false.
-
-    monDturb       = .false.
-    monMassSliding = .false.
-
-    ! Initialize the logicals to check whether or not monitoring,
-    ! surface output and volume output variables were specified to
-    ! .false.
-
-    monitorSpecified    = .false.
-    surfaceOutSpecified = .false.
-    volumeOutSpecified  = .false.
-    isoOutSpecified     = .false.
-    !
-    !       Set the default values for the discretization parameters.
-    !
-    spaceDiscr = none                  ! Serves as a check later on.
-    orderTurb  = firstOrder            ! First order discretization.
-    ! Of turbulent advective terms.
-    riemann     = Roe
-    limiter     = noLimiter            ! No limiter in upwind schemes.
-    precond     = noPrecond            ! No preconditioning.
-
-    eulerWallBCTreatment = normalMomentum   ! Normal momentum equation is
-    ! Used to determine ghost
-    ! cell pressure.
-
-    viscWallBCTreatment = constantPressure  ! Normal momentum equation is
-    ! Used to determine ghost
-    ! cell pressure.
-
-    outflowTreatment = constantExtrapol ! Constant extrapolation at
-    ! outflow boundaries.
-
-    spaceDiscrCoarse = none             ! Serves as a check. If nothing
-    riemannCoarse    = none             ! is specified the fine grid
-    ! parameter is taken.
-
-    nonMatchTreatment = NonConservative ! Non conservative treatment
-    ! of non-matching block to
-    ! block boundaries.
-
-    vortexCorr = .false.                ! No vortex correction is
-    ! applied.
-
-    vis2       = half
-    vis4       = one/64.0_realType
-    vis2Coarse = half
-
-    dirScaling = .true.                 ! Apply isotropic directional
-    adis       = two*third              ! scaling in the artificial
-    ! dissipation schemes.
-
-    hScalingInlet = .false.             ! No total enthalpy scaling.
-
-    kappaCoef = third
-    !
-    !       Set the default values for the IO-parameters.
-
-    gridFile       = ""          ! Serves as a check later on.
-
-    checkRestartSol = .true.     ! Restart solution is checked for
-    ! correct nonDimensionalization.
-
-    newGridFile = ""             ! This will be corrected later on
-    solFile     = ""             ! if nothing is specified. The
-    ! default names depend on the
-    ! format used
-
-    surfaceSolFile = ""          ! This will be corrected later if no
-    ! surface solution file is specified.
-
-    storeRindLayer = .True.     ! No halo cells in solution files.
-
-    autoParameterUpdate = .true. ! Update the input parameter file
-    ! when a restart file is written.
-    writeCoorMeter = .false.     ! Use original coordinate units
-    ! when writing solution files.
-
-    cpFile = ""                  ! Serves as a check later on.
-
-    storeConvInnerIter = .false. ! Do not store the convergence of
-                                 ! iterations(inner iterations in unsteady mode).
+                           "Memory allocation error for etaRK and cdisRK")
+
+        ! Determine the case we are having here.
+
+        select case (nRKStages)
+        case (1_intType)
+            etaRK(1) = one
+
+            cdisRK(1) = one
+
+        case (2_intType)
+            etaRK(1) = 0.2222_realType
+            etaRK(2) = one
+
+            cdisRK(1) = one
+            cdisRK(2) = one
+
+        case (3_intType)
+            etaRK(1) = 0.2846_realType
+            etaRK(2) = 0.6067_realType
+            etaRK(3) = one
+
+            cdisRK(1) = one
+            cdisRK(2) = one
+            cdisRK(3) = one
+
+        case (4_intType)
+            etaRK(1) = 0.33333333_realType
+            etaRK(2) = 0.26666667_realType
+            etaRK(3) = 0.55555555_realType
+            etaRK(4) = one
+
+            cdisRK(1) = one
+            cdisRK(2) = half
+            cdisRK(3) = zero
+            cdisRK(4) = zero
+
+        case (5_intType)
+            etaRK(1) = fourth
+            etaRK(2) = 0.16666667_realType !1/6
+            etaRK(3) = 0.37500000_realType !3/8
+            etaRK(4) = half
+            etaRK(5) = one
+
+            cdisRK(1) = one
+            cdisRK(2) = zero
+            cdisRK(3) = 0.56_realType
+            cdisRK(4) = zero
+            cdisRK(5) = 0.44_realType
+
+        case (6_intType)
+            etaRK(1) = 0.0722_realType
+            etaRK(2) = 0.1421_realType
+            etaRK(3) = 0.2268_realType
+            etaRK(4) = 0.3425_realType
+            etaRK(5) = 0.5349_realType
+            etaRK(6) = one
+
+            cdisRK(1) = one
+            cdisRK(2) = one
+            cdisRK(3) = one
+            cdisRK(4) = one
+            cdisRK(5) = one
+            cdisRK(6) = one
+        end select
+        !
+        !       To avoid any problems later on, allocate the memory for the
+        !       rigid body motion parameters if these values were not present
+        !       in the parameter file.
+        !
+        if (.not. allocated(coefPolXRot)) then
+            allocate (coefPolXRot(0:0), stat=ierr)
+            if (ierr /= 0) &
+                call terminate("checkInputParam", &
+                               "Memory allocation failure for coefPolXRot")
+            coefPolXRot = zero
+        end if
+
+        if (.not. allocated(coefPolYRot)) then
+            allocate (coefPolYRot(0:0), stat=ierr)
+            if (ierr /= 0) &
+                call terminate("checkInputParam", &
+                               "Memory allocation failure for coefPolYRot")
+            coefPolYRot = zero
+        end if
+
+        if (.not. allocated(coefPolZRot)) then
+            allocate (coefPolZRot(0:0), stat=ierr)
+            if (ierr /= 0) &
+                call terminate("checkInputParam", &
+                               "Memory allocation failure for coefPolZRot")
+            coefPolZRot = zero
+        end if
+
+        if (.not. allocated(cosCoefFourXRot)) then
+            allocate (cosCoefFourXRot(0:0), stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("checkInputParam", &
+                 "Memory allocation failure for &
+                 &cosCoefFourXRot")
+            cosCoefFourXRot = zero
+        end if
+
+        if (.not. allocated(sinCoefFourXRot)) then
+            allocate (sinCoefFourXRot(1), stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("checkInputParam", &
+                 "Memory allocation failure for &
+                 &sinCoefFourXRot")
+            sinCoefFourXRot = zero
+        end if
+
+        if (.not. allocated(cosCoefFourYRot)) then
+            allocate (cosCoefFourYRot(0:0), stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("checkInputParam", &
+                 "Memory allocation failure for &
+                 &cosCoefFourYRot")
+            cosCoefFourYRot = zero
+        end if
+
+        if (.not. allocated(sinCoefFourYRot)) then
+            allocate (sinCoefFourYRot(1), stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("checkInputParam", &
+                 "Memory allocation failure for &
+                 &sinCoefFourYRot")
+            sinCoefFourYRot = zero
+        end if
+
+        if (.not. allocated(cosCoefFourZRot)) then
+            allocate (cosCoefFourZRot(0:0), stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("checkInputParam", &
+                 "Memory allocation failure for &
+                 &cosCoefFourZRot")
+            cosCoefFourZRot = zero
+        end if
+
+        if (.not. allocated(sinCoefFourZRot)) then
+            allocate (sinCoefFourZRot(1), stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("checkInputParam", &
+                 "Memory allocation failure for &
+                 &sinCoefFourZRot")
+            sinCoefFourZRot = zero
+        end if
+
+        ! Allocate the memory for cpmin_family. We had to wait until
+        ! nTimeIntervalsSpectral was set.
+        if (.not. allocated(cpmin_family)) then
+            allocate (cpmin_family(nTimeIntervalsSpectral), stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("checkInputParam", &
+                 "Memory allocation failure for &
+                 &cpmin_family")
+            cpmin_family = zero
+        end if
+
+    end subroutine checkInputParam
+    subroutine setDefaultValues
+        !
+        !       setDefaultValues sets the default values for the input
+        !       parameters where-ever possible. The parameters that must be
+        !       set by the user are initialized such a check can be performed
+        !       later.
+        !
+        use constants
+
+        !  --------- Bare imports...too many to list -------
+        use inputDiscretization
+        use inputIO
+        use inputIteration
+        use inputMotion
+        use inputOverset
+        use inputParallel
+        use inputPhysics
+        use inputTimeSpectral
+        use inputUnsteady
+        use inputADjoint
+        use inputTSStabDeriv
+        !  ------------------------------------------------
+        use flowVarRefState, only: Lref, lRefSpecified, pRef, rhoRef, &
+                                   TinfDim, Tref
+        use iteration, only: nOldSolAvail, timeSpectralGridsNotWritten
+        use monitor, only: monMassSliding, nTimeStepsRestart, timeUnsteadyRestart
+        use killSignals, only: fatalFail, routineFailed
+        use ADjointPETSc, only: adjointPETScVarsAllocated, adjointPETScPreProcVarsAllocated
+        use inputCostFunctions
+        implicit none
+
+        ! Initialize monitoring the turbulent residuals as well as the
+        ! monitoring of mass flow of the sliding interfaces to .false.
+
+        monDturb = .false.
+        monMassSliding = .false.
+
+        ! Initialize the logicals to check whether or not monitoring,
+        ! surface output and volume output variables were specified to
+        ! .false.
+
+        monitorSpecified = .false.
+        surfaceOutSpecified = .false.
+        volumeOutSpecified = .false.
+        isoOutSpecified = .false.
+        !
+        !       Set the default values for the discretization parameters.
+        !
+        spaceDiscr = none                  ! Serves as a check later on.
+        orderTurb = firstOrder            ! First order discretization.
+        ! Of turbulent advective terms.
+        riemann = Roe
+        limiter = noLimiter            ! No limiter in upwind schemes.
+        precond = noPrecond            ! No preconditioning.
+
+        eulerWallBCTreatment = normalMomentum   ! Normal momentum equation is
+        ! Used to determine ghost
+        ! cell pressure.
+
+        viscWallBCTreatment = constantPressure  ! Normal momentum equation is
+        ! Used to determine ghost
+        ! cell pressure.
+
+        outflowTreatment = constantExtrapol ! Constant extrapolation at
+        ! outflow boundaries.
+
+        spaceDiscrCoarse = none             ! Serves as a check. If nothing
+        riemannCoarse = none             ! is specified the fine grid
+        ! parameter is taken.
+
+        nonMatchTreatment = NonConservative ! Non conservative treatment
+        ! of non-matching block to
+        ! block boundaries.
+
+        vortexCorr = .false.                ! No vortex correction is
+        ! applied.
+
+        vis2 = half
+        vis4 = one/64.0_realType
+        vis2Coarse = half
+
+        dirScaling = .true.                 ! Apply isotropic directional
+        adis = two*third              ! scaling in the artificial
+        ! dissipation schemes.
+
+        hScalingInlet = .false.             ! No total enthalpy scaling.
+
+        kappaCoef = third
+        !
+        !       Set the default values for the IO-parameters.
+
+        gridFile = ""          ! Serves as a check later on.
+
+        checkRestartSol = .true.     ! Restart solution is checked for
+        ! correct nonDimensionalization.
+
+        newGridFile = ""             ! This will be corrected later on
+        solFile = ""             ! if nothing is specified. The
+        ! default names depend on the
+        ! format used
+
+        surfaceSolFile = ""          ! This will be corrected later if no
+        ! surface solution file is specified.
+
+        storeRindLayer = .True.     ! No halo cells in solution files.
+
+        autoParameterUpdate = .true. ! Update the input parameter file
+        ! when a restart file is written.
+        writeCoorMeter = .false.     ! Use original coordinate units
+        ! when writing solution files.
+
+        cpFile = ""                  ! Serves as a check later on.
+
+        storeConvInnerIter = .false. ! Do not store the convergence of
+        ! iterations(inner iterations in unsteady mode).
 
 #ifdef USE_SINGLE_PRECISION
-    precisionGrid = precisionSingle   ! Default IO precision depends
-    precisionSol  = precisionSingle   ! on the default floating
-                                      ! point type used. Note that
+        precisionGrid = precisionSingle   ! Default IO precision depends
+        precisionSol = precisionSingle   ! on the default floating
+        ! point type used. Note that
 #else
-    precisionGrid = precisionDouble   ! for quadrupole precision the
-    precisionSol  = precisionDouble   ! IO takes place in double
-                                      ! precision.
+        precisionGrid = precisionDouble   ! for quadrupole precision the
+        precisionSol = precisionDouble   ! IO takes place in double
+        ! precision.
 #endif
 
-    ! Surface solution defaults to single precision
-    precisionSurfGrid = precisionSingle
-    precisionSurfSol  = precisionSingle
-
-    !
-    !       Set the default values for the iteration parameters.
-    !
-    nCycles       = -1    ! Serves as a check later on.
-    nsgStartup    =  0    ! No single grid startup iterations.
-    nSubIterTurb  =  0    ! No additional turbulent subiterations.
-    nUpdateBleeds = 50    ! Update the bleeds every 50 iterations.
-
-    nSaveVolume  = 1      ! Only save at the end of the computation.
-    nSaveSurface = 1
-
-    smoother  = none
-    nRKStages = 5
-    nSubiterations = 1
-
-    !resAveraging =  noResAveraging ! No residual averaging.
-    resAveraging =  noResAveraging
-    smoop        = 1.5_realType
-
-    turbTreatment     = decoupled      ! Decoupled solver for the
-    ! turbulent equations
-    turbSmoother      = adi            ! solved using an adi scheme.
-    freezeTurbSource = .true.          ! Freeze the coarse grid source
-    ! terms for a coupled solver.
-    turbRelax = turbRelaxNotDefined    ! Will be set later, depending
-    ! on the turbulence model.
-
-    cfl = -one                         ! Serves as a check later on.
-
-    relaxBleeds = 0.1_realType         ! Relaxation factor for the
-    ! bleed boundary conditions.
-
-    alfaTurb =  0.8_realType
-    betaTurb = -one                    ! Serves as a check later on.
-
-    L2Conv        = 1.e-6_realType     ! Six orders of magnitude for
-    ! convergence.
-    L2ConvCoarse = 1.e-2_realType      ! Only two on coarse grids in
-    ! full mg.
-
-    maxL2DeviationFactor = 1_realType
-    nCyclesCoarse = -1             ! If these parameters are not
-    cflCoarse     = -one           ! specified the corresponding fine
-    ! grid values are taken.
-
-    fcoll = one       ! No relaxation when restricting the residuals.
-
-    mgBoundCorr = bcDirichlet0 ! Zero out the boundary halo's for
-    ! the multigrid corrections.
-
-    mgStartlevel = -1    ! Start at the coarsest grid of the mg cycle
-    ! when no restart is performed.
-    mgDescription = "sg" ! Single grid computation.
-    !
-    !       Set the default values for the motion parameters,
-    !       i.e. no motion.
-    !
-    ! Translation data.
-
-
-    ! Rotation data.
-
-    rotPoint = zero
-
-    degreePolXRot = -1      ! -1, because the start index is 0.
-    degreePolYRot = -1
-    degreePolZRot = -1
-
-    degreeFourXRot = -1     ! -1, because the start index is 0,
-    ! at least of the cosine part.
-    degreeFourYRot = -1
-    degreeFourZRot = -1
-
-    omegaFourXRot = zero
-    omegaFourYRot = zero
-    omegaFourZRot = zero
-
-    ! The logical to determine whether or not a motion is specified.
-    ! Initialize it to .false.
-
-    gridMotionSpecified = .false.
-    !
-    !       Set the default values for the parallel parameters.
-    !
-    loadImbalance = 0.1_realType  ! Allow 10 percent load imbalance.
-    splitBlocks   = .true.        ! Allow the splitting of blocks to
-    ! obtain a better load balancing.
-    loadbalanceiter = 2           ! Do two iterations
-    !
-    !       Set the default values for the physics parameters.
-    !
-    equations     = none       ! These are parameters that must be
-    equationMode = none        ! specified. If not, the program
-    flowType     = none        ! exits.
-    turbModel    = none
-
-    cpModel = cpConstant       ! Constant cp.
-
-    turbProd = strain          ! Strain is used in the production
-    ! term of transport turbulence models.
-
-    wallFunctions = .false.    ! No wall functions used.
-
-    Mach     = -one            ! Both parameters must be specified
-    Reynolds = -one            ! for external flows. The -1. serves
-    ! as a check later on.
-
-    MachCoef = -one            ! If not specified MachCoef will
-    ! be set to Mach.
-
-    velDirFreestream(1) = one  ! Free stream velocity
-    velDirFreestream(2) = zero ! is specified in the
-    velDirFreestream(3) = zero ! x-axis direction.
-
-    liftDirSpecified = .false. ! Lift direction not specified.
-
-    ReynoldsLength = one
-    TinfDim = 288.15_realType
-    gammaConstant  = 1.4_realType
-    RGasDim        = 287.87_realType
-
-    prandtl     = 0.72_realType
-    prandtlTurb = 0.90_realType
-    pklim       = 20.0_realType
-    wallOffset  = zero
-
-    SSuthDim    = 110.55_realType
-    muSuthDim   = 1.716e-5_realType
-    TSuthDim    = 273.15_realType
-
-    rvfN = 1                             ! Version 1 of the v2f
-    ! model is used.
-    rvfB = .true.                        ! An upper bound is used
-    ! in the v2f scales.
-    eddyVisInfRatio = -one               ! Default value depends on
-    ! the turbulence model.
-    turbIntensityInf = 0.001_realType
-
-    surfaceRef = one
-    lengthRef  = one
-
-    pointRef(1) = zero
-    pointRef(2) = zero
-    pointRef(3) = zero
-
-    momentAxis(1,1) = zero
-    momentAxis(1,2) = one
-    momentAxis(2,1) = zero
-    momentAxis(2,2) = zero
-    momentAxis(3,1) = zero
-    momentAxis(3,2) = zero
-
-    !
-    !       Set the default values for the time spectral parameters.
-    !
-    nTimeIntervalsSpectral   = -1   ! Serves as a check later on.
-
-    nUnsteadySolSpectral = -1       ! Serves as a check later on.
-
-    writeUnsteadyVolSpectral  = .false. ! No writing of the files
-    writeUnsteadySurfSpectral = .false. ! for postprocessing.
-
-    writeUnsteadyRestartSpectral = .false. ! No writing of an unsteady
-    ! mode restart file.
-
-    dtUnsteadyRestartSpectral    = -one    ! Is checked later on.
-    !
-    !       Set the default values for the unsteady parameters.
-    !
-    timeAccuracy = secondOrder  ! Second order time accuracy.
-
-    nTimeStepsCoarse = -1       ! Serves as a check later on.
-    nTimeStepsFine   = -1       ! Serves as a check later on.
-
-    deltaT = -one               ! Serves as a check later on.
-
-    useALE = .True.             ! Use the ALE scheme by default.
-
-    updateWallDistanceUnsteady = .true.  ! This default value is
-    ! overruled for models that
-    ! are wall distance free.
-    !
-    !       The reference state variables. Set them to -1, such that they
-    !       can be checked later on.
-    !
-    pRef   = -one
-    rhoRef = -one
-    TRef   = -one
-    !
-    !       The conversion factor of the grid units to meters. Default 1.
-    !
-    LRef           = one
-    LRefSpecified = .false.
-    !
-    !       Initialization of some unsteady restart parameters. These will
-    !       be overwritten when an actual unsteady restart is performed.
-    !
-    nOldSolAvail        = 1
-    nTimeStepsRestart   = 0
-    timeUnsteadyRestart = zero
-    !
-    !       Variables needed for the writing of grid and solution files.
-    !
-    timeSpectralGridsNotWritten = .true.
-
-    ! Additional Paramters Requiring Defaults
-    printIterations = .True.
-    routineFailed = .False.
-    fatalFail     = .False.
-    lumpedDiss    = .False.
-    approxSA      = .False.
-    useApproxWallDistance = .False.
-    cflLimit = 3.0
-    adjointPETScVarsAllocated = .False.
-    adjointPETScPreProcVarsAllocated = .False.
-    usematrixfreedrdw = .False.
-    sepSensorOffset = zero
-    sepSensorSharpness = 10_realType
-  end subroutine setDefaultValues
-
-  subroutine initializeIsoSurfaceVariables(values, nValues)
-    !
-    !       isoVariables extracts from the given string the extra
-    !       iso surface variables to be written to the solution file.
-    !
-    use constants
-    use extraOutput, only : isoValues, isoSurfaceNames, nIsoSurface
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nValues
-    real(kind=realType), dimension(nValues), intent(in) :: values
-
-    ! Basically just copy into module
-    if (allocated(isoValues)) then
-       deallocate(isoValues)
-    end if
-
-    if (allocated(isoSurfaceNames)) then
-       deallocate(isoSurfaceNames)
-    end if
-
-    nIsoSurface = nValues
-    allocate(isoValues(nIsoSurface))
-    allocate(isoSurfaceNames(nIsoSurface))
-
-    isoValues = values
-
-  end subroutine initializeIsoSurfaceVariables
-
-  subroutine setIsoSurfaceVariable(variable, iVar)
-
-    ! Set variable to iVar. initializeIsoSurfaceVariables MUST be called
-    ! first with the desired number of values to set.
-
-    use constants
-    use cgnsNames
-    use extraOutput
-    use communication, only : myID
-    use utils, only : EChk
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    character(len=*), intent(in):: variable
-    integer(kind=intType) :: iVar
-
-    select case (variable)
-    case("rho")
-       isoSurfaceNames(iVar) = cgnsDensity
-    case("vx")
-       isoSurfaceNames(iVar) = cgnsVelX
-    case("vy")
-       isoSurfaceNames(iVar) = cgnsVelY
-    case("vz")
-       isoSurfaceNames(iVar) = cgnsVelZ
-    case("P")
-       isoSurfaceNames(iVar) = cgnsPressure
-    case ("mx")
-       isoSurfaceNames(iVar) = cgnsMomX
-    case ("my")
-       isoSurfaceNames(iVar) = cgnsMomY
-    case ("mz")
-       isoSurfaceNames(iVar) = cgnsMomZ
-    case ("rvx")
-       isoSurfaceNames(iVar) = cgnsRelVelX
-    case ("rvy")
-       isoSurfaceNames(iVar) = cgnsRelVelY
-    case ("rvz")
-       isoSurfaceNames(iVar) = cgnsRelVelZ
-    case ("rhoe")
-       isoSurfaceNames(iVar) = cgnsEnergy
-    case ("temp")
-       isoSurfaceNames(iVar) = cgnsTemp
-    case ("vort")
-       isoSurfaceNames(iVar) = cgnsVortMagn
-    case ("vortx")
-       isoSurfaceNames(iVar) = cgnsVortX
-    case ("vorty")
-       isoSurfaceNames(iVar) = cgnsVortY
-    case ("vortz")
-       isoSurfaceNames(iVar) = cgnsVortZ
-    case ("cp")
-       isoSurfaceNames(iVar) = cgnsCp
-    case ("mach")
-       isoSurfaceNames(iVar) = cgnsMach
-    case ("rmach")
-       isoSurfaceNames(iVar) = cgnsRelMach
-    case ("macht")
-       isoSurfaceNames(iVar) = cgnsMachTurb
-    case ("ptloss")
-       isoSurfaceNames(iVar) = cgnsPTotLoss
-    case ("eddy")
-       isoSurfaceNames(iVar) = cgnsEddy
-    case ("eddyratio")
-       isoSurfaceNames(iVar) = cgnsEddyRatio
-    case ("dist")
-       isoSurfaceNames(iVar) = cgnsWallDist
-    case ("resrho")
-       isoSurfaceNames(iVar) = cgnsResRho
-    case("shock")
-       isoSurfaceNames(iVar) = cgnsShock
-    case("filteredShock")
-       isoSurfaceNames(iVar) = cgnsFilteredShock
-    case default
-
-       if(myID == 0) Then
-          print *,'Error: ', variable, 'cannot be used as an isoSurface'
-       end if
-       call EChk(-99, __FILE__, __LINE__)
-    end select
-  end subroutine setIsoSurfaceVariable
+        ! Surface solution defaults to single precision
+        precisionSurfGrid = precisionSingle
+        precisionSurfSol = precisionSingle
+
+        !
+        !       Set the default values for the iteration parameters.
+        !
+        nCycles = -1    ! Serves as a check later on.
+        nsgStartup = 0    ! No single grid startup iterations.
+        nSubIterTurb = 0    ! No additional turbulent subiterations.
+        nUpdateBleeds = 50    ! Update the bleeds every 50 iterations.
+
+        nSaveVolume = 1      ! Only save at the end of the computation.
+        nSaveSurface = 1
+
+        smoother = none
+        nRKStages = 5
+        nSubiterations = 1
+
+        !resAveraging =  noResAveraging ! No residual averaging.
+        resAveraging = noResAveraging
+        smoop = 1.5_realType
+
+        turbTreatment = decoupled      ! Decoupled solver for the
+        ! turbulent equations
+        turbSmoother = adi            ! solved using an adi scheme.
+        freezeTurbSource = .true.          ! Freeze the coarse grid source
+        ! terms for a coupled solver.
+        turbRelax = turbRelaxNotDefined    ! Will be set later, depending
+        ! on the turbulence model.
+
+        cfl = -one                         ! Serves as a check later on.
+
+        relaxBleeds = 0.1_realType         ! Relaxation factor for the
+        ! bleed boundary conditions.
+
+        alfaTurb = 0.8_realType
+        betaTurb = -one                    ! Serves as a check later on.
+
+        L2Conv = 1.e-6_realType     ! Six orders of magnitude for
+        ! convergence.
+        L2ConvCoarse = 1.e-2_realType      ! Only two on coarse grids in
+        ! full mg.
+
+        maxL2DeviationFactor = 1_realType
+        nCyclesCoarse = -1             ! If these parameters are not
+        cflCoarse = -one           ! specified the corresponding fine
+        ! grid values are taken.
+
+        fcoll = one       ! No relaxation when restricting the residuals.
+
+        mgBoundCorr = bcDirichlet0 ! Zero out the boundary halo's for
+        ! the multigrid corrections.
+
+        mgStartlevel = -1    ! Start at the coarsest grid of the mg cycle
+        ! when no restart is performed.
+        mgDescription = "sg" ! Single grid computation.
+        !
+        !       Set the default values for the motion parameters,
+        !       i.e. no motion.
+        !
+        ! Translation data.
+
+        ! Rotation data.
+
+        rotPoint = zero
+
+        degreePolXRot = -1      ! -1, because the start index is 0.
+        degreePolYRot = -1
+        degreePolZRot = -1
+
+        degreeFourXRot = -1     ! -1, because the start index is 0,
+        ! at least of the cosine part.
+        degreeFourYRot = -1
+        degreeFourZRot = -1
+
+        omegaFourXRot = zero
+        omegaFourYRot = zero
+        omegaFourZRot = zero
+
+        ! The logical to determine whether or not a motion is specified.
+        ! Initialize it to .false.
+
+        gridMotionSpecified = .false.
+        !
+        !       Set the default values for the parallel parameters.
+        !
+        loadImbalance = 0.1_realType  ! Allow 10 percent load imbalance.
+        splitBlocks = .true.        ! Allow the splitting of blocks to
+        ! obtain a better load balancing.
+        loadbalanceiter = 2           ! Do two iterations
+        !
+        !       Set the default values for the physics parameters.
+        !
+        equations = none       ! These are parameters that must be
+        equationMode = none        ! specified. If not, the program
+        flowType = none        ! exits.
+        turbModel = none
+
+        cpModel = cpConstant       ! Constant cp.
+
+        turbProd = strain          ! Strain is used in the production
+        ! term of transport turbulence models.
+
+        wallFunctions = .false.    ! No wall functions used.
+
+        Mach = -one            ! Both parameters must be specified
+        Reynolds = -one            ! for external flows. The -1. serves
+        ! as a check later on.
+
+        MachCoef = -one            ! If not specified MachCoef will
+        ! be set to Mach.
+
+        velDirFreestream(1) = one  ! Free stream velocity
+        velDirFreestream(2) = zero ! is specified in the
+        velDirFreestream(3) = zero ! x-axis direction.
+
+        liftDirSpecified = .false. ! Lift direction not specified.
+
+        ReynoldsLength = one
+        TinfDim = 288.15_realType
+        gammaConstant = 1.4_realType
+        RGasDim = 287.87_realType
+
+        prandtl = 0.72_realType
+        prandtlTurb = 0.90_realType
+        pklim = 20.0_realType
+        wallOffset = zero
+
+        SSuthDim = 110.55_realType
+        muSuthDim = 1.716e-5_realType
+        TSuthDim = 273.15_realType
+
+        rvfN = 1                             ! Version 1 of the v2f
+        ! model is used.
+        rvfB = .true.                        ! An upper bound is used
+        ! in the v2f scales.
+        eddyVisInfRatio = -one               ! Default value depends on
+        ! the turbulence model.
+        turbIntensityInf = 0.001_realType
+
+        surfaceRef = one
+        lengthRef = one
+
+        pointRef(1) = zero
+        pointRef(2) = zero
+        pointRef(3) = zero
+
+        momentAxis(1, 1) = zero
+        momentAxis(1, 2) = one
+        momentAxis(2, 1) = zero
+        momentAxis(2, 2) = zero
+        momentAxis(3, 1) = zero
+        momentAxis(3, 2) = zero
+
+        !
+        !       Set the default values for the time spectral parameters.
+        !
+        nTimeIntervalsSpectral = -1   ! Serves as a check later on.
+
+        nUnsteadySolSpectral = -1       ! Serves as a check later on.
+
+        writeUnsteadyVolSpectral = .false. ! No writing of the files
+        writeUnsteadySurfSpectral = .false. ! for postprocessing.
+
+        writeUnsteadyRestartSpectral = .false. ! No writing of an unsteady
+        ! mode restart file.
+
+        dtUnsteadyRestartSpectral = -one    ! Is checked later on.
+        !
+        !       Set the default values for the unsteady parameters.
+        !
+        timeAccuracy = secondOrder  ! Second order time accuracy.
+
+        nTimeStepsCoarse = -1       ! Serves as a check later on.
+        nTimeStepsFine = -1       ! Serves as a check later on.
+
+        deltaT = -one               ! Serves as a check later on.
+
+        useALE = .True.             ! Use the ALE scheme by default.
+
+        updateWallDistanceUnsteady = .true.  ! This default value is
+        ! overruled for models that
+        ! are wall distance free.
+        !
+        !       The reference state variables. Set them to -1, such that they
+        !       can be checked later on.
+        !
+        pRef = -one
+        rhoRef = -one
+        TRef = -one
+        !
+        !       The conversion factor of the grid units to meters. Default 1.
+        !
+        LRef = one
+        LRefSpecified = .false.
+        !
+        !       Initialization of some unsteady restart parameters. These will
+        !       be overwritten when an actual unsteady restart is performed.
+        !
+        nOldSolAvail = 1
+        nTimeStepsRestart = 0
+        timeUnsteadyRestart = zero
+        !
+        !       Variables needed for the writing of grid and solution files.
+        !
+        timeSpectralGridsNotWritten = .true.
+
+        ! Additional Paramters Requiring Defaults
+        printIterations = .True.
+        routineFailed = .False.
+        fatalFail = .False.
+        lumpedDiss = .False.
+        approxSA = .False.
+        useApproxWallDistance = .False.
+        cflLimit = 3.0
+        adjointPETScVarsAllocated = .False.
+        adjointPETScPreProcVarsAllocated = .False.
+        usematrixfreedrdw = .False.
+        sepSensorOffset = zero
+        sepSensorSharpness = 10_realType
+    end subroutine setDefaultValues
+
+    subroutine initializeIsoSurfaceVariables(values, nValues)
+        !
+        !       isoVariables extracts from the given string the extra
+        !       iso surface variables to be written to the solution file.
+        !
+        use constants
+        use extraOutput, only: isoValues, isoSurfaceNames, nIsoSurface
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nValues
+        real(kind=realType), dimension(nValues), intent(in) :: values
+
+        ! Basically just copy into module
+        if (allocated(isoValues)) then
+            deallocate (isoValues)
+        end if
+
+        if (allocated(isoSurfaceNames)) then
+            deallocate (isoSurfaceNames)
+        end if
+
+        nIsoSurface = nValues
+        allocate (isoValues(nIsoSurface))
+        allocate (isoSurfaceNames(nIsoSurface))
+
+        isoValues = values
+
+    end subroutine initializeIsoSurfaceVariables
+
+    subroutine setIsoSurfaceVariable(variable, iVar)
+
+        ! Set variable to iVar. initializeIsoSurfaceVariables MUST be called
+        ! first with the desired number of values to set.
+
+        use constants
+        use cgnsNames
+        use extraOutput
+        use communication, only: myID
+        use utils, only: EChk
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        character(len=*), intent(in):: variable
+        integer(kind=intType) :: iVar
+
+        select case (variable)
+        case ("rho")
+            isoSurfaceNames(iVar) = cgnsDensity
+        case ("vx")
+            isoSurfaceNames(iVar) = cgnsVelX
+        case ("vy")
+            isoSurfaceNames(iVar) = cgnsVelY
+        case ("vz")
+            isoSurfaceNames(iVar) = cgnsVelZ
+        case ("P")
+            isoSurfaceNames(iVar) = cgnsPressure
+        case ("mx")
+            isoSurfaceNames(iVar) = cgnsMomX
+        case ("my")
+            isoSurfaceNames(iVar) = cgnsMomY
+        case ("mz")
+            isoSurfaceNames(iVar) = cgnsMomZ
+        case ("rvx")
+            isoSurfaceNames(iVar) = cgnsRelVelX
+        case ("rvy")
+            isoSurfaceNames(iVar) = cgnsRelVelY
+        case ("rvz")
+            isoSurfaceNames(iVar) = cgnsRelVelZ
+        case ("rhoe")
+            isoSurfaceNames(iVar) = cgnsEnergy
+        case ("temp")
+            isoSurfaceNames(iVar) = cgnsTemp
+        case ("vort")
+            isoSurfaceNames(iVar) = cgnsVortMagn
+        case ("vortx")
+            isoSurfaceNames(iVar) = cgnsVortX
+        case ("vorty")
+            isoSurfaceNames(iVar) = cgnsVortY
+        case ("vortz")
+            isoSurfaceNames(iVar) = cgnsVortZ
+        case ("cp")
+            isoSurfaceNames(iVar) = cgnsCp
+        case ("mach")
+            isoSurfaceNames(iVar) = cgnsMach
+        case ("rmach")
+            isoSurfaceNames(iVar) = cgnsRelMach
+        case ("macht")
+            isoSurfaceNames(iVar) = cgnsMachTurb
+        case ("ptloss")
+            isoSurfaceNames(iVar) = cgnsPTotLoss
+        case ("eddy")
+            isoSurfaceNames(iVar) = cgnsEddy
+        case ("eddyratio")
+            isoSurfaceNames(iVar) = cgnsEddyRatio
+        case ("dist")
+            isoSurfaceNames(iVar) = cgnsWallDist
+        case ("resrho")
+            isoSurfaceNames(iVar) = cgnsResRho
+        case ("shock")
+            isoSurfaceNames(iVar) = cgnsShock
+        case ("filteredShock")
+            isoSurfaceNames(iVar) = cgnsFilteredShock
+        case default
+
+            if (myID == 0) Then
+                print *, 'Error: ', variable, 'cannot be used as an isoSurface'
+            end if
+            call EChk(-99, __FILE__, __LINE__)
+        end select
+    end subroutine setIsoSurfaceVariable
 
 end module inputParamRoutines
diff --git a/src/modules/block.F90 b/src/modules/block.F90
index 4b185e3ba..b5db7f123 100644
--- a/src/modules/block.F90
+++ b/src/modules/block.F90
@@ -1,1120 +1,1102 @@
 module block
-  !
-  !       This module contains the definition of the derived data type
-  !       for block, which is the basic building block for this code.
-  !       Apart from the derived data type for block, this module also
-  !       contains the actual array for storing the blocks and the
-  !       number of blocks stored on this processor.
-  !
-  use constants, only : realType, intType, porType, maxCGNSNameLen, &
-       sortByDonor, sortByReceiver
-  implicit none
-  save
-
-  !
-  !       The definition of the derived data type visc_subface_type,
-  !       which stores the viscous stress tensor and heat flux vector.
-  !       In this way it is avoided that these quantities must be
-  !       recomputed for the viscous forces and postprocessing. This
-  !       saves both time and a considerable amount of code.
-  !
-  type viscSubfaceType
-
-     ! tau(:,:,6): The 6 components of the viscous stress tensor.
-     !             The first 2 dimensions of these arrays are equal
-     !             to the dimenions of the cell subface without any
-     !             halo cell. Consequently the starting index is
-     !             arbitrary, such that no offset computation is
-     !             needed when the arrays are accessed.
-     ! q(:,:,3):   Same story for the heat flux vector.
-     ! uTau(:,:):  And for the friction velocity.
-
-     real(kind=realType), dimension(:,:,:), pointer :: tau, q
-     real(kind=realType), dimension(:,:),   pointer :: uTau
-
-  end type viscSubfaceType
-
-  type rPtr
-     real(kind=realType), dimension(:, :, :), pointer :: var
-  end type rPtr
-
-  type iPtr
-     integer(kind=intType), dimension(:, :, :), pointer :: var
-  end type iPtr
-  !
-  !       The definition of the derived data type BCDataType, which
-  !       stores the prescribed data of boundary faces as well as unit
-  !       normals. For all the arrays the first two dimensions equal the
-  !       dimensions of the subface, possibly extended with halo cells.
-  !       Consequently the starting index is arbitrary, such that no
-  !       offset computation is needed when the array is accessed.
-  !
-  type BCDataType
-
-     ! inBeg, inEnd: Node range in the first direction of the subface
-     ! jnBeg, jnEnd: Idem in the second direction.
-     ! icBeg, icEnd: Cell range in the first direction of the subface
-     ! jcBeg, jcEnd: Idem in the second direction.
-
-     integer(kind=intType) :: inBeg, inEnd, jnBeg, jnEnd
-     integer(kind=intType) :: icBeg, icEnd, jcBeg, jcEnd
-
-     ! norm(:,:,3):  The unit normal; it points out of the domain.
-     ! rface(:,:):   Velocity of the face in the direction of the
-     !               outward pointing normal. only allocated for
-     !               the boundary conditions that need this info.
-
-     real(kind=realType), dimension(:,:,:), pointer :: norm
-     real(kind=realType), dimension(:,:),   pointer :: rface
-     real(kind=realType), dimension(:,:,:), pointer :: F, Fv, Fp
-     real(kind=realType), dimension(:,:,:), pointer :: T, Tv, Tp
-     real(kind=realType), dimension(:,:), pointer :: area
-     real(kind=realType), dimension(:,:), pointer :: CpTarget
-     integer(kind=realType), dimension(:,:), pointer :: surfIndex
-
-     ! Generic pointers for performing a globalized reduction.
-     real(kind=realType), dimension(:, :), pointer :: nodeVal
-     real(kind=realType), dimension(:, :), pointer :: cellVal
-
-     ! symNorm is the normal for (symmertry) boundary conditions.
-     ! symNormSet is set to false until symNorm is computed at the
-     ! beginning of a simulation. symNorm then remains constant for
-     ! the remainder of the simulation. This is ok, since if the
-     ! normal of the symmetry plane is changing, your results are
-     ! invalid anyway.  These values are only used on symmetry
-     ! plane. They are undefined for other BC's.
-     real(kind=realType), dimension(3) :: symNorm
-
-     logical :: symNormSet
-
-     ! subsonicInletTreatment: which boundary condition treatment
-     !                         to use for subsonic inlets; either
-     !                         totalConditions or massFlow.
-
-     integer(kind=intType) :: subsonicInletTreatment
-
-     ! uSlip(:,:,3):  the 3 components of the velocity vector on
-     !                a viscous wall.
-     ! TNS_Wall(:,:): Wall temperature for isothermal walls.
-     ! ksNS_Wall(:,:): Equivalent Sand Grain Roughness on viscous walls.
-
-     real(kind=realType), dimension(:,:,:), pointer :: uSlip
-     real(kind=realType), dimension(:,:),   pointer :: TNS_Wall
-     real(kind=realType), dimension(:,:),   pointer :: ksNS_Wall
-
-     ! The name of this boundary condition and it's index
-     character(maxCGNSNameLen) :: family
-     integer(kind=intType) :: famID
-
-     ! Added by HDN
-     ! normALE(0:nALEsteps,ie:ib,je:jb,3)
-     !                 - Storage of norm for intermediate meshes.
-     ! rFaceALE(0:nALEsteps,iBeg:iEnd,jBeg:jEnd)
-     !                 - Storage of rface for intermediate meshes.
-     ! uSlipALE(0:nALEsteps,iBeg:iEnd,jBeg:jEnd,3)
-     !                 - Storage of uSlip for intermediate meshes.
-     ! cellHeatFlux(iBeg:iEnd,jBeg:jEnd,3)
-     !                 - Surface heat flux (cell based/node based).
-     real(kind=realType), dimension(:,:,:,:), pointer :: normALE
-     real(kind=realType), dimension(:,:,:),   pointer :: rFaceALE
-     real(kind=realType), dimension(:,:,:,:), pointer :: uSlipALE
-     real(kind=realType), dimension(:,:),     pointer :: nodeHeatFlux
-     real(kind=realType), dimension(:,:),     pointer :: cellHeatFlux
-
-     ! ptInlet(:,:):       Total pressure at subsonic inlets.
-     ! ttInlet(:,:):       Total temperature at subsonic inlets.
-     ! htInlet(:,:):       Total enthalpy at subsonic inlets.
-     ! flowXDirInlet(:,:): X-direction of the flow for subsonic
-     !                     inlets.
-     ! flowYDirInlet(:,:): Idem in y-direction.
-     ! flowZDirInlet(:,:): Idem in z-direction.
-
-     real(kind=realType), dimension(:,:), pointer :: ptInlet, ttInlet, htInlet
-     real(kind=realType), dimension(:,:), pointer :: flowXDirInlet, flowYDirInlet, flowZDirInlet
-
-     ! turbInlet(:,:,nt1:nt2): Turbulence variables at inlets,
-     !                         either subsonic or supersonic.
-
-     real(kind=realType), dimension(:,:,:), pointer :: turbInlet
-
-     ! rho(:,:):  density; used for multiple bc's.
-     ! velX(:,:): x-velocity; used for multiple bc's.
-     ! velY(:,:): y-velocity; used for multiple bc's.
-     ! velZ(:,:): z-velocity; used for multiple bc's.
-     ! ps(:,:):   static pressure; used for multiple bc's.
-
-     real(kind=realType), dimension(:,:), pointer :: rho
-     real(kind=realType), dimension(:,:), pointer :: velX, velY, velZ
-     real(kind=realType), dimension(:,:), pointer :: ps
-
-     ! Surface blanking for force integration
-     integer(kind=intType), dimension(:,:), pointer :: iblank
-
-     ! Surface deviation. This is an estimate of how much the surface
-     ! deviates from the "real" underlying surface'
-     real(Kind=realType), dimension(:, :), pointer :: delta
-     real(Kind=realType), dimension(:, :), pointer :: deltaNode
-
-  end type BCDataType
-
-  type surfaceNodeWeightArray
-     real(kind=realType), dimension(:, :, :), pointer :: weight
-  end type surfaceNodeWeightArray
-
- type fringeType
-
-     ! Make everything in here static such that we can easily copy the
-     ! datatype and use MPI to communicate them directly. This data
-     ! type bears some resemblence to the haloList type used for the
-     ! B2B preprocessing.
-
-    ! The quality metric for the fringe
-    real(kind=realType) :: quality
-
-    ! This is the information regarding where the cell came from.
-    integer(kind=intType) :: myBlock, myIndex
-
-    ! This is the information about the donor that was found
-    integer(kind=intType) :: donorProc, donorBlock, dIndex
-    real(kind=realType) :: donorFrac(3)
-
-  end type fringeType
-
-  interface operator(<=)
-     module procedure lessEqualFringeType
-  end interface operator(<=)
-
-  interface operator(<)
-     module procedure lessFringeType
-  end interface operator(<)
-
-  type interpPtType
-     integer(kind=intType) :: donorProc, donorBlock, dI, dJ, dK, myBlock
-     real(kind=realType) :: donorFrac(3)
-  end type interpPtType
-
-  interface operator(<=)
-     module procedure lessEqualinterpPtType
-  end interface operator(<=)
-
-  interface operator(<)
-     module procedure lessInterpPtType
-  end interface operator(<)
-
-
-
-  !       The definition of the derived data type block_type, which
-  !       stores dimensions, coordinates, solution, etc.
-  !
-  type blockType
-     !
-     !         Block dimensions and orientation.
-     !
-     !  nx, ny, nz - Block integer dimensions for no halo cell based
-     !               quantities.
-     !  il, jl, kl - Block integer dimensions for no halo node based
-     !               quantities.
-     !  ie, je, ke - Block integer dimensions for single halo
-     !               cell-centered quantities.
-     !  ib, jb, kb - Block integer dimensions for double halo
-     !               cell-centered quantities.
-     ! rightHanded - Whether or not the block is a right handed.
-     !               If not right handed it is left handed of course.
-
-     integer(kind=intType) :: nx, ny, nz
-     integer(kind=intType) :: il, jl, kl
-     integer(kind=intType) :: ie, je, ke
-     integer(kind=intType) :: ib, jb, kb
-
-     logical :: rightHanded
-     !
-     !         Block boundary conditions.
-     !
-     !  nSubface             - Number of subfaces on this block.
-     !  n1to1                - Number of 1 to 1 block boundaries.
-     !  nBocos               - Number of physical boundary subfaces.
-     !  nViscBocos           - Number of viscous boundary subfaces.
-     !  BCType(:)            - Boundary condition type for each
-     !                         subface. See the module BCTypes for
-     !                         the possibilities.
-     !  BCFaceID(:)          - Block face location of each subface.
-     !                         possible values are: iMin, iMax, jMin,
-     !                         jMax, kMin, kMax. see also module
-     !                         BCTypes.
-     !  cgnsSubface(:)       - The subface in the corresponding cgns
-     !                         block. As cgns distinguishes between
-     !                         boundary and internal boundaries, the
-     !                         BCType of the subface is needed to
-     !                         know which one to take.
-     !  inBeg(:), inEnd(:)   - Lower and upper limits for the nodes
-     !  jnBeg(:), jnEnd(:)     in each of the index directions on a
-     !  knBeg(:), knEnd(:)     given subface. Note that one of these
-     !                         indices will not change since we will
-     !                         be moving on a face.
-     !  dinBeg(:), dinEnd(:) - Lower and upper limits for the nodes
-     !  djnBeg(:), djnEnd(:)   in the each of the index directions
-     !  dknBeg(:), dknEnd(:)   of the donor subface for this
-     !                         particular subface. Note that one of
-     !                         these indices will not change since we
-     !                         will be moving on a face.
-     !  icBeg(:), icEnd(:)   - Lower and upper limits for the cells
-     !  jcBeg(:), jcEnd(:)     in each of the index directions for
-     !  kcBeg(:), kcEnd(:)     the subface. The cells indicated by
-     !                         this range are halo cells (the
-     !                         constant index) adjacent to the face.
-     !                         a possible overlap outside the block
-     !                         is stored.
-     !  neighBlock(:)        - Local block number to which this
-     !                         subface connects. This value is set to
-     !                         zero if this subface is not connected
-     !                         to another block.
-     !  neighProc(:)         - Processor number where the neighbor
-     !                         block is stored. This value is set to
-     !                         -1 if this subface is not connected
-     !                         to another block.
-     !  l1(:), l2(:),        - Short hand for the transformation
-     !  l3(:)                  matrix between this subface and the
-     !                         neighbor block. These values are set
-     !                         to zero if this subface is not
-     !                         connected to another block.
-     !  groupNum(:)          - Group number to which this subface
-     !                         belongs. If this subface does not
-     !                         belong to any group, the corresponding
-     !                         entry in this array is zeroed out. If
-     !                         the subface belongs to a sliding mesh
-     !                         interface the absolute value of
-     !                         groupNum contains the number of the
-     !                         sliding mesh interface. One side of
-     !                         the interface gets a positive number,
-     !                         the other side a negative one.
-     !
-     !
-     integer(kind=intType) :: nSubface, n1to1, nBocos, nViscBocos
-
-     integer(kind=intType), dimension(:), pointer :: BCType
-     integer(kind=intType), dimension(:), pointer :: BCFaceID
-
-     integer(kind=intType), dimension(:), pointer :: cgnsSubface
-
-     integer(kind=intType), dimension(:), pointer :: inBeg, inEnd
-     integer(kind=intType), dimension(:), pointer :: jnBeg, jnEnd
-     integer(kind=intType), dimension(:), pointer :: knBeg, knEnd
-
-     integer(kind=intType), dimension(:), pointer :: dinBeg, dinEnd
-     integer(kind=intType), dimension(:), pointer :: djnBeg, djnEnd
-     integer(kind=intType), dimension(:), pointer :: dknBeg, dknEnd
-
-     integer(kind=intType), dimension(:), pointer :: icBeg, icEnd
-     integer(kind=intType), dimension(:), pointer :: jcBeg, jcEnd
-     integer(kind=intType), dimension(:), pointer :: kcBeg, kcEnd
-
-     integer(kind=intType), dimension(:), pointer :: neighBlock
-     integer(kind=intType), dimension(:), pointer :: neighProc
-     integer(kind=intType), dimension(:), pointer :: l1, l2, l3
-     integer(kind=intType), dimension(:), pointer :: groupNum
-
-     !
-     !         Overset interpolation information
-
-     integer(kind=intType), dimension(:,:,:), pointer :: iblank
-     integer(kind=intType), dimension(:,:,:), pointer :: iblankLast
-     integer(kind=intType), dimension(:,:,:), pointer :: status
-     integer(kind=intType), dimension(:,:,:), pointer :: forcedRecv
-     type(fringeType) , dimension(:), pointer :: fringes=>null()
-     integer(kind=intType), dimension(:, :, :, :), pointer :: fringePtr=>null()
-     integer(kind=intType), dimension(:, :, :, :), pointer :: gInd=>null()
-     integer(kind=intType), pointer :: nDonors
-     integer(kind=intType) :: nDonorsOnOwnedCells
-
-     integer(kind=intType), dimension(:, :), pointer :: orphans
-     integer(kind=intType) :: nOrphans
-
-
-     !
-     !         Boundary data for the boundary subfaces.
-     !
-     ! BCData(nBocos): The boundary data for each of the boundary
-     !                 subfaces.
-
-     type(BCDataType), dimension(:), pointer :: BCData
-     !
-     !         The stress tensor and heat flux vector at viscous wall faces
-     !         as well as the face pointers to these viscous wall faces.
-     !
-     ! viscSubface(nViscBocos):    Storage for the viscous stress
-     !                             tensor and heat flux vector for
-     !                             the viscous subfaces.
-     ! viscIMinPointer(2:jl,2:kl): Pointer to viscous subface for
-     !                             the iMin block face. If the face
-     !                             is not part of a viscous subface
-     !                             this value is set to 0.
-     ! viscIMaxPointer(2:jl,2:kl): Idem for iMax block face.
-     ! viscJMinPointer(2:il,2:kl): Idem for jMin block face.
-     ! viscJMaxPointer(2:il,2:kl): Idem for jmax block face.
-     ! viscKMinPointer(2:il,2:jl): Idem for kMin block face.
-     ! viscKMaxPointer(2:il,2:jl): Idem for kMax block face.
-
-     type(viscSubfaceType), dimension(:), pointer :: viscSubface
-
-     integer(kind=intType), dimension(:,:), pointer :: viscIMinPointer
-     integer(kind=intType), dimension(:,:), pointer :: viscIMaxPointer
-     integer(kind=intType), dimension(:,:), pointer :: viscJMinPointer
-     integer(kind=intType), dimension(:,:), pointer :: viscJMaxPointer
-     integer(kind=intType), dimension(:,:), pointer :: viscKMinPointer
-     integer(kind=intType), dimension(:,:), pointer :: viscKMaxPointer
-     !
-     !         Mesh related variables.
-     !
-     !  x(0:ie,0:je,0:ke,3)  - xyz locations of grid points in block.
-     !  xOld(nOld,:,:,:,:)   - Coordinates on older time levels;
-     !                         only needed for unsteady problems on
-     !                         deforming grids. Only allocated on
-     !                         the finest grid level. The blank
-     !                         dimensions are equal to the dimensions
-     !                         of x.
-     !  sI(0:ie,1:je,1:ke,3) - Projected areas in the i-coordinate
-     !                         direction. Normals point in the
-     !                         direction of increasing i.
-     !  sJ(1:ie,0:je,1:ke,3) - Projected areas in the j-coordinate
-     !                         direction. Normals point in the
-     !                         direction of increasing j.
-     !  sK(1:ie,1:je,0:ke,3) - Projected areas in the k-coordinate
-     !                         direction. Normals point in the
-     !                         direction of increasing k.
-     !  vol(0:ib,0:jb,0:kb)  - Cell volumes. The second level halo
-     !                         is present for a multigrid option.
-     !  volOld(nold,2:il,..) - Volumes on older time levels; only
-     !                         needed for unsteady problems on
-     !                         deforming grids. Only allocated on
-     !                         the finest grid level.
-     !  uv(2,2:il,2:jl,2:kl) - Parametric location on elemID for each cell.
-     !                         Only used for fast wall distance calcs.
-     !  porI(1:il,2:jl,2:kl) - Porosity in the i direction.
-     !  porJ(2:il,1:jl,2:kl) - Porosity in the j direction.
-     !  porK(2:il,2:jl,1:kl) - Porosity in the k direction.
-     !
-     !  indFamilyI(:,:,:)  - Index of the i-face in the arrays
-     !                       to compute the local mass flow
-     !                       for a family or sliding mesh interface.
-     !                       Dimension is (1:il,2:jl,2:kl).
-     !  indFamilyJ(:,:,:)  - Idem for the j-faces.
-     !                       Dimension is (2:il,1:jl,2:kl).
-     !  indFamilyK(:,:,:)  - Idem for the k-faces.
-     !                       Dimension is (2:il,2:jl,1:kl)
-     !  factFamilyI(:,:,:) - Corresponding factor to make sure
-     !                       that the massflow is defined positive
-     !                       when it enters the block and to define
-     !                       the mass flow of the entire wheel
-     !                       instead of a sector. Hence the possible
-     !                       values or -nSlices and nSlices, where
-     !                       nSlices or the number of sections to
-     !                       obtain the full wheel.
-     !  factFamilyJ(:,:,:) - Idem for the j-faces.
-     !  factFamilyK(:,:,:) - Idem for the k-faces.
-     !
-     !  rotMatrixI(:,:,:,:,:) - Rotation matrix of the i-faces to
-     !                          transform the velocity components
-     !                          from Cartesian to local cylindrical.
-     !                          This is needed only for problems with
-     !                          rotational periodicity in combination
-     !                          with an upwind scheme.
-     !                          Dimension is (1:il,2:jl,2:kl,3,3).
-     !  rotMatrixJ(:,:,:,:,:) - Idem for the j-faces.
-     !                          Dimension is (2:il,1:jl,2:kl,3,3).
-     !  rotMatrixK(:,:,:,:,:) - Idem for the k-faces.
-     !                          Dimension is (2:il,2:jl,1:kl,3,3).
-     !
-     !  blockIsMoving      - Whether or not the block is moving.
-     !  addGridVelocities  - Whether or not the face velocities
-     !                       are allocated and set.
-     !  sFaceI(0:ie,je,ke) - Dot product of the face velocity and
-     !                       the normal in i-direction.
-     !  sFaceJ(ie,0:je,ke) - Idem in j-direction.
-     !  sFaceK(ie,je,0:ke) - Idem in k-direction.
-
-     real(kind=realType), dimension(:,:,:,:),   pointer :: x, xtmp
-     real(kind=realType), dimension(:,:,:,:,:),   pointer :: Xold
-     real(kind=realType), dimension(:,:,:,:), pointer :: sI, sJ, sK
-     real(kind=realType), dimension(:,:,:),   pointer :: vol
-     real(kind=realType), dimension(:,:,:,:), pointer :: volOld
-     real(kind=realType), dimension(:,:,:),   pointer :: volref
-     real(kind=realType), dimension(:,:,:,:), pointer :: uv
-     integer(kind=intType), dimension(:,:,:,:), pointer :: surfNodeIndices
-
-     integer(kind=porType), dimension(:,:,:), pointer :: porI, porJ, porK
-     integer(kind=intType), dimension(:,:,:), pointer :: indFamilyI, indFamilyJ, indFamilyK
-     integer(kind=intType), dimension(:,:,:), pointer :: factFamilyI, factFamilyJ, factFamilyK
-     real(kind=realType), dimension(:,:,:,:,:), pointer :: rotMatrixI, rotMatrixJ, rotMatrixK
-
-     logical :: blockIsMoving, addGridVelocities
-
-     real(kind=realType), dimension(:,:,:), pointer :: sFaceI, sFaceJ, sFaceK
-
-
-     ! Added by HDN
-     ! xALE(0:ie,0:je,0:ke,3)              - Temporary storage of x so that
-     !                                       intermediate meshes can be stored in
-     !                                       x directly
-     ! sVeloIALE(0:ie,1:je,1:ke,3)
-     ! sVeloJALE(1:ie,0:je,1:ke,3)
-     ! sVeloKALE(1:ie,1:je,0:ke,3)         - Storage of surface velocities at one
-     !                                       time step
-     ! sIALE(0:nALEsteps,0:ie,1:je,1:ke,3)
-     ! sJALE(0:nALEsteps,1:ie,0:je,1:ke,3)
-     ! sKALE(0:nALEsteps,1:ie,1:je,0:ke,3) - Storage of sI, sJ, sK for intermediate
-     !                                       meshes
-     ! sFaceIALE(0:nALEsteps,0:ie,je,ke)
-     ! sFaceJALE(0:nALEsteps,ie,0:je,ke)
-     ! sFaceKALE(0:nALEsteps,ie,je,0:ke)   - Storage of sFaceI, sFaceJ, sFaceK for
-     !                                       intermediate meshes
-     real(kind=realType), dimension(:,:,:,:),   pointer :: xALE
-     real(kind=realType), dimension(:,:,:,:),   pointer :: sVeloIALE, sVeloJALE, sVeloKALE
-     real(kind=realType), dimension(:,:,:,:,:), pointer :: sIALE, sJALE, sKALE
-     real(kind=realType), dimension(:,:,:,:),   pointer :: sFaceIALE, sFaceJALE, sFaceKALE
-
-
-     ! Tempory storage for overset variables
-     real(kind=realType), dimension(:, :, :, :), pointer :: XSeed
-     integer(kind=intType), dimension(:, :, :), pointer :: wallInd
-
-     !
-     !         Flow variables.
-     !
-     ! w(0:ib,0:jb,0:kb,1:nw)       - The set of independent variables
-     !                                w(i,j,k,1:nwf) flow field
-     !                                variables, which are rho, u,
-     !                                v, w and rhoE. In other words
-     !                                the velocities  are stored and
-     !                                not the momentum!!!!
-     !                                w(i,j,k,nt1:nt2) turbulent
-     !                                variables; also the primitive
-     !                                variables are stored.
-     ! wOld(nOld,2:il,2:jl,2:kl,nw) - Solution on older time levels,
-     !                                needed for the time integration
-     !                                for unsteady problems. In
-     !                                constrast to w, the conservative
-     !                                variables are stored in wOld for
-     !                                the flow variables; the turbulent
-     !                                variables are always the
-     !                                primitive ones.
-     !                                Only allocated on the finest
-     !                                mesh.
-     ! p(0:ib,0:jb,0:kb)            - Static pressure.
-     ! gamma(0:ib,0:jb,0:kb)        - Specific heat ratio; only
-     !                                allocated on the finest grid.
-     ! rlv(0:ib,0:jb,0:kb)          - Laminar viscosity; only
-     !                                allocated on the finest mesh
-     !                                and only for viscous problems.
-     ! rev(0:ib,0:jb,0:kb)          - Eddy viscosity; only
-     !                                allocated rans problems with
-     !                                eddy viscosity models.
-     ! s(1:ie,1:je,1:ke,3)          - Mesh velocities of the cell
-     !                                centers; only for moving mesh
-     !                                problems.
-
-     real(kind=realType), dimension(:,:,:,:),   pointer :: w, wtmp
-     real(kind=realType), dimension(:,:,:,:,:), pointer :: dw_deriv
-     real(kind=realType), dimension(:,:,:,:,:), pointer :: wOld
-     real(kind=realType), dimension(:,:,:),     pointer :: p, gamma, aa
-     real(kind=realType), dimension(:,:,:),     pointer :: rlv, rev
-     real(kind=realType), dimension(:,:,:,:),   pointer :: s
-     real(kind=realType), dimension(:,:,:),   pointer :: shockSensor
-
-     ! Nodal Fluxes: ux,uy,uz,vx,vy,vz,wx,wy,wz,qx,qy,qz(il, jl, kl)
-     real(kind=realType), dimension(:, :, :), pointer:: ux, uy, uz
-     real(kind=realType), dimension(:, :, :), pointer:: vx, vy, vz
-     real(kind=realType), dimension(:, :, :), pointer:: wx, wy, wz
-     real(kind=realType), dimension(:, :, :), pointer:: qx, qy, qz
-
-
-
-
-     !
-     !         Residual and multigrid variables.
-     !
-     ! dw(0:ib,0:jb,0:kb,1:nw)   - Values of convective and combined
-     !                             flow residuals. Only allocated on
-     !                             the finest mesh.
-     ! fw(0:ib,0:jb,0:kb,1:nwf)  - values of artificial dissipation
-     !                             and viscous residuals.
-     !                             Only allocated on the finest mesh.
-
-     ! dwOldRK(:,2:il,2:jl,2:kl,nw) - Old residuals for the time
-     !                                accurate Runge-Kutta schemes.
-     !                                The first dimension is
-     !                                nRKStagesUnsteady - 1.Only
-     !                                allocated on the finest level
-     !                                and only in unsteady mode for
-     !                                Runge-Kutta schemes.
-
-     ! w1(1:ie,1:je,1:ke,1:nMGVar) - Values of the mg variables
-     !                               upon first entry to a coarser
-     !                               mesh; only allocated on the
-     !                               coarser grids. The variables
-     !                               used to compute the multigrid
-     !                               corrections are rho, u, v, w
-     !                               and p; the rhoE value is used
-     !                               for unsteady problems only.
-     ! p1(1:ie,1:je,1:ke)          - Value of the pressure upon
-     !                               first entry to a coarser grid;
-     !                               only allocated on the coarser
-     !                               grids.
-     ! wr(2:il,2:jl,2:kl,1:nMGVar) - Multigrid forcing terms; only
-     !                               allocated on the coarser grids.
-     !                               The forcing term of course
-     !                               contains conservative residuals,
-     !                               at least for the flow variables.
-     ! shockSensor(0:ib,0:jb,0:kb)   Precomputed sensor value for shock
-     !                               that is *NOT* differentated.
-     ! scratch(0:ib,0:jb,0:kb,5)     Scratch space for the turbulence
-     !                               models. NOMINALLY this could use
-     !                               dw and the code was nominally setup
-     !                               for this originally. However, this
-     !                               complicates reverse mode sensitivities
-     !                               So we use this instead.
-
-     real(kind=realType), dimension(:,:,:),     pointer :: p1
-     real(kind=realType), dimension(:,:,:,:),   pointer :: dw, fw
-     real(kind=realType), dimension(:,:,:,:),   pointer :: dwtmp, dwtmp2
-     real(kind=realType), dimension(:,:,:,:,:), pointer :: dwOldRK
-     real(kind=realType), dimension(:,:,:,:),   pointer :: w1, wr
-     real(kind=realType), dimension(:,:,:,:),   pointer :: scratch
-
-
-     ! Added by HDN
-     ! Used for ALE. Only allocated on the finest mesh.
-     ! Extra dim is used to store initial residuals
-     ! dwALE(0:nALEsteps,0:ib,0:jb,0:kb,1:nw)   - Values of ONLY the convective flux
-     !                                            of intermediate meshes.
-     ! fwALE(0:nALEsteps,0:ib,0:jb,0:kb,1:nwf)  - values of ONLY the artificial
-     !                                            dissipation of intermediate meshes.
-     real(kind=realType), dimension(:,:,:,:,:),   pointer :: dwALE, fwALE
-
-
-
-     ! mgIFine(2:il,2) - The two fine grid i-cells used for the
-     !                   restriction of the solution and residual to
-     !                   the coarse grid. Only on the coarser grids.
-     ! mgJFine(2:jl,2) - Idem for j-cells.
-     ! mgKFine(2:kl,2) - Idem for k-cells.
-
-     ! mgIWeight(2:il) - Weight for the residual restriction in
-     !                   in i-direction. Value is either 0.5 or 1.0,
-     !                   depending whether mgIFine(,1) is equal to
-     !                   or differs from mgIFine(,2).
-     ! mgJWeight(2:jl) - Idem for weights in j-direction.
-     ! mgKWeight(2:kl) - Idem for weights in k-direction.
-
-     ! mgICoarse(2:il,2) - The two coarse grid i-cells used for the
-     !                     interpolation of the correction to the
-     !                     fine grid. Not on the coarsest grid.
-     ! mgJCoarse(2:jl,2) - Idem for j-cells.
-     ! mgKCoarse(2:kl,2) - Idem for k-cells.
-
-     integer(kind=intType), dimension(:,:), pointer :: mgIFine
-     integer(kind=intType), dimension(:,:), pointer :: mgJFine
-     integer(kind=intType), dimension(:,:), pointer :: mgKFine
-
-     real(kind=realType),   dimension(:),   pointer :: mgIWeight
-     real(kind=realType),   dimension(:),   pointer :: mgJWeight
-     real(kind=realType),   dimension(:),   pointer :: mgKWeight
-
-     integer(kind=intType), dimension(:,:), pointer :: mgICoarse
-     integer(kind=intType), dimension(:,:), pointer :: mgJCoarse
-     integer(kind=intType), dimension(:,:), pointer :: mgKCoarse
-
-     ! iCoarsened - How this block was coarsened in i-direction.
-     ! jCoarsened - How this block was coarsened in j-direction.
-     ! kCoarsened - How this block was coarsened in k-direction.
-
-     integer(kind=porType) :: iCoarsened, jCoarsened, kCoarsened
-
-     ! iCo: Indicates whether or not i grid lines are present on the
-     !      coarse grid; not allocated for the coarsest grid.
-     ! jCo: Idem in j-direction.
-     ! kCo: Idem in k-direction.
-
-     logical, dimension(:), pointer :: iCo, jCo, kCo
-     !
-     !         Time-stepping and spectral radii variables.
-     !         only allocated on the finest grid.
-     !
-     ! wn(2:il,2:jl,2:kl,1:nMGVar) - Values of the update variables
-     !                               at the beginning of the RungeKutta
-     !                               iteration. Only allocated for
-     !                               RungeKutta smoother.
-     ! pn(2:il,2:jl,2:kl)          - The pressure for the RungeKutta
-     !                               smoother.
-     ! dtl(1:ie,1:je,1:ke)         - Time step
-     ! radI(1:ie,1:je,1:ke)        - Spectral radius in i-direction.
-     ! radJ(1:ie,1:je,1:ke)        - Spectral radius in j-direction.
-     ! radK(1:ie,1:je,1:ke)        - Spectral radius in k-direction.
-
-     real(kind=realType), dimension(:,:,:,:), pointer :: wn
-     real(kind=realType), dimension(:,:,:),   pointer :: pn
-     real(kind=realType), dimension(:,:,:),   pointer :: dtl
-     real(kind=realType), dimension(:,:,:),   pointer :: radI, radJ, radK
-
-     !
-     !         Variables for Iso/Surface Slice generation
-     ! fc(1:ie,1:je,1:ke) - cell center values of the function to be iso-valued
-     ! fn(1:il,1:jl,1:kl) - node values of the function to be iso-valued
-     ! Note these are are only allocated temporaily during solution writing.
-
-     real(kind=realType), dimension(:, :, :), pointer :: fc
-     real(kind=realType), dimension(:, :, :), pointer :: fn
-
-
-
-     !
-     !         Turbulence model variables.
-     !
-     ! d2Wall(2:il,2:jl,2:kl) - Distance from the center of the cell
-     !                          to the nearest viscous wall.
-     ! intermittency( )       - Function defining the transition location
-     !
-     ! The next two variables are only initialized if roughness is requested (useRoughSA = True)
-     ! nearestWallCellInd(2:il,2:jl,2:kl)   - global cell ID for the nearest wall cell; is needed for rougness
-     ! ks(2:il,2:jl,2:kl)     - Roughness value of the nearest wall
-
-     real(kind=realType), dimension(:,:,:), pointer :: d2Wall, filterDES
-     real(kind=realType), dimension(:,:,:), pointer :: intermittency
-     integer(kind=intType), dimension(:,:,:), pointer :: nearestWallCellInd
-     real(kind=realType), dimension(:,:,:), pointer :: ks
-
-     ! bmti1(je,ke,nt1:nt2,nt1:nt2): Matrix used for the implicit
-     !                               boundary condition treatment of
-     !                               the turbulence equations at the
-     !                               iMin boundary. Only allocated on
-     !                               the finest level and for the 1st
-     !                               spectral solution.
-     ! bmti2(je,ke,nt1:nt2,nt1:nt2): Idem for the iMax boundary.
-     ! bmtj1(ie,ke,nt1:nt2,nt1:nt2): Idem for the jMin boundary.
-     ! bmtj2(ie,ke,nt1:nt2,nt1:nt2): Idem for the jMax boundary.
-     ! bmtk1(ie,je,nt1:nt2,nt1:nt2): Idem for the kMin boundary.
-     ! bmtk2(ie,je,nt1:nt2,nt1:nt2): Idem for the kMax boundary.
-
-     real(kind=realType), dimension(:,:,:,:), pointer :: bmti1, bmti2
-     real(kind=realType), dimension(:,:,:,:), pointer :: bmtj1, bmtj2
-     real(kind=realType), dimension(:,:,:,:), pointer :: bmtk1, bmtk2
-
-
-     ! bvti1(je,ke,nt1:nt2): RHS vector used for the implicit
-     !                       boundary condition treatment of the
-     !                       turbulence equations at the iMin
-     !                       boundary. Only allocated on the finest
-     !                       level and for the 1st spectral solution.
-     ! bvti2(je,ke,nt1:nt2): Idem for the iMax boundary.
-     ! bvtj1(ie,ke,nt1:nt2): Idem for the jMin boundary.
-     ! bvtj2(ie,ke,nt1:nt2): Idem for the jMax boundary.
-     ! bvti2(je,ke,nt1:nt2): Idem for the iMax boundary.
-     ! bvtk1(ie,ke,nt1:nt2): Idem for the kMin boundary.
-     ! bvtk2(ie,ke,nt1:nt2): idem for the kMax boundary.
-
-     real(kind=realType), dimension(:,:,:), pointer :: bvti1, bvti2
-     real(kind=realType), dimension(:,:,:), pointer :: bvtj1, bvtj2
-     real(kind=realType), dimension(:,:,:), pointer :: bvtk1, bvtk2
-     !
-     !         Relation to the original cgns grid.
-     !
-     ! sectionID      - The section of the grid this block belongs to.
-     ! cgnsBlockID    - Block/zone number of the cgns grid to which
-     !                  this block is related.
-     ! iBegOr, iEndOr - Range of points of this block in the
-     ! jBegOr, jEndOr   corresponding cgns block, i.e. for this block
-     ! kBegOr, kEndOr   iBegOr <= i <= iEndOr, jBegOr <= j <= jEndOr,
-     !                  kBegOr <= k <= kEndOr.
-     !                  It is of course possible that the entire
-     !                  block is stored.
-     integer(kind=intType) :: sectionID=1
-     integer(kind=intType) :: cgnsBlockID
-     integer(kind=intType) :: iBegOr, iEndOr, jBegOr, jEndOr
-     integer(kind=intType) :: kBegOr, kEndOr
-     type(surfaceNodeWeightArray) , dimension(6) :: nodalWeights
-     !
-     !         Adjoint solver variables.
-     !
-     ! globalNode(ib:ie,jb:je,kb:ke):  Global node numbering.
-     ! globalCell(0:ib,0:jb,0:kb):     Global cell numbering.
-     ! color(0:ib,0:jb,0:kb)     :     Temporary coloring array used for
-     !                                 forward mode AD/FD calculations
-     integer(kind=intType), dimension(:,:,:), pointer :: globalCGNSNode
-     integer(kind=intType), dimension(:,:,:), pointer :: globalNode
-     integer(kind=intType), dimension(:,:,:), pointer :: globalCell
-     integer(kind=intType), dimension(:,:,:), pointer :: color
-
-     ! Data storing the first order PC in tri-diagonal ordering. 7
-     ! real(kind=realType), dimension(:, :, :, :, :), pointer :: Diag
-     ! real(kind=realType), dimension(:, :, :, :, :), pointer :: i_L, i_U
-     ! real(kind=realType), dimension(:, :, :, :, :), pointer :: j_L, j_U
-     ! real(kind=realType), dimension(:, :, :, :, :), pointer :: k_L, k_U
-
-     real(kind=realType), dimension(:, :, :, :, :), pointer :: PCMat
-
-     ! Generic vectors for doing products/preconditioning. Like w, but
-     ! only 1 level of halos, and it is in block ordering (nw first)
-     ! instead of field ordering like w is.
-     real(kind=realType), dimension(:, :, :, :), pointer :: PCVec1, PCVec2
-
-     ! Data for the factorized trigonal solves
-     real(kind=realType), dimension(:, :, :, :), pointer :: i_D_fact,  j_D_fact,  k_D_fact
-     real(kind=realType), dimension(:, :, :, :), pointer :: i_L_Fact,  j_L_Fact,  k_L_Fact
-     real(kind=realType), dimension(:, :, :, :), pointer :: i_U_Fact,  j_U_Fact,  k_U_Fact
-     real(kind=realType), dimension(:, :, :, :), pointer :: i_U2_Fact, j_U2_Fact, k_U2_Fact
-
-     integer(kind=intType), dimension(:, :, :, :), pointer :: i_ipiv, j_ipiv, k_ipiv
-
-     ! A list of pointers for generic communication of either real or
-     ! integer data.
-     type(rPtr), dimension(24) :: realCommVars
-     type(iPtr), dimension(3) :: intCommvars
-
-  end type blockType
-
-  !
-  !       Array of all blocks at all multigrid levels and spectral sols.
-  !
-  ! nDom:            total number of computational blocks.
-  ! flowDoms(:,:,:): array of blocks. Dimensions are
-  !                  (nDom,nLevels,nTimeIntervalsSpectral)
-
-  integer(kind=intType) :: nDom
-
-  ! A global paramter for how to sort fringes
-  integer(kind=intType) :: fringeSortType=sortByDonor
-
-#ifdef USE_TAPENADE
-  ! This is never actually compiled...just make tapenade think it
-  ! isn't allocatable
-  type(blockType), dimension(nn:nn,1,ntimeIntervalsSpectral) :: flowDoms
-#else
-  type(blockType), allocatable, target, dimension(:,:,:) :: flowDoms
-  type(blockType), allocatable, target, dimension(:,:,:) :: flowDomsd
-  type(blockType), allocatable, target, dimension(:,:,:) :: flowDomsb
-#endif
+    !
+    !       This module contains the definition of the derived data type
+    !       for block, which is the basic building block for this code.
+    !       Apart from the derived data type for block, this module also
+    !       contains the actual array for storing the blocks and the
+    !       number of blocks stored on this processor.
+    !
+    use constants, only: realType, intType, porType, maxCGNSNameLen, &
+                         sortByDonor, sortByReceiver
+    implicit none
+    save
 
-  !
-  !       Additional info needed in the flow solver.
-  !
-  ! nCellGlobal(nLev) - Global number of cells on every mg level.
+    !
+    !       The definition of the derived data type visc_subface_type,
+    !       which stores the viscous stress tensor and heat flux vector.
+    !       In this way it is avoided that these quantities must be
+    !       recomputed for the viscous forces and postprocessing. This
+    !       saves both time and a considerable amount of code.
+    !
+    type viscSubfaceType
 
-  integer(kind=intType), allocatable, dimension(:) :: nCellGlobal
+        ! tau(:,:,6): The 6 components of the viscous stress tensor.
+        !             The first 2 dimensions of these arrays are equal
+        !             to the dimenions of the cell subface without any
+        !             halo cell. Consequently the starting index is
+        !             arbitrary, such that no offset computation is
+        !             needed when the arrays are accessed.
+        ! q(:,:,3):   Same story for the heat flux vector.
+        ! uTau(:,:):  And for the friction velocity.
 
-  contains
+        real(kind=realType), dimension(:, :, :), pointer :: tau, q
+        real(kind=realType), dimension(:, :), pointer :: uTau
 
+    end type viscSubfaceType
 
-  logical function lessEqualFringeType(g1, g2)
+    type rPtr
+        real(kind=realType), dimension(:, :, :), pointer :: var
+    end type rPtr
 
-    !         lessEqual returns .true. if g1 <= g2 and .false. otherwise.
-    !         The comparison is firstly based on the processor ID of the
-    !         donor, then the block, then then the I, J, K
-    !
-    implicit none
-    !
-    !        Function arguments.
-    !
-    type(fringeType), intent(in) :: g1, g2
-    !
-    ! Compare the donor processors first. If not equal,
-    ! set lessEqual appropriately and return.
-    if (fringeSortType == sortByDonor) then
-       if(g1%donorProc < g2%donorProc) then
-          lessEqualfringeType = .true.
-          return
-       else if(g1%donorProc > g2%donorProc) then
-          lessEqualfringeType = .false.
-          return
-       endif
-
-       ! Donor processors are identical. Now we check the block
-
-       if(g1%donorBlock < g2%donorBlock) then
-          lessEqualfringeType = .true.
-          return
-       else if(g1%donorBlock > g2%donorBlock) then
-          lessEqualfringeType = .false.
-          return
-       endif
-
-       ! Compare the indices of the halo. First k, then j and
-       ! finally i.
-
-       if(g1%dIndex < g2%dIndex) then
-          lessEqualfringeType = .true.
-          return
-       else if(g1%dindex > g2%dIndex) then
-          lessEqualfringeType = .false.
-          return
-       endif
-
-    else if (fringeSortType == sortByReceiver) then
-
-
-       ! Compare my indices
-
-       if(g1%myIndex < g2%myIndex) then
-          lessEqualfringeType = .true.
-          return
-       else if(g1%myIndex > g2%myIndex) then
-          lessEqualfringeType = .false.
-          return
-       endif
-
-       ! Now compare the donor information:
-
-       if(g1%donorProc < g2%donorProc) then
-          lessEqualfringeType = .true.
-          return
-       else if(g1%donorProc > g2%donorProc) then
-          lessEqualfringeType = .false.
-          return
-       endif
-
-       ! Donor processors are identical. Now we check the block
-
-       if(g1%donorBlock < g2%donorBlock) then
-          lessEqualfringeType = .true.
-          return
-       else if(g1%donorBlock > g2%donorBlock) then
-          lessEqualfringeType = .false.
-          return
-       endif
-
-       ! Compare the indices of the halo. First k, then j and
-       ! finally i.
-
-       if(g1%dIndex < g2%dIndex) then
-          lessEqualfringeType = .true.
-          return
-       else if(g1%dIndex > g2%dIndex) then
-          lessEqualfringeType = .false.
-          return
-       endif
-    end if
-
-    ! Both entities are identical. So set lessEqual to .true.
-
-    lessEqualfringeType = .true.
-
-  end function lessEqualFringeType
-
-  logical function lessFringeType(g1, g2)
-
-    !         less returns .true. if g1 <= g2 and .false. otherwise.
-    !         The comparison is firstly based on the processor ID of the
-    !         donor, then the block, then then the I, J, K
+    type iPtr
+        integer(kind=intType), dimension(:, :, :), pointer :: var
+    end type iPtr
     !
-    implicit none
-    !
-    !        Function arguments.
+    !       The definition of the derived data type BCDataType, which
+    !       stores the prescribed data of boundary faces as well as unit
+    !       normals. For all the arrays the first two dimensions equal the
+    !       dimensions of the subface, possibly extended with halo cells.
+    !       Consequently the starting index is arbitrary, such that no
+    !       offset computation is needed when the array is accessed.
     !
-    type(fringeType), intent(in) :: g1, g2
+    type BCDataType
+
+        ! inBeg, inEnd: Node range in the first direction of the subface
+        ! jnBeg, jnEnd: Idem in the second direction.
+        ! icBeg, icEnd: Cell range in the first direction of the subface
+        ! jcBeg, jcEnd: Idem in the second direction.
+
+        integer(kind=intType) :: inBeg, inEnd, jnBeg, jnEnd
+        integer(kind=intType) :: icBeg, icEnd, jcBeg, jcEnd
+
+        ! norm(:,:,3):  The unit normal; it points out of the domain.
+        ! rface(:,:):   Velocity of the face in the direction of the
+        !               outward pointing normal. only allocated for
+        !               the boundary conditions that need this info.
+
+        real(kind=realType), dimension(:, :, :), pointer :: norm
+        real(kind=realType), dimension(:, :), pointer :: rface
+        real(kind=realType), dimension(:, :, :), pointer :: F, Fv, Fp
+        real(kind=realType), dimension(:, :, :), pointer :: T, Tv, Tp
+        real(kind=realType), dimension(:, :), pointer :: area
+        real(kind=realType), dimension(:, :), pointer :: CpTarget
+        integer(kind=realType), dimension(:, :), pointer :: surfIndex
+
+        ! Generic pointers for performing a globalized reduction.
+        real(kind=realType), dimension(:, :), pointer :: nodeVal
+        real(kind=realType), dimension(:, :), pointer :: cellVal
+
+        ! symNorm is the normal for (symmertry) boundary conditions.
+        ! symNormSet is set to false until symNorm is computed at the
+        ! beginning of a simulation. symNorm then remains constant for
+        ! the remainder of the simulation. This is ok, since if the
+        ! normal of the symmetry plane is changing, your results are
+        ! invalid anyway.  These values are only used on symmetry
+        ! plane. They are undefined for other BC's.
+        real(kind=realType), dimension(3) :: symNorm
+
+        logical :: symNormSet
+
+        ! subsonicInletTreatment: which boundary condition treatment
+        !                         to use for subsonic inlets; either
+        !                         totalConditions or massFlow.
+
+        integer(kind=intType) :: subsonicInletTreatment
+
+        ! uSlip(:,:,3):  the 3 components of the velocity vector on
+        !                a viscous wall.
+        ! TNS_Wall(:,:): Wall temperature for isothermal walls.
+        ! ksNS_Wall(:,:): Equivalent Sand Grain Roughness on viscous walls.
+
+        real(kind=realType), dimension(:, :, :), pointer :: uSlip
+        real(kind=realType), dimension(:, :), pointer :: TNS_Wall
+        real(kind=realType), dimension(:, :), pointer :: ksNS_Wall
+
+        ! The name of this boundary condition and it's index
+        character(maxCGNSNameLen) :: family
+        integer(kind=intType) :: famID
+
+        ! Added by HDN
+        ! normALE(0:nALEsteps,ie:ib,je:jb,3)
+        !                 - Storage of norm for intermediate meshes.
+        ! rFaceALE(0:nALEsteps,iBeg:iEnd,jBeg:jEnd)
+        !                 - Storage of rface for intermediate meshes.
+        ! uSlipALE(0:nALEsteps,iBeg:iEnd,jBeg:jEnd,3)
+        !                 - Storage of uSlip for intermediate meshes.
+        ! cellHeatFlux(iBeg:iEnd,jBeg:jEnd,3)
+        !                 - Surface heat flux (cell based/node based).
+        real(kind=realType), dimension(:, :, :, :), pointer :: normALE
+        real(kind=realType), dimension(:, :, :), pointer :: rFaceALE
+        real(kind=realType), dimension(:, :, :, :), pointer :: uSlipALE
+        real(kind=realType), dimension(:, :), pointer :: nodeHeatFlux
+        real(kind=realType), dimension(:, :), pointer :: cellHeatFlux
+
+        ! ptInlet(:,:):       Total pressure at subsonic inlets.
+        ! ttInlet(:,:):       Total temperature at subsonic inlets.
+        ! htInlet(:,:):       Total enthalpy at subsonic inlets.
+        ! flowXDirInlet(:,:): X-direction of the flow for subsonic
+        !                     inlets.
+        ! flowYDirInlet(:,:): Idem in y-direction.
+        ! flowZDirInlet(:,:): Idem in z-direction.
+
+        real(kind=realType), dimension(:, :), pointer :: ptInlet, ttInlet, htInlet
+        real(kind=realType), dimension(:, :), pointer :: flowXDirInlet, flowYDirInlet, flowZDirInlet
+
+        ! turbInlet(:,:,nt1:nt2): Turbulence variables at inlets,
+        !                         either subsonic or supersonic.
+
+        real(kind=realType), dimension(:, :, :), pointer :: turbInlet
+
+        ! rho(:,:):  density; used for multiple bc's.
+        ! velX(:,:): x-velocity; used for multiple bc's.
+        ! velY(:,:): y-velocity; used for multiple bc's.
+        ! velZ(:,:): z-velocity; used for multiple bc's.
+        ! ps(:,:):   static pressure; used for multiple bc's.
+
+        real(kind=realType), dimension(:, :), pointer :: rho
+        real(kind=realType), dimension(:, :), pointer :: velX, velY, velZ
+        real(kind=realType), dimension(:, :), pointer :: ps
+
+        ! Surface blanking for force integration
+        integer(kind=intType), dimension(:, :), pointer :: iblank
+
+        ! Surface deviation. This is an estimate of how much the surface
+        ! deviates from the "real" underlying surface'
+        real(Kind=realType), dimension(:, :), pointer :: delta
+        real(Kind=realType), dimension(:, :), pointer :: deltaNode
+
+    end type BCDataType
+
+    type surfaceNodeWeightArray
+        real(kind=realType), dimension(:, :, :), pointer :: weight
+    end type surfaceNodeWeightArray
+
+    type fringeType
+
+        ! Make everything in here static such that we can easily copy the
+        ! datatype and use MPI to communicate them directly. This data
+        ! type bears some resemblence to the haloList type used for the
+        ! B2B preprocessing.
+
+        ! The quality metric for the fringe
+        real(kind=realType) :: quality
+
+        ! This is the information regarding where the cell came from.
+        integer(kind=intType) :: myBlock, myIndex
+
+        ! This is the information about the donor that was found
+        integer(kind=intType) :: donorProc, donorBlock, dIndex
+        real(kind=realType) :: donorFrac(3)
+
+    end type fringeType
+
+    interface operator(<=)
+        module procedure lessEqualFringeType
+    end interface operator(<=)
+
+    interface operator(<)
+        module procedure lessFringeType
+    end interface operator(<)
+
+    type interpPtType
+        integer(kind=intType) :: donorProc, donorBlock, dI, dJ, dK, myBlock
+        real(kind=realType) :: donorFrac(3)
+    end type interpPtType
+
+    interface operator(<=)
+        module procedure lessEqualinterpPtType
+    end interface operator(<=)
+
+    interface operator(<)
+        module procedure lessInterpPtType
+    end interface operator(<)
+
+    !       The definition of the derived data type block_type, which
+    !       stores dimensions, coordinates, solution, etc.
     !
-    ! Compare the donor processors first. If not equal,
-    ! set less appropriately and return.
-    if (fringeSortType == sortByDonor) then
-       if(g1%donorProc < g2%donorProc) then
-          lessfringeType = .true.
-          return
-       else if(g1%donorProc > g2%donorProc) then
-          lessfringeType = .false.
-          return
-       endif
-
-       ! Donor processors are identical. Now we check the block
-
-       if(g1%donorBlock < g2%donorBlock) then
-          lessfringeType = .true.
-          return
-       else if(g1%donorBlock > g2%donorBlock) then
-          lessfringeType = .false.
-          return
-       endif
-
-       ! Compare the indices of the halo. First k, then j and
-       ! finally i.
-
-       if(g1%dIndex < g2%dIndex) then
-          lessfringeType = .true.
-          return
-       else if(g1%dIndex > g2%dIndex) then
-          lessfringeType = .false.
-          return
-       endif
-
-    else if (fringeSortType == sortByReceiver) then
-
-       ! Compare my indices
-
-       if(g1%myIndex < g2%myIndex) then
-          lessfringeType = .true.
-          return
-       else if(g1%myIndex > g2%myIndex) then
-          lessfringeType = .false.
-          return
-       endif
-
-       ! Now compare the donor information:
-
-       if(g1%donorProc < g2%donorProc) then
-          lessfringeType = .true.
-          return
-       else if(g1%donorProc > g2%donorProc) then
-          lessfringeType = .false.
-          return
-       endif
-
-       ! Donor processors are identical. Now we check the block
-
-       if(g1%donorBlock < g2%donorBlock) then
-          lessfringeType = .true.
-          return
-       else if(g1%donorBlock > g2%donorBlock) then
-          lessfringeType = .false.
-          return
-       endif
-
-       ! Compare the indices of the halo. First k, then j and
-       ! finally i.
-
-       if(g1%dIndex < g2%dIndex) then
-          lessfringeType = .true.
-          return
-       else if(g1%dIndex > g2%dIndex) then
-          lessfringeType = .false.
-          return
-       endif
-    end if
-
-    ! Both entities are identical. So set less to .False.
-
-    lessFringeType = .False.
-
-  end function lessFringeType
-
-
-  logical function lessEqualInterpPtType(g1, g2)
-
-    !         lessEqual returns .true. if g1 <= g2 and .false. otherwise.
-    !         The comparison is firstly based on the processor ID of the
-    !         donor, then the block, then then the I, J, K
-    !
-    implicit none
-    !
-    !        Function arguments.
+    type blockType
+        !
+        !         Block dimensions and orientation.
+        !
+        !  nx, ny, nz - Block integer dimensions for no halo cell based
+        !               quantities.
+        !  il, jl, kl - Block integer dimensions for no halo node based
+        !               quantities.
+        !  ie, je, ke - Block integer dimensions for single halo
+        !               cell-centered quantities.
+        !  ib, jb, kb - Block integer dimensions for double halo
+        !               cell-centered quantities.
+        ! rightHanded - Whether or not the block is a right handed.
+        !               If not right handed it is left handed of course.
+
+        integer(kind=intType) :: nx, ny, nz
+        integer(kind=intType) :: il, jl, kl
+        integer(kind=intType) :: ie, je, ke
+        integer(kind=intType) :: ib, jb, kb
+
+        logical :: rightHanded
+        !
+        !         Block boundary conditions.
+        !
+        !  nSubface             - Number of subfaces on this block.
+        !  n1to1                - Number of 1 to 1 block boundaries.
+        !  nBocos               - Number of physical boundary subfaces.
+        !  nViscBocos           - Number of viscous boundary subfaces.
+        !  BCType(:)            - Boundary condition type for each
+        !                         subface. See the module BCTypes for
+        !                         the possibilities.
+        !  BCFaceID(:)          - Block face location of each subface.
+        !                         possible values are: iMin, iMax, jMin,
+        !                         jMax, kMin, kMax. see also module
+        !                         BCTypes.
+        !  cgnsSubface(:)       - The subface in the corresponding cgns
+        !                         block. As cgns distinguishes between
+        !                         boundary and internal boundaries, the
+        !                         BCType of the subface is needed to
+        !                         know which one to take.
+        !  inBeg(:), inEnd(:)   - Lower and upper limits for the nodes
+        !  jnBeg(:), jnEnd(:)     in each of the index directions on a
+        !  knBeg(:), knEnd(:)     given subface. Note that one of these
+        !                         indices will not change since we will
+        !                         be moving on a face.
+        !  dinBeg(:), dinEnd(:) - Lower and upper limits for the nodes
+        !  djnBeg(:), djnEnd(:)   in the each of the index directions
+        !  dknBeg(:), dknEnd(:)   of the donor subface for this
+        !                         particular subface. Note that one of
+        !                         these indices will not change since we
+        !                         will be moving on a face.
+        !  icBeg(:), icEnd(:)   - Lower and upper limits for the cells
+        !  jcBeg(:), jcEnd(:)     in each of the index directions for
+        !  kcBeg(:), kcEnd(:)     the subface. The cells indicated by
+        !                         this range are halo cells (the
+        !                         constant index) adjacent to the face.
+        !                         a possible overlap outside the block
+        !                         is stored.
+        !  neighBlock(:)        - Local block number to which this
+        !                         subface connects. This value is set to
+        !                         zero if this subface is not connected
+        !                         to another block.
+        !  neighProc(:)         - Processor number where the neighbor
+        !                         block is stored. This value is set to
+        !                         -1 if this subface is not connected
+        !                         to another block.
+        !  l1(:), l2(:),        - Short hand for the transformation
+        !  l3(:)                  matrix between this subface and the
+        !                         neighbor block. These values are set
+        !                         to zero if this subface is not
+        !                         connected to another block.
+        !  groupNum(:)          - Group number to which this subface
+        !                         belongs. If this subface does not
+        !                         belong to any group, the corresponding
+        !                         entry in this array is zeroed out. If
+        !                         the subface belongs to a sliding mesh
+        !                         interface the absolute value of
+        !                         groupNum contains the number of the
+        !                         sliding mesh interface. One side of
+        !                         the interface gets a positive number,
+        !                         the other side a negative one.
+        !
+        !
+        integer(kind=intType) :: nSubface, n1to1, nBocos, nViscBocos
+
+        integer(kind=intType), dimension(:), pointer :: BCType
+        integer(kind=intType), dimension(:), pointer :: BCFaceID
+
+        integer(kind=intType), dimension(:), pointer :: cgnsSubface
+
+        integer(kind=intType), dimension(:), pointer :: inBeg, inEnd
+        integer(kind=intType), dimension(:), pointer :: jnBeg, jnEnd
+        integer(kind=intType), dimension(:), pointer :: knBeg, knEnd
+
+        integer(kind=intType), dimension(:), pointer :: dinBeg, dinEnd
+        integer(kind=intType), dimension(:), pointer :: djnBeg, djnEnd
+        integer(kind=intType), dimension(:), pointer :: dknBeg, dknEnd
+
+        integer(kind=intType), dimension(:), pointer :: icBeg, icEnd
+        integer(kind=intType), dimension(:), pointer :: jcBeg, jcEnd
+        integer(kind=intType), dimension(:), pointer :: kcBeg, kcEnd
+
+        integer(kind=intType), dimension(:), pointer :: neighBlock
+        integer(kind=intType), dimension(:), pointer :: neighProc
+        integer(kind=intType), dimension(:), pointer :: l1, l2, l3
+        integer(kind=intType), dimension(:), pointer :: groupNum
+
+        !
+        !         Overset interpolation information
+
+        integer(kind=intType), dimension(:, :, :), pointer :: iblank
+        integer(kind=intType), dimension(:, :, :), pointer :: iblankLast
+        integer(kind=intType), dimension(:, :, :), pointer :: status
+        integer(kind=intType), dimension(:, :, :), pointer :: forcedRecv
+        type(fringeType), dimension(:), pointer :: fringes => null()
+        integer(kind=intType), dimension(:, :, :, :), pointer :: fringePtr => null()
+        integer(kind=intType), dimension(:, :, :, :), pointer :: gInd => null()
+        integer(kind=intType), pointer :: nDonors
+        integer(kind=intType) :: nDonorsOnOwnedCells
+
+        integer(kind=intType), dimension(:, :), pointer :: orphans
+        integer(kind=intType) :: nOrphans
+
+        !
+        !         Boundary data for the boundary subfaces.
+        !
+        ! BCData(nBocos): The boundary data for each of the boundary
+        !                 subfaces.
+
+        type(BCDataType), dimension(:), pointer :: BCData
+        !
+        !         The stress tensor and heat flux vector at viscous wall faces
+        !         as well as the face pointers to these viscous wall faces.
+        !
+        ! viscSubface(nViscBocos):    Storage for the viscous stress
+        !                             tensor and heat flux vector for
+        !                             the viscous subfaces.
+        ! viscIMinPointer(2:jl,2:kl): Pointer to viscous subface for
+        !                             the iMin block face. If the face
+        !                             is not part of a viscous subface
+        !                             this value is set to 0.
+        ! viscIMaxPointer(2:jl,2:kl): Idem for iMax block face.
+        ! viscJMinPointer(2:il,2:kl): Idem for jMin block face.
+        ! viscJMaxPointer(2:il,2:kl): Idem for jmax block face.
+        ! viscKMinPointer(2:il,2:jl): Idem for kMin block face.
+        ! viscKMaxPointer(2:il,2:jl): Idem for kMax block face.
+
+        type(viscSubfaceType), dimension(:), pointer :: viscSubface
+
+        integer(kind=intType), dimension(:, :), pointer :: viscIMinPointer
+        integer(kind=intType), dimension(:, :), pointer :: viscIMaxPointer
+        integer(kind=intType), dimension(:, :), pointer :: viscJMinPointer
+        integer(kind=intType), dimension(:, :), pointer :: viscJMaxPointer
+        integer(kind=intType), dimension(:, :), pointer :: viscKMinPointer
+        integer(kind=intType), dimension(:, :), pointer :: viscKMaxPointer
+        !
+        !         Mesh related variables.
+        !
+        !  x(0:ie,0:je,0:ke,3)  - xyz locations of grid points in block.
+        !  xOld(nOld,:,:,:,:)   - Coordinates on older time levels;
+        !                         only needed for unsteady problems on
+        !                         deforming grids. Only allocated on
+        !                         the finest grid level. The blank
+        !                         dimensions are equal to the dimensions
+        !                         of x.
+        !  sI(0:ie,1:je,1:ke,3) - Projected areas in the i-coordinate
+        !                         direction. Normals point in the
+        !                         direction of increasing i.
+        !  sJ(1:ie,0:je,1:ke,3) - Projected areas in the j-coordinate
+        !                         direction. Normals point in the
+        !                         direction of increasing j.
+        !  sK(1:ie,1:je,0:ke,3) - Projected areas in the k-coordinate
+        !                         direction. Normals point in the
+        !                         direction of increasing k.
+        !  vol(0:ib,0:jb,0:kb)  - Cell volumes. The second level halo
+        !                         is present for a multigrid option.
+        !  volOld(nold,2:il,..) - Volumes on older time levels; only
+        !                         needed for unsteady problems on
+        !                         deforming grids. Only allocated on
+        !                         the finest grid level.
+        !  uv(2,2:il,2:jl,2:kl) - Parametric location on elemID for each cell.
+        !                         Only used for fast wall distance calcs.
+        !  porI(1:il,2:jl,2:kl) - Porosity in the i direction.
+        !  porJ(2:il,1:jl,2:kl) - Porosity in the j direction.
+        !  porK(2:il,2:jl,1:kl) - Porosity in the k direction.
+        !
+        !  indFamilyI(:,:,:)  - Index of the i-face in the arrays
+        !                       to compute the local mass flow
+        !                       for a family or sliding mesh interface.
+        !                       Dimension is (1:il,2:jl,2:kl).
+        !  indFamilyJ(:,:,:)  - Idem for the j-faces.
+        !                       Dimension is (2:il,1:jl,2:kl).
+        !  indFamilyK(:,:,:)  - Idem for the k-faces.
+        !                       Dimension is (2:il,2:jl,1:kl)
+        !  factFamilyI(:,:,:) - Corresponding factor to make sure
+        !                       that the massflow is defined positive
+        !                       when it enters the block and to define
+        !                       the mass flow of the entire wheel
+        !                       instead of a sector. Hence the possible
+        !                       values or -nSlices and nSlices, where
+        !                       nSlices or the number of sections to
+        !                       obtain the full wheel.
+        !  factFamilyJ(:,:,:) - Idem for the j-faces.
+        !  factFamilyK(:,:,:) - Idem for the k-faces.
+        !
+        !  rotMatrixI(:,:,:,:,:) - Rotation matrix of the i-faces to
+        !                          transform the velocity components
+        !                          from Cartesian to local cylindrical.
+        !                          This is needed only for problems with
+        !                          rotational periodicity in combination
+        !                          with an upwind scheme.
+        !                          Dimension is (1:il,2:jl,2:kl,3,3).
+        !  rotMatrixJ(:,:,:,:,:) - Idem for the j-faces.
+        !                          Dimension is (2:il,1:jl,2:kl,3,3).
+        !  rotMatrixK(:,:,:,:,:) - Idem for the k-faces.
+        !                          Dimension is (2:il,2:jl,1:kl,3,3).
+        !
+        !  blockIsMoving      - Whether or not the block is moving.
+        !  addGridVelocities  - Whether or not the face velocities
+        !                       are allocated and set.
+        !  sFaceI(0:ie,je,ke) - Dot product of the face velocity and
+        !                       the normal in i-direction.
+        !  sFaceJ(ie,0:je,ke) - Idem in j-direction.
+        !  sFaceK(ie,je,0:ke) - Idem in k-direction.
+
+        real(kind=realType), dimension(:, :, :, :), pointer :: x, xtmp
+        real(kind=realType), dimension(:, :, :, :, :), pointer :: Xold
+        real(kind=realType), dimension(:, :, :, :), pointer :: sI, sJ, sK
+        real(kind=realType), dimension(:, :, :), pointer :: vol
+        real(kind=realType), dimension(:, :, :, :), pointer :: volOld
+        real(kind=realType), dimension(:, :, :), pointer :: volref
+        real(kind=realType), dimension(:, :, :, :), pointer :: uv
+        integer(kind=intType), dimension(:, :, :, :), pointer :: surfNodeIndices
+
+        integer(kind=porType), dimension(:, :, :), pointer :: porI, porJ, porK
+        integer(kind=intType), dimension(:, :, :), pointer :: indFamilyI, indFamilyJ, indFamilyK
+        integer(kind=intType), dimension(:, :, :), pointer :: factFamilyI, factFamilyJ, factFamilyK
+        real(kind=realType), dimension(:, :, :, :, :), pointer :: rotMatrixI, rotMatrixJ, rotMatrixK
+
+        logical :: blockIsMoving, addGridVelocities
+
+        real(kind=realType), dimension(:, :, :), pointer :: sFaceI, sFaceJ, sFaceK
+
+        ! Added by HDN
+        ! xALE(0:ie,0:je,0:ke,3)              - Temporary storage of x so that
+        !                                       intermediate meshes can be stored in
+        !                                       x directly
+        ! sVeloIALE(0:ie,1:je,1:ke,3)
+        ! sVeloJALE(1:ie,0:je,1:ke,3)
+        ! sVeloKALE(1:ie,1:je,0:ke,3)         - Storage of surface velocities at one
+        !                                       time step
+        ! sIALE(0:nALEsteps,0:ie,1:je,1:ke,3)
+        ! sJALE(0:nALEsteps,1:ie,0:je,1:ke,3)
+        ! sKALE(0:nALEsteps,1:ie,1:je,0:ke,3) - Storage of sI, sJ, sK for intermediate
+        !                                       meshes
+        ! sFaceIALE(0:nALEsteps,0:ie,je,ke)
+        ! sFaceJALE(0:nALEsteps,ie,0:je,ke)
+        ! sFaceKALE(0:nALEsteps,ie,je,0:ke)   - Storage of sFaceI, sFaceJ, sFaceK for
+        !                                       intermediate meshes
+        real(kind=realType), dimension(:, :, :, :), pointer :: xALE
+        real(kind=realType), dimension(:, :, :, :), pointer :: sVeloIALE, sVeloJALE, sVeloKALE
+        real(kind=realType), dimension(:, :, :, :, :), pointer :: sIALE, sJALE, sKALE
+        real(kind=realType), dimension(:, :, :, :), pointer :: sFaceIALE, sFaceJALE, sFaceKALE
+
+        ! Tempory storage for overset variables
+        real(kind=realType), dimension(:, :, :, :), pointer :: XSeed
+        integer(kind=intType), dimension(:, :, :), pointer :: wallInd
+
+        !
+        !         Flow variables.
+        !
+        ! w(0:ib,0:jb,0:kb,1:nw)       - The set of independent variables
+        !                                w(i,j,k,1:nwf) flow field
+        !                                variables, which are rho, u,
+        !                                v, w and rhoE. In other words
+        !                                the velocities  are stored and
+        !                                not the momentum!!!!
+        !                                w(i,j,k,nt1:nt2) turbulent
+        !                                variables; also the primitive
+        !                                variables are stored.
+        ! wOld(nOld,2:il,2:jl,2:kl,nw) - Solution on older time levels,
+        !                                needed for the time integration
+        !                                for unsteady problems. In
+        !                                constrast to w, the conservative
+        !                                variables are stored in wOld for
+        !                                the flow variables; the turbulent
+        !                                variables are always the
+        !                                primitive ones.
+        !                                Only allocated on the finest
+        !                                mesh.
+        ! p(0:ib,0:jb,0:kb)            - Static pressure.
+        ! gamma(0:ib,0:jb,0:kb)        - Specific heat ratio; only
+        !                                allocated on the finest grid.
+        ! rlv(0:ib,0:jb,0:kb)          - Laminar viscosity; only
+        !                                allocated on the finest mesh
+        !                                and only for viscous problems.
+        ! rev(0:ib,0:jb,0:kb)          - Eddy viscosity; only
+        !                                allocated rans problems with
+        !                                eddy viscosity models.
+        ! s(1:ie,1:je,1:ke,3)          - Mesh velocities of the cell
+        !                                centers; only for moving mesh
+        !                                problems.
+
+        real(kind=realType), dimension(:, :, :, :), pointer :: w, wtmp
+        real(kind=realType), dimension(:, :, :, :, :), pointer :: dw_deriv
+        real(kind=realType), dimension(:, :, :, :, :), pointer :: wOld
+        real(kind=realType), dimension(:, :, :), pointer :: p, gamma, aa
+        real(kind=realType), dimension(:, :, :), pointer :: rlv, rev
+        real(kind=realType), dimension(:, :, :, :), pointer :: s
+        real(kind=realType), dimension(:, :, :), pointer :: shockSensor
+
+        ! Nodal Fluxes: ux,uy,uz,vx,vy,vz,wx,wy,wz,qx,qy,qz(il, jl, kl)
+        real(kind=realType), dimension(:, :, :), pointer:: ux, uy, uz
+        real(kind=realType), dimension(:, :, :), pointer:: vx, vy, vz
+        real(kind=realType), dimension(:, :, :), pointer:: wx, wy, wz
+        real(kind=realType), dimension(:, :, :), pointer:: qx, qy, qz
+
+        !
+        !         Residual and multigrid variables.
+        !
+        ! dw(0:ib,0:jb,0:kb,1:nw)   - Values of convective and combined
+        !                             flow residuals. Only allocated on
+        !                             the finest mesh.
+        ! fw(0:ib,0:jb,0:kb,1:nwf)  - values of artificial dissipation
+        !                             and viscous residuals.
+        !                             Only allocated on the finest mesh.
+
+        ! dwOldRK(:,2:il,2:jl,2:kl,nw) - Old residuals for the time
+        !                                accurate Runge-Kutta schemes.
+        !                                The first dimension is
+        !                                nRKStagesUnsteady - 1.Only
+        !                                allocated on the finest level
+        !                                and only in unsteady mode for
+        !                                Runge-Kutta schemes.
+
+        ! w1(1:ie,1:je,1:ke,1:nMGVar) - Values of the mg variables
+        !                               upon first entry to a coarser
+        !                               mesh; only allocated on the
+        !                               coarser grids. The variables
+        !                               used to compute the multigrid
+        !                               corrections are rho, u, v, w
+        !                               and p; the rhoE value is used
+        !                               for unsteady problems only.
+        ! p1(1:ie,1:je,1:ke)          - Value of the pressure upon
+        !                               first entry to a coarser grid;
+        !                               only allocated on the coarser
+        !                               grids.
+        ! wr(2:il,2:jl,2:kl,1:nMGVar) - Multigrid forcing terms; only
+        !                               allocated on the coarser grids.
+        !                               The forcing term of course
+        !                               contains conservative residuals,
+        !                               at least for the flow variables.
+        ! shockSensor(0:ib,0:jb,0:kb)   Precomputed sensor value for shock
+        !                               that is *NOT* differentated.
+        ! scratch(0:ib,0:jb,0:kb,5)     Scratch space for the turbulence
+        !                               models. NOMINALLY this could use
+        !                               dw and the code was nominally setup
+        !                               for this originally. However, this
+        !                               complicates reverse mode sensitivities
+        !                               So we use this instead.
+
+        real(kind=realType), dimension(:, :, :), pointer :: p1
+        real(kind=realType), dimension(:, :, :, :), pointer :: dw, fw
+        real(kind=realType), dimension(:, :, :, :), pointer :: dwtmp, dwtmp2
+        real(kind=realType), dimension(:, :, :, :, :), pointer :: dwOldRK
+        real(kind=realType), dimension(:, :, :, :), pointer :: w1, wr
+        real(kind=realType), dimension(:, :, :, :), pointer :: scratch
+
+        ! Added by HDN
+        ! Used for ALE. Only allocated on the finest mesh.
+        ! Extra dim is used to store initial residuals
+        ! dwALE(0:nALEsteps,0:ib,0:jb,0:kb,1:nw)   - Values of ONLY the convective flux
+        !                                            of intermediate meshes.
+        ! fwALE(0:nALEsteps,0:ib,0:jb,0:kb,1:nwf)  - values of ONLY the artificial
+        !                                            dissipation of intermediate meshes.
+        real(kind=realType), dimension(:, :, :, :, :), pointer :: dwALE, fwALE
+
+        ! mgIFine(2:il,2) - The two fine grid i-cells used for the
+        !                   restriction of the solution and residual to
+        !                   the coarse grid. Only on the coarser grids.
+        ! mgJFine(2:jl,2) - Idem for j-cells.
+        ! mgKFine(2:kl,2) - Idem for k-cells.
+
+        ! mgIWeight(2:il) - Weight for the residual restriction in
+        !                   in i-direction. Value is either 0.5 or 1.0,
+        !                   depending whether mgIFine(,1) is equal to
+        !                   or differs from mgIFine(,2).
+        ! mgJWeight(2:jl) - Idem for weights in j-direction.
+        ! mgKWeight(2:kl) - Idem for weights in k-direction.
+
+        ! mgICoarse(2:il,2) - The two coarse grid i-cells used for the
+        !                     interpolation of the correction to the
+        !                     fine grid. Not on the coarsest grid.
+        ! mgJCoarse(2:jl,2) - Idem for j-cells.
+        ! mgKCoarse(2:kl,2) - Idem for k-cells.
+
+        integer(kind=intType), dimension(:, :), pointer :: mgIFine
+        integer(kind=intType), dimension(:, :), pointer :: mgJFine
+        integer(kind=intType), dimension(:, :), pointer :: mgKFine
+
+        real(kind=realType), dimension(:), pointer :: mgIWeight
+        real(kind=realType), dimension(:), pointer :: mgJWeight
+        real(kind=realType), dimension(:), pointer :: mgKWeight
+
+        integer(kind=intType), dimension(:, :), pointer :: mgICoarse
+        integer(kind=intType), dimension(:, :), pointer :: mgJCoarse
+        integer(kind=intType), dimension(:, :), pointer :: mgKCoarse
+
+        ! iCoarsened - How this block was coarsened in i-direction.
+        ! jCoarsened - How this block was coarsened in j-direction.
+        ! kCoarsened - How this block was coarsened in k-direction.
+
+        integer(kind=porType) :: iCoarsened, jCoarsened, kCoarsened
+
+        ! iCo: Indicates whether or not i grid lines are present on the
+        !      coarse grid; not allocated for the coarsest grid.
+        ! jCo: Idem in j-direction.
+        ! kCo: Idem in k-direction.
+
+        logical, dimension(:), pointer :: iCo, jCo, kCo
+        !
+        !         Time-stepping and spectral radii variables.
+        !         only allocated on the finest grid.
+        !
+        ! wn(2:il,2:jl,2:kl,1:nMGVar) - Values of the update variables
+        !                               at the beginning of the RungeKutta
+        !                               iteration. Only allocated for
+        !                               RungeKutta smoother.
+        ! pn(2:il,2:jl,2:kl)          - The pressure for the RungeKutta
+        !                               smoother.
+        ! dtl(1:ie,1:je,1:ke)         - Time step
+        ! radI(1:ie,1:je,1:ke)        - Spectral radius in i-direction.
+        ! radJ(1:ie,1:je,1:ke)        - Spectral radius in j-direction.
+        ! radK(1:ie,1:je,1:ke)        - Spectral radius in k-direction.
+
+        real(kind=realType), dimension(:, :, :, :), pointer :: wn
+        real(kind=realType), dimension(:, :, :), pointer :: pn
+        real(kind=realType), dimension(:, :, :), pointer :: dtl
+        real(kind=realType), dimension(:, :, :), pointer :: radI, radJ, radK
+
+        !
+        !         Variables for Iso/Surface Slice generation
+        ! fc(1:ie,1:je,1:ke) - cell center values of the function to be iso-valued
+        ! fn(1:il,1:jl,1:kl) - node values of the function to be iso-valued
+        ! Note these are are only allocated temporaily during solution writing.
+
+        real(kind=realType), dimension(:, :, :), pointer :: fc
+        real(kind=realType), dimension(:, :, :), pointer :: fn
+
+        !
+        !         Turbulence model variables.
+        !
+        ! d2Wall(2:il,2:jl,2:kl) - Distance from the center of the cell
+        !                          to the nearest viscous wall.
+        ! intermittency( )       - Function defining the transition location
+        !
+        ! The next two variables are only initialized if roughness is requested (useRoughSA = True)
+        ! nearestWallCellInd(2:il,2:jl,2:kl)   - global cell ID for the nearest wall cell; is needed for rougness
+        ! ks(2:il,2:jl,2:kl)     - Roughness value of the nearest wall
+
+        real(kind=realType), dimension(:, :, :), pointer :: d2Wall, filterDES
+        real(kind=realType), dimension(:, :, :), pointer :: intermittency
+        integer(kind=intType), dimension(:, :, :), pointer :: nearestWallCellInd
+        real(kind=realType), dimension(:, :, :), pointer :: ks
+
+        ! bmti1(je,ke,nt1:nt2,nt1:nt2): Matrix used for the implicit
+        !                               boundary condition treatment of
+        !                               the turbulence equations at the
+        !                               iMin boundary. Only allocated on
+        !                               the finest level and for the 1st
+        !                               spectral solution.
+        ! bmti2(je,ke,nt1:nt2,nt1:nt2): Idem for the iMax boundary.
+        ! bmtj1(ie,ke,nt1:nt2,nt1:nt2): Idem for the jMin boundary.
+        ! bmtj2(ie,ke,nt1:nt2,nt1:nt2): Idem for the jMax boundary.
+        ! bmtk1(ie,je,nt1:nt2,nt1:nt2): Idem for the kMin boundary.
+        ! bmtk2(ie,je,nt1:nt2,nt1:nt2): Idem for the kMax boundary.
+
+        real(kind=realType), dimension(:, :, :, :), pointer :: bmti1, bmti2
+        real(kind=realType), dimension(:, :, :, :), pointer :: bmtj1, bmtj2
+        real(kind=realType), dimension(:, :, :, :), pointer :: bmtk1, bmtk2
+
+        ! bvti1(je,ke,nt1:nt2): RHS vector used for the implicit
+        !                       boundary condition treatment of the
+        !                       turbulence equations at the iMin
+        !                       boundary. Only allocated on the finest
+        !                       level and for the 1st spectral solution.
+        ! bvti2(je,ke,nt1:nt2): Idem for the iMax boundary.
+        ! bvtj1(ie,ke,nt1:nt2): Idem for the jMin boundary.
+        ! bvtj2(ie,ke,nt1:nt2): Idem for the jMax boundary.
+        ! bvti2(je,ke,nt1:nt2): Idem for the iMax boundary.
+        ! bvtk1(ie,ke,nt1:nt2): Idem for the kMin boundary.
+        ! bvtk2(ie,ke,nt1:nt2): idem for the kMax boundary.
+
+        real(kind=realType), dimension(:, :, :), pointer :: bvti1, bvti2
+        real(kind=realType), dimension(:, :, :), pointer :: bvtj1, bvtj2
+        real(kind=realType), dimension(:, :, :), pointer :: bvtk1, bvtk2
+        !
+        !         Relation to the original cgns grid.
+        !
+        ! sectionID      - The section of the grid this block belongs to.
+        ! cgnsBlockID    - Block/zone number of the cgns grid to which
+        !                  this block is related.
+        ! iBegOr, iEndOr - Range of points of this block in the
+        ! jBegOr, jEndOr   corresponding cgns block, i.e. for this block
+        ! kBegOr, kEndOr   iBegOr <= i <= iEndOr, jBegOr <= j <= jEndOr,
+        !                  kBegOr <= k <= kEndOr.
+        !                  It is of course possible that the entire
+        !                  block is stored.
+        integer(kind=intType) :: sectionID = 1
+        integer(kind=intType) :: cgnsBlockID
+        integer(kind=intType) :: iBegOr, iEndOr, jBegOr, jEndOr
+        integer(kind=intType) :: kBegOr, kEndOr
+        type(surfaceNodeWeightArray), dimension(6) :: nodalWeights
+        !
+        !         Adjoint solver variables.
+        !
+        ! globalNode(ib:ie,jb:je,kb:ke):  Global node numbering.
+        ! globalCell(0:ib,0:jb,0:kb):     Global cell numbering.
+        ! color(0:ib,0:jb,0:kb)     :     Temporary coloring array used for
+        !                                 forward mode AD/FD calculations
+        integer(kind=intType), dimension(:, :, :), pointer :: globalCGNSNode
+        integer(kind=intType), dimension(:, :, :), pointer :: globalNode
+        integer(kind=intType), dimension(:, :, :), pointer :: globalCell
+        integer(kind=intType), dimension(:, :, :), pointer :: color
+
+        ! Data storing the first order PC in tri-diagonal ordering. 7
+        ! real(kind=realType), dimension(:, :, :, :, :), pointer :: Diag
+        ! real(kind=realType), dimension(:, :, :, :, :), pointer :: i_L, i_U
+        ! real(kind=realType), dimension(:, :, :, :, :), pointer :: j_L, j_U
+        ! real(kind=realType), dimension(:, :, :, :, :), pointer :: k_L, k_U
+
+        real(kind=realType), dimension(:, :, :, :, :), pointer :: PCMat
+
+        ! Generic vectors for doing products/preconditioning. Like w, but
+        ! only 1 level of halos, and it is in block ordering (nw first)
+        ! instead of field ordering like w is.
+        real(kind=realType), dimension(:, :, :, :), pointer :: PCVec1, PCVec2
+
+        ! Data for the factorized trigonal solves
+        real(kind=realType), dimension(:, :, :, :), pointer :: i_D_fact, j_D_fact, k_D_fact
+        real(kind=realType), dimension(:, :, :, :), pointer :: i_L_Fact, j_L_Fact, k_L_Fact
+        real(kind=realType), dimension(:, :, :, :), pointer :: i_U_Fact, j_U_Fact, k_U_Fact
+        real(kind=realType), dimension(:, :, :, :), pointer :: i_U2_Fact, j_U2_Fact, k_U2_Fact
+
+        integer(kind=intType), dimension(:, :, :, :), pointer :: i_ipiv, j_ipiv, k_ipiv
+
+        ! A list of pointers for generic communication of either real or
+        ! integer data.
+        type(rPtr), dimension(24) :: realCommVars
+        type(iPtr), dimension(3) :: intCommvars
+
+    end type blockType
+
     !
-    type(interpPtType), intent(in) :: g1, g2
+    !       Array of all blocks at all multigrid levels and spectral sols.
     !
+    ! nDom:            total number of computational blocks.
+    ! flowDoms(:,:,:): array of blocks. Dimensions are
+    !                  (nDom,nLevels,nTimeIntervalsSpectral)
 
-    if(g1%donorProc < g2%donorProc) then
-       lessEqualinterpPtType = .true.
-       return
-    else if(g1%donorProc > g2%donorProc) then
-       lessEqualinterpPtType = .false.
-       return
-    endif
-
-    ! Donor processors are identical. Now we check the block
-
-    if(g1%donorBlock < g2%donorBlock) then
-       lessEqualinterpPtType = .true.
-       return
-    else if(g1%donorBlock > g2%donorBlock) then
-       lessEqualinterpPtType = .false.
-       return
-    endif
-
-    ! Compare the indices of the halo. First k, then j and
-    ! finally i.
-
-    if(g1%dK < g2%dK) then
-       lessEqualinterpPtType = .true.
-       return
-    else if(g1%dK > g2%dK) then
-       lessEqualinterpPtType = .false.
-       return
-    endif
-
-   if(g1%dJ < g2%dJ) then
-       lessEqualinterpPtType = .true.
-       return
-    else if(g1%dJ > g2%dJ) then
-       lessEqualinterpPtType = .false.
-       return
-    endif
-
-    if(g1%dI < g2%dI) then
-       lessEqualinterpPtType = .true.
-       return
-    else if(g1%dI > g2%dI) then
-       lessEqualinterpPtType = .false.
-       return
-    endif
-
-    ! Both entities are identical. So set lessEqual to .true.
-
-    lessEqualinterpPtType = .true.
-
-  end function lessEqualInterpPtType
-
-  logical function lessInterpPtType(g1, g2)
+    integer(kind=intType) :: nDom
+
+    ! A global paramter for how to sort fringes
+    integer(kind=intType) :: fringeSortType = sortByDonor
+
+#ifdef USE_TAPENADE
+    ! This is never actually compiled...just make tapenade think it
+    ! isn't allocatable
+    type(blockType), dimension(nn:nn, 1, ntimeIntervalsSpectral) :: flowDoms
+#else
+    type(blockType), allocatable, target, dimension(:, :, :) :: flowDoms
+    type(blockType), allocatable, target, dimension(:, :, :) :: flowDomsd
+    type(blockType), allocatable, target, dimension(:, :, :) :: flowDomsb
+#endif
 
-    implicit none
-    !
-    !        Function arguments.
     !
-    type(interpPtType), intent(in) :: g1, g2
+    !       Additional info needed in the flow solver.
     !
-    if(g1%donorProc < g2%donorProc) then
-       lessInterpPtType = .true.
-       return
-    else if(g1%donorProc > g2%donorProc) then
-       lessInterpPtType = .false.
-       return
-    endif
-
-    ! Donor processors are identical. Now we check the block
-
-    if(g1%donorBlock < g2%donorBlock) then
-       lessInterpPtType = .true.
-       return
-    else if(g1%donorBlock > g2%donorBlock) then
-       lessInterpPtType = .false.
-       return
-    endif
-
-    ! Compare the indices of the halo. First k, then j and
-    ! finally i.
-
-    if(g1%dK < g2%dK) then
-       lessInterpPtType = .true.
-       return
-    else if(g1%dK > g2%dK) then
-       lessInterpPtType = .false.
-       return
-    endif
-
-   if(g1%dJ < g2%dJ) then
-       lessInterpPtType = .true.
-       return
-    else if(g1%dJ > g2%dJ) then
-       lessInterpPtType = .false.
-       return
-    endif
-
-    if(g1%dI < g2%dI) then
-       lessInterpPtType = .true.
-       return
-    else if(g1%dI > g2%dI) then
-       lessInterpPtType = .false.
-       return
-    endif
-
-    ! Both entities are identical. So set less to .False.
-
-    lessInterpPtType = .False.
-
-  end function lessInterpPtType
-
+    ! nCellGlobal(nLev) - Global number of cells on every mg level.
+
+    integer(kind=intType), allocatable, dimension(:) :: nCellGlobal
+
+contains
+
+    logical function lessEqualFringeType(g1, g2)
+
+        !         lessEqual returns .true. if g1 <= g2 and .false. otherwise.
+        !         The comparison is firstly based on the processor ID of the
+        !         donor, then the block, then then the I, J, K
+        !
+        implicit none
+        !
+        !        Function arguments.
+        !
+        type(fringeType), intent(in) :: g1, g2
+        !
+        ! Compare the donor processors first. If not equal,
+        ! set lessEqual appropriately and return.
+        if (fringeSortType == sortByDonor) then
+            if (g1%donorProc < g2%donorProc) then
+                lessEqualfringeType = .true.
+                return
+            else if (g1%donorProc > g2%donorProc) then
+                lessEqualfringeType = .false.
+                return
+            end if
+
+            ! Donor processors are identical. Now we check the block
+
+            if (g1%donorBlock < g2%donorBlock) then
+                lessEqualfringeType = .true.
+                return
+            else if (g1%donorBlock > g2%donorBlock) then
+                lessEqualfringeType = .false.
+                return
+            end if
+
+            ! Compare the indices of the halo. First k, then j and
+            ! finally i.
+
+            if (g1%dIndex < g2%dIndex) then
+                lessEqualfringeType = .true.
+                return
+            else if (g1%dindex > g2%dIndex) then
+                lessEqualfringeType = .false.
+                return
+            end if
+
+        else if (fringeSortType == sortByReceiver) then
+
+            ! Compare my indices
+
+            if (g1%myIndex < g2%myIndex) then
+                lessEqualfringeType = .true.
+                return
+            else if (g1%myIndex > g2%myIndex) then
+                lessEqualfringeType = .false.
+                return
+            end if
+
+            ! Now compare the donor information:
+
+            if (g1%donorProc < g2%donorProc) then
+                lessEqualfringeType = .true.
+                return
+            else if (g1%donorProc > g2%donorProc) then
+                lessEqualfringeType = .false.
+                return
+            end if
+
+            ! Donor processors are identical. Now we check the block
+
+            if (g1%donorBlock < g2%donorBlock) then
+                lessEqualfringeType = .true.
+                return
+            else if (g1%donorBlock > g2%donorBlock) then
+                lessEqualfringeType = .false.
+                return
+            end if
+
+            ! Compare the indices of the halo. First k, then j and
+            ! finally i.
+
+            if (g1%dIndex < g2%dIndex) then
+                lessEqualfringeType = .true.
+                return
+            else if (g1%dIndex > g2%dIndex) then
+                lessEqualfringeType = .false.
+                return
+            end if
+        end if
+
+        ! Both entities are identical. So set lessEqual to .true.
+
+        lessEqualfringeType = .true.
+
+    end function lessEqualFringeType
+
+    logical function lessFringeType(g1, g2)
+
+        !         less returns .true. if g1 <= g2 and .false. otherwise.
+        !         The comparison is firstly based on the processor ID of the
+        !         donor, then the block, then then the I, J, K
+        !
+        implicit none
+        !
+        !        Function arguments.
+        !
+        type(fringeType), intent(in) :: g1, g2
+        !
+        ! Compare the donor processors first. If not equal,
+        ! set less appropriately and return.
+        if (fringeSortType == sortByDonor) then
+            if (g1%donorProc < g2%donorProc) then
+                lessfringeType = .true.
+                return
+            else if (g1%donorProc > g2%donorProc) then
+                lessfringeType = .false.
+                return
+            end if
+
+            ! Donor processors are identical. Now we check the block
+
+            if (g1%donorBlock < g2%donorBlock) then
+                lessfringeType = .true.
+                return
+            else if (g1%donorBlock > g2%donorBlock) then
+                lessfringeType = .false.
+                return
+            end if
+
+            ! Compare the indices of the halo. First k, then j and
+            ! finally i.
+
+            if (g1%dIndex < g2%dIndex) then
+                lessfringeType = .true.
+                return
+            else if (g1%dIndex > g2%dIndex) then
+                lessfringeType = .false.
+                return
+            end if
+
+        else if (fringeSortType == sortByReceiver) then
+
+            ! Compare my indices
+
+            if (g1%myIndex < g2%myIndex) then
+                lessfringeType = .true.
+                return
+            else if (g1%myIndex > g2%myIndex) then
+                lessfringeType = .false.
+                return
+            end if
+
+            ! Now compare the donor information:
+
+            if (g1%donorProc < g2%donorProc) then
+                lessfringeType = .true.
+                return
+            else if (g1%donorProc > g2%donorProc) then
+                lessfringeType = .false.
+                return
+            end if
+
+            ! Donor processors are identical. Now we check the block
+
+            if (g1%donorBlock < g2%donorBlock) then
+                lessfringeType = .true.
+                return
+            else if (g1%donorBlock > g2%donorBlock) then
+                lessfringeType = .false.
+                return
+            end if
+
+            ! Compare the indices of the halo. First k, then j and
+            ! finally i.
+
+            if (g1%dIndex < g2%dIndex) then
+                lessfringeType = .true.
+                return
+            else if (g1%dIndex > g2%dIndex) then
+                lessfringeType = .false.
+                return
+            end if
+        end if
+
+        ! Both entities are identical. So set less to .False.
+
+        lessFringeType = .False.
+
+    end function lessFringeType
+
+    logical function lessEqualInterpPtType(g1, g2)
+
+        !         lessEqual returns .true. if g1 <= g2 and .false. otherwise.
+        !         The comparison is firstly based on the processor ID of the
+        !         donor, then the block, then then the I, J, K
+        !
+        implicit none
+        !
+        !        Function arguments.
+        !
+        type(interpPtType), intent(in) :: g1, g2
+        !
+
+        if (g1%donorProc < g2%donorProc) then
+            lessEqualinterpPtType = .true.
+            return
+        else if (g1%donorProc > g2%donorProc) then
+            lessEqualinterpPtType = .false.
+            return
+        end if
+
+        ! Donor processors are identical. Now we check the block
+
+        if (g1%donorBlock < g2%donorBlock) then
+            lessEqualinterpPtType = .true.
+            return
+        else if (g1%donorBlock > g2%donorBlock) then
+            lessEqualinterpPtType = .false.
+            return
+        end if
+
+        ! Compare the indices of the halo. First k, then j and
+        ! finally i.
+
+        if (g1%dK < g2%dK) then
+            lessEqualinterpPtType = .true.
+            return
+        else if (g1%dK > g2%dK) then
+            lessEqualinterpPtType = .false.
+            return
+        end if
+
+        if (g1%dJ < g2%dJ) then
+            lessEqualinterpPtType = .true.
+            return
+        else if (g1%dJ > g2%dJ) then
+            lessEqualinterpPtType = .false.
+            return
+        end if
+
+        if (g1%dI < g2%dI) then
+            lessEqualinterpPtType = .true.
+            return
+        else if (g1%dI > g2%dI) then
+            lessEqualinterpPtType = .false.
+            return
+        end if
+
+        ! Both entities are identical. So set lessEqual to .true.
+
+        lessEqualinterpPtType = .true.
+
+    end function lessEqualInterpPtType
+
+    logical function lessInterpPtType(g1, g2)
+
+        implicit none
+        !
+        !        Function arguments.
+        !
+        type(interpPtType), intent(in) :: g1, g2
+        !
+        if (g1%donorProc < g2%donorProc) then
+            lessInterpPtType = .true.
+            return
+        else if (g1%donorProc > g2%donorProc) then
+            lessInterpPtType = .false.
+            return
+        end if
+
+        ! Donor processors are identical. Now we check the block
+
+        if (g1%donorBlock < g2%donorBlock) then
+            lessInterpPtType = .true.
+            return
+        else if (g1%donorBlock > g2%donorBlock) then
+            lessInterpPtType = .false.
+            return
+        end if
+
+        ! Compare the indices of the halo. First k, then j and
+        ! finally i.
+
+        if (g1%dK < g2%dK) then
+            lessInterpPtType = .true.
+            return
+        else if (g1%dK > g2%dK) then
+            lessInterpPtType = .false.
+            return
+        end if
+
+        if (g1%dJ < g2%dJ) then
+            lessInterpPtType = .true.
+            return
+        else if (g1%dJ > g2%dJ) then
+            lessInterpPtType = .false.
+            return
+        end if
+
+        if (g1%dI < g2%dI) then
+            lessInterpPtType = .true.
+            return
+        else if (g1%dI > g2%dI) then
+            lessInterpPtType = .false.
+            return
+        end if
+
+        ! Both entities are identical. So set less to .False.
+
+        lessInterpPtType = .False.
+
+    end function lessInterpPtType
 
 end module block
diff --git a/src/modules/blockPointers.F90 b/src/modules/blockPointers.F90
index 62950b127..05edd8a3d 100644
--- a/src/modules/blockPointers.F90
+++ b/src/modules/blockPointers.F90
@@ -1,260 +1,259 @@
 module blockPointers
-  !
-  !       This module contains the pointers for all variables inside a
-  !       block. The pointers are set via the subroutine setPointers,
-  !       which can be found in the utils directory. In this way the
-  !       code becomes much more readable. The relation to the original
-  !       multiblock grid is not copied, because it does not affect the
-  !       computation.
-  !       See the module block for the meaning of the variables.
-  !       Note that the dimensions are not pointers, but integers.
-  !       Consequently changing dimensions of a block must be done only
-  !       with the variables of floDoms.
-  !
-  use constants, only : intType, realType, porType
-  use block, only : fringeType, BCDataType, viscSubFaceType, flowDoms, nDom
+    !
+    !       This module contains the pointers for all variables inside a
+    !       block. The pointers are set via the subroutine setPointers,
+    !       which can be found in the utils directory. In this way the
+    !       code becomes much more readable. The relation to the original
+    !       multiblock grid is not copied, because it does not affect the
+    !       computation.
+    !       See the module block for the meaning of the variables.
+    !       Note that the dimensions are not pointers, but integers.
+    !       Consequently changing dimensions of a block must be done only
+    !       with the variables of floDoms.
+    !
+    use constants, only: intType, realType, porType
+    use block, only: fringeType, BCDataType, viscSubFaceType, flowDoms, nDom
 #ifndef USE_TAPENADE
-  use block, only : flowDomsd
+    use block, only: flowDomsd
 #endif
-  implicit none
-  !
-  !       Additional info, such that it is known to which block the data
-  !       inside this module belongs.
-  !
-  ! sectionID:   the section to which this block belongs.
-  ! nbkLocal :   local block number.
-  ! nbkGlobal:   global block number in the original cgns grid.
-  ! mgLevel:     the multigrid level.
-  ! spectralSol: the spectral solution index of this block.
-
-  integer(kind=intType) :: sectionID
-  integer(kind=intType) :: nbkLocal, nbkGlobal, mgLevel
-  integer(kind=intType) :: spectralSol
-  !
-  !       Variables, which are either copied or the pointer is set to
-  !       the correct variable in the block. See the module block for
-  !       meaning of the variables.
-  !
-  integer(kind=intType) :: nx, ny, nz, il, jl, kl
-  integer(kind=intType) :: ie, je, ke, ib, jb, kb
-  integer(kind=intType) :: maxDim, imaxDim, jmaxDim
-
-  logical :: rightHanded
-
-  integer(kind=intType) :: iBegOr, iEndOr, jBegOr, jEndOr
-  integer(kind=intType) :: kBegOr, kEndOr
-
-  integer(kind=intType) :: nSubface, n1to1, nBocos, nViscBocos
-
-  integer(kind=intType), dimension(:), pointer :: BCType
-  integer(kind=intType), dimension(:), pointer :: BCFaceID
-
-  integer(kind=intType), dimension(:), pointer :: cgnsSubface
-
-  integer(kind=intType), dimension(:), pointer :: inBeg, inEnd
-  integer(kind=intType), dimension(:), pointer :: jnBeg, jnEnd
-  integer(kind=intType), dimension(:), pointer :: knBeg, knEnd
-
-  integer(kind=intType), dimension(:), pointer :: dinBeg, dinEnd
-  integer(kind=intType), dimension(:), pointer :: djnBeg, djnEnd
-  integer(kind=intType), dimension(:), pointer :: dknBeg, dknEnd
-
-  integer(kind=intType), dimension(:), pointer :: icBeg, icEnd
-  integer(kind=intType), dimension(:), pointer :: jcBeg, jcEnd
-  integer(kind=intType), dimension(:), pointer :: kcBeg, kcEnd
-
-  integer(kind=intType), dimension(:), pointer :: neighBlock
-  integer(kind=intType), dimension(:), pointer :: neighProc
-  integer(kind=intType), dimension(:), pointer :: l1, l2, l3
-  integer(kind=intType), dimension(:), pointer :: groupNum
-
-  integer(kind=intType), dimension(:,:,:), pointer :: iblank
-  integer(kind=intType), dimension(:,:,:), pointer :: status
-  integer(kind=intType), dimension(:,:,:), pointer :: forcedRecv
-  type(fringeType), dimension(:), pointer :: fringes
-  integer(kind=intType), pointer :: nDonors
-  integer(kind=intType), dimension(:, :, :, :), pointer :: fringePtr
-  integer(kind=intType), dimension(:, :, :, :), pointer :: gInd
-  integer(kind=intType), dimension(:, :), pointer :: orphans
-  integer(kind=intType) :: nOrphans
-
-  integer(kind=intType), dimension(:), pointer :: neighBlockOver
-  integer(kind=intType), dimension(:), pointer :: neighProcOver
-
-  type(BCDataType),      dimension(:), pointer :: BCData
-  type(viscSubfaceType), dimension(:), pointer :: viscSubface
-
-  integer(kind=intType), dimension(:,:), pointer :: viscIMinPointer
-  integer(kind=intType), dimension(:,:), pointer :: viscIMaxPointer
-  integer(kind=intType), dimension(:,:), pointer :: viscJMinPointer
-  integer(kind=intType), dimension(:,:), pointer :: viscJMaxPointer
-  integer(kind=intType), dimension(:,:), pointer :: viscKMinPointer
-  integer(kind=intType), dimension(:,:), pointer :: viscKMaxPointer
-
-  real(kind=realType), dimension(:,:,:,:),   pointer :: x
-  real(kind=realType), dimension(:,:,:,:,:), pointer :: xOld
-  real(kind=realType), dimension(:,:,:,:), pointer :: sI, sJ, sK
-  real(kind=realType), dimension(:,:,:),   pointer :: vol
-  real(kind=realType), dimension(:,:,:),   pointer :: volref
-  real(kind=realType), dimension(:,:,:,:), pointer :: volOld
-  real(kind=realType), dimension(:,:,:,:),   pointer :: dadidata
-
-  integer(kind=porType), dimension(:,:,:), pointer :: porI, porJ, porK
-
-  integer(kind=intType), dimension(:,:,:), pointer :: indFamilyI
-  integer(kind=intType), dimension(:,:,:), pointer :: indFamilyJ
-  integer(kind=intType), dimension(:,:,:), pointer :: indFamilyK
-
-  integer(kind=intType), dimension(:,:,:), pointer :: factFamilyI
-  integer(kind=intType), dimension(:,:,:), pointer :: factFamilyJ
-  integer(kind=intType), dimension(:,:,:), pointer :: factFamilyK
-
-  real(kind=realType), dimension(:,:,:,:,:), pointer :: rotMatrixI
-  real(kind=realType), dimension(:,:,:,:,:), pointer :: rotMatrixJ
-  real(kind=realType), dimension(:,:,:,:,:), pointer :: rotMatrixK
-
-  logical :: blockIsMoving, addGridVelocities
-
-  real(kind=realType), dimension(:,:,:), pointer :: sFaceI, sFaceJ, sfaceK
-  real(kind=realType), dimension(:,:,:,:),   pointer :: w
-  real(kind=realType), dimension(:,:,:,:,:), pointer :: wOld
-
-  real(kind=realType), dimension(:,:,:),     pointer :: p, gamma, aa
-  real(kind=realType), dimension(:,:,:),     pointer :: shockSensor
-  real(kind=realType), dimension(:,:,:),     pointer :: rlv, rev
-  real(kind=realType), dimension(:,:,:,:),   pointer :: s
-  real(kind=realType), dimension(:,:,:),     pointer :: p1
-  real(kind=realType), dimension(:,:,:,:),   pointer :: dw, fw
-  real(kind=realType), dimension(:,:,:,:),   pointer :: scratch
-  real(kind=realType), dimension(:,:,:,:,:), pointer :: dwOldRK
-  real(kind=realType), dimension(:,:,:,:),   pointer :: w1, wr
-  real(kind=realType), dimension(:, :, :), pointer:: ux, uy, uz
-  real(kind=realType), dimension(:, :, :), pointer:: vx, vy, vz
-  real(kind=realType), dimension(:, :, :), pointer:: wx, wy, wz
-  real(kind=realType), dimension(:, :, :), pointer:: qx, qy, qz
-
-  integer(kind=intType), dimension(:,:), pointer :: mgIFine
-  integer(kind=intType), dimension(:,:), pointer :: mgJFine
-  integer(kind=intType), dimension(:,:), pointer :: mgKFine
-
-  real(kind=realType),   dimension(:),   pointer :: mgIWeight
-  real(kind=realType),   dimension(:),   pointer :: mgJWeight
-  real(kind=realType),   dimension(:),   pointer :: mgKWeight
-
-  integer(kind=intType), dimension(:,:), pointer :: mgICoarse
-  integer(kind=intType), dimension(:,:), pointer :: mgJCoarse
-  integer(kind=intType), dimension(:,:), pointer :: mgKCoarse
-
-  real(kind=realType), dimension(:,:,:,:), pointer :: wn
-  real(kind=realType), dimension(:,:,:),   pointer :: pn
-  real(kind=realType), dimension(:,:,:),   pointer :: dtl
-  real(kind=realType), dimension(:,:,:),   pointer :: radI, radJ, radK
-
-  real(kind=realType), dimension(:,:,:), pointer :: d2Wall
-  real(kind=realType), dimension(:,:,:), pointer :: ks
-  real(kind=realType), dimension(:,:,:), pointer :: intermittency
-  real(kind=realType), dimension(:,:,:),   pointer :: filterDES  ! eran-des
-  real(kind=realType), dimension(:,:,:,:), pointer :: bmti1
-  real(kind=realType), dimension(:,:,:,:), pointer :: bmti2
-  real(kind=realType), dimension(:,:,:,:), pointer :: bmtj1
-  real(kind=realType), dimension(:,:,:,:), pointer :: bmtj2
-  real(kind=realType), dimension(:,:,:,:), pointer :: bmtk1
-  real(kind=realType), dimension(:,:,:,:), pointer :: bmtk2
-  real(kind=realType), dimension(:,:,:), pointer :: bvti1, bvti2
-  real(kind=realType), dimension(:,:,:), pointer :: bvtj1, bvtj2
-  real(kind=realType), dimension(:,:,:), pointer :: bvtk1, bvtk2
-
-  integer(kind=intType), dimension(:,:,:), pointer :: globalNode
-  integer(kind=intType), dimension(:,:,:), pointer :: globalCell
-  real(kind=realType), dimension(:, :, :, :), pointer :: xSeed
-  integer(kind=intType), dimension(:, :, :), pointer :: wallInd
-
-  real(kind=realType), dimension(:,:,:,:), pointer :: w_offTimeInstance
-  real(kind=realType), dimension(:,:,:), pointer :: vol_offTimeInstance
-
-  ! Added by HDN
-  real(kind=realType), dimension(:,:,:,:),   pointer :: xALE
-  real(kind=realType), dimension(:,:,:,:),   pointer :: sVeloIALE, sVeloJALE, sVeloKALE
-  real(kind=realType), dimension(:,:,:,:,:), pointer :: sIALE, sJALE, sKALE
-  real(kind=realType), dimension(:,:,:,:),   pointer :: sFaceIALE, sFaceJALE, sFaceKALE
-  real(kind=realType), dimension(:,:,:,:,:), pointer :: dwALE, fwALE
-
+    implicit none
+    !
+    !       Additional info, such that it is known to which block the data
+    !       inside this module belongs.
+    !
+    ! sectionID:   the section to which this block belongs.
+    ! nbkLocal :   local block number.
+    ! nbkGlobal:   global block number in the original cgns grid.
+    ! mgLevel:     the multigrid level.
+    ! spectralSol: the spectral solution index of this block.
+
+    integer(kind=intType) :: sectionID
+    integer(kind=intType) :: nbkLocal, nbkGlobal, mgLevel
+    integer(kind=intType) :: spectralSol
+    !
+    !       Variables, which are either copied or the pointer is set to
+    !       the correct variable in the block. See the module block for
+    !       meaning of the variables.
+    !
+    integer(kind=intType) :: nx, ny, nz, il, jl, kl
+    integer(kind=intType) :: ie, je, ke, ib, jb, kb
+    integer(kind=intType) :: maxDim, imaxDim, jmaxDim
+
+    logical :: rightHanded
+
+    integer(kind=intType) :: iBegOr, iEndOr, jBegOr, jEndOr
+    integer(kind=intType) :: kBegOr, kEndOr
+
+    integer(kind=intType) :: nSubface, n1to1, nBocos, nViscBocos
+
+    integer(kind=intType), dimension(:), pointer :: BCType
+    integer(kind=intType), dimension(:), pointer :: BCFaceID
+
+    integer(kind=intType), dimension(:), pointer :: cgnsSubface
+
+    integer(kind=intType), dimension(:), pointer :: inBeg, inEnd
+    integer(kind=intType), dimension(:), pointer :: jnBeg, jnEnd
+    integer(kind=intType), dimension(:), pointer :: knBeg, knEnd
+
+    integer(kind=intType), dimension(:), pointer :: dinBeg, dinEnd
+    integer(kind=intType), dimension(:), pointer :: djnBeg, djnEnd
+    integer(kind=intType), dimension(:), pointer :: dknBeg, dknEnd
+
+    integer(kind=intType), dimension(:), pointer :: icBeg, icEnd
+    integer(kind=intType), dimension(:), pointer :: jcBeg, jcEnd
+    integer(kind=intType), dimension(:), pointer :: kcBeg, kcEnd
+
+    integer(kind=intType), dimension(:), pointer :: neighBlock
+    integer(kind=intType), dimension(:), pointer :: neighProc
+    integer(kind=intType), dimension(:), pointer :: l1, l2, l3
+    integer(kind=intType), dimension(:), pointer :: groupNum
+
+    integer(kind=intType), dimension(:, :, :), pointer :: iblank
+    integer(kind=intType), dimension(:, :, :), pointer :: status
+    integer(kind=intType), dimension(:, :, :), pointer :: forcedRecv
+    type(fringeType), dimension(:), pointer :: fringes
+    integer(kind=intType), pointer :: nDonors
+    integer(kind=intType), dimension(:, :, :, :), pointer :: fringePtr
+    integer(kind=intType), dimension(:, :, :, :), pointer :: gInd
+    integer(kind=intType), dimension(:, :), pointer :: orphans
+    integer(kind=intType) :: nOrphans
+
+    integer(kind=intType), dimension(:), pointer :: neighBlockOver
+    integer(kind=intType), dimension(:), pointer :: neighProcOver
+
+    type(BCDataType), dimension(:), pointer :: BCData
+    type(viscSubfaceType), dimension(:), pointer :: viscSubface
+
+    integer(kind=intType), dimension(:, :), pointer :: viscIMinPointer
+    integer(kind=intType), dimension(:, :), pointer :: viscIMaxPointer
+    integer(kind=intType), dimension(:, :), pointer :: viscJMinPointer
+    integer(kind=intType), dimension(:, :), pointer :: viscJMaxPointer
+    integer(kind=intType), dimension(:, :), pointer :: viscKMinPointer
+    integer(kind=intType), dimension(:, :), pointer :: viscKMaxPointer
+
+    real(kind=realType), dimension(:, :, :, :), pointer :: x
+    real(kind=realType), dimension(:, :, :, :, :), pointer :: xOld
+    real(kind=realType), dimension(:, :, :, :), pointer :: sI, sJ, sK
+    real(kind=realType), dimension(:, :, :), pointer :: vol
+    real(kind=realType), dimension(:, :, :), pointer :: volref
+    real(kind=realType), dimension(:, :, :, :), pointer :: volOld
+    real(kind=realType), dimension(:, :, :, :), pointer :: dadidata
+
+    integer(kind=porType), dimension(:, :, :), pointer :: porI, porJ, porK
+
+    integer(kind=intType), dimension(:, :, :), pointer :: indFamilyI
+    integer(kind=intType), dimension(:, :, :), pointer :: indFamilyJ
+    integer(kind=intType), dimension(:, :, :), pointer :: indFamilyK
+
+    integer(kind=intType), dimension(:, :, :), pointer :: factFamilyI
+    integer(kind=intType), dimension(:, :, :), pointer :: factFamilyJ
+    integer(kind=intType), dimension(:, :, :), pointer :: factFamilyK
+
+    real(kind=realType), dimension(:, :, :, :, :), pointer :: rotMatrixI
+    real(kind=realType), dimension(:, :, :, :, :), pointer :: rotMatrixJ
+    real(kind=realType), dimension(:, :, :, :, :), pointer :: rotMatrixK
+
+    logical :: blockIsMoving, addGridVelocities
+
+    real(kind=realType), dimension(:, :, :), pointer :: sFaceI, sFaceJ, sfaceK
+    real(kind=realType), dimension(:, :, :, :), pointer :: w
+    real(kind=realType), dimension(:, :, :, :, :), pointer :: wOld
+
+    real(kind=realType), dimension(:, :, :), pointer :: p, gamma, aa
+    real(kind=realType), dimension(:, :, :), pointer :: shockSensor
+    real(kind=realType), dimension(:, :, :), pointer :: rlv, rev
+    real(kind=realType), dimension(:, :, :, :), pointer :: s
+    real(kind=realType), dimension(:, :, :), pointer :: p1
+    real(kind=realType), dimension(:, :, :, :), pointer :: dw, fw
+    real(kind=realType), dimension(:, :, :, :), pointer :: scratch
+    real(kind=realType), dimension(:, :, :, :, :), pointer :: dwOldRK
+    real(kind=realType), dimension(:, :, :, :), pointer :: w1, wr
+    real(kind=realType), dimension(:, :, :), pointer:: ux, uy, uz
+    real(kind=realType), dimension(:, :, :), pointer:: vx, vy, vz
+    real(kind=realType), dimension(:, :, :), pointer:: wx, wy, wz
+    real(kind=realType), dimension(:, :, :), pointer:: qx, qy, qz
+
+    integer(kind=intType), dimension(:, :), pointer :: mgIFine
+    integer(kind=intType), dimension(:, :), pointer :: mgJFine
+    integer(kind=intType), dimension(:, :), pointer :: mgKFine
+
+    real(kind=realType), dimension(:), pointer :: mgIWeight
+    real(kind=realType), dimension(:), pointer :: mgJWeight
+    real(kind=realType), dimension(:), pointer :: mgKWeight
+
+    integer(kind=intType), dimension(:, :), pointer :: mgICoarse
+    integer(kind=intType), dimension(:, :), pointer :: mgJCoarse
+    integer(kind=intType), dimension(:, :), pointer :: mgKCoarse
+
+    real(kind=realType), dimension(:, :, :, :), pointer :: wn
+    real(kind=realType), dimension(:, :, :), pointer :: pn
+    real(kind=realType), dimension(:, :, :), pointer :: dtl
+    real(kind=realType), dimension(:, :, :), pointer :: radI, radJ, radK
+
+    real(kind=realType), dimension(:, :, :), pointer :: d2Wall
+    real(kind=realType), dimension(:, :, :), pointer :: ks
+    real(kind=realType), dimension(:, :, :), pointer :: intermittency
+    real(kind=realType), dimension(:, :, :), pointer :: filterDES  ! eran-des
+    real(kind=realType), dimension(:, :, :, :), pointer :: bmti1
+    real(kind=realType), dimension(:, :, :, :), pointer :: bmti2
+    real(kind=realType), dimension(:, :, :, :), pointer :: bmtj1
+    real(kind=realType), dimension(:, :, :, :), pointer :: bmtj2
+    real(kind=realType), dimension(:, :, :, :), pointer :: bmtk1
+    real(kind=realType), dimension(:, :, :, :), pointer :: bmtk2
+    real(kind=realType), dimension(:, :, :), pointer :: bvti1, bvti2
+    real(kind=realType), dimension(:, :, :), pointer :: bvtj1, bvtj2
+    real(kind=realType), dimension(:, :, :), pointer :: bvtk1, bvtk2
+
+    integer(kind=intType), dimension(:, :, :), pointer :: globalNode
+    integer(kind=intType), dimension(:, :, :), pointer :: globalCell
+    real(kind=realType), dimension(:, :, :, :), pointer :: xSeed
+    integer(kind=intType), dimension(:, :, :), pointer :: wallInd
+
+    real(kind=realType), dimension(:, :, :, :), pointer :: w_offTimeInstance
+    real(kind=realType), dimension(:, :, :), pointer :: vol_offTimeInstance
+
+    ! Added by HDN
+    real(kind=realType), dimension(:, :, :, :), pointer :: xALE
+    real(kind=realType), dimension(:, :, :, :), pointer :: sVeloIALE, sVeloJALE, sVeloKALE
+    real(kind=realType), dimension(:, :, :, :, :), pointer :: sIALE, sJALE, sKALE
+    real(kind=realType), dimension(:, :, :, :), pointer :: sFaceIALE, sFaceJALE, sFaceKALE
+    real(kind=realType), dimension(:, :, :, :, :), pointer :: dwALE, fwALE
 
 #ifndef USE_TAPENADE
-  TYPE(VISCSUBFACETYPE), DIMENSION(:), POINTER :: viscsubfaced
+    TYPE(VISCSUBFACETYPE), DIMENSION(:), POINTER :: viscsubfaced
 
-  REAL(kind=realtype), DIMENSION(:, :, :, :), POINTER :: xd
-  REAL(kind=realtype), DIMENSION(:, :, :, :), POINTER :: sid, sjd, skd
+    REAL(kind=realtype), DIMENSION(:, :, :, :), POINTER :: xd
+    REAL(kind=realtype), DIMENSION(:, :, :, :), POINTER :: sid, sjd, skd
 
-  real(kind=realType), dimension(:,:,:),   pointer ::vold
+    real(kind=realType), dimension(:, :, :), pointer ::vold
 
-  REAL(kind=realtype), DIMENSION(:, :, :, :, :), POINTER :: rotmatrixid
-  REAL(kind=realtype), DIMENSION(:, :, :, :, :), POINTER :: rotmatrixjd
-  REAL(kind=realtype), DIMENSION(:, :, :, :, :), POINTER :: rotmatrixkd
+    REAL(kind=realtype), DIMENSION(:, :, :, :, :), POINTER :: rotmatrixid
+    REAL(kind=realtype), DIMENSION(:, :, :, :, :), POINTER :: rotmatrixjd
+    REAL(kind=realtype), DIMENSION(:, :, :, :, :), POINTER :: rotmatrixkd
 
-  REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: sfaceid
-  REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: sfacejd
-  REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: sfacekd
+    REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: sfaceid
+    REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: sfacejd
+    REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: sfacekd
 
-  REAL(kind=realtype), DIMENSION(:, :, :, :), POINTER :: wd
-  REAL(kind=realtype), DIMENSION(:, :, :, :, :), POINTER :: woldd
+    REAL(kind=realtype), DIMENSION(:, :, :, :), POINTER :: wd
+    REAL(kind=realtype), DIMENSION(:, :, :, :, :), POINTER :: woldd
 
-  REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: uxd
-  REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: uyd
-  REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: uzd
+    REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: uxd
+    REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: uyd
+    REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: uzd
 
-  REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: vxd
-  REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: vyd
-  REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: vzd
+    REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: vxd
+    REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: vyd
+    REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: vzd
 
-  REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: wxd
-  REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: wyd
-  REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: wzd
+    REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: wxd
+    REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: wyd
+    REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: wzd
 
-  REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: qxd
-  REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: qyd
-  REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: qzd
+    REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: qxd
+    REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: qyd
+    REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: qzd
 
-  REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: pd, gammad, aad
-  REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: rlvd, revd
+    REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: pd, gammad, aad
+    REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: rlvd, revd
 
-  REAL(kind=realtype), DIMENSION(:, :, :, :), POINTER :: sd
+    REAL(kind=realtype), DIMENSION(:, :, :, :), POINTER :: sd
 
-  REAL(kind=realtype), DIMENSION(:, :, :, :), POINTER :: dwd, fwd
-  REAL(kind=realtype), DIMENSION(:, :, :, :), POINTER :: w1d, wrd
-  REAL(kind=realtype), DIMENSION(:, :, :, :), POINTER :: scratchd
+    REAL(kind=realtype), DIMENSION(:, :, :, :), POINTER :: dwd, fwd
+    REAL(kind=realtype), DIMENSION(:, :, :, :), POINTER :: w1d, wrd
+    REAL(kind=realtype), DIMENSION(:, :, :, :), POINTER :: scratchd
 
-  REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: dtld
-  REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: radid
-  REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: radjd
-  REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: radkd
+    REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: dtld
+    REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: radid
+    REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: radjd
+    REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: radkd
 
-  REAL(kind=realtype), DIMENSION(:, :, :, :), POINTER :: bmti1d
-  REAL(kind=realtype), DIMENSION(:, :, :, :), POINTER :: bmti2d
-  REAL(kind=realtype), DIMENSION(:, :, :, :), POINTER :: bmtj1d
-  REAL(kind=realtype), DIMENSION(:, :, :, :), POINTER :: bmtj2d
-  REAL(kind=realtype), DIMENSION(:, :, :, :), POINTER :: bmtk1d
-  REAL(kind=realtype), DIMENSION(:, :, :, :), POINTER :: bmtk2d
+    REAL(kind=realtype), DIMENSION(:, :, :, :), POINTER :: bmti1d
+    REAL(kind=realtype), DIMENSION(:, :, :, :), POINTER :: bmti2d
+    REAL(kind=realtype), DIMENSION(:, :, :, :), POINTER :: bmtj1d
+    REAL(kind=realtype), DIMENSION(:, :, :, :), POINTER :: bmtj2d
+    REAL(kind=realtype), DIMENSION(:, :, :, :), POINTER :: bmtk1d
+    REAL(kind=realtype), DIMENSION(:, :, :, :), POINTER :: bmtk2d
 
-  REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: bvti1d, bvti2d
-  REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: bvtj1d, bvtj2d
-  REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: bvtk1d, bvtk2d
+    REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: bvti1d, bvti2d
+    REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: bvtj1d, bvtj2d
+    REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: bvtk1d, bvtk2d
 
-  REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: d2walld
+    REAL(kind=realtype), DIMENSION(:, :, :), POINTER :: d2walld
 
-  real(kind=realType), dimension(:,:,:,:), pointer :: w_offTimeInstanced
-  real(kind=realType), dimension(:,:,:), pointer :: vol_offTimeInstanced
+    real(kind=realType), dimension(:, :, :, :), pointer :: w_offTimeInstanced
+    real(kind=realType), dimension(:, :, :), pointer :: vol_offTimeInstanced
 
-  type(BCDataType),      dimension(:), pointer :: BCDatad
+    type(BCDataType), dimension(:), pointer :: BCDatad
 
-  real(kind=realType), dimension(:, :, :, :, :), pointer :: PCMat
+    real(kind=realType), dimension(:, :, :, :, :), pointer :: PCMat
     real(kind=realType), dimension(:, :, :, :), pointer :: PCvec1, PCvec2
 
-  real(kind=realType), dimension(:, :, :, :), pointer :: i_D_fact,  j_D_fact,  k_D_fact
-  real(kind=realType), dimension(:, :, :, :), pointer :: i_L_Fact,  j_L_Fact,  k_L_Fact
-  real(kind=realType), dimension(:, :, :, :), pointer :: i_U_Fact,  j_U_Fact,  k_U_Fact
-  real(kind=realType), dimension(:, :, :, :), pointer :: i_U2_Fact, j_U2_Fact, k_U2_Fact
-  integer(kind=intType), dimension(:, :, :, :), pointer :: i_ipiv, j_ipiv, k_ipiv
+    real(kind=realType), dimension(:, :, :, :), pointer :: i_D_fact, j_D_fact, k_D_fact
+    real(kind=realType), dimension(:, :, :, :), pointer :: i_L_Fact, j_L_Fact, k_L_Fact
+    real(kind=realType), dimension(:, :, :, :), pointer :: i_U_Fact, j_U_Fact, k_U_Fact
+    real(kind=realType), dimension(:, :, :, :), pointer :: i_U2_Fact, j_U2_Fact, k_U2_Fact
+    integer(kind=intType), dimension(:, :, :, :), pointer :: i_ipiv, j_ipiv, k_ipiv
 #endif
 
 end module blockPointers
diff --git a/src/modules/cgnsNames.f90 b/src/modules/cgnsNames.f90
index ede08cb60..02d8c5164 100644
--- a/src/modules/cgnsNames.f90
+++ b/src/modules/cgnsNames.f90
@@ -1,290 +1,288 @@
-       module cgnsNames
+module cgnsNames
 !
 !       Parametrized cgns names of the variables used in this code.
 !
-       use constants, only : maxCGNSNameLen
-       implicit none
-       save
+    use constants, only: maxCGNSNameLen
+    implicit none
+    save
 !
 !       Time history values.
 !
-       character(len=maxCGNSNameLen), parameter :: &
-                                         cgnsTimeValue = "TimeValues"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsTimeValue = "TimeValues"
 !
 !       coordinate names.
 !
-       character(len=maxCGNSNameLen), parameter :: &
-                                         cgnsCoorX = "CoordinateX"
-       character(len=maxCGNSNameLen), parameter :: &
-                                         cgnsCoorY = "CoordinateY"
-       character(len=maxCGNSNameLen), parameter :: &
-                                         cgnsCoorZ = "CoordinateZ"
-       character(len=maxCGNSNameLen), parameter :: &
-                                         cgnsCoorR = "CoordinateR"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsCoorX = "CoordinateX"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsCoorY = "CoordinateY"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsCoorZ = "CoordinateZ"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsCoorR = "CoordinateR"
 !
 !       Variable names.
 !
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsDensity     = "Density"
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsMomX        = "MomentumX"
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsMomY        = "MomentumY"
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsMomZ        = "MomentumZ"
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsEnergy      = "EnergyStagnationDensity"
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsTurbSANu    = "TurbulentSANuTilde"
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsTurbK       = "TurbulentEnergyKinetic"
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsTurbOmega   = "TurbulentDissipationRate"
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsTurbTau     = "TurbulentInvDissipationRate"
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsTurbEpsilon = "TurbulentDissipation"
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsTurbV2      = "TurbulentScalarV2"
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsTurbF       = "TurbulentScalarF"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsDensity = "Density"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsMomX = "MomentumX"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsMomY = "MomentumY"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsMomZ = "MomentumZ"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsEnergy = "EnergyStagnationDensity"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsTurbSANu = "TurbulentSANuTilde"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsTurbK = "TurbulentEnergyKinetic"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsTurbOmega = "TurbulentDissipationRate"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsTurbTau = "TurbulentInvDissipationRate"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsTurbEpsilon = "TurbulentDissipation"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsTurbV2 = "TurbulentScalarV2"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsTurbF = "TurbulentScalarF"
 
-       character(len=maxCGNSNameLen), parameter :: &
-                        cgnsVelX      = "VelocityX"
-       character(len=maxCGNSNameLen), parameter :: &
-                        cgnsVelY      = "VelocityY"
-       character(len=maxCGNSNameLen), parameter :: &
-                        cgnsVelZ      = "VelocityZ"
-       character(len=maxCGNSNameLen), parameter :: &
-                        cgnsRelVelX   = "RelativeVelocityX"
-       character(len=maxCGNSNameLen), parameter :: &
-                        cgnsRelVelY   = "RelativeVelocityY"
-       character(len=maxCGNSNameLen), parameter :: &
-                        cgnsRelVelZ   = "RelativeVelocityZ"
-       character(len=maxCGNSNameLen), parameter :: &
-                        cgnsVelr      = "VelocityR"
-       character(len=maxCGNSNameLen), parameter :: &
-                        cgnsVelTheta  = "VelocityTheta"
-       character(len=maxCGNSNameLen), parameter :: &
-                        cgnsPressure  = "Pressure"
-       character(len=maxCGNSNameLen), parameter :: &
-                        cgnsTemp      = "Temperature"
-       character(len=maxCGNSNameLen), parameter :: &
-                        cgnsCp        = "CoefPressure"
-       character(len=maxCGNSNameLen), parameter :: &
-                        cgnsMach      = "Mach"
-       character(len=maxCGNSNameLen), parameter :: &
-                        cgnsRelMach   = "RelativeMach"
-       character(len=maxCGNSNameLen), parameter :: &
-                        cgnsMachTurb  = "MachTurbulent"
-       character(len=maxCGNSNameLen), parameter :: &
-                        cgnsViscMol   = "ViscosityMolecular"
-       character(len=maxCGNSNameLen), parameter :: &
-                        cgnsViscKin   = "ViscosityKinematic"
-       character(len=maxCGNSNameLen), parameter :: &
-                        cgnsEddy      = "ViscosityEddy"
-       character(len=maxCGNSNameLen), parameter :: &
-                        cgnsEddyRatio = "ViscosityEddyRatio"
-       character(len=maxCGNSNameLen), parameter :: &
-                        cgnsWallDist  = "TurbulentDistance"
-       character(len=maxCGNSNameLen), parameter :: &
-                        cgnsVortMagn  = "VorticityMagnitude"
-       character(len=maxCGNSNameLen), parameter :: &
-                        cgnsVortX     = "VorticityX"
-       character(len=maxCGNSNameLen), parameter :: &
-                        cgnsVorty     = "VorticityY"
-       character(len=maxCGNSNameLen), parameter :: &
-                        cgnsVortZ     = "VorticityZ"
-       character(len=maxCGNSNameLen), parameter :: &
-                        cgnsPtotLoss  = "RelativePressureStagnationLoss"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsVelX = "VelocityX"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsVelY = "VelocityY"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsVelZ = "VelocityZ"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsRelVelX = "RelativeVelocityX"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsRelVelY = "RelativeVelocityY"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsRelVelZ = "RelativeVelocityZ"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsVelr = "VelocityR"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsVelTheta = "VelocityTheta"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsPressure = "Pressure"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsTemp = "Temperature"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsCp = "CoefPressure"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsMach = "Mach"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsRelMach = "RelativeMach"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsMachTurb = "MachTurbulent"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsViscMol = "ViscosityMolecular"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsViscKin = "ViscosityKinematic"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsEddy = "ViscosityEddy"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsEddyRatio = "ViscosityEddyRatio"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsWallDist = "TurbulentDistance"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsVortMagn = "VorticityMagnitude"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsVortX = "VorticityX"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsVorty = "VorticityY"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsVortZ = "VorticityZ"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsPtotLoss = "RelativePressureStagnationLoss"
 
-       character(len=maxCGNSNameLen), parameter :: &
-                        cgnsRhoTot    = "DensityStagnation"
-       character(len=maxCGNSNameLen), parameter :: &
-                        cgnsPTot      = "PressureStagnation"
-       character(len=maxCGNSNameLen), parameter :: &
-                        cgnsTTot      = "TemperatureStagnation"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsRhoTot = "DensityStagnation"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsPTot = "PressureStagnation"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsTTot = "TemperatureStagnation"
 
-       character(len=maxCGNSNameLen), parameter :: &
-                             cgnsSkinFMag  = "SkinFrictionMagnitude"
-       character(len=maxCGNSNameLen), parameter :: &
-                             cgnsSkinFX    = "SkinFrictionX"
-       character(len=maxCGNSNameLen), parameter :: &
-                             cgnsSkinFY    = "SkinFrictionY"
-       character(len=maxCGNSNameLen), parameter :: &
-                             cgnsSkinFZ    = "SkinFrictionZ"
-       character(len=maxCGNSNameLen), parameter :: &
-                             cgnsStanton   = "StantonNumber"
-       character(len=maxCGNSNameLen), parameter :: &
-                             cgnsYPlus     = "YPlus"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsSkinFMag = "SkinFrictionMagnitude"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsSkinFX = "SkinFrictionX"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsSkinFY = "SkinFrictionY"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsSkinFZ = "SkinFrictionZ"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsStanton = "StantonNumber"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsYPlus = "YPlus"
 
-       character(len=maxCGNSNameLen), parameter :: &
-                             cgnsVelocity    = "Mach_Velocity"
-       character(len=maxCGNSNameLen), parameter :: &
-                             cgnsSoundSpeed  = "Mach_VelocitySound"
-       character(len=maxCGNSNameLen), parameter :: &
-                             cgnsLength      = "LengthReference"
-       character(len=maxCGNSNameLen), parameter :: &
-                             cgnsReyn        = "Reynolds"
-       character(len=maxCGNSNameLen), parameter :: &
-                             cgnsReynLen     = "Reynolds_Length"
-       character(len=maxCGNSNameLen), parameter :: &
-                             cgnsHeatRatio   = "SpecificHeatRatio"
-       character(len=maxCGNSNameLen), parameter :: &
-                             cgnsPrandtl     = "Prandtl"
-       character(len=maxCGNSNameLen), parameter :: &
-                             cgnsPrandtlTurb = "PrandtlTurbulent"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsVelocity = "Mach_Velocity"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsSoundSpeed = "Mach_VelocitySound"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsLength = "LengthReference"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsReyn = "Reynolds"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsReynLen = "Reynolds_Length"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsHeatRatio = "SpecificHeatRatio"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsPrandtl = "Prandtl"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsPrandtlTurb = "PrandtlTurbulent"
 
-       character(len=maxCGNSNameLen), parameter :: &
-                             cgnsVelAngleX   = "VelocityAngleX"
-       character(len=maxCGNSNameLen), parameter :: &
-                             cgnsVelAngleY   = "VelocityAngleY"
-       character(len=maxCGNSNameLen), parameter :: &
-                             cgnsVelAngleZ   = "VelocityAngleZ"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsVelAngleX = "VelocityAngleX"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsVelAngleY = "VelocityAngleY"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsVelAngleZ = "VelocityAngleZ"
 
-       character(len=maxCGNSNameLen), parameter :: &
-                           cgnsVelVecX     = "VelocityUnitVectorX"
-       character(len=maxCGNSNameLen), parameter :: &
-                           cgnsVelVecY     = "VelocityUnitVectorY"
-       character(len=maxCGNSNameLen), parameter :: &
-                           cgnsVelVecZ     = "VelocityUnitVectorZ"
-       character(len=maxCGNSNameLen), parameter :: &
-                           cgnsVelVecR     = "VelocityUnitVectorR"
-       character(len=maxCGNSNameLen), parameter :: &
-                           cgnsVelVecTheta = "VelocityUnitVectorTheta"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsVelVecX = "VelocityUnitVectorX"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsVelVecY = "VelocityUnitVectorY"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsVelVecZ = "VelocityUnitVectorZ"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsVelVecR = "VelocityUnitVectorR"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsVelVecTheta = "VelocityUnitVectorTheta"
 
-       character(len=maxCGNSNameLen), parameter :: &
-                           cgnsMassFlow = "MassFlow"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsMassFlow = "MassFlow"
 
-       character(len=maxCGNSNameLen), parameter :: &
-                           cgnsShock = "Shock"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsShock = "Shock"
 
-       character(len=maxCGNSNameLen), parameter :: &
-                           cgnsFilteredShock = "FilteredShock"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsFilteredShock = "FilteredShock"
 
-       character(len=maxCGNSNameLen), parameter :: &
-            cgnsGC = "globalCell"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsGC = "globalCell"
 
-       character(len=maxCGNSNameLen), parameter :: &
-            cgnsStatus = "status"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsStatus = "status"
 
-       character(len=maxCGNSNameLen), parameter :: &
-            cgnsIntermittency = "Intermittency"
-
-       character(len=maxCGNSNameLen), parameter :: &
-            cgnsSandGrainRoughness = "SandGrainRoughness"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsIntermittency = "Intermittency"
 
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsSandGrainRoughness = "SandGrainRoughness"
 
 !
 !       Residual names.
 !
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsResRho     = "ResDensity"
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsResMomX    = "ResMomentumX"
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsResMomY    = "ResMomentumY"
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsResMomZ    = "ResMomentumZ"
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsResRhoE    = "ResEnergyStagnationDensity"
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsResNu      = "ResTurbulentSANuTilde"
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsResK       = "ResTurbulentEnergyKinetic"
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsResOmega   = "ResTurbulentDissipationRate"
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsResTau     = "ResTurbulentInvDissipationRate"
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsResEpsilon = "ResTurbulentDissipation"
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsResV2      = "ResTurbulentScalarV2"
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsResF       = "ResTurbulentScalarF"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsResRho = "ResDensity"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsResMomX = "ResMomentumX"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsResMomY = "ResMomentumY"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsResMomZ = "ResMomentumZ"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsResRhoE = "ResEnergyStagnationDensity"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsResNu = "ResTurbulentSANuTilde"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsResK = "ResTurbulentEnergyKinetic"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsResOmega = "ResTurbulentDissipationRate"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsResTau = "ResTurbulentInvDissipationRate"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsResEpsilon = "ResTurbulentDissipation"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsResV2 = "ResTurbulentScalarV2"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsResF = "ResTurbulentScalarF"
 !
 !       Residual L2 norm names.
 !
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsL2ResRho     = "RSDMassRMS"
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsL2ResMomX    = "RSDMomentumXRMS"
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsL2ResMomy    = "RSDMomentumYRMS"
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsL2ResMomZ    = "RSDMomentumZRMS"
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsL2ResRhoE    = "RSDEnergyStagnationDensityRMS"
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsL2ResNu      = "RSDTurbulentSANuTildeRMS"
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsL2ResK       = "RSDTurbulentEnergyKineticRMS"
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsL2ResOmega   = "RSDTurbulentDissRateRMS"
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsL2ResTau     = "RSDTurbulentInvDissRateRMS"
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsL2resEpsilon = "RSDTurbulentDissRMS"
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsL2resV2      = "RSDTurbulentScalarV2RMS"
-       character(len=maxCGNSNameLen), parameter :: &
-                         cgnsL2ResF       = "RSDTurbulentScalarFRMS"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsL2ResRho = "RSDMassRMS"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsL2ResMomX = "RSDMomentumXRMS"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsL2ResMomy = "RSDMomentumYRMS"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsL2ResMomZ = "RSDMomentumZRMS"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsL2ResRhoE = "RSDEnergyStagnationDensityRMS"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsL2ResNu = "RSDTurbulentSANuTildeRMS"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsL2ResK = "RSDTurbulentEnergyKineticRMS"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsL2ResOmega = "RSDTurbulentDissRateRMS"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsL2ResTau = "RSDTurbulentInvDissRateRMS"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsL2resEpsilon = "RSDTurbulentDissRMS"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsL2resV2 = "RSDTurbulentScalarV2RMS"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsL2ResF = "RSDTurbulentScalarFRMS"
 !
 !       Force and moment coefficients names.
 !
-       character(len=maxCGNSNameLen), parameter :: &
-                                         cgnsCL  = "CoefLift"
-       character(len=maxCGNSNameLen), parameter :: &
-                                         cgnsCLp = "CoefPressureLift"
-       character(len=maxCGNSNameLen), parameter :: &
-                                         cgnsCLv = "CoefViscousLift"
-       character(len=maxCGNSNameLen), parameter :: &
-                                         cgnsCD  = "CoefDrag"
-       character(len=maxCGNSNameLen), parameter :: &
-                                         cgnsCDp = "CoefPressureDrag"
-       character(len=maxCGNSNameLen), parameter :: &
-                                         cgnsCDv = "CoefViscousDrag"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsCL = "CoefLift"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsCLp = "CoefPressureLift"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsCLv = "CoefViscousLift"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsCD = "CoefDrag"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsCDp = "CoefPressureDrag"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsCDv = "CoefViscousDrag"
 
-       character(len=maxCGNSNameLen), parameter :: &
-                                         cgnsCFx = "CoefForceX"
-       character(len=maxCGNSNameLen), parameter :: &
-                                         cgnsCFy = "CoefForceY"
-       character(len=maxCGNSNameLen), parameter :: &
-                                         cgnsCFz = "CoefForceZ"
-       character(len=maxCGNSNameLen), parameter :: &
-                                         cgnsCMx = "CoefMomentX"
-       character(len=maxCGNSNameLen), parameter :: &
-                                         cgnsCMy = "CoefMomentY"
-       character(len=maxCGNSNameLen), parameter :: &
-                                         cgnsCMz = "CoefMomentZ"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsCFx = "CoefForceX"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsCFy = "CoefForceY"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsCFz = "CoefForceZ"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsCMx = "CoefMomentX"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsCMy = "CoefMomentY"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsCMz = "CoefMomentZ"
 !
 !       Names of the "maximum" variables.
 !
-       character(len=maxCGNSNameLen), parameter :: &
-                       cgnsHDiffMax = "MaxDiffHAndHinf"
-       character(len=maxCGNSNameLen), parameter :: &
-                       cgnsMachMax  = "MaxMach"
-       character(len=maxCGNSNameLen), parameter :: &
-                       cgnsYPlusMax = "MaxYplus"
-       character(len=maxCGNSNameLen), parameter :: &
-                       cgnsEddyMax  = "MaxRatioEddyAndLaminarViscosity"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsHDiffMax = "MaxDiffHAndHinf"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsMachMax = "MaxMach"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsYPlusMax = "MaxYplus"
+    character(len=maxCGNSNameLen), parameter :: &
+        cgnsEddyMax = "MaxRatioEddyAndLaminarViscosity"
 !
 !       Names of the blanking paramter.
 !
-       character(len=maxCGNSNameLen), parameter :: cgnsBlank = "Iblank"
+    character(len=maxCGNSNameLen), parameter :: cgnsBlank = "Iblank"
 !
 !       Names of the "lift" force, separation sensor and cavitation
 !
-       character(len=maxCGNSNameLen), parameter :: cgnsSepSensor = "SepSensor"
-       character(len=maxCGNSNameLen), parameter :: cgnsCavitation = "Cavitation"
-       character(len=maxCGNSNameLen), parameter :: cgnsAxisMoment = "AxisMoment"
+    character(len=maxCGNSNameLen), parameter :: cgnsSepSensor = "SepSensor"
+    character(len=maxCGNSNameLen), parameter :: cgnsCavitation = "Cavitation"
+    character(len=maxCGNSNameLen), parameter :: cgnsAxisMoment = "AxisMoment"
 !
 !       Names for the convergence history and time history.
 !
-       character(len=maxCGNSNameLen), parameter :: &
-                                      ConvHistory = "ConvergenceHistory"
-       character(len=maxCGNSNameLen), parameter :: &
-                                      TimeHistory = "TimeHistory"
-
+    character(len=maxCGNSNameLen), parameter :: &
+        ConvHistory = "ConvergenceHistory"
+    character(len=maxCGNSNameLen), parameter :: &
+        TimeHistory = "TimeHistory"
 
-       end module cgnsNames
+end module cgnsNames
diff --git a/src/modules/constants.F90 b/src/modules/constants.F90
index 879cc4b8a..867b8279e 100644
--- a/src/modules/constants.F90
+++ b/src/modules/constants.F90
@@ -1,516 +1,515 @@
 module constants
 
-  ! Define all constants used in the code. This is the *only* module
-  ! that is allowed to be imported without an 'only' qualifier.
+    ! Define all constants used in the code. This is the *only* module
+    ! that is allowed to be imported without an 'only' qualifier.
 
-  use precision
+    use precision
 #ifndef USE_TAPENADE
 #include <petsc/finclude/petsc.h>
-  use petsc
+    use petsc
 #endif
-  implicit none
-  save
-
-  ! Maximum numbers of characters in a string and in a cgns name
-
-  integer, parameter :: maxStringLen   = 256
-  integer, parameter :: maxCGNSNameLen =  32
-  integer, parameter :: maxIterTypelen =  8
-
-  ! Numerical constants
-
-  real(kind=realType), parameter :: pi    = 3.1415926535897931_realType
-  real(kind=realType), parameter :: eps   = 1.e-25_realType
-  real(kind=realType), parameter :: large = 1.e+37_realType
-
-  ! Constants to define the porosity values
-
-  integer(kind=porType), parameter :: noFlux     = -1_porType
-  integer(kind=porType), parameter :: boundFlux  =  0_porType
-  integer(kind=porType), parameter :: normalFlux =  1_porType
-
-  ! Indices in the array of independent variables
-
-  integer, parameter :: irho    =  1  ! Density
-  integer, parameter :: ivx     =  2  ! x-Velocity
-  integer, parameter :: ivy     =  3  ! y-velocity
-  integer, parameter :: ivz     =  4  ! z-Velocity
-  integer, parameter :: irhoE   =  5  ! Energy
-
-  integer, parameter :: itu1    =  6  ! Turbulent kinetic energy,
-  ! SA viscosity
-  integer, parameter :: itu2    =  7  ! Dissipation rate, time scale
-  integer, parameter :: itu3    =  8  ! Scalar V2
-  integer, parameter :: itu4    =  9  ! Scalar F2
-  integer, parameter :: itu5    = 10  ! Eddy-viscosity used for
-  ! wall functions.
-
-  ! Parameters to indicate the position in the work array dw for
-  ! turbulence models.
-
-  integer, parameter :: idvt    = 1  ! Tmp RHS storage; at max a
-  ! 2x2 subsystem is solved.
-  integer, parameter :: ivort   = 3  ! Tmp vort storage
-  integer, parameter :: istrain = 3  ! Tmp strain storage
-  integer, parameter :: iprod   = 3  ! Tmp prod storage
-  integer, parameter :: icd     = 4  ! Tmp cross term storage
-  integer, parameter :: if1SST  = 5  ! Tmp F1 (for SST) storage
-  integer, parameter :: isct    = 4  ! Tmp time scale (for v2f) storage
-  integer, parameter :: iscl2   = 5  ! Tmp length scale (for v2f) storage
-  integer, parameter :: iqq     = 6  ! Central jacobian storage
-
-  ! Indices in the array of conservative flow residuals for the
-  ! momentum variables.
-
-  integer, parameter :: imx   = ivx ! x-Momentum
-  integer, parameter :: imy   = ivy ! y-Momentum
-  integer, parameter :: imz   = ivz ! z-Momentum
-
-  ! Floating point parameters.
-
-  real(kind=realType), parameter :: zero  = 0.0_realType
-  real(kind=realType), parameter :: one   = 1.0_realType
-  real(kind=realType), parameter :: two   = 2.0_realType
-  real(kind=realType), parameter :: three = 3.0_realType
-  real(kind=realType), parameter :: four  = 4.0_realType
-  real(kind=realType), parameter :: five  = 5.0_realType
-  real(kind=realType), parameter :: six   = 6.0_realType
-  real(kind=realType), parameter :: eight = 8.0_realType
-
-  real(kind=realType), parameter :: half   = 0.5_realType
-  real(kind=realType), parameter :: third  = one/three
-  real(kind=realType), parameter :: fourth = 0.25_realType
-  real(kind=realType), parameter :: sixth  = one/six
-  real(kind=realType), parameter :: eighth = 0.125_realType
-  real(kind=realType), parameter :: threefourth = 0.75_realType
-  real(kind=realType), parameter :: sqrtthree = 1.7320508075688772_realType
-
-  ! String constants
-  CHARACTER( * ), PARAMETER :: LOWER_CASE = 'abcdefghijklmnopqrstuvwxyz'
-  CHARACTER( * ), PARAMETER :: UPPER_CASE = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
-
-  ! Threshold parameter for real types; the value depends
-  ! whether single or double precision is used.
+    implicit none
+    save
+
+    ! Maximum numbers of characters in a string and in a cgns name
+
+    integer, parameter :: maxStringLen = 256
+    integer, parameter :: maxCGNSNameLen = 32
+    integer, parameter :: maxIterTypelen = 8
+
+    ! Numerical constants
+
+    real(kind=realType), parameter :: pi = 3.1415926535897931_realType
+    real(kind=realType), parameter :: eps = 1.e-25_realType
+    real(kind=realType), parameter :: large = 1.e+37_realType
+
+    ! Constants to define the porosity values
+
+    integer(kind=porType), parameter :: noFlux = -1_porType
+    integer(kind=porType), parameter :: boundFlux = 0_porType
+    integer(kind=porType), parameter :: normalFlux = 1_porType
+
+    ! Indices in the array of independent variables
+
+    integer, parameter :: irho = 1  ! Density
+    integer, parameter :: ivx = 2  ! x-Velocity
+    integer, parameter :: ivy = 3  ! y-velocity
+    integer, parameter :: ivz = 4  ! z-Velocity
+    integer, parameter :: irhoE = 5  ! Energy
+
+    integer, parameter :: itu1 = 6  ! Turbulent kinetic energy,
+    ! SA viscosity
+    integer, parameter :: itu2 = 7  ! Dissipation rate, time scale
+    integer, parameter :: itu3 = 8  ! Scalar V2
+    integer, parameter :: itu4 = 9  ! Scalar F2
+    integer, parameter :: itu5 = 10  ! Eddy-viscosity used for
+    ! wall functions.
+
+    ! Parameters to indicate the position in the work array dw for
+    ! turbulence models.
+
+    integer, parameter :: idvt = 1  ! Tmp RHS storage; at max a
+    ! 2x2 subsystem is solved.
+    integer, parameter :: ivort = 3  ! Tmp vort storage
+    integer, parameter :: istrain = 3  ! Tmp strain storage
+    integer, parameter :: iprod = 3  ! Tmp prod storage
+    integer, parameter :: icd = 4  ! Tmp cross term storage
+    integer, parameter :: if1SST = 5  ! Tmp F1 (for SST) storage
+    integer, parameter :: isct = 4  ! Tmp time scale (for v2f) storage
+    integer, parameter :: iscl2 = 5  ! Tmp length scale (for v2f) storage
+    integer, parameter :: iqq = 6  ! Central jacobian storage
+
+    ! Indices in the array of conservative flow residuals for the
+    ! momentum variables.
+
+    integer, parameter :: imx = ivx ! x-Momentum
+    integer, parameter :: imy = ivy ! y-Momentum
+    integer, parameter :: imz = ivz ! z-Momentum
+
+    ! Floating point parameters.
+
+    real(kind=realType), parameter :: zero = 0.0_realType
+    real(kind=realType), parameter :: one = 1.0_realType
+    real(kind=realType), parameter :: two = 2.0_realType
+    real(kind=realType), parameter :: three = 3.0_realType
+    real(kind=realType), parameter :: four = 4.0_realType
+    real(kind=realType), parameter :: five = 5.0_realType
+    real(kind=realType), parameter :: six = 6.0_realType
+    real(kind=realType), parameter :: eight = 8.0_realType
+
+    real(kind=realType), parameter :: half = 0.5_realType
+    real(kind=realType), parameter :: third = one/three
+    real(kind=realType), parameter :: fourth = 0.25_realType
+    real(kind=realType), parameter :: sixth = one/six
+    real(kind=realType), parameter :: eighth = 0.125_realType
+    real(kind=realType), parameter :: threefourth = 0.75_realType
+    real(kind=realType), parameter :: sqrtthree = 1.7320508075688772_realType
+
+    ! String constants
+    CHARACTER(*), PARAMETER :: LOWER_CASE = 'abcdefghijklmnopqrstuvwxyz'
+    CHARACTER(*), PARAMETER :: UPPER_CASE = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+
+    ! Threshold parameter for real types; the value depends
+    ! whether single or double precision is used.
 
 #ifdef USE_SINGLE_PRECISION
-  real(kind=realType), parameter :: thresholdReal = 1.e-5_realType
+    real(kind=realType), parameter :: thresholdReal = 1.e-5_realType
 #else
-  real(kind=realType), parameter :: thresholdReal = 1.e-10_realType
+    real(kind=realType), parameter :: thresholdReal = 1.e-10_realType
 #endif
 
-  ! Definition of the tab and carriage return character.
+    ! Definition of the tab and carriage return character.
 #ifndef USE_TAPENADE
-  character(len=1), parameter :: tabChar = achar(9)
-  character(len=1), parameter :: retChar = achar(13)
+    character(len=1), parameter :: tabChar = achar(9)
+    character(len=1), parameter :: retChar = achar(13)
 #endif
 
-  integer(kind=intType), parameter :: &
-       EulerEquations = 1,  &
-       NSEquations    = 2,  &
-       RANSEquations  = 3
-
-  integer(kind=intType), parameter :: &
-       steady        = 1,   &
-       unsteady      = 2,   &
-       timeSpectral  = 3
-
-  integer(kind=intType), parameter :: &
-       internalFlow = 1,    &
-       externalFlow = 2
-
-  integer(kind=intType), parameter :: &
-       cpConstant      = 1, &
-       cpTempCurveFits = 2
-
-  integer(kind=intType), parameter ::                              &
-       spalartAllmaras        =  2,  &
-       spalartAllmarasEdwards =  3,  &
-       komegaWilcox           =  4,  &
-       komegaModified         =  5,  &
-       ktau                   =  6,  &
-       menterSST              =  7,  &
-       v2f                    = 10
-
-  integer(kind=intType), parameter :: &
-       strain       = 1,    &
-       vorticity    = 2,    &
-       katoLaunder  = 3
-
-  integer(kind=intType), parameter :: &
-       firstOrder  = 1, &
-       secondOrder = 2, &
-       thirdOrder  = 3, &
-       fourthOrder = 4, &
-       fifthOrder  = 5
-
-  integer(kind=intType), parameter :: &
-       dissScalar = 1,  &
-       dissMatrix = 2,  &
-       dissCusp   = 3,  &
-       upwind     = 9
-
-  integer(kind=intType), parameter :: &
-       Roe     = 1,     &
-       vanLeer = 2,     &
-       ausmdv  = 3
-
-  integer(kind=intType), parameter :: &
-       noLimiter  = 2,  &
-       vanAlbeda  = 3,  &
-       minmod     = 4
-
-  integer(kind=intType), parameter :: &
-       noPrecond  = 1,  &
-       Turkel     = 2,  &
-       ChoiMerkle = 3
-
-  integer(kind=intType), parameter ::                          &
-       constantPressure     = 1, &
-       linExtrapolPressure  = 2, &
-       quadExtrapolPressure = 3, &
-       normalMomentum       = 4
-
-  integer(kind=intType), parameter ::  &
-       constantExtrapol = 1, &
-       linExtrapol      = 2
-
-  integer(kind=intType), parameter :: &
-       NonConservative = 1, &
-       Conservative    = 2
-
-  integer(kind=intType), parameter :: &
-       precisionSingle = 1, &
-       precisionDouble = 2
-
-  ! Definition of the parameters for the time integration scheme.
-  integer(kind=intType), parameter :: &
-       BDF        = 1, &
-       explicitRK = 2, &
-       implicitRK = 3, &
-       MD         = 4
-
-  ! Line search parameters
-  integer(kind=intType), parameter :: &
-       noLineSearch = 0_intType, &
-       cubicLineSearch = 1_intType, &
-       nonMonotoneLineSearch = 2_intType
-
-  integer(kind=intType), parameter :: &
-       RungeKutta  = 1,  &
-       DADI        = 2,  &
-       nlLusgs     = 3,  &
-       nlLusgsLine = 4
-
-  integer(kind=intType), parameter :: &
-       decoupled  = 1,   &
-       coupled    = 2
-  integer(kind=intType), parameter :: &
-       gmres = 1,        &
-       adi   = 2
-
-  integer(kind=intType), parameter :: &
-       bcDirichlet0 = 0, &
-       bcNeumann0    = 1
-
-  integer(kind=intType), parameter :: &
-       noResAveraging        = 0, &
-       alwaysResAveraging    = 1, &
-       alternateResAveraging = 2
-
-  integer(kind=intType), parameter :: &
-       turbRelaxNotDefined = 0,  &
-       turbRelaxExplicit   = 1,  &
-       turbRelaxImplicit   = 2
-
-  ! Parameters used for coarsening definition.
-  integer(kind=porType), parameter :: &
-       leftStarted  = -1_porType, &
-       regular      =  0_porType, &
-       rightStarted =  1_porType
-
-  ! Parameters used for subsonic inlet bc treatment.
-  integer(kind=intType), parameter :: &
-       noSubInlet      = 0, &
-       totalConditions = 1, &
-       massFlow        = 2
-  ! Parameters for overset update mdoe
-  integer(kind=intType), parameter :: &
-       updateFrozen = 0, &
-       updateFast   = 1, &
-       updateFull   = 2
-
-  integer, parameter :: adtSurfaceADT = 1
-  integer, parameter :: adtVolumeADT  = 2
-  integer(kind=intType), parameter :: nCoorMaxLowerLimit = 100000
-
-  integer(kind=adtElementType), parameter :: adtTriangle      = 1
-  integer(kind=adtElementType), parameter :: adtQuadrilateral = 2
-  integer(kind=adtElementType), parameter :: adtTetrahedron   = 3
-  integer(kind=adtElementType), parameter :: adtPyramid       = 4
-  integer(kind=adtElementType), parameter :: adtPrism         = 5
-  integer(kind=adtElementType), parameter :: adtHexahedron    = 6
-
-  ! BCDefinitions
-  integer(kind=intType), parameter :: BCNull                =   0
-  integer(kind=intType), parameter :: Symm                  =  -1
-  integer(kind=intType), parameter :: SymmPolar             =  -2
-  integer(kind=intType), parameter :: NSWallAdiabatic       =  -3
-  integer(kind=intType), parameter :: NSWallIsothermal      =  -4
-  integer(kind=intType), parameter :: EulerWall             =  -5
-  integer(kind=intType), parameter :: FarField              =  -6
-  integer(kind=intType), parameter :: SupersonicInflow      =  -7
-  integer(kind=intType), parameter :: SubsonicInflow        =  -8
-  integer(kind=intType), parameter :: SupersonicOutflow     =  -9
-  integer(kind=intType), parameter :: SubsonicOutflow       = -10
-  integer(kind=intType), parameter :: MassBleedInflow       = -11
-  integer(kind=intType), parameter :: MassBleedOutflow      = -12
-  integer(kind=intType), parameter :: mDot                  = -13
-  integer(kind=intType), parameter :: bcThrust              = -14
-  integer(kind=intType), parameter :: Extrap                = -15
-  integer(kind=intType), parameter :: B2BMatch              = -16
-  integer(kind=intType), parameter :: B2BMismatch           = -17
-  integer(kind=intType), parameter :: SlidingInterface      = -18
-  integer(kind=intType), parameter :: OversetOuterBound     = -19
-  integer(kind=intType), parameter :: DomainInterfaceAll    = -20
-  integer(kind=intType), parameter :: DomainInterfaceRhoUVW = -21
-  integer(kind=intType), parameter :: DomainInterfaceP      = -22
-  integer(kind=intType), parameter :: DomainInterfaceRho    = -23
-  integer(kind=intType), parameter :: DomainInterfaceTotal  = -24
-  integer(kind=intType), parameter :: BCNotValid            = -25
-  !
-  !      Number of actual boundary conditions supported by the code
-  !      This number refers to bocos, not flow-through BCs
-  !      Edit this number when additional boundary conditions are
-  !      supported
-  !
-  integer(kind=intType), parameter :: nBCs = 24
+    integer(kind=intType), parameter :: &
+        EulerEquations = 1, &
+        NSEquations = 2, &
+        RANSEquations = 3
+
+    integer(kind=intType), parameter :: &
+        steady = 1, &
+        unsteady = 2, &
+        timeSpectral = 3
+
+    integer(kind=intType), parameter :: &
+        internalFlow = 1, &
+        externalFlow = 2
+
+    integer(kind=intType), parameter :: &
+        cpConstant = 1, &
+        cpTempCurveFits = 2
+
+    integer(kind=intType), parameter :: &
+        spalartAllmaras = 2, &
+        spalartAllmarasEdwards = 3, &
+        komegaWilcox = 4, &
+        komegaModified = 5, &
+        ktau = 6, &
+        menterSST = 7, &
+        v2f = 10
+
+    integer(kind=intType), parameter :: &
+        strain = 1, &
+        vorticity = 2, &
+        katoLaunder = 3
+
+    integer(kind=intType), parameter :: &
+        firstOrder = 1, &
+        secondOrder = 2, &
+        thirdOrder = 3, &
+        fourthOrder = 4, &
+        fifthOrder = 5
+
+    integer(kind=intType), parameter :: &
+        dissScalar = 1, &
+        dissMatrix = 2, &
+        dissCusp = 3, &
+        upwind = 9
+
+    integer(kind=intType), parameter :: &
+        Roe = 1, &
+        vanLeer = 2, &
+        ausmdv = 3
+
+    integer(kind=intType), parameter :: &
+        noLimiter = 2, &
+        vanAlbeda = 3, &
+        minmod = 4
+
+    integer(kind=intType), parameter :: &
+        noPrecond = 1, &
+        Turkel = 2, &
+        ChoiMerkle = 3
+
+    integer(kind=intType), parameter :: &
+        constantPressure = 1, &
+        linExtrapolPressure = 2, &
+        quadExtrapolPressure = 3, &
+        normalMomentum = 4
+
+    integer(kind=intType), parameter :: &
+        constantExtrapol = 1, &
+        linExtrapol = 2
+
+    integer(kind=intType), parameter :: &
+        NonConservative = 1, &
+        Conservative = 2
+
+    integer(kind=intType), parameter :: &
+        precisionSingle = 1, &
+        precisionDouble = 2
+
+    ! Definition of the parameters for the time integration scheme.
+    integer(kind=intType), parameter :: &
+        BDF = 1, &
+        explicitRK = 2, &
+        implicitRK = 3, &
+        MD = 4
+
+    ! Line search parameters
+    integer(kind=intType), parameter :: &
+        noLineSearch = 0_intType, &
+        cubicLineSearch = 1_intType, &
+        nonMonotoneLineSearch = 2_intType
+
+    integer(kind=intType), parameter :: &
+        RungeKutta = 1, &
+        DADI = 2, &
+        nlLusgs = 3, &
+        nlLusgsLine = 4
+
+    integer(kind=intType), parameter :: &
+        decoupled = 1, &
+        coupled = 2
+    integer(kind=intType), parameter :: &
+        gmres = 1, &
+        adi = 2
+
+    integer(kind=intType), parameter :: &
+        bcDirichlet0 = 0, &
+        bcNeumann0 = 1
+
+    integer(kind=intType), parameter :: &
+        noResAveraging = 0, &
+        alwaysResAveraging = 1, &
+        alternateResAveraging = 2
+
+    integer(kind=intType), parameter :: &
+        turbRelaxNotDefined = 0, &
+        turbRelaxExplicit = 1, &
+        turbRelaxImplicit = 2
+
+    ! Parameters used for coarsening definition.
+    integer(kind=porType), parameter :: &
+        leftStarted = -1_porType, &
+        regular = 0_porType, &
+        rightStarted = 1_porType
+
+    ! Parameters used for subsonic inlet bc treatment.
+    integer(kind=intType), parameter :: &
+        noSubInlet = 0, &
+        totalConditions = 1, &
+        massFlow = 2
+    ! Parameters for overset update mdoe
+    integer(kind=intType), parameter :: &
+        updateFrozen = 0, &
+        updateFast = 1, &
+        updateFull = 2
+
+    integer, parameter :: adtSurfaceADT = 1
+    integer, parameter :: adtVolumeADT = 2
+    integer(kind=intType), parameter :: nCoorMaxLowerLimit = 100000
+
+    integer(kind=adtElementType), parameter :: adtTriangle = 1
+    integer(kind=adtElementType), parameter :: adtQuadrilateral = 2
+    integer(kind=adtElementType), parameter :: adtTetrahedron = 3
+    integer(kind=adtElementType), parameter :: adtPyramid = 4
+    integer(kind=adtElementType), parameter :: adtPrism = 5
+    integer(kind=adtElementType), parameter :: adtHexahedron = 6
+
+    ! BCDefinitions
+    integer(kind=intType), parameter :: BCNull = 0
+    integer(kind=intType), parameter :: Symm = -1
+    integer(kind=intType), parameter :: SymmPolar = -2
+    integer(kind=intType), parameter :: NSWallAdiabatic = -3
+    integer(kind=intType), parameter :: NSWallIsothermal = -4
+    integer(kind=intType), parameter :: EulerWall = -5
+    integer(kind=intType), parameter :: FarField = -6
+    integer(kind=intType), parameter :: SupersonicInflow = -7
+    integer(kind=intType), parameter :: SubsonicInflow = -8
+    integer(kind=intType), parameter :: SupersonicOutflow = -9
+    integer(kind=intType), parameter :: SubsonicOutflow = -10
+    integer(kind=intType), parameter :: MassBleedInflow = -11
+    integer(kind=intType), parameter :: MassBleedOutflow = -12
+    integer(kind=intType), parameter :: mDot = -13
+    integer(kind=intType), parameter :: bcThrust = -14
+    integer(kind=intType), parameter :: Extrap = -15
+    integer(kind=intType), parameter :: B2BMatch = -16
+    integer(kind=intType), parameter :: B2BMismatch = -17
+    integer(kind=intType), parameter :: SlidingInterface = -18
+    integer(kind=intType), parameter :: OversetOuterBound = -19
+    integer(kind=intType), parameter :: DomainInterfaceAll = -20
+    integer(kind=intType), parameter :: DomainInterfaceRhoUVW = -21
+    integer(kind=intType), parameter :: DomainInterfaceP = -22
+    integer(kind=intType), parameter :: DomainInterfaceRho = -23
+    integer(kind=intType), parameter :: DomainInterfaceTotal = -24
+    integer(kind=intType), parameter :: BCNotValid = -25
+    !
+    !      Number of actual boundary conditions supported by the code
+    !      This number refers to bocos, not flow-through BCs
+    !      Edit this number when additional boundary conditions are
+    !      supported
+    !
+    integer(kind=intType), parameter :: nBCs = 24
 
     !Block faces on which boundary conditions may be imposed
-  integer(kind=intType), parameter :: iMin = 1
-  integer(kind=intType), parameter :: iMax = 2
-  integer(kind=intType), parameter :: jMin = 3
-  integer(kind=intType), parameter :: jMax = 4
-  integer(kind=intType), parameter :: kMin = 5
-  integer(kind=intType), parameter :: kMax = 6
-
-  integer(kind=intType) :: myIntStack(32)
-  integer(kind=intType) :: myIntPtr = 0
-
-  ! BC specific input variable counts
-  integer(kind=intType), parameter :: nbcVarSubsonicInflow = 17
-  integer(kind=intType), parameter :: nbcVarSubsonicOutflow = 1
-  integer(kind=intType), parameter :: nbcVarSupersonicInflow = 7
-  integer(kind=intType), parameter :: nbcVarAdiabaticWall = 0
-  integer(kind=intType), parameter :: nbcVarIsothermalWall = 1
-
-  ! Indices of specific familyExcahnge groups based on BC
-  integer(kind=intType), parameter :: iBCGroupWalls=1
-  integer(kind=intType), parameter :: iBCGroupInflow=2
-  integer(kind=intType), parameter :: iBCGroupOutflow=3
-  integer(kind=intType), parameter :: iBCGroupSymm=4
-  integer(kind=intType), parameter :: iBCGroupFarfield=5
-  integer(kind=intType), parameter :: iBCGroupOverset=6
-  integer(kind=intType), parameter :: iBCGroupOther=7
-
-  integer(kind=intType), parameter :: nFamExchange=7
-
-  ! Constants for tecplot I/O
-  real(kind=realType) :: zoneMarker = 299.0
-  real(kind=realType) :: dataSectionMarker = 357.0
-
-  ! Fringe sort type
-  integer(kind=intType), parameter :: sortByDonor=1
-  integer(kind=intType), parameter :: sortByReceiver=2
-
-  ! Task breakdown for overset connectivity. Note that iComm1
-  ! *includes* the surfaceCorrection and donor search times.
-  integer(kind=intType), parameter :: iBoundingBox=1
-  integer(kind=intType), parameter :: iBuildOverlap=2
-  integer(kind=intType), parameter :: iBuildClusterWalls=3
-  integer(kind=intType), parameter :: iComputeCellWallPoint=4
-  integer(kind=intType), parameter :: iBuildADT=5
-  integer(kind=intType), parameter :: iBuildSearchPoints=6
-  integer(kind=intType), parameter :: iSurfaceCorrection=7
-  integer(kind=intType), parameter :: iDonorSearch=8
-  integer(kind=intType), parameter :: iFringeProcessing=9
-  integer(kind=intType), parameter :: iCheckDonors=10
-  integer(kind=intType), parameter :: iDetermineDonors=11
-  integer(kind=intType), parameter :: iIrregularCellCorrection=12
-  integer(kind=intType), parameter :: iFlooding=13
-  integer(kind=intType), parameter :: iFinalCommStructures=14
-  integer(kind=intType), parameter :: iFringeReduction=15
-  integer(kind=intType), parameter :: iTotal=16
-
-  ! Cost functions.
-  integer(kind=intType), parameter :: nCostFunction = 99
-  integer(kind=intType), parameter :: &
-       costFuncLift       = 1,&
-       costFuncDrag       = 2,&
-       costFuncLiftCoef   = 3,&
-       costFuncDragCoef   = 4,&
-       costFuncForceX     = 5,&
-       costFuncForceY     = 6,&
-       costFuncForceZ     = 7,&
-       costFuncForceXCoef = 8,&
-       costFuncForceYCoef = 9,&
-       costFuncForceZCoef = 10,&
-       costFuncMomX       = 11,&
-       costFuncMomY       = 12,&
-       costFuncMomZ       = 13,&
-       costFuncMomXCoef   = 14,&
-       costFuncMomYCoef   = 15,&
-       costFuncMomZCoef   = 16,&
-       costFuncCm0        = 17,&
-       costFuncCmzAlpha   = 18,&
-       costFuncCmzAlphaDot= 19,&
-       costFuncCmzq       = 20,&
-       costFuncCmzqDot    = 21,&
-       costFuncCl0        = 22,&
-       costFuncClAlpha    = 23,&
-       costFuncClAlphaDot = 24,&
-       costFuncClq        = 25,&
-       costFuncClqDot     = 26,&
-       costFuncCd0        = 27,&
-       costFuncCdAlpha    = 28,&
-       costFuncCdAlphadot = 29,&
-       costFuncCdq        = 30,&
-       costFuncCdqDot     = 31,&
-       costFuncCfy0       = 32,&
-       costFuncCfyAlpha   = 33,&
-       costFuncCfyAlphadot= 34,&
-       costFuncCfyq       = 35,&
-       costFuncCfyqDot    = 36,&
-       costFuncBendingCoef= 37,&
-       costFuncSepSensor  = 38,&
-       costFuncSepSensorAvgX = 39, &
-       costFuncSepSensorAvgY = 40, &
-       costFuncSepSensorAvgZ = 41, &
-       costFuncCavitation    = 42, &
-       costFuncMdot          = 43, &
-       costFuncMavgPtot      = 44, &
-       costFuncMavgTtot      = 45, &
-       costFuncMavgRho       = 46, &
-       costFuncMavgPs        = 47, &
-       costFuncMavgMN        = 48, &
-       costFuncMavga         = 49, &
-       costFuncArea          = 50, &
-       costFuncAxisMoment    = 51, &
-       costFuncFlowPower     = 52, &
-       costFuncForceXPressure = 53, &
-       costFuncForceYPressure = 54, &
-       costFuncForceZPressure = 55, &
-       costFuncForceXViscous = 56, &
-       costFuncForceYViscous = 57, &
-       costFuncForceZViscous = 58, &
-       costFuncForceXMomentum = 59, &
-       costFuncForceYMomentum = 60, &
-       costFuncForceZMomentum = 61, &
-       costFuncDragPressure= 62, &
-       costFuncDragViscous = 63, &
-       costFuncDragMomentum = 64, &
-       costFuncLiftPressure= 65, &
-       costFuncLiftViscous = 66, &
-       costFuncLiftMomentum = 67, &
-       costFuncForceXCoefPressure = 68,&
-       costFuncForceXCoefViscous = 69,&
-       costFuncForceXCoefMomentum = 70,&
-       costFuncForceYCoefPressure = 71,&
-       costFuncForceYCoefViscous = 72,&
-       costFuncForceYCoefMomentum = 73,&
-       costFuncForceZCoefPressure = 74,&
-       costFuncForceZCoefViscous = 75,&
-       costFuncForceZCoefMomentum = 76,&
-       costFuncLiftCoefPressure = 77,&
-       costFuncLiftCoefViscous = 78, &
-       costFuncLiftCoefMomentum = 79, &
-       costFuncDragCoefPressure = 80,&
-       costFuncDragCoefViscous = 81, &
-       costFuncDragCoefMomentum = 82, &
-       costfuncmavgvx = 83, &
-       costfuncmavgvy = 84, &
-       costfuncmavgvz = 85, &
-       costfunccperror2 = 86, &
-       costfuncaavgptot = 87, &
-       costfuncaavgps   = 88, &
-       costfunccpmin    = 89, &
-       costfuncCoForceXX    = 90, &
-       costfuncCoForceXY    = 91, &
-       costfuncCoForceXZ    = 92, &
-       costfuncCoForceYX    = 93, &
-       costfuncCoForceYY    = 94, &
-       costfuncCoForceYZ    = 95, &
-       costfuncCoForceZX    = 96, &
-       costfuncCoForceZY    = 97, &
-       costfuncCoForceZZ    = 98, &
-       costfuncmavgvi       = 99
-
-  integer(kind=intType), parameter :: nLocalValues=60
-  integer(kind=intType), parameter :: &
-       iFp =  1, &
-       iFv =  4, &
-       iMp =  7, &
-       iMv = 10, &
-       iSepSensor  = 13, &
-       iSepAvg     = 14, &
-       iCavitation = 17, &
-       iyPlus      = 18, &
-       iMassFlow   = 19, &
-       iMassPTot   = 20, &
-       iMassTtot   = 21, &
-       iMassPs     = 22, &
-       iFlowMp     = 23, &
-       iFlowFm     = 26, &
-       iFlowMm     = 29, &
-       iMassMN     = 32, &
-       isigmaMN    = 33, &
-       isigmaPtot  = 34, &
-       iPk         = 35, &
-       iMassa      = 36, &
-       iMassRho    = 37, &
-       iArea       = 38, &
-       iMassVx     = 39, &
-       iMassVy     = 40, &
-       iMassVz     = 41, &
-       iMassnx     = 42, &
-       iMassny     = 43, &
-       iMassnz     = 44, &
-       iAxisMoment = 45, &
-       iPower      = 46, &
-       iCpError2   = 47, &
-       iAreaPTot   = 48, &
-       iAreaPs     = 49, &
-       iCpMin      = 50, &
-       ! the following 3 reserve 3 slots each!
-       iCoForceX   = 51, &
-       iCoForceY   = 54, &
-       iCoForceZ   = 57, &
-       iMassVi = 60
-
-  ! Constants for zipper comm
-
-  ! Flow-through conditions
-  integer(kind=intType), parameter :: nZippFlowComm=10
-
-  integer(kind=intType), parameter :: iZippFlowP=5
-  integer(kind=intType), parameter :: iZippFlowGamma=6
-  integer(kind=intType), parameter :: iZippFlowSface=7
-  integer(kind=intType), parameter :: iZippFlowX=8
-  integer(kind=intType), parameter :: iZippFlowY=9
-  integer(kind=intType), parameter :: iZippFlowZ=10
-
-  ! Wall Conditions
-  integer(kind=intType), parameter :: nZippWallComm=10
-
-  integer(kind=intType), parameter :: iZippWallTpx=1
-  integer(kind=intType), parameter :: iZippWallTpy=2
-  integer(kind=intType), parameter :: iZippWallTpz=3
-  integer(kind=intType), parameter :: iZippWallTvx=4
-  integer(kind=intType), parameter :: iZippWallTvy=5
-  integer(kind=intType), parameter :: iZippWallTvz=6
-
-  integer(kind=intType), parameter :: iZippWallX=7
-  integer(kind=intType), parameter :: iZippWallY=8
-  integer(kind=intType), parameter :: iZippWallZ=9
-
+    integer(kind=intType), parameter :: iMin = 1
+    integer(kind=intType), parameter :: iMax = 2
+    integer(kind=intType), parameter :: jMin = 3
+    integer(kind=intType), parameter :: jMax = 4
+    integer(kind=intType), parameter :: kMin = 5
+    integer(kind=intType), parameter :: kMax = 6
+
+    integer(kind=intType) :: myIntStack(32)
+    integer(kind=intType) :: myIntPtr = 0
+
+    ! BC specific input variable counts
+    integer(kind=intType), parameter :: nbcVarSubsonicInflow = 17
+    integer(kind=intType), parameter :: nbcVarSubsonicOutflow = 1
+    integer(kind=intType), parameter :: nbcVarSupersonicInflow = 7
+    integer(kind=intType), parameter :: nbcVarAdiabaticWall = 0
+    integer(kind=intType), parameter :: nbcVarIsothermalWall = 1
+
+    ! Indices of specific familyExcahnge groups based on BC
+    integer(kind=intType), parameter :: iBCGroupWalls = 1
+    integer(kind=intType), parameter :: iBCGroupInflow = 2
+    integer(kind=intType), parameter :: iBCGroupOutflow = 3
+    integer(kind=intType), parameter :: iBCGroupSymm = 4
+    integer(kind=intType), parameter :: iBCGroupFarfield = 5
+    integer(kind=intType), parameter :: iBCGroupOverset = 6
+    integer(kind=intType), parameter :: iBCGroupOther = 7
+
+    integer(kind=intType), parameter :: nFamExchange = 7
+
+    ! Constants for tecplot I/O
+    real(kind=realType) :: zoneMarker = 299.0
+    real(kind=realType) :: dataSectionMarker = 357.0
+
+    ! Fringe sort type
+    integer(kind=intType), parameter :: sortByDonor = 1
+    integer(kind=intType), parameter :: sortByReceiver = 2
+
+    ! Task breakdown for overset connectivity. Note that iComm1
+    ! *includes* the surfaceCorrection and donor search times.
+    integer(kind=intType), parameter :: iBoundingBox = 1
+    integer(kind=intType), parameter :: iBuildOverlap = 2
+    integer(kind=intType), parameter :: iBuildClusterWalls = 3
+    integer(kind=intType), parameter :: iComputeCellWallPoint = 4
+    integer(kind=intType), parameter :: iBuildADT = 5
+    integer(kind=intType), parameter :: iBuildSearchPoints = 6
+    integer(kind=intType), parameter :: iSurfaceCorrection = 7
+    integer(kind=intType), parameter :: iDonorSearch = 8
+    integer(kind=intType), parameter :: iFringeProcessing = 9
+    integer(kind=intType), parameter :: iCheckDonors = 10
+    integer(kind=intType), parameter :: iDetermineDonors = 11
+    integer(kind=intType), parameter :: iIrregularCellCorrection = 12
+    integer(kind=intType), parameter :: iFlooding = 13
+    integer(kind=intType), parameter :: iFinalCommStructures = 14
+    integer(kind=intType), parameter :: iFringeReduction = 15
+    integer(kind=intType), parameter :: iTotal = 16
+
+    ! Cost functions.
+    integer(kind=intType), parameter :: nCostFunction = 99
+    integer(kind=intType), parameter :: &
+        costFuncLift = 1, &
+        costFuncDrag = 2, &
+        costFuncLiftCoef = 3, &
+        costFuncDragCoef = 4, &
+        costFuncForceX = 5, &
+        costFuncForceY = 6, &
+        costFuncForceZ = 7, &
+        costFuncForceXCoef = 8, &
+        costFuncForceYCoef = 9, &
+        costFuncForceZCoef = 10, &
+        costFuncMomX = 11, &
+        costFuncMomY = 12, &
+        costFuncMomZ = 13, &
+        costFuncMomXCoef = 14, &
+        costFuncMomYCoef = 15, &
+        costFuncMomZCoef = 16, &
+        costFuncCm0 = 17, &
+        costFuncCmzAlpha = 18, &
+        costFuncCmzAlphaDot = 19, &
+        costFuncCmzq = 20, &
+        costFuncCmzqDot = 21, &
+        costFuncCl0 = 22, &
+        costFuncClAlpha = 23, &
+        costFuncClAlphaDot = 24, &
+        costFuncClq = 25, &
+        costFuncClqDot = 26, &
+        costFuncCd0 = 27, &
+        costFuncCdAlpha = 28, &
+        costFuncCdAlphadot = 29, &
+        costFuncCdq = 30, &
+        costFuncCdqDot = 31, &
+        costFuncCfy0 = 32, &
+        costFuncCfyAlpha = 33, &
+        costFuncCfyAlphadot = 34, &
+        costFuncCfyq = 35, &
+        costFuncCfyqDot = 36, &
+        costFuncBendingCoef = 37, &
+        costFuncSepSensor = 38, &
+        costFuncSepSensorAvgX = 39, &
+        costFuncSepSensorAvgY = 40, &
+        costFuncSepSensorAvgZ = 41, &
+        costFuncCavitation = 42, &
+        costFuncMdot = 43, &
+        costFuncMavgPtot = 44, &
+        costFuncMavgTtot = 45, &
+        costFuncMavgRho = 46, &
+        costFuncMavgPs = 47, &
+        costFuncMavgMN = 48, &
+        costFuncMavga = 49, &
+        costFuncArea = 50, &
+        costFuncAxisMoment = 51, &
+        costFuncFlowPower = 52, &
+        costFuncForceXPressure = 53, &
+        costFuncForceYPressure = 54, &
+        costFuncForceZPressure = 55, &
+        costFuncForceXViscous = 56, &
+        costFuncForceYViscous = 57, &
+        costFuncForceZViscous = 58, &
+        costFuncForceXMomentum = 59, &
+        costFuncForceYMomentum = 60, &
+        costFuncForceZMomentum = 61, &
+        costFuncDragPressure = 62, &
+        costFuncDragViscous = 63, &
+        costFuncDragMomentum = 64, &
+        costFuncLiftPressure = 65, &
+        costFuncLiftViscous = 66, &
+        costFuncLiftMomentum = 67, &
+        costFuncForceXCoefPressure = 68, &
+        costFuncForceXCoefViscous = 69, &
+        costFuncForceXCoefMomentum = 70, &
+        costFuncForceYCoefPressure = 71, &
+        costFuncForceYCoefViscous = 72, &
+        costFuncForceYCoefMomentum = 73, &
+        costFuncForceZCoefPressure = 74, &
+        costFuncForceZCoefViscous = 75, &
+        costFuncForceZCoefMomentum = 76, &
+        costFuncLiftCoefPressure = 77, &
+        costFuncLiftCoefViscous = 78, &
+        costFuncLiftCoefMomentum = 79, &
+        costFuncDragCoefPressure = 80, &
+        costFuncDragCoefViscous = 81, &
+        costFuncDragCoefMomentum = 82, &
+        costfuncmavgvx = 83, &
+        costfuncmavgvy = 84, &
+        costfuncmavgvz = 85, &
+        costfunccperror2 = 86, &
+        costfuncaavgptot = 87, &
+        costfuncaavgps = 88, &
+        costfunccpmin = 89, &
+        costfuncCoForceXX = 90, &
+        costfuncCoForceXY = 91, &
+        costfuncCoForceXZ = 92, &
+        costfuncCoForceYX = 93, &
+        costfuncCoForceYY = 94, &
+        costfuncCoForceYZ = 95, &
+        costfuncCoForceZX = 96, &
+        costfuncCoForceZY = 97, &
+        costfuncCoForceZZ = 98, &
+        costfuncmavgvi = 99
+
+    integer(kind=intType), parameter :: nLocalValues = 60
+    integer(kind=intType), parameter :: &
+        iFp = 1, &
+        iFv = 4, &
+        iMp = 7, &
+        iMv = 10, &
+        iSepSensor = 13, &
+        iSepAvg = 14, &
+        iCavitation = 17, &
+        iyPlus = 18, &
+        iMassFlow = 19, &
+        iMassPTot = 20, &
+        iMassTtot = 21, &
+        iMassPs = 22, &
+        iFlowMp = 23, &
+        iFlowFm = 26, &
+        iFlowMm = 29, &
+        iMassMN = 32, &
+        isigmaMN = 33, &
+        isigmaPtot = 34, &
+        iPk = 35, &
+        iMassa = 36, &
+        iMassRho = 37, &
+        iArea = 38, &
+        iMassVx = 39, &
+        iMassVy = 40, &
+        iMassVz = 41, &
+        iMassnx = 42, &
+        iMassny = 43, &
+        iMassnz = 44, &
+        iAxisMoment = 45, &
+        iPower = 46, &
+        iCpError2 = 47, &
+        iAreaPTot = 48, &
+        iAreaPs = 49, &
+        iCpMin = 50, &
+        ! the following 3 reserve 3 slots each!
+        iCoForceX = 51, &
+        iCoForceY = 54, &
+        iCoForceZ = 57, &
+        iMassVi = 60
+
+    ! Constants for zipper comm
+
+    ! Flow-through conditions
+    integer(kind=intType), parameter :: nZippFlowComm = 10
+
+    integer(kind=intType), parameter :: iZippFlowP = 5
+    integer(kind=intType), parameter :: iZippFlowGamma = 6
+    integer(kind=intType), parameter :: iZippFlowSface = 7
+    integer(kind=intType), parameter :: iZippFlowX = 8
+    integer(kind=intType), parameter :: iZippFlowY = 9
+    integer(kind=intType), parameter :: iZippFlowZ = 10
+
+    ! Wall Conditions
+    integer(kind=intType), parameter :: nZippWallComm = 10
+
+    integer(kind=intType), parameter :: iZippWallTpx = 1
+    integer(kind=intType), parameter :: iZippWallTpy = 2
+    integer(kind=intType), parameter :: iZippWallTpz = 3
+    integer(kind=intType), parameter :: iZippWallTvx = 4
+    integer(kind=intType), parameter :: iZippWallTvy = 5
+    integer(kind=intType), parameter :: iZippWallTvz = 6
+
+    integer(kind=intType), parameter :: iZippWallX = 7
+    integer(kind=intType), parameter :: iZippWallY = 8
+    integer(kind=intType), parameter :: iZippWallZ = 9
 
 end module constants
diff --git a/src/modules/extraOutput.f90 b/src/modules/extraOutput.f90
index 20d3d99d9..78fdc9d03 100644
--- a/src/modules/extraOutput.f90
+++ b/src/modules/extraOutput.f90
@@ -1,65 +1,64 @@
-       module extraOutput
+module extraOutput
 !
 !       This module contains the logicals which define the variables
 !       to be written to the solution file. Both the surface variables
 !       to be written as well as the extra volume variables are stored
 !       in this module.
 !
-       use constants, only: intType, realType, maxCGNSNameLen
-       implicit none
-       save
+    use constants, only: intType, realType, maxCGNSNameLen
+    implicit none
+    save
 !
 !       The logical variables, which define the surface variables to
 !       be written.
 !
-       logical :: surfWriteRho,   surfWriteP,        surfWriteTemp
-       logical :: surfWriteVx,    surfWriteVy,       surfWriteVz
-       logical :: surfWriteRVx,   surfWriteRVy,      surfWriteRVz
-       logical :: surfWriteCp,    surfWritePtotLoss, surfWriteMach
-       logical :: surfWriteRMach
-       logical :: surfWriteCf,    surfWriteCh,       surfWriteYPlus
-       logical :: surfWriteCfx,   surfWriteCfy,      surfWriteCfz
-       logical :: surfWriteBlank, surfWriteSepSensor
-       logical :: surfWriteCavitation, surfWriteGC, surfWriteAxisMoment
+    logical :: surfWriteRho, surfWriteP, surfWriteTemp
+    logical :: surfWriteVx, surfWriteVy, surfWriteVz
+    logical :: surfWriteRVx, surfWriteRVy, surfWriteRVz
+    logical :: surfWriteCp, surfWritePtotLoss, surfWriteMach
+    logical :: surfWriteRMach
+    logical :: surfWriteCf, surfWriteCh, surfWriteYPlus
+    logical :: surfWriteCfx, surfWriteCfy, surfWriteCfz
+    logical :: surfWriteBlank, surfWriteSepSensor
+    logical :: surfWriteCavitation, surfWriteGC, surfWriteAxisMoment
 !
 !       The logical variables, which define the extra volume variables
 !       to be written.
 !
-       logical :: volWriteMx,           volWriteMy,       volWriteMz
-       logical :: volWriteRVx,          volWriteRVy,      volWriteRVz
-       logical :: volWriteRhoE,         volWriteTemp,     volWriteCp
-       logical :: volWriteMach,         volWriteMachTurb, volWriteEddyVis
-       logical :: volWriteRMach
-       logical :: volWriteRatioEddyVis, volWriteDist,     volWriteVortx
-       logical :: volWritevorty,        volWritevortz,    volWriteVort
-       logical :: volWritePtotLoss,     volWriteResRho,   volWriteresMom
-       logical :: volWriteResRhoE,      volWriteResTurb,  volWriteBlank
-       logical :: volWriteShock,        volWriteFilteredShock, volWriteGC, volWriteStatus
-       logical :: volWriteIntermittency
-       logical :: volWriteKs
+    logical :: volWriteMx, volWriteMy, volWriteMz
+    logical :: volWriteRVx, volWriteRVy, volWriteRVz
+    logical :: volWriteRhoE, volWriteTemp, volWriteCp
+    logical :: volWriteMach, volWriteMachTurb, volWriteEddyVis
+    logical :: volWriteRMach
+    logical :: volWriteRatioEddyVis, volWriteDist, volWriteVortx
+    logical :: volWritevorty, volWritevortz, volWriteVort
+    logical :: volWritePtotLoss, volWriteResRho, volWriteresMom
+    logical :: volWriteResRhoE, volWriteResTurb, volWriteBlank
+    logical :: volWriteShock, volWriteFilteredShock, volWriteGC, volWriteStatus
+    logical :: volWriteIntermittency
+    logical :: volWriteKs
 !
 !       The logical variables, which define the isosurface variables
 !       to be written.
 !
-       logical :: isoWriteRho,          isoWriteVx,       isoWriteVy
-       logical :: isoWriteVz,           isoWriteP,        isoWriteTurb
-       logical :: isoWriteMx,           isoWriteMy,       isoWriteMz
-       logical :: isoWriteRVx,          isoWriteRVy,      isoWriteRVz
-       logical :: isoWriteRhoE,         isoWriteTemp,     isoWriteCp
-       logical :: isoWriteMach,         isoWriteMachTurb, isoWriteEddyVis
-       logical :: isoWriteRMach
-       logical :: isoWriteRatioEddyVis, isoWriteDist,     isoWriteVortx
-       logical :: isoWritevorty,        isoWritevortz,    isoWriteVort
-       logical :: isoWritePtotLoss,     isoWriteResRho,   isoWriteresMom
-       logical :: isoWriteResRhoE,      isoWriteResTurb,  isoWriteBlank
-       logical :: isoWriteShock,        isoWriteFilteredShock
+    logical :: isoWriteRho, isoWriteVx, isoWriteVy
+    logical :: isoWriteVz, isoWriteP, isoWriteTurb
+    logical :: isoWriteMx, isoWriteMy, isoWriteMz
+    logical :: isoWriteRVx, isoWriteRVy, isoWriteRVz
+    logical :: isoWriteRhoE, isoWriteTemp, isoWriteCp
+    logical :: isoWriteMach, isoWriteMachTurb, isoWriteEddyVis
+    logical :: isoWriteRMach
+    logical :: isoWriteRatioEddyVis, isoWriteDist, isoWriteVortx
+    logical :: isoWritevorty, isoWritevortz, isoWriteVort
+    logical :: isoWritePtotLoss, isoWriteResRho, isoWriteresMom
+    logical :: isoWriteResRhoE, isoWriteResTurb, isoWriteBlank
+    logical :: isoWriteShock, isoWriteFilteredShock
 !
 !       Extra variables defining the type and number of iso surfaces
 !       to be written.
 !
-       integer(kind=intType) :: nIsoSurface = 0
-       real(kind=realType), dimension(:), allocatable :: isoValues
-       character(len=maxCGNSNameLen), dimension(:), allocatable :: isoSurfaceNames
+    integer(kind=intType) :: nIsoSurface = 0
+    real(kind=realType), dimension(:), allocatable :: isoValues
+    character(len=maxCGNSNameLen), dimension(:), allocatable :: isoSurfaceNames
 
-
-       end module extraOutput
+end module extraOutput
diff --git a/src/modules/inputParam.F90 b/src/modules/inputParam.F90
index 07e97df72..35353559e 100644
--- a/src/modules/inputParam.F90
+++ b/src/modules/inputParam.F90
@@ -1,880 +1,875 @@
 module inputDiscretization
-  !
-  !       Input parameters which are related to the discretization of
-  !       the governing equations, i.e. scheme parameters, time accuracy
-  !       (in case of an unsteady computation) and preconditioning info.
-  !
-  use constants, only : intType, realType
-  implicit none
-  save
-  !
-  !       Definition of the discretization input parameters.
-  !
-  ! spaceDiscr:             Fine grid discretization.
-  ! spaceDiscrCoarse:       Coarse grid discretization.
-  ! orderTurb:              Order of the discretization of the advective
-  !                         terms of the turbulent transport equations.
-  !                         Possibilities are 1st and 2nd order.
-  ! riemann:                Fine grid riemann solver, upwind schemes only.
-  ! riemannCoarse:          Idem, but on the coarse grids.
-  ! limiter:                Limiter, upwind schemes only.
-  ! precond:                Preconditioner.
-  ! eulerWallBCTreatment:   Wall boundary condition treatment for inviscid
-  !                         simulations.
-  ! viscWallBCTreatment:    Wall boundary condition treatment for viscous
-  !                         simulations.
-  ! outflowTreatment:       Treatment of the outflow boundaries. Either
-  !                         constantExtrapol or linExtrapol.
-  ! nonMatchTreatment:      Treatment of the non-matching block
-  !                         boundaries. Either NonConservative or
-  !                         Conservative.
-  ! vis2:                   Coefficient of the second order dissipation.
-  ! vis4:                   Coefficient of the fourth order dissipation.
-  ! vis2Coarse:             Coefficient of the second order dissipation
-  !                         on the coarser grids in the mg cycle. On the
-  !                         coarser grids a first order scheme is used.
-  ! adis:                   Exponent for directional scaling of the
-  !                         dissipation. adis == 0: no directional scaling,
-  !                                      adis == 1: isotropic dissipation.
-  ! kappaCoef:              Coefficient in the upwind reconstruction
-  !                         schemes, both linear and nonlinear.
-  ! vortexCorr:             Whether or not a vortex correction must be
-  !                         applied. Steady flow only.
-  ! dirScaling:             Whether or not directional scaling must be
-  !                         applied.
-  ! hScalingInlet:          Whether or not the outgoing Riemann invariant
-  !                         must be scaled for a subsonic inlet. May be
-  !                         needed for stability when strong total
-  !                         temperature gradients are present.
-  ! radiiNeededFine:        Whether or not the spectral radii are needed
-  !                         to compute the fluxes of the fine grid.
-  ! radiiNeededCoarse:      Idem for the coarse grid.
-  ! lumpedDiss :            logical factor for determining whether or not
-  !                         lumped dissipation is used for preconditioner
-  ! approxSA:               Determines if the approximate source terms form
-  !                         the SA model is used.
-  ! sigma      :            Scaling parameter for dissipation lumping in
-  !                         approximateprecondtioner
-  ! useApproxWallDistance : logical to determine if the user wants to
-  !                         use the fast approximate wall distance
-  !                         computations. Typically only used for
-  !                         repeated calls when the wall distance would
-  !                         not have changed significantly
-  ! updateWallAssociation : Logical to determine if the full wall distance
-  !                         assocation is to be performed on the next
-  !                         wall distance calculation. This is only
-  !                         significant when useApproxWallDistance is
-  !                         set to True. This allows the user to
-  !                         reassociate the face a cell is associated
-  !                         with.
-  ! lowspeedpreconditoner:  Whether or not to use low-speed precondioner
-
-  integer(kind=intType) :: spaceDiscr, spaceDiscrCoarse
-  integer(kind=intType) :: orderTurb, limiter
-  integer(kind=intType) :: riemann, riemannCoarse, precond
-  integer(kind=intType) :: eulerWallBCTreatment, viscWallBCTreatment, outflowTreatment
-  integer(kind=intType) :: nonMatchTreatment
-
-  real(kind=realType) :: vis2, vis4, vis2Coarse, adis
-  real(kind=realType) :: kappaCoef
-  logical :: lumpedDiss
-  logical :: approxSA
-  real(kind=realType) :: sigma
-  logical :: useBlockettes
+    !
+    !       Input parameters which are related to the discretization of
+    !       the governing equations, i.e. scheme parameters, time accuracy
+    !       (in case of an unsteady computation) and preconditioning info.
+    !
+    use constants, only: intType, realType
+    implicit none
+    save
+    !
+    !       Definition of the discretization input parameters.
+    !
+    ! spaceDiscr:             Fine grid discretization.
+    ! spaceDiscrCoarse:       Coarse grid discretization.
+    ! orderTurb:              Order of the discretization of the advective
+    !                         terms of the turbulent transport equations.
+    !                         Possibilities are 1st and 2nd order.
+    ! riemann:                Fine grid riemann solver, upwind schemes only.
+    ! riemannCoarse:          Idem, but on the coarse grids.
+    ! limiter:                Limiter, upwind schemes only.
+    ! precond:                Preconditioner.
+    ! eulerWallBCTreatment:   Wall boundary condition treatment for inviscid
+    !                         simulations.
+    ! viscWallBCTreatment:    Wall boundary condition treatment for viscous
+    !                         simulations.
+    ! outflowTreatment:       Treatment of the outflow boundaries. Either
+    !                         constantExtrapol or linExtrapol.
+    ! nonMatchTreatment:      Treatment of the non-matching block
+    !                         boundaries. Either NonConservative or
+    !                         Conservative.
+    ! vis2:                   Coefficient of the second order dissipation.
+    ! vis4:                   Coefficient of the fourth order dissipation.
+    ! vis2Coarse:             Coefficient of the second order dissipation
+    !                         on the coarser grids in the mg cycle. On the
+    !                         coarser grids a first order scheme is used.
+    ! adis:                   Exponent for directional scaling of the
+    !                         dissipation. adis == 0: no directional scaling,
+    !                                      adis == 1: isotropic dissipation.
+    ! kappaCoef:              Coefficient in the upwind reconstruction
+    !                         schemes, both linear and nonlinear.
+    ! vortexCorr:             Whether or not a vortex correction must be
+    !                         applied. Steady flow only.
+    ! dirScaling:             Whether or not directional scaling must be
+    !                         applied.
+    ! hScalingInlet:          Whether or not the outgoing Riemann invariant
+    !                         must be scaled for a subsonic inlet. May be
+    !                         needed for stability when strong total
+    !                         temperature gradients are present.
+    ! radiiNeededFine:        Whether or not the spectral radii are needed
+    !                         to compute the fluxes of the fine grid.
+    ! radiiNeededCoarse:      Idem for the coarse grid.
+    ! lumpedDiss :            logical factor for determining whether or not
+    !                         lumped dissipation is used for preconditioner
+    ! approxSA:               Determines if the approximate source terms form
+    !                         the SA model is used.
+    ! sigma      :            Scaling parameter for dissipation lumping in
+    !                         approximateprecondtioner
+    ! useApproxWallDistance : logical to determine if the user wants to
+    !                         use the fast approximate wall distance
+    !                         computations. Typically only used for
+    !                         repeated calls when the wall distance would
+    !                         not have changed significantly
+    ! updateWallAssociation : Logical to determine if the full wall distance
+    !                         assocation is to be performed on the next
+    !                         wall distance calculation. This is only
+    !                         significant when useApproxWallDistance is
+    !                         set to True. This allows the user to
+    !                         reassociate the face a cell is associated
+    !                         with.
+    ! lowspeedpreconditoner:  Whether or not to use low-speed precondioner
+
+    integer(kind=intType) :: spaceDiscr, spaceDiscrCoarse
+    integer(kind=intType) :: orderTurb, limiter
+    integer(kind=intType) :: riemann, riemannCoarse, precond
+    integer(kind=intType) :: eulerWallBCTreatment, viscWallBCTreatment, outflowTreatment
+    integer(kind=intType) :: nonMatchTreatment
+
+    real(kind=realType) :: vis2, vis4, vis2Coarse, adis
+    real(kind=realType) :: kappaCoef
+    logical :: lumpedDiss
+    logical :: approxSA
+    real(kind=realType) :: sigma
+    logical :: useBlockettes
 
 #ifndef USE_TAPENADE
-  real(kind=realType) :: vis2b, vis4b, vis2Coarseb, adisb
-  real(kind=realType) :: kappaCoefb
-  real(kind=realType) :: sigmab
+    real(kind=realType) :: vis2b, vis4b, vis2Coarseb, adisb
+    real(kind=realType) :: kappaCoefb
+    real(kind=realType) :: sigmab
 #endif
-  logical :: vortexCorr, dirScaling, hScalingInlet
-  logical :: radiiNeededFine, radiiNeededCoarse
+    logical :: vortexCorr, dirScaling, hScalingInlet
+    logical :: radiiNeededFine, radiiNeededCoarse
 
-
-  logical :: useApproxWallDistance
-  logical :: lowSpeedPreconditioner
+    logical :: useApproxWallDistance
+    logical :: lowSpeedPreconditioner
 end module inputDiscretization
 
 !      ==================================================================
 
 module inputIO
-  !
-  !       Input parameters which are related to io issues, like file
-  !       names and corresponding info.
-  !
-  use constants
-  implicit none
-  save
-  !
-  !       Definition of the IO input parameters.
-  !
-  ! paramFile:           Parameter file, command line argument.
-  ! firstWrite:          Whether or not this is the first time a
-  !                      solution is written. Needed when different
-  !                      file formats are used for reading and
-  !                      writing.
-  ! gridFile:            Grid file.
-  ! newGridFile:         File to which the changed grid is
-  !                      written. Needed for moving and/or
-  !                      deforming geometries.
-  ! restartFiles:        Restart solution files; for cgns this
-  !                      could be the same as the grid file, but
-  !                      not necesarrily.
-  ! solFile:             Solution file; for cgns this could be the
-  !                      same as the grid or restart file, but not
-  !                      necesarrily.
-  ! surfaceSolFile:      Surface solution file.
-  ! sliceSolFile:        File name of a slice of a surface solution. TEMPORARY
-  ! liftDistributionFile:File name of a lift file. TEMPORARY
-  ! cpFile:              File which contains the curve fits for cp.
-  ! precisionGrid:       Precision of the grid file to be written.
-  !                      Possibilities are precisionSingle and
-  !                      precisionDouble.
-  ! precisionSol:        Idem for the solution file(s).
-
-  ! precisionSurfGrid:   Precision of the grid in the surface file
-  ! precisionSurfSol:    Precision of the solution in the surface file
-  ! storeRindLayer:      Whether or not to store 1 layer of rind
-  !                      (halo) cells in the solution file.
-  ! checkRestartSol:     Whether or not the solution in the restart
-  !                      file must be checked for correct
-  !                      nondimensionalization.
-  ! autoParameterUpdate: Whether or not the parameter file must be
-  !                      updated automatically. After a restart file
-  !                      is written, such that a restart can be made
-  !                      without editing the parameter file.
-  ! writeCoorMeter:      Whether or not the coordinates in the
-  !                      solution files must be written in meters.
-  !                      If not, the original units are used.
-  ! storeConvInnerIter:  Whether or not to store the convergence of
-  !                      the inner iterations for unsteady mode.
-  !                      On systems with a limited amount of memory
-  !                      the storage of this info could be a
-  !                      bottleneck for memory.
-
-  integer(kind=intType) :: precisionGrid, precisionSol
-  integer(kind=intType) :: precisionSurfGrid, precisionSurfSol
-  character(len=maxStringLen) :: paramFile, gridFile
-  character(len=maxStringLen) :: newGridFile
-  character(len=maxStringLen) :: solFile
-  character(len=maxstringlen), dimension(:), allocatable :: restartFiles
-  character(len=maxStringLen) :: surfaceSolFile, cpFile, sliceSolFile, liftDistributionFile
-
-  logical :: storeRindLayer, checkRestartSol
-  logical :: autoParameterUpdate, writeCoorMeter
-  logical :: storeConvInnerIter
-  logical :: firstWrite = .true.
-  logical :: viscousSurfaceVelocities = .True.
-
-  ! Extra file names (set from python) that specify the name of
-  ! the volume, surface, lift and slice files written from an
-  ! interrupt.
-  character(len=maxStringLen) :: forcedSurfaceFile, forcedVolumeFile
-  character(len=maxStringLen) :: forcedLiftFile, forcedSliceFile
-  character(len=maxStringLen) :: convSolFileBasename
-  ! logical to control the us of the transition model
-  logical :: laminarToTurbulent
+    !
+    !       Input parameters which are related to io issues, like file
+    !       names and corresponding info.
+    !
+    use constants
+    implicit none
+    save
+    !
+    !       Definition of the IO input parameters.
+    !
+    ! paramFile:           Parameter file, command line argument.
+    ! firstWrite:          Whether or not this is the first time a
+    !                      solution is written. Needed when different
+    !                      file formats are used for reading and
+    !                      writing.
+    ! gridFile:            Grid file.
+    ! newGridFile:         File to which the changed grid is
+    !                      written. Needed for moving and/or
+    !                      deforming geometries.
+    ! restartFiles:        Restart solution files; for cgns this
+    !                      could be the same as the grid file, but
+    !                      not necesarrily.
+    ! solFile:             Solution file; for cgns this could be the
+    !                      same as the grid or restart file, but not
+    !                      necesarrily.
+    ! surfaceSolFile:      Surface solution file.
+    ! sliceSolFile:        File name of a slice of a surface solution. TEMPORARY
+    ! liftDistributionFile:File name of a lift file. TEMPORARY
+    ! cpFile:              File which contains the curve fits for cp.
+    ! precisionGrid:       Precision of the grid file to be written.
+    !                      Possibilities are precisionSingle and
+    !                      precisionDouble.
+    ! precisionSol:        Idem for the solution file(s).
+
+    ! precisionSurfGrid:   Precision of the grid in the surface file
+    ! precisionSurfSol:    Precision of the solution in the surface file
+    ! storeRindLayer:      Whether or not to store 1 layer of rind
+    !                      (halo) cells in the solution file.
+    ! checkRestartSol:     Whether or not the solution in the restart
+    !                      file must be checked for correct
+    !                      nondimensionalization.
+    ! autoParameterUpdate: Whether or not the parameter file must be
+    !                      updated automatically. After a restart file
+    !                      is written, such that a restart can be made
+    !                      without editing the parameter file.
+    ! writeCoorMeter:      Whether or not the coordinates in the
+    !                      solution files must be written in meters.
+    !                      If not, the original units are used.
+    ! storeConvInnerIter:  Whether or not to store the convergence of
+    !                      the inner iterations for unsteady mode.
+    !                      On systems with a limited amount of memory
+    !                      the storage of this info could be a
+    !                      bottleneck for memory.
+
+    integer(kind=intType) :: precisionGrid, precisionSol
+    integer(kind=intType) :: precisionSurfGrid, precisionSurfSol
+    character(len=maxStringLen) :: paramFile, gridFile
+    character(len=maxStringLen) :: newGridFile
+    character(len=maxStringLen) :: solFile
+    character(len=maxstringlen), dimension(:), allocatable :: restartFiles
+    character(len=maxStringLen) :: surfaceSolFile, cpFile, sliceSolFile, liftDistributionFile
+
+    logical :: storeRindLayer, checkRestartSol
+    logical :: autoParameterUpdate, writeCoorMeter
+    logical :: storeConvInnerIter
+    logical :: firstWrite = .true.
+    logical :: viscousSurfaceVelocities = .True.
+
+    ! Extra file names (set from python) that specify the name of
+    ! the volume, surface, lift and slice files written from an
+    ! interrupt.
+    character(len=maxStringLen) :: forcedSurfaceFile, forcedVolumeFile
+    character(len=maxStringLen) :: forcedLiftFile, forcedSliceFile
+    character(len=maxStringLen) :: convSolFileBasename
+    ! logical to control the us of the transition model
+    logical :: laminarToTurbulent
 
 end module inputIO
 
 !      ==================================================================
 
 module inputIteration
-  !
-  !       Input parameters which are related to the iteration process,
-  !       i.e. multigrid parameters, cfl numbers, smoothers and
-  !       convergence.
-  !
-  use constants
-  implicit none
-  save
-  !
-  !       Definition of the iteration input parameters.
-  !
-  ! nCycles:          Maximum number of multigrid cycles.
-  ! nCyclesCoarse:    Idem, but on the coarse grids in full multigrid.
-  ! nSaveVolume:      Number of fine grid cycles after which a volume
-  !                   solution file is written.
-  ! nSaveSurface:     Number of fine grid cycles after which a
-  !                   surface solution file is written.
-  ! nsgStartup:       Number of single grid iterations, before
-  !                   switching to multigrid. Could be useful for
-  !                   supersonic problems with strong shocks.
-  ! nSubIterTurb:     Number of turbulent subiterations when using
-  !                   a decoupled approach for the turbulence.
-  ! nUpdateBleeds:    Number of iterations after which the bleed
-  !                   boundary conditions must be updated.
-  ! smoother:         Smoother to be used.
-  ! nRKStages:        Number of stages in the runge kutta scheme.
-  ! nSubiterations:   Maximum number of subiterations used in
-  !                   DADI.
-  ! turbTreatment:    Treatment of the turbulent transport equations;
-  !                   either decoupled or coupled.
-  ! turbSmoother:     Smoother to use in case a decoupled solver
-  !                   is to be used.
-  ! turbRelax:        What kind of turbulent relaxation to use.
-  !                   Either turbRelaxExplicit or
-  !                   turbRelaxImplicit.
-  ! resAveraging:     What kind of residual averaging to use.
-  ! freezeTurbSource: Whether or not the turbulent source terms must
-  !                   be frozen on the coarser grid levels; only if
-  !                   a coupled solver is to be used.
-  ! mgBoundCorr:      Treatment of the boundary halo's for the
-  !                   multigrid corrections. Either dirichlet0,
-  !                   set the corrections to zero, or neumann.
-  ! mgStartlevel:     Grid level on which the multigrid must be
-  !                   started in the full mg cycle. In case a restart
-  !                   is specified this info is overruled and the
-  !                   start level is the finest grid.
-  ! nMGSteps:         Number of steps in the array cycleStrategy.
-  ! nMGLevels:        Number of levels in the multigrid. This info
-  !                   is derived from the cycle strategy.
-  ! cycleStrategy:    Array which describes the mg cycle.
-  ! cfl:              Cfl number on the fine grid.
-  ! cflCoarse:        Idem, but on the coarse grids.
-  ! cfllimit          Limit used to determine how much residuals are smoothed
-  ! alfaTurb:         Relaxation factor in turbulent dd-adi smoother.
-  ! betaTurb:         Relaxation factor in vf dd-adi smoother.
-  ! relaxBleeds:      Relaxation coefficient for the update
-  !                   of the bleed boundary condition.
-  ! smoop:            Coefficient in the implicit smoothing.
-  ! fcoll:            Relaxation factor for the restricted residuals.
-  ! L2Conv:           Relative L2 norm of the density residuals for
-  !                   which the computation is assumed converged.
-  ! L2ConvCoarse:     Idem, but on the coarse grids during full mg.
-  ! etaRk:            Coefficients in the runge kutta scheme. The
-  !                   values depend on the number of stages specified.
-  ! cdisRk:           Dissipative coefficients in the runge kutta
-  !                   scheme. The values depend on the number of
-  !                   stages specified.
-  ! printIterations: If True, iterations are printed to stdout
-  ! turbresscale: Scaling factor for turbulent residual. Necessary for
-  !            NKsolver with RANS. Only tested on SA.
-  ! iterType : String used for specifying which type of iteration was taken
-  !
-  ! Definition of the string, which stores the multigrid cycling
-  ! strategy.
-  !
-
-  integer(kind=intType) :: nCycles, nCyclesCoarse
-  integer(kind=intType) :: nSaveVolume, nSaveSurface
-  integer(kind=intType) :: nsgStartup, smoother, nRKStages
-  integer(kind=intType) :: nSubiterations
-  integer(kind=intType) :: nSubIterTurb, nUpdateBleeds
-  integer(kind=intType) :: resAveraging
-  real(kind=realType) :: CFLLimit
-  integer(kind=intType) :: turbTreatment, turbSmoother, turbRelax
-  integer(kind=intType) :: mgBoundCorr, mgStartlevel
-  integer(kind=intType) :: nMGSteps, nMGLevels
-  real(kind=realType) :: timeLimit
-  integer(kind=intType), allocatable, dimension(:) :: cycleStrategy
-  integer(kind=intType) :: miniterNum
-  real(kind=realType) :: cfl, cflCoarse, fcoll, smoop
-  real(kind=realType) :: alfaTurb, betaTurb
-  real(kind=realType) :: L2Conv, L2ConvCoarse
-  real(kind=realType) :: L2ConvRel
-  real(kind=realType) :: maxL2DeviationFactor
-  real(kind=realType) :: relaxBleeds
-  real(kind=realtype) :: epscoefconv
-  integer(kind=inttype) :: convcheckwindowsize
-  real(kind=realType), allocatable, dimension(:) :: etaRK, cdisRK
-  character (len=maxStringLen) :: mgDescription
-  logical :: rkReset
-  logical :: useLinResMonitor
-  logical :: freezeTurbSource
-  logical :: printIterations
-  logical :: printWarnings
-  real(kind=realType), dimension(4) :: turbResScale
+    !
+    !       Input parameters which are related to the iteration process,
+    !       i.e. multigrid parameters, cfl numbers, smoothers and
+    !       convergence.
+    !
+    use constants
+    implicit none
+    save
+    !
+    !       Definition of the iteration input parameters.
+    !
+    ! nCycles:          Maximum number of multigrid cycles.
+    ! nCyclesCoarse:    Idem, but on the coarse grids in full multigrid.
+    ! nSaveVolume:      Number of fine grid cycles after which a volume
+    !                   solution file is written.
+    ! nSaveSurface:     Number of fine grid cycles after which a
+    !                   surface solution file is written.
+    ! nsgStartup:       Number of single grid iterations, before
+    !                   switching to multigrid. Could be useful for
+    !                   supersonic problems with strong shocks.
+    ! nSubIterTurb:     Number of turbulent subiterations when using
+    !                   a decoupled approach for the turbulence.
+    ! nUpdateBleeds:    Number of iterations after which the bleed
+    !                   boundary conditions must be updated.
+    ! smoother:         Smoother to be used.
+    ! nRKStages:        Number of stages in the runge kutta scheme.
+    ! nSubiterations:   Maximum number of subiterations used in
+    !                   DADI.
+    ! turbTreatment:    Treatment of the turbulent transport equations;
+    !                   either decoupled or coupled.
+    ! turbSmoother:     Smoother to use in case a decoupled solver
+    !                   is to be used.
+    ! turbRelax:        What kind of turbulent relaxation to use.
+    !                   Either turbRelaxExplicit or
+    !                   turbRelaxImplicit.
+    ! resAveraging:     What kind of residual averaging to use.
+    ! freezeTurbSource: Whether or not the turbulent source terms must
+    !                   be frozen on the coarser grid levels; only if
+    !                   a coupled solver is to be used.
+    ! mgBoundCorr:      Treatment of the boundary halo's for the
+    !                   multigrid corrections. Either dirichlet0,
+    !                   set the corrections to zero, or neumann.
+    ! mgStartlevel:     Grid level on which the multigrid must be
+    !                   started in the full mg cycle. In case a restart
+    !                   is specified this info is overruled and the
+    !                   start level is the finest grid.
+    ! nMGSteps:         Number of steps in the array cycleStrategy.
+    ! nMGLevels:        Number of levels in the multigrid. This info
+    !                   is derived from the cycle strategy.
+    ! cycleStrategy:    Array which describes the mg cycle.
+    ! cfl:              Cfl number on the fine grid.
+    ! cflCoarse:        Idem, but on the coarse grids.
+    ! cfllimit          Limit used to determine how much residuals are smoothed
+    ! alfaTurb:         Relaxation factor in turbulent dd-adi smoother.
+    ! betaTurb:         Relaxation factor in vf dd-adi smoother.
+    ! relaxBleeds:      Relaxation coefficient for the update
+    !                   of the bleed boundary condition.
+    ! smoop:            Coefficient in the implicit smoothing.
+    ! fcoll:            Relaxation factor for the restricted residuals.
+    ! L2Conv:           Relative L2 norm of the density residuals for
+    !                   which the computation is assumed converged.
+    ! L2ConvCoarse:     Idem, but on the coarse grids during full mg.
+    ! etaRk:            Coefficients in the runge kutta scheme. The
+    !                   values depend on the number of stages specified.
+    ! cdisRk:           Dissipative coefficients in the runge kutta
+    !                   scheme. The values depend on the number of
+    !                   stages specified.
+    ! printIterations: If True, iterations are printed to stdout
+    ! turbresscale: Scaling factor for turbulent residual. Necessary for
+    !            NKsolver with RANS. Only tested on SA.
+    ! iterType : String used for specifying which type of iteration was taken
+    !
+    ! Definition of the string, which stores the multigrid cycling
+    ! strategy.
+    !
+
+    integer(kind=intType) :: nCycles, nCyclesCoarse
+    integer(kind=intType) :: nSaveVolume, nSaveSurface
+    integer(kind=intType) :: nsgStartup, smoother, nRKStages
+    integer(kind=intType) :: nSubiterations
+    integer(kind=intType) :: nSubIterTurb, nUpdateBleeds
+    integer(kind=intType) :: resAveraging
+    real(kind=realType) :: CFLLimit
+    integer(kind=intType) :: turbTreatment, turbSmoother, turbRelax
+    integer(kind=intType) :: mgBoundCorr, mgStartlevel
+    integer(kind=intType) :: nMGSteps, nMGLevels
+    real(kind=realType) :: timeLimit
+    integer(kind=intType), allocatable, dimension(:) :: cycleStrategy
+    integer(kind=intType) :: miniterNum
+    real(kind=realType) :: cfl, cflCoarse, fcoll, smoop
+    real(kind=realType) :: alfaTurb, betaTurb
+    real(kind=realType) :: L2Conv, L2ConvCoarse
+    real(kind=realType) :: L2ConvRel
+    real(kind=realType) :: maxL2DeviationFactor
+    real(kind=realType) :: relaxBleeds
+    real(kind=realtype) :: epscoefconv
+    integer(kind=inttype) :: convcheckwindowsize
+    real(kind=realType), allocatable, dimension(:) :: etaRK, cdisRK
+    character(len=maxStringLen) :: mgDescription
+    logical :: rkReset
+    logical :: useLinResMonitor
+    logical :: freezeTurbSource
+    logical :: printIterations
+    logical :: printWarnings
+    real(kind=realType), dimension(4) :: turbResScale
 
 end module inputIteration
 
 module inputCostFunctions
-  use constants
-  real(kind=realtype) :: sepSensorOffset = zero
-  real(kind=realtype) :: sepSensorSharpness = 10.0_realType
-  real(kind=realtype) :: cavSensorOffset
-  real(kind=realtype) :: cavSensorSharpness
-  integer(kind=inttype) :: cavExponent
-  logical :: computeCavitation
+    use constants
+    real(kind=realtype) :: sepSensorOffset = zero
+    real(kind=realtype) :: sepSensorSharpness = 10.0_realType
+    real(kind=realtype) :: cavSensorOffset
+    real(kind=realtype) :: cavSensorSharpness
+    integer(kind=inttype) :: cavExponent
+    logical :: computeCavitation
 
 end module inputCostFunctions
 
 !      ==================================================================
 
 module inputMotion
-  !
-  !       Input parameters which are related to the rigid body motion of
-  !       the entire mesh, i.e. translation and rotation.
-  !       These parameters can only be specified for an external flow
-  !       computation.
-  !
-  use precision
-  implicit none
-  save
-  ! rotPoint(3): Rotation point of the rigid body rotation.
+    !
+    !       Input parameters which are related to the rigid body motion of
+    !       the entire mesh, i.e. translation and rotation.
+    !       These parameters can only be specified for an external flow
+    !       computation.
+    !
+    use precision
+    implicit none
+    save
+    ! rotPoint(3): Rotation point of the rigid body rotation.
 
-  real(kind=realType), dimension(3) :: rotPoint
-  real(kind=realType), dimension(3) :: rotPointd
+    real(kind=realType), dimension(3) :: rotPoint
+    real(kind=realType), dimension(3) :: rotPointd
 
-  ! degreePolXRot: Degree of the x-rotation polynomial.
-  ! degreePolYRot: Degree of the y-rotation polynomial.
-  ! degreePolZRot: Degree of the z-rotation polynomial.
+    ! degreePolXRot: Degree of the x-rotation polynomial.
+    ! degreePolYRot: Degree of the y-rotation polynomial.
+    ! degreePolZRot: Degree of the z-rotation polynomial.
 
-  integer(kind=intType) :: degreePolXRot
-  integer(kind=intType) :: degreePolYRot
-  integer(kind=intType) :: degreePolZRot
+    integer(kind=intType) :: degreePolXRot
+    integer(kind=intType) :: degreePolYRot
+    integer(kind=intType) :: degreePolZRot
 
-  ! coefPolXRot(0:): coefficients of the x-rotation polynomial.
-  ! coefPolYRot(0:): coefficients of the y-rotation polynomial.
-  ! coefPolZRot(0:): coefficients of the z-rotation polynomial.
+    ! coefPolXRot(0:): coefficients of the x-rotation polynomial.
+    ! coefPolYRot(0:): coefficients of the y-rotation polynomial.
+    ! coefPolZRot(0:): coefficients of the z-rotation polynomial.
 
-  real(kind=realType), dimension(:), allocatable :: coefPolXRot
-  real(kind=realType), dimension(:), allocatable :: coefPolYRot
-  real(kind=realType), dimension(:), allocatable :: coefPolZRot
+    real(kind=realType), dimension(:), allocatable :: coefPolXRot
+    real(kind=realType), dimension(:), allocatable :: coefPolYRot
+    real(kind=realType), dimension(:), allocatable :: coefPolZRot
 
-  ! degreeFourXRot: Degree of the x-rotation fourier series.
-  ! degreeFourYRot: Degree of the y-rotation fourier series.
-  ! degreeFourZRot: Degree of the z-rotation fourier series.
+    ! degreeFourXRot: Degree of the x-rotation fourier series.
+    ! degreeFourYRot: Degree of the y-rotation fourier series.
+    ! degreeFourZRot: Degree of the z-rotation fourier series.
 
-  integer(kind=intType) :: degreeFourXRot
-  integer(kind=intType) :: degreeFourYRot
-  integer(kind=intType) :: degreeFourZRot
+    integer(kind=intType) :: degreeFourXRot
+    integer(kind=intType) :: degreeFourYRot
+    integer(kind=intType) :: degreeFourZRot
 
-  ! omegaFourXRot: Fourier frequency of the x-rotation; the
-  !                   period of the motion is 2*pi/omega.
-  ! omegaFourYRot: Fourier frequency of the y-rotation.
-  ! omegaFourZRot: Fourier frequency of the z-rotation.
+    ! omegaFourXRot: Fourier frequency of the x-rotation; the
+    !                   period of the motion is 2*pi/omega.
+    ! omegaFourYRot: Fourier frequency of the y-rotation.
+    ! omegaFourZRot: Fourier frequency of the z-rotation.
 
-  real(kind=realType) :: omegaFourXRot,omegaFourXRotb
-  real(kind=realType) :: omegaFourYRot,omegaFourYRotb
-  real(kind=realType) :: omegaFourZRot,omegaFourZRotb
+    real(kind=realType) :: omegaFourXRot, omegaFourXRotb
+    real(kind=realType) :: omegaFourYRot, omegaFourYRotb
+    real(kind=realType) :: omegaFourZRot, omegaFourZRotb
 
-  ! cosCoefFourXRot(0:): cosine coefficients of the
-  !                      x-rotation fourier series.
-  ! cosCoefFourYRot(0:): cosine coefficients of the
-  !                      y-rotation fourier series.
-  ! cosCoefFourZRot(0:): cosine coefficients of the
-  !                      z-rotation fourier series.
+    ! cosCoefFourXRot(0:): cosine coefficients of the
+    !                      x-rotation fourier series.
+    ! cosCoefFourYRot(0:): cosine coefficients of the
+    !                      y-rotation fourier series.
+    ! cosCoefFourZRot(0:): cosine coefficients of the
+    !                      z-rotation fourier series.
 
-  real(kind=realType), dimension(:), allocatable :: cosCoefFourXRot
-  real(kind=realType), dimension(:), allocatable :: cosCoefFourYRot
-  real(kind=realType), dimension(:), allocatable :: cosCoefFourZRot
+    real(kind=realType), dimension(:), allocatable :: cosCoefFourXRot
+    real(kind=realType), dimension(:), allocatable :: cosCoefFourYRot
+    real(kind=realType), dimension(:), allocatable :: cosCoefFourZRot
 
-  ! sinCoefFourXRot(1:): sine coefficients of the
-  !                      x-rotation fourier series.
-  ! sinCoefFourYRot(1:): sine coefficients of the
-  !                      y-rotation fourier series.
-  ! sinCoefFourZRot(1:): sine coefficients of the
-  !                      z-rotation fourier series.
+    ! sinCoefFourXRot(1:): sine coefficients of the
+    !                      x-rotation fourier series.
+    ! sinCoefFourYRot(1:): sine coefficients of the
+    !                      y-rotation fourier series.
+    ! sinCoefFourZRot(1:): sine coefficients of the
+    !                      z-rotation fourier series.
 
-  real(kind=realType), dimension(:), allocatable :: sinCoefFourXRot
-  real(kind=realType), dimension(:), allocatable :: sinCoefFourYRot
-  real(kind=realType), dimension(:), allocatable :: sinCoefFourZRot
+    real(kind=realType), dimension(:), allocatable :: sinCoefFourXRot
+    real(kind=realType), dimension(:), allocatable :: sinCoefFourYRot
+    real(kind=realType), dimension(:), allocatable :: sinCoefFourZRot
 
-  ! degreePolAlpha: Degree of the Alpha polynomial.
+    ! degreePolAlpha: Degree of the Alpha polynomial.
 
-  integer(kind=intType) :: degreePolAlpha
+    integer(kind=intType) :: degreePolAlpha
 
-  ! coefPolAlpha(0:): coefficients of the Alpha polynomial.
+    ! coefPolAlpha(0:): coefficients of the Alpha polynomial.
 
-  real(kind=realType), dimension(:), allocatable :: coefPolAlpha
-  real(kind=realType), dimension(:), allocatable :: coefPolAlphab
+    real(kind=realType), dimension(:), allocatable :: coefPolAlpha
+    real(kind=realType), dimension(:), allocatable :: coefPolAlphab
 
-  ! degreeFourAlpha: Degree of the Alpha fourier series.
+    ! degreeFourAlpha: Degree of the Alpha fourier series.
 
-  integer(kind=intType) :: degreeFourAlpha
+    integer(kind=intType) :: degreeFourAlpha
 
-  ! omegaFourAlpha: Fourier frequency of the Alpha; the
-  !                   period of the motion is 2*pi/omega.
+    ! omegaFourAlpha: Fourier frequency of the Alpha; the
+    !                   period of the motion is 2*pi/omega.
 
-  real(kind=realType) :: omegaFourAlpha,omegafouralphab
+    real(kind=realType) :: omegaFourAlpha, omegafouralphab
 
-  ! cosCoefFourAlpha(0:): cosine coefficients of the
-  !                      x-rotation fourier series.
+    ! cosCoefFourAlpha(0:): cosine coefficients of the
+    !                      x-rotation fourier series.
 
-  real(kind=realType), dimension(:), allocatable :: cosCoefFourAlpha
-  real(kind=realType), dimension(:), allocatable :: cosCoefFourAlphab
+    real(kind=realType), dimension(:), allocatable :: cosCoefFourAlpha
+    real(kind=realType), dimension(:), allocatable :: cosCoefFourAlphab
 
-  ! sinCoefFourAlpha(1:): sine coefficients of the
-  !                      Alpha fourier series.
+    ! sinCoefFourAlpha(1:): sine coefficients of the
+    !                      Alpha fourier series.
 
-  real(kind=realType), dimension(:), allocatable :: sinCoefFourAlpha
-  real(kind=realType), dimension(:), allocatable :: sinCoefFourAlphab
+    real(kind=realType), dimension(:), allocatable :: sinCoefFourAlpha
+    real(kind=realType), dimension(:), allocatable :: sinCoefFourAlphab
 
-  ! degreePolXRot: Degree of the Beta polynomial.
+    ! degreePolXRot: Degree of the Beta polynomial.
 
-  integer(kind=intType) :: degreePolBeta
+    integer(kind=intType) :: degreePolBeta
 
-  ! coefPolXRot(0:): coefficients of the Beta polynomial.
+    ! coefPolXRot(0:): coefficients of the Beta polynomial.
 
-  real(kind=realType), dimension(:), allocatable :: coefPolBeta
-  real(kind=realType), dimension(:), allocatable :: coefPolBetab
+    real(kind=realType), dimension(:), allocatable :: coefPolBeta
+    real(kind=realType), dimension(:), allocatable :: coefPolBetab
 
-  ! degreeFourBeta: Degree of the Beta fourier series.
+    ! degreeFourBeta: Degree of the Beta fourier series.
 
-  integer(kind=intType) :: degreeFourBeta
+    integer(kind=intType) :: degreeFourBeta
 
-  ! omegaFourBeta: Fourier frequency of the Beta; the
-  !                   period of the motion is 2*pi/omega.
+    ! omegaFourBeta: Fourier frequency of the Beta; the
+    !                   period of the motion is 2*pi/omega.
 
-  real(kind=realType) :: omegaFourBeta,omegafourbetab
+    real(kind=realType) :: omegaFourBeta, omegafourbetab
 
-  ! cosCoefFourBeta(0:): cosine coefficients of the
-  !                      Beta fourier series.
+    ! cosCoefFourBeta(0:): cosine coefficients of the
+    !                      Beta fourier series.
 
-  real(kind=realType), dimension(:), allocatable :: cosCoefFourBeta
-  real(kind=realType), dimension(:), allocatable :: cosCoefFourBetab
+    real(kind=realType), dimension(:), allocatable :: cosCoefFourBeta
+    real(kind=realType), dimension(:), allocatable :: cosCoefFourBetab
 
-  ! sinCoefFourBeta(1:): sine coefficients of the
-  !                      Beta fourier series.
+    ! sinCoefFourBeta(1:): sine coefficients of the
+    !                      Beta fourier series.
 
-  real(kind=realType), dimension(:), allocatable :: sinCoefFourBeta
-  real(kind=realType), dimension(:), allocatable :: sinCoefFourBetab
+    real(kind=realType), dimension(:), allocatable :: sinCoefFourBeta
+    real(kind=realType), dimension(:), allocatable :: sinCoefFourBetab
 
-  ! degreePolMach: Degree of the Mach polynomial.
+    ! degreePolMach: Degree of the Mach polynomial.
 
-  integer(kind=intType) :: degreePolMach
+    integer(kind=intType) :: degreePolMach
 
-  ! coefPolMach(0:): coefficients of the Mach polynomial.
+    ! coefPolMach(0:): coefficients of the Mach polynomial.
 
-  real(kind=realType), dimension(:), allocatable :: coefPolMach
-  real(kind=realType), dimension(:), allocatable :: coefPolMachb
+    real(kind=realType), dimension(:), allocatable :: coefPolMach
+    real(kind=realType), dimension(:), allocatable :: coefPolMachb
 
-  ! degreeFourMach: Degree of the Mach fourier series.
+    ! degreeFourMach: Degree of the Mach fourier series.
 
-  integer(kind=intType) :: degreeFourMach
+    integer(kind=intType) :: degreeFourMach
 
-  ! omegaFourMach: Fourier frequency of the Mach Number; the
-  !                   period of the motion is 2*pi/omega.
+    ! omegaFourMach: Fourier frequency of the Mach Number; the
+    !                   period of the motion is 2*pi/omega.
 
-  real(kind=realType) :: omegaFourMach,omegafourmachb
+    real(kind=realType) :: omegaFourMach, omegafourmachb
 
-  ! cosCoefFourMach(0:): cosine coefficients of the
-  !                      Mach Number fourier series.
+    ! cosCoefFourMach(0:): cosine coefficients of the
+    !                      Mach Number fourier series.
 
-  real(kind=realType), dimension(:), allocatable :: cosCoefFourMach
-  real(kind=realType), dimension(:), allocatable :: cosCoefFourMachb
+    real(kind=realType), dimension(:), allocatable :: cosCoefFourMach
+    real(kind=realType), dimension(:), allocatable :: cosCoefFourMachb
 
-  ! sinCoefFourMach(1:): sine coefficients of the
-  !                      Mach Number fourier series.
+    ! sinCoefFourMach(1:): sine coefficients of the
+    !                      Mach Number fourier series.
 
-  real(kind=realType), dimension(:), allocatable :: sinCoefFourMach
-  real(kind=realType), dimension(:), allocatable :: sinCoefFourMachb
+    real(kind=realType), dimension(:), allocatable :: sinCoefFourMach
+    real(kind=realType), dimension(:), allocatable :: sinCoefFourMachb
 
-  ! gridMotionSpecified: Whether or not a rigid body motion of
-  !                      the grid has been specified.
+    ! gridMotionSpecified: Whether or not a rigid body motion of
+    !                      the grid has been specified.
 
-  logical :: gridMotionSpecified
+    logical :: gridMotionSpecified
 
 end module inputMotion
 
 !      ==================================================================
 
 module inputParallel
-  !
-  !       Input parameters which are related to the parallelization.
-  !
-  use precision
-  implicit none
-  save
-
-  ! loadImbalance: Allowable load imbalance
-  ! splitBlocks:   Whether or not blocks can be split to improve
-  !                the load balance.
-  ! loadBalanceIter: The number of refinment iterations to run to try
-  !                   to get better load balancing.
-  real(realType) :: loadImbalance
-  logical        :: splitBlocks
-  integer(kind=inttype) :: loadBalanceIter, partitionlikenproc
+    !
+    !       Input parameters which are related to the parallelization.
+    !
+    use precision
+    implicit none
+    save
+
+    ! loadImbalance: Allowable load imbalance
+    ! splitBlocks:   Whether or not blocks can be split to improve
+    !                the load balance.
+    ! loadBalanceIter: The number of refinment iterations to run to try
+    !                   to get better load balancing.
+    real(realType) :: loadImbalance
+    logical        :: splitBlocks
+    integer(kind=inttype) :: loadBalanceIter, partitionlikenproc
 end module inputParallel
 
 !      ==================================================================
 
 module inputPhysics
-  !
-  !       Input parameters which are related to the physics of the flow,
-  !       like governing equations, mode of the equations, turbulence
-  !       model and free stream conditions.
-  !
-  use precision
-  implicit none
-  save
-
-  !       Definition of the physics input parameters.
-  !
-  ! equations:           Governing equations to be solved.
-  ! equationMode:        Mode of the equations, steady, unsteady
-  !                      or timeSpectral.
-  ! flowType:            Type of flow, internal or external.
-  ! cpModel:             Which cp model, constant or function of
-  !                      temperature via curve fits.
-  ! turbModel:           Turbulence model.
-  ! turbProd:            Which production term to use in the transport
-  !                      turbulence equations, strain, vorticity or
-  !                      kato-launder.
-  ! rvfN:                Determines the version of v2f turbulence model.
-  ! rvfB:                Whether or not to solve v2f with an
-  !                      upper bound.
-  ! useQCR:              Determines if the QCR term is applied to the shear tensor computation
-  !                      when considering turbulence model effects
-  ! useRotationSA:       Determines if we will use rotation correction (SA model only)
-  ! useft2SA:            Determines if we will use the ft2 term (SA model only)
-  ! useRoughSA:          Whether or not to use rough version of SA (BC values are set via CGNS-Grid)
-  ! wallFunctions:       Whether or not to use wall functions.
-  ! wallDistanceNeeded:  Whether or not the wall distance is needed
-  !                      for the turbulence model in a RANS problem.
-  ! Mach:                Free stream Mach number.
-  ! MachCoef:            Mach number used to compute coefficients;
-  !                      only relevant for translating geometries.
-  ! MachGrid:            Mach number of the Mesh. Used in stability
-  !                      derivative calculations. Specified as the
-  !                      negative of the desired freestream Mach number.
-  !                      When this option is set, set Mach = 0.0...
-  ! velDirFreestream(3): Direction of the free-stream velocity.
-  !                      Internally this vector is scaled to a unit
-  !                      vector, so there is no need to specify a
-  !                      unit vector. Specifying this vector solves
-  !                      the problem of angle of attack and yaw angle
-  !                      definition as well as the direction of the
-  !                      axis (e.g. y- or z-axis in spanwise direction).
-  ! liftDirection(3):    Direction vector for the lift.
-  ! dragDirection(3):    Direction vector for the drag.
-  ! gammaConstant:       Constant specific heat ratio.
-  ! RGasDim:             Gas constant in S.I. units.
-  ! Prandtl:             Prandtl number.
-  ! PrandtlTurb:         Turbulent prandtl number.
-  ! pklim:               Limiter for the production of k, the production
-  !                      is limited to pklim times the destruction.
-  ! wallOffset:          Offset from the wall when wall functions
-  !                      are used.
-  ! eddyVisInfRatio:     Free stream value of the eddy viscosity.
-  ! turbIntensityInf:    Free stream value of the turbulent intensity.
-  ! surfaceRef:          Reference area for the force and moments
-  !                      computation.
-  ! lengthRef:           Reference length for the moments computation.
-  ! pointRef(3):         Moment reference point.
-  ! pointRefEC(3):       Elastic center. Bending moment refernce point
-  ! SSuthDim:            Sutherlands law temperature (SI Units)
-  ! muSuthDim:           Reference viscosity at reference temperature for Sutherlands law (SI Units)
-  ! TSuthDim:            Reference temperature for Sutherlands law (SI Units)
-  ! momentAxis(3,2)      Axis about which to calculate a moment, provided as 2 points in 3-D
-  ! cavitationnumber     Negative Cp value that triggers the traditional
-  !                      step-function based cavitation sensor.
-  ! cpmin_rho            The rho parameter used with the KS-based cavitation sensor.
-  ! cpmin_family         The cpmin for a given surface family that does not use
-  !                      KS-aggregation, but rather an exact min computation.
-
-
-  integer(kind=intType) :: equations, equationMode, flowType
-  integer(kind=intType) :: turbModel, cpModel, turbProd
-  integer(kind=intType) :: rvfN
-  logical               :: rvfB
-  logical               :: useQCR, useRotationSA, useft2SA
-
-  logical :: wallFunctions, wallDistanceNeeded
-
-  real(kind=realType) :: alpha, beta
-  integer(kind=intType) :: liftIndex
-  real(kind=realType) :: Mach, MachCoef, MachGrid
-  real(kind=realType) :: Reynolds, ReynoldsLength
-  real(kind=realType) :: gammaConstant, RGasDim
-  real(kind=realType) :: Prandtl, PrandtlTurb, pklim, wallOffset, wallDistCutoff
-  real(kind=realType) :: eddyVisInfRatio, turbIntensityInf
-  real(kind=realType) :: surfaceRef, lengthRef
-  real(kind=realType), dimension(3) :: velDirFreestream
-  real(kind=realType), dimension(3) :: liftDirection
-  real(kind=realType), dimension(3) :: dragDirection
-  real(kind=realType), dimension(3) :: pointRef
-  real(kind=realType), dimension(3,2) :: momentAxis
-  real(kind=realType) :: SSuthDim, muSuthDim, TSuthDim
-  real(kind=realType) :: cavitationnumber
-  logical             :: useRoughSA
-  real(kind=realType) :: cpmin_rho
-  real(kind=realType), dimension(:), allocatable :: cpmin_family
+    !
+    !       Input parameters which are related to the physics of the flow,
+    !       like governing equations, mode of the equations, turbulence
+    !       model and free stream conditions.
+    !
+    use precision
+    implicit none
+    save
+
+    !       Definition of the physics input parameters.
+    !
+    ! equations:           Governing equations to be solved.
+    ! equationMode:        Mode of the equations, steady, unsteady
+    !                      or timeSpectral.
+    ! flowType:            Type of flow, internal or external.
+    ! cpModel:             Which cp model, constant or function of
+    !                      temperature via curve fits.
+    ! turbModel:           Turbulence model.
+    ! turbProd:            Which production term to use in the transport
+    !                      turbulence equations, strain, vorticity or
+    !                      kato-launder.
+    ! rvfN:                Determines the version of v2f turbulence model.
+    ! rvfB:                Whether or not to solve v2f with an
+    !                      upper bound.
+    ! useQCR:              Determines if the QCR term is applied to the shear tensor computation
+    !                      when considering turbulence model effects
+    ! useRotationSA:       Determines if we will use rotation correction (SA model only)
+    ! useft2SA:            Determines if we will use the ft2 term (SA model only)
+    ! useRoughSA:          Whether or not to use rough version of SA (BC values are set via CGNS-Grid)
+    ! wallFunctions:       Whether or not to use wall functions.
+    ! wallDistanceNeeded:  Whether or not the wall distance is needed
+    !                      for the turbulence model in a RANS problem.
+    ! Mach:                Free stream Mach number.
+    ! MachCoef:            Mach number used to compute coefficients;
+    !                      only relevant for translating geometries.
+    ! MachGrid:            Mach number of the Mesh. Used in stability
+    !                      derivative calculations. Specified as the
+    !                      negative of the desired freestream Mach number.
+    !                      When this option is set, set Mach = 0.0...
+    ! velDirFreestream(3): Direction of the free-stream velocity.
+    !                      Internally this vector is scaled to a unit
+    !                      vector, so there is no need to specify a
+    !                      unit vector. Specifying this vector solves
+    !                      the problem of angle of attack and yaw angle
+    !                      definition as well as the direction of the
+    !                      axis (e.g. y- or z-axis in spanwise direction).
+    ! liftDirection(3):    Direction vector for the lift.
+    ! dragDirection(3):    Direction vector for the drag.
+    ! gammaConstant:       Constant specific heat ratio.
+    ! RGasDim:             Gas constant in S.I. units.
+    ! Prandtl:             Prandtl number.
+    ! PrandtlTurb:         Turbulent prandtl number.
+    ! pklim:               Limiter for the production of k, the production
+    !                      is limited to pklim times the destruction.
+    ! wallOffset:          Offset from the wall when wall functions
+    !                      are used.
+    ! eddyVisInfRatio:     Free stream value of the eddy viscosity.
+    ! turbIntensityInf:    Free stream value of the turbulent intensity.
+    ! surfaceRef:          Reference area for the force and moments
+    !                      computation.
+    ! lengthRef:           Reference length for the moments computation.
+    ! pointRef(3):         Moment reference point.
+    ! pointRefEC(3):       Elastic center. Bending moment refernce point
+    ! SSuthDim:            Sutherlands law temperature (SI Units)
+    ! muSuthDim:           Reference viscosity at reference temperature for Sutherlands law (SI Units)
+    ! TSuthDim:            Reference temperature for Sutherlands law (SI Units)
+    ! momentAxis(3,2)      Axis about which to calculate a moment, provided as 2 points in 3-D
+    ! cavitationnumber     Negative Cp value that triggers the traditional
+    !                      step-function based cavitation sensor.
+    ! cpmin_rho            The rho parameter used with the KS-based cavitation sensor.
+    ! cpmin_family         The cpmin for a given surface family that does not use
+    !                      KS-aggregation, but rather an exact min computation.
+
+    integer(kind=intType) :: equations, equationMode, flowType
+    integer(kind=intType) :: turbModel, cpModel, turbProd
+    integer(kind=intType) :: rvfN
+    logical               :: rvfB
+    logical               :: useQCR, useRotationSA, useft2SA
+
+    logical :: wallFunctions, wallDistanceNeeded
+
+    real(kind=realType) :: alpha, beta
+    integer(kind=intType) :: liftIndex
+    real(kind=realType) :: Mach, MachCoef, MachGrid
+    real(kind=realType) :: Reynolds, ReynoldsLength
+    real(kind=realType) :: gammaConstant, RGasDim
+    real(kind=realType) :: Prandtl, PrandtlTurb, pklim, wallOffset, wallDistCutoff
+    real(kind=realType) :: eddyVisInfRatio, turbIntensityInf
+    real(kind=realType) :: surfaceRef, lengthRef
+    real(kind=realType), dimension(3) :: velDirFreestream
+    real(kind=realType), dimension(3) :: liftDirection
+    real(kind=realType), dimension(3) :: dragDirection
+    real(kind=realType), dimension(3) :: pointRef
+    real(kind=realType), dimension(3, 2) :: momentAxis
+    real(kind=realType) :: SSuthDim, muSuthDim, TSuthDim
+    real(kind=realType) :: cavitationnumber
+    logical             :: useRoughSA
+    real(kind=realType) :: cpmin_rho
+    real(kind=realType), dimension(:), allocatable :: cpmin_family
 
 #ifndef USE_TAPENADE
-  real(kind=realType) :: alphad, betad
-  real(kind=realType), dimension(3) :: velDirFreestreamd, velDirFreeStreamb
-  real(kind=realType), dimension(3) :: liftDirectiond, liftDirectionb
-  real(kind=realType), dimension(3) :: dragDirectiond, dragDirectionb
-  real(kind=realType), dimension(3) :: pointRefd, pointRefb
-  real(kind=realType), dimension(3,2) :: momentAxisd, momentAxisb
-  real(kind=realType) :: Machd, MachCoefd, MachGridd
-  real(kind=realType) :: reynoldsd, reynoldslengthd
-  real(kind=realType) :: gammaconstantd
-  real(kind=realType) :: surfaceRefd, lengthRefd
-  real(kind=realType) :: rgasdimd
-  real(kind=realType) :: Prandtlb, PrandtlTurbb
+    real(kind=realType) :: alphad, betad
+    real(kind=realType), dimension(3) :: velDirFreestreamd, velDirFreeStreamb
+    real(kind=realType), dimension(3) :: liftDirectiond, liftDirectionb
+    real(kind=realType), dimension(3) :: dragDirectiond, dragDirectionb
+    real(kind=realType), dimension(3) :: pointRefd, pointRefb
+    real(kind=realType), dimension(3, 2) :: momentAxisd, momentAxisb
+    real(kind=realType) :: Machd, MachCoefd, MachGridd
+    real(kind=realType) :: reynoldsd, reynoldslengthd
+    real(kind=realType) :: gammaconstantd
+    real(kind=realType) :: surfaceRefd, lengthRefd
+    real(kind=realType) :: rgasdimd
+    real(kind=realType) :: Prandtlb, PrandtlTurbb
 #endif
 
-  real(kind=realType), dimension(3) :: pointRefEC
+    real(kind=realType), dimension(3) :: pointRefEC
 
-  ! Return forces as tractions instead of forces:
-  logical :: forcesAsTractions
+    ! Return forces as tractions instead of forces:
+    logical :: forcesAsTractions
 
 end module inputPhysics
 
 !      ==================================================================
 
 module inputTimeSpectral
-  !
-  !       Input parameters for time spectral problems.
-  !
-  use precision
-  implicit none
-  save
-
-  ! nTimeIntervalsSpectral: Number of time instances used.
-
-  integer(kind=intType) :: nTimeIntervalsSpectral
-
-  ! dscalar(:,:,:): Matrix for the time derivatices of scalar
-  !                 quantities; different for every section to
-  !                 allow for different periodic angles.
-  !                 The second and third dimension equal the
-  !                 number of time intervals.
-  ! dvector(:,:,:): Matrices for the time derivatives of vector
-  !                 quantities; different for every section to
-  !                 allow for different periodic angles and for
-  !                 sector periodicity.
-  !                 The second and third dimension equal 3 times
-  !                 the number of time intervals.
-
-  real(kind=realType), dimension(:,:,:), allocatable :: dscalar
-  real(kind=realType), dimension(:,:,:), allocatable :: dvector
-
-  ! writeUnsteadyRestartSpectral: Whether or not a restart file
-  !                               must be written, which is
-  !                               capable to do a restart in
-  !                               unsteady mode.
-  ! dtUnsteadyRestartSpectral:    The corresponding time step.
-
-
-  real(kind=realType) :: dtUnsteadyRestartSpectral
-  logical ::             writeUnsteadyRestartSpectral
-
-  ! writeUnsteadyVolSpectral:  Whether or not the corresponding
-  !                            unsteady volume solution files
-  !                            must be written after the
-  !                            computation.
-  ! writeUnsteadySurfSpectral: Idem for the surface solution
-  !                            files.
-  ! nUnsteadySolSpectral:      The corresponding number of
-  !                            unsteady solutions to be created.
-
-  integer(kind=intType) :: nUnsteadySolSpectral
-  logical ::               writeUnsteadyVolSpectral
-  logical ::               writeUnsteadySurfSpectral
-
-  ! rotMatrixSpectral(:,3,3):  The corresponding rotation matrices
-  !                            for the velocity. No rotation
-  !                            point is needed, because only the
-  !                            velocities need to be transformed.
-  !                            The matrix stored is the one used
-  !                            when the upper bound of the mode
-  !                            number is exceeded; for the lower
-  !                            bound the inverse (== transpose)
-  !                            must be used. The 1st dimension
-  !                            is the number of sections.
-
-  real(kind=realType), dimension(:,:,:), allocatable :: &
-       rotMatrixSpectral
-  logical :: useTSInterpolatedGridVelocity
-
-  real(kind=realType) :: omegaFourier
+    !
+    !       Input parameters for time spectral problems.
+    !
+    use precision
+    implicit none
+    save
+
+    ! nTimeIntervalsSpectral: Number of time instances used.
+
+    integer(kind=intType) :: nTimeIntervalsSpectral
+
+    ! dscalar(:,:,:): Matrix for the time derivatices of scalar
+    !                 quantities; different for every section to
+    !                 allow for different periodic angles.
+    !                 The second and third dimension equal the
+    !                 number of time intervals.
+    ! dvector(:,:,:): Matrices for the time derivatives of vector
+    !                 quantities; different for every section to
+    !                 allow for different periodic angles and for
+    !                 sector periodicity.
+    !                 The second and third dimension equal 3 times
+    !                 the number of time intervals.
+
+    real(kind=realType), dimension(:, :, :), allocatable :: dscalar
+    real(kind=realType), dimension(:, :, :), allocatable :: dvector
+
+    ! writeUnsteadyRestartSpectral: Whether or not a restart file
+    !                               must be written, which is
+    !                               capable to do a restart in
+    !                               unsteady mode.
+    ! dtUnsteadyRestartSpectral:    The corresponding time step.
+
+    real(kind=realType) :: dtUnsteadyRestartSpectral
+    logical ::             writeUnsteadyRestartSpectral
+
+    ! writeUnsteadyVolSpectral:  Whether or not the corresponding
+    !                            unsteady volume solution files
+    !                            must be written after the
+    !                            computation.
+    ! writeUnsteadySurfSpectral: Idem for the surface solution
+    !                            files.
+    ! nUnsteadySolSpectral:      The corresponding number of
+    !                            unsteady solutions to be created.
+
+    integer(kind=intType) :: nUnsteadySolSpectral
+    logical ::               writeUnsteadyVolSpectral
+    logical ::               writeUnsteadySurfSpectral
+
+    ! rotMatrixSpectral(:,3,3):  The corresponding rotation matrices
+    !                            for the velocity. No rotation
+    !                            point is needed, because only the
+    !                            velocities need to be transformed.
+    !                            The matrix stored is the one used
+    !                            when the upper bound of the mode
+    !                            number is exceeded; for the lower
+    !                            bound the inverse (== transpose)
+    !                            must be used. The 1st dimension
+    !                            is the number of sections.
+
+    real(kind=realType), dimension(:, :, :), allocatable :: &
+        rotMatrixSpectral
+    logical :: useTSInterpolatedGridVelocity
+
+    real(kind=realType) :: omegaFourier
 
 end module inputTimeSpectral
 
 !      ==================================================================
 
 module inputUnsteady
-  !
-  !       Input parameters for unsteady problems.
-  !
-  use constants
-  implicit none
-  save
-
-
-  ! timeIntegrationScheme: Time integration scheme to be used for
-  !                        unsteady problems. Possibilities are
-  !                        Backward difference schemes, explicit
-  !                        RungeKutta schemes and implicit
-  !                        RungeKutta schemes.
-
-  integer(kind=intType) :: timeIntegrationScheme
-
-  ! timeAccuracy:     Accuracy of the time integrator for unsteady
-  !                   problems. Possibilities are 1st, 2nd and 3rd
-  !                   order accurate schemes.
-  ! nTimeStepsCoarse: Number of time steps on the coarse mesh;
-  !                   only relevant for periodic problems for
-  !                   which a full mg can be used.
-  ! nTimeStepsFine:   Number of time steps on the fine mesh.
-  ! deltaT:           Physical time step in seconds.
-
-  integer(kind=intType) :: timeAccuracy
-  integer(kind=intType) :: nTimeStepsCoarse, nTimeStepsFine
-
-  real(kind=realType) :: deltaT
-
-  ! nRKStagesUnsteady:   Number of stages used in the Runge-Kutta
-  !                      schemes for a time accurate computation.
-  ! betaRKUnsteady(:,:): Matrix with the Runge-Kutta coefficients
-  !                      for the residuals.
-  ! gammaRKUnsteady(:):  Vector with the time portion of the
-  !                      Runge-Kutta stages.
-
-  integer(kind=intType) :: nRKStagesUnsteady
-
-  real(kind=realType), dimension(:,:), allocatable :: betaRKUnsteady
-  real(kind=realType), dimension(:),   allocatable :: gammaRKUnsteady
-
-  ! nOldGridRead: Number of old grid levels read from the grid
-  !               files. Needed only for a consistent restart
-  !               on the deforming meshes.
-
-  integer(kind=intType) :: nOldGridRead
-
-  ! useALE: Use the deforming mesh ale formuation.
-  logical :: useALE
-
-  ! updateWallDistanceUnsteady: Whether or not to update the wall
-  !                             distance in unsteady mode. For a
-  !                             RANS simulation on a changing grid
-  !                             this should be done if the
-  !                             turbulence model requires the wall
-  !                             distance. However, the user may
-  !                             overrule this if he thinks it is
-  !                             not necessary.
-
-  logical :: updateWallDistanceUnsteady
+    !
+    !       Input parameters for unsteady problems.
+    !
+    use constants
+    implicit none
+    save
+
+    ! timeIntegrationScheme: Time integration scheme to be used for
+    !                        unsteady problems. Possibilities are
+    !                        Backward difference schemes, explicit
+    !                        RungeKutta schemes and implicit
+    !                        RungeKutta schemes.
+
+    integer(kind=intType) :: timeIntegrationScheme
+
+    ! timeAccuracy:     Accuracy of the time integrator for unsteady
+    !                   problems. Possibilities are 1st, 2nd and 3rd
+    !                   order accurate schemes.
+    ! nTimeStepsCoarse: Number of time steps on the coarse mesh;
+    !                   only relevant for periodic problems for
+    !                   which a full mg can be used.
+    ! nTimeStepsFine:   Number of time steps on the fine mesh.
+    ! deltaT:           Physical time step in seconds.
+
+    integer(kind=intType) :: timeAccuracy
+    integer(kind=intType) :: nTimeStepsCoarse, nTimeStepsFine
+
+    real(kind=realType) :: deltaT
+
+    ! nRKStagesUnsteady:   Number of stages used in the Runge-Kutta
+    !                      schemes for a time accurate computation.
+    ! betaRKUnsteady(:,:): Matrix with the Runge-Kutta coefficients
+    !                      for the residuals.
+    ! gammaRKUnsteady(:):  Vector with the time portion of the
+    !                      Runge-Kutta stages.
+
+    integer(kind=intType) :: nRKStagesUnsteady
+
+    real(kind=realType), dimension(:, :), allocatable :: betaRKUnsteady
+    real(kind=realType), dimension(:), allocatable :: gammaRKUnsteady
+
+    ! nOldGridRead: Number of old grid levels read from the grid
+    !               files. Needed only for a consistent restart
+    !               on the deforming meshes.
+
+    integer(kind=intType) :: nOldGridRead
+
+    ! useALE: Use the deforming mesh ale formuation.
+    logical :: useALE
+
+    ! updateWallDistanceUnsteady: Whether or not to update the wall
+    !                             distance in unsteady mode. For a
+    !                             RANS simulation on a changing grid
+    !                             this should be done if the
+    !                             turbulence model requires the wall
+    !                             distance. However, the user may
+    !                             overrule this if he thinks it is
+    !                             not necessary.
+
+    logical :: updateWallDistanceUnsteady
 
 end module inputUnsteady
 
 module inputADjoint
-  !
-  !       Definition of some parameters ADjoint.
-  !       The actual values of this parameters are arbitrary;
-  !       in the code always the symbolic names are (should be) used.
-  !
-  use constants
-  implicit none
-  save
-  !
-  !       Definition of the adjoint input parameters.
-  !
-
-  ! Monitor      : Whether or not to enable the monitor for the KSP
-  !                contexts.
-  ! ApproxPC     : Whether or not to use the approximate jacobian
-  !                preconditioner
-  ! ADPC         : Whether or not to use AD for preconditioning
-  ! viscPC       : Whether or not to keep cross derivative terms
-  !                in viscous preconditioner.
-  ! FrozenTurbulence: Whether to use frozen turbulence assumption
-  ! useDiagTSPC   : Whether or not the off time instance terms are
-  !                 included in the TS preconditioner.
-  logical :: setMonitor, ApproxPC, useDiagTSPC
-  logical :: frozenTurbulence, viscPC, ADPC
-
-  ! ADjointSolverType: Type of linear solver for the ADjoint
-  ! PreCondType      : Type of Preconditioner to use
-  ! Matrix Ordering  : Type of matrix ordering to use
-  ! LocalPCType      : Type of preconditioner to use on subdomains
-  character(maxStringLen) :: ADjointSolverType
-  character(maxStringLen) :: GMRESOrthogType
-  character(maxStringLen) :: PreCondType
-  character(maxStringLen) :: matrixOrdering
-  character(maxStringLen) :: adjointPCSide
-  character(maxStringLen) :: LocalPCType
-
-  ! FillLevel     : Number of levels of fill for the ILU local PC
-  ! Overlap       : Amount of overlap in the ASM PC
-  integer(kind=intType):: FillLevel, Overlap
-
-  ! adjRelTol     : Relative tolerance
-  ! adjAbsTol     : Absolute tolerance
-  ! adjDivTol     : Relative tolerance increase to divergence
-  ! adjMaxIter    : Maximum number of iterations
-  ! adjRestart    : Maximum number of steps before restart
-  !                 It has a high impact on the required memory!
-  ! adjMonStep    : Convergence monitor step
-
-  real(kind=alwaysRealType)    :: adjRelTol
-  real(kind=alwaysRealType)    :: adjAbsTol
-  real(kind=alwaysRealType)    :: adjRelTolRel
-  real(kind=alwaysRealType)    :: adjDivTol
-  real(kind=realType) :: adjMaxL2Dev
-  integer(kind=intType)  :: adjMaxIter
-  integer(kind=intType)  :: adjRestart
-  integer(kind=intType)  :: adjMonStep
-
-  ! outerPCIts : Number of iterations to run for on (global) preconditioner
-  ! intterPCIts : Number of iterations to run on local preconditioner
-  integer(kind=intType) :: outerPreConIts
-  integer(kind=intType) :: innerPreConIts
-
-  logical :: printTiming
-  integer(kind=intType) :: subKSPSubspaceSize
-  integer(kind=intType) :: applyAdjointPCSubSpaceSize
-
-  ! firstRun     :  Whether this is the first run of the TGT debugger
-  ! verifyState  :  Whether to verify state
-  ! verifySpatial:  Whether to verify spatial
-  ! verifyExtra  :  Whether to verify extra
-  logical :: firstRun
-  logical :: verifyState
-  logical :: verifySpatial
-  logical :: verifyExtra
-
-  ! Logicals for specifiying if we are using matrix-free forms of
-  ! drdw
-  logical :: useMatrixFreedRdw
+    !
+    !       Definition of some parameters ADjoint.
+    !       The actual values of this parameters are arbitrary;
+    !       in the code always the symbolic names are (should be) used.
+    !
+    use constants
+    implicit none
+    save
+    !
+    !       Definition of the adjoint input parameters.
+    !
+
+    ! Monitor      : Whether or not to enable the monitor for the KSP
+    !                contexts.
+    ! ApproxPC     : Whether or not to use the approximate jacobian
+    !                preconditioner
+    ! ADPC         : Whether or not to use AD for preconditioning
+    ! viscPC       : Whether or not to keep cross derivative terms
+    !                in viscous preconditioner.
+    ! FrozenTurbulence: Whether to use frozen turbulence assumption
+    ! useDiagTSPC   : Whether or not the off time instance terms are
+    !                 included in the TS preconditioner.
+    logical :: setMonitor, ApproxPC, useDiagTSPC
+    logical :: frozenTurbulence, viscPC, ADPC
+
+    ! ADjointSolverType: Type of linear solver for the ADjoint
+    ! PreCondType      : Type of Preconditioner to use
+    ! Matrix Ordering  : Type of matrix ordering to use
+    ! LocalPCType      : Type of preconditioner to use on subdomains
+    character(maxStringLen) :: ADjointSolverType
+    character(maxStringLen) :: GMRESOrthogType
+    character(maxStringLen) :: PreCondType
+    character(maxStringLen) :: matrixOrdering
+    character(maxStringLen) :: adjointPCSide
+    character(maxStringLen) :: LocalPCType
+
+    ! FillLevel     : Number of levels of fill for the ILU local PC
+    ! Overlap       : Amount of overlap in the ASM PC
+    integer(kind=intType):: FillLevel, Overlap
+
+    ! adjRelTol     : Relative tolerance
+    ! adjAbsTol     : Absolute tolerance
+    ! adjDivTol     : Relative tolerance increase to divergence
+    ! adjMaxIter    : Maximum number of iterations
+    ! adjRestart    : Maximum number of steps before restart
+    !                 It has a high impact on the required memory!
+    ! adjMonStep    : Convergence monitor step
+
+    real(kind=alwaysRealType)    :: adjRelTol
+    real(kind=alwaysRealType)    :: adjAbsTol
+    real(kind=alwaysRealType)    :: adjRelTolRel
+    real(kind=alwaysRealType)    :: adjDivTol
+    real(kind=realType) :: adjMaxL2Dev
+    integer(kind=intType)  :: adjMaxIter
+    integer(kind=intType)  :: adjRestart
+    integer(kind=intType)  :: adjMonStep
+
+    ! outerPCIts : Number of iterations to run for on (global) preconditioner
+    ! intterPCIts : Number of iterations to run on local preconditioner
+    integer(kind=intType) :: outerPreConIts
+    integer(kind=intType) :: innerPreConIts
+
+    logical :: printTiming
+    integer(kind=intType) :: subKSPSubspaceSize
+    integer(kind=intType) :: applyAdjointPCSubSpaceSize
+
+    ! firstRun     :  Whether this is the first run of the TGT debugger
+    ! verifyState  :  Whether to verify state
+    ! verifySpatial:  Whether to verify spatial
+    ! verifyExtra  :  Whether to verify extra
+    logical :: firstRun
+    logical :: verifyState
+    logical :: verifySpatial
+    logical :: verifyExtra
+
+    ! Logicals for specifiying if we are using matrix-free forms of
+    ! drdw
+    logical :: useMatrixFreedRdw
 
 end module inputADjoint
 
 module inputTSStabDeriv
-  !
-  !       Definition of some parameters for Time Spectral stability
-  !       derivatives.
-  !       The actual values of this parameters are arbitrary;
-  !       in the code always the symbolic names are (should be) used.
-  !
-
-  ! TSStability : Whether or not the TS stability derivatives should
-  !               be computed
-  logical:: TSStability,TSAlphaMode,TSBetaMode,TSpMode,&
-       TSqMode,TSrMode,TSAltitudeMode,TSMachMode
-  ! TSAlphaFollowing : Whether or not alpha follows the body in p,q,r mode
-  logical:: TSAlphaFollowing
-
-  ! useWindAxis : whether to rotate around the wind axis or the body
-  !               axis...
-  logical:: useWindAxis
+    !
+    !       Definition of some parameters for Time Spectral stability
+    !       derivatives.
+    !       The actual values of this parameters are arbitrary;
+    !       in the code always the symbolic names are (should be) used.
+    !
+
+    ! TSStability : Whether or not the TS stability derivatives should
+    !               be computed
+    logical:: TSStability, TSAlphaMode, TSBetaMode, TSpMode, &
+              TSqMode, TSrMode, TSAltitudeMode, TSMachMode
+    ! TSAlphaFollowing : Whether or not alpha follows the body in p,q,r mode
+    logical:: TSAlphaFollowing
+
+    ! useWindAxis : whether to rotate around the wind axis or the body
+    !               axis...
+    logical:: useWindAxis
 
 end module inputTSStabDeriv
 
-
 module inputOverset
-  use constants
-  implicit none
-  save
-  !
-  !       Definition of parameters for the overset implementation
-  !
-  logical :: useoversetLoadBalance=.True.
-  real(kind=realType) :: overlapFactor=0.9
-  real(kind=realType) :: nearWallDist=0.1
-  real(kind=realType) :: oversetProjTol=1e-12
-  real(kind=realType) :: backgroundVolScale = 1.0
-  logical :: debugZipper=.False.
-  integer(kind=intType) :: oversetUpdateMode
-  real(kind=realType) :: selfZipCutoff
-  ! nRefine: number of connectivity loops to run
-  integer(kind=intType)::nRefine
-  integer(kind=intType)::nFloodIter
-  logical :: useZipperMesh
-  logical :: useOversetWallScaling
-  logical :: oversetDebugPrint
+    use constants
+    implicit none
+    save
+    !
+    !       Definition of parameters for the overset implementation
+    !
+    logical :: useoversetLoadBalance = .True.
+    real(kind=realType) :: overlapFactor = 0.9
+    real(kind=realType) :: nearWallDist = 0.1
+    real(kind=realType) :: oversetProjTol = 1e-12
+    real(kind=realType) :: backgroundVolScale = 1.0
+    logical :: debugZipper = .False.
+    integer(kind=intType) :: oversetUpdateMode
+    real(kind=realType) :: selfZipCutoff
+    ! nRefine: number of connectivity loops to run
+    integer(kind=intType)::nRefine
+    integer(kind=intType)::nFloodIter
+    logical :: useZipperMesh
+    logical :: useOversetWallScaling
+    logical :: oversetDebugPrint
 end module inputOverset
diff --git a/src/modules/paramTurb.F90 b/src/modules/paramTurb.F90
index 636323715..51a607929 100644
--- a/src/modules/paramTurb.F90
+++ b/src/modules/paramTurb.F90
@@ -1,85 +1,85 @@
-       module paramTurb
+module paramTurb
 !
 !       Module that contains the constants for the turbulence models
 !       as well as some global variables/parameters for the turbulent
 !       routines.
 !
-       use constants, only : realType, intType
-       implicit none
-       save
+    use constants, only: realType, intType
+    implicit none
+    save
 !
 !       Spalart-Allmaras constants.
 !
-       real(kind=realType), parameter :: rsaK   = 0.41_realType
-       real(kind=realType), parameter :: rsaCb1 = 0.1355_realType
-       real(kind=realType), parameter :: rsaCb2 = 0.622_realType
-       real(kind=realType), parameter :: rsaCb3 = 0.66666666667_realType
-       real(kind=realType), parameter :: rsaCv1 = 7.1_realType
-       real(kind=realType), parameter :: rsaCw1 = rsaCb1/(rsaK*rsaK) &
-                                                  + (1.+rsaCb2)/rsaCb3
-       real(kind=realType), parameter :: rsaCw2 = 0.3_realType
-       real(kind=realType), parameter :: rsaCw3 = 2.0_realType
-       real(kind=realType), parameter :: rsaCt1 = 1.0_realType
-       real(kind=realType), parameter :: rsaCt2 = 2.0_realType
-       real(kind=realType), parameter :: rsaCt3 = 1.2_realType
-       real(kind=realType), parameter :: rsaCt4 = 0.5_realType
-       real(kind=realType), parameter :: rsaCrot = 2.0_realType
-       real(kind=realType), parameter :: rsaCr1 = 0.5_realType
+    real(kind=realType), parameter :: rsaK = 0.41_realType
+    real(kind=realType), parameter :: rsaCb1 = 0.1355_realType
+    real(kind=realType), parameter :: rsaCb2 = 0.622_realType
+    real(kind=realType), parameter :: rsaCb3 = 0.66666666667_realType
+    real(kind=realType), parameter :: rsaCv1 = 7.1_realType
+    real(kind=realType), parameter :: rsaCw1 = rsaCb1/(rsaK*rsaK) &
+                                      + (1.+rsaCb2)/rsaCb3
+    real(kind=realType), parameter :: rsaCw2 = 0.3_realType
+    real(kind=realType), parameter :: rsaCw3 = 2.0_realType
+    real(kind=realType), parameter :: rsaCt1 = 1.0_realType
+    real(kind=realType), parameter :: rsaCt2 = 2.0_realType
+    real(kind=realType), parameter :: rsaCt3 = 1.2_realType
+    real(kind=realType), parameter :: rsaCt4 = 0.5_realType
+    real(kind=realType), parameter :: rsaCrot = 2.0_realType
+    real(kind=realType), parameter :: rsaCr1 = 0.5_realType
 
 !
 !       K-omega constants.
 !
-       real(kind=realType), parameter :: rkwK     = 0.41_realType
-       real(kind=realType), parameter :: rkwSigk1 = 0.5_realType
-       real(kind=realType), parameter :: rkwSigw1 = 0.5_realType
-       real(kind=realType), parameter :: rkwSigd1 = 0.5_realType
-       real(kind=realType), parameter :: rkwBeta1 = 0.0750_realType
-       real(kind=realType), parameter :: rkwBetas = 0.09_realType
+    real(kind=realType), parameter :: rkwK = 0.41_realType
+    real(kind=realType), parameter :: rkwSigk1 = 0.5_realType
+    real(kind=realType), parameter :: rkwSigw1 = 0.5_realType
+    real(kind=realType), parameter :: rkwSigd1 = 0.5_realType
+    real(kind=realType), parameter :: rkwBeta1 = 0.0750_realType
+    real(kind=realType), parameter :: rkwBetas = 0.09_realType
 !
 !       K-omega SST constants.
 !
-       real(kind=realType), parameter :: rSSTK     = 0.41_realType
-       real(kind=realType), parameter :: rSSTA1    = 0.31_realType
-       real(kind=realType), parameter :: rSSTBetas = 0.09_realType
+    real(kind=realType), parameter :: rSSTK = 0.41_realType
+    real(kind=realType), parameter :: rSSTA1 = 0.31_realType
+    real(kind=realType), parameter :: rSSTBetas = 0.09_realType
 
-       real(kind=realType), parameter :: rSSTSigk1 = 0.85_realType
-       real(kind=realType), parameter :: rSSTSigw1 = 0.5_realType
-       real(kind=realType), parameter :: rSSTBeta1 = 0.0750_realType
+    real(kind=realType), parameter :: rSSTSigk1 = 0.85_realType
+    real(kind=realType), parameter :: rSSTSigw1 = 0.5_realType
+    real(kind=realType), parameter :: rSSTBeta1 = 0.0750_realType
 
-       real(kind=realType), parameter :: rSSTSigk2 = 1.0_realType
-       real(kind=realType), parameter :: rSSTSigw2 = 0.856_realType
-       real(kind=realType), parameter :: rSSTBeta2 = 0.0828_realType
+    real(kind=realType), parameter :: rSSTSigk2 = 1.0_realType
+    real(kind=realType), parameter :: rSSTSigw2 = 0.856_realType
+    real(kind=realType), parameter :: rSSTBeta2 = 0.0828_realType
 !
 !       K-tau constants.
 !
-       real(kind=realType), parameter :: rktK     = 0.41_realType
-       real(kind=realType), parameter :: rktSigk1 = 0.5_realType
-       real(kind=realType), parameter :: rktSigt1 = 0.5_realType
-       real(kind=realType), parameter :: rktSigd1 = 0.5_realType
-       real(kind=realType), parameter :: rktBeta1 = 0.0750_realType
-       real(kind=realType), parameter :: rktBetas = 0.09_realType
+    real(kind=realType), parameter :: rktK = 0.41_realType
+    real(kind=realType), parameter :: rktSigk1 = 0.5_realType
+    real(kind=realType), parameter :: rktSigt1 = 0.5_realType
+    real(kind=realType), parameter :: rktSigd1 = 0.5_realType
+    real(kind=realType), parameter :: rktBeta1 = 0.0750_realType
+    real(kind=realType), parameter :: rktBetas = 0.09_realType
 !
 !       V2-f constants.
 !
-       real(kind=realType), parameter :: rvfC1    = 1.4_realType
-       real(kind=realType), parameter :: rvfC2    = 0.3_realType
-       real(kind=realType), parameter :: rvfBeta  = 1.9_realType
-       real(kind=realType), parameter :: rvfSigk1 = 1.0_realType
-       real(kind=realType), parameter :: rvfSige1 = 0.7692307692_realType
-       real(kind=realType), parameter :: rvfSigv1 = 1.00_realType
-       real(kind=realType), parameter :: rvfCn    = 70.0_realType
+    real(kind=realType), parameter :: rvfC1 = 1.4_realType
+    real(kind=realType), parameter :: rvfC2 = 0.3_realType
+    real(kind=realType), parameter :: rvfBeta = 1.9_realType
+    real(kind=realType), parameter :: rvfSigk1 = 1.0_realType
+    real(kind=realType), parameter :: rvfSige1 = 0.7692307692_realType
+    real(kind=realType), parameter :: rvfSigv1 = 1.00_realType
+    real(kind=realType), parameter :: rvfCn = 70.0_realType
 
-       real(kind=realType), parameter :: rvfN1Cmu = 0.190_realType
-       real(kind=realType), parameter :: rvfN1A   = 1.300_realType
-       real(kind=realType), parameter :: rvfN1B   = 0.250_realType
-       real(kind=realType), parameter :: rvfN1Cl  = 0.300_realType
-       real(kind=realType), parameter :: rvfN6Cmu = 0.220_realType
-       real(kind=realType), parameter :: rvfN6A   = 1.400_realType
-       real(kind=realType), parameter :: rvfN6B   = 0.045_realType
-       real(kind=realType), parameter :: rvfN6Cl  = 0.230_realType
+    real(kind=realType), parameter :: rvfN1Cmu = 0.190_realType
+    real(kind=realType), parameter :: rvfN1A = 1.300_realType
+    real(kind=realType), parameter :: rvfN1B = 0.250_realType
+    real(kind=realType), parameter :: rvfN1Cl = 0.300_realType
+    real(kind=realType), parameter :: rvfN6Cmu = 0.220_realType
+    real(kind=realType), parameter :: rvfN6A = 1.400_realType
+    real(kind=realType), parameter :: rvfN6B = 0.045_realType
+    real(kind=realType), parameter :: rvfN6Cl = 0.230_realType
 
-       real(kind=realType) :: rvfLimitK, rvfLimitE, rvfCl
-       real(kind=realType) :: rvfCmu
+    real(kind=realType) :: rvfLimitK, rvfLimitE, rvfCl
+    real(kind=realType) :: rvfCmu
 !
 !       Variables to store the parameters for the wall functions fits.
 !       As these variables depend on the turbulence model they are set
@@ -89,41 +89,41 @@ module paramTurb
 !       constructed such that both the function and the derivatives
 !       are continuous. Consequently cubic polynomials are used.
 !
-       ! nFit:               Number of intervals of the curve.
-       ! ypT(0:nFit):        y+ values at the interval boundaries.
-       ! reT(0:nFit):        Reynolds number at the interval
-       !                     boundaries, where the Reynolds number is
-       !                     defined with the local velocity and the
-       !                     wall distance.
-       ! up0(nFit):          Coefficient 0 in the fit for the
-       !                     nondimensional tangential velocity as a
-       !                     function of the Reynolds number.
-       ! up1(nFit):          Idem for coefficient 1.
-       ! up2(nFit):          Idem for coefficient 2.
-       ! up3(nFit):          Idem for coefficient 3.
-       ! tup0(nFit,nt1:nt2): Coefficient 0 in the fit for the
-       !                     nondimensional turbulence variables as a
-       !                     function of y+.
-       ! tup1(nFit,nt1:nt2): Idem for coefficient 1.
-       ! tup2(nFit,nt1:nt2): Idem for coefficient 2.
-       ! tup3(nFit,nt1:nt2): Idem for coefficient 3.
-       ! tuLogFit(nt1:nt2):  Whether or not the logarithm of the variable
-       !                     has been fitted.
+    ! nFit:               Number of intervals of the curve.
+    ! ypT(0:nFit):        y+ values at the interval boundaries.
+    ! reT(0:nFit):        Reynolds number at the interval
+    !                     boundaries, where the Reynolds number is
+    !                     defined with the local velocity and the
+    !                     wall distance.
+    ! up0(nFit):          Coefficient 0 in the fit for the
+    !                     nondimensional tangential velocity as a
+    !                     function of the Reynolds number.
+    ! up1(nFit):          Idem for coefficient 1.
+    ! up2(nFit):          Idem for coefficient 2.
+    ! up3(nFit):          Idem for coefficient 3.
+    ! tup0(nFit,nt1:nt2): Coefficient 0 in the fit for the
+    !                     nondimensional turbulence variables as a
+    !                     function of y+.
+    ! tup1(nFit,nt1:nt2): Idem for coefficient 1.
+    ! tup2(nFit,nt1:nt2): Idem for coefficient 2.
+    ! tup3(nFit,nt1:nt2): Idem for coefficient 3.
+    ! tuLogFit(nt1:nt2):  Whether or not the logarithm of the variable
+    !                     has been fitted.
 
-       integer(kind=intType) :: nFit
+    integer(kind=intType) :: nFit
 
-       real(kind=realType), dimension(:), allocatable :: ypT, reT
-       real(kind=realType), dimension(:), allocatable :: up0, up1
-       real(kind=realType), dimension(:), allocatable :: up2, up3
+    real(kind=realType), dimension(:), allocatable :: ypT, reT
+    real(kind=realType), dimension(:), allocatable :: up0, up1
+    real(kind=realType), dimension(:), allocatable :: up2, up3
 
-       real(kind=realType), dimension(:,:), allocatable :: tup0, tup1
-       real(kind=realType), dimension(:,:), allocatable :: tup2, tup3
+    real(kind=realType), dimension(:, :), allocatable :: tup0, tup1
+    real(kind=realType), dimension(:, :), allocatable :: tup2, tup3
 #ifndef USE_TAPENADE
-       real(kind=realType), dimension(:), allocatable :: ypTb, reTb
-       real(kind=realType), dimension(:), allocatable :: up0b, up1b
-       real(kind=realType), dimension(:), allocatable :: up2b, up3b
+    real(kind=realType), dimension(:), allocatable :: ypTb, reTb
+    real(kind=realType), dimension(:), allocatable :: up0b, up1b
+    real(kind=realType), dimension(:), allocatable :: up2b, up3b
 #endif
 
-       logical, dimension(:), allocatable :: tuLogFit
+    logical, dimension(:), allocatable :: tuLogFit
 
-       end module paramTurb
+end module paramTurb
diff --git a/src/modules/wallDistanceData.F90 b/src/modules/wallDistanceData.F90
index 2365fc689..a9d613a33 100644
--- a/src/modules/wallDistanceData.F90
+++ b/src/modules/wallDistanceData.F90
@@ -1,49 +1,48 @@
 module wallDistanceData
 
-  ! This module stores some additional data required for the fast wall
-  ! distance computations.
+    ! This module stores some additional data required for the fast wall
+    ! distance computations.
 
-  use constants
+    use constants
 #ifndef USE_TAPENADE
 #include <petsc/finclude/petsc.h>
-  use petsc
-  implicit none
+    use petsc
+    implicit none
 #endif
 
-  ! xVolume: flatten 1-D vector of all volume nodes for all
-  ! spectral instances. The dimension is the number of levels
+    ! xVolume: flatten 1-D vector of all volume nodes for all
+    ! spectral instances. The dimension is the number of levels
 
-  ! xSurf: flatten 1-D vector of the surface nodes for the faces
-  ! that individual processors require for doing its own wall
-  ! distance calculation
+    ! xSurf: flatten 1-D vector of the surface nodes for the faces
+    ! that individual processors require for doing its own wall
+    ! distance calculation
 
-  ! wallScatter: The vecScatter objects that select the nodes
-  ! from xVolume and desposit them into xSurf. This is the
-  ! forward operation. The reverse operation is used for the
-  ! sensitivities.
+    ! wallScatter: The vecScatter objects that select the nodes
+    ! from xVolume and desposit them into xSurf. This is the
+    ! forward operation. The reverse operation is used for the
+    ! sensitivities.
 
-  ! wallDistanceDataAllocated : Logical array keeping track of
-  ! whether or not the petsc data is allocated.
+    ! wallDistanceDataAllocated : Logical array keeping track of
+    ! whether or not the petsc data is allocated.
 
-  ! indicesForSPS: A simple derived type for keeping track of
-  ! indices while doing wall distance computation.
+    ! indicesForSPS: A simple derived type for keeping track of
+    ! indices while doing wall distance computation.
 
+    real(kind=realType), dimension(:), pointer :: xSurf
 
-  real(kind=realType), dimension(:), pointer :: xSurf
-
-  logical, dimension(:), allocatable  :: wallDistanceDataAllocated
-  logical, dimension(:), allocatable ::  updateWallAssociation
+    logical, dimension(:), allocatable  :: wallDistanceDataAllocated
+    logical, dimension(:), allocatable ::  updateWallAssociation
 
 #ifndef USE_TAPENADE
-  real(kind=realType), dimension(:), pointer :: xSurfd
-  Vec , dimension(:), allocatable :: xVolumeVec
-  Vec , dimension(:, :), allocatable :: xSurfVec
-  Vec , dimension(:), allocatable :: xSurfVecd
-  VecScatter, dimension(:, :), allocatable :: wallScatter
-  IS IS1, IS2
-  real(kind=realType), dimension(:), pointer :: xVolume
+    real(kind=realType), dimension(:), pointer :: xSurfd
+    Vec, dimension(:), allocatable :: xVolumeVec
+    Vec, dimension(:, :), allocatable :: xSurfVec
+    Vec, dimension(:), allocatable :: xSurfVecd
+    VecScatter, dimension(:, :), allocatable :: wallScatter
+    IS IS1, IS2
+    real(kind=realType), dimension(:), pointer :: xVolume
 #endif
 
-  integer(kind=intType), dimension(:,:), allocatable :: nCellBlockOffset
+    integer(kind=intType), dimension(:, :), allocatable :: nCellBlockOffset
 
 end module wallDistanceData
diff --git a/src/output/outputMod.F90 b/src/output/outputMod.F90
index 3b7006656..e1657a058 100644
--- a/src/output/outputMod.F90
+++ b/src/output/outputMod.F90
@@ -1,3864 +1,3853 @@
 module outputMod
-  !
-  !       This local module contains variables used when writing the
-  !       grid and solution files.
-  !
-  use constants, only : intType, maxStringLen
-  use su_cgns, only : cgsize_t
-  implicit none
-
-  ! nblocksCGNSblock(0:cgnsNDom): The number of local blocks per
-  !                               cgns block in cumulative storage
-  !                               format.
-  ! blocksCGNSblock(nDom):        The corresponding local block ID's.
-
-  integer(kind=intType), dimension(:), allocatable :: nblocksCGNSblock
-  integer(kind=intType), dimension(:), allocatable :: blocksCGNSblock
-
-  ! nDomPerProc(0:nProc):  The number of flow domains for each
-  !                        processor in cumulative storage format.
-  ! IDsBegOrAllDoms(4,..): The CGNS block numbers and the beginning
-  !                        indices for all of the flow domains on
-  !                        every processor.
-
-  integer(kind=intType), dimension(:),   allocatable :: nDomPerProc
-  integer(kind=intType), dimension(:,:), allocatable :: IDsBegOrAllDoms
-
-  ! nGridsToWrite:   Number of grid files to write.
-  ! nVolSolToWrite:  Number of volume solution files to write.
-  !                  For CGNS nVolSolToWrite == nGridsToWrite.
-  ! nSurfSolToWrite: Number of surface solution files to write.
-
-  integer(kind=intType) :: nGridsToWrite
-  integer(kind=intType) :: nVolSolToWrite
-  integer(kind=intType) :: nSurfSolToWrite
-
-  ! gridFileNames(nGridsToWrite):      Names of the grid files to
-  !                                    write.
-  ! volSolFileNames(nVolSolToWrite):   Names of the volume solution
-  !                                    files to write.
-  ! surfSolFileNames(nSurfSolToWrite): Names of the surface solution
-  !                                    files to write.
-  ! fileIDs(nFilesToWrite):            Identifiers for the files to
-  !                                    write. As the grids, volume
-  !                                    solution and surface solution
-  !                                    files are written one after
-  !                                    the other, only one set is
-  !                                    needed.
-  ! cgnsBases(nFilesToWrite):          The CGNS base IDs of the
-  !                                    files.
-
-  character(len=maxStringLen), dimension(:), allocatable :: &
-       gridFileNames, volSolFileNames, surfSolFileNames
-
-  integer, dimension(:), allocatable :: fileIDs
-  integer, dimension(:), allocatable :: cgnsBases
-  integer, dimension(:), allocatable :: cgnsIsoSurfBases
-  integer, dimension(:), allocatable :: cgnsLiftDistBases
-
-  ! useLinksInCGNS:     Whether or not to use links in CGNS between
-  !                     the grid and volume solution files. If not,
-  !                     the grid and solution are written in the
-  !                     same file.
-
-  logical :: useLinksInCGNS
-
-contains
-
-  subroutine numberOfIsoSurfVariables(nIsoSolVar)
-    !
-    !       numberOfVolSolVariables determines the number of variables
-    !       to be written on the isosurface. These are similar to the
-    !       volume variables.
-    !
-    use flowVarRefState
-    use inputPhysics
-    use extraOutput
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(out) :: nIsoSolVar
-
-    nIsoSolvar   = 0
-
-    ! Check whether or not some additional solution variables must
-    ! be written.
-    if (isoWriteRho)           nIsoSolvar = nIsoSolvar + 1
-    if( isoWriteVx )           nIsoSolvar = nIsoSolvar + 1
-    if( isoWriteVy )           nIsoSolvar = nIsoSolvar + 1
-    if( isoWriteVz )           nIsoSolvar = nIsoSolvar + 1
-    if( isoWriteP )            nIsoSolvar = nIsoSolvar + 1
-    if( isoWriteTurb )         nIsoSolvar  = nIsoSolvar + (nw - nwf)
-    if( isoWriteMx )           nIsoSolvar = nIsoSolvar + 1
-    if( isoWriteMy )           nIsoSolvar = nIsoSolvar + 1
-    if( isoWriteMz )           nIsoSolvar = nIsoSolvar + 1
-    if( isoWriteRVx )          nIsoSolvar = nIsoSolvar + 1
-    if( isoWriteRVy )          nIsoSolvar = nIsoSolvar + 1
-    if( isoWriteRVz )          nIsoSolvar = nIsoSolvar + 1
-    if( isoWriteRhoe )         nIsoSolvar = nIsoSolvar + 1
-    if( isoWriteTemp )         nIsoSolvar = nIsoSolvar + 1
-    if( isoWriteCp )           nIsoSolvar = nIsoSolvar + 1
-    if( isoWriteMach )         nIsoSolvar = nIsoSolvar + 1
-    if( isoWriteRMach )        nIsoSolvar = nIsoSolvar + 1
-    if( isoWriteMachTurb )     nIsoSolvar = nIsoSolvar + 1
-    if( isoWriteEddyVis )      nIsoSolvar = nIsoSolvar + 1
-    if( isoWriteRatioEddyVis ) nIsoSolvar = nIsoSolvar + 1
-    if( isoWriteDist )         nIsoSolvar = nIsoSolvar + 1
-    if( isoWriteVort )         nIsoSolvar = nIsoSolvar + 1
-    if( isoWriteVortx )        nIsoSolvar = nIsoSolvar + 1
-    if( isoWriteVorty )        nIsoSolvar = nIsoSolvar + 1
-    if( isoWriteVortz )        nIsoSolvar = nIsoSolvar + 1
-    if( isoWritePtotloss )     nIsoSolvar = nIsoSolvar + 1
-    if( isoWriteShock )        nIsoSolVar = nIsoSolVar + 1
-    if (isoWriteFilteredShock) nIsoSolVar = nIsoSolVar + 1
-
-    ! Check the discrete variables.
-
-    if( isoWriteResRho )  nIsoSolvar  = nIsoSolvar + 1
-    if( isoWriteResMom )  nIsoSolvar  = nIsoSolvar + 3
-    if( isoWriteResRhoe ) nIsoSolvar  = nIsoSolvar + 1
-    if( isoWriteResTurb ) nIsoSolvar  = nIsoSolvar   &
-         + (nw - nwf)
-
-    if( isoWriteBlank ) nIsoSolvar  = nIsoSolvar + 1
-
-  end subroutine numberOfIsoSurfVariables
-
-  subroutine numberOfSurfSolVariables(nSolVar)
-    !
-    !       numberOfSurfSolVariables determines the number of surface
-    !       variables to be written to the surface solution file.
-    !
-    use precision
-    use extraOutput
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(out) :: nSolVar
-
-    ! Initialize the number of solution variables to zero.
-
-    nSolVar = 0
-
-    ! Determine the number of surface variables to be written.
-
-    if( surfWriteRho )      nSolVar = nSolVar +1
-    if( surfWriteP   )      nSolVar = nSolVar +1
-    if( surfWriteTemp )     nSolVar = nSolVar +1
-    if( surfWriteVx )       nSolVar = nSolVar +1
-    if( surfWriteVy )       nSolVar = nSolVar +1
-    if( surfWriteVz )       nSolVar = nSolVar +1
-    if( surfWriteRVx )      nSolVar = nSolVar +1
-    if( surfWriteRVy )      nSolVar = nSolVar +1
-    if( surfWriteRVz )      nSolVar = nSolVar +1
-    if( surfWriteCp )       nSolVar = nSolVar +1
-    if( surfWritePtotloss ) nSolVar = nSolVar +1
-    if( surfWriteMach )     nSolVar = nSolVar +1
-    if( surfWriteRMach )    nSolVar = nSolVar +1
-    if( surfWriteCf )       nSolVar = nSolVar +1
-    if( surfWriteCh )       nSolVar = nSolVar +1
-    if( surfWriteYplus )    nSolVar = nSolVar +1
-    if( surfWriteCfx )      nSolVar = nSolVar +1
-    if( surfWriteCfy )      nSolVar = nSolVar +1
-    if( surfWriteCfz )      nSolVar = nSolVar +1
-    if( surfWriteBlank )    nSolVar = nSolVar +1
-    if( surfWriteSepSensor )      nSolVar = nSolVar +1
-    if( surfWriteCavitation )     nsolVar = nsolVar +1
-    if( surfWriteGC )             nsolVar = nsolVar +1
-
-  end subroutine numberOfSurfSolVariables
-
-  subroutine numberOfVolSolVariables(nVolSolvar, nVolDiscrVar)
-    !
-    !       numberOfVolSolVariables determines the number of volume
-    !       variables to be written to the solution file. A distinction is
-    !       made between solution variables and discrete variables. The
-    !       former discribes the actual solution, the latter is additional
-    !       info such as equation residuals.
-    !
-    use flowVarRefState
-    use inputPhysics
-    use extraOutput
-    implicit none
-    !
-    !      Subroutine arguments.
     !
-    integer(kind=intType), intent(out) :: nVolSolvar, nVolDiscrVar
-
-    ! Initialize the number of solution variables to the number of
-    ! independent variables and the number of discrete variables to 0.
-
-    nVolSolvar   = nw
-    nVolDiscrVar = 0
-
-    ! Check whether or not some additional solution variables must
-    ! be written.
-
-    if( volWriteMx )           nVolSolvar = nVolSolvar + 1
-    if( volWriteMy )           nVolSolvar = nVolSolvar + 1
-    if( volWriteMz )           nVolSolvar = nVolSolvar + 1
-    if( volWriteRVx )          nVolSolvar = nVolSolvar + 1
-    if( volWriteRVy )          nVolSolvar = nVolSolvar + 1
-    if( volWriteRVz )          nVolSolvar = nVolSolvar + 1
-    if( volWriteRhoe )         nVolSolvar = nVolSolvar + 1
-    if( volWriteTemp )         nVolSolvar = nVolSolvar + 1
-    if( volWriteCp )           nVolSolvar = nVolSolvar + 1
-    if( volWriteMach )         nVolSolvar = nVolSolvar + 1
-    if( volWriteRMach )        nVolSolvar = nVolSolvar + 1
-    if( volWriteMachTurb )     nVolSolvar = nVolSolvar + 1
-    if( volWriteEddyVis )      nVolSolvar = nVolSolvar + 1
-    if( volWriteRatioEddyVis ) nVolSolvar = nVolSolvar + 1
-    if( volWriteDist )         nVolSolvar = nVolSolvar + 1
-    if( volWriteVort )         nVolSolvar = nVolSolvar + 1
-    if( volWriteVortx )        nVolSolvar = nVolSolvar + 1
-    if( volWriteVorty )        nVolSolvar = nVolSolvar + 1
-    if( volWriteVortz )        nVolSolvar = nVolSolvar + 1
-    if( volWritePtotloss )     nVolSolvar = nVolSolvar + 1
-    if( volWriteShock )        nVolSolvar = nVolSolvar + 1
-    if( volWriteFilteredShock) nVolSolvar = nVolSolvar + 1
-    if( volWriteGC  )          nVolSolvar = nVolSolvar + 1
-    if( volWriteStatus )       nVolSolvar = nVolSolvar + 1
-    if( volWriteIntermittency ) nVolDiscrVar  = nVolDiscrVar + 1
-
-    ! Check the discrete variables.
-
-    if( volWriteResRho )  nVolDiscrVar  = nVolDiscrVar + 1
-    if( volWriteResMom )  nVolDiscrVar  = nVolDiscrVar + 3
-    if( volWriteResRhoe ) nVolDiscrVar  = nVolDiscrVar + 1
-    if( volWriteResTurb ) nVolDiscrVar  = nVolDiscrVar   &
-         + (nw - nwf)
-
-    if( volWriteBlank ) nVolDiscrVar  = nVolDiscrVar + 1
-
-    if( volwriteKs )    nVolDiscrVar  = nVolDiscrVar + 1
-
-
-  end subroutine numberOfVolSolVariables
-
-  subroutine copyDataBufSinglePrecision(val, buffer,      &
-       iBeg, jBeg, kBeg, &
-       iEnd, jEnd, kEnd, subRange)
+    !       This local module contains variables used when writing the
+    !       grid and solution files.
     !
-    !       copyDataBufSinglePrecision stores the given 1D buffer into the
-    !       subrange of the 3D single precision val array.
-    !
-    use precision
+    use constants, only: intType, maxStringLen
+    use su_cgns, only: cgsize_t
     implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: iBeg, jBeg, kBeg
-    integer(kind=intType), intent(in) :: iEnd, jEnd, kEnd
-    integer(kind=intType), dimension(3,2), intent(in) :: subRange
-
-    real(kind=realType), dimension(*), intent(in) :: buffer
-    real(kind=4), dimension(iBeg:iEnd,jBeg:jEnd,kBeg:kEnd), &
-         intent(inout) :: val
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, ll
-
-    ! Copy the subrange into val.
-
-    ll = 0
-    do k=subRange(3,1), subRange(3,2)
-       do j=subRange(2,1), subRange(2,2)
-          do i=subRange(1,1), subRange(1,2)
-             ll = ll + 1
-             val(i,j,k) = real(buffer(ll), singleType)
-          enddo
-       enddo
-    enddo
-
-  end subroutine copyDataBufSinglePrecision
-
-  !      ==================================================================
-
-  subroutine copyDataBufDoublePrecision(val, buffer,      &
-       iBeg, jBeg, kBeg, &
-       iEnd, jEnd, kEnd, subRange)
-    !
-    !       copyDataBufDoublePrecision stores the given 1D buffer into the
-    !       subrange of the 3D double precision val array.
-    !
-    use precision
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: iBeg, jBeg, kBeg
-    integer(kind=intType), intent(in) :: iEnd, jEnd, kEnd
-    integer(kind=intType), dimension(3,2), intent(in) :: subRange
-
-    real(kind=realType), dimension(*), intent(in) :: buffer
-    real(kind=8), dimension(iBeg:iEnd,jBeg:jEnd,kBeg:kEnd), &
-         intent(inout) :: val
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, ll
-
-    ! Copy the subrange into val.
-
-    ll = 0
-    do k=subRange(3,1), subRange(3,2)
-       do j=subRange(2,1), subRange(2,2)
-          do i=subRange(1,1), subRange(1,2)
-             ll = ll + 1
-             val(i,j,k) = real(buffer(ll), doubleType)
-          enddo
-       enddo
-    enddo
-
-  end subroutine copyDataBufDoublePrecision
-
-  subroutine volSolNames(solNames)
-    !
-    !       volSolNames sets the names for the volume variables to be
-    !       written to the volume solution file. Sids convention names are
-    !       used as much as possible.
-    !
-    use constants
-    use cgnsNames
-    use inputPhysics
-    use flowVarRefState
-    use extraOutput
-    implicit none
-    !
-    !      Subroutine argument.
-    !
-    character(len=*), dimension(*), intent(out) :: solNames
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn
-    !
-    ! First store the names of the independent flow variables.
-
-    solNames(1) = cgnsDensity
-    solNames(2) = cgnsVelx
-    solNames(3) = cgnsVely
-    solNames(4) = cgnsVelz
-    solNames(5) = cgnsPressure
-
-    ! The turbulent variables if the RANS equations are solved.
-    ! Note that these are the primitive variables and not the
-    ! conservative ones. The reason is that the sids conventions only
-    ! defines these names and not the conservative ones.
-
-    if(equations == RANSEquations) then
-
-       select case(turbModel)
-
-       case(spalartAllmaras, spalartAllmarasEdwards)
-          solNames(itu1) = cgnsTurbSaNu
-
-       case(komegaWilcox, komegaModified, menterSST)
-          solNames(itu1) = cgnsTurbK
-          solNames(itu2) = cgnsTurbOmega
-
-       case(ktau)
-          solNames(itu1) = cgnsTurbK
-          solNames(itu2) = cgnsTurbTau
-
-       case(v2f)
-          solNames(itu1) = cgnsTurbK
-          solNames(itu2) = cgnsTurbEpsilon
-          solNames(itu3) = cgnsTurbV2
-          solNames(itu4) = cgnsTurbF
-
-       end select
-
-    endif
-
-    ! Initialize nn to the number of independent variables.
-
-    nn = nw
-
-    ! Check the additional variables to be written.
-
-    if( volWriteMx )  then
-       nn = nn + 1
-       solNames(nn) = cgnsMomx
-    endif
-
-    if( volWriteMy )  then
-       nn = nn + 1
-       solNames(nn) = cgnsMomy
-    endif
-
-    if( volWriteMz )  then
-       nn = nn + 1
-       solNames(nn) = cgnsMomz
-    endif
-
-    if( volWriteRVx )  then
-       nn = nn + 1
-       solNames(nn) = cgnsRelVelx
-    endif
-
-    if( volWriteRVy )  then
-       nn = nn + 1
-       solNames(nn) = cgnsRelVely
-    endif
-
-    if( volWriteRVz )  then
-       nn = nn + 1
-       solNames(nn) = cgnsRelVelz
-    endif
-
-    if( volWriteRhoe ) then
-       nn = nn + 1
-       solNames(nn) = cgnsEnergy
-    endif
-
-    if( volWriteTemp ) then
-       nn = nn + 1
-       solNames(nn) = cgnsTemp
-    endif
-
-    if( volWriteCp ) then
-       nn = nn + 1
-       solNames(nn) = cgnsCp
-    endif
-
-    if( volWriteMach ) then
-       nn = nn + 1
-       solNames(nn) = cgnsMach
-    endif
-
-    if( volWriteRMach ) then
-       nn = nn + 1
-       solNames(nn) = cgnsRelMach
-    endif
-
-    if( volWriteMachTurb ) then
-       nn = nn + 1
-       solNames(nn) = cgnsMachTurb
-    endif
-
-    if( volWriteEddyVis ) then
-       nn = nn + 1
-       solNames(nn) = cgnsEddy
-    endif
-
-    if( volWriteRatioEddyVis ) then
-       nn = nn + 1
-       solNames(nn) = cgnsEddyRatio
-    endif
-
-    if( volWriteDist ) then
-       nn = nn + 1
-       solNames(nn) = cgNSWallDist
-    endif
-
-    if( volWriteVort ) then
-       nn = nn + 1
-       solNames(nn) = cgnsVortMagn
-    endif
-
-    if( volWriteVortx ) then
-       nn = nn + 1
-       solNames(nn) = cgnsVortx
-    endif
-
-    if( volWriteVorty ) then
-       nn = nn + 1
-       solNames(nn) = cgnsVorty
-    endif
-
-    if( volWriteVortz ) then
-       nn = nn + 1
-       solNames(nn) = cgnsVortz
-    endif
-
-    if( volWritePtotloss ) then
-       nn = nn + 1
-       solNames(nn) = cgnsPtotloss
-    endif
-
-    if( volWriteResRho ) then
-       nn = nn + 1
-       solNames(nn) = cgnsResRho
-    endif
-
-    if( volWriteResMom ) then
-       nn = nn + 1
-       solNames(nn) = cgnsResMomx
-
-       nn = nn + 1
-       solNames(nn) = cgnsResMomy
-
-       nn = nn + 1
-       solNames(nn) = cgnsResMomz
-    endif
-
-    if( volWriteResRhoe) then
-       nn = nn + 1
-       solNames(nn) = cgnsResRhoe
-    endif
-
-    if( volWriteResTurb ) then
-
-       select case(turbModel)
-
-       case(spalartAllmaras, spalartAllmarasEdwards)
-          nn = nn + 1
-          solNames(nn) = cgnsResNu
-
-       case(komegaWilcox, komegaModified, menterSST)
-          nn = nn + 1
-          solNames(nn) = cgnsResK
-
-          nn = nn + 1
-          solNames(nn) = cgnsResOmega
-
-       case(ktau)
-          nn = nn + 1
-          solNames(nn) = cgnsResK
-
-          nn = nn + 1
-          solNames(nn) = cgnsResTau
-
-       case(v2f)
-          nn = nn + 1
-          solNames(nn) = cgnsResK
-
-          nn = nn + 1
-          solNames(nn) = cgnsResEpsilon
 
-          nn = nn + 1
-          solNames(nn) = cgnsResV2
+    ! nblocksCGNSblock(0:cgnsNDom): The number of local blocks per
+    !                               cgns block in cumulative storage
+    !                               format.
+    ! blocksCGNSblock(nDom):        The corresponding local block ID's.
+
+    integer(kind=intType), dimension(:), allocatable :: nblocksCGNSblock
+    integer(kind=intType), dimension(:), allocatable :: blocksCGNSblock
+
+    ! nDomPerProc(0:nProc):  The number of flow domains for each
+    !                        processor in cumulative storage format.
+    ! IDsBegOrAllDoms(4,..): The CGNS block numbers and the beginning
+    !                        indices for all of the flow domains on
+    !                        every processor.
+
+    integer(kind=intType), dimension(:), allocatable :: nDomPerProc
+    integer(kind=intType), dimension(:, :), allocatable :: IDsBegOrAllDoms
+
+    ! nGridsToWrite:   Number of grid files to write.
+    ! nVolSolToWrite:  Number of volume solution files to write.
+    !                  For CGNS nVolSolToWrite == nGridsToWrite.
+    ! nSurfSolToWrite: Number of surface solution files to write.
+
+    integer(kind=intType) :: nGridsToWrite
+    integer(kind=intType) :: nVolSolToWrite
+    integer(kind=intType) :: nSurfSolToWrite
+
+    ! gridFileNames(nGridsToWrite):      Names of the grid files to
+    !                                    write.
+    ! volSolFileNames(nVolSolToWrite):   Names of the volume solution
+    !                                    files to write.
+    ! surfSolFileNames(nSurfSolToWrite): Names of the surface solution
+    !                                    files to write.
+    ! fileIDs(nFilesToWrite):            Identifiers for the files to
+    !                                    write. As the grids, volume
+    !                                    solution and surface solution
+    !                                    files are written one after
+    !                                    the other, only one set is
+    !                                    needed.
+    ! cgnsBases(nFilesToWrite):          The CGNS base IDs of the
+    !                                    files.
+
+    character(len=maxStringLen), dimension(:), allocatable :: &
+        gridFileNames, volSolFileNames, surfSolFileNames
+
+    integer, dimension(:), allocatable :: fileIDs
+    integer, dimension(:), allocatable :: cgnsBases
+    integer, dimension(:), allocatable :: cgnsIsoSurfBases
+    integer, dimension(:), allocatable :: cgnsLiftDistBases
+
+    ! useLinksInCGNS:     Whether or not to use links in CGNS between
+    !                     the grid and volume solution files. If not,
+    !                     the grid and solution are written in the
+    !                     same file.
+
+    logical :: useLinksInCGNS
 
-          nn = nn + 1
-          solNames(nn) = cgnsResF
-
-       end select
-
-    endif
-
-    if ( volWriteShock) then
-       nn = nn + 1
-       solNames(nn) = cgnsShock
-    end if
-
-    if ( volWriteFilteredShock) then
-       nn = nn + 1
-       solNames(nn) = cgnsFilteredShock
-    end if
-
-    if( volWriteBlank) then
-       nn = nn + 1
-       solNames(nn) = cgnsBlank
-    endif
-
-    if( volWriteGC) then
-       nn = nn + 1
-       solNames(nn) = cgnsGC
-    endif
-
-    if( volWriteStatus) then
-       nn = nn + 1
-       solNames(nn) = cgnsStatus
-    endif
-
-    if( volWriteIntermittency) then
-       nn = nn + 1
-       solNames(nn) = cgnsIntermittency
-    endif
-
-    if( volWriteKs) then
-       nn = nn + 1
-       solNames(nn) = cgnsSandGrainRoughness
-    endif
-
-  end subroutine volSolNames
-
-  subroutine surfSolNames(solNames)
-    !
-    !       surfSolNames sets the names for the surface variables to be
-    !       written to the surface solution file. Sids convention names
-    !       are used as much as possible.
-    !
-    use cgnsNames
-    use extraOutput
-    implicit none
-    !
-    !      Subroutine argument.
-    !
-    character(len=*), dimension(*), intent(out) :: solNames
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn
-
-    ! Initialize nn to 0.
-
-    nn = 0
-
-    ! Check which surfaces variables must be written and set
-    ! solNames accordingly.
-
-    if( surfWriteRho ) then
-       nn = nn + 1
-       solNames(nn) = cgnsDensity
-    endif
-
-    if( surfWriteP   ) then
-       nn = nn + 1
-       solNames(nn) = cgnsPressure
-    endif
-
-    if( surfWriteTemp ) then
-       nn = nn + 1
-       solNames(nn) = cgnsTemp
-    endif
-
-    if( surfWriteVx ) then
-       nn = nn + 1
-       solNames(nn) = cgnsVelx
-    endif
-
-    if( surfWriteVy ) then
-       nn = nn + 1
-       solNames(nn) = cgnsVely
-    endif
-
-    if( surfWriteVz ) then
-       nn = nn + 1
-       solNames(nn) = cgnsVelz
-    endif
-
-    if( surfWriteRVx ) then
-       nn = nn + 1
-       solNames(nn) = cgnsRelVelx
-    endif
-
-    if( surfWriteRVy ) then
-       nn = nn + 1
-       solNames(nn) = cgnsRelVely
-    endif
-
-    if( surfWriteRVz ) then
-       nn = nn + 1
-       solNames(nn) = cgnsRelVelz
-    endif
-
-    if( surfWriteCp ) then
-       nn = nn + 1
-       solNames(nn) = cgnsCp
-    endif
-
-    if( surfWritePtotloss ) then
-       nn = nn + 1
-       solNames(nn) = cgnsPtotloss
-    endif
-
-    if( surfWriteMach ) then
-       nn = nn + 1
-       solNames(nn) = cgnsMach
-    endif
-
-    if( surfWriteRMach ) then
-       nn = nn + 1
-       solNames(nn) = cgnsRelMach
-    endif
-
-    if( surfWriteCf ) then
-       nn = nn + 1
-       solNames(nn) = cgnsSkinFmag
-    endif
-
-    if( surfWriteCh ) then
-       nn = nn + 1
-       solNames(nn) = cgnsStanton
-    endif
-
-    if( surfWriteYplus ) then
-       nn = nn + 1
-       solNames(nn) = cgnsYplus
-    endif
-
-    if( surfWriteCfx )  then
-       nn = nn + 1
-       solNames(nn) = cgnsSkinFx
-    endif
-
-    if( surfWriteCfy ) then
-       nn = nn + 1
-       solNames(nn) = cgnsSkinFy
-    endif
-
-    if( surfWriteCfz ) then
-       nn = nn + 1
-       solNames(nn) = cgnsSkinFz
-    endif
-
-    if( surfWriteBlank ) then
-       nn = nn + 1
-       solNames(nn) = cgnsBlank
-    endif
-
-    if (surfWriteSepSensor) then
-       nn = nn + 1
-       solNames(nn) = cgnsSepSensor
-    end if
-
-    if (surfWriteCavitation) then
-       nn = nn + 1
-       solNames(nn) = cgnsCavitation
-    end if
-
-    if (surfWriteAxisMoment) then
-       nn = nn + 1
-       solNames(nn) = cgnsAxisMoment
-    end if
-
-    if (surfWriteGC) then
-       nn = nn + 1
-       solNames(nn) = cgnsGC
-    end if
-
-  end subroutine surfSolNames
-
-  subroutine storeSolInBuffer(buffer, copyInBuffer, solName, &
-       iBeg, iEnd, jBeg, jEnd, kBeg, kEnd)
-    !
-    !       StoreSolInBuffer stores the given range of the variable
-    !       indicated by solName in IOVar and copies it into buffer if
-    !       desired. It is assumed that the variables in blockPointers
-    !       already point to the correct block.
-    !
-    use constants
-    use blockPointers
-    use cgnsGrid
-    use cgnsNames
-    use flowVarRefState
-    use inputPhysics
-    use IOModule
-    use flowUtils, only : computePTot
-    use utils, only : terminate
-    use oversetData, only : oversetPresent
-    use inputIO, only : laminarToTurbulent
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: iBeg, iEnd, jBeg, jEnd
-    integer(kind=intType), intent(in) :: kBeg, kEnd
-
-    real(kind=realType), dimension(*), intent(out) :: buffer
-    character(len=*), intent(in)                   :: solName
+contains
 
-    logical, intent(in) :: copyInBuffer
-    !
-    !      Local parameters
-    !
-    real(kind=realType), parameter :: plim   = 0.001_realType
-    real(kind=realType), parameter :: rholim = 0.001_realType
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, ii, jj, kk, nn
-
-    real(kind=realType) :: uuy, uuz, vvx, vvz, wwx, wwy, tmp
-    real(kind=realType) :: vortx, vorty, vortz, a2, ptotInf, ptot
-    real(kind=realType) :: a, UovA(3), gradP(3)
-
-    real(kind=realType), dimension(:,:,:,:), pointer :: wIO
-
-    ! Set the pointer to the correct entry of IOVar. I'm cheating a
-    ! bit here, because I know that only memory has been allocated
-    ! for the first solution ID of IOVar.
-
-    wIO => IOVar(nbkLocal,1)%w
-
-    ! Determine the variable to be stored, compute it and store
-    ! it in the 1D array buffer.
-
-    select case(solName)
-
-    case (cgnsDensity)
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                wIO(i,j,k,1) = w(i,j,k,irho)
-             enddo
-          enddo
-       enddo
-
-    case (cgnsMomx)
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                wIO(i,j,k,1) = w(i,j,k,irho)*w(i,j,k,ivx)
-             enddo
-          enddo
-       enddo
-
-    case (cgnsMomy)
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                wIO(i,j,k,1) = w(i,j,k,irho)*w(i,j,k,ivy)
-             enddo
-          enddo
-       enddo
-
-    case (cgnsMomz)
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                wIO(i,j,k,1) = w(i,j,k,irho)*w(i,j,k,ivz)
-             enddo
-          enddo
-       enddo
-
-    case (cgnsEnergy)
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                wIO(i,j,k,1) = w(i,j,k,irhoE)
-             enddo
-          enddo
-       enddo
-
-    case (cgnsTurbSaNu,cgnsTurbK)
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                wIO(i,j,k,1) = w(i,j,k,itu1)
-             enddo
-          enddo
-       enddo
-
-    case (cgnsTurbOmega,cgnsTurbTau,cgnsTurbEpsilon)
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                wIO(i,j,k,1) = w(i,j,k,itu2)
-             enddo
-          enddo
-       enddo
-
-    case (cgnsTurbV2)
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                wIO(i,j,k,1) = w(i,j,k,itu3)
-             enddo
-          enddo
-       enddo
-
-    case (cgnsTurbF)
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                wIO(i,j,k,1) = w(i,j,k,itu4)
-             enddo
-          enddo
-       enddo
-
-    case (cgnsVelx)
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                wIO(i,j,k,1) = w(i,j,k,ivx)
-             enddo
-          enddo
-       enddo
-
-    case (cgnsVely)
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                wIO(i,j,k,1) = w(i,j,k,ivy)
-             enddo
-          enddo
-       enddo
-
-    case (cgnsVelz)
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                wIO(i,j,k,1) = w(i,j,k,ivz)
-             enddo
-          enddo
-       enddo
-
-    case (cgnsRelVelx)
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                wIO(i,j,k,1) = w(i,j,k,ivx)-s(i,j,k,1)
-             enddo
-          enddo
-       enddo
-
-    case (cgnsRelVely)
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                wIO(i,j,k,1) = w(i,j,k,ivy)-s(i,j,k,2)
-             enddo
-          enddo
-       enddo
-
-    case (cgnsRelVelz)
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                wIO(i,j,k,1) = w(i,j,k,ivz)-s(i,j,k,3)
-             enddo
-          enddo
-       enddo
-
-    case (cgnsPressure)
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                wIO(i,j,k,1) = p(i,j,k)
-             enddo
-          enddo
-       enddo
-
-    case (cgnsTemp)
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                wIO(i,j,k,1) = p(i,j,k)/(RGas*w(i,j,k,irho))
-             enddo
-          enddo
-       enddo
-
-    case (cgnsCp)
-       tmp = two/(gammaInf*pInf*MachCoef*MachCoef)
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                wIO(i,j,k,1) = tmp*(p(i,j,k) - pInf)
-             enddo
-          enddo
-       enddo
-
-    case (cgnsMach)
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                a2  = gamma(i,j,k)*max(p(i,j,k),plim) &
-                     / max(w(i,j,k,irho),rholim)
-                tmp = (w(i,j,k,ivx)**2 + w(i,j,k,ivy)**2 &
-                     +  w(i,j,k,ivz)**2)/a2
-                wIO(i,j,k,1) = sqrt(max(zero,tmp))
-             enddo
-          enddo
-       enddo
-
-    case (cgnsRelMach)
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                a2  = gamma(i,j,k)*max(p(i,j,k),plim) &
-                     / max(w(i,j,k,irho),rholim)
-                tmp = ((w(i,j,k,ivx)-s(i,j,k,1))**2 +&
-                     (w(i,j,k,ivy)-s(i,j,k,2))**2 &
-                     +(w(i,j,k,ivz)-s(i,j,k,3))**2)/a2
-                wIO(i,j,k,1) = sqrt(max(zero,tmp))
-             enddo
-          enddo
-       enddo
-
-
-    case (cgnsMachTurb)
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                tmp = w(i,j,k,irho)*w(i,j,k,itu1) &
-                     / (gamma(i,j,k)*max(p(i,j,k),plim))
-                wIO(i,j,k,1) = sqrt(max(zero,tmp))
-             enddo
-          enddo
-       enddo
-
-    case (cgnsEddy)
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                wIO(i,j,k,1) = rev(i,j,k)
-             enddo
-          enddo
-       enddo
-
-    case (cgnsEddyRatio)
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                wIO(i,j,k,1) = rev(i,j,k)/rlv(i,j,k)
-             enddo
-          enddo
-       enddo
-
-    case (cgNSWallDist)
-       do k=kBeg,kEnd
-          kk = max(2_intType,k); kk = min(kl,kk)
-          do j=jBeg,jEnd
-             jj = max(2_intType,j); jj = min(jl,jj)
-             do i=iBeg,iEnd
-                ii = max(2_intType,i); ii = min(il,ii)
-                wIO(i,j,k,1) = d2Wall(ii,jj,kk)
-             enddo
-          enddo
-       enddo
-
-    case (cgnsVortMagn)
-
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                tmp = half/vol(i,j,k)
-                uuy = si(i,  j,k,2)*w(i+1,j,k,ivx) &
-                     - si(i-1,j,k,2)*w(i-1,j,k,ivx) &
-                     + sj(i,j,  k,2)*w(i,j+1,k,ivx) &
-                     - sj(i,j-1,k,2)*w(i,j-1,k,ivx) &
-                     + sk(i,j,k,  2)*w(i,j,k+1,ivx) &
-                     - sk(i,j,k-1,2)*w(i,j,k-1,ivx)
-
-                uuz = si(i,  j,k,3)*w(i+1,j,k,ivx) &
-                     - si(i-1,j,k,3)*w(i-1,j,k,ivx) &
-                     + sj(i,j,  k,3)*w(i,j+1,k,ivx) &
-                     - sj(i,j-1,k,3)*w(i,j-1,k,ivx) &
-                     + sk(i,j,k,  3)*w(i,j,k+1,ivx) &
-                     - sk(i,j,k-1,3)*w(i,j,k-1,ivx)
-
-                vvx = si(i,  j,k,1)*w(i+1,j,k,ivy) &
-                     - si(i-1,j,k,1)*w(i-1,j,k,ivy) &
-                     + sj(i,j,  k,1)*w(i,j+1,k,ivy) &
-                     - sj(i,j-1,k,1)*w(i,j-1,k,ivy) &
-                     + sk(i,j,k,  1)*w(i,j,k+1,ivy) &
-                     - sk(i,j,k-1,1)*w(i,j,k-1,ivy)
-
-                vvz = si(i,  j,k,3)*w(i+1,j,k,ivy) &
-                     - si(i-1,j,k,3)*w(i-1,j,k,ivy) &
-                     + sj(i,j,  k,3)*w(i,j+1,k,ivy) &
-                     - sj(i,j-1,k,3)*w(i,j-1,k,ivy) &
-                     + sk(i,j,k,  3)*w(i,j,k+1,ivy) &
-                     - sk(i,j,k-1,3)*w(i,j,k-1,ivy)
-
-                wwx = si(i,  j,k,1)*w(i+1,j,k,ivz) &
-                     - si(i-1,j,k,1)*w(i-1,j,k,ivz) &
-                     + sj(i,j,  k,1)*w(i,j+1,k,ivz) &
-                     - sj(i,j-1,k,1)*w(i,j-1,k,ivz) &
-                     + sk(i,j,k,  1)*w(i,j,k+1,ivz) &
-                     - sk(i,j,k-1,1)*w(i,j,k-1,ivz)
-
-                wwy = si(i,  j,k,2)*w(i+1,j,k,ivz) &
-                     - si(i-1,j,k,2)*w(i-1,j,k,ivz) &
-                     + sj(i,j,  k,2)*w(i,j+1,k,ivz) &
-                     - sj(i,j-1,k,2)*w(i,j-1,k,ivz) &
-                     + sk(i,j,k,  2)*w(i,j,k+1,ivz) &
-                     - sk(i,j,k-1,2)*w(i,j,k-1,ivz)
-
-                vortx = wwy - vvz; vorty = uuz - wwx; vortz = vvx - uuy
-
-                wIO(i,j,k,1) = tmp*sqrt(vortx**2 + vorty**2 + vortz**2)
-             enddo
-          enddo
-       enddo
-
-    case (cgnsVortx)
-
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                tmp = half/vol(i,j,k)
-                vvz = si(i,  j,k,3)*w(i+1,j,k,ivy) &
-                     - si(i-1,j,k,3)*w(i-1,j,k,ivy) &
-                     + sj(i,j,  k,3)*w(i,j+1,k,ivy) &
-                     - sj(i,j-1,k,3)*w(i,j-1,k,ivy) &
-                     + sk(i,j,k,  3)*w(i,j,k+1,ivy) &
-                     - sk(i,j,k-1,3)*w(i,j,k-1,ivy)
-
-                wwy = si(i,  j,k,2)*w(i+1,j,k,ivz) &
-                     - si(i-1,j,k,2)*w(i-1,j,k,ivz) &
-                     + sj(i,j,  k,2)*w(i,j+1,k,ivz) &
-                     - sj(i,j-1,k,2)*w(i,j-1,k,ivz) &
-                     + sk(i,j,k,  2)*w(i,j,k+1,ivz) &
-                     - sk(i,j,k-1,2)*w(i,j,k-1,ivz)
-
-                wIO(i,j,k,1) = tmp*(wwy - vvz)
-             enddo
-          enddo
-       enddo
-
-    case (cgnsVorty)
-
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                tmp = half/vol(i,j,k)
-                uuz = si(i,  j,k,3)*w(i+1,j,k,ivx) &
-                     - si(i-1,j,k,3)*w(i-1,j,k,ivx) &
-                     + sj(i,j,  k,3)*w(i,j+1,k,ivx) &
-                     - sj(i,j-1,k,3)*w(i,j-1,k,ivx) &
-                     + sk(i,j,k,  3)*w(i,j,k+1,ivx) &
-                     - sk(i,j,k-1,3)*w(i,j,k-1,ivx)
-
-                wwx = si(i,  j,k,1)*w(i+1,j,k,ivz) &
-                     - si(i-1,j,k,1)*w(i-1,j,k,ivz) &
-                     + sj(i,j,  k,1)*w(i,j+1,k,ivz) &
-                     - sj(i,j-1,k,1)*w(i,j-1,k,ivz) &
-                     + sk(i,j,k,  1)*w(i,j,k+1,ivz) &
-                     - sk(i,j,k-1,1)*w(i,j,k-1,ivz)
-
-                wIO(i,j,k,1) = tmp*(uuz - wwx)
-             enddo
-          enddo
-       enddo
-
-    case (cgnsVortz)
-
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                tmp = half/vol(i,j,k)
-                uuy = si(i,  j,k,2)*w(i+1,j,k,ivx) &
-                     - si(i-1,j,k,2)*w(i-1,j,k,ivx) &
-                     + sj(i,j,  k,2)*w(i,j+1,k,ivx) &
-                     - sj(i,j-1,k,2)*w(i,j-1,k,ivx) &
-                     + sk(i,j,k,  2)*w(i,j,k+1,ivx) &
-                     - sk(i,j,k-1,2)*w(i,j,k-1,ivx)
-
-                vvx = si(i,  j,k,1)*w(i+1,j,k,ivy) &
-                     - si(i-1,j,k,1)*w(i-1,j,k,ivy) &
-                     + sj(i,j,  k,1)*w(i,j+1,k,ivy) &
-                     - sj(i,j-1,k,1)*w(i,j-1,k,ivy) &
-                     + sk(i,j,k,  1)*w(i,j,k+1,ivy) &
-                     - sk(i,j,k-1,1)*w(i,j,k-1,ivy)
-
-                wIO(i,j,k,1) = tmp*(vvx - uuy)
-             enddo
-          enddo
-       enddo
-
-    case (cgnsPtotloss)
-
-       ! Compute the free stream total pressure.
-
-       call computePtot(rhoInf, uInf, zero, zero, &
-            pInf, ptotInf)
-       ptotInf = one/ptotInf
-
-       ! Loop over the cell centers and compute the
-       ! total pressure loss.
-
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                call computePtot(w(i,j,k,irho), w(i,j,k,ivx), &
-                     w(i,j,k,ivy),  w(i,j,k,ivz), &
-                     p(i,j,k),      ptot)
-
-                wIO(i,j,k,1) = one - ptot*ptotInf
-             enddo
-          enddo
-       enddo
-
-    case (cgnsResRho)
-
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                !   wIO(i,j,k,1) = dw(i,j,k,irho)
-                wIO(i,j,k,1) = dw(i,j,k,irho)/vol(i,j,k)
-             enddo
-          enddo
-       enddo
-
-    case (cgnsResMomx)
-
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                !   wIO(i,j,k,1) = dw(i,j,k,imx)
-                wIO(i,j,k,1) = dw(i,j,k,imx)/vol(i,j,k)
-             enddo
-          enddo
-       enddo
-
-    case (cgnsResMomy)
-
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                !   wIO(i,j,k,1) = dw(i,j,k,imy)
-                wIO(i,j,k,1) = dw(i,j,k,imy)/vol(i,j,k)
-             enddo
-          enddo
-       enddo
-
-    case (cgnsResMomz)
-
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                !   wIO(i,j,k,1) = dw(i,j,k,imz)
-                wIO(i,j,k,1) = dw(i,j,k,imz)/vol(i,j,k)
-             enddo
-          enddo
-       enddo
-
-    case (cgnsResRhoE)
-
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                !   wIO(i,j,k,1) = dw(i,j,k,irhoE)
-                wIO(i,j,k,1) = dw(i,j,k,irhoE)/vol(i,j,k)
-             enddo
-          enddo
-       enddo
-
-    case (cgnsResNu,cgnsResK)
-
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                !   wIO(i,j,k,1) = dw(i,j,k,itu1)
-                wIO(i,j,k,1) = dw(i,j,k,itu1)/vol(i,j,k)
-             enddo
-          enddo
-       enddo
-
-    case (cgnsResOmega,cgnsResTau,cgnsResEpsilon)
-
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                !   wIO(i,j,k,1) = dw(i,j,k,itu2)
-                wIO(i,j,k,1) = dw(i,j,k,itu2)/vol(i,j,k)
-             enddo
-          enddo
-       enddo
-
-    case (cgnsResV2)
-
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                !   wIO(i,j,k,1) = dw(i,j,k,itu3)
-                wIO(i,j,k,1) = dw(i,j,k,itu3)/vol(i,j,k)
-             enddo
-          enddo
-       enddo
-
-    case (cgnsResF)
-
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                !   wIO(i,j,k,1) = dw(i,j,k,itu4)
-                wIO(i,j,k,1) = dw(i,j,k,itu4)/vol(i,j,k)
-             enddo
-          enddo
-       enddo
-
-    case (cgnsBlank)
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                wIO(i,j,k,1) = real(iblank(i,j,k),realType)
-             enddo
-          enddo
-       enddo
-
-    case (cgnsGC)
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                wIO(i,j,k,1) = real(globalcell(i,j,k),realType)
-             enddo
-          enddo
-       enddo
-
-    case (cgnsstatus)
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                if (oversetPresent) then
-                   wIO(i,j,k,1) = real(status(i, j, k))
-                else
-                   wIO(i,j,k,1) = 0
-                end if
-             enddo
-          enddo
-       enddo
-
-    case (cgnsintermittency)
-       if(laminartoturbulent)then
-          do k=kBeg,kEnd
-             kk = max(2_intType,k); kk = min(kl,kk)
-             do j=jBeg,jEnd
-                jj = max(2_intType,j); jj = min(jl,jj)
-                do i=iBeg,iEnd
-                   ii = max(2_intType,i); ii = min(il,ii)
-                    wIO(i,j,k,1) = intermittency(ii,jj,kk)
-                 enddo
-              enddo
-           enddo
-        endif
-
-    case (cgnsShock)
-
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-
-                ! Here we compute U/a <dot> grad P / ||grad P||
-                ! Whre U is the velocity vector, a is the speed of
-                ! sound and P is the pressure.
-
-                ! U / a
-                a  = sqrt(gamma(i,j,k)*max(p(i,j,k),plim) &
-                     / max(w(i,j,k,irho),rholim))
-                UovA = (/w(i,j,k,ivx), w(i,j,k,ivy), w(i,j,k,ivz)/)/a
-
-                ! grad P / ||grad P||
-
-                gradP(1) = si(i,  j,k,1)*P(i+1,j,k) &
-                     - si(i-1,j,k,1)*P(i-1,j,k) &
-                     + sj(i,j,  k,1)*P(i,j+1,k) &
-                     - sj(i,j-1,k,1)*P(i,j-1,k) &
-                     + sk(i,j,k,  1)*P(i,j,k+1) &
-                     - sk(i,j,k-1,1)*P(i,j,k-1)
-
-                gradP(2) = si(i,  j,k,2)*P(i+1,j,k) &
-                     - si(i-1,j,k,2)*P(i-1,j,k) &
-                     + sj(i,j,  k,2)*P(i,j+1,k) &
-                     - sj(i,j-1,k,2)*P(i,j-1,k) &
-                     + sk(i,j,k,  2)*P(i,j,k+1) &
-                     - sk(i,j,k-1,2)*P(i,j,k-1)
-
-                gradP(3) = si(i,  j,k,3)*P(i+1,j,k) &
-                     - si(i-1,j,k,3)*P(i-1,j,k) &
-                     + sj(i,j,  k,3)*P(i,j+1,k) &
-                     - sj(i,j-1,k,3)*P(i,j-1,k) &
-                     + sk(i,j,k,  3)*P(i,j,k+1) &
-                     - sk(i,j,k-1,3)*P(i,j,k-1)
-
-                gradP = gradP / sqrt(gradP(1)**2 + gradP(2)**2 + gradP(3)**2 )
-                ! Dot product
-                wIO(i,j,k,1) = UovA(1)*gradP(1) + UovA(2)*gradP(2) + UovA(3)*gradP(3)
-             end do
-          end do
-       end do
-
-    case  (cgnsSandGrainRoughness)
-       ! It is only possible to write this when it was allocated in the first place
-       ! (useRoughSA = True) but this has been check in 'inputParamRoutines'
-       ! allready
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                wIO(i,j,k,1) = real(ks(i,j,k),realType)
-             enddo
-          enddo
-       enddo
-
-
-
-    case default
-       call terminate("storeSolInBuffer", &
-            "This should not happen")
-
-    end select
-
-    ! Copy the data in the 1D buffer, if desired.
-
-    if( copyInBuffer ) then
-       nn = 0
-       do k=kBeg,kEnd
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
+    subroutine numberOfIsoSurfVariables(nIsoSolVar)
+        !
+        !       numberOfVolSolVariables determines the number of variables
+        !       to be written on the isosurface. These are similar to the
+        !       volume variables.
+        !
+        use flowVarRefState
+        use inputPhysics
+        use extraOutput
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(out) :: nIsoSolVar
+
+        nIsoSolvar = 0
+
+        ! Check whether or not some additional solution variables must
+        ! be written.
+        if (isoWriteRho) nIsoSolvar = nIsoSolvar + 1
+        if (isoWriteVx) nIsoSolvar = nIsoSolvar + 1
+        if (isoWriteVy) nIsoSolvar = nIsoSolvar + 1
+        if (isoWriteVz) nIsoSolvar = nIsoSolvar + 1
+        if (isoWriteP) nIsoSolvar = nIsoSolvar + 1
+        if (isoWriteTurb) nIsoSolvar = nIsoSolvar + (nw - nwf)
+        if (isoWriteMx) nIsoSolvar = nIsoSolvar + 1
+        if (isoWriteMy) nIsoSolvar = nIsoSolvar + 1
+        if (isoWriteMz) nIsoSolvar = nIsoSolvar + 1
+        if (isoWriteRVx) nIsoSolvar = nIsoSolvar + 1
+        if (isoWriteRVy) nIsoSolvar = nIsoSolvar + 1
+        if (isoWriteRVz) nIsoSolvar = nIsoSolvar + 1
+        if (isoWriteRhoe) nIsoSolvar = nIsoSolvar + 1
+        if (isoWriteTemp) nIsoSolvar = nIsoSolvar + 1
+        if (isoWriteCp) nIsoSolvar = nIsoSolvar + 1
+        if (isoWriteMach) nIsoSolvar = nIsoSolvar + 1
+        if (isoWriteRMach) nIsoSolvar = nIsoSolvar + 1
+        if (isoWriteMachTurb) nIsoSolvar = nIsoSolvar + 1
+        if (isoWriteEddyVis) nIsoSolvar = nIsoSolvar + 1
+        if (isoWriteRatioEddyVis) nIsoSolvar = nIsoSolvar + 1
+        if (isoWriteDist) nIsoSolvar = nIsoSolvar + 1
+        if (isoWriteVort) nIsoSolvar = nIsoSolvar + 1
+        if (isoWriteVortx) nIsoSolvar = nIsoSolvar + 1
+        if (isoWriteVorty) nIsoSolvar = nIsoSolvar + 1
+        if (isoWriteVortz) nIsoSolvar = nIsoSolvar + 1
+        if (isoWritePtotloss) nIsoSolvar = nIsoSolvar + 1
+        if (isoWriteShock) nIsoSolVar = nIsoSolVar + 1
+        if (isoWriteFilteredShock) nIsoSolVar = nIsoSolVar + 1
+
+        ! Check the discrete variables.
+
+        if (isoWriteResRho) nIsoSolvar = nIsoSolvar + 1
+        if (isoWriteResMom) nIsoSolvar = nIsoSolvar + 3
+        if (isoWriteResRhoe) nIsoSolvar = nIsoSolvar + 1
+        if (isoWriteResTurb) nIsoSolvar = nIsoSolvar &
+                                          + (nw - nwf)
+
+        if (isoWriteBlank) nIsoSolvar = nIsoSolvar + 1
+
+    end subroutine numberOfIsoSurfVariables
+
+    subroutine numberOfSurfSolVariables(nSolVar)
+        !
+        !       numberOfSurfSolVariables determines the number of surface
+        !       variables to be written to the surface solution file.
+        !
+        use precision
+        use extraOutput
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(out) :: nSolVar
+
+        ! Initialize the number of solution variables to zero.
+
+        nSolVar = 0
+
+        ! Determine the number of surface variables to be written.
+
+        if (surfWriteRho) nSolVar = nSolVar + 1
+        if (surfWriteP) nSolVar = nSolVar + 1
+        if (surfWriteTemp) nSolVar = nSolVar + 1
+        if (surfWriteVx) nSolVar = nSolVar + 1
+        if (surfWriteVy) nSolVar = nSolVar + 1
+        if (surfWriteVz) nSolVar = nSolVar + 1
+        if (surfWriteRVx) nSolVar = nSolVar + 1
+        if (surfWriteRVy) nSolVar = nSolVar + 1
+        if (surfWriteRVz) nSolVar = nSolVar + 1
+        if (surfWriteCp) nSolVar = nSolVar + 1
+        if (surfWritePtotloss) nSolVar = nSolVar + 1
+        if (surfWriteMach) nSolVar = nSolVar + 1
+        if (surfWriteRMach) nSolVar = nSolVar + 1
+        if (surfWriteCf) nSolVar = nSolVar + 1
+        if (surfWriteCh) nSolVar = nSolVar + 1
+        if (surfWriteYplus) nSolVar = nSolVar + 1
+        if (surfWriteCfx) nSolVar = nSolVar + 1
+        if (surfWriteCfy) nSolVar = nSolVar + 1
+        if (surfWriteCfz) nSolVar = nSolVar + 1
+        if (surfWriteBlank) nSolVar = nSolVar + 1
+        if (surfWriteSepSensor) nSolVar = nSolVar + 1
+        if (surfWriteCavitation) nsolVar = nsolVar + 1
+        if (surfWriteGC) nsolVar = nsolVar + 1
+
+    end subroutine numberOfSurfSolVariables
+
+    subroutine numberOfVolSolVariables(nVolSolvar, nVolDiscrVar)
+        !
+        !       numberOfVolSolVariables determines the number of volume
+        !       variables to be written to the solution file. A distinction is
+        !       made between solution variables and discrete variables. The
+        !       former discribes the actual solution, the latter is additional
+        !       info such as equation residuals.
+        !
+        use flowVarRefState
+        use inputPhysics
+        use extraOutput
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(out) :: nVolSolvar, nVolDiscrVar
+
+        ! Initialize the number of solution variables to the number of
+        ! independent variables and the number of discrete variables to 0.
+
+        nVolSolvar = nw
+        nVolDiscrVar = 0
+
+        ! Check whether or not some additional solution variables must
+        ! be written.
+
+        if (volWriteMx) nVolSolvar = nVolSolvar + 1
+        if (volWriteMy) nVolSolvar = nVolSolvar + 1
+        if (volWriteMz) nVolSolvar = nVolSolvar + 1
+        if (volWriteRVx) nVolSolvar = nVolSolvar + 1
+        if (volWriteRVy) nVolSolvar = nVolSolvar + 1
+        if (volWriteRVz) nVolSolvar = nVolSolvar + 1
+        if (volWriteRhoe) nVolSolvar = nVolSolvar + 1
+        if (volWriteTemp) nVolSolvar = nVolSolvar + 1
+        if (volWriteCp) nVolSolvar = nVolSolvar + 1
+        if (volWriteMach) nVolSolvar = nVolSolvar + 1
+        if (volWriteRMach) nVolSolvar = nVolSolvar + 1
+        if (volWriteMachTurb) nVolSolvar = nVolSolvar + 1
+        if (volWriteEddyVis) nVolSolvar = nVolSolvar + 1
+        if (volWriteRatioEddyVis) nVolSolvar = nVolSolvar + 1
+        if (volWriteDist) nVolSolvar = nVolSolvar + 1
+        if (volWriteVort) nVolSolvar = nVolSolvar + 1
+        if (volWriteVortx) nVolSolvar = nVolSolvar + 1
+        if (volWriteVorty) nVolSolvar = nVolSolvar + 1
+        if (volWriteVortz) nVolSolvar = nVolSolvar + 1
+        if (volWritePtotloss) nVolSolvar = nVolSolvar + 1
+        if (volWriteShock) nVolSolvar = nVolSolvar + 1
+        if (volWriteFilteredShock) nVolSolvar = nVolSolvar + 1
+        if (volWriteGC) nVolSolvar = nVolSolvar + 1
+        if (volWriteStatus) nVolSolvar = nVolSolvar + 1
+        if (volWriteIntermittency) nVolDiscrVar = nVolDiscrVar + 1
+
+        ! Check the discrete variables.
+
+        if (volWriteResRho) nVolDiscrVar = nVolDiscrVar + 1
+        if (volWriteResMom) nVolDiscrVar = nVolDiscrVar + 3
+        if (volWriteResRhoe) nVolDiscrVar = nVolDiscrVar + 1
+        if (volWriteResTurb) nVolDiscrVar = nVolDiscrVar &
+                                            + (nw - nwf)
+
+        if (volWriteBlank) nVolDiscrVar = nVolDiscrVar + 1
+
+        if (volwriteKs) nVolDiscrVar = nVolDiscrVar + 1
+
+    end subroutine numberOfVolSolVariables
+
+    subroutine copyDataBufSinglePrecision(val, buffer, &
+                                          iBeg, jBeg, kBeg, &
+                                          iEnd, jEnd, kEnd, subRange)
+        !
+        !       copyDataBufSinglePrecision stores the given 1D buffer into the
+        !       subrange of the 3D single precision val array.
+        !
+        use precision
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: iBeg, jBeg, kBeg
+        integer(kind=intType), intent(in) :: iEnd, jEnd, kEnd
+        integer(kind=intType), dimension(3, 2), intent(in) :: subRange
+
+        real(kind=realType), dimension(*), intent(in) :: buffer
+        real(kind=4), dimension(iBeg:iEnd, jBeg:jEnd, kBeg:kEnd), &
+            intent(inout) :: val
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, ll
+
+        ! Copy the subrange into val.
+
+        ll = 0
+        do k = subRange(3, 1), subRange(3, 2)
+            do j = subRange(2, 1), subRange(2, 2)
+                do i = subRange(1, 1), subRange(1, 2)
+                    ll = ll + 1
+                    val(i, j, k) = real(buffer(ll), singleType)
+                end do
+            end do
+        end do
+
+    end subroutine copyDataBufSinglePrecision
+
+    !      ==================================================================
+
+    subroutine copyDataBufDoublePrecision(val, buffer, &
+                                          iBeg, jBeg, kBeg, &
+                                          iEnd, jEnd, kEnd, subRange)
+        !
+        !       copyDataBufDoublePrecision stores the given 1D buffer into the
+        !       subrange of the 3D double precision val array.
+        !
+        use precision
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: iBeg, jBeg, kBeg
+        integer(kind=intType), intent(in) :: iEnd, jEnd, kEnd
+        integer(kind=intType), dimension(3, 2), intent(in) :: subRange
+
+        real(kind=realType), dimension(*), intent(in) :: buffer
+        real(kind=8), dimension(iBeg:iEnd, jBeg:jEnd, kBeg:kEnd), &
+            intent(inout) :: val
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, ll
+
+        ! Copy the subrange into val.
+
+        ll = 0
+        do k = subRange(3, 1), subRange(3, 2)
+            do j = subRange(2, 1), subRange(2, 2)
+                do i = subRange(1, 1), subRange(1, 2)
+                    ll = ll + 1
+                    val(i, j, k) = real(buffer(ll), doubleType)
+                end do
+            end do
+        end do
+
+    end subroutine copyDataBufDoublePrecision
+
+    subroutine volSolNames(solNames)
+        !
+        !       volSolNames sets the names for the volume variables to be
+        !       written to the volume solution file. Sids convention names are
+        !       used as much as possible.
+        !
+        use constants
+        use cgnsNames
+        use inputPhysics
+        use flowVarRefState
+        use extraOutput
+        implicit none
+        !
+        !      Subroutine argument.
+        !
+        character(len=*), dimension(*), intent(out) :: solNames
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn
+        !
+        ! First store the names of the independent flow variables.
+
+        solNames(1) = cgnsDensity
+        solNames(2) = cgnsVelx
+        solNames(3) = cgnsVely
+        solNames(4) = cgnsVelz
+        solNames(5) = cgnsPressure
+
+        ! The turbulent variables if the RANS equations are solved.
+        ! Note that these are the primitive variables and not the
+        ! conservative ones. The reason is that the sids conventions only
+        ! defines these names and not the conservative ones.
+
+        if (equations == RANSEquations) then
+
+            select case (turbModel)
+
+            case (spalartAllmaras, spalartAllmarasEdwards)
+                solNames(itu1) = cgnsTurbSaNu
+
+            case (komegaWilcox, komegaModified, menterSST)
+                solNames(itu1) = cgnsTurbK
+                solNames(itu2) = cgnsTurbOmega
+
+            case (ktau)
+                solNames(itu1) = cgnsTurbK
+                solNames(itu2) = cgnsTurbTau
+
+            case (v2f)
+                solNames(itu1) = cgnsTurbK
+                solNames(itu2) = cgnsTurbEpsilon
+                solNames(itu3) = cgnsTurbV2
+                solNames(itu4) = cgnsTurbF
+
+            end select
+
+        end if
+
+        ! Initialize nn to the number of independent variables.
+
+        nn = nw
+
+        ! Check the additional variables to be written.
+
+        if (volWriteMx) then
+            nn = nn + 1
+            solNames(nn) = cgnsMomx
+        end if
+
+        if (volWriteMy) then
+            nn = nn + 1
+            solNames(nn) = cgnsMomy
+        end if
+
+        if (volWriteMz) then
+            nn = nn + 1
+            solNames(nn) = cgnsMomz
+        end if
+
+        if (volWriteRVx) then
+            nn = nn + 1
+            solNames(nn) = cgnsRelVelx
+        end if
+
+        if (volWriteRVy) then
+            nn = nn + 1
+            solNames(nn) = cgnsRelVely
+        end if
+
+        if (volWriteRVz) then
+            nn = nn + 1
+            solNames(nn) = cgnsRelVelz
+        end if
+
+        if (volWriteRhoe) then
+            nn = nn + 1
+            solNames(nn) = cgnsEnergy
+        end if
+
+        if (volWriteTemp) then
+            nn = nn + 1
+            solNames(nn) = cgnsTemp
+        end if
+
+        if (volWriteCp) then
+            nn = nn + 1
+            solNames(nn) = cgnsCp
+        end if
+
+        if (volWriteMach) then
+            nn = nn + 1
+            solNames(nn) = cgnsMach
+        end if
+
+        if (volWriteRMach) then
+            nn = nn + 1
+            solNames(nn) = cgnsRelMach
+        end if
+
+        if (volWriteMachTurb) then
+            nn = nn + 1
+            solNames(nn) = cgnsMachTurb
+        end if
+
+        if (volWriteEddyVis) then
+            nn = nn + 1
+            solNames(nn) = cgnsEddy
+        end if
+
+        if (volWriteRatioEddyVis) then
+            nn = nn + 1
+            solNames(nn) = cgnsEddyRatio
+        end if
+
+        if (volWriteDist) then
+            nn = nn + 1
+            solNames(nn) = cgNSWallDist
+        end if
+
+        if (volWriteVort) then
+            nn = nn + 1
+            solNames(nn) = cgnsVortMagn
+        end if
+
+        if (volWriteVortx) then
+            nn = nn + 1
+            solNames(nn) = cgnsVortx
+        end if
+
+        if (volWriteVorty) then
+            nn = nn + 1
+            solNames(nn) = cgnsVorty
+        end if
+
+        if (volWriteVortz) then
+            nn = nn + 1
+            solNames(nn) = cgnsVortz
+        end if
+
+        if (volWritePtotloss) then
+            nn = nn + 1
+            solNames(nn) = cgnsPtotloss
+        end if
+
+        if (volWriteResRho) then
+            nn = nn + 1
+            solNames(nn) = cgnsResRho
+        end if
+
+        if (volWriteResMom) then
+            nn = nn + 1
+            solNames(nn) = cgnsResMomx
+
+            nn = nn + 1
+            solNames(nn) = cgnsResMomy
+
+            nn = nn + 1
+            solNames(nn) = cgnsResMomz
+        end if
+
+        if (volWriteResRhoe) then
+            nn = nn + 1
+            solNames(nn) = cgnsResRhoe
+        end if
+
+        if (volWriteResTurb) then
+
+            select case (turbModel)
+
+            case (spalartAllmaras, spalartAllmarasEdwards)
                 nn = nn + 1
-                buffer(nn) = wIO(i,j,k,1)
-             enddo
-          enddo
-       enddo
-    endif
-
-  end subroutine storeSolInBuffer
-
-  subroutine storeSurfsolInBuffer(sps, buffer, nn, blockID,   &
-                                  faceID, cellRange, solName, &
-                                  viscousSubface, useRindLayer, &
-                                  iBeg, iEnd, jBeg, jEnd)
-    !
-    !       storeSurfsolInBuffer stores the variable indicated by
-    !       solName of the given block ID in the buffer. As the solution
-    !       must be stored in the center of the boundary face the average
-    !       value of the first internal cell and its corresponding halo is
-    !       computed. The counter nn is updated in this routine. However
-    !       it is not initialized, because multiple contributions may be
-    !       stored in buffer.
-    !
-    use blockPointers
-    use cgnsNames
-    use constants
-    use flowVarRefState
-    use inputPhysics
-    use inputIO
-    use communication
-    use utils, only : setPointers
-    use flowUtils, only : computePtot
-    use inputCostFunctions
-    use cgnsGrid, only : cgnsDoms,cgnsNDom ! see subroutine updateRotationRate in preprocessingAPI.F90
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in)    :: sps, blockID, faceID
-    integer(kind=intType), intent(inout) :: nn
-    integer(kind=intType), dimension(3,2), intent(in) :: cellRange
-    real(kind=realType), dimension(*), intent(out) :: buffer
-    character(len=*), intent(in) :: solName
-    logical, intent(in) :: viscousSubface, useRindLayer
-
-    ! if useRindLayer is true, then iBeg, iEnd, jBeg, jEnd are use to determine
-    ! when the indices are in the rind layer.
-    integer(kind=intType), optional, intent(in) :: iBeg, iEnd, jBeg, jEnd
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, ior, jor
-    integer(kind=intType) :: ii, jj, mm, iiMax, jjMax
-
-    integer(kind=intType), dimension(2,2) :: rangeFace
-    integer(kind=intType), dimension(3,2) :: rangeCell
-
-    integer(kind=intType), dimension(:,:), pointer :: viscPointer
-    integer(kind=intType), dimension(:,:), pointer :: iblank2
-
-    real(kind=realType) :: fact, gm1, ptotInf, ptot, psurf, rsurf
-    real(kind=realType) :: usurf, vsurf, wsurf, m2surf, musurf
-    real(kind=realType) :: fx, fy, fz, fn, a2Tot, a2, qw
-    real(kind=realType) :: tauxx, tauyy, tauzz
-    real(kind=realType) :: tauxy, tauxz, tauyz
-    real(kind=realType) :: pm1, a, sensor, plocal, sensor1
-    real(kind=realType), dimension(3) :: norm, V
+                solNames(nn) = cgnsResNu
 
-    real(kind=realType), dimension(:,:,:), pointer :: ww1, ww2
-    real(kind=realType), dimension(:,:,:), pointer :: ss1, ss2, ss
-    real(kind=realType), dimension(:,:),   pointer :: pp1, pp2
-
-    real(kind=realType), dimension(:,:),   pointer :: gamma1, gamma2
-    real(kind=realType), dimension(:,:),   pointer :: rlv1, rlv2
-    real(kind=realType), dimension(:,:),   pointer :: dd2Wall
+            case (komegaWilcox, komegaModified, menterSST)
+                nn = nn + 1
+                solNames(nn) = cgnsResK
 
-    real(kind=realType) :: uInfDim2 ! MachCoeff-derived (Uinf*Uref)**2
-    real(kind=realType) :: rot_speed2 ! norm of wCrossR squared
-    real(kind=realType),Dimension(3) :: r_ ! spanwise position for given point
-    real(kind=realType),Dimension(3) :: rrate_  ! the rotational rate of the WT
-    real(kind=realType),Dimension(3) :: wCrossR ! rotationrate cross radius
-    real(kind=realType), dimension(:,:,:), pointer :: xx1, xx2 ! for the coords
-    ! The original i,j beging of the local block in the entire cgns block.
-    real(kind=realType) :: subface_jBegOr, subface_jEndOr, subface_iBegOr, subface_iEndOr
+                nn = nn + 1
+                solNames(nn) = cgnsResOmega
 
-    ! Set the pointers to this block.
-    call setPointers(blockID, 1_intType, sps)
+            case (ktau)
+                nn = nn + 1
+                solNames(nn) = cgnsResK
 
-    ! Set the offset for the viscous data, such that the range is
-    ! limited to the actual physical face. Viscous data, like skin
-    ! friction, need gradIent information, which is not available
-    ! in the halo's.
+                nn = nn + 1
+                solNames(nn) = cgnsResTau
 
+            case (v2f)
+                nn = nn + 1
+                solNames(nn) = cgnsResK
 
-    ! CellRange contains the range of the current block in the
-    ! original cgns block. Substract the offset and store the local
-    ! range in rangeCell.
+                nn = nn + 1
+                solNames(nn) = cgnsResEpsilon
 
-    rangeCell(1,1) = cellRange(1,1) - iBegor + 1
-    rangeCell(1,2) = cellRange(1,2) - iBegor + 1
+                nn = nn + 1
+                solNames(nn) = cgnsResV2
 
-    rangeCell(2,1) = cellRange(2,1) - jBegor + 1
-    rangeCell(2,2) = cellRange(2,2) - jBegor + 1
+                nn = nn + 1
+                solNames(nn) = cgnsResF
+
+            end select
+
+        end if
+
+        if (volWriteShock) then
+            nn = nn + 1
+            solNames(nn) = cgnsShock
+        end if
+
+        if (volWriteFilteredShock) then
+            nn = nn + 1
+            solNames(nn) = cgnsFilteredShock
+        end if
+
+        if (volWriteBlank) then
+            nn = nn + 1
+            solNames(nn) = cgnsBlank
+        end if
+
+        if (volWriteGC) then
+            nn = nn + 1
+            solNames(nn) = cgnsGC
+        end if
+
+        if (volWriteStatus) then
+            nn = nn + 1
+            solNames(nn) = cgnsStatus
+        end if
+
+        if (volWriteIntermittency) then
+            nn = nn + 1
+            solNames(nn) = cgnsIntermittency
+        end if
+
+        if (volWriteKs) then
+            nn = nn + 1
+            solNames(nn) = cgnsSandGrainRoughness
+        end if
+
+    end subroutine volSolNames
+
+    subroutine surfSolNames(solNames)
+        !
+        !       surfSolNames sets the names for the surface variables to be
+        !       written to the surface solution file. Sids convention names
+        !       are used as much as possible.
+        !
+        use cgnsNames
+        use extraOutput
+        implicit none
+        !
+        !      Subroutine argument.
+        !
+        character(len=*), dimension(*), intent(out) :: solNames
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn
+
+        ! Initialize nn to 0.
+
+        nn = 0
+
+        ! Check which surfaces variables must be written and set
+        ! solNames accordingly.
+
+        if (surfWriteRho) then
+            nn = nn + 1
+            solNames(nn) = cgnsDensity
+        end if
+
+        if (surfWriteP) then
+            nn = nn + 1
+            solNames(nn) = cgnsPressure
+        end if
+
+        if (surfWriteTemp) then
+            nn = nn + 1
+            solNames(nn) = cgnsTemp
+        end if
+
+        if (surfWriteVx) then
+            nn = nn + 1
+            solNames(nn) = cgnsVelx
+        end if
+
+        if (surfWriteVy) then
+            nn = nn + 1
+            solNames(nn) = cgnsVely
+        end if
+
+        if (surfWriteVz) then
+            nn = nn + 1
+            solNames(nn) = cgnsVelz
+        end if
+
+        if (surfWriteRVx) then
+            nn = nn + 1
+            solNames(nn) = cgnsRelVelx
+        end if
+
+        if (surfWriteRVy) then
+            nn = nn + 1
+            solNames(nn) = cgnsRelVely
+        end if
+
+        if (surfWriteRVz) then
+            nn = nn + 1
+            solNames(nn) = cgnsRelVelz
+        end if
+
+        if (surfWriteCp) then
+            nn = nn + 1
+            solNames(nn) = cgnsCp
+        end if
+
+        if (surfWritePtotloss) then
+            nn = nn + 1
+            solNames(nn) = cgnsPtotloss
+        end if
+
+        if (surfWriteMach) then
+            nn = nn + 1
+            solNames(nn) = cgnsMach
+        end if
+
+        if (surfWriteRMach) then
+            nn = nn + 1
+            solNames(nn) = cgnsRelMach
+        end if
+
+        if (surfWriteCf) then
+            nn = nn + 1
+            solNames(nn) = cgnsSkinFmag
+        end if
+
+        if (surfWriteCh) then
+            nn = nn + 1
+            solNames(nn) = cgnsStanton
+        end if
+
+        if (surfWriteYplus) then
+            nn = nn + 1
+            solNames(nn) = cgnsYplus
+        end if
+
+        if (surfWriteCfx) then
+            nn = nn + 1
+            solNames(nn) = cgnsSkinFx
+        end if
+
+        if (surfWriteCfy) then
+            nn = nn + 1
+            solNames(nn) = cgnsSkinFy
+        end if
+
+        if (surfWriteCfz) then
+            nn = nn + 1
+            solNames(nn) = cgnsSkinFz
+        end if
+
+        if (surfWriteBlank) then
+            nn = nn + 1
+            solNames(nn) = cgnsBlank
+        end if
+
+        if (surfWriteSepSensor) then
+            nn = nn + 1
+            solNames(nn) = cgnsSepSensor
+        end if
+
+        if (surfWriteCavitation) then
+            nn = nn + 1
+            solNames(nn) = cgnsCavitation
+        end if
+
+        if (surfWriteAxisMoment) then
+            nn = nn + 1
+            solNames(nn) = cgnsAxisMoment
+        end if
+
+        if (surfWriteGC) then
+            nn = nn + 1
+            solNames(nn) = cgnsGC
+        end if
+
+    end subroutine surfSolNames
+
+    subroutine storeSolInBuffer(buffer, copyInBuffer, solName, &
+                                iBeg, iEnd, jBeg, jEnd, kBeg, kEnd)
+        !
+        !       StoreSolInBuffer stores the given range of the variable
+        !       indicated by solName in IOVar and copies it into buffer if
+        !       desired. It is assumed that the variables in blockPointers
+        !       already point to the correct block.
+        !
+        use constants
+        use blockPointers
+        use cgnsGrid
+        use cgnsNames
+        use flowVarRefState
+        use inputPhysics
+        use IOModule
+        use flowUtils, only: computePTot
+        use utils, only: terminate
+        use oversetData, only: oversetPresent
+        use inputIO, only: laminarToTurbulent
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: iBeg, iEnd, jBeg, jEnd
+        integer(kind=intType), intent(in) :: kBeg, kEnd
+
+        real(kind=realType), dimension(*), intent(out) :: buffer
+        character(len=*), intent(in)                   :: solName
+
+        logical, intent(in) :: copyInBuffer
+        !
+        !      Local parameters
+        !
+        real(kind=realType), parameter :: plim = 0.001_realType
+        real(kind=realType), parameter :: rholim = 0.001_realType
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, ii, jj, kk, nn
+
+        real(kind=realType) :: uuy, uuz, vvx, vvz, wwx, wwy, tmp
+        real(kind=realType) :: vortx, vorty, vortz, a2, ptotInf, ptot
+        real(kind=realType) :: a, UovA(3), gradP(3)
+
+        real(kind=realType), dimension(:, :, :, :), pointer :: wIO
+
+        ! Set the pointer to the correct entry of IOVar. I'm cheating a
+        ! bit here, because I know that only memory has been allocated
+        ! for the first solution ID of IOVar.
+
+        wIO => IOVar(nbkLocal, 1)%w
+
+        ! Determine the variable to be stored, compute it and store
+        ! it in the 1D array buffer.
+
+        select case (solName)
+
+        case (cgnsDensity)
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        wIO(i, j, k, 1) = w(i, j, k, irho)
+                    end do
+                end do
+            end do
+
+        case (cgnsMomx)
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        wIO(i, j, k, 1) = w(i, j, k, irho)*w(i, j, k, ivx)
+                    end do
+                end do
+            end do
+
+        case (cgnsMomy)
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        wIO(i, j, k, 1) = w(i, j, k, irho)*w(i, j, k, ivy)
+                    end do
+                end do
+            end do
+
+        case (cgnsMomz)
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        wIO(i, j, k, 1) = w(i, j, k, irho)*w(i, j, k, ivz)
+                    end do
+                end do
+            end do
+
+        case (cgnsEnergy)
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        wIO(i, j, k, 1) = w(i, j, k, irhoE)
+                    end do
+                end do
+            end do
+
+        case (cgnsTurbSaNu, cgnsTurbK)
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        wIO(i, j, k, 1) = w(i, j, k, itu1)
+                    end do
+                end do
+            end do
+
+        case (cgnsTurbOmega, cgnsTurbTau, cgnsTurbEpsilon)
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        wIO(i, j, k, 1) = w(i, j, k, itu2)
+                    end do
+                end do
+            end do
+
+        case (cgnsTurbV2)
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        wIO(i, j, k, 1) = w(i, j, k, itu3)
+                    end do
+                end do
+            end do
+
+        case (cgnsTurbF)
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        wIO(i, j, k, 1) = w(i, j, k, itu4)
+                    end do
+                end do
+            end do
+
+        case (cgnsVelx)
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        wIO(i, j, k, 1) = w(i, j, k, ivx)
+                    end do
+                end do
+            end do
+
+        case (cgnsVely)
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        wIO(i, j, k, 1) = w(i, j, k, ivy)
+                    end do
+                end do
+            end do
+
+        case (cgnsVelz)
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        wIO(i, j, k, 1) = w(i, j, k, ivz)
+                    end do
+                end do
+            end do
+
+        case (cgnsRelVelx)
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        wIO(i, j, k, 1) = w(i, j, k, ivx) - s(i, j, k, 1)
+                    end do
+                end do
+            end do
+
+        case (cgnsRelVely)
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        wIO(i, j, k, 1) = w(i, j, k, ivy) - s(i, j, k, 2)
+                    end do
+                end do
+            end do
+
+        case (cgnsRelVelz)
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        wIO(i, j, k, 1) = w(i, j, k, ivz) - s(i, j, k, 3)
+                    end do
+                end do
+            end do
+
+        case (cgnsPressure)
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        wIO(i, j, k, 1) = p(i, j, k)
+                    end do
+                end do
+            end do
+
+        case (cgnsTemp)
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        wIO(i, j, k, 1) = p(i, j, k)/(RGas*w(i, j, k, irho))
+                    end do
+                end do
+            end do
+
+        case (cgnsCp)
+            tmp = two/(gammaInf*pInf*MachCoef*MachCoef)
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        wIO(i, j, k, 1) = tmp*(p(i, j, k) - pInf)
+                    end do
+                end do
+            end do
+
+        case (cgnsMach)
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        a2 = gamma(i, j, k)*max(p(i, j, k), plim) &
+                             /max(w(i, j, k, irho), rholim)
+                        tmp = (w(i, j, k, ivx)**2 + w(i, j, k, ivy)**2 &
+                               + w(i, j, k, ivz)**2)/a2
+                        wIO(i, j, k, 1) = sqrt(max(zero, tmp))
+                    end do
+                end do
+            end do
+
+        case (cgnsRelMach)
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        a2 = gamma(i, j, k)*max(p(i, j, k), plim) &
+                             /max(w(i, j, k, irho), rholim)
+                        tmp = ((w(i, j, k, ivx) - s(i, j, k, 1))**2 + &
+                               (w(i, j, k, ivy) - s(i, j, k, 2))**2 &
+                               + (w(i, j, k, ivz) - s(i, j, k, 3))**2)/a2
+                        wIO(i, j, k, 1) = sqrt(max(zero, tmp))
+                    end do
+                end do
+            end do
+
+        case (cgnsMachTurb)
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        tmp = w(i, j, k, irho)*w(i, j, k, itu1) &
+                              /(gamma(i, j, k)*max(p(i, j, k), plim))
+                        wIO(i, j, k, 1) = sqrt(max(zero, tmp))
+                    end do
+                end do
+            end do
+
+        case (cgnsEddy)
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        wIO(i, j, k, 1) = rev(i, j, k)
+                    end do
+                end do
+            end do
+
+        case (cgnsEddyRatio)
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        wIO(i, j, k, 1) = rev(i, j, k)/rlv(i, j, k)
+                    end do
+                end do
+            end do
+
+        case (cgNSWallDist)
+            do k = kBeg, kEnd
+                kk = max(2_intType, k); kk = min(kl, kk)
+                do j = jBeg, jEnd
+                    jj = max(2_intType, j); jj = min(jl, jj)
+                    do i = iBeg, iEnd
+                        ii = max(2_intType, i); ii = min(il, ii)
+                        wIO(i, j, k, 1) = d2Wall(ii, jj, kk)
+                    end do
+                end do
+            end do
+
+        case (cgnsVortMagn)
+
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        tmp = half/vol(i, j, k)
+                        uuy = si(i, j, k, 2)*w(i + 1, j, k, ivx) &
+                              - si(i - 1, j, k, 2)*w(i - 1, j, k, ivx) &
+                              + sj(i, j, k, 2)*w(i, j + 1, k, ivx) &
+                              - sj(i, j - 1, k, 2)*w(i, j - 1, k, ivx) &
+                              + sk(i, j, k, 2)*w(i, j, k + 1, ivx) &
+                              - sk(i, j, k - 1, 2)*w(i, j, k - 1, ivx)
+
+                        uuz = si(i, j, k, 3)*w(i + 1, j, k, ivx) &
+                              - si(i - 1, j, k, 3)*w(i - 1, j, k, ivx) &
+                              + sj(i, j, k, 3)*w(i, j + 1, k, ivx) &
+                              - sj(i, j - 1, k, 3)*w(i, j - 1, k, ivx) &
+                              + sk(i, j, k, 3)*w(i, j, k + 1, ivx) &
+                              - sk(i, j, k - 1, 3)*w(i, j, k - 1, ivx)
+
+                        vvx = si(i, j, k, 1)*w(i + 1, j, k, ivy) &
+                              - si(i - 1, j, k, 1)*w(i - 1, j, k, ivy) &
+                              + sj(i, j, k, 1)*w(i, j + 1, k, ivy) &
+                              - sj(i, j - 1, k, 1)*w(i, j - 1, k, ivy) &
+                              + sk(i, j, k, 1)*w(i, j, k + 1, ivy) &
+                              - sk(i, j, k - 1, 1)*w(i, j, k - 1, ivy)
+
+                        vvz = si(i, j, k, 3)*w(i + 1, j, k, ivy) &
+                              - si(i - 1, j, k, 3)*w(i - 1, j, k, ivy) &
+                              + sj(i, j, k, 3)*w(i, j + 1, k, ivy) &
+                              - sj(i, j - 1, k, 3)*w(i, j - 1, k, ivy) &
+                              + sk(i, j, k, 3)*w(i, j, k + 1, ivy) &
+                              - sk(i, j, k - 1, 3)*w(i, j, k - 1, ivy)
+
+                        wwx = si(i, j, k, 1)*w(i + 1, j, k, ivz) &
+                              - si(i - 1, j, k, 1)*w(i - 1, j, k, ivz) &
+                              + sj(i, j, k, 1)*w(i, j + 1, k, ivz) &
+                              - sj(i, j - 1, k, 1)*w(i, j - 1, k, ivz) &
+                              + sk(i, j, k, 1)*w(i, j, k + 1, ivz) &
+                              - sk(i, j, k - 1, 1)*w(i, j, k - 1, ivz)
+
+                        wwy = si(i, j, k, 2)*w(i + 1, j, k, ivz) &
+                              - si(i - 1, j, k, 2)*w(i - 1, j, k, ivz) &
+                              + sj(i, j, k, 2)*w(i, j + 1, k, ivz) &
+                              - sj(i, j - 1, k, 2)*w(i, j - 1, k, ivz) &
+                              + sk(i, j, k, 2)*w(i, j, k + 1, ivz) &
+                              - sk(i, j, k - 1, 2)*w(i, j, k - 1, ivz)
+
+                        vortx = wwy - vvz; vorty = uuz - wwx; vortz = vvx - uuy
+
+                        wIO(i, j, k, 1) = tmp*sqrt(vortx**2 + vorty**2 + vortz**2)
+                    end do
+                end do
+            end do
+
+        case (cgnsVortx)
+
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        tmp = half/vol(i, j, k)
+                        vvz = si(i, j, k, 3)*w(i + 1, j, k, ivy) &
+                              - si(i - 1, j, k, 3)*w(i - 1, j, k, ivy) &
+                              + sj(i, j, k, 3)*w(i, j + 1, k, ivy) &
+                              - sj(i, j - 1, k, 3)*w(i, j - 1, k, ivy) &
+                              + sk(i, j, k, 3)*w(i, j, k + 1, ivy) &
+                              - sk(i, j, k - 1, 3)*w(i, j, k - 1, ivy)
+
+                        wwy = si(i, j, k, 2)*w(i + 1, j, k, ivz) &
+                              - si(i - 1, j, k, 2)*w(i - 1, j, k, ivz) &
+                              + sj(i, j, k, 2)*w(i, j + 1, k, ivz) &
+                              - sj(i, j - 1, k, 2)*w(i, j - 1, k, ivz) &
+                              + sk(i, j, k, 2)*w(i, j, k + 1, ivz) &
+                              - sk(i, j, k - 1, 2)*w(i, j, k - 1, ivz)
+
+                        wIO(i, j, k, 1) = tmp*(wwy - vvz)
+                    end do
+                end do
+            end do
+
+        case (cgnsVorty)
+
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        tmp = half/vol(i, j, k)
+                        uuz = si(i, j, k, 3)*w(i + 1, j, k, ivx) &
+                              - si(i - 1, j, k, 3)*w(i - 1, j, k, ivx) &
+                              + sj(i, j, k, 3)*w(i, j + 1, k, ivx) &
+                              - sj(i, j - 1, k, 3)*w(i, j - 1, k, ivx) &
+                              + sk(i, j, k, 3)*w(i, j, k + 1, ivx) &
+                              - sk(i, j, k - 1, 3)*w(i, j, k - 1, ivx)
+
+                        wwx = si(i, j, k, 1)*w(i + 1, j, k, ivz) &
+                              - si(i - 1, j, k, 1)*w(i - 1, j, k, ivz) &
+                              + sj(i, j, k, 1)*w(i, j + 1, k, ivz) &
+                              - sj(i, j - 1, k, 1)*w(i, j - 1, k, ivz) &
+                              + sk(i, j, k, 1)*w(i, j, k + 1, ivz) &
+                              - sk(i, j, k - 1, 1)*w(i, j, k - 1, ivz)
+
+                        wIO(i, j, k, 1) = tmp*(uuz - wwx)
+                    end do
+                end do
+            end do
+
+        case (cgnsVortz)
+
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        tmp = half/vol(i, j, k)
+                        uuy = si(i, j, k, 2)*w(i + 1, j, k, ivx) &
+                              - si(i - 1, j, k, 2)*w(i - 1, j, k, ivx) &
+                              + sj(i, j, k, 2)*w(i, j + 1, k, ivx) &
+                              - sj(i, j - 1, k, 2)*w(i, j - 1, k, ivx) &
+                              + sk(i, j, k, 2)*w(i, j, k + 1, ivx) &
+                              - sk(i, j, k - 1, 2)*w(i, j, k - 1, ivx)
+
+                        vvx = si(i, j, k, 1)*w(i + 1, j, k, ivy) &
+                              - si(i - 1, j, k, 1)*w(i - 1, j, k, ivy) &
+                              + sj(i, j, k, 1)*w(i, j + 1, k, ivy) &
+                              - sj(i, j - 1, k, 1)*w(i, j - 1, k, ivy) &
+                              + sk(i, j, k, 1)*w(i, j, k + 1, ivy) &
+                              - sk(i, j, k - 1, 1)*w(i, j, k - 1, ivy)
+
+                        wIO(i, j, k, 1) = tmp*(vvx - uuy)
+                    end do
+                end do
+            end do
+
+        case (cgnsPtotloss)
+
+            ! Compute the free stream total pressure.
+
+            call computePtot(rhoInf, uInf, zero, zero, &
+                             pInf, ptotInf)
+            ptotInf = one/ptotInf
+
+            ! Loop over the cell centers and compute the
+            ! total pressure loss.
+
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        call computePtot(w(i, j, k, irho), w(i, j, k, ivx), &
+                                         w(i, j, k, ivy), w(i, j, k, ivz), &
+                                         p(i, j, k), ptot)
+
+                        wIO(i, j, k, 1) = one - ptot*ptotInf
+                    end do
+                end do
+            end do
+
+        case (cgnsResRho)
+
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        !   wIO(i,j,k,1) = dw(i,j,k,irho)
+                        wIO(i, j, k, 1) = dw(i, j, k, irho)/vol(i, j, k)
+                    end do
+                end do
+            end do
+
+        case (cgnsResMomx)
+
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        !   wIO(i,j,k,1) = dw(i,j,k,imx)
+                        wIO(i, j, k, 1) = dw(i, j, k, imx)/vol(i, j, k)
+                    end do
+                end do
+            end do
+
+        case (cgnsResMomy)
+
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        !   wIO(i,j,k,1) = dw(i,j,k,imy)
+                        wIO(i, j, k, 1) = dw(i, j, k, imy)/vol(i, j, k)
+                    end do
+                end do
+            end do
+
+        case (cgnsResMomz)
+
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        !   wIO(i,j,k,1) = dw(i,j,k,imz)
+                        wIO(i, j, k, 1) = dw(i, j, k, imz)/vol(i, j, k)
+                    end do
+                end do
+            end do
+
+        case (cgnsResRhoE)
+
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        !   wIO(i,j,k,1) = dw(i,j,k,irhoE)
+                        wIO(i, j, k, 1) = dw(i, j, k, irhoE)/vol(i, j, k)
+                    end do
+                end do
+            end do
+
+        case (cgnsResNu, cgnsResK)
+
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        !   wIO(i,j,k,1) = dw(i,j,k,itu1)
+                        wIO(i, j, k, 1) = dw(i, j, k, itu1)/vol(i, j, k)
+                    end do
+                end do
+            end do
+
+        case (cgnsResOmega, cgnsResTau, cgnsResEpsilon)
+
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        !   wIO(i,j,k,1) = dw(i,j,k,itu2)
+                        wIO(i, j, k, 1) = dw(i, j, k, itu2)/vol(i, j, k)
+                    end do
+                end do
+            end do
+
+        case (cgnsResV2)
+
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        !   wIO(i,j,k,1) = dw(i,j,k,itu3)
+                        wIO(i, j, k, 1) = dw(i, j, k, itu3)/vol(i, j, k)
+                    end do
+                end do
+            end do
+
+        case (cgnsResF)
+
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        !   wIO(i,j,k,1) = dw(i,j,k,itu4)
+                        wIO(i, j, k, 1) = dw(i, j, k, itu4)/vol(i, j, k)
+                    end do
+                end do
+            end do
+
+        case (cgnsBlank)
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        wIO(i, j, k, 1) = real(iblank(i, j, k), realType)
+                    end do
+                end do
+            end do
+
+        case (cgnsGC)
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        wIO(i, j, k, 1) = real(globalcell(i, j, k), realType)
+                    end do
+                end do
+            end do
+
+        case (cgnsstatus)
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        if (oversetPresent) then
+                            wIO(i, j, k, 1) = real(status(i, j, k))
+                        else
+                            wIO(i, j, k, 1) = 0
+                        end if
+                    end do
+                end do
+            end do
+
+        case (cgnsintermittency)
+            if (laminartoturbulent) then
+                do k = kBeg, kEnd
+                    kk = max(2_intType, k); kk = min(kl, kk)
+                    do j = jBeg, jEnd
+                        jj = max(2_intType, j); jj = min(jl, jj)
+                        do i = iBeg, iEnd
+                            ii = max(2_intType, i); ii = min(il, ii)
+                            wIO(i, j, k, 1) = intermittency(ii, jj, kk)
+                        end do
+                    end do
+                end do
+            end if
+
+        case (cgnsShock)
+
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+
+                        ! Here we compute U/a <dot> grad P / ||grad P||
+                        ! Whre U is the velocity vector, a is the speed of
+                        ! sound and P is the pressure.
+
+                        ! U / a
+                        a = sqrt(gamma(i, j, k)*max(p(i, j, k), plim) &
+                                 /max(w(i, j, k, irho), rholim))
+                        UovA = (/w(i, j, k, ivx), w(i, j, k, ivy), w(i, j, k, ivz)/)/a
+
+                        ! grad P / ||grad P||
+
+                        gradP(1) = si(i, j, k, 1)*P(i + 1, j, k) &
+                                   - si(i - 1, j, k, 1)*P(i - 1, j, k) &
+                                   + sj(i, j, k, 1)*P(i, j + 1, k) &
+                                   - sj(i, j - 1, k, 1)*P(i, j - 1, k) &
+                                   + sk(i, j, k, 1)*P(i, j, k + 1) &
+                                   - sk(i, j, k - 1, 1)*P(i, j, k - 1)
+
+                        gradP(2) = si(i, j, k, 2)*P(i + 1, j, k) &
+                                   - si(i - 1, j, k, 2)*P(i - 1, j, k) &
+                                   + sj(i, j, k, 2)*P(i, j + 1, k) &
+                                   - sj(i, j - 1, k, 2)*P(i, j - 1, k) &
+                                   + sk(i, j, k, 2)*P(i, j, k + 1) &
+                                   - sk(i, j, k - 1, 2)*P(i, j, k - 1)
+
+                        gradP(3) = si(i, j, k, 3)*P(i + 1, j, k) &
+                                   - si(i - 1, j, k, 3)*P(i - 1, j, k) &
+                                   + sj(i, j, k, 3)*P(i, j + 1, k) &
+                                   - sj(i, j - 1, k, 3)*P(i, j - 1, k) &
+                                   + sk(i, j, k, 3)*P(i, j, k + 1) &
+                                   - sk(i, j, k - 1, 3)*P(i, j, k - 1)
+
+                        gradP = gradP/sqrt(gradP(1)**2 + gradP(2)**2 + gradP(3)**2)
+                        ! Dot product
+                        wIO(i, j, k, 1) = UovA(1)*gradP(1) + UovA(2)*gradP(2) + UovA(3)*gradP(3)
+                    end do
+                end do
+            end do
+
+        case (cgnsSandGrainRoughness)
+            ! It is only possible to write this when it was allocated in the first place
+            ! (useRoughSA = True) but this has been check in 'inputParamRoutines'
+            ! allready
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        wIO(i, j, k, 1) = real(ks(i, j, k), realType)
+                    end do
+                end do
+            end do
+
+        case default
+            call terminate("storeSolInBuffer", &
+                           "This should not happen")
+
+        end select
+
+        ! Copy the data in the 1D buffer, if desired.
+
+        if (copyInBuffer) then
+            nn = 0
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        nn = nn + 1
+                        buffer(nn) = wIO(i, j, k, 1)
+                    end do
+                end do
+            end do
+        end if
+
+    end subroutine storeSolInBuffer
+
+    subroutine storeSurfsolInBuffer(sps, buffer, nn, blockID, &
+                                    faceID, cellRange, solName, &
+                                    viscousSubface, useRindLayer, &
+                                    iBeg, iEnd, jBeg, jEnd)
+        !
+        !       storeSurfsolInBuffer stores the variable indicated by
+        !       solName of the given block ID in the buffer. As the solution
+        !       must be stored in the center of the boundary face the average
+        !       value of the first internal cell and its corresponding halo is
+        !       computed. The counter nn is updated in this routine. However
+        !       it is not initialized, because multiple contributions may be
+        !       stored in buffer.
+        !
+        use blockPointers
+        use cgnsNames
+        use constants
+        use flowVarRefState
+        use inputPhysics
+        use inputIO
+        use communication
+        use utils, only: setPointers
+        use flowUtils, only: computePtot
+        use inputCostFunctions
+        use cgnsGrid, only: cgnsDoms, cgnsNDom ! see subroutine updateRotationRate in preprocessingAPI.F90
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in)    :: sps, blockID, faceID
+        integer(kind=intType), intent(inout) :: nn
+        integer(kind=intType), dimension(3, 2), intent(in) :: cellRange
+        real(kind=realType), dimension(*), intent(out) :: buffer
+        character(len=*), intent(in) :: solName
+        logical, intent(in) :: viscousSubface, useRindLayer
+
+        ! if useRindLayer is true, then iBeg, iEnd, jBeg, jEnd are use to determine
+        ! when the indices are in the rind layer.
+        integer(kind=intType), optional, intent(in) :: iBeg, iEnd, jBeg, jEnd
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, ior, jor
+        integer(kind=intType) :: ii, jj, mm, iiMax, jjMax
+
+        integer(kind=intType), dimension(2, 2) :: rangeFace
+        integer(kind=intType), dimension(3, 2) :: rangeCell
+
+        integer(kind=intType), dimension(:, :), pointer :: viscPointer
+        integer(kind=intType), dimension(:, :), pointer :: iblank2
+
+        real(kind=realType) :: fact, gm1, ptotInf, ptot, psurf, rsurf
+        real(kind=realType) :: usurf, vsurf, wsurf, m2surf, musurf
+        real(kind=realType) :: fx, fy, fz, fn, a2Tot, a2, qw
+        real(kind=realType) :: tauxx, tauyy, tauzz
+        real(kind=realType) :: tauxy, tauxz, tauyz
+        real(kind=realType) :: pm1, a, sensor, plocal, sensor1
+        real(kind=realType), dimension(3) :: norm, V
+
+        real(kind=realType), dimension(:, :, :), pointer :: ww1, ww2
+        real(kind=realType), dimension(:, :, :), pointer :: ss1, ss2, ss
+        real(kind=realType), dimension(:, :), pointer :: pp1, pp2
+
+        real(kind=realType), dimension(:, :), pointer :: gamma1, gamma2
+        real(kind=realType), dimension(:, :), pointer :: rlv1, rlv2
+        real(kind=realType), dimension(:, :), pointer :: dd2Wall
+
+        real(kind=realType) :: uInfDim2 ! MachCoeff-derived (Uinf*Uref)**2
+        real(kind=realType) :: rot_speed2 ! norm of wCrossR squared
+        real(kind=realType), Dimension(3) :: r_ ! spanwise position for given point
+        real(kind=realType), Dimension(3) :: rrate_  ! the rotational rate of the WT
+        real(kind=realType), Dimension(3) :: wCrossR ! rotationrate cross radius
+        real(kind=realType), dimension(:, :, :), pointer :: xx1, xx2 ! for the coords
+        ! The original i,j beging of the local block in the entire cgns block.
+        real(kind=realType) :: subface_jBegOr, subface_jEndOr, subface_iBegOr, subface_iEndOr
+
+        ! Set the pointers to this block.
+        call setPointers(blockID, 1_intType, sps)
+
+        ! Set the offset for the viscous data, such that the range is
+        ! limited to the actual physical face. Viscous data, like skin
+        ! friction, need gradIent information, which is not available
+        ! in the halo's.
+
+        ! CellRange contains the range of the current block in the
+        ! original cgns block. Substract the offset and store the local
+        ! range in rangeCell.
+
+        rangeCell(1, 1) = cellRange(1, 1) - iBegor + 1
+        rangeCell(1, 2) = cellRange(1, 2) - iBegor + 1
+
+        rangeCell(2, 1) = cellRange(2, 1) - jBegor + 1
+        rangeCell(2, 2) = cellRange(2, 2) - jBegor + 1
+
+        rangeCell(3, 1) = cellRange(3, 1) - kBegor + 1
+        rangeCell(3, 2) = cellRange(3, 2) - kBegor + 1
+        !
+        !                Viscous variables for a non-viscous wall.
+        !                Simply set the variables to zero and return.
+        !
+        if (.not. viscousSubface) then
+
+            select case (solName)
+
+            case (cgnsSkinFmag, cgnsStanton, cgnsYplus, &
+                  cgnsSkinFx, cgnsSkinFy, cgnsSkinFz)
+
+                ! Update the counter and set this entry of buffer to 0.
+
+                do k = rangeCell(3, 1), rangeCell(3, 2)
+                    do j = rangeCell(2, 1), rangeCell(2, 2)
+                        do i = rangeCell(1, 1), rangeCell(1, 2)
+                            nn = nn + 1
+                            buffer(nn) = zero
+                        end do
+                    end do
+                end do
+
+                ! Work has been done for this variable. So return.
+
+                return
+
+            end select
+        end if
+        !
+        !       Determine the face on which the subface is located and set
+        !       a couple of variables accordingly. In this way a generic
+        !       treatment is possible and there is no need to repeat the code
+        !       for each of the six block faces.
+        !       Note that for dd2Wall a slightly different notation must be
+        !       used. Reason is that d2Wall starts at index 2, rather than 0.
+        !
+
+        select case (faceID)
+
+        case (iMin)
+            rangeFace(1, 1:2) = rangeCell(2, 1:2)
+            rangeFace(2, 1:2) = rangeCell(3, 1:2)
+            iiMax = jl; jjMax = kl
+
+            ! We need to get the mesh coordinates further down in order to compute
+            ! the correct Cp-normalisation for rotational setups.
+            ! The flow variables, w(0:ib,0:jb,0:kb,1:nw), we point to
+            ! below uses what is defined in type blockType in block.F90 as
+            !   !  ib, jb, kb - Block integer dimensions for double halo
+            !   !               cell-centered quantities.
+            ! BUT the mesh, x(0:ie,0:je,0:ke,3), is defined with the single halos:
+            !   !  ie, je, ke - Block integer dimensions for single halo
+            !   !               cell-centered quantities.
+            ! This explains why in the (iMax)-case we use:
+            !        ww1    => w(ie,1:,1:,:);   ww2    => w(il,1:,1:,:)
+            ! namely, we use single halos for w(:,:,:,:) instead of the
+            ! usual double halos...
+
+            ! we don't have double halo structure for x, so we start from 0
+            xx1 => x(0, :, :, :); xx2 => x(1, :, :, :) ! 1 is our 2 since we are
+            ! single haloed...
+            ww1 => w(1, 1:, 1:, :); ww2 => w(2, 1:, 1:, :)
+            pp1 => p(1, 1:, 1:); pp2 => p(2, 1:, 1:)
+            ss => si(1, :, :, :); fact = -one
+
+            pp1 => p(1, 1:, 1:); pp2 => p(2, 1:, 1:)
+            gamma1 => gamma(1, 1:, 1:); gamma2 => gamma(2, 1:, 1:)
+
+            if (blockIsMoving) then
+                ss1 => s(1, 1:, 1:, :); ss2 => s(2, 1:, 1:, :)
+            end if
+
+            iblank2 => iblank(2, 1:, 1:)
+            viscPointer => viscIminPointer
+
+            if (viscous) then
+                rlv1 => rlv(1, 1:, 1:); rlv2 => rlv(2, 1:, 1:)
+            end if
 
-    rangeCell(3,1) = cellRange(3,1) - kBegor + 1
-    rangeCell(3,2) = cellRange(3,2) - kBegor + 1
-    !
-    !                Viscous variables for a non-viscous wall.
-    !                Simply set the variables to zero and return.
-    !
-    if(.not. viscousSubface) then
+            if (equations == RANSEquations) dd2Wall => d2Wall(2, :, :)
+            subface_iBegOr = jBegOr
+            subface_iEndOr = jEndOr
 
-       select case (solName)
+            subface_jBegOr = kBegOr
+            subface_jEndOr = kEndOr
 
-       case (cgnsSkinFmag, cgnsStanton, cgnsYplus, &
-            cgnsSkinFx, cgnsSkinFy, cgnsSkinFz)
+            !===============================================================
 
-          ! Update the counter and set this entry of buffer to 0.
+        case (iMax)
+            rangeFace(1, 1:2) = rangeCell(2, 1:2)
+            rangeFace(2, 1:2) = rangeCell(3, 1:2)
+            iiMax = jl; jjMax = kl
 
-          do k=rangeCell(3,1),rangeCell(3,2)
-             do j=rangeCell(2,1),rangeCell(2,2)
-                do i=rangeCell(1,1),rangeCell(1,2)
-                   nn = nn + 1
-                   buffer(nn) = zero
-                enddo
-             enddo
-          enddo
+            xx1 => x(ie - 1, :, :, :); xx2 => x(il - 1, :, :, :)
+            !
+            ww1 => w(ie, 1:, 1:, :); ww2 => w(il, 1:, 1:, :)
+            ss => si(il, :, :, :); fact = one
 
-          ! Work has been done for this variable. So return.
+            pp1 => p(ie, 1:, 1:); pp2 => p(il, 1:, 1:)
+            gamma1 => gamma(ie, 1:, 1:); gamma2 => gamma(il, 1:, 1:)
 
-          return
+            if (blockIsMoving) then
+                ss1 => s(ie - 1, 1:, 1:, :); ss2 => s(ie, 1:, 1:, :)
+            end if
 
-       end select
-    endif
-    !
-    !       Determine the face on which the subface is located and set
-    !       a couple of variables accordingly. In this way a generic
-    !       treatment is possible and there is no need to repeat the code
-    !       for each of the six block faces.
-    !       Note that for dd2Wall a slightly different notation must be
-    !       used. Reason is that d2Wall starts at index 2, rather than 0.
-    !
+            iblank2 => iblank(il, 1:, 1:)
+            viscPointer => viscImaxPointer
 
-    select case (faceID)
+            if (viscous) then
+                rlv1 => rlv(ie, 1:, 1:); rlv2 => rlv(il, 1:, 1:)
+            end if
 
-    case (iMin)
-       rangeFace(1,1:2) = rangeCell(2,1:2)
-       rangeFace(2,1:2) = rangeCell(3,1:2)
-       iiMax = jl; jjMax = kl
+            if (equations == RANSEquations) dd2Wall => d2Wall(il, :, :)
 
-       ! We need to get the mesh coordinates further down in order to compute
-       ! the correct Cp-normalisation for rotational setups.
-       ! The flow variables, w(0:ib,0:jb,0:kb,1:nw), we point to
-       ! below uses what is defined in type blockType in block.F90 as
-       !   !  ib, jb, kb - Block integer dimensions for double halo
-       !   !               cell-centered quantities.
-       ! BUT the mesh, x(0:ie,0:je,0:ke,3), is defined with the single halos:
-       !   !  ie, je, ke - Block integer dimensions for single halo
-       !   !               cell-centered quantities.
-       ! This explains why in the (iMax)-case we use:
-       !        ww1    => w(ie,1:,1:,:);   ww2    => w(il,1:,1:,:)
-       ! namely, we use single halos for w(:,:,:,:) instead of the
-       ! usual double halos...
+            subface_iBegOr = jBegOr
+            subface_iEndOr = jEndOr
 
+            subface_jBegOr = kBegOr
+            subface_jEndOr = kEndOr
 
-       ! we don't have double halo structure for x, so we start from 0
-       xx1    => x(0,:,:,:);   xx2    => x(1,:,:,:) ! 1 is our 2 since we are
-       ! single haloed...
-       ww1    => w(1,1:,1:,:);   ww2    => w(2,1:,1:,:)
-       pp1    => p(1,1:,1:);     pp2    => p(2,1:,1:)
-       ss => si(1,:,:,:) ; fact = -one
+            !===============================================================
 
-       pp1    => p(1,1:,1:);     pp2    => p(2,1:,1:)
-       gamma1 => gamma(1,1:,1:); gamma2 => gamma(2,1:,1:)
+        case (jMin)
+            rangeFace(1, 1:2) = rangeCell(1, 1:2)
+            rangeFace(2, 1:2) = rangeCell(3, 1:2)
+            iiMax = il; jjMax = kl
 
-       if( blockIsMoving)then
-          ss1    => s(1,1:,1:,:);   ss2    => s(2,1:,1:,:)
-       end if
+            xx1 => x(:, 0, :, :); xx2 => x(:, 1, :, :)
+            !
+            ww1 => w(1:, 1, 1:, :); ww2 => w(1:, 2, 1:, :)
+            ss => sj(:, 1, :, :); fact = -one
 
-       iblank2 => iblank(2,1:,1:)
-       viscPointer => viscIminPointer
+            pp1 => p(1:, 1, 1:); pp2 => p(1:, 2, 1:)
+            gamma1 => gamma(1:, 1, 1:); gamma2 => gamma(1:, 2, 1:)
 
-       if( viscous ) then
-          rlv1 => rlv(1,1:,1:); rlv2 => rlv(2,1:,1:)
-       endif
+            if (blockIsMoving) then
+                ss1 => s(1:, 1, 1:, :); ss2 => s(1:, 2, 1:, :)
+            end if
 
-       if(equations == RANSEquations) dd2Wall => d2Wall(2,:,:)
-       subface_iBegOr = jBegOr
-       subface_iEndOr = jEndOr
+            iblank2 => iblank(1:, 2, 1:)
+            viscPointer => viscJminPointer
 
-       subface_jBegOr = kBegOr
-       subface_jEndOr = kEndOr
+            if (viscous) then
+                rlv1 => rlv(1:, 1, 1:); rlv2 => rlv(1:, 2, 1:)
+            end if
 
-       !===============================================================
+            if (equations == RANSEquations) dd2Wall => d2Wall(:, 2, :)
 
-    case (iMax)
-       rangeFace(1,1:2) = rangeCell(2,1:2)
-       rangeFace(2,1:2) = rangeCell(3,1:2)
-       iiMax = jl; jjMax = kl
+            subface_iBegOr = iBegOr
+            subface_iEndOr = iEndOr
 
-       xx1    => x(ie-1,:,:,:);  xx2   => x(il-1,:,:,:)
-       !
-       ww1    => w(ie,1:,1:,:);   ww2    => w(il,1:,1:,:)
-       ss => si(il,:,:,:) ; fact = one
+            subface_jBegOr = kBegOr
+            subface_jEndOr = kEndOr
 
-       pp1    => p(ie,1:,1:);     pp2    => p(il,1:,1:)
-       gamma1 => gamma(ie,1:,1:); gamma2 => gamma(il,1:,1:)
+            !===============================================================
 
-       if( blockIsMoving)then
-          ss1    => s(ie-1,1:,1:,:);   ss2    => s(ie,1:,1:,:)
-       end if
+        case (jMax)
+            rangeFace(1, 1:2) = rangeCell(1, 1:2)
+            rangeFace(2, 1:2) = rangeCell(3, 1:2)
+            iiMax = il; jjMax = kl
 
-       iblank2 => iblank(il,1:,1:)
-       viscPointer => viscImaxPointer
+            xx1 => x(:, je - 1, :, :); xx2 => x(:, jl - 1, :, :)
+            !
+            ww1 => w(1:, je, 1:, :); ww2 => w(1:, jl, 1:, :)
+            ss => sj(:, jl, :, :); fact = one
 
-       if( viscous ) then
-          rlv1 => rlv(ie,1:,1:); rlv2 => rlv(il,1:,1:)
-       endif
+            pp1 => p(1:, je, 1:); pp2 => p(1:, jl, 1:)
+            gamma1 => gamma(1:, je, 1:); gamma2 => gamma(1:, jl, 1:)
 
-       if(equations == RANSEquations) dd2Wall => d2Wall(il,:,:)
+            if (blockIsMoving) then
+                ss1 => s(1:, je - 1, 1:, :); ss2 => s(1:, je, 1:, :)
+            end if
 
-       subface_iBegOr = jBegOr
-       subface_iEndOr = jEndOr
+            iblank2 => iblank(1:, jl, 1:)
+            viscPointer => viscJmaxPointer
 
-       subface_jBegOr = kBegOr
-       subface_jEndOr = kEndOr
+            if (viscous) then
+                rlv1 => rlv(1:, je, 1:); rlv2 => rlv(1:, jl, 1:)
+            end if
 
-       !===============================================================
+            if (equations == RANSEquations) dd2Wall => d2Wall(:, jl, :)
 
-    case (jMin)
-       rangeFace(1,1:2) = rangeCell(1,1:2)
-       rangeFace(2,1:2) = rangeCell(3,1:2)
-       iiMax = il; jjMax = kl
+            subface_iBegOr = iBegOr
+            subface_iEndOr = iEndOr
 
-       xx1    => x(:,0,:,:);  xx2   => x(:,1,:,:)
-       !
-       ww1    => w(1:,1,1:,:);   ww2    => w(1:,2,1:,:)
-       ss => sj(:,1,:,:) ; fact = -one
+            subface_jBegOr = kBegOr
+            subface_jEndOr = kEndOr
 
-       pp1    => p(1:,1,1:);     pp2    => p(1:,2,1:)
-       gamma1 => gamma(1:,1,1:); gamma2 => gamma(1:,2,1:)
+            !===============================================================
 
-       if( blockIsMoving)then
-          ss1    => s(1:,1,1:,:);   ss2    => s(1:,2,1:,:)
-       end if
+        case (kMin)
+            rangeFace(1, 1:2) = rangeCell(1, 1:2)
+            rangeFace(2, 1:2) = rangeCell(2, 1:2)
+            iiMax = il; jjMax = jl
 
-       iblank2 => iblank(1:,2,1:)
-       viscPointer => viscJminPointer
+            xx1 => x(:, :, 0, :); xx2 => x(:, :, 1, :)
+            !
+            ww1 => w(1:, 1:, 1, :); ww2 => w(1:, 1:, 2, :)
+            ss => sk(:, :, 1, :); fact = -one
 
-       if( viscous ) then
-          rlv1 => rlv(1:,1,1:); rlv2 => rlv(1:,2,1:)
-       endif
+            pp1 => p(1:, 1:, 1); pp2 => p(1:, 1:, 2)
+            gamma1 => gamma(1:, 1:, 1); gamma2 => gamma(1:, 1:, 2)
 
-       if(equations == RANSEquations) dd2Wall => d2Wall(:,2,:)
+            if (blockIsMoving) then
+                ss1 => s(1:, 1:, 1, :); ss2 => s(1:, 1:, 2, :)
+            end if
 
+            iblank2 => iblank(1:, 1:, 2)
+            viscPointer => viscKminPointer
 
-       subface_iBegOr = iBegOr
-       subface_iEndOr = iEndOr
+            if (viscous) then
+                rlv1 => rlv(1:, 1:, 1); rlv2 => rlv(1:, 1:, 2)
+            end if
 
-       subface_jBegOr = kBegOr
-       subface_jEndOr = kEndOr
+            if (equations == RANSEquations) dd2Wall => d2Wall(:, :, 2)
 
-       !===============================================================
+            subface_iBegOr = iBegOr
+            subface_iEndOr = iEndOr
 
-    case (jMax)
-       rangeFace(1,1:2) = rangeCell(1,1:2)
-       rangeFace(2,1:2) = rangeCell(3,1:2)
-       iiMax = il; jjMax = kl
+            subface_jBegOr = jBegOr
+            subface_jEndOr = jEndOr
 
-       xx1    => x(:,je-1,:,:);  xx2   => x(:,jl-1,:,:)
-       !
-       ww1    => w(1:,je,1:,:);   ww2    => w(1:,jl,1:,:)
-       ss => sj(:,jl,:,:); fact = one
+            !===============================================================
 
-       pp1    => p(1:,je,1:);     pp2    => p(1:,jl,1:)
-       gamma1 => gamma(1:,je,1:); gamma2 => gamma(1:,jl,1:)
+        case (kMax)
+            rangeFace(1, 1:2) = rangeCell(1, 1:2)
+            rangeFace(2, 1:2) = rangeCell(2, 1:2)
+            iiMax = il; jjMax = jl
 
-       if( blockIsMoving)then
-          ss1    => s(1:,je-1,1:,:);   ss2    => s(1:,je,1:,:)
-       end if
+            xx1 => x(:, :, ke - 1, :); xx2 => x(:, :, kl - 1, :)
+            !
+            ww1 => w(1:, 1:, ke, :); ww2 => w(1:, 1:, kl, :)
+            ss => sk(:, :, kl, :); fact = one
 
-       iblank2 => iblank(1:,jl,1:)
-       viscPointer => viscJmaxPointer
+            pp1 => p(1:, 1:, ke); pp2 => p(1:, 1:, kl)
+            gamma1 => gamma(1:, 1:, ke); gamma2 => gamma(1:, 1:, kl)
 
-       if( viscous ) then
-          rlv1 => rlv(1:,je,1:); rlv2 => rlv(1:,jl,1:)
-       endif
+            if (blockIsMoving) then
+                ss1 => s(1:, 1:, ke - 1, :); ss2 => s(1:, 1:, ke, :)
+            end if
 
-       if(equations == RANSEquations) dd2Wall => d2Wall(:,jl,:)
+            iblank2 => iblank(1:, 1:, kl)
+            viscPointer => viscKmaxPointer
 
-       subface_iBegOr = iBegOr
-       subface_iEndOr = iEndOr
+            if (viscous) then
+                rlv1 => rlv(1:, 1:, ke); rlv2 => rlv(1:, 1:, kl)
+            end if
 
-       subface_jBegOr = kBegOr
-       subface_jEndOr = kEndOr
+            if (equations == RANSEquations) dd2Wall => d2Wall(:, :, kl)
 
-       !===============================================================
+            subface_iBegOr = iBegOr
+            subface_iEndOr = iEndOr
 
-    case (kMin)
-       rangeFace(1,1:2) = rangeCell(1,1:2)
-       rangeFace(2,1:2) = rangeCell(2,1:2)
-       iiMax = il; jjMax = jl
+            subface_jBegOr = jBegOr
+            subface_jEndOr = jEndOr
 
-       xx1    => x(:,:,0,:);  xx2   => x(:,:,1,:)
-       !
-       ww1    => w(1:,1:,1,:);   ww2    => w(1:,1:,2,:)
-       ss => sk(:,:,1,:);  fact = -one
+        end select
+        !
+        !       The actual part for storing the data. Determine the variable
+        !       to be written and loop over the boundary faces of the subface.
+        !
+        ! Determine the variable to be written.
 
-       pp1    => p(1:,1:,1);     pp2    => p(1:,1:,2)
-       gamma1 => gamma(1:,1:,1); gamma2 => gamma(1:,1:,2)
+        varName:select case(solName)
 
-       if( blockIsMoving)then
-          ss1    => s(1:,1:,1,:);   ss2    => s(1:,1:,2,:)
-       end if
+        case (cgnsDensity)
 
-       iblank2 => iblank(1:,1:,2)
-       viscPointer => viscKminPointer
+        do j = rangeFace(2, 1), rangeFace(2, 2)
+            do i = rangeFace(1, 1), rangeFace(1, 2)
+                nn = nn + 1
+                buffer(nn) = half*(ww1(i, j, irho) + ww2(i, j, irho))
+            end do
+        end do
 
-       if( viscous ) then
-          rlv1 => rlv(1:,1:,1); rlv2 => rlv(1:,1:,2)
-       endif
+        !===============================================================
 
-       if(equations == RANSEquations) dd2Wall => d2Wall(:,:,2)
+        case (cgnsPressure)
 
-       subface_iBegOr = iBegOr
-       subface_iEndOr = iEndOr
+        do j = rangeFace(2, 1), rangeFace(2, 2)
+            do i = rangeFace(1, 1), rangeFace(1, 2)
+                nn = nn + 1
+                buffer(nn) = half*(pp1(i, j) + pp2(i, j))
+            end do
+        end do
 
-       subface_jBegOr = jBegOr
-       subface_jEndOr = jEndOr
+        !===============================================================
 
-       !===============================================================
+        case (cgnsTemp)
 
-    case (kMax)
-       rangeFace(1,1:2) = rangeCell(1,1:2)
-       rangeFace(2,1:2) = rangeCell(2,1:2)
-       iiMax = il; jjMax = jl
+        do j = rangeFace(2, 1), rangeFace(2, 2)
+            do i = rangeFace(1, 1), rangeFace(1, 2)
+                nn = nn + 1
+                buffer(nn) = (pp1(i, j) + pp2(i, j)) &
+                             /(RGas*(ww1(i, j, irho) + ww2(i, j, irho)))
+            end do
+        end do
 
-       xx1    => x(:,:,ke-1,:);  xx2   => x(:,:,kl-1,:)
-       !
-       ww1    => w(1:,1:,ke,:);   ww2    => w(1:,1:,kl,:)
-       ss => sk(:,:,kl,:);  fact = one
+        !===============================================================
 
-       pp1    => p(1:,1:,ke);     pp2    => p(1:,1:,kl)
-       gamma1 => gamma(1:,1:,ke); gamma2 => gamma(1:,1:,kl)
+        case (cgnsVelx)
 
-       if( blockIsMoving)then
-          ss1    => s(1:,1:,ke-1,:);   ss2    => s(1:,1:,ke,:)
-       end if
+        do j = rangeFace(2, 1), rangeFace(2, 2)
+            do i = rangeFace(1, 1), rangeFace(1, 2)
+                nn = nn + 1
+                if (viscousSurfaceVelocities .and. viscous) then
+                    buffer(nn) = ww2(i, j, ivx)
+                else
+                    buffer(nn) = half*(ww1(i, j, ivx) + ww2(i, j, ivx))
+                end if
+            end do
+        end do
 
-       iblank2 => iblank(1:,1:,kl)
-       viscPointer => viscKmaxPointer
+        !===============================================================
 
-       if( viscous ) then
-          rlv1 => rlv(1:,1:,ke); rlv2 => rlv(1:,1:,kl)
-       endif
+        case (cgnsVely)
 
-       if(equations == RANSEquations) dd2Wall => d2Wall(:,:,kl)
+        do j = rangeFace(2, 1), rangeFace(2, 2)
+            do i = rangeFace(1, 1), rangeFace(1, 2)
+                nn = nn + 1
+                if (viscousSurfaceVelocities .and. viscous) then
+                    buffer(nn) = ww2(i, j, ivy)
+                else
+                    buffer(nn) = half*(ww1(i, j, ivy) + ww2(i, j, ivy))
+                end if
+            end do
+        end do
 
-       subface_iBegOr = iBegOr
-       subface_iEndOr = iEndOr
+        !===============================================================
 
-       subface_jBegOr = jBegOr
-       subface_jEndOr = jEndOr
+        case (cgnsVelz)
 
-    end select
-    !
-    !       The actual part for storing the data. Determine the variable
-    !       to be written and loop over the boundary faces of the subface.
-    !
-    ! Determine the variable to be written.
-
-    varName: select case (solName)
-
-    case (cgnsDensity)
-
-       do j=rangeFace(2,1), rangeFace(2,2)
-          do i=rangeFace(1,1), rangeFace(1,2)
-             nn = nn + 1
-             buffer(nn) = half*(ww1(i,j,irho) + ww2(i,j,irho))
-          enddo
-       enddo
-
-       !===============================================================
-
-    case (cgnsPressure)
-
-       do j=rangeFace(2,1), rangeFace(2,2)
-          do i=rangeFace(1,1), rangeFace(1,2)
-             nn = nn + 1
-             buffer(nn) = half*(pp1(i,j) + pp2(i,j))
-          enddo
-       enddo
-
-       !===============================================================
-
-    case (cgnsTemp)
-
-       do j=rangeFace(2,1), rangeFace(2,2)
-          do i=rangeFace(1,1), rangeFace(1,2)
-             nn = nn + 1
-             buffer(nn) = (pp1(i,j) + pp2(i,j)) &
-                  / (RGas*(ww1(i,j,irho) + ww2(i,j,irho)))
-          enddo
-       enddo
-
-       !===============================================================
-
-    case (cgnsVelx)
-
-       do j=rangeFace(2,1), rangeFace(2,2)
-          do i=rangeFace(1,1), rangeFace(1,2)
-             nn = nn + 1
-             if (viscousSurfaceVelocities .and. viscous) then
-                buffer(nn) = ww2(i,j,ivx)
-             else
-                buffer(nn) = half*(ww1(i,j,ivx) + ww2(i,j,ivx))
-             end if
-          enddo
-       enddo
-
-       !===============================================================
-
-    case (cgnsVely)
-
-       do j=rangeFace(2,1), rangeFace(2,2)
-          do i=rangeFace(1,1), rangeFace(1,2)
-             nn = nn + 1
-             if (viscousSurfaceVelocities .and. viscous) then
-                buffer(nn) = ww2(i,j,ivy)
-             else
-                buffer(nn) = half*(ww1(i,j,ivy) + ww2(i,j,ivy))
-             end if
-          enddo
-       enddo
-
-       !===============================================================
-
-    case (cgnsVelz)
-
-       do j=rangeFace(2,1), rangeFace(2,2)
-          do i=rangeFace(1,1), rangeFace(1,2)
-             nn = nn + 1
-             if (viscousSurfaceVelocities .and. viscous) then
-                buffer(nn) = ww2(i,j,ivz)
-             else
-                buffer(nn) = half*(ww1(i,j,ivz) + ww2(i,j,ivz))
-             end if
-
-          enddo
-       enddo
-
-       !===============================================================
-
-    case (cgnsRelVelx)
-       do j=rangeFace(2,1), rangeFace(2,2)
-          do i=rangeFace(1,1), rangeFace(1,2)
-             nn = nn + 1
-             if (viscousSurfaceVelocities .and. viscous) then
-                buffer(nn) = ww2(i,j,ivx) - ss2(i,j,1)
-             else
-                buffer(nn) = half*(ww1(i,j,ivx) + ww2(i,j,ivx))-half*(ss1(i,j,1) + ss2(i,j,1))
-             end if
-          enddo
-       enddo
-
-       !===============================================================
-
-    case (cgnsRelVely)
-       do j=rangeFace(2,1), rangeFace(2,2)
-          do i=rangeFace(1,1), rangeFace(1,2)
-             nn = nn + 1
-             if (viscousSurfaceVelocities .and. viscous) then
-                buffer(nn) = ww2(i,j,ivy) - ss2(i,j,2)
-             else
-                buffer(nn) = half*(ww1(i,j,ivy) + ww2(i,j,ivy))-half*(ss1(i,j,2) + ss2(i,j,2))
-             end if
-          enddo
-       enddo
-
-       !===============================================================
-
-    case (cgnsRelVelz)
-       do j=rangeFace(2,1), rangeFace(2,2)
-          do i=rangeFace(1,1), rangeFace(1,2)
-             nn = nn + 1
-             if (viscousSurfaceVelocities .and. viscous) then
-                buffer(nn) = ww2(i,j,ivz) - ss2(i,j,3)
-             else
-                buffer(nn) = half*(ww1(i,j,ivz) + ww2(i,j,ivz))-half*(ss1(i,j,3) + ss2(i,j,3))
-             end if
-          enddo
-       enddo
-
-
-       !================================================================
-
-    case (cgnsCp)
-       ! Calclulating the square of (dimensional) inflow velocity from MachCoef
-       !
-       ! Same formula used in referenceState (see initializeFlow.F90),
-       ! multiplied by the square of the reference velocity (uRef).
-       ! MachCoef is initialized in inputParamRoutines.F90 and can also be passed from the python layer
-       ! Note that the reference quantities (such as pRef, uRef, rhoInfDim, ..) are defined in  module
-       ! flowVarRefState (see flowVarRefState.F90) and first set in the subroutine referenceState
-       ! (see initializeFlow.F90).
-       uInfDim2 = (MachCoef*MachCoef*gammaInf*pInf/rhoInf)*uRef*uRef
-
-       do j=rangeFace(2,1), rangeFace(2,2)
-          do i=rangeFace(1,1), rangeFace(1,2)
-             nn = nn + 1
-             ! Get frame rotation rate and local surface coordinates
-             ! by averaging wall and halo cell centers
-             ! (xx1,xx2 are pointers to the mesh coordinates, see block.F90)
-             rrate_=cgnsdoms(1)%rotrate
-             r_(1) =   (half*(xx1(i,j,1) + xx2(i,j,1)))
-             r_(2) =   (half*(xx1(i,j,2) + xx2(i,j,2)))
-             r_(3) =   (half*(xx1(i,j,3) + xx2(i,j,3)))
-             ! calc cross-product between rotation rate and r_
-             ! to obtain local apparent wall velocity
-             wCrossR(1) = rrate_(2)*r_(3) - rrate_(3)*r_(2)
-             wCrossR(2) = rrate_(3)*r_(1) - rrate_(1)*r_(3)
-             wCrossR(3) = rrate_(1)*r_(2) - rrate_(2)*r_(1)
-             rot_speed2 = wCrossR(1)**2 +wCrossR(2)**2 +wCrossR(3)**2
-             buffer(nn) = ((half*(pp1(i,j) + pp2(i,j)) - pInf)*pRef) &
-                  / (half*(rhoInfDim)*(uInfDim2 + rot_speed2))
-             ! Comments on the Cp (buffer(nn)) calculation above:
-             !
-             ! Cp = (P_i - P_0) / (0.5*rho*(U_a)^2)
-             !
-             ! Numerator (dimensionalized):
-             !     (P_i-P_0) -> (half*(pp1(i,j)+pp2(i,j))-pInf) * pRef
-             !     P_i is given by the average of the wall and halo cell
-             !     (see comment at the beginning of storeSurfsolInBuffer)
-             !     pp1, pp2 are (nondimensional) pressure pointers, e.g. pp1 => p(1,1:,1:)
-             !
-             ! Denominator (dimensionalized): (0.5*rho*(U_a)^2) ->
-             !       (half*(rhoInfDim)*(uInfDim2 + rot_speed2))
-             !       The local velocity term includes the rotational components!
-          enddo
-       enddo
-
-    case (cgnsPtotloss)
-
-       ! First compute the total pressure of the free stream.
-
-       call computePtot(rhoInf, uInf, zero, zero, &
-            pInf, ptotInf)
-       ptotInf = one/ptotInf
-
-       ! Loop over the faces and compute the total pressure loss.
-
-       do j=rangeFace(2,1), rangeFace(2,2)
-          do i=rangeFace(1,1), rangeFace(1,2)
-
-             psurf = half*(pp1(i,j) + pp2(i,j))
-             rsurf = half*(ww1(i,j,irho) + ww2(i,j,irho))
-             usurf = half*(ww1(i,j,ivx)  + ww2(i,j,ivx))
-             vsurf = half*(ww1(i,j,ivy)  + ww2(i,j,ivy))
-             wsurf = half*(ww1(i,j,ivz)  + ww2(i,j,ivz))
-
-             call computePtot(rsurf, usurf, vsurf, wsurf, &
-                  psurf, ptot)
-
-             nn = nn + 1
-             buffer(nn) = one - ptot*ptotInf
-          enddo
-       enddo
-
-       !===============================================================
-
-    case (cgnsMach)
-
-       do j=rangeFace(2,1), rangeFace(2,2)
-          do i=rangeFace(1,1), rangeFace(1,2)
-
-             psurf  = half*(pp1(i,j) + pp2(i,j))
-             rsurf  = half*(ww1(i,j,irho) + ww2(i,j,irho))
-             usurf  = half*(ww1(i,j,ivx)  + ww2(i,j,ivx))
-             vsurf  = half*(ww1(i,j,ivy)  + ww2(i,j,ivy))
-             wsurf  = half*(ww1(i,j,ivz)  + ww2(i,j,ivz))
-             m2surf = rsurf*(usurf**2 + vsurf**2 + wsurf**2) &
-                  / (half*(gamma1(i,j) + gamma2(i,j))*psurf)
-
-             nn = nn + 1
-             buffer(nn) = sqrt(m2surf)
-          enddo
-       enddo
-
-
-       !===============================================================
-
-    case (cgnsRelMach)
-
-       do j=rangeFace(2,1), rangeFace(2,2)
-          do i=rangeFace(1,1), rangeFace(1,2)
-
-             psurf  = half*(pp1(i,j) + pp2(i,j))
-             rsurf  = half*(ww1(i,j,irho) + ww2(i,j,irho))
-             usurf  = half*(ww1(i,j,ivx)  + ww2(i,j,ivx))-half*(ss1(i,j,1) + ss2(i,j,1))
-             vsurf  = half*(ww1(i,j,ivy)  + ww2(i,j,ivy))-half*(ss1(i,j,2) + ss2(i,j,2))
-             wsurf  = half*(ww1(i,j,ivz)  + ww2(i,j,ivz))-half*(ss1(i,j,3) + ss2(i,j,3))
-             m2surf = rsurf*(usurf**2 + vsurf**2 + wsurf**2) &
-                  / (half*(gamma1(i,j) + gamma2(i,j))*psurf)
-
-             nn = nn + 1
-             buffer(nn) = sqrt(m2surf)
-          enddo
-       enddo
-
-       !        ================================================================
-
-    case (cgnsSkinFmag, cgnsYplus, &
-         cgnsSkinFx, cgnsSkinFy, cgnsSkinFz)
-
-       ! To avoid a lot of code duplication these 5 variables are
-       ! treated together.
-
-       ! Multiplication factor to obtain the skin friction from
-       ! the wall shear stress.
-
-       fact = two/(gammaInf*pInf*MachCoef*MachCoef)
-
-       ! Loop over the given range of faces. As the viscous data is
-       ! only present in the owned faces, the values of the halo's
-       ! are set equal to the nearest physical face. Therefore the
-       ! working indices are ii and jj.
-       do j=rangeFace(2,1), rangeFace(2,2)
+        do j = rangeFace(2, 1), rangeFace(2, 2)
+            do i = rangeFace(1, 1), rangeFace(1, 2)
+                nn = nn + 1
+                if (viscousSurfaceVelocities .and. viscous) then
+                    buffer(nn) = ww2(i, j, ivz)
+                else
+                    buffer(nn) = half*(ww1(i, j, ivz) + ww2(i, j, ivz))
+                end if
 
-         ! if statements are used to copy the value of the interior
-         ! cell since the value isn't defined in the rind cell
+            end do
+        end do
 
-         if (present(jBeg) .and. present(jEnd) .and. (useRindLayer)) then
-            jor = j + subface_jBegOr - 1
-            if (jor == jBeg) then
-               jj = j + 1
-            else if (jor == jEnd +1 ) then
-               jj = j - 1
-            else
-               jj = j
-            endif
-         else
-            jj = j
-
-         end if
-
-          do i=rangeFace(1,1), rangeFace(1,2)
-             if (present(iBeg) .and. present( iEnd) .and. (useRindLayer)) then
-               ior = i + subface_iBegOr - 1
-               if (ior == iBeg) then
-                  ii = i + 1
-               else if (ior == iEnd + 1) then
-                  ii = i - 1
-               else
-                  ii = i
-               endif
-            else
-               ii = i
-            endif
+        !===============================================================
 
-             ! Determine the viscous subface on which this
-             ! face is located.
+        case (cgnsRelVelx)
+        do j = rangeFace(2, 1), rangeFace(2, 2)
+            do i = rangeFace(1, 1), rangeFace(1, 2)
+                nn = nn + 1
+                if (viscousSurfaceVelocities .and. viscous) then
+                    buffer(nn) = ww2(i, j, ivx) - ss2(i, j, 1)
+                else
+                    buffer(nn) = half*(ww1(i, j, ivx) + ww2(i, j, ivx)) - half*(ss1(i, j, 1) + ss2(i, j, 1))
+                end if
+            end do
+        end do
 
-             mm = viscPointer(ii,jj)
+        !===============================================================
 
-             ! Store the 6 components of the viscous stress tensor
-             ! a bit easier.
+        case (cgnsRelVely)
+        do j = rangeFace(2, 1), rangeFace(2, 2)
+            do i = rangeFace(1, 1), rangeFace(1, 2)
+                nn = nn + 1
+                if (viscousSurfaceVelocities .and. viscous) then
+                    buffer(nn) = ww2(i, j, ivy) - ss2(i, j, 2)
+                else
+                    buffer(nn) = half*(ww1(i, j, ivy) + ww2(i, j, ivy)) - half*(ss1(i, j, 2) + ss2(i, j, 2))
+                end if
+            end do
+        end do
 
-             tauxx = viscSubface(mm)%tau(ii,jj,1)
-             tauyy = viscSubface(mm)%tau(ii,jj,2)
-             tauzz = viscSubface(mm)%tau(ii,jj,3)
-             tauxy = viscSubface(mm)%tau(ii,jj,4)
-             tauxz = viscSubface(mm)%tau(ii,jj,5)
-             tauyz = viscSubface(mm)%tau(ii,jj,6)
+        !===============================================================
 
-             ! Compute the "unit" force on this face. The unit normal
-             ! is outward pointing per definition. A minus sign is
-             ! present, because of the definition of the viscous
-             ! stress tensor. Note that in the normal the indices i
-             ! and j could be used. However this is not done.
+        case (cgnsRelVelz)
+        do j = rangeFace(2, 1), rangeFace(2, 2)
+            do i = rangeFace(1, 1), rangeFace(1, 2)
+                nn = nn + 1
+                if (viscousSurfaceVelocities .and. viscous) then
+                    buffer(nn) = ww2(i, j, ivz) - ss2(i, j, 3)
+                else
+                    buffer(nn) = half*(ww1(i, j, ivz) + ww2(i, j, ivz)) - half*(ss1(i, j, 3) + ss2(i, j, 3))
+                end if
+            end do
+        end do
+
+        !================================================================
+
+        case (cgnsCp)
+        ! Calclulating the square of (dimensional) inflow velocity from MachCoef
+        !
+        ! Same formula used in referenceState (see initializeFlow.F90),
+        ! multiplied by the square of the reference velocity (uRef).
+        ! MachCoef is initialized in inputParamRoutines.F90 and can also be passed from the python layer
+        ! Note that the reference quantities (such as pRef, uRef, rhoInfDim, ..) are defined in  module
+        ! flowVarRefState (see flowVarRefState.F90) and first set in the subroutine referenceState
+        ! (see initializeFlow.F90).
+        uInfDim2 = (MachCoef*MachCoef*gammaInf*pInf/rhoInf)*uRef*uRef
+
+        do j = rangeFace(2, 1), rangeFace(2, 2)
+            do i = rangeFace(1, 1), rangeFace(1, 2)
+                nn = nn + 1
+                ! Get frame rotation rate and local surface coordinates
+                ! by averaging wall and halo cell centers
+                ! (xx1,xx2 are pointers to the mesh coordinates, see block.F90)
+                rrate_ = cgnsdoms(1)%rotrate
+                r_(1) = (half*(xx1(i, j, 1) + xx2(i, j, 1)))
+                r_(2) = (half*(xx1(i, j, 2) + xx2(i, j, 2)))
+                r_(3) = (half*(xx1(i, j, 3) + xx2(i, j, 3)))
+                ! calc cross-product between rotation rate and r_
+                ! to obtain local apparent wall velocity
+                wCrossR(1) = rrate_(2)*r_(3) - rrate_(3)*r_(2)
+                wCrossR(2) = rrate_(3)*r_(1) - rrate_(1)*r_(3)
+                wCrossR(3) = rrate_(1)*r_(2) - rrate_(2)*r_(1)
+                rot_speed2 = wCrossR(1)**2 + wCrossR(2)**2 + wCrossR(3)**2
+                buffer(nn) = ((half*(pp1(i, j) + pp2(i, j)) - pInf)*pRef) &
+                             /(half*(rhoInfDim)*(uInfDim2 + rot_speed2))
+                ! Comments on the Cp (buffer(nn)) calculation above:
+                !
+                ! Cp = (P_i - P_0) / (0.5*rho*(U_a)^2)
+                !
+                ! Numerator (dimensionalized):
+                !     (P_i-P_0) -> (half*(pp1(i,j)+pp2(i,j))-pInf) * pRef
+                !     P_i is given by the average of the wall and halo cell
+                !     (see comment at the beginning of storeSurfsolInBuffer)
+                !     pp1, pp2 are (nondimensional) pressure pointers, e.g. pp1 => p(1,1:,1:)
+                !
+                ! Denominator (dimensionalized): (0.5*rho*(U_a)^2) ->
+                !       (half*(rhoInfDim)*(uInfDim2 + rot_speed2))
+                !       The local velocity term includes the rotational components!
+            end do
+        end do
+
+        case (cgnsPtotloss)
+
+        ! First compute the total pressure of the free stream.
+
+        call computePtot(rhoInf, uInf, zero, zero, &
+                         pInf, ptotInf)
+        ptotInf = one/ptotInf
+
+        ! Loop over the faces and compute the total pressure loss.
+
+        do j = rangeFace(2, 1), rangeFace(2, 2)
+            do i = rangeFace(1, 1), rangeFace(1, 2)
+
+                psurf = half*(pp1(i, j) + pp2(i, j))
+                rsurf = half*(ww1(i, j, irho) + ww2(i, j, irho))
+                usurf = half*(ww1(i, j, ivx) + ww2(i, j, ivx))
+                vsurf = half*(ww1(i, j, ivy) + ww2(i, j, ivy))
+                wsurf = half*(ww1(i, j, ivz) + ww2(i, j, ivz))
+
+                call computePtot(rsurf, usurf, vsurf, wsurf, &
+                                 psurf, ptot)
 
-             norm(1) = BCData(mm)%norm(ii,jj,1)
-             norm(2) = BCData(mm)%norm(ii,jj,2)
-             norm(3) = BCData(mm)%norm(ii,jj,3)
+                nn = nn + 1
+                buffer(nn) = one - ptot*ptotInf
+            end do
+        end do
 
-             fx = -(tauxx*norm(1) + tauxy*norm(2) + tauxz*norm(3))
-             fy = -(tauxy*norm(1) + tauyy*norm(2) + tauyz*norm(3))
-             fz = -(tauxz*norm(1) + tauyz*norm(2) + tauzz*norm(3))
+        !===============================================================
 
-             fn = fx*norm(1) + fy*norm(2) + fz*norm(3)
+        case (cgnsMach)
 
-             fx = fx - fn*norm(1)
-             fy = fy - fn*norm(2)
-             fz = fz - fn*norm(3)
+        do j = rangeFace(2, 1), rangeFace(2, 2)
+            do i = rangeFace(1, 1), rangeFace(1, 2)
 
-             ! Determine the variable to be stored and compute it.
-             ! Note that an offset of -1 must be used in dd2Wall,
-             ! because the original array, d2Wall, starts at 2.
-             ! First update the counter nn.
+                psurf = half*(pp1(i, j) + pp2(i, j))
+                rsurf = half*(ww1(i, j, irho) + ww2(i, j, irho))
+                usurf = half*(ww1(i, j, ivx) + ww2(i, j, ivx))
+                vsurf = half*(ww1(i, j, ivy) + ww2(i, j, ivy))
+                wsurf = half*(ww1(i, j, ivz) + ww2(i, j, ivz))
+                m2surf = rsurf*(usurf**2 + vsurf**2 + wsurf**2) &
+                         /(half*(gamma1(i, j) + gamma2(i, j))*psurf)
 
-             nn = nn + 1
+                nn = nn + 1
+                buffer(nn) = sqrt(m2surf)
+            end do
+        end do
 
-             select case (solName)
-             case (cgnsSkinFmag)
-                buffer(nn) = fact*sqrt(fx*fx + fy*fy + fz*fz)
+        !===============================================================
 
-             case (cgnsSkinFx)
-                buffer(nn) = fact*fx
+        case (cgnsRelMach)
 
-             case (cgnsSkinFy)
-                buffer(nn) = fact*fy
+        do j = rangeFace(2, 1), rangeFace(2, 2)
+            do i = rangeFace(1, 1), rangeFace(1, 2)
 
-             case (cgnsSkinFz)
-                buffer(nn) = fact*fz
+                psurf = half*(pp1(i, j) + pp2(i, j))
+                rsurf = half*(ww1(i, j, irho) + ww2(i, j, irho))
+                usurf = half*(ww1(i, j, ivx) + ww2(i, j, ivx)) - half*(ss1(i, j, 1) + ss2(i, j, 1))
+                vsurf = half*(ww1(i, j, ivy) + ww2(i, j, ivy)) - half*(ss1(i, j, 2) + ss2(i, j, 2))
+                wsurf = half*(ww1(i, j, ivz) + ww2(i, j, ivz)) - half*(ss1(i, j, 3) + ss2(i, j, 3))
+                m2surf = rsurf*(usurf**2 + vsurf**2 + wsurf**2) &
+                         /(half*(gamma1(i, j) + gamma2(i, j))*psurf)
 
-             case (cgnsYplus)
-                rsurf      = half*(ww1(ii,jj,irho) + ww2(ii,jj,irho))
-                musurf     = half*(rlv1(ii,jj)     + rlv2(ii,jj))
-                buffer(nn) = sqrt(rsurf*sqrt(fx*fx + fy*fy + fz*fz)) &
-                     * dd2Wall(ii-1,jj-1)/musurf
-             end select
+                nn = nn + 1
+                buffer(nn) = sqrt(m2surf)
+            end do
+        end do
 
-          enddo
-       enddo
+        !        ================================================================
 
-       !        ================================================================
+        case (cgnsSkinFmag, cgnsYplus, &
+              cgnsSkinFx, cgnsSkinFy, cgnsSkinFz)
 
-    case (cgnsStanton)
+        ! To avoid a lot of code duplication these 5 variables are
+        ! treated together.
 
-       ! Some constants needed to compute the stanton number.
+        ! Multiplication factor to obtain the skin friction from
+        ! the wall shear stress.
 
-       gm1   = gammaInf - one
-       a2Tot = gammaInf*pInf*(one + half*gm1*MachCoef*MachCoef) &
-            / rhoInf
-       fact   = MachCoef*sqrt(gammaInf*pInf*rhoInf)/gm1
+        fact = two/(gammaInf*pInf*MachCoef*MachCoef)
 
-       ! Loop over the given range of faces. As the viscous data is
-       ! only present in the owned faces, the values of the halo's
-       ! are set equal to the nearest physical face. Therefore the
-       ! working indices are ii and jj.
-       do j=rangeFace(2,1), rangeFace(2,2)
+        ! Loop over the given range of faces. As the viscous data is
+        ! only present in the owned faces, the values of the halo's
+        ! are set equal to the nearest physical face. Therefore the
+        ! working indices are ii and jj.
+        do j = rangeFace(2, 1), rangeFace(2, 2)
 
-         ! if statements are used to copy the value of the interior
-         ! cell since the value isn't defined in the rind cell
+            ! if statements are used to copy the value of the interior
+            ! cell since the value isn't defined in the rind cell
 
-         if (present(jBeg) .and. present(jEnd) .and. (useRindLayer)) then
-            jor = j + jBegOr - 1
-            if (jor == jBeg) then
-               jj = j + 1
-            else if (jor == jEnd + 1) then
-               jj = j - 1
-            else
-               jj = j
-            endif
-         else
-            jj = j
-
-         end if
-
-          do i=rangeFace(1,1), rangeFace(1,2)
-             if (present(iBeg) .and. present( iEnd) .and. (useRindLayer)) then
-               ior = i + iBegor - 1
-               if (ior == iBeg) then
-                  ii = i + 1
-               else if (ior == iEnd + 1) then
-                  ii = i - 1
-               else
-                  ii = i
-               endif
+            if (present(jBeg) .and. present(jEnd) .and. (useRindLayer)) then
+                jor = j + subface_jBegOr - 1
+                if (jor == jBeg) then
+                    jj = j + 1
+                else if (jor == jEnd + 1) then
+                    jj = j - 1
+                else
+                    jj = j
+                end if
             else
-               ii = i
-            endif
-             ! Determine the viscous subface on which this
-             ! face is located.
-
-             mm = viscPointer(ii,jj)
-
-             ! Compute the heat flux. Multipy with the sign of the
-             ! normal to obtain the correct value.
-
-             qw = viscSubface(mm)%q(ii,jj,1)*BCData(mm)%norm(ii,jj,1) &
-                  + viscSubface(mm)%q(ii,jj,2)*BCData(mm)%norm(ii,jj,2) &
-                  + viscSubface(mm)%q(ii,jj,3)*BCData(mm)%norm(ii,jj,3)
-
-             ! Compute the speed of sound squared at the wall and
-             ! the stanton number, which is stored in buffer.
-
-             a2 = half*(gamma1(ii,jj)   + gamma2(ii,jj)) &
-                  *      (pp1(ii,jj)      + pp2(ii,jj))    &
-                  /      (ww1(ii,jj,irho) + ww2(ii,jj,irho))
-
-             nn = nn + 1
-             buffer(nn) = qw/(fact*(a2Tot-a2))
-
-          enddo
-       enddo
-
-       !        ================================================================
-
-    case (cgnsBlank)
-
-       ! Loop over the given range of faces. Since iblanks are set
-       ! to 2 for boundary conditions and >= 10 for the boundary,
-       ! take the minimum of the value and 1, so that cells with
-       ! valid data always have an iblank of 1.
-
-       do j=rangeFace(2,1), rangeFace(2,2)
-          do i=rangeFace(1,1), rangeFace(1,2)
-             nn = nn + 1
-             buffer(nn) = real(min(iblank2(i,j), 1_intType), realType)
-          enddo
-       enddo
-
-    case (cgnsSepSensor)
-
-       do j=rangeFace(2,1), rangeFace(2,2)
-          do i=rangeFace(1,1), rangeFace(1,2)
-             nn = nn + 1
-
-             ! Get normalized surface velocity:
-             v(1) = ww2(i, j, ivx)
-             v(2) = ww2(i, j, ivy)
-             v(3) = ww2(i, j, ivz)
-
-             ! Normalize
-             v = v / (sqrt(v(1)**2 + v(2)**2 + v(3)**2) + 1e-16)
-
-             ! Dot product with free stream
-             sensor = -dot_product(v, velDirFreeStream)
-
-             !Now run through a smooth heaviside function:
-             sensor = one/(one + exp(-2*sepSensorSharpness*(sensor - sepSensorOffset)))
-             buffer(nn) = sensor
-          enddo
-       enddo
-
-    case (cgnsCavitation)
-       fact = two/(gammaInf*pInf*MachCoef*MachCoef)
-       do j=rangeFace(2,1), rangeFace(2,2)
-          do i=rangeFace(1,1), rangeFace(1,2)
-
-             nn = nn + 1
-             ! Get local pressure
-             plocal = half*(pp1(i,j) + pp2(i,j))
-
-             sensor1 = (-(fact)*(plocal-pInf))- cavitationnumber
-             sensor1 = (sensor1**cavExponent)/(one + exp(2*cavSensorSharpness*(-sensor1 + cavSensorOffset)))
-             buffer(nn) = sensor1
-             !print*, sensor
-          enddo
-       enddo
-    end select varName
-
-  end subroutine storeSurfsolInBuffer
-
-  subroutine storeOldSolInBuffer(buffer, ind, wID, &
-       iBeg, iEnd, jBeg, jEnd, kBeg, kEnd)
-    !
-    !       storeOldSolInBuffer stores the given range of the wID'th
-    !       conservative variable of an old solution in buffer. Needed for
-    !       a time accurate restart. It is assumed that the variables in
-    !       blockPointers already point to the correct block.
-    !
-    use blockPointers
-    use constants
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: ind, wID
-    integer(kind=intType), intent(in) :: iBeg, iEnd, jBeg, jEnd
-    integer(kind=intType), intent(in) :: kBeg, kEnd
-
-    real(kind=realType), dimension(*), intent(out) :: buffer
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, nOld, nn
-
-    ! Store the index in wOld a bit easier.
-
-    nOld = ind - 1
-
-    ! Loop over the cell range of the block and copy the wID'th
-    ! variable in buffer.
-
-    nn = 0
-
-    do k=kBeg,kEnd
-       do j=jBeg,jEnd
-          do i=iBeg,iEnd
-             nn = nn + 1; buffer(nn) = wOld(nOld,i,j,k,wID)
-          enddo
-       enddo
-    enddo
-
-  end subroutine storeOldSolInBuffer
-
-  subroutine describeScheme(string)
-    !
-    !       describeScheme gives a short description about the scheme
-    !       used to obtain the solution. The description is stored in the
-    !       character array string.
-    !
-    use constants
-    use inputDiscretization
-    use inputPhysics
-    use flowVarRefState
-    use commonFormats, only : stringSpace, stringSci5
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    character(len=*), intent(out) :: string
-
-    character(len=maxStringLen) :: upwindFormat = "(A, F7.3, A)"
-
-    ! Write the basic scheme info.
-
-    select case(spaceDiscr)
-    case (dissScalar)
-       write(string, stringSci5) "Scalar dissipation scheme, k2 = ", vis2, ", k4 = ", vis4, "."
-    case (dissMatrix)
-       write(string, stringSci5) "Matrix dissipation scheme, k2 = ", vis2, ", k4 = ", vis4, "."
-    case (dissCusp)
-       write(string, stringSci5) "CUSP dissipation scheme, k2 = ", vis2, ", k4 = ", vis4, "."
-    case (upwind)
-       select case (limiter)
-       case (firstOrder)
-          write(string, stringSpace) "First order upwind scheme."
-       case (noLimiter)
-          write(string, upwindFormat) "Second order upwind scheme using linear reconstruction, &
-          &i.e. no limiter, kappa = ", kappaCoef, "."
-       case (vanAlbeda)
-          write(string, upwindFormat) "Second order upwind scheme with Van Albada limiter, &
-          &kappa =", kappaCoef, "."
-       case (minmod)
-          write(string, upwindFormat) "Second order upwind scheme with Minmod limiter, &
-          &kappa =", kappaCoef, "."
-       end select
-
-       select case (riemann)
-       case (Roe)
-          write(string, stringSpace) trim(string), "Roe's approximate Riemann Solver."
-       case (vanLeer)
-          write(string, stringSpace) trim(string), "Van Leer flux vector splitting."
-       case (ausmdv)
-          write(string, stringSpace) trim(string), "ausmdv flux vector splitting."
-       end select
-
-    end select
-
-    ! In case of the scalar dissipation scheme, write whether or not
-    ! directional scaling has been applied.
-
-    if(spaceDiscr == dissScalar) then
-       if( dirScaling ) then
-          write(string, "(2(A, 1X), ES12.5, A)") trim(string), "Directional scaling of dissipation with exponent", adis, "."
-       else
-          write(string, stringSpace) trim(string), "No directional scaling of dissipation."
-       endif
-    endif
-
-    ! For the Euler equations, write the inviscid wall boundary
-    ! condition treatment.
-
-    if(equations == EulerEquations) then
-       select case (eulerWallBcTreatment)
-       case (constantPressure)
-          write(string, stringSpace) trim(string), "Zero normal pressure gradient", &
-            "for inviscid wall boundary conditions."
-       case (linExtrapolPressure)
-          write(string, stringSpace) trim(string), "Linear extrapolation of normal pressure gradient", &
-            "for inviscid wall boundary conditions."
-       case (quadExtrapolPressure)
-          write(string, stringSpace) trim(string), "Quadratic extrapolation of normal pressure gradIent", &
-            "for inviscid wall boundary conditions."
-       case (normalMomentum)
-          write(string, stringSpace) trim(string), "Normal momentum equation used to determine pressure gradient", &
-            "for inviscid wall boundary conditions."
-       end select
-    endif
-
-    ! If preconditioning is used, write the preconditioner.
-
-    select case(precond)
-    case (Turkel)
-       write(string, stringSpace) trim(string), "Turkel preconditioner for inviscid fluxes."
-    case (ChoiMerkle)
-       write(string, stringSpace) trim(string), "Choi Merkle preconditioner for inviscid fluxes."
-    end select
-
-    ! For a viscous computation write that a central discretization
-    ! is used for the viscous fluxes.
-
-    if( viscous ) then
-       write(string, stringSpace) trim(string), "Central discretization for viscous fluxes."
-    endif
-
-  end subroutine describeScheme
-
-  subroutine isoSurfNames(solNames)
-    !
-    !       isoNames sets the names for the volume variables to be
-    !       written to the isosurfaces. Sids convention names are
-    !       used as much as possible.
-    !
-    use constants
-    use cgnsNames
-    use inputPhysics
-    use flowVarRefState
-    use extraOutput
-    implicit none
-    !
-    !      Subroutine argument.
-    !
-    character(len=*), dimension(*), intent(out) :: solNames
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn
-
-
-    ! Check the additional variables to be written -- there are no
-    ! default variables already written
-    nn = 0
-    if (isoWriteRho) then
-       nn = nn + 1
-       solNames(nn) = cgnsDensity
-    end if
-
-    if (isoWriteVx) then
-       nn = nn + 1
-       solNames(nn) = cgnsVelx
-    end if
-
-    if (isoWriteVy) then
-       nn = nn + 1
-       solNames(nn) = cgnsVely
-    end if
-
-    if (isoWriteVz) then
-       nn = nn + 1
-       solNames(nn) = cgnsVelz
-    end if
-
-    if (isoWriteP) then
-       nn = nn + 1
-       solNames(nn) = cgnsPressure
-    end if
-
-    if( isoWriteTurb ) then
-
-       select case(turbModel)
-
-       case(spalartAllmaras, spalartAllmarasEdwards)
-          nn = nn + 1
-          solNames(nn) = cgnsTurbSaNu
-
-       case(komegaWilcox, komegaModified, menterSST)
-          nn = nn + 1
-          solNames(nn) = cgnsTurbK
-          nn = nn + 1
-          solNames(nn) = cgnsTurbOmega
-
-       case(ktau)
-          nn = nn + 1
-          solNames(nn) = cgnsTurbK
-          nn = nn + 1
-          solNames(nn) = cgnsTurbTau
-
-       case(v2f)
-          nn = nn + 1
-          solNames(nn) = cgnsTurbK
-          nn = nn + 1
-          solNames(nn) = cgnsTurbEpsilon
-          nn = nn + 1
-          solNames(nn) = cgnsTurbV2
-          nn = nn + 1
-          solNames(nn) = cgnsTurbF
-
-       end select
-
-    endif
-
-    if( isoWriteMx )  then
-       nn = nn + 1
-       solNames(nn) = cgnsMomx
-    endif
-
-    if( isoWriteMy )  then
-       nn = nn + 1
-       solNames(nn) = cgnsMomy
-    endif
-
-    if( isoWriteMz )  then
-       nn = nn + 1
-       solNames(nn) = cgnsMomz
-    endif
-
-    if( isoWriteRVx )  then
-       nn = nn + 1
-       solNames(nn) = cgnsRelVelx
-    endif
-
-    if( isoWriteRVy )  then
-       nn = nn + 1
-       solNames(nn) = cgnsRelVely
-    endif
-
-    if( isoWriteRVz )  then
-       nn = nn + 1
-       solNames(nn) = cgnsRelVelz
-    endif
-
-    if( isoWriteRhoe ) then
-       nn = nn + 1
-       solNames(nn) = cgnsEnergy
-    endif
-
-    if( isoWriteTemp ) then
-       nn = nn + 1
-       solNames(nn) = cgnsTemp
-    endif
-
-    if( isoWriteCp ) then
-       nn = nn + 1
-       solNames(nn) = cgnsCp
-    endif
-
-    if( isoWriteMach ) then
-       nn = nn + 1
-       solNames(nn) = cgnsMach
-    endif
-
-    if( isoWriteRMach ) then
-       nn = nn + 1
-       solNames(nn) = cgnsRelMach
-    endif
-
-    if( isoWriteMachTurb ) then
-       nn = nn + 1
-       solNames(nn) = cgnsMachTurb
-    endif
-
-    if( isoWriteEddyVis ) then
-       nn = nn + 1
-       solNames(nn) = cgnsEddy
-    endif
-
-    if( isoWriteRatioEddyVis ) then
-       nn = nn + 1
-       solNames(nn) = cgnsEddyRatio
-    endif
-
-    if( isoWriteDist ) then
-       nn = nn + 1
-       solNames(nn) = cgNSWallDist
-    endif
-
-    if( isoWriteVort ) then
-       nn = nn + 1
-       solNames(nn) = cgnsVortMagn
-    endif
-
-    if( isoWriteVortx ) then
-       nn = nn + 1
-       solNames(nn) = cgnsVortx
-    endif
-
-    if( isoWriteVorty ) then
-       nn = nn + 1
-       solNames(nn) = cgnsVorty
-    endif
-
-    if( isoWriteVortz ) then
-       nn = nn + 1
-       solNames(nn) = cgnsVortz
-    endif
-
-    if( isoWritePtotloss ) then
-       nn = nn + 1
-       solNames(nn) = cgnsPtotloss
-    endif
-
-    if( isoWriteResRho ) then
-       nn = nn + 1
-       solNames(nn) = cgnsResRho
-    endif
-
-    if( isoWriteResMom ) then
-       nn = nn + 1
-       solNames(nn) = cgnsResMomx
-
-       nn = nn + 1
-       solNames(nn) = cgnsResMomy
-
-       nn = nn + 1
-       solNames(nn) = cgnsResMomz
-    endif
-
-    if( isoWriteResRhoe) then
-       nn = nn + 1
-       solNames(nn) = cgnsResRhoe
-    endif
-
-    if( isoWriteResTurb ) then
-
-       select case(turbModel)
-
-       case(spalartAllmaras, spalartAllmarasEdwards)
-          nn = nn + 1
-          solNames(nn) = cgnsResNu
-
-       case(komegaWilcox, komegaModified, menterSST)
-          nn = nn + 1
-          solNames(nn) = cgnsResK
-
-          nn = nn + 1
-          solNames(nn) = cgnsResOmega
-
-       case(ktau)
-          nn = nn + 1
-          solNames(nn) = cgnsResK
-
-          nn = nn + 1
-          solNames(nn) = cgnsResTau
-
-       case(v2f)
-          nn = nn + 1
-          solNames(nn) = cgnsResK
-
-          nn = nn + 1
-          solNames(nn) = cgnsResEpsilon
-
-          nn = nn + 1
-          solNames(nn) = cgnsResV2
-
-          nn = nn + 1
-          solNames(nn) = cgnsResF
-
-       end select
-
-    endif
+                jj = j
+
+            end if
+
+            do i = rangeFace(1, 1), rangeFace(1, 2)
+                if (present(iBeg) .and. present(iEnd) .and. (useRindLayer)) then
+                    ior = i + subface_iBegOr - 1
+                    if (ior == iBeg) then
+                        ii = i + 1
+                    else if (ior == iEnd + 1) then
+                        ii = i - 1
+                    else
+                        ii = i
+                    end if
+                else
+                    ii = i
+                end if
 
-    if (isoWriteShock) then
-       nn = nn + 1
-       solNames(nn) = cgnsShock
-    end if
+                ! Determine the viscous subface on which this
+                ! face is located.
 
-    if (isoWriteFilteredShock) then
-       nn = nn + 1
-       solNames(nn) = cgnsFilteredShock
-    end if
+                mm = viscPointer(ii, jj)
 
-    if( isoWriteBlank) then
-       nn = nn + 1
-       solNames(nn) = cgnsBlank
-    endif
+                ! Store the 6 components of the viscous stress tensor
+                ! a bit easier.
 
-  end subroutine isoSurfNames
+                tauxx = viscSubface(mm)%tau(ii, jj, 1)
+                tauyy = viscSubface(mm)%tau(ii, jj, 2)
+                tauzz = viscSubface(mm)%tau(ii, jj, 3)
+                tauxy = viscSubface(mm)%tau(ii, jj, 4)
+                tauxz = viscSubface(mm)%tau(ii, jj, 5)
+                tauyz = viscSubface(mm)%tau(ii, jj, 6)
 
-  subroutine setHelpVariablesWriting
-    !
-    !       setHelpVariablesWriting determines the variables, which are
-    !       needed to write the CGNS files.
-    !
-    use block
-    use cgnsGrid
-    use communication
-    use monitor
-    use utils, only : terminate
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr, nSend
-    integer, dimension(nProc) :: recvCounts, displs
+                ! Compute the "unit" force on this face. The unit normal
+                ! is outward pointing per definition. A minus sign is
+                ! present, because of the definition of the viscous
+                ! stress tensor. Note that in the normal the indices i
+                ! and j could be used. However this is not done.
 
-    integer(kind=intType) :: i, nn
+                norm(1) = BCData(mm)%norm(ii, jj, 1)
+                norm(2) = BCData(mm)%norm(ii, jj, 2)
+                norm(3) = BCData(mm)%norm(ii, jj, 3)
 
-    integer(kind=intType), dimension(cgnsNDom) :: tmp
-    integer(kind=intType), dimension(4,nDom)   :: buffer
+                fx = -(tauxx*norm(1) + tauxy*norm(2) + tauxz*norm(3))
+                fy = -(tauxy*norm(1) + tauyy*norm(2) + tauyz*norm(3))
+                fz = -(tauxz*norm(1) + tauyz*norm(2) + tauzz*norm(3))
 
-    ! Determine for each CGNS block how many (sub) blocks are stored
-    ! on this processor. Note that this info is the same for all
-    ! spectral solutions, so the 1st is fine.
+                fn = fx*norm(1) + fy*norm(2) + fz*norm(3)
 
-    allocate(nBlocksCGNSblock(0:cgnsNDom), blocksCGNSblock(nDom), &
-         stat=ierr)
-    if(ierr /= 0)                               &
-         call terminate("setHelpVariablesWriting", &
-         "Memory allocation failure for &
-         &nBlocksCGNSblock and blocksCGNSblock.")
+                fx = fx - fn*norm(1)
+                fy = fy - fn*norm(2)
+                fz = fz - fn*norm(3)
 
-    nBlocksCGNSblock = 0
-    do nn=1,nDom
-       i = flowDoms(nn,1,1)%cgnsBlockID
-       nBlocksCGNSblock(i) = nBlocksCGNSblock(i) + 1
-    enddo
+                ! Determine the variable to be stored and compute it.
+                ! Note that an offset of -1 must be used in dd2Wall,
+                ! because the original array, d2Wall, starts at 2.
+                ! First update the counter nn.
 
-    ! Put nBlocksCGNSblock in cumulative storage format.
-    ! Store this accumulated value in tmp, which serves as
-    ! a counter later on.
+                nn = nn + 1
 
-    do i=1,cgnsNDom
-       tmp(i)              = nBlocksCGNSblock(i-1)
-       nBlocksCGNSblock(i) = nBlocksCGNSblock(i) + tmp(i)
-    enddo
+                select case (solName)
+                case (cgnsSkinFmag)
+                    buffer(nn) = fact*sqrt(fx*fx + fy*fy + fz*fz)
 
-    ! Determine the values for blocksCGNSblock.
+                case (cgnsSkinFx)
+                    buffer(nn) = fact*fx
 
-    do nn=1,nDom
-       i = flowDoms(nn,1,1)%cgnsBlockID
-       tmp(i) = tmp(i) + 1
-       blocksCGNSblock(tmp(i)) = nn
-    enddo
+                case (cgnsSkinFy)
+                    buffer(nn) = fact*fy
 
-  end subroutine setHelpVariablesWriting
+                case (cgnsSkinFz)
+                    buffer(nn) = fact*fz
 
-  subroutine releaseHelpVariablesWriting
-    !
-    !       releaseHelpVariablesWriting releases the memory of the
-    !       variables, which were needed to write the CGNS files.
-    !
-    use cgnsGrid
-    use monitor
-    use utils, only : terminate
-    implicit none
-    !
-    !      Local variables
-    !
-    integer :: ierr
+                case (cgnsYplus)
+                    rsurf = half*(ww1(ii, jj, irho) + ww2(ii, jj, irho))
+                    musurf = half*(rlv1(ii, jj) + rlv2(ii, jj))
+                    buffer(nn) = sqrt(rsurf*sqrt(fx*fx + fy*fy + fz*fz)) &
+                                 *dd2Wall(ii - 1, jj - 1)/musurf
+                end select
 
-    ! Release the memory of the allocatable arrays in outputMod.
+            end do
+        end do
 
-    deallocate(nBlocksCGNSblock, blocksCGNSblock, stat=ierr)
-    if(ierr /= 0)                                   &
-         call terminate("releaseHelpVariablesWriting", &
-         "Deallocation failure for nBlocksCGNSblock, &
-         &etc.")
+        !        ================================================================
 
-  end subroutine releaseHelpVariablesWriting
-  subroutine writeCGNSHeader(cgnsInd, base)
-    !
-    !       writeCGNSHeader writes a descriptive header to the given base
-    !       of the given CGNS file. Only processor 0 performs this task.
-    !
-    use constants
-    use cgnsGrid
-    use cgnsNames
-    use flowVarRefState
-    use su_cgns
-    use inputPhysics
-    use inputTimeSpectral
-    use monitor
-    use utils, only : terminate, setCGNSRealType
-    use commonFormats, only : strings
+        case (cgnsStanton)
 
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer, intent(in) :: cgnsInd, base
-    !
-    !      Local variables.
-    !
-    integer :: ierr, realTypeCGNS
-
-    real(kind=cgnsRealType) :: val
-
-    character(len=2048) :: message
-    character(len=7)    :: integerString
-    character(len=12)   :: realString
+        ! Some constants needed to compute the stanton number.
 
-    ! Set the cgns real type.
+        gm1 = gammaInf - one
+        a2Tot = gammaInf*pInf*(one + half*gm1*MachCoef*MachCoef) &
+                /rhoInf
+        fact = MachCoef*sqrt(gammaInf*pInf*rhoInf)/gm1
 
-    realTypeCGNS = setCGNSRealType()
+        ! Loop over the given range of faces. As the viscous data is
+        ! only present in the owned faces, the values of the halo's
+        ! are set equal to the nearest physical face. Therefore the
+        ! working indices are ii and jj.
+        do j = rangeFace(2, 1), rangeFace(2, 2)
 
-    ! Go to the correct position in the CGNS file.
+            ! if statements are used to copy the value of the interior
+            ! cell since the value isn't defined in the rind cell
 
-    call cg_goto_f(cgnsInd, base, ierr, "end")
-    if(ierr /= CG_OK)                    &
-         call terminate("writeCGNSHeader", &
-         "Something wrong when calling cg_goto_f")
+            if (present(jBeg) .and. present(jEnd) .and. (useRindLayer)) then
+                jor = j + jBegOr - 1
+                if (jor == jBeg) then
+                    jj = j + 1
+                else if (jor == jEnd + 1) then
+                    jj = j - 1
+                else
+                    jj = j
+                end if
+            else
+                jj = j
+
+            end if
+
+            do i = rangeFace(1, 1), rangeFace(1, 2)
+                if (present(iBeg) .and. present(iEnd) .and. (useRindLayer)) then
+                    ior = i + iBegor - 1
+                    if (ior == iBeg) then
+                        ii = i + 1
+                    else if (ior == iEnd + 1) then
+                        ii = i - 1
+                    else
+                        ii = i
+                    end if
+                else
+                    ii = i
+                end if
+                ! Determine the viscous subface on which this
+                ! face is located.
 
-    ! Create a data class type node to indicate that nonDimensional
-    ! solution data is written for which the reference state
-    ! is known.
+                mm = viscPointer(ii, jj)
 
-    call cg_dataclass_write_f(NormalizedByDimensional,ierr)
-    if(ierr /= CG_OK)                  &
-         call terminate("writeCGNSHeader", &
-         "Something wrong when calling &
-         &cg_dataclass_write_f")
+                ! Compute the heat flux. Multipy with the sign of the
+                ! normal to obtain the correct value.
 
-    ! Write the info about the solver used.
+                qw = viscSubface(mm)%q(ii, jj, 1)*BCData(mm)%norm(ii, jj, 1) &
+                     + viscSubface(mm)%q(ii, jj, 2)*BCData(mm)%norm(ii, jj, 2) &
+                     + viscSubface(mm)%q(ii, jj, 3)*BCData(mm)%norm(ii, jj, 3)
 
-    call cg_descriptor_write_f("SolverInfo", &
-         "ADflow multiblock code", ierr)
-    if(ierr /= CG_OK)                  &
-         call terminate("writeCGNSHeader", &
-         "Something wrong when calling &
-         &cg_descriptor_write_f")
+                ! Compute the speed of sound squared at the wall and
+                ! the stanton number, which is stored in buffer.
 
-    ! Write the info about the scheme used; message is used as
-    ! storage for the string containing the scheme description.
+                a2 = half*(gamma1(ii, jj) + gamma2(ii, jj)) &
+                     *(pp1(ii, jj) + pp2(ii, jj)) &
+                     /(ww1(ii, jj, irho) + ww2(ii, jj, irho))
 
-    call describeScheme(message)
-    call cg_descriptor_write_f("DiscretizationScheme", message, ierr)
-    if(ierr /= CG_OK)                  &
-         call terminate("writeCGNSHeader", &
-         "Something wrong when calling &
-         &cg_descriptor_write_f")
+                nn = nn + 1
+                buffer(nn) = qw/(fact*(a2Tot - a2))
 
-    ! Write the similation type to the CGNS file.
+            end do
+        end do
 
-    select case (equationMode)
+        !        ================================================================
 
-    case (steady)
+        case (cgnsBlank)
 
-       ! Steady mode. Just write this info.
+        ! Loop over the given range of faces. Since iblanks are set
+        ! to 2 for boundary conditions and >= 10 for the boundary,
+        ! take the minimum of the value and 1, so that cells with
+        ! valid data always have an iblank of 1.
 
-       call cg_simulation_type_write_f(cgnsInd, base, &
-            nonTimeaccurate, ierr)
-       if(ierr /= CG_OK)                  &
-            call terminate("writeCGNSHeader", &
-            "Something wrong when calling &
-            &cg_simulation_type_write_f")
+        do j = rangeFace(2, 1), rangeFace(2, 2)
+            do i = rangeFace(1, 1), rangeFace(1, 2)
+                nn = nn + 1
+                buffer(nn) = real(min(iblank2(i, j), 1_intType), realType)
+            end do
+        end do
 
-       !===============================================================
+        case (cgnsSepSensor)
 
-    case (unsteady)
+        do j = rangeFace(2, 1), rangeFace(2, 2)
+            do i = rangeFace(1, 1), rangeFace(1, 2)
+                nn = nn + 1
 
-       ! Unsteady mode. First write the simulation type.
+                ! Get normalized surface velocity:
+                v(1) = ww2(i, j, ivx)
+                v(2) = ww2(i, j, ivy)
+                v(3) = ww2(i, j, ivz)
 
-       call cg_simulation_type_write_f(cgnsInd, base, &
-            timeaccurate, ierr)
-       if(ierr /= CG_OK)                  &
-            call terminate("writeCGNSHeader", &
-            "Something wrong when calling &
-            &cg_simulation_type_write_f")
+                ! Normalize
+                v = v/(sqrt(v(1)**2 + v(2)**2 + v(3)**2) + 1e-16)
 
-       ! Write some additional stuff, like time step and
-       ! physical time. First store it in the big string message.
+                ! Dot product with free stream
+                sensor = -dot_product(v, velDirFreeStream)
 
-       write(integerString,"(i7)") timeStepUnsteady + &
-            nTimeStepsRestart
-       write(realString,"(es12.5)") timeUnsteady + &
-            timeUnsteadyRestart
+                !Now run through a smooth heaviside function:
+                sensor = one/(one + exp(-2*sepSensorSharpness*(sensor - sepSensorOffset)))
+                buffer(nn) = sensor
+            end do
+        end do
 
-       integerString = adjustl(integerString)
-       realString    = adjustl(realString)
+        case (cgnsCavitation)
+        fact = two/(gammaInf*pInf*MachCoef*MachCoef)
+        do j = rangeFace(2, 1), rangeFace(2, 2)
+            do i = rangeFace(1, 1), rangeFace(1, 2)
 
-       write(message, strings) "Unsteady time step ", trim(integerString),", physical time ", &
-         trim(realString), " seconds"
+                nn = nn + 1
+                ! Get local pressure
+                plocal = half*(pp1(i, j) + pp2(i, j))
+
+                sensor1 = (-(fact)*(plocal - pInf)) - cavitationnumber
+                sensor1 = (sensor1**cavExponent)/(one + exp(2*cavSensorSharpness*(-sensor1 + cavSensorOffset)))
+                buffer(nn) = sensor1
+                !print*, sensor
+            end do
+        end do
+        end select varName
+
+    end subroutine storeSurfsolInBuffer
+
+    subroutine storeOldSolInBuffer(buffer, ind, wID, &
+                                   iBeg, iEnd, jBeg, jEnd, kBeg, kEnd)
+        !
+        !       storeOldSolInBuffer stores the given range of the wID'th
+        !       conservative variable of an old solution in buffer. Needed for
+        !       a time accurate restart. It is assumed that the variables in
+        !       blockPointers already point to the correct block.
+        !
+        use blockPointers
+        use constants
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: ind, wID
+        integer(kind=intType), intent(in) :: iBeg, iEnd, jBeg, jEnd
+        integer(kind=intType), intent(in) :: kBeg, kEnd
+
+        real(kind=realType), dimension(*), intent(out) :: buffer
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, nOld, nn
+
+        ! Store the index in wOld a bit easier.
+
+        nOld = ind - 1
+
+        ! Loop over the cell range of the block and copy the wID'th
+        ! variable in buffer.
+
+        nn = 0
+
+        do k = kBeg, kEnd
+            do j = jBeg, jEnd
+                do i = iBeg, iEnd
+                    nn = nn + 1; buffer(nn) = wOld(nOld, i, j, k, wID)
+                end do
+            end do
+        end do
+
+    end subroutine storeOldSolInBuffer
+
+    subroutine describeScheme(string)
+        !
+        !       describeScheme gives a short description about the scheme
+        !       used to obtain the solution. The description is stored in the
+        !       character array string.
+        !
+        use constants
+        use inputDiscretization
+        use inputPhysics
+        use flowVarRefState
+        use commonFormats, only: stringSpace, stringSci5
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        character(len=*), intent(out) :: string
+
+        character(len=maxStringLen) :: upwindFormat = "(A, F7.3, A)"
+
+        ! Write the basic scheme info.
+
+        select case (spaceDiscr)
+        case (dissScalar)
+            write (string, stringSci5) "Scalar dissipation scheme, k2 = ", vis2, ", k4 = ", vis4, "."
+        case (dissMatrix)
+            write (string, stringSci5) "Matrix dissipation scheme, k2 = ", vis2, ", k4 = ", vis4, "."
+        case (dissCusp)
+            write (string, stringSci5) "CUSP dissipation scheme, k2 = ", vis2, ", k4 = ", vis4, "."
+        case (upwind)
+            select case (limiter)
+            case (firstOrder)
+                write (string, stringSpace) "First order upwind scheme."
+            case (noLimiter)
+                write (string, upwindFormat) "Second order upwind scheme using linear reconstruction, &
+                &i.e. no limiter, kappa = ", kappaCoef, "."
+            case (vanAlbeda)
+                write (string, upwindFormat) "Second order upwind scheme with Van Albada limiter, &
+                &kappa =", kappaCoef, "."
+            case (minmod)
+                write (string, upwindFormat) "Second order upwind scheme with Minmod limiter, &
+                &kappa =", kappaCoef, "."
+            end select
+
+            select case (riemann)
+            case (Roe)
+                write (string, stringSpace) trim(string), "Roe's approximate Riemann Solver."
+            case (vanLeer)
+                write (string, stringSpace) trim(string), "Van Leer flux vector splitting."
+            case (ausmdv)
+                write (string, stringSpace) trim(string), "ausmdv flux vector splitting."
+            end select
+
+        end select
+
+        ! In case of the scalar dissipation scheme, write whether or not
+        ! directional scaling has been applied.
+
+        if (spaceDiscr == dissScalar) then
+            if (dirScaling) then
+                write (string, "(2(A, 1X), ES12.5, A)") trim(string), "Directional scaling of dissipation with exponent", adis, "."
+            else
+                write (string, stringSpace) trim(string), "No directional scaling of dissipation."
+            end if
+        end if
+
+        ! For the Euler equations, write the inviscid wall boundary
+        ! condition treatment.
+
+        if (equations == EulerEquations) then
+            select case (eulerWallBcTreatment)
+            case (constantPressure)
+                write (string, stringSpace) trim(string), "Zero normal pressure gradient", &
+                    "for inviscid wall boundary conditions."
+            case (linExtrapolPressure)
+                write (string, stringSpace) trim(string), "Linear extrapolation of normal pressure gradient", &
+                    "for inviscid wall boundary conditions."
+            case (quadExtrapolPressure)
+                write (string, stringSpace) trim(string), "Quadratic extrapolation of normal pressure gradIent", &
+                    "for inviscid wall boundary conditions."
+            case (normalMomentum)
+                write (string, stringSpace) trim(string), "Normal momentum equation used to determine pressure gradient", &
+                    "for inviscid wall boundary conditions."
+            end select
+        end if
+
+        ! If preconditioning is used, write the preconditioner.
+
+        select case (precond)
+        case (Turkel)
+            write (string, stringSpace) trim(string), "Turkel preconditioner for inviscid fluxes."
+        case (ChoiMerkle)
+            write (string, stringSpace) trim(string), "Choi Merkle preconditioner for inviscid fluxes."
+        end select
+
+        ! For a viscous computation write that a central discretization
+        ! is used for the viscous fluxes.
+
+        if (viscous) then
+            write (string, stringSpace) trim(string), "Central discretization for viscous fluxes."
+        end if
+
+    end subroutine describeScheme
+
+    subroutine isoSurfNames(solNames)
+        !
+        !       isoNames sets the names for the volume variables to be
+        !       written to the isosurfaces. Sids convention names are
+        !       used as much as possible.
+        !
+        use constants
+        use cgnsNames
+        use inputPhysics
+        use flowVarRefState
+        use extraOutput
+        implicit none
+        !
+        !      Subroutine argument.
+        !
+        character(len=*), dimension(*), intent(out) :: solNames
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn
+
+        ! Check the additional variables to be written -- there are no
+        ! default variables already written
+        nn = 0
+        if (isoWriteRho) then
+            nn = nn + 1
+            solNames(nn) = cgnsDensity
+        end if
+
+        if (isoWriteVx) then
+            nn = nn + 1
+            solNames(nn) = cgnsVelx
+        end if
+
+        if (isoWriteVy) then
+            nn = nn + 1
+            solNames(nn) = cgnsVely
+        end if
+
+        if (isoWriteVz) then
+            nn = nn + 1
+            solNames(nn) = cgnsVelz
+        end if
+
+        if (isoWriteP) then
+            nn = nn + 1
+            solNames(nn) = cgnsPressure
+        end if
+
+        if (isoWriteTurb) then
+
+            select case (turbModel)
+
+            case (spalartAllmaras, spalartAllmarasEdwards)
+                nn = nn + 1
+                solNames(nn) = cgnsTurbSaNu
 
-       ! And write the info.
+            case (komegaWilcox, komegaModified, menterSST)
+                nn = nn + 1
+                solNames(nn) = cgnsTurbK
+                nn = nn + 1
+                solNames(nn) = cgnsTurbOmega
 
-       call cg_descriptor_write_f("UnsteadyInfo", message, ierr)
-       if(ierr /= CG_OK)                  &
-            call terminate("writeCGNSHeader", &
-            "Something wrong when calling &
-            &cg_descriptor_write_f")
+            case (ktau)
+                nn = nn + 1
+                solNames(nn) = cgnsTurbK
+                nn = nn + 1
+                solNames(nn) = cgnsTurbTau
 
-       !===============================================================
+            case (v2f)
+                nn = nn + 1
+                solNames(nn) = cgnsTurbK
+                nn = nn + 1
+                solNames(nn) = cgnsTurbEpsilon
+                nn = nn + 1
+                solNames(nn) = cgnsTurbV2
+                nn = nn + 1
+                solNames(nn) = cgnsTurbF
+
+            end select
+
+        end if
+
+        if (isoWriteMx) then
+            nn = nn + 1
+            solNames(nn) = cgnsMomx
+        end if
+
+        if (isoWriteMy) then
+            nn = nn + 1
+            solNames(nn) = cgnsMomy
+        end if
+
+        if (isoWriteMz) then
+            nn = nn + 1
+            solNames(nn) = cgnsMomz
+        end if
+
+        if (isoWriteRVx) then
+            nn = nn + 1
+            solNames(nn) = cgnsRelVelx
+        end if
+
+        if (isoWriteRVy) then
+            nn = nn + 1
+            solNames(nn) = cgnsRelVely
+        end if
+
+        if (isoWriteRVz) then
+            nn = nn + 1
+            solNames(nn) = cgnsRelVelz
+        end if
+
+        if (isoWriteRhoe) then
+            nn = nn + 1
+            solNames(nn) = cgnsEnergy
+        end if
+
+        if (isoWriteTemp) then
+            nn = nn + 1
+            solNames(nn) = cgnsTemp
+        end if
+
+        if (isoWriteCp) then
+            nn = nn + 1
+            solNames(nn) = cgnsCp
+        end if
+
+        if (isoWriteMach) then
+            nn = nn + 1
+            solNames(nn) = cgnsMach
+        end if
+
+        if (isoWriteRMach) then
+            nn = nn + 1
+            solNames(nn) = cgnsRelMach
+        end if
+
+        if (isoWriteMachTurb) then
+            nn = nn + 1
+            solNames(nn) = cgnsMachTurb
+        end if
+
+        if (isoWriteEddyVis) then
+            nn = nn + 1
+            solNames(nn) = cgnsEddy
+        end if
+
+        if (isoWriteRatioEddyVis) then
+            nn = nn + 1
+            solNames(nn) = cgnsEddyRatio
+        end if
+
+        if (isoWriteDist) then
+            nn = nn + 1
+            solNames(nn) = cgNSWallDist
+        end if
+
+        if (isoWriteVort) then
+            nn = nn + 1
+            solNames(nn) = cgnsVortMagn
+        end if
+
+        if (isoWriteVortx) then
+            nn = nn + 1
+            solNames(nn) = cgnsVortx
+        end if
+
+        if (isoWriteVorty) then
+            nn = nn + 1
+            solNames(nn) = cgnsVorty
+        end if
+
+        if (isoWriteVortz) then
+            nn = nn + 1
+            solNames(nn) = cgnsVortz
+        end if
+
+        if (isoWritePtotloss) then
+            nn = nn + 1
+            solNames(nn) = cgnsPtotloss
+        end if
+
+        if (isoWriteResRho) then
+            nn = nn + 1
+            solNames(nn) = cgnsResRho
+        end if
+
+        if (isoWriteResMom) then
+            nn = nn + 1
+            solNames(nn) = cgnsResMomx
+
+            nn = nn + 1
+            solNames(nn) = cgnsResMomy
+
+            nn = nn + 1
+            solNames(nn) = cgnsResMomz
+        end if
+
+        if (isoWriteResRhoe) then
+            nn = nn + 1
+            solNames(nn) = cgnsResRhoe
+        end if
+
+        if (isoWriteResTurb) then
+
+            select case (turbModel)
+
+            case (spalartAllmaras, spalartAllmarasEdwards)
+                nn = nn + 1
+                solNames(nn) = cgnsResNu
 
-    case (timeSpectral)
+            case (komegaWilcox, komegaModified, menterSST)
+                nn = nn + 1
+                solNames(nn) = cgnsResK
 
-       ! Time spectral mode. This is not a predefined mode in CGNS
-       ! and therefore use userDefined.
+                nn = nn + 1
+                solNames(nn) = cgnsResOmega
 
-       call cg_simulation_type_write_f(cgnsInd, base, &
-            UserDefined, ierr)
-       if(ierr /= CG_OK)                  &
-            call terminate("writeCGNSHeader", &
-            "Something wrong when calling &
-            &cg_simulation_type_write_f")
+            case (ktau)
+                nn = nn + 1
+                solNames(nn) = cgnsResK
 
-       ! Write some info to the string message.
+                nn = nn + 1
+                solNames(nn) = cgnsResTau
 
-       write(integerString,"(i7)") nTimeIntervalsSpectral
-       integerString = adjustl(integerString)
+            case (v2f)
+                nn = nn + 1
+                solNames(nn) = cgnsResK
 
-       write(message, strings) "Time spectral mode for periodic problems; ", &
-         trim(integerString), " spectral solutions have been used to model the problem."
+                nn = nn + 1
+                solNames(nn) = cgnsResEpsilon
 
-       ! And write the info.
+                nn = nn + 1
+                solNames(nn) = cgnsResV2
 
-       call cg_descriptor_write_f("PeriodicInfo", message, ierr)
-       if(ierr /= CG_OK)                  &
+                nn = nn + 1
+                solNames(nn) = cgnsResF
+
+            end select
+
+        end if
+
+        if (isoWriteShock) then
+            nn = nn + 1
+            solNames(nn) = cgnsShock
+        end if
+
+        if (isoWriteFilteredShock) then
+            nn = nn + 1
+            solNames(nn) = cgnsFilteredShock
+        end if
+
+        if (isoWriteBlank) then
+            nn = nn + 1
+            solNames(nn) = cgnsBlank
+        end if
+
+    end subroutine isoSurfNames
+
+    subroutine setHelpVariablesWriting
+        !
+        !       setHelpVariablesWriting determines the variables, which are
+        !       needed to write the CGNS files.
+        !
+        use block
+        use cgnsGrid
+        use communication
+        use monitor
+        use utils, only: terminate
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr, nSend
+        integer, dimension(nProc) :: recvCounts, displs
+
+        integer(kind=intType) :: i, nn
+
+        integer(kind=intType), dimension(cgnsNDom) :: tmp
+        integer(kind=intType), dimension(4, nDom)   :: buffer
+
+        ! Determine for each CGNS block how many (sub) blocks are stored
+        ! on this processor. Note that this info is the same for all
+        ! spectral solutions, so the 1st is fine.
+
+        allocate (nBlocksCGNSblock(0:cgnsNDom), blocksCGNSblock(nDom), &
+                  stat=ierr)
+        if (ierr /= 0) &
+             call terminate("setHelpVariablesWriting", &
+             "Memory allocation failure for &
+             &nBlocksCGNSblock and blocksCGNSblock.")
+
+        nBlocksCGNSblock = 0
+        do nn = 1, nDom
+            i = flowDoms(nn, 1, 1)%cgnsBlockID
+            nBlocksCGNSblock(i) = nBlocksCGNSblock(i) + 1
+        end do
+
+        ! Put nBlocksCGNSblock in cumulative storage format.
+        ! Store this accumulated value in tmp, which serves as
+        ! a counter later on.
+
+        do i = 1, cgnsNDom
+            tmp(i) = nBlocksCGNSblock(i - 1)
+            nBlocksCGNSblock(i) = nBlocksCGNSblock(i) + tmp(i)
+        end do
+
+        ! Determine the values for blocksCGNSblock.
+
+        do nn = 1, nDom
+            i = flowDoms(nn, 1, 1)%cgnsBlockID
+            tmp(i) = tmp(i) + 1
+            blocksCGNSblock(tmp(i)) = nn
+        end do
+
+    end subroutine setHelpVariablesWriting
+
+    subroutine releaseHelpVariablesWriting
+        !
+        !       releaseHelpVariablesWriting releases the memory of the
+        !       variables, which were needed to write the CGNS files.
+        !
+        use cgnsGrid
+        use monitor
+        use utils, only: terminate
+        implicit none
+        !
+        !      Local variables
+        !
+        integer :: ierr
+
+        ! Release the memory of the allocatable arrays in outputMod.
+
+        deallocate (nBlocksCGNSblock, blocksCGNSblock, stat=ierr)
+        if (ierr /= 0) &
+             call terminate("releaseHelpVariablesWriting", &
+             "Deallocation failure for nBlocksCGNSblock, &
+             &etc.")
+
+    end subroutine releaseHelpVariablesWriting
+    subroutine writeCGNSHeader(cgnsInd, base)
+        !
+        !       writeCGNSHeader writes a descriptive header to the given base
+        !       of the given CGNS file. Only processor 0 performs this task.
+        !
+        use constants
+        use cgnsGrid
+        use cgnsNames
+        use flowVarRefState
+        use su_cgns
+        use inputPhysics
+        use inputTimeSpectral
+        use monitor
+        use utils, only: terminate, setCGNSRealType
+        use commonFormats, only: strings
+
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer, intent(in) :: cgnsInd, base
+        !
+        !      Local variables.
+        !
+        integer :: ierr, realTypeCGNS
+
+        real(kind=cgnsRealType) :: val
+
+        character(len=2048) :: message
+        character(len=7)    :: integerString
+        character(len=12)   :: realString
+
+        ! Set the cgns real type.
+
+        realTypeCGNS = setCGNSRealType()
+
+        ! Go to the correct position in the CGNS file.
+
+        call cg_goto_f(cgnsInd, base, ierr, "end")
+        if (ierr /= CG_OK) &
             call terminate("writeCGNSHeader", &
-            "Something wrong when calling &
-            &cg_descriptor_write_f")
-    end select
+                           "Something wrong when calling cg_goto_f")
 
-    ! Go back to the given base in the cgns file.
+        ! Create a data class type node to indicate that nonDimensional
+        ! solution data is written for which the reference state
+        ! is known.
 
-    call cg_goto_f(cgnsInd, base, ierr, "end")
-    if(ierr /= CG_OK)                    &
-         call terminate("writeCGNSHeader", &
-         "Something wrong when calling cg_goto_f")
+        call cg_dataclass_write_f(NormalizedByDimensional, ierr)
+        if (ierr /= CG_OK) &
+             call terminate("writeCGNSHeader", &
+             "Something wrong when calling &
+             &cg_dataclass_write_f")
 
-    ! Create a flow equation set.
+        ! Write the info about the solver used.
 
-    call cg_equationset_write_f(cgnsPhysDim, ierr)
-    if(ierr /= CG_OK)                    &
-         call terminate("writeCGNSHeader", &
-         "Something wrong when calling &
-         &cg_equationset_write_f")
+        call cg_descriptor_write_f("SolverInfo", &
+                                   "ADflow multiblock code", ierr)
+        if (ierr /= CG_OK) &
+             call terminate("writeCGNSHeader", &
+             "Something wrong when calling &
+             &cg_descriptor_write_f")
 
-    ! Write the rest of the physical model under the flow
-    ! equation set just created.
+        ! Write the info about the scheme used; message is used as
+        ! storage for the string containing the scheme description.
 
-    call cg_goto_f(cgnsInd, base, ierr, &
-         "FlowEquationSet_t", 1, "end")
-    if(ierr /= CG_OK)                  &
-         call terminate("writeCGNSHeader", &
-         "Something wrong when calling cg_goto_f")
+        call describeScheme(message)
+        call cg_descriptor_write_f("DiscretizationScheme", message, ierr)
+        if (ierr /= CG_OK) &
+             call terminate("writeCGNSHeader", &
+             "Something wrong when calling &
+             &cg_descriptor_write_f")
 
-    ! Write the governing equations solved.
+        ! Write the similation type to the CGNS file.
 
-    select case (equations)
-    case (EulerEquations)
-       call cg_governing_write_f(Euler, ierr)
+        select case (equationMode)
 
-    case (NSEquations)
-       call cg_governing_write_f(nsLaminar, ierr)
+        case (steady)
 
-    case (RANSEquations)
-       call cg_governing_write_f(nsTurbulent, ierr)
-    end select
+            ! Steady mode. Just write this info.
 
-    if(ierr /= CG_OK)                  &
-         call terminate("writeCGNSHeader", &
-         "Something wrong when calling &
-         &cg_governing_write_f")
+            call cg_simulation_type_write_f(cgnsInd, base, &
+                                            nonTimeaccurate, ierr)
+            if (ierr /= CG_OK) &
+                 call terminate("writeCGNSHeader", &
+                 "Something wrong when calling &
+                 &cg_simulation_type_write_f")
 
-    ! Write the information about the gas model used.
-    ! Determine the cp model used in the computation.
+            !===============================================================
 
-    select case (cpModel)
+        case (unsteady)
 
-    case (cpConstant)
+            ! Unsteady mode. First write the simulation type.
 
-       ! Constant cp and thus constant gamma.
+            call cg_simulation_type_write_f(cgnsInd, base, &
+                                            timeaccurate, ierr)
+            if (ierr /= CG_OK) &
+                 call terminate("writeCGNSHeader", &
+                 "Something wrong when calling &
+                 &cg_simulation_type_write_f")
 
-       call cg_model_write_f("GasModel_t", Ideal, ierr)
-       if(ierr /= CG_OK)                &
-            call terminate("writeCGNSHeader", &
-            "Something wrong when calling &
-            &cg_model_write_f")
+            ! Write some additional stuff, like time step and
+            ! physical time. First store it in the big string message.
 
-       ! Write the actual value of gamma; this must be done under
-       ! gas model type, which explains the goto statement.
+            write (integerString, "(i7)") timeStepUnsteady + &
+                nTimeStepsRestart
+            write (realString, "(es12.5)") timeUnsteady + &
+                timeUnsteadyRestart
 
-       call cg_goto_f(cgnsInd, base, ierr, "FlowEquationSet_t", &
-            1, "GasModel_t", 1, "end")
-       if(ierr /= CG_OK)                  &
-            call terminate("writeCGNSHeader", &
-            "Something wrong when calling cg_goto_f")
+            integerString = adjustl(integerString)
+            realString = adjustl(realString)
 
-       val = gammaConstant
-       call cg_array_write_f(cgnsHeatRatio, realTypeCGNS, &
-            1, int(1, cgsize_t), val, ierr)
-       if(ierr /= CG_OK)                  &
-            call terminate("writeCGNSHeader", &
-            "Something wrong when calling &
-            &cg_array_write_f")
+            write (message, strings) "Unsteady time step ", trim(integerString), ", physical time ", &
+                trim(realString), " seconds"
 
-       ! And create a data class under SpecificHeatRatio to tell that
-       ! this is a nonDimensional parameter.
+            ! And write the info.
 
-       call cg_goto_f(cgnsInd, base, ierr,       &
-            "FlowEquationSet_t", 1,    &
-            "GasModel_t", 1, "DataArray_t", 1,"end")
-       if(ierr /= CG_OK)                  &
-            call terminate("writeCGNSHeader", &
-            "Something wrong when calling cg_goto_f")
+            call cg_descriptor_write_f("UnsteadyInfo", message, ierr)
+            if (ierr /= CG_OK) &
+                 call terminate("writeCGNSHeader", &
+                 "Something wrong when calling &
+                 &cg_descriptor_write_f")
 
-       call cg_dataclass_write_f(NonDimensionalParameter,ierr)
-       if(ierr /= CG_OK)                  &
-            call terminate("writeCGNSHeader", &
-            "Something wrong when calling &
-            &cg_dataclass_write_f")
+            !===============================================================
 
-       !===============================================================
+        case (timeSpectral)
 
-    case (cpTempCurveFits)
+            ! Time spectral mode. This is not a predefined mode in CGNS
+            ! and therefore use userDefined.
 
-       ! Cp as function of the temperature is given via curve fits.
+            call cg_simulation_type_write_f(cgnsInd, base, &
+                                            UserDefined, ierr)
+            if (ierr /= CG_OK) &
+                 call terminate("writeCGNSHeader", &
+                 "Something wrong when calling &
+                 &cg_simulation_type_write_f")
 
-       call cg_model_write_f("GasModel_t", ThermallyPerfect, ierr)
-       if(ierr /= CG_OK)                  &
-            call terminate("writeCGNSHeader", &
-            "Something wrong when calling &
-            &cg_model_write_f")
-
-    end select
+            ! Write some info to the string message.
 
-    ! The rest of physical model description is only
-    ! for viscous flows.
+            write (integerString, "(i7)") nTimeIntervalsSpectral
+            integerString = adjustl(integerString)
 
-    viscousTest: if( viscous ) then
+            write (message, strings) "Time spectral mode for periodic problems; ", &
+                trim(integerString), " spectral solutions have been used to model the problem."
 
-       ! Write the info of the viscosity model. Under the flow
-       ! equation set.
-
-       call cg_goto_f(cgnsInd, base, ierr, &
-            "FlowEquationSet_t", 1, "end")
-       if(ierr /= CG_OK)                  &
-            call terminate("writeCGNSHeader", &
-            "Something wrong when calling cg_goto_f")
+            ! And write the info.
 
-       call cg_model_write_f("ViscosityModel_t", sutherlandlaw, ierr)
-       if(ierr /= CG_OK)                  &
-            call terminate("writeCGNSHeader", &
-            "Something wrong when calling &
-            &cg_model_write_f")
+            call cg_descriptor_write_f("PeriodicInfo", message, ierr)
+            if (ierr /= CG_OK) &
+                 call terminate("writeCGNSHeader", &
+                 "Something wrong when calling &
+                 &cg_descriptor_write_f")
+        end select
 
-       ! Write the info about the thermal conductivity, i.e.
-       ! Constant Prandtl number. Write the used value as well.
+        ! Go back to the given base in the cgns file.
 
-       call cg_model_write_f("ThermalConductivityModel_t", &
-            constantPrandtl, ierr)
-       if(ierr /= CG_OK)                  &
+        call cg_goto_f(cgnsInd, base, ierr, "end")
+        if (ierr /= CG_OK) &
             call terminate("writeCGNSHeader", &
-            "Something wrong when calling &
-            &cg_model_write_f")
+                           "Something wrong when calling cg_goto_f")
 
-       call cg_goto_f(cgnsInd, base, ierr, "FlowEquationSet_t", 1,&
-            "ThermalConductivityModel_t", 1, "end")
-       if(ierr /= CG_OK)                  &
-            call terminate("writeCGNSHeader", &
-            "Something wrong when calling cg_goto_f")
+        ! Create a flow equation set.
 
-       val = prandtl
-       call cg_array_write_f(cgnsPrandtl, realTypeCGNS, 1, int(1, cgsize_t), &
-            val, ierr)
-       if(ierr /= CG_OK)                  &
-            call terminate("writeCGNSHeader", &
-            "Something wrong when calling &
-            &cg_array_write_f")
+        call cg_equationset_write_f(cgnsPhysDim, ierr)
+        if (ierr /= CG_OK) &
+             call terminate("writeCGNSHeader", &
+             "Something wrong when calling &
+             &cg_equationset_write_f")
 
-       ! And create a data class under Prandtl number to tell that
-       ! this is a nonDimensional parameter.
+        ! Write the rest of the physical model under the flow
+        ! equation set just created.
 
-       call cg_goto_f(cgnsInd, base, ierr, "FlowEquationSet_t", &
-            1, "ThermalConductivityModel_t", 1,       &
-            "DataArray_t", 1,"end")
-       if(ierr /= CG_OK)                  &
+        call cg_goto_f(cgnsInd, base, ierr, &
+                       "FlowEquationSet_t", 1, "end")
+        if (ierr /= CG_OK) &
             call terminate("writeCGNSHeader", &
-            "Something wrong when calling cg_goto_f")
-
-       call cg_dataclass_write_f(NonDimensionalParameter,ierr)
-       if(ierr /= CG_OK)                  &
-            call terminate("writeCGNSHeader", &
-            "Something wrong when calling &
-            &cg_dataclass_write_f")
-
-       ! The rest of the physical model description is only for the
-       ! RANS equations.
-
-       turbulentTest: if(equations == RANSEquations) then
-
-          select case(turbModel)
-
-          case (spalartAllmaras)
-             call writeCGNSSaInfo(cgnsInd, base)
-
-          case (spalartAllmarasEdwards)
-             call writeCGNSSaeInfo(cgnsInd, base)
-
-          case (komegaWilcox)
-             call writeCGNSKomegaWilcoxInfo(cgnsInd, base)
-
-          case (komegaModified)
-             call writeCGNSKomegaModifiedInfo(cgnsInd, base)
-
-          case (ktau)
-             call writeCGNSKtauInfo(cgnsInd, base)
-
-          case (menterSST)
-             call writeCGNSMenterSSTInfo(cgnsInd, base)
-
-          case (v2f)
-             call writeCGNSV2fInfo(cgnsInd, base)
-          end select
-
-       endif turbulentTest
-
-    endif viscousTest
-
-    ! Write the reference state.
-
-    call writeCGNSReferenceState(cgnsInd, base)
-  end subroutine writeCGNSHeader
-
-  subroutine writeCGNSKomegaModifiedInfo(cgnsInd, cgnsBase)
-    !
-    !       writeCGNSKomegaModifiedInfo writes information about the
-    !       modified k-omega turbulence model to the cgns file.
-    !
-    use inputPhysics
-    use cgnsNames
-    use su_cgns
-    use utils, only : terminate, setCGNSRealType
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    integer, intent(in) :: cgnsInd, cgnsBase
-    !
-    !      Local variables.
-    !
-    integer :: realTypeCGNS, ierr
-
-    real(kind=cgnsRealType) :: val
-
-    ! Set the cgns real type.
-
-    realTypeCGNS = setCGNSRealType()
-
-    ! Write the info of the turbulence model under the flow equation
-    ! set. So move to this location first.
-
-    call cg_goto_f(cgnsInd, cgnsBase, ierr, &
-         "FlowEquationSet_t", 1, "end")
-    if(ierr /= CG_OK)                              &
-         call terminate("writeCGNSKomegaModifiedInfo", &
-         "Something wrong when calling cg_goto_f")
-
-    ! Write that the k-omega model is used.
-
-    call cg_model_write_f("TurbulenceModel_t", &
-         TwoEquation_Wilcox, ierr)
-    if(ierr /= CG_OK)                              &
-         call terminate("writeCGNSKomegaModifiedInfo", &
-         "Something wrong when calling cg_model_write_f")
-
-    ! Write the turbulent closure type.
-
-    call cg_model_write_f("TurbulenceClosure_t", EddyViscosity, ierr)
-    if(ierr /= CG_OK)                              &
-         call terminate("writeCGNSKomegaModifiedInfo", &
-         "Something wrong when calling cg_model_write_f")
-
-    ! Write the details of the turbulence model under the turbulent
-    ! closure type.
-
-    call cg_goto_f(cgnsInd, cgnsBase, ierr, "FlowEquationSet_t", 1, &
-         "TurbulenceClosure_t", 1, "end")
-    if(ierr /= CG_OK)                              &
-         call terminate("writeCGNSKomegaModifiedInfo", &
-         "Something wrong when calling cg_goto_f")
-
-    ! Write the value of the turbulent prandtl number.
-
-    val = prandtlTurb
-    call cg_array_write_f(cgnsPrandtlTurb, realTypeCGNS, &
-         1, int(1, cgsize_t), val, ierr)
-    if(ierr /= CG_OK)                              &
-         call terminate("writeCGNSKomegaModifiedInfo", &
-         "Something wrong when calling cg_array_write_f")
-
-    ! Indicate that this is a nonDimensional parameter.
-
-    call cg_goto_f(cgnsInd, cgnsBase, ierr, "FlowEquationSet_t", 1,&
-         "TurbulenceClosure_t", 1, "DataArray_t", 1,"end")
-    if(ierr /= CG_OK)                                  &
-         call terminate("writeCGNSKomegaModifiedInfo", &
-         "Something wrong when calling cg_goto_f")
-
-    call cg_dataclass_write_f(NonDimensionalParameter,ierr)
-    if(ierr /= CG_OK)                              &
-         call terminate("writeCGNSKomegaModifiedInfo", &
-         "Something wrong when calling &
-         &cg_dataclass_write_f")
-  end subroutine writeCGNSKomegaModifiedInfo
-
-  subroutine writeCGNSKomegaWilcoxInfo(cgnsInd, cgnsBase)
-    !
-    !       writeCGNSKomegaWilcoxInfo writes information about the
-    !       standard Wilcox k-omega turbulence model to the cgns file.
-    !
-    use inputPhysics
-    use cgnsNames
-    use su_cgns
-    use utils, only : terminate, setCGNSRealType
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    integer, intent(in) :: cgnsInd, cgnsBase
-    !
-    !      Local variables.
-    !
-    integer :: realTypeCGNS, ierr
-
-    real(kind=cgnsRealType) :: val
-
-    ! Set the cgns real type.
-
-    realTypeCGNS = setCGNSRealType()
-
-    ! Write the info of the turbulence model under the flow equation
-    ! set. So move to this location first.
-
-    call cg_goto_f(cgnsInd, cgnsBase, ierr, &
-         "FlowEquationSet_t", 1, "end")
-    if(ierr /= CG_OK)                            &
-         call terminate("writeCGNSKomegaWilcoxInfo", &
-         "Something wrong when calling cg_goto_f")
-
-    ! Write that the k-omega model is used.
-
-    call cg_model_write_f("TurbulenceModel_t", &
-         TwoEquation_Wilcox, ierr)
-    if(ierr /= CG_OK)                            &
-         call terminate("writeCGNSKomegaWilcoxInfo", &
-         "Something wrong when calling cg_model_write_f")
-
-    ! Write the turbulent closure type.
-
-    call cg_model_write_f("TurbulenceClosure_t", EddyViscosity, ierr)
-    if(ierr /= CG_OK)                            &
-         call terminate("writeCGNSKomegaWilcoxInfo", &
-         "Something wrong when calling cg_model_write_f")
-
-    ! Write the details of the turbulence model under the turbulent
-    ! closure type.
-
-    call cg_goto_f(cgnsInd, cgnsBase, ierr, "FlowEquationSet_t", 1,&
-         "TurbulenceClosure_t", 1, "end")
-    if(ierr /= CG_OK)                            &
-         call terminate("writeCGNSKomegaWilcoxInfo", &
-         "Something wrong when calling cg_goto_f")
-
-    ! Write the value of the turbulent prandtl number.
-
-    val = prandtlTurb
-    call cg_array_write_f(cgnsPrandtlTurb, realTypeCGNS, &
-         1, int(1, cgsize_t), val, ierr)
-    if(ierr /= CG_OK)                            &
-         call terminate("writeCGNSKomegaWilcoxInfo", &
-         "Something wrong when calling cg_array_write_f")
-
-    ! Indicate that this is a nonDimensional parameter.
-
-    call cg_goto_f(cgnsInd, cgnsBase, ierr, "FlowEquationSet_t", 1,&
-         "TurbulenceClosure_t", 1, "DataArray_t", 1,"end")
-    if(ierr /= CG_OK)                            &
-         call terminate("writeCGNSKomegaWilcoxInfo", &
-         "Something wrong when calling cg_goto_f")
-
-    call cg_dataclass_write_f(NonDimensionalParameter,IERR)
-    if(ierr /= CG_OK)                            &
-         call terminate("writeCGNSKomegaWilcoxInfo", &
-         "Something wrong when calling &
-         &cg_dataclass_write_f")
-  end subroutine writeCGNSKomegaWilcoxInfo
-
-  subroutine writeCGNSKtauInfo(cgnsInd, cgnsBase)
-    !
-    !       WriteCGNSKtauInfo writes information about the k-tau
-    !       turbulence model to the cgns file.
-    !
-    use inputPhysics
-    use cgnsNames
-    use su_cgns
-    use utils, only : terminate, setCGNSRealType
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    integer, intent(in) :: cgnsInd, cgnsBase
-    !
-    !      Local variables.
-    !
-    integer :: realTypeCGNS, ierr
-
-    real(kind=cgnsRealType) :: val
-
-    ! Set the cgns real type.
-
-    realTypeCGNS = setCGNSRealType()
-
-    ! Write the info of the turbulence model under the flow equation
-    ! set. So move to this location first.
-
-    call cg_goto_f(cgnsInd, cgnsBase, ierr, &
-         "FlowEquationSet_t", 1, "end")
-    if(ierr /= CG_OK)                    &
-         call terminate("writeCGNSKtauInfo", &
-         "Something wrong when calling cg_goto_f")
-
-    ! Write that user defined model is used; k-tau is not
-    ! supported by cgns.
-
-    call cg_model_write_f("TurbulenceModel_t", &
-         UserDefined, ierr)
-    if(ierr /= CG_OK)                    &
-         call terminate("writeCGNSKtauInfo", &
-         "Something wrong when calling cg_model_write_f")
-
-    ! Write the turbulent closure type.
+                           "Something wrong when calling cg_goto_f")
 
-    call cg_model_write_f("TurbulenceClosure_t", EddyViscosity, ierr)
-    if(ierr /= CG_OK)                    &
-         call terminate("writeCGNSKtauInfo", &
-         "Something wrong when calling cg_model_write_f")
+        ! Write the governing equations solved.
 
-    ! Write the details of the turbulence model under the turbulent
-    ! closure type.
+        select case (equations)
+        case (EulerEquations)
+            call cg_governing_write_f(Euler, ierr)
 
-    call cg_goto_f(cgnsInd, cgnsBase, ierr, "FlowEquationSet_t", 1,&
-         "TurbulenceClosure_t", 1, "end")
-    if(ierr /= CG_OK)                    &
-         call terminate("writeCGNSKtauInfo", &
-         "Something wrong when calling cg_goto_f")
+        case (NSEquations)
+            call cg_governing_write_f(nsLaminar, ierr)
 
-    ! Write the value of the turbulent prandtl number.
+        case (RANSEquations)
+            call cg_governing_write_f(nsTurbulent, ierr)
+        end select
 
-    val = prandtlTurb
-    call cg_array_write_f(cgnsPrandtlTurb, realTypeCGNS, &
-         1, int(1, cgsize_t), val, ierr)
-    if(ierr /= CG_OK)                    &
-         call terminate("writeCGNSKtauInfo", &
-         "Something wrong when calling cg_array_write_f")
+        if (ierr /= CG_OK) &
+             call terminate("writeCGNSHeader", &
+             "Something wrong when calling &
+             &cg_governing_write_f")
 
-    ! Indicate that this is a nonDimensional parameter.
+        ! Write the information about the gas model used.
+        ! Determine the cp model used in the computation.
 
-    call cg_goto_f(cgnsInd, cgnsBase, ierr, "FlowEquationSet_t", 1,&
-         "TurbulenceClosure_t", 1, "DataArray_t", 1,"end")
-    if(ierr /= CG_OK)                    &
-         call terminate("writeCGNSKtauInfo", &
-         "Something wrong when calling cg_goto_f")
+        select case (cpModel)
+
+        case (cpConstant)
 
-    call cg_dataclass_write_f(NonDimensionalParameter,ierr)
-    if(ierr /= CG_OK)                    &
-         call terminate("writeCGNSKtauInfo", &
-         "Something wrong when calling &
-         &cg_dataclass_write_f")
-  end subroutine writeCGNSKtauInfo
+            ! Constant cp and thus constant gamma.
 
-  subroutine writeCGNSMenterSSTInfo(cgnsInd, cgnsBase)
-    !
-    !       WriteCGNSMenterSSTInfo writes information about menter's
-    !       SST turbulence model to the cgns file.
-    !
-    use inputPhysics
-    use cgnsNames
-    use su_cgns
-    use utils, only : terminate, setCGNSRealType
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    integer, intent(in) :: cgnsInd, cgnsBase
-    !
-    !      Local variables.
-    !
-    integer :: realTypeCGNS, ierr
-
-    real(kind=cgnsRealType) :: val
-
-    ! Set the cgns real type.
-    ! Note that this info is only written to the 1st base.
-
-    realTypeCGNS = setCGNSRealType()
-
-    ! Write the info of the turbulence model under the flow equation
-    ! set. So move to this location first.
-
-    call cg_goto_f(cgnsInd, cgnsBase, ierr, &
-         "FlowEquationSet_t", 1, "end")
-    if(ierr /= CG_OK)                         &
-         call terminate("writeCGNSMenterSSTInfo", &
-         "Something wrong when calling cg_goto_f")
-
-    ! Write that the SST variant of the kOmega model is used.
-
-    call cg_model_write_f("TurbulenceModel_t", &
-         TwoEquation_MenterSST, ierr)
-    if(ierr /= CG_OK)                         &
-         call terminate("writeCGNSMenterSSTInfo", &
-         "Something wrong when calling cg_model_write_f")
-
-    ! Write the turbulent closure type.
+            call cg_model_write_f("GasModel_t", Ideal, ierr)
+            if (ierr /= CG_OK) &
+                 call terminate("writeCGNSHeader", &
+                 "Something wrong when calling &
+                 &cg_model_write_f")
+
+            ! Write the actual value of gamma; this must be done under
+            ! gas model type, which explains the goto statement.
+
+            call cg_goto_f(cgnsInd, base, ierr, "FlowEquationSet_t", &
+                           1, "GasModel_t", 1, "end")
+            if (ierr /= CG_OK) &
+                call terminate("writeCGNSHeader", &
+                               "Something wrong when calling cg_goto_f")
+
+            val = gammaConstant
+            call cg_array_write_f(cgnsHeatRatio, realTypeCGNS, &
+                                  1, int(1, cgsize_t), val, ierr)
+            if (ierr /= CG_OK) &
+                 call terminate("writeCGNSHeader", &
+                 "Something wrong when calling &
+                 &cg_array_write_f")
 
-    call cg_model_write_f("TurbulenceClosure_t", EddyViscosity, ierr)
-    if(ierr /= CG_OK)                         &
-         call terminate("writeCGNSMenterSSTInfo", &
-         "Something wrong when calling cg_model_write_f")
-
-    ! Write the details of the turbulence model under the turbulent
-    ! closure type.
-
-    call cg_goto_f(cgnsInd, cgnsBase, ierr, "FlowEquationSet_t", 1,&
-         "TurbulenceClosure_t", 1, "end")
-    if(ierr /= CG_OK)                         &
-         call terminate("writeCGNSMenterSSTInfo", &
-         "Something wrong when calling cg_goto_f")
-
-    ! Write the value of the turbulent prandtl number.
-
-    val = prandtlTurb
-    call cg_array_write_f(cgnsPrandtlTurb, realTypeCGNS, &
-         1, int(1, cgsize_t), val, ierr)
-    if(ierr /= CG_OK)                         &
-         call terminate("writeCGNSMenterSSTInfo", &
-         "Something wrong when calling cg_array_write_f")
-
-    ! Indicate that this is a nonDimensional parameter.
-
-    call cg_goto_f(cgnsInd, cgnsBase, ierr, "FlowEquationSet_t", 1,&
-         "TurbulenceClosure_t", 1, "DataArray_t", 1,"end")
-    if(ierr /= CG_OK)                         &
-         call terminate("writeCGNSMenterSSTInfo", &
-         "Something wrong when calling cg_goto_f")
-
-    call cg_dataclass_write_f(NonDimensionalParameter,ierr)
-    if(ierr /= CG_OK)                         &
-         call terminate("writeCGNSMenterSSTInfo", &
-         "Something wrong when calling &
-         &cg_dataclass_write_f")
-  end subroutine writeCGNSMenterSSTInfo
-
-  subroutine writeCGNSReferenceState(cgnsInd, cgnsBase)
-    !
-    !       writeCGNSReferenceState writes the reference state to the
-    !       cgns file. Enough info is specified such that a restart can be
-    !       performed by a different solver, which uses a different
-    !       nonDimensionalization.
-    !
-    use constants
-    use cgnsNames
-    use su_cgns
-    use inputPhysics
-    use flowVarRefState
-    use utils, only : terminate, setCGNSRealType
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    integer, intent(in) :: cgnsInd, cgnsBase
-    !
-    !      Local variables.
-    !
-    integer :: ierr, realTypeCGNS, ii
+            ! And create a data class under SpecificHeatRatio to tell that
+            ! this is a nonDimensional parameter.
 
-    integer(kind=intType) :: i
+            call cg_goto_f(cgnsInd, base, ierr, &
+                           "FlowEquationSet_t", 1, &
+                           "GasModel_t", 1, "DataArray_t", 1, "end")
+            if (ierr /= CG_OK) &
+                call terminate("writeCGNSHeader", &
+                               "Something wrong when calling cg_goto_f")
 
-    real(kind=cgnsRealType) :: val
+            call cg_dataclass_write_f(NonDimensionalParameter, ierr)
+            if (ierr /= CG_OK) &
+                 call terminate("writeCGNSHeader", &
+                 "Something wrong when calling &
+                 &cg_dataclass_write_f")
 
-    ! Set the cgns real type.
+            !===============================================================
 
-    realTypeCGNS = setCGNSRealType()
+        case (cpTempCurveFits)
 
-    ! Go to the base.
+            ! Cp as function of the temperature is given via curve fits.
 
-    call cg_goto_f(cgnsInd, cgnsBase, ierr, "end")
-    if(ierr /= CG_OK)                      &
-         call terminate("writeReferenceState", &
-         "Something wrong when calling cg_goto_f")
+            call cg_model_write_f("GasModel_t", ThermallyPerfect, ierr)
+            if (ierr /= CG_OK) &
+                 call terminate("writeCGNSHeader", &
+                 "Something wrong when calling &
+                 &cg_model_write_f")
 
-    ! Create the reference state node with a nice description.
+        end select
 
-    call cg_state_write_f("Reference state variables for &
-         &nonDimensional data. Variables are &
-         &nonDimensionalized using the reference &
-         &density, pressure and temperature.", ierr)
-    if(ierr /= CG_OK)                      &
-         call terminate("writeReferenceState", &
-         "Something wrong when calling cg_state_write_f")
+        ! The rest of physical model description is only
+        ! for viscous flows.
 
-    ! The actual data should be written below the reference state
-    ! node. So go there first.
+        viscousTest: if (viscous) then
 
-    call cg_goto_f(cgnsInd, cgnsBase, ierr, &
-         "ReferenceState_t", 1, "end")
-    if(ierr /= CG_OK)                      &
-         call terminate("writeReferenceState", &
-         "Something wrong when calling cg_goto_f")
+            ! Write the info of the viscosity model. Under the flow
+            ! equation set.
 
-    ! Write the Mach number and indicate that it is a nonDimensional
-    ! parameter
+            call cg_goto_f(cgnsInd, base, ierr, &
+                           "FlowEquationSet_t", 1, "end")
+            if (ierr /= CG_OK) &
+                call terminate("writeCGNSHeader", &
+                               "Something wrong when calling cg_goto_f")
+
+            call cg_model_write_f("ViscosityModel_t", sutherlandlaw, ierr)
+            if (ierr /= CG_OK) &
+                 call terminate("writeCGNSHeader", &
+                 "Something wrong when calling &
+                 &cg_model_write_f")
 
-    val = Mach
-    call cg_array_write_f(cgnsMach, realTypeCGNS, 1, int(1, cgsize_t), val, ierr)
-    if(ierr /= CG_OK)                      &
-         call terminate("writeReferenceState", &
-         "Something wrong when calling cg_array_write_f")
+            ! Write the info about the thermal conductivity, i.e.
+            ! Constant Prandtl number. Write the used value as well.
 
-    ii = 1
-    call cg_goto_f(cgnsInd, cgnsBase, ierr, "ReferenceState_t", 1, &
-         "DataArray_t", ii, "end")
-    if(ierr /= CG_OK)                      &
-         call terminate("writeReferenceState", &
-         "Something wrong when calling cg_goto_f")
+            call cg_model_write_f("ThermalConductivityModel_t", &
+                                  constantPrandtl, ierr)
+            if (ierr /= CG_OK) &
+                 call terminate("writeCGNSHeader", &
+                 "Something wrong when calling &
+                 &cg_model_write_f")
+
+            call cg_goto_f(cgnsInd, base, ierr, "FlowEquationSet_t", 1, &
+                           "ThermalConductivityModel_t", 1, "end")
+            if (ierr /= CG_OK) &
+                call terminate("writeCGNSHeader", &
+                               "Something wrong when calling cg_goto_f")
 
-    call cg_dataclass_write_f(NonDimensionalParameter, ierr)
-    if(ierr /= CG_OK)                      &
-         call terminate("writeReferenceState", &
-         "Something wrong when calling &
-         &cg_dataclass_write_f")
+            val = prandtl
+            call cg_array_write_f(cgnsPrandtl, realTypeCGNS, 1, int(1, cgsize_t), &
+                                  val, ierr)
+            if (ierr /= CG_OK) &
+                 call terminate("writeCGNSHeader", &
+                 "Something wrong when calling &
+                 &cg_array_write_f")
+
+            ! And create a data class under Prandtl number to tell that
+            ! this is a nonDimensional parameter.
+
+            call cg_goto_f(cgnsInd, base, ierr, "FlowEquationSet_t", &
+                           1, "ThermalConductivityModel_t", 1, &
+                           "DataArray_t", 1, "end")
+            if (ierr /= CG_OK) &
+                call terminate("writeCGNSHeader", &
+                               "Something wrong when calling cg_goto_f")
+
+            call cg_dataclass_write_f(NonDimensionalParameter, ierr)
+            if (ierr /= CG_OK) &
+                 call terminate("writeCGNSHeader", &
+                 "Something wrong when calling &
+                 &cg_dataclass_write_f")
+
+            ! The rest of the physical model description is only for the
+            ! RANS equations.
+
+            turbulentTest: if (equations == RANSEquations) then
+
+                select case (turbModel)
 
-    ! Write the 3 flow angles. The units are degrees.
+                case (spalartAllmaras)
+                    call writeCGNSSaInfo(cgnsInd, base)
 
-    velocityDir: do i=1,3
+                case (spalartAllmarasEdwards)
+                    call writeCGNSSaeInfo(cgnsInd, base)
 
-       ! Go to the reference state node.
-
-       call cg_goto_f(cgnsInd, cgnsBase, ierr, &
-            "ReferenceState_t", 1, "end")
-       if(ierr /= CG_OK)                      &
+                case (komegaWilcox)
+                    call writeCGNSKomegaWilcoxInfo(cgnsInd, base)
+
+                case (komegaModified)
+                    call writeCGNSKomegaModifiedInfo(cgnsInd, base)
+
+                case (ktau)
+                    call writeCGNSKtauInfo(cgnsInd, base)
+
+                case (menterSST)
+                    call writeCGNSMenterSSTInfo(cgnsInd, base)
+
+                case (v2f)
+                    call writeCGNSV2fInfo(cgnsInd, base)
+                end select
+
+            end if turbulentTest
+
+        end if viscousTest
+
+        ! Write the reference state.
+
+        call writeCGNSReferenceState(cgnsInd, base)
+    end subroutine writeCGNSHeader
+
+    subroutine writeCGNSKomegaModifiedInfo(cgnsInd, cgnsBase)
+        !
+        !       writeCGNSKomegaModifiedInfo writes information about the
+        !       modified k-omega turbulence model to the cgns file.
+        !
+        use inputPhysics
+        use cgnsNames
+        use su_cgns
+        use utils, only: terminate, setCGNSRealType
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        integer, intent(in) :: cgnsInd, cgnsBase
+        !
+        !      Local variables.
+        !
+        integer :: realTypeCGNS, ierr
+
+        real(kind=cgnsRealType) :: val
+
+        ! Set the cgns real type.
+
+        realTypeCGNS = setCGNSRealType()
+
+        ! Write the info of the turbulence model under the flow equation
+        ! set. So move to this location first.
+
+        call cg_goto_f(cgnsInd, cgnsBase, ierr, &
+                       "FlowEquationSet_t", 1, "end")
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSKomegaModifiedInfo", &
+                           "Something wrong when calling cg_goto_f")
+
+        ! Write that the k-omega model is used.
+
+        call cg_model_write_f("TurbulenceModel_t", &
+                              TwoEquation_Wilcox, ierr)
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSKomegaModifiedInfo", &
+                           "Something wrong when calling cg_model_write_f")
+
+        ! Write the turbulent closure type.
+
+        call cg_model_write_f("TurbulenceClosure_t", EddyViscosity, ierr)
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSKomegaModifiedInfo", &
+                           "Something wrong when calling cg_model_write_f")
+
+        ! Write the details of the turbulence model under the turbulent
+        ! closure type.
+
+        call cg_goto_f(cgnsInd, cgnsBase, ierr, "FlowEquationSet_t", 1, &
+                       "TurbulenceClosure_t", 1, "end")
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSKomegaModifiedInfo", &
+                           "Something wrong when calling cg_goto_f")
+
+        ! Write the value of the turbulent prandtl number.
+
+        val = prandtlTurb
+        call cg_array_write_f(cgnsPrandtlTurb, realTypeCGNS, &
+                              1, int(1, cgsize_t), val, ierr)
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSKomegaModifiedInfo", &
+                           "Something wrong when calling cg_array_write_f")
+
+        ! Indicate that this is a nonDimensional parameter.
+
+        call cg_goto_f(cgnsInd, cgnsBase, ierr, "FlowEquationSet_t", 1, &
+                       "TurbulenceClosure_t", 1, "DataArray_t", 1, "end")
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSKomegaModifiedInfo", &
+                           "Something wrong when calling cg_goto_f")
+
+        call cg_dataclass_write_f(NonDimensionalParameter, ierr)
+        if (ierr /= CG_OK) &
+             call terminate("writeCGNSKomegaModifiedInfo", &
+             "Something wrong when calling &
+             &cg_dataclass_write_f")
+    end subroutine writeCGNSKomegaModifiedInfo
+
+    subroutine writeCGNSKomegaWilcoxInfo(cgnsInd, cgnsBase)
+        !
+        !       writeCGNSKomegaWilcoxInfo writes information about the
+        !       standard Wilcox k-omega turbulence model to the cgns file.
+        !
+        use inputPhysics
+        use cgnsNames
+        use su_cgns
+        use utils, only: terminate, setCGNSRealType
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        integer, intent(in) :: cgnsInd, cgnsBase
+        !
+        !      Local variables.
+        !
+        integer :: realTypeCGNS, ierr
+
+        real(kind=cgnsRealType) :: val
+
+        ! Set the cgns real type.
+
+        realTypeCGNS = setCGNSRealType()
+
+        ! Write the info of the turbulence model under the flow equation
+        ! set. So move to this location first.
+
+        call cg_goto_f(cgnsInd, cgnsBase, ierr, &
+                       "FlowEquationSet_t", 1, "end")
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSKomegaWilcoxInfo", &
+                           "Something wrong when calling cg_goto_f")
+
+        ! Write that the k-omega model is used.
+
+        call cg_model_write_f("TurbulenceModel_t", &
+                              TwoEquation_Wilcox, ierr)
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSKomegaWilcoxInfo", &
+                           "Something wrong when calling cg_model_write_f")
+
+        ! Write the turbulent closure type.
+
+        call cg_model_write_f("TurbulenceClosure_t", EddyViscosity, ierr)
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSKomegaWilcoxInfo", &
+                           "Something wrong when calling cg_model_write_f")
+
+        ! Write the details of the turbulence model under the turbulent
+        ! closure type.
+
+        call cg_goto_f(cgnsInd, cgnsBase, ierr, "FlowEquationSet_t", 1, &
+                       "TurbulenceClosure_t", 1, "end")
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSKomegaWilcoxInfo", &
+                           "Something wrong when calling cg_goto_f")
+
+        ! Write the value of the turbulent prandtl number.
+
+        val = prandtlTurb
+        call cg_array_write_f(cgnsPrandtlTurb, realTypeCGNS, &
+                              1, int(1, cgsize_t), val, ierr)
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSKomegaWilcoxInfo", &
+                           "Something wrong when calling cg_array_write_f")
+
+        ! Indicate that this is a nonDimensional parameter.
+
+        call cg_goto_f(cgnsInd, cgnsBase, ierr, "FlowEquationSet_t", 1, &
+                       "TurbulenceClosure_t", 1, "DataArray_t", 1, "end")
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSKomegaWilcoxInfo", &
+                           "Something wrong when calling cg_goto_f")
+
+        call cg_dataclass_write_f(NonDimensionalParameter, IERR)
+        if (ierr /= CG_OK) &
+             call terminate("writeCGNSKomegaWilcoxInfo", &
+             "Something wrong when calling &
+             &cg_dataclass_write_f")
+    end subroutine writeCGNSKomegaWilcoxInfo
+
+    subroutine writeCGNSKtauInfo(cgnsInd, cgnsBase)
+        !
+        !       WriteCGNSKtauInfo writes information about the k-tau
+        !       turbulence model to the cgns file.
+        !
+        use inputPhysics
+        use cgnsNames
+        use su_cgns
+        use utils, only: terminate, setCGNSRealType
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        integer, intent(in) :: cgnsInd, cgnsBase
+        !
+        !      Local variables.
+        !
+        integer :: realTypeCGNS, ierr
+
+        real(kind=cgnsRealType) :: val
+
+        ! Set the cgns real type.
+
+        realTypeCGNS = setCGNSRealType()
+
+        ! Write the info of the turbulence model under the flow equation
+        ! set. So move to this location first.
+
+        call cg_goto_f(cgnsInd, cgnsBase, ierr, &
+                       "FlowEquationSet_t", 1, "end")
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSKtauInfo", &
+                           "Something wrong when calling cg_goto_f")
+
+        ! Write that user defined model is used; k-tau is not
+        ! supported by cgns.
+
+        call cg_model_write_f("TurbulenceModel_t", &
+                              UserDefined, ierr)
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSKtauInfo", &
+                           "Something wrong when calling cg_model_write_f")
+
+        ! Write the turbulent closure type.
+
+        call cg_model_write_f("TurbulenceClosure_t", EddyViscosity, ierr)
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSKtauInfo", &
+                           "Something wrong when calling cg_model_write_f")
+
+        ! Write the details of the turbulence model under the turbulent
+        ! closure type.
+
+        call cg_goto_f(cgnsInd, cgnsBase, ierr, "FlowEquationSet_t", 1, &
+                       "TurbulenceClosure_t", 1, "end")
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSKtauInfo", &
+                           "Something wrong when calling cg_goto_f")
+
+        ! Write the value of the turbulent prandtl number.
+
+        val = prandtlTurb
+        call cg_array_write_f(cgnsPrandtlTurb, realTypeCGNS, &
+                              1, int(1, cgsize_t), val, ierr)
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSKtauInfo", &
+                           "Something wrong when calling cg_array_write_f")
+
+        ! Indicate that this is a nonDimensional parameter.
+
+        call cg_goto_f(cgnsInd, cgnsBase, ierr, "FlowEquationSet_t", 1, &
+                       "TurbulenceClosure_t", 1, "DataArray_t", 1, "end")
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSKtauInfo", &
+                           "Something wrong when calling cg_goto_f")
+
+        call cg_dataclass_write_f(NonDimensionalParameter, ierr)
+        if (ierr /= CG_OK) &
+             call terminate("writeCGNSKtauInfo", &
+             "Something wrong when calling &
+             &cg_dataclass_write_f")
+    end subroutine writeCGNSKtauInfo
+
+    subroutine writeCGNSMenterSSTInfo(cgnsInd, cgnsBase)
+        !
+        !       WriteCGNSMenterSSTInfo writes information about menter's
+        !       SST turbulence model to the cgns file.
+        !
+        use inputPhysics
+        use cgnsNames
+        use su_cgns
+        use utils, only: terminate, setCGNSRealType
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        integer, intent(in) :: cgnsInd, cgnsBase
+        !
+        !      Local variables.
+        !
+        integer :: realTypeCGNS, ierr
+
+        real(kind=cgnsRealType) :: val
+
+        ! Set the cgns real type.
+        ! Note that this info is only written to the 1st base.
+
+        realTypeCGNS = setCGNSRealType()
+
+        ! Write the info of the turbulence model under the flow equation
+        ! set. So move to this location first.
+
+        call cg_goto_f(cgnsInd, cgnsBase, ierr, &
+                       "FlowEquationSet_t", 1, "end")
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSMenterSSTInfo", &
+                           "Something wrong when calling cg_goto_f")
+
+        ! Write that the SST variant of the kOmega model is used.
+
+        call cg_model_write_f("TurbulenceModel_t", &
+                              TwoEquation_MenterSST, ierr)
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSMenterSSTInfo", &
+                           "Something wrong when calling cg_model_write_f")
+
+        ! Write the turbulent closure type.
+
+        call cg_model_write_f("TurbulenceClosure_t", EddyViscosity, ierr)
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSMenterSSTInfo", &
+                           "Something wrong when calling cg_model_write_f")
+
+        ! Write the details of the turbulence model under the turbulent
+        ! closure type.
+
+        call cg_goto_f(cgnsInd, cgnsBase, ierr, "FlowEquationSet_t", 1, &
+                       "TurbulenceClosure_t", 1, "end")
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSMenterSSTInfo", &
+                           "Something wrong when calling cg_goto_f")
+
+        ! Write the value of the turbulent prandtl number.
+
+        val = prandtlTurb
+        call cg_array_write_f(cgnsPrandtlTurb, realTypeCGNS, &
+                              1, int(1, cgsize_t), val, ierr)
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSMenterSSTInfo", &
+                           "Something wrong when calling cg_array_write_f")
+
+        ! Indicate that this is a nonDimensional parameter.
+
+        call cg_goto_f(cgnsInd, cgnsBase, ierr, "FlowEquationSet_t", 1, &
+                       "TurbulenceClosure_t", 1, "DataArray_t", 1, "end")
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSMenterSSTInfo", &
+                           "Something wrong when calling cg_goto_f")
+
+        call cg_dataclass_write_f(NonDimensionalParameter, ierr)
+        if (ierr /= CG_OK) &
+             call terminate("writeCGNSMenterSSTInfo", &
+             "Something wrong when calling &
+             &cg_dataclass_write_f")
+    end subroutine writeCGNSMenterSSTInfo
+
+    subroutine writeCGNSReferenceState(cgnsInd, cgnsBase)
+        !
+        !       writeCGNSReferenceState writes the reference state to the
+        !       cgns file. Enough info is specified such that a restart can be
+        !       performed by a different solver, which uses a different
+        !       nonDimensionalization.
+        !
+        use constants
+        use cgnsNames
+        use su_cgns
+        use inputPhysics
+        use flowVarRefState
+        use utils, only: terminate, setCGNSRealType
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        integer, intent(in) :: cgnsInd, cgnsBase
+        !
+        !      Local variables.
+        !
+        integer :: ierr, realTypeCGNS, ii
+
+        integer(kind=intType) :: i
+
+        real(kind=cgnsRealType) :: val
+
+        ! Set the cgns real type.
+
+        realTypeCGNS = setCGNSRealType()
+
+        ! Go to the base.
+
+        call cg_goto_f(cgnsInd, cgnsBase, ierr, "end")
+        if (ierr /= CG_OK) &
             call terminate("writeReferenceState", &
-            "Something wrong when calling cg_goto_f")
-
-       ! Store component i of the direction in val.
-
-       val = velDirFreestream(i)
-
-       select case(i)
-       case (1_intType)
+                           "Something wrong when calling cg_goto_f")
 
-          call cg_array_write_f(cgnsVelVecX, realTypeCGNS, &
-               1, int(1, cgsize_t), val, ierr)
+        ! Create the reference state node with a nice description.
 
-       case (2_intType)
-          call cg_array_write_f(cgnsVelVecY, realTypeCGNS, &
-               1, int(1, cgsize_t), val, ierr)
-
-       case (3_intType)
-          call cg_array_write_f(cgnsVelVecZ, realTypeCGNS, &
-               1, int(1, cgsize_t), val, ierr)
-       end select
-
-       if(ierr /= CG_OK)                      &
+        call cg_state_write_f("Reference state variables for &
+             &nonDimensional data. Variables are &
+             &nonDimensionalized using the reference &
+             &density, pressure and temperature.", ierr)
+        if (ierr /= CG_OK) &
             call terminate("writeReferenceState", &
-            "Something wrong when calling &
-            &cg_array_write_f")
+                           "Something wrong when calling cg_state_write_f")
 
-       ! Write the info that the unit vector is nondimensional.
-
-       ii = ii + 1
-       call cg_goto_f(cgnsInd, cgnsBase, ierr, "ReferenceState_t", 1, &
-            "DataArray_t", ii, "end")
-       if(ierr /= CG_OK)                      &
-            call terminate("writeReferenceState", &
-            "Something wrong when calling cg_goto_f")
+        ! The actual data should be written below the reference state
+        ! node. So go there first.
 
-       call cg_dataclass_write_f(NonDimensionalParameter, ierr)
-       if(ierr /= CG_OK)                      &
+        call cg_goto_f(cgnsInd, cgnsBase, ierr, &
+                       "ReferenceState_t", 1, "end")
+        if (ierr /= CG_OK) &
             call terminate("writeReferenceState", &
-            "Something wrong when calling &
-            &cg_dataclass_write_f")
+                           "Something wrong when calling cg_goto_f")
 
-    enddo velocityDir
+        ! Write the Mach number and indicate that it is a nonDimensional
+        ! parameter
 
-
-    ! Write some reference values of the density, pressure, temperature,
-    ! velocity and length.
-
-    refLoop: do i=1,5
-
-       ! Go to the reference state node.
-
-       call cg_goto_f(cgnsInd, cgnsBase, ierr, &
-            "ReferenceState_t", 1, "end")
-       if(ierr /= CG_OK)                      &
+        val = Mach
+        call cg_array_write_f(cgnsMach, realTypeCGNS, 1, int(1, cgsize_t), val, ierr)
+        if (ierr /= CG_OK) &
             call terminate("writeReferenceState", &
-            "Something wrong when calling cg_goto_f")
-
-       ! Write a value, depending on i.
-
-       select case(i)
-       case (1_intType)
-          val = rhoref
-          call cg_array_write_f(cgnsDensity, realTypeCGNS, &
-               1, int(1, cgsize_t), val, ierr)
-       case (2_intType)
-          val = pref
-          call cg_array_write_f(cgnsPressure, realTypeCGNS, &
-               1, int(1, cgsize_t), val, ierr)
-
-       case (3_intType)
-          val = Tref
-          call cg_array_write_f(cgnsTemp, realTypeCGNS, &
-               1, int(1, cgsize_t), val, ierr)
-
-       case (4_intType)
-          val = sqrt(pref/rhoref)
-          call cg_array_write_f(cgnsVelocity, realTypeCGNS, &
-               1, int(1, cgsize_t), val, ierr)
-
-       case (5_intType)
-          val = one
-          call cg_array_write_f(cgnsLength, realTypeCGNS, &
-               1, int(1, cgsize_t), val, ierr)
-       end select
-
-       if(ierr /= CG_OK)                      &
-            call terminate("writeReferenceState", &
-            "Something wrong when calling &
-            &cg_array_write_f")
-
-       ! Write the info that the this reference value is dimensional
-       ! and based on si units.
+                           "Something wrong when calling cg_array_write_f")
 
-       ii = ii + 1
-       call cg_goto_f(cgnsInd, cgnsBase, ierr, "ReferenceState_t", 1, &
-            "DataArray_t", ii, "end")
-       if(ierr /= CG_OK)                      &
+        ii = 1
+        call cg_goto_f(cgnsInd, cgnsBase, ierr, "ReferenceState_t", 1, &
+                       "DataArray_t", ii, "end")
+        if (ierr /= CG_OK) &
             call terminate("writeReferenceState", &
-            "Something wrong when calling cg_goto_f")
-
-       call cg_dataclass_write_f(Dimensional, ierr)
-       if(ierr /= CG_OK)                      &
-            call terminate("writeReferenceState", &
-            "Something wrong when calling &
-            &cg_dataclass_write_f")
-
-       call cg_units_write_f(Kilogram, Meter, Second, Kelvin, &
-            Null, ierr)
-       if(ierr /= CG_OK)                      &
-            call terminate("writeReferenceState", &
-            "Something wrong when calling &
-            &cg_units_write_f")
-
-    enddo refLoop
-  end subroutine writeCGNSReferenceState
-
-  subroutine writeCGNSSaInfo(cgnsInd, cgnsBase)
-    !
-    !       WriteCGNSSaInfo writes information about the Spalart
-    !       Allmaras turbulence model to the cgns file.
-    !
-    use inputPhysics
-    use cgnsNames
-    use su_cgns
-    use utils, only : terminate, setCGNSRealType
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    integer, intent(in) :: cgnsInd, cgnsBase
-    !
-    !      Local variables.
-    !
-    integer :: realTypeCGNS, ierr
-
-    real(kind=cgnsRealType) :: val
-
-    ! Set the cgns real type.
-
-    realTypeCGNS = setCGNSRealType()
-
-    ! Write the info of the turbulence model under the flow equation
-    ! set. So move to this location first.
-
-    call cg_goto_f(cgnsInd, cgnsBase, ierr, &
-         "FlowEquationSet_t", 1, "end")
-    if(ierr /= CG_OK)                  &
-         call terminate("writeCGNSSaInfo", &
-         "Something wrong when calling cg_goto_f")
-
-    ! Write that the spalart-allmaras model is used.
-
-    call cg_model_write_f("TurbulenceModel_t", &
-         OneEquation_SpalartAllmaras, ierr)
-    if(ierr /= CG_OK)                  &
-         call terminate("writeCGNSSaInfo", &
-         "Something wrong when calling cg_model_write_f")
-
-    ! Write the turbulent closure type.
-
-    call cg_model_write_f("TurbulenceClosure_t", EddyViscosity, ierr)
-    if(ierr /= CG_OK)                  &
-         call terminate("writeCGNSSaInfo", &
-         "Something wrong when calling cg_model_write_f")
-
-    ! Write the details of the turbulence model under the turbulent
-    ! closure type.
-
-    call cg_goto_f(cgnsInd, cgnsBase, ierr, "FlowEquationSet_t", 1,&
-         "TurbulenceClosure_t", 1, "end")
-    if(ierr /= CG_OK)                  &
-         call terminate("writeCGNSSaInfo", &
-         "Something wrong when calling cg_goto_f")
-
-    ! Write the value of the turbulent prandtl number.
-
-    val = prandtlTurb
-    call cg_array_write_f(cgnsPrandtlTurb, realTypeCGNS, &
-         1, int(1, cgsize_t), val, ierr)
-    if(ierr /= CG_OK)                  &
-         call terminate("writeCGNSSaInfo", &
-         "Something wrong when calling cg_array_write_f")
-
-    ! Indicate that this is a nonDimensional parameter.
-
-    call cg_goto_f(cgnsInd, cgnsBase, ierr, "FlowEquationSet_t", 1,&
-         "TurbulenceClosure_t", 1, "DataArray_t", 1,"end")
-    if(ierr /= CG_OK)                  &
-         call terminate("writeCGNSSaInfo", &
-         "Something wrong when calling cg_goto_f")
-
-    call cg_dataclass_write_f(NonDimensionalParameter,ierr)
-    if(ierr /= CG_OK)                  &
-         call terminate("writeCGNSSaInfo", &
-         "Something wrong when calling &
-         &cg_dataclass_write_f")
-  end subroutine writeCGNSSaInfo
-
-  subroutine writeCGNSSaeInfo(cgnsInd, cgnsBase)
-    !
-    !       WriteCGNSSaeInfo writes information about the Spalart
-    !       Allmaras turbulence model using the Edwards modification to
-    !       the cgns file.
-    !
-    use inputPhysics
-    use cgnsNames
-    use su_cgns
-    use utils, only : terminate, setCGNSRealType
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    integer, intent(in) :: cgnsInd, cgnsBase
-    !
-    !      Local variables.
-    !
-    integer :: realTypeCGNS, ierr
-
-    real(kind=cgnsRealType) :: val
-
-    ! Set the cgns real type.
-
-    realTypeCGNS = setCGNSRealType()
-
-    ! Write the info of the turbulence model under the flow equation
-    ! set. So move to this location first.
-
-    call cg_goto_f(cgnsInd, cgnsBase, ierr, &
-         "FlowEquationSet_t", 1, "end")
-    if(ierr /= CG_OK)                  &
-         call terminate("writeCGNSSaInfo", &
-         "Something wrong when calling cg_goto_f")
-
-    ! Write that the spalart-allmaras model is used.
-
-    call cg_model_write_f("TurbulenceModel_t", &
-         OneEquation_SpalartAllmaras, ierr)
-    if(ierr /= CG_OK)                  &
-         call terminate("writeCGNSSaInfo", &
-         "Something wrong when calling cg_model_write_f")
-
-    ! Write the turbulent closure type.
-
-    call cg_model_write_f("TurbulenceClosure_t", EddyViscosity, ierr)
-    if(ierr /= CG_OK)                  &
-         call terminate("writeCGNSSaInfo", &
-         "Something wrong when calling cg_model_write_f")
-
-    ! Write the details of the turbulence model under the turbulent
-    ! closure type.
-
-    call cg_goto_f(cgnsInd, cgnsBase, ierr, "FlowEquationSet_t", 1,&
-         "TurbulenceClosure_t", 1, "end")
-    if(ierr /= CG_OK)                  &
-         call terminate("writeCGNSSaInfo", &
-         "Something wrong when calling cg_goto_f")
-
-    ! Write the value of the turbulent prandtl number.
-
-    val = prandtlTurb
-    call cg_array_write_f(cgnsPrandtlTurb, realTypeCGNS, &
-         1, int(1, cgsize_t), val, ierr)
-    if(ierr /= CG_OK)                  &
-         call terminate("writeCGNSSaInfo", &
-         "Something wrong when calling cg_array_write_f")
-
-    ! Indicate that this is a nonDimensional parameter.
-
-    call cg_goto_f(cgnsInd, cgnsBase, ierr, "FlowEquationSet_t", 1,&
-         "TurbulenceClosure_t", 1, "DataArray_t", 1,"end")
-    if(ierr /= CG_OK)                  &
-         call terminate("writeCGNSSaInfo", &
-         "Something wrong when calling cg_goto_f")
-
-    call cg_dataclass_write_f(NonDimensionalParameter,ierr)
-    if(ierr /= CG_OK)                  &
-         call terminate("writeCGNSSaInfo", &
-         "Something wrong when calling &
-         &cg_dataclass_write_f")
-  end subroutine writeCGNSSaeInfo
-
-  subroutine writeCGNSV2fInfo(cgnsInd, cgnsBase)
-    !
-    !       WriteCGNSV2fInfo writes information about Durbin's v2f
-    !       turbulence model to the cgns file.
-    !
-    use inputPhysics
-    use cgnsNames
-    use su_cgns
-    use utils, only : terminate, setCGNSRealType
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    integer, intent(in) :: cgnsInd, cgnsBase
-    !
-    !      Local variables.
-    !
-    integer :: realTypeCGNS, ierr
-
-    real(kind=cgnsRealType) :: val
-
-    ! Set the cgns real type.
-
-    realTypeCGNS = setCGNSRealType()
-
-    ! Write the info of the turbulence model under the flow equation
-    ! set. So move to this location first.
-
-    call cg_goto_f(cgnsInd, cgnsBase, ierr, &
-         "FlowEquationSet_t", 1, "end")
-    if(ierr /= CG_OK)                   &
-         call terminate("writeCGNSV2fInfo", &
-         "Something wrong when calling cg_goto_f")
-
-    ! Write that user defined model is used; v2-f is not
-    ! supported by cgns.
-
-    call cg_model_write_f("TurbulenceModel_t", UserDefined, ierr)
-
-    if(ierr /= CG_OK)                   &
-         call terminate("writeCGNSV2fInfo", &
-         "Something wrong when calling cg_model_write_f")
-
-    ! Write the turbulent closure type.
-
-    call cg_model_write_f("TurbulenceClosure_t", EddyViscosity, ierr)
-    if(ierr /= CG_OK)                   &
-         call terminate("writeCGNSV2fInfo", &
-         "Something wrong when calling cg_model_write_f")
-
-    ! Write the details of the turbulence model under the turbulent
-    ! closure type.
-
-    call cg_goto_f(cgnsInd, cgnsBase, ierr, "FlowEquationSet_t", 1,&
-         "TurbulenceClosure_t", 1, "end")
-    if(ierr /= CG_OK)                   &
-         call terminate("writeCGNSV2fInfo", &
-         "Something wrong when calling cg_goto_f")
+                           "Something wrong when calling cg_goto_f")
 
-    ! Write the value of the turbulent prandtl number.
+        call cg_dataclass_write_f(NonDimensionalParameter, ierr)
+        if (ierr /= CG_OK) &
+             call terminate("writeReferenceState", &
+             "Something wrong when calling &
+             &cg_dataclass_write_f")
 
-    val = prandtlTurb
-    call cg_array_write_f(cgnsPrandtlTurb, realTypeCGNS, &
-         1, int(1, cgsize_t), val, ierr)
-    if(ierr /= CG_OK)                   &
-         call terminate("writeCGNSV2fInfo", &
-         "Something wrong when calling cg_array_write_f")
+        ! Write the 3 flow angles. The units are degrees.
 
-    ! Indicate that this is a nonDimensional parameter.
+        velocityDir: do i = 1, 3
 
-    call cg_goto_f(cgnsInd, cgnsBase, ierr, "FlowEquationSet_t", 1,&
-         "TurbulenceClosure_t", 1, "DataArray_t", 1,"end")
-    if(ierr /= CG_OK)                   &
-         call terminate("writeCGNSV2fInfo", &
-         "Something wrong when calling cg_goto_f")
+            ! Go to the reference state node.
 
-    call cg_dataclass_write_f(NonDimensionalParameter,ierr)
-    if(ierr /= CG_OK)                   &
-         call terminate("writeCGNSV2fInfo", &
-         "Something wrong when calling &
-         &cg_dataclass_write_f")
-  end subroutine writeCGNSV2fInfo
+            call cg_goto_f(cgnsInd, cgnsBase, ierr, &
+                           "ReferenceState_t", 1, "end")
+            if (ierr /= CG_OK) &
+                call terminate("writeReferenceState", &
+                               "Something wrong when calling cg_goto_f")
+
+            ! Store component i of the direction in val.
+
+            val = velDirFreestream(i)
+
+            select case (i)
+            case (1_intType)
+
+                call cg_array_write_f(cgnsVelVecX, realTypeCGNS, &
+                                      1, int(1, cgsize_t), val, ierr)
+
+            case (2_intType)
+                call cg_array_write_f(cgnsVelVecY, realTypeCGNS, &
+                                      1, int(1, cgsize_t), val, ierr)
+
+            case (3_intType)
+                call cg_array_write_f(cgnsVelVecZ, realTypeCGNS, &
+                                      1, int(1, cgsize_t), val, ierr)
+            end select
+
+            if (ierr /= CG_OK) &
+                 call terminate("writeReferenceState", &
+                 "Something wrong when calling &
+                 &cg_array_write_f")
+
+            ! Write the info that the unit vector is nondimensional.
+
+            ii = ii + 1
+            call cg_goto_f(cgnsInd, cgnsBase, ierr, "ReferenceState_t", 1, &
+                           "DataArray_t", ii, "end")
+            if (ierr /= CG_OK) &
+                call terminate("writeReferenceState", &
+                               "Something wrong when calling cg_goto_f")
+
+            call cg_dataclass_write_f(NonDimensionalParameter, ierr)
+            if (ierr /= CG_OK) &
+                 call terminate("writeReferenceState", &
+                 "Something wrong when calling &
+                 &cg_dataclass_write_f")
+
+        end do velocityDir
+
+        ! Write some reference values of the density, pressure, temperature,
+        ! velocity and length.
+
+        refLoop: do i = 1, 5
+
+            ! Go to the reference state node.
+
+            call cg_goto_f(cgnsInd, cgnsBase, ierr, &
+                           "ReferenceState_t", 1, "end")
+            if (ierr /= CG_OK) &
+                call terminate("writeReferenceState", &
+                               "Something wrong when calling cg_goto_f")
+
+            ! Write a value, depending on i.
+
+            select case (i)
+            case (1_intType)
+                val = rhoref
+                call cg_array_write_f(cgnsDensity, realTypeCGNS, &
+                                      1, int(1, cgsize_t), val, ierr)
+            case (2_intType)
+                val = pref
+                call cg_array_write_f(cgnsPressure, realTypeCGNS, &
+                                      1, int(1, cgsize_t), val, ierr)
+
+            case (3_intType)
+                val = Tref
+                call cg_array_write_f(cgnsTemp, realTypeCGNS, &
+                                      1, int(1, cgsize_t), val, ierr)
+
+            case (4_intType)
+                val = sqrt(pref/rhoref)
+                call cg_array_write_f(cgnsVelocity, realTypeCGNS, &
+                                      1, int(1, cgsize_t), val, ierr)
+
+            case (5_intType)
+                val = one
+                call cg_array_write_f(cgnsLength, realTypeCGNS, &
+                                      1, int(1, cgsize_t), val, ierr)
+            end select
+
+            if (ierr /= CG_OK) &
+                 call terminate("writeReferenceState", &
+                 "Something wrong when calling &
+                 &cg_array_write_f")
+
+            ! Write the info that the this reference value is dimensional
+            ! and based on si units.
+
+            ii = ii + 1
+            call cg_goto_f(cgnsInd, cgnsBase, ierr, "ReferenceState_t", 1, &
+                           "DataArray_t", ii, "end")
+            if (ierr /= CG_OK) &
+                call terminate("writeReferenceState", &
+                               "Something wrong when calling cg_goto_f")
+
+            call cg_dataclass_write_f(Dimensional, ierr)
+            if (ierr /= CG_OK) &
+                 call terminate("writeReferenceState", &
+                 "Something wrong when calling &
+                 &cg_dataclass_write_f")
+
+            call cg_units_write_f(Kilogram, Meter, Second, Kelvin, &
+                                  Null, ierr)
+            if (ierr /= CG_OK) &
+                 call terminate("writeReferenceState", &
+                 "Something wrong when calling &
+                 &cg_units_write_f")
+
+        end do refLoop
+    end subroutine writeCGNSReferenceState
+
+    subroutine writeCGNSSaInfo(cgnsInd, cgnsBase)
+        !
+        !       WriteCGNSSaInfo writes information about the Spalart
+        !       Allmaras turbulence model to the cgns file.
+        !
+        use inputPhysics
+        use cgnsNames
+        use su_cgns
+        use utils, only: terminate, setCGNSRealType
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        integer, intent(in) :: cgnsInd, cgnsBase
+        !
+        !      Local variables.
+        !
+        integer :: realTypeCGNS, ierr
+
+        real(kind=cgnsRealType) :: val
+
+        ! Set the cgns real type.
+
+        realTypeCGNS = setCGNSRealType()
+
+        ! Write the info of the turbulence model under the flow equation
+        ! set. So move to this location first.
+
+        call cg_goto_f(cgnsInd, cgnsBase, ierr, &
+                       "FlowEquationSet_t", 1, "end")
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSSaInfo", &
+                           "Something wrong when calling cg_goto_f")
+
+        ! Write that the spalart-allmaras model is used.
+
+        call cg_model_write_f("TurbulenceModel_t", &
+                              OneEquation_SpalartAllmaras, ierr)
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSSaInfo", &
+                           "Something wrong when calling cg_model_write_f")
+
+        ! Write the turbulent closure type.
+
+        call cg_model_write_f("TurbulenceClosure_t", EddyViscosity, ierr)
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSSaInfo", &
+                           "Something wrong when calling cg_model_write_f")
+
+        ! Write the details of the turbulence model under the turbulent
+        ! closure type.
+
+        call cg_goto_f(cgnsInd, cgnsBase, ierr, "FlowEquationSet_t", 1, &
+                       "TurbulenceClosure_t", 1, "end")
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSSaInfo", &
+                           "Something wrong when calling cg_goto_f")
+
+        ! Write the value of the turbulent prandtl number.
+
+        val = prandtlTurb
+        call cg_array_write_f(cgnsPrandtlTurb, realTypeCGNS, &
+                              1, int(1, cgsize_t), val, ierr)
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSSaInfo", &
+                           "Something wrong when calling cg_array_write_f")
+
+        ! Indicate that this is a nonDimensional parameter.
+
+        call cg_goto_f(cgnsInd, cgnsBase, ierr, "FlowEquationSet_t", 1, &
+                       "TurbulenceClosure_t", 1, "DataArray_t", 1, "end")
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSSaInfo", &
+                           "Something wrong when calling cg_goto_f")
+
+        call cg_dataclass_write_f(NonDimensionalParameter, ierr)
+        if (ierr /= CG_OK) &
+             call terminate("writeCGNSSaInfo", &
+             "Something wrong when calling &
+             &cg_dataclass_write_f")
+    end subroutine writeCGNSSaInfo
+
+    subroutine writeCGNSSaeInfo(cgnsInd, cgnsBase)
+        !
+        !       WriteCGNSSaeInfo writes information about the Spalart
+        !       Allmaras turbulence model using the Edwards modification to
+        !       the cgns file.
+        !
+        use inputPhysics
+        use cgnsNames
+        use su_cgns
+        use utils, only: terminate, setCGNSRealType
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        integer, intent(in) :: cgnsInd, cgnsBase
+        !
+        !      Local variables.
+        !
+        integer :: realTypeCGNS, ierr
+
+        real(kind=cgnsRealType) :: val
+
+        ! Set the cgns real type.
+
+        realTypeCGNS = setCGNSRealType()
+
+        ! Write the info of the turbulence model under the flow equation
+        ! set. So move to this location first.
+
+        call cg_goto_f(cgnsInd, cgnsBase, ierr, &
+                       "FlowEquationSet_t", 1, "end")
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSSaInfo", &
+                           "Something wrong when calling cg_goto_f")
+
+        ! Write that the spalart-allmaras model is used.
+
+        call cg_model_write_f("TurbulenceModel_t", &
+                              OneEquation_SpalartAllmaras, ierr)
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSSaInfo", &
+                           "Something wrong when calling cg_model_write_f")
+
+        ! Write the turbulent closure type.
+
+        call cg_model_write_f("TurbulenceClosure_t", EddyViscosity, ierr)
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSSaInfo", &
+                           "Something wrong when calling cg_model_write_f")
+
+        ! Write the details of the turbulence model under the turbulent
+        ! closure type.
+
+        call cg_goto_f(cgnsInd, cgnsBase, ierr, "FlowEquationSet_t", 1, &
+                       "TurbulenceClosure_t", 1, "end")
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSSaInfo", &
+                           "Something wrong when calling cg_goto_f")
+
+        ! Write the value of the turbulent prandtl number.
+
+        val = prandtlTurb
+        call cg_array_write_f(cgnsPrandtlTurb, realTypeCGNS, &
+                              1, int(1, cgsize_t), val, ierr)
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSSaInfo", &
+                           "Something wrong when calling cg_array_write_f")
+
+        ! Indicate that this is a nonDimensional parameter.
+
+        call cg_goto_f(cgnsInd, cgnsBase, ierr, "FlowEquationSet_t", 1, &
+                       "TurbulenceClosure_t", 1, "DataArray_t", 1, "end")
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSSaInfo", &
+                           "Something wrong when calling cg_goto_f")
+
+        call cg_dataclass_write_f(NonDimensionalParameter, ierr)
+        if (ierr /= CG_OK) &
+             call terminate("writeCGNSSaInfo", &
+             "Something wrong when calling &
+             &cg_dataclass_write_f")
+    end subroutine writeCGNSSaeInfo
+
+    subroutine writeCGNSV2fInfo(cgnsInd, cgnsBase)
+        !
+        !       WriteCGNSV2fInfo writes information about Durbin's v2f
+        !       turbulence model to the cgns file.
+        !
+        use inputPhysics
+        use cgnsNames
+        use su_cgns
+        use utils, only: terminate, setCGNSRealType
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        integer, intent(in) :: cgnsInd, cgnsBase
+        !
+        !      Local variables.
+        !
+        integer :: realTypeCGNS, ierr
+
+        real(kind=cgnsRealType) :: val
+
+        ! Set the cgns real type.
+
+        realTypeCGNS = setCGNSRealType()
+
+        ! Write the info of the turbulence model under the flow equation
+        ! set. So move to this location first.
+
+        call cg_goto_f(cgnsInd, cgnsBase, ierr, &
+                       "FlowEquationSet_t", 1, "end")
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSV2fInfo", &
+                           "Something wrong when calling cg_goto_f")
+
+        ! Write that user defined model is used; v2-f is not
+        ! supported by cgns.
+
+        call cg_model_write_f("TurbulenceModel_t", UserDefined, ierr)
+
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSV2fInfo", &
+                           "Something wrong when calling cg_model_write_f")
+
+        ! Write the turbulent closure type.
+
+        call cg_model_write_f("TurbulenceClosure_t", EddyViscosity, ierr)
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSV2fInfo", &
+                           "Something wrong when calling cg_model_write_f")
+
+        ! Write the details of the turbulence model under the turbulent
+        ! closure type.
+
+        call cg_goto_f(cgnsInd, cgnsBase, ierr, "FlowEquationSet_t", 1, &
+                       "TurbulenceClosure_t", 1, "end")
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSV2fInfo", &
+                           "Something wrong when calling cg_goto_f")
+
+        ! Write the value of the turbulent prandtl number.
+
+        val = prandtlTurb
+        call cg_array_write_f(cgnsPrandtlTurb, realTypeCGNS, &
+                              1, int(1, cgsize_t), val, ierr)
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSV2fInfo", &
+                           "Something wrong when calling cg_array_write_f")
+
+        ! Indicate that this is a nonDimensional parameter.
+
+        call cg_goto_f(cgnsInd, cgnsBase, ierr, "FlowEquationSet_t", 1, &
+                       "TurbulenceClosure_t", 1, "DataArray_t", 1, "end")
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSV2fInfo", &
+                           "Something wrong when calling cg_goto_f")
+
+        call cg_dataclass_write_f(NonDimensionalParameter, ierr)
+        if (ierr /= CG_OK) &
+             call terminate("writeCGNSV2fInfo", &
+             "Something wrong when calling &
+             &cg_dataclass_write_f")
+    end subroutine writeCGNSV2fInfo
 
 end module outputMod
diff --git a/src/preprocessing/preprocessingAPI.F90 b/src/preprocessing/preprocessingAPI.F90
index 545af6f06..9af113422 100644
--- a/src/preprocessing/preprocessingAPI.F90
+++ b/src/preprocessing/preprocessingAPI.F90
@@ -2,1032 +2,1030 @@ module preprocessingAPI
 
 contains
 
-
-  subroutine preprocessing
-    !
-    !       preprocessing determines the communication patterns between
-    !       the processors for all the mg levels, computes the wall
-    !       distances and the metrics.
-    !
-    use constants
-    use block
-    use blockPointers, only : BCType, nBocos
-    use cgnsGrid
-    use bcdata, only : initBCData, allocMemBCData
-    use communication, only : adflow_comm_world, commPatternCell_1st, &
-         commPatternCell_2nd, commPatternNode_1st, internalCell_1st, &
-         internalCell_2nd, internalNode_1st, myid, nProc, &
-         recvBufferSize_1to1, sendBufferSize_1to1, sendBufferSIzeOver,&
-         recvBufferSizeOver, commPatternOverset, internalOverset, sendBuffer, &
-         recvBuffer, sendBufferSize, recvBufferSize
-    use inputPhysics
-    use inputTimeSpectral
-    use section
-    use wallDistance, only : xVolumeVec, xSurfVec, wallScatter, &
-         wallDistanceDataAllocated, updateWallAssociation, &
-         computeWallDistance
-    use oversetData, only : cumDomProc, nDomProc, wallFringes, nDomTotal, &
-         overlapMatrix, oversetPresent, localWallFringes
-    use utils, only : setPointers, EChk, setBufferSizes, terminate
-    use coarseUtils, only : createCoarseBlocks
-    use pointMatchedCommPattern, only : determineCommPattern
-    use oversetAPI, only : oversetComm, determineClusters, determineViscousDirs
-    use wallDistanceData, only : nCellBlockOffset
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: nLevels, level, nn, mm, nsMin, nsMax, i, iProc
-    logical :: local
-    !
-    ! Check that for the unsteady modes the number of periodic slices
-    ! is identical for all sections.
-
-    nsMin = sections(1)%nSlices
-    nsMax = sections(1)%nSlices
-
-    do nn=2,nSections
-       nsMin = min(nsMin,sections(nn)%nSlices)
-       nsMax = max(nsMax,sections(nn)%nSlices)
-    enddo
-
-    if((equationMode == unsteady .or. &
-         equationMode == timeSpectral) .and. nsMin < nsMax) then
-
-       if(myID == 0)                     &
+    subroutine preprocessing
+        !
+        !       preprocessing determines the communication patterns between
+        !       the processors for all the mg levels, computes the wall
+        !       distances and the metrics.
+        !
+        use constants
+        use block
+        use blockPointers, only: BCType, nBocos
+        use cgnsGrid
+        use bcdata, only: initBCData, allocMemBCData
+        use communication, only: adflow_comm_world, commPatternCell_1st, &
+                                 commPatternCell_2nd, commPatternNode_1st, internalCell_1st, &
+                                 internalCell_2nd, internalNode_1st, myid, nProc, &
+                                 recvBufferSize_1to1, sendBufferSize_1to1, sendBufferSIzeOver, &
+                                 recvBufferSizeOver, commPatternOverset, internalOverset, sendBuffer, &
+                                 recvBuffer, sendBufferSize, recvBufferSize
+        use inputPhysics
+        use inputTimeSpectral
+        use section
+        use wallDistance, only: xVolumeVec, xSurfVec, wallScatter, &
+                                wallDistanceDataAllocated, updateWallAssociation, &
+                                computeWallDistance
+        use oversetData, only: cumDomProc, nDomProc, wallFringes, nDomTotal, &
+                               overlapMatrix, oversetPresent, localWallFringes
+        use utils, only: setPointers, EChk, setBufferSizes, terminate
+        use coarseUtils, only: createCoarseBlocks
+        use pointMatchedCommPattern, only: determineCommPattern
+        use oversetAPI, only: oversetComm, determineClusters, determineViscousDirs
+        use wallDistanceData, only: nCellBlockOffset
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: nLevels, level, nn, mm, nsMin, nsMax, i, iProc
+        logical :: local
+        !
+        ! Check that for the unsteady modes the number of periodic slices
+        ! is identical for all sections.
+
+        nsMin = sections(1)%nSlices
+        nsMax = sections(1)%nSlices
+
+        do nn = 2, nSections
+            nsMin = min(nsMin, sections(nn)%nSlices)
+            nsMax = max(nsMax, sections(nn)%nSlices)
+        end do
+
+        if ((equationMode == unsteady .or. &
+             equationMode == timeSpectral) .and. nsMin < nsMax) then
+
+            if (myID == 0) &
+                 call terminate("preprocessing", &
+                 "Different rotational periodicity encountered &
+                 &for time accurate computation")
+            call mpi_barrier(ADflow_comm_world, ierr)
+
+        end if
+
+        ! Determine the number of multigrid levels needed in the
+        ! computation, and allocate the memory for the node and cell
+        ! communication patterns, including the memory copies for internal
+        ! communication, and the global number of cells on each level.
+        ! Note that this communication pattern does not change in time.
+
+        nLevels = ubound(flowDoms, 2)
+        nn = nLevels
+        allocate (commPatternCell_1st(nn), commPatternCell_2nd(nn), &
+                  commPatternNode_1st(nn), internalCell_1st(nn), &
+                  internalCell_2nd(nn), internalNode_1st(nn), &
+                  nCellGlobal(nn), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("preprocessing", &
+                           "Memory allocation failure for commPatterns")
+
+        ! Set the sizes here so that we know how to dealloc the stuff
+        ! later on
+        do i = 1, nLevels
+            commPatternCell_1st(i)%nPeriodic = 0
+            commPatternCell_1st(i)%nProcSend = 0
+            commPatternCell_1st(i)%nProcRecv = 0
+
+            commPatternCell_2nd(i)%nPeriodic = 0
+            commPatternCell_2nd(i)%nProcSend = 0
+            commPatternCell_2nd(i)%nProcRecv = 0
+
+            commPatternNode_1st(i)%nPeriodic = 0
+            commPatternNode_1st(i)%nProcSend = 0
+            commPatternNode_1st(i)%nProcRecv = 0
+
+            internalCell_1st(i)%nPeriodic = 0
+            internalCell_2nd(i)%nPeriodic = 0
+            internalNode_1st(i)%nPeriodic = 0
+        end do
+
+        ! Allocate the memory for the overset mesh communication pattern.
+        ! This pattern changes in time and therefore each spectral time
+        ! value has its own sliding mesh communication pattern.
+
+        mm = nTimeIntervalsSpectral
+        allocate (commPatternOverset(nn, mm), internalOverset(nn, mm), &
+                  overlapMatrix(nn, mm), stat=ierr)
+        if (ierr /= 0) &
             call terminate("preprocessing", &
-            "Different rotational periodicity encountered &
-            &for time accurate computation")
-       call mpi_barrier(ADflow_comm_world, ierr)
+                           "Memory allocation failure for commOverset")
+
+        ! Determine the fine grid 1 to 1 matching communication pattern.
 
-    endif
+        call determineCommPattern(1_intType)
 
-    ! Determine the number of multigrid levels needed in the
-    ! computation, and allocate the memory for the node and cell
-    ! communication patterns, including the memory copies for internal
-    ! communication, and the global number of cells on each level.
-    ! Note that this communication pattern does not change in time.
+        ! Initialize the send and receive buffer sizes to 0 and determine
+        ! its size for the finest grid for the 1 to 1 communication.
 
-    nLevels = ubound(flowDoms,2)
-    nn      = nLevels
-    allocate(commPatternCell_1st(nn), commPatternCell_2nd(nn), &
-         commPatternNode_1st(nn), internalCell_1st(nn),    &
-         internalCell_2nd(nn),    internalNode_1st(nn),    &
-         nCellGlobal(nn),         stat=ierr)
-    if(ierr /= 0)                     &
-         call terminate("preprocessing", &
-         "Memory allocation failure for commPatterns")
+        sendBufferSize_1to1 = 0
+        recvBufferSize_1to1 = 0
+        sendBufferSizeOver = 0
+        recvBufferSizeOver = 0
 
-    ! Set the sizes here so that we know how to dealloc the stuff
-    ! later on
-    do i=1,nLevels
-       commPatternCell_1st(i)%nPeriodic = 0
-       commPatternCell_1st(i)%nProcSend = 0
-       commPatternCell_1st(i)%nProcRecv = 0
+        call setBufferSizes(1_intType, 1_intType, .true., .false.)
 
-       commPatternCell_2nd(i)%nPeriodic = 0
-       commPatternCell_2nd(i)%nProcSend = 0
-       commPatternCell_2nd(i)%nProcRecv = 0
+        ! Loop to create the coarse grid levels.
 
-       commPatternNode_1st(i)%nPeriodic = 0
-       commPatternNode_1st(i)%nProcSend = 0
-       commPatternNode_1st(i)%nProcRecv = 0
+        do level = 2, nLevels
 
-       internalCell_1st(i)%nPeriodic = 0
-       internalCell_2nd(i)%nPeriodic = 0
-       internalNode_1st(i)%nPeriodic = 0
-    end do
+            ! Create the coarse grid blocks, its communication pattern, the
+            ! coarse grid level 0 cooling parameters and check the
+            ! communication buffer sizes.
 
-    ! Allocate the memory for the overset mesh communication pattern.
-    ! This pattern changes in time and therefore each spectral time
-    ! value has its own sliding mesh communication pattern.
+            call createCoarseBlocks(level)
+            call determineCommPattern(level)
+            call setBufferSizes(level, 1_intType, .true., .false.)
 
-    mm = nTimeIntervalsSpectral
-    allocate(commPatternOverset(nn,mm), internalOverset(nn,mm), &
-         overlapMatrix(nn, mm), stat=ierr)
-    if(ierr /= 0)                     &
-         call terminate("preprocessing", &
-         "Memory allocation failure for commOverset")
+        end do
 
-    ! Determine the fine grid 1 to 1 matching communication pattern.
+        ! Synchronize the processors, just to be sure.
 
-    call determineCommPattern(1_intType)
+        call mpi_barrier(ADflow_comm_world, ierr)
 
-    ! Initialize the send and receive buffer sizes to 0 and determine
-    ! its size for the finest grid for the 1 to 1 communication.
+        ! Allocate memory for the nonblocking point to point communication.
 
-    sendBufferSize_1to1  = 0
-    recvBufferSize_1to1  = 0
-    sendBufferSizeOver   = 0
-    recvBufferSizeOver   = 0
-
-    call setBufferSizes(1_intType, 1_intType, .true., .false.)
-
-    ! Loop to create the coarse grid levels.
-
-    do level=2,nLevels
-
-       ! Create the coarse grid blocks, its communication pattern, the
-       ! coarse grid level 0 cooling parameters and check the
-       ! communication buffer sizes.
-
-       call createCoarseBlocks(level)
-       call determineCommPattern(level)
-       call setBufferSizes(level, 1_intType, .true., .false.)
-
-    enddo
-
-    ! Synchronize the processors, just to be sure.
-
-    call mpi_barrier(ADflow_comm_world, ierr)
-
-    ! Allocate memory for the nonblocking point to point communication.
-
-    allocate(sendBuffer(sendBufferSize), &
-         recvBuffer(recvBufferSize), stat=ierr)
-    if(ierr /= 0)                     &
-         call terminate("preprocessing", &
-         "Memory allocation failure for sendBuffer &
-         &and recvBuffer")
-
-    ! Determine the cell range for the subfaces and initialize the
-    ! arrays for the boundary condition data.
-    ! Done for all grid levels.
-
-    call cellRangeSubface
-    call initBcdata
-
-    ! Allocate some data of size nLevels for the fast wall distance calc
-    allocate(xVolumeVec(nLevels), xSurfVec(nLevels, mm), wallScatter(nLevels, mm), &
-         wallDistanceDataAllocated(nLevels), updateWallAssociation(nLevels))
-    wallDistanceDataAllocated = .False.
-    updateWallAssociation = .True.
-
-    ! Nullify the wallFringe poiter as initialization
-    nullify(wallFringes, localWallFringes)
-
-    ! Allocate nDomProc: the number of domains on each processor
-    ! and a cumulative form.
-    allocate(nDomProc(0:nProc-1), cumDomProc(0:nProc))
-
-    ! Gather the dimensions of all blocks to everyone
-    call mpi_allreduce(nDom, nDomTotal, 1, adflow_integer, MPI_SUM, &
-         adflow_comm_world, ierr)
-
-    ! Receive the number of domains from each proc using an allgather.
-    call mpi_allgather(nDom, 1, adflow_integer, nDomProc, 1, adflow_integer, &
-         adflow_comm_world, ierr)
-
-    ! Compute the cumulative format:
-    cumDomProc(0) = 0
-    do iProc=1, nProc
-       cumDomProc(iProc) = cumDomProc(iProc-1) + nDomProc(iProc-1)
-    end do
-
-    ! Determine the number of grid clusters
-    call determineClusters()
-
-    ! Detertmine the viscous directions in the CGNSBlocks
-    call determineViscousDirs()
-
-    ! Determine if we have overset mesh present:
-    local = .False.
-    do nn=1,nDom
-       call setPointers(nn, 1_intType, 1_intType)
-
-       do mm=1, nBocos
-          if (BCType(mm) == OversetOuterBound) then
-             local = .True.
-          end if
-       end do
-    end do
+        allocate (sendBuffer(sendBufferSize), &
+                  recvBuffer(recvBufferSize), stat=ierr)
+        if (ierr /= 0) &
+             call terminate("preprocessing", &
+             "Memory allocation failure for sendBuffer &
+             &and recvBuffer")
 
-    call mpi_allreduce(local, oversetPresent, 1, MPI_LOGICAL, MPI_LOR, ADflow_comm_world, ierr)
+        ! Determine the cell range for the subfaces and initialize the
+        ! arrays for the boundary condition data.
+        ! Done for all grid levels.
 
-    ! Loop over the number of levels and perform a lot of tasks.
-    ! See the corresponding subroutine header, although the
-    ! names are pretty self-explaining
+        call cellRangeSubface
+        call initBcdata
 
-    ! Allocate Block-offset. SA rough might need it. It is filled in
-    ! 'setGlobalCellsAndNodes'. This function is called per level and thus it
-    ! must be allocated before the call
+        ! Allocate some data of size nLevels for the fast wall distance calc
+        allocate (xVolumeVec(nLevels), xSurfVec(nLevels, mm), wallScatter(nLevels, mm), &
+                  wallDistanceDataAllocated(nLevels), updateWallAssociation(nLevels))
+        wallDistanceDataAllocated = .False.
+        updateWallAssociation = .True.
 
-    allocate(nCellBlockOffset(nLevels, nDom))
-
-    do level=1,nLevels
-       call xhalo(level)
-       call allocateMetric(level)
-       call metric(level)
-       call setPorosities(level)
-       call setFamilyInfoFaces(level)
-       call faceRotationMatrices(level, .true.)
-       call checkSymmetry(level)
-       call viscSubfaceInfo(level)
-       call determineNcellGlobal(level)
-       call setGlobalCellsAndNodes(level)
-       call setReferenceVolume(level)
-    end do
-
-    ! BC Data must be alloaced (for surface iblank) before we can do
-    ! the overset computation.
-    call allocMemBCData
-
-    call setSurfaceFamilyInfo
-
-    ! Surface info needs to be computed before the wall distance can
-    ! be done and overset connectivity computed
-    do level=1,nLevels
-       call computeWallDistance(level, .True.)
-    end do
-    call preprocessingADjoint
-
-  end subroutine preprocessing
-
-  subroutine preprocessingoverset(flag, n, closedFamList, nFam)
-
-    use constants
-    use block, only : flowDoms
-    use oversetAPI, only : oversetComm, setExplicitHoleCut
-
-    implicit none
-
-    ! Input/Output
-    integer(kind=intType), intent(in) :: n, nFam
-    integer(kind=intType), dimension(n) :: flag
-    integer(kind=intType), dimension(nFam) :: closedFamList
-
-    ! Working
-    integer(kind=intType) :: level, nLevels
-
-    nLevels = ubound(flowDoms,2)
-
-    do level=1,nLevels
-       if (level == 1) then
-          call setExplicitHoleCut(flag)
-          call oversetComm(level, .true., .false., closedFamList)
-       else
-          call oversetComm(level, .True., .True., closedFamList)
-       end if
-    end do
-  end subroutine preprocessingoverset
-
-  subroutine cellRangeSubface
-    !
-    !       cellRangeSubface determines the cell range for every subface
-    !       of every block all grid levels. This subrange can include one
-    !       cell of overlap if the boundary coincides with the block
-    !       boundary.
-    !
-    use constants
-    use block
-    use utils, only : terminate
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
+        ! Nullify the wallFringe poiter as initialization
+        nullify (wallFringes, localWallFringes)
 
-    integer(kind=intType) :: nLevels, level
-    integer(kind=intType) :: nn, mm, il, jl, kl, ie, je, ke
-    integer(kind=intType) :: iBeg, jBeg, kBeg, iEnd, jEnd, kEnd
+        ! Allocate nDomProc: the number of domains on each processor
+        ! and a cumulative form.
+        allocate (nDomProc(0:nProc - 1), cumDomProc(0:nProc))
 
-    ! Determine the number of grid levels.
+        ! Gather the dimensions of all blocks to everyone
+        call mpi_allreduce(nDom, nDomTotal, 1, adflow_integer, MPI_SUM, &
+                           adflow_comm_world, ierr)
 
-    nLevels = ubound(flowDoms,2)
+        ! Receive the number of domains from each proc using an allgather.
+        call mpi_allgather(nDom, 1, adflow_integer, nDomProc, 1, adflow_integer, &
+                           adflow_comm_world, ierr)
 
-    ! Loop over the number of grid levels.
+        ! Compute the cumulative format:
+        cumDomProc(0) = 0
+        do iProc = 1, nProc
+            cumDomProc(iProc) = cumDomProc(iProc - 1) + nDomProc(iProc - 1)
+        end do
 
-    levelLoop: do level=1,nLevels
+        ! Determine the number of grid clusters
+        call determineClusters()
 
-       ! Loop over the blocks.
+        ! Detertmine the viscous directions in the CGNSBlocks
+        call determineViscousDirs()
+
+        ! Determine if we have overset mesh present:
+        local = .False.
+        do nn = 1, nDom
+            call setPointers(nn, 1_intType, 1_intType)
+
+            do mm = 1, nBocos
+                if (BCType(mm) == OversetOuterBound) then
+                    local = .True.
+                end if
+            end do
+        end do
+
+        call mpi_allreduce(local, oversetPresent, 1, MPI_LOGICAL, MPI_LOR, ADflow_comm_world, ierr)
+
+        ! Loop over the number of levels and perform a lot of tasks.
+        ! See the corresponding subroutine header, although the
+        ! names are pretty self-explaining
+
+        ! Allocate Block-offset. SA rough might need it. It is filled in
+        ! 'setGlobalCellsAndNodes'. This function is called per level and thus it
+        ! must be allocated before the call
+
+        allocate (nCellBlockOffset(nLevels, nDom))
+
+        do level = 1, nLevels
+            call xhalo(level)
+            call allocateMetric(level)
+            call metric(level)
+            call setPorosities(level)
+            call setFamilyInfoFaces(level)
+            call faceRotationMatrices(level, .true.)
+            call checkSymmetry(level)
+            call viscSubfaceInfo(level)
+            call determineNcellGlobal(level)
+            call setGlobalCellsAndNodes(level)
+            call setReferenceVolume(level)
+        end do
+
+        ! BC Data must be alloaced (for surface iblank) before we can do
+        ! the overset computation.
+        call allocMemBCData
+
+        call setSurfaceFamilyInfo
+
+        ! Surface info needs to be computed before the wall distance can
+        ! be done and overset connectivity computed
+        do level = 1, nLevels
+            call computeWallDistance(level, .True.)
+        end do
+        call preprocessingADjoint
+
+    end subroutine preprocessing
+
+    subroutine preprocessingoverset(flag, n, closedFamList, nFam)
+
+        use constants
+        use block, only: flowDoms
+        use oversetAPI, only: oversetComm, setExplicitHoleCut
+
+        implicit none
+
+        ! Input/Output
+        integer(kind=intType), intent(in) :: n, nFam
+        integer(kind=intType), dimension(n) :: flag
+        integer(kind=intType), dimension(nFam) :: closedFamList
+
+        ! Working
+        integer(kind=intType) :: level, nLevels
+
+        nLevels = ubound(flowDoms, 2)
+
+        do level = 1, nLevels
+            if (level == 1) then
+                call setExplicitHoleCut(flag)
+                call oversetComm(level, .true., .false., closedFamList)
+            else
+                call oversetComm(level, .True., .True., closedFamList)
+            end if
+        end do
+    end subroutine preprocessingoverset
+
+    subroutine cellRangeSubface
+        !
+        !       cellRangeSubface determines the cell range for every subface
+        !       of every block all grid levels. This subrange can include one
+        !       cell of overlap if the boundary coincides with the block
+        !       boundary.
+        !
+        use constants
+        use block
+        use utils, only: terminate
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
 
-       domains: do nn=1,nDom
+        integer(kind=intType) :: nLevels, level
+        integer(kind=intType) :: nn, mm, il, jl, kl, ie, je, ke
+        integer(kind=intType) :: iBeg, jBeg, kBeg, iEnd, jEnd, kEnd
 
-          ! Allocate the memory for the variables defining the cell
-          ! range of the subfaces. Only allocated for the 1st spectral
-          ! solution, because this info is identical for all of them.
+        ! Determine the number of grid levels.
 
-          mm = flowDoms(nn,level,1)%nSubface
-          allocate(flowDoms(nn,level,1)%icBeg(mm), &
-               flowDoms(nn,level,1)%jcBeg(mm), &
-               flowDoms(nn,level,1)%kcBeg(mm), &
-               flowDoms(nn,level,1)%icEnd(mm), &
-               flowDoms(nn,level,1)%jcEnd(mm), &
-               flowDoms(nn,level,1)%kcEnd(mm), stat=ierr)
-          if(ierr /= 0)                        &
-               call terminate("cellRangeSubface", &
-               "Memory allocation failure for &
-               &cell subranges")
+        nLevels = ubound(flowDoms, 2)
 
-          ! Store the nodal dimensions of the block a bit easier.
+        ! Loop over the number of grid levels.
 
-          il = flowDoms(nn,level,1)%il
-          jl = flowDoms(nn,level,1)%jl
-          kl = flowDoms(nn,level,1)%kl
+        levelLoop: do level = 1, nLevels
 
-          ie = flowDoms(nn,level,1)%ie
-          je = flowDoms(nn,level,1)%je
-          ke = flowDoms(nn,level,1)%ke
+            ! Loop over the blocks.
 
-          ! Loop over the number of subfaces for this block.
+            domains: do nn = 1, nDom
 
-          subfaces: do mm=1,flowDoms(nn,level,1)%nSubface
+                ! Allocate the memory for the variables defining the cell
+                ! range of the subfaces. Only allocated for the 1st spectral
+                ! solution, because this info is identical for all of them.
 
-             ! Store the nodal range of the subface a bit easier.
-             ! Make sure that iBeg, jBeg and kBeg contain the lowest and
-             ! iEnd, jEnd and kEnd the highest node numbers.
+                mm = flowDoms(nn, level, 1)%nSubface
+                allocate (flowDoms(nn, level, 1)%icBeg(mm), &
+                          flowDoms(nn, level, 1)%jcBeg(mm), &
+                          flowDoms(nn, level, 1)%kcBeg(mm), &
+                          flowDoms(nn, level, 1)%icEnd(mm), &
+                          flowDoms(nn, level, 1)%jcEnd(mm), &
+                          flowDoms(nn, level, 1)%kcEnd(mm), stat=ierr)
+                if (ierr /= 0) &
+                     call terminate("cellRangeSubface", &
+                     "Memory allocation failure for &
+                     &cell subranges")
 
-             iBeg = min(flowDoms(nn,level,1)%inBeg(mm), &
-                  flowDoms(nn,level,1)%inEnd(mm))
-             iEnd = max(flowDoms(nn,level,1)%inBeg(mm), &
-                  flowDoms(nn,level,1)%inEnd(mm))
+                ! Store the nodal dimensions of the block a bit easier.
 
-             jBeg = min(flowDoms(nn,level,1)%jnBeg(mm), &
-                  flowDoms(nn,level,1)%jnEnd(mm))
-             jEnd = max(flowDoms(nn,level,1)%jnBeg(mm), &
-                  flowDoms(nn,level,1)%jnEnd(mm))
+                il = flowDoms(nn, level, 1)%il
+                jl = flowDoms(nn, level, 1)%jl
+                kl = flowDoms(nn, level, 1)%kl
 
-             kBeg = min(flowDoms(nn,level,1)%knBeg(mm), &
-                  flowDoms(nn,level,1)%knEnd(mm))
-             kEnd = max(flowDoms(nn,level,1)%knBeg(mm), &
-                  flowDoms(nn,level,1)%knEnd(mm))
+                ie = flowDoms(nn, level, 1)%ie
+                je = flowDoms(nn, level, 1)%je
+                ke = flowDoms(nn, level, 1)%ke
 
-             ! Determine the block face on which the subface is located
-             ! and set the range accordingly.
+                ! Loop over the number of subfaces for this block.
 
-             select case (flowDoms(nn,level,1)%BCFaceID(mm))
+                subfaces: do mm = 1, flowDoms(nn, level, 1)%nSubface
 
-             case (iMin)
-                flowDoms(nn,level,1)%icBeg(mm) = 1
-                flowDoms(nn,level,1)%icEnd(mm) = 1
+                    ! Store the nodal range of the subface a bit easier.
+                    ! Make sure that iBeg, jBeg and kBeg contain the lowest and
+                    ! iEnd, jEnd and kEnd the highest node numbers.
 
-                flowDoms(nn,level,1)%jcBeg(mm) = jBeg +1
-                if(jBeg == 1) flowDoms(nn,level,1)%jcBeg(mm) = 1
+                    iBeg = min(flowDoms(nn, level, 1)%inBeg(mm), &
+                               flowDoms(nn, level, 1)%inEnd(mm))
+                    iEnd = max(flowDoms(nn, level, 1)%inBeg(mm), &
+                               flowDoms(nn, level, 1)%inEnd(mm))
 
-                flowDoms(nn,level,1)%jcEnd(mm) = jEnd
-                if(jEnd == jl) flowDoms(nn,level,1)%jcEnd(mm) = je
+                    jBeg = min(flowDoms(nn, level, 1)%jnBeg(mm), &
+                               flowDoms(nn, level, 1)%jnEnd(mm))
+                    jEnd = max(flowDoms(nn, level, 1)%jnBeg(mm), &
+                               flowDoms(nn, level, 1)%jnEnd(mm))
 
-                flowDoms(nn,level,1)%kcBeg(mm) = kBeg +1
-                if(kBeg == 1) flowDoms(nn,level,1)%kcBeg(mm) = 1
+                    kBeg = min(flowDoms(nn, level, 1)%knBeg(mm), &
+                               flowDoms(nn, level, 1)%knEnd(mm))
+                    kEnd = max(flowDoms(nn, level, 1)%knBeg(mm), &
+                               flowDoms(nn, level, 1)%knEnd(mm))
 
-                flowDoms(nn,level,1)%kcEnd(mm) = kEnd
-                if(kEnd == kl) flowDoms(nn,level,1)%kcEnd(mm) = ke
+                    ! Determine the block face on which the subface is located
+                    ! and set the range accordingly.
 
-                !=========================================================
+                    select case (flowDoms(nn, level, 1)%BCFaceID(mm))
 
-             case (iMax)
-                flowDoms(nn,level,1)%icBeg(mm) = ie
-                flowDoms(nn,level,1)%icEnd(mm) = ie
+                    case (iMin)
+                        flowDoms(nn, level, 1)%icBeg(mm) = 1
+                        flowDoms(nn, level, 1)%icEnd(mm) = 1
 
-                flowDoms(nn,level,1)%jcBeg(mm) = jBeg +1
-                if(jBeg == 1) flowDoms(nn,level,1)%jcBeg(mm) = 1
+                        flowDoms(nn, level, 1)%jcBeg(mm) = jBeg + 1
+                        if (jBeg == 1) flowDoms(nn, level, 1)%jcBeg(mm) = 1
 
-                flowDoms(nn,level,1)%jcEnd(mm) = jEnd
-                if(jEnd == jl) flowDoms(nn,level,1)%jcEnd(mm) = je
+                        flowDoms(nn, level, 1)%jcEnd(mm) = jEnd
+                        if (jEnd == jl) flowDoms(nn, level, 1)%jcEnd(mm) = je
 
-                flowDoms(nn,level,1)%kcBeg(mm) = kBeg +1
-                if(kBeg == 1) flowDoms(nn,level,1)%kcBeg(mm) = 1
+                        flowDoms(nn, level, 1)%kcBeg(mm) = kBeg + 1
+                        if (kBeg == 1) flowDoms(nn, level, 1)%kcBeg(mm) = 1
 
-                flowDoms(nn,level,1)%kcEnd(mm) = kEnd
-                if(kEnd == kl) flowDoms(nn,level,1)%kcEnd(mm) = ke
+                        flowDoms(nn, level, 1)%kcEnd(mm) = kEnd
+                        if (kEnd == kl) flowDoms(nn, level, 1)%kcEnd(mm) = ke
 
-                !=========================================================
+                        !=========================================================
 
-             case (jMin)
-                flowDoms(nn,level,1)%icBeg(mm) = iBeg +1
-                if(iBeg == 1) flowDoms(nn,level,1)%icBeg(mm) = 1
+                    case (iMax)
+                        flowDoms(nn, level, 1)%icBeg(mm) = ie
+                        flowDoms(nn, level, 1)%icEnd(mm) = ie
 
-                flowDoms(nn,level,1)%icEnd(mm) = iEnd
-                if(iEnd == il) flowDoms(nn,level,1)%icEnd(mm) = ie
+                        flowDoms(nn, level, 1)%jcBeg(mm) = jBeg + 1
+                        if (jBeg == 1) flowDoms(nn, level, 1)%jcBeg(mm) = 1
 
-                flowDoms(nn,level,1)%jcBeg(mm) = 1
-                flowDoms(nn,level,1)%jcEnd(mm) = 1
+                        flowDoms(nn, level, 1)%jcEnd(mm) = jEnd
+                        if (jEnd == jl) flowDoms(nn, level, 1)%jcEnd(mm) = je
 
-                flowDoms(nn,level,1)%kcBeg(mm) = kBeg +1
-                if(kBeg == 1) flowDoms(nn,level,1)%kcBeg(mm) = 1
+                        flowDoms(nn, level, 1)%kcBeg(mm) = kBeg + 1
+                        if (kBeg == 1) flowDoms(nn, level, 1)%kcBeg(mm) = 1
 
-                flowDoms(nn,level,1)%kcEnd(mm) = kEnd
-                if(kEnd == kl) flowDoms(nn,level,1)%kcEnd(mm) = ke
+                        flowDoms(nn, level, 1)%kcEnd(mm) = kEnd
+                        if (kEnd == kl) flowDoms(nn, level, 1)%kcEnd(mm) = ke
 
-                !=========================================================
+                        !=========================================================
 
-             case (jMax)
-                flowDoms(nn,level,1)%icBeg(mm) = iBeg +1
-                if(iBeg == 1) flowDoms(nn,level,1)%icBeg(mm) = 1
+                    case (jMin)
+                        flowDoms(nn, level, 1)%icBeg(mm) = iBeg + 1
+                        if (iBeg == 1) flowDoms(nn, level, 1)%icBeg(mm) = 1
 
-                flowDoms(nn,level,1)%icEnd(mm) = iEnd
-                if(iEnd == il) flowDoms(nn,level,1)%icEnd(mm) = ie
+                        flowDoms(nn, level, 1)%icEnd(mm) = iEnd
+                        if (iEnd == il) flowDoms(nn, level, 1)%icEnd(mm) = ie
 
-                flowDoms(nn,level,1)%jcBeg(mm) = je
-                flowDoms(nn,level,1)%jcEnd(mm) = je
+                        flowDoms(nn, level, 1)%jcBeg(mm) = 1
+                        flowDoms(nn, level, 1)%jcEnd(mm) = 1
 
-                flowDoms(nn,level,1)%kcBeg(mm) = kBeg +1
-                if(kBeg == 1) flowDoms(nn,level,1)%kcBeg(mm) = 1
+                        flowDoms(nn, level, 1)%kcBeg(mm) = kBeg + 1
+                        if (kBeg == 1) flowDoms(nn, level, 1)%kcBeg(mm) = 1
 
-                flowDoms(nn,level,1)%kcEnd(mm) = kEnd
-                if(kEnd == kl) flowDoms(nn,level,1)%kcEnd(mm) = ke
+                        flowDoms(nn, level, 1)%kcEnd(mm) = kEnd
+                        if (kEnd == kl) flowDoms(nn, level, 1)%kcEnd(mm) = ke
 
-                !=========================================================
+                        !=========================================================
 
-             case (kMin)
-                flowDoms(nn,level,1)%icBeg(mm) = iBeg +1
-                if(iBeg == 1) flowDoms(nn,level,1)%icBeg(mm) = 1
+                    case (jMax)
+                        flowDoms(nn, level, 1)%icBeg(mm) = iBeg + 1
+                        if (iBeg == 1) flowDoms(nn, level, 1)%icBeg(mm) = 1
 
-                flowDoms(nn,level,1)%icEnd(mm) = iEnd
-                if(iEnd == il) flowDoms(nn,level,1)%icEnd(mm) = ie
+                        flowDoms(nn, level, 1)%icEnd(mm) = iEnd
+                        if (iEnd == il) flowDoms(nn, level, 1)%icEnd(mm) = ie
 
-                flowDoms(nn,level,1)%jcBeg(mm) = jBeg +1
-                if(jBeg == 1) flowDoms(nn,level,1)%jcBeg(mm) = 1
+                        flowDoms(nn, level, 1)%jcBeg(mm) = je
+                        flowDoms(nn, level, 1)%jcEnd(mm) = je
 
-                flowDoms(nn,level,1)%jcEnd(mm) = jEnd
-                if(jEnd == jl) flowDoms(nn,level,1)%jcEnd(mm) = je
+                        flowDoms(nn, level, 1)%kcBeg(mm) = kBeg + 1
+                        if (kBeg == 1) flowDoms(nn, level, 1)%kcBeg(mm) = 1
 
-                flowDoms(nn,level,1)%kcBeg(mm) = 1
-                flowDoms(nn,level,1)%kcEnd(mm) = 1
+                        flowDoms(nn, level, 1)%kcEnd(mm) = kEnd
+                        if (kEnd == kl) flowDoms(nn, level, 1)%kcEnd(mm) = ke
 
-                !=========================================================
+                        !=========================================================
 
-             case (kMax)
-                flowDoms(nn,level,1)%icBeg(mm) = iBeg +1
-                if(iBeg == 1) flowDoms(nn,level,1)%icBeg(mm) = 1
+                    case (kMin)
+                        flowDoms(nn, level, 1)%icBeg(mm) = iBeg + 1
+                        if (iBeg == 1) flowDoms(nn, level, 1)%icBeg(mm) = 1
 
-                flowDoms(nn,level,1)%icEnd(mm) = iEnd
-                if(iEnd == il) flowDoms(nn,level,1)%icEnd(mm) = ie
+                        flowDoms(nn, level, 1)%icEnd(mm) = iEnd
+                        if (iEnd == il) flowDoms(nn, level, 1)%icEnd(mm) = ie
 
-                flowDoms(nn,level,1)%jcBeg(mm) = jBeg +1
-                if(jBeg == 1) flowDoms(nn,level,1)%jcBeg(mm) = 1
+                        flowDoms(nn, level, 1)%jcBeg(mm) = jBeg + 1
+                        if (jBeg == 1) flowDoms(nn, level, 1)%jcBeg(mm) = 1
 
-                flowDoms(nn,level,1)%jcEnd(mm) = jEnd
-                if(jEnd == jl) flowDoms(nn,level,1)%jcEnd(mm) = je
+                        flowDoms(nn, level, 1)%jcEnd(mm) = jEnd
+                        if (jEnd == jl) flowDoms(nn, level, 1)%jcEnd(mm) = je
 
-                flowDoms(nn,level,1)%kcBeg(mm) = ke
-                flowDoms(nn,level,1)%kcEnd(mm) = ke
+                        flowDoms(nn, level, 1)%kcBeg(mm) = 1
+                        flowDoms(nn, level, 1)%kcEnd(mm) = 1
 
-             end select
+                        !=========================================================
 
-          enddo subfaces
-       enddo domains
-    enddo levelLoop
+                    case (kMax)
+                        flowDoms(nn, level, 1)%icBeg(mm) = iBeg + 1
+                        if (iBeg == 1) flowDoms(nn, level, 1)%icBeg(mm) = 1
 
-  end subroutine cellRangeSubface
+                        flowDoms(nn, level, 1)%icEnd(mm) = iEnd
+                        if (iEnd == il) flowDoms(nn, level, 1)%icEnd(mm) = ie
 
+                        flowDoms(nn, level, 1)%jcBeg(mm) = jBeg + 1
+                        if (jBeg == 1) flowDoms(nn, level, 1)%jcBeg(mm) = 1
 
-  subroutine determineNcellGlobal(level)
-    !       determineNcellGlobal determines the global number of cells
-    !       the given grid level. This info is needed to compute the L2
-    !       norm of the residuals in the flow solver.
-    !       Only the 1st spectral solution needs to be considered, because
-    !       this info is identical for all of them.
-    !
-    use constants
-    use block
-    use communication
-    use commonFormats, only : strings
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-    integer(kind=intType) :: nn, nCellLocal
+                        flowDoms(nn, level, 1)%jcEnd(mm) = jEnd
+                        if (jEnd == jl) flowDoms(nn, level, 1)%jcEnd(mm) = je
 
-    character(len=12) :: int1String, int2String
-    !
-    ! Determine the local number of cells by looping over the blocks.
+                        flowDoms(nn, level, 1)%kcBeg(mm) = ke
+                        flowDoms(nn, level, 1)%kcEnd(mm) = ke
 
-    nCellLocal = 0
-    do nn=1,nDom
-       nCellLocal = nCellLocal + flowDoms(nn,level,1)%nx &
-            *              flowDoms(nn,level,1)%ny &
-            *              flowDoms(nn,level,1)%nz
-    enddo
+                    end select
 
-    ! And determine the global sum.
+                end do subfaces
+            end do domains
+        end do levelLoop
 
-    call mpi_allreduce(nCellLocal, nCellGlobal(level), 1, &
-         adflow_integer, mpi_sum, ADflow_comm_world, ierr)
+    end subroutine cellRangeSubface
 
-    ! Write the total number of cells to stdout; only done by
-    ! processor 0 to avoid a messy output.
+    subroutine determineNcellGlobal(level)
+        !       determineNcellGlobal determines the global number of cells
+        !       the given grid level. This info is needed to compute the L2
+        !       norm of the residuals in the flow solver.
+        !       Only the 1st spectral solution needs to be considered, because
+        !       this info is identical for all of them.
+        !
+        use constants
+        use block
+        use communication
+        use commonFormats, only: strings
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+        integer(kind=intType) :: nn, nCellLocal
 
-    if(myID == 0) then
+        character(len=12) :: int1String, int2String
+        !
+        ! Determine the local number of cells by looping over the blocks.
 
-       write(int1String,"(i12)") level
-       write(int2String,"(i12)") nCellGlobal(level)
-       int1String = adjustl(int1String)
-       int2String = adjustl(int2String)
+        nCellLocal = 0
+        do nn = 1, nDom
+            nCellLocal = nCellLocal + flowDoms(nn, level, 1)%nx &
+                         *flowDoms(nn, level, 1)%ny &
+                         *flowDoms(nn, level, 1)%nz
+        end do
 
-       print "(a)", "#"
-       print strings, "# Grid level: ", trim(int1String),", Total number of cells: ", trim(int2String)
-       print "(a)", "#"
+        ! And determine the global sum.
 
-    endif
+        call mpi_allreduce(nCellLocal, nCellGlobal(level), 1, &
+                           adflow_integer, mpi_sum, ADflow_comm_world, ierr)
 
-  end subroutine determineNcellGlobal
+        ! Write the total number of cells to stdout; only done by
+        ! processor 0 to avoid a messy output.
 
-  subroutine setPorosities(level)
-    !
-    !       setPorosities sets the porosities for the faces to a certain
-    !       flag. Default is normalFlux. The two other possibilities are
-    !       boundFlux, used for solid wall boundaries, and noFlux for a
-    !       conservative treatment of non matching block boundaries. In
-    !       the latter case the flux is constructed differently and the
-    !       flux computation in the block must be neglected.
-    !       Note that only the 1st spectral solution is treated, because
-    !       this informations is the same for all of them.
-    !
-    use blockPointers
-    use constants
-    use inputDiscretization
-    use utils, only : terminate, setPointers
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: nn, mm, i, j, k
-
-    integer(kind=intType), dimension(2) :: ri, rj, rk
-
-    integer(kind=porType) :: por
-
-    ! Loop over the number of domains.
-
-    domains: do nn=1,nDom
-
-       ! Store the number of nodes in this block a bit easier.
-
-       il = flowDoms(nn,level,1)%il
-       jl = flowDoms(nn,level,1)%jl
-       kl = flowDoms(nn,level,1)%kl
-
-       ! Allocate the memory for the porosities.
-
-       allocate(flowDoms(nn,level,1)%porI(1:il,2:jl,2:kl), &
-            flowDoms(nn,level,1)%porJ(2:il,1:jl,2:kl), &
-            flowDoms(nn,level,1)%porK(2:il,2:jl,1:kl), stat=ierr)
-       if(ierr /= 0)                     &
-            call terminate("setPorosities", &
-            "Memory allocation failure for porosities")
-
-       ! Set the pointers for this block to make the source
-       ! more readable.
-
-       call setPointers(nn, level, 1_intType)
-
-       ! Initialize the porosities to normalFlux.
+        if (myID == 0) then
 
-       porI = normalFlux
-       porJ = normalFlux
-       porK = normalFlux
+            write (int1String, "(i12)") level
+            write (int2String, "(i12)") nCellGlobal(level)
+            int1String = adjustl(int1String)
+            int2String = adjustl(int2String)
 
-       ! Loop over the subfaces to alter the porosities.
+            print "(a)", "#"
+            print strings, "# Grid level: ", trim(int1String), ", Total number of cells: ", trim(int2String)
+            print "(a)", "#"
 
-       subface: do mm=1,nsubface
+        end if
 
-          ! Set the porosity for this subface or continue with the
-          ! next if the porosity should not be changed.
-
-          if(BCType(mm) == NSWallAdiabatic  .or. &
-               BCType(mm) == NSWallIsothermal .or. &
-               BCType(mm) == EulerWall        .or. &
-               BCType(mm) == Extrap)    then
-             por = boundFlux
-          else if(BCType(mm)        == B2BMismatch .and. &
-               nonMatchTreatment == Conservative) then
-             por = noFlux
-          else
-             cycle
-          endif
-
-          ! Set the range for the faces on this subface.
-
-          ri(1) = min(inBeg(mm), inEnd(mm)) +1
-          ri(2) = max(inBeg(mm), inEnd(mm))
-
-          rj(1) = min(jnBeg(mm), jnEnd(mm)) +1
-          rj(2) = max(jnBeg(mm), jnEnd(mm))
-
-          rk(1) = min(knBeg(mm), knEnd(mm)) +1
-          rk(2) = max(knBeg(mm), knEnd(mm))
-
-          ! Determine the block face this subface is located on and
-          ! set the corresponding porosities correctly.
-
-          select case( BCFaceID(mm) )
-
-          case (iMin)
-             do k=rk(1),rk(2)
-                do j=rj(1),rj(2)
-                   porI(1,j,k) = por
-                enddo
-             enddo
-
-             !===========================================================
-
-          case (iMax)
-             do k=rk(1),rk(2)
-                do j=rj(1),rj(2)
-                   porI(il,j,k) = por
-                enddo
-             enddo
+    end subroutine determineNcellGlobal
 
-             !===========================================================
+    subroutine setPorosities(level)
+        !
+        !       setPorosities sets the porosities for the faces to a certain
+        !       flag. Default is normalFlux. The two other possibilities are
+        !       boundFlux, used for solid wall boundaries, and noFlux for a
+        !       conservative treatment of non matching block boundaries. In
+        !       the latter case the flux is constructed differently and the
+        !       flux computation in the block must be neglected.
+        !       Note that only the 1st spectral solution is treated, because
+        !       this informations is the same for all of them.
+        !
+        use blockPointers
+        use constants
+        use inputDiscretization
+        use utils, only: terminate, setPointers
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: nn, mm, i, j, k
+
+        integer(kind=intType), dimension(2) :: ri, rj, rk
+
+        integer(kind=porType) :: por
+
+        ! Loop over the number of domains.
+
+        domains: do nn = 1, nDom
+
+            ! Store the number of nodes in this block a bit easier.
+
+            il = flowDoms(nn, level, 1)%il
+            jl = flowDoms(nn, level, 1)%jl
+            kl = flowDoms(nn, level, 1)%kl
+
+            ! Allocate the memory for the porosities.
+
+            allocate (flowDoms(nn, level, 1)%porI(1:il, 2:jl, 2:kl), &
+                      flowDoms(nn, level, 1)%porJ(2:il, 1:jl, 2:kl), &
+                      flowDoms(nn, level, 1)%porK(2:il, 2:jl, 1:kl), stat=ierr)
+            if (ierr /= 0) &
+                call terminate("setPorosities", &
+                               "Memory allocation failure for porosities")
+
+            ! Set the pointers for this block to make the source
+            ! more readable.
+
+            call setPointers(nn, level, 1_intType)
+
+            ! Initialize the porosities to normalFlux.
+
+            porI = normalFlux
+            porJ = normalFlux
+            porK = normalFlux
+
+            ! Loop over the subfaces to alter the porosities.
 
-          case (jMin)
-             do k=rk(1),rk(2)
-                do i=ri(1),ri(2)
-                   porJ(i,1,k) = por
-                enddo
-             enddo
+            subface: do mm = 1, nsubface
 
-             !===========================================================
+                ! Set the porosity for this subface or continue with the
+                ! next if the porosity should not be changed.
 
-          case (jMax)
-             do k=rk(1),rk(2)
-                do i=ri(1),ri(2)
-                   porJ(i,jl,k) = por
-                enddo
-             enddo
+                if (BCType(mm) == NSWallAdiabatic .or. &
+                    BCType(mm) == NSWallIsothermal .or. &
+                    BCType(mm) == EulerWall .or. &
+                    BCType(mm) == Extrap) then
+                    por = boundFlux
+                else if (BCType(mm) == B2BMismatch .and. &
+                         nonMatchTreatment == Conservative) then
+                    por = noFlux
+                else
+                    cycle
+                end if
 
-             !===========================================================
+                ! Set the range for the faces on this subface.
 
-          case (kMin)
-             do j=rj(1),rj(2)
-                do i=ri(1),ri(2)
-                   porK(i,j,1) = por
-                enddo
-             enddo
+                ri(1) = min(inBeg(mm), inEnd(mm)) + 1
+                ri(2) = max(inBeg(mm), inEnd(mm))
 
-             !===========================================================
+                rj(1) = min(jnBeg(mm), jnEnd(mm)) + 1
+                rj(2) = max(jnBeg(mm), jnEnd(mm))
 
-          case (kMax)
-             do j=rj(1),rj(2)
-                do i=ri(1),ri(2)
-                   porK(i,j,kl) = por
-                enddo
-             enddo
+                rk(1) = min(knBeg(mm), knEnd(mm)) + 1
+                rk(2) = max(knBeg(mm), knEnd(mm))
 
-          end select
+                ! Determine the block face this subface is located on and
+                ! set the corresponding porosities correctly.
 
-       enddo subface
+                select case (BCFaceID(mm))
 
-    enddo domains
+                case (iMin)
+                    do k = rk(1), rk(2)
+                        do j = rj(1), rj(2)
+                            porI(1, j, k) = por
+                        end do
+                    end do
 
-  end subroutine setPorosities
+                    !===========================================================
 
-  subroutine exchangeGlobalCells(level, sps, commPattern, internal)
-    !
-    !       ExchangeIblank exchanges the 1 to 1 internal halo's for the
-    !       given level and sps instance.
-    !
-    use constants
-    use block
-    use communication
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level, sps
+                case (iMax)
+                    do k = rk(1), rk(2)
+                        do j = rj(1), rj(2)
+                            porI(il, j, k) = por
+                        end do
+                    end do
 
-    type(commType),          dimension(*), intent(in) :: commPattern
-    type(internalCommType), dimension(*), intent(in) :: internal
-    !
-    !      Local variables.
-    !
-    integer :: size, procId, ierr, index
-    integer, dimension(mpi_status_size) :: mpiStatus
+                    !===========================================================
 
-    integer(kind=intType) :: i, j, ii, jj
-    integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2
+                case (jMin)
+                    do k = rk(1), rk(2)
+                        do i = ri(1), ri(2)
+                            porJ(i, 1, k) = por
+                        end do
+                    end do
 
-    integer(kind=intType), dimension(:), allocatable :: sendBufInt
-    integer(kind=intType), dimension(:), allocatable :: recvBufInt
+                    !===========================================================
 
-    ! Allocate the memory for the sending and receiving buffers.
+                case (jMax)
+                    do k = rk(1), rk(2)
+                        do i = ri(1), ri(2)
+                            porJ(i, jl, k) = por
+                        end do
+                    end do
 
-    ii = commPattern(level)%nProcSend
-    ii = commPattern(level)%nsendCum(ii)
-    jj = commPattern(level)%nProcRecv
-    jj = commPattern(level)%nrecvCum(jj)
+                    !===========================================================
 
-    allocate(sendBufInt(ii), recvBufInt(jj), stat=ierr)
-    if(ierr /= 0)                       &
-         call terminate("exchangeIblank", &
-         "Memory allocation failure for buffers")
+                case (kMin)
+                    do j = rj(1), rj(2)
+                        do i = ri(1), ri(2)
+                            porK(i, j, 1) = por
+                        end do
+                    end do
 
-    ! Send the variables. The data is first copied into
-    ! the send buffer after which the buffer is sent asap.
+                    !===========================================================
 
-    ii = 1
-    sends: do i=1,commPattern(level)%nProcSend
+                case (kMax)
+                    do j = rj(1), rj(2)
+                        do i = ri(1), ri(2)
+                            porK(i, j, kl) = por
+                        end do
+                    end do
 
-       ! Store the processor id and the size of the message
-       ! a bit easier.
+                end select
 
-       procID = commPattern(level)%sendProc(i)
-       size    = commPattern(level)%nsend(i)
+            end do subface
 
-       ! Copy the data in the correct part of the send buffer.
+        end do domains
 
-       jj = ii
-       do j=1,commPattern(level)%nsend(i)
+    end subroutine setPorosities
 
-          ! Store the block id and the indices of the donor
-          ! a bit easier.
+    subroutine exchangeGlobalCells(level, sps, commPattern, internal)
+        !
+        !       ExchangeIblank exchanges the 1 to 1 internal halo's for the
+        !       given level and sps instance.
+        !
+        use constants
+        use block
+        use communication
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level, sps
 
-          d1 = commPattern(level)%sendList(i)%block(j)
-          i1 = commPattern(level)%sendList(i)%indices(j,1)
-          j1 = commPattern(level)%sendList(i)%indices(j,2)
-          k1 = commPattern(level)%sendList(i)%indices(j,3)
+        type(commType), dimension(*), intent(in) :: commPattern
+        type(internalCommType), dimension(*), intent(in) :: internal
+        !
+        !      Local variables.
+        !
+        integer :: size, procId, ierr, index
+        integer, dimension(mpi_status_size) :: mpiStatus
 
-          ! Copy globalCell values to buffer.
+        integer(kind=intType) :: i, j, ii, jj
+        integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2
 
-          sendBufInt(jj) = flowDoms(d1,level,sps)%globalCell(i1,j1,k1)
-          jj = jj + 1
+        integer(kind=intType), dimension(:), allocatable :: sendBufInt
+        integer(kind=intType), dimension(:), allocatable :: recvBufInt
 
-       enddo
+        ! Allocate the memory for the sending and receiving buffers.
 
-       ! Send the data.
+        ii = commPattern(level)%nProcSend
+        ii = commPattern(level)%nsendCum(ii)
+        jj = commPattern(level)%nProcRecv
+        jj = commPattern(level)%nrecvCum(jj)
 
-       call mpi_isend(sendBufInt(ii), size, adflow_integer, procId, &
-            procId, ADflow_comm_world, sendRequests(i),   &
-            ierr)
+        allocate (sendBufInt(ii), recvBufInt(jj), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("exchangeIblank", &
+                           "Memory allocation failure for buffers")
 
-       ! Set ii to jj for the next processor.
+        ! Send the variables. The data is first copied into
+        ! the send buffer after which the buffer is sent asap.
 
-       ii = jj
+        ii = 1
+        sends: do i = 1, commPattern(level)%nProcSend
 
-    enddo sends
+            ! Store the processor id and the size of the message
+            ! a bit easier.
 
-    ! Post the nonblocking receives.
+            procID = commPattern(level)%sendProc(i)
+            size = commPattern(level)%nsend(i)
 
-    ii = 1
-    receives: do i=1,commPattern(level)%nProcRecv
+            ! Copy the data in the correct part of the send buffer.
 
-       ! Store the processor id and the size of the message
-       ! a bit easier.
+            jj = ii
+            do j = 1, commPattern(level)%nsend(i)
 
-       procID = commPattern(level)%recvProc(i)
-       size    = commPattern(level)%nrecv(i)
+                ! Store the block id and the indices of the donor
+                ! a bit easier.
 
-       ! Post the receive.
+                d1 = commPattern(level)%sendList(i)%block(j)
+                i1 = commPattern(level)%sendList(i)%indices(j, 1)
+                j1 = commPattern(level)%sendList(i)%indices(j, 2)
+                k1 = commPattern(level)%sendList(i)%indices(j, 3)
 
-       call mpi_irecv(recvBufInt(ii), size, adflow_integer, procId, &
-            myId, ADflow_comm_world, recvRequests(i), ierr)
+                ! Copy globalCell values to buffer.
 
-       ! And update ii.
+                sendBufInt(jj) = flowDoms(d1, level, sps)%globalCell(i1, j1, k1)
+                jj = jj + 1
 
-       ii = ii + size
+            end do
 
-    enddo receives
+            ! Send the data.
 
-    ! Copy the local data.
+            call mpi_isend(sendBufInt(ii), size, adflow_integer, procId, &
+                           procId, ADflow_comm_world, sendRequests(i), &
+                           ierr)
 
-    localCopy: do i=1,internal(level)%ncopy
+            ! Set ii to jj for the next processor.
 
-       ! Store the block and the indices of the donor a bit easier.
+            ii = jj
 
-       d1 = internal(level)%donorBlock(i)
-       i1 = internal(level)%donorIndices(i,1)
-       j1 = internal(level)%donorIndices(i,2)
-       k1 = internal(level)%donorIndices(i,3)
+        end do sends
 
-       ! Idem for the halo's.
+        ! Post the nonblocking receives.
 
-       d2 = internal(level)%haloBlock(i)
-       i2 = internal(level)%haloIndices(i,1)
-       j2 = internal(level)%haloIndices(i,2)
-       k2 = internal(level)%haloIndices(i,3)
+        ii = 1
+        receives: do i = 1, commPattern(level)%nProcRecv
 
-       ! Copy the globalCell value
+            ! Store the processor id and the size of the message
+            ! a bit easier.
 
-       flowDoms(d2,level,sps)%globalCell(i2,j2,k2) = flowDoms(d1,level,sps)%globalCell(i1,j1,k1)
+            procID = commPattern(level)%recvProc(i)
+            size = commPattern(level)%nrecv(i)
 
-    enddo localCopy
+            ! Post the receive.
 
-    ! Complete the nonblocking receives in an arbitrary sequence and
-    ! copy the variables from the buffer into the halo's.
+            call mpi_irecv(recvBufInt(ii), size, adflow_integer, procId, &
+                           myId, ADflow_comm_world, recvRequests(i), ierr)
 
-    size = commPattern(level)%nProcRecv
-    completeRecvs: do i=1,commPattern(level)%nProcRecv
+            ! And update ii.
 
-       ! Complete any of the requests.
+            ii = ii + size
 
-       call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
+        end do receives
 
-       ! Copy the data just arrived in the halo's.
+        ! Copy the local data.
 
-       ii = index
-       jj = commPattern(level)%nrecvCum(ii-1)
-       do j=1,commPattern(level)%nrecv(ii)
+        localCopy: do i = 1, internal(level)%ncopy
 
-          ! Store the block and the indices of the halo a bit easier.
+            ! Store the block and the indices of the donor a bit easier.
 
-          d2 = commPattern(level)%recvList(ii)%block(j)
-          i2 = commPattern(level)%recvList(ii)%indices(j,1)
-          j2 = commPattern(level)%recvList(ii)%indices(j,2)
-          k2 = commPattern(level)%recvList(ii)%indices(j,3)
+            d1 = internal(level)%donorBlock(i)
+            i1 = internal(level)%donorIndices(i, 1)
+            j1 = internal(level)%donorIndices(i, 2)
+            k1 = internal(level)%donorIndices(i, 3)
 
-          ! Copy the globalCell value
+            ! Idem for the halo's.
 
-          jj = jj + 1
-          flowDoms(d2,level,sps)%globalCell(i2,j2,k2) = recvBufInt(jj)
+            d2 = internal(level)%haloBlock(i)
+            i2 = internal(level)%haloIndices(i, 1)
+            j2 = internal(level)%haloIndices(i, 2)
+            k2 = internal(level)%haloIndices(i, 3)
 
-       enddo
+            ! Copy the globalCell value
 
-    enddo completeRecvs
+            flowDoms(d2, level, sps)%globalCell(i2, j2, k2) = flowDoms(d1, level, sps)%globalCell(i1, j1, k1)
 
-    ! Complete the nonblocking sends.
+        end do localCopy
 
-    size = commPattern(level)%nProcSend
-    do i=1,commPattern(level)%nProcSend
-       call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
-    enddo
+        ! Complete the nonblocking receives in an arbitrary sequence and
+        ! copy the variables from the buffer into the halo's.
 
-    ! Deallocate the memory for the sending and receiving buffers.
+        size = commPattern(level)%nProcRecv
+        completeRecvs: do i = 1, commPattern(level)%nProcRecv
 
-    deallocate(sendBufInt, recvBufInt, stat=ierr)
-    if(ierr /= 0)                       &
-         call terminate("exchangeGlobalCell", &
-         "Deallocation failure for buffers")
+            ! Complete any of the requests.
 
-  end subroutine exchangeGlobalCells
+            call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
 
-  subroutine checkSymmetry(level)
-    !
-    !       checkSymmetry checks whether or not the symmetry planes are
-    !       really planar (within a certain tolerance). If this is not the
-    !       case for the finest level, a warning is printed. In all cases
-    !       the unit normals are replaced by the face averaged unit
-    !       normal.
-    !
-    use constants
-    use blockPointers
-    use cgnsGrid
-    use inputTimeSpectral
-    use utils, only : setPointers
-    use commonFormats, only : stringSpace, stringSci5
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level
-    !
-    !      Local parameter, tolerance for planar, 0.1 degrees.
-    !
-    real(kind=realType), parameter :: tolDotmin = 0.9999985_realType
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn, mm, sps, i, j
+            ! Copy the data just arrived in the halo's.
 
-    real(kind=realType) :: fact, dotMin, dot, mult
+            ii = index
+            jj = commPattern(level)%nrecvCum(ii - 1)
+            do j = 1, commPattern(level)%nrecv(ii)
 
-    real(kind=realType), dimension(3) :: faceNorm
-    real(kind=realType), dimension(:,:,:), pointer :: ss
+                ! Store the block and the indices of the halo a bit easier.
 
-    ! Loop over the number of spectral solutions and local domains.
+                d2 = commPattern(level)%recvList(ii)%block(j)
+                i2 = commPattern(level)%recvList(ii)%indices(j, 1)
+                j2 = commPattern(level)%recvList(ii)%indices(j, 2)
+                k2 = commPattern(level)%recvList(ii)%indices(j, 3)
 
-    spectral: do sps=1,nTimeIntervalsSpectral
-       domains: do nn=1,nDom
+                ! Copy the globalCell value
 
-          ! Set the pointers for this block.
+                jj = jj + 1
+                flowDoms(d2, level, sps)%globalCell(i2, j2, k2) = recvBufInt(jj)
 
-          call setPointers(nn, level, sps)
+            end do
 
-          ! Loop over the number of boundary subfaces for this block.
+        end do completeRecvs
 
-          bocos: do mm=1,nBocos
+        ! Complete the nonblocking sends.
 
-             ! Check for symmetry boundary condition.
+        size = commPattern(level)%nProcSend
+        do i = 1, commPattern(level)%nProcSend
+            call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
+        end do
 
-             symmetry: if(BCType(mm) == symm) then
+        ! Deallocate the memory for the sending and receiving buffers.
 
-                ! Determine the block face on which this subface is
-                ! located and set some variables accordingly.
+        deallocate (sendBufInt, recvBufInt, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("exchangeGlobalCell", &
+                           "Deallocation failure for buffers")
 
-                select case (BCFaceID(mm))
+    end subroutine exchangeGlobalCells
 
-                case (iMin)
-                   mult = -one; ss => si(1,:,:,:)
-                case (iMax)
-                   mult = one; ss => si(il,:,:,:)
-                case (jMin)
-                   mult = -one; ss => sj(:,1,:,:)
-                case (jMax)
-                   mult = one; ss => sj(:,jl,:,:)
-                case (kMin)
-                   mult = -one; ss => sk(:,:,1,:)
-                case (kMax)
-                   mult = one; ss => sk(:,:,kl,:)
+    subroutine checkSymmetry(level)
+        !
+        !       checkSymmetry checks whether or not the symmetry planes are
+        !       really planar (within a certain tolerance). If this is not the
+        !       case for the finest level, a warning is printed. In all cases
+        !       the unit normals are replaced by the face averaged unit
+        !       normal.
+        !
+        use constants
+        use blockPointers
+        use cgnsGrid
+        use inputTimeSpectral
+        use utils, only: setPointers
+        use commonFormats, only: stringSpace, stringSci5
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level
+        !
+        !      Local parameter, tolerance for planar, 0.1 degrees.
+        !
+        real(kind=realType), parameter :: tolDotmin = 0.9999985_realType
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn, mm, sps, i, j
 
-                end select
+        real(kind=realType) :: fact, dotMin, dot, mult
 
-                ! Loop over the range of the subface compute the face
-                ! normal. The halo cells should not be taken into account,
-                ! which explains why the nodal range of BCData is used.
-                ! As the starting index of the cell range is shifted 1,
-                ! (inBeg+1) and (jnBeg+1) are the starting indices for the
-                ! owned cell range.
+        real(kind=realType), dimension(3) :: faceNorm
+        real(kind=realType), dimension(:, :, :), pointer :: ss
 
-                faceNorm = zero
+        ! Loop over the number of spectral solutions and local domains.
 
-                do j=(BCData(mm)%jnBeg+1), BCData(mm)%jnEnd
-                   do i=(BCData(mm)%inBeg+1), BCData(mm)%inEnd
-                      faceNorm(1) = faceNorm(1) + ss(i,j,1)
-                      faceNorm(2) = faceNorm(2) + ss(i,j,2)
-                      faceNorm(3) = faceNorm(3) + ss(i,j,3)
-                   enddo
-                enddo
+        spectral: do sps = 1, nTimeIntervalsSpectral
+            domains: do nn = 1, nDom
 
-                ! Create the unit normal for faceNorm. Make sure it
-                ! is outward pointing by multiplying it by mult;
-                ! mult is either 1.0 or -1.0.
+                ! Set the pointers for this block.
 
-                fact = sqrt(faceNorm(1)*faceNorm(1) &
-                     +      faceNorm(2)*faceNorm(2) &
-                     +      faceNorm(3)*faceNorm(3))
-                if(fact > zero) fact = mult/fact
+                call setPointers(nn, level, sps)
 
-                faceNorm(1) = faceNorm(1)*fact
-                faceNorm(2) = faceNorm(2)*fact
-                faceNorm(3) = faceNorm(3)*fact
+                ! Loop over the number of boundary subfaces for this block.
 
-                ! Check if the symmetry plane is really planar. This is
-                ! only done on the finest mesh and for the 1st spectral
-                ! solution, because it is only to inform the user.
-                ! Afterwards the normals will be reset to the unit
-                ! normal of the face anyway.
+                bocos: do mm = 1, nBocos
 
-                fineLevelTest: if(level == 1 .and. sps == 1) then
+                    ! Check for symmetry boundary condition.
 
-                   ! Initialize dotMin such that it will always
-                   ! be overwritten.
+                    symmetry: if (BCType(mm) == symm) then
 
-                   dotMin = one
+                        ! Determine the block face on which this subface is
+                        ! located and set some variables accordingly.
 
-                   ! Loop over the physical faces of the symmetry plane,
-                   ! i.e. no halo's.
+                        select case (BCFaceID(mm))
 
-                   do j=(BCData(mm)%jnBeg+1), BCData(mm)%jnEnd
-                      do i=(BCData(mm)%inBeg+1), BCData(mm)%inEnd
+                        case (iMin)
+                            mult = -one; ss => si(1, :, :, :)
+                        case (iMax)
+                            mult = one; ss => si(il, :, :, :)
+                        case (jMin)
+                            mult = -one; ss => sj(:, 1, :, :)
+                        case (jMax)
+                            mult = one; ss => sj(:, jl, :, :)
+                        case (kMin)
+                            mult = -one; ss => sk(:, :, 1, :)
+                        case (kMax)
+                            mult = one; ss => sk(:, :, kl, :)
 
-                         ! Compute the dot product between the normal of
-                         ! this face and the averaged normal of the plane.
+                        end select
 
-                         dot = BCData(mm)%norm(i,j,1)*faceNorm(1) &
-                              + BCData(mm)%norm(i,j,2)*faceNorm(2) &
-                              + BCData(mm)%norm(i,j,3)*faceNorm(3)
+                        ! Loop over the range of the subface compute the face
+                        ! normal. The halo cells should not be taken into account,
+                        ! which explains why the nodal range of BCData is used.
+                        ! As the starting index of the cell range is shifted 1,
+                        ! (inBeg+1) and (jnBeg+1) are the starting indices for the
+                        ! owned cell range.
 
-                         ! And determine the minimum of dot and dotMin
+                        faceNorm = zero
 
-                         dotMin = min(dot,dotMin)
-                      enddo
-                   enddo
+                        do j = (BCData(mm)%jnBeg + 1), BCData(mm)%jnEnd
+                            do i = (BCData(mm)%inBeg + 1), BCData(mm)%inEnd
+                                faceNorm(1) = faceNorm(1) + ss(i, j, 1)
+                                faceNorm(2) = faceNorm(2) + ss(i, j, 2)
+                                faceNorm(3) = faceNorm(3) + ss(i, j, 3)
+                            end do
+                        end do
 
-                   ! Test if the minimum dot product is smaller than the
-                   ! tolerance. If so, the plane is considered as not
-                   ! planar.
+                        ! Create the unit normal for faceNorm. Make sure it
+                        ! is outward pointing by multiplying it by mult;
+                        ! mult is either 1.0 or -1.0.
 
-                   if(dotMin < tolDotmin) then
+                        fact = sqrt(faceNorm(1)*faceNorm(1) &
+                                    + faceNorm(2)*faceNorm(2) &
+                                    + faceNorm(3)*faceNorm(3))
+                        if (fact > zero) fact = mult/fact
 
-                      ! Determine the corresponding angle in degrees of
-                      ! dotmin.
+                        faceNorm(1) = faceNorm(1)*fact
+                        faceNorm(2) = faceNorm(2)*fact
+                        faceNorm(3) = faceNorm(3)*fact
 
-                      fact = acos(dotMin)*180.0_realType/pi
+                        ! Check if the symmetry plane is really planar. This is
+                        ! only done on the finest mesh and for the 1st spectral
+                        ! solution, because it is only to inform the user.
+                        ! Afterwards the normals will be reset to the unit
+                        ! normal of the face anyway.
 
-                      ! Store the corresponding cgns block id and the
-                      ! subface in this block a bit easier.
+                        fineLevelTest: if (level == 1 .and. sps == 1) then
 
-                      i = nbkGlobal
-                      j = cgnsSubface(mm)
+                            ! Initialize dotMin such that it will always
+                            ! be overwritten.
 
-                      ! Print a warning.
+                            dotMin = one
 
-                      print "(a)", "#"
-                      print "(a)", "#                      Warning"
-                      print stringSpace, "# Symmetry boundary face", trim(cgnsDoms(i)%bocoInfo(j)%bocoName), &
-                           "of zone", trim(cgnsDoms(i)%zonename), "is not planar."
-                      write(*, stringSci5) "# Maximum deviation from the mean normal: ", real(fact), " degrees"
-                      print "(a)", "#"
+                            ! Loop over the physical faces of the symmetry plane,
+                            ! i.e. no halo's.
 
-                   endif
+                            do j = (BCData(mm)%jnBeg + 1), BCData(mm)%jnEnd
+                                do i = (BCData(mm)%inBeg + 1), BCData(mm)%inEnd
 
-                endif fineLevelTest
+                                    ! Compute the dot product between the normal of
+                                    ! this face and the averaged normal of the plane.
 
-                !removed as this would cause issues with the ADjoint
+                                    dot = BCData(mm)%norm(i, j, 1)*faceNorm(1) &
+                                          + BCData(mm)%norm(i, j, 2)*faceNorm(2) &
+                                          + BCData(mm)%norm(i, j, 3)*faceNorm(3)
+
+                                    ! And determine the minimum of dot and dotMin
+
+                                    dotMin = min(dot, dotMin)
+                                end do
+                            end do
+
+                            ! Test if the minimum dot product is smaller than the
+                            ! tolerance. If so, the plane is considered as not
+                            ! planar.
+
+                            if (dotMin < tolDotmin) then
+
+                                ! Determine the corresponding angle in degrees of
+                                ! dotmin.
+
+                                fact = acos(dotMin)*180.0_realType/pi
+
+                                ! Store the corresponding cgns block id and the
+                                ! subface in this block a bit easier.
+
+                                i = nbkGlobal
+                                j = cgnsSubface(mm)
+
+                                ! Print a warning.
+
+                                print "(a)", "#"
+                                print "(a)", "#                      Warning"
+                                print stringSpace, "# Symmetry boundary face", trim(cgnsDoms(i)%bocoInfo(j)%bocoName), &
+                                    "of zone", trim(cgnsDoms(i)%zonename), "is not planar."
+                                write (*, stringSci5) "# Maximum deviation from the mean normal: ", real(fact), " degrees"
+                                print "(a)", "#"
+
+                            end if
+
+                        end if fineLevelTest
+
+                        !removed as this would cause issues with the ADjoint
 !!$               ! Set the unit normals to the unit normal of the entire
 !!$               ! plane. All the cells, also possible halo's, are treated.
 !!$
@@ -1041,3135 +1039,3126 @@ subroutine checkSymmetry(level)
 !!$                 enddo
 !!$               enddo
 
-             endif symmetry
-          enddo bocos
-       enddo domains
-    enddo spectral
-
-  end subroutine checkSymmetry
-  subroutine xhalo(level)
-    !
-    !       xhalo determines the coordinates of the nodal halo's.
-    !       First it sets all halo coordinates by simple extrapolation,
-    !       then the symmetry planes are treated (also the unit normal of
-    !       symmetry planes are determined) and finally an exchange is
-    !       made for the internal halo's.
-    !
-    use constants
-    use blockPointers
-    use communication
-    use inputTimeSpectral
-    use utils, only : setPointers
-    use haloExchange, only : exchangeCoor
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType) :: level
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn, mm, sps, i, j, k
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, iiMax, jjMax
-
-    real(kind=realType), dimension(:,:,:), pointer :: x0, x1, x2
-
-    real(kind=realType) :: length, dot
-
-    real(kind=realType), dimension(3) :: v1, v2, norm, tmp, tmp2
-    real(kind=realType), parameter :: tolDotmin = 0.99_realType
-
-
-    ! Loop over the number of spectral solutions and the local
-    ! number of blocks.
-
-    spectralLoop: do sps=1,nTimeIntervalsSpectral
-       domains: do nn=1,nDom
-
-          ! Set the pointers to this block.
-
-          call setPointers(nn, level, sps)
-
-          !
-          !           Extrapolation of the coordinates. First extrapolation in
-          !           i-direction, without halo's, followed by extrapolation in
-          !           j-direction, with i-halo's and finally extrapolation in
-          !           k-direction, with both i- and j-halo's. In this way also
-          !           the indirect halo's get a value, albeit a bit arbitrary.
-          !
-          ! Extrapolation in i-direction.
-
-          do k=1,kl
-             do j=1,jl
-                x(0,j,k,1) = two*x(1,j,k,1) - x(2,j,k,1)
-                x(0,j,k,2) = two*x(1,j,k,2) - x(2,j,k,2)
-                x(0,j,k,3) = two*x(1,j,k,3) - x(2,j,k,3)
-
-                x(ie,j,k,1) = two*x(il,j,k,1) - x(nx,j,k,1)
-                x(ie,j,k,2) = two*x(il,j,k,2) - x(nx,j,k,2)
-                x(ie,j,k,3) = two*x(il,j,k,3) - x(nx,j,k,3)
-             enddo
-          enddo
-
-          ! Extrapolation in j-direction.
-
-          do k=1,kl
-             do i=0,ie
-                x(i,0,k,1) = two*x(i,1,k,1) - x(i,2,k,1)
-                x(i,0,k,2) = two*x(i,1,k,2) - x(i,2,k,2)
-                x(i,0,k,3) = two*x(i,1,k,3) - x(i,2,k,3)
-
-                x(i,je,k,1) = two*x(i,jl,k,1) - x(i,ny,k,1)
-                x(i,je,k,2) = two*x(i,jl,k,2) - x(i,ny,k,2)
-                x(i,je,k,3) = two*x(i,jl,k,3) - x(i,ny,k,3)
-             enddo
-          enddo
-
-          ! Extrapolation in k-direction.
-
-          do j=0,je
-             do i=0,ie
-                x(i,j,0,1) = two*x(i,j,1,1) - x(i,j,2,1)
-                x(i,j,0,2) = two*x(i,j,1,2) - x(i,j,2,2)
-                x(i,j,0,3) = two*x(i,j,1,3) - x(i,j,2,3)
-
-                x(i,j,ke,1) = two*x(i,j,kl,1) - x(i,j,nz,1)
-                x(i,j,ke,2) = two*x(i,j,kl,2) - x(i,j,nz,2)
-                x(i,j,ke,3) = two*x(i,j,kl,3) - x(i,j,nz,3)
-             enddo
-          enddo
-          !
-          !           Mirror the halo coordinates adjacent to the symmetry
-          !           planes
-          !
-          ! Loop over boundary subfaces.
-
-          loopBocos: do mm=1,nBocos
-             ! The actual correction of the coordinates only takes
-             ! place for symmetry planes.
-
-             testSymmetry: if(BCType(mm) == Symm) then
-                ! Set some variables, depending on the block face on
-                ! which the subface is located.
+                    end if symmetry
+                end do bocos
+            end do domains
+        end do spectral
+
+    end subroutine checkSymmetry
+    subroutine xhalo(level)
+        !
+        !       xhalo determines the coordinates of the nodal halo's.
+        !       First it sets all halo coordinates by simple extrapolation,
+        !       then the symmetry planes are treated (also the unit normal of
+        !       symmetry planes are determined) and finally an exchange is
+        !       made for the internal halo's.
+        !
+        use constants
+        use blockPointers
+        use communication
+        use inputTimeSpectral
+        use utils, only: setPointers
+        use haloExchange, only: exchangeCoor
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType) :: level
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn, mm, sps, i, j, k
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, iiMax, jjMax
+
+        real(kind=realType), dimension(:, :, :), pointer :: x0, x1, x2
+
+        real(kind=realType) :: length, dot
+
+        real(kind=realType), dimension(3) :: v1, v2, norm, tmp, tmp2
+        real(kind=realType), parameter :: tolDotmin = 0.99_realType
+
+        ! Loop over the number of spectral solutions and the local
+        ! number of blocks.
+
+        spectralLoop: do sps = 1, nTimeIntervalsSpectral
+            domains: do nn = 1, nDom
+
+                ! Set the pointers to this block.
+
+                call setPointers(nn, level, sps)
+
+                !
+                !           Extrapolation of the coordinates. First extrapolation in
+                !           i-direction, without halo's, followed by extrapolation in
+                !           j-direction, with i-halo's and finally extrapolation in
+                !           k-direction, with both i- and j-halo's. In this way also
+                !           the indirect halo's get a value, albeit a bit arbitrary.
+                !
+                ! Extrapolation in i-direction.
+
+                do k = 1, kl
+                    do j = 1, jl
+                        x(0, j, k, 1) = two*x(1, j, k, 1) - x(2, j, k, 1)
+                        x(0, j, k, 2) = two*x(1, j, k, 2) - x(2, j, k, 2)
+                        x(0, j, k, 3) = two*x(1, j, k, 3) - x(2, j, k, 3)
+
+                        x(ie, j, k, 1) = two*x(il, j, k, 1) - x(nx, j, k, 1)
+                        x(ie, j, k, 2) = two*x(il, j, k, 2) - x(nx, j, k, 2)
+                        x(ie, j, k, 3) = two*x(il, j, k, 3) - x(nx, j, k, 3)
+                    end do
+                end do
+
+                ! Extrapolation in j-direction.
+
+                do k = 1, kl
+                    do i = 0, ie
+                        x(i, 0, k, 1) = two*x(i, 1, k, 1) - x(i, 2, k, 1)
+                        x(i, 0, k, 2) = two*x(i, 1, k, 2) - x(i, 2, k, 2)
+                        x(i, 0, k, 3) = two*x(i, 1, k, 3) - x(i, 2, k, 3)
+
+                        x(i, je, k, 1) = two*x(i, jl, k, 1) - x(i, ny, k, 1)
+                        x(i, je, k, 2) = two*x(i, jl, k, 2) - x(i, ny, k, 2)
+                        x(i, je, k, 3) = two*x(i, jl, k, 3) - x(i, ny, k, 3)
+                    end do
+                end do
+
+                ! Extrapolation in k-direction.
+
+                do j = 0, je
+                    do i = 0, ie
+                        x(i, j, 0, 1) = two*x(i, j, 1, 1) - x(i, j, 2, 1)
+                        x(i, j, 0, 2) = two*x(i, j, 1, 2) - x(i, j, 2, 2)
+                        x(i, j, 0, 3) = two*x(i, j, 1, 3) - x(i, j, 2, 3)
+
+                        x(i, j, ke, 1) = two*x(i, j, kl, 1) - x(i, j, nz, 1)
+                        x(i, j, ke, 2) = two*x(i, j, kl, 2) - x(i, j, nz, 2)
+                        x(i, j, ke, 3) = two*x(i, j, kl, 3) - x(i, j, nz, 3)
+                    end do
+                end do
+                !
+                !           Mirror the halo coordinates adjacent to the symmetry
+                !           planes
+                !
+                ! Loop over boundary subfaces.
+
+                loopBocos: do mm = 1, nBocos
+                    ! The actual correction of the coordinates only takes
+                    ! place for symmetry planes.
+
+                    testSymmetry: if (BCType(mm) == Symm) then
+                        ! Set some variables, depending on the block face on
+                        ! which the subface is located.
+
+                        select case (BCFaceID(mm))
+                        case (iMin)
+                            iBeg = jnBeg(mm); iEnd = jnEnd(mm); iiMax = jl
+                            jBeg = knBeg(mm); jEnd = knEnd(mm); jjMax = kl
+                            x0 => x(0, :, :, :); x1 => x(1, :, :, :); x2 => x(2, :, :, :)
+
+                        case (iMax)
+                            iBeg = jnBeg(mm); iEnd = jnEnd(mm); iiMax = jl
+                            jBeg = knBeg(mm); jEnd = knEnd(mm); jjMax = kl
+                            x0 => x(ie, :, :, :); x1 => x(il, :, :, :); x2 => x(nx, :, :, :)
+
+                        case (jMin)
+                            iBeg = inBeg(mm); iEnd = inEnd(mm); iiMax = il
+                            jBeg = knBeg(mm); jEnd = knEnd(mm); jjMax = kl
+                            x0 => x(:, 0, :, :); x1 => x(:, 1, :, :); x2 => x(:, 2, :, :)
+
+                        case (jMax)
+                            iBeg = inBeg(mm); iEnd = inEnd(mm); iiMax = il
+                            jBeg = knBeg(mm); jEnd = knEnd(mm); jjMax = kl
+                            x0 => x(:, je, :, :); x1 => x(:, jl, :, :); x2 => x(:, ny, :, :)
+
+                        case (kMin)
+                            iBeg = inBeg(mm); iEnd = inEnd(mm); iiMax = il
+                            jBeg = jnBeg(mm); jEnd = jnEnd(mm); jjMax = jl
+                            x0 => x(:, :, 0, :); x1 => x(:, :, 1, :); x2 => x(:, :, 2, :)
+
+                        case (kMax)
+                            iBeg = inBeg(mm); iEnd = inEnd(mm); iiMax = il
+                            jBeg = jnBeg(mm); jEnd = jnEnd(mm); jjMax = jl
+                            x0 => x(:, :, ke, :); x1 => x(:, :, kl, :); x2 => x(:, :, nz, :)
+                        end select
+
+                        ! Determine the vector from the lower left corner to
+                        ! the upper right corner. Due to the usage of pointers
+                        ! an offset of +1 must be used, because the original
+                        ! array x start at 0.
+
+                        v1(1) = x1(iimax + 1, jjmax + 1, 1) - x1(1 + 1, 1 + 1, 1)
+                        v1(2) = x1(iimax + 1, jjmax + 1, 2) - x1(1 + 1, 1 + 1, 2)
+                        v1(3) = x1(iimax + 1, jjmax + 1, 3) - x1(1 + 1, 1 + 1, 3)
+
+                        ! And the vector from the upper left corner to the
+                        ! lower right corner.
+
+                        v2(1) = x1(iimax + 1, 1 + 1, 1) - x1(1 + 1, jjmax + 1, 1)
+                        v2(2) = x1(iimax + 1, 1 + 1, 2) - x1(1 + 1, jjmax + 1, 2)
+                        v2(3) = x1(iimax + 1, 1 + 1, 3) - x1(1 + 1, jjmax + 1, 3)
+
+                        ! Determine the normal of the face by taking the cross
+                        ! product of v1 and v2 and add it to norm.
+
+                        norm(1) = v1(2)*v2(3) - v1(3)*v2(2)
+                        norm(2) = v1(3)*v2(1) - v1(1)*v2(3)
+                        norm(3) = v1(1)*v2(2) - v1(2)*v2(1)
+
+                        ! Check if BCData is allocated yet:
+                        if (.not. bcData(mm)%symNormSet) then
+                            length = sqrt(norm(1)**2 + norm(2)**2 + norm(3)**2)
+                            if (length == 0) then
+                                length = eps
+                            end if
+                            bcData(mm)%symNorm(1) = norm(1)/length
+                            bcData(mm)%symNorm(2) = norm(2)/length
+                            bcData(mm)%symNorm(3) = norm(3)/length
+                            bcData(mm)%symNormSet = .True.
+                        else
+
+                            ! Check that the orientation of norm() is not
+                            ! different from the stored one:
+                            length = sqrt(norm(1)**2 + norm(2)**2 + norm(3)**2)
+                            if (length > eps) then
+                                tmp = norm/length
+                                tmp2 = bcData(mm)%symNorm
+                                dot = dot_product(tmp, tmp2)
+                                if (abs(dot) < tolDotmin) then
+                                    print *, 'Symmetry Plane normal has changed from initial configuration. Resetting.'
+                                    print *, 'This may cause a slightly inaccurate gradient!'
+                                    bcData(mm)%symNorm(1) = norm(1)
+                                    bcData(mm)%symNorm(2) = norm(2)
+                                    bcData(mm)%symNorm(3) = norm(3)
+                                end if
+                            end if
+
+                            ! Copy out the saved symNorm
+                            norm(1) = bcData(mm)%symNorm(1)
+                            norm(2) = bcData(mm)%symNorm(2)
+                            norm(3) = bcData(mm)%symNorm(3)
+                        end if
+
+                        ! Compute the length of the normal and test if this is
+                        ! larger than eps. If this is the case this means that
+                        ! it is a nonsingular subface and the coordinates are
+                        ! corrected.
+
+                        length = sqrt(norm(1)**2 + norm(2)**2 + norm(3)**2)
+
+                        testSingular: if (length > eps) then
+
+                            ! Compute the unit normal of the subface.
+
+                            norm(1) = norm(1)/length
+                            norm(2) = norm(2)/length
+                            norm(3) = norm(3)/length
+
+                            ! Add an overlap to the symmetry subface if the
+                            ! boundaries coincide with the block boundaries.
+                            ! This way the indirect halo's are treated properly.
+
+                            if (iBeg == 1) iBeg = 0
+                            if (iEnd == iiMax) iEnd = iiMax + 1
+
+                            if (jBeg == 1) jBeg = 0
+                            if (jEnd == jjMax) jEnd = jjMax + 1
+
+                            ! Loop over the nodes of the subface and set the
+                            ! corresponding halo coordinates.
+
+                            do j = jBeg, jEnd
+                                do i = iBeg, iEnd
+
+                                    ! Determine the vector from the internal node to the
+                                    ! node on the face. Again an offset of +1 must be
+                                    ! used, due to the usage of pointers.
+
+                                    v1(1) = x1(i + 1, j + 1, 1) - x2(i + 1, j + 1, 1)
+                                    v1(2) = x1(i + 1, j + 1, 2) - x2(i + 1, j + 1, 2)
+                                    v1(3) = x1(i + 1, j + 1, 3) - x2(i + 1, j + 1, 3)
+
+                                    ! Determine two times the normal component of this
+                                    ! vector; this vector must be added to the
+                                    ! coordinates of the internal node to obtain the
+                                    ! halo coordinates. Again the offset of +1.
+
+                                    dot = two*(v1(1)*norm(1) + v1(2)*norm(2) &
+                                               + v1(3)*norm(3))
+
+                                    x0(i + 1, j + 1, 1) = x2(i + 1, j + 1, 1) + dot*norm(1)
+                                    x0(i + 1, j + 1, 2) = x2(i + 1, j + 1, 2) + dot*norm(2)
+                                    x0(i + 1, j + 1, 3) = x2(i + 1, j + 1, 3) + dot*norm(3)
+
+                                end do
+                            end do
+                        end if testSingular
+                    end if testSymmetry
+                end do loopBocos
+            end do domains
+        end do spectralLoop
+
+        !
+        !       Exchange the coordinates for the internal halo's.
+        !
+        call exchangeCoor(level)
+
+    end subroutine xhalo
+
+    subroutine setSurfaceFamilyInfo
+
+        use constants
+        use su_cgns
+        use blockPointers, onlY: nDom, flowDoms, nBocos, cgnsSubFace, BCType, BCData
+        use cgnsGrid, onlY: cgnsDoms
+        use communication, only: myid, adflow_comm_world, nProc
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use surfaceFamilies, only: BCFamExchange, famNames, fullFamList, &
+                                   zeroCellVal, zeroNodeVal, oneCellVal, BCFamgroups
+        use utils, only: setPointers, EChk, pointReduce, terminate, convertToLowerCase
+        use sorting, only: qsortStrings, bsearchStrings, famInList
+        use surfaceUtils, only: getSurfaceSize
+        implicit none
+
+        integer :: ierr
+        integer(kind=intType) :: nLevels, level, nn, mm, nsMin, nsMax, i, j, k, nFam, famID, cgb, iFam
+        integer(kind=intType) :: sps, isizemax, jsizemax, totalFamilies, totalWallFamilies
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, ii, iBCGroup, totalBCFamilies
+        character(maxCGNSNameLen), dimension(25) :: defaultFamName
+        character(maxCGNSNameLen) :: curStr, family
+        character(maxCGNSNameLen), dimension(:), allocatable :: uniqueFamListNames
+        integer(kind=intType), dimension(:), allocatable :: localFlag, famIsPartOfBCGroup
+        integer(kind=intType), dimension(:), allocatable :: localIndices, nodeSizes, nodeDisps
+        integer(kind=intType) :: iProc, nodeSize, cellSize
+
+        ! Process out the family information. The goal here is to
+        ! assign a unique integer to each family in each boundary
+        ! condition. The CGNS grid has all the information we need.
+
+        ! Firstly make sure that there is actual family specified for
+        ! each BC. If there isn't, we will provide one for you.
+        defaultFamName(BCAxisymmetricWedge) = 'axi'
+        defaultFamName(BCDegenerateLine) = 'degenerate'
+        defaultFamName(BCDegeneratePoint) = 'degenerate'
+        defaultFamName(BCDirichlet) = 'dirichlet'
+        defaultFamName(BCExtrapolate) = 'extrap'
+        defaultFamName(BCFarfield) = 'far'
+        defaultFamName(BCGeneral) = 'general'
+        defaultFamName(BCInflow) = 'inflow'
+        defaultFamName(BCInflowSubsonic) = 'inflow'
+        defaultFamName(BCInflowSupersonic) = 'inflow'
+        defaultFamName(BCNeumann) = 'neumann'
+        defaultFamName(BCOutflow) = 'outflow'
+        defaultFamName(BCOutflowSubsonic) = 'outflow'
+        defaultFamName(BCOutflowSupersonic) = 'outflow'
+        defaultFamName(BCSymmetryPlane) = 'sym'
+        defaultFamName(BCSymmetryPolar) = 'sympolar'
+        defaultFamName(BCTunnelInflow) = 'inflow'
+        defaultFamName(BCTunnelOutflow) = 'outflow'
+        defaultFamName(BCWall) = 'wall'
+        defaultFamName(BCWallInviscid) = 'wall'
+        defaultFamName(BCWallViscous) = 'wall'
+        defaultFamName(BCWallViscousHeatFlux) = 'wall'
+        defaultFamName(BCWallViscousIsothermal) = 'wall'
+        defaultFamName(UserDefined) = 'userDefined'
+
+        nFam = 0
+        do i = 1, size(cgnsDoms)
+            do j = 1, size(cgnsDoms(i)%bocoInfo)
+                if (cgnsDoms(i)%bocoInfo(j)%actualFace) then
+                    if (trim(cgnsDoms(i)%bocoInfo(j)%wallBCName) == "") then
+                        if (myid == 0) then
+                            ! Tell the user we are adding an automatic family name
+                            write (*, "(2(A, I4), *(A))") "CGNS Block ", i, ", boundary condition ", j, ", of type ", &
+                                trim(BCTypeName(cgnsDoms(i)%bocoInfo(j)%BCTypeCGNS)), &
+                                " does not have a family. Based on the boundary condition type,", &
+                                " a name of: '", trim(defaultFamName(cgnsDoms(i)%bocoInfo(j)%BCTypeCGNS)), "' will be used."
+                        end if
+                        cgnsDoms(i)%bocoInfo(j)%wallBCName = trim(defaultFamName(cgnsDoms(i)%bocoInfo(j)%BCTypeCGNS))
+                    end if
+                    nFam = nFam + 1
+                end if
+            end do
+        end do
+
+        ! Allocate space for the full family list
+        allocate (famNames(nFam))
+        nFam = 0
+        do i = 1, size(cgnsDoms)
+            do j = 1, size(cgnsDoms(i)%bocoInfo)
+                if (cgnsDoms(i)%bocoInfo(j)%actualFace) then
+                    nFam = nFam + 1
+                    famNames(nfam) = cgnsDoms(i)%bocoInfo(j)%wallBCName
+                    call convertToLowerCase(famNames(nFam))
+                end if
+            end do
+        end do
+
+        ! Now sort the family names:
+        call qsortStrings(famNames, nFam)
+
+        ! Next we need to generate a unique set of names.
+        allocate (uniqueFamListNames(nFam))
+
+        curStr = famNames(1)
+        uniqueFamListNames(1) = curStr
+        j = 1
+        i = 1
+        do while (i < nFam)
+
+            i = i + 1
+            if (famNames(i) == curStr) then
+                ! Same str, do nothing.
+            else
+                j = j + 1
+                curStr = famNames(i)
+                uniqueFamListNames(j) = curStr
+            end if
+        end do
+
+        totalFamilies = j
+        ! Now copy the uniqueFamListNames back to "famNames" and allocate
+        ! exactly the right size.
+        deallocate (famNames)
+        allocate (famNames(totalFamilies))
+        famNames(1:totalFamilies) = uniqueFamListNames(1:totalFamilies)
+        deallocate (uniqueFamListNames)
+
+        ! Now each block boundary condition can uniquely determine it's
+        ! famID. We do all BC on all blocks and levels.
+        nLevels = ubound(flowDoms, 2)
+        do nn = 1, nDom
+            call setPointers(nn, 1_intType, 1_intType)
+            do mm = 1, nBocos
+
+                cgb = flowDoms(nn, 1, 1)%cgnsBlockID
+                family = cgnsDoms(cgb)%bocoInfo(cgnsSubface(mm))%wallBCName
+                call convertToLowerCase(family)
+
+                famID = bsearchStrings(family, famNames)
+                if (famID == 0) then
+                    ! Somehow we never found the family...
+                    call terminate("setSurfaceFamilyInfo", &
+                                   "An error occuring in assigning families")
+                end if
+
+                ! Now set the data on each of the level/sps instances
+                do sps = 1, nTimeIntervalsSpectral
+                    do level = 1, nlevels
+
+                        flowDoms(nn, level, sps)%bcData(mm)%famID = famID
+                        flowDoms(nn, level, sps)%bcData(mm)%family = family
+
+                    end do
+                end do
+            end do
+        end do
+
+        ! Next we need to group the families based on their boundary
+        ! condition. The reason for this is that we generate the reduction
+        ! scatterd based on groups of BC types. Specifically the following groups:
+
+        ! 1. Walls : EulerWall, NSWallAdiabatic, NSWallIsothermal
+        ! 2. Symm : Symm, SymmPolar
+        ! 3. Inflow/Outflow : subSonicInflow, subSonicOutflow, supersonicInflow, superSonicOutflow
+        ! 4. Farfield : Farfield
+        ! 5. Overset : OversetouterBound
+        ! 6. Others : All remaining BCs
+
+        ! The final familyExchange structure.
+        allocate (BCFamExchange(nFamExchange, nTimeIntervalsSpectral), localFlag(totalFamilies))
+
+        BCGroupLoop: do iBCGroup = 1, nfamExchange
+            localFlag = 0
+            ! Determine which of the unique families match the specific
+            ! BCGroup.  This is slightly inefficient but not it isn't
+            ! performance critical.
+            famLoop: do iFam = 1, totalFamilies
+                domainLoop: do nn = 1, nDom
+                    call setPointers(nn, 1_intType, 1_intType)
+                    bocoLoop: do mm = 1, nBocos
+                        matchiFam: if (flowDoms(nn, 1, 1)%bcData(mm)%famID == iFam) then
+                            select case (iBCGroup)
+
+                            case (iBCGroupWalls)
+                                if (BCType(mm) == EulerWall .or. &
+                                    BCType(mm) == NSWallAdiabatic .or. &
+                                    BCType(mm) == NSwallIsoThermal) then
+                                    localFlag(iFam) = 1
+                                end if
+
+                            case (iBCGroupInflow)
+                                if (BCType(mm) == SubsonicInflow .or. &
+                                    BCType(mm) == SupersonicInflow) then
+                                    localFlag(iFam) = 1
+                                end if
+
+                            case (iBCGroupOutflow)
+                                if (BCType(mm) == SubsonicOutflow .or. &
+                                    BCType(mm) == SupersonicOutflow) then
+                                    localFlag(iFam) = 1
+                                end if
+
+                            case (iBCGroupSymm)
+                                if (BCType(mm) == Symm .or. BCType(mm) == SymmPolar) then
+                                    localFlag(iFam) = 1
+                                end if
+
+                            case (iBCGroupFarfield)
+                                if (BCType(mm) == Farfield) then
+                                    localFlag(iFam) = 1
+                                end if
+
+                            case (iBCGroupOverset)
+                                if (BCType(mm) == OversetOuterBound) then
+                                    localFlag(iFam) = 1
+                                end if
+
+                            case (iBCGroupOther)
+                                ! All other boundary conditions. Note that some
+                                ! of these are not actually implemented
+                                if (BCType(mm) == BCNull .or. &
+                                    BCType(mm) == MassBleedInflow .or. &
+                                    BCType(mm) == MassbleedOutflow .or. &
+                                    BCType(mm) == mDot .or. &
+                                    BCType(mm) == BCThrust .or. &
+                                    BCType(mm) == Extrap .or. &
+                                    BCType(mm) == B2BMatch .or. &
+                                    BCType(mm) == B2BMisMatch .or. &
+                                    BCType(mm) == SlidingInterface .or. &
+                                    BCType(mm) == DomainInterfaceAll .or. &
+                                    BCType(mm) == DomainInterfaceRhoUVW .or. &
+                                    BCType(mm) == DomainInterfaceP .or. &
+                                    BCType(mm) == DomainInterfaceRho .or. &
+                                    BCType(mm) == DomainInterfaceTotal) then
+                                    localFlag(iFam) = 1
+                                end if
+                            end select
+                        end if matchiFam
+                    end do bocoLoop
+                end do domainLoop
+            end do famLoop
+
+            ! All Reduce so all procs know the same information.
+            allocate (famIsPartOfBCGroup(totalFamilies))
+            call mpi_allreduce(localFlag, famIsPartOfBCGroup, totalFamilies, &
+                               adflow_integer, MPI_SUM, adflow_comm_world, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Count up the number of families this BC has.
+            totalBCFamilies = 0
+            do i = 1, totalFamilies
+                if (famIsPartOfBCGroup(i) > 0) then
+                    totalBCFamilies = totalBCFamilies + 1
+                end if
+            end do
+
+            ! Allocate the space for the list of fam for each BC and set.
+            allocate (BCFamGroups(iBCGroup)%famList(totalBCFamilies))
+            k = 0
+            do i = 1, totalFamilies
+                if (famIsPartOfBCGroup(i) > 0) then
+                    k = k + 1
+                    BCFamGroups(iBCGroup)%famList(k) = i
+                end if
+            end do
+            deallocate (famIsPartOfBCGroup)
+        end do BCGroupLoop
+
+        ! Dump a little information out to the user giving the family and
+        ! the BC types. This will probably be useful in general.
+
+        if (myid == 0) then
+            write (*, "(a)") '+--------------------------------------------------+'
+            write (*, "(a)") '  CGNS Surface Families by Boundary Condition Type'
+            write (*, "(a)") '+--------------------------------------------------+'
+
+            do iBCGroup = 1, 6
+                select case (iBCGroup)
+                case (iBCGroupWalls)
+                    write (*, "(a)", advance="no") '| Wall Types           : '
+                case (iBCGroupInflow)
+                    write (*, "(a)", advance="no") '| Inflow Types         : '
+                case (iBCGroupOutflow)
+                    write (*, "(a)", advance="no") '| Outflow Types        : '
+                case (iBCGroupSymm)
+                    write (*, "(a)", advance="no") '| Symmetry Types       : '
+                case (iBCGroupFarfield)
+                    write (*, "(a)", advance="no") '| Farfield Types       : '
+                case (iBCGroupOverset)
+                    write (*, "(a)", advance="no") '| Overset Types        : '
+                case (iBCGroupOther)
+                    write (*, "(a)", advance="no") '| Other Types          : '
+                end select
+
+                do i = 1, size(BCFamGroups(iBCGroup)%famList)
+                    write (*, "(a,1x)", advance="no") trim(famNames(BCFamGroups(iBCGroup)%famList(i)))
+                end do
+                print "(1x)"
+            end do
+            write (*, "(a)") '+--------------------------------------------------+'
+        end if
+
+        ! Generate the node scatters for each family. This will also tell
+        ! us the surfaceIndex for each BC. This is the index into the
+        ! *gloablly reduced vector*. This is what we will need for tecplot
+        ! output as well as the zipper mesh computations.
+
+        do iBCGroup = 1, nFamExchange
+            do sps = 1, nTimeIntervalsSpectral
+                call createNodeScatterForFamilies( &
+                    BCFamGroups(iBCGroup)%famList, BCFamExchange(iBCGroup, sps), sps, localIndices)
+
+                ! this won't include the zipper nodes since that isn't done yet.
+                call getSurfaceSize(nodeSize, cellSize, BCFamGroups(iBCGroup)%famList, &
+                                    size(BCFamGroups(iBCGroup)%famlist), .False.)
+                allocate (nodeSizes(nProc), nodeDisps(0:nProc))
+                nodeSizes = 0
+                nodeDisps = 0
+
+                call mpi_allgather(nodeSize, 1, adflow_integer, nodeSizes, 1, adflow_integer, &
+                                   adflow_comm_world, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+                nodeDisps(0) = 0
+                do iProc = 1, nProc
+                    nodeDisps(iProc) = nodeDisps(iProc - 1) + nodeSizes(iProc)
+                end do
+
+                ii = 0
+                do nn = 1, nDom
+                    call setPointers(nn, 1, sps)
+                    do mm = 1, nBocos
+                        famInclude: if (famInList(BCData(mm)%famId, BCFamGroups(iBCGroup)%famList)) then
+                            iBeg = BCData(mm)%inbeg; iEnd = BCData(mm)%inend
+                            jBeg = BCData(mm)%jnbeg; jEnd = BCData(mm)%jnend
+                            do j = jBeg, jEnd
+                                do i = iBeg, iEnd
+                                    ii = ii + 1
+                                    BCData(mm)%surfIndex(i, j) = ii + nodeDisps(myid)
+                                end do
+                            end do
+                        end if famInclude
+                    end do
+                end do
+                deallocate (localIndices, nodeSizes, nodeDisps)
+            end do
+        end do
+        ! Allocate  arrays that have the maximum face size. These may
+        ! be slightly larger than necessary, but that's ok. We just need
+        ! somethwere to point the pointers.
+        isizemax = 0
+        jsizemax = 0
+        do nn = 1, nDom
+            isizemax = max(isizemax, flowDoms(nn, 1, 1)%ie)
+            isizemax = max(isizemax, flowDoms(nn, 1, 1)%je)
+
+            jsizemax = max(jsizemax, flowDoms(nn, 1, 1)%je)
+            jsizemax = max(jsizemax, flowDoms(nn, 1, 1)%ke)
+        end do
+
+        ! Allocate generic arrays for the cell and nodes. These will be
+        ! used when a BC is not included in a computed but needs to be
+        ! point somehwere.
+        allocate (zeroCellVal(isizemax, jsizemax), zeroNodeVal(isizemax, jsizemax), oneCellVal(isizemax, jsizemax))
+        oneCellVal = one
+        zeroCellVal = zero
+        zeroNodeVal = zero
+
+        ! Finally, create the shortcut array for all families. This is just
+        ! 1,2,3..totalFamilies.
+        allocate (fullFamList(totalFamilies))
+        do i = 1, totalFamilies
+            fullFamList(i) = i
+        end do
+
+    end subroutine setSurfaceFamilyInfo
+
+    subroutine createNodeScatterForFamilies(famList, exch, sps, localIndices)
+
+        ! The purpose of this routine is to create the appropriate data
+        ! structures that allow for the averaging of cell based surface
+        ! quantities to node-based quantities. The primary reason for this
+        ! is that the viscous stress tensor is not available at halo cells
+        ! and therefore it is not possible to create consistent node-based
+        ! values locallly. What the scatter does is allows us to sum the
+        ! nodal values across processors, average them and finally update
+        ! the node based values to be consistent. This operation is
+        ! necessary for several operations:
+
+        ! 1. Integration of forces over zipper triangles requires force/area
+        ! at nodes.
+        ! 2. Lift distributions/slices also requires node-based tractions
+        ! 3. Node-based output for tecplot files.
+        use constants
+        use communication, only: adflow_comm_world, myid, nProc
+        use surfaceFamilies, only: familyExchange, IS1, IS2!, PETSC_COPY_VALUES, PETSC_DETERMINE
+        use utils, only: pointReduce, eChk
+        use surfaceUtils
+        implicit none
+
+        ! Input Parameters
+        integer(kind=intType), dimension(:), intent(in) :: famList
+        integer(kind=intType), intent(in) :: sps
+        type(familyExchange), intent(inout) :: exch
+        integer(kind=intType), dimension(:), intent(out), allocatable :: localIndices
+
+        ! Working param
+        integer(kind=intType) :: i, j, ierr, nNodesLocal, nNodesTotal, nCellsLocal, nFam
+        integer(kind=intType) :: nUnique, iSize, iStart, iEnd, iProc
+        real(kind=realType), dimension(:, :), allocatable :: localNodes, allNodes
+        real(kind=realType), dimension(:, :), allocatable :: uniqueNodes
+        integer(kind=intType), dimension(:), allocatable :: link, startIndices, endIndices
+        integer(kind=intType), dimension(:), allocatable :: nNodesProc, cumNodesProc
+        real(kind=realType) :: tol
+        integer(kind=intType) :: mpiStatus(MPI_STATUS_SIZE)
+
+        ! Save the family list.
+        nFam = size(famList)
+        allocate (exch%famList(nFam))
+        exch%famList = famList
+        exch%sps = sps
+
+        ! Determine the total number of nodes and cells on this
+        ! processor. This will include the zipper mesh if there is one.
+        call getSurfaceSize(nNodesLocal, nCellsLocal, famList, nFam, .True.)
+
+        ! Allocate the space to store nodal values, connectivity and the
+        ! family of each element
+        exch%nNodes = nNodesLocal
+
+        ! Allocate space for the some arrays
+        allocate (localNodes(3, nNodesLocal), nNodesProc(nProc), cumNodesProc(0:nProc))
+        call getSurfacePoints(localNodes, nNodesLocal, sps, famList, nFam, .True.)
+
+        ! Determine the total number of nodes on each proc
+        call mpi_allgather(nNodesLocal, 1, adflow_integer, nNodesProc, 1, adflow_integer, &
+                           adflow_comm_world, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Determine cumulative version
+        cumNodesProc(0) = 0_intType
+        nNodesTotal = 0
+        do i = 1, nProc
+            nNodesTotal = nNodesTotal + nNodesProc(i)
+            cumNodesProc(i) = cumNodesProc(i - 1) + nNodesProc(i)
+        end do
+
+        ! Send all the nodes to everyone
+        allocate (allNodes(3, nNodesTotal))
+        call mpi_allgatherv(localNodes, nNodesLocal*3, adflow_real, allNodes, &
+                            nNodesProc*3, cumNodesProc*3, adflow_real, adflow_comm_world, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Local nodes is no longer necessary
+        deallocate (localNodes)
+
+        ! Now point reduce
+        allocate (uniqueNodes(3, nNodestotal), link(nNodestotal))
+        tol = 1e-12
+
+        call pointReduce(allNodes, nNodesTotal, tol, uniqueNodes, link, nUnique)
+
+        ! We can immediately discard everything but link since we are only
+        ! doing logical operations here:
+        deallocate (uniqueNodes, allNodes)
+
+        ! Now back out the global indices for our local points
+        if (allocated(localIndices)) then
+            deallocate (localIndices)
+        end if
+        allocate (localIndices(nNodesLocal))
+        do i = 1, nNodesLocal
+            ! The -1 is to convert to 0-based ordering for petsc
+            localIndices(i) = link(cumNodesProc(myid) + i) - 1
+        end do
+
+        ! Create the basic (scalar) local vector
+        call VecCreateMPI(ADFLOW_COMM_WORLD, nNodesLocal, PETSC_DETERMINE, &
+                          exch%nodeValLocal, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Create the basic global vector. This is slightly tricker than it
+        ! sounds. We could just make it uniform, but then there would be
+        ! more communicaiton than necessary. Instead what we do is determine
+        ! the min and max range of local indices on the proc and the one
+        ! before it. A little diagram will help
+        !
+        ! Proc 0 +---------------------+
+        ! Proc 1                    +-------------+
+        ! Proc 2                                +----------------+
+        !
+        ! Proc zero has a many global nodes as local since they are by
+        ! definition all unqiue. Proc 1 then will start at 1 more than the
+        ! proc 0 and continue to it's maximum value. Proc 2 starts at the
+        ! end of proc 1 etc. This way the vast majority of the global nodes
+        ! are owned locally.
+
+        ! In order to determine the owning range for each processor, it is
+        ! much trickier than it sounds. We do a linear cascasde through the
+        ! procs sending the upper range from proc 0 to proc 1, then proc1 to
+        ! proc 2 and so on.
+
+        ! Proc zero owns all of it's nodes.
+        if (myid == 0) then
+            iStart = 0
+            if (nNodesLocal == 0) then
+                iEnd = 0
+            else
+                iEnd = maxval(localIndices) + 1
+            end if
+        end if
+
+        do iProc = 0, nProc - 2
+            if (myid == iProc) then
+                ! I need to send my iEnd to proc+1
+                call mpi_send(iEnd, 1, adflow_integer, iProc + 1, iProc, adflow_comm_world, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+            else if (myid == iProc + 1) then
+
+                ! Receive the value from the proc below me:
+                call mpi_recv(iEnd, 1, adflow_integer, iProc, iProc, adflow_comm_world, mpiStatus, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                ! On this proc, the start index is the
+                iStart = iEnd
+                if (nNodesLOCAl == 0) then
+                    iEnd = iStart
+                else
+                    iEnd = max(iStart, maxval(localIndices) + 1)
+                end if
+            end if
+        end do
+
+        iSize = iEnd - iStart
+        ! Create the actual global vec. Note we also include nUnique to make
+        ! sure we have all the local sizes correct.
+        call VecCreateMPI(ADFLOW_COMM_WORLD, iSize, nUnique, &
+                          exch%nodeValGlobal, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecDuplicate(exch%nodeValGlobal, exch%sumGlobal, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Now create the scatter that goes from the local vec to the global
+        ! vec.
+
+        ! Indices for the local vector is just a stride, starting at the
+        ! offset
+        call ISCreateStride(ADFLOW_COMM_WORLD, nNodesLocal, cumNodesProc(myid), &
+                            1, IS1, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Indices for the global vector are the "localIndices" we previously
+        ! computed.
+        call ISCreateGeneral(adflow_comm_world, nNodesLocal, localIndices, &
+                             PETSC_COPY_VALUES, IS2, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecScatterCreate(exch%nodeValLocal, IS1, exch%nodeValGlobal, IS2, &
+                              exch%scatter, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! And dont' forget to destroy the index sets
+        call ISDestroy(IS1, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call ISDestroy(IS2, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        exch%allocated = .True.
+    end subroutine createNodeScatterForFamilies
+
+    subroutine setReferenceVolume(level)
+
+        use constants
+        use blockPointers, only: nDom, flowDoms, ib, jb, kb
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use utils, only: setPointers
+        implicit none
+        integer :: ierr
+
+        integer(kind=intType), intent(in) :: level
+        integer(kind=intType) :: nn, sps
+        integer(kind=intType) :: i, j, k
+
+        spectral: do sps = 1, nTimeIntervalsSpectral
+            domains: do nn = 1, nDom
+                call setPointers(nn, level, sps)
+                allocate (flowDoms(nn, level, sps)%volRef(0:ib, 0:jb, 0:kb))
+
+                do k = 0, kb
+                    do j = 0, jb
+                        do i = 0, ib
+                            flowDoms(nn, level, sps)%volRef(i, j, k) = &
+                                flowDoms(nn, level, sps)%vol(i, j, k)
+                        end do
+                    end do
+                end do
+            end do domains
+        end do spectral
+    end subroutine setReferenceVolume
+
+    subroutine setGlobalCellsAndNodes(level)
+        !
+        !      Determine the global node numbering that is used to assemble
+        !      the adjoint system of equations. It take cares of all the halo
+        !      nodes between the blocks.
+        !      The nodes are numbered according to the following sequence:
+        !      loop processor = 1, nProc
+        !        loop domain = 1, nDom
+        !          loop k = 2, kl
+        !            loop j = 2, jl
+        !              loop i = 2, il
+        !      Only the onwned nodes are numbered, meaning i/j/k span from 2
+        !      to il/jl/kl. The halo nodes receive the numbering from the
+        !      neighboring block that owns them.
+        !      These variables are the same for all spectral modes, therefore
+        !      only the 1st mode needs to be communicated.
+        !      This function will also set FMPointer which is only defined
+        !      on wall boundary conditions and points to the correct index
+        !      for the vectors that are of shape nsurface nodes
+        !
+        use ADjointVars
+        use blockpointers
+        use communication
+        use inputTimeSpectral
+        use utils, only: setPointers, terminate
+        use haloExchange, only: whalo1to1intgeneric
+        use wallDistanceData, only: nCellBlockOffset
+        implicit none
+
+        ! Input variables
+        integer(kind=intType), intent(in) :: level
+
+        ! Local variables
+        integer(kind=intType) :: nn, i, j, k, sps, iDim
+        integer(kind=intType) :: ierr, istart
+        logical :: commPressure, commLamVis, commEddyVis, commGamma
+        integer(kind=intType), dimension(nProc) :: nNodes, nCells, nCellOffset, nNodeOffset
+        integer(kind=intType), dimension(nDom) :: nNodeBLockOffset
+        integer(kind=intType) :: npts, nCell, nNode
+        integer(kind=intType), dimension(:), allocatable :: nNodesProc, cumNodesProc
+        integer(kind=intTYpe), dimension(:), allocatable :: nCellsProc, cumCellsProc
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, ii, jj, mm
+
+        do sps = 1, nTimeIntervalsSpectral
+            do nn = 1, nDom
+                call setPointers(nn, level, sps)
+                ! Allocate memory for the cell and node indexing...only on sps=1
+                allocate (flowDoms(nn, level, sps)%globalCell(0:ib, 0:jb, 0:kb), &
+                          flowDoms(nn, level, sps)%globalNode(0:ie, 0:je, 0:ke), stat=ierr)
+                if (ierr /= 0) then
+                    call terminate("setGlobalCellsAndNodes", "Allocation failure for globalCell/Node")
+                end if
+                ! Assign a 'magic number' of -5 to globalCell and global Node:
+                flowDoms(nn, level, sps)%globalCell = -5
+                flowDoms(nn, level, sps)%globalNode = -5
+            end do
+        end do
+
+        ! Determine the number of nodes and cells owned by each processor
+        ! by looping over the local block domains.
+        nCellsLocal(level) = 0
+        nNodesLocal(level) = 0
+        do nn = 1, nDom
+            ! Set to first spectral instance since we only need sizes
+            call setPointers(nn, level, 1_intType)
+            nCellsLocal(level) = nCellsLocal(level) + nx*ny*nz
+            nNodesLocal(level) = nNodesLocal(level) + il*jl*kl
+        end do
+
+        ! Reduce the number of cells in all processors: add up nCellsLocal
+        ! into nCellsGlobal and sends the result to all processors.
+        ! (use mpi sum operation)
+
+        call mpi_allreduce(nCellsLocal(level), nCellsGlobal(level), 1, adflow_integer, &
+                           mpi_sum, ADflow_comm_world, ierr)
+
+        ! Gather the number of Cells per processor in the root processor.
+        call mpi_gather(nCellsLocal(level), 1, adflow_integer, nCells, 1, &
+                        adflow_integer, 0, ADflow_comm_world, ierr)
+
+        ! Repeat for the number of nodes.
+        ! (use mpi sum operation)
+        call mpi_allreduce(nNodesLocal(level), nNodesGlobal(level), 1, adflow_integer, &
+                           mpi_sum, ADflow_comm_world, ierr)
+
+        ! Gather the number of nodes per processor in the root processor.
+        call mpi_gather(nNodesLocal(level), 1, adflow_integer, nNodes, 1, &
+                        adflow_integer, 0, ADflow_comm_world, ierr)
+
+        ! Determine the global cell number offset for each processor.
+        rootProc: if (myID == 0) then
+            nCellOffset(1) = 0
+            nNodeOffset(1) = 0
+            do nn = 2, nProc
+                nCellOffset(nn) = nCellOffset(nn - 1) + nCells(nn - 1)
+                nNodeOffset(nn) = nNodeOffset(nn - 1) + nNodes(nn - 1)
+            end do
+        end if rootProc
+
+        ! Scatter the global cell number offset per processor.
+        call mpi_scatter(nCellOffset, 1, adflow_integer, nCellOffsetLocal(level), 1, &
+                         adflow_integer, 0, ADflow_comm_world, ierr)
+
+        ! Determine the global cell number offset for each local block.
+        nCellBlockOffset(level, 1) = nCellOffsetLocal(level)
+        do nn = 2, nDom
+            call setPointers(nn - 1, level, 1)
+            nCellBlockOffset(level, nn) = nCellBlockOffset(level, nn - 1) &
+                                          + nx*ny*nz
+        end do
+
+        ! Repeat for nodes.
+        call mpi_scatter(nNodeOffset, 1, adflow_integer, nNodeOffsetLocal(level), 1, &
+                         adflow_integer, 0, ADflow_comm_world, ierr)
+
+        ! Determine the global node number offset for each local block.
+        nNodeBlockOffset(1) = nNodeOffsetLocal(level)
+        do nn = 2, nDom
+            call setPointers(nn - 1, level, 1)
+            nNodeBlockOffset(nn) = nNodeBLockOffset(nn - 1) + il*jl*kl
+        end do
+
+        ! Determine the global block row index for each (i,j,k) cell in
+        ! each local block.
+
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, level, sps)
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            ! modified Timespectral indexing. Put all time
+                            ! instances of a give block adjacent to each other in
+                            ! the matrix
+                            globalCell(i, j, k) = &
+                                nCellBLockOffset(level, nn)*nTimeIntervalsSpectral + nx*ny*nz*(sps - 1) + &
+                                (i - 2) + (j - 2)*nx + (k - 2)*nx*ny
+                        end do
+                    end do
+                end do
+            end do
+        end do
+
+        ! Determine the global block row index for each (i,j,k) node in
+        ! each local block.
+        do sps = 1, nTimeIntervalsSpectral
+            do nn = 1, nDom
+                call setPointers(nn, level, sps)
+                do k = 1, kl
+                    do j = 1, jl
+                        do i = 1, il
+                            !modified Timespectral indexing. Put all time
+                            !instances of a give block adjacent to each other in
+                            !the matrix
+                            globalNode(i, j, k) = &
+                                nNodeBLockOffset(nn)*nTimeIntervalsSpectral + &
+                                il*jl*kl*(sps - 1) + (i - 1) + (j - 1)*il + (k - 1)*il*jl
+
+                        end do
+                    end do
+                end do
+            end do
+        end do
+
+        ! The above procedure has uniquely numbered all cells and nodes
+        ! owned on each processor. However we must also determine the
+        ! indices of the halo cells/nodes from other processors. To do this
+        ! we just run the specific halo exchanges for the cells and one for
+        ! the nodes
+
+        spectralModes: do sps = 1, nTimeIntervalsSpectral
+            domainLoop: do nn = 1, nDom
+                flowDoms(nn, level, sps)%intCommVars(1)%var => &
+                    flowDoms(nn, level, sps)%globalNode(:, :, :)
+            end do domainLoop
+
+            ! Run the generic integer exchange
+            call wHalo1to1IntGeneric(1, level, sps, commPatternNode_1st, internalNode_1st)
+        end do spectralModes
+
+        spectralModes2: do sps = 1, nTimeIntervalsSpectral
+            domainLoop2: do nn = 1, nDom
+                flowDoms(nn, level, sps)%intCommVars(1)%var => &
+                    flowDoms(nn, level, sps)%globalCell(:, :, :)
+            end do domainLoop2
+
+            ! Run the generic integer exchange
+            call wHalo1to1IntGeneric(1, level, sps, commPatternCell_2nd, internalCell_2nd)
+        end do spectralModes2
+
+    end subroutine setGlobalCellsAndNodes
+    subroutine setFamilyInfoFaces(level)
+        !
+        !       setFamilyInfoFaces sets the values of the family parameters
+        !       for faces on the given multigrid level. The default values for
+        !       indFamily is 0, which means that the mass flow through that
+        !       face does not contribute to the mass flow that must be
+        !       monitored. For sliding mesh interfaces both sides of the
+        !       interface are monitored and the value of indFamily corresponds
+        !       to one of the two entries in the local monitoring arrays. The
+        !       values of factFamily are such that the mass flow entering the
+        !       block is defined positive.
+        !       Note that only the 1st spectral solution is treated, because
+        !       this informations is the same for all of them.
+        !
+        use constants
+        use blockPointers
+        use cgnsGrid
+        use inputTimeSpectral
+        use monitor
+        use section
+        use utils, only: setPointers, terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: nn, mm, i, j, k, ii, nSlices
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
+
+        integer(kind=intType), dimension(cgnsNFamilies) :: orToMassFam
+
+        ! Set the offset ii for the values of orToMassFam. If the mass
+        ! flow through sliding mesh interfaces must be monitored this
+        ! offset if 2*cgnsNSliding. This means that in the arrays to store
+        ! the mass flow the sliding mesh interfaces are stored first,
+        ! followed by the families.
+
+        if (monMassSliding) then
+            ii = 2*cgnsNSliding
+        else
+            ii = 0
+        end if
+
+        ! Determine the number of families for which the mass flow must
+        ! be monitored and set the entries of orToMassFam accordingly,
+        ! i.e. the offset ii is included.
+
+        mm = ii
+        do nn = 1, cgnsNFamilies
+            if (cgnsFamilies(nn)%monitorMassflow .and. &
+                cgnsFamilies(nn)%BCType /= MassBleedInflow .and. &
+                cgnsFamilies(nn)%BCType /= MassBleedOutflow .and. &
+                cgnsFamilies(nn)%BCType /= SlidingInterface) then
+                mm = mm + 1
+                orToMassFam(nn) = mm
+            else
+                orToMassFam(nn) = 0
+            end if
+        end do
+
+        ! Set monMassFamilies to .true. if the mass flow of at least one
+        ! family must be monitored. Otherwise set it to .false.
+
+        if (mm > ii) then
+            monMassFamilies = .true.
+        else
+            monMassFamilies = .false.
+        end if
+
+        ! If this is the first level, allocate the memory for
+        ! massFlowFamilyInv and massFlowFamilyDiss.
+
+        if (level == 1) then
+            nn = nTimeIntervalsSpectral
+
+            allocate (massFlowFamilyInv(0:mm, nn), &
+                      massFlowFamilyDiss(0:mm, nn), stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("setFamilyInfoFaces", &
+                 "Memory allocation failure for &
+                 &massFlowFamilyInv and massFlowFamilyDiss")
+        end if
+
+        ! Loop over the number of domains.
+
+        domains: do nn = 1, nDom
+
+            ! Allocate the memory for indFamily and factFamily.
+
+            il = flowDoms(nn, level, 1)%il
+            jl = flowDoms(nn, level, 1)%jl
+            kl = flowDoms(nn, level, 1)%kl
+
+            allocate (flowDoms(nn, level, 1)%indFamilyI(1:il, 2:jl, 2:kl), &
+                      flowDoms(nn, level, 1)%indFamilyJ(2:il, 1:jl, 2:kl), &
+                      flowDoms(nn, level, 1)%indFamilyK(2:il, 2:jl, 1:kl), &
+                      flowDoms(nn, level, 1)%factFamilyI(1:il, 2:jl, 2:kl), &
+                      flowDoms(nn, level, 1)%factFamilyJ(2:il, 1:jl, 2:kl), &
+                      flowDoms(nn, level, 1)%factFamilyK(2:il, 2:jl, 1:kl), &
+                      stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("setFamilyInfoFaces", &
+                 "Memory allocation failure for indFamily &
+                 &and factFamily")
+
+            ! Set the pointers for this domain.
+
+            call setPointers(nn, level, 1_intType)
+
+            ! Determine the number of slices for this block to make
+            ! the full wheel.
+
+            nSlices = sections(sectionID)%nSlices
+
+            ! Initialize the values of indFamily and factFamily.
+
+            indFamilyI = 0_intType
+            indFamilyJ = 0_intType
+            indFamilyK = 0_intType
+
+            factFamilyI = 0_intType
+            factFamilyJ = 0_intType
+            factFamilyK = 0_intType
+
+            ! Loop over the boundary conditions.
+
+            boco: do mm = 1, nBocos
+
+                ! Test for the boundary condition.
+
+                select case (BCType(mm))
+                case (SlidingInterface)
+
+                    ! Sliding mesh boundary.
+                    ! If the mass flow through sliding interfaces must be monitored,
+                    ! determine the index in the arrays massFlowFamilyInv and
+                    ! massFlowFamilyDiss where to store the contribution of this
+                    ! subface. If the sliding mesh mass flows are not monitored,
+                    ! set the index to 0.
+
+                    if (monMassSliding) then
+                        ii = 2*abs(groupNum(mm))
+                        if (groupNum(mm) < 0) ii = ii - 1
+                    else
+                        ii = 0
+                    end if
+
+                case (MassBleedInflow, MassBleedOutflow)
+
+                    ! Inflow or outflow bleed. These boundary conditions are
+                    ! handled separetely and need not be monitored.
+
+                    ii = 0
+
+                case default
+
+                    ! Subface is an ordinary boundary condition. Determine the
+                    ! family ID and set the index ii in the arrays massFlowFamilyInv
+                    ! and massFlowFamilyDiss accordingly.
+
+                    if (groupNum(mm) > 0) then
+                        ii = orToMassFam(groupNum(mm))
+                    else
+                        ii = 0
+                    end if
+
+                end select
+
+                ! Set the owned cell range for the faces on this subface.
+                ! As icBeg, etc. may contain halo cells, inBeg, etc. is
+                ! used.
+
+                iBeg = min(inBeg(mm), inEnd(mm)) + 1
+                iEnd = max(inBeg(mm), inEnd(mm))
+
+                jBeg = min(jnBeg(mm), jnEnd(mm)) + 1
+                jEnd = max(jnBeg(mm), jnEnd(mm))
+
+                kBeg = min(knBeg(mm), knEnd(mm)) + 1
+                kEnd = max(knBeg(mm), knEnd(mm))
+
+                ! Determine the block this subface is located on and set
+                ! the corresponding values of indFamily and factFamily.
+                ! Note that factFamily is set to nSlices on min faces and to
+                ! -nSlices on max faces, such that the mass flow entering the
+                ! domain is defined positive and the mass flow of the entire
+                ! wheel is monitored.
 
                 select case (BCFaceID(mm))
                 case (iMin)
-                   iBeg = jnBeg(mm); iEnd = jnEnd(mm); iiMax = jl
-                   jBeg = knBeg(mm); jEnd = knEnd(mm); jjMax = kl
-                   x0 => x(0,:,:,:); x1 => x(1,:,:,:); x2 => x(2,:,:,:)
+                    do k = kBeg, kEnd
+                        do j = jBeg, jEnd
+                            indFamilyI(1, j, k) = ii
+                            factFamilyI(1, j, k) = nSlices
+                        end do
+                    end do
+
+                    !===========================================================
 
                 case (iMax)
-                   iBeg = jnBeg(mm); iEnd = jnEnd(mm); iiMax = jl
-                   jBeg = knBeg(mm); jEnd = knEnd(mm); jjMax = kl
-                   x0 => x(ie,:,:,:); x1 => x(il,:,:,:); x2 => x(nx,:,:,:)
+                    do k = kBeg, kEnd
+                        do j = jBeg, jEnd
+                            indFamilyI(il, j, k) = ii
+                            factFamilyI(il, j, k) = -nSlices
+                        end do
+                    end do
+
+                    !===========================================================
 
                 case (jMin)
-                   iBeg = inBeg(mm); iEnd = inEnd(mm); iiMax = il
-                   jBeg = knBeg(mm); jEnd = knEnd(mm); jjMax = kl
-                   x0 => x(:,0,:,:); x1 => x(:,1,:,:); x2 => x(:,2,:,:)
+                    do k = kBeg, kEnd
+                        do i = iBeg, iEnd
+                            indFamilyJ(i, 1, k) = ii
+                            factFamilyJ(i, 1, k) = nSlices
+                        end do
+                    end do
+
+                    !===========================================================
 
                 case (jMax)
-                   iBeg = inBeg(mm); iEnd = inEnd(mm); iiMax = il
-                   jBeg = knBeg(mm); jEnd = knEnd(mm); jjMax = kl
-                   x0 => x(:,je,:,:); x1 => x(:,jl,:,:); x2 => x(:,ny,:,:)
+                    do k = kBeg, kEnd
+                        do i = iBeg, iEnd
+                            indFamilyJ(i, jl, k) = ii
+                            factFamilyJ(i, jl, k) = -nSlices
+                        end do
+                    end do
+
+                    !===========================================================
 
                 case (kMin)
-                   iBeg = inBeg(mm); iEnd = inEnd(mm); iiMax = il
-                   jBeg = jnBeg(mm); jEnd = jnEnd(mm); jjMax = jl
-                   x0 => x(:,:,0,:); x1 => x(:,:,1,:); x2 => x(:,:,2,:)
+                    do j = jBeg, jEnd
+                        do i = iBeg, iEnd
+                            indFamilyK(i, j, 1) = ii
+                            factFamilyK(i, j, 1) = nSlices
+                        end do
+                    end do
+
+                    !===========================================================
 
                 case (kMax)
-                   iBeg = inBeg(mm); iEnd = inEnd(mm); iiMax = il
-                   jBeg = jnBeg(mm); jEnd = jnEnd(mm); jjMax = jl
-                   x0 => x(:,:,ke,:); x1 => x(:,:,kl,:); x2 => x(:,:,nz,:)
+                    do j = jBeg, jEnd
+                        do i = iBeg, iEnd
+                            indFamilyK(i, j, kl) = ii
+                            factFamilyK(i, j, kl) = -nSlices
+                        end do
+                    end do
+
                 end select
 
+            end do boco
+        end do domains
+
+    end subroutine setFamilyInfoFaces
+    subroutine shiftCoorAndVolumes
+        !
+        !       shiftCoorAndVolumes shifts the owned coordinates and
+        !       volumes in case of a deforming mesh for an unsteady
+        !       computation. In this case the old coordinates are needed to
+        !       determine the mesh velocities. The loop over the number of
+        !       spectral solutions is present for consistency, but this number
+        !       will be 1 when this routine is called.
+        !
+        use blockPointers
+        use inputTimeSpectral
+        use iteration
+        use utils, only: setPointers
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, nn, mm, ll, kk
+
+        ! Loop over the number of spectral solutions and local blocks.
+
+        spectralLoop: do kk = 1, nTimeIntervalsSpectral
+            domains: do nn = 1, nDom
+
+                ! Set the pointers for this block on the ground level.
+
+                call setPointers(nn, groundLevel, kk)
+
+                ! Shift the coordinates already stored in xOld and the
+                ! volumes stored in volOld.
+
+                loopOldLevels: do mm = nOldLevels, 2, -1
+
+                    ! Shift the coordinates from level mm-1 to mm, including
+                    ! the halo's.
+
+                    ll = mm - 1
+
+                    do k = 0, ke
+                        do j = 0, je
+                            do i = 0, ie
+                                xOld(mm, i, j, k, 1) = xOld(ll, i, j, k, 1)
+                                xOld(mm, i, j, k, 2) = xOld(ll, i, j, k, 2)
+                                xOld(mm, i, j, k, 3) = xOld(ll, i, j, k, 3)
+                            end do
+                        end do
+                    end do
+
+                    ! Shift the old volumes from level mm-1 to mm.
+                    ! Only the owned ones need to be considered.
+
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 2, il
+                                volOld(mm, i, j, k) = volOld(ll, i, j, k)
+                            end do
+                        end do
+                    end do
+
+                end do loopOldLevels
+
+                ! Shift the current coordinates into the 1st level of xOld.
+
+                do k = 0, ke
+                    do j = 0, je
+                        do i = 0, ie
+                            xOld(1, i, j, k, 1) = x(i, j, k, 1)
+                            xOld(1, i, j, k, 2) = x(i, j, k, 2)
+                            xOld(1, i, j, k, 3) = x(i, j, k, 3)
+                        end do
+                    end do
+                end do
+
+                ! Shift the current volumes into the 1st level of volOld.
 
-                ! Determine the vector from the lower left corner to
-                ! the upper right corner. Due to the usage of pointers
-                ! an offset of +1 must be used, because the original
-                ! array x start at 0.
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            volOld(1, i, j, k) = vol(i, j, k)
+                        end do
+                    end do
+                end do
 
-                v1(1) = x1(iimax+1,jjmax+1,1) - x1(1+1,1+1,1)
-                v1(2) = x1(iimax+1,jjmax+1,2) - x1(1+1,1+1,2)
-                v1(3) = x1(iimax+1,jjmax+1,3) - x1(1+1,1+1,3)
+            end do domains
+        end do spectralLoop
+
+    end subroutine shiftCoorAndVolumes
+    subroutine viscSubfaceInfo(level)
+        !
+        !       viscSubfaceInfo allocates the memory for the storage of the
+        !       stress tensor and heat flux vector of viscous subfaces for the
+        !       given multigrid level and all spectral solutions. Furthermore
+        !       the pointers viscIminPointer, etc. Are allocated and set.
+        !       These pointers contain info to which viscous subface the faces
+        !       of the block faces possibly belong. If not part of a viscous
+        !       subface these values are set to 0. Note that these pointers
+        !       are only allocated and determined for the 1st spectral
+        !       solution, because the info is the same for all of them.
+        !
+        use constants
+        use blockPointers
+        use inputTimeSpectral
+        use utils, only: setPointers, terminate
+        implicit none
+        !
+        !      Subroutine argument.
+        !
+        integer(kind=intType), intent(in) :: level
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: nn, mm, sps, i, j
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
+
+        integer(kind=intType), dimension(:, :), pointer :: viscPointer
+
+        ! Loop over the number blocks stored on this processor.
+
+        domains: do nn = 1, nDom
+
+            ! Set the pointers to the block of the 1st spectral solution.
+
+            call setPointers(nn, level, 1_intType)
+
+            ! Allocate the memory for viscSubface and the pointers
+            ! viscIminPointer, etc. ViscSubface must be allocated for
+            ! all spectral solutions, the pointers only for the 1st.
+
+            do sps = 1, nTimeIntervalsSpectral
+                allocate (flowDoms(nn, level, sps)%viscSubface(nViscBocos), &
+                          stat=ierr)
+                if (ierr /= 0) &
+                    call terminate("viscSubfaceInfo", &
+                                   "Memory allocation failure for viscSubface")
+            end do
+
+            allocate (flowDoms(nn, level, 1)%viscIminPointer(2:jl, 2:kl), &
+                      flowDoms(nn, level, 1)%viscImaxPointer(2:jl, 2:kl), &
+                      flowDoms(nn, level, 1)%viscJminPointer(2:il, 2:kl), &
+                      flowDoms(nn, level, 1)%viscJmaxPointer(2:il, 2:kl), &
+                      flowDoms(nn, level, 1)%viscKminPointer(2:il, 2:jl), &
+                      flowDoms(nn, level, 1)%viscKmaxPointer(2:il, 2:jl), &
+                      stat=ierr)
+            if (ierr /= 0) &
+                call terminate("viscSubfaceInfo", &
+                               "Memory allocation failure for subface info")
+
+            ! Reset the pointers viscIminPointer, etc. to make it more
+            ! readable and initialize them to 0. This indicates that
+            ! the faces are not part of a viscous wall subfaces.
+
+            viscIminPointer => flowDoms(nn, level, 1)%viscIminPointer
+            viscImaxPointer => flowDoms(nn, level, 1)%viscImaxPointer
+            viscJminPointer => flowDoms(nn, level, 1)%viscJminPointer
+            viscJmaxPointer => flowDoms(nn, level, 1)%viscJmaxPointer
+            viscKminPointer => flowDoms(nn, level, 1)%viscKminPointer
+            viscKmaxPointer => flowDoms(nn, level, 1)%viscKmaxPointer
+
+            viscIminPointer = 0
+            viscImaxPointer = 0
+            viscJminPointer = 0
+            viscJmaxPointer = 0
+            viscKminPointer = 0
+            viscKmaxPointer = 0
+
+            ! Loop over the viscous subfaces to allocate the memory for the
+            ! stress tensor and the heat flux vector and to set the range
+            ! in viscIminPointer, etc.
+
+            viscSubfaces: do mm = 1, nViscBocos
+
+                ! Store the cell range in iBeg, iEnd, etc. As the viscous data
+                ! do not allow for an overlap, the nodal range of the
+                ! subface must be used.
+
+                iBeg = BCData(mm)%inBeg + 1
+                iEnd = BCData(mm)%inEnd
+
+                jBeg = BCData(mm)%jnBeg + 1
+                jEnd = BCData(mm)%jnEnd
+
+                ! Loop over the spectral solutions and allocate the memory
+                ! for the stress tensor, heat flux and friction velocity.
+
+                do sps = 1, nTimeIntervalsSpectral
+
+                    ! Set the pointer for viscSubface to make the code
+                    ! more readable and allocate the memory.
+
+                    viscSubface => flowDoms(nn, level, sps)%viscSubface
+
+                    allocate (viscSubface(mm)%tau(iBeg:iEnd, jBeg:jEnd, 6), &
+                              viscSubface(mm)%q(iBeg:iEnd, jBeg:jEnd, 3), &
+                              viscSubface(mm)%utau(iBeg:iEnd, jBeg:jEnd), &
+                              stat=ierr)
+                    if (ierr /= 0) &
+                         call terminate("viscSubfaceInfo", &
+                         "Memory allocation failure for tau, q &
+                         &and utau.")
+                end do
 
-                ! And the vector from the upper left corner to the
-                ! lower right corner.
+                ! Set the pointer viscPointer, depending on the block face
+                ! on which the subface is located.
 
-                v2(1) = x1(iimax+1,1+1,1) - x1(1+1,jjmax+1,1)
-                v2(2) = x1(iimax+1,1+1,2) - x1(1+1,jjmax+1,2)
-                v2(3) = x1(iimax+1,1+1,3) - x1(1+1,jjmax+1,3)
+                select case (BCFaceID(mm))
+                case (iMin)
+                    viscPointer => viscIminPointer
 
-                ! Determine the normal of the face by taking the cross
-                ! product of v1 and v2 and add it to norm.
+                case (iMax)
+                    viscPointer => viscImaxPointer
 
-                norm(1) = v1(2)*v2(3) - v1(3)*v2(2)
-                norm(2) = v1(3)*v2(1) - v1(1)*v2(3)
-                norm(3) = v1(1)*v2(2) - v1(2)*v2(1)
+                case (jMin)
+                    viscPointer => viscJminPointer
 
-                ! Check if BCData is allocated yet:
-                if (.not. bcData(mm)%symNormSet) then
-                   length = sqrt(norm(1)**2 + norm(2)**2 + norm(3)**2)
-                   if (length == 0) then
-                      length = eps
-                   end if
-                   bcData(mm)%symNorm(1) = norm(1)/length
-                   bcData(mm)%symNorm(2) = norm(2)/length
-                   bcData(mm)%symNorm(3) = norm(3)/length
-                   bcData(mm)%symNormSet = .True.
-                else
+                case (jMax)
+                    viscPointer => viscJmaxPointer
 
-                   ! Check that the orientation of norm() is not
-                   ! different from the stored one:
-                   length = sqrt(norm(1)**2 + norm(2)**2 + norm(3)**2)
-                   if (length > eps) then
-                      tmp = norm / length
-                      tmp2 = bcData(mm)%symNorm
-                      dot = dot_product(tmp, tmp2)
-                      if (abs(dot) < tolDotmin) then
-                         print *, 'Symmetry Plane normal has changed from initial configuration. Resetting.'
-                         print *, 'This may cause a slightly inaccurate gradient!'
-                         bcData(mm)%symNorm(1) = norm(1)
-                         bcData(mm)%symNorm(2) = norm(2)
-                         bcData(mm)%symNorm(3) = norm(3)
-                      end if
-                   end if
-
-                   ! Copy out the saved symNorm
-                   norm(1) = bcData(mm)%symNorm(1)
-                   norm(2) = bcData(mm)%symNorm(2)
-                   norm(3) = bcData(mm)%symNorm(3)
-                end if
+                case (kMin)
+                    viscPointer => viscKminPointer
 
-                ! Compute the length of the normal and test if this is
-                ! larger than eps. If this is the case this means that
-                ! it is a nonsingular subface and the coordinates are
-                ! corrected.
-
-                length = sqrt(norm(1)**2 + norm(2)**2 + norm(3)**2)
-
-                testSingular: if(length > eps) then
-
-                   ! Compute the unit normal of the subface.
-
-                   norm(1) = norm(1)/length
-                   norm(2) = norm(2)/length
-                   norm(3) = norm(3)/length
-
-                   ! Add an overlap to the symmetry subface if the
-                   ! boundaries coincide with the block boundaries.
-                   ! This way the indirect halo's are treated properly.
-
-                   if(iBeg == 1)     iBeg = 0
-                   if(iEnd == iiMax) iEnd = iiMax + 1
-
-                   if(jBeg == 1)     jBeg = 0
-                   if(jEnd == jjMax) jEnd = jjMax + 1
-
-                   ! Loop over the nodes of the subface and set the
-                   ! corresponding halo coordinates.
-
-                   do j=jBeg,jEnd
-                      do i=iBeg,iEnd
-
-                         ! Determine the vector from the internal node to the
-                         ! node on the face. Again an offset of +1 must be
-                         ! used, due to the usage of pointers.
-
-                         v1(1) = x1(i+1,j+1,1) - x2(i+1,j+1,1)
-                         v1(2) = x1(i+1,j+1,2) - x2(i+1,j+1,2)
-                         v1(3) = x1(i+1,j+1,3) - x2(i+1,j+1,3)
-
-                         ! Determine two times the normal component of this
-                         ! vector; this vector must be added to the
-                         ! coordinates of the internal node to obtain the
-                         ! halo coordinates. Again the offset of +1.
-
-                         dot = two*(v1(1)*norm(1) + v1(2)*norm(2) &
-                              +      v1(3)*norm(3))
-
-                         x0(i+1,j+1,1) = x2(i+1,j+1,1) + dot*norm(1)
-                         x0(i+1,j+1,2) = x2(i+1,j+1,2) + dot*norm(2)
-                         x0(i+1,j+1,3) = x2(i+1,j+1,3) + dot*norm(3)
-
-                      enddo
-                   enddo
-                endif testSingular
-             endif testSymmetry
-          enddo loopBocos
-       enddo domains
-    enddo spectralLoop
-
-    !
-    !       Exchange the coordinates for the internal halo's.
-    !
-    call exchangeCoor(level)
-
-  end subroutine xhalo
-
-  subroutine setSurfaceFamilyInfo
-
-    use constants
-    use su_cgns
-    use blockPointers, onlY : nDom, flowDoms, nBocos, cgnsSubFace, BCType, BCData
-    use cgnsGrid, onlY : cgnsDoms
-    use communication, only : myid, adflow_comm_world, nProc
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use surfaceFamilies, only : BCFamExchange, famNames, fullFamList, &
-         zeroCellVal, zeroNodeVal, oneCellVal, BCFamgroups
-    use utils, only : setPointers, EChk, pointReduce, terminate, convertToLowerCase
-    use sorting, only : qsortStrings, bsearchStrings, famInList
-    use surfaceUtils, only : getSurfaceSize
-    implicit none
-
-    integer :: ierr
-    integer(kind=intType) :: nLevels, level, nn, mm, nsMin, nsMax, i, j, k, nFam, famID, cgb, iFam
-    integer(kind=intType) :: sps, isizemax, jsizemax, totalFamilies, totalWallFamilies
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, ii, iBCGroup, totalBCFamilies
-    character(maxCGNSNameLen), dimension(25) :: defaultFamName
-    character(maxCGNSNameLen) :: curStr, family
-    character(maxCGNSNameLen), dimension(:), allocatable :: uniqueFamListNames
-    integer(kind=intType), dimension(:), allocatable :: localFlag, famIsPartOfBCGroup
-    integer(kind=intType), dimension(:), allocatable :: localIndices, nodeSizes, nodeDisps
-    integer(kind=intType) :: iProc, nodeSize, cellSize
-
-    ! Process out the family information. The goal here is to
-    ! assign a unique integer to each family in each boundary
-    ! condition. The CGNS grid has all the information we need.
-
-    ! Firstly make sure that there is actual family specified for
-    ! each BC. If there isn't, we will provide one for you.
-    defaultFamName(BCAxisymmetricWedge) = 'axi'
-    defaultFamName(BCDegenerateLine) = 'degenerate'
-    defaultFamName(BCDegeneratePoint) ='degenerate'
-    defaultFamName(BCDirichlet) = 'dirichlet'
-    defaultFamName(BCExtrapolate) = 'extrap'
-    defaultFamName(BCFarfield) = 'far'
-    defaultFamName(BCGeneral) = 'general'
-    defaultFamName(BCInflow) = 'inflow'
-    defaultFamName(BCInflowSubsonic) = 'inflow'
-    defaultFamName(BCInflowSupersonic) = 'inflow'
-    defaultFamName(BCNeumann) = 'neumann'
-    defaultFamName(BCOutflow) = 'outflow'
-    defaultFamName(BCOutflowSubsonic) = 'outflow'
-    defaultFamName(BCOutflowSupersonic)  ='outflow'
-    defaultFamName(BCSymmetryPlane) = 'sym'
-    defaultFamName(BCSymmetryPolar) = 'sympolar'
-    defaultFamName(BCTunnelInflow) = 'inflow'
-    defaultFamName(BCTunnelOutflow) = 'outflow'
-    defaultFamName(BCWall) = 'wall'
-    defaultFamName(BCWallInviscid) = 'wall'
-    defaultFamName(BCWallViscous) = 'wall'
-    defaultFamName(BCWallViscousHeatFlux) = 'wall'
-    defaultFamName(BCWallViscousIsothermal) = 'wall'
-    defaultFamName(UserDefined) = 'userDefined'
-
-    nFam = 0
-    do i=1, size(cgnsDoms)
-       do j=1, size(cgnsDoms(i)%bocoInfo)
-          if (cgnsDoms(i)%bocoInfo(j)%actualFace) then
-             if (trim(cgnsDoms(i)%bocoInfo(j)%wallBCName) == "") then
-                if (myid == 0) then
-                   ! Tell the user we are adding an automatic family name
-                   write(*, "(2(A, I4), *(A))") "CGNS Block ", i, ", boundary condition ", j, ", of type ", &
-                   trim(BCTypeName(cgnsDoms(i)%bocoInfo(j)%BCTypeCGNS)), &
-                   " does not have a family. Based on the boundary condition type,", &
-                   " a name of: '", trim(defaultFamName(cgnsDoms(i)%bocoInfo(j)%BCTypeCGNS)), "' will be used."
-                end if
-                cgnsDoms(i)%bocoInfo(j)%wallBCName = trim(defaultFamName(cgnsDoms(i)%bocoInfo(j)%BCTypeCGNS))
-             end if
-             nFam = nFam + 1
-          end if
-       end do
-    end do
-
-    ! Allocate space for the full family list
-    allocate(famNames(nFam))
-    nFam = 0
-    do i=1, size(cgnsDoms)
-       do j=1, size(cgnsDoms(i)%bocoInfo)
-          if (cgnsDoms(i)%bocoInfo(j)%actualFace) then
-             nFam = nFam + 1
-             famNames(nfam) = cgnsDoms(i)%bocoInfo(j)%wallBCName
-             call convertToLowerCase(famNames(nFam))
-          end if
-       end do
-    end do
-
-    ! Now sort the family names:
-    call qsortStrings(famNames, nFam)
-
-    ! Next we need to generate a unique set of names.
-    allocate(uniqueFamListNames(nFam))
-
-    curStr = famNames(1)
-    uniqueFamListNames(1) = curStr
-    j = 1
-    i = 1
-    do while(i < nFam)
-
-       i = i + 1
-       if (famNames(i) == curStr) then
-          ! Same str, do nothing.
-       else
-          j = j + 1
-          curStr = famNames(i)
-          uniqueFamListNames(j) = curStr
-       end if
-    end do
-
-
-    totalFamilies = j
-    ! Now copy the uniqueFamListNames back to "famNames" and allocate
-    ! exactly the right size.
-    deallocate(famNames)
-    allocate(famNames(totalFamilies))
-    famNames(1:totalFamilies) = uniqueFamListNames(1:totalFamilies)
-    deallocate(uniqueFamListNames)
-
-    ! Now each block boundary condition can uniquely determine it's
-    ! famID. We do all BC on all blocks and levels.
-    nLevels = ubound(flowDoms,2)
-    do nn=1, nDom
-       call setPointers(nn, 1_intType, 1_intType)
-       do mm=1, nBocos
-
-          cgb = flowDoms(nn, 1, 1)%cgnsBlockID
-          family = cgnsDoms(cgb)%bocoInfo(cgnsSubface(mm))%wallBCName
-          call convertToLowerCase(family)
-
-          famID = bsearchStrings(family, famNames)
-          if (famID == 0) then
-             ! Somehow we never found the family...
-             call terminate("setSurfaceFamilyInfo", &
-                  "An error occuring in assigning families")
-          end if
-
-          ! Now set the data on each of the level/sps instances
-          do sps=1, nTimeIntervalsSpectral
-             do level=1,nlevels
-
-                flowDoms(nn, level, sps)%bcData(mm)%famID = famID
-                flowDoms(nn, level, sps)%bcData(mm)%family = family
-
-             end do
-          end do
-       end do
-    end do
-
-    ! Next we need to group the families based on their boundary
-    ! condition. The reason for this is that we generate the reduction
-    ! scatterd based on groups of BC types. Specifically the following groups:
-
-    ! 1. Walls : EulerWall, NSWallAdiabatic, NSWallIsothermal
-    ! 2. Symm : Symm, SymmPolar
-    ! 3. Inflow/Outflow : subSonicInflow, subSonicOutflow, supersonicInflow, superSonicOutflow
-    ! 4. Farfield : Farfield
-    ! 5. Overset : OversetouterBound
-    ! 6. Others : All remaining BCs
-
-    ! The final familyExchange structure.
-    allocate(BCFamExchange(nFamExchange, nTimeIntervalsSpectral), localFlag(totalFamilies))
-
-    BCGroupLoop: do iBCGroup=1, nfamExchange
-       localFlag = 0
-       ! Determine which of the unique families match the specific
-       ! BCGroup.  This is slightly inefficient but not it isn't
-       ! performance critical.
-       famLoop: do iFam=1, totalFamilies
-          domainLoop: do nn=1,nDom
-             call setPointers(nn, 1_intType, 1_intType)
-             bocoLoop: do mm=1, nBocos
-                matchiFam: if (flowDoms(nn, 1, 1)%bcData(mm)%famID == iFam) then
-                   select case(iBCGroup)
-
-                   case (iBCGroupWalls)
-                      if (BCType(mm) == EulerWall .or. &
-                           BCType(mm) == NSWallAdiabatic .or. &
-                           BCType(mm) == NSwallIsoThermal) then
-                         localFlag(iFam) = 1
-                      end if
-
-                   case (iBCGroupInflow)
-                      if (BCType(mm) == SubsonicInflow .or. &
-                           BCType(mm) == SupersonicInflow) then
-                         localFlag(iFam) = 1
-                      end if
-
-                   case (iBCGroupOutflow)
-                      if (BCType(mm) == SubsonicOutflow .or. &
-                           BCType(mm) == SupersonicOutflow) then
-                         localFlag(iFam) = 1
-                      end if
-
-                   case (iBCGroupSymm)
-                      if (BCType(mm) == Symm .or. BCType(mm) == SymmPolar) then
-                         localFlag(iFam) = 1
-                      end if
-
-                   case (iBCGroupFarfield)
-                      if (BCType(mm) == Farfield) then
-                         localFlag(iFam) = 1
-                      end if
-
-                   case (iBCGroupOverset)
-                      if (BCType(mm) == OversetOuterBound) then
-                         localFlag(iFam) = 1
-                      end if
-
-                   case (iBCGroupOther)
-                      ! All other boundary conditions. Note that some
-                      ! of these are not actually implemented
-                      if (BCType(mm) == BCNull .or. &
-                           BCType(mm) == MassBleedInflow .or. &
-                           BCType(mm) == MassbleedOutflow .or. &
-                           BCType(mm) == mDot .or. &
-                           BCType(mm) == BCThrust .or. &
-                           BCType(mm) == Extrap .or. &
-                           BCType(mm) == B2BMatch .or. &
-                           BCType(mm) == B2BMisMatch .or. &
-                           BCType(mm) == SlidingInterface .or. &
-                           BCType(mm) == DomainInterfaceAll .or. &
-                           BCType(mm) == DomainInterfaceRhoUVW .or. &
-                           BCType(mm) == DomainInterfaceP .or. &
-                           BCType(mm) == DomainInterfaceRho .or. &
-                           BCType(mm) == DomainInterfaceTotal) then
-                         localFlag(iFam) = 1
-                      end if
-                   end select
-                end if matchiFam
-             end do bocoLoop
-          end do domainLoop
-       end do famLoop
-
-       ! All Reduce so all procs know the same information.
-       allocate(famIsPartOfBCGroup(totalFamilies))
-       call mpi_allreduce(localFlag, famIsPartOfBCGroup, totalFamilies, &
-            adflow_integer, MPI_SUM, adflow_comm_world, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-
-       ! Count up the number of families this BC has.
-       totalBCFamilies = 0
-       do i=1, totalFamilies
-          if (famIsPartOfBCGroup(i) > 0) then
-             totalBCFamilies = totalBCFamilies + 1
-          end if
-       end do
-
-       ! Allocate the space for the list of fam for each BC and set.
-       allocate(BCFamGroups(iBCGroup)%famList(totalBCFamilies))
-       k = 0
-       do i=1, totalFamilies
-          if (famIsPartOfBCGroup(i) > 0) then
-             k = k + 1
-             BCFamGroups(iBCGroup)%famList(k) = i
-          end if
-       end do
-       deallocate(famIsPartOfBCGroup)
-    end do BCGroupLoop
-
-    ! Dump a little information out to the user giving the family and
-    ! the BC types. This will probably be useful in general.
-
-    if (myid == 0) then
-       write(*, "(a)") '+--------------------------------------------------+'
-       write(*, "(a)") '  CGNS Surface Families by Boundary Condition Type'
-       write(*, "(a)") '+--------------------------------------------------+'
-
-       do iBCGroup=1,6
-          select case(iBCGroup)
-          case (iBCGroupWalls)
-             write(*,"(a)",advance="no") '| Wall Types           : '
-          case (iBCGroupInflow)
-             write(*,"(a)",advance="no") '| Inflow Types         : '
-          case (iBCGroupOutflow)
-             write(*,"(a)",advance="no") '| Outflow Types        : '
-          case (iBCGroupSymm)
-             write(*,"(a)",advance="no") '| Symmetry Types       : '
-          case (iBCGroupFarfield)
-             write(*,"(a)",advance="no") '| Farfield Types       : '
-          case (iBCGroupOverset)
-             write(*,"(a)",advance="no") '| Overset Types        : '
-          case (iBCGroupOther)
-             write(*,"(a)",advance="no") '| Other Types          : '
-          end select
-
-          do i=1,size(BCFamGroups(iBCGroup)%famList)
-             write(*,"(a,1x)",advance="no") trim(famNames(BCFamGroups(iBCGroup)%famList(i)))
-          end do
-          print "(1x)"
-       end do
-       write(*, "(a)") '+--------------------------------------------------+'
-    end if
-
-    ! Generate the node scatters for each family. This will also tell
-    ! us the surfaceIndex for each BC. This is the index into the
-    ! *gloablly reduced vector*. This is what we will need for tecplot
-    ! output as well as the zipper mesh computations.
-
-    do iBCGroup=1, nFamExchange
-       do sps=1,nTimeIntervalsSpectral
-          call createNodeScatterForFamilies(&
-               BCFamGroups(iBCGroup)%famList, BCFamExchange(iBCGroup, sps), sps, localIndices)
-
-          ! this won't include the zipper nodes since that isn't done yet.
-          call getSurfaceSize(nodeSize, cellSize, BCFamGroups(iBCGroup)%famList, &
-               size(BCFamGroups(iBCGroup)%famlist), .False.)
-          allocate(nodeSizes(nProc), nodeDisps(0:nProc))
-          nodeSizes = 0
-          nodeDisps = 0
-
-          call mpi_allgather(nodeSize, 1, adflow_integer, nodeSizes, 1, adflow_integer, &
-               adflow_comm_world, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-          nodeDisps(0) = 0
-          do iProc=1, nProc
-             nodeDisps(iProc) = nodeDisps(iProc-1) + nodeSizes(iProc)
-          end do
-
-
-          ii = 0
-          do nn=1, nDom
-             call setPointers(nn, 1, sps)
-             do mm=1, nBocos
-                famInclude: if (famInList(BCData(mm)%famId, BCFamGroups(iBCGroup)%famList)) then
-                   iBeg = BCData(mm)%inbeg; iEnd = BCData(mm)%inend
-                   jBeg = BCData(mm)%jnbeg; jEnd = BCData(mm)%jnend
-                   do j=jBeg, jEnd
-                      do i=iBeg, iEnd
-                         ii = ii + 1
-                         BCData(mm)%surfIndex(i,j) = ii + nodeDisps(myid)
-                      end do
-                   end do
-                end if famInclude
-             end do
-          end do
-          deallocate(localIndices, nodeSizes, nodeDisps)
-       end do
-    end do
-    ! Allocate  arrays that have the maximum face size. These may
-    ! be slightly larger than necessary, but that's ok. We just need
-    ! somethwere to point the pointers.
-    isizemax = 0
-    jsizemax = 0
-    do nn=1,nDom
-       isizemax = max(isizemax, flowDoms(nn, 1, 1)%ie)
-       isizemax = max(isizemax, flowDoms(nn, 1, 1)%je)
-
-       jsizemax = max(jsizemax, flowDoms(nn, 1, 1)%je)
-       jsizemax = max(jsizemax, flowDoms(nn, 1, 1)%ke)
-    end do
-
-
-    ! Allocate generic arrays for the cell and nodes. These will be
-    ! used when a BC is not included in a computed but needs to be
-    ! point somehwere.
-    allocate(zeroCellVal(isizemax, jsizemax), zeroNodeVal(isizemax, jsizemax), oneCellVal(isizemax, jsizemax))
-    oneCellVal = one
-    zeroCellVal = zero
-    zeroNodeVal = zero
-
-    ! Finally, create the shortcut array for all families. This is just
-    ! 1,2,3..totalFamilies.
-    allocate(fullFamList(totalFamilies))
-    do i=1, totalFamilies
-       fullFamList(i) = i
-    end do
-
-  end subroutine setSurfaceFamilyInfo
-
-  subroutine createNodeScatterForFamilies(famList, exch, sps, localIndices)
-
-    ! The purpose of this routine is to create the appropriate data
-    ! structures that allow for the averaging of cell based surface
-    ! quantities to node-based quantities. The primary reason for this
-    ! is that the viscous stress tensor is not available at halo cells
-    ! and therefore it is not possible to create consistent node-based
-    ! values locallly. What the scatter does is allows us to sum the
-    ! nodal values across processors, average them and finally update
-    ! the node based values to be consistent. This operation is
-    ! necessary for several operations:
-
-    ! 1. Integration of forces over zipper triangles requires force/area
-    ! at nodes.
-    ! 2. Lift distributions/slices also requires node-based tractions
-    ! 3. Node-based output for tecplot files.
-    use constants
-    use communication, only : adflow_comm_world, myid, nProc
-    use surfaceFamilies, only : familyExchange, IS1, IS2!, PETSC_COPY_VALUES, PETSC_DETERMINE
-    use utils, only : pointReduce, eChk
-    use surfaceUtils
-    implicit none
-
-    ! Input Parameters
-    integer(kind=intType) , dimension(:), intent(in) :: famList
-    integer(kind=intType) , intent(in) :: sps
-    type(familyExchange), intent(inout) :: exch
-    integer(kind=intType), dimension(:), intent(out), allocatable :: localIndices
-
-    ! Working param
-    integer(kind=intType) :: i,j, ierr, nNodesLocal, nNodesTotal, nCellsLocal, nFam
-    integer(kind=intType) :: nUnique, iSize, iStart, iEnd, iProc
-    real(kind=realType), dimension(:, :), allocatable :: localNodes, allNodes
-    real(kind=realType), dimension(:, :), allocatable :: uniqueNodes
-    integer(kind=intType), dimension(:), allocatable :: link, startIndices, endIndices
-    integer(kind=intType), dimension(:), allocatable :: nNodesProc, cumNodesProc
-    real(kind=realType) :: tol
-    integer(kind=intType) :: mpiStatus(MPI_STATUS_SIZE)
-
-    ! Save the family list.
-    nFam = size(famList)
-    allocate(exch%famList(nFam))
-    exch%famList = famList
-    exch%sps = sps
-
-    ! Determine the total number of nodes and cells on this
-    ! processor. This will include the zipper mesh if there is one.
-    call getSurfaceSize(nNodesLocal, nCellsLocal, famList, nFam, .True.)
-
-    ! Allocate the space to store nodal values, connectivity and the
-    ! family of each element
-    exch%nNodes = nNodesLocal
-
-    ! Allocate space for the some arrays
-    allocate(localNodes(3, nNodesLocal), nNodesProc(nProc), cumNodesProc(0:nProc))
-    call getSurfacePoints(localNodes, nNodesLocal, sps, famList, nFam, .True.)
-
-    ! Determine the total number of nodes on each proc
-    call mpi_allgather(nNodesLocal, 1, adflow_integer, nNodesProc, 1, adflow_integer, &
-         adflow_comm_world, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Determine cumulative version
-    cumNodesProc(0) = 0_intType
-    nNodesTotal = 0
-    do i=1, nProc
-       nNodesTotal = nNodesTotal + nNodesProc(i)
-       cumNodesProc(i) = cumNodesProc(i-1) + nNodesProc(i)
-    end do
-
-    ! Send all the nodes to everyone
-    allocate(allNodes(3, nNodesTotal))
-    call mpi_allgatherv(localNodes, nNodesLocal*3, adflow_real, allNodes, &
-         nNodesProc*3, cumNodesProc*3, adflow_real, adflow_comm_world, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Local nodes is no longer necessary
-    deallocate(localNodes)
-
-    ! Now point reduce
-    allocate(uniqueNodes(3, nNodestotal), link(nNodestotal))
-    tol = 1e-12
-
-    call pointReduce(allNodes, nNodesTotal, tol, uniqueNodes, link, nUnique)
-
-    ! We can immediately discard everything but link since we are only
-    ! doing logical operations here:
-    deallocate(uniqueNodes, allNodes)
-
-    ! Now back out the global indices for our local points
-    if (allocated(localIndices)) then
-       deallocate(localIndices)
-    end if
-    allocate(localIndices(nNodesLocal))
-    do i=1, nNodesLocal
-       ! The -1 is to convert to 0-based ordering for petsc
-       localIndices(i) = link(cumNodesProc(myid) + i)-1
-    end do
-
-    ! Create the basic (scalar) local vector
-    call VecCreateMPI(ADFLOW_COMM_WORLD, nNodesLocal, PETSC_DETERMINE, &
-         exch%nodeValLocal, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! Create the basic global vector. This is slightly tricker than it
-    ! sounds. We could just make it uniform, but then there would be
-    ! more communicaiton than necessary. Instead what we do is determine
-    ! the min and max range of local indices on the proc and the one
-    ! before it. A little diagram will help
-    !
-    ! Proc 0 +---------------------+
-    ! Proc 1                    +-------------+
-    ! Proc 2                                +----------------+
-    !
-    ! Proc zero has a many global nodes as local since they are by
-    ! definition all unqiue. Proc 1 then will start at 1 more than the
-    ! proc 0 and continue to it's maximum value. Proc 2 starts at the
-    ! end of proc 1 etc. This way the vast majority of the global nodes
-    ! are owned locally.
-
-    ! In order to determine the owning range for each processor, it is
-    ! much trickier than it sounds. We do a linear cascasde through the
-    ! procs sending the upper range from proc 0 to proc 1, then proc1 to
-    ! proc 2 and so on.
-
-    ! Proc zero owns all of it's nodes.
-    if (myid == 0) then
-       iStart = 0
-       if (nNodesLocal == 0) then
-          iEnd = 0
-       else
-          iEnd = maxval(localIndices) + 1
-       end if
-    end if
-
-    do iProc=0, nProc-2
-       if (myid == iProc) then
-          ! I need to send my iEnd to proc+1
-          call mpi_send(iEnd, 1, adflow_integer, iProc+1, iProc, adflow_comm_world, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-       else if(myid == iProc+1) then
-
-          ! Receive the value from the proc below me:
-          call mpi_recv(iEnd, 1, adflow_integer, iProc, iProc, adflow_comm_world, mpiStatus, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          ! On this proc, the start index is the
-          iStart = iEnd
-          if (nNodesLOCAl == 0) then
-             iEnd = iStart
-          else
-             iEnd = max(iStart, maxval(localIndices)+1)
-          end if
-       end if
-    end do
-
-    iSize = iEnd-iStart
-    ! Create the actual global vec. Note we also include nUnique to make
-    ! sure we have all the local sizes correct.
-    call VecCreateMPI(ADFLOW_COMM_WORLD, iSize, nUnique, &
-         exch%nodeValGlobal, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    call VecDuplicate(exch%nodeValGlobal, exch%sumGlobal, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! Now create the scatter that goes from the local vec to the global
-    ! vec.
-
-    ! Indices for the local vector is just a stride, starting at the
-    ! offset
-    call ISCreateStride(ADFLOW_COMM_WORLD, nNodesLocal, cumNodesProc(myid), &
-         1, IS1, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! Indices for the global vector are the "localIndices" we previously
-    ! computed.
-    call ISCreateGeneral(adflow_comm_world, nNodesLocal, localIndices, &
-         PETSC_COPY_VALUES, IS2, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    call VecScatterCreate(exch%nodeValLocal, IS1, exch%nodeValGlobal, IS2, &
-         exch%scatter, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! And dont' forget to destroy the index sets
-    call ISDestroy(IS1, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    call ISDestroy(IS2, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    exch%allocated = .True.
-  end subroutine createNodeScatterForFamilies
-
-
-  subroutine setReferenceVolume(level)
-
-    use constants
-    use blockPointers, only : nDom, flowDoms, ib, jb, kb
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use utils, only : setPointers
-    implicit none
-    integer :: ierr
-
-    integer(kind=intType), intent(in) :: level
-    integer(kind=intType) :: nn, sps
-    integer(kind=intType) :: i,j, k
-
-    spectral: do sps=1,nTimeIntervalsSpectral
-       domains: do nn=1,nDom
-          call setPointers(nn, level, sps)
-          allocate(flowDoms(nn, level, sps)%volRef(0:ib, 0:jb, 0:kb))
-
-          do k=0, kb
-             do j=0, jb
-                do i=0, ib
-                   flowDoms(nn, level, sps)%volRef(i, j, k) = &
-                        flowDoms(nn, level, sps)%vol(i, j, k)
-                end do
-             end do
-          end do
-       end do domains
-    end do spectral
-  end subroutine setReferenceVolume
-
-  subroutine setGlobalCellsAndNodes(level)
-    !
-    !      Determine the global node numbering that is used to assemble
-    !      the adjoint system of equations. It take cares of all the halo
-    !      nodes between the blocks.
-    !      The nodes are numbered according to the following sequence:
-    !      loop processor = 1, nProc
-    !        loop domain = 1, nDom
-    !          loop k = 2, kl
-    !            loop j = 2, jl
-    !              loop i = 2, il
-    !      Only the onwned nodes are numbered, meaning i/j/k span from 2
-    !      to il/jl/kl. The halo nodes receive the numbering from the
-    !      neighboring block that owns them.
-    !      These variables are the same for all spectral modes, therefore
-    !      only the 1st mode needs to be communicated.
-    !      This function will also set FMPointer which is only defined
-    !      on wall boundary conditions and points to the correct index
-    !      for the vectors that are of shape nsurface nodes
-    !
-    use ADjointVars
-    use blockpointers
-    use communication
-    use inputTimeSpectral
-    use utils, only: setPointers, terminate
-    use haloExchange, only : whalo1to1intgeneric
-    use wallDistanceData, only : nCellBlockOffset
-    implicit none
-
-    ! Input variables
-    integer(kind=intType), intent(in) :: level
-
-    ! Local variables
-    integer(kind=intType) :: nn, i, j, k, sps, iDim
-    integer(kind=intType) :: ierr, istart
-    logical :: commPressure, commLamVis, commEddyVis, commGamma
-    integer(kind=intType), dimension(nProc) :: nNodes, nCells, nCellOffset, nNodeOffset
-    integer(kind=intType), dimension(nDom) :: nNodeBLockOffset
-    integer(kind=intType) :: npts, nCell, nNode
-    integer(kind=intType), dimension(:), allocatable :: nNodesProc, cumNodesProc
-    integer(kind=intTYpe), dimension(:), allocatable :: nCellsProc, cumCellsProc
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, ii, jj,mm
-
-    do sps=1, nTimeIntervalsSpectral
-       do nn=1, nDom
-          call setPointers(nn, level, sps)
-          ! Allocate memory for the cell and node indexing...only on sps=1
-          allocate(flowDoms(nn,level,sps)%globalCell(0:ib,0:jb,0:kb), &
-               flowDoms(nn,level,sps)%globalNode(0:ie,0:je,0:ke), stat=ierr)
-          if (ierr /=0) then
-             call terminate("setGlobalCellsAndNodes", "Allocation failure for globalCell/Node")
-          end if
-          ! Assign a 'magic number' of -5 to globalCell and global Node:
-          flowDoms(nn,level,sps)%globalCell = -5
-          flowDoms(nn,level,sps)%globalNode = -5
-       end do
-    end do
-
-    ! Determine the number of nodes and cells owned by each processor
-    ! by looping over the local block domains.
-    nCellsLocal(level) = 0
-    nNodesLocal(level) = 0
-    do nn=1,nDom
-       ! Set to first spectral instance since we only need sizes
-       call setPointers(nn, level, 1_intType)
-       nCellsLocal(level) = nCellsLocal(level) + nx*ny*nz
-       nNodesLocal(level) = nNodesLocal(level) + il*jl*kl
-    enddo
-
-    ! Reduce the number of cells in all processors: add up nCellsLocal
-    ! into nCellsGlobal and sends the result to all processors.
-    ! (use mpi sum operation)
-
-    call mpi_allreduce(nCellsLocal(level), nCellsGlobal(level), 1, adflow_integer, &
-         mpi_sum, ADflow_comm_world, ierr)
-
-    ! Gather the number of Cells per processor in the root processor.
-    call mpi_gather(nCellsLocal(level), 1, adflow_integer, nCells, 1, &
-         adflow_integer, 0, ADflow_comm_world, ierr)
-
-    ! Repeat for the number of nodes.
-    ! (use mpi sum operation)
-    call mpi_allreduce(nNodesLocal(level), nNodesGlobal(level), 1, adflow_integer, &
-         mpi_sum, ADflow_comm_world, ierr)
-
-    ! Gather the number of nodes per processor in the root processor.
-    call mpi_gather(nNodesLocal(level), 1, adflow_integer, nNodes, 1, &
-         adflow_integer, 0, ADflow_comm_world, ierr)
-
-    ! Determine the global cell number offset for each processor.
-    rootProc: if( myID==0) then
-       nCellOffset(1) = 0
-       nNodeOffset(1) = 0
-       do nn=2,nProc
-          nCellOffset(nn) = nCellOffset(nn-1) + nCells(nn-1)
-          nNodeOffset(nn) = nNodeOffset(nn-1) + nNodes(nn-1)
-       enddo
-    endif rootProc
-
-    ! Scatter the global cell number offset per processor.
-    call mpi_scatter(nCellOffset, 1, adflow_integer, nCellOffsetLocal(level), 1, &
-         adflow_integer, 0, ADflow_comm_world, ierr)
-
-    ! Determine the global cell number offset for each local block.
-    nCellBlockOffset(level, 1) = nCellOffsetLocal(level)
-    do nn=2,nDom
-       call setPointers(nn-1, level, 1)
-       nCellBlockOffset(level, nn) = nCellBlockOffset(level, nn-1)          &
-            + nx*ny*nz
-    enddo
-
-    ! Repeat for nodes.
-    call mpi_scatter(nNodeOffset, 1, adflow_integer, nNodeOffsetLocal(level), 1, &
-         adflow_integer, 0, ADflow_comm_world, ierr)
-
-    ! Determine the global node number offset for each local block.
-    nNodeBlockOffset(1) = nNodeOffsetLocal(level)
-    do nn=2,nDom
-       call setPointers(nn-1, level, 1)
-       nNodeBlockOffset(nn) = nNodeBLockOffset(nn-1) + il*jl*kl
-    enddo
-
-    ! Determine the global block row index for each (i,j,k) cell in
-    ! each local block.
-
-    do nn=1, nDom
-       do sps=1, nTimeIntervalsSpectral
-          call setPointers(nn, level, sps)
-          do k=2, kl
-             do j=2, jl
-                do i=2, il
-                   ! modified Timespectral indexing. Put all time
-                   ! instances of a give block adjacent to each other in
-                   ! the matrix
-                   globalCell(i, j, k) = &
-                        nCellBLockOffset(level,nn)*nTimeIntervalsSpectral+nx*ny*nz*(sps-1)+&
-                        (i-2) +(j-2)*nx +(k-2)*nx*ny
-                enddo
-             enddo
-          enddo
-       enddo
-    end do
-
-    ! Determine the global block row index for each (i,j,k) node in
-    ! each local block.
-    do sps=1, nTimeIntervalsSpectral
-       do nn=1, nDom
-          call setPointers(nn, level, sps)
-          do k=1, kl
-             do j=1, jl
-                do i=1, il
-                   !modified Timespectral indexing. Put all time
-                   !instances of a give block adjacent to each other in
-                   !the matrix
-                   globalNode(i, j, k) = &
-                        nNodeBLockOffset(nn)*nTimeIntervalsSpectral + &
-                        il*jl*kl*(sps-1) + (i-1)+(j-1)*il + (k-1)*il*jl
+                case (kMax)
+                    viscPointer => viscKmaxPointer
+                end select
 
+                ! Set this range in viscPointer to viscous subface mm.
+
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        viscPointer(i, j) = mm
+                    end do
                 end do
-             end do
-          end do
-       end do
-    end do
-
-    ! The above procedure has uniquely numbered all cells and nodes
-    ! owned on each processor. However we must also determine the
-    ! indices of the halo cells/nodes from other processors. To do this
-    ! we just run the specific halo exchanges for the cells and one for
-    ! the nodes
-
-    spectralModes: do sps=1,nTimeIntervalsSpectral
-       domainLoop:do nn=1, nDom
-          flowDoms(nn, level, sps)%intCommVars(1)%var => &
-               flowDoms(nn, level, sps)%globalNode(:, :, :)
-       end do domainLoop
-
-       ! Run the generic integer exchange
-       call wHalo1to1IntGeneric(1, level, sps, commPatternNode_1st, internalNode_1st)
-    end do spectralModes
-
-    spectralModes2: do sps=1,nTimeIntervalsSpectral
-       domainLoop2:do nn=1, nDom
-          flowDoms(nn, level, sps)%intCommVars(1)%var => &
-               flowDoms(nn, level, sps)%globalCell(:, :, :)
-       end do domainLoop2
-
-       ! Run the generic integer exchange
-       call wHalo1to1IntGeneric(1, level, sps, commPatternCell_2nd, internalCell_2nd)
-    end do spectralModes2
-
-  end subroutine setGlobalCellsAndNodes
-  subroutine setFamilyInfoFaces(level)
-    !
-    !       setFamilyInfoFaces sets the values of the family parameters
-    !       for faces on the given multigrid level. The default values for
-    !       indFamily is 0, which means that the mass flow through that
-    !       face does not contribute to the mass flow that must be
-    !       monitored. For sliding mesh interfaces both sides of the
-    !       interface are monitored and the value of indFamily corresponds
-    !       to one of the two entries in the local monitoring arrays. The
-    !       values of factFamily are such that the mass flow entering the
-    !       block is defined positive.
-    !       Note that only the 1st spectral solution is treated, because
-    !       this informations is the same for all of them.
-    !
-    use constants
-    use blockPointers
-    use cgnsGrid
-    use inputTimeSpectral
-    use monitor
-    use section
-    use utils, only : setPointers, terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: nn, mm, i, j, k, ii, nSlices
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
-
-    integer(kind=intType), dimension(cgnsNFamilies) :: orToMassFam
-
-    ! Set the offset ii for the values of orToMassFam. If the mass
-    ! flow through sliding mesh interfaces must be monitored this
-    ! offset if 2*cgnsNSliding. This means that in the arrays to store
-    ! the mass flow the sliding mesh interfaces are stored first,
-    ! followed by the families.
-
-    if( monMassSliding ) then
-       ii = 2*cgnsNSliding
-    else
-       ii = 0
-    endif
-
-    ! Determine the number of families for which the mass flow must
-    ! be monitored and set the entries of orToMassFam accordingly,
-    ! i.e. the offset ii is included.
-
-    mm = ii
-    do nn=1,cgnsNFamilies
-       if(cgnsFamilies(nn)%monitorMassflow            .and. &
-            cgnsFamilies(nn)%BCType /= MassBleedInflow  .and. &
-            cgnsFamilies(nn)%BCType /= MassBleedOutflow .and. &
-            cgnsFamilies(nn)%BCType /= SlidingInterface) then
-          mm = mm + 1
-          orToMassFam(nn) = mm
-       else
-          orToMassFam(nn) = 0
-       endif
-    enddo
-
-    ! Set monMassFamilies to .true. if the mass flow of at least one
-    ! family must be monitored. Otherwise set it to .false.
-
-    if(mm > ii) then
-       monMassFamilies = .true.
-    else
-       monMassFamilies = .false.
-    endif
-
-    ! If this is the first level, allocate the memory for
-    ! massFlowFamilyInv and massFlowFamilyDiss.
-
-    if(level == 1) then
-       nn = nTimeIntervalsSpectral
-
-       allocate(massFlowFamilyInv(0:mm,nn), &
-            massFlowFamilyDiss(0:mm,nn), stat=ierr)
-       if(ierr /= 0) &
-            call terminate("setFamilyInfoFaces", &
-            "Memory allocation failure for &
-            &massFlowFamilyInv and massFlowFamilyDiss")
-    endif
-
-    ! Loop over the number of domains.
-
-    domains: do nn=1,nDom
-
-       ! Allocate the memory for indFamily and factFamily.
-
-       il = flowDoms(nn,level,1)%il
-       jl = flowDoms(nn,level,1)%jl
-       kl = flowDoms(nn,level,1)%kl
-
-       allocate(flowDoms(nn,level,1)%indFamilyI (1:il,2:jl,2:kl), &
-            flowDoms(nn,level,1)%indFamilyJ (2:il,1:jl,2:kl), &
-            flowDoms(nn,level,1)%indFamilyK (2:il,2:jl,1:kl), &
-            flowDoms(nn,level,1)%factFamilyI(1:il,2:jl,2:kl), &
-            flowDoms(nn,level,1)%factFamilyJ(2:il,1:jl,2:kl), &
-            flowDoms(nn,level,1)%factFamilyK(2:il,2:jl,1:kl), &
-            stat=ierr)
-       if(ierr /= 0)                          &
-            call terminate("setFamilyInfoFaces", &
-            "Memory allocation failure for indFamily &
-            &and factFamily")
-
-       ! Set the pointers for this domain.
-
-       call setPointers(nn, level, 1_intType)
-
-       ! Determine the number of slices for this block to make
-       ! the full wheel.
-
-       nSlices = sections(sectionID)%nSlices
-
-       ! Initialize the values of indFamily and factFamily.
-
-       indFamilyI = 0_intType
-       indFamilyJ = 0_intType
-       indFamilyK = 0_intType
-
-       factFamilyI = 0_intType
-       factFamilyJ = 0_intType
-       factFamilyK = 0_intType
-
-       ! Loop over the boundary conditions.
 
-       boco: do mm=1,nBocos
-
-          ! Test for the boundary condition.
-
-          select case (BCType(mm))
-          case (SlidingInterface)
-
-             ! Sliding mesh boundary.
-             ! If the mass flow through sliding interfaces must be monitored,
-             ! determine the index in the arrays massFlowFamilyInv and
-             ! massFlowFamilyDiss where to store the contribution of this
-             ! subface. If the sliding mesh mass flows are not monitored,
-             ! set the index to 0.
-
-             if( monMassSliding ) then
-                ii = 2*abs(groupNum(mm))
-                if(groupNum(mm) < 0) ii = ii - 1
-             else
-                ii = 0
-             endif
+            end do viscSubfaces
+
+        end do domains
+
+    end subroutine viscSubfaceInfo
+
+    !      ==================================================================
+
+    subroutine allocateMetric(level)
+        !
+        !       allocateMetric allocates the memory for the metric variables
+        !       on the given grid level for all spectral solutions.
+        !
+        use constants
+        use block
+        use inputPhysics
+        use inputTimeSpectral
+        use iteration
+        use inputUnsteady
+        use utils, only: terminate, setPointers
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: nn, mm, sps
+        integer(kind=intType) :: il, jl, kl, ie, je, ke, ib, jb, kb
+
+        type(BCDataType), dimension(:), pointer :: BCData
+
+        ! Loop over the number of spectral solutions and local blocks.
+
+        spectral: do sps = 1, nTimeIntervalsSpectral
+            domains: do nn = 1, nDom
+
+                ! Store the the upper boundaries of the block a bit easier.
+
+                il = flowDoms(nn, level, sps)%il
+                jl = flowDoms(nn, level, sps)%jl
+                kl = flowDoms(nn, level, sps)%kl
+
+                ie = flowDoms(nn, level, sps)%ie
+                je = flowDoms(nn, level, sps)%je
+                ke = flowDoms(nn, level, sps)%ke
+
+                ib = flowDoms(nn, level, sps)%ib
+                jb = flowDoms(nn, level, sps)%jb
+                kb = flowDoms(nn, level, sps)%kb
+
+                ! Allocate the memory for the volumes and the face normals.
+
+                allocate (flowDoms(nn, level, sps)%si(0:ie, 1:je, 1:ke, 3), &
+                          flowDoms(nn, level, sps)%sj(1:ie, 0:je, 1:ke, 3), &
+                          flowDoms(nn, level, sps)%sk(1:ie, 1:je, 0:ke, 3), &
+                          flowDoms(nn, level, sps)%vol(0:ib, 0:jb, 0:kb), &
+                          stat=ierr)
+                if (ierr /= 0) &
+                     call terminate("allocateMetric", &
+                     "Memory allocation failure for &
+                     &normals and volumes")
+
+                ! Added by HDN
+                ! Added s[I,J,K]ALE
+                if (equationMode == unSteady .and. useALE) then
+                    allocate (flowDoms(nn, level, sps)%sIALE(0:nALEsteps, 0:ie, 1:je, 1:ke, 3), &
+                              flowDoms(nn, level, sps)%sJALE(0:nALEsteps, 1:ie, 0:je, 1:ke, 3), &
+                              flowDoms(nn, level, sps)%sKALE(0:nALEsteps, 1:ie, 1:je, 0:ke, 3), &
+                              stat=ierr)
+                    if (ierr /= 0) &
+                         call terminate("allocateMetric", &
+                         "Memory allocation failure for &
+                         &sIALE, sJALE, and sKALE")
+                end if
 
-          case (MassBleedInflow, MassBleedOutflow)
+                ! Allocate the memory for the unit normals of the boundary
+                ! faces. First set the pointer to make it more readable.
 
-             ! Inflow or outflow bleed. These boundary conditions are
-             ! handled separetely and need not be monitored.
+                BCData => flowDoms(nn, level, sps)%BCData
 
-             ii = 0
+                do mm = 1, flowDoms(nn, level, sps)%nBocos
 
-          case default
+                    ! Store the size of the subface in ie:ib and je:jb, because
+                    ! these variables are not needed anymore.
 
-             ! Subface is an ordinary boundary condition. Determine the
-             ! family ID and set the index ii in the arrays massFlowFamilyInv
-             ! and massFlowFamilyDiss accordingly.
+                    ie = BCData(mm)%icBeg
+                    ib = BCData(mm)%icEnd
+                    je = BCData(mm)%jcBeg
+                    jb = BCData(mm)%jcEnd
 
-             if(groupNum(mm) > 0) then
-                ii = orToMassFam(groupNum(mm))
-             else
-                ii = 0
-             endif
-
-          end select
-
-          ! Set the owned cell range for the faces on this subface.
-          ! As icBeg, etc. may contain halo cells, inBeg, etc. is
-          ! used.
-
-          iBeg = min(inBeg(mm), inEnd(mm)) +1
-          iEnd = max(inBeg(mm), inEnd(mm))
-
-          jBeg = min(jnBeg(mm), jnEnd(mm)) +1
-          jEnd = max(jnBeg(mm), jnEnd(mm))
-
-          kBeg = min(knBeg(mm), knEnd(mm)) +1
-          kEnd = max(knBeg(mm), knEnd(mm))
-
-          ! Determine the block this subface is located on and set
-          ! the corresponding values of indFamily and factFamily.
-          ! Note that factFamily is set to nSlices on min faces and to
-          ! -nSlices on max faces, such that the mass flow entering the
-          ! domain is defined positive and the mass flow of the entire
-          ! wheel is monitored.
-
-          select case( BCFaceID(mm) )
-          case (iMin)
-             do k=kBeg,kEnd
-                do j=jBeg,jEnd
-                   indFamilyI (1,j,k) = ii
-                   factFamilyI(1,j,k) = nSlices
-                enddo
-             enddo
-
-             !===========================================================
-
-          case (iMax)
-             do k=kBeg,kEnd
-                do j=jBeg,jEnd
-                   indFamilyI (il,j,k) = ii
-                   factFamilyI(il,j,k) = -nSlices
-                enddo
-             enddo
-
-             !===========================================================
-
-          case (jMin)
-             do k=kBeg,kEnd
-                do i=iBeg,iEnd
-                   indFamilyJ (i,1,k) = ii
-                   factFamilyJ(i,1,k) = nSlices
-                enddo
-             enddo
-
-             !===========================================================
-
-          case (jMax)
-             do k=kBeg,kEnd
-                do i=iBeg,iEnd
-                   indFamilyJ (i,jl,k) = ii
-                   factFamilyJ(i,jl,k) = -nSlices
-                enddo
-             enddo
-
-             !===========================================================
-
-          case (kMin)
-             do j=jBeg,jEnd
-                do i=iBeg,iEnd
-                   indFamilyK (i,j,1) = ii
-                   factFamilyK(i,j,1) = nSlices
-                enddo
-             enddo
-
-             !===========================================================
-
-          case (kMax)
-             do j=jBeg,jEnd
-                do i=iBeg,iEnd
-                   indFamilyK (i,j,kl) = ii
-                   factFamilyK(i,j,kl) = -nSlices
-                enddo
-             enddo
-
-          end select
-
-       enddo boco
-    enddo domains
-
-  end subroutine setFamilyInfoFaces
-  subroutine shiftCoorAndVolumes
-    !
-    !       shiftCoorAndVolumes shifts the owned coordinates and
-    !       volumes in case of a deforming mesh for an unsteady
-    !       computation. In this case the old coordinates are needed to
-    !       determine the mesh velocities. The loop over the number of
-    !       spectral solutions is present for consistency, but this number
-    !       will be 1 when this routine is called.
-    !
-    use blockPointers
-    use inputTimeSpectral
-    use iteration
-    use utils, only : setPointers
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, nn, mm, ll, kk
-
-    ! Loop over the number of spectral solutions and local blocks.
-
-    spectralLoop: do kk=1,nTimeIntervalsSpectral
-       domains: do nn=1,nDom
-
-          ! Set the pointers for this block on the ground level.
-
-          call setPointers(nn, groundLevel,kk)
-
-          ! Shift the coordinates already stored in xOld and the
-          ! volumes stored in volOld.
-
-          loopOldLevels: do mm=nOldLevels,2,-1
-
-             ! Shift the coordinates from level mm-1 to mm, including
-             ! the halo's.
-
-             ll = mm - 1
-
-             do k=0,ke
-                do j=0,je
-                   do i=0,ie
-                      xOld(mm,i,j,k,1) = xOld(ll,i,j,k,1)
-                      xOld(mm,i,j,k,2) = xOld(ll,i,j,k,2)
-                      xOld(mm,i,j,k,3) = xOld(ll,i,j,k,3)
-                   enddo
-                enddo
-             enddo
-
-             ! Shift the old volumes from level mm-1 to mm.
-             ! Only the owned ones need to be considered.
-
-             do k=2,kl
-                do j=2,jl
-                   do i=2,il
-                      volOld(mm,i,j,k) = volOld(ll,i,j,k)
-                   enddo
-                enddo
-             enddo
-
-          enddo loopOldLevels
-
-          ! Shift the current coordinates into the 1st level of xOld.
-
-          do k=0,ke
-             do j=0,je
-                do i=0,ie
-                   xOld(1,i,j,k,1) = x(i,j,k,1)
-                   xOld(1,i,j,k,2) = x(i,j,k,2)
-                   xOld(1,i,j,k,3) = x(i,j,k,3)
-                enddo
-             enddo
-          enddo
-
-          ! Shift the current volumes into the 1st level of volOld.
-
-          do k=2,kl
-             do j=2,jl
-                do i=2,il
-                   volOld(1,i,j,k) = vol(i,j,k)
-                enddo
-             enddo
-          enddo
-
-       enddo domains
-    enddo spectralLoop
-
-  end subroutine shiftCoorAndVolumes
-  subroutine viscSubfaceInfo(level)
-    !
-    !       viscSubfaceInfo allocates the memory for the storage of the
-    !       stress tensor and heat flux vector of viscous subfaces for the
-    !       given multigrid level and all spectral solutions. Furthermore
-    !       the pointers viscIminPointer, etc. Are allocated and set.
-    !       These pointers contain info to which viscous subface the faces
-    !       of the block faces possibly belong. If not part of a viscous
-    !       subface these values are set to 0. Note that these pointers
-    !       are only allocated and determined for the 1st spectral
-    !       solution, because the info is the same for all of them.
-    !
-    use constants
-    use blockPointers
-    use inputTimeSpectral
-    use utils, only : setPointers, terminate
-    implicit none
-    !
-    !      Subroutine argument.
-    !
-    integer(kind=intType), intent(in) :: level
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: nn, mm, sps, i, j
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
-
-    integer(kind=intType), dimension(:,:), pointer :: viscPointer
-
-    ! Loop over the number blocks stored on this processor.
-
-    domains: do nn=1,nDom
-
-       ! Set the pointers to the block of the 1st spectral solution.
-
-       call setPointers(nn, level, 1_intType)
-
-       ! Allocate the memory for viscSubface and the pointers
-       ! viscIminPointer, etc. ViscSubface must be allocated for
-       ! all spectral solutions, the pointers only for the 1st.
-
-       do sps=1,nTimeIntervalsSpectral
-          allocate(flowDoms(nn,level,sps)%viscSubface(nViscBocos), &
-               stat=ierr)
-          if(ierr /= 0)                         &
-               call terminate("viscSubfaceInfo", &
-               "Memory allocation failure for viscSubface")
-       enddo
-
-       allocate(flowDoms(nn,level,1)%viscIminPointer(2:jl,2:kl), &
-            flowDoms(nn,level,1)%viscImaxPointer(2:jl,2:kl), &
-            flowDoms(nn,level,1)%viscJminPointer(2:il,2:kl), &
-            flowDoms(nn,level,1)%viscJmaxPointer(2:il,2:kl), &
-            flowDoms(nn,level,1)%viscKminPointer(2:il,2:jl), &
-            flowDoms(nn,level,1)%viscKmaxPointer(2:il,2:jl), &
-            stat=ierr)
-       if(ierr /= 0)                         &
-            call terminate("viscSubfaceInfo", &
-            "Memory allocation failure for subface info")
-
-       ! Reset the pointers viscIminPointer, etc. to make it more
-       ! readable and initialize them to 0. This indicates that
-       ! the faces are not part of a viscous wall subfaces.
-
-       viscIminPointer => flowDoms(nn,level,1)%viscIminPointer
-       viscImaxPointer => flowDoms(nn,level,1)%viscImaxPointer
-       viscJminPointer => flowDoms(nn,level,1)%viscJminPointer
-       viscJmaxPointer => flowDoms(nn,level,1)%viscJmaxPointer
-       viscKminPointer => flowDoms(nn,level,1)%viscKminPointer
-       viscKmaxPointer => flowDoms(nn,level,1)%viscKmaxPointer
-
-       viscIminPointer = 0
-       viscImaxPointer = 0
-       viscJminPointer = 0
-       viscJmaxPointer = 0
-       viscKminPointer = 0
-       viscKmaxPointer = 0
-
-       ! Loop over the viscous subfaces to allocate the memory for the
-       ! stress tensor and the heat flux vector and to set the range
-       ! in viscIminPointer, etc.
-
-       viscSubfaces: do mm=1,nViscBocos
-
-          ! Store the cell range in iBeg, iEnd, etc. As the viscous data
-          ! do not allow for an overlap, the nodal range of the
-          ! subface must be used.
-
-          iBeg = BCData(mm)%inBeg + 1
-          iEnd = BCData(mm)%inEnd
-
-          jBeg = BCData(mm)%jnBeg + 1
-          jEnd = BCData(mm)%jnEnd
-
-          ! Loop over the spectral solutions and allocate the memory
-          ! for the stress tensor, heat flux and friction velocity.
-
-          do sps=1,nTimeIntervalsSpectral
-
-             ! Set the pointer for viscSubface to make the code
-             ! more readable and allocate the memory.
-
-             viscSubface => flowDoms(nn,level,sps)%viscSubface
-
-             allocate(viscSubface(mm)%tau( iBeg:iEnd,jBeg:jEnd,6), &
-                  viscSubface(mm)%q(   iBeg:iEnd,jBeg:jEnd,3), &
-                  viscSubface(mm)%utau(iBeg:iEnd,jBeg:jEnd),   &
-                  stat=ierr)
-             if(ierr /= 0)                       &
-                  call terminate("viscSubfaceInfo", &
-                  "Memory allocation failure for tau, q &
-                  &and utau.")
-          enddo
-
-          ! Set the pointer viscPointer, depending on the block face
-          ! on which the subface is located.
-
-          select case (BCFaceID(mm))
-          case (iMin)
-             viscPointer => viscIminPointer
-
-          case (iMax)
-             viscPointer => viscImaxPointer
-
-          case (jMin)
-             viscPointer => viscJminPointer
-
-          case (jMax)
-             viscPointer => viscJmaxPointer
-
-          case (kMin)
-             viscPointer => viscKminPointer
-
-          case (kMax)
-             viscPointer => viscKmaxPointer
-          end select
-
-          ! Set this range in viscPointer to viscous subface mm.
-
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
-                viscPointer(i,j) = mm
-             enddo
-          enddo
-
-       enddo viscSubfaces
-
-    enddo domains
-
-  end subroutine viscSubfaceInfo
-
-  !      ==================================================================
-
-  subroutine allocateMetric(level)
-    !
-    !       allocateMetric allocates the memory for the metric variables
-    !       on the given grid level for all spectral solutions.
-    !
-    use constants
-    use block
-    use inputPhysics
-    use inputTimeSpectral
-    use iteration
-    use inputUnsteady
-    use utils, only : terminate, setPointers
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: nn, mm, sps
-    integer(kind=intType) :: il, jl, kl, ie, je, ke, ib, jb, kb
-
-    type(BCDataType), dimension(:), pointer :: BCData
-
-    ! Loop over the number of spectral solutions and local blocks.
-
-    spectral: do sps=1,nTimeIntervalsSpectral
-       domains: do nn=1,nDom
-
-          ! Store the the upper boundaries of the block a bit easier.
-
-          il = flowDoms(nn,level,sps)%il
-          jl = flowDoms(nn,level,sps)%jl
-          kl = flowDoms(nn,level,sps)%kl
-
-          ie = flowDoms(nn,level,sps)%ie
-          je = flowDoms(nn,level,sps)%je
-          ke = flowDoms(nn,level,sps)%ke
-
-          ib = flowDoms(nn,level,sps)%ib
-          jb = flowDoms(nn,level,sps)%jb
-          kb = flowDoms(nn,level,sps)%kb
-
-          ! Allocate the memory for the volumes and the face normals.
-
-          allocate(flowDoms(nn,level,sps)%si(0:ie,1:je,1:ke,3), &
-               flowDoms(nn,level,sps)%sj(1:ie,0:je,1:ke,3), &
-               flowDoms(nn,level,sps)%sk(1:ie,1:je,0:ke,3), &
-               flowDoms(nn,level,sps)%vol(0:ib,0:jb,0:kb),  &
-               stat=ierr)
-          if(ierr /= 0)                      &
-               call terminate("allocateMetric", &
-               "Memory allocation failure for &
-               &normals and volumes")
-
-          ! Added by HDN
-          ! Added s[I,J,K]ALE
-          if (equationMode == unSteady .and. useALE) then
-             allocate(flowDoms(nn,level,sps)%sIALE(0:nALEsteps,0:ie,1:je,1:ke,3), &
-                  flowDoms(nn,level,sps)%sJALE(0:nALEsteps,1:ie,0:je,1:ke,3), &
-                  flowDoms(nn,level,sps)%sKALE(0:nALEsteps,1:ie,1:je,0:ke,3), &
-                  stat=ierr)
-             if(ierr /= 0)                      &
-                  call terminate("allocateMetric", &
-                  "Memory allocation failure for &
-                  &sIALE, sJALE, and sKALE")
-          end if
-
-          ! Allocate the memory for the unit normals of the boundary
-          ! faces. First set the pointer to make it more readable.
-
-          BCData => flowDoms(nn,level,sps)%BCData
-
-          do mm=1,flowDoms(nn,level,sps)%nBocos
-
-             ! Store the size of the subface in ie:ib and je:jb, because
-             ! these variables are not needed anymore.
-
-             ie = BCData(mm)%icBeg
-             ib = BCData(mm)%icEnd
-             je = BCData(mm)%jcBeg
-             jb = BCData(mm)%jcEnd
-
-             ! Allocate the memory for the unit normals.
-
-             ! Added by HDN
-             ! Added normALE
-             allocate(                                           &
-                  BCData(mm)%norm(ie:ib,je:jb,3),                &
-                  BCData(mm)%normALE(0:nALEsteps,ie:ib,je:jb,3), &
-                  stat=ierr)
-             if(ierr /= 0)                      &
-                  call terminate("allocateMetric", &
-                  "Memory allocation failure for norm")
-          enddo
-
-          ! Allocate the memory for the old volumes; only for unsteady
-          ! problems on deforming meshes on the finest grid level.
-
-          if(level == 1 .and. deforming_Grid .and. &
-               equationMode == unsteady) then
-
-             allocate(                                                   &
-                  flowDoms(nn,level,sps)%volOld(nOldLevels,2:il,2:jl,2:kl), &
-                  stat=ierr)
-             if(ierr /= 0)                      &
-                  call terminate("allocateMetric", &
-                  "Memory allocation failure for volOld")
-          endif
-
-       enddo domains
-    enddo spectral
-
-  end subroutine allocateMetric
-
-  !      ==================================================================
-
-  subroutine metric(level)
-    !
-    !       metric computes the face normals and the volume for the given
-    !       grid level for all spectral solutions. First the volumes are
-    !       computed assuming that the block is right handed. Then the
-    !       number of positive and negative volumes are determined. If all
-    !       volumes are positive the block is indeed right handed; if all
-    !       volumes are negative the block is left handed and both the
-    !       volumes and the normals must be negated (for the normals this
-    !       is done by the introduction of fact, which is either -0.5 or
-    !       0.5); if there are both positive and negative volumes the mesh
-    !       is not valid.
-    !
-    use constants
-    use blockPointers
-    use cgnsGrid
-    use communication
-    use inputTimeSpectral
-    use checkVolBlock
-    use inputIteration
-    use utils, only : setPointers, terminate, returnFail
-    use commonFormats, only : stringSpace, stringInt1
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level
-    !
-    !      Local parameter.
-    !
-    real(kind=realType), parameter :: thresVolume = 1.e-2_realType
-    real(kind=realType), parameter :: haloCellRatio = 1e-10_realType
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: i, j, k, n, m, l
-    integer(kind=intType) :: nn, mm, sps
-    integer(kind=intType) :: nVolNeg,   nVolPos
-    integer(kind=intType) :: nVolBad,   nVolBadGlobal
-    integer(kind=intType) :: nBlockBad, nBlockBadGlobal
-
-    real(kind=realType) :: fact, mult
-    real(kind=realType) :: xp, yp, zp, vp1, vp2, vp3, vp4, vp5, vp6
-
-    real(kind=realType), dimension(3) :: v1, v2
-
-    real(kind=realType), dimension(:,:,:), pointer :: ss
-
-    character(len=10) :: integerString
-
-    logical :: checkK, checkJ, checkI, checkAll, checkBlank
-    logical :: badVolume, iBlankAllocated
-
-    logical, dimension(:,:,:), pointer :: volumeIsNeg
-
-    type(checkVolBlockType), &
-         dimension(nDom,nTimeIntervalsSpectral) :: checkVolDoms
-
-    ! Initialize the number of bad volumes and bad blocks to 0.
-
-    nVolBad   = 0
-    nBlockBad = 0
-
-    ! Loop over the number of spectral solutions and local blocks.
-
-    spectral: do sps=1,nTimeIntervalsSpectral
-       domains: do nn=1,nDom
-
-          ! Set the pointers to this block and allocate the memory for
-          ! volumeIsNeg. Set a pointer to this entry afterwards to make
-          ! the code more readable.
-
-          call setPointers(nn, level, sps)
-          if (associated(flowDoms(nn, level, sps)%iblank)) then
-             iBlankAllocated = .True.
-          else
-             iBlankAllocated = .False.
-          end if
-
-          allocate(checkVolDoms(nn,sps)%volumeIsNeg(2:il,2:jl,2:kl), &
-               stat=ierr)
-          if(ierr /= 0)              &
-               call terminate("metric", &
-               "Memory allocation failure for volumeIsNeg")
-          volumeIsNeg => checkVolDoms(nn,sps)%volumeIsNeg
-          !
-          !           Volume and block orientation computation.
-          !
-          ! Initialize the number of positive and negative volumes for
-          ! this block to 0.
-
-          nVolNeg = 0
-          nVolPos = 0
-
-          ! Compute the volumes. The hexahedron is split into 6 pyramids
-          ! whose volumes are computed. The volume is positive for a
-          ! right handed block.
-          ! Initialize the volumes to zero. The reasons is that the second
-          ! level halo's must be initialized to zero and for convenience
-          ! all the volumes are set to zero.
-
-          vol = zero
-
-          do k=1,ke
-             n = k -1
-
-             checkK = .true.
-             if(k == 1 .or. k == ke) checkK = .false.
-
-             do j=1,je
-                m = j -1
-
-                checkJ = .true.
-                if(j == 1 .or. j == je) checkJ = .false.
-
-                do i=1,ie
-                   l = i -1
-
-                   checkI = .true.
-                   if(i == 1 .or. i == ie) checkI = .false.
-
-                   ! Determine whether or not the voluem must be checked for
-                   ! quality. Only owned volumes are checked, not halo's.
-
-                   checkAll = .false.
-
-                   ! Only care about the quality of compute cells (1)
-                   ! and fringe cells (-1)
-                   checkBlank = .False.
-                   if (iblankAllocated) then
-                      if (abs(iblank(i, j, k)) == 1) then
-                         checkBlank = .True.
-                      end if
-                   end if
-
-                   if (checkK .and. checkJ .and. checkI .and. checkBlank) then
-                      checkAll = .true.
-                   end if
+                    ! Allocate the memory for the unit normals.
 
-                   ! Compute the coordinates of the center of gravity.
+                    ! Added by HDN
+                    ! Added normALE
+                    allocate ( &
+                        BCData(mm)%norm(ie:ib, je:jb, 3), &
+                        BCData(mm)%normALE(0:nALEsteps, ie:ib, je:jb, 3), &
+                        stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("allocateMetric", &
+                                       "Memory allocation failure for norm")
+                end do
 
-                   xp = eighth*(x(i,j,k,1) + x(i,m,k,1) &
-                        +         x(i,m,n,1) + x(i,j,n,1) &
-                        +         x(l,j,k,1) + x(l,m,k,1) &
-                        +         x(l,m,n,1) + x(l,j,n,1))
-                   yp = eighth*(x(i,j,k,2) + x(i,m,k,2) &
-                        +         x(i,m,n,2) + x(i,j,n,2) &
-                        +         x(l,j,k,2) + x(l,m,k,2) &
-                        +         x(l,m,n,2) + x(l,j,n,2))
-                   zp = eighth*(x(i,j,k,3) + x(i,m,k,3) &
-                        +         x(i,m,n,3) + x(i,j,n,3) &
-                        +         x(l,j,k,3) + x(l,m,k,3) &
-                        +         x(l,m,n,3) + x(l,j,n,3))
-
-                   ! Compute the volumes of the 6 sub pyramids. The
-                   ! arguments of volpym must be such that for a (regular)
-                   ! right handed hexahedron all volumes are positive.
-
-                   vp1 = volpym(x(i,j,k,1), x(i,j,k,2), x(i,j,k,3), &
-                        x(i,j,n,1), x(i,j,n,2), x(i,j,n,3), &
-                        x(i,m,n,1), x(i,m,n,2), x(i,m,n,3), &
-                        x(i,m,k,1), x(i,m,k,2), x(i,m,k,3))
-
-                   vp2 = volpym(x(l,j,k,1), x(l,j,k,2), x(l,j,k,3), &
-                        x(l,m,k,1), x(l,m,k,2), x(l,m,k,3), &
-                        x(l,m,n,1), x(l,m,n,2), x(l,m,n,3), &
-                        x(l,j,n,1), x(l,j,n,2), x(l,j,n,3))
-
-                   vp3 = volpym(x(i,j,k,1), x(i,j,k,2), x(i,j,k,3), &
-                        x(l,j,k,1), x(l,j,k,2), x(l,j,k,3), &
-                        x(l,j,n,1), x(l,j,n,2), x(l,j,n,3), &
-                        x(i,j,n,1), x(i,j,n,2), x(i,j,n,3))
-
-                   vp4 = volpym(x(i,m,k,1), x(i,m,k,2), x(i,m,k,3), &
-                        x(i,m,n,1), x(i,m,n,2), x(i,m,n,3), &
-                        x(l,m,n,1), x(l,m,n,2), x(l,m,n,3), &
-                        x(l,m,k,1), x(l,m,k,2), x(l,m,k,3))
-
-                   vp5 = volpym(x(i,j,k,1), x(i,j,k,2), x(i,j,k,3), &
-                        x(i,m,k,1), x(i,m,k,2), x(i,m,k,3), &
-                        x(l,m,k,1), x(l,m,k,2), x(l,m,k,3), &
-                        x(l,j,k,1), x(l,j,k,2), x(l,j,k,3))
-
-                   vp6 = volpym(x(i,j,n,1), x(i,j,n,2), x(i,j,n,3), &
-                        x(l,j,n,1), x(l,j,n,2), x(l,j,n,3), &
-                        x(l,m,n,1), x(l,m,n,2), x(l,m,n,3), &
-                        x(i,m,n,1), x(i,m,n,2), x(i,m,n,3))
-
-                   ! Set the volume to 1/6 of the sum of the volumes of the
-                   ! pyramid. Remember that volpym computes 6 times the
-                   ! volume.
-
-                   vol(i,j,k) = sixth*(vp1 + vp2 + vp3 + vp4 + vp5 + vp6)
+                ! Allocate the memory for the old volumes; only for unsteady
+                ! problems on deforming meshes on the finest grid level.
 
-                   ! Check the volume and update the number of positive
-                   ! and negative volumes if needed.
-
-                   if( checkAll ) then
+                if (level == 1 .and. deforming_Grid .and. &
+                    equationMode == unsteady) then
 
-                      ! Update either the number of negative or positive
-                      ! volumes. Negative volumes should only occur for left
-                      ! handed blocks. This is checked later.
-                      ! Set the logical volumeIsNeg accordingly.
-
-                      if(vol(i,j,k) < zero) then
-                         nVolNeg            = nVolNeg + 1
-                         volumeIsNeg(i,j,k) = .true.
-                      else
-                         nVolPos            = nVolPos + 1
-                         volumeIsNeg(i,j,k) = .false.
-                      endif
-
-                      ! Set the threshold for the volume quality.
-
-                      fact = thresVolume*abs(vol(i,j,k))
-
-                      ! Check the quality of the volume.
-
-                      badVolume = .false.
-                      if(vp1*vol(i,j,k) < zero .and. &
-                           abs(vp1)       > fact) badVolume = .true.
-                      if(vp2*vol(i,j,k) < zero .and. &
-                           abs(vp2)       > fact) badVolume = .true.
-                      if(vp3*vol(i,j,k) < zero .and. &
-                           abs(vp3)       > fact) badVolume = .true.
-                      if(vp4*vol(i,j,k) < zero .and. &
-                           abs(vp4)       > fact) badVolume = .true.
-                      if(vp5*vol(i,j,k) < zero .and. &
-                           abs(vp5)       > fact) badVolume = .true.
-                      if(vp6*vol(i,j,k) < zero .and. &
-                           abs(vp6)       > fact) badVolume = .true.
-
-                      ! Update nVolBad if this is a bad volume.
-
-                      if( badVolume ) nVolBad = nVolBad + 1
-
-                   endif
-
-                   ! Set the volume to the absolute value.
-
-                   vol(i,j,k) = abs(vol(i,j,k))
-
-                enddo
-             enddo
-          enddo
-
-          ! Some additional safety stuff for halo volumes.
-
-          do k=2,kl
-             do j=2,jl
-                if(vol(1, j,k)/vol(2, j, k) < haloCellRatio) then
-                   vol(1, j,k) = vol(2, j,k)
-                end if
-                if(vol(ie,j,k)/vol(il,j,k)  < haloCellRatio) then
-                   vol(ie,j,k) = vol(il,j,k)
+                    allocate ( &
+                        flowDoms(nn, level, sps)%volOld(nOldLevels, 2:il, 2:jl, 2:kl), &
+                        stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("allocateMetric", &
+                                       "Memory allocation failure for volOld")
                 end if
-             enddo
-          enddo
 
-          do k=2,kl
-             do i=1,ie
-                if(vol(i,1, k)/vol(i,2,k) < haloCellRatio) then
-                   vol(i,1, k) = vol(i,2, k)
+            end do domains
+        end do spectral
+
+    end subroutine allocateMetric
+
+    !      ==================================================================
+
+    subroutine metric(level)
+        !
+        !       metric computes the face normals and the volume for the given
+        !       grid level for all spectral solutions. First the volumes are
+        !       computed assuming that the block is right handed. Then the
+        !       number of positive and negative volumes are determined. If all
+        !       volumes are positive the block is indeed right handed; if all
+        !       volumes are negative the block is left handed and both the
+        !       volumes and the normals must be negated (for the normals this
+        !       is done by the introduction of fact, which is either -0.5 or
+        !       0.5); if there are both positive and negative volumes the mesh
+        !       is not valid.
+        !
+        use constants
+        use blockPointers
+        use cgnsGrid
+        use communication
+        use inputTimeSpectral
+        use checkVolBlock
+        use inputIteration
+        use utils, only: setPointers, terminate, returnFail
+        use commonFormats, only: stringSpace, stringInt1
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level
+        !
+        !      Local parameter.
+        !
+        real(kind=realType), parameter :: thresVolume = 1.e-2_realType
+        real(kind=realType), parameter :: haloCellRatio = 1e-10_realType
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: i, j, k, n, m, l
+        integer(kind=intType) :: nn, mm, sps
+        integer(kind=intType) :: nVolNeg, nVolPos
+        integer(kind=intType) :: nVolBad, nVolBadGlobal
+        integer(kind=intType) :: nBlockBad, nBlockBadGlobal
+
+        real(kind=realType) :: fact, mult
+        real(kind=realType) :: xp, yp, zp, vp1, vp2, vp3, vp4, vp5, vp6
+
+        real(kind=realType), dimension(3) :: v1, v2
+
+        real(kind=realType), dimension(:, :, :), pointer :: ss
+
+        character(len=10) :: integerString
+
+        logical :: checkK, checkJ, checkI, checkAll, checkBlank
+        logical :: badVolume, iBlankAllocated
+
+        logical, dimension(:, :, :), pointer :: volumeIsNeg
+
+        type(checkVolBlockType), &
+            dimension(nDom, nTimeIntervalsSpectral) :: checkVolDoms
+
+        ! Initialize the number of bad volumes and bad blocks to 0.
+
+        nVolBad = 0
+        nBlockBad = 0
+
+        ! Loop over the number of spectral solutions and local blocks.
+
+        spectral: do sps = 1, nTimeIntervalsSpectral
+            domains: do nn = 1, nDom
+
+                ! Set the pointers to this block and allocate the memory for
+                ! volumeIsNeg. Set a pointer to this entry afterwards to make
+                ! the code more readable.
+
+                call setPointers(nn, level, sps)
+                if (associated(flowDoms(nn, level, sps)%iblank)) then
+                    iBlankAllocated = .True.
+                else
+                    iBlankAllocated = .False.
                 end if
-                if(vol(i,je,k)/voL(i,jl,k) < haloCellRatio) then
-                   vol(i,je,k) = vol(i,jl,k)
+
+                allocate (checkVolDoms(nn, sps)%volumeIsNeg(2:il, 2:jl, 2:kl), &
+                          stat=ierr)
+                if (ierr /= 0) &
+                    call terminate("metric", &
+                                   "Memory allocation failure for volumeIsNeg")
+                volumeIsNeg => checkVolDoms(nn, sps)%volumeIsNeg
+                !
+                !           Volume and block orientation computation.
+                !
+                ! Initialize the number of positive and negative volumes for
+                ! this block to 0.
+
+                nVolNeg = 0
+                nVolPos = 0
+
+                ! Compute the volumes. The hexahedron is split into 6 pyramids
+                ! whose volumes are computed. The volume is positive for a
+                ! right handed block.
+                ! Initialize the volumes to zero. The reasons is that the second
+                ! level halo's must be initialized to zero and for convenience
+                ! all the volumes are set to zero.
+
+                vol = zero
+
+                do k = 1, ke
+                    n = k - 1
+
+                    checkK = .true.
+                    if (k == 1 .or. k == ke) checkK = .false.
+
+                    do j = 1, je
+                        m = j - 1
+
+                        checkJ = .true.
+                        if (j == 1 .or. j == je) checkJ = .false.
+
+                        do i = 1, ie
+                            l = i - 1
+
+                            checkI = .true.
+                            if (i == 1 .or. i == ie) checkI = .false.
+
+                            ! Determine whether or not the voluem must be checked for
+                            ! quality. Only owned volumes are checked, not halo's.
+
+                            checkAll = .false.
+
+                            ! Only care about the quality of compute cells (1)
+                            ! and fringe cells (-1)
+                            checkBlank = .False.
+                            if (iblankAllocated) then
+                                if (abs(iblank(i, j, k)) == 1) then
+                                    checkBlank = .True.
+                                end if
+                            end if
+
+                            if (checkK .and. checkJ .and. checkI .and. checkBlank) then
+                                checkAll = .true.
+                            end if
+
+                            ! Compute the coordinates of the center of gravity.
+
+                            xp = eighth*(x(i, j, k, 1) + x(i, m, k, 1) &
+                                         + x(i, m, n, 1) + x(i, j, n, 1) &
+                                         + x(l, j, k, 1) + x(l, m, k, 1) &
+                                         + x(l, m, n, 1) + x(l, j, n, 1))
+                            yp = eighth*(x(i, j, k, 2) + x(i, m, k, 2) &
+                                         + x(i, m, n, 2) + x(i, j, n, 2) &
+                                         + x(l, j, k, 2) + x(l, m, k, 2) &
+                                         + x(l, m, n, 2) + x(l, j, n, 2))
+                            zp = eighth*(x(i, j, k, 3) + x(i, m, k, 3) &
+                                         + x(i, m, n, 3) + x(i, j, n, 3) &
+                                         + x(l, j, k, 3) + x(l, m, k, 3) &
+                                         + x(l, m, n, 3) + x(l, j, n, 3))
+
+                            ! Compute the volumes of the 6 sub pyramids. The
+                            ! arguments of volpym must be such that for a (regular)
+                            ! right handed hexahedron all volumes are positive.
+
+                            vp1 = volpym(x(i, j, k, 1), x(i, j, k, 2), x(i, j, k, 3), &
+                                         x(i, j, n, 1), x(i, j, n, 2), x(i, j, n, 3), &
+                                         x(i, m, n, 1), x(i, m, n, 2), x(i, m, n, 3), &
+                                         x(i, m, k, 1), x(i, m, k, 2), x(i, m, k, 3))
+
+                            vp2 = volpym(x(l, j, k, 1), x(l, j, k, 2), x(l, j, k, 3), &
+                                         x(l, m, k, 1), x(l, m, k, 2), x(l, m, k, 3), &
+                                         x(l, m, n, 1), x(l, m, n, 2), x(l, m, n, 3), &
+                                         x(l, j, n, 1), x(l, j, n, 2), x(l, j, n, 3))
+
+                            vp3 = volpym(x(i, j, k, 1), x(i, j, k, 2), x(i, j, k, 3), &
+                                         x(l, j, k, 1), x(l, j, k, 2), x(l, j, k, 3), &
+                                         x(l, j, n, 1), x(l, j, n, 2), x(l, j, n, 3), &
+                                         x(i, j, n, 1), x(i, j, n, 2), x(i, j, n, 3))
+
+                            vp4 = volpym(x(i, m, k, 1), x(i, m, k, 2), x(i, m, k, 3), &
+                                         x(i, m, n, 1), x(i, m, n, 2), x(i, m, n, 3), &
+                                         x(l, m, n, 1), x(l, m, n, 2), x(l, m, n, 3), &
+                                         x(l, m, k, 1), x(l, m, k, 2), x(l, m, k, 3))
+
+                            vp5 = volpym(x(i, j, k, 1), x(i, j, k, 2), x(i, j, k, 3), &
+                                         x(i, m, k, 1), x(i, m, k, 2), x(i, m, k, 3), &
+                                         x(l, m, k, 1), x(l, m, k, 2), x(l, m, k, 3), &
+                                         x(l, j, k, 1), x(l, j, k, 2), x(l, j, k, 3))
+
+                            vp6 = volpym(x(i, j, n, 1), x(i, j, n, 2), x(i, j, n, 3), &
+                                         x(l, j, n, 1), x(l, j, n, 2), x(l, j, n, 3), &
+                                         x(l, m, n, 1), x(l, m, n, 2), x(l, m, n, 3), &
+                                         x(i, m, n, 1), x(i, m, n, 2), x(i, m, n, 3))
+
+                            ! Set the volume to 1/6 of the sum of the volumes of the
+                            ! pyramid. Remember that volpym computes 6 times the
+                            ! volume.
+
+                            vol(i, j, k) = sixth*(vp1 + vp2 + vp3 + vp4 + vp5 + vp6)
+
+                            ! Check the volume and update the number of positive
+                            ! and negative volumes if needed.
+
+                            if (checkAll) then
+
+                                ! Update either the number of negative or positive
+                                ! volumes. Negative volumes should only occur for left
+                                ! handed blocks. This is checked later.
+                                ! Set the logical volumeIsNeg accordingly.
+
+                                if (vol(i, j, k) < zero) then
+                                    nVolNeg = nVolNeg + 1
+                                    volumeIsNeg(i, j, k) = .true.
+                                else
+                                    nVolPos = nVolPos + 1
+                                    volumeIsNeg(i, j, k) = .false.
+                                end if
+
+                                ! Set the threshold for the volume quality.
+
+                                fact = thresVolume*abs(vol(i, j, k))
+
+                                ! Check the quality of the volume.
+
+                                badVolume = .false.
+                                if (vp1*vol(i, j, k) < zero .and. &
+                                    abs(vp1) > fact) badVolume = .true.
+                                if (vp2*vol(i, j, k) < zero .and. &
+                                    abs(vp2) > fact) badVolume = .true.
+                                if (vp3*vol(i, j, k) < zero .and. &
+                                    abs(vp3) > fact) badVolume = .true.
+                                if (vp4*vol(i, j, k) < zero .and. &
+                                    abs(vp4) > fact) badVolume = .true.
+                                if (vp5*vol(i, j, k) < zero .and. &
+                                    abs(vp5) > fact) badVolume = .true.
+                                if (vp6*vol(i, j, k) < zero .and. &
+                                    abs(vp6) > fact) badVolume = .true.
+
+                                ! Update nVolBad if this is a bad volume.
+
+                                if (badVolume) nVolBad = nVolBad + 1
+
+                            end if
+
+                            ! Set the volume to the absolute value.
+
+                            vol(i, j, k) = abs(vol(i, j, k))
+
+                        end do
+                    end do
+                end do
+
+                ! Some additional safety stuff for halo volumes.
+
+                do k = 2, kl
+                    do j = 2, jl
+                        if (vol(1, j, k)/vol(2, j, k) < haloCellRatio) then
+                            vol(1, j, k) = vol(2, j, k)
+                        end if
+                        if (vol(ie, j, k)/vol(il, j, k) < haloCellRatio) then
+                            vol(ie, j, k) = vol(il, j, k)
+                        end if
+                    end do
+                end do
+
+                do k = 2, kl
+                    do i = 1, ie
+                        if (vol(i, 1, k)/vol(i, 2, k) < haloCellRatio) then
+                            vol(i, 1, k) = vol(i, 2, k)
+                        end if
+                        if (vol(i, je, k)/voL(i, jl, k) < haloCellRatio) then
+                            vol(i, je, k) = vol(i, jl, k)
+                        end if
+                    end do
+                end do
+
+                do j = 1, je
+                    do i = 1, ie
+                        if (vol(i, j, 1)/vol(i, j, 2) < haloCellRatio) then
+                            vol(i, j, 1) = vol(i, j, 2)
+                        end if
+                        if (vol(i, j, ke)/vol(i, j, kl) < haloCellRatio) then
+                            vol(i, j, ke) = vol(i, j, kl)
+                        end if
+                    end do
+                end do
+
+                ! Determine the orientation of the block. For the fine level
+                ! this is based on the number of positive and negative
+                ! volumes; on the coarse levels the corresponding fine level
+                ! value is taken. If both positive and negative volumes are
+                ! present it is assumed that the block was intended to be
+                ! right handed. The code will terminate later on anyway.
+
+                if (level == 1) then
+                    if (nVolPos == 0) then       ! Left handed block.
+                        flowDoms(nn, level, sps)%rightHanded = .false.
+                    else                        ! Right handed (or bad) block.
+                        flowDoms(nn, level, sps)%rightHanded = .true.
+                    end if
+                else
+                    flowDoms(nn, level, sps)%rightHanded = &
+                        flowDoms(nn, 1, sps)%rightHanded
                 end if
-             enddo
-          enddo
 
-          do j=1,je
-             do i=1,ie
-                if(vol(i,j,1)/vol(i,j,2)  < haloCellRatio) then
-                   vol(i,j,1)  = vol(i,j,2)
+                ! Set the factor in the surface normals computation. For a
+                ! left handed block this factor is negative, such that the
+                ! normals still point in the direction of increasing index.
+                ! The formulae used later on assume a right handed block
+                ! and fact is used to correct this for a left handed block,
+                ! as well as the scaling factor of 0.5
+
+                if (flowDoms(nn, level, sps)%rightHanded) then
+                    fact = half
+                else
+                    fact = -half
                 end if
-                if(vol(i,j,ke)/vol(i,j,kl) < haloCellRatio) then
-                   vol(i,j,ke) = vol(i,j,kl)
+
+                ! Check if both positive and negative volumes occur. If so,
+                ! the block is bad and the counter nBlockBad is updated.
+
+                if (nVolNeg > 0 .and. nVolPos > 0) then
+                    checkVolDoms(nn, sps)%blockHasNegVol = .true.
+                    nBlockBad = nBlockBad + 1
+                else
+                    checkVolDoms(nn, sps)%blockHasNegVol = .false.
                 end if
-             enddo
-          enddo
-
-          ! Determine the orientation of the block. For the fine level
-          ! this is based on the number of positive and negative
-          ! volumes; on the coarse levels the corresponding fine level
-          ! value is taken. If both positive and negative volumes are
-          ! present it is assumed that the block was intended to be
-          ! right handed. The code will terminate later on anyway.
-
-          if(level == 1) then
-             if(nVolPos == 0) then       ! Left handed block.
-                flowDoms(nn,level,sps)%rightHanded = .false.
-             else                        ! Right handed (or bad) block.
-                flowDoms(nn,level,sps)%rightHanded = .true.
-             endif
-          else
-             flowDoms(nn,level,sps)%rightHanded = &
-                  flowDoms(nn,1,sps)%rightHanded
-          endif
-
-          ! Set the factor in the surface normals computation. For a
-          ! left handed block this factor is negative, such that the
-          ! normals still point in the direction of increasing index.
-          ! The formulae used later on assume a right handed block
-          ! and fact is used to correct this for a left handed block,
-          ! as well as the scaling factor of 0.5
-
-          if( flowDoms(nn,level,sps)%rightHanded ) then
-             fact =  half
-          else
-             fact = -half
-          endif
-
-          ! Check if both positive and negative volumes occur. If so,
-          ! the block is bad and the counter nBlockBad is updated.
-
-          if(nVolNeg > 0 .and. nVolPos > 0) then
-             checkVolDoms(nn,sps)%blockHasNegVol = .true.
-             nBlockBad = nBlockBad + 1
-          else
-             checkVolDoms(nn,sps)%blockHasNegVol = .false.
-          endif
-          !
-          !           Computation of the face normals in i-, j- and k-direction.
-          !           Formula's are valid for a right handed block; for a left
-          !           handed block the correct orientation is obtained via fact.
-          !           The normals point in the direction of increasing index.
-          !           The absolute value of fact is 0.5, because the cross
-          !           product of the two diagonals is twice the normal vector.
-          !           Note that also the normals of the first level halo cells
-          !           are computed. These are needed for the viscous fluxes.
-          !
-          ! Projected areas of cell faces in the i direction.
-
-          do k=1,ke
-             n = k -1
-             do j=1,je
-                m = j -1
-                do i=0,ie
-
-                   ! Determine the two diagonal vectors of the face.
-
-                   v1(1) = x(i,j,n,1) - x(i,m,k,1)
-                   v1(2) = x(i,j,n,2) - x(i,m,k,2)
-                   v1(3) = x(i,j,n,3) - x(i,m,k,3)
-
-                   v2(1) = x(i,j,k,1) - x(i,m,n,1)
-                   v2(2) = x(i,j,k,2) - x(i,m,n,2)
-                   v2(3) = x(i,j,k,3) - x(i,m,n,3)
-
-                   ! The face normal, which is the cross product of the two
-                   ! diagonal vectors times fact; remember that fact is
-                   ! either -0.5 or 0.5.
-
-                   si(i,j,k,1) = fact*(v1(2)*v2(3) - v1(3)*v2(2))
-                   si(i,j,k,2) = fact*(v1(3)*v2(1) - v1(1)*v2(3))
-                   si(i,j,k,3) = fact*(v1(1)*v2(2) - v1(2)*v2(1))
-
-                enddo
-             enddo
-          enddo
-
-          ! Projected areas of cell faces in the j direction.
-
-          do k=1,ke
-             n = k -1
-             do j=0,je
-                do i=1,ie
-                   l = i -1
-
-                   ! Determine the two diagonal vectors of the face.
-
-                   v1(1) = x(i,j,n,1) - x(l,j,k,1)
-                   v1(2) = x(i,j,n,2) - x(l,j,k,2)
-                   v1(3) = x(i,j,n,3) - x(l,j,k,3)
-
-                   v2(1) = x(l,j,n,1) - x(i,j,k,1)
-                   v2(2) = x(l,j,n,2) - x(i,j,k,2)
-                   v2(3) = x(l,j,n,3) - x(i,j,k,3)
-
-                   ! The face normal, which is the cross product of the two
-                   ! diagonal vectors times fact; remember that fact is
-                   ! either -0.5 or 0.5.
-
-                   sj(i,j,k,1) = fact*(v1(2)*v2(3) - v1(3)*v2(2))
-                   sj(i,j,k,2) = fact*(v1(3)*v2(1) - v1(1)*v2(3))
-                   sj(i,j,k,3) = fact*(v1(1)*v2(2) - v1(2)*v2(1))
-
-                enddo
-             enddo
-          enddo
-
-          ! Projected areas of cell faces in the k direction.
-
-          do k=0,ke
-             do j=1,je
-                m = j -1
-                do i=1,ie
-                   l = i -1
-
-                   ! Determine the two diagonal vectors of the face.
-
-                   v1(1) = x(i,j,k,1) - x(l,m,k,1)
-                   v1(2) = x(i,j,k,2) - x(l,m,k,2)
-                   v1(3) = x(i,j,k,3) - x(l,m,k,3)
-
-                   v2(1) = x(l,j,k,1) - x(i,m,k,1)
-                   v2(2) = x(l,j,k,2) - x(i,m,k,2)
-                   v2(3) = x(l,j,k,3) - x(i,m,k,3)
-
-                   ! The face normal, which is the cross product of the two
-                   ! diagonal vectors times fact; remember that fact is
-                   ! either -0.5 or 0.5.
-
-                   sk(i,j,k,1) = fact*(v1(2)*v2(3) - v1(3)*v2(2))
-                   sk(i,j,k,2) = fact*(v1(3)*v2(1) - v1(1)*v2(3))
-                   sk(i,j,k,3) = fact*(v1(1)*v2(2) - v1(2)*v2(1))
-
-                enddo
-             enddo
-          enddo
-          !
-          !           The unit normals on the boundary faces. These always point
-          !           out of the domain, so a multiplication by -1 is needed for
-          !           the iMin, jMin and kMin boundaries.
-          !
-          ! Loop over the boundary subfaces of this block.
-
-          bocoLoop: do mm=1,nBocos
-
-             ! Determine the block face on which this subface is located
-             ! and set ss and mult accordingly.
+                !
+                !           Computation of the face normals in i-, j- and k-direction.
+                !           Formula's are valid for a right handed block; for a left
+                !           handed block the correct orientation is obtained via fact.
+                !           The normals point in the direction of increasing index.
+                !           The absolute value of fact is 0.5, because the cross
+                !           product of the two diagonals is twice the normal vector.
+                !           Note that also the normals of the first level halo cells
+                !           are computed. These are needed for the viscous fluxes.
+                !
+                ! Projected areas of cell faces in the i direction.
+
+                do k = 1, ke
+                    n = k - 1
+                    do j = 1, je
+                        m = j - 1
+                        do i = 0, ie
+
+                            ! Determine the two diagonal vectors of the face.
+
+                            v1(1) = x(i, j, n, 1) - x(i, m, k, 1)
+                            v1(2) = x(i, j, n, 2) - x(i, m, k, 2)
+                            v1(3) = x(i, j, n, 3) - x(i, m, k, 3)
+
+                            v2(1) = x(i, j, k, 1) - x(i, m, n, 1)
+                            v2(2) = x(i, j, k, 2) - x(i, m, n, 2)
+                            v2(3) = x(i, j, k, 3) - x(i, m, n, 3)
+
+                            ! The face normal, which is the cross product of the two
+                            ! diagonal vectors times fact; remember that fact is
+                            ! either -0.5 or 0.5.
+
+                            si(i, j, k, 1) = fact*(v1(2)*v2(3) - v1(3)*v2(2))
+                            si(i, j, k, 2) = fact*(v1(3)*v2(1) - v1(1)*v2(3))
+                            si(i, j, k, 3) = fact*(v1(1)*v2(2) - v1(2)*v2(1))
+
+                        end do
+                    end do
+                end do
+
+                ! Projected areas of cell faces in the j direction.
+
+                do k = 1, ke
+                    n = k - 1
+                    do j = 0, je
+                        do i = 1, ie
+                            l = i - 1
 
-             select case (BCFaceID(mm))
+                            ! Determine the two diagonal vectors of the face.
 
-             case (iMin)
-                mult = -one; ss => si(1,:,:,:)
+                            v1(1) = x(i, j, n, 1) - x(l, j, k, 1)
+                            v1(2) = x(i, j, n, 2) - x(l, j, k, 2)
+                            v1(3) = x(i, j, n, 3) - x(l, j, k, 3)
 
-             case (iMax)
-                mult = one;  ss => si(il,:,:,:)
+                            v2(1) = x(l, j, n, 1) - x(i, j, k, 1)
+                            v2(2) = x(l, j, n, 2) - x(i, j, k, 2)
+                            v2(3) = x(l, j, n, 3) - x(i, j, k, 3)
 
-             case (jMin)
-                mult = -one; ss => sj(:,1,:,:)
-
-             case (jMax)
-                mult = one;  ss => sj(:,jl,:,:)
+                            ! The face normal, which is the cross product of the two
+                            ! diagonal vectors times fact; remember that fact is
+                            ! either -0.5 or 0.5.
 
-             case (kMin)
-                mult = -one; ss => sk(:,:,1,:)
+                            sj(i, j, k, 1) = fact*(v1(2)*v2(3) - v1(3)*v2(2))
+                            sj(i, j, k, 2) = fact*(v1(3)*v2(1) - v1(1)*v2(3))
+                            sj(i, j, k, 3) = fact*(v1(1)*v2(2) - v1(2)*v2(1))
 
-             case (kMax)
-                mult = one;  ss => sk(:,:,kl,:)
+                        end do
+                    end do
+                end do
+
+                ! Projected areas of cell faces in the k direction.
 
-             end select
+                do k = 0, ke
+                    do j = 1, je
+                        m = j - 1
+                        do i = 1, ie
+                            l = i - 1
 
-             ! Loop over the boundary faces of the subface.
+                            ! Determine the two diagonal vectors of the face.
 
-             do j=BCData(mm)%jcBeg, BCData(mm)%jcEnd
-                do i=BCData(mm)%icBeg, BCData(mm)%icEnd
+                            v1(1) = x(i, j, k, 1) - x(l, m, k, 1)
+                            v1(2) = x(i, j, k, 2) - x(l, m, k, 2)
+                            v1(3) = x(i, j, k, 3) - x(l, m, k, 3)
 
-                   ! Compute the inverse of the length of the normal vector
-                   ! and possibly correct for inward pointing.
+                            v2(1) = x(l, j, k, 1) - x(i, m, k, 1)
+                            v2(2) = x(l, j, k, 2) - x(i, m, k, 2)
+                            v2(3) = x(l, j, k, 3) - x(i, m, k, 3)
 
-                   xp = ss(i,j,1);  yp = ss(i,j,2);  zp = ss(i,j,3)
-                   fact = sqrt(xp*xp + yp*yp + zp*zp)
-                   if(fact > zero) fact = mult/fact
+                            ! The face normal, which is the cross product of the two
+                            ! diagonal vectors times fact; remember that fact is
+                            ! either -0.5 or 0.5.
 
-                   ! Compute the unit normal.
+                            sk(i, j, k, 1) = fact*(v1(2)*v2(3) - v1(3)*v2(2))
+                            sk(i, j, k, 2) = fact*(v1(3)*v2(1) - v1(1)*v2(3))
+                            sk(i, j, k, 3) = fact*(v1(1)*v2(2) - v1(2)*v2(1))
 
-                   BCData(mm)%norm(i,j,1) = fact*xp
-                   BCData(mm)%norm(i,j,2) = fact*yp
-                   BCData(mm)%norm(i,j,3) = fact*zp
+                        end do
+                    end do
+                end do
+                !
+                !           The unit normals on the boundary faces. These always point
+                !           out of the domain, so a multiplication by -1 is needed for
+                !           the iMin, jMin and kMin boundaries.
+                !
+                ! Loop over the boundary subfaces of this block.
+
+                bocoLoop: do mm = 1, nBocos
+
+                    ! Determine the block face on which this subface is located
+                    ! and set ss and mult accordingly.
+
+                    select case (BCFaceID(mm))
 
-                enddo
-             enddo
+                    case (iMin)
+                        mult = -one; ss => si(1, :, :, :)
+
+                    case (iMax)
+                        mult = one; ss => si(il, :, :, :)
+
+                    case (jMin)
+                        mult = -one; ss => sj(:, 1, :, :)
+
+                    case (jMax)
+                        mult = one; ss => sj(:, jl, :, :)
 
-          enddo bocoLoop
-          !
-          !           Check in debug mode the sum of the normals of the cells.
-          !           If everything is correct this should sum up to zero.
-          !
-          debugging: if( debug ) then
+                    case (kMin)
+                        mult = -one; ss => sk(:, :, 1, :)
 
-             ! Loop over the cells including the 1st level halo's.
+                    case (kMax)
+                        mult = one; ss => sk(:, :, kl, :)
 
-             do k=2,kl
-                n = k -1
-                do j=2,jl
-                   m = j -1
-                   do i=2,il
-                      l = i -1
+                    end select
 
-                      ! Store the sum of the outward pointing surrounding
-                      ! normals in v1. Due to the outward convention the
-                      ! normals with the lowest index get a negative sign;
-                      ! normals point in the direction of the higher index.
+                    ! Loop over the boundary faces of the subface.
 
-                      v1(1) = si(i,j,k,1) + sj(i,j,k,1) + sk(i,j,k,1) &
-                           - si(l,j,k,1) - sj(i,m,k,1) - sk(i,j,n,1)
-                      v1(2) = si(i,j,k,2) + sj(i,j,k,2) + sk(i,j,k,2) &
-                           - si(l,j,k,2) - sj(i,m,k,2) - sk(i,j,n,2)
-                      v1(3) = si(i,j,k,3) + sj(i,j,k,3) + sk(i,j,k,3) &
-                           - si(l,j,k,3) - sj(i,m,k,3) - sk(i,j,n,3)
+                    do j = BCData(mm)%jcBeg, BCData(mm)%jcEnd
+                        do i = BCData(mm)%icBeg, BCData(mm)%icEnd
 
-                      ! Store the inverse of the sum of the areas of the
-                      ! six faces in fact.
-
-                      fact = one/(sqrt(si(i,j,k,1)*si(i,j,k,1)  &
-                           +           si(i,j,k,2)*si(i,j,k,2)  &
-                           +           si(i,j,k,3)*si(i,j,k,3)) &
-                           +      sqrt(si(l,j,k,1)*si(l,j,k,1)  &
-                           +           si(l,j,k,2)*si(l,j,k,2)  &
-                           +           si(l,j,k,3)*si(l,j,k,3)) &
-                           +      sqrt(sj(i,j,k,1)*sj(i,j,k,1)  &
-                           +           sj(i,j,k,2)*sj(i,j,k,2)  &
-                           +           sj(i,j,k,3)*sj(i,j,k,3)) &
-                           +      sqrt(sj(i,m,k,1)*sj(i,m,k,1)  &
-                           +           sj(i,m,k,2)*sj(i,m,k,2)  &
-                           +           sj(i,m,k,3)*sj(i,m,k,3)) &
-                           +      sqrt(sk(i,j,k,1)*sk(i,j,k,1)  &
-                           +           sk(i,j,k,2)*sk(i,j,k,2)  &
-                           +           sk(i,j,k,3)*sk(i,j,k,3)) &
-                           +      sqrt(sk(i,j,n,1)*sk(i,j,n,1)  &
-                           +           sk(i,j,n,2)*sk(i,j,n,2)  &
-                           +           sk(i,j,n,3)*sk(i,j,n,3)))
+                            ! Compute the inverse of the length of the normal vector
+                            ! and possibly correct for inward pointing.
 
-                      ! Multiply v1 by fact to obtain a nonDimensional
-                      ! quantity and take tha absolute value of it.
+                            xp = ss(i, j, 1); yp = ss(i, j, 2); zp = ss(i, j, 3)
+                            fact = sqrt(xp*xp + yp*yp + zp*zp)
+                            if (fact > zero) fact = mult/fact
 
-                      v1(1) = abs(v1(1)*fact)
-                      v1(2) = abs(v1(2)*fact)
-                      v1(3) = abs(v1(3)*fact)
-
-                      ! Check if the control volume is closed.
-
-                      if(v1(1) > thresholdReal .or. &
-                           v1(2) > thresholdReal .or. &
-                           v1(3) > thresholdReal)     &
-                           call terminate("metric", &
-                           "Normals do not sum up to 0")
-
-                   enddo
-                enddo
-             enddo
-
-          endif debugging
-
-       enddo domains
-    enddo spectral
-
-    ! Determine the global number of bad blocks. The result must be
-    ! known on all processors and thus an allreduce is needed.
-
-    call mpi_allreduce(nBlockBad, nBlockBadGlobal, 1, adflow_integer, &
-         mpi_sum, ADflow_comm_world, ierr)
-
-    ! Test if bad blocks are present in the grid. If so, the action
-    ! taken depends on the grid level.
-
-    if(nBlockBadGlobal > 0) then
-       if(level == 1) then
-
-          ! Negative volumes present on the fine grid level. Print a
-          ! list of the bad volumes and terminate executation.
-
-          call writeNegVolumes(checkVolDoms)
-
-          call returnFail("metric", &
-            "Negative volumes present in grid.")
-          call mpi_barrier(ADflow_comm_world, ierr)
-
-       else
-
-          ! Coarser grid level. The fine grid is okay, but due to the
-          ! coarsening negative volumes are introduced. Print a warning.
-
-          if(myID == 0) then
-             print "(a)", "#"
-             print "(a)", "#                      Warning"
-             print stringInt1, "#* Negative volumes present on coarse grid level ", level, "."
-             print "(a)", "#* Computation continues, but be aware of this"
-             print "(a)", "#"
-          endif
-
-       endif
-    endif
-
-    ! Determine the global number of bad volumes. The result will
-    ! only be known on processor 0. The quality volume check will
-    ! only be done for the finest grid level.
-
-    if(level == 1) then
-       call mpi_reduce(nVolBad, nVolBadGlobal, 1, adflow_integer, &
-            mpi_sum, 0, ADflow_comm_world, ierr)
-
-       ! Print a warning in case bad volumes were found. Only processor
-       ! 0 prints this warning.
-
-       if(myID == 0 .and. nVolBadGlobal > 0 .and. printWarnings) then
-          write(integerString,"(i10)") nVolBadGlobal
-          integerString = adjustl(integerString)
-          integerString = trim(integerString)
-          print "(a)", "#"
-          print "(a)", "#                      Warning"
-          print stringSpace, "#", trim(integerString), "bad quality volumes found."
-          print "(a)", "# Computation will continue, but be aware of this."
-          print "(a)", "#"
-       endif
-    endif
-
-    ! Release the memory of volumeIsNeg of all local blocks again.
-
-    do sps=1,nTimeIntervalsSpectral
-       do nn=1,nDom
-          deallocate(checkVolDoms(nn,sps)%volumeIsNeg, stat=ierr)
-          if(ierr /= 0)              &
-               call terminate("metric", &
-               "Deallocation failure for volumeIsNeg")
-       enddo
-    enddo
-
-  contains
-
-    !        ================================================================
-
-    function volpym(xa,ya,za,xb,yb,zb,xc,yc,zc,xd,yd,zd)
-      !
-      !         volpym computes 6 times the volume of a pyramid. Node p,
-      !         whose coordinates are set in the subroutine metric itself,
-      !         is the top node and a-b-c-d is the quadrilateral surface.
-      !         It is assumed that the cross product vCa * vDb points in
-      !         the direction of the top node. Here vCa is the diagonal
-      !         running from node c to node a and vDb the diagonal from
-      !         node d to node b.
-      !
-      use precision
-      implicit none
-      !
-      !        Function type.
-      !
-      real(kind=realType) :: volpym
-      !
-      !        Function arguments.
-      !
-      real(kind=realType), intent(in) :: xa, ya, za, xb, yb, zb
-      real(kind=realType), intent(in) :: xc, yc, zc, xd, yd, zd
-
-      volpym = (xp - fourth*(xa + xb  + xc + xd))              &
-           * ((ya - yc)*(zb - zd) - (za - zc)*(yb - yd))   + &
-           (yp - fourth*(ya + yb  + yc + yd))              &
-           * ((za - zc)*(xb - xd) - (xa - xc)*(zb - zd))   + &
-           (zp - fourth*(za + zb  + zc + zd))              &
-           * ((xa - xc)*(yb - yd) - (ya - yc)*(xb - xd))
-
-    end function volpym
-
-  end subroutine metric
-
-  !      ==================================================================
-
-  subroutine writeNegVolumes(checkVolDoms)
-    !
-    !       writeNegVolumes writes the negative volumes of a block to
-    !       stdout. If a block is flagged to have negative volumes it is
-    !       assumed that the block is intended to be a right handed block.
-    !
-    use constants
-    use blockPointers
-    use cgnsGrid
-    use communication
-    use inputPhysics
-    use inputTimeSpectral
-    use checkVolBlock
-    use utils, only : setPointers, terminate
-    use commonFormats, only : stringSpace
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    type(checkVolBlockType), &
-         dimension(nDom,nTimeIntervalsSpectral), intent(in) :: checkVolDoms
-    !
-    !      Local variables.
-    !
-    integer :: proc, ierr
-    integer(kind=intType) :: nn, sps, i, j, k
-    real(kind=realType), dimension(3) :: xc
-    character(len=10) :: intString1, intString2, intString3
-
-    ! Processor 0 prints a message that negative volumes are present
-    ! in the grid.
-
-    if(myID == 0) then
-       print "(a)", "#"
-       print "(a)", "#                      Error"
-       print "(a)", "# Negative volumes found in the grid."
-       print "(a)", "# A list of the negative volumes is printed below"
-       print "(a)", "#"
-    endif
-
-    ! Loop over the processors such that a clean output is obtained.
-    ! This may not be the most efficient solution, but that is not
-    ! an issue here.
-
-    procLoop: do proc=0,(nProc-1)
-
-       ! Test if I'm to one that must write my bad volumes.
-
-       testIWrite: if(proc == myID) then
-
-          ! Loop over the number of spectral solutions and local blocks.
-
-          spectral: do sps=1,nTimeIntervalsSpectral
-             domains: do nn=1,nDom
-
-                ! Test for a bad block.
-
-                testBad: if( checkVolDoms(nn,sps)%blockHasNegVol ) then
-
-                   ! Set the pointers for this block.
-
-                   call setPointers(nn, 1_intType, sps)
-
-                   ! Write the name of the block. The error message
-                   ! depends a bit on the case computed. For a time
-                   ! spectral solution also the spectral solution is
-                   ! mentioned, for steady and unsteady this is not
-                   ! the case, because there is only one.
-
-                   select case (equationMode)
-                   case (steady, unsteady)
-
-                      print "(a)", "#"
-                      print stringSpace, "# Block", trim(cgnsDoms(nbkGlobal)%zoneName), &
-                        "contains the following negative volumes"
-                      print "(a)", "#================================&
-                      &===================================="
-                      print "(a)", "#"
-
-                      !====================================================
-
-                   case (timeSpectral)
-
-                      write(intString1,"(i10)") sps
-                      intString1 = adjustl(intString1)
-
-                      print "(a)", "#"
-                      print stringSpace, "# Spectral solution", trim(intString1), "block", &
-                        trim(cgnsDoms(nbkGlobal)%zoneName), "contains the following negative volumes"
-                      print "(a)", "#===================================&
-                      &================================="
-                      print "(a)", "#"
-
-                   end select
-
-                   ! Loop over the owned volumes and write the
-                   ! negative ones.
-
-                   do k=2,kl
-                      do j=2,jl
-                         do i=2,il
-                            if(checkVolDoms(nn,sps)%volumeIsNeg(i,j,k)) then
-
-                               xc(1:3) = eighth*(x(i-1,j-1,k-1,1:3) &
-                                    +         x(i,  j-1,k-1,1:3) &
-                                    +         x(i-1,j  ,k-1,1:3) &
-                                    +         x(i,  j  ,k-1,1:3) &
-                                    +         x(i-1,j-1,k  ,1:3) &
-                                    +         x(i,  j-1,k  ,1:3) &
-                                    +         x(i-1,j  ,k  ,1:3) &
-                                    +         x(i,  j  ,k  ,1:3))
+                            ! Compute the unit normal.
 
-                               write(intString1,"(i10)") i
-                               write(intString2,"(i10)") j
-                               write(intString3,"(i10)") k
+                            BCData(mm)%norm(i, j, 1) = fact*xp
+                            BCData(mm)%norm(i, j, 2) = fact*yp
+                            BCData(mm)%norm(i, j, 3) = fact*zp
 
-                               intString1 = adjustl(intString1)
-                               intString2 = adjustl(intString2)
-                               intString3 = adjustl(intString3)
+                        end do
+                    end do
 
-                               print "(7(A), 4(ES10.3, A))", "# Indices (", trim(intString1), &
-                                     ",", trim(intString2), ",", trim(intString3), "), coordinates (", &
-                                     xc(1), ",", xc(2), ",", xc(3), "), Volume: ", -vol(i,j,k)
+                end do bocoLoop
+                !
+                !           Check in debug mode the sum of the normals of the cells.
+                !           If everything is correct this should sum up to zero.
+                !
+                debugging: if (debug) then
 
-                            endif
-                         enddo
-                      enddo
-                   enddo
+                    ! Loop over the cells including the 1st level halo's.
 
+                    do k = 2, kl
+                        n = k - 1
+                        do j = 2, jl
+                            m = j - 1
+                            do i = 2, il
+                                l = i - 1
 
-                endif testBad
-             enddo domains
-          enddo spectral
+                                ! Store the sum of the outward pointing surrounding
+                                ! normals in v1. Due to the outward convention the
+                                ! normals with the lowest index get a negative sign;
+                                ! normals point in the direction of the higher index.
 
-       endif testIWrite
+                                v1(1) = si(i, j, k, 1) + sj(i, j, k, 1) + sk(i, j, k, 1) &
+                                        - si(l, j, k, 1) - sj(i, m, k, 1) - sk(i, j, n, 1)
+                                v1(2) = si(i, j, k, 2) + sj(i, j, k, 2) + sk(i, j, k, 2) &
+                                        - si(l, j, k, 2) - sj(i, m, k, 2) - sk(i, j, n, 2)
+                                v1(3) = si(i, j, k, 3) + sj(i, j, k, 3) + sk(i, j, k, 3) &
+                                        - si(l, j, k, 3) - sj(i, m, k, 3) - sk(i, j, n, 3)
 
-       ! Synchronize the processors to avoid a messy output.
+                                ! Store the inverse of the sum of the areas of the
+                                ! six faces in fact.
 
-       call mpi_barrier(ADflow_comm_world, ierr)
+                                fact = one/(sqrt(si(i, j, k, 1)*si(i, j, k, 1) &
+                                                 + si(i, j, k, 2)*si(i, j, k, 2) &
+                                                 + si(i, j, k, 3)*si(i, j, k, 3)) &
+                                            + sqrt(si(l, j, k, 1)*si(l, j, k, 1) &
+                                                   + si(l, j, k, 2)*si(l, j, k, 2) &
+                                                   + si(l, j, k, 3)*si(l, j, k, 3)) &
+                                            + sqrt(sj(i, j, k, 1)*sj(i, j, k, 1) &
+                                                   + sj(i, j, k, 2)*sj(i, j, k, 2) &
+                                                   + sj(i, j, k, 3)*sj(i, j, k, 3)) &
+                                            + sqrt(sj(i, m, k, 1)*sj(i, m, k, 1) &
+                                                   + sj(i, m, k, 2)*sj(i, m, k, 2) &
+                                                   + sj(i, m, k, 3)*sj(i, m, k, 3)) &
+                                            + sqrt(sk(i, j, k, 1)*sk(i, j, k, 1) &
+                                                   + sk(i, j, k, 2)*sk(i, j, k, 2) &
+                                                   + sk(i, j, k, 3)*sk(i, j, k, 3)) &
+                                            + sqrt(sk(i, j, n, 1)*sk(i, j, n, 1) &
+                                                   + sk(i, j, n, 2)*sk(i, j, n, 2) &
+                                                   + sk(i, j, n, 3)*sk(i, j, n, 3)))
 
-    enddo procLoop
+                                ! Multiply v1 by fact to obtain a nonDimensional
+                                ! quantity and take tha absolute value of it.
 
-  end subroutine writeNegVolumes
-  subroutine faceRotationMatrices(level, allocMem)
-    !
-    !       faceRotationMatrices computes the rotation matrices on the
-    !       faces, such that for a rotationally periodic the nonlinear
-    !       reconstruction in the upwind schemes is consistent with its
-    !       periodic neighbor.
-    !
-    use constants
-    use blockPointers
-    use inputDiscretization
-    use inputTimeSpectral
-    use section
-    use utils, only : setPointers, terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level
-    logical,               intent(in) :: allocMem
-    !
-    !      Local variables.
-    !
-    integer :: ierr
+                                v1(1) = abs(v1(1)*fact)
+                                v1(2) = abs(v1(2)*fact)
+                                v1(3) = abs(v1(3)*fact)
 
-    integer(kind=intType) :: nn, sps, mm
+                                ! Check if the control volume is closed.
 
-    real(kind=realType), dimension(:,:,:),   pointer :: xFace
-    real(kind=realType), dimension(:,:,:,:), pointer :: rotFace
+                                if (v1(1) > thresholdReal .or. &
+                                    v1(2) > thresholdReal .or. &
+                                    v1(3) > thresholdReal) &
+                                    call terminate("metric", &
+                                                   "Normals do not sum up to 0")
 
-    real(kind=realType), dimension(3) :: axis, vecR1, vecR2, rotCenter
+                            end do
+                        end do
+                    end do
 
-    ! If this is not the finest level, return. The matrices are
-    ! only needed on the finest grid.
+                end if debugging
 
-    if(level /= 1) return
+            end do domains
+        end do spectral
 
-    ! Check if an upwind scheme is used. If not, return.
+        ! Determine the global number of bad blocks. The result must be
+        ! known on all processors and thus an allreduce is needed.
 
-    if(spaceDiscr /= upwind) return
+        call mpi_allreduce(nBlockBad, nBlockBadGlobal, 1, adflow_integer, &
+                           mpi_sum, ADflow_comm_world, ierr)
+
+        ! Test if bad blocks are present in the grid. If so, the action
+        ! taken depends on the grid level.
+
+        if (nBlockBadGlobal > 0) then
+            if (level == 1) then
+
+                ! Negative volumes present on the fine grid level. Print a
+                ! list of the bad volumes and terminate executation.
+
+                call writeNegVolumes(checkVolDoms)
+
+                call returnFail("metric", &
+                                "Negative volumes present in grid.")
+                call mpi_barrier(ADflow_comm_world, ierr)
+
+            else
+
+                ! Coarser grid level. The fine grid is okay, but due to the
+                ! coarsening negative volumes are introduced. Print a warning.
+
+                if (myID == 0) then
+                    print "(a)", "#"
+                    print "(a)", "#                      Warning"
+                    print stringInt1, "#* Negative volumes present on coarse grid level ", level, "."
+                    print "(a)", "#* Computation continues, but be aware of this"
+                    print "(a)", "#"
+                end if
 
-    ! Check if a limiter is used at all. The rotation matrices are
-    ! only needed when a nonlinear construction is used.
+            end if
+        end if
+
+        ! Determine the global number of bad volumes. The result will
+        ! only be known on processor 0. The quality volume check will
+        ! only be done for the finest grid level.
+
+        if (level == 1) then
+            call mpi_reduce(nVolBad, nVolBadGlobal, 1, adflow_integer, &
+                            mpi_sum, 0, ADflow_comm_world, ierr)
+
+            ! Print a warning in case bad volumes were found. Only processor
+            ! 0 prints this warning.
+
+            if (myID == 0 .and. nVolBadGlobal > 0 .and. printWarnings) then
+                write (integerString, "(i10)") nVolBadGlobal
+                integerString = adjustl(integerString)
+                integerString = trim(integerString)
+                print "(a)", "#"
+                print "(a)", "#                      Warning"
+                print stringSpace, "#", trim(integerString), "bad quality volumes found."
+                print "(a)", "# Computation will continue, but be aware of this."
+                print "(a)", "#"
+            end if
+        end if
+
+        ! Release the memory of volumeIsNeg of all local blocks again.
+
+        do sps = 1, nTimeIntervalsSpectral
+            do nn = 1, nDom
+                deallocate (checkVolDoms(nn, sps)%volumeIsNeg, stat=ierr)
+                if (ierr /= 0) &
+                    call terminate("metric", &
+                                   "Deallocation failure for volumeIsNeg")
+            end do
+        end do
+
+    contains
+
+        !        ================================================================
+
+        function volpym(xa, ya, za, xb, yb, zb, xc, yc, zc, xd, yd, zd)
+            !
+            !         volpym computes 6 times the volume of a pyramid. Node p,
+            !         whose coordinates are set in the subroutine metric itself,
+            !         is the top node and a-b-c-d is the quadrilateral surface.
+            !         It is assumed that the cross product vCa * vDb points in
+            !         the direction of the top node. Here vCa is the diagonal
+            !         running from node c to node a and vDb the diagonal from
+            !         node d to node b.
+            !
+            use precision
+            implicit none
+            !
+            !        Function type.
+            !
+            real(kind=realType) :: volpym
+            !
+            !        Function arguments.
+            !
+            real(kind=realType), intent(in) :: xa, ya, za, xb, yb, zb
+            real(kind=realType), intent(in) :: xc, yc, zc, xd, yd, zd
+
+            volpym = (xp - fourth*(xa + xb + xc + xd)) &
+                     *((ya - yc)*(zb - zd) - (za - zc)*(yb - yd)) + &
+                     (yp - fourth*(ya + yb + yc + yd)) &
+                     *((za - zc)*(xb - xd) - (xa - xc)*(zb - zd)) + &
+                     (zp - fourth*(za + zb + zc + zd)) &
+                     *((xa - xc)*(yb - yd) - (ya - yc)*(xb - xd))
+
+        end function volpym
+
+    end subroutine metric
+
+    !      ==================================================================
+
+    subroutine writeNegVolumes(checkVolDoms)
+        !
+        !       writeNegVolumes writes the negative volumes of a block to
+        !       stdout. If a block is flagged to have negative volumes it is
+        !       assumed that the block is intended to be a right handed block.
+        !
+        use constants
+        use blockPointers
+        use cgnsGrid
+        use communication
+        use inputPhysics
+        use inputTimeSpectral
+        use checkVolBlock
+        use utils, only: setPointers, terminate
+        use commonFormats, only: stringSpace
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        type(checkVolBlockType), &
+            dimension(nDom, nTimeIntervalsSpectral), intent(in) :: checkVolDoms
+        !
+        !      Local variables.
+        !
+        integer :: proc, ierr
+        integer(kind=intType) :: nn, sps, i, j, k
+        real(kind=realType), dimension(3) :: xc
+        character(len=10) :: intString1, intString2, intString3
+
+        ! Processor 0 prints a message that negative volumes are present
+        ! in the grid.
+
+        if (myID == 0) then
+            print "(a)", "#"
+            print "(a)", "#                      Error"
+            print "(a)", "# Negative volumes found in the grid."
+            print "(a)", "# A list of the negative volumes is printed below"
+            print "(a)", "#"
+        end if
+
+        ! Loop over the processors such that a clean output is obtained.
+        ! This may not be the most efficient solution, but that is not
+        ! an issue here.
+
+        procLoop: do proc = 0, (nProc - 1)
+
+            ! Test if I'm to one that must write my bad volumes.
+
+            testIWrite: if (proc == myID) then
+
+                ! Loop over the number of spectral solutions and local blocks.
+
+                spectral: do sps = 1, nTimeIntervalsSpectral
+                    domains: do nn = 1, nDom
+
+                        ! Test for a bad block.
+
+                        testBad: if (checkVolDoms(nn, sps)%blockHasNegVol) then
+
+                            ! Set the pointers for this block.
+
+                            call setPointers(nn, 1_intType, sps)
+
+                            ! Write the name of the block. The error message
+                            ! depends a bit on the case computed. For a time
+                            ! spectral solution also the spectral solution is
+                            ! mentioned, for steady and unsteady this is not
+                            ! the case, because there is only one.
+
+                            select case (equationMode)
+                            case (steady, unsteady)
+
+                                print "(a)", "#"
+                                print stringSpace, "# Block", trim(cgnsDoms(nbkGlobal)%zoneName), &
+                                    "contains the following negative volumes"
+                                print "(a)", "#================================&
+                                &===================================="
+                                print "(a)", "#"
+
+                                !====================================================
+
+                            case (timeSpectral)
+
+                                write (intString1, "(i10)") sps
+                                intString1 = adjustl(intString1)
+
+                                print "(a)", "#"
+                                print stringSpace, "# Spectral solution", trim(intString1), "block", &
+                                    trim(cgnsDoms(nbkGlobal)%zoneName), "contains the following negative volumes"
+                                print "(a)", "#===================================&
+                                &================================="
+                                print "(a)", "#"
+
+                            end select
+
+                            ! Loop over the owned volumes and write the
+                            ! negative ones.
+
+                            do k = 2, kl
+                                do j = 2, jl
+                                    do i = 2, il
+                                        if (checkVolDoms(nn, sps)%volumeIsNeg(i, j, k)) then
+
+                                            xc(1:3) = eighth*(x(i - 1, j - 1, k - 1, 1:3) &
+                                                              + x(i, j - 1, k - 1, 1:3) &
+                                                              + x(i - 1, j, k - 1, 1:3) &
+                                                              + x(i, j, k - 1, 1:3) &
+                                                              + x(i - 1, j - 1, k, 1:3) &
+                                                              + x(i, j - 1, k, 1:3) &
+                                                              + x(i - 1, j, k, 1:3) &
+                                                              + x(i, j, k, 1:3))
 
-    if(limiter == firstOrder .or. limiter == noLimiter) return
+                                            write (intString1, "(i10)") i
+                                            write (intString2, "(i10)") j
+                                            write (intString3, "(i10)") k
 
-    ! Check if rotational periodicity occurs at all. If not, there
-    ! is no need for the rotation matrices either.
+                                            intString1 = adjustl(intString1)
+                                            intString2 = adjustl(intString2)
+                                            intString3 = adjustl(intString3)
 
-    do nn=1,nSections
-       if(sections(nn)%nSlices > 1) exit
-    enddo
-    if(nn > nSections) return
+                                            print "(7(A), 4(ES10.3, A))", "# Indices (", trim(intString1), &
+                                                ",", trim(intString2), ",", trim(intString3), "), coordinates (", &
+                                                xc(1), ",", xc(2), ",", xc(3), "), Volume: ", -vol(i, j, k)
 
-    ! Loop over the number of blocks..
+                                        end if
+                                    end do
+                                end do
+                            end do
 
-    domains: do nn=1,nDom
+                        end if testBad
+                    end do domains
+                end do spectral
 
-       ! Hard coded section ID to 1
-       sectionID = 1
+            end if testIWrite
 
-       ! Determine the two unit vectors in the plane normal to
-       ! the rotation axis of this section.
+            ! Synchronize the processors to avoid a messy output.
 
-       axis      = sections(sectionID)%rotAxis
-       rotCenter = sections(sectionID)%rotCenter
-       call unitVectorsInAxialPlane(axis, vecR1, vecR2)
+            call mpi_barrier(ADflow_comm_world, ierr)
 
-       ! Loop over the number of time instances.
+        end do procLoop
 
-       spectral: do sps=1,nTimeIntervalsSpectral
+    end subroutine writeNegVolumes
+    subroutine faceRotationMatrices(level, allocMem)
+        !
+        !       faceRotationMatrices computes the rotation matrices on the
+        !       faces, such that for a rotationally periodic the nonlinear
+        !       reconstruction in the upwind schemes is consistent with its
+        !       periodic neighbor.
+        !
+        use constants
+        use blockPointers
+        use inputDiscretization
+        use inputTimeSpectral
+        use section
+        use utils, only: setPointers, terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level
+        logical, intent(in) :: allocMem
+        !
+        !      Local variables.
+        !
+        integer :: ierr
 
-          ! Check if the memory for the rotation matrices must be
-          ! allocated and do so if needed.
+        integer(kind=intType) :: nn, sps, mm
 
-          if( allocMem ) then
+        real(kind=realType), dimension(:, :, :), pointer :: xFace
+        real(kind=realType), dimension(:, :, :, :), pointer :: rotFace
 
-             il = flowDoms(nn,1,sps)%il
-             jl = flowDoms(nn,1,sps)%jl
-             kl = flowDoms(nn,1,sps)%kl
+        real(kind=realType), dimension(3) :: axis, vecR1, vecR2, rotCenter
 
-             allocate(flowDoms(nn,1,sps)%rotMatrixI(il,2:jl,2:kl,3,3), &
-                  flowDoms(nn,1,sps)%rotMatrixJ(2:il,jl,2:kl,3,3), &
-                  flowDoms(nn,1,sps)%rotMatrixK(2:il,2:jl,kl,3,3), &
-                  stat=ierr)
-             if(ierr /= 0)                            &
-                  call terminate("faceRotationMatrices", &
-                  "Memory allocation failure for the &
-                  &rotation matrices.")
-          endif
+        ! If this is not the finest level, return. The matrices are
+        ! only needed on the finest grid.
 
-          ! Set the pointers to this block.
+        if (level /= 1) return
 
-          call setPointers(nn,level,sps)
+        ! Check if an upwind scheme is used. If not, return.
 
-          ! The rotation matrices for the i-faces.
+        if (spaceDiscr /= upwind) return
 
-          do mm=1,il
-             xFace   => x(mm,1:,1:,:);
-             rotFace => rotMatrixI(mm,:,:,:,:)
+        ! Check if a limiter is used at all. The rotation matrices are
+        ! only needed when a nonlinear construction is used.
 
-             call computeRotMatrixFace(xFace, rotFace, jl, kl)
-          enddo
+        if (limiter == firstOrder .or. limiter == noLimiter) return
 
-          ! The rotation matrices for the j-faces.
+        ! Check if rotational periodicity occurs at all. If not, there
+        ! is no need for the rotation matrices either.
 
-          do mm=1,jl
-             xFace   => x(1:,mm,1:,:);
-             rotFace => rotMatrixJ(:,mm,:,:,:)
+        do nn = 1, nSections
+            if (sections(nn)%nSlices > 1) exit
+        end do
+        if (nn > nSections) return
 
-             call computeRotMatrixFace(xFace, rotFace, il, kl)
-          enddo
+        ! Loop over the number of blocks..
 
-          ! The rotation matrices for the k-faces.
+        domains: do nn = 1, nDom
 
-          do mm=1,kl
-             xFace   => x(1:,1:,mm,:);
-             rotFace => rotMatrixK(:,:,mm,:,:)
+            ! Hard coded section ID to 1
+            sectionID = 1
 
-             call computeRotMatrixFace(xFace, rotFace, il, jl)
-          enddo
+            ! Determine the two unit vectors in the plane normal to
+            ! the rotation axis of this section.
 
-       enddo spectral
-    enddo domains
+            axis = sections(sectionID)%rotAxis
+            rotCenter = sections(sectionID)%rotCenter
+            call unitVectorsInAxialPlane(axis, vecR1, vecR2)
 
-    !=================================================================
+            ! Loop over the number of time instances.
 
-  contains
+            spectral: do sps = 1, nTimeIntervalsSpectral
 
-    !===============================================================
+                ! Check if the memory for the rotation matrices must be
+                ! allocated and do so if needed.
 
-    subroutine computeRotMatrixFace(xx, rotMat, iil, jjl)
-      !
-      !         computeRotMatrixFace is an internal subroutine, which
-      !         computes the rotation matrix from Cartesian to local
-      !         cylindrical velocity components for the face centers.
-      !
-      implicit none
-      !
-      !        Subroutine arguments.
-      !
-      integer(kind=intType), intent(in) :: iil, jjl
+                if (allocMem) then
 
-      real(kind=realType), dimension(:,:,:),     intent(in)  :: xx
-      real(kind=realType), dimension(2:,2:,:,:), intent(out) :: rotMat
-      !
-      !        Local variables.
-      !
-      integer(kind=intType) :: i, j
+                    il = flowDoms(nn, 1, sps)%il
+                    jl = flowDoms(nn, 1, sps)%jl
+                    kl = flowDoms(nn, 1, sps)%kl
 
-      real(kind=realType) :: r1, r2, rInv, cosTheta, sinTheta
+                    allocate (flowDoms(nn, 1, sps)%rotMatrixI(il, 2:jl, 2:kl, 3, 3), &
+                              flowDoms(nn, 1, sps)%rotMatrixJ(2:il, jl, 2:kl, 3, 3), &
+                              flowDoms(nn, 1, sps)%rotMatrixK(2:il, 2:jl, kl, 3, 3), &
+                              stat=ierr)
+                    if (ierr /= 0) &
+                         call terminate("faceRotationMatrices", &
+                         "Memory allocation failure for the &
+                         &rotation matrices.")
+                end if
 
-      real(kind=realType), dimension(3) :: xF
+                ! Set the pointers to this block.
 
-      ! Loop over the face centers.
+                call setPointers(nn, level, sps)
 
-      do j=2,jjl
-         do i=2,iil
+                ! The rotation matrices for the i-faces.
 
-            ! Compute the coordinates of the face center relative to
-            ! the center of rotation.
+                do mm = 1, il
+                    xFace => x(mm, 1:, 1:, :); 
+                    rotFace => rotMatrixI(mm, :, :, :, :)
 
-            xF(1) = fourth*(xx(i-1,j-1,1) + xx(i-1,j,1) &
-                 +         xx(i,  j-1,1) + xx(i,  j,1)) - rotCenter(1)
-            xF(2) = fourth*(xx(i-1,j-1,2) + xx(i-1,j,2) &
-                 +         xx(i,  j-1,2) + xx(i,  j,2)) - rotCenter(2)
-            xF(3) = fourth*(xx(i-1,j-1,3) + xx(i-1,j,3) &
-                 +         xx(i,  j-1,3) + xx(i,  j,3)) - rotCenter(3)
-
-            ! Determine the two radial components for this point.
-
-            r1 = xF(1)*vecR1(1) + xF(2)*vecR1(2) + xF(3)*vecR1(3)
-            r2 = xF(1)*vecR2(1) + xF(2)*vecR2(2) + xF(3)*vecR2(3)
-
-            ! Determine the sine and cosine of the polar angle.
-
-            rInv     = one/sqrt(r1*r1 + r2*r2)
-            cosTheta = r1*rInv
-            sinTheta = r2*rInv
-
-            ! Compute the transformation matrix.
-
-            rotMat(i,j,1,1) = axis(1)
-            rotMat(i,j,1,2) = axis(2)
-            rotMat(i,j,1,3) = axis(3)
-
-            rotMat(i,j,2,1) = cosTheta*vecR1(1) + sinTheta*vecR2(1)
-            rotMat(i,j,2,2) = cosTheta*vecR1(2) + sinTheta*vecR2(2)
-            rotMat(i,j,2,3) = cosTheta*vecR1(3) + sinTheta*vecR2(3)
-
-            rotMat(i,j,3,1) = cosTheta*vecR2(1) - sinTheta*vecR1(1)
-            rotMat(i,j,3,2) = cosTheta*vecR2(2) - sinTheta*vecR1(2)
-            rotMat(i,j,3,3) = cosTheta*vecR2(3) - sinTheta*vecR1(3)
-
-         enddo
-      enddo
-
-    end subroutine computeRotMatrixFace
-
-  end subroutine faceRotationMatrices
-  subroutine updateCoordinatesAllLevels
-    !
-    !       updateCoordinatesAllLevels updates the coordinates of all
-    !       grid levels, assuming that the owned coordinates of the fine
-    !       grid are known.
-    !
-    use constants
-    use block
-    use iteration
-    use inputTimeSpectral
-    use blockPointers
-    use coarseUtils, only :coarseOwnedCoordinates
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nLevels, nn
-    real(kind=realType)   :: origGroundLevel
-
-    ! Determine the halo coordinates of the fine level.
-    origGroundLevel = groundLevel
-    groundLevel     = 1
-    call xhalo(groundLevel)
-
-    ! Loop over the coarse grid levels; first the owned coordinates
-    ! are determined, followed by the halo's.
-
-    nLevels = ubound(flowDoms,2)
-    do nn=(groundLevel+1),nLevels
-       call coarseOwnedCoordinates(nn)
-       call xhalo(nn)
-    enddo
-
-    groundLevel = origGroundLevel
-
-  end subroutine updateCoordinatesAllLevels
-
-  !      ==================================================================
-
-  subroutine updateMetricsAllLevels
-    !
-    !       updateMetricsAllLevels recomputes the metrics on all grid
-    !       levels. This routine is typically called when the coordinates
-    !       have changed, but the connectivity remains the same, i.e. for
-    !       moving or deforming mesh problems.
-    !
-    use constants
-    use block
-    use iteration
-    use inputphysics
-    use inputIteration
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nLevels, nn
-
-    ! Loop over the grid levels and call metric and checkSymmetry.
-
-    nLevels = ubound(flowDoms,2)
-    do nn=groundLevel,nLevels
-       if(equationMode == unsteady) then
-          call metric(nn)
-       else
-          call metric(nn)
-       end if
-
-       if (printWarnings) then
-          call checkSymmetry(nn)
-       end if
-    enddo
-
-  end subroutine updateMetricsAllLevels
-
-  subroutine updateGridVelocitiesAllLevels
-
-    !
-    !       updateGridVelocitesAllLevels recomputes the rotational
-    !       parameters on all grid
-    !       levels. This routine is typically called when the coordinates
-    !       have changed, but the connectivity remains the same, i.e. for
-    !       moving or deforming mesh problems.
-    !
-    use constants
-    use block
-    use iteration
-    use section
-    use monitor
-    use inputTimeSpectral
-    use inputPhysics
-    use solverUtils
-    implicit none
-
-    !subroutine variables
-
-    !Local Variables
-
-    integer(kind=inttype):: mm,nnn
-
-    real(kind=realType), dimension(nSections) :: t
-
-    do mm=1,nTimeIntervalsSpectral
-
-       ! Compute the time, which corresponds to this spectral solution.
-       ! For steady and unsteady mode this is simply the restart time;
-       ! for the spectral mode the periodic time must be taken into
-       ! account, which can be different for every section.
-
-       t = timeUnsteadyRestart
-
-       if(equationMode == timeSpectral) then
-          do nnn=1,nSections
-             t(nnn) = t(nnn) + (mm-1)*sections(nnn)%timePeriod &
-                  /         real(nTimeIntervalsSpectral,realType)
-          enddo
-       endif
-
-       call gridVelocitiesFineLevel(.false., t, mm)
-
-       call gridVelocitiesCoarseLevels(mm)
-       call normalVelocitiesAllLevels(mm)
-
-       call slipVelocitiesFineLevel(.false., t, mm)
-       call slipVelocitiesCoarseLevels(mm)
-
-    enddo
-
-  end subroutine updateGridVelocitiesAllLevels
-
-  subroutine updatePeriodicInfoAllLevels
-
-
-    !
-    !       updatePeriodicInfoAllLevels recomputes the spectral parameters
-    !       on all grid levels. This routine is typically called when the
-    !       frequnecy or amplitude of the oscillation in the time spectral
-    !       computation has changed
-    !
-    use block
-    use iteration
-    use section
-    use monitor
-    use inputTimeSpectral
-    use inputPhysics
-    use communication
-    use initializeFlow, onlY : timeSpectralMatrices
-    use partitioning, only : fineGridSpectralCoor, timeRotMatricesSpectral, &
-         timePeriodSpectral
-    !
-    implicit none
-
-    ! Determine for the time spectral mode the time of one period,
-    ! the rotation matrices for the velocity components and
-    ! create the fine grid coordinates of all time spectral locations.
-
-    call timePeriodSpectral
-    call timeRotMatricesSpectral
-    ! solve for the new grid only for rigid rotation with analytical deformation case
-    if (.NOT. usetsinterpolatedgridvelocity) then 
-       call fineGridSpectralCoor
-    end if
-    call timeSpectralMatrices
-
-
-  end subroutine updatePeriodicInfoAllLevels
-  subroutine unitVectorsInAxialPlane(axis, vecR1, vecR2)
-    !
-    !       unitVectorsInAxialPlane computes from the given unit vector
-    !       axis the two unit vectors which describe the plane normal to
-    !       axis. There is of course an ambiguity in this choice, but this
-    !       is not a problem as long as the choice is consistent
-    !       throughout the code.
-    !
-    use constants
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    real(kind=realType), dimension(3), intent(in)  :: axis
-    real(kind=realType), dimension(3), intent(out) :: vecR1, vecR2
-    !
-    !      Local variables.
-    !
-    real(kind=realType) :: dot
-
-    ! The vectors which span the axial plane must be normal to axis.
-    ! For the first vector try first the y-axis. If not good enough
-    ! use the z-axis.
-
-    if(abs(axis(2)) < 0.707107_realType) then
-       vecR1(1) = zero
-       vecR1(2) = one
-       vecR1(3) = zero
-    else
-       vecR1(1) = zero
-       vecR1(2) = zero
-       vecR1(3) = one
-    endif
-
-    ! Make sure that vecR1 is normal to axis. Create a unit
-    ! vector again.
-
-    dot = vecR1(1)*axis(1) + vecR1(2)*axis(2) + vecR1(3)*axis(3)
-    vecR1(1) = vecR1(1) - dot*axis(1)
-    vecR1(2) = vecR1(2) - dot*axis(2)
-    vecR1(3) = vecR1(3) - dot*axis(3)
-
-    dot = one/sqrt(vecR1(1)**2 + vecR1(2)**2 + vecR1(3)**2)
-    vecR1(1) = vecR1(1)*dot
-    vecR1(2) = vecR1(2)*dot
-    vecR1(3) = vecR1(3)*dot
-
-    ! Create the second vector which spans the axial plane. This must
-    ! be normal to both axis and vecR1, i.e. the cross-product.
-
-    vecR2(1) = axis(2)*vecR1(3) - axis(3)*vecR1(2)
-    vecR2(2) = axis(3)*vecR1(1) - axis(1)*vecR1(3)
-    vecR2(3) = axis(1)*vecR1(2) - axis(2)*vecR1(1)
-
-  end subroutine unitVectorsInAxialPlane
-
-  subroutine preprocessingADjoint
-    !
-    !      Perform the preprocessing tasks for the adjoint solver. This
-    !      routine is called only once. The memory allcoated here is
-    !      deallocated in src/utils/releaseMemory.f90
-    !
-    use constants
-    use communication, only : adflow_comm_world
-    use adjointVars, only :nCellsLocal, nNOdesLocal
-    use flowVarRefState, only : nw, nwf
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use inputAdjoint, only : frozenTurbulence
-    use ADjointPETSc, only: w_like1, w_like2, PETScIerr, &
-         psi_like1, psi_like2, x_like, psi_like3, adjointPETScPreProcVarsAllocated
-    use utils, only : setPointers, EChk
+                    call computeRotMatrixFace(xFace, rotFace, jl, kl)
+                end do
+
+                ! The rotation matrices for the j-faces.
+
+                do mm = 1, jl
+                    xFace => x(1:, mm, 1:, :); 
+                    rotFace => rotMatrixJ(:, mm, :, :, :)
+
+                    call computeRotMatrixFace(xFace, rotFace, il, kl)
+                end do
+
+                ! The rotation matrices for the k-faces.
+
+                do mm = 1, kl
+                    xFace => x(1:, 1:, mm, :); 
+                    rotFace => rotMatrixK(:, :, mm, :, :)
+
+                    call computeRotMatrixFace(xFace, rotFace, il, jl)
+                end do
+
+            end do spectral
+        end do domains
+
+        !=================================================================
+
+    contains
+
+        !===============================================================
+
+        subroutine computeRotMatrixFace(xx, rotMat, iil, jjl)
+            !
+            !         computeRotMatrixFace is an internal subroutine, which
+            !         computes the rotation matrix from Cartesian to local
+            !         cylindrical velocity components for the face centers.
+            !
+            implicit none
+            !
+            !        Subroutine arguments.
+            !
+            integer(kind=intType), intent(in) :: iil, jjl
+
+            real(kind=realType), dimension(:, :, :), intent(in)  :: xx
+            real(kind=realType), dimension(2:, 2:, :, :), intent(out) :: rotMat
+            !
+            !        Local variables.
+            !
+            integer(kind=intType) :: i, j
+
+            real(kind=realType) :: r1, r2, rInv, cosTheta, sinTheta
+
+            real(kind=realType), dimension(3) :: xF
+
+            ! Loop over the face centers.
+
+            do j = 2, jjl
+                do i = 2, iil
+
+                    ! Compute the coordinates of the face center relative to
+                    ! the center of rotation.
+
+                    xF(1) = fourth*(xx(i - 1, j - 1, 1) + xx(i - 1, j, 1) &
+                                    + xx(i, j - 1, 1) + xx(i, j, 1)) - rotCenter(1)
+                    xF(2) = fourth*(xx(i - 1, j - 1, 2) + xx(i - 1, j, 2) &
+                                    + xx(i, j - 1, 2) + xx(i, j, 2)) - rotCenter(2)
+                    xF(3) = fourth*(xx(i - 1, j - 1, 3) + xx(i - 1, j, 3) &
+                                    + xx(i, j - 1, 3) + xx(i, j, 3)) - rotCenter(3)
+
+                    ! Determine the two radial components for this point.
+
+                    r1 = xF(1)*vecR1(1) + xF(2)*vecR1(2) + xF(3)*vecR1(3)
+                    r2 = xF(1)*vecR2(1) + xF(2)*vecR2(2) + xF(3)*vecR2(3)
+
+                    ! Determine the sine and cosine of the polar angle.
+
+                    rInv = one/sqrt(r1*r1 + r2*r2)
+                    cosTheta = r1*rInv
+                    sinTheta = r2*rInv
+
+                    ! Compute the transformation matrix.
+
+                    rotMat(i, j, 1, 1) = axis(1)
+                    rotMat(i, j, 1, 2) = axis(2)
+                    rotMat(i, j, 1, 3) = axis(3)
+
+                    rotMat(i, j, 2, 1) = cosTheta*vecR1(1) + sinTheta*vecR2(1)
+                    rotMat(i, j, 2, 2) = cosTheta*vecR1(2) + sinTheta*vecR2(2)
+                    rotMat(i, j, 2, 3) = cosTheta*vecR1(3) + sinTheta*vecR2(3)
+
+                    rotMat(i, j, 3, 1) = cosTheta*vecR2(1) - sinTheta*vecR1(1)
+                    rotMat(i, j, 3, 2) = cosTheta*vecR2(2) - sinTheta*vecR1(2)
+                    rotMat(i, j, 3, 3) = cosTheta*vecR2(3) - sinTheta*vecR1(3)
+
+                end do
+            end do
+
+        end subroutine computeRotMatrixFace
+
+    end subroutine faceRotationMatrices
+    subroutine updateCoordinatesAllLevels
+        !
+        !       updateCoordinatesAllLevels updates the coordinates of all
+        !       grid levels, assuming that the owned coordinates of the fine
+        !       grid are known.
+        !
+        use constants
+        use block
+        use iteration
+        use inputTimeSpectral
+        use blockPointers
+        use coarseUtils, only: coarseOwnedCoordinates
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nLevels, nn
+        real(kind=realType)   :: origGroundLevel
+
+        ! Determine the halo coordinates of the fine level.
+        origGroundLevel = groundLevel
+        groundLevel = 1
+        call xhalo(groundLevel)
+
+        ! Loop over the coarse grid levels; first the owned coordinates
+        ! are determined, followed by the halo's.
+
+        nLevels = ubound(flowDoms, 2)
+        do nn = (groundLevel + 1), nLevels
+            call coarseOwnedCoordinates(nn)
+            call xhalo(nn)
+        end do
+
+        groundLevel = origGroundLevel
+
+    end subroutine updateCoordinatesAllLevels
+
+    !      ==================================================================
+
+    subroutine updateMetricsAllLevels
+        !
+        !       updateMetricsAllLevels recomputes the metrics on all grid
+        !       levels. This routine is typically called when the coordinates
+        !       have changed, but the connectivity remains the same, i.e. for
+        !       moving or deforming mesh problems.
+        !
+        use constants
+        use block
+        use iteration
+        use inputphysics
+        use inputIteration
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nLevels, nn
+
+        ! Loop over the grid levels and call metric and checkSymmetry.
+
+        nLevels = ubound(flowDoms, 2)
+        do nn = groundLevel, nLevels
+            if (equationMode == unsteady) then
+                call metric(nn)
+            else
+                call metric(nn)
+            end if
+
+            if (printWarnings) then
+                call checkSymmetry(nn)
+            end if
+        end do
+
+    end subroutine updateMetricsAllLevels
+
+    subroutine updateGridVelocitiesAllLevels
+
+        !
+        !       updateGridVelocitesAllLevels recomputes the rotational
+        !       parameters on all grid
+        !       levels. This routine is typically called when the coordinates
+        !       have changed, but the connectivity remains the same, i.e. for
+        !       moving or deforming mesh problems.
+        !
+        use constants
+        use block
+        use iteration
+        use section
+        use monitor
+        use inputTimeSpectral
+        use inputPhysics
+        use solverUtils
+        implicit none
+
+        !subroutine variables
+
+        !Local Variables
+
+        integer(kind=inttype):: mm, nnn
+
+        real(kind=realType), dimension(nSections) :: t
+
+        do mm = 1, nTimeIntervalsSpectral
+
+            ! Compute the time, which corresponds to this spectral solution.
+            ! For steady and unsteady mode this is simply the restart time;
+            ! for the spectral mode the periodic time must be taken into
+            ! account, which can be different for every section.
+
+            t = timeUnsteadyRestart
+
+            if (equationMode == timeSpectral) then
+                do nnn = 1, nSections
+                    t(nnn) = t(nnn) + (mm - 1)*sections(nnn)%timePeriod &
+                             /real(nTimeIntervalsSpectral, realType)
+                end do
+            end if
+
+            call gridVelocitiesFineLevel(.false., t, mm)
+
+            call gridVelocitiesCoarseLevels(mm)
+            call normalVelocitiesAllLevels(mm)
+
+            call slipVelocitiesFineLevel(.false., t, mm)
+            call slipVelocitiesCoarseLevels(mm)
+
+        end do
+
+    end subroutine updateGridVelocitiesAllLevels
+
+    subroutine updatePeriodicInfoAllLevels
+
+        !
+        !       updatePeriodicInfoAllLevels recomputes the spectral parameters
+        !       on all grid levels. This routine is typically called when the
+        !       frequnecy or amplitude of the oscillation in the time spectral
+        !       computation has changed
+        !
+        use block
+        use iteration
+        use section
+        use monitor
+        use inputTimeSpectral
+        use inputPhysics
+        use communication
+        use initializeFlow, onlY: timeSpectralMatrices
+        use partitioning, only: fineGridSpectralCoor, timeRotMatricesSpectral, &
+                                timePeriodSpectral
+        !
+        implicit none
+
+        ! Determine for the time spectral mode the time of one period,
+        ! the rotation matrices for the velocity components and
+        ! create the fine grid coordinates of all time spectral locations.
+
+        call timePeriodSpectral
+        call timeRotMatricesSpectral
+        ! solve for the new grid only for rigid rotation with analytical deformation case
+        if (.NOT. usetsinterpolatedgridvelocity) then
+            call fineGridSpectralCoor
+        end if
+        call timeSpectralMatrices
+
+    end subroutine updatePeriodicInfoAllLevels
+    subroutine unitVectorsInAxialPlane(axis, vecR1, vecR2)
+        !
+        !       unitVectorsInAxialPlane computes from the given unit vector
+        !       axis the two unit vectors which describe the plane normal to
+        !       axis. There is of course an ambiguity in this choice, but this
+        !       is not a problem as long as the choice is consistent
+        !       throughout the code.
+        !
+        use constants
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        real(kind=realType), dimension(3), intent(in)  :: axis
+        real(kind=realType), dimension(3), intent(out) :: vecR1, vecR2
+        !
+        !      Local variables.
+        !
+        real(kind=realType) :: dot
+
+        ! The vectors which span the axial plane must be normal to axis.
+        ! For the first vector try first the y-axis. If not good enough
+        ! use the z-axis.
+
+        if (abs(axis(2)) < 0.707107_realType) then
+            vecR1(1) = zero
+            vecR1(2) = one
+            vecR1(3) = zero
+        else
+            vecR1(1) = zero
+            vecR1(2) = zero
+            vecR1(3) = one
+        end if
+
+        ! Make sure that vecR1 is normal to axis. Create a unit
+        ! vector again.
+
+        dot = vecR1(1)*axis(1) + vecR1(2)*axis(2) + vecR1(3)*axis(3)
+        vecR1(1) = vecR1(1) - dot*axis(1)
+        vecR1(2) = vecR1(2) - dot*axis(2)
+        vecR1(3) = vecR1(3) - dot*axis(3)
+
+        dot = one/sqrt(vecR1(1)**2 + vecR1(2)**2 + vecR1(3)**2)
+        vecR1(1) = vecR1(1)*dot
+        vecR1(2) = vecR1(2)*dot
+        vecR1(3) = vecR1(3)*dot
+
+        ! Create the second vector which spans the axial plane. This must
+        ! be normal to both axis and vecR1, i.e. the cross-product.
+
+        vecR2(1) = axis(2)*vecR1(3) - axis(3)*vecR1(2)
+        vecR2(2) = axis(3)*vecR1(1) - axis(1)*vecR1(3)
+        vecR2(3) = axis(1)*vecR1(2) - axis(2)*vecR1(1)
+
+    end subroutine unitVectorsInAxialPlane
+
+    subroutine preprocessingADjoint
+        !
+        !      Perform the preprocessing tasks for the adjoint solver. This
+        !      routine is called only once. The memory allcoated here is
+        !      deallocated in src/utils/releaseMemory.f90
+        !
+        use constants
+        use communication, only: adflow_comm_world
+        use adjointVars, only: nCellsLocal, nNOdesLocal
+        use flowVarRefState, only: nw, nwf
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use inputAdjoint, only: frozenTurbulence
+        use ADjointPETSc, only: w_like1, w_like2, PETScIerr, &
+                                psi_like1, psi_like2, x_like, psi_like3, adjointPETScPreProcVarsAllocated
+        use utils, only: setPointers, EChk
 #include <petsc/finclude/petsc.h>
-    use petsc
-    implicit none
-
-    !     Local variables.
-    !
-    integer(kind=intType) :: ndimW, ncell, nState, nDimPsi, nDimX
-    !
-    !
-    ! Create PETSc Vectors that are actually empty. These do NOT take
-    ! any (substantial) memory. We want to keep these around inbetween
-    ! creations/deletions of adjoint/NKsolver memory
-
-    ! Setup number of state variable based on turbulence assumption
-    if ( frozenTurbulence ) then
-       nState = nwf
-    else
-       nState = nw
-    endif
-
-    nDimW = nw * nCellsLocal(1_intType)*nTimeIntervalsSpectral
-    nDimPsi = nState*  nCellsLocal(1_intType)*nTimeIntervalsSpectral
-    nDimX = 3 * nNodesLocal(1_intType)*nTimeIntervalsSpectral
-
-    ! Two w-like vectors.
-    call VecCreateMPIWithArray(ADFLOW_COMM_WORLD,nw,ndimW,PETSC_DECIDE, &
-         PETSC_NULL_SCALAR,w_like1,PETScIerr)
-    call EChk(PETScIerr,__FILE__,__LINE__)
-
-    call VecCreateMPIWithArray(ADFLOW_COMM_WORLD,nw,ndimW,PETSC_DECIDE, &
-         PETSC_NULL_SCALAR,w_like2,PETScIerr)
-    call EChk(PETScIerr,__FILE__,__LINE__)
-
-    ! Two psi-like vectors.
-    call VecCreateMPIWithArray(ADFLOW_COMM_WORLD,nState,ndimPsi,PETSC_DECIDE, &
-         PETSC_NULL_SCALAR,psi_like1,PETScIerr)
-    call EChk(PETScIerr,__FILE__,__LINE__)
-
-    call VecCreateMPIWithArray(ADFLOW_COMM_WORLD,nstate,ndimPsi,PETSC_DECIDE, &
-         PETSC_NULL_SCALAR,psi_like2,PETScIerr)
-    call EChk(PETScIerr,__FILE__,__LINE__)
-
-    call VecCreateMPIWithArray(ADFLOW_COMM_WORLD,nstate,ndimPsi,PETSC_DECIDE, &
-         PETSC_NULL_SCALAR,psi_like3,PETScIerr)
-    call EChk(PETScIerr,__FILE__,__LINE__)
-
-    call VecCreateMPIWithArray(ADFLOW_COMM_WORLD,3,ndimX,PETSC_DECIDE, &
-         PETSC_NULL_SCALAR,x_like,PETScIerr)
-    call EChk(PETScIerr,__FILE__,__LINE__)
-    
-    adjointPETScPreProcVarsAllocated = .True.
-    
-    ! Need to initialize the stencils as well, only once:
-    call initialize_stencils
-
-  end subroutine preprocessingADjoint
-
-  subroutine updateReferencePoint
-    !
-    !       reruns the initialization routines to update AOA and other
-    !       flow variables after a design change
-    !
-    use constants
-    use inputTimeSpectral, only : nTimeINTervalsSpectral
-    use section, only : sections, nSections
-    use inputPhysics, only : equationMode
-    use inputMotion, only : rotPoint
-    use cgnsGrid, only : cgnsDoms
-    use monitor, only : timeUnsteadyRestart
-    use iteration, only : groundLevel
-    use blockpointers, only : nDom, nbkGlobal
-    use utils, onlY : setPointers
-    use solverUtils
-    implicit none
-
-    ! Working variables
-    integer(kind=intType) ::mm,nnn,nn
-    real(kind=realType), dimension(nSections) :: t
-
-    groundlevel = 1
-    do mm=1,nTimeIntervalsSpectral
-       do nn=1,nDom
-          ! Set the pointers for this block.
-          call setPointers(nn, groundLevel, mm)
-          !lref is outside
-          cgnsDoms(nbkglobal)%rotCenter = rotPoint
-       enddo
-    enddo
-
-    groundlevel = 1
-    do mm=1,nTimeIntervalsSpectral
-
-       ! Compute the time, which corresponds to this spectral solution.
-       ! For steady and unsteady mode this is simply the restart time;
-       ! for the spectral mode the periodic time must be taken into
-       ! account, which can be different for every section.
-       t = timeUnsteadyRestart
-
-       if(equationMode == timeSpectral) then
-          do nnn=1,nSections
-             t(nnn) = t(nnn) + (mm-1)*sections(nnn)%timePeriod &
-                  /         real(nTimeIntervalsSpectral,realType)
-          enddo
-       endif
-
-       call gridVelocitiesFineLevel(.false., t, mm)
-       call gridVelocitiesCoarseLevels(mm)
-       call normalVelocitiesAllLevels(mm)
-       call slipVelocitiesFineLevel(.false., t, mm)
-       call slipVelocitiesCoarseLevels(mm)
-    enddo
-  end subroutine updateReferencePoint
-
-  subroutine updateRotationRate(rotCenter, rotRate, blocks, nblocks)
-
-    use constants
-    use inputTimeSpectral, only : nTimeInTervalsSpectral
-    use section, only : sections, nSections
-    use inputPhysics, only : equationMode
-    use inputMotion, only : rotPoint
-    use cgnsGrid, only : cgnsDoms
-    use monitor, only : timeUnsteadyRestart
-    use iteration, only : groundLevel
-    use blockpointers, only : nDom, nbkGlobal, flowDoms
-    use solverUtils
-    implicit none
-
-    real(kind=realType),intent(in)::rotCenter(3), rotRate(3)
-    integer(kind=intType), intent(in) :: nblocks
-    integer(kind=intType), intent(in) :: blocks(nblocks)
-
-    integer(kind=intType) ::mm,nnn,nn, level, sps, i
-    real(kind=realType), dimension(nSections) :: t
-
-    groundlevel = 1
-
-    do nn=1,nblocks
-       cgnsDoms(nn)%rotRate = rotRate
-       cgnsDoms(nn)%rotCenter = rotCenter
-
-       do i=1,cgnsDoms(nn)%nBocos
-          cgnsDoms(nn)%bocoInfo(i)%rotRate = rotRate
-       end do
-    enddo
-
-    do sps=1,nTimeIntervalsSpectral
-       do level=1,ubound(flowDoms,2)
-          do nn=1,nDom
-             flowDoms(nn,level,sps)%blockIsMoving = .True.
-             flowDoms(nn,level,sps)%addGridVelocities = .True.
-          end do
-       end do
-    end do
-    groundlevel = 1
-    do mm=1,nTimeIntervalsSpectral
-
-       ! Compute the time, which corresponds to this spectral solution.
-       ! For steady and unsteady mode this is simply the restart time;
-       ! for the spectral mode the periodic time must be taken into
-       ! account, which can be different for every section.
-
-       t = timeUnsteadyRestart
-
-       if(equationMode == timeSpectral) then
-          do nnn=1,nSections
-             t(nnn) = t(nnn) + (mm-1)*sections(nnn)%timePeriod &
-                  /         real(nTimeIntervalsSpectral,realType)
-          enddo
-       endif
-
-       call gridVelocitiesFineLevel(.false., t, mm)
-       call gridVelocitiesCoarseLevels(mm)
-       call normalVelocitiesAllLevels(mm)
-       call slipVelocitiesFineLevel(.false., t, mm)
-       call slipVelocitiesCoarseLevels(mm)
-
-    enddo
-
-  end subroutine updateRotationRate
-
-  subroutine updateSurfaceRoughness(ks_in, famList, nFamList)
-
-    use constants
-    use blockPointers
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use utils, only : setPointers
-    use sorting, only : famInList
-    implicit none
-
-    real(kind=realType), intent(in) :: ks_in
-    integer(kind=intType), intent(in) :: nFamList, famList(nFamList)
-
-    integer(kind=intType) :: nLevels, level, sps, nn, mm
-
-    nLevels = ubound(flowDoms,2)
-
-    do level=1, nLevels
-       do sps=1, nTimeIntervalsSpectral
-          do nn=1,nDom
-             call setPointers(nn, level, sps)
-
-             ! Loop over the number of boundary subfaces of this block.
-             do mm=1,nBocos
-
-                if ( .not. famInList(BCData(mm)%famID, famList)) then
-                   cycle
-                end if
+        use petsc
+        implicit none
+
+        !     Local variables.
+        !
+        integer(kind=intType) :: ndimW, ncell, nState, nDimPsi, nDimX
+        !
+        !
+        ! Create PETSc Vectors that are actually empty. These do NOT take
+        ! any (substantial) memory. We want to keep these around inbetween
+        ! creations/deletions of adjoint/NKsolver memory
+
+        ! Setup number of state variable based on turbulence assumption
+        if (frozenTurbulence) then
+            nState = nwf
+        else
+            nState = nw
+        end if
+
+        nDimW = nw*nCellsLocal(1_intType)*nTimeIntervalsSpectral
+        nDimPsi = nState*nCellsLocal(1_intType)*nTimeIntervalsSpectral
+        nDimX = 3*nNodesLocal(1_intType)*nTimeIntervalsSpectral
+
+        ! Two w-like vectors.
+        call VecCreateMPIWithArray(ADFLOW_COMM_WORLD, nw, ndimW, PETSC_DECIDE, &
+                                   PETSC_NULL_SCALAR, w_like1, PETScIerr)
+        call EChk(PETScIerr, __FILE__, __LINE__)
+
+        call VecCreateMPIWithArray(ADFLOW_COMM_WORLD, nw, ndimW, PETSC_DECIDE, &
+                                   PETSC_NULL_SCALAR, w_like2, PETScIerr)
+        call EChk(PETScIerr, __FILE__, __LINE__)
+
+        ! Two psi-like vectors.
+        call VecCreateMPIWithArray(ADFLOW_COMM_WORLD, nState, ndimPsi, PETSC_DECIDE, &
+                                   PETSC_NULL_SCALAR, psi_like1, PETScIerr)
+        call EChk(PETScIerr, __FILE__, __LINE__)
+
+        call VecCreateMPIWithArray(ADFLOW_COMM_WORLD, nstate, ndimPsi, PETSC_DECIDE, &
+                                   PETSC_NULL_SCALAR, psi_like2, PETScIerr)
+        call EChk(PETScIerr, __FILE__, __LINE__)
+
+        call VecCreateMPIWithArray(ADFLOW_COMM_WORLD, nstate, ndimPsi, PETSC_DECIDE, &
+                                   PETSC_NULL_SCALAR, psi_like3, PETScIerr)
+        call EChk(PETScIerr, __FILE__, __LINE__)
+
+        call VecCreateMPIWithArray(ADFLOW_COMM_WORLD, 3, ndimX, PETSC_DECIDE, &
+                                   PETSC_NULL_SCALAR, x_like, PETScIerr)
+        call EChk(PETScIerr, __FILE__, __LINE__)
+
+        adjointPETScPreProcVarsAllocated = .True.
+
+        ! Need to initialize the stencils as well, only once:
+        call initialize_stencils
+
+    end subroutine preprocessingADjoint
+
+    subroutine updateReferencePoint
+        !
+        !       reruns the initialization routines to update AOA and other
+        !       flow variables after a design change
+        !
+        use constants
+        use inputTimeSpectral, only: nTimeINTervalsSpectral
+        use section, only: sections, nSections
+        use inputPhysics, only: equationMode
+        use inputMotion, only: rotPoint
+        use cgnsGrid, only: cgnsDoms
+        use monitor, only: timeUnsteadyRestart
+        use iteration, only: groundLevel
+        use blockpointers, only: nDom, nbkGlobal
+        use utils, onlY: setPointers
+        use solverUtils
+        implicit none
+
+        ! Working variables
+        integer(kind=intType) ::mm, nnn, nn
+        real(kind=realType), dimension(nSections) :: t
+
+        groundlevel = 1
+        do mm = 1, nTimeIntervalsSpectral
+            do nn = 1, nDom
+                ! Set the pointers for this block.
+                call setPointers(nn, groundLevel, mm)
+                !lref is outside
+                cgnsDoms(nbkglobal)%rotCenter = rotPoint
+            end do
+        end do
+
+        groundlevel = 1
+        do mm = 1, nTimeIntervalsSpectral
+
+            ! Compute the time, which corresponds to this spectral solution.
+            ! For steady and unsteady mode this is simply the restart time;
+            ! for the spectral mode the periodic time must be taken into
+            ! account, which can be different for every section.
+            t = timeUnsteadyRestart
+
+            if (equationMode == timeSpectral) then
+                do nnn = 1, nSections
+                    t(nnn) = t(nnn) + (mm - 1)*sections(nnn)%timePeriod &
+                             /real(nTimeIntervalsSpectral, realType)
+                end do
+            end if
+
+            call gridVelocitiesFineLevel(.false., t, mm)
+            call gridVelocitiesCoarseLevels(mm)
+            call normalVelocitiesAllLevels(mm)
+            call slipVelocitiesFineLevel(.false., t, mm)
+            call slipVelocitiesCoarseLevels(mm)
+        end do
+    end subroutine updateReferencePoint
+
+    subroutine updateRotationRate(rotCenter, rotRate, blocks, nblocks)
+
+        use constants
+        use inputTimeSpectral, only: nTimeInTervalsSpectral
+        use section, only: sections, nSections
+        use inputPhysics, only: equationMode
+        use inputMotion, only: rotPoint
+        use cgnsGrid, only: cgnsDoms
+        use monitor, only: timeUnsteadyRestart
+        use iteration, only: groundLevel
+        use blockpointers, only: nDom, nbkGlobal, flowDoms
+        use solverUtils
+        implicit none
+
+        real(kind=realType), intent(in)::rotCenter(3), rotRate(3)
+        integer(kind=intType), intent(in) :: nblocks
+        integer(kind=intType), intent(in) :: blocks(nblocks)
+
+        integer(kind=intType) ::mm, nnn, nn, level, sps, i
+        real(kind=realType), dimension(nSections) :: t
+
+        groundlevel = 1
+
+        do nn = 1, nblocks
+            cgnsDoms(nn)%rotRate = rotRate
+            cgnsDoms(nn)%rotCenter = rotCenter
+
+            do i = 1, cgnsDoms(nn)%nBocos
+                cgnsDoms(nn)%bocoInfo(i)%rotRate = rotRate
+            end do
+        end do
+
+        do sps = 1, nTimeIntervalsSpectral
+            do level = 1, ubound(flowDoms, 2)
+                do nn = 1, nDom
+                    flowDoms(nn, level, sps)%blockIsMoving = .True.
+                    flowDoms(nn, level, sps)%addGridVelocities = .True.
+                end do
+            end do
+        end do
+        groundlevel = 1
+        do mm = 1, nTimeIntervalsSpectral
+
+            ! Compute the time, which corresponds to this spectral solution.
+            ! For steady and unsteady mode this is simply the restart time;
+            ! for the spectral mode the periodic time must be taken into
+            ! account, which can be different for every section.
+
+            t = timeUnsteadyRestart
+
+            if (equationMode == timeSpectral) then
+                do nnn = 1, nSections
+                    t(nnn) = t(nnn) + (mm - 1)*sections(nnn)%timePeriod &
+                             /real(nTimeIntervalsSpectral, realType)
+                end do
+            end if
+
+            call gridVelocitiesFineLevel(.false., t, mm)
+            call gridVelocitiesCoarseLevels(mm)
+            call normalVelocitiesAllLevels(mm)
+            call slipVelocitiesFineLevel(.false., t, mm)
+            call slipVelocitiesCoarseLevels(mm)
 
-                BCData(mm)%ksNS_Wall = ks_in
+        end do
 
-             end do
-          end do
-       end do
-    end do
+    end subroutine updateRotationRate
+
+    subroutine updateSurfaceRoughness(ks_in, famList, nFamList)
+
+        use constants
+        use blockPointers
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use utils, only: setPointers
+        use sorting, only: famInList
+        implicit none
+
+        real(kind=realType), intent(in) :: ks_in
+        integer(kind=intType), intent(in) :: nFamList, famList(nFamList)
+
+        integer(kind=intType) :: nLevels, level, sps, nn, mm
+
+        nLevels = ubound(flowDoms, 2)
+
+        do level = 1, nLevels
+            do sps = 1, nTimeIntervalsSpectral
+                do nn = 1, nDom
+                    call setPointers(nn, level, sps)
+
+                    ! Loop over the number of boundary subfaces of this block.
+                    do mm = 1, nBocos
+
+                        if (.not. famInList(BCData(mm)%famID, famList)) then
+                            cycle
+                        end if
+
+                        BCData(mm)%ksNS_Wall = ks_in
+
+                    end do
+                end do
+            end do
+        end do
 
-  end subroutine updateSurfaceRoughness
+    end subroutine updateSurfaceRoughness
 
 end module preprocessingAPI
diff --git a/src/turbulence/sa.F90 b/src/turbulence/sa.F90
index 2da8780d0..17cf560af 100644
--- a/src/turbulence/sa.F90
+++ b/src/turbulence/sa.F90
@@ -4,1281 +4,1280 @@
 
 module sa
 
-  use constants
-  real(kind=realType) :: cv13, kar2Inv, cw36, cb3Inv
-  real(kind=realType), dimension(:,:,:), allocatable :: qq
-  real(kind=realType), dimension(:,:,:), pointer :: ddw, ww, ddvt
-  real(kind=realType), dimension(:,:),   pointer :: rrlv
-  real(kind=realType), dimension(:,:),   pointer :: dd2Wall
+    use constants
+    real(kind=realType) :: cv13, kar2Inv, cw36, cb3Inv
+    real(kind=realType), dimension(:, :, :), allocatable :: qq
+    real(kind=realType), dimension(:, :, :), pointer :: ddw, ww, ddvt
+    real(kind=realType), dimension(:, :), pointer :: rrlv
+    real(kind=realType), dimension(:, :), pointer :: dd2Wall
 
 contains
 #ifndef USE_TAPENADE
-  subroutine sa_block(resOnly)
-    !
-    !       sa solves the transport equation for the Spalart-Allmaras
-    !       turbulence model in a decoupled manner using a diagonal
-    !       dominant ADI-scheme. Note that the scratch and boundary
-    !       matrix values are not strictly, but tapande would like to
-    !       see them becuase it must save them.
-    !
-    use constants
-    use blockPointers, only : nDom, il, jl, kl, scratch, bmtj1, bmtj2, &
-         bmti1, bmti2, bmtk1, bmtk2
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use iteration, only : currentLevel
-    use inputPhysics, only : turbProd
-    use paramTurb
-    use turbutils
-    use turbBCRoutines
-    implicit none
-    !
-    !      Subroutine argument.
-    !
-    logical, intent(in) :: resOnly
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn, sps
-
-
-    ! Set the arrays for the boundary condition treatment.
-    call bcTurbTreatment
-
-    ! Alloc central jacobian memory
-    allocate(qq(2:il,2:jl,2:kl))
-
-    ! Source Terms
-    call saSource
-
-    ! Advection Term
-    nn = itu1 - 1
-    call turbAdvection(1_intType, 1_intType, nn, qq)
-
-    ! Unsteady Term
-    call unsteadyTurbTerm(1_intType, 1_intType, nn, qq)
-
-    ! Viscous Terms
-    call saViscous
-
-    ! Perform the residual scaling
-    call saResScale
-
-    ! We need to do an acutal solve. Solve and update the eddy
-    ! viscosity and the boundary conditions
-
-    if(.not. resOnly ) then
-
-       ! Do solve
-       call saSolve
-
-       ! Compute the corresponding eddy viscosity.
-
-       call saEddyViscosity(2, il, 2, jl, 2, kl)
-
-       ! Set the halo values for the turbulent variables.
-       ! We are on the finest mesh, so the second layer of halo
-       ! cells must be computed as well.
-
-       call applyAllTurbBCThisBlock(.true.)
-    endif
-
-    deallocate(qq)
-  end subroutine sa_block
+    subroutine sa_block(resOnly)
+        !
+        !       sa solves the transport equation for the Spalart-Allmaras
+        !       turbulence model in a decoupled manner using a diagonal
+        !       dominant ADI-scheme. Note that the scratch and boundary
+        !       matrix values are not strictly, but tapande would like to
+        !       see them becuase it must save them.
+        !
+        use constants
+        use blockPointers, only: nDom, il, jl, kl, scratch, bmtj1, bmtj2, &
+                                 bmti1, bmti2, bmtk1, bmtk2
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use iteration, only: currentLevel
+        use inputPhysics, only: turbProd
+        use paramTurb
+        use turbutils
+        use turbBCRoutines
+        implicit none
+        !
+        !      Subroutine argument.
+        !
+        logical, intent(in) :: resOnly
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn, sps
+
+        ! Set the arrays for the boundary condition treatment.
+        call bcTurbTreatment
+
+        ! Alloc central jacobian memory
+        allocate (qq(2:il, 2:jl, 2:kl))
+
+        ! Source Terms
+        call saSource
+
+        ! Advection Term
+        nn = itu1 - 1
+        call turbAdvection(1_intType, 1_intType, nn, qq)
+
+        ! Unsteady Term
+        call unsteadyTurbTerm(1_intType, 1_intType, nn, qq)
+
+        ! Viscous Terms
+        call saViscous
+
+        ! Perform the residual scaling
+        call saResScale
+
+        ! We need to do an acutal solve. Solve and update the eddy
+        ! viscosity and the boundary conditions
+
+        if (.not. resOnly) then
+
+            ! Do solve
+            call saSolve
+
+            ! Compute the corresponding eddy viscosity.
+
+            call saEddyViscosity(2, il, 2, jl, 2, kl)
+
+            ! Set the halo values for the turbulent variables.
+            ! We are on the finest mesh, so the second layer of halo
+            ! cells must be computed as well.
+
+            call applyAllTurbBCThisBlock(.true.)
+        end if
+
+        deallocate (qq)
+    end subroutine sa_block
 #endif
 
-  subroutine saSource
-    !
-    !  Source terms.
-    !  Determine the source term and its derivative w.r.t. nuTilde
-    !  for all internal cells of the block.
-    !  Remember that the SA field variable nuTilde = w(i,j,k,itu1)
-
-    use blockPointers
-    use constants
-    use paramTurb
-    use section
-    use inputPhysics
-    use inputDiscretization, only : approxSA
-    use flowVarRefState
-    implicit none
-
-    ! Local parameters
-    real(kind=realType), parameter :: f23 = two*third
-
-    ! Local variables.
-    integer(kind=intType) :: i, j, k, nn, ii
-    real(kind=realType) :: distRough
-    real(kind=realType) :: fv1, fv2, ft2
-    real(kind=realType) :: ss, sst, nu, dist2Inv, chi, chi2, chi3
-    real(kind=realType) :: rr, gg, gg6, termFw, fwSa, term1, term2
-    real(kind=realType) :: dfv1, dfv2, dft2, drr, dgg, dfw
-    real(kind=realType) :: uux, uuy, uuz, vvx, vvy, vvz, wwx, wwy, wwz
-    real(kind=realType) :: div2, fact, sxx, syy, szz, sxy, sxz, syz
-    real(kind=realType) :: vortx, vorty, vortz
-    real(kind=realType) :: omegax, omegay, omegaz
-    real(kind=realType) :: strainMag2, strainProd, vortProd
-    real(kind=realType), parameter :: xminn = 1.e-10_realType
-
-    ! Set model constants
-    cv13    = rsaCv1**3
-    kar2Inv = one/(rsaK**2)
-    cw36    = rsaCw3**6
-    cb3Inv  = one/rsaCb3
-
-    ! Determine the non-dimensional wheel speed of this block.
-
-    omegax = timeRef*sections(sectionID)%rotRate(1)
-    omegay = timeRef*sections(sectionID)%rotRate(2)
-    omegaz = timeRef*sections(sectionID)%rotRate(3)
-
-    ! Create switches to production term depending on the variable that
-    ! should be used
-    if (turbProd .eq. katoLaunder) then
-       print *,'katoLaunder production term not supported for SA'
-       stop
-    end if
+    subroutine saSource
+        !
+        !  Source terms.
+        !  Determine the source term and its derivative w.r.t. nuTilde
+        !  for all internal cells of the block.
+        !  Remember that the SA field variable nuTilde = w(i,j,k,itu1)
+
+        use blockPointers
+        use constants
+        use paramTurb
+        use section
+        use inputPhysics
+        use inputDiscretization, only: approxSA
+        use flowVarRefState
+        implicit none
+
+        ! Local parameters
+        real(kind=realType), parameter :: f23 = two*third
+
+        ! Local variables.
+        integer(kind=intType) :: i, j, k, nn, ii
+        real(kind=realType) :: distRough
+        real(kind=realType) :: fv1, fv2, ft2
+        real(kind=realType) :: ss, sst, nu, dist2Inv, chi, chi2, chi3
+        real(kind=realType) :: rr, gg, gg6, termFw, fwSa, term1, term2
+        real(kind=realType) :: dfv1, dfv2, dft2, drr, dgg, dfw
+        real(kind=realType) :: uux, uuy, uuz, vvx, vvy, vvz, wwx, wwy, wwz
+        real(kind=realType) :: div2, fact, sxx, syy, szz, sxy, sxz, syz
+        real(kind=realType) :: vortx, vorty, vortz
+        real(kind=realType) :: omegax, omegay, omegaz
+        real(kind=realType) :: strainMag2, strainProd, vortProd
+        real(kind=realType), parameter :: xminn = 1.e-10_realType
+
+        ! Set model constants
+        cv13 = rsaCv1**3
+        kar2Inv = one/(rsaK**2)
+        cw36 = rsaCw3**6
+        cb3Inv = one/rsaCb3
+
+        ! Determine the non-dimensional wheel speed of this block.
+
+        omegax = timeRef*sections(sectionID)%rotRate(1)
+        omegay = timeRef*sections(sectionID)%rotRate(2)
+        omegaz = timeRef*sections(sectionID)%rotRate(3)
+
+        ! Create switches to production term depending on the variable that
+        ! should be used
+        if (turbProd .eq. katoLaunder) then
+            print *, 'katoLaunder production term not supported for SA'
+            stop
+        end if
 
 #ifdef TAPENADE_REVERSE
-    !$AD II-LOOP
-    do ii=0,nx*ny*nz-1
-       i = mod(ii, nx) + 2
-       j = mod(ii/nx, ny) + 2
-       k = ii/(nx*ny) + 2
+        !$AD II-LOOP
+        do ii = 0, nx*ny*nz - 1
+            i = mod(ii, nx) + 2
+            j = mod(ii/nx, ny) + 2
+            k = ii/(nx*ny) + 2
 #else
-       do k=2, kl
-          do j=2, jl
-             do i=2, il
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
 #endif
-                ! Compute the gradient of u in the cell center. Use is made
-                ! of the fact that the surrounding normals sum up to zero,
-                ! such that the cell i,j,k does not give a contribution.
-                ! The gradient is scaled by the factor 2*vol.
+                        ! Compute the gradient of u in the cell center. Use is made
+                        ! of the fact that the surrounding normals sum up to zero,
+                        ! such that the cell i,j,k does not give a contribution.
+                        ! The gradient is scaled by the factor 2*vol.
 
-                uux = w(i+1,j,k,ivx)*si(i,j,k,1) - w(i-1,j,k,ivx)*si(i-1,j,k,1) &
-                     + w(i,j+1,k,ivx)*sj(i,j,k,1) - w(i,j-1,k,ivx)*sj(i,j-1,k,1) &
-                     + w(i,j,k+1,ivx)*sk(i,j,k,1) - w(i,j,k-1,ivx)*sk(i,j,k-1,1)
-                uuy = w(i+1,j,k,ivx)*si(i,j,k,2) - w(i-1,j,k,ivx)*si(i-1,j,k,2) &
-                     + w(i,j+1,k,ivx)*sj(i,j,k,2) - w(i,j-1,k,ivx)*sj(i,j-1,k,2) &
-                     + w(i,j,k+1,ivx)*sk(i,j,k,2) - w(i,j,k-1,ivx)*sk(i,j,k-1,2)
-                uuz = w(i+1,j,k,ivx)*si(i,j,k,3) - w(i-1,j,k,ivx)*si(i-1,j,k,3) &
-                     + w(i,j+1,k,ivx)*sj(i,j,k,3) - w(i,j-1,k,ivx)*sj(i,j-1,k,3) &
-                     + w(i,j,k+1,ivx)*sk(i,j,k,3) - w(i,j,k-1,ivx)*sk(i,j,k-1,3)
+                        uux = w(i + 1, j, k, ivx)*si(i, j, k, 1) - w(i - 1, j, k, ivx)*si(i - 1, j, k, 1) &
+                              + w(i, j + 1, k, ivx)*sj(i, j, k, 1) - w(i, j - 1, k, ivx)*sj(i, j - 1, k, 1) &
+                              + w(i, j, k + 1, ivx)*sk(i, j, k, 1) - w(i, j, k - 1, ivx)*sk(i, j, k - 1, 1)
+                        uuy = w(i + 1, j, k, ivx)*si(i, j, k, 2) - w(i - 1, j, k, ivx)*si(i - 1, j, k, 2) &
+                              + w(i, j + 1, k, ivx)*sj(i, j, k, 2) - w(i, j - 1, k, ivx)*sj(i, j - 1, k, 2) &
+                              + w(i, j, k + 1, ivx)*sk(i, j, k, 2) - w(i, j, k - 1, ivx)*sk(i, j, k - 1, 2)
+                        uuz = w(i + 1, j, k, ivx)*si(i, j, k, 3) - w(i - 1, j, k, ivx)*si(i - 1, j, k, 3) &
+                              + w(i, j + 1, k, ivx)*sj(i, j, k, 3) - w(i, j - 1, k, ivx)*sj(i, j - 1, k, 3) &
+                              + w(i, j, k + 1, ivx)*sk(i, j, k, 3) - w(i, j, k - 1, ivx)*sk(i, j, k - 1, 3)
 
-                ! Idem for the gradient of v.
+                        ! Idem for the gradient of v.
 
-                vvx = w(i+1,j,k,ivy)*si(i,j,k,1) - w(i-1,j,k,ivy)*si(i-1,j,k,1) &
-                     + w(i,j+1,k,ivy)*sj(i,j,k,1) - w(i,j-1,k,ivy)*sj(i,j-1,k,1) &
-                     + w(i,j,k+1,ivy)*sk(i,j,k,1) - w(i,j,k-1,ivy)*sk(i,j,k-1,1)
-                vvy = w(i+1,j,k,ivy)*si(i,j,k,2) - w(i-1,j,k,ivy)*si(i-1,j,k,2) &
-                     + w(i,j+1,k,ivy)*sj(i,j,k,2) - w(i,j-1,k,ivy)*sj(i,j-1,k,2) &
-                     + w(i,j,k+1,ivy)*sk(i,j,k,2) - w(i,j,k-1,ivy)*sk(i,j,k-1,2)
-                vvz = w(i+1,j,k,ivy)*si(i,j,k,3) - w(i-1,j,k,ivy)*si(i-1,j,k,3) &
-                     + w(i,j+1,k,ivy)*sj(i,j,k,3) - w(i,j-1,k,ivy)*sj(i,j-1,k,3) &
-                     + w(i,j,k+1,ivy)*sk(i,j,k,3) - w(i,j,k-1,ivy)*sk(i,j,k-1,3)
+                        vvx = w(i + 1, j, k, ivy)*si(i, j, k, 1) - w(i - 1, j, k, ivy)*si(i - 1, j, k, 1) &
+                              + w(i, j + 1, k, ivy)*sj(i, j, k, 1) - w(i, j - 1, k, ivy)*sj(i, j - 1, k, 1) &
+                              + w(i, j, k + 1, ivy)*sk(i, j, k, 1) - w(i, j, k - 1, ivy)*sk(i, j, k - 1, 1)
+                        vvy = w(i + 1, j, k, ivy)*si(i, j, k, 2) - w(i - 1, j, k, ivy)*si(i - 1, j, k, 2) &
+                              + w(i, j + 1, k, ivy)*sj(i, j, k, 2) - w(i, j - 1, k, ivy)*sj(i, j - 1, k, 2) &
+                              + w(i, j, k + 1, ivy)*sk(i, j, k, 2) - w(i, j, k - 1, ivy)*sk(i, j, k - 1, 2)
+                        vvz = w(i + 1, j, k, ivy)*si(i, j, k, 3) - w(i - 1, j, k, ivy)*si(i - 1, j, k, 3) &
+                              + w(i, j + 1, k, ivy)*sj(i, j, k, 3) - w(i, j - 1, k, ivy)*sj(i, j - 1, k, 3) &
+                              + w(i, j, k + 1, ivy)*sk(i, j, k, 3) - w(i, j, k - 1, ivy)*sk(i, j, k - 1, 3)
 
-                ! And for the gradient of w.
+                        ! And for the gradient of w.
 
-                wwx = w(i+1,j,k,ivz)*si(i,j,k,1) - w(i-1,j,k,ivz)*si(i-1,j,k,1) &
-                     + w(i,j+1,k,ivz)*sj(i,j,k,1) - w(i,j-1,k,ivz)*sj(i,j-1,k,1) &
-                     + w(i,j,k+1,ivz)*sk(i,j,k,1) - w(i,j,k-1,ivz)*sk(i,j,k-1,1)
-                wwy = w(i+1,j,k,ivz)*si(i,j,k,2) - w(i-1,j,k,ivz)*si(i-1,j,k,2) &
-                     + w(i,j+1,k,ivz)*sj(i,j,k,2) - w(i,j-1,k,ivz)*sj(i,j-1,k,2) &
-                     + w(i,j,k+1,ivz)*sk(i,j,k,2) - w(i,j,k-1,ivz)*sk(i,j,k-1,2)
-                wwz = w(i+1,j,k,ivz)*si(i,j,k,3) - w(i-1,j,k,ivz)*si(i-1,j,k,3) &
-                     + w(i,j+1,k,ivz)*sj(i,j,k,3) - w(i,j-1,k,ivz)*sj(i,j-1,k,3) &
-                     + w(i,j,k+1,ivz)*sk(i,j,k,3) - w(i,j,k-1,ivz)*sk(i,j,k-1,3)
+                        wwx = w(i + 1, j, k, ivz)*si(i, j, k, 1) - w(i - 1, j, k, ivz)*si(i - 1, j, k, 1) &
+                              + w(i, j + 1, k, ivz)*sj(i, j, k, 1) - w(i, j - 1, k, ivz)*sj(i, j - 1, k, 1) &
+                              + w(i, j, k + 1, ivz)*sk(i, j, k, 1) - w(i, j, k - 1, ivz)*sk(i, j, k - 1, 1)
+                        wwy = w(i + 1, j, k, ivz)*si(i, j, k, 2) - w(i - 1, j, k, ivz)*si(i - 1, j, k, 2) &
+                              + w(i, j + 1, k, ivz)*sj(i, j, k, 2) - w(i, j - 1, k, ivz)*sj(i, j - 1, k, 2) &
+                              + w(i, j, k + 1, ivz)*sk(i, j, k, 2) - w(i, j, k - 1, ivz)*sk(i, j, k - 1, 2)
+                        wwz = w(i + 1, j, k, ivz)*si(i, j, k, 3) - w(i - 1, j, k, ivz)*si(i - 1, j, k, 3) &
+                              + w(i, j + 1, k, ivz)*sj(i, j, k, 3) - w(i, j - 1, k, ivz)*sj(i, j - 1, k, 3) &
+                              + w(i, j, k + 1, ivz)*sk(i, j, k, 3) - w(i, j, k - 1, ivz)*sk(i, j, k - 1, 3)
 
-                ! Compute the components of the stress tensor.
-                ! The combination of the current scaling of the velocity
-                ! gradients (2*vol) and the definition of the stress tensor,
-                ! leads to the factor 1/(4*vol).
+                        ! Compute the components of the stress tensor.
+                        ! The combination of the current scaling of the velocity
+                        ! gradients (2*vol) and the definition of the stress tensor,
+                        ! leads to the factor 1/(4*vol).
 
-                fact = fourth/vol(i,j,k)
+                        fact = fourth/vol(i, j, k)
 
-                if (turbProd .eq. strain) then
+                        if (turbProd .eq. strain) then
 
-                   sxx = two*fact*uux
-                   syy = two*fact*vvy
-                   szz = two*fact*wwz
+                            sxx = two*fact*uux
+                            syy = two*fact*vvy
+                            szz = two*fact*wwz
 
-                   sxy = fact*(uuy + vvx)
-                   sxz = fact*(uuz + wwx)
-                   syz = fact*(vvz + wwy)
+                            sxy = fact*(uuy + vvx)
+                            sxz = fact*(uuz + wwx)
+                            syz = fact*(vvz + wwy)
 
-                   ! Compute 2/3 * divergence of velocity squared
+                            ! Compute 2/3 * divergence of velocity squared
 
-                   div2 = f23*(sxx+syy+szz)**2
+                            div2 = f23*(sxx + syy + szz)**2
 
-                   ! Compute strain production term
+                            ! Compute strain production term
 
-                   strainMag2 = two*(sxy**2 + sxz**2 + syz**2) &
-                        +           sxx**2 + syy**2 + szz**2
+                            strainMag2 = two*(sxy**2 + sxz**2 + syz**2) &
+                                         + sxx**2 + syy**2 + szz**2
 
-                   strainProd = two*strainMag2 - div2
+                            strainProd = two*strainMag2 - div2
 
-                   ss = sqrt(strainProd)
+                            ss = sqrt(strainProd)
 
-                else if (turbProd .eq. vorticity) then
+                        else if (turbProd .eq. vorticity) then
 
-                   ! Compute the three components of the vorticity vector.
-                   ! Substract the part coming from the rotating frame.
+                            ! Compute the three components of the vorticity vector.
+                            ! Substract the part coming from the rotating frame.
 
-                   vortx = two*fact*(wwy - vvz) - two*omegax
-                   vorty = two*fact*(uuz - wwx) - two*omegay
-                   vortz = two*fact*(vvx - uuy) - two*omegaz
+                            vortx = two*fact*(wwy - vvz) - two*omegax
+                            vorty = two*fact*(uuz - wwx) - two*omegay
+                            vortz = two*fact*(vvx - uuy) - two*omegaz
 
-                   ! Compute the vorticity production term
+                            ! Compute the vorticity production term
 
-                   vortProd = vortx**2 + vorty**2 + vortz**2
+                            vortProd = vortx**2 + vorty**2 + vortz**2
 
-                   ! First take the square root of the production term to
-                   ! obtain the correct production term for spalart-allmaras.
-                   ! We do this to avoid if statements.
+                            ! First take the square root of the production term to
+                            ! obtain the correct production term for spalart-allmaras.
+                            ! We do this to avoid if statements.
 
-                   ss = sqrt(vortProd)
+                            ss = sqrt(vortProd)
 
-                end if
+                        end if
 
-                ! Compute the laminar kinematic viscosity, the inverse of
-                ! wall distance squared, the ratio chi (ratio of nuTilde
-                ! and nu) and the functions fv1 and fv2. The latter corrects
-                ! the production term near a viscous wall.
+                        ! Compute the laminar kinematic viscosity, the inverse of
+                        ! wall distance squared, the ratio chi (ratio of nuTilde
+                        ! and nu) and the functions fv1 and fv2. The latter corrects
+                        ! the production term near a viscous wall.
 
-                nu       = rlv(i,j,k)/w(i,j,k,irho)
-                chi      = w(i,j,k,itu1)/nu
+                        nu = rlv(i, j, k)/w(i, j, k, irho)
+                        chi = w(i, j, k, itu1)/nu
 
-                if (.not. useRoughSA) then
-                   dist2Inv = one/(d2Wall(i,j,k)**2)
-                else
-                   distRough = d2Wall(i,j,k) + 0.03_realType * ks(i,j,k)
-                   dist2Inv = one/(distRough**2)
-                   chi      = chi + rsaCr1*ks(i,j,k)/distRough
-                end if
+                        if (.not. useRoughSA) then
+                            dist2Inv = one/(d2Wall(i, j, k)**2)
+                        else
+                            distRough = d2Wall(i, j, k) + 0.03_realType*ks(i, j, k)
+                            dist2Inv = one/(distRough**2)
+                            chi = chi + rsaCr1*ks(i, j, k)/distRough
+                        end if
 
-                chi2     = chi*chi
-                chi3     = chi*chi2
-                fv1      = chi3/(chi3+cv13)
+                        chi2 = chi*chi
+                        chi3 = chi*chi2
+                        fv1 = chi3/(chi3 + cv13)
 
-                if (.not. useRoughSA) then
-                   fv2      = one - chi/(one + chi*fv1)
-                else
-                   fv2      = one - w(i,j,k,itu1)/(nu + w(i,j,k,itu1)*fv1)
-                end if
+                        if (.not. useRoughSA) then
+                            fv2 = one - chi/(one + chi*fv1)
+                        else
+                            fv2 = one - w(i, j, k, itu1)/(nu + w(i, j, k, itu1)*fv1)
+                        end if
 
-                ! The function ft2, which is designed to keep a laminar
-                ! solution laminar. When running in fully turbulent mode
-                ! this function should be set to 0.0.
+                        ! The function ft2, which is designed to keep a laminar
+                        ! solution laminar. When running in fully turbulent mode
+                        ! this function should be set to 0.0.
 
-                if (useft2SA) then
-                   ft2 = rsaCt3*exp(-rsaCt4*chi2)
-                else
-                   ft2 = zero
-                end if
+                        if (useft2SA) then
+                            ft2 = rsaCt3*exp(-rsaCt4*chi2)
+                        else
+                            ft2 = zero
+                        end if
 
-                ! Correct the production term to account for the influence
-                ! of the wall.
+                        ! Correct the production term to account for the influence
+                        ! of the wall.
 
-                sst = ss + w(i,j,k,itu1)*fv2*kar2Inv*dist2Inv
+                        sst = ss + w(i, j, k, itu1)*fv2*kar2Inv*dist2Inv
 
-                ! Add rotation term (useRotationSA defined in inputParams.F90)
+                        ! Add rotation term (useRotationSA defined in inputParams.F90)
 
-                if (useRotationSA) then
-                   sst = sst + rsaCrot*min(zero,sqrt(two*strainMag2))
-                end if
+                        if (useRotationSA) then
+                            sst = sst + rsaCrot*min(zero, sqrt(two*strainMag2))
+                        end if
 
-                ! Make sure that this term remains positive
-                ! (the function fv2 is negative between chi = 1 and 18.4,
-                ! which can cause sst to go negative, which is undesirable).
+                        ! Make sure that this term remains positive
+                        ! (the function fv2 is negative between chi = 1 and 18.4,
+                        ! which can cause sst to go negative, which is undesirable).
 
-                sst = max(sst,xminn)
+                        sst = max(sst, xminn)
 
-                ! Compute the function fw. The argument rr is cut off at 10
-                ! to avoid numerical problems. This is ok, because the
-                ! asymptotical value of fw is then already reached.
+                        ! Compute the function fw. The argument rr is cut off at 10
+                        ! to avoid numerical problems. This is ok, because the
+                        ! asymptotical value of fw is then already reached.
 
-                rr     = w(i,j,k,itu1)*kar2Inv*dist2Inv/sst
-                rr     = min(rr,10.0_realType)
-                gg     = rr + rsaCw2*(rr**6 - rr)
-                gg6    = gg**6
-                termFw = ((one + cw36)/(gg6 + cw36))**sixth
-                fwSa   = gg*termFw
+                        rr = w(i, j, k, itu1)*kar2Inv*dist2Inv/sst
+                        rr = min(rr, 10.0_realType)
+                        gg = rr + rsaCw2*(rr**6 - rr)
+                        gg6 = gg**6
+                        termFw = ((one + cw36)/(gg6 + cw36))**sixth
+                        fwSa = gg*termFw
 
-                ! Compute the source term; some terms are saved for the
-                ! linearization. The source term is stored in dvt.
+                        ! Compute the source term; some terms are saved for the
+                        ! linearization. The source term is stored in dvt.
 
-                if (approxSA) then
-                  term1 = zero
-                else
-                  term1 = rsaCb1*(one-ft2)*ss
-                end if
-                term2 = dist2Inv*(kar2Inv*rsaCb1*((one-ft2)*fv2 + ft2) &
-                     -           rsaCw1*fwSa)
+                        if (approxSA) then
+                            term1 = zero
+                        else
+                            term1 = rsaCb1*(one - ft2)*ss
+                        end if
+                        term2 = dist2Inv*(kar2Inv*rsaCb1*((one - ft2)*fv2 + ft2) &
+                                          - rsaCw1*fwSa)
 
-                scratch(i,j,k,idvt) = (term1 + term2*w(i,j,k,itu1))*w(i,j,k,itu1)
+                        scratch(i, j, k, idvt) = (term1 + term2*w(i, j, k, itu1))*w(i, j, k, itu1)
 
 #ifndef USE_TAPENADE
-                ! Compute some derivatives w.r.t. nuTilde. These will occur
-                ! in the left hand side, i.e. the matrix for the implicit
-                ! treatment.
-
-                dfv1 = three*chi2*cv13/((chi3+cv13)**2)
-                if (.not. useRoughSA) then
-                   dfv2 = (chi2*dfv1 - one)/(nu*((one + chi*fv1)**2))
-                else
-                   dfv2 = (w(i,j,k,itu1)*dfv1 - nu) / (nu + w(i,j,k,itu1)*fv1)**2
-                endif
-                dft2 = -two*rsaCt4*chi*ft2/nu
-
-                drr = (one - rr*(fv2 + w(i,j,k,itu1)*dfv2)) &
-                     * kar2Inv*dist2Inv/sst
-                dgg = (one - rsaCw2 + six*rsaCw2*(rr**5))*drr
-                dfw = (cw36/(gg6 + cw36))*termFw*dgg
-
-                ! Compute the source term jacobian. Note that the part
-                ! containing term1 is treated explicitly. The reason is that
-                ! implicit treatment of this part leads to a decrease of the
-                ! diagonal dominance of the jacobian and it thus decreases
-                ! the stability. You may want to play around and try to
-                ! take this term into account in the jacobian.
-                ! Note that -dsource/dnu is stored.
-                qq(i,j,k) = -two*term2*w(i,j,k,itu1)                      &
-                     -  dist2Inv*w(i,j,k,itu1)*w(i,j,k,itu1)         &
-                     * (rsaCb1*kar2Inv*(dfv2-ft2*dfv2-fv2*dft2+dft2) &
-                     -  rsaCw1*dfw)
-
-                ! A couple of terms in qq may lead to a negative
-                ! contribution. Clip qq to zero, if the total is negative.
-
-                qq(i,j,k) = max(qq(i,j,k), zero)
+                        ! Compute some derivatives w.r.t. nuTilde. These will occur
+                        ! in the left hand side, i.e. the matrix for the implicit
+                        ! treatment.
+
+                        dfv1 = three*chi2*cv13/((chi3 + cv13)**2)
+                        if (.not. useRoughSA) then
+                            dfv2 = (chi2*dfv1 - one)/(nu*((one + chi*fv1)**2))
+                        else
+                            dfv2 = (w(i, j, k, itu1)*dfv1 - nu)/(nu + w(i, j, k, itu1)*fv1)**2
+                        end if
+                        dft2 = -two*rsaCt4*chi*ft2/nu
+
+                        drr = (one - rr*(fv2 + w(i, j, k, itu1)*dfv2)) &
+                              *kar2Inv*dist2Inv/sst
+                        dgg = (one - rsaCw2 + six*rsaCw2*(rr**5))*drr
+                        dfw = (cw36/(gg6 + cw36))*termFw*dgg
+
+                        ! Compute the source term jacobian. Note that the part
+                        ! containing term1 is treated explicitly. The reason is that
+                        ! implicit treatment of this part leads to a decrease of the
+                        ! diagonal dominance of the jacobian and it thus decreases
+                        ! the stability. You may want to play around and try to
+                        ! take this term into account in the jacobian.
+                        ! Note that -dsource/dnu is stored.
+                        qq(i, j, k) = -two*term2*w(i, j, k, itu1) &
+                                      - dist2Inv*w(i, j, k, itu1)*w(i, j, k, itu1) &
+                                      *(rsaCb1*kar2Inv*(dfv2 - ft2*dfv2 - fv2*dft2 + dft2) &
+                                        - rsaCw1*dfw)
+
+                        ! A couple of terms in qq may lead to a negative
+                        ! contribution. Clip qq to zero, if the total is negative.
+
+                        qq(i, j, k) = max(qq(i, j, k), zero)
 #endif
 #ifdef TAPENADE_REVERSE
-             end do
+                    end do
 #else
-          enddo
-       enddo
-    enddo
+                end do
+            end do
+        end do
 #endif
-  end subroutine saSource
-
-  subroutine saViscous
-    !
-    !  Viscous term.
-    !  Determine the viscous contribution to the residual
-    !  for all internal cells of the block.
-
-    use blockPointers
-    use paramTurb
-    implicit none
-    ! Local variables.
-    integer(kind=intType) :: i, j, k, nn, ii
-    real(kind=realType) :: nu
-    real(kind=realType) :: fv1, fv2, ft2
-    real(kind=realType) :: voli, volmi, volpi, xm, ym, zm, xp, yp, zp
-    real(kind=realType) :: xa, ya, za, ttm, ttp, cnud, cam, cap
-    real(kind=realType) :: nutm, nutp, num, nup, cdm, cdp
-    real(kind=realType) :: c1m, c1p, c10, b1, c1, d1, qs
-
-    ! Set model constants
-    cv13    = rsaCv1**3
-    kar2Inv = one/(rsaK**2)
-    cw36    = rsaCw3**6
-    cb3Inv  = one/rsaCb3
-
-    !
-    !       Viscous terms in k-direction.
-    !
+    end subroutine saSource
+
+    subroutine saViscous
+        !
+        !  Viscous term.
+        !  Determine the viscous contribution to the residual
+        !  for all internal cells of the block.
+
+        use blockPointers
+        use paramTurb
+        implicit none
+        ! Local variables.
+        integer(kind=intType) :: i, j, k, nn, ii
+        real(kind=realType) :: nu
+        real(kind=realType) :: fv1, fv2, ft2
+        real(kind=realType) :: voli, volmi, volpi, xm, ym, zm, xp, yp, zp
+        real(kind=realType) :: xa, ya, za, ttm, ttp, cnud, cam, cap
+        real(kind=realType) :: nutm, nutp, num, nup, cdm, cdp
+        real(kind=realType) :: c1m, c1p, c10, b1, c1, d1, qs
+
+        ! Set model constants
+        cv13 = rsaCv1**3
+        kar2Inv = one/(rsaK**2)
+        cw36 = rsaCw3**6
+        cb3Inv = one/rsaCb3
+
+        !
+        !       Viscous terms in k-direction.
+        !
 #ifdef TAPENADE_REVERSE
-    !$AD II-LOOP
-    do ii=0,nx*ny*nz-1
-       i = mod(ii, nx) + 2
-       j = mod(ii/nx, ny) + 2
-       k = ii/(nx*ny) + 2
+        !$AD II-LOOP
+        do ii = 0, nx*ny*nz - 1
+            i = mod(ii, nx) + 2
+            j = mod(ii/nx, ny) + 2
+            k = ii/(nx*ny) + 2
 #else
-       do k=2, kl
-          do j=2, jl
-             do i=2, il
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
 #endif
-                ! Compute the metrics in zeta-direction, i.e. along the
-                ! line k = constant.
+                        ! Compute the metrics in zeta-direction, i.e. along the
+                        ! line k = constant.
 
-                voli  = one/vol(i,j,k)
-                volmi = two/(vol(i,j,k) + vol(i,j,k-1))
-                volpi = two/(vol(i,j,k) + vol(i,j,k+1))
+                        voli = one/vol(i, j, k)
+                        volmi = two/(vol(i, j, k) + vol(i, j, k - 1))
+                        volpi = two/(vol(i, j, k) + vol(i, j, k + 1))
 
-                xm = sk(i,j,k-1,1)*volmi
-                ym = sk(i,j,k-1,2)*volmi
-                zm = sk(i,j,k-1,3)*volmi
-                xp = sk(i,j,k,  1)*volpi
-                yp = sk(i,j,k,  2)*volpi
-                zp = sk(i,j,k,  3)*volpi
+                        xm = sk(i, j, k - 1, 1)*volmi
+                        ym = sk(i, j, k - 1, 2)*volmi
+                        zm = sk(i, j, k - 1, 3)*volmi
+                        xp = sk(i, j, k, 1)*volpi
+                        yp = sk(i, j, k, 2)*volpi
+                        zp = sk(i, j, k, 3)*volpi
 
-                xa  = half*(sk(i,j,k,1) + sk(i,j,k-1,1))*voli
-                ya  = half*(sk(i,j,k,2) + sk(i,j,k-1,2))*voli
-                za  = half*(sk(i,j,k,3) + sk(i,j,k-1,3))*voli
-                ttm = xm*xa + ym*ya + zm*za
-                ttp = xp*xa + yp*ya + zp*za
+                        xa = half*(sk(i, j, k, 1) + sk(i, j, k - 1, 1))*voli
+                        ya = half*(sk(i, j, k, 2) + sk(i, j, k - 1, 2))*voli
+                        za = half*(sk(i, j, k, 3) + sk(i, j, k - 1, 3))*voli
+                        ttm = xm*xa + ym*ya + zm*za
+                        ttp = xp*xa + yp*ya + zp*za
 
-                ! ttm and ttp ~ 1/deltaX^2
+                        ! ttm and ttp ~ 1/deltaX^2
 
-                ! Computation of the viscous terms in zeta-direction; note
-                ! that cross-derivatives are neglected, i.e. the mesh is
-                ! assumed to be orthogonal.
-                ! Furthermore, the grad(nu)**2 has been rewritten as
-                ! div(nu grad(nu)) - nu div(grad nu) to enhance stability.
-                ! The second derivative in zeta-direction is constructed as
-                ! the central difference of the first order derivatives, i.e.
-                ! d^2/dzeta^2 = d/dzeta (d/dzeta k+1/2 - d/dzeta k-1/2).
-                ! In this way the metric can be taken into account.
+                        ! Computation of the viscous terms in zeta-direction; note
+                        ! that cross-derivatives are neglected, i.e. the mesh is
+                        ! assumed to be orthogonal.
+                        ! Furthermore, the grad(nu)**2 has been rewritten as
+                        ! div(nu grad(nu)) - nu div(grad nu) to enhance stability.
+                        ! The second derivative in zeta-direction is constructed as
+                        ! the central difference of the first order derivatives, i.e.
+                        ! d^2/dzeta^2 = d/dzeta (d/dzeta k+1/2 - d/dzeta k-1/2).
+                        ! In this way the metric can be taken into account.
 
-                ! Compute the diffusion coefficients multiplying the nodes
-                ! k+1, k and k-1 in the second derivative. Make sure that
-                ! these coefficients are nonnegative.
+                        ! Compute the diffusion coefficients multiplying the nodes
+                        ! k+1, k and k-1 in the second derivative. Make sure that
+                        ! these coefficients are nonnegative.
 
-                cnud = -rsaCb2*w(i,j,k,itu1)*cb3Inv
-                cam  =  ttm*cnud
-                cap  =  ttp*cnud
+                        cnud = -rsaCb2*w(i, j, k, itu1)*cb3Inv
+                        cam = ttm*cnud
+                        cap = ttp*cnud
 
-                ! Compute nuTilde at the faces
+                        ! Compute nuTilde at the faces
 
-                nutm = half*(w(i,j,k-1,itu1) + w(i,j,k,itu1))
-                nutp = half*(w(i,j,k+1,itu1) + w(i,j,k,itu1))
+                        nutm = half*(w(i, j, k - 1, itu1) + w(i, j, k, itu1))
+                        nutp = half*(w(i, j, k + 1, itu1) + w(i, j, k, itu1))
 
-                ! Compute nu at the faces
+                        ! Compute nu at the faces
 
-                nu   = rlv(i,j,k)/w(i,j,k,irho)
-                num  = half*(rlv(i,j,k-1)/w(i,j,k-1,irho) + nu)
-                nup  = half*(rlv(i,j,k+1)/w(i,j,k+1,irho) + nu)
+                        nu = rlv(i, j, k)/w(i, j, k, irho)
+                        num = half*(rlv(i, j, k - 1)/w(i, j, k - 1, irho) + nu)
+                        nup = half*(rlv(i, j, k + 1)/w(i, j, k + 1, irho) + nu)
 
-                cdm  = (num + (one + rsaCb2)*nutm)*ttm*cb3Inv
-                cdp  = (nup + (one + rsaCb2)*nutp)*ttp*cb3Inv
+                        cdm = (num + (one + rsaCb2)*nutm)*ttm*cb3Inv
+                        cdp = (nup + (one + rsaCb2)*nutp)*ttp*cb3Inv
 
-                c1m = max(cdm+cam, zero)
-                c1p = max(cdp+cap, zero)
-                c10 = c1m + c1p
+                        c1m = max(cdm + cam, zero)
+                        c1p = max(cdp + cap, zero)
+                        c10 = c1m + c1p
 
-                ! Update the residual for this cell and store the possible
-                ! coefficients for the matrix in b1, c1 and d1.
+                        ! Update the residual for this cell and store the possible
+                        ! coefficients for the matrix in b1, c1 and d1.
 
-                scratch(i,j,k,idvt) = scratch(i,j,k,idvt)      + c1m*w(i,j,k-1,itu1) &
-                     - c10*w(i,j,k,itu1) + c1p*w(i,j,k+1,itu1)
+                        scratch(i, j, k, idvt) = scratch(i, j, k, idvt) + c1m*w(i, j, k - 1, itu1) &
+                                                 - c10*w(i, j, k, itu1) + c1p*w(i, j, k + 1, itu1)
 #ifndef USE_TAPENADE
-                b1 = -c1m
-                c1 =  c10
-                d1 = -c1p
-
-                ! Update the central jacobian. For nonboundary cells this
-                ! is simply c1. For boundary cells this is slightly more
-                ! complicated, because the boundary conditions are treated
-                ! implicitly and the off-diagonal terms b1 and d1 must be
-                ! taken into account.
-                ! The boundary conditions are only treated implicitly if
-                ! the diagonal dominance of the matrix is increased.
-
-                if(k == 2) then
-                   qq(i,j,k) = qq(i,j,k) + c1 &
-                        - b1*max(bmtk1(i,j,itu1,itu1),zero)
-                else if(k == kl) then
-                   qq(i,j,k) = qq(i,j,k) + c1 &
-                        - d1*max(bmtk2(i,j,itu1,itu1),zero)
-                else
-                   qq(i,j,k) = qq(i,j,k) + c1
-                endif
+                        b1 = -c1m
+                        c1 = c10
+                        d1 = -c1p
+
+                        ! Update the central jacobian. For nonboundary cells this
+                        ! is simply c1. For boundary cells this is slightly more
+                        ! complicated, because the boundary conditions are treated
+                        ! implicitly and the off-diagonal terms b1 and d1 must be
+                        ! taken into account.
+                        ! The boundary conditions are only treated implicitly if
+                        ! the diagonal dominance of the matrix is increased.
+
+                        if (k == 2) then
+                            qq(i, j, k) = qq(i, j, k) + c1 &
+                                          - b1*max(bmtk1(i, j, itu1, itu1), zero)
+                        else if (k == kl) then
+                            qq(i, j, k) = qq(i, j, k) + c1 &
+                                          - d1*max(bmtk2(i, j, itu1, itu1), zero)
+                        else
+                            qq(i, j, k) = qq(i, j, k) + c1
+                        end if
 #endif
 #ifdef TAPENADE_REVERSE
-             end do
+                    end do
 #else
-          enddo
-       enddo
-    enddo
+                end do
+            end do
+        end do
 #endif
-    !
-    !       Viscous terms in j-direction.
-    !
+        !
+        !       Viscous terms in j-direction.
+        !
 #ifdef TAPENADE_REVERSE
-    !$AD II-LOOP
-    do ii=0,nx*ny*nz-1
-       i = mod(ii, nx) + 2
-       j = mod(ii/nx, ny) + 2
-       k = ii/(nx*ny) + 2
+        !$AD II-LOOP
+        do ii = 0, nx*ny*nz - 1
+            i = mod(ii, nx) + 2
+            j = mod(ii/nx, ny) + 2
+            k = ii/(nx*ny) + 2
 #else
-       do k=2, kl
-          do j=2, jl
-             do i=2, il
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
 #endif
-                ! Compute the metrics in eta-direction, i.e. along the
-                ! line j = constant.
-
-                voli  = one/vol(i,j,k)
-                volmi = two/(vol(i,j,k) + vol(i,j-1,k))
-                volpi = two/(vol(i,j,k) + vol(i,j+1,k))
-
-                xm = sj(i,j-1,k,1)*volmi
-                ym = sj(i,j-1,k,2)*volmi
-                zm = sj(i,j-1,k,3)*volmi
-                xp = sj(i,j,  k,1)*volpi
-                yp = sj(i,j,  k,2)*volpi
-                zp = sj(i,j,  k,3)*volpi
-
-                xa  = half*(sj(i,j,k,1) + sj(i,j-1,k,1))*voli
-                ya  = half*(sj(i,j,k,2) + sj(i,j-1,k,2))*voli
-                za  = half*(sj(i,j,k,3) + sj(i,j-1,k,3))*voli
-                ttm = xm*xa + ym*ya + zm*za
-                ttp = xp*xa + yp*ya + zp*za
-
-                ! Computation of the viscous terms in eta-direction; note
-                ! that cross-derivatives are neglected, i.e. the mesh is
-                ! assumed to be orthogonal.
-                ! Furthermore, the grad(nu)**2 has been rewritten as
-                ! div(nu grad(nu)) - nu div(grad nu) to enhance stability.
-                ! The second derivative in eta-direction is constructed as
-                ! the central difference of the first order derivatives, i.e.
-                ! d^2/deta^2 = d/deta (d/deta j+1/2 - d/deta j-1/2).
-                ! In this way the metric can be taken into account.
-
-                ! Compute the diffusion coefficients multiplying the nodes
-                ! j+1, j and j-1 in the second derivative. Make sure that
-                ! these coefficients are nonnegative.
-
-                cnud = -rsaCb2*w(i,j,k,itu1)*cb3Inv
-                cam  =  ttm*cnud
-                cap  =  ttp*cnud
-
-                nutm = half*(w(i,j-1,k,itu1) + w(i,j,k,itu1))
-                nutp = half*(w(i,j+1,k,itu1) + w(i,j,k,itu1))
-                nu   = rlv(i,j,k)/w(i,j,k,irho)
-                num  = half*(rlv(i,j-1,k)/w(i,j-1,k,irho) + nu)
-                nup  = half*(rlv(i,j+1,k)/w(i,j+1,k,irho) + nu)
-                cdm  = (num + (one + rsaCb2)*nutm)*ttm*cb3Inv
-                cdp  = (nup + (one + rsaCb2)*nutp)*ttp*cb3Inv
-
-                c1m = max(cdm+cam, zero)
-                c1p = max(cdp+cap, zero)
-                c10 = c1m + c1p
-
-                ! Update the residual for this cell and store the possible
-                ! coefficients for the matrix in b1, c1 and d1.
-
-                scratch(i,j,k,idvt) = scratch(i,j,k,idvt)      + c1m*w(i,j-1,k,itu1) &
-                     - c10*w(i,j,k,itu1) + c1p*w(i,j+1,k,itu1)
+                        ! Compute the metrics in eta-direction, i.e. along the
+                        ! line j = constant.
+
+                        voli = one/vol(i, j, k)
+                        volmi = two/(vol(i, j, k) + vol(i, j - 1, k))
+                        volpi = two/(vol(i, j, k) + vol(i, j + 1, k))
+
+                        xm = sj(i, j - 1, k, 1)*volmi
+                        ym = sj(i, j - 1, k, 2)*volmi
+                        zm = sj(i, j - 1, k, 3)*volmi
+                        xp = sj(i, j, k, 1)*volpi
+                        yp = sj(i, j, k, 2)*volpi
+                        zp = sj(i, j, k, 3)*volpi
+
+                        xa = half*(sj(i, j, k, 1) + sj(i, j - 1, k, 1))*voli
+                        ya = half*(sj(i, j, k, 2) + sj(i, j - 1, k, 2))*voli
+                        za = half*(sj(i, j, k, 3) + sj(i, j - 1, k, 3))*voli
+                        ttm = xm*xa + ym*ya + zm*za
+                        ttp = xp*xa + yp*ya + zp*za
+
+                        ! Computation of the viscous terms in eta-direction; note
+                        ! that cross-derivatives are neglected, i.e. the mesh is
+                        ! assumed to be orthogonal.
+                        ! Furthermore, the grad(nu)**2 has been rewritten as
+                        ! div(nu grad(nu)) - nu div(grad nu) to enhance stability.
+                        ! The second derivative in eta-direction is constructed as
+                        ! the central difference of the first order derivatives, i.e.
+                        ! d^2/deta^2 = d/deta (d/deta j+1/2 - d/deta j-1/2).
+                        ! In this way the metric can be taken into account.
+
+                        ! Compute the diffusion coefficients multiplying the nodes
+                        ! j+1, j and j-1 in the second derivative. Make sure that
+                        ! these coefficients are nonnegative.
+
+                        cnud = -rsaCb2*w(i, j, k, itu1)*cb3Inv
+                        cam = ttm*cnud
+                        cap = ttp*cnud
+
+                        nutm = half*(w(i, j - 1, k, itu1) + w(i, j, k, itu1))
+                        nutp = half*(w(i, j + 1, k, itu1) + w(i, j, k, itu1))
+                        nu = rlv(i, j, k)/w(i, j, k, irho)
+                        num = half*(rlv(i, j - 1, k)/w(i, j - 1, k, irho) + nu)
+                        nup = half*(rlv(i, j + 1, k)/w(i, j + 1, k, irho) + nu)
+                        cdm = (num + (one + rsaCb2)*nutm)*ttm*cb3Inv
+                        cdp = (nup + (one + rsaCb2)*nutp)*ttp*cb3Inv
+
+                        c1m = max(cdm + cam, zero)
+                        c1p = max(cdp + cap, zero)
+                        c10 = c1m + c1p
+
+                        ! Update the residual for this cell and store the possible
+                        ! coefficients for the matrix in b1, c1 and d1.
+
+                        scratch(i, j, k, idvt) = scratch(i, j, k, idvt) + c1m*w(i, j - 1, k, itu1) &
+                                                 - c10*w(i, j, k, itu1) + c1p*w(i, j + 1, k, itu1)
 #ifndef USE_TAPENADE
-                b1 = -c1m
-                c1 =  c10
-                d1 = -c1p
-
-                ! Update the central jacobian. For nonboundary cells this
-                ! is simply c1. For boundary cells this is slightly more
-                ! complicated, because the boundary conditions are treated
-                ! implicitly and the off-diagonal terms b1 and d1 must be
-                ! taken into account.
-                ! The boundary conditions are only treated implicitly if
-                ! the diagonal dominance of the matrix is increased.
-
-                if(j == 2) then
-                   qq(i,j,k) = qq(i,j,k) + c1 &
-                        - b1*max(bmtj1(i,k,itu1,itu1),zero)
-                else if(j == jl) then
-                   qq(i,j,k) = qq(i,j,k) + c1 &
-                        - d1*max(bmtj2(i,k,itu1,itu1),zero)
-                else
-                   qq(i,j,k) = qq(i,j,k) + c1
-                endif
+                        b1 = -c1m
+                        c1 = c10
+                        d1 = -c1p
+
+                        ! Update the central jacobian. For nonboundary cells this
+                        ! is simply c1. For boundary cells this is slightly more
+                        ! complicated, because the boundary conditions are treated
+                        ! implicitly and the off-diagonal terms b1 and d1 must be
+                        ! taken into account.
+                        ! The boundary conditions are only treated implicitly if
+                        ! the diagonal dominance of the matrix is increased.
+
+                        if (j == 2) then
+                            qq(i, j, k) = qq(i, j, k) + c1 &
+                                          - b1*max(bmtj1(i, k, itu1, itu1), zero)
+                        else if (j == jl) then
+                            qq(i, j, k) = qq(i, j, k) + c1 &
+                                          - d1*max(bmtj2(i, k, itu1, itu1), zero)
+                        else
+                            qq(i, j, k) = qq(i, j, k) + c1
+                        end if
 #endif
 #ifdef TAPENADE_REVERSE
-             end do
+                    end do
 #else
-          enddo
-       enddo
-    enddo
+                end do
+            end do
+        end do
 #endif
-    !
-    !       Viscous terms in i-direction.
-    !
+        !
+        !       Viscous terms in i-direction.
+        !
 #ifdef TAPENADE_REVERSE
-    !$AD II-LOOP
-    do ii=0,nx*ny*nz-1
-       i = mod(ii, nx) + 2
-       j = mod(ii/nx, ny) + 2
-       k = ii/(nx*ny) + 2
+        !$AD II-LOOP
+        do ii = 0, nx*ny*nz - 1
+            i = mod(ii, nx) + 2
+            j = mod(ii/nx, ny) + 2
+            k = ii/(nx*ny) + 2
 #else
-       do k=2, kl
-          do j=2, jl
-             do i=2, il
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
 #endif
-                ! Compute the metrics in xi-direction, i.e. along the
-                ! line i = constant.
-
-                voli  = one/vol(i,j,k)
-                volmi = two/(vol(i,j,k) + vol(i-1,j,k))
-                volpi = two/(vol(i,j,k) + vol(i+1,j,k))
-
-                xm = si(i-1,j,k,1)*volmi
-                ym = si(i-1,j,k,2)*volmi
-                zm = si(i-1,j,k,3)*volmi
-                xp = si(i,  j,k,1)*volpi
-                yp = si(i,  j,k,2)*volpi
-                zp = si(i,  j,k,3)*volpi
-
-                xa  = half*(si(i,j,k,1) + si(i-1,j,k,1))*voli
-                ya  = half*(si(i,j,k,2) + si(i-1,j,k,2))*voli
-                za  = half*(si(i,j,k,3) + si(i-1,j,k,3))*voli
-                ttm = xm*xa + ym*ya + zm*za
-                ttp = xp*xa + yp*ya + zp*za
-
-                ! Computation of the viscous terms in xi-direction; note
-                ! that cross-derivatives are neglected, i.e. the mesh is
-                ! assumed to be orthogonal.
-                ! Furthermore, the grad(nu)**2 has been rewritten as
-                ! div(nu grad(nu)) - nu div(grad nu) to enhance stability.
-                ! The second derivative in xi-direction is constructed as
-                ! the central difference of the first order derivatives, i.e.
-                ! d^2/dxi^2 = d/dxi (d/dxi i+1/2 - d/dxi i-1/2).
-                ! In this way the metric can be taken into account.
-
-                ! Compute the diffusion coefficients multiplying the nodes
-                ! i+1, i and i-1 in the second derivative. Make sure that
-                ! these coefficients are nonnegative.
-
-                cnud = -rsaCb2*w(i,j,k,itu1)*cb3Inv
-                cam  =  ttm*cnud
-                cap  =  ttp*cnud
-
-                nutm = half*(w(i-1,j,k,itu1) + w(i,j,k,itu1))
-                nutp = half*(w(i+1,j,k,itu1) + w(i,j,k,itu1))
-                nu   = rlv(i,j,k)/w(i,j,k,irho)
-                num  = half*(rlv(i-1,j,k)/w(i-1,j,k,irho) + nu)
-                nup  = half*(rlv(i+1,j,k)/w(i+1,j,k,irho) + nu)
-                cdm  = (num + (one + rsaCb2)*nutm)*ttm*cb3Inv
-                cdp  = (nup + (one + rsaCb2)*nutp)*ttp*cb3Inv
-
-                c1m = max(cdm+cam, zero)
-                c1p = max(cdp+cap, zero)
-                c10 = c1m + c1p
-
-                ! Update the residual for this cell and store the possible
-                ! coefficients for the matrix in b1, c1 and d1.
-
-                scratch(i,j,k,idvt) = scratch(i,j,k,idvt)      + c1m*w(i-1,j,k,itu1) &
-                     - c10*w(i,j,k,itu1) + c1p*w(i+1,j,k,itu1)
+                        ! Compute the metrics in xi-direction, i.e. along the
+                        ! line i = constant.
+
+                        voli = one/vol(i, j, k)
+                        volmi = two/(vol(i, j, k) + vol(i - 1, j, k))
+                        volpi = two/(vol(i, j, k) + vol(i + 1, j, k))
+
+                        xm = si(i - 1, j, k, 1)*volmi
+                        ym = si(i - 1, j, k, 2)*volmi
+                        zm = si(i - 1, j, k, 3)*volmi
+                        xp = si(i, j, k, 1)*volpi
+                        yp = si(i, j, k, 2)*volpi
+                        zp = si(i, j, k, 3)*volpi
+
+                        xa = half*(si(i, j, k, 1) + si(i - 1, j, k, 1))*voli
+                        ya = half*(si(i, j, k, 2) + si(i - 1, j, k, 2))*voli
+                        za = half*(si(i, j, k, 3) + si(i - 1, j, k, 3))*voli
+                        ttm = xm*xa + ym*ya + zm*za
+                        ttp = xp*xa + yp*ya + zp*za
+
+                        ! Computation of the viscous terms in xi-direction; note
+                        ! that cross-derivatives are neglected, i.e. the mesh is
+                        ! assumed to be orthogonal.
+                        ! Furthermore, the grad(nu)**2 has been rewritten as
+                        ! div(nu grad(nu)) - nu div(grad nu) to enhance stability.
+                        ! The second derivative in xi-direction is constructed as
+                        ! the central difference of the first order derivatives, i.e.
+                        ! d^2/dxi^2 = d/dxi (d/dxi i+1/2 - d/dxi i-1/2).
+                        ! In this way the metric can be taken into account.
+
+                        ! Compute the diffusion coefficients multiplying the nodes
+                        ! i+1, i and i-1 in the second derivative. Make sure that
+                        ! these coefficients are nonnegative.
+
+                        cnud = -rsaCb2*w(i, j, k, itu1)*cb3Inv
+                        cam = ttm*cnud
+                        cap = ttp*cnud
+
+                        nutm = half*(w(i - 1, j, k, itu1) + w(i, j, k, itu1))
+                        nutp = half*(w(i + 1, j, k, itu1) + w(i, j, k, itu1))
+                        nu = rlv(i, j, k)/w(i, j, k, irho)
+                        num = half*(rlv(i - 1, j, k)/w(i - 1, j, k, irho) + nu)
+                        nup = half*(rlv(i + 1, j, k)/w(i + 1, j, k, irho) + nu)
+                        cdm = (num + (one + rsaCb2)*nutm)*ttm*cb3Inv
+                        cdp = (nup + (one + rsaCb2)*nutp)*ttp*cb3Inv
+
+                        c1m = max(cdm + cam, zero)
+                        c1p = max(cdp + cap, zero)
+                        c10 = c1m + c1p
+
+                        ! Update the residual for this cell and store the possible
+                        ! coefficients for the matrix in b1, c1 and d1.
+
+                        scratch(i, j, k, idvt) = scratch(i, j, k, idvt) + c1m*w(i - 1, j, k, itu1) &
+                                                 - c10*w(i, j, k, itu1) + c1p*w(i + 1, j, k, itu1)
 #ifndef USE_TAPENADE
-                b1 = -c1m
-                c1 =  c10
-                d1 = -c1p
-
-                ! Update the central jacobian. For nonboundary cells this
-                ! is simply c1. For boundary cells this is slightly more
-                ! complicated, because the boundary conditions are treated
-                ! implicitly and the off-diagonal terms b1 and d1 must be
-                ! taken into account.
-                ! The boundary conditions are only treated implicitly if
-                ! the diagonal dominance of the matrix is increased.
-
-                if(i == 2) then
-                   qq(i,j,k) = qq(i,j,k) + c1 &
-                        - b1*max(bmti1(j,k,itu1,itu1),zero)
-                else if(i == il) then
-                   qq(i,j,k) = qq(i,j,k) + c1 &
-                        - d1*max(bmti2(j,k,itu1,itu1),zero)
-                else
-                   qq(i,j,k) = qq(i,j,k) + c1
-                endif
+                        b1 = -c1m
+                        c1 = c10
+                        d1 = -c1p
+
+                        ! Update the central jacobian. For nonboundary cells this
+                        ! is simply c1. For boundary cells this is slightly more
+                        ! complicated, because the boundary conditions are treated
+                        ! implicitly and the off-diagonal terms b1 and d1 must be
+                        ! taken into account.
+                        ! The boundary conditions are only treated implicitly if
+                        ! the diagonal dominance of the matrix is increased.
+
+                        if (i == 2) then
+                            qq(i, j, k) = qq(i, j, k) + c1 &
+                                          - b1*max(bmti1(j, k, itu1, itu1), zero)
+                        else if (i == il) then
+                            qq(i, j, k) = qq(i, j, k) + c1 &
+                                          - d1*max(bmti2(j, k, itu1, itu1), zero)
+                        else
+                            qq(i, j, k) = qq(i, j, k) + c1
+                        end if
 #endif
 #ifdef TAPENADE_REVERSE
-             end do
+                    end do
 #else
-          enddo
-       enddo
-    enddo
+                end do
+            end do
+        end do
 #endif
-  end subroutine saViscous
+    end subroutine saViscous
 
-  subroutine saResScale
+    subroutine saResScale
 
-    !
-    !  Multiply the residual by the volume and store this in dw; this
-    ! * is done for monitoring reasons only. The multiplication with the
-    ! * volume is present to be consistent with the flow residuals; also
-    !  the negative value is taken, again to be consistent with the
-    ! * flow equations. Also multiply by iblank so that no updates occur
-    !  in holes or the overset boundary.
-    use blockPointers
-    implicit none
+        !
+        !  Multiply the residual by the volume and store this in dw; this
+        ! * is done for monitoring reasons only. The multiplication with the
+        ! * volume is present to be consistent with the flow residuals; also
+        !  the negative value is taken, again to be consistent with the
+        ! * flow equations. Also multiply by iblank so that no updates occur
+        !  in holes or the overset boundary.
+        use blockPointers
+        implicit none
 
-    ! Local variables
-    integer(kind=intType) :: i,j,k,ii
-    real(kind=realType) :: rblank
+        ! Local variables
+        integer(kind=intType) :: i, j, k, ii
+        real(kind=realType) :: rblank
 
 #ifdef TAPENADE_REVERSE
-    !$AD II-LOOP
-    do ii=0,nx*ny*nz-1
-       i = mod(ii, nx) + 2
-       j = mod(ii/nx, ny) + 2
-       k = ii/(nx*ny) + 2
+        !$AD II-LOOP
+        do ii = 0, nx*ny*nz - 1
+            i = mod(ii, nx) + 2
+            j = mod(ii/nx, ny) + 2
+            k = ii/(nx*ny) + 2
 #else
-       do k=2, kl
-          do j=2, jl
-             do i=2, il
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
 #endif
-                rblank = max(real(iblank(i,j,k), realType), zero)
-                dw(i,j,k,itu1) = -volRef(i,j,k)*scratch(i,j,k,idvt)*rblank
+                        rblank = max(real(iblank(i, j, k), realType), zero)
+                        dw(i, j, k, itu1) = -volRef(i, j, k)*scratch(i, j, k, idvt)*rblank
 #ifdef TAPENADE_REVERSE
-             end do
+                    end do
 #else
-          enddo
-       enddo
-    enddo
+                end do
+            end do
+        end do
 #endif
-  end subroutine saResScale
+    end subroutine saResScale
 
 #ifndef USE_TAPENADE
-  subroutine saSolve
-    !
-    !  saSolve solves the turbulent transport equation for the
-    !  original Spalart-Allmaras model in a decoupled manner using
-    !  a diagonal dominant ADI-scheme.
-    use blockPointers
-    use inputIteration
-    use inputPhysics
-    use paramTurb
-    use turbutils
-    use turbCurveFits, only : curveTupYp
-    implicit none
-
-    integer(kind=intType) :: i, j, k, nn, ii
-    real(kind=realType), dimension(2:max(kl,il,jl)) :: bb, cc, dd, ff
-    real(kind=realType) :: voli, volmi, volpi, xm, ym, zm, xp, yp, zp
-    real(kind=realType) :: xa, ya, za, ttm, ttp, cnud, cam, cap
-    real(kind=realType) :: nutm, nutp, num, nup, cdm, cdp
-    real(kind=realType) :: c1m, c1p, c10, b1, c1, d1, qs
-    real(kind=realType) :: uu, um, up, factor, f, tu1p(1), nu, rblank
-
-    logical, dimension(2:jl,2:kl), target :: flagI2, flagIl
-    logical, dimension(2:il,2:kl), target :: flagJ2, flagJl
-    logical, dimension(2:il,2:jl), target :: flagK2, flagKl
-    logical, dimension(:,:), pointer :: flag
-
-    ! Initialize the wall function flags to .false.
-
-    flagI2 = .false.
-    flagIl = .false.
-    flagJ2 = .false.
-    flagJl = .false.
-    flagK2 = .false.
-    flagKl = .false.
-
-    ! Modify the rhs of the 1st internal cell, if wall functions
-    ! are used; their value is determined by the table.
-
-    testWallFunctions: if( wallFunctions ) then
-
-       bocos: do nn=1,nViscBocos
-
-          ! Determine the block face on which the subface is located
-          ! and set some variables. As flag points to the entire array
-          ! flagI2, etc., its starting indices are the starting indices
-          ! of its target and not 1.
-
-          select case (BCFaceID(nn))
-          case (iMin)
-             flag    => flagI2
-             ddw     => dw(2,1:,1:,1:); ddvt => scratch(2,1:,1:,idvt:)
-             ww      => w(2,1:,1:,1:);  rrlv => rlv(2,1:,1:)
-             dd2Wall => d2Wall(2,:,:)
-
-          case (iMax)
-             flag    => flagIl
-             ddw     => dw(il,1:,1:,1:); ddvt => scratch(il,1:,1:,idvt:)
-             ww      => w(il,1:,1:,1:);  rrlv => rlv(il,1:,1:)
-             dd2Wall => d2Wall(il,:,:)
-
-          case (jMin)
-             flag    => flagJ2
-             ddw     => dw(1:,2,1:,1:); ddvt => scratch(1:,2,1:,idvt:)
-             ww      => w(1:,2,1:,1:);  rrlv => rlv(1:,2,1:)
-             dd2Wall => d2Wall(:,2,:)
-
-          case (jMax)
-             flag    => flagJl
-             ddw     => dw(1:,jl,1:,1:); ddvt => scratch(1:,jl,1:,idvt:)
-             ww      => w(1:,jl,1:,1:);  rrlv => rlv(1:,jl,1:)
-             dd2Wall => d2Wall(:,jl,:)
-
-          case (kMin)
-             flag    => flagK2
-             ddw     => dw(1:,1:,2,1:); ddvt => scratch(1:,1:,2,idvt:)
-             ww      => w(1:,1:,2,1:);  rrlv => rlv(1:,1:,2)
-             dd2Wall => d2Wall(:,:,2)
-
-          case (kMax)
-             flag    => flagKl
-             ddw     => dw(1:,1:,kl,:); ddvt => scratch(1:,1:,kl,idvt:)
-             ww      => w(1:,1:,kl,1:); rrlv => rlv(1:,1:,kl)
-             dd2Wall => d2Wall(:,:,kl)
-
-          end select
-
-          ! Loop over the owned faces of this subface. Therefore the
-          ! nodal range of BCData must be used. The offset of +1 is
-          ! present, because the starting index of the cell range is
-          ! 1 larger than the starting index of the nodal range.
-
-          do j=(BCData(nn)%jnBeg+1),BCData(nn)%jnEnd
-             do i=(BCData(nn)%inBeg+1),BCData(nn)%inEnd
-
-                ! Set ddw to zero.
-
-                ddw(i,j,itu1) = zero
-
-                ! Enforce nu tilde in the 1st internal cell from the
-                ! wall function table. There is an offset of -1 in the
-                ! wall distance. Note that the offset compared to the
-                ! current value must be stored, because dvt contains
-                ! the update. Also note that the curve fits contain the
-                ! non-dimensional value.
-
-                yp = ww(i,j,irho)*dd2Wall(i-1,j-1) &
-                     * viscSubface(nn)%utau(i,j)/rrlv(i,j)
-
-                call curveTupYp(tu1p, yp, itu1, itu1)
-                ddvt(i,j,1) = tu1p(1)*rrlv(i,j)/ww(i,j,irho) - ww(i,j,itu1)
-
-                ! Set the wall flag to .true.
-
-                flag(i,j) = .true.
-
-             enddo
-          enddo
-
-       enddo bocos
-    endif testWallFunctions
-
-    ! For implicit relaxation take the local time step into account,
-    ! where dt is the inverse of the central jacobian times the cfl
-    ! number. The following system is solved:
-    ! (I/dt + cc + bb + dd)*dw = rhs, in which I/dt = cc/cfl. As in
-    ! the rest of the algorithm only the modified central jacobian is
-    ! used, stored it now.
-
-    ! Compute the factor multiplying the central jacobian, which
-    ! is 1 + 1/cfl (implicit relaxation only).
-
-    factor = one
-    if(turbRelax == turbRelaxImplicit) &
-         factor = one + (one-alfaTurb)/alfaTurb
-
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-
-             qq(i,j,k) = factor*qq(i,j,k)
-
-             ! Set qq to 1 if the value is determined by the
-             ! wall function table.
-
-             if((i == 2  .and. flagI2(j,k)) .or. &
-                  (i == il .and. flagIl(j,k)) .or. &
-                  (j == 2  .and. flagJ2(i,k)) .or. &
-                  (j == jl .and. flagJl(i,k)) .or. &
-                  (k == 2  .and. flagK2(i,j)) .or. &
-                  (k == kl .and. flagKl(i,j))) qq(i,j,k) = one
-
-          enddo
-       enddo
-    enddo
-
-    ! Initialize the grid velocity to zero. This value will be used
-    ! if the block is not moving.
-
-    qs = zero
-    !
-    !       dd-ADI step in j-direction. There is no particular reason to
-    !       start in j-direction, it just happened to be so. As we solve
-    !       in j-direction, the j-loop is the innermost loop.
-    !
-    do k=2,kl
-       do i=2,il
-          do j=2,jl
-
-             ! More or less the same code is executed here as above when
-             ! the residual was built. However, now the off-diagonal
-             ! terms for the dd-ADI must be built and stored. This could
-             ! have been done earlier, but then all the coefficients had
-             ! to be stored. To save memory, they are recomputed.
-             ! Consequently, see the j-loop to build the residual for
-             ! the comments.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i,j-1,k))
-             volpi = two/(vol(i,j,k) + vol(i,j+1,k))
-
-             xm = sj(i,j-1,k,1)*volmi
-             ym = sj(i,j-1,k,2)*volmi
-             zm = sj(i,j-1,k,3)*volmi
-             xp = sj(i,j,  k,1)*volpi
-             yp = sj(i,j,  k,2)*volpi
-             zp = sj(i,j,  k,3)*volpi
-
-             xa  = half*(sj(i,j,k,1) + sj(i,j-1,k,1))*voli
-             ya  = half*(sj(i,j,k,2) + sj(i,j-1,k,2))*voli
-             za  = half*(sj(i,j,k,3) + sj(i,j-1,k,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             cnud = -rsaCb2*w(i,j,k,itu1)*cb3Inv
-             cam  =  ttm*cnud
-             cap  =  ttp*cnud
-
-             ! Off-diagonal terms due to the diffusion terms
-             ! in j-direction.
-
-             nutm = half*(w(i,j-1,k,itu1) + w(i,j,k,itu1))
-             nutp = half*(w(i,j+1,k,itu1) + w(i,j,k,itu1))
-             nu   = rlv(i,j,k)/w(i,j,k,irho)
-             num  = half*(rlv(i,j-1,k)/w(i,j-1,k,irho) + nu)
-             nup  = half*(rlv(i,j+1,k)/w(i,j+1,k,irho) + nu)
-             cdm  = (num + (one + rsaCb2)*nutm)*ttm*cb3Inv
-             cdp  = (nup + (one + rsaCb2)*nutp)*ttp*cb3Inv
-
-             c1m = max(cdm+cam, zero)
-             c1p = max(cdp+cap, zero)
-
-             bb(j) = -c1m
-             dd(j) = -c1p
-
-             ! Compute the grid velocity if present.
-             ! It is taken as the average of j and j-1,
-
-             if( addGridVelocities ) &
-                  qs = half*(sFaceJ(i,j,k) + sFaceJ(i,j-1,k))*voli
-
-             ! Off-diagonal terms due to the advection term in
-             ! j-direction. First order approximation.
-
-             uu = xa*w(i,j,k,ivx) + ya*w(i,j,k,ivy) + za*w(i,j,k,ivz) - qs
-             um = zero
-             up = zero
-             if(uu < zero) um = uu
-             if(uu > zero) up = uu
-
-             bb(j) = bb(j) - up
-             dd(j) = dd(j) + um
-
-             ! Store the central jacobian and rhs in cc and ff.
-             ! Multiply the off-diagonal terms and rhs by the iblank
-             ! value so the update determined for iblank = 0 is zero.
-
-             rblank = max(real(iblank(i,j,k), realType), zero)
-
-             cc(j) = qq(i,j,k)
-             ff(j) = scratch(i,j,k,idvt)*rblank
-
-             bb(j) = bb(j)*rblank
-             dd(j) = dd(j)*rblank
-
-             ! Set the off diagonal terms to zero if the wall is flagged.
-
-             if((i == 2  .and. flagI2(j,k)) .or. &
-                  (i == il .and. flagIl(j,k)) .or. &
-                  (j == 2  .and. flagJ2(i,k)) .or. &
-                  (j == jl .and. flagJl(i,k)) .or. &
-                  (k == 2  .and. flagK2(i,j)) .or. &
-                  (k == kl .and. flagKl(i,j))) then
-                bb(j) = zero
-                dd(j) = zero
-             endif
-
-          enddo
-
-          ! Solve the tri-diagonal system in j-direction.
-          ! First the backward sweep to eliMinate the upper diagonal dd.
-
-          do j=ny,2,-1
-             f     = dd(j)/cc(j+1)
-             cc(j) = cc(j) - f*bb(j+1)
-             ff(j) = ff(j) - f*ff(j+1)
-          enddo
-
-          ! The matrix is now in lower block bi-diagonal form.
-          ! Perform a forward sweep to compute the solution.
-
-          ff(2) = ff(2)/cc(2)
-          do j=3,jl
-             ff(j) = ff(j) - bb(j)*ff(j-1)
-             ff(j) = ff(j)/cc(j)
-          enddo
-
-          ! Determine the new rhs for the next direction.
-
-          do j=2,jl
-             scratch(i,j,k,idvt) = ff(j)*qq(i,j,k)
-          enddo
-
-       enddo
-    enddo
-    !
-    !       dd-ADI step in i-direction. As we solve in i-direction, the
-    !       i-loop is the innermost loop.
-    !
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-
-             ! More or less the same code is executed here as above when
-             ! the residual was built. However, now the off-diagonal
-             ! terms for the dd-ADI must be built and stored. This could
-             ! have been done earlier, but then all the coefficients had
-             ! to be stored. To save memory, they are recomputed.
-             ! Consequently, see the i-loop to build the residual for
-             ! the comments.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i-1,j,k))
-             volpi = two/(vol(i,j,k) + vol(i+1,j,k))
-
-             xm = si(i-1,j,k,1)*volmi
-             ym = si(i-1,j,k,2)*volmi
-             zm = si(i-1,j,k,3)*volmi
-             xp = si(i,  j,k,1)*volpi
-             yp = si(i,  j,k,2)*volpi
-             zp = si(i,  j,k,3)*volpi
-
-             xa  = half*(si(i,j,k,1) + si(i-1,j,k,1))*voli
-             ya  = half*(si(i,j,k,2) + si(i-1,j,k,2))*voli
-             za  = half*(si(i,j,k,3) + si(i-1,j,k,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             cnud = -rsaCb2*w(i,j,k,itu1)*cb3Inv
-             cam  =  ttm*cnud
-             cap  =  ttp*cnud
-
-             ! Off-diagonal terms due to the diffusion terms
-             ! in i-direction.
-
-             nutm = half*(w(i-1,j,k,itu1) + w(i,j,k,itu1))
-             nutp = half*(w(i+1,j,k,itu1) + w(i,j,k,itu1))
-             nu   = rlv(i,j,k)/w(i,j,k,irho)
-             num  = half*(rlv(i-1,j,k)/w(i-1,j,k,irho) + nu)
-             nup  = half*(rlv(i+1,j,k)/w(i+1,j,k,irho) + nu)
-             cdm  = (num + (one + rsaCb2)*nutm)*ttm*cb3Inv
-             cdp  = (nup + (one + rsaCb2)*nutp)*ttp*cb3Inv
-
-             c1m = max(cdm+cam, zero)
-             c1p = max(cdp+cap, zero)
-
-             bb(i) = -c1m
-             dd(i) = -c1p
-
-             ! Compute the grid velocity if present.
-             ! It is taken as the average of i and i-1,
-
-             if( addGridVelocities ) &
-                  qs = half*(sFaceI(i,j,k) + sFaceI(i-1,j,k))*voli
-
-             ! Off-diagonal terms due to the advection term in
-             ! i-direction. First order approximation.
-
-             uu = xa*w(i,j,k,ivx) + ya*w(i,j,k,ivy) + za*w(i,j,k,ivz) - qs
-             um = zero
-             up = zero
-             if(uu < zero) um = uu
-             if(uu > zero) up = uu
-
-             bb(i) = bb(i) - up
-             dd(i) = dd(i) + um
-
-             ! Store the central jacobian and rhs in cc and ff.
-             ! Multiply the off-diagonal terms and rhs by the iblank
-             ! value so the update determined for iblank = 0 is zero.
-
-             rblank = max(real(iblank(i,j,k), realType), zero)
-
-             cc(i) = qq(i,j,k)
-             ff(i) = scratch(i,j,k,idvt)*rblank
-
-             bb(i) = bb(i)*rblank
-             dd(i) = dd(i)*rblank
-
-             ! Set the off diagonal terms to zero if the wall is flagged.
-
-             if((i == 2  .and. flagI2(j,k)) .or. &
-                  (i == il .and. flagIl(j,k)) .or. &
-                  (j == 2  .and. flagJ2(i,k)) .or. &
-                  (j == jl .and. flagJl(i,k)) .or. &
-                  (k == 2  .and. flagK2(i,j)) .or. &
-                  (k == kl .and. flagKl(i,j))) then
-                bb(i) = zero
-                dd(i) = zero
-             endif
-
-          enddo
-
-          ! Solve the tri-diagonal system in i-direction.
-          ! First the backward sweep to eliMinate the upper diagonal dd.
-
-          do i=nx,2,-1
-             f     = dd(i)/cc(i+1)
-             cc(i) = cc(i) - f*bb(i+1)
-             ff(i) = ff(i) - f*ff(i+1)
-          enddo
-
-          ! The matrix is now in lower block bi-diagonal form.
-          ! Perform a forward sweep to compute the solution.
-
-          ff(2) = ff(2)/cc(2)
-          do i=3,il
-             ff(i) = ff(i) - bb(i)*ff(i-1)
-             ff(i) = ff(i)/cc(i)
-          enddo
-
-          ! Determine the new rhs for the next direction.
-
-          do i=2,il
-             scratch(i,j,k,idvt) = ff(i)*qq(i,j,k)
-          enddo
-
-       enddo
-    enddo
-    !
-    !       dd-ADI step in k-direction. As we solve in k-direction, the
-    !       k-loop is the innermost loop.
-    !
-    do j=2,jl
-       do i=2,il
-          do k=2,kl
-
-             ! More or less the same code is executed here as above when
-             ! the residual was built. However, now the off-diagonal
-             ! terms for the dd-ADI must be built and stored. This could
-             ! have been done earlier, but then all the coefficients had
-             ! to be stored. To save memory, they are recomputed.
-             ! Consequently, see the k-loop to build the residual for
-             ! the comments.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i,j,k-1))
-             volpi = two/(vol(i,j,k) + vol(i,j,k+1))
-
-             xm = sk(i,j,k-1,1)*volmi
-             ym = sk(i,j,k-1,2)*volmi
-             zm = sk(i,j,k-1,3)*volmi
-             xp = sk(i,j,k,  1)*volpi
-             yp = sk(i,j,k,  2)*volpi
-             zp = sk(i,j,k,  3)*volpi
-
-             xa  = half*(sk(i,j,k,1) + sk(i,j,k-1,1))*voli
-             ya  = half*(sk(i,j,k,2) + sk(i,j,k-1,2))*voli
-             za  = half*(sk(i,j,k,3) + sk(i,j,k-1,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             cnud = -rsaCb2*w(i,j,k,itu1)*cb3Inv
-             cam  =  ttm*cnud
-             cap  =  ttp*cnud
-
-             ! Off-diagonal terms due to the diffusion terms
-             ! in k-direction.
-
-             nutm = half*(w(i,j,k-1,itu1) + w(i,j,k,itu1))
-             nutp = half*(w(i,j,k+1,itu1) + w(i,j,k,itu1))
-             nu   = rlv(i,j,k)/w(i,j,k,irho)
-             num  = half*(rlv(i,j,k-1)/w(i,j,k-1,irho) + nu)
-             nup  = half*(rlv(i,j,k+1)/w(i,j,k+1,irho) + nu)
-             cdm  = (num + (one + rsaCb2)*nutm)*ttm*cb3Inv
-             cdp  = (nup + (one + rsaCb2)*nutp)*ttp*cb3Inv
-
-             c1m = max(cdm+cam, zero)
-             c1p = max(cdp+cap, zero)
-
-             bb(k) = -c1m
-             dd(k) = -c1p
-
-             ! Compute the grid velocity if present.
-             ! It is taken as the average of k and k-1,
-
-             if( addGridVelocities ) &
-                  qs = half*(sFaceK(i,j,k) + sFaceK(i,j,k-1))*voli
-
-             ! Off-diagonal terms due to the advection term in
-             ! k-direction. First order approximation.
-
-             uu = xa*w(i,j,k,ivx) + ya*w(i,j,k,ivy) + za*w(i,j,k,ivz) - qs
-             um = zero
-             up = zero
-             if(uu < zero) um = uu
-             if(uu > zero) up = uu
-
-             bb(k) = bb(k) - up
-             dd(k) = dd(k) + um
-
-             ! Store the central jacobian and rhs in cc and ff.
-             ! Multiply the off-diagonal terms and rhs by the iblank
-             ! value so the update determined for iblank = 0 is zero.
-
-             rblank = max(real(iblank(i,j,k), realType), zero)
-
-             cc(k) = qq(i,j,k)
-             ff(k) = scratch(i,j,k,idvt)*rblank
-
-             bb(k) = bb(k)*rblank
-             dd(k) = dd(k)*rblank
-
-             ! Set the off diagonal terms to zero if the wall is flagged.
-
-             if((i == 2  .and. flagI2(j,k)) .or. &
-                  (i == il .and. flagIl(j,k)) .or. &
-                  (j == 2  .and. flagJ2(i,k)) .or. &
-                  (j == jl .and. flagJl(i,k)) .or. &
-                  (k == 2  .and. flagK2(i,j)) .or. &
-                  (k == kl .and. flagKl(i,j))) then
-                bb(k) = zero
-                dd(k) = zero
-             endif
-
-          enddo
-
-          ! Solve the tri-diagonal system in k-direction.
-          ! First the backward sweep to eliMinate the upper diagonal dd.
-
-          do k=nz,2,-1
-             f     = dd(k)/cc(k+1)
-             cc(k) = cc(k) - f*bb(k+1)
-             ff(k) = ff(k) - f*ff(k+1)
-          enddo
-
-          ! The matrix is now in lower block bi-diagonal form.
-          ! Perform a forward sweep to compute the solution.
-
-          ff(2) = ff(2)/cc(2)
-          do k=3,kl
-             ff(k) = ff(k) - bb(k)*ff(k-1)
-             ff(k) = ff(k)/cc(k)
-          enddo
-
-          ! Store the update in dvt.
-
-          do k=2,kl
-             scratch(i,j,k,idvt) = ff(k)
-          enddo
-
-       enddo
-    enddo
-    !
-    !       Update the turbulent variables. For explicit relaxation the
-    !       update must be relaxed; for implicit relaxation this has been
-    !       done via the time step.
-    !
-    factor = one
-    if(turbRelax == turbRelaxExplicit) factor = alfaTurb
-
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-             w(i,j,k,itu1) = w(i,j,k,itu1) + factor*scratch(i,j,k,idvt)
-             w(i,j,k,itu1) = max(w(i,j,k,itu1), zero)
-          enddo
-       enddo
-    enddo
-
-  end subroutine saSolve
+    subroutine saSolve
+        !
+        !  saSolve solves the turbulent transport equation for the
+        !  original Spalart-Allmaras model in a decoupled manner using
+        !  a diagonal dominant ADI-scheme.
+        use blockPointers
+        use inputIteration
+        use inputPhysics
+        use paramTurb
+        use turbutils
+        use turbCurveFits, only: curveTupYp
+        implicit none
+
+        integer(kind=intType) :: i, j, k, nn, ii
+        real(kind=realType), dimension(2:max(kl, il, jl)) :: bb, cc, dd, ff
+        real(kind=realType) :: voli, volmi, volpi, xm, ym, zm, xp, yp, zp
+        real(kind=realType) :: xa, ya, za, ttm, ttp, cnud, cam, cap
+        real(kind=realType) :: nutm, nutp, num, nup, cdm, cdp
+        real(kind=realType) :: c1m, c1p, c10, b1, c1, d1, qs
+        real(kind=realType) :: uu, um, up, factor, f, tu1p(1), nu, rblank
+
+        logical, dimension(2:jl, 2:kl), target :: flagI2, flagIl
+        logical, dimension(2:il, 2:kl), target :: flagJ2, flagJl
+        logical, dimension(2:il, 2:jl), target :: flagK2, flagKl
+        logical, dimension(:, :), pointer :: flag
+
+        ! Initialize the wall function flags to .false.
+
+        flagI2 = .false.
+        flagIl = .false.
+        flagJ2 = .false.
+        flagJl = .false.
+        flagK2 = .false.
+        flagKl = .false.
+
+        ! Modify the rhs of the 1st internal cell, if wall functions
+        ! are used; their value is determined by the table.
+
+        testWallFunctions: if (wallFunctions) then
+
+            bocos: do nn = 1, nViscBocos
+
+                ! Determine the block face on which the subface is located
+                ! and set some variables. As flag points to the entire array
+                ! flagI2, etc., its starting indices are the starting indices
+                ! of its target and not 1.
+
+                select case (BCFaceID(nn))
+                case (iMin)
+                    flag => flagI2
+                    ddw => dw(2, 1:, 1:, 1:); ddvt => scratch(2, 1:, 1:, idvt:)
+                    ww => w(2, 1:, 1:, 1:); rrlv => rlv(2, 1:, 1:)
+                    dd2Wall => d2Wall(2, :, :)
+
+                case (iMax)
+                    flag => flagIl
+                    ddw => dw(il, 1:, 1:, 1:); ddvt => scratch(il, 1:, 1:, idvt:)
+                    ww => w(il, 1:, 1:, 1:); rrlv => rlv(il, 1:, 1:)
+                    dd2Wall => d2Wall(il, :, :)
+
+                case (jMin)
+                    flag => flagJ2
+                    ddw => dw(1:, 2, 1:, 1:); ddvt => scratch(1:, 2, 1:, idvt:)
+                    ww => w(1:, 2, 1:, 1:); rrlv => rlv(1:, 2, 1:)
+                    dd2Wall => d2Wall(:, 2, :)
+
+                case (jMax)
+                    flag => flagJl
+                    ddw => dw(1:, jl, 1:, 1:); ddvt => scratch(1:, jl, 1:, idvt:)
+                    ww => w(1:, jl, 1:, 1:); rrlv => rlv(1:, jl, 1:)
+                    dd2Wall => d2Wall(:, jl, :)
+
+                case (kMin)
+                    flag => flagK2
+                    ddw => dw(1:, 1:, 2, 1:); ddvt => scratch(1:, 1:, 2, idvt:)
+                    ww => w(1:, 1:, 2, 1:); rrlv => rlv(1:, 1:, 2)
+                    dd2Wall => d2Wall(:, :, 2)
+
+                case (kMax)
+                    flag => flagKl
+                    ddw => dw(1:, 1:, kl, :); ddvt => scratch(1:, 1:, kl, idvt:)
+                    ww => w(1:, 1:, kl, 1:); rrlv => rlv(1:, 1:, kl)
+                    dd2Wall => d2Wall(:, :, kl)
+
+                end select
+
+                ! Loop over the owned faces of this subface. Therefore the
+                ! nodal range of BCData must be used. The offset of +1 is
+                ! present, because the starting index of the cell range is
+                ! 1 larger than the starting index of the nodal range.
+
+                do j = (BCData(nn)%jnBeg + 1), BCData(nn)%jnEnd
+                    do i = (BCData(nn)%inBeg + 1), BCData(nn)%inEnd
+
+                        ! Set ddw to zero.
+
+                        ddw(i, j, itu1) = zero
+
+                        ! Enforce nu tilde in the 1st internal cell from the
+                        ! wall function table. There is an offset of -1 in the
+                        ! wall distance. Note that the offset compared to the
+                        ! current value must be stored, because dvt contains
+                        ! the update. Also note that the curve fits contain the
+                        ! non-dimensional value.
+
+                        yp = ww(i, j, irho)*dd2Wall(i - 1, j - 1) &
+                             *viscSubface(nn)%utau(i, j)/rrlv(i, j)
+
+                        call curveTupYp(tu1p, yp, itu1, itu1)
+                        ddvt(i, j, 1) = tu1p(1)*rrlv(i, j)/ww(i, j, irho) - ww(i, j, itu1)
+
+                        ! Set the wall flag to .true.
+
+                        flag(i, j) = .true.
+
+                    end do
+                end do
+
+            end do bocos
+        end if testWallFunctions
+
+        ! For implicit relaxation take the local time step into account,
+        ! where dt is the inverse of the central jacobian times the cfl
+        ! number. The following system is solved:
+        ! (I/dt + cc + bb + dd)*dw = rhs, in which I/dt = cc/cfl. As in
+        ! the rest of the algorithm only the modified central jacobian is
+        ! used, stored it now.
+
+        ! Compute the factor multiplying the central jacobian, which
+        ! is 1 + 1/cfl (implicit relaxation only).
+
+        factor = one
+        if (turbRelax == turbRelaxImplicit) &
+            factor = one + (one - alfaTurb)/alfaTurb
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    qq(i, j, k) = factor*qq(i, j, k)
+
+                    ! Set qq to 1 if the value is determined by the
+                    ! wall function table.
+
+                    if ((i == 2 .and. flagI2(j, k)) .or. &
+                        (i == il .and. flagIl(j, k)) .or. &
+                        (j == 2 .and. flagJ2(i, k)) .or. &
+                        (j == jl .and. flagJl(i, k)) .or. &
+                        (k == 2 .and. flagK2(i, j)) .or. &
+                        (k == kl .and. flagKl(i, j))) qq(i, j, k) = one
+
+                end do
+            end do
+        end do
+
+        ! Initialize the grid velocity to zero. This value will be used
+        ! if the block is not moving.
+
+        qs = zero
+        !
+        !       dd-ADI step in j-direction. There is no particular reason to
+        !       start in j-direction, it just happened to be so. As we solve
+        !       in j-direction, the j-loop is the innermost loop.
+        !
+        do k = 2, kl
+            do i = 2, il
+                do j = 2, jl
+
+                    ! More or less the same code is executed here as above when
+                    ! the residual was built. However, now the off-diagonal
+                    ! terms for the dd-ADI must be built and stored. This could
+                    ! have been done earlier, but then all the coefficients had
+                    ! to be stored. To save memory, they are recomputed.
+                    ! Consequently, see the j-loop to build the residual for
+                    ! the comments.
+
+                    voli = one/vol(i, j, k)
+                    volmi = two/(vol(i, j, k) + vol(i, j - 1, k))
+                    volpi = two/(vol(i, j, k) + vol(i, j + 1, k))
+
+                    xm = sj(i, j - 1, k, 1)*volmi
+                    ym = sj(i, j - 1, k, 2)*volmi
+                    zm = sj(i, j - 1, k, 3)*volmi
+                    xp = sj(i, j, k, 1)*volpi
+                    yp = sj(i, j, k, 2)*volpi
+                    zp = sj(i, j, k, 3)*volpi
+
+                    xa = half*(sj(i, j, k, 1) + sj(i, j - 1, k, 1))*voli
+                    ya = half*(sj(i, j, k, 2) + sj(i, j - 1, k, 2))*voli
+                    za = half*(sj(i, j, k, 3) + sj(i, j - 1, k, 3))*voli
+                    ttm = xm*xa + ym*ya + zm*za
+                    ttp = xp*xa + yp*ya + zp*za
+
+                    cnud = -rsaCb2*w(i, j, k, itu1)*cb3Inv
+                    cam = ttm*cnud
+                    cap = ttp*cnud
+
+                    ! Off-diagonal terms due to the diffusion terms
+                    ! in j-direction.
+
+                    nutm = half*(w(i, j - 1, k, itu1) + w(i, j, k, itu1))
+                    nutp = half*(w(i, j + 1, k, itu1) + w(i, j, k, itu1))
+                    nu = rlv(i, j, k)/w(i, j, k, irho)
+                    num = half*(rlv(i, j - 1, k)/w(i, j - 1, k, irho) + nu)
+                    nup = half*(rlv(i, j + 1, k)/w(i, j + 1, k, irho) + nu)
+                    cdm = (num + (one + rsaCb2)*nutm)*ttm*cb3Inv
+                    cdp = (nup + (one + rsaCb2)*nutp)*ttp*cb3Inv
+
+                    c1m = max(cdm + cam, zero)
+                    c1p = max(cdp + cap, zero)
+
+                    bb(j) = -c1m
+                    dd(j) = -c1p
+
+                    ! Compute the grid velocity if present.
+                    ! It is taken as the average of j and j-1,
+
+                    if (addGridVelocities) &
+                        qs = half*(sFaceJ(i, j, k) + sFaceJ(i, j - 1, k))*voli
+
+                    ! Off-diagonal terms due to the advection term in
+                    ! j-direction. First order approximation.
+
+                    uu = xa*w(i, j, k, ivx) + ya*w(i, j, k, ivy) + za*w(i, j, k, ivz) - qs
+                    um = zero
+                    up = zero
+                    if (uu < zero) um = uu
+                    if (uu > zero) up = uu
+
+                    bb(j) = bb(j) - up
+                    dd(j) = dd(j) + um
+
+                    ! Store the central jacobian and rhs in cc and ff.
+                    ! Multiply the off-diagonal terms and rhs by the iblank
+                    ! value so the update determined for iblank = 0 is zero.
+
+                    rblank = max(real(iblank(i, j, k), realType), zero)
+
+                    cc(j) = qq(i, j, k)
+                    ff(j) = scratch(i, j, k, idvt)*rblank
+
+                    bb(j) = bb(j)*rblank
+                    dd(j) = dd(j)*rblank
+
+                    ! Set the off diagonal terms to zero if the wall is flagged.
+
+                    if ((i == 2 .and. flagI2(j, k)) .or. &
+                        (i == il .and. flagIl(j, k)) .or. &
+                        (j == 2 .and. flagJ2(i, k)) .or. &
+                        (j == jl .and. flagJl(i, k)) .or. &
+                        (k == 2 .and. flagK2(i, j)) .or. &
+                        (k == kl .and. flagKl(i, j))) then
+                        bb(j) = zero
+                        dd(j) = zero
+                    end if
+
+                end do
+
+                ! Solve the tri-diagonal system in j-direction.
+                ! First the backward sweep to eliMinate the upper diagonal dd.
+
+                do j = ny, 2, -1
+                    f = dd(j)/cc(j + 1)
+                    cc(j) = cc(j) - f*bb(j + 1)
+                    ff(j) = ff(j) - f*ff(j + 1)
+                end do
+
+                ! The matrix is now in lower block bi-diagonal form.
+                ! Perform a forward sweep to compute the solution.
+
+                ff(2) = ff(2)/cc(2)
+                do j = 3, jl
+                    ff(j) = ff(j) - bb(j)*ff(j - 1)
+                    ff(j) = ff(j)/cc(j)
+                end do
+
+                ! Determine the new rhs for the next direction.
+
+                do j = 2, jl
+                    scratch(i, j, k, idvt) = ff(j)*qq(i, j, k)
+                end do
+
+            end do
+        end do
+        !
+        !       dd-ADI step in i-direction. As we solve in i-direction, the
+        !       i-loop is the innermost loop.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! More or less the same code is executed here as above when
+                    ! the residual was built. However, now the off-diagonal
+                    ! terms for the dd-ADI must be built and stored. This could
+                    ! have been done earlier, but then all the coefficients had
+                    ! to be stored. To save memory, they are recomputed.
+                    ! Consequently, see the i-loop to build the residual for
+                    ! the comments.
+
+                    voli = one/vol(i, j, k)
+                    volmi = two/(vol(i, j, k) + vol(i - 1, j, k))
+                    volpi = two/(vol(i, j, k) + vol(i + 1, j, k))
+
+                    xm = si(i - 1, j, k, 1)*volmi
+                    ym = si(i - 1, j, k, 2)*volmi
+                    zm = si(i - 1, j, k, 3)*volmi
+                    xp = si(i, j, k, 1)*volpi
+                    yp = si(i, j, k, 2)*volpi
+                    zp = si(i, j, k, 3)*volpi
+
+                    xa = half*(si(i, j, k, 1) + si(i - 1, j, k, 1))*voli
+                    ya = half*(si(i, j, k, 2) + si(i - 1, j, k, 2))*voli
+                    za = half*(si(i, j, k, 3) + si(i - 1, j, k, 3))*voli
+                    ttm = xm*xa + ym*ya + zm*za
+                    ttp = xp*xa + yp*ya + zp*za
+
+                    cnud = -rsaCb2*w(i, j, k, itu1)*cb3Inv
+                    cam = ttm*cnud
+                    cap = ttp*cnud
+
+                    ! Off-diagonal terms due to the diffusion terms
+                    ! in i-direction.
+
+                    nutm = half*(w(i - 1, j, k, itu1) + w(i, j, k, itu1))
+                    nutp = half*(w(i + 1, j, k, itu1) + w(i, j, k, itu1))
+                    nu = rlv(i, j, k)/w(i, j, k, irho)
+                    num = half*(rlv(i - 1, j, k)/w(i - 1, j, k, irho) + nu)
+                    nup = half*(rlv(i + 1, j, k)/w(i + 1, j, k, irho) + nu)
+                    cdm = (num + (one + rsaCb2)*nutm)*ttm*cb3Inv
+                    cdp = (nup + (one + rsaCb2)*nutp)*ttp*cb3Inv
+
+                    c1m = max(cdm + cam, zero)
+                    c1p = max(cdp + cap, zero)
+
+                    bb(i) = -c1m
+                    dd(i) = -c1p
+
+                    ! Compute the grid velocity if present.
+                    ! It is taken as the average of i and i-1,
+
+                    if (addGridVelocities) &
+                        qs = half*(sFaceI(i, j, k) + sFaceI(i - 1, j, k))*voli
+
+                    ! Off-diagonal terms due to the advection term in
+                    ! i-direction. First order approximation.
+
+                    uu = xa*w(i, j, k, ivx) + ya*w(i, j, k, ivy) + za*w(i, j, k, ivz) - qs
+                    um = zero
+                    up = zero
+                    if (uu < zero) um = uu
+                    if (uu > zero) up = uu
+
+                    bb(i) = bb(i) - up
+                    dd(i) = dd(i) + um
+
+                    ! Store the central jacobian and rhs in cc and ff.
+                    ! Multiply the off-diagonal terms and rhs by the iblank
+                    ! value so the update determined for iblank = 0 is zero.
+
+                    rblank = max(real(iblank(i, j, k), realType), zero)
+
+                    cc(i) = qq(i, j, k)
+                    ff(i) = scratch(i, j, k, idvt)*rblank
+
+                    bb(i) = bb(i)*rblank
+                    dd(i) = dd(i)*rblank
+
+                    ! Set the off diagonal terms to zero if the wall is flagged.
+
+                    if ((i == 2 .and. flagI2(j, k)) .or. &
+                        (i == il .and. flagIl(j, k)) .or. &
+                        (j == 2 .and. flagJ2(i, k)) .or. &
+                        (j == jl .and. flagJl(i, k)) .or. &
+                        (k == 2 .and. flagK2(i, j)) .or. &
+                        (k == kl .and. flagKl(i, j))) then
+                        bb(i) = zero
+                        dd(i) = zero
+                    end if
+
+                end do
+
+                ! Solve the tri-diagonal system in i-direction.
+                ! First the backward sweep to eliMinate the upper diagonal dd.
+
+                do i = nx, 2, -1
+                    f = dd(i)/cc(i + 1)
+                    cc(i) = cc(i) - f*bb(i + 1)
+                    ff(i) = ff(i) - f*ff(i + 1)
+                end do
+
+                ! The matrix is now in lower block bi-diagonal form.
+                ! Perform a forward sweep to compute the solution.
+
+                ff(2) = ff(2)/cc(2)
+                do i = 3, il
+                    ff(i) = ff(i) - bb(i)*ff(i - 1)
+                    ff(i) = ff(i)/cc(i)
+                end do
+
+                ! Determine the new rhs for the next direction.
+
+                do i = 2, il
+                    scratch(i, j, k, idvt) = ff(i)*qq(i, j, k)
+                end do
+
+            end do
+        end do
+        !
+        !       dd-ADI step in k-direction. As we solve in k-direction, the
+        !       k-loop is the innermost loop.
+        !
+        do j = 2, jl
+            do i = 2, il
+                do k = 2, kl
+
+                    ! More or less the same code is executed here as above when
+                    ! the residual was built. However, now the off-diagonal
+                    ! terms for the dd-ADI must be built and stored. This could
+                    ! have been done earlier, but then all the coefficients had
+                    ! to be stored. To save memory, they are recomputed.
+                    ! Consequently, see the k-loop to build the residual for
+                    ! the comments.
+
+                    voli = one/vol(i, j, k)
+                    volmi = two/(vol(i, j, k) + vol(i, j, k - 1))
+                    volpi = two/(vol(i, j, k) + vol(i, j, k + 1))
+
+                    xm = sk(i, j, k - 1, 1)*volmi
+                    ym = sk(i, j, k - 1, 2)*volmi
+                    zm = sk(i, j, k - 1, 3)*volmi
+                    xp = sk(i, j, k, 1)*volpi
+                    yp = sk(i, j, k, 2)*volpi
+                    zp = sk(i, j, k, 3)*volpi
+
+                    xa = half*(sk(i, j, k, 1) + sk(i, j, k - 1, 1))*voli
+                    ya = half*(sk(i, j, k, 2) + sk(i, j, k - 1, 2))*voli
+                    za = half*(sk(i, j, k, 3) + sk(i, j, k - 1, 3))*voli
+                    ttm = xm*xa + ym*ya + zm*za
+                    ttp = xp*xa + yp*ya + zp*za
+
+                    cnud = -rsaCb2*w(i, j, k, itu1)*cb3Inv
+                    cam = ttm*cnud
+                    cap = ttp*cnud
+
+                    ! Off-diagonal terms due to the diffusion terms
+                    ! in k-direction.
+
+                    nutm = half*(w(i, j, k - 1, itu1) + w(i, j, k, itu1))
+                    nutp = half*(w(i, j, k + 1, itu1) + w(i, j, k, itu1))
+                    nu = rlv(i, j, k)/w(i, j, k, irho)
+                    num = half*(rlv(i, j, k - 1)/w(i, j, k - 1, irho) + nu)
+                    nup = half*(rlv(i, j, k + 1)/w(i, j, k + 1, irho) + nu)
+                    cdm = (num + (one + rsaCb2)*nutm)*ttm*cb3Inv
+                    cdp = (nup + (one + rsaCb2)*nutp)*ttp*cb3Inv
+
+                    c1m = max(cdm + cam, zero)
+                    c1p = max(cdp + cap, zero)
+
+                    bb(k) = -c1m
+                    dd(k) = -c1p
+
+                    ! Compute the grid velocity if present.
+                    ! It is taken as the average of k and k-1,
+
+                    if (addGridVelocities) &
+                        qs = half*(sFaceK(i, j, k) + sFaceK(i, j, k - 1))*voli
+
+                    ! Off-diagonal terms due to the advection term in
+                    ! k-direction. First order approximation.
+
+                    uu = xa*w(i, j, k, ivx) + ya*w(i, j, k, ivy) + za*w(i, j, k, ivz) - qs
+                    um = zero
+                    up = zero
+                    if (uu < zero) um = uu
+                    if (uu > zero) up = uu
+
+                    bb(k) = bb(k) - up
+                    dd(k) = dd(k) + um
+
+                    ! Store the central jacobian and rhs in cc and ff.
+                    ! Multiply the off-diagonal terms and rhs by the iblank
+                    ! value so the update determined for iblank = 0 is zero.
+
+                    rblank = max(real(iblank(i, j, k), realType), zero)
+
+                    cc(k) = qq(i, j, k)
+                    ff(k) = scratch(i, j, k, idvt)*rblank
+
+                    bb(k) = bb(k)*rblank
+                    dd(k) = dd(k)*rblank
+
+                    ! Set the off diagonal terms to zero if the wall is flagged.
+
+                    if ((i == 2 .and. flagI2(j, k)) .or. &
+                        (i == il .and. flagIl(j, k)) .or. &
+                        (j == 2 .and. flagJ2(i, k)) .or. &
+                        (j == jl .and. flagJl(i, k)) .or. &
+                        (k == 2 .and. flagK2(i, j)) .or. &
+                        (k == kl .and. flagKl(i, j))) then
+                        bb(k) = zero
+                        dd(k) = zero
+                    end if
+
+                end do
+
+                ! Solve the tri-diagonal system in k-direction.
+                ! First the backward sweep to eliMinate the upper diagonal dd.
+
+                do k = nz, 2, -1
+                    f = dd(k)/cc(k + 1)
+                    cc(k) = cc(k) - f*bb(k + 1)
+                    ff(k) = ff(k) - f*ff(k + 1)
+                end do
+
+                ! The matrix is now in lower block bi-diagonal form.
+                ! Perform a forward sweep to compute the solution.
+
+                ff(2) = ff(2)/cc(2)
+                do k = 3, kl
+                    ff(k) = ff(k) - bb(k)*ff(k - 1)
+                    ff(k) = ff(k)/cc(k)
+                end do
+
+                ! Store the update in dvt.
+
+                do k = 2, kl
+                    scratch(i, j, k, idvt) = ff(k)
+                end do
+
+            end do
+        end do
+        !
+        !       Update the turbulent variables. For explicit relaxation the
+        !       update must be relaxed; for implicit relaxation this has been
+        !       done via the time step.
+        !
+        factor = one
+        if (turbRelax == turbRelaxExplicit) factor = alfaTurb
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    w(i, j, k, itu1) = w(i, j, k, itu1) + factor*scratch(i, j, k, idvt)
+                    w(i, j, k, itu1) = max(w(i, j, k, itu1), zero)
+                end do
+            end do
+        end do
+
+    end subroutine saSolve
 #endif
 end module sa
diff --git a/src/turbulence/turbBCRoutines.F90 b/src/turbulence/turbBCRoutines.F90
index 085305ca6..e21230950 100644
--- a/src/turbulence/turbBCRoutines.F90
+++ b/src/turbulence/turbBCRoutines.F90
@@ -2,1420 +2,1413 @@ module turbBCRoutines
 
 contains
 #ifndef USE_TAPENADE
-  subroutine applyAllTurbBC(secondHalo)
-    !
-    !       applyAllTurbBC applies all boundary conditions to the
-    !       turbulent transport equations for the all blocks on the grid
-    !       level currentLevel.
-    !
-    use constants
-    use blockPointers
-    use inputTimeSpectral
-    use iteration
-    use utils, only : setPointers
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    logical, intent(in) :: secondHalo
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn, sps
-
-    ! Loop over the number of spectral modes and local blocks.
-
-    do sps=1,nTimeIntervalsSpectral
-       do nn=1,nDom
-
-          ! Set the pointers to this block. The min function is present
-          ! because this routine can be called from movfin.
-
-          call setPointers(nn, min(currentLevel,groundLevel), sps)
-
-          ! Set the arrays for the boundary condition treatment
-          ! and set the turbulent halo values.
-
-          call bcTurbTreatment
-          call applyAllTurbBCThisBlock(secondHalo)
-
-       enddo
-    enddo
-
-  end subroutine applyAllTurbBC
+    subroutine applyAllTurbBC(secondHalo)
+        !
+        !       applyAllTurbBC applies all boundary conditions to the
+        !       turbulent transport equations for the all blocks on the grid
+        !       level currentLevel.
+        !
+        use constants
+        use blockPointers
+        use inputTimeSpectral
+        use iteration
+        use utils, only: setPointers
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        logical, intent(in) :: secondHalo
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn, sps
+
+        ! Loop over the number of spectral modes and local blocks.
+
+        do sps = 1, nTimeIntervalsSpectral
+            do nn = 1, nDom
+
+                ! Set the pointers to this block. The min function is present
+                ! because this routine can be called from movfin.
+
+                call setPointers(nn, min(currentLevel, groundLevel), sps)
+
+                ! Set the arrays for the boundary condition treatment
+                ! and set the turbulent halo values.
+
+                call bcTurbTreatment
+                call applyAllTurbBCThisBlock(secondHalo)
+
+            end do
+        end do
+
+    end subroutine applyAllTurbBC
 #endif
-  !      ==================================================================
-
-  subroutine applyAllTurbBCThisBlock(secondHalo)
-    !
-    !       applyAllTurbBCThisBlock sets the halo values of the
-    !       turbulent variables and eddy viscosity for the block the
-    !       variables in blockPointers currently point to.
-    !
-    use constants
-    use blockPointers
-    use flowVarRefState
-    use inputPhysics
-
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    logical, intent(in) :: secondHalo
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn, i, j, l, m
-
-    real(kind=realType), dimension(:,:,:,:), pointer :: bmt
-    real(kind=realType), dimension(:,:,:),   pointer :: bvt, ww1, ww2
-
-    ! Loop over the boundary condition subfaces of this block.
-
-    bocos: do nn=1,nBocos
-
-       ! Loop over the faces and set the state in
-       ! the turbulent halo cells.
-
-       if( wallFunctions ) then
+    !      ==================================================================
+
+    subroutine applyAllTurbBCThisBlock(secondHalo)
+        !
+        !       applyAllTurbBCThisBlock sets the halo values of the
+        !       turbulent variables and eddy viscosity for the block the
+        !       variables in blockPointers currently point to.
+        !
+        use constants
+        use blockPointers
+        use flowVarRefState
+        use inputPhysics
+
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        logical, intent(in) :: secondHalo
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn, i, j, l, m
+
+        real(kind=realType), dimension(:, :, :, :), pointer :: bmt
+        real(kind=realType), dimension(:, :, :), pointer :: bvt, ww1, ww2
+
+        ! Loop over the boundary condition subfaces of this block.
+
+        bocos: do nn = 1, nBocos
+
+            ! Loop over the faces and set the state in
+            ! the turbulent halo cells.
+
+            if (wallFunctions) then
 #ifndef USE_TAPENADE
-          ! Determine the block face on which this subface is located
-          ! and set some pointers accordingly.
-
-          select case (BCFaceID(nn))
-          case (iMin)
-             bmt => bmti1; bvt => bvti1
-             ww1 => w(1 ,1:,1:,:); ww2 => w(2 ,1:,1:,:)
-
-          case (iMax)
-             bmt => bmti2; bvt => bvti2
-             ww1 => w(ie,1:,1:,:); ww2 => w(il,1:,1:,:)
-
-          case (jMin)
-             bmt => bmtj1; bvt => bvtj1
-             ww1 => w(1:,1 ,1:,:); ww2 => w(1:,2 ,1:,:)
-
-          case (jMax)
-             bmt => bmtj2; bvt => bvtj2
-             ww1 => w(1:,je,1:,:); ww2 => w(1:,jl,1:,:)
-
-          case (kMin)
-             bmt => bmtk1; bvt => bvtk1
-             ww1 => w(1:,1:,1 ,:); ww2 => w(1:,1:,2 ,:)
-
-          case (kMax)
-             bmt => bmtk2; bvt => bvtk2
-             ww1 => w(1:,1:,ke,:); ww2 => w(1:,1:,kl,:)
-          end select
-
-          ! Write an approximate value into the halo cell for
-          ! postprocessing (it is not used in computation).
-
-          do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-             do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                do l=nt1,nt2
-                   ww1(i,j,l) = bvt(i,j,l) - bmt(i,j,l,l)*ww2(i,j,l)
-                   do m=nt1,nt2
-                      if(m /= l .and. bmt(i,j,l,m) /= zero) &
-                           ww1(i,j,l) = ww2(i,j,l)
-                   enddo
-                enddo
-             enddo
-          enddo
-#endif
-       else
-
-          select case (BCFaceID(nn))
-          case (iMin)
-             do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-                do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                   do l=nt1,nt2
-                      w(1,i,j,l) = bvti1(i,j,l)
-                      do m=nt1,nt2
-                         w(1,i,j,l) = w(1,i,j,l) - bmti1(i,j,l,m)*w(2,i,j,m)
-                      enddo
-                   enddo
-                enddo
-             enddo
-
-          case (iMax)
-             do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-                do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                   do l=nt1,nt2
-                      w(ie,i,j,l) = bvti2(i,j,l)
-                      do m=nt1,nt2
-                         w(ie,i,j,l) = w(ie,i,j,l) - bmti2(i,j,l,m)*w(il,i,j,m)
-                      enddo
-                   enddo
-                enddo
-             enddo
-
-          case (jMin)
-             do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-                do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                   do l=nt1,nt2
-                      w(i,1,j,l) = bvtj1(i,j,l)
-                      do m=nt1,nt2
-                         w(i,1,j,l) = w(i,1,j,l) - bmtj1(i,j,l,m)*w(i,2,j,m)
-                      enddo
-                   enddo
-                enddo
-             enddo
-
-          case (jMax)
-             do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-                do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                   do l=nt1,nt2
-                      w(i,je,j,l) = bvtj2(i,j,l)
-                      do m=nt1,nt2
-                         w(i,je,j,l) = w(i,je,j,l) - bmtj2(i,j,l,m)*w(i,jl,j,m)
-                      enddo
-                   enddo
-                enddo
-             enddo
-
-          case (kMin)
-             do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-                do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                   do l=nt1,nt2
-                      w(i,j,1,l) = bvtk1(i,j,l)
-                      do m=nt1,nt2
-                         w(i,j,1,l) = w(i,j,1,l) - bmtk1(i,j,l,m)*w(i,j,2,m)
-                      enddo
-                   enddo
-                enddo
-             enddo
-
-          case (kMax)
-             do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-                do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                   do l=nt1,nt2
-                      w(i,j,ke,l) = bvtk2(i,j,l)
-                      do m=nt1,nt2
-                         w(i,j,ke,l) = w(i,j,ke,l) - bmtk2(i,j,l,m)*w(i,j,kl,m)
-                      enddo
-                   enddo
-                enddo
-             enddo
-          end select
-
-       endif
-
-       ! Set the value of the eddy viscosity, depending on the type of
-       ! boundary condition. Only if the turbulence model is an eddy
-       ! viscosity model of course.
-
-       if( eddyModel ) then
-
-          if(BCType(nn) == NSWallAdiabatic .or. &
-               BCType(nn) == NSWallIsothermal) then
-
-             ! Viscous wall boundary condition. Eddy viscosity is
-             ! zero at the wall.
-
-             call bcEddyWall(nn)
-
-          else
-
-             ! Any boundary condition but viscous wall. A homogeneous
-             ! Neumann condition is applied to the eddy viscosity.
-
-             call bcEddyNoWall(nn)
-
-          endif
-
-       endif
-
-       ! Extrapolate the turbulent variables in case a second halo
-       ! is needed.
-
-       if( secondHalo ) call turb2ndHalo(nn)
-
-    enddo bocos
-
-  end subroutine applyAllTurbBCThisBlock
-
-  subroutine bcEddyNoWall(nn)
-    !
-    !       bcEddyNoWall sets the eddy viscosity in the halo cells of
-    !       subface nn of the block given in blockPointers. The boundary
-    !       condition on the subface can be anything but a viscous wall.
-    !       A homogeneous neumann condition is applied, which means that
-    !       the eddy viscosity is simply copied from the interior cell.
-    !
-    use constants
-    use blockPointers
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nn
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j
-
-
-    ! Determine the face id on which the subface and copy
-
-    select case (BCFaceid(nn))
-    case (iMin)
-       do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-          do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             rev(1,i,j) = rev(2,i,j)
-          enddo
-       enddo
-
-    case (iMax)
-       do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-          do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             rev(ie,i,j) = rev(il,i,j)
-          enddo
-       enddo
-
-    case (jMin)
-       do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-          do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             rev(i,1,j) = rev(i,2,j)
-          enddo
-       enddo
-
-    case (jMax)
-       do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-          do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             rev(i,je,j) = rev(i,jl,j)
-          enddo
-       enddo
-
-    case (kMin)
-       do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-          do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             rev(i,j,1) = rev(i,j,2)
-          enddo
-       enddo
-
-    case (kMax)
-       do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-          do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             rev(i,j,ke) = rev(i,j,kl)
-          enddo
-       enddo
-    end select
-
-  end subroutine bcEddyNoWall
-  subroutine bcEddyWall(nn)
-    !
-    !       bcEddyWall sets the eddy viscosity in the halo cells of
-    !       viscous subface nn of the block given in blockPointers.
-    !       As the eddy viscosity is zero at the wall, the value in the
-    !       halo is simply the negative value of the first interior cell.
-    !
-    use constants
-    use blockPointers
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nn
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j
-
-
-    ! Determine the face id on which the subface is located and
-    ! loop over the faces of the subface and set the eddy viscosity
-    ! in the halo cells.
-
-    select case (BCFaceid(nn))
-    case (iMin)
-       do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-          do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             rev(1,i,j) = saRoughFact(2,i,j)*rev(2,i,j)
-          enddo
-       enddo
-
-    case (iMax)
-       do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-          do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             rev(ie,i,j) = saRoughFact(il,i,j)*rev(il,i,j)
-          enddo
-       enddo
-
-    case (jMin)
-       do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-          do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             rev(i,1,j) = saRoughFact(i,2,j)*rev(i,2,j)
-          enddo
-       enddo
-
-    case (jMax)
-       do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-          do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             rev(i,je,j) = saRoughFact(i,jl,j)*rev(i,jl,j)
-          enddo
-       enddo
-
-    case (kMin)
-       do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-          do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             rev(i,j,1) = saRoughFact(i,j,2)*rev(i,j,2)
-          enddo
-       enddo
-
-    case (kMax)
-       do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-          do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             rev(i,j,ke) = saRoughFact(i,j,kl)*rev(i,j,kl)
-          enddo
-       enddo
-    end select
-
-  end subroutine bcEddyWall
-  subroutine bcTurbFarfield(nn)
-    !
-    !       bcTurbFarfield applies the implicit treatment of the
-    !       farfield boundary condition to subface nn. As the farfield
-    !       boundary condition is independent of the turbulence model,
-    !       this routine is valid for all models. It is assumed that the
-    !       pointers in blockPointers are already set to the correct
-    !       block on the correct grid level.
-    !
-    use constants
-    use blockPointers
-    use flowVarRefState
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nn
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, l
-
-    real(kind=realType) :: nnx, nny, nnz, dot
-
-    ! Loop over the faces of the subfaces and set the values of
-    ! bmt and bvt for an implicit treatment.
-
-    do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-       do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-
-          ! Determine the dot product between the outward pointing
-          ! normal and the free stream velocity direction and add the
-          ! possible grid velocity.
-
-          dot = BCData(nn)%norm(i,j,1)*wInf(ivx) + &
-               BCData(nn)%norm(i,j,2)*wInf(ivy) + &
-               BCData(nn)%norm(i,j,3)*wInf(ivz) - BCData(nn)%rface(i,j)
-
-          ! Determine whether we are dealing with an inflow or
-          ! outflow boundary here.
-
-          if(dot > zero) then
-
-             ! Outflow. Simply extrapolation or zero Neumann BC
-             ! of the turbulent variables.
-
-             do l=nt1,nt2
+                ! Determine the block face on which this subface is located
+                ! and set some pointers accordingly.
+
                 select case (BCFaceID(nn))
                 case (iMin)
-                   bmti1(i,j,l,l) = -one
+                    bmt => bmti1; bvt => bvti1
+                    ww1 => w(1, 1:, 1:, :); ww2 => w(2, 1:, 1:, :)
+
                 case (iMax)
-                   bmti2(i,j,l,l) = -one
+                    bmt => bmti2; bvt => bvti2
+                    ww1 => w(ie, 1:, 1:, :); ww2 => w(il, 1:, 1:, :)
+
                 case (jMin)
-                   bmtj1(i,j,l,l) = -one
+                    bmt => bmtj1; bvt => bvtj1
+                    ww1 => w(1:, 1, 1:, :); ww2 => w(1:, 2, 1:, :)
+
                 case (jMax)
-                   bmtj2(i,j,l,l) = -one
+                    bmt => bmtj2; bvt => bvtj2
+                    ww1 => w(1:, je, 1:, :); ww2 => w(1:, jl, 1:, :)
+
                 case (kMin)
-                   bmtk1(i,j,l,l) = -one
+                    bmt => bmtk1; bvt => bvtk1
+                    ww1 => w(1:, 1:, 1, :); ww2 => w(1:, 1:, 2, :)
+
                 case (kMax)
-                   bmtk2(i,j,l,l) = -one
+                    bmt => bmtk2; bvt => bvtk2
+                    ww1 => w(1:, 1:, ke, :); ww2 => w(1:, 1:, kl, :)
                 end select
-             end do
-
-          else
 
-             ! Inflow. Turbulent variables are prescribed.
+                ! Write an approximate value into the halo cell for
+                ! postprocessing (it is not used in computation).
+
+                do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                    do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                        do l = nt1, nt2
+                            ww1(i, j, l) = bvt(i, j, l) - bmt(i, j, l, l)*ww2(i, j, l)
+                            do m = nt1, nt2
+                                if (m /= l .and. bmt(i, j, l, m) /= zero) &
+                                    ww1(i, j, l) = ww2(i, j, l)
+                            end do
+                        end do
+                    end do
+                end do
+#endif
+            else
 
-             do l=nt1,nt2
-                select case(BCFaceID(nn))
+                select case (BCFaceID(nn))
                 case (iMin)
-                   bvti1(i,j,l) = wInf(l)
+                    do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                        do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                            do l = nt1, nt2
+                                w(1, i, j, l) = bvti1(i, j, l)
+                                do m = nt1, nt2
+                                    w(1, i, j, l) = w(1, i, j, l) - bmti1(i, j, l, m)*w(2, i, j, m)
+                                end do
+                            end do
+                        end do
+                    end do
+
                 case (iMax)
-                   bvti2(i,j,l) = wInf(l)
+                    do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                        do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                            do l = nt1, nt2
+                                w(ie, i, j, l) = bvti2(i, j, l)
+                                do m = nt1, nt2
+                                    w(ie, i, j, l) = w(ie, i, j, l) - bmti2(i, j, l, m)*w(il, i, j, m)
+                                end do
+                            end do
+                        end do
+                    end do
+
                 case (jMin)
-                   bvtj1(i,j,l) = wInf(l)
+                    do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                        do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                            do l = nt1, nt2
+                                w(i, 1, j, l) = bvtj1(i, j, l)
+                                do m = nt1, nt2
+                                    w(i, 1, j, l) = w(i, 1, j, l) - bmtj1(i, j, l, m)*w(i, 2, j, m)
+                                end do
+                            end do
+                        end do
+                    end do
+
                 case (jMax)
-                   bvtj2(i,j,l) = wInf(l)
+                    do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                        do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                            do l = nt1, nt2
+                                w(i, je, j, l) = bvtj2(i, j, l)
+                                do m = nt1, nt2
+                                    w(i, je, j, l) = w(i, je, j, l) - bmtj2(i, j, l, m)*w(i, jl, j, m)
+                                end do
+                            end do
+                        end do
+                    end do
+
                 case (kMin)
-                   bvtk1(i,j,l) = wInf(l)
+                    do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                        do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                            do l = nt1, nt2
+                                w(i, j, 1, l) = bvtk1(i, j, l)
+                                do m = nt1, nt2
+                                    w(i, j, 1, l) = w(i, j, 1, l) - bmtk1(i, j, l, m)*w(i, j, 2, m)
+                                end do
+                            end do
+                        end do
+                    end do
+
                 case (kMax)
-                   bvtk2(i,j,l) = wInf(l)
+                    do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                        do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                            do l = nt1, nt2
+                                w(i, j, ke, l) = bvtk2(i, j, l)
+                                do m = nt1, nt2
+                                    w(i, j, ke, l) = w(i, j, ke, l) - bmtk2(i, j, l, m)*w(i, j, kl, m)
+                                end do
+                            end do
+                        end do
+                    end do
                 end select
-             enddo
-          endif
-       enddo
-    enddo
-  end subroutine bcTurbFarfield
-  subroutine bcTurbInflow(nn)
-    !
-    !       bcTurbInflow applies the implicit treatment of the inflow
-    !       boundary conditions to subface nn. As the inflow boundary
-    !       condition is independent of the turbulence model, this routine
-    !       is valid for all models. It is assumed that the pointers in
-    !       blockPointers are already set to the correct block on the
-    !       correct grid level.
-    !
-    use constants
-    use blockPointers
-    use flowVarRefState
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nn
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, l
-
-
-    ! Loop over the faces of the subfaces and set the values of
-    ! bvt and bmt such that the inflow state is linearly extrapolated
-    ! with a fixed state at the face.
-
-    do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-       do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-
-          ! Loop over the number of turbulent variables.
-
-          do l=nt1,nt2
-             select case (BCFaceID(nn))
-             case (iMin)
-                bvti1(i,j,l)   = two*BCData(nn)%turbInlet(i,j,l)
-                bmti1(i,j,l,l) = one
-             case (iMax)
-                bvti2(i,j,l)   = two*BCData(nn)%turbInlet(i,j,l)
-                bmti2(i,j,l,l) = one
-             case (jMin)
-                bvtj1(i,j,l)   = two*BCData(nn)%turbInlet(i,j,l)
-                bmtj1(i,j,l,l) = one
-             case (jMax)
-                bvtj2(i,j,l)   = two*BCData(nn)%turbInlet(i,j,l)
-                bmtj2(i,j,l,l) = one
-             case (kMin)
-                bvtk1(i,j,l)   = two*BCData(nn)%turbInlet(i,j,l)
-                bmtk1(i,j,l,l) = one
-             case (kMax)
-                bvtk2(i,j,l)   = two*BCData(nn)%turbInlet(i,j,l)
-                bmtk2(i,j,l,l) = one
-             end select
-          end do
-       enddo
-    enddo
-  end subroutine bcTurbInflow
-  subroutine bcTurbInterface(nn)
-    !
-    !       bcTurbInterface applies the halo treatment for interface halo
-    !       cells, sliding mesh interface and domain interface. As these
-    !       are not really boundary conditions, the variable bvt is simply
-    !       set to keep the current value.
-    !
-    use constants
-    use blockPointers
-    use flowVarRefState
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nn
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, l
-
-
-    ! Loop over the faces of the subfaces and set the values of
-    ! bvt to keep the current value.
-
-    do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-       do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-          do l=nt1,nt2
-             select case (BCFaceID(nn))
-             case (iMin)
-                bvti1(i,j,l) = w(1,i,j,l)
-             case (iMax)
-                bvti2(i,j,l) = w(ie,i,j,l)
-             case (jMin)
-                bvtj1(i,j,l) = w(i,1,j,l)
-             case (jMax)
-                bvtj2(i,j,l) = w(i,je,j,l)
-             case (kMin)
-                bvtk1(i,j,l) = w(i,j,1,l)
-             case (kMax)
-                bvtk2(i,j,l) = w(i,j,ke,l)
-             end select
-          enddo
-       enddo
-    enddo
-
-    ! Note that the original code had an error in the pointers...they
-    ! were pointing to {il,jl,kl} and not {ie, je, ke}.
-
-  end subroutine bcTurbInterface
-  subroutine bcTurbOutflow(nn)
-    !
-    !       bcTurbOutflow applies the implicit treatment of the outflow
-    !       boundary conditions to subface nn. As the outflow boundary
-    !       condition is independent of the turbulence model, either
-    !       extrapolation or zero Neumann, this routine is valid for all
-    !       models. It is assumed that the pointers in blockPointers are
-    !       already set to the correct block on the correct grid level.
-    !
-    use constants
-    use blockPointers
-    use flowVarRefState
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nn
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, l
-
-    ! Loop over the faces of the subfaces and set the values of bmt
-    ! for an implicit treatment. For an outflow the turbulent variable
-    ! variable is either extrapolated or zero Neumann. As constant
-    ! extrapolation is used this leads to an identical treatment, i.e.
-    ! the halo value is identical to the value of the internal cell.
-
-    do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-       do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-          do l=nt1,nt2
-             select case (BCFaceID(nn))
-             case (iMin)
-                bmti1(i,j,l,l) = -one
-             case (iMax)
-                bmti2(i,j,l,l) = -one
-             case (jMin)
-                bmtj1(i,j,l,l) = -one
-             case (jMax)
-                bmtj2(i,j,l,l) = -one
-             case (kMin)
-                bmtk1(i,j,l,l) = -one
-             case (kMax)
-                bmtk2(i,j,l,l) = -one
-             end select
-          enddo
-       enddo
-    enddo
-
-  end subroutine bcTurbOutflow
-  subroutine bcTurbSymm(nn)
-    !
-    !       bcTurbSymm applies the implicit treatment of the symmetry
-    !       boundary condition (or inviscid wall) to subface nn. As the
-    !       symmetry boundary condition is independent of the turbulence
-    !       model, this routine is valid for all models. It is assumed
-    !       that the pointers in blockPointers are already set to the
-    !       correct block on the correct grid level.
-    !
-    use constants
-    use blockPointers
-    use flowVarRefState
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nn
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, l
-
-    ! Loop over the faces of the subfaces and set the values of bmt
-    ! for an implicit treatment. For a symmetry face this means
-    ! that the halo value is set to the internal value.
-
-    do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-       do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-          do l=nt1,nt2
-             select case (BCFaceID(nn))
-             case (iMin)
-                bmti1(i,j,l,l) = -one
-             case (iMax)
-                bmti2(i,j,l,l) = -one
-             case (jMin)
-                bmtj1(i,j,l,l) = -one
-             case (jMax)
-                bmtj2(i,j,l,l) = -one
-             case (kMin)
-                bmtk1(i,j,l,l) = -one
-             case (kMax)
-                bmtk2(i,j,l,l) = -one
-             end select
-          enddo
-       enddo
-    enddo
-  end subroutine bcTurbSymm
-
-  subroutine bcTurbTreatment
-    !
-    !       bcTurbTreatment sets the arrays bmti1, bvti1, etc, such that
-    !       the physical boundary conditions are treated correctly.
-    !       It is assumed that the variables in blockPointers already
-    !       point to the correct block.
-    !       The turbulent variable in the halo is computed as follows:
-    !       wHalo = -bmt*wInternal + bvt for every block facer. As it is
-    !       possible to have a coupling in the boundary conditions bmt
-    !       actually are matrices. If there is no coupling between the
-    !       boundary conditions of the turbulence equations bmt is a
-    !       diagonal matrix.
-    !
-    use constants
-    use blockPointers
-    use flowVarRefState
-    implicit none
-    !
-    !      Local variable.
-    !
-    integer(kind=intType) :: nn, i, j, k, l, m
-
-    ! Initialize the arrays for the boundary condition treatment
-    ! to zero, such that internal block boundaries are solved
-    ! correctly (i.e. explicitly).
-
-    do k=1,ke
-       do j=1,je
-          do l=nt1,nt2
-             do m=nt1,nt2
-                bmti1(j,k,l,m) = zero
-                bmti2(j,k,l,m) = zero
-             enddo
-             bvti1(j,k,l) = zero
-             bvti2(j,k,l) = zero
-          enddo
-       enddo
-    enddo
-
-    do k=1,ke
-       do i=1,ie
-          do l=nt1,nt2
-             do m=nt1,nt2
-                bmtj1(i,k,l,m) = zero
-                bmtj2(i,k,l,m) = zero
-             enddo
-             bvtj1(i,k,l) = zero
-             bvtj2(i,k,l) = zero
-          enddo
-       enddo
-    enddo
-
-    do j=1,je
-       do i=1,ie
-          do l=nt1,nt2
-             do m=nt1,nt2
-                bmtk1(i,j,l,m) = zero
-                bmtk2(i,j,l,m) = zero
-             enddo
-             bvtk1(i,j,l) = zero
-             bvtk2(i,j,l) = zero
-          enddo
-       enddo
-    enddo
-
-    ! Loop over the boundary condition subfaces of this block.
-
-    bocos: do nn=1,nBocos
-
-       ! Determine the kind of boundary condition for this subface.
-
-       typeBC: select case (BCType(nn))
-
-       case (NSWallAdiabatic, NSWallIsothermal)
-
-          ! Viscous wall. There is no difference between an adiabatic
-          ! and an isothermal wall for the turbulent equations.
-          ! Set the implicit treatment of the wall boundary conditions.
-
-          call bcTurbWall(nn)
-
-          !=============================================================
+
+            end if
+
+            ! Set the value of the eddy viscosity, depending on the type of
+            ! boundary condition. Only if the turbulence model is an eddy
+            ! viscosity model of course.
+
+            if (eddyModel) then
+
+                if (BCType(nn) == NSWallAdiabatic .or. &
+                    BCType(nn) == NSWallIsothermal) then
+
+                    ! Viscous wall boundary condition. Eddy viscosity is
+                    ! zero at the wall.
+
+                    call bcEddyWall(nn)
+
+                else
+
+                    ! Any boundary condition but viscous wall. A homogeneous
+                    ! Neumann condition is applied to the eddy viscosity.
+
+                    call bcEddyNoWall(nn)
+
+                end if
+
+            end if
+
+            ! Extrapolate the turbulent variables in case a second halo
+            ! is needed.
+
+            if (secondHalo) call turb2ndHalo(nn)
+
+        end do bocos
+
+    end subroutine applyAllTurbBCThisBlock
+
+    subroutine bcEddyNoWall(nn)
+        !
+        !       bcEddyNoWall sets the eddy viscosity in the halo cells of
+        !       subface nn of the block given in blockPointers. The boundary
+        !       condition on the subface can be anything but a viscous wall.
+        !       A homogeneous neumann condition is applied, which means that
+        !       the eddy viscosity is simply copied from the interior cell.
+        !
+        use constants
+        use blockPointers
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nn
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j
+
+        ! Determine the face id on which the subface and copy
+
+        select case (BCFaceid(nn))
+        case (iMin)
+            do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                    rev(1, i, j) = rev(2, i, j)
+                end do
+            end do
+
+        case (iMax)
+            do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                    rev(ie, i, j) = rev(il, i, j)
+                end do
+            end do
+
+        case (jMin)
+            do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                    rev(i, 1, j) = rev(i, 2, j)
+                end do
+            end do
+
+        case (jMax)
+            do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                    rev(i, je, j) = rev(i, jl, j)
+                end do
+            end do
+
+        case (kMin)
+            do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                    rev(i, j, 1) = rev(i, j, 2)
+                end do
+            end do
+
+        case (kMax)
+            do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                    rev(i, j, ke) = rev(i, j, kl)
+                end do
+            end do
+        end select
+
+    end subroutine bcEddyNoWall
+    subroutine bcEddyWall(nn)
+        !
+        !       bcEddyWall sets the eddy viscosity in the halo cells of
+        !       viscous subface nn of the block given in blockPointers.
+        !       As the eddy viscosity is zero at the wall, the value in the
+        !       halo is simply the negative value of the first interior cell.
+        !
+        use constants
+        use blockPointers
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nn
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j
+
+        ! Determine the face id on which the subface is located and
+        ! loop over the faces of the subface and set the eddy viscosity
+        ! in the halo cells.
+
+        select case (BCFaceid(nn))
+        case (iMin)
+            do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                    rev(1, i, j) = saRoughFact(2, i, j)*rev(2, i, j)
+                end do
+            end do
+
+        case (iMax)
+            do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                    rev(ie, i, j) = saRoughFact(il, i, j)*rev(il, i, j)
+                end do
+            end do
+
+        case (jMin)
+            do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                    rev(i, 1, j) = saRoughFact(i, 2, j)*rev(i, 2, j)
+                end do
+            end do
+
+        case (jMax)
+            do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                    rev(i, je, j) = saRoughFact(i, jl, j)*rev(i, jl, j)
+                end do
+            end do
+
+        case (kMin)
+            do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                    rev(i, j, 1) = saRoughFact(i, j, 2)*rev(i, j, 2)
+                end do
+            end do
+
+        case (kMax)
+            do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                    rev(i, j, ke) = saRoughFact(i, j, kl)*rev(i, j, kl)
+                end do
+            end do
+        end select
+
+    end subroutine bcEddyWall
+    subroutine bcTurbFarfield(nn)
+        !
+        !       bcTurbFarfield applies the implicit treatment of the
+        !       farfield boundary condition to subface nn. As the farfield
+        !       boundary condition is independent of the turbulence model,
+        !       this routine is valid for all models. It is assumed that the
+        !       pointers in blockPointers are already set to the correct
+        !       block on the correct grid level.
+        !
+        use constants
+        use blockPointers
+        use flowVarRefState
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nn
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, l
+
+        real(kind=realType) :: nnx, nny, nnz, dot
+
+        ! Loop over the faces of the subfaces and set the values of
+        ! bmt and bvt for an implicit treatment.
+
+        do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+            do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+
+                ! Determine the dot product between the outward pointing
+                ! normal and the free stream velocity direction and add the
+                ! possible grid velocity.
+
+                dot = BCData(nn)%norm(i, j, 1)*wInf(ivx) + &
+                      BCData(nn)%norm(i, j, 2)*wInf(ivy) + &
+                      BCData(nn)%norm(i, j, 3)*wInf(ivz) - BCData(nn)%rface(i, j)
+
+                ! Determine whether we are dealing with an inflow or
+                ! outflow boundary here.
+
+                if (dot > zero) then
+
+                    ! Outflow. Simply extrapolation or zero Neumann BC
+                    ! of the turbulent variables.
+
+                    do l = nt1, nt2
+                        select case (BCFaceID(nn))
+                        case (iMin)
+                            bmti1(i, j, l, l) = -one
+                        case (iMax)
+                            bmti2(i, j, l, l) = -one
+                        case (jMin)
+                            bmtj1(i, j, l, l) = -one
+                        case (jMax)
+                            bmtj2(i, j, l, l) = -one
+                        case (kMin)
+                            bmtk1(i, j, l, l) = -one
+                        case (kMax)
+                            bmtk2(i, j, l, l) = -one
+                        end select
+                    end do
+
+                else
+
+                    ! Inflow. Turbulent variables are prescribed.
+
+                    do l = nt1, nt2
+                        select case (BCFaceID(nn))
+                        case (iMin)
+                            bvti1(i, j, l) = wInf(l)
+                        case (iMax)
+                            bvti2(i, j, l) = wInf(l)
+                        case (jMin)
+                            bvtj1(i, j, l) = wInf(l)
+                        case (jMax)
+                            bvtj2(i, j, l) = wInf(l)
+                        case (kMin)
+                            bvtk1(i, j, l) = wInf(l)
+                        case (kMax)
+                            bvtk2(i, j, l) = wInf(l)
+                        end select
+                    end do
+                end if
+            end do
+        end do
+    end subroutine bcTurbFarfield
+    subroutine bcTurbInflow(nn)
+        !
+        !       bcTurbInflow applies the implicit treatment of the inflow
+        !       boundary conditions to subface nn. As the inflow boundary
+        !       condition is independent of the turbulence model, this routine
+        !       is valid for all models. It is assumed that the pointers in
+        !       blockPointers are already set to the correct block on the
+        !       correct grid level.
+        !
+        use constants
+        use blockPointers
+        use flowVarRefState
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nn
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, l
+
+        ! Loop over the faces of the subfaces and set the values of
+        ! bvt and bmt such that the inflow state is linearly extrapolated
+        ! with a fixed state at the face.
+
+        do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+            do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+
+                ! Loop over the number of turbulent variables.
+
+                do l = nt1, nt2
+                    select case (BCFaceID(nn))
+                    case (iMin)
+                        bvti1(i, j, l) = two*BCData(nn)%turbInlet(i, j, l)
+                        bmti1(i, j, l, l) = one
+                    case (iMax)
+                        bvti2(i, j, l) = two*BCData(nn)%turbInlet(i, j, l)
+                        bmti2(i, j, l, l) = one
+                    case (jMin)
+                        bvtj1(i, j, l) = two*BCData(nn)%turbInlet(i, j, l)
+                        bmtj1(i, j, l, l) = one
+                    case (jMax)
+                        bvtj2(i, j, l) = two*BCData(nn)%turbInlet(i, j, l)
+                        bmtj2(i, j, l, l) = one
+                    case (kMin)
+                        bvtk1(i, j, l) = two*BCData(nn)%turbInlet(i, j, l)
+                        bmtk1(i, j, l, l) = one
+                    case (kMax)
+                        bvtk2(i, j, l) = two*BCData(nn)%turbInlet(i, j, l)
+                        bmtk2(i, j, l, l) = one
+                    end select
+                end do
+            end do
+        end do
+    end subroutine bcTurbInflow
+    subroutine bcTurbInterface(nn)
+        !
+        !       bcTurbInterface applies the halo treatment for interface halo
+        !       cells, sliding mesh interface and domain interface. As these
+        !       are not really boundary conditions, the variable bvt is simply
+        !       set to keep the current value.
+        !
+        use constants
+        use blockPointers
+        use flowVarRefState
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nn
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, l
+
+        ! Loop over the faces of the subfaces and set the values of
+        ! bvt to keep the current value.
+
+        do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+            do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                do l = nt1, nt2
+                    select case (BCFaceID(nn))
+                    case (iMin)
+                        bvti1(i, j, l) = w(1, i, j, l)
+                    case (iMax)
+                        bvti2(i, j, l) = w(ie, i, j, l)
+                    case (jMin)
+                        bvtj1(i, j, l) = w(i, 1, j, l)
+                    case (jMax)
+                        bvtj2(i, j, l) = w(i, je, j, l)
+                    case (kMin)
+                        bvtk1(i, j, l) = w(i, j, 1, l)
+                    case (kMax)
+                        bvtk2(i, j, l) = w(i, j, ke, l)
+                    end select
+                end do
+            end do
+        end do
+
+        ! Note that the original code had an error in the pointers...they
+        ! were pointing to {il,jl,kl} and not {ie, je, ke}.
+
+    end subroutine bcTurbInterface
+    subroutine bcTurbOutflow(nn)
+        !
+        !       bcTurbOutflow applies the implicit treatment of the outflow
+        !       boundary conditions to subface nn. As the outflow boundary
+        !       condition is independent of the turbulence model, either
+        !       extrapolation or zero Neumann, this routine is valid for all
+        !       models. It is assumed that the pointers in blockPointers are
+        !       already set to the correct block on the correct grid level.
+        !
+        use constants
+        use blockPointers
+        use flowVarRefState
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nn
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, l
+
+        ! Loop over the faces of the subfaces and set the values of bmt
+        ! for an implicit treatment. For an outflow the turbulent variable
+        ! variable is either extrapolated or zero Neumann. As constant
+        ! extrapolation is used this leads to an identical treatment, i.e.
+        ! the halo value is identical to the value of the internal cell.
+
+        do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+            do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                do l = nt1, nt2
+                    select case (BCFaceID(nn))
+                    case (iMin)
+                        bmti1(i, j, l, l) = -one
+                    case (iMax)
+                        bmti2(i, j, l, l) = -one
+                    case (jMin)
+                        bmtj1(i, j, l, l) = -one
+                    case (jMax)
+                        bmtj2(i, j, l, l) = -one
+                    case (kMin)
+                        bmtk1(i, j, l, l) = -one
+                    case (kMax)
+                        bmtk2(i, j, l, l) = -one
+                    end select
+                end do
+            end do
+        end do
+
+    end subroutine bcTurbOutflow
+    subroutine bcTurbSymm(nn)
+        !
+        !       bcTurbSymm applies the implicit treatment of the symmetry
+        !       boundary condition (or inviscid wall) to subface nn. As the
+        !       symmetry boundary condition is independent of the turbulence
+        !       model, this routine is valid for all models. It is assumed
+        !       that the pointers in blockPointers are already set to the
+        !       correct block on the correct grid level.
+        !
+        use constants
+        use blockPointers
+        use flowVarRefState
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nn
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, l
+
+        ! Loop over the faces of the subfaces and set the values of bmt
+        ! for an implicit treatment. For a symmetry face this means
+        ! that the halo value is set to the internal value.
+
+        do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+            do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                do l = nt1, nt2
+                    select case (BCFaceID(nn))
+                    case (iMin)
+                        bmti1(i, j, l, l) = -one
+                    case (iMax)
+                        bmti2(i, j, l, l) = -one
+                    case (jMin)
+                        bmtj1(i, j, l, l) = -one
+                    case (jMax)
+                        bmtj2(i, j, l, l) = -one
+                    case (kMin)
+                        bmtk1(i, j, l, l) = -one
+                    case (kMax)
+                        bmtk2(i, j, l, l) = -one
+                    end select
+                end do
+            end do
+        end do
+    end subroutine bcTurbSymm
+
+    subroutine bcTurbTreatment
+        !
+        !       bcTurbTreatment sets the arrays bmti1, bvti1, etc, such that
+        !       the physical boundary conditions are treated correctly.
+        !       It is assumed that the variables in blockPointers already
+        !       point to the correct block.
+        !       The turbulent variable in the halo is computed as follows:
+        !       wHalo = -bmt*wInternal + bvt for every block facer. As it is
+        !       possible to have a coupling in the boundary conditions bmt
+        !       actually are matrices. If there is no coupling between the
+        !       boundary conditions of the turbulence equations bmt is a
+        !       diagonal matrix.
+        !
+        use constants
+        use blockPointers
+        use flowVarRefState
+        implicit none
+        !
+        !      Local variable.
+        !
+        integer(kind=intType) :: nn, i, j, k, l, m
+
+        ! Initialize the arrays for the boundary condition treatment
+        ! to zero, such that internal block boundaries are solved
+        ! correctly (i.e. explicitly).
+
+        do k = 1, ke
+            do j = 1, je
+                do l = nt1, nt2
+                    do m = nt1, nt2
+                        bmti1(j, k, l, m) = zero
+                        bmti2(j, k, l, m) = zero
+                    end do
+                    bvti1(j, k, l) = zero
+                    bvti2(j, k, l) = zero
+                end do
+            end do
+        end do
+
+        do k = 1, ke
+            do i = 1, ie
+                do l = nt1, nt2
+                    do m = nt1, nt2
+                        bmtj1(i, k, l, m) = zero
+                        bmtj2(i, k, l, m) = zero
+                    end do
+                    bvtj1(i, k, l) = zero
+                    bvtj2(i, k, l) = zero
+                end do
+            end do
+        end do
+
+        do j = 1, je
+            do i = 1, ie
+                do l = nt1, nt2
+                    do m = nt1, nt2
+                        bmtk1(i, j, l, m) = zero
+                        bmtk2(i, j, l, m) = zero
+                    end do
+                    bvtk1(i, j, l) = zero
+                    bvtk2(i, j, l) = zero
+                end do
+            end do
+        end do
+
+        ! Loop over the boundary condition subfaces of this block.
+
+        bocos: do nn = 1, nBocos
+
+            ! Determine the kind of boundary condition for this subface.
+
+            typeBC:select case(BCType(nn))
+
+            case (NSWallAdiabatic, NSWallIsothermal)
+
+            ! Viscous wall. There is no difference between an adiabatic
+            ! and an isothermal wall for the turbulent equations.
+            ! Set the implicit treatment of the wall boundary conditions.
+
+            call bcTurbWall(nn)
+
+            !=============================================================
 #ifndef USE_TAPENADE
-       case (SubsonicInflow, SupersonicInflow, MassBleedInflow)
+            case (SubsonicInflow, SupersonicInflow, MassBleedInflow)
 
-          ! Inflow. Subsonic, supersonic or mass bleed inflow is
-          ! identical for the turbulent transport equations.
+            ! Inflow. Subsonic, supersonic or mass bleed inflow is
+            ! identical for the turbulent transport equations.
 
-          call bcTurbInflow(nn)
+            call bcTurbInflow(nn)
 
-          !=============================================================
+            !=============================================================
 
-       case (SubsonicOutflow,  SupersonicOutflow, &
-            MassBleedOutflow, Extrap)
+            case (SubsonicOutflow, SupersonicOutflow, &
+                  MassBleedOutflow, Extrap)
 
-          ! Outflow. Subsonic, supersonic or mass bleed outflow is
-          ! identical for the turbulent transport equations. The
-          ! extrapolation boundary is also an outflow.
+            ! Outflow. Subsonic, supersonic or mass bleed outflow is
+            ! identical for the turbulent transport equations. The
+            ! extrapolation boundary is also an outflow.
 
-          call bcTurbOutflow(nn)
+            call bcTurbOutflow(nn)
 #endif
-          !=============================================================
-
-       case (Symm, SymmPolar, EulerWall)
-
-          ! Symmetry, polar symmetry or inviscid wall. Treatment of
-          ! the turbulent equations is identical.
-
-          call bcTurbSymm(nn)
-
-          !=============================================================
-
-       case (FarField)
-
-          ! Farfield. The kind of boundary condition to be applied,
-          ! inflow or outflow, depends on the local conditions.
-
-          call bcTurbFarfield(nn)
-
-          !=============================================================
-
-       case (SlidingInterface,   OversetOuterBound,     &
-            DomainInterfaceAll, DomainInterfaceRhoUVW, &
-            DomainInterfaceP,   DomainInterfaceRho,    &
-            DomainInterfaceTotal)
-
-          ! Sliding mesh interface, overset outer boudaries, and
-          ! domain interface with another code are not really boundary
-          ! condition and therefore the values are kept.
-
-          call bcTurbInterface(nn)
-
-       end select typeBC
-
-    enddo bocos
-
-  end subroutine bcTurbTreatment
-  subroutine bcTurbWall(nn)
-    !
-    !       bcTurbWall applies the implicit treatment of the viscous
-    !       wall boundary condition for the turbulence model used to the
-    !       given subface nn.
-    !       It is assumed that the pointers in blockPointers are
-    !       already set to the correct block.
-    !
-    use blockPointers
-    use flowVarRefState
-    use inputPhysics
-    use constants
-    use paramTurb
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nn
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, ii, jj, iiMax, jjMax
-
-    real(kind=realType) :: tmpd, tmpe, tmpf, nu
-
-    real(kind=realType), dimension(:,:,:,:), pointer :: bmt
-    real(kind=realType), dimension(:,:,:),   pointer :: bvt, ww2
-    real(kind=realType), dimension(:,:),     pointer :: rlv2, dd2Wall
-
-
-
-    ! Determine the turbulence model used and loop over the faces
-    ! of the subface and set the values of bmt and bvt for an
-    ! implicit treatment.
-
-    select case (turbModel)
-
-    case (spalartAllmaras, spalartAllmarasEdwards)
-
-       ! Spalart-allmaras type of model. Value at the wall is zero,
-       ! so simply negate the internal value.
-       select case (BCFaceID(nn))
-       case (iMin)
-          do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-             do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmti1(i,j,itu1,itu1) = -saRoughFact(2,i,j)
-             enddo
-          enddo
-       case (iMax)
-          do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-             do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmti2(i,j,itu1,itu1) = -saRoughFact(il,i,j)
-             enddo
-          enddo
-       case (jMin)
-          do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-             do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmtj1(i,j,itu1,itu1) = -saRoughFact(i,2,j)
-             enddo
-          enddo
-       case (jMax)
-          do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-             do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmtj2(i,j,itu1,itu1) = -saRoughFact(i,jl,j)
-             enddo
-          enddo
-
-       case (kMin)
-          do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-             do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmtk1(i,j,itu1,itu1) = -saRoughFact(i,j,2)
-             enddo
-          enddo
-
-       case (kMax)
-          do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-             do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmtk2(i,j,itu1,itu1) = -saRoughFact(i,j,kl)
-             enddo
-          enddo
-       end select
-
-       !        ================================================================
-    case (komegaWilcox, komegaModified, menterSST)
-
-       ! K-omega type of models. K is zero on the wall and thus the
-       ! halo value is the negative of the first internal cell.
-       ! For omega the situation is a bit more complicated.
-       ! Theoretically omega is infinity, but it is set to a large
-       ! value, see menter's paper. The halo value is constructed
-       ! such that the wall value is correct. Make sure that i and j
-       ! are limited to physical dimensions of the face for the wall
-       ! distance. Due to the usage of the dd2Wall pointer and the
-       ! fact that the original d2Wall array starts at 2, there is
-       ! an offset of -1 present in dd2Wall.
-
-       select case (BCFaceID(nn))
-       case (iMin)
-          iiMax = jl; jjMax = kl
-
-          do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-             jj = max(2,min(j,jjMax))
-
-             do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                ii = max(2,min(i,iiMax))
-
-                nu   = rlv(2,i,j)/w(2,i,j,irho)
-                tmpd = one/(rkwBeta1*(d2Wall(2,ii,jj)**2))
-
-                bmti1(i,j,itu1,itu1) = one
-                bmti1(i,j,itu2,itu2) = one
-
-                bvti1(i,j,itu2) = two*60.0_realType*nu*tmpd
-             enddo
-          enddo
+            !=============================================================
+
+            case (Symm, SymmPolar, EulerWall)
+
+            ! Symmetry, polar symmetry or inviscid wall. Treatment of
+            ! the turbulent equations is identical.
+
+            call bcTurbSymm(nn)
+
+            !=============================================================
+
+            case (FarField)
+
+            ! Farfield. The kind of boundary condition to be applied,
+            ! inflow or outflow, depends on the local conditions.
+
+            call bcTurbFarfield(nn)
+
+            !=============================================================
+
+            case (SlidingInterface, OversetOuterBound, &
+                  DomainInterfaceAll, DomainInterfaceRhoUVW, &
+                  DomainInterfaceP, DomainInterfaceRho, &
+                  DomainInterfaceTotal)
+
+            ! Sliding mesh interface, overset outer boudaries, and
+            ! domain interface with another code are not really boundary
+            ! condition and therefore the values are kept.
+
+            call bcTurbInterface(nn)
+
+            end select typeBC
+
+        end do bocos
+
+    end subroutine bcTurbTreatment
+    subroutine bcTurbWall(nn)
+        !
+        !       bcTurbWall applies the implicit treatment of the viscous
+        !       wall boundary condition for the turbulence model used to the
+        !       given subface nn.
+        !       It is assumed that the pointers in blockPointers are
+        !       already set to the correct block.
+        !
+        use blockPointers
+        use flowVarRefState
+        use inputPhysics
+        use constants
+        use paramTurb
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nn
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, ii, jj, iiMax, jjMax
+
+        real(kind=realType) :: tmpd, tmpe, tmpf, nu
+
+        real(kind=realType), dimension(:, :, :, :), pointer :: bmt
+        real(kind=realType), dimension(:, :, :), pointer :: bvt, ww2
+        real(kind=realType), dimension(:, :), pointer :: rlv2, dd2Wall
+
+        ! Determine the turbulence model used and loop over the faces
+        ! of the subface and set the values of bmt and bvt for an
+        ! implicit treatment.
 
-       case (iMax)
-          iiMax = jl; jjMax = kl
+        select case (turbModel)
 
-          do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-             jj = max(2,min(j,jjMax))
+        case (spalartAllmaras, spalartAllmarasEdwards)
 
-             do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                ii = max(2,min(i,iiMax))
+            ! Spalart-allmaras type of model. Value at the wall is zero,
+            ! so simply negate the internal value.
+            select case (BCFaceID(nn))
+            case (iMin)
+                do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                    do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                        bmti1(i, j, itu1, itu1) = -saRoughFact(2, i, j)
+                    end do
+                end do
+            case (iMax)
+                do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                    do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                        bmti2(i, j, itu1, itu1) = -saRoughFact(il, i, j)
+                    end do
+                end do
+            case (jMin)
+                do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                    do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                        bmtj1(i, j, itu1, itu1) = -saRoughFact(i, 2, j)
+                    end do
+                end do
+            case (jMax)
+                do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                    do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                        bmtj2(i, j, itu1, itu1) = -saRoughFact(i, jl, j)
+                    end do
+                end do
+
+            case (kMin)
+                do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                    do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                        bmtk1(i, j, itu1, itu1) = -saRoughFact(i, j, 2)
+                    end do
+                end do
+
+            case (kMax)
+                do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                    do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                        bmtk2(i, j, itu1, itu1) = -saRoughFact(i, j, kl)
+                    end do
+                end do
+            end select
+
+            !        ================================================================
+        case (komegaWilcox, komegaModified, menterSST)
+
+            ! K-omega type of models. K is zero on the wall and thus the
+            ! halo value is the negative of the first internal cell.
+            ! For omega the situation is a bit more complicated.
+            ! Theoretically omega is infinity, but it is set to a large
+            ! value, see menter's paper. The halo value is constructed
+            ! such that the wall value is correct. Make sure that i and j
+            ! are limited to physical dimensions of the face for the wall
+            ! distance. Due to the usage of the dd2Wall pointer and the
+            ! fact that the original d2Wall array starts at 2, there is
+            ! an offset of -1 present in dd2Wall.
+
+            select case (BCFaceID(nn))
+            case (iMin)
+                iiMax = jl; jjMax = kl
+
+                do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                    jj = max(2, min(j, jjMax))
+
+                    do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                        ii = max(2, min(i, iiMax))
+
+                        nu = rlv(2, i, j)/w(2, i, j, irho)
+                        tmpd = one/(rkwBeta1*(d2Wall(2, ii, jj)**2))
+
+                        bmti1(i, j, itu1, itu1) = one
+                        bmti1(i, j, itu2, itu2) = one
+
+                        bvti1(i, j, itu2) = two*60.0_realType*nu*tmpd
+                    end do
+                end do
+
+            case (iMax)
+                iiMax = jl; jjMax = kl
+
+                do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                    jj = max(2, min(j, jjMax))
+
+                    do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                        ii = max(2, min(i, iiMax))
+
+                        nu = rlv(jl, i, j)/w(il, i, j, irho)
+                        tmpd = one/(rkwBeta1*(d2Wall(il, ii, jj)**2))
+
+                        bmti2(i, j, itu1, itu1) = one
+                        bmti2(i, j, itu2, itu2) = one
+
+                        bvti2(i, j, itu2) = two*60.0_realType*nu*tmpd
+                    end do
+                end do
+
+            case (jMin)
+                iiMax = il; jjMax = kl
+
+                do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                    jj = max(2, min(j, jjMax))
+
+                    do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                        ii = max(2, min(i, iiMax))
+
+                        nu = rlv(i, 2, j)/w(i, 2, j, irho)
+                        tmpd = one/(rkwBeta1*(d2Wall(ii, 2, jj)**2))
+
+                        bmtj1(i, j, itu1, itu1) = one
+                        bmtj1(i, j, itu2, itu2) = one
+
+                        bvtj1(i, j, itu2) = two*60.0_realType*nu*tmpd
+                    end do
+                end do
+
+            case (jMax)
+                iiMax = il; jjMax = kl
+
+                do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                    jj = max(2, min(j, jjMax))
+
+                    do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                        ii = max(2, min(i, iiMax))
+
+                        nu = rlv(i, jl, j)/w(i, jl, j, irho)
+                        tmpd = one/(rkwBeta1*(d2Wall(ii, jl, jj)**2))
+
+                        bmtj2(i, j, itu1, itu1) = one
+                        bmtj2(i, j, itu2, itu2) = one
+
+                        bvtj2(i, j, itu2) = two*60.0_realType*nu*tmpd
+                    end do
+                end do
+
+            case (kMin)
+                iiMax = il; jjMax = jl
+
+                do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                    jj = max(2, min(j, jjMax))
+
+                    do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                        ii = max(2, min(i, iiMax))
+
+                        nu = rlv(i, j, 2)/w(i, j, 2, irho)
+                        tmpd = one/(rkwBeta1*(d2Wall(ii, jj, 2)**2))
 
-                nu   = rlv(jl,i,j)/w(il,i,j,irho)
-                tmpd = one/(rkwBeta1*(d2Wall(il,ii,jj)**2))
-
-                bmti2(i,j,itu1,itu1) = one
-                bmti2(i,j,itu2,itu2) = one
-
-                bvti2(i,j,itu2) = two*60.0_realType*nu*tmpd
-             enddo
-          enddo
-
-       case (jMin)
-          iiMax = il; jjMax = kl
-
-          do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-             jj = max(2,min(j,jjMax))
-
-             do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                ii = max(2,min(i,iiMax))
-
-                nu   = rlv(i,2,j)/w(i,2,j,irho)
-                tmpd = one/(rkwBeta1*(d2Wall(ii,2,jj)**2))
-
-                bmtj1(i,j,itu1,itu1) = one
-                bmtj1(i,j,itu2,itu2) = one
-
-                bvtj1(i,j,itu2) = two*60.0_realType*nu*tmpd
-             enddo
-          enddo
-
-       case (jMax)
-          iiMax = il; jjMax = kl
-
-          do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-             jj = max(2,min(j,jjMax))
-
-             do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                ii = max(2,min(i,iiMax))
-
-                nu   = rlv(i,jl,j)/w(i,jl,j,irho)
-                tmpd = one/(rkwBeta1*(d2Wall(ii,jl,jj)**2))
-
-                bmtj2(i,j,itu1,itu1) = one
-                bmtj2(i,j,itu2,itu2) = one
-
-                bvtj2(i,j,itu2) = two*60.0_realType*nu*tmpd
-             enddo
-          enddo
-
-       case (kMin)
-          iiMax = il; jjMax = jl
-
-          do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-             jj = max(2,min(j,jjMax))
-
-             do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                ii = max(2,min(i,iiMax))
-
-                nu   = rlv(i,j,2)/w(i,j,2,irho)
-                tmpd = one/(rkwBeta1*(d2Wall(ii,jj,2)**2))
-
-                bmtk1(i,j,itu1,itu1) = one
-                bmtk1(i,j,itu2,itu2) = one
-
-                bvtk1(i,j,itu2) = two*60.0_realType*nu*tmpd
-             enddo
-          enddo
-
-       case (kMax)
-          iiMax = il; jjMax = jl
-
-          do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-             jj = max(2,min(j,jjMax))
-
-             do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                ii = max(2,min(i,iiMax))
-
-                nu   = rlv(i,j,kl)/w(i,j,kl,irho)
-                tmpd = one/(rkwBeta1*(d2Wall(ii,jj,kl)**2))
-
-                bmtk2(i,j,itu1,itu1) = one
-                bmtk2(i,j,itu2,itu2) = one
-
-                bvtk2(i,j,itu2) = two*60.0_realType*nu*tmpd
-             enddo
-          enddo
-       end select
-
-       !        ================================================================
-
-    case (ktau)
-
-       ! K-tau model. Both k and tau are zero at the wall, so the
-       ! negative value of the internal cell is taken for the halo.
-       select case (BCFaceID(nn))
-       case (iMin)
-          do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-             do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmti1(i,j,itu1,itu1) = one
-                bmti1(i,j,itu2,itu2) = one
-             enddo
-          enddo
-       case (iMax)
-          do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-             do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmti2(i,j,itu1,itu1) = one
-                bmti2(i,j,itu2,itu2) = one
-             enddo
-          enddo
-       case (jMin)
-          do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-             do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmtj1(i,j,itu1,itu1) = one
-                bmtj1(i,j,itu2,itu2) = one
-             enddo
-          enddo
-       case (jMax)
-          do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-             do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmtj2(i,j,itu1,itu1) = one
-                bmtj2(i,j,itu2,itu2) = one
-             enddo
-          enddo
-
-       case (kMin)
-          do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-             do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmtk1(i,j,itu1,itu1) = one
-                bmtk1(i,j,itu2,itu2) = one
-             enddo
-          enddo
-
-       case (kMax)
-          do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-             do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                bmtk2(i,j,itu1,itu1) = one
-                bmtk2(i,j,itu2,itu2) = one
-             enddo
-          enddo
-       end select
-
-       !        ================================================================
+                        bmtk1(i, j, itu1, itu1) = one
+                        bmtk1(i, j, itu2, itu2) = one
+
+                        bvtk1(i, j, itu2) = two*60.0_realType*nu*tmpd
+                    end do
+                end do
+
+            case (kMax)
+                iiMax = il; jjMax = jl
+
+                do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                    jj = max(2, min(j, jjMax))
+
+                    do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                        ii = max(2, min(i, iiMax))
+
+                        nu = rlv(i, j, kl)/w(i, j, kl, irho)
+                        tmpd = one/(rkwBeta1*(d2Wall(ii, jj, kl)**2))
+
+                        bmtk2(i, j, itu1, itu1) = one
+                        bmtk2(i, j, itu2, itu2) = one
+
+                        bvtk2(i, j, itu2) = two*60.0_realType*nu*tmpd
+                    end do
+                end do
+            end select
+
+            !        ================================================================
+
+        case (ktau)
+
+            ! K-tau model. Both k and tau are zero at the wall, so the
+            ! negative value of the internal cell is taken for the halo.
+            select case (BCFaceID(nn))
+            case (iMin)
+                do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                    do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                        bmti1(i, j, itu1, itu1) = one
+                        bmti1(i, j, itu2, itu2) = one
+                    end do
+                end do
+            case (iMax)
+                do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                    do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                        bmti2(i, j, itu1, itu1) = one
+                        bmti2(i, j, itu2, itu2) = one
+                    end do
+                end do
+            case (jMin)
+                do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                    do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                        bmtj1(i, j, itu1, itu1) = one
+                        bmtj1(i, j, itu2, itu2) = one
+                    end do
+                end do
+            case (jMax)
+                do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                    do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                        bmtj2(i, j, itu1, itu1) = one
+                        bmtj2(i, j, itu2, itu2) = one
+                    end do
+                end do
+
+            case (kMin)
+                do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                    do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                        bmtk1(i, j, itu1, itu1) = one
+                        bmtk1(i, j, itu2, itu2) = one
+                    end do
+                end do
+
+            case (kMax)
+                do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                    do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                        bmtk2(i, j, itu1, itu1) = one
+                        bmtk2(i, j, itu2, itu2) = one
+                    end do
+                end do
+            end select
+
+            !        ================================================================
 #ifndef USE_TAPENADE
-    case (v2f)
-
-       ! Set some variables depending on the block face on which the
-       ! subface is located. Needed for a general treatment.
-
-       select case (BCFaceID(nn))
-       case (iMin)
-          iiMax = jl; jjMax = kl
-          bmt => bmti1; bvt => bvti1; ww2 => w(2 ,1:,1:,:)
-          rlv2 => rlv(2, 1:,1:); dd2Wall => d2Wall(2, :,:)
-
-       case (iMax)
-          iiMax = jl; jjMax = kl
-          bmt => bmti2; bvt => bvti2; ww2 => w(il,1:,1:,:)
-          rlv2 => rlv(il,1:,1:); dd2Wall => d2Wall(il,:,:)
-
-       case (jMin)
-          iiMax = il; jjMax = kl
-          bmt => bmtj1; bvt => bvtj1; ww2 => w(1:,2 ,1:,:)
-          rlv2 => rlv(1:,2 ,1:); dd2Wall => d2Wall(:,2 ,:)
-
-       case (jMax)
-          iiMax = il; jjMax = kl
-          bmt => bmtj2; bvt => bvtj2; ww2 => w(1:,jl,1:,:)
-          rlv2 => rlv(1:,jl,1:); dd2Wall => d2Wall(:,jl,:)
-
-       case (kMin)
-          iiMax = il; jjMax = jl
-          bmt => bmtk1; bvt => bvtk1; ww2 => w(1:,1:,2 ,:)
-          rlv2 => rlv(1:,1:,2 ); dd2Wall => d2Wall(:,:,2 )
-
-       case (kMax)
-          iiMax = il; jjMax = jl
-          bmt => bmtk2; bvt => bvtk2; ww2 => w(1:,1:,kl,:)
-          rlv2 => rlv(1:,1:,kl); dd2Wall => d2Wall(:,:,kl)
-       end select
-
-       ! V2f turbulence model. Same story for the wall distance as
-       ! for k-omega. For this model there is a coupling between the
-       ! equations via the boundary conditions.
-
-       do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-          jj = max(2,min(j,jjMax))
-
-          do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             ii = max(2,min(i,iiMax))
-
-             nu   = rlv2(i,j)/ww2(i,j,irho)
-             tmpd = one/(dd2Wall(ii-1,jj-1)**2)
-             tmpe = two*nu*tmpd
-             tmpf =-20.0_realType*(nu*tmpd)**2 &
-                  / abs(tmpe*ww2(i,j,itu1))
-             if(rvfN == 6) tmpf = zero
-
-             bmt(i,j,itu1,itu1) = one
-             bmt(i,j,itu2,itu2) = one
-             bmt(i,j,itu3,itu3) = one
-             bmt(i,j,itu4,itu4) = one
-
-             bmt(i,j,itu2,itu1) = -two*tmpe
-             bmt(i,j,itu4,itu3) = -two*tmpf
-          enddo
-       enddo
+        case (v2f)
+
+            ! Set some variables depending on the block face on which the
+            ! subface is located. Needed for a general treatment.
+
+            select case (BCFaceID(nn))
+            case (iMin)
+                iiMax = jl; jjMax = kl
+                bmt => bmti1; bvt => bvti1; ww2 => w(2, 1:, 1:, :)
+                rlv2 => rlv(2, 1:, 1:); dd2Wall => d2Wall(2, :, :)
+
+            case (iMax)
+                iiMax = jl; jjMax = kl
+                bmt => bmti2; bvt => bvti2; ww2 => w(il, 1:, 1:, :)
+                rlv2 => rlv(il, 1:, 1:); dd2Wall => d2Wall(il, :, :)
+
+            case (jMin)
+                iiMax = il; jjMax = kl
+                bmt => bmtj1; bvt => bvtj1; ww2 => w(1:, 2, 1:, :)
+                rlv2 => rlv(1:, 2, 1:); dd2Wall => d2Wall(:, 2, :)
+
+            case (jMax)
+                iiMax = il; jjMax = kl
+                bmt => bmtj2; bvt => bvtj2; ww2 => w(1:, jl, 1:, :)
+                rlv2 => rlv(1:, jl, 1:); dd2Wall => d2Wall(:, jl, :)
+
+            case (kMin)
+                iiMax = il; jjMax = jl
+                bmt => bmtk1; bvt => bvtk1; ww2 => w(1:, 1:, 2, :)
+                rlv2 => rlv(1:, 1:, 2); dd2Wall => d2Wall(:, :, 2)
+
+            case (kMax)
+                iiMax = il; jjMax = jl
+                bmt => bmtk2; bvt => bvtk2; ww2 => w(1:, 1:, kl, :)
+                rlv2 => rlv(1:, 1:, kl); dd2Wall => d2Wall(:, :, kl)
+            end select
+
+            ! V2f turbulence model. Same story for the wall distance as
+            ! for k-omega. For this model there is a coupling between the
+            ! equations via the boundary conditions.
+
+            do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                jj = max(2, min(j, jjMax))
+
+                do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                    ii = max(2, min(i, iiMax))
+
+                    nu = rlv2(i, j)/ww2(i, j, irho)
+                    tmpd = one/(dd2Wall(ii - 1, jj - 1)**2)
+                    tmpe = two*nu*tmpd
+                    tmpf = -20.0_realType*(nu*tmpd)**2 &
+                           /abs(tmpe*ww2(i, j, itu1))
+                    if (rvfN == 6) tmpf = zero
+
+                    bmt(i, j, itu1, itu1) = one
+                    bmt(i, j, itu2, itu2) = one
+                    bmt(i, j, itu3, itu3) = one
+                    bmt(i, j, itu4, itu4) = one
+
+                    bmt(i, j, itu2, itu1) = -two*tmpe
+                    bmt(i, j, itu4, itu3) = -two*tmpf
+                end do
+            end do
 #endif
-    end select
-  end subroutine bcTurbWall
-
-  subroutine turb2ndHalo(nn)
-    !
-    !       turb2ndHalo sets the turbulent variables in the second halo
-    !       cell for the given subface. Simple constant extrapolation is
-    !       used to avoid problems.
-    !
-    use constants
-    use blockPointers
-    use flowVarRefState
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nn
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, l
-
-    ! Determine the face on which this subface is located and set
-    ! some pointers accordingly.
-
-    ! Loop over the turbulent variables and set the second halo
-    ! value. If this is an eddy model, also set the eddy viscosity.
-
-    select case (BCFaceID(nn))
-    case (iMin)
-       do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-          do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             do l=nt1,nt2
-                w(0,i,j,l) = w(1,i,j,l)
-             enddo
-             if( eddyModel ) rev(0,i,j) = rev(1,i,j)
-          enddo
-       enddo
-
-       !===============================================================
-
-    case (iMax)
-       do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-          do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             do l=nt1,nt2
-                w(ib,i,j,l) = w(ie,i,j,l)
-             enddo
-             if( eddyModel ) rev(ib,i,j) = rev(ie,i,j)
-          enddo
-       enddo
-
-       !===============================================================
-
-    case (jMin)
-       do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-          do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             do l=nt1,nt2
-                w(i,0,j,l) = w(i,1,j,l)
-             enddo
-             if( eddyModel ) rev(i,0,j) = rev(i,1,j)
-          enddo
-       enddo
-
-       !===============================================================
-
-    case (jMax)
-       do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-          do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             do l=nt1,nt2
-                w(i,jb,j,l) = w(i,je,j,l)
-             enddo
-             if( eddyModel ) rev(i,jb,j) = rev(i,je,j)
-          enddo
-       enddo
-
-       !===============================================================
-
-    case (kMin)
-       do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-          do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             do l=nt1,nt2
-                w(i,j,0,l) = w(i,j,1,l)
-             enddo
-             if( eddyModel ) rev(i,j,0) = rev(i,j,1)
-          enddo
-       enddo
-
-       !===============================================================
-
-    case (kMax)
-
-       do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-          do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-             do l=nt1,nt2
-                w(i,j,kb,l) = w(i,j,ke,l)
-             enddo
-             if( eddyModel ) rev(i,j,kb) = rev(i,j,ke)
-          enddo
-       enddo
-
-    end select
-
-  end subroutine turb2ndHalo
-
-  subroutine turbBCNSWall(secondHalo)
-    !
-    !       turbBCNSWall applies the viscous wall boundary conditions
-    !       of the turbulent transport equations to a block. It is assumed
-    !       that the pointers in blockPointers are already set to the
-    !       correct block on the correct grid level.
-    !
-    use constants
-    use blockPointers
-    use flowVarRefState
-    implicit none
-    !
-    !      Subroutine argument.
-    !
-    logical, intent(in) :: secondHalo
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn, i, j, l, m
-
-
-    ! Loop over the viscous subfaces of this block.
-
-    bocos: do nn=1,nViscBocos
-
-       ! Set the corresponding arrays.
-
-       call BCTurbWall(nn)
-
-       ! Loop over the faces and set the state in
-       ! the turbulent halo cells.
-
-       select case (BCFaceID(nn))
-       case (iMin)
-          do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-             do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-
-                do l=nt1,nt2
-                   w(1,i,j,l) = bvti1(i,j,l)
-                   do m=nt1,nt2
-                      w(1,i,j,l) = w(1,i,j,l) - bmti1(i,j,l,m)*w(2,i,j,m)
-                   enddo
-                   if (secondHalo) w(0,i,j,l) = w(1,i,j,l)
+        end select
+    end subroutine bcTurbWall
+
+    subroutine turb2ndHalo(nn)
+        !
+        !       turb2ndHalo sets the turbulent variables in the second halo
+        !       cell for the given subface. Simple constant extrapolation is
+        !       used to avoid problems.
+        !
+        use constants
+        use blockPointers
+        use flowVarRefState
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nn
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, l
+
+        ! Determine the face on which this subface is located and set
+        ! some pointers accordingly.
+
+        ! Loop over the turbulent variables and set the second halo
+        ! value. If this is an eddy model, also set the eddy viscosity.
+
+        select case (BCFaceID(nn))
+        case (iMin)
+            do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                    do l = nt1, nt2
+                        w(0, i, j, l) = w(1, i, j, l)
+                    end do
+                    if (eddyModel) rev(0, i, j) = rev(1, i, j)
                 end do
+            end do
 
-                if (eddyModel) then
-                   rev(1,i,j) = -rev(2,i,j)
-                   if (secondHalo) then
-                      rev(0,i,j) = rev(1,i,j)
-                   end if
-                end if
-             end do
-          end do
-       case (iMax)
-          do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-             do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-
-                do l=nt1,nt2
-                   w(ie,i,j,l) = bvti2(i,j,l)
-                   do m=nt1,nt2
-                      w(ie,i,j,l) = w(ie,i,j,l) - bmti2(i,j,l,m)*w(il,i,j,m)
-                   enddo
-                   if (secondHalo) w(ib,i,j,l) = w(ie,i,j,l)
-                end do
+            !===============================================================
 
-                if (eddyModel) then
-                   rev(ie,i,j) = -rev(il,i,j)
-                   if (secondHalo) then
-                      rev(ib,i,j) = rev(ie,i,j)
-                   end if
-                end if
-             end do
-          end do
-       case (jMin)
-          do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-             do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-
-                do l=nt1,nt2
-                   w(i,1,j,l) = bvtj1(i,j,l)
-                   do m=nt1,nt2
-                      w(i,1,j,l) = w(i,1,j,l) - bmtj1(i,j,l,m)*w(i,2,j,m)
-                   enddo
-                   if (secondHalo) w(i,0,j,l) = w(i,1,j,l)
+        case (iMax)
+            do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                    do l = nt1, nt2
+                        w(ib, i, j, l) = w(ie, i, j, l)
+                    end do
+                    if (eddyModel) rev(ib, i, j) = rev(ie, i, j)
                 end do
+            end do
 
-                if (eddyModel) then
-                   rev(i,1,j) = -rev(i,2,j)
-                   if (secondHalo) then
-                      rev(i,0,j) = rev(i,1,j)
-                   end if
-                end if
-             end do
-          end do
-       case (jMax)
-          do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-             do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-
-                do l=nt1,nt2
-                   w(i,je,j,l) = bvtj2(i,j,l)
-                   do m=nt1,nt2
-                      w(i,je,j,l) = w(i,je,j,l) - bmtj2(i,j,l,m)*w(i,jl,j,m)
-                   enddo
-                   if (secondHalo) w(i,jb,j,l) = w(i,je,j,l)
+            !===============================================================
+
+        case (jMin)
+            do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                    do l = nt1, nt2
+                        w(i, 0, j, l) = w(i, 1, j, l)
+                    end do
+                    if (eddyModel) rev(i, 0, j) = rev(i, 1, j)
                 end do
+            end do
 
-                if (eddyModel) then
-                   rev(i,je,j) = -rev(i,jl,j)
-                   if (secondHalo) then
-                      rev(i,jb,j) = rev(i,je,j)
-                   end if
-                end if
-             end do
-          end do
-       case (kMin)
-          do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-             do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-
-                do l=nt1,nt2
-                   w(i,j,1,l) = bvtk1(i,j,l)
-                   do m=nt1,nt2
-                      w(i,j,1,l) = w(i,j,1,l) - bmtk1(i,j,l,m)*w(i,j,2,m)
-                   enddo
-                   if (secondHalo) w(i,j,0,l) = w(i,j,1,l)
+            !===============================================================
+
+        case (jMax)
+            do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                    do l = nt1, nt2
+                        w(i, jb, j, l) = w(i, je, j, l)
+                    end do
+                    if (eddyModel) rev(i, jb, j) = rev(i, je, j)
                 end do
+            end do
 
-                if (eddyModel) then
-                   rev(i,j,1) = -rev(i,j,2)
-                   if (secondHalo) then
-                      rev(i,j,0) = rev(i,j,1)
-                   end if
-                end if
-             end do
-          end do
-       case (kMax)
-          do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-             do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-
-                do l=nt1,nt2
-                   w(i,j,ke,l) = bvtk2(i,j,l)
-                   do m=nt1,nt2
-                      w(i,j,ke,l) = w(i,j,ke,l) - bmtk2(i,j,l,m)*w(i,j,kl,m)
-                   enddo
-                   if (secondHalo) w(i,j,kb,l) = w(i,j,ke,l)
+            !===============================================================
+
+        case (kMin)
+            do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                    do l = nt1, nt2
+                        w(i, j, 0, l) = w(i, j, 1, l)
+                    end do
+                    if (eddyModel) rev(i, j, 0) = rev(i, j, 1)
                 end do
+            end do
 
-                if (eddyModel) then
-                   rev(i,j,ke) = -rev(i,j,kl)
-                   if (secondHalo) then
-                      rev(i,j,kb) = rev(i,j,ke)
-                   end if
-                end if
-             end do
-          end do
-       end select
-    enddo bocos
-  end subroutine turbBCNSWall
+            !===============================================================
+
+        case (kMax)
+
+            do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                    do l = nt1, nt2
+                        w(i, j, kb, l) = w(i, j, ke, l)
+                    end do
+                    if (eddyModel) rev(i, j, kb) = rev(i, j, ke)
+                end do
+            end do
+
+        end select
+
+    end subroutine turb2ndHalo
+
+    subroutine turbBCNSWall(secondHalo)
+        !
+        !       turbBCNSWall applies the viscous wall boundary conditions
+        !       of the turbulent transport equations to a block. It is assumed
+        !       that the pointers in blockPointers are already set to the
+        !       correct block on the correct grid level.
+        !
+        use constants
+        use blockPointers
+        use flowVarRefState
+        implicit none
+        !
+        !      Subroutine argument.
+        !
+        logical, intent(in) :: secondHalo
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn, i, j, l, m
+
+        ! Loop over the viscous subfaces of this block.
+
+        bocos: do nn = 1, nViscBocos
+
+            ! Set the corresponding arrays.
+
+            call BCTurbWall(nn)
+
+            ! Loop over the faces and set the state in
+            ! the turbulent halo cells.
+
+            select case (BCFaceID(nn))
+            case (iMin)
+                do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                    do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+
+                        do l = nt1, nt2
+                            w(1, i, j, l) = bvti1(i, j, l)
+                            do m = nt1, nt2
+                                w(1, i, j, l) = w(1, i, j, l) - bmti1(i, j, l, m)*w(2, i, j, m)
+                            end do
+                            if (secondHalo) w(0, i, j, l) = w(1, i, j, l)
+                        end do
+
+                        if (eddyModel) then
+                            rev(1, i, j) = -rev(2, i, j)
+                            if (secondHalo) then
+                                rev(0, i, j) = rev(1, i, j)
+                            end if
+                        end if
+                    end do
+                end do
+            case (iMax)
+                do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                    do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+
+                        do l = nt1, nt2
+                            w(ie, i, j, l) = bvti2(i, j, l)
+                            do m = nt1, nt2
+                                w(ie, i, j, l) = w(ie, i, j, l) - bmti2(i, j, l, m)*w(il, i, j, m)
+                            end do
+                            if (secondHalo) w(ib, i, j, l) = w(ie, i, j, l)
+                        end do
+
+                        if (eddyModel) then
+                            rev(ie, i, j) = -rev(il, i, j)
+                            if (secondHalo) then
+                                rev(ib, i, j) = rev(ie, i, j)
+                            end if
+                        end if
+                    end do
+                end do
+            case (jMin)
+                do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                    do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+
+                        do l = nt1, nt2
+                            w(i, 1, j, l) = bvtj1(i, j, l)
+                            do m = nt1, nt2
+                                w(i, 1, j, l) = w(i, 1, j, l) - bmtj1(i, j, l, m)*w(i, 2, j, m)
+                            end do
+                            if (secondHalo) w(i, 0, j, l) = w(i, 1, j, l)
+                        end do
+
+                        if (eddyModel) then
+                            rev(i, 1, j) = -rev(i, 2, j)
+                            if (secondHalo) then
+                                rev(i, 0, j) = rev(i, 1, j)
+                            end if
+                        end if
+                    end do
+                end do
+            case (jMax)
+                do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                    do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+
+                        do l = nt1, nt2
+                            w(i, je, j, l) = bvtj2(i, j, l)
+                            do m = nt1, nt2
+                                w(i, je, j, l) = w(i, je, j, l) - bmtj2(i, j, l, m)*w(i, jl, j, m)
+                            end do
+                            if (secondHalo) w(i, jb, j, l) = w(i, je, j, l)
+                        end do
+
+                        if (eddyModel) then
+                            rev(i, je, j) = -rev(i, jl, j)
+                            if (secondHalo) then
+                                rev(i, jb, j) = rev(i, je, j)
+                            end if
+                        end if
+                    end do
+                end do
+            case (kMin)
+                do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                    do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+
+                        do l = nt1, nt2
+                            w(i, j, 1, l) = bvtk1(i, j, l)
+                            do m = nt1, nt2
+                                w(i, j, 1, l) = w(i, j, 1, l) - bmtk1(i, j, l, m)*w(i, j, 2, m)
+                            end do
+                            if (secondHalo) w(i, j, 0, l) = w(i, j, 1, l)
+                        end do
+
+                        if (eddyModel) then
+                            rev(i, j, 1) = -rev(i, j, 2)
+                            if (secondHalo) then
+                                rev(i, j, 0) = rev(i, j, 1)
+                            end if
+                        end if
+                    end do
+                end do
+            case (kMax)
+                do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                    do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+
+                        do l = nt1, nt2
+                            w(i, j, ke, l) = bvtk2(i, j, l)
+                            do m = nt1, nt2
+                                w(i, j, ke, l) = w(i, j, ke, l) - bmtk2(i, j, l, m)*w(i, j, kl, m)
+                            end do
+                            if (secondHalo) w(i, j, kb, l) = w(i, j, ke, l)
+                        end do
+
+                        if (eddyModel) then
+                            rev(i, j, ke) = -rev(i, j, kl)
+                            if (secondHalo) then
+                                rev(i, j, kb) = rev(i, j, ke)
+                            end if
+                        end if
+                    end do
+                end do
+            end select
+        end do bocos
+    end subroutine turbBCNSWall
 
-  function  saRoughFact(i,j,k)
+    function saRoughFact(i, j, k)
 
-    ! returns either the regular SA-boundary condition
-    ! or the modified Roughness-boundary condition
+        ! returns either the regular SA-boundary condition
+        ! or the modified Roughness-boundary condition
 
-   use constants
-   use inputPhysics, only : useRoughSA
-   use BlockPointers, only : ks, d2wall
-   implicit none
+        use constants
+        use inputPhysics, only: useRoughSA
+        use BlockPointers, only: ks, d2wall
+        implicit none
 
-   ! dummy arguments
-   real(kind=realType) :: saRoughFact
+        ! dummy arguments
+        real(kind=realType) :: saRoughFact
 
-   ! local variablse
-   integer(kind=intType) :: i, j, k
+        ! local variablse
+        integer(kind=intType) :: i, j, k
 
-   if (.not. useRoughSA) then
-      saRoughFact = -one
-      return
-   end if
+        if (.not. useRoughSA) then
+            saRoughFact = -one
+            return
+        end if
 
-   saRoughFact = (ks(i,j,k) - d2wall(i,j,k)/0.03_realType) / &
-        (ks(i,j,k) + d2wall(i,j,k)/0.03_realType)
+        saRoughFact = (ks(i, j, k) - d2wall(i, j, k)/0.03_realType)/ &
+                      (ks(i, j, k) + d2wall(i, j, k)/0.03_realType)
 
-  end function  saRoughFact
+    end function saRoughFact
 
 end module turbBCRoutines
diff --git a/src/utils/utils.F90 b/src/utils/utils.F90
index 5c9e6314f..4dca871b4 100644
--- a/src/utils/utils.F90
+++ b/src/utils/utils.F90
@@ -1,6592 +1,6569 @@
 module utils
- implicit none
-
- contains
-
-  function char2str(charArray, n)
-    use constants
-    !
-    ! some gymnastics to cast a char array to string
-    !
-    implicit none
-    !
-    !      Function arguments.
-    !
-    character, dimension(maxCGNSNameLen), intent(in) :: charArray
-    integer(kind=intType), intent(in) :: n
-    !
-    !      Function type
-    !
-    character(len=n) :: char2str
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i
-    do i=1,n
-      char2str(i:i) = charArray(i)
-    end do
-
-  end function char2str
-
-  function TSbeta(degreePolBeta,   coefPolBeta,       &
-       degreeFourBeta,  omegaFourBeta,     &
-       cosCoefFourBeta, sinCoefFourBeta, t)
-    !
-    !       TSbeta computes the angle of attack for a given Time interval
-    !       in a time spectral solution.
-    !
-    use constants
-    use inputPhysics, only : equationMode
-    implicit none
-    !
-    !      Function type
-    !
-    real(kind=realType) :: TSbeta
-    !
-    !      Function arguments.
-    !
-    integer(kind=intType), intent(in) :: degreePolBeta
-    integer(kind=intType), intent(in) :: degreeFourBeta
-
-    real(kind=realType), intent(in) :: omegaFourBeta, t
-
-    real(kind=realType), dimension(0:*), intent(in) :: coefPolBeta
-    real(kind=realType), dimension(0:*), intent(in) :: cosCoefFourBeta
-    real(kind=realType), dimension(*),   intent(in) :: sinCoefFourBeta
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn
-
-    real(kind=realType) :: beta, val
-
-    ! Return immediately if this is a steady computation.
-
-    if(equationMode == steady) then
-       TSBeta = zero
-       return
-    endif
-
-    ! Compute the polynomial contribution. If no polynomial was
-    ! specified, the value of index 0 is set to zero automatically.
-
-    beta = coefPolBeta(0)
-    do nn=1,degreePolBeta
-       beta = beta + coefPolBeta(nn)*(t**nn)
-    enddo
-
-    ! Compute the fourier contribution. Again the cosine coefficient
-    ! of index 0 is defaulted to zero if not specified.
-
-    beta = beta + cosCoefFourBeta(0)
-    do nn=1,degreeFourBeta
-       val = nn*omegaFourBeta*t
-       beta = beta + cosCoefFourbeta(nn)*cos(val) &
-            + sinCoefFourbeta(nn)*sin(val)
-    enddo
-
-    ! Set TSBeta to phi.
-
-    TSBeta = beta
-
-  end function TSbeta
-
-  function TSbetadot(degreePolBeta,   coefPolBeta,       &
-       degreeFourBeta,  omegaFourBeta,     &
-       cosCoefFourBeta, sinCoefFourBeta, t)
-    !
-    !       TSbeta computes the angle of attack for a given Time interval
-    !       in a time spectral solution.
-    !
-    use constants
-    use inputPhysics, only : equationMode
-    implicit none
-    !
-    !      Function type
-    !
-    real(kind=realType) :: TSbetadot
-    !
-    !      Function arguments.
-    !
-    integer(kind=intType), intent(in) :: degreePolBeta
-    integer(kind=intType), intent(in) :: degreeFourBeta
-
-    real(kind=realType), intent(in) :: omegaFourBeta, t
-
-    real(kind=realType), dimension(0:*), intent(in) :: coefPolBeta
-    real(kind=realType), dimension(0:*), intent(in) :: cosCoefFourBeta
-    real(kind=realType), dimension(*),   intent(in) :: sinCoefFourBeta
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn
-
-    real(kind=realType) :: betadot, val
-
-    ! Return immediately if this is a steady computation.
-
-    if(equationMode == steady) then
-       TSBetadot = zero
-       return
-    endif
-
-    ! Compute the polynomial contribution. If no polynomial was
-    ! specified, the value of index 0 is set to zero automatically.
-
-    betadot = zero
-    do nn=1,degreePolBeta
-       betadot = betadot + nn*coefPolBeta(nn)*(t**(nn-1))
-    enddo
-
-    ! Compute the fourier contribution. Again the cosine coefficient
-    ! of index 0 is defaulted to zero if not specified.
-
-    do nn=1,degreeFourBeta
-       val = nn*omegaFourBeta
-       betadot = betadot -val* cosCoefFourbeta(nn)*sin(val*t) &
-            +val* sinCoefFourbeta(nn)*cos(val*t)
-    enddo
-
-    ! Set TSBeta to phi.
-
-    TSBetadot = betadot
-
-  end function TSbetadot
-
-  function TSMach(degreePolMach,   coefPolMach,       &
-       degreeFourMach,  omegaFourMach,     &
-       cosCoefFourMach, sinCoefFourMach, t)
-    !
-    !       TSMach computes the Mach Number for a given time interval
-    !       in a time spectral solution.
-    !
-    use constants
-    use inputPhysics, only : equationMode
-    implicit none
-    !
-    !      Function type
-    !
-    real(kind=realType) :: TSmach
-    !
-    !      Function arguments.
-    !
-    integer(kind=intType), intent(in) :: degreePolMach
-    integer(kind=intType), intent(in) :: degreeFourMach
-
-    real(kind=realType), intent(in) :: omegaFourMach, t
-
-    real(kind=realType), dimension(0:*), intent(in) :: coefPolMach
-    real(kind=realType), dimension(0:*), intent(in) :: cosCoefFourMach
-    real(kind=realType), dimension(*),   intent(in) :: sinCoefFourMach
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn
-
-    real(kind=realType) :: intervalMach, val
-
-    ! Return immediately if this is a steady computation.
-
-    if(equationMode == steady) then
-       TSMach = zero
-       return
-    endif
-
-    ! Compute the polynomial contribution. If no polynomial was
-    ! specified, the value of index 0 is set to zero automatically.
-
-    intervalMach = coefPolMach(0)
-    do nn=1,degreePolMach
-       intervalMach = intervalMach + coefPolMach(nn)*(t**nn)
-    enddo
-
-    ! Compute the fourier contribution. Again the cosine coefficient
-    ! of index 0 is defaulted to zero if not specified.
-
-    intervalMach = intervalMach + cosCoefFourMach(0)
-    do nn=1,degreeFourMach
-       val = nn*omegaFourMach*t
-       intervalMach = intervalMach + cosCoefFourmach(nn)*cos(val) &
-            + sinCoefFourmach(nn)*sin(val)
-    enddo
-    print *,'inTSMach',intervalMach,nn,val,t
-    ! Set TSMach to phi.
-
-    TSMach = intervalMach
-
-  end function TSmach
-
-  function TSMachdot(degreePolMach,   coefPolMach,       &
-       degreeFourMach,  omegaFourMach,     &
-       cosCoefFourMach, sinCoefFourMach, t)
-    !
-    !       TSmach computes the angle of attack for a given Time interval
-    !       in a time spectral solution.
-    !
-    use constants
-    use inputPhysics, only : equationMode
-    implicit none
-    !
-    !      Function type
-    !
-    real(kind=realType) :: TSmachdot
-    !
-    !      Function arguments.
-    !
-    integer(kind=intType), intent(in) :: degreePolMach
-    integer(kind=intType), intent(in) :: degreeFourMach
-
-    real(kind=realType), intent(in) :: omegaFourMach, t
-
-    real(kind=realType), dimension(0:*), intent(in) :: coefPolMach
-    real(kind=realType), dimension(0:*), intent(in) :: cosCoefFourMach
-    real(kind=realType), dimension(*),   intent(in) :: sinCoefFourMach
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn
-
-    real(kind=realType) :: machdot, val
-
-    ! Return immediately if this is a steady computation.
-
-    if(equationMode == steady) then
-       TSMachdot = zero
-       return
-    endif
-
-    ! Compute the polynomial contribution. If no polynomial was
-    ! specified, the value of index 0 is set to zero automatically.
-
-    machdot = zero
-    do nn=1,degreePolMach
-       machdot = machdot + nn*coefPolMach(nn)*(t**(nn-1))
-    enddo
-
-    ! Compute the fourier contribution. Again the cosine coefficient
-    ! of index 0 is defaulted to zero if not specified.
-
-    do nn=1,degreeFourMach
-       val = nn*omegaFourMach
-       machdot = machdot -val* cosCoefFourmach(nn)*sin(val*t) &
-            +val* sinCoefFourmach(nn)*cos(val*t)
-    enddo
-
-    ! Set TSMach to phi.
-
-    TSMachdot = machdot
-
-  end function TSmachdot
-
-  function TSalpha(degreePolAlpha,   coefPolAlpha,       &
-       degreeFourAlpha,  omegaFourAlpha,     &
-       cosCoefFourAlpha, sinCoefFourAlpha, t)
-    !
-    !       TSalpha computes the angle of attack for a given Time interval
-    !       in a time spectral solution.
-    !
-    use constants
-    use inputPhysics, only : equationMode
-    implicit none
-    !
-    !      Function type
-    !
-    real(kind=realType) :: TSalpha
-    !
-    !      Function arguments.
-    !
-    integer(kind=intType), intent(in) :: degreePolAlpha
-    integer(kind=intType), intent(in) :: degreeFourAlpha
-
-    real(kind=realType), intent(in) :: omegaFourAlpha, t
-
-    real(kind=realType), dimension(0:*), intent(in) :: coefPolAlpha
-    real(kind=realType), dimension(0:*), intent(in) :: cosCoefFourAlpha
-    real(kind=realType), dimension(*),   intent(in) :: sinCoefFourAlpha
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn
-
-    real(kind=realType) :: alpha, val
-
-    ! Return immediately if this is a steady computation.
-
-    if(equationMode == steady) then
-       TSAlpha = zero
-       return
-    endif
-
-    ! Compute the polynomial contribution. If no polynomial was
-    ! specified, the value of index 0 is set to zero automatically.
-    alpha = coefPolAlpha(0)
-    do nn=1,degreePolAlpha
-       alpha = alpha + coefPolAlpha(nn)*(t**nn)
-    enddo
-
-    ! Compute the fourier contribution. Again the cosine coefficient
-    ! of index 0 is defaulted to zero if not specified.
-
-    alpha = alpha + cosCoefFourAlpha(0)
-    do nn=1,degreeFourAlpha
-       val = nn*omegaFourAlpha*t
-       alpha = alpha + cosCoefFouralpha(nn)*cos(val) &
-            + sinCoefFouralpha(nn)*sin(val)
-    enddo
-    !print *,'inTSalpha',alpha,nn,val,t
-    ! Set TSAlpha to phi.
-
-    TSAlpha = alpha
-
-  end function TSalpha
-
-  function TSalphadot(degreePolAlpha,   coefPolAlpha,       &
-       degreeFourAlpha,  omegaFourAlpha,     &
-       cosCoefFourAlpha, sinCoefFourAlpha, t)
-    !
-    !       TSalpha computes the angle of attack for a given Time interval
-    !       in a time spectral solution.
-    !
-    use constants
-    use inputPhysics, only : equationMode
-    implicit none
-    !
-    !      Function type
-    !
-    real(kind=realType) :: TSalphadot
-    !
-    !      Function arguments.
-    !
-    integer(kind=intType), intent(in) :: degreePolAlpha
-    integer(kind=intType), intent(in) :: degreeFourAlpha
-
-    real(kind=realType), intent(in) :: omegaFourAlpha, t
-
-    real(kind=realType), dimension(0:*), intent(in) :: coefPolAlpha
-    real(kind=realType), dimension(0:*), intent(in) :: cosCoefFourAlpha
-    real(kind=realType), dimension(*),   intent(in) :: sinCoefFourAlpha
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn
-
-    real(kind=realType) :: alphadot, val
-
-    ! Return immediately if this is a steady computation.
-
-    if(equationMode == steady) then
-       TSAlphadot = zero
-       return
-    endif
-
-    ! Compute the polynomial contribution. If no polynomial was
-    ! specified, the value of index 0 is set to zero automatically.
-
-    alphadot = zero
-    do nn=1,degreePolAlpha
-       alphadot = alphadot + nn*coefPolAlpha(nn)*(t**(nn-1))
-    enddo
-
-    ! Compute the fourier contribution. Again the cosine coefficient
-    ! of index 0 is defaulted to zero if not specified.
-
-    do nn=1,degreeFourAlpha
-       val = nn*omegaFourAlpha
-       alphadot = alphadot -val* cosCoefFouralpha(nn)*sin(val*t) &
-            +val* sinCoefFouralpha(nn)*cos(val*t)
-    enddo
-
-    ! Set TSAlpha to phi.
-
-    TSAlphadot = alphadot
-
-  end function TSalphadot
-
-
-  function derivativeRigidRotAngle(degreePolRot,   &
-       coefPolRot,     &
-       degreeFourRot,  &
-       omegaFourRot,   &
-       cosCoefFourRot, &
-       sinCoefFourRot, t)
-    !
-    !       derivativeRigidRotAngle computes the time derivative of the
-    !       rigid body rotation angle at the given time for the given
-    !       arguments. The angle is described by a combination of a
-    !       polynomial and fourier series.
-    !
-    use constants
-    use inputPhysics, only : equationMode
-    use flowVarRefState, only : timeRef
-    implicit none
-    !
-    !      Function type
-    !
-    real(kind=realType) :: derivativeRigidRotAngle
-    !
-    !      Function arguments.
-    !
-    integer(kind=intType), intent(in) :: degreePolRot
-    integer(kind=intType), intent(in) :: degreeFourRot
-
-    real(kind=realType), intent(in) :: omegaFourRot, t
-
-    real(kind=realType), dimension(0:*), intent(in) :: coefPolRot
-    real(kind=realType), dimension(0:*), intent(in) :: cosCoefFourRot
-    real(kind=realType), dimension(*),   intent(in) :: sinCoefFourRot
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn
-
-    real(kind=realType) :: dPhi, val
-
-    ! Return immediately if this is a steady computation.
-
-    if(equationMode == steady) then
-       derivativeRigidRotAngle = zero
-       return
-    endif
-
-    ! Compute the polynomial contribution.
-
-    dPhi = zero
-    do nn=1,degreePolRot
-       dPhi = dPhi + nn*coefPolRot(nn)*(t**(nn-1))
-    enddo
-
-    ! Compute the fourier contribution.
-
-    do nn=1,degreeFourRot
-       val = nn*omegaFourRot
-       dPhi = dPhi - val*cosCoefFourRot(nn)*sin(val*t)
-       dPhi = dPhi + val*sinCoefFourRot(nn)*cos(val*t)
-    enddo
-
-    ! Set derivativeRigidRotAngle to dPhi. Multiply by timeRef
-    ! to obtain the correct non-dimensional value.
-
-    derivativeRigidRotAngle = timeRef*dPhi
-
-  end function derivativeRigidRotAngle
-
-  function myDim (x,y)
-
-    use constants
-
-    real(kind=realType) x,y
-    real(kind=realType) :: myDim
-
-    myDim = x - y
-    if (myDim < 0.0) then
-       myDim = 0.0
-    end if
-
-  end function myDim
-
-  function getCorrectForK()
-
-    use constants
-    use flowVarRefState, only : kPresent
-    use iteration, only : currentLevel, groundLevel
     implicit none
 
-    logical :: getCorrectForK
-
-    if( kPresent .and. currentLevel <= groundLevel) then
-       getCorrectForK = .true.
-    else
-       getCorrectForK = .false.
-    end if
-  end function getCorrectForK
-  subroutine terminate(routineName, errorMessage)
-    !
-    !       terminate writes an error message to standard output and
-    !       terminates the execution of the program.
-    !
-    use constants
-    use communication, only : adflow_comm_world, myid
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    character(len=*), intent(in) :: routineName
-    character(len=*), intent(in) :: errorMessage
+contains
+
+    function char2str(charArray, n)
+        use constants
+        !
+        ! some gymnastics to cast a char array to string
+        !
+        implicit none
+        !
+        !      Function arguments.
+        !
+        character, dimension(maxCGNSNameLen), intent(in) :: charArray
+        integer(kind=intType), intent(in) :: n
+        !
+        !      Function type
+        !
+        character(len=n) :: char2str
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i
+        do i = 1, n
+            char2str(i:i) = charArray(i)
+        end do
+
+    end function char2str
+
+    function TSbeta(degreePolBeta, coefPolBeta, &
+                    degreeFourBeta, omegaFourBeta, &
+                    cosCoefFourBeta, sinCoefFourBeta, t)
+        !
+        !       TSbeta computes the angle of attack for a given Time interval
+        !       in a time spectral solution.
+        !
+        use constants
+        use inputPhysics, only: equationMode
+        implicit none
+        !
+        !      Function type
+        !
+        real(kind=realType) :: TSbeta
+        !
+        !      Function arguments.
+        !
+        integer(kind=intType), intent(in) :: degreePolBeta
+        integer(kind=intType), intent(in) :: degreeFourBeta
+
+        real(kind=realType), intent(in) :: omegaFourBeta, t
+
+        real(kind=realType), dimension(0:*), intent(in) :: coefPolBeta
+        real(kind=realType), dimension(0:*), intent(in) :: cosCoefFourBeta
+        real(kind=realType), dimension(*), intent(in) :: sinCoefFourBeta
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn
+
+        real(kind=realType) :: beta, val
+
+        ! Return immediately if this is a steady computation.
+
+        if (equationMode == steady) then
+            TSBeta = zero
+            return
+        end if
+
+        ! Compute the polynomial contribution. If no polynomial was
+        ! specified, the value of index 0 is set to zero automatically.
+
+        beta = coefPolBeta(0)
+        do nn = 1, degreePolBeta
+            beta = beta + coefPolBeta(nn)*(t**nn)
+        end do
+
+        ! Compute the fourier contribution. Again the cosine coefficient
+        ! of index 0 is defaulted to zero if not specified.
+
+        beta = beta + cosCoefFourBeta(0)
+        do nn = 1, degreeFourBeta
+            val = nn*omegaFourBeta*t
+            beta = beta + cosCoefFourbeta(nn)*cos(val) &
+                   + sinCoefFourbeta(nn)*sin(val)
+        end do
+
+        ! Set TSBeta to phi.
+
+        TSBeta = beta
+
+    end function TSbeta
+
+    function TSbetadot(degreePolBeta, coefPolBeta, &
+                       degreeFourBeta, omegaFourBeta, &
+                       cosCoefFourBeta, sinCoefFourBeta, t)
+        !
+        !       TSbeta computes the angle of attack for a given Time interval
+        !       in a time spectral solution.
+        !
+        use constants
+        use inputPhysics, only: equationMode
+        implicit none
+        !
+        !      Function type
+        !
+        real(kind=realType) :: TSbetadot
+        !
+        !      Function arguments.
+        !
+        integer(kind=intType), intent(in) :: degreePolBeta
+        integer(kind=intType), intent(in) :: degreeFourBeta
+
+        real(kind=realType), intent(in) :: omegaFourBeta, t
+
+        real(kind=realType), dimension(0:*), intent(in) :: coefPolBeta
+        real(kind=realType), dimension(0:*), intent(in) :: cosCoefFourBeta
+        real(kind=realType), dimension(*), intent(in) :: sinCoefFourBeta
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn
+
+        real(kind=realType) :: betadot, val
+
+        ! Return immediately if this is a steady computation.
+
+        if (equationMode == steady) then
+            TSBetadot = zero
+            return
+        end if
+
+        ! Compute the polynomial contribution. If no polynomial was
+        ! specified, the value of index 0 is set to zero automatically.
+
+        betadot = zero
+        do nn = 1, degreePolBeta
+            betadot = betadot + nn*coefPolBeta(nn)*(t**(nn - 1))
+        end do
+
+        ! Compute the fourier contribution. Again the cosine coefficient
+        ! of index 0 is defaulted to zero if not specified.
+
+        do nn = 1, degreeFourBeta
+            val = nn*omegaFourBeta
+            betadot = betadot - val*cosCoefFourbeta(nn)*sin(val*t) &
+                      + val*sinCoefFourbeta(nn)*cos(val*t)
+        end do
+
+        ! Set TSBeta to phi.
+
+        TSBetadot = betadot
+
+    end function TSbetadot
+
+    function TSMach(degreePolMach, coefPolMach, &
+                    degreeFourMach, omegaFourMach, &
+                    cosCoefFourMach, sinCoefFourMach, t)
+        !
+        !       TSMach computes the Mach Number for a given time interval
+        !       in a time spectral solution.
+        !
+        use constants
+        use inputPhysics, only: equationMode
+        implicit none
+        !
+        !      Function type
+        !
+        real(kind=realType) :: TSmach
+        !
+        !      Function arguments.
+        !
+        integer(kind=intType), intent(in) :: degreePolMach
+        integer(kind=intType), intent(in) :: degreeFourMach
+
+        real(kind=realType), intent(in) :: omegaFourMach, t
+
+        real(kind=realType), dimension(0:*), intent(in) :: coefPolMach
+        real(kind=realType), dimension(0:*), intent(in) :: cosCoefFourMach
+        real(kind=realType), dimension(*), intent(in) :: sinCoefFourMach
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn
+
+        real(kind=realType) :: intervalMach, val
+
+        ! Return immediately if this is a steady computation.
+
+        if (equationMode == steady) then
+            TSMach = zero
+            return
+        end if
+
+        ! Compute the polynomial contribution. If no polynomial was
+        ! specified, the value of index 0 is set to zero automatically.
+
+        intervalMach = coefPolMach(0)
+        do nn = 1, degreePolMach
+            intervalMach = intervalMach + coefPolMach(nn)*(t**nn)
+        end do
+
+        ! Compute the fourier contribution. Again the cosine coefficient
+        ! of index 0 is defaulted to zero if not specified.
+
+        intervalMach = intervalMach + cosCoefFourMach(0)
+        do nn = 1, degreeFourMach
+            val = nn*omegaFourMach*t
+            intervalMach = intervalMach + cosCoefFourmach(nn)*cos(val) &
+                           + sinCoefFourmach(nn)*sin(val)
+        end do
+        print *, 'inTSMach', intervalMach, nn, val, t
+        ! Set TSMach to phi.
+
+        TSMach = intervalMach
+
+    end function TSmach
+
+    function TSMachdot(degreePolMach, coefPolMach, &
+                       degreeFourMach, omegaFourMach, &
+                       cosCoefFourMach, sinCoefFourMach, t)
+        !
+        !       TSmach computes the angle of attack for a given Time interval
+        !       in a time spectral solution.
+        !
+        use constants
+        use inputPhysics, only: equationMode
+        implicit none
+        !
+        !      Function type
+        !
+        real(kind=realType) :: TSmachdot
+        !
+        !      Function arguments.
+        !
+        integer(kind=intType), intent(in) :: degreePolMach
+        integer(kind=intType), intent(in) :: degreeFourMach
+
+        real(kind=realType), intent(in) :: omegaFourMach, t
+
+        real(kind=realType), dimension(0:*), intent(in) :: coefPolMach
+        real(kind=realType), dimension(0:*), intent(in) :: cosCoefFourMach
+        real(kind=realType), dimension(*), intent(in) :: sinCoefFourMach
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn
+
+        real(kind=realType) :: machdot, val
+
+        ! Return immediately if this is a steady computation.
+
+        if (equationMode == steady) then
+            TSMachdot = zero
+            return
+        end if
+
+        ! Compute the polynomial contribution. If no polynomial was
+        ! specified, the value of index 0 is set to zero automatically.
+
+        machdot = zero
+        do nn = 1, degreePolMach
+            machdot = machdot + nn*coefPolMach(nn)*(t**(nn - 1))
+        end do
+
+        ! Compute the fourier contribution. Again the cosine coefficient
+        ! of index 0 is defaulted to zero if not specified.
+
+        do nn = 1, degreeFourMach
+            val = nn*omegaFourMach
+            machdot = machdot - val*cosCoefFourmach(nn)*sin(val*t) &
+                      + val*sinCoefFourmach(nn)*cos(val*t)
+        end do
+
+        ! Set TSMach to phi.
+
+        TSMachdot = machdot
+
+    end function TSmachdot
+
+    function TSalpha(degreePolAlpha, coefPolAlpha, &
+                     degreeFourAlpha, omegaFourAlpha, &
+                     cosCoefFourAlpha, sinCoefFourAlpha, t)
+        !
+        !       TSalpha computes the angle of attack for a given Time interval
+        !       in a time spectral solution.
+        !
+        use constants
+        use inputPhysics, only: equationMode
+        implicit none
+        !
+        !      Function type
+        !
+        real(kind=realType) :: TSalpha
+        !
+        !      Function arguments.
+        !
+        integer(kind=intType), intent(in) :: degreePolAlpha
+        integer(kind=intType), intent(in) :: degreeFourAlpha
+
+        real(kind=realType), intent(in) :: omegaFourAlpha, t
+
+        real(kind=realType), dimension(0:*), intent(in) :: coefPolAlpha
+        real(kind=realType), dimension(0:*), intent(in) :: cosCoefFourAlpha
+        real(kind=realType), dimension(*), intent(in) :: sinCoefFourAlpha
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn
+
+        real(kind=realType) :: alpha, val
+
+        ! Return immediately if this is a steady computation.
+
+        if (equationMode == steady) then
+            TSAlpha = zero
+            return
+        end if
+
+        ! Compute the polynomial contribution. If no polynomial was
+        ! specified, the value of index 0 is set to zero automatically.
+        alpha = coefPolAlpha(0)
+        do nn = 1, degreePolAlpha
+            alpha = alpha + coefPolAlpha(nn)*(t**nn)
+        end do
+
+        ! Compute the fourier contribution. Again the cosine coefficient
+        ! of index 0 is defaulted to zero if not specified.
+
+        alpha = alpha + cosCoefFourAlpha(0)
+        do nn = 1, degreeFourAlpha
+            val = nn*omegaFourAlpha*t
+            alpha = alpha + cosCoefFouralpha(nn)*cos(val) &
+                    + sinCoefFouralpha(nn)*sin(val)
+        end do
+        !print *,'inTSalpha',alpha,nn,val,t
+        ! Set TSAlpha to phi.
+
+        TSAlpha = alpha
+
+    end function TSalpha
+
+    function TSalphadot(degreePolAlpha, coefPolAlpha, &
+                        degreeFourAlpha, omegaFourAlpha, &
+                        cosCoefFourAlpha, sinCoefFourAlpha, t)
+        !
+        !       TSalpha computes the angle of attack for a given Time interval
+        !       in a time spectral solution.
+        !
+        use constants
+        use inputPhysics, only: equationMode
+        implicit none
+        !
+        !      Function type
+        !
+        real(kind=realType) :: TSalphadot
+        !
+        !      Function arguments.
+        !
+        integer(kind=intType), intent(in) :: degreePolAlpha
+        integer(kind=intType), intent(in) :: degreeFourAlpha
+
+        real(kind=realType), intent(in) :: omegaFourAlpha, t
+
+        real(kind=realType), dimension(0:*), intent(in) :: coefPolAlpha
+        real(kind=realType), dimension(0:*), intent(in) :: cosCoefFourAlpha
+        real(kind=realType), dimension(*), intent(in) :: sinCoefFourAlpha
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn
+
+        real(kind=realType) :: alphadot, val
+
+        ! Return immediately if this is a steady computation.
+
+        if (equationMode == steady) then
+            TSAlphadot = zero
+            return
+        end if
+
+        ! Compute the polynomial contribution. If no polynomial was
+        ! specified, the value of index 0 is set to zero automatically.
+
+        alphadot = zero
+        do nn = 1, degreePolAlpha
+            alphadot = alphadot + nn*coefPolAlpha(nn)*(t**(nn - 1))
+        end do
+
+        ! Compute the fourier contribution. Again the cosine coefficient
+        ! of index 0 is defaulted to zero if not specified.
+
+        do nn = 1, degreeFourAlpha
+            val = nn*omegaFourAlpha
+            alphadot = alphadot - val*cosCoefFouralpha(nn)*sin(val*t) &
+                       + val*sinCoefFouralpha(nn)*cos(val*t)
+        end do
+
+        ! Set TSAlpha to phi.
+
+        TSAlphadot = alphadot
+
+    end function TSalphadot
+
+    function derivativeRigidRotAngle(degreePolRot, &
+                                     coefPolRot, &
+                                     degreeFourRot, &
+                                     omegaFourRot, &
+                                     cosCoefFourRot, &
+                                     sinCoefFourRot, t)
+        !
+        !       derivativeRigidRotAngle computes the time derivative of the
+        !       rigid body rotation angle at the given time for the given
+        !       arguments. The angle is described by a combination of a
+        !       polynomial and fourier series.
+        !
+        use constants
+        use inputPhysics, only: equationMode
+        use flowVarRefState, only: timeRef
+        implicit none
+        !
+        !      Function type
+        !
+        real(kind=realType) :: derivativeRigidRotAngle
+        !
+        !      Function arguments.
+        !
+        integer(kind=intType), intent(in) :: degreePolRot
+        integer(kind=intType), intent(in) :: degreeFourRot
+
+        real(kind=realType), intent(in) :: omegaFourRot, t
+
+        real(kind=realType), dimension(0:*), intent(in) :: coefPolRot
+        real(kind=realType), dimension(0:*), intent(in) :: cosCoefFourRot
+        real(kind=realType), dimension(*), intent(in) :: sinCoefFourRot
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn
+
+        real(kind=realType) :: dPhi, val
+
+        ! Return immediately if this is a steady computation.
+
+        if (equationMode == steady) then
+            derivativeRigidRotAngle = zero
+            return
+        end if
+
+        ! Compute the polynomial contribution.
+
+        dPhi = zero
+        do nn = 1, degreePolRot
+            dPhi = dPhi + nn*coefPolRot(nn)*(t**(nn - 1))
+        end do
+
+        ! Compute the fourier contribution.
+
+        do nn = 1, degreeFourRot
+            val = nn*omegaFourRot
+            dPhi = dPhi - val*cosCoefFourRot(nn)*sin(val*t)
+            dPhi = dPhi + val*sinCoefFourRot(nn)*cos(val*t)
+        end do
+
+        ! Set derivativeRigidRotAngle to dPhi. Multiply by timeRef
+        ! to obtain the correct non-dimensional value.
+
+        derivativeRigidRotAngle = timeRef*dPhi
+
+    end function derivativeRigidRotAngle
+
+    function myDim(x, y)
+
+        use constants
+
+        real(kind=realType) x, y
+        real(kind=realType) :: myDim
+
+        myDim = x - y
+        if (myDim < 0.0) then
+            myDim = 0.0
+        end if
+
+    end function myDim
+
+    function getCorrectForK()
+
+        use constants
+        use flowVarRefState, only: kPresent
+        use iteration, only: currentLevel, groundLevel
+        implicit none
+
+        logical :: getCorrectForK
+
+        if (kPresent .and. currentLevel <= groundLevel) then
+            getCorrectForK = .true.
+        else
+            getCorrectForK = .false.
+        end if
+    end function getCorrectForK
+    subroutine terminate(routineName, errorMessage)
+        !
+        !       terminate writes an error message to standard output and
+        !       terminates the execution of the program.
+        !
+        use constants
+        use communication, only: adflow_comm_world, myid
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        character(len=*), intent(in) :: routineName
+        character(len=*), intent(in) :: errorMessage
 #ifndef USE_TAPENADE
 
-    !
-    !      Local parameter
-    !
-    integer, parameter :: maxCharLine = 55
-    !
-    !      Local variables
-    !
-    integer :: ierr, len, i2
-    logical :: firstTime
+        !
+        !      Local parameter
+        !
+        integer, parameter :: maxCharLine = 55
+        !
+        !      Local variables
+        !
+        integer :: ierr, len, i2
+        logical :: firstTime
 
-    character(len=len_trim(errorMessage)) :: message
-    character(len=8) :: integerString
+        character(len=len_trim(errorMessage)) :: message
+        character(len=8) :: integerString
 
-    !
-    ! Copy the errorMessage into message. It is not possible to work
-    ! with errorMessage directly, because it is modified in this
-    ! routine. Sometimes a constant string is passed to this routine
-    ! and some compilers simply fail then.
+        !
+        ! Copy the errorMessage into message. It is not possible to work
+        ! with errorMessage directly, because it is modified in this
+        ! routine. Sometimes a constant string is passed to this routine
+        ! and some compilers simply fail then.
 
-    message = errorMessage
+        message = errorMessage
 
-    ! Print a nice error message. In case of a parallel executable
-    ! also the processor id is printed.
+        ! Print a nice error message. In case of a parallel executable
+        ! also the processor id is printed.
 
-    print "(a)", "#"
-    print "(a)", "#--------------------------- !!! Error !!! &
-         &----------------------------"
+        print "(a)", "#"
+        print "(a)", "#--------------------------- !!! Error !!! &
+             &----------------------------"
 
-    write(integerString,"(i8)") myID
-    integerString = adjustl(integerString)
+        write (integerString, "(i8)") myID
+        integerString = adjustl(integerString)
 
-    print "(2a)", "#* Terminate called by processor ", &
-         trim(integerString)
+        print "(2a)", "#* Terminate called by processor ", &
+            trim(integerString)
 
-    ! Write the header of the error message.
+        ! Write the header of the error message.
 
-    print "(2a)", "#* Run-time error in procedure ", &
-         trim(routineName)
+        print "(2a)", "#* Run-time error in procedure ", &
+            trim(routineName)
 
-    ! Loop to write the error message. If the message is too long it
-    ! is split over several lines.
+        ! Loop to write the error message. If the message is too long it
+        ! is split over several lines.
 
-    firstTime = .true.
-    do
-       ! Determine the remaining error message to be written.
-       ! If longer than the maximum number of characters allowed
-       ! on a line, it is attempted to split the message.
+        firstTime = .true.
+        do
+            ! Determine the remaining error message to be written.
+            ! If longer than the maximum number of characters allowed
+            ! on a line, it is attempted to split the message.
 
-       message = adjustl(message)
-       len = len_trim(message)
-       i2  = min(maxCharLine,len)
+            message = adjustl(message)
+            len = len_trim(message)
+            i2 = min(maxCharLine, len)
 
-       if(i2 < len) i2 = index(message(:i2), " ", .true.) - 1
-       if(i2 < 0)   i2 = index(message, " ") - 1
-       if(i2 < 0)   i2 = len
+            if (i2 < len) i2 = index(message(:i2), " ", .true.) - 1
+            if (i2 < 0) i2 = index(message, " ") - 1
+            if (i2 < 0) i2 = len
 
-       ! Write this part of the error message. If it is the first
-       ! line of the message some additional stuff is printed.
+            ! Write this part of the error message. If it is the first
+            ! line of the message some additional stuff is printed.
 
-       if( firstTime ) then
-          print "(2a)", "#* Error message: ", &
-               trim(message(:i2))
-          firstTime = .false.
-       else
-          print "(2a)", "#*                ", &
-               trim(message(:i2))
-       endif
+            if (firstTime) then
+                print "(2a)", "#* Error message: ", &
+                    trim(message(:i2))
+                firstTime = .false.
+            else
+                print "(2a)", "#*                ", &
+                    trim(message(:i2))
+            end if
 
-       ! Exit the loop if the entire message has been written.
+            ! Exit the loop if the entire message has been written.
 
-       if(i2 == len) exit
+            if (i2 == len) exit
 
-       ! Adapt the string for the next part to be written.
+            ! Adapt the string for the next part to be written.
 
-       message = message(i2+1:)
+            message = message(i2 + 1:)
 
-    enddo
+        end do
 
-    ! Write the trailing message.
+        ! Write the trailing message.
 
-    print "(a)", "#*"
-    print "(a)", "#* Now exiting"
-    print "(a)", "#------------------------------------------&
-         &----------------------------"
-    print "(a)", "#"
+        print "(a)", "#*"
+        print "(a)", "#* Now exiting"
+        print "(a)", "#------------------------------------------&
+             &----------------------------"
+        print "(a)", "#"
 
-    ! Call abort and stop the program. This stop should be done in
-    ! abort, but just to be sure.
+        ! Call abort and stop the program. This stop should be done in
+        ! abort, but just to be sure.
 
-    call mpi_abort(ADflow_comm_world, 1, ierr)
-    stop
+        call mpi_abort(ADflow_comm_world, 1, ierr)
+        stop
 
 #endif
 
-  end subroutine terminate
-
-  subroutine rotMatrixRigidBody(tNew, tOld, rotationMatrix, &
-       rotationPoint)
-    !
-    !       rotMatrixRigidBody determines the rotation matrix and the
-    !       rotation point to determine the coordinates of the new time
-    !       level starting from the coordinates of the old time level.
-    !
-    use constants
-    use inputMotion
-    use flowVarRefState, only : Lref
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    real(kind=realType), intent(in) :: tNew, tOld
-
-    real(kind=realType), dimension(3),   intent(out) :: rotationPoint
-    real(kind=realType), dimension(3,3), intent(out) :: rotationMatrix
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j
-
-    real(kind=realType) :: phi
-    real(kind=realType) :: cosX, cosY, cosZ, sinX, sinY, sinZ
-
-    real(kind=realType), dimension(3,3) :: mNew, mOld
-
-    ! Determine the rotation angle around the x-axis for the new
-    ! time level and the corresponding values of the sine and cosine.
-
-    phi = rigidRotAngle(degreePolXRot,   coefPolXRot,     &
-         degreeFourXRot,  omegaFourXRot,   &
-         cosCoefFourXRot, sinCoefFourXRot, tNew)
-    sinX = sin(phi)
-    cosX = cos(phi)
-
-    ! Idem for the y-axis.
-
-    phi = rigidRotAngle(degreePolYRot,   coefPolYRot,     &
-         degreeFourYRot,  omegaFourYRot,   &
-         cosCoefFourYRot, sinCoefFourYRot, tNew)
-    sinY = sin(phi)
-    cosY = cos(phi)
-
-    ! Idem for the z-axis.
-
-    phi = rigidRotAngle(degreePolZRot,   coefPolZRot,     &
-         degreeFourZRot,  omegaFourZRot,   &
-         cosCoefFourZRot, sinCoefFourZRot, tNew)
-    sinZ = sin(phi)
-    cosZ = cos(phi)
-
-    ! Construct the transformation matrix at the new time level.
-    ! It is assumed that the sequence of rotation is first around the
-    ! x-axis then around the y-axis and finally around the z-axis.
-
-    mNew(1,1) =  cosY*cosZ
-    mNew(2,1) =  cosY*sinZ
-    mNew(3,1) = -sinY
-
-    mNew(1,2) = sinX*sinY*cosZ - cosX*sinZ
-    mNew(2,2) = sinX*sinY*sinZ + cosX*cosZ
-    mNew(3,2) = sinX*cosY
-
-    mNew(1,3) = cosX*sinY*cosZ + sinX*sinZ
-    mNew(2,3) = cosX*sinY*sinZ - sinX*cosZ
-    mNew(3,3) = cosX*cosY
-
-    ! Determine the rotation angle around the x-axis for the old
-    ! time level and the corresponding values of the sine and cosine.
-
-    phi = rigidRotAngle(degreePolXRot,   coefPolXRot,     &
-         degreeFourXRot,  omegaFourXRot,   &
-         cosCoefFourXRot, sinCoefFourXRot, tOld)
-    sinX = sin(phi)
-    cosX = cos(phi)
-
-    ! Idem for the y-axis.
-
-    phi = rigidRotAngle(degreePolYRot,   coefPolYRot,     &
-         degreeFourYRot,  omegaFourYRot,   &
-         cosCoefFourYRot, sinCoefFourYRot, tOld)
-    sinY = sin(phi)
-    cosY = cos(phi)
-
-    ! Idem for the z-axis.
-
-    phi = rigidRotAngle(degreePolZRot,   coefPolZRot,     &
-         degreeFourZRot,  omegaFourZRot,   &
-         cosCoefFourZRot, sinCoefFourZRot, tOld)
-    sinZ = sin(phi)
-    cosZ = cos(phi)
-
-    ! Construct the transformation matrix at the old time level.
-
-    mOld(1,1) =  cosY*cosZ
-    mOld(2,1) =  cosY*sinZ
-    mOld(3,1) = -sinY
-
-    mOld(1,2) = sinX*sinY*cosZ - cosX*sinZ
-    mOld(2,2) = sinX*sinY*sinZ + cosX*cosZ
-    mOld(3,2) = sinX*cosY
-
-    mOld(1,3) = cosX*sinY*cosZ + sinX*sinZ
-    mOld(2,3) = cosX*sinY*sinZ - sinX*cosZ
-    mOld(3,3) = cosX*cosY
-
-    ! Construct the transformation matrix between the new and the
-    ! old time level. This is mNew*inverse(mOld). However the
-    ! inverse of mOld is the transpose.
-
-    do j=1,3
-       do i=1,3
-          rotationMatrix(i,j) = mNew(i,1)*mOld(j,1) &
-               + mNew(i,2)*mOld(j,2) &
-               + mNew(i,3)*mOld(j,3)
-       enddo
-    enddo
-
-    ! Determine the rotation point at the old time level; it is
-    ! possible that this value changes due to translation of the grid.
-
-    !  aInf = sqrt(gammaInf*pInf/rhoInf)
-
-    !  rotationPoint(1) = LRef*rotPoint(1) &
-    !                   + MachGrid(1)*aInf*tOld/timeRef
-    !  rotationPoint(2) = LRef*rotPoint(2) &
-    !                   + MachGrid(2)*aInf*tOld/timeRef
-    !  rotationPoint(3) = LRef*rotPoint(3) &
-    !                   + MachGrid(3)*aInf*tOld/timeRef
-
-    rotationPoint(1) = LRef*rotPoint(1)
-    rotationPoint(2) = LRef*rotPoint(2)
-    rotationPoint(3) = LRef*rotPoint(3)
-
-  end subroutine rotMatrixRigidBody
-
-  function secondDerivativeRigidRotAngle(degreePolRot,   &
-       coefPolRot,     &
-       degreeFourRot,  &
-       omegaFourRot,   &
-       cosCoefFourRot, &
-       sinCoefFourRot, t)
-    !
-    !       2ndderivativeRigidRotAngle computes the 2nd time derivative of
-    !       the rigid body rotation angle at the given time for the given
-    !       arguments. The angle is described by a combination of a
-    !       polynomial and fourier series.
-    !
-    use constants
-    use flowVarRefState, only : timeRef
-    use inputPhysics, only : equationMode
-    implicit none
-    !
-    !      Function type
-    !
-    real(kind=realType) :: secondDerivativeRigidRotAngle
-    !
-    !      Function arguments.
-    !
-    integer(kind=intType), intent(in) :: degreePolRot
-    integer(kind=intType), intent(in) :: degreeFourRot
-
-    real(kind=realType), intent(in) :: omegaFourRot, t
-
-    real(kind=realType), dimension(0:*), intent(in) :: coefPolRot
-    real(kind=realType), dimension(0:*), intent(in) :: cosCoefFourRot
-    real(kind=realType), dimension(*),   intent(in) :: sinCoefFourRot
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn
-
-    real(kind=realType) :: dPhi, val
-
-    ! Return immediately if this is a steady computation.
-
-    if(equationMode == steady) then
-       secondDerivativeRigidRotAngle = zero
-       return
-    endif
-
-    ! Compute the polynomial contribution.
-
-    dPhi = zero
-    do nn=2,degreePolRot
-       dPhi = dPhi + (nn-1)*nn*coefPolRot(nn)*(t**(nn-2))
-    enddo
-
-    ! Compute the fourier contribution.
-
-    do nn=1,degreeFourRot
-       val = nn*omegaFourRot
-       dPhi = dPhi - val**2*sinCoefFourRot(nn)*sin(val*t)
-       dPhi = dPhi - val**2*cosCoefFourRot(nn)*cos(val*t)
-    enddo
-
-    ! Set derivativeRigidRotAngle to dPhi. Multiply by timeRef
-    ! to obtain the correct non-dimensional value.
-
-    secondDerivativeRigidRotAngle = timeRef**2*dPhi
-
-  end function secondDerivativeRigidRotAngle
-
-  function rigidRotAngle(degreePolRot,   coefPolRot,       &
-       degreeFourRot,  omegaFourRot,     &
-       cosCoefFourRot, sinCoefFourRot, t)
-    !
-    !       rigidRotAngle computes the rigid body rotation angle at the
-    !       given time for the given arguments. The angle is described by
-    !       a combination of a polynomial and fourier series.
-    !
-    use constants
-    use inputPhysics, only : equationMode
-    implicit none
-    !
-    !      Function type
-    !
-    real(kind=realType) :: rigidRotAngle
-    !
-    !      Function arguments.
-    !
-    integer(kind=intType), intent(in) :: degreePolRot
-    integer(kind=intType), intent(in) :: degreeFourRot
-
-    real(kind=realType), intent(in) :: omegaFourRot, t
-
-    real(kind=realType), dimension(0:*), intent(in) :: coefPolRot
-    real(kind=realType), dimension(0:*), intent(in) :: cosCoefFourRot
-    real(kind=realType), dimension(*),   intent(in) :: sinCoefFourRot
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn
-
-    real(kind=realType) :: phi, val
-
-    ! Return immediately if this is a steady computation.
-
-    if(equationMode == steady) then
-       rigidRotAngle = zero
-       return
-    endif
-
-    ! Compute the polynomial contribution. If no polynomial was
-    ! specified, the value of index 0 is set to zero automatically.
-
-    phi = coefPolRot(0)
-    do nn=1,degreePolRot
-       phi = phi + coefPolRot(nn)*(t**nn)
-    enddo
-
-    ! Compute the fourier contribution. Again the cosine coefficient
-    ! of index 0 is defaulted to zero if not specified.
-
-    phi = phi + cosCoefFourRot(0)
-    do nn=1,degreeFourRot
-       val = nn*omegaFourRot*t
-       phi = phi + cosCoefFourRot(nn)*cos(val) &
-            + sinCoefFourRot(nn)*sin(val)
-    enddo
-
-    ! Set rigidRotAngle to phi.
-
-    rigidRotAngle = phi
-
-  end function rigidRotAngle
-
-  subroutine setBCPointers(nn, spatialPointers)
-    !
-    !       setBCPointers sets the pointers needed for the boundary
-    !       condition treatment on a general face, such that the boundary
-    !       routines are only implemented once instead of 6 times.
-    !
-    use constants
-    use blockPointers, only : w, p, rlv, rev, gamma, x, d2wall, &
-         si, sj, sk, s,  globalCell, BCData, nx, il, ie, ib, &
-         ny, jl, je, jb, nz, kl, ke, kb, BCFaceID, &
-         addgridvelocities, sFaceI, sFaceJ, sFaceK, addGridVelocities
-    use BCPointers, only : ww0, ww1, ww2, ww3, pp0, pp1, pp2, pp3, &
-         rlv0, rlv1, rlv2, rlv3, rev0, rev1, rev2, rev3, &
-         gamma0, gamma1, gamma2, gamma3, gcp, xx, ss, ssi, ssj, ssk, dd2wall, &
-         sFace, iStart, iEnd, jStart, jEnd, iSize, jSize
-    use inputPhysics, only: cpModel, equations
-    implicit none
-
-    ! Subroutine arguments.
-    integer(kind=intType), intent(in) :: nn
-    logical, intent(in) :: spatialPointers
-
-    ! Determine the sizes of each face and point to just the range we
-    ! need on each face.
-    iStart = BCData(nn)%icBeg
-    iEnd   = BCData(nn)%icEnd
-    jStart = BCData(nn)%jcBeg
-    jEnd   = BCData(nn)%jcEnd
-
-    ! Set the size of the subface
-    isize = iEnd-iStart + 1
-    jsize = jEnd-jStart + 1
-
-    ! Determine the face id on which the subface is located and set
-    ! the pointers accordinly.
-
-    select case (BCFaceID(nn))
-
-       !---------------------------------------------------------------------------
-    case (iMin)
-
-       ww3 => w(3, 1:, 1:, :)
-       ww2 => w(2, 1:, 1:, :)
-       ww1 => w(1, 1:, 1:, :)
-       ww0 => w(0, 1:, 1:, :)
-
-       pp3 => p(3, 1:, 1:)
-       pp2 => p(2, 1:, 1:)
-       pp1 => p(1, 1:, 1:)
-       pp0 => p(0, 1:, 1:)
-
-       rlv3 => rlv(3, 1:, 1:)
-       rlv2 => rlv(2, 1:, 1:)
-       rlv1 => rlv(1, 1:, 1:)
-       rlv0 => rlv(0, 1:, 1:)
-
-       rev3 => rev(3, 1:, 1:)
-       rev2 => rev(2, 1:, 1:)
-       rev1 => rev(1, 1:, 1:)
-       rev0 => rev(0, 1:, 1:)
-
-       gamma3 => gamma(3, 1:, 1:)
-       gamma2 => gamma(2, 1:, 1:)
-       gamma1 => gamma(1, 1:, 1:)
-       gamma0 => gamma(0, 1:, 1:)
-
-       gcp => globalCell(2, 1:, 1:)
-       !---------------------------------------------------------------------------
-
-    case (iMax)
-
-       ww3 => w(nx, 1:, 1:, :)
-       ww2 => w(il, 1:, 1:, :)
-       ww1 => w(ie, 1:, 1:, :)
-       ww0 => w(ib, 1:, 1:, :)
-
-       pp3 => p(nx, 1:, 1:)
-       pp2 => p(il, 1:, 1:)
-       pp1 => p(ie, 1:, 1:)
-       pp0 => p(ib, 1:, 1:)
-
-       rlv3 => rlv(nx, 1:, 1:)
-       rlv2 => rlv(il, 1:, 1:)
-       rlv1 => rlv(ie, 1:, 1:)
-       rlv0 => rlv(ib, 1:, 1:)
-
-       rev3 => rev(nx, 1:, 1:)
-       rev2 => rev(il, 1:, 1:)
-       rev1 => rev(ie, 1:, 1:)
-       rev0 => rev(ib, 1:, 1:)
-
-       gamma3 => gamma(nx, 1:, 1:)
-       gamma2 => gamma(il, 1:, 1:)
-       gamma1 => gamma(ie, 1:, 1:)
-       gamma0 => gamma(ib, 1:, 1:)
-
-       gcp => globalCell(il, 1:, 1:)
-       !---------------------------------------------------------------------------
-
-    case (jMin)
-
-       ww3 => w(1:, 3, 1:, :)
-       ww2 => w(1:, 2, 1:, :)
-       ww1 => w(1:, 1, 1:, :)
-       ww0 => w(1:, 0, 1:, :)
-
-       pp3 => p(1:, 3, 1:)
-       pp2 => p(1:, 2, 1:)
-       pp1 => p(1:, 1, 1:)
-       pp0 => p(1:, 0, 1:)
-
-       rlv3 => rlv(1:, 3, 1:)
-       rlv2 => rlv(1:, 2, 1:)
-       rlv1 => rlv(1:, 1, 1:)
-       rlv0 => rlv(1:, 0, 1:)
-
-       rev3 => rev(1:, 3, 1:)
-       rev2 => rev(1:, 2, 1:)
-       rev1 => rev(1:, 1, 1:)
-       rev0 => rev(1:, 0, 1:)
-
-       gamma3 => gamma(1:, 3, 1:)
-       gamma2 => gamma(1:, 2, 1:)
-       gamma1 => gamma(1:, 1, 1:)
-       gamma0 => gamma(1:, 0, 1:)
-
-       gcp => globalCell(1:, 2, 1:)
-       !---------------------------------------------------------------------------
-
-    case (jMax)
-
-       ww3 => w(1:, ny, 1:, :)
-       ww2 => w(1:, jl, 1:, :)
-       ww1 => w(1:, je, 1:, :)
-       ww0 => w(1:, jb, 1:, :)
-
-       pp3 => p(1:, ny, 1:)
-       pp2 => p(1:, jl, 1:)
-       pp1 => p(1:, je, 1:)
-       pp0 => p(1:, jb, 1:)
-
-       rlv3 => rlv(1:, ny, 1:)
-       rlv2 => rlv(1:, jl, 1:)
-       rlv1 => rlv(1:, je, 1:)
-       rlv0 => rlv(1:, jb, 1:)
-
-       rev3 => rev(1:, ny, 1:)
-       rev2 => rev(1:, jl, 1:)
-       rev1 => rev(1:, je, 1:)
-       rev0 => rev(1:, jb, 1:)
-
-       gamma3 => gamma(1:, ny, 1:)
-       gamma2 => gamma(1:, jl, 1:)
-       gamma1 => gamma(1:, je, 1:)
-       gamma0 => gamma(1:, jb, 1:)
-
-       gcp => globalCell(1:, jl, 1:)
-       !---------------------------------------------------------------------------
-
-    case (kMin)
-
-       ww3 => w(1:, 1:, 3, :)
-       ww2 => w(1:, 1:, 2, :)
-       ww1 => w(1:, 1:, 1, :)
-       ww0 => w(1:, 1:, 0, :)
-
-       pp3 => p(1:, 1:, 3)
-       pp2 => p(1:, 1:, 2)
-       pp1 => p(1:, 1:, 1)
-       pp0 => p(1:, 1:, 0)
-
-       rlv3 => rlv(1:, 1:, 3)
-       rlv2 => rlv(1:, 1:, 2)
-       rlv1 => rlv(1:, 1:, 1)
-       rlv0 => rlv(1:, 1:, 0)
-
-       rev3 => rev(1:, 1:, 3)
-       rev2 => rev(1:, 1:, 2)
-       rev1 => rev(1:, 1:, 1)
-       rev0 => rev(1:, 1:, 0)
-
-       gamma3 => gamma(1:, 1:, 3)
-       gamma2 => gamma(1:, 1:, 2)
-       gamma1 => gamma(1:, 1:, 1)
-       gamma0 => gamma(1:, 1:, 0)
-
-       gcp => globalCell(1:, 1:, 2)
-       !---------------------------------------------------------------------------
-
-    case (kMax)
-
-       ww3 => w(1:, 1:, nz, :)
-       ww2 => w(1:, 1:, kl, :)
-       ww1 => w(1:, 1:, ke, :)
-       ww0 => w(1:, 1:, kb, :)
-
-       pp3 => p(1:, 1:, nz)
-       pp2 => p(1:, 1:, kl)
-       pp1 => p(1:, 1:, ke)
-       pp0 => p(1:, 1:, kb)
-
-       rlv3 => rlv(1:, 1:, nz)
-       rlv2 => rlv(1:, 1:, kl)
-       rlv1 => rlv(1:, 1:, ke)
-       rlv0 => rlv(1:, 1:, kb)
-
-       rev3 => rev(1:, 1:, nz)
-       rev2 => rev(1:, 1:, kl)
-       rev1 => rev(1:, 1:, ke)
-       rev0 => rev(1:, 1:, kb)
-
-       gamma3 => gamma(1:, 1:, nz)
-       gamma2 => gamma(1:, 1:, kl)
-       gamma1 => gamma(1:, 1:, ke)
-       gamma0 => gamma(1:, 1:, kb)
-
-       gcp => globalCell(1:, 1:, kl)
-    end select
-
-    if (spatialPointers) then
-       select case (BCFaceID(nn))
-       case (iMin)
-          xx => x(1,:,:,:)
-          ssi => si(1,:,:,:)
-          ssj => sj(2,:,:,:)
-          ssk => sk(2,:,:,:)
-          ss  => s (2,:,:,:)
-       case (iMax)
-          xx => x(il,:,:,:)
-          ssi => si(il,:,:,:)
-          ssj => sj(il,:,:,:)
-          ssk => sk(il,:,:,:)
-          ss  =>  s(il,:,:,:)
-       case (jMin)
-          xx => x(:,1,:,:)
-          ssi => sj(:,1,:,:)
-          ssj => si(:,2,:,:)
-          ssk => sk(:,2,:,:)
-          ss   => s(:,2,:,:)
-       case (jMax)
-          xx => x(:,jl,:,:)
-          ssi => sj(:,jl,:,:)
-          ssj => si(:,jl,:,:)
-          ssk => sk(:,jl,:,:)
-          ss  =>  s(:,jl,:,:)
-       case (kMin)
-          xx => x(:,:,1,:)
-          ssi => sk(:,:,1,:)
-          ssj => si(:,:,2,:)
-          ssk => sj(:,:,2,:)
-          ss  =>  s(:,:,2,:)
-       case (kMax)
-          xx => x(:,:,kl,:)
-          ssi => sk(:,:,kl,:)
-          ssj => si(:,:,kl,:)
-          ssk => sj(:,:,kl,:)
-          ss  =>  s(:,:,kl,:)
-       end select
-
-       if (addGridVelocities) then
-         select case (BCFaceID(nn))
-         case (iMin)
-            sFace => sFaceI(1,:,:)
-         case (iMax)
-            sFace => sFaceI(il,:,:)
-         case (jMin)
-            sFace => sFaceJ(:,1,:)
-         case (jMax)
-            sFace => sFaceJ(:,jl,:)
-         case (kMin)
-            sFace => sFaceK(:,:,1)
-         case (kMax)
-            sFace => sFaceK(:,:,kl)
-         end select
-      end if
-
-       if(equations == RANSEquations) then
-          select case (BCFaceID(nn))
-          case (iMin)
-             dd2Wall => d2Wall(2,:,:)
-          case (iMax)
-             dd2Wall => d2Wall(il,:,:)
-          case (jMin)
-             dd2Wall => d2Wall(:,2,:)
-          case (jMax)
-             dd2Wall => d2Wall(:,jl,:)
-          case (kMin)
-             dd2Wall => d2Wall(:,:,2)
-          case (kMax)
-             dd2Wall => d2Wall(:,:,kl)
-          end select
-       end if
-    end if
-  end subroutine setBCPointers
-
-  subroutine computeRootBendingMoment(cf, cm, bendingMoment)
-
-    !                                                      *
-    ! Compute a normalized bending moment coefficient from *
-    ! the force and moment coefficient. At the moment this *
-    ! Routine only works for a half body. Additional logic *
-    ! would be needed for a full body.                     *
-    !                                                      *
-
-    use constants
-    use inputPhysics, only : lengthRef, pointRef, pointRefEC, liftIndex
-    implicit none
-
-    !input/output variables
-    real(kind=realType), intent(in), dimension(3) :: cf, cm
-    real(kind=realType), intent(out) :: bendingMoment
-
-    !Subroutine Variables
-    real(kind=realType):: elasticMomentx, elasticMomenty, elasticMomentz
-    bendingMoment = zero
-    if (liftIndex == 2) then
-       !z out wing sum momentx,momentz
-       elasticMomentx = cm(1) + cf(2)*(pointRefEC(3)-pointRef(3))/lengthref-cf(3)*(pointRefEC(2)-pointRef(2))/lengthref
-       elasticMomentz = cm(3) - cf(2)*(pointRefEC(1)-pointref(1))/lengthref+cf(1)*(pointRefEC(2)-pointRef(2))/lengthref
-       bendingMoment = sqrt(elasticMomentx**2+elasticMomentz**2)
-    elseif (liftIndex == 3) then
-       !y out wing sum momentx,momenty
-       elasticMomentx = cm(1) + cf(3)*(pointrefEC(2)-pointRef(2))/lengthref+cf(3)*(pointrefEC(3)-pointref(3))/lengthref
-       elasticMomenty = cm(2) + cf(3)*(pointRefEC(1)-pointRef(1))/lengthref+cf(1)*(pointrefEC(3)-pointRef(3))/lengthref
-       bendingMoment = sqrt(elasticMomentx**2+elasticMomenty**2)
-    end if
-
-  end subroutine computeRootBendingMoment
-
-  subroutine computeLeastSquaresRegression(y,x,npts,m,b)
-    !
-    !       Computes the slope of best fit for a set of x,y data of length
-    !       npts
-    !
-    use constants
-    implicit none
-    !Subroutine arguments
-    integer(kind=intType)::npts
-    real(kind=realType),dimension(npts)  :: x,y
-    real(kind=realType)::m,b
-
-    !local variables
-    real(kind=realType)::sumx,sumy,sumx2,sumxy
-    integer(kind=intType)::i
-
-    !begin execution
-    sumx=0.0
-    sumy=0.0
-    sumx2=0.0
-    sumxy=0.0
-    do i = 1,npts
-
-       sumx=sumx+x(i)
-       sumy=sumy+y(i)
-       sumx2=sumx2+x(i)*x(i)
-       sumxy=sumxy+x(i)*y(i)
-    enddo
-
-    m = ((npts*sumxy)-(sumy*sumx))/((npts*sumx2)-(sumx)**2)
-    b = (sumy*sumx2-(sumx*sumxy))/((npts*sumx2)-(sumx)**2)
-
-  end subroutine computeLeastSquaresRegression
-
-  subroutine computeTSDerivatives(force, moment, coef0, dcdalpha, &
-       dcdalphadot, dcdq, dcdqdot)
-    !
-    !      Computes the stability derivatives based on the time spectral
-    !      solution of a given mesh. Takes in the force coefficients at
-    !      all time instantces and computes the agregate parameters
-    !
-    use constants
-    use communication
-    use inputPhysics
-    use inputTimeSpectral
-    use inputTSStabDeriv
-    use flowvarrefstate
-    use monitor
-    use section
-    use inputMotion
-    implicit none
-
-    !
-    !     Subroutine arguments.
-    !
-    real(kind=realType), dimension(3, nTimeIntervalsSpectral) :: force, moment
-    real(kind=realType), dimension(8):: dcdq, dcdqdot
-    real(kind=realType), dimension(8):: dcdalpha,dcdalphadot
-    real(kind=realType), dimension(8):: Coef0
-
-    ! Working Variables
-    real(kind=realType), dimension(nTimeIntervalsSpectral, 8) :: baseCoef
-    real(kind=realType), dimension(8) ::coef0dot
-    real(kind=realType), dimension(nTimeIntervalsSpectral,8)::ResBaseCoef
-    real(kind=realType), dimension(nTimeIntervalsSpectral)  :: intervalAlpha,intervalAlphadot
-    real(kind=realType), dimension(nTimeIntervalsSpectral)  :: intervalMach,intervalMachdot
-    real(kind=realType), dimension(nSections) :: t
-    integer(kind=intType):: i,sps,nn
-    !speed of sound: for normalization of q derivatives
-    real(kind=realType)::a
-    real(kind=realType) :: fact, factMoment
-    ! Functions
-    real(kind=realType),dimension(nTimeIntervalsSpectral)  :: dPhix, dPhiy, dphiz
-    real(kind=realType),dimension(nTimeIntervalsSpectral)  :: dPhixdot, dPhiydot, dphizdot
-    real(kind=realType)::derivativeRigidRotAngle, secondDerivativeRigidRotAngle
-
-
-    fact = two/(gammaInf*pInf*MachCoef**2 &
-         *surfaceRef*LRef**2)
-    factMoment = fact/(lengthRef*LRef)
-
-    if (TSqMode)then
-
-       print *,'TS Q Mode code needs to be updated in computeTSDerivatives!'
-       stop
-
-       ! !q is pitch
-       ! do sps =1,nTimeIntervalsSpectral
-       !    !compute the time of this intervavc
-       !    t = timeUnsteadyRestart
-
-       !    if(equationMode == timeSpectral) then
-       !       do nn=1,nSections
-       !          t(nn) = t(nn) + (sps-1)*sections(nn)%timePeriod &
-       !               /         (nTimeIntervalsSpectral*1.0)
-       !       enddo
-       !    endif
-
-       !    ! Compute the time derivative of the rotation angles around the
-       !    ! z-axis. i.e. compute q
-
-       !    dphiZ(sps) = derivativeRigidRotAngle(degreePolZRot,   &
-       !         coefPolZRot,     &
-       !         degreeFourZRot,  &
-       !         omegaFourZRot,   &
-       !         cosCoefFourZRot, &
-       !         sinCoefFourZRot, t)
-
-       !    ! add in q_dot computation
-       !    dphiZdot(sps) = secondDerivativeRigidRotAngle(degreePolZRot,   &
-       !         coefPolZRot,     &
-       !         degreeFourZRot,  &
-       !         omegaFourZRot,   &
-       !         cosCoefFourZRot, &
-       !         sinCoefFourZRot, t)
-       ! end do
-
-       ! !now compute dCl/dq
-       ! do i =1,8
-       !    call computeLeastSquaresRegression(BaseCoef(:,i),dphiz,nTimeIntervalsSpectral,dcdq(i),coef0(i))
-       ! end do
-
-       ! ! now subtract off estimated cl,cmz and use remainder to compute
-       ! ! clqdot and cmzqdot.
-       ! do i = 1,8
-       !    do sps = 1,nTimeIntervalsSpectral
-       !       ResBaseCoef(sps,i) = BaseCoef(sps,i)-(dcdq(i)*dphiz(sps)+Coef0(i))
-       !    enddo
-       ! enddo
-
-       ! !now normalize the results...
-       ! a  = sqrt(gammaInf*pInfDim/rhoInfDim)
-       ! dcdq = dcdq*timeRef*2*(machGrid*a)/lengthRef
-
-       ! !now compute dCl/dpdot
-       ! do i = 1,8
-       !    call computeLeastSquaresRegression(ResBaseCoef(:,i),dphizdot,nTimeIntervalsSpectral,dcdqdot(i),Coef0dot(i))
-       ! enddo
-
-    elseif(TSAlphaMode)then
-
-       do sps=1,nTimeIntervalsSpectral
-
-          !compute the time of this interval
-          t = timeUnsteadyRestart
-
-          if(equationMode == timeSpectral) then
-             do nn=1,nSections
-                t(nn) = t(nn) + (sps-1)*sections(nn)%timePeriod &
-                     /         (nTimeIntervalsSpectral*1.0)
-             enddo
-          endif
-
-          intervalAlpha(sps) = TSAlpha(degreePolAlpha,   coefPolAlpha,       &
-               degreeFourAlpha,  omegaFourAlpha,     &
-               cosCoefFourAlpha, sinCoefFourAlpha, t(1))
-
-          intervalAlphadot(sps) = TSAlphadot(degreePolAlpha,   coefPolAlpha,       &
-               degreeFourAlpha,  omegaFourAlpha,     &
-               cosCoefFourAlpha, sinCoefFourAlpha, t(1))
-
-          ! THIS CALL IS WRONG!!!!
-          !call getDirAngle(velDirFreestream,liftDirection,liftIndex,alpha+intervalAlpha(sps), beta)
-
-          BaseCoef(sps,1) = fact*(&
-               force(1, sps)*liftDirection(1) + &
-               force(2, sps)*liftDirection(2) + &
-               force(3, sps)*liftDIrection(3))
-          BaseCoef(sps,2) = fact*(&
-               force(1, sps)*dragDirection(1) + &
-               force(2, sps)*dragDirection(2) + &
-               force(3, sps)*dragDIrection(3))
-          BaseCoef(sps,3) = force(1, sps)*fact
-          BaseCoef(sps,4) = force(2, sps)*fact
-          BaseCoef(sps,5) = force(3, sps)*fact
-          BaseCoef(sps,6) = moment(1, sps)*factMoment
-          BaseCoef(sps,7) = moment(2, sps)*factMoment
-          BaseCoef(sps,8) = moment(3, sps)*factMoment
-       end do
-
-       !now compute dCl/dalpha
-       do i =1,8
-          call computeLeastSquaresRegression(BaseCoef(:,i),intervalAlpha,nTimeIntervalsSpectral,dcdAlpha(i),coef0(i))
-       end do
-
-       ! now subtract off estimated cl,cmz and use remainder to compute
-       ! clalphadot and cmzalphadot.
-       do i = 1,8
-          do sps = 1,nTimeIntervalsSpectral
-             ResBaseCoef(sps,i) = BaseCoef(sps,i)-(dcdalpha(i)*intervalAlpha(sps)+Coef0(i))
-          enddo
-       enddo
-
-       !now compute dCi/dalphadot
-       do i = 1,8
-          call computeLeastSquaresRegression(ResBaseCoef(:,i),intervalAlphadot,nTimeIntervalsSpectral,dcdalphadot(i),Coef0dot(i))
-       enddo
-
-       a  = sqrt(gammaInf*pInfDim/rhoInfDim)
-       dcdalphadot = dcdalphadot*2*(machGrid*a)/lengthRef
-
-    else
-       call terminate('computeTSDerivatives','Not a valid stability motion')
-    endif
-
-  end subroutine computeTSDerivatives
-
-  subroutine getDirAngle(freeStreamAxis,liftAxis,liftIndex,alpha,beta)
-    !
-    !      Convert the wind axes to angle of attack and side slip angle.
-    !      The direction angles alpha and beta are computed given the
-    !      components of the wind direction vector (freeStreamAxis), the
-    !      lift direction vector (liftAxis) and assuming that the
-    !      body direction (xb,yb,zb) is in the default ijk coordinate
-    !      system. The rotations are determined by first determining
-    !      whether the lift is primarily in the j or k direction and then
-    !      determining the angles accordingly.
-    !      direction vector:
-    !        1) Rotation about the zb or yb -axis: alpha clockwise (CW)
-    !           (xb,yb,zb) -> (x1,y1,z1)
-    !        2) Rotation about the yl or z1 -axis: beta counter-clockwise
-    !           (CCW) (x1,y1,z1) -> (xw,yw,zw)
-    !         input arguments:
-    !            freeStreamAxis = wind vector in body axes
-    !            liftAxis       = lift direction vector in body axis
-    !         output arguments:
-    !            alpha    = angle of attack in radians
-    !            beta     = side slip angle in radians
-    !
-    use constants
-
-    implicit none
-    !
-    !     Subroutine arguments.
-    !
-    !      real(kind=realType), intent(in)  :: xw, yw, zw
-    real(kind=realType), dimension(3),intent(in) :: freeStreamAxis
-    real(kind=realType), dimension(3),intent(in) :: liftAxis
-    real(kind=realType), intent(out) :: alpha, beta
-    integer(kind=intType), intent(out)::liftIndex
-    !
-    !     Local variables.
-    !
-    real(kind=realType) :: rnorm
-    integer(kind=intType):: flowIndex,i
-    real(kind=realType), dimension(3) :: freeStreamAxisNorm
-    integer(kind=intType) ::  temp
-
-
-    ! Assume domoniate flow is x
-
-    flowIndex = 1
-
-    ! Determine the dominant lift direction
-    if ( abs(liftAxis(1)) > abs(liftAxis(2)) .and. &
-         abs(liftAxis(1)) > abs(liftAxis(3))) then
-       temp = 1
-    else if( abs(liftAxis(2)) > abs(liftAxis(1)) .and. &
-         abs(liftAxis(2)) > abs(liftAxis(3))) then
-       temp = 2
-    else
-       temp = 3
-    end if
-
-    liftIndex = temp
-
-    ! Normalize the freeStreamDirection vector.
-    rnorm = sqrt( freeStreamAxis(1)**2 + freeStreamAxis(2)**2 + freeStreamAxis(3)**2 )
-    do i =1,3
-       freeStreamAxisNorm(i) = freeStreamAxis(i)/rnorm
-    enddo
-
-    if (liftIndex == 2) then
-       ! different coordinate system for aerosurf
-       ! Wing is in z- direction
-       ! Compute angle of attack alpha.
-
-       alpha = asin(freeStreamAxisNorm(2))
-
-       ! Compute side-slip angle beta.
-
-       beta  = -atan2(freeStreamAxisNorm(3),freeStreamAxisNorm(1))
-
-
-    elseif (liftIndex == 3) then
-       ! Wing is in y- direction
-
-       ! Compute angle of attack alpha.
-
-       alpha = asin(freeStreamAxisNorm(3))
-
-       ! Compute side-slip angle beta.
-
-       beta  = atan2(freeStreamAxisNorm(2),freeStreamAxisNorm(1))
-    else
-       call terminate('getDirAngle', 'Invalid Lift Direction')
-    endif
-  end subroutine getDirAngle
-
-  subroutine stabilityDerivativeDriver
-    !
-    !      Runs the Time spectral stability derivative routines from the
-    !      main program file
-    !
-    use precision
-    implicit none
-    !
-    !     Local variables.
-    !
-    real(kind=realType),dimension(8)::dcdalpha,dcdalphadot,dcdbeta,&
-         dcdbetadot,dcdMach,dcdMachdot
-    real(kind=realType),dimension(8)::dcdp,dcdpdot,dcdq,dcdqdot,dcdr,dcdrdot
-    real(kind=realType),dimension(8)::Coef0,Coef0dot
-
-    !call computeTSDerivatives(coef0,dcdalpha,dcdalphadot,dcdq,dcdqdot)
-
-  end subroutine stabilityDerivativeDriver
-  subroutine setCoefTimeIntegrator
-    !
-    !       setCoefTimeIntegrator determines the coefficients of the
-    !       time integration scheme in unsteady mode. Normally these are
-    !       equal to the coefficients corresponding to the specified
-    !       accuracy. However during the initial phase there are not
-    !       enough states in the past and the accuracy is reduced.
-    !
-    use constants
-    use inputUnsteady
-    use inputPhysics
-    use iteration
-    use monitor
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn, nLevelsSet
-
-    ! Determine which time integrator must be used.
-
-    ! Modified by HDN
-    select case (timeAccuracy)
-    case (firstOrder)
-
-       ! 1st order. No need to check the number of available
-       ! states in the past. Set the two coefficients and
-       ! nLevelsSet to 2.
-
-       coefTime(0) =  1.0_realType
-       coefTime(1) = -1.0_realType
-
-       if ( useALE .and. equationMode .eq. unsteady)  then
-          coefTimeALE(1)   = 1.0_realType
-          coefMeshALE(1,1) = half
-          coefMeshALE(1,2) = half
-       end if
-
-       nLevelsSet = 2
-
-       !--------------------------------------------------
-
-    case (secondOrder)
-
-       ! Second order time integrator. Determine the amount of
-       ! available states and set the coefficients accordingly.
-       select case (nOldSolAvail)
-
-       case (1_intType)
-          coefTime(0) =  1.0_realType
-          coefTime(1) = -1.0_realType
-
-          if ( useALE .and. equationMode .eq. unsteady)  then
-             coefTimeALE(1)   = half
-             coefTimeALE(2)   = half
-             coefTimeALE(3)   = zero
-             coefTimeALE(4)   = zero
-
-             coefMeshALE(1,1) = half
-             coefMeshALE(1,2) = half
-             coefMeshALE(2,1) = half
-             coefMeshALE(2,2) = half
-          end if
-
-          nLevelsSet  = 2
-
-       case default   ! 2 or bigger.
-          coefTime(0) =  1.5_realType
-          coefTime(1) = -2.0_realType
-          coefTime(2) =  0.5_realType
-
-          if ( useALE .and. equationMode .eq. unsteady)  then
-             coefTimeALE(1)   = threefourth
-             coefTimeALE(2)   = threefourth
-             coefTimeALE(3)   = -fourth
-             coefTimeALE(4)   = -fourth
-
-             coefMeshALE(1,1) = half*(1.0_realType+1.0_realType/sqrtthree)
-             coefMeshALE(1,2) = half*(1.0_realType-1.0_realType/sqrtthree)
-             coefMeshALE(2,1) = coefMeshALE(1,2)
-             coefMeshALE(2,2) = coefMeshALE(1,1)
-          end if
-
-          nLevelsSet  = 3
-
-       end select
-
-       !--------------------------------------------------
-
-    case (thirdOrder)
-
-       ! Third order time integrator.  Determine the amount of
-       ! available states and set the coefficients accordingly.
-
-       select case (nOldSolAvail)
-
-       case (1_intType)
-          coefTime(0) =  1.0_realType
-          coefTime(1) = -1.0_realType
-
-          if ( useALE .and. equationMode .eq. unsteady)  then
-             coefTimeALE(1)   = 1.0_realType
-             coefMeshALE(1,1) = half
-             coefMeshALE(1,2) = half
-          end if
-
-          nLevelsSet  = 2
-
-       case (2_intType)
-          coefTime(0) =  1.5_realType
-          coefTime(1) = -2.0_realType
-          coefTime(2) =  0.5_realType
-
-          if ( useALE .and. equationMode .eq. unsteady)  then
-             coefTimeALE(1)   = threefourth
-             coefTimeALE(2)   = -fourth
-             coefMeshALE(1,1) = half*(1.0_realType+1.0_realType/sqrtthree)
-             coefMeshALE(1,2) = half*(1.0_realType-1.0_realType/sqrtthree)
-             coefMeshALE(2,1) = coefMeshALE(1,2)
-             coefMeshALE(2,2) = coefMeshALE(1,1)
-          end if
-
-          nLevelsSet  = 3
-
-       case default   ! 3 or bigger.
-          coefTime(0) = 11.0_realType/6.0_realType
-          coefTime(1) = -3.0_realType
-          coefTime(2) =  1.5_realType
-          coefTime(3) = -1.0_realType/3.0_realType
-
-          ! These numbers are NOT correct
-          ! DO NOT use 3rd order ALE for now
-          if ( useALE .and. equationMode .eq. unsteady)  then
-             print *, 'Third-order ALE not implemented yet.'
-             coefTimeALE(1)   = threefourth
-             coefTimeALE(2)   = threefourth
-             coefTimeALE(3)   = -fourth
-             coefTimeALE(4)   = -fourth
-             coefMeshALE(1,1) = half*(1.0_realType+1.0_realType/sqrtthree)
-             coefMeshALE(1,2) = half*(1.0_realType-1.0_realType/sqrtthree)
-             coefMeshALE(2,1) = coefMeshALE(1,2)
-             coefMeshALE(2,2) = coefMeshALE(1,1)
-             coefMeshALE(3,1) = coefMeshALE(1,2)
-             coefMeshALE(3,2) = coefMeshALE(1,1)
-          end if
-
-          nLevelsSet  = 4
-
-       end select
-
-    end select
-
-    ! Set the rest of the coefficients to 0 if not enough states
-    ! in the past are available.
-
-    do nn=nLevelsSet,nOldLevels
-       coefTime(nn) = zero
-    enddo
-
-  end subroutine setCoefTimeIntegrator
-
-  function myNorm2(x)
-    use constants
-    implicit none
-    real(kind=realType), dimension(3), intent(in) :: x
-    real(kind=realType) :: myNorm2
-    myNorm2 = sqrt(x(1)**2 + x(2)**2 + x(3)**2)
-  end function myNorm2
-
-  function isWallType(bType)
-
-    use constants
-    implicit none
-    integer(kind=intType) :: bType
-    logical :: isWallType
-
-    isWallType = .False.
-    if (bType == NSWallAdiabatic .or. &
-         bType == NSWallIsoThermal .or. &
-         bType == EulerWall) then
-       isWallType = .True.
-    end if
-
-  end function isWallType
-
-subroutine cross_prod(a,b,c)
-
-  use precision
-
-  ! Inputs
-  real(kind=realType), dimension(3), intent(in) :: a,b
-
-  ! Outputs
-  real(kind=realType), dimension(3), intent(out) :: c
-
-  c(1) = a(2) * b(3) - a(3) * b(2)
-  c(2) = a(3) * b(1) - a(1) * b(3)
-  c(3) = a(1) * b(2) - a(2) * b(1)
-
-end subroutine cross_prod
-
- subroutine siAngle(angle, mult, trans)
-
-    use constants
-    use su_cgns, only : Radian, Degree
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer, intent(in)              :: angle
-    real(kind=realType), intent(out) :: mult, trans
+    end subroutine terminate
+
+    subroutine rotMatrixRigidBody(tNew, tOld, rotationMatrix, &
+                                  rotationPoint)
+        !
+        !       rotMatrixRigidBody determines the rotation matrix and the
+        !       rotation point to determine the coordinates of the new time
+        !       level starting from the coordinates of the old time level.
+        !
+        use constants
+        use inputMotion
+        use flowVarRefState, only: Lref
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        real(kind=realType), intent(in) :: tNew, tOld
+
+        real(kind=realType), dimension(3), intent(out) :: rotationPoint
+        real(kind=realType), dimension(3, 3), intent(out) :: rotationMatrix
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j
+
+        real(kind=realType) :: phi
+        real(kind=realType) :: cosX, cosY, cosZ, sinX, sinY, sinZ
+
+        real(kind=realType), dimension(3, 3) :: mNew, mOld
+
+        ! Determine the rotation angle around the x-axis for the new
+        ! time level and the corresponding values of the sine and cosine.
+
+        phi = rigidRotAngle(degreePolXRot, coefPolXRot, &
+                            degreeFourXRot, omegaFourXRot, &
+                            cosCoefFourXRot, sinCoefFourXRot, tNew)
+        sinX = sin(phi)
+        cosX = cos(phi)
+
+        ! Idem for the y-axis.
+
+        phi = rigidRotAngle(degreePolYRot, coefPolYRot, &
+                            degreeFourYRot, omegaFourYRot, &
+                            cosCoefFourYRot, sinCoefFourYRot, tNew)
+        sinY = sin(phi)
+        cosY = cos(phi)
+
+        ! Idem for the z-axis.
+
+        phi = rigidRotAngle(degreePolZRot, coefPolZRot, &
+                            degreeFourZRot, omegaFourZRot, &
+                            cosCoefFourZRot, sinCoefFourZRot, tNew)
+        sinZ = sin(phi)
+        cosZ = cos(phi)
+
+        ! Construct the transformation matrix at the new time level.
+        ! It is assumed that the sequence of rotation is first around the
+        ! x-axis then around the y-axis and finally around the z-axis.
+
+        mNew(1, 1) = cosY*cosZ
+        mNew(2, 1) = cosY*sinZ
+        mNew(3, 1) = -sinY
+
+        mNew(1, 2) = sinX*sinY*cosZ - cosX*sinZ
+        mNew(2, 2) = sinX*sinY*sinZ + cosX*cosZ
+        mNew(3, 2) = sinX*cosY
+
+        mNew(1, 3) = cosX*sinY*cosZ + sinX*sinZ
+        mNew(2, 3) = cosX*sinY*sinZ - sinX*cosZ
+        mNew(3, 3) = cosX*cosY
+
+        ! Determine the rotation angle around the x-axis for the old
+        ! time level and the corresponding values of the sine and cosine.
+
+        phi = rigidRotAngle(degreePolXRot, coefPolXRot, &
+                            degreeFourXRot, omegaFourXRot, &
+                            cosCoefFourXRot, sinCoefFourXRot, tOld)
+        sinX = sin(phi)
+        cosX = cos(phi)
+
+        ! Idem for the y-axis.
+
+        phi = rigidRotAngle(degreePolYRot, coefPolYRot, &
+                            degreeFourYRot, omegaFourYRot, &
+                            cosCoefFourYRot, sinCoefFourYRot, tOld)
+        sinY = sin(phi)
+        cosY = cos(phi)
+
+        ! Idem for the z-axis.
+
+        phi = rigidRotAngle(degreePolZRot, coefPolZRot, &
+                            degreeFourZRot, omegaFourZRot, &
+                            cosCoefFourZRot, sinCoefFourZRot, tOld)
+        sinZ = sin(phi)
+        cosZ = cos(phi)
+
+        ! Construct the transformation matrix at the old time level.
+
+        mOld(1, 1) = cosY*cosZ
+        mOld(2, 1) = cosY*sinZ
+        mOld(3, 1) = -sinY
+
+        mOld(1, 2) = sinX*sinY*cosZ - cosX*sinZ
+        mOld(2, 2) = sinX*sinY*sinZ + cosX*cosZ
+        mOld(3, 2) = sinX*cosY
+
+        mOld(1, 3) = cosX*sinY*cosZ + sinX*sinZ
+        mOld(2, 3) = cosX*sinY*sinZ - sinX*cosZ
+        mOld(3, 3) = cosX*cosY
+
+        ! Construct the transformation matrix between the new and the
+        ! old time level. This is mNew*inverse(mOld). However the
+        ! inverse of mOld is the transpose.
+
+        do j = 1, 3
+            do i = 1, 3
+                rotationMatrix(i, j) = mNew(i, 1)*mOld(j, 1) &
+                                       + mNew(i, 2)*mOld(j, 2) &
+                                       + mNew(i, 3)*mOld(j, 3)
+            end do
+        end do
+
+        ! Determine the rotation point at the old time level; it is
+        ! possible that this value changes due to translation of the grid.
+
+        !  aInf = sqrt(gammaInf*pInf/rhoInf)
+
+        !  rotationPoint(1) = LRef*rotPoint(1) &
+        !                   + MachGrid(1)*aInf*tOld/timeRef
+        !  rotationPoint(2) = LRef*rotPoint(2) &
+        !                   + MachGrid(2)*aInf*tOld/timeRef
+        !  rotationPoint(3) = LRef*rotPoint(3) &
+        !                   + MachGrid(3)*aInf*tOld/timeRef
+
+        rotationPoint(1) = LRef*rotPoint(1)
+        rotationPoint(2) = LRef*rotPoint(2)
+        rotationPoint(3) = LRef*rotPoint(3)
+
+    end subroutine rotMatrixRigidBody
+
+    function secondDerivativeRigidRotAngle(degreePolRot, &
+                                           coefPolRot, &
+                                           degreeFourRot, &
+                                           omegaFourRot, &
+                                           cosCoefFourRot, &
+                                           sinCoefFourRot, t)
+        !
+        !       2ndderivativeRigidRotAngle computes the 2nd time derivative of
+        !       the rigid body rotation angle at the given time for the given
+        !       arguments. The angle is described by a combination of a
+        !       polynomial and fourier series.
+        !
+        use constants
+        use flowVarRefState, only: timeRef
+        use inputPhysics, only: equationMode
+        implicit none
+        !
+        !      Function type
+        !
+        real(kind=realType) :: secondDerivativeRigidRotAngle
+        !
+        !      Function arguments.
+        !
+        integer(kind=intType), intent(in) :: degreePolRot
+        integer(kind=intType), intent(in) :: degreeFourRot
+
+        real(kind=realType), intent(in) :: omegaFourRot, t
+
+        real(kind=realType), dimension(0:*), intent(in) :: coefPolRot
+        real(kind=realType), dimension(0:*), intent(in) :: cosCoefFourRot
+        real(kind=realType), dimension(*), intent(in) :: sinCoefFourRot
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn
+
+        real(kind=realType) :: dPhi, val
+
+        ! Return immediately if this is a steady computation.
+
+        if (equationMode == steady) then
+            secondDerivativeRigidRotAngle = zero
+            return
+        end if
+
+        ! Compute the polynomial contribution.
+
+        dPhi = zero
+        do nn = 2, degreePolRot
+            dPhi = dPhi + (nn - 1)*nn*coefPolRot(nn)*(t**(nn - 2))
+        end do
+
+        ! Compute the fourier contribution.
+
+        do nn = 1, degreeFourRot
+            val = nn*omegaFourRot
+            dPhi = dPhi - val**2*sinCoefFourRot(nn)*sin(val*t)
+            dPhi = dPhi - val**2*cosCoefFourRot(nn)*cos(val*t)
+        end do
+
+        ! Set derivativeRigidRotAngle to dPhi. Multiply by timeRef
+        ! to obtain the correct non-dimensional value.
+
+        secondDerivativeRigidRotAngle = timeRef**2*dPhi
+
+    end function secondDerivativeRigidRotAngle
+
+    function rigidRotAngle(degreePolRot, coefPolRot, &
+                           degreeFourRot, omegaFourRot, &
+                           cosCoefFourRot, sinCoefFourRot, t)
+        !
+        !       rigidRotAngle computes the rigid body rotation angle at the
+        !       given time for the given arguments. The angle is described by
+        !       a combination of a polynomial and fourier series.
+        !
+        use constants
+        use inputPhysics, only: equationMode
+        implicit none
+        !
+        !      Function type
+        !
+        real(kind=realType) :: rigidRotAngle
+        !
+        !      Function arguments.
+        !
+        integer(kind=intType), intent(in) :: degreePolRot
+        integer(kind=intType), intent(in) :: degreeFourRot
+
+        real(kind=realType), intent(in) :: omegaFourRot, t
+
+        real(kind=realType), dimension(0:*), intent(in) :: coefPolRot
+        real(kind=realType), dimension(0:*), intent(in) :: cosCoefFourRot
+        real(kind=realType), dimension(*), intent(in) :: sinCoefFourRot
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn
+
+        real(kind=realType) :: phi, val
+
+        ! Return immediately if this is a steady computation.
+
+        if (equationMode == steady) then
+            rigidRotAngle = zero
+            return
+        end if
+
+        ! Compute the polynomial contribution. If no polynomial was
+        ! specified, the value of index 0 is set to zero automatically.
+
+        phi = coefPolRot(0)
+        do nn = 1, degreePolRot
+            phi = phi + coefPolRot(nn)*(t**nn)
+        end do
+
+        ! Compute the fourier contribution. Again the cosine coefficient
+        ! of index 0 is defaulted to zero if not specified.
+
+        phi = phi + cosCoefFourRot(0)
+        do nn = 1, degreeFourRot
+            val = nn*omegaFourRot*t
+            phi = phi + cosCoefFourRot(nn)*cos(val) &
+                  + sinCoefFourRot(nn)*sin(val)
+        end do
+
+        ! Set rigidRotAngle to phi.
+
+        rigidRotAngle = phi
+
+    end function rigidRotAngle
+
+    subroutine setBCPointers(nn, spatialPointers)
+        !
+        !       setBCPointers sets the pointers needed for the boundary
+        !       condition treatment on a general face, such that the boundary
+        !       routines are only implemented once instead of 6 times.
+        !
+        use constants
+        use blockPointers, only: w, p, rlv, rev, gamma, x, d2wall, &
+                                 si, sj, sk, s, globalCell, BCData, nx, il, ie, ib, &
+                                 ny, jl, je, jb, nz, kl, ke, kb, BCFaceID, &
+                                 addgridvelocities, sFaceI, sFaceJ, sFaceK, addGridVelocities
+        use BCPointers, only: ww0, ww1, ww2, ww3, pp0, pp1, pp2, pp3, &
+                              rlv0, rlv1, rlv2, rlv3, rev0, rev1, rev2, rev3, &
+                              gamma0, gamma1, gamma2, gamma3, gcp, xx, ss, ssi, ssj, ssk, dd2wall, &
+                              sFace, iStart, iEnd, jStart, jEnd, iSize, jSize
+        use inputPhysics, only: cpModel, equations
+        implicit none
+
+        ! Subroutine arguments.
+        integer(kind=intType), intent(in) :: nn
+        logical, intent(in) :: spatialPointers
+
+        ! Determine the sizes of each face and point to just the range we
+        ! need on each face.
+        iStart = BCData(nn)%icBeg
+        iEnd = BCData(nn)%icEnd
+        jStart = BCData(nn)%jcBeg
+        jEnd = BCData(nn)%jcEnd
+
+        ! Set the size of the subface
+        isize = iEnd - iStart + 1
+        jsize = jEnd - jStart + 1
+
+        ! Determine the face id on which the subface is located and set
+        ! the pointers accordinly.
+
+        select case (BCFaceID(nn))
+
+            !---------------------------------------------------------------------------
+        case (iMin)
+
+            ww3 => w(3, 1:, 1:, :)
+            ww2 => w(2, 1:, 1:, :)
+            ww1 => w(1, 1:, 1:, :)
+            ww0 => w(0, 1:, 1:, :)
+
+            pp3 => p(3, 1:, 1:)
+            pp2 => p(2, 1:, 1:)
+            pp1 => p(1, 1:, 1:)
+            pp0 => p(0, 1:, 1:)
+
+            rlv3 => rlv(3, 1:, 1:)
+            rlv2 => rlv(2, 1:, 1:)
+            rlv1 => rlv(1, 1:, 1:)
+            rlv0 => rlv(0, 1:, 1:)
+
+            rev3 => rev(3, 1:, 1:)
+            rev2 => rev(2, 1:, 1:)
+            rev1 => rev(1, 1:, 1:)
+            rev0 => rev(0, 1:, 1:)
+
+            gamma3 => gamma(3, 1:, 1:)
+            gamma2 => gamma(2, 1:, 1:)
+            gamma1 => gamma(1, 1:, 1:)
+            gamma0 => gamma(0, 1:, 1:)
+
+            gcp => globalCell(2, 1:, 1:)
+            !---------------------------------------------------------------------------
+
+        case (iMax)
+
+            ww3 => w(nx, 1:, 1:, :)
+            ww2 => w(il, 1:, 1:, :)
+            ww1 => w(ie, 1:, 1:, :)
+            ww0 => w(ib, 1:, 1:, :)
+
+            pp3 => p(nx, 1:, 1:)
+            pp2 => p(il, 1:, 1:)
+            pp1 => p(ie, 1:, 1:)
+            pp0 => p(ib, 1:, 1:)
+
+            rlv3 => rlv(nx, 1:, 1:)
+            rlv2 => rlv(il, 1:, 1:)
+            rlv1 => rlv(ie, 1:, 1:)
+            rlv0 => rlv(ib, 1:, 1:)
+
+            rev3 => rev(nx, 1:, 1:)
+            rev2 => rev(il, 1:, 1:)
+            rev1 => rev(ie, 1:, 1:)
+            rev0 => rev(ib, 1:, 1:)
+
+            gamma3 => gamma(nx, 1:, 1:)
+            gamma2 => gamma(il, 1:, 1:)
+            gamma1 => gamma(ie, 1:, 1:)
+            gamma0 => gamma(ib, 1:, 1:)
+
+            gcp => globalCell(il, 1:, 1:)
+            !---------------------------------------------------------------------------
+
+        case (jMin)
+
+            ww3 => w(1:, 3, 1:, :)
+            ww2 => w(1:, 2, 1:, :)
+            ww1 => w(1:, 1, 1:, :)
+            ww0 => w(1:, 0, 1:, :)
+
+            pp3 => p(1:, 3, 1:)
+            pp2 => p(1:, 2, 1:)
+            pp1 => p(1:, 1, 1:)
+            pp0 => p(1:, 0, 1:)
+
+            rlv3 => rlv(1:, 3, 1:)
+            rlv2 => rlv(1:, 2, 1:)
+            rlv1 => rlv(1:, 1, 1:)
+            rlv0 => rlv(1:, 0, 1:)
+
+            rev3 => rev(1:, 3, 1:)
+            rev2 => rev(1:, 2, 1:)
+            rev1 => rev(1:, 1, 1:)
+            rev0 => rev(1:, 0, 1:)
+
+            gamma3 => gamma(1:, 3, 1:)
+            gamma2 => gamma(1:, 2, 1:)
+            gamma1 => gamma(1:, 1, 1:)
+            gamma0 => gamma(1:, 0, 1:)
+
+            gcp => globalCell(1:, 2, 1:)
+            !---------------------------------------------------------------------------
+
+        case (jMax)
+
+            ww3 => w(1:, ny, 1:, :)
+            ww2 => w(1:, jl, 1:, :)
+            ww1 => w(1:, je, 1:, :)
+            ww0 => w(1:, jb, 1:, :)
+
+            pp3 => p(1:, ny, 1:)
+            pp2 => p(1:, jl, 1:)
+            pp1 => p(1:, je, 1:)
+            pp0 => p(1:, jb, 1:)
+
+            rlv3 => rlv(1:, ny, 1:)
+            rlv2 => rlv(1:, jl, 1:)
+            rlv1 => rlv(1:, je, 1:)
+            rlv0 => rlv(1:, jb, 1:)
+
+            rev3 => rev(1:, ny, 1:)
+            rev2 => rev(1:, jl, 1:)
+            rev1 => rev(1:, je, 1:)
+            rev0 => rev(1:, jb, 1:)
+
+            gamma3 => gamma(1:, ny, 1:)
+            gamma2 => gamma(1:, jl, 1:)
+            gamma1 => gamma(1:, je, 1:)
+            gamma0 => gamma(1:, jb, 1:)
+
+            gcp => globalCell(1:, jl, 1:)
+            !---------------------------------------------------------------------------
+
+        case (kMin)
+
+            ww3 => w(1:, 1:, 3, :)
+            ww2 => w(1:, 1:, 2, :)
+            ww1 => w(1:, 1:, 1, :)
+            ww0 => w(1:, 1:, 0, :)
+
+            pp3 => p(1:, 1:, 3)
+            pp2 => p(1:, 1:, 2)
+            pp1 => p(1:, 1:, 1)
+            pp0 => p(1:, 1:, 0)
+
+            rlv3 => rlv(1:, 1:, 3)
+            rlv2 => rlv(1:, 1:, 2)
+            rlv1 => rlv(1:, 1:, 1)
+            rlv0 => rlv(1:, 1:, 0)
+
+            rev3 => rev(1:, 1:, 3)
+            rev2 => rev(1:, 1:, 2)
+            rev1 => rev(1:, 1:, 1)
+            rev0 => rev(1:, 1:, 0)
+
+            gamma3 => gamma(1:, 1:, 3)
+            gamma2 => gamma(1:, 1:, 2)
+            gamma1 => gamma(1:, 1:, 1)
+            gamma0 => gamma(1:, 1:, 0)
+
+            gcp => globalCell(1:, 1:, 2)
+            !---------------------------------------------------------------------------
+
+        case (kMax)
+
+            ww3 => w(1:, 1:, nz, :)
+            ww2 => w(1:, 1:, kl, :)
+            ww1 => w(1:, 1:, ke, :)
+            ww0 => w(1:, 1:, kb, :)
+
+            pp3 => p(1:, 1:, nz)
+            pp2 => p(1:, 1:, kl)
+            pp1 => p(1:, 1:, ke)
+            pp0 => p(1:, 1:, kb)
+
+            rlv3 => rlv(1:, 1:, nz)
+            rlv2 => rlv(1:, 1:, kl)
+            rlv1 => rlv(1:, 1:, ke)
+            rlv0 => rlv(1:, 1:, kb)
+
+            rev3 => rev(1:, 1:, nz)
+            rev2 => rev(1:, 1:, kl)
+            rev1 => rev(1:, 1:, ke)
+            rev0 => rev(1:, 1:, kb)
+
+            gamma3 => gamma(1:, 1:, nz)
+            gamma2 => gamma(1:, 1:, kl)
+            gamma1 => gamma(1:, 1:, ke)
+            gamma0 => gamma(1:, 1:, kb)
+
+            gcp => globalCell(1:, 1:, kl)
+        end select
 
-    ! Determine the situation we are having here.
+        if (spatialPointers) then
+            select case (BCFaceID(nn))
+            case (iMin)
+                xx => x(1, :, :, :)
+                ssi => si(1, :, :, :)
+                ssj => sj(2, :, :, :)
+                ssk => sk(2, :, :, :)
+                ss => s(2, :, :, :)
+            case (iMax)
+                xx => x(il, :, :, :)
+                ssi => si(il, :, :, :)
+                ssj => sj(il, :, :, :)
+                ssk => sk(il, :, :, :)
+                ss => s(il, :, :, :)
+            case (jMin)
+                xx => x(:, 1, :, :)
+                ssi => sj(:, 1, :, :)
+                ssj => si(:, 2, :, :)
+                ssk => sk(:, 2, :, :)
+                ss => s(:, 2, :, :)
+            case (jMax)
+                xx => x(:, jl, :, :)
+                ssi => sj(:, jl, :, :)
+                ssj => si(:, jl, :, :)
+                ssk => sk(:, jl, :, :)
+                ss => s(:, jl, :, :)
+            case (kMin)
+                xx => x(:, :, 1, :)
+                ssi => sk(:, :, 1, :)
+                ssj => si(:, :, 2, :)
+                ssk => sj(:, :, 2, :)
+                ss => s(:, :, 2, :)
+            case (kMax)
+                xx => x(:, :, kl, :)
+                ssi => sk(:, :, kl, :)
+                ssj => si(:, :, kl, :)
+                ssk => sj(:, :, kl, :)
+                ss => s(:, :, kl, :)
+            end select
+
+            if (addGridVelocities) then
+                select case (BCFaceID(nn))
+                case (iMin)
+                    sFace => sFaceI(1, :, :)
+                case (iMax)
+                    sFace => sFaceI(il, :, :)
+                case (jMin)
+                    sFace => sFaceJ(:, 1, :)
+                case (jMax)
+                    sFace => sFaceJ(:, jl, :)
+                case (kMin)
+                    sFace => sFaceK(:, :, 1)
+                case (kMax)
+                    sFace => sFaceK(:, :, kl)
+                end select
+            end if
+
+            if (equations == RANSEquations) then
+                select case (BCFaceID(nn))
+                case (iMin)
+                    dd2Wall => d2Wall(2, :, :)
+                case (iMax)
+                    dd2Wall => d2Wall(il, :, :)
+                case (jMin)
+                    dd2Wall => d2Wall(:, 2, :)
+                case (jMax)
+                    dd2Wall => d2Wall(:, jl, :)
+                case (kMin)
+                    dd2Wall => d2Wall(:, :, 2)
+                case (kMax)
+                    dd2Wall => d2Wall(:, :, kl)
+                end select
+            end if
+        end if
+    end subroutine setBCPointers
+
+    subroutine computeRootBendingMoment(cf, cm, bendingMoment)
+
+        !                                                      *
+        ! Compute a normalized bending moment coefficient from *
+        ! the force and moment coefficient. At the moment this *
+        ! Routine only works for a half body. Additional logic *
+        ! would be needed for a full body.                     *
+        !                                                      *
+
+        use constants
+        use inputPhysics, only: lengthRef, pointRef, pointRefEC, liftIndex
+        implicit none
+
+        !input/output variables
+        real(kind=realType), intent(in), dimension(3) :: cf, cm
+        real(kind=realType), intent(out) :: bendingMoment
+
+        !Subroutine Variables
+        real(kind=realType):: elasticMomentx, elasticMomenty, elasticMomentz
+        bendingMoment = zero
+        if (liftIndex == 2) then
+            !z out wing sum momentx,momentz
+            elasticMomentx = cm(1) + cf(2)*(pointRefEC(3) - pointRef(3))/lengthref - cf(3)*(pointRefEC(2) - pointRef(2))/lengthref
+            elasticMomentz = cm(3) - cf(2)*(pointRefEC(1) - pointref(1))/lengthref + cf(1)*(pointRefEC(2) - pointRef(2))/lengthref
+            bendingMoment = sqrt(elasticMomentx**2 + elasticMomentz**2)
+        elseif (liftIndex == 3) then
+            !y out wing sum momentx,momenty
+            elasticMomentx = cm(1) + cf(3)*(pointrefEC(2) - pointRef(2))/lengthref + cf(3)*(pointrefEC(3) - pointref(3))/lengthref
+            elasticMomenty = cm(2) + cf(3)*(pointRefEC(1) - pointRef(1))/lengthref + cf(1)*(pointrefEC(3) - pointRef(3))/lengthref
+            bendingMoment = sqrt(elasticMomentx**2 + elasticMomenty**2)
+        end if
+
+    end subroutine computeRootBendingMoment
+
+    subroutine computeLeastSquaresRegression(y, x, npts, m, b)
+        !
+        !       Computes the slope of best fit for a set of x,y data of length
+        !       npts
+        !
+        use constants
+        implicit none
+        !Subroutine arguments
+        integer(kind=intType)::npts
+        real(kind=realType), dimension(npts)  :: x, y
+        real(kind=realType)::m, b
+
+        !local variables
+        real(kind=realType)::sumx, sumy, sumx2, sumxy
+        integer(kind=intType)::i
+
+        !begin execution
+        sumx = 0.0
+        sumy = 0.0
+        sumx2 = 0.0
+        sumxy = 0.0
+        do i = 1, npts
+
+            sumx = sumx + x(i)
+            sumy = sumy + y(i)
+            sumx2 = sumx2 + x(i)*x(i)
+            sumxy = sumxy + x(i)*y(i)
+        end do
+
+        m = ((npts*sumxy) - (sumy*sumx))/((npts*sumx2) - (sumx)**2)
+        b = (sumy*sumx2 - (sumx*sumxy))/((npts*sumx2) - (sumx)**2)
+
+    end subroutine computeLeastSquaresRegression
+
+    subroutine computeTSDerivatives(force, moment, coef0, dcdalpha, &
+                                    dcdalphadot, dcdq, dcdqdot)
+        !
+        !      Computes the stability derivatives based on the time spectral
+        !      solution of a given mesh. Takes in the force coefficients at
+        !      all time instantces and computes the agregate parameters
+        !
+        use constants
+        use communication
+        use inputPhysics
+        use inputTimeSpectral
+        use inputTSStabDeriv
+        use flowvarrefstate
+        use monitor
+        use section
+        use inputMotion
+        implicit none
+
+        !
+        !     Subroutine arguments.
+        !
+        real(kind=realType), dimension(3, nTimeIntervalsSpectral) :: force, moment
+        real(kind=realType), dimension(8):: dcdq, dcdqdot
+        real(kind=realType), dimension(8):: dcdalpha, dcdalphadot
+        real(kind=realType), dimension(8):: Coef0
+
+        ! Working Variables
+        real(kind=realType), dimension(nTimeIntervalsSpectral, 8) :: baseCoef
+        real(kind=realType), dimension(8) ::coef0dot
+        real(kind=realType), dimension(nTimeIntervalsSpectral, 8)::ResBaseCoef
+        real(kind=realType), dimension(nTimeIntervalsSpectral)  :: intervalAlpha, intervalAlphadot
+        real(kind=realType), dimension(nTimeIntervalsSpectral)  :: intervalMach, intervalMachdot
+        real(kind=realType), dimension(nSections) :: t
+        integer(kind=intType):: i, sps, nn
+        !speed of sound: for normalization of q derivatives
+        real(kind=realType)::a
+        real(kind=realType) :: fact, factMoment
+        ! Functions
+        real(kind=realType), dimension(nTimeIntervalsSpectral)  :: dPhix, dPhiy, dphiz
+        real(kind=realType), dimension(nTimeIntervalsSpectral)  :: dPhixdot, dPhiydot, dphizdot
+        real(kind=realType)::derivativeRigidRotAngle, secondDerivativeRigidRotAngle
+
+        fact = two/(gammaInf*pInf*MachCoef**2 &
+                    *surfaceRef*LRef**2)
+        factMoment = fact/(lengthRef*LRef)
+
+        if (TSqMode) then
+
+            print *, 'TS Q Mode code needs to be updated in computeTSDerivatives!'
+            stop
+
+            ! !q is pitch
+            ! do sps =1,nTimeIntervalsSpectral
+            !    !compute the time of this intervavc
+            !    t = timeUnsteadyRestart
+
+            !    if(equationMode == timeSpectral) then
+            !       do nn=1,nSections
+            !          t(nn) = t(nn) + (sps-1)*sections(nn)%timePeriod &
+            !               /         (nTimeIntervalsSpectral*1.0)
+            !       enddo
+            !    endif
+
+            !    ! Compute the time derivative of the rotation angles around the
+            !    ! z-axis. i.e. compute q
+
+            !    dphiZ(sps) = derivativeRigidRotAngle(degreePolZRot,   &
+            !         coefPolZRot,     &
+            !         degreeFourZRot,  &
+            !         omegaFourZRot,   &
+            !         cosCoefFourZRot, &
+            !         sinCoefFourZRot, t)
+
+            !    ! add in q_dot computation
+            !    dphiZdot(sps) = secondDerivativeRigidRotAngle(degreePolZRot,   &
+            !         coefPolZRot,     &
+            !         degreeFourZRot,  &
+            !         omegaFourZRot,   &
+            !         cosCoefFourZRot, &
+            !         sinCoefFourZRot, t)
+            ! end do
+
+            ! !now compute dCl/dq
+            ! do i =1,8
+            !    call computeLeastSquaresRegression(BaseCoef(:,i),dphiz,nTimeIntervalsSpectral,dcdq(i),coef0(i))
+            ! end do
+
+            ! ! now subtract off estimated cl,cmz and use remainder to compute
+            ! ! clqdot and cmzqdot.
+            ! do i = 1,8
+            !    do sps = 1,nTimeIntervalsSpectral
+            !       ResBaseCoef(sps,i) = BaseCoef(sps,i)-(dcdq(i)*dphiz(sps)+Coef0(i))
+            !    enddo
+            ! enddo
+
+            ! !now normalize the results...
+            ! a  = sqrt(gammaInf*pInfDim/rhoInfDim)
+            ! dcdq = dcdq*timeRef*2*(machGrid*a)/lengthRef
+
+            ! !now compute dCl/dpdot
+            ! do i = 1,8
+            !    call computeLeastSquaresRegression(ResBaseCoef(:,i),dphizdot,nTimeIntervalsSpectral,dcdqdot(i),Coef0dot(i))
+            ! enddo
+
+        elseif (TSAlphaMode) then
+
+            do sps = 1, nTimeIntervalsSpectral
+
+                !compute the time of this interval
+                t = timeUnsteadyRestart
+
+                if (equationMode == timeSpectral) then
+                    do nn = 1, nSections
+                        t(nn) = t(nn) + (sps - 1)*sections(nn)%timePeriod &
+                                /(nTimeIntervalsSpectral*1.0)
+                    end do
+                end if
+
+                intervalAlpha(sps) = TSAlpha(degreePolAlpha, coefPolAlpha, &
+                                             degreeFourAlpha, omegaFourAlpha, &
+                                             cosCoefFourAlpha, sinCoefFourAlpha, t(1))
+
+                intervalAlphadot(sps) = TSAlphadot(degreePolAlpha, coefPolAlpha, &
+                                                   degreeFourAlpha, omegaFourAlpha, &
+                                                   cosCoefFourAlpha, sinCoefFourAlpha, t(1))
+
+                ! THIS CALL IS WRONG!!!!
+                !call getDirAngle(velDirFreestream,liftDirection,liftIndex,alpha+intervalAlpha(sps), beta)
+
+                BaseCoef(sps, 1) = fact*( &
+                                   force(1, sps)*liftDirection(1) + &
+                                   force(2, sps)*liftDirection(2) + &
+                                   force(3, sps)*liftDIrection(3))
+                BaseCoef(sps, 2) = fact*( &
+                                   force(1, sps)*dragDirection(1) + &
+                                   force(2, sps)*dragDirection(2) + &
+                                   force(3, sps)*dragDIrection(3))
+                BaseCoef(sps, 3) = force(1, sps)*fact
+                BaseCoef(sps, 4) = force(2, sps)*fact
+                BaseCoef(sps, 5) = force(3, sps)*fact
+                BaseCoef(sps, 6) = moment(1, sps)*factMoment
+                BaseCoef(sps, 7) = moment(2, sps)*factMoment
+                BaseCoef(sps, 8) = moment(3, sps)*factMoment
+            end do
+
+            !now compute dCl/dalpha
+            do i = 1, 8
+                call computeLeastSquaresRegression(BaseCoef(:, i), intervalAlpha, nTimeIntervalsSpectral, dcdAlpha(i), coef0(i))
+            end do
+
+            ! now subtract off estimated cl,cmz and use remainder to compute
+            ! clalphadot and cmzalphadot.
+            do i = 1, 8
+                do sps = 1, nTimeIntervalsSpectral
+                    ResBaseCoef(sps, i) = BaseCoef(sps, i) - (dcdalpha(i)*intervalAlpha(sps) + Coef0(i))
+                end do
+            end do
+
+            !now compute dCi/dalphadot
+            do i = 1, 8
+        call computeLeastSquaresRegression(ResBaseCoef(:, i), intervalAlphadot, nTimeIntervalsSpectral, dcdalphadot(i), Coef0dot(i))
+            end do
+
+            a = sqrt(gammaInf*pInfDim/rhoInfDim)
+            dcdalphadot = dcdalphadot*2*(machGrid*a)/lengthRef
+
+        else
+            call terminate('computeTSDerivatives', 'Not a valid stability motion')
+        end if
+
+    end subroutine computeTSDerivatives
+
+    subroutine getDirAngle(freeStreamAxis, liftAxis, liftIndex, alpha, beta)
+        !
+        !      Convert the wind axes to angle of attack and side slip angle.
+        !      The direction angles alpha and beta are computed given the
+        !      components of the wind direction vector (freeStreamAxis), the
+        !      lift direction vector (liftAxis) and assuming that the
+        !      body direction (xb,yb,zb) is in the default ijk coordinate
+        !      system. The rotations are determined by first determining
+        !      whether the lift is primarily in the j or k direction and then
+        !      determining the angles accordingly.
+        !      direction vector:
+        !        1) Rotation about the zb or yb -axis: alpha clockwise (CW)
+        !           (xb,yb,zb) -> (x1,y1,z1)
+        !        2) Rotation about the yl or z1 -axis: beta counter-clockwise
+        !           (CCW) (x1,y1,z1) -> (xw,yw,zw)
+        !         input arguments:
+        !            freeStreamAxis = wind vector in body axes
+        !            liftAxis       = lift direction vector in body axis
+        !         output arguments:
+        !            alpha    = angle of attack in radians
+        !            beta     = side slip angle in radians
+        !
+        use constants
+
+        implicit none
+        !
+        !     Subroutine arguments.
+        !
+        !      real(kind=realType), intent(in)  :: xw, yw, zw
+        real(kind=realType), dimension(3), intent(in) :: freeStreamAxis
+        real(kind=realType), dimension(3), intent(in) :: liftAxis
+        real(kind=realType), intent(out) :: alpha, beta
+        integer(kind=intType), intent(out)::liftIndex
+        !
+        !     Local variables.
+        !
+        real(kind=realType) :: rnorm
+        integer(kind=intType):: flowIndex, i
+        real(kind=realType), dimension(3) :: freeStreamAxisNorm
+        integer(kind=intType) ::  temp
+
+        ! Assume domoniate flow is x
+
+        flowIndex = 1
+
+        ! Determine the dominant lift direction
+        if (abs(liftAxis(1)) > abs(liftAxis(2)) .and. &
+            abs(liftAxis(1)) > abs(liftAxis(3))) then
+            temp = 1
+        else if (abs(liftAxis(2)) > abs(liftAxis(1)) .and. &
+                 abs(liftAxis(2)) > abs(liftAxis(3))) then
+            temp = 2
+        else
+            temp = 3
+        end if
+
+        liftIndex = temp
+
+        ! Normalize the freeStreamDirection vector.
+        rnorm = sqrt(freeStreamAxis(1)**2 + freeStreamAxis(2)**2 + freeStreamAxis(3)**2)
+        do i = 1, 3
+            freeStreamAxisNorm(i) = freeStreamAxis(i)/rnorm
+        end do
+
+        if (liftIndex == 2) then
+            ! different coordinate system for aerosurf
+            ! Wing is in z- direction
+            ! Compute angle of attack alpha.
+
+            alpha = asin(freeStreamAxisNorm(2))
+
+            ! Compute side-slip angle beta.
+
+            beta = -atan2(freeStreamAxisNorm(3), freeStreamAxisNorm(1))
+
+        elseif (liftIndex == 3) then
+            ! Wing is in y- direction
+
+            ! Compute angle of attack alpha.
+
+            alpha = asin(freeStreamAxisNorm(3))
+
+            ! Compute side-slip angle beta.
+
+            beta = atan2(freeStreamAxisNorm(2), freeStreamAxisNorm(1))
+        else
+            call terminate('getDirAngle', 'Invalid Lift Direction')
+        end if
+    end subroutine getDirAngle
+
+    subroutine stabilityDerivativeDriver
+        !
+        !      Runs the Time spectral stability derivative routines from the
+        !      main program file
+        !
+        use precision
+        implicit none
+        !
+        !     Local variables.
+        !
+        real(kind=realType), dimension(8)::dcdalpha, dcdalphadot, dcdbeta, &
+                                            dcdbetadot, dcdMach, dcdMachdot
+        real(kind=realType), dimension(8)::dcdp, dcdpdot, dcdq, dcdqdot, dcdr, dcdrdot
+        real(kind=realType), dimension(8)::Coef0, Coef0dot
+
+        !call computeTSDerivatives(coef0,dcdalpha,dcdalphadot,dcdq,dcdqdot)
+
+    end subroutine stabilityDerivativeDriver
+    subroutine setCoefTimeIntegrator
+        !
+        !       setCoefTimeIntegrator determines the coefficients of the
+        !       time integration scheme in unsteady mode. Normally these are
+        !       equal to the coefficients corresponding to the specified
+        !       accuracy. However during the initial phase there are not
+        !       enough states in the past and the accuracy is reduced.
+        !
+        use constants
+        use inputUnsteady
+        use inputPhysics
+        use iteration
+        use monitor
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn, nLevelsSet
+
+        ! Determine which time integrator must be used.
+
+        ! Modified by HDN
+        select case (timeAccuracy)
+        case (firstOrder)
+
+            ! 1st order. No need to check the number of available
+            ! states in the past. Set the two coefficients and
+            ! nLevelsSet to 2.
+
+            coefTime(0) = 1.0_realType
+            coefTime(1) = -1.0_realType
+
+            if (useALE .and. equationMode .eq. unsteady) then
+                coefTimeALE(1) = 1.0_realType
+                coefMeshALE(1, 1) = half
+                coefMeshALE(1, 2) = half
+            end if
+
+            nLevelsSet = 2
+
+            !--------------------------------------------------
+
+        case (secondOrder)
+
+            ! Second order time integrator. Determine the amount of
+            ! available states and set the coefficients accordingly.
+            select case (nOldSolAvail)
+
+            case (1_intType)
+                coefTime(0) = 1.0_realType
+                coefTime(1) = -1.0_realType
+
+                if (useALE .and. equationMode .eq. unsteady) then
+                    coefTimeALE(1) = half
+                    coefTimeALE(2) = half
+                    coefTimeALE(3) = zero
+                    coefTimeALE(4) = zero
+
+                    coefMeshALE(1, 1) = half
+                    coefMeshALE(1, 2) = half
+                    coefMeshALE(2, 1) = half
+                    coefMeshALE(2, 2) = half
+                end if
+
+                nLevelsSet = 2
+
+            case default   ! 2 or bigger.
+                coefTime(0) = 1.5_realType
+                coefTime(1) = -2.0_realType
+                coefTime(2) = 0.5_realType
+
+                if (useALE .and. equationMode .eq. unsteady) then
+                    coefTimeALE(1) = threefourth
+                    coefTimeALE(2) = threefourth
+                    coefTimeALE(3) = -fourth
+                    coefTimeALE(4) = -fourth
+
+                    coefMeshALE(1, 1) = half*(1.0_realType + 1.0_realType/sqrtthree)
+                    coefMeshALE(1, 2) = half*(1.0_realType - 1.0_realType/sqrtthree)
+                    coefMeshALE(2, 1) = coefMeshALE(1, 2)
+                    coefMeshALE(2, 2) = coefMeshALE(1, 1)
+                end if
+
+                nLevelsSet = 3
+
+            end select
+
+            !--------------------------------------------------
+
+        case (thirdOrder)
+
+            ! Third order time integrator.  Determine the amount of
+            ! available states and set the coefficients accordingly.
+
+            select case (nOldSolAvail)
+
+            case (1_intType)
+                coefTime(0) = 1.0_realType
+                coefTime(1) = -1.0_realType
+
+                if (useALE .and. equationMode .eq. unsteady) then
+                    coefTimeALE(1) = 1.0_realType
+                    coefMeshALE(1, 1) = half
+                    coefMeshALE(1, 2) = half
+                end if
+
+                nLevelsSet = 2
+
+            case (2_intType)
+                coefTime(0) = 1.5_realType
+                coefTime(1) = -2.0_realType
+                coefTime(2) = 0.5_realType
+
+                if (useALE .and. equationMode .eq. unsteady) then
+                    coefTimeALE(1) = threefourth
+                    coefTimeALE(2) = -fourth
+                    coefMeshALE(1, 1) = half*(1.0_realType + 1.0_realType/sqrtthree)
+                    coefMeshALE(1, 2) = half*(1.0_realType - 1.0_realType/sqrtthree)
+                    coefMeshALE(2, 1) = coefMeshALE(1, 2)
+                    coefMeshALE(2, 2) = coefMeshALE(1, 1)
+                end if
+
+                nLevelsSet = 3
 
-    if(angle == Radian) then
+            case default   ! 3 or bigger.
+                coefTime(0) = 11.0_realType/6.0_realType
+                coefTime(1) = -3.0_realType
+                coefTime(2) = 1.5_realType
+                coefTime(3) = -1.0_realType/3.0_realType
+
+                ! These numbers are NOT correct
+                ! DO NOT use 3rd order ALE for now
+                if (useALE .and. equationMode .eq. unsteady) then
+                    print *, 'Third-order ALE not implemented yet.'
+                    coefTimeALE(1) = threefourth
+                    coefTimeALE(2) = threefourth
+                    coefTimeALE(3) = -fourth
+                    coefTimeALE(4) = -fourth
+                    coefMeshALE(1, 1) = half*(1.0_realType + 1.0_realType/sqrtthree)
+                    coefMeshALE(1, 2) = half*(1.0_realType - 1.0_realType/sqrtthree)
+                    coefMeshALE(2, 1) = coefMeshALE(1, 2)
+                    coefMeshALE(2, 2) = coefMeshALE(1, 1)
+                    coefMeshALE(3, 1) = coefMeshALE(1, 2)
+                    coefMeshALE(3, 2) = coefMeshALE(1, 1)
+                end if
+
+                nLevelsSet = 4
+
+            end select
 
-       ! Angle is already given in radIans. No need for a conversion.
+        end select
 
-       mult  = one
-       trans = zero
+        ! Set the rest of the coefficients to 0 if not enough states
+        ! in the past are available.
 
-    else if(angle == Degree) then
+        do nn = nLevelsSet, nOldLevels
+            coefTime(nn) = zero
+        end do
 
-       ! Angle is given in degrees. A multiplication must be performed.
+    end subroutine setCoefTimeIntegrator
 
-       mult  = pi/180.0_realType
-       trans = zero
+    function myNorm2(x)
+        use constants
+        implicit none
+        real(kind=realType), dimension(3), intent(in) :: x
+        real(kind=realType) :: myNorm2
+        myNorm2 = sqrt(x(1)**2 + x(2)**2 + x(3)**2)
+    end function myNorm2
 
-    else
+    function isWallType(bType)
 
-       call terminate("siAngle", &
-            "No idea how to convert this to SI units")
+        use constants
+        implicit none
+        integer(kind=intType) :: bType
+        logical :: isWallType
 
-    endif
+        isWallType = .False.
+        if (bType == NSWallAdiabatic .or. &
+            bType == NSWallIsoThermal .or. &
+            bType == EulerWall) then
+            isWallType = .True.
+        end if
 
-  end subroutine siAngle
+    end function isWallType
 
+    subroutine cross_prod(a, b, c)
 
-  subroutine siDensity(mass, len, mult, trans)
-    !
-    !       siDensity computes the conversion from the given density
-    !       unit, which can be constructed from mass and length, to the
-    !       SI-unit kg/m^3. The conversion will look like:
-    !       density in kg/m^3 = mult*(density in NCU) + trans.
-    !       NCU means non-christian units, i.e. everything that is not SI.
-    !
-    use constants
-    use su_cgns, only : Kilogram, meter
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer, intent(in)              :: mass, len
-    real(kind=realType), intent(out) :: mult, trans
+        use precision
 
-    ! Determine the situation we are having here.
+        ! Inputs
+        real(kind=realType), dimension(3), intent(in) :: a, b
 
-    if(mass == Kilogram .and. len == Meter) then
+        ! Outputs
+        real(kind=realType), dimension(3), intent(out) :: c
 
-       ! Density is given in kg/m^3, i.e. no need for a conversion.
+        c(1) = a(2)*b(3) - a(3)*b(2)
+        c(2) = a(3)*b(1) - a(1)*b(3)
+        c(3) = a(1)*b(2) - a(2)*b(1)
 
-       mult  = one
-       trans = zero
+    end subroutine cross_prod
 
-    else
+    subroutine siAngle(angle, mult, trans)
 
-       call terminate("siDensity", &
-            "No idea how to convert this to SI units")
+        use constants
+        use su_cgns, only: Radian, Degree
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer, intent(in)              :: angle
+        real(kind=realType), intent(out) :: mult, trans
 
-    endif
+        ! Determine the situation we are having here.
 
-  end subroutine siDensity
+        if (angle == Radian) then
 
-  subroutine siLen(len, mult, trans)
-    !
-    !       siLen computes the conversion from the given length unit to
-    !       the SI-unit meter. The conversion will look like:
-    !       length in meter = mult*(length in NCU) + trans.
-    !       NCU means non-christian units, i.e. everything that is not SI.
-    !
-    use constants
-    use su_cgns, only: Meter, Centimeter, millimeter, Foot, Inch
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer, intent(in)              :: len
-    real(kind=realType), intent(out) :: mult, trans
+            ! Angle is already given in radIans. No need for a conversion.
 
-    ! Determine the situation we are having here.
+            mult = one
+            trans = zero
 
-    select case (len)
+        else if (angle == Degree) then
 
-    case (Meter)
-       mult = one; trans = zero
+            ! Angle is given in degrees. A multiplication must be performed.
 
-    case (CenTimeter)
-       mult = 0.01_realType; trans = zero
+            mult = pi/180.0_realType
+            trans = zero
 
-    case (Millimeter)
-       mult = 0.001_realType; trans = zero
+        else
 
-    case (Foot)
-       mult = 0.3048_realType; trans = zero
+            call terminate("siAngle", &
+                           "No idea how to convert this to SI units")
 
-    case (Inch)
-       mult = 0.0254_realType; trans = zero
+        end if
 
-    case default
-       call terminate("siLen", &
-            "No idea how to convert this to SI units")
+    end subroutine siAngle
 
-    end select
+    subroutine siDensity(mass, len, mult, trans)
+        !
+        !       siDensity computes the conversion from the given density
+        !       unit, which can be constructed from mass and length, to the
+        !       SI-unit kg/m^3. The conversion will look like:
+        !       density in kg/m^3 = mult*(density in NCU) + trans.
+        !       NCU means non-christian units, i.e. everything that is not SI.
+        !
+        use constants
+        use su_cgns, only: Kilogram, meter
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer, intent(in)              :: mass, len
+        real(kind=realType), intent(out) :: mult, trans
 
-  end subroutine siLen
+        ! Determine the situation we are having here.
 
-  subroutine siPressure(mass, len, time, mult, trans)
-    !
-    !       siPressure computes the conversion from the given pressure
-    !       unit, which can be constructed from mass, length and time, to
-    !       the SI-unit Pa. The conversion will look like:
-    !       pressure in Pa = mult*(pressure in NCU) + trans.
-    !       NCU means non-christian units, i.e. everything that is not SI.
-    !
-    use constants
-    use su_cgns, only : Kilogram, Meter, Second
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer, intent(in)              :: mass, len, time
-    real(kind=realType), intent(out) :: mult, trans
+        if (mass == Kilogram .and. len == Meter) then
 
-    ! Determine the situation we are having here.
+            ! Density is given in kg/m^3, i.e. no need for a conversion.
 
-    if(mass == Kilogram .and. len == Meter .and. time == Second) then
+            mult = one
+            trans = zero
 
-       ! Pressure is given in Pa, i.e. no need for a conversion.
+        else
 
-       mult  = one
-       trans = zero
+            call terminate("siDensity", &
+                           "No idea how to convert this to SI units")
 
-    else
+        end if
 
-       call terminate("siPressure", &
-            "No idea how to convert this to SI units")
+    end subroutine siDensity
 
-    endif
+    subroutine siLen(len, mult, trans)
+        !
+        !       siLen computes the conversion from the given length unit to
+        !       the SI-unit meter. The conversion will look like:
+        !       length in meter = mult*(length in NCU) + trans.
+        !       NCU means non-christian units, i.e. everything that is not SI.
+        !
+        use constants
+        use su_cgns, only: Meter, Centimeter, millimeter, Foot, Inch
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer, intent(in)              :: len
+        real(kind=realType), intent(out) :: mult, trans
 
-  end subroutine siPressure
+        ! Determine the situation we are having here.
 
-  subroutine siTemperature(temp, mult, trans)
-    !
-    !       siTemperature computes the conversion from the given
-    !       temperature unit to the SI-unit kelvin. The conversion will
-    !       look like:
-    !       temperature in K = mult*(temperature in NCU) + trans.
-    !       NCU means non-christian units, i.e. everything that is not SI.
-    !
-    use constants
-    use su_cgns, only : Kelvin, Celsius, Rankine, Fahrenheit
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer, intent(in)              :: temp
-    real(kind=realType), intent(out) :: mult, trans
+        select case (len)
 
-    ! Determine the situation we are having here.
+        case (Meter)
+            mult = one; trans = zero
 
-    select case (temp)
+        case (CenTimeter)
+            mult = 0.01_realType; trans = zero
 
-    case (Kelvin)
+        case (Millimeter)
+            mult = 0.001_realType; trans = zero
 
-       ! Temperature is already given in Kelvin. No need to convert.
+        case (Foot)
+            mult = 0.3048_realType; trans = zero
 
-       mult  = one
-       trans = zero
+        case (Inch)
+            mult = 0.0254_realType; trans = zero
 
-    case (Celsius)      ! is it Celcius or Celsius?
+        case default
+            call terminate("siLen", &
+                           "No idea how to convert this to SI units")
 
-       ! Temperature is in Celsius. Only an offset must be applied.
+        end select
 
-       mult  = one
-       trans = 273.16_realType
+    end subroutine siLen
 
-    case (Rankine)
+    subroutine siPressure(mass, len, time, mult, trans)
+        !
+        !       siPressure computes the conversion from the given pressure
+        !       unit, which can be constructed from mass, length and time, to
+        !       the SI-unit Pa. The conversion will look like:
+        !       pressure in Pa = mult*(pressure in NCU) + trans.
+        !       NCU means non-christian units, i.e. everything that is not SI.
+        !
+        use constants
+        use su_cgns, only: Kilogram, Meter, Second
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer, intent(in)              :: mass, len, time
+        real(kind=realType), intent(out) :: mult, trans
 
-       ! Temperature is in Rankine. Only a multiplication needs to
-       ! be performed.
+        ! Determine the situation we are having here.
 
-       mult  = 5.0_realType/9.0_realType
-       trans = zero
+        if (mass == Kilogram .and. len == Meter .and. time == Second) then
 
-    case (Fahrenheit)
+            ! Pressure is given in Pa, i.e. no need for a conversion.
 
-       ! Temperature is in Fahrenheit. Both a multiplication and an
-       ! offset must be applied.
+            mult = one
+            trans = zero
 
-       mult  = 5.0_realType/9.0_realType
-       trans = 255.382
+        else
 
-    case default
+            call terminate("siPressure", &
+                           "No idea how to convert this to SI units")
 
-       ! Unknown temperature unit.
+        end if
 
-       call terminate("siTemperature", &
-            "No idea how to convert this to SI units")
+    end subroutine siPressure
 
-    end select
+    subroutine siTemperature(temp, mult, trans)
+        !
+        !       siTemperature computes the conversion from the given
+        !       temperature unit to the SI-unit kelvin. The conversion will
+        !       look like:
+        !       temperature in K = mult*(temperature in NCU) + trans.
+        !       NCU means non-christian units, i.e. everything that is not SI.
+        !
+        use constants
+        use su_cgns, only: Kelvin, Celsius, Rankine, Fahrenheit
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer, intent(in)              :: temp
+        real(kind=realType), intent(out) :: mult, trans
 
-  end subroutine siTemperature
-  subroutine siTurb(mass, len, time, temp, turbName, mult, trans)
-    !
-    !       siTurb computes the conversion from the given turbulence
-    !       unit, which can be constructed from mass, len, time and temp,
-    !       to the SI-unit for the given variable. The conversion will
-    !       look like: var in SI = mult*(var in NCU) + trans.
-    !       NCU means non-christian units, i.e. everything that is not SI.
-    !
-    use constants
-    use su_cgns, only : Kilogram, Meter, Second, Kelvin
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer, intent(in)              :: mass, len, time, temp
-    character(len=*), intent(in)     :: turbName
-    real(kind=realType), intent(out) :: mult, trans
+        ! Determine the situation we are having here.
 
-    ! Determine the situation we are having here.
+        select case (temp)
 
-    if(mass == Kilogram .and. len  == Meter .and. &
-         time == Second   .and. temp == Kelvin) then
+        case (Kelvin)
 
-       ! Everthing is already in SI units. No conversion needed.
+            ! Temperature is already given in Kelvin. No need to convert.
 
-       mult  = one
-       trans = zero
+            mult = one
+            trans = zero
 
-    else
+        case (Celsius)      ! is it Celcius or Celsius?
 
-       call terminate("siTurb", &
-            "No idea how to convert this to SI units")
+            ! Temperature is in Celsius. Only an offset must be applied.
 
-    endif
+            mult = one
+            trans = 273.16_realType
 
-  end subroutine siTurb
+        case (Rankine)
 
-  subroutine siVelocity(length, time, mult, trans)
-    !
-    !       siVelocity computes the conversion from the given velocity
-    !       unit, which can be constructed from length and time, to the
-    !       SI-unit m/s. The conversion will look like:
-    !       velocity in m/s = mult*(velocity in ncu) + trans.
-    !       Ncu means non-christian units, i.e. everything that is not SI.
-    !
-    use constants
-    use su_cgns, only : Meter, CentiMeter, Millimeter, Foot, Inch, Second
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer, intent(in)              :: length, time
-    real(kind=realType), intent(out) :: mult, trans
+            ! Temperature is in Rankine. Only a multiplication needs to
+            ! be performed.
 
-    ! Determine the situation we are having here.
-    ! First the length.
+            mult = 5.0_realType/9.0_realType
+            trans = zero
 
-    select case (length)
+        case (Fahrenheit)
 
-    case (Meter)
-       mult = one; trans = zero
+            ! Temperature is in Fahrenheit. Both a multiplication and an
+            ! offset must be applied.
 
-    case (CenTimeter)
-       mult = 0.01_realType; trans = zero
+            mult = 5.0_realType/9.0_realType
+            trans = 255.382
 
-    case (Millimeter)
-       mult = 0.001_realType; trans = zero
+        case default
 
-    case (Foot)
-       mult = 0.3048_realType; trans = zero
+            ! Unknown temperature unit.
 
-    case (Inch)
-       mult = 0.0254_realType; trans = zero
+            call terminate("siTemperature", &
+                           "No idea how to convert this to SI units")
 
-    case default
-       call terminate("siVelocity", &
-            "No idea how to convert this length to SI units")
+        end select
 
-    end select
+    end subroutine siTemperature
+    subroutine siTurb(mass, len, time, temp, turbName, mult, trans)
+        !
+        !       siTurb computes the conversion from the given turbulence
+        !       unit, which can be constructed from mass, len, time and temp,
+        !       to the SI-unit for the given variable. The conversion will
+        !       look like: var in SI = mult*(var in NCU) + trans.
+        !       NCU means non-christian units, i.e. everything that is not SI.
+        !
+        use constants
+        use su_cgns, only: Kilogram, Meter, Second, Kelvin
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer, intent(in)              :: mass, len, time, temp
+        character(len=*), intent(in)     :: turbName
+        real(kind=realType), intent(out) :: mult, trans
+
+        ! Determine the situation we are having here.
+
+        if (mass == Kilogram .and. len == Meter .and. &
+            time == Second .and. temp == Kelvin) then
+
+            ! Everthing is already in SI units. No conversion needed.
+
+            mult = one
+            trans = zero
+
+        else
+
+            call terminate("siTurb", &
+                           "No idea how to convert this to SI units")
+
+        end if
+
+    end subroutine siTurb
+
+    subroutine siVelocity(length, time, mult, trans)
+        !
+        !       siVelocity computes the conversion from the given velocity
+        !       unit, which can be constructed from length and time, to the
+        !       SI-unit m/s. The conversion will look like:
+        !       velocity in m/s = mult*(velocity in ncu) + trans.
+        !       Ncu means non-christian units, i.e. everything that is not SI.
+        !
+        use constants
+        use su_cgns, only: Meter, CentiMeter, Millimeter, Foot, Inch, Second
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer, intent(in)              :: length, time
+        real(kind=realType), intent(out) :: mult, trans
+
+        ! Determine the situation we are having here.
+        ! First the length.
+
+        select case (length)
+
+        case (Meter)
+            mult = one; trans = zero
+
+        case (CenTimeter)
+            mult = 0.01_realType; trans = zero
+
+        case (Millimeter)
+            mult = 0.001_realType; trans = zero
+
+        case (Foot)
+            mult = 0.3048_realType; trans = zero
+
+        case (Inch)
+            mult = 0.0254_realType; trans = zero
+
+        case default
+            call terminate("siVelocity", &
+                           "No idea how to convert this length to SI units")
 
-    ! And the time.
+        end select
 
-    select case (time)
+        ! And the time.
 
-    case (Second)
-       mult = mult
+        select case (time)
 
-    case default
-       call terminate("siVelocity", &
-            "No idea how to convert this time to SI units")
+        case (Second)
+            mult = mult
 
-    end select
+        case default
+            call terminate("siVelocity", &
+                           "No idea how to convert this time to SI units")
 
-  end subroutine siVelocity
+        end select
 
+    end subroutine siVelocity
 
-  ! ----------------------------------------------------------------------
-  !                                                                      |
-  !                    No Tapenade Routine below this line               |
-  !                                                                      |
-  ! ----------------------------------------------------------------------
+    ! ----------------------------------------------------------------------
+    !                                                                      |
+    !                    No Tapenade Routine below this line               |
+    !                                                                      |
+    ! ----------------------------------------------------------------------
 
 #ifndef  USE_TAPENADE
-  subroutine setbcpointers_d(nn, spatialpointers)
+    subroutine setbcpointers_d(nn, spatialpointers)
 !
 !       setbcpointers sets the pointers needed for the boundary
 !       condition treatment on a general face, such that the boundary
 !       routines are only implemented once instead of 6 times.
 !
-    use constants
-    use blockpointers, only : w, wd, p, pd, rlv, rlvd, rev, revd, &
-&   gamma, x, xd, d2wall, d2walld, si, sid, sj, sjd, sk, skd, s, sd, &
-&   globalcell, bcdata, bcdatad, nx, il, ie, ib, ny, jl, je, jb, nz, kl,&
-&   ke, kb, bcfaceid, addgridvelocities, sfacei, sfaceid, sfacej, &
-&   sfacejd, sfacek, sfacekd, addgridvelocities
-    use bcpointers_d, only : ww0, ww0d, ww1, ww1d, ww2, ww2d, ww3, ww3d,&
-&   pp0, pp0d, pp1, pp1d, pp2, pp2d, pp3, pp3d, rlv0, rlv0d, rlv1, rlv1d&
-&   , rlv2, rlv2d, rlv3, rlv3d, rev0, rev0d, rev1, rev1d, rev2, rev2d, &
-&   rev3, rev3d, gamma0, gamma1, gamma2, gamma3, gcp, xx, xxd, ss, ssd, &
-&   ssi, ssid, ssj, ssjd, ssk, sskd, dd2wall, sface, istart, iend, &
-&   jstart, jend, isize, jsize
-    use inputphysics, only : cpmodel, equations
-    implicit none
+        use constants
+        use blockpointers, only: w, wd, p, pd, rlv, rlvd, rev, revd, &
+    &   gamma, x, xd, d2wall, d2walld, si, sid, sj, sjd, sk, skd, s, sd, &
+    &   globalcell, bcdata, bcdatad, nx, il, ie, ib, ny, jl, je, jb, nz, kl,&
+    &   ke, kb, bcfaceid, addgridvelocities, sfacei, sfaceid, sfacej, &
+    &   sfacejd, sfacek, sfacekd, addgridvelocities
+        use bcpointers_d, only: ww0, ww0d, ww1, ww1d, ww2, ww2d, ww3, ww3d,&
+    &   pp0, pp0d, pp1, pp1d, pp2, pp2d, pp3, pp3d, rlv0, rlv0d, rlv1, rlv1d&
+    &   , rlv2, rlv2d, rlv3, rlv3d, rev0, rev0d, rev1, rev1d, rev2, rev2d, &
+    &   rev3, rev3d, gamma0, gamma1, gamma2, gamma3, gcp, xx, xxd, ss, ssd, &
+    &   ssi, ssid, ssj, ssjd, ssk, sskd, dd2wall, sface, istart, iend, &
+    &   jstart, jend, isize, jsize
+        use inputphysics, only: cpmodel, equations
+        implicit none
 ! subroutine arguments.
-    integer(kind=inttype), intent(in) :: nn
-    logical, intent(in) :: spatialpointers
+        integer(kind=inttype), intent(in) :: nn
+        logical, intent(in) :: spatialpointers
 ! determine the sizes of each face and point to just the range we
 ! need on each face.
-    istart = bcdata(nn)%icbeg
-    iend = bcdata(nn)%icend
-    jstart = bcdata(nn)%jcbeg
-    jend = bcdata(nn)%jcend
+        istart = bcdata(nn)%icbeg
+        iend = bcdata(nn)%icend
+        jstart = bcdata(nn)%jcbeg
+        jend = bcdata(nn)%jcend
 ! set the size of the subface
-    isize = iend - istart + 1
-    jsize = jend - jstart + 1
+        isize = iend - istart + 1
+        jsize = jend - jstart + 1
 ! determine the face id on which the subface is located and set
 ! the pointers accordinly.
-    select case  (bcfaceid(nn))
-    case (imin)
-!---------------------------------------------------------------------------
-      ww3d => wd(3, 1:, 1:, :)
-      ww3 => w(3, 1:, 1:, :)
-      ww2d => wd(2, 1:, 1:, :)
-      ww2 => w(2, 1:, 1:, :)
-      ww1d => wd(1, 1:, 1:, :)
-      ww1 => w(1, 1:, 1:, :)
-      ww0d => wd(0, 1:, 1:, :)
-      ww0 => w(0, 1:, 1:, :)
-      pp3d => pd(3, 1:, 1:)
-      pp3 => p(3, 1:, 1:)
-      pp2d => pd(2, 1:, 1:)
-      pp2 => p(2, 1:, 1:)
-      pp1d => pd(1, 1:, 1:)
-      pp1 => p(1, 1:, 1:)
-      pp0d => pd(0, 1:, 1:)
-      pp0 => p(0, 1:, 1:)
-      rlv3d => rlvd(3, 1:, 1:)
-      rlv3 => rlv(3, 1:, 1:)
-      rlv2d => rlvd(2, 1:, 1:)
-      rlv2 => rlv(2, 1:, 1:)
-      rlv1d => rlvd(1, 1:, 1:)
-      rlv1 => rlv(1, 1:, 1:)
-      rlv0d => rlvd(0, 1:, 1:)
-      rlv0 => rlv(0, 1:, 1:)
-      rev3d => revd(3, 1:, 1:)
-      rev3 => rev(3, 1:, 1:)
-      rev2d => revd(2, 1:, 1:)
-      rev2 => rev(2, 1:, 1:)
-      rev1d => revd(1, 1:, 1:)
-      rev1 => rev(1, 1:, 1:)
-      rev0d => revd(0, 1:, 1:)
-      rev0 => rev(0, 1:, 1:)
-      gamma3 => gamma(3, 1:, 1:)
-      gamma2 => gamma(2, 1:, 1:)
-      gamma1 => gamma(1, 1:, 1:)
-      gamma0 => gamma(0, 1:, 1:)
-      gcp => globalcell(2, 1:, 1:)
-    case (imax)
-!---------------------------------------------------------------------------
-      ww3d => wd(nx, 1:, 1:, :)
-      ww3 => w(nx, 1:, 1:, :)
-      ww2d => wd(il, 1:, 1:, :)
-      ww2 => w(il, 1:, 1:, :)
-      ww1d => wd(ie, 1:, 1:, :)
-      ww1 => w(ie, 1:, 1:, :)
-      ww0d => wd(ib, 1:, 1:, :)
-      ww0 => w(ib, 1:, 1:, :)
-      pp3d => pd(nx, 1:, 1:)
-      pp3 => p(nx, 1:, 1:)
-      pp2d => pd(il, 1:, 1:)
-      pp2 => p(il, 1:, 1:)
-      pp1d => pd(ie, 1:, 1:)
-      pp1 => p(ie, 1:, 1:)
-      pp0d => pd(ib, 1:, 1:)
-      pp0 => p(ib, 1:, 1:)
-      rlv3d => rlvd(nx, 1:, 1:)
-      rlv3 => rlv(nx, 1:, 1:)
-      rlv2d => rlvd(il, 1:, 1:)
-      rlv2 => rlv(il, 1:, 1:)
-      rlv1d => rlvd(ie, 1:, 1:)
-      rlv1 => rlv(ie, 1:, 1:)
-      rlv0d => rlvd(ib, 1:, 1:)
-      rlv0 => rlv(ib, 1:, 1:)
-      rev3d => revd(nx, 1:, 1:)
-      rev3 => rev(nx, 1:, 1:)
-      rev2d => revd(il, 1:, 1:)
-      rev2 => rev(il, 1:, 1:)
-      rev1d => revd(ie, 1:, 1:)
-      rev1 => rev(ie, 1:, 1:)
-      rev0d => revd(ib, 1:, 1:)
-      rev0 => rev(ib, 1:, 1:)
-      gamma3 => gamma(nx, 1:, 1:)
-      gamma2 => gamma(il, 1:, 1:)
-      gamma1 => gamma(ie, 1:, 1:)
-      gamma0 => gamma(ib, 1:, 1:)
-      gcp => globalcell(il, 1:, 1:)
-    case (jmin)
-!---------------------------------------------------------------------------
-      ww3d => wd(1:, 3, 1:, :)
-      ww3 => w(1:, 3, 1:, :)
-      ww2d => wd(1:, 2, 1:, :)
-      ww2 => w(1:, 2, 1:, :)
-      ww1d => wd(1:, 1, 1:, :)
-      ww1 => w(1:, 1, 1:, :)
-      ww0d => wd(1:, 0, 1:, :)
-      ww0 => w(1:, 0, 1:, :)
-      pp3d => pd(1:, 3, 1:)
-      pp3 => p(1:, 3, 1:)
-      pp2d => pd(1:, 2, 1:)
-      pp2 => p(1:, 2, 1:)
-      pp1d => pd(1:, 1, 1:)
-      pp1 => p(1:, 1, 1:)
-      pp0d => pd(1:, 0, 1:)
-      pp0 => p(1:, 0, 1:)
-      rlv3d => rlvd(1:, 3, 1:)
-      rlv3 => rlv(1:, 3, 1:)
-      rlv2d => rlvd(1:, 2, 1:)
-      rlv2 => rlv(1:, 2, 1:)
-      rlv1d => rlvd(1:, 1, 1:)
-      rlv1 => rlv(1:, 1, 1:)
-      rlv0d => rlvd(1:, 0, 1:)
-      rlv0 => rlv(1:, 0, 1:)
-      rev3d => revd(1:, 3, 1:)
-      rev3 => rev(1:, 3, 1:)
-      rev2d => revd(1:, 2, 1:)
-      rev2 => rev(1:, 2, 1:)
-      rev1d => revd(1:, 1, 1:)
-      rev1 => rev(1:, 1, 1:)
-      rev0d => revd(1:, 0, 1:)
-      rev0 => rev(1:, 0, 1:)
-      gamma3 => gamma(1:, 3, 1:)
-      gamma2 => gamma(1:, 2, 1:)
-      gamma1 => gamma(1:, 1, 1:)
-      gamma0 => gamma(1:, 0, 1:)
-      gcp => globalcell(1:, 2, 1:)
-    case (jmax)
-!---------------------------------------------------------------------------
-      ww3d => wd(1:, ny, 1:, :)
-      ww3 => w(1:, ny, 1:, :)
-      ww2d => wd(1:, jl, 1:, :)
-      ww2 => w(1:, jl, 1:, :)
-      ww1d => wd(1:, je, 1:, :)
-      ww1 => w(1:, je, 1:, :)
-      ww0d => wd(1:, jb, 1:, :)
-      ww0 => w(1:, jb, 1:, :)
-      pp3d => pd(1:, ny, 1:)
-      pp3 => p(1:, ny, 1:)
-      pp2d => pd(1:, jl, 1:)
-      pp2 => p(1:, jl, 1:)
-      pp1d => pd(1:, je, 1:)
-      pp1 => p(1:, je, 1:)
-      pp0d => pd(1:, jb, 1:)
-      pp0 => p(1:, jb, 1:)
-      rlv3d => rlvd(1:, ny, 1:)
-      rlv3 => rlv(1:, ny, 1:)
-      rlv2d => rlvd(1:, jl, 1:)
-      rlv2 => rlv(1:, jl, 1:)
-      rlv1d => rlvd(1:, je, 1:)
-      rlv1 => rlv(1:, je, 1:)
-      rlv0d => rlvd(1:, jb, 1:)
-      rlv0 => rlv(1:, jb, 1:)
-      rev3d => revd(1:, ny, 1:)
-      rev3 => rev(1:, ny, 1:)
-      rev2d => revd(1:, jl, 1:)
-      rev2 => rev(1:, jl, 1:)
-      rev1d => revd(1:, je, 1:)
-      rev1 => rev(1:, je, 1:)
-      rev0d => revd(1:, jb, 1:)
-      rev0 => rev(1:, jb, 1:)
-      gamma3 => gamma(1:, ny, 1:)
-      gamma2 => gamma(1:, jl, 1:)
-      gamma1 => gamma(1:, je, 1:)
-      gamma0 => gamma(1:, jb, 1:)
-      gcp => globalcell(1:, jl, 1:)
-    case (kmin)
-!---------------------------------------------------------------------------
-      ww3d => wd(1:, 1:, 3, :)
-      ww3 => w(1:, 1:, 3, :)
-      ww2d => wd(1:, 1:, 2, :)
-      ww2 => w(1:, 1:, 2, :)
-      ww1d => wd(1:, 1:, 1, :)
-      ww1 => w(1:, 1:, 1, :)
-      ww0d => wd(1:, 1:, 0, :)
-      ww0 => w(1:, 1:, 0, :)
-      pp3d => pd(1:, 1:, 3)
-      pp3 => p(1:, 1:, 3)
-      pp2d => pd(1:, 1:, 2)
-      pp2 => p(1:, 1:, 2)
-      pp1d => pd(1:, 1:, 1)
-      pp1 => p(1:, 1:, 1)
-      pp0d => pd(1:, 1:, 0)
-      pp0 => p(1:, 1:, 0)
-      rlv3d => rlvd(1:, 1:, 3)
-      rlv3 => rlv(1:, 1:, 3)
-      rlv2d => rlvd(1:, 1:, 2)
-      rlv2 => rlv(1:, 1:, 2)
-      rlv1d => rlvd(1:, 1:, 1)
-      rlv1 => rlv(1:, 1:, 1)
-      rlv0d => rlvd(1:, 1:, 0)
-      rlv0 => rlv(1:, 1:, 0)
-      rev3d => revd(1:, 1:, 3)
-      rev3 => rev(1:, 1:, 3)
-      rev2d => revd(1:, 1:, 2)
-      rev2 => rev(1:, 1:, 2)
-      rev1d => revd(1:, 1:, 1)
-      rev1 => rev(1:, 1:, 1)
-      rev0d => revd(1:, 1:, 0)
-      rev0 => rev(1:, 1:, 0)
-      gamma3 => gamma(1:, 1:, 3)
-      gamma2 => gamma(1:, 1:, 2)
-      gamma1 => gamma(1:, 1:, 1)
-      gamma0 => gamma(1:, 1:, 0)
-      gcp => globalcell(1:, 1:, 2)
-    case (kmax)
-!---------------------------------------------------------------------------
-      ww3d => wd(1:, 1:, nz, :)
-      ww3 => w(1:, 1:, nz, :)
-      ww2d => wd(1:, 1:, kl, :)
-      ww2 => w(1:, 1:, kl, :)
-      ww1d => wd(1:, 1:, ke, :)
-      ww1 => w(1:, 1:, ke, :)
-      ww0d => wd(1:, 1:, kb, :)
-      ww0 => w(1:, 1:, kb, :)
-      pp3d => pd(1:, 1:, nz)
-      pp3 => p(1:, 1:, nz)
-      pp2d => pd(1:, 1:, kl)
-      pp2 => p(1:, 1:, kl)
-      pp1d => pd(1:, 1:, ke)
-      pp1 => p(1:, 1:, ke)
-      pp0d => pd(1:, 1:, kb)
-      pp0 => p(1:, 1:, kb)
-      rlv3d => rlvd(1:, 1:, nz)
-      rlv3 => rlv(1:, 1:, nz)
-      rlv2d => rlvd(1:, 1:, kl)
-      rlv2 => rlv(1:, 1:, kl)
-      rlv1d => rlvd(1:, 1:, ke)
-      rlv1 => rlv(1:, 1:, ke)
-      rlv0d => rlvd(1:, 1:, kb)
-      rlv0 => rlv(1:, 1:, kb)
-      rev3d => revd(1:, 1:, nz)
-      rev3 => rev(1:, 1:, nz)
-      rev2d => revd(1:, 1:, kl)
-      rev2 => rev(1:, 1:, kl)
-      rev1d => revd(1:, 1:, ke)
-      rev1 => rev(1:, 1:, ke)
-      rev0d => revd(1:, 1:, kb)
-      rev0 => rev(1:, 1:, kb)
-      gamma3 => gamma(1:, 1:, nz)
-      gamma2 => gamma(1:, 1:, kl)
-      gamma1 => gamma(1:, 1:, ke)
-      gamma0 => gamma(1:, 1:, kb)
-      gcp => globalcell(1:, 1:, kl)
-    end select
-    if (spatialpointers) then
-      select case  (bcfaceid(nn))
-      case (imin)
-        xxd => xd(1, :, :, :)
-        xx => x(1, :, :, :)
-        ssid => sid(1, :, :, :)
-        ssi => si(1, :, :, :)
-        ssjd => sjd(2, :, :, :)
-        ssj => sj(2, :, :, :)
-        sskd => skd(2, :, :, :)
-        ssk => sk(2, :, :, :)
-        ssd => sd(2, :, :, :)
-        ss => s(2, :, :, :)
-      case (imax)
-        xxd => xd(il, :, :, :)
-        xx => x(il, :, :, :)
-        ssid => sid(il, :, :, :)
-        ssi => si(il, :, :, :)
-        ssjd => sjd(il, :, :, :)
-        ssj => sj(il, :, :, :)
-        sskd => skd(il, :, :, :)
-        ssk => sk(il, :, :, :)
-        ssd => sd(il, :, :, :)
-        ss => s(il, :, :, :)
-      case (jmin)
-        xxd => xd(:, 1, :, :)
-        xx => x(:, 1, :, :)
-        ssid => sjd(:, 1, :, :)
-        ssi => sj(:, 1, :, :)
-        ssjd => sid(:, 2, :, :)
-        ssj => si(:, 2, :, :)
-        sskd => skd(:, 2, :, :)
-        ssk => sk(:, 2, :, :)
-        ssd => sd(:, 2, :, :)
-        ss => s(:, 2, :, :)
-      case (jmax)
-        xxd => xd(:, jl, :, :)
-        xx => x(:, jl, :, :)
-        ssid => sjd(:, jl, :, :)
-        ssi => sj(:, jl, :, :)
-        ssjd => sid(:, jl, :, :)
-        ssj => si(:, jl, :, :)
-        sskd => skd(:, jl, :, :)
-        ssk => sk(:, jl, :, :)
-        ssd => sd(:, jl, :, :)
-        ss => s(:, jl, :, :)
-      case (kmin)
-        xxd => xd(:, :, 1, :)
-        xx => x(:, :, 1, :)
-        ssid => skd(:, :, 1, :)
-        ssi => sk(:, :, 1, :)
-        ssjd => sid(:, :, 2, :)
-        ssj => si(:, :, 2, :)
-        sskd => sjd(:, :, 2, :)
-        ssk => sj(:, :, 2, :)
-        ssd => sd(:, :, 2, :)
-        ss => s(:, :, 2, :)
-      case (kmax)
-        xxd => xd(:, :, kl, :)
-        xx => x(:, :, kl, :)
-        ssid => skd(:, :, kl, :)
-        ssi => sk(:, :, kl, :)
-        ssjd => sid(:, :, kl, :)
-        ssj => si(:, :, kl, :)
-        sskd => sjd(:, :, kl, :)
-        ssk => sj(:, :, kl, :)
-        ssd => sd(:, :, kl, :)
-        ss => s(:, :, kl, :)
-      end select
-      if (addgridvelocities) then
-        select case  (bcfaceid(nn))
-        case (imin)
-          sface => sfacei(1, :, :)
-        case (imax)
-          sface => sfacei(il, :, :)
-        case (jmin)
-          sface => sfacej(:, 1, :)
-        case (jmax)
-          sface => sfacej(:, jl, :)
-        case (kmin)
-          sface => sfacek(:, :, 1)
-        case (kmax)
-          sface => sfacek(:, :, kl)
-        end select
-      end if
-      if (equations .eq. ransequations) then
-        select case  (bcfaceid(nn))
+        select case (bcfaceid(nn))
         case (imin)
-          dd2wall => d2wall(2, :, :)
+!---------------------------------------------------------------------------
+            ww3d => wd(3, 1:, 1:, :)
+            ww3 => w(3, 1:, 1:, :)
+            ww2d => wd(2, 1:, 1:, :)
+            ww2 => w(2, 1:, 1:, :)
+            ww1d => wd(1, 1:, 1:, :)
+            ww1 => w(1, 1:, 1:, :)
+            ww0d => wd(0, 1:, 1:, :)
+            ww0 => w(0, 1:, 1:, :)
+            pp3d => pd(3, 1:, 1:)
+            pp3 => p(3, 1:, 1:)
+            pp2d => pd(2, 1:, 1:)
+            pp2 => p(2, 1:, 1:)
+            pp1d => pd(1, 1:, 1:)
+            pp1 => p(1, 1:, 1:)
+            pp0d => pd(0, 1:, 1:)
+            pp0 => p(0, 1:, 1:)
+            rlv3d => rlvd(3, 1:, 1:)
+            rlv3 => rlv(3, 1:, 1:)
+            rlv2d => rlvd(2, 1:, 1:)
+            rlv2 => rlv(2, 1:, 1:)
+            rlv1d => rlvd(1, 1:, 1:)
+            rlv1 => rlv(1, 1:, 1:)
+            rlv0d => rlvd(0, 1:, 1:)
+            rlv0 => rlv(0, 1:, 1:)
+            rev3d => revd(3, 1:, 1:)
+            rev3 => rev(3, 1:, 1:)
+            rev2d => revd(2, 1:, 1:)
+            rev2 => rev(2, 1:, 1:)
+            rev1d => revd(1, 1:, 1:)
+            rev1 => rev(1, 1:, 1:)
+            rev0d => revd(0, 1:, 1:)
+            rev0 => rev(0, 1:, 1:)
+            gamma3 => gamma(3, 1:, 1:)
+            gamma2 => gamma(2, 1:, 1:)
+            gamma1 => gamma(1, 1:, 1:)
+            gamma0 => gamma(0, 1:, 1:)
+            gcp => globalcell(2, 1:, 1:)
         case (imax)
-          dd2wall => d2wall(il, :, :)
+!---------------------------------------------------------------------------
+            ww3d => wd(nx, 1:, 1:, :)
+            ww3 => w(nx, 1:, 1:, :)
+            ww2d => wd(il, 1:, 1:, :)
+            ww2 => w(il, 1:, 1:, :)
+            ww1d => wd(ie, 1:, 1:, :)
+            ww1 => w(ie, 1:, 1:, :)
+            ww0d => wd(ib, 1:, 1:, :)
+            ww0 => w(ib, 1:, 1:, :)
+            pp3d => pd(nx, 1:, 1:)
+            pp3 => p(nx, 1:, 1:)
+            pp2d => pd(il, 1:, 1:)
+            pp2 => p(il, 1:, 1:)
+            pp1d => pd(ie, 1:, 1:)
+            pp1 => p(ie, 1:, 1:)
+            pp0d => pd(ib, 1:, 1:)
+            pp0 => p(ib, 1:, 1:)
+            rlv3d => rlvd(nx, 1:, 1:)
+            rlv3 => rlv(nx, 1:, 1:)
+            rlv2d => rlvd(il, 1:, 1:)
+            rlv2 => rlv(il, 1:, 1:)
+            rlv1d => rlvd(ie, 1:, 1:)
+            rlv1 => rlv(ie, 1:, 1:)
+            rlv0d => rlvd(ib, 1:, 1:)
+            rlv0 => rlv(ib, 1:, 1:)
+            rev3d => revd(nx, 1:, 1:)
+            rev3 => rev(nx, 1:, 1:)
+            rev2d => revd(il, 1:, 1:)
+            rev2 => rev(il, 1:, 1:)
+            rev1d => revd(ie, 1:, 1:)
+            rev1 => rev(ie, 1:, 1:)
+            rev0d => revd(ib, 1:, 1:)
+            rev0 => rev(ib, 1:, 1:)
+            gamma3 => gamma(nx, 1:, 1:)
+            gamma2 => gamma(il, 1:, 1:)
+            gamma1 => gamma(ie, 1:, 1:)
+            gamma0 => gamma(ib, 1:, 1:)
+            gcp => globalcell(il, 1:, 1:)
         case (jmin)
-          dd2wall => d2wall(:, 2, :)
+!---------------------------------------------------------------------------
+            ww3d => wd(1:, 3, 1:, :)
+            ww3 => w(1:, 3, 1:, :)
+            ww2d => wd(1:, 2, 1:, :)
+            ww2 => w(1:, 2, 1:, :)
+            ww1d => wd(1:, 1, 1:, :)
+            ww1 => w(1:, 1, 1:, :)
+            ww0d => wd(1:, 0, 1:, :)
+            ww0 => w(1:, 0, 1:, :)
+            pp3d => pd(1:, 3, 1:)
+            pp3 => p(1:, 3, 1:)
+            pp2d => pd(1:, 2, 1:)
+            pp2 => p(1:, 2, 1:)
+            pp1d => pd(1:, 1, 1:)
+            pp1 => p(1:, 1, 1:)
+            pp0d => pd(1:, 0, 1:)
+            pp0 => p(1:, 0, 1:)
+            rlv3d => rlvd(1:, 3, 1:)
+            rlv3 => rlv(1:, 3, 1:)
+            rlv2d => rlvd(1:, 2, 1:)
+            rlv2 => rlv(1:, 2, 1:)
+            rlv1d => rlvd(1:, 1, 1:)
+            rlv1 => rlv(1:, 1, 1:)
+            rlv0d => rlvd(1:, 0, 1:)
+            rlv0 => rlv(1:, 0, 1:)
+            rev3d => revd(1:, 3, 1:)
+            rev3 => rev(1:, 3, 1:)
+            rev2d => revd(1:, 2, 1:)
+            rev2 => rev(1:, 2, 1:)
+            rev1d => revd(1:, 1, 1:)
+            rev1 => rev(1:, 1, 1:)
+            rev0d => revd(1:, 0, 1:)
+            rev0 => rev(1:, 0, 1:)
+            gamma3 => gamma(1:, 3, 1:)
+            gamma2 => gamma(1:, 2, 1:)
+            gamma1 => gamma(1:, 1, 1:)
+            gamma0 => gamma(1:, 0, 1:)
+            gcp => globalcell(1:, 2, 1:)
         case (jmax)
-          dd2wall => d2wall(:, jl, :)
+!---------------------------------------------------------------------------
+            ww3d => wd(1:, ny, 1:, :)
+            ww3 => w(1:, ny, 1:, :)
+            ww2d => wd(1:, jl, 1:, :)
+            ww2 => w(1:, jl, 1:, :)
+            ww1d => wd(1:, je, 1:, :)
+            ww1 => w(1:, je, 1:, :)
+            ww0d => wd(1:, jb, 1:, :)
+            ww0 => w(1:, jb, 1:, :)
+            pp3d => pd(1:, ny, 1:)
+            pp3 => p(1:, ny, 1:)
+            pp2d => pd(1:, jl, 1:)
+            pp2 => p(1:, jl, 1:)
+            pp1d => pd(1:, je, 1:)
+            pp1 => p(1:, je, 1:)
+            pp0d => pd(1:, jb, 1:)
+            pp0 => p(1:, jb, 1:)
+            rlv3d => rlvd(1:, ny, 1:)
+            rlv3 => rlv(1:, ny, 1:)
+            rlv2d => rlvd(1:, jl, 1:)
+            rlv2 => rlv(1:, jl, 1:)
+            rlv1d => rlvd(1:, je, 1:)
+            rlv1 => rlv(1:, je, 1:)
+            rlv0d => rlvd(1:, jb, 1:)
+            rlv0 => rlv(1:, jb, 1:)
+            rev3d => revd(1:, ny, 1:)
+            rev3 => rev(1:, ny, 1:)
+            rev2d => revd(1:, jl, 1:)
+            rev2 => rev(1:, jl, 1:)
+            rev1d => revd(1:, je, 1:)
+            rev1 => rev(1:, je, 1:)
+            rev0d => revd(1:, jb, 1:)
+            rev0 => rev(1:, jb, 1:)
+            gamma3 => gamma(1:, ny, 1:)
+            gamma2 => gamma(1:, jl, 1:)
+            gamma1 => gamma(1:, je, 1:)
+            gamma0 => gamma(1:, jb, 1:)
+            gcp => globalcell(1:, jl, 1:)
         case (kmin)
-          dd2wall => d2wall(:, :, 2)
+!---------------------------------------------------------------------------
+            ww3d => wd(1:, 1:, 3, :)
+            ww3 => w(1:, 1:, 3, :)
+            ww2d => wd(1:, 1:, 2, :)
+            ww2 => w(1:, 1:, 2, :)
+            ww1d => wd(1:, 1:, 1, :)
+            ww1 => w(1:, 1:, 1, :)
+            ww0d => wd(1:, 1:, 0, :)
+            ww0 => w(1:, 1:, 0, :)
+            pp3d => pd(1:, 1:, 3)
+            pp3 => p(1:, 1:, 3)
+            pp2d => pd(1:, 1:, 2)
+            pp2 => p(1:, 1:, 2)
+            pp1d => pd(1:, 1:, 1)
+            pp1 => p(1:, 1:, 1)
+            pp0d => pd(1:, 1:, 0)
+            pp0 => p(1:, 1:, 0)
+            rlv3d => rlvd(1:, 1:, 3)
+            rlv3 => rlv(1:, 1:, 3)
+            rlv2d => rlvd(1:, 1:, 2)
+            rlv2 => rlv(1:, 1:, 2)
+            rlv1d => rlvd(1:, 1:, 1)
+            rlv1 => rlv(1:, 1:, 1)
+            rlv0d => rlvd(1:, 1:, 0)
+            rlv0 => rlv(1:, 1:, 0)
+            rev3d => revd(1:, 1:, 3)
+            rev3 => rev(1:, 1:, 3)
+            rev2d => revd(1:, 1:, 2)
+            rev2 => rev(1:, 1:, 2)
+            rev1d => revd(1:, 1:, 1)
+            rev1 => rev(1:, 1:, 1)
+            rev0d => revd(1:, 1:, 0)
+            rev0 => rev(1:, 1:, 0)
+            gamma3 => gamma(1:, 1:, 3)
+            gamma2 => gamma(1:, 1:, 2)
+            gamma1 => gamma(1:, 1:, 1)
+            gamma0 => gamma(1:, 1:, 0)
+            gcp => globalcell(1:, 1:, 2)
         case (kmax)
-          dd2wall => d2wall(:, :, kl)
+!---------------------------------------------------------------------------
+            ww3d => wd(1:, 1:, nz, :)
+            ww3 => w(1:, 1:, nz, :)
+            ww2d => wd(1:, 1:, kl, :)
+            ww2 => w(1:, 1:, kl, :)
+            ww1d => wd(1:, 1:, ke, :)
+            ww1 => w(1:, 1:, ke, :)
+            ww0d => wd(1:, 1:, kb, :)
+            ww0 => w(1:, 1:, kb, :)
+            pp3d => pd(1:, 1:, nz)
+            pp3 => p(1:, 1:, nz)
+            pp2d => pd(1:, 1:, kl)
+            pp2 => p(1:, 1:, kl)
+            pp1d => pd(1:, 1:, ke)
+            pp1 => p(1:, 1:, ke)
+            pp0d => pd(1:, 1:, kb)
+            pp0 => p(1:, 1:, kb)
+            rlv3d => rlvd(1:, 1:, nz)
+            rlv3 => rlv(1:, 1:, nz)
+            rlv2d => rlvd(1:, 1:, kl)
+            rlv2 => rlv(1:, 1:, kl)
+            rlv1d => rlvd(1:, 1:, ke)
+            rlv1 => rlv(1:, 1:, ke)
+            rlv0d => rlvd(1:, 1:, kb)
+            rlv0 => rlv(1:, 1:, kb)
+            rev3d => revd(1:, 1:, nz)
+            rev3 => rev(1:, 1:, nz)
+            rev2d => revd(1:, 1:, kl)
+            rev2 => rev(1:, 1:, kl)
+            rev1d => revd(1:, 1:, ke)
+            rev1 => rev(1:, 1:, ke)
+            rev0d => revd(1:, 1:, kb)
+            rev0 => rev(1:, 1:, kb)
+            gamma3 => gamma(1:, 1:, nz)
+            gamma2 => gamma(1:, 1:, kl)
+            gamma1 => gamma(1:, 1:, ke)
+            gamma0 => gamma(1:, 1:, kb)
+            gcp => globalcell(1:, 1:, kl)
         end select
-      end if
-    end if
-  end subroutine setbcpointers_d
-
-  subroutine maxEddyv(eddyvisMax)
-    !
-    !       maxEddyv determines the maximum value of the eddy viscosity
-    !       ratio of the block given by the pointers in blockPointes.
-    !
-    use constants
-    use blockPointers, only : il, jl, kl, rlv, rev
-    use flowVarRefState, only : nwf, eddyModel
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    real(kind=realType), intent(out) :: eddyvisMax
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k
-
-    real(kind=realType) :: eddyvis
-
-    ! Initialize the maximum value to zero and return immediately if
-    ! not an eddy viscosity model is used.
-
-    eddyvisMax = zero
-    if(.not. eddyModel) return
-
-    ! Loop over the owned cells of this block.
-
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-
-             ! Compute the local viscosity ratio and take the maximum
-             ! with the currently stored value.
-
-             eddyvis = rev(i,j,k)/rlv(i,j,k)
-             eddyvisMax = max(eddyvisMax, eddyvis)
-
-          enddo
-       enddo
-    enddo
-
-  end subroutine maxEddyv
-
-  subroutine maxHdiffMach(hdiffMax, MachMax)
-    !
-    !       maxHdiffMach determines the maximum value of the Mach number
-    !       and total enthalpy (or better the relative total enthalpy
-    !       difference with the freestream).
-    !
-    use constants
-    use blockPointers, only : il, jl, kl, w, p, gamma
-    use flowVarRefState, only : pInfCorr, rhoInf, wInf
-    use monitor, only : monMachOrHMax
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    real(kind=realType), intent(out) :: hdiffMax, MachMax
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k
-
-    real(kind=realType) :: hdiff, hInf, Mach2
-
-    ! Initialize the maximum values to zero.
-
-    hdiffMax = zero
-    MachMax  = zero
+        if (spatialpointers) then
+            select case (bcfaceid(nn))
+            case (imin)
+                xxd => xd(1, :, :, :)
+                xx => x(1, :, :, :)
+                ssid => sid(1, :, :, :)
+                ssi => si(1, :, :, :)
+                ssjd => sjd(2, :, :, :)
+                ssj => sj(2, :, :, :)
+                sskd => skd(2, :, :, :)
+                ssk => sk(2, :, :, :)
+                ssd => sd(2, :, :, :)
+                ss => s(2, :, :, :)
+            case (imax)
+                xxd => xd(il, :, :, :)
+                xx => x(il, :, :, :)
+                ssid => sid(il, :, :, :)
+                ssi => si(il, :, :, :)
+                ssjd => sjd(il, :, :, :)
+                ssj => sj(il, :, :, :)
+                sskd => skd(il, :, :, :)
+                ssk => sk(il, :, :, :)
+                ssd => sd(il, :, :, :)
+                ss => s(il, :, :, :)
+            case (jmin)
+                xxd => xd(:, 1, :, :)
+                xx => x(:, 1, :, :)
+                ssid => sjd(:, 1, :, :)
+                ssi => sj(:, 1, :, :)
+                ssjd => sid(:, 2, :, :)
+                ssj => si(:, 2, :, :)
+                sskd => skd(:, 2, :, :)
+                ssk => sk(:, 2, :, :)
+                ssd => sd(:, 2, :, :)
+                ss => s(:, 2, :, :)
+            case (jmax)
+                xxd => xd(:, jl, :, :)
+                xx => x(:, jl, :, :)
+                ssid => sjd(:, jl, :, :)
+                ssi => sj(:, jl, :, :)
+                ssjd => sid(:, jl, :, :)
+                ssj => si(:, jl, :, :)
+                sskd => skd(:, jl, :, :)
+                ssk => sk(:, jl, :, :)
+                ssd => sd(:, jl, :, :)
+                ss => s(:, jl, :, :)
+            case (kmin)
+                xxd => xd(:, :, 1, :)
+                xx => x(:, :, 1, :)
+                ssid => skd(:, :, 1, :)
+                ssi => sk(:, :, 1, :)
+                ssjd => sid(:, :, 2, :)
+                ssj => si(:, :, 2, :)
+                sskd => sjd(:, :, 2, :)
+                ssk => sj(:, :, 2, :)
+                ssd => sd(:, :, 2, :)
+                ss => s(:, :, 2, :)
+            case (kmax)
+                xxd => xd(:, :, kl, :)
+                xx => x(:, :, kl, :)
+                ssid => skd(:, :, kl, :)
+                ssi => sk(:, :, kl, :)
+                ssjd => sid(:, :, kl, :)
+                ssj => si(:, :, kl, :)
+                sskd => sjd(:, :, kl, :)
+                ssk => sj(:, :, kl, :)
+                ssd => sd(:, :, kl, :)
+                ss => s(:, :, kl, :)
+            end select
+            if (addgridvelocities) then
+                select case (bcfaceid(nn))
+                case (imin)
+                    sface => sfacei(1, :, :)
+                case (imax)
+                    sface => sfacei(il, :, :)
+                case (jmin)
+                    sface => sfacej(:, 1, :)
+                case (jmax)
+                    sface => sfacej(:, jl, :)
+                case (kmin)
+                    sface => sfacek(:, :, 1)
+                case (kmax)
+                    sface => sfacek(:, :, kl)
+                end select
+            end if
+            if (equations .eq. ransequations) then
+                select case (bcfaceid(nn))
+                case (imin)
+                    dd2wall => d2wall(2, :, :)
+                case (imax)
+                    dd2wall => d2wall(il, :, :)
+                case (jmin)
+                    dd2wall => d2wall(:, 2, :)
+                case (jmax)
+                    dd2wall => d2wall(:, jl, :)
+                case (kmin)
+                    dd2wall => d2wall(:, :, 2)
+                case (kmax)
+                    dd2wall => d2wall(:, :, kl)
+                end select
+            end if
+        end if
+    end subroutine setbcpointers_d
+
+    subroutine maxEddyv(eddyvisMax)
+        !
+        !       maxEddyv determines the maximum value of the eddy viscosity
+        !       ratio of the block given by the pointers in blockPointes.
+        !
+        use constants
+        use blockPointers, only: il, jl, kl, rlv, rev
+        use flowVarRefState, only: nwf, eddyModel
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        real(kind=realType), intent(out) :: eddyvisMax
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k
+
+        real(kind=realType) :: eddyvis
+
+        ! Initialize the maximum value to zero and return immediately if
+        ! not an eddy viscosity model is used.
+
+        eddyvisMax = zero
+        if (.not. eddyModel) return
+
+        ! Loop over the owned cells of this block.
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Compute the local viscosity ratio and take the maximum
+                    ! with the currently stored value.
+
+                    eddyvis = rev(i, j, k)/rlv(i, j, k)
+                    eddyvisMax = max(eddyvisMax, eddyvis)
 
-    ! In case none of the two variables needs to be monitored,
-    ! a return is made.
+                end do
+            end do
+        end do
 
-    if(.not. monMachOrHMax) return
+    end subroutine maxEddyv
 
-    ! Set the free stream value of the total enthalpy.
+    subroutine maxHdiffMach(hdiffMax, MachMax)
+        !
+        !       maxHdiffMach determines the maximum value of the Mach number
+        !       and total enthalpy (or better the relative total enthalpy
+        !       difference with the freestream).
+        !
+        use constants
+        use blockPointers, only: il, jl, kl, w, p, gamma
+        use flowVarRefState, only: pInfCorr, rhoInf, wInf
+        use monitor, only: monMachOrHMax
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        real(kind=realType), intent(out) :: hdiffMax, MachMax
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k
 
-    hInf = (wInf(irhoE) + pInfCorr)/rhoInf
+        real(kind=realType) :: hdiff, hInf, Mach2
 
-    ! Loop over the owned cells of this block.
+        ! Initialize the maximum values to zero.
 
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
+        hdiffMax = zero
+        MachMax = zero
 
-             ! Compute the local total enthalpy and Mach number squared.
+        ! In case none of the two variables needs to be monitored,
+        ! a return is made.
 
-             hdiff = abs((w(i,j,k,irhoE) + p(i,j,k))/w(i,j,k,irho) - hInf)
-             Mach2 = (w(i,j,k,ivx)**2 + w(i,j,k,ivy)**2 &
-                  +  w(i,j,k,ivz)**2)*w(i,j,k,irho)/(gamma(i,j,k)*p(i,j,k))
+        if (.not. monMachOrHMax) return
 
-             ! Determine the maximum of these values and the
-             ! currently stored maximum values.
+        ! Set the free stream value of the total enthalpy.
 
-             hdiffMax = max(hdiffMax, hdiff)
-             MachMax  = max(MachMax,  Mach2)
+        hInf = (wInf(irhoE) + pInfCorr)/rhoInf
 
-          enddo
-       enddo
-    enddo
+        ! Loop over the owned cells of this block.
 
-    ! Currently the maximum Mach number squared is stored in
-    ! MachMax. Take the square root. Also create a relative
-    ! total enthalpy difference.
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
 
-    MachMax  = sqrt(MachMax)
-    hdiffMax = hdiffMax/hInf
+                    ! Compute the local total enthalpy and Mach number squared.
 
-  end subroutine maxHdiffMach
+                    hdiff = abs((w(i, j, k, irhoE) + p(i, j, k))/w(i, j, k, irho) - hInf)
+                    Mach2 = (w(i, j, k, ivx)**2 + w(i, j, k, ivy)**2 &
+                             + w(i, j, k, ivz)**2)*w(i, j, k, irho)/(gamma(i, j, k)*p(i, j, k))
 
+                    ! Determine the maximum of these values and the
+                    ! currently stored maximum values.
 
+                    hdiffMax = max(hdiffMax, hdiff)
+                    MachMax = max(MachMax, Mach2)
 
-  function delta(val1,val2)
-    !
-    !       delta is a function used to determine the contents of the full
-    !       transformation matrix from the shorthand form. It returns 1
-    !       if the absolute value of the two arguments are identical.
-    !       Otherwise it returns 0.
-    !
-    use constants
-    implicit none
-    !
-    !      Function type.
-    !
-    integer(kind=intType) :: delta
-    !
-    !      Function arguments.
-    !
-    integer(kind=intType) :: val1, val2
-
-    if(abs(val1) == abs(val2)) then
-       delta = 1_intType
-    else
-       delta = 0_intType
-    endif
-
-  end function delta
-
-  subroutine nullifyCGNSDomPointers(nn)
-    !
-    !       nullifyCGNSDomPointers nullifies all the pointers of the
-    !       given CGNS block.
-    !
-    use constants
-    use cgnsGrid, only : cgnsDoms
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nn
-    !
-    nullify(cgnsDoms(nn)%procStored)
-    nullify(cgnsDoms(nn)%conn1to1)
-    nullify(cgnsDoms(nn)%connNonMatchAbutting)
-    nullify(cgnsDoms(nn)%bocoInfo)
-
-  end subroutine nullifyCGNSDomPointers
-
-  subroutine nullifyFlowDomPointers(nn,level,sps)
-    !
-    !       nullifyFlowDomPointers nullifies all the pointers of the
-    !       given block.
-    !
-    use constants
-    use block, only : flowDoms
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nn, level, sps
-
-    nullify(flowDoms(nn,level,sps)%BCType)
-    nullify(flowDoms(nn,level,sps)%BCFaceID)
-    nullify(flowDoms(nn,level,sps)%cgnsSubface)
-
-    nullify(flowDoms(nn,level,sps)%inBeg)
-    nullify(flowDoms(nn,level,sps)%jnBeg)
-    nullify(flowDoms(nn,level,sps)%knBeg)
-    nullify(flowDoms(nn,level,sps)%inEnd)
-    nullify(flowDoms(nn,level,sps)%jnEnd)
-    nullify(flowDoms(nn,level,sps)%knEnd)
-
-    nullify(flowDoms(nn,level,sps)%dinBeg)
-    nullify(flowDoms(nn,level,sps)%djnBeg)
-    nullify(flowDoms(nn,level,sps)%dknBeg)
-    nullify(flowDoms(nn,level,sps)%dinEnd)
-    nullify(flowDoms(nn,level,sps)%djnEnd)
-    nullify(flowDoms(nn,level,sps)%dknEnd)
-
-    nullify(flowDoms(nn,level,sps)%icBeg)
-    nullify(flowDoms(nn,level,sps)%jcBeg)
-    nullify(flowDoms(nn,level,sps)%kcBeg)
-    nullify(flowDoms(nn,level,sps)%icEnd)
-    nullify(flowDoms(nn,level,sps)%jcEnd)
-    nullify(flowDoms(nn,level,sps)%kcEnd)
-
-    nullify(flowDoms(nn,level,sps)%neighBlock)
-    nullify(flowDoms(nn,level,sps)%neighProc)
-    nullify(flowDoms(nn,level,sps)%l1)
-    nullify(flowDoms(nn,level,sps)%l2)
-    nullify(flowDoms(nn,level,sps)%l3)
-    nullify(flowDoms(nn,level,sps)%groupNum)
-
-    nullify(flowDoms(nn,level,sps)%iblank)
-    nullify(flowDoms(nn,level,sps)%forcedRecv)
-    nullify(flowDoms(nn,level,sps)%status)
-    nullify(flowDoms(nn,level,sps)%fringes)
-    nullify(flowDoms(nn,level,sps)%orphans)
-
-    nullify(flowDoms(nn,level,sps)%BCData)
-    nullify(flowDoms(nn,level,sps)%viscSubface)
-
-    nullify(flowDoms(nn,level,sps)%viscIminPointer)
-    nullify(flowDoms(nn,level,sps)%viscImaxPointer)
-    nullify(flowDoms(nn,level,sps)%viscJminPointer)
-    nullify(flowDoms(nn,level,sps)%viscJmaxPointer)
-    nullify(flowDoms(nn,level,sps)%viscKminPointer)
-    nullify(flowDoms(nn,level,sps)%viscKmaxPointer)
-
-    nullify(flowDoms(nn,level,sps)%x)
-    nullify(flowDoms(nn,level,sps)%xOld)
-    nullify(flowDoms(nn,level,sps)%si)
-    nullify(flowDoms(nn,level,sps)%sj)
-    nullify(flowDoms(nn,level,sps)%sk)
-    nullify(flowDoms(nn,level,sps)%vol)
-    nullify(flowDoms(nn,level,sps)%volRef)
-    nullify(flowDoms(nn,level,sps)%volOld)
-
-    nullify(flowDoms(nn,level,sps)%pori)
-    nullify(flowDoms(nn,level,sps)%porj)
-    nullify(flowDoms(nn,level,sps)%pork)
-
-    nullify(flowDoms(nn,level,sps)%indFamilyI)
-    nullify(flowDoms(nn,level,sps)%indFamilyJ)
-    nullify(flowDoms(nn,level,sps)%indFamilyK)
-
-    nullify(flowDoms(nn,level,sps)%factFamilyI)
-    nullify(flowDoms(nn,level,sps)%factFamilyJ)
-    nullify(flowDoms(nn,level,sps)%factFamilyK)
-
-    nullify(flowDoms(nn,level,sps)%rotMatrixI)
-    nullify(flowDoms(nn,level,sps)%rotMatrixJ)
-    nullify(flowDoms(nn,level,sps)%rotMatrixK)
-
-    nullify(flowDoms(nn,level,sps)%sFaceI)
-    nullify(flowDoms(nn,level,sps)%sFaceJ)
-    nullify(flowDoms(nn,level,sps)%sFaceK)
-
-    nullify(flowDoms(nn,level,sps)%w)
-    nullify(flowDoms(nn,level,sps)%wOld)
-    nullify(flowDoms(nn,level,sps)%p)
-    nullify(flowDoms(nn,level,sps)%aa)
-    nullify(flowDoms(nn,level,sps)%gamma)
-    nullify(flowDoms(nn,level,sps)%rlv)
-    nullify(flowDoms(nn,level,sps)%rev)
-    nullify(flowDoms(nn,level,sps)%s)
-
-    nullify(flowDoms(nn,level,sps)%ux)
-    nullify(flowDoms(nn,level,sps)%uy)
-    nullify(flowDoms(nn,level,sps)%uz)
-
-    nullify(flowDoms(nn,level,sps)%vx)
-    nullify(flowDoms(nn,level,sps)%vy)
-    nullify(flowDoms(nn,level,sps)%vz)
-
-    nullify(flowDoms(nn,level,sps)%wx)
-    nullify(flowDoms(nn,level,sps)%wy)
-    nullify(flowDoms(nn,level,sps)%wz)
-
-    nullify(flowDoms(nn,level,sps)%qx)
-    nullify(flowDoms(nn,level,sps)%qy)
-    nullify(flowDoms(nn,level,sps)%qz)
-
-    nullify(flowDoms(nn,level,sps)%dw)
-    nullify(flowDoms(nn,level,sps)%fw)
-    nullify(flowDoms(nn,level,sps)%scratch)
-    nullify(flowDoms(nn,level,sps)%shockSensor)
-
-    nullify(flowDoms(nn,level,sps)%dwOldRK)
-
-    nullify(flowDoms(nn,level,sps)%p1)
-    nullify(flowDoms(nn,level,sps)%w1)
-    nullify(flowDoms(nn,level,sps)%wr)
-
-    nullify(flowDoms(nn,level,sps)%mgIFine)
-    nullify(flowDoms(nn,level,sps)%mgJFine)
-    nullify(flowDoms(nn,level,sps)%mgKFine)
-
-    nullify(flowDoms(nn,level,sps)%mgIWeight)
-    nullify(flowDoms(nn,level,sps)%mgJWeight)
-    nullify(flowDoms(nn,level,sps)%mgKWeight)
-
-    nullify(flowDoms(nn,level,sps)%mgICoarse)
-    nullify(flowDoms(nn,level,sps)%mgJCoarse)
-    nullify(flowDoms(nn,level,sps)%mgKCoarse)
-
-    nullify(flowDoms(nn,level,sps)%ico)
-    nullify(flowDoms(nn,level,sps)%jco)
-    nullify(flowDoms(nn,level,sps)%kco)
-
-    nullify(flowDoms(nn,level,sps)%wn)
-    nullify(flowDoms(nn,level,sps)%pn)
-    nullify(flowDoms(nn,level,sps)%dtl)
-    nullify(flowDoms(nn,level,sps)%radI)
-    nullify(flowDoms(nn,level,sps)%radJ)
-    nullify(flowDoms(nn,level,sps)%radK)
-
-    nullify(flowDoms(nn,level,sps)%d2Wall)
-
-    nullify(flowDoms(nn,level,sps)%bmti1)
-    nullify(flowDoms(nn,level,sps)%bmti2)
-    nullify(flowDoms(nn,level,sps)%bmtj1)
-    nullify(flowDoms(nn,level,sps)%bmtj2)
-    nullify(flowDoms(nn,level,sps)%bmtk1)
-    nullify(flowDoms(nn,level,sps)%bmtk2)
-
-    nullify(flowDoms(nn,level,sps)%bvti1)
-    nullify(flowDoms(nn,level,sps)%bvti2)
-    nullify(flowDoms(nn,level,sps)%bvtj1)
-    nullify(flowDoms(nn,level,sps)%bvtj2)
-    nullify(flowDoms(nn,level,sps)%bvtk1)
-    nullify(flowDoms(nn,level,sps)%bvtk2)
-
-    nullify(flowDoms(nn,level,sps)%globalCell)
-    nullify(flowDoms(nn,level,sps)%globalNode)
-    nullify(flowDOms(nn,level,sps)%surfNodeIndices)
-    nullify(flowDOms(nn,level,sps)%uv)
-    nullify(flowDoms(nn,level,sps)%wallInd)
-    nullify(flowDoms(nn,level,sps)%xSeed)
-
-    ! Added by HDN
-    nullify(flowDoms(nn,level,sps)%xALE)
-    nullify(flowDoms(nn,level,sps)%sIALE)
-    nullify(flowDoms(nn,level,sps)%sJALE)
-    nullify(flowDoms(nn,level,sps)%sKALE)
-    nullify(flowDoms(nn,level,sps)%sFaceIALE)
-    nullify(flowDoms(nn,level,sps)%sFaceJALE)
-    nullify(flowDoms(nn,level,sps)%sFaceKALE)
-    nullify(flowDoms(nn,level,sps)%dwALE)
-    nullify(flowDoms(nn,level,sps)%fwALE)
+                end do
+            end do
+        end do
+
+        ! Currently the maximum Mach number squared is stored in
+        ! MachMax. Take the square root. Also create a relative
+        ! total enthalpy difference.
+
+        MachMax = sqrt(MachMax)
+        hdiffMax = hdiffMax/hInf
+
+    end subroutine maxHdiffMach
+
+    function delta(val1, val2)
+        !
+        !       delta is a function used to determine the contents of the full
+        !       transformation matrix from the shorthand form. It returns 1
+        !       if the absolute value of the two arguments are identical.
+        !       Otherwise it returns 0.
+        !
+        use constants
+        implicit none
+        !
+        !      Function type.
+        !
+        integer(kind=intType) :: delta
+        !
+        !      Function arguments.
+        !
+        integer(kind=intType) :: val1, val2
+
+        if (abs(val1) == abs(val2)) then
+            delta = 1_intType
+        else
+            delta = 0_intType
+        end if
+
+    end function delta
+
+    subroutine nullifyCGNSDomPointers(nn)
+        !
+        !       nullifyCGNSDomPointers nullifies all the pointers of the
+        !       given CGNS block.
+        !
+        use constants
+        use cgnsGrid, only: cgnsDoms
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nn
+        !
+        nullify (cgnsDoms(nn)%procStored)
+        nullify (cgnsDoms(nn)%conn1to1)
+        nullify (cgnsDoms(nn)%connNonMatchAbutting)
+        nullify (cgnsDoms(nn)%bocoInfo)
+
+    end subroutine nullifyCGNSDomPointers
+
+    subroutine nullifyFlowDomPointers(nn, level, sps)
+        !
+        !       nullifyFlowDomPointers nullifies all the pointers of the
+        !       given block.
+        !
+        use constants
+        use block, only: flowDoms
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nn, level, sps
+
+        nullify (flowDoms(nn, level, sps)%BCType)
+        nullify (flowDoms(nn, level, sps)%BCFaceID)
+        nullify (flowDoms(nn, level, sps)%cgnsSubface)
+
+        nullify (flowDoms(nn, level, sps)%inBeg)
+        nullify (flowDoms(nn, level, sps)%jnBeg)
+        nullify (flowDoms(nn, level, sps)%knBeg)
+        nullify (flowDoms(nn, level, sps)%inEnd)
+        nullify (flowDoms(nn, level, sps)%jnEnd)
+        nullify (flowDoms(nn, level, sps)%knEnd)
+
+        nullify (flowDoms(nn, level, sps)%dinBeg)
+        nullify (flowDoms(nn, level, sps)%djnBeg)
+        nullify (flowDoms(nn, level, sps)%dknBeg)
+        nullify (flowDoms(nn, level, sps)%dinEnd)
+        nullify (flowDoms(nn, level, sps)%djnEnd)
+        nullify (flowDoms(nn, level, sps)%dknEnd)
+
+        nullify (flowDoms(nn, level, sps)%icBeg)
+        nullify (flowDoms(nn, level, sps)%jcBeg)
+        nullify (flowDoms(nn, level, sps)%kcBeg)
+        nullify (flowDoms(nn, level, sps)%icEnd)
+        nullify (flowDoms(nn, level, sps)%jcEnd)
+        nullify (flowDoms(nn, level, sps)%kcEnd)
+
+        nullify (flowDoms(nn, level, sps)%neighBlock)
+        nullify (flowDoms(nn, level, sps)%neighProc)
+        nullify (flowDoms(nn, level, sps)%l1)
+        nullify (flowDoms(nn, level, sps)%l2)
+        nullify (flowDoms(nn, level, sps)%l3)
+        nullify (flowDoms(nn, level, sps)%groupNum)
+
+        nullify (flowDoms(nn, level, sps)%iblank)
+        nullify (flowDoms(nn, level, sps)%forcedRecv)
+        nullify (flowDoms(nn, level, sps)%status)
+        nullify (flowDoms(nn, level, sps)%fringes)
+        nullify (flowDoms(nn, level, sps)%orphans)
+
+        nullify (flowDoms(nn, level, sps)%BCData)
+        nullify (flowDoms(nn, level, sps)%viscSubface)
+
+        nullify (flowDoms(nn, level, sps)%viscIminPointer)
+        nullify (flowDoms(nn, level, sps)%viscImaxPointer)
+        nullify (flowDoms(nn, level, sps)%viscJminPointer)
+        nullify (flowDoms(nn, level, sps)%viscJmaxPointer)
+        nullify (flowDoms(nn, level, sps)%viscKminPointer)
+        nullify (flowDoms(nn, level, sps)%viscKmaxPointer)
+
+        nullify (flowDoms(nn, level, sps)%x)
+        nullify (flowDoms(nn, level, sps)%xOld)
+        nullify (flowDoms(nn, level, sps)%si)
+        nullify (flowDoms(nn, level, sps)%sj)
+        nullify (flowDoms(nn, level, sps)%sk)
+        nullify (flowDoms(nn, level, sps)%vol)
+        nullify (flowDoms(nn, level, sps)%volRef)
+        nullify (flowDoms(nn, level, sps)%volOld)
+
+        nullify (flowDoms(nn, level, sps)%pori)
+        nullify (flowDoms(nn, level, sps)%porj)
+        nullify (flowDoms(nn, level, sps)%pork)
+
+        nullify (flowDoms(nn, level, sps)%indFamilyI)
+        nullify (flowDoms(nn, level, sps)%indFamilyJ)
+        nullify (flowDoms(nn, level, sps)%indFamilyK)
+
+        nullify (flowDoms(nn, level, sps)%factFamilyI)
+        nullify (flowDoms(nn, level, sps)%factFamilyJ)
+        nullify (flowDoms(nn, level, sps)%factFamilyK)
+
+        nullify (flowDoms(nn, level, sps)%rotMatrixI)
+        nullify (flowDoms(nn, level, sps)%rotMatrixJ)
+        nullify (flowDoms(nn, level, sps)%rotMatrixK)
+
+        nullify (flowDoms(nn, level, sps)%sFaceI)
+        nullify (flowDoms(nn, level, sps)%sFaceJ)
+        nullify (flowDoms(nn, level, sps)%sFaceK)
+
+        nullify (flowDoms(nn, level, sps)%w)
+        nullify (flowDoms(nn, level, sps)%wOld)
+        nullify (flowDoms(nn, level, sps)%p)
+        nullify (flowDoms(nn, level, sps)%aa)
+        nullify (flowDoms(nn, level, sps)%gamma)
+        nullify (flowDoms(nn, level, sps)%rlv)
+        nullify (flowDoms(nn, level, sps)%rev)
+        nullify (flowDoms(nn, level, sps)%s)
+
+        nullify (flowDoms(nn, level, sps)%ux)
+        nullify (flowDoms(nn, level, sps)%uy)
+        nullify (flowDoms(nn, level, sps)%uz)
+
+        nullify (flowDoms(nn, level, sps)%vx)
+        nullify (flowDoms(nn, level, sps)%vy)
+        nullify (flowDoms(nn, level, sps)%vz)
+
+        nullify (flowDoms(nn, level, sps)%wx)
+        nullify (flowDoms(nn, level, sps)%wy)
+        nullify (flowDoms(nn, level, sps)%wz)
+
+        nullify (flowDoms(nn, level, sps)%qx)
+        nullify (flowDoms(nn, level, sps)%qy)
+        nullify (flowDoms(nn, level, sps)%qz)
+
+        nullify (flowDoms(nn, level, sps)%dw)
+        nullify (flowDoms(nn, level, sps)%fw)
+        nullify (flowDoms(nn, level, sps)%scratch)
+        nullify (flowDoms(nn, level, sps)%shockSensor)
+
+        nullify (flowDoms(nn, level, sps)%dwOldRK)
+
+        nullify (flowDoms(nn, level, sps)%p1)
+        nullify (flowDoms(nn, level, sps)%w1)
+        nullify (flowDoms(nn, level, sps)%wr)
+
+        nullify (flowDoms(nn, level, sps)%mgIFine)
+        nullify (flowDoms(nn, level, sps)%mgJFine)
+        nullify (flowDoms(nn, level, sps)%mgKFine)
+
+        nullify (flowDoms(nn, level, sps)%mgIWeight)
+        nullify (flowDoms(nn, level, sps)%mgJWeight)
+        nullify (flowDoms(nn, level, sps)%mgKWeight)
+
+        nullify (flowDoms(nn, level, sps)%mgICoarse)
+        nullify (flowDoms(nn, level, sps)%mgJCoarse)
+        nullify (flowDoms(nn, level, sps)%mgKCoarse)
+
+        nullify (flowDoms(nn, level, sps)%ico)
+        nullify (flowDoms(nn, level, sps)%jco)
+        nullify (flowDoms(nn, level, sps)%kco)
+
+        nullify (flowDoms(nn, level, sps)%wn)
+        nullify (flowDoms(nn, level, sps)%pn)
+        nullify (flowDoms(nn, level, sps)%dtl)
+        nullify (flowDoms(nn, level, sps)%radI)
+        nullify (flowDoms(nn, level, sps)%radJ)
+        nullify (flowDoms(nn, level, sps)%radK)
+
+        nullify (flowDoms(nn, level, sps)%d2Wall)
+
+        nullify (flowDoms(nn, level, sps)%bmti1)
+        nullify (flowDoms(nn, level, sps)%bmti2)
+        nullify (flowDoms(nn, level, sps)%bmtj1)
+        nullify (flowDoms(nn, level, sps)%bmtj2)
+        nullify (flowDoms(nn, level, sps)%bmtk1)
+        nullify (flowDoms(nn, level, sps)%bmtk2)
+
+        nullify (flowDoms(nn, level, sps)%bvti1)
+        nullify (flowDoms(nn, level, sps)%bvti2)
+        nullify (flowDoms(nn, level, sps)%bvtj1)
+        nullify (flowDoms(nn, level, sps)%bvtj2)
+        nullify (flowDoms(nn, level, sps)%bvtk1)
+        nullify (flowDoms(nn, level, sps)%bvtk2)
+
+        nullify (flowDoms(nn, level, sps)%globalCell)
+        nullify (flowDoms(nn, level, sps)%globalNode)
+        nullify (flowDOms(nn, level, sps)%surfNodeIndices)
+        nullify (flowDOms(nn, level, sps)%uv)
+        nullify (flowDoms(nn, level, sps)%wallInd)
+        nullify (flowDoms(nn, level, sps)%xSeed)
+
+        ! Added by HDN
+        nullify (flowDoms(nn, level, sps)%xALE)
+        nullify (flowDoms(nn, level, sps)%sIALE)
+        nullify (flowDoms(nn, level, sps)%sJALE)
+        nullify (flowDoms(nn, level, sps)%sKALE)
+        nullify (flowDoms(nn, level, sps)%sFaceIALE)
+        nullify (flowDoms(nn, level, sps)%sFaceJALE)
+        nullify (flowDoms(nn, level, sps)%sFaceKALE)
+        nullify (flowDoms(nn, level, sps)%dwALE)
+        nullify (flowDoms(nn, level, sps)%fwALE)
 #ifndef USE_TAPENADE
-    nullify(flowDoms(nn,level,sps)%PCMat)
-    nullify(flowDoms(nn,level,sps)%i_D_Fact)
-    nullify(flowDoms(nn,level,sps)%i_L_Fact)
-    nullify(flowDoms(nn,level,sps)%i_U_Fact)
-    nullify(flowDoms(nn,level,sps)%i_U2_Fact)
-
-    nullify(flowDoms(nn,level,sps)%j_D_Fact)
-    nullify(flowDoms(nn,level,sps)%j_L_Fact)
-    nullify(flowDoms(nn,level,sps)%j_U_Fact)
-    nullify(flowDoms(nn,level,sps)%j_U2_Fact)
-
-    nullify(flowDoms(nn,level,sps)%k_D_Fact)
-    nullify(flowDoms(nn,level,sps)%k_L_Fact)
-    nullify(flowDoms(nn,level,sps)%k_U_Fact)
-    nullify(flowDoms(nn,level,sps)%k_U2_Fact)
+        nullify (flowDoms(nn, level, sps)%PCMat)
+        nullify (flowDoms(nn, level, sps)%i_D_Fact)
+        nullify (flowDoms(nn, level, sps)%i_L_Fact)
+        nullify (flowDoms(nn, level, sps)%i_U_Fact)
+        nullify (flowDoms(nn, level, sps)%i_U2_Fact)
+
+        nullify (flowDoms(nn, level, sps)%j_D_Fact)
+        nullify (flowDoms(nn, level, sps)%j_L_Fact)
+        nullify (flowDoms(nn, level, sps)%j_U_Fact)
+        nullify (flowDoms(nn, level, sps)%j_U2_Fact)
+
+        nullify (flowDoms(nn, level, sps)%k_D_Fact)
+        nullify (flowDoms(nn, level, sps)%k_L_Fact)
+        nullify (flowDoms(nn, level, sps)%k_U_Fact)
+        nullify (flowDoms(nn, level, sps)%k_U2_Fact)
 #endif
 
-  end subroutine nullifyFlowDomPointers
-
-  subroutine reallocateInteger(intArray, newSize, oldSize, &
-       alwaysFreeMem)
-    !
-    !       reallocateInteger reallocates the given integer array to the
-    !       given new size. The old values of the array are copied. Note
-    !       that newSize can be both smaller and larger than oldSize.
-    !
-    use constants
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), dimension(:), pointer :: intArray
-    integer(kind=intType), intent(in) :: newSize, oldSize
-    logical, intent(in) :: alwaysFreeMem
-    !
-    !      Local variables.
-    !
-    integer(kind=intType), dimension(:), pointer :: tmp
-
-    integer(kind=intType) :: i, nn, ll
-
-    integer :: ierr
-
-    ! Determine the minimum of newSize and oldSize.
-
-    nn = min(newSize, oldSize)
-
-    ! Set the pointer for tmp to intArray.
-
-    tmp => intArray
-
-    ! Allocate the memory for intArray in case newSize is larger
-    ! than 0 or if alwaysFreeMem is .true. And copy the old data
-    ! into it. Preserve the lower bound.
-
-    if(newSize > 0 .or. alwaysFreeMem) then
-
-       ll = 1
-       if (associated(intArray)) ll = lbound(intArray,1)
-
-       allocate(intArray(ll:newSize+ll-1), stat=ierr)
-       if(ierr /= 0)                         &
-            call terminate("reallocateInteger", &
-            "Memory allocation failure for intArray")
-       do i=ll,ll+nn-1
-          intArray(i) = tmp(i)
-       enddo
-    endif
-
-    ! Release the memory for tmp in case oldSize is larger than 0 or
-    ! if alwaysFreeMem is .true.
-
-    if(oldSize > 0 .or. alwaysFreeMem) then
-       deallocate(tmp, stat=ierr)
-       if(ierr /= 0)                         &
-            call terminate("reallocateInteger", &
-            "Deallocation error for tmp")
-    endif
-
-  end subroutine reallocateInteger
-
-  !---------------------------------------------------------------------------=
-
-  subroutine reallocateMpiOffsetKindInteger(intArray, newSize, &
-       oldSize, alwaysFreeMem)
-    !
-    !       reallocateMpiOffsetKindInteger reallocates the given
-    !       mpi_offset_kind integer array to the given new size. The old
-    !       values of the array are copied. Note that newSize can be both
-    !       smaller and larger than oldSize.
-    !
-    use constants
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=mpi_offset_kind), dimension(:), pointer :: intArray
-    integer(kind=intType), intent(in) :: newSize, oldSize
-    logical, intent(in) :: alwaysFreeMem
-    !
-    !      Local variables.
-    !
-    integer(kind=mpi_offset_kind), dimension(:), pointer :: tmp
-
-    integer(kind=intType) :: i, nn, ll
-
-    integer :: ierr
-
-    ! Determine the minimum of newSize and oldSize.
-
-    nn = min(newSize, oldSize)
-
-    ! Set the pointer for tmp to intArray.
-
-    tmp => intArray
-
-    ! Allocate the memory for intArray in case newSize is larger
-    ! than 0 or if alwaysFreeMem is .true. And copy the old data
-    ! into it. Preserve the lower bound.
-
-    if(newSize > 0 .or. alwaysFreeMem) then
-
-       ll = 1
-       if (associated(intArray)) ll = lbound(intArray,1)
-
-       allocate(intArray(ll:newSize+ll-1), stat=ierr)
-       if(ierr /= 0)                                      &
-            call terminate("reallocateMpiOffsetKindInteger", &
-            "Memory allocation failure for intArray")
-       do i=ll,ll+nn-1
-          intArray(i) = tmp(i)
-       enddo
-    endif
-
-    ! Release the memory for tmp in case oldSize is larger than 0 or
-    ! if alwaysFreeMem is .true.
-
-    if(oldSize > 0 .or. alwaysFreeMem) then
-       deallocate(tmp, stat=ierr)
-       if(ierr /= 0)                                      &
-            call terminate("reallocateMpiOffsetKindInteger", &
-            "Deallocation error for tmp")
-    endif
-
-  end subroutine reallocateMpiOffsetKindInteger
-
-  !---------------------------------------------------------------------------=
-
-  subroutine reallocateInteger2(intArray, newSize1, newSize2, &
-       oldSize1, oldSize2,           &
-       alwaysFreeMem)
-    !
-    !       reallocateInteger2 reallocates the given 2D integer array to
-    !       the given new sizes. The old values of the array are copied.
-    !       Note that the newSizes can be both smaller and larger than
-    !       the oldSizes.
-    !
-    use constants
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), dimension(:,:), pointer :: intArray
-    integer(kind=intType), intent(in) :: newSize1, newSize2, &
-         oldSize1, oldSize2
-    logical, intent(in) :: alwaysFreeMem
-    !
-    !      Local variables.
-    !
-    integer(kind=intType), dimension(:,:), pointer :: tmp
-
-    integer(kind=intType) :: newSize, oldSize
-    integer(kind=intType) :: nn1, nn2, nn
-
-    integer(kind=intType) :: i, j
-
-    integer :: ierr
-
-    ! Determine the total new and old size.
-
-    newSize = newSize1*newSize2
-    oldSize = oldSize1*oldSize2
-
-    ! Determine for each of the 2 components the minimum of the new
-    ! and the old size. Multiply these values to obtain the total
-    ! amount of data that must be copied.
-
-    nn1 = min(newSize1, oldSize1)
-    nn2 = min(newSize2, oldSize2)
-
-    nn = nn1*nn2
-
-    ! Set the pointer for tmp.
-
-    tmp => intArray
-
-    ! Allocate the memory for intArray in case newSize is larger
-    ! than 0 or if alwaysFreeMem is .true. and copy the old data
-    ! into it.
-
-    if(newSize > 0 .or. alwaysFreeMem) then
-       allocate(intArray(newSize1,newSize2), stat=ierr)
-       if(ierr /= 0)                          &
-            call terminate("reallocateInteger2", &
-            "Memory allocation failure for intArray")
-       do j=1,nn2
-          do i=1,nn1
-             intArray(i,j) = tmp(i,j)
-          enddo
-       enddo
-    endif
-
-    ! Release the memory of tmp in case oldSize is larger than 0
-    ! or if alwaysFreeMem is .true..
-
-    if(oldSize > 0 .or. alwaysFreeMem) then
-       deallocate(tmp, stat=ierr)
-       if(ierr /= 0)                          &
-            call terminate("reallocateInteger2", &
-            "Deallocation error for tmp")
-    endif
-
-  end subroutine reallocateInteger2
-
-  subroutine reallocateReal(realArray, newSize, oldSize, &
-       alwaysFreeMem)
-    !
-    !       ReallocateReal reallocates the given real array to the given
-    !       new size. The old values of the array are copied. Note that
-    !       newSize can be both smaller and larger than oldSize.
-    !
-    use constants
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    real(kind=realType), dimension(:), pointer :: realArray
-    integer(kind=intType), intent(in) :: newSize, oldSize
-    logical, intent(in) :: alwaysFreeMem
-    !
-    !      Local variables.
-    !
-    real(kind=realType), dimension(:), pointer :: tmp
-
-    integer(kind=intType) :: i, nn
-
-    integer :: ierr
-
-    ! Determine the minimum of newSize and oldSize.
-
-    nn = min(newSize, oldSize)
-
-    ! Set the pointer for tmp to realArray.
-
-    tmp => realArray
-
-    ! Allocate the memory for realArray in case newSize is larger
-    ! than 0 or if alwaysFreeMem is .True. And copy the old data
-    ! into it.
-
-    if(newSize > 0 .or. alwaysFreeMem) then
-       allocate(realArray(newSize), stat=ierr)
-       if(ierr /= 0)                       &
-            call terminate("reallocateReal", &
-            "Memory allocation failure for realArray")
-       do i=1,nn
-          realArray(i) = tmp(i)
-       enddo
-    endif
-
-    ! Release the memory for tmp in case oldSize is larger than 0 or
-    ! if alwaysFreeMem is .True.
-
-    if(oldSize > 0 .or. alwaysFreeMem) then
-       deallocate(tmp, stat=ierr)
-       if(ierr /= 0)                       &
-            call terminate("reallocateReal", &
-            "Deallocation error for tmp")
-    endif
-
-  end subroutine reallocateReal
-
-  !---------------------------------------------------------------------------=
-
-  subroutine reallocateReal2(realArray, newSize1, newSize2, &
-       oldSize1, oldSize2,            &
-       alwaysFreeMem)
-    !
-    !       ReallocateReal2 reallocates the given 2d integer array to
-    !       the given new sizes. The old values of the array are copied.
-    !       Note that the newSizes can be both smaller and larger than
-    !       the oldSizes.
-    !
-    use constants
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    real(kind=realType), dimension(:,:), pointer :: realArray
-    integer(kind=intType), intent(in) :: newSize1, newSize2, &
-         oldSize1, oldSize2
-    logical, intent(in) :: alwaysFreeMem
-    !
-    !      Local variables.
-    !
-    real(kind=realType), dimension(:,:), pointer :: tmp
-
-    integer(kind=intType) :: newSize, oldSize
-    integer(kind=intType) :: nn1, nn2, nn
-
-    integer(kind=intType) :: i, j
-
-    integer :: ierr
-
-    ! Determine the total new and old size.
-
-    newSize = newSize1*newSize2
-    oldSize = oldSize1*oldSize2
-
-    ! Determine for each of the 2 components the minimum of the new
-    ! and the old size. Multiply these values to obtain the total
-    ! amount of data that must be copied.
-
-    nn1 = min(newSize1, oldSize1)
-    nn2 = min(newSize2, oldSize2)
-
-    nn = nn1*nn2
-
-    ! Set the pointer for tmp.
-
-    tmp => realArray
-
-    ! Allocate the memory for realArray in case newSize is larger
-    ! than 0 or if alwaysFreeMem is .True. And copy the old data
-    ! into it.
-
-    if(newSize > 0 .or. alwaysFreeMem) then
-       allocate(realArray(newSize1,newSize2), stat=ierr)
-       if(ierr /= 0)                           &
-            call terminate("reallocateReal2", &
-            "Memory allocation failure for realArray")
-       do j=1,nn2
-          do i=1,nn1
-             realArray(i,j) = tmp(i,j)
-          enddo
-       enddo
-    endif
-
-    ! Release the memory of tmp in case oldSize is larger than 0
-    ! or if alwaysFreeMem is .True..
-
-    if(oldSize > 0 .or. alwaysFreeMem) then
-       deallocate(tmp, stat=ierr)
-       if(ierr /= 0)                           &
-            call terminate("reallocateReal2", &
-            "Deallocation error for tmp")
-    endif
-
-  end subroutine reallocateReal2
-
-  subroutine setBufferSizes(level, sps, determine1to1Buf, determineOversetBuf)
-    !
-    !       setBufferSizes determines the size of the send and receive
-    !       buffers for this grid level. After that the maximum value of
-    !       these sizes and the currently stored value is taken, such that
-    !       for all mg levels the same buffer can be used. Normally the
-    !       size on the finest grid should be enough, but it is just as
-    !       safe to check on all mg levels.
-    !
-    use constants
-    use communication, only : commPatternNode_1st, commPatternCell_2nd, &
-         commPatternOverset,  recvBufferSize, recvBufferSize_1to1, &
-         recvBufferSizeOver, recvBufferSizeOver, sendBufferSize, recvBufferSize, &
-         sendBufferSizeOver, sendBufferSize_1to1
-    use flowVarRefState, only : nw, eddyModel, viscous
-    use inputPhysics, only : cpModel
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level, sps
-    logical, intent(in) :: determine1to1Buf
-    logical, intent(in) :: determineOversetBuf
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i
-    integer(kind=intType) :: sendSize, recvSize, nVarComm
-
-    ! Determine the maximum number of variables to be communicated.
-
-    nVarComm = nw + 1
-    if(cpModel == cpTempCurveFits) nVarComm = nVarComm + 1
-    if( viscous )   nVarComm = nVarComm + 1
-    if( eddyModel ) nVarComm = nVarComm + 1
-
-    ! Check if the 1 to 1 communication must be considered.
-
-    if( determine1to1Buf ) then
-
-       ! Store the send and receive buffer sizes needed for the nodal
-       ! exchange. Determine the maximum for the number of send and
-       ! receive processors.
-
-       i = commPatternNode_1st(level)%nProcSend
-       sendSize = commPatternNode_1st(level)%nsendCum(i)
-
-       i = commPatternNode_1st(level)%nProcRecv
-       recvSize = commPatternNode_1st(level)%nrecvCum(i)
-
-       ! Determine the buffer sizes for the 2nd level cell exchange and
-       ! set the size for this processor to the maximum needed. Note
-       ! that it is not needed to test the 1st level cell halo, because
-       ! it is entirely incorporated in the 2nd level.
-       ! Determine the maximum for the number of send and receive
-       ! processors as well.
-
-       i = commPatternCell_2nd(level)%nProcSend
-       sendSize = max(sendSize, &
-            commPatternCell_2nd(level)%nsendCum(i))
+    end subroutine nullifyFlowDomPointers
+
+    subroutine reallocateInteger(intArray, newSize, oldSize, &
+                                 alwaysFreeMem)
+        !
+        !       reallocateInteger reallocates the given integer array to the
+        !       given new size. The old values of the array are copied. Note
+        !       that newSize can be both smaller and larger than oldSize.
+        !
+        use constants
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), dimension(:), pointer :: intArray
+        integer(kind=intType), intent(in) :: newSize, oldSize
+        logical, intent(in) :: alwaysFreeMem
+        !
+        !      Local variables.
+        !
+        integer(kind=intType), dimension(:), pointer :: tmp
+
+        integer(kind=intType) :: i, nn, ll
+
+        integer :: ierr
+
+        ! Determine the minimum of newSize and oldSize.
+
+        nn = min(newSize, oldSize)
+
+        ! Set the pointer for tmp to intArray.
+
+        tmp => intArray
+
+        ! Allocate the memory for intArray in case newSize is larger
+        ! than 0 or if alwaysFreeMem is .true. And copy the old data
+        ! into it. Preserve the lower bound.
+
+        if (newSize > 0 .or. alwaysFreeMem) then
+
+            ll = 1
+            if (associated(intArray)) ll = lbound(intArray, 1)
+
+            allocate (intArray(ll:newSize + ll - 1), stat=ierr)
+            if (ierr /= 0) &
+                call terminate("reallocateInteger", &
+                               "Memory allocation failure for intArray")
+            do i = ll, ll + nn - 1
+                intArray(i) = tmp(i)
+            end do
+        end if
+
+        ! Release the memory for tmp in case oldSize is larger than 0 or
+        ! if alwaysFreeMem is .true.
+
+        if (oldSize > 0 .or. alwaysFreeMem) then
+            deallocate (tmp, stat=ierr)
+            if (ierr /= 0) &
+                call terminate("reallocateInteger", &
+                               "Deallocation error for tmp")
+        end if
+
+    end subroutine reallocateInteger
+
+    !---------------------------------------------------------------------------=
+
+    subroutine reallocateMpiOffsetKindInteger(intArray, newSize, &
+                                              oldSize, alwaysFreeMem)
+        !
+        !       reallocateMpiOffsetKindInteger reallocates the given
+        !       mpi_offset_kind integer array to the given new size. The old
+        !       values of the array are copied. Note that newSize can be both
+        !       smaller and larger than oldSize.
+        !
+        use constants
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=mpi_offset_kind), dimension(:), pointer :: intArray
+        integer(kind=intType), intent(in) :: newSize, oldSize
+        logical, intent(in) :: alwaysFreeMem
+        !
+        !      Local variables.
+        !
+        integer(kind=mpi_offset_kind), dimension(:), pointer :: tmp
+
+        integer(kind=intType) :: i, nn, ll
+
+        integer :: ierr
+
+        ! Determine the minimum of newSize and oldSize.
+
+        nn = min(newSize, oldSize)
+
+        ! Set the pointer for tmp to intArray.
+
+        tmp => intArray
+
+        ! Allocate the memory for intArray in case newSize is larger
+        ! than 0 or if alwaysFreeMem is .true. And copy the old data
+        ! into it. Preserve the lower bound.
+
+        if (newSize > 0 .or. alwaysFreeMem) then
+
+            ll = 1
+            if (associated(intArray)) ll = lbound(intArray, 1)
+
+            allocate (intArray(ll:newSize + ll - 1), stat=ierr)
+            if (ierr /= 0) &
+                call terminate("reallocateMpiOffsetKindInteger", &
+                               "Memory allocation failure for intArray")
+            do i = ll, ll + nn - 1
+                intArray(i) = tmp(i)
+            end do
+        end if
+
+        ! Release the memory for tmp in case oldSize is larger than 0 or
+        ! if alwaysFreeMem is .true.
+
+        if (oldSize > 0 .or. alwaysFreeMem) then
+            deallocate (tmp, stat=ierr)
+            if (ierr /= 0) &
+                call terminate("reallocateMpiOffsetKindInteger", &
+                               "Deallocation error for tmp")
+        end if
+
+    end subroutine reallocateMpiOffsetKindInteger
+
+    !---------------------------------------------------------------------------=
+
+    subroutine reallocateInteger2(intArray, newSize1, newSize2, &
+                                  oldSize1, oldSize2, &
+                                  alwaysFreeMem)
+        !
+        !       reallocateInteger2 reallocates the given 2D integer array to
+        !       the given new sizes. The old values of the array are copied.
+        !       Note that the newSizes can be both smaller and larger than
+        !       the oldSizes.
+        !
+        use constants
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), dimension(:, :), pointer :: intArray
+        integer(kind=intType), intent(in) :: newSize1, newSize2, &
+                                             oldSize1, oldSize2
+        logical, intent(in) :: alwaysFreeMem
+        !
+        !      Local variables.
+        !
+        integer(kind=intType), dimension(:, :), pointer :: tmp
+
+        integer(kind=intType) :: newSize, oldSize
+        integer(kind=intType) :: nn1, nn2, nn
+
+        integer(kind=intType) :: i, j
+
+        integer :: ierr
+
+        ! Determine the total new and old size.
+
+        newSize = newSize1*newSize2
+        oldSize = oldSize1*oldSize2
+
+        ! Determine for each of the 2 components the minimum of the new
+        ! and the old size. Multiply these values to obtain the total
+        ! amount of data that must be copied.
+
+        nn1 = min(newSize1, oldSize1)
+        nn2 = min(newSize2, oldSize2)
+
+        nn = nn1*nn2
+
+        ! Set the pointer for tmp.
+
+        tmp => intArray
+
+        ! Allocate the memory for intArray in case newSize is larger
+        ! than 0 or if alwaysFreeMem is .true. and copy the old data
+        ! into it.
 
-       i = commPatternCell_2nd(level)%nProcRecv
-       recvSize = max(recvSize, &
-            commPatternCell_2nd(level)%nrecvCum(i))
-
-       ! Multiply sendSize and recvSize with the number of variables to
-       ! be communicated.
-
-       sendSize = sendSize*nVarComm
-       recvSize = recvSize*nVarComm
-
-       ! Store the maximum of the current values and the old values
-       ! in sendBufferSize1to1 and recvBufferSize1to1.
-
-       sendBufferSize_1to1 = max(sendBufferSize_1to1, sendSize)
-       recvBufferSize_1to1 = max(recvBufferSize_1to1, recvSize)
-
-    endif
-
-    ! Check if the overset communication must be considered.
-
-    if( determineOversetBuf ) then
-
-       ! Same deal for the overset communication.
-
-       i = commPatternOverset(level,sps)%nProcSend
-       sendSize = commPatternOverset(level,sps)%nsendCum(i)
-
-       i = commPatternOverset(level,sps)%nProcRecv
-       recvSize = commPatternOverset(level,sps)%nrecvCum(i)
-
-       ! Multiply sendSize and recvSize with the number of variables to
-       ! be communicated.
-
-       sendSize = sendSize*nVarComm
-       recvSize = recvSize*nVarComm
-
-       ! Store the maximum of the current values and the old values.
-
-       sendBufferSizeOver = max(sendBufferSizeOver, sendSize)
-       recvBufferSizeOver = max(recvBufferSizeOver, recvSize)
-
-    endif
-
-    ! Take the maximum for of all the buffers to
-    ! obtain the actual size to be allocated.
-
-    sendBufferSize = max(sendBufferSize_1to1, &
-         sendBufferSizeOver)
-    recvBufferSize = max(recvBufferSize_1to1, &
-         recvBufferSizeOver)
-
-  end subroutine setBufferSizes
-
-  subroutine setPointers(nn,mm,ll)
-    !
-    !       setPointers makes the variables in blockPointers point to
-    !       block nn for grid level mm and spectral solution ll.
-    !
-    ! Make an exception to use..only. We literally need everything
-    ! from blockPointers so use a bare use.
-    use constants
-    use blockPointers
-    use inputPhysics, only : useRoughSA
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    integer(kind=intType), intent(in) :: nn, mm, ll
-
-    ! Store the info of the current block, such that inside the
-    ! module blockPointers it is known to which block the data
-    ! belongs.
-
-    sectionID   = 1 ! We currently are only ever allowed 1 section
-    nbkLocal    = nn
-    nbkGlobal   = flowDoms(nn,mm,ll)%cgnsBlockID
-    mgLevel     = mm
-    spectralSol = ll
-
-    ! Block dimensions.
-
-    nx = flowDoms(nn,mm,ll)%nx
-    ny = flowDoms(nn,mm,ll)%ny
-    nz = flowDoms(nn,mm,ll)%nz
-
-    il = flowDoms(nn,mm,ll)%il
-    jl = flowDoms(nn,mm,ll)%jl
-    kl = flowDoms(nn,mm,ll)%kl
-
-    ie = flowDoms(nn,mm,ll)%ie
-    je = flowDoms(nn,mm,ll)%je
-    ke = flowDoms(nn,mm,ll)%ke
-
-    ib = flowDoms(nn,mm,ll)%ib
-    jb = flowDoms(nn,mm,ll)%jb
-    kb = flowDoms(nn,mm,ll)%kb
-
-    imaxDim = max(ie,je)
-    jmaxDim = max(je,ke)
-
-    rightHanded = flowDoms(nn,mm,ll)%righthanded
-
-    ! Point range in the corresponding cgns block
-
-    iBegor = flowDoms(nn,mm,ll)%iBegor
-    iEndor = flowDoms(nn,mm,ll)%iEndor
-    jBegor = flowDoms(nn,mm,ll)%jBegor
-    jEndor = flowDoms(nn,mm,ll)%jEndor
-    kBegor = flowDoms(nn,mm,ll)%kBegor
-    kEndor = flowDoms(nn,mm,ll)%kEndor
-
-    ! Subface info. Note that the pointers point to the 1st spectral
-    ! mode, because this is the only one allocated. The info is the
-    ! same for all modes.
-
-    nSubface   = flowDoms(nn,mm,ll)%nSubface
-    n1to1      = flowDoms(nn,mm,ll)%n1to1
-    nBocos     = flowDoms(nn,mm,ll)%nBocos
-    nViscBocos = flowDoms(nn,mm,ll)%nViscBocos
-
-    BCType      => flowDoms(nn,mm,1)%BCType
-    BCFaceID    => flowDoms(nn,mm,1)%BCFaceID
-    cgnsSubface => flowDoms(nn,mm,1)%cgnsSubface
-
-    inBeg => flowDoms(nn,mm,1)%inBeg
-    jnBeg => flowDoms(nn,mm,1)%jnBeg
-    knBeg => flowDoms(nn,mm,1)%knBeg
-    inEnd => flowDoms(nn,mm,1)%inEnd
-    jnEnd => flowDoms(nn,mm,1)%jnEnd
-    knEnd => flowDoms(nn,mm,1)%knEnd
-
-    dinBeg => flowDoms(nn,mm,1)%dinBeg
-    djnBeg => flowDoms(nn,mm,1)%djnBeg
-    dknBeg => flowDoms(nn,mm,1)%dknBeg
-    dinEnd => flowDoms(nn,mm,1)%dinEnd
-    djnEnd => flowDoms(nn,mm,1)%djnEnd
-    dknEnd => flowDoms(nn,mm,1)%dknEnd
-
-    icBeg => flowDoms(nn,mm,1)%icBeg
-    jcBeg => flowDoms(nn,mm,1)%jcBeg
-    kcBeg => flowDoms(nn,mm,1)%kcBeg
-    icEnd => flowDoms(nn,mm,1)%icEnd
-    jcEnd => flowDoms(nn,mm,1)%jcEnd
-    kcEnd => flowDoms(nn,mm,1)%kcEnd
-
-    neighBlock => flowDoms(nn,mm,1)%neighBlock
-    neighProc  => flowDoms(nn,mm,1)%neighProc
-    l1         => flowDoms(nn,mm,1)%l1
-    l2         => flowDoms(nn,mm,1)%l2
-    l3         => flowDoms(nn,mm,1)%l3
-    groupNum   => flowDoms(nn,mm,1)%groupNum
-
-    ! Overset boundary and hole info.
-    iblank => flowDoms(nn,mm,ll)%iblank
-    status => flowDoms(nn,mm,ll)%status
-    forcedRecv => flowDoms(nn,mm,ll)%forcedRecv
-
-    fringes => flowDoms(nn,mm,ll)%fringes
-    fringePtr => flowDoms(nn,mm,ll)%fringePtr
-    gind => flowDoms(nn, mm, ll)%gInd
-    nDonors => flowDoms(nn,mm,ll)%nDonors
-
-    orphans => flowDoms(nn,mm,ll)%orphans
-    nOrphans = flowDoms(nn,mm,ll)%nOrphans
-
-    ! The data for boundary subfaces.
-
-    BCData => flowDoms(nn,mm,ll)%BCData
-
-    ! The stress tensor and heat flux vector at viscous wall faces
-    ! as well as the face pointers to these viscous wall faces.
-    ! The latter point to the 1st spectral mode, because they are
-    ! the only ones allocated. The info is the same for all modes.
-
-    viscSubface => flowDoms(nn,mm,ll)%viscSubface
-
-    viscIminPointer => flowDoms(nn,mm,1)%viscIminPointer
-    viscImaxPointer => flowDoms(nn,mm,1)%viscImaxPointer
-    viscJminPointer => flowDoms(nn,mm,1)%viscJminPointer
-    viscJmaxPointer => flowDoms(nn,mm,1)%viscJmaxPointer
-    viscKminPointer => flowDoms(nn,mm,1)%viscKminPointer
-    viscKmaxPointer => flowDoms(nn,mm,1)%viscKmaxPointer
-
-    ! Mesh related variables. The porosities point to the 1st
-    ! spectral mode, because they are the only ones allocated.
-    ! The info is the same for all modes.
-    ! Note that xOld and volOld always point to the finest
-    ! grid level.
-
-    x    => flowDoms(nn,mm,ll)%x
-    xOld => flowDoms(nn,1,ll)%xOld
-
-    si     => flowDoms(nn,mm,ll)%si
-    sj     => flowDoms(nn,mm,ll)%sj
-    sk     => flowDoms(nn,mm,ll)%sk
-
-    vol    => flowDoms(nn,mm,ll)%vol
-    volRef => flowDoms(nn,mm,ll)%volRef
-    volOld => flowDoms(nn,1,ll)%volOld
-
-    porI => flowDoms(nn,mm,1)%porI
-    porJ => flowDoms(nn,mm,1)%porJ
-    porK => flowDoms(nn,mm,1)%porK
-
-    indFamilyI => flowDoms(nn,mm,1)%indFamilyI
-    indFamilyJ => flowDoms(nn,mm,1)%indFamilyJ
-    indFamilyK => flowDoms(nn,mm,1)%indFamilyK
-
-    factFamilyI => flowDoms(nn,mm,1)%factFamilyI
-    factFamilyJ => flowDoms(nn,mm,1)%factFamilyJ
-    factFamilyK => flowDoms(nn,mm,1)%factFamilyK
-
-    rotMatrixI => flowDoms(nn,mm,ll)%rotMatrixI
-    rotMatrixJ => flowDoms(nn,mm,ll)%rotMatrixJ
-    rotMatrixK => flowDoms(nn,mm,ll)%rotMatrixK
-
-    blockIsMoving     = flowDoms(nn,mm,ll)%blockIsMoving
-    addGridVelocities = flowDoms(nn,mm,ll)%addGridVelocities
-
-    sFaceI => flowDoms(nn,mm,ll)%sFaceI
-    sFaceJ => flowDoms(nn,mm,ll)%sFaceJ
-    sFaceK => flowDoms(nn,mm,ll)%sFaceK
-
-    ! Flow variables. Note that wOld, gamma and the laminar viscosity
-    ! point to the entries on the finest mesh. The reason is that
-    ! they are computed from the other variables. For the eddy
-    ! viscosity this is not the case because in a decoupled solver
-    ! its values are obtained from the fine grid level.
-
-    w     => flowDoms(nn,mm,ll)%w
-    wOld  => flowDoms(nn,1, ll)%wOld
-    p     => flowDoms(nn,mm,ll)%p
-    aa    => flowDoms(nn,mm,ll)%aa
-    shockSensor => flowDoms(nn,mm,ll)%shockSensor
-
-    gamma => flowDoms(nn,1, ll)%gamma
-    rlv   => flowDoms(nn,1, ll)%rlv
-    rev   => flowDoms(nn,mm,ll)%rev
-    s     => flowDoms(nn,mm,ll)%s
-
-    ux => flowDoms(nn,mm,ll)%ux
-    uy => flowDoms(nn,mm,ll)%uy
-    uz => flowDoms(nn,mm,ll)%uz
-
-    vx => flowDoms(nn,mm,ll)%vx
-    vy => flowDoms(nn,mm,ll)%vy
-    vz => flowDoms(nn,mm,ll)%vz
-
-    wx => flowDoms(nn,mm,ll)%wx
-    wy => flowDoms(nn,mm,ll)%wy
-    wz => flowDoms(nn,mm,ll)%wz
-
-    qx => flowDoms(nn,mm,ll)%qx
-    qy => flowDoms(nn,mm,ll)%qy
-    qz => flowDoms(nn,mm,ll)%qz
-
-
-    ! Residual and multigrid variables. The residual point to the
-    ! finest grid entry, the multigrid variables to their own level.
-
-    dw => flowDoms(nn,1,ll)%dw
-    fw => flowDoms(nn,1,ll)%fw
-    dwOldRK => flowDoms(nn,1,ll)%dwOldRK
-    scratch => flowDoms(nn,1,ll)%scratch
-
-    p1 => flowDoms(nn,mm,ll)%p1
-    w1 => flowDoms(nn,mm,ll)%w1
-    wr => flowDoms(nn,mm,ll)%wr
-
-    ! Variables, which allow a more flexible multigrid treatment.
-    ! They are the same for all spectral modes and therefore they
-    ! point to the 1st mode.
-
-    mgIFine => flowDoms(nn,mm,1)%mgIFine
-    mgJFine => flowDoms(nn,mm,1)%mgJFine
-    mgKFine => flowDoms(nn,mm,1)%mgKFine
-
-    mgIWeight => flowDoms(nn,mm,1)%mgIWeight
-    mgJWeight => flowDoms(nn,mm,1)%mgJWeight
-    mgKWeight => flowDoms(nn,mm,1)%mgKWeight
-
-    mgICoarse => flowDoms(nn,mm,1)%mgICoarse
-    mgJCoarse => flowDoms(nn,mm,1)%mgJCoarse
-    mgKCoarse => flowDoms(nn,mm,1)%mgKCoarse
-
-    ! Time-stepping variables and spectral radIi.
-    ! They all point to the fine mesh entry.
-
-    wn  => flowDoms(nn,1,ll)%wn
-    pn  => flowDoms(nn,1,ll)%pn
-    dtl => flowDoms(nn,1,ll)%dtl
-
-    radI => flowDoms(nn,1,ll)%radI
-    radJ => flowDoms(nn,1,ll)%radJ
-    radK => flowDoms(nn,1,ll)%radK
-
-    ! Wall distance for the turbulence models.
-
-    d2Wall => flowDoms(nn,mm,ll)%d2Wall
-    filterDES   => flowDoms(nn,mm,ll)%filterDES  ! eran-des
-    if (useRoughSA) then
-       ks => flowDoms(nn,mm,ll)%ks
-    end if
-
-    ! Arrays used for the implicit treatment of the turbulent wall
-    ! boundary conditions. As these variables are only allocated for
-    ! the 1st spectral solution of the fine mesh, the pointers point
-    ! to those arrays.
-
-    bmti1 => flowDoms(nn,1,1)%bmti1
-    bmti2 => flowDoms(nn,1,1)%bmti2
-    bmtj1 => flowDoms(nn,1,1)%bmtj1
-    bmtj2 => flowDoms(nn,1,1)%bmtj2
-    bmtk1 => flowDoms(nn,1,1)%bmtk1
-    bmtk2 => flowDoms(nn,1,1)%bmtk2
-
-    bvti1 => flowDoms(nn,1,1)%bvti1
-    bvti2 => flowDoms(nn,1,1)%bvti2
-    bvtj1 => flowDoms(nn,1,1)%bvtj1
-    bvtj2 => flowDoms(nn,1,1)%bvtj2
-    bvtk1 => flowDoms(nn,1,1)%bvtk1
-    bvtk2 => flowDoms(nn,1,1)%bvtk2
-
-    ! Pointers for globalCell/Node
-    globalCell =>flowDoms(nn,mm,ll)%globalCell
-    globalNode =>flowDoms(nn,mm,ll)%globalNode
-
-    xSeed => flowDoms(nn,mm,ll)%xSeed
-    wallInd => flowDoms(nn,mm,ll)%wallInd
-
-    ! Added by HDN
-    ! Kept the same dim as their counterparts
-    xALE      => flowDoms(nn,mm,ll)%xALE
-    sVeloIALE => flowDoms(nn,mm,ll)%sVeloIALE
-    sVeloJALE => flowDoms(nn,mm,ll)%sVeloJALE
-    sVeloKALE => flowDoms(nn,mm,ll)%sVeloKALE
-    sIALE     => flowDoms(nn,mm,ll)%sIALE
-    sJALE     => flowDoms(nn,mm,ll)%sJALE
-    sKALE     => flowDoms(nn,mm,ll)%sKALE
-    sFaceIALE => flowDoms(nn,mm,ll)%sFaceIALE
-    sFaceJALE => flowDoms(nn,mm,ll)%sFaceJALE
-    sFaceKALE => flowDoms(nn,mm,ll)%sFaceKALE
-    dwALE     => flowDoms(nn,1,ll)%dwALE
-    fwALE     => flowDoms(nn,1,ll)%fwALE
-
-    ! Pointers for PC
-    PCMat => flowDoms(nn,mm,ll)%pcMat
-
-    i_D_Fact => flowDoms(nn,mm,ll)%i_D_fact
-    i_L_Fact => flowDoms(nn,mm,ll)%i_L_fact
-    i_U_Fact => flowDoms(nn,mm,ll)%i_U_fact
-    i_U2_Fact => flowDoms(nn,mm,ll)%i_U2_fact
-
-    j_D_Fact => flowDoms(nn,mm,ll)%j_D_fact
-    j_L_Fact => flowDoms(nn,mm,ll)%j_L_fact
-    j_U_Fact => flowDoms(nn,mm,ll)%j_U_fact
-    j_U2_Fact => flowDoms(nn,mm,ll)%j_U2_fact
-
-    k_D_Fact => flowDoms(nn,mm,ll)%k_D_fact
-    k_L_Fact => flowDoms(nn,mm,ll)%k_L_fact
-    k_U_Fact => flowDoms(nn,mm,ll)%k_U_fact
-    k_U2_Fact => flowDoms(nn,mm,ll)%k_U2_fact
-
-    PCVec1 => flowDoms(nn,mm,ll)%PCVec1
-    PCVec2 => flowDoms(nn,mm,ll)%PCVec2
-
-    i_ipiv => flowDoms(nn,mm,ll)%i_ipiv
-    j_ipiv => flowDoms(nn,mm,ll)%j_ipiv
-    k_ipiv => flowDoms(nn,mm,ll)%k_ipiv
-
-  end subroutine setPointers
-
-  subroutine setPointers_b(nn, level, sps)
-    use constants
-    implicit none
-    integer(kind=intType), intent(in) :: nn,level,sps
-
-    ! Alias for setPonters_d
-    call setPointers_d(nn, level, sps)
-
-  end subroutine setPointers_b
-
-  ! Set the pointers for the derivative values AND the normal pointers
-  subroutine setPointers_d(nn, level, sps)
-
-    use block, only : flowDomsd
-    use blockPointers
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    integer(kind=intType), intent(in) :: nn,level,sps
-
-    ! Set normal pointers
-    call setPointers(nn, level, sps)
-
-    viscSubfaced => flowDomsd(nn,1,sps)%viscSubface
-
-    xd    => flowDomsd(nn,1,sps)%x
-
-    sid     => flowDomsd(nn,1,sps)%si
-    sjd     => flowDomsd(nn,1,sps)%sj
-    skd     => flowDomsd(nn,1,sps)%sk
-
-    vold    => flowDomsd(nn,1,sps)%vol
-
-    rotMatrixId => flowDomsd(nn,1,sps)%rotMatrixI
-    rotMatrixJd => flowDomsd(nn,1,sps)%rotMatrixJ
-    rotMatrixKd => flowDomsd(nn,1,sps)%rotMatrixK
-
-    sFaceId => flowDomsd(nn,1,sps)%sFaceI
-    sFaceJd => flowDomsd(nn,1,sps)%sFaceJ
-    sFaceKd => flowDomsd(nn,1,sps)%sFaceK
-
-    ! Flow variables. Note that wOld, gamma and the laminar viscosity
-    ! point to the entries on the finest mesh. The reason is that
-    ! they are computed from the other variables. For the eddy
-    ! viscosity this is not the case because in a decoupled solver
-    ! its values are obtained from the fine grid level.
-
-    wd     => flowDomsd(nn,1,sps)%w
-    pd     => flowDomsd(nn,1,sps)%p
-
-    gammad => flowDomsd(nn,1,sps)%gamma
-    aad    => flowDomsd(nn,1,sps)%aa
-    rlvd   => flowDomsd(nn,1,sps)%rlv
-    revd   => flowDomsd(nn,1,sps)%rev
-    sd     => flowDomsd(nn,1,sps)%s
-
-    uxd => flowDomsd(nn,1,sps)%ux
-    uyd => flowDomsd(nn,1,sps)%uy
-    uzd => flowDomsd(nn,1,sps)%uz
-
-    vxd => flowDomsd(nn,1,sps)%vx
-    vyd => flowDomsd(nn,1,sps)%vy
-    vzd => flowDomsd(nn,1,sps)%vz
-
-    wxd => flowDomsd(nn,1,sps)%wx
-    wyd => flowDomsd(nn,1,sps)%wy
-    wzd => flowDomsd(nn,1,sps)%wz
-
-    qxd => flowDomsd(nn,1,sps)%qx
-    qyd => flowDomsd(nn,1,sps)%qy
-    qzd => flowDomsd(nn,1,sps)%qz
-
-    ! Residual and multigrid variables. The residual point to the
-    ! finest grid entry, the multigrid variables to their own level.
-
-    dwd => flowDomsd(nn,1,sps)%dw
-    fwd => flowDomsd(nn,1,sps)%fw
-    scratchd => flowDomsd(nn,1,sps)%scratch
-
-    dtld => flowDomsd(nn,1,sps)%dtl
-
-    ! Time-stepping variables and spectral radIi.
-    ! They asps point to the fine mesh entry.
-
-    radId => flowDomsd(nn,1,sps)%radI
-    radJd => flowDomsd(nn,1,sps)%radJ
-    radKd => flowDomsd(nn,1,sps)%radK
-
-    d2Walld => flowDomsd(nn,1,sps)%d2Wall
-
-    ! Arrays used for the implicit treatment of the turbulent wasps
-    ! boundary conditions. As these variables are only aspocated for
-    ! the 1st spectral solution of the fine mesh, the pointers point
-    ! to those arrays.
-
-    bmti1d => flowDomsd(nn,1,1)%bmti1
-    bmti2d => flowDomsd(nn,1,1)%bmti2
-    bmtj1d => flowDomsd(nn,1,1)%bmtj1
-    bmtj2d => flowDomsd(nn,1,1)%bmtj2
-    bmtk1d => flowDomsd(nn,1,1)%bmtk1
-    bmtk2d => flowDomsd(nn,1,1)%bmtk2
-
-    bvti1d => flowDomsd(nn,1,1)%bvti1
-    bvti2d => flowDomsd(nn,1,1)%bvti2
-    bvtj1d => flowDomsd(nn,1,1)%bvtj1
-    bvtj2d => flowDomsd(nn,1,1)%bvtj2
-    bvtk1d => flowDomsd(nn,1,1)%bvtk1
-    bvtk2d => flowDomsd(nn,1,1)%bvtk2
-
-    !BCData Array
-    BCDatad => flowDomsd(nn,1,sps)%BCdata
-
-  end subroutine setPointers_d
-
-
-  subroutine spectralInterpolCoef(nsps, t, alpScal, alpMat)
-    !
-    !       spectralInterpolCoef determines the scalar and matrix
-    !       spectral interpolation coefficients for the given number of
-    !       spectral solutions for the given t, where t is the ratio of
-    !       the time and the periodic interval time. Note that the index
-    !       of the spectral solutions of both alpScal and alpMat start
-    !       at 0. In this way these coefficients are easier to determine.
-    !
-    use constants
-    use inputTimeSpectral, only : nTimeIntervalsSpectral, rotMatrixSpectral
-    use section, only : nSections, sections
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nsps
-    real(kind=realType),   intent(in) :: t
-
-    real(kind=realType), dimension(0:nsps-1), intent(out) :: alpScal
-    real(kind=realType), dimension(nSections,0:nsps-1,3,3), &
-         intent(out) :: alpMat
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: jj, nn, j, p, r, nhalfM1, m, mhalfM1
-
-    real(kind=realType) :: nspsInv, mInv, tm, alp
-
-    real(kind=realType), dimension(3,3) :: rp, tmp
-
-    !       Scalar coefficients.
-    !
-    ! Loop over the number of spectral solutions to compute the
-    ! coefficients. Note that the loop starts at 0.
-
-    if (mod(nsps,2).eq.0) then
-       nhalfM1 = nsps/2 - 1
-    else
-       nhalfM1 = (nsps-1)/2
-    endif
-
-    nspsInv = one/real(nsps,realType)
-
-    do j=0,(nsps-1)
-       if (mod(nsps,2).eq.0) then
-          alpScal(j) = one + cos(j*pi)*cos(nsps*pi*t)
-       else
-          alpScal(j) = one + cos(j*pi*(nsps+1)/nsps)*cos((nsps+1)*pi*t)
-       endif
-
-       do r=1,nhalfM1
-          alpScal(j) = alpScal(j)                                  &
-               + two*cos(r*j*two*pi*nspsInv)*cos(r*two*pi*t) &
-               + two*sin(r*j*two*pi*nspsInv)*sin(r*two*pi*t)
-       enddo
-
-       alpScal(j) = alpScal(j)*nspsInv
-
-    enddo
-    !
-    !       Matrix coefficients. These are (can be) different for every
-    !       section and they must therefore be determined for every
-    !       section.
-    !
-    ! Loop over the number of sections in the grid.
-
-    sectionLoop: do nn=1,nSections
-
-       ! Compute the numbers for the entire wheel for this section.
-       ! Note that also t must be adapted, because t is a ratio between
-       ! the actual time and the periodic time.
-
-       m        = nsps*sections(nn)%nSlices
-       if (mod(m,2).eq.0) then
-          mhalfM1 = m/2 - 1
-       else
-          mhalfM1 = (m-1)/2
-       endif
-       mInv    = one/real(m,realType)
-       tm       = t/real(sections(nn)%nSlices,realType)
-
-       ! Loop over the number of spectral solutions.
-
-       spectralLoop: do jj=0,(nsps-1)
-
-          ! Initialize the matrix coefficients to zero and the matrix
-          ! rp to the identity matrix. Rp is the rotation matrix of this
-          ! section to the power p, which starts at 0, i.e. rp = i.
-
-          alpMat(nn,jj,1,1) = zero
-          alpMat(nn,jj,1,2) = zero
-          alpMat(nn,jj,1,3) = zero
-
-          alpMat(nn,jj,2,1) = zero
-          alpMat(nn,jj,2,2) = zero
-          alpMat(nn,jj,2,3) = zero
-
-          alpMat(nn,jj,3,1) = zero
-          alpMat(nn,jj,3,2) = zero
-          alpMat(nn,jj,3,3) = zero
-
-          rp(1,1) = one
-          rp(1,2) = zero
-          rp(1,3) = zero
-
-          rp(2,1) = zero
-          rp(2,2) = one
-          rp(2,3) = zero
-
-          rp(3,1) = zero
-          rp(3,2) = zero
-          rp(3,3) = one
-
-          ! Loop over the number of slices of this section. Note that
-          ! this loop starts at zero, which simplifies the formulas.
-
-          slicesLoop: do p=0,(sections(nn)%nSlices-1)
-
-             ! Determine the index j, the index of alp in the entire
-             ! wheel.
-
-             j = jj + p*nsps
-
-             ! Compute the scalar coefficient alp of the index j in
-             ! the entire wheel.
-
-             if (mod(m,2).eq.0) then
-                alp = one + cos(j*pi)*cos(m*pi*tm)
-             else
-                alp = one + cos(j*pi*(m+1)/m)*cos((m+1)*pi*tm)
-             endif
-             do r=1,mhalfM1
-                alp = alp + two*cos(r*j*two*pi*mInv)*cos(r*two*pi*tm) &
-                     +       two*sin(r*j*two*pi*mInv)*sin(r*two*pi*tm)
-             enddo
-
-             alp = alp*mInv
-
-             ! Update the matrix coefficient.
-
-             do r=1,3
-                do j=1,3
-                   alpMat(nn,jj,r,j) = alpMat(nn,jj,r,j) + alp*rp(r,j)
-                enddo
-             enddo
-
-             ! Multiply rp by the rotation matrix to obtain the correct
-             ! matrix for the next slice. Use tmp as temporary storage.
-
-             do r=1,3
-                do j=1,3
-                   tmp(r,j) = rp(r,1)*rotMatrixSpectral(nn,1,j) &
-                        + rp(r,2)*rotMatrixSpectral(nn,2,j) &
-                        + rp(r,3)*rotMatrixSpectral(nn,3,j)
-                enddo
-             enddo
-
-             rp = tmp
-
-          enddo slicesLoop
-       enddo spectralLoop
-    enddo sectionLoop
-
-  end subroutine spectralInterpolCoef
-
-  subroutine deallocateTempMemory(resNeeded)
-    !
-    !       deallocateTempMemory deallocates memory used in the solver,
-    !       but which is not needed to store the actual solution. In this
-    !       way the memory can be used differently, e.g. when writing the
-    !       solution or computing the wall distances.
-    !
-    use constants
-    use block, only : flowDoms, nDom
-    use communication, only : sendBuffer, recvBuffer
-    use inputIteration, only : smoother
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    logical, intent(in) :: resNeeded
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: nn, mm
-
-    ! Deallocate the communication buffers
-
-    deallocate(sendBuffer, recvBuffer, stat=ierr)
-    if(ierr /= 0)                              &
-         call terminate("deallocateTempMemory", &
-         "Deallocation error for communication buffers")
-
-    ! Loop over the spectral modes and domains. Note that only memory
-    ! on the finest grid is released, because a) most of these
-    ! variables are only allocated on the fine grid and b) the coarser
-    ! grids do not contribute that much in the memory usage anyway.
-
-    spectralModes: do mm=1,nTimeIntervalsSpectral
-       domains: do nn=1,nDom
-
-          ! Check if the residual, time step, etc. Is needed.
-
-          if(.not. resNeeded) then
-
-             ! Residual, etc. Not needed.
-             ! Deallocate residual, the time step and the spectral radii
-             ! of the fine level.
-
-             deallocate(flowDoms(nn,1,mm)%dw,   flowDoms(nn,1,mm)%fw,   &
-                  flowDoms(nn,1,mm)%dtl,  flowDoms(nn,1,mm)%radI, &
-                  flowDoms(nn,1,mm)%radJ, flowDoms(nn,1,mm)%radK, &
-                  stat=ierr)
-             if(ierr /= 0)                            &
-                  call terminate("deallocateTempMemory", &
-                  "Deallocation error for dw, fw, dtl and &
-                  &spectral radii.")
-          endif
-
-          ! The memory for the zeroth Runge Kutta stage
-          ! if a Runge Kutta scheme is used.
-
-          if(smoother == RungeKutta) then
-
-             deallocate(flowDoms(nn,1,mm)%wn, flowDoms(nn,1,mm)%pn, &
-                  stat=ierr)
-             if(ierr /= 0)                            &
-                  call terminate("deallocateTempMemory", &
-                  "Deallocation error for wn and pn")
-          endif
-
-       enddo domains
-    enddo spectralModes
-
-  end subroutine deallocateTempMemory
-
-  subroutine allocateTempMemory(resNeeded)
-    !
-    !       AllocateTempMemory allocates the memory again that was
-    !       temporarily deallocted by deallocateTempMemory.
-    !
-    use constants
-    use block, only : flowDoms, nDom
-    use communication, only : sendBuffer, recvBuffer, sendBufferSize, recvBufferSize
-    use inputIteration, only : smoother
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use flowVarRefState, only : nw, nwf
-
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    logical, intent(in) :: resNeeded
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: nn,mm
-    integer(kind=intType) :: il, jl, kl, ie, je, ke, ib, jb, kb
-
-    ! The memory for the receive buffers.
-
-    allocate(sendBuffer(sendBufferSize), &
-         recvBuffer(recvBufferSize), stat=ierr)
-    if(ierr /= 0)                          &
-         call terminate("allocateTempMemory", &
-         "Memory allocation failure for comm buffers")
-
-    ! Loop over the spectral modes and domains. Note that only memory
-    ! on the finest mesh level needs to be reallocated, because the
-    ! memory on the coarser levels has not been released or is not
-    ! needed .
-
-    spectralModes: do mm=1,nTimeIntervalsSpectral
-       domains: do nn=1,nDom
-
-          ! Store some dimensions a bit easier.
-
-          il = flowDoms(nn,1,mm)%il
-          jl = flowDoms(nn,1,mm)%jl
-          kl = flowDoms(nn,1,mm)%kl
-
-          ie = flowDoms(nn,1,mm)%ie
-          je = flowDoms(nn,1,mm)%je
-          ke = flowDoms(nn,1,mm)%ke
-
-          ib = flowDoms(nn,1,mm)%ib
-          jb = flowDoms(nn,1,mm)%jb
-          kb = flowDoms(nn,1,mm)%kb
-
-          ! Check if the residual, time step, etc. was deallocated.
-
-          if(.not. resNeeded) then
-
-             ! Allocate the residual, the time step and
-             ! the spectral radii.
-
-             allocate(flowDoms(nn,1,mm)%dw(0:ib,0:jb,0:kb,1:nw),  &
-                  flowDoms(nn,1,mm)%fw(0:ib,0:jb,0:kb,1:nwf), &
-                  flowDoms(nn,1,mm)%dtl(1:ie,1:je,1:ke),      &
-                  flowDoms(nn,1,mm)%radI(1:ie,1:je,1:ke),     &
-                  flowDoms(nn,1,mm)%radJ(1:ie,1:je,1:ke),     &
-                  flowDoms(nn,1,mm)%radK(1:ie,1:je,1:ke), stat=ierr)
-             if(ierr /= 0)                            &
-                  call terminate("allocateTempMemory", &
-                  "Memory allocation failure for dw, fw, &
-                  &dtl and the spectral radii.")
-
-             ! Initialize dw and fw to zero to avoid possible overflows
-             ! of the halo's.
-
-             flowDoms(nn,1,mm)%dw = zero
-             flowDoms(nn,1,mm)%fw = zero
-
-          endif
+        if (newSize > 0 .or. alwaysFreeMem) then
+            allocate (intArray(newSize1, newSize2), stat=ierr)
+            if (ierr /= 0) &
+                call terminate("reallocateInteger2", &
+                               "Memory allocation failure for intArray")
+            do j = 1, nn2
+                do i = 1, nn1
+                    intArray(i, j) = tmp(i, j)
+                end do
+            end do
+        end if
+
+        ! Release the memory of tmp in case oldSize is larger than 0
+        ! or if alwaysFreeMem is .true..
+
+        if (oldSize > 0 .or. alwaysFreeMem) then
+            deallocate (tmp, stat=ierr)
+            if (ierr /= 0) &
+                call terminate("reallocateInteger2", &
+                               "Deallocation error for tmp")
+        end if
+
+    end subroutine reallocateInteger2
+
+    subroutine reallocateReal(realArray, newSize, oldSize, &
+                              alwaysFreeMem)
+        !
+        !       ReallocateReal reallocates the given real array to the given
+        !       new size. The old values of the array are copied. Note that
+        !       newSize can be both smaller and larger than oldSize.
+        !
+        use constants
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        real(kind=realType), dimension(:), pointer :: realArray
+        integer(kind=intType), intent(in) :: newSize, oldSize
+        logical, intent(in) :: alwaysFreeMem
+        !
+        !      Local variables.
+        !
+        real(kind=realType), dimension(:), pointer :: tmp
+
+        integer(kind=intType) :: i, nn
+
+        integer :: ierr
+
+        ! Determine the minimum of newSize and oldSize.
+
+        nn = min(newSize, oldSize)
+
+        ! Set the pointer for tmp to realArray.
+
+        tmp => realArray
+
+        ! Allocate the memory for realArray in case newSize is larger
+        ! than 0 or if alwaysFreeMem is .True. And copy the old data
+        ! into it.
+
+        if (newSize > 0 .or. alwaysFreeMem) then
+            allocate (realArray(newSize), stat=ierr)
+            if (ierr /= 0) &
+                call terminate("reallocateReal", &
+                               "Memory allocation failure for realArray")
+            do i = 1, nn
+                realArray(i) = tmp(i)
+            end do
+        end if
+
+        ! Release the memory for tmp in case oldSize is larger than 0 or
+        ! if alwaysFreeMem is .True.
+
+        if (oldSize > 0 .or. alwaysFreeMem) then
+            deallocate (tmp, stat=ierr)
+            if (ierr /= 0) &
+                call terminate("reallocateReal", &
+                               "Deallocation error for tmp")
+        end if
+
+    end subroutine reallocateReal
+
+    !---------------------------------------------------------------------------=
+
+    subroutine reallocateReal2(realArray, newSize1, newSize2, &
+                               oldSize1, oldSize2, &
+                               alwaysFreeMem)
+        !
+        !       ReallocateReal2 reallocates the given 2d integer array to
+        !       the given new sizes. The old values of the array are copied.
+        !       Note that the newSizes can be both smaller and larger than
+        !       the oldSizes.
+        !
+        use constants
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        real(kind=realType), dimension(:, :), pointer :: realArray
+        integer(kind=intType), intent(in) :: newSize1, newSize2, &
+                                             oldSize1, oldSize2
+        logical, intent(in) :: alwaysFreeMem
+        !
+        !      Local variables.
+        !
+        real(kind=realType), dimension(:, :), pointer :: tmp
+
+        integer(kind=intType) :: newSize, oldSize
+        integer(kind=intType) :: nn1, nn2, nn
+
+        integer(kind=intType) :: i, j
+
+        integer :: ierr
+
+        ! Determine the total new and old size.
+
+        newSize = newSize1*newSize2
+        oldSize = oldSize1*oldSize2
+
+        ! Determine for each of the 2 components the minimum of the new
+        ! and the old size. Multiply these values to obtain the total
+        ! amount of data that must be copied.
+
+        nn1 = min(newSize1, oldSize1)
+        nn2 = min(newSize2, oldSize2)
+
+        nn = nn1*nn2
+
+        ! Set the pointer for tmp.
+
+        tmp => realArray
+
+        ! Allocate the memory for realArray in case newSize is larger
+        ! than 0 or if alwaysFreeMem is .True. And copy the old data
+        ! into it.
+
+        if (newSize > 0 .or. alwaysFreeMem) then
+            allocate (realArray(newSize1, newSize2), stat=ierr)
+            if (ierr /= 0) &
+                call terminate("reallocateReal2", &
+                               "Memory allocation failure for realArray")
+            do j = 1, nn2
+                do i = 1, nn1
+                    realArray(i, j) = tmp(i, j)
+                end do
+            end do
+        end if
+
+        ! Release the memory of tmp in case oldSize is larger than 0
+        ! or if alwaysFreeMem is .True..
+
+        if (oldSize > 0 .or. alwaysFreeMem) then
+            deallocate (tmp, stat=ierr)
+            if (ierr /= 0) &
+                call terminate("reallocateReal2", &
+                               "Deallocation error for tmp")
+        end if
+
+    end subroutine reallocateReal2
 
-          ! The memory for the zeroth runge kutta stage
-          ! if a runge kutta scheme is used.
+    subroutine setBufferSizes(level, sps, determine1to1Buf, determineOversetBuf)
+        !
+        !       setBufferSizes determines the size of the send and receive
+        !       buffers for this grid level. After that the maximum value of
+        !       these sizes and the currently stored value is taken, such that
+        !       for all mg levels the same buffer can be used. Normally the
+        !       size on the finest grid should be enough, but it is just as
+        !       safe to check on all mg levels.
+        !
+        use constants
+        use communication, only: commPatternNode_1st, commPatternCell_2nd, &
+                                 commPatternOverset, recvBufferSize, recvBufferSize_1to1, &
+                                 recvBufferSizeOver, recvBufferSizeOver, sendBufferSize, recvBufferSize, &
+                                 sendBufferSizeOver, sendBufferSize_1to1
+        use flowVarRefState, only: nw, eddyModel, viscous
+        use inputPhysics, only: cpModel
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level, sps
+        logical, intent(in) :: determine1to1Buf
+        logical, intent(in) :: determineOversetBuf
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i
+        integer(kind=intType) :: sendSize, recvSize, nVarComm
+
+        ! Determine the maximum number of variables to be communicated.
+
+        nVarComm = nw + 1
+        if (cpModel == cpTempCurveFits) nVarComm = nVarComm + 1
+        if (viscous) nVarComm = nVarComm + 1
+        if (eddyModel) nVarComm = nVarComm + 1
+
+        ! Check if the 1 to 1 communication must be considered.
+
+        if (determine1to1Buf) then
+
+            ! Store the send and receive buffer sizes needed for the nodal
+            ! exchange. Determine the maximum for the number of send and
+            ! receive processors.
+
+            i = commPatternNode_1st(level)%nProcSend
+            sendSize = commPatternNode_1st(level)%nsendCum(i)
+
+            i = commPatternNode_1st(level)%nProcRecv
+            recvSize = commPatternNode_1st(level)%nrecvCum(i)
+
+            ! Determine the buffer sizes for the 2nd level cell exchange and
+            ! set the size for this processor to the maximum needed. Note
+            ! that it is not needed to test the 1st level cell halo, because
+            ! it is entirely incorporated in the 2nd level.
+            ! Determine the maximum for the number of send and receive
+            ! processors as well.
+
+            i = commPatternCell_2nd(level)%nProcSend
+            sendSize = max(sendSize, &
+                           commPatternCell_2nd(level)%nsendCum(i))
+
+            i = commPatternCell_2nd(level)%nProcRecv
+            recvSize = max(recvSize, &
+                           commPatternCell_2nd(level)%nrecvCum(i))
+
+            ! Multiply sendSize and recvSize with the number of variables to
+            ! be communicated.
+
+            sendSize = sendSize*nVarComm
+            recvSize = recvSize*nVarComm
+
+            ! Store the maximum of the current values and the old values
+            ! in sendBufferSize1to1 and recvBufferSize1to1.
+
+            sendBufferSize_1to1 = max(sendBufferSize_1to1, sendSize)
+            recvBufferSize_1to1 = max(recvBufferSize_1to1, recvSize)
+
+        end if
+
+        ! Check if the overset communication must be considered.
+
+        if (determineOversetBuf) then
+
+            ! Same deal for the overset communication.
+
+            i = commPatternOverset(level, sps)%nProcSend
+            sendSize = commPatternOverset(level, sps)%nsendCum(i)
+
+            i = commPatternOverset(level, sps)%nProcRecv
+            recvSize = commPatternOverset(level, sps)%nrecvCum(i)
+
+            ! Multiply sendSize and recvSize with the number of variables to
+            ! be communicated.
+
+            sendSize = sendSize*nVarComm
+            recvSize = recvSize*nVarComm
+
+            ! Store the maximum of the current values and the old values.
+
+            sendBufferSizeOver = max(sendBufferSizeOver, sendSize)
+            recvBufferSizeOver = max(recvBufferSizeOver, recvSize)
+
+        end if
+
+        ! Take the maximum for of all the buffers to
+        ! obtain the actual size to be allocated.
+
+        sendBufferSize = max(sendBufferSize_1to1, &
+                             sendBufferSizeOver)
+        recvBufferSize = max(recvBufferSize_1to1, &
+                             recvBufferSizeOver)
+
+    end subroutine setBufferSizes
+
+    subroutine setPointers(nn, mm, ll)
+        !
+        !       setPointers makes the variables in blockPointers point to
+        !       block nn for grid level mm and spectral solution ll.
+        !
+        ! Make an exception to use..only. We literally need everything
+        ! from blockPointers so use a bare use.
+        use constants
+        use blockPointers
+        use inputPhysics, only: useRoughSA
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        integer(kind=intType), intent(in) :: nn, mm, ll
 
-          if(smoother == RungeKutta) then
+        ! Store the info of the current block, such that inside the
+        ! module blockPointers it is known to which block the data
+        ! belongs.
 
-             allocate(flowDoms(nn,1,mm)%wn(2:il,2:jl,2:kl,1:nwf), &
-                  flowDoms(nn,1,mm)%pn(2:il,2:jl,2:kl), stat=ierr)
-             if(ierr /= 0)                            &
-                  call terminate("allocateTempMemory", &
-                  "Memory allocation failure for wn and pn")
-          endif
+        sectionID = 1 ! We currently are only ever allowed 1 section
+        nbkLocal = nn
+        nbkGlobal = flowDoms(nn, mm, ll)%cgnsBlockID
+        mgLevel = mm
+        spectralSol = ll
 
-       enddo domains
-    enddo spectralModes
+        ! Block dimensions.
 
-  end subroutine allocateTempMemory
+        nx = flowDoms(nn, mm, ll)%nx
+        ny = flowDoms(nn, mm, ll)%ny
+        nz = flowDoms(nn, mm, ll)%nz
 
-  subroutine getLiftDirFromSymmetry(liftDir)
+        il = flowDoms(nn, mm, ll)%il
+        jl = flowDoms(nn, mm, ll)%jl
+        kl = flowDoms(nn, mm, ll)%kl
 
-    ! The purpose of this function is to determine what coordinate
-    ! direction the mirror plane is in. It does NOT handle multiple mirror
-    ! planes. It is used just to determine what the lift direction is.
+        ie = flowDoms(nn, mm, ll)%ie
+        je = flowDoms(nn, mm, ll)%je
+        ke = flowDoms(nn, mm, ll)%ke
 
-    use constants
-    use blockPointers, only : x, il, jl, kl, BCType, nDom, BCData, BCFaceID, nBocos
-    use communication, only : adflow_comm_world
-    implicit none
+        ib = flowDoms(nn, mm, ll)%ib
+        jb = flowDoms(nn, mm, ll)%jb
+        kb = flowDoms(nn, mm, ll)%kb
 
-    ! Output
-    integer(kind=intType), intent(out) :: liftDir
-    integer(kind=intType), dimension(3) :: sym_local, sym
-
-    ! Working
-    integer(kind=intType) :: nn, i_index(1), mm, ierr
-    real(kind=realType), dimension(:, :, :), pointer :: xx
-    real(kind=realType) :: cp(3), v1(3), v2(3)
-    ! Loop over each block and each subFace
-
-    sym_local = 0_intType
-    sym       = 0_intType
-    liftDir   = 0_intType
-    do nn=1, nDom
-       call setPointers(nn, 1, 1)
-       do mm=1,nBocos
-          if (bcType(mm) == symm) then
-
-             select case (BCFaceID(mm))
-             case (iMin)
-                xx => x(1, :, :, :)
-             case (iMax)
-                xx => x(il, :, :, :)
-             case (jMin)
-                xx => x(:, 1, :, :)
-             case (jMax)
-                xx => x(:, jl, :, :)
-             case (kMin)
-                xx => x(:, :, 1, :)
-             case (kMax)
-                xx => x(:, :, kl, :)
-             end select
-
-             ! Take the cross product
-             v1(:) = xx(bcData(mm)%inEnd, bcData(mm)%jnEnd, :) - &
-                  xx(bcData(mm)%inBeg, bcData(mm)%jnBeg, :)
-             v2(:) = xx(bcData(mm)%inBeg, bcData(mm)%jnEnd, :) - &
-                  xx(bcData(mm)%inEnd, bcData(mm)%jnBeg, :)
-
-             ! Cross Product
-             cp(1) = (v1(2)*v2(3) - v1(3)*v2(2))
-             cp(2) = (v1(3)*v2(1) - v1(1)*v2(3))
-             cp(3) = (v1(1)*v2(2) - v1(2)*v2(1))
-
-             ! Only interesed in abs values
-             cp = abs(cp)
-
-             ! Location, ie coordiante direction of dominate direction
-             i_index = maxloc(real(cp))
-
-             sym_local(i_index(1)) = 1_intType
-          end if
-       end do
-    end do
-
-    ! Now we have a bunch of sym_locals, mpi_allreduce them and SUM
-
-    call MPI_Allreduce (sym_local, sym, 3, adflow_integer, &
-         MPI_SUM, adflow_comm_world, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Now we should make sure that only ONE of the values is
-    ! non-zero. If more than one value is zero, it means we have
-    ! multiple symmetry planes which we can't support.
-    if (sym(1) == 0 .and. sym(2) == 0 .and. sym(3) == 0) then
-       ! Pass - no sym, can't determine lift dir:
-    else if(sym(1) .ne. 0 .and. sym(2) == 0 .and. sym(3) == 0) then
-       ! Pass - x dir can't be symmetry
-    else if(sym(1) == 0 .and. sym(2) .ne. 0 .and. sym(3) == 0) then
-       liftDir = 3
-    else if(sym(1) == 0 .and. sym(2) == 0 .and. sym(3) .ne. 0) then
-       liftDir = 2
-    else
-       ! Multiple orientations...can't do anything
-    end if
-
-  end subroutine getLiftDirFromSymmetry
-
-
-  subroutine writeIntroMessage
-    !
-    !       writeIntroMessage writes a message to stdout with
-    !       information how the executable was built, e.g. whether single
-    !       or double precision is used for the integers and reals, etc.
-    !       To avoid a messy output only processor 0 prints this info.
-    !
-    use constants
-    use communication, only : myid, nProc
-    implicit none
-    !
-    !      Local variables
-    !
-    character(len=7) :: integerString
+        imaxDim = max(ie, je)
+        jmaxDim = max(je, ke)
 
-    ! Return if this is not processor 0.
+        rightHanded = flowDoms(nn, mm, ll)%righthanded
 
-    if(myID > 0) return
+        ! Point range in the corresponding cgns block
 
-    ! I'm processor 0. Write the info to stdout.
-
-    print "(a)", "#"
-    print "(a)", "# ADflow, multiblock structured flow solver"
-    print "(a)", "#"
-    print "(a)", "# This code solves the 3D RANS, laminar NS or &
-         &Euler equations"
-    print "(a)", "# on multiblock structured hexahedral grids."
-
-
-    write(integerString,"(i7)") nProc
-    integerString = adjustl(integerString)
-    print "(3a)", "# This is a parallel executable running on ", &
-         trim(integerString), " processors."
-    print "(a)", "# It has been compiled with the &
-         &following options:"
-
-    if( debug ) then
-       print "(a)", "# - Debug mode."
-    else
-       print "(a)", "# - Optimized mode."
-    endif
+        iBegor = flowDoms(nn, mm, ll)%iBegor
+        iEndor = flowDoms(nn, mm, ll)%iEndor
+        jBegor = flowDoms(nn, mm, ll)%jBegor
+        jEndor = flowDoms(nn, mm, ll)%jEndor
+        kBegor = flowDoms(nn, mm, ll)%kBegor
+        kEndor = flowDoms(nn, mm, ll)%kEndor
+
+        ! Subface info. Note that the pointers point to the 1st spectral
+        ! mode, because this is the only one allocated. The info is the
+        ! same for all modes.
+
+        nSubface = flowDoms(nn, mm, ll)%nSubface
+        n1to1 = flowDoms(nn, mm, ll)%n1to1
+        nBocos = flowDoms(nn, mm, ll)%nBocos
+        nViscBocos = flowDoms(nn, mm, ll)%nViscBocos
+
+        BCType => flowDoms(nn, mm, 1)%BCType
+        BCFaceID => flowDoms(nn, mm, 1)%BCFaceID
+        cgnsSubface => flowDoms(nn, mm, 1)%cgnsSubface
+
+        inBeg => flowDoms(nn, mm, 1)%inBeg
+        jnBeg => flowDoms(nn, mm, 1)%jnBeg
+        knBeg => flowDoms(nn, mm, 1)%knBeg
+        inEnd => flowDoms(nn, mm, 1)%inEnd
+        jnEnd => flowDoms(nn, mm, 1)%jnEnd
+        knEnd => flowDoms(nn, mm, 1)%knEnd
+
+        dinBeg => flowDoms(nn, mm, 1)%dinBeg
+        djnBeg => flowDoms(nn, mm, 1)%djnBeg
+        dknBeg => flowDoms(nn, mm, 1)%dknBeg
+        dinEnd => flowDoms(nn, mm, 1)%dinEnd
+        djnEnd => flowDoms(nn, mm, 1)%djnEnd
+        dknEnd => flowDoms(nn, mm, 1)%dknEnd
+
+        icBeg => flowDoms(nn, mm, 1)%icBeg
+        jcBeg => flowDoms(nn, mm, 1)%jcBeg
+        kcBeg => flowDoms(nn, mm, 1)%kcBeg
+        icEnd => flowDoms(nn, mm, 1)%icEnd
+        jcEnd => flowDoms(nn, mm, 1)%jcEnd
+        kcEnd => flowDoms(nn, mm, 1)%kcEnd
+
+        neighBlock => flowDoms(nn, mm, 1)%neighBlock
+        neighProc => flowDoms(nn, mm, 1)%neighProc
+        l1 => flowDoms(nn, mm, 1)%l1
+        l2 => flowDoms(nn, mm, 1)%l2
+        l3 => flowDoms(nn, mm, 1)%l3
+        groupNum => flowDoms(nn, mm, 1)%groupNum
+
+        ! Overset boundary and hole info.
+        iblank => flowDoms(nn, mm, ll)%iblank
+        status => flowDoms(nn, mm, ll)%status
+        forcedRecv => flowDoms(nn, mm, ll)%forcedRecv
+
+        fringes => flowDoms(nn, mm, ll)%fringes
+        fringePtr => flowDoms(nn, mm, ll)%fringePtr
+        gind => flowDoms(nn, mm, ll)%gInd
+        nDonors => flowDoms(nn, mm, ll)%nDonors
+
+        orphans => flowDoms(nn, mm, ll)%orphans
+        nOrphans = flowDoms(nn, mm, ll)%nOrphans
+
+        ! The data for boundary subfaces.
+
+        BCData => flowDoms(nn, mm, ll)%BCData
+
+        ! The stress tensor and heat flux vector at viscous wall faces
+        ! as well as the face pointers to these viscous wall faces.
+        ! The latter point to the 1st spectral mode, because they are
+        ! the only ones allocated. The info is the same for all modes.
+
+        viscSubface => flowDoms(nn, mm, ll)%viscSubface
+
+        viscIminPointer => flowDoms(nn, mm, 1)%viscIminPointer
+        viscImaxPointer => flowDoms(nn, mm, 1)%viscImaxPointer
+        viscJminPointer => flowDoms(nn, mm, 1)%viscJminPointer
+        viscJmaxPointer => flowDoms(nn, mm, 1)%viscJmaxPointer
+        viscKminPointer => flowDoms(nn, mm, 1)%viscKminPointer
+        viscKmaxPointer => flowDoms(nn, mm, 1)%viscKmaxPointer
+
+        ! Mesh related variables. The porosities point to the 1st
+        ! spectral mode, because they are the only ones allocated.
+        ! The info is the same for all modes.
+        ! Note that xOld and volOld always point to the finest
+        ! grid level.
+
+        x => flowDoms(nn, mm, ll)%x
+        xOld => flowDoms(nn, 1, ll)%xOld
+
+        si => flowDoms(nn, mm, ll)%si
+        sj => flowDoms(nn, mm, ll)%sj
+        sk => flowDoms(nn, mm, ll)%sk
+
+        vol => flowDoms(nn, mm, ll)%vol
+        volRef => flowDoms(nn, mm, ll)%volRef
+        volOld => flowDoms(nn, 1, ll)%volOld
+
+        porI => flowDoms(nn, mm, 1)%porI
+        porJ => flowDoms(nn, mm, 1)%porJ
+        porK => flowDoms(nn, mm, 1)%porK
+
+        indFamilyI => flowDoms(nn, mm, 1)%indFamilyI
+        indFamilyJ => flowDoms(nn, mm, 1)%indFamilyJ
+        indFamilyK => flowDoms(nn, mm, 1)%indFamilyK
+
+        factFamilyI => flowDoms(nn, mm, 1)%factFamilyI
+        factFamilyJ => flowDoms(nn, mm, 1)%factFamilyJ
+        factFamilyK => flowDoms(nn, mm, 1)%factFamilyK
+
+        rotMatrixI => flowDoms(nn, mm, ll)%rotMatrixI
+        rotMatrixJ => flowDoms(nn, mm, ll)%rotMatrixJ
+        rotMatrixK => flowDoms(nn, mm, ll)%rotMatrixK
+
+        blockIsMoving = flowDoms(nn, mm, ll)%blockIsMoving
+        addGridVelocities = flowDoms(nn, mm, ll)%addGridVelocities
+
+        sFaceI => flowDoms(nn, mm, ll)%sFaceI
+        sFaceJ => flowDoms(nn, mm, ll)%sFaceJ
+        sFaceK => flowDoms(nn, mm, ll)%sFaceK
+
+        ! Flow variables. Note that wOld, gamma and the laminar viscosity
+        ! point to the entries on the finest mesh. The reason is that
+        ! they are computed from the other variables. For the eddy
+        ! viscosity this is not the case because in a decoupled solver
+        ! its values are obtained from the fine grid level.
+
+        w => flowDoms(nn, mm, ll)%w
+        wOld => flowDoms(nn, 1, ll)%wOld
+        p => flowDoms(nn, mm, ll)%p
+        aa => flowDoms(nn, mm, ll)%aa
+        shockSensor => flowDoms(nn, mm, ll)%shockSensor
+
+        gamma => flowDoms(nn, 1, ll)%gamma
+        rlv => flowDoms(nn, 1, ll)%rlv
+        rev => flowDoms(nn, mm, ll)%rev
+        s => flowDoms(nn, mm, ll)%s
+
+        ux => flowDoms(nn, mm, ll)%ux
+        uy => flowDoms(nn, mm, ll)%uy
+        uz => flowDoms(nn, mm, ll)%uz
+
+        vx => flowDoms(nn, mm, ll)%vx
+        vy => flowDoms(nn, mm, ll)%vy
+        vz => flowDoms(nn, mm, ll)%vz
+
+        wx => flowDoms(nn, mm, ll)%wx
+        wy => flowDoms(nn, mm, ll)%wy
+        wz => flowDoms(nn, mm, ll)%wz
+
+        qx => flowDoms(nn, mm, ll)%qx
+        qy => flowDoms(nn, mm, ll)%qy
+        qz => flowDoms(nn, mm, ll)%qz
+
+        ! Residual and multigrid variables. The residual point to the
+        ! finest grid entry, the multigrid variables to their own level.
+
+        dw => flowDoms(nn, 1, ll)%dw
+        fw => flowDoms(nn, 1, ll)%fw
+        dwOldRK => flowDoms(nn, 1, ll)%dwOldRK
+        scratch => flowDoms(nn, 1, ll)%scratch
+
+        p1 => flowDoms(nn, mm, ll)%p1
+        w1 => flowDoms(nn, mm, ll)%w1
+        wr => flowDoms(nn, mm, ll)%wr
+
+        ! Variables, which allow a more flexible multigrid treatment.
+        ! They are the same for all spectral modes and therefore they
+        ! point to the 1st mode.
+
+        mgIFine => flowDoms(nn, mm, 1)%mgIFine
+        mgJFine => flowDoms(nn, mm, 1)%mgJFine
+        mgKFine => flowDoms(nn, mm, 1)%mgKFine
+
+        mgIWeight => flowDoms(nn, mm, 1)%mgIWeight
+        mgJWeight => flowDoms(nn, mm, 1)%mgJWeight
+        mgKWeight => flowDoms(nn, mm, 1)%mgKWeight
+
+        mgICoarse => flowDoms(nn, mm, 1)%mgICoarse
+        mgJCoarse => flowDoms(nn, mm, 1)%mgJCoarse
+        mgKCoarse => flowDoms(nn, mm, 1)%mgKCoarse
+
+        ! Time-stepping variables and spectral radIi.
+        ! They all point to the fine mesh entry.
+
+        wn => flowDoms(nn, 1, ll)%wn
+        pn => flowDoms(nn, 1, ll)%pn
+        dtl => flowDoms(nn, 1, ll)%dtl
+
+        radI => flowDoms(nn, 1, ll)%radI
+        radJ => flowDoms(nn, 1, ll)%radJ
+        radK => flowDoms(nn, 1, ll)%radK
+
+        ! Wall distance for the turbulence models.
+
+        d2Wall => flowDoms(nn, mm, ll)%d2Wall
+        filterDES => flowDoms(nn, mm, ll)%filterDES  ! eran-des
+        if (useRoughSA) then
+            ks => flowDoms(nn, mm, ll)%ks
+        end if
+
+        ! Arrays used for the implicit treatment of the turbulent wall
+        ! boundary conditions. As these variables are only allocated for
+        ! the 1st spectral solution of the fine mesh, the pointers point
+        ! to those arrays.
+
+        bmti1 => flowDoms(nn, 1, 1)%bmti1
+        bmti2 => flowDoms(nn, 1, 1)%bmti2
+        bmtj1 => flowDoms(nn, 1, 1)%bmtj1
+        bmtj2 => flowDoms(nn, 1, 1)%bmtj2
+        bmtk1 => flowDoms(nn, 1, 1)%bmtk1
+        bmtk2 => flowDoms(nn, 1, 1)%bmtk2
+
+        bvti1 => flowDoms(nn, 1, 1)%bvti1
+        bvti2 => flowDoms(nn, 1, 1)%bvti2
+        bvtj1 => flowDoms(nn, 1, 1)%bvtj1
+        bvtj2 => flowDoms(nn, 1, 1)%bvtj2
+        bvtk1 => flowDoms(nn, 1, 1)%bvtk1
+        bvtk2 => flowDoms(nn, 1, 1)%bvtk2
+
+        ! Pointers for globalCell/Node
+        globalCell => flowDoms(nn, mm, ll)%globalCell
+        globalNode => flowDoms(nn, mm, ll)%globalNode
+
+        xSeed => flowDoms(nn, mm, ll)%xSeed
+        wallInd => flowDoms(nn, mm, ll)%wallInd
+
+        ! Added by HDN
+        ! Kept the same dim as their counterparts
+        xALE => flowDoms(nn, mm, ll)%xALE
+        sVeloIALE => flowDoms(nn, mm, ll)%sVeloIALE
+        sVeloJALE => flowDoms(nn, mm, ll)%sVeloJALE
+        sVeloKALE => flowDoms(nn, mm, ll)%sVeloKALE
+        sIALE => flowDoms(nn, mm, ll)%sIALE
+        sJALE => flowDoms(nn, mm, ll)%sJALE
+        sKALE => flowDoms(nn, mm, ll)%sKALE
+        sFaceIALE => flowDoms(nn, mm, ll)%sFaceIALE
+        sFaceJALE => flowDoms(nn, mm, ll)%sFaceJALE
+        sFaceKALE => flowDoms(nn, mm, ll)%sFaceKALE
+        dwALE => flowDoms(nn, 1, ll)%dwALE
+        fwALE => flowDoms(nn, 1, ll)%fwALE
+
+        ! Pointers for PC
+        PCMat => flowDoms(nn, mm, ll)%pcMat
+
+        i_D_Fact => flowDoms(nn, mm, ll)%i_D_fact
+        i_L_Fact => flowDoms(nn, mm, ll)%i_L_fact
+        i_U_Fact => flowDoms(nn, mm, ll)%i_U_fact
+        i_U2_Fact => flowDoms(nn, mm, ll)%i_U2_fact
+
+        j_D_Fact => flowDoms(nn, mm, ll)%j_D_fact
+        j_L_Fact => flowDoms(nn, mm, ll)%j_L_fact
+        j_U_Fact => flowDoms(nn, mm, ll)%j_U_fact
+        j_U2_Fact => flowDoms(nn, mm, ll)%j_U2_fact
+
+        k_D_Fact => flowDoms(nn, mm, ll)%k_D_fact
+        k_L_Fact => flowDoms(nn, mm, ll)%k_L_fact
+        k_U_Fact => flowDoms(nn, mm, ll)%k_U_fact
+        k_U2_Fact => flowDoms(nn, mm, ll)%k_U2_fact
+
+        PCVec1 => flowDoms(nn, mm, ll)%PCVec1
+        PCVec2 => flowDoms(nn, mm, ll)%PCVec2
+
+        i_ipiv => flowDoms(nn, mm, ll)%i_ipiv
+        j_ipiv => flowDoms(nn, mm, ll)%j_ipiv
+        k_ipiv => flowDoms(nn, mm, ll)%k_ipiv
+
+    end subroutine setPointers
+
+    subroutine setPointers_b(nn, level, sps)
+        use constants
+        implicit none
+        integer(kind=intType), intent(in) :: nn, level, sps
+
+        ! Alias for setPonters_d
+        call setPointers_d(nn, level, sps)
+
+    end subroutine setPointers_b
+
+    ! Set the pointers for the derivative values AND the normal pointers
+    subroutine setPointers_d(nn, level, sps)
+
+        use block, only: flowDomsd
+        use blockPointers
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        integer(kind=intType), intent(in) :: nn, level, sps
+
+        ! Set normal pointers
+        call setPointers(nn, level, sps)
+
+        viscSubfaced => flowDomsd(nn, 1, sps)%viscSubface
+
+        xd => flowDomsd(nn, 1, sps)%x
+
+        sid => flowDomsd(nn, 1, sps)%si
+        sjd => flowDomsd(nn, 1, sps)%sj
+        skd => flowDomsd(nn, 1, sps)%sk
+
+        vold => flowDomsd(nn, 1, sps)%vol
+
+        rotMatrixId => flowDomsd(nn, 1, sps)%rotMatrixI
+        rotMatrixJd => flowDomsd(nn, 1, sps)%rotMatrixJ
+        rotMatrixKd => flowDomsd(nn, 1, sps)%rotMatrixK
+
+        sFaceId => flowDomsd(nn, 1, sps)%sFaceI
+        sFaceJd => flowDomsd(nn, 1, sps)%sFaceJ
+        sFaceKd => flowDomsd(nn, 1, sps)%sFaceK
+
+        ! Flow variables. Note that wOld, gamma and the laminar viscosity
+        ! point to the entries on the finest mesh. The reason is that
+        ! they are computed from the other variables. For the eddy
+        ! viscosity this is not the case because in a decoupled solver
+        ! its values are obtained from the fine grid level.
+
+        wd => flowDomsd(nn, 1, sps)%w
+        pd => flowDomsd(nn, 1, sps)%p
+
+        gammad => flowDomsd(nn, 1, sps)%gamma
+        aad => flowDomsd(nn, 1, sps)%aa
+        rlvd => flowDomsd(nn, 1, sps)%rlv
+        revd => flowDomsd(nn, 1, sps)%rev
+        sd => flowDomsd(nn, 1, sps)%s
+
+        uxd => flowDomsd(nn, 1, sps)%ux
+        uyd => flowDomsd(nn, 1, sps)%uy
+        uzd => flowDomsd(nn, 1, sps)%uz
+
+        vxd => flowDomsd(nn, 1, sps)%vx
+        vyd => flowDomsd(nn, 1, sps)%vy
+        vzd => flowDomsd(nn, 1, sps)%vz
+
+        wxd => flowDomsd(nn, 1, sps)%wx
+        wyd => flowDomsd(nn, 1, sps)%wy
+        wzd => flowDomsd(nn, 1, sps)%wz
+
+        qxd => flowDomsd(nn, 1, sps)%qx
+        qyd => flowDomsd(nn, 1, sps)%qy
+        qzd => flowDomsd(nn, 1, sps)%qz
+
+        ! Residual and multigrid variables. The residual point to the
+        ! finest grid entry, the multigrid variables to their own level.
+
+        dwd => flowDomsd(nn, 1, sps)%dw
+        fwd => flowDomsd(nn, 1, sps)%fw
+        scratchd => flowDomsd(nn, 1, sps)%scratch
+
+        dtld => flowDomsd(nn, 1, sps)%dtl
+
+        ! Time-stepping variables and spectral radIi.
+        ! They asps point to the fine mesh entry.
+
+        radId => flowDomsd(nn, 1, sps)%radI
+        radJd => flowDomsd(nn, 1, sps)%radJ
+        radKd => flowDomsd(nn, 1, sps)%radK
+
+        d2Walld => flowDomsd(nn, 1, sps)%d2Wall
+
+        ! Arrays used for the implicit treatment of the turbulent wasps
+        ! boundary conditions. As these variables are only aspocated for
+        ! the 1st spectral solution of the fine mesh, the pointers point
+        ! to those arrays.
+
+        bmti1d => flowDomsd(nn, 1, 1)%bmti1
+        bmti2d => flowDomsd(nn, 1, 1)%bmti2
+        bmtj1d => flowDomsd(nn, 1, 1)%bmtj1
+        bmtj2d => flowDomsd(nn, 1, 1)%bmtj2
+        bmtk1d => flowDomsd(nn, 1, 1)%bmtk1
+        bmtk2d => flowDomsd(nn, 1, 1)%bmtk2
+
+        bvti1d => flowDomsd(nn, 1, 1)%bvti1
+        bvti2d => flowDomsd(nn, 1, 1)%bvti2
+        bvtj1d => flowDomsd(nn, 1, 1)%bvtj1
+        bvtj2d => flowDomsd(nn, 1, 1)%bvtj2
+        bvtk1d => flowDomsd(nn, 1, 1)%bvtk1
+        bvtk2d => flowDomsd(nn, 1, 1)%bvtk2
+
+        !BCData Array
+        BCDatad => flowDomsd(nn, 1, sps)%BCdata
+
+    end subroutine setPointers_d
+
+    subroutine spectralInterpolCoef(nsps, t, alpScal, alpMat)
+        !
+        !       spectralInterpolCoef determines the scalar and matrix
+        !       spectral interpolation coefficients for the given number of
+        !       spectral solutions for the given t, where t is the ratio of
+        !       the time and the periodic interval time. Note that the index
+        !       of the spectral solutions of both alpScal and alpMat start
+        !       at 0. In this way these coefficients are easier to determine.
+        !
+        use constants
+        use inputTimeSpectral, only: nTimeIntervalsSpectral, rotMatrixSpectral
+        use section, only: nSections, sections
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nsps
+        real(kind=realType), intent(in) :: t
+
+        real(kind=realType), dimension(0:nsps - 1), intent(out) :: alpScal
+        real(kind=realType), dimension(nSections, 0:nsps - 1, 3, 3), &
+            intent(out) :: alpMat
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: jj, nn, j, p, r, nhalfM1, m, mhalfM1
+
+        real(kind=realType) :: nspsInv, mInv, tm, alp
+
+        real(kind=realType), dimension(3, 3) :: rp, tmp
+
+        !       Scalar coefficients.
+        !
+        ! Loop over the number of spectral solutions to compute the
+        ! coefficients. Note that the loop starts at 0.
+
+        if (mod(nsps, 2) .eq. 0) then
+            nhalfM1 = nsps/2 - 1
+        else
+            nhalfM1 = (nsps - 1)/2
+        end if
+
+        nspsInv = one/real(nsps, realType)
+
+        do j = 0, (nsps - 1)
+            if (mod(nsps, 2) .eq. 0) then
+                alpScal(j) = one + cos(j*pi)*cos(nsps*pi*t)
+            else
+                alpScal(j) = one + cos(j*pi*(nsps + 1)/nsps)*cos((nsps + 1)*pi*t)
+            end if
+
+            do r = 1, nhalfM1
+                alpScal(j) = alpScal(j) &
+                             + two*cos(r*j*two*pi*nspsInv)*cos(r*two*pi*t) &
+                             + two*sin(r*j*two*pi*nspsInv)*sin(r*two*pi*t)
+            end do
+
+            alpScal(j) = alpScal(j)*nspsInv
+
+        end do
+        !
+        !       Matrix coefficients. These are (can be) different for every
+        !       section and they must therefore be determined for every
+        !       section.
+        !
+        ! Loop over the number of sections in the grid.
+
+        sectionLoop: do nn = 1, nSections
+
+            ! Compute the numbers for the entire wheel for this section.
+            ! Note that also t must be adapted, because t is a ratio between
+            ! the actual time and the periodic time.
+
+            m = nsps*sections(nn)%nSlices
+            if (mod(m, 2) .eq. 0) then
+                mhalfM1 = m/2 - 1
+            else
+                mhalfM1 = (m - 1)/2
+            end if
+            mInv = one/real(m, realType)
+            tm = t/real(sections(nn)%nSlices, realType)
+
+            ! Loop over the number of spectral solutions.
+
+            spectralLoop: do jj = 0, (nsps - 1)
+
+                ! Initialize the matrix coefficients to zero and the matrix
+                ! rp to the identity matrix. Rp is the rotation matrix of this
+                ! section to the power p, which starts at 0, i.e. rp = i.
+
+                alpMat(nn, jj, 1, 1) = zero
+                alpMat(nn, jj, 1, 2) = zero
+                alpMat(nn, jj, 1, 3) = zero
+
+                alpMat(nn, jj, 2, 1) = zero
+                alpMat(nn, jj, 2, 2) = zero
+                alpMat(nn, jj, 2, 3) = zero
+
+                alpMat(nn, jj, 3, 1) = zero
+                alpMat(nn, jj, 3, 2) = zero
+                alpMat(nn, jj, 3, 3) = zero
+
+                rp(1, 1) = one
+                rp(1, 2) = zero
+                rp(1, 3) = zero
+
+                rp(2, 1) = zero
+                rp(2, 2) = one
+                rp(2, 3) = zero
+
+                rp(3, 1) = zero
+                rp(3, 2) = zero
+                rp(3, 3) = one
+
+                ! Loop over the number of slices of this section. Note that
+                ! this loop starts at zero, which simplifies the formulas.
+
+                slicesLoop: do p = 0, (sections(nn)%nSlices - 1)
+
+                    ! Determine the index j, the index of alp in the entire
+                    ! wheel.
+
+                    j = jj + p*nsps
+
+                    ! Compute the scalar coefficient alp of the index j in
+                    ! the entire wheel.
+
+                    if (mod(m, 2) .eq. 0) then
+                        alp = one + cos(j*pi)*cos(m*pi*tm)
+                    else
+                        alp = one + cos(j*pi*(m + 1)/m)*cos((m + 1)*pi*tm)
+                    end if
+                    do r = 1, mhalfM1
+                        alp = alp + two*cos(r*j*two*pi*mInv)*cos(r*two*pi*tm) &
+                              + two*sin(r*j*two*pi*mInv)*sin(r*two*pi*tm)
+                    end do
+
+                    alp = alp*mInv
+
+                    ! Update the matrix coefficient.
+
+                    do r = 1, 3
+                        do j = 1, 3
+                            alpMat(nn, jj, r, j) = alpMat(nn, jj, r, j) + alp*rp(r, j)
+                        end do
+                    end do
+
+                    ! Multiply rp by the rotation matrix to obtain the correct
+                    ! matrix for the next slice. Use tmp as temporary storage.
+
+                    do r = 1, 3
+                        do j = 1, 3
+                            tmp(r, j) = rp(r, 1)*rotMatrixSpectral(nn, 1, j) &
+                                        + rp(r, 2)*rotMatrixSpectral(nn, 2, j) &
+                                        + rp(r, 3)*rotMatrixSpectral(nn, 3, j)
+                        end do
+                    end do
+
+                    rp = tmp
+
+                end do slicesLoop
+            end do spectralLoop
+        end do sectionLoop
+
+    end subroutine spectralInterpolCoef
+
+    subroutine deallocateTempMemory(resNeeded)
+        !
+        !       deallocateTempMemory deallocates memory used in the solver,
+        !       but which is not needed to store the actual solution. In this
+        !       way the memory can be used differently, e.g. when writing the
+        !       solution or computing the wall distances.
+        !
+        use constants
+        use block, only: flowDoms, nDom
+        use communication, only: sendBuffer, recvBuffer
+        use inputIteration, only: smoother
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        logical, intent(in) :: resNeeded
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: nn, mm
+
+        ! Deallocate the communication buffers
+
+        deallocate (sendBuffer, recvBuffer, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("deallocateTempMemory", &
+                           "Deallocation error for communication buffers")
+
+        ! Loop over the spectral modes and domains. Note that only memory
+        ! on the finest grid is released, because a) most of these
+        ! variables are only allocated on the fine grid and b) the coarser
+        ! grids do not contribute that much in the memory usage anyway.
+
+        spectralModes: do mm = 1, nTimeIntervalsSpectral
+            domains: do nn = 1, nDom
+
+                ! Check if the residual, time step, etc. Is needed.
+
+                if (.not. resNeeded) then
+
+                    ! Residual, etc. Not needed.
+                    ! Deallocate residual, the time step and the spectral radii
+                    ! of the fine level.
+
+                    deallocate (flowDoms(nn, 1, mm)%dw, flowDoms(nn, 1, mm)%fw, &
+                                flowDoms(nn, 1, mm)%dtl, flowDoms(nn, 1, mm)%radI, &
+                                flowDoms(nn, 1, mm)%radJ, flowDoms(nn, 1, mm)%radK, &
+                                stat=ierr)
+                    if (ierr /= 0) &
+                         call terminate("deallocateTempMemory", &
+                         "Deallocation error for dw, fw, dtl and &
+                         &spectral radii.")
+                end if
+
+                ! The memory for the zeroth Runge Kutta stage
+                ! if a Runge Kutta scheme is used.
+
+                if (smoother == RungeKutta) then
+
+                    deallocate (flowDoms(nn, 1, mm)%wn, flowDoms(nn, 1, mm)%pn, &
+                                stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("deallocateTempMemory", &
+                                       "Deallocation error for wn and pn")
+                end if
+
+            end do domains
+        end do spectralModes
+
+    end subroutine deallocateTempMemory
+
+    subroutine allocateTempMemory(resNeeded)
+        !
+        !       AllocateTempMemory allocates the memory again that was
+        !       temporarily deallocted by deallocateTempMemory.
+        !
+        use constants
+        use block, only: flowDoms, nDom
+        use communication, only: sendBuffer, recvBuffer, sendBufferSize, recvBufferSize
+        use inputIteration, only: smoother
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use flowVarRefState, only: nw, nwf
+
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        logical, intent(in) :: resNeeded
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: nn, mm
+        integer(kind=intType) :: il, jl, kl, ie, je, ke, ib, jb, kb
+
+        ! The memory for the receive buffers.
+
+        allocate (sendBuffer(sendBufferSize), &
+                  recvBuffer(recvBufferSize), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("allocateTempMemory", &
+                           "Memory allocation failure for comm buffers")
+
+        ! Loop over the spectral modes and domains. Note that only memory
+        ! on the finest mesh level needs to be reallocated, because the
+        ! memory on the coarser levels has not been released or is not
+        ! needed .
+
+        spectralModes: do mm = 1, nTimeIntervalsSpectral
+            domains: do nn = 1, nDom
+
+                ! Store some dimensions a bit easier.
+
+                il = flowDoms(nn, 1, mm)%il
+                jl = flowDoms(nn, 1, mm)%jl
+                kl = flowDoms(nn, 1, mm)%kl
+
+                ie = flowDoms(nn, 1, mm)%ie
+                je = flowDoms(nn, 1, mm)%je
+                ke = flowDoms(nn, 1, mm)%ke
+
+                ib = flowDoms(nn, 1, mm)%ib
+                jb = flowDoms(nn, 1, mm)%jb
+                kb = flowDoms(nn, 1, mm)%kb
+
+                ! Check if the residual, time step, etc. was deallocated.
+
+                if (.not. resNeeded) then
+
+                    ! Allocate the residual, the time step and
+                    ! the spectral radii.
+
+                    allocate (flowDoms(nn, 1, mm)%dw(0:ib, 0:jb, 0:kb, 1:nw), &
+                              flowDoms(nn, 1, mm)%fw(0:ib, 0:jb, 0:kb, 1:nwf), &
+                              flowDoms(nn, 1, mm)%dtl(1:ie, 1:je, 1:ke), &
+                              flowDoms(nn, 1, mm)%radI(1:ie, 1:je, 1:ke), &
+                              flowDoms(nn, 1, mm)%radJ(1:ie, 1:je, 1:ke), &
+                              flowDoms(nn, 1, mm)%radK(1:ie, 1:je, 1:ke), stat=ierr)
+                    if (ierr /= 0) &
+                         call terminate("allocateTempMemory", &
+                         "Memory allocation failure for dw, fw, &
+                         &dtl and the spectral radii.")
+
+                    ! Initialize dw and fw to zero to avoid possible overflows
+                    ! of the halo's.
+
+                    flowDoms(nn, 1, mm)%dw = zero
+                    flowDoms(nn, 1, mm)%fw = zero
+
+                end if
+
+                ! The memory for the zeroth runge kutta stage
+                ! if a runge kutta scheme is used.
+
+                if (smoother == RungeKutta) then
+
+                    allocate (flowDoms(nn, 1, mm)%wn(2:il, 2:jl, 2:kl, 1:nwf), &
+                              flowDoms(nn, 1, mm)%pn(2:il, 2:jl, 2:kl), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("allocateTempMemory", &
+                                       "Memory allocation failure for wn and pn")
+                end if
+
+            end do domains
+        end do spectralModes
+
+    end subroutine allocateTempMemory
+
+    subroutine getLiftDirFromSymmetry(liftDir)
+
+        ! The purpose of this function is to determine what coordinate
+        ! direction the mirror plane is in. It does NOT handle multiple mirror
+        ! planes. It is used just to determine what the lift direction is.
+
+        use constants
+        use blockPointers, only: x, il, jl, kl, BCType, nDom, BCData, BCFaceID, nBocos
+        use communication, only: adflow_comm_world
+        implicit none
+
+        ! Output
+        integer(kind=intType), intent(out) :: liftDir
+        integer(kind=intType), dimension(3) :: sym_local, sym
+
+        ! Working
+        integer(kind=intType) :: nn, i_index(1), mm, ierr
+        real(kind=realType), dimension(:, :, :), pointer :: xx
+        real(kind=realType) :: cp(3), v1(3), v2(3)
+        ! Loop over each block and each subFace
+
+        sym_local = 0_intType
+        sym = 0_intType
+        liftDir = 0_intType
+        do nn = 1, nDom
+            call setPointers(nn, 1, 1)
+            do mm = 1, nBocos
+                if (bcType(mm) == symm) then
+
+                    select case (BCFaceID(mm))
+                    case (iMin)
+                        xx => x(1, :, :, :)
+                    case (iMax)
+                        xx => x(il, :, :, :)
+                    case (jMin)
+                        xx => x(:, 1, :, :)
+                    case (jMax)
+                        xx => x(:, jl, :, :)
+                    case (kMin)
+                        xx => x(:, :, 1, :)
+                    case (kMax)
+                        xx => x(:, :, kl, :)
+                    end select
+
+                    ! Take the cross product
+                    v1(:) = xx(bcData(mm)%inEnd, bcData(mm)%jnEnd, :) - &
+                            xx(bcData(mm)%inBeg, bcData(mm)%jnBeg, :)
+                    v2(:) = xx(bcData(mm)%inBeg, bcData(mm)%jnEnd, :) - &
+                            xx(bcData(mm)%inEnd, bcData(mm)%jnBeg, :)
+
+                    ! Cross Product
+                    cp(1) = (v1(2)*v2(3) - v1(3)*v2(2))
+                    cp(2) = (v1(3)*v2(1) - v1(1)*v2(3))
+                    cp(3) = (v1(1)*v2(2) - v1(2)*v2(1))
+
+                    ! Only interesed in abs values
+                    cp = abs(cp)
+
+                    ! Location, ie coordiante direction of dominate direction
+                    i_index = maxloc(real(cp))
+
+                    sym_local(i_index(1)) = 1_intType
+                end if
+            end do
+        end do
+
+        ! Now we have a bunch of sym_locals, mpi_allreduce them and SUM
+
+        call MPI_Allreduce(sym_local, sym, 3, adflow_integer, &
+                           MPI_SUM, adflow_comm_world, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Now we should make sure that only ONE of the values is
+        ! non-zero. If more than one value is zero, it means we have
+        ! multiple symmetry planes which we can't support.
+        if (sym(1) == 0 .and. sym(2) == 0 .and. sym(3) == 0) then
+            ! Pass - no sym, can't determine lift dir:
+        else if (sym(1) .ne. 0 .and. sym(2) == 0 .and. sym(3) == 0) then
+            ! Pass - x dir can't be symmetry
+        else if (sym(1) == 0 .and. sym(2) .ne. 0 .and. sym(3) == 0) then
+            liftDir = 3
+        else if (sym(1) == 0 .and. sym(2) == 0 .and. sym(3) .ne. 0) then
+            liftDir = 2
+        else
+            ! Multiple orientations...can't do anything
+        end if
+
+    end subroutine getLiftDirFromSymmetry
+
+    subroutine writeIntroMessage
+        !
+        !       writeIntroMessage writes a message to stdout with
+        !       information how the executable was built, e.g. whether single
+        !       or double precision is used for the integers and reals, etc.
+        !       To avoid a messy output only processor 0 prints this info.
+        !
+        use constants
+        use communication, only: myid, nProc
+        implicit none
+        !
+        !      Local variables
+        !
+        character(len=7) :: integerString
+
+        ! Return if this is not processor 0.
+
+        if (myID > 0) return
+
+        ! I'm processor 0. Write the info to stdout.
+
+        print "(a)", "#"
+        print "(a)", "# ADflow, multiblock structured flow solver"
+        print "(a)", "#"
+        print "(a)", "# This code solves the 3D RANS, laminar NS or &
+             &Euler equations"
+        print "(a)", "# on multiblock structured hexahedral grids."
+
+        write (integerString, "(i7)") nProc
+        integerString = adjustl(integerString)
+        print "(3a)", "# This is a parallel executable running on ", &
+            trim(integerString), " processors."
+        print "(a)", "# It has been compiled with the &
+             &following options:"
+
+        if (debug) then
+            print "(a)", "# - Debug mode."
+        else
+            print "(a)", "# - Optimized mode."
+        end if
 
 #ifdef USE_LONG_INT
-    print "(a)", "# - Size of standard integers: 8 bytes."
+        print "(a)", "# - Size of standard integers: 8 bytes."
 #else
-    print "(a)", "# - Size of standard integers: 4 bytes."
+        print "(a)", "# - Size of standard integers: 4 bytes."
 #endif
 
 #ifdef USE_SINGLE_PRECISION
-    print "(a)", "# - Size of standard floating point types: &
-         &4 bytes."
+        print "(a)", "# - Size of standard floating point types: &
+             &4 bytes."
 
 #elif  USE_QUADRUPLE_PRECISION
-    print "(a)", "# - Size of standard floating point types: &
-         &16 bytes."
+        print "(a)", "# - Size of standard floating point types: &
+             &16 bytes."
 #else
-    print "(a)", "# - Size of standard floating point types: &
-         &8 bytes."
+        print "(a)", "# - Size of standard floating point types: &
+             &8 bytes."
 #endif
 
 #ifdef USE_NO_CGNS
-    print "(a)", "# - Without cgns support"
+        print "(a)", "# - Without cgns support"
 #else
-    print "(a)", "# - With cgns support"
+        print "(a)", "# - With cgns support"
 #endif
 
 #ifdef USE_NO_SIGNALS
-    print "(a)", "# - Without support for signals."
+        print "(a)", "# - Without support for signals."
 #else
-    print "(a)", "# - With support for signals."
+        print "(a)", "# - With support for signals."
 #endif
 
-    print "(a)", "#"
-
-  end subroutine writeIntroMessage
-
-  subroutine pointReduce(pts, N, tol, uniquePts, link, nUnique)
-
-    ! Given a list of N points (pts) in three space, with possible
-    ! duplicates, (to within tol) return a list of the nUnique
-    ! uniquePoints of points and a link array of length N, that points
-    ! into the unique list
-
-    use constants
-    use kdtree2_module
-    implicit none
-
-    ! Input Parameters
-    integer(kind=intType), intent(in) :: N
-    real(kind=realType), intent(in), dimension(3, N) :: pts
-    real(kind=realType), intent(in) :: tol
-
-    ! Output Parametres
-    real(kind=realType), intent(out), dimension(3, N) :: uniquePts
-    integer(kind=intType), intent(out), dimension(N) :: link
-    integer(kind=intType), intent(out) :: nUnique
-
-    ! Working paramters
-    type(kdtree2), pointer :: mytree
-    real(kind=realType) :: tol2, timeb, timea
-    integer(kind=intType) :: nFound, i, j, nAlloc
-    type(kdtree2_result), allocatable, dimension(:) :: results
-
-    if (N==0) then
-       nUnique = 0
-       return
-    end if
-
-    ! We will use the KD_tree to do most of the heavy lifting here:
-
-    mytree => kdtree2_create(pts, sort=.True.)
-
-    ! KD tree works with the square of the tolerance
-    tol2 = tol**2
-
-    ! Unlikely we'll have more than 20 points same, but there is a
-    ! safetly check anwyay.
-    nalloc = 20
-    allocate(results(nalloc))
-
-    link = 0
-    nUnique = 0
-
-    ! Loop over all nodes
-    do i=1, N
-       if (link(i) == 0) then
-          call kdtree2_r_nearest(mytree, pts(:, i), tol2, nFound, nAlloc, results)
-
-          ! Expand if necesary and re-run
-          if (nfound > nalloc) then
-             deallocate(results)
-             nalloc = nfound
-             allocate(results(nalloc))
-             call kdtree2_r_nearest(mytree, pts(:, i), tol2, nFound, nAlloc, results)
-          end if
-
-          if (nFound == 1) then
-             ! This one is easy, it is already a unique node
-             nUnique = nUnique + 1
-             link(i) = nUnique
-             uniquePts(:, nUnique) = pts(:, i)
-          else
-             if (link(i) == 0) then
-                ! This node hasn't been assigned yet:
-                nUnique = nUnique + 1
-                uniquePts(:, nUnique) = pts(:, i)
-
-                do j=1, nFound
-                   link(results(j)%idx) = nUnique
+        print "(a)", "#"
+
+    end subroutine writeIntroMessage
+
+    subroutine pointReduce(pts, N, tol, uniquePts, link, nUnique)
+
+        ! Given a list of N points (pts) in three space, with possible
+        ! duplicates, (to within tol) return a list of the nUnique
+        ! uniquePoints of points and a link array of length N, that points
+        ! into the unique list
+
+        use constants
+        use kdtree2_module
+        implicit none
+
+        ! Input Parameters
+        integer(kind=intType), intent(in) :: N
+        real(kind=realType), intent(in), dimension(3, N) :: pts
+        real(kind=realType), intent(in) :: tol
+
+        ! Output Parametres
+        real(kind=realType), intent(out), dimension(3, N) :: uniquePts
+        integer(kind=intType), intent(out), dimension(N) :: link
+        integer(kind=intType), intent(out) :: nUnique
+
+        ! Working paramters
+        type(kdtree2), pointer :: mytree
+        real(kind=realType) :: tol2, timeb, timea
+        integer(kind=intType) :: nFound, i, j, nAlloc
+        type(kdtree2_result), allocatable, dimension(:) :: results
+
+        if (N == 0) then
+            nUnique = 0
+            return
+        end if
+
+        ! We will use the KD_tree to do most of the heavy lifting here:
+
+        mytree => kdtree2_create(pts, sort=.True.)
+
+        ! KD tree works with the square of the tolerance
+        tol2 = tol**2
+
+        ! Unlikely we'll have more than 20 points same, but there is a
+        ! safetly check anwyay.
+        nalloc = 20
+        allocate (results(nalloc))
+
+        link = 0
+        nUnique = 0
+
+        ! Loop over all nodes
+        do i = 1, N
+            if (link(i) == 0) then
+                call kdtree2_r_nearest(mytree, pts(:, i), tol2, nFound, nAlloc, results)
+
+                ! Expand if necesary and re-run
+                if (nfound > nalloc) then
+                    deallocate (results)
+                    nalloc = nfound
+                    allocate (results(nalloc))
+                    call kdtree2_r_nearest(mytree, pts(:, i), tol2, nFound, nAlloc, results)
+                end if
+
+                if (nFound == 1) then
+                    ! This one is easy, it is already a unique node
+                    nUnique = nUnique + 1
+                    link(i) = nUnique
+                    uniquePts(:, nUnique) = pts(:, i)
+                else
+                    if (link(i) == 0) then
+                        ! This node hasn't been assigned yet:
+                        nUnique = nUnique + 1
+                        uniquePts(:, nUnique) = pts(:, i)
+
+                        do j = 1, nFound
+                            link(results(j)%idx) = nUnique
+                        end do
+                    end if
+                end if
+            end if
+        end do
+
+        ! Done with the tree and the result vector
+        call kdtree2destroy(mytree)
+        deallocate (results)
+
+    end subroutine pointReduce
+    subroutine releaseMemoryPart1
+        !
+        !       releaseMemoryPart1 releases all the memory on the coarser
+        !       grids of flowDoms and the fine grid memory which is not needed
+        !       for the possible interpolation of the spectral solution.
+        !
+
+        ! This is a free-for-all on the imports. Oh well.
+        use block
+        use inputIteration
+        use inputTimeSpectral
+        use inputPhysics
+        use inputUnsteady
+        use monitor
+        use cgnsGrid
+        use communication
+        use iteration
+        use cgnsGrid
+        use section
+        use wallDistanceData
+        use adjointVars
+        use ADJointPETSc
+        use surfaceFamilies
+        implicit none
+        !
+        !      Local variables
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: sps, nLevels, level, nn, l, i, j
+
+        ! Determine the number of grid levels present in flowDoms.
+
+        nLevels = ubound(flowDoms, 2)
+
+        ! Loop over the number of spectral solutions.
+
+        spectralLoop: do sps = 1, nTimeIntervalsSpectral
+
+            ! Loop over the coarser grid levels and local blocks and
+            ! deallocate all the memory.
+
+            do level = 2, nLevels
+                do nn = 1, nDom
+                    call deallocateBlock(nn, level, sps)
+                end do
+            end do
+
+            ! Release some memory of the fine grid, which is not needed
+            ! anymore.
+
+            do nn = 1, nDom
+                ! Modified by HDN
+                ! Added dwALE, fwALE
+                deallocate ( &
+                    flowDoms(nn, 1, sps)%dw, flowDoms(nn, 1, sps)%fw, &
+                    flowDoms(nn, 1, sps)%dtl, flowDoms(nn, 1, sps)%radI, &
+                    flowDoms(nn, 1, sps)%radJ, flowDoms(nn, 1, sps)%radK, &
+                    flowDoms(nn, 1, sps)%shockSensor, &
+                    stat=ierr)
+                if (ierr /= 0) &
+                     call terminate("releaseMemoryPart1", &
+                     "Deallocation error for dw, fw, dwALE, fwALE, dtl and &
+                     &spectral radii.")
+
+                ! Extra variables for ALE
+                if (equationMode == unSteady .and. useALE) then
+                    deallocate ( &
+                        flowDoms(nn, 1, sps)%dwALE, &
+                        flowDoms(nn, 1, sps)%fwALE, &
+                        stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("releaseMemoryPart1", &
+                                       "Deallocation error for dwALE, fwALE.")
+                end if
+
+                ! Nullify the pointers, such that no attempt is made to
+                ! release the memory again.
+
+                nullify (flowDoms(nn, 1, sps)%dw)
+                nullify (flowDoms(nn, 1, sps)%fw)
+                nullify (flowDoms(nn, 1, sps)%dwALE) ! Added by HDN
+                nullify (flowDoms(nn, 1, sps)%fwALE) ! Added by HDN
+                nullify (flowDoms(nn, 1, sps)%dtl)
+                nullify (flowDoms(nn, 1, sps)%radI)
+                nullify (flowDoms(nn, 1, sps)%radJ)
+                nullify (flowDoms(nn, 1, sps)%radK)
+                nullify (flowDoms(nn, 1, sps)%scratch)
+                nullify (flowDoms(nn, 1, sps)%shockSensor)
+                ! Check if the zeroth stage runge kutta memory has been
+                ! allocated. If so deallocate it and nullify the pointers.
+
+                if (smoother == RungeKutta) then
+
+                    deallocate (flowDoms(nn, 1, sps)%wn, flowDoms(nn, 1, sps)%pn, &
+                                stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("releaseMemoryPart1", &
+                                       "Deallocation error for wn and pn")
+
+                    nullify (flowDoms(nn, 1, sps)%wn)
+                    nullify (flowDoms(nn, 1, sps)%pn)
+
+                end if
+
+                ! Release the memory of the old residuals for the time
+                ! accurate Runge-Kutta schemes.
+
+                if (equationMode == unsteady .and. &
+                    timeIntegrationScheme == explicitRK) then
+
+                    deallocate (flowDoms(nn, 1, sps)%dwOldRK, stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("releaseMemoryPart1", &
+                                       "Deallocation error for dwOldRK,")
+
+                    nullify (flowDoms(nn, 1, sps)%dwOldRK)
+                end if
+
+            end do
+
+        end do spectralLoop
+
+        ! derivative values
+        if (derivVarsAllocated) then
+            call deallocDerivativeValues(1)
+        end if
+
+        ! Bunch of extra sutff that hasn't been deallocated
+        if (allocated(cycleStrategy)) then
+            deallocate (cycleStrategy)
+        end if
+
+        if (allocated(monNames)) then
+            deallocate (monNames)
+        end if
+
+        if (allocated(monLoc)) then
+            deallocate (monLoc)
+        end if
+
+        if (allocated(monGlob)) then
+            deallocate (monGlob)
+        end if
+
+        if (allocated(monRef)) then
+            deallocate (monRef)
+        end if
+
+        if (allocated(cgnsFamilies)) then
+            deallocate (cgnsFamilies)
+        end if
+
+        if (allocated(cgnsDomsd)) then
+            deallocate (cgnsDomsd)
+        end if
+        ! deallocate(famIDsDomainInterfaces, &
+        !      bcIDsDomainInterfaces,  &
+        !      famIDsSliding)
+        if (allocated(sections)) then
+            deallocate (sections)
+        end if
+
+        if (allocated(BCFamExchange)) then
+            do j = 1, size(BCFamExchange, 2)
+                do i = 1, size(BCFamExchange, 1)
+                    call destroyFamilyExchange(BCFamExchange(i, j))
+                end do
+            end do
+            deallocate (BCFamExchange)
+        end if
+
+        if (allocated(nCellGlobal)) then
+            deallocate (nCellGlobal)
+        end if
+
+        ! Now deallocate the containers and communication objects.
+        if (allocated(commPatternCell_1st)) then
+            do l = 1, nLevels
+                call deallocateCommType(commPatternCell_1st(l))
+            end do
+            deallocate (commPatternCell_1st)
+        end if
+        if (allocated(commPatternCell_2nd)) then
+            do l = 1, nLevels
+                call deallocateCommType(commPatternCell_2nd(l))
+            end do
+            deallocate (commPatternCell_2nd)
+        end if
+        if (allocated(commPatternNode_1st)) then
+            do l = 1, nLevels
+                call deallocateCommType(commPatternNode_1st(l))
+            end do
+            deallocate (commPatternNode_1st)
+        end if
+        if (allocated(internalCell_1st)) then
+            do l = 1, nLevels
+                call deallocateInternalCommType(internalCell_1st(l))
+            end do
+            deallocate (internalCell_1st)
+        end if
+        if (allocated(internalCell_2nd)) then
+            do l = 1, nLevels
+                call deallocateInternalCommType(internalCell_2nd(l))
+            end do
+            deallocate (internalCell_2nd)
+        end if
+        if (allocated(internalNode_1st)) then
+            do l = 1, nLevels
+                call deallocateInternalCommType(internalNode_1st(l))
+            end do
+            deallocate (internalNode_1st)
+        end if
+
+        ! Send/recv buffer
+        if (allocated(sendBuffer)) then
+            deallocate (sendBuffer)
+        end if
+
+        if (allocated(recvBuffer)) then
+            deallocate (recvBuffer)
+        end if
+
+        ! massFlow stuff from setFamilyInfoFaces.f90
+        if (allocated(massFlowFamilyInv)) then
+            deallocate (massFlowFamilyInv)
+        end if
+        if (allocated(massFlowFamilyDiss)) then
+            deallocate (massFlowFamilyDiss)
+        end if
+
+    end subroutine releaseMemoryPart1
+
+    subroutine deallocateCommType(comm)
+        use communication
+        implicit none
+        integer(kind=intType) :: ierr, i
+
+        type(commType) :: comm
+        ! Deallocate memory in comm
+
+        ! Deallocate the sendLists
+        do i = 1, comm%nProcSend
+            deallocate (comm%sendList(i)%block, stat=ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            deallocate (comm%sendList(i)%indices, stat=ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            deallocate (comm%sendList(i)%interp, stat=ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+        end do
+
+        ! Deallocate the recvLists
+        do i = 1, comm%nProcRecv
+            deallocate (comm%recvList(i)%block, stat=ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            deallocate (comm%recvList(i)%indices, stat=ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+        end do
+
+        deallocate (comm%sendProc, stat=ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        deallocate (comm%nsend, stat=ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        deallocate (comm%nsendcum, stat=ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        deallocate (comm%sendlist, stat=ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        deallocate (comm%recvProc, stat=ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        deallocate (comm%nrecv, stat=ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        deallocate (comm%nrecvcum, stat=ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        deallocate (comm%recvlist, stat=ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        deallocate (comm%indexsendproc, stat=ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        deallocate (comm%indexrecvproc, stat=ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        if (comm%nPeriodic > 0) then
+            do i = 1, comm%nPeriodic
+                deallocate (comm%periodicData(i)%block, stat=ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                deallocate (comm%periodicData(i)%indices)
+                call EChk(ierr, __FILE__, __LINE__)
+            end do
+
+            deallocate (comm%periodicData, stat=ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+        end if
+
+    end subroutine deallocateCommType
+
+    subroutine deallocateInternalCommType(comm)
+        use communication
+        implicit none
+        integer(kind=intType) :: ierr, i
+
+        type(internalCommType) :: comm
+        ! Deallocate memory in comm
+        deallocate (comm%donorBlock, stat=ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        deallocate (comm%donorIndices, stat=ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        deallocate (comm%donorInterp, stat=ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        deallocate (comm%haloBlock, stat=ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        deallocate (comm%haloIndices, stat=ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        if (comm%nPeriodic > 0) then
+            do i = 1, comm%nPeriodic
+                deallocate (comm%periodicData(i)%block, stat=ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                deallocate (comm%periodicData(i)%indices)
+                call EChk(ierr, __FILE__, __LINE__)
+            end do
+            deallocate (comm%periodicData, stat=ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+        end if
+
+    end subroutine deallocateInternalCommType
+
+    subroutine deallocDerivativeValues(level)
+
+        use constants
+        use block, only: flowDomsd, flowDoms, nDom
+        use inputtimespectral, only: nTimeIntervalsSpectral
+        use wallDistanceData, only: xSurfVec, xSurfVecd
+        use flowVarRefState, only: winfd
+        use inputPhysics, only: wallDistanceNeeded, useRoughSA
+        use adjointVars, only: derivVarsAllocated
+        use BCPointers_b
+
+        implicit none
+
+        ! Input Parameters
+        integer(kind=intType) :: level
+
+        ! Local variables
+        integer(kind=intType) :: nn, sps, stat, mm, ierr
+
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+
+                deallocate ( &
+                    flowDomsd(nn, level, sps)%x, &
+                    flowDomsd(nn, level, sps)%si, &
+                    flowDomsd(nn, level, sps)%sj, &
+                    flowDomsd(nn, level, sps)%sk, &
+                    flowDomsd(nn, level, sps)%vol, &
+                    flowDomsd(nn, level, sps)%rotMatrixI, &
+                    flowDomsd(nn, level, sps)%rotMatrixJ, &
+                    flowDomsd(nn, level, sps)%rotMatrixK, &
+                    flowDomsd(nn, level, sps)%s, &
+                    flowDomsd(nn, level, sps)%sFaceI, &
+                    flowDomsd(nn, level, sps)%sFaceJ, &
+                    flowDomsd(nn, level, sps)%sFaceK, &
+                    flowDomsd(nn, level, sps)%w, &
+                    flowDomsd(nn, level, sps)%dw, &
+                    flowDomsd(nn, level, sps)%fw, &
+                    flowDomsd(nn, level, sps)%scratch, &
+                    flowDomsd(nn, level, sps)%p, &
+                    flowDomsd(nn, level, sps)%gamma, &
+                    flowDomsd(nn, level, sps)%aa, &
+                    flowDomsd(nn, level, sps)%rlv, &
+                    flowDomsd(nn, level, sps)%rev, &
+                    flowDomsd(nn, level, sps)%dtl, &
+                    flowDomsd(nn, level, sps)%radI, &
+                    flowDomsd(nn, level, sps)%radJ, &
+                    flowDomsd(nn, level, sps)%radK, &
+                    flowDomsd(nn, level, sps)%ux, &
+                    flowDomsd(nn, level, sps)%uy, &
+                    flowDomsd(nn, level, sps)%uz, &
+                    flowDomsd(nn, level, sps)%vx, &
+                    flowDomsd(nn, level, sps)%vy, &
+                    flowDomsd(nn, level, sps)%vz, &
+                    flowDomsd(nn, level, sps)%wx, &
+                    flowDomsd(nn, level, sps)%wy, &
+                    flowDomsd(nn, level, sps)%wz, &
+                    flowDomsd(nn, level, sps)%qx, &
+                    flowDomsd(nn, level, sps)%qy, &
+                    flowDomsd(nn, level, sps)%qz, &
+                    flowDomsd(nn, level, sps)%bmti1, &
+                    flowDomsd(nn, level, sps)%bmti2, &
+                    flowDomsd(nn, level, sps)%bmtj1, &
+                    flowDomsd(nn, level, sps)%bmtj2, &
+                    flowDomsd(nn, level, sps)%bmtk1, &
+                    flowDomsd(nn, level, sps)%bmtk2, &
+                    flowDomsd(nn, level, sps)%bvti1, &
+                    flowDomsd(nn, level, sps)%bvti2, &
+                    flowDomsd(nn, level, sps)%bvtj1, &
+                    flowDomsd(nn, level, sps)%bvtj2, &
+                    flowDomsd(nn, level, sps)%bvtk1, &
+                    flowDomsd(nn, level, sps)%bvtk2, &
+                    flowDomsd(nn, level, sps)%d2Wall, &
+                    stat=ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                ! Deallocate allocated boundary data
+                do mm = 1, flowDoms(nn, level, sps)%nBocos
+                    deallocate ( &
+                        flowDomsd(nn, level, sps)%BCData(mm)%norm, &
+                        flowDomsd(nn, level, sps)%BCData(mm)%rface, &
+                        flowDomsd(nn, level, sps)%BCData(mm)%Fp, &
+                        flowDomsd(nn, level, sps)%BCData(mm)%Fv, &
+                        flowDomsd(nn, level, sps)%BCData(mm)%Tp, &
+                        flowDomsd(nn, level, sps)%BCData(mm)%Tv, &
+                        flowDomsd(nn, level, sps)%BCData(mm)%F, &
+                        flowDomsd(nn, level, sps)%BCData(mm)%T, &
+                        flowDomsd(nn, level, sps)%BCData(mm)%area, &
+                        flowDomsd(nn, level, sps)%BCData(mm)%uSlip, &
+                        flowDomsd(nn, level, sps)%BCData(mm)%TNS_Wall, &
+                        stat=ierr)
+                    call EChk(ierr, __FILE__, __LINE__)
                 end do
-             end if
-          end if
-       end if
-    end do
-
-    ! Done with the tree and the result vector
-    call kdtree2destroy(mytree)
-    deallocate(results)
-
-  end subroutine pointReduce
-  subroutine releaseMemoryPart1
-    !
-    !       releaseMemoryPart1 releases all the memory on the coarser
-    !       grids of flowDoms and the fine grid memory which is not needed
-    !       for the possible interpolation of the spectral solution.
-    !
-
-    ! This is a free-for-all on the imports. Oh well.
-    use block
-    use inputIteration
-    use inputTimeSpectral
-    use inputPhysics
-    use inputUnsteady
-    use monitor
-    use cgnsGrid
-    use communication
-    use iteration
-    use cgnsGrid
-    use section
-    use wallDistanceData
-    use adjointVars
-    use ADJointPETSc
-    use surfaceFamilies
-    implicit none
-    !
-    !      Local variables
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: sps, nLevels, level, nn, l, i, j
-
-    ! Determine the number of grid levels present in flowDoms.
-
-    nLevels = ubound(flowDoms,2)
-
-    ! Loop over the number of spectral solutions.
-
-    spectralLoop: do sps=1,nTimeIntervalsSpectral
-
-       ! Loop over the coarser grid levels and local blocks and
-       ! deallocate all the memory.
-
-       do level=2,nLevels
-          do nn=1,nDom
-             call deallocateBlock(nn, level, sps)
-          enddo
-       enddo
-
-       ! Release some memory of the fine grid, which is not needed
-       ! anymore.
-
-       do nn=1,nDom
-          ! Modified by HDN
-          ! Added dwALE, fwALE
-          deallocate( &
-               flowDoms(nn,1,sps)%dw,    flowDoms(nn,1,sps)%fw,    &
-               flowDoms(nn,1,sps)%dtl,   flowDoms(nn,1,sps)%radI,  &
-               flowDoms(nn,1,sps)%radJ,  flowDoms(nn,1,sps)%radK,  &
-               flowDoms(nn,1,sps)%shockSensor, &
-               stat=ierr)
-          if(ierr /= 0)                          &
-               call terminate("releaseMemoryPart1", &
-               "Deallocation error for dw, fw, dwALE, fwALE, dtl and &
-               &spectral radii.")
-
-          ! Extra variables for ALE
-          if (equationMode == unSteady .and. useALE) then
-             deallocate( &
-                  flowDoms(nn,1,sps)%dwALE, &
-                  flowDoms(nn,1,sps)%fwALE, &
-                  stat=ierr)
-             if(ierr /= 0)                              &
-                  call terminate("releaseMemoryPart1", &
-                  "Deallocation error for dwALE, fwALE.")
-           end if
-
-          ! Nullify the pointers, such that no attempt is made to
-          ! release the memory again.
-
-          nullify(flowDoms(nn,1,sps)%dw)
-          nullify(flowDoms(nn,1,sps)%fw)
-          nullify(flowDoms(nn,1,sps)%dwALE) ! Added by HDN
-          nullify(flowDoms(nn,1,sps)%fwALE) ! Added by HDN
-          nullify(flowDoms(nn,1,sps)%dtl)
-          nullify(flowDoms(nn,1,sps)%radI)
-          nullify(flowDoms(nn,1,sps)%radJ)
-          nullify(flowDoms(nn,1,sps)%radK)
-          nullify(flowDoms(nn,1,sps)%scratch)
-          nullify(flowDoms(nn,1,sps)%shockSensor)
-          ! Check if the zeroth stage runge kutta memory has been
-          ! allocated. If so deallocate it and nullify the pointers.
-
-          if(smoother == RungeKutta) then
-
-             deallocate(flowDoms(nn,1,sps)%wn, flowDoms(nn,1,sps)%pn, &
-                  stat=ierr)
-             if(ierr /= 0)                          &
-                  call terminate("releaseMemoryPart1", &
-                  "Deallocation error for wn and pn")
-
-             nullify(flowDoms(nn,1,sps)%wn)
-             nullify(flowDoms(nn,1,sps)%pn)
-
-          endif
-
-          ! Release the memory of the old residuals for the time
-          ! accurate Runge-Kutta schemes.
-
-          if(equationMode          == unsteady .and. &
-               timeIntegrationScheme == explicitRK) then
-
-             deallocate(flowDoms(nn,1,sps)%dwOldRK, stat=ierr)
-             if(ierr /= 0)                          &
-                  call terminate("releaseMemoryPart1", &
-                  "Deallocation error for dwOldRK,")
-
-             nullify(flowDoms(nn,1,sps)%dwOldRK)
-          endif
-
-       enddo
-
-    enddo spectralLoop
-
-    ! derivative values
-    if (derivVarsAllocated) then
-       call deallocDerivativeValues(1)
-    end if
-
-    ! Bunch of extra sutff that hasn't been deallocated
-    if (allocated(cycleStrategy)) then
-       deallocate(cycleStrategy)
-    end if
-
-    if (allocated(monNames)) then
-       deallocate(monNames)
-    end if
-
-    if (allocated(monLoc)) then
-       deallocate(monLoc)
-    end if
-
-    if (allocated(monGlob)) then
-       deallocate(monGlob)
-    end if
-
-    if (allocated(monRef)) then
-       deallocate(monRef)
-    end if
-
-    if (allocated(cgnsFamilies)) then
-       deallocate(cgnsFamilies)
-    end if
-
-    if (allocated(cgnsDomsd)) then
-       deallocate(cgnsDomsd)
-    end if
-    ! deallocate(famIDsDomainInterfaces, &
-    !      bcIDsDomainInterfaces,  &
-    !      famIDsSliding)
-    if (allocated(sections)) then
-       deallocate(sections)
-    end if
-
-    if (allocated(BCFamExchange)) then
-      do j=1, size(BCFamExchange, 2)
-         do i=1, size(BCFamExchange, 1)
-            call destroyFamilyExchange(BCFamExchange(i,j))
-         end do
-      end do
-      deallocate(BCFamExchange)
-    end if
-
-    if (allocated(nCellGlobal)) then
-       deallocate(nCellGlobal)
-    end if
-
-
-   ! Now deallocate the containers and communication objects.
-    if (allocated(commPatternCell_1st)) then
-      do l=1,nLevels
-         call deallocateCommType(commPatternCell_1st(l))
-      end do
-      deallocate(commPatternCell_1st)
-    end if
-    if (allocated(commPatternCell_2nd)) then
-      do l=1,nLevels
-         call deallocateCommType(commPatternCell_2nd(l))
-      end do
-      deallocate(commPatternCell_2nd)
-    end if
-    if (allocated(commPatternNode_1st)) then
-      do l=1,nLevels
-         call deallocateCommType(commPatternNode_1st(l))
-      end do
-      deallocate(commPatternNode_1st)
-    end if
-    if (allocated(internalCell_1st)) then
-      do l=1,nLevels
-         call deallocateInternalCommType(internalCell_1st(l))
-      end do
-      deallocate(internalCell_1st)
-    end if
-    if (allocated(internalCell_2nd)) then
-      do l=1,nLevels
-         call deallocateInternalCommType(internalCell_2nd(l))
-      end do
-      deallocate(internalCell_2nd)
-    end if
-    if (allocated(internalNode_1st)) then
-      do l=1,nLevels
-         call deallocateInternalCommType(internalNode_1st(l))
-      end do
-      deallocate(internalNode_1st)
-    end if
-
-    ! Send/recv buffer
-    if (allocated(sendBuffer)) then
-       deallocate(sendBuffer)
-    end if
-
-    if (allocated(recvBuffer)) then
-       deallocate(recvBuffer)
-    end if
-
-    ! massFlow stuff from setFamilyInfoFaces.f90
-    if (allocated(massFlowFamilyInv)) then
-       deallocate(massFlowFamilyInv)
-    end if
-    if (allocated(massFlowFamilyDiss)) then
-       deallocate(massFlowFamilyDiss)
-    end if
-
-  end subroutine releaseMemoryPart1
-
-  subroutine deallocateCommType(comm)
-    use communication
-    implicit none
-    integer(kind=intType) :: ierr, i
-
-    type(commType) :: comm
-    ! Deallocate memory in comm
-
-    ! Deallocate the sendLists
-    do i=1, comm%nProcSend
-       deallocate(comm%sendList(i)%block, stat=ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-
-       deallocate(comm%sendList(i)%indices, stat=ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-
-       deallocate(comm%sendList(i)%interp, stat=ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-    end do
-
-    ! Deallocate the recvLists
-    do i=1, comm%nProcRecv
-       deallocate(comm%recvList(i)%block, stat=ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-
-       deallocate(comm%recvList(i)%indices, stat=ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-    end do
-
-    deallocate(comm%sendProc, stat=ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    deallocate(comm%nsend, stat=ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    deallocate(comm%nsendcum, stat=ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    deallocate(comm%sendlist, stat=ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    deallocate(comm%recvProc, stat=ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    deallocate(comm%nrecv, stat=ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    deallocate(comm%nrecvcum, stat=ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    deallocate(comm%recvlist, stat=ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    deallocate(comm%indexsendproc, stat=ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    deallocate(comm%indexrecvproc, stat=ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    if (comm%nPeriodic > 0) then
-       do i=1,comm%nPeriodic
-          deallocate(comm%periodicData(i)%block, stat=ierr)
-          call EChk(ierr, __FILE__, __LINE__)
-
-          deallocate(comm%periodicData(i)%indices)
-          call EChk(ierr, __FILE__, __LINE__)
-       end do
-
-       deallocate(comm%periodicData, stat=ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-    end if
-
-  end subroutine deallocateCommType
-
-  subroutine deallocateInternalCommType(comm)
-    use communication
-    implicit none
-    integer(kind=intType) :: ierr, i
-
-    type(internalCommType) :: comm
-    ! Deallocate memory in comm
-    deallocate(comm%donorBlock, stat=ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    deallocate(comm%donorIndices, stat=ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    deallocate(comm%donorInterp, stat=ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    deallocate(comm%haloBlock, stat=ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    deallocate(comm%haloIndices, stat=ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    if (comm%nPeriodic > 0) then
-       do i=1,comm%nPeriodic
-          deallocate(comm%periodicData(i)%block, stat=ierr)
-          call EChk(ierr, __FILE__, __LINE__)
-
-          deallocate(comm%periodicData(i)%indices)
-          call EChk(ierr, __FILE__, __LINE__)
-       end do
-       deallocate(comm%periodicData, stat=ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-    end if
-
-  end subroutine deallocateInternalCommType
-
-  subroutine deallocDerivativeValues(level)
-
-    use constants
-    use block, only : flowDomsd, flowDoms, nDom
-    use inputtimespectral, only : nTimeIntervalsSpectral
-    use wallDistanceData, only : xSurfVec, xSurfVecd
-    use flowVarRefState, only : winfd
-    use inputPhysics, only : wallDistanceNeeded, useRoughSA
-    use adjointVars, only : derivVarsAllocated
-    use BCPointers_b
-
-    implicit none
-
-    ! Input Parameters
-    integer(kind=intType) :: level
-
-    ! Local variables
-    integer(kind=intType) :: nn, sps, stat, mm, ierr
-
-    do nn=1,nDom
-       do sps=1,nTimeIntervalsSpectral
-
-          deallocate(&
-               flowDomsd(nn, level, sps)%x, &
-               flowDomsd(nn, level, sps)%si, &
-               flowDomsd(nn, level, sps)%sj, &
-               flowDomsd(nn, level, sps)%sk, &
-               flowDomsd(nn, level, sps)%vol, &
-               flowDomsd(nn, level, sps)%rotMatrixI, &
-               flowDomsd(nn, level, sps)%rotMatrixJ, &
-               flowDomsd(nn, level, sps)%rotMatrixK, &
-               flowDomsd(nn, level, sps)%s, &
-               flowDomsd(nn, level, sps)%sFaceI, &
-               flowDomsd(nn, level, sps)%sFaceJ, &
-               flowDomsd(nn, level, sps)%sFaceK, &
-               flowDomsd(nn, level, sps)%w, &
-               flowDomsd(nn, level, sps)%dw, &
-               flowDomsd(nn, level, sps)%fw, &
-               flowDomsd(nn, level, sps)%scratch, &
-               flowDomsd(nn, level, sps)%p, &
-               flowDomsd(nn, level, sps)%gamma, &
-               flowDomsd(nn, level, sps)%aa, &
-               flowDomsd(nn, level, sps)%rlv, &
-               flowDomsd(nn, level, sps)%rev, &
-               flowDomsd(nn, level, sps)%dtl, &
-               flowDomsd(nn, level, sps)%radI, &
-               flowDomsd(nn, level, sps)%radJ, &
-               flowDomsd(nn, level, sps)%radK, &
-               flowDomsd(nn, level, sps)%ux, &
-               flowDomsd(nn, level, sps)%uy, &
-               flowDomsd(nn, level, sps)%uz, &
-               flowDomsd(nn, level, sps)%vx, &
-               flowDomsd(nn, level, sps)%vy, &
-               flowDomsd(nn, level, sps)%vz, &
-               flowDomsd(nn, level, sps)%wx, &
-               flowDomsd(nn, level, sps)%wy, &
-               flowDomsd(nn, level, sps)%wz, &
-               flowDomsd(nn, level, sps)%qx, &
-               flowDomsd(nn, level, sps)%qy, &
-               flowDomsd(nn, level, sps)%qz, &
-               flowDomsd(nn, level, sps)%bmti1,&
-               flowDomsd(nn, level, sps)%bmti2,&
-               flowDomsd(nn, level, sps)%bmtj1,&
-               flowDomsd(nn, level, sps)%bmtj2,&
-               flowDomsd(nn, level, sps)%bmtk1,&
-               flowDomsd(nn, level, sps)%bmtk2,&
-               flowDomsd(nn, level, sps)%bvti1,&
-               flowDomsd(nn, level, sps)%bvti2,&
-               flowDomsd(nn, level, sps)%bvtj1,&
-               flowDomsd(nn, level, sps)%bvtj2,&
-               flowDomsd(nn, level, sps)%bvtk1,&
-               flowDomsd(nn, level, sps)%bvtk2,&
-               flowDomsd(nn, level, sps)%d2Wall, &
-               stat=ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          ! Deallocate allocated boundary data
-          do mm=1, flowDoms(nn, level, sps)%nBocos
-             deallocate(&
-                  flowDomsd(nn, level, sps)%BCData(mm)%norm, &
-                  flowDomsd(nn, level, sps)%BCData(mm)%rface, &
-                  flowDomsd(nn, level, sps)%BCData(mm)%Fp, &
-                  flowDomsd(nn, level, sps)%BCData(mm)%Fv, &
-                  flowDomsd(nn, level, sps)%BCData(mm)%Tp, &
-                  flowDomsd(nn, level, sps)%BCData(mm)%Tv, &
-                  flowDomsd(nn, level, sps)%BCData(mm)%F, &
-                  flowDomsd(nn, level, sps)%BCData(mm)%T, &
-                  flowDomsd(nn, level, sps)%BCData(mm)%area, &
-                  flowDomsd(nn, level, sps)%BCData(mm)%uSlip, &
-                  flowDomsd(nn, level, sps)%BCData(mm)%TNS_Wall, &
-                  stat=ierr)
-             call EChk(ierr,__FILE__,__LINE__)
-          enddo
-
-          deallocate(flowDomsd(nn, level, sps)%BCData, stat=ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          viscbocoLoop: do mm=1, flowDoms(nn, level, sps)%nViscBocos
-             deallocate(&
-                  flowDomsd(nn, level, sps)%viscSubface(mm)%tau, &
-                  flowDomsd(nn, level, sps)%viscSubface(mm)%q, &
-                  stat=ierr)
-             call EChk(ierr,__FILE__,__LINE__)
-          end do viscbocoLoop
-
-          deallocate(flowDomsd(nn, level, sps)%viscSubFace, stat=ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-       end do
-    end do
-
-    ! Also dealloc winfd
-    deallocate(winfd)
-
-    ! Finally deallocate flowdomsd
-    deallocate(flowdomsd, stat=ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! And the petsc vector(s)
-    if (.not. wallDistanceNeeded) then
-       do sps=1, nTimeIntervalsSpectral
-          call VecDestroy(xSurfVec(1, sps), ierr)
-       end do
-    end if
-
-    do sps=1, nTimeIntervalsSpectral
-       call VecDestroy(xSurfVecd(sps), ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-    end do
-    deallocate(xSurfVecd)
-
-    derivVarsAllocated = .False.
-  end subroutine deallocDerivativeValues
-
-  !      ---------------------------------------------------------------------------
-
-  subroutine releaseMemoryPart2
-    !
-    !       releaseMemoryPart2 releases all the memory of flowDoms on the
-    !       finest grid as well as the memory allocated in the other
-    !       modules.
-    !
-    use block
-    use inputTimeSpectral
-    use inputPhysics, only : cpmin_family
-    use ADjointPETSc
-    use cgnsGrid
-    implicit none
-    !
-    !      Local variables
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: nn, sps
-
-    ! Release the memory of flowDoms of the finest grid and of the
-    ! array flowDoms afterwards.
-    if (allocated(flowDoms)) then
-      do sps=1,nTimeIntervalsSpectral
-         do nn=1,nDom
-            call deallocateBlock(nn, 1_intType, sps)
-         enddo
-      enddo
-      deallocate(flowDoms, stat=ierr)
-      if(ierr /= 0)                          &
-           call terminate("releaseMemoryPart2", &
-           "Deallocation failure for flowDoms")
-    end if
-
-    ! Some more memory should be deallocated if this code is to
-    ! be used in combination with adaptation.
-
-    ! deallocate the cpmin_family array allocated in inputParamRoutines
-    if (allocated(cpmin_family)) &
-        deallocate(cpmin_family)
-
-    ! Destroy variables allocated in preprocessingAdjoint
-    if (adjointPETScPreProcVarsAllocated) then
-      call vecDestroy(w_like1,PETScIerr)
-      call EChk(PETScIerr, __FILE__, __LINE__)
-
-      call vecDestroy(w_like2,PETScIerr)
-      call EChk(PETScIerr, __FILE__, __LINE__)
-
-      call vecDestroy(psi_like1,PETScIerr)
-      call EChk(PETScIerr, __FILE__, __LINE__)
-
-      call vecDestroy(psi_like2,PETScIerr)
-      call EChk(PETScIerr, __FILE__, __LINE__)
-
-      call vecDestroy(psi_like3,PETScIerr)
-      call EChk(PETScIerr, __FILE__, __LINE__)
-
-      call vecDestroy(x_like,PETScIerr)
-      call EChk(PETScIerr, __FILE__, __LINE__)
-    end if
-
-    ! Finally delete cgnsDoms...but there is still more
-    ! pointers that need to be deallocated...
-    if (allocated(cgnsDoms)) then
-      do nn=1,cgnsNDom
-         if (associated(cgnsDoms(nn)%procStored)) &
-               deallocate(cgnsDoms(nn)%procStored)
-
-         if (associated(cgnsDoms(nn)%conn1to1)) &
-               deallocate(cgnsDoms(nn)%conn1to1)
-
-         if (associated(cgnsDoms(nn)%connNonMatchAbutting)) &
-               deallocate(cgnsDoms(nn)%connNonMatchAbutting)
-
-         if (associated(cgnsDoms(nn)%bocoInfo)) &
-               deallocate(cgnsDoms(nn)%bocoInfo)
-
-         deallocate(&
-               cgnsDoms(nn)%iBegOr, cgnsDoms(nn)%iEndOr, &
-               cgnsDoms(nn)%jBegOr, cgnsDoms(nn)%jEndOr, &
-               cgnsDoms(nn)%kBegOr, cgnsDoms(nn)%kEndOr, &
-               cgnsDoms(nn)%localBlockID)
-      end do
-   end if
-
-  end subroutine releaseMemoryPart2
-
-  subroutine deallocateBlock(nn, level, sps)
-    !
-    !       deallocateBlock deallocates all the allocated memory of the
-    !       given block.
-    !
-    use constants
-    use inputUnsteady
-    use inputPhysics
-    use iteration
-    use block, only : viscSubfaceType, BCDataType, flowDoms
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nn, level, sps
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: i
-
-    type(viscSubfaceType), dimension(:), pointer :: viscSubface
-    type(BCDataType),      dimension(:), pointer :: BCData
-
-    logical :: deallocationFailure
-
-    ! Initialize deallocationFailure to .false.
-
-    deallocationFailure = .false.
-
-    ! Set the pointer for viscSubface and deallocate the memory
-    ! stored in there. Initialize ierr to 0, such that the terminate
-    ! routine is only called at the end if a memory deallocation
-    ! failure occurs.
-    ierr = 0
-    viscSubface => flowDoms(nn,level,sps)%viscSubface
-    do i=1,flowDoms(nn,level,sps)%nViscBocos
-       deallocate(viscSubface(i)%tau,  viscSubface(i)%q, &
-            viscSubface(i)%utau, stat=ierr)
-       if(ierr /= 0) deallocationFailure = .true.
-
-       nullify(viscSubface(i)%tau)
-       nullify(viscSubface(i)%q)
-       nullify(viscSubface(i)%utau)
-    enddo
-
-    ! Set the pointer for BCData and deallocate the memory
-    ! stored in there.
-    BCData => flowDoms(nn,level,sps)%BCData
-    do i=1,flowDoms(nn,level,sps)%nBocos
-
-       if( associated(BCData(i)%norm) ) &
-            deallocate(BCData(i)%norm, stat=ierr)
-       if(ierr /= 0) deallocationFailure = .true.
-
-       if( associated(BCData(i)%area) ) &
-            deallocate(BCData(i)%area, stat=ierr)
-       if(ierr /= 0) deallocationFailure = .true.
-
-       if( associated(BCData(i)%surfIndex) ) &
-            deallocate(BCData(i)%surfIndex, stat=ierr)
-       if(ierr /= 0) deallocationFailure = .true.
-
-       if( associated(BCData(i)%F) ) &
-            deallocate(BCData(i)%F, stat=ierr)
-       if(ierr /= 0) deallocationFailure = .true.
-
-       if( associated(BCData(i)%Fv) ) &
-            deallocate(BCData(i)%Fv, stat=ierr)
-       if(ierr /= 0) deallocationFailure = .true.
-
-       if( associated(BCData(i)%Fp) ) &
-            deallocate(BCData(i)%Fp, stat=ierr)
-       if(ierr /= 0) deallocationFailure = .true.
-
-       if( associated(BCData(i)%T) ) &
-            deallocate(BCData(i)%T, stat=ierr)
-       if(ierr /= 0) deallocationFailure = .true.
-
-       if( associated(BCData(i)%Tv) ) &
-            deallocate(BCData(i)%Tv, stat=ierr)
-       if(ierr /= 0) deallocationFailure = .true.
-
-       if( associated(BCData(i)%Tp) ) &
-            deallocate(BCData(i)%Tp, stat=ierr)
-       if(ierr /= 0) deallocationFailure = .true.
-
-       if( associated(BCData(i)%rface) ) &
-            deallocate(BCData(i)%rface, stat=ierr)
-       if(ierr /= 0) deallocationFailure = .true.
-
-       if( associated(BCData(i)%uSlip) ) &
-            deallocate(BCData(i)%uSlip, stat=ierr)
-       if(ierr /= 0) deallocationFailure = .true.
-
-       if( associated(BCData(i)%TNS_Wall) ) &
-            deallocate(BCData(i)%TNS_Wall, stat=ierr)
-       if(ierr /= 0) deallocationFailure = .true.
-
-       if( associated(BCData(i)%ksNS_Wall) ) &
-            deallocate(BCData(i)%ksNS_Wall, stat=ierr)
-       if(ierr /= 0) deallocationFailure = .true.
-
-       if( associated(BCData(i)%ptInlet) ) &
-            deallocate(BCData(i)%ptInlet, stat=ierr)
-       if(ierr /= 0) deallocationFailure = .true.
-
-       if( associated(BCData(i)%ttInlet) ) &
-            deallocate(BCData(i)%ttInlet, stat=ierr)
-       if(ierr /= 0) deallocationFailure = .true.
-
-       if( associated(BCData(i)%htInlet) ) &
-            deallocate(BCData(i)%htInlet, stat=ierr)
-       if(ierr /= 0) deallocationFailure = .true.
-
-       if( associated(BCData(i)%flowXdirInlet) ) &
-            deallocate(BCData(i)%flowXdirInlet, stat=ierr)
-       if(ierr /= 0) deallocationFailure = .true.
-
-       if( associated(BCData(i)%flowYdirInlet) ) &
-            deallocate(BCData(i)%flowYdirInlet, stat=ierr)
-       if(ierr /= 0) deallocationFailure = .true.
-
-       if( associated(BCData(i)%flowZdirInlet) ) &
-            deallocate(BCData(i)%flowZdirInlet, stat=ierr)
-       if(ierr /= 0) deallocationFailure = .true.
-
-       if( associated(BCData(i)%rho) ) &
-            deallocate(BCData(i)%rho, stat=ierr)
-       if(ierr /= 0) deallocationFailure = .true.
-
-       if( associated(BCData(i)%velx) ) &
-            deallocate(BCData(i)%velx, stat=ierr)
-       if(ierr /= 0) deallocationFailure = .true.
-
-       if( associated(BCData(i)%vely) ) &
-            deallocate(BCData(i)%vely, stat=ierr)
-       if(ierr /= 0) deallocationFailure = .true.
-
-       if( associated(BCData(i)%velz) ) &
-            deallocate(BCData(i)%velz, stat=ierr)
-       if(ierr /= 0) deallocationFailure = .true.
-
-       if( associated(BCData(i)%ps) ) &
-            deallocate(BCData(i)%ps, stat=ierr)
-       if(ierr /= 0) deallocationFailure = .true.
-
-       if( associated(BCData(i)%turbInlet) ) &
-            deallocate(BCData(i)%turbInlet, stat=ierr)
-       if(ierr /= 0) deallocationFailure = .true.
-
-       if( associated(BCData(i)%normALE) ) &
-            deallocate(BCData(i)%normALE, stat=ierr)
-       if(ierr /= 0) deallocationFailure = .true.
-       if( associated(BCData(i)%rFaceALE) ) &
-            deallocate(BCData(i)%rFaceALE, stat=ierr)
-       if(ierr /= 0) deallocationFailure = .true.
-       if( associated(BCData(i)%uSlipALE) ) &
-            deallocate(BCData(i)%uSlipALE, stat=ierr)
-       if(ierr /= 0) deallocationFailure = .true.
-       if( associated(BCData(i)%cellHeatFlux) ) &
-            deallocate(BCData(i)%cellHeatFlux, stat=ierr)
-       if( associated(BCData(i)%nodeHeatFlux) ) &
-            deallocate(BCData(i)%nodeHeatFlux, stat=ierr)
-       if(ierr /= 0) deallocationFailure = .true.
-
-       if( associated(BCData(i)%iBlank) ) &
-            deallocate(BCData(i)%iBlank, stat=ierr)
-
-       if(ierr /= 0) deallocationFailure = .true.
-
-       nullify(BCData(i)%norm)
-       nullify(BCData(i)%rface)
-       nullify(BCData(i)%F)
-       nullify(BCData(i)%Fv)
-       nullify(BCData(i)%Fp)
-       nullify(BCData(i)%T)
-       nullify(BCData(i)%Tv)
-       nullify(BCData(i)%Tp)
-
-       nullify(BCData(i)%uSlip)
-       nullify(BCData(i)%TNS_Wall)
-       nullify(BCData(i)%ksNS_Wall)
-
-       nullify(BCData(i)%normALE)
-       nullify(BCData(i)%rfaceALE)
-       nullify(BCData(i)%uSlipALE)
-       nullify(BCData(i)%cellHeatFlux)
-       nullify(BCData(i)%nodeHeatFlux)
-
-       nullify(BCData(i)%ptInlet)
-       nullify(BCData(i)%ttInlet)
-       nullify(BCData(i)%htInlet)
-       nullify(BCData(i)%flowXdirInlet)
-       nullify(BCData(i)%flowYdirInlet)
-       nullify(BCData(i)%flowZdirInlet)
-
-       nullify(BCData(i)%turbInlet)
-
-       nullify(BCData(i)%rho)
-       nullify(BCData(i)%velx)
-       nullify(BCData(i)%vely)
-       nullify(BCData(i)%velz)
-       nullify(BCData(i)%ps)
-       nullify(BCData(i)%iblank)
-
-    enddo
-
-    if( associated(flowDoms(nn,level,sps)%BCType) ) &
-         deallocate(flowDoms(nn,level,sps)%BCType, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
-
-    if( associated(flowDoms(nn,level,sps)%BCFaceID) ) &
-         deallocate(flowDoms(nn,level,sps)%BCFaceID, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
-
-    if( associated(flowDoms(nn,level,sps)%cgnsSubface) ) &
-         deallocate(flowDoms(nn,level,sps)%cgnsSubface, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
-
-    if( associated(flowDoms(nn,level,sps)%inBeg) ) &
-         deallocate(flowDoms(nn,level,sps)%inBeg, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
-
-    if( associated(flowDoms(nn,level,sps)%inEnd) ) &
-         deallocate(flowDoms(nn,level,sps)%inEnd, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
-
-    if( associated(flowDoms(nn,level,sps)%jnBeg) ) &
-         deallocate(flowDoms(nn,level,sps)%jnBeg, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
-
-    if( associated(flowDoms(nn,level,sps)%jnEnd) ) &
-         deallocate(flowDoms(nn,level,sps)%jnEnd, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
-
-    if( associated(flowDoms(nn,level,sps)%knBeg) ) &
-         deallocate(flowDoms(nn,level,sps)%knBeg, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
-
-    if( associated(flowDoms(nn,level,sps)%knEnd) ) &
-         deallocate(flowDoms(nn,level,sps)%knEnd, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
-
-    if( associated(flowDoms(nn,level,sps)%dinBeg) ) &
-         deallocate(flowDoms(nn,level,sps)%dinBeg, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
-
-    if( associated(flowDoms(nn,level,sps)%dinEnd) ) &
-         deallocate(flowDoms(nn,level,sps)%dinEnd, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
-
-    if( associated(flowDoms(nn,level,sps)%djnBeg) ) &
-         deallocate(flowDoms(nn,level,sps)%djnBeg, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
-
-    if( associated(flowDoms(nn,level,sps)%djnEnd) ) &
-         deallocate(flowDoms(nn,level,sps)%djnEnd, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
-
-    if( associated(flowDoms(nn,level,sps)%dknBeg) ) &
-         deallocate(flowDoms(nn,level,sps)%dknBeg, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
-
-    if( associated(flowDoms(nn,level,sps)%dknEnd) ) &
-         deallocate(flowDoms(nn,level,sps)%dknEnd, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
-
-    if( associated(flowDoms(nn,level,sps)%icBeg) ) &
-         deallocate(flowDoms(nn,level,sps)%icBeg, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
-
-    if( associated(flowDoms(nn,level,sps)%icEnd) ) &
-         deallocate(flowDoms(nn,level,sps)%icEnd, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
-
-    if( associated(flowDoms(nn,level,sps)%jcBeg) ) &
-         deallocate(flowDoms(nn,level,sps)%jcBeg, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
-
-    if( associated(flowDoms(nn,level,sps)%jcEnd) ) &
-         deallocate(flowDoms(nn,level,sps)%jcEnd, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
-
-    if( associated(flowDoms(nn,level,sps)%kcBeg) ) &
-         deallocate(flowDoms(nn,level,sps)%kcBeg, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
-
-    if( associated(flowDoms(nn,level,sps)%kcEnd) ) &
-         deallocate(flowDoms(nn,level,sps)%kcEnd, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
-
-    if( associated(flowDoms(nn,level,sps)%neighBlock) ) &
-         deallocate(flowDoms(nn,level,sps)%neighBlock, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
-
-    if( associated(flowDoms(nn,level,sps)%neighProc) ) &
-         deallocate(flowDoms(nn,level,sps)%neighProc, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
-
-    if( associated(flowDoms(nn,level,sps)%l1) ) &
-         deallocate(flowDoms(nn,level,sps)%l1, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
-
-    if( associated(flowDoms(nn,level,sps)%l2) ) &
-         deallocate(flowDoms(nn,level,sps)%l2, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%l3) ) &
-         deallocate(flowDoms(nn,level,sps)%l3, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+                deallocate (flowDomsd(nn, level, sps)%BCData, stat=ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                viscbocoLoop: do mm = 1, flowDoms(nn, level, sps)%nViscBocos
+                    deallocate ( &
+                        flowDomsd(nn, level, sps)%viscSubface(mm)%tau, &
+                        flowDomsd(nn, level, sps)%viscSubface(mm)%q, &
+                        stat=ierr)
+                    call EChk(ierr, __FILE__, __LINE__)
+                end do viscbocoLoop
+
+                deallocate (flowDomsd(nn, level, sps)%viscSubFace, stat=ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+            end do
+        end do
+
+        ! Also dealloc winfd
+        deallocate (winfd)
+
+        ! Finally deallocate flowdomsd
+        deallocate (flowdomsd, stat=ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! And the petsc vector(s)
+        if (.not. wallDistanceNeeded) then
+            do sps = 1, nTimeIntervalsSpectral
+                call VecDestroy(xSurfVec(1, sps), ierr)
+            end do
+        end if
+
+        do sps = 1, nTimeIntervalsSpectral
+            call VecDestroy(xSurfVecd(sps), ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+        end do
+        deallocate (xSurfVecd)
+
+        derivVarsAllocated = .False.
+    end subroutine deallocDerivativeValues
+
+    !      ---------------------------------------------------------------------------
+
+    subroutine releaseMemoryPart2
+        !
+        !       releaseMemoryPart2 releases all the memory of flowDoms on the
+        !       finest grid as well as the memory allocated in the other
+        !       modules.
+        !
+        use block
+        use inputTimeSpectral
+        use inputPhysics, only: cpmin_family
+        use ADjointPETSc
+        use cgnsGrid
+        implicit none
+        !
+        !      Local variables
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: nn, sps
+
+        ! Release the memory of flowDoms of the finest grid and of the
+        ! array flowDoms afterwards.
+        if (allocated(flowDoms)) then
+            do sps = 1, nTimeIntervalsSpectral
+                do nn = 1, nDom
+                    call deallocateBlock(nn, 1_intType, sps)
+                end do
+            end do
+            deallocate (flowDoms, stat=ierr)
+            if (ierr /= 0) &
+                call terminate("releaseMemoryPart2", &
+                               "Deallocation failure for flowDoms")
+        end if
+
+        ! Some more memory should be deallocated if this code is to
+        ! be used in combination with adaptation.
+
+        ! deallocate the cpmin_family array allocated in inputParamRoutines
+        if (allocated(cpmin_family)) &
+            deallocate (cpmin_family)
+
+        ! Destroy variables allocated in preprocessingAdjoint
+        if (adjointPETScPreProcVarsAllocated) then
+            call vecDestroy(w_like1, PETScIerr)
+            call EChk(PETScIerr, __FILE__, __LINE__)
+
+            call vecDestroy(w_like2, PETScIerr)
+            call EChk(PETScIerr, __FILE__, __LINE__)
+
+            call vecDestroy(psi_like1, PETScIerr)
+            call EChk(PETScIerr, __FILE__, __LINE__)
+
+            call vecDestroy(psi_like2, PETScIerr)
+            call EChk(PETScIerr, __FILE__, __LINE__)
+
+            call vecDestroy(psi_like3, PETScIerr)
+            call EChk(PETScIerr, __FILE__, __LINE__)
+
+            call vecDestroy(x_like, PETScIerr)
+            call EChk(PETScIerr, __FILE__, __LINE__)
+        end if
+
+        ! Finally delete cgnsDoms...but there is still more
+        ! pointers that need to be deallocated...
+        if (allocated(cgnsDoms)) then
+            do nn = 1, cgnsNDom
+                if (associated(cgnsDoms(nn)%procStored)) &
+                    deallocate (cgnsDoms(nn)%procStored)
+
+                if (associated(cgnsDoms(nn)%conn1to1)) &
+                    deallocate (cgnsDoms(nn)%conn1to1)
+
+                if (associated(cgnsDoms(nn)%connNonMatchAbutting)) &
+                    deallocate (cgnsDoms(nn)%connNonMatchAbutting)
+
+                if (associated(cgnsDoms(nn)%bocoInfo)) &
+                    deallocate (cgnsDoms(nn)%bocoInfo)
+
+                deallocate ( &
+                    cgnsDoms(nn)%iBegOr, cgnsDoms(nn)%iEndOr, &
+                    cgnsDoms(nn)%jBegOr, cgnsDoms(nn)%jEndOr, &
+                    cgnsDoms(nn)%kBegOr, cgnsDoms(nn)%kEndOr, &
+                    cgnsDoms(nn)%localBlockID)
+            end do
+        end if
+
+    end subroutine releaseMemoryPart2
+
+    subroutine deallocateBlock(nn, level, sps)
+        !
+        !       deallocateBlock deallocates all the allocated memory of the
+        !       given block.
+        !
+        use constants
+        use inputUnsteady
+        use inputPhysics
+        use iteration
+        use block, only: viscSubfaceType, BCDataType, flowDoms
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nn, level, sps
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: i
+
+        type(viscSubfaceType), dimension(:), pointer :: viscSubface
+        type(BCDataType), dimension(:), pointer :: BCData
+
+        logical :: deallocationFailure
+
+        ! Initialize deallocationFailure to .false.
+
+        deallocationFailure = .false.
+
+        ! Set the pointer for viscSubface and deallocate the memory
+        ! stored in there. Initialize ierr to 0, such that the terminate
+        ! routine is only called at the end if a memory deallocation
+        ! failure occurs.
+        ierr = 0
+        viscSubface => flowDoms(nn, level, sps)%viscSubface
+        do i = 1, flowDoms(nn, level, sps)%nViscBocos
+            deallocate (viscSubface(i)%tau, viscSubface(i)%q, &
+                        viscSubface(i)%utau, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
+
+            nullify (viscSubface(i)%tau)
+            nullify (viscSubface(i)%q)
+            nullify (viscSubface(i)%utau)
+        end do
+
+        ! Set the pointer for BCData and deallocate the memory
+        ! stored in there.
+        BCData => flowDoms(nn, level, sps)%BCData
+        do i = 1, flowDoms(nn, level, sps)%nBocos
+
+            if (associated(BCData(i)%norm)) &
+                deallocate (BCData(i)%norm, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
+
+            if (associated(BCData(i)%area)) &
+                deallocate (BCData(i)%area, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
+
+            if (associated(BCData(i)%surfIndex)) &
+                deallocate (BCData(i)%surfIndex, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
+
+            if (associated(BCData(i)%F)) &
+                deallocate (BCData(i)%F, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
+
+            if (associated(BCData(i)%Fv)) &
+                deallocate (BCData(i)%Fv, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
+
+            if (associated(BCData(i)%Fp)) &
+                deallocate (BCData(i)%Fp, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
+
+            if (associated(BCData(i)%T)) &
+                deallocate (BCData(i)%T, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
+
+            if (associated(BCData(i)%Tv)) &
+                deallocate (BCData(i)%Tv, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
+
+            if (associated(BCData(i)%Tp)) &
+                deallocate (BCData(i)%Tp, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
+
+            if (associated(BCData(i)%rface)) &
+                deallocate (BCData(i)%rface, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
+
+            if (associated(BCData(i)%uSlip)) &
+                deallocate (BCData(i)%uSlip, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
+
+            if (associated(BCData(i)%TNS_Wall)) &
+                deallocate (BCData(i)%TNS_Wall, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
+
+            if (associated(BCData(i)%ksNS_Wall)) &
+                deallocate (BCData(i)%ksNS_Wall, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
+
+            if (associated(BCData(i)%ptInlet)) &
+                deallocate (BCData(i)%ptInlet, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
+
+            if (associated(BCData(i)%ttInlet)) &
+                deallocate (BCData(i)%ttInlet, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
+
+            if (associated(BCData(i)%htInlet)) &
+                deallocate (BCData(i)%htInlet, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
+
+            if (associated(BCData(i)%flowXdirInlet)) &
+                deallocate (BCData(i)%flowXdirInlet, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
+
+            if (associated(BCData(i)%flowYdirInlet)) &
+                deallocate (BCData(i)%flowYdirInlet, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
+
+            if (associated(BCData(i)%flowZdirInlet)) &
+                deallocate (BCData(i)%flowZdirInlet, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
+
+            if (associated(BCData(i)%rho)) &
+                deallocate (BCData(i)%rho, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
+
+            if (associated(BCData(i)%velx)) &
+                deallocate (BCData(i)%velx, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
+
+            if (associated(BCData(i)%vely)) &
+                deallocate (BCData(i)%vely, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
+
+            if (associated(BCData(i)%velz)) &
+                deallocate (BCData(i)%velz, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
+
+            if (associated(BCData(i)%ps)) &
+                deallocate (BCData(i)%ps, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
+
+            if (associated(BCData(i)%turbInlet)) &
+                deallocate (BCData(i)%turbInlet, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
+
+            if (associated(BCData(i)%normALE)) &
+                deallocate (BCData(i)%normALE, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
+            if (associated(BCData(i)%rFaceALE)) &
+                deallocate (BCData(i)%rFaceALE, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
+            if (associated(BCData(i)%uSlipALE)) &
+                deallocate (BCData(i)%uSlipALE, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
+            if (associated(BCData(i)%cellHeatFlux)) &
+                deallocate (BCData(i)%cellHeatFlux, stat=ierr)
+            if (associated(BCData(i)%nodeHeatFlux)) &
+                deallocate (BCData(i)%nodeHeatFlux, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
+
+            if (associated(BCData(i)%iBlank)) &
+                deallocate (BCData(i)%iBlank, stat=ierr)
+
+            if (ierr /= 0) deallocationFailure = .true.
+
+            nullify (BCData(i)%norm)
+            nullify (BCData(i)%rface)
+            nullify (BCData(i)%F)
+            nullify (BCData(i)%Fv)
+            nullify (BCData(i)%Fp)
+            nullify (BCData(i)%T)
+            nullify (BCData(i)%Tv)
+            nullify (BCData(i)%Tp)
+
+            nullify (BCData(i)%uSlip)
+            nullify (BCData(i)%TNS_Wall)
+            nullify (BCData(i)%ksNS_Wall)
+
+            nullify (BCData(i)%normALE)
+            nullify (BCData(i)%rfaceALE)
+            nullify (BCData(i)%uSlipALE)
+            nullify (BCData(i)%cellHeatFlux)
+            nullify (BCData(i)%nodeHeatFlux)
+
+            nullify (BCData(i)%ptInlet)
+            nullify (BCData(i)%ttInlet)
+            nullify (BCData(i)%htInlet)
+            nullify (BCData(i)%flowXdirInlet)
+            nullify (BCData(i)%flowYdirInlet)
+            nullify (BCData(i)%flowZdirInlet)
+
+            nullify (BCData(i)%turbInlet)
+
+            nullify (BCData(i)%rho)
+            nullify (BCData(i)%velx)
+            nullify (BCData(i)%vely)
+            nullify (BCData(i)%velz)
+            nullify (BCData(i)%ps)
+            nullify (BCData(i)%iblank)
+
+        end do
+
+        if (associated(flowDoms(nn, level, sps)%BCType)) &
+            deallocate (flowDoms(nn, level, sps)%BCType, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
+
+        if (associated(flowDoms(nn, level, sps)%BCFaceID)) &
+            deallocate (flowDoms(nn, level, sps)%BCFaceID, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
+
+        if (associated(flowDoms(nn, level, sps)%cgnsSubface)) &
+            deallocate (flowDoms(nn, level, sps)%cgnsSubface, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
+
+        if (associated(flowDoms(nn, level, sps)%inBeg)) &
+            deallocate (flowDoms(nn, level, sps)%inBeg, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
+
+        if (associated(flowDoms(nn, level, sps)%inEnd)) &
+            deallocate (flowDoms(nn, level, sps)%inEnd, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
+
+        if (associated(flowDoms(nn, level, sps)%jnBeg)) &
+            deallocate (flowDoms(nn, level, sps)%jnBeg, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
+
+        if (associated(flowDoms(nn, level, sps)%jnEnd)) &
+            deallocate (flowDoms(nn, level, sps)%jnEnd, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
+
+        if (associated(flowDoms(nn, level, sps)%knBeg)) &
+            deallocate (flowDoms(nn, level, sps)%knBeg, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
+
+        if (associated(flowDoms(nn, level, sps)%knEnd)) &
+            deallocate (flowDoms(nn, level, sps)%knEnd, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
+
+        if (associated(flowDoms(nn, level, sps)%dinBeg)) &
+            deallocate (flowDoms(nn, level, sps)%dinBeg, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
+
+        if (associated(flowDoms(nn, level, sps)%dinEnd)) &
+            deallocate (flowDoms(nn, level, sps)%dinEnd, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
+
+        if (associated(flowDoms(nn, level, sps)%djnBeg)) &
+            deallocate (flowDoms(nn, level, sps)%djnBeg, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
+
+        if (associated(flowDoms(nn, level, sps)%djnEnd)) &
+            deallocate (flowDoms(nn, level, sps)%djnEnd, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
+
+        if (associated(flowDoms(nn, level, sps)%dknBeg)) &
+            deallocate (flowDoms(nn, level, sps)%dknBeg, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
+
+        if (associated(flowDoms(nn, level, sps)%dknEnd)) &
+            deallocate (flowDoms(nn, level, sps)%dknEnd, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
+
+        if (associated(flowDoms(nn, level, sps)%icBeg)) &
+            deallocate (flowDoms(nn, level, sps)%icBeg, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
+
+        if (associated(flowDoms(nn, level, sps)%icEnd)) &
+            deallocate (flowDoms(nn, level, sps)%icEnd, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
+
+        if (associated(flowDoms(nn, level, sps)%jcBeg)) &
+            deallocate (flowDoms(nn, level, sps)%jcBeg, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%groupNum) ) &
-         deallocate(flowDoms(nn,level,sps)%groupNum, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%jcEnd)) &
+            deallocate (flowDoms(nn, level, sps)%jcEnd, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%iblank) ) &
-         deallocate(flowDoms(nn,level,sps)%iblank, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%kcBeg)) &
+            deallocate (flowDoms(nn, level, sps)%kcBeg, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%forcedRecv) ) &
-         deallocate(flowDoms(nn,level,sps)%forcedRecv, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%kcEnd)) &
+            deallocate (flowDoms(nn, level, sps)%kcEnd, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%status) ) &
-         deallocate(flowDoms(nn,level,sps)%status, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%neighBlock)) &
+            deallocate (flowDoms(nn, level, sps)%neighBlock, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%BCData) ) &
-         deallocate(flowDoms(nn,level,sps)%BCData, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%neighProc)) &
+            deallocate (flowDoms(nn, level, sps)%neighProc, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%viscSubface) ) &
-         deallocate(flowDoms(nn,level,sps)%viscSubface, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%l1)) &
+            deallocate (flowDoms(nn, level, sps)%l1, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
+        if (associated(flowDoms(nn, level, sps)%l2)) &
+            deallocate (flowDoms(nn, level, sps)%l2, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%viscIminPointer) ) &
-         deallocate(flowDoms(nn,level,sps)%viscIminPointer, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%l3)) &
+            deallocate (flowDoms(nn, level, sps)%l3, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%viscImaxPointer) ) &
-         deallocate(flowDoms(nn,level,sps)%viscImaxPointer, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%groupNum)) &
+            deallocate (flowDoms(nn, level, sps)%groupNum, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%viscJminPointer) ) &
-         deallocate(flowDoms(nn,level,sps)%viscJminPointer, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%iblank)) &
+            deallocate (flowDoms(nn, level, sps)%iblank, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%viscJmaxPointer) ) &
-         deallocate(flowDoms(nn,level,sps)%viscJmaxPointer, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%forcedRecv)) &
+            deallocate (flowDoms(nn, level, sps)%forcedRecv, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%viscKminPointer) ) &
-         deallocate(flowDoms(nn,level,sps)%viscKminPointer, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%status)) &
+            deallocate (flowDoms(nn, level, sps)%status, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%viscKmaxPointer) ) &
-         deallocate(flowDoms(nn,level,sps)%viscKmaxPointer, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%BCData)) &
+            deallocate (flowDoms(nn, level, sps)%BCData, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%x) ) &
-         deallocate(flowDoms(nn,level,sps)%x, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%viscSubface)) &
+            deallocate (flowDoms(nn, level, sps)%viscSubface, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%xOld) ) &
-         deallocate(flowDoms(nn,level,sps)%xOld, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%viscIminPointer)) &
+            deallocate (flowDoms(nn, level, sps)%viscIminPointer, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%si) ) &
-         deallocate(flowDoms(nn,level,sps)%si, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%viscImaxPointer)) &
+            deallocate (flowDoms(nn, level, sps)%viscImaxPointer, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%sj) ) &
-         deallocate(flowDoms(nn,level,sps)%sj, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%viscJminPointer)) &
+            deallocate (flowDoms(nn, level, sps)%viscJminPointer, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%sk) ) &
-         deallocate(flowDoms(nn,level,sps)%sk, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%viscJmaxPointer)) &
+            deallocate (flowDoms(nn, level, sps)%viscJmaxPointer, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%vol) ) &
-         deallocate(flowDoms(nn,level,sps)%vol, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%viscKminPointer)) &
+            deallocate (flowDoms(nn, level, sps)%viscKminPointer, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%volRef) ) &
-         deallocate(flowDoms(nn,level,sps)%volRef, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%viscKmaxPointer)) &
+            deallocate (flowDoms(nn, level, sps)%viscKmaxPointer, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%volOld) ) &
-         deallocate(flowDoms(nn,level,sps)%volOld, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%x)) &
+            deallocate (flowDoms(nn, level, sps)%x, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%pori) ) &
-         deallocate(flowDoms(nn,level,sps)%pori, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%xOld)) &
+            deallocate (flowDoms(nn, level, sps)%xOld, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%porj) ) &
-         deallocate(flowDoms(nn,level,sps)%porj, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%si)) &
+            deallocate (flowDoms(nn, level, sps)%si, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%pork) ) &
-         deallocate(flowDoms(nn,level,sps)%pork, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%sj)) &
+            deallocate (flowDoms(nn, level, sps)%sj, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%indFamilyI) ) &
-         deallocate(flowDoms(nn,level,sps)%indFamilyI, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%sk)) &
+            deallocate (flowDoms(nn, level, sps)%sk, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%indFamilyJ) ) &
-         deallocate(flowDoms(nn,level,sps)%indFamilyJ, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%vol)) &
+            deallocate (flowDoms(nn, level, sps)%vol, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%indFamilyK) ) &
-         deallocate(flowDoms(nn,level,sps)%indFamilyK, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%volRef)) &
+            deallocate (flowDoms(nn, level, sps)%volRef, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%factFamilyI) ) &
-         deallocate(flowDoms(nn,level,sps)%factFamilyI, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%volOld)) &
+            deallocate (flowDoms(nn, level, sps)%volOld, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%factFamilyJ) ) &
-         deallocate(flowDoms(nn,level,sps)%factFamilyJ, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%pori)) &
+            deallocate (flowDoms(nn, level, sps)%pori, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%factFamilyK) ) &
-         deallocate(flowDoms(nn,level,sps)%factFamilyK, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%porj)) &
+            deallocate (flowDoms(nn, level, sps)%porj, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%rotMatrixI) ) &
-         deallocate(flowDoms(nn,level,sps)%rotMatrixI, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%pork)) &
+            deallocate (flowDoms(nn, level, sps)%pork, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%rotMatrixJ) ) &
-         deallocate(flowDoms(nn,level,sps)%rotMatrixJ, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%indFamilyI)) &
+            deallocate (flowDoms(nn, level, sps)%indFamilyI, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%rotMatrixK) ) &
-         deallocate(flowDoms(nn,level,sps)%rotMatrixK, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%indFamilyJ)) &
+            deallocate (flowDoms(nn, level, sps)%indFamilyJ, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%sFaceI) ) &
-         deallocate(flowDoms(nn,level,sps)%sFaceI, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%indFamilyK)) &
+            deallocate (flowDoms(nn, level, sps)%indFamilyK, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%sFaceJ) ) &
-         deallocate(flowDoms(nn,level,sps)%sFaceJ, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%factFamilyI)) &
+            deallocate (flowDoms(nn, level, sps)%factFamilyI, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%sFaceK) ) &
-         deallocate(flowDoms(nn,level,sps)%sFaceK, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%factFamilyJ)) &
+            deallocate (flowDoms(nn, level, sps)%factFamilyJ, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%w) ) &
-         deallocate(flowDoms(nn,level,sps)%w, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%factFamilyK)) &
+            deallocate (flowDoms(nn, level, sps)%factFamilyK, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%wOld) ) &
-         deallocate(flowDoms(nn,level,sps)%wOld, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%rotMatrixI)) &
+            deallocate (flowDoms(nn, level, sps)%rotMatrixI, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%p) ) &
-         deallocate(flowDoms(nn,level,sps)%p, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%rotMatrixJ)) &
+            deallocate (flowDoms(nn, level, sps)%rotMatrixJ, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%aa) ) &
-         deallocate(flowDoms(nn,level,sps)%aa, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%rotMatrixK)) &
+            deallocate (flowDoms(nn, level, sps)%rotMatrixK, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%gamma) ) &
-         deallocate(flowDoms(nn,level,sps)%gamma, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%sFaceI)) &
+            deallocate (flowDoms(nn, level, sps)%sFaceI, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%ux) ) &
-         deallocate(flowDoms(nn,level,sps)%ux, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%sFaceJ)) &
+            deallocate (flowDoms(nn, level, sps)%sFaceJ, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%uy) ) &
-         deallocate(flowDoms(nn,level,sps)%uy, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%sFaceK)) &
+            deallocate (flowDoms(nn, level, sps)%sFaceK, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%uz) ) &
-         deallocate(flowDoms(nn,level,sps)%uz, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%w)) &
+            deallocate (flowDoms(nn, level, sps)%w, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%vx) ) &
-         deallocate(flowDoms(nn,level,sps)%vx, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%wOld)) &
+            deallocate (flowDoms(nn, level, sps)%wOld, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%vy) ) &
-         deallocate(flowDoms(nn,level,sps)%vy, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%p)) &
+            deallocate (flowDoms(nn, level, sps)%p, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%vz) ) &
-         deallocate(flowDoms(nn,level,sps)%vz, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%aa)) &
+            deallocate (flowDoms(nn, level, sps)%aa, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%wx) ) &
-         deallocate(flowDoms(nn,level,sps)%wx, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%gamma)) &
+            deallocate (flowDoms(nn, level, sps)%gamma, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%wy) ) &
-         deallocate(flowDoms(nn,level,sps)%wy, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%ux)) &
+            deallocate (flowDoms(nn, level, sps)%ux, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%wz) ) &
-         deallocate(flowDoms(nn,level,sps)%wz, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%uy)) &
+            deallocate (flowDoms(nn, level, sps)%uy, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%qx) ) &
-         deallocate(flowDoms(nn,level,sps)%qx, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%uz)) &
+            deallocate (flowDoms(nn, level, sps)%uz, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%qy) ) &
-         deallocate(flowDoms(nn,level,sps)%qy, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%vx)) &
+            deallocate (flowDoms(nn, level, sps)%vx, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%qz) ) &
-         deallocate(flowDoms(nn,level,sps)%qz, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%vy)) &
+            deallocate (flowDoms(nn, level, sps)%vy, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%rlv) ) &
-         deallocate(flowDoms(nn,level,sps)%rlv, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%vz)) &
+            deallocate (flowDoms(nn, level, sps)%vz, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%rev) ) &
-         deallocate(flowDoms(nn,level,sps)%rev, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%wx)) &
+            deallocate (flowDoms(nn, level, sps)%wx, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%s) ) &
-         deallocate(flowDoms(nn,level,sps)%s, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%wy)) &
+            deallocate (flowDoms(nn, level, sps)%wy, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%p1) ) &
-         deallocate(flowDoms(nn,level,sps)%p1, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%wz)) &
+            deallocate (flowDoms(nn, level, sps)%wz, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%dw) ) &
-         deallocate(flowDoms(nn,level,sps)%dw, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%qx)) &
+            deallocate (flowDoms(nn, level, sps)%qx, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%fw) ) &
-         deallocate(flowDoms(nn,level,sps)%fw, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%qy)) &
+            deallocate (flowDoms(nn, level, sps)%qy, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%dwOldRK) ) &
-         deallocate(flowDoms(nn,level,sps)%dwOldRK, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%qz)) &
+            deallocate (flowDoms(nn, level, sps)%qz, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%w1) ) &
-         deallocate(flowDoms(nn,level,sps)%w1, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%rlv)) &
+            deallocate (flowDoms(nn, level, sps)%rlv, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%wr) ) &
-         deallocate(flowDoms(nn,level,sps)%wr, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%rev)) &
+            deallocate (flowDoms(nn, level, sps)%rev, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%mgIFine) ) &
-         deallocate(flowDoms(nn,level,sps)%mgIFine, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%s)) &
+            deallocate (flowDoms(nn, level, sps)%s, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%mgJFine) ) &
-         deallocate(flowDoms(nn,level,sps)%mgJFine, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%p1)) &
+            deallocate (flowDoms(nn, level, sps)%p1, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%mgKFine) ) &
-         deallocate(flowDoms(nn,level,sps)%mgKFine, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%dw)) &
+            deallocate (flowDoms(nn, level, sps)%dw, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%mgIWeight) ) &
-         deallocate(flowDoms(nn,level,sps)%mgIWeight, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%fw)) &
+            deallocate (flowDoms(nn, level, sps)%fw, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%mgJWeight) ) &
-         deallocate(flowDoms(nn,level,sps)%mgJWeight, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%dwOldRK)) &
+            deallocate (flowDoms(nn, level, sps)%dwOldRK, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%mgKWeight) ) &
-         deallocate(flowDoms(nn,level,sps)%mgKWeight, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%w1)) &
+            deallocate (flowDoms(nn, level, sps)%w1, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%mgICoarse) ) &
-         deallocate(flowDoms(nn,level,sps)%mgICoarse, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%wr)) &
+            deallocate (flowDoms(nn, level, sps)%wr, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%mgJCoarse) ) &
-         deallocate(flowDoms(nn,level,sps)%mgJCoarse, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%mgIFine)) &
+            deallocate (flowDoms(nn, level, sps)%mgIFine, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%mgKCoarse) ) &
-         deallocate(flowDoms(nn,level,sps)%mgKCoarse, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%mgJFine)) &
+            deallocate (flowDoms(nn, level, sps)%mgJFine, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%iCo) ) &
-         deallocate(flowDoms(nn,level,sps)%iCo, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%mgKFine)) &
+            deallocate (flowDoms(nn, level, sps)%mgKFine, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%jCo) ) &
-         deallocate(flowDoms(nn,level,sps)%jCo, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%mgIWeight)) &
+            deallocate (flowDoms(nn, level, sps)%mgIWeight, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%kCo) ) &
-         deallocate(flowDoms(nn,level,sps)%kCo, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%mgJWeight)) &
+            deallocate (flowDoms(nn, level, sps)%mgJWeight, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%wn) ) &
-         deallocate(flowDoms(nn,level,sps)%wn, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%mgKWeight)) &
+            deallocate (flowDoms(nn, level, sps)%mgKWeight, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%pn) ) &
-         deallocate(flowDoms(nn,level,sps)%pn, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%mgICoarse)) &
+            deallocate (flowDoms(nn, level, sps)%mgICoarse, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%dtl) ) &
-         deallocate(flowDoms(nn,level,sps)%dtl, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%mgJCoarse)) &
+            deallocate (flowDoms(nn, level, sps)%mgJCoarse, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%radI) ) &
-         deallocate(flowDoms(nn,level,sps)%radI, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%mgKCoarse)) &
+            deallocate (flowDoms(nn, level, sps)%mgKCoarse, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%radJ) ) &
-         deallocate(flowDoms(nn,level,sps)%radJ, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%iCo)) &
+            deallocate (flowDoms(nn, level, sps)%iCo, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%radK) ) &
-         deallocate(flowDoms(nn,level,sps)%radK, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%jCo)) &
+            deallocate (flowDoms(nn, level, sps)%jCo, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
+        if (associated(flowDoms(nn, level, sps)%kCo)) &
+            deallocate (flowDoms(nn, level, sps)%kCo, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%d2Wall) ) &
-         deallocate(flowDoms(nn,level,sps)%d2Wall, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%wn)) &
+            deallocate (flowDoms(nn, level, sps)%wn, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
+        if (associated(flowDoms(nn, level, sps)%pn)) &
+            deallocate (flowDoms(nn, level, sps)%pn, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%bmti1) ) &
-         deallocate(flowDoms(nn,level,sps)%bmti1, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%dtl)) &
+            deallocate (flowDoms(nn, level, sps)%dtl, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%bmti2) ) &
-         deallocate(flowDoms(nn,level,sps)%bmti2, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%radI)) &
+            deallocate (flowDoms(nn, level, sps)%radI, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%bmtj1) ) &
-         deallocate(flowDoms(nn,level,sps)%bmtj1, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%radJ)) &
+            deallocate (flowDoms(nn, level, sps)%radJ, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%bmtj2) ) &
-         deallocate(flowDoms(nn,level,sps)%bmtj2, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%radK)) &
+            deallocate (flowDoms(nn, level, sps)%radK, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%bmtk1) ) &
-         deallocate(flowDoms(nn,level,sps)%bmtk1, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%d2Wall)) &
+            deallocate (flowDoms(nn, level, sps)%d2Wall, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%bmtk2) ) &
-         deallocate(flowDoms(nn,level,sps)%bmtk2, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%bmti1)) &
+            deallocate (flowDoms(nn, level, sps)%bmti1, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
+        if (associated(flowDoms(nn, level, sps)%bmti2)) &
+            deallocate (flowDoms(nn, level, sps)%bmti2, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%bvti1) ) &
-         deallocate(flowDoms(nn,level,sps)%bvti1, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%bmtj1)) &
+            deallocate (flowDoms(nn, level, sps)%bmtj1, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%bvti2) ) &
-         deallocate(flowDoms(nn,level,sps)%bvti2, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%bmtj2)) &
+            deallocate (flowDoms(nn, level, sps)%bmtj2, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%bvtj1) ) &
-         deallocate(flowDoms(nn,level,sps)%bvtj1, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%bmtk1)) &
+            deallocate (flowDoms(nn, level, sps)%bmtk1, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%bvtj2) ) &
-         deallocate(flowDoms(nn,level,sps)%bvtj2, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%bmtk2)) &
+            deallocate (flowDoms(nn, level, sps)%bmtk2, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%bvtk1) ) &
-         deallocate(flowDoms(nn,level,sps)%bvtk1, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%bvti1)) &
+            deallocate (flowDoms(nn, level, sps)%bvti1, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
+
+        if (associated(flowDoms(nn, level, sps)%bvti2)) &
+            deallocate (flowDoms(nn, level, sps)%bvti2, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%bvtk2) ) &
-         deallocate(flowDoms(nn,level,sps)%bvtk2, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%bvtj1)) &
+            deallocate (flowDoms(nn, level, sps)%bvtj1, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%globalCell) ) &
-         deallocate(flowDoms(nn,level,sps)%globalCell, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%bvtj2)) &
+            deallocate (flowDoms(nn, level, sps)%bvtj2, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if( associated(flowDoms(nn,level,sps)%globalNode) ) &
-         deallocate(flowDoms(nn,level,sps)%globalNode, stat=ierr)
-    if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%bvtk1)) &
+            deallocate (flowDoms(nn, level, sps)%bvtk1, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-    if (equationMode == unSteady .and. useALE) then
+        if (associated(flowDoms(nn, level, sps)%bvtk2)) &
+            deallocate (flowDoms(nn, level, sps)%bvtk2, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-      ! Added by HDN
-      if( associated(flowDoms(nn,level,sps)%xALE) ) &
-           deallocate(flowDoms(nn,level,sps)%xALE, stat=ierr)
-      if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%globalCell)) &
+            deallocate (flowDoms(nn, level, sps)%globalCell, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-      if( associated(flowDoms(nn,level,sps)%sIALE) ) &
-           deallocate(flowDoms(nn,level,sps)%sIALE, stat=ierr)
-      if(ierr /= 0) deallocationFailure = .true.
+        if (associated(flowDoms(nn, level, sps)%globalNode)) &
+            deallocate (flowDoms(nn, level, sps)%globalNode, stat=ierr)
+        if (ierr /= 0) deallocationFailure = .true.
 
-      if( associated(flowDoms(nn,level,sps)%sJALE) ) &
-           deallocate(flowDoms(nn,level,sps)%sJALE, stat=ierr)
-      if(ierr /= 0) deallocationFailure = .true.
+        if (equationMode == unSteady .and. useALE) then
+
+            ! Added by HDN
+            if (associated(flowDoms(nn, level, sps)%xALE)) &
+                deallocate (flowDoms(nn, level, sps)%xALE, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
 
-      if( associated(flowDoms(nn,level,sps)%sKALE) ) &
-           deallocate(flowDoms(nn,level,sps)%sKALE, stat=ierr)
-      if(ierr /= 0) deallocationFailure = .true.
+            if (associated(flowDoms(nn, level, sps)%sIALE)) &
+                deallocate (flowDoms(nn, level, sps)%sIALE, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
 
-      if( associated(flowDoms(nn,level,sps)%sVeloIALE) ) &
-           deallocate(flowDoms(nn,level,sps)%sVeloIALE, stat=ierr)
-      if(ierr /= 0) deallocationFailure = .true.
+            if (associated(flowDoms(nn, level, sps)%sJALE)) &
+                deallocate (flowDoms(nn, level, sps)%sJALE, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
 
-      if( associated(flowDoms(nn,level,sps)%sVeloJALE) ) &
-           deallocate(flowDoms(nn,level,sps)%sVeloJALE, stat=ierr)
-      if(ierr /= 0) deallocationFailure = .true.
+            if (associated(flowDoms(nn, level, sps)%sKALE)) &
+                deallocate (flowDoms(nn, level, sps)%sKALE, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
 
-      if( associated(flowDoms(nn,level,sps)%sVeloKALE) ) &
-           deallocate(flowDoms(nn,level,sps)%sVeloKALE, stat=ierr)
-      if(ierr /= 0) deallocationFailure = .true.
+            if (associated(flowDoms(nn, level, sps)%sVeloIALE)) &
+                deallocate (flowDoms(nn, level, sps)%sVeloIALE, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
 
-      if( associated(flowDoms(nn,level,sps)%sFaceIALE) ) &
-           deallocate(flowDoms(nn,level,sps)%sFaceIALE, stat=ierr)
-      if(ierr /= 0) deallocationFailure = .true.
+            if (associated(flowDoms(nn, level, sps)%sVeloJALE)) &
+                deallocate (flowDoms(nn, level, sps)%sVeloJALE, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
 
-      if( associated(flowDoms(nn,level,sps)%sFaceJALE) ) &
-           deallocate(flowDoms(nn,level,sps)%sFaceJALE, stat=ierr)
-      if(ierr /= 0) deallocationFailure = .true.
+            if (associated(flowDoms(nn, level, sps)%sVeloKALE)) &
+                deallocate (flowDoms(nn, level, sps)%sVeloKALE, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
 
-      if( associated(flowDoms(nn,level,sps)%sFaceKALE) ) &
-           deallocate(flowDoms(nn,level,sps)%sFaceKALE, stat=ierr)
-      if(ierr /= 0) deallocationFailure = .true.
+            if (associated(flowDoms(nn, level, sps)%sFaceIALE)) &
+                deallocate (flowDoms(nn, level, sps)%sFaceIALE, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
 
-      ! if( associated(flowDoms(nn,level,sps)%dwALE) ) &
-      !      deallocate(flowDoms(nn,level,sps)%dwALE, stat=ierr)
-      ! if(ierr /= 0) deallocationFailure = .true.
-      !
-      ! if( associated(flowDoms(nn,level,sps)%fwALE) ) &
-      !      deallocate(flowDoms(nn,level,sps)%fwALE, stat=ierr)
-      ! if(ierr /= 0) deallocationFailure = .true.
-    end if
-
-    ! Check for errors in the deallocation.
+            if (associated(flowDoms(nn, level, sps)%sFaceJALE)) &
+                deallocate (flowDoms(nn, level, sps)%sFaceJALE, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
 
-    if( deallocationFailure ) &
-         call terminate("deallocateBlock", &
-         "Something went wrong when deallocating memory")
+            if (associated(flowDoms(nn, level, sps)%sFaceKALE)) &
+                deallocate (flowDoms(nn, level, sps)%sFaceKALE, stat=ierr)
+            if (ierr /= 0) deallocationFailure = .true.
 
-    ! Nullify the pointers of this block.
-    call nullifyFlowDomPointers(nn,level,sps)
+            ! if( associated(flowDoms(nn,level,sps)%dwALE) ) &
+            !      deallocate(flowDoms(nn,level,sps)%dwALE, stat=ierr)
+            ! if(ierr /= 0) deallocationFailure = .true.
+            !
+            ! if( associated(flowDoms(nn,level,sps)%fwALE) ) &
+            !      deallocate(flowDoms(nn,level,sps)%fwALE, stat=ierr)
+            ! if(ierr /= 0) deallocationFailure = .true.
+        end if
 
-  end subroutine deallocateBlock
+        ! Check for errors in the deallocation.
+
+        if (deallocationFailure) &
+            call terminate("deallocateBlock", &
+                           "Something went wrong when deallocating memory")
+
+        ! Nullify the pointers of this block.
+        call nullifyFlowDomPointers(nn, level, sps)
+
+    end subroutine deallocateBlock
 
-  integer function setCGNSRealType()
-    !
-    !       setCGNSRealType sets the cgns real type, depending on the
-    !       compiler options. Note that quadrupole precision is not
-    !       supported by CGNS; double precision is used instead for the
-    !       CGNS IO.
-    !
-    use su_cgns, only : RealSingle, RealDouble
-    implicit none
+    integer function setCGNSRealType()
+        !
+        !       setCGNSRealType sets the cgns real type, depending on the
+        !       compiler options. Note that quadrupole precision is not
+        !       supported by CGNS; double precision is used instead for the
+        !       CGNS IO.
+        !
+        use su_cgns, only: RealSingle, RealDouble
+        implicit none
 
 #ifdef USE_NO_CGNS
 
-    call terminate("setCGNSRealType", &
-         "Function should not be called if no cgns support &
-         &is selected.")
+        call terminate("setCGNSRealType", &
+             "Function should not be called if no cgns support &
+             &is selected.")
 
 #else
 
 # ifdef USE_SINGLE_PRECISION
-    setCGNSRealType = RealSingle
+        setCGNSRealType = RealSingle
 # else
-    setCGNSRealType = RealDouble
+        setCGNSRealType = RealDouble
 # endif
 
 #endif
 
-  end function setCGNSRealType
+    end function setCGNSRealType
 
-  subroutine returnFail(routineName, errorMessage)
-    !
-    !       returnFail writes an error message to standard output and
-    !       sets fail flags to be returned to python.
-    !
-    use constants
-    use communication, only : adflow_comm_world, myid
+    subroutine returnFail(routineName, errorMessage)
+        !
+        !       returnFail writes an error message to standard output and
+        !       sets fail flags to be returned to python.
+        !
+        use constants
+        use communication, only: adflow_comm_world, myid
 #ifndef USE_TAPENADE
-    use killSignals, only : fatalFail, fromPython, routinefailed
+        use killSignals, only: fatalFail, fromPython, routinefailed
 #endif
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    character(len=*), intent(in) :: routineName
-    character(len=*), intent(in) :: errorMessage
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        character(len=*), intent(in) :: routineName
+        character(len=*), intent(in) :: errorMessage
 #ifndef USE_TAPENADE
 
-    !
-    !      Local parameter
-    !
-    integer, parameter :: maxCharLine = 55
-    !
-    !      Local variables
-    !
-    integer :: ierr, len, i2
-    logical :: firstTime
+        !
+        !      Local parameter
+        !
+        integer, parameter :: maxCharLine = 55
+        !
+        !      Local variables
+        !
+        integer :: ierr, len, i2
+        logical :: firstTime
 
-    character(len=len_trim(errorMessage)) :: message
-    character(len=8) :: integerString
+        character(len=len_trim(errorMessage)) :: message
+        character(len=8) :: integerString
 
-    ! Copy the errorMessage into message. It is not possible to work
-    ! with errorMessage directly, because it is modified in this
-    ! routine. Sometimes a constant string is passed to this routine
-    ! and some compilers simply fail then.
+        ! Copy the errorMessage into message. It is not possible to work
+        ! with errorMessage directly, because it is modified in this
+        ! routine. Sometimes a constant string is passed to this routine
+        ! and some compilers simply fail then.
 
-    message = errorMessage
+        message = errorMessage
 
-    ! Print a nice error message. In case of a parallel executable
-    ! also the processor id is printed.
+        ! Print a nice error message. In case of a parallel executable
+        ! also the processor id is printed.
 
-    print "(a)", "#"
-    print "(a)", "#--------------------------- !!! Error !!! &
-         &----------------------------"
+        print "(a)", "#"
+        print "(a)", "#--------------------------- !!! Error !!! &
+             &----------------------------"
 
-    write(integerString,"(i8)") myID
-    integerString = adjustl(integerString)
+        write (integerString, "(i8)") myID
+        integerString = adjustl(integerString)
 
-    print "(2a)", "#* returnFail called by processor ", &
-         trim(integerString)
+        print "(2a)", "#* returnFail called by processor ", &
+            trim(integerString)
 
-    ! Write the header of the error message.
+        ! Write the header of the error message.
 
-    print "(2a)", "#* Run-time error in procedure ", &
-         trim(routineName)
+        print "(2a)", "#* Run-time error in procedure ", &
+            trim(routineName)
 
-    ! Loop to write the error message. If the message is too long it
-    ! is split over several lines.
+        ! Loop to write the error message. If the message is too long it
+        ! is split over several lines.
 
-    firstTime = .true.
-    do
-       ! Determine the remaining error message to be written.
-       ! If longer than the maximum number of characters allowed
-       ! on a line, it is attempted to split the message.
+        firstTime = .true.
+        do
+            ! Determine the remaining error message to be written.
+            ! If longer than the maximum number of characters allowed
+            ! on a line, it is attempted to split the message.
 
-       message = adjustl(message)
-       len = len_trim(message)
-       i2  = min(maxCharLine,len)
+            message = adjustl(message)
+            len = len_trim(message)
+            i2 = min(maxCharLine, len)
 
-       if(i2 < len) i2 = index(message(:i2), " ", .true.) - 1
-       if(i2 < 0)   i2 = index(message, " ") - 1
-       if(i2 < 0)   i2 = len
+            if (i2 < len) i2 = index(message(:i2), " ", .true.) - 1
+            if (i2 < 0) i2 = index(message, " ") - 1
+            if (i2 < 0) i2 = len
 
-       ! Write this part of the error message. If it is the first
-       ! line of the message some additional stuff is printed.
+            ! Write this part of the error message. If it is the first
+            ! line of the message some additional stuff is printed.
 
-       if( firstTime ) then
-          print "(2a)", "#* Error message: ", &
-               trim(message(:i2))
-          firstTime = .false.
-       else
-          print "(2a)", "#*                ", &
-               trim(message(:i2))
-       endif
+            if (firstTime) then
+                print "(2a)", "#* Error message: ", &
+                    trim(message(:i2))
+                firstTime = .false.
+            else
+                print "(2a)", "#*                ", &
+                    trim(message(:i2))
+            end if
 
-       ! Exit the loop if the entire message has been written.
+            ! Exit the loop if the entire message has been written.
 
-       if(i2 == len) exit
+            if (i2 == len) exit
 
-       ! Adapt the string for the next part to be written.
+            ! Adapt the string for the next part to be written.
 
-       message = message(i2+1:)
+            message = message(i2 + 1:)
 
-    enddo
+        end do
 
-    ! Write the trailing message.
+        ! Write the trailing message.
 
-    print "(a)", "#*"
-    print "(a)", "#------------------------------------------&
-         &----------------------------"
-    print "(a)", "#"
+        print "(a)", "#*"
+        print "(a)", "#------------------------------------------&
+             &----------------------------"
+        print "(a)", "#"
 
-    ! Call abort and stop the program. This stop should be done in
-    ! abort, but just to be sure.
+        ! Call abort and stop the program. This stop should be done in
+        ! abort, but just to be sure.
 
-    if (fromPython)then
-       routineFailed=.True.
-       fatalFail = .True.
-    else
-       call mpi_abort(ADflow_comm_world, 1, ierr)
-       stop
-    end if
+        if (fromPython) then
+            routineFailed = .True.
+            fatalFail = .True.
+        else
+            call mpi_abort(ADflow_comm_world, 1, ierr)
+            stop
+        end if
 #endif
 
-  end subroutine returnFail
+    end subroutine returnFail
 
+    subroutine EChk(errorcode, file, line)
 
-  subroutine EChk(errorcode, file, line)
-
-    ! Check if ierr that resulted from a petsc or MPI call is in fact an
-    ! error.
-    use constants
-    use communication, only : adflow_comm_world, myid
-    implicit none
+        ! Check if ierr that resulted from a petsc or MPI call is in fact an
+        ! error.
+        use constants
+        use communication, only: adflow_comm_world, myid
+        implicit none
 
-    integer(kind=intType),intent(in) :: errorcode
-    character(len=*),intent(in) :: file
-    integer(kind=intType),intent(in) :: line
-    integer::ierr
-    character(len=maxStringLen) :: errorCodeFormat, errorLineFormat
+        integer(kind=intType), intent(in) :: errorcode
+        character(len=*), intent(in) :: file
+        integer(kind=intType), intent(in) :: line
+        integer::ierr
+        character(len=maxStringLen) :: errorCodeFormat, errorLineFormat
 
-    errorCodeFormat = "(2(A, I2,)"
-    errorLineFormat = "(A, I5, A, A)"
+        errorCodeFormat = "(2(A, I2,)"
+        errorLineFormat = "(A, I5, A, A)"
 
-    if (errorcode == 0) then
-       return ! No error, return immediately
-    else
+        if (errorcode == 0) then
+            return ! No error, return immediately
+        else
 #ifndef USE_TAPENADE
-       print *,'---------------------------------------------------------------------------'
-       print errorCodeFormat, "PETSc or MPI Error. Error Code ",errorcode,". Detected on Proc ",myid
-       print errorLineFormat, "Error at line: ",line," in file: ",file
-       print *,'---------------------------------------------------------------------------'
-       call MPI_Abort(adflow_comm_world,errorcode,ierr)
-       stop ! Just in case
+            print *, '---------------------------------------------------------------------------'
+            print errorCodeFormat, "PETSc or MPI Error. Error Code ", errorcode, ". Detected on Proc ", myid
+            print errorLineFormat, "Error at line: ", line, " in file: ", file
+            print *, '---------------------------------------------------------------------------'
+            call MPI_Abort(adflow_comm_world, errorcode, ierr)
+            stop ! Just in case
 #else
-       stop
+            stop
 #endif
-    end if
-
-  end subroutine EChk
-
-  subroutine convertToLowerCase(string)
-    !
-    !       convertToLowerCase converts the given string to lower case.
-    !
-    use constants
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    character (len=*), intent(inout) :: string
-    !
-    !      Local variables
-    !
-    integer(kind=intType), parameter :: upperToLower = iachar("a") - iachar("A")
-
-    integer(kind=intType) :: i, lenString
-
-    ! Determine the length of the given string and convert the upper
-    ! case characters to lower case.
-
-    lenString = len_trim(string)
-    do i=1,lenString
-       if("A" <= string(i:i) .and. string(i:i) <= "Z")    &
-            string(i:i) = achar(iachar(string(i:i)) + upperToLower)
-    enddo
-
-  end subroutine convertToLowerCase
-
-  logical function EulerWallsPresent()
-
-    ! eulerWallsPresent determines whether or not inviscid walls are
-    ! present in the whole grid. It first determines if these walls are
-    ! present locally and performs an allReduce afterwards.
+        end if
+
+    end subroutine EChk
+
+    subroutine convertToLowerCase(string)
+        !
+        !       convertToLowerCase converts the given string to lower case.
+        !
+        use constants
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        character(len=*), intent(inout) :: string
+        !
+        !      Local variables
+        !
+        integer(kind=intType), parameter :: upperToLower = iachar("a") - iachar("A")
+
+        integer(kind=intType) :: i, lenString
+
+        ! Determine the length of the given string and convert the upper
+        ! case characters to lower case.
+
+        lenString = len_trim(string)
+        do i = 1, lenString
+            if ("A" <= string(i:i) .and. string(i:i) <= "Z") &
+                string(i:i) = achar(iachar(string(i:i)) + upperToLower)
+        end do
+
+    end subroutine convertToLowerCase
+
+    logical function EulerWallsPresent()
+
+        ! eulerWallsPresent determines whether or not inviscid walls are
+        ! present in the whole grid. It first determines if these walls are
+        ! present locally and performs an allReduce afterwards.
+
+        use constants
+        use block, only: nDom, flowDoms
+        use communication, only: adflow_comm_world
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn, i
+        integer               :: ierr
+        logical               :: localEulerWalls
+
+        ! Initialize localEulerWalls to .false. and loop over the
+        ! boundary subfaces of the blocks to see if Euler walls are
+        ! present on this processor. As the info is the same for all
+        ! spectral solutions, only the 1st needs to be considered.
+
+        localEulerWalls = .false.
+        do nn = 1, nDom
+            do i = 1, flowDoms(nn, 1, 1)%nBocos
+                if (flowDoms(nn, 1, 1)%BCType(i) == EulerWall) &
+                    localEulerWalls = .true.
+            end do
+        end do
+
+        ! Set i to 1 if Euler walls are present locally and to 0
+        ! otherwise. Determine the maximum over all processors
+        ! and set EulerWallsPresent accordingly.
+
+        i = 0
+        if (localEulerWalls) i = 1
+        call mpi_allreduce(i, nn, 1, adflow_integer, mpi_max, &
+                           ADflow_comm_world, ierr)
+
+        if (nn == 0) then
+            EulerWallsPresent = .false.
+        else
+            EulerWallsPresent = .true.
+        end if
+
+    end function EulerWallsPresent
+    subroutine allocConvArrays(nIterTot)
+        !
+        !       allocConvArrays allocates the memory for the convergence
+        !       arrays. The number of iterations allocated, nIterTot, is
+        !       enough to store the maximum number of iterations specified
+        !       plus possible earlier iterations read from the restart file.
+        !       This routine MAY be called with data already inside of
+        !       convArray and this will be saved.
+        !
+        use constants
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use inputIO, only: storeConvInnerIter
+        use monitor, only: convArray, nMon, solverDataArray, solverTypeArray, showCPU
+        implicit none
+        !
+        !      Subroutine argument.
+        !
+        integer(kind=intType) :: nIterTot
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType):: nSolverMon ! number of solver monitor variables
+
+        ! Return immediately if the convergence history (of the inner
+        ! iterations) does not need to be stored. This logical can
+        ! only be .false. for an unsteady computation.
+        if (.not. storeConvInnerIter) return
+
+        if (allocated(convArray)) then
+            deallocate (convArray)
+        end if
+        if (allocated(solverDataArray)) then
+            deallocate (solverDataArray)
+        end if
+        if (allocated(solverTypeArray)) then
+            deallocate (solverTypeArray)
+        end if
+
+        if (showCPU) then
+            nSolverMon = 5
+        else
+            nSolverMon = 4
+        end if
+
+        allocate (convArray(0:nIterTot, nTimeIntervalsSpectral, nMon))
+        allocate (solverDataArray(0:nIterTot, nTimeIntervalsSpectral, nSolverMon))
+        allocate (solverTypeArray(0:nIterTot, nTimeIntervalsSpectral))
+
+        ! Zero Array:
+        convArray = zero
+        solverDataArray = zero
+
+    end subroutine allocConvArrays
+
+    subroutine allocTimeArrays(nTimeTot)
+        !
+        !       allocTimeArrays allocates the memory for the arrays to store
+        !       the time history of the unsteady computation. The number of
+        !       time steps specified is enought to store the total number of
+        !       time steps of the current computation plus possible earlier
+        !       computations.
+        !
+        use constants
+        use monitor, only: timeArray, timeDataArray, nMon
+        implicit none
+        !
+        !      Subroutine argument.
+        !
+        integer(kind=intType) :: nTimeTot
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        ! Allocate the memory for both the time array as well as the
+        ! data array.
+
+        if (allocated(timeArray)) then
+            deallocate (timeArray)
+        end if
+        if (allocated(timeDataArray)) then
+            deallocate (timeDataArray)
+        end if
+
+        allocate (timeArray(nTimeTot), &
+                  timeDataArray(nTimeTot, nMon), stat=ierr)
+        if (ierr /= 0) &
+             call terminate("allocTimeArrays", &
+             "Memory allocation failure for timeArray &
+             &and timeDataArray")
+
+    end subroutine allocTimeArrays
+
+    subroutine getMonitorVariableNames(nvar, monitor_variables)
+        !
+        !  copy the names in monnames to another array so that is can be
+        !  passed back up the python level
+        !
+        use constants
+        use monitor, only: nmon, monnames
+        implicit none
+
+        ! save the monitor variable names into a new array
+        integer(kind=intType), intent(in):: nvar
+        character, dimension(nvar, maxCGNSNameLen), intent(out):: monitor_variables
+
+        ! working variables
+        character(len=maxCGNSNameLen) :: var_name
+        integer(kind=intType) :: c, idx_mon
+
+        do idx_mon = 1, nvar
+            var_name = monNames(idx_mon)
+
+            do c = 1, len(monNames(idx_mon))
+                monitor_variables(idx_mon, c) = var_name(c:c)
+            end do
+        end do
+
+    end subroutine getMonitorVariableNames
+
+    subroutine getSolverTypeArray(niter, nsps, type_array)
+        !
+        !  copy the names in sovlerTypeArray to another array so that is can be
+        !  passed back up the python level
+        !
+        use constants
+        use monitor, only: solverTypeArray
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use iteration, only: itertot
+        implicit none
+
+        ! save the monitor variable names into a new array
+        integer(kind=intType), intent(in):: niter, nsps
+        character, dimension(0:niter, ntimeintervalsspectral, maxIterTypelen), intent(out):: type_array
+
+        ! working variables
+        character(len=maxIterTypelen) :: type_name
+        integer(kind=intType) :: c, idx_sps, idx_iter
+
+        do idx_sps = 1, ntimeintervalsspectral
+            do idx_iter = 0, itertot
+                type_name = solverTypeArray(idx_iter, idx_sps)
+
+                do c = 1, len(solverTypeArray(idx_iter, idx_sps))
+                    type_array(idx_iter, idx_sps, c) = type_name(c:c)
+                end do
 
-    use constants
-    use block, only : nDom, flowDoms
-    use communication, only : adflow_comm_world
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn, i
-    integer               :: ierr
-    logical               :: localEulerWalls
-
-    ! Initialize localEulerWalls to .false. and loop over the
-    ! boundary subfaces of the blocks to see if Euler walls are
-    ! present on this processor. As the info is the same for all
-    ! spectral solutions, only the 1st needs to be considered.
-
-    localEulerWalls = .false.
-    do nn=1,nDom
-       do i=1,flowDoms(nn,1,1)%nBocos
-          if(flowDoms(nn,1,1)%BCType(i) == EulerWall) &
-               localEulerWalls = .true.
-       enddo
-    enddo
-
-    ! Set i to 1 if Euler walls are present locally and to 0
-    ! otherwise. Determine the maximum over all processors
-    ! and set EulerWallsPresent accordingly.
-
-    i = 0
-    if( localEulerWalls ) i = 1
-    call mpi_allreduce(i, nn, 1, adflow_integer, mpi_max, &
-         ADflow_comm_world, ierr)
-
-    if(nn == 0) then
-       EulerWallsPresent = .false.
-    else
-       EulerWallsPresent = .true.
-    endif
-
-  end function EulerWallsPresent
-  subroutine allocConvArrays(nIterTot)
-    !
-    !       allocConvArrays allocates the memory for the convergence
-    !       arrays. The number of iterations allocated, nIterTot, is
-    !       enough to store the maximum number of iterations specified
-    !       plus possible earlier iterations read from the restart file.
-    !       This routine MAY be called with data already inside of
-    !       convArray and this will be saved.
-    !
-    use constants
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use inputIO, only : storeConvInnerIter
-    use monitor, only : convArray, nMon, solverDataArray, solverTypeArray, showCPU
-    implicit none
-    !
-    !      Subroutine argument.
-    !
-    integer(kind=intType) :: nIterTot
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType):: nSolverMon ! number of solver monitor variables
-
-    ! Return immediately if the convergence history (of the inner
-    ! iterations) does not need to be stored. This logical can
-    ! only be .false. for an unsteady computation.
-    if(.not. storeConvInnerIter) return
-
-    if (allocated(convArray)) then
-       deallocate(convArray)
-    end if
-    if (allocated(solverDataArray)) then
-       deallocate(solverDataArray)
-    end if
-    if (allocated(solverTypeArray)) then
-       deallocate(solverTypeArray)
-    end if
-
-    if (showCPU) then
-      nSolverMon = 5
-    else
-      nSolverMon = 4
-    end if
-
-    allocate(convArray(0:nIterTot, nTimeIntervalsSpectral, nMon))
-    allocate(solverDataArray(0:nIterTot, nTimeIntervalsSpectral, nSolverMon))
-    allocate(solverTypeArray(0:nIterTot, nTimeIntervalsSpectral))
-
-    ! Zero Array:
-    convArray = zero
-    solverDataArray = zero
-
-
-
-  end subroutine allocConvArrays
-
-  subroutine allocTimeArrays(nTimeTot)
-    !
-    !       allocTimeArrays allocates the memory for the arrays to store
-    !       the time history of the unsteady computation. The number of
-    !       time steps specified is enought to store the total number of
-    !       time steps of the current computation plus possible earlier
-    !       computations.
-    !
-    use constants
-    use monitor, only : timeArray, timeDataArray, nMon
-    implicit none
-    !
-    !      Subroutine argument.
-    !
-    integer(kind=intType) :: nTimeTot
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    ! Allocate the memory for both the time array as well as the
-    ! data array.
-
-    if (allocated(timeArray)) then
-       deallocate(timeArray)
-    end if
-    if (allocated(timeDataArray)) then
-       deallocate(timeDataArray)
-    end if
-
-    allocate(timeArray(nTimeTot), &
-         timeDataArray(nTimeTot,nMon), stat=ierr)
-    if(ierr /= 0)                       &
-         call terminate("allocTimeArrays", &
-         "Memory allocation failure for timeArray &
-         &and timeDataArray")
-
-  end subroutine allocTimeArrays
-
-  subroutine getMonitorVariableNames(nvar, monitor_variables)
-   !
-   !  copy the names in monnames to another array so that is can be
-   !  passed back up the python level
-   !
-   use constants
-   use monitor, only: nmon, monnames
-   implicit none
-
-   ! save the monitor variable names into a new array
-   integer(kind=intType), intent(in):: nvar
-   character, dimension(nvar,maxCGNSNameLen), intent(out):: monitor_variables
-
-   ! working variables
-   character(len=maxCGNSNameLen) :: var_name
-   integer(kind=intType) :: c, idx_mon
-
-   do idx_mon=1,nvar
-      var_name =  monNames(idx_mon)
-
-      do c =1,len(monNames(idx_mon))
-         monitor_variables(idx_mon, c) =var_name(c:c)
-      end do
-   end do
-
-
-  end subroutine getMonitorVariableNames
-
-  subroutine getSolverTypeArray(niter, nsps, type_array)
-   !
-   !  copy the names in sovlerTypeArray to another array so that is can be
-   !  passed back up the python level
-   !
-   use constants
-   use monitor, only:  solverTypeArray
-   use inputTimeSpectral, only : nTimeIntervalsSpectral
-   use iteration, only: itertot
-   implicit none
-
-   ! save the monitor variable names into a new array
-   integer(kind=intType), intent(in):: niter, nsps
-   character, dimension(0:niter, ntimeintervalsspectral, maxIterTypelen), intent(out):: type_array
-
-   ! working variables
-   character(len=maxIterTypelen) :: type_name
-   integer(kind=intType) :: c, idx_sps, idx_iter
-
-   do idx_sps=1,ntimeintervalsspectral
-      do idx_iter=0,itertot
-         type_name =  solverTypeArray(idx_iter, idx_sps)
-
-         do c =1,len(solverTypeArray(idx_iter, idx_sps))
-            type_array(idx_iter, idx_sps, c) = type_name(c:c)
-         end do
-
-      end do
-   end do
-
-   end subroutine getSolverTypeArray
-
-  subroutine convergenceHeader
-    !
-    !       convergenceHeader writes the convergence header to stdout.
-    !
-    use cgnsNames
-    use inputPhysics
-    use inputUnsteady
-    use flowVarRefState
-    use monitor
-    use iteration
-    use inputIteration
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, nCharWrite
-    logical :: writeIterations
-
-    ! Determine whether or not the iterations must be written.
-
-    if (printIterations) then
-       writeIterations = .true.
-       if(equationMode          == unsteady .and. &
-            timeIntegrationScheme == explicitRK) writeIterations = .false.
-
-       ! Determine the number of characters to write.
-       ! First initialize this number with the variables which are
-       ! always written. This depends on the equation mode. For unsteady
-       ! and spectral computations a bit more info is written.
-
-       nCharWrite = 10
-       if( writeIterations ) nCharWrite = nCharWrite + 7 + 7 + 9 + 7 + 7 + 10
-       if(equationMode == unsteady) then
-          nCharWrite = nCharWrite + 7 + fieldWidth + 1
-       else if(equationMode == timeSpectral) then
-          nCharWrite = nCharWrite + 11
-       endif
-
-       ! Add the number of characters needed for the actual variables.
+            end do
+        end do
+
+    end subroutine getSolverTypeArray
+
+    subroutine convergenceHeader
+        !
+        !       convergenceHeader writes the convergence header to stdout.
+        !
+        use cgnsNames
+        use inputPhysics
+        use inputUnsteady
+        use flowVarRefState
+        use monitor
+        use iteration
+        use inputIteration
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, nCharWrite
+        logical :: writeIterations
+
+        ! Determine whether or not the iterations must be written.
+
+        if (printIterations) then
+            writeIterations = .true.
+            if (equationMode == unsteady .and. &
+                timeIntegrationScheme == explicitRK) writeIterations = .false.
+
+            ! Determine the number of characters to write.
+            ! First initialize this number with the variables which are
+            ! always written. This depends on the equation mode. For unsteady
+            ! and spectral computations a bit more info is written.
+
+            nCharWrite = 10
+            if (writeIterations) nCharWrite = nCharWrite + 7 + 7 + 9 + 7 + 7 + 10
+            if (equationMode == unsteady) then
+                nCharWrite = nCharWrite + 7 + fieldWidth + 1
+            else if (equationMode == timeSpectral) then
+                nCharWrite = nCharWrite + 11
+            end if
+
+            ! Add the number of characters needed for the actual variables.
 
 #ifndef USE_COMPLEX
-       ! for the real version this is easy
-       nCharWrite = nCharWrite + nMon*(fieldWidthLarge+1)
+            ! for the real version this is easy
+            nCharWrite = nCharWrite + nMon*(fieldWidthLarge + 1)
 #else
-      ! for complex we need to differentiate between residuals and functionals
-      do i=1, nMon
-         select case (monNames(i))
-
-         case (cgnsL2resRho,  cgnsL2resMomx,    &
-               cgnsL2resMomy, cgnsL2resMomz,    &
-               cgnsL2resRhoe, cgnsL2resNu,      &
-               cgnsL2resK,    cgnsL2resOmega,   &
-               cgnsL2resTau,  cgnsL2resEpsilon, &
-               cgnsL2resV2,   cgnsL2resF, 'totalR')
-
-            ! complex residuals need 9 more characters
-            nCharWrite = nCharWrite + fieldWidthLarge+1 + 9
-
-         case default
-            ! complex functionals need 25 more characters
-            nCharWrite = nCharWrite + fieldWidthLarge+1 + 25
-
-         end select
-      end do
+            ! for complex we need to differentiate between residuals and functionals
+            do i = 1, nMon
+                select case (monNames(i))
+
+                case (cgnsL2resRho, cgnsL2resMomx, &
+                      cgnsL2resMomy, cgnsL2resMomz, &
+                      cgnsL2resRhoe, cgnsL2resNu, &
+                      cgnsL2resK, cgnsL2resOmega, &
+                      cgnsL2resTau, cgnsL2resEpsilon, &
+                      cgnsL2resV2, cgnsL2resF, 'totalR')
+
+                    ! complex residuals need 9 more characters
+                    nCharWrite = nCharWrite + fieldWidthLarge + 1 + 9
+
+                case default
+                    ! complex functionals need 25 more characters
+                    nCharWrite = nCharWrite + fieldWidthLarge + 1 + 25
+
+                end select
+            end do
 #endif
 
-       if( showCPU ) nCharWrite = nCharWrite + fieldWidth + 1
+            if (showCPU) nCharWrite = nCharWrite + fieldWidth + 1
 
-       ! Write the line of - signs. This line starts with a #, such
-       ! that it is ignored by some plotting software.
+            ! Write the line of - signs. This line starts with a #, such
+            ! that it is ignored by some plotting software.
 
-       write(*,"(a)",advance="no") "#"
-       do i=2,nCharWrite
-          write(*,"(a)",advance="no") "-"
-       enddo
-       print "(1x)"
+            write (*, "(a)", advance="no") "#"
+            do i = 2, nCharWrite
+                write (*, "(a)", advance="no") "-"
+            end do
+            print "(1x)"
 
-       ! Write the first line of the header. First the variables that
-       ! will always be written. Some extra variables must be written
-       ! for unsteady and time spectral problems.
-       write(*,'("# ")',advance="no")
-       write(*,"(a)",advance="no") " Grid  |"
+            ! Write the first line of the header. First the variables that
+            ! will always be written. Some extra variables must be written
+            ! for unsteady and time spectral problems.
+            write (*, '("# ")', advance="no")
+            write (*, "(a)", advance="no") " Grid  |"
 
-       if(equationMode == unsteady) then
-          write(*,"(a)",advance="no") " Time |    Time    |"
-       else if(equationMode == timeSpectral) then
-          write(*,"(a)",advance="no") " Spectral |"
-       endif
+            if (equationMode == unsteady) then
+                write (*, "(a)", advance="no") " Time |    Time    |"
+            else if (equationMode == timeSpectral) then
+                write (*, "(a)", advance="no") " Spectral |"
+            end if
 
-       if( writeIterations ) write(*,"(a)",advance="no") " Iter | Iter |  Iter  |   CFL   | Step | Lin  |"
-       if( showCPU )         write(*,"(a)",advance="no") "    Wall    |"
+            if (writeIterations) write (*, "(a)", advance="no") " Iter | Iter |  Iter  |   CFL   | Step | Lin  |"
+            if (showCPU) write (*, "(a)", advance="no") "    Wall    |"
 
-       ! Write the header for the variables to be monitored.
-       do i=1, nMon
-          ! Determine the variable name and write the
-          ! corresponding text.
+            ! Write the header for the variables to be monitored.
+            do i = 1, nMon
+                ! Determine the variable name and write the
+                ! corresponding text.
 
-         ! we do the real and complex versions separately
+                ! we do the real and complex versions separately
 #ifndef USE_COMPLEX
-         ! real versions print the full 16 digits so these spacings are "regular" and the same for all
-          select case (monNames(i))
+                ! real versions print the full 16 digits so these spacings are "regular" and the same for all
+                select case (monNames(i))
 
-          case ("totalR")
-             write(*,"(a)",advance="no") "        totalRes        |"
+                case ("totalR")
+                    write (*, "(a)", advance="no") "        totalRes        |"
 
-          case (cgnsL2resRho)
-             write(*,"(a)",advance="no") "        Res rho         |"
+                case (cgnsL2resRho)
+                    write (*, "(a)", advance="no") "        Res rho         |"
 
-          case (cgnsL2resMomx)
-             write(*,"(a)",advance="no") "        Res rhou        |"
+                case (cgnsL2resMomx)
+                    write (*, "(a)", advance="no") "        Res rhou        |"
 
-          case (cgnsL2resMomy)
-             write(*,"(a)",advance="no") "        Res rhov        |"
+                case (cgnsL2resMomy)
+                    write (*, "(a)", advance="no") "        Res rhov        |"
 
-          case (cgnsL2resMomz)
-             write(*,"(a)",advance="no") "        Res rhow        |"
+                case (cgnsL2resMomz)
+                    write (*, "(a)", advance="no") "        Res rhow        |"
 
-          case (cgnsL2resRhoe)
-             write(*,"(a)",advance="no") "        Res rhoE        |"
+                case (cgnsL2resRhoe)
+                    write (*, "(a)", advance="no") "        Res rhoE        |"
 
-          case (cgnsL2resNu)
-             write(*,"(a)",advance="no") "       Res nuturb       |"
+                case (cgnsL2resNu)
+                    write (*, "(a)", advance="no") "       Res nuturb       |"
 
-          case (cgnsL2resK)
-             write(*,"(a)",advance="no") "       Res kturb        |"
+                case (cgnsL2resK)
+                    write (*, "(a)", advance="no") "       Res kturb        |"
 
-          case (cgnsL2resOmega)
-             write(*,"(a)",advance="no") "       Res wturb        |"
+                case (cgnsL2resOmega)
+                    write (*, "(a)", advance="no") "       Res wturb        |"
 
-          case (cgnsL2resTau)
-             write(*,"(a)",advance="no") "       Res tauturb      |"
+                case (cgnsL2resTau)
+                    write (*, "(a)", advance="no") "       Res tauturb      |"
 
-          case (cgnsL2resEpsilon)
-             write(*,"(a)",advance="no") "       Res epsturb      |"
+                case (cgnsL2resEpsilon)
+                    write (*, "(a)", advance="no") "       Res epsturb      |"
 
-          case (cgnsL2resV2)
-             write(*,"(a)",advance="no") "       Res v2turb       |"
+                case (cgnsL2resV2)
+                    write (*, "(a)", advance="no") "       Res v2turb       |"
 
-          case (cgnsL2resF)
-             write(*,"(a)",advance="no") "       Res fturb        |"
+                case (cgnsL2resF)
+                    write (*, "(a)", advance="no") "       Res fturb        |"
 
-          case (cgnsCl)
-             write(*,"(a)",advance="no") "         C_lift         |"
+                case (cgnsCl)
+                    write (*, "(a)", advance="no") "         C_lift         |"
 
-          case (cgnsClp)
-             write(*,"(a)",advance="no") "        C_lift_p        |"
+                case (cgnsClp)
+                    write (*, "(a)", advance="no") "        C_lift_p        |"
 
-          case (cgnsClv)
-             write(*,"(a)",advance="no") "        C_lift_v        |"
+                case (cgnsClv)
+                    write (*, "(a)", advance="no") "        C_lift_v        |"
 
-          case (cgnsCd)
-             write(*,"(a)",advance="no") "        C_drag          |"
+                case (cgnsCd)
+                    write (*, "(a)", advance="no") "        C_drag          |"
 
-          case (cgnsCdp)
-             write(*,"(a)",advance="no") "        C_drag_p        |"
+                case (cgnsCdp)
+                    write (*, "(a)", advance="no") "        C_drag_p        |"
 
-          case (cgnsCdv)
-             write(*,"(a)",advance="no") "        C_drag_v        |"
+                case (cgnsCdv)
+                    write (*, "(a)", advance="no") "        C_drag_v        |"
 
-          case (cgnsCfx)
-             write(*,"(a)",advance="no") "          C_Fx          |"
+                case (cgnsCfx)
+                    write (*, "(a)", advance="no") "          C_Fx          |"
 
-          case (cgnsCfy)
-             write(*,"(a)",advance="no") "          C_Fy          |"
+                case (cgnsCfy)
+                    write (*, "(a)", advance="no") "          C_Fy          |"
 
-          case (cgnsCfz)
-             write(*,"(a)",advance="no") "          C_Fz          |"
+                case (cgnsCfz)
+                    write (*, "(a)", advance="no") "          C_Fz          |"
 
-          case (cgnsCmx)
-             write(*,"(a)",advance="no") "          C_Mx          |"
+                case (cgnsCmx)
+                    write (*, "(a)", advance="no") "          C_Mx          |"
 
-          case (cgnsCmy)
-             write(*,"(a)",advance="no") "          C_My          |"
+                case (cgnsCmy)
+                    write (*, "(a)", advance="no") "          C_My          |"
 
-          case (cgnsCmz)
-             write(*,"(a)",advance="no") "          C_Mz          |"
+                case (cgnsCmz)
+                    write (*, "(a)", advance="no") "          C_Mz          |"
 
-          case (cgnsHdiffMax)
-             write(*,"(a)",advance="no") "       |H-H_inf|        |"
+                case (cgnsHdiffMax)
+                    write (*, "(a)", advance="no") "       |H-H_inf|        |"
 
-          case (cgnsMachMax)
-             write(*,"(a)",advance="no") "        Mach_max        |"
+                case (cgnsMachMax)
+                    write (*, "(a)", advance="no") "        Mach_max        |"
 
-          case (cgnsYplusMax)
-             write(*,"(a)",advance="no") "         Y+_max         |"
+                case (cgnsYplusMax)
+                    write (*, "(a)", advance="no") "         Y+_max         |"
 
-          case (cgnsEddyMax)
-             write(*,"(a)",advance="no") "        Eddyv_max       |"
+                case (cgnsEddyMax)
+                    write (*, "(a)", advance="no") "        Eddyv_max       |"
 
-          case (cgnsSepSensor)
-             write(*,"(a)",advance="no") "        SepSensor       |"
+                case (cgnsSepSensor)
+                    write (*, "(a)", advance="no") "        SepSensor       |"
 
-          case (cgnsCavitation)
-             write(*,"(a)",advance="no") "       Cavitation       |"
+                case (cgnsCavitation)
+                    write (*, "(a)", advance="no") "       Cavitation       |"
 
-          case (cgnsAxisMoment)
-             write(*,"(a)",advance="no") "       AxisMoment       |"
+                case (cgnsAxisMoment)
+                    write (*, "(a)", advance="no") "       AxisMoment       |"
 
-          end select
+                end select
 #else
-         ! complex versions print the full 16 digits for real and complex for "functionals"
-         ! but shorter versions only for residuals
-         select case (monNames(i))
+                ! complex versions print the full 16 digits for real and complex for "functionals"
+                ! but shorter versions only for residuals
+                select case (monNames(i))
 
-         case ("totalR")
-            write(*,"(a)",advance="no") "            totalRes             |"
+                case ("totalR")
+                    write (*, "(a)", advance="no") "            totalRes             |"
 
-         case (cgnsL2resRho)
-            write(*,"(a)",advance="no") "            Res rho              |"
+                case (cgnsL2resRho)
+                    write (*, "(a)", advance="no") "            Res rho              |"
 
-         case (cgnsL2resMomx)
-            write(*,"(a)",advance="no") "            Res rhou             |"
+                case (cgnsL2resMomx)
+                    write (*, "(a)", advance="no") "            Res rhou             |"
 
-         case (cgnsL2resMomy)
-            write(*,"(a)",advance="no") "            Res rhov             |"
+                case (cgnsL2resMomy)
+                    write (*, "(a)", advance="no") "            Res rhov             |"
 
-         case (cgnsL2resMomz)
-            write(*,"(a)",advance="no") "            Res rhow             |"
+                case (cgnsL2resMomz)
+                    write (*, "(a)", advance="no") "            Res rhow             |"
 
-         case (cgnsL2resRhoe)
-            write(*,"(a)",advance="no") "            Res rhoE             |"
+                case (cgnsL2resRhoe)
+                    write (*, "(a)", advance="no") "            Res rhoE             |"
 
-         case (cgnsL2resNu)
-            write(*,"(a)",advance="no") "           Res nuturb            |"
+                case (cgnsL2resNu)
+                    write (*, "(a)", advance="no") "           Res nuturb            |"
 
-         case (cgnsL2resK)
-            write(*,"(a)",advance="no") "           Res kturb             |"
+                case (cgnsL2resK)
+                    write (*, "(a)", advance="no") "           Res kturb             |"
 
-         case (cgnsL2resOmega)
-            write(*,"(a)",advance="no") "           Res wturb             |"
+                case (cgnsL2resOmega)
+                    write (*, "(a)", advance="no") "           Res wturb             |"
 
-         case (cgnsL2resTau)
-            write(*,"(a)",advance="no") "           Res tauturb           |"
+                case (cgnsL2resTau)
+                    write (*, "(a)", advance="no") "           Res tauturb           |"
 
-         case (cgnsL2resEpsilon)
-            write(*,"(a)",advance="no") "           Res epsturb           |"
+                case (cgnsL2resEpsilon)
+                    write (*, "(a)", advance="no") "           Res epsturb           |"
 
-         case (cgnsL2resV2)
-            write(*,"(a)",advance="no") "           Res v2turb            |"
+                case (cgnsL2resV2)
+                    write (*, "(a)", advance="no") "           Res v2turb            |"
 
-         case (cgnsL2resF)
-            write(*,"(a)",advance="no") "           Res fturb             |"
+                case (cgnsL2resF)
+                    write (*, "(a)", advance="no") "           Res fturb             |"
 
-         case (cgnsCl)
-            write(*,"(a)",advance="no") "                     C_lift                      |"
+                case (cgnsCl)
+                    write (*, "(a)", advance="no") "                     C_lift                      |"
 
-         case (cgnsClp)
-            write(*,"(a)",advance="no") "                    C_lift_p                     |"
+                case (cgnsClp)
+                    write (*, "(a)", advance="no") "                    C_lift_p                     |"
 
-         case (cgnsClv)
-            write(*,"(a)",advance="no") "                    C_lift_v                     |"
+                case (cgnsClv)
+                    write (*, "(a)", advance="no") "                    C_lift_v                     |"
 
-         case (cgnsCd)
-            write(*,"(a)",advance="no") "                    C_drag                       |"
+                case (cgnsCd)
+                    write (*, "(a)", advance="no") "                    C_drag                       |"
 
-         case (cgnsCdp)
-            write(*,"(a)",advance="no") "                    C_drag_p                     |"
+                case (cgnsCdp)
+                    write (*, "(a)", advance="no") "                    C_drag_p                     |"
 
-         case (cgnsCdv)
-            write(*,"(a)",advance="no") "                    C_drag_v                     |"
+                case (cgnsCdv)
+                    write (*, "(a)", advance="no") "                    C_drag_v                     |"
 
-         case (cgnsCfx)
-            write(*,"(a)",advance="no") "                      C_Fx                       |"
+                case (cgnsCfx)
+                    write (*, "(a)", advance="no") "                      C_Fx                       |"
 
-         case (cgnsCfy)
-            write(*,"(a)",advance="no") "                      C_Fy                       |"
+                case (cgnsCfy)
+                    write (*, "(a)", advance="no") "                      C_Fy                       |"
 
-         case (cgnsCfz)
-            write(*,"(a)",advance="no") "                      C_Fz                       |"
+                case (cgnsCfz)
+                    write (*, "(a)", advance="no") "                      C_Fz                       |"
 
-         case (cgnsCmx)
-            write(*,"(a)",advance="no") "                      C_Mx                       |"
+                case (cgnsCmx)
+                    write (*, "(a)", advance="no") "                      C_Mx                       |"
 
-         case (cgnsCmy)
-            write(*,"(a)",advance="no") "                      C_My                       |"
+                case (cgnsCmy)
+                    write (*, "(a)", advance="no") "                      C_My                       |"
 
-         case (cgnsCmz)
-            write(*,"(a)",advance="no") "                      C_Mz                       |"
+                case (cgnsCmz)
+                    write (*, "(a)", advance="no") "                      C_Mz                       |"
 
-         case (cgnsHdiffMax)
-            write(*,"(a)",advance="no") "                   |H-H_inf|                     |"
+                case (cgnsHdiffMax)
+                    write (*, "(a)", advance="no") "                   |H-H_inf|                     |"
 
-         case (cgnsMachMax)
-            write(*,"(a)",advance="no") "                    Mach_max                     |"
+                case (cgnsMachMax)
+                    write (*, "(a)", advance="no") "                    Mach_max                     |"
 
-         case (cgnsYplusMax)
-            write(*,"(a)",advance="no") "                     Y+_max                      |"
+                case (cgnsYplusMax)
+                    write (*, "(a)", advance="no") "                     Y+_max                      |"
 
-         case (cgnsEddyMax)
-            write(*,"(a)",advance="no") "                    Eddyv_max                    |"
+                case (cgnsEddyMax)
+                    write (*, "(a)", advance="no") "                    Eddyv_max                    |"
 
-         case (cgnsSepSensor)
-            write(*,"(a)",advance="no") "                    SepSensor                    |"
+                case (cgnsSepSensor)
+                    write (*, "(a)", advance="no") "                    SepSensor                    |"
 
-         case (cgnsCavitation)
-            write(*,"(a)",advance="no") "                   Cavitation                    |"
+                case (cgnsCavitation)
+                    write (*, "(a)", advance="no") "                   Cavitation                    |"
 
-         case (cgnsAxisMoment)
-            write(*,"(a)",advance="no") "                   AxisMoment                    |"
+                case (cgnsAxisMoment)
+                    write (*, "(a)", advance="no") "                   AxisMoment                    |"
 
-         end select
+                end select
 #endif
 
-       enddo
-
-       print "(1x)"
+            end do
 
-       ! Write the second line of the header. Most of them are empty,
-       ! but some variables require a second line.
-       write(*,'("# ")',advance="no")
+            print "(1x)"
 
-       write(*,"(a)",advance="no")   " level |"
+            ! Write the second line of the header. Most of them are empty,
+            ! but some variables require a second line.
+            write (*, '("# ")', advance="no")
 
-       if(equationMode == unsteady) then
-          write(*,"(a)",advance="no") " Step |            |"
-       else if(equationMode == timeSpectral) then
-          write(*,"(a)",advance="no") " Solution |"
-       endif
+            write (*, "(a)", advance="no") " level |"
 
+            if (equationMode == unsteady) then
+                write (*, "(a)", advance="no") " Step |            |"
+            else if (equationMode == timeSpectral) then
+                write (*, "(a)", advance="no") " Solution |"
+            end if
 
-       if( writeIterations ) write(*,"(a)",advance="no") "      | Tot  |  Type  |         |      | Res  |"
-       if( showCPU )         write(*,"(a)",advance="no") " Clock (s)  |"
+            if (writeIterations) write (*, "(a)", advance="no") "      | Tot  |  Type  |         |      | Res  |"
+            if (showCPU) write (*, "(a)", advance="no") " Clock (s)  |"
 
-       ! Loop over the variables to be monitored and write the
-       ! second line.
+            ! Loop over the variables to be monitored and write the
+            ! second line.
 
-       do i=1,nMon
+            do i = 1, nMon
 
-          ! Determine the variable name and write the
-          ! corresponding text.
+                ! Determine the variable name and write the
+                ! corresponding text.
 #ifndef USE_COMPLEX
-          ! real mode gets the same width for all variables
-          select case (monNames(i))
+                ! real mode gets the same width for all variables
+                select case (monNames(i))
 
-          case (cgnsHdiffMax)
-             write(*,"(a)",advance="no") "           max          |"
+                case (cgnsHdiffMax)
+                    write (*, "(a)", advance="no") "           max          |"
 
-          case default
-             write(*,"(a)",advance="no") "                        |"
+                case default
+                    write (*, "(a)", advance="no") "                        |"
 
-          end select
+                end select
 #else
-         ! complex mode gets shorter spacing for residuals
-         select case (monNames(i))
-
-         case (cgnsHdiffMax)
-            write(*,"(a)",advance="no") "                       max                       |"
-
-         case (cgnsL2resRho,  cgnsL2resMomx,    &
-               cgnsL2resMomy, cgnsL2resMomz,    &
-               cgnsL2resRhoe, cgnsL2resNu,      &
-               cgnsL2resK,    cgnsL2resOmega,   &
-               cgnsL2resTau,  cgnsL2resEpsilon, &
-               cgnsL2resV2,   cgnsL2resF, 'totalR')
-            ! residuals get a shorter line
-            write(*,"(a)",advance="no") "                                 |"
-         case default
-            ! regular functionals get the long empty line
-            write(*,"(a)",advance="no") "                                                 |"
-
-         end select
+                ! complex mode gets shorter spacing for residuals
+                select case (monNames(i))
+
+                case (cgnsHdiffMax)
+                    write (*, "(a)", advance="no") "                       max                       |"
+
+                case (cgnsL2resRho, cgnsL2resMomx, &
+                      cgnsL2resMomy, cgnsL2resMomz, &
+                      cgnsL2resRhoe, cgnsL2resNu, &
+                      cgnsL2resK, cgnsL2resOmega, &
+                      cgnsL2resTau, cgnsL2resEpsilon, &
+                      cgnsL2resV2, cgnsL2resF, 'totalR')
+                    ! residuals get a shorter line
+                    write (*, "(a)", advance="no") "                                 |"
+                case default
+                    ! regular functionals get the long empty line
+                    write (*, "(a)", advance="no") "                                                 |"
+
+                end select
 #endif
 
-       end do
-       print "(1x)"
-    end if
-
-    ! Write again a line of - signs (starting with a #).
-
-    write(*,"(a)",advance="no") "#"
-    do i=2,nCharWrite
-       write(*,"(a)",advance="no") "-"
-    enddo
-    print "(1x)"
-
-  end subroutine convergenceHeader
-  subroutine sumResiduals(nn, mm)
-    !
-    !       sumResiduals adds the sum of the residuals squared at
-    !       position nn to the array monLoc at position mm. It is assumed
-    !       that the arrays of blockPointers already point to the correct
-    !       block.
-    !
-    use blockPointers
-    use monitor
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nn, mm
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k
-
-    ! Loop over the number of owned cells of this block and
-    ! accumulate the residual.
-
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
+            end do
+            print "(1x)"
+        end if
+
+        ! Write again a line of - signs (starting with a #).
+
+        write (*, "(a)", advance="no") "#"
+        do i = 2, nCharWrite
+            write (*, "(a)", advance="no") "-"
+        end do
+        print "(1x)"
+
+    end subroutine convergenceHeader
+    subroutine sumResiduals(nn, mm)
+        !
+        !       sumResiduals adds the sum of the residuals squared at
+        !       position nn to the array monLoc at position mm. It is assumed
+        !       that the arrays of blockPointers already point to the correct
+        !       block.
+        !
+        use blockPointers
+        use monitor
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nn, mm
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k
+
+        ! Loop over the number of owned cells of this block and
+        ! accumulate the residual.
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
 #ifndef USE_COMPLEX
-             monLoc(mm) = monLoc(mm) + (dw(i,j,k,nn)/vol(i,j,k))**2
+                    monLoc(mm) = monLoc(mm) + (dw(i, j, k, nn)/vol(i, j, k))**2
 #else
-            ! TODO squaring the complex residual when its order 1e-200 underflows and we need a better approach here
-            ! we need to square and sum the real and complex parts separately
-            monLoc(mm) = monLoc(mm) + &
-                         cmplx((real(dw(i,j,k,nn)/vol(i,j,k)))**2, &
-                         (aimag(dw(i,j,k,nn)/vol(i,j,k)))**2)
+                    ! TODO squaring the complex residual when its order 1e-200 underflows and we need a better approach here
+                    ! we need to square and sum the real and complex parts separately
+                    monLoc(mm) = monLoc(mm) + &
+                                 cmplx((real(dw(i, j, k, nn)/vol(i, j, k)))**2, &
+                                       (aimag(dw(i, j, k, nn)/vol(i, j, k)))**2)
 #endif
-          enddo
-       enddo
-    enddo
-
-  end subroutine sumResiduals
-
-  subroutine sumAllResiduals(mm)
-    !
-    !       sumAllResiduals adds the sum of the ALL residuals squared at
-    !       to monLoc at position mm.
-    !
-    use blockPointers
-    use monitor
-    use flowvarrefstate
-    use inputIteration
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: mm
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, l
-    real(kind=realType) :: state_sum,ovv
-
-
-    ! Loop over the number of owned cells of this block and
-    ! accumulate the residual.
-
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-             state_sum = 0.0
-             ovv = one/vol(i,j,k)
-             do l=1,nwf
+                end do
+            end do
+        end do
+
+    end subroutine sumResiduals
+
+    subroutine sumAllResiduals(mm)
+        !
+        !       sumAllResiduals adds the sum of the ALL residuals squared at
+        !       to monLoc at position mm.
+        !
+        use blockPointers
+        use monitor
+        use flowvarrefstate
+        use inputIteration
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: mm
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, l
+        real(kind=realType) :: state_sum, ovv
+
+        ! Loop over the number of owned cells of this block and
+        ! accumulate the residual.
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    state_sum = 0.0
+                    ovv = one/vol(i, j, k)
+                    do l = 1, nwf
 #ifndef USE_COMPLEX
-                state_sum = state_sum + (dw(i,j,k,l)*ovv)**2
+                        state_sum = state_sum + (dw(i, j, k, l)*ovv)**2
 #else
-               ! TODO squaring the complex residual when its order 1e-200 underflows and we need a better approach here
-                ! we need to square and sum the real and complex parts separately
-                state_sum = state_sum + &
-                            cmplx((real(dw(i,j,k,l)*ovv))**2, &
-                            (aimag(dw(i,j,k,l)*ovv))**2)
+                        ! TODO squaring the complex residual when its order 1e-200 underflows and we need a better approach here
+                        ! we need to square and sum the real and complex parts separately
+                        state_sum = state_sum + &
+                                    cmplx((real(dw(i, j, k, l)*ovv))**2, &
+                                          (aimag(dw(i, j, k, l)*ovv))**2)
 #endif
-             end do
-             do l=nt1,nt2
-               ! l-nt1+1 will index the turbResScale properly
+                    end do
+                    do l = nt1, nt2
+                        ! l-nt1+1 will index the turbResScale properly
 #ifndef USE_COMPLEX
-               state_sum = state_sum + (dw(i,j,k,l)*ovv*turbResScale(l-nt1+1))**2
+                        state_sum = state_sum + (dw(i, j, k, l)*ovv*turbResScale(l - nt1 + 1))**2
 #else
-               ! we need to square and sum the real and complex parts separately
-               state_sum = state_sum + &
-                           cmplx((real(dw(i,j,k,l)*ovv*turbResScale(l-nt1+1)))**2, &
-                           (aimag(dw(i,j,k,l)*ovv*turbResScale(l-nt1+1)))**2)
+                        ! we need to square and sum the real and complex parts separately
+                        state_sum = state_sum + &
+                                    cmplx((real(dw(i, j, k, l)*ovv*turbResScale(l - nt1 + 1)))**2, &
+                                          (aimag(dw(i, j, k, l)*ovv*turbResScale(l - nt1 + 1)))**2)
 #endif
-             end do
-             monLoc(mm) = monLoc(mm) + state_sum
-          enddo
-       enddo
-    enddo
-
-  end subroutine sumAllResiduals
-
-  subroutine unsteadyHeader
-    !
-    !       unsteadyHeader writes a header to stdout when a new time step
-    !       is started.
-    !
-    use constants
-    use monitor, only : nTimeStepsRestart, timeUnsteadyRestart, timeUnsteady, timeStepUnsteady
-    use commonFormats, only : strings
-    implicit none
-    !
-    !      Local variables
-    !
-    character(len=7)  :: integerString
-    character(len=12) :: realString
-
-    ! Write the time step number to the integer string and the
-    ! physical time to the real string.
-
-    write(integerString,"(i7)") timeStepUnsteady + nTimeStepsRestart
-    write(realString,"(es12.5)") timeUnsteady + timeUnsteadyRestart
-
-    integerString = adjustl(integerString)
-    realString    = adjustl(realString)
-
-    ! Write the header to stdout.
-
-    print "(a)", "#"
-    print "(a)", "#**************************************************************************"
-    print "(a)", "#"
-    print strings, "# Unsteady time step ", trim(integerString),", physical time ", trim(realString), " seconds"
-    print "(a)", "#"
-    print "(a)", "#**************************************************************************"
-    print "(a)", "#"
-
-  end subroutine unsteadyHeader
-
-  subroutine getCellCenters(level, n, xCen)
-
-    use constants
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use blockPointers, only : nDom, il, jl, kl, x
-
-    implicit none
-
-    ! Input/Output
-    integer(kind=intType), intent(in) :: level, n
-    real(kind=realType), dimension(3, n), intent(out) :: xCen
-
-    ! Working
-    integer(kind=intType) :: i, j, k, ii, nn, sps
-
-    ii = 0
-    do nn=1, nDom
-       do sps=1, nTimeIntervalsSpectral
-          call setPointers(nn, level, sps)
-
-          do k=2, kl
-             do j=2, jl
-                do i=2, il
-                   ii = ii + 1
-
-                   xCen(:, ii) = eighth*(&
-                        x(i-1, j-1, k-1, :) + &
-                        x(i  , j-1, k-1, :) + &
-                        x(i-1, j  , k-1, :) + &
-                        x(i  , j  , k-1, :) + &
-                        x(i-1, j-1, k  , :) + &
-                        x(i  , j-1, k  , :) + &
-                        x(i-1, j  , k  , :) + &
-                        x(i  , j  , k  , :))
+                    end do
+                    monLoc(mm) = monLoc(mm) + state_sum
                 end do
-             end do
-          end do
-       end do
-    end do
-  end subroutine getCellCenters
+            end do
+        end do
+
+    end subroutine sumAllResiduals
+
+    subroutine unsteadyHeader
+        !
+        !       unsteadyHeader writes a header to stdout when a new time step
+        !       is started.
+        !
+        use constants
+        use monitor, only: nTimeStepsRestart, timeUnsteadyRestart, timeUnsteady, timeStepUnsteady
+        use commonFormats, only: strings
+        implicit none
+        !
+        !      Local variables
+        !
+        character(len=7)  :: integerString
+        character(len=12) :: realString
+
+        ! Write the time step number to the integer string and the
+        ! physical time to the real string.
+
+        write (integerString, "(i7)") timeStepUnsteady + nTimeStepsRestart
+        write (realString, "(es12.5)") timeUnsteady + timeUnsteadyRestart
+
+        integerString = adjustl(integerString)
+        realString = adjustl(realString)
+
+        ! Write the header to stdout.
+
+        print "(a)", "#"
+        print "(a)", "#**************************************************************************"
+        print "(a)", "#"
+        print strings, "# Unsteady time step ", trim(integerString), ", physical time ", trim(realString), " seconds"
+        print "(a)", "#"
+        print "(a)", "#**************************************************************************"
+        print "(a)", "#"
+
+    end subroutine unsteadyHeader
+
+    subroutine getCellCenters(level, n, xCen)
+
+        use constants
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use blockPointers, only: nDom, il, jl, kl, x
+
+        implicit none
+
+        ! Input/Output
+        integer(kind=intType), intent(in) :: level, n
+        real(kind=realType), dimension(3, n), intent(out) :: xCen
+
+        ! Working
+        integer(kind=intType) :: i, j, k, ii, nn, sps
+
+        ii = 0
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, level, sps)
+
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            ii = ii + 1
+
+                            xCen(:, ii) = eighth*( &
+                                          x(i - 1, j - 1, k - 1, :) + &
+                                          x(i, j - 1, k - 1, :) + &
+                                          x(i - 1, j, k - 1, :) + &
+                                          x(i, j, k - 1, :) + &
+                                          x(i - 1, j - 1, k, :) + &
+                                          x(i, j - 1, k, :) + &
+                                          x(i - 1, j, k, :) + &
+                                          x(i, j, k, :))
+                        end do
+                    end do
+                end do
+            end do
+        end do
+    end subroutine getCellCenters
 
-  subroutine getCellCGNSBlockIDs(level, n, cellID)
+    subroutine getCellCGNSBlockIDs(level, n, cellID)
 
-    use constants
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use blockPointers, only : nDom, il, jl, kl, nbkGlobal
+        use constants
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use blockPointers, only: nDom, il, jl, kl, nbkGlobal
 
-    implicit none
+        implicit none
 
-    ! Input/Output
-    integer(kind=intType), intent(in) :: level, n
-    real(kind=realType), dimension(n), intent(out) :: cellID
+        ! Input/Output
+        integer(kind=intType), intent(in) :: level, n
+        real(kind=realType), dimension(n), intent(out) :: cellID
 
-    ! Working
-    integer(kind=intType) :: i, j, k, ii, nn, sps
+        ! Working
+        integer(kind=intType) :: i, j, k, ii, nn, sps
 
-    ii = 0
-    do nn=1, nDom
-       do sps=1, nTimeIntervalsSpectral
-          call setPointers(nn, level, sps)
+        ii = 0
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, level, sps)
 
-          do k=2, kl
-             do j=2, jl
-                do i=2, il
-                   ii = ii + 1
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            ii = ii + 1
 
-                   cellID(ii) = nbkGlobal
+                            cellID(ii) = nbkGlobal
 
+                        end do
+                    end do
                 end do
-             end do
-          end do
-       end do
-    end do
-  end subroutine getCellCGNSBlockIDs
-
-  subroutine getNCGNSZones(nZones)
-    use cgnsGrid
-    implicit none
-    integer(kind=inttype), intent(out) :: nZones
+            end do
+        end do
+    end subroutine getCellCGNSBlockIDs
+
+    subroutine getNCGNSZones(nZones)
+        use cgnsGrid
+        implicit none
+        integer(kind=inttype), intent(out) :: nZones
 
-    nZones = cgnsNDom
+        nZones = cgnsNDom
 
-  end subroutine getNCGNSZones
+    end subroutine getNCGNSZones
 
-  subroutine getCGNSZoneName(i, zone)
-    use cgnsGrid
-      implicit none
-      character(len=maxCGNSNameLen), intent(out) :: zone
-      integer(kind=intType), intent(in) :: i
+    subroutine getCGNSZoneName(i, zone)
+        use cgnsGrid
+        implicit none
+        character(len=maxCGNSNameLen), intent(out) :: zone
+        integer(kind=intType), intent(in) :: i
 
-      zone = cgnsDoms(i)%zoneName
+        zone = cgnsDoms(i)%zoneName
 
     end subroutine getCGNSZoneName
 
diff --git a/src/wallDistance/wallDistance.F90 b/src/wallDistance/wallDistance.F90
index ab3c76fc9..c8fa38eb2 100644
--- a/src/wallDistance/wallDistance.F90
+++ b/src/wallDistance/wallDistance.F90
@@ -1,2343 +1,2337 @@
 module wallDistance
 
-  use constants, only : intType, realType
-  use wallDistanceData
-  implicit none
-  save
+    use constants, only: intType, realType
+    use wallDistanceData
+    implicit none
+    save
 
 #ifndef USE_TAPENADE
-  ! nquadVisc:             Number of local quads on the viscous
-  !                        bodies.
-  ! nNodeVisc:             Number of local nodes on the viscous
-  !                        bodies.
-  ! nquadViscGlob:         Global number of viscous quads.
-  ! connVisc(4,nquadVisc): Connectivity of the local viscous
-  !                        quads.
-  ! coorVisc(3,nNodeVisc): The coordinates of the local nodes.
-
-  integer(kind=intType) :: nquadVisc, nNodeVisc
-  integer(kind=intType) :: nquadViscGlob
-
-  integer(kind=intType), dimension(:,:), allocatable :: connVisc
-  real(kind=realType),   dimension(:,:), allocatable :: coorVisc
-
-  ! rotMatrixSections(nSections,3,3): Rotation matrices needed
-  !                                   for the alignment of the
-  !                                   sections. The rotation
-  !                                   matrix is a**n, where a is
-  !                                   periodic transformation
-  !                                   matrix; n is an integer.
-
-  real(kind=realType), dimension(:,:,:), allocatable :: rotMatrixSections
+    ! nquadVisc:             Number of local quads on the viscous
+    !                        bodies.
+    ! nNodeVisc:             Number of local nodes on the viscous
+    !                        bodies.
+    ! nquadViscGlob:         Global number of viscous quads.
+    ! connVisc(4,nquadVisc): Connectivity of the local viscous
+    !                        quads.
+    ! coorVisc(3,nNodeVisc): The coordinates of the local nodes.
+
+    integer(kind=intType) :: nquadVisc, nNodeVisc
+    integer(kind=intType) :: nquadViscGlob
+
+    integer(kind=intType), dimension(:, :), allocatable :: connVisc
+    real(kind=realType), dimension(:, :), allocatable :: coorVisc
+
+    ! rotMatrixSections(nSections,3,3): Rotation matrices needed
+    !                                   for the alignment of the
+    !                                   sections. The rotation
+    !                                   matrix is a**n, where a is
+    !                                   periodic transformation
+    !                                   matrix; n is an integer.
+
+    real(kind=realType), dimension(:, :, :), allocatable :: rotMatrixSections
 #endif
 
 contains
 
-  subroutine updateWallDistancesQuickly(nn, level, sps)
+    subroutine updateWallDistancesQuickly(nn, level, sps)
 
-    ! This is the actual update routine that uses xSurf. It is done on
-    ! block-level-sps basis.  This is the used to update the wall
-    ! distance. Most importantly, this routine is included in the
-    ! reverse mode AD routines, but NOT the forward mode. Since it is
-    ! done on a per-block basis, it is assumed that the required block
-    ! pointers are already set.
+        ! This is the actual update routine that uses xSurf. It is done on
+        ! block-level-sps basis.  This is the used to update the wall
+        ! distance. Most importantly, this routine is included in the
+        ! reverse mode AD routines, but NOT the forward mode. Since it is
+        ! done on a per-block basis, it is assumed that the required block
+        ! pointers are already set.
 
-    use constants
-    use blockPointers, only : nx, ny, nz, il, jl, kl, x, flowDoms, d2wall
-    implicit none
+        use constants
+        use blockPointers, only: nx, ny, nz, il, jl, kl, x, flowDoms, d2wall
+        implicit none
 
-    ! Subroutine arguments
-    integer(kind=intType) :: nn, level, sps
+        ! Subroutine arguments
+        integer(kind=intType) :: nn, level, sps
 
-    ! Local Variables
-    integer(kind=intType) :: i, j, k, ii, ind(4)
-    real(kind=realType) :: xp(3), xc(3), u, v
+        ! Local Variables
+        integer(kind=intType) :: i, j, k, ii, ind(4)
+        real(kind=realType) :: xp(3), xc(3), u, v
 
 #ifdef TAPENADE_REVERSE
-    !$AD II-LOOP
-    do ii=0,nx*ny*nz-1
-       i = mod(ii, nx) + 2
-       j = mod(ii/nx, ny) + 2
-       k = ii/(nx*ny) + 2
+        !$AD II-LOOP
+        do ii = 0, nx*ny*nz - 1
+            i = mod(ii, nx) + 2
+            j = mod(ii/nx, ny) + 2
+            k = ii/(nx*ny) + 2
 #else
-       do k=2,kl
-          do j=2,jl
-             do i=2,il
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
 #endif
 
-                if (flowDoms(nn, level, sps)%surfNodeIndices(1, i, j, k) == 0) then
-                   ! This node is too far away and has no
-                   ! association. Set the distance to a large constant.
-                   d2wall(i, j, k) = large
-                   cycle
-                end if
+                        if (flowDoms(nn, level, sps)%surfNodeIndices(1, i, j, k) == 0) then
+                            ! This node is too far away and has no
+                            ! association. Set the distance to a large constant.
+                            d2wall(i, j, k) = large
+                            cycle
+                        end if
+
+                        ! Extract elemID and u-v position for the association of
+                        ! this cell:
+
+                        ind = flowDoms(nn, level, sps)%surfNodeIndices(:, i, j, k)
+                        u = flowDoms(nn, level, sps)%uv(1, i, j, k)
+                        v = flowDoms(nn, level, sps)%uv(2, i, j, k)
+
+                        ! Now we have the 4 corners, use bi-linear shape
+                        ! functions o to get target: (CCW ordering remember!)
+
+                        xp(:) = &
+                            (one - u)*(one - v)*xSurf(3*(ind(1) - 1) + 1:3*ind(1)) + &
+                            (u)*(one - v)*xSurf(3*(ind(2) - 1) + 1:3*ind(2)) + &
+                            (u)*(v)*xSurf(3*(ind(3) - 1) + 1:3*ind(3)) + &
+                            (one - u)*(v)*xSurf(3*(ind(4) - 1) + 1:3*ind(4))
+
+                        ! Get the cell center
+                        xc(1) = eighth*(x(i - 1, j - 1, k - 1, 1) + x(i, j - 1, k - 1, 1) &
+                                        + x(i - 1, j, k - 1, 1) + x(i, j, k - 1, 1) &
+                                        + x(i - 1, j - 1, k, 1) + x(i, j - 1, k, 1) &
+                                        + x(i - 1, j, k, 1) + x(i, j, k, 1))
+
+                        xc(2) = eighth*(x(i - 1, j - 1, k - 1, 2) + x(i, j - 1, k - 1, 2) &
+                                        + x(i - 1, j, k - 1, 2) + x(i, j, k - 1, 2) &
+                                        + x(i - 1, j - 1, k, 2) + x(i, j - 1, k, 2) &
+                                        + x(i - 1, j, k, 2) + x(i, j, k, 2))
+
+                        xc(3) = eighth*(x(i - 1, j - 1, k - 1, 3) + x(i, j - 1, k - 1, 3) &
+                                        + x(i - 1, j, k - 1, 3) + x(i, j, k - 1, 3) &
+                                        + x(i - 1, j - 1, k, 3) + x(i, j - 1, k, 3) &
+                                        + x(i - 1, j, k, 3) + x(i, j, k, 3))
+
+                        ! Now we have the two points...just take the norm of the
+                        ! distance between them
+
+                        d2wall(i, j, k) = sqrt( &
+                                          (xc(1) - xp(1))**2 + (xc(2) - xp(2))**2 + (xc(3) - xp(3))**2)
+#ifdef TAPENADE_REVERSE
+                    end do
+#else
+                end do
+            end do
+        end do
+#endif
+
+    end subroutine updateWallDistancesQuickly
 
-                ! Extract elemID and u-v position for the association of
-                ! this cell:
+    subroutine updateWallRoughness()
 
-                ind = flowDoms(nn,level,sps)%surfNodeIndices(:, i, j, k)
-                u   = flowDoms(nn,level,sps)%uv(1,i,j,k)
-                v    = flowDoms(nn,level,sps)%uv(2,i,j,k)
+#ifndef USE_TAPENADE
 
-                ! Now we have the 4 corners, use bi-linear shape
-                ! functions o to get target: (CCW ordering remember!)
+        ! Sets the roughness-value (ks) of the nearest wall-cell in the volume cells.
+        !
+        ! At first, it creates two lists: (1) ks values on the surface; (2) global
+        ! cellIndex corresponding to this ks-value.
+        !
+        ! Then it gathers the full list on each proc *THIS DOES NOT SCALE IN MEMORY*
+        !
+        ! After that, it iterate through every volume cell and finds the index in
+        ! list (1) that corresponds to the cellIndex of the nearest surface-cell.
+        ! Then it uses this index to set the ks value listed in (2).
+        !
+        !
+        ! A more memory efficient approach would be to create a 'PETSc Scatter'.
+        ! This should be straight forward using the cellIndex-list mentioned above.
+        ! You might take a look at 'wallScatter' further down this file for
+        ! inspiration.
+
+        use constants
+        use blockPointers
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use inputPhysics, only: useRoughSA
+        use utils, only: setPointers, EChk, terminate
+        use surfaceFamilies, only: BCFamGroups
+        use communication, only: adflow_comm_world, nProc, myID
+        use sorting, only: famInList
+        use wallDistanceData, only: nCellBlockOffset
+        implicit none
+
+        ! Local Variables
+        integer(kind=intType) :: i, j, k, ii, jj, ierr, iCell
+        integer(kind=intType) :: iBeg, jBeg, iEnd, jEnd, ni, nj
+        integer(kind=intType) :: nn, sps, level, nLevels, mm
+        integer(kind=intType), dimension(:), allocatable :: nCellProc, cumCellProc
+        integer(kind=intType), dimension(:), pointer :: wallFamList
+        integer(kind=intType), dimension(:), allocatable :: cellIdLocal, cellIdGlobal
+        integer(kind=intType) :: nCellsLocal, nCellsGlobal
+        real(kind=realType), dimension(:), allocatable :: ksLocal, ksGlobal
+
+        character(len=maxStringLen) :: errorMessage
+
+        ! exit if not in use
+        if (.not. useRoughSA) then
+            return
+        end if
+
+        wallFamList => BCFamGroups(iBCGroupWalls)%famList
+        nLevels = ubound(flowDoms, 2)
+
+        do level = 1, nLevels
+            do sps = 1, nTimeIntervalsSpectral
+
+                ! figure out the local space needed
+                nCellsLocal = 0
+                do nn = 1, nDom
+                    call setPointers(nn, level, sps)
+
+                    do mm = 1, nBocos
+                        if (.not. famInList(BCdata(mm)%famID, wallFamlist)) then
+                            cycle
+                        end if
+                        nCellsLocal = nCellsLocal + &
+                                      (bcData(mm)%inEnd - bcData(mm)%inBeg)*(bcData(mm)%jnEnd - bcData(mm)%jnBeg)
+                    end do
+                end do
 
-                xp(:) = &
-                     (one-u)*(one-v)*xSurf(3*(ind(1)-1)+1:3*ind(1)) + &
-                     (    u)*(one-v)*xSurf(3*(ind(2)-1)+1:3*ind(2)) + &
-                     (    u)*(    v)*xSurf(3*(ind(3)-1)+1:3*ind(3)) + &
-                     (one-u)*(    v)*xSurf(3*(ind(4)-1)+1:3*ind(4))
+                ! Now communicate these sizes with everyone
+                allocate (nCellProc(nProc), cumCellProc(0:nProc))
 
-                ! Get the cell center
-                xc(1) = eighth*(x(i-1,j-1,k-1,1) + x(i,j-1,k-1,1)  &
-                     +         x(i-1,j,  k-1,1) + x(i,j,  k-1,1)  &
-                     +         x(i-1,j-1,k,  1) + x(i,j-1,k,  1)  &
-                     +         x(i-1,j,  k,  1) + x(i,j,  k,  1))
+                call mpi_allgather(nCellsLocal, 1, adflow_integer, nCellProc, 1, adflow_integer, &
+                                   adflow_comm_world, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-                xc(2) = eighth*(x(i-1,j-1,k-1,2) + x(i,j-1,k-1,2)  &
-                     +         x(i-1,j,  k-1,2) + x(i,j,  k-1,2)  &
-                     +         x(i-1,j-1,k,  2) + x(i,j-1,k,  2)  &
-                     +         x(i-1,j,  k,  2) + x(i,j,  k,  2))
+                ! Now make cumulative versions of these
+                cumCellProc(0) = 0
+                do i = 1, nProc
+                    cumCellProc(i) = cumCellProc(i - 1) + nCellProc(i)
+                end do
 
-                xc(3) = eighth*(x(i-1,j-1,k-1,3) + x(i,j-1,k-1,3)  &
-                     +         x(i-1,j,  k-1,3) + x(i,j,  k-1,3)  &
-                     +         x(i-1,j-1,k,  3) + x(i,j-1,k,  3)  &
-                     +         x(i-1,j,  k,  3) + x(i,j,  k,  3))
+                ! And save the total number of nodes and cells for reference
+                nCellsGlobal = cumCellProc(nProc)
+
+                ! Allocate the space for the local ks values and cellId's
+                allocate (ksLocal(nCellsLocal), cellIdLocal(nCellsLocal))
+
+                ! Move all the local ks-values in a list
+                ! Create a second list with the global cell ID corresponding to the ks-values
+                iCell = 0
+                do nn = 1, nDom
+                    call setPointers(nn, level, sps)
+                    do mm = 1, nBocos
+                        if (.not. famInList(BCdata(mm)%famID, wallFamlist)) then
+                            cycle
+                        end if
+
+                        jBeg = BCData(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+                        iBeg = BCData(mm)%inBeg; iEnd = BCData(mm)%inEnd
+                        ni = iEnd - iBeg
+                        nj = jEnd - jBeg
+
+                        do jj = 1, nj
+                            do ii = 1, ni
+                                iCell = iCell + 1
+
+                                ! saving local ks-value is easy
+                                ksLocal(iCell) = BCData(mm)%ksNS_Wall(ii, jj)
+
+                                ! to calculate the global cellID, we must associate the
+                                ! BC-cell to the volume cell first. We basically have to
+                                ! set surface i-j values to global i,j,k values.
+
+                                select case (BCFaceID(mm))
+                                case (iMin)
+                                    i = 2
+                                    j = ii + 1
+                                    k = jj + 1
+                                case (iMax)
+                                    i = il
+                                    j = ii + 1
+                                    k = jj + 1
+                                case (jMin)
+                                    i = ii + 1
+                                    j = 2
+                                    k = jj + 1
+                                case (jMax)
+                                    i = ii + 1
+                                    j = jl
+                                    k = jj + 1
+                                case (kMin)
+                                    i = ii + 1
+                                    j = jj + 1
+                                    k = 2
+                                case (kMax)
+                                    i = ii + 1
+                                    j = jj + 1
+                                    k = kl
+                                end select
+
+                                cellIdLocal(iCell) = nCellBLockOffset(level, nn)*nTimeIntervalsSpectral + nx*ny*nz*(sps - 1) + &
+                                                     (i - 2) + (j - 2)*nx + (k - 2)*nx*ny
+                            end do
+                        end do
+                    end do
+                end do
 
+                ! allocate global arrays
+                allocate (ksGlobal(nCellsGlobal), cellIdGlobal(nCellsGlobal))
+
+                ! gather all the surface-ks values on each proc
+                call mpi_allgatherv(ksLocal, nCellsLocal, adflow_real, &
+                                    ksGlobal, nCellProc, cumCellProc, adflow_real, &
+                                    adflow_comm_world, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                ! gather all the cellId's on each proc
+                call mpi_allgatherv(cellIdLocal, nCellsLocal, adflow_integer, &
+                                    cellIdGlobal, nCellProc, cumCellProc, adflow_integer, &
+                                    adflow_comm_world, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                ! free local memory
+                deallocate (cumCellProc, nCellProc, ksLocal, cellIdLocal)
+
+                ! set the ks-values in the volume
+                do nn = 1, nDom
+                    call setPointers(nn, level, sps)
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 2, il
+                                if (flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) == -1) then
+                                    ! This cell is too far away and has no
+                                    ! association. Set the roughness to zero.
+                                    ks(i, j, k) = zero
+                                    cycle
+                                end if
+
+                                ! find the index of the surface cell (Requires gfortran > 9.0 )
+                                iCell = findloc(cellIdGlobal, flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k), DIM=1)
+
+                                if (iCell == 0) then
+                                    write (errorMessage, 100) &
+                                        flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k)
+100                                 format("Could not find surface cell with id ", I10.1)
+                                    call terminate("updateWallRoughness", errorMessage)
+                                end if
+
+                                ! set the ks value
+                                ks(i, j, k) = ksGlobal(iCell)
+                            end do
+                        end do
+                    end do
+                end do
 
-                ! Now we have the two points...just take the norm of the
-                ! distance between them
+                ! free global memory
+                deallocate (ksGlobal, cellIdGlobal)
+            end do
+        end do
 
-                d2wall(i,j,k) = sqrt(&
-                     (xc(1)-xp(1))**2 + (xc(2)-xp(2))**2 + (xc(3)-xp(3))**2)
-#ifdef TAPENADE_REVERSE
-             end do
-#else
-          enddo
-       enddo
-    enddo
 #endif
+    end subroutine updateWallRoughness
 
-  end subroutine updateWallDistancesQuickly
-
-  subroutine updateWallRoughness()
+    ! ----------------------------------------------------------------------
+    !                                                                      |
+    !                    No Tapenade Routine below this line               |
+    !                                                                      |
+    ! ----------------------------------------------------------------------
 
 #ifndef USE_TAPENADE
+    subroutine computeWallDistance(level, allocMem)
+        !
+        !       wallDistance computes the distances of the cell centers to
+        !       the nearest viscous wall. An adt type of method is used, which
+        !       guarantees to find the minimum distance to the wall. Possible
+        !       periodic transformations are taken into account, such that
+        !       also in case of a periodic problem the correct distance is
+        !       computed; the nearest wall point may lie in a periodic domain.
+        use constants
+        use blockPointers, only: nDom
+        use communication, only: sendBuffer, recvBuffer, myid, adflow_comm_world, &
+                                 sendBufferSize, recvBufferSize
+        use inputPhysics, only: equations, wallDistanceNeeded
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use inputDiscretization, only: useApproxWallDistance
+        use utils, only: setPointers, EChk, terminate, &
+                         deallocateTempMemory, allocateTempMemory
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level
+        logical, intent(in)               :: allocMem
+        !
+        !      Local variables.
+        !
+        integer :: ierr, i, j, k, nn, ii, l
+
+        integer(kind=intType) :: sps, sps2, ll, nLevels
+        logical :: tempLogical
+        real(kind=alwaysRealType) :: t0
+        character(len=3) :: integerString
+
+        ! Check if the RANS equations are solved. If not, the wall
+        ! distance is not needed and a return can be made.
+
+        if (equations /= RANSEquations) return
+
+        ! If the turbulence model is wall distance free just compute the
+        ! normal spacing of the first cell and store this in the wall
+        ! distance. It may be needed for the boundary conditions and
+        ! the monitoring of the y+. Return afterwards.
+
+        if (.not. wallDistanceNeeded) then
+
+            ! Loop over the number of spectral solutions, initialize the
+            ! distance and compute the initial normal spacing.
+
+            do sps = 1, nTimeIntervalsSpectral
+                call initWallDistance(level, sps, allocMem)
+                call computeNormalSpacing(level, sps)
+            end do
 
-    ! Sets the roughness-value (ks) of the nearest wall-cell in the volume cells.
-    !
-    ! At first, it creates two lists: (1) ks values on the surface; (2) global
-    ! cellIndex corresponding to this ks-value.
-    !
-    ! Then it gathers the full list on each proc *THIS DOES NOT SCALE IN MEMORY*
-    !
-    ! After that, it iterate through every volume cell and finds the index in
-    ! list (1) that corresponds to the cellIndex of the nearest surface-cell.
-    ! Then it uses this index to set the ks value listed in (2).
-    !
-    !
-    ! A more memory efficient approach would be to create a 'PETSc Scatter'.
-    ! This should be straight forward using the cellIndex-list mentioned above.
-    ! You might take a look at 'wallScatter' further down this file for
-    ! inspiration.
-
-    use constants
-    use blockPointers
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use inputPhysics, only : useRoughSA
-    use utils, only : setPointers, EChk, terminate
-    use surfaceFamilies, only : BCFamGroups
-    use communication, only : adflow_comm_world, nProc, myID
-    use sorting, only : famInList
-    use wallDistanceData, only : nCellBlockOffset
-    implicit none
+            ! And return.
 
-    ! Local Variables
-    integer(kind=intType) :: i, j, k, ii, jj, ierr, iCell
-    integer(kind=intType) :: iBeg, jBeg, iEnd, jEnd, ni, nj
-    integer(kind=intType) :: nn, sps, level, nLevels, mm
-    integer(kind=intType), dimension(:), allocatable :: nCellProc, cumCellProc
-    integer(kind=intType), dimension(:), pointer :: wallFamList
-    integer(kind=intType), dimension(:), allocatable :: cellIdLocal, cellIdGlobal
-    integer(kind=intType) :: nCellsLocal, nCellsGlobal
-    real(kind=realType), dimension(:), allocatable :: ksLocal, ksGlobal
+            return
+        end if
 
-    character(len=maxStringLen) :: errorMessage
+        ! Write a message to stdout to indicate that the wall distance
+        ! computation starts for the given level.
 
-    ! exit if not in use
-    if (.not. useRoughSA) then
-       return
-    end if
+        write (integerString, "(i2)") level
+        integerString = adjustl(integerString)
 
-    wallFamList => BCFamGroups(iBCGroupWalls)%famList
-    nLevels = ubound(flowDoms,2)
+        ! Store the start time.
 
-    do level=1, nLevels
-       do sps=1, nTimeIntervalsSpectral
+        t0 = mpi_wtime()
 
-          ! figure out the local space needed
-          nCellsLocal = 0
-          do nn=1,nDom
-            call setPointers(nn, level, sps)
+        ! Release temporarily some memory such that the overall memory
+        ! requirement is not dictated by this routine. What memory is
+        ! released depends on allocMem. If this is .True. It means that
+        ! this is the first time the wall distances are computed, i.e. in
+        ! the preprocessing phase. Then only send and receive buffers are
+        ! released. If allocMem is .False., this means that the wall
+        ! distances are computed in a moving mesh computation and
+        ! consequently some more memory can be released.
 
-            do mm=1, nBocos
-                if (.not. famInList(BCdata(mm)%famID, wallFamlist)) then
-                   cycle
-                end if
-                nCellsLocal = nCellsLocal + &
-                (bcData(mm)%inEnd - bcData(mm)%inBeg)*(bcData(mm)%jnEnd - bcData(mm)%jnBeg)
-            end do
-          end do
+        if (allocMem) then
+            deallocate (sendBuffer, recvBuffer, stat=ierr)
+            if (ierr /= 0) &
+                call terminate("wallDistance", &
+                               "Deallocation error for communication buffers")
+        else
+            call deallocateTempMemory(.false.)
+        end if
+
+        ! There are two different searches we can do: the original code
+        ! always works and it capable to dealing with rotating/periodic
+        ! geometries. It uses constant memory and is slow. The
+        ! alternative method uses memory that scales with the size of
+        ! the surface grid per processor and only works for
+        ! steady/unsteady simulations without periodic/rotating
+        ! components. But it is fast. It is designed to be used for
+        ! updating the wall distances between iterations of
+        ! aerostructural solutions.
 
-          ! Now communicate these sizes with everyone
-          allocate(nCellProc(nProc), cumCellProc(0:nProc))
+        ! Normal, original wall distance calc. Cannot be used when
+        ! overset is present due to possibility of overlapping walls.
+        if (.not. useApproxWallDistance) then
+            ! Loop over the number of spectral solutions.
+            spectralLoop: do sps = 1, nTimeIntervalsSpectral
 
-          call mpi_allgather(nCellsLocal, 1, adflow_integer, nCellProc, 1, adflow_integer, &
-               adflow_comm_world, ierr)
-          call EChk(ierr, __FILE__, __LINE__)
+                ! Initialize the wall distances.
 
-          ! Now make cumulative versions of these
-          cumCellProc(0) = 0
-          do i=1,nProc
-             cumCellProc(i) = cumCellProc(i-1) + nCellProc(i)
-          end do
+                call initWallDistance(level, sps, allocMem)
 
-          ! And save the total number of nodes and cells for reference
-          nCellsGlobal = cumCellProc(nProc)
+                ! Build the viscous surface mesh.
 
-          ! Allocate the space for the local ks values and cellId's
-          allocate(ksLocal(nCellsLocal), cellIdLocal(nCellsLocal))
+                call viscousSurfaceMesh(level, sps)
 
+                ! If there are no viscous faces, processor 0 prints a warning
+                ! and the wall distances are not computed.
 
-          ! Move all the local ks-values in a list
-          ! Create a second list with the global cell ID corresponding to the ks-values
-          iCell = 0
-          do nn=1, nDom
-             call setPointers(nn, level, sps)
-             do mm=1, nBocos
-                if (.not. famInList(BCdata(mm)%famID, wallFamlist)) then
-                   cycle
+                if (nquadViscGlob == 0) then
+
+                    if (myID == 0) then
+                        print "(a)", "#"
+                        print "(a)", "#               Warning!!!!"
+                        print "(a)", "# No viscous boundary found. Wall &
+                             &distances are set to infinity"
+                        print "(a)", "#"
+                    end if
+
+                else
+                    ! Determine the wall distances for the owned cells.
+                    call determineDistance(level, sps)
                 end if
+            end do spectralLoop
+        else ! The user wants to use approx wall distance calcs OR we
+            ! have overset mesh. :
 
-                jBeg = BCData(mm)%jnBeg ; jEnd = BCData(mm)%jnEnd
-                iBeg = BCData(mm)%inBeg ; iEnd = BCData(mm)%inEnd
-                ni = iEnd - iBeg
-                nj = jEnd - jBeg
-
-                do jj=1, nj
-                   do ii=1, ni
-                      iCell = iCell + 1
-
-                      ! saving local ks-value is easy
-                      ksLocal(iCell) = BCData(mm)%ksNS_Wall(ii, jj)
-
-                      ! to calculate the global cellID, we must associate the
-                      ! BC-cell to the volume cell first. We basically have to
-                      ! set surface i-j values to global i,j,k values.
-
-                      select case (BCFaceID(mm))
-                      case (iMin)
-                         i = 2
-                         j = ii + 1
-                         k = jj + 1
-                      case (iMax)
-                         i = il
-                         j = ii + 1
-                         k = jj + 1
-                      case (jMin)
-                         i = ii + 1
-                         j = 2
-                         k = jj + 1
-                      case (jMax)
-                         i = ii + 1
-                         j = jl
-                         k = jj + 1
-                      case (kMin)
-                         i = ii + 1
-                         j = jj + 1
-                         k = 2
-                      case (kMax)
-                         i = ii + 1
-                         j = jj + 1
-                         k = kl
-                      end select
-
-                      cellIdLocal(iCell) = nCellBLockOffset(level,nn)*nTimeIntervalsSpectral+nx*ny*nz*(sps-1)+&
-                           (i-2) +(j-2)*nx +(k-2)*nx*ny
-                   end do
-                end do
-             end do
-          end do
-
-
-          ! allocate global arrays
-          allocate(ksGlobal(nCellsGlobal), cellIdGlobal(nCellsGlobal))
-
-          ! gather all the surface-ks values on each proc
-          call mpi_allgatherv(ksLocal, nCellsLocal, adflow_real, &
-               ksGlobal, nCellProc, cumCellProc, adflow_real, &
-               adflow_comm_world, ierr)
-          call EChk(ierr, __FILE__, __LINE__)
-
-          ! gather all the cellId's on each proc
-          call mpi_allgatherv(cellIdLocal, nCellsLocal, adflow_integer, &
-               cellIdGlobal, nCellProc, cumCellProc, adflow_integer, &
-               adflow_comm_world, ierr)
-          call EChk(ierr, __FILE__, __LINE__)
-
-
-          ! free local memory
-          deallocate(cumCellProc, nCellProc, ksLocal, cellIdLocal)
-
-          ! set the ks-values in the volume
-          do nn=1, nDom
-             call setPointers(nn, level, sps)
-             do k=2,kl
-                do j=2,jl
-                   do i=2,il
-                      if (flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) == -1) then
-                         ! This cell is too far away and has no
-                         ! association. Set the roughness to zero.
-                         ks(i, j, k) = zero
-                         cycle
-                      end if
-
-                      ! find the index of the surface cell (Requires gfortran > 9.0 )
-                      iCell = findloc(cellIdGlobal, flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k), DIM=1)
-
-                      if (iCell == 0) then
-                         write(errorMessage,100) &
-                              flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k)
-100                      format("Could not find surface cell with id ", I10.1)
-                         call terminate("updateWallRoughness", errorMessage)
-                      endif
-
-                      ! set the ks value
-                      ks(i, j, k) = ksGlobal(iCell)
-                   end do
+            if (updateWallAssociation(level)) then
+
+                ! Initialize the wall distance
+                spectralLoop2: do sps = 1, nTimeIntervalsSpectral
+                    call initWallDistance(level, sps, allocMem)
+                end do spectralLoop2
+
+                ! Destroy the PETSc wall distance data if necessary
+                call destroyWallDistanceDataLevel(level)
+
+                ! Do the associtaion. This allocates the data destroyed
+                ! in the destroyWallDistanceData call
+
+                do sps = 1, nTimeIntervalsSpectral
+                    call determineWallAssociation(level, sps)
                 end do
-             end do
-          end do
 
-          ! free global memory
-          deallocate(ksGlobal, cellIdGlobal)
-       end do
-    end do
+                updateWallAssociation(level) = .False.
+            end if
 
-#endif
-  end subroutine updateWallRoughness
+            ! Update the xsurf vector from X
+            call updateXSurf(level)
 
-  ! ----------------------------------------------------------------------
-  !                                                                      |
-  !                    No Tapenade Routine below this line               |
-  !                                                                      |
-  ! ----------------------------------------------------------------------
+            ! Call the actual update routine, on each of the sps instances and blocks
+            do sps = 1, nTimeIntervalsSpectral
 
-#ifndef USE_TAPENADE
-  subroutine computeWallDistance(level, allocMem)
-    !
-    !       wallDistance computes the distances of the cell centers to
-    !       the nearest viscous wall. An adt type of method is used, which
-    !       guarantees to find the minimum distance to the wall. Possible
-    !       periodic transformations are taken into account, such that
-    !       also in case of a periodic problem the correct distance is
-    !       computed; the nearest wall point may lie in a periodic domain.
-    use constants
-    use blockPointers, only : nDom
-    use communication, only : sendBuffer, recvBuffer, myid, adflow_comm_world, &
-         sendBufferSize, recvBufferSize
-    use inputPhysics, only : equations, wallDistanceNeeded
-    use inputTimeSpectral, only :nTimeIntervalsSpectral
-    use inputDiscretization, only: useApproxWallDistance
-    use utils, only : setPointers, EChk, terminate, &
-         deallocateTempMemory, allocateTempMemory
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level
-    logical, intent(in)               :: allocMem
-    !
-    !      Local variables.
-    !
-    integer :: ierr, i, j, k, nn, ii, l
-
-    integer(kind=intType) :: sps, sps2, ll, nLevels
-    logical :: tempLogical
-    real(kind=alwaysRealType) :: t0
-    character(len=3) :: integerString
-
-    ! Check if the RANS equations are solved. If not, the wall
-    ! distance is not needed and a return can be made.
-
-    if(equations /= RANSEquations) return
-
-    ! If the turbulence model is wall distance free just compute the
-    ! normal spacing of the first cell and store this in the wall
-    ! distance. It may be needed for the boundary conditions and
-    ! the monitoring of the y+. Return afterwards.
-
-    if(.not. wallDistanceNeeded) then
-
-       ! Loop over the number of spectral solutions, initialize the
-       ! distance and compute the initial normal spacing.
-
-       do sps=1,nTimeIntervalsSpectral
-          call initWallDistance(level, sps, allocMem)
-          call computeNormalSpacing(level, sps)
-       enddo
-
-       ! And return.
-
-       return
-    endif
-
-    ! Write a message to stdout to indicate that the wall distance
-    ! computation starts for the given level.
+                ! Now extract the vector of the surface data we need
+                call VecGetArrayF90(xSurfVec(level, sps), xSurf, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-    write(integerString,"(i2)") level
-    integerString = adjustl(integerString)
-
-    ! Store the start time.
-
-    t0 = mpi_wtime()
-
-    ! Release temporarily some memory such that the overall memory
-    ! requirement is not dictated by this routine. What memory is
-    ! released depends on allocMem. If this is .True. It means that
-    ! this is the first time the wall distances are computed, i.e. in
-    ! the preprocessing phase. Then only send and receive buffers are
-    ! released. If allocMem is .False., this means that the wall
-    ! distances are computed in a moving mesh computation and
-    ! consequently some more memory can be released.
+                do nn = 1, nDom
+                    call setPointers(nn, level, sps)
+                    call updateWallDistancesQuickly(nn, level, sps)
+                end do
 
-    if( allocMem ) then
-       deallocate(sendBuffer, recvBuffer, stat=ierr)
-       if(ierr /= 0)                    &
-            call terminate("wallDistance", &
-            "Deallocation error for communication buffers")
-    else
-       call deallocateTempMemory(.false.)
-    endif
-
-    ! There are two different searches we can do: the original code
-    ! always works and it capable to dealing with rotating/periodic
-    ! geometries. It uses constant memory and is slow. The
-    ! alternative method uses memory that scales with the size of
-    ! the surface grid per processor and only works for
-    ! steady/unsteady simulations without periodic/rotating
-    ! components. But it is fast. It is designed to be used for
-    ! updating the wall distances between iterations of
-    ! aerostructural solutions.
-
-    ! Normal, original wall distance calc. Cannot be used when
-    ! overset is present due to possibility of overlapping walls.
-    if (.not. useApproxWallDistance) then
-       ! Loop over the number of spectral solutions.
-       spectralLoop: do sps=1,nTimeIntervalsSpectral
+                call VecRestoreArrayF90(xSurfVec(level, sps), xSurf, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-          ! Initialize the wall distances.
+            end do
+        end if
+
+        ! Allocate the temporarily released memory again. For more info
+        ! see the comments at the beginning of this routine.
+
+        if (allocMem) then
+            allocate (sendBuffer(sendBufferSize), &
+                      recvBuffer(recvBufferSize), stat=ierr)
+            if (ierr /= 0) &
+                call terminate("wallDistance", &
+                               "Memory allocation failure for comm buffers")
+        else
+            call allocateTempMemory(.false.)
+        end if
+
+        ! Synchronize the processors.
+
+        call mpi_barrier(ADflow_comm_world, ierr)
+
+        ! Write a message to stdout with the amount of time it
+        ! took to compute the distances.
+
+        !        if(myID == 0) then
+        !          print "(*(A))", "# End wall distances level ", trim(integerString)
+        !          print "(*(A, ES12.5))", "# Wall clock time:", mpi_wtime() - t0," sec."
+        !          print "(a)", "#"
+        !        endif
+
+    end subroutine computeWallDistance
+
+    subroutine computeNormalSpacing(level, sps)
+        !
+        !       computeNormalSpacing computes the normal spacing of the first
+        !       cell center from the viscous wall for the given multigrid
+        !       level and spectral solution. This routine is called for
+        !       turbulence models, which do not need the wall distance.
+        !       However, they do need info of the first normal spacing for the
+        !       monitoring of y+ and possibly for the boundary conditions.
+        !       This is computed in this routine.
+        !
+        use constants
+        use blockPointers, only: x, d2wall, nViscBocos, BCFaceID, BCData, &
+                                 nx, ny, nz, il, jl, kl, nDom
+        use utils, only: setPointers
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level, sps
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn, mm, i, j
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
+
+        real(kind=realType) :: nnx, nny, nnz, vecx, vecy, vecz, dot
+
+        real(kind=realType), dimension(:, :, :), pointer :: xFace, xInt
+        real(kind=realType), dimension(:, :), pointer :: dd2Wall
+
+        ! Loop over the domains.
+
+        domain: do nn = 1, nDom
+
+            ! Set the pointers for this block.
 
-          call initWallDistance(level, sps, allocMem)
-
-          ! Build the viscous surface mesh.
-
-          call viscousSurfaceMesh(level, sps)
-
-          ! If there are no viscous faces, processor 0 prints a warning
-          ! and the wall distances are not computed.
-
-          if(nquadViscGlob == 0) then
-
-             if(myID == 0) then
-                print "(a)", "#"
-                print "(a)", "#               Warning!!!!"
-                print "(a)", "# No viscous boundary found. Wall &
-                     &distances are set to infinity"
-                print "(a)", "#"
-             endif
-
-          else
-             ! Determine the wall distances for the owned cells.
-             call determineDistance(level, sps)
-          end if
-       end do spectralLoop
-    else ! The user wants to use approx wall distance calcs OR we
-       ! have overset mesh. :
-
-       if (updateWallAssociation(level)) then
+            call setPointers(nn, level, sps)
 
-          ! Initialize the wall distance
-          spectralLoop2: do sps=1,nTimeIntervalsSpectral
-             call initWallDistance(level, sps, allocMem)
-          end do spectralLoop2
-
-          ! Destroy the PETSc wall distance data if necessary
-          call destroyWallDistanceDataLevel(level)
-
-          ! Do the associtaion. This allocates the data destroyed
-          ! in the destroyWallDistanceData call
-
-          do sps=1, nTimeIntervalsSpectral
-             call determineWallAssociation(level, sps)
-          end do
-
-          updateWallAssociation(level) = .False.
-       end if
-
-       ! Update the xsurf vector from X
-       call updateXSurf(level)
-
-       ! Call the actual update routine, on each of the sps instances and blocks
-       do sps=1, nTimeIntervalsSpectral
-
-          ! Now extract the vector of the surface data we need
-          call VecGetArrayF90(xSurfVec(level, sps), xSurf, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          do nn=1,nDom
-             call setPointers(nn, level, sps)
-             call updateWallDistancesQuickly(nn, level, sps)
-          end do
-
-          call VecRestoreArrayF90(xSurfVec(level, sps), xSurf, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-       end do
-    end if
-
-    ! Allocate the temporarily released memory again. For more info
-    ! see the comments at the beginning of this routine.
-
-    if( allocMem ) then
-       allocate(sendBuffer(sendBufferSize), &
-            recvBuffer(recvBufferSize), stat=ierr)
-       if(ierr /= 0)                    &
-            call terminate("wallDistance", &
-            "Memory allocation failure for comm buffers")
-    else
-       call allocateTempMemory(.false.)
-    endif
-
-    ! Synchronize the processors.
-
-    call mpi_barrier(ADflow_comm_world, ierr)
-
-    ! Write a message to stdout with the amount of time it
-    ! took to compute the distances.
-
-    !        if(myID == 0) then
-    !          print "(*(A))", "# End wall distances level ", trim(integerString)
-    !          print "(*(A, ES12.5))", "# Wall clock time:", mpi_wtime() - t0," sec."
-    !          print "(a)", "#"
-    !        endif
+            ! Loop over the viscous subfaces of this block. Note that
+            ! these are numbered first.
 
-  end subroutine computeWallDistance
+            bocos: do mm = 1, nViscBocos
 
+                ! Set the pointers for the plane on the surface, one plane
+                ! into the computational domain and the wall distance.
+                ! This depends on the block face on which the subface is
+                ! located. Note that the starting index of d2Wall is 2 and
+                ! therefore a pointer offset will be needed later on.
 
-  subroutine computeNormalSpacing(level, sps)
-    !
-    !       computeNormalSpacing computes the normal spacing of the first
-    !       cell center from the viscous wall for the given multigrid
-    !       level and spectral solution. This routine is called for
-    !       turbulence models, which do not need the wall distance.
-    !       However, they do need info of the first normal spacing for the
-    !       monitoring of y+ and possibly for the boundary conditions.
-    !       This is computed in this routine.
-    !
-    use constants
-    use blockPointers, only : x, d2wall, nViscBocos, BCFaceID, BCData, &
-         nx, ny, nz, il, jl, kl, nDom
-    use utils, only : setPointers
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level, sps
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn, mm, i, j
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
+                select case (BCFaceID(mm))
 
-    real(kind=realType) :: nnx, nny, nnz, vecx, vecy, vecz, dot
+                case (iMin)
+                    xFace => x(1, 1:, 1:, :); xInt => x(2, 1:, 1:, :)
+                    dd2Wall => d2Wall(2, :, :)
 
-    real(kind=realType), dimension(:,:,:), pointer :: xFace, xInt
-    real(kind=realType), dimension(:,:),   pointer :: dd2Wall
+                case (iMax)
+                    xFace => x(il, 1:, 1:, :); xInt => x(nx, 1:, 1:, :)
+                    dd2Wall => d2Wall(il, :, :)
 
-    ! Loop over the domains.
+                case (jMin)
+                    xFace => x(1:, 1, 1:, :); xInt => x(1:, 2, 1:, :)
+                    dd2Wall => d2Wall(:, 2, :)
 
-    domain: do nn=1, nDom
+                case (jMax)
+                    xFace => x(1:, jl, 1:, :); xInt => x(1:, ny, 1:, :)
+                    dd2Wall => d2Wall(:, jl, :)
 
-       ! Set the pointers for this block.
+                case (kMin)
+                    xFace => x(1:, 1:, 1, :); xInt => x(1:, 1:, 2, :)
+                    dd2Wall => d2Wall(:, :, 2)
 
-       call setPointers(nn, level, sps)
+                case (kMax)
+                    xFace => x(1:, 1:, kl, :); xInt => x(1:, 1:, nz, :)
+                    dd2Wall => d2Wall(:, :, kl)
 
-       ! Loop over the viscous subfaces of this block. Note that
-       ! these are numbered first.
+                end select
 
-       bocos: do mm=1,nViscBocos
+                ! Store the face range of this subface a bit easier.
 
-          ! Set the pointers for the plane on the surface, one plane
-          ! into the computational domain and the wall distance.
-          ! This depends on the block face on which the subface is
-          ! located. Note that the starting index of d2Wall is 2 and
-          ! therefore a pointer offset will be needed later on.
+                jBeg = BCData(mm)%jnBeg + 1; jEnd = BCData(mm)%jnEnd
+                iBeg = BCData(mm)%inBeg + 1; iEnd = BCData(mm)%inEnd
 
-          select case (BCFaceID(mm))
+                ! Loop over the faces of the subfaces.
 
-          case (iMin)
-             xFace   => x(1, 1:,1:,:); xInt => x(2, 1:,1:,:)
-             dd2Wall => d2Wall(2, :,:)
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
 
-          case (iMax)
-             xFace   => x(il,1:,1:,:); xInt => x(nx,1:,1:,:)
-             dd2Wall => d2Wall(il,:,:)
+                        ! Store the three components of the unit normal a
+                        ! bit easier.
 
-          case (jMin)
-             xFace   => x(1:,1, 1:,:); xInt => x(1:,2, 1:,:)
-             dd2Wall => d2Wall(:,2 ,:)
+                        nnx = BCData(mm)%norm(i, j, 1)
+                        nny = BCData(mm)%norm(i, j, 2)
+                        nnz = BCData(mm)%norm(i, j, 3)
 
-          case (jMax)
-             xFace   => x(1:,jl,1:,:); xInt => x(1:,ny,1:,:)
-             dd2Wall => d2Wall(:,jl,:)
+                        ! Compute the vector from centroid of the adjacent cell
+                        ! to the centroid of the face.
 
-          case (kMin)
-             xFace   => x(1:,1:,1, :); xInt => x(1:,1:,2 ,:)
-             dd2Wall => d2Wall(:,:,2 )
+                        vecx = eighth*(xFace(i - 1, j - 1, 1) + xFace(i - 1, j, 1) &
+                                       + xFace(i, j - 1, 1) + xFace(i, j, 1) &
+                                       - xInt(i - 1, j - 1, 1) - xInt(i - 1, j, 1) &
+                                       - xInt(i, j - 1, 1) - xInt(i, j, 1))
 
-          case (kMax)
-             xFace   => x(1:,1:,kl,:); xInt => x(1:,1:,nz,:)
-             dd2Wall => d2Wall(:,:,kl)
+                        vecy = eighth*(xFace(i - 1, j - 1, 2) + xFace(i - 1, j, 2) &
+                                       + xFace(i, j - 1, 2) + xFace(i, j, 2) &
+                                       - xInt(i - 1, j - 1, 2) - xInt(i - 1, j, 2) &
+                                       - xInt(i, j - 1, 2) - xInt(i, j, 2))
 
-          end select
+                        vecz = eighth*(xFace(i - 1, j - 1, 3) + xFace(i - 1, j, 3) &
+                                       + xFace(i, j - 1, 3) + xFace(i, j, 3) &
+                                       - xInt(i - 1, j - 1, 3) - xInt(i - 1, j, 3) &
+                                       - xInt(i, j - 1, 3) - xInt(i, j, 3))
 
-          ! Store the face range of this subface a bit easier.
+                        ! Compute the projection of this vector onto the normal
+                        ! vector of the face. For a decent mesh there will not be
+                        ! much of a difference between the projection and the
+                        ! original mesh, but it does not hurt to do it.
 
-          jBeg = BCData(mm)%jnBeg+1; jEnd = BCData(mm)%jnEnd
-          iBeg = BCData(mm)%inBeg+1; iEnd = BCData(mm)%inEnd
+                        dot = nnx*vecx + nny*vecy + nnz*vecz
 
-          ! Loop over the faces of the subfaces.
+                        ! As (nnx,nny,nnz) is a unit vector the distance to the
+                        ! wall of the first cell center is given by the absolute
+                        ! value of dot. Due to the use of pointers and the fact
+                        ! that the original d2Wall array starts at 2 and offset
+                        ! of -1 must be used to store the data at the correct
+                        ! location.
 
-          do j=jBeg,jEnd
-             do i=iBeg,iEnd
+                        dd2Wall(i - 1, j - 1) = abs(dot)
 
-                ! Store the three components of the unit normal a
-                ! bit easier.
+                    end do
+                end do
 
-                nnx = BCData(mm)%norm(i,j,1)
-                nny = BCData(mm)%norm(i,j,2)
-                nnz = BCData(mm)%norm(i,j,3)
+            end do bocos
+
+        end do domain
+
+    end subroutine computeNormalSpacing
+
+    subroutine initWallDistance(level, sps, allocMem)
+        !
+        !       initWallDistance allocates the memory for the wall distance,
+        !       if needed, and initializes the wall distance to a large value.
+        !
+        use constants
+        use blockPointers, only: nDom, flowDoms
+        use inputPhysics, only: useRoughSA
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level, sps
+        logical, intent(in)               :: allocMem
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: nn, il, jl, kl
+
+        ! Loop over the domains.
+
+        domain: do nn = 1, nDom
+
+            ! Allocate the memory for d2Wall, if desired.
+
+            if (allocMem) then
+
+                il = flowDoms(nn, level, sps)%il
+                jl = flowDoms(nn, level, sps)%jl
+                kl = flowDoms(nn, level, sps)%kl
+
+                allocate (flowDoms(nn, level, sps)%d2Wall(2:il, 2:jl, 2:kl), &
+                          stat=ierr)
+                if (ierr /= 0) &
+                    call terminate("initWallDistance", &
+                                   "Memory allocation failure for d2Wall")
+                if (useRoughSA) then
+                    allocate (flowDoms(nn, level, sps)%ks(2:il, 2:jl, 2:kl), &
+                              stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("initWallDistance", &
+                                       "Memory allocation failure for ks")
+                end if
+            end if
+
+            ! Initialize the wall distances to a large value.
+
+            flowDoms(nn, level, sps)%d2Wall = large
+
+        end do domain
+
+    end subroutine initWallDistance
+
+    subroutine determineDistance(level, sps)
+        !
+        !       determineDistance determines the distance from the center
+        !       of the cell to the nearest viscous wall for owned cells.
+        !
+        use constants
+        use adtAPI, only: adtBuildSurfaceADT, adtMinDistanceSearch, adtDeallocateADTs
+        use blockPointers, only: x, flowDoms, kl, jl, il, nDom, nx, ny, nz, &
+                                 sectionID, d2Wall
+        use communication, only: adflow_comm_world
+        use section, only: sections
+        use inputPhysics, only: wallOffset
+        use utils, only: setPointers, terminate
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        integer(kind=intType), intent(in) :: level, sps
+        !
+        !      Local parameter, which defines the name of the adt to be create
+        !      and used here. Only needed because of the api of the adt library
+        !
+        character(len=10), parameter :: viscAdt = "ViscousADT"
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer, dimension(:), allocatable :: procID
+
+        integer(kind=intType) :: nCell, nCellPer, nTria
+
+        integer(kind=intType), dimension(1, 1) :: connTria
+        real(kind=realType), dimension(3, 2) :: dummy
+
+        integer(kind=intType), dimension(:), allocatable :: elementID
+
+        real(kind=realType), dimension(:), allocatable :: dist2
+        real(kind=realType), dimension(:), allocatable :: dist2per
+        real(kind=realType), dimension(:, :), allocatable :: coor, uvw
+        real(kind=realType), dimension(:, :), allocatable :: coorPer
+
+        integer(kind=adtElementType), dimension(:), allocatable :: &
+            elementType
+
+        integer(kind=intType) :: nn, mm, ll, ii, jj, i, j, k
+
+        real(kind=realType), dimension(3) :: xc
+
+        ! Build the adt of the surface grid. As the api requires to
+        ! specify both the quadrilateral and the triangular connectivity,
+        ! some dummy variables must be passed.
+
+        nTria = 0
+        connTria = 0
+        dummy = zero
+
+        call adtBuildSurfaceADT(nTria, nquadVisc, nNodeVisc, &
+                                coorVisc, connTria, connVisc, &
+                                dummy, .false., ADflow_comm_world, &
+                                viscAdt)
+
+        ! Determine the number of cell centers for which the distance
+        ! must be computed. Also determine how many of them are periodic.
+
+        nCell = 0
+        nCellPer = 0
+
+        do nn = 1, nDom
+            ll = flowDoms(nn, level, sps)%nx*flowDoms(nn, level, sps)%ny &
+                 *flowDoms(nn, level, sps)%nz
+            nCell = nCell + ll
+
+            mm = flowDoms(nn, level, sps)%sectionID
+            if (sections(mm)%periodic) nCellPer = nCellPer + ll
+        end do
+
+        ! Allocate the memory for the arrays needed by the ADT.
+
+        allocate (coor(3, nCell), procID(nCell), elementType(nCell), &
+                  elementID(nCell), uvw(3, nCell), dist2(nCell), &
+                  coorPer(3, nCellPer), dist2Per(nCellPer), stat=ierr)
+        if (ierr /= 0) &
+             call terminate("determineDistance", &
+             "Memory allocation failure for the variables &
+             &needed by the adt.")
+        !
+        !       Step 1: The search of the original coordinates; possibly a
+        !               rotational periodic transformation is applied to align
+        !               the sections.
+        !
+        ! Loop over the domains to store the coordinates of the cell
+        ! centers of the owned cells. Apply the transformation such that
+        ! the sections are aligned.
+
+        mm = 0
+        domains: do nn = 1, nDom
+
+            ! Set the pointers for this block, store the section id a bit
+            ! easier in ll and loop over the cell centers.
 
-                ! Compute the vector from centroid of the adjacent cell
-                ! to the centroid of the face.
+            call setPointers(nn, level, sps)
+            ll = sectionID
+
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+
+                        ! Compute the coordinates of the cell center relative
+                        ! to the rotation center of this section.
+
+                        xc(1) = eighth*(x(i - 1, j - 1, k - 1, 1) + x(i, j - 1, k - 1, 1) &
+                                        + x(i - 1, j, k - 1, 1) + x(i, j, k - 1, 1) &
+                                        + x(i - 1, j - 1, k, 1) + x(i, j - 1, k, 1) &
+                                        + x(i - 1, j, k, 1) + x(i, j, k, 1)) &
+                                - sections(ll)%rotCenter(1)
+
+                        xc(2) = eighth*(x(i - 1, j - 1, k - 1, 2) + x(i, j - 1, k - 1, 2) &
+                                        + x(i - 1, j, k - 1, 2) + x(i, j, k - 1, 2) &
+                                        + x(i - 1, j - 1, k, 2) + x(i, j - 1, k, 2) &
+                                        + x(i - 1, j, k, 2) + x(i, j, k, 2)) &
+                                - sections(ll)%rotCenter(2)
+
+                        xc(3) = eighth*(x(i - 1, j - 1, k - 1, 3) + x(i, j - 1, k - 1, 3) &
+                                        + x(i - 1, j, k - 1, 3) + x(i, j, k - 1, 3) &
+                                        + x(i - 1, j - 1, k, 3) + x(i, j - 1, k, 3) &
+                                        + x(i - 1, j, k, 3) + x(i, j, k, 3)) &
+                                - sections(ll)%rotCenter(3)
+
+                        ! Apply the periodic transformation for this section to
+                        ! align it with other sections and store this coordinate
+                        ! in the appropriate place in coor.
+
+                        mm = mm + 1
+                        coor(1, mm) = rotMatrixSections(ll, 1, 1)*xc(1) &
+                                      + rotMatrixSections(ll, 1, 2)*xc(2) &
+                                      + rotMatrixSections(ll, 1, 3)*xc(3) &
+                                      + sections(ll)%rotCenter(1)
+
+                        coor(2, mm) = rotMatrixSections(ll, 2, 1)*xc(1) &
+                                      + rotMatrixSections(ll, 2, 2)*xc(2) &
+                                      + rotMatrixSections(ll, 2, 3)*xc(3) &
+                                      + sections(ll)%rotCenter(2)
+
+                        coor(3, mm) = rotMatrixSections(ll, 3, 1)*xc(1) &
+                                      + rotMatrixSections(ll, 3, 2)*xc(2) &
+                                      + rotMatrixSections(ll, 3, 3)*xc(3) &
+                                      + sections(ll)%rotCenter(3)
+
+                        ! Initialize the distance squared, because this is an
+                        ! inout argument in the call to adtMinDistanceSearch.
+
+                        dist2(mm) = d2Wall(i, j, k)
+
+                    end do
+                end do
+            end do
+        end do domains
+
+        ! Perform the search. As no no interpolations are required,
+        ! some dummies are passed.
+
+        call adtMinDistanceSearch(nCell, coor, viscAdt, &
+                                  procID, elementType, elementID, &
+                                  uvw, dist2, 0_intType, &
+                                  dummy, dummy)
+        !        if (myid == 0) then
+        !           print *,'procID = '
+        !           print *,procID
+        !           print *,'elemID:',elementID
+        !        end if
+
+        !
+        !       Step 2: For periodic sections the nearest wall may be in the
+        !               periodic part of the grid that is not stored.
+        !               Therefore apply the periodic transformation to the
+        !               node and compute the minimum distance for this
+        !               coordinate.
+        !
+        ! Initialize the counters mm and ii. Mm is the counter for coor;
+        ! ii is the counter for coorPer.
+
+        mm = 0
+        ii = 0
+
+        ! Loop over the domains and find the periodic ones.
+
+        domainsPer1: do nn = 1, nDom
+            jj = flowDoms(nn, level, sps)%nx*flowDoms(nn, level, sps)%ny &
+                 *flowDoms(nn, level, sps)%nz
+
+            ll = flowDoms(nn, level, sps)%sectionID
+
+            ! Check if the section is periodic.
+
+            if (sections(ll)%periodic) then
+
+                ! Loop over the corresponding entries in coor of this block
+                ! and apply the periodic transformation. The transformed
+                ! coordinates are stored in coorPer. Also initialize
+                ! the wall distance squared to the value just computed.
+
+                do i = 1, jj
+                    mm = mm + 1
+                    ii = ii + 1
+
+                    xc(1) = coor(1, mm) - sections(ll)%rotCenter(1)
+                    xc(2) = coor(2, mm) - sections(ll)%rotCenter(2)
+                    xc(3) = coor(3, mm) - sections(ll)%rotCenter(3)
+
+                    coorPer(1, ii) = sections(ll)%rotMatrix(1, 1)*xc(1) &
+                                     + sections(ll)%rotMatrix(1, 2)*xc(2) &
+                                     + sections(ll)%rotMatrix(1, 3)*xc(3) &
+                                     + sections(ll)%rotCenter(1) &
+                                     + sections(ll)%translation(1)
+
+                    coorPer(2, ii) = sections(ll)%rotMatrix(2, 1)*xc(1) &
+                                     + sections(ll)%rotMatrix(2, 2)*xc(2) &
+                                     + sections(ll)%rotMatrix(2, 3)*xc(3) &
+                                     + sections(ll)%rotCenter(2) &
+                                     + sections(ll)%translation(2)
+
+                    coorPer(3, ii) = sections(ll)%rotMatrix(3, 1)*xc(1) &
+                                     + sections(ll)%rotMatrix(3, 2)*xc(2) &
+                                     + sections(ll)%rotMatrix(3, 3)*xc(3) &
+                                     + sections(ll)%rotCenter(3) &
+                                     + sections(ll)%translation(3)
+
+                    dist2Per(ii) = dist2(mm)
+                end do
 
-                vecx = eighth*(xFace(i-1,j-1,1) + xFace(i-1,j,1) &
-                     +         xFace(i,  j-1,1) + xFace(i,  j,1) &
-                     -          xInt(i-1,j-1,1) -  xInt(i-1,j,1) &
-                     -          xInt(i,  j-1,1) -  xInt(i,  j,1))
+            else
+                ! Section is not periodic. Update the counter mm.
 
-                vecy = eighth*(xFace(i-1,j-1,2) + xFace(i-1,j,2) &
-                     +         xFace(i,  j-1,2) + xFace(i,  j,2) &
-                     -          xInt(i-1,j-1,2) -  xInt(i-1,j,2) &
-                     -          xInt(i,  j-1,2) -  xInt(i,  j,2))
+                mm = mm + jj
+            end if
 
-                vecz = eighth*(xFace(i-1,j-1,3) + xFace(i-1,j,3) &
-                     +         xFace(i,  j-1,3) + xFace(i,  j,3) &
-                     -          xInt(i-1,j-1,3) -  xInt(i-1,j,3) &
-                     -          xInt(i,  j-1,3) -  xInt(i,  j,3))
+        end do domainsPer1
 
-                ! Compute the projection of this vector onto the normal
-                ! vector of the face. For a decent mesh there will not be
-                ! much of a difference between the projection and the
-                ! original mesh, but it does not hurt to do it.
+        ! Perform the adt search of this set of periodic coordinates.
+        ! As no no interpolations are required, some dummies are passed.
 
-                dot = nnx*vecx + nny*vecy + nnz*vecz
+        call adtMinDistanceSearch(nCellPer, coorPer, viscAdt, &
+                                  procID, elementType, elementID, &
+                                  uvw, dist2Per, 0_intType, &
+                                  dummy, dummy)
+        !
+        !       Step 3: Also apply the inverse periodic transformation.
+        !
+        ! Initialize the counters mm and ii. Mm is the counter for coor;
+        ! ii is the counter for coorPer.
 
-                ! As (nnx,nny,nnz) is a unit vector the distance to the
-                ! wall of the first cell center is given by the absolute
-                ! value of dot. Due to the use of pointers and the fact
-                ! that the original d2Wall array starts at 2 and offset
-                ! of -1 must be used to store the data at the correct
-                ! location.
+        mm = 0
+        ii = 0
 
-                dd2Wall(i-1,j-1) = abs(dot)
+        ! Loop over the domains and find the periodic ones.
 
-             enddo
-          enddo
+        domainsPer2: do nn = 1, nDom
+            jj = flowDoms(nn, level, sps)%nx*flowDoms(nn, level, sps)%ny &
+                 *flowDoms(nn, level, sps)%nz
 
-       enddo bocos
+            ll = flowDoms(nn, level, sps)%sectionID
 
-    enddo domain
+            ! Check if the section is periodic.
 
-  end subroutine computeNormalSpacing
+            if (sections(ll)%periodic) then
 
-  subroutine initWallDistance(level, sps, allocMem)
-    !
-    !       initWallDistance allocates the memory for the wall distance,
-    !       if needed, and initializes the wall distance to a large value.
-    !
-    use constants
-    use blockPointers, only : nDom, flowDoms
-    use inputPhysics, only : useRoughSA
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level, sps
-    logical, intent(in)               :: allocMem
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: nn, il, jl, kl
-
-    ! Loop over the domains.
-
-    domain: do nn=1, nDom
-
-       ! Allocate the memory for d2Wall, if desired.
-
-       if( allocMem ) then
-
-          il = flowDoms(nn,level,sps)%il
-          jl = flowDoms(nn,level,sps)%jl
-          kl = flowDoms(nn,level,sps)%kl
-
-          allocate(flowDoms(nn,level,sps)%d2Wall(2:il,2:jl,2:kl), &
-               stat=ierr)
-          if(ierr /= 0)                          &
-               call terminate("initWallDistance", &
-               "Memory allocation failure for d2Wall")
-          if (useRoughSA) then
-            allocate(flowDoms(nn,level,sps)%ks(2:il,2:jl,2:kl), &
-                stat=ierr)
-            if(ierr /= 0)                          &
-                call terminate("initWallDistance", &
-                "Memory allocation failure for ks")
-          end if
-       endif
-
-       ! Initialize the wall distances to a large value.
-
-       flowDoms(nn,level,sps)%d2Wall = large
-
-    enddo domain
-
-  end subroutine initWallDistance
-
-  subroutine determineDistance(level, sps)
-    !
-    !       determineDistance determines the distance from the center
-    !       of the cell to the nearest viscous wall for owned cells.
-    !
-    use constants
-    use adtAPI, only :adtBuildSurfaceADT, adtMinDistanceSearch, adtDeallocateADTs
-    use blockPointers, only : x, flowDoms, kl, jl, il, nDom, nx, ny, nz, &
-         sectionID, d2Wall
-    use communication, only : adflow_comm_world
-    use section, only : sections
-    use inputPhysics, only : wallOffset
-    use utils, only : setPointers, terminate
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    integer(kind=intType), intent(in) :: level, sps
-    !
-    !      Local parameter, which defines the name of the adt to be create
-    !      and used here. Only needed because of the api of the adt library
-    !
-    character(len=10), parameter :: viscAdt = "ViscousADT"
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer, dimension(:), allocatable :: procID
-
-    integer(kind=intType) :: nCell, nCellPer, nTria
-
-    integer(kind=intType), dimension(1,1) :: connTria
-    real(kind=realType),   dimension(3,2) :: dummy
-
-    integer(kind=intType), dimension(:), allocatable :: elementID
-
-    real(kind=realType), dimension(:),   allocatable :: dist2
-    real(kind=realType), dimension(:),   allocatable :: dist2per
-    real(kind=realType), dimension(:,:), allocatable :: coor, uvw
-    real(kind=realType), dimension(:,:), allocatable :: coorPer
-
-    integer(kind=adtElementType), dimension(:), allocatable :: &
-         elementType
-
-    integer(kind=intType) :: nn, mm, ll, ii, jj, i, j, k
-
-    real(kind=realType), dimension(3) :: xc
-
-    ! Build the adt of the surface grid. As the api requires to
-    ! specify both the quadrilateral and the triangular connectivity,
-    ! some dummy variables must be passed.
-
-    nTria    = 0
-    connTria = 0
-    dummy    = zero
-
-    call adtBuildSurfaceADT(nTria,    nquadVisc, nNodeVisc,       &
-         coorVisc, connTria,  connVisc,        &
-         dummy,    .false.,   ADflow_comm_world, &
-         viscAdt)
-
-    ! Determine the number of cell centers for which the distance
-    ! must be computed. Also determine how many of them are periodic.
-
-    nCell    = 0
-    nCellPer = 0
-
-    do nn=1,nDom
-       ll    = flowDoms(nn,level,sps)%nx * flowDoms(nn,level,sps)%ny &
-            * flowDoms(nn,level,sps)%nz
-       nCell = nCell + ll
-
-       mm = flowDoms(nn,level,sps)%sectionID
-       if( sections(mm)%periodic ) nCellPer = nCellPer + ll
-    enddo
-
-    ! Allocate the memory for the arrays needed by the ADT.
-
-    allocate(coor(3,nCell), procID(nCell), elementType(nCell), &
-         elementID(nCell), uvw(3,nCell), dist2(nCell),      &
-         coorPer(3,nCellPer), dist2Per(nCellPer), stat=ierr)
-    if(ierr /= 0)                         &
-         call terminate("determineDistance", &
-         "Memory allocation failure for the variables &
-         &needed by the adt.")
-    !
-    !       Step 1: The search of the original coordinates; possibly a
-    !               rotational periodic transformation is applied to align
-    !               the sections.
-    !
-    ! Loop over the domains to store the coordinates of the cell
-    ! centers of the owned cells. Apply the transformation such that
-    ! the sections are aligned.
-
-    mm = 0
-    domains: do nn=1,nDom
-
-       ! Set the pointers for this block, store the section id a bit
-       ! easier in ll and loop over the cell centers.
-
-       call setPointers(nn, level, sps)
-       ll = sectionID
-
-       do k=2,kl
-          do j=2,jl
-             do i=2,il
-
-                ! Compute the coordinates of the cell center relative
-                ! to the rotation center of this section.
-
-                xc(1) = eighth*(x(i-1,j-1,k-1,1) + x(i,j-1,k-1,1)  &
-                     +         x(i-1,j,  k-1,1) + x(i,j,  k-1,1)  &
-                     +         x(i-1,j-1,k,  1) + x(i,j-1,k,  1)  &
-                     +         x(i-1,j,  k,  1) + x(i,j,  k,  1)) &
-                     - sections(ll)%rotCenter(1)
-
-                xc(2) = eighth*(x(i-1,j-1,k-1,2) + x(i,j-1,k-1,2)  &
-                     +         x(i-1,j,  k-1,2) + x(i,j,  k-1,2)  &
-                     +         x(i-1,j-1,k,  2) + x(i,j-1,k,  2)  &
-                     +         x(i-1,j,  k,  2) + x(i,j,  k,  2)) &
-                     - sections(ll)%rotCenter(2)
-
-                xc(3) = eighth*(x(i-1,j-1,k-1,3) + x(i,j-1,k-1,3)  &
-                     +         x(i-1,j,  k-1,3) + x(i,j,  k-1,3)  &
-                     +         x(i-1,j-1,k,  3) + x(i,j-1,k,  3)  &
-                     +         x(i-1,j,  k,  3) + x(i,j,  k,  3)) &
-                     - sections(ll)%rotCenter(3)
-
-                ! Apply the periodic transformation for this section to
-                ! align it with other sections and store this coordinate
-                ! in the appropriate place in coor.
-
-                mm = mm + 1
-                coor(1,mm) = rotMatrixSections(ll,1,1)*xc(1) &
-                     + rotMatrixSections(ll,1,2)*xc(2) &
-                     + rotMatrixSections(ll,1,3)*xc(3) &
-                     + sections(ll)%rotCenter(1)
-
-                coor(2,mm) = rotMatrixSections(ll,2,1)*xc(1) &
-                     + rotMatrixSections(ll,2,2)*xc(2) &
-                     + rotMatrixSections(ll,2,3)*xc(3) &
-                     + sections(ll)%rotCenter(2)
-
-                coor(3,mm) = rotMatrixSections(ll,3,1)*xc(1) &
-                     + rotMatrixSections(ll,3,2)*xc(2) &
-                     + rotMatrixSections(ll,3,3)*xc(3) &
-                     + sections(ll)%rotCenter(3)
-
-                ! Initialize the distance squared, because this is an
-                ! inout argument in the call to adtMinDistanceSearch.
-
-                dist2(mm) = d2Wall(i,j,k)
-
-             enddo
-          enddo
-       enddo
-    enddo domains
-
-    ! Perform the search. As no no interpolations are required,
-    ! some dummies are passed.
-
-    call adtMinDistanceSearch(nCell,  coor,        viscAdt,      &
-         procID, elementType, elementID,    &
-         uvw,    dist2,       0_intType, &
-         dummy,  dummy)
-    !        if (myid == 0) then
-    !           print *,'procID = '
-    !           print *,procID
-    !           print *,'elemID:',elementID
-    !        end if
-
-    !
-    !       Step 2: For periodic sections the nearest wall may be in the
-    !               periodic part of the grid that is not stored.
-    !               Therefore apply the periodic transformation to the
-    !               node and compute the minimum distance for this
-    !               coordinate.
-    !
-    ! Initialize the counters mm and ii. Mm is the counter for coor;
-    ! ii is the counter for coorPer.
-
-    mm = 0
-    ii = 0
-
-    ! Loop over the domains and find the periodic ones.
-
-    domainsPer1: do nn=1,nDom
-       jj    = flowDoms(nn,level,sps)%nx * flowDoms(nn,level,sps)%ny &
-            * flowDoms(nn,level,sps)%nz
-
-       ll = flowDoms(nn,level,sps)%sectionID
-
-       ! Check if the section is periodic.
-
-       if( sections(ll)%periodic ) then
-
-          ! Loop over the corresponding entries in coor of this block
-          ! and apply the periodic transformation. The transformed
-          ! coordinates are stored in coorPer. Also initialize
-          ! the wall distance squared to the value just computed.
-
-          do i=1,jj
-             mm = mm + 1
-             ii = ii + 1
-
-             xc(1) = coor(1,mm) - sections(ll)%rotCenter(1)
-             xc(2) = coor(2,mm) - sections(ll)%rotCenter(2)
-             xc(3) = coor(3,mm) - sections(ll)%rotCenter(3)
-
-             coorPer(1,ii) = sections(ll)%rotMatrix(1,1)*xc(1) &
-                  + sections(ll)%rotMatrix(1,2)*xc(2) &
-                  + sections(ll)%rotMatrix(1,3)*xc(3) &
-                  + sections(ll)%rotCenter(1)         &
-                  + sections(ll)%translation(1)
-
-             coorPer(2,ii) = sections(ll)%rotMatrix(2,1)*xc(1) &
-                  + sections(ll)%rotMatrix(2,2)*xc(2) &
-                  + sections(ll)%rotMatrix(2,3)*xc(3) &
-                  + sections(ll)%rotCenter(2)         &
-                  + sections(ll)%translation(2)
-
-             coorPer(3,ii) = sections(ll)%rotMatrix(3,1)*xc(1) &
-                  + sections(ll)%rotMatrix(3,2)*xc(2) &
-                  + sections(ll)%rotMatrix(3,3)*xc(3) &
-                  + sections(ll)%rotCenter(3)         &
-                  + sections(ll)%translation(3)
-
-             dist2Per(ii) = dist2(mm)
-          enddo
-
-       else
-          ! Section is not periodic. Update the counter mm.
-
-          mm = mm + jj
-       endif
-
-    enddo domainsPer1
-
-    ! Perform the adt search of this set of periodic coordinates.
-    ! As no no interpolations are required, some dummies are passed.
-
-    call adtMinDistanceSearch(nCellPer, coorPer,     viscAdt,      &
-         procID,   elementType, elementID,    &
-         uvw,      dist2Per,    0_intType, &
-         dummy,    dummy)
-    !
-    !       Step 3: Also apply the inverse periodic transformation.
-    !
-    ! Initialize the counters mm and ii. Mm is the counter for coor;
-    ! ii is the counter for coorPer.
-
-    mm = 0
-    ii = 0
-
-    ! Loop over the domains and find the periodic ones.
-
-    domainsPer2: do nn=1,nDom
-       jj    = flowDoms(nn,level,sps)%nx * flowDoms(nn,level,sps)%ny &
-            * flowDoms(nn,level,sps)%nz
-
-       ll = flowDoms(nn,level,sps)%sectionID
-
-       ! Check if the section is periodic.
-
-       if( sections(ll)%periodic ) then
-
-          ! Loop over the corresponding entries in coor of this block
-          ! and apply the inverse periodic transformation. Again the
-          ! transformed coordinates are stored in coorPer. Note that
-          ! the inverse of the rotation matrix is the transpose and
-          ! that the translation vector should be multiplied by the
-          ! inverse of the rotation matrix. Note that the wall distance
-          ! has already been initialized in the previous periodic
-          ! search.
-
-          do i=1,jj
-             mm = mm + 1
-             ii = ii + 1
+                ! Loop over the corresponding entries in coor of this block
+                ! and apply the inverse periodic transformation. Again the
+                ! transformed coordinates are stored in coorPer. Note that
+                ! the inverse of the rotation matrix is the transpose and
+                ! that the translation vector should be multiplied by the
+                ! inverse of the rotation matrix. Note that the wall distance
+                ! has already been initialized in the previous periodic
+                ! search.
 
-             xc(1) = coor(1,mm) - sections(ll)%rotCenter(1) &
-                  -              sections(ll)%translation(1)
-             xc(2) = coor(2,mm) - sections(ll)%rotCenter(2) &
-                  -              sections(ll)%translation(2)
-             xc(3) = coor(3,mm) - sections(ll)%rotCenter(3) &
-                  -              sections(ll)%translation(3)
+                do i = 1, jj
+                    mm = mm + 1
+                    ii = ii + 1
 
-             coorPer(1,ii) = sections(ll)%rotMatrix(1,1)*xc(1) &
-                  + sections(ll)%rotMatrix(2,1)*xc(2) &
-                  + sections(ll)%rotMatrix(3,1)*xc(3) &
-                  + sections(ll)%rotCenter(1)
+                    xc(1) = coor(1, mm) - sections(ll)%rotCenter(1) &
+                            - sections(ll)%translation(1)
+                    xc(2) = coor(2, mm) - sections(ll)%rotCenter(2) &
+                            - sections(ll)%translation(2)
+                    xc(3) = coor(3, mm) - sections(ll)%rotCenter(3) &
+                            - sections(ll)%translation(3)
 
-             coorPer(2,ii) = sections(ll)%rotMatrix(1,2)*xc(1) &
-                  + sections(ll)%rotMatrix(2,2)*xc(2) &
-                  + sections(ll)%rotMatrix(3,2)*xc(3) &
-                  + sections(ll)%rotCenter(2)
-
-             coorPer(3,ii) = sections(ll)%rotMatrix(1,3)*xc(1) &
-                  + sections(ll)%rotMatrix(2,3)*xc(2) &
-                  + sections(ll)%rotMatrix(3,3)*xc(3) &
-                  + sections(ll)%rotCenter(3)
-          enddo
-
-       else
-          ! Section is not periodic. Update the counter mm.
-
-          mm = mm + jj
-       endif
-
-    enddo domainsPer2
-
-    ! Perform the adt search of this set of periodic coordinates.
-    ! As no no interpolations are required, some dummies are passed.
-
-    call adtMinDistanceSearch(nCellPer, coorPer,     viscAdt,      &
-         procID,   elementType, elementID,    &
-         uvw,      dist2Per,    0_intType, &
-         dummy,    dummy)
-    !
-    !       Step 4: Store the minimum distance in the block type.
-    !
-    mm = 0
-    ii = 0
-
-    domainsStore: do nn=1,nDom
-
-       ! Set the pointers for this block and store the section id a
-       ! bit easier in ll.
-
-       call setPointers(nn, level, sps)
-       ll = sectionID
-
-       ! Check if the section is periodic. If so the distance is set
-       ! to the minimum of the value computed in step 1 and the
-       ! periodic values. Note that mm should not be updated in this
-       ! loop, because it is updated in the loop over the cell centers
-       ! of this block. Instead the counter j is used.
-
-       if( sections(ll)%periodic ) then
-
-          jj = nx*ny*nz
-
-          j = mm
-          do i=1,jj
-             j  = j  + 1
-             ii = ii + 1
-
-             dist2(j) = min(dist2(j), dist2Per(ii))
-          enddo
-       endif
-
-       ! Loop over the cell centers of the block to store the wall
-       ! distance. Note that dist2 stores the distance squared and
-       ! thus a square root must be taken.
-       ! Add a possible offset for the debugging of wall functions.
-
-       do k=2,kl
-          do j=2,jl
-             do i=2,il
-                mm = mm + 1
-                d2Wall(i,j,k) = sqrt(dist2(mm)) + wallOffset
-             enddo
-          enddo
-       enddo
-
-    enddo domainsStore
-
-    ! Release the memory of the ADT and the arrays of the module
-    ! viscSurface.
-
-    call adtDeallocateADTs(viscAdt)
-
-    deallocate(connVisc, coorVisc, rotMatrixSections, stat=ierr)
-    if(ierr /= 0)                         &
-         call terminate("determineDistance", &
-         "Deallocation error for the arrays &
-         &of viscSurface")
-
-    ! Release the variables needed by the ADT.
-
-    deallocate(coor, procID, elementType, elementID, uvw, dist2, &
-         coorPer, dist2Per, stat=ierr)
-    if(ierr /= 0)                          &
-         call terminate("determineDistance", &
-         "Deallocation failure for the variables &
-         &needed by the adt.")
-
-  end subroutine determineDistance
-
-
-  subroutine localViscousSurfaceMesh(multSections, level, sps)
-    !
-    !       localViscousSurfaceMesh stores the local viscous surface
-    !       mesh (with possible periodic extensions in conn and coor.
-    !
-    use constants
-    use blockPointers, only : BCData, x, il, jl, kl, BCFaceID, sectionID, &
-         flowDoms, nBocos, nDom, BCType
-    use communication, only : myid, adflow_comm_world
-    use section, only : sections, nSections
-    use utils, only : setPointers, terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level, sps
-    integer(kind=intType), dimension(*), intent(in) :: multSections
-    !
-    !      Local variables.
-    !
-    integer :: size, ierr
-
-    integer(kind=intType) :: nn, mm, i, j, k
-    integer(kind=intType) :: np, nq, npOld, np1, nqOld, mp, mq
-    integer(kind=intType) :: nq1, nq2, nq3, nq4, sec, row, col
-    integer(kind=intType) :: iBeg, jBeg, iEnd, jEnd
-
-    integer(kind=intType), dimension(3) :: ind
-
-    real(kind=realType) :: length, dot, xx, yy, zz, r1, r2, aaa, bbb
-    real(kind=realType) :: theta, cosTheta, sinTheta
-
-    real(kind=realType), dimension(3,3) :: a
-    real(kind=realType), dimension(nSections) :: thetaNMin, &
-         thetaNMax, &
-         thetaPMin, &
-         thetaPMax, tmp
-    real(kind=realType), dimension(nSections,3) :: rad1, rad2, axis
-
-    real(kind=realType), dimension(:,:,:), pointer :: xface
-
-    !       Determine the unit vectors of the local coordinate system
-    !       aligned with the rotation axis of the possible rotational
-    !       periodic section.
-    !
-    do nn=1,nSections
-       if(sections(nn)%nSlices == 1) cycle
-
-       ! Section is rotational periodic. First determine the rotation
-       ! axis. This is the eigenvector which corresponds to the
-       ! eigenvalue 1 of the transformation matrix.
-
-       ! Store rot - i in a and initialize ind.
-
-       ind(1) = 1; ind(2) = 2; ind(3) = 3
-
-       a(1,1) = sections(nn)%rotMatrix(1,1) - one
-       a(1,2) = sections(nn)%rotMatrix(1,2)
-       a(1,3) = sections(nn)%rotMatrix(1,3)
-
-       a(2,1) = sections(nn)%rotMatrix(2,1)
-       a(2,2) = sections(nn)%rotMatrix(2,2) - one
-       a(2,3) = sections(nn)%rotMatrix(2,3)
-
-       a(3,1) = sections(nn)%rotMatrix(3,1)
-       a(3,2) = sections(nn)%rotMatrix(3,2)
-       a(3,3) = sections(nn)%rotMatrix(3,3) - one
-
-       ! Loop over the two times that Gaussian elimination must be
-       ! applied.
-
-       loopGauss: do k=1,2
-
-          ! Find the largest value in the sub-matrix.
-
-          aaa = abs(a(k,k)); row = k; col = k
-          do j=k,3
-             do i=k,3
-                bbb = abs(a(i,j))
-                if(bbb > aaa) then
-                   aaa  = bbb
-                   row = i
-                   col = j
-                endif
-             enddo
-          enddo
-
-          ! Swap the rows k and row.
-
-          do j=1,3
-             aaa       = a(k,j)
-             a(k,j)   = a(row,j)
-             a(row,j) = aaa
-          enddo
-
-          ! Swap the colums k and col; also swap ind(k) and ind(col).
-
-          i        = ind(k)
-          ind(k)   = ind(col)
-          ind(col) = i
-          do i=1,3
-             aaa       = a(i,k)
-             a(i,k)   = a(i,col)
-             a(i,col) = aaa
-          enddo
-
-          ! Perform gaussian eliMination, because now it's sure that
-          ! the element (k,k) is non-zero.
-
-          aaa = one/a(k,k)
-          do i=(k+1),3
-             bbb = a(i,k)*aaa
-             do j=k,3
-                a(i,j) = a(i,j) - bbb*a(k,j)
-             enddo
-          enddo
-
-       enddo loopGauss
-
-       ! Due to the full pivoting it is now guaranteed that the elements
-       ! a(ind(1),ind(1)) and a(ind(2),ind(2)) are nonzero and
-       ! a(ind(3),ind(3)) == zero. Remember that the rotation matrix
-       ! only has 1 eigenvalue of one. Set axis(ind(3)) to one and
-       ! determine the other two elements of the eigen vector.
-
-       axis(nn,ind(3)) =   one
-       axis(nn,ind(2)) = -(a(2,3)*axis(nn,ind(3)))/a(2,2)
-       axis(nn,ind(1)) = -(a(1,3)*axis(nn,ind(3)) &
-            +   a(1,2)*axis(nn,ind(2)))/a(1,1)
-
-       ! Create a unit vector.
-
-       length = one/sqrt(axis(nn,1)**2 + axis(nn,2)**2 + axis(nn,3)**2)
-       axis(nn,1) = axis(nn,1)*length
-       axis(nn,2) = axis(nn,2)*length
-       axis(nn,3) = axis(nn,3)*length
-
-       ! Make sure that the largest component of this vector is
-       ! positive, such that a unique definition of the rotation
-       ! axis is obtained. Use dot and length as a temporary
-       ! storage.
-
-       dot = axis(nn,1); length = abs(dot)
-       if(abs(axis(nn,2)) > length) then
-          dot = axis(nn,2); length = abs(dot)
-       endif
-       if(abs(axis(nn,3)) > length) then
-          dot = axis(nn,3); length = abs(dot)
-       endif
+                    coorPer(1, ii) = sections(ll)%rotMatrix(1, 1)*xc(1) &
+                                     + sections(ll)%rotMatrix(2, 1)*xc(2) &
+                                     + sections(ll)%rotMatrix(3, 1)*xc(3) &
+                                     + sections(ll)%rotCenter(1)
 
-       if(dot < zero) then
-          axis(nn,1) = -axis(nn,1)
-          axis(nn,2) = -axis(nn,2)
-          axis(nn,3) = -axis(nn,3)
-       endif
+                    coorPer(2, ii) = sections(ll)%rotMatrix(1, 2)*xc(1) &
+                                     + sections(ll)%rotMatrix(2, 2)*xc(2) &
+                                     + sections(ll)%rotMatrix(3, 2)*xc(3) &
+                                     + sections(ll)%rotCenter(2)
 
-       ! Determine the two vectors which determine the plane normal
-       ! to the axis of rotation.
-
-       ! Initial guess of rad1. First try the y-axis. If not good
-       ! enough try the z-axis.
-
-       if(abs(axis(nn,2)) < 0.707107_realType) then
-          rad1(nn,1) = zero
-          rad1(nn,2) = one
-          rad1(nn,3) = zero
-       else
-          rad1(nn,1) = zero
-          rad1(nn,2) = zero
-          rad1(nn,3) = one
-       endif
-
-       ! Make sure that rad1 is normal to axis. Create a unit
-       ! vector again.
-
-       dot = rad1(nn,1)*axis(nn,1) + rad1(nn,2)*axis(nn,2) &
-            + rad1(nn,3)*axis(nn,3)
-       rad1(nn,1) = rad1(nn,1) - dot*axis(nn,1)
-       rad1(nn,2) = rad1(nn,2) - dot*axis(nn,2)
-       rad1(nn,3) = rad1(nn,3) - dot*axis(nn,3)
-
-       length = one/(rad1(nn,1)**2 + rad1(nn,2)**2 + rad1(nn,3)**2)
-       rad1(nn,1) = rad1(nn,1)*length
-       rad1(nn,2) = rad1(nn,2)*length
-       rad1(nn,3) = rad1(nn,3)*length
+                    coorPer(3, ii) = sections(ll)%rotMatrix(1, 3)*xc(1) &
+                                     + sections(ll)%rotMatrix(2, 3)*xc(2) &
+                                     + sections(ll)%rotMatrix(3, 3)*xc(3) &
+                                     + sections(ll)%rotCenter(3)
+                end do
 
-       ! Create the second vector which spans the radIal plane. This
-       ! must be normal to both axis and rad1, i.e. the cross-product.
+            else
+                ! Section is not periodic. Update the counter mm.
 
-       rad2(nn,1) = axis(nn,2)*rad1(nn,3) - axis(nn,3)*rad1(nn,2)
-       rad2(nn,2) = axis(nn,3)*rad1(nn,1) - axis(nn,1)*rad1(nn,3)
-       rad2(nn,3) = axis(nn,1)*rad1(nn,2) - axis(nn,2)*rad1(nn,1)
+                mm = mm + jj
+            end if
 
-    enddo
+        end do domainsPer2
 
-    ! Initialize the values of thetaNMin, etc.
+        ! Perform the adt search of this set of periodic coordinates.
+        ! As no no interpolations are required, some dummies are passed.
 
-    thetaNMin =  zero
-    thetaNMax = -pi
-    thetaPMin =  pi
-    thetaPMax =  zero
-    !
-    !       Determine the local values of thetaNMin, etc. for the
-    !       different sections.
-    !
-    do nn=1,nDom
+        call adtMinDistanceSearch(nCellPer, coorPer, viscAdt, &
+                                  procID, elementType, elementID, &
+                                  uvw, dist2Per, 0_intType, &
+                                  dummy, dummy)
+        !
+        !       Step 4: Store the minimum distance in the block type.
+        !
+        mm = 0
+        ii = 0
 
-       ! Store the section id of the block a bit easier. Continue with
-       ! the next block if this section consist of only one slice.
+        domainsStore: do nn = 1, nDom
 
-       sec = flowDoms(nn,level,sps)%sectionID
-       if(sections(sec)%nSlices == 1) cycle
+            ! Set the pointers for this block and store the section id a
+            ! bit easier in ll.
 
-       ! Set the pointers for this block.
+            call setPointers(nn, level, sps)
+            ll = sectionID
 
-       call setPointers(nn, level,sps)
+            ! Check if the section is periodic. If so the distance is set
+            ! to the minimum of the value computed in step 1 and the
+            ! periodic values. Note that mm should not be updated in this
+            ! loop, because it is updated in the loop over the cell centers
+            ! of this block. Instead the counter j is used.
 
-       ! Initialize nq1, nq2, nq3 and nq4 to 0. These integers store
-       ! the number of nodes in the first, second, third and fourth
-       ! quadrant respectivily.
+            if (sections(ll)%periodic) then
 
-       nq1 = 0; nq2 = 0; nq3 = 0; nq4 = 0
+                jj = nx*ny*nz
 
-       ! Loop over the nodes of this block.
+                j = mm
+                do i = 1, jj
+                    j = j + 1
+                    ii = ii + 1
 
-       do k=1,kl
-          do j=1,jl
-             do i=1,il
+                    dist2(j) = min(dist2(j), dist2Per(ii))
+                end do
+            end if
+
+            ! Loop over the cell centers of the block to store the wall
+            ! distance. Note that dist2 stores the distance squared and
+            ! thus a square root must be taken.
+            ! Add a possible offset for the debugging of wall functions.
+
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+                        mm = mm + 1
+                        d2Wall(i, j, k) = sqrt(dist2(mm)) + wallOffset
+                    end do
+                end do
+            end do
 
-                ! Determine the coordinates relative to the
-                ! center of rotation.
+        end do domainsStore
+
+        ! Release the memory of the ADT and the arrays of the module
+        ! viscSurface.
+
+        call adtDeallocateADTs(viscAdt)
+
+        deallocate (connVisc, coorVisc, rotMatrixSections, stat=ierr)
+        if (ierr /= 0) &
+             call terminate("determineDistance", &
+             "Deallocation error for the arrays &
+             &of viscSurface")
+
+        ! Release the variables needed by the ADT.
+
+        deallocate (coor, procID, elementType, elementID, uvw, dist2, &
+                    coorPer, dist2Per, stat=ierr)
+        if (ierr /= 0) &
+             call terminate("determineDistance", &
+             "Deallocation failure for the variables &
+             &needed by the adt.")
+
+    end subroutine determineDistance
+
+    subroutine localViscousSurfaceMesh(multSections, level, sps)
+        !
+        !       localViscousSurfaceMesh stores the local viscous surface
+        !       mesh (with possible periodic extensions in conn and coor.
+        !
+        use constants
+        use blockPointers, only: BCData, x, il, jl, kl, BCFaceID, sectionID, &
+                                 flowDoms, nBocos, nDom, BCType
+        use communication, only: myid, adflow_comm_world
+        use section, only: sections, nSections
+        use utils, only: setPointers, terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level, sps
+        integer(kind=intType), dimension(*), intent(in) :: multSections
+        !
+        !      Local variables.
+        !
+        integer :: size, ierr
+
+        integer(kind=intType) :: nn, mm, i, j, k
+        integer(kind=intType) :: np, nq, npOld, np1, nqOld, mp, mq
+        integer(kind=intType) :: nq1, nq2, nq3, nq4, sec, row, col
+        integer(kind=intType) :: iBeg, jBeg, iEnd, jEnd
+
+        integer(kind=intType), dimension(3) :: ind
+
+        real(kind=realType) :: length, dot, xx, yy, zz, r1, r2, aaa, bbb
+        real(kind=realType) :: theta, cosTheta, sinTheta
+
+        real(kind=realType), dimension(3, 3) :: a
+        real(kind=realType), dimension(nSections) :: thetaNMin, &
+                                                     thetaNMax, &
+                                                     thetaPMin, &
+                                                     thetaPMax, tmp
+        real(kind=realType), dimension(nSections, 3) :: rad1, rad2, axis
+
+        real(kind=realType), dimension(:, :, :), pointer :: xface
+
+        !       Determine the unit vectors of the local coordinate system
+        !       aligned with the rotation axis of the possible rotational
+        !       periodic section.
+        !
+        do nn = 1, nSections
+            if (sections(nn)%nSlices == 1) cycle
+
+            ! Section is rotational periodic. First determine the rotation
+            ! axis. This is the eigenvector which corresponds to the
+            ! eigenvalue 1 of the transformation matrix.
+
+            ! Store rot - i in a and initialize ind.
+
+            ind(1) = 1; ind(2) = 2; ind(3) = 3
+
+            a(1, 1) = sections(nn)%rotMatrix(1, 1) - one
+            a(1, 2) = sections(nn)%rotMatrix(1, 2)
+            a(1, 3) = sections(nn)%rotMatrix(1, 3)
+
+            a(2, 1) = sections(nn)%rotMatrix(2, 1)
+            a(2, 2) = sections(nn)%rotMatrix(2, 2) - one
+            a(2, 3) = sections(nn)%rotMatrix(2, 3)
+
+            a(3, 1) = sections(nn)%rotMatrix(3, 1)
+            a(3, 2) = sections(nn)%rotMatrix(3, 2)
+            a(3, 3) = sections(nn)%rotMatrix(3, 3) - one
+
+            ! Loop over the two times that Gaussian elimination must be
+            ! applied.
+
+            loopGauss: do k = 1, 2
+
+                ! Find the largest value in the sub-matrix.
+
+                aaa = abs(a(k, k)); row = k; col = k
+                do j = k, 3
+                    do i = k, 3
+                        bbb = abs(a(i, j))
+                        if (bbb > aaa) then
+                            aaa = bbb
+                            row = i
+                            col = j
+                        end if
+                    end do
+                end do
 
-                xx = x(i,j,k,1) - sections(sec)%rotCenter(1)
-                yy = x(i,j,k,2) - sections(sec)%rotCenter(2)
-                zz = x(i,j,k,3) - sections(sec)%rotCenter(3)
+                ! Swap the rows k and row.
 
-                ! Determine the radIal components in the local
-                ! cylindrical coordinate system of the section.
+                do j = 1, 3
+                    aaa = a(k, j)
+                    a(k, j) = a(row, j)
+                    a(row, j) = aaa
+                end do
 
-                r1 = xx*rad1(sec,1) + yy*rad1(sec,2) + zz*rad1(sec,3)
-                r2 = xx*rad2(sec,1) + yy*rad2(sec,2) + zz*rad2(sec,3)
+                ! Swap the colums k and col; also swap ind(k) and ind(col).
 
-                ! Determine the angle if r1 or r2 is nonzero.
+                i = ind(k)
+                ind(k) = ind(col)
+                ind(col) = i
+                do i = 1, 3
+                    aaa = a(i, k)
+                    a(i, k) = a(i, col)
+                    a(i, col) = aaa
+                end do
 
-                if((abs(r1) >= eps) .or. (abs(r2) >= eps)) then
+                ! Perform gaussian eliMination, because now it's sure that
+                ! the element (k,k) is non-zero.
 
-                   theta = atan2(r2,r1)
+                aaa = one/a(k, k)
+                do i = (k + 1), 3
+                    bbb = a(i, k)*aaa
+                    do j = k, 3
+                        a(i, j) = a(i, j) - bbb*a(k, j)
+                    end do
+                end do
 
-                   ! Update the minimum and maximum angle for this
-                   ! section, depending on the sign of theta.
+            end do loopGauss
+
+            ! Due to the full pivoting it is now guaranteed that the elements
+            ! a(ind(1),ind(1)) and a(ind(2),ind(2)) are nonzero and
+            ! a(ind(3),ind(3)) == zero. Remember that the rotation matrix
+            ! only has 1 eigenvalue of one. Set axis(ind(3)) to one and
+            ! determine the other two elements of the eigen vector.
+
+            axis(nn, ind(3)) = one
+            axis(nn, ind(2)) = -(a(2, 3)*axis(nn, ind(3)))/a(2, 2)
+            axis(nn, ind(1)) = -(a(1, 3)*axis(nn, ind(3)) &
+                                 + a(1, 2)*axis(nn, ind(2)))/a(1, 1)
+
+            ! Create a unit vector.
+
+            length = one/sqrt(axis(nn, 1)**2 + axis(nn, 2)**2 + axis(nn, 3)**2)
+            axis(nn, 1) = axis(nn, 1)*length
+            axis(nn, 2) = axis(nn, 2)*length
+            axis(nn, 3) = axis(nn, 3)*length
+
+            ! Make sure that the largest component of this vector is
+            ! positive, such that a unique definition of the rotation
+            ! axis is obtained. Use dot and length as a temporary
+            ! storage.
+
+            dot = axis(nn, 1); length = abs(dot)
+            if (abs(axis(nn, 2)) > length) then
+                dot = axis(nn, 2); length = abs(dot)
+            end if
+            if (abs(axis(nn, 3)) > length) then
+                dot = axis(nn, 3); length = abs(dot)
+            end if
+
+            if (dot < zero) then
+                axis(nn, 1) = -axis(nn, 1)
+                axis(nn, 2) = -axis(nn, 2)
+                axis(nn, 3) = -axis(nn, 3)
+            end if
+
+            ! Determine the two vectors which determine the plane normal
+            ! to the axis of rotation.
+
+            ! Initial guess of rad1. First try the y-axis. If not good
+            ! enough try the z-axis.
+
+            if (abs(axis(nn, 2)) < 0.707107_realType) then
+                rad1(nn, 1) = zero
+                rad1(nn, 2) = one
+                rad1(nn, 3) = zero
+            else
+                rad1(nn, 1) = zero
+                rad1(nn, 2) = zero
+                rad1(nn, 3) = one
+            end if
+
+            ! Make sure that rad1 is normal to axis. Create a unit
+            ! vector again.
+
+            dot = rad1(nn, 1)*axis(nn, 1) + rad1(nn, 2)*axis(nn, 2) &
+                  + rad1(nn, 3)*axis(nn, 3)
+            rad1(nn, 1) = rad1(nn, 1) - dot*axis(nn, 1)
+            rad1(nn, 2) = rad1(nn, 2) - dot*axis(nn, 2)
+            rad1(nn, 3) = rad1(nn, 3) - dot*axis(nn, 3)
+
+            length = one/(rad1(nn, 1)**2 + rad1(nn, 2)**2 + rad1(nn, 3)**2)
+            rad1(nn, 1) = rad1(nn, 1)*length
+            rad1(nn, 2) = rad1(nn, 2)*length
+            rad1(nn, 3) = rad1(nn, 3)*length
+
+            ! Create the second vector which spans the radIal plane. This
+            ! must be normal to both axis and rad1, i.e. the cross-product.
+
+            rad2(nn, 1) = axis(nn, 2)*rad1(nn, 3) - axis(nn, 3)*rad1(nn, 2)
+            rad2(nn, 2) = axis(nn, 3)*rad1(nn, 1) - axis(nn, 1)*rad1(nn, 3)
+            rad2(nn, 3) = axis(nn, 1)*rad1(nn, 2) - axis(nn, 2)*rad1(nn, 1)
+
+        end do
+
+        ! Initialize the values of thetaNMin, etc.
+
+        thetaNMin = zero
+        thetaNMax = -pi
+        thetaPMin = pi
+        thetaPMax = zero
+        !
+        !       Determine the local values of thetaNMin, etc. for the
+        !       different sections.
+        !
+        do nn = 1, nDom
+
+            ! Store the section id of the block a bit easier. Continue with
+            ! the next block if this section consist of only one slice.
+
+            sec = flowDoms(nn, level, sps)%sectionID
+            if (sections(sec)%nSlices == 1) cycle
+
+            ! Set the pointers for this block.
 
-                   if(theta >= zero) then
-                      thetaPMin(sec) = min(thetaPMin(sec),theta)
-                      thetaPMax(sec) = max(thetaPMax(sec),theta)
-                   endif
+            call setPointers(nn, level, sps)
 
-                   if(theta <= zero) then
-                      thetaNMin(sec) = min(thetaNMin(sec),theta)
-                      thetaNMax(sec) = max(thetaNMax(sec),theta)
-                   endif
+            ! Initialize nq1, nq2, nq3 and nq4 to 0. These integers store
+            ! the number of nodes in the first, second, third and fourth
+            ! quadrant respectivily.
 
-                   ! Determine the quadrant in which this node is located
-                   ! and update the corresponding counter.
+            nq1 = 0; nq2 = 0; nq3 = 0; nq4 = 0
 
-                   if(theta <= -half*pi) then
-                      nq3 = nq3 + 1
-                   else if(theta <= zero) then
-                      nq4 = nq4 + 1
-                   else if(theta <= half*pi) then
-                      nq1 = nq1 + 1
-                   else
-                      nq2 = nq2 + 1
-                   endif
+            ! Loop over the nodes of this block.
 
-                endif
+            do k = 1, kl
+                do j = 1, jl
+                    do i = 1, il
 
-             enddo
-          enddo
-       enddo
+                        ! Determine the coordinates relative to the
+                        ! center of rotation.
 
-       ! Modify the minimum and maximum angles if nodes are present
-       ! in multiple quadrants.
+                        xx = x(i, j, k, 1) - sections(sec)%rotCenter(1)
+                        yy = x(i, j, k, 2) - sections(sec)%rotCenter(2)
+                        zz = x(i, j, k, 3) - sections(sec)%rotCenter(3)
 
-       if(nq1 > 0 .and. nq4 > 0) then
+                        ! Determine the radIal components in the local
+                        ! cylindrical coordinate system of the section.
 
-          ! Nodes in both the 1st and 4th quadrant. Update the
-          ! corresponding minimum and maximum angle.
+                        r1 = xx*rad1(sec, 1) + yy*rad1(sec, 2) + zz*rad1(sec, 3)
+                        r2 = xx*rad2(sec, 1) + yy*rad2(sec, 2) + zz*rad2(sec, 3)
 
-          thetaNMax(sec) = zero
-          thetaPMin(sec) = zero
+                        ! Determine the angle if r1 or r2 is nonzero.
 
-       endif
+                        if ((abs(r1) >= eps) .or. (abs(r2) >= eps)) then
 
-       if(nq2 > 0 .and. nq3 > 0) then
+                            theta = atan2(r2, r1)
 
-          ! Nodes in both the 2nd and 3rd quadrant. Update the
-          ! corresponding minimum and maximum angle.
+                            ! Update the minimum and maximum angle for this
+                            ! section, depending on the sign of theta.
 
-          thetaNMin(sec) = -pi
-          thetaPMax(sec) =  pi
+                            if (theta >= zero) then
+                                thetaPMin(sec) = min(thetaPMin(sec), theta)
+                                thetaPMax(sec) = max(thetaPMax(sec), theta)
+                            end if
 
-       endif
+                            if (theta <= zero) then
+                                thetaNMin(sec) = min(thetaNMin(sec), theta)
+                                thetaNMax(sec) = max(thetaNMax(sec), theta)
+                            end if
 
-    enddo
+                            ! Determine the quadrant in which this node is located
+                            ! and update the corresponding counter.
 
-    ! Determine the minimum of the minimum angles and the maximum of
-    ! the maximum angles for all sections.
+                            if (theta <= -half*pi) then
+                                nq3 = nq3 + 1
+                            else if (theta <= zero) then
+                                nq4 = nq4 + 1
+                            else if (theta <= half*pi) then
+                                nq1 = nq1 + 1
+                            else
+                                nq2 = nq2 + 1
+                            end if
 
-    size = nSections
-    call mpi_allreduce(thetaNMax, tmp, size, adflow_real, mpi_max, &
-         ADflow_comm_world, ierr)
-    thetaNMax = tmp
+                        end if
 
-    call mpi_allreduce(thetaPMax, tmp, size, adflow_real, mpi_max, &
-         ADflow_comm_world, ierr)
-    thetaPMax = tmp
+                    end do
+                end do
+            end do
 
-    call mpi_allreduce(thetaNMin, tmp, size, adflow_real, mpi_min, &
-         ADflow_comm_world, ierr)
-    thetaNMin = tmp
+            ! Modify the minimum and maximum angles if nodes are present
+            ! in multiple quadrants.
 
-    call mpi_allreduce(thetaPMin, tmp, size, adflow_real, mpi_min, &
-         ADflow_comm_world, ierr)
-    thetaPMin = tmp
+            if (nq1 > 0 .and. nq4 > 0) then
 
-    ! Allocate the memory for rotMatrixSections, the rotation
-    ! matrices of the sections needed for the alignment.
+                ! Nodes in both the 1st and 4th quadrant. Update the
+                ! corresponding minimum and maximum angle.
 
-    allocate(rotMatrixSections(nSections,3,3), stat=ierr)
-    if(ierr /= 0)                                  &
-         call terminate("localViscousSurfaceMesh", &
-         "Memory allocation failure for &
-         &rotMatrixSections")
-    !
-    !       Determine the rotation matrix for each section, which aligns
-    !       the rotational periodic sections with other sections.
-    !
-    do nn=1,nSections
+                thetaNMax(sec) = zero
+                thetaPMin(sec) = zero
 
-       ! Test if a rotation is actually needed.
+            end if
 
-       testRot: if(sections(nn)%nSlices == 1 .or. &
-            thetaPMin(nn) == zero) then
+            if (nq2 > 0 .and. nq3 > 0) then
 
-          ! Section consist out of 1 slice or the slice crosses the
-          ! line theta == 0. For both cases the rotation matrix is
-          ! the identity matrix.
+                ! Nodes in both the 2nd and 3rd quadrant. Update the
+                ! corresponding minimum and maximum angle.
 
-          rotMatrixSections(nn,1,1) = one
-          rotMatrixSections(nn,1,2) = zero
-          rotMatrixSections(nn,1,3) = zero
+                thetaNMin(sec) = -pi
+                thetaPMax(sec) = pi
 
-          rotMatrixSections(nn,2,1) = zero
-          rotMatrixSections(nn,2,2) = one
-          rotMatrixSections(nn,2,3) = zero
+            end if
 
-          rotMatrixSections(nn,3,1) = zero
-          rotMatrixSections(nn,3,2) = zero
-          rotMatrixSections(nn,3,3) = one
+        end do
 
-       else testRot
+        ! Determine the minimum of the minimum angles and the maximum of
+        ! the maximum angles for all sections.
 
-          ! Section consist out of multiple slices and the current slice
-          ! does not cross the line theta == 0. The rotation matrix
-          ! for alignment must be computed.
+        size = nSections
+        call mpi_allreduce(thetaNMax, tmp, size, adflow_real, mpi_max, &
+                           ADflow_comm_world, ierr)
+        thetaNMax = tmp
 
-          theta = two*pi/sections(nn)%nSlices
+        call mpi_allreduce(thetaPMax, tmp, size, adflow_real, mpi_max, &
+                           ADflow_comm_world, ierr)
+        thetaPMax = tmp
 
-          ! Determine the number of rotations needed to align the mesh.
+        call mpi_allreduce(thetaNMin, tmp, size, adflow_real, mpi_min, &
+                           ADflow_comm_world, ierr)
+        thetaNMin = tmp
 
-          if(thetaNMin(nn) < zero) then
+        call mpi_allreduce(thetaPMin, tmp, size, adflow_real, mpi_min, &
+                           ADflow_comm_world, ierr)
+        thetaPMin = tmp
 
-             ! The section lies (at least partially) in the third and
-             ! fourth quadrant. Determine the number of rotations for
-             ! alignment; this is a positive number.
+        ! Allocate the memory for rotMatrixSections, the rotation
+        ! matrices of the sections needed for the alignment.
 
-             mm = -thetaNMax(nn)/theta + 1
+        allocate (rotMatrixSections(nSections, 3, 3), stat=ierr)
+        if (ierr /= 0) &
+             call terminate("localViscousSurfaceMesh", &
+             "Memory allocation failure for &
+             &rotMatrixSections")
+        !
+        !       Determine the rotation matrix for each section, which aligns
+        !       the rotational periodic sections with other sections.
+        !
+        do nn = 1, nSections
 
-          else
+            ! Test if a rotation is actually needed.
 
-             ! The section lies (completely) in the first and second
-             ! quadrant. The number of rotations will be a negative
-             ! number now.
+            testRot: if (sections(nn)%nSlices == 1 .or. &
+                         thetaPMin(nn) == zero) then
 
-             mm = -thetaPMin(nn)/theta - 1
+                ! Section consist out of 1 slice or the slice crosses the
+                ! line theta == 0. For both cases the rotation matrix is
+                ! the identity matrix.
 
-          endif
+                rotMatrixSections(nn, 1, 1) = one
+                rotMatrixSections(nn, 1, 2) = zero
+                rotMatrixSections(nn, 1, 3) = zero
 
-          ! Compute the rotation angle in the local cylindrical frame
-          ! and its sine and cosine.
+                rotMatrixSections(nn, 2, 1) = zero
+                rotMatrixSections(nn, 2, 2) = one
+                rotMatrixSections(nn, 2, 3) = zero
 
-          theta = mm*theta
-          cosTheta = cos(theta)
-          sinTheta = sin(theta)
+                rotMatrixSections(nn, 3, 1) = zero
+                rotMatrixSections(nn, 3, 2) = zero
+                rotMatrixSections(nn, 3, 3) = one
 
-          ! Apply the transformation to obtain the matrix in the
-          ! original cartesian frame.
+                else testRot
 
-          rotMatrixSections(nn,1,1) = axis(nn,1)*axis(nn,1)             &
-               + cosTheta*(rad1(nn,1)*rad1(nn,1) + rad2(nn,1)*rad2(nn,1))
-          rotMatrixSections(nn,1,2) = axis(nn,1)*axis(nn,2)             &
-               + cosTheta*(rad1(nn,1)*rad1(nn,2) + rad2(nn,1)*rad2(nn,2)) &
-               + sinTheta*(rad1(nn,2)*rad2(nn,1) - rad1(nn,1)*rad2(nn,2))
-          rotMatrixSections(nn,1,3) = axis(nn,1)*axis(nn,3)             &
-               + cosTheta*(rad1(nn,1)*rad1(nn,3) + rad2(nn,1)*rad2(nn,3)) &
-               + sinTheta*(rad1(nn,3)*rad2(nn,1) - rad1(nn,1)*rad2(nn,3))
+                ! Section consist out of multiple slices and the current slice
+                ! does not cross the line theta == 0. The rotation matrix
+                ! for alignment must be computed.
 
-          rotMatrixSections(nn,2,1) = axis(nn,1)*axis(nn,2)             &
-               + cosTheta*(rad1(nn,1)*rad1(nn,2) + rad2(nn,1)*rad2(nn,2)) &
-               - sinTheta*(rad1(nn,2)*rad2(nn,1) - rad1(nn,1)*rad2(nn,2))
-          rotMatrixSections(nn,2,2) = axis(nn,2)*axis(nn,2)             &
-               + cosTheta*(rad1(nn,2)*rad1(nn,2) + rad2(nn,2)*rad2(nn,2))
-          rotMatrixSections(nn,2,3) = axis(nn,2)*axis(nn,3)             &
-               + cosTheta*(rad1(nn,2)*rad1(nn,3) + rad2(nn,2)*rad2(nn,3)) &
-               + sinTheta*(rad1(nn,3)*rad2(nn,2) - rad1(nn,2)*rad2(nn,3))
+                theta = two*pi/sections(nn)%nSlices
 
-          rotMatrixSections(nn,3,1) = axis(nn,1)*axis(nn,3)             &
-               + cosTheta*(rad1(nn,1)*rad1(nn,3) + rad2(nn,1)*rad2(nn,3)) &
-               - sinTheta*(rad1(nn,3)*rad2(nn,1) - rad1(nn,1)*rad2(nn,3))
-          rotMatrixSections(nn,3,2) = axis(nn,2)*axis(nn,3)             &
-               + cosTheta*(rad1(nn,2)*rad1(nn,3) + rad2(nn,2)*rad2(nn,3)) &
-               - sinTheta*(rad1(nn,3)*rad2(nn,2) - rad1(nn,2)*rad2(nn,3))
-          rotMatrixSections(nn,3,3) = axis(nn,3)*axis(nn,3)             &
-               + cosTheta*(rad1(nn,3)*rad1(nn,3) + rad2(nn,3)*rad2(nn,3))
+                ! Determine the number of rotations needed to align the mesh.
 
-       endif testRot
+                if (thetaNMin(nn) < zero) then
 
-    enddo
-    !
-    !       Determine the local viscous surface grid.
-    !
-    np = 0
-    nq = 0
+                    ! The section lies (at least partially) in the third and
+                    ! fourth quadrant. Determine the number of rotations for
+                    ! alignment; this is a positive number.
 
-    loopDomains: do nn=1,nDom
+                    mm = -thetaNMax(nn)/theta + 1
 
-       ! Set the pointers for this block and store the section id
-       ! a bit easier.
+                else
 
-       call setPointers(nn, level, sps)
-       sec = sectionID
+                    ! The section lies (completely) in the first and second
+                    ! quadrant. The number of rotations will be a negative
+                    ! number now.
 
-       ! Loop over the subfaces of this block and test if this is
-       ! a viscous subface.
+                    mm = -thetaPMin(nn)/theta - 1
 
-       loopBocos: do mm=1,nBocos
-          testViscous: if(BCType(mm) == NSWallAdiabatic .or. &
-               BCType(mm) == NSWallIsothermal) then
+                end if
 
-             ! Viscous subface. Set the pointer for the coordinates of
-             ! the face.
+                ! Compute the rotation angle in the local cylindrical frame
+                ! and its sine and cosine.
+
+                theta = mm*theta
+                cosTheta = cos(theta)
+                sinTheta = sin(theta)
+
+                ! Apply the transformation to obtain the matrix in the
+                ! original cartesian frame.
+
+                rotMatrixSections(nn, 1, 1) = axis(nn, 1)*axis(nn, 1) &
+                                              + cosTheta*(rad1(nn, 1)*rad1(nn, 1) + rad2(nn, 1)*rad2(nn, 1))
+                rotMatrixSections(nn, 1, 2) = axis(nn, 1)*axis(nn, 2) &
+                                              + cosTheta*(rad1(nn, 1)*rad1(nn, 2) + rad2(nn, 1)*rad2(nn, 2)) &
+                                              + sinTheta*(rad1(nn, 2)*rad2(nn, 1) - rad1(nn, 1)*rad2(nn, 2))
+                rotMatrixSections(nn, 1, 3) = axis(nn, 1)*axis(nn, 3) &
+                                              + cosTheta*(rad1(nn, 1)*rad1(nn, 3) + rad2(nn, 1)*rad2(nn, 3)) &
+                                              + sinTheta*(rad1(nn, 3)*rad2(nn, 1) - rad1(nn, 1)*rad2(nn, 3))
+
+                rotMatrixSections(nn, 2, 1) = axis(nn, 1)*axis(nn, 2) &
+                                              + cosTheta*(rad1(nn, 1)*rad1(nn, 2) + rad2(nn, 1)*rad2(nn, 2)) &
+                                              - sinTheta*(rad1(nn, 2)*rad2(nn, 1) - rad1(nn, 1)*rad2(nn, 2))
+                rotMatrixSections(nn, 2, 2) = axis(nn, 2)*axis(nn, 2) &
+                                              + cosTheta*(rad1(nn, 2)*rad1(nn, 2) + rad2(nn, 2)*rad2(nn, 2))
+                rotMatrixSections(nn, 2, 3) = axis(nn, 2)*axis(nn, 3) &
+                                              + cosTheta*(rad1(nn, 2)*rad1(nn, 3) + rad2(nn, 2)*rad2(nn, 3)) &
+                                              + sinTheta*(rad1(nn, 3)*rad2(nn, 2) - rad1(nn, 2)*rad2(nn, 3))
+
+                rotMatrixSections(nn, 3, 1) = axis(nn, 1)*axis(nn, 3) &
+                                              + cosTheta*(rad1(nn, 1)*rad1(nn, 3) + rad2(nn, 1)*rad2(nn, 3)) &
+                                              - sinTheta*(rad1(nn, 3)*rad2(nn, 1) - rad1(nn, 1)*rad2(nn, 3))
+                rotMatrixSections(nn, 3, 2) = axis(nn, 2)*axis(nn, 3) &
+                                              + cosTheta*(rad1(nn, 2)*rad1(nn, 3) + rad2(nn, 2)*rad2(nn, 3)) &
+                                              - sinTheta*(rad1(nn, 3)*rad2(nn, 2) - rad1(nn, 2)*rad2(nn, 3))
+                rotMatrixSections(nn, 3, 3) = axis(nn, 3)*axis(nn, 3) &
+                                              + cosTheta*(rad1(nn, 3)*rad1(nn, 3) + rad2(nn, 3)*rad2(nn, 3))
+
+            end if testRot
+
+        end do
+        !
+        !       Determine the local viscous surface grid.
+        !
+        np = 0
+        nq = 0
+
+        loopDomains: do nn = 1, nDom
+
+            ! Set the pointers for this block and store the section id
+            ! a bit easier.
 
-             select case (BCFaceID(mm))
+            call setPointers(nn, level, sps)
+            sec = sectionID
 
-             case (iMin)
-                xface => x(1,1:,1:,:)
+            ! Loop over the subfaces of this block and test if this is
+            ! a viscous subface.
 
-             case (iMax)
-                xface => x(il,1:,1:,:)
+            loopBocos: do mm = 1, nBocos
+                testViscous: if (BCType(mm) == NSWallAdiabatic .or. &
+                                 BCType(mm) == NSWallIsothermal) then
 
-             case (jMin)
-                xface => x(1:,1,1:,:)
+                    ! Viscous subface. Set the pointer for the coordinates of
+                    ! the face.
 
-             case (jMax)
-                xface => x(1:,jl,1:,:)
+                    select case (BCFaceID(mm))
 
-             case (kMin)
-                xface => x(1:,1:,1,:)
+                    case (iMin)
+                        xface => x(1, 1:, 1:, :)
 
-             case (kMax)
-                xface => x(1:,1:,kl,:)
+                    case (iMax)
+                        xface => x(il, 1:, 1:, :)
 
-             end select
+                    case (jMin)
+                        xface => x(1:, 1, 1:, :)
 
-             ! Store the nodal range of this subface a bit easier.
+                    case (jMax)
+                        xface => x(1:, jl, 1:, :)
 
-             jBeg = BCData(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
-             iBeg = BCData(mm)%inBeg; iEnd = BCData(mm)%inEnd
+                    case (kMin)
+                        xface => x(1:, 1:, 1, :)
 
-             ! Store the old value of the number of points stored and
-             ! determine the new coordinates.
+                    case (kMax)
+                        xface => x(1:, 1:, kl, :)
 
-             npOld = np
-             do j=jBeg,jEnd
-                do i=iBeg,iEnd
+                    end select
 
-                   ! Determine the coordinates relative to the rotation
-                   ! center of this section.
+                    ! Store the nodal range of this subface a bit easier.
 
-                   xx = xface(i,j,1) - sections(sec)%rotCenter(1)
-                   yy = xface(i,j,2) - sections(sec)%rotCenter(2)
-                   zz = xface(i,j,3) - sections(sec)%rotCenter(3)
+                    jBeg = BCData(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+                    iBeg = BCData(mm)%inBeg; iEnd = BCData(mm)%inEnd
 
-                   ! Update the counter and determine the surface mesh
-                   ! coordinates.
+                    ! Store the old value of the number of points stored and
+                    ! determine the new coordinates.
 
-                   np = np + 1
-                   coorVisc(1,np) = rotMatrixSections(sec,1,1)*xx &
-                        + rotMatrixSections(sec,1,2)*yy &
-                        + rotMatrixSections(sec,1,3)*zz &
-                        + sections(sec)%rotCenter(1)
+                    npOld = np
+                    do j = jBeg, jEnd
+                        do i = iBeg, iEnd
 
-                   coorVisc(2,np) = rotMatrixSections(sec,2,1)*xx &
-                        + rotMatrixSections(sec,2,2)*yy &
-                        + rotMatrixSections(sec,2,3)*zz &
-                        + sections(sec)%rotCenter(2)
+                            ! Determine the coordinates relative to the rotation
+                            ! center of this section.
 
-                   coorVisc(3,np) = rotMatrixSections(sec,3,1)*xx &
-                        + rotMatrixSections(sec,3,2)*yy &
-                        + rotMatrixSections(sec,3,3)*zz &
-                        + sections(sec)%rotCenter(3)
-                enddo
-             enddo
+                            xx = xface(i, j, 1) - sections(sec)%rotCenter(1)
+                            yy = xface(i, j, 2) - sections(sec)%rotCenter(2)
+                            zz = xface(i, j, 3) - sections(sec)%rotCenter(3)
 
-             ! Determine and store the connectivity of this subface.
+                            ! Update the counter and determine the surface mesh
+                            ! coordinates.
 
-             np1 = iEnd - iBeg + 1
-             nqOld = nq
+                            np = np + 1
+                            coorVisc(1, np) = rotMatrixSections(sec, 1, 1)*xx &
+                                              + rotMatrixSections(sec, 1, 2)*yy &
+                                              + rotMatrixSections(sec, 1, 3)*zz &
+                                              + sections(sec)%rotCenter(1)
 
-             do j=(jBeg+1),jEnd
-                do i=(iBeg+1),iEnd
+                            coorVisc(2, np) = rotMatrixSections(sec, 2, 1)*xx &
+                                              + rotMatrixSections(sec, 2, 2)*yy &
+                                              + rotMatrixSections(sec, 2, 3)*zz &
+                                              + sections(sec)%rotCenter(2)
 
-                   ! Update the counter nq and determine the 4 indices
-                   ! of the surface quad.
+                            coorVisc(3, np) = rotMatrixSections(sec, 3, 1)*xx &
+                                              + rotMatrixSections(sec, 3, 2)*yy &
+                                              + rotMatrixSections(sec, 3, 3)*zz &
+                                              + sections(sec)%rotCenter(3)
+                        end do
+                    end do
 
-                   nq = nq + 1
+                    ! Determine and store the connectivity of this subface.
 
-                   connVisc(1,nq) = npOld + (j-jBeg-1)*np1 + i - iBeg
-                   connVisc(2,nq) = connVisc(1,nq) + 1
-                   connVisc(3,nq) = connVisc(2,nq) + np1
-                   connVisc(4,nq) = connVisc(3,nq) - 1
+                    np1 = iEnd - iBeg + 1
+                    nqOld = nq
 
-                enddo
-             enddo
+                    do j = (jBeg + 1), jEnd
+                        do i = (iBeg + 1), iEnd
 
-             ! Loop over the number of times the subface must be stored.
-             ! This happens when the rotational periodicity differs from
-             ! section to section. Note that this loop starts at k == 2.
+                            ! Update the counter nq and determine the 4 indices
+                            ! of the surface quad.
 
-             loopMultiplicity: do k=2,multSections(sec)
+                            nq = nq + 1
 
-                ! Store the current number of nodes and quads
-                ! in mp and mq respectivily.
+                            connVisc(1, nq) = npOld + (j - jBeg - 1)*np1 + i - iBeg
+                            connVisc(2, nq) = connVisc(1, nq) + 1
+                            connVisc(3, nq) = connVisc(2, nq) + np1
+                            connVisc(4, nq) = connVisc(3, nq) - 1
 
-                mp = np
-                mq = nq
+                        end do
+                    end do
 
-                ! Loop over the of points on this subface.
+                    ! Loop over the number of times the subface must be stored.
+                    ! This happens when the rotational periodicity differs from
+                    ! section to section. Note that this loop starts at k == 2.
 
-                do i=(npOld+1),mp
+                    loopMultiplicity: do k = 2, multSections(sec)
 
-                   ! Determine the coordinates relative to the center
-                   ! of rotation.
+                        ! Store the current number of nodes and quads
+                        ! in mp and mq respectivily.
 
-                   np = np + 1
+                        mp = np
+                        mq = nq
 
-                   xx = coorVisc(1,i) - sections(sec)%rotCenter(1)
-                   yy = coorVisc(2,i) - sections(sec)%rotCenter(2)
-                   zz = coorVisc(3,i) - sections(sec)%rotCenter(3)
+                        ! Loop over the of points on this subface.
 
-                   ! Update the counter np and determine the new
-                   ! coordinates after the transformation.
+                        do i = (npOld + 1), mp
 
-                   coorVisc(1,np) = sections(sec)%rotMatrix(1,1)*xx &
-                        + sections(sec)%rotMatrix(1,2)*yy &
-                        + sections(sec)%rotMatrix(1,3)*zz &
-                        + sections(sec)%rotCenter(1)      &
-                        + sections(sec)%translation(1)
+                            ! Determine the coordinates relative to the center
+                            ! of rotation.
 
-                   coorVisc(2,np) = sections(sec)%rotMatrix(2,1)*xx &
-                        + sections(sec)%rotMatrix(2,2)*yy &
-                        + sections(sec)%rotMatrix(2,3)*zz &
-                        + sections(sec)%rotCenter(2)      &
-                        + sections(sec)%translation(2)
+                            np = np + 1
 
-                   coorVisc(3,np) = sections(sec)%rotMatrix(3,1)*xx &
-                        + sections(sec)%rotMatrix(3,2)*yy &
-                        + sections(sec)%rotMatrix(3,3)*zz &
-                        + sections(sec)%rotCenter(3)      &
-                        + sections(sec)%translation(3)
-                enddo
+                            xx = coorVisc(1, i) - sections(sec)%rotCenter(1)
+                            yy = coorVisc(2, i) - sections(sec)%rotCenter(2)
+                            zz = coorVisc(3, i) - sections(sec)%rotCenter(3)
 
-                ! Store the number of nodes in this subface in j
-                ! and determine the connectivity of this rotated part.
+                            ! Update the counter np and determine the new
+                            ! coordinates after the transformation.
 
-                j = np - mp
-                do i=(nqOld+1),mq
+                            coorVisc(1, np) = sections(sec)%rotMatrix(1, 1)*xx &
+                                              + sections(sec)%rotMatrix(1, 2)*yy &
+                                              + sections(sec)%rotMatrix(1, 3)*zz &
+                                              + sections(sec)%rotCenter(1) &
+                                              + sections(sec)%translation(1)
 
-                   ! Update the counter nq and set the new connectivity,
-                   ! which is the old connectivity plus an offset.
+                            coorVisc(2, np) = sections(sec)%rotMatrix(2, 1)*xx &
+                                              + sections(sec)%rotMatrix(2, 2)*yy &
+                                              + sections(sec)%rotMatrix(2, 3)*zz &
+                                              + sections(sec)%rotCenter(2) &
+                                              + sections(sec)%translation(2)
 
-                   nq = nq + 1
-                   connVisc(1,nq) = connVisc(1,i) + j
-                   connVisc(2,nq) = connVisc(2,i) + j
-                   connVisc(3,nq) = connVisc(3,i) + j
-                   connVisc(4,nq) = connVisc(4,i) + j
+                            coorVisc(3, np) = sections(sec)%rotMatrix(3, 1)*xx &
+                                              + sections(sec)%rotMatrix(3, 2)*yy &
+                                              + sections(sec)%rotMatrix(3, 3)*zz &
+                                              + sections(sec)%rotCenter(3) &
+                                              + sections(sec)%translation(3)
+                        end do
 
-                enddo
+                        ! Store the number of nodes in this subface in j
+                        ! and determine the connectivity of this rotated part.
+
+                        j = np - mp
+                        do i = (nqOld + 1), mq
+
+                            ! Update the counter nq and set the new connectivity,
+                            ! which is the old connectivity plus an offset.
+
+                            nq = nq + 1
+                            connVisc(1, nq) = connVisc(1, i) + j
+                            connVisc(2, nq) = connVisc(2, i) + j
+                            connVisc(3, nq) = connVisc(3, i) + j
+                            connVisc(4, nq) = connVisc(4, i) + j
+
+                        end do
+
+                        ! Copy the values of mp and mq into npOld and nqOld for
+                        ! the next multiple of the slice. Idem for np in mp, etc.
+
+                        npOld = mp
+                        nqOld = mq
+
+                        mp = np
+                        mq = nq
+
+                    end do loopMultiplicity
+
+                end if testViscous
+            end do loopBocos
+        end do loopDomains
+
+    end subroutine localViscousSurfaceMesh
+
+    subroutine updateWallDistanceAllLevels
+        !
+        !       updateWallDistanceAllLevels updates the wall distances for
+        !       the cell centers on all grid levels. This routine is typically
+        !       called when grid parts have been moved, either due to a
+        !       physical motion of some parts or due to deformation.
+        !
+        use constants
+        use block, only: flowDoms
+        use inputPhysics, only: equations
+        use iteration, only: groundLevel
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nLevels, nn
+
+        ! Return immediately if the rans equations are not solved.
+
+        if (equations /= RANSEquations) return
+
+        ! Loop over the grid levels and call wallDistance.
+
+        nLevels = ubound(flowDoms, 2)
+        do nn = groundLevel, nLevels
+            call computeWallDistance(nn, .false.)
+        end do
+
+    end subroutine updateWallDistanceAllLevels
+
+    subroutine viscousSurfaceMesh(level, sps)
+        !
+        !       viscousSurfaceMesh determines and stores the entire viscous
+        !       surface possibly extended by periodic parts.
+        !
+        use constants
+        use block, only: flowDoms, nDom
+        use communication, only: adflow_comm_world, myid
+        use section, only: nsections, sections
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level, sps
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: nn, mm, ii
+        integer(kind=intType) :: ni, nj, nk
+
+        integer(kind=intType), dimension(nSections) :: multSections
+
+        ! Determine the minimum number of slices present in a certain
+        ! section. For time accurate computations the number of slices
+        ! is identical for all sections, but for steady flow using the
+        ! mixing plane assumption this is not necessarily the case.
+
+        mm = sections(1)%nSlices
+        do nn = 2, nSections
+            mm = min(mm, sections(nn)%nSlices)
+        end do
+
+        ! Determine the multiplicity of every section needed in the
+        ! surface mesh, such that every part covers an angle which is
+        ! at least equal to the angle of the largest section. Again note
+        ! that this multiplicity is 1 for all sections if a time
+        ! accurate computation is performed.
+
+        do nn = 1, nSections
+            multSections(nn) = sections(nn)%nSlices/mm
+            if (sections(nn)%nSlices > mm*multSections(nn)) &
+                multSections(nn) = multSections(nn) + 1
+        end do
+
+        ! Determine the local number of viscous nodes and quads.
+        ! Note that these numbers are identical for all spectral
+        ! solutions and thus it is okay to take the 1st one.
+
+        nNodeVisc = 0
+        nquadVisc = 0
+
+        do nn = 1, nDom
+            do mm = 1, flowDoms(nn, level, 1)%nBocos
+                if (flowDoms(nn, level, 1)%BCType(mm) == NSWallAdiabatic .or. &
+                    flowDoms(nn, level, 1)%BCType(mm) == NSWallIsothermal) then
+
+                    ! Determine the number of nodes of the subface in the
+                    ! three directions.
+
+                    ni = flowDoms(nn, level, 1)%inEnd(mm) &
+                         - flowDoms(nn, level, 1)%inBeg(mm)
+                    nj = flowDoms(nn, level, 1)%jnEnd(mm) &
+                         - flowDoms(nn, level, 1)%jnBeg(mm)
+                    nk = flowDoms(nn, level, 1)%knEnd(mm) &
+                         - flowDoms(nn, level, 1)%knBeg(mm)
+
+                    ! Determine the multiplication factor, because of the
+                    ! possible multiple sections.
+
+                    ii = flowDoms(nn, level, 1)%sectionId
+                    ii = multSections(ii)
+
+                    ! Update the number of nodes and quads. Take the
+                    ! multiplicity into account.
+
+                    nNodeVisc = nNodeVisc + ii*(ni + 1)*(nj + 1)*(nk + 1)
+                    nquadVisc = nquadVisc + ii*max(ni, 1_intType) &
+                                *max(nj, 1_intType) &
+                                *max(nk, 1_intType)
+                end if
+            end do
+        end do
+
+        ! Determine the global number of elements on the viscous
+        ! surfaces. Return if there are no viscous quads present.
+
+        call mpi_allreduce(nQuadVisc, nquadViscGlob, 1, adflow_integer, &
+                           mpi_sum, ADflow_comm_world, ierr)
+
+        if (nquadViscGlob == 0) return
+
+        ! Allocate the memory for the local connectivity and coordinates.
+
+        allocate (connVisc(4, nquadVisc), coorVisc(3, nNodeVisc), &
+                  stat=ierr)
+        if (ierr /= 0) &
+             call terminate("viscousSurfaceMesh", &
+             "Memory allocation failure for connVisc &
+             &and coorVisc.")
+
+        ! Determine the local viscous surface mesh, possibly rotated
+        ! to align the other sections.
+
+        call localViscousSurfaceMesh(multSections, level, sps)
+
+    end subroutine viscousSurfaceMesh
+
+    subroutine determineWallAssociation(level, sps)
+
+        ! This routine will determine the closest surface point for every
+        ! field cell. Special treatment is required for overlapping surfaces.
+
+        use constants
+        use adtAPI, only: minDistanceSearch
+        use adtData, only: adtBBOXTargetType
+        use adtLocalSearch, only: mindistancetreesearchsinglepoint
+        use adtUtils, only: stack
+        use adtBuild, only: buildSerialQuad, destroyserialquad
+        use blockPointers
+        use communication
+        use inputphysics
+        use inputTimeSpectral
+        use oversetData, only: oversetPresent, oversetWall, nClusters, clusters, cumDomProc
+        use inputOverset
+        use adjointVars
+        use surfaceFamilies, only: BCFamGroups
+        use utils, only: setPointers, EChk
+        use sorting, only: unique
+        implicit none
+
+        ! Input Variables
+        integer(kind=intType), intent(in) :: level, sps
+
+        ! Local Variables
+        integer(kind=intType) :: i, j, k, l, ii, jj, kk, nn, mm, iNode, iCell, c
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, ni, nj, nUnique, cellID, cellID2
+        integer(kind=intType) :: ierr, iDim
+
+        ! Data for local surface
+        integer(kind=intType) :: nNodes, nCells
+        logical :: gridHasOverset
+
+        ! Overset Walls for storing the surface ADT's
+        type(oversetWall), dimension(:), allocatable, target :: walls
+        type(oversetWall), target :: fullWall
+        integer(kind=intType), dimension(:), allocatable :: link, indicesToGet
+
+        ! Data for the ADT
+        integer(kind=intType) :: intInfo(3), intInfo2(3)
+        real(kind=realType) :: coor(4), uvw(5), uvw2(5)
+        real(kind=realType), dimension(3, 2) :: dummy
+        real(kind=realType), parameter :: tol = 1e-12
+        integer(kind=intType), dimension(:), pointer :: frontLeaves, frontLeavesNew, &
+                                                        BBint, wallFamList
+        type(adtBBoxTargetType), dimension(:), pointer :: BB
+        real(kind=realType), dimension(3) :: xp
+
+        ! The first thing we do is gather all the surface nodes to
+        ! each processor such that every processor can make it's own copy of
+        ! the complete surface mesh to use to search. Note that this
+        ! procedure *DOES NOT SCALE IN MEMORY*...ie eventually the surface
+        ! mesh will become too large to store on a single processor,
+        ! although this will probably not happen until the sizes get up in
+        ! the hundreds of millions of cells.
+
+        allocate (walls(nClusters))
+        wallFamList => BCFamGroups(iBCGroupWalls)%famList
+        call buildClusterWalls(level, sps, .False., walls, walLFamList, size(wallFamList))
+
+        if (oversetPresent) then
+            ! Finally build up a "full wall" that is made up of all the cluster
+            ! walls.
+
+            nNodes = 0
+            nCells = 0
+            do i = 1, nClusters
+                nNodes = nNodes + walls(i)%nNodes
+                nCells = nCells + walls(i)%nCells
+            end do
 
-                ! Copy the values of mp and mq into npOld and nqOld for
-                ! the next multiple of the slice. Idem for np in mp, etc.
+            allocate (fullWall%x(3, nNodes))
+            allocate (fullWall%conn(4, nCells))
+            allocate (fullWall%ind(nNodes))
 
-                npOld = mp
-                nqOld = mq
+            if (useRoughSA) then
+                allocate (fullWall%indCell(nCells))
+            end if
 
-                mp = np
-                mq = nq
+            nNodes = 0
+            nCells = 0
+            ii = 0
+            do i = 1, nClusters
 
-             enddo loopMultiplicity
+                ! Add in the nodes/elements from this cluster
 
-          endif testViscous
-       enddo loopBocos
-    enddo loopDomains
+                do j = 1, walls(i)%nNodes
+                    nNodes = nNodes + 1
+                    fullWall%x(:, nNodes) = walls(i)%x(:, j)
+                    fullWall%ind(nNodes) = walls(i)%ind(j)
+                end do
 
-  end subroutine localViscousSurfaceMesh
+                do j = 1, walls(i)%nCells
+                    nCells = nCells + 1
+                    fullWall%conn(:, nCells) = walls(i)%conn(:, j) + ii
+                    if (useRoughSA) then
+                        fullWall%indCell(nCells) = walls(i)%indCell(j)
+                    end if
+                end do
 
-  subroutine updateWallDistanceAllLevels
-    !
-    !       updateWallDistanceAllLevels updates the wall distances for
-    !       the cell centers on all grid levels. This routine is typically
-    !       called when grid parts have been moved, either due to a
-    !       physical motion of some parts or due to deformation.
-    !
-    use constants
-    use block, only : flowDoms
-    use inputPhysics, only : equations
-    use iteration, only : groundLevel
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nLevels, nn
-
-    ! Return immediately if the rans equations are not solved.
-
-    if(equations /= RANSEquations) return
-
-    ! Loop over the grid levels and call wallDistance.
-
-    nLevels = ubound(flowDoms,2)
-    do nn=groundLevel,nLevels
-       call computeWallDistance(nn, .false.)
-    enddo
-
-  end subroutine updateWallDistanceAllLevels
-
-  subroutine viscousSurfaceMesh(level, sps)
-    !
-    !       viscousSurfaceMesh determines and stores the entire viscous
-    !       surface possibly extended by periodic parts.
-    !
-    use constants
-    use block, only : flowDoms, nDom
-    use communication, only : adflow_comm_world, myid
-    use section, only : nsections, sections
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level, sps
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: nn, mm, ii
-    integer(kind=intType) :: ni, nj, nk
-
-    integer(kind=intType), dimension(nSections) :: multSections
-
-    ! Determine the minimum number of slices present in a certain
-    ! section. For time accurate computations the number of slices
-    ! is identical for all sections, but for steady flow using the
-    ! mixing plane assumption this is not necessarily the case.
-
-    mm = sections(1)%nSlices
-    do nn=2,nSections
-       mm = min(mm,sections(nn)%nSlices)
-    enddo
-
-    ! Determine the multiplicity of every section needed in the
-    ! surface mesh, such that every part covers an angle which is
-    ! at least equal to the angle of the largest section. Again note
-    ! that this multiplicity is 1 for all sections if a time
-    ! accurate computation is performed.
-
-    do nn=1,nSections
-       multSections(nn) = sections(nn)%nSlices/mm
-       if(sections(nn)%nSlices > mm*multSections(nn)) &
-            multSections(nn) = multSections(nn) + 1
-    enddo
-
-    ! Determine the local number of viscous nodes and quads.
-    ! Note that these numbers are identical for all spectral
-    ! solutions and thus it is okay to take the 1st one.
-
-    nNodeVisc = 0
-    nquadVisc = 0
-
-    do nn=1,nDom
-       do mm=1,flowDoms(nn,level,1)%nBocos
-          if(flowDoms(nn,level,1)%BCType(mm) == NSWallAdiabatic .or. &
-               flowDoms(nn,level,1)%BCType(mm) == NSWallIsothermal) then
-
-             ! Determine the number of nodes of the subface in the
-             ! three directions.
-
-             ni = flowDoms(nn,level,1)%inEnd(mm) &
-                  - flowDoms(nn,level,1)%inBeg(mm)
-             nj = flowDoms(nn,level,1)%jnEnd(mm) &
-                  - flowDoms(nn,level,1)%jnBeg(mm)
-             nk = flowDoms(nn,level,1)%knEnd(mm) &
-                  - flowDoms(nn,level,1)%knBeg(mm)
-
-             ! Determine the multiplication factor, because of the
-             ! possible multiple sections.
-
-             ii = flowDoms(nn,level,1)%sectionId
-             ii = multSections(ii)
-
-             ! Update the number of nodes and quads. Take the
-             ! multiplicity into account.
-
-             nNodeVisc = nNodeVisc + ii*(ni+1)*(nj+1)*(nk+1)
-             nquadVisc = nquadVisc + ii*max(ni,1_intType) &
-                  *                max(nj,1_intType) &
-                  *                max(nk,1_intType)
-          endif
-       enddo
-    enddo
-
-    ! Determine the global number of elements on the viscous
-    ! surfaces. Return if there are no viscous quads present.
-
-    call mpi_allreduce(nQuadVisc, nquadViscGlob, 1, adflow_integer, &
-         mpi_sum, ADflow_comm_world, ierr)
-
-    if(nquadViscGlob == 0) return
-
-    ! Allocate the memory for the local connectivity and coordinates.
-
-    allocate(connVisc(4,nquadVisc), coorVisc(3,nNodeVisc), &
-         stat=ierr)
-    if(ierr /= 0)                            &
-         call terminate("viscousSurfaceMesh", &
-         "Memory allocation failure for connVisc &
-         &and coorVisc.")
-
-    ! Determine the local viscous surface mesh, possibly rotated
-    ! to align the other sections.
-
-    call localViscousSurfaceMesh(multSections, level, sps)
-
-  end subroutine viscousSurfaceMesh
-
-  subroutine determineWallAssociation(level, sps)
-
-    ! This routine will determine the closest surface point for every
-    ! field cell. Special treatment is required for overlapping surfaces.
-
-    use constants
-    use adtAPI, only : minDistanceSearch
-    use adtData, only : adtBBOXTargetType
-    use adtLocalSearch, only :  mindistancetreesearchsinglepoint
-    use adtUtils, only : stack
-    use adtBuild, only : buildSerialQuad, destroyserialquad
-    use blockPointers
-    use communication
-    use inputphysics
-    use inputTimeSpectral
-    use oversetData, only : oversetPresent, oversetWall, nClusters, clusters, cumDomProc
-    use inputOverset
-    use adjointVars
-    use surfaceFamilies, only : BCFamGroups
-    use utils, only : setPointers, EChk
-    use sorting, only : unique
-    implicit none
+                ! Increment the node offset
+                ii = ii + walls(i)%nNodes
+            end do
 
-    ! Input Variables
-    integer(kind=intType), intent(in) :: level, sps
-
-    ! Local Variables
-    integer(kind=intType) :: i, j, k, l, ii, jj, kk, nn, mm, iNode, iCell, c
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, ni, nj, nUnique, cellID, cellID2
-    integer(kind=intType) :: ierr, iDim
-
-    ! Data for local surface
-    integer(kind=intType) :: nNodes, nCells
-    logical :: gridHasOverset
-
-    ! Overset Walls for storing the surface ADT's
-    type(oversetWall), dimension(:), allocatable, target :: walls
-    type(oversetWall), target :: fullWall
-    integer(kind=intType), dimension(:),  allocatable :: link, indicesToGet
-
-    ! Data for the ADT
-    integer(kind=intType) :: intInfo(3), intInfo2(3)
-    real(kind=realType) :: coor(4), uvw(5), uvw2(5)
-    real(kind=realType), dimension(3, 2) :: dummy
-    real(kind=realType), parameter :: tol=1e-12
-    integer(kind=intType), dimension(:), pointer :: frontLeaves, frontLeavesNew, &
-         BBint, wallFamList
-    type(adtBBoxTargetType), dimension(:), pointer :: BB
-    real(kind=realType), dimension(3) :: xp
-
-    ! The first thing we do is gather all the surface nodes to
-    ! each processor such that every processor can make it's own copy of
-    ! the complete surface mesh to use to search. Note that this
-    ! procedure *DOES NOT SCALE IN MEMORY*...ie eventually the surface
-    ! mesh will become too large to store on a single processor,
-    ! although this will probably not happen until the sizes get up in
-    ! the hundreds of millions of cells.
-
-    allocate(walls(nClusters))
-    wallFamList => BCFamGroups(iBCGroupWalls)%famList
-    call buildClusterWalls(level, sps, .False., walls, walLFamList, size(wallFamList))
-
-    if (oversetPresent) then
-       ! Finally build up a "full wall" that is made up of all the cluster
-       ! walls.
-
-       nNodes = 0
-       nCells = 0
-       do i=1, nClusters
-          nNodes = nNodes+ walls(i)%nNodes
-          nCells = nCells + walls(i)%nCells
-       end do
-
-       allocate(fullWall%x(3, nNodes))
-       allocate(fullWall%conn(4, nCells))
-       allocate(fullWall%ind(nNodes))
-
-       if (useRoughSA) then
-          allocate(fullWall%indCell(nCells))
-       end if
-
-       nNodes = 0
-       nCells = 0
-       ii = 0
-       do i=1, nClusters
-
-          ! Add in the nodes/elements from this cluster
-
-          do j=1, walls(i)%nNodes
-             nNodes = nNodes + 1
-             fullWall%x(:, nNodes) = walls(i)%x(:, j)
-             fullWall%ind(nNodes) = walls(i)%ind(j)
-          end do
-
-          do j=1, walls(i)%nCells
-             nCells = nCells + 1
-             fullWall%conn(:, nCells) = walls(i)%conn(:, j) + ii
-             if (useRoughSA) then
-                fullWall%indCell(nCells) = walls(i)%indCell(j)
-             end if
-          end do
-
-          ! Increment the node offset
-          ii = ii + walls(i)%nNodes
-       end do
-
-       ! Finish the setup of the full wall.
-       fullWall%nCells = nCells
-       fullWall%nNodes = nNodes
-       call buildSerialQuad(nCells, nNodes, fullWall%x, fullWall%conn, fullWall%ADT)
-    end if
-
-    ! Allocate the (pointer) memory that may be resized as necessary for
-    ! the singlePoint search routine.
-    allocate(stack(100), BB(20), BBint(20), frontLeaves(25), frontLeavesNew(25))
-
-    ! We need to store the 4 global node indices defining the quad that
-    ! each point has the closest point wrt. We also ned to store the uv
-    ! values. This allows us to recompute the exact surface point, after
-    ! the rquired nodes are fetched from (a possibly) remote proc.
-
-    do nn=1,nDom
-       call setPointers(nn, level, sps)
-
-       ! Check if elemID and uv are allocated yet.
-       if (.not. associated(flowDoms(nn,level,sps)%surfNodeIndices)) then
-          allocate(flowDoms(nn,level,sps)%surfNodeIndices(4, 2:il, 2:jl, 2:kl))
-          allocate(flowDoms(nn,level,sps)%uv(2, 2:il, 2:jl, 2:kl))
-          if (useRoughSA) then
-             allocate(flowDoms(nn,level,sps)%nearestWallCellInd(2:il, 2:jl, 2:kl))
-          end if
-       end if
-
-       ! Set the cluster for this block
-       c = clusters(cumDomProc(myid) + nn)
-
-       do k=2, kl
-          do j=2, jl
-             do i=2, il
-
-                ! Compute the coordinates of the cell center
-                coor(1) = eighth*(x(i-1,j-1,k-1,1) + x(i,j-1,k-1,1)  &
-                     +         x(i-1,j,  k-1,1) + x(i,j,  k-1,1)  &
-                     +         x(i-1,j-1,k,  1) + x(i,j-1,k,  1)  &
-                     +         x(i-1,j,  k,  1) + x(i,j,  k,  1))
-
-                coor(2) = eighth*(x(i-1,j-1,k-1,2) + x(i,j-1,k-1,2)  &
-                     +         x(i-1,j,  k-1,2) + x(i,j,  k-1,2)  &
-                     +         x(i-1,j-1,k,  2) + x(i,j-1,k,  2)  &
-                     +         x(i-1,j,  k,  2) + x(i,j,  k,  2))
-
-                coor(3) = eighth*(x(i-1,j-1,k-1,3) + x(i,j-1,k-1,3)  &
-                     +         x(i-1,j,  k-1,3) + x(i,j,  k-1,3)  &
-                     +         x(i-1,j-1,k,  3) + x(i,j-1,k,  3)  &
-                     +         x(i-1,j,  k,  3) + x(i,j,  k,  3))
-
-                if (.not. oversetPresent) then
-                   ! No overset present. Simply search our own wall,
-                   ! walls(c), (the only one we have) up to the wall
-                   ! cutoff.
-                   coor(4) = wallDistCutoff**2
-                   intInfo(3) = 0 ! Must be initialized since the search
-                   ! may not find closer point.
-                   call minDistancetreeSearchSinglePoint(walls(c)%ADT, coor, intInfo, &
-                        uvw, dummy, 0, BB, frontLeaves, frontLeavesNew)
-
-                   cellID = intInfo(3)
-                   if (cellID > 0) then
-                      do kk=1,4
-                         flowDoms(nn, level, sps)%surfNodeIndices(kk, i, j, k) = &
-                              walls(c)%ind(walls(c)%conn(kk, cellID))
-                      end do
-                      flowDoms(nn, level, sps)%uv(:, i, j, k) = uvw(1:2)
-                      if (useRoughSA) then
-                         flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) = walls(c)%indCell(cellID)
-                      end if
-                   else
-                      ! Just set dummy values. These will never be used.
-                      flowDoms(nn, level, sps)%surfNodeIndices(:, i, j, k) = 0
-                      flowDoms(nn, level, sps)%uv(:, i, j, k) = 0
-                      if (useRoughSA) then
-                          flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) = -1
-                      end if
-                   end if
-
-                   ! We are done with this point.
-                   cycle
-                end if
+            ! Finish the setup of the full wall.
+            fullWall%nCells = nCells
+            fullWall%nNodes = nNodes
+            call buildSerialQuad(nCells, nNodes, fullWall%x, fullWall%conn, fullWall%ADT)
+        end if
 
-                ! This is now the overset (possibly) overlapping surface
-                ! mesh case. It is somewhat more complex since we use
-                ! the same searches to flag cells that are inside the
-                ! body.
-
-                coor(4) = wallDistCutoff**2
-                intInfo(3) = 0
-                call minDistancetreeSearchSinglePoint(fullWall%ADT, coor, &
-                     intInfo, uvw, dummy, 0, BB, frontLeaves, frontLeavesNew)
-                cellID = intInfo(3)
-
-                if (cellID > 0) then
-                   ! We found the cell:
-
-                   ! If the cell is outside of near-wall distance or our
-                   ! cluster doesn't have any owned cells. Just accept it.
-                   if (uvw(4) > nearWallDist**2 .or. walls(c)%nCells == 0) then
-
-                      do kk=1,4
-                         flowDoms(nn, level, sps)%surfNodeIndices(kk, i, j, k) = &
-                              fullWall%ind(fullWall%conn(kk, cellID))
-                      end do
-                      flowDoms(nn, level, sps)%uv(:, i, j, k) = uvw(1:2)
-                      if (useRoughSA) then
-                         flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) = fullWall%indCell(cellID)
-                      end if
-                   else
-
-                      ! This point is *closer* than the nearWallDist AND
-                      ! it has a wall. Search on our own wall.
-
-                      coor(4) = large
-                      call minDistancetreeSearchSinglePoint(walls(c)%ADT, coor, &
-                           intInfo2, uvw2, dummy, 0, BB, frontLeaves, frontLeavesNew)
-                      cellID2 = intInfo2(3)
-
-                      if (uvw2(4) < nearWallDist**2) then
-                         ! Both are close to the wall. Accept the one
-                         ! from our own wall unconditionally.
-                         do kk=1,4
-                            flowDoms(nn, level, sps)%surfNodeIndices(kk, i, j, k) = &
-                                 walls(c)%ind(walls(c)%conn(kk, cellID2))
-                         end do
-                         flowDoms(nn, level, sps)%uv(:, i, j, k) = uvw2(1:2)
-                         if (useRoughSA) then
-                            flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) = fullWall%indCell(cellID2)
-                         end if
-                      else
-                         ! The full wall distance is better. Take that.
-
-                         do kk=1,4
-                            flowDoms(nn, level, sps)%surfNodeIndices(kk, i, j, k) = &
-                                 fullWall%ind(fullWall%conn(kk, cellID))
-                         end do
-                         flowDoms(nn, level, sps)%uv(:, i, j, k) = uvw(1:2)
-                         if (useRoughSA) then
-                            flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) = fullWall%indCell(cellID)
-                         end if
-                      end if
-                   end if
-                else
+        ! Allocate the (pointer) memory that may be resized as necessary for
+        ! the singlePoint search routine.
+        allocate (stack(100), BB(20), BBint(20), frontLeaves(25), frontLeavesNew(25))
 
-                   ! What happend here is a cell is outside the
-                   ! wallDistCutoff. We don't care about wall distance
-                   ! info here so just set dummy info.
+        ! We need to store the 4 global node indices defining the quad that
+        ! each point has the closest point wrt. We also ned to store the uv
+        ! values. This allows us to recompute the exact surface point, after
+        ! the rquired nodes are fetched from (a possibly) remote proc.
 
-                   flowDoms(nn, level, sps)%surfNodeIndices(:, i, j, k) = 0
-                   flowDoms(nn, level, sps)%uv(:, i, j, k) = 0
-                   if (useRoughSA) then
-                       flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) = -1
-                   end if
+        do nn = 1, nDom
+            call setPointers(nn, level, sps)
 
+            ! Check if elemID and uv are allocated yet.
+            if (.not. associated(flowDoms(nn, level, sps)%surfNodeIndices)) then
+                allocate (flowDoms(nn, level, sps)%surfNodeIndices(4, 2:il, 2:jl, 2:kl))
+                allocate (flowDoms(nn, level, sps)%uv(2, 2:il, 2:jl, 2:kl))
+                if (useRoughSA) then
+                    allocate (flowDoms(nn, level, sps)%nearestWallCellInd(2:il, 2:jl, 2:kl))
                 end if
-             end do
-          end do
-       end do
-    end do
-
-    ! Now determine all the node indices this processor needs to get.
-    mm = 0
-    allocate(indicesToGet(nCellsLocal(level)*4), link(nCellsLocal(level)*4))
-    do nn=1, nDom
-       call setPointers(nn, level, sps)
-       do k=2, kl
-          do j=2, jl
-             do i=2, il
-                do kk=1,4
-                   mm = mm + 1
-                   indicesToGet(mm) = flowDoms(nn, level, sps)%surfNodeIndices(kk, i, j, k)
+            end if
+
+            ! Set the cluster for this block
+            c = clusters(cumDomProc(myid) + nn)
+
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+
+                        ! Compute the coordinates of the cell center
+                        coor(1) = eighth*(x(i - 1, j - 1, k - 1, 1) + x(i, j - 1, k - 1, 1) &
+                                          + x(i - 1, j, k - 1, 1) + x(i, j, k - 1, 1) &
+                                          + x(i - 1, j - 1, k, 1) + x(i, j - 1, k, 1) &
+                                          + x(i - 1, j, k, 1) + x(i, j, k, 1))
+
+                        coor(2) = eighth*(x(i - 1, j - 1, k - 1, 2) + x(i, j - 1, k - 1, 2) &
+                                          + x(i - 1, j, k - 1, 2) + x(i, j, k - 1, 2) &
+                                          + x(i - 1, j - 1, k, 2) + x(i, j - 1, k, 2) &
+                                          + x(i - 1, j, k, 2) + x(i, j, k, 2))
+
+                        coor(3) = eighth*(x(i - 1, j - 1, k - 1, 3) + x(i, j - 1, k - 1, 3) &
+                                          + x(i - 1, j, k - 1, 3) + x(i, j, k - 1, 3) &
+                                          + x(i - 1, j - 1, k, 3) + x(i, j - 1, k, 3) &
+                                          + x(i - 1, j, k, 3) + x(i, j, k, 3))
+
+                        if (.not. oversetPresent) then
+                            ! No overset present. Simply search our own wall,
+                            ! walls(c), (the only one we have) up to the wall
+                            ! cutoff.
+                            coor(4) = wallDistCutoff**2
+                            intInfo(3) = 0 ! Must be initialized since the search
+                            ! may not find closer point.
+                            call minDistancetreeSearchSinglePoint(walls(c)%ADT, coor, intInfo, &
+                                                                  uvw, dummy, 0, BB, frontLeaves, frontLeavesNew)
+
+                            cellID = intInfo(3)
+                            if (cellID > 0) then
+                                do kk = 1, 4
+                                    flowDoms(nn, level, sps)%surfNodeIndices(kk, i, j, k) = &
+                                        walls(c)%ind(walls(c)%conn(kk, cellID))
+                                end do
+                                flowDoms(nn, level, sps)%uv(:, i, j, k) = uvw(1:2)
+                                if (useRoughSA) then
+                                    flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) = walls(c)%indCell(cellID)
+                                end if
+                            else
+                                ! Just set dummy values. These will never be used.
+                                flowDoms(nn, level, sps)%surfNodeIndices(:, i, j, k) = 0
+                                flowDoms(nn, level, sps)%uv(:, i, j, k) = 0
+                                if (useRoughSA) then
+                                    flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) = -1
+                                end if
+                            end if
+
+                            ! We are done with this point.
+                            cycle
+                        end if
+
+                        ! This is now the overset (possibly) overlapping surface
+                        ! mesh case. It is somewhat more complex since we use
+                        ! the same searches to flag cells that are inside the
+                        ! body.
+
+                        coor(4) = wallDistCutoff**2
+                        intInfo(3) = 0
+                        call minDistancetreeSearchSinglePoint(fullWall%ADT, coor, &
+                                                              intInfo, uvw, dummy, 0, BB, frontLeaves, frontLeavesNew)
+                        cellID = intInfo(3)
+
+                        if (cellID > 0) then
+                            ! We found the cell:
+
+                            ! If the cell is outside of near-wall distance or our
+                            ! cluster doesn't have any owned cells. Just accept it.
+                            if (uvw(4) > nearWallDist**2 .or. walls(c)%nCells == 0) then
+
+                                do kk = 1, 4
+                                    flowDoms(nn, level, sps)%surfNodeIndices(kk, i, j, k) = &
+                                        fullWall%ind(fullWall%conn(kk, cellID))
+                                end do
+                                flowDoms(nn, level, sps)%uv(:, i, j, k) = uvw(1:2)
+                                if (useRoughSA) then
+                                    flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) = fullWall%indCell(cellID)
+                                end if
+                            else
+
+                                ! This point is *closer* than the nearWallDist AND
+                                ! it has a wall. Search on our own wall.
+
+                                coor(4) = large
+                                call minDistancetreeSearchSinglePoint(walls(c)%ADT, coor, &
+                                                                      intInfo2, uvw2, dummy, 0, BB, frontLeaves, frontLeavesNew)
+                                cellID2 = intInfo2(3)
+
+                                if (uvw2(4) < nearWallDist**2) then
+                                    ! Both are close to the wall. Accept the one
+                                    ! from our own wall unconditionally.
+                                    do kk = 1, 4
+                                        flowDoms(nn, level, sps)%surfNodeIndices(kk, i, j, k) = &
+                                            walls(c)%ind(walls(c)%conn(kk, cellID2))
+                                    end do
+                                    flowDoms(nn, level, sps)%uv(:, i, j, k) = uvw2(1:2)
+                                    if (useRoughSA) then
+                                        flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) = fullWall%indCell(cellID2)
+                                    end if
+                                else
+                                    ! The full wall distance is better. Take that.
+
+                                    do kk = 1, 4
+                                        flowDoms(nn, level, sps)%surfNodeIndices(kk, i, j, k) = &
+                                            fullWall%ind(fullWall%conn(kk, cellID))
+                                    end do
+                                    flowDoms(nn, level, sps)%uv(:, i, j, k) = uvw(1:2)
+                                    if (useRoughSA) then
+                                        flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) = fullWall%indCell(cellID)
+                                    end if
+                                end if
+                            end if
+                        else
+
+                            ! What happend here is a cell is outside the
+                            ! wallDistCutoff. We don't care about wall distance
+                            ! info here so just set dummy info.
+
+                            flowDoms(nn, level, sps)%surfNodeIndices(:, i, j, k) = 0
+                            flowDoms(nn, level, sps)%uv(:, i, j, k) = 0
+                            if (useRoughSA) then
+                                flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) = -1
+                            end if
+
+                        end if
+                    end do
                 end do
-             end do
-          end do
-       end do
-    end do
-
-    ! This unique-ifies the indices.
-    call unique(indicesToGet, 4*nCellsLocal(level), nUnique, link)
-
-    ! we need to update the stored indices to use the ordering of the nodes we will receive.
-    mm = 0
-    do nn=1, nDom
-       call setPointers(nn, level, sps)
-       do k=2, kl
-          do j=2, jl
-             do i=2, il
-                do kk=1,4
-                   mm = mm + 1
-                   flowDoms(nn, level, sps)%surfNodeIndices(kk, i, j, k) = link(mm)
+            end do
+        end do
+
+        ! Now determine all the node indices this processor needs to get.
+        mm = 0
+        allocate (indicesToGet(nCellsLocal(level)*4), link(nCellsLocal(level)*4))
+        do nn = 1, nDom
+            call setPointers(nn, level, sps)
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+                        do kk = 1, 4
+                            mm = mm + 1
+                            indicesToGet(mm) = flowDoms(nn, level, sps)%surfNodeIndices(kk, i, j, k)
+                        end do
+                    end do
                 end do
-             end do
-          end do
-       end do
-    end do
-    deallocate(link)
-
-    ! Now create the index set for the nodes we need to get. We have to
-    ! expand "indices to get" to include the DOF. Use link for this
-    ! temporary array operation.
-
-    allocate(link(nUnique*3))
-    do i=1, nUnique
-       link((i-1)*3+1) = indicesToGet(i)*3
-       link((i-1)*3+2) = indicesToGet(i)*3+1
-       link((i-1)*3+3) = indicesToGet(i)*3+2
-    end do
-
-    call ISCreateGeneral(adflow_comm_world, nUnique*3, link, PETSC_COPY_VALUES, IS1, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-    deallocate(link)
-
-    ! Create the volume vector the nodes will be scatter from. Note that
-    ! this vector contains all the spectal instances. It is therefore
-    ! only allocated on the first call with sps=1
-    if (sps == 1) then
-       call VecCreateMPI(ADFLOW_COMM_WORLD, 3*nNodesLocal(level)*nTimeIntervalsSpectral, &
-            PETSC_DETERMINE, xVolumeVec(level), ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-    end if
-
-    ! This is the vector we will scatter the nodes into.
-    call VecCreateMPI(ADFLOW_COMM_WORLD, 3*nUnique, PETSC_DETERMINE, &
-         xSurfVec(level, sps), ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    call VecGetOwnershipRange(xSurfVec(level, sps), i, j, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    call ISCreateStride(ADFLOW_COMM_WORLD, j-i, i, 1, IS2, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! Create the actual final scatter context.
-    call  VecScatterCreate(xVolumeVec(level), IS1, xSurfVec(level, sps), IS2, &
-         wallScatter(level, sps), ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    call ISDestroy(IS1, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    call ISDestroy(IS2, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! Deallocate all the remaining temporary data
-    deallocate(stack, BB, frontLeaves, frontLeavesNew, BBint)
-
-    do i=1, nClusters
-       deallocate(walls(i)%x, walls(i)%conn, walls(i)%ind)
-       call destroySerialQuad(walls(i)%ADT)
-    end do
-    deallocate(walls)
-
-    if (oversetPresent) then
-       deallocate(fullWall%x, fullWall%conn, fullWall%ind)
-       call destroySerialQuad(fullWall%ADT)
-    end if
-
-  end subroutine determineWallAssociation
-
-  subroutine updateXSurf(level)
-
-    use blockPointers
-    use inputTimeSpectral
-    use utils, only : EChk, setPointers
-    implicit none
+            end do
+        end do
+
+        ! This unique-ifies the indices.
+        call unique(indicesToGet, 4*nCellsLocal(level), nUnique, link)
 
-    ! Input Parameters
-    integer(kind=intType), intent(in) :: level
-
-    ! Working Parameters
-    integer(kind=intType) :: ii, i,j,k,l, nn, sps, ierr
-
-    ! Fill up xVolumeVec
-    call VecGetArrayF90(xVolumeVec(level), xVolume, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ii = 0
-    do nn=1,nDom
-       do sps=1,nTimeIntervalsSpectral
-          call setPointers(nn, level, sps)
-          do k=1, kl
-             do j=1, jl
-                do i=1, il
-                   do l= 1,3
-                      ii = ii + 1
-                      xVolume(ii) = X(i, j, k, l)
-                   end do
+        ! we need to update the stored indices to use the ordering of the nodes we will receive.
+        mm = 0
+        do nn = 1, nDom
+            call setPointers(nn, level, sps)
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+                        do kk = 1, 4
+                            mm = mm + 1
+                            flowDoms(nn, level, sps)%surfNodeIndices(kk, i, j, k) = link(mm)
+                        end do
+                    end do
                 end do
-             end do
-          end do
-       end do
-    end do
-    call vecRestoreArrayF90(xVolumeVec(level), xVolume, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! Perform the scatter from the global x vector to xSurf. SPS loop since the xSurfVec is done by SPS instance.
-    do sps=1, nTimeIntervalsSpectral
-       call VecScatterBegin(wallScatter(level, sps), xVolumeVec(level), &
-            xSurfVec(level, sps), INSERT_VALUES, SCATTER_FORWARD, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       call VecScatterEnd(wallScatter(level, sps), xVolumeVec(level), &
-            xSurfVec(level, sps), INSERT_VALUES, SCATTER_FORWARD, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-    end do
-
-  end subroutine updateXSurf
-
-  subroutine destroyWallDistanceData
-
-    use constants
-    use block, only : flowDoms
-    implicit none
+            end do
+        end do
+        deallocate (link)
+
+        ! Now create the index set for the nodes we need to get. We have to
+        ! expand "indices to get" to include the DOF. Use link for this
+        ! temporary array operation.
+
+        allocate (link(nUnique*3))
+        do i = 1, nUnique
+            link((i - 1)*3 + 1) = indicesToGet(i)*3
+            link((i - 1)*3 + 2) = indicesToGet(i)*3 + 1
+            link((i - 1)*3 + 3) = indicesToGet(i)*3 + 2
+        end do
+
+        call ISCreateGeneral(adflow_comm_world, nUnique*3, link, PETSC_COPY_VALUES, IS1, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+        deallocate (link)
+
+        ! Create the volume vector the nodes will be scatter from. Note that
+        ! this vector contains all the spectal instances. It is therefore
+        ! only allocated on the first call with sps=1
+        if (sps == 1) then
+            call VecCreateMPI(ADFLOW_COMM_WORLD, 3*nNodesLocal(level)*nTimeIntervalsSpectral, &
+                              PETSC_DETERMINE, xVolumeVec(level), ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+        end if
+
+        ! This is the vector we will scatter the nodes into.
+        call VecCreateMPI(ADFLOW_COMM_WORLD, 3*nUnique, PETSC_DETERMINE, &
+                          xSurfVec(level, sps), ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecGetOwnershipRange(xSurfVec(level, sps), i, j, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call ISCreateStride(ADFLOW_COMM_WORLD, j - i, i, 1, IS2, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Create the actual final scatter context.
+        call VecScatterCreate(xVolumeVec(level), IS1, xSurfVec(level, sps), IS2, &
+                              wallScatter(level, sps), ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call ISDestroy(IS1, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call ISDestroy(IS2, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Deallocate all the remaining temporary data
+        deallocate (stack, BB, frontLeaves, frontLeavesNew, BBint)
+
+        do i = 1, nClusters
+            deallocate (walls(i)%x, walls(i)%conn, walls(i)%ind)
+            call destroySerialQuad(walls(i)%ADT)
+        end do
+        deallocate (walls)
+
+        if (oversetPresent) then
+            deallocate (fullWall%x, fullWall%conn, fullWall%ind)
+            call destroySerialQuad(fullWall%ADT)
+        end if
+
+    end subroutine determineWallAssociation
+
+    subroutine updateXSurf(level)
+
+        use blockPointers
+        use inputTimeSpectral
+        use utils, only: EChk, setPointers
+        implicit none
+
+        ! Input Parameters
+        integer(kind=intType), intent(in) :: level
+
+        ! Working Parameters
+        integer(kind=intType) :: ii, i, j, k, l, nn, sps, ierr
+
+        ! Fill up xVolumeVec
+        call VecGetArrayF90(xVolumeVec(level), xVolume, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ii = 0
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, level, sps)
+                do k = 1, kl
+                    do j = 1, jl
+                        do i = 1, il
+                            do l = 1, 3
+                                ii = ii + 1
+                                xVolume(ii) = X(i, j, k, l)
+                            end do
+                        end do
+                    end do
+                end do
+            end do
+        end do
+        call vecRestoreArrayF90(xVolumeVec(level), xVolume, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    ! Working
-    integer(kind=intType) :: level, nLevels, l
+        ! Perform the scatter from the global x vector to xSurf. SPS loop since the xSurfVec is done by SPS instance.
+        do sps = 1, nTimeIntervalsSpectral
+            call VecScatterBegin(wallScatter(level, sps), xVolumeVec(level), &
+                                 xSurfVec(level, sps), INSERT_VALUES, SCATTER_FORWARD, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-    nLevels = ubound(flowDoms,2)
-    do l=1,nLevels
-       call destroyWallDistanceDataLevel(l)
-    end do
+            call VecScatterEnd(wallScatter(level, sps), xVolumeVec(level), &
+                               xSurfVec(level, sps), INSERT_VALUES, SCATTER_FORWARD, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+        end do
 
-    deallocate(xSurfVec, xVolumeVec, wallScatter)
+    end subroutine updateXSurf
 
-  end subroutine destroyWallDistanceData
+    subroutine destroyWallDistanceData
 
-  subroutine destroyWallDistanceDataLevel(level)
-    use constants
-    use inputTimeSpectral, only :nTimeIntervalsspectral
-    use utils, onlY : EChk
-    use block, only : flowDoms
+        use constants
+        use block, only: flowDoms
+        implicit none
 
-    implicit none
+        ! Working
+        integer(kind=intType) :: level, nLevels, l
+
+        nLevels = ubound(flowDoms, 2)
+        do l = 1, nLevels
+            call destroyWallDistanceDataLevel(l)
+        end do
 
-    ! Input Parameters
-    integer(kind=intType), intent(in) :: level
+        deallocate (xSurfVec, xVolumeVec, wallScatter)
 
-    ! Working
-    integer(kind=intType) :: ierr, sps
+    end subroutine destroyWallDistanceData
 
-    ! Determine if we need to deallocate the PETSc data for
-    ! this level
-    if (wallDistanceDataAllocated(level)) then
-       call VecDestroy(xVolumeVec(level), ierr)
-       call EChk(ierr,__FILE__,__LINE__)
+    subroutine destroyWallDistanceDataLevel(level)
+        use constants
+        use inputTimeSpectral, only: nTimeIntervalsspectral
+        use utils, onlY: EChk
+        use block, only: flowDoms
 
-       do sps=1, nTimeIntervalsSpectral
-          call VecDestroy(xSurfVec(level, sps), ierr)
-          call EChk(ierr,__FILE__,__LINE__)
+        implicit none
 
-          call VecScatterDestroy(wallScatter(level, sps), ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-       end do
+        ! Input Parameters
+        integer(kind=intType), intent(in) :: level
+
+        ! Working
+        integer(kind=intType) :: ierr, sps
+
+        ! Determine if we need to deallocate the PETSc data for
+        ! this level
+        if (wallDistanceDataAllocated(level)) then
+            call VecDestroy(xVolumeVec(level), ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            do sps = 1, nTimeIntervalsSpectral
+                call VecDestroy(xSurfVec(level, sps), ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                call VecScatterDestroy(wallScatter(level, sps), ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+            end do
 
-       wallDistanceDataAllocated(level) = .False.
-    end if
-  end subroutine destroyWallDistanceDataLevel
+            wallDistanceDataAllocated(level) = .False.
+        end if
+    end subroutine destroyWallDistanceDataLevel
 
 #endif
 end module wallDistance

From 567a97e55044b74ce37324e283fb58af147d7d89 Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Tue, 21 Feb 2023 11:16:37 +0100
Subject: [PATCH 47/60] run fprettify with mdolab options

---
 src/bcdata/BCData.F90                  | 296 ++++++-------
 src/inputParam/inputParamRoutines.F90  | 106 ++---
 src/modules/block.F90                  |   8 +-
 src/modules/blockPointers.F90          |  10 +-
 src/modules/constants.F90              |   4 +-
 src/modules/inputParam.F90             |  42 +-
 src/modules/paramTurb.F90              |   4 +-
 src/modules/wallDistanceData.F90       |   4 +-
 src/output/outputMod.F90               | 444 +++++++++----------
 src/preprocessing/preprocessingAPI.F90 | 358 +++++++--------
 src/turbulence/sa.F90                  | 590 ++++++++++++-------------
 src/turbulence/turbBCRoutines.F90      | 110 ++---
 src/utils/utils.F90                    | 402 ++++++++---------
 src/wallDistance/wallDistance.F90      | 374 ++++++++--------
 14 files changed, 1376 insertions(+), 1376 deletions(-)

diff --git a/src/bcdata/BCData.F90 b/src/bcdata/BCData.F90
index 830bfa65a..8fc7e97db 100644
--- a/src/bcdata/BCData.F90
+++ b/src/bcdata/BCData.F90
@@ -175,7 +175,7 @@ subroutine computeHtot(tt, ht)
         !
         !      Subroutine arguments.
         !
-        real(kind=realType), intent(in)  :: tt
+        real(kind=realType), intent(in) :: tt
         real(kind=realType), intent(out) :: ht
         !
         !      Local variables.
@@ -192,7 +192,7 @@ subroutine computeHtot(tt, ht)
 
             ! Constant cp. The total enthalpy is simply cp*tt.
 
-            ht = gammaConstant*RGasDim*tt/(gammaConstant - one)
+            ht = gammaConstant * RGasDim * tt / (gammaConstant - one)
 
             !        ================================================================
 #ifndef USE_TAPENADE
@@ -219,7 +219,7 @@ subroutine computeHtot(tt, ht)
 
                 end if
 
-                ht = RGasDim*(cpEint(0) + tt + cv0*(tt - cpTrange(0)))
+                ht = RGasDim * (cpEint(0) + tt + cv0 * (tt - cpTrange(0)))
 
             else if (tt > cpTrange(cpNparts)) then
 
@@ -236,7 +236,7 @@ subroutine computeHtot(tt, ht)
                     print "(a)", "#"
                 end if
 
-                ht = RGasDim*(cpEint(cpNparts) + tt + cvn*(tt - cpTrange(cpNparts)))
+                ht = RGasDim * (cpEint(cpNparts) + tt + cvn * (tt - cpTrange(cpNparts)))
 
             else
 
@@ -249,7 +249,7 @@ subroutine computeHtot(tt, ht)
 
                     ! Next guess for the interval.
 
-                    nn = start + ii/2
+                    nn = start + ii / 2
 
                     ! Determine the situation we are having here.
 
@@ -271,7 +271,7 @@ subroutine computeHtot(tt, ht)
 
                     ! Modify ii for the next branch to search.
 
-                    ii = ii/2
+                    ii = ii / 2
 
                 end do interval
 
@@ -281,18 +281,18 @@ subroutine computeHtot(tt, ht)
                 ht = cpTempFit(nn)%eint0
                 do ii = 1, cpTempFit(nn)%nterm
                     if (cpTempFit(nn)%exponents(ii) == -1_intType) then
-                        ht = ht + cpTempFit(nn)%constants(ii)*log(tt)
+                        ht = ht + cpTempFit(nn)%constants(ii) * log(tt)
                     else
                         mm = cpTempFit(nn)%exponents(ii) + 1
                         t2 = tt**mm
-                        ht = ht + cpTempFit(nn)%constants(ii)*t2/mm
+                        ht = ht + cpTempFit(nn)%constants(ii) * t2 / mm
                     end if
                 end do
 
                 ! Multiply ht by RGasDim to obtain the correct
                 ! dimensional value.
 
-                ht = RGasDim*ht
+                ht = RGasDim * ht
 
             end if
 #endif
@@ -320,7 +320,7 @@ subroutine unitVectorsCylSystem(boco)
         !      Local variables.
         !
         integer(kind=intType) :: i, j
-        real(kind=realType)   :: factInlet, var
+        real(kind=realType) :: factInlet, var
 
         real(kind=realType), dimension(3) :: dir
 
@@ -365,9 +365,9 @@ subroutine unitVectorsCylSystem(boco)
         ! Multiply by factInlet to make sure that the normal
         ! is inward pointing.
 
-        dir(1) = dir(1)*factInlet
-        dir(2) = dir(2)*factInlet
-        dir(3) = dir(3)*factInlet
+        dir(1) = dir(1) * factInlet
+        dir(2) = dir(2) * factInlet
+        dir(3) = dir(3) * factInlet
 
         ! Determine three unit vectors, which define the local cartesian
         ! coordinate system of the rotation axis. First the axial
@@ -389,7 +389,7 @@ subroutine unitVectorsCylSystem(boco)
         ! computational domain. If the dot product with dir is
         ! negative the direction of axis should be reversed.
 
-        var = axis(1)*dir(1) + axis(2)*dir(2) + axis(3)*dir(3)
+        var = axis(1) * dir(1) + axis(2) * dir(2) + axis(3) * dir(3)
         if (var < zero) then
             axis(1) = -axis(1); axis(2) = -axis(2); axis(3) = -axis(3)
         end if
@@ -404,24 +404,24 @@ subroutine unitVectorsCylSystem(boco)
             radVec1(1) = zero; radVec1(2) = zero; radVec1(3) = one
         end if
 
-        var = radVec1(1)*axis(1) + radVec1(2)*axis(2) &
-              + radVec1(3)*axis(3)
-        radVec1(1) = radVec1(1) - var*axis(1)
-        radVec1(2) = radVec1(2) - var*axis(2)
-        radVec1(3) = radVec1(3) - var*axis(3)
+        var = radVec1(1) * axis(1) + radVec1(2) * axis(2) &
+              + radVec1(3) * axis(3)
+        radVec1(1) = radVec1(1) - var * axis(1)
+        radVec1(2) = radVec1(2) - var * axis(2)
+        radVec1(3) = radVec1(3) - var * axis(3)
 
-        var = one/sqrt(radVec1(1)**2 + radVec1(2)**2 &
-                       + radVec1(3)**2)
-        radVec1(1) = radVec1(1)*var
-        radVec1(2) = radVec1(2)*var
-        radVec1(3) = radVec1(3)*var
+        var = one / sqrt(radVec1(1)**2 + radVec1(2)**2 &
+                         + radVec1(3)**2)
+        radVec1(1) = radVec1(1) * var
+        radVec1(2) = radVec1(2) * var
+        radVec1(3) = radVec1(3) * var
 
         ! The second vector of the radial plane is obtained
         ! by taking the cross product of axis and radVec1.
 
-        radVec2(1) = axis(2)*radVec1(3) - axis(3)*radVec1(2)
-        radVec2(2) = axis(3)*radVec1(1) - axis(1)*radVec1(3)
-        radVec2(3) = axis(1)*radVec1(2) - axis(2)*radVec1(1)
+        radVec2(1) = axis(2) * radVec1(3) - axis(3) * radVec1(2)
+        radVec2(2) = axis(3) * radVec1(1) - axis(1) * radVec1(3)
+        radVec2(3) = axis(1) * radVec1(2) - axis(2) * radVec1(1)
 
     end subroutine unitVectorsCylSystem
 
@@ -484,7 +484,7 @@ subroutine BCDataIsothermalWall(boco, bcVarArray, iBeg, iEnd, jBeg, jEnd)
 
         do j = jBeg, jEnd
             do i = iBeg, iEnd
-                BCData(boco)%TNS_Wall(i, j) = (mult*bcVarArray(i, j, 1) + trans)/Tref
+                BCData(boco)%TNS_Wall(i, j) = (mult * bcVarArray(i, j, 1) + trans) / Tref
             end do
         end do
 
@@ -568,7 +568,7 @@ subroutine BCDataSubsonicInflow(boco, bcVarArray, iBeg, iEnd, jBeg, jEnd, allTur
         !
         !      Subroutine arguments.
         !
-        integer(kind=intType), intent(in)    :: boco
+        integer(kind=intType), intent(in) :: boco
         integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
         real(kind=realType), dimension(iBeg:iEnd, jBeg:jEnd, nbcVarMax) :: bcVarArray
         logical, intent(inout) :: allTurbPresent
@@ -707,8 +707,8 @@ subroutine totalSubsonicInlet
 
                 do j = jBeg, jEnd
                     do i = iBeg, iEnd
-                        BCData(boco)%ptInlet(i, j) = (mult*bcVarArray(i, j, 1) &
-                                                      + trans)/Pref
+                        BCData(boco)%ptInlet(i, j) = (mult * bcVarArray(i, j, 1) &
+                                                      + trans) / Pref
                     end do
                 end do
             end if
@@ -721,8 +721,8 @@ subroutine totalSubsonicInlet
 
                 do j = jBeg, jEnd
                     do i = iBeg, iEnd
-                        BCData(boco)%ttInlet(i, j) = (mult*bcVarArray(i, j, 2) &
-                                                      + trans)/Tref
+                        BCData(boco)%ttInlet(i, j) = (mult * bcVarArray(i, j, 2) &
+                                                      + trans) / Tref
                     end do
                 end do
             end if
@@ -742,9 +742,9 @@ subroutine totalSubsonicInlet
 
                     do j = jBeg, jEnd
                         do i = iBeg, iEnd
-                            rhot = mult*bcVarArray(i, j, 3) + trans
+                            rhot = mult * bcVarArray(i, j, 3) + trans
                             BCData(boco)%ttInlet(i, j) = &
-                                (BCData(boco)%ptInlet(i, j)*pRef/(RGasDim*rhot))/Tref
+                                (BCData(boco)%ptInlet(i, j) * pRef / (RGasDim * rhot)) / Tref
 
                         end do
                     end do
@@ -757,10 +757,10 @@ subroutine totalSubsonicInlet
 
                     do j = jBeg, jEnd
                         do i = iBeg, iEnd
-                            rhot = mult*bcVarArray(i, j, 3) + trans
+                            rhot = mult * bcVarArray(i, j, 3) + trans
 
-                            BCData(boco)%ptInlet(i, j) = (RGasDim*rhot &
-                                                          *BCData(boco)%ttInlet(i, j)*Tref)/Pref
+                            BCData(boco)%ptInlet(i, j) = (RGasDim * rhot &
+                                                          * BCData(boco)%ttInlet(i, j) * Tref) / Pref
                         end do
                     end do
 
@@ -797,25 +797,25 @@ subroutine totalSubsonicInlet
                         ! an offset of 1 is introduced and thus the average should
                         ! be taken of i, i+1, j and j+1.
 
-                        xc(1) = fourth*(xf(i, j, 1) + xf(i + 1, j, 1) &
-                                        + xf(i, j + 1, 1) + xf(i + 1, j + 1, 1)) &
+                        xc(1) = fourth * (xf(i, j, 1) + xf(i + 1, j, 1) &
+                                          + xf(i, j + 1, 1) + xf(i + 1, j + 1, 1)) &
                                 - sections(sectionId)%rotCenter(1)
-                        xc(2) = fourth*(xf(i, j, 2) + xf(i + 1, j, 2) &
-                                        + xf(i, j + 1, 2) + xf(i + 1, j + 1, 2)) &
+                        xc(2) = fourth * (xf(i, j, 2) + xf(i + 1, j, 2) &
+                                          + xf(i, j + 1, 2) + xf(i + 1, j + 1, 2)) &
                                 - sections(sectionId)%rotCenter(2)
-                        xc(3) = fourth*(xf(i, j, 3) + xf(i + 1, j, 3) &
-                                        + xf(i, j + 1, 3) + xf(i + 1, j + 1, 3)) &
+                        xc(3) = fourth * (xf(i, j, 3) + xf(i + 1, j, 3) &
+                                          + xf(i, j + 1, 3) + xf(i + 1, j + 1, 3)) &
                                 - sections(sectionId)%rotCenter(3)
 
                         ! Determine the coordinates in the local cartesian frame,
                         ! i.e. the frame determined by axis, radVec1 and radVec2.
 
-                        ax = xc(1)*axis(1) + xc(2)*axis(2) &
-                             + xc(3)*axis(3)
-                        r1 = xc(1)*radVec1(1) + xc(2)*radVec1(2) &
-                             + xc(3)*radVec1(3)
-                        r2 = xc(1)*radVec2(1) + xc(2)*radVec2(2) &
-                             + xc(3)*radVec2(3)
+                        ax = xc(1) * axis(1) + xc(2) * axis(2) &
+                             + xc(3) * axis(3)
+                        r1 = xc(1) * radVec1(1) + xc(2) * radVec1(2) &
+                             + xc(3) * radVec1(3)
+                        r2 = xc(1) * radVec2(1) + xc(2) * radVec2(2) &
+                             + xc(3) * radVec2(3)
 
                         ! Determine the weights of the unit vectors in the local
                         ! cylindrical system.
@@ -827,24 +827,24 @@ subroutine totalSubsonicInlet
                         ! Determine the direction in the local cartesian frame,
                         ! determined by axis, radVec1 and radVec2.
 
-                        var = one/sqrt(max(eps, (r1*r1 + r2*r2)))
+                        var = one / sqrt(max(eps, (r1 * r1 + r2 * r2)))
                         dir(1) = wax
-                        dir(2) = var*(wrad*r1 - wtheta*r2)
-                        dir(3) = var*(wrad*r2 + wtheta*r1)
+                        dir(2) = var * (wrad * r1 - wtheta * r2)
+                        dir(3) = var * (wrad * r2 + wtheta * r1)
 
                         ! Transform this direction to the global cartesian frame.
 
-                        BCData(boco)%flowXdirInlet(i, j) = dir(1)*axis(1) &
-                                                           + dir(2)*radVec1(1) &
-                                                           + dir(3)*radVec2(1)
+                        BCData(boco)%flowXdirInlet(i, j) = dir(1) * axis(1) &
+                                                           + dir(2) * radVec1(1) &
+                                                           + dir(3) * radVec2(1)
 
-                        BCData(boco)%flowYdirInlet(i, j) = dir(1)*axis(2) &
-                                                           + dir(2)*radVec1(2) &
-                                                           + dir(3)*radVec2(2)
+                        BCData(boco)%flowYdirInlet(i, j) = dir(1) * axis(2) &
+                                                           + dir(2) * radVec1(2) &
+                                                           + dir(3) * radVec2(2)
 
-                        BCData(boco)%flowZdirInlet(i, j) = dir(1)*axis(3) &
-                                                           + dir(2)*radVec1(3) &
-                                                           + dir(3)*radVec2(3)
+                        BCData(boco)%flowZdirInlet(i, j) = dir(1) * axis(3) &
+                                                           + dir(2) * radVec1(3) &
+                                                           + dir(3) * radVec2(3)
                     end do
                 end do
 
@@ -865,7 +865,7 @@ subroutine totalSubsonicInlet
                     do j = jBeg, jEnd
                         do i = iBeg, iEnd
                             BCData(boco)%flowXdirInlet(i, j) = &
-                                cos(mult*bcVarArray(i, j, 4) + trans)
+                                cos(mult * bcVarArray(i, j, 4) + trans)
                         end do
                     end do
 
@@ -893,7 +893,7 @@ subroutine totalSubsonicInlet
                     do j = jBeg, jEnd
                         do i = iBeg, iEnd
                             BCData(boco)%flowYdirInlet(i, j) = &
-                                cos(mult*bcVarArray(i, j, 5) + trans)
+                                cos(mult * bcVarArray(i, j, 5) + trans)
                         end do
                     end do
 
@@ -921,7 +921,7 @@ subroutine totalSubsonicInlet
                     do j = jBeg, jEnd
                         do i = iBeg, iEnd
                             BCData(boco)%flowZdirInlet(i, j) = &
-                                cos(mult*bcVarArray(i, j, 6) + trans)
+                                cos(mult * bcVarArray(i, j, 6) + trans)
                         end do
                     end do
 
@@ -947,9 +947,9 @@ subroutine totalSubsonicInlet
 
                     ! Compute the total enthalpy from the given
                     ! total temperature.
-                    TDim = BCData(boco)%ttInlet(i, j)*Tref
+                    TDim = BCData(boco)%ttInlet(i, j) * Tref
                     call computeHtot(TDim, Hdim)
-                    BCData(boco)%htInlet(i, j) = Hdim/Href
+                    BCData(boco)%htInlet(i, j) = Hdim / Href
 
                     ! Determine the unit vector of the flow direction.
 
@@ -957,11 +957,11 @@ subroutine totalSubsonicInlet
                     dir(2) = BCData(boco)%flowYdirInlet(i, j)
                     dir(3) = BCData(boco)%flowZdirInlet(i, j)
 
-                    var = one/max(eps, sqrt(dir(1)**2 + dir(2)**2 + dir(3)**2))
+                    var = one / max(eps, sqrt(dir(1)**2 + dir(2)**2 + dir(3)**2))
 
-                    BCData(boco)%flowXdirInlet(i, j) = var*dir(1)
-                    BCData(boco)%flowYdirInlet(i, j) = var*dir(2)
-                    BCData(boco)%flowZdirInlet(i, j) = var*dir(3)
+                    BCData(boco)%flowXdirInlet(i, j) = var * dir(1)
+                    BCData(boco)%flowYdirInlet(i, j) = var * dir(2)
+                    BCData(boco)%flowZdirInlet(i, j) = var * dir(3)
 
                 end do
             end do
@@ -975,11 +975,11 @@ subroutine totalSubsonicInlet
                 do i = (BCData(boco)%inbeg + 1), BCData(boco)%inend
 
                     var = BCData(boco)%flowXdirInlet(i, j) &
-                          *BCData(boco)%norm(i, j, 1) &
+                          * BCData(boco)%norm(i, j, 1) &
                           + BCData(boco)%flowYdirInlet(i, j) &
-                          *BCData(boco)%norm(i, j, 2) &
+                          * BCData(boco)%norm(i, j, 2) &
                           + BCData(boco)%flowZdirInlet(i, j) &
-                          *BCData(boco)%norm(i, j, 3)
+                          * BCData(boco)%norm(i, j, 3)
 
                     if (var > zero) nn = nn + 1
 
@@ -1053,7 +1053,7 @@ subroutine BCDataSubsonicOutflow(boco, bcVarArray, iBeg, iEnd, jBeg, jEnd)
         call siPressure(mass(1), length(1), time(1), mult, trans)
         do j = jBeg, jEnd
             do i = iBeg, iEnd
-                BCData(boco)%ps(i, j) = (mult*bcVarArray(i, j, 1) + trans)/Pref
+                BCData(boco)%ps(i, j) = (mult * bcVarArray(i, j, 1) + trans) / Pref
             end do
         end do
 
@@ -1183,9 +1183,9 @@ subroutine BCDataSupersonicInflow(boco, bcVarArray, iBeg, iEnd, jBeg, jEnd, &
         do j = (BCData(boco)%jnbeg + 1), BCData(boco)%jnend
             do i = (BCData(boco)%inbeg + 1), BCData(boco)%inend
 
-                var = BCData(boco)%velx(i, j)*BCData(boco)%norm(i, j, 1) &
-                      + BCData(boco)%vely(i, j)*BCData(boco)%norm(i, j, 2) &
-                      + BCData(boco)%velz(i, j)*BCData(boco)%norm(i, j, 3)
+                var = BCData(boco)%velx(i, j) * BCData(boco)%norm(i, j, 1) &
+                      + BCData(boco)%vely(i, j) * BCData(boco)%norm(i, j, 2) &
+                      + BCData(boco)%velz(i, j) * BCData(boco)%norm(i, j, 3)
 
                 if (var > zero) nn = nn + 1
 
@@ -1228,7 +1228,7 @@ subroutine prescribedSupersonicInlet
 
             do j = jBeg, jEnd
                 do i = iBeg, iEnd
-                    BCData(boco)%rho(i, j) = (mult*bcVarArray(i, j, 1) + trans)/rhoRef
+                    BCData(boco)%rho(i, j) = (mult * bcVarArray(i, j, 1) + trans) / rhoRef
                 end do
             end do
 
@@ -1239,7 +1239,7 @@ subroutine prescribedSupersonicInlet
 
             do j = jBeg, jEnd
                 do i = iBeg, iEnd
-                    BCData(boco)%ps(i, j) = (mult*bcVarArray(i, j, 2) + trans)/pRef
+                    BCData(boco)%ps(i, j) = (mult * bcVarArray(i, j, 2) + trans) / pRef
                 end do
             end do
 
@@ -1282,57 +1282,57 @@ subroutine prescribedSupersonicInlet
                         ! an offset of 1 is introduced and thus the average should
                         ! be taken of i, i+1, j and j+1.
 
-                        xc(1) = fourth*(xf(i, j, 1) + xf(i + 1, j, 1) &
-                                        + xf(i, j + 1, 1) + xf(i + 1, j + 1, 1)) &
+                        xc(1) = fourth * (xf(i, j, 1) + xf(i + 1, j, 1) &
+                                          + xf(i, j + 1, 1) + xf(i + 1, j + 1, 1)) &
                                 - sections(sectionID)%rotCenter(1)
-                        xc(2) = fourth*(xf(i, j, 2) + xf(i + 1, j, 2) &
-                                        + xf(i, j + 1, 2) + xf(i + 1, j + 1, 2)) &
+                        xc(2) = fourth * (xf(i, j, 2) + xf(i + 1, j, 2) &
+                                          + xf(i, j + 1, 2) + xf(i + 1, j + 1, 2)) &
                                 - sections(sectionID)%rotCenter(2)
-                        xc(3) = fourth*(xf(i, j, 3) + xf(i + 1, j, 3) &
-                                        + xf(i, j + 1, 3) + xf(i + 1, j + 1, 3)) &
+                        xc(3) = fourth * (xf(i, j, 3) + xf(i + 1, j, 3) &
+                                          + xf(i, j + 1, 3) + xf(i + 1, j + 1, 3)) &
                                 - sections(sectionID)%rotCenter(3)
 
                         ! Determine the coordinates in the local cartesian frame,
                         ! i.e. the frame determined by axis, radVec1 and radVec2.
 
-                        ax = xc(1)*axis(1) + xc(2)*axis(2) &
-                             + xc(3)*axis(3)
-                        r1 = xc(1)*radVec1(1) + xc(2)*radVec1(2) &
-                             + xc(3)*radVec1(3)
-                        r2 = xc(1)*radVec2(1) + xc(2)*radVec2(2) &
-                             + xc(3)*radVec2(3)
+                        ax = xc(1) * axis(1) + xc(2) * axis(2) &
+                             + xc(3) * axis(3)
+                        r1 = xc(1) * radVec1(1) + xc(2) * radVec1(2) &
+                             + xc(3) * radVec1(3)
+                        r2 = xc(1) * radVec2(1) + xc(2) * radVec2(2) &
+                             + xc(3) * radVec2(3)
 
                         ! Determine the velocity components in the local
                         ! cylindrical system. Take the conversion to si units
                         ! into account.
 
-                        vax = multVel(1)*bcVarArray(i, j, 3) + transVel(1)
-                        vrad = multVel(2)*bcVarArray(i, j, 6) + transVel(2)
+                        vax = multVel(1) * bcVarArray(i, j, 3) + transVel(1)
+                        vrad = multVel(2) * bcVarArray(i, j, 6) + transVel(2)
                         if (veltPresent) &
-                            vtheta = multVel(3)*bcVarArray(i, j, 7) + transVel(3)
+                            vtheta = multVel(3) * bcVarArray(i, j, 7) + transVel(3)
 
                         ! Determine the velocities in the local cartesian
                         ! frame determined by axis, radVec1 and radVec2.
 
-                        var = one/sqrt(max(eps, (r1*r1 + r2*r2)))
+                        var = one / sqrt(max(eps, (r1 * r1 + r2 * r2)))
                         vloc(1) = vax
-                        vloc(2) = var*(vrad*r1 - vtheta*r2)
-                        vloc(3) = var*(vrad*r2 + vtheta*r1)
+                        vloc(2) = var * (vrad * r1 - vtheta * r2)
+                        vloc(3) = var * (vrad * r2 + vtheta * r1)
 
                         ! Transform vloc to the global cartesian frame and
                         ! store the values.
 
-                        BCData(boco)%velx(i, j) = (vloc(1)*axis(1) &
-                                                   + vloc(2)*radVec1(1) &
-                                                   + vloc(3)*radVec2(1))/uRef
+                        BCData(boco)%velx(i, j) = (vloc(1) * axis(1) &
+                                                   + vloc(2) * radVec1(1) &
+                                                   + vloc(3) * radVec2(1)) / uRef
 
-                        BCData(boco)%vely(i, j) = (vloc(1)*axis(2) &
-                                                   + vloc(2)*radVec1(2) &
-                                                   + vloc(3)*radVec2(2))/uRef
+                        BCData(boco)%vely(i, j) = (vloc(1) * axis(2) &
+                                                   + vloc(2) * radVec1(2) &
+                                                   + vloc(3) * radVec2(2)) / uRef
 
-                        BCData(boco)%velz(i, j) = (vloc(1)*axis(3) &
-                                                   + vloc(2)*radVec1(3) &
-                                                   + vloc(3)*radVec2(3))/uRef
+                        BCData(boco)%velz(i, j) = (vloc(1) * axis(3) &
+                                                   + vloc(2) * radVec1(3) &
+                                                   + vloc(3) * radVec2(3)) / uRef
                     end do
                 end do
 
@@ -1351,12 +1351,12 @@ subroutine prescribedSupersonicInlet
 
                 do j = jBeg, jEnd
                     do i = iBeg, iEnd
-                        BCData(boco)%velx(i, j) = (multVel(1)*bcVarArray(i, j, 3) &
-                                                   + transVel(1))/uRef
-                        BCData(boco)%vely(i, j) = (multVel(2)*bcVarArray(i, j, 4) &
-                                                   + transVel(2))/uRef
-                        BCData(boco)%velz(i, j) = (multVel(3)*bcVarArray(i, j, 5) &
-                                                   + transVel(3))/uRef
+                        BCData(boco)%velx(i, j) = (multVel(1) * bcVarArray(i, j, 3) &
+                                                   + transVel(1)) / uRef
+                        BCData(boco)%vely(i, j) = (multVel(2) * bcVarArray(i, j, 4) &
+                                                   + transVel(2)) / uRef
+                        BCData(boco)%velz(i, j) = (multVel(3) * bcVarArray(i, j, 5) &
+                                                   + transVel(3)) / uRef
                     end do
                 end do
 
@@ -1398,7 +1398,7 @@ logical function setBCVarTurb(offset, boco, bcVarArray, &
         !      Local variables.
         !
         integer(kind=intType) :: nn, mm, i, j
-        real(kind=realType)   :: mult, trans, nuRef
+        real(kind=realType) :: mult, trans, nuRef
         real(kind=realType), dimension(nt1:nt2) :: ref
 
         ! Initialize setBCVarTurb to .true. And return immediately
@@ -1409,24 +1409,24 @@ logical function setBCVarTurb(offset, boco, bcVarArray, &
 
         ! Set the reference values depending on the turbulence model.
 
-        nuRef = muRef/rhoRef
+        nuRef = muRef / rhoRef
         select case (turbModel)
 
         case (spalartAllmaras, spalartAllmarasEdwards)
             ref(itu1) = nuRef
 
         case (komegaWilcox, komegaModified, menterSST)
-            ref(itu1) = pRef/rhoRef
-            ref(itu2) = ref(itu1)/nuRef
+            ref(itu1) = pRef / rhoRef
+            ref(itu2) = ref(itu1) / nuRef
 
         case (ktau)
-            ref(itu1) = pRef/rhoRef
-            ref(itu2) = nuRef/ref(itu1)
+            ref(itu1) = pRef / rhoRef
+            ref(itu2) = nuRef / ref(itu1)
 
         case (v2f)
-            ref(itu1) = pRef/rhoRef
-            ref(itu4) = ref(itu1)/nuRef
-            ref(itu2) = ref(itu1)*ref(itu4)
+            ref(itu1) = pRef / rhoRef
+            ref(itu4) = ref(itu1) / nuRef
+            ref(itu2) = ref(itu1) * ref(itu4)
             ref(itu3) = ref(itu1)
 
         end select
@@ -1452,7 +1452,7 @@ logical function setBCVarTurb(offset, boco, bcVarArray, &
 
                 do j = jBeg, jEnd
                     do i = iBeg, iEnd
-                        turbInlet(i, j, nn) = (mult*bcVarArray(i, j, mm) + trans)/ref(nn)
+                        turbInlet(i, j, nn) = (mult * bcVarArray(i, j, mm) + trans) / ref(nn)
                     end do
                 end do
 
@@ -1495,7 +1495,7 @@ subroutine setBCData(bcDataNamesIn, bcDataIn, famLists, sps, &
         character, dimension(nVar, maxCGNSNameLen), intent(in) :: bcdatanamesin
         real(kind=realType), dimension(nVar), intent(in) :: bcDataIn
         integer(kind=intType), dimension(nVar, nFamMax) :: famLists
-        integer(kind=intType), intent(in) ::  sps, nVar, nFamMax
+        integer(kind=intType), intent(in) :: sps, nVar, nFamMax
         !
         !      Local variables.
         !
@@ -1563,7 +1563,7 @@ subroutine setBCData(bcDataNamesIn, bcDataIn, famLists, sps, &
                     varName = char2str(bcDataNamesIn(iVar, :), maxCGNSNameLen)
 
                     if (trim(varName) == "Thrust") then
-                        actuatorRegions(iRegion)%force = actuatorRegions(iRegion)%axisVec* &
+                        actuatorRegions(iRegion)%force = actuatorRegions(iRegion)%axisVec * &
                                                          bcDataIn(iVar)
                     else if (trim(varName) == "Torque") then
                         actuatorRegions(iRegion)%torque = bcDataIn(iVar)
@@ -1594,7 +1594,7 @@ subroutine setBCData_d(bcDataNamesIn, bcDataIn, bcDataInd, famLists, sps, &
         character, dimension(nVar, maxCGNSNameLen), intent(in) :: bcdatanamesin
         real(kind=realType), dimension(nVar), intent(in) :: bcDataIn, bcDataInd
         integer(kind=intType), dimension(nVar, nFamMax) :: famLists
-        integer(kind=intType), intent(in) ::  sps, nVar, nFamMax
+        integer(kind=intType), intent(in) :: sps, nVar, nFamMax
         !
         !      Local variables.
         !
@@ -1664,9 +1664,9 @@ subroutine setBCData_d(bcDataNamesIn, bcDataIn, bcDataInd, famLists, sps, &
                     varName = char2str(bcDataNamesIn(iVar, :), maxCGNSNameLen)
 
                     if (trim(varName) == "Thrust") then
-                        actuatorRegions(iRegion)%force = actuatorRegions(iRegion)%axisVec* &
+                        actuatorRegions(iRegion)%force = actuatorRegions(iRegion)%axisVec * &
                                                          bcDataIn(iVar)
-                        actuatorRegionsd(iRegion)%force = actuatorRegions(iRegion)%axisVec* &
+                        actuatorRegionsd(iRegion)%force = actuatorRegions(iRegion)%axisVec * &
                                                           bcDataInd(iVar)
                     else if (trim(varName) == "Torque") then
                         actuatorRegions(iRegion)%torque = bcDataIn(iVar)
@@ -1701,7 +1701,7 @@ subroutine setBCData_b(bcDataNamesIn, bcDataIn, bcDataInd, famLists, sps, &
         real(kind=realType), dimension(nVar), intent(in) :: bcDataIn
         real(kind=realType), dimension(nVar), intent(out) :: bcDataInd
         integer(kind=intType), dimension(nVar, nFamMax) :: famLists
-        integer(kind=intType), intent(in) ::  sps, nVar, nFamMax
+        integer(kind=intType), intent(in) :: sps, nVar, nFamMax
 
         !
         !      Local variables.
@@ -1773,7 +1773,7 @@ subroutine setBCData_b(bcDataNamesIn, bcDataIn, bcDataInd, famLists, sps, &
 
                     if (trim(varName) == "Thrust") then
                         bcDataInd(ivar) = &
-                            sum(actuatorRegions(iRegion)%axisVec*actuatorRegionsd(iRegion)%force)
+                            sum(actuatorRegions(iRegion)%axisVec * actuatorRegionsd(iRegion)%force)
                     else if (trim(varName) == "Torque") then
                         bcDataInd(ivar) = actuatorRegionsd(iRegion)%torque
                     else if (trim(varName) == "Heat") then
@@ -2074,7 +2074,7 @@ subroutine insertToDataSet(bcDataNamesIn, bcDataIn)
         !      Subroutine arguments.
         !
         character, dimension(:, :), intent(in) :: bcdatanamesin
-        real(kind=realType), dimension(:), intent(in):: bcDataIn
+        real(kind=realType), dimension(:), intent(in) :: bcDataIn
         !
         !      Local variables.
         !
@@ -2136,7 +2136,7 @@ subroutine insertToDataSet_d(bcDataNamesIn, bcDataIn, bcDataInd)
         !      Subroutine arguments.
         !
         character, dimension(:, :), intent(in) :: bcdatanamesin
-        real(kind=realType), dimension(:), intent(in):: bcDataIn, bcDataInd
+        real(kind=realType), dimension(:), intent(in) :: bcDataIn, bcDataInd
         !
         !      Local variables.
         !
@@ -2199,7 +2199,7 @@ subroutine insertToDataSet_b(bcDataNamesIn, bcDataIn, bcDataInd)
         !      Subroutine arguments.
         !
         character, dimension(:, :), intent(in) :: bcdatanamesin
-        real(kind=realType), dimension(:), intent(in):: bcDataIn
+        real(kind=realType), dimension(:), intent(in) :: bcDataIn
         real(kind=realType), dimension(:), intent(out) :: bcDataInd
         !
         !      Local variables.
@@ -3396,9 +3396,9 @@ subroutine setBCDataCoarseGrid
 
                                     ! Compute the total enthalpy.
 
-                                    TDim = BCData(j)%ttInlet(k, l)*Tref
+                                    TDim = BCData(j)%ttInlet(k, l) * Tref
                                     call computeHtot(TDim, Hdim)
-                                    BCData(j)%htInlet(k, l) = Hdim/Href
+                                    BCData(j)%htInlet(k, l) = Hdim / Href
 
                                     ! Flow direction.
 
@@ -3406,12 +3406,12 @@ subroutine setBCDataCoarseGrid
                                     dir(2) = BCData(j)%flowYdirInlet(k, l)
                                     dir(3) = BCData(j)%flowZdirInlet(k, l)
 
-                                    var = one/max(eps, sqrt(dir(1)**2 + dir(2)**2 &
-                                                            + dir(3)**2))
+                                    var = one / max(eps, sqrt(dir(1)**2 + dir(2)**2 &
+                                                              + dir(3)**2))
 
-                                    BCData(j)%flowXdirInlet(k, l) = var*dir(1)
-                                    BCData(j)%flowYdirInlet(k, l) = var*dir(2)
-                                    BCData(j)%flowZdirInlet(k, l) = var*dir(3)
+                                    BCData(j)%flowXdirInlet(k, l) = var * dir(1)
+                                    BCData(j)%flowYdirInlet(k, l) = var * dir(2)
+                                    BCData(j)%flowZdirInlet(k, l) = var * dir(3)
 
                                 end do
                             end do
@@ -3479,10 +3479,10 @@ subroutine interpolateBcData(varCoarse, varFine)
                     ! Compute the coarse grid data as the average of the
                     ! 4 fine grid values.
 
-                    varCoarse(i, j) = fourth*(varFine(if1, jf1) &
-                                              + varFine(if2, jf1) &
-                                              + varFine(if1, jf2) &
-                                              + varFine(if2, jf2))
+                    varCoarse(i, j) = fourth * (varFine(if1, jf1) &
+                                                + varFine(if2, jf1) &
+                                                + varFine(if1, jf2) &
+                                                + varFine(if2, jf2))
                 end do
             end do
 
@@ -3546,10 +3546,10 @@ subroutine interpolateBCVecData(varCoarse, varFine, &
                     ! 4 fine grid values.
 
                     do nn = nstart, nend
-                        varCoarse(i, j, nn) = fourth*(varFine(if1, jf1, nn) &
-                                                      + varFine(if2, jf1, nn) &
-                                                      + varFine(if1, jf2, nn) &
-                                                      + varFine(if2, jf2, nn))
+                        varCoarse(i, j, nn) = fourth * (varFine(if1, jf1, nn) &
+                                                        + varFine(if2, jf1, nn) &
+                                                        + varFine(if1, jf2, nn) &
+                                                        + varFine(if2, jf2, nn))
                     end do
                 end do
             end do
diff --git a/src/inputParam/inputParamRoutines.F90 b/src/inputParam/inputParamRoutines.F90
index b8dad6c7f..cd093921e 100644
--- a/src/inputParam/inputParamRoutines.F90
+++ b/src/inputParam/inputParamRoutines.F90
@@ -830,8 +830,8 @@ subroutine extractMGInfo
         !
         !      Local variables
         !
-        integer                      :: stringLen, error
-        integer(kind=intType)        :: i, ii, nMinus, nn
+        integer :: stringLen, error
+        integer(kind=intType) :: i, ii, nMinus, nn
         character(len=maxStringLen) :: errorMessage
 
         ! For an unsteady computation using explicit Runge-Kutta schemes
@@ -885,7 +885,7 @@ subroutine extractMGInfo
             ! Determine the number of steps in cycleStrategy and allocate
             ! the memory for it.
 
-            nMGSteps = 4*nMGLevels - 4
+            nMGSteps = 4 * nMGLevels - 4
             allocate (cycleStrategy(nMGSteps), stat=error)
             if (error /= 0) then
                 write (errorMessage, *) "Allocation error for", nMGSteps, &
@@ -1112,7 +1112,7 @@ recursive function computeNstepsWcycle(nLevels) result(nSteps)
         else if (nLevels == 2) then
             nSteps = 4
         else
-            nSteps = 4 + 2*computeNstepsWcycle(nLevels - 1)
+            nSteps = 4 + 2 * computeNstepsWcycle(nLevels - 1)
         end if
 
     end function computeNstepsWcycle
@@ -1133,7 +1133,7 @@ recursive subroutine setEntriesWcycle(counter, nLevels)
         !      Subroutine argument.
         !
         integer(kind=intType), intent(inout) :: counter
-        integer(kind=intType), intent(in)    :: nLevels
+        integer(kind=intType), intent(in) :: nLevels
         !
         !      Local variables
         !
@@ -1671,10 +1671,10 @@ subroutine readCpTempCurveFits
         integer :: ios, ierr
 
         integer(kind=intType) :: nn, mm, kk, ii
-        real(kind=realType)   :: T1, T2, e0
+        real(kind=realType) :: T1, T2, e0
 
         character(len=2*maxStringLen) :: errorMessage
-        character(len=512)            :: string
+        character(len=512) :: string
 
         ! Open the file for reading and check if it went okay. If the file
         ! is not found, processor 0 prints an error message.
@@ -1837,17 +1837,17 @@ subroutine readCpTempCurveFits
             ! Update cv0.
 
             T2 = T1**(cpTempFit(1)%exponents(ii))
-            cv0 = cv0 + cpTempFit(1)%constants(ii)*T2
+            cv0 = cv0 + cpTempFit(1)%constants(ii) * T2
 
             ! Update e0, for which this contribution must be integrated.
             ! Take the exceptional case exponent is -1 into account.
 
             if (cpTempFit(1)%exponents(ii) == -1_intType) then
-                e0 = e0 + cpTempFit(1)%constants(ii)*log(T1)
+                e0 = e0 + cpTempFit(1)%constants(ii) * log(T1)
             else
-                T2 = T1*T2
-                e0 = e0 + cpTempFit(1)%constants(ii)*T2 &
-                     /(cpTempFit(1)%exponents(ii) + 1)
+                T2 = T1 * T2
+                e0 = e0 + cpTempFit(1)%constants(ii) * T2 &
+                     / (cpTempFit(1)%exponents(ii) + 1)
             end if
 
         end do
@@ -1856,7 +1856,7 @@ subroutine readCpTempCurveFits
         ! Cv is assumed to be constant in the temperature range 0 - T1.
         ! Idem for the internal enthalpy.
 
-        cpEint(0) = cv0*T1
+        cpEint(0) = cv0 * T1
         cpHint(0) = cpEint(0) + T1
 
         ! Compute the integration constant for the energy.
@@ -1885,11 +1885,11 @@ subroutine readCpTempCurveFits
 
             do ii = 1, cpTempFit(mm)%nterm
                 if (cpTempFit(mm)%exponents(ii) == -1_intType) then
-                    e0 = e0 + cpTempFit(mm)%constants(ii)*log(T1)
+                    e0 = e0 + cpTempFit(mm)%constants(ii) * log(T1)
                 else
                     kk = cpTempFit(mm)%exponents(ii) + 1
                     T2 = T1**kk
-                    e0 = e0 + cpTempFit(mm)%constants(ii)*T2/kk
+                    e0 = e0 + cpTempFit(mm)%constants(ii) * T2 / kk
                 end if
             end do
 
@@ -1904,11 +1904,11 @@ subroutine readCpTempCurveFits
 
             do ii = 1, cpTempFit(nn)%nterm
                 if (cpTempFit(nn)%exponents(ii) == -1_intType) then
-                    e0 = e0 - cpTempFit(nn)%constants(ii)*log(T1)
+                    e0 = e0 - cpTempFit(nn)%constants(ii) * log(T1)
                 else
                     kk = cpTempFit(nn)%exponents(ii) + 1
                     T2 = T1**kk
-                    e0 = e0 - cpTempFit(nn)%constants(ii)*T2/kk
+                    e0 = e0 - cpTempFit(nn)%constants(ii) * T2 / kk
                 end if
             end do
 
@@ -1935,16 +1935,16 @@ subroutine readCpTempCurveFits
             ! Update cvn.
 
             T2 = T1**(cpTempFit(nn)%exponents(ii))
-            cvn = cvn + cpTempFit(nn)%constants(ii)*T2
+            cvn = cvn + cpTempFit(nn)%constants(ii) * T2
 
             ! Update e0, for which this contribution must be integrated.
             ! Take the exceptional case exponent is -1 into account.
 
             if (cpTempFit(nn)%exponents(ii) == -1_intType) then
-                e0 = e0 + cpTempFit(nn)%constants(ii)*log(T1)
+                e0 = e0 + cpTempFit(nn)%constants(ii) * log(T1)
             else
-                e0 = e0 + cpTempFit(nn)%constants(ii)*T2*T1 &
-                     /(cpTempFit(nn)%exponents(ii) + 1)
+                e0 = e0 + cpTempFit(nn)%constants(ii) * T2 * T1 &
+                     / (cpTempFit(nn)%exponents(ii) + 1)
             end if
 
         end do
@@ -1991,20 +1991,20 @@ subroutine readCpTempCurveFits
                 if (T1 > zero) then
                     if (mm == 0_intType) then
                         cpTempFit(nn)%intCpovrT_1 = cpTempFit(nn)%intCpovrT_1 &
-                                                    + cpTempFit(nn)%constants(ii)*log(T1)
+                                                    + cpTempFit(nn)%constants(ii) * log(T1)
                     else
                         cpTempFit(nn)%intCpovrT_1 = cpTempFit(nn)%intCpovrT_1 &
-                                                    + (cpTempFit(nn)%constants(ii)*T1**mm)/mm
+                                                    + (cpTempFit(nn)%constants(ii) * T1**mm) / mm
                     end if
                 end if
 
                 if (T2 > zero) then
                     if (mm == 0_intType) then
                         cpTempFit(nn)%intCpovrT_2 = cpTempFit(nn)%intCpovrT_2 &
-                                                    + cpTempFit(nn)%constants(ii)*log(T2)
+                                                    + cpTempFit(nn)%constants(ii) * log(T2)
                     else
                         cpTempFit(nn)%intCpovrT_2 = cpTempFit(nn)%intCpovrT_2 &
-                                                    + (cpTempFit(nn)%constants(ii)*T2**mm)/mm
+                                                    + (cpTempFit(nn)%constants(ii) * T2**mm) / mm
                     end if
                 end if
 
@@ -2027,7 +2027,7 @@ subroutine findNextInfoLine(readUnit, string)
         !
         !      Subroutine arguments
         !
-        integer, intent(in)             :: readUnit
+        integer, intent(in) :: readUnit
         character(len=512), intent(out) :: string
         !
         !      Local variables.
@@ -2269,15 +2269,15 @@ subroutine setStageCoeffExplicitRK
             ! Low storage (although not exploited in this implemetation)
             ! 3 stage scheme of Le and Moin.
 
-            betaRKUnsteady(1, 1) = 8.0_realType/15.0_realType
-            betaRKUnsteady(2, 1) = -17.0_realType/60.0_realType
-            betaRKUnsteady(2, 2) = 5.0_realType/12.0_realType
-            betaRKUnsteady(3, 2) = -5.0_realType/12.0_realType
-            betaRKUnsteady(3, 3) = 3.0_realType/4.0_realType
+            betaRKUnsteady(1, 1) = 8.0_realType / 15.0_realType
+            betaRKUnsteady(2, 1) = -17.0_realType / 60.0_realType
+            betaRKUnsteady(2, 2) = 5.0_realType / 12.0_realType
+            betaRKUnsteady(3, 2) = -5.0_realType / 12.0_realType
+            betaRKUnsteady(3, 3) = 3.0_realType / 4.0_realType
 
             gammaRKUnsteady(1) = 0.0_realType
-            gammaRKUnsteady(2) = 8.0_realType/15.0_realType
-            gammaRKUnsteady(3) = 2.0_realType/3.0_realType
+            gammaRKUnsteady(2) = 8.0_realType / 15.0_realType
+            gammaRKUnsteady(3) = 2.0_realType / 3.0_realType
 
             ! The TVD Runge Kutta scheme which allows for the maximum
             ! CFL number (1.0).
@@ -2963,9 +2963,9 @@ subroutine checkInputParam
         ! an error message. Only for external flows.
 
         if (flowType == externalFlow) then
-            vecLength = sqrt(velDirFreestream(1)*velDirFreestream(1) &
-                             + velDirFreestream(2)*velDirFreestream(2) &
-                             + velDirFreestream(3)*velDirFreestream(3))
+            vecLength = sqrt(velDirFreestream(1) * velDirFreestream(1) &
+                             + velDirFreestream(2) * velDirFreestream(2) &
+                             + velDirFreestream(3) * velDirFreestream(3))
             if (vecLength < eps) then
                 if (myID == 0) &
                      call terminate("checkInputParam", &
@@ -2974,10 +2974,10 @@ subroutine checkInputParam
                 call mpi_barrier(ADflow_comm_world, ierr)
             end if
 
-            vecLength = one/vecLength
-            velDirFreestream(1) = velDirFreestream(1)*vecLength
-            velDirFreestream(2) = velDirFreestream(2)*vecLength
-            velDirFreestream(3) = velDirFreestream(3)*vecLength
+            vecLength = one / vecLength
+            velDirFreestream(1) = velDirFreestream(1) * vecLength
+            velDirFreestream(2) = velDirFreestream(2) * vecLength
+            velDirFreestream(3) = velDirFreestream(3) * vecLength
         else
             ! Internal flow; simply reset the velocity direction. The value
             ! will be determined later from the inflow boundary conditions.
@@ -2999,9 +2999,9 @@ subroutine checkInputParam
             ! Create a unit vector. Perform the same check as for
             ! for the free stream velocity direction.
 
-            vecLength = sqrt(liftDirection(1)*liftDirection(1) &
-                             + liftDirection(2)*liftDirection(2) &
-                             + liftDirection(3)*liftDirection(3))
+            vecLength = sqrt(liftDirection(1) * liftDirection(1) &
+                             + liftDirection(2) * liftDirection(2) &
+                             + liftDirection(3) * liftDirection(3))
             if (vecLength < eps) then
                 if (myID == 0) &
                     call terminate("checkInputParam", &
@@ -3009,16 +3009,16 @@ subroutine checkInputParam
                 call mpi_barrier(ADflow_comm_world, ierr)
             end if
 
-            vecLength = one/vecLength
-            liftDirection(1) = liftDirection(1)*vecLength
-            liftDirection(2) = liftDirection(2)*vecLength
-            liftDirection(3) = liftDirection(3)*vecLength
+            vecLength = one / vecLength
+            liftDirection(1) = liftDirection(1) * vecLength
+            liftDirection(2) = liftDirection(2) * vecLength
+            liftDirection(3) = liftDirection(3) * vecLength
 
             ! Check the orthogonality with the drag direction.
 
-            dot = liftDirection(1)*dragDirection(1) &
-                  + liftDirection(2)*dragDirection(2) &
-                  + liftDirection(3)*dragDirection(3)
+            dot = liftDirection(1) * dragDirection(1) &
+                  + liftDirection(2) * dragDirection(2) &
+                  + liftDirection(3) * dragDirection(3)
 
             if (abs(dot) > 1.e-3_realType) then
                 if (myID == 0) &
@@ -3788,11 +3788,11 @@ subroutine setDefaultValues
         ! applied.
 
         vis2 = half
-        vis4 = one/64.0_realType
+        vis4 = one / 64.0_realType
         vis2Coarse = half
 
         dirScaling = .true.                 ! Apply isotropic directional
-        adis = two*third              ! scaling in the artificial
+        adis = two * third              ! scaling in the artificial
         ! dissipation schemes.
 
         hScalingInlet = .false.             ! No total enthalpy scaling.
@@ -4104,7 +4104,7 @@ subroutine setIsoSurfaceVariable(variable, iVar)
         !
         !      Subroutine arguments.
         !
-        character(len=*), intent(in):: variable
+        character(len=*), intent(in) :: variable
         integer(kind=intType) :: iVar
 
         select case (variable)
diff --git a/src/modules/block.F90 b/src/modules/block.F90
index b5db7f123..b28c6d4a4 100644
--- a/src/modules/block.F90
+++ b/src/modules/block.F90
@@ -516,10 +516,10 @@ module block
         real(kind=realType), dimension(:, :, :), pointer :: shockSensor
 
         ! Nodal Fluxes: ux,uy,uz,vx,vy,vz,wx,wy,wz,qx,qy,qz(il, jl, kl)
-        real(kind=realType), dimension(:, :, :), pointer:: ux, uy, uz
-        real(kind=realType), dimension(:, :, :), pointer:: vx, vy, vz
-        real(kind=realType), dimension(:, :, :), pointer:: wx, wy, wz
-        real(kind=realType), dimension(:, :, :), pointer:: qx, qy, qz
+        real(kind=realType), dimension(:, :, :), pointer :: ux, uy, uz
+        real(kind=realType), dimension(:, :, :), pointer :: vx, vy, vz
+        real(kind=realType), dimension(:, :, :), pointer :: wx, wy, wz
+        real(kind=realType), dimension(:, :, :), pointer :: qx, qy, qz
 
         !
         !         Residual and multigrid variables.
diff --git a/src/modules/blockPointers.F90 b/src/modules/blockPointers.F90
index 05edd8a3d..2b0e66b6e 100644
--- a/src/modules/blockPointers.F90
+++ b/src/modules/blockPointers.F90
@@ -128,10 +128,10 @@ module blockPointers
     real(kind=realType), dimension(:, :, :, :), pointer :: scratch
     real(kind=realType), dimension(:, :, :, :, :), pointer :: dwOldRK
     real(kind=realType), dimension(:, :, :, :), pointer :: w1, wr
-    real(kind=realType), dimension(:, :, :), pointer:: ux, uy, uz
-    real(kind=realType), dimension(:, :, :), pointer:: vx, vy, vz
-    real(kind=realType), dimension(:, :, :), pointer:: wx, wy, wz
-    real(kind=realType), dimension(:, :, :), pointer:: qx, qy, qz
+    real(kind=realType), dimension(:, :, :), pointer :: ux, uy, uz
+    real(kind=realType), dimension(:, :, :), pointer :: vx, vy, vz
+    real(kind=realType), dimension(:, :, :), pointer :: wx, wy, wz
+    real(kind=realType), dimension(:, :, :), pointer :: qx, qy, qz
 
     integer(kind=intType), dimension(:, :), pointer :: mgIFine
     integer(kind=intType), dimension(:, :), pointer :: mgJFine
@@ -185,7 +185,7 @@ module blockPointers
     REAL(kind=realtype), DIMENSION(:, :, :, :), POINTER :: xd
     REAL(kind=realtype), DIMENSION(:, :, :, :), POINTER :: sid, sjd, skd
 
-    real(kind=realType), dimension(:, :, :), pointer ::vold
+    real(kind=realType), dimension(:, :, :), pointer :: vold
 
     REAL(kind=realtype), DIMENSION(:, :, :, :, :), POINTER :: rotmatrixid
     REAL(kind=realtype), DIMENSION(:, :, :, :, :), POINTER :: rotmatrixjd
diff --git a/src/modules/constants.F90 b/src/modules/constants.F90
index 867b8279e..28e423dcf 100644
--- a/src/modules/constants.F90
+++ b/src/modules/constants.F90
@@ -78,9 +78,9 @@ module constants
     real(kind=realType), parameter :: eight = 8.0_realType
 
     real(kind=realType), parameter :: half = 0.5_realType
-    real(kind=realType), parameter :: third = one/three
+    real(kind=realType), parameter :: third = one / three
     real(kind=realType), parameter :: fourth = 0.25_realType
-    real(kind=realType), parameter :: sixth = one/six
+    real(kind=realType), parameter :: sixth = one / six
     real(kind=realType), parameter :: eighth = 0.125_realType
     real(kind=realType), parameter :: threefourth = 0.75_realType
     real(kind=realType), parameter :: sqrtthree = 1.7320508075688772_realType
diff --git a/src/modules/inputParam.F90 b/src/modules/inputParam.F90
index 35353559e..9ea6b6925 100644
--- a/src/modules/inputParam.F90
+++ b/src/modules/inputParam.F90
@@ -483,7 +483,7 @@ module inputParallel
     ! loadBalanceIter: The number of refinment iterations to run to try
     !                   to get better load balancing.
     real(realType) :: loadImbalance
-    logical        :: splitBlocks
+    logical :: splitBlocks
     integer(kind=inttype) :: loadBalanceIter, partitionlikenproc
 end module inputParallel
 
@@ -566,8 +566,8 @@ module inputPhysics
     integer(kind=intType) :: equations, equationMode, flowType
     integer(kind=intType) :: turbModel, cpModel, turbProd
     integer(kind=intType) :: rvfN
-    logical               :: rvfB
-    logical               :: useQCR, useRotationSA, useft2SA
+    logical :: rvfB
+    logical :: useQCR, useRotationSA, useft2SA
 
     logical :: wallFunctions, wallDistanceNeeded
 
@@ -586,7 +586,7 @@ module inputPhysics
     real(kind=realType), dimension(3, 2) :: momentAxis
     real(kind=realType) :: SSuthDim, muSuthDim, TSuthDim
     real(kind=realType) :: cavitationnumber
-    logical             :: useRoughSA
+    logical :: useRoughSA
     real(kind=realType) :: cpmin_rho
     real(kind=realType), dimension(:), allocatable :: cpmin_family
 
@@ -648,7 +648,7 @@ module inputTimeSpectral
     ! dtUnsteadyRestartSpectral:    The corresponding time step.
 
     real(kind=realType) :: dtUnsteadyRestartSpectral
-    logical ::             writeUnsteadyRestartSpectral
+    logical :: writeUnsteadyRestartSpectral
 
     ! writeUnsteadyVolSpectral:  Whether or not the corresponding
     !                            unsteady volume solution files
@@ -660,8 +660,8 @@ module inputTimeSpectral
     !                            unsteady solutions to be created.
 
     integer(kind=intType) :: nUnsteadySolSpectral
-    logical ::               writeUnsteadyVolSpectral
-    logical ::               writeUnsteadySurfSpectral
+    logical :: writeUnsteadyVolSpectral
+    logical :: writeUnsteadySurfSpectral
 
     ! rotMatrixSpectral(:,3,3):  The corresponding rotation matrices
     !                            for the velocity. No rotation
@@ -787,7 +787,7 @@ module inputADjoint
 
     ! FillLevel     : Number of levels of fill for the ILU local PC
     ! Overlap       : Amount of overlap in the ASM PC
-    integer(kind=intType):: FillLevel, Overlap
+    integer(kind=intType) :: FillLevel, Overlap
 
     ! adjRelTol     : Relative tolerance
     ! adjAbsTol     : Absolute tolerance
@@ -797,14 +797,14 @@ module inputADjoint
     !                 It has a high impact on the required memory!
     ! adjMonStep    : Convergence monitor step
 
-    real(kind=alwaysRealType)    :: adjRelTol
-    real(kind=alwaysRealType)    :: adjAbsTol
-    real(kind=alwaysRealType)    :: adjRelTolRel
-    real(kind=alwaysRealType)    :: adjDivTol
+    real(kind=alwaysRealType) :: adjRelTol
+    real(kind=alwaysRealType) :: adjAbsTol
+    real(kind=alwaysRealType) :: adjRelTolRel
+    real(kind=alwaysRealType) :: adjDivTol
     real(kind=realType) :: adjMaxL2Dev
-    integer(kind=intType)  :: adjMaxIter
-    integer(kind=intType)  :: adjRestart
-    integer(kind=intType)  :: adjMonStep
+    integer(kind=intType) :: adjMaxIter
+    integer(kind=intType) :: adjRestart
+    integer(kind=intType) :: adjMonStep
 
     ! outerPCIts : Number of iterations to run for on (global) preconditioner
     ! intterPCIts : Number of iterations to run on local preconditioner
@@ -840,14 +840,14 @@ module inputTSStabDeriv
 
     ! TSStability : Whether or not the TS stability derivatives should
     !               be computed
-    logical:: TSStability, TSAlphaMode, TSBetaMode, TSpMode, &
-              TSqMode, TSrMode, TSAltitudeMode, TSMachMode
+    logical :: TSStability, TSAlphaMode, TSBetaMode, TSpMode, &
+               TSqMode, TSrMode, TSAltitudeMode, TSMachMode
     ! TSAlphaFollowing : Whether or not alpha follows the body in p,q,r mode
-    logical:: TSAlphaFollowing
+    logical :: TSAlphaFollowing
 
     ! useWindAxis : whether to rotate around the wind axis or the body
     !               axis...
-    logical:: useWindAxis
+    logical :: useWindAxis
 
 end module inputTSStabDeriv
 
@@ -867,8 +867,8 @@ module inputOverset
     integer(kind=intType) :: oversetUpdateMode
     real(kind=realType) :: selfZipCutoff
     ! nRefine: number of connectivity loops to run
-    integer(kind=intType)::nRefine
-    integer(kind=intType)::nFloodIter
+    integer(kind=intType) :: nRefine
+    integer(kind=intType) :: nFloodIter
     logical :: useZipperMesh
     logical :: useOversetWallScaling
     logical :: oversetDebugPrint
diff --git a/src/modules/paramTurb.F90 b/src/modules/paramTurb.F90
index 51a607929..9adf41ae9 100644
--- a/src/modules/paramTurb.F90
+++ b/src/modules/paramTurb.F90
@@ -15,8 +15,8 @@ module paramTurb
     real(kind=realType), parameter :: rsaCb2 = 0.622_realType
     real(kind=realType), parameter :: rsaCb3 = 0.66666666667_realType
     real(kind=realType), parameter :: rsaCv1 = 7.1_realType
-    real(kind=realType), parameter :: rsaCw1 = rsaCb1/(rsaK*rsaK) &
-                                      + (1.+rsaCb2)/rsaCb3
+    real(kind=realType), parameter :: rsaCw1 = rsaCb1 / (rsaK * rsaK) &
+                                      + (1.+rsaCb2) / rsaCb3
     real(kind=realType), parameter :: rsaCw2 = 0.3_realType
     real(kind=realType), parameter :: rsaCw3 = 2.0_realType
     real(kind=realType), parameter :: rsaCt1 = 1.0_realType
diff --git a/src/modules/wallDistanceData.F90 b/src/modules/wallDistanceData.F90
index a9d613a33..5690cb754 100644
--- a/src/modules/wallDistanceData.F90
+++ b/src/modules/wallDistanceData.F90
@@ -30,8 +30,8 @@ module wallDistanceData
 
     real(kind=realType), dimension(:), pointer :: xSurf
 
-    logical, dimension(:), allocatable  :: wallDistanceDataAllocated
-    logical, dimension(:), allocatable ::  updateWallAssociation
+    logical, dimension(:), allocatable :: wallDistanceDataAllocated
+    logical, dimension(:), allocatable :: updateWallAssociation
 
 #ifndef USE_TAPENADE
     real(kind=realType), dimension(:), pointer :: xSurfd
diff --git a/src/output/outputMod.F90 b/src/output/outputMod.F90
index e1657a058..74c96156a 100644
--- a/src/output/outputMod.F90
+++ b/src/output/outputMod.F90
@@ -750,7 +750,7 @@ subroutine storeSolInBuffer(buffer, copyInBuffer, solName, &
         integer(kind=intType), intent(in) :: kBeg, kEnd
 
         real(kind=realType), dimension(*), intent(out) :: buffer
-        character(len=*), intent(in)                   :: solName
+        character(len=*), intent(in) :: solName
 
         logical, intent(in) :: copyInBuffer
         !
@@ -793,7 +793,7 @@ subroutine storeSolInBuffer(buffer, copyInBuffer, solName, &
             do k = kBeg, kEnd
                 do j = jBeg, jEnd
                     do i = iBeg, iEnd
-                        wIO(i, j, k, 1) = w(i, j, k, irho)*w(i, j, k, ivx)
+                        wIO(i, j, k, 1) = w(i, j, k, irho) * w(i, j, k, ivx)
                     end do
                 end do
             end do
@@ -802,7 +802,7 @@ subroutine storeSolInBuffer(buffer, copyInBuffer, solName, &
             do k = kBeg, kEnd
                 do j = jBeg, jEnd
                     do i = iBeg, iEnd
-                        wIO(i, j, k, 1) = w(i, j, k, irho)*w(i, j, k, ivy)
+                        wIO(i, j, k, 1) = w(i, j, k, irho) * w(i, j, k, ivy)
                     end do
                 end do
             end do
@@ -811,7 +811,7 @@ subroutine storeSolInBuffer(buffer, copyInBuffer, solName, &
             do k = kBeg, kEnd
                 do j = jBeg, jEnd
                     do i = iBeg, iEnd
-                        wIO(i, j, k, 1) = w(i, j, k, irho)*w(i, j, k, ivz)
+                        wIO(i, j, k, 1) = w(i, j, k, irho) * w(i, j, k, ivz)
                     end do
                 end do
             end do
@@ -928,17 +928,17 @@ subroutine storeSolInBuffer(buffer, copyInBuffer, solName, &
             do k = kBeg, kEnd
                 do j = jBeg, jEnd
                     do i = iBeg, iEnd
-                        wIO(i, j, k, 1) = p(i, j, k)/(RGas*w(i, j, k, irho))
+                        wIO(i, j, k, 1) = p(i, j, k) / (RGas * w(i, j, k, irho))
                     end do
                 end do
             end do
 
         case (cgnsCp)
-            tmp = two/(gammaInf*pInf*MachCoef*MachCoef)
+            tmp = two / (gammaInf * pInf * MachCoef * MachCoef)
             do k = kBeg, kEnd
                 do j = jBeg, jEnd
                     do i = iBeg, iEnd
-                        wIO(i, j, k, 1) = tmp*(p(i, j, k) - pInf)
+                        wIO(i, j, k, 1) = tmp * (p(i, j, k) - pInf)
                     end do
                 end do
             end do
@@ -947,10 +947,10 @@ subroutine storeSolInBuffer(buffer, copyInBuffer, solName, &
             do k = kBeg, kEnd
                 do j = jBeg, jEnd
                     do i = iBeg, iEnd
-                        a2 = gamma(i, j, k)*max(p(i, j, k), plim) &
-                             /max(w(i, j, k, irho), rholim)
+                        a2 = gamma(i, j, k) * max(p(i, j, k), plim) &
+                             / max(w(i, j, k, irho), rholim)
                         tmp = (w(i, j, k, ivx)**2 + w(i, j, k, ivy)**2 &
-                               + w(i, j, k, ivz)**2)/a2
+                               + w(i, j, k, ivz)**2) / a2
                         wIO(i, j, k, 1) = sqrt(max(zero, tmp))
                     end do
                 end do
@@ -960,11 +960,11 @@ subroutine storeSolInBuffer(buffer, copyInBuffer, solName, &
             do k = kBeg, kEnd
                 do j = jBeg, jEnd
                     do i = iBeg, iEnd
-                        a2 = gamma(i, j, k)*max(p(i, j, k), plim) &
-                             /max(w(i, j, k, irho), rholim)
+                        a2 = gamma(i, j, k) * max(p(i, j, k), plim) &
+                             / max(w(i, j, k, irho), rholim)
                         tmp = ((w(i, j, k, ivx) - s(i, j, k, 1))**2 + &
                                (w(i, j, k, ivy) - s(i, j, k, 2))**2 &
-                               + (w(i, j, k, ivz) - s(i, j, k, 3))**2)/a2
+                               + (w(i, j, k, ivz) - s(i, j, k, 3))**2) / a2
                         wIO(i, j, k, 1) = sqrt(max(zero, tmp))
                     end do
                 end do
@@ -974,8 +974,8 @@ subroutine storeSolInBuffer(buffer, copyInBuffer, solName, &
             do k = kBeg, kEnd
                 do j = jBeg, jEnd
                     do i = iBeg, iEnd
-                        tmp = w(i, j, k, irho)*w(i, j, k, itu1) &
-                              /(gamma(i, j, k)*max(p(i, j, k), plim))
+                        tmp = w(i, j, k, irho) * w(i, j, k, itu1) &
+                              / (gamma(i, j, k) * max(p(i, j, k), plim))
                         wIO(i, j, k, 1) = sqrt(max(zero, tmp))
                     end do
                 end do
@@ -994,7 +994,7 @@ subroutine storeSolInBuffer(buffer, copyInBuffer, solName, &
             do k = kBeg, kEnd
                 do j = jBeg, jEnd
                     do i = iBeg, iEnd
-                        wIO(i, j, k, 1) = rev(i, j, k)/rlv(i, j, k)
+                        wIO(i, j, k, 1) = rev(i, j, k) / rlv(i, j, k)
                     end do
                 end do
             end do
@@ -1016,52 +1016,52 @@ subroutine storeSolInBuffer(buffer, copyInBuffer, solName, &
             do k = kBeg, kEnd
                 do j = jBeg, jEnd
                     do i = iBeg, iEnd
-                        tmp = half/vol(i, j, k)
-                        uuy = si(i, j, k, 2)*w(i + 1, j, k, ivx) &
-                              - si(i - 1, j, k, 2)*w(i - 1, j, k, ivx) &
-                              + sj(i, j, k, 2)*w(i, j + 1, k, ivx) &
-                              - sj(i, j - 1, k, 2)*w(i, j - 1, k, ivx) &
-                              + sk(i, j, k, 2)*w(i, j, k + 1, ivx) &
-                              - sk(i, j, k - 1, 2)*w(i, j, k - 1, ivx)
-
-                        uuz = si(i, j, k, 3)*w(i + 1, j, k, ivx) &
-                              - si(i - 1, j, k, 3)*w(i - 1, j, k, ivx) &
-                              + sj(i, j, k, 3)*w(i, j + 1, k, ivx) &
-                              - sj(i, j - 1, k, 3)*w(i, j - 1, k, ivx) &
-                              + sk(i, j, k, 3)*w(i, j, k + 1, ivx) &
-                              - sk(i, j, k - 1, 3)*w(i, j, k - 1, ivx)
-
-                        vvx = si(i, j, k, 1)*w(i + 1, j, k, ivy) &
-                              - si(i - 1, j, k, 1)*w(i - 1, j, k, ivy) &
-                              + sj(i, j, k, 1)*w(i, j + 1, k, ivy) &
-                              - sj(i, j - 1, k, 1)*w(i, j - 1, k, ivy) &
-                              + sk(i, j, k, 1)*w(i, j, k + 1, ivy) &
-                              - sk(i, j, k - 1, 1)*w(i, j, k - 1, ivy)
-
-                        vvz = si(i, j, k, 3)*w(i + 1, j, k, ivy) &
-                              - si(i - 1, j, k, 3)*w(i - 1, j, k, ivy) &
-                              + sj(i, j, k, 3)*w(i, j + 1, k, ivy) &
-                              - sj(i, j - 1, k, 3)*w(i, j - 1, k, ivy) &
-                              + sk(i, j, k, 3)*w(i, j, k + 1, ivy) &
-                              - sk(i, j, k - 1, 3)*w(i, j, k - 1, ivy)
-
-                        wwx = si(i, j, k, 1)*w(i + 1, j, k, ivz) &
-                              - si(i - 1, j, k, 1)*w(i - 1, j, k, ivz) &
-                              + sj(i, j, k, 1)*w(i, j + 1, k, ivz) &
-                              - sj(i, j - 1, k, 1)*w(i, j - 1, k, ivz) &
-                              + sk(i, j, k, 1)*w(i, j, k + 1, ivz) &
-                              - sk(i, j, k - 1, 1)*w(i, j, k - 1, ivz)
-
-                        wwy = si(i, j, k, 2)*w(i + 1, j, k, ivz) &
-                              - si(i - 1, j, k, 2)*w(i - 1, j, k, ivz) &
-                              + sj(i, j, k, 2)*w(i, j + 1, k, ivz) &
-                              - sj(i, j - 1, k, 2)*w(i, j - 1, k, ivz) &
-                              + sk(i, j, k, 2)*w(i, j, k + 1, ivz) &
-                              - sk(i, j, k - 1, 2)*w(i, j, k - 1, ivz)
+                        tmp = half / vol(i, j, k)
+                        uuy = si(i, j, k, 2) * w(i + 1, j, k, ivx) &
+                              - si(i - 1, j, k, 2) * w(i - 1, j, k, ivx) &
+                              + sj(i, j, k, 2) * w(i, j + 1, k, ivx) &
+                              - sj(i, j - 1, k, 2) * w(i, j - 1, k, ivx) &
+                              + sk(i, j, k, 2) * w(i, j, k + 1, ivx) &
+                              - sk(i, j, k - 1, 2) * w(i, j, k - 1, ivx)
+
+                        uuz = si(i, j, k, 3) * w(i + 1, j, k, ivx) &
+                              - si(i - 1, j, k, 3) * w(i - 1, j, k, ivx) &
+                              + sj(i, j, k, 3) * w(i, j + 1, k, ivx) &
+                              - sj(i, j - 1, k, 3) * w(i, j - 1, k, ivx) &
+                              + sk(i, j, k, 3) * w(i, j, k + 1, ivx) &
+                              - sk(i, j, k - 1, 3) * w(i, j, k - 1, ivx)
+
+                        vvx = si(i, j, k, 1) * w(i + 1, j, k, ivy) &
+                              - si(i - 1, j, k, 1) * w(i - 1, j, k, ivy) &
+                              + sj(i, j, k, 1) * w(i, j + 1, k, ivy) &
+                              - sj(i, j - 1, k, 1) * w(i, j - 1, k, ivy) &
+                              + sk(i, j, k, 1) * w(i, j, k + 1, ivy) &
+                              - sk(i, j, k - 1, 1) * w(i, j, k - 1, ivy)
+
+                        vvz = si(i, j, k, 3) * w(i + 1, j, k, ivy) &
+                              - si(i - 1, j, k, 3) * w(i - 1, j, k, ivy) &
+                              + sj(i, j, k, 3) * w(i, j + 1, k, ivy) &
+                              - sj(i, j - 1, k, 3) * w(i, j - 1, k, ivy) &
+                              + sk(i, j, k, 3) * w(i, j, k + 1, ivy) &
+                              - sk(i, j, k - 1, 3) * w(i, j, k - 1, ivy)
+
+                        wwx = si(i, j, k, 1) * w(i + 1, j, k, ivz) &
+                              - si(i - 1, j, k, 1) * w(i - 1, j, k, ivz) &
+                              + sj(i, j, k, 1) * w(i, j + 1, k, ivz) &
+                              - sj(i, j - 1, k, 1) * w(i, j - 1, k, ivz) &
+                              + sk(i, j, k, 1) * w(i, j, k + 1, ivz) &
+                              - sk(i, j, k - 1, 1) * w(i, j, k - 1, ivz)
+
+                        wwy = si(i, j, k, 2) * w(i + 1, j, k, ivz) &
+                              - si(i - 1, j, k, 2) * w(i - 1, j, k, ivz) &
+                              + sj(i, j, k, 2) * w(i, j + 1, k, ivz) &
+                              - sj(i, j - 1, k, 2) * w(i, j - 1, k, ivz) &
+                              + sk(i, j, k, 2) * w(i, j, k + 1, ivz) &
+                              - sk(i, j, k - 1, 2) * w(i, j, k - 1, ivz)
 
                         vortx = wwy - vvz; vorty = uuz - wwx; vortz = vvx - uuy
 
-                        wIO(i, j, k, 1) = tmp*sqrt(vortx**2 + vorty**2 + vortz**2)
+                        wIO(i, j, k, 1) = tmp * sqrt(vortx**2 + vorty**2 + vortz**2)
                     end do
                 end do
             end do
@@ -1071,22 +1071,22 @@ subroutine storeSolInBuffer(buffer, copyInBuffer, solName, &
             do k = kBeg, kEnd
                 do j = jBeg, jEnd
                     do i = iBeg, iEnd
-                        tmp = half/vol(i, j, k)
-                        vvz = si(i, j, k, 3)*w(i + 1, j, k, ivy) &
-                              - si(i - 1, j, k, 3)*w(i - 1, j, k, ivy) &
-                              + sj(i, j, k, 3)*w(i, j + 1, k, ivy) &
-                              - sj(i, j - 1, k, 3)*w(i, j - 1, k, ivy) &
-                              + sk(i, j, k, 3)*w(i, j, k + 1, ivy) &
-                              - sk(i, j, k - 1, 3)*w(i, j, k - 1, ivy)
-
-                        wwy = si(i, j, k, 2)*w(i + 1, j, k, ivz) &
-                              - si(i - 1, j, k, 2)*w(i - 1, j, k, ivz) &
-                              + sj(i, j, k, 2)*w(i, j + 1, k, ivz) &
-                              - sj(i, j - 1, k, 2)*w(i, j - 1, k, ivz) &
-                              + sk(i, j, k, 2)*w(i, j, k + 1, ivz) &
-                              - sk(i, j, k - 1, 2)*w(i, j, k - 1, ivz)
-
-                        wIO(i, j, k, 1) = tmp*(wwy - vvz)
+                        tmp = half / vol(i, j, k)
+                        vvz = si(i, j, k, 3) * w(i + 1, j, k, ivy) &
+                              - si(i - 1, j, k, 3) * w(i - 1, j, k, ivy) &
+                              + sj(i, j, k, 3) * w(i, j + 1, k, ivy) &
+                              - sj(i, j - 1, k, 3) * w(i, j - 1, k, ivy) &
+                              + sk(i, j, k, 3) * w(i, j, k + 1, ivy) &
+                              - sk(i, j, k - 1, 3) * w(i, j, k - 1, ivy)
+
+                        wwy = si(i, j, k, 2) * w(i + 1, j, k, ivz) &
+                              - si(i - 1, j, k, 2) * w(i - 1, j, k, ivz) &
+                              + sj(i, j, k, 2) * w(i, j + 1, k, ivz) &
+                              - sj(i, j - 1, k, 2) * w(i, j - 1, k, ivz) &
+                              + sk(i, j, k, 2) * w(i, j, k + 1, ivz) &
+                              - sk(i, j, k - 1, 2) * w(i, j, k - 1, ivz)
+
+                        wIO(i, j, k, 1) = tmp * (wwy - vvz)
                     end do
                 end do
             end do
@@ -1096,22 +1096,22 @@ subroutine storeSolInBuffer(buffer, copyInBuffer, solName, &
             do k = kBeg, kEnd
                 do j = jBeg, jEnd
                     do i = iBeg, iEnd
-                        tmp = half/vol(i, j, k)
-                        uuz = si(i, j, k, 3)*w(i + 1, j, k, ivx) &
-                              - si(i - 1, j, k, 3)*w(i - 1, j, k, ivx) &
-                              + sj(i, j, k, 3)*w(i, j + 1, k, ivx) &
-                              - sj(i, j - 1, k, 3)*w(i, j - 1, k, ivx) &
-                              + sk(i, j, k, 3)*w(i, j, k + 1, ivx) &
-                              - sk(i, j, k - 1, 3)*w(i, j, k - 1, ivx)
-
-                        wwx = si(i, j, k, 1)*w(i + 1, j, k, ivz) &
-                              - si(i - 1, j, k, 1)*w(i - 1, j, k, ivz) &
-                              + sj(i, j, k, 1)*w(i, j + 1, k, ivz) &
-                              - sj(i, j - 1, k, 1)*w(i, j - 1, k, ivz) &
-                              + sk(i, j, k, 1)*w(i, j, k + 1, ivz) &
-                              - sk(i, j, k - 1, 1)*w(i, j, k - 1, ivz)
-
-                        wIO(i, j, k, 1) = tmp*(uuz - wwx)
+                        tmp = half / vol(i, j, k)
+                        uuz = si(i, j, k, 3) * w(i + 1, j, k, ivx) &
+                              - si(i - 1, j, k, 3) * w(i - 1, j, k, ivx) &
+                              + sj(i, j, k, 3) * w(i, j + 1, k, ivx) &
+                              - sj(i, j - 1, k, 3) * w(i, j - 1, k, ivx) &
+                              + sk(i, j, k, 3) * w(i, j, k + 1, ivx) &
+                              - sk(i, j, k - 1, 3) * w(i, j, k - 1, ivx)
+
+                        wwx = si(i, j, k, 1) * w(i + 1, j, k, ivz) &
+                              - si(i - 1, j, k, 1) * w(i - 1, j, k, ivz) &
+                              + sj(i, j, k, 1) * w(i, j + 1, k, ivz) &
+                              - sj(i, j - 1, k, 1) * w(i, j - 1, k, ivz) &
+                              + sk(i, j, k, 1) * w(i, j, k + 1, ivz) &
+                              - sk(i, j, k - 1, 1) * w(i, j, k - 1, ivz)
+
+                        wIO(i, j, k, 1) = tmp * (uuz - wwx)
                     end do
                 end do
             end do
@@ -1121,22 +1121,22 @@ subroutine storeSolInBuffer(buffer, copyInBuffer, solName, &
             do k = kBeg, kEnd
                 do j = jBeg, jEnd
                     do i = iBeg, iEnd
-                        tmp = half/vol(i, j, k)
-                        uuy = si(i, j, k, 2)*w(i + 1, j, k, ivx) &
-                              - si(i - 1, j, k, 2)*w(i - 1, j, k, ivx) &
-                              + sj(i, j, k, 2)*w(i, j + 1, k, ivx) &
-                              - sj(i, j - 1, k, 2)*w(i, j - 1, k, ivx) &
-                              + sk(i, j, k, 2)*w(i, j, k + 1, ivx) &
-                              - sk(i, j, k - 1, 2)*w(i, j, k - 1, ivx)
-
-                        vvx = si(i, j, k, 1)*w(i + 1, j, k, ivy) &
-                              - si(i - 1, j, k, 1)*w(i - 1, j, k, ivy) &
-                              + sj(i, j, k, 1)*w(i, j + 1, k, ivy) &
-                              - sj(i, j - 1, k, 1)*w(i, j - 1, k, ivy) &
-                              + sk(i, j, k, 1)*w(i, j, k + 1, ivy) &
-                              - sk(i, j, k - 1, 1)*w(i, j, k - 1, ivy)
-
-                        wIO(i, j, k, 1) = tmp*(vvx - uuy)
+                        tmp = half / vol(i, j, k)
+                        uuy = si(i, j, k, 2) * w(i + 1, j, k, ivx) &
+                              - si(i - 1, j, k, 2) * w(i - 1, j, k, ivx) &
+                              + sj(i, j, k, 2) * w(i, j + 1, k, ivx) &
+                              - sj(i, j - 1, k, 2) * w(i, j - 1, k, ivx) &
+                              + sk(i, j, k, 2) * w(i, j, k + 1, ivx) &
+                              - sk(i, j, k - 1, 2) * w(i, j, k - 1, ivx)
+
+                        vvx = si(i, j, k, 1) * w(i + 1, j, k, ivy) &
+                              - si(i - 1, j, k, 1) * w(i - 1, j, k, ivy) &
+                              + sj(i, j, k, 1) * w(i, j + 1, k, ivy) &
+                              - sj(i, j - 1, k, 1) * w(i, j - 1, k, ivy) &
+                              + sk(i, j, k, 1) * w(i, j, k + 1, ivy) &
+                              - sk(i, j, k - 1, 1) * w(i, j, k - 1, ivy)
+
+                        wIO(i, j, k, 1) = tmp * (vvx - uuy)
                     end do
                 end do
             end do
@@ -1147,7 +1147,7 @@ subroutine storeSolInBuffer(buffer, copyInBuffer, solName, &
 
             call computePtot(rhoInf, uInf, zero, zero, &
                              pInf, ptotInf)
-            ptotInf = one/ptotInf
+            ptotInf = one / ptotInf
 
             ! Loop over the cell centers and compute the
             ! total pressure loss.
@@ -1159,7 +1159,7 @@ subroutine storeSolInBuffer(buffer, copyInBuffer, solName, &
                                          w(i, j, k, ivy), w(i, j, k, ivz), &
                                          p(i, j, k), ptot)
 
-                        wIO(i, j, k, 1) = one - ptot*ptotInf
+                        wIO(i, j, k, 1) = one - ptot * ptotInf
                     end do
                 end do
             end do
@@ -1170,7 +1170,7 @@ subroutine storeSolInBuffer(buffer, copyInBuffer, solName, &
                 do j = jBeg, jEnd
                     do i = iBeg, iEnd
                         !   wIO(i,j,k,1) = dw(i,j,k,irho)
-                        wIO(i, j, k, 1) = dw(i, j, k, irho)/vol(i, j, k)
+                        wIO(i, j, k, 1) = dw(i, j, k, irho) / vol(i, j, k)
                     end do
                 end do
             end do
@@ -1181,7 +1181,7 @@ subroutine storeSolInBuffer(buffer, copyInBuffer, solName, &
                 do j = jBeg, jEnd
                     do i = iBeg, iEnd
                         !   wIO(i,j,k,1) = dw(i,j,k,imx)
-                        wIO(i, j, k, 1) = dw(i, j, k, imx)/vol(i, j, k)
+                        wIO(i, j, k, 1) = dw(i, j, k, imx) / vol(i, j, k)
                     end do
                 end do
             end do
@@ -1192,7 +1192,7 @@ subroutine storeSolInBuffer(buffer, copyInBuffer, solName, &
                 do j = jBeg, jEnd
                     do i = iBeg, iEnd
                         !   wIO(i,j,k,1) = dw(i,j,k,imy)
-                        wIO(i, j, k, 1) = dw(i, j, k, imy)/vol(i, j, k)
+                        wIO(i, j, k, 1) = dw(i, j, k, imy) / vol(i, j, k)
                     end do
                 end do
             end do
@@ -1203,7 +1203,7 @@ subroutine storeSolInBuffer(buffer, copyInBuffer, solName, &
                 do j = jBeg, jEnd
                     do i = iBeg, iEnd
                         !   wIO(i,j,k,1) = dw(i,j,k,imz)
-                        wIO(i, j, k, 1) = dw(i, j, k, imz)/vol(i, j, k)
+                        wIO(i, j, k, 1) = dw(i, j, k, imz) / vol(i, j, k)
                     end do
                 end do
             end do
@@ -1214,7 +1214,7 @@ subroutine storeSolInBuffer(buffer, copyInBuffer, solName, &
                 do j = jBeg, jEnd
                     do i = iBeg, iEnd
                         !   wIO(i,j,k,1) = dw(i,j,k,irhoE)
-                        wIO(i, j, k, 1) = dw(i, j, k, irhoE)/vol(i, j, k)
+                        wIO(i, j, k, 1) = dw(i, j, k, irhoE) / vol(i, j, k)
                     end do
                 end do
             end do
@@ -1225,7 +1225,7 @@ subroutine storeSolInBuffer(buffer, copyInBuffer, solName, &
                 do j = jBeg, jEnd
                     do i = iBeg, iEnd
                         !   wIO(i,j,k,1) = dw(i,j,k,itu1)
-                        wIO(i, j, k, 1) = dw(i, j, k, itu1)/vol(i, j, k)
+                        wIO(i, j, k, 1) = dw(i, j, k, itu1) / vol(i, j, k)
                     end do
                 end do
             end do
@@ -1236,7 +1236,7 @@ subroutine storeSolInBuffer(buffer, copyInBuffer, solName, &
                 do j = jBeg, jEnd
                     do i = iBeg, iEnd
                         !   wIO(i,j,k,1) = dw(i,j,k,itu2)
-                        wIO(i, j, k, 1) = dw(i, j, k, itu2)/vol(i, j, k)
+                        wIO(i, j, k, 1) = dw(i, j, k, itu2) / vol(i, j, k)
                     end do
                 end do
             end do
@@ -1247,7 +1247,7 @@ subroutine storeSolInBuffer(buffer, copyInBuffer, solName, &
                 do j = jBeg, jEnd
                     do i = iBeg, iEnd
                         !   wIO(i,j,k,1) = dw(i,j,k,itu3)
-                        wIO(i, j, k, 1) = dw(i, j, k, itu3)/vol(i, j, k)
+                        wIO(i, j, k, 1) = dw(i, j, k, itu3) / vol(i, j, k)
                     end do
                 end do
             end do
@@ -1258,7 +1258,7 @@ subroutine storeSolInBuffer(buffer, copyInBuffer, solName, &
                 do j = jBeg, jEnd
                     do i = iBeg, iEnd
                         !   wIO(i,j,k,1) = dw(i,j,k,itu4)
-                        wIO(i, j, k, 1) = dw(i, j, k, itu4)/vol(i, j, k)
+                        wIO(i, j, k, 1) = dw(i, j, k, itu4) / vol(i, j, k)
                     end do
                 end do
             end do
@@ -1319,36 +1319,36 @@ subroutine storeSolInBuffer(buffer, copyInBuffer, solName, &
                         ! sound and P is the pressure.
 
                         ! U / a
-                        a = sqrt(gamma(i, j, k)*max(p(i, j, k), plim) &
-                                 /max(w(i, j, k, irho), rholim))
-                        UovA = (/w(i, j, k, ivx), w(i, j, k, ivy), w(i, j, k, ivz)/)/a
+                        a = sqrt(gamma(i, j, k) * max(p(i, j, k), plim) &
+                                 / max(w(i, j, k, irho), rholim))
+                        UovA = (/w(i, j, k, ivx), w(i, j, k, ivy), w(i, j, k, ivz)/) / a
 
                         ! grad P / ||grad P||
 
-                        gradP(1) = si(i, j, k, 1)*P(i + 1, j, k) &
-                                   - si(i - 1, j, k, 1)*P(i - 1, j, k) &
-                                   + sj(i, j, k, 1)*P(i, j + 1, k) &
-                                   - sj(i, j - 1, k, 1)*P(i, j - 1, k) &
-                                   + sk(i, j, k, 1)*P(i, j, k + 1) &
-                                   - sk(i, j, k - 1, 1)*P(i, j, k - 1)
-
-                        gradP(2) = si(i, j, k, 2)*P(i + 1, j, k) &
-                                   - si(i - 1, j, k, 2)*P(i - 1, j, k) &
-                                   + sj(i, j, k, 2)*P(i, j + 1, k) &
-                                   - sj(i, j - 1, k, 2)*P(i, j - 1, k) &
-                                   + sk(i, j, k, 2)*P(i, j, k + 1) &
-                                   - sk(i, j, k - 1, 2)*P(i, j, k - 1)
-
-                        gradP(3) = si(i, j, k, 3)*P(i + 1, j, k) &
-                                   - si(i - 1, j, k, 3)*P(i - 1, j, k) &
-                                   + sj(i, j, k, 3)*P(i, j + 1, k) &
-                                   - sj(i, j - 1, k, 3)*P(i, j - 1, k) &
-                                   + sk(i, j, k, 3)*P(i, j, k + 1) &
-                                   - sk(i, j, k - 1, 3)*P(i, j, k - 1)
-
-                        gradP = gradP/sqrt(gradP(1)**2 + gradP(2)**2 + gradP(3)**2)
+                        gradP(1) = si(i, j, k, 1) * P(i + 1, j, k) &
+                                   - si(i - 1, j, k, 1) * P(i - 1, j, k) &
+                                   + sj(i, j, k, 1) * P(i, j + 1, k) &
+                                   - sj(i, j - 1, k, 1) * P(i, j - 1, k) &
+                                   + sk(i, j, k, 1) * P(i, j, k + 1) &
+                                   - sk(i, j, k - 1, 1) * P(i, j, k - 1)
+
+                        gradP(2) = si(i, j, k, 2) * P(i + 1, j, k) &
+                                   - si(i - 1, j, k, 2) * P(i - 1, j, k) &
+                                   + sj(i, j, k, 2) * P(i, j + 1, k) &
+                                   - sj(i, j - 1, k, 2) * P(i, j - 1, k) &
+                                   + sk(i, j, k, 2) * P(i, j, k + 1) &
+                                   - sk(i, j, k - 1, 2) * P(i, j, k - 1)
+
+                        gradP(3) = si(i, j, k, 3) * P(i + 1, j, k) &
+                                   - si(i - 1, j, k, 3) * P(i - 1, j, k) &
+                                   + sj(i, j, k, 3) * P(i, j + 1, k) &
+                                   - sj(i, j - 1, k, 3) * P(i, j - 1, k) &
+                                   + sk(i, j, k, 3) * P(i, j, k + 1) &
+                                   - sk(i, j, k - 1, 3) * P(i, j, k - 1)
+
+                        gradP = gradP / sqrt(gradP(1)**2 + gradP(2)**2 + gradP(3)**2)
                         ! Dot product
-                        wIO(i, j, k, 1) = UovA(1)*gradP(1) + UovA(2)*gradP(2) + UovA(3)*gradP(3)
+                        wIO(i, j, k, 1) = UovA(1) * gradP(1) + UovA(2) * gradP(2) + UovA(3) * gradP(3)
                     end do
                 end do
             end do
@@ -1415,7 +1415,7 @@ subroutine storeSurfsolInBuffer(sps, buffer, nn, blockID, &
         !
         !      Subroutine arguments.
         !
-        integer(kind=intType), intent(in)    :: sps, blockID, faceID
+        integer(kind=intType), intent(in) :: sps, blockID, faceID
         integer(kind=intType), intent(inout) :: nn
         integer(kind=intType), dimension(3, 2), intent(in) :: cellRange
         real(kind=realType), dimension(*), intent(out) :: buffer
@@ -1752,7 +1752,7 @@ subroutine storeSurfsolInBuffer(sps, buffer, nn, blockID, &
         do j = rangeFace(2, 1), rangeFace(2, 2)
             do i = rangeFace(1, 1), rangeFace(1, 2)
                 nn = nn + 1
-                buffer(nn) = half*(ww1(i, j, irho) + ww2(i, j, irho))
+                buffer(nn) = half * (ww1(i, j, irho) + ww2(i, j, irho))
             end do
         end do
 
@@ -1763,7 +1763,7 @@ subroutine storeSurfsolInBuffer(sps, buffer, nn, blockID, &
         do j = rangeFace(2, 1), rangeFace(2, 2)
             do i = rangeFace(1, 1), rangeFace(1, 2)
                 nn = nn + 1
-                buffer(nn) = half*(pp1(i, j) + pp2(i, j))
+                buffer(nn) = half * (pp1(i, j) + pp2(i, j))
             end do
         end do
 
@@ -1775,7 +1775,7 @@ subroutine storeSurfsolInBuffer(sps, buffer, nn, blockID, &
             do i = rangeFace(1, 1), rangeFace(1, 2)
                 nn = nn + 1
                 buffer(nn) = (pp1(i, j) + pp2(i, j)) &
-                             /(RGas*(ww1(i, j, irho) + ww2(i, j, irho)))
+                             / (RGas * (ww1(i, j, irho) + ww2(i, j, irho)))
             end do
         end do
 
@@ -1789,7 +1789,7 @@ subroutine storeSurfsolInBuffer(sps, buffer, nn, blockID, &
                 if (viscousSurfaceVelocities .and. viscous) then
                     buffer(nn) = ww2(i, j, ivx)
                 else
-                    buffer(nn) = half*(ww1(i, j, ivx) + ww2(i, j, ivx))
+                    buffer(nn) = half * (ww1(i, j, ivx) + ww2(i, j, ivx))
                 end if
             end do
         end do
@@ -1804,7 +1804,7 @@ subroutine storeSurfsolInBuffer(sps, buffer, nn, blockID, &
                 if (viscousSurfaceVelocities .and. viscous) then
                     buffer(nn) = ww2(i, j, ivy)
                 else
-                    buffer(nn) = half*(ww1(i, j, ivy) + ww2(i, j, ivy))
+                    buffer(nn) = half * (ww1(i, j, ivy) + ww2(i, j, ivy))
                 end if
             end do
         end do
@@ -1819,7 +1819,7 @@ subroutine storeSurfsolInBuffer(sps, buffer, nn, blockID, &
                 if (viscousSurfaceVelocities .and. viscous) then
                     buffer(nn) = ww2(i, j, ivz)
                 else
-                    buffer(nn) = half*(ww1(i, j, ivz) + ww2(i, j, ivz))
+                    buffer(nn) = half * (ww1(i, j, ivz) + ww2(i, j, ivz))
                 end if
 
             end do
@@ -1834,7 +1834,7 @@ subroutine storeSurfsolInBuffer(sps, buffer, nn, blockID, &
                 if (viscousSurfaceVelocities .and. viscous) then
                     buffer(nn) = ww2(i, j, ivx) - ss2(i, j, 1)
                 else
-                    buffer(nn) = half*(ww1(i, j, ivx) + ww2(i, j, ivx)) - half*(ss1(i, j, 1) + ss2(i, j, 1))
+                    buffer(nn) = half * (ww1(i, j, ivx) + ww2(i, j, ivx)) - half * (ss1(i, j, 1) + ss2(i, j, 1))
                 end if
             end do
         end do
@@ -1848,7 +1848,7 @@ subroutine storeSurfsolInBuffer(sps, buffer, nn, blockID, &
                 if (viscousSurfaceVelocities .and. viscous) then
                     buffer(nn) = ww2(i, j, ivy) - ss2(i, j, 2)
                 else
-                    buffer(nn) = half*(ww1(i, j, ivy) + ww2(i, j, ivy)) - half*(ss1(i, j, 2) + ss2(i, j, 2))
+                    buffer(nn) = half * (ww1(i, j, ivy) + ww2(i, j, ivy)) - half * (ss1(i, j, 2) + ss2(i, j, 2))
                 end if
             end do
         end do
@@ -1862,7 +1862,7 @@ subroutine storeSurfsolInBuffer(sps, buffer, nn, blockID, &
                 if (viscousSurfaceVelocities .and. viscous) then
                     buffer(nn) = ww2(i, j, ivz) - ss2(i, j, 3)
                 else
-                    buffer(nn) = half*(ww1(i, j, ivz) + ww2(i, j, ivz)) - half*(ss1(i, j, 3) + ss2(i, j, 3))
+                    buffer(nn) = half * (ww1(i, j, ivz) + ww2(i, j, ivz)) - half * (ss1(i, j, 3) + ss2(i, j, 3))
                 end if
             end do
         end do
@@ -1878,7 +1878,7 @@ subroutine storeSurfsolInBuffer(sps, buffer, nn, blockID, &
         ! Note that the reference quantities (such as pRef, uRef, rhoInfDim, ..) are defined in  module
         ! flowVarRefState (see flowVarRefState.F90) and first set in the subroutine referenceState
         ! (see initializeFlow.F90).
-        uInfDim2 = (MachCoef*MachCoef*gammaInf*pInf/rhoInf)*uRef*uRef
+        uInfDim2 = (MachCoef * MachCoef * gammaInf * pInf / rhoInf) * uRef * uRef
 
         do j = rangeFace(2, 1), rangeFace(2, 2)
             do i = rangeFace(1, 1), rangeFace(1, 2)
@@ -1887,17 +1887,17 @@ subroutine storeSurfsolInBuffer(sps, buffer, nn, blockID, &
                 ! by averaging wall and halo cell centers
                 ! (xx1,xx2 are pointers to the mesh coordinates, see block.F90)
                 rrate_ = cgnsdoms(1)%rotrate
-                r_(1) = (half*(xx1(i, j, 1) + xx2(i, j, 1)))
-                r_(2) = (half*(xx1(i, j, 2) + xx2(i, j, 2)))
-                r_(3) = (half*(xx1(i, j, 3) + xx2(i, j, 3)))
+                r_(1) = (half * (xx1(i, j, 1) + xx2(i, j, 1)))
+                r_(2) = (half * (xx1(i, j, 2) + xx2(i, j, 2)))
+                r_(3) = (half * (xx1(i, j, 3) + xx2(i, j, 3)))
                 ! calc cross-product between rotation rate and r_
                 ! to obtain local apparent wall velocity
-                wCrossR(1) = rrate_(2)*r_(3) - rrate_(3)*r_(2)
-                wCrossR(2) = rrate_(3)*r_(1) - rrate_(1)*r_(3)
-                wCrossR(3) = rrate_(1)*r_(2) - rrate_(2)*r_(1)
+                wCrossR(1) = rrate_(2) * r_(3) - rrate_(3) * r_(2)
+                wCrossR(2) = rrate_(3) * r_(1) - rrate_(1) * r_(3)
+                wCrossR(3) = rrate_(1) * r_(2) - rrate_(2) * r_(1)
                 rot_speed2 = wCrossR(1)**2 + wCrossR(2)**2 + wCrossR(3)**2
-                buffer(nn) = ((half*(pp1(i, j) + pp2(i, j)) - pInf)*pRef) &
-                             /(half*(rhoInfDim)*(uInfDim2 + rot_speed2))
+                buffer(nn) = ((half * (pp1(i, j) + pp2(i, j)) - pInf) * pRef) &
+                             / (half * (rhoInfDim) * (uInfDim2 + rot_speed2))
                 ! Comments on the Cp (buffer(nn)) calculation above:
                 !
                 ! Cp = (P_i - P_0) / (0.5*rho*(U_a)^2)
@@ -1920,24 +1920,24 @@ subroutine storeSurfsolInBuffer(sps, buffer, nn, blockID, &
 
         call computePtot(rhoInf, uInf, zero, zero, &
                          pInf, ptotInf)
-        ptotInf = one/ptotInf
+        ptotInf = one / ptotInf
 
         ! Loop over the faces and compute the total pressure loss.
 
         do j = rangeFace(2, 1), rangeFace(2, 2)
             do i = rangeFace(1, 1), rangeFace(1, 2)
 
-                psurf = half*(pp1(i, j) + pp2(i, j))
-                rsurf = half*(ww1(i, j, irho) + ww2(i, j, irho))
-                usurf = half*(ww1(i, j, ivx) + ww2(i, j, ivx))
-                vsurf = half*(ww1(i, j, ivy) + ww2(i, j, ivy))
-                wsurf = half*(ww1(i, j, ivz) + ww2(i, j, ivz))
+                psurf = half * (pp1(i, j) + pp2(i, j))
+                rsurf = half * (ww1(i, j, irho) + ww2(i, j, irho))
+                usurf = half * (ww1(i, j, ivx) + ww2(i, j, ivx))
+                vsurf = half * (ww1(i, j, ivy) + ww2(i, j, ivy))
+                wsurf = half * (ww1(i, j, ivz) + ww2(i, j, ivz))
 
                 call computePtot(rsurf, usurf, vsurf, wsurf, &
                                  psurf, ptot)
 
                 nn = nn + 1
-                buffer(nn) = one - ptot*ptotInf
+                buffer(nn) = one - ptot * ptotInf
             end do
         end do
 
@@ -1948,13 +1948,13 @@ subroutine storeSurfsolInBuffer(sps, buffer, nn, blockID, &
         do j = rangeFace(2, 1), rangeFace(2, 2)
             do i = rangeFace(1, 1), rangeFace(1, 2)
 
-                psurf = half*(pp1(i, j) + pp2(i, j))
-                rsurf = half*(ww1(i, j, irho) + ww2(i, j, irho))
-                usurf = half*(ww1(i, j, ivx) + ww2(i, j, ivx))
-                vsurf = half*(ww1(i, j, ivy) + ww2(i, j, ivy))
-                wsurf = half*(ww1(i, j, ivz) + ww2(i, j, ivz))
-                m2surf = rsurf*(usurf**2 + vsurf**2 + wsurf**2) &
-                         /(half*(gamma1(i, j) + gamma2(i, j))*psurf)
+                psurf = half * (pp1(i, j) + pp2(i, j))
+                rsurf = half * (ww1(i, j, irho) + ww2(i, j, irho))
+                usurf = half * (ww1(i, j, ivx) + ww2(i, j, ivx))
+                vsurf = half * (ww1(i, j, ivy) + ww2(i, j, ivy))
+                wsurf = half * (ww1(i, j, ivz) + ww2(i, j, ivz))
+                m2surf = rsurf * (usurf**2 + vsurf**2 + wsurf**2) &
+                         / (half * (gamma1(i, j) + gamma2(i, j)) * psurf)
 
                 nn = nn + 1
                 buffer(nn) = sqrt(m2surf)
@@ -1968,13 +1968,13 @@ subroutine storeSurfsolInBuffer(sps, buffer, nn, blockID, &
         do j = rangeFace(2, 1), rangeFace(2, 2)
             do i = rangeFace(1, 1), rangeFace(1, 2)
 
-                psurf = half*(pp1(i, j) + pp2(i, j))
-                rsurf = half*(ww1(i, j, irho) + ww2(i, j, irho))
-                usurf = half*(ww1(i, j, ivx) + ww2(i, j, ivx)) - half*(ss1(i, j, 1) + ss2(i, j, 1))
-                vsurf = half*(ww1(i, j, ivy) + ww2(i, j, ivy)) - half*(ss1(i, j, 2) + ss2(i, j, 2))
-                wsurf = half*(ww1(i, j, ivz) + ww2(i, j, ivz)) - half*(ss1(i, j, 3) + ss2(i, j, 3))
-                m2surf = rsurf*(usurf**2 + vsurf**2 + wsurf**2) &
-                         /(half*(gamma1(i, j) + gamma2(i, j))*psurf)
+                psurf = half * (pp1(i, j) + pp2(i, j))
+                rsurf = half * (ww1(i, j, irho) + ww2(i, j, irho))
+                usurf = half * (ww1(i, j, ivx) + ww2(i, j, ivx)) - half * (ss1(i, j, 1) + ss2(i, j, 1))
+                vsurf = half * (ww1(i, j, ivy) + ww2(i, j, ivy)) - half * (ss1(i, j, 2) + ss2(i, j, 2))
+                wsurf = half * (ww1(i, j, ivz) + ww2(i, j, ivz)) - half * (ss1(i, j, 3) + ss2(i, j, 3))
+                m2surf = rsurf * (usurf**2 + vsurf**2 + wsurf**2) &
+                         / (half * (gamma1(i, j) + gamma2(i, j)) * psurf)
 
                 nn = nn + 1
                 buffer(nn) = sqrt(m2surf)
@@ -1992,7 +1992,7 @@ subroutine storeSurfsolInBuffer(sps, buffer, nn, blockID, &
         ! Multiplication factor to obtain the skin friction from
         ! the wall shear stress.
 
-        fact = two/(gammaInf*pInf*MachCoef*MachCoef)
+        fact = two / (gammaInf * pInf * MachCoef * MachCoef)
 
         ! Loop over the given range of faces. As the viscous data is
         ! only present in the owned faces, the values of the halo's
@@ -2056,15 +2056,15 @@ subroutine storeSurfsolInBuffer(sps, buffer, nn, blockID, &
                 norm(2) = BCData(mm)%norm(ii, jj, 2)
                 norm(3) = BCData(mm)%norm(ii, jj, 3)
 
-                fx = -(tauxx*norm(1) + tauxy*norm(2) + tauxz*norm(3))
-                fy = -(tauxy*norm(1) + tauyy*norm(2) + tauyz*norm(3))
-                fz = -(tauxz*norm(1) + tauyz*norm(2) + tauzz*norm(3))
+                fx = -(tauxx * norm(1) + tauxy * norm(2) + tauxz * norm(3))
+                fy = -(tauxy * norm(1) + tauyy * norm(2) + tauyz * norm(3))
+                fz = -(tauxz * norm(1) + tauyz * norm(2) + tauzz * norm(3))
 
-                fn = fx*norm(1) + fy*norm(2) + fz*norm(3)
+                fn = fx * norm(1) + fy * norm(2) + fz * norm(3)
 
-                fx = fx - fn*norm(1)
-                fy = fy - fn*norm(2)
-                fz = fz - fn*norm(3)
+                fx = fx - fn * norm(1)
+                fy = fy - fn * norm(2)
+                fz = fz - fn * norm(3)
 
                 ! Determine the variable to be stored and compute it.
                 ! Note that an offset of -1 must be used in dd2Wall,
@@ -2075,22 +2075,22 @@ subroutine storeSurfsolInBuffer(sps, buffer, nn, blockID, &
 
                 select case (solName)
                 case (cgnsSkinFmag)
-                    buffer(nn) = fact*sqrt(fx*fx + fy*fy + fz*fz)
+                    buffer(nn) = fact * sqrt(fx * fx + fy * fy + fz * fz)
 
                 case (cgnsSkinFx)
-                    buffer(nn) = fact*fx
+                    buffer(nn) = fact * fx
 
                 case (cgnsSkinFy)
-                    buffer(nn) = fact*fy
+                    buffer(nn) = fact * fy
 
                 case (cgnsSkinFz)
-                    buffer(nn) = fact*fz
+                    buffer(nn) = fact * fz
 
                 case (cgnsYplus)
-                    rsurf = half*(ww1(ii, jj, irho) + ww2(ii, jj, irho))
-                    musurf = half*(rlv1(ii, jj) + rlv2(ii, jj))
-                    buffer(nn) = sqrt(rsurf*sqrt(fx*fx + fy*fy + fz*fz)) &
-                                 *dd2Wall(ii - 1, jj - 1)/musurf
+                    rsurf = half * (ww1(ii, jj, irho) + ww2(ii, jj, irho))
+                    musurf = half * (rlv1(ii, jj) + rlv2(ii, jj))
+                    buffer(nn) = sqrt(rsurf * sqrt(fx * fx + fy * fy + fz * fz)) &
+                                 * dd2Wall(ii - 1, jj - 1) / musurf
                 end select
 
             end do
@@ -2103,9 +2103,9 @@ subroutine storeSurfsolInBuffer(sps, buffer, nn, blockID, &
         ! Some constants needed to compute the stanton number.
 
         gm1 = gammaInf - one
-        a2Tot = gammaInf*pInf*(one + half*gm1*MachCoef*MachCoef) &
-                /rhoInf
-        fact = MachCoef*sqrt(gammaInf*pInf*rhoInf)/gm1
+        a2Tot = gammaInf * pInf * (one + half * gm1 * MachCoef * MachCoef) &
+                / rhoInf
+        fact = MachCoef * sqrt(gammaInf * pInf * rhoInf) / gm1
 
         ! Loop over the given range of faces. As the viscous data is
         ! only present in the owned faces, the values of the halo's
@@ -2151,19 +2151,19 @@ subroutine storeSurfsolInBuffer(sps, buffer, nn, blockID, &
                 ! Compute the heat flux. Multipy with the sign of the
                 ! normal to obtain the correct value.
 
-                qw = viscSubface(mm)%q(ii, jj, 1)*BCData(mm)%norm(ii, jj, 1) &
-                     + viscSubface(mm)%q(ii, jj, 2)*BCData(mm)%norm(ii, jj, 2) &
-                     + viscSubface(mm)%q(ii, jj, 3)*BCData(mm)%norm(ii, jj, 3)
+                qw = viscSubface(mm)%q(ii, jj, 1) * BCData(mm)%norm(ii, jj, 1) &
+                     + viscSubface(mm)%q(ii, jj, 2) * BCData(mm)%norm(ii, jj, 2) &
+                     + viscSubface(mm)%q(ii, jj, 3) * BCData(mm)%norm(ii, jj, 3)
 
                 ! Compute the speed of sound squared at the wall and
                 ! the stanton number, which is stored in buffer.
 
-                a2 = half*(gamma1(ii, jj) + gamma2(ii, jj)) &
-                     *(pp1(ii, jj) + pp2(ii, jj)) &
-                     /(ww1(ii, jj, irho) + ww2(ii, jj, irho))
+                a2 = half * (gamma1(ii, jj) + gamma2(ii, jj)) &
+                     * (pp1(ii, jj) + pp2(ii, jj)) &
+                     / (ww1(ii, jj, irho) + ww2(ii, jj, irho))
 
                 nn = nn + 1
-                buffer(nn) = qw/(fact*(a2Tot - a2))
+                buffer(nn) = qw / (fact * (a2Tot - a2))
 
             end do
         end do
@@ -2196,28 +2196,28 @@ subroutine storeSurfsolInBuffer(sps, buffer, nn, blockID, &
                 v(3) = ww2(i, j, ivz)
 
                 ! Normalize
-                v = v/(sqrt(v(1)**2 + v(2)**2 + v(3)**2) + 1e-16)
+                v = v / (sqrt(v(1)**2 + v(2)**2 + v(3)**2) + 1e-16)
 
                 ! Dot product with free stream
                 sensor = -dot_product(v, velDirFreeStream)
 
                 !Now run through a smooth heaviside function:
-                sensor = one/(one + exp(-2*sepSensorSharpness*(sensor - sepSensorOffset)))
+                sensor = one / (one + exp(-2 * sepSensorSharpness * (sensor - sepSensorOffset)))
                 buffer(nn) = sensor
             end do
         end do
 
         case (cgnsCavitation)
-        fact = two/(gammaInf*pInf*MachCoef*MachCoef)
+        fact = two / (gammaInf * pInf * MachCoef * MachCoef)
         do j = rangeFace(2, 1), rangeFace(2, 2)
             do i = rangeFace(1, 1), rangeFace(1, 2)
 
                 nn = nn + 1
                 ! Get local pressure
-                plocal = half*(pp1(i, j) + pp2(i, j))
+                plocal = half * (pp1(i, j) + pp2(i, j))
 
-                sensor1 = (-(fact)*(plocal - pInf)) - cavitationnumber
-                sensor1 = (sensor1**cavExponent)/(one + exp(2*cavSensorSharpness*(-sensor1 + cavSensorOffset)))
+                sensor1 = (-(fact) * (plocal - pInf)) - cavitationnumber
+                sensor1 = (sensor1**cavExponent) / (one + exp(2 * cavSensorSharpness * (-sensor1 + cavSensorOffset)))
                 buffer(nn) = sensor1
                 !print*, sensor
             end do
@@ -2328,7 +2328,7 @@ subroutine describeScheme(string)
 
         if (spaceDiscr == dissScalar) then
             if (dirScaling) then
-                write (string, "(2(A, 1X), ES12.5, A)") trim(string), "Directional scaling of dissipation with exponent", adis, "."
+     write (string, "(2(A, 1X), ES12.5, A)") trim(string), "Directional scaling of dissipation with exponent", adis, "."
             else
                 write (string, stringSpace) trim(string), "No directional scaling of dissipation."
             end if
@@ -2349,7 +2349,7 @@ subroutine describeScheme(string)
                 write (string, stringSpace) trim(string), "Quadratic extrapolation of normal pressure gradIent", &
                     "for inviscid wall boundary conditions."
             case (normalMomentum)
-                write (string, stringSpace) trim(string), "Normal momentum equation used to determine pressure gradient", &
+             write (string, stringSpace) trim(string), "Normal momentum equation used to determine pressure gradient", &
                     "for inviscid wall boundary conditions."
             end select
         end if
@@ -2652,7 +2652,7 @@ subroutine setHelpVariablesWriting
         integer(kind=intType) :: i, nn
 
         integer(kind=intType), dimension(cgnsNDom) :: tmp
-        integer(kind=intType), dimension(4, nDom)   :: buffer
+        integer(kind=intType), dimension(4, nDom) :: buffer
 
         ! Determine for each CGNS block how many (sub) blocks are stored
         ! on this processor. Note that this info is the same for all
@@ -2742,8 +2742,8 @@ subroutine writeCGNSHeader(cgnsInd, base)
         real(kind=cgnsRealType) :: val
 
         character(len=2048) :: message
-        character(len=7)    :: integerString
-        character(len=12)   :: realString
+        character(len=7) :: integerString
+        character(len=12) :: realString
 
         ! Set the cgns real type.
 
@@ -3561,7 +3561,7 @@ subroutine writeCGNSReferenceState(cgnsInd, cgnsBase)
                                       1, int(1, cgsize_t), val, ierr)
 
             case (4_intType)
-                val = sqrt(pref/rhoref)
+                val = sqrt(pref / rhoref)
                 call cg_array_write_f(cgnsVelocity, realTypeCGNS, &
                                       1, int(1, cgsize_t), val, ierr)
 
diff --git a/src/preprocessing/preprocessingAPI.F90 b/src/preprocessing/preprocessingAPI.F90
index 9af113422..e583d0bc2 100644
--- a/src/preprocessing/preprocessingAPI.F90
+++ b/src/preprocessing/preprocessingAPI.F90
@@ -501,8 +501,8 @@ subroutine determineNcellGlobal(level)
         nCellLocal = 0
         do nn = 1, nDom
             nCellLocal = nCellLocal + flowDoms(nn, level, 1)%nx &
-                         *flowDoms(nn, level, 1)%ny &
-                         *flowDoms(nn, level, 1)%nz
+                         * flowDoms(nn, level, 1)%ny &
+                         * flowDoms(nn, level, 1)%nz
         end do
 
         ! And determine the global sum.
@@ -954,14 +954,14 @@ subroutine checkSymmetry(level)
                         ! is outward pointing by multiplying it by mult;
                         ! mult is either 1.0 or -1.0.
 
-                        fact = sqrt(faceNorm(1)*faceNorm(1) &
-                                    + faceNorm(2)*faceNorm(2) &
-                                    + faceNorm(3)*faceNorm(3))
-                        if (fact > zero) fact = mult/fact
+                        fact = sqrt(faceNorm(1) * faceNorm(1) &
+                                    + faceNorm(2) * faceNorm(2) &
+                                    + faceNorm(3) * faceNorm(3))
+                        if (fact > zero) fact = mult / fact
 
-                        faceNorm(1) = faceNorm(1)*fact
-                        faceNorm(2) = faceNorm(2)*fact
-                        faceNorm(3) = faceNorm(3)*fact
+                        faceNorm(1) = faceNorm(1) * fact
+                        faceNorm(2) = faceNorm(2) * fact
+                        faceNorm(3) = faceNorm(3) * fact
 
                         ! Check if the symmetry plane is really planar. This is
                         ! only done on the finest mesh and for the 1st spectral
@@ -985,9 +985,9 @@ subroutine checkSymmetry(level)
                                     ! Compute the dot product between the normal of
                                     ! this face and the averaged normal of the plane.
 
-                                    dot = BCData(mm)%norm(i, j, 1)*faceNorm(1) &
-                                          + BCData(mm)%norm(i, j, 2)*faceNorm(2) &
-                                          + BCData(mm)%norm(i, j, 3)*faceNorm(3)
+                                    dot = BCData(mm)%norm(i, j, 1) * faceNorm(1) &
+                                          + BCData(mm)%norm(i, j, 2) * faceNorm(2) &
+                                          + BCData(mm)%norm(i, j, 3) * faceNorm(3)
 
                                     ! And determine the minimum of dot and dotMin
 
@@ -1004,7 +1004,7 @@ subroutine checkSymmetry(level)
                                 ! Determine the corresponding angle in degrees of
                                 ! dotmin.
 
-                                fact = acos(dotMin)*180.0_realType/pi
+                                fact = acos(dotMin) * 180.0_realType / pi
 
                                 ! Store the corresponding cgns block id and the
                                 ! subface in this block a bit easier.
@@ -1018,7 +1018,7 @@ subroutine checkSymmetry(level)
                                 print "(a)", "#                      Warning"
                                 print stringSpace, "# Symmetry boundary face", trim(cgnsDoms(i)%bocoInfo(j)%bocoName), &
                                     "of zone", trim(cgnsDoms(i)%zonename), "is not planar."
-                                write (*, stringSci5) "# Maximum deviation from the mean normal: ", real(fact), " degrees"
+                              write (*, stringSci5) "# Maximum deviation from the mean normal: ", real(fact), " degrees"
                                 print "(a)", "#"
 
                             end if
@@ -1098,13 +1098,13 @@ subroutine xhalo(level)
 
                 do k = 1, kl
                     do j = 1, jl
-                        x(0, j, k, 1) = two*x(1, j, k, 1) - x(2, j, k, 1)
-                        x(0, j, k, 2) = two*x(1, j, k, 2) - x(2, j, k, 2)
-                        x(0, j, k, 3) = two*x(1, j, k, 3) - x(2, j, k, 3)
+                        x(0, j, k, 1) = two * x(1, j, k, 1) - x(2, j, k, 1)
+                        x(0, j, k, 2) = two * x(1, j, k, 2) - x(2, j, k, 2)
+                        x(0, j, k, 3) = two * x(1, j, k, 3) - x(2, j, k, 3)
 
-                        x(ie, j, k, 1) = two*x(il, j, k, 1) - x(nx, j, k, 1)
-                        x(ie, j, k, 2) = two*x(il, j, k, 2) - x(nx, j, k, 2)
-                        x(ie, j, k, 3) = two*x(il, j, k, 3) - x(nx, j, k, 3)
+                        x(ie, j, k, 1) = two * x(il, j, k, 1) - x(nx, j, k, 1)
+                        x(ie, j, k, 2) = two * x(il, j, k, 2) - x(nx, j, k, 2)
+                        x(ie, j, k, 3) = two * x(il, j, k, 3) - x(nx, j, k, 3)
                     end do
                 end do
 
@@ -1112,13 +1112,13 @@ subroutine xhalo(level)
 
                 do k = 1, kl
                     do i = 0, ie
-                        x(i, 0, k, 1) = two*x(i, 1, k, 1) - x(i, 2, k, 1)
-                        x(i, 0, k, 2) = two*x(i, 1, k, 2) - x(i, 2, k, 2)
-                        x(i, 0, k, 3) = two*x(i, 1, k, 3) - x(i, 2, k, 3)
+                        x(i, 0, k, 1) = two * x(i, 1, k, 1) - x(i, 2, k, 1)
+                        x(i, 0, k, 2) = two * x(i, 1, k, 2) - x(i, 2, k, 2)
+                        x(i, 0, k, 3) = two * x(i, 1, k, 3) - x(i, 2, k, 3)
 
-                        x(i, je, k, 1) = two*x(i, jl, k, 1) - x(i, ny, k, 1)
-                        x(i, je, k, 2) = two*x(i, jl, k, 2) - x(i, ny, k, 2)
-                        x(i, je, k, 3) = two*x(i, jl, k, 3) - x(i, ny, k, 3)
+                        x(i, je, k, 1) = two * x(i, jl, k, 1) - x(i, ny, k, 1)
+                        x(i, je, k, 2) = two * x(i, jl, k, 2) - x(i, ny, k, 2)
+                        x(i, je, k, 3) = two * x(i, jl, k, 3) - x(i, ny, k, 3)
                     end do
                 end do
 
@@ -1126,13 +1126,13 @@ subroutine xhalo(level)
 
                 do j = 0, je
                     do i = 0, ie
-                        x(i, j, 0, 1) = two*x(i, j, 1, 1) - x(i, j, 2, 1)
-                        x(i, j, 0, 2) = two*x(i, j, 1, 2) - x(i, j, 2, 2)
-                        x(i, j, 0, 3) = two*x(i, j, 1, 3) - x(i, j, 2, 3)
+                        x(i, j, 0, 1) = two * x(i, j, 1, 1) - x(i, j, 2, 1)
+                        x(i, j, 0, 2) = two * x(i, j, 1, 2) - x(i, j, 2, 2)
+                        x(i, j, 0, 3) = two * x(i, j, 1, 3) - x(i, j, 2, 3)
 
-                        x(i, j, ke, 1) = two*x(i, j, kl, 1) - x(i, j, nz, 1)
-                        x(i, j, ke, 2) = two*x(i, j, kl, 2) - x(i, j, nz, 2)
-                        x(i, j, ke, 3) = two*x(i, j, kl, 3) - x(i, j, nz, 3)
+                        x(i, j, ke, 1) = two * x(i, j, kl, 1) - x(i, j, nz, 1)
+                        x(i, j, ke, 2) = two * x(i, j, kl, 2) - x(i, j, nz, 2)
+                        x(i, j, ke, 3) = two * x(i, j, kl, 3) - x(i, j, nz, 3)
                     end do
                 end do
                 !
@@ -1200,9 +1200,9 @@ subroutine xhalo(level)
                         ! Determine the normal of the face by taking the cross
                         ! product of v1 and v2 and add it to norm.
 
-                        norm(1) = v1(2)*v2(3) - v1(3)*v2(2)
-                        norm(2) = v1(3)*v2(1) - v1(1)*v2(3)
-                        norm(3) = v1(1)*v2(2) - v1(2)*v2(1)
+                        norm(1) = v1(2) * v2(3) - v1(3) * v2(2)
+                        norm(2) = v1(3) * v2(1) - v1(1) * v2(3)
+                        norm(3) = v1(1) * v2(2) - v1(2) * v2(1)
 
                         ! Check if BCData is allocated yet:
                         if (.not. bcData(mm)%symNormSet) then
@@ -1210,9 +1210,9 @@ subroutine xhalo(level)
                             if (length == 0) then
                                 length = eps
                             end if
-                            bcData(mm)%symNorm(1) = norm(1)/length
-                            bcData(mm)%symNorm(2) = norm(2)/length
-                            bcData(mm)%symNorm(3) = norm(3)/length
+                            bcData(mm)%symNorm(1) = norm(1) / length
+                            bcData(mm)%symNorm(2) = norm(2) / length
+                            bcData(mm)%symNorm(3) = norm(3) / length
                             bcData(mm)%symNormSet = .True.
                         else
 
@@ -1220,7 +1220,7 @@ subroutine xhalo(level)
                             ! different from the stored one:
                             length = sqrt(norm(1)**2 + norm(2)**2 + norm(3)**2)
                             if (length > eps) then
-                                tmp = norm/length
+                                tmp = norm / length
                                 tmp2 = bcData(mm)%symNorm
                                 dot = dot_product(tmp, tmp2)
                                 if (abs(dot) < tolDotmin) then
@@ -1249,9 +1249,9 @@ subroutine xhalo(level)
 
                             ! Compute the unit normal of the subface.
 
-                            norm(1) = norm(1)/length
-                            norm(2) = norm(2)/length
-                            norm(3) = norm(3)/length
+                            norm(1) = norm(1) / length
+                            norm(2) = norm(2) / length
+                            norm(3) = norm(3) / length
 
                             ! Add an overlap to the symmetry subface if the
                             ! boundaries coincide with the block boundaries.
@@ -1282,12 +1282,12 @@ subroutine xhalo(level)
                                     ! coordinates of the internal node to obtain the
                                     ! halo coordinates. Again the offset of +1.
 
-                                    dot = two*(v1(1)*norm(1) + v1(2)*norm(2) &
-                                               + v1(3)*norm(3))
+                                    dot = two * (v1(1) * norm(1) + v1(2) * norm(2) &
+                                                 + v1(3) * norm(3))
 
-                                    x0(i + 1, j + 1, 1) = x2(i + 1, j + 1, 1) + dot*norm(1)
-                                    x0(i + 1, j + 1, 2) = x2(i + 1, j + 1, 2) + dot*norm(2)
-                                    x0(i + 1, j + 1, 3) = x2(i + 1, j + 1, 3) + dot*norm(3)
+                                    x0(i + 1, j + 1, 1) = x2(i + 1, j + 1, 1) + dot * norm(1)
+                                    x0(i + 1, j + 1, 2) = x2(i + 1, j + 1, 2) + dot * norm(2)
+                                    x0(i + 1, j + 1, 3) = x2(i + 1, j + 1, 3) + dot * norm(3)
 
                                 end do
                             end do
@@ -1371,7 +1371,7 @@ subroutine setSurfaceFamilyInfo
                             write (*, "(2(A, I4), *(A))") "CGNS Block ", i, ", boundary condition ", j, ", of type ", &
                                 trim(BCTypeName(cgnsDoms(i)%bocoInfo(j)%BCTypeCGNS)), &
                                 " does not have a family. Based on the boundary condition type,", &
-                                " a name of: '", trim(defaultFamName(cgnsDoms(i)%bocoInfo(j)%BCTypeCGNS)), "' will be used."
+                            " a name of: '", trim(defaultFamName(cgnsDoms(i)%bocoInfo(j)%BCTypeCGNS)), "' will be used."
                         end if
                         cgnsDoms(i)%bocoInfo(j)%wallBCName = trim(defaultFamName(cgnsDoms(i)%bocoInfo(j)%BCTypeCGNS))
                     end if
@@ -1744,8 +1744,8 @@ subroutine createNodeScatterForFamilies(famList, exch, sps, localIndices)
 
         ! Send all the nodes to everyone
         allocate (allNodes(3, nNodesTotal))
-        call mpi_allgatherv(localNodes, nNodesLocal*3, adflow_real, allNodes, &
-                            nNodesProc*3, cumNodesProc*3, adflow_real, adflow_comm_world, ierr)
+        call mpi_allgatherv(localNodes, nNodesLocal * 3, adflow_real, allNodes, &
+                            nNodesProc * 3, cumNodesProc * 3, adflow_real, adflow_comm_world, ierr)
         call EChk(ierr, __FILE__, __LINE__)
 
         ! Local nodes is no longer necessary
@@ -1962,8 +1962,8 @@ subroutine setGlobalCellsAndNodes(level)
         do nn = 1, nDom
             ! Set to first spectral instance since we only need sizes
             call setPointers(nn, level, 1_intType)
-            nCellsLocal(level) = nCellsLocal(level) + nx*ny*nz
-            nNodesLocal(level) = nNodesLocal(level) + il*jl*kl
+            nCellsLocal(level) = nCellsLocal(level) + nx * ny * nz
+            nNodesLocal(level) = nNodesLocal(level) + il * jl * kl
         end do
 
         ! Reduce the number of cells in all processors: add up nCellsLocal
@@ -2005,7 +2005,7 @@ subroutine setGlobalCellsAndNodes(level)
         do nn = 2, nDom
             call setPointers(nn - 1, level, 1)
             nCellBlockOffset(level, nn) = nCellBlockOffset(level, nn - 1) &
-                                          + nx*ny*nz
+                                          + nx * ny * nz
         end do
 
         ! Repeat for nodes.
@@ -2016,7 +2016,7 @@ subroutine setGlobalCellsAndNodes(level)
         nNodeBlockOffset(1) = nNodeOffsetLocal(level)
         do nn = 2, nDom
             call setPointers(nn - 1, level, 1)
-            nNodeBlockOffset(nn) = nNodeBLockOffset(nn - 1) + il*jl*kl
+            nNodeBlockOffset(nn) = nNodeBLockOffset(nn - 1) + il * jl * kl
         end do
 
         ! Determine the global block row index for each (i,j,k) cell in
@@ -2032,8 +2032,8 @@ subroutine setGlobalCellsAndNodes(level)
                             ! instances of a give block adjacent to each other in
                             ! the matrix
                             globalCell(i, j, k) = &
-                                nCellBLockOffset(level, nn)*nTimeIntervalsSpectral + nx*ny*nz*(sps - 1) + &
-                                (i - 2) + (j - 2)*nx + (k - 2)*nx*ny
+                                nCellBLockOffset(level, nn) * nTimeIntervalsSpectral + nx * ny * nz * (sps - 1) + &
+                                (i - 2) + (j - 2) * nx + (k - 2) * nx * ny
                         end do
                     end do
                 end do
@@ -2052,8 +2052,8 @@ subroutine setGlobalCellsAndNodes(level)
                             !instances of a give block adjacent to each other in
                             !the matrix
                             globalNode(i, j, k) = &
-                                nNodeBLockOffset(nn)*nTimeIntervalsSpectral + &
-                                il*jl*kl*(sps - 1) + (i - 1) + (j - 1)*il + (k - 1)*il*jl
+                                nNodeBLockOffset(nn) * nTimeIntervalsSpectral + &
+                                il * jl * kl * (sps - 1) + (i - 1) + (j - 1) * il + (k - 1) * il * jl
 
                         end do
                     end do
@@ -2131,7 +2131,7 @@ subroutine setFamilyInfoFaces(level)
         ! followed by the families.
 
         if (monMassSliding) then
-            ii = 2*cgnsNSliding
+            ii = 2 * cgnsNSliding
         else
             ii = 0
         end if
@@ -2234,7 +2234,7 @@ subroutine setFamilyInfoFaces(level)
                     ! set the index to 0.
 
                     if (monMassSliding) then
-                        ii = 2*abs(groupNum(mm))
+                        ii = 2 * abs(groupNum(mm))
                         if (groupNum(mm) < 0) ii = ii - 1
                     else
                         ii = 0
@@ -2851,18 +2851,18 @@ subroutine metric(level)
 
                             ! Compute the coordinates of the center of gravity.
 
-                            xp = eighth*(x(i, j, k, 1) + x(i, m, k, 1) &
-                                         + x(i, m, n, 1) + x(i, j, n, 1) &
-                                         + x(l, j, k, 1) + x(l, m, k, 1) &
-                                         + x(l, m, n, 1) + x(l, j, n, 1))
-                            yp = eighth*(x(i, j, k, 2) + x(i, m, k, 2) &
-                                         + x(i, m, n, 2) + x(i, j, n, 2) &
-                                         + x(l, j, k, 2) + x(l, m, k, 2) &
-                                         + x(l, m, n, 2) + x(l, j, n, 2))
-                            zp = eighth*(x(i, j, k, 3) + x(i, m, k, 3) &
-                                         + x(i, m, n, 3) + x(i, j, n, 3) &
-                                         + x(l, j, k, 3) + x(l, m, k, 3) &
-                                         + x(l, m, n, 3) + x(l, j, n, 3))
+                            xp = eighth * (x(i, j, k, 1) + x(i, m, k, 1) &
+                                           + x(i, m, n, 1) + x(i, j, n, 1) &
+                                           + x(l, j, k, 1) + x(l, m, k, 1) &
+                                           + x(l, m, n, 1) + x(l, j, n, 1))
+                            yp = eighth * (x(i, j, k, 2) + x(i, m, k, 2) &
+                                           + x(i, m, n, 2) + x(i, j, n, 2) &
+                                           + x(l, j, k, 2) + x(l, m, k, 2) &
+                                           + x(l, m, n, 2) + x(l, j, n, 2))
+                            zp = eighth * (x(i, j, k, 3) + x(i, m, k, 3) &
+                                           + x(i, m, n, 3) + x(i, j, n, 3) &
+                                           + x(l, j, k, 3) + x(l, m, k, 3) &
+                                           + x(l, m, n, 3) + x(l, j, n, 3))
 
                             ! Compute the volumes of the 6 sub pyramids. The
                             ! arguments of volpym must be such that for a (regular)
@@ -2902,7 +2902,7 @@ subroutine metric(level)
                             ! pyramid. Remember that volpym computes 6 times the
                             ! volume.
 
-                            vol(i, j, k) = sixth*(vp1 + vp2 + vp3 + vp4 + vp5 + vp6)
+                            vol(i, j, k) = sixth * (vp1 + vp2 + vp3 + vp4 + vp5 + vp6)
 
                             ! Check the volume and update the number of positive
                             ! and negative volumes if needed.
@@ -2924,22 +2924,22 @@ subroutine metric(level)
 
                                 ! Set the threshold for the volume quality.
 
-                                fact = thresVolume*abs(vol(i, j, k))
+                                fact = thresVolume * abs(vol(i, j, k))
 
                                 ! Check the quality of the volume.
 
                                 badVolume = .false.
-                                if (vp1*vol(i, j, k) < zero .and. &
+                                if (vp1 * vol(i, j, k) < zero .and. &
                                     abs(vp1) > fact) badVolume = .true.
-                                if (vp2*vol(i, j, k) < zero .and. &
+                                if (vp2 * vol(i, j, k) < zero .and. &
                                     abs(vp2) > fact) badVolume = .true.
-                                if (vp3*vol(i, j, k) < zero .and. &
+                                if (vp3 * vol(i, j, k) < zero .and. &
                                     abs(vp3) > fact) badVolume = .true.
-                                if (vp4*vol(i, j, k) < zero .and. &
+                                if (vp4 * vol(i, j, k) < zero .and. &
                                     abs(vp4) > fact) badVolume = .true.
-                                if (vp5*vol(i, j, k) < zero .and. &
+                                if (vp5 * vol(i, j, k) < zero .and. &
                                     abs(vp5) > fact) badVolume = .true.
-                                if (vp6*vol(i, j, k) < zero .and. &
+                                if (vp6 * vol(i, j, k) < zero .and. &
                                     abs(vp6) > fact) badVolume = .true.
 
                                 ! Update nVolBad if this is a bad volume.
@@ -2960,10 +2960,10 @@ subroutine metric(level)
 
                 do k = 2, kl
                     do j = 2, jl
-                        if (vol(1, j, k)/vol(2, j, k) < haloCellRatio) then
+                        if (vol(1, j, k) / vol(2, j, k) < haloCellRatio) then
                             vol(1, j, k) = vol(2, j, k)
                         end if
-                        if (vol(ie, j, k)/vol(il, j, k) < haloCellRatio) then
+                        if (vol(ie, j, k) / vol(il, j, k) < haloCellRatio) then
                             vol(ie, j, k) = vol(il, j, k)
                         end if
                     end do
@@ -2971,10 +2971,10 @@ subroutine metric(level)
 
                 do k = 2, kl
                     do i = 1, ie
-                        if (vol(i, 1, k)/vol(i, 2, k) < haloCellRatio) then
+                        if (vol(i, 1, k) / vol(i, 2, k) < haloCellRatio) then
                             vol(i, 1, k) = vol(i, 2, k)
                         end if
-                        if (vol(i, je, k)/voL(i, jl, k) < haloCellRatio) then
+                        if (vol(i, je, k) / voL(i, jl, k) < haloCellRatio) then
                             vol(i, je, k) = vol(i, jl, k)
                         end if
                     end do
@@ -2982,10 +2982,10 @@ subroutine metric(level)
 
                 do j = 1, je
                     do i = 1, ie
-                        if (vol(i, j, 1)/vol(i, j, 2) < haloCellRatio) then
+                        if (vol(i, j, 1) / vol(i, j, 2) < haloCellRatio) then
                             vol(i, j, 1) = vol(i, j, 2)
                         end if
-                        if (vol(i, j, ke)/vol(i, j, kl) < haloCellRatio) then
+                        if (vol(i, j, ke) / vol(i, j, kl) < haloCellRatio) then
                             vol(i, j, ke) = vol(i, j, kl)
                         end if
                     end do
@@ -3063,9 +3063,9 @@ subroutine metric(level)
                             ! diagonal vectors times fact; remember that fact is
                             ! either -0.5 or 0.5.
 
-                            si(i, j, k, 1) = fact*(v1(2)*v2(3) - v1(3)*v2(2))
-                            si(i, j, k, 2) = fact*(v1(3)*v2(1) - v1(1)*v2(3))
-                            si(i, j, k, 3) = fact*(v1(1)*v2(2) - v1(2)*v2(1))
+                            si(i, j, k, 1) = fact * (v1(2) * v2(3) - v1(3) * v2(2))
+                            si(i, j, k, 2) = fact * (v1(3) * v2(1) - v1(1) * v2(3))
+                            si(i, j, k, 3) = fact * (v1(1) * v2(2) - v1(2) * v2(1))
 
                         end do
                     end do
@@ -3093,9 +3093,9 @@ subroutine metric(level)
                             ! diagonal vectors times fact; remember that fact is
                             ! either -0.5 or 0.5.
 
-                            sj(i, j, k, 1) = fact*(v1(2)*v2(3) - v1(3)*v2(2))
-                            sj(i, j, k, 2) = fact*(v1(3)*v2(1) - v1(1)*v2(3))
-                            sj(i, j, k, 3) = fact*(v1(1)*v2(2) - v1(2)*v2(1))
+                            sj(i, j, k, 1) = fact * (v1(2) * v2(3) - v1(3) * v2(2))
+                            sj(i, j, k, 2) = fact * (v1(3) * v2(1) - v1(1) * v2(3))
+                            sj(i, j, k, 3) = fact * (v1(1) * v2(2) - v1(2) * v2(1))
 
                         end do
                     end do
@@ -3123,9 +3123,9 @@ subroutine metric(level)
                             ! diagonal vectors times fact; remember that fact is
                             ! either -0.5 or 0.5.
 
-                            sk(i, j, k, 1) = fact*(v1(2)*v2(3) - v1(3)*v2(2))
-                            sk(i, j, k, 2) = fact*(v1(3)*v2(1) - v1(1)*v2(3))
-                            sk(i, j, k, 3) = fact*(v1(1)*v2(2) - v1(2)*v2(1))
+                            sk(i, j, k, 1) = fact * (v1(2) * v2(3) - v1(3) * v2(2))
+                            sk(i, j, k, 2) = fact * (v1(3) * v2(1) - v1(1) * v2(3))
+                            sk(i, j, k, 3) = fact * (v1(1) * v2(2) - v1(2) * v2(1))
 
                         end do
                     end do
@@ -3173,14 +3173,14 @@ subroutine metric(level)
                             ! and possibly correct for inward pointing.
 
                             xp = ss(i, j, 1); yp = ss(i, j, 2); zp = ss(i, j, 3)
-                            fact = sqrt(xp*xp + yp*yp + zp*zp)
-                            if (fact > zero) fact = mult/fact
+                            fact = sqrt(xp * xp + yp * yp + zp * zp)
+                            if (fact > zero) fact = mult / fact
 
                             ! Compute the unit normal.
 
-                            BCData(mm)%norm(i, j, 1) = fact*xp
-                            BCData(mm)%norm(i, j, 2) = fact*yp
-                            BCData(mm)%norm(i, j, 3) = fact*zp
+                            BCData(mm)%norm(i, j, 1) = fact * xp
+                            BCData(mm)%norm(i, j, 2) = fact * yp
+                            BCData(mm)%norm(i, j, 3) = fact * zp
 
                         end do
                     end do
@@ -3216,31 +3216,31 @@ subroutine metric(level)
                                 ! Store the inverse of the sum of the areas of the
                                 ! six faces in fact.
 
-                                fact = one/(sqrt(si(i, j, k, 1)*si(i, j, k, 1) &
-                                                 + si(i, j, k, 2)*si(i, j, k, 2) &
-                                                 + si(i, j, k, 3)*si(i, j, k, 3)) &
-                                            + sqrt(si(l, j, k, 1)*si(l, j, k, 1) &
-                                                   + si(l, j, k, 2)*si(l, j, k, 2) &
-                                                   + si(l, j, k, 3)*si(l, j, k, 3)) &
-                                            + sqrt(sj(i, j, k, 1)*sj(i, j, k, 1) &
-                                                   + sj(i, j, k, 2)*sj(i, j, k, 2) &
-                                                   + sj(i, j, k, 3)*sj(i, j, k, 3)) &
-                                            + sqrt(sj(i, m, k, 1)*sj(i, m, k, 1) &
-                                                   + sj(i, m, k, 2)*sj(i, m, k, 2) &
-                                                   + sj(i, m, k, 3)*sj(i, m, k, 3)) &
-                                            + sqrt(sk(i, j, k, 1)*sk(i, j, k, 1) &
-                                                   + sk(i, j, k, 2)*sk(i, j, k, 2) &
-                                                   + sk(i, j, k, 3)*sk(i, j, k, 3)) &
-                                            + sqrt(sk(i, j, n, 1)*sk(i, j, n, 1) &
-                                                   + sk(i, j, n, 2)*sk(i, j, n, 2) &
-                                                   + sk(i, j, n, 3)*sk(i, j, n, 3)))
+                                fact = one / (sqrt(si(i, j, k, 1) * si(i, j, k, 1) &
+                                                   + si(i, j, k, 2) * si(i, j, k, 2) &
+                                                   + si(i, j, k, 3) * si(i, j, k, 3)) &
+                                              + sqrt(si(l, j, k, 1) * si(l, j, k, 1) &
+                                                     + si(l, j, k, 2) * si(l, j, k, 2) &
+                                                     + si(l, j, k, 3) * si(l, j, k, 3)) &
+                                              + sqrt(sj(i, j, k, 1) * sj(i, j, k, 1) &
+                                                     + sj(i, j, k, 2) * sj(i, j, k, 2) &
+                                                     + sj(i, j, k, 3) * sj(i, j, k, 3)) &
+                                              + sqrt(sj(i, m, k, 1) * sj(i, m, k, 1) &
+                                                     + sj(i, m, k, 2) * sj(i, m, k, 2) &
+                                                     + sj(i, m, k, 3) * sj(i, m, k, 3)) &
+                                              + sqrt(sk(i, j, k, 1) * sk(i, j, k, 1) &
+                                                     + sk(i, j, k, 2) * sk(i, j, k, 2) &
+                                                     + sk(i, j, k, 3) * sk(i, j, k, 3)) &
+                                              + sqrt(sk(i, j, n, 1) * sk(i, j, n, 1) &
+                                                     + sk(i, j, n, 2) * sk(i, j, n, 2) &
+                                                     + sk(i, j, n, 3) * sk(i, j, n, 3)))
 
                                 ! Multiply v1 by fact to obtain a nonDimensional
                                 ! quantity and take tha absolute value of it.
 
-                                v1(1) = abs(v1(1)*fact)
-                                v1(2) = abs(v1(2)*fact)
-                                v1(3) = abs(v1(3)*fact)
+                                v1(1) = abs(v1(1) * fact)
+                                v1(2) = abs(v1(2) * fact)
+                                v1(3) = abs(v1(3) * fact)
 
                                 ! Check if the control volume is closed.
 
@@ -3356,12 +3356,12 @@ function volpym(xa, ya, za, xb, yb, zb, xc, yc, zc, xd, yd, zd)
             real(kind=realType), intent(in) :: xa, ya, za, xb, yb, zb
             real(kind=realType), intent(in) :: xc, yc, zc, xd, yd, zd
 
-            volpym = (xp - fourth*(xa + xb + xc + xd)) &
-                     *((ya - yc)*(zb - zd) - (za - zc)*(yb - yd)) + &
-                     (yp - fourth*(ya + yb + yc + yd)) &
-                     *((za - zc)*(xb - xd) - (xa - xc)*(zb - zd)) + &
-                     (zp - fourth*(za + zb + zc + zd)) &
-                     *((xa - xc)*(yb - yd) - (ya - yc)*(xb - xd))
+            volpym = (xp - fourth * (xa + xb + xc + xd)) &
+                     * ((ya - yc) * (zb - zd) - (za - zc) * (yb - yd)) + &
+                     (yp - fourth * (ya + yb + yc + yd)) &
+                     * ((za - zc) * (xb - xd) - (xa - xc) * (zb - zd)) + &
+                     (zp - fourth * (za + zb + zc + zd)) &
+                     * ((xa - xc) * (yb - yd) - (ya - yc) * (xb - xd))
 
         end function volpym
 
@@ -3472,14 +3472,14 @@ subroutine writeNegVolumes(checkVolDoms)
                                     do i = 2, il
                                         if (checkVolDoms(nn, sps)%volumeIsNeg(i, j, k)) then
 
-                                            xc(1:3) = eighth*(x(i - 1, j - 1, k - 1, 1:3) &
-                                                              + x(i, j - 1, k - 1, 1:3) &
-                                                              + x(i - 1, j, k - 1, 1:3) &
-                                                              + x(i, j, k - 1, 1:3) &
-                                                              + x(i - 1, j - 1, k, 1:3) &
-                                                              + x(i, j - 1, k, 1:3) &
-                                                              + x(i - 1, j, k, 1:3) &
-                                                              + x(i, j, k, 1:3))
+                                            xc(1:3) = eighth * (x(i - 1, j - 1, k - 1, 1:3) &
+                                                                + x(i, j - 1, k - 1, 1:3) &
+                                                                + x(i - 1, j, k - 1, 1:3) &
+                                                                + x(i, j, k - 1, 1:3) &
+                                                                + x(i - 1, j - 1, k, 1:3) &
+                                                                + x(i, j - 1, k, 1:3) &
+                                                                + x(i - 1, j, k, 1:3) &
+                                                                + x(i, j, k, 1:3))
 
                                             write (intString1, "(i10)") i
                                             write (intString2, "(i10)") j
@@ -3653,7 +3653,7 @@ subroutine computeRotMatrixFace(xx, rotMat, iil, jjl)
             !
             integer(kind=intType), intent(in) :: iil, jjl
 
-            real(kind=realType), dimension(:, :, :), intent(in)  :: xx
+            real(kind=realType), dimension(:, :, :), intent(in) :: xx
             real(kind=realType), dimension(2:, 2:, :, :), intent(out) :: rotMat
             !
             !        Local variables.
@@ -3672,23 +3672,23 @@ subroutine computeRotMatrixFace(xx, rotMat, iil, jjl)
                     ! Compute the coordinates of the face center relative to
                     ! the center of rotation.
 
-                    xF(1) = fourth*(xx(i - 1, j - 1, 1) + xx(i - 1, j, 1) &
-                                    + xx(i, j - 1, 1) + xx(i, j, 1)) - rotCenter(1)
-                    xF(2) = fourth*(xx(i - 1, j - 1, 2) + xx(i - 1, j, 2) &
-                                    + xx(i, j - 1, 2) + xx(i, j, 2)) - rotCenter(2)
-                    xF(3) = fourth*(xx(i - 1, j - 1, 3) + xx(i - 1, j, 3) &
-                                    + xx(i, j - 1, 3) + xx(i, j, 3)) - rotCenter(3)
+                    xF(1) = fourth * (xx(i - 1, j - 1, 1) + xx(i - 1, j, 1) &
+                                      + xx(i, j - 1, 1) + xx(i, j, 1)) - rotCenter(1)
+                    xF(2) = fourth * (xx(i - 1, j - 1, 2) + xx(i - 1, j, 2) &
+                                      + xx(i, j - 1, 2) + xx(i, j, 2)) - rotCenter(2)
+                    xF(3) = fourth * (xx(i - 1, j - 1, 3) + xx(i - 1, j, 3) &
+                                      + xx(i, j - 1, 3) + xx(i, j, 3)) - rotCenter(3)
 
                     ! Determine the two radial components for this point.
 
-                    r1 = xF(1)*vecR1(1) + xF(2)*vecR1(2) + xF(3)*vecR1(3)
-                    r2 = xF(1)*vecR2(1) + xF(2)*vecR2(2) + xF(3)*vecR2(3)
+                    r1 = xF(1) * vecR1(1) + xF(2) * vecR1(2) + xF(3) * vecR1(3)
+                    r2 = xF(1) * vecR2(1) + xF(2) * vecR2(2) + xF(3) * vecR2(3)
 
                     ! Determine the sine and cosine of the polar angle.
 
-                    rInv = one/sqrt(r1*r1 + r2*r2)
-                    cosTheta = r1*rInv
-                    sinTheta = r2*rInv
+                    rInv = one / sqrt(r1 * r1 + r2 * r2)
+                    cosTheta = r1 * rInv
+                    sinTheta = r2 * rInv
 
                     ! Compute the transformation matrix.
 
@@ -3696,13 +3696,13 @@ subroutine computeRotMatrixFace(xx, rotMat, iil, jjl)
                     rotMat(i, j, 1, 2) = axis(2)
                     rotMat(i, j, 1, 3) = axis(3)
 
-                    rotMat(i, j, 2, 1) = cosTheta*vecR1(1) + sinTheta*vecR2(1)
-                    rotMat(i, j, 2, 2) = cosTheta*vecR1(2) + sinTheta*vecR2(2)
-                    rotMat(i, j, 2, 3) = cosTheta*vecR1(3) + sinTheta*vecR2(3)
+                    rotMat(i, j, 2, 1) = cosTheta * vecR1(1) + sinTheta * vecR2(1)
+                    rotMat(i, j, 2, 2) = cosTheta * vecR1(2) + sinTheta * vecR2(2)
+                    rotMat(i, j, 2, 3) = cosTheta * vecR1(3) + sinTheta * vecR2(3)
 
-                    rotMat(i, j, 3, 1) = cosTheta*vecR2(1) - sinTheta*vecR1(1)
-                    rotMat(i, j, 3, 2) = cosTheta*vecR2(2) - sinTheta*vecR1(2)
-                    rotMat(i, j, 3, 3) = cosTheta*vecR2(3) - sinTheta*vecR1(3)
+                    rotMat(i, j, 3, 1) = cosTheta * vecR2(1) - sinTheta * vecR1(1)
+                    rotMat(i, j, 3, 2) = cosTheta * vecR2(2) - sinTheta * vecR1(2)
+                    rotMat(i, j, 3, 3) = cosTheta * vecR2(3) - sinTheta * vecR1(3)
 
                 end do
             end do
@@ -3727,7 +3727,7 @@ subroutine updateCoordinatesAllLevels
         !      Local variables.
         !
         integer(kind=intType) :: nLevels, nn
-        real(kind=realType)   :: origGroundLevel
+        real(kind=realType) :: origGroundLevel
 
         ! Determine the halo coordinates of the fine level.
         origGroundLevel = groundLevel
@@ -3807,7 +3807,7 @@ subroutine updateGridVelocitiesAllLevels
 
         !Local Variables
 
-        integer(kind=inttype):: mm, nnn
+        integer(kind=inttype) :: mm, nnn
 
         real(kind=realType), dimension(nSections) :: t
 
@@ -3822,8 +3822,8 @@ subroutine updateGridVelocitiesAllLevels
 
             if (equationMode == timeSpectral) then
                 do nnn = 1, nSections
-                    t(nnn) = t(nnn) + (mm - 1)*sections(nnn)%timePeriod &
-                             /real(nTimeIntervalsSpectral, realType)
+                    t(nnn) = t(nnn) + (mm - 1) * sections(nnn)%timePeriod &
+                             / real(nTimeIntervalsSpectral, realType)
                 end do
             end if
 
@@ -3886,7 +3886,7 @@ subroutine unitVectorsInAxialPlane(axis, vecR1, vecR2)
         !
         !      Subroutine arguments.
         !
-        real(kind=realType), dimension(3), intent(in)  :: axis
+        real(kind=realType), dimension(3), intent(in) :: axis
         real(kind=realType), dimension(3), intent(out) :: vecR1, vecR2
         !
         !      Local variables.
@@ -3910,22 +3910,22 @@ subroutine unitVectorsInAxialPlane(axis, vecR1, vecR2)
         ! Make sure that vecR1 is normal to axis. Create a unit
         ! vector again.
 
-        dot = vecR1(1)*axis(1) + vecR1(2)*axis(2) + vecR1(3)*axis(3)
-        vecR1(1) = vecR1(1) - dot*axis(1)
-        vecR1(2) = vecR1(2) - dot*axis(2)
-        vecR1(3) = vecR1(3) - dot*axis(3)
+        dot = vecR1(1) * axis(1) + vecR1(2) * axis(2) + vecR1(3) * axis(3)
+        vecR1(1) = vecR1(1) - dot * axis(1)
+        vecR1(2) = vecR1(2) - dot * axis(2)
+        vecR1(3) = vecR1(3) - dot * axis(3)
 
-        dot = one/sqrt(vecR1(1)**2 + vecR1(2)**2 + vecR1(3)**2)
-        vecR1(1) = vecR1(1)*dot
-        vecR1(2) = vecR1(2)*dot
-        vecR1(3) = vecR1(3)*dot
+        dot = one / sqrt(vecR1(1)**2 + vecR1(2)**2 + vecR1(3)**2)
+        vecR1(1) = vecR1(1) * dot
+        vecR1(2) = vecR1(2) * dot
+        vecR1(3) = vecR1(3) * dot
 
         ! Create the second vector which spans the axial plane. This must
         ! be normal to both axis and vecR1, i.e. the cross-product.
 
-        vecR2(1) = axis(2)*vecR1(3) - axis(3)*vecR1(2)
-        vecR2(2) = axis(3)*vecR1(1) - axis(1)*vecR1(3)
-        vecR2(3) = axis(1)*vecR1(2) - axis(2)*vecR1(1)
+        vecR2(1) = axis(2) * vecR1(3) - axis(3) * vecR1(2)
+        vecR2(2) = axis(3) * vecR1(1) - axis(1) * vecR1(3)
+        vecR2(3) = axis(1) * vecR1(2) - axis(2) * vecR1(1)
 
     end subroutine unitVectorsInAxialPlane
 
@@ -3964,9 +3964,9 @@ subroutine preprocessingADjoint
             nState = nw
         end if
 
-        nDimW = nw*nCellsLocal(1_intType)*nTimeIntervalsSpectral
-        nDimPsi = nState*nCellsLocal(1_intType)*nTimeIntervalsSpectral
-        nDimX = 3*nNodesLocal(1_intType)*nTimeIntervalsSpectral
+        nDimW = nw * nCellsLocal(1_intType) * nTimeIntervalsSpectral
+        nDimPsi = nState * nCellsLocal(1_intType) * nTimeIntervalsSpectral
+        nDimX = 3 * nNodesLocal(1_intType) * nTimeIntervalsSpectral
 
         ! Two w-like vectors.
         call VecCreateMPIWithArray(ADFLOW_COMM_WORLD, nw, ndimW, PETSC_DECIDE, &
@@ -4020,7 +4020,7 @@ subroutine updateReferencePoint
         implicit none
 
         ! Working variables
-        integer(kind=intType) ::mm, nnn, nn
+        integer(kind=intType) :: mm, nnn, nn
         real(kind=realType), dimension(nSections) :: t
 
         groundlevel = 1
@@ -4044,8 +4044,8 @@ subroutine updateReferencePoint
 
             if (equationMode == timeSpectral) then
                 do nnn = 1, nSections
-                    t(nnn) = t(nnn) + (mm - 1)*sections(nnn)%timePeriod &
-                             /real(nTimeIntervalsSpectral, realType)
+                    t(nnn) = t(nnn) + (mm - 1) * sections(nnn)%timePeriod &
+                             / real(nTimeIntervalsSpectral, realType)
                 end do
             end if
 
@@ -4071,11 +4071,11 @@ subroutine updateRotationRate(rotCenter, rotRate, blocks, nblocks)
         use solverUtils
         implicit none
 
-        real(kind=realType), intent(in)::rotCenter(3), rotRate(3)
+        real(kind=realType), intent(in) :: rotCenter(3), rotRate(3)
         integer(kind=intType), intent(in) :: nblocks
         integer(kind=intType), intent(in) :: blocks(nblocks)
 
-        integer(kind=intType) ::mm, nnn, nn, level, sps, i
+        integer(kind=intType) :: mm, nnn, nn, level, sps, i
         real(kind=realType), dimension(nSections) :: t
 
         groundlevel = 1
@@ -4109,8 +4109,8 @@ subroutine updateRotationRate(rotCenter, rotRate, blocks, nblocks)
 
             if (equationMode == timeSpectral) then
                 do nnn = 1, nSections
-                    t(nnn) = t(nnn) + (mm - 1)*sections(nnn)%timePeriod &
-                             /real(nTimeIntervalsSpectral, realType)
+                    t(nnn) = t(nnn) + (mm - 1) * sections(nnn)%timePeriod &
+                             / real(nTimeIntervalsSpectral, realType)
                 end do
             end if
 
diff --git a/src/turbulence/sa.F90 b/src/turbulence/sa.F90
index 17cf560af..53ae30bb4 100644
--- a/src/turbulence/sa.F90
+++ b/src/turbulence/sa.F90
@@ -102,7 +102,7 @@ subroutine saSource
         implicit none
 
         ! Local parameters
-        real(kind=realType), parameter :: f23 = two*third
+        real(kind=realType), parameter :: f23 = two * third
 
         ! Local variables.
         integer(kind=intType) :: i, j, k, nn, ii
@@ -120,15 +120,15 @@ subroutine saSource
 
         ! Set model constants
         cv13 = rsaCv1**3
-        kar2Inv = one/(rsaK**2)
+        kar2Inv = one / (rsaK**2)
         cw36 = rsaCw3**6
-        cb3Inv = one/rsaCb3
+        cb3Inv = one / rsaCb3
 
         ! Determine the non-dimensional wheel speed of this block.
 
-        omegax = timeRef*sections(sectionID)%rotRate(1)
-        omegay = timeRef*sections(sectionID)%rotRate(2)
-        omegaz = timeRef*sections(sectionID)%rotRate(3)
+        omegax = timeRef * sections(sectionID)%rotRate(1)
+        omegay = timeRef * sections(sectionID)%rotRate(2)
+        omegaz = timeRef * sections(sectionID)%rotRate(3)
 
         ! Create switches to production term depending on the variable that
         ! should be used
@@ -139,10 +139,10 @@ subroutine saSource
 
 #ifdef TAPENADE_REVERSE
         !$AD II-LOOP
-        do ii = 0, nx*ny*nz - 1
+        do ii = 0, nx * ny * nz - 1
             i = mod(ii, nx) + 2
-            j = mod(ii/nx, ny) + 2
-            k = ii/(nx*ny) + 2
+            j = mod(ii / nx, ny) + 2
+            k = ii / (nx * ny) + 2
 #else
             do k = 2, kl
                 do j = 2, jl
@@ -153,67 +153,67 @@ subroutine saSource
                         ! such that the cell i,j,k does not give a contribution.
                         ! The gradient is scaled by the factor 2*vol.
 
-                        uux = w(i + 1, j, k, ivx)*si(i, j, k, 1) - w(i - 1, j, k, ivx)*si(i - 1, j, k, 1) &
-                              + w(i, j + 1, k, ivx)*sj(i, j, k, 1) - w(i, j - 1, k, ivx)*sj(i, j - 1, k, 1) &
-                              + w(i, j, k + 1, ivx)*sk(i, j, k, 1) - w(i, j, k - 1, ivx)*sk(i, j, k - 1, 1)
-                        uuy = w(i + 1, j, k, ivx)*si(i, j, k, 2) - w(i - 1, j, k, ivx)*si(i - 1, j, k, 2) &
-                              + w(i, j + 1, k, ivx)*sj(i, j, k, 2) - w(i, j - 1, k, ivx)*sj(i, j - 1, k, 2) &
-                              + w(i, j, k + 1, ivx)*sk(i, j, k, 2) - w(i, j, k - 1, ivx)*sk(i, j, k - 1, 2)
-                        uuz = w(i + 1, j, k, ivx)*si(i, j, k, 3) - w(i - 1, j, k, ivx)*si(i - 1, j, k, 3) &
-                              + w(i, j + 1, k, ivx)*sj(i, j, k, 3) - w(i, j - 1, k, ivx)*sj(i, j - 1, k, 3) &
-                              + w(i, j, k + 1, ivx)*sk(i, j, k, 3) - w(i, j, k - 1, ivx)*sk(i, j, k - 1, 3)
+                        uux = w(i + 1, j, k, ivx) * si(i, j, k, 1) - w(i - 1, j, k, ivx) * si(i - 1, j, k, 1) &
+                              + w(i, j + 1, k, ivx) * sj(i, j, k, 1) - w(i, j - 1, k, ivx) * sj(i, j - 1, k, 1) &
+                              + w(i, j, k + 1, ivx) * sk(i, j, k, 1) - w(i, j, k - 1, ivx) * sk(i, j, k - 1, 1)
+                        uuy = w(i + 1, j, k, ivx) * si(i, j, k, 2) - w(i - 1, j, k, ivx) * si(i - 1, j, k, 2) &
+                              + w(i, j + 1, k, ivx) * sj(i, j, k, 2) - w(i, j - 1, k, ivx) * sj(i, j - 1, k, 2) &
+                              + w(i, j, k + 1, ivx) * sk(i, j, k, 2) - w(i, j, k - 1, ivx) * sk(i, j, k - 1, 2)
+                        uuz = w(i + 1, j, k, ivx) * si(i, j, k, 3) - w(i - 1, j, k, ivx) * si(i - 1, j, k, 3) &
+                              + w(i, j + 1, k, ivx) * sj(i, j, k, 3) - w(i, j - 1, k, ivx) * sj(i, j - 1, k, 3) &
+                              + w(i, j, k + 1, ivx) * sk(i, j, k, 3) - w(i, j, k - 1, ivx) * sk(i, j, k - 1, 3)
 
                         ! Idem for the gradient of v.
 
-                        vvx = w(i + 1, j, k, ivy)*si(i, j, k, 1) - w(i - 1, j, k, ivy)*si(i - 1, j, k, 1) &
-                              + w(i, j + 1, k, ivy)*sj(i, j, k, 1) - w(i, j - 1, k, ivy)*sj(i, j - 1, k, 1) &
-                              + w(i, j, k + 1, ivy)*sk(i, j, k, 1) - w(i, j, k - 1, ivy)*sk(i, j, k - 1, 1)
-                        vvy = w(i + 1, j, k, ivy)*si(i, j, k, 2) - w(i - 1, j, k, ivy)*si(i - 1, j, k, 2) &
-                              + w(i, j + 1, k, ivy)*sj(i, j, k, 2) - w(i, j - 1, k, ivy)*sj(i, j - 1, k, 2) &
-                              + w(i, j, k + 1, ivy)*sk(i, j, k, 2) - w(i, j, k - 1, ivy)*sk(i, j, k - 1, 2)
-                        vvz = w(i + 1, j, k, ivy)*si(i, j, k, 3) - w(i - 1, j, k, ivy)*si(i - 1, j, k, 3) &
-                              + w(i, j + 1, k, ivy)*sj(i, j, k, 3) - w(i, j - 1, k, ivy)*sj(i, j - 1, k, 3) &
-                              + w(i, j, k + 1, ivy)*sk(i, j, k, 3) - w(i, j, k - 1, ivy)*sk(i, j, k - 1, 3)
+                        vvx = w(i + 1, j, k, ivy) * si(i, j, k, 1) - w(i - 1, j, k, ivy) * si(i - 1, j, k, 1) &
+                              + w(i, j + 1, k, ivy) * sj(i, j, k, 1) - w(i, j - 1, k, ivy) * sj(i, j - 1, k, 1) &
+                              + w(i, j, k + 1, ivy) * sk(i, j, k, 1) - w(i, j, k - 1, ivy) * sk(i, j, k - 1, 1)
+                        vvy = w(i + 1, j, k, ivy) * si(i, j, k, 2) - w(i - 1, j, k, ivy) * si(i - 1, j, k, 2) &
+                              + w(i, j + 1, k, ivy) * sj(i, j, k, 2) - w(i, j - 1, k, ivy) * sj(i, j - 1, k, 2) &
+                              + w(i, j, k + 1, ivy) * sk(i, j, k, 2) - w(i, j, k - 1, ivy) * sk(i, j, k - 1, 2)
+                        vvz = w(i + 1, j, k, ivy) * si(i, j, k, 3) - w(i - 1, j, k, ivy) * si(i - 1, j, k, 3) &
+                              + w(i, j + 1, k, ivy) * sj(i, j, k, 3) - w(i, j - 1, k, ivy) * sj(i, j - 1, k, 3) &
+                              + w(i, j, k + 1, ivy) * sk(i, j, k, 3) - w(i, j, k - 1, ivy) * sk(i, j, k - 1, 3)
 
                         ! And for the gradient of w.
 
-                        wwx = w(i + 1, j, k, ivz)*si(i, j, k, 1) - w(i - 1, j, k, ivz)*si(i - 1, j, k, 1) &
-                              + w(i, j + 1, k, ivz)*sj(i, j, k, 1) - w(i, j - 1, k, ivz)*sj(i, j - 1, k, 1) &
-                              + w(i, j, k + 1, ivz)*sk(i, j, k, 1) - w(i, j, k - 1, ivz)*sk(i, j, k - 1, 1)
-                        wwy = w(i + 1, j, k, ivz)*si(i, j, k, 2) - w(i - 1, j, k, ivz)*si(i - 1, j, k, 2) &
-                              + w(i, j + 1, k, ivz)*sj(i, j, k, 2) - w(i, j - 1, k, ivz)*sj(i, j - 1, k, 2) &
-                              + w(i, j, k + 1, ivz)*sk(i, j, k, 2) - w(i, j, k - 1, ivz)*sk(i, j, k - 1, 2)
-                        wwz = w(i + 1, j, k, ivz)*si(i, j, k, 3) - w(i - 1, j, k, ivz)*si(i - 1, j, k, 3) &
-                              + w(i, j + 1, k, ivz)*sj(i, j, k, 3) - w(i, j - 1, k, ivz)*sj(i, j - 1, k, 3) &
-                              + w(i, j, k + 1, ivz)*sk(i, j, k, 3) - w(i, j, k - 1, ivz)*sk(i, j, k - 1, 3)
+                        wwx = w(i + 1, j, k, ivz) * si(i, j, k, 1) - w(i - 1, j, k, ivz) * si(i - 1, j, k, 1) &
+                              + w(i, j + 1, k, ivz) * sj(i, j, k, 1) - w(i, j - 1, k, ivz) * sj(i, j - 1, k, 1) &
+                              + w(i, j, k + 1, ivz) * sk(i, j, k, 1) - w(i, j, k - 1, ivz) * sk(i, j, k - 1, 1)
+                        wwy = w(i + 1, j, k, ivz) * si(i, j, k, 2) - w(i - 1, j, k, ivz) * si(i - 1, j, k, 2) &
+                              + w(i, j + 1, k, ivz) * sj(i, j, k, 2) - w(i, j - 1, k, ivz) * sj(i, j - 1, k, 2) &
+                              + w(i, j, k + 1, ivz) * sk(i, j, k, 2) - w(i, j, k - 1, ivz) * sk(i, j, k - 1, 2)
+                        wwz = w(i + 1, j, k, ivz) * si(i, j, k, 3) - w(i - 1, j, k, ivz) * si(i - 1, j, k, 3) &
+                              + w(i, j + 1, k, ivz) * sj(i, j, k, 3) - w(i, j - 1, k, ivz) * sj(i, j - 1, k, 3) &
+                              + w(i, j, k + 1, ivz) * sk(i, j, k, 3) - w(i, j, k - 1, ivz) * sk(i, j, k - 1, 3)
 
                         ! Compute the components of the stress tensor.
                         ! The combination of the current scaling of the velocity
                         ! gradients (2*vol) and the definition of the stress tensor,
                         ! leads to the factor 1/(4*vol).
 
-                        fact = fourth/vol(i, j, k)
+                        fact = fourth / vol(i, j, k)
 
                         if (turbProd .eq. strain) then
 
-                            sxx = two*fact*uux
-                            syy = two*fact*vvy
-                            szz = two*fact*wwz
+                            sxx = two * fact * uux
+                            syy = two * fact * vvy
+                            szz = two * fact * wwz
 
-                            sxy = fact*(uuy + vvx)
-                            sxz = fact*(uuz + wwx)
-                            syz = fact*(vvz + wwy)
+                            sxy = fact * (uuy + vvx)
+                            sxz = fact * (uuz + wwx)
+                            syz = fact * (vvz + wwy)
 
                             ! Compute 2/3 * divergence of velocity squared
 
-                            div2 = f23*(sxx + syy + szz)**2
+                            div2 = f23 * (sxx + syy + szz)**2
 
                             ! Compute strain production term
 
-                            strainMag2 = two*(sxy**2 + sxz**2 + syz**2) &
+                            strainMag2 = two * (sxy**2 + sxz**2 + syz**2) &
                                          + sxx**2 + syy**2 + szz**2
 
-                            strainProd = two*strainMag2 - div2
+                            strainProd = two * strainMag2 - div2
 
                             ss = sqrt(strainProd)
 
@@ -222,9 +222,9 @@ subroutine saSource
                             ! Compute the three components of the vorticity vector.
                             ! Substract the part coming from the rotating frame.
 
-                            vortx = two*fact*(wwy - vvz) - two*omegax
-                            vorty = two*fact*(uuz - wwx) - two*omegay
-                            vortz = two*fact*(vvx - uuy) - two*omegaz
+                            vortx = two * fact * (wwy - vvz) - two * omegax
+                            vorty = two * fact * (uuz - wwx) - two * omegay
+                            vortz = two * fact * (vvx - uuy) - two * omegaz
 
                             ! Compute the vorticity production term
 
@@ -243,25 +243,25 @@ subroutine saSource
                         ! and nu) and the functions fv1 and fv2. The latter corrects
                         ! the production term near a viscous wall.
 
-                        nu = rlv(i, j, k)/w(i, j, k, irho)
-                        chi = w(i, j, k, itu1)/nu
+                        nu = rlv(i, j, k) / w(i, j, k, irho)
+                        chi = w(i, j, k, itu1) / nu
 
                         if (.not. useRoughSA) then
-                            dist2Inv = one/(d2Wall(i, j, k)**2)
+                            dist2Inv = one / (d2Wall(i, j, k)**2)
                         else
-                            distRough = d2Wall(i, j, k) + 0.03_realType*ks(i, j, k)
-                            dist2Inv = one/(distRough**2)
-                            chi = chi + rsaCr1*ks(i, j, k)/distRough
+                            distRough = d2Wall(i, j, k) + 0.03_realType * ks(i, j, k)
+                            dist2Inv = one / (distRough**2)
+                            chi = chi + rsaCr1 * ks(i, j, k) / distRough
                         end if
 
-                        chi2 = chi*chi
-                        chi3 = chi*chi2
-                        fv1 = chi3/(chi3 + cv13)
+                        chi2 = chi * chi
+                        chi3 = chi * chi2
+                        fv1 = chi3 / (chi3 + cv13)
 
                         if (.not. useRoughSA) then
-                            fv2 = one - chi/(one + chi*fv1)
+                            fv2 = one - chi / (one + chi * fv1)
                         else
-                            fv2 = one - w(i, j, k, itu1)/(nu + w(i, j, k, itu1)*fv1)
+                            fv2 = one - w(i, j, k, itu1) / (nu + w(i, j, k, itu1) * fv1)
                         end if
 
                         ! The function ft2, which is designed to keep a laminar
@@ -269,7 +269,7 @@ subroutine saSource
                         ! this function should be set to 0.0.
 
                         if (useft2SA) then
-                            ft2 = rsaCt3*exp(-rsaCt4*chi2)
+                            ft2 = rsaCt3 * exp(-rsaCt4 * chi2)
                         else
                             ft2 = zero
                         end if
@@ -277,12 +277,12 @@ subroutine saSource
                         ! Correct the production term to account for the influence
                         ! of the wall.
 
-                        sst = ss + w(i, j, k, itu1)*fv2*kar2Inv*dist2Inv
+                        sst = ss + w(i, j, k, itu1) * fv2 * kar2Inv * dist2Inv
 
                         ! Add rotation term (useRotationSA defined in inputParams.F90)
 
                         if (useRotationSA) then
-                            sst = sst + rsaCrot*min(zero, sqrt(two*strainMag2))
+                            sst = sst + rsaCrot * min(zero, sqrt(two * strainMag2))
                         end if
 
                         ! Make sure that this term remains positive
@@ -295,12 +295,12 @@ subroutine saSource
                         ! to avoid numerical problems. This is ok, because the
                         ! asymptotical value of fw is then already reached.
 
-                        rr = w(i, j, k, itu1)*kar2Inv*dist2Inv/sst
+                        rr = w(i, j, k, itu1) * kar2Inv * dist2Inv / sst
                         rr = min(rr, 10.0_realType)
-                        gg = rr + rsaCw2*(rr**6 - rr)
+                        gg = rr + rsaCw2 * (rr**6 - rr)
                         gg6 = gg**6
-                        termFw = ((one + cw36)/(gg6 + cw36))**sixth
-                        fwSa = gg*termFw
+                        termFw = ((one + cw36) / (gg6 + cw36))**sixth
+                        fwSa = gg * termFw
 
                         ! Compute the source term; some terms are saved for the
                         ! linearization. The source term is stored in dvt.
@@ -308,30 +308,30 @@ subroutine saSource
                         if (approxSA) then
                             term1 = zero
                         else
-                            term1 = rsaCb1*(one - ft2)*ss
+                            term1 = rsaCb1 * (one - ft2) * ss
                         end if
-                        term2 = dist2Inv*(kar2Inv*rsaCb1*((one - ft2)*fv2 + ft2) &
-                                          - rsaCw1*fwSa)
+                        term2 = dist2Inv * (kar2Inv * rsaCb1 * ((one - ft2) * fv2 + ft2) &
+                                            - rsaCw1 * fwSa)
 
-                        scratch(i, j, k, idvt) = (term1 + term2*w(i, j, k, itu1))*w(i, j, k, itu1)
+                        scratch(i, j, k, idvt) = (term1 + term2 * w(i, j, k, itu1)) * w(i, j, k, itu1)
 
 #ifndef USE_TAPENADE
                         ! Compute some derivatives w.r.t. nuTilde. These will occur
                         ! in the left hand side, i.e. the matrix for the implicit
                         ! treatment.
 
-                        dfv1 = three*chi2*cv13/((chi3 + cv13)**2)
+                        dfv1 = three * chi2 * cv13 / ((chi3 + cv13)**2)
                         if (.not. useRoughSA) then
-                            dfv2 = (chi2*dfv1 - one)/(nu*((one + chi*fv1)**2))
+                            dfv2 = (chi2 * dfv1 - one) / (nu * ((one + chi * fv1)**2))
                         else
-                            dfv2 = (w(i, j, k, itu1)*dfv1 - nu)/(nu + w(i, j, k, itu1)*fv1)**2
+                            dfv2 = (w(i, j, k, itu1) * dfv1 - nu) / (nu + w(i, j, k, itu1) * fv1)**2
                         end if
-                        dft2 = -two*rsaCt4*chi*ft2/nu
+                        dft2 = -two * rsaCt4 * chi * ft2 / nu
 
-                        drr = (one - rr*(fv2 + w(i, j, k, itu1)*dfv2)) &
-                              *kar2Inv*dist2Inv/sst
-                        dgg = (one - rsaCw2 + six*rsaCw2*(rr**5))*drr
-                        dfw = (cw36/(gg6 + cw36))*termFw*dgg
+                        drr = (one - rr * (fv2 + w(i, j, k, itu1) * dfv2)) &
+                              * kar2Inv * dist2Inv / sst
+                        dgg = (one - rsaCw2 + six * rsaCw2 * (rr**5)) * drr
+                        dfw = (cw36 / (gg6 + cw36)) * termFw * dgg
 
                         ! Compute the source term jacobian. Note that the part
                         ! containing term1 is treated explicitly. The reason is that
@@ -340,10 +340,10 @@ subroutine saSource
                         ! the stability. You may want to play around and try to
                         ! take this term into account in the jacobian.
                         ! Note that -dsource/dnu is stored.
-                        qq(i, j, k) = -two*term2*w(i, j, k, itu1) &
-                                      - dist2Inv*w(i, j, k, itu1)*w(i, j, k, itu1) &
-                                      *(rsaCb1*kar2Inv*(dfv2 - ft2*dfv2 - fv2*dft2 + dft2) &
-                                        - rsaCw1*dfw)
+                        qq(i, j, k) = -two * term2 * w(i, j, k, itu1) &
+                                      - dist2Inv * w(i, j, k, itu1) * w(i, j, k, itu1) &
+                                      * (rsaCb1 * kar2Inv * (dfv2 - ft2 * dfv2 - fv2 * dft2 + dft2) &
+                                         - rsaCw1 * dfw)
 
                         ! A couple of terms in qq may lead to a negative
                         ! contribution. Clip qq to zero, if the total is negative.
@@ -379,19 +379,19 @@ subroutine saViscous
 
         ! Set model constants
         cv13 = rsaCv1**3
-        kar2Inv = one/(rsaK**2)
+        kar2Inv = one / (rsaK**2)
         cw36 = rsaCw3**6
-        cb3Inv = one/rsaCb3
+        cb3Inv = one / rsaCb3
 
         !
         !       Viscous terms in k-direction.
         !
 #ifdef TAPENADE_REVERSE
         !$AD II-LOOP
-        do ii = 0, nx*ny*nz - 1
+        do ii = 0, nx * ny * nz - 1
             i = mod(ii, nx) + 2
-            j = mod(ii/nx, ny) + 2
-            k = ii/(nx*ny) + 2
+            j = mod(ii / nx, ny) + 2
+            k = ii / (nx * ny) + 2
 #else
             do k = 2, kl
                 do j = 2, jl
@@ -400,22 +400,22 @@ subroutine saViscous
                         ! Compute the metrics in zeta-direction, i.e. along the
                         ! line k = constant.
 
-                        voli = one/vol(i, j, k)
-                        volmi = two/(vol(i, j, k) + vol(i, j, k - 1))
-                        volpi = two/(vol(i, j, k) + vol(i, j, k + 1))
+                        voli = one / vol(i, j, k)
+                        volmi = two / (vol(i, j, k) + vol(i, j, k - 1))
+                        volpi = two / (vol(i, j, k) + vol(i, j, k + 1))
 
-                        xm = sk(i, j, k - 1, 1)*volmi
-                        ym = sk(i, j, k - 1, 2)*volmi
-                        zm = sk(i, j, k - 1, 3)*volmi
-                        xp = sk(i, j, k, 1)*volpi
-                        yp = sk(i, j, k, 2)*volpi
-                        zp = sk(i, j, k, 3)*volpi
+                        xm = sk(i, j, k - 1, 1) * volmi
+                        ym = sk(i, j, k - 1, 2) * volmi
+                        zm = sk(i, j, k - 1, 3) * volmi
+                        xp = sk(i, j, k, 1) * volpi
+                        yp = sk(i, j, k, 2) * volpi
+                        zp = sk(i, j, k, 3) * volpi
 
-                        xa = half*(sk(i, j, k, 1) + sk(i, j, k - 1, 1))*voli
-                        ya = half*(sk(i, j, k, 2) + sk(i, j, k - 1, 2))*voli
-                        za = half*(sk(i, j, k, 3) + sk(i, j, k - 1, 3))*voli
-                        ttm = xm*xa + ym*ya + zm*za
-                        ttp = xp*xa + yp*ya + zp*za
+                        xa = half * (sk(i, j, k, 1) + sk(i, j, k - 1, 1)) * voli
+                        ya = half * (sk(i, j, k, 2) + sk(i, j, k - 1, 2)) * voli
+                        za = half * (sk(i, j, k, 3) + sk(i, j, k - 1, 3)) * voli
+                        ttm = xm * xa + ym * ya + zm * za
+                        ttp = xp * xa + yp * ya + zp * za
 
                         ! ttm and ttp ~ 1/deltaX^2
 
@@ -433,23 +433,23 @@ subroutine saViscous
                         ! k+1, k and k-1 in the second derivative. Make sure that
                         ! these coefficients are nonnegative.
 
-                        cnud = -rsaCb2*w(i, j, k, itu1)*cb3Inv
-                        cam = ttm*cnud
-                        cap = ttp*cnud
+                        cnud = -rsaCb2 * w(i, j, k, itu1) * cb3Inv
+                        cam = ttm * cnud
+                        cap = ttp * cnud
 
                         ! Compute nuTilde at the faces
 
-                        nutm = half*(w(i, j, k - 1, itu1) + w(i, j, k, itu1))
-                        nutp = half*(w(i, j, k + 1, itu1) + w(i, j, k, itu1))
+                        nutm = half * (w(i, j, k - 1, itu1) + w(i, j, k, itu1))
+                        nutp = half * (w(i, j, k + 1, itu1) + w(i, j, k, itu1))
 
                         ! Compute nu at the faces
 
-                        nu = rlv(i, j, k)/w(i, j, k, irho)
-                        num = half*(rlv(i, j, k - 1)/w(i, j, k - 1, irho) + nu)
-                        nup = half*(rlv(i, j, k + 1)/w(i, j, k + 1, irho) + nu)
+                        nu = rlv(i, j, k) / w(i, j, k, irho)
+                        num = half * (rlv(i, j, k - 1) / w(i, j, k - 1, irho) + nu)
+                        nup = half * (rlv(i, j, k + 1) / w(i, j, k + 1, irho) + nu)
 
-                        cdm = (num + (one + rsaCb2)*nutm)*ttm*cb3Inv
-                        cdp = (nup + (one + rsaCb2)*nutp)*ttp*cb3Inv
+                        cdm = (num + (one + rsaCb2) * nutm) * ttm * cb3Inv
+                        cdp = (nup + (one + rsaCb2) * nutp) * ttp * cb3Inv
 
                         c1m = max(cdm + cam, zero)
                         c1p = max(cdp + cap, zero)
@@ -458,8 +458,8 @@ subroutine saViscous
                         ! Update the residual for this cell and store the possible
                         ! coefficients for the matrix in b1, c1 and d1.
 
-                        scratch(i, j, k, idvt) = scratch(i, j, k, idvt) + c1m*w(i, j, k - 1, itu1) &
-                                                 - c10*w(i, j, k, itu1) + c1p*w(i, j, k + 1, itu1)
+                        scratch(i, j, k, idvt) = scratch(i, j, k, idvt) + c1m * w(i, j, k - 1, itu1) &
+                                                 - c10 * w(i, j, k, itu1) + c1p * w(i, j, k + 1, itu1)
 #ifndef USE_TAPENADE
                         b1 = -c1m
                         c1 = c10
@@ -475,10 +475,10 @@ subroutine saViscous
 
                         if (k == 2) then
                             qq(i, j, k) = qq(i, j, k) + c1 &
-                                          - b1*max(bmtk1(i, j, itu1, itu1), zero)
+                                          - b1 * max(bmtk1(i, j, itu1, itu1), zero)
                         else if (k == kl) then
                             qq(i, j, k) = qq(i, j, k) + c1 &
-                                          - d1*max(bmtk2(i, j, itu1, itu1), zero)
+                                          - d1 * max(bmtk2(i, j, itu1, itu1), zero)
                         else
                             qq(i, j, k) = qq(i, j, k) + c1
                         end if
@@ -495,10 +495,10 @@ subroutine saViscous
         !
 #ifdef TAPENADE_REVERSE
         !$AD II-LOOP
-        do ii = 0, nx*ny*nz - 1
+        do ii = 0, nx * ny * nz - 1
             i = mod(ii, nx) + 2
-            j = mod(ii/nx, ny) + 2
-            k = ii/(nx*ny) + 2
+            j = mod(ii / nx, ny) + 2
+            k = ii / (nx * ny) + 2
 #else
             do k = 2, kl
                 do j = 2, jl
@@ -507,22 +507,22 @@ subroutine saViscous
                         ! Compute the metrics in eta-direction, i.e. along the
                         ! line j = constant.
 
-                        voli = one/vol(i, j, k)
-                        volmi = two/(vol(i, j, k) + vol(i, j - 1, k))
-                        volpi = two/(vol(i, j, k) + vol(i, j + 1, k))
+                        voli = one / vol(i, j, k)
+                        volmi = two / (vol(i, j, k) + vol(i, j - 1, k))
+                        volpi = two / (vol(i, j, k) + vol(i, j + 1, k))
 
-                        xm = sj(i, j - 1, k, 1)*volmi
-                        ym = sj(i, j - 1, k, 2)*volmi
-                        zm = sj(i, j - 1, k, 3)*volmi
-                        xp = sj(i, j, k, 1)*volpi
-                        yp = sj(i, j, k, 2)*volpi
-                        zp = sj(i, j, k, 3)*volpi
+                        xm = sj(i, j - 1, k, 1) * volmi
+                        ym = sj(i, j - 1, k, 2) * volmi
+                        zm = sj(i, j - 1, k, 3) * volmi
+                        xp = sj(i, j, k, 1) * volpi
+                        yp = sj(i, j, k, 2) * volpi
+                        zp = sj(i, j, k, 3) * volpi
 
-                        xa = half*(sj(i, j, k, 1) + sj(i, j - 1, k, 1))*voli
-                        ya = half*(sj(i, j, k, 2) + sj(i, j - 1, k, 2))*voli
-                        za = half*(sj(i, j, k, 3) + sj(i, j - 1, k, 3))*voli
-                        ttm = xm*xa + ym*ya + zm*za
-                        ttp = xp*xa + yp*ya + zp*za
+                        xa = half * (sj(i, j, k, 1) + sj(i, j - 1, k, 1)) * voli
+                        ya = half * (sj(i, j, k, 2) + sj(i, j - 1, k, 2)) * voli
+                        za = half * (sj(i, j, k, 3) + sj(i, j - 1, k, 3)) * voli
+                        ttm = xm * xa + ym * ya + zm * za
+                        ttp = xp * xa + yp * ya + zp * za
 
                         ! Computation of the viscous terms in eta-direction; note
                         ! that cross-derivatives are neglected, i.e. the mesh is
@@ -538,17 +538,17 @@ subroutine saViscous
                         ! j+1, j and j-1 in the second derivative. Make sure that
                         ! these coefficients are nonnegative.
 
-                        cnud = -rsaCb2*w(i, j, k, itu1)*cb3Inv
-                        cam = ttm*cnud
-                        cap = ttp*cnud
+                        cnud = -rsaCb2 * w(i, j, k, itu1) * cb3Inv
+                        cam = ttm * cnud
+                        cap = ttp * cnud
 
-                        nutm = half*(w(i, j - 1, k, itu1) + w(i, j, k, itu1))
-                        nutp = half*(w(i, j + 1, k, itu1) + w(i, j, k, itu1))
-                        nu = rlv(i, j, k)/w(i, j, k, irho)
-                        num = half*(rlv(i, j - 1, k)/w(i, j - 1, k, irho) + nu)
-                        nup = half*(rlv(i, j + 1, k)/w(i, j + 1, k, irho) + nu)
-                        cdm = (num + (one + rsaCb2)*nutm)*ttm*cb3Inv
-                        cdp = (nup + (one + rsaCb2)*nutp)*ttp*cb3Inv
+                        nutm = half * (w(i, j - 1, k, itu1) + w(i, j, k, itu1))
+                        nutp = half * (w(i, j + 1, k, itu1) + w(i, j, k, itu1))
+                        nu = rlv(i, j, k) / w(i, j, k, irho)
+                        num = half * (rlv(i, j - 1, k) / w(i, j - 1, k, irho) + nu)
+                        nup = half * (rlv(i, j + 1, k) / w(i, j + 1, k, irho) + nu)
+                        cdm = (num + (one + rsaCb2) * nutm) * ttm * cb3Inv
+                        cdp = (nup + (one + rsaCb2) * nutp) * ttp * cb3Inv
 
                         c1m = max(cdm + cam, zero)
                         c1p = max(cdp + cap, zero)
@@ -557,8 +557,8 @@ subroutine saViscous
                         ! Update the residual for this cell and store the possible
                         ! coefficients for the matrix in b1, c1 and d1.
 
-                        scratch(i, j, k, idvt) = scratch(i, j, k, idvt) + c1m*w(i, j - 1, k, itu1) &
-                                                 - c10*w(i, j, k, itu1) + c1p*w(i, j + 1, k, itu1)
+                        scratch(i, j, k, idvt) = scratch(i, j, k, idvt) + c1m * w(i, j - 1, k, itu1) &
+                                                 - c10 * w(i, j, k, itu1) + c1p * w(i, j + 1, k, itu1)
 #ifndef USE_TAPENADE
                         b1 = -c1m
                         c1 = c10
@@ -574,10 +574,10 @@ subroutine saViscous
 
                         if (j == 2) then
                             qq(i, j, k) = qq(i, j, k) + c1 &
-                                          - b1*max(bmtj1(i, k, itu1, itu1), zero)
+                                          - b1 * max(bmtj1(i, k, itu1, itu1), zero)
                         else if (j == jl) then
                             qq(i, j, k) = qq(i, j, k) + c1 &
-                                          - d1*max(bmtj2(i, k, itu1, itu1), zero)
+                                          - d1 * max(bmtj2(i, k, itu1, itu1), zero)
                         else
                             qq(i, j, k) = qq(i, j, k) + c1
                         end if
@@ -594,10 +594,10 @@ subroutine saViscous
         !
 #ifdef TAPENADE_REVERSE
         !$AD II-LOOP
-        do ii = 0, nx*ny*nz - 1
+        do ii = 0, nx * ny * nz - 1
             i = mod(ii, nx) + 2
-            j = mod(ii/nx, ny) + 2
-            k = ii/(nx*ny) + 2
+            j = mod(ii / nx, ny) + 2
+            k = ii / (nx * ny) + 2
 #else
             do k = 2, kl
                 do j = 2, jl
@@ -606,22 +606,22 @@ subroutine saViscous
                         ! Compute the metrics in xi-direction, i.e. along the
                         ! line i = constant.
 
-                        voli = one/vol(i, j, k)
-                        volmi = two/(vol(i, j, k) + vol(i - 1, j, k))
-                        volpi = two/(vol(i, j, k) + vol(i + 1, j, k))
+                        voli = one / vol(i, j, k)
+                        volmi = two / (vol(i, j, k) + vol(i - 1, j, k))
+                        volpi = two / (vol(i, j, k) + vol(i + 1, j, k))
 
-                        xm = si(i - 1, j, k, 1)*volmi
-                        ym = si(i - 1, j, k, 2)*volmi
-                        zm = si(i - 1, j, k, 3)*volmi
-                        xp = si(i, j, k, 1)*volpi
-                        yp = si(i, j, k, 2)*volpi
-                        zp = si(i, j, k, 3)*volpi
+                        xm = si(i - 1, j, k, 1) * volmi
+                        ym = si(i - 1, j, k, 2) * volmi
+                        zm = si(i - 1, j, k, 3) * volmi
+                        xp = si(i, j, k, 1) * volpi
+                        yp = si(i, j, k, 2) * volpi
+                        zp = si(i, j, k, 3) * volpi
 
-                        xa = half*(si(i, j, k, 1) + si(i - 1, j, k, 1))*voli
-                        ya = half*(si(i, j, k, 2) + si(i - 1, j, k, 2))*voli
-                        za = half*(si(i, j, k, 3) + si(i - 1, j, k, 3))*voli
-                        ttm = xm*xa + ym*ya + zm*za
-                        ttp = xp*xa + yp*ya + zp*za
+                        xa = half * (si(i, j, k, 1) + si(i - 1, j, k, 1)) * voli
+                        ya = half * (si(i, j, k, 2) + si(i - 1, j, k, 2)) * voli
+                        za = half * (si(i, j, k, 3) + si(i - 1, j, k, 3)) * voli
+                        ttm = xm * xa + ym * ya + zm * za
+                        ttp = xp * xa + yp * ya + zp * za
 
                         ! Computation of the viscous terms in xi-direction; note
                         ! that cross-derivatives are neglected, i.e. the mesh is
@@ -637,17 +637,17 @@ subroutine saViscous
                         ! i+1, i and i-1 in the second derivative. Make sure that
                         ! these coefficients are nonnegative.
 
-                        cnud = -rsaCb2*w(i, j, k, itu1)*cb3Inv
-                        cam = ttm*cnud
-                        cap = ttp*cnud
+                        cnud = -rsaCb2 * w(i, j, k, itu1) * cb3Inv
+                        cam = ttm * cnud
+                        cap = ttp * cnud
 
-                        nutm = half*(w(i - 1, j, k, itu1) + w(i, j, k, itu1))
-                        nutp = half*(w(i + 1, j, k, itu1) + w(i, j, k, itu1))
-                        nu = rlv(i, j, k)/w(i, j, k, irho)
-                        num = half*(rlv(i - 1, j, k)/w(i - 1, j, k, irho) + nu)
-                        nup = half*(rlv(i + 1, j, k)/w(i + 1, j, k, irho) + nu)
-                        cdm = (num + (one + rsaCb2)*nutm)*ttm*cb3Inv
-                        cdp = (nup + (one + rsaCb2)*nutp)*ttp*cb3Inv
+                        nutm = half * (w(i - 1, j, k, itu1) + w(i, j, k, itu1))
+                        nutp = half * (w(i + 1, j, k, itu1) + w(i, j, k, itu1))
+                        nu = rlv(i, j, k) / w(i, j, k, irho)
+                        num = half * (rlv(i - 1, j, k) / w(i - 1, j, k, irho) + nu)
+                        nup = half * (rlv(i + 1, j, k) / w(i + 1, j, k, irho) + nu)
+                        cdm = (num + (one + rsaCb2) * nutm) * ttm * cb3Inv
+                        cdp = (nup + (one + rsaCb2) * nutp) * ttp * cb3Inv
 
                         c1m = max(cdm + cam, zero)
                         c1p = max(cdp + cap, zero)
@@ -656,8 +656,8 @@ subroutine saViscous
                         ! Update the residual for this cell and store the possible
                         ! coefficients for the matrix in b1, c1 and d1.
 
-                        scratch(i, j, k, idvt) = scratch(i, j, k, idvt) + c1m*w(i - 1, j, k, itu1) &
-                                                 - c10*w(i, j, k, itu1) + c1p*w(i + 1, j, k, itu1)
+                        scratch(i, j, k, idvt) = scratch(i, j, k, idvt) + c1m * w(i - 1, j, k, itu1) &
+                                                 - c10 * w(i, j, k, itu1) + c1p * w(i + 1, j, k, itu1)
 #ifndef USE_TAPENADE
                         b1 = -c1m
                         c1 = c10
@@ -673,10 +673,10 @@ subroutine saViscous
 
                         if (i == 2) then
                             qq(i, j, k) = qq(i, j, k) + c1 &
-                                          - b1*max(bmti1(j, k, itu1, itu1), zero)
+                                          - b1 * max(bmti1(j, k, itu1, itu1), zero)
                         else if (i == il) then
                             qq(i, j, k) = qq(i, j, k) + c1 &
-                                          - d1*max(bmti2(j, k, itu1, itu1), zero)
+                                          - d1 * max(bmti2(j, k, itu1, itu1), zero)
                         else
                             qq(i, j, k) = qq(i, j, k) + c1
                         end if
@@ -708,17 +708,17 @@ subroutine saResScale
 
 #ifdef TAPENADE_REVERSE
         !$AD II-LOOP
-        do ii = 0, nx*ny*nz - 1
+        do ii = 0, nx * ny * nz - 1
             i = mod(ii, nx) + 2
-            j = mod(ii/nx, ny) + 2
-            k = ii/(nx*ny) + 2
+            j = mod(ii / nx, ny) + 2
+            k = ii / (nx * ny) + 2
 #else
             do k = 2, kl
                 do j = 2, jl
                     do i = 2, il
 #endif
                         rblank = max(real(iblank(i, j, k), realType), zero)
-                        dw(i, j, k, itu1) = -volRef(i, j, k)*scratch(i, j, k, idvt)*rblank
+                        dw(i, j, k, itu1) = -volRef(i, j, k) * scratch(i, j, k, idvt) * rblank
 #ifdef TAPENADE_REVERSE
                     end do
 #else
@@ -834,11 +834,11 @@ subroutine saSolve
                         ! the update. Also note that the curve fits contain the
                         ! non-dimensional value.
 
-                        yp = ww(i, j, irho)*dd2Wall(i - 1, j - 1) &
-                             *viscSubface(nn)%utau(i, j)/rrlv(i, j)
+                        yp = ww(i, j, irho) * dd2Wall(i - 1, j - 1) &
+                             * viscSubface(nn)%utau(i, j) / rrlv(i, j)
 
                         call curveTupYp(tu1p, yp, itu1, itu1)
-                        ddvt(i, j, 1) = tu1p(1)*rrlv(i, j)/ww(i, j, irho) - ww(i, j, itu1)
+                        ddvt(i, j, 1) = tu1p(1) * rrlv(i, j) / ww(i, j, irho) - ww(i, j, itu1)
 
                         ! Set the wall flag to .true.
 
@@ -862,13 +862,13 @@ subroutine saSolve
 
         factor = one
         if (turbRelax == turbRelaxImplicit) &
-            factor = one + (one - alfaTurb)/alfaTurb
+            factor = one + (one - alfaTurb) / alfaTurb
 
         do k = 2, kl
             do j = 2, jl
                 do i = 2, il
 
-                    qq(i, j, k) = factor*qq(i, j, k)
+                    qq(i, j, k) = factor * qq(i, j, k)
 
                     ! Set qq to 1 if the value is determined by the
                     ! wall function table.
@@ -905,37 +905,37 @@ subroutine saSolve
                     ! Consequently, see the j-loop to build the residual for
                     ! the comments.
 
-                    voli = one/vol(i, j, k)
-                    volmi = two/(vol(i, j, k) + vol(i, j - 1, k))
-                    volpi = two/(vol(i, j, k) + vol(i, j + 1, k))
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i, j - 1, k))
+                    volpi = two / (vol(i, j, k) + vol(i, j + 1, k))
 
-                    xm = sj(i, j - 1, k, 1)*volmi
-                    ym = sj(i, j - 1, k, 2)*volmi
-                    zm = sj(i, j - 1, k, 3)*volmi
-                    xp = sj(i, j, k, 1)*volpi
-                    yp = sj(i, j, k, 2)*volpi
-                    zp = sj(i, j, k, 3)*volpi
+                    xm = sj(i, j - 1, k, 1) * volmi
+                    ym = sj(i, j - 1, k, 2) * volmi
+                    zm = sj(i, j - 1, k, 3) * volmi
+                    xp = sj(i, j, k, 1) * volpi
+                    yp = sj(i, j, k, 2) * volpi
+                    zp = sj(i, j, k, 3) * volpi
 
-                    xa = half*(sj(i, j, k, 1) + sj(i, j - 1, k, 1))*voli
-                    ya = half*(sj(i, j, k, 2) + sj(i, j - 1, k, 2))*voli
-                    za = half*(sj(i, j, k, 3) + sj(i, j - 1, k, 3))*voli
-                    ttm = xm*xa + ym*ya + zm*za
-                    ttp = xp*xa + yp*ya + zp*za
+                    xa = half * (sj(i, j, k, 1) + sj(i, j - 1, k, 1)) * voli
+                    ya = half * (sj(i, j, k, 2) + sj(i, j - 1, k, 2)) * voli
+                    za = half * (sj(i, j, k, 3) + sj(i, j - 1, k, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
 
-                    cnud = -rsaCb2*w(i, j, k, itu1)*cb3Inv
-                    cam = ttm*cnud
-                    cap = ttp*cnud
+                    cnud = -rsaCb2 * w(i, j, k, itu1) * cb3Inv
+                    cam = ttm * cnud
+                    cap = ttp * cnud
 
                     ! Off-diagonal terms due to the diffusion terms
                     ! in j-direction.
 
-                    nutm = half*(w(i, j - 1, k, itu1) + w(i, j, k, itu1))
-                    nutp = half*(w(i, j + 1, k, itu1) + w(i, j, k, itu1))
-                    nu = rlv(i, j, k)/w(i, j, k, irho)
-                    num = half*(rlv(i, j - 1, k)/w(i, j - 1, k, irho) + nu)
-                    nup = half*(rlv(i, j + 1, k)/w(i, j + 1, k, irho) + nu)
-                    cdm = (num + (one + rsaCb2)*nutm)*ttm*cb3Inv
-                    cdp = (nup + (one + rsaCb2)*nutp)*ttp*cb3Inv
+                    nutm = half * (w(i, j - 1, k, itu1) + w(i, j, k, itu1))
+                    nutp = half * (w(i, j + 1, k, itu1) + w(i, j, k, itu1))
+                    nu = rlv(i, j, k) / w(i, j, k, irho)
+                    num = half * (rlv(i, j - 1, k) / w(i, j - 1, k, irho) + nu)
+                    nup = half * (rlv(i, j + 1, k) / w(i, j + 1, k, irho) + nu)
+                    cdm = (num + (one + rsaCb2) * nutm) * ttm * cb3Inv
+                    cdp = (nup + (one + rsaCb2) * nutp) * ttp * cb3Inv
 
                     c1m = max(cdm + cam, zero)
                     c1p = max(cdp + cap, zero)
@@ -947,12 +947,12 @@ subroutine saSolve
                     ! It is taken as the average of j and j-1,
 
                     if (addGridVelocities) &
-                        qs = half*(sFaceJ(i, j, k) + sFaceJ(i, j - 1, k))*voli
+                        qs = half * (sFaceJ(i, j, k) + sFaceJ(i, j - 1, k)) * voli
 
                     ! Off-diagonal terms due to the advection term in
                     ! j-direction. First order approximation.
 
-                    uu = xa*w(i, j, k, ivx) + ya*w(i, j, k, ivy) + za*w(i, j, k, ivz) - qs
+                    uu = xa * w(i, j, k, ivx) + ya * w(i, j, k, ivy) + za * w(i, j, k, ivz) - qs
                     um = zero
                     up = zero
                     if (uu < zero) um = uu
@@ -968,10 +968,10 @@ subroutine saSolve
                     rblank = max(real(iblank(i, j, k), realType), zero)
 
                     cc(j) = qq(i, j, k)
-                    ff(j) = scratch(i, j, k, idvt)*rblank
+                    ff(j) = scratch(i, j, k, idvt) * rblank
 
-                    bb(j) = bb(j)*rblank
-                    dd(j) = dd(j)*rblank
+                    bb(j) = bb(j) * rblank
+                    dd(j) = dd(j) * rblank
 
                     ! Set the off diagonal terms to zero if the wall is flagged.
 
@@ -991,24 +991,24 @@ subroutine saSolve
                 ! First the backward sweep to eliMinate the upper diagonal dd.
 
                 do j = ny, 2, -1
-                    f = dd(j)/cc(j + 1)
-                    cc(j) = cc(j) - f*bb(j + 1)
-                    ff(j) = ff(j) - f*ff(j + 1)
+                    f = dd(j) / cc(j + 1)
+                    cc(j) = cc(j) - f * bb(j + 1)
+                    ff(j) = ff(j) - f * ff(j + 1)
                 end do
 
                 ! The matrix is now in lower block bi-diagonal form.
                 ! Perform a forward sweep to compute the solution.
 
-                ff(2) = ff(2)/cc(2)
+                ff(2) = ff(2) / cc(2)
                 do j = 3, jl
-                    ff(j) = ff(j) - bb(j)*ff(j - 1)
-                    ff(j) = ff(j)/cc(j)
+                    ff(j) = ff(j) - bb(j) * ff(j - 1)
+                    ff(j) = ff(j) / cc(j)
                 end do
 
                 ! Determine the new rhs for the next direction.
 
                 do j = 2, jl
-                    scratch(i, j, k, idvt) = ff(j)*qq(i, j, k)
+                    scratch(i, j, k, idvt) = ff(j) * qq(i, j, k)
                 end do
 
             end do
@@ -1029,37 +1029,37 @@ subroutine saSolve
                     ! Consequently, see the i-loop to build the residual for
                     ! the comments.
 
-                    voli = one/vol(i, j, k)
-                    volmi = two/(vol(i, j, k) + vol(i - 1, j, k))
-                    volpi = two/(vol(i, j, k) + vol(i + 1, j, k))
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i - 1, j, k))
+                    volpi = two / (vol(i, j, k) + vol(i + 1, j, k))
 
-                    xm = si(i - 1, j, k, 1)*volmi
-                    ym = si(i - 1, j, k, 2)*volmi
-                    zm = si(i - 1, j, k, 3)*volmi
-                    xp = si(i, j, k, 1)*volpi
-                    yp = si(i, j, k, 2)*volpi
-                    zp = si(i, j, k, 3)*volpi
+                    xm = si(i - 1, j, k, 1) * volmi
+                    ym = si(i - 1, j, k, 2) * volmi
+                    zm = si(i - 1, j, k, 3) * volmi
+                    xp = si(i, j, k, 1) * volpi
+                    yp = si(i, j, k, 2) * volpi
+                    zp = si(i, j, k, 3) * volpi
 
-                    xa = half*(si(i, j, k, 1) + si(i - 1, j, k, 1))*voli
-                    ya = half*(si(i, j, k, 2) + si(i - 1, j, k, 2))*voli
-                    za = half*(si(i, j, k, 3) + si(i - 1, j, k, 3))*voli
-                    ttm = xm*xa + ym*ya + zm*za
-                    ttp = xp*xa + yp*ya + zp*za
+                    xa = half * (si(i, j, k, 1) + si(i - 1, j, k, 1)) * voli
+                    ya = half * (si(i, j, k, 2) + si(i - 1, j, k, 2)) * voli
+                    za = half * (si(i, j, k, 3) + si(i - 1, j, k, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
 
-                    cnud = -rsaCb2*w(i, j, k, itu1)*cb3Inv
-                    cam = ttm*cnud
-                    cap = ttp*cnud
+                    cnud = -rsaCb2 * w(i, j, k, itu1) * cb3Inv
+                    cam = ttm * cnud
+                    cap = ttp * cnud
 
                     ! Off-diagonal terms due to the diffusion terms
                     ! in i-direction.
 
-                    nutm = half*(w(i - 1, j, k, itu1) + w(i, j, k, itu1))
-                    nutp = half*(w(i + 1, j, k, itu1) + w(i, j, k, itu1))
-                    nu = rlv(i, j, k)/w(i, j, k, irho)
-                    num = half*(rlv(i - 1, j, k)/w(i - 1, j, k, irho) + nu)
-                    nup = half*(rlv(i + 1, j, k)/w(i + 1, j, k, irho) + nu)
-                    cdm = (num + (one + rsaCb2)*nutm)*ttm*cb3Inv
-                    cdp = (nup + (one + rsaCb2)*nutp)*ttp*cb3Inv
+                    nutm = half * (w(i - 1, j, k, itu1) + w(i, j, k, itu1))
+                    nutp = half * (w(i + 1, j, k, itu1) + w(i, j, k, itu1))
+                    nu = rlv(i, j, k) / w(i, j, k, irho)
+                    num = half * (rlv(i - 1, j, k) / w(i - 1, j, k, irho) + nu)
+                    nup = half * (rlv(i + 1, j, k) / w(i + 1, j, k, irho) + nu)
+                    cdm = (num + (one + rsaCb2) * nutm) * ttm * cb3Inv
+                    cdp = (nup + (one + rsaCb2) * nutp) * ttp * cb3Inv
 
                     c1m = max(cdm + cam, zero)
                     c1p = max(cdp + cap, zero)
@@ -1071,12 +1071,12 @@ subroutine saSolve
                     ! It is taken as the average of i and i-1,
 
                     if (addGridVelocities) &
-                        qs = half*(sFaceI(i, j, k) + sFaceI(i - 1, j, k))*voli
+                        qs = half * (sFaceI(i, j, k) + sFaceI(i - 1, j, k)) * voli
 
                     ! Off-diagonal terms due to the advection term in
                     ! i-direction. First order approximation.
 
-                    uu = xa*w(i, j, k, ivx) + ya*w(i, j, k, ivy) + za*w(i, j, k, ivz) - qs
+                    uu = xa * w(i, j, k, ivx) + ya * w(i, j, k, ivy) + za * w(i, j, k, ivz) - qs
                     um = zero
                     up = zero
                     if (uu < zero) um = uu
@@ -1092,10 +1092,10 @@ subroutine saSolve
                     rblank = max(real(iblank(i, j, k), realType), zero)
 
                     cc(i) = qq(i, j, k)
-                    ff(i) = scratch(i, j, k, idvt)*rblank
+                    ff(i) = scratch(i, j, k, idvt) * rblank
 
-                    bb(i) = bb(i)*rblank
-                    dd(i) = dd(i)*rblank
+                    bb(i) = bb(i) * rblank
+                    dd(i) = dd(i) * rblank
 
                     ! Set the off diagonal terms to zero if the wall is flagged.
 
@@ -1115,24 +1115,24 @@ subroutine saSolve
                 ! First the backward sweep to eliMinate the upper diagonal dd.
 
                 do i = nx, 2, -1
-                    f = dd(i)/cc(i + 1)
-                    cc(i) = cc(i) - f*bb(i + 1)
-                    ff(i) = ff(i) - f*ff(i + 1)
+                    f = dd(i) / cc(i + 1)
+                    cc(i) = cc(i) - f * bb(i + 1)
+                    ff(i) = ff(i) - f * ff(i + 1)
                 end do
 
                 ! The matrix is now in lower block bi-diagonal form.
                 ! Perform a forward sweep to compute the solution.
 
-                ff(2) = ff(2)/cc(2)
+                ff(2) = ff(2) / cc(2)
                 do i = 3, il
-                    ff(i) = ff(i) - bb(i)*ff(i - 1)
-                    ff(i) = ff(i)/cc(i)
+                    ff(i) = ff(i) - bb(i) * ff(i - 1)
+                    ff(i) = ff(i) / cc(i)
                 end do
 
                 ! Determine the new rhs for the next direction.
 
                 do i = 2, il
-                    scratch(i, j, k, idvt) = ff(i)*qq(i, j, k)
+                    scratch(i, j, k, idvt) = ff(i) * qq(i, j, k)
                 end do
 
             end do
@@ -1153,37 +1153,37 @@ subroutine saSolve
                     ! Consequently, see the k-loop to build the residual for
                     ! the comments.
 
-                    voli = one/vol(i, j, k)
-                    volmi = two/(vol(i, j, k) + vol(i, j, k - 1))
-                    volpi = two/(vol(i, j, k) + vol(i, j, k + 1))
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i, j, k - 1))
+                    volpi = two / (vol(i, j, k) + vol(i, j, k + 1))
 
-                    xm = sk(i, j, k - 1, 1)*volmi
-                    ym = sk(i, j, k - 1, 2)*volmi
-                    zm = sk(i, j, k - 1, 3)*volmi
-                    xp = sk(i, j, k, 1)*volpi
-                    yp = sk(i, j, k, 2)*volpi
-                    zp = sk(i, j, k, 3)*volpi
+                    xm = sk(i, j, k - 1, 1) * volmi
+                    ym = sk(i, j, k - 1, 2) * volmi
+                    zm = sk(i, j, k - 1, 3) * volmi
+                    xp = sk(i, j, k, 1) * volpi
+                    yp = sk(i, j, k, 2) * volpi
+                    zp = sk(i, j, k, 3) * volpi
 
-                    xa = half*(sk(i, j, k, 1) + sk(i, j, k - 1, 1))*voli
-                    ya = half*(sk(i, j, k, 2) + sk(i, j, k - 1, 2))*voli
-                    za = half*(sk(i, j, k, 3) + sk(i, j, k - 1, 3))*voli
-                    ttm = xm*xa + ym*ya + zm*za
-                    ttp = xp*xa + yp*ya + zp*za
+                    xa = half * (sk(i, j, k, 1) + sk(i, j, k - 1, 1)) * voli
+                    ya = half * (sk(i, j, k, 2) + sk(i, j, k - 1, 2)) * voli
+                    za = half * (sk(i, j, k, 3) + sk(i, j, k - 1, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
 
-                    cnud = -rsaCb2*w(i, j, k, itu1)*cb3Inv
-                    cam = ttm*cnud
-                    cap = ttp*cnud
+                    cnud = -rsaCb2 * w(i, j, k, itu1) * cb3Inv
+                    cam = ttm * cnud
+                    cap = ttp * cnud
 
                     ! Off-diagonal terms due to the diffusion terms
                     ! in k-direction.
 
-                    nutm = half*(w(i, j, k - 1, itu1) + w(i, j, k, itu1))
-                    nutp = half*(w(i, j, k + 1, itu1) + w(i, j, k, itu1))
-                    nu = rlv(i, j, k)/w(i, j, k, irho)
-                    num = half*(rlv(i, j, k - 1)/w(i, j, k - 1, irho) + nu)
-                    nup = half*(rlv(i, j, k + 1)/w(i, j, k + 1, irho) + nu)
-                    cdm = (num + (one + rsaCb2)*nutm)*ttm*cb3Inv
-                    cdp = (nup + (one + rsaCb2)*nutp)*ttp*cb3Inv
+                    nutm = half * (w(i, j, k - 1, itu1) + w(i, j, k, itu1))
+                    nutp = half * (w(i, j, k + 1, itu1) + w(i, j, k, itu1))
+                    nu = rlv(i, j, k) / w(i, j, k, irho)
+                    num = half * (rlv(i, j, k - 1) / w(i, j, k - 1, irho) + nu)
+                    nup = half * (rlv(i, j, k + 1) / w(i, j, k + 1, irho) + nu)
+                    cdm = (num + (one + rsaCb2) * nutm) * ttm * cb3Inv
+                    cdp = (nup + (one + rsaCb2) * nutp) * ttp * cb3Inv
 
                     c1m = max(cdm + cam, zero)
                     c1p = max(cdp + cap, zero)
@@ -1195,12 +1195,12 @@ subroutine saSolve
                     ! It is taken as the average of k and k-1,
 
                     if (addGridVelocities) &
-                        qs = half*(sFaceK(i, j, k) + sFaceK(i, j, k - 1))*voli
+                        qs = half * (sFaceK(i, j, k) + sFaceK(i, j, k - 1)) * voli
 
                     ! Off-diagonal terms due to the advection term in
                     ! k-direction. First order approximation.
 
-                    uu = xa*w(i, j, k, ivx) + ya*w(i, j, k, ivy) + za*w(i, j, k, ivz) - qs
+                    uu = xa * w(i, j, k, ivx) + ya * w(i, j, k, ivy) + za * w(i, j, k, ivz) - qs
                     um = zero
                     up = zero
                     if (uu < zero) um = uu
@@ -1216,10 +1216,10 @@ subroutine saSolve
                     rblank = max(real(iblank(i, j, k), realType), zero)
 
                     cc(k) = qq(i, j, k)
-                    ff(k) = scratch(i, j, k, idvt)*rblank
+                    ff(k) = scratch(i, j, k, idvt) * rblank
 
-                    bb(k) = bb(k)*rblank
-                    dd(k) = dd(k)*rblank
+                    bb(k) = bb(k) * rblank
+                    dd(k) = dd(k) * rblank
 
                     ! Set the off diagonal terms to zero if the wall is flagged.
 
@@ -1239,18 +1239,18 @@ subroutine saSolve
                 ! First the backward sweep to eliMinate the upper diagonal dd.
 
                 do k = nz, 2, -1
-                    f = dd(k)/cc(k + 1)
-                    cc(k) = cc(k) - f*bb(k + 1)
-                    ff(k) = ff(k) - f*ff(k + 1)
+                    f = dd(k) / cc(k + 1)
+                    cc(k) = cc(k) - f * bb(k + 1)
+                    ff(k) = ff(k) - f * ff(k + 1)
                 end do
 
                 ! The matrix is now in lower block bi-diagonal form.
                 ! Perform a forward sweep to compute the solution.
 
-                ff(2) = ff(2)/cc(2)
+                ff(2) = ff(2) / cc(2)
                 do k = 3, kl
-                    ff(k) = ff(k) - bb(k)*ff(k - 1)
-                    ff(k) = ff(k)/cc(k)
+                    ff(k) = ff(k) - bb(k) * ff(k - 1)
+                    ff(k) = ff(k) / cc(k)
                 end do
 
                 ! Store the update in dvt.
@@ -1272,7 +1272,7 @@ subroutine saSolve
         do k = 2, kl
             do j = 2, jl
                 do i = 2, il
-                    w(i, j, k, itu1) = w(i, j, k, itu1) + factor*scratch(i, j, k, idvt)
+                    w(i, j, k, itu1) = w(i, j, k, itu1) + factor * scratch(i, j, k, idvt)
                     w(i, j, k, itu1) = max(w(i, j, k, itu1), zero)
                 end do
             end do
diff --git a/src/turbulence/turbBCRoutines.F90 b/src/turbulence/turbBCRoutines.F90
index e21230950..fe431b14d 100644
--- a/src/turbulence/turbBCRoutines.F90
+++ b/src/turbulence/turbBCRoutines.F90
@@ -114,7 +114,7 @@ subroutine applyAllTurbBCThisBlock(secondHalo)
                 do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
                     do i = BCData(nn)%icBeg, BCData(nn)%icEnd
                         do l = nt1, nt2
-                            ww1(i, j, l) = bvt(i, j, l) - bmt(i, j, l, l)*ww2(i, j, l)
+                            ww1(i, j, l) = bvt(i, j, l) - bmt(i, j, l, l) * ww2(i, j, l)
                             do m = nt1, nt2
                                 if (m /= l .and. bmt(i, j, l, m) /= zero) &
                                     ww1(i, j, l) = ww2(i, j, l)
@@ -132,7 +132,7 @@ subroutine applyAllTurbBCThisBlock(secondHalo)
                             do l = nt1, nt2
                                 w(1, i, j, l) = bvti1(i, j, l)
                                 do m = nt1, nt2
-                                    w(1, i, j, l) = w(1, i, j, l) - bmti1(i, j, l, m)*w(2, i, j, m)
+                                    w(1, i, j, l) = w(1, i, j, l) - bmti1(i, j, l, m) * w(2, i, j, m)
                                 end do
                             end do
                         end do
@@ -144,7 +144,7 @@ subroutine applyAllTurbBCThisBlock(secondHalo)
                             do l = nt1, nt2
                                 w(ie, i, j, l) = bvti2(i, j, l)
                                 do m = nt1, nt2
-                                    w(ie, i, j, l) = w(ie, i, j, l) - bmti2(i, j, l, m)*w(il, i, j, m)
+                                    w(ie, i, j, l) = w(ie, i, j, l) - bmti2(i, j, l, m) * w(il, i, j, m)
                                 end do
                             end do
                         end do
@@ -156,7 +156,7 @@ subroutine applyAllTurbBCThisBlock(secondHalo)
                             do l = nt1, nt2
                                 w(i, 1, j, l) = bvtj1(i, j, l)
                                 do m = nt1, nt2
-                                    w(i, 1, j, l) = w(i, 1, j, l) - bmtj1(i, j, l, m)*w(i, 2, j, m)
+                                    w(i, 1, j, l) = w(i, 1, j, l) - bmtj1(i, j, l, m) * w(i, 2, j, m)
                                 end do
                             end do
                         end do
@@ -168,7 +168,7 @@ subroutine applyAllTurbBCThisBlock(secondHalo)
                             do l = nt1, nt2
                                 w(i, je, j, l) = bvtj2(i, j, l)
                                 do m = nt1, nt2
-                                    w(i, je, j, l) = w(i, je, j, l) - bmtj2(i, j, l, m)*w(i, jl, j, m)
+                                    w(i, je, j, l) = w(i, je, j, l) - bmtj2(i, j, l, m) * w(i, jl, j, m)
                                 end do
                             end do
                         end do
@@ -180,7 +180,7 @@ subroutine applyAllTurbBCThisBlock(secondHalo)
                             do l = nt1, nt2
                                 w(i, j, 1, l) = bvtk1(i, j, l)
                                 do m = nt1, nt2
-                                    w(i, j, 1, l) = w(i, j, 1, l) - bmtk1(i, j, l, m)*w(i, j, 2, m)
+                                    w(i, j, 1, l) = w(i, j, 1, l) - bmtk1(i, j, l, m) * w(i, j, 2, m)
                                 end do
                             end do
                         end do
@@ -192,7 +192,7 @@ subroutine applyAllTurbBCThisBlock(secondHalo)
                             do l = nt1, nt2
                                 w(i, j, ke, l) = bvtk2(i, j, l)
                                 do m = nt1, nt2
-                                    w(i, j, ke, l) = w(i, j, ke, l) - bmtk2(i, j, l, m)*w(i, j, kl, m)
+                                    w(i, j, ke, l) = w(i, j, ke, l) - bmtk2(i, j, l, m) * w(i, j, kl, m)
                                 end do
                             end do
                         end do
@@ -329,42 +329,42 @@ subroutine bcEddyWall(nn)
         case (iMin)
             do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
                 do i = BCData(nn)%icBeg, BCData(nn)%icEnd
-                    rev(1, i, j) = saRoughFact(2, i, j)*rev(2, i, j)
+                    rev(1, i, j) = saRoughFact(2, i, j) * rev(2, i, j)
                 end do
             end do
 
         case (iMax)
             do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
                 do i = BCData(nn)%icBeg, BCData(nn)%icEnd
-                    rev(ie, i, j) = saRoughFact(il, i, j)*rev(il, i, j)
+                    rev(ie, i, j) = saRoughFact(il, i, j) * rev(il, i, j)
                 end do
             end do
 
         case (jMin)
             do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
                 do i = BCData(nn)%icBeg, BCData(nn)%icEnd
-                    rev(i, 1, j) = saRoughFact(i, 2, j)*rev(i, 2, j)
+                    rev(i, 1, j) = saRoughFact(i, 2, j) * rev(i, 2, j)
                 end do
             end do
 
         case (jMax)
             do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
                 do i = BCData(nn)%icBeg, BCData(nn)%icEnd
-                    rev(i, je, j) = saRoughFact(i, jl, j)*rev(i, jl, j)
+                    rev(i, je, j) = saRoughFact(i, jl, j) * rev(i, jl, j)
                 end do
             end do
 
         case (kMin)
             do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
                 do i = BCData(nn)%icBeg, BCData(nn)%icEnd
-                    rev(i, j, 1) = saRoughFact(i, j, 2)*rev(i, j, 2)
+                    rev(i, j, 1) = saRoughFact(i, j, 2) * rev(i, j, 2)
                 end do
             end do
 
         case (kMax)
             do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
                 do i = BCData(nn)%icBeg, BCData(nn)%icEnd
-                    rev(i, j, ke) = saRoughFact(i, j, kl)*rev(i, j, kl)
+                    rev(i, j, ke) = saRoughFact(i, j, kl) * rev(i, j, kl)
                 end do
             end do
         end select
@@ -404,9 +404,9 @@ subroutine bcTurbFarfield(nn)
                 ! normal and the free stream velocity direction and add the
                 ! possible grid velocity.
 
-                dot = BCData(nn)%norm(i, j, 1)*wInf(ivx) + &
-                      BCData(nn)%norm(i, j, 2)*wInf(ivy) + &
-                      BCData(nn)%norm(i, j, 3)*wInf(ivz) - BCData(nn)%rface(i, j)
+                dot = BCData(nn)%norm(i, j, 1) * wInf(ivx) + &
+                      BCData(nn)%norm(i, j, 2) * wInf(ivy) + &
+                      BCData(nn)%norm(i, j, 3) * wInf(ivz) - BCData(nn)%rface(i, j)
 
                 ! Determine whether we are dealing with an inflow or
                 ! outflow boundary here.
@@ -491,22 +491,22 @@ subroutine bcTurbInflow(nn)
                 do l = nt1, nt2
                     select case (BCFaceID(nn))
                     case (iMin)
-                        bvti1(i, j, l) = two*BCData(nn)%turbInlet(i, j, l)
+                        bvti1(i, j, l) = two * BCData(nn)%turbInlet(i, j, l)
                         bmti1(i, j, l, l) = one
                     case (iMax)
-                        bvti2(i, j, l) = two*BCData(nn)%turbInlet(i, j, l)
+                        bvti2(i, j, l) = two * BCData(nn)%turbInlet(i, j, l)
                         bmti2(i, j, l, l) = one
                     case (jMin)
-                        bvtj1(i, j, l) = two*BCData(nn)%turbInlet(i, j, l)
+                        bvtj1(i, j, l) = two * BCData(nn)%turbInlet(i, j, l)
                         bmtj1(i, j, l, l) = one
                     case (jMax)
-                        bvtj2(i, j, l) = two*BCData(nn)%turbInlet(i, j, l)
+                        bvtj2(i, j, l) = two * BCData(nn)%turbInlet(i, j, l)
                         bmtj2(i, j, l, l) = one
                     case (kMin)
-                        bvtk1(i, j, l) = two*BCData(nn)%turbInlet(i, j, l)
+                        bvtk1(i, j, l) = two * BCData(nn)%turbInlet(i, j, l)
                         bmtk1(i, j, l, l) = one
                     case (kMax)
-                        bvtk2(i, j, l) = two*BCData(nn)%turbInlet(i, j, l)
+                        bvtk2(i, j, l) = two * BCData(nn)%turbInlet(i, j, l)
                         bmtk2(i, j, l, l) = one
                     end select
                 end do
@@ -900,13 +900,13 @@ subroutine bcTurbWall(nn)
                     do i = BCData(nn)%icBeg, BCData(nn)%icEnd
                         ii = max(2, min(i, iiMax))
 
-                        nu = rlv(2, i, j)/w(2, i, j, irho)
-                        tmpd = one/(rkwBeta1*(d2Wall(2, ii, jj)**2))
+                        nu = rlv(2, i, j) / w(2, i, j, irho)
+                        tmpd = one / (rkwBeta1 * (d2Wall(2, ii, jj)**2))
 
                         bmti1(i, j, itu1, itu1) = one
                         bmti1(i, j, itu2, itu2) = one
 
-                        bvti1(i, j, itu2) = two*60.0_realType*nu*tmpd
+                        bvti1(i, j, itu2) = two * 60.0_realType * nu * tmpd
                     end do
                 end do
 
@@ -919,13 +919,13 @@ subroutine bcTurbWall(nn)
                     do i = BCData(nn)%icBeg, BCData(nn)%icEnd
                         ii = max(2, min(i, iiMax))
 
-                        nu = rlv(jl, i, j)/w(il, i, j, irho)
-                        tmpd = one/(rkwBeta1*(d2Wall(il, ii, jj)**2))
+                        nu = rlv(jl, i, j) / w(il, i, j, irho)
+                        tmpd = one / (rkwBeta1 * (d2Wall(il, ii, jj)**2))
 
                         bmti2(i, j, itu1, itu1) = one
                         bmti2(i, j, itu2, itu2) = one
 
-                        bvti2(i, j, itu2) = two*60.0_realType*nu*tmpd
+                        bvti2(i, j, itu2) = two * 60.0_realType * nu * tmpd
                     end do
                 end do
 
@@ -938,13 +938,13 @@ subroutine bcTurbWall(nn)
                     do i = BCData(nn)%icBeg, BCData(nn)%icEnd
                         ii = max(2, min(i, iiMax))
 
-                        nu = rlv(i, 2, j)/w(i, 2, j, irho)
-                        tmpd = one/(rkwBeta1*(d2Wall(ii, 2, jj)**2))
+                        nu = rlv(i, 2, j) / w(i, 2, j, irho)
+                        tmpd = one / (rkwBeta1 * (d2Wall(ii, 2, jj)**2))
 
                         bmtj1(i, j, itu1, itu1) = one
                         bmtj1(i, j, itu2, itu2) = one
 
-                        bvtj1(i, j, itu2) = two*60.0_realType*nu*tmpd
+                        bvtj1(i, j, itu2) = two * 60.0_realType * nu * tmpd
                     end do
                 end do
 
@@ -957,13 +957,13 @@ subroutine bcTurbWall(nn)
                     do i = BCData(nn)%icBeg, BCData(nn)%icEnd
                         ii = max(2, min(i, iiMax))
 
-                        nu = rlv(i, jl, j)/w(i, jl, j, irho)
-                        tmpd = one/(rkwBeta1*(d2Wall(ii, jl, jj)**2))
+                        nu = rlv(i, jl, j) / w(i, jl, j, irho)
+                        tmpd = one / (rkwBeta1 * (d2Wall(ii, jl, jj)**2))
 
                         bmtj2(i, j, itu1, itu1) = one
                         bmtj2(i, j, itu2, itu2) = one
 
-                        bvtj2(i, j, itu2) = two*60.0_realType*nu*tmpd
+                        bvtj2(i, j, itu2) = two * 60.0_realType * nu * tmpd
                     end do
                 end do
 
@@ -976,13 +976,13 @@ subroutine bcTurbWall(nn)
                     do i = BCData(nn)%icBeg, BCData(nn)%icEnd
                         ii = max(2, min(i, iiMax))
 
-                        nu = rlv(i, j, 2)/w(i, j, 2, irho)
-                        tmpd = one/(rkwBeta1*(d2Wall(ii, jj, 2)**2))
+                        nu = rlv(i, j, 2) / w(i, j, 2, irho)
+                        tmpd = one / (rkwBeta1 * (d2Wall(ii, jj, 2)**2))
 
                         bmtk1(i, j, itu1, itu1) = one
                         bmtk1(i, j, itu2, itu2) = one
 
-                        bvtk1(i, j, itu2) = two*60.0_realType*nu*tmpd
+                        bvtk1(i, j, itu2) = two * 60.0_realType * nu * tmpd
                     end do
                 end do
 
@@ -995,13 +995,13 @@ subroutine bcTurbWall(nn)
                     do i = BCData(nn)%icBeg, BCData(nn)%icEnd
                         ii = max(2, min(i, iiMax))
 
-                        nu = rlv(i, j, kl)/w(i, j, kl, irho)
-                        tmpd = one/(rkwBeta1*(d2Wall(ii, jj, kl)**2))
+                        nu = rlv(i, j, kl) / w(i, j, kl, irho)
+                        tmpd = one / (rkwBeta1 * (d2Wall(ii, jj, kl)**2))
 
                         bmtk2(i, j, itu1, itu1) = one
                         bmtk2(i, j, itu2, itu2) = one
 
-                        bvtk2(i, j, itu2) = two*60.0_realType*nu*tmpd
+                        bvtk2(i, j, itu2) = two * 60.0_realType * nu * tmpd
                     end do
                 end do
             end select
@@ -1108,11 +1108,11 @@ subroutine bcTurbWall(nn)
                 do i = BCData(nn)%icBeg, BCData(nn)%icEnd
                     ii = max(2, min(i, iiMax))
 
-                    nu = rlv2(i, j)/ww2(i, j, irho)
-                    tmpd = one/(dd2Wall(ii - 1, jj - 1)**2)
-                    tmpe = two*nu*tmpd
-                    tmpf = -20.0_realType*(nu*tmpd)**2 &
-                           /abs(tmpe*ww2(i, j, itu1))
+                    nu = rlv2(i, j) / ww2(i, j, irho)
+                    tmpd = one / (dd2Wall(ii - 1, jj - 1)**2)
+                    tmpe = two * nu * tmpd
+                    tmpf = -20.0_realType * (nu * tmpd)**2 &
+                           / abs(tmpe * ww2(i, j, itu1))
                     if (rvfN == 6) tmpf = zero
 
                     bmt(i, j, itu1, itu1) = one
@@ -1120,8 +1120,8 @@ subroutine bcTurbWall(nn)
                     bmt(i, j, itu3, itu3) = one
                     bmt(i, j, itu4, itu4) = one
 
-                    bmt(i, j, itu2, itu1) = -two*tmpe
-                    bmt(i, j, itu4, itu3) = -two*tmpf
+                    bmt(i, j, itu2, itu1) = -two * tmpe
+                    bmt(i, j, itu4, itu3) = -two * tmpf
                 end do
             end do
 #endif
@@ -1268,7 +1268,7 @@ subroutine turbBCNSWall(secondHalo)
                         do l = nt1, nt2
                             w(1, i, j, l) = bvti1(i, j, l)
                             do m = nt1, nt2
-                                w(1, i, j, l) = w(1, i, j, l) - bmti1(i, j, l, m)*w(2, i, j, m)
+                                w(1, i, j, l) = w(1, i, j, l) - bmti1(i, j, l, m) * w(2, i, j, m)
                             end do
                             if (secondHalo) w(0, i, j, l) = w(1, i, j, l)
                         end do
@@ -1288,7 +1288,7 @@ subroutine turbBCNSWall(secondHalo)
                         do l = nt1, nt2
                             w(ie, i, j, l) = bvti2(i, j, l)
                             do m = nt1, nt2
-                                w(ie, i, j, l) = w(ie, i, j, l) - bmti2(i, j, l, m)*w(il, i, j, m)
+                                w(ie, i, j, l) = w(ie, i, j, l) - bmti2(i, j, l, m) * w(il, i, j, m)
                             end do
                             if (secondHalo) w(ib, i, j, l) = w(ie, i, j, l)
                         end do
@@ -1308,7 +1308,7 @@ subroutine turbBCNSWall(secondHalo)
                         do l = nt1, nt2
                             w(i, 1, j, l) = bvtj1(i, j, l)
                             do m = nt1, nt2
-                                w(i, 1, j, l) = w(i, 1, j, l) - bmtj1(i, j, l, m)*w(i, 2, j, m)
+                                w(i, 1, j, l) = w(i, 1, j, l) - bmtj1(i, j, l, m) * w(i, 2, j, m)
                             end do
                             if (secondHalo) w(i, 0, j, l) = w(i, 1, j, l)
                         end do
@@ -1328,7 +1328,7 @@ subroutine turbBCNSWall(secondHalo)
                         do l = nt1, nt2
                             w(i, je, j, l) = bvtj2(i, j, l)
                             do m = nt1, nt2
-                                w(i, je, j, l) = w(i, je, j, l) - bmtj2(i, j, l, m)*w(i, jl, j, m)
+                                w(i, je, j, l) = w(i, je, j, l) - bmtj2(i, j, l, m) * w(i, jl, j, m)
                             end do
                             if (secondHalo) w(i, jb, j, l) = w(i, je, j, l)
                         end do
@@ -1348,7 +1348,7 @@ subroutine turbBCNSWall(secondHalo)
                         do l = nt1, nt2
                             w(i, j, 1, l) = bvtk1(i, j, l)
                             do m = nt1, nt2
-                                w(i, j, 1, l) = w(i, j, 1, l) - bmtk1(i, j, l, m)*w(i, j, 2, m)
+                                w(i, j, 1, l) = w(i, j, 1, l) - bmtk1(i, j, l, m) * w(i, j, 2, m)
                             end do
                             if (secondHalo) w(i, j, 0, l) = w(i, j, 1, l)
                         end do
@@ -1368,7 +1368,7 @@ subroutine turbBCNSWall(secondHalo)
                         do l = nt1, nt2
                             w(i, j, ke, l) = bvtk2(i, j, l)
                             do m = nt1, nt2
-                                w(i, j, ke, l) = w(i, j, ke, l) - bmtk2(i, j, l, m)*w(i, j, kl, m)
+                                w(i, j, ke, l) = w(i, j, ke, l) - bmtk2(i, j, l, m) * w(i, j, kl, m)
                             end do
                             if (secondHalo) w(i, j, kb, l) = w(i, j, ke, l)
                         end do
@@ -1406,8 +1406,8 @@ function saRoughFact(i, j, k)
             return
         end if
 
-        saRoughFact = (ks(i, j, k) - d2wall(i, j, k)/0.03_realType)/ &
-                      (ks(i, j, k) + d2wall(i, j, k)/0.03_realType)
+        saRoughFact = (ks(i, j, k) - d2wall(i, j, k) / 0.03_realType) / &
+                      (ks(i, j, k) + d2wall(i, j, k) / 0.03_realType)
 
     end function saRoughFact
 
diff --git a/src/utils/utils.F90 b/src/utils/utils.F90
index 4dca871b4..0e20fe694 100644
--- a/src/utils/utils.F90
+++ b/src/utils/utils.F90
@@ -72,7 +72,7 @@ function TSbeta(degreePolBeta, coefPolBeta, &
 
         beta = coefPolBeta(0)
         do nn = 1, degreePolBeta
-            beta = beta + coefPolBeta(nn)*(t**nn)
+            beta = beta + coefPolBeta(nn) * (t**nn)
         end do
 
         ! Compute the fourier contribution. Again the cosine coefficient
@@ -80,9 +80,9 @@ function TSbeta(degreePolBeta, coefPolBeta, &
 
         beta = beta + cosCoefFourBeta(0)
         do nn = 1, degreeFourBeta
-            val = nn*omegaFourBeta*t
-            beta = beta + cosCoefFourbeta(nn)*cos(val) &
-                   + sinCoefFourbeta(nn)*sin(val)
+            val = nn * omegaFourBeta * t
+            beta = beta + cosCoefFourbeta(nn) * cos(val) &
+                   + sinCoefFourbeta(nn) * sin(val)
         end do
 
         ! Set TSBeta to phi.
@@ -135,16 +135,16 @@ function TSbetadot(degreePolBeta, coefPolBeta, &
 
         betadot = zero
         do nn = 1, degreePolBeta
-            betadot = betadot + nn*coefPolBeta(nn)*(t**(nn - 1))
+            betadot = betadot + nn * coefPolBeta(nn) * (t**(nn - 1))
         end do
 
         ! Compute the fourier contribution. Again the cosine coefficient
         ! of index 0 is defaulted to zero if not specified.
 
         do nn = 1, degreeFourBeta
-            val = nn*omegaFourBeta
-            betadot = betadot - val*cosCoefFourbeta(nn)*sin(val*t) &
-                      + val*sinCoefFourbeta(nn)*cos(val*t)
+            val = nn * omegaFourBeta
+            betadot = betadot - val * cosCoefFourbeta(nn) * sin(val * t) &
+                      + val * sinCoefFourbeta(nn) * cos(val * t)
         end do
 
         ! Set TSBeta to phi.
@@ -197,7 +197,7 @@ function TSMach(degreePolMach, coefPolMach, &
 
         intervalMach = coefPolMach(0)
         do nn = 1, degreePolMach
-            intervalMach = intervalMach + coefPolMach(nn)*(t**nn)
+            intervalMach = intervalMach + coefPolMach(nn) * (t**nn)
         end do
 
         ! Compute the fourier contribution. Again the cosine coefficient
@@ -205,9 +205,9 @@ function TSMach(degreePolMach, coefPolMach, &
 
         intervalMach = intervalMach + cosCoefFourMach(0)
         do nn = 1, degreeFourMach
-            val = nn*omegaFourMach*t
-            intervalMach = intervalMach + cosCoefFourmach(nn)*cos(val) &
-                           + sinCoefFourmach(nn)*sin(val)
+            val = nn * omegaFourMach * t
+            intervalMach = intervalMach + cosCoefFourmach(nn) * cos(val) &
+                           + sinCoefFourmach(nn) * sin(val)
         end do
         print *, 'inTSMach', intervalMach, nn, val, t
         ! Set TSMach to phi.
@@ -260,16 +260,16 @@ function TSMachdot(degreePolMach, coefPolMach, &
 
         machdot = zero
         do nn = 1, degreePolMach
-            machdot = machdot + nn*coefPolMach(nn)*(t**(nn - 1))
+            machdot = machdot + nn * coefPolMach(nn) * (t**(nn - 1))
         end do
 
         ! Compute the fourier contribution. Again the cosine coefficient
         ! of index 0 is defaulted to zero if not specified.
 
         do nn = 1, degreeFourMach
-            val = nn*omegaFourMach
-            machdot = machdot - val*cosCoefFourmach(nn)*sin(val*t) &
-                      + val*sinCoefFourmach(nn)*cos(val*t)
+            val = nn * omegaFourMach
+            machdot = machdot - val * cosCoefFourmach(nn) * sin(val * t) &
+                      + val * sinCoefFourmach(nn) * cos(val * t)
         end do
 
         ! Set TSMach to phi.
@@ -321,7 +321,7 @@ function TSalpha(degreePolAlpha, coefPolAlpha, &
         ! specified, the value of index 0 is set to zero automatically.
         alpha = coefPolAlpha(0)
         do nn = 1, degreePolAlpha
-            alpha = alpha + coefPolAlpha(nn)*(t**nn)
+            alpha = alpha + coefPolAlpha(nn) * (t**nn)
         end do
 
         ! Compute the fourier contribution. Again the cosine coefficient
@@ -329,9 +329,9 @@ function TSalpha(degreePolAlpha, coefPolAlpha, &
 
         alpha = alpha + cosCoefFourAlpha(0)
         do nn = 1, degreeFourAlpha
-            val = nn*omegaFourAlpha*t
-            alpha = alpha + cosCoefFouralpha(nn)*cos(val) &
-                    + sinCoefFouralpha(nn)*sin(val)
+            val = nn * omegaFourAlpha * t
+            alpha = alpha + cosCoefFouralpha(nn) * cos(val) &
+                    + sinCoefFouralpha(nn) * sin(val)
         end do
         !print *,'inTSalpha',alpha,nn,val,t
         ! Set TSAlpha to phi.
@@ -384,16 +384,16 @@ function TSalphadot(degreePolAlpha, coefPolAlpha, &
 
         alphadot = zero
         do nn = 1, degreePolAlpha
-            alphadot = alphadot + nn*coefPolAlpha(nn)*(t**(nn - 1))
+            alphadot = alphadot + nn * coefPolAlpha(nn) * (t**(nn - 1))
         end do
 
         ! Compute the fourier contribution. Again the cosine coefficient
         ! of index 0 is defaulted to zero if not specified.
 
         do nn = 1, degreeFourAlpha
-            val = nn*omegaFourAlpha
-            alphadot = alphadot - val*cosCoefFouralpha(nn)*sin(val*t) &
-                       + val*sinCoefFouralpha(nn)*cos(val*t)
+            val = nn * omegaFourAlpha
+            alphadot = alphadot - val * cosCoefFouralpha(nn) * sin(val * t) &
+                       + val * sinCoefFouralpha(nn) * cos(val * t)
         end do
 
         ! Set TSAlpha to phi.
@@ -451,21 +451,21 @@ function derivativeRigidRotAngle(degreePolRot, &
 
         dPhi = zero
         do nn = 1, degreePolRot
-            dPhi = dPhi + nn*coefPolRot(nn)*(t**(nn - 1))
+            dPhi = dPhi + nn * coefPolRot(nn) * (t**(nn - 1))
         end do
 
         ! Compute the fourier contribution.
 
         do nn = 1, degreeFourRot
-            val = nn*omegaFourRot
-            dPhi = dPhi - val*cosCoefFourRot(nn)*sin(val*t)
-            dPhi = dPhi + val*sinCoefFourRot(nn)*cos(val*t)
+            val = nn * omegaFourRot
+            dPhi = dPhi - val * cosCoefFourRot(nn) * sin(val * t)
+            dPhi = dPhi + val * sinCoefFourRot(nn) * cos(val * t)
         end do
 
         ! Set derivativeRigidRotAngle to dPhi. Multiply by timeRef
         ! to obtain the correct non-dimensional value.
 
-        derivativeRigidRotAngle = timeRef*dPhi
+        derivativeRigidRotAngle = timeRef * dPhi
 
     end function derivativeRigidRotAngle
 
@@ -666,17 +666,17 @@ subroutine rotMatrixRigidBody(tNew, tOld, rotationMatrix, &
         ! It is assumed that the sequence of rotation is first around the
         ! x-axis then around the y-axis and finally around the z-axis.
 
-        mNew(1, 1) = cosY*cosZ
-        mNew(2, 1) = cosY*sinZ
+        mNew(1, 1) = cosY * cosZ
+        mNew(2, 1) = cosY * sinZ
         mNew(3, 1) = -sinY
 
-        mNew(1, 2) = sinX*sinY*cosZ - cosX*sinZ
-        mNew(2, 2) = sinX*sinY*sinZ + cosX*cosZ
-        mNew(3, 2) = sinX*cosY
+        mNew(1, 2) = sinX * sinY * cosZ - cosX * sinZ
+        mNew(2, 2) = sinX * sinY * sinZ + cosX * cosZ
+        mNew(3, 2) = sinX * cosY
 
-        mNew(1, 3) = cosX*sinY*cosZ + sinX*sinZ
-        mNew(2, 3) = cosX*sinY*sinZ - sinX*cosZ
-        mNew(3, 3) = cosX*cosY
+        mNew(1, 3) = cosX * sinY * cosZ + sinX * sinZ
+        mNew(2, 3) = cosX * sinY * sinZ - sinX * cosZ
+        mNew(3, 3) = cosX * cosY
 
         ! Determine the rotation angle around the x-axis for the old
         ! time level and the corresponding values of the sine and cosine.
@@ -705,17 +705,17 @@ subroutine rotMatrixRigidBody(tNew, tOld, rotationMatrix, &
 
         ! Construct the transformation matrix at the old time level.
 
-        mOld(1, 1) = cosY*cosZ
-        mOld(2, 1) = cosY*sinZ
+        mOld(1, 1) = cosY * cosZ
+        mOld(2, 1) = cosY * sinZ
         mOld(3, 1) = -sinY
 
-        mOld(1, 2) = sinX*sinY*cosZ - cosX*sinZ
-        mOld(2, 2) = sinX*sinY*sinZ + cosX*cosZ
-        mOld(3, 2) = sinX*cosY
+        mOld(1, 2) = sinX * sinY * cosZ - cosX * sinZ
+        mOld(2, 2) = sinX * sinY * sinZ + cosX * cosZ
+        mOld(3, 2) = sinX * cosY
 
-        mOld(1, 3) = cosX*sinY*cosZ + sinX*sinZ
-        mOld(2, 3) = cosX*sinY*sinZ - sinX*cosZ
-        mOld(3, 3) = cosX*cosY
+        mOld(1, 3) = cosX * sinY * cosZ + sinX * sinZ
+        mOld(2, 3) = cosX * sinY * sinZ - sinX * cosZ
+        mOld(3, 3) = cosX * cosY
 
         ! Construct the transformation matrix between the new and the
         ! old time level. This is mNew*inverse(mOld). However the
@@ -723,9 +723,9 @@ subroutine rotMatrixRigidBody(tNew, tOld, rotationMatrix, &
 
         do j = 1, 3
             do i = 1, 3
-                rotationMatrix(i, j) = mNew(i, 1)*mOld(j, 1) &
-                                       + mNew(i, 2)*mOld(j, 2) &
-                                       + mNew(i, 3)*mOld(j, 3)
+                rotationMatrix(i, j) = mNew(i, 1) * mOld(j, 1) &
+                                       + mNew(i, 2) * mOld(j, 2) &
+                                       + mNew(i, 3) * mOld(j, 3)
             end do
         end do
 
@@ -741,9 +741,9 @@ subroutine rotMatrixRigidBody(tNew, tOld, rotationMatrix, &
         !  rotationPoint(3) = LRef*rotPoint(3) &
         !                   + MachGrid(3)*aInf*tOld/timeRef
 
-        rotationPoint(1) = LRef*rotPoint(1)
-        rotationPoint(2) = LRef*rotPoint(2)
-        rotationPoint(3) = LRef*rotPoint(3)
+        rotationPoint(1) = LRef * rotPoint(1)
+        rotationPoint(2) = LRef * rotPoint(2)
+        rotationPoint(3) = LRef * rotPoint(3)
 
     end subroutine rotMatrixRigidBody
 
@@ -796,21 +796,21 @@ function secondDerivativeRigidRotAngle(degreePolRot, &
 
         dPhi = zero
         do nn = 2, degreePolRot
-            dPhi = dPhi + (nn - 1)*nn*coefPolRot(nn)*(t**(nn - 2))
+            dPhi = dPhi + (nn - 1) * nn * coefPolRot(nn) * (t**(nn - 2))
         end do
 
         ! Compute the fourier contribution.
 
         do nn = 1, degreeFourRot
-            val = nn*omegaFourRot
-            dPhi = dPhi - val**2*sinCoefFourRot(nn)*sin(val*t)
-            dPhi = dPhi - val**2*cosCoefFourRot(nn)*cos(val*t)
+            val = nn * omegaFourRot
+            dPhi = dPhi - val**2 * sinCoefFourRot(nn) * sin(val * t)
+            dPhi = dPhi - val**2 * cosCoefFourRot(nn) * cos(val * t)
         end do
 
         ! Set derivativeRigidRotAngle to dPhi. Multiply by timeRef
         ! to obtain the correct non-dimensional value.
 
-        secondDerivativeRigidRotAngle = timeRef**2*dPhi
+        secondDerivativeRigidRotAngle = timeRef**2 * dPhi
 
     end function secondDerivativeRigidRotAngle
 
@@ -859,7 +859,7 @@ function rigidRotAngle(degreePolRot, coefPolRot, &
 
         phi = coefPolRot(0)
         do nn = 1, degreePolRot
-            phi = phi + coefPolRot(nn)*(t**nn)
+            phi = phi + coefPolRot(nn) * (t**nn)
         end do
 
         ! Compute the fourier contribution. Again the cosine coefficient
@@ -867,9 +867,9 @@ function rigidRotAngle(degreePolRot, coefPolRot, &
 
         phi = phi + cosCoefFourRot(0)
         do nn = 1, degreeFourRot
-            val = nn*omegaFourRot*t
-            phi = phi + cosCoefFourRot(nn)*cos(val) &
-                  + sinCoefFourRot(nn)*sin(val)
+            val = nn * omegaFourRot * t
+            phi = phi + cosCoefFourRot(nn) * cos(val) &
+                  + sinCoefFourRot(nn) * sin(val)
         end do
 
         ! Set rigidRotAngle to phi.
@@ -1191,17 +1191,17 @@ subroutine computeRootBendingMoment(cf, cm, bendingMoment)
         real(kind=realType), intent(out) :: bendingMoment
 
         !Subroutine Variables
-        real(kind=realType):: elasticMomentx, elasticMomenty, elasticMomentz
+        real(kind=realType) :: elasticMomentx, elasticMomenty, elasticMomentz
         bendingMoment = zero
         if (liftIndex == 2) then
             !z out wing sum momentx,momentz
-            elasticMomentx = cm(1) + cf(2)*(pointRefEC(3) - pointRef(3))/lengthref - cf(3)*(pointRefEC(2) - pointRef(2))/lengthref
-            elasticMomentz = cm(3) - cf(2)*(pointRefEC(1) - pointref(1))/lengthref + cf(1)*(pointRefEC(2) - pointRef(2))/lengthref
+  elasticMomentx = cm(1) + cf(2)*(pointRefEC(3) - pointRef(3))/lengthref - cf(3)*(pointRefEC(2) - pointRef(2))/lengthref
+  elasticMomentz = cm(3) - cf(2)*(pointRefEC(1) - pointref(1))/lengthref + cf(1)*(pointRefEC(2) - pointRef(2))/lengthref
             bendingMoment = sqrt(elasticMomentx**2 + elasticMomentz**2)
         elseif (liftIndex == 3) then
             !y out wing sum momentx,momenty
-            elasticMomentx = cm(1) + cf(3)*(pointrefEC(2) - pointRef(2))/lengthref + cf(3)*(pointrefEC(3) - pointref(3))/lengthref
-            elasticMomenty = cm(2) + cf(3)*(pointRefEC(1) - pointRef(1))/lengthref + cf(1)*(pointrefEC(3) - pointRef(3))/lengthref
+  elasticMomentx = cm(1) + cf(3)*(pointrefEC(2) - pointRef(2))/lengthref + cf(3)*(pointrefEC(3) - pointref(3))/lengthref
+  elasticMomenty = cm(2) + cf(3)*(pointRefEC(1) - pointRef(1))/lengthref + cf(1)*(pointrefEC(3) - pointRef(3))/lengthref
             bendingMoment = sqrt(elasticMomentx**2 + elasticMomenty**2)
         end if
 
@@ -1215,13 +1215,13 @@ subroutine computeLeastSquaresRegression(y, x, npts, m, b)
         use constants
         implicit none
         !Subroutine arguments
-        integer(kind=intType)::npts
-        real(kind=realType), dimension(npts)  :: x, y
-        real(kind=realType)::m, b
+        integer(kind=intType) :: npts
+        real(kind=realType), dimension(npts) :: x, y
+        real(kind=realType) :: m, b
 
         !local variables
-        real(kind=realType)::sumx, sumy, sumx2, sumxy
-        integer(kind=intType)::i
+        real(kind=realType) :: sumx, sumy, sumx2, sumxy
+        integer(kind=intType) :: i
 
         !begin execution
         sumx = 0.0
@@ -1232,12 +1232,12 @@ subroutine computeLeastSquaresRegression(y, x, npts, m, b)
 
             sumx = sumx + x(i)
             sumy = sumy + y(i)
-            sumx2 = sumx2 + x(i)*x(i)
-            sumxy = sumxy + x(i)*y(i)
+            sumx2 = sumx2 + x(i) * x(i)
+            sumxy = sumxy + x(i) * y(i)
         end do
 
-        m = ((npts*sumxy) - (sumy*sumx))/((npts*sumx2) - (sumx)**2)
-        b = (sumy*sumx2 - (sumx*sumxy))/((npts*sumx2) - (sumx)**2)
+        m = ((npts * sumxy) - (sumy * sumx)) / ((npts * sumx2) - (sumx)**2)
+        b = (sumy * sumx2 - (sumx * sumxy)) / ((npts * sumx2) - (sumx)**2)
 
     end subroutine computeLeastSquaresRegression
 
@@ -1263,29 +1263,29 @@ subroutine computeTSDerivatives(force, moment, coef0, dcdalpha, &
         !     Subroutine arguments.
         !
         real(kind=realType), dimension(3, nTimeIntervalsSpectral) :: force, moment
-        real(kind=realType), dimension(8):: dcdq, dcdqdot
-        real(kind=realType), dimension(8):: dcdalpha, dcdalphadot
-        real(kind=realType), dimension(8):: Coef0
+        real(kind=realType), dimension(8) :: dcdq, dcdqdot
+        real(kind=realType), dimension(8) :: dcdalpha, dcdalphadot
+        real(kind=realType), dimension(8) :: Coef0
 
         ! Working Variables
         real(kind=realType), dimension(nTimeIntervalsSpectral, 8) :: baseCoef
-        real(kind=realType), dimension(8) ::coef0dot
-        real(kind=realType), dimension(nTimeIntervalsSpectral, 8)::ResBaseCoef
-        real(kind=realType), dimension(nTimeIntervalsSpectral)  :: intervalAlpha, intervalAlphadot
-        real(kind=realType), dimension(nTimeIntervalsSpectral)  :: intervalMach, intervalMachdot
+        real(kind=realType), dimension(8) :: coef0dot
+        real(kind=realType), dimension(nTimeIntervalsSpectral, 8) :: ResBaseCoef
+        real(kind=realType), dimension(nTimeIntervalsSpectral) :: intervalAlpha, intervalAlphadot
+        real(kind=realType), dimension(nTimeIntervalsSpectral) :: intervalMach, intervalMachdot
         real(kind=realType), dimension(nSections) :: t
-        integer(kind=intType):: i, sps, nn
+        integer(kind=intType) :: i, sps, nn
         !speed of sound: for normalization of q derivatives
-        real(kind=realType)::a
+        real(kind=realType) :: a
         real(kind=realType) :: fact, factMoment
         ! Functions
-        real(kind=realType), dimension(nTimeIntervalsSpectral)  :: dPhix, dPhiy, dphiz
-        real(kind=realType), dimension(nTimeIntervalsSpectral)  :: dPhixdot, dPhiydot, dphizdot
-        real(kind=realType)::derivativeRigidRotAngle, secondDerivativeRigidRotAngle
+        real(kind=realType), dimension(nTimeIntervalsSpectral) :: dPhix, dPhiy, dphiz
+        real(kind=realType), dimension(nTimeIntervalsSpectral) :: dPhixdot, dPhiydot, dphizdot
+        real(kind=realType) :: derivativeRigidRotAngle, secondDerivativeRigidRotAngle
 
-        fact = two/(gammaInf*pInf*MachCoef**2 &
-                    *surfaceRef*LRef**2)
-        factMoment = fact/(lengthRef*LRef)
+        fact = two / (gammaInf * pInf * MachCoef**2 &
+                      * surfaceRef * LRef**2)
+        factMoment = fact / (lengthRef * LRef)
 
         if (TSqMode) then
 
@@ -1354,8 +1354,8 @@ subroutine computeTSDerivatives(force, moment, coef0, dcdalpha, &
 
                 if (equationMode == timeSpectral) then
                     do nn = 1, nSections
-                        t(nn) = t(nn) + (sps - 1)*sections(nn)%timePeriod &
-                                /(nTimeIntervalsSpectral*1.0)
+                        t(nn) = t(nn) + (sps - 1) * sections(nn)%timePeriod &
+                                / (nTimeIntervalsSpectral * 1.0)
                     end do
                 end if
 
@@ -1370,32 +1370,32 @@ subroutine computeTSDerivatives(force, moment, coef0, dcdalpha, &
                 ! THIS CALL IS WRONG!!!!
                 !call getDirAngle(velDirFreestream,liftDirection,liftIndex,alpha+intervalAlpha(sps), beta)
 
-                BaseCoef(sps, 1) = fact*( &
-                                   force(1, sps)*liftDirection(1) + &
-                                   force(2, sps)*liftDirection(2) + &
-                                   force(3, sps)*liftDIrection(3))
-                BaseCoef(sps, 2) = fact*( &
-                                   force(1, sps)*dragDirection(1) + &
-                                   force(2, sps)*dragDirection(2) + &
-                                   force(3, sps)*dragDIrection(3))
-                BaseCoef(sps, 3) = force(1, sps)*fact
-                BaseCoef(sps, 4) = force(2, sps)*fact
-                BaseCoef(sps, 5) = force(3, sps)*fact
-                BaseCoef(sps, 6) = moment(1, sps)*factMoment
-                BaseCoef(sps, 7) = moment(2, sps)*factMoment
-                BaseCoef(sps, 8) = moment(3, sps)*factMoment
+                BaseCoef(sps, 1) = fact * ( &
+                                   force(1, sps) * liftDirection(1) + &
+                                   force(2, sps) * liftDirection(2) + &
+                                   force(3, sps) * liftDIrection(3))
+                BaseCoef(sps, 2) = fact * ( &
+                                   force(1, sps) * dragDirection(1) + &
+                                   force(2, sps) * dragDirection(2) + &
+                                   force(3, sps) * dragDIrection(3))
+                BaseCoef(sps, 3) = force(1, sps) * fact
+                BaseCoef(sps, 4) = force(2, sps) * fact
+                BaseCoef(sps, 5) = force(3, sps) * fact
+                BaseCoef(sps, 6) = moment(1, sps) * factMoment
+                BaseCoef(sps, 7) = moment(2, sps) * factMoment
+                BaseCoef(sps, 8) = moment(3, sps) * factMoment
             end do
 
             !now compute dCl/dalpha
             do i = 1, 8
-                call computeLeastSquaresRegression(BaseCoef(:, i), intervalAlpha, nTimeIntervalsSpectral, dcdAlpha(i), coef0(i))
+        call computeLeastSquaresRegression(BaseCoef(:, i), intervalAlpha, nTimeIntervalsSpectral, dcdAlpha(i), coef0(i))
             end do
 
             ! now subtract off estimated cl,cmz and use remainder to compute
             ! clalphadot and cmzalphadot.
             do i = 1, 8
                 do sps = 1, nTimeIntervalsSpectral
-                    ResBaseCoef(sps, i) = BaseCoef(sps, i) - (dcdalpha(i)*intervalAlpha(sps) + Coef0(i))
+                    ResBaseCoef(sps, i) = BaseCoef(sps, i) - (dcdalpha(i) * intervalAlpha(sps) + Coef0(i))
                 end do
             end do
 
@@ -1404,8 +1404,8 @@ subroutine computeTSDerivatives(force, moment, coef0, dcdalpha, &
         call computeLeastSquaresRegression(ResBaseCoef(:, i), intervalAlphadot, nTimeIntervalsSpectral, dcdalphadot(i), Coef0dot(i))
             end do
 
-            a = sqrt(gammaInf*pInfDim/rhoInfDim)
-            dcdalphadot = dcdalphadot*2*(machGrid*a)/lengthRef
+            a = sqrt(gammaInf * pInfDim / rhoInfDim)
+            dcdalphadot = dcdalphadot * 2 * (machGrid * a) / lengthRef
 
         else
             call terminate('computeTSDerivatives', 'Not a valid stability motion')
@@ -1445,14 +1445,14 @@ subroutine getDirAngle(freeStreamAxis, liftAxis, liftIndex, alpha, beta)
         real(kind=realType), dimension(3), intent(in) :: freeStreamAxis
         real(kind=realType), dimension(3), intent(in) :: liftAxis
         real(kind=realType), intent(out) :: alpha, beta
-        integer(kind=intType), intent(out)::liftIndex
+        integer(kind=intType), intent(out) :: liftIndex
         !
         !     Local variables.
         !
         real(kind=realType) :: rnorm
-        integer(kind=intType):: flowIndex, i
+        integer(kind=intType) :: flowIndex, i
         real(kind=realType), dimension(3) :: freeStreamAxisNorm
-        integer(kind=intType) ::  temp
+        integer(kind=intType) :: temp
 
         ! Assume domoniate flow is x
 
@@ -1474,7 +1474,7 @@ subroutine getDirAngle(freeStreamAxis, liftAxis, liftIndex, alpha, beta)
         ! Normalize the freeStreamDirection vector.
         rnorm = sqrt(freeStreamAxis(1)**2 + freeStreamAxis(2)**2 + freeStreamAxis(3)**2)
         do i = 1, 3
-            freeStreamAxisNorm(i) = freeStreamAxis(i)/rnorm
+            freeStreamAxisNorm(i) = freeStreamAxis(i) / rnorm
         end do
 
         if (liftIndex == 2) then
@@ -1513,10 +1513,10 @@ subroutine stabilityDerivativeDriver
         !
         !     Local variables.
         !
-        real(kind=realType), dimension(8)::dcdalpha, dcdalphadot, dcdbeta, &
-                                            dcdbetadot, dcdMach, dcdMachdot
-        real(kind=realType), dimension(8)::dcdp, dcdpdot, dcdq, dcdqdot, dcdr, dcdrdot
-        real(kind=realType), dimension(8)::Coef0, Coef0dot
+        real(kind=realType), dimension(8) :: dcdalpha, dcdalphadot, dcdbeta, &
+                                             dcdbetadot, dcdMach, dcdMachdot
+        real(kind=realType), dimension(8) :: dcdp, dcdpdot, dcdq, dcdqdot, dcdr, dcdrdot
+        real(kind=realType), dimension(8) :: Coef0, Coef0dot
 
         !call computeTSDerivatives(coef0,dcdalpha,dcdalphadot,dcdq,dcdqdot)
 
@@ -1598,8 +1598,8 @@ subroutine setCoefTimeIntegrator
                     coefTimeALE(3) = -fourth
                     coefTimeALE(4) = -fourth
 
-                    coefMeshALE(1, 1) = half*(1.0_realType + 1.0_realType/sqrtthree)
-                    coefMeshALE(1, 2) = half*(1.0_realType - 1.0_realType/sqrtthree)
+                    coefMeshALE(1, 1) = half * (1.0_realType + 1.0_realType / sqrtthree)
+                    coefMeshALE(1, 2) = half * (1.0_realType - 1.0_realType / sqrtthree)
                     coefMeshALE(2, 1) = coefMeshALE(1, 2)
                     coefMeshALE(2, 2) = coefMeshALE(1, 1)
                 end if
@@ -1637,8 +1637,8 @@ subroutine setCoefTimeIntegrator
                 if (useALE .and. equationMode .eq. unsteady) then
                     coefTimeALE(1) = threefourth
                     coefTimeALE(2) = -fourth
-                    coefMeshALE(1, 1) = half*(1.0_realType + 1.0_realType/sqrtthree)
-                    coefMeshALE(1, 2) = half*(1.0_realType - 1.0_realType/sqrtthree)
+                    coefMeshALE(1, 1) = half * (1.0_realType + 1.0_realType / sqrtthree)
+                    coefMeshALE(1, 2) = half * (1.0_realType - 1.0_realType / sqrtthree)
                     coefMeshALE(2, 1) = coefMeshALE(1, 2)
                     coefMeshALE(2, 2) = coefMeshALE(1, 1)
                 end if
@@ -1646,10 +1646,10 @@ subroutine setCoefTimeIntegrator
                 nLevelsSet = 3
 
             case default   ! 3 or bigger.
-                coefTime(0) = 11.0_realType/6.0_realType
+                coefTime(0) = 11.0_realType / 6.0_realType
                 coefTime(1) = -3.0_realType
                 coefTime(2) = 1.5_realType
-                coefTime(3) = -1.0_realType/3.0_realType
+                coefTime(3) = -1.0_realType / 3.0_realType
 
                 ! These numbers are NOT correct
                 ! DO NOT use 3rd order ALE for now
@@ -1659,8 +1659,8 @@ subroutine setCoefTimeIntegrator
                     coefTimeALE(2) = threefourth
                     coefTimeALE(3) = -fourth
                     coefTimeALE(4) = -fourth
-                    coefMeshALE(1, 1) = half*(1.0_realType + 1.0_realType/sqrtthree)
-                    coefMeshALE(1, 2) = half*(1.0_realType - 1.0_realType/sqrtthree)
+                    coefMeshALE(1, 1) = half * (1.0_realType + 1.0_realType / sqrtthree)
+                    coefMeshALE(1, 2) = half * (1.0_realType - 1.0_realType / sqrtthree)
                     coefMeshALE(2, 1) = coefMeshALE(1, 2)
                     coefMeshALE(2, 2) = coefMeshALE(1, 1)
                     coefMeshALE(3, 1) = coefMeshALE(1, 2)
@@ -1716,9 +1716,9 @@ subroutine cross_prod(a, b, c)
         ! Outputs
         real(kind=realType), dimension(3), intent(out) :: c
 
-        c(1) = a(2)*b(3) - a(3)*b(2)
-        c(2) = a(3)*b(1) - a(1)*b(3)
-        c(3) = a(1)*b(2) - a(2)*b(1)
+        c(1) = a(2) * b(3) - a(3) * b(2)
+        c(2) = a(3) * b(1) - a(1) * b(3)
+        c(3) = a(1) * b(2) - a(2) * b(1)
 
     end subroutine cross_prod
 
@@ -1730,7 +1730,7 @@ subroutine siAngle(angle, mult, trans)
         !
         !      Subroutine arguments.
         !
-        integer, intent(in)              :: angle
+        integer, intent(in) :: angle
         real(kind=realType), intent(out) :: mult, trans
 
         ! Determine the situation we are having here.
@@ -1746,7 +1746,7 @@ subroutine siAngle(angle, mult, trans)
 
             ! Angle is given in degrees. A multiplication must be performed.
 
-            mult = pi/180.0_realType
+            mult = pi / 180.0_realType
             trans = zero
 
         else
@@ -1772,7 +1772,7 @@ subroutine siDensity(mass, len, mult, trans)
         !
         !      Subroutine arguments.
         !
-        integer, intent(in)              :: mass, len
+        integer, intent(in) :: mass, len
         real(kind=realType), intent(out) :: mult, trans
 
         ! Determine the situation we are having here.
@@ -1806,7 +1806,7 @@ subroutine siLen(len, mult, trans)
         !
         !      Subroutine arguments.
         !
-        integer, intent(in)              :: len
+        integer, intent(in) :: len
         real(kind=realType), intent(out) :: mult, trans
 
         ! Determine the situation we are having here.
@@ -1850,7 +1850,7 @@ subroutine siPressure(mass, len, time, mult, trans)
         !
         !      Subroutine arguments.
         !
-        integer, intent(in)              :: mass, len, time
+        integer, intent(in) :: mass, len, time
         real(kind=realType), intent(out) :: mult, trans
 
         ! Determine the situation we are having here.
@@ -1885,7 +1885,7 @@ subroutine siTemperature(temp, mult, trans)
         !
         !      Subroutine arguments.
         !
-        integer, intent(in)              :: temp
+        integer, intent(in) :: temp
         real(kind=realType), intent(out) :: mult, trans
 
         ! Determine the situation we are having here.
@@ -1911,7 +1911,7 @@ subroutine siTemperature(temp, mult, trans)
             ! Temperature is in Rankine. Only a multiplication needs to
             ! be performed.
 
-            mult = 5.0_realType/9.0_realType
+            mult = 5.0_realType / 9.0_realType
             trans = zero
 
         case (Fahrenheit)
@@ -1919,7 +1919,7 @@ subroutine siTemperature(temp, mult, trans)
             ! Temperature is in Fahrenheit. Both a multiplication and an
             ! offset must be applied.
 
-            mult = 5.0_realType/9.0_realType
+            mult = 5.0_realType / 9.0_realType
             trans = 255.382
 
         case default
@@ -1946,8 +1946,8 @@ subroutine siTurb(mass, len, time, temp, turbName, mult, trans)
         !
         !      Subroutine arguments.
         !
-        integer, intent(in)              :: mass, len, time, temp
-        character(len=*), intent(in)     :: turbName
+        integer, intent(in) :: mass, len, time, temp
+        character(len=*), intent(in) :: turbName
         real(kind=realType), intent(out) :: mult, trans
 
         ! Determine the situation we are having here.
@@ -1983,7 +1983,7 @@ subroutine siVelocity(length, time, mult, trans)
         !
         !      Subroutine arguments.
         !
-        integer, intent(in)              :: length, time
+        integer, intent(in) :: length, time
         real(kind=realType), intent(out) :: mult, trans
 
         ! Determine the situation we are having here.
@@ -2443,7 +2443,7 @@ subroutine maxEddyv(eddyvisMax)
                     ! Compute the local viscosity ratio and take the maximum
                     ! with the currently stored value.
 
-                    eddyvis = rev(i, j, k)/rlv(i, j, k)
+                    eddyvis = rev(i, j, k) / rlv(i, j, k)
                     eddyvisMax = max(eddyvisMax, eddyvis)
 
                 end do
@@ -2486,7 +2486,7 @@ subroutine maxHdiffMach(hdiffMax, MachMax)
 
         ! Set the free stream value of the total enthalpy.
 
-        hInf = (wInf(irhoE) + pInfCorr)/rhoInf
+        hInf = (wInf(irhoE) + pInfCorr) / rhoInf
 
         ! Loop over the owned cells of this block.
 
@@ -2496,9 +2496,9 @@ subroutine maxHdiffMach(hdiffMax, MachMax)
 
                     ! Compute the local total enthalpy and Mach number squared.
 
-                    hdiff = abs((w(i, j, k, irhoE) + p(i, j, k))/w(i, j, k, irho) - hInf)
+                    hdiff = abs((w(i, j, k, irhoE) + p(i, j, k)) / w(i, j, k, irho) - hInf)
                     Mach2 = (w(i, j, k, ivx)**2 + w(i, j, k, ivy)**2 &
-                             + w(i, j, k, ivz)**2)*w(i, j, k, irho)/(gamma(i, j, k)*p(i, j, k))
+                             + w(i, j, k, ivz)**2) * w(i, j, k, irho) / (gamma(i, j, k) * p(i, j, k))
 
                     ! Determine the maximum of these values and the
                     ! currently stored maximum values.
@@ -2515,7 +2515,7 @@ subroutine maxHdiffMach(hdiffMax, MachMax)
         ! total enthalpy difference.
 
         MachMax = sqrt(MachMax)
-        hdiffMax = hdiffMax/hInf
+        hdiffMax = hdiffMax / hInf
 
     end subroutine maxHdiffMach
 
@@ -2928,8 +2928,8 @@ subroutine reallocateInteger2(intArray, newSize1, newSize2, &
 
         ! Determine the total new and old size.
 
-        newSize = newSize1*newSize2
-        oldSize = oldSize1*oldSize2
+        newSize = newSize1 * newSize2
+        oldSize = oldSize1 * oldSize2
 
         ! Determine for each of the 2 components the minimum of the new
         ! and the old size. Multiply these values to obtain the total
@@ -2938,7 +2938,7 @@ subroutine reallocateInteger2(intArray, newSize1, newSize2, &
         nn1 = min(newSize1, oldSize1)
         nn2 = min(newSize2, oldSize2)
 
-        nn = nn1*nn2
+        nn = nn1 * nn2
 
         ! Set the pointer for tmp.
 
@@ -3064,8 +3064,8 @@ subroutine reallocateReal2(realArray, newSize1, newSize2, &
 
         ! Determine the total new and old size.
 
-        newSize = newSize1*newSize2
-        oldSize = oldSize1*oldSize2
+        newSize = newSize1 * newSize2
+        oldSize = oldSize1 * oldSize2
 
         ! Determine for each of the 2 components the minimum of the new
         ! and the old size. Multiply these values to obtain the total
@@ -3074,7 +3074,7 @@ subroutine reallocateReal2(realArray, newSize1, newSize2, &
         nn1 = min(newSize1, oldSize1)
         nn2 = min(newSize2, oldSize2)
 
-        nn = nn1*nn2
+        nn = nn1 * nn2
 
         ! Set the pointer for tmp.
 
@@ -3176,8 +3176,8 @@ subroutine setBufferSizes(level, sps, determine1to1Buf, determineOversetBuf)
             ! Multiply sendSize and recvSize with the number of variables to
             ! be communicated.
 
-            sendSize = sendSize*nVarComm
-            recvSize = recvSize*nVarComm
+            sendSize = sendSize * nVarComm
+            recvSize = recvSize * nVarComm
 
             ! Store the maximum of the current values and the old values
             ! in sendBufferSize1to1 and recvBufferSize1to1.
@@ -3202,8 +3202,8 @@ subroutine setBufferSizes(level, sps, determine1to1Buf, determineOversetBuf)
             ! Multiply sendSize and recvSize with the number of variables to
             ! be communicated.
 
-            sendSize = sendSize*nVarComm
-            recvSize = recvSize*nVarComm
+            sendSize = sendSize * nVarComm
+            recvSize = recvSize * nVarComm
 
             ! Store the maximum of the current values and the old values.
 
@@ -3692,27 +3692,27 @@ subroutine spectralInterpolCoef(nsps, t, alpScal, alpMat)
         ! coefficients. Note that the loop starts at 0.
 
         if (mod(nsps, 2) .eq. 0) then
-            nhalfM1 = nsps/2 - 1
+            nhalfM1 = nsps / 2 - 1
         else
-            nhalfM1 = (nsps - 1)/2
+            nhalfM1 = (nsps - 1) / 2
         end if
 
-        nspsInv = one/real(nsps, realType)
+        nspsInv = one / real(nsps, realType)
 
         do j = 0, (nsps - 1)
             if (mod(nsps, 2) .eq. 0) then
-                alpScal(j) = one + cos(j*pi)*cos(nsps*pi*t)
+                alpScal(j) = one + cos(j * pi) * cos(nsps * pi * t)
             else
-                alpScal(j) = one + cos(j*pi*(nsps + 1)/nsps)*cos((nsps + 1)*pi*t)
+                alpScal(j) = one + cos(j * pi * (nsps + 1) / nsps) * cos((nsps + 1) * pi * t)
             end if
 
             do r = 1, nhalfM1
                 alpScal(j) = alpScal(j) &
-                             + two*cos(r*j*two*pi*nspsInv)*cos(r*two*pi*t) &
-                             + two*sin(r*j*two*pi*nspsInv)*sin(r*two*pi*t)
+                             + two * cos(r * j * two * pi * nspsInv) * cos(r * two * pi * t) &
+                             + two * sin(r * j * two * pi * nspsInv) * sin(r * two * pi * t)
             end do
 
-            alpScal(j) = alpScal(j)*nspsInv
+            alpScal(j) = alpScal(j) * nspsInv
 
         end do
         !
@@ -3728,14 +3728,14 @@ subroutine spectralInterpolCoef(nsps, t, alpScal, alpMat)
             ! Note that also t must be adapted, because t is a ratio between
             ! the actual time and the periodic time.
 
-            m = nsps*sections(nn)%nSlices
+            m = nsps * sections(nn)%nSlices
             if (mod(m, 2) .eq. 0) then
-                mhalfM1 = m/2 - 1
+                mhalfM1 = m / 2 - 1
             else
-                mhalfM1 = (m - 1)/2
+                mhalfM1 = (m - 1) / 2
             end if
-            mInv = one/real(m, realType)
-            tm = t/real(sections(nn)%nSlices, realType)
+            mInv = one / real(m, realType)
+            tm = t / real(sections(nn)%nSlices, realType)
 
             ! Loop over the number of spectral solutions.
 
@@ -3777,28 +3777,28 @@ subroutine spectralInterpolCoef(nsps, t, alpScal, alpMat)
                     ! Determine the index j, the index of alp in the entire
                     ! wheel.
 
-                    j = jj + p*nsps
+                    j = jj + p * nsps
 
                     ! Compute the scalar coefficient alp of the index j in
                     ! the entire wheel.
 
                     if (mod(m, 2) .eq. 0) then
-                        alp = one + cos(j*pi)*cos(m*pi*tm)
+                        alp = one + cos(j * pi) * cos(m * pi * tm)
                     else
-                        alp = one + cos(j*pi*(m + 1)/m)*cos((m + 1)*pi*tm)
+                        alp = one + cos(j * pi * (m + 1) / m) * cos((m + 1) * pi * tm)
                     end if
                     do r = 1, mhalfM1
-                        alp = alp + two*cos(r*j*two*pi*mInv)*cos(r*two*pi*tm) &
-                              + two*sin(r*j*two*pi*mInv)*sin(r*two*pi*tm)
+                        alp = alp + two * cos(r * j * two * pi * mInv) * cos(r * two * pi * tm) &
+                              + two * sin(r * j * two * pi * mInv) * sin(r * two * pi * tm)
                     end do
 
-                    alp = alp*mInv
+                    alp = alp * mInv
 
                     ! Update the matrix coefficient.
 
                     do r = 1, 3
                         do j = 1, 3
-                            alpMat(nn, jj, r, j) = alpMat(nn, jj, r, j) + alp*rp(r, j)
+                            alpMat(nn, jj, r, j) = alpMat(nn, jj, r, j) + alp * rp(r, j)
                         end do
                     end do
 
@@ -3807,9 +3807,9 @@ subroutine spectralInterpolCoef(nsps, t, alpScal, alpMat)
 
                     do r = 1, 3
                         do j = 1, 3
-                            tmp(r, j) = rp(r, 1)*rotMatrixSpectral(nn, 1, j) &
-                                        + rp(r, 2)*rotMatrixSpectral(nn, 2, j) &
-                                        + rp(r, 3)*rotMatrixSpectral(nn, 3, j)
+                            tmp(r, j) = rp(r, 1) * rotMatrixSpectral(nn, 1, j) &
+                                        + rp(r, 2) * rotMatrixSpectral(nn, 2, j) &
+                                        + rp(r, 3) * rotMatrixSpectral(nn, 3, j)
                         end do
                     end do
 
@@ -4044,9 +4044,9 @@ subroutine getLiftDirFromSymmetry(liftDir)
                             xx(bcData(mm)%inEnd, bcData(mm)%jnBeg, :)
 
                     ! Cross Product
-                    cp(1) = (v1(2)*v2(3) - v1(3)*v2(2))
-                    cp(2) = (v1(3)*v2(1) - v1(1)*v2(3))
-                    cp(3) = (v1(1)*v2(2) - v1(2)*v2(1))
+                    cp(1) = (v1(2) * v2(3) - v1(3) * v2(2))
+                    cp(2) = (v1(3) * v2(1) - v1(1) * v2(3))
+                    cp(3) = (v1(1) * v2(2) - v1(2) * v2(1))
 
                     ! Only interesed in abs values
                     cp = abs(cp)
@@ -5714,7 +5714,7 @@ subroutine EChk(errorcode, file, line)
         integer(kind=intType), intent(in) :: errorcode
         character(len=*), intent(in) :: file
         integer(kind=intType), intent(in) :: line
-        integer::ierr
+        integer :: ierr
         character(len=maxStringLen) :: errorCodeFormat, errorLineFormat
 
         errorCodeFormat = "(2(A, I2,)"
@@ -5779,8 +5779,8 @@ logical function EulerWallsPresent()
         !      Local variables.
         !
         integer(kind=intType) :: nn, i
-        integer               :: ierr
-        logical               :: localEulerWalls
+        integer :: ierr
+        logical :: localEulerWalls
 
         ! Initialize localEulerWalls to .false. and loop over the
         ! boundary subfaces of the blocks to see if Euler walls are
@@ -5834,7 +5834,7 @@ subroutine allocConvArrays(nIterTot)
         !
         integer :: ierr
 
-        integer(kind=intType):: nSolverMon ! number of solver monitor variables
+        integer(kind=intType) :: nSolverMon ! number of solver monitor variables
 
         ! Return immediately if the convergence history (of the inner
         ! iterations) does not need to be stored. This logical can
@@ -5916,8 +5916,8 @@ subroutine getMonitorVariableNames(nvar, monitor_variables)
         implicit none
 
         ! save the monitor variable names into a new array
-        integer(kind=intType), intent(in):: nvar
-        character, dimension(nvar, maxCGNSNameLen), intent(out):: monitor_variables
+        integer(kind=intType), intent(in) :: nvar
+        character, dimension(nvar, maxCGNSNameLen), intent(out) :: monitor_variables
 
         ! working variables
         character(len=maxCGNSNameLen) :: var_name
@@ -5945,8 +5945,8 @@ subroutine getSolverTypeArray(niter, nsps, type_array)
         implicit none
 
         ! save the monitor variable names into a new array
-        integer(kind=intType), intent(in):: niter, nsps
-        character, dimension(0:niter, ntimeintervalsspectral, maxIterTypelen), intent(out):: type_array
+        integer(kind=intType), intent(in) :: niter, nsps
+        character, dimension(0:niter, ntimeintervalsspectral, maxIterTypelen), intent(out) :: type_array
 
         ! working variables
         character(len=maxIterTypelen) :: type_name
@@ -6007,7 +6007,7 @@ subroutine convergenceHeader
 
 #ifndef USE_COMPLEX
             ! for the real version this is easy
-            nCharWrite = nCharWrite + nMon*(fieldWidthLarge + 1)
+            nCharWrite = nCharWrite + nMon * (fieldWidthLarge + 1)
 #else
             ! for complex we need to differentiate between residuals and functionals
             do i = 1, nMon
@@ -6366,13 +6366,13 @@ subroutine sumResiduals(nn, mm)
             do j = 2, jl
                 do i = 2, il
 #ifndef USE_COMPLEX
-                    monLoc(mm) = monLoc(mm) + (dw(i, j, k, nn)/vol(i, j, k))**2
+                    monLoc(mm) = monLoc(mm) + (dw(i, j, k, nn) / vol(i, j, k))**2
 #else
                     ! TODO squaring the complex residual when its order 1e-200 underflows and we need a better approach here
                     ! we need to square and sum the real and complex parts separately
                     monLoc(mm) = monLoc(mm) + &
-                                 cmplx((real(dw(i, j, k, nn)/vol(i, j, k)))**2, &
-                                       (aimag(dw(i, j, k, nn)/vol(i, j, k)))**2)
+                                 cmplx((real(dw(i, j, k, nn) / vol(i, j, k)))**2, &
+                                       (aimag(dw(i, j, k, nn) / vol(i, j, k)))**2)
 #endif
                 end do
             end do
@@ -6407,27 +6407,27 @@ subroutine sumAllResiduals(mm)
             do j = 2, jl
                 do i = 2, il
                     state_sum = 0.0
-                    ovv = one/vol(i, j, k)
+                    ovv = one / vol(i, j, k)
                     do l = 1, nwf
 #ifndef USE_COMPLEX
-                        state_sum = state_sum + (dw(i, j, k, l)*ovv)**2
+                        state_sum = state_sum + (dw(i, j, k, l) * ovv)**2
 #else
                         ! TODO squaring the complex residual when its order 1e-200 underflows and we need a better approach here
                         ! we need to square and sum the real and complex parts separately
                         state_sum = state_sum + &
-                                    cmplx((real(dw(i, j, k, l)*ovv))**2, &
-                                          (aimag(dw(i, j, k, l)*ovv))**2)
+                                    cmplx((real(dw(i, j, k, l) * ovv))**2, &
+                                          (aimag(dw(i, j, k, l) * ovv))**2)
 #endif
                     end do
                     do l = nt1, nt2
                         ! l-nt1+1 will index the turbResScale properly
 #ifndef USE_COMPLEX
-                        state_sum = state_sum + (dw(i, j, k, l)*ovv*turbResScale(l - nt1 + 1))**2
+                        state_sum = state_sum + (dw(i, j, k, l) * ovv * turbResScale(l - nt1 + 1))**2
 #else
                         ! we need to square and sum the real and complex parts separately
                         state_sum = state_sum + &
-                                    cmplx((real(dw(i, j, k, l)*ovv*turbResScale(l - nt1 + 1)))**2, &
-                                          (aimag(dw(i, j, k, l)*ovv*turbResScale(l - nt1 + 1)))**2)
+                                    cmplx((real(dw(i, j, k, l) * ovv * turbResScale(l - nt1 + 1)))**2, &
+                                          (aimag(dw(i, j, k, l) * ovv * turbResScale(l - nt1 + 1)))**2)
 #endif
                     end do
                     monLoc(mm) = monLoc(mm) + state_sum
@@ -6449,7 +6449,7 @@ subroutine unsteadyHeader
         !
         !      Local variables
         !
-        character(len=7)  :: integerString
+        character(len=7) :: integerString
         character(len=12) :: realString
 
         ! Write the time step number to the integer string and the
@@ -6498,7 +6498,7 @@ subroutine getCellCenters(level, n, xCen)
                         do i = 2, il
                             ii = ii + 1
 
-                            xCen(:, ii) = eighth*( &
+                            xCen(:, ii) = eighth * ( &
                                           x(i - 1, j - 1, k - 1, :) + &
                                           x(i, j - 1, k - 1, :) + &
                                           x(i - 1, j, k - 1, :) + &
diff --git a/src/wallDistance/wallDistance.F90 b/src/wallDistance/wallDistance.F90
index c8fa38eb2..0ec6baebf 100644
--- a/src/wallDistance/wallDistance.F90
+++ b/src/wallDistance/wallDistance.F90
@@ -55,10 +55,10 @@ subroutine updateWallDistancesQuickly(nn, level, sps)
 
 #ifdef TAPENADE_REVERSE
         !$AD II-LOOP
-        do ii = 0, nx*ny*nz - 1
+        do ii = 0, nx * ny * nz - 1
             i = mod(ii, nx) + 2
-            j = mod(ii/nx, ny) + 2
-            k = ii/(nx*ny) + 2
+            j = mod(ii / nx, ny) + 2
+            k = ii / (nx * ny) + 2
 #else
             do k = 2, kl
                 do j = 2, jl
@@ -83,26 +83,26 @@ subroutine updateWallDistancesQuickly(nn, level, sps)
                         ! functions o to get target: (CCW ordering remember!)
 
                         xp(:) = &
-                            (one - u)*(one - v)*xSurf(3*(ind(1) - 1) + 1:3*ind(1)) + &
-                            (u)*(one - v)*xSurf(3*(ind(2) - 1) + 1:3*ind(2)) + &
-                            (u)*(v)*xSurf(3*(ind(3) - 1) + 1:3*ind(3)) + &
-                            (one - u)*(v)*xSurf(3*(ind(4) - 1) + 1:3*ind(4))
+                            (one - u) * (one - v) * xSurf(3 * (ind(1) - 1) + 1:3 * ind(1)) + &
+                            (u) * (one - v) * xSurf(3 * (ind(2) - 1) + 1:3 * ind(2)) + &
+                            (u) * (v) * xSurf(3 * (ind(3) - 1) + 1:3 * ind(3)) + &
+                            (one - u) * (v) * xSurf(3 * (ind(4) - 1) + 1:3 * ind(4))
 
                         ! Get the cell center
-                        xc(1) = eighth*(x(i - 1, j - 1, k - 1, 1) + x(i, j - 1, k - 1, 1) &
-                                        + x(i - 1, j, k - 1, 1) + x(i, j, k - 1, 1) &
-                                        + x(i - 1, j - 1, k, 1) + x(i, j - 1, k, 1) &
-                                        + x(i - 1, j, k, 1) + x(i, j, k, 1))
+                        xc(1) = eighth * (x(i - 1, j - 1, k - 1, 1) + x(i, j - 1, k - 1, 1) &
+                                          + x(i - 1, j, k - 1, 1) + x(i, j, k - 1, 1) &
+                                          + x(i - 1, j - 1, k, 1) + x(i, j - 1, k, 1) &
+                                          + x(i - 1, j, k, 1) + x(i, j, k, 1))
 
-                        xc(2) = eighth*(x(i - 1, j - 1, k - 1, 2) + x(i, j - 1, k - 1, 2) &
-                                        + x(i - 1, j, k - 1, 2) + x(i, j, k - 1, 2) &
-                                        + x(i - 1, j - 1, k, 2) + x(i, j - 1, k, 2) &
-                                        + x(i - 1, j, k, 2) + x(i, j, k, 2))
+                        xc(2) = eighth * (x(i - 1, j - 1, k - 1, 2) + x(i, j - 1, k - 1, 2) &
+                                          + x(i - 1, j, k - 1, 2) + x(i, j, k - 1, 2) &
+                                          + x(i - 1, j - 1, k, 2) + x(i, j - 1, k, 2) &
+                                          + x(i - 1, j, k, 2) + x(i, j, k, 2))
 
-                        xc(3) = eighth*(x(i - 1, j - 1, k - 1, 3) + x(i, j - 1, k - 1, 3) &
-                                        + x(i - 1, j, k - 1, 3) + x(i, j, k - 1, 3) &
-                                        + x(i - 1, j - 1, k, 3) + x(i, j - 1, k, 3) &
-                                        + x(i - 1, j, k, 3) + x(i, j, k, 3))
+                        xc(3) = eighth * (x(i - 1, j - 1, k - 1, 3) + x(i, j - 1, k - 1, 3) &
+                                          + x(i - 1, j, k - 1, 3) + x(i, j, k - 1, 3) &
+                                          + x(i - 1, j - 1, k, 3) + x(i, j - 1, k, 3) &
+                                          + x(i - 1, j, k, 3) + x(i, j, k, 3))
 
                         ! Now we have the two points...just take the norm of the
                         ! distance between them
@@ -184,7 +184,7 @@ subroutine updateWallRoughness()
                             cycle
                         end if
                         nCellsLocal = nCellsLocal + &
-                                      (bcData(mm)%inEnd - bcData(mm)%inBeg)*(bcData(mm)%jnEnd - bcData(mm)%jnBeg)
+                                      (bcData(mm)%inEnd - bcData(mm)%inBeg) * (bcData(mm)%jnEnd - bcData(mm)%jnBeg)
                     end do
                 end do
 
@@ -260,8 +260,8 @@ subroutine updateWallRoughness()
                                     k = kl
                                 end select
 
-                                cellIdLocal(iCell) = nCellBLockOffset(level, nn)*nTimeIntervalsSpectral + nx*ny*nz*(sps - 1) + &
-                                                     (i - 2) + (j - 2)*nx + (k - 2)*nx*ny
+                cellIdLocal(iCell) = nCellBLockOffset(level, nn) * nTimeIntervalsSpectral + nx * ny * nz * (sps - 1) + &
+                                                     (i - 2) + (j - 2) * nx + (k - 2) * nx * ny
                             end do
                         end do
                     end do
@@ -299,7 +299,7 @@ subroutine updateWallRoughness()
                                 end if
 
                                 ! find the index of the surface cell (Requires gfortran > 9.0 )
-                                iCell = findloc(cellIdGlobal, flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k), DIM=1)
+                              iCell = findloc(cellIdGlobal, flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k), DIM=1)
 
                                 if (iCell == 0) then
                                     write (errorMessage, 100) &
@@ -352,7 +352,7 @@ subroutine computeWallDistance(level, allocMem)
         !      Subroutine arguments.
         !
         integer(kind=intType), intent(in) :: level
-        logical, intent(in)               :: allocMem
+        logical, intent(in) :: allocMem
         !
         !      Local variables.
         !
@@ -627,27 +627,27 @@ subroutine computeNormalSpacing(level, sps)
                         ! Compute the vector from centroid of the adjacent cell
                         ! to the centroid of the face.
 
-                        vecx = eighth*(xFace(i - 1, j - 1, 1) + xFace(i - 1, j, 1) &
-                                       + xFace(i, j - 1, 1) + xFace(i, j, 1) &
-                                       - xInt(i - 1, j - 1, 1) - xInt(i - 1, j, 1) &
-                                       - xInt(i, j - 1, 1) - xInt(i, j, 1))
+                        vecx = eighth * (xFace(i - 1, j - 1, 1) + xFace(i - 1, j, 1) &
+                                         + xFace(i, j - 1, 1) + xFace(i, j, 1) &
+                                         - xInt(i - 1, j - 1, 1) - xInt(i - 1, j, 1) &
+                                         - xInt(i, j - 1, 1) - xInt(i, j, 1))
 
-                        vecy = eighth*(xFace(i - 1, j - 1, 2) + xFace(i - 1, j, 2) &
-                                       + xFace(i, j - 1, 2) + xFace(i, j, 2) &
-                                       - xInt(i - 1, j - 1, 2) - xInt(i - 1, j, 2) &
-                                       - xInt(i, j - 1, 2) - xInt(i, j, 2))
+                        vecy = eighth * (xFace(i - 1, j - 1, 2) + xFace(i - 1, j, 2) &
+                                         + xFace(i, j - 1, 2) + xFace(i, j, 2) &
+                                         - xInt(i - 1, j - 1, 2) - xInt(i - 1, j, 2) &
+                                         - xInt(i, j - 1, 2) - xInt(i, j, 2))
 
-                        vecz = eighth*(xFace(i - 1, j - 1, 3) + xFace(i - 1, j, 3) &
-                                       + xFace(i, j - 1, 3) + xFace(i, j, 3) &
-                                       - xInt(i - 1, j - 1, 3) - xInt(i - 1, j, 3) &
-                                       - xInt(i, j - 1, 3) - xInt(i, j, 3))
+                        vecz = eighth * (xFace(i - 1, j - 1, 3) + xFace(i - 1, j, 3) &
+                                         + xFace(i, j - 1, 3) + xFace(i, j, 3) &
+                                         - xInt(i - 1, j - 1, 3) - xInt(i - 1, j, 3) &
+                                         - xInt(i, j - 1, 3) - xInt(i, j, 3))
 
                         ! Compute the projection of this vector onto the normal
                         ! vector of the face. For a decent mesh there will not be
                         ! much of a difference between the projection and the
                         ! original mesh, but it does not hurt to do it.
 
-                        dot = nnx*vecx + nny*vecy + nnz*vecz
+                        dot = nnx * vecx + nny * vecy + nnz * vecz
 
                         ! As (nnx,nny,nnz) is a unit vector the distance to the
                         ! wall of the first cell center is given by the absolute
@@ -681,7 +681,7 @@ subroutine initWallDistance(level, sps, allocMem)
         !      Subroutine arguments.
         !
         integer(kind=intType), intent(in) :: level, sps
-        logical, intent(in)               :: allocMem
+        logical, intent(in) :: allocMem
         !
         !      Local variables.
         !
@@ -792,8 +792,8 @@ subroutine determineDistance(level, sps)
         nCellPer = 0
 
         do nn = 1, nDom
-            ll = flowDoms(nn, level, sps)%nx*flowDoms(nn, level, sps)%ny &
-                 *flowDoms(nn, level, sps)%nz
+            ll = flowDoms(nn, level, sps)%nx * flowDoms(nn, level, sps)%ny &
+                 * flowDoms(nn, level, sps)%nz
             nCell = nCell + ll
 
             mm = flowDoms(nn, level, sps)%sectionID
@@ -834,22 +834,22 @@ subroutine determineDistance(level, sps)
                         ! Compute the coordinates of the cell center relative
                         ! to the rotation center of this section.
 
-                        xc(1) = eighth*(x(i - 1, j - 1, k - 1, 1) + x(i, j - 1, k - 1, 1) &
-                                        + x(i - 1, j, k - 1, 1) + x(i, j, k - 1, 1) &
-                                        + x(i - 1, j - 1, k, 1) + x(i, j - 1, k, 1) &
-                                        + x(i - 1, j, k, 1) + x(i, j, k, 1)) &
+                        xc(1) = eighth * (x(i - 1, j - 1, k - 1, 1) + x(i, j - 1, k - 1, 1) &
+                                          + x(i - 1, j, k - 1, 1) + x(i, j, k - 1, 1) &
+                                          + x(i - 1, j - 1, k, 1) + x(i, j - 1, k, 1) &
+                                          + x(i - 1, j, k, 1) + x(i, j, k, 1)) &
                                 - sections(ll)%rotCenter(1)
 
-                        xc(2) = eighth*(x(i - 1, j - 1, k - 1, 2) + x(i, j - 1, k - 1, 2) &
-                                        + x(i - 1, j, k - 1, 2) + x(i, j, k - 1, 2) &
-                                        + x(i - 1, j - 1, k, 2) + x(i, j - 1, k, 2) &
-                                        + x(i - 1, j, k, 2) + x(i, j, k, 2)) &
+                        xc(2) = eighth * (x(i - 1, j - 1, k - 1, 2) + x(i, j - 1, k - 1, 2) &
+                                          + x(i - 1, j, k - 1, 2) + x(i, j, k - 1, 2) &
+                                          + x(i - 1, j - 1, k, 2) + x(i, j - 1, k, 2) &
+                                          + x(i - 1, j, k, 2) + x(i, j, k, 2)) &
                                 - sections(ll)%rotCenter(2)
 
-                        xc(3) = eighth*(x(i - 1, j - 1, k - 1, 3) + x(i, j - 1, k - 1, 3) &
-                                        + x(i - 1, j, k - 1, 3) + x(i, j, k - 1, 3) &
-                                        + x(i - 1, j - 1, k, 3) + x(i, j - 1, k, 3) &
-                                        + x(i - 1, j, k, 3) + x(i, j, k, 3)) &
+                        xc(3) = eighth * (x(i - 1, j - 1, k - 1, 3) + x(i, j - 1, k - 1, 3) &
+                                          + x(i - 1, j, k - 1, 3) + x(i, j, k - 1, 3) &
+                                          + x(i - 1, j - 1, k, 3) + x(i, j - 1, k, 3) &
+                                          + x(i - 1, j, k, 3) + x(i, j, k, 3)) &
                                 - sections(ll)%rotCenter(3)
 
                         ! Apply the periodic transformation for this section to
@@ -857,19 +857,19 @@ subroutine determineDistance(level, sps)
                         ! in the appropriate place in coor.
 
                         mm = mm + 1
-                        coor(1, mm) = rotMatrixSections(ll, 1, 1)*xc(1) &
-                                      + rotMatrixSections(ll, 1, 2)*xc(2) &
-                                      + rotMatrixSections(ll, 1, 3)*xc(3) &
+                        coor(1, mm) = rotMatrixSections(ll, 1, 1) * xc(1) &
+                                      + rotMatrixSections(ll, 1, 2) * xc(2) &
+                                      + rotMatrixSections(ll, 1, 3) * xc(3) &
                                       + sections(ll)%rotCenter(1)
 
-                        coor(2, mm) = rotMatrixSections(ll, 2, 1)*xc(1) &
-                                      + rotMatrixSections(ll, 2, 2)*xc(2) &
-                                      + rotMatrixSections(ll, 2, 3)*xc(3) &
+                        coor(2, mm) = rotMatrixSections(ll, 2, 1) * xc(1) &
+                                      + rotMatrixSections(ll, 2, 2) * xc(2) &
+                                      + rotMatrixSections(ll, 2, 3) * xc(3) &
                                       + sections(ll)%rotCenter(2)
 
-                        coor(3, mm) = rotMatrixSections(ll, 3, 1)*xc(1) &
-                                      + rotMatrixSections(ll, 3, 2)*xc(2) &
-                                      + rotMatrixSections(ll, 3, 3)*xc(3) &
+                        coor(3, mm) = rotMatrixSections(ll, 3, 1) * xc(1) &
+                                      + rotMatrixSections(ll, 3, 2) * xc(2) &
+                                      + rotMatrixSections(ll, 3, 3) * xc(3) &
                                       + sections(ll)%rotCenter(3)
 
                         ! Initialize the distance squared, because this is an
@@ -911,8 +911,8 @@ subroutine determineDistance(level, sps)
         ! Loop over the domains and find the periodic ones.
 
         domainsPer1: do nn = 1, nDom
-            jj = flowDoms(nn, level, sps)%nx*flowDoms(nn, level, sps)%ny &
-                 *flowDoms(nn, level, sps)%nz
+            jj = flowDoms(nn, level, sps)%nx * flowDoms(nn, level, sps)%ny &
+                 * flowDoms(nn, level, sps)%nz
 
             ll = flowDoms(nn, level, sps)%sectionID
 
@@ -933,21 +933,21 @@ subroutine determineDistance(level, sps)
                     xc(2) = coor(2, mm) - sections(ll)%rotCenter(2)
                     xc(3) = coor(3, mm) - sections(ll)%rotCenter(3)
 
-                    coorPer(1, ii) = sections(ll)%rotMatrix(1, 1)*xc(1) &
-                                     + sections(ll)%rotMatrix(1, 2)*xc(2) &
-                                     + sections(ll)%rotMatrix(1, 3)*xc(3) &
+                    coorPer(1, ii) = sections(ll)%rotMatrix(1, 1) * xc(1) &
+                                     + sections(ll)%rotMatrix(1, 2) * xc(2) &
+                                     + sections(ll)%rotMatrix(1, 3) * xc(3) &
                                      + sections(ll)%rotCenter(1) &
                                      + sections(ll)%translation(1)
 
-                    coorPer(2, ii) = sections(ll)%rotMatrix(2, 1)*xc(1) &
-                                     + sections(ll)%rotMatrix(2, 2)*xc(2) &
-                                     + sections(ll)%rotMatrix(2, 3)*xc(3) &
+                    coorPer(2, ii) = sections(ll)%rotMatrix(2, 1) * xc(1) &
+                                     + sections(ll)%rotMatrix(2, 2) * xc(2) &
+                                     + sections(ll)%rotMatrix(2, 3) * xc(3) &
                                      + sections(ll)%rotCenter(2) &
                                      + sections(ll)%translation(2)
 
-                    coorPer(3, ii) = sections(ll)%rotMatrix(3, 1)*xc(1) &
-                                     + sections(ll)%rotMatrix(3, 2)*xc(2) &
-                                     + sections(ll)%rotMatrix(3, 3)*xc(3) &
+                    coorPer(3, ii) = sections(ll)%rotMatrix(3, 1) * xc(1) &
+                                     + sections(ll)%rotMatrix(3, 2) * xc(2) &
+                                     + sections(ll)%rotMatrix(3, 3) * xc(3) &
                                      + sections(ll)%rotCenter(3) &
                                      + sections(ll)%translation(3)
 
@@ -981,8 +981,8 @@ subroutine determineDistance(level, sps)
         ! Loop over the domains and find the periodic ones.
 
         domainsPer2: do nn = 1, nDom
-            jj = flowDoms(nn, level, sps)%nx*flowDoms(nn, level, sps)%ny &
-                 *flowDoms(nn, level, sps)%nz
+            jj = flowDoms(nn, level, sps)%nx * flowDoms(nn, level, sps)%ny &
+                 * flowDoms(nn, level, sps)%nz
 
             ll = flowDoms(nn, level, sps)%sectionID
 
@@ -1010,19 +1010,19 @@ subroutine determineDistance(level, sps)
                     xc(3) = coor(3, mm) - sections(ll)%rotCenter(3) &
                             - sections(ll)%translation(3)
 
-                    coorPer(1, ii) = sections(ll)%rotMatrix(1, 1)*xc(1) &
-                                     + sections(ll)%rotMatrix(2, 1)*xc(2) &
-                                     + sections(ll)%rotMatrix(3, 1)*xc(3) &
+                    coorPer(1, ii) = sections(ll)%rotMatrix(1, 1) * xc(1) &
+                                     + sections(ll)%rotMatrix(2, 1) * xc(2) &
+                                     + sections(ll)%rotMatrix(3, 1) * xc(3) &
                                      + sections(ll)%rotCenter(1)
 
-                    coorPer(2, ii) = sections(ll)%rotMatrix(1, 2)*xc(1) &
-                                     + sections(ll)%rotMatrix(2, 2)*xc(2) &
-                                     + sections(ll)%rotMatrix(3, 2)*xc(3) &
+                    coorPer(2, ii) = sections(ll)%rotMatrix(1, 2) * xc(1) &
+                                     + sections(ll)%rotMatrix(2, 2) * xc(2) &
+                                     + sections(ll)%rotMatrix(3, 2) * xc(3) &
                                      + sections(ll)%rotCenter(2)
 
-                    coorPer(3, ii) = sections(ll)%rotMatrix(1, 3)*xc(1) &
-                                     + sections(ll)%rotMatrix(2, 3)*xc(2) &
-                                     + sections(ll)%rotMatrix(3, 3)*xc(3) &
+                    coorPer(3, ii) = sections(ll)%rotMatrix(1, 3) * xc(1) &
+                                     + sections(ll)%rotMatrix(2, 3) * xc(2) &
+                                     + sections(ll)%rotMatrix(3, 3) * xc(3) &
                                      + sections(ll)%rotCenter(3)
                 end do
 
@@ -1063,7 +1063,7 @@ subroutine determineDistance(level, sps)
 
             if (sections(ll)%periodic) then
 
-                jj = nx*ny*nz
+                jj = nx * ny * nz
 
                 j = mm
                 do i = 1, jj
@@ -1221,11 +1221,11 @@ subroutine localViscousSurfaceMesh(multSections, level, sps)
                 ! Perform gaussian eliMination, because now it's sure that
                 ! the element (k,k) is non-zero.
 
-                aaa = one/a(k, k)
+                aaa = one / a(k, k)
                 do i = (k + 1), 3
-                    bbb = a(i, k)*aaa
+                    bbb = a(i, k) * aaa
                     do j = k, 3
-                        a(i, j) = a(i, j) - bbb*a(k, j)
+                        a(i, j) = a(i, j) - bbb * a(k, j)
                     end do
                 end do
 
@@ -1238,16 +1238,16 @@ subroutine localViscousSurfaceMesh(multSections, level, sps)
             ! determine the other two elements of the eigen vector.
 
             axis(nn, ind(3)) = one
-            axis(nn, ind(2)) = -(a(2, 3)*axis(nn, ind(3)))/a(2, 2)
-            axis(nn, ind(1)) = -(a(1, 3)*axis(nn, ind(3)) &
-                                 + a(1, 2)*axis(nn, ind(2)))/a(1, 1)
+            axis(nn, ind(2)) = -(a(2, 3) * axis(nn, ind(3))) / a(2, 2)
+            axis(nn, ind(1)) = -(a(1, 3) * axis(nn, ind(3)) &
+                                 + a(1, 2) * axis(nn, ind(2))) / a(1, 1)
 
             ! Create a unit vector.
 
-            length = one/sqrt(axis(nn, 1)**2 + axis(nn, 2)**2 + axis(nn, 3)**2)
-            axis(nn, 1) = axis(nn, 1)*length
-            axis(nn, 2) = axis(nn, 2)*length
-            axis(nn, 3) = axis(nn, 3)*length
+            length = one / sqrt(axis(nn, 1)**2 + axis(nn, 2)**2 + axis(nn, 3)**2)
+            axis(nn, 1) = axis(nn, 1) * length
+            axis(nn, 2) = axis(nn, 2) * length
+            axis(nn, 3) = axis(nn, 3) * length
 
             ! Make sure that the largest component of this vector is
             ! positive, such that a unique definition of the rotation
@@ -1287,23 +1287,23 @@ subroutine localViscousSurfaceMesh(multSections, level, sps)
             ! Make sure that rad1 is normal to axis. Create a unit
             ! vector again.
 
-            dot = rad1(nn, 1)*axis(nn, 1) + rad1(nn, 2)*axis(nn, 2) &
-                  + rad1(nn, 3)*axis(nn, 3)
-            rad1(nn, 1) = rad1(nn, 1) - dot*axis(nn, 1)
-            rad1(nn, 2) = rad1(nn, 2) - dot*axis(nn, 2)
-            rad1(nn, 3) = rad1(nn, 3) - dot*axis(nn, 3)
+            dot = rad1(nn, 1) * axis(nn, 1) + rad1(nn, 2) * axis(nn, 2) &
+                  + rad1(nn, 3) * axis(nn, 3)
+            rad1(nn, 1) = rad1(nn, 1) - dot * axis(nn, 1)
+            rad1(nn, 2) = rad1(nn, 2) - dot * axis(nn, 2)
+            rad1(nn, 3) = rad1(nn, 3) - dot * axis(nn, 3)
 
-            length = one/(rad1(nn, 1)**2 + rad1(nn, 2)**2 + rad1(nn, 3)**2)
-            rad1(nn, 1) = rad1(nn, 1)*length
-            rad1(nn, 2) = rad1(nn, 2)*length
-            rad1(nn, 3) = rad1(nn, 3)*length
+            length = one / (rad1(nn, 1)**2 + rad1(nn, 2)**2 + rad1(nn, 3)**2)
+            rad1(nn, 1) = rad1(nn, 1) * length
+            rad1(nn, 2) = rad1(nn, 2) * length
+            rad1(nn, 3) = rad1(nn, 3) * length
 
             ! Create the second vector which spans the radIal plane. This
             ! must be normal to both axis and rad1, i.e. the cross-product.
 
-            rad2(nn, 1) = axis(nn, 2)*rad1(nn, 3) - axis(nn, 3)*rad1(nn, 2)
-            rad2(nn, 2) = axis(nn, 3)*rad1(nn, 1) - axis(nn, 1)*rad1(nn, 3)
-            rad2(nn, 3) = axis(nn, 1)*rad1(nn, 2) - axis(nn, 2)*rad1(nn, 1)
+            rad2(nn, 1) = axis(nn, 2) * rad1(nn, 3) - axis(nn, 3) * rad1(nn, 2)
+            rad2(nn, 2) = axis(nn, 3) * rad1(nn, 1) - axis(nn, 1) * rad1(nn, 3)
+            rad2(nn, 3) = axis(nn, 1) * rad1(nn, 2) - axis(nn, 2) * rad1(nn, 1)
 
         end do
 
@@ -1351,8 +1351,8 @@ subroutine localViscousSurfaceMesh(multSections, level, sps)
                         ! Determine the radIal components in the local
                         ! cylindrical coordinate system of the section.
 
-                        r1 = xx*rad1(sec, 1) + yy*rad1(sec, 2) + zz*rad1(sec, 3)
-                        r2 = xx*rad2(sec, 1) + yy*rad2(sec, 2) + zz*rad2(sec, 3)
+                        r1 = xx * rad1(sec, 1) + yy * rad1(sec, 2) + zz * rad1(sec, 3)
+                        r2 = xx * rad2(sec, 1) + yy * rad2(sec, 2) + zz * rad2(sec, 3)
 
                         ! Determine the angle if r1 or r2 is nonzero.
 
@@ -1376,11 +1376,11 @@ subroutine localViscousSurfaceMesh(multSections, level, sps)
                             ! Determine the quadrant in which this node is located
                             ! and update the corresponding counter.
 
-                            if (theta <= -half*pi) then
+                            if (theta <= -half * pi) then
                                 nq3 = nq3 + 1
                             else if (theta <= zero) then
                                 nq4 = nq4 + 1
-                            else if (theta <= half*pi) then
+                            else if (theta <= half * pi) then
                                 nq1 = nq1 + 1
                             else
                                 nq2 = nq2 + 1
@@ -1478,7 +1478,7 @@ subroutine localViscousSurfaceMesh(multSections, level, sps)
                 ! does not cross the line theta == 0. The rotation matrix
                 ! for alignment must be computed.
 
-                theta = two*pi/sections(nn)%nSlices
+                theta = two * pi / sections(nn)%nSlices
 
                 ! Determine the number of rotations needed to align the mesh.
 
@@ -1488,7 +1488,7 @@ subroutine localViscousSurfaceMesh(multSections, level, sps)
                     ! fourth quadrant. Determine the number of rotations for
                     ! alignment; this is a positive number.
 
-                    mm = -thetaNMax(nn)/theta + 1
+                    mm = -thetaNMax(nn) / theta + 1
 
                 else
 
@@ -1496,46 +1496,46 @@ subroutine localViscousSurfaceMesh(multSections, level, sps)
                     ! quadrant. The number of rotations will be a negative
                     ! number now.
 
-                    mm = -thetaPMin(nn)/theta - 1
+                    mm = -thetaPMin(nn) / theta - 1
 
                 end if
 
                 ! Compute the rotation angle in the local cylindrical frame
                 ! and its sine and cosine.
 
-                theta = mm*theta
+                theta = mm * theta
                 cosTheta = cos(theta)
                 sinTheta = sin(theta)
 
                 ! Apply the transformation to obtain the matrix in the
                 ! original cartesian frame.
 
-                rotMatrixSections(nn, 1, 1) = axis(nn, 1)*axis(nn, 1) &
-                                              + cosTheta*(rad1(nn, 1)*rad1(nn, 1) + rad2(nn, 1)*rad2(nn, 1))
-                rotMatrixSections(nn, 1, 2) = axis(nn, 1)*axis(nn, 2) &
-                                              + cosTheta*(rad1(nn, 1)*rad1(nn, 2) + rad2(nn, 1)*rad2(nn, 2)) &
-                                              + sinTheta*(rad1(nn, 2)*rad2(nn, 1) - rad1(nn, 1)*rad2(nn, 2))
-                rotMatrixSections(nn, 1, 3) = axis(nn, 1)*axis(nn, 3) &
-                                              + cosTheta*(rad1(nn, 1)*rad1(nn, 3) + rad2(nn, 1)*rad2(nn, 3)) &
-                                              + sinTheta*(rad1(nn, 3)*rad2(nn, 1) - rad1(nn, 1)*rad2(nn, 3))
-
-                rotMatrixSections(nn, 2, 1) = axis(nn, 1)*axis(nn, 2) &
-                                              + cosTheta*(rad1(nn, 1)*rad1(nn, 2) + rad2(nn, 1)*rad2(nn, 2)) &
-                                              - sinTheta*(rad1(nn, 2)*rad2(nn, 1) - rad1(nn, 1)*rad2(nn, 2))
-                rotMatrixSections(nn, 2, 2) = axis(nn, 2)*axis(nn, 2) &
-                                              + cosTheta*(rad1(nn, 2)*rad1(nn, 2) + rad2(nn, 2)*rad2(nn, 2))
-                rotMatrixSections(nn, 2, 3) = axis(nn, 2)*axis(nn, 3) &
-                                              + cosTheta*(rad1(nn, 2)*rad1(nn, 3) + rad2(nn, 2)*rad2(nn, 3)) &
-                                              + sinTheta*(rad1(nn, 3)*rad2(nn, 2) - rad1(nn, 2)*rad2(nn, 3))
-
-                rotMatrixSections(nn, 3, 1) = axis(nn, 1)*axis(nn, 3) &
-                                              + cosTheta*(rad1(nn, 1)*rad1(nn, 3) + rad2(nn, 1)*rad2(nn, 3)) &
-                                              - sinTheta*(rad1(nn, 3)*rad2(nn, 1) - rad1(nn, 1)*rad2(nn, 3))
-                rotMatrixSections(nn, 3, 2) = axis(nn, 2)*axis(nn, 3) &
-                                              + cosTheta*(rad1(nn, 2)*rad1(nn, 3) + rad2(nn, 2)*rad2(nn, 3)) &
-                                              - sinTheta*(rad1(nn, 3)*rad2(nn, 2) - rad1(nn, 2)*rad2(nn, 3))
-                rotMatrixSections(nn, 3, 3) = axis(nn, 3)*axis(nn, 3) &
-                                              + cosTheta*(rad1(nn, 3)*rad1(nn, 3) + rad2(nn, 3)*rad2(nn, 3))
+                rotMatrixSections(nn, 1, 1) = axis(nn, 1) * axis(nn, 1) &
+                                              + cosTheta * (rad1(nn, 1) * rad1(nn, 1) + rad2(nn, 1) * rad2(nn, 1))
+                rotMatrixSections(nn, 1, 2) = axis(nn, 1) * axis(nn, 2) &
+                                              + cosTheta * (rad1(nn, 1) * rad1(nn, 2) + rad2(nn, 1) * rad2(nn, 2)) &
+                                              + sinTheta * (rad1(nn, 2) * rad2(nn, 1) - rad1(nn, 1) * rad2(nn, 2))
+                rotMatrixSections(nn, 1, 3) = axis(nn, 1) * axis(nn, 3) &
+                                              + cosTheta * (rad1(nn, 1) * rad1(nn, 3) + rad2(nn, 1) * rad2(nn, 3)) &
+                                              + sinTheta * (rad1(nn, 3) * rad2(nn, 1) - rad1(nn, 1) * rad2(nn, 3))
+
+                rotMatrixSections(nn, 2, 1) = axis(nn, 1) * axis(nn, 2) &
+                                              + cosTheta * (rad1(nn, 1) * rad1(nn, 2) + rad2(nn, 1) * rad2(nn, 2)) &
+                                              - sinTheta * (rad1(nn, 2) * rad2(nn, 1) - rad1(nn, 1) * rad2(nn, 2))
+                rotMatrixSections(nn, 2, 2) = axis(nn, 2) * axis(nn, 2) &
+                                              + cosTheta * (rad1(nn, 2) * rad1(nn, 2) + rad2(nn, 2) * rad2(nn, 2))
+                rotMatrixSections(nn, 2, 3) = axis(nn, 2) * axis(nn, 3) &
+                                              + cosTheta * (rad1(nn, 2) * rad1(nn, 3) + rad2(nn, 2) * rad2(nn, 3)) &
+                                              + sinTheta * (rad1(nn, 3) * rad2(nn, 2) - rad1(nn, 2) * rad2(nn, 3))
+
+                rotMatrixSections(nn, 3, 1) = axis(nn, 1) * axis(nn, 3) &
+                                              + cosTheta * (rad1(nn, 1) * rad1(nn, 3) + rad2(nn, 1) * rad2(nn, 3)) &
+                                              - sinTheta * (rad1(nn, 3) * rad2(nn, 1) - rad1(nn, 1) * rad2(nn, 3))
+                rotMatrixSections(nn, 3, 2) = axis(nn, 2) * axis(nn, 3) &
+                                              + cosTheta * (rad1(nn, 2) * rad1(nn, 3) + rad2(nn, 2) * rad2(nn, 3)) &
+                                              - sinTheta * (rad1(nn, 3) * rad2(nn, 2) - rad1(nn, 2) * rad2(nn, 3))
+                rotMatrixSections(nn, 3, 3) = axis(nn, 3) * axis(nn, 3) &
+                                              + cosTheta * (rad1(nn, 3) * rad1(nn, 3) + rad2(nn, 3) * rad2(nn, 3))
 
             end if testRot
 
@@ -1609,19 +1609,19 @@ subroutine localViscousSurfaceMesh(multSections, level, sps)
                             ! coordinates.
 
                             np = np + 1
-                            coorVisc(1, np) = rotMatrixSections(sec, 1, 1)*xx &
-                                              + rotMatrixSections(sec, 1, 2)*yy &
-                                              + rotMatrixSections(sec, 1, 3)*zz &
+                            coorVisc(1, np) = rotMatrixSections(sec, 1, 1) * xx &
+                                              + rotMatrixSections(sec, 1, 2) * yy &
+                                              + rotMatrixSections(sec, 1, 3) * zz &
                                               + sections(sec)%rotCenter(1)
 
-                            coorVisc(2, np) = rotMatrixSections(sec, 2, 1)*xx &
-                                              + rotMatrixSections(sec, 2, 2)*yy &
-                                              + rotMatrixSections(sec, 2, 3)*zz &
+                            coorVisc(2, np) = rotMatrixSections(sec, 2, 1) * xx &
+                                              + rotMatrixSections(sec, 2, 2) * yy &
+                                              + rotMatrixSections(sec, 2, 3) * zz &
                                               + sections(sec)%rotCenter(2)
 
-                            coorVisc(3, np) = rotMatrixSections(sec, 3, 1)*xx &
-                                              + rotMatrixSections(sec, 3, 2)*yy &
-                                              + rotMatrixSections(sec, 3, 3)*zz &
+                            coorVisc(3, np) = rotMatrixSections(sec, 3, 1) * xx &
+                                              + rotMatrixSections(sec, 3, 2) * yy &
+                                              + rotMatrixSections(sec, 3, 3) * zz &
                                               + sections(sec)%rotCenter(3)
                         end do
                     end do
@@ -1639,7 +1639,7 @@ subroutine localViscousSurfaceMesh(multSections, level, sps)
 
                             nq = nq + 1
 
-                            connVisc(1, nq) = npOld + (j - jBeg - 1)*np1 + i - iBeg
+                            connVisc(1, nq) = npOld + (j - jBeg - 1) * np1 + i - iBeg
                             connVisc(2, nq) = connVisc(1, nq) + 1
                             connVisc(3, nq) = connVisc(2, nq) + np1
                             connVisc(4, nq) = connVisc(3, nq) - 1
@@ -1675,21 +1675,21 @@ subroutine localViscousSurfaceMesh(multSections, level, sps)
                             ! Update the counter np and determine the new
                             ! coordinates after the transformation.
 
-                            coorVisc(1, np) = sections(sec)%rotMatrix(1, 1)*xx &
-                                              + sections(sec)%rotMatrix(1, 2)*yy &
-                                              + sections(sec)%rotMatrix(1, 3)*zz &
+                            coorVisc(1, np) = sections(sec)%rotMatrix(1, 1) * xx &
+                                              + sections(sec)%rotMatrix(1, 2) * yy &
+                                              + sections(sec)%rotMatrix(1, 3) * zz &
                                               + sections(sec)%rotCenter(1) &
                                               + sections(sec)%translation(1)
 
-                            coorVisc(2, np) = sections(sec)%rotMatrix(2, 1)*xx &
-                                              + sections(sec)%rotMatrix(2, 2)*yy &
-                                              + sections(sec)%rotMatrix(2, 3)*zz &
+                            coorVisc(2, np) = sections(sec)%rotMatrix(2, 1) * xx &
+                                              + sections(sec)%rotMatrix(2, 2) * yy &
+                                              + sections(sec)%rotMatrix(2, 3) * zz &
                                               + sections(sec)%rotCenter(2) &
                                               + sections(sec)%translation(2)
 
-                            coorVisc(3, np) = sections(sec)%rotMatrix(3, 1)*xx &
-                                              + sections(sec)%rotMatrix(3, 2)*yy &
-                                              + sections(sec)%rotMatrix(3, 3)*zz &
+                            coorVisc(3, np) = sections(sec)%rotMatrix(3, 1) * xx &
+                                              + sections(sec)%rotMatrix(3, 2) * yy &
+                                              + sections(sec)%rotMatrix(3, 3) * zz &
                                               + sections(sec)%rotCenter(3) &
                                               + sections(sec)%translation(3)
                         end do
@@ -1800,8 +1800,8 @@ subroutine viscousSurfaceMesh(level, sps)
         ! accurate computation is performed.
 
         do nn = 1, nSections
-            multSections(nn) = sections(nn)%nSlices/mm
-            if (sections(nn)%nSlices > mm*multSections(nn)) &
+            multSections(nn) = sections(nn)%nSlices / mm
+            if (sections(nn)%nSlices > mm * multSections(nn)) &
                 multSections(nn) = multSections(nn) + 1
         end do
 
@@ -1836,10 +1836,10 @@ subroutine viscousSurfaceMesh(level, sps)
                     ! Update the number of nodes and quads. Take the
                     ! multiplicity into account.
 
-                    nNodeVisc = nNodeVisc + ii*(ni + 1)*(nj + 1)*(nk + 1)
-                    nquadVisc = nquadVisc + ii*max(ni, 1_intType) &
-                                *max(nj, 1_intType) &
-                                *max(nk, 1_intType)
+                    nNodeVisc = nNodeVisc + ii * (ni + 1) * (nj + 1) * (nk + 1)
+                    nquadVisc = nquadVisc + ii * max(ni, 1_intType) &
+                                * max(nj, 1_intType) &
+                                * max(nk, 1_intType)
                 end if
             end do
         end do
@@ -2009,20 +2009,20 @@ subroutine determineWallAssociation(level, sps)
                     do i = 2, il
 
                         ! Compute the coordinates of the cell center
-                        coor(1) = eighth*(x(i - 1, j - 1, k - 1, 1) + x(i, j - 1, k - 1, 1) &
-                                          + x(i - 1, j, k - 1, 1) + x(i, j, k - 1, 1) &
-                                          + x(i - 1, j - 1, k, 1) + x(i, j - 1, k, 1) &
-                                          + x(i - 1, j, k, 1) + x(i, j, k, 1))
+                        coor(1) = eighth * (x(i - 1, j - 1, k - 1, 1) + x(i, j - 1, k - 1, 1) &
+                                            + x(i - 1, j, k - 1, 1) + x(i, j, k - 1, 1) &
+                                            + x(i - 1, j - 1, k, 1) + x(i, j - 1, k, 1) &
+                                            + x(i - 1, j, k, 1) + x(i, j, k, 1))
 
-                        coor(2) = eighth*(x(i - 1, j - 1, k - 1, 2) + x(i, j - 1, k - 1, 2) &
-                                          + x(i - 1, j, k - 1, 2) + x(i, j, k - 1, 2) &
-                                          + x(i - 1, j - 1, k, 2) + x(i, j - 1, k, 2) &
-                                          + x(i - 1, j, k, 2) + x(i, j, k, 2))
+                        coor(2) = eighth * (x(i - 1, j - 1, k - 1, 2) + x(i, j - 1, k - 1, 2) &
+                                            + x(i - 1, j, k - 1, 2) + x(i, j, k - 1, 2) &
+                                            + x(i - 1, j - 1, k, 2) + x(i, j - 1, k, 2) &
+                                            + x(i - 1, j, k, 2) + x(i, j, k, 2))
 
-                        coor(3) = eighth*(x(i - 1, j - 1, k - 1, 3) + x(i, j - 1, k - 1, 3) &
-                                          + x(i - 1, j, k - 1, 3) + x(i, j, k - 1, 3) &
-                                          + x(i - 1, j - 1, k, 3) + x(i, j - 1, k, 3) &
-                                          + x(i - 1, j, k, 3) + x(i, j, k, 3))
+                        coor(3) = eighth * (x(i - 1, j - 1, k - 1, 3) + x(i, j - 1, k - 1, 3) &
+                                            + x(i - 1, j, k - 1, 3) + x(i, j, k - 1, 3) &
+                                            + x(i - 1, j - 1, k, 3) + x(i, j - 1, k, 3) &
+                                            + x(i - 1, j, k, 3) + x(i, j, k, 3))
 
                         if (.not. oversetPresent) then
                             ! No overset present. Simply search our own wall,
@@ -2090,7 +2090,7 @@ subroutine determineWallAssociation(level, sps)
 
                                 coor(4) = large
                                 call minDistancetreeSearchSinglePoint(walls(c)%ADT, coor, &
-                                                                      intInfo2, uvw2, dummy, 0, BB, frontLeaves, frontLeavesNew)
+                                                              intInfo2, uvw2, dummy, 0, BB, frontLeaves, frontLeavesNew)
                                 cellID2 = intInfo2(3)
 
                                 if (uvw2(4) < nearWallDist**2) then
@@ -2137,7 +2137,7 @@ subroutine determineWallAssociation(level, sps)
 
         ! Now determine all the node indices this processor needs to get.
         mm = 0
-        allocate (indicesToGet(nCellsLocal(level)*4), link(nCellsLocal(level)*4))
+        allocate (indicesToGet(nCellsLocal(level) * 4), link(nCellsLocal(level) * 4))
         do nn = 1, nDom
             call setPointers(nn, level, sps)
             do k = 2, kl
@@ -2153,7 +2153,7 @@ subroutine determineWallAssociation(level, sps)
         end do
 
         ! This unique-ifies the indices.
-        call unique(indicesToGet, 4*nCellsLocal(level), nUnique, link)
+        call unique(indicesToGet, 4 * nCellsLocal(level), nUnique, link)
 
         ! we need to update the stored indices to use the ordering of the nodes we will receive.
         mm = 0
@@ -2176,14 +2176,14 @@ subroutine determineWallAssociation(level, sps)
         ! expand "indices to get" to include the DOF. Use link for this
         ! temporary array operation.
 
-        allocate (link(nUnique*3))
+        allocate (link(nUnique * 3))
         do i = 1, nUnique
-            link((i - 1)*3 + 1) = indicesToGet(i)*3
-            link((i - 1)*3 + 2) = indicesToGet(i)*3 + 1
-            link((i - 1)*3 + 3) = indicesToGet(i)*3 + 2
+            link((i - 1) * 3 + 1) = indicesToGet(i) * 3
+            link((i - 1) * 3 + 2) = indicesToGet(i) * 3 + 1
+            link((i - 1) * 3 + 3) = indicesToGet(i) * 3 + 2
         end do
 
-        call ISCreateGeneral(adflow_comm_world, nUnique*3, link, PETSC_COPY_VALUES, IS1, ierr)
+        call ISCreateGeneral(adflow_comm_world, nUnique * 3, link, PETSC_COPY_VALUES, IS1, ierr)
         call EChk(ierr, __FILE__, __LINE__)
         deallocate (link)
 
@@ -2191,13 +2191,13 @@ subroutine determineWallAssociation(level, sps)
         ! this vector contains all the spectal instances. It is therefore
         ! only allocated on the first call with sps=1
         if (sps == 1) then
-            call VecCreateMPI(ADFLOW_COMM_WORLD, 3*nNodesLocal(level)*nTimeIntervalsSpectral, &
+            call VecCreateMPI(ADFLOW_COMM_WORLD, 3 * nNodesLocal(level) * nTimeIntervalsSpectral, &
                               PETSC_DETERMINE, xVolumeVec(level), ierr)
             call EChk(ierr, __FILE__, __LINE__)
         end if
 
         ! This is the vector we will scatter the nodes into.
-        call VecCreateMPI(ADFLOW_COMM_WORLD, 3*nUnique, PETSC_DETERMINE, &
+        call VecCreateMPI(ADFLOW_COMM_WORLD, 3 * nUnique, PETSC_DETERMINE, &
                           xSurfVec(level, sps), ierr)
         call EChk(ierr, __FILE__, __LINE__)
 

From 12b66c1aabcec24428b84b1d291fd5867e236f60 Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Tue, 21 Feb 2023 11:48:39 +0100
Subject: [PATCH 48/60] prepare for merge

---
 .git-blame-ignore-revs                        |     8 +-
 .github/azure-pipelines.yaml                  |     1 +
 setup.py                                      |     2 +-
 src/ADT/adtAPI.F90                            |   765 +-
 src/ADT/adtBuild.F90                          |  2451 ++--
 src/ADT/adtLocalSearch.F90                    |  4183 +++---
 src/ADT/adtSearch.F90                         |  3042 ++--
 src/ADT/adtUtils.F90                          |  1631 ++-
 src/NKSolver/NKSolvers.F90                    |  7133 +++++----
 src/NKSolver/blockette.F90                    | 12063 ++++++++--------
 .../ADFirstAidKit/tapenade_dp_test.f90        |   344 +-
 src/adjoint/adjointAPI.F90                    |  2300 ++-
 src/adjoint/adjointDebug.F90                  |   945 +-
 src/adjoint/adjointExtra.F90                  |  1303 +-
 src/adjoint/adjointUtils.F90                  |  4490 +++---
 src/adjoint/fortranPC.F90                     |  1663 ++-
 src/adjoint/masterRoutines.F90                |  2614 ++--
 src/adjoint/outputForward/residuals_d.f90     |    48 +-
 src/adjoint/outputForward/solverutils_d.f90   |     8 +-
 src/adjoint/outputReverse/residuals_b.f90     |    48 +-
 src/adjoint/outputReverse/solverutils_b.f90   |     8 +-
 .../outputReverseFast/residuals_fast_b.f90    |    48 +-
 .../outputReverseFast/solverutils_fast_b.f90  |     4 +-
 src/bcdata/BCData.F90                         |     4 +-
 src/f2py/adflow.pyf                           |     8 +-
 src/initFlow/initializeFlow.F90               |  5977 ++++----
 src/initFlow/variableReading.F90              |  4298 +++---
 src/modules/ADjointPETSc.F90                  |    41 +-
 src/modules/ADjointVars.F90                   |    74 +-
 src/modules/BCDataMod.F90                     |    92 +-
 src/modules/BCPointers.F90                    |    54 +-
 src/modules/CpCurveFits.f90                   |    76 +-
 src/modules/IOModule.f90                      |    54 +-
 src/modules/actuatorRegionData.F90            |    58 +-
 src/modules/adtData.F90                       |   265 +-
 src/modules/bleedFlows.f90                    |    48 +-
 src/modules/cgnsGrid.F90                      |   604 +-
 src/modules/commonFormats.F90                 |    38 +-
 src/modules/communication.F90                 |   332 +-
 src/modules/coolingModelLevel0.f90            |   100 +-
 src/modules/diffSizes.f90                     |     6 +-
 src/modules/flowVarRefState.F90               |   147 +-
 src/modules/iteration.f90                     |   251 +-
 src/modules/kd_tree.f90                       |  3755 +++--
 src/modules/killSignals.f90                   |    40 +-
 src/modules/monitor.f90                       |   198 +-
 src/modules/myPushPopLib.F90                  |    22 +-
 src/modules/overset.F90                       |   932 +-
 src/modules/precision.F90                     |   196 +-
 src/modules/precision_tapenade.f90            |    15 +-
 src/modules/section.f90                       |    58 +-
 src/modules/stencils.f90                      |   182 +-
 src/modules/su_cgns.F90                       |    51 +-
 src/modules/surfaceFamilies.F90               |   126 +-
 src/modules/userSurfaceIntegrationData.F90    |    62 +-
 src/output/tecplotIO.F90                      |  4471 +++---
 src/output/writeCGNSGrid.F90                  |  2592 ++--
 src/output/writeCGNSSurface.F90               |  5273 ++++---
 src/output/writeCGNSVolume.F90                |  2417 ++--
 src/output/writeSol.F90                       |   142 +-
 src/overset/buildClusterWalls.F90             |  1118 +-
 src/overset/cartMesh.F90                      |  1896 ++-
 src/overset/computeCellWallPoint.F90          |   240 +-
 src/overset/computeHolesInsideBody.F90        |  1481 +-
 src/overset/determineDonors.F90               |   354 +-
 src/overset/finalOversetCommStructures.F90    |   681 +-
 src/overset/flagNearWall.F90                  |   487 +-
 src/overset/floodInteriorCells.F90            |   490 +-
 src/overset/fringeSearch.F90                  |   332 +-
 src/overset/makeBoundaryStrings.F90           |  1198 +-
 src/overset/oversetAPI.F90                    |  4714 +++---
 src/overset/oversetCommUtilites.F90           |  4137 +++---
 src/overset/oversetInitialization.F90         |  1201 +-
 src/overset/oversetPackingRoutines.F90        |  1465 +-
 src/overset/oversetUtilities.F90              |  4699 +++---
 src/overset/stringOps.F90                     |  6443 ++++-----
 src/overset/surfaceCorrection.F90             |   330 +-
 src/overset/wallSearch.F90                    |   837 +-
 src/overset/zipperMesh.F90                    |  3122 ++--
 src/partitioning/gridChecking.F90             |  2323 ++-
 src/partitioning/loadBalance.F90              |  5482 ++++---
 src/partitioning/partitionMod.F90             |  3143 ++--
 src/partitioning/partitioning.F90             |  4142 +++---
 src/partitioning/readCGNSGrid.F90             |  5196 ++++---
 src/preprocessing/coarseUtils.F90             |  2714 ++--
 src/preprocessing/pointMatchedCommPattern.F90 |  6576 +++++----
 src/preprocessing/preprocessingAPI.F90        |     6 +-
 src/preprocessing/preprocessingModules.F90    |  2404 +--
 src/solver/ALEUtils.F90                       |  1940 ++-
 src/solver/BCRoutines.F90                     |  3146 ++--
 src/solver/actuatorRegion.F90                 |  1128 +-
 src/solver/agmg.F90                           |  1302 +-
 src/solver/fluxes.F90                         |  9787 +++++++------
 src/solver/multiGrid.F90                      |  2875 ++--
 src/solver/residuals.F90                      |  3905 +++--
 src/solver/smoothers.F90                      |  1112 +-
 src/solver/solverUtils.F90                    |  7085 +++++----
 src/solver/solvers.F90                        |  3188 ++--
 src/solver/surfaceIntegrations.F90            |  3120 ++--
 src/solver/userSurfaceIntegrations.F90        |  2980 ++--
 src/solver/zipperIntegrations.F90             |  1449 +-
 src/turbulence/SST.F90                        |  2969 ++--
 src/turbulence/kt.F90                         |  2433 ++--
 src/turbulence/kw.F90                         |  2064 +--
 src/turbulence/sa.F90                         |     2 +
 src/turbulence/turbAPI.F90                    |   245 +-
 src/turbulence/turbCurveFits.F90              |  7762 +++++-----
 src/turbulence/turbMod.F90                    |    90 +-
 src/turbulence/turbUtils.F90                  |  3651 +++--
 src/turbulence/vf.F90                         |  4060 +++---
 src/utils/flowUtils.F90                       |  3573 +++--
 src/utils/genericISNAN.F90                    |    66 +-
 src/utils/haloExchange.F90                    |  6458 +++++----
 src/utils/signals.F90                         |   251 +-
 src/utils/sorting.F90                         |  2613 ++--
 src/utils/surfaceUtils.F90                    |  1316 +-
 src/utils/utils.F90                           |     8 +-
 src/wallDistance/wallDistance.F90             |    18 +-
 src/warping/getAreas.f90                      |   586 +-
 src/warping/getForces.F90                     |  2964 ++--
 src/warping/setCpTarget.f90                   |   127 +-
 src/warping/setTNSWall.f90                    |    91 +-
 src/warping/warping.F90                       |   897 +-
 src_cs/modules/complexify.f90                 |  1322 +-
 src_cs/modules/precision.F90                  |   200 +-
 125 files changed, 115861 insertions(+), 116294 deletions(-)

diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
index 13ce9e2e3..226eb345c 100644
--- a/.git-blame-ignore-revs
+++ b/.git-blame-ignore-revs
@@ -1,3 +1,5 @@
-# formatting (#135)
-84a3970ddf5ec3ffe84249fc7d6e2b23fb75be9e
-5a883999fd65d6a8506babbb64406e2fb47402ec
+# black formatting (#135)
+cd66e9a3b348b522aaa5b96aedb659adb621731d
+
+# fprettify (#229)
+cd6ec63f37e3d8027aeba1a8ae071e6e7a8e501d
diff --git a/.github/azure-pipelines.yaml b/.github/azure-pipelines.yaml
index 945eab2c8..53d8e67b2 100644
--- a/.github/azure-pipelines.yaml
+++ b/.github/azure-pipelines.yaml
@@ -20,3 +20,4 @@ extends:
     INTEL_CONFIG: config/defaults/config.LINUX_INTEL.mk
     COVERAGE: true
     TAPENADE: true
+    FPRETTIFY: true
diff --git a/setup.py b/setup.py
index 04df798b7..67655caea 100644
--- a/setup.py
+++ b/setup.py
@@ -25,7 +25,7 @@
     packages=find_packages(include=["adflow*"]),
     package_data={"adflow": ["*.so"]},
     install_requires=[
-        "numpy>=1.16,<1.22",
+        "numpy>=1.16,<1.24",
         "mdolab-baseclasses>=1.4",
         "mpi4py>=3.0",
         "petsc4py>=3.11",
diff --git a/src/ADT/adtAPI.F90 b/src/ADT/adtAPI.F90
index 7249812ff..6dc88a865 100644
--- a/src/ADT/adtAPI.F90
+++ b/src/ADT/adtAPI.F90
@@ -1,397 +1,392 @@
 module adtAPI
-  !
-  !      Module, which defines the API of the ADT routines. It is
-  !      included in a module, such that an explicit interface is
-  !      present.
-  !
-
-  use constants
-  use adtBuild, only : buildVolumeADT, buildSurfaceADT
-  use adtSearch, only : minDistanceSearch, failSafeSearch, &
-       containmentsearch
-  implicit none
-
-  !=================================================================
-
-contains
-
-  !===============================================================
-
-  subroutine adtBuildSurfaceADT(nTria, nQuads,   nNodes,    &
-       coor,  triaConn, quadsConn, &
-       BBox,  useBBox,  comm,      &
-       adtID)
-    !
-    !        This routine builds the 6 dimensional ADT, which stores the
-    !        given surface grid. The memory intensive part of these
-    !        arguments, the arrays with the coordinates and
-    !        connectivities, are not copied. Instead pointers are set to
-    !        these arrays. It is therefore the responsibility of the user
-    !        not to deallocate this memory before all the searches have
-    !        been performed.
-    !        Subroutine intent(in) arguments.
-    !        --------------------------------
-    !        nNodes:    Number of local nodes in the given grid.
-    !        nTria:     Idem for the triangles.
-    !        nQuads:    Idem for the quadrilaterals.
-    !        BBox(3,2): The possible bounding box. Only elements within
-    !                   this box will be stored in the ADT.
-    !        useBBox:   Whether or not to use the bounding box.
-    !        comm:      MPI-communicator for the global ADT.
-    !        adtID:     The ID of the ADT.
-    !        Subroutine intent(in), target arguments.
-    !        ----------------------------------------
-    !        coor(3,nNodes):      Nodal coordinates of the local grid.
-    !        triaConn(3,nTria):   Local connectivity of the triangles.
-    !        quadsConn(4,nQuads): Idem for the quadrilaterals.
-    !
-    implicit none
-    !
-    !       Subroutine arguments.
-    !
-    integer, intent(in)          :: comm
-    character(len=*), intent(in) :: adtID
-
-    integer(kind=intType), intent(in) :: nTria
-    integer(kind=intType), intent(in) :: nQuads
-    integer(kind=intType), intent(in) :: nNodes
-
-    logical, intent(in) :: useBBox
-
-    integer(kind=intType), dimension(:,:), intent(in) :: triaConn
-    integer(kind=intType), dimension(:,:), intent(in) :: quadsConn
-
-    real(kind=realType), dimension(3,2), intent(in) :: BBox
-
-    real(kind=realType), dimension(:,:), intent(in) :: coor
-
-    !===============================================================
-
-    ! Call the subroutine buildSurfaceADT to do the actual work.
-
-    call buildSurfaceADT(nTria,    nQuads,    nNodes, coor,    &
-         triaConn, quadsConn, BBox,   useBBox, &
-         comm,     adtID)
-
-  end subroutine adtBuildSurfaceADT
-
-
-  subroutine adtBuildVolumeADT(nTetra,    nPyra,    nPrisms,    &
-       nHexa,     nNodes,   coor,       &
-       tetraConn, pyraConn, prismsConn, &
-       hexaConn,  BBox,     useBBox,    &
-       comm,      adtID)
     !
-    !        This routine builds the 6 dimensional ADT, which stores the
-    !        given volume grid. The memory intensive part of these
-    !        arguments, the arrays with the coordinates and
-    !        connectivities, are not copied. Instead pointers are set to
-    !        these arrays. It is therefore the responsibility of the user
-    !        not to deallocate this memory before all the searches have
-    !        been performed.
-    !        Subroutine intent(in) arguments.
-    !        --------------------------------
-    !        nNodes:    Number of local nodes in the given grid.
-    !        nTetra:    Idem for the tetrahedra.
-    !        nPyra:     Idem for the pyramids.
-    !        nPrisms:   Idem for the prisms.
-    !        nHexa:     Idem for the hexahedra.
-    !        BBox(3,2): The possible bounding box. Only elements within
-    !                   this box will be stored in the ADT.
-    !        useBBox:   Whether or not to use the bounding box.
-    !        comm:      MPI-communicator for the global ADT.
-    !        adtID:     The ID of the ADT.
-    !        Subroutine intent(in), target arguments.
-    !        ----------------------------------------
-    !        coor(3,nNodes):        Nodal coordinates of the local grid.
-    !        tetraConn(4,nTetra):   Local connectivity of the tetrahedra.
-    !        pyraConn(5,nPyra):     Idem for the pyramids.
-    !        prismsConn(6,nPrisms): Idem for the prisms.
-    !        hexaConn(8,nHexa):     Idem for the hexahedra.
+    !      Module, which defines the API of the ADT routines. It is
+    !      included in a module, such that an explicit interface is
+    !      present.
     !
-    implicit none
-    !
-    !       Subroutine arguments.
-    !
-    integer, intent(in)          :: comm
-    character(len=*), intent(in) :: adtID
-
-    integer(kind=intType), intent(in) :: nTetra
-    integer(kind=intType), intent(in) :: nPyra
-    integer(kind=intType), intent(in) :: nPrisms
-    integer(kind=intType), intent(in) :: nHexa
-    integer(kind=intType), intent(in) :: nNodes
-
-    logical, intent(in) :: useBBox
-
-    integer(kind=intType), dimension(:,:), intent(in) :: tetraConn
-    integer(kind=intType), dimension(:,:), intent(in) :: pyraConn
-    integer(kind=intType), dimension(:,:), intent(in) :: prismsConn
-    integer(kind=intType), dimension(:,:), intent(in) :: hexaConn
-
-    real(kind=realType), dimension(3,2), intent(in) :: BBox
-
-    real(kind=realType), dimension(:,:), intent(in) :: coor
-
-    !===============================================================
-
-    ! Call the subroutine buildVolumeADT to do the actual work.
-
-    call buildVolumeADT(nTetra,     nPyra,     nPrisms,   nHexa,    &
-         nNodes,     coor,      tetraConn, pyraConn, &
-         prismsConn, hexaConn,  BBox,      useBBox,  &
-         comm,       adtID)
 
-  end subroutine adtBuildVolumeADT
-
-
-  subroutine adtContainmentSearch(nCoor,  coor,        adtID,     &
-       procID, elementType, elementID, &
-       uvw,    nInterpol,   arrDonor,  &
-       arrInterpol)
-    !
-    !        This routine attempts for every coordinate to find the
-    !        element in the given ADT, which contains that coordinate.
-    !        If no element is found the corresponding entry in procID is
-    !        set to -1 to indicate failure.
-    !        Subroutine intent(in) arguments.
-    !        --------------------------------
-    !        nCoor:     Number of coordinates for which the element must
-    !                   be determined.
-    !        coor:      The coordinates of these points.
-    !        adtID:     The ADT to be searched.
-    !        nInterpol: Number of variables to be interpolated.
-    !        arrDonor:  Array with the donor data; needed to obtain the
-    !                   interpolated data.
-    !        Subroutine intent(out) arguments.
-    !        ---------------------------------
-    !        procID:      The ID of the processor in the group of the ADT
-    !                     where the element containing the point is
-    !                     stored. If no element is found for a given
-    !                     point the corresponding entry in procID is set
-    !                     to -1 to indicate failure. Remember that the
-    !                     processor ID's start at 0 and not at 1.
-    !        elementType: The type of element which contains the point.
-    !        elementID:   The entry in the connectivity of this element
-    !                     which contains the point.
-    !        uvw:         The parametric coordinates of the point in the
-    !                     transformed element; this transformation is
-    !                     such that every element is transformed into a
-    !                     standard element in parametric space. The u, v
-    !                     and w coordinates can be used to determine the
-    !                     actual interpolation weights.
-    !        arrInterpol: Array with the interpolated data.
-    !
+    use constants
+    use adtBuild, only: buildVolumeADT, buildSurfaceADT
+    use adtSearch, only: minDistanceSearch, failSafeSearch, &
+                         containmentsearch
     implicit none
-    !
-    !       Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nCoor, nInterpol
-    character(len=*),         intent(in) :: adtID
-
-    real(kind=realType), dimension(:,:), intent(in) :: coor
-    real(kind=realType), dimension(:,:), intent(in) :: arrDonor
-
-    integer,                  dimension(:), intent(out) :: procID
-    integer(kind=intType), dimension(:), intent(out) :: elementID
-
-    integer(kind=adtElementType), dimension(:), intent(out) :: &
-         elementType
-    real(kind=realType), dimension(:,:), intent(out) :: uvw
-    real(kind=realType), dimension(:,:), intent(out) :: arrInterpol
-
-    !===============================================================
 
-    ! Call the subroutine containmentSearch to do the actual work.
+    !=================================================================
 
-    call containmentSearch(nCoor,       coor,      adtID, procID,    &
-         elementType, elementID, uvw,   nInterpol, &
-         arrDonor,    arrInterpol)
-
-  end subroutine adtContainmentSearch
-
-
-  subroutine adtDeallocateADTs(adtID)
-    !
-    !        This routine deallocates the memory for the given entry in
-    !        the array ADTs and it tries to reallocate ADTs itself
-    !        accordingly.
-    !        Subroutine intent(in) arguments.
-    !        --------------------------------
-    !        adtID: The entry in ADTs to be deallocated.
-    !
-    use adtUtils, only : deallocateADTs
-    implicit none
-    !
-    !       Subroutine arguments.
-    !
-    character(len=*), intent(in) :: adtID
-
-    !===============================================================
-
-    ! Call the subroutine deallocateADTs to do the actual work.
-
-    call deallocateADTs(adtID)
-
-  end subroutine adtDeallocateADTs
-
-
-  subroutine adtFailSafeSearch(nCoor,    coor,        adtID,     &
-       procID,   elementType, elementID, &
-       uvw,      dist2,       nInterpol, &
-       arrDonor, arrInterpol)
-    !
-    !        This routine attempts for every coordinate to find the
-    !        element in the given ADT, which contains that coordinate.
-    !        If no element is found a minimum distance search is
-    !        performed, such that always an interpolation can be
-    !        performed. To indicate that the element does not contain the
-    !        point the element ID is negated.
-    !        Subroutine intent(in) arguments.
-    !        --------------------------------
-    !        nCoor: Number of coordinates for which the element must be
-    !               determined.
-    !        coor:  The coordinates of these points.
-    !        adtID: The ADT to be searched.
-    !        nInterpol: Number of variables to be interpolated.
-    !        arrDonor:  Array with the donor data; needed to obtain the
-    !                   interpolated data.
-    !        Subroutine intent(out) arguments.
-    !        ---------------------------------
-    !        ---------------------------------
-    !        procID:      The ID of the processor in the group of the ADT
-    !                     where the element containing the point is
-    !                     stored. If no element is found for a given
-    !                     point the corresponding entry in procID is set
-    !                     to -1 to indicate failure. Remember that the
-    !                     processor ID's start at 0 and not at 1.
-    !        elementType: The type of element which contains the point.
-    !        elementID:   The entry in the connectivity of this element
-    !                     which contains the point. The ID is negative if
-    !                     the coordinate is outside the element, i.e. if
-    !                     a minimum distance search had to be used.
-    !        uvw:         The parametric coordinates of the point in the
-    !                     transformed element; this transformation is
-    !                     such that every element is transformed into a
-    !                     standard element in parametric space. The u, v
-    !                     and w coordinates can be used to determine the
-    !                     actual interpolation weights.
-    !        arrInterpol: Array with the interpolated data.
-    !        Subroutine intent(inout) arguments.
-    !        -----------------------------------
-    !        dist2: Minimum distance squared of the coordinates to the
-    !               elements of the ADT. On input it should be
-    !               initialized by the calling program, possibly to a
-    !               large value. In this way it is possible to handle
-    !               periodic problems as efficiently as possible.
-    !
-    implicit none
-    !
-    !       Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nCoor, nInterpol
-    character(len=*),         intent(in) :: adtID
-
-    real(kind=realType), dimension(:,:), intent(in) :: coor
-    real(kind=realType), dimension(:,:), intent(in) :: arrDonor
-
-    integer,                  dimension(:), intent(out) :: procID
-    integer(kind=intType), dimension(:), intent(out) :: elementID
-
-    integer(kind=adtElementType), dimension(:), intent(out) :: &
-         elementType
-
-    real(kind=realType), dimension(:,:), intent(out) :: uvw
-    real(kind=realType), dimension(:,:), intent(out) :: arrInterpol
-
-    real(kind=realType), dimension(:), intent(inout) :: dist2
-
-    !===============================================================
-
-    ! Call the subroutine failSafeSearch to do the actual work.
-
-    call failSafeSearch(nCoor,       coor,      adtID,    procID, &
-         elementType, elementID, uvw,      dist2,  &
-         nInterpol,   arrDonor, arrInterpol)
-
-  end subroutine adtFailSafeSearch
-
-
-  subroutine adtMinDistanceSearch(nCoor,    coor,        adtID,     &
-       procID,   elementType, elementID, &
-       uvw,      dist2,       nInterpol, &
-       arrDonor, arrInterpol)
-    !
-    !        This routine attempts for every coordinate to find the
-    !        element in the given ADT which minimizes the distance to
-    !        this point.
-    !        Subroutine intent(in) arguments.
-    !        --------------------------------
-    !        nCoor: Number of coordinates for which the element must be
-    !               determined.
-    !        coor:  The coordinates of these points.
-    !        adtID: The ADT to be searched.
-    !        nInterpol: Number of variables to be interpolated.
-    !        arrDonor:  Array with the donor data; needed to obtain the
-    !                   interpolated data.
-    !        Subroutine intent(out) arguments.
-    !        ---------------------------------
-    !        procID:      The ID of the processor in the group of the ADT
-    !                     where the element containing the point is
-    !                     stored. If no element is found for a given
-    !                     point the corresponding entry in procID is set
-    !                     to -1 to indicate failure. Remember that the
-    !                     processor ID's start at 0 and not at 1.
-    !        elementType: The type of element which contains the point.
-    !        elementID:   The entry in the connectivity of this element
-    !                     which contains the point. The ID is negative if
-    !                     the coordinate is outside the element.
-    !        uvw:         The parametric coordinates of the point in the
-    !                     transformed element; this transformation is
-    !                     such that every element is transformed into a
-    !                     standard element in parametric space. The u, v
-    !                     and w coordinates can be used to determine the
-    !                     actual interpolation weights. If the tree
-    !                     corresponds to a surface mesh the third entry
-    !                     of this array will not be filled.
-    !        arrInterpol: Array with the interpolated data.
-    !        Subroutine intent(inout) arguments.
-    !        -----------------------------------
-    !        dist2: Minimum distance squared of the coordinates to the
-    !               elements of the ADT. On input it should be
-    !               initialized by the calling program, possibly to a
-    !               large value. In this way it is possible to handle
-    !               periodic problems as efficiently as possible.
-    !
-    implicit none
-    !
-    !       Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nCoor, nInterpol
-    character(len=*),         intent(in) :: adtID
-
-    real(kind=realType), dimension(:,:), intent(in) :: coor
-    real(kind=realType), dimension(:,:), intent(in) :: arrDonor
-
-    integer, dimension(:), intent(out) :: procID
-    integer(kind=intType), dimension(:), intent(out) :: elementID
-
-    integer(kind=adtElementType), dimension(:), intent(out) :: &
-         elementType
-
-    real(kind=realType), dimension(:,:), intent(out) :: uvw
-    real(kind=realType), dimension(:,:), intent(out) :: arrInterpol
-
-    real(kind=realType), dimension(:), intent(inout) :: dist2
+contains
 
     !===============================================================
 
-    ! Call the subroutine minDistanceSearch to do the actual work.
-
-    call minDistanceSearch(nCoor,       coor,      adtID,    procID, &
-         elementType, elementID, uvw,      dist2,  &
-         nInterpol,   arrDonor,  arrInterpol)
-
-  end subroutine adtMinDistanceSearch
+    subroutine adtBuildSurfaceADT(nTria, nQuads, nNodes, &
+                                  coor, triaConn, quadsConn, &
+                                  BBox, useBBox, comm, &
+                                  adtID)
+        !
+        !        This routine builds the 6 dimensional ADT, which stores the
+        !        given surface grid. The memory intensive part of these
+        !        arguments, the arrays with the coordinates and
+        !        connectivities, are not copied. Instead pointers are set to
+        !        these arrays. It is therefore the responsibility of the user
+        !        not to deallocate this memory before all the searches have
+        !        been performed.
+        !        Subroutine intent(in) arguments.
+        !        --------------------------------
+        !        nNodes:    Number of local nodes in the given grid.
+        !        nTria:     Idem for the triangles.
+        !        nQuads:    Idem for the quadrilaterals.
+        !        BBox(3,2): The possible bounding box. Only elements within
+        !                   this box will be stored in the ADT.
+        !        useBBox:   Whether or not to use the bounding box.
+        !        comm:      MPI-communicator for the global ADT.
+        !        adtID:     The ID of the ADT.
+        !        Subroutine intent(in), target arguments.
+        !        ----------------------------------------
+        !        coor(3,nNodes):      Nodal coordinates of the local grid.
+        !        triaConn(3,nTria):   Local connectivity of the triangles.
+        !        quadsConn(4,nQuads): Idem for the quadrilaterals.
+        !
+        implicit none
+        !
+        !       Subroutine arguments.
+        !
+        integer, intent(in) :: comm
+        character(len=*), intent(in) :: adtID
+
+        integer(kind=intType), intent(in) :: nTria
+        integer(kind=intType), intent(in) :: nQuads
+        integer(kind=intType), intent(in) :: nNodes
+
+        logical, intent(in) :: useBBox
+
+        integer(kind=intType), dimension(:, :), intent(in) :: triaConn
+        integer(kind=intType), dimension(:, :), intent(in) :: quadsConn
+
+        real(kind=realType), dimension(3, 2), intent(in) :: BBox
+
+        real(kind=realType), dimension(:, :), intent(in) :: coor
+
+        !===============================================================
+
+        ! Call the subroutine buildSurfaceADT to do the actual work.
+
+        call buildSurfaceADT(nTria, nQuads, nNodes, coor, &
+                             triaConn, quadsConn, BBox, useBBox, &
+                             comm, adtID)
+
+    end subroutine adtBuildSurfaceADT
+
+    subroutine adtBuildVolumeADT(nTetra, nPyra, nPrisms, &
+                                 nHexa, nNodes, coor, &
+                                 tetraConn, pyraConn, prismsConn, &
+                                 hexaConn, BBox, useBBox, &
+                                 comm, adtID)
+        !
+        !        This routine builds the 6 dimensional ADT, which stores the
+        !        given volume grid. The memory intensive part of these
+        !        arguments, the arrays with the coordinates and
+        !        connectivities, are not copied. Instead pointers are set to
+        !        these arrays. It is therefore the responsibility of the user
+        !        not to deallocate this memory before all the searches have
+        !        been performed.
+        !        Subroutine intent(in) arguments.
+        !        --------------------------------
+        !        nNodes:    Number of local nodes in the given grid.
+        !        nTetra:    Idem for the tetrahedra.
+        !        nPyra:     Idem for the pyramids.
+        !        nPrisms:   Idem for the prisms.
+        !        nHexa:     Idem for the hexahedra.
+        !        BBox(3,2): The possible bounding box. Only elements within
+        !                   this box will be stored in the ADT.
+        !        useBBox:   Whether or not to use the bounding box.
+        !        comm:      MPI-communicator for the global ADT.
+        !        adtID:     The ID of the ADT.
+        !        Subroutine intent(in), target arguments.
+        !        ----------------------------------------
+        !        coor(3,nNodes):        Nodal coordinates of the local grid.
+        !        tetraConn(4,nTetra):   Local connectivity of the tetrahedra.
+        !        pyraConn(5,nPyra):     Idem for the pyramids.
+        !        prismsConn(6,nPrisms): Idem for the prisms.
+        !        hexaConn(8,nHexa):     Idem for the hexahedra.
+        !
+        implicit none
+        !
+        !       Subroutine arguments.
+        !
+        integer, intent(in) :: comm
+        character(len=*), intent(in) :: adtID
+
+        integer(kind=intType), intent(in) :: nTetra
+        integer(kind=intType), intent(in) :: nPyra
+        integer(kind=intType), intent(in) :: nPrisms
+        integer(kind=intType), intent(in) :: nHexa
+        integer(kind=intType), intent(in) :: nNodes
+
+        logical, intent(in) :: useBBox
+
+        integer(kind=intType), dimension(:, :), intent(in) :: tetraConn
+        integer(kind=intType), dimension(:, :), intent(in) :: pyraConn
+        integer(kind=intType), dimension(:, :), intent(in) :: prismsConn
+        integer(kind=intType), dimension(:, :), intent(in) :: hexaConn
+
+        real(kind=realType), dimension(3, 2), intent(in) :: BBox
+
+        real(kind=realType), dimension(:, :), intent(in) :: coor
+
+        !===============================================================
+
+        ! Call the subroutine buildVolumeADT to do the actual work.
+
+        call buildVolumeADT(nTetra, nPyra, nPrisms, nHexa, &
+                            nNodes, coor, tetraConn, pyraConn, &
+                            prismsConn, hexaConn, BBox, useBBox, &
+                            comm, adtID)
+
+    end subroutine adtBuildVolumeADT
+
+    subroutine adtContainmentSearch(nCoor, coor, adtID, &
+                                    procID, elementType, elementID, &
+                                    uvw, nInterpol, arrDonor, &
+                                    arrInterpol)
+        !
+        !        This routine attempts for every coordinate to find the
+        !        element in the given ADT, which contains that coordinate.
+        !        If no element is found the corresponding entry in procID is
+        !        set to -1 to indicate failure.
+        !        Subroutine intent(in) arguments.
+        !        --------------------------------
+        !        nCoor:     Number of coordinates for which the element must
+        !                   be determined.
+        !        coor:      The coordinates of these points.
+        !        adtID:     The ADT to be searched.
+        !        nInterpol: Number of variables to be interpolated.
+        !        arrDonor:  Array with the donor data; needed to obtain the
+        !                   interpolated data.
+        !        Subroutine intent(out) arguments.
+        !        ---------------------------------
+        !        procID:      The ID of the processor in the group of the ADT
+        !                     where the element containing the point is
+        !                     stored. If no element is found for a given
+        !                     point the corresponding entry in procID is set
+        !                     to -1 to indicate failure. Remember that the
+        !                     processor ID's start at 0 and not at 1.
+        !        elementType: The type of element which contains the point.
+        !        elementID:   The entry in the connectivity of this element
+        !                     which contains the point.
+        !        uvw:         The parametric coordinates of the point in the
+        !                     transformed element; this transformation is
+        !                     such that every element is transformed into a
+        !                     standard element in parametric space. The u, v
+        !                     and w coordinates can be used to determine the
+        !                     actual interpolation weights.
+        !        arrInterpol: Array with the interpolated data.
+        !
+        implicit none
+        !
+        !       Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nCoor, nInterpol
+        character(len=*), intent(in) :: adtID
+
+        real(kind=realType), dimension(:, :), intent(in) :: coor
+        real(kind=realType), dimension(:, :), intent(in) :: arrDonor
+
+        integer, dimension(:), intent(out) :: procID
+        integer(kind=intType), dimension(:), intent(out) :: elementID
+
+        integer(kind=adtElementType), dimension(:), intent(out) :: &
+            elementType
+        real(kind=realType), dimension(:, :), intent(out) :: uvw
+        real(kind=realType), dimension(:, :), intent(out) :: arrInterpol
+
+        !===============================================================
+
+        ! Call the subroutine containmentSearch to do the actual work.
+
+        call containmentSearch(nCoor, coor, adtID, procID, &
+                               elementType, elementID, uvw, nInterpol, &
+                               arrDonor, arrInterpol)
+
+    end subroutine adtContainmentSearch
+
+    subroutine adtDeallocateADTs(adtID)
+        !
+        !        This routine deallocates the memory for the given entry in
+        !        the array ADTs and it tries to reallocate ADTs itself
+        !        accordingly.
+        !        Subroutine intent(in) arguments.
+        !        --------------------------------
+        !        adtID: The entry in ADTs to be deallocated.
+        !
+        use adtUtils, only: deallocateADTs
+        implicit none
+        !
+        !       Subroutine arguments.
+        !
+        character(len=*), intent(in) :: adtID
+
+        !===============================================================
+
+        ! Call the subroutine deallocateADTs to do the actual work.
+
+        call deallocateADTs(adtID)
+
+    end subroutine adtDeallocateADTs
+
+    subroutine adtFailSafeSearch(nCoor, coor, adtID, &
+                                 procID, elementType, elementID, &
+                                 uvw, dist2, nInterpol, &
+                                 arrDonor, arrInterpol)
+        !
+        !        This routine attempts for every coordinate to find the
+        !        element in the given ADT, which contains that coordinate.
+        !        If no element is found a minimum distance search is
+        !        performed, such that always an interpolation can be
+        !        performed. To indicate that the element does not contain the
+        !        point the element ID is negated.
+        !        Subroutine intent(in) arguments.
+        !        --------------------------------
+        !        nCoor: Number of coordinates for which the element must be
+        !               determined.
+        !        coor:  The coordinates of these points.
+        !        adtID: The ADT to be searched.
+        !        nInterpol: Number of variables to be interpolated.
+        !        arrDonor:  Array with the donor data; needed to obtain the
+        !                   interpolated data.
+        !        Subroutine intent(out) arguments.
+        !        ---------------------------------
+        !        ---------------------------------
+        !        procID:      The ID of the processor in the group of the ADT
+        !                     where the element containing the point is
+        !                     stored. If no element is found for a given
+        !                     point the corresponding entry in procID is set
+        !                     to -1 to indicate failure. Remember that the
+        !                     processor ID's start at 0 and not at 1.
+        !        elementType: The type of element which contains the point.
+        !        elementID:   The entry in the connectivity of this element
+        !                     which contains the point. The ID is negative if
+        !                     the coordinate is outside the element, i.e. if
+        !                     a minimum distance search had to be used.
+        !        uvw:         The parametric coordinates of the point in the
+        !                     transformed element; this transformation is
+        !                     such that every element is transformed into a
+        !                     standard element in parametric space. The u, v
+        !                     and w coordinates can be used to determine the
+        !                     actual interpolation weights.
+        !        arrInterpol: Array with the interpolated data.
+        !        Subroutine intent(inout) arguments.
+        !        -----------------------------------
+        !        dist2: Minimum distance squared of the coordinates to the
+        !               elements of the ADT. On input it should be
+        !               initialized by the calling program, possibly to a
+        !               large value. In this way it is possible to handle
+        !               periodic problems as efficiently as possible.
+        !
+        implicit none
+        !
+        !       Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nCoor, nInterpol
+        character(len=*), intent(in) :: adtID
+
+        real(kind=realType), dimension(:, :), intent(in) :: coor
+        real(kind=realType), dimension(:, :), intent(in) :: arrDonor
+
+        integer, dimension(:), intent(out) :: procID
+        integer(kind=intType), dimension(:), intent(out) :: elementID
+
+        integer(kind=adtElementType), dimension(:), intent(out) :: &
+            elementType
+
+        real(kind=realType), dimension(:, :), intent(out) :: uvw
+        real(kind=realType), dimension(:, :), intent(out) :: arrInterpol
+
+        real(kind=realType), dimension(:), intent(inout) :: dist2
+
+        !===============================================================
+
+        ! Call the subroutine failSafeSearch to do the actual work.
+
+        call failSafeSearch(nCoor, coor, adtID, procID, &
+                            elementType, elementID, uvw, dist2, &
+                            nInterpol, arrDonor, arrInterpol)
+
+    end subroutine adtFailSafeSearch
+
+    subroutine adtMinDistanceSearch(nCoor, coor, adtID, &
+                                    procID, elementType, elementID, &
+                                    uvw, dist2, nInterpol, &
+                                    arrDonor, arrInterpol)
+        !
+        !        This routine attempts for every coordinate to find the
+        !        element in the given ADT which minimizes the distance to
+        !        this point.
+        !        Subroutine intent(in) arguments.
+        !        --------------------------------
+        !        nCoor: Number of coordinates for which the element must be
+        !               determined.
+        !        coor:  The coordinates of these points.
+        !        adtID: The ADT to be searched.
+        !        nInterpol: Number of variables to be interpolated.
+        !        arrDonor:  Array with the donor data; needed to obtain the
+        !                   interpolated data.
+        !        Subroutine intent(out) arguments.
+        !        ---------------------------------
+        !        procID:      The ID of the processor in the group of the ADT
+        !                     where the element containing the point is
+        !                     stored. If no element is found for a given
+        !                     point the corresponding entry in procID is set
+        !                     to -1 to indicate failure. Remember that the
+        !                     processor ID's start at 0 and not at 1.
+        !        elementType: The type of element which contains the point.
+        !        elementID:   The entry in the connectivity of this element
+        !                     which contains the point. The ID is negative if
+        !                     the coordinate is outside the element.
+        !        uvw:         The parametric coordinates of the point in the
+        !                     transformed element; this transformation is
+        !                     such that every element is transformed into a
+        !                     standard element in parametric space. The u, v
+        !                     and w coordinates can be used to determine the
+        !                     actual interpolation weights. If the tree
+        !                     corresponds to a surface mesh the third entry
+        !                     of this array will not be filled.
+        !        arrInterpol: Array with the interpolated data.
+        !        Subroutine intent(inout) arguments.
+        !        -----------------------------------
+        !        dist2: Minimum distance squared of the coordinates to the
+        !               elements of the ADT. On input it should be
+        !               initialized by the calling program, possibly to a
+        !               large value. In this way it is possible to handle
+        !               periodic problems as efficiently as possible.
+        !
+        implicit none
+        !
+        !       Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nCoor, nInterpol
+        character(len=*), intent(in) :: adtID
+
+        real(kind=realType), dimension(:, :), intent(in) :: coor
+        real(kind=realType), dimension(:, :), intent(in) :: arrDonor
+
+        integer, dimension(:), intent(out) :: procID
+        integer(kind=intType), dimension(:), intent(out) :: elementID
+
+        integer(kind=adtElementType), dimension(:), intent(out) :: &
+            elementType
+
+        real(kind=realType), dimension(:, :), intent(out) :: uvw
+        real(kind=realType), dimension(:, :), intent(out) :: arrInterpol
+
+        real(kind=realType), dimension(:), intent(inout) :: dist2
+
+        !===============================================================
+
+        ! Call the subroutine minDistanceSearch to do the actual work.
+
+        call minDistanceSearch(nCoor, coor, adtID, procID, &
+                               elementType, elementID, uvw, dist2, &
+                               nInterpol, arrDonor, arrInterpol)
+
+    end subroutine adtMinDistanceSearch
 
 end module adtAPI
diff --git a/src/ADT/adtBuild.F90 b/src/ADT/adtBuild.F90
index 9552cc408..a91e6d695 100644
--- a/src/ADT/adtBuild.F90
+++ b/src/ADT/adtBuild.F90
@@ -1,1417 +1,1414 @@
 module adtBuild
-  !
-  !      Module which contains all the subroutines for the building of
-  !      an ADT, both surface and volume.
-  !
-   use constants
-   use adtUtils, only : adtLeafType, adts, stack, nStack, adtTerminate, &
-        qsortbboxes, reallocateadts, allocateadts
-   use adtData, only : adtType
-   implicit none
-
-  !=================================================================
-
-contains
-
-  !===============================================================
-
-  subroutine buildADT(ADT)
     !
-    !        This routine builds the 6 dimensional ADT for the given
-    !        ADT. When this routine is called it is assumed that the
-    !        bounding boxes of the grid have already been computed; the
-    !        ADT for these bounding boxes is built here.
-    !        Subroutine intent(inout) arguments.
-    !        --------------------------------
-    !        ADT: adt derived type to build
+    !      Module which contains all the subroutines for the building of
+    !      an ADT, both surface and volume.
     !
+    use constants
+    use adtUtils, only: adtLeafType, adts, stack, nStack, adtTerminate, &
+                        qsortbboxes, reallocateadts, allocateadts
+    use adtData, only: adtType
     implicit none
-    !
-    !       Subroutine arguments.
-    !
-    type(adtType), intent(inout) :: ADT
-    !
-    !       Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: i, j, k, ii, kk, ll, mm, nn, nfl, nfr
-    integer(kind=intType) :: nLeaves, nBBoxes, splitDir
-    integer(kind=intType) :: nLeavesToDivide, nLeavesToDivideNew
-    integer(kind=intType) :: nLeavesTot
-
-    integer(kind=intType), dimension(:), pointer :: BB_IDs
-    integer(kind=intType), dimension(:), pointer :: BB_IDsNew
-    integer(kind=intType), dimension(:), pointer :: nBB_IDs
-    integer(kind=intType), dimension(:), pointer :: nBB_IDsNew
-    integer(kind=intType), dimension(:), pointer :: curLeaf
-    integer(kind=intType), dimension(:), pointer :: curLeafNew
-    integer(kind=intType), dimension(:), pointer :: tmpIntPointer
-
-    integer(kind=intType), dimension(0:ADT%nProcs-1) :: tmpArr
-
-    real(kind=realType), dimension(:,:), pointer :: xBBox
-
-    real(kind=realType), dimension(3,2) :: rootLeafBBox
-    real(kind=realType), dimension(3,2,0:ADT%nProcs-1) :: &
-         rootLeavesBBox
-
-    type(adtLeafType), dimension(:), pointer :: ADTree
-
-    ! Initialize nStack and allocate the corresponding array stack.
-    ! These are used in the qsort routine for the bounding boxes.
-    ! As this routine is called quite often it is more efficient to
-    ! have the stack array available rather than allocate it over
-    ! and over again.
-
-    nStack = 100
-    allocate(stack(nStack), stat=ierr)
-    if(ierr /= 0)                       &
-         call adtTerminate(ADT, "buildADT", &
-         "Memory allocation failure for stack.")
-
-    ! Determine the number of leaves of the adt. It can be proved
-    ! that nLeaves equals nBBoxes - 1 for an optimally balanced
-    ! tree. Take the exceptional case of nBBoxes == 0 and
-    ! nBBoxes == 1 into account.
-
-    nBBoxes = ADT%nBBoxes
-    nLeaves = nBBoxes - 1
-    if(nBBoxes <= 1) nLeaves = nLeaves + 1
-
-    ADT%nLeaves = nLeaves
-
-    ! Allocate the memory for the adt.
-
-    allocate(ADT%ADTree(nLeaves), stat=ierr)
-    if(ierr /= 0)                       &
-         call adtTerminate(ADT, "buildADT", &
-         "Memory allocation failure for ADTree.")
-
-    ! Set some pointers to make the code more readable.
-
-    xBBox  => ADT%xBBox
-    ADTree => ADT%ADTree
-
-    ! Allocate the memory for the arrays which control the
-    ! subdivision of the leaves.
-
-    nn = (nBBoxes+1)/2
-    nn = max(nn, 1_intType)
-
-    allocate(BB_IDs(nBBoxes), BB_IDsNew(nBBoxes), &
-         nBB_IDs(0:nn),   nBB_IDsNew(0:nn),   &
-         curLeaf(nn),     curLeafNew(nn),     stat=ierr)
-    if(ierr /= 0)                       &
-         call adtTerminate(ADT, "buildADT", &
-         "Memory allocation failure for the arrays &
-         &used in the subdivision.")
-
-    ! Initialize the arrays BB_IDs, nBB_IDs and curLeaf, such that
-    ! all bounding boxes belong to the root leaf. Also set the
-    ! counters nLeavesToDivide and nLeavesTot, depending on the
-    ! situation
-
-    nBB_IDs(0) = 0; nBB_IDs(1) = nBBoxes
-    curLeaf(1) = 1
-
-    do i=1,nBBoxes
-       BB_IDs(i) = i
-    enddo
-
-    nLeavesToDivide = min(nLeaves, 1_intType)
-    nLeavesTot      = nLeavesToDivide
-
-    ! Initialize splitDir to 0, such that the first time it will
-    ! split in direction 1.
-
-    splitDir = 0
-
-    ! Loop to subdivide the leaves. The division is such that the
-    ! adt is optimally balanced.
-
-    leafDivision: do
-
-       ! Criterion to exit the loop.
-
-       if(nLeavesToDivide == 0) exit
-
-       ! Initializations for the next round of subdivisions and
-       ! increment splitDir.
-
-       nLeavesToDivideNew = 0
-       nBB_IDsNew(0) = 0
-
-       splitdir = splitDir + 1
-       if(splitDir > 6) splitDir = 1
-
-       ! Loop over the current number of leaves to be divided.
-
-       currentLeavesLoop: do i=1,nLeavesToDivide
-
-          ! Store the number of bounding boxes present in the leaf
-          ! in nn, the current leaf number in mm and i-1 in ii.
-
-          ii = i-1
-          nn = nBB_IDs(i) - nBB_IDs(ii)
-          mm = curLeaf(i)
-
-          ! Determine the bounding box coordinates of this leaf.
-
-          ll = BB_IDs(nBB_IDs(ii)+1)
-          ADTree(mm)%xMin(1) = xBBox(1,ll)
-          ADTree(mm)%xMin(2) = xBBox(2,ll)
-          ADTree(mm)%xMin(3) = xBBox(3,ll)
-          ADTree(mm)%xMin(4) = xBBox(4,ll)
-          ADTree(mm)%xMin(5) = xBBox(5,ll)
-          ADTree(mm)%xMin(6) = xBBox(6,ll)
-
-          ADTree(mm)%xMax(1) = xBBox(1,ll)
-          ADTree(mm)%xMax(2) = xBBox(2,ll)
-          ADTree(mm)%xMax(3) = xBBox(3,ll)
-          ADTree(mm)%xMax(4) = xBBox(4,ll)
-          ADTree(mm)%xMax(5) = xBBox(5,ll)
-          ADTree(mm)%xMax(6) = xBBox(6,ll)
-
-          do j=(nBB_IDs(ii)+2),nBB_IDs(i)
-             ll = BB_IDs(j)
-
-             ADTree(mm)%xMin(1) = min(ADTree(mm)%xMin(1), xBBox(1,ll))
-             ADTree(mm)%xMin(2) = min(ADTree(mm)%xMin(2), xBBox(2,ll))
-             ADTree(mm)%xMin(3) = min(ADTree(mm)%xMin(3), xBBox(3,ll))
-             ADTree(mm)%xMin(4) = min(ADTree(mm)%xMin(4), xBBox(4,ll))
-             ADTree(mm)%xMin(5) = min(ADTree(mm)%xMin(5), xBBox(5,ll))
-             ADTree(mm)%xMin(6) = min(ADTree(mm)%xMin(6), xBBox(6,ll))
-
-             ADTree(mm)%xMax(1) = max(ADTree(mm)%xMax(1), xBBox(1,ll))
-             ADTree(mm)%xMax(2) = max(ADTree(mm)%xMax(2), xBBox(2,ll))
-             ADTree(mm)%xMax(3) = max(ADTree(mm)%xMax(3), xBBox(3,ll))
-             ADTree(mm)%xMax(4) = max(ADTree(mm)%xMax(4), xBBox(4,ll))
-             ADTree(mm)%xMax(5) = max(ADTree(mm)%xMax(5), xBBox(5,ll))
-             ADTree(mm)%xMax(6) = max(ADTree(mm)%xMax(6), xBBox(6,ll))
-          enddo
-
-          ! Determine the situation of the leaf. It is either a
-          ! terminal leaf or a leaf that must be subdivided.
-
-          terminalTest: if(nn <= 2) then
-
-             ! Terminal leaf. Store the ID's of the bounding boxes with
-             ! negative numbers in children.
-
-             ADTree(mm)%children(1) = -BB_IDs(nBB_IDs(ii)+1)
-             ADTree(mm)%children(2) = -BB_IDs(nBB_IDs(i))
-
-          else terminalTest
-
-             ! Leaf must be divided. Sort the bounding boxes of the
-             ! current leaf in increasing order; the sorting is based
-             ! on the coordinate in the split direction.
-
-             call qsortBBoxes(BB_IDs(nBB_IDs(ii)+1:), nn, ADT, splitDir)
-
-             ! Determine the number of bounding boxes in the left leaf.
-             ! This number is at least 2. The actual number stored in
-             ! kk is this number plus an offset. Also initialize the
-             ! counter nfl, which is used to store the bounding boxes
-             ! in the arrays for the new round.
-
-             kk  = (nn+1)/2 + nBB_IDs(ii)
-             nfl = nBB_IDsNew(nLeavesToDivideNew)
-
-             ! Copy the ID's of the left bounding boxes into BB_IDsNew.
-             ! Also update nLeavesToDivideNew and the corresponding
-             ! entry in nBB_IDsNew.
-
-             do k=(nBB_IDs(ii)+1),kk
-                nfl = nfl + 1
-                BB_IDsNew(nfl) = BB_IDs(k)
-             enddo
-
-             nLeavesToDivideNew = nLeavesToDivideNew + 1
-             nBB_IDsNew(nLeavesToDivideNew) = nfl
-
-             ! Update the total number of leaves and store this number
-             ! in child 1 of the current leaf and in the current leaves
-             ! for the next round.
-
-             nLeavesTot = nLeavesTot + 1
-             ADTree(mm)%children(1) = nLeavesTot
-             curLeafNew(nLeavesToDivideNew) = nLeavesTot
-
-             ! The right leaf will only be created if it has more than
-             ! one bounding box in it, i.e. if the original leaf has
-             ! more than three bounding boxes. If the new leaf only has
-             ! one bounding box in it, it is not created; instead the
-             ! bounding box is stored in the current leaf.
-
-             if(nn == 3) then
-
-                ! Only three bounding boxes present in the current leaf.
-                ! The right leaf is not created and the last bounding
-                ! box is stored as the second child of the current leaf.
-
-                ADTree(mm)%children(2) = -BB_IDs(nBB_IDs(i))
-
-             else
-
-                ! More than 3 bounding boxes are present and thus the
-                ! right leaf is created. Copy the ID's from BB_IDs into
-                ! BB_IDsNew and update the counters for the new round.
-
-                nfr = nBB_IDsNew(nLeavesToDivideNew)
-                do k=(kk+1),nBB_IDs(i)
-                   nfr = nfr + 1
-                   BB_IDsNew(nfr) = BB_IDs(k)
-                enddo
-
-                nLeavesToDivideNew = nLeavesToDivideNew + 1
-                nBB_IDsNew(nLeavesToDivideNew) = nfr
-
-                ! Update the total number of leaves and store this number
-                ! in child 2 of the current leaf and in the current
-                ! leaves for the next round.
-
-                nLeavesTot = nLeavesTot + 1
-                ADTree(mm)%children(2) = nLeavesTot
-                curLeafNew(nLeavesToDivideNew) = nLeavesTot
-
-             endif
-
-          endif terminalTest
-
-       enddo currentLeavesLoop
-
-       ! Swap the pointers for the next round.
-
-       nLeavesToDivide = nLeavesToDivideNew
-
-       tmpIntPointer => BB_IDs
-       BB_IDs        => BB_IDsNew
-       BB_IDsNew     => tmpIntPointer
-
-       tmpIntPointer => nBB_IDs
-       nBB_IDs       => nBB_IDsNew
-       nBB_IDsNew    => tmpIntPointer
-
-       tmpIntPointer => curLeaf
-       curLeaf       => curLeafNew
-       curLeafNew    => tmpIntPointer
-
-    enddo leafDivision
-
-    ! Deallocate the arrays used to build the local tree.
-
-    deallocate(stack, BB_IDs, BB_IDsNew, nBB_IDs, nBB_IDsNew, &
-         curLeaf, curLeafNew, stat=ierr)
-    if(ierr /= 0)                       &
-         call adtTerminate(ADT, "buildADT", &
-         "Deallocation failure for the local arrays.")
-    !
-    !        Local tree has been built. Now determine the global
-    !        information for this tree.
-    !
-    ! Determine the number and processor ID's of the non-empty local
-    ! trees by gathering the data from the participating processors.
-
-    ii = 1
-    if(nBBoxes == 0) ii = 0
-
-    call mpi_allgather(ii, 1, adflow_integer, tmpArr, 1, adflow_integer, &
-         ADT%comm, ierr)
-
-    ii = 0
-    do i=0,(ADT%nProcs-1)
-       ii = ii + tmpArr(i)
-    enddo
-
-    ADT%nRootLeaves = ii
-
-    ! Allocate the memory for both the processor ID's and the
-    ! 3D bounding box of the root leaf.
-
-    allocate(ADT%rootLeavesProcs(ii), &
-         ADT%rootBBoxes(3,2,ii), stat=ierr)
-    if(ierr /= 0)                       &
-         call adtTerminate(ADT, "buildADT", &
-         "Memory allocation failure for &
-         &rootLeavesProcs and rootBBoxes.")
-
-    ! Determine the processor ID's of the non-empty trees.
 
-    ii = 0
-    ADT%myEntryInRootProcs = 0
+    !=================================================================
 
-    do i=0,(ADT%nProcs-1)
-       if(tmpArr(i) > 0) then
-          ii = ii + 1
-          ADT%rootLeavesProcs(ii) = i
-          if(ADT%myID == i) ADT%myEntryInRootProcs = ii
-       endif
-    enddo
-
-    ! Determine the local 3D bounding box of the root leaf.
-    ! If no local tree is present, just set it to zero to avoid
-    ! problems. The values do not matter.
-
-    if(nBBoxes == 0) then
-       rootLeafBBox = zero
-    else
-       rootLeafBBox(1,1) = ADTree(1)%xMin(1)
-       rootLeafBBox(2,1) = ADTree(1)%xMin(2)
-       rootLeafBBox(3,1) = ADTree(1)%xMin(3)
-
-       rootLeafBBox(1,2) = ADTree(1)%xMax(4)
-       rootLeafBBox(2,2) = ADTree(1)%xMax(5)
-       rootLeafBBox(3,2) = ADTree(1)%xMax(6)
-    endif
-
-    ! Gather the data of the root leaves.
-
-    call mpi_allgather(rootLeafBBox, 6, adflow_real, rootLeavesBBox, &
-         6, adflow_real, ADT%comm, ierr)
-
-    ! Store the 3D root bounding boxes of the non-empty trees in
-    ! the data structure for the current ADT.
-
-    ii = 0
-    do i=0,(ADT%nProcs-1)
-       if(tmpArr(i) > 0) then
-          ii = ii + 1
-
-          ADT%rootBBoxes(1,1,ii) = rootLeavesBBox(1,1,i)
-          ADT%rootBBoxes(2,1,ii) = rootLeavesBBox(2,1,i)
-          ADT%rootBBoxes(3,1,ii) = rootLeavesBBox(3,1,i)
-
-          ADT%rootBBoxes(1,2,ii) = rootLeavesBBox(1,2,i)
-          ADT%rootBBoxes(2,2,ii) = rootLeavesBBox(2,2,i)
-          ADT%rootBBoxes(3,2,ii) = rootLeavesBBox(3,2,i)
-       endif
-    enddo
-
-  end subroutine buildADT
-
-
-  subroutine buildSurfaceADT(nTria,  nQuads,   nNodes,    &
-       coor,   triaConn, quadsConn, &
-       BBox,   useBBox,  comm,      &
-       adtID)
-    !
-    !        This routine builds the 6 dimensional ADT, which stores the
-    !        given surface grid. The memory intensive part of these
-    !        arguments, the arrays with the coordinates and
-    !        connectivities, are not copied. Instead pointers are set to
-    !        these arrays. It is therefore the responsibility of the user
-    !        not to deallocate this memory before all the searches have
-    !        been performed.
-    !        Subroutine intent(in) arguments.
-    !        --------------------------------
-    !        nNodes:    Number of local nodes in the given grid.
-    !        nTria:     Idem for the triangles.
-    !        nQuads:    Idem for the quadrilaterals.
-    !        BBox(3,2): The possible bounding box. Only elements within
-    !                   this box will be stored in the ADT.
-    !        useBBox:   Whether or not to use the bounding box.
-    !        comm:      MPI-communicator for the global ADT.
-    !        adtID:     The ID of the ADT.
-    !        Subroutine intent(in), target arguments.
-    !        ----------------------------------------
-    !        coor(3,nNodes):      Nodal coordinates of the local grid.
-    !        triaConn(3,nTria):   Local connectivity of the triangles.
-    !        quadsConn(4,nQuads): Idem for the quadrilaterals.
-    !
-    implicit none
-    !
-    !       Subroutine arguments.
-    !
-    integer, intent(in)          :: comm
-    character(len=*), intent(in) :: adtID
-
-    integer(kind=intType), intent(in) :: nTria
-    integer(kind=intType), intent(in) :: nQuads
-    integer(kind=intType), intent(in) :: nNodes
-
-    logical, intent(in) :: useBBox
-
-    integer(kind=intType), dimension(:,:), intent(in), &
-         target :: triaConn
-    integer(kind=intType), dimension(:,:), intent(in), &
-         target :: quadsConn
-
-    real(kind=realType), dimension(3,2), intent(in) :: BBox
-
-    real(kind=realType), dimension(:,:), intent(in), &
-         target :: coor
-    !
-    !       Local variables.
-    !
-    integer :: ierr, ll, nNPE, jj
-
-    integer(kind=adtElementType) :: elType
-
-    integer(kind=intType) :: i, j, ii, mm, nn, nElem
-
-    integer(kind=intType), dimension(:,:), pointer :: conn
-
-    real(kind=realType), dimension(3) :: xMin, xMax
-
-    logical, dimension(:), allocatable :: elementWithinBBox
-
-    type(adtType), pointer :: ADT
-
-    ! ! Allocate or reallocate the memory for ADTs. This depends
-    ! ! whether or not this is the first ADT to be built.
-
-    if( allocated(ADTs) ) then
-       call reallocateADTs(adtID, jj)
-    else
-       call allocateADTs
-       jj = 1
-    endif
-
-    ! The ADT we are currently working with
-    ADT => ADTs(jj)
-
-    ! Make sure the ADT is active and store the ID of this ADT.
-
-    ADT%isActive = .true.
-    ADT%adtID    = adtID
-
-    ! Copy the communicator and determine the number of processors
-    ! and my processor ID in this group.
-
-    ADT%comm = comm
-    call mpi_comm_rank(comm, ADT%myID,   ierr)
-    call mpi_comm_size(comm, ADT%nProcs, ierr)
-
-    ! Set the ADT type, which is a surface ADT.
-
-    ADT%adtType = adtSurfaceADT
-
-    ! Copy the number of nodes and surface elements and set the
-    ! number of volume elements to 0; only a surface grid has been
-    ! given.
-
-    ADT%nNodes = nNodes
-    ADT%nTria  = nTria
-    ADT%nQuads = nQuads
-
-    ADT%nTetra  = 0
-    ADT%nPyra   = 0
-    ADT%nPrisms = 0
-    ADT%nHexa   = 0
-
-    ! Set the pointers for the coordinates and the
-    ! surface connectivities.
-
-    ADT%coor      => coor
-    ADT%triaConn  => triaConn
-    ADT%quadsConn => quadsConn
-
-    ! Determine the number of elements to be stored in the ADT.
-    ! This depends whether or not the global bounding box should be
-    ! used when building the ADT.
-
-    testBBox: if( useBBox ) then
-
-       ! Global bounding box is used. Allocate the memory for the
-       ! logical elementWithinBBox.
-
-       nn = nTria + nQuads
-       allocate(elementWithinBBox(nn), stat=ierr)
-       if(ierr /= 0)                              &
-            call adtTerminate(ADT, "buildSurfaceADT", &
-            "Memory allocation failure for &
-            &elementWithinBBox.")
-
-       ! Loop over the number of element types.
-
-       ii = 0
-       elementLoop1: do ll=1,2
-
-          ! Set the correct pointers for this element.
-
-          call setSurfacePointers(ll)
-
-          ! Loop over the elements and determine the bounding box of
-          ! each element.
-
-          do i=1,nElem
-             ii = ii + 1
-
-             mm = conn(1,i)
-             xMin(1) = coor(1,mm); xMax(1) = coor(1,mm)
-             xMin(2) = coor(2,mm); xMax(2) = coor(2,mm)
-             xMin(3) = coor(3,mm); xMax(3) = coor(3,mm)
-
-             do j=2,nNPE
-                mm = conn(j,i)
-
-                xMin(1) = min(xMin(1),coor(1,mm))
-                xMin(2) = min(xMin(2),coor(2,mm))
-                xMin(3) = min(xMin(3),coor(3,mm))
-
-                xMax(1) = max(xMax(1),coor(1,mm))
-                xMax(2) = max(xMax(2),coor(2,mm))
-                xMax(3) = max(xMax(3),coor(3,mm))
-             enddo
-
-             ! Check if the bounding box is (partially) inside the
-             ! global bounding box. If so, set elementWithinBBox
-             ! to .true.; otherwise set it to .false.
-
-             if(xMax(1) >= BBox(1,1) .and. xMin(1) <= BBox(1,2) .and. &
-                  xMax(2) >= BBox(2,1) .and. xMin(2) <= BBox(2,2) .and. &
-                  xMax(3) >= BBox(3,1) .and. xMin(3) <= BBox(3,2)) then
-                elementWithinBBox(ii) = .true.
-             else
-                elementWithinBBox(ii) = .false.
-             endif
-
-          enddo
-       enddo elementLoop1
-
-       ! Determine the local number of elements within the global
-       ! bounding box.
-
-       ii = 0
-       do i=1,nn
-          if( elementWithinBBox(i) ) ii = ii + 1
-       enddo
-
-       ADT%nBBoxes = ii
+contains
 
-       ! Allocate the memory for the bounding box coordinates, the
-       ! corresponding element type and the index in the connectivity.
+    !===============================================================
 
-       allocate(ADT%xBBox(6,ii), ADT%elementType(ii), &
-            ADT%elementID(ii), stat=ierr)
-       if(ierr /= 0)                              &
-            call adtTerminate(ADT, "buildSurfaceADT", &
-            "Memory allocation failure for bounding &
-            &box data.")
+    subroutine buildADT(ADT)
+        !
+        !        This routine builds the 6 dimensional ADT for the given
+        !        ADT. When this routine is called it is assumed that the
+        !        bounding boxes of the grid have already been computed; the
+        !        ADT for these bounding boxes is built here.
+        !        Subroutine intent(inout) arguments.
+        !        --------------------------------
+        !        ADT: adt derived type to build
+        !
+        implicit none
+        !
+        !       Subroutine arguments.
+        !
+        type(adtType), intent(inout) :: ADT
+        !
+        !       Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: i, j, k, ii, kk, ll, mm, nn, nfl, nfr
+        integer(kind=intType) :: nLeaves, nBBoxes, splitDir
+        integer(kind=intType) :: nLeavesToDivide, nLeavesToDivideNew
+        integer(kind=intType) :: nLeavesTot
 
-       ! Repeat the loop over the all the elements, but now store
-       ! the bounding boxes in the ADT.
+        integer(kind=intType), dimension(:), pointer :: BB_IDs
+        integer(kind=intType), dimension(:), pointer :: BB_IDsNew
+        integer(kind=intType), dimension(:), pointer :: nBB_IDs
+        integer(kind=intType), dimension(:), pointer :: nBB_IDsNew
+        integer(kind=intType), dimension(:), pointer :: curLeaf
+        integer(kind=intType), dimension(:), pointer :: curLeafNew
+        integer(kind=intType), dimension(:), pointer :: tmpIntPointer
 
-       ii = 0
-       nn = 0
-       elementLoop2: do ll=1,2
+        integer(kind=intType), dimension(0:ADT%nProcs - 1) :: tmpArr
 
-          ! Set the correct pointers for this element.
+        real(kind=realType), dimension(:, :), pointer :: xBBox
 
-          call setSurfacePointers(ll)
+        real(kind=realType), dimension(3, 2) :: rootLeafBBox
+        real(kind=realType), dimension(3, 2, 0:ADT%nProcs - 1) :: &
+            rootLeavesBBox
 
-          ! Loop over the elements and store the bounding box info,
-          ! if needed.
+        type(adtLeafType), dimension(:), pointer :: ADTree
 
-          do i=1,nElem
-             ii = ii + 1
-             testWithin: if( elementWithinBBox(ii) ) then
+        ! Initialize nStack and allocate the corresponding array stack.
+        ! These are used in the qsort routine for the bounding boxes.
+        ! As this routine is called quite often it is more efficient to
+        ! have the stack array available rather than allocate it over
+        ! and over again.
 
-                nn = nn + 1
+        nStack = 100
+        allocate (stack(nStack), stat=ierr)
+        if (ierr /= 0) &
+            call adtTerminate(ADT, "buildADT", &
+                              "Memory allocation failure for stack.")
 
-                ADT%elementType(nn) = elType
-                ADT%elementID(nn)   = i
+        ! Determine the number of leaves of the adt. It can be proved
+        ! that nLeaves equals nBBoxes - 1 for an optimally balanced
+        ! tree. Take the exceptional case of nBBoxes == 0 and
+        ! nBBoxes == 1 into account.
 
-                mm = conn(1,i)
-                xMin(1) = coor(1,mm); xMax(1) = coor(1,mm)
-                xMin(2) = coor(2,mm); xMax(2) = coor(2,mm)
-                xMin(3) = coor(3,mm); xMax(3) = coor(3,mm)
+        nBBoxes = ADT%nBBoxes
+        nLeaves = nBBoxes - 1
+        if (nBBoxes <= 1) nLeaves = nLeaves + 1
 
-                do j=2,nNPE
-                   mm = conn(j,i)
+        ADT%nLeaves = nLeaves
 
-                   xMin(1) = min(xMin(1),coor(1,mm))
-                   xMin(2) = min(xMin(2),coor(2,mm))
-                   xMin(3) = min(xMin(3),coor(3,mm))
+        ! Allocate the memory for the adt.
 
-                   xMax(1) = max(xMax(1),coor(1,mm))
-                   xMax(2) = max(xMax(2),coor(2,mm))
-                   xMax(3) = max(xMax(3),coor(3,mm))
-                enddo
+        allocate (ADT%ADTree(nLeaves), stat=ierr)
+        if (ierr /= 0) &
+            call adtTerminate(ADT, "buildADT", &
+                              "Memory allocation failure for ADTree.")
+
+        ! Set some pointers to make the code more readable.
+
+        xBBox => ADT%xBBox
+        ADTree => ADT%ADTree
+
+        ! Allocate the memory for the arrays which control the
+        ! subdivision of the leaves.
 
-                ADT%xBBox(1,nn) = xMin(1)
-                ADT%xBBox(2,nn) = xMin(2)
-                ADT%xBBox(3,nn) = xMin(3)
+        nn = (nBBoxes + 1) / 2
+        nn = max(nn, 1_intType)
 
-                ADT%xBBox(4,nn) = xMax(1)
-                ADT%xBBox(5,nn) = xMax(2)
-                ADT%xBBox(6,nn) = xMax(3)
+        allocate (BB_IDs(nBBoxes), BB_IDsNew(nBBoxes), &
+                  nBB_IDs(0:nn), nBB_IDsNew(0:nn), &
+                  curLeaf(nn), curLeafNew(nn), stat=ierr)
+        if (ierr /= 0) &
+             call adtTerminate(ADT, "buildADT", &
+             "Memory allocation failure for the arrays &
+             &used in the subdivision.")
 
-             endif testWithin
-          enddo
-       enddo elementLoop2
+        ! Initialize the arrays BB_IDs, nBB_IDs and curLeaf, such that
+        ! all bounding boxes belong to the root leaf. Also set the
+        ! counters nLeavesToDivide and nLeavesTot, depending on the
+        ! situation
 
-       ! Deallocate the memory for elementWithinBBox.
+        nBB_IDs(0) = 0; nBB_IDs(1) = nBBoxes
+        curLeaf(1) = 1
 
-       deallocate(elementWithinBBox, stat=ierr)
-       if(ierr /= 0)                              &
-            call adtTerminate(ADT, "buildSurfaceADT", &
-            "Deallocation failure for &
-            &elementWithinBBox.")
+        do i = 1, nBBoxes
+            BB_IDs(i) = i
+        end do
 
-    else testBBox
+        nLeavesToDivide = min(nLeaves, 1_intType)
+        nLeavesTot = nLeavesToDivide
 
-       ! No global bounding box. The number of local bounding boxes
-       ! to be stored is the total number of local surface elements.
+        ! Initialize splitDir to 0, such that the first time it will
+        ! split in direction 1.
 
-       ii = nTria + nQuads
-       ADT%nBBoxes = ii
+        splitDir = 0
 
-       ! Allocate the memory for the bounding box coordinates, the
-       ! corresponding element type and the index in the connectivity.
+        ! Loop to subdivide the leaves. The division is such that the
+        ! adt is optimally balanced.
 
-       allocate(ADT%xBBox(6,ii), ADT%elementType(ii), &
-            ADT%elementID(ii), stat=ierr)
-       if(ierr /= 0)                              &
-            call adtTerminate(ADT, "buildSurfaceADT", &
-            "Memory allocation failure for bounding &
-            &box data.")
+        leafDivision: do
 
-       ! Loop over the number of element types present, i.e. 2,
-       ! to store the bounding boxes; nn is the counter.
+            ! Criterion to exit the loop.
 
-       nn = 0
-       elementLoop3: do ll=1,2
+            if (nLeavesToDivide == 0) exit
 
-          ! Set the correct pointers for this element.
+            ! Initializations for the next round of subdivisions and
+            ! increment splitDir.
 
-          call setSurfacePointers(ll)
+            nLeavesToDivideNew = 0
+            nBB_IDsNew(0) = 0
 
-          ! Loop over the number of elements and store the bounding
-          ! box info.
+            splitdir = splitDir + 1
+            if (splitDir > 6) splitDir = 1
 
-          do i=1,nElem
-             nn = nn + 1
+            ! Loop over the current number of leaves to be divided.
 
-             ADT%elementType(nn) = elType
-             ADT%elementID(nn)   = i
+            currentLeavesLoop: do i = 1, nLeavesToDivide
 
-             mm = conn(1,i)
-             xMin(1) = coor(1,mm); xMax(1) = coor(1,mm)
-             xMin(2) = coor(2,mm); xMax(2) = coor(2,mm)
-             xMin(3) = coor(3,mm); xMax(3) = coor(3,mm)
+                ! Store the number of bounding boxes present in the leaf
+                ! in nn, the current leaf number in mm and i-1 in ii.
 
-             do j=2,nNPE
-                mm = conn(j,i)
+                ii = i - 1
+                nn = nBB_IDs(i) - nBB_IDs(ii)
+                mm = curLeaf(i)
 
-                xMin(1) = min(xMin(1),coor(1,mm))
-                xMin(2) = min(xMin(2),coor(2,mm))
-                xMin(3) = min(xMin(3),coor(3,mm))
+                ! Determine the bounding box coordinates of this leaf.
 
-                xMax(1) = max(xMax(1),coor(1,mm))
-                xMax(2) = max(xMax(2),coor(2,mm))
-                xMax(3) = max(xMax(3),coor(3,mm))
-             enddo
+                ll = BB_IDs(nBB_IDs(ii) + 1)
+                ADTree(mm)%xMin(1) = xBBox(1, ll)
+                ADTree(mm)%xMin(2) = xBBox(2, ll)
+                ADTree(mm)%xMin(3) = xBBox(3, ll)
+                ADTree(mm)%xMin(4) = xBBox(4, ll)
+                ADTree(mm)%xMin(5) = xBBox(5, ll)
+                ADTree(mm)%xMin(6) = xBBox(6, ll)
 
-             ADT%xBBox(1,nn) = xMin(1)
-             ADT%xBBox(2,nn) = xMin(2)
-             ADT%xBBox(3,nn) = xMin(3)
+                ADTree(mm)%xMax(1) = xBBox(1, ll)
+                ADTree(mm)%xMax(2) = xBBox(2, ll)
+                ADTree(mm)%xMax(3) = xBBox(3, ll)
+                ADTree(mm)%xMax(4) = xBBox(4, ll)
+                ADTree(mm)%xMax(5) = xBBox(5, ll)
+                ADTree(mm)%xMax(6) = xBBox(6, ll)
 
-             ADT%xBBox(4,nn) = xMax(1)
-             ADT%xBBox(5,nn) = xMax(2)
-             ADT%xBBox(6,nn) = xMax(3)
-          enddo
+                do j = (nBB_IDs(ii) + 2), nBB_IDs(i)
+                    ll = BB_IDs(j)
 
-       enddo elementLoop3
+                    ADTree(mm)%xMin(1) = min(ADTree(mm)%xMin(1), xBBox(1, ll))
+                    ADTree(mm)%xMin(2) = min(ADTree(mm)%xMin(2), xBBox(2, ll))
+                    ADTree(mm)%xMin(3) = min(ADTree(mm)%xMin(3), xBBox(3, ll))
+                    ADTree(mm)%xMin(4) = min(ADTree(mm)%xMin(4), xBBox(4, ll))
+                    ADTree(mm)%xMin(5) = min(ADTree(mm)%xMin(5), xBBox(5, ll))
+                    ADTree(mm)%xMin(6) = min(ADTree(mm)%xMin(6), xBBox(6, ll))
 
-    endif testBBox
+                    ADTree(mm)%xMax(1) = max(ADTree(mm)%xMax(1), xBBox(1, ll))
+                    ADTree(mm)%xMax(2) = max(ADTree(mm)%xMax(2), xBBox(2, ll))
+                    ADTree(mm)%xMax(3) = max(ADTree(mm)%xMax(3), xBBox(3, ll))
+                    ADTree(mm)%xMax(4) = max(ADTree(mm)%xMax(4), xBBox(4, ll))
+                    ADTree(mm)%xMax(5) = max(ADTree(mm)%xMax(5), xBBox(5, ll))
+                    ADTree(mm)%xMax(6) = max(ADTree(mm)%xMax(6), xBBox(6, ll))
+                end do
 
-    ! Build the ADT from the now known boundary boxes.
+                ! Determine the situation of the leaf. It is either a
+                ! terminal leaf or a leaf that must be subdivided.
 
-    call buildADT(ADT)
+                terminalTest: if (nn <= 2) then
 
-    !===============================================================
-
-  contains
-
-    !=============================================================
-
-    subroutine setSurfacePointers(ll)
-      !
-      !          This internal subroutine sets the pointers to the correct
-      !          surface element, such that a loop over the element types
-      !          can be used.
-      !          Subroutine intent(in) arguments.
-      !          --------------------------------
-      !          ll: Element type for which the pointers must be used.
-      !
-      implicit none
-      !
-      !         Subroutine arguments.
-      !
-      integer, intent(in) :: ll
-
-      select case (ll)
-      case (1)
-         elType = adtTriangle;      nElem = nTria;  nNPE = 3
-         conn => triaConn
-      case (2)
-         elType = adtQuadrilateral; nElem = nQuads; nNPE = 4
-         conn => quadsConn
-      case (3)
-      end select
-
-    end subroutine setSurfacePointers
-
-  end subroutine buildSurfaceADT
-
-
-  subroutine buildVolumeADT(nTetra,    nPyra,    nPrisms,    &
-       nHexa,     nNodes,   coor,       &
-       tetraConn, pyraConn, prismsConn, &
-       hexaConn,  BBox,     useBBox,    &
-       comm,      adtID)
-    !
-    !        This routine builds the 6 dimensional ADT, which stores the
-    !        given volume grid. The memory intensive part of these
-    !        arguments, the arrays with the coordinates and
-    !        connectivities, are not copied. Instead pointers are set to
-    !        these arrays. It is therefore the responsibility of the user
-    !        not to deallocate this memory before all the searches have
-    !        been performed.
-    !        Subroutine intent(in) arguments.
-    !        --------------------------------
-    !        nNodes:    Number of local nodes in the given grid.
-    !        nTetra:    Idem for the tetrahedra.
-    !        nPyra:     Idem for the pyramids.
-    !        nPrisms:   Idem for the prisms.
-    !        nHexa:     Idem for the hexahedra.
-    !        BBox(3,2): The possible bounding box. Only elements within
-    !                   this box will be stored in the ADT.
-    !        useBBox:   Whether or not to use the bounding box.
-    !        comm:      MPI-communicator for the global ADT.
-    !        adtID:     The ID of the ADT.
-    !        Subroutine intent(in), target arguments.
-    !        ----------------------------------------
-    !        coor(3,nNodes):        Nodal coordinates of the local grid.
-    !        tetraConn(4,nTetra):   Local connectivity of the tetrahedra.
-    !        pyraConn(5,nPyra):     Idem for the pyramids.
-    !        prismsConn(6,nPrisms): Idem for the prisms.
-    !        hexaConn(8,nHexa):     Idem for the hexahedra.
-    !
-    implicit none
-    !
-    !       Subroutine arguments.
-    !
-    integer, intent(in)          :: comm
-    character(len=*), intent(in) :: adtID
-
-    integer(kind=intType), intent(in) :: nTetra
-    integer(kind=intType), intent(in) :: nPyra
-    integer(kind=intType), intent(in) :: nPrisms
-    integer(kind=intType), intent(in) :: nHexa
-    integer(kind=intType), intent(in) :: nNodes
-
-    logical, intent(in) :: useBBox
-
-    integer(kind=intType), dimension(:,:), intent(in), &
-         target :: tetraConn
-    integer(kind=intType), dimension(:,:), intent(in), &
-         target :: pyraConn
-    integer(kind=intType), dimension(:,:), intent(in), &
-         target :: prismsConn
-    integer(kind=intType), dimension(:,:), intent(in), &
-         target :: hexaConn
-
-    real(kind=realType), dimension(3,2), intent(in) :: BBox
-
-    real(kind=realType), dimension(:,:), intent(in), &
-         target :: coor
-    !
-    !       Local variables.
-    !
-    integer :: ierr, ll, nNPE
+                    ! Terminal leaf. Store the ID's of the bounding boxes with
+                    ! negative numbers in children.
 
-    integer(kind=adtElementType) :: elType
+                    ADTree(mm)%children(1) = -BB_IDs(nBB_IDs(ii) + 1)
+                    ADTree(mm)%children(2) = -BB_IDs(nBB_IDs(i))
 
-    integer(kind=intType) :: i, j, ii, jj, mm, nn, nElem
+                    else terminalTest
 
-    integer(kind=intType), dimension(:,:), pointer :: conn
+                    ! Leaf must be divided. Sort the bounding boxes of the
+                    ! current leaf in increasing order; the sorting is based
+                    ! on the coordinate in the split direction.
 
-    real(kind=realType), dimension(3) :: xMin, xMax
+                    call qsortBBoxes(BB_IDs(nBB_IDs(ii) + 1:), nn, ADT, splitDir)
 
-    logical, dimension(:), allocatable :: elementWithinBBox
+                    ! Determine the number of bounding boxes in the left leaf.
+                    ! This number is at least 2. The actual number stored in
+                    ! kk is this number plus an offset. Also initialize the
+                    ! counter nfl, which is used to store the bounding boxes
+                    ! in the arrays for the new round.
 
-    type(adtType), pointer :: ADT
+                    kk = (nn + 1) / 2 + nBB_IDs(ii)
+                    nfl = nBB_IDsNew(nLeavesToDivideNew)
 
-    ! Allocate or reallocate the memory for ADTs. This depends
-    ! whether or not this is the first ADT to be built.
+                    ! Copy the ID's of the left bounding boxes into BB_IDsNew.
+                    ! Also update nLeavesToDivideNew and the corresponding
+                    ! entry in nBB_IDsNew.
 
-    if( allocated(ADTs) ) then
-       call reallocateADTs(adtID, jj)
-    else
-       call allocateADTs
-       jj = 1
-    endif
+                    do k = (nBB_IDs(ii) + 1), kk
+                        nfl = nfl + 1
+                        BB_IDsNew(nfl) = BB_IDs(k)
+                    end do
 
-    ADT => ADTs(jj)
+                    nLeavesToDivideNew = nLeavesToDivideNew + 1
+                    nBB_IDsNew(nLeavesToDivideNew) = nfl
 
-    ! Make sure the ADT is active and store the ID of this ADT.
+                    ! Update the total number of leaves and store this number
+                    ! in child 1 of the current leaf and in the current leaves
+                    ! for the next round.
 
-    ADT%isActive = .true.
-    ADT%adtID    = adtID
+                    nLeavesTot = nLeavesTot + 1
+                    ADTree(mm)%children(1) = nLeavesTot
+                    curLeafNew(nLeavesToDivideNew) = nLeavesTot
 
-    ! Copy the communicator and determine the number of processors
-    ! and my processor ID in this group.
+                    ! The right leaf will only be created if it has more than
+                    ! one bounding box in it, i.e. if the original leaf has
+                    ! more than three bounding boxes. If the new leaf only has
+                    ! one bounding box in it, it is not created; instead the
+                    ! bounding box is stored in the current leaf.
 
-    ADT%comm = comm
-    call mpi_comm_rank(comm, ADT%myID,   ierr)
-    call mpi_comm_size(comm, ADT%nProcs, ierr)
+                    if (nn == 3) then
 
-    ! Set the ADT type, which is a volume ADT.
+                        ! Only three bounding boxes present in the current leaf.
+                        ! The right leaf is not created and the last bounding
+                        ! box is stored as the second child of the current leaf.
 
-    ADT%adtType = adtVolumeADT
+                        ADTree(mm)%children(2) = -BB_IDs(nBB_IDs(i))
 
-    ! Copy the number of nodes and volume elements and set the number
-    ! of surface elements to 0; only a volume grid has been given.
+                    else
 
-    ADT%nNodes  = nNodes
-    ADT%nTetra  = nTetra
-    ADT%nPyra   = nPyra
-    ADT%nPrisms = nPrisms
-    ADT%nHexa   = nHexa
+                        ! More than 3 bounding boxes are present and thus the
+                        ! right leaf is created. Copy the ID's from BB_IDs into
+                        ! BB_IDsNew and update the counters for the new round.
 
-    ADT%nTria  = 0
-    ADT%nQuads = 0
+                        nfr = nBB_IDsNew(nLeavesToDivideNew)
+                        do k = (kk + 1), nBB_IDs(i)
+                            nfr = nfr + 1
+                            BB_IDsNew(nfr) = BB_IDs(k)
+                        end do
 
-    ! Set the pointers for the coordinates and the
-    ! volume connectivities.
+                        nLeavesToDivideNew = nLeavesToDivideNew + 1
+                        nBB_IDsNew(nLeavesToDivideNew) = nfr
 
-    ADT%coor       => coor
-    ADT%tetraConn  => tetraConn
-    ADT%pyraConn   => pyraConn
-    ADT%prismsConn => prismsConn
-    ADT%hexaConn   => hexaConn
+                        ! Update the total number of leaves and store this number
+                        ! in child 2 of the current leaf and in the current
+                        ! leaves for the next round.
 
-    ! Determine the number of elements to be stored in the ADT.
-    ! This depends whether or not the global bounding box should be
-    ! used when building the ADT.
+                        nLeavesTot = nLeavesTot + 1
+                        ADTree(mm)%children(2) = nLeavesTot
+                        curLeafNew(nLeavesToDivideNew) = nLeavesTot
 
-    testBBox: if( useBBox ) then
+                    end if
 
-       ! Global bounding box is used. Allocate the memory for the
-       ! logical elementWithinBBox.
+                end if terminalTest
 
-       nn = nTetra + nPyra + nPrisms + nHexa
-       allocate(elementWithinBBox(nn), stat=ierr)
-       if(ierr /= 0)                             &
-            call adtTerminate(ADT, "buildVolumeADT", &
-            "Memory allocation failure for &
-            &elementWithinBBox.")
+            end do currentLeavesLoop
 
-       ! Loop over the number of element types.
+            ! Swap the pointers for the next round.
 
-       ii = 0
-       elementLoop1: do ll=1,4
+            nLeavesToDivide = nLeavesToDivideNew
 
-          ! Set the correct pointers for this element.
+            tmpIntPointer => BB_IDs
+            BB_IDs => BB_IDsNew
+            BB_IDsNew => tmpIntPointer
 
-          call setVolumePointers(ll)
+            tmpIntPointer => nBB_IDs
+            nBB_IDs => nBB_IDsNew
+            nBB_IDsNew => tmpIntPointer
 
-          ! Loop over the elements and determine the bounding box of
-          ! each element.
+            tmpIntPointer => curLeaf
+            curLeaf => curLeafNew
+            curLeafNew => tmpIntPointer
 
-          do i=1,nElem
-             ii = ii + 1
+        end do leafDivision
 
-             mm = conn(1,i)
-             xMin(1) = coor(1,mm); xMax(1) = coor(1,mm)
-             xMin(2) = coor(2,mm); xMax(2) = coor(2,mm)
-             xMin(3) = coor(3,mm); xMax(3) = coor(3,mm)
+        ! Deallocate the arrays used to build the local tree.
 
-             do j=2,nNPE
-                mm = conn(j,i)
+        deallocate (stack, BB_IDs, BB_IDsNew, nBB_IDs, nBB_IDsNew, &
+                    curLeaf, curLeafNew, stat=ierr)
+        if (ierr /= 0) &
+            call adtTerminate(ADT, "buildADT", &
+                              "Deallocation failure for the local arrays.")
+        !
+        !        Local tree has been built. Now determine the global
+        !        information for this tree.
+        !
+        ! Determine the number and processor ID's of the non-empty local
+        ! trees by gathering the data from the participating processors.
+
+        ii = 1
+        if (nBBoxes == 0) ii = 0
+
+        call mpi_allgather(ii, 1, adflow_integer, tmpArr, 1, adflow_integer, &
+                           ADT%comm, ierr)
+
+        ii = 0
+        do i = 0, (ADT%nProcs - 1)
+            ii = ii + tmpArr(i)
+        end do
+
+        ADT%nRootLeaves = ii
+
+        ! Allocate the memory for both the processor ID's and the
+        ! 3D bounding box of the root leaf.
+
+        allocate (ADT%rootLeavesProcs(ii), &
+                  ADT%rootBBoxes(3, 2, ii), stat=ierr)
+        if (ierr /= 0) &
+             call adtTerminate(ADT, "buildADT", &
+             "Memory allocation failure for &
+             &rootLeavesProcs and rootBBoxes.")
+
+        ! Determine the processor ID's of the non-empty trees.
+
+        ii = 0
+        ADT%myEntryInRootProcs = 0
+
+        do i = 0, (ADT%nProcs - 1)
+            if (tmpArr(i) > 0) then
+                ii = ii + 1
+                ADT%rootLeavesProcs(ii) = i
+                if (ADT%myID == i) ADT%myEntryInRootProcs = ii
+            end if
+        end do
+
+        ! Determine the local 3D bounding box of the root leaf.
+        ! If no local tree is present, just set it to zero to avoid
+        ! problems. The values do not matter.
+
+        if (nBBoxes == 0) then
+            rootLeafBBox = zero
+        else
+            rootLeafBBox(1, 1) = ADTree(1)%xMin(1)
+            rootLeafBBox(2, 1) = ADTree(1)%xMin(2)
+            rootLeafBBox(3, 1) = ADTree(1)%xMin(3)
+
+            rootLeafBBox(1, 2) = ADTree(1)%xMax(4)
+            rootLeafBBox(2, 2) = ADTree(1)%xMax(5)
+            rootLeafBBox(3, 2) = ADTree(1)%xMax(6)
+        end if
+
+        ! Gather the data of the root leaves.
+
+        call mpi_allgather(rootLeafBBox, 6, adflow_real, rootLeavesBBox, &
+                           6, adflow_real, ADT%comm, ierr)
+
+        ! Store the 3D root bounding boxes of the non-empty trees in
+        ! the data structure for the current ADT.
+
+        ii = 0
+        do i = 0, (ADT%nProcs - 1)
+            if (tmpArr(i) > 0) then
+                ii = ii + 1
+
+                ADT%rootBBoxes(1, 1, ii) = rootLeavesBBox(1, 1, i)
+                ADT%rootBBoxes(2, 1, ii) = rootLeavesBBox(2, 1, i)
+                ADT%rootBBoxes(3, 1, ii) = rootLeavesBBox(3, 1, i)
+
+                ADT%rootBBoxes(1, 2, ii) = rootLeavesBBox(1, 2, i)
+                ADT%rootBBoxes(2, 2, ii) = rootLeavesBBox(2, 2, i)
+                ADT%rootBBoxes(3, 2, ii) = rootLeavesBBox(3, 2, i)
+            end if
+        end do
+
+    end subroutine buildADT
+
+    subroutine buildSurfaceADT(nTria, nQuads, nNodes, &
+                               coor, triaConn, quadsConn, &
+                               BBox, useBBox, comm, &
+                               adtID)
+        !
+        !        This routine builds the 6 dimensional ADT, which stores the
+        !        given surface grid. The memory intensive part of these
+        !        arguments, the arrays with the coordinates and
+        !        connectivities, are not copied. Instead pointers are set to
+        !        these arrays. It is therefore the responsibility of the user
+        !        not to deallocate this memory before all the searches have
+        !        been performed.
+        !        Subroutine intent(in) arguments.
+        !        --------------------------------
+        !        nNodes:    Number of local nodes in the given grid.
+        !        nTria:     Idem for the triangles.
+        !        nQuads:    Idem for the quadrilaterals.
+        !        BBox(3,2): The possible bounding box. Only elements within
+        !                   this box will be stored in the ADT.
+        !        useBBox:   Whether or not to use the bounding box.
+        !        comm:      MPI-communicator for the global ADT.
+        !        adtID:     The ID of the ADT.
+        !        Subroutine intent(in), target arguments.
+        !        ----------------------------------------
+        !        coor(3,nNodes):      Nodal coordinates of the local grid.
+        !        triaConn(3,nTria):   Local connectivity of the triangles.
+        !        quadsConn(4,nQuads): Idem for the quadrilaterals.
+        !
+        implicit none
+        !
+        !       Subroutine arguments.
+        !
+        integer, intent(in) :: comm
+        character(len=*), intent(in) :: adtID
+
+        integer(kind=intType), intent(in) :: nTria
+        integer(kind=intType), intent(in) :: nQuads
+        integer(kind=intType), intent(in) :: nNodes
+
+        logical, intent(in) :: useBBox
+
+        integer(kind=intType), dimension(:, :), intent(in), &
+            target :: triaConn
+        integer(kind=intType), dimension(:, :), intent(in), &
+            target :: quadsConn
+
+        real(kind=realType), dimension(3, 2), intent(in) :: BBox
+
+        real(kind=realType), dimension(:, :), intent(in), &
+            target :: coor
+        !
+        !       Local variables.
+        !
+        integer :: ierr, ll, nNPE, jj
+
+        integer(kind=adtElementType) :: elType
+
+        integer(kind=intType) :: i, j, ii, mm, nn, nElem
 
-                xMin(1) = min(xMin(1),coor(1,mm))
-                xMin(2) = min(xMin(2),coor(2,mm))
-                xMin(3) = min(xMin(3),coor(3,mm))
+        integer(kind=intType), dimension(:, :), pointer :: conn
+
+        real(kind=realType), dimension(3) :: xMin, xMax
+
+        logical, dimension(:), allocatable :: elementWithinBBox
+
+        type(adtType), pointer :: ADT
+
+        ! ! Allocate or reallocate the memory for ADTs. This depends
+        ! ! whether or not this is the first ADT to be built.
 
-                xMax(1) = max(xMax(1),coor(1,mm))
-                xMax(2) = max(xMax(2),coor(2,mm))
-                xMax(3) = max(xMax(3),coor(3,mm))
-             enddo
+        if (allocated(ADTs)) then
+            call reallocateADTs(adtID, jj)
+        else
+            call allocateADTs
+            jj = 1
+        end if
+
+        ! The ADT we are currently working with
+        ADT => ADTs(jj)
+
+        ! Make sure the ADT is active and store the ID of this ADT.
+
+        ADT%isActive = .true.
+        ADT%adtID = adtID
+
+        ! Copy the communicator and determine the number of processors
+        ! and my processor ID in this group.
+
+        ADT%comm = comm
+        call mpi_comm_rank(comm, ADT%myID, ierr)
+        call mpi_comm_size(comm, ADT%nProcs, ierr)
 
-             ! Check if the bounding box is (partially) inside the
-             ! global bounding box. If so, set elementWithinBBox
-             ! to .true.; otherwise set it to .false.
+        ! Set the ADT type, which is a surface ADT.
+
+        ADT%adtType = adtSurfaceADT
+
+        ! Copy the number of nodes and surface elements and set the
+        ! number of volume elements to 0; only a surface grid has been
+        ! given.
+
+        ADT%nNodes = nNodes
+        ADT%nTria = nTria
+        ADT%nQuads = nQuads
+
+        ADT%nTetra = 0
+        ADT%nPyra = 0
+        ADT%nPrisms = 0
+        ADT%nHexa = 0
 
-             if(xMax(1) >= BBox(1,1) .and. xMin(1) <= BBox(1,2) .and. &
-                  xMax(2) >= BBox(2,1) .and. xMin(2) <= BBox(2,2) .and. &
-                  xMax(3) >= BBox(3,1) .and. xMin(3) <= BBox(3,2)) then
-                elementWithinBBox(ii) = .true.
-             else
-                elementWithinBBox(ii) = .false.
-             endif
+        ! Set the pointers for the coordinates and the
+        ! surface connectivities.
 
-          enddo
-       enddo elementLoop1
+        ADT%coor => coor
+        ADT%triaConn => triaConn
+        ADT%quadsConn => quadsConn
 
-       ! Determine the local number of elements within the global
-       ! bounding box.
+        ! Determine the number of elements to be stored in the ADT.
+        ! This depends whether or not the global bounding box should be
+        ! used when building the ADT.
 
-       ii = 0
-       do i=1,nn
-          if( elementWithinBBox(i) ) ii = ii + 1
-       enddo
+        testBBox: if (useBBox) then
 
-       ADT%nBBoxes = ii
+            ! Global bounding box is used. Allocate the memory for the
+            ! logical elementWithinBBox.
 
-       ! Allocate the memory for the bounding box coordinates, the
-       ! corresponding element type and the index in the connectivity.
+            nn = nTria + nQuads
+            allocate (elementWithinBBox(nn), stat=ierr)
+            if (ierr /= 0) &
+                 call adtTerminate(ADT, "buildSurfaceADT", &
+                 "Memory allocation failure for &
+                 &elementWithinBBox.")
 
-       allocate(ADT%xBBox(6,ii), ADT%elementType(ii), &
-            ADT%elementID(ii), stat=ierr)
-       if(ierr /= 0)                             &
-            call adtTerminate(ADT, "buildVolumeADT", &
-            "Memory allocation failure for bounding &
-            &box data.")
+            ! Loop over the number of element types.
 
-       ! Repeat the loop over the all the elements, but now store
-       ! the bounding boxes in the ADT.
+            ii = 0
+            elementLoop1: do ll = 1, 2
 
-       ii = 0
-       nn = 0
-       elementLoop2: do ll=1,4
+                ! Set the correct pointers for this element.
 
-          ! Set the correct pointers for this element.
+                call setSurfacePointers(ll)
 
-          call setVolumePointers(ll)
+                ! Loop over the elements and determine the bounding box of
+                ! each element.
 
-          ! Loop over the elements and store the bounding box info,
-          ! if needed.
+                do i = 1, nElem
+                    ii = ii + 1
 
-          do i=1,nElem
-             ii = ii + 1
-             testWithin: if( elementWithinBBox(ii) ) then
+                    mm = conn(1, i)
+                    xMin(1) = coor(1, mm); xMax(1) = coor(1, mm)
+                    xMin(2) = coor(2, mm); xMax(2) = coor(2, mm)
+                    xMin(3) = coor(3, mm); xMax(3) = coor(3, mm)
 
-                nn = nn + 1
+                    do j = 2, nNPE
+                        mm = conn(j, i)
 
-                ADT%elementType(nn) = elType
-                ADT%elementID(nn)   = i
+                        xMin(1) = min(xMin(1), coor(1, mm))
+                        xMin(2) = min(xMin(2), coor(2, mm))
+                        xMin(3) = min(xMin(3), coor(3, mm))
 
-                mm = conn(1,i)
-                xMin(1) = coor(1,mm); xMax(1) = coor(1,mm)
-                xMin(2) = coor(2,mm); xMax(2) = coor(2,mm)
-                xMin(3) = coor(3,mm); xMax(3) = coor(3,mm)
+                        xMax(1) = max(xMax(1), coor(1, mm))
+                        xMax(2) = max(xMax(2), coor(2, mm))
+                        xMax(3) = max(xMax(3), coor(3, mm))
+                    end do
 
-                do j=2,nNPE
-                   mm = conn(j,i)
+                    ! Check if the bounding box is (partially) inside the
+                    ! global bounding box. If so, set elementWithinBBox
+                    ! to .true.; otherwise set it to .false.
 
-                   xMin(1) = min(xMin(1),coor(1,mm))
-                   xMin(2) = min(xMin(2),coor(2,mm))
-                   xMin(3) = min(xMin(3),coor(3,mm))
+                    if (xMax(1) >= BBox(1, 1) .and. xMin(1) <= BBox(1, 2) .and. &
+                        xMax(2) >= BBox(2, 1) .and. xMin(2) <= BBox(2, 2) .and. &
+                        xMax(3) >= BBox(3, 1) .and. xMin(3) <= BBox(3, 2)) then
+                        elementWithinBBox(ii) = .true.
+                    else
+                        elementWithinBBox(ii) = .false.
+                    end if
 
-                   xMax(1) = max(xMax(1),coor(1,mm))
-                   xMax(2) = max(xMax(2),coor(2,mm))
-                   xMax(3) = max(xMax(3),coor(3,mm))
-                enddo
+                end do
+            end do elementLoop1
 
-                ADT%xBBox(1,nn) = xMin(1)
-                ADT%xBBox(2,nn) = xMin(2)
-                ADT%xBBox(3,nn) = xMin(3)
+            ! Determine the local number of elements within the global
+            ! bounding box.
 
-                ADT%xBBox(4,nn) = xMax(1)
-                ADT%xBBox(5,nn) = xMax(2)
-                ADT%xBBox(6,nn) = xMax(3)
+            ii = 0
+            do i = 1, nn
+                if (elementWithinBBox(i)) ii = ii + 1
+            end do
 
-             endif testWithin
-          enddo
-       enddo elementLoop2
+            ADT%nBBoxes = ii
 
-       ! Deallocate the memory for elementWithinBBox.
+            ! Allocate the memory for the bounding box coordinates, the
+            ! corresponding element type and the index in the connectivity.
 
-       deallocate(elementWithinBBox, stat=ierr)
-       if(ierr /= 0)                             &
-            call adtTerminate(ADT, "buildVolumeADT", &
-            "Deallocation failure for &
-            &elementWithinBBox.")
+            allocate (ADT%xBBox(6, ii), ADT%elementType(ii), &
+                      ADT%elementID(ii), stat=ierr)
+            if (ierr /= 0) &
+                 call adtTerminate(ADT, "buildSurfaceADT", &
+                 "Memory allocation failure for bounding &
+                 &box data.")
 
-    else testBBox
+            ! Repeat the loop over the all the elements, but now store
+            ! the bounding boxes in the ADT.
 
-       ! No global bounding box. The number of local bounding boxes
-       ! to be stored is the total number of local volume elements.
+            ii = 0
+            nn = 0
+            elementLoop2: do ll = 1, 2
 
-       ii = nTetra + nPyra + nPrisms + nHexa
-       ADT%nBBoxes = ii
+                ! Set the correct pointers for this element.
 
-       ! Allocate the memory for the bounding box coordinates, the
-       ! corresponding element type and the index in the connectivity.
+                call setSurfacePointers(ll)
 
-       allocate(ADT%xBBox(6,ii), ADT%elementType(ii), &
-            ADT%elementID(ii), stat=ierr)
-       if(ierr /= 0)                             &
-            call adtTerminate(ADT, "buildVolumeADT", &
-            "Memory allocation failure for bounding &
-            &box data.")
+                ! Loop over the elements and store the bounding box info,
+                ! if needed.
 
-       ! Loop over the number of element types present, i.e. 4,
-       ! to store the bounding boxes; nn is the counter.
+                do i = 1, nElem
+                    ii = ii + 1
+                    testWithin: if (elementWithinBBox(ii)) then
 
-       nn = 0
-       elementLoop3: do ll=1,4
+                        nn = nn + 1
 
-          ! Set the correct pointers for this element.
+                        ADT%elementType(nn) = elType
+                        ADT%elementID(nn) = i
 
-          call setVolumePointers(ll)
+                        mm = conn(1, i)
+                        xMin(1) = coor(1, mm); xMax(1) = coor(1, mm)
+                        xMin(2) = coor(2, mm); xMax(2) = coor(2, mm)
+                        xMin(3) = coor(3, mm); xMax(3) = coor(3, mm)
 
-          ! Loop over the number of elements and store the bounding
-          ! box info.
+                        do j = 2, nNPE
+                            mm = conn(j, i)
 
-          do i=1,nElem
-             nn = nn + 1
+                            xMin(1) = min(xMin(1), coor(1, mm))
+                            xMin(2) = min(xMin(2), coor(2, mm))
+                            xMin(3) = min(xMin(3), coor(3, mm))
 
-             ADT%elementType(nn) = elType
-             ADT%elementID(nn)   = i
+                            xMax(1) = max(xMax(1), coor(1, mm))
+                            xMax(2) = max(xMax(2), coor(2, mm))
+                            xMax(3) = max(xMax(3), coor(3, mm))
+                        end do
 
-             mm = conn(1,i)
-             xMin(1) = coor(1,mm); xMax(1) = coor(1,mm)
-             xMin(2) = coor(2,mm); xMax(2) = coor(2,mm)
-             xMin(3) = coor(3,mm); xMax(3) = coor(3,mm)
+                        ADT%xBBox(1, nn) = xMin(1)
+                        ADT%xBBox(2, nn) = xMin(2)
+                        ADT%xBBox(3, nn) = xMin(3)
 
-             do j=2,nNPE
-                mm = conn(j,i)
+                        ADT%xBBox(4, nn) = xMax(1)
+                        ADT%xBBox(5, nn) = xMax(2)
+                        ADT%xBBox(6, nn) = xMax(3)
 
-                xMin(1) = min(xMin(1),coor(1,mm))
-                xMin(2) = min(xMin(2),coor(2,mm))
-                xMin(3) = min(xMin(3),coor(3,mm))
+                    end if testWithin
+                end do
+            end do elementLoop2
 
-                xMax(1) = max(xMax(1),coor(1,mm))
-                xMax(2) = max(xMax(2),coor(2,mm))
-                xMax(3) = max(xMax(3),coor(3,mm))
-             enddo
+            ! Deallocate the memory for elementWithinBBox.
 
-             ADT%xBBox(1,nn) = xMin(1)
-             ADT%xBBox(2,nn) = xMin(2)
-             ADT%xBBox(3,nn) = xMin(3)
+            deallocate (elementWithinBBox, stat=ierr)
+            if (ierr /= 0) &
+                 call adtTerminate(ADT, "buildSurfaceADT", &
+                 "Deallocation failure for &
+                 &elementWithinBBox.")
 
-             ADT%xBBox(4,nn) = xMax(1)
-             ADT%xBBox(5,nn) = xMax(2)
-             ADT%xBBox(6,nn) = xMax(3)
-          enddo
+            else testBBox
 
-       enddo elementLoop3
+            ! No global bounding box. The number of local bounding boxes
+            ! to be stored is the total number of local surface elements.
 
-    endif testBBox
+            ii = nTria + nQuads
+            ADT%nBBoxes = ii
 
-    ! Build the ADT from the now known boundary boxes.
+            ! Allocate the memory for the bounding box coordinates, the
+            ! corresponding element type and the index in the connectivity.
 
-    call buildADT(ADT)
+            allocate (ADT%xBBox(6, ii), ADT%elementType(ii), &
+                      ADT%elementID(ii), stat=ierr)
+            if (ierr /= 0) &
+                 call adtTerminate(ADT, "buildSurfaceADT", &
+                 "Memory allocation failure for bounding &
+                 &box data.")
 
-    !===============================================================
+            ! Loop over the number of element types present, i.e. 2,
+            ! to store the bounding boxes; nn is the counter.
 
-  contains
-
-    !=============================================================
-
-    subroutine setVolumePointers(ll)
-      !
-      !          This internal subroutine sets the pointers to the correct
-      !          volume element, such that a loop over the element types
-      !          can be used.
-      !          Subroutine intent(in) arguments.
-      !          --------------------------------
-      !          ll: Element type for which the pointers must be used.
-      !
-      implicit none
-      !
-      !         Subroutine arguments.
-      !
-      integer, intent(in) :: ll
-
-      select case (ll)
-      case (1)
-         elType = adtTetrahedron; nElem = nTetra;  nNPE = 4
-         conn => tetraConn
-      case (2)
-         elType = adtPyramid;     nElem = nPyra;   nNPE = 5
-         conn => pyraConn
-      case (3)
-         elType = adtPrism;       nElem = nPrisms; nNPE = 6
-         conn => prismsConn
-      case (4)
-         elType = adtHexahedron;  nElem = nHexa;   nNPE = 8
-         conn => hexaConn
-      end select
-
-    end subroutine setVolumePointers
-
-  end subroutine buildVolumeADT
-
-  subroutine buildSerialHex(nHexa, nNodes, coor, hexaConn, ADT)
-    !
-    !        This a specialized routine that builds and ADT tree for
-    !        hex volumes only and only in serial. Also, this routine does
-    !        use adtDats's ADTs() array list...the user must supply the
-    !        adtType to use and is responsible for all data management of
-    !        this type.
-    !        The memory intensive part of these
-    !        arguments, the arrays with the coordinates and
-    !        connectivities, are not copied. Instead pointers are set to
-    !        these arrays. It is therefore the responsibility of the user
-    !        not to deallocate this memory before all the searches have
-    !        been performed.
-    !        Subroutine intent(in) arguments.
-    !        --------------------------------
-    !       * nHexa :    Number of hexa cells
-    !        nNodes:    Number of nodes in the given grid.
-    !        Subroutine intent(in), target arguments.
-    !        ----------------------------------------
-    !        coor(3,nNodes):        Nodal coordinates of the local grid.
-    !        hexaConn(8,nHexa):     Idem for the hexahedra.
-    !        Subroutine intent(out), arguments.
-    !        ----------------------------------------
-    !        ADT : The newly completed ADT
-    !
-    implicit none
-    !
-    !       Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nHexa
-    integer(kind=intType), intent(in) :: nNodes
+            nn = 0
+            elementLoop3: do ll = 1, 2
 
-    integer(kind=intType), dimension(:,:), intent(in), &
-         target :: hexaConn
+                ! Set the correct pointers for this element.
 
-    real(kind=realType), dimension(:,:), intent(in), &
-         target :: coor
-    type(adtType), intent(out) :: ADT
-    !
-    !       Local variables.
-    !
-    integer :: ierr, ll, nNPE
-    integer(kind=intType) :: i, j, mm
-    real(kind=realType), dimension(3) :: xMin, xMax
+                call setSurfacePointers(ll)
 
-    ! We need to set comm...explictly mpi_comm_self
-    ADT%comm = MPI_COMM_SELF
-    ADT%nProcs = 1
-    ADT%myID = 0
-    ! Set the ADT type, which is a volume ADT.
+                ! Loop over the number of elements and store the bounding
+                ! box info.
+
+                do i = 1, nElem
+                    nn = nn + 1
+
+                    ADT%elementType(nn) = elType
+                    ADT%elementID(nn) = i
+
+                    mm = conn(1, i)
+                    xMin(1) = coor(1, mm); xMax(1) = coor(1, mm)
+                    xMin(2) = coor(2, mm); xMax(2) = coor(2, mm)
+                    xMin(3) = coor(3, mm); xMax(3) = coor(3, mm)
+
+                    do j = 2, nNPE
+                        mm = conn(j, i)
+
+                        xMin(1) = min(xMin(1), coor(1, mm))
+                        xMin(2) = min(xMin(2), coor(2, mm))
+                        xMin(3) = min(xMin(3), coor(3, mm))
+
+                        xMax(1) = max(xMax(1), coor(1, mm))
+                        xMax(2) = max(xMax(2), coor(2, mm))
+                        xMax(3) = max(xMax(3), coor(3, mm))
+                    end do
+
+                    ADT%xBBox(1, nn) = xMin(1)
+                    ADT%xBBox(2, nn) = xMin(2)
+                    ADT%xBBox(3, nn) = xMin(3)
+
+                    ADT%xBBox(4, nn) = xMax(1)
+                    ADT%xBBox(5, nn) = xMax(2)
+                    ADT%xBBox(6, nn) = xMax(3)
+                end do
+
+            end do elementLoop3
+
+        end if testBBox
+
+        ! Build the ADT from the now known boundary boxes.
+
+        call buildADT(ADT)
+
+        !===============================================================
+
+    contains
+
+        !=============================================================
+
+        subroutine setSurfacePointers(ll)
+            !
+            !          This internal subroutine sets the pointers to the correct
+            !          surface element, such that a loop over the element types
+            !          can be used.
+            !          Subroutine intent(in) arguments.
+            !          --------------------------------
+            !          ll: Element type for which the pointers must be used.
+            !
+            implicit none
+            !
+            !         Subroutine arguments.
+            !
+            integer, intent(in) :: ll
+
+            select case (ll)
+            case (1)
+                elType = adtTriangle; nElem = nTria; nNPE = 3
+                conn => triaConn
+            case (2)
+                elType = adtQuadrilateral; nElem = nQuads; nNPE = 4
+                conn => quadsConn
+            case (3)
+            end select
+
+        end subroutine setSurfacePointers
+
+    end subroutine buildSurfaceADT
+
+    subroutine buildVolumeADT(nTetra, nPyra, nPrisms, &
+                              nHexa, nNodes, coor, &
+                              tetraConn, pyraConn, prismsConn, &
+                              hexaConn, BBox, useBBox, &
+                              comm, adtID)
+        !
+        !        This routine builds the 6 dimensional ADT, which stores the
+        !        given volume grid. The memory intensive part of these
+        !        arguments, the arrays with the coordinates and
+        !        connectivities, are not copied. Instead pointers are set to
+        !        these arrays. It is therefore the responsibility of the user
+        !        not to deallocate this memory before all the searches have
+        !        been performed.
+        !        Subroutine intent(in) arguments.
+        !        --------------------------------
+        !        nNodes:    Number of local nodes in the given grid.
+        !        nTetra:    Idem for the tetrahedra.
+        !        nPyra:     Idem for the pyramids.
+        !        nPrisms:   Idem for the prisms.
+        !        nHexa:     Idem for the hexahedra.
+        !        BBox(3,2): The possible bounding box. Only elements within
+        !                   this box will be stored in the ADT.
+        !        useBBox:   Whether or not to use the bounding box.
+        !        comm:      MPI-communicator for the global ADT.
+        !        adtID:     The ID of the ADT.
+        !        Subroutine intent(in), target arguments.
+        !        ----------------------------------------
+        !        coor(3,nNodes):        Nodal coordinates of the local grid.
+        !        tetraConn(4,nTetra):   Local connectivity of the tetrahedra.
+        !        pyraConn(5,nPyra):     Idem for the pyramids.
+        !        prismsConn(6,nPrisms): Idem for the prisms.
+        !        hexaConn(8,nHexa):     Idem for the hexahedra.
+        !
+        implicit none
+        !
+        !       Subroutine arguments.
+        !
+        integer, intent(in) :: comm
+        character(len=*), intent(in) :: adtID
+
+        integer(kind=intType), intent(in) :: nTetra
+        integer(kind=intType), intent(in) :: nPyra
+        integer(kind=intType), intent(in) :: nPrisms
+        integer(kind=intType), intent(in) :: nHexa
+        integer(kind=intType), intent(in) :: nNodes
+
+        logical, intent(in) :: useBBox
+
+        integer(kind=intType), dimension(:, :), intent(in), &
+            target :: tetraConn
+        integer(kind=intType), dimension(:, :), intent(in), &
+            target :: pyraConn
+        integer(kind=intType), dimension(:, :), intent(in), &
+            target :: prismsConn
+        integer(kind=intType), dimension(:, :), intent(in), &
+            target :: hexaConn
+
+        real(kind=realType), dimension(3, 2), intent(in) :: BBox
+
+        real(kind=realType), dimension(:, :), intent(in), &
+            target :: coor
+        !
+        !       Local variables.
+        !
+        integer :: ierr, ll, nNPE
+
+        integer(kind=adtElementType) :: elType
+
+        integer(kind=intType) :: i, j, ii, jj, mm, nn, nElem
+
+        integer(kind=intType), dimension(:, :), pointer :: conn
+
+        real(kind=realType), dimension(3) :: xMin, xMax
+
+        logical, dimension(:), allocatable :: elementWithinBBox
+
+        type(adtType), pointer :: ADT
+
+        ! Allocate or reallocate the memory for ADTs. This depends
+        ! whether or not this is the first ADT to be built.
+
+        if (allocated(ADTs)) then
+            call reallocateADTs(adtID, jj)
+        else
+            call allocateADTs
+            jj = 1
+        end if
+
+        ADT => ADTs(jj)
+
+        ! Make sure the ADT is active and store the ID of this ADT.
+
+        ADT%isActive = .true.
+        ADT%adtID = adtID
+
+        ! Copy the communicator and determine the number of processors
+        ! and my processor ID in this group.
+
+        ADT%comm = comm
+        call mpi_comm_rank(comm, ADT%myID, ierr)
+        call mpi_comm_size(comm, ADT%nProcs, ierr)
+
+        ! Set the ADT type, which is a volume ADT.
+
+        ADT%adtType = adtVolumeADT
+
+        ! Copy the number of nodes and volume elements and set the number
+        ! of surface elements to 0; only a volume grid has been given.
+
+        ADT%nNodes = nNodes
+        ADT%nTetra = nTetra
+        ADT%nPyra = nPyra
+        ADT%nPrisms = nPrisms
+        ADT%nHexa = nHexa
+
+        ADT%nTria = 0
+        ADT%nQuads = 0
 
-    ADT%adtType = adtVolumeADT
+        ! Set the pointers for the coordinates and the
+        ! volume connectivities.
 
-    ! Copy the number of nodes and volume elements and set the number
-    ! of surface elements to 0; only a volume grid has been given.
+        ADT%coor => coor
+        ADT%tetraConn => tetraConn
+        ADT%pyraConn => pyraConn
+        ADT%prismsConn => prismsConn
+        ADT%hexaConn => hexaConn
 
-    ADT%nNodes  = nNodes
-    ADT%nHexa   = nHexa
-    ADT%nTetra  = 0
-    ADT%nPyra   = 0
-    ADT%nPrisms = 0
-    ADT%nTria  = 0
-    ADT%nQuads = 0
+        ! Determine the number of elements to be stored in the ADT.
+        ! This depends whether or not the global bounding box should be
+        ! used when building the ADT.
 
-    ! Set the pointers for the coordinates and the
-    ! volume connectivities.
+        testBBox: if (useBBox) then
 
-    ADT%coor       => coor
-    ADT%hexaConn   => hexaConn
-    nullify(ADT%tetraConn, ADT%pyraConn, ADT%prismsConn)
+            ! Global bounding box is used. Allocate the memory for the
+            ! logical elementWithinBBox.
 
-    ADT%nBBoxes = nHexa
+            nn = nTetra + nPyra + nPrisms + nHexa
+            allocate (elementWithinBBox(nn), stat=ierr)
+            if (ierr /= 0) &
+                 call adtTerminate(ADT, "buildVolumeADT", &
+                 "Memory allocation failure for &
+                 &elementWithinBBox.")
 
-    ! Allocate the memory for the bounding box coordinates, the
-    ! corresponding element type and the index in the connectivity.
+            ! Loop over the number of element types.
 
-    allocate(ADT%xBBox(6, nHexa))
-    allocate(ADT%elementType(nHexa))
-    allocate(ADT%elementID(nHexa))
+            ii = 0
+            elementLoop1: do ll = 1, 4
 
-    ! All hexas
-    ADT%elementType = adtHexahedron
+                ! Set the correct pointers for this element.
 
+                call setVolumePointers(ll)
 
-    ! Loop over the number of elements and store the bounding
-    ! box info.
-    nNPE = 8
+                ! Loop over the elements and determine the bounding box of
+                ! each element.
 
-    do i=1,nHexa
+                do i = 1, nElem
+                    ii = ii + 1
 
-       mm = hexaConn(1,i)
-       xMin(1) = coor(1,mm); xMax(1) = coor(1,mm)
-       xMin(2) = coor(2,mm); xMax(2) = coor(2,mm)
-       xMin(3) = coor(3,mm); xMax(3) = coor(3,mm)
+                    mm = conn(1, i)
+                    xMin(1) = coor(1, mm); xMax(1) = coor(1, mm)
+                    xMin(2) = coor(2, mm); xMax(2) = coor(2, mm)
+                    xMin(3) = coor(3, mm); xMax(3) = coor(3, mm)
 
-       do j=2,nNPE
-          mm = hexaConn(j,i)
+                    do j = 2, nNPE
+                        mm = conn(j, i)
 
-          xMin(1) = min(xMin(1),coor(1,mm))
-          xMin(2) = min(xMin(2),coor(2,mm))
-          xMin(3) = min(xMin(3),coor(3,mm))
+                        xMin(1) = min(xMin(1), coor(1, mm))
+                        xMin(2) = min(xMin(2), coor(2, mm))
+                        xMin(3) = min(xMin(3), coor(3, mm))
 
-          xMax(1) = max(xMax(1),coor(1,mm))
-          xMax(2) = max(xMax(2),coor(2,mm))
-          xMax(3) = max(xMax(3),coor(3,mm))
-       enddo
+                        xMax(1) = max(xMax(1), coor(1, mm))
+                        xMax(2) = max(xMax(2), coor(2, mm))
+                        xMax(3) = max(xMax(3), coor(3, mm))
+                    end do
 
-       ADT%xBBox(1,i) = xMin(1)
-       ADT%xBBox(2,i) = xMin(2)
-       ADT%xBBox(3,i) = xMin(3)
+                    ! Check if the bounding box is (partially) inside the
+                    ! global bounding box. If so, set elementWithinBBox
+                    ! to .true.; otherwise set it to .false.
 
-       ADT%xBBox(4,i) = xMax(1)
-       ADT%xBBox(5,i) = xMax(2)
-       ADT%xBBox(6,i) = xMax(3)
+                    if (xMax(1) >= BBox(1, 1) .and. xMin(1) <= BBox(1, 2) .and. &
+                        xMax(2) >= BBox(2, 1) .and. xMin(2) <= BBox(2, 2) .and. &
+                        xMax(3) >= BBox(3, 1) .and. xMin(3) <= BBox(3, 2)) then
+                        elementWithinBBox(ii) = .true.
+                    else
+                        elementWithinBBox(ii) = .false.
+                    end if
 
-       ! elementID is just sequential since we only have 1 element type
-       ADT%elementID(i) = i
+                end do
+            end do elementLoop1
 
-    enddo
+            ! Determine the local number of elements within the global
+            ! bounding box.
 
-    ! Build the ADT from the now known boundary boxes.
+            ii = 0
+            do i = 1, nn
+                if (elementWithinBBox(i)) ii = ii + 1
+            end do
 
-    call buildADT(ADT)
+            ADT%nBBoxes = ii
 
-  end subroutine buildSerialHex
+            ! Allocate the memory for the bounding box coordinates, the
+            ! corresponding element type and the index in the connectivity.
 
-  subroutine destroySerialHex(ADT)
-    ! Deallocate the data allocated from the ADT
+            allocate (ADT%xBBox(6, ii), ADT%elementType(ii), &
+                      ADT%elementID(ii), stat=ierr)
+            if (ierr /= 0) &
+                 call adtTerminate(ADT, "buildVolumeADT", &
+                 "Memory allocation failure for bounding &
+                 &box data.")
 
-    implicit none
-    type(adtType), intent(inout) :: ADT
+            ! Repeat the loop over the all the elements, but now store
+            ! the bounding boxes in the ADT.
 
-    deallocate(ADT%xBBox)
-    deallocate(ADT%elementType)
-    deallocate(ADT%elementID)
-    deallocate(ADT%ADTree)
-  end subroutine destroySerialHex
+            ii = 0
+            nn = 0
+            elementLoop2: do ll = 1, 4
 
-  subroutine buildSerialQuad(nQuad, nNodes, coor, quadsConn, ADT)
-    !
-    !        This a specialized routine that builds and ADT tree for
-    !        quad surface meshes and only in serial. Also, this routine
-    !        does not
-    !        use adtDats's ADTs() array list...the user must supply the
-    !        adtType to use and is responsible for all data management of
-    !        this type.
-    !        The memory intensive part of these
-    !        arguments, the arrays with the coordinates and
-    !        connectivities, are not copied. Instead pointers are set to
-    !        these arrays. It is therefore the responsibility of the user
-    !        not to deallocate this memory before all the searches have
-    !        been performed.
-    !        Subroutine intent(in) arguments.
-    !        --------------------------------
-    !       * nQuad :    Number of quad cells
-    !        nNodes:    Number of nodes in the given grid.
-    !        Subroutine intent(in), target arguments.
-    !        ----------------------------------------
-    !        coor(3,nNodes):        Nodal coordinates of the local grid.
-    !        quadsConn(8,nHexa):     Connectivity for quad cells
-    !        Subroutine intent(out), arguments.
-    !        ----------------------------------------
-    !        ADT : The newly completed ADT
-    !
-    implicit none
-    !
-    !       Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nQuad
-    integer(kind=intType), intent(in) :: nNodes
+                ! Set the correct pointers for this element.
 
-    integer(kind=intType), dimension(:,:), intent(in), &
-         target :: quadsConn
+                call setVolumePointers(ll)
 
-    real(kind=realType), dimension(:,:), intent(in), &
-         target :: coor
-    type(adtType), intent(out) :: ADT
-    !
-    !       Local variables.
-    !
-    integer :: ierr, ll, nNPE
-    integer(kind=intType) :: i, j, mm
-    real(kind=realType), dimension(3) :: xMin, xMax
+                ! Loop over the elements and store the bounding box info,
+                ! if needed.
 
-    ! We need to set comm...explictly mpi_comm_self
-    ADT%comm = MPI_COMM_SELF
-    ADT%nProcs = 1
-    ADT%myID = 0
-    ! Set the ADT type, which is a surface ADT.
+                do i = 1, nElem
+                    ii = ii + 1
+                    testWithin: if (elementWithinBBox(ii)) then
 
-    ADT%adtType = adtSurfaceADT
+                        nn = nn + 1
 
-    ! Copy the number of nodes and volume elements and set the number
-    ! of surface elements to 0; only a volume grid has been given.
+                        ADT%elementType(nn) = elType
+                        ADT%elementID(nn) = i
 
-    ADT%nNodes  = nNodes
-    ADT%nHexa   = 0
-    ADT%nTetra  = 0
-    ADT%nPyra   = 0
-    ADT%nPrisms = 0
-    ADT%nTria  = 0
-    ADT%nQuads = nQuad
+                        mm = conn(1, i)
+                        xMin(1) = coor(1, mm); xMax(1) = coor(1, mm)
+                        xMin(2) = coor(2, mm); xMax(2) = coor(2, mm)
+                        xMin(3) = coor(3, mm); xMax(3) = coor(3, mm)
 
-    ! Set the pointers for the coordinates and the
-    ! volume connectivities.
+                        do j = 2, nNPE
+                            mm = conn(j, i)
 
-    ADT%coor       => coor
-    ADT%quadsConn   => quadsConn
-    nullify(ADT%triaConn)
+                            xMin(1) = min(xMin(1), coor(1, mm))
+                            xMin(2) = min(xMin(2), coor(2, mm))
+                            xMin(3) = min(xMin(3), coor(3, mm))
 
-    ADT%nBBoxes = nQuad
+                            xMax(1) = max(xMax(1), coor(1, mm))
+                            xMax(2) = max(xMax(2), coor(2, mm))
+                            xMax(3) = max(xMax(3), coor(3, mm))
+                        end do
 
-    ! Allocate the memory for the bounding box coordinates, the
-    ! corresponding element type and the index in the connectivity.
+                        ADT%xBBox(1, nn) = xMin(1)
+                        ADT%xBBox(2, nn) = xMin(2)
+                        ADT%xBBox(3, nn) = xMin(3)
 
-    allocate(ADT%xBBox(6, nQuad))
-    allocate(ADT%elementType(nQuad))
-    allocate(ADT%elementID(nQuad))
+                        ADT%xBBox(4, nn) = xMax(1)
+                        ADT%xBBox(5, nn) = xMax(2)
+                        ADT%xBBox(6, nn) = xMax(3)
 
-    ! All hexas
-    ADT%elementType = adtQuadrilateral
+                    end if testWithin
+                end do
+            end do elementLoop2
 
-    ! Loop over the number of elements and store the bounding
-    ! box info.
-    nNPE = 4
+            ! Deallocate the memory for elementWithinBBox.
 
-    do i=1, nQuad
+            deallocate (elementWithinBBox, stat=ierr)
+            if (ierr /= 0) &
+                 call adtTerminate(ADT, "buildVolumeADT", &
+                 "Deallocation failure for &
+                 &elementWithinBBox.")
 
-       mm = quadsConn(1,i)
-       xMin(1) = coor(1,mm); xMax(1) = coor(1,mm)
-       xMin(2) = coor(2,mm); xMax(2) = coor(2,mm)
-       xMin(3) = coor(3,mm); xMax(3) = coor(3,mm)
+            else testBBox
 
-       do j=2,nNPE
-          mm = quadsConn(j,i)
+            ! No global bounding box. The number of local bounding boxes
+            ! to be stored is the total number of local volume elements.
 
-          xMin(1) = min(xMin(1),coor(1,mm))
-          xMin(2) = min(xMin(2),coor(2,mm))
-          xMin(3) = min(xMin(3),coor(3,mm))
+            ii = nTetra + nPyra + nPrisms + nHexa
+            ADT%nBBoxes = ii
 
-          xMax(1) = max(xMax(1),coor(1,mm))
-          xMax(2) = max(xMax(2),coor(2,mm))
-          xMax(3) = max(xMax(3),coor(3,mm))
-       enddo
+            ! Allocate the memory for the bounding box coordinates, the
+            ! corresponding element type and the index in the connectivity.
 
-       ADT%xBBox(1,i) = xMin(1)
-       ADT%xBBox(2,i) = xMin(2)
-       ADT%xBBox(3,i) = xMin(3)
+            allocate (ADT%xBBox(6, ii), ADT%elementType(ii), &
+                      ADT%elementID(ii), stat=ierr)
+            if (ierr /= 0) &
+                 call adtTerminate(ADT, "buildVolumeADT", &
+                 "Memory allocation failure for bounding &
+                 &box data.")
 
-       ADT%xBBox(4,i) = xMax(1)
-       ADT%xBBox(5,i) = xMax(2)
-       ADT%xBBox(6,i) = xMax(3)
+            ! Loop over the number of element types present, i.e. 4,
+            ! to store the bounding boxes; nn is the counter.
 
-       ! elementID is just sequential since we only have 1 element type
-       ADT%elementID(i) = i
+            nn = 0
+            elementLoop3: do ll = 1, 4
 
-    enddo
+                ! Set the correct pointers for this element.
 
-    ! Build the ADT from the now known boundary boxes.
+                call setVolumePointers(ll)
 
-    call buildADT(ADT)
+                ! Loop over the number of elements and store the bounding
+                ! box info.
+
+                do i = 1, nElem
+                    nn = nn + 1
+
+                    ADT%elementType(nn) = elType
+                    ADT%elementID(nn) = i
+
+                    mm = conn(1, i)
+                    xMin(1) = coor(1, mm); xMax(1) = coor(1, mm)
+                    xMin(2) = coor(2, mm); xMax(2) = coor(2, mm)
+                    xMin(3) = coor(3, mm); xMax(3) = coor(3, mm)
+
+                    do j = 2, nNPE
+                        mm = conn(j, i)
+
+                        xMin(1) = min(xMin(1), coor(1, mm))
+                        xMin(2) = min(xMin(2), coor(2, mm))
+                        xMin(3) = min(xMin(3), coor(3, mm))
+
+                        xMax(1) = max(xMax(1), coor(1, mm))
+                        xMax(2) = max(xMax(2), coor(2, mm))
+                        xMax(3) = max(xMax(3), coor(3, mm))
+                    end do
+
+                    ADT%xBBox(1, nn) = xMin(1)
+                    ADT%xBBox(2, nn) = xMin(2)
+                    ADT%xBBox(3, nn) = xMin(3)
+
+                    ADT%xBBox(4, nn) = xMax(1)
+                    ADT%xBBox(5, nn) = xMax(2)
+                    ADT%xBBox(6, nn) = xMax(3)
+                end do
+
+            end do elementLoop3
+
+        end if testBBox
+
+        ! Build the ADT from the now known boundary boxes.
+
+        call buildADT(ADT)
+
+        !===============================================================
+
+    contains
+
+        !=============================================================
+
+        subroutine setVolumePointers(ll)
+            !
+            !          This internal subroutine sets the pointers to the correct
+            !          volume element, such that a loop over the element types
+            !          can be used.
+            !          Subroutine intent(in) arguments.
+            !          --------------------------------
+            !          ll: Element type for which the pointers must be used.
+            !
+            implicit none
+            !
+            !         Subroutine arguments.
+            !
+            integer, intent(in) :: ll
+
+            select case (ll)
+            case (1)
+                elType = adtTetrahedron; nElem = nTetra; nNPE = 4
+                conn => tetraConn
+            case (2)
+                elType = adtPyramid; nElem = nPyra; nNPE = 5
+                conn => pyraConn
+            case (3)
+                elType = adtPrism; nElem = nPrisms; nNPE = 6
+                conn => prismsConn
+            case (4)
+                elType = adtHexahedron; nElem = nHexa; nNPE = 8
+                conn => hexaConn
+            end select
+
+        end subroutine setVolumePointers
+
+    end subroutine buildVolumeADT
+
+    subroutine buildSerialHex(nHexa, nNodes, coor, hexaConn, ADT)
+        !
+        !        This a specialized routine that builds and ADT tree for
+        !        hex volumes only and only in serial. Also, this routine does
+        !        use adtDats's ADTs() array list...the user must supply the
+        !        adtType to use and is responsible for all data management of
+        !        this type.
+        !        The memory intensive part of these
+        !        arguments, the arrays with the coordinates and
+        !        connectivities, are not copied. Instead pointers are set to
+        !        these arrays. It is therefore the responsibility of the user
+        !        not to deallocate this memory before all the searches have
+        !        been performed.
+        !        Subroutine intent(in) arguments.
+        !        --------------------------------
+        !       * nHexa :    Number of hexa cells
+        !        nNodes:    Number of nodes in the given grid.
+        !        Subroutine intent(in), target arguments.
+        !        ----------------------------------------
+        !        coor(3,nNodes):        Nodal coordinates of the local grid.
+        !        hexaConn(8,nHexa):     Idem for the hexahedra.
+        !        Subroutine intent(out), arguments.
+        !        ----------------------------------------
+        !        ADT : The newly completed ADT
+        !
+        implicit none
+        !
+        !       Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nHexa
+        integer(kind=intType), intent(in) :: nNodes
+
+        integer(kind=intType), dimension(:, :), intent(in), &
+            target :: hexaConn
+
+        real(kind=realType), dimension(:, :), intent(in), &
+            target :: coor
+        type(adtType), intent(out) :: ADT
+        !
+        !       Local variables.
+        !
+        integer :: ierr, ll, nNPE
+        integer(kind=intType) :: i, j, mm
+        real(kind=realType), dimension(3) :: xMin, xMax
+
+        ! We need to set comm...explictly mpi_comm_self
+        ADT%comm = MPI_COMM_SELF
+        ADT%nProcs = 1
+        ADT%myID = 0
+        ! Set the ADT type, which is a volume ADT.
+
+        ADT%adtType = adtVolumeADT
+
+        ! Copy the number of nodes and volume elements and set the number
+        ! of surface elements to 0; only a volume grid has been given.
+
+        ADT%nNodes = nNodes
+        ADT%nHexa = nHexa
+        ADT%nTetra = 0
+        ADT%nPyra = 0
+        ADT%nPrisms = 0
+        ADT%nTria = 0
+        ADT%nQuads = 0
+
+        ! Set the pointers for the coordinates and the
+        ! volume connectivities.
+
+        ADT%coor => coor
+        ADT%hexaConn => hexaConn
+        nullify (ADT%tetraConn, ADT%pyraConn, ADT%prismsConn)
+
+        ADT%nBBoxes = nHexa
+
+        ! Allocate the memory for the bounding box coordinates, the
+        ! corresponding element type and the index in the connectivity.
+
+        allocate (ADT%xBBox(6, nHexa))
+        allocate (ADT%elementType(nHexa))
+        allocate (ADT%elementID(nHexa))
+
+        ! All hexas
+        ADT%elementType = adtHexahedron
+
+        ! Loop over the number of elements and store the bounding
+        ! box info.
+        nNPE = 8
+
+        do i = 1, nHexa
+
+            mm = hexaConn(1, i)
+            xMin(1) = coor(1, mm); xMax(1) = coor(1, mm)
+            xMin(2) = coor(2, mm); xMax(2) = coor(2, mm)
+            xMin(3) = coor(3, mm); xMax(3) = coor(3, mm)
+
+            do j = 2, nNPE
+                mm = hexaConn(j, i)
+
+                xMin(1) = min(xMin(1), coor(1, mm))
+                xMin(2) = min(xMin(2), coor(2, mm))
+                xMin(3) = min(xMin(3), coor(3, mm))
+
+                xMax(1) = max(xMax(1), coor(1, mm))
+                xMax(2) = max(xMax(2), coor(2, mm))
+                xMax(3) = max(xMax(3), coor(3, mm))
+            end do
+
+            ADT%xBBox(1, i) = xMin(1)
+            ADT%xBBox(2, i) = xMin(2)
+            ADT%xBBox(3, i) = xMin(3)
+
+            ADT%xBBox(4, i) = xMax(1)
+            ADT%xBBox(5, i) = xMax(2)
+            ADT%xBBox(6, i) = xMax(3)
+
+            ! elementID is just sequential since we only have 1 element type
+            ADT%elementID(i) = i
+
+        end do
+
+        ! Build the ADT from the now known boundary boxes.
+
+        call buildADT(ADT)
+
+    end subroutine buildSerialHex
+
+    subroutine destroySerialHex(ADT)
+        ! Deallocate the data allocated from the ADT
+
+        implicit none
+        type(adtType), intent(inout) :: ADT
+
+        deallocate (ADT%xBBox)
+        deallocate (ADT%elementType)
+        deallocate (ADT%elementID)
+        deallocate (ADT%ADTree)
+    end subroutine destroySerialHex
+
+    subroutine buildSerialQuad(nQuad, nNodes, coor, quadsConn, ADT)
+        !
+        !        This a specialized routine that builds and ADT tree for
+        !        quad surface meshes and only in serial. Also, this routine
+        !        does not
+        !        use adtDats's ADTs() array list...the user must supply the
+        !        adtType to use and is responsible for all data management of
+        !        this type.
+        !        The memory intensive part of these
+        !        arguments, the arrays with the coordinates and
+        !        connectivities, are not copied. Instead pointers are set to
+        !        these arrays. It is therefore the responsibility of the user
+        !        not to deallocate this memory before all the searches have
+        !        been performed.
+        !        Subroutine intent(in) arguments.
+        !        --------------------------------
+        !       * nQuad :    Number of quad cells
+        !        nNodes:    Number of nodes in the given grid.
+        !        Subroutine intent(in), target arguments.
+        !        ----------------------------------------
+        !        coor(3,nNodes):        Nodal coordinates of the local grid.
+        !        quadsConn(8,nHexa):     Connectivity for quad cells
+        !        Subroutine intent(out), arguments.
+        !        ----------------------------------------
+        !        ADT : The newly completed ADT
+        !
+        implicit none
+        !
+        !       Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nQuad
+        integer(kind=intType), intent(in) :: nNodes
+
+        integer(kind=intType), dimension(:, :), intent(in), &
+            target :: quadsConn
+
+        real(kind=realType), dimension(:, :), intent(in), &
+            target :: coor
+        type(adtType), intent(out) :: ADT
+        !
+        !       Local variables.
+        !
+        integer :: ierr, ll, nNPE
+        integer(kind=intType) :: i, j, mm
+        real(kind=realType), dimension(3) :: xMin, xMax
+
+        ! We need to set comm...explictly mpi_comm_self
+        ADT%comm = MPI_COMM_SELF
+        ADT%nProcs = 1
+        ADT%myID = 0
+        ! Set the ADT type, which is a surface ADT.
+
+        ADT%adtType = adtSurfaceADT
+
+        ! Copy the number of nodes and volume elements and set the number
+        ! of surface elements to 0; only a volume grid has been given.
+
+        ADT%nNodes = nNodes
+        ADT%nHexa = 0
+        ADT%nTetra = 0
+        ADT%nPyra = 0
+        ADT%nPrisms = 0
+        ADT%nTria = 0
+        ADT%nQuads = nQuad
+
+        ! Set the pointers for the coordinates and the
+        ! volume connectivities.
+
+        ADT%coor => coor
+        ADT%quadsConn => quadsConn
+        nullify (ADT%triaConn)
+
+        ADT%nBBoxes = nQuad
+
+        ! Allocate the memory for the bounding box coordinates, the
+        ! corresponding element type and the index in the connectivity.
+
+        allocate (ADT%xBBox(6, nQuad))
+        allocate (ADT%elementType(nQuad))
+        allocate (ADT%elementID(nQuad))
+
+        ! All hexas
+        ADT%elementType = adtQuadrilateral
+
+        ! Loop over the number of elements and store the bounding
+        ! box info.
+        nNPE = 4
+
+        do i = 1, nQuad
+
+            mm = quadsConn(1, i)
+            xMin(1) = coor(1, mm); xMax(1) = coor(1, mm)
+            xMin(2) = coor(2, mm); xMax(2) = coor(2, mm)
+            xMin(3) = coor(3, mm); xMax(3) = coor(3, mm)
+
+            do j = 2, nNPE
+                mm = quadsConn(j, i)
+
+                xMin(1) = min(xMin(1), coor(1, mm))
+                xMin(2) = min(xMin(2), coor(2, mm))
+                xMin(3) = min(xMin(3), coor(3, mm))
+
+                xMax(1) = max(xMax(1), coor(1, mm))
+                xMax(2) = max(xMax(2), coor(2, mm))
+                xMax(3) = max(xMax(3), coor(3, mm))
+            end do
+
+            ADT%xBBox(1, i) = xMin(1)
+            ADT%xBBox(2, i) = xMin(2)
+            ADT%xBBox(3, i) = xMin(3)
+
+            ADT%xBBox(4, i) = xMax(1)
+            ADT%xBBox(5, i) = xMax(2)
+            ADT%xBBox(6, i) = xMax(3)
+
+            ! elementID is just sequential since we only have 1 element type
+            ADT%elementID(i) = i
+
+        end do
+
+        ! Build the ADT from the now known boundary boxes.
+
+        call buildADT(ADT)
 
-  end subroutine buildSerialQuad
+    end subroutine buildSerialQuad
 
-  subroutine destroySerialQuad(ADT)
-    ! Deallocate the data allocated from the ADT
+    subroutine destroySerialQuad(ADT)
+        ! Deallocate the data allocated from the ADT
 
-    implicit none
-    type(adtType), intent(inout) :: ADT
+        implicit none
+        type(adtType), intent(inout) :: ADT
 
-    deallocate(ADT%xBBox)
-    deallocate(ADT%elementType)
-    deallocate(ADT%elementID)
-    deallocate(ADT%ADTree)
-  end subroutine destroySerialQuad
+        deallocate (ADT%xBBox)
+        deallocate (ADT%elementType)
+        deallocate (ADT%elementID)
+        deallocate (ADT%ADTree)
+    end subroutine destroySerialQuad
 end module adtBuild
diff --git a/src/ADT/adtLocalSearch.F90 b/src/ADT/adtLocalSearch.F90
index cf79616bc..d9a633400 100644
--- a/src/ADT/adtLocalSearch.F90
+++ b/src/ADT/adtLocalSearch.F90
@@ -1,2456 +1,2449 @@
 module adtLocalSearch
-  !
-  !      Module which contains the subroutines to perform the local
-  !      searches, i.e. the tree traversals.
-  !
-  use constants
-  use adtUtils, only : ADTs, adtLeafType, adtBBoxTargetType, stack, &
-       nStack, adtTerminate, reallocbboxtargettypeplus, reallocplus, &
-       qsortbboxtargets
-  use adtData, only : adtType
-  implicit none
-
-  !=================================================================
-
-contains
-
-  !===============================================================
-
-  subroutine containmentTreeSearch(ADT,       coor,   &
-       intInfo,   uvw,    &
-       arrDonor,  nCoor,  &
-       nInterpol)
-    !
-    !        This routine performs the actual containment search in the
-    !        local tree. It is a local routine in the sense that no
-    !        communication is involved.
-    !        Subroutine intent(in) arguments.
-    !        --------------------------------
-    !        ADT:       ADT type whose ADT must be searched
-    !        nCoor:     Number of coordinates for which the element must
-    !                   be determined.
-    !        coor:      The coordinates of these points.
-    !        nInterpol: Number of variables to be interpolated.
-    !        arrDonor:  Array with the donor data; needed to obtain the
-    !                   interpolated data.
-    !        Subroutine intent(out) arguments.
-    !        ---------------------------------
-    !        intInfo: 2D integer array, in which the following output
-    !                 be be stored:
-    !                 intInfo(1,:): processor ID of the processor where
-    !                               the element is stored. This of course
-    !                               is myID. If no element is found this
-    !                               value is set to -1.
-    !                 intInfo(2,:): The element type of the element.
-    !                 intInfo(3,:): The element ID of the element in the
-    !                               connectivity.
-    !        uvw:     2D floating point array to store the parametric
-    !                 coordinates of the point in the transformed element
-    !                 and the interpolated data:
-    !                 uvw(1, :): Parametric u-weight.
-    !                 uvw(2, :): Parametric v-weight.
-    !                 uvw(3, :): Parametric w-weight.
-    !                 uvw(4:,:): Interpolated solution, if desired. It is
-    !                            possible to call this routine with
-    !                            nInterpol == 0.
     !
-    implicit none
-    !
-    !       Subroutine arguments.
-    !
-    type(adtType), intent(inout) :: ADT
-    integer(kind=intType), intent(in) :: nCoor
-    integer(kind=intType), intent(in) :: nInterpol
-
-    real(kind=realType), dimension(:,:), intent(in) :: coor
-    real(kind=realType), dimension(:,:), intent(in) :: arrDonor
-
-    integer(kind=intType), dimension(:,:), intent(out) :: intInfo
-    real(kind=realType),   dimension(:,:), intent(out) :: uvw
-    !
-    !       Local variables.
-    !
-    integer(kind=intType), dimension(:), pointer :: BB
-    integer(kind=intType), dimension(:), pointer :: frontLeaves
-    integer(kind=intType), dimension(:), pointer :: frontLeavesNew
-    integer(kind=intType) :: nAllocBB, nAllocFront
-    integer(kind=intType) :: ierr, nn
-    logical :: failed
-
-    ! Initial allocation of the arrays for the tree traversal.
-
-    nAllocBB    = 10
-    nAllocFront = 25
-
-    allocate(BB(nAllocBB), frontLeaves(nAllocFront), &
-         frontLeavesNew(nAllocFront), stat=ierr)
-    if(ierr /= 0)                                    &
-         call adtTerminate(ADT, "containmentTreeSearch", &
-         "Memory allocation failure for BB, &
-         &frontLeaves and frontLeavesNew.")
-
-    ! Loop over the number of coordinates to be treated.
-
-    coorLoop: do nn=1, nCoor
-
-       call containmentTreeSearchSinglePoint(ADT, coor(:, nn), &
-            intInfo(:, nn), uvw(:, nn), arrDonor, nInterpol, BB,  &
-            frontLeaves, frontLeavesNew, failed)
-
-    end do coorLoop
-
-    ! Release the memory allocated in this routine.
-
-    deallocate(BB, frontLeaves, frontLeavesNew, stat=ierr)
-    if(ierr /= 0)                                    &
-         call adtTerminate(ADT, "containmentTreeSearch", &
-         "Deallocation failure for BB, &
-         &frontLeaves and frontLeavesNew.")
-
-  end subroutine containmentTreeSearch
-
-  subroutine containmentTreeSearchSinglePoint(ADT, coor, &
-       intInfo, uvw, arrDonor, nInterpol, BB, &
-       frontLeaves, frontLeavesNew, failed)
-    !
-    !        This routine is replaces the original containment
-    !        tree search, however, it has been optimized for searching a
-    !        single query point at a time. Specifically, this means that
-    !        there are no allocatable arrays inside this routine; they
-    !        must be supplied exterally. Also since only a single point
-    !        is searched we can fix some of the dimensions. This routine
-    !        requires pointers for BB, frontLeaves and frontLeavesNew to
-    !        be passed to the routine. Since this routine is called a
-    !        very large number of times, these cannot be allocated
-    !        dynamically inside for speed purposes.
-    !        Subroutine intent(in) arguments.
-    !        --------------------------------
-    !        ADT:       ADT type whose ADT must be searched
-    !        coor:      The coordinate of the point to be searched.
-    !        nInterpol: Number of variables to be interpolated.
-    !        arrDonor:  Array with the donor data; needed to obtain the
-    !                   interpolated data.
-    !        Subroutine intent(out) arguments.
-    !        ---------------------------------
-    !        intInfo: 1D integer array of length three , in which the
-    !               following output
-    !                 be be stored:
-    !       *          intInfo(1): processor ID of the processor where
-    !                               the element is stored. This of course
-    !                               is myID. If no element is found this
-    !                               value is set to -1.
-    !       *          intInfo(2): The element type of the element.
-    !       *          intInfo(3): The element ID of the element in the
-    !                               connectivity.
-    !        uvw:     1D floating point array to store the parametric
-    !                 coordinates of the point in the transformed element
-    !                 and the interpolated data:
-    !                 uvw(1): Parametric u-weight.
-    !                 uvw(2): Parametric v-weight.
-    !                 uvw(3): Parametric w-weight.
-    !                 uvw(4:): Interpolated solution(s), if desired. It is
-    !                            possible to call this routine with
-    !                            nInterpol == 0.
+    !      Module which contains the subroutines to perform the local
+    !      searches, i.e. the tree traversals.
     !
     use constants
+    use adtUtils, only: ADTs, adtLeafType, adtBBoxTargetType, stack, &
+                        nStack, adtTerminate, reallocbboxtargettypeplus, reallocplus, &
+                        qsortbboxtargets
+    use adtData, only: adtType
     implicit none
-    !
-    !       Subroutine arguments.
-    !
-    type(adtType), intent(inout) :: ADT
-    integer(kind=intType), intent(in) :: nInterpol
-
-    real(kind=realType), dimension(3), intent(in) :: coor
-    real(kind=realType), dimension(:,:), intent(in) :: arrDonor
-
-    integer(kind=intType), dimension(3), intent(out) :: intInfo
-    real(kind=realType),   dimension(:), intent(out) :: uvw
-    logical, intent(out) :: failed
-    integer(kind=intType), dimension(:), pointer :: BB
-    integer(kind=intType), dimension(:), pointer :: frontLeaves
-    integer(kind=intType), dimension(:), pointer :: frontLeavesNew
-
-    !
-    !       Local parameters used in the Newton algorithm.
-    !
-    integer(kind=intType), parameter :: iterMax   = 15
-    real(kind=realType),   parameter :: adtEps    = 1.e-25_realType
-    real(kind=realType),   parameter :: thresConv = 1.e-10_realType
-    !
-    !       Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: ii, kk, ll, mm, nn
-    integer(kind=intType) :: nBB, nFrontLeaves, nFrontLeavesNew
-    integer(kind=intType) :: nAllocBB, nAllocFront
-    integer(kind=intType) :: i, nNodeElement
-
-    integer(kind=intType), dimension(8) :: n
-
-    real(kind=realType) :: u, v, w, uv, uw, vw, wvu, du, dv, dw
-    real(kind=realType) :: oneMinusU, oneMinusV, oneMinusW
-    real(kind=realType) :: oneMinusUMinusV
-    real(kind=realType) :: a11, a12, a13, a21, a22, a23
-    real(kind=realType) :: a31, a32, a33, val
-
-    real(kind=realType), dimension(3)     :: x, f
-    real(kind=realType), dimension(8)     :: weight
-    real(kind=realType), dimension(3,2:8) :: xn
-
-    real(kind=realType), dimension(:,:), pointer :: xBBox
-
-    logical :: elementFound
-
-    type(adtLeafType), dimension(:), pointer :: ADTree
-
-    ! Set some pointers to make the code more readable.
-
-    xBBox  => ADT%xBBox
-    ADTree => ADT%ADTree
-
-    ! Determine the sizes from the arrays we have been passed
-
-    nAllocBB    = size(BB)
-    nAllocFront = size(frontLeaves)
-
-    ! Initialize the processor ID to -1 to indicate that no
-    ! corresponding volume element is found.
-
-    intInfo(1) = -1
-    failed = .True.
-    !
-    !          Part 1. Traverse the tree and determine the target
-    !                  bounding boxes, which may contain the element.
-    !
-    ! Start at the root, i.e. set the front leaf to the root leaf.
-    ! Also initialize the number of possible bounding boxes to 0.
-
-    nBB = 0
-
-    nFrontLeaves   = 1
-    frontLeaves(1) = 1
-
-    treeTraversalLoop: do
-
-       ! Initialize the number of leaves for the new front, i.e.
-       ! the front of the next round, to 0.
-
-       nFrontLeavesNew = 0
-
-       ! Loop over the leaves of the current front.
-
-       currentFrontLoop: do ii=1,nFrontLeaves
-
-          ! Store the ID of the leaf a bit easier and loop over
-          ! its two children.
-
-          ll = frontLeaves(ii)
-
-          childrenLoop: do mm=1,2
-
-             ! Determine whether this child contains a bounding box
-             ! or a leaf of the next level.
-
-             kk = ADTree(ll)%children(mm)
-
-             terminalTest: if(kk < 0) then
-
-                ! Child contains a bounding box. Check if the
-                ! coordinate is inside the bounding box.
-
-                kk = -kk
-                if(coor(1) >= xBBox(1,kk) .and. &
-                     coor(1) <= xBBox(4,kk) .and. &
-                     coor(2) >= xBBox(2,kk) .and. &
-                     coor(2) <= xBBox(5,kk) .and. &
-                     coor(3) >= xBBox(3,kk) .and. &
-                     coor(3) <= xBBox(6,kk)) then
-
-                   ! Coordinate is inside the bounding box. Store the
-                   ! bounding box in the list of possible candidates.
-
-                   if(nBB == nAllocBB) &
-                        call reallocPlus(BB, nAllocBB, 100, ADT)
-
-                   nBB     = nBB + 1
-                   BB(nBB) = kk
-                endif
-
-             else terminalTest
-
-                ! Child contains a leaf. Check if the coordinate is
-                ! inside the bounding box of the leaf.
-
-                if(coor(1) >= ADTree(kk)%xMin(1) .and. &
-                     coor(1) <= ADTree(kk)%xMax(4) .and. &
-                     coor(2) >= ADTree(kk)%xMin(2) .and. &
-                     coor(2) <= ADTree(kk)%xMax(5) .and. &
-                     coor(3) >= ADTree(kk)%xMin(3) .and. &
-                     coor(3) <= ADTree(kk)%xMax(6)) then
-
-                   ! Coordinate is inside the leaf. Store the leaf in
-                   ! the list for the new front.
-
-                   if(nFrontLeavesNew == nAllocFront) then
-                      i = nAllocFront
-                      call reallocPlus(frontLeavesNew, i, 250, ADT)
-                      call reallocPlus(frontLeaves, nAllocFront, 250, ADT)
-                   endif
-
-                   nFrontLeavesNew = nFrontLeavesNew + 1
-                   frontLeavesNew(nFrontLeavesNew) = kk
-
-                endif
-
-             endif terminalTest
-
-          enddo childrenLoop
-
-       enddo currentFrontLoop
-
-       ! End of the loop over the current front. If the new front
-       ! is empty the entire tree has been traversed and an exit is
-       ! made from the corresponding loop.
-
-       if(nFrontLeavesNew == 0) exit treeTraversalLoop
-
-       ! Copy the data of the new front leaves into the current
-       ! front for the next round.
-
-       nFrontLeaves = nFrontLeavesNew
-       do ll=1,nFrontLeaves
-          frontLeaves(ll) = frontLeavesNew(ll)
-       enddo
-
-    enddo treeTraversalLoop
-    !
-    !          Part 2: Loop over the selected bounding boxes and check if
-    !                  the corresponding elements contain the point.
-    !
-    elementFound = .false.
-
-    BBoxLoop: do mm=1,nBB
-
-       ! Determine the element type stored in this bounding box.
-
-       kk = BB(mm)
-       select case (ADT%elementType(kk))
-
-       case (adtTetrahedron)
-
-          ! Element is a tetrahedron.
-          ! Compute the coordinates relative to node 1.
-
-          ll   = ADT%elementID(kk)
-          n(1) = ADT%tetraConn(1,ll)
-
-          do i=2,4
-             n(i) = ADT%tetraConn(i,ll)
-
-             xn(1,i) = ADT%coor(1,n(i)) - ADT%coor(1,n(1))
-             xn(2,i) = ADT%coor(2,n(i)) - ADT%coor(2,n(1))
-             xn(3,i) = ADT%coor(3,n(i)) - ADT%coor(3,n(1))
-          enddo
-
-          x(1) = coor(1) - ADT%coor(1,n(1))
-          x(2) = coor(2) - ADT%coor(2,n(1))
-          x(3) = coor(3) - ADT%coor(3,n(1))
-
-          ! Determine the matrix for the linear transformation
-          ! from the standard element to the current element.
-
-          a11 = xn(1,2); a12 = xn(1,3); a13 = xn(1,4)
-          a21 = xn(2,2); a22 = xn(2,3); a23 = xn(2,4)
-          a31 = xn(3,2); a32 = xn(3,3); a33 = xn(3,4)
-
-          ! Compute the determinant. Make sure that it is not zero
-          ! and invert the value.
-
-          val = a11*(a22*a33 - a32*a23) + a21*(a13*a32 - a12*a33) &
-               + a31*(a12*a23 - a13*a22)
-          val = sign(one,val)/max(abs(val),adtEps)
-
-          ! Compute the u, v, w weights for the given coordinate.
-
-          u = val*((a22*a33 - a23*a32)*x(1) &
-               +      (a13*a32 - a12*a33)*x(2) &
-               +      (a12*a23 - a13*a22)*x(3))
-          v = val*((a23*a31 - a21*a33)*x(1) &
-               +      (a11*a33 - a13*a31)*x(2) &
-               +      (a13*a21 - a11*a23)*x(3))
-          w = val*((a21*a32 - a22*a31)*x(1) &
-               +      (a12*a31 - a11*a32)*x(2) &
-               +      (a11*a22 - a12*a21)*x(3))
-
-          ! Check if the coordinate is inside the tetrahedron.
-          ! If so, set elementFound to .true. and determine the
-          ! interpolation weights.
-
-          if(u >= zero .and. v >= zero .and. &
-               w >= zero .and. (u+v+w) <= one) then
-             elementFound = .true.
-             failed = .False. !No iteration for tetrahedrons
-
-             ! Set the number of interpolation nodes to 4 and
-             ! determine the interpolation weights.
-
-             nNodeElement = 4
-
-             weight(1) = one - u - v - w
-             weight(2) = u
-             weight(3) = v
-             weight(4) = w
-          endif
-
-          !=========================================================
-
-       case (adtPyramid)
-
-          ! Element is a pyramid.
-          ! Compute the coordinates relative to node 1.
-
-          ll   = ADT%elementID(kk)
-          n(1) = ADT%pyraConn(1,ll)
-
-          do i=2,5
-             n(i) = ADT%pyraConn(i,ll)
-
-             xn(1,i) = ADT%coor(1,n(i)) - ADT%coor(1,n(1))
-             xn(2,i) = ADT%coor(2,n(i)) - ADT%coor(2,n(1))
-             xn(3,i) = ADT%coor(3,n(i)) - ADT%coor(3,n(1))
-          enddo
-
-          x(1) = coor(1) - ADT%coor(1,n(1))
-          x(2) = coor(2) - ADT%coor(2,n(1))
-          x(3) = coor(3) - ADT%coor(3,n(1))
-
-          ! Modify the coordinates of node 3, such that it
-          ! corresponds to the weights of the u*v term in the
-          ! transformation.
-
-          xn(1,3) = xn(1,3) - xn(1,2) - xn(1,4)
-          xn(2,3) = xn(2,3) - xn(2,2) - xn(2,4)
-          xn(3,3) = xn(3,3) - xn(3,2) - xn(3,4)
-
-          ! Set the starting values of u, v and w such that it is
-          ! somewhere in the middle of the element. In this way the
-          ! Jacobian matrix is always regular, even if the element
-          ! is degenerate.
-
-          u = half; v = half; w = half
-
-          ! The Newton algorithm to determine the parametric
-          ! weights u, v and w for the given coordinate.
-
-          NewtonPyra: do ll=1,iterMax
-
-             ! Compute the RHS.
-
-             uv = u*v
-             oneMinusW = one - w
-
-             f(1) = oneMinusW*(xn(1,2)*u + xn(1,4)*v + xn(1,3)*uv) &
-                  + xn(1,5)*w - x(1)
-             f(2) = oneMinusW*(xn(2,2)*u + xn(2,4)*v + xn(2,3)*uv) &
-                  + xn(2,5)*w - x(2)
-             f(3) = oneMinusW*(xn(3,2)*u + xn(3,4)*v + xn(3,3)*uv) &
-                  + xn(3,5)*w - x(3)
-
-             ! Compute the Jacobian.
-
-             a11 = oneMinusW*(xn(1,2) + xn(1,3)*v)
-             a12 = oneMinusW*(xn(1,4) + xn(1,3)*u)
-             a13 = xn(1,5) - xn(1,2)*u - xn(1,4)*v - xn(1,3)*uv
-
-             a21 = oneMinusW*(xn(2,2) + xn(2,3)*v)
-             a22 = oneMinusW*(xn(2,4) + xn(2,3)*u)
-             a23 = xn(2,5) - xn(2,2)*u - xn(2,4)*v - xn(2,3)*uv
-
-             a31 = oneMinusW*(xn(3,2) + xn(3,3)*v)
-             a32 = oneMinusW*(xn(3,4) + xn(3,3)*u)
-             a33 = xn(3,5) - xn(3,2)*u - xn(3,4)*v - xn(3,3)*uv
-
-             ! Compute the determinant. Make sure that it is not zero
-             ! and invert the value. The cut off is needed to be able
-             ! to handle exceptional cases for degenerate elements.
-
-             val = a11*(a22*a33 - a32*a23) + a21*(a13*a32 - a12*a33) &
-                  + a31*(a12*a23 - a13*a22)
-             val = sign(one,val)/max(abs(val),adtEps)
-
-             ! Compute the new values of u, v and w.
-
-             du = val*((a22*a33 - a23*a32)*f(1) &
-                  +      (a13*a32 - a12*a33)*f(2) &
-                  +      (a12*a23 - a13*a22)*f(3))
-             dv = val*((a23*a31 - a21*a33)*f(1) &
-                  +      (a11*a33 - a13*a31)*f(2) &
-                  +      (a13*a21 - a11*a23)*f(3))
-             dw = val*((a21*a32 - a22*a31)*f(1) &
-                  +      (a12*a31 - a11*a32)*f(2) &
-                  +      (a11*a22 - a12*a21)*f(3))
-
-             u = u - du; v = v - dv; w = w - dw
-
-             ! Exit the loop if the update of the parametric
-             ! weights is below the threshold
-
-             val = sqrt(du*du + dv*dv + dw*dw)
-             if(val <= thresConv) then
-                failed = .False.
-                exit NewtonPyra
-             end if
-          enddo NewtonPyra
-
-          ! Check if the coordinate is inside the pyramid.
-          ! If so, set elementFound to .true. and determine the
-          ! interpolation weights.
-
-          if(u     >= zero .and. v     >= zero .and. &
-               w     >= zero .and. (u+w) <= one  .and. &
-               (v+w) <= one .and. .not. failed) then
-             elementFound = .true.
-
-             ! Set the number of interpolation nodes to 5 and
-             ! determine the interpolation weights.
-
-             nNodeElement = 5
-
-             oneMinusU = one - u
-             oneMinusV = one - v
-             oneMinusW = one - w
-
-             weight(1) = oneMinusU*oneMinusV*oneMinusW
-             weight(2) =         u*oneMinusV*oneMinusW
-             weight(3) =         u*        v*oneMinusW
-             weight(4) = oneMinusU*        v*oneMinusW
-             weight(5) =                             w
-          endif
-
-          !=========================================================
-
-       case (adtPrism)
-
-          ! Element is a prism.
-          ! Compute the coordinates relative to node 1.
-
-          ll   = ADT%elementID(kk)
-          n(1) = ADT%prismsConn(1,ll)
-
-          do i=2,6
-             n(i) = ADT%prismsConn(i,ll)
-
-             xn(1,i) = ADT%coor(1,n(i)) - ADT%coor(1,n(1))
-             xn(2,i) = ADT%coor(2,n(i)) - ADT%coor(2,n(1))
-             xn(3,i) = ADT%coor(3,n(i)) - ADT%coor(3,n(1))
-          enddo
-
-          x(1) = coor(1) - ADT%coor(1,n(1))
-          x(2) = coor(2) - ADT%coor(2,n(1))
-          x(3) = coor(3) - ADT%coor(3,n(1))
-
-          ! Modify the coordinates of node 5 and 6, such that they
-          ! correspond to the weights of the u*w and v*w term in the
-          ! transformation respectively.
-
-          xn(1,5) = xn(1,5) - xn(1,2) - xn(1,4)
-          xn(2,5) = xn(2,5) - xn(2,2) - xn(2,4)
-          xn(3,5) = xn(3,5) - xn(3,2) - xn(3,4)
 
-          xn(1,6) = xn(1,6) - xn(1,3) - xn(1,4)
-          xn(2,6) = xn(2,6) - xn(2,3) - xn(2,4)
-          xn(3,6) = xn(3,6) - xn(3,3) - xn(3,4)
+    !=================================================================
 
-          ! Set the starting values of u, v and w such that it is
-          ! somewhere in the middle of the element. In this way the
-          ! Jacobian matrix is always regular, even if the element
-          ! is degenerate.
+contains
 
-          u = fourth; v = fourth; w = half
+    !===============================================================
+
+    subroutine containmentTreeSearch(ADT, coor, &
+                                     intInfo, uvw, &
+                                     arrDonor, nCoor, &
+                                     nInterpol)
+        !
+        !        This routine performs the actual containment search in the
+        !        local tree. It is a local routine in the sense that no
+        !        communication is involved.
+        !        Subroutine intent(in) arguments.
+        !        --------------------------------
+        !        ADT:       ADT type whose ADT must be searched
+        !        nCoor:     Number of coordinates for which the element must
+        !                   be determined.
+        !        coor:      The coordinates of these points.
+        !        nInterpol: Number of variables to be interpolated.
+        !        arrDonor:  Array with the donor data; needed to obtain the
+        !                   interpolated data.
+        !        Subroutine intent(out) arguments.
+        !        ---------------------------------
+        !        intInfo: 2D integer array, in which the following output
+        !                 be be stored:
+        !                 intInfo(1,:): processor ID of the processor where
+        !                               the element is stored. This of course
+        !                               is myID. If no element is found this
+        !                               value is set to -1.
+        !                 intInfo(2,:): The element type of the element.
+        !                 intInfo(3,:): The element ID of the element in the
+        !                               connectivity.
+        !        uvw:     2D floating point array to store the parametric
+        !                 coordinates of the point in the transformed element
+        !                 and the interpolated data:
+        !                 uvw(1, :): Parametric u-weight.
+        !                 uvw(2, :): Parametric v-weight.
+        !                 uvw(3, :): Parametric w-weight.
+        !                 uvw(4:,:): Interpolated solution, if desired. It is
+        !                            possible to call this routine with
+        !                            nInterpol == 0.
+        !
+        implicit none
+        !
+        !       Subroutine arguments.
+        !
+        type(adtType), intent(inout) :: ADT
+        integer(kind=intType), intent(in) :: nCoor
+        integer(kind=intType), intent(in) :: nInterpol
+
+        real(kind=realType), dimension(:, :), intent(in) :: coor
+        real(kind=realType), dimension(:, :), intent(in) :: arrDonor
+
+        integer(kind=intType), dimension(:, :), intent(out) :: intInfo
+        real(kind=realType), dimension(:, :), intent(out) :: uvw
+        !
+        !       Local variables.
+        !
+        integer(kind=intType), dimension(:), pointer :: BB
+        integer(kind=intType), dimension(:), pointer :: frontLeaves
+        integer(kind=intType), dimension(:), pointer :: frontLeavesNew
+        integer(kind=intType) :: nAllocBB, nAllocFront
+        integer(kind=intType) :: ierr, nn
+        logical :: failed
+
+        ! Initial allocation of the arrays for the tree traversal.
+
+        nAllocBB = 10
+        nAllocFront = 25
+
+        allocate (BB(nAllocBB), frontLeaves(nAllocFront), &
+                  frontLeavesNew(nAllocFront), stat=ierr)
+        if (ierr /= 0) &
+             call adtTerminate(ADT, "containmentTreeSearch", &
+             "Memory allocation failure for BB, &
+             &frontLeaves and frontLeavesNew.")
+
+        ! Loop over the number of coordinates to be treated.
+
+        coorLoop: do nn = 1, nCoor
+
+            call containmentTreeSearchSinglePoint(ADT, coor(:, nn), &
+                                                  intInfo(:, nn), uvw(:, nn), arrDonor, nInterpol, BB, &
+                                                  frontLeaves, frontLeavesNew, failed)
+
+        end do coorLoop
+
+        ! Release the memory allocated in this routine.
+
+        deallocate (BB, frontLeaves, frontLeavesNew, stat=ierr)
+        if (ierr /= 0) &
+             call adtTerminate(ADT, "containmentTreeSearch", &
+             "Deallocation failure for BB, &
+             &frontLeaves and frontLeavesNew.")
+
+    end subroutine containmentTreeSearch
+
+    subroutine containmentTreeSearchSinglePoint(ADT, coor, &
+                                                intInfo, uvw, arrDonor, nInterpol, BB, &
+                                                frontLeaves, frontLeavesNew, failed)
+        !
+        !        This routine is replaces the original containment
+        !        tree search, however, it has been optimized for searching a
+        !        single query point at a time. Specifically, this means that
+        !        there are no allocatable arrays inside this routine; they
+        !        must be supplied exterally. Also since only a single point
+        !        is searched we can fix some of the dimensions. This routine
+        !        requires pointers for BB, frontLeaves and frontLeavesNew to
+        !        be passed to the routine. Since this routine is called a
+        !        very large number of times, these cannot be allocated
+        !        dynamically inside for speed purposes.
+        !        Subroutine intent(in) arguments.
+        !        --------------------------------
+        !        ADT:       ADT type whose ADT must be searched
+        !        coor:      The coordinate of the point to be searched.
+        !        nInterpol: Number of variables to be interpolated.
+        !        arrDonor:  Array with the donor data; needed to obtain the
+        !                   interpolated data.
+        !        Subroutine intent(out) arguments.
+        !        ---------------------------------
+        !        intInfo: 1D integer array of length three , in which the
+        !               following output
+        !                 be be stored:
+        !       *          intInfo(1): processor ID of the processor where
+        !                               the element is stored. This of course
+        !                               is myID. If no element is found this
+        !                               value is set to -1.
+        !       *          intInfo(2): The element type of the element.
+        !       *          intInfo(3): The element ID of the element in the
+        !                               connectivity.
+        !        uvw:     1D floating point array to store the parametric
+        !                 coordinates of the point in the transformed element
+        !                 and the interpolated data:
+        !                 uvw(1): Parametric u-weight.
+        !                 uvw(2): Parametric v-weight.
+        !                 uvw(3): Parametric w-weight.
+        !                 uvw(4:): Interpolated solution(s), if desired. It is
+        !                            possible to call this routine with
+        !                            nInterpol == 0.
+        !
+        use constants
+        implicit none
+        !
+        !       Subroutine arguments.
+        !
+        type(adtType), intent(inout) :: ADT
+        integer(kind=intType), intent(in) :: nInterpol
+
+        real(kind=realType), dimension(3), intent(in) :: coor
+        real(kind=realType), dimension(:, :), intent(in) :: arrDonor
+
+        integer(kind=intType), dimension(3), intent(out) :: intInfo
+        real(kind=realType), dimension(:), intent(out) :: uvw
+        logical, intent(out) :: failed
+        integer(kind=intType), dimension(:), pointer :: BB
+        integer(kind=intType), dimension(:), pointer :: frontLeaves
+        integer(kind=intType), dimension(:), pointer :: frontLeavesNew
+
+        !
+        !       Local parameters used in the Newton algorithm.
+        !
+        integer(kind=intType), parameter :: iterMax = 15
+        real(kind=realType), parameter :: adtEps = 1.e-25_realType
+        real(kind=realType), parameter :: thresConv = 1.e-10_realType
+        !
+        !       Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: ii, kk, ll, mm, nn
+        integer(kind=intType) :: nBB, nFrontLeaves, nFrontLeavesNew
+        integer(kind=intType) :: nAllocBB, nAllocFront
+        integer(kind=intType) :: i, nNodeElement
+
+        integer(kind=intType), dimension(8) :: n
+
+        real(kind=realType) :: u, v, w, uv, uw, vw, wvu, du, dv, dw
+        real(kind=realType) :: oneMinusU, oneMinusV, oneMinusW
+        real(kind=realType) :: oneMinusUMinusV
+        real(kind=realType) :: a11, a12, a13, a21, a22, a23
+        real(kind=realType) :: a31, a32, a33, val
+
+        real(kind=realType), dimension(3) :: x, f
+        real(kind=realType), dimension(8) :: weight
+        real(kind=realType), dimension(3, 2:8) :: xn
+
+        real(kind=realType), dimension(:, :), pointer :: xBBox
+
+        logical :: elementFound
+
+        type(adtLeafType), dimension(:), pointer :: ADTree
+
+        ! Set some pointers to make the code more readable.
+
+        xBBox => ADT%xBBox
+        ADTree => ADT%ADTree
+
+        ! Determine the sizes from the arrays we have been passed
+
+        nAllocBB = size(BB)
+        nAllocFront = size(frontLeaves)
+
+        ! Initialize the processor ID to -1 to indicate that no
+        ! corresponding volume element is found.
+
+        intInfo(1) = -1
+        failed = .True.
+        !
+        !          Part 1. Traverse the tree and determine the target
+        !                  bounding boxes, which may contain the element.
+        !
+        ! Start at the root, i.e. set the front leaf to the root leaf.
+        ! Also initialize the number of possible bounding boxes to 0.
+
+        nBB = 0
+
+        nFrontLeaves = 1
+        frontLeaves(1) = 1
 
-          ! The Newton algorithm to determine the parametric
-          ! weights u, v and w for the given coordinate.
+        treeTraversalLoop: do
 
-          NewtonPrisms: do ll=1,iterMax
+            ! Initialize the number of leaves for the new front, i.e.
+            ! the front of the next round, to 0.
 
-             ! Compute the RHS.
+            nFrontLeavesNew = 0
 
-             uw = u*w; vw = v*w
+            ! Loop over the leaves of the current front.
 
-             f(1) = xn(1,2)*u  + xn(1,3)*v  + xn(1,4)*w &
-                  + xn(1,5)*uw + xn(1,6)*vw - x(1)
-             f(2) = xn(2,2)*u  + xn(2,3)*v  + xn(2,4)*w &
-                  + xn(2,5)*uw + xn(2,6)*vw - x(2)
-             f(3) = xn(3,2)*u  + xn(3,3)*v  + xn(3,4)*w &
-                  + xn(3,5)*uw + xn(3,6)*vw - x(3)
+            currentFrontLoop: do ii = 1, nFrontLeaves
 
-             ! Compute the Jacobian.
+                ! Store the ID of the leaf a bit easier and loop over
+                ! its two children.
 
-             a11 = xn(1,2) + xn(1,5)*w
-             a12 = xn(1,3) + xn(1,6)*w
-             a13 = xn(1,4) + xn(1,5)*u + xn(1,6)*v
+                ll = frontLeaves(ii)
 
-             a21 = xn(2,2) + xn(2,5)*w
-             a22 = xn(2,3) + xn(2,6)*w
-             a23 = xn(2,4) + xn(2,5)*u + xn(2,6)*v
+                childrenLoop: do mm = 1, 2
 
-             a31 = xn(3,2) + xn(3,5)*w
-             a32 = xn(3,3) + xn(3,6)*w
-             a33 = xn(3,4) + xn(3,5)*u + xn(3,6)*v
+                    ! Determine whether this child contains a bounding box
+                    ! or a leaf of the next level.
 
-             ! Compute the determinant. Make sure that it is not zero
-             ! and invert the value. The cut off is needed to be able
-             ! to handle exceptional cases for degenerate elements.
+                    kk = ADTree(ll)%children(mm)
 
-             val = a11*(a22*a33 - a32*a23) + a21*(a13*a32 - a12*a33) &
-                  + a31*(a12*a23 - a13*a22)
-             val = sign(one,val)/max(abs(val),adtEps)
+                    terminalTest: if (kk < 0) then
 
-             ! Compute the new values of u, v and w.
+                        ! Child contains a bounding box. Check if the
+                        ! coordinate is inside the bounding box.
 
-             du = val*((a22*a33 - a23*a32)*f(1) &
-                  +      (a13*a32 - a12*a33)*f(2) &
-                  +      (a12*a23 - a13*a22)*f(3))
-             dv = val*((a23*a31 - a21*a33)*f(1) &
-                  +      (a11*a33 - a13*a31)*f(2) &
-                  +      (a13*a21 - a11*a23)*f(3))
-             dw = val*((a21*a32 - a22*a31)*f(1) &
-                  +      (a12*a31 - a11*a32)*f(2) &
-                  +      (a11*a22 - a12*a21)*f(3))
+                        kk = -kk
+                        if (coor(1) >= xBBox(1, kk) .and. &
+                            coor(1) <= xBBox(4, kk) .and. &
+                            coor(2) >= xBBox(2, kk) .and. &
+                            coor(2) <= xBBox(5, kk) .and. &
+                            coor(3) >= xBBox(3, kk) .and. &
+                            coor(3) <= xBBox(6, kk)) then
 
-             u = u - du; v = v - dv; w = w - dw
+                            ! Coordinate is inside the bounding box. Store the
+                            ! bounding box in the list of possible candidates.
 
-             ! Exit the loop if the update of the parametric
-             ! weights is below the threshold
+                            if (nBB == nAllocBB) &
+                                call reallocPlus(BB, nAllocBB, 100, ADT)
 
-             val = sqrt(du*du + dv*dv + dw*dw)
-             if(val <= thresConv) then
-                failed = .False.
-                exit NewtonPrisms
-             end if
+                            nBB = nBB + 1
+                            BB(nBB) = kk
+                        end if
 
-          enddo NewtonPrisms
+                        else terminalTest
 
-          ! Check if the coordinate is inside the prism.
-          ! If so, set elementFound to .true. and determine the
-          ! interpolation weights.
+                        ! Child contains a leaf. Check if the coordinate is
+                        ! inside the bounding box of the leaf.
 
-          if(u     >= zero .and. v >= zero .and. &
-               w     >= zero .and. w <= one  .and. &
-               (u+v) <= one .and. .not. failed) then
-             elementFound = .true.
+                        if (coor(1) >= ADTree(kk)%xMin(1) .and. &
+                            coor(1) <= ADTree(kk)%xMax(4) .and. &
+                            coor(2) >= ADTree(kk)%xMin(2) .and. &
+                            coor(2) <= ADTree(kk)%xMax(5) .and. &
+                            coor(3) >= ADTree(kk)%xMin(3) .and. &
+                            coor(3) <= ADTree(kk)%xMax(6)) then
 
-             ! Set the number of interpolation nodes to 6 and
-             ! determine the interpolation weights.
+                            ! Coordinate is inside the leaf. Store the leaf in
+                            ! the list for the new front.
 
-             nNodeElement = 6
+                            if (nFrontLeavesNew == nAllocFront) then
+                                i = nAllocFront
+                                call reallocPlus(frontLeavesNew, i, 250, ADT)
+                                call reallocPlus(frontLeaves, nAllocFront, 250, ADT)
+                            end if
 
-             oneMinusUminusV = one - u - v
-             oneMinusW       = one - w
+                            nFrontLeavesNew = nFrontLeavesNew + 1
+                            frontLeavesNew(nFrontLeavesNew) = kk
 
-             weight(1) = oneMinusUminusV*oneMinusW
-             weight(2) =               u*oneMinusW
-             weight(3) =               v*oneMinusW
-             weight(4) = oneMinusUminusV*        w
-             weight(5) =               u*        w
-             weight(6) =               v*        w
-          endif
+                        end if
 
-          !=========================================================
+                    end if terminalTest
 
-       case (adtHexahedron)
+                end do childrenLoop
 
-          ! Element is a hexahedron.
-          ! Compute the coordinates relative to node 1.
+            end do currentFrontLoop
 
-          ll   = ADT%elementID(kk)
-          n(1) = ADT%hexaConn(1,ll)
+            ! End of the loop over the current front. If the new front
+            ! is empty the entire tree has been traversed and an exit is
+            ! made from the corresponding loop.
 
-          do i=2,8
-             n(i) = ADT%hexaConn(i,ll)
+            if (nFrontLeavesNew == 0) exit treeTraversalLoop
 
-             xn(1,i) = ADT%coor(1,n(i)) - ADT%coor(1,n(1))
-             xn(2,i) = ADT%coor(2,n(i)) - ADT%coor(2,n(1))
-             xn(3,i) = ADT%coor(3,n(i)) - ADT%coor(3,n(1))
-          enddo
+            ! Copy the data of the new front leaves into the current
+            ! front for the next round.
 
-          x(1) = coor(1) - ADT%coor(1,n(1))
-          x(2) = coor(2) - ADT%coor(2,n(1))
-          x(3) = coor(3) - ADT%coor(3,n(1))
+            nFrontLeaves = nFrontLeavesNew
+            do ll = 1, nFrontLeaves
+                frontLeaves(ll) = frontLeavesNew(ll)
+            end do
 
-          ! Modify the coordinates of node 3, 6, 8 and 7 such that
-          ! they correspond to the weights of the u*v, u*w, v*w and
-          ! u*v*w term in the transformation respectively.
+        end do treeTraversalLoop
+        !
+        !          Part 2: Loop over the selected bounding boxes and check if
+        !                  the corresponding elements contain the point.
+        !
+        elementFound = .false.
 
-          xn(1,7) = xn(1,7) + xn(1,2) + xn(1,4) + xn(1,5) &
-               -           xn(1,3) - xn(1,6) - xn(1,8)
-          xn(2,7) = xn(2,7) + xn(2,2) + xn(2,4) + xn(2,5) &
-               -           xn(2,3) - xn(2,6) - xn(2,8)
-          xn(3,7) = xn(3,7) + xn(3,2) + xn(3,4) + xn(3,5) &
-               -           xn(3,3) - xn(3,6) - xn(3,8)
+        BBoxLoop: do mm = 1, nBB
 
-          xn(1,3) = xn(1,3) - xn(1,2) - xn(1,4)
-          xn(2,3) = xn(2,3) - xn(2,2) - xn(2,4)
-          xn(3,3) = xn(3,3) - xn(3,2) - xn(3,4)
+            ! Determine the element type stored in this bounding box.
 
-          xn(1,6) = xn(1,6) - xn(1,2) - xn(1,5)
-          xn(2,6) = xn(2,6) - xn(2,2) - xn(2,5)
-          xn(3,6) = xn(3,6) - xn(3,2) - xn(3,5)
+            kk = BB(mm)
+            select case (ADT%elementType(kk))
 
-          xn(1,8) = xn(1,8) - xn(1,4) - xn(1,5)
-          xn(2,8) = xn(2,8) - xn(2,4) - xn(2,5)
-          xn(3,8) = xn(3,8) - xn(3,4) - xn(3,5)
+            case (adtTetrahedron)
 
-          ! Set the starting values of u, v and w such that it is
-          ! somewhere in the middle of the element. In this way the
-          ! Jacobian matrix is always regular, even if the element
-          ! is degenerate.
+                ! Element is a tetrahedron.
+                ! Compute the coordinates relative to node 1.
 
-          u = half; v = half; w = half
+                ll = ADT%elementID(kk)
+                n(1) = ADT%tetraConn(1, ll)
 
-          ! The Newton algorithm to determine the parametric
-          ! weights u, v and w for the given coordinate.
+                do i = 2, 4
+                    n(i) = ADT%tetraConn(i, ll)
 
-          NewtonHexa: do ll=1,iterMax
+                    xn(1, i) = ADT%coor(1, n(i)) - ADT%coor(1, n(1))
+                    xn(2, i) = ADT%coor(2, n(i)) - ADT%coor(2, n(1))
+                    xn(3, i) = ADT%coor(3, n(i)) - ADT%coor(3, n(1))
+                end do
 
-             ! Compute the RHS.
+                x(1) = coor(1) - ADT%coor(1, n(1))
+                x(2) = coor(2) - ADT%coor(2, n(1))
+                x(3) = coor(3) - ADT%coor(3, n(1))
 
-             uv = u*v; uw = u*w; vw = v*w; wvu = u*v*w
+                ! Determine the matrix for the linear transformation
+                ! from the standard element to the current element.
 
-             f(1) = xn(1,2)*u   + xn(1,4)*v  + xn(1,5)*w  &
-                  + xn(1,3)*uv  + xn(1,6)*uw + xn(1,8)*vw &
-                  + xn(1,7)*wvu - x(1)
-             f(2) = xn(2,2)*u   + xn(2,4)*v  + xn(2,5)*w  &
-                  + xn(2,3)*uv  + xn(2,6)*uw + xn(2,8)*vw &
-                  + xn(2,7)*wvu - x(2)
-             f(3) = xn(3,2)*u   + xn(3,4)*v  + xn(3,5)*w  &
-                  + xn(3,3)*uv  + xn(3,6)*uw + xn(3,8)*vw &
-                  + xn(3,7)*wvu - x(3)
+                a11 = xn(1, 2); a12 = xn(1, 3); a13 = xn(1, 4)
+                a21 = xn(2, 2); a22 = xn(2, 3); a23 = xn(2, 4)
+                a31 = xn(3, 2); a32 = xn(3, 3); a33 = xn(3, 4)
 
-             ! Compute the Jacobian.
+                ! Compute the determinant. Make sure that it is not zero
+                ! and invert the value.
 
-             a11 = xn(1,2) + xn(1,3)*v + xn(1,6)*w + xn(1,7)*vw
-             a12 = xn(1,4) + xn(1,3)*u + xn(1,8)*w + xn(1,7)*uw
-             a13 = xn(1,5) + xn(1,6)*u + xn(1,8)*v + xn(1,7)*uv
+                val = a11 * (a22 * a33 - a32 * a23) + a21 * (a13 * a32 - a12 * a33) &
+                      + a31 * (a12 * a23 - a13 * a22)
+                val = sign(one, val) / max(abs(val), adtEps)
 
-             a21 = xn(2,2) + xn(2,3)*v + xn(2,6)*w + xn(2,7)*vw
-             a22 = xn(2,4) + xn(2,3)*u + xn(2,8)*w + xn(2,7)*uw
-             a23 = xn(2,5) + xn(2,6)*u + xn(2,8)*v + xn(2,7)*uv
+                ! Compute the u, v, w weights for the given coordinate.
 
-             a31 = xn(3,2) + xn(3,3)*v + xn(3,6)*w + xn(3,7)*vw
-             a32 = xn(3,4) + xn(3,3)*u + xn(3,8)*w + xn(3,7)*uw
-             a33 = xn(3,5) + xn(3,6)*u + xn(3,8)*v + xn(3,7)*uv
+                u = val * ((a22 * a33 - a23 * a32) * x(1) &
+                           + (a13 * a32 - a12 * a33) * x(2) &
+                           + (a12 * a23 - a13 * a22) * x(3))
+                v = val * ((a23 * a31 - a21 * a33) * x(1) &
+                           + (a11 * a33 - a13 * a31) * x(2) &
+                           + (a13 * a21 - a11 * a23) * x(3))
+                w = val * ((a21 * a32 - a22 * a31) * x(1) &
+                           + (a12 * a31 - a11 * a32) * x(2) &
+                           + (a11 * a22 - a12 * a21) * x(3))
 
-             ! Compute the determinant. Make sure that it is not zero
-             ! and invert the value. The cut off is needed to be able
-             ! to handle exceptional cases for degenerate elements.
+                ! Check if the coordinate is inside the tetrahedron.
+                ! If so, set elementFound to .true. and determine the
+                ! interpolation weights.
 
-             val = a11*(a22*a33 - a32*a23) + a21*(a13*a32 - a12*a33) &
-                  + a31*(a12*a23 - a13*a22)
-             val = sign(one,val)/max(abs(val),adtEps)
+                if (u >= zero .and. v >= zero .and. &
+                    w >= zero .and. (u + v + w) <= one) then
+                    elementFound = .true.
+                    failed = .False. !No iteration for tetrahedrons
 
-             ! Compute the new values of u, v and w.
+                    ! Set the number of interpolation nodes to 4 and
+                    ! determine the interpolation weights.
 
-             du = val*((a22*a33 - a23*a32)*f(1) &
-                  +      (a13*a32 - a12*a33)*f(2) &
-                  +      (a12*a23 - a13*a22)*f(3))
-             dv = val*((a23*a31 - a21*a33)*f(1) &
-                  +      (a11*a33 - a13*a31)*f(2) &
-                  +      (a13*a21 - a11*a23)*f(3))
-             dw = val*((a21*a32 - a22*a31)*f(1) &
-                  +      (a12*a31 - a11*a32)*f(2) &
-                  +      (a11*a22 - a12*a21)*f(3))
+                    nNodeElement = 4
 
-             u = u - du; v = v - dv; w = w - dw
+                    weight(1) = one - u - v - w
+                    weight(2) = u
+                    weight(3) = v
+                    weight(4) = w
+                end if
 
-             ! Exit the loop if the update of the parametric
-             ! weights is below the threshold
+                !=========================================================
 
-             val = sqrt(du*du + dv*dv + dw*dw)
-             if(val <= thresConv) then
-                failed = .False.
-                exit NewtonHexa
-             end if
+            case (adtPyramid)
 
-          enddo NewtonHexa
+                ! Element is a pyramid.
+                ! Compute the coordinates relative to node 1.
 
-          ! Check if the coordinate is inside the hexahedron.
-          ! If so, set elementFound to .true. and determine the
-          ! interpolation weights.
+                ll = ADT%elementID(kk)
+                n(1) = ADT%pyraConn(1, ll)
 
-          if(u >= zero .and. u <= one .and. &
-               v >= zero .and. v <= one .and. &
-               w >= zero .and. w <= one .and. .not. failed) then
-             elementFound = .true.
+                do i = 2, 5
+                    n(i) = ADT%pyraConn(i, ll)
 
-             ! Set the number of interpolation nodes to 8 and
-             ! determine the interpolation weights.
+                    xn(1, i) = ADT%coor(1, n(i)) - ADT%coor(1, n(1))
+                    xn(2, i) = ADT%coor(2, n(i)) - ADT%coor(2, n(1))
+                    xn(3, i) = ADT%coor(3, n(i)) - ADT%coor(3, n(1))
+                end do
 
-             nNodeElement = 8
+                x(1) = coor(1) - ADT%coor(1, n(1))
+                x(2) = coor(2) - ADT%coor(2, n(1))
+                x(3) = coor(3) - ADT%coor(3, n(1))
 
-             oneMinusU = one - u
-             oneMinusV = one - v
-             oneMinusW = one - w
+                ! Modify the coordinates of node 3, such that it
+                ! corresponds to the weights of the u*v term in the
+                ! transformation.
 
-             weight(1) = oneMinusU*oneMinusV*oneMinusW
-             weight(2) =         u*oneMinusV*oneMinusW
-             weight(3) =         u*        v*oneMinusW
-             weight(4) = oneMinusU*        v*oneMinusW
-             weight(5) = oneMinusU*oneMinusV*        w
-             weight(6) =         u*oneMinusV*        w
-             weight(7) =         u*        v*        w
-             weight(8) = oneMinusU*        v*        w
-          endif
+                xn(1, 3) = xn(1, 3) - xn(1, 2) - xn(1, 4)
+                xn(2, 3) = xn(2, 3) - xn(2, 2) - xn(2, 4)
+                xn(3, 3) = xn(3, 3) - xn(3, 2) - xn(3, 4)
 
-       end select
+                ! Set the starting values of u, v and w such that it is
+                ! somewhere in the middle of the element. In this way the
+                ! Jacobian matrix is always regular, even if the element
+                ! is degenerate.
 
-       ! If the coordinate is inside the element store all the
-       ! necessary information and exit the loop over the target
-       ! bounding boxes.
+                u = half; v = half; w = half
 
-       if( elementFound ) then
+                ! The Newton algorithm to determine the parametric
+                ! weights u, v and w for the given coordinate.
 
-          ! The processor, element type and local element ID.
+                NewtonPyra: do ll = 1, iterMax
 
-          intInfo(1) = ADT%myID
-          intInfo(2) = ADT%elementType(kk)
-          intInfo(3) = ADT%elementID(kk)
+                    ! Compute the RHS.
 
-          ! The parametric weights.
+                    uv = u * v
+                    oneMinusW = one - w
 
-          uvw(1) = u
-          uvw(2) = v
-          uvw(3) = w
+                    f(1) = oneMinusW * (xn(1, 2) * u + xn(1, 4) * v + xn(1, 3) * uv) &
+                           + xn(1, 5) * w - x(1)
+                    f(2) = oneMinusW * (xn(2, 2) * u + xn(2, 4) * v + xn(2, 3) * uv) &
+                           + xn(2, 5) * w - x(2)
+                    f(3) = oneMinusW * (xn(3, 2) * u + xn(3, 4) * v + xn(3, 3) * uv) &
+                           + xn(3, 5) * w - x(3)
 
-          ! The interpolated solution.
+                    ! Compute the Jacobian.
 
-          do ll=1,nInterpol
-             ii = 3+ll
-             uvw(ii) = weight(1)*arrDonor(ll,n(1))
-             do i=2,nNodeElement
-                uvw(ii) = uvw(ii) + weight(i)*arrDonor(ll,n(i))
-             enddo
-          enddo
+                    a11 = oneMinusW * (xn(1, 2) + xn(1, 3) * v)
+                    a12 = oneMinusW * (xn(1, 4) + xn(1, 3) * u)
+                    a13 = xn(1, 5) - xn(1, 2) * u - xn(1, 4) * v - xn(1, 3) * uv
 
-          ! And exit the loop over the bounding boxes.
+                    a21 = oneMinusW * (xn(2, 2) + xn(2, 3) * v)
+                    a22 = oneMinusW * (xn(2, 4) + xn(2, 3) * u)
+                    a23 = xn(2, 5) - xn(2, 2) * u - xn(2, 4) * v - xn(2, 3) * uv
 
-          exit BBoxLoop
-       endif
+                    a31 = oneMinusW * (xn(3, 2) + xn(3, 3) * v)
+                    a32 = oneMinusW * (xn(3, 4) + xn(3, 3) * u)
+                    a33 = xn(3, 5) - xn(3, 2) * u - xn(3, 4) * v - xn(3, 3) * uv
 
-    enddo BBoxLoop
+                    ! Compute the determinant. Make sure that it is not zero
+                    ! and invert the value. The cut off is needed to be able
+                    ! to handle exceptional cases for degenerate elements.
 
-  end subroutine containmentTreeSearchSinglePoint
+                    val = a11 * (a22 * a33 - a32 * a23) + a21 * (a13 * a32 - a12 * a33) &
+                          + a31 * (a12 * a23 - a13 * a22)
+                    val = sign(one, val) / max(abs(val), adtEps)
 
+                    ! Compute the new values of u, v and w.
 
-  subroutine minDistanceTreeSearch(ADT,       coor,   &
-       intInfo,   uvw,    &
-       arrDonor,  nCoor,  &
-       nInterpol)
-    !
-    !        This routine performs the actual minimum distance search in
-    !        the local tree. It is a local routine in the sense that no
-    !        communication is involved.
-    !        Subroutine intent(in) arguments.
-    !        --------------------------------
-    !        ADT:       ADT type whose ADT must be searched
-    !        nCoor:     Number of coordinates for which the element must
-    !                   be determined.
-    !        coor:      The coordinates and the currently stored minimum
-    !                   distance squared of these points:
-    !                   coor(1,;): Coordinate 1.
-    !                   coor(2,;): Coordinate 2.
-    !                   coor(3,;): Coordinate 3.
-    !                   coor(4,;): The currently stored minimum distance
-    !                   squared.
-    !        nInterpol: Number of variables to be interpolated.
-    !        arrDonor:  Array with the donor data; needed to obtain the
-    !                   interpolated data.
-    !        Subroutine intent(out) arguments.
-    !        ---------------------------------
-    !        intInfo: 2D integer array, in which the following output
-    !                 will be stored:
-    !                 intInfo(1,:): processor ID of the processor where
-    !                               the element is stored. This of course
-    !                               is myID. If no element is found this
-    !                               value is set to -1.
-    !                 intInfo(2,:): The element type of the element.
-    !                 intInfo(3,:): The element ID of the element in the
-    !                               the connectivity.
-    !        uvw:     2D floating point array to store the parametric
-    !                 coordinates of the point in the transformed element
-    !                 as well as the new distance squared and the
-    !                 interpolated solution:
-    !                 uvw(1, :): Parametric u-weight.
-    !                 uvw(2, :): Parametric v-weight.
-    !                 uvw(3, :): Parametric w-weight.
-    !                 uvw(4, :): The new distance squared.
-    !                 uvw(5:,:): Interpolated solution, if desired. It is
-    !                            possible to call this routine with
-    !                            nInterpol == 0.
-    !
-    implicit none
-    !
-    !       Subroutine arguments.
-    !
-    type(adtType), intent(inout) :: ADT
-    integer(kind=intType), intent(in) :: nCoor
-    integer(kind=intType), intent(in) :: nInterpol
+                    du = val * ((a22 * a33 - a23 * a32) * f(1) &
+                                + (a13 * a32 - a12 * a33) * f(2) &
+                                + (a12 * a23 - a13 * a22) * f(3))
+                    dv = val * ((a23 * a31 - a21 * a33) * f(1) &
+                                + (a11 * a33 - a13 * a31) * f(2) &
+                                + (a13 * a21 - a11 * a23) * f(3))
+                    dw = val * ((a21 * a32 - a22 * a31) * f(1) &
+                                + (a12 * a31 - a11 * a32) * f(2) &
+                                + (a11 * a22 - a12 * a21) * f(3))
 
-    real(kind=realType), dimension(:,:), intent(in) :: coor
-    real(kind=realType), dimension(:,:), intent(in) :: arrDonor
+                    u = u - du; v = v - dv; w = w - dw
 
-    integer(kind=intType), dimension(:,:), intent(out) :: intInfo
-    real(kind=realType),   dimension(:,:), intent(out) :: uvw
-    !
-    !       Local parameters used in the Newton algorithm.
-    !
-    integer(kind=intType), parameter :: iterMax   = 15
-    real(kind=realType),   parameter :: adtEps    = 1.e-25_realType
-    real(kind=realType),   parameter :: thresConv = 1.e-10_realType
-    !
-    !       Local variables.
-    !
-    integer :: ierr, nn
-    integer(kind=intType) :: nAllocBB, nAllocFront, nStack
-    integer(kind=intType), dimension(:), pointer :: frontLeaves
-    integer(kind=intType), dimension(:), pointer :: frontLeavesNew
-    type(adtBBoxTargetType), dimension(:), pointer :: BB
-
-    ! Initial allocation of the arrays for the tree traversal as well
-    ! as the stack array used in the qsort routine. The latter is
-    ! done, because the qsort routine is called for every coordinate
-    ! and therefore it is more efficient to allocate the stack once
-    ! rather than over and over again. The disadvantage of course is
-    ! that an essentially local variable, stack, is now stored in
-    ! adtData.
-
-    nAllocBB    =  10
-    nAllocFront =  25
-    nStack      = 100
-
-    allocate(stack(nStack), BB(nAllocBB), frontLeaves(nAllocFront), &
-         frontLeavesNew(nAllocFront), stat=ierr)
-    if(ierr /= 0)                                    &
-         call adtTerminate(ADT, "minDistanceTreeSearch", &
-         "Memory allocation failure for stack, BB, &
-         &etc.")
-
-    ! Loop over the number of coordinates to be treated.
+                    ! Exit the loop if the update of the parametric
+                    ! weights is below the threshold
 
-    coorLoop: do nn=1,nCoor
+                    val = sqrt(du * du + dv * dv + dw * dw)
+                    if (val <= thresConv) then
+                        failed = .False.
+                        exit NewtonPyra
+                    end if
+                end do NewtonPyra
 
-       call minDistanceTreeSearchSinglePoint(ADT, coor(:, nn), &
-            intInfo(:, nn), uvw(:, nn), arrDonor, nInterpol, BB, &
-            frontLeaves, frontLeavesNew)
+                ! Check if the coordinate is inside the pyramid.
+                ! If so, set elementFound to .true. and determine the
+                ! interpolation weights.
 
-    enddo coorLoop
+                if (u >= zero .and. v >= zero .and. &
+                    w >= zero .and. (u + w) <= one .and. &
+                    (v + w) <= one .and. .not. failed) then
+                    elementFound = .true.
 
-    ! Release the memory allocated in this routine.
+                    ! Set the number of interpolation nodes to 5 and
+                    ! determine the interpolation weights.
 
-    deallocate(stack, BB, frontLeaves, frontLeavesNew, stat=ierr)
-    if(ierr /= 0)                                    &
-         call adtTerminate(ADT, "minDistanceTreeSearch", &
-         "Deallocation failure for stack, BB, etc.")
+                    nNodeElement = 5
 
-  end subroutine minDistanceTreeSearch
+                    oneMinusU = one - u
+                    oneMinusV = one - v
+                    oneMinusW = one - w
 
+                    weight(1) = oneMinusU * oneMinusV * oneMinusW
+                    weight(2) = u * oneMinusV * oneMinusW
+                    weight(3) = u * v * oneMinusW
+                    weight(4) = oneMinusU * v * oneMinusW
+                    weight(5) = w
+                end if
 
-  subroutine minDistanceTreeSearchSinglePoint(ADT, coor, intInfo, &
-       uvw, arrDonor, nInterpol, BB, frontLeaves, frontLeavesNew)
-    !
-    !        This routine performs the actual minimum distance search for
-    !        a single point on the local tree. It is local in the sens
-    !        that no communication is involved. This routine does the
-    !        actual search. The minDistanceTreeSearch is just a wrapper
-    !        around this routine. The reason for the split is that the
-    !        overset mesh connectivity requires efficient calling with
-    !        a single coordinate. Therefore, this rouine does not
-    !        allocate/deallocate any variables.
-    !        Subroutine intent(in) arguments.
-    !        --------------------------------
-    !        ADT:       ADT type whose ADT must be searched
-    !        coor:      The coordinates and the currently stored minimum
-    !                   distance squared of these points:
-    !                   coor(1): Coordinate 1.
-    !                   coor(2): Coordinate 2.
-    !                   coor(3): Coordinate 3.
-    !                   coor(4): The currently stored minimum distance
-    !                   squared.
-    !        nInterpol: Number of variables to be interpolated.
-    !        arrDonor:  Array with the donor data; needed to obtain the
-    !                   interpolated data.
-    !        Subroutine intent(out) arguments.
-    !        ---------------------------------
-    !        intInfo: 1D integer array, in which the following output
-    !                 will be stored:
-    !                 intInfo(1): processor ID of the processor where
-    !                               the element is stored. This of course
-    !                               is myID. If no element is found this
-    !                               value is set to -1.
-    !                 intInfo(2): The element type of the element.
-    !                 intInfo(3): The element ID of the element in the
-    !                               the connectivity.
-    !        uvw:     2D floating point array to store the parametric
-    !                 coordinates of the point in the transformed element
-    !                 as well as the new distance squared and the
-    !                 interpolated solution:
-    !                 uvw(1): Parametric u-weight.
-    !                 uvw(2): Parametric v-weight.
-    !                 uvw(3): Parametric w-weight.
-    !                 uvw(4): The new distance squared.
-    !                 uvw(5): Interpolated solution, if desired. It is
-    !                            possible to call this routine with
-    !                            nInterpol == 0.
-    !
-    implicit none
-    !
-    !       Subroutine arguments.
-    !
-    type(adtType), intent(inout) :: ADT
-    integer(kind=intType), intent(in) :: nInterpol
-
-    real(kind=realType), dimension(4), intent(in) :: coor
-    real(kind=realType), dimension(:,:), intent(in) :: arrDonor
-
-    integer(kind=intType), dimension(3), intent(out) :: intInfo
-    real(kind=realType),   dimension(5), intent(out) :: uvw
-    integer(kind=intType), dimension(:), pointer :: frontLeaves
-    integer(kind=intType), dimension(:), pointer :: frontLeavesNew
-    type(adtBBoxTargetType), dimension(:), pointer :: BB
-    !
-    !       Local parameters used in the Newton algorithm.
-    !
-    integer(kind=intType), parameter :: iterMax   = 15
-    real(kind=realType),   parameter :: adtEps    = 1.e-25_realType
-    real(kind=realType),   parameter :: thresConv = 1.e-10_realType
-    !
-    !       Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: ii, kk, ll, mm, nn, activeLeaf
-    integer(kind=intType) :: nBB, nFrontLeaves, nFrontLeavesNew
-    integer(kind=intType) :: nAllocBB, nAllocFront, nNodeElement
-    integer(kind=intType) :: i, kkk
-
-    integer(kind=intType), dimension(8) :: n, m
+                !=========================================================
 
-    real(kind=realType) :: dx, dy, dz, d1, d2, invLen, val
-    real(kind=realType) :: u, v, w, uv, uold, vold, vn, du, dv
-    real(kind=realType) :: uu, vv, ww
+            case (adtPrism)
 
-    real(kind=realType), dimension(2) :: dd
-    real(kind=realType), dimension(3) :: x1, x21, x41, x3142, xf
-    real(kind=realType), dimension(3) :: vf, vt, a, b, norm, an, bn
-    real(kind=realType), dimension(3) :: chi
-    real(kind=realType), dimension(8) :: weight
+                ! Element is a prism.
+                ! Compute the coordinates relative to node 1.
 
-    real(kind=realType), dimension(:,:), pointer :: xBBox
+                ll = ADT%elementID(kk)
+                n(1) = ADT%prismsConn(1, ll)
 
-    logical :: elementFound
-    type(adtLeafType),       dimension(:), pointer :: ADTree
+                do i = 2, 6
+                    n(i) = ADT%prismsConn(i, ll)
 
-    ! Set some pointers to make the code more readable.
+                    xn(1, i) = ADT%coor(1, n(i)) - ADT%coor(1, n(1))
+                    xn(2, i) = ADT%coor(2, n(i)) - ADT%coor(2, n(1))
+                    xn(3, i) = ADT%coor(3, n(i)) - ADT%coor(3, n(1))
+                end do
 
-    xBBox  => ADT%xBBox
-    ADTree => ADT%ADTree
+                x(1) = coor(1) - ADT%coor(1, n(1))
+                x(2) = coor(2) - ADT%coor(2, n(1))
+                x(3) = coor(3) - ADT%coor(3, n(1))
 
-    ! Initial allocation of the arrays for the tree traversal as well
-    ! as the stack array used in the qsort routine. The latter is
-    ! done, because the qsort routine is called for every coordinate
-    ! and therefore it is more efficient to allocate the stack once
-    ! rather than over and over again. The disadvantage of course is
-    ! that an essentially local variable, stack, is now stored in
-    ! adtData.
+                ! Modify the coordinates of node 5 and 6, such that they
+                ! correspond to the weights of the u*w and v*w term in the
+                ! transformation respectively.
 
-    nAllocBB    =  size(BB)
-    nAllocFront =  size(frontLeaves)
-    nStack      =  size(stack)
+                xn(1, 5) = xn(1, 5) - xn(1, 2) - xn(1, 4)
+                xn(2, 5) = xn(2, 5) - xn(2, 2) - xn(2, 4)
+                xn(3, 5) = xn(3, 5) - xn(3, 2) - xn(3, 4)
 
-    ! Initialize the processor ID to -1 to indicate that no
-    ! corresponding volume element is found and the new minimum
-    ! distance squared to the old value.
+                xn(1, 6) = xn(1, 6) - xn(1, 3) - xn(1, 4)
+                xn(2, 6) = xn(2, 6) - xn(2, 3) - xn(2, 4)
+                xn(3, 6) = xn(3, 6) - xn(3, 3) - xn(3, 4)
 
-    intInfo(1)     = -1
-    uvw(4) = coor(4)
-    !
-    !          Part 1. Determine the possible minimum distance squared to
-    !                  the root leaf. If larger than the current distance
-    !                  there is no need to search this tree.
-    !
-    if(     coor(1) < ADTree(1)%xMin(1)) then
-       dx =  coor(1) - ADTree(1)%xMin(1)
-    else if(coor(1) > ADTree(1)%xMax(4)) then
-       dx =  coor(1) - ADTree(1)%xMax(4)
-    else
-       dx = zero
-    endif
-
-    if(     coor(2) < ADTree(1)%xMin(2)) then
-       dy =  coor(2) - ADTree(1)%xMin(2)
-    else if(coor(2) > ADTree(1)%xMax(5)) then
-       dy =  coor(2) - ADTree(1)%xMax(5)
-    else
-       dy = zero
-    endif
-
-    if(     coor(3) < ADTree(1)%xMin(3)) then
-       dz =  coor(3) - ADTree(1)%xMin(3)
-    else if(coor(3) > ADTree(1)%xMax(6)) then
-       dz =  coor(3) - ADTree(1)%xMax(6)
-    else
-       dz = zero
-    endif
-
-    ! Continue with the next coordinate if the possible distance
-    ! squared to the root leaf is larger than the currently stored
-    ! value.
-
-    if((dx*dx + dy*dy + dz*dz) >= uvw(4)) return
-    !
-    !          Part 2. Find a likely bounding box, which minimizes the
-    !                  guaranteed distance.
-    !
-    activeLeaf = 1
+                ! Set the starting values of u, v and w such that it is
+                ! somewhere in the middle of the element. In this way the
+                ! Jacobian matrix is always regular, even if the element
+                ! is degenerate.
 
-    ! Traverse the tree until a terminal leaf is found.
+                u = fourth; v = fourth; w = half
 
-    treeTraversal1: do
+                ! The Newton algorithm to determine the parametric
+                ! weights u, v and w for the given coordinate.
 
-       ! Exit the loop when a terminal leaf has been found.
-       ! This is indicated by a negative value of activeLeaf.
+                NewtonPrisms: do ll = 1, iterMax
 
-       if(activeLeaf < 0) exit treeTraversal1
+                    ! Compute the RHS.
 
-       ! Determine the guaranteed distance squared for both children.
+                    uw = u * w; vw = v * w
 
-       do mm=1,2
+                    f(1) = xn(1, 2) * u + xn(1, 3) * v + xn(1, 4) * w &
+                           + xn(1, 5) * uw + xn(1, 6) * vw - x(1)
+                    f(2) = xn(2, 2) * u + xn(2, 3) * v + xn(2, 4) * w &
+                           + xn(2, 5) * uw + xn(2, 6) * vw - x(2)
+                    f(3) = xn(3, 2) * u + xn(3, 3) * v + xn(3, 4) * w &
+                           + xn(3, 5) * uw + xn(3, 6) * vw - x(3)
 
-          ! Determine whether the child contains a bounding box or
-          ! another leaf of the tree.
+                    ! Compute the Jacobian.
 
-          ll = ADTree(activeLeaf)%children(mm)
-          if(ll > 0) then
+                    a11 = xn(1, 2) + xn(1, 5) * w
+                    a12 = xn(1, 3) + xn(1, 6) * w
+                    a13 = xn(1, 4) + xn(1, 5) * u + xn(1, 6) * v
 
-             ! Child contains a leaf. Determine the guaranteed distance
-             ! vector to the leaf.
+                    a21 = xn(2, 2) + xn(2, 5) * w
+                    a22 = xn(2, 3) + xn(2, 6) * w
+                    a23 = xn(2, 4) + xn(2, 5) * u + xn(2, 6) * v
 
-             d1 = abs(coor(1) - ADTree(ll)%xMin(1))
-             d2 = abs(coor(1) - ADTree(ll)%xMax(4))
-             dx = max(d1,d2)
+                    a31 = xn(3, 2) + xn(3, 5) * w
+                    a32 = xn(3, 3) + xn(3, 6) * w
+                    a33 = xn(3, 4) + xn(3, 5) * u + xn(3, 6) * v
 
-             d1 = abs(coor(2) - ADTree(ll)%xMin(2))
-             d2 = abs(coor(2) - ADTree(ll)%xMax(5))
-             dy = max(d1,d2)
+                    ! Compute the determinant. Make sure that it is not zero
+                    ! and invert the value. The cut off is needed to be able
+                    ! to handle exceptional cases for degenerate elements.
 
-             d1 = abs(coor(3) - ADTree(ll)%xMin(3))
-             d2 = abs(coor(3) - ADTree(ll)%xMax(6))
-             dz = max(d1,d2)
+                    val = a11 * (a22 * a33 - a32 * a23) + a21 * (a13 * a32 - a12 * a33) &
+                          + a31 * (a12 * a23 - a13 * a22)
+                    val = sign(one, val) / max(abs(val), adtEps)
 
-          else
+                    ! Compute the new values of u, v and w.
 
-             ! Child contains a bounding box. Determine the guaranteed
-             ! distance vector to it.
+                    du = val * ((a22 * a33 - a23 * a32) * f(1) &
+                                + (a13 * a32 - a12 * a33) * f(2) &
+                                + (a12 * a23 - a13 * a22) * f(3))
+                    dv = val * ((a23 * a31 - a21 * a33) * f(1) &
+                                + (a11 * a33 - a13 * a31) * f(2) &
+                                + (a13 * a21 - a11 * a23) * f(3))
+                    dw = val * ((a21 * a32 - a22 * a31) * f(1) &
+                                + (a12 * a31 - a11 * a32) * f(2) &
+                                + (a11 * a22 - a12 * a21) * f(3))
 
-             ll = -ll
+                    u = u - du; v = v - dv; w = w - dw
 
-             d1 = abs(coor(1) - xBBox(1,ll))
-             d2 = abs(coor(1) - xBBox(4,ll))
-             dx = max(d1,d2)
+                    ! Exit the loop if the update of the parametric
+                    ! weights is below the threshold
 
-             d1 = abs(coor(2) - xBBox(2,ll))
-             d2 = abs(coor(2) - xBBox(5,ll))
-             dy = max(d1,d2)
+                    val = sqrt(du * du + dv * dv + dw * dw)
+                    if (val <= thresConv) then
+                        failed = .False.
+                        exit NewtonPrisms
+                    end if
 
-             d1 = abs(coor(3) - xBBox(3,ll))
-             d2 = abs(coor(3) - xBBox(6,ll))
-             dz = max(d1,d2)
+                end do NewtonPrisms
 
-          endif
+                ! Check if the coordinate is inside the prism.
+                ! If so, set elementFound to .true. and determine the
+                ! interpolation weights.
 
-          ! Compute the guaranteed distance squared for this child.
+                if (u >= zero .and. v >= zero .and. &
+                    w >= zero .and. w <= one .and. &
+                    (u + v) <= one .and. .not. failed) then
+                    elementFound = .true.
 
-          dd(mm) = dx*dx + dy*dy + dz*dz
+                    ! Set the number of interpolation nodes to 6 and
+                    ! determine the interpolation weights.
 
-       enddo
+                    nNodeElement = 6
 
-       ! Determine which will be the next leaf in the tree traversal.
-       ! This will be the leaf which has the minimum guaranteed
-       ! distance. In case of ties take the left leaf, because this
-       ! leaf may have more children.
+                    oneMinusUminusV = one - u - v
+                    oneMinusW = one - w
 
-       if(dd(1) <= dd(2)) then
-          activeLeaf = ADTree(activeLeaf)%children(1)
-       else
-          activeLeaf = ADTree(activeLeaf)%children(2)
-       endif
-
-    enddo treeTraversal1
-
-    ! Store the minimum of the just computed guaranteed distance
-    ! squared and the currently stored value in uvw.
-
-    uvw(4) = min(uvw(4),dd(1),dd(2))
-    !
-    !          Part 3. Find the bounding boxes whose possible minimum
-    !                  distance is less than the currently stored value.
-    !
-    ! In part 1 it was already tested that the possible distance
-    ! squared of the root leaf was less than the current value.
-    ! Therefore initialize the current front to the root leaf and
-    ! set the number of bounding boxes to 0.
+                    weight(1) = oneMinusUminusV * oneMinusW
+                    weight(2) = u * oneMinusW
+                    weight(3) = v * oneMinusW
+                    weight(4) = oneMinusUminusV * w
+                    weight(5) = u * w
+                    weight(6) = v * w
+                end if
 
-    nBB = 0
+                !=========================================================
 
-    nFrontLeaves   = 1
-    frontLeaves(1) = 1
+            case (adtHexahedron)
 
-    ! Second tree traversal. Now to find all possible bounding
-    ! box candidates.
+                ! Element is a hexahedron.
+                ! Compute the coordinates relative to node 1.
 
-    treeTraversal2: do
+                ll = ADT%elementID(kk)
+                n(1) = ADT%hexaConn(1, ll)
 
-       ! Initialize the number of leaves for the new front, i.e.
-       ! the front of the next round, to 0.
+                do i = 2, 8
+                    n(i) = ADT%hexaConn(i, ll)
 
-       nFrontLeavesNew = 0
+                    xn(1, i) = ADT%coor(1, n(i)) - ADT%coor(1, n(1))
+                    xn(2, i) = ADT%coor(2, n(i)) - ADT%coor(2, n(1))
+                    xn(3, i) = ADT%coor(3, n(i)) - ADT%coor(3, n(1))
+                end do
 
-       ! Loop over the leaves of the current front.
+                x(1) = coor(1) - ADT%coor(1, n(1))
+                x(2) = coor(2) - ADT%coor(2, n(1))
+                x(3) = coor(3) - ADT%coor(3, n(1))
 
-       currentFrontLoop: do ii=1,nFrontLeaves
+                ! Modify the coordinates of node 3, 6, 8 and 7 such that
+                ! they correspond to the weights of the u*v, u*w, v*w and
+                ! u*v*w term in the transformation respectively.
 
-          ! Store the ID of the leaf a bit easier and loop over
-          ! its two children.
+                xn(1, 7) = xn(1, 7) + xn(1, 2) + xn(1, 4) + xn(1, 5) &
+                           - xn(1, 3) - xn(1, 6) - xn(1, 8)
+                xn(2, 7) = xn(2, 7) + xn(2, 2) + xn(2, 4) + xn(2, 5) &
+                           - xn(2, 3) - xn(2, 6) - xn(2, 8)
+                xn(3, 7) = xn(3, 7) + xn(3, 2) + xn(3, 4) + xn(3, 5) &
+                           - xn(3, 3) - xn(3, 6) - xn(3, 8)
 
-          ll = frontLeaves(ii)
+                xn(1, 3) = xn(1, 3) - xn(1, 2) - xn(1, 4)
+                xn(2, 3) = xn(2, 3) - xn(2, 2) - xn(2, 4)
+                xn(3, 3) = xn(3, 3) - xn(3, 2) - xn(3, 4)
 
-          childrenLoop: do mm=1,2
+                xn(1, 6) = xn(1, 6) - xn(1, 2) - xn(1, 5)
+                xn(2, 6) = xn(2, 6) - xn(2, 2) - xn(2, 5)
+                xn(3, 6) = xn(3, 6) - xn(3, 2) - xn(3, 5)
 
-             ! Determine whether this child contains a bounding box
-             ! or a leaf of the next level.
+                xn(1, 8) = xn(1, 8) - xn(1, 4) - xn(1, 5)
+                xn(2, 8) = xn(2, 8) - xn(2, 4) - xn(2, 5)
+                xn(3, 8) = xn(3, 8) - xn(3, 4) - xn(3, 5)
 
-             kk = ADTree(ll)%children(mm)
-             terminalTest: if(kk < 0) then
+                ! Set the starting values of u, v and w such that it is
+                ! somewhere in the middle of the element. In this way the
+                ! Jacobian matrix is always regular, even if the element
+                ! is degenerate.
 
-                ! Child contains a bounding box. Determine the possible
-                ! minimum distance squared to this bounding box.
+                u = half; v = half; w = half
 
-                kk = -kk
+                ! The Newton algorithm to determine the parametric
+                ! weights u, v and w for the given coordinate.
 
-                if(     coor(1) < xBBox(1,kk)) then
-                   dx =  coor(1) - xBBox(1,kk)
-                else if(coor(1) > xBBox(4,kk)) then
-                   dx =  coor(1) - xBBox(4,kk)
-                else
-                   dx = zero
-                endif
+                NewtonHexa: do ll = 1, iterMax
 
-                if(     coor(2) < xBBox(2,kk)) then
-                   dy =  coor(2) - xBBox(2,kk)
-                else if(coor(2) > xBBox(5,kk)) then
-                   dy =  coor(2) - xBBox(5,kk)
-                else
-                   dy = zero
-                endif
+                    ! Compute the RHS.
 
-                if(     coor(3) < xBBox(3,kk)) then
-                   dz =  coor(3) - xBBox(3,kk)
-                else if(coor(3) > xBBox(6,kk)) then
-                   dz =  coor(3) - xBBox(6,kk)
-                else
-                   dz = zero
-                endif
+                    uv = u * v; uw = u * w; vw = v * w; wvu = u * v * w
 
-                d2 = dx*dx + dy*dy + dz*dz
+                    f(1) = xn(1, 2) * u + xn(1, 4) * v + xn(1, 5) * w &
+                           + xn(1, 3) * uv + xn(1, 6) * uw + xn(1, 8) * vw &
+                           + xn(1, 7) * wvu - x(1)
+                    f(2) = xn(2, 2) * u + xn(2, 4) * v + xn(2, 5) * w &
+                           + xn(2, 3) * uv + xn(2, 6) * uw + xn(2, 8) * vw &
+                           + xn(2, 7) * wvu - x(2)
+                    f(3) = xn(3, 2) * u + xn(3, 4) * v + xn(3, 5) * w &
+                           + xn(3, 3) * uv + xn(3, 6) * uw + xn(3, 8) * vw &
+                           + xn(3, 7) * wvu - x(3)
 
-                ! If this distance squared is less than the current
-                ! value, store this bounding box as a target.
+                    ! Compute the Jacobian.
 
-                testStoreBBox: if(d2 < uvw(4)) then
+                    a11 = xn(1, 2) + xn(1, 3) * v + xn(1, 6) * w + xn(1, 7) * vw
+                    a12 = xn(1, 4) + xn(1, 3) * u + xn(1, 8) * w + xn(1, 7) * uw
+                    a13 = xn(1, 5) + xn(1, 6) * u + xn(1, 8) * v + xn(1, 7) * uv
 
-                   ! Check if the memory must be reallocated.
+                    a21 = xn(2, 2) + xn(2, 3) * v + xn(2, 6) * w + xn(2, 7) * vw
+                    a22 = xn(2, 4) + xn(2, 3) * u + xn(2, 8) * w + xn(2, 7) * uw
+                    a23 = xn(2, 5) + xn(2, 6) * u + xn(2, 8) * v + xn(2, 7) * uv
 
-                   if(nBB == nAllocBB) &
-                        call reallocBBoxTargetTypePlus(BB, nAllocBB, &
-                        100, ADT)
+                    a31 = xn(3, 2) + xn(3, 3) * v + xn(3, 6) * w + xn(3, 7) * vw
+                    a32 = xn(3, 4) + xn(3, 3) * u + xn(3, 8) * w + xn(3, 7) * uw
+                    a33 = xn(3, 5) + xn(3, 6) * u + xn(3, 8) * v + xn(3, 7) * uv
 
-                   ! Update the counter and store the data.
+                    ! Compute the determinant. Make sure that it is not zero
+                    ! and invert the value. The cut off is needed to be able
+                    ! to handle exceptional cases for degenerate elements.
 
-                   nBB              = nBB + 1
-                   BB(nBB)%ID       = kk
-                   BB(nBB)%posDist2 = d2
+                    val = a11 * (a22 * a33 - a32 * a23) + a21 * (a13 * a32 - a12 * a33) &
+                          + a31 * (a12 * a23 - a13 * a22)
+                    val = sign(one, val) / max(abs(val), adtEps)
 
-                   ! Although in step 2, i.e. the first tree traversal,
-                   ! the guaranteed distance squared to a bounding box
-                   ! has already been computed, this has been done only
-                   ! for a likely candidate and not for all the possible
-                   ! candidates. As this test is relatively cheap, do it
-                   ! now for this bounding box.
+                    ! Compute the new values of u, v and w.
 
-                   d1 = abs(coor(1) - xBBox(1,kk))
-                   d2 = abs(coor(1) - xBBox(4,kk))
-                   dx = max(d1,d2)
+                    du = val * ((a22 * a33 - a23 * a32) * f(1) &
+                                + (a13 * a32 - a12 * a33) * f(2) &
+                                + (a12 * a23 - a13 * a22) * f(3))
+                    dv = val * ((a23 * a31 - a21 * a33) * f(1) &
+                                + (a11 * a33 - a13 * a31) * f(2) &
+                                + (a13 * a21 - a11 * a23) * f(3))
+                    dw = val * ((a21 * a32 - a22 * a31) * f(1) &
+                                + (a12 * a31 - a11 * a32) * f(2) &
+                                + (a11 * a22 - a12 * a21) * f(3))
+
+                    u = u - du; v = v - dv; w = w - dw
 
-                   d1 = abs(coor(2) - xBBox(2,kk))
-                   d2 = abs(coor(2) - xBBox(5,kk))
-                   dy = max(d1,d2)
+                    ! Exit the loop if the update of the parametric
+                    ! weights is below the threshold
 
-                   d1 = abs(coor(3) - xBBox(3,kk))
-                   d2 = abs(coor(3) - xBBox(6,kk))
-                   dz = max(d1,d2)
+                    val = sqrt(du * du + dv * dv + dw * dw)
+                    if (val <= thresConv) then
+                        failed = .False.
+                        exit NewtonHexa
+                    end if
+
+                end do NewtonHexa
 
-                   d2 = dx*dx + dy*dy + dz*dz
-                   uvw(4) = min(uvw(4),d2)
+                ! Check if the coordinate is inside the hexahedron.
+                ! If so, set elementFound to .true. and determine the
+                ! interpolation weights.
 
-                endif testStoreBBox
+                if (u >= zero .and. u <= one .and. &
+                    v >= zero .and. v <= one .and. &
+                    w >= zero .and. w <= one .and. .not. failed) then
+                    elementFound = .true.
+
+                    ! Set the number of interpolation nodes to 8 and
+                    ! determine the interpolation weights.
 
-             else terminalTest
+                    nNodeElement = 8
 
-                ! Child contains a leaf. Compute the possible minimum
-                ! distance squared to the current coordinate.
+                    oneMinusU = one - u
+                    oneMinusV = one - v
+                    oneMinusW = one - w
 
-                if(     coor(1) < ADTree(kk)%xMin(1)) then
-                   dx =  coor(1) - ADTree(kk)%xMin(1)
-                else if(coor(1) > ADTree(kk)%xMax(4)) then
-                   dx =  coor(1) - ADTree(kk)%xMax(4)
-                else
-                   dx = zero
-                endif
+                    weight(1) = oneMinusU * oneMinusV * oneMinusW
+                    weight(2) = u * oneMinusV * oneMinusW
+                    weight(3) = u * v * oneMinusW
+                    weight(4) = oneMinusU * v * oneMinusW
+                    weight(5) = oneMinusU * oneMinusV * w
+                    weight(6) = u * oneMinusV * w
+                    weight(7) = u * v * w
+                    weight(8) = oneMinusU * v * w
+                end if
 
-                if(     coor(2) < ADTree(kk)%xMin(2)) then
-                   dy =  coor(2) - ADTree(kk)%xMin(2)
-                else if(coor(2) > ADTree(kk)%xMax(5)) then
-                   dy =  coor(2) - ADTree(kk)%xMax(5)
-                else
-                   dy = zero
-                endif
+            end select
+
+            ! If the coordinate is inside the element store all the
+            ! necessary information and exit the loop over the target
+            ! bounding boxes.
+
+            if (elementFound) then
+
+                ! The processor, element type and local element ID.
+
+                intInfo(1) = ADT%myID
+                intInfo(2) = ADT%elementType(kk)
+                intInfo(3) = ADT%elementID(kk)
+
+                ! The parametric weights.
+
+                uvw(1) = u
+                uvw(2) = v
+                uvw(3) = w
+
+                ! The interpolated solution.
+
+                do ll = 1, nInterpol
+                    ii = 3 + ll
+                    uvw(ii) = weight(1) * arrDonor(ll, n(1))
+                    do i = 2, nNodeElement
+                        uvw(ii) = uvw(ii) + weight(i) * arrDonor(ll, n(i))
+                    end do
+                end do
+
+                ! And exit the loop over the bounding boxes.
+
+                exit BBoxLoop
+            end if
+
+        end do BBoxLoop
+
+    end subroutine containmentTreeSearchSinglePoint
+
+    subroutine minDistanceTreeSearch(ADT, coor, &
+                                     intInfo, uvw, &
+                                     arrDonor, nCoor, &
+                                     nInterpol)
+        !
+        !        This routine performs the actual minimum distance search in
+        !        the local tree. It is a local routine in the sense that no
+        !        communication is involved.
+        !        Subroutine intent(in) arguments.
+        !        --------------------------------
+        !        ADT:       ADT type whose ADT must be searched
+        !        nCoor:     Number of coordinates for which the element must
+        !                   be determined.
+        !        coor:      The coordinates and the currently stored minimum
+        !                   distance squared of these points:
+        !                   coor(1,;): Coordinate 1.
+        !                   coor(2,;): Coordinate 2.
+        !                   coor(3,;): Coordinate 3.
+        !                   coor(4,;): The currently stored minimum distance
+        !                   squared.
+        !        nInterpol: Number of variables to be interpolated.
+        !        arrDonor:  Array with the donor data; needed to obtain the
+        !                   interpolated data.
+        !        Subroutine intent(out) arguments.
+        !        ---------------------------------
+        !        intInfo: 2D integer array, in which the following output
+        !                 will be stored:
+        !                 intInfo(1,:): processor ID of the processor where
+        !                               the element is stored. This of course
+        !                               is myID. If no element is found this
+        !                               value is set to -1.
+        !                 intInfo(2,:): The element type of the element.
+        !                 intInfo(3,:): The element ID of the element in the
+        !                               the connectivity.
+        !        uvw:     2D floating point array to store the parametric
+        !                 coordinates of the point in the transformed element
+        !                 as well as the new distance squared and the
+        !                 interpolated solution:
+        !                 uvw(1, :): Parametric u-weight.
+        !                 uvw(2, :): Parametric v-weight.
+        !                 uvw(3, :): Parametric w-weight.
+        !                 uvw(4, :): The new distance squared.
+        !                 uvw(5:,:): Interpolated solution, if desired. It is
+        !                            possible to call this routine with
+        !                            nInterpol == 0.
+        !
+        implicit none
+        !
+        !       Subroutine arguments.
+        !
+        type(adtType), intent(inout) :: ADT
+        integer(kind=intType), intent(in) :: nCoor
+        integer(kind=intType), intent(in) :: nInterpol
+
+        real(kind=realType), dimension(:, :), intent(in) :: coor
+        real(kind=realType), dimension(:, :), intent(in) :: arrDonor
+
+        integer(kind=intType), dimension(:, :), intent(out) :: intInfo
+        real(kind=realType), dimension(:, :), intent(out) :: uvw
+        !
+        !       Local parameters used in the Newton algorithm.
+        !
+        integer(kind=intType), parameter :: iterMax = 15
+        real(kind=realType), parameter :: adtEps = 1.e-25_realType
+        real(kind=realType), parameter :: thresConv = 1.e-10_realType
+        !
+        !       Local variables.
+        !
+        integer :: ierr, nn
+        integer(kind=intType) :: nAllocBB, nAllocFront, nStack
+        integer(kind=intType), dimension(:), pointer :: frontLeaves
+        integer(kind=intType), dimension(:), pointer :: frontLeavesNew
+        type(adtBBoxTargetType), dimension(:), pointer :: BB
+
+        ! Initial allocation of the arrays for the tree traversal as well
+        ! as the stack array used in the qsort routine. The latter is
+        ! done, because the qsort routine is called for every coordinate
+        ! and therefore it is more efficient to allocate the stack once
+        ! rather than over and over again. The disadvantage of course is
+        ! that an essentially local variable, stack, is now stored in
+        ! adtData.
+
+        nAllocBB = 10
+        nAllocFront = 25
+        nStack = 100
+
+        allocate (stack(nStack), BB(nAllocBB), frontLeaves(nAllocFront), &
+                  frontLeavesNew(nAllocFront), stat=ierr)
+        if (ierr /= 0) &
+             call adtTerminate(ADT, "minDistanceTreeSearch", &
+             "Memory allocation failure for stack, BB, &
+             &etc.")
+
+        ! Loop over the number of coordinates to be treated.
+
+        coorLoop: do nn = 1, nCoor
+
+            call minDistanceTreeSearchSinglePoint(ADT, coor(:, nn), &
+                                                  intInfo(:, nn), uvw(:, nn), arrDonor, nInterpol, BB, &
+                                                  frontLeaves, frontLeavesNew)
+
+        end do coorLoop
+
+        ! Release the memory allocated in this routine.
+
+        deallocate (stack, BB, frontLeaves, frontLeavesNew, stat=ierr)
+        if (ierr /= 0) &
+            call adtTerminate(ADT, "minDistanceTreeSearch", &
+                              "Deallocation failure for stack, BB, etc.")
+
+    end subroutine minDistanceTreeSearch
+
+    subroutine minDistanceTreeSearchSinglePoint(ADT, coor, intInfo, &
+                                                uvw, arrDonor, nInterpol, BB, frontLeaves, frontLeavesNew)
+        !
+        !        This routine performs the actual minimum distance search for
+        !        a single point on the local tree. It is local in the sens
+        !        that no communication is involved. This routine does the
+        !        actual search. The minDistanceTreeSearch is just a wrapper
+        !        around this routine. The reason for the split is that the
+        !        overset mesh connectivity requires efficient calling with
+        !        a single coordinate. Therefore, this rouine does not
+        !        allocate/deallocate any variables.
+        !        Subroutine intent(in) arguments.
+        !        --------------------------------
+        !        ADT:       ADT type whose ADT must be searched
+        !        coor:      The coordinates and the currently stored minimum
+        !                   distance squared of these points:
+        !                   coor(1): Coordinate 1.
+        !                   coor(2): Coordinate 2.
+        !                   coor(3): Coordinate 3.
+        !                   coor(4): The currently stored minimum distance
+        !                   squared.
+        !        nInterpol: Number of variables to be interpolated.
+        !        arrDonor:  Array with the donor data; needed to obtain the
+        !                   interpolated data.
+        !        Subroutine intent(out) arguments.
+        !        ---------------------------------
+        !        intInfo: 1D integer array, in which the following output
+        !                 will be stored:
+        !                 intInfo(1): processor ID of the processor where
+        !                               the element is stored. This of course
+        !                               is myID. If no element is found this
+        !                               value is set to -1.
+        !                 intInfo(2): The element type of the element.
+        !                 intInfo(3): The element ID of the element in the
+        !                               the connectivity.
+        !        uvw:     2D floating point array to store the parametric
+        !                 coordinates of the point in the transformed element
+        !                 as well as the new distance squared and the
+        !                 interpolated solution:
+        !                 uvw(1): Parametric u-weight.
+        !                 uvw(2): Parametric v-weight.
+        !                 uvw(3): Parametric w-weight.
+        !                 uvw(4): The new distance squared.
+        !                 uvw(5): Interpolated solution, if desired. It is
+        !                            possible to call this routine with
+        !                            nInterpol == 0.
+        !
+        implicit none
+        !
+        !       Subroutine arguments.
+        !
+        type(adtType), intent(inout) :: ADT
+        integer(kind=intType), intent(in) :: nInterpol
+
+        real(kind=realType), dimension(4), intent(in) :: coor
+        real(kind=realType), dimension(:, :), intent(in) :: arrDonor
+
+        integer(kind=intType), dimension(3), intent(out) :: intInfo
+        real(kind=realType), dimension(5), intent(out) :: uvw
+        integer(kind=intType), dimension(:), pointer :: frontLeaves
+        integer(kind=intType), dimension(:), pointer :: frontLeavesNew
+        type(adtBBoxTargetType), dimension(:), pointer :: BB
+        !
+        !       Local parameters used in the Newton algorithm.
+        !
+        integer(kind=intType), parameter :: iterMax = 15
+        real(kind=realType), parameter :: adtEps = 1.e-25_realType
+        real(kind=realType), parameter :: thresConv = 1.e-10_realType
+        !
+        !       Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: ii, kk, ll, mm, nn, activeLeaf
+        integer(kind=intType) :: nBB, nFrontLeaves, nFrontLeavesNew
+        integer(kind=intType) :: nAllocBB, nAllocFront, nNodeElement
+        integer(kind=intType) :: i, kkk
+
+        integer(kind=intType), dimension(8) :: n, m
+
+        real(kind=realType) :: dx, dy, dz, d1, d2, invLen, val
+        real(kind=realType) :: u, v, w, uv, uold, vold, vn, du, dv
+        real(kind=realType) :: uu, vv, ww
+
+        real(kind=realType), dimension(2) :: dd
+        real(kind=realType), dimension(3) :: x1, x21, x41, x3142, xf
+        real(kind=realType), dimension(3) :: vf, vt, a, b, norm, an, bn
+        real(kind=realType), dimension(3) :: chi
+        real(kind=realType), dimension(8) :: weight
+
+        real(kind=realType), dimension(:, :), pointer :: xBBox
+
+        logical :: elementFound
+        type(adtLeafType), dimension(:), pointer :: ADTree
+
+        ! Set some pointers to make the code more readable.
+
+        xBBox => ADT%xBBox
+        ADTree => ADT%ADTree
+
+        ! Initial allocation of the arrays for the tree traversal as well
+        ! as the stack array used in the qsort routine. The latter is
+        ! done, because the qsort routine is called for every coordinate
+        ! and therefore it is more efficient to allocate the stack once
+        ! rather than over and over again. The disadvantage of course is
+        ! that an essentially local variable, stack, is now stored in
+        ! adtData.
+
+        nAllocBB = size(BB)
+        nAllocFront = size(frontLeaves)
+        nStack = size(stack)
+
+        ! Initialize the processor ID to -1 to indicate that no
+        ! corresponding volume element is found and the new minimum
+        ! distance squared to the old value.
+
+        intInfo(1) = -1
+        uvw(4) = coor(4)
+        !
+        !          Part 1. Determine the possible minimum distance squared to
+        !                  the root leaf. If larger than the current distance
+        !                  there is no need to search this tree.
+        !
+        if (coor(1) < ADTree(1)%xMin(1)) then
+            dx = coor(1) - ADTree(1)%xMin(1)
+        else if (coor(1) > ADTree(1)%xMax(4)) then
+            dx = coor(1) - ADTree(1)%xMax(4)
+        else
+            dx = zero
+        end if
+
+        if (coor(2) < ADTree(1)%xMin(2)) then
+            dy = coor(2) - ADTree(1)%xMin(2)
+        else if (coor(2) > ADTree(1)%xMax(5)) then
+            dy = coor(2) - ADTree(1)%xMax(5)
+        else
+            dy = zero
+        end if
+
+        if (coor(3) < ADTree(1)%xMin(3)) then
+            dz = coor(3) - ADTree(1)%xMin(3)
+        else if (coor(3) > ADTree(1)%xMax(6)) then
+            dz = coor(3) - ADTree(1)%xMax(6)
+        else
+            dz = zero
+        end if
+
+        ! Continue with the next coordinate if the possible distance
+        ! squared to the root leaf is larger than the currently stored
+        ! value.
+
+        if ((dx * dx + dy * dy + dz * dz) >= uvw(4)) return
+        !
+        !          Part 2. Find a likely bounding box, which minimizes the
+        !                  guaranteed distance.
+        !
+        activeLeaf = 1
+
+        ! Traverse the tree until a terminal leaf is found.
+
+        treeTraversal1: do
+
+            ! Exit the loop when a terminal leaf has been found.
+            ! This is indicated by a negative value of activeLeaf.
+
+            if (activeLeaf < 0) exit treeTraversal1
+
+            ! Determine the guaranteed distance squared for both children.
+
+            do mm = 1, 2
+
+                ! Determine whether the child contains a bounding box or
+                ! another leaf of the tree.
+
+                ll = ADTree(activeLeaf)%children(mm)
+                if (ll > 0) then
+
+                    ! Child contains a leaf. Determine the guaranteed distance
+                    ! vector to the leaf.
+
+                    d1 = abs(coor(1) - ADTree(ll)%xMin(1))
+                    d2 = abs(coor(1) - ADTree(ll)%xMax(4))
+                    dx = max(d1, d2)
+
+                    d1 = abs(coor(2) - ADTree(ll)%xMin(2))
+                    d2 = abs(coor(2) - ADTree(ll)%xMax(5))
+                    dy = max(d1, d2)
+
+                    d1 = abs(coor(3) - ADTree(ll)%xMin(3))
+                    d2 = abs(coor(3) - ADTree(ll)%xMax(6))
+                    dz = max(d1, d2)
 
-                if(     coor(3) < ADTree(kk)%xMin(3)) then
-                   dz =  coor(3) - ADTree(kk)%xMin(3)
-                else if(coor(3) > ADTree(kk)%xMax(6)) then
-                   dz =  coor(3) - ADTree(kk)%xMax(6)
                 else
-                   dz = zero
-                endif
-
-                d2 = dx*dx + dy*dy + dz*dz
 
-                ! If this distance squared is less than the current
-                ! value, store this leaf in the new front.
+                    ! Child contains a bounding box. Determine the guaranteed
+                    ! distance vector to it.
 
-                testStoreLeave: if(d2 < uvw(4)) then
+                    ll = -ll
 
-                   ! Check if enough memory has been allocated and
-                   ! store the leaf.
+                    d1 = abs(coor(1) - xBBox(1, ll))
+                    d2 = abs(coor(1) - xBBox(4, ll))
+                    dx = max(d1, d2)
 
-                   if(nFrontLeavesNew == nAllocFront) then
-                      i = nAllocFront
-                      call reallocPlus(frontLeavesNew, i, 250, ADT)
-                      call reallocPlus(frontLeaves, nAllocFront, 250, ADT)
-                   endif
+                    d1 = abs(coor(2) - xBBox(2, ll))
+                    d2 = abs(coor(2) - xBBox(5, ll))
+                    dy = max(d1, d2)
 
-                   nFrontLeavesNew = nFrontLeavesNew + 1
-                   frontLeavesNew(nFrontLeavesNew) = kk
+                    d1 = abs(coor(3) - xBBox(3, ll))
+                    d2 = abs(coor(3) - xBBox(6, ll))
+                    dz = max(d1, d2)
 
-                   ! Compute the guaranteed distance squared to this leaf.
-                   ! It may be less than the currently stored value.
-
-                   d1 = abs(coor(1) - ADTree(kk)%xMin(1))
-                   d2 = abs(coor(1) - ADTree(kk)%xMax(4))
-                   dx = max(d1,d2)
-
-                   d1 = abs(coor(2) - ADTree(kk)%xMin(2))
-                   d2 = abs(coor(2) - ADTree(kk)%xMax(5))
-                   dy = max(d1,d2)
+                end if
 
-                   d1 = abs(coor(3) - ADTree(kk)%xMin(3))
-                   d2 = abs(coor(3) - ADTree(kk)%xMax(6))
-                   dz = max(d1,d2)
+                ! Compute the guaranteed distance squared for this child.
 
-                   d2 = dx*dx + dy*dy + dz*dz
-                   uvw(4) = min(uvw(4),d2)
+                dd(mm) = dx * dx + dy * dy + dz * dz
 
-                endif testStoreLeave
+            end do
 
-             endif terminalTest
-          enddo childrenLoop
-       enddo currentFrontLoop
+            ! Determine which will be the next leaf in the tree traversal.
+            ! This will be the leaf which has the minimum guaranteed
+            ! distance. In case of ties take the left leaf, because this
+            ! leaf may have more children.
 
-       ! End of the loop over the current front. If the new front
-       ! is empty the entire tree has been traversed and an exit is
-       ! made from the corresponding loop.
+            if (dd(1) <= dd(2)) then
+                activeLeaf = ADTree(activeLeaf)%children(1)
+            else
+                activeLeaf = ADTree(activeLeaf)%children(2)
+            end if
 
-       if(nFrontLeavesNew == 0) exit treeTraversal2
+        end do treeTraversal1
 
-       ! Copy the data of the new front leaves into the current
-       ! front for the next round.
+        ! Store the minimum of the just computed guaranteed distance
+        ! squared and the currently stored value in uvw.
 
-       nFrontLeaves = nFrontLeavesNew
-       do ll=1,nFrontLeaves
-          frontLeaves(ll) = frontLeavesNew(ll)
-       enddo
+        uvw(4) = min(uvw(4), dd(1), dd(2))
+        !
+        !          Part 3. Find the bounding boxes whose possible minimum
+        !                  distance is less than the currently stored value.
+        !
+        ! In part 1 it was already tested that the possible distance
+        ! squared of the root leaf was less than the current value.
+        ! Therefore initialize the current front to the root leaf and
+        ! set the number of bounding boxes to 0.
 
-    enddo treeTraversal2
+        nBB = 0
 
-    ! Sort the target bounding boxes in increasing order such that
-    ! the one with the smallest possible distance is first.
+        nFrontLeaves = 1
+        frontLeaves(1) = 1
 
-    call qsortBBoxTargets(BB, nBB, ADT)
-    !
-    !          Part 4: Loop over the selected bounding boxes and check if
-    !                  the corresponding element minimizes the distance.
-    !
-    elementFound = .false.
+        ! Second tree traversal. Now to find all possible bounding
+        ! box candidates.
 
-    BBoxLoop: do mm=1,nBB
+        treeTraversal2: do
 
-       ! Exit the loop if the possible minimum distance of this
-       ! bounding box is not smaller than the current value.
-       ! Remember that BB has been sorted in increasing order.
+            ! Initialize the number of leaves for the new front, i.e.
+            ! the front of the next round, to 0.
 
-       if(uvw(4) <= BB(mm)%posDist2) exit BBoxLoop
+            nFrontLeavesNew = 0
 
-       ! Determine the element type stored in this bounding box.
+            ! Loop over the leaves of the current front.
 
-       kk = BB(mm)%ID
-       select case (ADT%elementType(kk))
+            currentFrontLoop: do ii = 1, nFrontLeaves
 
-       case (adtTriangle)
-          call adtTerminate(ADT, "minDistanceTreeSearch", &
-               "Minimum distance search for &
-               &triangles not implemented yet")
+                ! Store the ID of the leaf a bit easier and loop over
+                ! its two children.
 
-          !=========================================================
+                ll = frontLeaves(ii)
 
-       case (adtQuadrilateral)
+                childrenLoop: do mm = 1, 2
 
-          ! Temporary implementation. I'm waiting for Juan to come
-          ! up with his more sophisticated algorithm.
+                    ! Determine whether this child contains a bounding box
+                    ! or a leaf of the next level.
 
-          ! This is a surface element, so set the parametric weight
-          ! w to zero.
+                    kk = ADTree(ll)%children(mm)
+                    terminalTest: if (kk < 0) then
 
-          w = zero
+                        ! Child contains a bounding box. Determine the possible
+                        ! minimum distance squared to this bounding box.
 
-          ! Determine the 4 vectors which completely describe
-          ! the quadrilateral face
+                        kk = -kk
 
-          ll   = ADT%elementID(kk)
-          n(1) = ADT%quadsConn(1,ll)
-          n(2) = ADT%quadsConn(2,ll)
-          n(3) = ADT%quadsConn(3,ll)
-          n(4) = ADT%quadsConn(4,ll)
+                        if (coor(1) < xBBox(1, kk)) then
+                            dx = coor(1) - xBBox(1, kk)
+                        else if (coor(1) > xBBox(4, kk)) then
+                            dx = coor(1) - xBBox(4, kk)
+                        else
+                            dx = zero
+                        end if
 
-          x1(1) = ADT%coor(1,n(1))
-          x1(2) = ADT%coor(2,n(1))
-          x1(3) = ADT%coor(3,n(1))
+                        if (coor(2) < xBBox(2, kk)) then
+                            dy = coor(2) - xBBox(2, kk)
+                        else if (coor(2) > xBBox(5, kk)) then
+                            dy = coor(2) - xBBox(5, kk)
+                        else
+                            dy = zero
+                        end if
 
-          x21(1) = ADT%coor(1,n(2)) - x1(1)
-          x21(2) = ADT%coor(2,n(2)) - x1(2)
-          x21(3) = ADT%coor(3,n(2)) - x1(3)
+                        if (coor(3) < xBBox(3, kk)) then
+                            dz = coor(3) - xBBox(3, kk)
+                        else if (coor(3) > xBBox(6, kk)) then
+                            dz = coor(3) - xBBox(6, kk)
+                        else
+                            dz = zero
+                        end if
 
-          x41(1) = ADT%coor(1,n(4)) - x1(1)
-          x41(2) = ADT%coor(2,n(4)) - x1(2)
-          x41(3) = ADT%coor(3,n(4)) - x1(3)
+                        d2 = dx * dx + dy * dy + dz * dz
 
-          x3142(1) = ADT%coor(1,n(3)) - x1(1) - x21(1) - x41(1)
-          x3142(2) = ADT%coor(2,n(3)) - x1(2) - x21(2) - x41(2)
-          x3142(3) = ADT%coor(3,n(3)) - x1(3) - x21(3) - x41(3)
+                        ! If this distance squared is less than the current
+                        ! value, store this bounding box as a target.
 
-          ! Initialize u and v to 0.5 and determine the
-          ! corresponding coordinates on the face, which is the
-          ! centroid.
+                        testStoreBBox: if (d2 < uvw(4)) then
 
-          u  = half
-          v  = half
-          uv = u*v
+                            ! Check if the memory must be reallocated.
 
-          xf(1) = x1(1) + u*x21(1) + v*x41(1) + uv*x3142(1)
-          xf(2) = x1(2) + u*x21(2) + v*x41(2) + uv*x3142(2)
-          xf(3) = x1(3) + u*x21(3) + v*x41(3) + uv*x3142(3)
+                            if (nBB == nAllocBB) &
+                                call reallocBBoxTargetTypePlus(BB, nAllocBB, &
+                                                               100, ADT)
 
-          ! Newton loop to determine the point on the surface,
-          ! which minimizes the distance to the given coordinate.
+                            ! Update the counter and store the data.
 
-          NewtonQuads: do ll=1,iterMax
+                            nBB = nBB + 1
+                            BB(nBB)%ID = kk
+                            BB(nBB)%posDist2 = d2
 
-             ! Store the current values of u and v for a stop
-             ! criterion later on.
+                            ! Although in step 2, i.e. the first tree traversal,
+                            ! the guaranteed distance squared to a bounding box
+                            ! has already been computed, this has been done only
+                            ! for a likely candidate and not for all the possible
+                            ! candidates. As this test is relatively cheap, do it
+                            ! now for this bounding box.
 
-             uold = u
-             vold = v
+                            d1 = abs(coor(1) - xBBox(1, kk))
+                            d2 = abs(coor(1) - xBBox(4, kk))
+                            dx = max(d1, d2)
 
-             ! Determine the vector vf from xf to given coordinate.
+                            d1 = abs(coor(2) - xBBox(2, kk))
+                            d2 = abs(coor(2) - xBBox(5, kk))
+                            dy = max(d1, d2)
 
-             vf(1) = coor(1) - xf(1)
-             vf(2) = coor(2) - xf(2)
-             vf(3) = coor(3) - xf(3)
+                            d1 = abs(coor(3) - xBBox(3, kk))
+                            d2 = abs(coor(3) - xBBox(6, kk))
+                            dz = max(d1, d2)
 
-             ! Determine the tangent vectors in u- and v-direction.
-             ! Store these in a and b respectively.
+                            d2 = dx * dx + dy * dy + dz * dz
+                            uvw(4) = min(uvw(4), d2)
 
-             a(1) = x21(1) + v*x3142(1)
-             a(2) = x21(2) + v*x3142(2)
-             a(3) = x21(3) + v*x3142(3)
+                        end if testStoreBBox
 
-             b(1) = x41(1) + u*x3142(1)
-             b(2) = x41(2) + u*x3142(2)
-             b(3) = x41(3) + u*x3142(3)
+                        else terminalTest
 
-             ! Determine the normal vector of the face by taking the
-             ! cross product of a and b. Afterwards this vector will
-             ! be scaled to a unit vector.
+                        ! Child contains a leaf. Compute the possible minimum
+                        ! distance squared to the current coordinate.
 
-             norm(1) = a(2)*b(3) - a(3)*b(2)
-             norm(2) = a(3)*b(1) - a(1)*b(3)
-             norm(3) = a(1)*b(2) - a(2)*b(1)
+                        if (coor(1) < ADTree(kk)%xMin(1)) then
+                            dx = coor(1) - ADTree(kk)%xMin(1)
+                        else if (coor(1) > ADTree(kk)%xMax(4)) then
+                            dx = coor(1) - ADTree(kk)%xMax(4)
+                        else
+                            dx = zero
+                        end if
 
-             invLen = one/max(adtEps,sqrt(norm(1)*norm(1) &
-                  +                        norm(2)*norm(2) &
-                  +                        norm(3)*norm(3)))
+                        if (coor(2) < ADTree(kk)%xMin(2)) then
+                            dy = coor(2) - ADTree(kk)%xMin(2)
+                        else if (coor(2) > ADTree(kk)%xMax(5)) then
+                            dy = coor(2) - ADTree(kk)%xMax(5)
+                        else
+                            dy = zero
+                        end if
 
-             norm(1) = norm(1)*invLen
-             norm(2) = norm(2)*invLen
-             norm(3) = norm(3)*invLen
+                        if (coor(3) < ADTree(kk)%xMin(3)) then
+                            dz = coor(3) - ADTree(kk)%xMin(3)
+                        else if (coor(3) > ADTree(kk)%xMax(6)) then
+                            dz = coor(3) - ADTree(kk)%xMax(6)
+                        else
+                            dz = zero
+                        end if
 
-             ! Determine the projection of the vector vf onto
-             ! the face.
+                        d2 = dx * dx + dy * dy + dz * dz
 
-             vn = vf(1)*norm(1) + vf(2)*norm(2) + vf(3)*norm(3)
-             vt(1) = vf(1) - vn*norm(1)
-             vt(2) = vf(2) - vn*norm(2)
-             vt(3) = vf(3) - vn*norm(3)
+                        ! If this distance squared is less than the current
+                        ! value, store this leaf in the new front.
 
-             ! The vector vt points from the current point on the
-             ! face to the new point. However this new point lies on
-             ! the plane determined by the vectors a and b, but not
-             ! necessarily on the face itself. The new point on the
-             ! face is obtained by projecting the point in the a-b
-             ! plane onto the face. this can be done by determining
-             ! the coefficients du and dv, such that vt = du*a + dv*b.
-             ! To solve du and dv the vectors normal to a and b
-             ! inside the plane ab are needed.
+                        testStoreLeave: if (d2 < uvw(4)) then
 
-             an(1) = a(2)*norm(3) - a(3)*norm(2)
-             an(2) = a(3)*norm(1) - a(1)*norm(3)
-             an(3) = a(1)*norm(2) - a(2)*norm(1)
+                            ! Check if enough memory has been allocated and
+                            ! store the leaf.
 
-             bn(1) = b(2)*norm(3) - b(3)*norm(2)
-             bn(2) = b(3)*norm(1) - b(1)*norm(3)
-             bn(3) = b(1)*norm(2) - b(2)*norm(1)
+                            if (nFrontLeavesNew == nAllocFront) then
+                                i = nAllocFront
+                                call reallocPlus(frontLeavesNew, i, 250, ADT)
+                                call reallocPlus(frontLeaves, nAllocFront, 250, ADT)
+                            end if
 
-             ! Solve du and dv. the clipping of vn should not be
-             ! active, as this would mean that the vectors a and b
-             ! are parallel. This corresponds to a quad degenerated
-             ! to a line, which should not occur in the surface mesh.
+                            nFrontLeavesNew = nFrontLeavesNew + 1
+                            frontLeavesNew(nFrontLeavesNew) = kk
 
-             vn = a(1)*bn(1) + a(2)*bn(2) + a(3)*bn(3)
-             vn = sign(max(adtEps,abs(vn)),vn)
-             du = (vt(1)*bn(1) + vt(2)*bn(2) + vt(3)*bn(3))/vn
+                            ! Compute the guaranteed distance squared to this leaf.
+                            ! It may be less than the currently stored value.
 
-             vn = b(1)*an(1) + b(2)*an(2) + b(3)*an(3)
-             vn = sign(max(adtEps,abs(vn)),vn)
-             dv = (vt(1)*an(1) + vt(2)*an(2) + vt(3)*an(3))/vn
+                            d1 = abs(coor(1) - ADTree(kk)%xMin(1))
+                            d2 = abs(coor(1) - ADTree(kk)%xMax(4))
+                            dx = max(d1, d2)
 
-             ! Determine the new parameter values uu and vv. These
-             ! are limited to 0 <= (uu,vv) <= 1.
+                            d1 = abs(coor(2) - ADTree(kk)%xMin(2))
+                            d2 = abs(coor(2) - ADTree(kk)%xMax(5))
+                            dy = max(d1, d2)
 
-             u = u + du; u = min(one,max(zero,u))
-             v = v + dv; v = min(one,max(zero,v))
+                            d1 = abs(coor(3) - ADTree(kk)%xMin(3))
+                            d2 = abs(coor(3) - ADTree(kk)%xMax(6))
+                            dz = max(d1, d2)
 
-             ! Determine the final values of the corrections.
+                            d2 = dx * dx + dy * dy + dz * dz
+                            uvw(4) = min(uvw(4), d2)
 
-             du = abs(u-uold)
-             dv = abs(v-vold)
+                        end if testStoreLeave
 
-             ! Determine the new coordinates of the point xf.
+                    end if terminalTest
+                end do childrenLoop
+            end do currentFrontLoop
 
-             uv  = u*v
-             xf(1) = x1(1) + u*x21(1) + v*x41(1) + uv*x3142(1)
-             xf(2) = x1(2) + u*x21(2) + v*x41(2) + uv*x3142(2)
-             xf(3) = x1(3) + u*x21(3) + v*x41(3) + uv*x3142(3)
+            ! End of the loop over the current front. If the new front
+            ! is empty the entire tree has been traversed and an exit is
+            ! made from the corresponding loop.
 
-             ! Exit the loop if the update of the parametric
-             ! weights is below the threshold
+            if (nFrontLeavesNew == 0) exit treeTraversal2
 
-             val = sqrt(du*du + dv*dv)
-             if(val <= thresConv) exit NewtonQuads
+            ! Copy the data of the new front leaves into the current
+            ! front for the next round.
 
-          enddo NewtonQuads
+            nFrontLeaves = nFrontLeavesNew
+            do ll = 1, nFrontLeaves
+                frontLeaves(ll) = frontLeavesNew(ll)
+            end do
 
-          ! Compute the distance squared between the given
-          ! coordinate and the point xf.
+        end do treeTraversal2
 
-          dx = coor(1) - xf(1)
-          dy = coor(2) - xf(2)
-          dz = coor(3) - xf(3)
+        ! Sort the target bounding boxes in increasing order such that
+        ! the one with the smallest possible distance is first.
 
-          val = dx*dx + dy*dy + dz*dz
+        call qsortBBoxTargets(BB, nBB, ADT)
+        !
+        !          Part 4: Loop over the selected bounding boxes and check if
+        !                  the corresponding element minimizes the distance.
+        !
+        elementFound = .false.
 
-          ! If the distance squared is less than the current value
-          ! store the wall distance and interpolation info and
-          ! indicate that an element was found.
+        BBoxLoop: do mm = 1, nBB
 
-          if(val < uvw(4)) then
-             uvw(4)    = val
-             nNodeElement = 4
-             elementFound = .true.
+            ! Exit the loop if the possible minimum distance of this
+            ! bounding box is not smaller than the current value.
+            ! Remember that BB has been sorted in increasing order.
 
-             kkk  = kk;   uu   = u;    vv   = v;    ww   = w
-             m(1) = n(1); m(2) = n(2); m(3) = n(3); m(4) = n(4)
+            if (uvw(4) <= BB(mm)%posDist2) exit BBoxLoop
 
-             weight(1) = (one - u)*(one - v)
-             weight(2) =           u *(one - v)
-             weight(3) =           u *          v
-             weight(4) = (one - u)*          v
-          endif
+            ! Determine the element type stored in this bounding box.
 
-          !=========================================================
+            kk = BB(mm)%ID
+            select case (ADT%elementType(kk))
 
-       case (adtTetrahedron)
-          call adtTerminate(ADT, "minDistanceTreeSearch", &
-               "Minimum distance search for &
-               &tetrahedra not implemented yet")
+            case (adtTriangle)
+                call adtTerminate(ADT, "minDistanceTreeSearch", &
+                     "Minimum distance search for &
+                     &triangles not implemented yet")
 
-          !===========================================================
+                !=========================================================
 
-       case (adtPyramid)
-          call adtTerminate(ADT, "minDistanceTreeSearch", &
-               "Minimum distance search for &
-               &pyramids not implemented yet")
+            case (adtQuadrilateral)
 
-          !===========================================================
+                ! Temporary implementation. I'm waiting for Juan to come
+                ! up with his more sophisticated algorithm.
 
-       case (adtPrism)
-          call adtTerminate(ADT, "minDistanceTreeSearch", &
-               "Minimum distance search for &
-               &prisms not implemented yet")
+                ! This is a surface element, so set the parametric weight
+                ! w to zero.
 
-          !===========================================================
+                w = zero
 
-       case (adtHexahedron)
+                ! Determine the 4 vectors which completely describe
+                ! the quadrilateral face
 
-          ! Determine the element ID and the corresponding
-          ! 8 node ID's.
+                ll = ADT%elementID(kk)
+                n(1) = ADT%quadsConn(1, ll)
+                n(2) = ADT%quadsConn(2, ll)
+                n(3) = ADT%quadsConn(3, ll)
+                n(4) = ADT%quadsConn(4, ll)
 
-          ll = ADT%elementID(kk)
-          n(1) = ADT%hexaConn(1,ll)
-          n(2) = ADT%hexaConn(2,ll)
-          n(3) = ADT%hexaConn(3,ll)
-          n(4) = ADT%hexaConn(4,ll)
-          n(5) = ADT%hexaConn(5,ll)
-          n(6) = ADT%hexaConn(6,ll)
-          n(7) = ADT%hexaConn(7,ll)
-          n(8) = ADT%hexaConn(8,ll)
+                x1(1) = ADT%coor(1, n(1))
+                x1(2) = ADT%coor(2, n(1))
+                x1(3) = ADT%coor(3, n(1))
 
-          ! Call the subroutine minD2Hexa to do the work.
+                x21(1) = ADT%coor(1, n(2)) - x1(1)
+                x21(2) = ADT%coor(2, n(2)) - x1(2)
+                x21(3) = ADT%coor(3, n(2)) - x1(3)
 
-          call minD2Hexa(coor(1:3),            &
-               ADT%coor(1:3,n(1)), &
-               ADT%coor(1:3,n(2)), &
-               ADT%coor(1:3,n(3)), &
-               ADT%coor(1:3,n(4)), &
-               ADT%coor(1:3,n(5)), &
-               ADT%coor(1:3,n(6)), &
-               ADT%coor(1:3,n(7)), &
-               ADT%coor(1:3,n(8)), &
-               val, chi, ierr)
+                x41(1) = ADT%coor(1, n(4)) - x1(1)
+                x41(2) = ADT%coor(2, n(4)) - x1(2)
+                x41(3) = ADT%coor(3, n(4)) - x1(3)
 
-          ! If the distance squared is less than the current value
-          ! store the wall distance and interpolation info and
-          ! indicate that an element was found.
+                x3142(1) = ADT%coor(1, n(3)) - x1(1) - x21(1) - x41(1)
+                x3142(2) = ADT%coor(2, n(3)) - x1(2) - x21(2) - x41(2)
+                x3142(3) = ADT%coor(3, n(3)) - x1(3) - x21(3) - x41(3)
 
-          if(val < uvw(4)) then
-             uvw(4)    = val
-             nNodeElement = 8
-             elementFound = .true.
+                ! Initialize u and v to 0.5 and determine the
+                ! corresponding coordinates on the face, which is the
+                ! centroid.
 
-             kkk = kk;
-             uu  = chi(1); vv = chi(2); ww = chi(3)
+                u = half
+                v = half
+                uv = u * v
 
-             m(1) = n(1); m(2) = n(2); m(3) = n(3); m(4) = n(4)
-             m(5) = n(5); m(6) = n(6); m(7) = n(7); m(8) = n(8)
+                xf(1) = x1(1) + u * x21(1) + v * x41(1) + uv * x3142(1)
+                xf(2) = x1(2) + u * x21(2) + v * x41(2) + uv * x3142(2)
+                xf(3) = x1(3) + u * x21(3) + v * x41(3) + uv * x3142(3)
 
-             weight(1) = (one - uu)*(one - vv)*(one - ww)
-             weight(2) =           uu *(one - vv)*(one - ww)
-             weight(3) =           uu *          vv *(one - ww)
-             weight(4) = (one - uu)*          vv *(one - ww)
-             weight(5) = (one - uu)*(one - vv)*          ww
-             weight(6) =           uu *(one - vv)*          ww
-             weight(7) =           uu *          vv *          ww
-             weight(8) = (one - uu)*          vv *          ww
-          endif
+                ! Newton loop to determine the point on the surface,
+                ! which minimizes the distance to the given coordinate.
 
-       end select
+                NewtonQuads: do ll = 1, iterMax
 
-    enddo BBoxLoop
+                    ! Store the current values of u and v for a stop
+                    ! criterion later on.
 
-    ! Check if an element was found. As all the minimum distance
-    ! searches are initialized by the calling routine (to support
-    ! periodic searches) this is not always the case.
+                    uold = u
+                    vold = v
 
-    if( elementFound ) then
+                    ! Determine the vector vf from xf to given coordinate.
 
-       ! Store the interpolation information for this point.
-       ! First the integer info, i.e. the processor ID, element type
-       ! and local element ID.
+                    vf(1) = coor(1) - xf(1)
+                    vf(2) = coor(2) - xf(2)
+                    vf(3) = coor(3) - xf(3)
 
-       intInfo(1) = ADT%myID
-       intInfo(2) = ADT%elementType(kkk)
-       intInfo(3) = ADT%elementID(kkk)
+                    ! Determine the tangent vectors in u- and v-direction.
+                    ! Store these in a and b respectively.
 
-       ! The parametric weights. Note that the wall distance
-       ! squared, stored in the 4th position of uvw, already
-       ! contains the correct value.
+                    a(1) = x21(1) + v * x3142(1)
+                    a(2) = x21(2) + v * x3142(2)
+                    a(3) = x21(3) + v * x3142(3)
 
-       uvw(1) = uu
-       uvw(2) = vv
-       uvw(3) = ww
+                    b(1) = x41(1) + u * x3142(1)
+                    b(2) = x41(2) + u * x3142(2)
+                    b(3) = x41(3) + u * x3142(3)
 
-       ! The interpolated solution, if needed.
+                    ! Determine the normal vector of the face by taking the
+                    ! cross product of a and b. Afterwards this vector will
+                    ! be scaled to a unit vector.
 
-       do ll=1,nInterpol
-          ii = 4+ll
-          uvw(ii) = weight(1)*arrDonor(ll,m(1))
-          do i=2, nNodeElement
-             uvw(ii) = uvw(ii) + weight(i)*arrDonor(ll,m(i))
-          enddo
-       enddo
+                    norm(1) = a(2) * b(3) - a(3) * b(2)
+                    norm(2) = a(3) * b(1) - a(1) * b(3)
+                    norm(3) = a(1) * b(2) - a(2) * b(1)
 
-    endif
-
-  end subroutine minDistanceTreeSearchSinglePoint
-
-  subroutine intersectionTreeSearchSinglePoint(ADT, coor, &
-       intInfo, BB, frontLeaves, frontLeavesNew)
-    !
-    !        This routine is used in the ray casting approach to determine
-    !        if a given ray intersects any of the surface elements. The
-    !        purpose is to determine if a point of interest is inside
-    !        or outside the (closed) surface defined by the ADT.
-    !        Subroutine intent(in) arguments.
-    !        --------------------------------
-    !        ADT:       ADT type whose ADT must be searched
-    !        coor(3):   The coordinate of the point to be searched.
-    !        Subroutine intent(out) arguments.
-    !        ---------------------------------
-    !        intInfo: Intersection info. The number of intersections we
-    !       *          found.
-    !
-    implicit none
-    !
-    !       Subroutine arguments.
-    !
-    type(adtType), intent(inout) :: ADT
+                    invLen = one / max(adtEps, sqrt(norm(1) * norm(1) &
+                                                    + norm(2) * norm(2) &
+                                                    + norm(3) * norm(3)))
 
-    real(kind=realType), dimension(3), intent(in) :: coor
-    integer(kind=intType), intent(out) :: intInfo
+                    norm(1) = norm(1) * invLen
+                    norm(2) = norm(2) * invLen
+                    norm(3) = norm(3) * invLen
 
-    integer(kind=intType), dimension(:), pointer :: BB
-    integer(kind=intType), dimension(:), pointer :: frontLeaves
-    integer(kind=intType), dimension(:), pointer :: frontLeavesNew
-    !
-    !       Local variables.
-    !
-    integer :: ierr
+                    ! Determine the projection of the vector vf onto
+                    ! the face.
 
-    integer(kind=intType) :: ii, kk, ll, mm, nn
-    integer(kind=intType) :: nBB, nFrontLeaves, nFrontLeavesNew
-    integer(kind=intType) :: nAllocBB, nAllocFront
-    integer(kind=intType) :: i, nNodeElement
-    real(kind=realType), dimension(:,:), pointer :: xBBox
-    logical :: elementFound
-    type(adtLeafType), dimension(:), pointer :: ADTree
+                    vn = vf(1) * norm(1) + vf(2) * norm(2) + vf(3) * norm(3)
+                    vt(1) = vf(1) - vn * norm(1)
+                    vt(2) = vf(2) - vn * norm(2)
+                    vt(3) = vf(3) - vn * norm(3)
 
-    ! Set some pointers to make the code more readable.
+                    ! The vector vt points from the current point on the
+                    ! face to the new point. However this new point lies on
+                    ! the plane determined by the vectors a and b, but not
+                    ! necessarily on the face itself. The new point on the
+                    ! face is obtained by projecting the point in the a-b
+                    ! plane onto the face. this can be done by determining
+                    ! the coefficients du and dv, such that vt = du*a + dv*b.
+                    ! To solve du and dv the vectors normal to a and b
+                    ! inside the plane ab are needed.
 
-    xBBox  => ADT%xBBox
-    ADTree => ADT%ADTree
+                    an(1) = a(2) * norm(3) - a(3) * norm(2)
+                    an(2) = a(3) * norm(1) - a(1) * norm(3)
+                    an(3) = a(1) * norm(2) - a(2) * norm(1)
 
-    ! Determine the sizes from the arrays we have been passed
+                    bn(1) = b(2) * norm(3) - b(3) * norm(2)
+                    bn(2) = b(3) * norm(1) - b(1) * norm(3)
+                    bn(3) = b(1) * norm(2) - b(2) * norm(1)
 
-    nAllocBB    = size(BB)
-    nAllocFront = size(frontLeaves)
+                    ! Solve du and dv. the clipping of vn should not be
+                    ! active, as this would mean that the vectors a and b
+                    ! are parallel. This corresponds to a quad degenerated
+                    ! to a line, which should not occur in the surface mesh.
 
-    ! Initialize the number of possible intersections to 0
+                    vn = a(1) * bn(1) + a(2) * bn(2) + a(3) * bn(3)
+                    vn = sign(max(adtEps, abs(vn)), vn)
+                    du = (vt(1) * bn(1) + vt(2) * bn(2) + vt(3) * bn(3)) / vn
 
-    intInfo = 0
-    !
-    !          Part 1. Traverse the tree and determine the target
-    !                  bounding boxes, which may contain the intersection
-    !
-    ! Start at the root, i.e. set the front leaf to the root leaf.
-    ! Also initialize the number of possible bounding boxes to 0.
+                    vn = b(1) * an(1) + b(2) * an(2) + b(3) * an(3)
+                    vn = sign(max(adtEps, abs(vn)), vn)
+                    dv = (vt(1) * an(1) + vt(2) * an(2) + vt(3) * an(3)) / vn
 
-    nBB = 0
+                    ! Determine the new parameter values uu and vv. These
+                    ! are limited to 0 <= (uu,vv) <= 1.
 
-    nFrontLeaves   = 1
-    frontLeaves(1) = 1
+                    u = u + du; u = min(one, max(zero, u))
+                    v = v + dv; v = min(one, max(zero, v))
 
-    treeTraversalLoop: do
+                    ! Determine the final values of the corrections.
 
-       ! Initialize the number of leaves for the new front, i.e.
-       ! the front of the next round, to 0.
+                    du = abs(u - uold)
+                    dv = abs(v - vold)
 
-       nFrontLeavesNew = 0
+                    ! Determine the new coordinates of the point xf.
 
-       ! Loop over the leaves of the current front.
+                    uv = u * v
+                    xf(1) = x1(1) + u * x21(1) + v * x41(1) + uv * x3142(1)
+                    xf(2) = x1(2) + u * x21(2) + v * x41(2) + uv * x3142(2)
+                    xf(3) = x1(3) + u * x21(3) + v * x41(3) + uv * x3142(3)
 
-       currentFrontLoop: do ii=1,nFrontLeaves
+                    ! Exit the loop if the update of the parametric
+                    ! weights is below the threshold
 
-          ! Store the ID of the leaf a bit easier and loop over
-          ! its two children.
+                    val = sqrt(du * du + dv * dv)
+                    if (val <= thresConv) exit NewtonQuads
 
-          ll = frontLeaves(ii)
+                end do NewtonQuads
 
-          childrenLoop: do mm=1,2
+                ! Compute the distance squared between the given
+                ! coordinate and the point xf.
 
-             ! Determine whether this child contains a bounding box
-             ! or a leaf of the next level.
+                dx = coor(1) - xf(1)
+                dy = coor(2) - xf(2)
+                dz = coor(3) - xf(3)
 
-             kk = ADTree(ll)%children(mm)
-             terminalTest: if(kk < 0) then
+                val = dx * dx + dy * dy + dz * dz
 
-                ! Child contains a bounding box. Check if the
-                ! ray is inside the bounding box.
+                ! If the distance squared is less than the current value
+                ! store the wall distance and interpolation info and
+                ! indicate that an element was found.
 
-                kk = -kk
-                if(  coor(1) <= xBBox(4,kk) .and. &
+                if (val < uvw(4)) then
+                    uvw(4) = val
+                    nNodeElement = 4
+                    elementFound = .true.
 
-                     coor(2) >= xBBox(2,kk) .and. &
-                     coor(2) <= xBBox(5,kk) .and. &
-                     coor(3) >= xBBox(3,kk) .and. &
-                     coor(3) <= xBBox(6,kk)) then
+                    kkk = kk; uu = u; vv = v; ww = w
+                    m(1) = n(1); m(2) = n(2); m(3) = n(3); m(4) = n(4)
 
-                   ! Ray intersectst he bounding box. That's all we
-                   ! wanted to know:
-                   intInfo = 1
-                   exit treeTraversalLoop
+                    weight(1) = (one - u) * (one - v)
+                    weight(2) = u * (one - v)
+                    weight(3) = u * v
+                    weight(4) = (one - u) * v
                 end if
-             else terminalTest
-
-                ! Child contains a leaf. Check if the coordinate is
-                ! inside the bounding box of the leaf.
-
-                if(coor(1) <= ADTree(kk)%xMax(4) .and. &
 
-                     coor(2) >= ADTree(kk)%xMin(2) .and. &
-                     coor(2) <= ADTree(kk)%xMax(5) .and. &
-                     coor(3) >= ADTree(kk)%xMin(3) .and. &
-                     coor(3) <= ADTree(kk)%xMax(6)) then
-
-                   ! Coordinate is inside the leaf. Store the leaf in
-                   ! the list for the new front.
-
-                   if(nFrontLeavesNew == nAllocFront) then
-                      i = nAllocFront
-                      call reallocPlus(frontLeavesNew, i, 250, ADT)
-                      call reallocPlus(frontLeaves, nAllocFront, 250, ADT)
-                   endif
-
-                   nFrontLeavesNew = nFrontLeavesNew + 1
-                   frontLeavesNew(nFrontLeavesNew) = kk
+                !=========================================================
+
+            case (adtTetrahedron)
+                call adtTerminate(ADT, "minDistanceTreeSearch", &
+                     "Minimum distance search for &
+                     &tetrahedra not implemented yet")
+
+                !===========================================================
+
+            case (adtPyramid)
+                call adtTerminate(ADT, "minDistanceTreeSearch", &
+                     "Minimum distance search for &
+                     &pyramids not implemented yet")
+
+                !===========================================================
+
+            case (adtPrism)
+                call adtTerminate(ADT, "minDistanceTreeSearch", &
+                     "Minimum distance search for &
+                     &prisms not implemented yet")
+
+                !===========================================================
+
+            case (adtHexahedron)
+
+                ! Determine the element ID and the corresponding
+                ! 8 node ID's.
+
+                ll = ADT%elementID(kk)
+                n(1) = ADT%hexaConn(1, ll)
+                n(2) = ADT%hexaConn(2, ll)
+                n(3) = ADT%hexaConn(3, ll)
+                n(4) = ADT%hexaConn(4, ll)
+                n(5) = ADT%hexaConn(5, ll)
+                n(6) = ADT%hexaConn(6, ll)
+                n(7) = ADT%hexaConn(7, ll)
+                n(8) = ADT%hexaConn(8, ll)
+
+                ! Call the subroutine minD2Hexa to do the work.
+
+                call minD2Hexa(coor(1:3), &
+                               ADT%coor(1:3, n(1)), &
+                               ADT%coor(1:3, n(2)), &
+                               ADT%coor(1:3, n(3)), &
+                               ADT%coor(1:3, n(4)), &
+                               ADT%coor(1:3, n(5)), &
+                               ADT%coor(1:3, n(6)), &
+                               ADT%coor(1:3, n(7)), &
+                               ADT%coor(1:3, n(8)), &
+                               val, chi, ierr)
+
+                ! If the distance squared is less than the current value
+                ! store the wall distance and interpolation info and
+                ! indicate that an element was found.
+
+                if (val < uvw(4)) then
+                    uvw(4) = val
+                    nNodeElement = 8
+                    elementFound = .true.
+
+                    kkk = kk; 
+                    uu = chi(1); vv = chi(2); ww = chi(3)
+
+                    m(1) = n(1); m(2) = n(2); m(3) = n(3); m(4) = n(4)
+                    m(5) = n(5); m(6) = n(6); m(7) = n(7); m(8) = n(8)
+
+                    weight(1) = (one - uu) * (one - vv) * (one - ww)
+                    weight(2) = uu * (one - vv) * (one - ww)
+                    weight(3) = uu * vv * (one - ww)
+                    weight(4) = (one - uu) * vv * (one - ww)
+                    weight(5) = (one - uu) * (one - vv) * ww
+                    weight(6) = uu * (one - vv) * ww
+                    weight(7) = uu * vv * ww
+                    weight(8) = (one - uu) * vv * ww
                 end if
 
-             endif terminalTest
+            end select
+
+        end do BBoxLoop
+
+        ! Check if an element was found. As all the minimum distance
+        ! searches are initialized by the calling routine (to support
+        ! periodic searches) this is not always the case.
+
+        if (elementFound) then
+
+            ! Store the interpolation information for this point.
+            ! First the integer info, i.e. the processor ID, element type
+            ! and local element ID.
 
-          enddo childrenLoop
+            intInfo(1) = ADT%myID
+            intInfo(2) = ADT%elementType(kkk)
+            intInfo(3) = ADT%elementID(kkk)
 
-       enddo currentFrontLoop
+            ! The parametric weights. Note that the wall distance
+            ! squared, stored in the 4th position of uvw, already
+            ! contains the correct value.
 
-       ! End of the loop over the current front. If the new front
-       ! is empty the entire tree has been traversed and an exit is
-       ! made from the corresponding loop.
+            uvw(1) = uu
+            uvw(2) = vv
+            uvw(3) = ww
 
-       if(nFrontLeavesNew == 0) then
-          exit treeTraversalLoop
-       end if
-       ! Copy the data of the new front leaves into the current
-       ! front for the next round.
+            ! The interpolated solution, if needed.
 
-       nFrontLeaves = nFrontLeavesNew
-       do ll=1,nFrontLeaves
-          frontLeaves(ll) = frontLeavesNew(ll)
-       enddo
+            do ll = 1, nInterpol
+                ii = 4 + ll
+                uvw(ii) = weight(1) * arrDonor(ll, m(1))
+                do i = 2, nNodeElement
+                    uvw(ii) = uvw(ii) + weight(i) * arrDonor(ll, m(i))
+                end do
+            end do
 
-    enddo treeTraversalLoop
-    !
-    !          Part 2: Loop over the selected bounding boxes and check if
-    !                  the corresponding elements contain the point.
-    !
-    !intInfo = nBB
-    ! elementFound = .false.
+        end if
 
-    ! BBoxLoop: do mm=1,nBB
+    end subroutine minDistanceTreeSearchSinglePoint
 
-    !   ! Determine the element type stored in this bounding box.
+    subroutine intersectionTreeSearchSinglePoint(ADT, coor, &
+                                                 intInfo, BB, frontLeaves, frontLeavesNew)
+        !
+        !        This routine is used in the ray casting approach to determine
+        !        if a given ray intersects any of the surface elements. The
+        !        purpose is to determine if a point of interest is inside
+        !        or outside the (closed) surface defined by the ADT.
+        !        Subroutine intent(in) arguments.
+        !        --------------------------------
+        !        ADT:       ADT type whose ADT must be searched
+        !        coor(3):   The coordinate of the point to be searched.
+        !        Subroutine intent(out) arguments.
+        !        ---------------------------------
+        !        intInfo: Intersection info. The number of intersections we
+        !       *          found.
+        !
+        implicit none
+        !
+        !       Subroutine arguments.
+        !
+        type(adtType), intent(inout) :: ADT
 
-    !   kk = BB(mm)
-    !   select case (ADT%elementType(kk))
+        real(kind=realType), dimension(3), intent(in) :: coor
+        integer(kind=intType), intent(out) :: intInfo
 
-    !     case (adtQuadrilateral)
+        integer(kind=intType), dimension(:), pointer :: BB
+        integer(kind=intType), dimension(:), pointer :: frontLeaves
+        integer(kind=intType), dimension(:), pointer :: frontLeavesNew
+        !
+        !       Local variables.
+        !
+        integer :: ierr
 
+        integer(kind=intType) :: ii, kk, ll, mm, nn
+        integer(kind=intType) :: nBB, nFrontLeaves, nFrontLeavesNew
+        integer(kind=intType) :: nAllocBB, nAllocFront
+        integer(kind=intType) :: i, nNodeElement
+        real(kind=realType), dimension(:, :), pointer :: xBBox
+        logical :: elementFound
+        type(adtLeafType), dimension(:), pointer :: ADTree
 
-    !     !=========================================================
+        ! Set some pointers to make the code more readable.
 
-    !     case (adtTriangle)
+        xBBox => ADT%xBBox
+        ADTree => ADT%ADTree
 
+        ! Determine the sizes from the arrays we have been passed
 
+        nAllocBB = size(BB)
+        nAllocFront = size(frontLeaves)
 
-    !   end select
+        ! Initialize the number of possible intersections to 0
 
-    ! enddo BBoxLoop
+        intInfo = 0
+        !
+        !          Part 1. Traverse the tree and determine the target
+        !                  bounding boxes, which may contain the intersection
+        !
+        ! Start at the root, i.e. set the front leaf to the root leaf.
+        ! Also initialize the number of possible bounding boxes to 0.
 
-  end subroutine intersectionTreeSearchSinglePoint
+        nBB = 0
 
-  subroutine minD2Hexa(xP,x1,x2,x3,x4,x5,x6,x7,x8,d2,chi,iErr)
-    !
-    !      Subroutine to compute a fail-safe minimum distance computation
-    !      between a point, P, and a hexahedral element.  This subroutine
-    !      can provide the minimum distance whether P is inside or
-    !      outside of the hexahedral element.  If P is inside the element
-    !      the distance returned is zero, while if P is outside of the
-    !      element, the distance returned is the minimum distance between
-    !      P and any of the bounding faces of the hexahedral element.
-    !      The basic idea of this subroutine is to perform a
-    !      bound-constrained minimization of the distance between the
-    !      point P and a point inside the element given by parametric
-    !      coordinates chi(3) using a modified Newton step bound
-    !      constrained optimizer.
-    !      For more details of the optimization algorithms, a good
-    !      reference is:
-    !      Nocedal, J. and Wright, S. J., Numerical Optimization,
-    !      Springer, 1999.
-    !      Subroutine arguments:
-    !      xp(3)        - The Cartesian coordinates of point P.
-    !      x1(3)-x8(3)  - The Cartesian coordinates of the 8 nodes that
-    !                      make up the hexahedron, in the order specified
-    !                      in the CHIMPS standard.
-    !      d2           - The squared of the minimum distance between the
-    !                      point P and the hexahedral element.
-    !      chi(3)       - Parametric coordinates of the point that
-    !                      belongs to the hexahedron where the minimum
-    !                      distance has been found.  If P is inside the
-    !                      element, 0<(ksi,eta,zeta)<1, while if P is
-    !                      strictly outside of the element, then one or
-    !                      more of the values of (ksi,eta,zeta) will be
-    !                      exactly zero or one.
-    !      iErr         - Output status of this subroutine:
-    !                      iErr =  0, Proper minimum found.
-    !                      iErr = -1, Distance minimization failed.
-    !
-    use precision
+        nFrontLeaves = 1
+        frontLeaves(1) = 1
 
-    implicit none
-    !
-    !     Subroutine arguments.
-    !
-    real(kind=realType), dimension(3), intent(in) :: xP
-    real(kind=realType), dimension(3), intent(in) :: x1, x2, x3, x4
-    real(kind=realType), dimension(3), intent(in) :: x5, x6, x7, x8
-
-    integer(kind=intType), intent(out) :: iErr
-
-    real(kind=realType), intent(out) :: d2
-    real(kind=realType), dimension(3), intent(out) :: chi
-    !
-    !     Local variables.
-    !
-    integer(kind=intType) , parameter :: maxIt = 30
-    integer(kind=intType) :: i, itCount = 0
-    integer(kind=intType), dimension(3) :: actSet, chiGradConv
-
-    real(kind=realType) :: inactGradNorm, normDeltaChi, x0, y0, z0
-    real(kind=realType), parameter :: gradTol = 1.0e-14_realType
-    real(kind=realType), parameter :: deltaChiTol = 1.0e-14_realType
-    real(kind=realType), dimension(3) :: deltaChi, actualDeltaChi
-    real(kind=realType), dimension(3) :: lwrBnd, uppBnd
-    real(kind=realType), dimension(3) :: grad, oldChi
-    real(kind=realType), dimension(3,3) :: hess
-
-    logical :: convDeltaChi, convGradD2
-    !
-    !      Initialization section.
-    !
-
-    ! Setup initial values for the parametric coordinates of the
-    ! minimum distance point.  One may be able to do better than this
-    ! in the future, but this is probably a good guess.
-
-    chi(1) = 0.5_realType
-    chi(2) = 0.5_realType
-    chi(3) = 0.5_realType
-
-    ! Initialize the active set array, actSet(i), i=1,2,3
-    ! actSet(i) =  0 if bound constraint i is inactive
-    ! actSet(i) = +1 if bound constraint i is active at upper bound
-    ! actSet(i) = -1 if bound constraint i is active at lower bound
-
-    actSet(:) = 0
-
-    ! Initialize the upper and lower bounds for all chi variables
-
-    lwrBnd(:) = 0.0_realType
-    uppBnd(:) = 1.0_realType
-
-    ! Initialize actualDeltaChi (step size) to a large value so that the
-    ! convergence criteria for the step size is guaranteed not to pass
-    ! on the first iteration.
-
-    actualDeltaChi(:) = 1.0_realType
-
-    !
-    !      Main iteration loop.
-    !
-    itCount = 0
-
-    IterLoop: do while (itCount <= maxIt)
-
-       ! Increment iteration counter
-
-       itCount = itCount +1
-
-       !
-       !      Compute the gradient of d2
-       !
-
-       call gradD2Hexa(xP,x1,x2,x3,x4,x5,x6,x7,x8,chi,x0,y0,z0,grad,iErr)
-
-       !
-       !      Convergence test
-       !
-
-       convDeltaChi = .false.
-       convGradD2   = .false.
-
-       ! Convergence test for step size
-
-       normDeltaChi = sqrt(actualDeltaChi(1)**2 +actualDeltaChi(2)**2 +actualDeltaChi(3)**2)
-
-       if (normDeltaChi < deltaChiTol) convDeltaChi = .true.
-
-       ! Convergence test for the gradient.  Note that this gradient
-       ! test is such that, in order to pass it, the following must
-       ! be true:
-       !
-       ! 1) \frac{\partial d2}{\partial \chi_i} > 0 for i in the active
-       !     set at the lower bound, actSet(i) = -1
-       ! 2) \frac{\partial d2}{\partial \chi_i} < 0 for i in the active
-       !     set at the upper bound, actSet(i) = +1
-       ! 3) norm of components of the gradient in the inactive set must
-       !     be smaller than gradTol
-       !
-
-       inactGradNorm  = 0.0_realType
-       chiGradConv(:) = 0
-
-       do i=1,3
-
-          ! If this component of chi is in the inactive set, accumulate
-          ! the total value of the gradient and deal with it later.
-
-          if (actSet(i) == 0) inactGradNorm = inactGradNorm +grad(i)**2
-
-          ! If this component of chi is active at a lower bound, check
-          ! for the convergence criterion.  Also deactivate the constraint
-          ! if the gradient is pointing in the proper direction.
-
-          if (actSet(i) == -1) then
-             if (grad(i) > 0.0_realType) then
-                chiGradConv(i) = 1
-             else
-                actSet(i) = 0
-             end if
-          end if
-
-          ! If this component of chi is active at an upper bound, check
-          ! for the convergence criterion.
+        treeTraversalLoop: do
 
-          if (actSet(i) ==  1) then
-             if (grad(i) < 0.0_realType) then
-                chiGradConv(i) = 1
-             else
-                actSet(i) = 0
-             end if
-          end if
+            ! Initialize the number of leaves for the new front, i.e.
+            ! the front of the next round, to 0.
 
-       end do
+            nFrontLeavesNew = 0
 
-       ! Check for convergence on the accumulated values of the inactive
-       ! components of the gradient.
+            ! Loop over the leaves of the current front.
 
-       if (inactGradNorm < gradTol) then
-          do i=1,3
-             if (actSet(i) == 0) chiGradConv(i) = 1
-          end do
-       end if
+            currentFrontLoop: do ii = 1, nFrontLeaves
 
-       if (sum(chiGradConv(1:3)) == 3) convGradD2 = .true.
+                ! Store the ID of the leaf a bit easier and loop over
+                ! its two children.
 
-       ! Test for convergence using both criteria
+                ll = frontLeaves(ii)
 
-       if (convDeltaChi .and. convGradD2) exit IterLoop
+                childrenLoop: do mm = 1, 2
 
-       !
-       !      Compute the Hessian of d2
-       !
+                    ! Determine whether this child contains a bounding box
+                    ! or a leaf of the next level.
 
-       call hessD2Hexa(xP,x1,x2,x3,x4,x5,x6,x7,x8,chi,hess,iErr)
+                    kk = ADTree(ll)%children(mm)
+                    terminalTest: if (kk < 0) then
+
+                        ! Child contains a bounding box. Check if the
+                        ! ray is inside the bounding box.
+
+                        kk = -kk
+                        if (coor(1) <= xBBox(4, kk) .and. &
+                            coor(2) >= xBBox(2, kk) .and. &
+                            coor(2) <= xBBox(5, kk) .and. &
+                            coor(3) >= xBBox(3, kk) .and. &
+                            coor(3) <= xBBox(6, kk)) then
+
+                            ! Ray intersectst he bounding box. That's all we
+                            ! wanted to know:
+                            intInfo = 1
+                            exit treeTraversalLoop
+                        end if
+                        else terminalTest
+
+                        ! Child contains a leaf. Check if the coordinate is
+                        ! inside the bounding box of the leaf.
+
+                        if (coor(1) <= ADTree(kk)%xMax(4) .and. &
+                            coor(2) >= ADTree(kk)%xMin(2) .and. &
+                            coor(2) <= ADTree(kk)%xMax(5) .and. &
+                            coor(3) >= ADTree(kk)%xMin(3) .and. &
+                            coor(3) <= ADTree(kk)%xMax(6)) then
+
+                            ! Coordinate is inside the leaf. Store the leaf in
+                            ! the list for the new front.
+
+                            if (nFrontLeavesNew == nAllocFront) then
+                                i = nAllocFront
+                                call reallocPlus(frontLeavesNew, i, 250, ADT)
+                                call reallocPlus(frontLeaves, nAllocFront, 250, ADT)
+                            end if
+
+                            nFrontLeavesNew = nFrontLeavesNew + 1
+                            frontLeavesNew(nFrontLeavesNew) = kk
+                        end if
+
+                    end if terminalTest
+
+                end do childrenLoop
+
+            end do currentFrontLoop
+
+            ! End of the loop over the current front. If the new front
+            ! is empty the entire tree has been traversed and an exit is
+            ! made from the corresponding loop.
+
+            if (nFrontLeavesNew == 0) then
+                exit treeTraversalLoop
+            end if
+            ! Copy the data of the new front leaves into the current
+            ! front for the next round.
+
+            nFrontLeaves = nFrontLeavesNew
+            do ll = 1, nFrontLeaves
+                frontLeaves(ll) = frontLeavesNew(ll)
+            end do
+
+        end do treeTraversalLoop
+        !
+        !          Part 2: Loop over the selected bounding boxes and check if
+        !                  the corresponding elements contain the point.
+        !
+        !intInfo = nBB
+        ! elementFound = .false.
+
+        ! BBoxLoop: do mm=1,nBB
+
+        !   ! Determine the element type stored in this bounding box.
+
+        !   kk = BB(mm)
+        !   select case (ADT%elementType(kk))
+
+        !     case (adtQuadrilateral)
+
+        !     !=========================================================
+
+        !     case (adtTriangle)
+
+        !   end select
+
+        ! enddo BBoxLoop
+
+    end subroutine intersectionTreeSearchSinglePoint
+
+    subroutine minD2Hexa(xP, x1, x2, x3, x4, x5, x6, x7, x8, d2, chi, iErr)
+        !
+        !      Subroutine to compute a fail-safe minimum distance computation
+        !      between a point, P, and a hexahedral element.  This subroutine
+        !      can provide the minimum distance whether P is inside or
+        !      outside of the hexahedral element.  If P is inside the element
+        !      the distance returned is zero, while if P is outside of the
+        !      element, the distance returned is the minimum distance between
+        !      P and any of the bounding faces of the hexahedral element.
+        !      The basic idea of this subroutine is to perform a
+        !      bound-constrained minimization of the distance between the
+        !      point P and a point inside the element given by parametric
+        !      coordinates chi(3) using a modified Newton step bound
+        !      constrained optimizer.
+        !      For more details of the optimization algorithms, a good
+        !      reference is:
+        !      Nocedal, J. and Wright, S. J., Numerical Optimization,
+        !      Springer, 1999.
+        !      Subroutine arguments:
+        !      xp(3)        - The Cartesian coordinates of point P.
+        !      x1(3)-x8(3)  - The Cartesian coordinates of the 8 nodes that
+        !                      make up the hexahedron, in the order specified
+        !                      in the CHIMPS standard.
+        !      d2           - The squared of the minimum distance between the
+        !                      point P and the hexahedral element.
+        !      chi(3)       - Parametric coordinates of the point that
+        !                      belongs to the hexahedron where the minimum
+        !                      distance has been found.  If P is inside the
+        !                      element, 0<(ksi,eta,zeta)<1, while if P is
+        !                      strictly outside of the element, then one or
+        !                      more of the values of (ksi,eta,zeta) will be
+        !                      exactly zero or one.
+        !      iErr         - Output status of this subroutine:
+        !                      iErr =  0, Proper minimum found.
+        !                      iErr = -1, Distance minimization failed.
+        !
+        use precision
+
+        implicit none
+        !
+        !     Subroutine arguments.
+        !
+        real(kind=realType), dimension(3), intent(in) :: xP
+        real(kind=realType), dimension(3), intent(in) :: x1, x2, x3, x4
+        real(kind=realType), dimension(3), intent(in) :: x5, x6, x7, x8
+
+        integer(kind=intType), intent(out) :: iErr
+
+        real(kind=realType), intent(out) :: d2
+        real(kind=realType), dimension(3), intent(out) :: chi
+        !
+        !     Local variables.
+        !
+        integer(kind=intType), parameter :: maxIt = 30
+        integer(kind=intType) :: i, itCount = 0
+        integer(kind=intType), dimension(3) :: actSet, chiGradConv
+
+        real(kind=realType) :: inactGradNorm, normDeltaChi, x0, y0, z0
+        real(kind=realType), parameter :: gradTol = 1.0e-14_realType
+        real(kind=realType), parameter :: deltaChiTol = 1.0e-14_realType
+        real(kind=realType), dimension(3) :: deltaChi, actualDeltaChi
+        real(kind=realType), dimension(3) :: lwrBnd, uppBnd
+        real(kind=realType), dimension(3) :: grad, oldChi
+        real(kind=realType), dimension(3, 3) :: hess
+
+        logical :: convDeltaChi, convGradD2
+        !
+        !      Initialization section.
+        !
+
+        ! Setup initial values for the parametric coordinates of the
+        ! minimum distance point.  One may be able to do better than this
+        ! in the future, but this is probably a good guess.
+
+        chi(1) = 0.5_realType
+        chi(2) = 0.5_realType
+        chi(3) = 0.5_realType
+
+        ! Initialize the active set array, actSet(i), i=1,2,3
+        ! actSet(i) =  0 if bound constraint i is inactive
+        ! actSet(i) = +1 if bound constraint i is active at upper bound
+        ! actSet(i) = -1 if bound constraint i is active at lower bound
+
+        actSet(:) = 0
+
+        ! Initialize the upper and lower bounds for all chi variables
+
+        lwrBnd(:) = 0.0_realType
+        uppBnd(:) = 1.0_realType
+
+        ! Initialize actualDeltaChi (step size) to a large value so that the
+        ! convergence criteria for the step size is guaranteed not to pass
+        ! on the first iteration.
+
+        actualDeltaChi(:) = 1.0_realType
+
+        !
+        !      Main iteration loop.
+        !
+        itCount = 0
+
+        IterLoop: do while (itCount <= maxIt)
+
+            ! Increment iteration counter
+
+            itCount = itCount + 1
+
+            !
+            !      Compute the gradient of d2
+            !
+
+            call gradD2Hexa(xP, x1, x2, x3, x4, x5, x6, x7, x8, chi, x0, y0, z0, grad, iErr)
+
+            !
+            !      Convergence test
+            !
+
+            convDeltaChi = .false.
+            convGradD2 = .false.
+
+            ! Convergence test for step size
+
+            normDeltaChi = sqrt(actualDeltaChi(1)**2 + actualDeltaChi(2)**2 + actualDeltaChi(3)**2)
+
+            if (normDeltaChi < deltaChiTol) convDeltaChi = .true.
+
+            ! Convergence test for the gradient.  Note that this gradient
+            ! test is such that, in order to pass it, the following must
+            ! be true:
+            !
+            ! 1) \frac{\partial d2}{\partial \chi_i} > 0 for i in the active
+            !     set at the lower bound, actSet(i) = -1
+            ! 2) \frac{\partial d2}{\partial \chi_i} < 0 for i in the active
+            !     set at the upper bound, actSet(i) = +1
+            ! 3) norm of components of the gradient in the inactive set must
+            !     be smaller than gradTol
+            !
 
-       !
-       !      Compute the Newton step
-       !
+            inactGradNorm = 0.0_realType
+            chiGradConv(:) = 0
 
-       call newtonStep(hess,grad,deltaChi,iErr)
+            do i = 1, 3
 
-       !
-       !      Update the current guess (appropriately clipped at
-       !      the bounds) and update the active set array as needed
-       !
+                ! If this component of chi is in the inactive set, accumulate
+                ! the total value of the gradient and deal with it later.
 
-       ! Loop over the components of chi
+                if (actSet(i) == 0) inactGradNorm = inactGradNorm + grad(i)**2
 
-       oldChi(:) = chi(:)
+                ! If this component of chi is active at a lower bound, check
+                ! for the convergence criterion.  Also deactivate the constraint
+                ! if the gradient is pointing in the proper direction.
 
-       do i=1,3
-
-          ! Update only the components of chi that were not active
-
-          if (actSet(i) == 0) then
-
-             chi(i) = chi(i) +deltaChi(i)
-
-             ! Check to see this degree of freedom has become
-             ! actively constrained at either an upper or
-             ! lower bound and update active set.
-
-             if (chi(i) > uppBnd(i)) then
-                chi(i)    = uppBnd(i)
-                actSet(i) = 1
-             else if (chi(i) < lwrBnd(i)) then
-                chi(i)    = lwrBnd(i)
-                actSet(i) = -1
-             end if
-          end if
-       end do
-
-       actualDeltaChi(:) = chi(:) -oldChi(:)
-    end do IterLoop
-
-    !
-    !      Return the results to the calling function and print info
-    !      for debugging purposes.
-    !
+                if (actSet(i) == -1) then
+                    if (grad(i) > 0.0_realType) then
+                        chiGradConv(i) = 1
+                    else
+                        actSet(i) = 0
+                    end if
+                end if
 
-    ! Compute the minimum distance (squared) that the algorithm found
+                ! If this component of chi is active at an upper bound, check
+                ! for the convergence criterion.
 
-    d2 = (xP(1) -x0)**2 +(xP(2) -y0)**2 +(xP(3) -z0)**2
+                if (actSet(i) == 1) then
+                    if (grad(i) < 0.0_realType) then
+                        chiGradConv(i) = 1
+                    else
+                        actSet(i) = 0
+                    end if
+                end if
 
-    ! Print some stuff out to the screen for debugging purposes
+            end do
 
-    ! write(*,*)
-    ! write(*,*) 'Results of minD2Hexa'
-    ! write(*, coordinateFormat)'Point P                = (',xP(1),xP(2),xP(3),' )'
-    ! write(*, coordinateFormat)'Found point            = (',x0,y0,z0,' )'
-    ! write(*, coordinateFormat)'Parametric coordinates = (',chi(1),chi(2),chi(3),' )'
-    ! write(*, distanceFormat)'Minimum distance       =',sqrt(d2)
-    ! write(*, iterationFormat)'Number of iterations   =',itCount
+            ! Check for convergence on the accumulated values of the inactive
+            ! components of the gradient.
 
-    ! character(len=maxStringLen) :: iterationFormat = '(A, 1x, i3)'
-    ! character(len=maxStringLen) :: coordinateFormat = '(A, 3f10.6, A)'
-    ! character(len=maxStringLen) :: distanceFormat = '(A, f20.17, A)'
+            if (inactGradNorm < gradTol) then
+                do i = 1, 3
+                    if (actSet(i) == 0) chiGradConv(i) = 1
+                end do
+            end if
 
-    return
+            if (sum(chiGradConv(1:3)) == 3) convGradD2 = .true.
 
-  end subroutine minD2Hexa
+            ! Test for convergence using both criteria
 
-  subroutine newtonStep(hess,grad,step,iErr)
-    !
-    !      Compute the Newton step given by the Hessian matrix and the
-    !      gradient vector of the distance squared function.
-    !
-    use precision
+            if (convDeltaChi .and. convGradD2) exit IterLoop
 
-    implicit none
-    !
-    !     Subroutine arguments.
-    !
-    real(kind=realType), dimension(3),   intent(in)  :: grad
-    real(kind=realType), dimension(3),  intent(out)  :: step
-    real(kind=realType), dimension(3,3), intent(in)  :: hess
+            !
+            !      Compute the Hessian of d2
+            !
 
-    integer(kind=intType), intent(out) :: iErr
-
-    !
-    !     Local variables.
-    !
-    real(kind=realType) :: determinant
+            call hessD2Hexa(xP, x1, x2, x3, x4, x5, x6, x7, x8, chi, hess, iErr)
 
-    !
-    !      Compute the Newton step as the solution of the problem
-    !      Hessian . step = -grad
-    !      using simple Cramer's rule
-    !
-    ! Compute the determinant of the Hessian Matrix
+            !
+            !      Compute the Newton step
+            !
 
-    determinant = hess(1,1)*(hess(2,2)*hess(3,3)-hess(2,3)*hess(3,2)) &
-         +hess(1,2)*(hess(2,3)*hess(3,1)-hess(2,1)*hess(3,3)) &
-         +hess(1,3)*(hess(2,1)*hess(3,2)-hess(2,2)*hess(3,1))
+            call newtonStep(hess, grad, deltaChi, iErr)
 
-    ! First component of the step
+            !
+            !      Update the current guess (appropriately clipped at
+            !      the bounds) and update the active set array as needed
+            !
 
-    step(1) = grad(1)  *(hess(2,2)*hess(3,3)-hess(2,3)*hess(3,2)) &
-         +hess(1,2)*(hess(2,3)*grad(3)  -grad(2)  *hess(3,3)) &
-         +hess(1,3)*(grad(2)  *hess(3,2)-hess(2,2)*grad(3)  )
-    step(1) = -step(1)/determinant
+            ! Loop over the components of chi
 
-    ! Second component of the step
+            oldChi(:) = chi(:)
 
-    step(2) = hess(1,1)*(grad(2)  *hess(3,3)-hess(2,3)*grad(3)  ) &
-         +grad(1)  *(hess(2,3)*hess(3,1)-hess(2,1)*hess(3,3)) &
-         +hess(1,3)*(hess(2,1)*grad(3)  -grad(2)  *hess(3,1))
-    step(2) = -step(2)/determinant
+            do i = 1, 3
 
-    ! First component of the step
+                ! Update only the components of chi that were not active
 
-    step(3) = hess(1,1)*(hess(2,2)*grad(3)  -grad(2)  *hess(3,2)) &
-         +hess(1,2)*(grad(2)  *hess(3,1)-hess(2,1)*grad(3)  ) &
-         +grad(1)  *(hess(2,1)*hess(3,2)-hess(2,2)*hess(3,1))
-    step(3) = -step(3)/determinant
+                if (actSet(i) == 0) then
 
-    ! Return iErr = 0 for the time being.
+                    chi(i) = chi(i) + deltaChi(i)
 
-    iErr = 0
+                    ! Check to see this degree of freedom has become
+                    ! actively constrained at either an upper or
+                    ! lower bound and update active set.
 
-    return
+                    if (chi(i) > uppBnd(i)) then
+                        chi(i) = uppBnd(i)
+                        actSet(i) = 1
+                    else if (chi(i) < lwrBnd(i)) then
+                        chi(i) = lwrBnd(i)
+                        actSet(i) = -1
+                    end if
+                end if
+            end do
 
-  end subroutine newtonStep
+            actualDeltaChi(:) = chi(:) - oldChi(:)
+        end do IterLoop
 
-  subroutine hessD2Hexa(xP,x1,x2,x3,x4,x5,x6,x7,x8,chi,hess,iErr)
-    !
-    !      Compute the Hessian of the square of the distance between the
-    !      point xP, and the actual point in the hexahedron represented
-    !      by chi(1:3).
-    !
-    use precision
+        !
+        !      Return the results to the calling function and print info
+        !      for debugging purposes.
+        !
 
-    implicit none
-    !
-    !     Subroutine arguments.
-    !
-    real(kind=realType), dimension(3), intent(in)  :: xP, chi
-    real(kind=realType), dimension(3), intent(in)  :: x1, x2, x3, x4
-    real(kind=realType), dimension(3), intent(in)  :: x5, x6, x7, x8
-    real(kind=realType), dimension(3,3), intent(out) :: hess
+        ! Compute the minimum distance (squared) that the algorithm found
 
-    integer(kind=intType), intent(out) :: iErr
+        d2 = (xP(1) - x0)**2 + (xP(2) - y0)**2 + (xP(3) - z0)**2
 
-    !
-    !     Local variables.
-    !
-    integer(kind=intType) :: i
-
-    real(kind=realType) :: ksi, eta, zeta, x0, y0, z0
-    real(kind=realType), dimension(8,3) :: alpha
-    real(kind=realType) :: dxdksi, dxdeta, dxdzeta
-    real(kind=realType) :: dydksi, dydeta, dydzeta
-    real(kind=realType) :: dzdksi, dzdeta, dzdzeta
-    real(kind=realType) :: d2xdksideta, d2xdksidzeta, d2xdetadzeta
-    real(kind=realType) :: d2ydksideta, d2ydksidzeta, d2ydetadzeta
-    real(kind=realType) :: d2zdksideta, d2zdksidzeta, d2zdetadzeta
-
-    ! Initialize the parametric coordinates with a more recognizable
-    ! name.
-
-    ksi  = chi(1)
-    eta  = chi(2)
-    zeta = chi(3)
-
-    ! Initialize the alpha array (obtained by regrouping terms in the
-    ! parametric expansion of the (ksi,eta,zeta)->(x,y,z) mapping.
-    ! The second index of the alpha array corresponds to the x, y, or
-    ! z coordinate mapping.
-
-    do i=1,3
-       alpha(1,i) = x1(i)
-       alpha(2,i) = x2(i) -x1(i)
-       alpha(3,i) = x4(i) -x1(i)
-       alpha(4,i) = x5(i) -x1(i)
-       alpha(5,i) = x3(i) -x2(i) +x1(i) -x4(i)
-       alpha(6,i) = x6(i) -x5(i) +x1(i) -x2(i)
-       alpha(7,i) = x8(i) -x5(i) +x1(i) -x4(i)
-       alpha(8,i) = x7(i) -x8(i) +x5(i) -x6(i) +x4(i) -x3(i) +x2(i) -x1(i)
-    end do
-
-    ! Compute the value of the partial derivatives of x, y, and z with
-    ! respect to ksi, eta, and zeta
-
-    dxdksi = alpha(2,1) +alpha(5,1)*eta +alpha(6,1)*zeta +alpha(8,1)*eta*zeta
-    dydksi = alpha(2,2) +alpha(5,2)*eta +alpha(6,2)*zeta +alpha(8,2)*eta*zeta
-    dzdksi = alpha(2,3) +alpha(5,3)*eta +alpha(6,3)*zeta +alpha(8,3)*eta*zeta
-
-    dxdeta = alpha(3,1) +alpha(5,1)*ksi +alpha(7,1)*zeta +alpha(8,1)*ksi*zeta
-    dydeta = alpha(3,2) +alpha(5,2)*ksi +alpha(7,2)*zeta +alpha(8,2)*ksi*zeta
-    dzdeta = alpha(3,3) +alpha(5,3)*ksi +alpha(7,3)*zeta +alpha(8,3)*ksi*zeta
-
-    dxdzeta = alpha(4,1) +alpha(6,1)*ksi +alpha(7,1)*eta +alpha(8,1)*ksi*eta
-    dydzeta = alpha(4,2) +alpha(6,2)*ksi +alpha(7,2)*eta +alpha(8,2)*ksi*eta
-    dzdzeta = alpha(4,3) +alpha(6,3)*ksi +alpha(7,3)*eta +alpha(8,3)*ksi*eta
-
-    ! Compute the value of the non-zero partial derivatives of x, y, and z
-    ! with respect to ksi, eta, and zeta.  Note that the second derivatives
-    ! of x, y, and z with respect to ksi2, eta2, and zeta2 are identically
-    ! zero
-
-    d2xdksideta  = alpha(5,1) +alpha(8,1)*zeta
-    d2ydksideta  = alpha(5,2) +alpha(8,2)*zeta
-    d2zdksideta  = alpha(5,3) +alpha(8,3)*zeta
-
-    d2xdksidzeta = alpha(6,1) +alpha(8,1)*eta
-    d2ydksidzeta = alpha(6,2) +alpha(8,2)*eta
-    d2zdksidzeta = alpha(6,3) +alpha(8,3)*eta
-
-    d2xdetadzeta = alpha(7,1) +alpha(8,1)*ksi
-    d2ydetadzeta = alpha(7,2) +alpha(8,2)*ksi
-    d2zdetadzeta = alpha(7,3) +alpha(8,3)*ksi
-
-    ! Compute the actual location of the point
-
-    x0 = alpha(1,1) +alpha(2,1)*ksi +alpha(3,1)*eta +alpha(4,1)*zeta            &
-         +alpha(5,1)*ksi*eta +alpha(6,1)*ksi*zeta +alpha(7,1)*eta*zeta           &
-         +alpha(8,1)*ksi*eta*zeta
-    y0 = alpha(1,2) +alpha(2,2)*ksi +alpha(3,2)*eta +alpha(4,2)*zeta            &
-         +alpha(5,2)*ksi*eta +alpha(6,2)*ksi*zeta +alpha(7,2)*eta*zeta           &
-         +alpha(8,2)*ksi*eta*zeta
-    z0 = alpha(1,3) +alpha(2,3)*ksi +alpha(3,3)*eta +alpha(4,3)*zeta            &
-         +alpha(5,3)*ksi*eta +alpha(6,3)*ksi*zeta +alpha(7,3)*eta*zeta           &
-         +alpha(8,3)*ksi*eta*zeta
-
-    ! Compute the elements of the Hessian matrix
-
-    hess(1,1) = 2.0_realType*((dxdksi *dxdksi)  +(dydksi *dydksi)  +(dzdksi *dzdksi))
-    hess(2,2) = 2.0_realType*((dxdeta *dxdeta)  +(dydeta *dydeta)  +(dzdeta *dzdeta))
-    hess(3,3) = 2.0_realType*((dxdzeta*dxdzeta) +(dydzeta*dydzeta) +(dzdzeta*dzdzeta))
-
-    hess(1,2) = 2.0_realType*((dxdksi*dxdeta) +(dydksi*dydeta) +(dzdksi*dzdeta)  &
-         -((xP(1) -x0)*d2xdksideta) -((xP(2) -y0)*d2ydksideta) -((xP(3) -z0)*d2zdksideta))
-    hess(1,3) = 2.0_realType*((dxdksi*dxdzeta) +(dydksi*dydzeta) +(dzdksi*dzdzeta)  &
-         -((xP(1) -x0)*d2xdksidzeta) -((xP(2) -y0)*d2ydksidzeta) -((xP(3) -z0)*d2zdksidzeta))
-    hess(2,3) = 2.0_realType*((dxdeta*dxdzeta) +(dydeta*dydzeta) +(dzdeta*dzdzeta)  &
-         -((xP(1) -x0)*d2xdetadzeta) -((xP(2) -y0)*d2ydetadzeta) -((xP(3) -z0)*d2zdetadzeta))
-
-    hess(2,1) = hess(1,2)
-    hess(3,1) = hess(1,3)
-    hess(3,2) = hess(2,3)
-
-    ! Return iErr = 0 for the time being.
-
-    iErr = 0
-
-    return
-
-  end subroutine hessD2Hexa
-
-  subroutine gradD2Hexa(xP,x1,x2,x3,x4,x5,x6,x7,x8,chi,x0,y0,z0,grad,iErr)
-    !
-    !      Compute the gradient of the square of the distance between the
-    !      point xP, and the actual point in the hexahedron represented
-    !      by chi(1:3).
-    !
-    use precision
+        ! Print some stuff out to the screen for debugging purposes
 
-    implicit none
-    !
-    !     Subroutine arguments.
-    !
-    real(kind=realType), intent(out)  :: x0, y0, z0
-    real(kind=realType), dimension(3), intent(in)  :: xP, chi
-    real(kind=realType), dimension(3), intent(in)  :: x1, x2, x3, x4
-    real(kind=realType), dimension(3), intent(in)  :: x5, x6, x7, x8
-    real(kind=realType), dimension(3), intent(out) :: grad
+        ! write(*,*)
+        ! write(*,*) 'Results of minD2Hexa'
+        ! write(*, coordinateFormat)'Point P                = (',xP(1),xP(2),xP(3),' )'
+        ! write(*, coordinateFormat)'Found point            = (',x0,y0,z0,' )'
+        ! write(*, coordinateFormat)'Parametric coordinates = (',chi(1),chi(2),chi(3),' )'
+        ! write(*, distanceFormat)'Minimum distance       =',sqrt(d2)
+        ! write(*, iterationFormat)'Number of iterations   =',itCount
+
+        ! character(len=maxStringLen) :: iterationFormat = '(A, 1x, i3)'
+        ! character(len=maxStringLen) :: coordinateFormat = '(A, 3f10.6, A)'
+        ! character(len=maxStringLen) :: distanceFormat = '(A, f20.17, A)'
+
+        return
 
-    integer(kind=intType), intent(out) :: iErr
+    end subroutine minD2Hexa
+
+    subroutine newtonStep(hess, grad, step, iErr)
+        !
+        !      Compute the Newton step given by the Hessian matrix and the
+        !      gradient vector of the distance squared function.
+        !
+        use precision
 
-    !
-    !     Local variables.
-    !
-    integer(kind=intType) :: i
-
-    real(kind=realType) :: ksi, eta, zeta
-    real(kind=realType), dimension(8,3) :: alpha
-    real(kind=realType) :: dxdksi, dxdeta, dxdzeta
-    real(kind=realType) :: dydksi, dydeta, dydzeta
-    real(kind=realType) :: dzdksi, dzdeta, dzdzeta
-
-    ! Initialize the parametric coordinates with a more recognizable
-    ! name.
-
-    ksi  = chi(1)
-    eta  = chi(2)
-    zeta = chi(3)
-
-    ! Initialize the alpha array (obtained by regrouping terms in the
-    ! parametric expansion of the (ksi,eta,zeta)->(x,y,z) mapping.
-    ! The second index of the alpha array corresponds to the x, y, or
-    ! z coordinate mapping.
-
-    do i=1,3
-       alpha(1,i) = x1(i)
-       alpha(2,i) = x2(i) -x1(i)
-       alpha(3,i) = x4(i) -x1(i)
-       alpha(4,i) = x5(i) -x1(i)
-       alpha(5,i) = x3(i) -x2(i) +x1(i) -x4(i)
-       alpha(6,i) = x6(i) -x5(i) +x1(i) -x2(i)
-       alpha(7,i) = x8(i) -x5(i) +x1(i) -x4(i)
-       alpha(8,i) = x7(i) -x8(i) +x5(i) -x6(i) +x4(i) -x3(i) +x2(i) -x1(i)
-    end do
-
-    ! Compute the value of the partial derivatives of x, y, and z with
-    ! respect to ksi, eta, and zeta
-
-    dxdksi = alpha(2,1) +alpha(5,1)*eta +alpha(6,1)*zeta +alpha(8,1)*eta*zeta
-    dydksi = alpha(2,2) +alpha(5,2)*eta +alpha(6,2)*zeta +alpha(8,2)*eta*zeta
-    dzdksi = alpha(2,3) +alpha(5,3)*eta +alpha(6,3)*zeta +alpha(8,3)*eta*zeta
-
-    dxdeta = alpha(3,1) +alpha(5,1)*ksi +alpha(7,1)*zeta +alpha(8,1)*ksi*zeta
-    dydeta = alpha(3,2) +alpha(5,2)*ksi +alpha(7,2)*zeta +alpha(8,2)*ksi*zeta
-    dzdeta = alpha(3,3) +alpha(5,3)*ksi +alpha(7,3)*zeta +alpha(8,3)*ksi*zeta
-
-    dxdzeta = alpha(4,1) +alpha(6,1)*ksi +alpha(7,1)*eta +alpha(8,1)*ksi*eta
-    dydzeta = alpha(4,2) +alpha(6,2)*ksi +alpha(7,2)*eta +alpha(8,2)*ksi*eta
-    dzdzeta = alpha(4,3) +alpha(6,3)*ksi +alpha(7,3)*eta +alpha(8,3)*ksi*eta
-
-    ! Compute the actual location of the point
-
-    x0 = alpha(1,1) +alpha(2,1)*ksi +alpha(3,1)*eta +alpha(4,1)*zeta            &
-         +alpha(5,1)*ksi*eta +alpha(6,1)*ksi*zeta +alpha(7,1)*eta*zeta           &
-         +alpha(8,1)*ksi*eta*zeta
-    y0 = alpha(1,2) +alpha(2,2)*ksi +alpha(3,2)*eta +alpha(4,2)*zeta            &
-         +alpha(5,2)*ksi*eta +alpha(6,2)*ksi*zeta +alpha(7,2)*eta*zeta           &
-         +alpha(8,2)*ksi*eta*zeta
-    z0 = alpha(1,3) +alpha(2,3)*ksi +alpha(3,3)*eta +alpha(4,3)*zeta            &
-         +alpha(5,3)*ksi*eta +alpha(6,3)*ksi*zeta +alpha(7,3)*eta*zeta           &
-         +alpha(8,3)*ksi*eta*zeta
-
-    ! Compute the gradient components
-
-    grad(1) = -2.0_realType*(xP(1) -x0)*dxdksi                               &
-         -2.0_realType*(xP(2) -y0)*dydksi                               &
-         -2.0_realType*(xP(3) -z0)*dzdksi
-    grad(2) = -2.0_realType*(xP(1) -x0)*dxdeta                               &
-         -2.0_realType*(xP(2) -y0)*dydeta                               &
-         -2.0_realType*(xP(3) -z0)*dzdeta
-    grad(3) = -2.0_realType*(xP(1) -x0)*dxdzeta                              &
-         -2.0_realType*(xP(2) -y0)*dydzeta                              &
-         -2.0_realType*(xP(3) -z0)*dzdzeta
-
-    ! Return iErr = 0 for the time being.
-
-    iErr = 0
-
-    return
-
-  end subroutine gradD2Hexa
+        implicit none
+        !
+        !     Subroutine arguments.
+        !
+        real(kind=realType), dimension(3), intent(in) :: grad
+        real(kind=realType), dimension(3), intent(out) :: step
+        real(kind=realType), dimension(3, 3), intent(in) :: hess
+
+        integer(kind=intType), intent(out) :: iErr
+
+        !
+        !     Local variables.
+        !
+        real(kind=realType) :: determinant
+
+        !
+        !      Compute the Newton step as the solution of the problem
+        !      Hessian . step = -grad
+        !      using simple Cramer's rule
+        !
+        ! Compute the determinant of the Hessian Matrix
+
+        determinant = hess(1, 1) * (hess(2, 2) * hess(3, 3) - hess(2, 3) * hess(3, 2)) &
+                      + hess(1, 2) * (hess(2, 3) * hess(3, 1) - hess(2, 1) * hess(3, 3)) &
+                      + hess(1, 3) * (hess(2, 1) * hess(3, 2) - hess(2, 2) * hess(3, 1))
+
+        ! First component of the step
+
+        step(1) = grad(1) * (hess(2, 2) * hess(3, 3) - hess(2, 3) * hess(3, 2)) &
+                  + hess(1, 2) * (hess(2, 3) * grad(3) - grad(2) * hess(3, 3)) &
+                  + hess(1, 3) * (grad(2) * hess(3, 2) - hess(2, 2) * grad(3))
+        step(1) = -step(1) / determinant
+
+        ! Second component of the step
+
+        step(2) = hess(1, 1) * (grad(2) * hess(3, 3) - hess(2, 3) * grad(3)) &
+                  + grad(1) * (hess(2, 3) * hess(3, 1) - hess(2, 1) * hess(3, 3)) &
+                  + hess(1, 3) * (hess(2, 1) * grad(3) - grad(2) * hess(3, 1))
+        step(2) = -step(2) / determinant
+
+        ! First component of the step
+
+        step(3) = hess(1, 1) * (hess(2, 2) * grad(3) - grad(2) * hess(3, 2)) &
+                  + hess(1, 2) * (grad(2) * hess(3, 1) - hess(2, 1) * grad(3)) &
+                  + grad(1) * (hess(2, 1) * hess(3, 2) - hess(2, 2) * hess(3, 1))
+        step(3) = -step(3) / determinant
+
+        ! Return iErr = 0 for the time being.
+
+        iErr = 0
+
+        return
+
+    end subroutine newtonStep
+
+    subroutine hessD2Hexa(xP, x1, x2, x3, x4, x5, x6, x7, x8, chi, hess, iErr)
+        !
+        !      Compute the Hessian of the square of the distance between the
+        !      point xP, and the actual point in the hexahedron represented
+        !      by chi(1:3).
+        !
+        use precision
+
+        implicit none
+        !
+        !     Subroutine arguments.
+        !
+        real(kind=realType), dimension(3), intent(in) :: xP, chi
+        real(kind=realType), dimension(3), intent(in) :: x1, x2, x3, x4
+        real(kind=realType), dimension(3), intent(in) :: x5, x6, x7, x8
+        real(kind=realType), dimension(3, 3), intent(out) :: hess
+
+        integer(kind=intType), intent(out) :: iErr
+
+        !
+        !     Local variables.
+        !
+        integer(kind=intType) :: i
+
+        real(kind=realType) :: ksi, eta, zeta, x0, y0, z0
+        real(kind=realType), dimension(8, 3) :: alpha
+        real(kind=realType) :: dxdksi, dxdeta, dxdzeta
+        real(kind=realType) :: dydksi, dydeta, dydzeta
+        real(kind=realType) :: dzdksi, dzdeta, dzdzeta
+        real(kind=realType) :: d2xdksideta, d2xdksidzeta, d2xdetadzeta
+        real(kind=realType) :: d2ydksideta, d2ydksidzeta, d2ydetadzeta
+        real(kind=realType) :: d2zdksideta, d2zdksidzeta, d2zdetadzeta
+
+        ! Initialize the parametric coordinates with a more recognizable
+        ! name.
+
+        ksi = chi(1)
+        eta = chi(2)
+        zeta = chi(3)
+
+        ! Initialize the alpha array (obtained by regrouping terms in the
+        ! parametric expansion of the (ksi,eta,zeta)->(x,y,z) mapping.
+        ! The second index of the alpha array corresponds to the x, y, or
+        ! z coordinate mapping.
+
+        do i = 1, 3
+            alpha(1, i) = x1(i)
+            alpha(2, i) = x2(i) - x1(i)
+            alpha(3, i) = x4(i) - x1(i)
+            alpha(4, i) = x5(i) - x1(i)
+            alpha(5, i) = x3(i) - x2(i) + x1(i) - x4(i)
+            alpha(6, i) = x6(i) - x5(i) + x1(i) - x2(i)
+            alpha(7, i) = x8(i) - x5(i) + x1(i) - x4(i)
+            alpha(8, i) = x7(i) - x8(i) + x5(i) - x6(i) + x4(i) - x3(i) + x2(i) - x1(i)
+        end do
+
+        ! Compute the value of the partial derivatives of x, y, and z with
+        ! respect to ksi, eta, and zeta
+
+        dxdksi = alpha(2, 1) + alpha(5, 1) * eta + alpha(6, 1) * zeta + alpha(8, 1) * eta * zeta
+        dydksi = alpha(2, 2) + alpha(5, 2) * eta + alpha(6, 2) * zeta + alpha(8, 2) * eta * zeta
+        dzdksi = alpha(2, 3) + alpha(5, 3) * eta + alpha(6, 3) * zeta + alpha(8, 3) * eta * zeta
+
+        dxdeta = alpha(3, 1) + alpha(5, 1) * ksi + alpha(7, 1) * zeta + alpha(8, 1) * ksi * zeta
+        dydeta = alpha(3, 2) + alpha(5, 2) * ksi + alpha(7, 2) * zeta + alpha(8, 2) * ksi * zeta
+        dzdeta = alpha(3, 3) + alpha(5, 3) * ksi + alpha(7, 3) * zeta + alpha(8, 3) * ksi * zeta
+
+        dxdzeta = alpha(4, 1) + alpha(6, 1) * ksi + alpha(7, 1) * eta + alpha(8, 1) * ksi * eta
+        dydzeta = alpha(4, 2) + alpha(6, 2) * ksi + alpha(7, 2) * eta + alpha(8, 2) * ksi * eta
+        dzdzeta = alpha(4, 3) + alpha(6, 3) * ksi + alpha(7, 3) * eta + alpha(8, 3) * ksi * eta
+
+        ! Compute the value of the non-zero partial derivatives of x, y, and z
+        ! with respect to ksi, eta, and zeta.  Note that the second derivatives
+        ! of x, y, and z with respect to ksi2, eta2, and zeta2 are identically
+        ! zero
+
+        d2xdksideta = alpha(5, 1) + alpha(8, 1) * zeta
+        d2ydksideta = alpha(5, 2) + alpha(8, 2) * zeta
+        d2zdksideta = alpha(5, 3) + alpha(8, 3) * zeta
+
+        d2xdksidzeta = alpha(6, 1) + alpha(8, 1) * eta
+        d2ydksidzeta = alpha(6, 2) + alpha(8, 2) * eta
+        d2zdksidzeta = alpha(6, 3) + alpha(8, 3) * eta
+
+        d2xdetadzeta = alpha(7, 1) + alpha(8, 1) * ksi
+        d2ydetadzeta = alpha(7, 2) + alpha(8, 2) * ksi
+        d2zdetadzeta = alpha(7, 3) + alpha(8, 3) * ksi
+
+        ! Compute the actual location of the point
+
+        x0 = alpha(1, 1) + alpha(2, 1) * ksi + alpha(3, 1) * eta + alpha(4, 1) * zeta &
+             + alpha(5, 1) * ksi * eta + alpha(6, 1) * ksi * zeta + alpha(7, 1) * eta * zeta &
+             + alpha(8, 1) * ksi * eta * zeta
+        y0 = alpha(1, 2) + alpha(2, 2) * ksi + alpha(3, 2) * eta + alpha(4, 2) * zeta &
+             + alpha(5, 2) * ksi * eta + alpha(6, 2) * ksi * zeta + alpha(7, 2) * eta * zeta &
+             + alpha(8, 2) * ksi * eta * zeta
+        z0 = alpha(1, 3) + alpha(2, 3) * ksi + alpha(3, 3) * eta + alpha(4, 3) * zeta &
+             + alpha(5, 3) * ksi * eta + alpha(6, 3) * ksi * zeta + alpha(7, 3) * eta * zeta &
+             + alpha(8, 3) * ksi * eta * zeta
+
+        ! Compute the elements of the Hessian matrix
+
+        hess(1, 1) = 2.0_realType * ((dxdksi * dxdksi) + (dydksi * dydksi) + (dzdksi * dzdksi))
+        hess(2, 2) = 2.0_realType * ((dxdeta * dxdeta) + (dydeta * dydeta) + (dzdeta * dzdeta))
+        hess(3, 3) = 2.0_realType * ((dxdzeta * dxdzeta) + (dydzeta * dydzeta) + (dzdzeta * dzdzeta))
+
+        hess(1, 2) = 2.0_realType * ((dxdksi * dxdeta) + (dydksi * dydeta) + (dzdksi * dzdeta) &
+                           - ((xP(1) - x0) * d2xdksideta) - ((xP(2) - y0) * d2ydksideta) - ((xP(3) - z0) * d2zdksideta))
+        hess(1, 3) = 2.0_realType * ((dxdksi * dxdzeta) + (dydksi * dydzeta) + (dzdksi * dzdzeta) &
+                        - ((xP(1) - x0) * d2xdksidzeta) - ((xP(2) - y0) * d2ydksidzeta) - ((xP(3) - z0) * d2zdksidzeta))
+        hess(2, 3) = 2.0_realType * ((dxdeta * dxdzeta) + (dydeta * dydzeta) + (dzdeta * dzdzeta) &
+                        - ((xP(1) - x0) * d2xdetadzeta) - ((xP(2) - y0) * d2ydetadzeta) - ((xP(3) - z0) * d2zdetadzeta))
+
+        hess(2, 1) = hess(1, 2)
+        hess(3, 1) = hess(1, 3)
+        hess(3, 2) = hess(2, 3)
+
+        ! Return iErr = 0 for the time being.
+
+        iErr = 0
+
+        return
+
+    end subroutine hessD2Hexa
+
+    subroutine gradD2Hexa(xP, x1, x2, x3, x4, x5, x6, x7, x8, chi, x0, y0, z0, grad, iErr)
+        !
+        !      Compute the gradient of the square of the distance between the
+        !      point xP, and the actual point in the hexahedron represented
+        !      by chi(1:3).
+        !
+        use precision
+
+        implicit none
+        !
+        !     Subroutine arguments.
+        !
+        real(kind=realType), intent(out) :: x0, y0, z0
+        real(kind=realType), dimension(3), intent(in) :: xP, chi
+        real(kind=realType), dimension(3), intent(in) :: x1, x2, x3, x4
+        real(kind=realType), dimension(3), intent(in) :: x5, x6, x7, x8
+        real(kind=realType), dimension(3), intent(out) :: grad
+
+        integer(kind=intType), intent(out) :: iErr
+
+        !
+        !     Local variables.
+        !
+        integer(kind=intType) :: i
+
+        real(kind=realType) :: ksi, eta, zeta
+        real(kind=realType), dimension(8, 3) :: alpha
+        real(kind=realType) :: dxdksi, dxdeta, dxdzeta
+        real(kind=realType) :: dydksi, dydeta, dydzeta
+        real(kind=realType) :: dzdksi, dzdeta, dzdzeta
+
+        ! Initialize the parametric coordinates with a more recognizable
+        ! name.
+
+        ksi = chi(1)
+        eta = chi(2)
+        zeta = chi(3)
+
+        ! Initialize the alpha array (obtained by regrouping terms in the
+        ! parametric expansion of the (ksi,eta,zeta)->(x,y,z) mapping.
+        ! The second index of the alpha array corresponds to the x, y, or
+        ! z coordinate mapping.
+
+        do i = 1, 3
+            alpha(1, i) = x1(i)
+            alpha(2, i) = x2(i) - x1(i)
+            alpha(3, i) = x4(i) - x1(i)
+            alpha(4, i) = x5(i) - x1(i)
+            alpha(5, i) = x3(i) - x2(i) + x1(i) - x4(i)
+            alpha(6, i) = x6(i) - x5(i) + x1(i) - x2(i)
+            alpha(7, i) = x8(i) - x5(i) + x1(i) - x4(i)
+            alpha(8, i) = x7(i) - x8(i) + x5(i) - x6(i) + x4(i) - x3(i) + x2(i) - x1(i)
+        end do
+
+        ! Compute the value of the partial derivatives of x, y, and z with
+        ! respect to ksi, eta, and zeta
+
+        dxdksi = alpha(2, 1) + alpha(5, 1) * eta + alpha(6, 1) * zeta + alpha(8, 1) * eta * zeta
+        dydksi = alpha(2, 2) + alpha(5, 2) * eta + alpha(6, 2) * zeta + alpha(8, 2) * eta * zeta
+        dzdksi = alpha(2, 3) + alpha(5, 3) * eta + alpha(6, 3) * zeta + alpha(8, 3) * eta * zeta
+
+        dxdeta = alpha(3, 1) + alpha(5, 1) * ksi + alpha(7, 1) * zeta + alpha(8, 1) * ksi * zeta
+        dydeta = alpha(3, 2) + alpha(5, 2) * ksi + alpha(7, 2) * zeta + alpha(8, 2) * ksi * zeta
+        dzdeta = alpha(3, 3) + alpha(5, 3) * ksi + alpha(7, 3) * zeta + alpha(8, 3) * ksi * zeta
+
+        dxdzeta = alpha(4, 1) + alpha(6, 1) * ksi + alpha(7, 1) * eta + alpha(8, 1) * ksi * eta
+        dydzeta = alpha(4, 2) + alpha(6, 2) * ksi + alpha(7, 2) * eta + alpha(8, 2) * ksi * eta
+        dzdzeta = alpha(4, 3) + alpha(6, 3) * ksi + alpha(7, 3) * eta + alpha(8, 3) * ksi * eta
+
+        ! Compute the actual location of the point
+
+        x0 = alpha(1, 1) + alpha(2, 1) * ksi + alpha(3, 1) * eta + alpha(4, 1) * zeta &
+             + alpha(5, 1) * ksi * eta + alpha(6, 1) * ksi * zeta + alpha(7, 1) * eta * zeta &
+             + alpha(8, 1) * ksi * eta * zeta
+        y0 = alpha(1, 2) + alpha(2, 2) * ksi + alpha(3, 2) * eta + alpha(4, 2) * zeta &
+             + alpha(5, 2) * ksi * eta + alpha(6, 2) * ksi * zeta + alpha(7, 2) * eta * zeta &
+             + alpha(8, 2) * ksi * eta * zeta
+        z0 = alpha(1, 3) + alpha(2, 3) * ksi + alpha(3, 3) * eta + alpha(4, 3) * zeta &
+             + alpha(5, 3) * ksi * eta + alpha(6, 3) * ksi * zeta + alpha(7, 3) * eta * zeta &
+             + alpha(8, 3) * ksi * eta * zeta
+
+        ! Compute the gradient components
+
+        grad(1) = -2.0_realType * (xP(1) - x0) * dxdksi &
+                  - 2.0_realType * (xP(2) - y0) * dydksi &
+                  - 2.0_realType * (xP(3) - z0) * dzdksi
+        grad(2) = -2.0_realType * (xP(1) - x0) * dxdeta &
+                  - 2.0_realType * (xP(2) - y0) * dydeta &
+                  - 2.0_realType * (xP(3) - z0) * dzdeta
+        grad(3) = -2.0_realType * (xP(1) - x0) * dxdzeta &
+                  - 2.0_realType * (xP(2) - y0) * dydzeta &
+                  - 2.0_realType * (xP(3) - z0) * dzdzeta
+
+        ! Return iErr = 0 for the time being.
+
+        iErr = 0
+
+        return
+
+    end subroutine gradD2Hexa
 
 end module adtLocalSearch
diff --git a/src/ADT/adtSearch.F90 b/src/ADT/adtSearch.F90
index a36e091ee..3de487f40 100644
--- a/src/ADT/adtSearch.F90
+++ b/src/ADT/adtSearch.F90
@@ -1,1709 +1,1705 @@
 module adtSearch
-  !
-  !      Module which contains the subroutines for the global search.
-  !
-  use constants
-  use adtLocalSearch, only : containmenttreesearch, mindistancetreesearch
-  use adtUtils, only : adtTerminate
-  use adtData
-  implicit none
-
-  !=================================================================
-
-contains
-
-  !===============================================================
-
-  subroutine containmentSearch(nCoor,       coor,       &
-       adtID,       procID,     &
-       elementType, elementID,  &
-       uvw,         nInterpol,  &
-       arrDonor,    arrInterpol)
     !
-    !        This routine attempts for every coordinate to find the
-    !        element in the given ADT, which contains that coordinate.
-    !        If no element is found the corresponding entry in procID is
-    !        set to -1 to indicate failure.
-    !        Subroutine intent(in) arguments.
-    !        --------------------------------
-    !        nCoor:     Number of coordinates for which the element must
-    !                   be determined.
-    !        coor:      The coordinates of these points.
-    !        adtID:     The ADT to be searched.
-    !        nInterpol: Number of variables to be interpolated.
-    !        arrDonor:  Array with the donor data; needed to obtain the
-    !                   interpolated data.
-    !        Subroutine intent(out) arguments.
-    !        ---------------------------------
-    !        procID:      The ID of the processor in the group of the ADT
-    !                     where the element containing the point is
-    !                     stored. If no element is found for a given
-    !                     point the corresponding entry in procID is set
-    !                     to -1 to indicate failure. Remember that the
-    !                     processor ID's start at 0 and not at 1.
-    !        elementType: The type of element which contains the point.
-    !        elementID:   The entry in the connectivity of this element
-    !                     which contains the point.
-    !        uvw:         The parametric coordinates of the point in the
-    !                     transformed element; this transformation is
-    !                     such that every element is transformed into a
-    !                     standard element in parametric space. The u, v
-    !                     and w coordinates can be used to determine the
-    !                     actual interpolation weights.
-    !        arrInterpol: Array with the interpolated data.
+    !      Module which contains the subroutines for the global search.
     !
+    use constants
+    use adtLocalSearch, only: containmenttreesearch, mindistancetreesearch
+    use adtUtils, only: adtTerminate
+    use adtData
     implicit none
-    !
-    !       Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nCoor, nInterpol
-    character(len=*),         intent(in) :: adtID
-
-    real(kind=realType), dimension(:,:), intent(in) :: coor
-    real(kind=realType), dimension(:,:), intent(in) :: arrDonor
-
-    integer,                  dimension(:), intent(out) :: procID
-    integer(kind=intType), dimension(:), intent(out) :: elementID
-
-    integer(kind=adtElementType), dimension(:), intent(out) :: &
-         elementType
-    real(kind=realType), dimension(:,:), intent(out) :: uvw
-    real(kind=realType), dimension(:,:), intent(out) :: arrInterpol
-    !
-    !       Local variables.
-    !
-    integer :: ierr
-
-    type(adtType), pointer :: ADT
-
-    integer(kind=intType) :: jj, nAlloc
-
-    real(kind=realType), dimension(1) :: dummy
-
-    ! Determine the index in the array ADTs, which stores the given
-    ! ID. As the number of trees stored is limited, a linear search
-    ! algorithm is okay.
-
-    nAlloc = ubound(ADTs, 1)
-    do jj=1,nAlloc
-       if(adtID == ADTs(jj)%adtID) exit
-    enddo
-
-    ! Check if the ADT to be searched exists. If not stop.
-    ! Note that adtTerminate is not called. The reason is that the
-    ! processor ID is not known.
-
-    if(jj > nAlloc) stop "ADT to be searched does not exist."
-
-    ! Set pointer to the ADT we will be working with
-
-    ADT => ADTs(jj)
-
-    ! Check if the ADT corresponds to a volume grid. If not terminate.
-
-    if(ADT%adtType /= adtVolumeADT) then
-       if(ADT%myID == 0)                       &
-            call adtTerminate(ADT, "containmentSearch", &
-            "ADT does not contain a volume mesh.")
-       call mpi_barrier(ADT%comm, ierr)
-    endif
-
-    ! Initialize the search, i.e. determine the number of
-    ! coordinates to be searched in each of the local ADT's.
-
-    call initSearch(nCoor, coor, dummy, ADT, .true.)
-
-    ! Perform the actual search.
-
-    call search(nCoor,     coor,      procID,   elementType, &
-         elementID, uvw,       dummy,    ADT,         &
-         .true.,    nInterpol, arrDonor, arrInterpol)
-
-  end subroutine containmentSearch
-
-
-  subroutine failSafeSearch(nCoor,       coor,      &
-       adtID,       procID,    &
-       elementType, elementID, &
-       uvw,         dist2,     &
-       nInterpol,   arrDonor,  &
-       arrInterpol)
-    !
-    !        This routine attempts for every coordinate to find the
-    !        element in the given ADT, which contains that coordinate.
-    !        If no element is found a minimum distance search is
-    !        performed, such that always an interpolation can be
-    !        performed. To indicate that the element does not contain the
-    !        point the element ID is negated.
-    !        Subroutine intent(in) arguments.
-    !        --------------------------------
-    !        nCoor: Number of coordinates for which the element must be
-    !               determined.
-    !        coor:  The coordinates of these points.
-    !        adtID: The ADT to be searched.
-    !        nInterpol: Number of variables to be interpolated.
-    !        arrDonor:  Array with the donor data; needed to obtain the
-    !                   interpolated data.
-    !        Subroutine intent(out) arguments.
-    !        ---------------------------------
-    !        procID:      The ID of the processor in the group of the ADT
-    !                     where the element containing the point is
-    !                     stored. If no element is found for a given
-    !                     point the corresponding entry in procID is set
-    !                     to -1 to indicate failure. Remember that the
-    !                     processor ID's start at 0 and not at 1.
-    !        elementType: The type of element which contains the point.
-    !        elementID:   The entry in the connectivity of this element
-    !                     which contains the point. The ID is negative if
-    !                     the coordinate is outside the element, i.e. if
-    !                     a minimum distance search had to be used.
-    !        uvw:         The parametric coordinates of the point in the
-    !                     transformed element; this transformation is
-    !                     such that every element is transformed into a
-    !                     standard element in parametric space. The u, v
-    !                     and w coordinates can be used to determine the
-    !                     actual interpolation weights.
-    !        arrInterpol: Array with the interpolated data.
-    !        Subroutine intent(inout) arguments.
-    !        -----------------------------------
-    !        dist2: Minimum distance squared of the coordinates to the
-    !               elements of the ADT. On input it should be
-    !               initialized by the calling program, possibly to a
-    !               large value. In this way it is possible to handle
-    !               periodic problems as efficiently as possible.
-    !
-    implicit none
-    !
-    !       Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nCoor, nInterpol
-    character(len=*),         intent(in) :: adtID
-
-    real(kind=realType), dimension(:,:), intent(in) :: coor
-    real(kind=realType), dimension(:,:), intent(in) :: arrDonor
-
-    integer,                  dimension(:), intent(out) :: procID
-    integer(kind=intType), dimension(:), intent(out) :: elementID
-
-    integer(kind=adtElementType), dimension(:), intent(out) :: &
-         elementType
-
-    real(kind=realType), dimension(:,:), intent(out) :: uvw
-    real(kind=realType), dimension(:,:), intent(out) :: arrInterpol
-
-    real(kind=realType), dimension(:), intent(inout) :: dist2
-    !
-    !       Local variables.
-    !
-    integer :: ierr
-    integer, dimension(:), allocatable :: tmpProcID
-
-    type(adtType), pointer :: ADT
-
-    integer(kind=intType) :: i, j, ii, jj, nAlloc, nFail
-    integer(kind=intType), dimension(:), allocatable :: tmpElementID
-    integer(kind=intType), dimension(:), allocatable :: coorIDs
-
-    integer(kind=adtElementType), dimension(:), allocatable :: &
-         tmpElementType
-
-    real(kind=realType), dimension(1) :: dummy
-
-    real(kind=realType), dimension(:),   allocatable :: tmpDist2
-    real(kind=realType), dimension(:,:), allocatable :: tmpCoor
-    real(kind=realType), dimension(:,:), allocatable :: tmpUVW
-    real(kind=realType), dimension(:,:), allocatable :: tmpArrInt
-
-    ! Determine the index in the array ADTs, which stores the given
-    ! ID. As the number of trees stored is limited, a linear search
-    ! algorithm is okay.
-
-    nAlloc = ubound(ADTs, 1)
-    do jj=1,nAlloc
-       if(adtID == ADTs(jj)%adtID) exit
-    enddo
-
-    ! Check if the ADT to be searched exists. If not stop.
-    ! Note that adtTerminate is not called. The reason is that the
-    ! processor ID is not known.
-
-    if(jj > nAlloc) stop "ADT to be searched does not exist."
-
-    ! Set pointer to the ADT we will be working with
-    ADT => ADTs(jj)
-
-    ! Check if the ADT corresponds to a volume grid.
-    ! If not terminate.
-
-    if(ADT%adtType /= adtVolumeADT) then
-       if(ADT%myID == 0)                    &
-            call adtTerminate(ADT, "failSafeSearch", &
-            "ADT does not contain a volume mesh.")
-       call mpi_barrier(ADT%comm, ierr)
-    endif
-
-    ! Perform the containment search.
-
-    call initSearch(nCoor, coor, dummy, ADT, .true.)
-
-    call search(nCoor,     coor,      procID,   elementType, &
-         elementID, uvw,       dummy,    ADT,         &
-         .true.,    nInterpol, arrDonor, arrInterpol)
-
-    ! Determine the number of coordinates for which the containment
-    ! search failed. Set for the other coordinates the distance to
-    ! zero.
-
-    nFail = 0
-    do i=1,nCoor
-       if(procID(i) == -1) then
-          nFail = nFail + 1
-       else
-          dist2(i) = zero
-       endif
-    enddo
-
-    ! Determine the global sum of nFail, which is stored in ii.
-
-    call mpi_allreduce(nFail, ii, 1, adflow_integer, mpi_max, &
-         ADT%comm, ierr)
-
-    ! Return if ii == 0, because the minimum distance search
-    ! is not needed.
-
-    if(ii == 0) return
-
-    ! Allocate the memory for the arrays needed for the minimum
-    ! distance search.
-
-    allocate(tmpCoor(3,nFail),      tmpProcID(nFail),           &
-         tmpElementType(nFail), tmpElementID(nFail),        &
-         tmpUVW(3,nFail),       tmpDist2(nFail),            &
-         coorIDs(nFail),        tmpArrInt(nInterpol,nFail), &
-         stat=ierr)
-    if(ierr /= 0)                             &
-         call adtTerminate(ADT, "failSafeSearch", &
-         "Memory allocation failure for the arrays &
-         &for the minimum distance search.")
-
-    ! Store the information needed for minimum distance search in
-    ! the arrays tmpCoor, tmpDist2 and coorIDs.
-
-    ii = 0
-    do i=1,nCoor
-       if(procID(i) == -1) then
-          ii = ii + 1
-
-          tmpCoor(1,ii) = coor(1,i)
-          tmpCoor(2,ii) = coor(2,i)
-          tmpCoor(3,ii) = coor(3,i)
-          tmpDist2(ii)  = dist2(i)
-          coorIDs(ii)   = i
-       endif
-    enddo
-
-    ! Perform the minimum distance search for the coordinates for
-    ! which the containment search failed.
-
-    call initSearch(nFail, tmpCoor, tmpDist2, ADT, .false.)
-
-    call search(nFail,        tmpCoor,   tmpProcID, tmpElementType, &
-         tmpElementID, tmpUVW,    tmpDist2, ADT,             &
-         .false.,      nInterpol, arrDonor, tmpArrInt)
-
-    ! Copy for the successful searches the data into the arrays to
-    ! be returned by this subroutine. Note that the element IDs are
-    ! negated to indicate that the coordinate is outside the element.
-
-    do i=1,nFail
-       if(tmpProcID(i) /= -1) then
-          ii = coorIDs(i)
-
-          procID(ii)      =  tmpProcID(i)
-          elementType(ii) =  tmpElementType(i)
-          elementID(ii)   = -tmpElementID(i)
-          uvw(1,ii)       =  tmpUVW(1,i)
-          uvw(2,ii)       =  tmpUVW(2,i)
-          uvw(3,ii)       =  tmpUVW(3,i)
-          dist2(ii)       =  tmpDist2(i)
-
-          do j=1,nInterpol
-             arrInterpol(j,ii) = tmpArrInt(j,i)
-          enddo
-       endif
-    enddo
-
-    ! Release the memory used in the minimum distance search.
-
-    deallocate(tmpCoor, tmpProcID, tmpElementType, tmpElementID, &
-         tmpUVW,  tmpDist2,  coorIDs,        tmpArrInt,    &
-         stat=ierr)
-    if(ierr /= 0)                             &
-         call adtTerminate(ADT, "failSafeSearch", &
-         "Deallocation failure for the arrays &
-         &for the minimum distance search.")
-
-  end subroutine failSafeSearch
-
-
-  subroutine initSearch(nCoor, coor, dist2, ADT, containmentSearch)
-    !
-    !        This routine performs the initialization tasks before the
-    !        actual search takes place. It determines the number and the
-    !        ID's of the coordinates every local tree may have to
-    !        interpolate. From this info this routine also determines the
-    !        number of rounds needed in the actual algorithm to avoid a
-    !        memory bottleneck.
-    !        Subroutine intent(in) arguments.
-    !        --------------------------------
-    !        nCoor:             Number of local coordinates for which the
-    !                           element must be determined.
-    !        coor:              The coordinates of these points.
-    !        ADT:               ADT type whose ADT must be searched
-    !        containmentSearch: Whether or not a containment search must
-    !                           be performed. If not a minimum distance
-    !                           search algorithm is used, which is more
-    !                           expensive.
-    !        Subroutine intent(inout) arguments.
-    !        -----------------------------------
-    !        dist2: Guaranteed minimum distance squared of the
-    !               coordinates to the elements of the ADT. This array
-    !               should be initialized by the calling routine,
-    !               possibly to a large value. It is only used for a
-    !               minimum distance search.
-    !
-    implicit none
-    !
-    !       Subroutine arguments.
-    !
-    type(adtType), intent(inout) :: ADT
-    integer(kind=intType), intent(in) :: nCoor
-
-    real(kind=realType), dimension(:,:), intent(in) :: coor
-    real(kind=realType), dimension(:),   intent(inout) :: dist2
-
-    logical, intent(in) :: containmentSearch
-    !
-    !       Local variables.
-    !
-    integer :: ierr, nRootLeaves, comm, nProcs, myID
-    integer :: myEntryInRootProcs
-
-    integer, dimension(:),   pointer :: rootLeavesProcs
-
-    integer(kind=intType) :: i, j, k, mm, nn
-
-    integer(kind=intType), dimension(:), allocatable :: nCoorPerProc
-    integer(kind=intType), dimension(:), allocatable :: nCoorFromProc
-
-    real(kind=realType) :: d1, d2, dx, dy, dz
-
-    real(kind=realType), dimension(:,:,:), pointer :: rootBBoxes
-
-    ! Set some pointers to make the code more readable.
-
-    nRootLeaves        = ADT%nRootLeaves
-    myEntryInRootProcs = ADT%myEntryInRootProcs
-    rootLeavesProcs   => ADT%rootLeavesProcs
-    rootBBoxes        => ADT%rootBBoxes
 
-    comm   = ADT%comm
-    nProcs = ADT%nProcs
-    myID   = ADT%myID
+    !=================================================================
 
-    ! Determine the global maximum of nCoor. This number will serve
-    ! as an upper bound for the number of points to be searched
-    ! during a round.
-
-    call mpi_allreduce(nCoor, nCoorMax, 1, adflow_integer, mpi_max, &
-         comm, ierr)
-    nCoorMax = max(nCoorMax,nCoorMaxLowerLimit)
-    !
-    !        Determine for every root leaf the local coordinates, which
-    !        should be searched in the corresponding ADT. The criterion
-    !        for a containment search is of course different from a
-    !        minimum distance search and therefore a distinction must be
-    !        made between the two methods.
-    !
-    ! Allocate the memory for nCoorPerRootLeaf and mCoorPerRootLeaf.
-    ! Initially the latter is a copy of the former, but its data
-    ! will change in the outer loop in the iterative algorithm, see
-    ! adtSearch. The numbering starts at 0, because these arrays
-    ! will be cumulative storage format arrays.
-
-    allocate(nCoorPerRootLeaf(0:nProcs), &
-         mCoorPerRootLeaf(0:nProcs), stat=ierr)
-    if(ierr /= 0)                         &
-         call adtTerminate(ADT, "initSearch", &
-         "Memory allocation failure for &
-         &nCoorPerRootLeaf and mCoorPerRootLeaf.")
-
-    ! Determine for a minimum distance search the guaranteed minimum
-    ! distance squared to each of the root leaves and store the
-    ! absolute minimum.
-
-    testMinDistance: if(.not. containmentSearch ) then
-
-       do j=1,nRootLeaves
-          do i=1,nCoor
-             d1 = abs(coor(1,i) - rootBBoxes(1,1,j))
-             d2 = abs(coor(1,i) - rootBBoxes(1,2,j))
-             dx = max(d1,d2)
-
-             d1 = abs(coor(2,i) - rootBBoxes(2,1,j))
-             d2 = abs(coor(2,i) - rootBBoxes(2,2,j))
-             dy = max(d1,d2)
-
-             d1 = abs(coor(3,i) - rootBBoxes(3,1,j))
-             d2 = abs(coor(3,i) - rootBBoxes(3,2,j))
-             dz = max(d1,d2)
-
-             d2       = dx*dx + dy*dy + dz*dz
-             dist2(i) = min(dist2(i), d2)
-          enddo
-       enddo
-
-    endif testMinDistance
-
-    ! Determine the local number of coordinates to be searched in
-    ! each of the local trees, i.e. the array nCoorPerRootLeaf. Note
-    ! that the processor storing the ADT stores this number rather
-    ! than the root leaf. Furthermore nCoorPerRootLeaf is stored in
-    ! cumulative storage format and therefore an outer loop over the
-    ! number of root leaves is the most logical thing to do.
-
-    nCoorPerRootLeaf(0) = 0
-    mm = 0
-
-    loop1RootLeaves: do j=1,nRootLeaves
-
-       ! Determine the next processor which stores a tree; initialize
-       ! in the same loop nCoorPerRootLeaf of the processors in
-       ! between. The counter mm contains the entry in
-       ! nCoorPerRootLeaf where the number is stored. Remember that
-       ! the processor ID's start at 0.
-
-       do k=(mm+1),(rootLeavesProcs(j)+1)
-          nCoorPerRootLeaf(k) = nCoorPerRootLeaf(mm)
-       enddo
-
-       mm = rootLeavesProcs(j)+1
-
-       ! Make a distinction between a containment and a
-       ! minimum distance search.
-
-       test1Containment: if( containmentSearch ) then
-
-          ! Containment search. Loop over the local number of
-          ! coordinates and check if the coordinates are within the
-          ! bounding box of the current root leaf. If so, update
-          ! the counter.
-
-          do i=1,nCoor
-             if(coor(1,i) >= rootBBoxes(1,1,j) .and. &
-                  coor(1,i) <= rootBBoxes(1,2,j) .and. &
-                  coor(2,i) >= rootBBoxes(2,1,j) .and. &
-                  coor(2,i) <= rootBBoxes(2,2,j) .and. &
-                  coor(3,i) >= rootBBoxes(3,1,j) .and. &
-                  coor(3,i) <= rootBBoxes(3,2,j)) &
-                  nCoorPerRootLeaf(mm) = nCoorPerRootLeaf(mm) + 1
-          enddo
-
-       else test1Containment
-
-          ! Minimum distance search. Loop over the local number of
-          ! coordinates and determine the possible minimum distance
-          ! squared to the bounding box of the current root leaf.
-          ! If less than the currently stored guaranteed minimum
-          ! distance squared, update the counter.
-
-          do i=1,nCoor
-             if(     coor(1,i) < rootBBoxes(1,1,j)) then
-                dx =  coor(1,i) - rootBBoxes(1,1,j)
-             else if(coor(1,i) > rootBBoxes(1,2,j)) then
-                dx =  coor(1,i) - rootBBoxes(1,2,j)
-             else
-                dx = zero
-             endif
-
-             if(     coor(2,i) < rootBBoxes(2,1,j)) then
-                dy =  coor(2,i) - rootBBoxes(2,1,j)
-             else if(coor(2,i) > rootBBoxes(2,2,j)) then
-                dy =  coor(2,i) - rootBBoxes(2,2,j)
-             else
-                dy = zero
-             endif
-
-             if(     coor(3,i) < rootBBoxes(3,1,j)) then
-                dz =  coor(3,i) - rootBBoxes(3,1,j)
-             else if(coor(3,i) > rootBBoxes(3,2,j)) then
-                dz =  coor(3,i) - rootBBoxes(3,2,j)
-             else
-                dz = zero
-             endif
-
-             d2 = dx*dx + dy*dy + dz*dz
-             if(d2 < dist2(i)) &
-                  nCoorPerRootLeaf(mm) = nCoorPerRootLeaf(mm) + 1
-
-          enddo
-
-       endif test1Containment
-
-    enddo loop1RootLeaves
-
-    ! Fill the rest of the array nCoorPerRootLeaf.
-
-    do k=(mm+1),nProcs
-       nCoorPerRootLeaf(k) = nCoorPerRootLeaf(mm)
-    enddo
-
-    ! Copy nCoorPerRootLeaf to mCoorPerRootLeaf.
-
-    do j=0,nProcs
-       mCoorPerRootLeaf(j) = nCoorPerRootLeaf(j)
-    enddo
-
-    ! Allocate the memory for coorPerRootLeaf.
-
-    nn = nCoorPerRootLeaf(nProcs)
-    allocate(coorPerRootLeaf(nn), stat=ierr)
-    if(ierr /= 0)                                   &
-         call adtTerminate(ADT, "initSearch",           &
-         "Memory allocation failure for &
-         &coorPerRootLeaf.")
-
-    ! Repeat the loop over the number of root leaves, but now store
-    ! the ID's of the local coordinates.
-
-    nn = 0
-    loop2RootLeaves: do j=1,nRootLeaves
-
-       ! Make a distinction between a containment and a
-       ! minimum distance search.
-
-       test2Containment: if( containmentSearch ) then
-
-          ! Containment search. Store the nodes which are within the
-          ! bounding box of the root leaf.
-
-          do i=1,nCoor
-             if(coor(1,i) >= rootBBoxes(1,1,j) .and. &
-                  coor(1,i) <= rootBBoxes(1,2,j) .and. &
-                  coor(2,i) >= rootBBoxes(2,1,j) .and. &
-                  coor(2,i) <= rootBBoxes(2,2,j) .and. &
-                  coor(3,i) >= rootBBoxes(3,1,j) .and. &
-                  coor(3,i) <= rootBBoxes(3,2,j)) then
-                nn = nn + 1
-                coorPerRootLeaf(nn) = i
-             endif
-          enddo
-
-       else test2Containment
-
-          ! Minimum distance search. Store the nodes which have a
-          ! smaller possible minimum distance squared than the
-          ! currently  stored value.
-
-          do i=1,nCoor
-             if(     coor(1,i) < rootBBoxes(1,1,j)) then
-                dx =  coor(1,i) - rootBBoxes(1,1,j)
-             else if(coor(1,i) > rootBBoxes(1,2,j)) then
-                dx =  coor(1,i) - rootBBoxes(1,2,j)
-             else
-                dx = zero
-             endif
-
-             if(     coor(2,i) < rootBBoxes(2,1,j)) then
-                dy =  coor(2,i) - rootBBoxes(2,1,j)
-             else if(coor(2,i) > rootBBoxes(2,2,j)) then
-                dy =  coor(2,i) - rootBBoxes(2,2,j)
-             else
-                dy = zero
-             endif
-
-             if(     coor(3,i) < rootBBoxes(3,1,j)) then
-                dz =  coor(3,i) - rootBBoxes(3,1,j)
-             else if(coor(3,i) > rootBBoxes(3,2,j)) then
-                dz =  coor(3,i) - rootBBoxes(3,2,j)
-             else
-                dz = zero
-             endif
-
-             d2 = dx*dx + dy*dy + dz*dz
-             if(d2 < dist2(i)) then
-                nn = nn + 1
-                coorPerRootLeaf(nn) = i
-             endif
-          enddo
-
-       endif test2Containment
-
-    enddo loop2RootLeaves
-    !
-    !        Determine for every tree the number of coordinates from
-    !        other processors it should search and from that information
-    !        the number of rounds in the outer loop of the search
-    !        algorithm in search.
-    !
-    ! Allocate the memory for some help arrays.
-
-    allocate(procRecv(nProcs-1),       nCoorProcRecv(nProcs-1),   &
-         nCoorPerProc(0:nProcs-1), nCoorFromProc(0:nProcs-1), &
-         stat=ierr)
-    if(ierr /= 0)                         &
-         call adtTerminate(ADT, "initSearch", &
-         "Memory allocation failure for help arrays.")
-
-    ! Determine the number of coordinates I want to be searched in
-    ! the trees of other processors. Store the number I have to
-    ! search in my own tree and set it to 0 afterwards; the local
-    ! interpolations are handled differently.
-
-    do i=0,(nProcs-1)
-       nCoorPerProc(i) = nCoorPerRootLeaf(i+1) - nCoorPerRootLeaf(i)
-    enddo
-
-    nLocalInterpol     = nCoorPerProc(myID)
-    nCoorPerProc(myID) = 0
-
-    ! Communicate these numbers to the other processors.
-
-    call mpi_alltoall(nCoorPerProc,  1, adflow_integer, &
-         nCoorFromProc, 1, adflow_integer, comm, ierr)
-
-    ! Determine the total number of coordinates I have to interpolate
-    ! and the processor ID's I will receive coordinates from.
-
-    nn = 0
-    nProcRecv = 0
-
-    do i=0,(nProcs-1)
-       if(nCoorFromProc(i) > 0) then
-          nProcRecv = nProcRecv + 1
-          procRecv(nProcRecv) = i
-          nn = nn + nCoorFromProc(i)
-       endif
-    enddo
-
-    ! Determine the number of rounds in the outer loop of the
-    ! interpolation algorithm.
-
-    i = real(nn,realType)/real(nCoorMax,realType)
-    if(i*nCoorMax < nn) i = i + 1
-    i = max(i, 1_intType)
-
-    call mpi_allreduce(i, nRounds, 1, adflow_integer, mpi_max, &
-         comm, ierr)
-
-    ! Modify the sequence of procRecv, such that number of
-    ! coordinates sent from processors during a certain round
-    ! is more balanced.
-
-    nn = max(nRootLeaves,1_intType)
-    j  = (real(myEntryInRootProcs,realType)/real(nn,realType)) &
-         * nProcRecv
-
-    do i=1,nProcRecv
-       j = j+1
-       if(j > nProcRecv) j = 1
-
-       nn = procRecv(i)
-       procRecv(i) = procRecv(j)
-       procRecv(j) = nn
-    enddo
-
-    ! Store the number of coordinates to be received in
-    ! nCoorProcRecv.
-
-    do i=1,nProcRecv
-       nCoorProcRecv(i) = nCoorFromProc(procRecv(i))
-    enddo
-
-    ! Release the memory of the local help arrays.
-
-    deallocate(nCoorPerProc, nCoorFromProc, stat=ierr)
-    if(ierr /= 0)                         &
-         call adtTerminate(ADT, "initSearch", &
-         "Deallocation failure for nCoorPerProc and &
-         &nCoorFromProc")
-
-  end subroutine initSearch
-
-
-  subroutine minDistanceSearch(nCoor,       coor,      &
-       adtID,       procID,    &
-       elementType, elementID, &
-       uvw,         dist2,     &
-       nInterpol,   arrDonor,  &
-       arrInterpol)
-    !
-    !        This routine attempts for every coordinate to find the
-    !        element in the given ADT which minimizes the distance to
-    !        this point.
-    !        Subroutine intent(in) arguments.
-    !        --------------------------------
-    !        nCoor: Number of coordinates for which the element must be
-    !               determined.
-    !        coor:  The coordinates of these points.
-    !        adtID: The ADT to be searched.
-    !        nInterpol: Number of variables to be interpolated.
-    !        arrDonor:  Array with the donor data; needed to obtain the
-    !                   interpolated data.
-    !        Subroutine intent(out) arguments.
-    !        ---------------------------------
-    !        procID:      The ID of the processor in the group of the ADT
-    !                     where the element containing the point is
-    !                     stored. If no element is found for a given
-    !                     point the corresponding entry in procID is set
-    !                     to -1 to indicate failure. Remember that the
-    !                     processor ID's start at 0 and not at 1.
-    !        elementType: The type of element which contains the point.
-    !        elementID:   The entry in the connectivity of this element
-    !                     which contains the point. The ID is negative if
-    !                     the coordinate is outside the element.
-    !        uvw:         The parametric coordinates of the point in the
-    !                     transformed element; this transformation is
-    !                     such that every element is transformed into a
-    !                     standard element in parametric space. The u, v
-    !                     and w coordinates can be used to determine the
-    !                     actual interpolation weights. If the tree
-    !                     corresponds to a surface mesh the third entry
-    !                     of this array will not be filled.
-    !        arrInterpol: Array with the interpolated data.
-    !        Subroutine intent(inout) arguments.
-    !        -----------------------------------
-    !        dist2: Minimum distance squared of the coordinates to the
-    !               elements of the ADT. On input it should be
-    !               initialized by the calling program, possibly to a
-    !               large value. In this way it is possible to handle
-    !               periodic problems as efficiently as possible.
-    !
-    implicit none
-    !
-    !       Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nCoor, nInterpol
-    character(len=*),         intent(in) :: adtID
-
-    real(kind=realType), dimension(:,:), intent(in) :: coor
-    real(kind=realType), dimension(:,:), intent(in) :: arrDonor
-
-    integer,                  dimension(:), intent(out) :: procID
-    integer(kind=intType), dimension(:), intent(out) :: elementID
-
-    integer(kind=adtElementType), dimension(:), intent(out) :: &
-         elementType
-
-    real(kind=realType), dimension(:,:), intent(out) :: uvw
-    real(kind=realType), dimension(:,:), intent(out) :: arrInterpol
-
-    real(kind=realType), dimension(:), intent(inout) :: dist2
-    !
-    !       Local variables.
-    !
-    integer(kind=intType) :: jj, nAlloc
-
-    type(adtType), pointer :: ADT
-
-    ! Determine the index in the array ADTs, which stores the given
-    ! ID. As the number of trees stored is limited, a linear search
-    ! algorithm is okay.
-
-    nAlloc = ubound(ADTs, 1)
-    do jj=1,nAlloc
-       if(adtID == ADTs(jj)%adtID) exit
-    enddo
-
-    ! Check if the ADT to be searched exists. If not stop.
-    ! Note that adtTerminate is not called. The reason is that the
-    ! processor ID is not known.
-
-    if(jj > nAlloc) stop "ADT to be searched does not exist."
-
-    ! Set pointer to the ADT we will be working with
-
-    ADT => ADTs(jj)
-
-    ! Initialize the search, i.e. determine the number of
-    ! coordinates to be searched in each of the local ADT's.
-
-    call initSearch(nCoor, coor, dist2, ADT, .false.)
-
-    ! Perform the actual search.
-
-    call search(nCoor,     coor,      procID,   elementType, &
-         elementID, uvw,       dist2,    ADT,         &
-         .false.,   nInterpol, arrDonor, arrInterpol)
-
-    ! Negate the elementID if the coordinate is outside the element,
-    ! i.e. if the distance is larger than zero.
-
-    do jj=1,nCoor
-       if(dist2(jj) > zero) elementID(jj) = -elementID(jj)
-    enddo
-
-  end subroutine minDistanceSearch
-
-
-  subroutine search(nCoor,       coor,      procID,            &
-       elementType, elementID, uvw,               &
-       dist2,       ADT,       containmentSearch, &
-       nInterpol,   arrDonor,  arrInterpol)
-    !
-    !        This routine implements the parallel part of the search
-    !        algorithm and calls the appropriate local tree searches.
-    !        Subroutine intent(in) arguments.
-    !        --------------------------------
-    !        nCoor:             Number of local coordinates for which the
-    !                           element must be determined.
-    !        coor:              The coordinates of these points.
-    !        ADT:                ADT type whose ADT must be searched
-    !        containmentSearch: Whether or not a containment search must
-    !                           be performed. If not a minimum distance
-    !                           search algorithm is used, which is more
-    !                           expensive.
-    !        nInterpol:         Number of variables to be interpolated.
-    !        arrDonor:          Array with the donor data; needed to
-    !                           obtain the interpolated data.
-    !        Subroutine intent(inout) arguments.
-    !        -----------------------------------
-    !        dist2: Minimum distance squared of the coordinates to the
-    !               elements of the ADT. On input it contains the
-    !               guarenteed distance squared to one of the root
-    !               leaves. On output it contains the distance squared to
-    !               the nearest element of the global tree. It is only
-    !               used for a minimum distance search.
-    !        Subroutine intent(out) arguments.
-    !        ---------------------------------
-    !        procID:      The ID of the processor in the group of the ADT
-    !                     where the element containing the point is
-    !                     stored.
-    !        elementType: The type of element which contains the point or
-    !                     minimizes the distance to the point.
-    !        elementID:   The entry in the connectivity of this element.
-    !        uvw:         The parametric coordinates of (the projection
-    !                     of) the point in the transformed element; this
-    !                     transformation is such that every element is
-    !                     transformed into a standard element in
-    !                     parametric space. The u, v and w coordinates
-    !                     can be used to determine the actual
-    !                     interpolation weights.
-    !        arrInterpol: Array with the interpolated data.
-    !
-    implicit none
-    !
-    !       Subroutine arguments.
-    !
-    type(adtType), intent(inout) :: ADT
-    integer(kind=intType), intent(in) :: nCoor
-    integer(kind=intType), intent(in) :: nInterpol
-
-    real(kind=realType), dimension(:,:), intent(in) :: coor
-    real(kind=realType), dimension(:),   intent(inout) :: dist2
-
-    real(kind=realType), dimension(:,:), intent(in) :: arrDonor
+contains
 
-    integer,                  dimension(:), intent(out) :: procID
-    integer(kind=intType), dimension(:), intent(out) :: elementID
+    !===============================================================
+
+    subroutine containmentSearch(nCoor, coor, &
+                                 adtID, procID, &
+                                 elementType, elementID, &
+                                 uvw, nInterpol, &
+                                 arrDonor, arrInterpol)
+        !
+        !        This routine attempts for every coordinate to find the
+        !        element in the given ADT, which contains that coordinate.
+        !        If no element is found the corresponding entry in procID is
+        !        set to -1 to indicate failure.
+        !        Subroutine intent(in) arguments.
+        !        --------------------------------
+        !        nCoor:     Number of coordinates for which the element must
+        !                   be determined.
+        !        coor:      The coordinates of these points.
+        !        adtID:     The ADT to be searched.
+        !        nInterpol: Number of variables to be interpolated.
+        !        arrDonor:  Array with the donor data; needed to obtain the
+        !                   interpolated data.
+        !        Subroutine intent(out) arguments.
+        !        ---------------------------------
+        !        procID:      The ID of the processor in the group of the ADT
+        !                     where the element containing the point is
+        !                     stored. If no element is found for a given
+        !                     point the corresponding entry in procID is set
+        !                     to -1 to indicate failure. Remember that the
+        !                     processor ID's start at 0 and not at 1.
+        !        elementType: The type of element which contains the point.
+        !        elementID:   The entry in the connectivity of this element
+        !                     which contains the point.
+        !        uvw:         The parametric coordinates of the point in the
+        !                     transformed element; this transformation is
+        !                     such that every element is transformed into a
+        !                     standard element in parametric space. The u, v
+        !                     and w coordinates can be used to determine the
+        !                     actual interpolation weights.
+        !        arrInterpol: Array with the interpolated data.
+        !
+        implicit none
+        !
+        !       Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nCoor, nInterpol
+        character(len=*), intent(in) :: adtID
+
+        real(kind=realType), dimension(:, :), intent(in) :: coor
+        real(kind=realType), dimension(:, :), intent(in) :: arrDonor
+
+        integer, dimension(:), intent(out) :: procID
+        integer(kind=intType), dimension(:), intent(out) :: elementID
+
+        integer(kind=adtElementType), dimension(:), intent(out) :: &
+            elementType
+        real(kind=realType), dimension(:, :), intent(out) :: uvw
+        real(kind=realType), dimension(:, :), intent(out) :: arrInterpol
+        !
+        !       Local variables.
+        !
+        integer :: ierr
+
+        type(adtType), pointer :: ADT
+
+        integer(kind=intType) :: jj, nAlloc
+
+        real(kind=realType), dimension(1) :: dummy
+
+        ! Determine the index in the array ADTs, which stores the given
+        ! ID. As the number of trees stored is limited, a linear search
+        ! algorithm is okay.
+
+        nAlloc = ubound(ADTs, 1)
+        do jj = 1, nAlloc
+            if (adtID == ADTs(jj)%adtID) exit
+        end do
+
+        ! Check if the ADT to be searched exists. If not stop.
+        ! Note that adtTerminate is not called. The reason is that the
+        ! processor ID is not known.
+
+        if (jj > nAlloc) stop "ADT to be searched does not exist."
+
+        ! Set pointer to the ADT we will be working with
+
+        ADT => ADTs(jj)
+
+        ! Check if the ADT corresponds to a volume grid. If not terminate.
+
+        if (ADT%adtType /= adtVolumeADT) then
+            if (ADT%myID == 0) &
+                call adtTerminate(ADT, "containmentSearch", &
+                                  "ADT does not contain a volume mesh.")
+            call mpi_barrier(ADT%comm, ierr)
+        end if
+
+        ! Initialize the search, i.e. determine the number of
+        ! coordinates to be searched in each of the local ADT's.
+
+        call initSearch(nCoor, coor, dummy, ADT, .true.)
+
+        ! Perform the actual search.
+
+        call search(nCoor, coor, procID, elementType, &
+                    elementID, uvw, dummy, ADT, &
+                    .true., nInterpol, arrDonor, arrInterpol)
+
+    end subroutine containmentSearch
+
+    subroutine failSafeSearch(nCoor, coor, &
+                              adtID, procID, &
+                              elementType, elementID, &
+                              uvw, dist2, &
+                              nInterpol, arrDonor, &
+                              arrInterpol)
+        !
+        !        This routine attempts for every coordinate to find the
+        !        element in the given ADT, which contains that coordinate.
+        !        If no element is found a minimum distance search is
+        !        performed, such that always an interpolation can be
+        !        performed. To indicate that the element does not contain the
+        !        point the element ID is negated.
+        !        Subroutine intent(in) arguments.
+        !        --------------------------------
+        !        nCoor: Number of coordinates for which the element must be
+        !               determined.
+        !        coor:  The coordinates of these points.
+        !        adtID: The ADT to be searched.
+        !        nInterpol: Number of variables to be interpolated.
+        !        arrDonor:  Array with the donor data; needed to obtain the
+        !                   interpolated data.
+        !        Subroutine intent(out) arguments.
+        !        ---------------------------------
+        !        procID:      The ID of the processor in the group of the ADT
+        !                     where the element containing the point is
+        !                     stored. If no element is found for a given
+        !                     point the corresponding entry in procID is set
+        !                     to -1 to indicate failure. Remember that the
+        !                     processor ID's start at 0 and not at 1.
+        !        elementType: The type of element which contains the point.
+        !        elementID:   The entry in the connectivity of this element
+        !                     which contains the point. The ID is negative if
+        !                     the coordinate is outside the element, i.e. if
+        !                     a minimum distance search had to be used.
+        !        uvw:         The parametric coordinates of the point in the
+        !                     transformed element; this transformation is
+        !                     such that every element is transformed into a
+        !                     standard element in parametric space. The u, v
+        !                     and w coordinates can be used to determine the
+        !                     actual interpolation weights.
+        !        arrInterpol: Array with the interpolated data.
+        !        Subroutine intent(inout) arguments.
+        !        -----------------------------------
+        !        dist2: Minimum distance squared of the coordinates to the
+        !               elements of the ADT. On input it should be
+        !               initialized by the calling program, possibly to a
+        !               large value. In this way it is possible to handle
+        !               periodic problems as efficiently as possible.
+        !
+        implicit none
+        !
+        !       Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nCoor, nInterpol
+        character(len=*), intent(in) :: adtID
+
+        real(kind=realType), dimension(:, :), intent(in) :: coor
+        real(kind=realType), dimension(:, :), intent(in) :: arrDonor
+
+        integer, dimension(:), intent(out) :: procID
+        integer(kind=intType), dimension(:), intent(out) :: elementID
+
+        integer(kind=adtElementType), dimension(:), intent(out) :: &
+            elementType
+
+        real(kind=realType), dimension(:, :), intent(out) :: uvw
+        real(kind=realType), dimension(:, :), intent(out) :: arrInterpol
+
+        real(kind=realType), dimension(:), intent(inout) :: dist2
+        !
+        !       Local variables.
+        !
+        integer :: ierr
+        integer, dimension(:), allocatable :: tmpProcID
+
+        type(adtType), pointer :: ADT
+
+        integer(kind=intType) :: i, j, ii, jj, nAlloc, nFail
+        integer(kind=intType), dimension(:), allocatable :: tmpElementID
+        integer(kind=intType), dimension(:), allocatable :: coorIDs
+
+        integer(kind=adtElementType), dimension(:), allocatable :: &
+            tmpElementType
+
+        real(kind=realType), dimension(1) :: dummy
+
+        real(kind=realType), dimension(:), allocatable :: tmpDist2
+        real(kind=realType), dimension(:, :), allocatable :: tmpCoor
+        real(kind=realType), dimension(:, :), allocatable :: tmpUVW
+        real(kind=realType), dimension(:, :), allocatable :: tmpArrInt
+
+        ! Determine the index in the array ADTs, which stores the given
+        ! ID. As the number of trees stored is limited, a linear search
+        ! algorithm is okay.
+
+        nAlloc = ubound(ADTs, 1)
+        do jj = 1, nAlloc
+            if (adtID == ADTs(jj)%adtID) exit
+        end do
+
+        ! Check if the ADT to be searched exists. If not stop.
+        ! Note that adtTerminate is not called. The reason is that the
+        ! processor ID is not known.
+
+        if (jj > nAlloc) stop "ADT to be searched does not exist."
+
+        ! Set pointer to the ADT we will be working with
+        ADT => ADTs(jj)
+
+        ! Check if the ADT corresponds to a volume grid.
+        ! If not terminate.
+
+        if (ADT%adtType /= adtVolumeADT) then
+            if (ADT%myID == 0) &
+                call adtTerminate(ADT, "failSafeSearch", &
+                                  "ADT does not contain a volume mesh.")
+            call mpi_barrier(ADT%comm, ierr)
+        end if
+
+        ! Perform the containment search.
+
+        call initSearch(nCoor, coor, dummy, ADT, .true.)
+
+        call search(nCoor, coor, procID, elementType, &
+                    elementID, uvw, dummy, ADT, &
+                    .true., nInterpol, arrDonor, arrInterpol)
+
+        ! Determine the number of coordinates for which the containment
+        ! search failed. Set for the other coordinates the distance to
+        ! zero.
+
+        nFail = 0
+        do i = 1, nCoor
+            if (procID(i) == -1) then
+                nFail = nFail + 1
+            else
+                dist2(i) = zero
+            end if
+        end do
+
+        ! Determine the global sum of nFail, which is stored in ii.
+
+        call mpi_allreduce(nFail, ii, 1, adflow_integer, mpi_max, &
+                           ADT%comm, ierr)
+
+        ! Return if ii == 0, because the minimum distance search
+        ! is not needed.
+
+        if (ii == 0) return
+
+        ! Allocate the memory for the arrays needed for the minimum
+        ! distance search.
+
+        allocate (tmpCoor(3, nFail), tmpProcID(nFail), &
+                  tmpElementType(nFail), tmpElementID(nFail), &
+                  tmpUVW(3, nFail), tmpDist2(nFail), &
+                  coorIDs(nFail), tmpArrInt(nInterpol, nFail), &
+                  stat=ierr)
+        if (ierr /= 0) &
+             call adtTerminate(ADT, "failSafeSearch", &
+             "Memory allocation failure for the arrays &
+             &for the minimum distance search.")
+
+        ! Store the information needed for minimum distance search in
+        ! the arrays tmpCoor, tmpDist2 and coorIDs.
+
+        ii = 0
+        do i = 1, nCoor
+            if (procID(i) == -1) then
+                ii = ii + 1
+
+                tmpCoor(1, ii) = coor(1, i)
+                tmpCoor(2, ii) = coor(2, i)
+                tmpCoor(3, ii) = coor(3, i)
+                tmpDist2(ii) = dist2(i)
+                coorIDs(ii) = i
+            end if
+        end do
+
+        ! Perform the minimum distance search for the coordinates for
+        ! which the containment search failed.
+
+        call initSearch(nFail, tmpCoor, tmpDist2, ADT, .false.)
+
+        call search(nFail, tmpCoor, tmpProcID, tmpElementType, &
+                    tmpElementID, tmpUVW, tmpDist2, ADT, &
+                    .false., nInterpol, arrDonor, tmpArrInt)
+
+        ! Copy for the successful searches the data into the arrays to
+        ! be returned by this subroutine. Note that the element IDs are
+        ! negated to indicate that the coordinate is outside the element.
+
+        do i = 1, nFail
+            if (tmpProcID(i) /= -1) then
+                ii = coorIDs(i)
+
+                procID(ii) = tmpProcID(i)
+                elementType(ii) = tmpElementType(i)
+                elementID(ii) = -tmpElementID(i)
+                uvw(1, ii) = tmpUVW(1, i)
+                uvw(2, ii) = tmpUVW(2, i)
+                uvw(3, ii) = tmpUVW(3, i)
+                dist2(ii) = tmpDist2(i)
+
+                do j = 1, nInterpol
+                    arrInterpol(j, ii) = tmpArrInt(j, i)
+                end do
+            end if
+        end do
+
+        ! Release the memory used in the minimum distance search.
+
+        deallocate (tmpCoor, tmpProcID, tmpElementType, tmpElementID, &
+                    tmpUVW, tmpDist2, coorIDs, tmpArrInt, &
+                    stat=ierr)
+        if (ierr /= 0) &
+             call adtTerminate(ADT, "failSafeSearch", &
+             "Deallocation failure for the arrays &
+             &for the minimum distance search.")
+
+    end subroutine failSafeSearch
+
+    subroutine initSearch(nCoor, coor, dist2, ADT, containmentSearch)
+        !
+        !        This routine performs the initialization tasks before the
+        !        actual search takes place. It determines the number and the
+        !        ID's of the coordinates every local tree may have to
+        !        interpolate. From this info this routine also determines the
+        !        number of rounds needed in the actual algorithm to avoid a
+        !        memory bottleneck.
+        !        Subroutine intent(in) arguments.
+        !        --------------------------------
+        !        nCoor:             Number of local coordinates for which the
+        !                           element must be determined.
+        !        coor:              The coordinates of these points.
+        !        ADT:               ADT type whose ADT must be searched
+        !        containmentSearch: Whether or not a containment search must
+        !                           be performed. If not a minimum distance
+        !                           search algorithm is used, which is more
+        !                           expensive.
+        !        Subroutine intent(inout) arguments.
+        !        -----------------------------------
+        !        dist2: Guaranteed minimum distance squared of the
+        !               coordinates to the elements of the ADT. This array
+        !               should be initialized by the calling routine,
+        !               possibly to a large value. It is only used for a
+        !               minimum distance search.
+        !
+        implicit none
+        !
+        !       Subroutine arguments.
+        !
+        type(adtType), intent(inout) :: ADT
+        integer(kind=intType), intent(in) :: nCoor
+
+        real(kind=realType), dimension(:, :), intent(in) :: coor
+        real(kind=realType), dimension(:), intent(inout) :: dist2
+
+        logical, intent(in) :: containmentSearch
+        !
+        !       Local variables.
+        !
+        integer :: ierr, nRootLeaves, comm, nProcs, myID
+        integer :: myEntryInRootProcs
+
+        integer, dimension(:), pointer :: rootLeavesProcs
+
+        integer(kind=intType) :: i, j, k, mm, nn
+
+        integer(kind=intType), dimension(:), allocatable :: nCoorPerProc
+        integer(kind=intType), dimension(:), allocatable :: nCoorFromProc
+
+        real(kind=realType) :: d1, d2, dx, dy, dz
+
+        real(kind=realType), dimension(:, :, :), pointer :: rootBBoxes
+
+        ! Set some pointers to make the code more readable.
+
+        nRootLeaves = ADT%nRootLeaves
+        myEntryInRootProcs = ADT%myEntryInRootProcs
+        rootLeavesProcs => ADT%rootLeavesProcs
+        rootBBoxes => ADT%rootBBoxes
+
+        comm = ADT%comm
+        nProcs = ADT%nProcs
+        myID = ADT%myID
+
+        ! Determine the global maximum of nCoor. This number will serve
+        ! as an upper bound for the number of points to be searched
+        ! during a round.
+
+        call mpi_allreduce(nCoor, nCoorMax, 1, adflow_integer, mpi_max, &
+                           comm, ierr)
+        nCoorMax = max(nCoorMax, nCoorMaxLowerLimit)
+        !
+        !        Determine for every root leaf the local coordinates, which
+        !        should be searched in the corresponding ADT. The criterion
+        !        for a containment search is of course different from a
+        !        minimum distance search and therefore a distinction must be
+        !        made between the two methods.
+        !
+        ! Allocate the memory for nCoorPerRootLeaf and mCoorPerRootLeaf.
+        ! Initially the latter is a copy of the former, but its data
+        ! will change in the outer loop in the iterative algorithm, see
+        ! adtSearch. The numbering starts at 0, because these arrays
+        ! will be cumulative storage format arrays.
+
+        allocate (nCoorPerRootLeaf(0:nProcs), &
+                  mCoorPerRootLeaf(0:nProcs), stat=ierr)
+        if (ierr /= 0) &
+             call adtTerminate(ADT, "initSearch", &
+             "Memory allocation failure for &
+             &nCoorPerRootLeaf and mCoorPerRootLeaf.")
+
+        ! Determine for a minimum distance search the guaranteed minimum
+        ! distance squared to each of the root leaves and store the
+        ! absolute minimum.
+
+        testMinDistance: if (.not. containmentSearch) then
+
+            do j = 1, nRootLeaves
+                do i = 1, nCoor
+                    d1 = abs(coor(1, i) - rootBBoxes(1, 1, j))
+                    d2 = abs(coor(1, i) - rootBBoxes(1, 2, j))
+                    dx = max(d1, d2)
+
+                    d1 = abs(coor(2, i) - rootBBoxes(2, 1, j))
+                    d2 = abs(coor(2, i) - rootBBoxes(2, 2, j))
+                    dy = max(d1, d2)
+
+                    d1 = abs(coor(3, i) - rootBBoxes(3, 1, j))
+                    d2 = abs(coor(3, i) - rootBBoxes(3, 2, j))
+                    dz = max(d1, d2)
+
+                    d2 = dx * dx + dy * dy + dz * dz
+                    dist2(i) = min(dist2(i), d2)
+                end do
+            end do
+
+        end if testMinDistance
+
+        ! Determine the local number of coordinates to be searched in
+        ! each of the local trees, i.e. the array nCoorPerRootLeaf. Note
+        ! that the processor storing the ADT stores this number rather
+        ! than the root leaf. Furthermore nCoorPerRootLeaf is stored in
+        ! cumulative storage format and therefore an outer loop over the
+        ! number of root leaves is the most logical thing to do.
+
+        nCoorPerRootLeaf(0) = 0
+        mm = 0
+
+        loop1RootLeaves: do j = 1, nRootLeaves
+
+            ! Determine the next processor which stores a tree; initialize
+            ! in the same loop nCoorPerRootLeaf of the processors in
+            ! between. The counter mm contains the entry in
+            ! nCoorPerRootLeaf where the number is stored. Remember that
+            ! the processor ID's start at 0.
+
+            do k = (mm + 1), (rootLeavesProcs(j) + 1)
+                nCoorPerRootLeaf(k) = nCoorPerRootLeaf(mm)
+            end do
+
+            mm = rootLeavesProcs(j) + 1
+
+            ! Make a distinction between a containment and a
+            ! minimum distance search.
+
+            test1Containment: if (containmentSearch) then
+
+                ! Containment search. Loop over the local number of
+                ! coordinates and check if the coordinates are within the
+                ! bounding box of the current root leaf. If so, update
+                ! the counter.
+
+                do i = 1, nCoor
+                    if (coor(1, i) >= rootBBoxes(1, 1, j) .and. &
+                        coor(1, i) <= rootBBoxes(1, 2, j) .and. &
+                        coor(2, i) >= rootBBoxes(2, 1, j) .and. &
+                        coor(2, i) <= rootBBoxes(2, 2, j) .and. &
+                        coor(3, i) >= rootBBoxes(3, 1, j) .and. &
+                        coor(3, i) <= rootBBoxes(3, 2, j)) &
+                        nCoorPerRootLeaf(mm) = nCoorPerRootLeaf(mm) + 1
+                end do
+
+                else test1Containment
+
+                ! Minimum distance search. Loop over the local number of
+                ! coordinates and determine the possible minimum distance
+                ! squared to the bounding box of the current root leaf.
+                ! If less than the currently stored guaranteed minimum
+                ! distance squared, update the counter.
+
+                do i = 1, nCoor
+                    if (coor(1, i) < rootBBoxes(1, 1, j)) then
+                        dx = coor(1, i) - rootBBoxes(1, 1, j)
+                    else if (coor(1, i) > rootBBoxes(1, 2, j)) then
+                        dx = coor(1, i) - rootBBoxes(1, 2, j)
+                    else
+                        dx = zero
+                    end if
+
+                    if (coor(2, i) < rootBBoxes(2, 1, j)) then
+                        dy = coor(2, i) - rootBBoxes(2, 1, j)
+                    else if (coor(2, i) > rootBBoxes(2, 2, j)) then
+                        dy = coor(2, i) - rootBBoxes(2, 2, j)
+                    else
+                        dy = zero
+                    end if
+
+                    if (coor(3, i) < rootBBoxes(3, 1, j)) then
+                        dz = coor(3, i) - rootBBoxes(3, 1, j)
+                    else if (coor(3, i) > rootBBoxes(3, 2, j)) then
+                        dz = coor(3, i) - rootBBoxes(3, 2, j)
+                    else
+                        dz = zero
+                    end if
+
+                    d2 = dx * dx + dy * dy + dz * dz
+                    if (d2 < dist2(i)) &
+                        nCoorPerRootLeaf(mm) = nCoorPerRootLeaf(mm) + 1
+
+                end do
+
+            end if test1Containment
+
+        end do loop1RootLeaves
+
+        ! Fill the rest of the array nCoorPerRootLeaf.
+
+        do k = (mm + 1), nProcs
+            nCoorPerRootLeaf(k) = nCoorPerRootLeaf(mm)
+        end do
+
+        ! Copy nCoorPerRootLeaf to mCoorPerRootLeaf.
+
+        do j = 0, nProcs
+            mCoorPerRootLeaf(j) = nCoorPerRootLeaf(j)
+        end do
+
+        ! Allocate the memory for coorPerRootLeaf.
+
+        nn = nCoorPerRootLeaf(nProcs)
+        allocate (coorPerRootLeaf(nn), stat=ierr)
+        if (ierr /= 0) &
+             call adtTerminate(ADT, "initSearch", &
+             "Memory allocation failure for &
+             &coorPerRootLeaf.")
+
+        ! Repeat the loop over the number of root leaves, but now store
+        ! the ID's of the local coordinates.
+
+        nn = 0
+        loop2RootLeaves: do j = 1, nRootLeaves
+
+            ! Make a distinction between a containment and a
+            ! minimum distance search.
+
+            test2Containment: if (containmentSearch) then
+
+                ! Containment search. Store the nodes which are within the
+                ! bounding box of the root leaf.
+
+                do i = 1, nCoor
+                    if (coor(1, i) >= rootBBoxes(1, 1, j) .and. &
+                        coor(1, i) <= rootBBoxes(1, 2, j) .and. &
+                        coor(2, i) >= rootBBoxes(2, 1, j) .and. &
+                        coor(2, i) <= rootBBoxes(2, 2, j) .and. &
+                        coor(3, i) >= rootBBoxes(3, 1, j) .and. &
+                        coor(3, i) <= rootBBoxes(3, 2, j)) then
+                        nn = nn + 1
+                        coorPerRootLeaf(nn) = i
+                    end if
+                end do
+
+                else test2Containment
+
+                ! Minimum distance search. Store the nodes which have a
+                ! smaller possible minimum distance squared than the
+                ! currently  stored value.
+
+                do i = 1, nCoor
+                    if (coor(1, i) < rootBBoxes(1, 1, j)) then
+                        dx = coor(1, i) - rootBBoxes(1, 1, j)
+                    else if (coor(1, i) > rootBBoxes(1, 2, j)) then
+                        dx = coor(1, i) - rootBBoxes(1, 2, j)
+                    else
+                        dx = zero
+                    end if
+
+                    if (coor(2, i) < rootBBoxes(2, 1, j)) then
+                        dy = coor(2, i) - rootBBoxes(2, 1, j)
+                    else if (coor(2, i) > rootBBoxes(2, 2, j)) then
+                        dy = coor(2, i) - rootBBoxes(2, 2, j)
+                    else
+                        dy = zero
+                    end if
+
+                    if (coor(3, i) < rootBBoxes(3, 1, j)) then
+                        dz = coor(3, i) - rootBBoxes(3, 1, j)
+                    else if (coor(3, i) > rootBBoxes(3, 2, j)) then
+                        dz = coor(3, i) - rootBBoxes(3, 2, j)
+                    else
+                        dz = zero
+                    end if
+
+                    d2 = dx * dx + dy * dy + dz * dz
+                    if (d2 < dist2(i)) then
+                        nn = nn + 1
+                        coorPerRootLeaf(nn) = i
+                    end if
+                end do
+
+            end if test2Containment
+
+        end do loop2RootLeaves
+        !
+        !        Determine for every tree the number of coordinates from
+        !        other processors it should search and from that information
+        !        the number of rounds in the outer loop of the search
+        !        algorithm in search.
+        !
+        ! Allocate the memory for some help arrays.
+
+        allocate (procRecv(nProcs - 1), nCoorProcRecv(nProcs - 1), &
+                  nCoorPerProc(0:nProcs - 1), nCoorFromProc(0:nProcs - 1), &
+                  stat=ierr)
+        if (ierr /= 0) &
+            call adtTerminate(ADT, "initSearch", &
+                              "Memory allocation failure for help arrays.")
+
+        ! Determine the number of coordinates I want to be searched in
+        ! the trees of other processors. Store the number I have to
+        ! search in my own tree and set it to 0 afterwards; the local
+        ! interpolations are handled differently.
+
+        do i = 0, (nProcs - 1)
+            nCoorPerProc(i) = nCoorPerRootLeaf(i + 1) - nCoorPerRootLeaf(i)
+        end do
+
+        nLocalInterpol = nCoorPerProc(myID)
+        nCoorPerProc(myID) = 0
+
+        ! Communicate these numbers to the other processors.
+
+        call mpi_alltoall(nCoorPerProc, 1, adflow_integer, &
+                          nCoorFromProc, 1, adflow_integer, comm, ierr)
+
+        ! Determine the total number of coordinates I have to interpolate
+        ! and the processor ID's I will receive coordinates from.
+
+        nn = 0
+        nProcRecv = 0
+
+        do i = 0, (nProcs - 1)
+            if (nCoorFromProc(i) > 0) then
+                nProcRecv = nProcRecv + 1
+                procRecv(nProcRecv) = i
+                nn = nn + nCoorFromProc(i)
+            end if
+        end do
+
+        ! Determine the number of rounds in the outer loop of the
+        ! interpolation algorithm.
+
+        i = real(nn, realType) / real(nCoorMax, realType)
+        if (i * nCoorMax < nn) i = i + 1
+        i = max(i, 1_intType)
+
+        call mpi_allreduce(i, nRounds, 1, adflow_integer, mpi_max, &
+                           comm, ierr)
+
+        ! Modify the sequence of procRecv, such that number of
+        ! coordinates sent from processors during a certain round
+        ! is more balanced.
+
+        nn = max(nRootLeaves, 1_intType)
+        j = (real(myEntryInRootProcs, realType) / real(nn, realType)) &
+            * nProcRecv
+
+        do i = 1, nProcRecv
+            j = j + 1
+            if (j > nProcRecv) j = 1
+
+            nn = procRecv(i)
+            procRecv(i) = procRecv(j)
+            procRecv(j) = nn
+        end do
+
+        ! Store the number of coordinates to be received in
+        ! nCoorProcRecv.
+
+        do i = 1, nProcRecv
+            nCoorProcRecv(i) = nCoorFromProc(procRecv(i))
+        end do
+
+        ! Release the memory of the local help arrays.
+
+        deallocate (nCoorPerProc, nCoorFromProc, stat=ierr)
+        if (ierr /= 0) &
+             call adtTerminate(ADT, "initSearch", &
+             "Deallocation failure for nCoorPerProc and &
+             &nCoorFromProc")
+
+    end subroutine initSearch
+
+    subroutine minDistanceSearch(nCoor, coor, &
+                                 adtID, procID, &
+                                 elementType, elementID, &
+                                 uvw, dist2, &
+                                 nInterpol, arrDonor, &
+                                 arrInterpol)
+        !
+        !        This routine attempts for every coordinate to find the
+        !        element in the given ADT which minimizes the distance to
+        !        this point.
+        !        Subroutine intent(in) arguments.
+        !        --------------------------------
+        !        nCoor: Number of coordinates for which the element must be
+        !               determined.
+        !        coor:  The coordinates of these points.
+        !        adtID: The ADT to be searched.
+        !        nInterpol: Number of variables to be interpolated.
+        !        arrDonor:  Array with the donor data; needed to obtain the
+        !                   interpolated data.
+        !        Subroutine intent(out) arguments.
+        !        ---------------------------------
+        !        procID:      The ID of the processor in the group of the ADT
+        !                     where the element containing the point is
+        !                     stored. If no element is found for a given
+        !                     point the corresponding entry in procID is set
+        !                     to -1 to indicate failure. Remember that the
+        !                     processor ID's start at 0 and not at 1.
+        !        elementType: The type of element which contains the point.
+        !        elementID:   The entry in the connectivity of this element
+        !                     which contains the point. The ID is negative if
+        !                     the coordinate is outside the element.
+        !        uvw:         The parametric coordinates of the point in the
+        !                     transformed element; this transformation is
+        !                     such that every element is transformed into a
+        !                     standard element in parametric space. The u, v
+        !                     and w coordinates can be used to determine the
+        !                     actual interpolation weights. If the tree
+        !                     corresponds to a surface mesh the third entry
+        !                     of this array will not be filled.
+        !        arrInterpol: Array with the interpolated data.
+        !        Subroutine intent(inout) arguments.
+        !        -----------------------------------
+        !        dist2: Minimum distance squared of the coordinates to the
+        !               elements of the ADT. On input it should be
+        !               initialized by the calling program, possibly to a
+        !               large value. In this way it is possible to handle
+        !               periodic problems as efficiently as possible.
+        !
+        implicit none
+        !
+        !       Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nCoor, nInterpol
+        character(len=*), intent(in) :: adtID
+
+        real(kind=realType), dimension(:, :), intent(in) :: coor
+        real(kind=realType), dimension(:, :), intent(in) :: arrDonor
+
+        integer, dimension(:), intent(out) :: procID
+        integer(kind=intType), dimension(:), intent(out) :: elementID
+
+        integer(kind=adtElementType), dimension(:), intent(out) :: &
+            elementType
+
+        real(kind=realType), dimension(:, :), intent(out) :: uvw
+        real(kind=realType), dimension(:, :), intent(out) :: arrInterpol
+
+        real(kind=realType), dimension(:), intent(inout) :: dist2
+        !
+        !       Local variables.
+        !
+        integer(kind=intType) :: jj, nAlloc
+
+        type(adtType), pointer :: ADT
+
+        ! Determine the index in the array ADTs, which stores the given
+        ! ID. As the number of trees stored is limited, a linear search
+        ! algorithm is okay.
+
+        nAlloc = ubound(ADTs, 1)
+        do jj = 1, nAlloc
+            if (adtID == ADTs(jj)%adtID) exit
+        end do
+
+        ! Check if the ADT to be searched exists. If not stop.
+        ! Note that adtTerminate is not called. The reason is that the
+        ! processor ID is not known.
+
+        if (jj > nAlloc) stop "ADT to be searched does not exist."
+
+        ! Set pointer to the ADT we will be working with
+
+        ADT => ADTs(jj)
+
+        ! Initialize the search, i.e. determine the number of
+        ! coordinates to be searched in each of the local ADT's.
+
+        call initSearch(nCoor, coor, dist2, ADT, .false.)
+
+        ! Perform the actual search.
+
+        call search(nCoor, coor, procID, elementType, &
+                    elementID, uvw, dist2, ADT, &
+                    .false., nInterpol, arrDonor, arrInterpol)
+
+        ! Negate the elementID if the coordinate is outside the element,
+        ! i.e. if the distance is larger than zero.
+
+        do jj = 1, nCoor
+            if (dist2(jj) > zero) elementID(jj) = -elementID(jj)
+        end do
+
+    end subroutine minDistanceSearch
+
+    subroutine search(nCoor, coor, procID, &
+                      elementType, elementID, uvw, &
+                      dist2, ADT, containmentSearch, &
+                      nInterpol, arrDonor, arrInterpol)
+        !
+        !        This routine implements the parallel part of the search
+        !        algorithm and calls the appropriate local tree searches.
+        !        Subroutine intent(in) arguments.
+        !        --------------------------------
+        !        nCoor:             Number of local coordinates for which the
+        !                           element must be determined.
+        !        coor:              The coordinates of these points.
+        !        ADT:                ADT type whose ADT must be searched
+        !        containmentSearch: Whether or not a containment search must
+        !                           be performed. If not a minimum distance
+        !                           search algorithm is used, which is more
+        !                           expensive.
+        !        nInterpol:         Number of variables to be interpolated.
+        !        arrDonor:          Array with the donor data; needed to
+        !                           obtain the interpolated data.
+        !        Subroutine intent(inout) arguments.
+        !        -----------------------------------
+        !        dist2: Minimum distance squared of the coordinates to the
+        !               elements of the ADT. On input it contains the
+        !               guarenteed distance squared to one of the root
+        !               leaves. On output it contains the distance squared to
+        !               the nearest element of the global tree. It is only
+        !               used for a minimum distance search.
+        !        Subroutine intent(out) arguments.
+        !        ---------------------------------
+        !        procID:      The ID of the processor in the group of the ADT
+        !                     where the element containing the point is
+        !                     stored.
+        !        elementType: The type of element which contains the point or
+        !                     minimizes the distance to the point.
+        !        elementID:   The entry in the connectivity of this element.
+        !        uvw:         The parametric coordinates of (the projection
+        !                     of) the point in the transformed element; this
+        !                     transformation is such that every element is
+        !                     transformed into a standard element in
+        !                     parametric space. The u, v and w coordinates
+        !                     can be used to determine the actual
+        !                     interpolation weights.
+        !        arrInterpol: Array with the interpolated data.
+        !
+        implicit none
+        !
+        !       Subroutine arguments.
+        !
+        type(adtType), intent(inout) :: ADT
+        integer(kind=intType), intent(in) :: nCoor
+        integer(kind=intType), intent(in) :: nInterpol
+
+        real(kind=realType), dimension(:, :), intent(in) :: coor
+        real(kind=realType), dimension(:), intent(inout) :: dist2
+
+        real(kind=realType), dimension(:, :), intent(in) :: arrDonor
+
+        integer, dimension(:), intent(out) :: procID
+        integer(kind=intType), dimension(:), intent(out) :: elementID
+
+        integer(kind=adtElementType), dimension(:), intent(out) :: &
+            elementType
+
+        real(kind=realType), dimension(:, :), intent(out) :: uvw
+        real(kind=realType), dimension(:, :), intent(out) :: arrInterpol
+
+        logical, intent(in) :: containmentSearch
+        !
+        !       Local variables.
+        !
+        integer :: ierr
+        integer :: comm, nProcs, myID
+        integer :: nProcRecvCur, nProcSendCur, nVarCoor, nVarUVW
+        integer :: startProcRecv, procCur, sizeMessage
+
+        integer, dimension(mpi_status_size) :: mpiStatus
+
+        integer, dimension(:), allocatable :: procSendCur
+        integer, dimension(:), allocatable :: sendRequest
+        integer, dimension(:, :), allocatable :: sendRecvRequest
+
+        integer(kind=intType) :: i, j, k, k1, l, m, ii, mm, nn
+        integer(kind=intType) :: nLocalInterpolRound
+        integer(kind=intType) :: iStartLocal, iEndLocal, nCoorRecv
+
+        integer(kind=intType), dimension(:), allocatable :: nCoorPerProc
+        integer(kind=intType), dimension(:), allocatable :: nCoorFromProc
+
+        integer(kind=intType), dimension(:, :), allocatable :: intRecv
+        integer(kind=intType), dimension(:, :), allocatable :: intBuf
+
+        real(kind=realType), dimension(:, :), allocatable :: coorBuf
+        real(kind=realType), dimension(:, :), allocatable :: coorRecv
+        real(kind=realType), dimension(:, :), allocatable :: uvwRecv
+        real(kind=realType), dimension(:, :), allocatable :: uvwBuf
+
+        logical, dimension(:), allocatable :: coorRequested
+
+        ! Some abbreviations to make the code more readable.
+
+        comm = ADT%comm
+        nProcs = ADT%nProcs
+        myID = ADT%myID
+
+        ! Determine the number of variables stored in the coordinate
+        ! buffers. For a minimum distance search also the distance
+        ! squared is stored, i.e. 4 variables instead of 3.
+
+        if (containmentSearch) then
+            nVarCoor = 3
+        else
+            nVarCoor = 4
+        end if
+
+        ! And the size of the uvw buffers. These contain nVarCoor plus
+        ! the number of variables to be interpolated.
+
+        nVarUVW = nVarCoor + max(nInterpol, 0_intType)
+
+        ! Initialize procID to -1, which indicates failure.
+
+        do i = 1, nCoor
+            procID(i) = -1
+        end do
+
+        ! Allocate the memory for some help arrays used in the search
+        ! algorithm.
+
+        nn = nCoorPerRootLeaf(nProcs)
+        allocate (procSendCur(nProcs - 1), sendRequest(nProcs - 1), &
+                  nCoorPerProc(0:nProcs - 1), nCoorFromProc(0:nProcs - 1), &
+                  sendRecvRequest(2, nProcs - 1), coorRequested(nn), &
+                  stat=ierr)
+        if (ierr /= 0) &
+            call adtTerminate(ADT, "search", &
+                              "Memory allocation failure for help arrays.")
 
-    integer(kind=adtElementType), dimension(:), intent(out) :: &
-         elementType
+        ! Initialize coorRequested to .false. This indicates that
+        ! the corresponding entry in coorPerRootLeaf has not been
+        ! requested for interpolation.
 
-    real(kind=realType), dimension(:,:), intent(out) :: uvw
-    real(kind=realType), dimension(:,:), intent(out) :: arrInterpol
+        do j = 1, nn
+            coorRequested(j) = .false.
+        end do
 
-    logical, intent(in) :: containmentSearch
-    !
-    !       Local variables.
-    !
-    integer :: ierr
-    integer :: comm, nProcs, myID
-    integer :: nProcRecvCur, nProcSendCur, nVarCoor, nVarUVW
-    integer :: startProcRecv, procCur, sizeMessage
+        ! Initialize the starting position in the array procRecv to 1.
+        ! This variable indicates the starting position in procRecv for
+        ! the current round. Also initializes nCoorFromProc to 0.
 
-    integer, dimension(mpi_status_size) :: mpiStatus
+        startProcRecv = 1
 
-    integer, dimension(:),   allocatable :: procSendCur
-    integer, dimension(:),   allocatable :: sendRequest
-    integer, dimension(:,:), allocatable :: sendRecvRequest
+        do i = 0, (nProcs - 1)
+            nCoorFromProc(i) = 0
+        end do
 
-    integer(kind=intType) :: i, j, k, k1, l, m, ii, mm, nn
-    integer(kind=intType) :: nLocalInterpolRound
-    integer(kind=intType) :: iStartLocal, iEndLocal, nCoorRecv
+        ! Determine the number of local interpolations per round and
+        ! initialize the iStartLocal and iEndLocal, the start and end
+        ! indices for the local interpolation of the current round.
 
-    integer(kind=intType), dimension(:), allocatable :: nCoorPerProc
-    integer(kind=intType), dimension(:), allocatable :: nCoorFromProc
+        nn = nLocalInterpol / nRounds
+        if (nn * nRounds < nLocalInterpol) nn = nn + 1
+        nLocalInterpolRound = nn
 
-    integer(kind=intType), dimension(:,:), allocatable :: intRecv
-    integer(kind=intType), dimension(:,:), allocatable :: intBuf
+        iStartLocal = 0
+        iEndLocal = nLocalInterpolRound
+        !
+        !        Iterative algorithm to determine the elements containing the
+        !        coordinates or the elements which minimize the distance. The
+        !        algorithm consists of a synchronous outer loop over the
+        !        number of times the inner loop should be executed. This
+        !        inner loop is asynchronous and performs the actual ADT
+        !        search. The outer loop is present to avoid that too much
+        !        data is communicated to a single processor at once such that
+        !        a memory bottleneck occurs.
+        !
+        outerLoop: do mm = 1, nRounds
 
-    real(kind=realType), dimension(:,:), allocatable :: coorBuf
-    real(kind=realType), dimension(:,:), allocatable :: coorRecv
-    real(kind=realType), dimension(:,:), allocatable :: uvwRecv
-    real(kind=realType), dimension(:,:), allocatable :: uvwBuf
+            ! Determine the processors I want data from in this round
+            ! as well as the number of coordinates from these nodes.
 
-    logical, dimension(:), allocatable :: coorRequested
+            nProcRecvCur = 0
+            nCoorRecv = 0
 
-    ! Some abbreviations to make the code more readable.
+            do i = startProcRecv, nProcRecv
 
-    comm   = ADT%comm
-    nProcs = ADT%nProcs
-    myID   = ADT%myID
+                ! Exit the loop if the maximum number of nodes has been
+                ! reached.
 
-    ! Determine the number of variables stored in the coordinate
-    ! buffers. For a minimum distance search also the distance
-    ! squared is stored, i.e. 4 variables instead of 3.
+                if (nCoorRecv == nCoorMax) exit
 
-    if( containmentSearch ) then
-       nVarCoor = 3
-    else
-       nVarCoor = 4
-    endif
+                ! Update the number of processors from which I will receive
+                ! data during this round.
 
-    ! And the size of the uvw buffers. These contain nVarCoor plus
-    ! the number of variables to be interpolated.
+                nProcRecvCur = nProcRecvCur + 1
 
-    nVarUVW = nVarCoor + max(nInterpol,0_intType)
+                ! Determine the amount I can receive from this processor.
+                ! If the upper limit is exceeded cut it off.
 
-    ! Initialize procID to -1, which indicates failure.
+                j = nCoorProcRecv(i)
+                if ((nCoorRecv + j) > nCoorMax) j = nCoorMax - nCoorRecv
+                nCoorRecv = nCoorRecv + j
 
-    do i=1,nCoor
-       procID(i) = -1
-    enddo
+                ! Store the amount j in the appropriate place of
+                ! nCoorFromProc and determine the number of nodes to be
+                ! sent to this processor in the next round (possibly 0).
 
-    ! Allocate the memory for some help arrays used in the search
-    ! algorithm.
+                nCoorFromProc(procRecv(i)) = j
+                nCoorProcRecv(i) = nCoorProcRecv(i) - j
 
-    nn = nCoorPerRootLeaf(nProcs)
-    allocate(procSendCur(nProcs-1),       sendRequest(nProcs-1),     &
-         nCoorPerProc(0:nProcs-1),    nCoorFromProc(0:nProcs-1), &
-         sendRecvRequest(2,nProcs-1), coorRequested(nn),         &
-         stat=ierr)
-    if(ierr /= 0)                     &
-         call adtTerminate(ADT, "search", &
-         "Memory allocation failure for help arrays.")
+                ! Exit the loop if still some data should be received from
+                ! this processor in a next round. The difference between this
+                ! exit and the one in the beginning of the do loop is that
+                ! here the counter i is not update yet and therefore
+                ! startProcRecv will be set correctly for the next round.
 
-    ! Initialize coorRequested to .false. This indicates that
-    ! the corresponding entry in coorPerRootLeaf has not been
-    ! requested for interpolation.
+                if (nCoorProcRecv(i) > 0) exit
+            end do
 
-    do j=1,nn
-       coorRequested(j) = .false.
-    enddo
+            ! Do an all to all communication such that every processor
+            ! knows the amount of data it should send to other processors.
 
-    ! Initialize the starting position in the array procRecv to 1.
-    ! This variable indicates the starting position in procRecv for
-    ! the current round. Also initializes nCoorFromProc to 0.
+            call mpi_alltoall(nCoorFromProc, 1, adflow_integer, &
+                              nCoorPerProc, 1, adflow_integer, comm, ierr)
 
-    startProcRecv = 1
+            ! Set the non-zero entries of nCoorFromProc to zero again
+            ! for the next round.
 
-    do i=0,(nProcs-1)
-       nCoorFromProc(i) = 0
-    enddo
+            nn = min(i, nProcRecv)
+            do j = startProcRecv, nn
+                nCoorFromProc(procRecv(j)) = 0
+            end do
 
-    ! Determine the number of local interpolations per round and
-    ! initialize the iStartLocal and iEndLocal, the start and end
-    ! indices for the local interpolation of the current round.
+            ! Set the starting index for the next round.
 
-    nn = nLocalInterpol/nRounds
-    if(nn*nRounds < nLocalInterpol) nn = nn + 1
-    nLocalInterpolRound = nn
+            startProcRecv = i
 
-    iStartLocal = 0
-    iEndLocal   = nLocalInterpolRound
-    !
-    !        Iterative algorithm to determine the elements containing the
-    !        coordinates or the elements which minimize the distance. The
-    !        algorithm consists of a synchronous outer loop over the
-    !        number of times the inner loop should be executed. This
-    !        inner loop is asynchronous and performs the actual ADT
-    !        search. The outer loop is present to avoid that too much
-    !        data is communicated to a single processor at once such that
-    !        a memory bottleneck occurs.
-    !
-    outerLoop: do mm=1,nRounds
+            ! Determine the number of messages I have to send, the
+            ! corresponding processors and the total number of points to
+            ! be sent.
 
-       ! Determine the processors I want data from in this round
-       ! as well as the number of coordinates from these nodes.
+            nProcSendCur = 0
+            nn = 0
 
-       nProcRecvCur = 0
-       nCoorRecv    = 0
+            do i = 0, (nProcs - 1)
+                if (nCoorPerProc(i) > 0) then
+                    nProcSendCur = nProcSendCur + 1
+                    procSendCur(nProcSendCur) = i
+                    nn = nn + nCoorPerProc(i)
+                end if
+            end do
 
-       do i=startProcRecv,nProcRecv
+            ! Allocate the memory for the send buffer of the coordinates
+            ! and possibly the minimum distance squared.
 
-          ! Exit the loop if the maximum number of nodes has been
-          ! reached.
+            allocate (coorBuf(nVarCoor, nn), stat=ierr)
+            if (ierr /= 0) &
+                call adtTerminate(ADT, "search", &
+                                  "Memory allocation failure for coorBuf.")
 
-          if(nCoorRecv == nCoorMax) exit
+            ! Send the coordinates to the appropriate processors.
+            ! Initialize the counter k in the coordinate buffer to 0.
 
-          ! Update the number of processors from which I will receive
-          ! data during this round.
+            k = 0
+            sendCoorLoop: do i = 1, nProcSendCur
 
-          nProcRecvCur = nProcRecvCur + 1
+                ! Store the processor ID and the starting entry in
+                ! coorPerRootLeaf a bit easier and initialize k1 to k.
 
-          ! Determine the amount I can receive from this processor.
-          ! If the upper limit is exceeded cut it off.
+                procCur = procSendCur(i)
+                nn = mCoorPerRootLeaf(procCur)
+                k1 = k
 
-          j = nCoorProcRecv(i)
-          if((nCoorRecv+j) > nCoorMax) j = nCoorMax - nCoorRecv
-          nCoorRecv = nCoorRecv + j
+                ! Loop to fill to buffer to this processor.
+                ! A distinction is made between a containment search and a
+                ! minimum distance search. In the former case a coordinate is
+                ! only sent if it has not been interpolated yet; in the
+                ! latter case it is sent if the distance is larger than zero.
+                ! For a minimum distance search also the current distance
+                ! squared is stored.
 
-          ! Store the amount j in the appropriate place of
-          ! nCoorFromProc and determine the number of nodes to be
-          ! sent to this processor in the next round (possibly 0).
+                test1Containment: if (containmentSearch) then
 
-          nCoorFromProc(procRecv(i)) = j
-          nCoorProcRecv(i) = nCoorProcRecv(i) - j
+                    ! Containment search. Store the coordinates of the points
+                    ! to be searched in coorBuf. Note that the counter j is
+                    ! updated inside the loop. This is done to be able to send
+                    ! the maximum number of coordinates possible.
 
-          ! Exit the loop if still some data should be received from
-          ! this processor in a next round. The difference between this
-          ! exit and the one in the beginning of the do loop is that
-          ! here the counter i is not update yet and therefore
-          ! startProcRecv will be set correctly for the next round.
+                    j = 0
+                    do
+                        ! Exit the loop if the maximum number or if the last
+                        ! coordinate for this processor has been reached.
 
-          if(nCoorProcRecv(i) > 0) exit
-       enddo
+                        if (j == nCoorPerProc(procCur) .or. &
+                            nn == nCoorPerRootLeaf(procCur + 1)) exit
 
-       ! Do an all to all communication such that every processor
-       ! knows the amount of data it should send to other processors.
+                        ! Update the counter nn and check if this coordinate
+                        ! still needs to be interpolated.
 
-       call mpi_alltoall(nCoorFromProc, 1, adflow_integer, &
-            nCoorPerProc,  1, adflow_integer, comm, ierr)
+                        nn = nn + 1
+                        l = coorPerRootLeaf(nn)
 
-       ! Set the non-zero entries of nCoorFromProc to zero again
-       ! for the next round.
+                        if (procID(l) == -1) then
 
-       nn = min(i,nProcRecv)
-       do j=startProcRecv, nn
-          nCoorFromProc(procRecv(j)) = 0
-       enddo
+                            ! Coordinate needs to be interpolated. Update the
+                            ! counters j and k and copy the coordinate in the
+                            ! send buffer.
 
-       ! Set the starting index for the next round.
+                            j = j + 1
+                            k = k + 1
+                            coorBuf(1, k) = coor(1, l)
+                            coorBuf(2, k) = coor(2, l)
+                            coorBuf(3, k) = coor(3, l)
 
-       startProcRecv = i
+                            ! Set the entry in coorRequested to .true. to indicate
+                            ! that a request was sent.
 
-       ! Determine the number of messages I have to send, the
-       ! corresponding processors and the total number of points to
-       ! be sent.
+                            coorRequested(nn) = .true.
 
-       nProcSendCur = 0
-       nn = 0
+                        end if
+                    end do
 
-       do i=0,(nProcs-1)
-          if(nCoorPerProc(i) > 0) then
-             nProcSendCur = nProcSendCur + 1
-             procSendCur(nProcSendCur) = i
-             nn = nn + nCoorPerProc(i)
-          endif
-       enddo
+                    else test1Containment
 
-       ! Allocate the memory for the send buffer of the coordinates
-       ! and possibly the minimum distance squared.
+                    ! Minimum distance search. Even if an earlier minimum
+                    ! distance was found, it is possible that an even smaller
+                    ! distance can be found. Therefore only points with a
+                    ! minimum distance squared of zero will not be sent. Both
+                    ! the coordinates and the current minimum distance squared
+                    ! is sent. Again the counter j is updated inside the loop.
 
-       allocate(coorBuf(nVarCoor,nn), stat=ierr)
-       if(ierr /= 0)                     &
-            call adtTerminate(ADT, "search", &
-            "Memory allocation failure for coorBuf.")
-
-       ! Send the coordinates to the appropriate processors.
-       ! Initialize the counter k in the coordinate buffer to 0.
-
-       k = 0
-       sendCoorLoop: do i=1,nProcSendCur
+                    j = 0
+                    do
+                        ! Exit the loop if the maximum number or if the last
+                        ! coordinate for this processor has been reached.
 
-          ! Store the processor ID and the starting entry in
-          ! coorPerRootLeaf a bit easier and initialize k1 to k.
+                        if (j == nCoorPerProc(procCur) .or. &
+                            nn == nCoorPerRootLeaf(procCur + 1)) exit
 
-          procCur = procSendCur(i)
-          nn      = mCoorPerRootLeaf(procCur)
-          k1      = k
+                        ! Update the counter nn and check if this coordinate still
+                        ! needs to be interpolated.
 
-          ! Loop to fill to buffer to this processor.
-          ! A distinction is made between a containment search and a
-          ! minimum distance search. In the former case a coordinate is
-          ! only sent if it has not been interpolated yet; in the
-          ! latter case it is sent if the distance is larger than zero.
-          ! For a minimum distance search also the current distance
-          ! squared is stored.
+                        nn = nn + 1
+                        l = coorPerRootLeaf(nn)
 
-          test1Containment: if( containmentSearch ) then
+                        if (dist2(l) > zero) then
 
-             ! Containment search. Store the coordinates of the points
-             ! to be searched in coorBuf. Note that the counter j is
-             ! updated inside the loop. This is done to be able to send
-             ! the maximum number of coordinates possible.
+                            ! Coordinate needs to be interpolated. Update the
+                            ! counters j and k and store the coordinates and the
+                            ! distance squared in the send buffer.
 
-             j = 0
-             do
-                ! Exit the loop if the maximum number or if the last
-                ! coordinate for this processor has been reached.
+                            j = j + 1
+                            k = k + 1
+                            coorBuf(1, k) = coor(1, l)
+                            coorBuf(2, k) = coor(2, l)
+                            coorBuf(3, k) = coor(3, l)
+                            coorBuf(4, k) = dist2(l)
 
-                if(j  == nCoorPerProc(procCur) .or. &
-                     nn == nCoorPerRootLeaf(procCur+1)) exit
+                            ! Set the entry in coorRequested to .true. to indicate
+                            ! that a request was sent.
 
-                ! Update the counter nn and check if this coordinate
-                ! still needs to be interpolated.
+                            coorRequested(nn) = .true.
 
-                nn = nn + 1
-                l  = coorPerRootLeaf(nn)
+                        end if
+                    end do
 
-                if(procID(l) == -1) then
+                end if test1Containment
 
-                   ! Coordinate needs to be interpolated. Update the
-                   ! counters j and k and copy the coordinate in the
-                   ! send buffer.
+                ! Determine the size of the message and send it.
+                ! Use nonblocking sends to avoid deadlock.
 
-                   j = j + 1
-                   k = k + 1
-                   coorBuf(1,k) = coor(1,l)
-                   coorBuf(2,k) = coor(2,l)
-                   coorBuf(3,k) = coor(3,l)
+                sizeMessage = nVarCoor * (k - k1)
+                call mpi_isend(coorBuf(1, k1 + 1), sizeMessage, adflow_real, &
+                               procCur, procCur, comm, &
+                               sendRequest(i), ierr)
 
-                   ! Set the entry in coorRequested to .true. to indicate
-                   ! that a request was sent.
+            end do sendCoorLoop
+            !
+            !          Perform the local interpolations. This is done here to
+            !          have an overlap between communication and computation.
+            !
+            ! Determine the local number to be interpolated in this round.
 
-                   coorRequested(nn) = .true.
+            nn = iEndLocal - iStartLocal
 
-                endif
-             enddo
+            ! Allocate the memory for the coorRecv, the integers for
+            ! storing the processor ID, element type and element ID, and
+            ! uvwRecv. This memory is also used for the local
+            ! interpolation, which explains the max test for intRecv and
+            ! uvwRecv. The coordinates are different, because they will be
+            ! released after each received message.
 
-          else test1Containment
+            i = max(nn, nCoorRecv)
+            allocate (coorRecv(nVarCoor, nn), intRecv(3, i), &
+                      uvwRecv(nVarUVW, i), stat=ierr)
+            if (ierr /= 0) &
+                 call adtTerminate(ADT, "search", &
+                 "Memory allocation failure for &
+                 &recv arrays")
 
-             ! Minimum distance search. Even if an earlier minimum
-             ! distance was found, it is possible that an even smaller
-             ! distance can be found. Therefore only points with a
-             ! minimum distance squared of zero will not be sent. Both
-             ! the coordinates and the current minimum distance squared
-             ! is sent. Again the counter j is updated inside the loop.
+            ! Initialize the counters i and j, which are used to fill the
+            ! buffer coorRecv.
 
-             j = 0
-             do
-                ! Exit the loop if the maximum number or if the last
-                ! coordinate for this processor has been reached.
+            j = 0
+            i = mCoorPerRootLeaf(myID)
 
-                if(j  == nCoorPerProc(procCur) .or. &
-                     nn == nCoorPerRootLeaf(procCur+1)) exit
+            ! Make a distinction between a containment and a minimum
+            ! distance search.
 
-                ! Update the counter nn and check if this coordinate still
-                ! needs to be interpolated.
+            test2Containment: if (containmentSearch) then
 
-                nn = nn + 1
-                l  = coorPerRootLeaf(nn)
+                ! Containment search. Copy the local coordinates to be
+                ! searched in coorRecv. As it is possible that in an earlier
+                ! round the coordinate has already been found, only take
+                ! coordinates whose element has not been found yet.
 
-                if(dist2(l) > zero) then
+                do
+                    ! Exit the loop if the maximum number or if the last
+                    ! coordinate of the local interpolation has been reached.
 
-                   ! Coordinate needs to be interpolated. Update the
-                   ! counters j and k and store the coordinates and the
-                   ! distance squared in the send buffer.
+                    if (j == nn .or. i == nCoorPerRootLeaf(myID + 1)) exit
 
-                   j = j + 1
-                   k = k + 1
-                   coorBuf(1,k) = coor(1,l)
-                   coorBuf(2,k) = coor(2,l)
-                   coorBuf(3,k) = coor(3,l)
-                   coorBuf(4,k) = dist2(l)
+                    ! Update the counter i and check if the corresponding
+                    ! coordinate still needs to be interpolated. If so, store it
+                    ! in coorRecv and set the corresponding entry in
+                    ! coorRequested to .true.
 
-                   ! Set the entry in coorRequested to .true. to indicate
-                   ! that a request was sent.
+                    i = i + 1
+                    l = coorPerRootLeaf(i)
 
-                   coorRequested(nn) = .true.
+                    if (procID(l) == -1) then
+                        j = j + 1
 
-                endif
-             enddo
+                        coorRecv(1, j) = coor(1, l)
+                        coorRecv(2, j) = coor(2, l)
+                        coorRecv(3, j) = coor(3, l)
 
-          endif test1Containment
+                        coorRequested(i) = .true.
+                    end if
+                end do
 
-          ! Determine the size of the message and send it.
-          ! Use nonblocking sends to avoid deadlock.
+                ! Perform the local interpolations. Note that j contains the
+                ! actual number of coordinates to be searched.
 
-          sizeMessage = nVarCoor*(k-k1)
-          call mpi_isend(coorBuf(1,k1+1), sizeMessage, adflow_real, &
-               procCur,         procCur,     comm,     &
-               sendRequest(i),  ierr)
+                nn = j
+                call containmentTreeSearch(ADT, coorRecv, intRecv, &
+                                           uvwRecv, arrDonor, nn, &
+                                           nInterpol)
 
-       enddo sendCoorLoop
-       !
-       !          Perform the local interpolations. This is done here to
-       !          have an overlap between communication and computation.
-       !
-       ! Determine the local number to be interpolated in this round.
+                ! Store the interpolation data at the correct location in
+                ! the corresponding arrays.
 
-       nn = iEndLocal - iStartLocal
+                i = mCoorPerRootLeaf(myID)
+                do j = 1, nn
 
-       ! Allocate the memory for the coorRecv, the integers for
-       ! storing the processor ID, element type and element ID, and
-       ! uvwRecv. This memory is also used for the local
-       ! interpolation, which explains the max test for intRecv and
-       ! uvwRecv. The coordinates are different, because they will be
-       ! released after each received message.
+                    ! Determine the coordinate entry, which corresponds to the
+                    ! counter j. Remember that in the calls to search routines
+                    ! only nodes are given which were not interpolated.
 
-       i = max(nn,nCoorRecv)
-       allocate(coorRecv(nVarCoor,nn), intRecv(3,i), &
-            uvwRecv(nVarUVW,i),    stat=ierr)
-       if(ierr /= 0)                     &
-            call adtTerminate(ADT, "search", &
-            "Memory allocation failure for &
-            &recv arrays")
+                    do
+                        i = i + 1
+                        if (coorRequested(i)) exit
+                    end do
+                    l = coorPerRootLeaf(i)
 
-       ! Initialize the counters i and j, which are used to fill the
-       ! buffer coorRecv.
+                    ! Copy the data if an actual element was found.
 
-       j = 0
-       i = mCoorPerRootLeaf(myID)
+                    if (intRecv(1, j) >= 0) then
+                        procID(l) = intRecv(1, j)
+                        elementType(l) = intRecv(2, j)
+                        elementID(l) = intRecv(3, j)
+                        uvw(1, l) = uvwRecv(1, j)
+                        uvw(2, l) = uvwRecv(2, j)
+                        uvw(3, l) = uvwRecv(3, j)
 
-       ! Make a distinction between a containment and a minimum
-       ! distance search.
+                        do m = 1, nInterpol
+                            arrInterpol(m, l) = uvwRecv(m + nVarCoor, j)
+                        end do
+                    end if
 
-       test2Containment: if( containmentSearch ) then
+                end do
 
-          ! Containment search. Copy the local coordinates to be
-          ! searched in coorRecv. As it is possible that in an earlier
-          ! round the coordinate has already been found, only take
-          ! coordinates whose element has not been found yet.
+                else test2Containment
 
-          do
-             ! Exit the loop if the maximum number or if the last
-             ! coordinate of the local interpolation has been reached.
+                ! Minimum distance search. Store the local coordinates and
+                ! distance in coorRecv for the coordinates with non-zero
+                ! distance.
 
-             if(j == nn .or. i == nCoorPerRootLeaf(myID+1)) exit
+                do
+                    ! Exit the loop if the maximum number or if the last
+                    ! coordinate of the local interpolation has been reached.
 
-             ! Update the counter i and check if the corresponding
-             ! coordinate still needs to be interpolated. If so, store it
-             ! in coorRecv and set the corresponding entry in
-             ! coorRequested to .true.
+                    if (j == nn .or. i == nCoorPerRootLeaf(myID + 1)) exit
 
-             i = i + 1
-             l = coorPerRootLeaf(i)
+                    ! Update the counter i and check if the corresponding
+                    ! coordinate still needs to be interpolated. If so, store
+                    ! its coordinates and distance squared in coorRecv and set
+                    ! the corresponding entry in coorRequested to .true.
 
-             if(procID(l) == -1) then
-                j = j + 1
+                    i = i + 1
+                    l = coorPerRootLeaf(i)
 
-                coorRecv(1,j) = coor(1,l)
-                coorRecv(2,j) = coor(2,l)
-                coorRecv(3,j) = coor(3,l)
+                    if (dist2(l) > zero) then
+                        j = j + 1
 
-                coorRequested(i) = .true.
-             endif
-          enddo
+                        coorRecv(1, j) = coor(1, l)
+                        coorRecv(2, j) = coor(2, l)
+                        coorRecv(3, j) = coor(3, l)
+                        coorRecv(4, j) = dist2(l)
 
-          ! Perform the local interpolations. Note that j contains the
-          ! actual number of coordinates to be searched.
+                        coorRequested(i) = .true.
+                    end if
+                end do
 
-          nn = j
-          call containmentTreeSearch(ADT,      coorRecv, intRecv, &
-               uvwRecv,   arrDonor, nn,      &
-               nInterpol)
+                ! Perform the local interpolations. Note that j contains the
+                ! actual number of coordinates to be searched.
 
-          ! Store the interpolation data at the correct location in
-          ! the corresponding arrays.
+                nn = j
+                call minDistanceTreeSearch(ADT, coorRecv, intRecv, &
+                                           uvwRecv, arrDonor, nn, &
+                                           nInterpol)
 
-          i = mCoorPerRootLeaf(myID)
-          do j=1,nn
+                ! Store the interpolation data at the correct location in
+                ! the corresponding arrays.
 
-             ! Determine the coordinate entry, which corresponds to the
-             ! counter j. Remember that in the calls to search routines
-             ! only nodes are given which were not interpolated.
+                i = mCoorPerRootLeaf(myID)
 
-             do
-                i = i + 1
-                if( coorRequested(i) ) exit
-             enddo
-             l = coorPerRootLeaf(i)
+                do j = 1, nn
 
-             ! Copy the data if an actual element was found.
+                    ! Determine the next coordinate entry, whose request was
+                    ! sent to be interpolated. Remember that it is possible
+                    ! that some nodes were not sent to be interpolated (if
+                    ! their distance squared is zero already).
+
+                    do
+                        i = i + 1
+                        if (coorRequested(i)) exit
+                    end do
+                    l = coorPerRootLeaf(i)
+
+                    ! Copy the data if an actual element was found and if
+                    ! the corresponding distance squared is less than the
+                    ! currently stored value.
+
+                    if (intRecv(1, j) >= 0 .and. uvwRecv(4, j) < dist2(l)) then
+
+                        procID(l) = intRecv(1, j)
+                        elementType(l) = intRecv(2, j)
+                        elementID(l) = intRecv(3, j)
+                        uvw(1, l) = uvwRecv(1, j)
+                        uvw(2, l) = uvwRecv(2, j)
+                        uvw(3, l) = uvwRecv(3, j)
+                        dist2(l) = uvwRecv(4, j)
+
+                        do m = 1, nInterpol
+                            arrInterpol(m, l) = uvwRecv(m + nVarCoor, j)
+                        end do
+                    end if
+
+                end do
+
+            end if test2Containment
 
-             if(intRecv(1,j) >= 0) then
-                procID(l)      = intRecv(1,j)
-                elementType(l) = intRecv(2,j)
-                elementID(l)   = intRecv(3,j)
-                uvw(1,l)       = uvwRecv(1,j)
-                uvw(2,l)       = uvwRecv(2,j)
-                uvw(3,l)       = uvwRecv(3,j)
+            ! The buffer coorRecv is not needed anymore.
+            ! Release the memory.
 
-                do m=1,nInterpol
-                   arrInterpol(m,l) = uvwRecv(m+nVarCoor,j)
-                enddo
-             endif
+            deallocate (coorRecv, stat=ierr)
+            if (ierr /= 0) &
+                call adtTerminate(ADT, "search", &
+                                  "Deallocation failure for coorRecv.")
 
-          enddo
+            ! Set iStartLocal and iEndLocal for the next round.
+            ! Also update mCoorPerRootLeaf(myID).
+
+            iStartLocal = iEndLocal
+            iEndLocal = iEndLocal + nLocalInterpolRound
+            iEndLocal = max(iEndLocal, nLocalInterpol)
+
+            mCoorPerRootLeaf(myID) = i
+            !
+            !          Perform the interpolations from the other processors.
+            !          Their coordinates are received in an arbitrary sequence
+            !          using blocking receives and the interpolated data is sent
+            !          back using nonblocking sends.
+            !
+            ! Loop over the number of messages I will receive. The counter
+            ! ii contains the current starting position for the buffers
+            ! to be sent back to the requesting processors.
 
-       else test2Containment
+            ii = 1
+            recvSendLoop: do i = 1, nProcRecvCur
 
-          ! Minimum distance search. Store the local coordinates and
-          ! distance in coorRecv for the coordinates with non-zero
-          ! distance.
+                ! Block until a message arrives and find the source and size
+                ! of the message.
 
-          do
-             ! Exit the loop if the maximum number or if the last
-             ! coordinate of the local interpolation has been reached.
+                call mpi_probe(mpi_any_source, myID, comm, mpiStatus, ierr)
 
-             if(j == nn .or. i == nCoorPerRootLeaf(myID+1)) exit
+                procCur = mpiStatus(mpi_source)
+                call mpi_get_count(mpiStatus, adflow_real, sizeMessage, ierr)
 
-             ! Update the counter i and check if the corresponding
-             ! coordinate still needs to be interpolated. If so, store
-             ! its coordinates and distance squared in coorRecv and set
-             ! the corresponding entry in coorRequested to .true.
+                ! Check in debug mode that the message is of correct size.
 
-             i = i + 1
-             l = coorPerRootLeaf(i)
+                if (debug) then
+                    if (sizeMessage == mpi_undefined .or. &
+                        mod(sizeMessage, nVarCoor) /= 0) &
+                        call adtTerminate(ADT, "search", &
+                                          "Unexpected size of message")
+                end if
 
-             if(dist2(l) > zero) then
-                j = j + 1
+                ! Allocate the memory for the coordinates to be received and
+                ! receive them using a blocking receive; the message has
+                ! already arrived.
 
-                coorRecv(1,j) = coor(1,l)
-                coorRecv(2,j) = coor(2,l)
-                coorRecv(3,j) = coor(3,l)
-                coorRecv(4,j) = dist2(l)
+                nn = sizeMessage / nVarCoor
+                allocate (coorRecv(nVarCoor, nn), stat=ierr)
+                if (ierr /= 0) &
+                     call adtTerminate(ADT, "search", &
+                     "Memory allocation failure for &
+                     &coorRecv.")
+
+                call mpi_recv(coorRecv, sizeMessage, adflow_real, procCur, &
+                              myID, comm, mpiStatus, ierr)
+
+                ! Search the corresponding elements in the local tree and
+                ! release coorRecv afterwards.
 
-                coorRequested(i) = .true.
-             endif
-          enddo
+                if (containmentSearch) then
+                    call containmentTreeSearch(ADT, coorRecv, &
+                                               intRecv(:, ii:), uvwRecv(:, ii:), &
+                                               arrDonor, nn, &
+                                               nInterpol)
+                else
+                    call minDistanceTreeSearch(ADT, coorRecv, &
+                                               intRecv(:, ii:), uvwRecv(:, ii:), &
+                                               arrDonor, nn, &
+                                               nInterpol)
+                end if
 
-          ! Perform the local interpolations. Note that j contains the
-          ! actual number of coordinates to be searched.
+                deallocate (coorRecv, stat=ierr)
+                if (ierr /= 0) &
+                    call adtTerminate(ADT, "search", &
+                                      "Deallocation failure for coorRecv.")
 
-          nn = j
-          call minDistanceTreeSearch(ADT,       coorRecv, intRecv, &
-               uvwRecv,   arrDonor, nn,      &
-               nInterpol)
+                ! Send the integer and the floating point information back to
+                ! the requesting processor.
 
-          ! Store the interpolation data at the correct location in
-          ! the corresponding arrays.
+                sizeMessage = 3 * nn
+                call mpi_isend(intRecv(1, ii), sizeMessage, adflow_integer, &
+                               procCur, procCur + 1, comm, &
+                               sendRecvRequest(1, i), ierr)
 
-          i = mCoorPerRootLeaf(myID)
+                sizeMessage = nVarUVW * nn
+                call mpi_isend(uvwRecv(1, ii), sizeMessage, adflow_real, &
+                               procCur, procCur + 2, comm, &
+                               sendRecvRequest(2, i), ierr)
 
-          do j=1,nn
+                ! Update the counter ii for the next message.
 
-             ! Determine the next coordinate entry, whose request was
-             ! sent to be interpolated. Remember that it is possible
-             ! that some nodes were not sent to be interpolated (if
-             ! their distance squared is zero already).
+                ii = ii + nn
 
-             do
-                i = i + 1
-                if( coorRequested(i) ) exit
-             enddo
-             l = coorPerRootLeaf(i)
+            end do recvSendLoop
 
-             ! Copy the data if an actual element was found and if
-             ! the corresponding distance squared is less than the
-             ! currently stored value.
+            ! Complete the nonblocking coordinate sends and release the
+            ! memory of coorBuf.
 
-             if(intRecv(1,j) >= 0 .and. uvwRecv(4,j) < dist2(l)) then
+            do i = 1, nProcSendCur
+                call mpi_waitany(nProcSendCur, sendRequest, sizeMessage, &
+                                 mpiStatus, ierr)
+            end do
 
-                procID(l)      = intRecv(1,j)
-                elementType(l) = intRecv(2,j)
-                elementID(l)   = intRecv(3,j)
-                uvw(1,l)       = uvwRecv(1,j)
-                uvw(2,l)       = uvwRecv(2,j)
-                uvw(3,l)       = uvwRecv(3,j)
-                dist2(l)       = uvwRecv(4,j)
+            deallocate (coorBuf, stat=ierr)
+            if (ierr /= 0) &
+                call adtTerminate(ADT, "search", &
+                                  "Deallocation failure for coorBuf.")
 
-                do m=1,nInterpol
-                   arrInterpol(m,l) = uvwRecv(m+nVarCoor,j)
-                enddo
-             endif
+            ! Loop over the number of processors to which I sent requests.
+            ! Now it is time to receive the information they interpolated.
+            ! The sequence of receiving messages is arbitrary.
 
-          enddo
+            recvLoop: do ii = 1, nProcSendCur
 
-       endif test2Containment
+                ! Block until an integer message arrives. These messages have
+                ! tags of myID+1. Also determine the sending processor and
+                ! the size of the message.
 
-       ! The buffer coorRecv is not needed anymore.
-       ! Release the memory.
+                call mpi_probe(mpi_any_source, myID + 1, comm, mpiStatus, ierr)
 
-       deallocate(coorRecv, stat=ierr)
-       if(ierr /= 0)                     &
-            call adtTerminate(ADT, "search", &
-            "Deallocation failure for coorRecv.")
-
-       ! Set iStartLocal and iEndLocal for the next round.
-       ! Also update mCoorPerRootLeaf(myID).
-
-       iStartLocal = iEndLocal
-       iEndLocal   = iEndLocal + nLocalInterpolRound
-       iEndLocal   = max(iEndLocal, nLocalInterpol)
-
-       mCoorPerRootLeaf(myID) = i
-       !
-       !          Perform the interpolations from the other processors.
-       !          Their coordinates are received in an arbitrary sequence
-       !          using blocking receives and the interpolated data is sent
-       !          back using nonblocking sends.
-       !
-       ! Loop over the number of messages I will receive. The counter
-       ! ii contains the current starting position for the buffers
-       ! to be sent back to the requesting processors.
-
-       ii = 1
-       recvSendLoop: do i=1,nProcRecvCur
-
-          ! Block until a message arrives and find the source and size
-          ! of the message.
-
-          call mpi_probe(mpi_any_source, myID, comm, mpiStatus, ierr)
-
-          procCur = mpiStatus(mpi_source)
-          call mpi_get_count(mpiStatus, adflow_real, sizeMessage, ierr)
-
-          ! Check in debug mode that the message is of correct size.
-
-          if( debug ) then
-             if(sizeMessage == mpi_undefined .or. &
-                  mod(sizeMessage,nVarCoor) /= 0)   &
-                  call adtTerminate(ADT, "search",    &
-                  "Unexpected size of message")
-          endif
-
-          ! Allocate the memory for the coordinates to be received and
-          ! receive them using a blocking receive; the message has
-          ! already arrived.
-
-          nn = sizeMessage/nVarCoor
-          allocate(coorRecv(nVarCoor,nn), stat=ierr)
-          if(ierr /= 0)                     &
-               call adtTerminate(ADT, "search", &
-               "Memory allocation failure for &
-               &coorRecv.")
-
-          call mpi_recv(coorRecv, sizeMessage, adflow_real, procCur, &
-               myID,     comm,        mpiStatus,   ierr)
-
-          ! Search the corresponding elements in the local tree and
-          ! release coorRecv afterwards.
-
-          if( containmentSearch ) then
-             call containmentTreeSearch(ADT,            coorRecv,       &
-                  intRecv(:,ii:), uvwRecv(:,ii:), &
-                  arrDonor,       nn,             &
-                  nInterpol)
-          else
-             call minDistanceTreeSearch(ADT,            coorRecv,       &
-                  intRecv(:,ii:), uvwRecv(:,ii:), &
-                  arrDonor,       nn,             &
-                  nInterpol)
-          endif
-
-          deallocate(coorRecv, stat=ierr)
-          if(ierr /= 0)                     &
-               call adtTerminate(ADT, "search", &
-               "Deallocation failure for coorRecv.")
-
-          ! Send the integer and the floating point information back to
-          ! the requesting processor.
-
-          sizeMessage = 3*nn
-          call mpi_isend(intRecv(1,ii),        sizeMessage, adflow_integer, &
-               procCur,              procCur+1,   comm,        &
-               sendRecvRequest(1,i), ierr)
-
-          sizeMessage = nVarUVW*nn
-          call mpi_isend(uvwRecv(1,ii),        sizeMessage, adflow_real, &
-               procCur,              procCur+2,   comm,     &
-               sendRecvRequest(2,i), ierr)
-
-          ! Update the counter ii for the next message.
-
-          ii = ii + nn
-
-       enddo recvSendLoop
-
-       ! Complete the nonblocking coordinate sends and release the
-       ! memory of coorBuf.
-
-       do i=1,nProcSendCur
-          call mpi_waitany(nProcSendCur, sendRequest, sizeMessage, &
-               mpiStatus,       ierr)
-       enddo
-
-       deallocate(coorBuf, stat=ierr)
-       if(ierr /= 0)                     &
-            call adtTerminate(ADT, "search", &
-            "Deallocation failure for coorBuf.")
+                procCur = mpiStatus(mpi_source)
+                call mpi_get_count(mpiStatus, adflow_integer, sizeMessage, ierr)
 
-       ! Loop over the number of processors to which I sent requests.
-       ! Now it is time to receive the information they interpolated.
-       ! The sequence of receiving messages is arbitrary.
+                ! Check in debug mode that the message is of correct size.
 
-       recvLoop: do ii=1,nProcSendCur
+                if (debug) then
+                    if (sizeMessage == mpi_undefined .or. &
+                        mod(sizeMessage, 3) /= 0) &
+                        call adtTerminate(ADT, "search", &
+                                          "Unexpected size of message")
+                end if
 
-          ! Block until an integer message arrives. These messages have
-          ! tags of myID+1. Also determine the sending processor and
-          ! the size of the message.
+                ! Allocate the memory for the integer and uvw buffers, such
+                ! that the messages can be received.
 
-          call mpi_probe(mpi_any_source, myID+1, comm, mpiStatus, ierr)
+                nn = sizeMessage / 3
+                allocate (intBuf(3, nn), uvwBuf(nVarUVW, nn), stat=ierr)
+                if (ierr /= 0) &
+                     call adtTerminate(ADT, "search", &
+                     "Memory allocation failure for intBuf &
+                     &and uvwBuf.")
 
-          procCur = mpiStatus(mpi_source)
-          call mpi_get_count(mpiStatus, adflow_integer, sizeMessage, ierr)
+                call mpi_recv(intBuf, sizeMessage, adflow_integer, procCur, &
+                              myID + 1, comm, mpiStatus, ierr)
 
-          ! Check in debug mode that the message is of correct size.
+                sizeMessage = nVarUVW * nn
+                call mpi_recv(uvwBuf, sizeMessage, adflow_real, procCur, &
+                              myID + 2, comm, mpiStatus, ierr)
 
-          if( debug ) then
-             if(sizeMessage == mpi_undefined .or. &
-                  mod(sizeMessage,3) /= 0)          &
-                  call adtTerminate(ADT, "search",    &
-                  "Unexpected size of message")
-          endif
+                ! Store the interpolation data at the correct location in the
+                ! corresponding arrays. A distinction must be made between
+                ! containment search and minimum distance search, because for
+                ! the latter it is possible that a better candidate is
+                ! already stored.
 
-          ! Allocate the memory for the integer and uvw buffers, such
-          ! that the messages can be received.
+                i = mCoorPerRootLeaf(procCur)
 
-          nn = sizeMessage/3
-          allocate(intBuf(3,nn), uvwBuf(nVarUVW,nn), stat=ierr)
-          if(ierr /= 0)                     &
-               call adtTerminate(ADT, "search", &
-               "Memory allocation failure for intBuf &
-               &and uvwBuf.")
+                test3Containment: if (containmentSearch) then
 
-          call mpi_recv(intBuf, sizeMessage, adflow_integer, procCur, &
-               myID+1, comm,        mpiStatus,      ierr)
+                    ! Containment search. Loop over the number of points
+                    ! requested on the other processor.
 
-          sizeMessage = nVarUVW*nn
-          call mpi_recv(uvwBuf, sizeMessage, adflow_real, procCur, &
-               myID+2, comm,        mpiStatus,   ierr)
+                    do j = 1, nn
 
-          ! Store the interpolation data at the correct location in the
-          ! corresponding arrays. A distinction must be made between
-          ! containment search and minimum distance search, because for
-          ! the latter it is possible that a better candidate is
-          ! already stored.
+                        ! Determine the next coordinate entry, whose request was
+                        ! sent to be interpolated. Remember that only nodes are
+                        ! sent which were not interpolated.
 
-          i = mCoorPerRootLeaf(procCur)
+                        do
+                            i = i + 1
+                            if (coorRequested(i)) exit
+                        end do
 
-          test3Containment: if( containmentSearch ) then
+                        ! Copy the data if an actual element was found.
 
-             ! Containment search. Loop over the number of points
-             ! requested on the other processor.
+                        if (intBuf(1, j) >= 0) then
+                            l = coorPerRootLeaf(i)
 
-             do j=1,nn
+                            procID(l) = intBuf(1, j)
+                            elementType(l) = intBuf(2, j)
+                            elementID(l) = intBuf(3, j)
+                            uvw(1, l) = uvwBuf(1, j)
+                            uvw(2, l) = uvwBuf(2, j)
+                            uvw(3, l) = uvwBuf(3, j)
 
-                ! Determine the next coordinate entry, whose request was
-                ! sent to be interpolated. Remember that only nodes are
-                ! sent which were not interpolated.
+                            do m = 1, nInterpol
+                                arrInterpol(m, l) = uvwBuf(m + nVarCoor, j)
+                            end do
+                        end if
 
-                do
-                   i = i + 1
-                   if( coorRequested(i) ) exit
-                enddo
+                    end do
 
-                ! Copy the data if an actual element was found.
+                    else test3Containment
 
-                if(intBuf(1,j) >= 0) then
-                   l = coorPerRootLeaf(i)
+                    ! Minimum distance search. Loop over the number of points
+                    ! requested on the other processor.
 
-                   procID(l)      = intBuf(1,j)
-                   elementType(l) = intBuf(2,j)
-                   elementID(l)   = intBuf(3,j)
-                   uvw(1,l)       = uvwBuf(1,j)
-                   uvw(2,l)       = uvwBuf(2,j)
-                   uvw(3,l)       = uvwBuf(3,j)
+                    do j = 1, nn
 
-                   do m=1,nInterpol
-                      arrInterpol(m,l) = uvwBuf(m+nVarCoor,j)
-                   enddo
-                endif
+                        ! Determine the next coordinate entry, whose request was
+                        ! sent to be interpolated. Remember that it is possible
+                        ! that some nodes were not sent to be interpolated (if
+                        ! their distance squared is zero already).
 
-             enddo
+                        do
+                            i = i + 1
+                            if (coorRequested(i)) exit
+                        end do
 
-          else test3Containment
+                        ! Copy the data if an actual element was found and if
+                        ! the corresponding distance squared is less than the
+                        ! currently stored value.
 
-             ! Minimum distance search. Loop over the number of points
-             ! requested on the other processor.
+                        l = coorPerRootLeaf(i)
+                        if (intBuf(1, j) >= 0 .and. uvwBuf(4, j) < dist2(l)) then
 
-             do j=1,nn
+                            procID(l) = intBuf(1, j)
+                            elementType(l) = intBuf(2, j)
+                            elementID(l) = intBuf(3, j)
+                            uvw(1, l) = uvwBuf(1, j)
+                            uvw(2, l) = uvwBuf(2, j)
+                            uvw(3, l) = uvwBuf(3, j)
+                            dist2(l) = uvwBuf(4, j)
 
-                ! Determine the next coordinate entry, whose request was
-                ! sent to be interpolated. Remember that it is possible
-                ! that some nodes were not sent to be interpolated (if
-                ! their distance squared is zero already).
+                            do m = 1, nInterpol
+                                arrInterpol(m, l) = uvwBuf(m + nVarCoor, j)
+                            end do
+                        end if
 
-                do
-                   i = i + 1
-                   if( coorRequested(i) ) exit
-                enddo
+                    end do
 
-                ! Copy the data if an actual element was found and if
-                ! the corresponding distance squared is less than the
-                ! currently stored value.
+                end if test3Containment
 
-                l = coorPerRootLeaf(i)
-                if(intBuf(1,j) >= 0 .and. uvwBuf(4,j) < dist2(l)) then
+                ! Update mCoorPerRootLeaf(procCur) for the next round and
+                ! release the memory of intBuf and uvwBuf.
 
-                   procID(l)      = intBuf(1,j)
-                   elementType(l) = intBuf(2,j)
-                   elementID(l)   = intBuf(3,j)
-                   uvw(1,l)       = uvwBuf(1,j)
-                   uvw(2,l)       = uvwBuf(2,j)
-                   uvw(3,l)       = uvwBuf(3,j)
-                   dist2(l)       = uvwBuf(4,j)
+                mCoorPerRootLeaf(procCur) = i
 
-                   do m=1,nInterpol
-                      arrInterpol(m,l) = uvwBuf(m+nVarCoor,j)
-                   enddo
-                endif
+                deallocate (intBuf, uvwBuf, stat=ierr)
+                if (ierr /= 0) &
+                     call adtTerminate(ADT, "search", &
+                     "Deallocation failure for intBuf &
+                     &and uvwBuf.")
+            end do recvLoop
 
-             enddo
+            ! Complete the nonblocking sends of the interpolated data.
 
-          endif test3Containment
+            !nProcRecvCur = 2*nProcRecvCur
+            do i = 1, nProcRecvCur
+                call mpi_waitany(nProcRecvCur, sendRecvRequest(1, :), sizeMessage, &
+                                 mpiStatus, ierr)
+                call mpi_waitany(nProcRecvCur, sendRecvRequest(2, :), sizeMessage, &
+                                 mpiStatus, ierr)
+            end do
 
-          ! Update mCoorPerRootLeaf(procCur) for the next round and
-          ! release the memory of intBuf and uvwBuf.
+            ! Release the memory of the buffers used in the nonblocking
+            ! sends of the interpolated data.
 
-          mCoorPerRootLeaf(procCur) = i
+            deallocate (intRecv, uvwRecv, stat=ierr)
+            if (ierr /= 0) &
+                 call adtTerminate(ADT, "search", &
+                 "Deallocation failure for intRecv and &
+                 &uvwRecv")
 
-          deallocate(intBuf, uvwBuf, stat=ierr)
-          if(ierr /= 0)                     &
-               call adtTerminate(ADT, "search", &
-               "Deallocation failure for intBuf &
-               &and uvwBuf.")
-       enddo recvLoop
+            ! Synchronize the processors, because wild cards have been
+            ! used in the communication.
 
-       ! Complete the nonblocking sends of the interpolated data.
+            call mpi_barrier(comm, ierr)
 
-       !nProcRecvCur = 2*nProcRecvCur
-       do i=1,nProcRecvCur
-          call mpi_waitany(nProcRecvCur, sendRecvRequest(1,:), sizeMessage, &
-               mpiStatus,       ierr)
-          call mpi_waitany(nProcRecvCur, sendRecvRequest(2,:), sizeMessage, &
-               mpiStatus,       ierr)
-       enddo
+        end do outerLoop
 
-       ! Release the memory of the buffers used in the nonblocking
-       ! sends of the interpolated data.
+        ! Release the memory of the help arrays allocated in this
+        ! routine.
 
-       deallocate(intRecv, uvwRecv, stat=ierr)
-       if(ierr /= 0)                     &
+        deallocate (procSendCur, sendRequest, nCoorPerProc, &
+                    nCoorFromProc, sendRecvRequest, coorRequested, &
+                    stat=ierr)
+        if (ierr /= 0) &
             call adtTerminate(ADT, "search", &
-            "Deallocation failure for intRecv and &
-            &uvwRecv")
-
-       ! Synchronize the processors, because wild cards have been
-       ! used in the communication.
-
-       call mpi_barrier(comm, ierr)
-
-    enddo outerLoop
-
-    ! Release the memory of the help arrays allocated in this
-    ! routine.
-
-    deallocate(procSendCur,   sendRequest,     nCoorPerProc,  &
-         nCoorFromProc, sendRecvRequest, coorRequested, &
-         stat=ierr)
-    if(ierr /= 0)                     &
-         call adtTerminate(ADT, "search", &
-         "Deallocation failure for help arrays.")
+                              "Deallocation failure for help arrays.")
 
-    ! Release the memory of the help arrays stored in the module
-    ! adtData.
+        ! Release the memory of the help arrays stored in the module
+        ! adtData.
 
-    deallocate(procRecv,         nCoorProcRecv,   nCoorPerRootLeaf, &
-         mCoorPerRootLeaf, coorPerRootLeaf, stat=ierr)
-    if(ierr /= 0)                     &
-         call adtTerminate(ADT, "search", &
-         "Deallocation failure for help arrays &
-         &stored in the module adtData.")
+        deallocate (procRecv, nCoorProcRecv, nCoorPerRootLeaf, &
+                    mCoorPerRootLeaf, coorPerRootLeaf, stat=ierr)
+        if (ierr /= 0) &
+             call adtTerminate(ADT, "search", &
+             "Deallocation failure for help arrays &
+             &stored in the module adtData.")
 
-  end subroutine search
+    end subroutine search
 
 end module adtSearch
diff --git a/src/ADT/adtUtils.F90 b/src/ADT/adtUtils.F90
index a90073b80..ab1a5d46f 100644
--- a/src/ADT/adtUtils.F90
+++ b/src/ADT/adtUtils.F90
@@ -1,930 +1,923 @@
 module adtUtils
-  !
-  !      Module, which contains small subroutines which perform useful
-  !      tasks.
-  !
-  use constants
-  use adtData
-  implicit none
-  !
-  !                Variables stored in this module.
-  !
-  ! nStack:   Number of elements allocated in the stack array;
-  !           needed for a more efficient implementation of the
-  !           local qsort routines.
-  ! stack(:): The corresponding array to store the stack.
-  !           This is a pointer, such that the reallocation
-  !           is easier.
-
-  integer(kind=intType) :: nStack
-  integer(kind=intType), dimension(:), pointer :: stack
-
-  !=================================================================
-
-contains
-
-  !===============================================================
-
-  subroutine adtTerminate(ADT, routineName, errorMessage)
     !
-    !        This routine writes the given error message to standard
-    !        output and terminates the executation of the program.
-    !        Subroutine intent(in) arguments.
-    !        --------------------------------
-    !        routineName: Name of the routine where the error occured.
-    !        ADT:         Currently active ADT.
-    !        Subroutine intent(inout) arguments.
-    !        -----------------------------------
-    !        errorMessage: On input it contains the error message to be
-    !                      written. It is modified in this routine, such
-    !                      that it fits on one line. On output its
-    !                      contents is undefined, which does not matter
-    !                      a whole lot.
+    !      Module, which contains small subroutines which perform useful
+    !      tasks.
     !
+    use constants
+    use adtData
     implicit none
     !
-    !       Subroutine arguments
+    !                Variables stored in this module.
     !
-    type(adtType), intent(in) :: ADT
+    ! nStack:   Number of elements allocated in the stack array;
+    !           needed for a more efficient implementation of the
+    !           local qsort routines.
+    ! stack(:): The corresponding array to store the stack.
+    !           This is a pointer, such that the reallocation
+    !           is easier.
 
-    character(len=*), intent(in) :: routineName
-    character(len=*), intent(in) :: errorMessage
-    !
-    !       Local parameter
-    !
-    integer, parameter :: maxCharLine = 55
-    !
-    !       Local variables
-    !
-    integer :: ierr, len, i2
-    logical :: firstTime
-
-    character(len=len_trim(errorMessage)) :: message
-    character(len=8) :: integerString
+    integer(kind=intType) :: nStack
+    integer(kind=intType), dimension(:), pointer :: stack
 
-    ! Copy the errorMessage into message. It is not possible to work
-    ! with errorMessage directly, because it is modified in this
-    ! routine. Sometimes a constant string is passed to this routine
-    ! and some compilers simply fail then.
+    !=================================================================
 
-    message = errorMessage
-
-    ! Print a nice error message. In case of a parallel executable
-    ! also the processor ID is printed.
+contains
 
-    print "(a)", "#"
-    print "(a)", "#=========================== !!! Error !!! &
-         &============================"
+    !===============================================================
+
+    subroutine adtTerminate(ADT, routineName, errorMessage)
+        !
+        !        This routine writes the given error message to standard
+        !        output and terminates the executation of the program.
+        !        Subroutine intent(in) arguments.
+        !        --------------------------------
+        !        routineName: Name of the routine where the error occured.
+        !        ADT:         Currently active ADT.
+        !        Subroutine intent(inout) arguments.
+        !        -----------------------------------
+        !        errorMessage: On input it contains the error message to be
+        !                      written. It is modified in this routine, such
+        !                      that it fits on one line. On output its
+        !                      contents is undefined, which does not matter
+        !                      a whole lot.
+        !
+        implicit none
+        !
+        !       Subroutine arguments
+        !
+        type(adtType), intent(in) :: ADT
+
+        character(len=*), intent(in) :: routineName
+        character(len=*), intent(in) :: errorMessage
+        !
+        !       Local parameter
+        !
+        integer, parameter :: maxCharLine = 55
+        !
+        !       Local variables
+        !
+        integer :: ierr, len, i2
+        logical :: firstTime
+
+        character(len=len_trim(errorMessage)) :: message
+        character(len=8) :: integerString
+
+        ! Copy the errorMessage into message. It is not possible to work
+        ! with errorMessage directly, because it is modified in this
+        ! routine. Sometimes a constant string is passed to this routine
+        ! and some compilers simply fail then.
+
+        message = errorMessage
+
+        ! Print a nice error message. In case of a parallel executable
+        ! also the processor ID is printed.
+
+        print "(a)", "#"
+        print "(a)", "#=========================== !!! Error !!! &
+             &============================"
 
 #ifndef SEQUENTIAL_MODE
-    write(integerString,"(i8)") ADT%myID
-    integerString = adjustl(integerString)
+        write (integerString, "(i8)") ADT%myID
+        integerString = adjustl(integerString)
 
-    print "(2a)", "#* adtTerminate called by processor ", &
-         trim(integerString)
+        print "(2a)", "#* adtTerminate called by processor ", &
+            trim(integerString)
 #endif
 
-    print "(2a)", "#* Run-time error in procedure ", &
-         trim(routineName)
-
-    ! Loop to write the error message. If the message is too long it
-    ! is split over several lines.
-
-    firstTime = .true.
-    do
-       ! Determine the remaining error message to be written.
-       ! If longer than the maximum number of characters allowed
-       ! on a line, it is attempted to split the message.
-
-       message = adjustl(message)
-       len = len_trim(message)
-       i2  = min(maxCharLine,len)
-
-       if(i2 < len) i2 = index(message(:i2), " ", .true.) - 1
-       if(i2 < 0)   i2 = index(message, " ") - 1
-       if(i2 < 0)   i2 = len
-
-       ! Write this part of the error message. If it is the first
-       ! line of the message some additional stuff is printed.
-
-       if( firstTime ) then
-          print "(2a)", "#* Error message: ", trim(message(:i2))
-          firstTime = .false.
-       else
-          print "(2a)", "#*                ", trim(message(:i2))
-       endif
-
-       ! Exit the loop if the entire message has been written.
-
-       if(i2 == len) exit
-
-       ! Adapt the string for the next part to be written.
-
-       message = message(i2+1:)
-
-    enddo
-
-    ! Write the trailing message.
-
-    print "(a)", "#*"
-    print "(a)", "#* Now exiting"
-    print "(a)", "#==========================================&
-         &============================"
-    print "(a)", "#"
-
-    ! Call abort and stop the program. This stop should be done in
-    ! abort, but just to be sure.
-
-    call mpi_abort(ADT%comm, 1, ierr)
-    stop
-
-  end subroutine adtTerminate
-
-
-  subroutine allocateADTs
-    !
-    !        This routine allocates the memory for the first ADT and is
-    !        only called when no other ADT's are present.
-    !
-    implicit none
-    !
-    !       Local variables.
-    !
-    integer :: ierr
-
-    ! Allocate the memory for 1 ADT. Note that adtTerminate is not
-    ! called when the memory allocation fails. The reason is that
-    ! the processor ID for the current tree is used in this routine
-    ! and that value has not been set yet.
-
-    allocate(ADTs(1), stat=ierr)
-    if(ierr /= 0) stop "Allocation failure for ADTs"
-
-    ! Nullify the pointers of ADTs(1).
-
-    nullify(ADTs(1)%coor)
-
-    nullify(ADTs(1)%triaConn)
-    nullify(ADTs(1)%quadsConn)
-    nullify(ADTs(1)%tetraConn)
-    nullify(ADTs(1)%pyraConn)
-    nullify(ADTs(1)%prismsConn)
-    nullify(ADTs(1)%hexaConn)
-
-    nullify(ADTs(1)%rootLeavesProcs)
-    nullify(ADTs(1)%rootBBoxes)
-
-    nullify(ADTs(1)%elementType)
-    nullify(ADTs(1)%elementID)
-    nullify(ADTs(1)%xBBox)
-
-    nullify(ADTs(1)%ADTree)
-
-  end subroutine allocateADTs
-
+        print "(2a)", "#* Run-time error in procedure ", &
+            trim(routineName)
 
-  subroutine deallocateADTs(adtID)
-    !
-    !        This routine deallocates the memory for the given entry in
-    !        the array ADTs and it tries to reallocate ADTs itself
-    !        accordingly.
-    !        Subroutine intent(in) arguments.
-    !        --------------------------------
-    !        adtID: The entry in ADTs to be deallocated.
-    !
-    implicit none
-    !
-    !       Subroutine arguments.
-    !
-    character(len=*), intent(in) :: adtID
-    !
-    !       Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: jj, nn, nAlloc, nAllocNew
-
-    type(adtType), dimension(:), allocatable :: tmpADTs
-
-    ! Determine the index in the array ADTs, which stores the given
-    ! ID. As the number of trees stored is limited, a linear search
-    ! algorithm is okay.
-
-    if( allocated(ADTs) ) then
-       nAlloc = ubound(ADTs, 1)
-    else
-       nAlloc = 0
-    endif
-
-    do jj=1,nAlloc
-       if(adtID == ADTs(jj)%adtID) exit
-    enddo
-
-    ! Return immediately if the ID is not present.
-
-    if(jj > nAlloc) return
-
-    ! Deallocate the data for this ADT entry. Note that the memory
-    ! for the nodal coordinates and the connectivity should not be
-    ! deallocated, because these pointers are just set to the given
-    ! input. The deallocation only takes place if the tree is active.
-
-    if( ADTs(jj)%isActive ) then
-
-       deallocate(ADTs(jj)%rootLeavesProcs, ADTs(jj)%rootBBoxes, &
-            ADTs(jj)%elementType,     ADTs(jj)%elementID,  &
-            ADTs(jj)%xBBox,           ADTs(jj)%ADTree,     &
-            stat=ierr)
-       if(ierr /= 0)                             &
-            call adtTerminate(ADTs(jj), "deallocateADTs", &
-            "Deallocation failure for the ADT data")
-    endif
-
-    ! Make sure the ADT is inactive and nullify the pointers.
-
-    ADTs(jj)%isActive = .false.
-
-    nullify(ADTs(jj)%coor)
-
-    nullify(ADTs(jj)%triaConn)
-    nullify(ADTs(jj)%quadsConn)
-    nullify(ADTs(jj)%tetraConn)
-    nullify(ADTs(jj)%pyraConn)
-    nullify(ADTs(jj)%prismsConn)
-    nullify(ADTs(jj)%hexaConn)
-
-    nullify(ADTs(jj)%rootLeavesProcs)
-    nullify(ADTs(jj)%rootBBoxes)
+        ! Loop to write the error message. If the message is too long it
+        ! is split over several lines.
 
-    nullify(ADTs(jj)%elementType)
-    nullify(ADTs(jj)%elementID)
-    nullify(ADTs(jj)%xBBox)
+        firstTime = .true.
+        do
+            ! Determine the remaining error message to be written.
+            ! If longer than the maximum number of characters allowed
+            ! on a line, it is attempted to split the message.
 
-    nullify(ADTs(jj)%ADTree)
+            message = adjustl(message)
+            len = len_trim(message)
+            i2 = min(maxCharLine, len)
 
-    ! Determine the highest entry in ADTs which is still valid.
+            if (i2 < len) i2 = index(message(:i2), " ", .true.) - 1
+            if (i2 < 0) i2 = index(message, " ") - 1
+            if (i2 < 0) i2 = len
 
-    do nn=nAlloc,1,-1
-       if( ADTs(nn)%isActive ) exit
-    enddo
+            ! Write this part of the error message. If it is the first
+            ! line of the message some additional stuff is printed.
 
-    ! Determine the situation we are having here.
+            if (firstTime) then
+                print "(2a)", "#* Error message: ", trim(message(:i2))
+                firstTime = .false.
+            else
+                print "(2a)", "#*                ", trim(message(:i2))
+            end if
 
-    if(nn == 0) then
+            ! Exit the loop if the entire message has been written.
 
-       ! No active ADT's anymore. Deallocte the entire array.
-       ! Note that adtTerminate cannot be called when something
-       ! goes wrong.
+            if (i2 == len) exit
 
-       deallocate(ADTs, stat=ierr)
-       if(ierr /= 0) stop "Deallocation failure for ADTs"
+            ! Adapt the string for the next part to be written.
 
-    else if(nn < nAlloc) then
+            message = message(i2 + 1:)
 
-       ! There are still some active ADT's, but the highest ones
-       ! are inactive. Therefore ADTs is reallocated. First allocate
-       ! the memory for tmpADTs to be able to retrieve the currently
-       ! stored data later on.
+        end do
 
-       nAllocNew = nn
-       allocate(tmpADTs(nAllocNew), stat=ierr)
-       if(ierr /= 0) &
-            call adtTerminate(ADTs(jj), "adtDeallocateADTs", &
-            "Memory allocation failure for tmpADTs")
+        ! Write the trailing message.
 
-       ! Copy the data from ADTs to tmpADTs.
+        print "(a)", "#*"
+        print "(a)", "#* Now exiting"
+        print "(a)", "#==========================================&
+             &============================"
+        print "(a)", "#"
 
-       do nn=1,nAllocNew
-          tmpADTs(nn) = ADTs(nn)
-       enddo
+        ! Call abort and stop the program. This stop should be done in
+        ! abort, but just to be sure.
 
-       ! Deallocate and allocate the memory for ADTs. Note that
-       ! adtTerminate is not called when the memory allocation fails.
-       ! The reason is that the processor ID for the current tree is
-       ! used in this routine and that value may not be available
-       ! anymore.
+        call mpi_abort(ADT%comm, 1, ierr)
+        stop
 
-       deallocate(ADTs, stat=ierr)
-       if(ierr /= 0) stop "Deallocation failure for ADTs"
+    end subroutine adtTerminate
 
-       allocate(ADTs(nAllocNew), stat=ierr)
-       if(ierr /= 0) stop "Allocation failure for ADTs"
+    subroutine allocateADTs
+        !
+        !        This routine allocates the memory for the first ADT and is
+        !        only called when no other ADT's are present.
+        !
+        implicit none
+        !
+        !       Local variables.
+        !
+        integer :: ierr
 
-       ! Copy the data back into ADTs and release the memory of
-       ! tmpADTs afterwards.
+        ! Allocate the memory for 1 ADT. Note that adtTerminate is not
+        ! called when the memory allocation fails. The reason is that
+        ! the processor ID for the current tree is used in this routine
+        ! and that value has not been set yet.
 
-       do nn=1,nAllocNew
-          ADTs(nn) = tmpADTs(nn)
-       enddo
+        allocate (ADTs(1), stat=ierr)
+        if (ierr /= 0) stop "Allocation failure for ADTs"
 
-       deallocate(tmpADTs, stat=ierr)
-       if(ierr /= 0) stop "Deallocation failure for tmpADTs"
+        ! Nullify the pointers of ADTs(1).
 
-    endif
+        nullify (ADTs(1)%coor)
 
-  end subroutine deallocateADTs
+        nullify (ADTs(1)%triaConn)
+        nullify (ADTs(1)%quadsConn)
+        nullify (ADTs(1)%tetraConn)
+        nullify (ADTs(1)%pyraConn)
+        nullify (ADTs(1)%prismsConn)
+        nullify (ADTs(1)%hexaConn)
 
+        nullify (ADTs(1)%rootLeavesProcs)
+        nullify (ADTs(1)%rootBBoxes)
 
-  subroutine qsortBBoxes(arr, nn, ADT, dir)
-    !
-    !        This routine sorts the integer array arr, such that the
-    !        coordinate of the corresponding bounding box in the
-    !        direction dir is in increasing order. Note that the array to
-    !        store the stack is stored in this module. The reason is that
-    !        this routine is called quite often and in this way constant
-    !        allocation, deallocation and reallocation of stack is
-    !        avoided.
-    !        Subroutine intent(in) arguments.
-    !        --------------------------------
-    !        nn:  Size of the array to be sorted.
-    !        ADT: The ADTfrom which the coordinate of
-    !             the bounding box must be taken.
-    !        dir: Index of the coordinate, which must be sorted.
-    !        Subroutine intent(inout) arguments.
-    !        -----------------------------------
-    !        arr(:): On input it contains the bounding box ID's which
-    !                must be sorted. On output these ID's are sorted,
-    !                such that the given coordinate is in increasing
-    !                order.
-    !
-    implicit none
-    !
-    !       Subroutine arguments.
-    !
-    type(adtType), intent(in) :: ADT
-    integer(kind=intType), intent(in)  :: nn, dir
-
-    integer(kind=intType), dimension(:), intent(inout) :: arr
-    !
-    !       Local parameters.
-    !
-    integer(kind=intType), parameter :: m = 7
-    !
-    !       Local variables.
-    !
-    integer(kind=intType) :: i, j, k, r, l, jStack
-    integer(kind=intType) :: a, tmp
-
-    real(kind=realType) :: ra
-    real(kind=realType), dimension(:,:), pointer :: xBBox
+        nullify (ADTs(1)%elementType)
+        nullify (ADTs(1)%elementID)
+        nullify (ADTs(1)%xBBox)
 
-    ! Set the pointer for the coordinates of the bounding boxes.
+        nullify (ADTs(1)%ADTree)
 
-    xBBox => ADT%xBBox
+    end subroutine allocateADTs
 
-    ! Initialize the variables that control the sorting.
+    subroutine deallocateADTs(adtID)
+        !
+        !        This routine deallocates the memory for the given entry in
+        !        the array ADTs and it tries to reallocate ADTs itself
+        !        accordingly.
+        !        Subroutine intent(in) arguments.
+        !        --------------------------------
+        !        adtID: The entry in ADTs to be deallocated.
+        !
+        implicit none
+        !
+        !       Subroutine arguments.
+        !
+        character(len=*), intent(in) :: adtID
+        !
+        !       Local variables.
+        !
+        integer :: ierr
 
-    jStack = 0
-    l      = 1
-    r      = nn
+        integer(kind=intType) :: jj, nn, nAlloc, nAllocNew
 
-    ! Start of the algorithm.
+        type(adtType), dimension(:), allocatable :: tmpADTs
 
-    sortLoop: do
+        ! Determine the index in the array ADTs, which stores the given
+        ! ID. As the number of trees stored is limited, a linear search
+        ! algorithm is okay.
 
-       ! Check for the size of the subarray.
+        if (allocated(ADTs)) then
+            nAlloc = ubound(ADTs, 1)
+        else
+            nAlloc = 0
+        end if
 
-       testInsertion: if((r-l) < m) then
+        do jj = 1, nAlloc
+            if (adtID == ADTs(jj)%adtID) exit
+        end do
 
-          ! Perform the insertion sort.
+        ! Return immediately if the ID is not present.
 
-          do j=(l+1),r
-             a  = arr(j)
-             ra = xBBox(dir,a)
-             do i=(j-1),l,-1
-                if(xBBox(dir,arr(i)) <= ra) exit
-                arr(i+1) = arr(i)
-             enddo
-             arr(i+1) = a
-          enddo
+        if (jj > nAlloc) return
 
-          ! In case there are no more elements on the stack, exit from
-          ! the outermost do-loop. Algorithm has finished.
+        ! Deallocate the data for this ADT entry. Note that the memory
+        ! for the nodal coordinates and the connectivity should not be
+        ! deallocated, because these pointers are just set to the given
+        ! input. The deallocation only takes place if the tree is active.
 
-          if(jStack == 0) exit sortLoop
+        if (ADTs(jj)%isActive) then
 
-          ! Pop stack and begin a new round of partitioning.
+            deallocate (ADTs(jj)%rootLeavesProcs, ADTs(jj)%rootBBoxes, &
+                        ADTs(jj)%elementType, ADTs(jj)%elementID, &
+                        ADTs(jj)%xBBox, ADTs(jj)%ADTree, &
+                        stat=ierr)
+            if (ierr /= 0) &
+                call adtTerminate(ADTs(jj), "deallocateADTs", &
+                                  "Deallocation failure for the ADT data")
+        end if
+
+        ! Make sure the ADT is inactive and nullify the pointers.
+
+        ADTs(jj)%isActive = .false.
+
+        nullify (ADTs(jj)%coor)
+
+        nullify (ADTs(jj)%triaConn)
+        nullify (ADTs(jj)%quadsConn)
+        nullify (ADTs(jj)%tetraConn)
+        nullify (ADTs(jj)%pyraConn)
+        nullify (ADTs(jj)%prismsConn)
+        nullify (ADTs(jj)%hexaConn)
+
+        nullify (ADTs(jj)%rootLeavesProcs)
+        nullify (ADTs(jj)%rootBBoxes)
+
+        nullify (ADTs(jj)%elementType)
+        nullify (ADTs(jj)%elementID)
+        nullify (ADTs(jj)%xBBox)
+
+        nullify (ADTs(jj)%ADTree)
+
+        ! Determine the highest entry in ADTs which is still valid.
+
+        do nn = nAlloc, 1, -1
+            if (ADTs(nn)%isActive) exit
+        end do
+
+        ! Determine the situation we are having here.
+
+        if (nn == 0) then
+
+            ! No active ADT's anymore. Deallocte the entire array.
+            ! Note that adtTerminate cannot be called when something
+            ! goes wrong.
+
+            deallocate (ADTs, stat=ierr)
+            if (ierr /= 0) stop "Deallocation failure for ADTs"
+
+        else if (nn < nAlloc) then
+
+            ! There are still some active ADT's, but the highest ones
+            ! are inactive. Therefore ADTs is reallocated. First allocate
+            ! the memory for tmpADTs to be able to retrieve the currently
+            ! stored data later on.
+
+            nAllocNew = nn
+            allocate (tmpADTs(nAllocNew), stat=ierr)
+            if (ierr /= 0) &
+                call adtTerminate(ADTs(jj), "adtDeallocateADTs", &
+                                  "Memory allocation failure for tmpADTs")
+
+            ! Copy the data from ADTs to tmpADTs.
+
+            do nn = 1, nAllocNew
+                tmpADTs(nn) = ADTs(nn)
+            end do
 
-          r = stack(jStack)
-          l = stack(jStack-1)
-          jStack = jStack - 2
+            ! Deallocate and allocate the memory for ADTs. Note that
+            ! adtTerminate is not called when the memory allocation fails.
+            ! The reason is that the processor ID for the current tree is
+            ! used in this routine and that value may not be available
+            ! anymore.
 
-       else testInsertion
+            deallocate (ADTs, stat=ierr)
+            if (ierr /= 0) stop "Deallocation failure for ADTs"
 
-          ! Subarray is larger than the threshold for a linear sort.
-          ! Choose median of left, center and right elements as
-          ! partitioning element a. Also rearrange so that
-          ! (l) <= (l+1) <= (r).
+            allocate (ADTs(nAllocNew), stat=ierr)
+            if (ierr /= 0) stop "Allocation failure for ADTs"
 
-          k = (l+r)/2
-          tmp      = arr(k)      ! Swap the elements
-          arr(k)   = arr(l+1)    ! k and l+1.
-          arr(l+1) = tmp
+            ! Copy the data back into ADTs and release the memory of
+            ! tmpADTs afterwards.
 
-          if(xBBox(dir,arr(r)) < xBBox(dir,arr(l))) then
-             tmp    = arr(l)             ! Swap the elements
-             arr(l) = arr(r)             ! r and l.
-             arr(r) = tmp
-          endif
+            do nn = 1, nAllocNew
+                ADTs(nn) = tmpADTs(nn)
+            end do
 
-          if(xBBox(dir,arr(r)) < xBBox(dir,arr(l+1))) then
-             tmp      = arr(l+1)         ! Swap the elements
-             arr(l+1) = arr(r)           ! r and l+1.
-             arr(r)   = tmp
-          endif
+            deallocate (tmpADTs, stat=ierr)
+            if (ierr /= 0) stop "Deallocation failure for tmpADTs"
 
-          if(xBBox(dir,arr(l+1)) < xBBox(dir,arr(l))) then
-             tmp      = arr(l+1)         ! Swap the elements
-             arr(l+1) = arr(l)           ! l and l+1.
-             arr(l)   = tmp
-          endif
+        end if
 
-          ! Initialize the pointers for partitioning.
+    end subroutine deallocateADTs
 
-          i  = l+1
-          j  = r
-          a  = arr(l+1)
-          ra = xBBox(dir,a)
+    subroutine qsortBBoxes(arr, nn, ADT, dir)
+        !
+        !        This routine sorts the integer array arr, such that the
+        !        coordinate of the corresponding bounding box in the
+        !        direction dir is in increasing order. Note that the array to
+        !        store the stack is stored in this module. The reason is that
+        !        this routine is called quite often and in this way constant
+        !        allocation, deallocation and reallocation of stack is
+        !        avoided.
+        !        Subroutine intent(in) arguments.
+        !        --------------------------------
+        !        nn:  Size of the array to be sorted.
+        !        ADT: The ADTfrom which the coordinate of
+        !             the bounding box must be taken.
+        !        dir: Index of the coordinate, which must be sorted.
+        !        Subroutine intent(inout) arguments.
+        !        -----------------------------------
+        !        arr(:): On input it contains the bounding box ID's which
+        !                must be sorted. On output these ID's are sorted,
+        !                such that the given coordinate is in increasing
+        !                order.
+        !
+        implicit none
+        !
+        !       Subroutine arguments.
+        !
+        type(adtType), intent(in) :: ADT
+        integer(kind=intType), intent(in) :: nn, dir
 
-          ! The innermost loop.
+        integer(kind=intType), dimension(:), intent(inout) :: arr
+        !
+        !       Local parameters.
+        !
+        integer(kind=intType), parameter :: m = 7
+        !
+        !       Local variables.
+        !
+        integer(kind=intType) :: i, j, k, r, l, jStack
+        integer(kind=intType) :: a, tmp
+
+        real(kind=realType) :: ra
+        real(kind=realType), dimension(:, :), pointer :: xBBox
 
-          innerLoop: do
+        ! Set the pointer for the coordinates of the bounding boxes.
+
+        xBBox => ADT%xBBox
+
+        ! Initialize the variables that control the sorting.
+
+        jStack = 0
+        l = 1
+        r = nn
+
+        ! Start of the algorithm.
 
-             ! Scan up to find element >= a.
-             do
-                i = i+1
-                if(ra <= xBBox(dir,arr(i))) exit
-             enddo
+        sortLoop: do
+
+            ! Check for the size of the subarray.
+
+            testInsertion: if ((r - l) < m) then
 
-             ! Scan down to find element <= a.
-             do
-                j = j-1
-                if(xBBox(dir,arr(j)) <= ra) exit
-             enddo
+                ! Perform the insertion sort.
 
-             ! Exit the loop in case the pointers i and j crossed.
+                do j = (l + 1), r
+                    a = arr(j)
+                    ra = xBBox(dir, a)
+                    do i = (j - 1), l, -1
+                        if (xBBox(dir, arr(i)) <= ra) exit
+                        arr(i + 1) = arr(i)
+                    end do
+                    arr(i + 1) = a
+                end do
+
+                ! In case there are no more elements on the stack, exit from
+                ! the outermost do-loop. Algorithm has finished.
+
+                if (jStack == 0) exit sortLoop
 
-             if(j < i) exit innerLoop
+                ! Pop stack and begin a new round of partitioning.
 
-             ! Swap the element i and j.
+                r = stack(jStack)
+                l = stack(jStack - 1)
+                jStack = jStack - 2
+
+                else testInsertion
+
+                ! Subarray is larger than the threshold for a linear sort.
+                ! Choose median of left, center and right elements as
+                ! partitioning element a. Also rearrange so that
+                ! (l) <= (l+1) <= (r).
+
+                k = (l + r) / 2
+                tmp = arr(k)      ! Swap the elements
+                arr(k) = arr(l + 1)    ! k and l+1.
+                arr(l + 1) = tmp
+
+                if (xBBox(dir, arr(r)) < xBBox(dir, arr(l))) then
+                    tmp = arr(l)             ! Swap the elements
+                    arr(l) = arr(r)             ! r and l.
+                    arr(r) = tmp
+                end if
+
+                if (xBBox(dir, arr(r)) < xBBox(dir, arr(l + 1))) then
+                    tmp = arr(l + 1)         ! Swap the elements
+                    arr(l + 1) = arr(r)           ! r and l+1.
+                    arr(r) = tmp
+                end if
+
+                if (xBBox(dir, arr(l + 1)) < xBBox(dir, arr(l))) then
+                    tmp = arr(l + 1)         ! Swap the elements
+                    arr(l + 1) = arr(l)           ! l and l+1.
+                    arr(l) = tmp
+                end if
+
+                ! Initialize the pointers for partitioning.
+
+                i = l + 1
+                j = r
+                a = arr(l + 1)
+                ra = xBBox(dir, a)
 
-             tmp    = arr(i)
-             arr(i) = arr(j)
-             arr(j) = tmp
+                ! The innermost loop.
 
-          enddo innerLoop
+                innerLoop: do
 
-          ! Swap the entries j and l+1. Remember that a equals
-          ! arr(l+1).
+                    ! Scan up to find element >= a.
+                    do
+                        i = i + 1
+                        if (ra <= xBBox(dir, arr(i))) exit
+                    end do
 
-          arr(l+1) = arr(j)
-          arr(j)   = a
+                    ! Scan down to find element <= a.
+                    do
+                        j = j - 1
+                        if (xBBox(dir, arr(j)) <= ra) exit
+                    end do
 
-          ! Push pointers to larger subarray on stack; process smaller
-          ! subarray immediately. Check if enough memory is available.
-          ! If not reallocate it.
+                    ! Exit the loop in case the pointers i and j crossed.
 
-          jStack = jStack + 2
+                    if (j < i) exit innerLoop
 
-          if(jStack > nStack) call reallocPlus(stack, nStack, 100, ADT)
+                    ! Swap the element i and j.
 
-          if((r-i+1) >= (j-l)) then
-             stack(jStack)   = r
-             r               = j-1
-             stack(jStack-1) = j
-          else
-             stack(jStack)   = j-1
-             stack(jStack-1) = l
-             l               = j
-          endif
+                    tmp = arr(i)
+                    arr(i) = arr(j)
+                    arr(j) = tmp
 
-       endif testInsertion
-    enddo sortLoop
+                end do innerLoop
 
-    ! Check in debug mode if the sort has been done correctly.
+                ! Swap the entries j and l+1. Remember that a equals
+                ! arr(l+1).
 
-    if( debug ) then
-       do i=1,(nn-1)
-          if(xBBox(dir,arr(i+1)) < xBBox(dir,arr(i))) then
-             call adtTerminate(ADT, "qsortBBoxes", &
-                  "Array is not sorted correctly")
-          endif
-       enddo
-    endif
-
-  end subroutine qsortBBoxes
-
-
-  subroutine qsortBBoxTargets(arr, nn, ADT)
-    !
-    !        This routine sorts the given number of bounding box targets
-    !        in increasing order, based on the generalized < operator.
-    !
-    implicit none
-    !
-    !       Subroutine arguments
-    !
-    type(adtType), intent(in) :: ADT
-    integer(kind=intType), intent(in) :: nn
-
-    type(adtBBoxTargetType), dimension(:), pointer :: arr
-    !
-    !       Local variables
-    !
-    integer(kind=intType), parameter :: m = 7
+                arr(l + 1) = arr(j)
+                arr(j) = a
 
-    integer(kind=intType) :: i, j, k, r, l, jStack
+                ! Push pointers to larger subarray on stack; process smaller
+                ! subarray immediately. Check if enough memory is available.
+                ! If not reallocate it.
 
-    type(adtBBoxTargetType) :: a, tmp
+                jStack = jStack + 2
 
-    ! Initialize the variables that control the sorting.
+                if (jStack > nStack) call reallocPlus(stack, nStack, 100, ADT)
 
-    jStack = 0
-    l      = 1
-    r      = nn
+                if ((r - i + 1) >= (j - l)) then
+                    stack(jStack) = r
+                    r = j - 1
+                    stack(jStack - 1) = j
+                else
+                    stack(jStack) = j - 1
+                    stack(jStack - 1) = l
+                    l = j
+                end if
 
-    ! Start of the algorithm
+            end if testInsertion
+        end do sortLoop
 
-    sortLoop: do
+        ! Check in debug mode if the sort has been done correctly.
 
-       ! Check for the size of the subarray.
+        if (debug) then
+            do i = 1, (nn - 1)
+                if (xBBox(dir, arr(i + 1)) < xBBox(dir, arr(i))) then
+                    call adtTerminate(ADT, "qsortBBoxes", &
+                                      "Array is not sorted correctly")
+                end if
+            end do
+        end if
 
-       testInsertion: if((r-l) < m) then
+    end subroutine qsortBBoxes
 
-          ! Perform insertion sort
+    subroutine qsortBBoxTargets(arr, nn, ADT)
+        !
+        !        This routine sorts the given number of bounding box targets
+        !        in increasing order, based on the generalized < operator.
+        !
+        implicit none
+        !
+        !       Subroutine arguments
+        !
+        type(adtType), intent(in) :: ADT
+        integer(kind=intType), intent(in) :: nn
 
-          do j=l+1,r
-             a = arr(j)
-             do i=(j-1),l,-1
-                if(arr(i) <= a) exit
-                arr(i+1) = arr(i)
-             enddo
-             arr(i+1) = a
-          enddo
+        type(adtBBoxTargetType), dimension(:), pointer :: arr
+        !
+        !       Local variables
+        !
+        integer(kind=intType), parameter :: m = 7
 
-          ! In case there are no more elements on the stack, exit from
-          ! the outermost do-loop. Algorithm has finished.
+        integer(kind=intType) :: i, j, k, r, l, jStack
 
-          if(jStack == 0) exit sortLoop
+        type(adtBBoxTargetType) :: a, tmp
 
-          ! Pop stack and begin a new round of partitioning.
+        ! Initialize the variables that control the sorting.
 
-          r = stack(jStack)
-          l = stack(jStack-1)
-          jStack = jStack - 2
+        jStack = 0
+        l = 1
+        r = nn
 
-       else testInsertion
+        ! Start of the algorithm
 
-          ! Subarray is larger than the threshold for a linear sort.
-          ! Choose median of left, center and right elements as
-          ! partitioning element a. Also rearrange so that
-          ! (l) <= (l+1) <= (r).
+        sortLoop: do
 
-          k = (l+r)/2
-          tmp      = arr(k)      ! Swap the elements
-          arr(k)   = arr(l+1)    ! k and l+1.
-          arr(l+1) = tmp
+            ! Check for the size of the subarray.
 
-          if(arr(r) < arr(l)) then
-             tmp    = arr(l)             ! Swap the elements
-             arr(l) = arr(r)             ! r and l.
-             arr(r) = tmp
-          endif
+            testInsertion: if ((r - l) < m) then
 
-          if(arr(r) < arr(l+1)) then
-             tmp      = arr(l+1)         ! Swap the elements
-             arr(l+1) = arr(r)           ! r and l+1.
-             arr(r)   = tmp
-          endif
+                ! Perform insertion sort
 
-          if(arr(l+1) < arr(l)) then
-             tmp      = arr(l+1)         ! Swap the elements
-             arr(l+1) = arr(l)           ! l and l+1.
-             arr(l)   = tmp
-          endif
-
-          ! Initialize the pointers for partitioning.
-
-          i = l+1
-          j = r
-          a = arr(l+1)
-
-          ! The innermost loop
-
-          innerLoop: do
-
-             ! Scan up to find element >= a.
-             do
-                i = i+1
-                if(a <= arr(i)) exit
-             enddo
-
-             ! Scan down to find element <= a.
-             do
-                j = j-1
-                if(arr(j) <= a) exit
-             enddo
-
-             ! Exit the loop in case the pointers i and j crossed.
-
-             if(j < i) exit innerLoop
-
-             ! Swap the element i and j.
-
-             tmp    = arr(i)
-             arr(i) = arr(j)
-             arr(j) = tmp
-
-          enddo innerLoop
-
-          ! Swap the entries j and l+1. Remember that a equals
-          ! arr(l+1).
-
-          arr(l+1) = arr(j)
-          arr(j)   = a
-
-          ! Push pointers to larger subarray on stack; process smaller
-          ! subarray immediately. Check if enough memory is available.
-          ! If not reallocate it.
-
-          jStack = jStack + 2
-
-          if(jStack > nStack) call reallocPlus(stack, nStack, 100, ADT)
-
-          if((r-i+1) >= (j-l)) then
-             stack(jStack)   = r
-             r               = j-1
-             stack(jStack-1) = j
-          else
-             stack(jStack)   = j-1
-             stack(jStack-1) = l
-             l               = j
-          endif
-
-       endif testInsertion
-    enddo sortLoop
-
-    ! Check in debug mode whether the array is really sorted.
-
-    if( debug ) then
-       do i=1,(nn-1)
-          if(arr(i+1) < arr(i))                       &
-               call adtTerminate(ADT, "qsortBBoxTargets", &
-               "Array is not sorted correctly")
-       enddo
-    endif
-
-  end subroutine qsortBBoxTargets
-
-
-  subroutine reallocateADTs(adtID, jj)
-    !
-    !        This routine reallocates the memory for the ADTs array, such
-    !        that it is possible to store a new ADT. First it is tried to
-    !        find an empty spot in the currently allocated array. If this
-    !        is not present a true reallocation takes place.
-    !        Subroutine intent(in) arguments.
-    !        --------------------------------
-    !        adtID: The ID of the ADT.
-    !        Subroutine intent(out) arguments.
-    !        ---------------------------------
-    !        jj: The index in the array ADTs, where the new entry will be
-    !            stored.
-    !
-    implicit none
-    !
-    !       Subroutine arguments.
-    !
-    character(len=*), intent(in)          :: adtID
-    integer(kind=intType), intent(out) :: jj
-    !
-    !       Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: nn, nAlloc
-
-    type(adtType), dimension(:), allocatable :: tmpADTs
-
-    ! Determine the current size of ADTs and look for an empty spot
-    ! in the currently allocated array. Also check if an ADT with
-    ! the given ID is not already active. A linear search algorithm
-    ! is used, because the number of ADT's stored is limited.
-
-    nAlloc = ubound(ADTs, 1)
-    jj = nAlloc + 1
-    do nn=1,nAlloc
-       if( ADTs(nn)%isActive ) then
-          if(adtID == ADTs(nn)%adtID) exit
-       else if(jj > nAlloc) then
-          jj = nn
-       endif
-    enddo
-
-    ! If the given ID corresponds to an already active tree,
-    ! terminate. To avoid a messy output only processor 0 prints
-    ! an error message while the other ones wait to get killed.
-
-    if(nn <= nAlloc) then
-       if(ADTs(nn)%myID == 0)                    &
-            call adtTerminate(ADTs(nn), "reallocateADTs", &
-            "Given ID corresponds to an already &
-            &active ADT")
-       call mpi_barrier(ADTs(nn)%comm, ierr)
-    endif
-
-    ! Check if a reallocate must be done.
-
-    checkReallocate: if(jj > nAlloc) then
-
-       ! No empty spot present in ADTs. A true reallocation must be
-       ! performed. First allocate the memory for tmpADTs to be able
-       ! to retrieve the currently stored data later on. Note that
-       ! adtTerminate is not called when the memory allocation fails.
-       ! The reason is that the processor ID for the current tree is
-       ! used in this routine and that value has not been set yet.
-
-       allocate(tmpADTs(nAlloc), stat=ierr)
-       if(ierr /= 0) stop "Allocation failure for tmpADTs"
-
-       ! Copy the data from ADTs to tmpADTs.
-
-       do nn=1,nAlloc
-          tmpADTs(nn) = ADTs(nn)
-       enddo
-
-       ! Release the memory of ADTs and allocate it again with
-       ! increased size.
-
-       deallocate(ADTs, stat=ierr)
-       if(ierr /= 0) stop "Deallocation failure for ADTs"
-
-       allocate(ADTs(jj), stat=ierr)
-       if(ierr /= 0) stop "Allocation failure for ADTs"
-
-       ! Copy the data back from tmpADTs.
-
-       do nn=1,nAlloc
-          ADTs(nn) = tmpADTs(nn)
-       enddo
-
-       ! Release the memory of tmpADTs.
-
-       deallocate(tmpADTs, stat=ierr)
-       if(ierr /= 0) stop "Deallocation failure for tmpADTs"
-
-       ! Nullify the pointers of the new entry.
-
-       nullify(ADTs(jj)%coor)
-
-       nullify(ADTs(jj)%triaConn)
-       nullify(ADTs(jj)%quadsConn)
-       nullify(ADTs(jj)%tetraConn)
-       nullify(ADTs(jj)%pyraConn)
-       nullify(ADTs(jj)%prismsConn)
-       nullify(ADTs(jj)%hexaConn)
-
-       nullify(ADTs(jj)%rootLeavesProcs)
-       nullify(ADTs(jj)%rootBBoxes)
-
-       nullify(ADTs(jj)%elementType)
-       nullify(ADTs(jj)%elementID)
-       nullify(ADTs(jj)%xBBox)
-
-       nullify(ADTs(jj)%ADTree)
-
-    endif checkReallocate
-
-  end subroutine reallocateADTs
-
-
-  subroutine reallocBBoxTargetTypePlus(arr, nSize, nInc, ADT)
-    !
-    !        This routine reallocates the memory of the given
-    !        adtBBoxTargetType pointer array.
-    !        Subroutine intent(in) arguments.
-    !        --------------------------------
-    !        ADT:         Currently active ADT.
-    !        nInc: Increment of the size of the array.
-    !        Subroutine intent(inout) arguments.
-    !        -----------------------------------
-    !        nSize: On input it contains the size of the given array.
-    !               On output this value is incremented by nInc.
-    !        Subroutine pointer arguments.
-    !        -----------------------------
-    !        arr: Array to be reallocated.
-    !
-    implicit none
-    !
-    !       Subroutine arguments.
-    !
-    type(adtType), intent(in) :: ADT
-    integer,                  intent(in)    :: nInc
-    integer(kind=intType), intent(inout) :: nSize
-
-    type(adtBBoxTargetType), dimension(:), pointer :: arr
-    !
-    !       Local variables.
-    !
-    integer :: ierr
-    integer(kind=intType) :: i, nOld
-
-    type(adtBBoxTargetType), dimension(:), pointer :: tmp
-
-    ! Store the input value of nSize and set the pointer tmp to the
-    ! original array.
-
-    nOld = nSize
-    tmp => arr
-
-    ! Allocate the new memory for the array.
-
-    nSize = nSize + nInc
-    allocate(arr(nSize), stat=ierr)
-    if(ierr /= 0) &
-         call adtTerminate(ADT, "reallocBBoxTargetTypePlus", &
-         "Memory allocation failure for arr.")
-
-    ! Copy the data from the original array into arr.
-
-    nOld = min(nOld,nSize)
-    do i=1,nOld
-       arr(i) = tmp(i)
-    enddo
-
-    ! Release the memory of tmp, which points to the original
-    ! memory of the given array.
-
-    deallocate(tmp, stat=ierr)
-    if(ierr /= 0) &
-         call adtTerminate(ADT, "reallocBBoxTargetTypePlus", &
-         "Deallocation failure for tmp.")
-
-  end subroutine reallocBBoxTargetTypePlus
-
-
-  subroutine reallocPlus(arr, nSize, nInc, ADT)
-    !
-    !        This internal routine reallocates the memory of the given
-    !        pointer array.
-    !        Subroutine intent(in) arguments.
-    !        --------------------------------
-    !        ADT:         Currently active ADT.
-    !        nInc: Increment of the size of the array.
-    !        Subroutine intent(inout) arguments.
-    !        -----------------------------------
-    !        nSize: On input it contains the size of the given array.
-    !               On output this value is incremented by nInc.
-    !        Subroutine pointer arguments.
-    !        -----------------------------
-    !        arr: Array to be reallocated.
-    !
-    implicit none
-    !
-    !       Subroutine arguments.
-    !
-    type(adtType), intent(in) :: ADT
-    integer,                  intent(in)    :: nInc
-    integer(kind=intType), intent(inout) :: nSize
-
-    integer(kind=intType), dimension(:), pointer :: arr
-    !
-    !       Local variables.
-    !
-    integer :: ierr
-    integer(kind=intType) :: i, nOld
+                do j = l + 1, r
+                    a = arr(j)
+                    do i = (j - 1), l, -1
+                        if (arr(i) <= a) exit
+                        arr(i + 1) = arr(i)
+                    end do
+                    arr(i + 1) = a
+                end do
 
-    integer(kind=intType), dimension(:), pointer :: tmp
+                ! In case there are no more elements on the stack, exit from
+                ! the outermost do-loop. Algorithm has finished.
 
-    ! Store the input value of nSize and set the pointer tmp to the
-    ! original array.
+                if (jStack == 0) exit sortLoop
 
-    nOld = nSize
-    tmp => arr
+                ! Pop stack and begin a new round of partitioning.
 
-    ! Allocate the new memory for the array.
+                r = stack(jStack)
+                l = stack(jStack - 1)
+                jStack = jStack - 2
 
-    nSize = nSize + nInc
-    allocate(arr(nSize), stat=ierr)
-    if(ierr /= 0)                          &
-         call adtTerminate(ADT, "reallocPlus", &
-         "Memory allocation failure for arr.")
+                else testInsertion
 
-    ! Copy the data from the original array into arr.
+                ! Subarray is larger than the threshold for a linear sort.
+                ! Choose median of left, center and right elements as
+                ! partitioning element a. Also rearrange so that
+                ! (l) <= (l+1) <= (r).
 
-    nOld = min(nOld,nSize)
-    do i=1,nOld
-       arr(i) = tmp(i)
-    enddo
+                k = (l + r) / 2
+                tmp = arr(k)      ! Swap the elements
+                arr(k) = arr(l + 1)    ! k and l+1.
+                arr(l + 1) = tmp
 
-    ! Release the memory of tmp, which points to the original
-    ! memory of the given array.
+                if (arr(r) < arr(l)) then
+                    tmp = arr(l)             ! Swap the elements
+                    arr(l) = arr(r)             ! r and l.
+                    arr(r) = tmp
+                end if
+
+                if (arr(r) < arr(l + 1)) then
+                    tmp = arr(l + 1)         ! Swap the elements
+                    arr(l + 1) = arr(r)           ! r and l+1.
+                    arr(r) = tmp
+                end if
+
+                if (arr(l + 1) < arr(l)) then
+                    tmp = arr(l + 1)         ! Swap the elements
+                    arr(l + 1) = arr(l)           ! l and l+1.
+                    arr(l) = tmp
+                end if
+
+                ! Initialize the pointers for partitioning.
+
+                i = l + 1
+                j = r
+                a = arr(l + 1)
+
+                ! The innermost loop
+
+                innerLoop: do
+
+                    ! Scan up to find element >= a.
+                    do
+                        i = i + 1
+                        if (a <= arr(i)) exit
+                    end do
+
+                    ! Scan down to find element <= a.
+                    do
+                        j = j - 1
+                        if (arr(j) <= a) exit
+                    end do
+
+                    ! Exit the loop in case the pointers i and j crossed.
+
+                    if (j < i) exit innerLoop
+
+                    ! Swap the element i and j.
+
+                    tmp = arr(i)
+                    arr(i) = arr(j)
+                    arr(j) = tmp
+
+                end do innerLoop
+
+                ! Swap the entries j and l+1. Remember that a equals
+                ! arr(l+1).
+
+                arr(l + 1) = arr(j)
+                arr(j) = a
+
+                ! Push pointers to larger subarray on stack; process smaller
+                ! subarray immediately. Check if enough memory is available.
+                ! If not reallocate it.
+
+                jStack = jStack + 2
+
+                if (jStack > nStack) call reallocPlus(stack, nStack, 100, ADT)
+
+                if ((r - i + 1) >= (j - l)) then
+                    stack(jStack) = r
+                    r = j - 1
+                    stack(jStack - 1) = j
+                else
+                    stack(jStack) = j - 1
+                    stack(jStack - 1) = l
+                    l = j
+                end if
+
+            end if testInsertion
+        end do sortLoop
+
+        ! Check in debug mode whether the array is really sorted.
+
+        if (debug) then
+            do i = 1, (nn - 1)
+                if (arr(i + 1) < arr(i)) &
+                    call adtTerminate(ADT, "qsortBBoxTargets", &
+                                      "Array is not sorted correctly")
+            end do
+        end if
+
+    end subroutine qsortBBoxTargets
+
+    subroutine reallocateADTs(adtID, jj)
+        !
+        !        This routine reallocates the memory for the ADTs array, such
+        !        that it is possible to store a new ADT. First it is tried to
+        !        find an empty spot in the currently allocated array. If this
+        !        is not present a true reallocation takes place.
+        !        Subroutine intent(in) arguments.
+        !        --------------------------------
+        !        adtID: The ID of the ADT.
+        !        Subroutine intent(out) arguments.
+        !        ---------------------------------
+        !        jj: The index in the array ADTs, where the new entry will be
+        !            stored.
+        !
+        implicit none
+        !
+        !       Subroutine arguments.
+        !
+        character(len=*), intent(in) :: adtID
+        integer(kind=intType), intent(out) :: jj
+        !
+        !       Local variables.
+        !
+        integer :: ierr
 
-    deallocate(tmp, stat=ierr)
-    if(ierr /= 0)                          &
-         call adtTerminate(ADT, "reallocPlus", &
-         "Deallocation failure for tmp.")
+        integer(kind=intType) :: nn, nAlloc
 
-  end subroutine reallocPlus
+        type(adtType), dimension(:), allocatable :: tmpADTs
+
+        ! Determine the current size of ADTs and look for an empty spot
+        ! in the currently allocated array. Also check if an ADT with
+        ! the given ID is not already active. A linear search algorithm
+        ! is used, because the number of ADT's stored is limited.
+
+        nAlloc = ubound(ADTs, 1)
+        jj = nAlloc + 1
+        do nn = 1, nAlloc
+            if (ADTs(nn)%isActive) then
+                if (adtID == ADTs(nn)%adtID) exit
+            else if (jj > nAlloc) then
+                jj = nn
+            end if
+        end do
+
+        ! If the given ID corresponds to an already active tree,
+        ! terminate. To avoid a messy output only processor 0 prints
+        ! an error message while the other ones wait to get killed.
+
+        if (nn <= nAlloc) then
+            if (ADTs(nn)%myID == 0) &
+                 call adtTerminate(ADTs(nn), "reallocateADTs", &
+                 "Given ID corresponds to an already &
+                 &active ADT")
+            call mpi_barrier(ADTs(nn)%comm, ierr)
+        end if
+
+        ! Check if a reallocate must be done.
+
+        checkReallocate: if (jj > nAlloc) then
+
+            ! No empty spot present in ADTs. A true reallocation must be
+            ! performed. First allocate the memory for tmpADTs to be able
+            ! to retrieve the currently stored data later on. Note that
+            ! adtTerminate is not called when the memory allocation fails.
+            ! The reason is that the processor ID for the current tree is
+            ! used in this routine and that value has not been set yet.
+
+            allocate (tmpADTs(nAlloc), stat=ierr)
+            if (ierr /= 0) stop "Allocation failure for tmpADTs"
+
+            ! Copy the data from ADTs to tmpADTs.
+
+            do nn = 1, nAlloc
+                tmpADTs(nn) = ADTs(nn)
+            end do
+
+            ! Release the memory of ADTs and allocate it again with
+            ! increased size.
+
+            deallocate (ADTs, stat=ierr)
+            if (ierr /= 0) stop "Deallocation failure for ADTs"
+
+            allocate (ADTs(jj), stat=ierr)
+            if (ierr /= 0) stop "Allocation failure for ADTs"
+
+            ! Copy the data back from tmpADTs.
+
+            do nn = 1, nAlloc
+                ADTs(nn) = tmpADTs(nn)
+            end do
+
+            ! Release the memory of tmpADTs.
+
+            deallocate (tmpADTs, stat=ierr)
+            if (ierr /= 0) stop "Deallocation failure for tmpADTs"
+
+            ! Nullify the pointers of the new entry.
+
+            nullify (ADTs(jj)%coor)
+
+            nullify (ADTs(jj)%triaConn)
+            nullify (ADTs(jj)%quadsConn)
+            nullify (ADTs(jj)%tetraConn)
+            nullify (ADTs(jj)%pyraConn)
+            nullify (ADTs(jj)%prismsConn)
+            nullify (ADTs(jj)%hexaConn)
+
+            nullify (ADTs(jj)%rootLeavesProcs)
+            nullify (ADTs(jj)%rootBBoxes)
+
+            nullify (ADTs(jj)%elementType)
+            nullify (ADTs(jj)%elementID)
+            nullify (ADTs(jj)%xBBox)
+
+            nullify (ADTs(jj)%ADTree)
+
+        end if checkReallocate
+
+    end subroutine reallocateADTs
+
+    subroutine reallocBBoxTargetTypePlus(arr, nSize, nInc, ADT)
+        !
+        !        This routine reallocates the memory of the given
+        !        adtBBoxTargetType pointer array.
+        !        Subroutine intent(in) arguments.
+        !        --------------------------------
+        !        ADT:         Currently active ADT.
+        !        nInc: Increment of the size of the array.
+        !        Subroutine intent(inout) arguments.
+        !        -----------------------------------
+        !        nSize: On input it contains the size of the given array.
+        !               On output this value is incremented by nInc.
+        !        Subroutine pointer arguments.
+        !        -----------------------------
+        !        arr: Array to be reallocated.
+        !
+        implicit none
+        !
+        !       Subroutine arguments.
+        !
+        type(adtType), intent(in) :: ADT
+        integer, intent(in) :: nInc
+        integer(kind=intType), intent(inout) :: nSize
+
+        type(adtBBoxTargetType), dimension(:), pointer :: arr
+        !
+        !       Local variables.
+        !
+        integer :: ierr
+        integer(kind=intType) :: i, nOld
+
+        type(adtBBoxTargetType), dimension(:), pointer :: tmp
+
+        ! Store the input value of nSize and set the pointer tmp to the
+        ! original array.
+
+        nOld = nSize
+        tmp => arr
+
+        ! Allocate the new memory for the array.
+
+        nSize = nSize + nInc
+        allocate (arr(nSize), stat=ierr)
+        if (ierr /= 0) &
+            call adtTerminate(ADT, "reallocBBoxTargetTypePlus", &
+                              "Memory allocation failure for arr.")
+
+        ! Copy the data from the original array into arr.
+
+        nOld = min(nOld, nSize)
+        do i = 1, nOld
+            arr(i) = tmp(i)
+        end do
+
+        ! Release the memory of tmp, which points to the original
+        ! memory of the given array.
+
+        deallocate (tmp, stat=ierr)
+        if (ierr /= 0) &
+            call adtTerminate(ADT, "reallocBBoxTargetTypePlus", &
+                              "Deallocation failure for tmp.")
+
+    end subroutine reallocBBoxTargetTypePlus
+
+    subroutine reallocPlus(arr, nSize, nInc, ADT)
+        !
+        !        This internal routine reallocates the memory of the given
+        !        pointer array.
+        !        Subroutine intent(in) arguments.
+        !        --------------------------------
+        !        ADT:         Currently active ADT.
+        !        nInc: Increment of the size of the array.
+        !        Subroutine intent(inout) arguments.
+        !        -----------------------------------
+        !        nSize: On input it contains the size of the given array.
+        !               On output this value is incremented by nInc.
+        !        Subroutine pointer arguments.
+        !        -----------------------------
+        !        arr: Array to be reallocated.
+        !
+        implicit none
+        !
+        !       Subroutine arguments.
+        !
+        type(adtType), intent(in) :: ADT
+        integer, intent(in) :: nInc
+        integer(kind=intType), intent(inout) :: nSize
+
+        integer(kind=intType), dimension(:), pointer :: arr
+        !
+        !       Local variables.
+        !
+        integer :: ierr
+        integer(kind=intType) :: i, nOld
+
+        integer(kind=intType), dimension(:), pointer :: tmp
+
+        ! Store the input value of nSize and set the pointer tmp to the
+        ! original array.
+
+        nOld = nSize
+        tmp => arr
+
+        ! Allocate the new memory for the array.
+
+        nSize = nSize + nInc
+        allocate (arr(nSize), stat=ierr)
+        if (ierr /= 0) &
+            call adtTerminate(ADT, "reallocPlus", &
+                              "Memory allocation failure for arr.")
+
+        ! Copy the data from the original array into arr.
+
+        nOld = min(nOld, nSize)
+        do i = 1, nOld
+            arr(i) = tmp(i)
+        end do
+
+        ! Release the memory of tmp, which points to the original
+        ! memory of the given array.
+
+        deallocate (tmp, stat=ierr)
+        if (ierr /= 0) &
+            call adtTerminate(ADT, "reallocPlus", &
+                              "Deallocation failure for tmp.")
+
+    end subroutine reallocPlus
 
 end module adtUtils
diff --git a/src/NKSolver/NKSolvers.F90 b/src/NKSolver/NKSolvers.F90
index f59a99d60..a65d4c7ca 100644
--- a/src/NKSolver/NKSolvers.F90
+++ b/src/NKSolver/NKSolvers.F90
@@ -1,3473 +1,3906 @@
 module NKSolver
 
-  use constants
+    use constants
 
-  ! MPI comes from constants, so we need to avoid MPIF_H in PETSc
+    ! MPI comes from constants, so we need to avoid MPIF_H in PETSc
 #include <petsc/finclude/petsc.h>
-  use petsc
-  implicit none
-
-  ! PETSc Matrices:
-  ! dRdw: This is the actual matrix-free matrix computed with FD
-  ! dRdwPre: The preconditoner matrix for NK method. This matrix is stored.
-  ! dRdwPseudo: Shell matrix used with the pseudo-transient
-  !             continuation method.
-
-  Mat  dRdw, dRdwPre, dRdwPseudo
-
-  ! PETSc Vectors:
-  ! wVec: PETsc version of ADflow 'w'
-  ! rVec: PETSc version of ADflow 'dw', but divided by volume
-  ! deltaW: Update to the wVec from linear solution
-  ! diagV: Diagonal lumping term
-
-  Vec wVec, rVec, deltaW, work, g, baseRes
-
-  ! NK_KSP: The ksp object for solving the newton udpate
-  KSP  NK_KSP
-
-  PetscFortranAddr   ctx(1)
-
-  ! Options for NK Slver
-  logical :: useNKSolver
-  integer(kind=intType) :: NK_jacobianLag
-  integer(kind=intType) :: NK_subspace
-  integer(kind=intType) :: NK_asmOverlap
-  integer(kind=intType) :: NK_iluFill
-  integer(kind=intType) :: NK_innerPreConIts
-  integer(kind=intType) :: NK_outerPreConIts
-  integer(kind=intType) :: NK_LS
-  logical :: NK_useEW
-  logical :: NK_ADPC
-  logical :: NK_viscPC
-  real(kind=realType) :: NK_CFL0
-  real(kind=realType) :: NK_switchTol
-  real(kind=realType) :: NK_rtolInit
-  real(kind=realType) :: NK_divTol = 10
-  real(kind=realType) :: NK_fixedStep
-
-  ! Misc variables
-  logical :: NK_solverSetup=.False.
-  integer(kind=intType) :: NK_iter
-
-  ! Eisenstat-Walker Parameters
-  integer(kind=intType) :: ew_version
-  real(kind=realType) :: ew_rtol_0
-  real(kind=realType) :: ew_rtol_max
-  real(kind=realType) :: ew_gamma
-  real(kind=realType) :: ew_alpha
-  real(kind=realType) :: ew_alpha2
-  real(kind=realType) :: ew_threshold
-  real(kind=alwaysRealType) :: rtolLast, oldNorm
-
-  ! Misc Parameters
-  logical :: freeStreamResSet=.False.
-  real(kind=realType) :: NK_CFL
-
-  ! Variables for non-monotone line search
-  real(kind=realType), dimension(:), allocatable :: NKLSFuncEvals
-  integer(kind=intType) :: Mmax=5
-  integer(kind=intType) :: iter_k
-  integer(kind=intType) :: iter_m
-
-  ! Parameter for external preconditioner
-  integer(kind=intType) :: applyPCSubSpaceSize
-
-contains
-
-  subroutine setupNKsolver
-
-    ! Setup the PETSc objects for the Newton-Krylov
-    ! solver. destroyNKsolver can be used to destroy the objects created
-    ! in this function
-
-    use constants
-    use stencils, only : visc_pc_stencil, euler_pc_stencil, N_visc_pc, N_euler_pc
-    use communication, only : adflow_comm_world
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use inputIteration, only : useLinResMonitor
-    use flowVarRefState, only : nw, viscous
-    use InputAdjoint, only: viscPC, precondtype
-    use ADjointVars , only: nCellsLocal
-    use utils, only : EChk
-    use adjointUtils, only : myMatCreate, statePreAllocation
-    use agmg, only : setupAGMG
+    use petsc
     implicit none
 
-    ! Working Variables
-    integer(kind=intType) :: ierr, nDimw
-    integer(kind=intType) , dimension(:), allocatable :: nnzDiagonal, nnzOffDiag
-    integer(kind=intType) :: n_stencil
-    integer(kind=intType), dimension(:, :), pointer :: stencil
-    integer(kind=intType) :: level
-
-    ! Make sure we don't have memory for the approximate and exact
-    ! Newton solvers kicking around at the same time.
-    !call destroyANKSolver()
+    ! PETSc Matrices:
+    ! dRdw: This is the actual matrix-free matrix computed with FD
+    ! dRdwPre: The preconditoner matrix for NK method. This matrix is stored.
+    ! dRdwPseudo: Shell matrix used with the pseudo-transient
+    !             continuation method.
+
+    Mat dRdw, dRdwPre, dRdwPseudo
+
+    ! PETSc Vectors:
+    ! wVec: PETsc version of ADflow 'w'
+    ! rVec: PETSc version of ADflow 'dw', but divided by volume
+    ! deltaW: Update to the wVec from linear solution
+    ! diagV: Diagonal lumping term
+
+    Vec wVec, rVec, deltaW, work, g, baseRes
+
+    ! NK_KSP: The ksp object for solving the newton udpate
+    KSP NK_KSP
+
+    PetscFortranAddr ctx(1)
+
+    ! Options for NK Slver
+    logical :: useNKSolver
+    integer(kind=intType) :: NK_jacobianLag
+    integer(kind=intType) :: NK_subspace
+    integer(kind=intType) :: NK_asmOverlap
+    integer(kind=intType) :: NK_iluFill
+    integer(kind=intType) :: NK_innerPreConIts
+    integer(kind=intType) :: NK_outerPreConIts
+    integer(kind=intType) :: NK_LS
+    logical :: NK_useEW
+    logical :: NK_ADPC
+    logical :: NK_viscPC
+    real(kind=realType) :: NK_CFL0
+    real(kind=realType) :: NK_switchTol
+    real(kind=realType) :: NK_rtolInit
+    real(kind=realType) :: NK_divTol = 10
+    real(kind=realType) :: NK_fixedStep
+
+    ! Misc variables
+    logical :: NK_solverSetup = .False.
+    integer(kind=intType) :: NK_iter
+
+    ! Eisenstat-Walker Parameters
+    integer(kind=intType) :: ew_version
+    real(kind=realType) :: ew_rtol_0
+    real(kind=realType) :: ew_rtol_max
+    real(kind=realType) :: ew_gamma
+    real(kind=realType) :: ew_alpha
+    real(kind=realType) :: ew_alpha2
+    real(kind=realType) :: ew_threshold
+    real(kind=alwaysRealType) :: rtolLast, oldNorm
+
+    ! Misc Parameters
+    logical :: freeStreamResSet = .False.
+    real(kind=realType) :: NK_CFL
+
+    ! Variables for non-monotone line search
+    real(kind=realType), dimension(:), allocatable :: NKLSFuncEvals
+    integer(kind=intType) :: Mmax = 5
+    integer(kind=intType) :: iter_k
+    integer(kind=intType) :: iter_m
+
+    ! Parameter for external preconditioner
+    integer(kind=intType) :: applyPCSubSpaceSize
 
-    if (.not. NK_solverSetup) then
-       nDimW = nw * nCellsLocal(1_intTYpe) * nTimeIntervalsSpectral
+contains
 
-       call VecCreate(ADFLOW_COMM_WORLD, wVec, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+    subroutine setupNKsolver
+
+        ! Setup the PETSc objects for the Newton-Krylov
+        ! solver. destroyNKsolver can be used to destroy the objects created
+        ! in this function
+
+        use constants
+        use stencils, only: visc_pc_stencil, euler_pc_stencil, N_visc_pc, N_euler_pc
+        use communication, only: adflow_comm_world
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use inputIteration, only: useLinResMonitor
+        use flowVarRefState, only: nw, viscous
+        use InputAdjoint, only: viscPC, precondtype
+        use ADjointVars, only: nCellsLocal
+        use utils, only: EChk
+        use adjointUtils, only: myMatCreate, statePreAllocation
+        use agmg, only: setupAGMG
+        implicit none
+
+        ! Working Variables
+        integer(kind=intType) :: ierr, nDimw
+        integer(kind=intType), dimension(:), allocatable :: nnzDiagonal, nnzOffDiag
+        integer(kind=intType) :: n_stencil
+        integer(kind=intType), dimension(:, :), pointer :: stencil
+        integer(kind=intType) :: level
+
+        ! Make sure we don't have memory for the approximate and exact
+        ! Newton solvers kicking around at the same time.
+        !call destroyANKSolver()
+
+        if (.not. NK_solverSetup) then
+            nDimW = nw * nCellsLocal(1_intTYpe) * nTimeIntervalsSpectral
+
+            call VecCreate(ADFLOW_COMM_WORLD, wVec, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call VecSetSizes(wVec, nDimW, PETSC_DECIDE, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            call VecSetSizes(wVec, nDimW, PETSC_DECIDE, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call VecSetBlockSize(wVec, nw, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            call VecSetBlockSize(wVec, nw, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call VecSetType(wVec, VECMPI, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            call VecSetType(wVec, VECMPI, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       !  Create duplicates for residual and delta
-       call VecDuplicate(wVec, rVec, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            !  Create duplicates for residual and delta
+            call VecDuplicate(wVec, rVec, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call VecDuplicate(wVec, deltaW, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            call VecDuplicate(wVec, deltaW, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call VecDuplicate(wVec, baseRes, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            call VecDuplicate(wVec, baseRes, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       ! Create the two additional work vectors for the line search:
-       call VecDuplicate(wVec, g, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            ! Create the two additional work vectors for the line search:
+            call VecDuplicate(wVec, g, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call VecDuplicate(wVec, work, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            call VecDuplicate(wVec, work, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       ! Create Pre-Conditioning Matrix
-       allocate(nnzDiagonal(nCellsLocal(1_intType)*nTimeIntervalsSpectral), &
-            nnzOffDiag(nCellsLocal(1_intType)*nTimeIntervalsSpectral) )
+            ! Create Pre-Conditioning Matrix
+            allocate (nnzDiagonal(nCellsLocal(1_intType) * nTimeIntervalsSpectral), &
+                      nnzOffDiag(nCellsLocal(1_intType) * nTimeIntervalsSpectral))
 
-       if (viscous .and. NK_viscPC) then
-          stencil => visc_pc_stencil
-          n_stencil = N_visc_pc
-       else
-          stencil => euler_pc_stencil
-          n_stencil = N_euler_pc
-       end if
+            if (viscous .and. NK_viscPC) then
+                stencil => visc_pc_stencil
+                n_stencil = N_visc_pc
+            else
+                stencil => euler_pc_stencil
+                n_stencil = N_euler_pc
+            end if
 
-       level = 1
-       call statePreAllocation(nnzDiagonal, nnzOffDiag, nDimW/nw, stencil, n_stencil, &
-            level, .False.)
-       call myMatCreate(dRdwPre, nw, nDimW, nDimW, nnzDiagonal, nnzOffDiag, &
-            __FILE__, __LINE__)
+            level = 1
+            call statePreAllocation(nnzDiagonal, nnzOffDiag, nDimW / nw, stencil, n_stencil, &
+                                    level, .False.)
+            call myMatCreate(dRdwPre, nw, nDimW, nDimW, nnzDiagonal, nnzOffDiag, &
+                             __FILE__, __LINE__)
 
-       call matSetOption(dRdwPre, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-       deallocate(nnzDiagonal, nnzOffDiag)
+            call matSetOption(dRdwPre, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+            deallocate (nnzDiagonal, nnzOffDiag)
 
-       ! Setup Matrix-Free dRdw matrix and its function
-       call MatCreateMFFD(adflow_comm_world, nDimW, nDimW, &
-            PETSC_DETERMINE, PETSC_DETERMINE, dRdw, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            ! Setup Matrix-Free dRdw matrix and its function
+            call MatCreateMFFD(adflow_comm_world, nDimW, nDimW, &
+                               PETSC_DETERMINE, PETSC_DETERMINE, dRdw, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call MatMFFDSetFunction(dRdw, FormFunction_mf, ctx, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            call MatMFFDSetFunction(dRdw, FormFunction_mf, ctx, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       ! Setup a matrix free matrix for drdw
-       call MatCreateShell(ADFLOW_COMM_WORLD, nDimW, nDimW, PETSC_DETERMINE, &
-            PETSC_DETERMINE, ctx, dRdwPseudo, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            ! Setup a matrix free matrix for drdw
+            call MatCreateShell(ADFLOW_COMM_WORLD, nDimW, nDimW, PETSC_DETERMINE, &
+                                PETSC_DETERMINE, ctx, dRdwPseudo, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       ! Set the shell operation for doing matrix vector multiplies
-       call MatShellSetOperation(dRdwPseudo, MATOP_MULT, NKMatMult, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            ! Set the shell operation for doing matrix vector multiplies
+            call MatShellSetOperation(dRdwPseudo, MATOP_MULT, NKMatMult, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       ! Set the mat_row_oriented option to false so that dense
-       ! subblocks can be passed in in fortran column-oriented format
-       call MatSetOption(dRdWPre, MAT_ROW_ORIENTED, PETSC_FALSE, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            ! Set the mat_row_oriented option to false so that dense
+            ! subblocks can be passed in in fortran column-oriented format
+            call MatSetOption(dRdWPre, MAT_ROW_ORIENTED, PETSC_FALSE, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call MatSetOption(dRdW, MAT_ROW_ORIENTED, PETSC_FALSE, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            call MatSetOption(dRdW, MAT_ROW_ORIENTED, PETSC_FALSE, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       if (preCondType == 'mg') then
-          call setupAGMG(drdwpre, nDimW/nw, nw)
-       end if
+            if (preCondType == 'mg') then
+                call setupAGMG(drdwpre, nDimW / nw, nw)
+            end if
 
-       !  Create the linear solver context
-       call KSPCreate(ADFLOW_COMM_WORLD, NK_KSP, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            !  Create the linear solver context
+            call KSPCreate(ADFLOW_COMM_WORLD, NK_KSP, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       ! Set operators for the solver
-       call KSPSetOperators(NK_KSP, dRdw, dRdwPre, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            ! Set operators for the solver
+            call KSPSetOperators(NK_KSP, dRdw, dRdwPre, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       if (useLinResMonitor) then
+            if (useLinResMonitor) then
 #if PETSC_VERSION_GE(3,8,0)
-          ! This could be wrong. There is no petsc_null_context???
-          call KSPMonitorSet(NK_KSP, linearResidualMonitor, PETSC_NULL_FUNCTION, &
-               PETSC_NULL_FUNCTION, ierr)
+                ! This could be wrong. There is no petsc_null_context???
+                call KSPMonitorSet(NK_KSP, linearResidualMonitor, PETSC_NULL_FUNCTION, &
+                                   PETSC_NULL_FUNCTION, ierr)
 #else
-          call KSPMonitorSet(NK_KSP, linearResidualMonitor, PETSC_NULL_OBJECT, &
-               PETSC_NULL_FUNCTION, ierr)
+                call KSPMonitorSet(NK_KSP, linearResidualMonitor, PETSC_NULL_OBJECT, &
+                                   PETSC_NULL_FUNCTION, ierr)
 #endif
-          call EChk(ierr, __FILE__, __LINE__)
-       end if
-
-       NK_solverSetup = .True.
-       NK_iter = 0
-    end if
-
-  end subroutine setupNKsolver
+                call EChk(ierr, __FILE__, __LINE__)
+            end if
 
-  subroutine linearResidualMonitor(myKSP, n, rnorm, dummy, ierr)
-    use communication, only : myid
-    implicit none
-    !
-    !     Subroutine arguments.
-    !
-    ! myKsp - Iterative context
-    ! n     - Iteration number
-    ! rnorm - 2-norm (preconditioned) residual value
-    ! dummy - Optional user-defined monitor context (unused here)
-    ! ierr  - Return error code
-
-    KSP myKSP
-    integer(kind=intType) :: n, dummy, ierr
-    real(kind=alwaysRealType)   :: rnorm
-
-    ! Write the residual norm to stdout every adjMonStep iterations.
-    if (myid == 0) then
-       print *, n, rnorm
-    end if
-    ierr = 0
-  end subroutine LinearResidualMonitor
-
-  subroutine NKMatMult(A, vecX,  vecY, ierr)
-
-    ! PETSc user-defied call back function for computing the product of
-    ! dRdw with a vector. Here we just call the much more broadly
-    ! useful routine computeMatrixFreeProductFwd()
+            NK_solverSetup = .True.
+            NK_iter = 0
+        end if
 
-    use constants
-    use utils, only : EChk
-    implicit none
+    end subroutine setupNKsolver
+
+    subroutine linearResidualMonitor(myKSP, n, rnorm, dummy, ierr)
+        use communication, only: myid
+        implicit none
+        !
+        !     Subroutine arguments.
+        !
+        ! myKsp - Iterative context
+        ! n     - Iteration number
+        ! rnorm - 2-norm (preconditioned) residual value
+        ! dummy - Optional user-defined monitor context (unused here)
+        ! ierr  - Return error code
+
+        KSP myKSP
+        integer(kind=intType) :: n, dummy, ierr
+        real(kind=alwaysRealType) :: rnorm
+
+        ! Write the residual norm to stdout every adjMonStep iterations.
+        if (myid == 0) then
+            print *, n, rnorm
+        end if
+        ierr = 0
+    end subroutine LinearResidualMonitor
 
-    ! PETSc Arguments
-    Mat   A
-    Vec   vecX, vecY
-    integer(kind=intType) ::ierr, i, j, k, l, nn, sps, ii
-    real(kind=realType) :: dt
-    real(kind=realType), pointer :: yPtr(:), xPtr(:)
+    subroutine NKMatMult(A, vecX, vecY, ierr)
 
-    ! Frist run the underlying matrix-free mult
-    call matMult(dRdw, vecX, vecY, ierr)
+        ! PETSc user-defied call back function for computing the product of
+        ! dRdw with a vector. Here we just call the much more broadly
+        ! useful routine computeMatrixFreeProductFwd()
 
-    call VecGetArrayF90(vecY, yPtr, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+        use constants
+        use utils, only: EChk
+        implicit none
 
-    call VecGetArrayReadF90(vecX, xPtr, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+        ! PETSc Arguments
+        Mat A
+        Vec vecX, vecY
+        integer(kind=intType) :: ierr, i, j, k, l, nn, sps, ii
+        real(kind=realType) :: dt
+        real(kind=realType), pointer :: yPtr(:), xPtr(:)
 
-    yPtr = yPtr + one/NK_CFL*xPtr
+        ! Frist run the underlying matrix-free mult
+        call matMult(dRdw, vecX, vecY, ierr)
 
-    call VecRestorearrayF90(vecY, yPtr, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+        call VecGetArrayF90(vecY, yPtr, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    call VecRestorearrayReadF90(vecX, xPtr, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+        call VecGetArrayReadF90(vecX, xPtr, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-  end subroutine NKMatMult
+        yPtr = yPtr + one / NK_CFL * xPtr
 
-  subroutine getFreeStreamResidual(rhoRes, totalRRes)
+        call VecRestorearrayF90(vecY, yPtr, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    use constants
-    use blockPointers, only : nDom, ib, jb, kb, w
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use flowVarRefState, only : nw, winf
-    use utils, only : setPointers
-    implicit none
+        call VecRestorearrayReadF90(vecX, xPtr, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    real(kind=realType), intent(out) :: rhoRes, totalRRes
-    real(kind=realType), dimension(:), allocatable :: tmp
-    integer(kind=intType) :: nDimW, nDimP, counter
-    integer(kind=intType) :: nn, sps, i, j, k, l, n
+    end subroutine NKMatMult
+
+    subroutine getFreeStreamResidual(rhoRes, totalRRes)
+
+        use constants
+        use blockPointers, only: nDom, ib, jb, kb, w
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use flowVarRefState, only: nw, winf
+        use utils, only: setPointers
+        implicit none
+
+        real(kind=realType), intent(out) :: rhoRes, totalRRes
+        real(kind=realType), dimension(:), allocatable :: tmp
+        integer(kind=intType) :: nDimW, nDimP, counter
+        integer(kind=intType) :: nn, sps, i, j, k, l, n
+
+        ! Get the residual cooresponding to the free-stream on the fine
+        ! grid-level --- This saves the current values in W, P, rlv and rev
+        ! and restores them when finished.
+
+        call getInfoSize(n)
+        allocate (tmp(n))
+        call getInfo(tmp, n)
+
+        ! Set the w-variables to the ones of the uniform flow field.
+        spectralLoop4b: do sps = 1, nTimeIntervalsSpectral
+            domains4b: do nn = 1, nDom
+                call setPointers(nn, 1, sps)
+                do l = 1, nw
+                    do k = 0, kb
+                        do j = 0, jb
+                            do i = 0, ib
+                                w(i, j, k, l) = winf(l)
+                            end do
+                        end do
+                    end do
+                end do
+            end do domains4b
+        end do spectralLoop4b
+
+        ! Evaluate the residual now
+        call computeResidualNK(useUpdateIntermed=.True.)
+        call getCurrentResidual(rhoRes, totalRRes)
+
+        ! Put everything back
+        call setInfo(tmp, n)
+
+        deallocate (tmp)
+
+        ! propogate the old values throught the code.
+        ! This is not needed for euler and shouldn't be needed for
+        ! viscous equations either, but becuase of an issue else where it is.
+        ! see https://github.com/mdolab/adflow/pull/46 for the discussion.
+        call computeResidualNK(useUpdateIntermed=.True.)
+
+    end subroutine getFreeStreamResidual
+
+    subroutine getCurrentResidual(rhoRes, totalRRes)
+
+        use constants
+        use communication, only: adflow_comm_world
+        use block, only: nCellGlobal
+        use blockPointers, only: nDom
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use iteration, only: currentLevel
+        use monitor, only: monLoc, monGlob, nMonSum
+        use utils, only: setPointers, sumResiduals, sumAllResiduals
+        implicit none
+
+        ! Compute rhoRes and totalR. The actual residual must have already
+        ! been evaluated
+
+        real(kind=realType), intent(out) :: rhoRes, totalRRes
+        integer(kind=intType) :: sps, nn, ierr
+
+        monLoc = zero
+        do sps = 1, nTimeIntervalsSpectral
+            do nn = 1, nDom
+                call setPointers(nn, currentLevel, sps)
+                call sumResiduals(1, 1) ! Sum 1st state res into first mon location
+                call sumAllResiduals(2) ! Sum into second mon location
+            end do
+        end do
 
-    ! Get the residual cooresponding to the free-stream on the fine
-    ! grid-level --- This saves the current values in W, P, rlv and rev
-    ! and restores them when finished.
+        ! This is the same calc as in convergence info, just for rehoRes and
+        ! totalR only.
+        call mpi_allreduce(monLoc, monGlob, nMonSum, adflow_real, &
+                           mpi_sum, ADflow_comm_world, ierr)
 
-    call getInfoSize(n)
-    allocate(tmp(n))
-    call getInfo(tmp, n)
+        rhoRes = sqrt(monGlob(1) / nCellGlobal(currentLevel))
+        totalRRes = sqrt(monGlob(2))
 
-    ! Set the w-variables to the ones of the uniform flow field.
-    spectralLoop4b: do sps=1, nTimeIntervalsSpectral
-       domains4b: do nn=1, nDom
-          call setPointers(nn, 1, sps)
-          do l=1,nw
-             do k=0, kb
-                do j=0, jb
-                   do i=0, ib
-                      w(i,j,k,l) = winf(l)
-                   enddo
-                enddo
-             enddo
-          end do
-       end do domains4b
-    end do spectralLoop4b
+    end subroutine getCurrentResidual
 
-    ! Evaluate the residual now
-    call computeResidualNK(useUpdateIntermed = .True.)
-    call getCurrentResidual(rhoRes, totalRRes)
+    subroutine FormJacobianNK
 
-    ! Put everything back
-    call setInfo(tmp, n)
+        use constants
+        use inputADjoint, only: viscPC, precondType
+        use utils, only: EChk
+        use adjointUtils, only: setupStateResidualMatrix, setupStandardKSP, setupStandardMultigrid
+        implicit none
 
-    deallocate(tmp)
+        ! Local Variables
+        character(len=maxStringLen) :: preConSide, localPCType, kspObjectType, globalPCType, localOrdering
+        integer(kind=intType) :: ierr
+        logical :: useAD, usePC, useTranspose, useObjective, tmp
+        integer(kind=intType) :: i, j, k, l, ii, nn, sps
+        logical :: useCoarseMats
 
-    ! propogate the old values throught the code.
-    ! This is not needed for euler and shouldn't be needed for
-    ! viscous equations either, but becuase of an issue else where it is.
-    ! see https://github.com/mdolab/adflow/pull/46 for the discussion.
-    call computeResidualNK(useUpdateIntermed = .True.)
+        ! Dummy assembly begin/end calls for the matrix-free Matrx
+        call MatAssemblyBegin(dRdw, MAT_FINAL_ASSEMBLY, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+        call MatAssemblyEnd(dRdw, MAT_FINAL_ASSEMBLY, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
+        ! Assemble the approximate PC (fine leve, level 1)
+        useAD = NK_ADPC
+        usePC = .True.
+        useTranspose = .False.
+        useObjective = .False.
+        tmp = viscPC ! Save what is in viscPC and set to the NKvarible
+        viscPC = NK_viscPC
 
-  end subroutine getFreeStreamResidual
+        if (preCondType == 'mg') then
+            useCoarseMats = .True.
+        else
+            useCoarseMats = .False.
+        end if
 
-  subroutine getCurrentResidual(rhoRes,totalRRes)
+        call setupStateResidualMatrix(dRdwPre, useAD, usePC, useTranspose, &
+                                      useObjective, .False., 1_intType, useCoarseMats=useCoarseMats)
+        ! Reset saved value
+        viscPC = tmp
+
+        ! Setup KSP Options
+        preConSide = 'right'
+        localPCType = 'ilu'
+        kspObjectType = 'gmres'
+        globalPCType = 'asm'
+        localOrdering = 'rcm'
+
+        ! Setup the KSP using the same code as used for the adjoint
+        if (PreCondType == 'asm') then
+            call setupStandardKSP(NK_KSP, kspObjectType, NK_subSpace, &
+                                  preConSide, globalPCType, NK_asmOverlap, NK_outerPreConIts, localPCType, &
+                                  localOrdering, NK_iluFill, NK_innerPreConIts)
+        else
+            call setupStandardMultigrid(NK_KSP, kspObjectType, NK_subSpace, &
+                                        preConSide, NK_asmOverlap, NK_outerPreConIts, &
+                                        localOrdering, NK_iluFill)
+        end if
 
-    use constants
-    use communication, only : adflow_comm_world
-    use block, only : nCellGlobal
-    use blockPointers, only : nDom
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use iteration, only : currentLevel
-    use monitor, only: monLoc, monGlob, nMonSum
-    use utils, only : setPointers, sumResiduals, sumAllResiduals
-    implicit none
+        ! Don't do iterative refinement for the NKSolver.
+        call KSPGMRESSetCGSRefinementType(NK_KSP, &
+                                          KSP_GMRES_CGS_REFINE_NEVER, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    ! Compute rhoRes and totalR. The actual residual must have already
-    ! been evaluated
+    end subroutine FormJacobianNK
 
-    real(kind=realType), intent(out) :: rhoRes,totalRRes
-    integer(kind=intType) :: sps,nn,ierr
+    subroutine FormFunction_mf(ctx, wVec, rVec, ierr)
 
-    monLoc = zero
-    do sps=1, nTimeIntervalsSpectral
-       do nn=1, nDom
-          call setPointers(nn, currentLevel, sps)
-          call sumResiduals(1, 1) ! Sum 1st state res into first mon location
-          call sumAllResiduals(2) ! Sum into second mon location
-       end do
-    end do
+        ! This is basically a copy of FormFunction, however it has a
+        ! different calling sequence from PETSc. It performs the identical
+        ! function. This is used for linear solve application for the
+        ! aerostructural system pre-conditioner
 
-    ! This is the same calc as in convergence info, just for rehoRes and
-    ! totalR only.
-    call mpi_allreduce(monLoc, monGlob, nMonSum, adflow_real, &
-         mpi_sum, ADflow_comm_world, ierr)
+        use constants
+        implicit none
 
-    rhoRes = sqrt(monGlob(1)/nCellGlobal(currentLevel))
-    totalRRes = sqrt(monGlob(2))
+        ! PETSc Variables
+        PetscFortranAddr ctx(*)
+        Vec wVec, rVec
+        integer(kind=intType) :: ierr
 
-  end subroutine getCurrentResidual
+        ! This is just a shell routine that runs the more broadly useful
+        ! computeResidualNK subroutine
 
-  subroutine FormJacobianNK
+        call setW(wVec)
+        call computeResidualNK(useUpdateIntermed=.False.)
+        call setRVec(rVec)
+        ! We don't check an error here, so just pass back zero
+        ierr = 0
 
-    use constants
-    use inputADjoint, only : viscPC, precondType
-    use utils, only : EChk
-    use adjointUtils, only :setupStateResidualMatrix, setupStandardKSP, setupStandardMultigrid
-    implicit none
+    end subroutine FormFunction_mf
 
-    ! Local Variables
-    character(len=maxStringLen) :: preConSide, localPCType, kspObjectType, globalPCType, localOrdering
-    integer(kind=intType) :: ierr
-    logical :: useAD, usePC, useTranspose, useObjective, tmp
-    integer(kind=intType) :: i, j, k, l, ii, nn, sps
-    logical :: useCoarseMats
-
-    ! Dummy assembly begin/end calls for the matrix-free Matrx
-    call MatAssemblyBegin(dRdw, MAT_FINAL_ASSEMBLY, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-    call MatAssemblyEnd(dRdw, MAT_FINAL_ASSEMBLY, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Assemble the approximate PC (fine leve, level 1)
-    useAD = NK_ADPC
-    usePC = .True.
-    useTranspose = .False.
-    useObjective = .False.
-    tmp = viscPC ! Save what is in viscPC and set to the NKvarible
-    viscPC = NK_viscPC
-
-    if (preCondType == 'mg') then
-       useCoarseMats = .True.
-    else
-       useCoarseMats = .False.
-    end if
-
-    call setupStateResidualMatrix(dRdwPre, useAD, usePC, useTranspose, &
-         useObjective, .False., 1_intType, useCoarseMats=useCoarseMats)
-    ! Reset saved value
-    viscPC = tmp
-
-    ! Setup KSP Options
-    preConSide = 'right'
-    localPCType = 'ilu'
-    kspObjectType = 'gmres'
-    globalPCType = 'asm'
-    localOrdering = 'rcm'
-
-    ! Setup the KSP using the same code as used for the adjoint
-    if (PreCondType == 'asm') then
-       call setupStandardKSP(NK_KSP, kspObjectType, NK_subSpace, &
-            preConSide, globalPCType, NK_asmOverlap, NK_outerPreConIts, localPCType, &
-            localOrdering, NK_iluFill, NK_innerPreConIts)
-    else
-       call setupStandardMultigrid(NK_KSP, kspObjectType, NK_subSpace, &
-            preConSide, NK_asmOverlap, NK_outerPreConIts, &
-            localOrdering, NK_iluFill)
-    end if
-
-
-    ! Don't do iterative refinement for the NKSolver.
-    call KSPGMRESSetCGSRefinementType(NK_KSP, &
-         KSP_GMRES_CGS_REFINE_NEVER, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-  end subroutine FormJacobianNK
-
-  subroutine FormFunction_mf(ctx, wVec, rVec, ierr)
-
-    ! This is basically a copy of FormFunction, however it has a
-    ! different calling sequence from PETSc. It performs the identical
-    ! function. This is used for linear solve application for the
-    ! aerostructural system pre-conditioner
+    subroutine destroyNKsolver
 
-    use constants
-    implicit none
+        ! Destroy all the PETSc objects for the Newton-Krylov
+        ! solver.
 
-    ! PETSc Variables
-    PetscFortranAddr ctx(*)
-    Vec     wVec, rVec
-    integer(kind=intType) :: ierr
+        use constants
+        use utils, only: EChk
+        use agmg, only: destroyAGMG
+        implicit none
+        integer(kind=intType) :: ierr
 
-    ! This is just a shell routine that runs the more broadly useful
-    ! computeResidualNK subroutine
+        if (NK_solverSetup) then
 
-    call setW(wVec)
-    call computeResidualNK(useUpdateIntermed = .False.)
-    call setRVec(rVec)
-    ! We don't check an error here, so just pass back zero
-    ierr = 0
-
-  end subroutine FormFunction_mf
-
-  subroutine destroyNKsolver
+            call MatDestroy(dRdw, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-    ! Destroy all the PETSc objects for the Newton-Krylov
-    ! solver.
+            call MatDestroy(dRdwPre, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-    use constants
-    use utils, only: EChk
-    use agmg, only : destroyAGMG
-    implicit none
-    integer(kind=intType) :: ierr
+            call MatDestroy(dRdwPseudo, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-    if (NK_solverSetup) then
+            call VecDestroy(wVec, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call MatDestroy(dRdw, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            call VecDestroy(rVec, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call MatDestroy(dRdwPre, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            call VecDestroy(deltaW, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call MatDestroy(dRdwPseudo, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            call VecDestroy(baseRes, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call VecDestroy(wVec, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            call VecDestroy(g, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call VecDestroy(rVec, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            call VecDestroy(work, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call VecDestroy(deltaW, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            call KSPDestroy(NK_KSP, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call VecDestroy(baseRes, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            call destroyAGMG()
 
-       call VecDestroy(g, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            NK_solverSetup = .False.
+        end if
 
-       call VecDestroy(work, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+    end subroutine destroyNKsolver
+
+    subroutine NKStep(firstCall)
+
+        use constants
+        use flowVarRefState, only: nw
+        use inputPhysics, only: equations
+        use flowVarRefState, only: nw, nwf
+        use inputIteration, only: L2conv
+        use iteration, only: approxTotalIts, totalR0, stepMonitor, LinResMonitor, iterType
+        use utils, only: EChk
+        use killSignals, only: routineFailed
+        implicit none
+
+        ! Input Variables
+        logical, intent(in) :: firstCall
+
+        ! Working Variables
+        integer(kind=intType) :: iter, ierr, kspIterations
+        integer(kind=intType) :: maxNonLinearIts, nfevals, maxIt
+        real(kind=alwaysRealType) :: norm, rtol, atol
+        real(kind=alwaysrealType) :: fnorm, ynorm, gnorm
+        logical :: flag
+        real(kind=alwaysRealType) :: resHist(NK_subspace + 1)
+
+        if (firstCall) then
+            call setupNKSolver()
+
+            ! Copy the adflow 'w' into the petsc wVec
+            call setwVec(wVec)
+
+            ! Evaluate the residual before we start and put the residual in
+            ! 'g', which is what would be the case after a linesearch.
+            call computeResidualNK(useUpdateIntermed=.False.)
+            call setRVec(rVec)
+            iter_k = 1
+            iter_m = 0
+        else
+            NK_iter = NK_iter + 1
 
-       call KSPDestroy(NK_KSP, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            ! Increment counter for the nonmonotne line serach
+            iter_k = iter_k + 1
+        end if
 
-       call destroyAGMG()
+        ! Compute the norm of rVec for use in EW Criteria
+        call VecNorm(rVec, NORM_2, norm, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-       NK_solverSetup = .False.
-    end if
+        ! Determine if if we need to form the Preconditioner
+        if (mod(NK_iter, NK_jacobianLag) == 0) then
+            NK_CFL = NK_CFL0 * (totalR0 / norm)**1.5
+            iterType = "     *NK"
+            call FormJacobianNK()
+        else
 
-  end subroutine destroyNKsolver
+            call MatAssemblyBegin(dRdw, MAT_FINAL_ASSEMBLY, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+            call MatAssemblyEnd(dRdw, MAT_FINAL_ASSEMBLY, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+            iterType = "      NK"
+        end if
 
-  subroutine NKStep(firstCall)
+        if (NK_iter == 0 .or. .not. NK_useEW) then
+            rtol = NK_rtolInit
+        else
+            call getEWTol(norm, oldNorm, rtolLast, rtol)
+        end if
 
-    use constants
-    use flowVarRefState, only : nw
-    use inputPhysics, only : equations
-    use flowVarRefState, only :  nw, nwf
-    use inputIteration, only : L2conv
-    use iteration, only : approxTotalIts, totalR0, stepMonitor, LinResMonitor, iterType
-    use utils, only : EChk
-    use killSignals, only : routineFailed
-    implicit none
+        ! Save the old rtol and norm for the next iteration
+        oldNorm = norm
+        rtolLast = rtol
 
-    ! Input Variables
-    logical, intent(in) :: firstCall
-
-    ! Working Variables
-    integer(kind=intType) :: iter, ierr, kspIterations
-    integer(kind=intType) :: maxNonLinearIts, nfevals, maxIt
-    real(kind=alwaysRealType) :: norm, rtol, atol
-    real(kind=alwaysrealType) :: fnorm, ynorm, gnorm
-    logical :: flag
-    real(kind=alwaysRealType) :: resHist(NK_subspace+1)
-
-    if (firstCall) then
-       call setupNKSolver()
-
-       ! Copy the adflow 'w' into the petsc wVec
-       call setwVec(wVec)
-
-       ! Evaluate the residual before we start and put the residual in
-       ! 'g', which is what would be the case after a linesearch.
-       call computeResidualNK(useUpdateIntermed = .False.)
-       call setRVec(rVec)
-       iter_k = 1
-       iter_m = 0
-    else
-       NK_iter = NK_iter + 1
-
-       ! Increment counter for the nonmonotne line serach
-       iter_k = iter_k + 1
-    end if
-
-    ! Compute the norm of rVec for use in EW Criteria
-    call VecNorm(rVec, NORM_2, norm, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Determine if if we need to form the Preconditioner
-    if (mod(NK_iter, NK_jacobianLag) == 0) then
-       NK_CFL = NK_CFL0 * (totalR0 / norm)**1.5
-       iterType = "     *NK"
-       call FormJacobianNK()
-    else
-
-       call MatAssemblyBegin(dRdw, MAT_FINAL_ASSEMBLY, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-       call MatAssemblyEnd(dRdw, MAT_FINAL_ASSEMBLY, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-       iterType = "      NK"
-    end if
-
-    if (NK_iter == 0 .or. .not. NK_useEW) then
-       rtol = NK_rtolInit
-    else
-       call getEWTol(norm, oldNorm, rtolLast, rtol)
-    end if
-
-    ! Save the old rtol and norm for the next iteration
-    oldNorm = norm
-    rtolLast = rtol
-
-    ! Set all tolerances for linear solver.
-
-    ! The 0.01 multiplier for atol requires some explanation:
-    ! The linear residual is roughly the same magnitude
-    ! as the nonlinear one at the start of the linear solution,
-    ! assuming the initial guess does not have a large effect
-    ! on the linear residual. KSPSolve exits when either the
-    ! rtol or atol is satisfied, which means that the atol
-    ! only comes into play near the end of the nonlinear solution.
-    ! We use atol because in the final Newton step when we are
-    ! close to the L2 target, we won't need to solve the linear
-    ! system tightly. For example, if we are one order of magnitude
-    ! away from the nonlinear solver target, then there is
-    ! no point in solving the linear system to 8 orders of
-    ! magnitude convergence in the linear residual. Instead,
-    ! we can stop early using atol. However, in very rare situations,
-    ! it can happen that the nonlinear residual is *just* above
-    ! the convergence criteria, while the linear residual is
-    ! *just* below. What happens is that the linear solver hits
-    ! the atol limit immediately, doesn't do anything, and then
-    ! the nonlinear convergence check can't do anything either.
-    ! By multiplying by 0.01, we make sure that the linear solver
-    ! actually has to do *something* and not just exit immediately.
+        ! Set all tolerances for linear solver.
+
+        ! The 0.01 multiplier for atol requires some explanation:
+        ! The linear residual is roughly the same magnitude
+        ! as the nonlinear one at the start of the linear solution,
+        ! assuming the initial guess does not have a large effect
+        ! on the linear residual. KSPSolve exits when either the
+        ! rtol or atol is satisfied, which means that the atol
+        ! only comes into play near the end of the nonlinear solution.
+        ! We use atol because in the final Newton step when we are
+        ! close to the L2 target, we won't need to solve the linear
+        ! system tightly. For example, if we are one order of magnitude
+        ! away from the nonlinear solver target, then there is
+        ! no point in solving the linear system to 8 orders of
+        ! magnitude convergence in the linear residual. Instead,
+        ! we can stop early using atol. However, in very rare situations,
+        ! it can happen that the nonlinear residual is *just* above
+        ! the convergence criteria, while the linear residual is
+        ! *just* below. What happens is that the linear solver hits
+        ! the atol limit immediately, doesn't do anything, and then
+        ! the nonlinear convergence check can't do anything either.
+        ! By multiplying by 0.01, we make sure that the linear solver
+        ! actually has to do *something* and not just exit immediately.
 
 #ifndef USE_COMPLEX
-    ! in the real mode, we set the atol slightly lower than the target L2 convergence
-    ! as explained in the comment block above
-    atol = totalR0*L2Conv*0.01_realType
+        ! in the real mode, we set the atol slightly lower than the target L2 convergence
+        ! as explained in the comment block above
+        atol = totalR0 * L2Conv * 0.01_realType
 #else
-    ! in complex mode, we want to tightly solve the linear system every time
-    ! because even though the real residuals converge, complex ones might (and do) lag
-    ! this approach makes sure that even with a converged real system, the linear solver
-    ! still converges the linear system tightly and this helps with the complex system convergence
-    atol = totalR0*L2Conv*1e-6_realType
+        ! in complex mode, we want to tightly solve the linear system every time
+        ! because even though the real residuals converge, complex ones might (and do) lag
+        ! this approach makes sure that even with a converged real system, the linear solver
+        ! still converges the linear system tightly and this helps with the complex system convergence
+        atol = totalR0 * L2Conv * 1e-6_realType
 #endif
 
-    maxIt = NK_subspace
+        maxIt = NK_subspace
 
-    call KSPSetTolerances(NK_KSP, real(rtol), &
-         real(atol), real(NK_divTol), maxIt, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        call KSPSetTolerances(NK_KSP, real(rtol), &
+                              real(atol), real(NK_divTol), maxIt, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    call KSPSetResidualHistory(NK_KSP, resHist, maxIt+1, PETSC_TRUE, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        call KSPSetResidualHistory(NK_KSP, resHist, maxIt + 1, PETSC_TRUE, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    ! set the BaseVector of the matrix-free matrix
-    call formFunction_mf(ctx, wVec, baseRes, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-    call MatMFFDSetBase(dRdW, wVec, baseRes, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        ! set the BaseVector of the matrix-free matrix
+        call formFunction_mf(ctx, wVec, baseRes, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+        call MatMFFDSetBase(dRdW, wVec, baseRes, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    ! Actually do the Linear Krylov Solve
-    call KSPSolve(NK_KSP, rVec, deltaW, ierr)
+        ! Actually do the Linear Krylov Solve
+        call KSPSolve(NK_KSP, rVec, deltaW, ierr)
 
-    ! DON'T just check the error. We want to catch error code 72
-    ! which is a floating point error. This is ok, we just reset and
-    ! keep going
-    if (ierr == 72) then
-       ! The convergence check will get the nan
-    else
-       call EChk(ierr, __FILE__, __LINE__)
-    end if
+        ! DON'T just check the error. We want to catch error code 72
+        ! which is a floating point error. This is ok, we just reset and
+        ! keep going
+        if (ierr == 72) then
+            ! The convergence check will get the nan
+        else
+            call EChk(ierr, __FILE__, __LINE__)
+        end if
 
-    nfevals = 0
-    if (NK_LS == noLineSearch) then
-       call LSNone(wVec, rVec, g, deltaW, work, nfevals, flag, stepMonitor)
-    else if(NK_LS == cubicLineSearch) then
-       call LSCubic(wVec, rVec, g, deltaW, work, fnorm, ynorm, gnorm, &
-            nfevals, flag, stepMonitor)
-    else if (NK_LS == nonMonotoneLineSearch) then
-       iter_m = min(iter_m+1, mMax)
-       call LSNM(wVec, rVec, g, deltaW, work, fnorm, ynorm, gnorm, &
-            nfevals, flag, stepMonitor)
-    end if
+        nfevals = 0
+        if (NK_LS == noLineSearch) then
+            call LSNone(wVec, rVec, g, deltaW, work, nfevals, flag, stepMonitor)
+        else if (NK_LS == cubicLineSearch) then
+            call LSCubic(wVec, rVec, g, deltaW, work, fnorm, ynorm, gnorm, &
+                         nfevals, flag, stepMonitor)
+        else if (NK_LS == nonMonotoneLineSearch) then
+            iter_m = min(iter_m + 1, mMax)
+            call LSNM(wVec, rVec, g, deltaW, work, fnorm, ynorm, gnorm, &
+                      nfevals, flag, stepMonitor)
+        end if
 
-    if (.not. flag) then
-       routineFailed = .True.
-    end if
+        if (.not. flag) then
+            routineFailed = .True.
+        end if
 
-    ! Copy the work vector to wVec. This is our new state vector
-    call VecCopy(work, wVec, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        ! Copy the work vector to wVec. This is our new state vector
+        call VecCopy(work, wVec, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    ! Use the result from the line sesarch for the residual
-    call vecCopy(g, rVec, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        ! Use the result from the line sesarch for the residual
+        call vecCopy(g, rVec, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    ! Update the approximate iteration counter. The +nFevals is for the
-    ! iterations taken during the linesearch
+        ! Update the approximate iteration counter. The +nFevals is for the
+        ! iterations taken during the linesearch
 
-    call KSPGetIterationNumber(NK_KSP, kspIterations, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        call KSPGetIterationNumber(NK_KSP, kspIterations, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    linResMonitor = resHist(kspIterations+1)/resHist(1)
+        linResMonitor = resHist(kspIterations + 1) / resHist(1)
+
+        approxTotalIts = approxTotalIts + nfEvals + kspIterations
+
+    end subroutine NKStep
+
+    subroutine LSCubic(x, f, g, y, w, fnorm, ynorm, gnorm, nfevals, flag, lambda)
+
+        use constants
+        use utils, only: EChk
+        use genericISNAN, only: myisnan
+        use communication, only: myid
+        use initializeFlow, only: setUniformFlow
+        use iteration, only: totalR0
+        implicit none
+
+        ! Input/Output
+        Vec x, f, g, y, w
+        !x    - current iterate
+        !f    - residual evaluated at x
+        !y    - search direction
+        !w    - work vector -> On output, new iterate
+        !g    - residual evaluated at new iterate y
+
+        real(kind=alwaysrealType) :: fnorm, gnorm, ynorm
+        real(kind=realType) :: alpha
+        logical :: flag
+        integer(kind=intType) :: nfevals
+        !   Note that for line search purposes we work with with the related
+        !   minimization problem:
+        !      min  z(x):  R^n -> R,
+        !   where z(x) = .5 * fnorm*fnorm, and fnorm = || f ||_2.
+        !
+
+        real(kind=realType) :: initslope, lambdaprev, gnormprev, a, b, d, t1, t2
+        real(kind=alwaysRealType) :: minlambda, lambda, lambdatemp
+        real(kind=alwaysRealType) :: rellength
+        integer(kind=intType) :: ierr, iter
+        real(kind=alwaysRealType) :: turbRes1, turbRes2, flowRes1, flowRes2, totalRes1, totalRes2
+        logical :: hadANan
+        ! Call to get the split norms
+        call setRVec(g, flowRes1, turbRes1, totalRes1)
+
+        ! Set some defaults:
+        alpha = 1.e-2_realType
+        minlambda = .01
+        nfevals = 0
+        flag = .True.
+        lambda = 1.0_realType
+        ! Compute the two norms we need:
+        call VecNorm(y, NORM_2, ynorm, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    approxTotalIts = approxTotalIts + nfEvals + kspIterations
+        call VecNorm(f, NORM_2, fnorm, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-  end subroutine NKStep
+        call MatMult(dRdw, y, w, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+        nfevals = nfevals + 1
 
-  subroutine LSCubic(x, f, g, y, w, fnorm, ynorm, gnorm, nfevals, flag, lambda)
+        call VecDot(f, w, initslope, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    use constants
-    use utils, only : EChk
-    use genericISNAN, only : myisnan
-    use communication, only : myid
-    use initializeFlow, only : setUniformFlow
-    use iteration, only : totalR0
-    implicit none
+        if (initslope > zero) then
+            initslope = -initslope
+        end if
 
-    ! Input/Output
-    Vec x, f, g, y, w
-    !x    - current iterate
-    !f    - residual evaluated at x
-    !y    - search direction
-    !w    - work vector -> On output, new iterate
-    !g    - residual evaluated at new iterate y
-
-    real(kind=alwaysrealType) :: fnorm, gnorm, ynorm
-    real(kind=realType) :: alpha
-    logical :: flag
-    integer(kind=intType) :: nfevals
-    !   Note that for line search purposes we work with with the related
-    !   minimization problem:
-    !      min  z(x):  R^n -> R,
-    !   where z(x) = .5 * fnorm*fnorm, and fnorm = || f ||_2.
-    !
-
-    real(kind=realType) :: initslope, lambdaprev, gnormprev, a, b, d, t1, t2
-    real(kind=alwaysRealType) :: minlambda, lambda, lambdatemp
-    real(kind=alwaysRealType) :: rellength
-    integer(kind=intType) :: ierr, iter
-    real(kind=alwaysRealType) :: turbRes1, turbRes2, flowRes1, flowRes2, totalRes1, totalRes2
-    logical :: hadANan
-    ! Call to get the split norms
-    call setRVec(g, flowRes1, turbRes1, totalRes1)
-
-    ! Set some defaults:
-    alpha = 1.e-2_realType
-    minlambda     = .01
-    nfevals = 0
-    flag = .True.
-    lambda     = 1.0_realType
-    ! Compute the two norms we need:
-    call VecNorm(y, NORM_2, ynorm, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    call VecNorm(f, NORM_2, fnorm, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    call MatMult(dRdw, y, w, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-    nfevals = nfevals + 1
-
-    call VecDot(f, w, initslope, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    if (initslope > zero)  then
-       initslope = -initslope
-    end if
-
-    if (initslope == 0.0_realType) then
-       initslope = -1.0_realType
-    end if
+        if (initslope == 0.0_realType) then
+            initslope = -1.0_realType
+        end if
 #ifdef USE_COMPLEX
-    call VecWAXPY(w, cmplx(-lambda, 0.0), y, x, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        call VecWAXPY(w, cmplx(-lambda, 0.0), y, x, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 #else
-    call VecWAXPY(w, -lambda, y, x, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        call VecWAXPY(w, -lambda, y, x, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 #endif
 
-    ! Compute Function:
-    call setW(w)
-    call computeResidualNK(useUpdateIntermed = .True.)
-    call setRVec(g, flowRes2, turbRes2, gnorm)
-
-    nfevals = nfevals + 1
-
-    ! Before we get to the actual line search we do two additional
-    ! checks:
-
-    ! 1. If the full step has a nan, we backtrack until we get a valid
-    ! step. We then lower the NK switch tol such that the solver is
-    ! forced back up to ANK or DADI/RK to keep going a bit further.
-    !
-    ! 2. If the turbulence residual goes up by large-ish factor (2.0),
-    ! we pre-limit the step. The reason for this is that a unit step
-    ! might lower the total residual, but the turb res could go up an
-    ! order of magnitude or more.
-
-    hadANan = .False.
-    if (myisnan(gnorm) .or. turbRes2 > 2.0*turbRes1) then
-       ! Special testing for nans
-
-       if (myisnan(gnorm)) then
-          hadANan = .True.
-          call setUniformFlow()
-          lambda = 0.5
-       else
-          ! Large turb jump
-          lambda = lambda * (turbRes1 / turbRes2)
-          lambda = max(lambda, 0.1)
-       end if
-
-       backtrack: do iter=1, 10
-          ! Compute new x value:
+        ! Compute Function:
+        call setW(w)
+        call computeResidualNK(useUpdateIntermed=.True.)
+        call setRVec(g, flowRes2, turbRes2, gnorm)
+
+        nfevals = nfevals + 1
+
+        ! Before we get to the actual line search we do two additional
+        ! checks:
+
+        ! 1. If the full step has a nan, we backtrack until we get a valid
+        ! step. We then lower the NK switch tol such that the solver is
+        ! forced back up to ANK or DADI/RK to keep going a bit further.
+        !
+        ! 2. If the turbulence residual goes up by large-ish factor (2.0),
+        ! we pre-limit the step. The reason for this is that a unit step
+        ! might lower the total residual, but the turb res could go up an
+        ! order of magnitude or more.
+
+        hadANan = .False.
+        if (myisnan(gnorm) .or. turbRes2 > 2.0 * turbRes1) then
+            ! Special testing for nans
+
+            if (myisnan(gnorm)) then
+                hadANan = .True.
+                call setUniformFlow()
+                lambda = 0.5
+            else
+                ! Large turb jump
+                lambda = lambda * (turbRes1 / turbRes2)
+                lambda = max(lambda, 0.1)
+            end if
+
+            backtrack: do iter = 1, 10
+                ! Compute new x value:
 #ifdef USE_COMPLEX
-          call VecWAXPY(w, cmplx(-lambda, 0.0), y, x, ierr)
-          call EChk(ierr, __FILE__, __LINE__)
+                call VecWAXPY(w, cmplx(-lambda, 0.0), y, x, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 #else
-          call VecWAXPY(w, -lambda, y, x, ierr)
-          call EChk(ierr, __FILE__, __LINE__)
+                call VecWAXPY(w, -lambda, y, x, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 #endif
 
-          ! Compute Function
-          call setW(w)
-          call computeResidualNK(useUpdateIntermed = .True.)
-          call setRVec(g, flowRes2, turbRes2, gnorm)
-
-          nfevals = nfevals + 1
-
-          if (myisnan(gnorm)) then
-             ! Just reset the flow, adjust the step back and keep
-             ! going
-             call setUniformFlow()
-             lambda = lambda * .5
-          else
-
-             ! Sufficient reduction! Whoo! This is great we're done!
-             if (0.5_realType*gnorm*gnorm <= 0.5_realType*fnorm*fnorm + alpha*initslope) then
-                exit
-             end if
-
-             ! If we're less than min lambda, just take it. This could
-             ! let the residual go up slightly. That's ok.
-             if (lambda < minlambda)  then
-                exit
-             end if
-
-             ! Otherwise, cut back the lambda
-             lambda = lambda * 0.5
-
-          end if
-       end do backtrack
-
-       if (hadANan) then
-          ! Adjust the NK switch tolerance such that the ANK or DADI
-          ! goes a little further.
-          nk_switchtol = 0.8*(gnorm/totalR0)
-       end if
-
-       ! All finished with this "pre" line search.
-       return
-    end if
-
-    ! Sufficient reduction from the basic step. This is the return for
-    ! a unit step. This is what we want.
-    if (0.5_realType*gnorm*gnorm <= 0.5_realType*fnorm*fnorm + alpha*initslope) then
-       goto 100
-    end if
-
-    ! Fit points with quadratic
-    lambda     = 1.0_realType
-    lambdatemp = -initslope/(gnorm*gnorm - fnorm*fnorm - 2.0_realType*initslope)
-    lambdaprev = lambda
-    gnormprev  = gnorm
-    if (lambdatemp > 0.5_realType*lambda) then
-       lambdatemp = 0.5_realType*lambda
-    end if
-
-    if (lambdatemp <= .1_realType*lambda) then
-       lambda = .1_realType*lambda
-    else
-       lambda = lambdatemp
-    end if
+                ! Compute Function
+                call setW(w)
+                call computeResidualNK(useUpdateIntermed=.True.)
+                call setRVec(g, flowRes2, turbRes2, gnorm)
+
+                nfevals = nfevals + 1
+
+                if (myisnan(gnorm)) then
+                    ! Just reset the flow, adjust the step back and keep
+                    ! going
+                    call setUniformFlow()
+                    lambda = lambda*.5
+                else
+
+                    ! Sufficient reduction! Whoo! This is great we're done!
+                    if (0.5_realType * gnorm * gnorm <= 0.5_realType * fnorm * fnorm + alpha * initslope) then
+                        exit
+                    end if
+
+                    ! If we're less than min lambda, just take it. This could
+                    ! let the residual go up slightly. That's ok.
+                    if (lambda < minlambda) then
+                        exit
+                    end if
+
+                    ! Otherwise, cut back the lambda
+                    lambda = lambda * 0.5
+
+                end if
+            end do backtrack
+
+            if (hadANan) then
+                ! Adjust the NK switch tolerance such that the ANK or DADI
+                ! goes a little further.
+                nk_switchtol = 0.8 * (gnorm / totalR0)
+            end if
+
+            ! All finished with this "pre" line search.
+            return
+        end if
+
+        ! Sufficient reduction from the basic step. This is the return for
+        ! a unit step. This is what we want.
+        if (0.5_realType * gnorm * gnorm <= 0.5_realType * fnorm * fnorm + alpha * initslope) then
+            goto 100
+        end if
+
+        ! Fit points with quadratic
+        lambda = 1.0_realType
+        lambdatemp = -initslope / (gnorm * gnorm - fnorm * fnorm - 2.0_realType * initslope)
+        lambdaprev = lambda
+        gnormprev = gnorm
+        if (lambdatemp > 0.5_realType * lambda) then
+            lambdatemp = 0.5_realType * lambda
+        end if
+
+        if (lambdatemp <= .1_realType * lambda) then
+            lambda = .1_realType * lambda
+        else
+            lambda = lambdatemp
+        end if
 
 #ifdef USE_COMPLEX
-    call VecWAXPY(w, -cmplx(lambda, 0.0), y, x, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        call VecWAXPY(w, -cmplx(lambda, 0.0), y, x, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 #else
-    call VecWAXPY(w, -lambda, y, x, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        call VecWAXPY(w, -lambda, y, x, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 #endif
 
-    ! Compute new function again:
-    call setW(w)
-    call computeResidualNK(useUpdateIntermed = .True.)
-    call setRVec(g)
-
-    nfevals = nfevals + 1
-
-    call VecNorm(g, NORM_2, gnorm, ierr)
-    if (ierr == PETSC_ERR_FP) then
-       flag = .False.
-       return
-    end if
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Sufficient reduction
-    if (0.5_realType*gnorm*gnorm <= 0.5_realType*fnorm*fnorm + lambda*alpha*initslope) then
-       goto 100
-    end if
-
-    ! Fit points with cubic
-    cubic_loop: do while (.True.)
-
-       if (lambda <= minlambda) then
-          exit cubic_loop
-       end if
-       t1 = 0.5_realType*(gnorm*gnorm - fnorm*fnorm) - lambda*initslope
-       t2 = 0.5_realType*(gnormprev*gnormprev  - fnorm*fnorm) - lambdaprev*initslope
-
-       a  = (t1/(lambda*lambda) - t2/(lambdaprev*lambdaprev))/(lambda-lambdaprev)
-       b  = (-lambdaprev*t1/(lambda*lambda) + lambda*t2/(lambdaprev*lambdaprev))/(lambda-lambdaprev)
-       d  = b*b - three*a*initslope
-       if (d < 0.0_realType) then
-          d = 0.0_realType
-       end if
-
-       if (a == 0.0_realType) then
-          lambdatemp = -initslope/(2.0_realType*b)
-       else
-          lambdatemp = (-b + sqrt(d))/(3.0_realType*a)
-       end if
-
-       lambdaprev = lambda
-       gnormprev  = gnorm
-
-       if (lambdatemp > 0.5_realType*lambda)  then
-          lambdatemp = 0.5_realType*lambda
-       end if
-       if (lambdatemp <= .1_realType*lambda) then
-          lambda = .1_realType*lambda
-       else
-          lambda = lambdatemp
-       end if
-
-       if (myisnan(lambda)) then
-          flag = .False.
-          exit cubic_loop
-       end if
+        ! Compute new function again:
+        call setW(w)
+        call computeResidualNK(useUpdateIntermed=.True.)
+        call setRVec(g)
+
+        nfevals = nfevals + 1
+
+        call VecNorm(g, NORM_2, gnorm, ierr)
+        if (ierr == PETSC_ERR_FP) then
+            flag = .False.
+            return
+        end if
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Sufficient reduction
+        if (0.5_realType * gnorm * gnorm <= 0.5_realType * fnorm * fnorm + lambda * alpha * initslope) then
+            goto 100
+        end if
+
+        ! Fit points with cubic
+        cubic_loop: do while (.True.)
+
+            if (lambda <= minlambda) then
+                exit cubic_loop
+            end if
+            t1 = 0.5_realType * (gnorm * gnorm - fnorm * fnorm) - lambda * initslope
+            t2 = 0.5_realType * (gnormprev * gnormprev - fnorm * fnorm) - lambdaprev * initslope
+
+            a = (t1 / (lambda * lambda) - t2 / (lambdaprev * lambdaprev)) / (lambda - lambdaprev)
+            b = (-lambdaprev * t1 / (lambda * lambda) + lambda * t2 / (lambdaprev * lambdaprev)) / (lambda - lambdaprev)
+            d = b * b - three * a * initslope
+            if (d < 0.0_realType) then
+                d = 0.0_realType
+            end if
+
+            if (a == 0.0_realType) then
+                lambdatemp = -initslope / (2.0_realType * b)
+            else
+                lambdatemp = (-b + sqrt(d)) / (3.0_realType * a)
+            end if
+
+            lambdaprev = lambda
+            gnormprev = gnorm
+
+            if (lambdatemp > 0.5_realType * lambda) then
+                lambdatemp = 0.5_realType * lambda
+            end if
+            if (lambdatemp <= .1_realType * lambda) then
+                lambda = .1_realType * lambda
+            else
+                lambda = lambdatemp
+            end if
+
+            if (myisnan(lambda)) then
+                flag = .False.
+                exit cubic_loop
+            end if
 
 #ifdef USE_COMPLEX
-       call VecWAXPY(w, cmplx(-lambda, 0.0), y, x, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            call VecWAXPY(w, cmplx(-lambda, 0.0), y, x, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 #else
-       call VecWAXPY(w, -lambda, y, x, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            call VecWAXPY(w, -lambda, y, x, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 #endif
-       ! Compute new function again:
-       call setW(w)
-       call computeResidualNK(useUpdateIntermed = .True.)
-       call setRVec(g)
-       nfevals = nfevals + 1
+            ! Compute new function again:
+            call setW(w)
+            call computeResidualNK(useUpdateIntermed=.True.)
+            call setRVec(g)
+            nfevals = nfevals + 1
 
-       call VecNorm(g, NORM_2, gnorm, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            call VecNorm(g, NORM_2, gnorm, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       ! Is reduction enough?
-       if (0.5_realType*gnorm*gnorm <= 0.5_realType*fnorm*fnorm + lambda*alpha*initslope) then
-          exit cubic_loop
-       end if
-    end do cubic_loop
+            ! Is reduction enough?
+            if (0.5_realType * gnorm * gnorm <= 0.5_realType * fnorm * fnorm + lambda * alpha * initslope) then
+                exit cubic_loop
+            end if
+        end do cubic_loop
+
+100     continue
+
+    end subroutine LSCubic
+
+    subroutine LSNone(x, f, g, y, w, nfevals, flag, step)
+
+        use constants
+        use utils, only: EChk
+        use communication
+        implicit none
+
+        ! Input/Output
+        Vec x, f, g, y, w
+        !x    - current iterate
+        !f    - residual evaluated at x
+        !y    - search direction
+        !w    - work vector -> On output, new iterate
+        !g    - residual evaluated at new iterate y
+
+        integer(kind=intType) :: nfevals
+        integer(kind=intType) :: ierr
+        logical :: flag
+        real(kind=alwaysRealType) :: step
+        real(kind=realType) :: tmp
+        flag = .True.
+        ! We just accept the step and compute the new residual at the new iterate
+        nfevals = 0
+        step = nk_fixedStep
+        tmp = -step
+
+        call VecWAXPY(w, tmp, y, x, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-100 continue
+        ! Compute new function:
+        call setW(w)
+        call computeResidualNK(useUpdateIntermed=.True.)
+        call setRVec(g)
+
+        nfevals = nfevals + 1
+    end subroutine LSNone
+
+    subroutine LSNM(x, f, g, y, w, fnorm, ynorm, gnorm, nfevals, flag, step)
+
+        use constants
+        use utils, only: EChk
+        implicit none
+
+        ! Input/Output
+        Vec x, f, g, y, w
+        !x    - current iterate
+        !f    - residual evaluated at x
+        !y    - search direction
+        !w    - work vector -> On output, new iterate
+        !g    - residual evaluated at new iterate y
+
+        real(kind=alwaysRealType) :: fnorm, gnorm, ynorm
+        real(kind=realType) :: alpha
+        real(kind=alwaysRealType) :: step
+        logical :: flag
+        integer(kind=intType) :: nfevals
+        !   Note that for line search purposes we work with with the related
+        !   minimization problem:
+        !      min  z(x):  R^n -> R,
+        !   where z(x) = .5 * fnorm*fnorm, and fnorm = || f ||_2.
+        !
+        real(kind=realType) :: initslope, gamma, sigma, max_val
+        integer(kind=intType) :: ierr, iter, j
+
+        ! Set some defaults:
+        gamma = 1e-3_realType
+        sigma = 0.5_realType
+
+        nfevals = 0
+        flag = .True.
+
+        ! Compute the two norms we need:
+        call VecNorm(y, NORM_2, ynorm, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-  end subroutine LSCubic
+        call VecNorm(f, NORM_2, fnorm, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-  subroutine LSNone(x, f, g, y, w, nfevals, flag, step)
+        NKLSFuncEvals(iter_k) = 0.5_realType * fnorm * fnorm
 
-    use constants
-    use utils, only : EChk
-    use communication
-    implicit none
+        call MatMult(dRdw, y, w, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+        nfevals = nfevals + 1
 
-    ! Input/Output
-    Vec x, f, g, y, w
-    !x    - current iterate
-    !f    - residual evaluated at x
-    !y    - search direction
-    !w    - work vector -> On output, new iterate
-    !g    - residual evaluated at new iterate y
-
-    integer(kind=intType) :: nfevals
-    integer(kind=intType) :: ierr
-    logical :: flag
-    real(kind=alwaysRealType) :: step
-    real(kind=realType) :: tmp
-    flag = .True.
-    ! We just accept the step and compute the new residual at the new iterate
-    nfevals = 0
-    step = nk_fixedStep
-    tmp = -step
-
-    call VecWAXPY(w, tmp, y, x, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Compute new function:
-    call setW(w)
-    call computeResidualNK(useUpdateIntermed = .True.)
-    call setRVec(g)
-
-    nfevals = nfevals + 1
-  end subroutine LSNone
-
-  subroutine LSNM(x, f, g, y, w, fnorm, ynorm, gnorm, nfevals, flag, step)
+        call VecDot(f, w, initslope, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    use constants
-    use utils, only : EChk
-    implicit none
+        if (initslope > 0.0_realType) then
+            initslope = -initslope
+        end if
 
-    ! Input/Output
-    Vec x, f, g, y, w
-    !x    - current iterate
-    !f    - residual evaluated at x
-    !y    - search direction
-    !w    - work vector -> On output, new iterate
-    !g    - residual evaluated at new iterate y
-
-    real(kind=alwaysRealType) :: fnorm, gnorm, ynorm
-    real(kind=realType) :: alpha
-    real(kind=alwaysRealType) :: step
-    logical :: flag
-    integer(kind=intType) :: nfevals
-    !   Note that for line search purposes we work with with the related
-    !   minimization problem:
-    !      min  z(x):  R^n -> R,
-    !   where z(x) = .5 * fnorm*fnorm, and fnorm = || f ||_2.
-    !
-    real(kind=realType) :: initslope, gamma, sigma,  max_val
-    integer(kind=intType) :: ierr, iter, j
-
-    ! Set some defaults:
-    gamma = 1e-3_realType
-    sigma = 0.5_realType
-
-    nfevals = 0
-    flag = .True.
-
-    ! Compute the two norms we need:
-    call VecNorm(y, NORM_2, ynorm, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    call VecNorm(f, NORM_2, fnorm, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    NKLSFuncEvals(iter_k) = 0.5_realType*fnorm*fnorm
-
-    call MatMult(dRdw, y, w, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-    nfevals = nfevals + 1
-
-    call VecDot(f, w, initslope, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    if (initslope > 0.0_realType)  then
-       initslope = -initslope
-    end if
-
-    if (initslope == 0.0_realType) then
-       initslope = -1.0_realType
-    end if
-
-    alpha = 1.0 ! Initial step length:
-    backtrack: do iter=1, 10
-
-       ! Compute new x value:
-       call VecWAXPY(w, -alpha, y, x, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-
-       ! Compute Function @ new x (w is the work vector
-       call setW(w)
-       call computeResidualNK(useUpdateIntermed = .True.)
-       call setRVec(g)
-       nfevals = nfevals + 1
-
-       ! Compute the norm at the new trial location
-       call VecNorm(g, NORM_2, gnorm, ierr)
-       if (ierr == PETSC_ERR_FP) then ! Error code 72 floating point error
-          ! Just apply the step limit and keep going (back to the loop start)
-          alpha = alpha * sigma
-       else
-          call EChk(ierr, __FILE__, __LINE__)
-
-          max_val = NKLSFuncEvals(iter_k) + alpha*gamma*initSlope
-
-          ! Loop over the previous, m function values and find the max:
-          do j=iter_k-1, iter_k-iter_m+1, -1
-             max_val = max(max_val, NKLSFuncEvals(j) + alpha*gamma*initSlope)
-          end do
-
-          ! Sufficient reduction
-          if (0.5_realType*gnorm*gnorm <= max_val) then
-             exit backtrack
-          else
-             alpha = alpha * sigma
-          end if
-       end if
-    end do backtrack
-    step = alpha
-  end subroutine LSNM
-
-  subroutine computeResidualNK(useUpdateIntermed)
+        if (initslope == 0.0_realType) then
+            initslope = -1.0_realType
+        end if
 
-    use constants
-    use blockette, only : blocketteRes
-    implicit none
+        alpha = 1.0 ! Initial step length:
+        backtrack: do iter = 1, 10
 
-    logical, intent(in), optional :: useUpdateIntermed
-    logical :: updateIntermed
+            ! Compute new x value:
+            call VecWAXPY(w, -alpha, y, x, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-    ! Only update the time step if explicitly requested
-    updateIntermed = .false.
+            ! Compute Function @ new x (w is the work vector
+            call setW(w)
+            call computeResidualNK(useUpdateIntermed=.True.)
+            call setRVec(g)
+            nfevals = nfevals + 1
+
+            ! Compute the norm at the new trial location
+            call VecNorm(g, NORM_2, gnorm, ierr)
+            if (ierr == PETSC_ERR_FP) then ! Error code 72 floating point error
+                ! Just apply the step limit and keep going (back to the loop start)
+                alpha = alpha * sigma
+            else
+                call EChk(ierr, __FILE__, __LINE__)
 
-    if (present(useUpdateIntermed)) then
-      updateIntermed = useUpdateIntermed
-    end if
+                max_val = NKLSFuncEvals(iter_k) + alpha * gamma * initSlope
 
-    ! Shell function to maintain backward compatibility with code using computeResidualNK
-    call blocketteRes(useUpdateIntermed = updateIntermed)
+                ! Loop over the previous, m function values and find the max:
+                do j = iter_k - 1, iter_k - iter_m + 1, -1
+                    max_val = max(max_val, NKLSFuncEvals(j) + alpha * gamma * initSlope)
+                end do
 
-  end subroutine computeResidualNK
+                ! Sufficient reduction
+                if (0.5_realType * gnorm * gnorm <= max_val) then
+                    exit backtrack
+                else
+                    alpha = alpha * sigma
+                end if
+            end if
+        end do backtrack
+        step = alpha
+    end subroutine LSNM
 
-  subroutine applyPC(in_vec, out_vec, ndof)
+    subroutine computeResidualNK(useUpdateIntermed)
 
-    ! Apply the NK PC to the in_vec. This subroutine is ONLY used as a
-    ! preconditioner for a global Aero-Structural Newton-Krylov Method
+        use constants
+        use blockette, only: blocketteRes
+        implicit none
 
-    use constants
-    use utils, only : EChk
+        logical, intent(in), optional :: useUpdateIntermed
+        logical :: updateIntermed
 
-    implicit none
+        ! Only update the time step if explicitly requested
+        updateIntermed = .false.
 
-    ! Input/Output
-    integer(kind=intType) :: ndof
-    real(kind=realType), dimension(ndof), intent(in)    :: in_vec
-    real(kind=realTYpe), dimension(ndof), intent(inout) :: out_vec
+        if (present(useUpdateIntermed)) then
+            updateIntermed = useUpdateIntermed
+        end if
 
-    ! Working Variables
-    integer(kind=intType) :: ierr
+        ! Shell function to maintain backward compatibility with code using computeResidualNK
+        call blocketteRes(useUpdateIntermed=updateIntermed)
 
-    ! Setup the NKsolver if not already done so
-    if (.not. NK_solverSetup) then
-       call setupNKSolver
-    end if
+    end subroutine computeResidualNK
 
-    ! We possibly need to re-form the jacobian
-    if (mod(NK_iter, NK_jacobianLag) == 0) then
-       call FormJacobianNK()
-    end if
+    subroutine applyPC(in_vec, out_vec, ndof)
 
-    ! Place the two arrays into two vectos. We reuse 'work' and 'g'.
-    call VecPlaceArray(work, in_vec, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        ! Apply the NK PC to the in_vec. This subroutine is ONLY used as a
+        ! preconditioner for a global Aero-Structural Newton-Krylov Method
 
-    call VecPlaceArray(g, out_vec, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        use constants
+        use utils, only: EChk
 
-    ! Set the base vec
-    call setwVec(wVec)
+        implicit none
 
-    ! Set the base vec
-    call setwVec(wVec)
-    call formFunction_mf(ctx, wVec, baseRes, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-    call MatMFFDSetBase(dRdW, wVec, baseRes, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-    ! This needs to be a bit better...
-    call KSPSetTolerances(NK_KSP, 1e-8, 1e-16, 10.0, &
-         applyPCSubSpaceSize, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        ! Input/Output
+        integer(kind=intType) :: ndof
+        real(kind=realType), dimension(ndof), intent(in) :: in_vec
+        real(kind=realTYpe), dimension(ndof), intent(inout) :: out_vec
 
-    ! Actually do the Linear Krylov Solve
-    call KSPSolve(NK_KSP, work, g, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        ! Working Variables
+        integer(kind=intType) :: ierr
 
-    ! Reset the array pointers:
-    call VecResetArray(work, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        ! Setup the NKsolver if not already done so
+        if (.not. NK_solverSetup) then
+            call setupNKSolver
+        end if
 
-    call VecResetArray(g, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        ! We possibly need to re-form the jacobian
+        if (mod(NK_iter, NK_jacobianLag) == 0) then
+            call FormJacobianNK()
+        end if
 
-    NK_iter = NK_iter + 1
+        ! Place the two arrays into two vectos. We reuse 'work' and 'g'.
+        call VecPlaceArray(work, in_vec, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-  end subroutine applyPC
+        call VecPlaceArray(g, out_vec, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-  subroutine applyAdjointPC(in_vec, out_vec, ndof)
+        ! Set the base vec
+        call setwVec(wVec)
 
-    ! Apply the Adjoint PC to the in_vec. This subroutine is ONLY used as a
-    ! preconditioner for a global Aero-Structural Krylov Method
+        ! Set the base vec
+        call setwVec(wVec)
+        call formFunction_mf(ctx, wVec, baseRes, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+        call MatMFFDSetBase(dRdW, wVec, baseRes, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+        ! This needs to be a bit better...
+        call KSPSetTolerances(NK_KSP, 1e-8, 1e-16, 10.0, &
+                              applyPCSubSpaceSize, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    use constants
-    use ADjointPETSc, only : adjointKSP, KSP_NORM_NONE, PETSC_DEFAULT_REAL, &
-         psi_like1, psi_like2
-    use inputAdjoint, only : applyAdjointPCSubSpaceSize
-    use utils, only : EChk
-    implicit none
+        ! Actually do the Linear Krylov Solve
+        call KSPSolve(NK_KSP, work, g, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    ! Input/Output
-    integer(kind=intType) :: ndof
-    real(kind=realType), dimension(ndof), intent(in)    :: in_vec
-    real(kind=realTYpe), dimension(ndof), intent(inout) :: out_vec
+        ! Reset the array pointers:
+        call VecResetArray(work, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    ! Working Variables
-    integer(kind=intType) :: ierr
+        call VecResetArray(g, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    ! Hijack adjoint and adjointRes with in_vec and out_vec
-    call VecPlaceArray(psi_like1, in_vec, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        NK_iter = NK_iter + 1
 
-    call VecPlaceArray(psi_like2, out_vec, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+    end subroutine applyPC
 
-    ! Set KSP_NORM Type to none. Implictly turns off convergence
-    ! check. Since we just want to run a fixed number of iterations this
-    ! is fine. The should be set regardless of the KSPType.
+    subroutine applyAdjointPC(in_vec, out_vec, ndof)
 
-    call KSPSetNormType(adjointKSP, KSP_NORM_NONE, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        ! Apply the Adjoint PC to the in_vec. This subroutine is ONLY used as a
+        ! preconditioner for a global Aero-Structural Krylov Method
 
-    ! This needs to be a bit better...
-    call KSPSetTolerances(adjointKSP, PETSC_DEFAULT_REAL, &
-         PETSC_DEFAULT_REAL, PETSC_DEFAULT_REAL, &
-         applyAdjointPCSubSpaceSize, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        use constants
+        use ADjointPETSc, only: adjointKSP, KSP_NORM_NONE, PETSC_DEFAULT_REAL, &
+                                psi_like1, psi_like2
+        use inputAdjoint, only: applyAdjointPCSubSpaceSize
+        use utils, only: EChk
+        implicit none
 
-    ! Actually do the Linear Krylov Solve
-    call KSPSolve(adjointKSP, psi_like1, psi_like2, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        ! Input/Output
+        integer(kind=intType) :: ndof
+        real(kind=realType), dimension(ndof), intent(in) :: in_vec
+        real(kind=realTYpe), dimension(ndof), intent(inout) :: out_vec
 
-    ! Reset the array pointers:
-    call VecResetArray(psi_like1, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        ! Working Variables
+        integer(kind=intType) :: ierr
 
-    call VecResetArray(psi_like2, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        ! Hijack adjoint and adjointRes with in_vec and out_vec
+        call VecPlaceArray(psi_like1, in_vec, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-  end subroutine applyAdjointPC
+        call VecPlaceArray(psi_like2, out_vec, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-  subroutine setWVec(wVec)
+        ! Set KSP_NORM Type to none. Implictly turns off convergence
+        ! check. Since we just want to run a fixed number of iterations this
+        ! is fine. The should be set regardless of the KSPType.
 
-    ! Set the current residual in dw into the PETSc Vector
+        call KSPSetNormType(adjointKSP, KSP_NORM_NONE, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    use constants
-    use blockPointers, only : nDom, il, jl, kl, w
-    use inputtimespectral, only : ntimeIntervalsSpectral
-    use flowvarrefstate, only : nw
-    use utils, only : setPointers, EChk
-    implicit none
+        ! This needs to be a bit better...
+        call KSPSetTolerances(adjointKSP, PETSC_DEFAULT_REAL, &
+                              PETSC_DEFAULT_REAL, PETSC_DEFAULT_REAL, &
+                              applyAdjointPCSubSpaceSize, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    Vec   wVec
-    integer(kind=intType) :: ierr,nn,sps,i,j,k,l,ii
-    real(kind=realType),pointer :: wvec_pointer(:)
-
-    call VecGetArrayF90(wVec,wvec_pointer,ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-    ii = 1
-    do nn=1,nDom
-       do sps=1,nTimeIntervalsSpectral
-          call setPointers(nn,1_intType,sps)
-          ! Copy off w to wVec
-          do k=2,kl
-             do j=2,jl
-                do i=2,il
-                   do l=1,nw
-                      wvec_pointer(ii) = w(i,j,k,l)
-                      ii = ii + 1
-                   end do
-                end do
-             end do
-          end do
-       end do
-    end do
+        ! Actually do the Linear Krylov Solve
+        call KSPSolve(adjointKSP, psi_like1, psi_like2, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    call VecRestoreArrayF90(wVec,wvec_pointer,ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+        ! Reset the array pointers:
+        call VecResetArray(psi_like1, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-  end subroutine setWVec
+        call VecResetArray(psi_like2, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-  subroutine setRVec(rVec, flowRes, turbRes, totalRes)
+    end subroutine applyAdjointPC
 
-    ! Set the current residual in dw into the PETSc Vector
-    use constants
-    use blockPointers, only : nDom, volRef, il, jl, kl, dw
-    use inputtimespectral, only : nTimeIntervalsSpectral
-    use flowvarrefstate, only : nw, nwf, nt1, nt2
-    use inputIteration, only : turbResScale
-    use utils, only : setPointers, EChk
-    use communication, only : adflow_comm_world
-    implicit none
+    subroutine setWVec(wVec)
 
-    Vec    rVec
-    integer(kind=intType) :: ierr,nn,sps,i,j,k,l,ii
-    real(kind=realType),pointer :: rvec_pointer(:)
-    real(Kind=realType) :: ovv
-    real(kind=alwaysRealType), intent(out), optional :: flowRes, turbRes, totalRes
-    real(kind=realType) :: tmp, tmp2(2), flowResLocal, turbResLocal
-
-    flowResLocal = zero
-    turbResLocal = zero
-
-    call VecGetArrayF90(rVec,rvec_pointer,ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-    ii = 1
-
-    do nn=1,nDom
-       do sps=1,nTimeIntervalsSpectral
-          call setPointers(nn,1_intType,sps)
-          ! Copy off dw/vol to rVec
-          do k=2, kl
-             do j=2, jl
-                do i=2, il
-                   ovv = 1/volRef(i, j, k)
-                   do l=1,nwf
-                      tmp =  dw(i, j, k, l)*ovv
-                      rvec_pointer(ii) = tmp
-                      ii = ii + 1
-                      flowResLocal = flowResLocal + tmp**2
-                   end do
-                   do l=nt1,nt2
-                      tmp = dw(i, j, k, l)*ovv*turbResScale(l-nt1+1)
-                      rvec_pointer(ii) = tmp
-                      ii = ii + 1
-                      turbResLocal = turbResLocal + tmp**2
-                   end do
-                end do
-             end do
-          end do
-       end do
-    end do
-
-    call VecRestoreArrayF90(rVec,rvec_pointer,ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    if (present(flowRes) .and. present(turbRes) .and. present(totalRes)) then
-       call mpi_allreduce((/flowResLocal, turbResLocal/), tmp2, 2, adflow_real, &
-            mpi_sum, ADflow_comm_world, ierr)
-       flowRes = sqrt(tmp2(1))
-       totalRes = sqrt(tmp2(1) + tmp2(2))
-       if (tmp2(2) > zero) then
-          turbRes = sqrt(tmp2(2))
-       else
-          turbRes = zero
-       end if
-    end if
-
-  end subroutine setRVec
-
-  subroutine setW(wVec)
+        ! Set the current residual in dw into the PETSc Vector
 
-    use constants
-    use blockPointers, only : nDom, il, jl, kl, w
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use flowVarRefState, only : nwf, nt1, nt2, winf
-    use utils, only : setPointers, EChk
+        use constants
+        use blockPointers, only: nDom, il, jl, kl, w
+        use inputtimespectral, only: ntimeIntervalsSpectral
+        use flowvarrefstate, only: nw
+        use utils, only: setPointers, EChk
+        implicit none
 
-    implicit none
+        Vec wVec
+        integer(kind=intType) :: ierr, nn, sps, i, j, k, l, ii
+        real(kind=realType), pointer :: wvec_pointer(:)
 
-    Vec  wVec
-    integer(kind=intType) :: ierr,nn,sps,i,j,k,l,ii
-    real(kind=realType),pointer :: wvec_pointer(:)
-
-
-    call VecGetArrayReadF90(wVec,wvec_pointer,ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ii = 1
-    do nn=1,nDom
-       do sps=1,nTimeIntervalsSpectral
-          call setPointers(nn,1_intType,sps)
-
-          do k=2,kl
-             do j=2,jl
-                do i=2,il
-                   do l=1,nwf
-                      w(i,j,k,l) = wvec_pointer(ii)
-                      ii = ii + 1
-                   end do
-                   ! Clip the turb to prevent negative turb SA
-                   ! values. This is similar to the pressure
-                   ! clip. Need to check this for other Turb models.
-                   do l=nt1, nt2
-                      w(i, j, k, l) = max(1e-6*winf(l), wvec_pointer(ii))
-                      ii = ii + 1
-                   end do
+        call VecGetArrayF90(wVec, wvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+        ii = 1
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, 1_intType, sps)
+                ! Copy off w to wVec
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            do l = 1, nw
+                                wvec_pointer(ii) = w(i, j, k, l)
+                                ii = ii + 1
+                            end do
+                        end do
+                    end do
                 end do
-             end do
-          end do
-       end do
-    end do
+            end do
+        end do
 
-    call VecRestoreArrayReadF90(wVec,wvec_pointer,ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+        call VecRestoreArrayF90(wVec, wvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-  end subroutine setW
+    end subroutine setWVec
 
-  subroutine getStates(states,ndimw)
-    ! Return the state vector, w to Python
+    subroutine setRVec(rVec, flowRes, turbRes, totalRes)
 
-    use constants
-    use blockPointers, only : il, jl, kl, nDom, w
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use flowvarrefstate, only : nw
-    use utils, only : setPointers
+        ! Set the current residual in dw into the PETSc Vector
+        use constants
+        use blockPointers, only: nDom, volRef, il, jl, kl, dw
+        use inputtimespectral, only: nTimeIntervalsSpectral
+        use flowvarrefstate, only: nw, nwf, nt1, nt2
+        use inputIteration, only: turbResScale
+        use utils, only: setPointers, EChk
+        use communication, only: adflow_comm_world
+        implicit none
 
-    implicit none
+        Vec rVec
+        integer(kind=intType) :: ierr, nn, sps, i, j, k, l, ii
+        real(kind=realType), pointer :: rvec_pointer(:)
+        real(Kind=realType) :: ovv
+        real(kind=alwaysRealType), intent(out), optional :: flowRes, turbRes, totalRes
+        real(kind=realType) :: tmp, tmp2(2), flowResLocal, turbResLocal
+
+        flowResLocal = zero
+        turbResLocal = zero
 
-    integer(kind=intType),intent(in):: ndimw
-    real(kind=realType),dimension(ndimw),intent(out) :: states(ndimw)
-
-    ! Local Variables
-    integer(kind=intType) :: nn,i,j,k,l,counter,sps
-
-    counter = 0
-    do nn=1,nDom
-       do sps=1,nTimeIntervalsSpectral
-          call setPointers(nn,1,sps)
-          do k=2,kl
-             do j=2,jl
-                do i=2,il
-                   do l=1,nw
-                      counter = counter + 1
-                      states(counter) = w(i,j,k,l)
-                   end do
+        call VecGetArrayF90(rVec, rvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+        ii = 1
+
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, 1_intType, sps)
+                ! Copy off dw/vol to rVec
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            ovv = 1 / volRef(i, j, k)
+                            do l = 1, nwf
+                                tmp = dw(i, j, k, l) * ovv
+                                rvec_pointer(ii) = tmp
+                                ii = ii + 1
+                                flowResLocal = flowResLocal + tmp**2
+                            end do
+                            do l = nt1, nt2
+                                tmp = dw(i, j, k, l) * ovv * turbResScale(l - nt1 + 1)
+                                rvec_pointer(ii) = tmp
+                                ii = ii + 1
+                                turbResLocal = turbResLocal + tmp**2
+                            end do
+                        end do
+                    end do
                 end do
-             end do
-          end do
-       end do
-    end do
-  end subroutine getStates
+            end do
+        end do
 
-  subroutine getRes(res,ndimw)
+        call VecRestoreArrayF90(rVec, rvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    ! Compute the residual and return result to Python
-    use constants
-    use blockPointers, only : il, jl, kl, nDom, dw, volRef
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use flowvarrefstate, only : nw
-    use utils, only : setPointers
+        if (present(flowRes) .and. present(turbRes) .and. present(totalRes)) then
+            call mpi_allreduce((/flowResLocal, turbResLocal/), tmp2, 2, adflow_real, &
+                               mpi_sum, ADflow_comm_world, ierr)
+            flowRes = sqrt(tmp2(1))
+            totalRes = sqrt(tmp2(1) + tmp2(2))
+            if (tmp2(2) > zero) then
+                turbRes = sqrt(tmp2(2))
+            else
+                turbRes = zero
+            end if
+        end if
 
-    implicit none
+    end subroutine setRVec
 
-    integer(kind=intType),intent(in):: ndimw
-    real(kind=realType),dimension(ndimw),intent(inout) :: res(ndimw)
-
-    ! Local Variables
-    integer(kind=intType) :: nn,i,j,k,l,counter,sps
-    real(kind=realType) :: ovv
-
-    call computeResidualNK(useUpdateIntermed = .True.)
-    counter = 0
-    do nn=1,nDom
-       do sps=1,nTimeIntervalsSpectral
-          call setPointers(nn,1,sps)
-          do k=2,kl
-             do j=2,jl
-                do i=2,il
-                   ovv = one/volRef(i,j,k)
-                   do l=1,nw
-                      counter = counter + 1
-                      res(counter) = dw(i,j,k,l)*ovv
-                   end do
-                end do
-             end do
-          end do
-       end do
-    end do
+    subroutine setW(wVec)
 
-  end subroutine getRes
+        use constants
+        use blockPointers, only: nDom, il, jl, kl, w
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use flowVarRefState, only: nwf, nt1, nt2, winf
+        use utils, only: setPointers, EChk
 
-  subroutine setStates(states,ndimw)
+        implicit none
 
-    ! Take in externallly generated states and set them in ADflow
-    use constants
-    use blockPointers, only : il, jl, kl, nDom, w
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use flowvarrefstate, only : nw
-    use utils, only : setPointers
+        Vec wVec
+        integer(kind=intType) :: ierr, nn, sps, i, j, k, l, ii
+        real(kind=realType), pointer :: wvec_pointer(:)
 
-    implicit none
+        call VecGetArrayReadF90(wVec, wvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    integer(kind=intType),intent(in):: ndimw
-    real(kind=realType),dimension(ndimw),intent(in) :: states(ndimw)
-
-    ! Local Variables
-    integer(kind=intType) :: nn,i,j,k,l,counter,sps
-
-    counter = 0
-    do nn=1,nDom
-       do sps=1,nTimeIntervalsSpectral
-          call setPointers(nn,1,sps)
-          do k=2,kl
-             do j=2,jl
-                do i=2,il
-                   do l=1,nw
-                      counter = counter + 1
-                      w(i,j,k,l) = states(counter)
-                   end do
+        ii = 1
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, 1_intType, sps)
+
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            do l = 1, nwf
+                                w(i, j, k, l) = wvec_pointer(ii)
+                                ii = ii + 1
+                            end do
+                            ! Clip the turb to prevent negative turb SA
+                            ! values. This is similar to the pressure
+                            ! clip. Need to check this for other Turb models.
+                            do l = nt1, nt2
+                                w(i, j, k, l) = max(1e-6 * winf(l), wvec_pointer(ii))
+                                ii = ii + 1
+                            end do
+                        end do
+                    end do
                 end do
-             end do
-          end do
-       end do
-    end do
-  end subroutine setStates
+            end do
+        end do
 
-  subroutine getInfoSize(iSize)
-    use constants
-    use blockPointers, only : ib, jb, kb, nDom
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use flowvarrefstate, only : nw, viscous, eddymodel
-    use utils, only : setPointers
+        call VecRestoreArrayReadF90(wVec, wvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    implicit none
-    integer(kind=intType), intent(out) :: iSize
-    integer(kind=intType) :: nn, sps, nc
-    ! Determine the size of a flat array needed to store w, P, ( and
-    ! rlv, rev if necessary) with full double halos.
-    iSize = 0
-    do nn=1,nDom
-       do sps=1,nTimeIntervalsSpectral
-          call setPointers(nn,1_intType,sps)
-          nc = (kb+1)*(jb+1)*(ib+1)
-          iSize = iSize + nc*(nw + 1) ! plus 1 for the P
-          if (viscous) then
-             iSize = iSize + nc
-          end if
-          if (eddyModel) then
-             iSize = iSize + nc
-          end if
-       end do
-    end do
-  end subroutine getInfoSize
-
-  subroutine setInfo(info, iSize)
+    end subroutine setW
 
-    use constants
-    use blockPointers, only : w, p, ib, jb, kb, rlv, rev, nDom
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use flowvarrefstate, only : nw, viscous, eddymodel
-    use utils, only : setPointers
-    implicit none
+    subroutine getStates(states, ndimw)
+        ! Return the state vector, w to Python
 
-    integer(kind=intType), intent(in) :: iSize
-    real(kind=realType), intent(in), dimension(iSize) :: info
-    integer(kind=intType) :: nn, counter, i, j, k, l, sps
-    ! Determine the size of a flat array needed to store w, P, ( and
-    ! rlv, rev if necessary) with full double halos.
-    counter = 0
-    do nn=1,nDom
-       do sps=1,nTimeIntervalsSpectral
-          call setPointers(nn,1,sps)
-          do k=0,kb
-             do j=0,jb
-                do i=0,ib
-                   do l=1,nw
-                      counter = counter + 1
-                      w(i,j,k,l) = info(counter)
-                   end do
-
-                   counter = counter + 1
-                   P(i,j,k) = info(counter)
-
-                   if (viscous) then
-                      counter = counter + 1
-                      rlv(i,j,k) = info(counter)
-                   end if
-
-                   if (eddyModel) then
-                      counter = counter + 1
-                      rev(i,j,k) = info(counter)
-                   end if
-                end do
-             end do
-          end do
-       end do
-    end do
-  end subroutine setInfo
+        use constants
+        use blockPointers, only: il, jl, kl, nDom, w
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use flowvarrefstate, only: nw
+        use utils, only: setPointers
 
-  subroutine getInfo(info, iSize)
+        implicit none
 
-    use constants
-    use blockPointers, only : w, p, ib, jb, kb, rlv, rev, nDom
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use flowvarrefstate, only : nw, viscous, eddymodel
-    use utils, only : setPointers
+        integer(kind=intType), intent(in) :: ndimw
+        real(kind=realType), dimension(ndimw), intent(out) :: states(ndimw)
 
-    implicit none
+        ! Local Variables
+        integer(kind=intType) :: nn, i, j, k, l, counter, sps
 
-    integer(kind=intType), intent(in) :: iSize
-    real(kind=realType), intent(out), dimension(iSize) :: info
-    integer(kind=intType) ::  nn, counter, i, j, k, l, sps
-    ! Determine the size of a flat array needed to store w, P, ( and
-    ! rlv, rev if necessary) with full double halos.
-    counter = 0
-    do nn=1,nDom
-       do sps=1,nTimeIntervalsSpectral
-          call setPointers(nn,1,sps)
-          do k=0,kb
-             do j=0,jb
-                do i=0,ib
-                   do l=1,nw
-                      counter = counter + 1
-                      info(counter) = w(i,j,k,l)
-                   end do
-
-                   counter = counter + 1
-                   info(counter) = P(i,j,k)
-
-                   if (viscous) then
-                      counter = counter + 1
-                      info(counter) = rlv(i,j,k)
-                   end if
-
-                   if (eddyModel) then
-                      counter = counter + 1
-                      info(counter) = rev(i,j,k)
-                   end if
+        counter = 0
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, 1, sps)
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            do l = 1, nw
+                                counter = counter + 1
+                                states(counter) = w(i, j, k, l)
+                            end do
+                        end do
+                    end do
+                end do
+            end do
+        end do
+    end subroutine getStates
+
+    subroutine getRes(res, ndimw)
+
+        ! Compute the residual and return result to Python
+        use constants
+        use blockPointers, only: il, jl, kl, nDom, dw, volRef
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use flowvarrefstate, only: nw
+        use utils, only: setPointers
+
+        implicit none
+
+        integer(kind=intType), intent(in) :: ndimw
+        real(kind=realType), dimension(ndimw), intent(inout) :: res(ndimw)
+
+        ! Local Variables
+        integer(kind=intType) :: nn, i, j, k, l, counter, sps
+        real(kind=realType) :: ovv
+
+        call computeResidualNK(useUpdateIntermed=.True.)
+        counter = 0
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, 1, sps)
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            ovv = one / volRef(i, j, k)
+                            do l = 1, nw
+                                counter = counter + 1
+                                res(counter) = dw(i, j, k, l) * ovv
+                            end do
+                        end do
+                    end do
                 end do
-             end do
-          end do
-       end do
-    end do
-  end subroutine getInfo
+            end do
+        end do
 
-  subroutine getEWTol(norm, old_norm, rtol_last, rtol)
+    end subroutine getRes
 
-    use constants
-    implicit none
+    subroutine setStates(states, ndimw)
 
-    ! There are the default EW Parameters from PETSc. They seem to work well
-    !version:           2
-    !rtol_0:  0.300000000000000
-    !rtol_max:  0.900000000000000
-    !gamma:   1.00000000000000
-    !alpha:   1.61803398874989
-    !alpha2:   1.61803398874989
-    !threshold:  0.100000000000000
-
-    real(kind=alwaysrealType), intent(in) :: norm, old_norm, rtol_last
-    real(kind=alwaysrealType), intent(out) :: rtol
-    real(kind=alwaysrealType) :: rtol_max, gamma, alpha, alpha2, threshold, stol
-
-    rtol_max  = 0.8_realType
-    gamma     = 1.0_realType
-    alpha     = (1.0_realType+sqrt(five))/2.0_realType
-    alpha2    = (1.0_realType+sqrt(five))/2.0_realType
-    threshold = 0.10_realType
-    ! We use version 2:
-    rtol = gamma*(norm/old_norm)**alpha
-    stol = gamma*rtol_last**alpha
-
-    if (stol > threshold) then
-       rtol = max(rtol, stol)
-    end if
-
-    ! Safeguard: avoid rtol greater than one
-    rtol = min(rtol, rtol_max)
-
-  end subroutine getEWTol
-end module NKSolver
+        ! Take in externallly generated states and set them in ADflow
+        use constants
+        use blockPointers, only: il, jl, kl, nDom, w
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use flowvarrefstate, only: nw
+        use utils, only: setPointers
 
-module ANKSolver
+        implicit none
 
-  use constants
-#include <petsc/finclude/petsc.h>
-  use petsc
-  implicit none
-
-  Mat  dRdw, dRdwPre
-  Vec wVec, rVec, deltaW, baseRes
-  KSP  ANK_KSP
-
-  ! Turb KSP related PETSc objects
-  Mat  dRdwTurb, dRdwPreTurb
-  Vec wVecTurb, rVecTurb, deltaWTurb, baseResTurb
-  KSP  ANK_KSPTurb
-
-  PetscFortranAddr   ctx(1)
-
-  ! Options for ANK Solver
-  logical :: useANKSolver
-  integer(kind=intType) :: ANK_jacobianLag
-  integer(kind=intType) :: ANK_subSpace
-  integer(kind=intType) :: ANK_maxIter
-  integer(kind=intType) :: ANK_asmOverlap
-  integer(kind=intType) :: ANK_iluFill
-  integer(kind=intType) :: ANK_innerPreConIts
-  integer(kind=intType) :: ANK_outerPreConIts
-  real(kind=realType)   :: ANK_rtol
-  real(kind=realType)   :: ANK_linResMax
-  real(kind=realType)   :: ANK_switchTol
-  real(kind=realType)   :: ANK_divTol = 10
-  logical :: ANK_useTurbDADI
-  logical :: ANK_useApproxSA
-  real(kind=realType) :: ANK_turbcflscale
-  logical :: ANK_useFullVisc
-  logical :: ANK_ADPC
-  logical :: ANK_turbDebug
-  logical :: ANK_useMatrixFree
-  integer(kind=intType) :: ANK_nsubIterTurb
-
-  ! Misc variables
-  real(kind=realType) :: ANK_CFL, ANK_CFL0, ANK_CFLLimit, ANK_CFLFactor, ANK_CFLCutback
-  real(kind=realType) :: ANK_CFLMin0, ANK_CFLMin, ANK_CFLMinBase, ANK_CFLExponent
-  real(kind=realType) :: ANK_stepMin, ANK_StepFactor, ANK_constCFLStep
-  real(kind=realType) :: ANK_secondOrdSwitchTol, ANK_coupledSwitchTol
-  real(kind=realType) :: ANK_physLSTol, ANK_unstdyLSTol
-  real(kind=realType) :: ANK_pcUpdateTol
-  real(kind=realType) :: ANK_pcUpdateCutoff
-  real(kind=realType) :: lambda
-  logical :: ANK_solverSetup=.False.
-  integer(kind=intTYpe) :: ANK_iter
-  integer(kind=intType) :: nState
-  real(kind=alwaysRealType) :: totalR_old, totalR_pcUpdate ! for recording the previous residual
-  real(kind=alwaysRealType) :: rtolLast, linResOld ! for recording the previous relativel tolerance for Eisenstat-Walker
-  logical :: ANK_useDissApprox
-
-  ! Turb KSP related modifications
-  logical :: ANK_coupled=.False.
-  logical :: ANK_turbSetup=.False.
-  integer(kind=intType) :: ANK_iterTurb, nStateTurb
-  real(kind=realType) :: lambdaTurb, ANK_physLSTolTurb
-  real(kind=alwaysRealType) :: linResOldTurb
+        integer(kind=intType), intent(in) :: ndimw
+        real(kind=realType), dimension(ndimw), intent(in) :: states(ndimw)
 
-contains
+        ! Local Variables
+        integer(kind=intType) :: nn, i, j, k, l, counter, sps
+
+        counter = 0
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, 1, sps)
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            do l = 1, nw
+                                counter = counter + 1
+                                w(i, j, k, l) = states(counter)
+                            end do
+                        end do
+                    end do
+                end do
+            end do
+        end do
+    end subroutine setStates
+
+    subroutine getInfoSize(iSize)
+        use constants
+        use blockPointers, only: ib, jb, kb, nDom
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use flowvarrefstate, only: nw, viscous, eddymodel
+        use utils, only: setPointers
+
+        implicit none
+        integer(kind=intType), intent(out) :: iSize
+        integer(kind=intType) :: nn, sps, nc
+        ! Determine the size of a flat array needed to store w, P, ( and
+        ! rlv, rev if necessary) with full double halos.
+        iSize = 0
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, 1_intType, sps)
+                nc = (kb + 1) * (jb + 1) * (ib + 1)
+                iSize = iSize + nc * (nw + 1) ! plus 1 for the P
+                if (viscous) then
+                    iSize = iSize + nc
+                end if
+                if (eddyModel) then
+                    iSize = iSize + nc
+                end if
+            end do
+        end do
+    end subroutine getInfoSize
+
+    subroutine setInfo(info, iSize)
+
+        use constants
+        use blockPointers, only: w, p, ib, jb, kb, rlv, rev, nDom
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use flowvarrefstate, only: nw, viscous, eddymodel
+        use utils, only: setPointers
+        implicit none
+
+        integer(kind=intType), intent(in) :: iSize
+        real(kind=realType), intent(in), dimension(iSize) :: info
+        integer(kind=intType) :: nn, counter, i, j, k, l, sps
+        ! Determine the size of a flat array needed to store w, P, ( and
+        ! rlv, rev if necessary) with full double halos.
+        counter = 0
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, 1, sps)
+                do k = 0, kb
+                    do j = 0, jb
+                        do i = 0, ib
+                            do l = 1, nw
+                                counter = counter + 1
+                                w(i, j, k, l) = info(counter)
+                            end do
+
+                            counter = counter + 1
+                            P(i, j, k) = info(counter)
+
+                            if (viscous) then
+                                counter = counter + 1
+                                rlv(i, j, k) = info(counter)
+                            end if
+
+                            if (eddyModel) then
+                                counter = counter + 1
+                                rev(i, j, k) = info(counter)
+                            end if
+                        end do
+                    end do
+                end do
+            end do
+        end do
+    end subroutine setInfo
+
+    subroutine getInfo(info, iSize)
+
+        use constants
+        use blockPointers, only: w, p, ib, jb, kb, rlv, rev, nDom
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use flowvarrefstate, only: nw, viscous, eddymodel
+        use utils, only: setPointers
+
+        implicit none
+
+        integer(kind=intType), intent(in) :: iSize
+        real(kind=realType), intent(out), dimension(iSize) :: info
+        integer(kind=intType) :: nn, counter, i, j, k, l, sps
+        ! Determine the size of a flat array needed to store w, P, ( and
+        ! rlv, rev if necessary) with full double halos.
+        counter = 0
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, 1, sps)
+                do k = 0, kb
+                    do j = 0, jb
+                        do i = 0, ib
+                            do l = 1, nw
+                                counter = counter + 1
+                                info(counter) = w(i, j, k, l)
+                            end do
+
+                            counter = counter + 1
+                            info(counter) = P(i, j, k)
+
+                            if (viscous) then
+                                counter = counter + 1
+                                info(counter) = rlv(i, j, k)
+                            end if
+
+                            if (eddyModel) then
+                                counter = counter + 1
+                                info(counter) = rev(i, j, k)
+                            end if
+                        end do
+                    end do
+                end do
+            end do
+        end do
+    end subroutine getInfo
+
+    subroutine getEWTol(norm, old_norm, rtol_last, rtol)
+
+        use constants
+        implicit none
+
+        ! There are the default EW Parameters from PETSc. They seem to work well
+        !version:           2
+        !rtol_0:  0.300000000000000
+        !rtol_max:  0.900000000000000
+        !gamma:   1.00000000000000
+        !alpha:   1.61803398874989
+        !alpha2:   1.61803398874989
+        !threshold:  0.100000000000000
+
+        real(kind=alwaysrealType), intent(in) :: norm, old_norm, rtol_last
+        real(kind=alwaysrealType), intent(out) :: rtol
+        real(kind=alwaysrealType) :: rtol_max, gamma, alpha, alpha2, threshold, stol
+
+        rtol_max = 0.8_realType
+        gamma = 1.0_realType
+        alpha = (1.0_realType + sqrt(five)) / 2.0_realType
+        alpha2 = (1.0_realType + sqrt(five)) / 2.0_realType
+        threshold = 0.10_realType
+        ! We use version 2:
+        rtol = gamma * (norm / old_norm)**alpha
+        stol = gamma * rtol_last**alpha
+
+        if (stol > threshold) then
+            rtol = max(rtol, stol)
+        end if
+
+        ! Safeguard: avoid rtol greater than one
+        rtol = min(rtol, rtol_max)
 
-  subroutine setupANKsolver
+    end subroutine getEWTol
+end module NKSolver
 
-    ! Setup the PETSc objects for the Newton-Krylov
-    ! solver. destroyNKsolver can be used to destroy the objects created
-    ! in this function
+module ANKSolver
 
     use constants
-    use stencils, only : euler_PC_stencil, N_euler_PC
-    use communication, only : adflow_comm_world, myid
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use inputIteration, only : useLinResMonitor
-    use inputPhysics, only : equations
-    use flowVarRefState, only : nw, viscous, nwf, nt1, nt2
-    use ADjointVars , only: nCellsLocal
-    use NKSolver, only : destroyNKSolver, linearResidualMonitor
-    use utils, only : EChk
-    use adjointUtils, only : myMatCreate, statePreAllocation
-    use inputadjoint, only : precondtype
-    use agmg, only : setupAGMG
+#include <petsc/finclude/petsc.h>
+    use petsc
     implicit none
 
-    ! Working Variables
-    integer(kind=intType) :: ierr, nDimw, nDimWTurb
-    integer(kind=intType) , dimension(:), allocatable :: nnzDiagonal, nnzOffDiag
-    integer(kind=intType) :: n_stencil
-    integer(kind=intType), dimension(:, :), pointer :: stencil
-    integer(kind=intType) :: level
-
-    ! Make sure we don't have memory for the approximate and exact
-    ! Newton solvers kicking around at the same time.
-    call destroyNKSolver()
+    Mat dRdw, dRdwPre
+    Vec wVec, rVec, deltaW, baseRes
+    KSP ANK_KSP
+
+    ! Turb KSP related PETSc objects
+    Mat dRdwTurb, dRdwPreTurb
+    Vec wVecTurb, rVecTurb, deltaWTurb, baseResTurb
+    KSP ANK_KSPTurb
+
+    PetscFortranAddr ctx(1)
+
+    ! Options for ANK Solver
+    logical :: useANKSolver
+    integer(kind=intType) :: ANK_jacobianLag
+    integer(kind=intType) :: ANK_subSpace
+    integer(kind=intType) :: ANK_maxIter
+    integer(kind=intType) :: ANK_asmOverlap
+    integer(kind=intType) :: ANK_iluFill
+    integer(kind=intType) :: ANK_innerPreConIts
+    integer(kind=intType) :: ANK_outerPreConIts
+    real(kind=realType) :: ANK_rtol
+    real(kind=realType) :: ANK_linResMax
+    real(kind=realType) :: ANK_switchTol
+    real(kind=realType) :: ANK_divTol = 10
+    logical :: ANK_useTurbDADI
+    logical :: ANK_useApproxSA
+    real(kind=realType) :: ANK_turbcflscale
+    logical :: ANK_useFullVisc
+    logical :: ANK_ADPC
+    logical :: ANK_turbDebug
+    logical :: ANK_useMatrixFree
+    integer(kind=intType) :: ANK_nsubIterTurb
+
+    ! Misc variables
+    real(kind=realType) :: ANK_CFL, ANK_CFL0, ANK_CFLLimit, ANK_CFLFactor, ANK_CFLCutback
+    real(kind=realType) :: ANK_CFLMin0, ANK_CFLMin, ANK_CFLMinBase, ANK_CFLExponent
+    real(kind=realType) :: ANK_stepMin, ANK_StepFactor, ANK_constCFLStep
+    real(kind=realType) :: ANK_secondOrdSwitchTol, ANK_coupledSwitchTol
+    real(kind=realType) :: ANK_physLSTol, ANK_unstdyLSTol
+    real(kind=realType) :: ANK_pcUpdateTol
+    real(kind=realType) :: ANK_pcUpdateCutoff
+    real(kind=realType) :: lambda
+    logical :: ANK_solverSetup = .False.
+    integer(kind=intTYpe) :: ANK_iter
+    integer(kind=intType) :: nState
+    real(kind=alwaysRealType) :: totalR_old, totalR_pcUpdate ! for recording the previous residual
+    real(kind=alwaysRealType) :: rtolLast, linResOld ! for recording the previous relativel tolerance for Eisenstat-Walker
+    logical :: ANK_useDissApprox
+
+    ! Turb KSP related modifications
+    logical :: ANK_coupled = .False.
+    logical :: ANK_turbSetup = .False.
+    integer(kind=intType) :: ANK_iterTurb, nStateTurb
+    real(kind=realType) :: lambdaTurb, ANK_physLSTolTurb
+    real(kind=alwaysRealType) :: linResOldTurb
 
-    if (.not. ANK_solverSetup) then
+contains
 
-       ! Determine if we are in coupled mode
-       if (ANK_coupled) then
-          nState = nw
-       else
-          nState = nwf
-       endif
+    subroutine setupANKsolver
+
+        ! Setup the PETSc objects for the Newton-Krylov
+        ! solver. destroyNKsolver can be used to destroy the objects created
+        ! in this function
+
+        use constants
+        use stencils, only: euler_PC_stencil, N_euler_PC
+        use communication, only: adflow_comm_world, myid
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use inputIteration, only: useLinResMonitor
+        use inputPhysics, only: equations
+        use flowVarRefState, only: nw, viscous, nwf, nt1, nt2
+        use ADjointVars, only: nCellsLocal
+        use NKSolver, only: destroyNKSolver, linearResidualMonitor
+        use utils, only: EChk
+        use adjointUtils, only: myMatCreate, statePreAllocation
+        use inputadjoint, only: precondtype
+        use agmg, only: setupAGMG
+        implicit none
+
+        ! Working Variables
+        integer(kind=intType) :: ierr, nDimw, nDimWTurb
+        integer(kind=intType), dimension(:), allocatable :: nnzDiagonal, nnzOffDiag
+        integer(kind=intType) :: n_stencil
+        integer(kind=intType), dimension(:, :), pointer :: stencil
+        integer(kind=intType) :: level
+
+        ! Make sure we don't have memory for the approximate and exact
+        ! Newton solvers kicking around at the same time.
+        call destroyNKSolver()
+
+        if (.not. ANK_solverSetup) then
+
+            ! Determine if we are in coupled mode
+            if (ANK_coupled) then
+                nState = nw
+            else
+                nState = nwf
+            end if
 
-       nDimW = nState * nCellsLocal(1_intTYpe) * nTimeIntervalsSpectral
+            nDimW = nState * nCellsLocal(1_intTYpe) * nTimeIntervalsSpectral
 
-       call VecCreate(ADFLOW_COMM_WORLD, wVec, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            call VecCreate(ADFLOW_COMM_WORLD, wVec, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call VecSetSizes(wVec, nDimW, PETSC_DECIDE, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            call VecSetSizes(wVec, nDimW, PETSC_DECIDE, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call VecSetBlockSize(wVec, nState, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            call VecSetBlockSize(wVec, nState, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call VecSetType(wVec, VECMPI, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            call VecSetType(wVec, VECMPI, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       !  Create duplicates for residual and delta
-       call VecDuplicate(wVec, rVec, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            !  Create duplicates for residual and delta
+            call VecDuplicate(wVec, rVec, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call VecDuplicate(wVec, deltaW, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            call VecDuplicate(wVec, deltaW, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call VecDuplicate(wVec, baseRes, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            call VecDuplicate(wVec, baseRes, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       ! Create Pre-Conditioning Matrix
-       allocate(nnzDiagonal(nCellsLocal(1_intType)*nTimeIntervalsSpectral), &
-            nnzOffDiag(nCellsLocal(1_intType)*nTimeIntervalsSpectral) )
+            ! Create Pre-Conditioning Matrix
+            allocate (nnzDiagonal(nCellsLocal(1_intType) * nTimeIntervalsSpectral), &
+                      nnzOffDiag(nCellsLocal(1_intType) * nTimeIntervalsSpectral))
 
-       stencil => euler_pc_stencil
-       n_stencil = N_euler_pc
+            stencil => euler_pc_stencil
+            n_stencil = N_euler_pc
 
-       level = 1
-       call statePreAllocation(nnzDiagonal, nnzOffDiag, nDimW/nState, stencil, n_stencil, &
-            level, .False.)
-       call myMatCreate(dRdwPre, nState, nDimW, nDimW, nnzDiagonal, nnzOffDiag, &
-            __FILE__, __LINE__)
+            level = 1
+            call statePreAllocation(nnzDiagonal, nnzOffDiag, nDimW / nState, stencil, n_stencil, &
+                                    level, .False.)
+            call myMatCreate(dRdwPre, nState, nDimW, nDimW, nnzDiagonal, nnzOffDiag, &
+                             __FILE__, __LINE__)
 
-       call matSetOption(dRdwPre, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-       deallocate(nnzDiagonal, nnzOffDiag)
+            call matSetOption(dRdwPre, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+            deallocate (nnzDiagonal, nnzOffDiag)
 
-       ! Set the mat_row_oriented option to false so that dense
-       ! subblocks can be passed in in fortran column-oriented format
-       call MatSetOption(dRdWPre, MAT_ROW_ORIENTED, PETSC_FALSE, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            ! Set the mat_row_oriented option to false so that dense
+            ! subblocks can be passed in in fortran column-oriented format
+            call MatSetOption(dRdWPre, MAT_ROW_ORIENTED, PETSC_FALSE, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       ! Setup Matrix-Free dRdw matrix and its function
-       call MatCreateMFFD(ADFLOW_COMM_WORLD, nDimW, nDimW, &
-            PETSC_DETERMINE, PETSC_DETERMINE, dRdw, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            ! Setup Matrix-Free dRdw matrix and its function
+            call MatCreateMFFD(ADFLOW_COMM_WORLD, nDimW, nDimW, &
+                               PETSC_DETERMINE, PETSC_DETERMINE, dRdw, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call MatMFFDSetFunction(dRdw, FormFunction_mf, ctx, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            call MatMFFDSetFunction(dRdw, FormFunction_mf, ctx, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call MatSetOption(dRdW, MAT_ROW_ORIENTED, PETSC_FALSE, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            call MatSetOption(dRdW, MAT_ROW_ORIENTED, PETSC_FALSE, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       if (preCondType == 'mg') then
-          call setupAGMG(drdwpre, nDimW/nState, nState)
-       end if
+            if (preCondType == 'mg') then
+                call setupAGMG(drdwpre, nDimW / nState, nState)
+            end if
 
-       !  Create the linear solver context
-       call KSPCreate(ADFLOW_COMM_WORLD, ANK_KSP, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            !  Create the linear solver context
+            call KSPCreate(ADFLOW_COMM_WORLD, ANK_KSP, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       ! Set operators for the solver
-       if (ANK_useMatrixFree) then
-           ! Matrix free drdw
-           call KSPSetOperators(ANK_KSP, dRdw, dRdwPre, ierr)
-       else
-           ! Matrix based drdw = drdwpre
-           call KSPSetOperators(ANK_KSP, dRdwPre, dRdwPre, ierr)
-       end if
-       call EChk(ierr, __FILE__, __LINE__)
+            ! Set operators for the solver
+            if (ANK_useMatrixFree) then
+                ! Matrix free drdw
+                call KSPSetOperators(ANK_KSP, dRdw, dRdwPre, ierr)
+            else
+                ! Matrix based drdw = drdwpre
+                call KSPSetOperators(ANK_KSP, dRdwPre, dRdwPre, ierr)
+            end if
+            call EChk(ierr, __FILE__, __LINE__)
 
-       if (useLinResMonitor) then
+            if (useLinResMonitor) then
 
 #if PETSC_VERSION_GE(3,8,0)
-          ! This is probably wrong. NO petsc_null_context
-          call KSPMonitorSet(ANK_KSP, LinearResidualMonitor, PETSC_NULL_FUNCTION, &
-               PETSC_NULL_FUNCTION, ierr)
+                ! This is probably wrong. NO petsc_null_context
+                call KSPMonitorSet(ANK_KSP, LinearResidualMonitor, PETSC_NULL_FUNCTION, &
+                                   PETSC_NULL_FUNCTION, ierr)
 #else
-          call KSPMonitorSet(ANK_KSP, LinearResidualMonitor, PETSC_NULL_OBJECT, &
-               PETSC_NULL_FUNCTION, ierr)
+                call KSPMonitorSet(ANK_KSP, LinearResidualMonitor, PETSC_NULL_OBJECT, &
+                                   PETSC_NULL_FUNCTION, ierr)
 
 #endif
-          call EChk(ierr, __FILE__, __LINE__)
-       end if
+                call EChk(ierr, __FILE__, __LINE__)
+            end if
 
-       ANK_solverSetup = .True.
-       ANK_iter = 0
-       ANK_useDissApprox = .False.
+            ANK_solverSetup = .True.
+            ANK_iter = 0
+            ANK_useDissApprox = .False.
 
-       ! Check if we need to set up the Turb KSP
-       if ((.not. ANK_coupled) .and. (.not. ANK_useTurbDADI) .and. equations==RANSEquations) then
-           nStateTurb = nt2-nt1+1
+            ! Check if we need to set up the Turb KSP
+            if ((.not. ANK_coupled) .and. (.not. ANK_useTurbDADI) .and. equations == RANSEquations) then
+                nStateTurb = nt2 - nt1 + 1
 
-           nDimWTurb = nStateTurb * nCellsLocal(1_intTYpe) * nTimeIntervalsSpectral
+                nDimWTurb = nStateTurb * nCellsLocal(1_intTYpe) * nTimeIntervalsSpectral
 
-           call VecCreate(ADFLOW_COMM_WORLD, wVecTurb, ierr)
-           call EChk(ierr, __FILE__, __LINE__)
+                call VecCreate(ADFLOW_COMM_WORLD, wVecTurb, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-           call VecSetSizes(wVecTurb, nDimWTurb, PETSC_DECIDE, ierr)
-           call EChk(ierr, __FILE__, __LINE__)
+                call VecSetSizes(wVecTurb, nDimWTurb, PETSC_DECIDE, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-           call VecSetBlockSize(wVecTurb, nStateTurb, ierr)
-           call EChk(ierr, __FILE__, __LINE__)
+                call VecSetBlockSize(wVecTurb, nStateTurb, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-           call VecSetType(wVecTurb, VECMPI, ierr)
-           call EChk(ierr, __FILE__, __LINE__)
+                call VecSetType(wVecTurb, VECMPI, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-           !  Create duplicates for residual and delta
-           call VecDuplicate(wVecTurb, rVecTurb, ierr)
-           call EChk(ierr, __FILE__, __LINE__)
+                !  Create duplicates for residual and delta
+                call VecDuplicate(wVecTurb, rVecTurb, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-           call VecDuplicate(wVecTurb, deltaWTurb, ierr)
-           call EChk(ierr, __FILE__, __LINE__)
+                call VecDuplicate(wVecTurb, deltaWTurb, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-           call VecDuplicate(wVecTurb, baseResTurb, ierr)
-           call EChk(ierr, __FILE__, __LINE__)
+                call VecDuplicate(wVecTurb, baseResTurb, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-           ! Create Pre-Conditioning Matrix
-           allocate(nnzDiagonal(nCellsLocal(1_intType)*nTimeIntervalsSpectral), &
-                nnzOffDiag(nCellsLocal(1_intType)*nTimeIntervalsSpectral) )
+                ! Create Pre-Conditioning Matrix
+                allocate (nnzDiagonal(nCellsLocal(1_intType) * nTimeIntervalsSpectral), &
+                          nnzOffDiag(nCellsLocal(1_intType) * nTimeIntervalsSpectral))
 
-           stencil => euler_pc_stencil
-           n_stencil = N_euler_pc
+                stencil => euler_pc_stencil
+                n_stencil = N_euler_pc
 
-           level = 1
-           call statePreAllocation(nnzDiagonal, nnzOffDiag, nDimWTurb/nStateTurb, stencil, n_stencil, &
-                level, .False.)
-           call myMatCreate(dRdwPreTurb, nStateTurb, nDimWTurb, nDimWTurb, nnzDiagonal, nnzOffDiag, &
-                __FILE__, __LINE__)
+                level = 1
+                call statePreAllocation(nnzDiagonal, nnzOffDiag, nDimWTurb / nStateTurb, stencil, n_stencil, &
+                                        level, .False.)
+                call myMatCreate(dRdwPreTurb, nStateTurb, nDimWTurb, nDimWTurb, nnzDiagonal, nnzOffDiag, &
+                                 __FILE__, __LINE__)
 
-           call matSetOption(dRdwPreTurb, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE, ierr)
-           call EChk(ierr, __FILE__, __LINE__)
-           deallocate(nnzDiagonal, nnzOffDiag)
+                call matSetOption(dRdwPreTurb, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+                deallocate (nnzDiagonal, nnzOffDiag)
 
-           ! Set the mat_row_oriented option to false so that dense
-           ! subblocks can be passed in in fortran column-oriented format
-           call MatSetOption(dRdWPreTurb, MAT_ROW_ORIENTED, PETSC_FALSE, ierr)
-           call EChk(ierr, __FILE__, __LINE__)
+                ! Set the mat_row_oriented option to false so that dense
+                ! subblocks can be passed in in fortran column-oriented format
+                call MatSetOption(dRdWPreTurb, MAT_ROW_ORIENTED, PETSC_FALSE, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-           ! Setup Matrix-Free dRdw matrix and its function
-           call MatCreateMFFD(ADFLOW_COMM_WORLD, nDimWTurb, nDimWTurb, &
-                PETSC_DETERMINE, PETSC_DETERMINE, dRdwTurb, ierr)
-           call EChk(ierr, __FILE__, __LINE__)
+                ! Setup Matrix-Free dRdw matrix and its function
+                call MatCreateMFFD(ADFLOW_COMM_WORLD, nDimWTurb, nDimWTurb, &
+                                   PETSC_DETERMINE, PETSC_DETERMINE, dRdwTurb, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-           call MatMFFDSetFunction(dRdwTurb, FormFunction_mf_Turb, ctx, ierr)
-           call EChk(ierr, __FILE__, __LINE__)
+                call MatMFFDSetFunction(dRdwTurb, FormFunction_mf_Turb, ctx, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-           call MatSetOption(dRdWTurb, MAT_ROW_ORIENTED, PETSC_FALSE, ierr)
-           call EChk(ierr, __FILE__, __LINE__)
+                call MatSetOption(dRdWTurb, MAT_ROW_ORIENTED, PETSC_FALSE, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-           !  Create the linear solver context
-           call KSPCreate(ADFLOW_COMM_WORLD, ANK_KSPTurb, ierr)
-           call EChk(ierr, __FILE__, __LINE__)
+                !  Create the linear solver context
+                call KSPCreate(ADFLOW_COMM_WORLD, ANK_KSPTurb, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-           ! Set operators for the solver
-           if (ANK_useMatrixFree) then
-              ! Matrix free
-              call KSPSetOperators(ANK_KSPTurb, dRdwTurb, dRdwPreTurb, ierr)
-           else
-              ! Matrix based
-              call KSPSetOperators(ANK_KSPTurb, dRdwPreTurb, dRdwPreTurb, ierr)
-           end if
-           call EChk(ierr, __FILE__, __LINE__)
+                ! Set operators for the solver
+                if (ANK_useMatrixFree) then
+                    ! Matrix free
+                    call KSPSetOperators(ANK_KSPTurb, dRdwTurb, dRdwPreTurb, ierr)
+                else
+                    ! Matrix based
+                    call KSPSetOperators(ANK_KSPTurb, dRdwPreTurb, dRdwPreTurb, ierr)
+                end if
+                call EChk(ierr, __FILE__, __LINE__)
 
-           ANK_turbSetup = .True.
-           ANK_iterTurb = 0
-       end if
-    end if
+                ANK_turbSetup = .True.
+                ANK_iterTurb = 0
+            end if
+        end if
 
-  end subroutine setupANKsolver
+    end subroutine setupANKsolver
+
+    subroutine FormJacobianANK
+
+        use constants
+        use flowVarRefState, only: nw, nwf, nt1, nt2
+        use blockPointers, only: nDom, volRef, il, jl, kl, w, dw, dtl, globalCell, iblank
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use inputIteration, only: turbResScale
+        use inputADjoint, only: viscPC
+        use inputDiscretization, only: approxSA
+        use iteration, only: totalR0, totalR
+        use utils, only: EChk, setPointers
+        use adjointUtils, only: setupStateResidualMatrix, setupStandardKSP, setupStandardMultigrid
+        use communication
+        use agmg, only: setupShellPC, destroyShellPC, applyShellPC, agmgLevels, coarseIndices, A
+        use inputadjoint, only: precondtype
+        implicit none
+
+        ! Local Variables
+        character(len=maxStringLen) :: preConSide, localPCType, kspObjectType, globalPCType, localOrdering
+        integer(kind=intType) :: ierr
+        logical :: useAD, usePC, useTranspose, useObjective, tmp, frozenTurb
+        real(kind=realType) :: dtinv, rho
+        integer(kind=intType) :: i, j, k, l, ii, irow, nn, sps, outerPreConIts, subspace, lvl
+        integer(kind=intType), dimension(2:10) :: coarseRows
+        real(kind=realType), dimension(:, :), allocatable :: blk
+        logical :: useCoarseMats
+        PC shellPC
+
+        if (preCondType == 'mg') then
+            useCoarseMats = .True.
+        else
+            useCoarseMats = .False.
+        end if
 
-  subroutine FormJacobianANK
+        ! Assemble the approximate PC (fine leve, level 1)
+        useAD = ANK_ADPC
+        frozenTurb = (.not. ANK_coupled)
+        usePC = .True.
+        useTranspose = .False.
+        useObjective = .False.
+        tmp = viscPC ! Save what is in viscPC and set to the NKvarible
+        viscPC = .False.
 
-    use constants
-    use flowVarRefState, only : nw, nwf, nt1, nt2
-    use blockPointers, only : nDom, volRef, il, jl, kl, w, dw, dtl, globalCell, iblank
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use inputIteration, only : turbResScale
-    use inputADjoint, only : viscPC
-    use inputDiscretization, only : approxSA
-    use iteration, only : totalR0, totalR
-    use utils, only : EChk, setPointers
-    use adjointUtils, only :setupStateResidualMatrix, setupStandardKSP, setupStandardMultigrid
-    use communication
-    use agmg, only : setupShellPC, destroyShellPC, applyShellPC, agmgLevels, coarseIndices, A
-    use inputadjoint, only : precondtype
-    implicit none
+        if (totalR > ANK_secondOrdSwitchTol * totalR0) &
+            approxSA = .True.
 
-    ! Local Variables
-    character(len=maxStringLen) :: preConSide, localPCType, kspObjectType, globalPCType, localOrdering
-    integer(kind=intType) ::ierr
-    logical :: useAD, usePC, useTranspose, useObjective, tmp, frozenTurb
-    real(kind=realType) :: dtinv, rho
-    integer(kind=intType) :: i, j, k, l, ii, irow, nn, sps, outerPreConIts, subspace, lvl
-    integer(kind=intType), dimension(2:10) :: coarseRows
-    real(kind=realType), dimension(:,:), allocatable :: blk
-    logical :: useCoarseMats
-    PC shellPC
-
-    if (preCondType == 'mg') then
-       useCoarseMats = .True.
-    else
-       useCoarseMats = .False.
-    end if
-
-    ! Assemble the approximate PC (fine leve, level 1)
-    useAD = ANK_ADPC
-    frozenTurb = (.not. ANK_coupled)
-    usePC = .True.
-    useTranspose = .False.
-    useObjective = .False.
-    tmp = viscPC ! Save what is in viscPC and set to the NKvarible
-    viscPC = .False.
-
-    if (totalR > ANK_secondOrdSwitchTol*totalR0) &
-       approxSA = .True.
-
-    ! Create the preconditoner matrix
-    call setupStateResidualMatrix(dRdwPre, useAD, usePC, useTranspose, &
-         useObjective, frozenTurb, 1_intType, useCoarseMats=useCoarseMats)
-
-    ! Reset saved value
-    viscPC = tmp
-    approxSA = .False.
-
-    ! Add the contribution from the time step term
-
-    ! Generic block to use while setting values
-    allocate(blk(nState, nState))
-
-    ! Zero the block once, since the previous entries will be overwritten
-    ! for each cell, and zero entries will remain zero.
-    blk = zero
-
-    if (.not. ANK_coupled) then
-       ! For the segragated solver, only calculate the time step for flow variables
-       do nn=1, nDom
-          do sps=1, nTimeIntervalsSpectral
-             call setPointers(nn,1_intType,sps)
-             do k=2, kl
-                do j=2, jl
-                   do i=2, il
-                      ! Calculate one over time step for this cell. Multiply
-                      ! the dtl by cell volume to get the actual time step
-                      ! required for a CFL of one, then multiply with the
-                      ! actual cfl number in the solver
-                      dtinv = one/(ANK_CFL * dtl(i,j,k) * volRef(i,j,k))
-
-                      ! We need to convert the momentum residuals to velocity
-                      ! residuals to get the desired effect from time steps.
-                      ! To do this, save a "pseudo" jacobian for this cell,
-                      ! that has dU/du, where U is the vector of conservative
-                      ! variables, and u are the primitive variables. For this
-                      ! jacobian, only the velocity entries are modified,
-                      ! since ADflow saves density, velocities and total
-                      ! energy in the state vector w(:,:,:,:).
-
-                      ! Density and energy updates are unchanged.
-                      blk(iRho, iRho) = dtinv
-                      blk(iRhoE, iRhoE) = dtinv
-
-                      ! save the density
-                      rho = w(i,j,k,iRho)
-
-                      ! x-velocity
-                      blk(ivx, iRho) = w(i,j,k,ivx)*dtinv
-                      blk(ivx, ivx)  = rho*dtinv
-
-                      ! y-velocity
-                      blk(ivy, iRho) = w(i,j,k,ivy)*dtinv
-                      blk(ivy, ivy)  = rho*dtinv
-
-                      ! z-velocity
-                      blk(ivz, iRho) = w(i,j,k,ivz)*dtinv
-                      blk(ivz, ivz)  = rho*dtinv
-
-                      ! get the global cell index
-                      irow = globalCell(i, j, k)
-
-                      if (useCoarseMats) then
-                         do lvl=1, agmgLevels-1
-                            coarseRows(lvl+1) = coarseIndices(nn, lvl)%arr(i, j, k)
-                         end do
-                      end if
-
-                      ! Add the contribution to the matrix in PETSc
-                      call setBlock()
-                   end do
+        ! Create the preconditoner matrix
+        call setupStateResidualMatrix(dRdwPre, useAD, usePC, useTranspose, &
+                                      useObjective, frozenTurb, 1_intType, useCoarseMats=useCoarseMats)
+
+        ! Reset saved value
+        viscPC = tmp
+        approxSA = .False.
+
+        ! Add the contribution from the time step term
+
+        ! Generic block to use while setting values
+        allocate (blk(nState, nState))
+
+        ! Zero the block once, since the previous entries will be overwritten
+        ! for each cell, and zero entries will remain zero.
+        blk = zero
+
+        if (.not. ANK_coupled) then
+            ! For the segragated solver, only calculate the time step for flow variables
+            do nn = 1, nDom
+                do sps = 1, nTimeIntervalsSpectral
+                    call setPointers(nn, 1_intType, sps)
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 2, il
+                                ! Calculate one over time step for this cell. Multiply
+                                ! the dtl by cell volume to get the actual time step
+                                ! required for a CFL of one, then multiply with the
+                                ! actual cfl number in the solver
+                                dtinv = one / (ANK_CFL * dtl(i, j, k) * volRef(i, j, k))
+
+                                ! We need to convert the momentum residuals to velocity
+                                ! residuals to get the desired effect from time steps.
+                                ! To do this, save a "pseudo" jacobian for this cell,
+                                ! that has dU/du, where U is the vector of conservative
+                                ! variables, and u are the primitive variables. For this
+                                ! jacobian, only the velocity entries are modified,
+                                ! since ADflow saves density, velocities and total
+                                ! energy in the state vector w(:,:,:,:).
+
+                                ! Density and energy updates are unchanged.
+                                blk(iRho, iRho) = dtinv
+                                blk(iRhoE, iRhoE) = dtinv
+
+                                ! save the density
+                                rho = w(i, j, k, iRho)
+
+                                ! x-velocity
+                                blk(ivx, iRho) = w(i, j, k, ivx) * dtinv
+                                blk(ivx, ivx) = rho * dtinv
+
+                                ! y-velocity
+                                blk(ivy, iRho) = w(i, j, k, ivy) * dtinv
+                                blk(ivy, ivy) = rho * dtinv
+
+                                ! z-velocity
+                                blk(ivz, iRho) = w(i, j, k, ivz) * dtinv
+                                blk(ivz, ivz) = rho * dtinv
+
+                                ! get the global cell index
+                                irow = globalCell(i, j, k)
+
+                                if (useCoarseMats) then
+                                    do lvl = 1, agmgLevels - 1
+                                        coarseRows(lvl + 1) = coarseIndices(nn, lvl)%arr(i, j, k)
+                                    end do
+                                end if
+
+                                ! Add the contribution to the matrix in PETSc
+                                call setBlock()
+                            end do
+                        end do
+                    end do
                 end do
-             end do
-          end do
-       end do
-    else
-       ! For the coupled solver, CFL number for the turbulent variable needs scaling
-       ! because the residuals are scaled, and additional scaling of the time step
-       ! for the turbulence variable might be required.
-       ii = 1
-       do nn=1, nDom
-          do sps=1, nTimeIntervalsSpectral
-             call setPointers(nn,1_intType,sps)
-             do k=2, kl
-                do j=2, jl
-                   do i=2, il
-                      ! See the comment for the same calculation above
-                      dtinv = one/(ANK_CFL * dtl(i,j,k) * volRef(i,j,k))
-
-                      ! We need to convert the momentum residuals to velocity
-                      ! residuals to get the desired effect from time steps.
-                      ! To do this, save a "pseudo" jacobian for this cell,
-                      ! that has dU/du, where U is the vector of conservative
-                      ! variables, and u are the primitive variables. For this
-                      ! jacobian, only the velocity entries are modified,
-                      ! since ADflow saves density, velocities and total
-                      ! energy in the state vector w(:,:,:,:).
-
-                      ! Density update is unchanged.
-                      blk(iRho, iRho) = dtinv
-
-                      ! save the density
-                      rho = w(i,j,k,iRho)
-
-                      ! x-velocity
-                      blk(ivx, iRho) = w(i,j,k,ivx)*dtinv
-                      blk(ivx, ivx)  = rho*dtinv
-
-                      ! y-velocity
-                      blk(ivy, iRho) = w(i,j,k,ivy)*dtinv
-                      blk(ivy, ivy)  = rho*dtinv
-
-                      ! z-velocity
-                      blk(ivz, iRho) = w(i,j,k,ivz)*dtinv
-                      blk(ivz, ivz)  = rho*dtinv
-
-                      ! Energy update is unchanged
-                      blk(iRhoE, iRhoE) = dtinv
-
-                      ! For the turbulence variable, additionally scale the cfl.
-                      ! turbresscale is required because the turbulent residuals
-                      ! are scaled with it. Furthermore, the turbulence variable
-                      ! can get a different CFL number. Scale it by turbCFLScale
-                      blk(nt1, nt1) = dtinv*turbResScale(1)/ANK_turbCFLScale
-
-                      ! get the global cell index
-                      irow = globalCell(i, j, k)
-
-                      if (useCoarseMats) then
-                         do lvl=1, agmgLevels-1
-                            coarseRows(lvl+1) = coarseIndices(nn, lvl)%arr(i, j, k)
-                         end do
-                      end if
-
-                      ! Add the contribution to the matrix in PETSc
-                      call setBlock()
-                   end do
+            end do
+        else
+            ! For the coupled solver, CFL number for the turbulent variable needs scaling
+            ! because the residuals are scaled, and additional scaling of the time step
+            ! for the turbulence variable might be required.
+            ii = 1
+            do nn = 1, nDom
+                do sps = 1, nTimeIntervalsSpectral
+                    call setPointers(nn, 1_intType, sps)
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 2, il
+                                ! See the comment for the same calculation above
+                                dtinv = one / (ANK_CFL * dtl(i, j, k) * volRef(i, j, k))
+
+                                ! We need to convert the momentum residuals to velocity
+                                ! residuals to get the desired effect from time steps.
+                                ! To do this, save a "pseudo" jacobian for this cell,
+                                ! that has dU/du, where U is the vector of conservative
+                                ! variables, and u are the primitive variables. For this
+                                ! jacobian, only the velocity entries are modified,
+                                ! since ADflow saves density, velocities and total
+                                ! energy in the state vector w(:,:,:,:).
+
+                                ! Density update is unchanged.
+                                blk(iRho, iRho) = dtinv
+
+                                ! save the density
+                                rho = w(i, j, k, iRho)
+
+                                ! x-velocity
+                                blk(ivx, iRho) = w(i, j, k, ivx) * dtinv
+                                blk(ivx, ivx) = rho * dtinv
+
+                                ! y-velocity
+                                blk(ivy, iRho) = w(i, j, k, ivy) * dtinv
+                                blk(ivy, ivy) = rho * dtinv
+
+                                ! z-velocity
+                                blk(ivz, iRho) = w(i, j, k, ivz) * dtinv
+                                blk(ivz, ivz) = rho * dtinv
+
+                                ! Energy update is unchanged
+                                blk(iRhoE, iRhoE) = dtinv
+
+                                ! For the turbulence variable, additionally scale the cfl.
+                                ! turbresscale is required because the turbulent residuals
+                                ! are scaled with it. Furthermore, the turbulence variable
+                                ! can get a different CFL number. Scale it by turbCFLScale
+                                blk(nt1, nt1) = dtinv * turbResScale(1) / ANK_turbCFLScale
+
+                                ! get the global cell index
+                                irow = globalCell(i, j, k)
+
+                                if (useCoarseMats) then
+                                    do lvl = 1, agmgLevels - 1
+                                        coarseRows(lvl + 1) = coarseIndices(nn, lvl)%arr(i, j, k)
+                                    end do
+                                end if
+
+                                ! Add the contribution to the matrix in PETSc
+                                call setBlock()
+                            end do
+                        end do
+                    end do
                 end do
-             end do
-          end do
-       end do
-    end if
-
-    ! PETSc Matrix Assembly begin
-    call MatAssemblyBegin(dRdwPre, MAT_FINAL_ASSEMBLY, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Setup KSP Options
-    preConSide = 'right'
-    localPCType = 'ilu'
-    kspObjectType = 'gmres'
-    globalPCType = 'asm'
-    localOrdering = 'rcm'
-    outerPreConIts = ank_outerPreconIts
-
-    ! Setup the KSP using the same code as used for the adjoint
-    if (ank_subspace < 0) then
-       subspace = ANK_maxIter
-    else
-       subspace = ANK_subspace
-    end if
-
-    ! de-allocate the generic block
-    deallocate(blk)
-
-    ! Complete the matrix assembly.
-    call MatAssemblyEnd  (dRdwPre, MAT_FINAL_ASSEMBLY, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    if (useCoarseMats) then
-       do lvl=2, agmgLevels
-          call MatAssemblyBegin(A(lvl), MAT_FINAL_ASSEMBLY, ierr)
-          call EChk(ierr, __FILE__, __LINE__)
-          call MatAssemblyEnd(A(lvl), MAT_FINAL_ASSEMBLY, ierr)
-          call EChk(ierr, __FILE__, __LINE__)
-
-       end do
-    end if
-
-    if (PreCondType == 'asm') then
-       ! Run the super-dee-duper function to setup the ksp object:
-
-       call setupStandardKSP(ANK_KSP, kspObjectType, subSpace, &
-            preConSide, globalPCType, ANK_asmOverlap, outerPreConIts, localPCType, &
-            localOrdering, ANK_iluFill, ANK_innerPreConIts)
-    else if (PreCondType == 'mg') then
-
-       ! Setup the MG preconditioner!
-       call setupStandardMultigrid(ANK_KSP, kspObjectType, subSpace, &
-            preConSide, ANK_asmOverlap, outerPreConIts, &
-            localOrdering, ANK_iluFill)
-    end if
-
-    ! Don't do iterative refinement for the NKSolver.
-    call KSPGMRESSetCGSRefinementType(ANK_KSP, &
-         KSP_GMRES_CGS_REFINE_NEVER, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-  contains
-    subroutine setBlock()
-      ! This subroutine is used to set the diagonal time stepping terms
-      ! for the Jacobians in ANK. It is only used to set diagonal blocks
-
-      implicit none
-
-      call MatSetValuesBlocked(dRdwPre, 1, irow, 1, irow, blk, &
-           ADD_VALUES, ierr)
-      call EChk(ierr, __FILE__, __LINE__)
-
-      ! Extension for setting coarse grids:
-      if (useCoarseMats) then
-         do lvl=2, agmgLevels
-            call MatSetValuesBlocked(A(lvl), 1, coarseRows(lvl), 1, coarseRows(lvl), &
-                 blk, ADD_VALUES, ierr)
-         end do
-      end if
-
-
-    end subroutine setBlock
-  end subroutine FormJacobianANK
-
-  subroutine FormJacobianANKTurb
+            end do
+        end if
 
-    use constants
-    use flowVarRefState, only : nw, nwf, nt1, nt2
-    use blockPointers, only : nDom, volRef, il, jl, kl, w, dw, dtl, globalCell
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use inputIteration, only : turbResScale
-    use inputADjoint, only : viscPC
-    use inputDiscretization, only : approxSA
-    use iteration, only : totalR0, totalR
-    use utils, only : EChk, setPointers
-    use adjointUtils, only :setupStateResidualMatrix, setupStandardKSP
-    use communication
-    implicit none
+        ! PETSc Matrix Assembly begin
+        call MatAssemblyBegin(dRdwPre, MAT_FINAL_ASSEMBLY, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Setup KSP Options
+        preConSide = 'right'
+        localPCType = 'ilu'
+        kspObjectType = 'gmres'
+        globalPCType = 'asm'
+        localOrdering = 'rcm'
+        outerPreConIts = ank_outerPreconIts
+
+        ! Setup the KSP using the same code as used for the adjoint
+        if (ank_subspace < 0) then
+            subspace = ANK_maxIter
+        else
+            subspace = ANK_subspace
+        end if
+
+        ! de-allocate the generic block
+        deallocate (blk)
+
+        ! Complete the matrix assembly.
+        call MatAssemblyEnd(dRdwPre, MAT_FINAL_ASSEMBLY, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        if (useCoarseMats) then
+            do lvl = 2, agmgLevels
+                call MatAssemblyBegin(A(lvl), MAT_FINAL_ASSEMBLY, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+                call MatAssemblyEnd(A(lvl), MAT_FINAL_ASSEMBLY, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+            end do
+        end if
+
+        if (PreCondType == 'asm') then
+            ! Run the super-dee-duper function to setup the ksp object:
+
+            call setupStandardKSP(ANK_KSP, kspObjectType, subSpace, &
+                                  preConSide, globalPCType, ANK_asmOverlap, outerPreConIts, localPCType, &
+                                  localOrdering, ANK_iluFill, ANK_innerPreConIts)
+        else if (PreCondType == 'mg') then
+
+            ! Setup the MG preconditioner!
+            call setupStandardMultigrid(ANK_KSP, kspObjectType, subSpace, &
+                                        preConSide, ANK_asmOverlap, outerPreConIts, &
+                                        localOrdering, ANK_iluFill)
+        end if
+
+        ! Don't do iterative refinement for the NKSolver.
+        call KSPGMRESSetCGSRefinementType(ANK_KSP, &
+                                          KSP_GMRES_CGS_REFINE_NEVER, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+    contains
+        subroutine setBlock()
+            ! This subroutine is used to set the diagonal time stepping terms
+            ! for the Jacobians in ANK. It is only used to set diagonal blocks
+
+            implicit none
 
-    ! Local Variables
-    character(len=maxStringLen) :: preConSide, localPCType, kspObjectType, globalPCType, localOrdering
-    integer(kind=intType) ::ierr
-    logical :: useAD, usePC, useTranspose, useObjective, tmp, frozenTurb
-    real(kind=realType) :: dtinv, rho
-    integer(kind=intType) :: i, j, k, l, l1, ii, irow, nn, sps, outerPreConIts, subspace
-    real(kind=realType), dimension(:,:), allocatable :: blk
-
-    ! Assemble the approximate PC (fine leve, level 1)
-    useAD = ANK_ADPC
-    frozenTurb = .False.
-    usePC = .True.
-    useTranspose = .False.
-    useObjective = .False.
-    tmp = viscPC ! Save what is in viscPC and set to the NKvarible
-    viscPC = .False.
-
-    if (totalR > ANK_secondOrdSwitchTol*totalR0) &
-       approxSA = .True.
-
-    ! Create the preconditoner matrix
-    call setupStateResidualMatrix(dRdwPreTurb, useAD, usePC, useTranspose, &
-         useObjective, frozenTurb, 1_intType, .True.)
-
-    ! Reset saved value
-    viscPC = tmp
-    approxSA = .False.
-
-    ! Add the contribution from the time step term
-
-    ! Generic block to use while setting values
-    allocate(blk(nStateTurb, nStateTurb))
-
-    ! Zero the block once, since the previous entries will be overwritten
-    ! for each cell, and zero entries will remain zero.
-    blk = zero
-
-    ! For the coupled solver, CFL number for the turbulent variable needs scaling
-    ! because the residuals are scaled, and additional scaling of the time step
-    ! for the turbulence variable might be required.
-    ii = 1
-    do nn=1, nDom
-        do sps=1, nTimeIntervalsSpectral
-            call setPointers(nn,1_intType,sps)
-            do k=2, kl
-                do j=2, jl
-                    do i=2, il
-
-                      ! See the comment for the same calculation above
-                      dtinv = one/(ANK_CFL * dtl(i,j,k) * volRef(i,j,k))
-
-                      do l=nt1, nt2
-
-                        ! l1 is just l that starts with 1 on the turb variables
-                        l1 = l-nt1+1
-
-                        ! For the turbulence variable, additionally scale the cfl.
-                        ! turbresscale is required because the turbulent residuals
-                        ! are scaled with it. Furthermore, the turbulence variable
-                        ! can get a different CFL number. Scale it by turbCFLScale
-                        blk(l1, l1) = dtinv*turbResScale(l1)/ANK_turbCFLScale
-                      end do
-
-                      ! get the global cell index
-                      irow = globalCell(i, j, k)
-
-                      ! Add the contribution to the matrix in PETSc
-                      call setBlock()
+            call MatSetValuesBlocked(dRdwPre, 1, irow, 1, irow, blk, &
+                                     ADD_VALUES, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Extension for setting coarse grids:
+            if (useCoarseMats) then
+                do lvl = 2, agmgLevels
+                    call MatSetValuesBlocked(A(lvl), 1, coarseRows(lvl), 1, coarseRows(lvl), &
+                                             blk, ADD_VALUES, ierr)
+                end do
+            end if
+
+        end subroutine setBlock
+    end subroutine FormJacobianANK
+
+    subroutine FormJacobianANKTurb
+
+        use constants
+        use flowVarRefState, only: nw, nwf, nt1, nt2
+        use blockPointers, only: nDom, volRef, il, jl, kl, w, dw, dtl, globalCell
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use inputIteration, only: turbResScale
+        use inputADjoint, only: viscPC
+        use inputDiscretization, only: approxSA
+        use iteration, only: totalR0, totalR
+        use utils, only: EChk, setPointers
+        use adjointUtils, only: setupStateResidualMatrix, setupStandardKSP
+        use communication
+        implicit none
+
+        ! Local Variables
+        character(len=maxStringLen) :: preConSide, localPCType, kspObjectType, globalPCType, localOrdering
+        integer(kind=intType) :: ierr
+        logical :: useAD, usePC, useTranspose, useObjective, tmp, frozenTurb
+        real(kind=realType) :: dtinv, rho
+        integer(kind=intType) :: i, j, k, l, l1, ii, irow, nn, sps, outerPreConIts, subspace
+        real(kind=realType), dimension(:, :), allocatable :: blk
+
+        ! Assemble the approximate PC (fine leve, level 1)
+        useAD = ANK_ADPC
+        frozenTurb = .False.
+        usePC = .True.
+        useTranspose = .False.
+        useObjective = .False.
+        tmp = viscPC ! Save what is in viscPC and set to the NKvarible
+        viscPC = .False.
+
+        if (totalR > ANK_secondOrdSwitchTol * totalR0) &
+            approxSA = .True.
+
+        ! Create the preconditoner matrix
+        call setupStateResidualMatrix(dRdwPreTurb, useAD, usePC, useTranspose, &
+                                      useObjective, frozenTurb, 1_intType, .True.)
+
+        ! Reset saved value
+        viscPC = tmp
+        approxSA = .False.
+
+        ! Add the contribution from the time step term
+
+        ! Generic block to use while setting values
+        allocate (blk(nStateTurb, nStateTurb))
+
+        ! Zero the block once, since the previous entries will be overwritten
+        ! for each cell, and zero entries will remain zero.
+        blk = zero
+
+        ! For the coupled solver, CFL number for the turbulent variable needs scaling
+        ! because the residuals are scaled, and additional scaling of the time step
+        ! for the turbulence variable might be required.
+        ii = 1
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, 1_intType, sps)
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+
+                            ! See the comment for the same calculation above
+                            dtinv = one / (ANK_CFL * dtl(i, j, k) * volRef(i, j, k))
+
+                            do l = nt1, nt2
+
+                                ! l1 is just l that starts with 1 on the turb variables
+                                l1 = l - nt1 + 1
+
+                                ! For the turbulence variable, additionally scale the cfl.
+                                ! turbresscale is required because the turbulent residuals
+                                ! are scaled with it. Furthermore, the turbulence variable
+                                ! can get a different CFL number. Scale it by turbCFLScale
+                                blk(l1, l1) = dtinv * turbResScale(l1) / ANK_turbCFLScale
+                            end do
+
+                            ! get the global cell index
+                            irow = globalCell(i, j, k)
+
+                            ! Add the contribution to the matrix in PETSc
+                            call setBlock()
+                        end do
+                    end do
+                end do
+            end do
+        end do
+
+        ! PETSc Matrix Assembly begin
+        call MatAssemblyBegin(dRdwPreTurb, MAT_FINAL_ASSEMBLY, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Setup KSP Options
+        preConSide = 'right'
+        localPCType = 'ilu'
+        kspObjectType = 'gmres'
+        globalPCType = 'asm'
+        localOrdering = 'rcm'
+        outerPreConIts = 1
+        ! Setup the KSP using the same code as used for the adjoint
+        if (ank_subspace < 0) then
+            subspace = ANK_maxIter
+        else
+            subspace = ANK_subspace
+        end if
+
+        ! de-allocate the generic block
+        deallocate (blk)
+
+        ! Complete the matrix assembly.
+        call MatAssemblyEnd(dRdwPreTurb, MAT_FINAL_ASSEMBLY, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call setupStandardKSP(ANK_KSPTurb, kspObjectType, subSpace, &
+                              preConSide, globalPCType, ANK_asmOverlap, outerPreConIts, localPCType, &
+                              localOrdering, ANK_iluFill, ANK_innerPreConIts)
+
+        ! Don't do iterative refinement for the NKSolver.
+        call KSPGMRESSetCGSRefinementType(ANK_KSPTurb, &
+                                          KSP_GMRES_CGS_REFINE_NEVER, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+    contains
+        subroutine setBlock()
+            ! This subroutine is used to set the diagonal time stepping terms
+            ! for the Jacobians in ANK. It is only used to set diagonal blocks
+
+            implicit none
+
+            call MatSetValuesBlocked(dRdwPreTurb, 1, irow, 1, irow, blk, &
+                                     ADD_VALUES, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+        end subroutine setBlock
+    end subroutine FormJacobianANKTurb
+
+    subroutine FormFunction_mf(ctx, inVec, rVec, ierr)
+
+        ! This is the function used for the matrix-free matrix-vector products
+        ! for the GMRES solver used in ANK
+
+        use constants
+        use blockPointers, only: nDom, volRef, il, jl, kl, dw, dtl
+        use inputtimespectral, only: nTimeIntervalsSpectral
+        use inputIteration, only: turbResScale
+        use flowvarrefstate, only: nwf, nt1, nt2
+        use NKSolver, only: setRvec
+        use utils, only: setPointers, EChk
+        use blockette, only: blocketteRes
+        implicit none
+
+        ! PETSc Variables
+        PetscFortranAddr ctx(*)
+        Vec inVec, rVec
+        real(kind=realType) :: dtinv, rho
+        integer(kind=intType) :: ierr, nn, sps, i, j, k, l, ii, iiRho
+        real(kind=realType), pointer :: rvec_pointer(:)
+        real(kind=realType), pointer :: invec_pointer(:)
+        real(kind=realType), pointer :: wvec_pointer(:)
+        logical :: useViscApprox
+
+        ! get the input vector
+        call setWANK(inVec, 1, nState)
+
+        ! determine if we want the approximate viscous fluxes
+        useViscApprox = (.not. ANK_useFullVisc) .and. ANK_useDissApprox
+
+        ! Determine if we want the turb residuals
+        call blocketteRes(useDissApprox=ANK_useDissApprox, useViscApprox=useViscApprox, &
+                          useTurbRes=ANK_coupled, useStoreWall=.False.)
+
+        ! Copy the residuals to rVec in petsc
+        if (ANK_coupled) then
+            call setRVec(rVec)
+        else
+            call setRVecANK(rVec)
+        end if
+
+        ! Add the contribution from the time stepping term
+
+        call VecGetArrayF90(rVec, rvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! inVec contains the perturbed state vector
+        call VecGetArrayReadF90(inVec, invec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Also read the wVec to access the un-perturbed state vector.
+        call VecGetArrayReadF90(wVec, wvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        if (.not. ANK_coupled) then
+            ! Only flow variables
+            ii = 1
+            do nn = 1, nDom
+                do sps = 1, nTimeIntervalsSpectral
+                    call setPointers(nn, 1_intType, sps)
+                    ! read the density residuals and set local CFL
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 2, il
+                                dtinv = one / (ANK_CFL * dtl(i, j, k) * volRef(i, j, k))
+
+                                ! Update the first entry in this block, corresponds to
+                                ! density. Also save this density value.
+                                rvec_pointer(ii) = rvec_pointer(ii) + invec_pointer(ii) * dtinv
+                                rho = wvec_pointer(ii)
+                                iirho = ii
+                                ii = ii + 1
+
+                                ! updates 2nd-4th are velocities. They need to get converted
+                                ! to momentum residuals.
+
+                                rvec_pointer(ii) = rvec_pointer(ii) + dtinv * ( &
+                                                   wvec_pointer(ii) * invec_pointer(iiRho) + &
+                                                   rho * invec_pointer(ii))
+                                ii = ii + 1
+
+                                rvec_pointer(ii) = rvec_pointer(ii) + dtinv * ( &
+                                                   wvec_pointer(ii) * invec_pointer(iiRho) + &
+                                                   rho * invec_pointer(ii))
+                                ii = ii + 1
+
+                                rvec_pointer(ii) = rvec_pointer(ii) + dtinv * ( &
+                                                   wvec_pointer(ii) * invec_pointer(iiRho) + &
+                                                   rho * invec_pointer(ii))
+                                ii = ii + 1
+
+                                ! Finally energy gets the same update
+                                rvec_pointer(ii) = rvec_pointer(ii) + invec_pointer(ii) * dtinv
+                                ii = ii + 1
+                            end do
+                        end do
+                    end do
+                end do
+            end do
+        else
+            ! Include time step for turbulence
+            ii = 1
+            do nn = 1, nDom
+                do sps = 1, nTimeIntervalsSpectral
+                    call setPointers(nn, 1_intType, sps)
+                    ! read the density residuals and set local CFL
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 2, il
+                                dtinv = one / (ANK_CFL * dtl(i, j, k) * volRef(i, j, k))
+
+                                ! Update the first entry in this block, corresponds to
+                                ! density. Also save this density value.
+                                rvec_pointer(ii) = rvec_pointer(ii) + invec_pointer(ii) * dtinv
+                                rho = wvec_pointer(ii)
+                                iirho = ii
+                                ii = ii + 1
+
+                                ! updates 2nd-4th are velocities. They need to get converted
+                                ! to momentum residuals.
+
+                                rvec_pointer(ii) = rvec_pointer(ii) + dtinv * ( &
+                                                   wvec_pointer(ii) * invec_pointer(iiRho) + &
+                                                   rho * invec_pointer(ii))
+                                ii = ii + 1
+
+                                rvec_pointer(ii) = rvec_pointer(ii) + dtinv * ( &
+                                                   wvec_pointer(ii) * invec_pointer(iiRho) + &
+                                                   rho * invec_pointer(ii))
+                                ii = ii + 1
+
+                                rvec_pointer(ii) = rvec_pointer(ii) + dtinv * ( &
+                                                   wvec_pointer(ii) * invec_pointer(iiRho) + &
+                                                   rho * invec_pointer(ii))
+                                ii = ii + 1
+
+                                ! energy gets the same update
+                                rvec_pointer(ii) = rvec_pointer(ii) + invec_pointer(ii) * dtinv
+                                ii = ii + 1
+
+                                do l = nt1, nt2
+                                    ! turbulence variable needs additional scaling, and it may
+                                    ! get a different CFL number
+                                    rvec_pointer(ii) = rvec_pointer(ii) + invec_pointer(ii) * &
+                                                       dtinv * turbResScale(l - nt1 + 1) / ANK_turbCFLScale
+                                    ii = ii + 1
+                                end do
+                            end do
+                        end do
+                    end do
+                end do
+            end do
+        end if
+
+        call VecRestoreArrayF90(rVec, rvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecRestoreArrayReadF90(wVec, wvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecRestoreArrayReadF90(inVec, invec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! We don't check an error here, so just pass back zero
+        ierr = 0
+
+    end subroutine FormFunction_mf
+
+    subroutine FormFunction_mf_turb(ctx, inVec, rVec, ierr)
+
+        ! This is the function used for the matrix-free matrix-vector products
+        ! for the GMRES solver used in ANK
+
+        use constants
+        use blockPointers, only: nDom, volRef, il, jl, kl, dw, dtl
+        use inputtimespectral, only: nTimeIntervalsSpectral
+        use inputIteration, only: turbResScale
+        use flowvarrefstate, only: nwf, nt1, nt2
+        use NKSolver, only: setRvec
+        use utils, only: setPointers, EChk
+        use blockette, only: blocketteRes
+        implicit none
+
+        ! PETSc Variables
+        PetscFortranAddr ctx(*)
+        Vec inVec, rVec
+        real(kind=realType) :: dtinv, rho
+        integer(kind=intType) :: ierr, nn, sps, i, j, k, l, ii, iiRho
+        real(kind=realType), pointer :: rvec_pointer(:)
+        real(kind=realType), pointer :: invec_pointer(:)
+
+        ! get the input vector
+        call setWANK(inVec, nt1, nt2)
+
+        call blocketteRes(useFlowRes=.False., useStoreWall=.False.)
+        call setRVecANKTurb(rVec)
+
+        ! Add the contribution from the time stepping term
+
+        call VecGetArrayF90(rVec, rvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! inVec contains the perturbed state vector
+        call VecGetArrayReadF90(inVec, invec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Include time step for turbulence
+        ii = 1
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, 1_intType, sps)
+                ! read the density residuals and set local CFL
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            ! needs to be modified
+                            dtinv = one / (ANK_CFL * dtl(i, j, k) * volRef(i, j, k))
+
+                            do l = nt1, nt2
+                                ! turbulence variable needs additional scaling, and it may
+                                ! get a different CFL number
+                                rvec_pointer(ii) = rvec_pointer(ii) + invec_pointer(ii) * &
+                                                   dtinv * turbResScale(l - nt1 + 1) / ANK_turbCFLScale
+                                ii = ii + 1
+                            end do
+                        end do
                     end do
                 end do
             end do
         end do
-    end do
 
-    ! PETSc Matrix Assembly begin
-    call MatAssemblyBegin(dRdwPreTurb, MAT_FINAL_ASSEMBLY, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        call VecRestoreArrayF90(rVec, rvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecRestoreArrayReadF90(inVec, invec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    ! Setup KSP Options
-    preConSide = 'right'
-    localPCType = 'ilu'
-    kspObjectType = 'gmres'
-    globalPCType = 'asm'
-    localOrdering = 'rcm'
-    outerPreConIts = 1
-    ! Setup the KSP using the same code as used for the adjoint
-    if (ank_subspace < 0) then
-       subspace = ANK_maxIter
-    else
-       subspace = ANK_subspace
-    end if
+        ! We don't check an error here, so just pass back zero
+        ierr = 0
+
+    end subroutine FormFunction_mf_turb
+
+    subroutine computeUnsteadyResANK(omega)
+
+        ! This routine calculates the unsteady residual in a given iteration.
+        ! It needs the following variables/vectors:
+        !
+        !   omega:      This is the step size taken in the last update to the state
+        !   deltaW:     Vector that contains the full update given from the
+        !               Newton/Euler iteration.
+        !   w(:,:,:,:): Should contain the updated state with the given step size
+        !               lambdaLS and given update deltaW
+        !   ANK_CFL:    The CFL number used for this non-linear iteration
+        !   dtl:        Array containing time step values giving a CFL number of 1
+        !               on each cell.
+        !
+        ! The routine calculates the unsteady residual and leaves the result in
+        ! rVec, which was previously used to keep the steady residual only. This
+        ! is done because the norm of this vector can easily be calculated with
+        ! PETSc, however, after the line search, the rVec vector needs to be
+        ! updated to contain only the steady state residuals. This can be done with
+        ! setRVecANK/setRVec, with a dw(:,:,:,:) that is also up to date.
+
+        use constants
+        use blockPointers, only: nDom, volRef, il, jl, kl, w, dw, dtl
+        use inputtimespectral, only: nTimeIntervalsSpectral
+        use inputIteration, only: turbResScale
+        use flowvarrefstate, only: nwf, nt1, nt2
+        use NKSolver, only: setRvec
+        use utils, only: setPointers, EChk
+        use blockette, only: blocketteRes
+        implicit none
+
+        real(kind=realType), intent(in) :: omega
+
+        real(kind=realType) :: dtinv, rho, uu, vv, ww
+        integer(kind=intType) :: ierr, nn, sps, i, j, k, l, ii, iiRho
+        real(kind=realType), pointer :: rvec_pointer(:)
+        real(kind=realType), pointer :: dvec_pointer(:)
+
+        ! Calculate the steady residuals
+        call blocketteRes(useTurbRes=ANK_coupled)
+        call setRVecANK(rVec)
+
+        ! Add the contribution from the time stepping term
+
+        call VecGetArrayF90(rVec, rvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    ! de-allocate the generic block
-    deallocate(blk)
+        ! deltaW contains the full update to the state
+        call VecGetArrayReadF90(deltaW, dvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! TODO AY: check if this routine is fine with complex mode...
+        ! dtl and volume can both have complex values in them
+
+        if (.not. ANK_coupled) then
+            ! Only flow variables
+            ii = 1
+            do nn = 1, nDom
+                do sps = 1, nTimeIntervalsSpectral
+                    call setPointers(nn, 1_intType, sps)
+                    ! read the density residuals and set local CFL
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 2, il
+                                dtinv = one / (ANK_CFL * dtl(i, j, k) * volRef(i, j, k))
+
+                                ! Update the first entry in this block, corresponds to
+                                ! density. Also save this density value.
+                                rvec_pointer(ii) = rvec_pointer(ii) - omega * dtinv * dvec_pointer(ii)
+
+                                ! calculate the density in the previous non-linear iteration
+                                rho = w(i, j, k, iRho) + omega * dvec_pointer(ii)
+                                iiRho = ii
+                                ii = ii + 1
+
+                                ! updates 2nd-4th are velocities. They need to get converted
+                                ! to momentum residuals.
+
+                                ! Calculate the u velocity in the previous non-linear iter.
+                                uu = w(i, j, k, ivx) + omega * dvec_pointer(ii)
+
+                                rvec_pointer(ii) = rvec_pointer(ii) - omega * dtinv * ( &
+                                                   uu * dvec_pointer(iiRho) + &
+                                                   rho * dvec_pointer(ii))
+                                ii = ii + 1
+
+                                vv = w(i, j, k, ivx) + omega * dvec_pointer(ii)
+
+                                rvec_pointer(ii) = rvec_pointer(ii) - omega * dtinv * ( &
+                                                   vv * dvec_pointer(iiRho) + &
+                                                   rho * dvec_pointer(ii))
+                                ii = ii + 1
+
+                                ww = w(i, j, k, ivx) + omega * dvec_pointer(ii)
+
+                                rvec_pointer(ii) = rvec_pointer(ii) - omega * dtinv * ( &
+                                                   ww * dvec_pointer(iiRho) + &
+                                                   rho * dvec_pointer(ii))
+                                ii = ii + 1
+
+                                ! Finally energy gets the same update
+                                rvec_pointer(ii) = rvec_pointer(ii) - omega * dtinv * dvec_pointer(ii)
+                                ii = ii + 1
+                            end do
+                        end do
+                    end do
+                end do
+            end do
+        else
+            ! Include time step for turbulence
+            ii = 1
+            do nn = 1, nDom
+                do sps = 1, nTimeIntervalsSpectral
+                    call setPointers(nn, 1_intType, sps)
+                    ! read the density residuals and set local CFL
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 2, il
+                                dtinv = one / (ANK_CFL * dtl(i, j, k) * volRef(i, j, k))
+
+                                ! Update the first entry in this block, corresponds to
+                                ! density. Also save this density value.
+                                rvec_pointer(ii) = rvec_pointer(ii) - omega * dtinv * dvec_pointer(ii)
+
+                                ! calculate the density in the previous non-linear iteration
+                                rho = w(i, j, k, iRho) + omega * dvec_pointer(ii)
+                                iiRho = ii
+                                ii = ii + 1
+
+                                ! updates 2nd-4th are velocities. They need to get converted
+                                ! to momentum residuals.
+
+                                ! Calculate the u velocity in the previous non-linear iter.
+                                uu = w(i, j, k, ivx) + omega * dvec_pointer(ii)
+
+                                rvec_pointer(ii) = rvec_pointer(ii) - omega * dtinv * ( &
+                                                   uu * dvec_pointer(iiRho) + &
+                                                   rho * dvec_pointer(ii))
+                                ii = ii + 1
+
+                                vv = w(i, j, k, ivx) + omega * dvec_pointer(ii)
+
+                                rvec_pointer(ii) = rvec_pointer(ii) - omega * dtinv * ( &
+                                                   vv * dvec_pointer(iiRho) + &
+                                                   rho * dvec_pointer(ii))
+                                ii = ii + 1
+
+                                ww = w(i, j, k, ivx) + omega * dvec_pointer(ii)
+
+                                rvec_pointer(ii) = rvec_pointer(ii) - omega * dtinv * ( &
+                                                   ww * dvec_pointer(iiRho) + &
+                                                   rho * dvec_pointer(ii))
+                                ii = ii + 1
+
+                                ! Finally energy gets the same update
+                                rvec_pointer(ii) = rvec_pointer(ii) - omega * dtinv * dvec_pointer(ii)
+                                ii = ii + 1
+
+                                do l = nt1, nt2
+                                    ! turbulence variable needs additional scaling, and it may
+                                    ! get a different CFL number
+                                    rvec_pointer(ii) = rvec_pointer(ii) - omega * dvec_pointer(ii) * &
+                                                       dtinv * turbResScale(l - nt1 + 1) / ANK_turbCFLScale
+                                    ii = ii + 1
+                                end do
+                            end do
+                        end do
+                    end do
+                end do
+            end do
+        end if
+
+        call VecRestoreArrayF90(rVec, rvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecRestoreArrayReadF90(deltaW, dvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! We don't check an error here, so just pass back zero
+        ierr = 0
+
+    end subroutine computeUnsteadyResANK
+
+    subroutine computeUnsteadyResANKTurb(omega)
+
+        ! This routine calculates the unsteady residual in a given iteration.
+        ! It needs the following variables/vectors:
+        !
+        !   omega:      This is the step size taken in the last update to the state
+        !   deltaWTurb: Vector that contains the full update given from the
+        !               Newton/Euler iteration.
+        !   w(:,:,:,:): Should contain the updated state with the given step size
+        !               lambdaLS and given update deltaW
+        !   ANK_CFL:    The CFL number used for this non-linear iteration
+        !   dtl:        Array containing time step values giving a CFL number of 1
+        !               on each cell.
+        !
+        ! The routine calculates the unsteady residual and leaves the result in
+        ! rVecTurb, which was previously used to keep the steady residual only. This
+        ! is done because the norm of this vector can easily be calculated with
+        ! PETSc, however, after the line search, the rVec vector needs to be
+        ! updated to contain only the steady state residuals. This can be done with
+        ! setRVecANK/setRVec, with a dw(:,:,:,:) that is also up to date.
+
+        use constants
+        use blockPointers, only: nDom, volRef, il, jl, kl, w, dw, dtl
+        use inputtimespectral, only: nTimeIntervalsSpectral
+        use inputIteration, only: turbResScale
+        use flowvarrefstate, only: nwf, nt1, nt2
+        use NKSolver, only: setRvec
+        use utils, only: setPointers, EChk
+        use blockette, only: blocketteRes
+        implicit none
+
+        real(kind=realType), intent(in) :: omega
+
+        real(kind=realType) :: dtinv, rho, uu, vv, ww
+        integer(kind=intType) :: ierr, nn, sps, i, j, k, l, ii, iiRho
+        real(kind=realType), pointer :: rvec_pointer(:)
+        real(kind=realType), pointer :: dvec_pointer(:)
+
+        ! TODO AY: check if this routine is fine in complex mode...
+        ! dtl and volume can both have complex values in them
+
+        ! Calculate the steady residuals
+        call blocketteRes(useFlowRes=.False.)
+        call setRVecANKTurb(rVecTurb)
+
+        ! Add the contribution from the time stepping term
+
+        call VecGetArrayF90(rVecTurb, rvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! deltaW contains the full update to the state
+        call VecGetArrayReadF90(deltaWTurb, dvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Include time step for turbulence
+        ii = 1
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, 1_intType, sps)
+                ! read the density residuals and set local CFL
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            dtinv = one / (ANK_CFL * dtl(i, j, k) * volRef(i, j, k))
+
+                            do l = nt1, nt2
+                                ! turbulence variable needs additional scaling, and it may
+                                ! get a different CFL number
+                                rvec_pointer(ii) = rvec_pointer(ii) - omega * dvec_pointer(ii) * &
+                                                   dtinv * turbResScale(l - nt1 + 1) / ANK_turbCFLScale
+                                ii = ii + 1
+                            end do
+                        end do
+                    end do
+                end do
+            end do
+        end do
+
+        call VecRestoreArrayF90(rVecTurb, rvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecRestoreArrayReadF90(deltaWTurb, dvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! We don't check an error here, so just pass back zero
+        ierr = 0
+
+    end subroutine computeUnsteadyResANKTurb
+
+    subroutine destroyANKsolver
+
+        ! Destroy all the PETSc objects for the Newton-Krylov
+        ! solver.
+
+        use constants
+        use utils, only: EChk
+        use agmg, only: destroyAGMG
+        implicit none
+        integer(kind=intType) :: ierr
+
+        if (ANK_SolverSetup) then
+
+            call MatDestroy(dRdw, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-    ! Complete the matrix assembly.
-    call MatAssemblyEnd  (dRdwPreTurb, MAT_FINAL_ASSEMBLY, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+            call MatDestroy(dRdwPre, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
+            call VecDestroy(wVec, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-    call setupStandardKSP(ANK_KSPTurb, kspObjectType, subSpace, &
-         preConSide, globalPCType, ANK_asmOverlap, outerPreConIts, localPCType, &
-         localOrdering, ANK_iluFill, ANK_innerPreConIts)
+            call VecDestroy(rVec, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-    ! Don't do iterative refinement for the NKSolver.
-    call KSPGMRESSetCGSRefinementType(ANK_KSPTurb, &
-         KSP_GMRES_CGS_REFINE_NEVER, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+            call VecDestroy(deltaW, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-  contains
-    subroutine setBlock()
-      ! This subroutine is used to set the diagonal time stepping terms
-      ! for the Jacobians in ANK. It is only used to set diagonal blocks
+            call VecDestroy(baseRes, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-      implicit none
+            call KSPDestroy(ANK_KSP, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-      call MatSetValuesBlocked(dRdwPreTurb, 1, irow, 1, irow, blk, &
-           ADD_VALUES, ierr)
-      call EChk(ierr, __FILE__, __LINE__)
+            call destroyAGMG()
 
-    end subroutine setBlock
-  end subroutine FormJacobianANKTurb
+            ANK_SolverSetup = .False.
 
-  subroutine FormFunction_mf(ctx, inVec, rVec, ierr)
+            if (ANK_turbSetup) then
 
-    ! This is the function used for the matrix-free matrix-vector products
-    ! for the GMRES solver used in ANK
+                call MatDestroy(dRdwTurb, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-    use constants
-    use blockPointers, only : nDom, volRef, il, jl, kl, dw, dtl
-    use inputtimespectral, only : nTimeIntervalsSpectral
-    use inputIteration, only : turbResScale
-    use flowvarrefstate, only : nwf, nt1, nt2
-    use NKSolver, only : setRvec
-    use utils, only : setPointers, EChk
-    use blockette, only : blocketteRes
-    implicit none
+                call MatDestroy(dRdwPreTurb, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-    ! PETSc Variables
-    PetscFortranAddr ctx(*)
-    Vec     inVec, rVec
-    real(kind=realType) :: dtinv, rho
-    integer(kind=intType) :: ierr, nn, sps, i, j, k, l, ii, iiRho
-    real(kind=realType),pointer :: rvec_pointer(:)
-    real(kind=realType),pointer :: invec_pointer(:)
-    real(kind=realType),pointer :: wvec_pointer(:)
-    logical :: useViscApprox
-
-    ! get the input vector
-    call setWANK(inVec,1,nState)
-
-    ! determine if we want the approximate viscous fluxes
-    useViscApprox = (.not. ANK_useFullVisc) .and. ANK_useDissApprox
-
-    ! Determine if we want the turb residuals
-    call blocketteRes(useDissApprox=ANK_useDissApprox, useViscApprox=useViscApprox, &
-         useTurbRes=ANK_coupled, useStoreWall=.False.)
-
-    ! Copy the residuals to rVec in petsc
-    if (ANK_coupled) then
-       call setRVec(rVec)
-    else
-       call setRVecANK(rVec)
-    end if
-
-    ! Add the contribution from the time stepping term
-
-    call VecGetArrayF90(rVec,rvec_pointer,ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! inVec contains the perturbed state vector
-    call VecGetArrayReadF90(inVec,invec_pointer,ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! Also read the wVec to access the un-perturbed state vector.
-    call VecGetArrayReadF90(wVec,wvec_pointer,ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    if (.not. ANK_coupled) then
-       ! Only flow variables
-       ii = 1
-       do nn=1, nDom
-          do sps=1, nTimeIntervalsSpectral
-             call setPointers(nn,1_intType,sps)
-             ! read the density residuals and set local CFL
-             do k=2, kl
-                do j=2, jl
-                   do i=2, il
-                      dtinv = one/(ANK_CFL * dtl(i,j,k) * volRef(i,j,k))
-
-                      ! Update the first entry in this block, corresponds to
-                      ! density. Also save this density value.
-                      rvec_pointer(ii) = rvec_pointer(ii) + invec_pointer(ii)*dtinv
-                      rho = wvec_pointer(ii)
-                      iirho = ii
-                      ii = ii + 1
-
-                      ! updates 2nd-4th are velocities. They need to get converted
-                      ! to momentum residuals.
-
-                      rvec_pointer(ii) = rvec_pointer(ii) + dtinv*( &
-                           wvec_pointer(ii)*invec_pointer(iiRho)+&
-                           rho * invec_pointer(ii))
-                      ii = ii + 1
-
-                      rvec_pointer(ii) = rvec_pointer(ii) + dtinv*( &
-                           wvec_pointer(ii)*invec_pointer(iiRho)+&
-                           rho * invec_pointer(ii))
-                      ii = ii + 1
-
-                      rvec_pointer(ii) = rvec_pointer(ii) + dtinv*( &
-                           wvec_pointer(ii)*invec_pointer(iiRho)+&
-                           rho * invec_pointer(ii))
-                      ii = ii + 1
-
-                      ! Finally energy gets the same update
-                      rvec_pointer(ii) = rvec_pointer(ii) + invec_pointer(ii)*dtinv
-                      ii = ii + 1
-                   end do
-                end do
-             end do
-          end do
-       end do
-    else
-       ! Include time step for turbulence
-       ii = 1
-       do nn=1, nDom
-          do sps=1, nTimeIntervalsSpectral
-             call setPointers(nn,1_intType,sps)
-             ! read the density residuals and set local CFL
-             do k=2, kl
-                do j=2, jl
-                   do i=2, il
-                      dtinv = one/(ANK_CFL * dtl(i,j,k) * volRef(i,j,k))
-
-                      ! Update the first entry in this block, corresponds to
-                      ! density. Also save this density value.
-                      rvec_pointer(ii) = rvec_pointer(ii) + invec_pointer(ii)*dtinv
-                      rho = wvec_pointer(ii)
-                      iirho = ii
-                      ii = ii + 1
-
-                      ! updates 2nd-4th are velocities. They need to get converted
-                      ! to momentum residuals.
-
-                      rvec_pointer(ii) = rvec_pointer(ii) + dtinv*( &
-                           wvec_pointer(ii)*invec_pointer(iiRho)+&
-                           rho * invec_pointer(ii))
-                      ii = ii + 1
-
-                      rvec_pointer(ii) = rvec_pointer(ii) + dtinv*( &
-                           wvec_pointer(ii)*invec_pointer(iiRho)+&
-                           rho * invec_pointer(ii))
-                      ii = ii + 1
-
-                      rvec_pointer(ii) = rvec_pointer(ii) + dtinv*( &
-                           wvec_pointer(ii)*invec_pointer(iiRho)+&
-                           rho * invec_pointer(ii))
-                      ii = ii + 1
-
-                      ! energy gets the same update
-                      rvec_pointer(ii) = rvec_pointer(ii) + invec_pointer(ii)*dtinv
-                      ii = ii + 1
-
-                      do l=nt1, nt2
-                         ! turbulence variable needs additional scaling, and it may
-                         ! get a different CFL number
-                         rvec_pointer(ii) = rvec_pointer(ii) + invec_pointer(ii)* &
-                              dtinv*turbResScale(l-nt1+1)/ANK_turbCFLScale
-                         ii = ii + 1
-                      end do
-                   end do
-                end do
-             end do
-          end do
-       end do
-    end if
+                call VecDestroy(wVecTurb, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-    call VecRestoreArrayF90(rVec, rvec_pointer, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+                call VecDestroy(rVecTurb, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-    call VecRestoreArrayReadF90(wVec, wvec_pointer, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+                call VecDestroy(deltaWTurb, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-    call VecRestoreArrayReadF90(inVec, invec_pointer, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+                call VecDestroy(baseResTurb, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-    ! We don't check an error here, so just pass back zero
-    ierr = 0
+                call KSPDestroy(ANK_KSPTurb, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-  end subroutine FormFunction_mf
+                ANK_turbSetup = .False.
+            end if
+        end if
+    end subroutine destroyANKsolver
 
-  subroutine FormFunction_mf_turb(ctx, inVec, rVec, ierr)
+    subroutine setWVecANK(wVec, lStart, lEnd)
+        ! Set the current FLOW variables in the PETSc Vector
 
-    ! This is the function used for the matrix-free matrix-vector products
-    ! for the GMRES solver used in ANK
+        use constants
+        use blockPointers, only: nDom, il, jl, kl, w
+        use inputtimespectral, only: ntimeIntervalsSpectral
+        use utils, only: setPointers, EChk
+        implicit none
 
-    use constants
-    use blockPointers, only : nDom, volRef, il, jl, kl, dw, dtl
-    use inputtimespectral, only : nTimeIntervalsSpectral
-    use inputIteration, only : turbResScale
-    use flowvarrefstate, only : nwf, nt1, nt2
-    use NKSolver, only : setRvec
-    use utils, only : setPointers, EChk
-    use blockette, only : blocketteRes
-    implicit none
+        Vec wVec
+        integer(kind=intType), intent(in) :: lStart, lEnd
+        integer(kind=intType) :: ierr, nn, sps, i, j, k, l, ii
+        real(kind=realType), pointer :: wvec_pointer(:)
 
-    ! PETSc Variables
-    PetscFortranAddr ctx(*)
-    Vec     inVec, rVec
-    real(kind=realType) :: dtinv, rho
-    integer(kind=intType) :: ierr, nn, sps, i, j, k, l, ii, iiRho
-    real(kind=realType),pointer :: rvec_pointer(:)
-    real(kind=realType),pointer :: invec_pointer(:)
-
-    ! get the input vector
-    call setWANK(inVec,nt1,nt2)
-
-    call blocketteRes(useFlowRes=.False., useStoreWall=.False.)
-    call setRVecANKTurb(rVec)
-
-    ! Add the contribution from the time stepping term
-
-    call VecGetArrayF90(rVec,rvec_pointer,ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! inVec contains the perturbed state vector
-    call VecGetArrayReadF90(inVec,invec_pointer,ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! Include time step for turbulence
-    ii = 1
-    do nn=1, nDom
-        do sps=1, nTimeIntervalsSpectral
-            call setPointers(nn,1_intType,sps)
-            ! read the density residuals and set local CFL
-            do k=2, kl
-                do j=2, jl
-                    do i=2, il
-                     ! needs to be modified
-                        dtinv = one/(ANK_CFL * dtl(i,j,k) * volRef(i,j,k))
-
-                        do l=nt1, nt2
-                           ! turbulence variable needs additional scaling, and it may
-                           ! get a different CFL number
-                           rvec_pointer(ii) = rvec_pointer(ii) + invec_pointer(ii)* &
-                           dtinv*turbResScale(l-nt1+1)/ANK_turbCFLScale
-                           ii = ii + 1
+        call VecGetArrayF90(wVec, wvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+        ii = 0
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, 1_intType, sps)
+                ! Copy off w to wVec
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            do l = lStart, lEnd
+                                ii = ii + 1
+                                wvec_pointer(ii) = w(i, j, k, l)
+                            end do
                         end do
                     end do
                 end do
             end do
         end do
-    end do
-
-
-    call VecRestoreArrayF90(rVec, rvec_pointer, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
 
-    call VecRestoreArrayReadF90(inVec, invec_pointer, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+        call VecRestoreArrayF90(wVec, wvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    ! We don't check an error here, so just pass back zero
-    ierr = 0
+    end subroutine setWVecANK
+
+    subroutine setRVecANK(rVec)
+
+        ! Set the current FLOW residual in dw into the PETSc Vector
+        use constants
+        use blockPointers, only: nDom, volRef, il, jl, kl, dw
+        use inputtimespectral, only: nTimeIntervalsSpectral
+        use flowvarrefstate, only: nwf, nt1, nt2
+        use inputIteration, only: turbResScale
+        use utils, only: setPointers, EChk
+        implicit none
+        Vec rVec
+        integer(kind=intType) :: ierr, nn, sps, i, j, k, l, ii
+        real(kind=realType), pointer :: rvec_pointer(:)
+        real(Kind=realType) :: ovv
+        call VecGetArrayF90(rVec, rvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+        ii = 0
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, 1_intType, sps)
+                ! Copy off dw/vol to rVec
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            ovv = one / volRef(i, j, k)
+                            do l = 1, nwf
+                                ii = ii + 1
+                                rvec_pointer(ii) = dw(i, j, k, l) * ovv
+                            end do
+                        end do
+                    end do
+                end do
+            end do
+        end do
 
-  end subroutine FormFunction_mf_turb
+        call VecRestoreArrayF90(rVec, rvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-  subroutine computeUnsteadyResANK(omega)
+    end subroutine setRVecANK
+
+    subroutine setRVecANKTurb(rVecTurb)
+
+        ! Set the current Turb residual in dw into the PETSc Vector
+        use constants
+        use blockPointers, only: nDom, volRef, il, jl, kl, dw
+        use inputtimespectral, only: nTimeIntervalsSpectral
+        use flowvarrefstate, only: nt1, nt2
+        use inputIteration, only: turbResScale
+        use utils, only: setPointers, EChk
+        implicit none
+        Vec rVecTurb
+        integer(kind=intType) :: ierr, nn, sps, i, j, k, l, ii
+        real(kind=realType), pointer :: rvec_pointer(:)
+        real(Kind=realType) :: ovv
+        call VecGetArrayF90(rVecTurb, rvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+        ii = 0
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, 1_intType, sps)
+                ! Copy off dw/vol to rVec
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            ovv = one / volRef(i, j, k)
+                            do l = nt1, nt2
+                                ii = ii + 1
+                                rvec_pointer(ii) = dw(i, j, k, l) * ovv * turbResScale(1)
+                            end do
+                        end do
+                    end do
+                end do
+            end do
+        end do
 
-    ! This routine calculates the unsteady residual in a given iteration.
-    ! It needs the following variables/vectors:
-    !
-    !   omega:      This is the step size taken in the last update to the state
-    !   deltaW:     Vector that contains the full update given from the
-    !               Newton/Euler iteration.
-    !   w(:,:,:,:): Should contain the updated state with the given step size
-    !               lambdaLS and given update deltaW
-    !   ANK_CFL:    The CFL number used for this non-linear iteration
-    !   dtl:        Array containing time step values giving a CFL number of 1
-    !               on each cell.
-    !
-    ! The routine calculates the unsteady residual and leaves the result in
-    ! rVec, which was previously used to keep the steady residual only. This
-    ! is done because the norm of this vector can easily be calculated with
-    ! PETSc, however, after the line search, the rVec vector needs to be
-    ! updated to contain only the steady state residuals. This can be done with
-    ! setRVecANK/setRVec, with a dw(:,:,:,:) that is also up to date.
+        call VecRestoreArrayF90(rVecTurb, rvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    use constants
-    use blockPointers, only : nDom, volRef, il, jl, kl, w, dw, dtl
-    use inputtimespectral, only : nTimeIntervalsSpectral
-    use inputIteration, only : turbResScale
-    use flowvarrefstate, only : nwf, nt1, nt2
-    use NKSolver, only : setRvec
-    use utils, only : setPointers, EChk
-    use blockette, only : blocketteRes
-    implicit none
+    end subroutine setRVecANKTurb
 
-    real(kind=realType), intent(in) :: omega
+    subroutine setWANK(wVec, lStart, lEnd)
+        ! Get the updated solution from the PETSc Vector
 
-    real(kind=realType) :: dtinv, rho, uu, vv, ww
-    integer(kind=intType) :: ierr, nn, sps, i, j, k, l, ii, iiRho
-    real(kind=realType),pointer :: rvec_pointer(:)
-    real(kind=realType),pointer :: dvec_pointer(:)
+        use constants
+        use blockPointers, only: nDom, vol, il, jl, kl, w
+        use inputtimespectral, only: nTimeIntervalsSpectral
+        use utils, only: setPointers, EChk
+        implicit none
 
-    ! Calculate the steady residuals
-    call blocketteRes(useTurbRes=ANK_coupled)
-    call setRVecANK(rVec)
+        Vec wVec
+        integer(kind=intType), intent(in) :: lStart, lEnd
+        integer(kind=intType) :: ierr, nn, sps, i, j, k, l, ii
+        real(kind=realType), pointer :: wvec_pointer(:)
+        call VecGetArrayReadF90(wVec, wvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    ! Add the contribution from the time stepping term
+        ii = 0
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, 1_intType, sps)
+
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            do l = lStart, lEnd
+                                ii = ii + 1
+                                w(i, j, k, l) = wvec_pointer(ii)
+                            end do
+                        end do
+                    end do
+                end do
+            end do
+        end do
+        call VecRestoreArrayReadF90(wVec, wvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    call VecGetArrayF90(rVec,rvec_pointer,ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+    end subroutine setWANK
+
+    subroutine physicalityCheckANK(lambdaP)
+
+        use constants
+        use blockPointers, only: ndom, il, jl, kl
+        use flowVarRefState, only: nw, nwf, nt1, nt2
+        use inputtimespectral, only: nTimeIntervalsSpectral
+        use utils, only: setPointers, EChk
+        use genericISNAN, only: myisnan
+        use communication, only: ADflow_comm_world
+        implicit none
+
+        ! input variable
+        real(kind=realType), intent(inout) :: lambdaP
+
+        ! local variables
+        integer(kind=intType) :: ierr, nn, sps, i, j, k, l, ii
+        real(kind=realType), pointer :: wvec_pointer(:)
+        real(kind=realType), pointer :: dvec_pointer(:)
+        real(kind=alwaysRealType) :: lambdaL ! L is for local
+        real(kind=alwaysRealType) :: lambdaP_recv ! to receive the global step
+        real(kind=alwaysRealType) :: ratio
+
+        ! Determine the maximum step size that would yield
+        ! a maximum relative change of ANK_physLSTol in density, and total energy.
+        ! We also check for turbulence, but only limit the step
+        ! for updates that decrease the value of the turbulence working variable.
+
+        ! Initialize the local step size as lambdaP which is an i/o variable
+        lambdaL = real(lambdaP)
+
+        ! First we need to read both the update and the state
+        ! from PETSc because the w in ADFlow currently contains
+        ! the state that is perturbed during the matrix-free
+        ! operations.
+
+        ! wVec contains the state vector
+        call VecGetArrayF90(wVec, wvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    ! deltaW contains the full update to the state
-    call VecGetArrayReadF90(deltaW,dvec_pointer,ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+        ! deltaW contains the full update
+        call VecGetArrayF90(deltaW, dvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
+        ! in decoupled, we just have the flow variables
+        if (.not. ANK_coupled) then
+            ii = 1
+            do nn = 1, nDom
+                do sps = 1, nTimeIntervalsSpectral
+                    call setPointers(nn, 1_intType, sps)
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 2, il
+                                ! multiply the ratios by ANK_physLSTol to check if the change in a
+                                ! variable is greater than ANK_physLSTol of the variable itself.
+
+                                ! check density
+#ifndef USE_COMPLEX
+                                ! to have the real mode slightly more efficient, we do not check if variables are real
+                                ratio = abs(wvec_pointer(ii) / (dvec_pointer(ii) + eps)) * ANK_physLSTol
+#else
+                                ! We dont care what happens to the complex part of the update because
+                                ! that is a linear system. So again check the real update for the physical
+                                ! line search. Towards the end of the simulation, real part gets smaller and
+                                ! and smaller updates, so this routine will always give a step of 1 which is what
+                                ! we want for the complex parts.
+                                ratio = abs(real(wvec_pointer(ii)) / real(dvec_pointer(ii) + eps)) * real(ANK_physLSTol)
+#endif
+                                lambdaL = min(lambdaL, ratio)
 
-    ! TODO AY: check if this routine is fine with complex mode...
-    ! dtl and volume can both have complex values in them
+                                ! increment by 4 because we want to skip momentum variables
+                                ii = ii + 4
 
-    if (.not. ANK_coupled) then
-       ! Only flow variables
-       ii = 1
-       do nn=1, nDom
-          do sps=1, nTimeIntervalsSpectral
-             call setPointers(nn,1_intType,sps)
-             ! read the density residuals and set local CFL
-             do k=2, kl
-                do j=2, jl
-                   do i=2, il
-                      dtinv = one/(ANK_CFL * dtl(i,j,k) * volRef(i,j,k))
+                                ! check energy
+#ifndef USE_COMPLEX
+                                ! see the comment above for the difference between real and complex versions
+                                ratio = abs(wvec_pointer(ii) / (dvec_pointer(ii) + eps)) * ANK_physLSTol
+#else
+                                ratio = abs(real(wvec_pointer(ii)) / real(dvec_pointer(ii) + eps)) * real(ANK_physLSTol)
+#endif
+                                lambdaL = min(lambdaL, ratio)
+                                ii = ii + 1
+                            end do
+                        end do
+                    end do
+                end do
+            end do
+            ! in coupled, we also have the turbulence variables
+        else
+            ii = 1
+            do nn = 1, nDom
+                do sps = 1, nTimeIntervalsSpectral
+                    call setPointers(nn, 1_intType, sps)
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 2, il
+
+                                ! multiply the ratios by ANK_physLSTol to check if the change in a
+                                ! variable is greater than ANK_physLSTol of the variable itself.
+
+                                ! check density
+#ifndef USE_COMPLEX
+                                ! to have the real mode slightly more efficient, we do not check if variables are real
+                                ratio = abs(wvec_pointer(ii) / (dvec_pointer(ii) + eps)) * ANK_physLSTol
+#else
+                                ! We dont care what happens to the complex part of the update because
+                                ! that is a linear system. So again check the real update for the physical
+                                ! line search. Towards the end of the simulation, real part gets smaller and
+                                ! and smaller updates, so this routine will always give a step of 1 which is what
+                                ! we want for the complex parts.
+                                ratio = abs(real(wvec_pointer(ii)) / real(dvec_pointer(ii) + eps)) * real(ANK_physLSTol)
+#endif
+                                lambdaL = min(lambdaL, ratio)
 
-                      ! Update the first entry in this block, corresponds to
-                      ! density. Also save this density value.
-                      rvec_pointer(ii) = rvec_pointer(ii) - omega*dtinv*dvec_pointer(ii)
+                                ! increment by 4 because we want to skip momentum variables
+                                ii = ii + 4
 
-                      ! calculate the density in the previous non-linear iteration
-                      rho = w(i,j,k,iRho) + omega*dvec_pointer(ii)
-                      iiRho = ii
-                      ii = ii + 1
+                                ! check energy
+#ifndef USE_COMPLEX
+                                ! see the comment above for the difference between real and complex versions
+                                ratio = abs(wvec_pointer(ii) / (dvec_pointer(ii) + eps)) * ANK_physLSTol
+#else
+                                ratio = abs(real(wvec_pointer(ii)) / real(dvec_pointer(ii) + eps)) * real(ANK_physLSTol)
+#endif
+                                lambdaL = min(lambdaL, ratio)
+                                ii = ii + 1
 
-                      ! updates 2nd-4th are velocities. They need to get converted
-                      ! to momentum residuals.
+                                ! if coupled ank is used, nstate = nw and this loop is executed
+                                ! if no turbulence variables, this loop will be automatically skipped
+                                ! check turbulence variable
+#ifndef USE_COMPLEX
+                                ratio = (wvec_pointer(ii) / (dvec_pointer(ii) + eps)) * ANK_physLSTolTurb
+#else
+                               ratio = (real(wvec_pointer(ii)) / real(dvec_pointer(ii) + eps)) * real(ANK_physLSTolTurb)
+#endif
+                                ! if the ratio is less than min step, the update is either
+                                ! in the positive direction, therefore we do not clip it,
+                                ! or the update is very limiting, so we just clip the
+                                ! individual update for this cell.
+                                if (ratio .lt. ANK_stepFactor * ANK_stepMin) then
+                                    ! The update was very limiting, so just clip this
+                                    ! individual update and dont change the overall
+                                    ! step size. To select the new update, instead of
+                                    ! clipping to zero, we clip to 1 percent of the original.
+                                    if (ratio .gt. zero) &
+                                        dvec_pointer(ii) = wvec_pointer(ii) * ANK_physLSTolTurb
+
+                                    ! Either case, set the ratio to one. Positive updates
+                                    ! do not limit the step, negative updates below minimum
+                                    ! step were already clipped.
+                                    ratio = one
+                                end if
+                                lambdaL = min(lambdaL, ratio)
+                                ii = ii + 1
+
+                                ! TODO: Do we need physicality checks for the additional turbulence model variables?
+                                !  do l=nt1+1, nt2
+                                !     ii = ii + 1
+                                !  end do
+                                ! do this instead of the above loop for now...
+                                ! Will need to modify this if we want physicality check
+                                ! for the new turb model variables.
+                                ii = ii + (nt2 - nt1)
+                            end do
+                        end do
+                    end do
+                end do
+            end do
+        end if
 
-                      ! Calculate the u velocity in the previous non-linear iter.
-                      uu = w(i,j,k,ivx) + omega*dvec_pointer(ii)
+        ! Restore the pointers to PETSc vectors
 
-                      rvec_pointer(ii) = rvec_pointer(ii) - omega*dtinv*( &
-                           uu*dvec_pointer(iiRho)+&
-                           rho * dvec_pointer(ii))
-                      ii = ii + 1
+        call VecRestoreArrayF90(wVec, wvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-                      vv = w(i,j,k,ivx) + omega*dvec_pointer(ii)
+        call VecRestoreArrayF90(deltaW, dvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-                      rvec_pointer(ii) = rvec_pointer(ii) - omega*dtinv*( &
-                           vv*dvec_pointer(iiRho)+&
-                           rho * dvec_pointer(ii))
-                      ii = ii + 1
+        ! Make sure that we did not get any NaN's in the process
+        if (myisnan(lambdaL)) then
+            lambdaL = zero
+        end if
 
-                      ww = w(i,j,k,ivx) + omega*dvec_pointer(ii)
+        ! Finally, communicate the step size across processes and return
+        ! mpi allreduce is not defined for complex numbers with the min operation
+        ! so we will use the lambdaP_recv variable to receive
+        call mpi_allreduce(lambdaL, lambdaP_recv, 1_intType, MPI_DOUBLE, &
+                           mpi_min, ADflow_comm_world, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-                      rvec_pointer(ii) = rvec_pointer(ii) - omega*dtinv*( &
-                           ww*dvec_pointer(iiRho)+&
-                           rho * dvec_pointer(ii))
-                      ii = ii + 1
+#ifndef USE_COMPLEX
+        lambdaP = lambdaP_recv
+#else
+        ! finally, as a safety check, purge the complex part of lambda
+        lambdaP = cmplx(lambdaP_recv, 0.0_realType)
+#endif
 
-                      ! Finally energy gets the same update
-                      rvec_pointer(ii) = rvec_pointer(ii) - omega*dtinv*dvec_pointer(ii)
-                      ii = ii + 1
-                   end do
-                end do
-             end do
-          end do
-       end do
-    else
-       ! Include time step for turbulence
-       ii = 1
-       do nn=1, nDom
-          do sps=1, nTimeIntervalsSpectral
-             call setPointers(nn,1_intType,sps)
-             ! read the density residuals and set local CFL
-             do k=2, kl
-                do j=2, jl
-                   do i=2, il
-                      dtinv = one/(ANK_CFL * dtl(i,j,k) * volRef(i,j,k))
-
-                      ! Update the first entry in this block, corresponds to
-                      ! density. Also save this density value.
-                      rvec_pointer(ii) = rvec_pointer(ii) - omega*dtinv*dvec_pointer(ii)
-
-                      ! calculate the density in the previous non-linear iteration
-                      rho = w(i,j,k,iRho) + omega*dvec_pointer(ii)
-                      iiRho = ii
-                      ii = ii + 1
-
-                      ! updates 2nd-4th are velocities. They need to get converted
-                      ! to momentum residuals.
-
-                      ! Calculate the u velocity in the previous non-linear iter.
-                      uu = w(i,j,k,ivx) + omega*dvec_pointer(ii)
-
-                      rvec_pointer(ii) = rvec_pointer(ii) - omega*dtinv*( &
-                           uu*dvec_pointer(iiRho)+&
-                           rho * dvec_pointer(ii))
-                      ii = ii + 1
-
-                      vv = w(i,j,k,ivx) + omega*dvec_pointer(ii)
-
-                      rvec_pointer(ii) = rvec_pointer(ii) - omega*dtinv*( &
-                           vv*dvec_pointer(iiRho)+&
-                           rho * dvec_pointer(ii))
-                      ii = ii + 1
-
-                      ww = w(i,j,k,ivx) + omega*dvec_pointer(ii)
-
-                      rvec_pointer(ii) = rvec_pointer(ii) - omega*dtinv*( &
-                           ww*dvec_pointer(iiRho)+&
-                           rho * dvec_pointer(ii))
-                      ii = ii + 1
-
-                      ! Finally energy gets the same update
-                      rvec_pointer(ii) = rvec_pointer(ii) - omega*dtinv*dvec_pointer(ii)
-                      ii = ii + 1
-
-                      do l=nt1, nt2
-                         ! turbulence variable needs additional scaling, and it may
-                         ! get a different CFL number
-                         rvec_pointer(ii) = rvec_pointer(ii) - omega*dvec_pointer(ii)* &
-                              dtinv*turbResScale(l-nt1+1)/ANK_turbCFLScale
-                         ii = ii + 1
-                      end do
-                   end do
-                end do
-             end do
-          end do
-       end do
-    end if
-
-    call VecRestoreArrayF90(rVec, rvec_pointer, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    call VecRestoreArrayReadF90(deltaW, dvec_pointer, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! We don't check an error here, so just pass back zero
-    ierr = 0
-
-  end subroutine computeUnsteadyResANK
-
-  subroutine computeUnsteadyResANKTurb(omega)
-
-    ! This routine calculates the unsteady residual in a given iteration.
-    ! It needs the following variables/vectors:
-    !
-    !   omega:      This is the step size taken in the last update to the state
-    !   deltaWTurb: Vector that contains the full update given from the
-    !               Newton/Euler iteration.
-    !   w(:,:,:,:): Should contain the updated state with the given step size
-    !               lambdaLS and given update deltaW
-    !   ANK_CFL:    The CFL number used for this non-linear iteration
-    !   dtl:        Array containing time step values giving a CFL number of 1
-    !               on each cell.
-    !
-    ! The routine calculates the unsteady residual and leaves the result in
-    ! rVecTurb, which was previously used to keep the steady residual only. This
-    ! is done because the norm of this vector can easily be calculated with
-    ! PETSc, however, after the line search, the rVec vector needs to be
-    ! updated to contain only the steady state residuals. This can be done with
-    ! setRVecANK/setRVec, with a dw(:,:,:,:) that is also up to date.
+    end subroutine physicalityCheckANK
+
+    subroutine physicalityCheckANKTurb(lambdaP)
+
+        use constants
+        use blockPointers, only: ndom, il, jl, kl
+        use flowVarRefState, only: nw, nwf, nt1, nt2
+        use inputtimespectral, only: nTimeIntervalsSpectral
+        use utils, only: setPointers, EChk
+        use genericISNAN, only: myisnan
+        use communication, only: ADflow_comm_world
+        implicit none
+
+        ! input variable
+        real(kind=realType), intent(inout) :: lambdaP
+
+        ! local variables
+        integer(kind=intType) :: ierr, nn, sps, i, j, k, l, ii
+        real(kind=realType), pointer :: wvec_pointer(:)
+        real(kind=realType), pointer :: dvec_pointer(:)
+        real(kind=alwaysRealType) :: lambdaL ! L is for local
+        real(kind=alwaysRealType) :: lambdaP_recv ! to receive the global step
+        real(kind=alwaysRealType) :: ratio
+
+        ! Determine the maximum step size that would yield
+        ! a maximum change of 10% in density, total energy,
+        ! and turbulence variable after a KSP solve.
+
+        ! Initialize the local step size as ANK_stepFactor
+        ! because the initial step is likely to be equal to this.
+        lambdaL = real(lambdaP)
+
+        ! First we need to read both the update and the state
+        ! from PETSc because the w in ADFlow currently contains
+        ! the state that is perturbed during the matrix-free
+        ! operations.
+
+        ! wVec contains the state vector
+        call VecGetArrayF90(wVecTurb, wvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    use constants
-    use blockPointers, only : nDom, volRef, il, jl, kl, w, dw, dtl
-    use inputtimespectral, only : nTimeIntervalsSpectral
-    use inputIteration, only : turbResScale
-    use flowvarrefstate, only : nwf, nt1, nt2
-    use NKSolver, only : setRvec
-    use utils, only : setPointers, EChk
-    use blockette, only : blocketteRes
-    implicit none
+        ! deltaW contains the full update
+        call VecGetArrayF90(deltaWTurb, dvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    real(kind=realType), intent(in) :: omega
-
-    real(kind=realType) :: dtinv, rho, uu, vv, ww
-    integer(kind=intType) :: ierr, nn, sps, i, j, k, l, ii, iiRho
-    real(kind=realType),pointer :: rvec_pointer(:)
-    real(kind=realType),pointer :: dvec_pointer(:)
-
-    ! TODO AY: check if this routine is fine in complex mode...
-    ! dtl and volume can both have complex values in them
-
-    ! Calculate the steady residuals
-    call blocketteRes(useFlowRes=.False.)
-    call setRVecANKTurb(rVecTurb)
-
-    ! Add the contribution from the time stepping term
-
-    call VecGetArrayF90(rVecTurb,rvec_pointer,ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! deltaW contains the full update to the state
-    call VecGetArrayReadF90(deltaWTurb,dvec_pointer,ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! Include time step for turbulence
-    ii = 1
-    do nn=1, nDom
-        do sps=1, nTimeIntervalsSpectral
-            call setPointers(nn,1_intType,sps)
-            ! read the density residuals and set local CFL
-            do k=2, kl
-                do j=2, jl
-                    do i=2, il
-                        dtinv = one/(ANK_CFL * dtl(i,j,k) * volRef(i,j,k))
-
-                        do l=nt1, nt2
-                           ! turbulence variable needs additional scaling, and it may
-                           ! get a different CFL number
-                           rvec_pointer(ii) = rvec_pointer(ii) - omega*dvec_pointer(ii)* &
-                                dtinv*turbResScale(l-nt1+1)/ANK_turbCFLScale
-                           ii = ii + 1
+        ii = 1
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, 1_intType, sps)
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            ! multiply the ratios by 10 to check if the change in a
+                            ! variable is greater than 10% of the variable itself.
+
+                            ! needs to be modified
+                            ! if coupled ank is used, nstate = nw and this loop is executed
+                            ! if no turbulence variables, this loop will be automatically skipped
+                            ! check turbulence variable
+#ifndef USE_COMPLEX
+                            ratio = (wvec_pointer(ii) / (dvec_pointer(ii) + eps)) * ANK_physLSTolTurb
+#else
+                            ratio = (real(wvec_pointer(ii)) / real(dvec_pointer(ii) + eps)) * real(ANK_physLSTolTurb)
+#endif
+                            ! if the ratio is less than min step, the update is either
+                            ! in the positive direction, therefore we do not clip it,
+                            ! or the update is very limiting, so we just clip the
+                            ! individual update for this cell.
+                            if (ratio .lt. ANK_stepFactor * ANK_stepMin) then
+                                ! The update was very limiting, so just clip this
+                                ! individual update and dont change the overall
+                                ! step size. To select the new update, instead of
+                                ! clipping to zero, we clip to 1 percent of the original.
+                                if (ratio .gt. zero) &
+                                    dvec_pointer(ii) = wvec_pointer(ii) * ANK_physLSTolTurb
+
+                                ! Either case, set the ratio to one. Positive updates
+                                ! do not limit the step, negative updates below minimum
+                                ! step were already clipped.
+                                ratio = one
+                            end if
+                            lambdaL = min(lambdaL, ratio)
+                            ii = ii + 1
+
+                            ! TODO: Do we need physicality checks for the additional turbulence model variables?
+                            !  do l=nt1+1, nt2
+                            !     ii = ii + 1
+                            !  end do
+                            ! do this instead of the above loop for now...
+                            ! Will need to modify this if we want physicality check
+                            ! for the new turb model variables.
+                            ii = ii + (nt2 - nt1)
                         end do
                     end do
                 end do
             end do
         end do
-    end do
-
-    call VecRestoreArrayF90(rVecTurb, rvec_pointer, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
 
-    call VecRestoreArrayReadF90(deltaWTurb, dvec_pointer, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+        ! Restore the pointers to PETSc vectors
 
-    ! We don't check an error here, so just pass back zero
-    ierr = 0
-
-  end subroutine computeUnsteadyResANKTurb
-
-  subroutine destroyANKsolver
-
-    ! Destroy all the PETSc objects for the Newton-Krylov
-    ! solver.
+        call VecRestoreArrayF90(wVecTurb, wvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    use constants
-    use utils, only : EChk
-    use agmg, only : destroyAGMG
-    implicit none
-    integer(kind=intType) :: ierr
+        call VecRestoreArrayF90(deltaWTurb, dvec_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    if (ANK_SolverSetup) then
+        ! Make sure that we did not get any NaN's in the process
+        if (myisnan(lambdaL)) then
+            lambdaL = zero
+        end if
 
-       call MatDestroy(dRdw, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+        ! Finally, communicate the step size across processes and return
+        ! mpi allreduce is not defined for complex numbers with the min operation
+        ! so we will use the lambdaP_recv variable to receive
+        call mpi_allreduce(lambdaL, lambdaP_recv, 1_intType, MPI_DOUBLE, &
+                           mpi_min, ADflow_comm_world, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-       call MatDestroy(dRdwPre, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+#ifndef USE_COMPLEX
+        lambdaP = lambdaP_recv
+#else
+        ! finally, as a safety check, purge the complex part of lambda
+        lambdaP = cmplx(lambdaP_recv, 0.0_realType)
+#endif
 
-       call VecDestroy(wVec, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+    end subroutine physicalityCheckANKTurb
+
+    subroutine ANKTurbSolveKSP
+
+        ! This routine solves the turbulence model equation using
+        ! a similar approach to the main ank solver.
+
+        use constants
+        use blockPointers, only: nDom, flowDoms
+        use inputIteration, only: L2conv
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use inputDiscretization, only: approxSA, orderturb
+        use iteration, only: approxTotalIts, totalR0, totalR, currentLevel
+        use utils, only: EChk, setPointers
+        use genericISNAN, only: myisnan
+        use solverUtils, only: computeUTau
+        use NKSolver, only: getEwTol
+        use BCRoutines, only: applyAllBC, applyAllBC_block
+        use haloExchange, only: whalo2
+        use oversetData, only: oversetPresent
+        use flowVarRefState, only: nw, nwf, nt1, nt2, kPresent, pInfCorr
+        use communication
+        use blockette, only: blocketteRes
+        implicit none
+
+        ! Working Variables
+        integer(kind=intType) :: ierr, maxIt, kspIterations, nn, sps, reason, nHist, iter, feval, orderturbsave
+        integer(kind=intType) :: i, j, k, n
+        real(kind=realType) :: atol, val, v2, factK, gm1
+        real(kind=alwaysRealType) :: rtol, totalR_dummy, linearRes, norm
+        real(kind=alwaysRealType) :: resHist(ANK_maxIter + 1)
+        real(kind=alwaysRealType) :: unsteadyNorm, unsteadyNorm_old
+        real(kind=alwaysRealType) :: linResMonitorTurb, totalRTurb
+        logical :: correctForK, LSFailed
+
+        ! Calculate the residuals and set rVecTurb before the first iteration
+        call blocketteRes(useFlowRes=.False., useStoreWall=.False.)
+        call setRVecANKTurb(rVecTurb)
+
+        do n = 1, ANK_nsubIterTurb
+
+            ! Compute the norm of rVecTurb, which is identical to the
+            ! norm of the unsteady residual vector.
+            call VecNorm(rVecTurb, NORM_2, totalRTurb, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call VecDestroy(rVec, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            ! Determine if we need to form the Preconditioner
+            if (mod(ANK_iterTurb, ANK_jacobianLag) == 0) then
 
-       call VecDestroy(deltaW, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+                ! Actually form the preconditioner and factorize it.
+                if (myid .eq. 0 .and. ANK_turbDebug) &
+                    write (*, *) "Re-doing turb PC"
+                call FormJacobianANKTurb()
+                ANK_iterTurb = 0
+            end if
 
-       call VecDestroy(baseRes, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            ! Increment the iteration counter
+            ANK_iterTurb = ANK_iterTurb + 1
 
-       call KSPDestroy(ANK_KSP, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            ! Start with trying to take the full step set by the user.
+#ifndef USE_COMPLEX
+            lambdaTurb = ANK_StepFactor
+#else
+            ! make sure we zero out the complex part of the step size
+            lambdaTurb = cmplx(real(ANK_StepFactor), 0.0_realType)
+#endif
 
-       call destroyAGMG()
+            ! Dummy matrix assembly for the matrix-free matrix
+            call MatAssemblyBegin(dRdwTurb, MAT_FINAL_ASSEMBLY, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+            call MatAssemblyEnd(dRdwTurb, MAT_FINAL_ASSEMBLY, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       ANK_SolverSetup = .False.
+            if (totalR > ANK_secondOrdSwitchTol * totalR0) then
+                ! Save if second order turbulence is used, we will only use 1st order during ANK (only matters for the coupled solver)
+                approxSA = .True.
+                orderturbsave = orderturb
+                orderturb = firstOrder
+
+                ! Determine the relative convergence for the KSP solver
+                rtol = ANK_rtol ! Just use the input relative tolerance for approximate fluxes
+            else
+                ! If the second order fluxes are used, Eisenstat-Walker algorithm to determine relateive
+                ! convergence tolerance helps with performance.
+                totalR_dummy = totalR
+                call getEWTol(totalR_dummy, totalR_old, rtolLast, rtol)
+
+                ! Use the ANK rtol if E-W algorithm is not picking anything lower
+                rtol = min(ANK_rtol, rtol)
+            end if
 
-       if (ANK_turbSetup) then
+            ! also check if we are using approxSA always
+            if (ANK_useApproxSA) &
+                approxSA = .True.
 
-           call MatDestroy(dRdwTurb, ierr)
-           call EChk(ierr, __FILE__, __LINE__)
+            ! Record the total residual and relative convergence for next iteration
+            totalR_old = totalR
+            rtolLast = rtol
 
-           call MatDestroy(dRdwPreTurb, ierr)
-           call EChk(ierr, __FILE__, __LINE__)
+            ! Set all tolerances for linear solve:
+#ifndef USE_COMPLEX
+            ! in the real mode, we set the atol slightly lower than the target L2 convergence
+            ! the reasoning for this is detailed in the NKStep subroutine
+            atol = totalR0 * L2Conv * 0.01_realType
+#else
+            ! in complex mode, we want to tightly solve the linear system every time
+            ! again, see the NKStep subroutine for the explanation
+            atol = totalR0 * L2Conv * 1e-6_realType
+#endif
 
-           call VecDestroy(wVecTurb, ierr)
-           call EChk(ierr, __FILE__, __LINE__)
+            ! Set the iteration limit to maxIt, determined by which fluxes are used.
+            ! This is because ANK step require 0.1 convergence for stability during initial stages.
+            ! Due to an outdated preconditioner, the KSP solve might take more iterations.
+            ! If this happens, the preconditioner is re-computed and because of this,
+            ! ANK iterations usually don't take more than 2 times number of ANK_subSpace size iterations
+            call KSPSetTolerances(ANK_KSPTurb, rtol, &
+                                  real(atol), real(ANK_divTol), ank_maxIter, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-           call VecDestroy(rVecTurb, ierr)
-           call EChk(ierr, __FILE__, __LINE__)
+            call KSPSetResidualHistory(ANK_KSPTurb, resHist, ank_maxIter + 1, PETSC_TRUE, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-           call VecDestroy(deltaWTurb, ierr)
-           call EChk(ierr, __FILE__, __LINE__)
+            ! Set the BaseVector of the matrix-free matrix:
+            call formFunction_mf_turb(ctx, wVecTurb, baseResTurb, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+            call MatMFFDSetBase(dRdWTurb, wVecTurb, baseResTurb, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-           call VecDestroy(baseResTurb, ierr)
-           call EChk(ierr, __FILE__, __LINE__)
+            ! Actually do the Linear Krylov Solve
+            call KSPSolve(ANK_KSPTurb, rVecTurb, deltaWTurb, ierr)
 
-           call KSPDestroy(ANK_KSPTurb, ierr)
-           call EChk(ierr, __FILE__, __LINE__)
+            ! DON'T just check the error. We want to catch error code 72
+            ! which is a floating point error. This is ok, we just reset and
+            ! keep going
+            if (ierr == 72) then
+                ! The convergence check will get the nan
+            else
+                call EChk(ierr, __FILE__, __LINE__)
+            end if
 
-           ANK_turbSetup = .False.
-       end if
-    end if
-  end subroutine destroyANKsolver
+            ! Get the number of iterations from the KSP solver
+            call KSPGetIterationNumber(ANK_KSPTurb, kspIterations, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-  subroutine setWVecANK(wVec,lStart,lEnd)
-    ! Set the current FLOW variables in the PETSc Vector
+            call KSPGetConvergedReason(ANK_KSPTurb, reason, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-    use constants
-    use blockPointers, only : nDom, il, jl, kl, w
-    use inputtimespectral, only : ntimeIntervalsSpectral
-    use utils, only : setPointers, EChk
-    implicit none
+            ! Return previously changed variables back to normal, VERY IMPORTANT
+            if (totalR > ANK_secondOrdSwitchTol * totalR0) then
+                ! Replace the second order turbulence option
+                orderturb = orderturbsave
+                approxSA = .False.
+            end if
 
-    Vec   wVec
-    integer(kind=intType),intent(in) :: lStart, lEnd
-    integer(kind=intType) :: ierr,nn,sps,i,j,k,l,ii
-    real(kind=realType),pointer :: wvec_pointer(:)
-
-    call VecGetArrayF90(wVec,wvec_pointer,ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-    ii = 0
-    do nn=1, nDom
-       do sps=1, nTimeIntervalsSpectral
-          call setPointers(nn, 1_intType, sps)
-          ! Copy off w to wVec
-          do k=2, kl
-             do j=2, jl
-                do i=2, il
-                   do l=lStart, lEnd
-                      ii = ii + 1
-                      wvec_pointer(ii) = w(i, j, k, l)
-                   end do
-                end do
-             end do
-          end do
-       end do
-    end do
+            ! put back the approxsa flag if we were using it
+            if (ANK_useApproxSA) &
+                approxSA = .False.
 
-    call VecRestoreArrayF90(wVec, wvec_pointer, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+            ! Compute the maximum step that will limit the change
+            ! in SA variable to some user defined fraction.
+            call physicalityCheckANKTurb(lambdaTurb)
+            !if (myid .eq. 0) write(*,*)"physicality check lambda: ",lambdaTurb
+            !lambdaTurb = max(ANK_stepMin, lambdaTurb)
 
-  end subroutine setWVecANK
+            ! Take the uodate after the physicality check.
+            call VecAXPY(wVecTurb, -lambdaTurb, deltaWTurb, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-  subroutine setRVecANK(rVec)
+            ! Set the updated state variables
+            call setWANK(wVecTurb, nt1, nt2)
 
-    ! Set the current FLOW residual in dw into the PETSc Vector
-    use constants
-    use blockPointers, only : nDom, volRef, il, jl, kl, dw
-    use inputtimespectral, only : nTimeIntervalsSpectral
-    use flowvarrefstate, only : nwf, nt1, nt2
-    use inputIteration, only : turbResScale
-    use utils, only : setPointers, EChk
-    implicit none
-    Vec    rVec
-    integer(kind=intType) :: ierr, nn, sps, i, j, k, l, ii
-    real(kind=realType),pointer :: rvec_pointer(:)
-    real(Kind=realType) :: ovv
-    call VecGetArrayF90(rVec,rvec_pointer,ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-    ii = 0
-    do nn=1, nDom
-       do sps=1, nTimeIntervalsSpectral
-          call setPointers(nn,1_intType,sps)
-          ! Copy off dw/vol to rVec
-          do k=2, kl
-             do j=2, jl
-                do i=2, il
-                   ovv = one/volRef(i,j,k)
-                   do l=1, nwf
-                      ii = ii + 1
-                      rvec_pointer(ii) = dw(i, j, k, l)*ovv
-                   end do
-                end do
-             end do
-          end do
-       end do
-    end do
+            ! Compute the unsteady residuals. The actual residuals
+            ! also get calculated in the process, and are stored in
+            ! dw. Make sure to call setRVec/setRVecANK after this
+            ! routine because rVec contains the unsteady residuals,
+            ! and we need the steady residuals for the next iteration.
+            call computeUnsteadyResANKTurb(lambdaTurb)
 
-    call VecRestoreArrayF90(rVec, rvec_pointer, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+            ! Count the number of of residual evaluations outside the KSP solve
+            feval = 1_intType
 
-  end subroutine setRVecANK
+            ! Check if the norm of the rVec is bad:
+            call VecNorm(rVecTurb, NORM_2, unsteadyNorm, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-  subroutine setRVecANKTurb(rVecTurb)
+            ! initialize this outside the ls
+            LSFailed = .False.
 
-    ! Set the current Turb residual in dw into the PETSc Vector
-    use constants
-    use blockPointers, only : nDom, volRef, il, jl, kl, dw
-    use inputtimespectral, only : nTimeIntervalsSpectral
-    use flowvarrefstate, only : nt1, nt2
-    use inputIteration, only : turbResScale
-    use utils, only : setPointers, EChk
-    implicit none
-    Vec    rVecTurb
-    integer(kind=intType) :: ierr, nn, sps, i, j, k, l, ii
-    real(kind=realType),pointer :: rvec_pointer(:)
-    real(Kind=realType) :: ovv
-    call VecGetArrayF90(rVecTurb,rvec_pointer,ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-    ii = 0
-    do nn=1, nDom
-       do sps=1, nTimeIntervalsSpectral
-          call setPointers(nn,1_intType,sps)
-          ! Copy off dw/vol to rVec
-          do k=2, kl
-             do j=2, jl
-                do i=2, il
-                   ovv = one/volRef(i,j,k)
-                   do l=nt1, nt2
-                      ii = ii + 1
-                      rvec_pointer(ii) = dw(i, j, k, l)*ovv*turbResScale(1)
-                   end do
-                end do
-             end do
-          end do
-       end do
-    end do
+            if ((unsteadyNorm > totalRTurb * ANK_unstdyLSTol .or. myisnan(unsteadyNorm))) then
+                ! The unsteady residual is too high or we have a NAN. Do a
+                ! backtracking line search until we get a residual that is lower.
 
-    call VecRestoreArrayF90(rVecTurb, rvec_pointer, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+                LSFailed = .True.
 
-  end subroutine setRVecANKTurb
+                ! Restore the starting (old) w value by adding lamda*deltaW
+                call VecAXPY(wVecTurb, lambdaTurb, deltaWTurb, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-  subroutine setWANK(wVec,lStart,lEnd)
-    ! Get the updated solution from the PETSc Vector
+                ! Set the initial new lambda. This is working off the
+                ! potentially already physically limited step.
+                lambdaTurb = 0.7_realType * lambdaTurb
 
-    use constants
-    use blockPointers, only : nDom, vol, il, jl, kl, w
-    use inputtimespectral, only : nTimeIntervalsSpectral
-    use utils, only : setPointers, EChk
-    implicit none
+                backtrack: do iter = 1, 12
 
-    Vec  wVec
-    integer(kind=intType),intent(in) :: lStart, lEnd
-    integer(kind=intType) :: ierr, nn, sps, i, j, k, l, ii
-    real(kind=realType), pointer :: wvec_pointer(:)
-    call VecGetArrayReadF90(wVec, wvec_pointer, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ii = 0
-    do nn=1, nDom
-       do sps=1,nTimeIntervalsSpectral
-          call setPointers(nn, 1_intType, sps)
-
-          do k=2, kl
-             do j=2, jl
-                do i=2, il
-                   do l=lStart, lEnd
-                      ii = ii + 1
-                      w(i, j, k, l) = wvec_pointer(ii)
-                   end do
-                end do
-             end do
-          end do
-       end do
-    end do
-    call VecRestoreArrayReadF90(wVec, wvec_pointer, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+                    ! Apply the new step
+                    call VecAXPY(wVecTurb, -lambdaTurb, deltaWTurb, ierr)
+                    call EChk(ierr, __FILE__, __LINE__)
 
-  end subroutine setWANK
+                    ! Set and recompute
+                    call setWANK(wVecTurb, nt1, nt2)
 
-  subroutine physicalityCheckANK(lambdaP)
+                    ! Compute the unsteady residuals with the current step
+                    call computeUnsteadyResANKTurb(lambdaTurb)
+                    feval = feval + 1
 
-    use constants
-    use blockPointers, only : ndom, il, jl, kl
-    use flowVarRefState, only : nw, nwf, nt1, nt2
-    use inputtimespectral, only : nTimeIntervalsSpectral
-    use utils, only : setPointers, EChk
-    use genericISNAN, only : myisnan
-    use communication, only : ADflow_comm_world
-    implicit none
+                    call VecNorm(rVecTurb, NORM_2, unsteadyNorm, ierr)
+                    call EChk(ierr, __FILE__, __LINE__)
 
-    ! input variable
-    real(kind=realType) , intent(inout) :: lambdaP
-
-    ! local variables
-    integer(kind=intType) :: ierr, nn, sps, i, j, k, l, ii
-    real(kind=realType), pointer :: wvec_pointer(:)
-    real(kind=realType), pointer :: dvec_pointer(:)
-    real(kind=alwaysRealType) :: lambdaL ! L is for local
-    real(kind=alwaysRealType) :: lambdaP_recv ! to receive the global step
-    real(kind=alwaysRealType) :: ratio
-
-    ! Determine the maximum step size that would yield
-    ! a maximum relative change of ANK_physLSTol in density, and total energy.
-    ! We also check for turbulence, but only limit the step
-    ! for updates that decrease the value of the turbulence working variable.
-
-    ! Initialize the local step size as lambdaP which is an i/o variable
-    lambdaL = real(lambdaP)
-
-    ! First we need to read both the update and the state
-    ! from PETSc because the w in ADFlow currently contains
-    ! the state that is perturbed during the matrix-free
-    ! operations.
-
-    ! wVec contains the state vector
-    call VecGetArrayF90(wVec,wvec_pointer,ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! deltaW contains the full update
-    call VecGetArrayF90(deltaW,dvec_pointer,ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! in decoupled, we just have the flow variables
-    if(.not. ANK_coupled) then
-       ii = 1
-       do nn=1, nDom
-          do sps=1, nTimeIntervalsSpectral
-             call setPointers(nn,1_intType,sps)
-             do k=2, kl
-                do j=2, jl
-                   do i=2, il
-                      ! multiply the ratios by ANK_physLSTol to check if the change in a
-                      ! variable is greater than ANK_physLSTol of the variable itself.
-
-                      ! check density
-#ifndef USE_COMPLEX
-                      ! to have the real mode slightly more efficient, we do not check if variables are real
-                      ratio = abs(wvec_pointer(ii)/(dvec_pointer(ii)+eps))*ANK_physLSTol
-#else
-                      ! We dont care what happens to the complex part of the update because
-                      ! that is a linear system. So again check the real update for the physical
-                      ! line search. Towards the end of the simulation, real part gets smaller and
-                      ! and smaller updates, so this routine will always give a step of 1 which is what
-                      ! we want for the complex parts.
-                      ratio = abs(real(wvec_pointer(ii)) / real(dvec_pointer(ii) + eps)) * real(ANK_physLSTol)
-#endif
-                      lambdaL = min(lambdaL, ratio)
+                    if (unsteadyNorm > totalRTurb * ANK_unstdyLSTol .or. myisnan(unsteadyNorm)) then
+
+                        ! Restore back to the original wVec
+                        call VecAXPY(wVecTurb, lambdaTurb, deltaWTurb, ierr)
+                        call EChk(ierr, __FILE__, __LINE__)
+
+                        ! Haven't backed off enough yet....keep going
+                        lambdaTurb = lambdaTurb * 0.7_realType
+                    else
+                        ! We have succefssfully reduced the norm
+                        LSFailed = .False.
+                        exit
+                    end if
+                end do backtrack
+
+                if (LSFailed .or. myisnan(unsteadyNorm)) then
+                    ! the line search wasn't much help.
+
+                    if (ANK_CFL > ANK_CFLMin) then
+                        ! the cfl number is not already at the lower limit.  We
+                        ! can cut the CFL back and try again. Set lambda to zero
+                        ! to indicate we never took a step.
+                        lambdaTurb = zero
+                    else
+                        ! cfl is as low as it goes, try taking the step
+                        ! anyway. We can't do  anything else
+                        call VecAXPY(wVecTurb, -lambdaTurb, deltaWTurb, ierr)
+                        call EChk(ierr, __FILE__, __LINE__)
+                    end if
+
+                    ! Set the state vec and compute the new residual
+                    call setWANK(wVecTurb, nt1, nt2)
+                    call blocketteRes(useFlowRes=.False., &
+                                      useStoreWall=.False.)
+                    feval = feval + 1
+                end if
+            end if
 
-                      ! increment by 4 because we want to skip momentum variables
-                      ii = ii + 4
+            call setRvecANKTurb(rVecTurb)
 
-                      ! check energy
-#ifndef USE_COMPLEX
-                      ! see the comment above for the difference between real and complex versions
-                      ratio = abs(wvec_pointer(ii)/(dvec_pointer(ii)+eps))*ANK_physLSTol
-#else
-                      ratio = abs(real(wvec_pointer(ii))/real(dvec_pointer(ii)+eps))* real(ANK_physLSTol)
-#endif
-                      lambdaL = min(lambdaL, ratio)
-                      ii = ii + 1
-                   end do
-                end do
-             end do
-          end do
-       end do
-    ! in coupled, we also have the turbulence variables
-    else
-       ii = 1
-       do nn=1, nDom
-          do sps=1, nTimeIntervalsSpectral
-             call setPointers(nn,1_intType,sps)
-             do k=2, kl
-                do j=2, jl
-                   do i=2, il
-
-                      ! multiply the ratios by ANK_physLSTol to check if the change in a
-                      ! variable is greater than ANK_physLSTol of the variable itself.
-
-                      ! check density
-#ifndef USE_COMPLEX
-                      ! to have the real mode slightly more efficient, we do not check if variables are real
-                      ratio = abs(wvec_pointer(ii)/(dvec_pointer(ii)+eps))*ANK_physLSTol
-#else
-                      ! We dont care what happens to the complex part of the update because
-                      ! that is a linear system. So again check the real update for the physical
-                      ! line search. Towards the end of the simulation, real part gets smaller and
-                      ! and smaller updates, so this routine will always give a step of 1 which is what
-                      ! we want for the complex parts.
-                      ratio = abs(real(wvec_pointer(ii)) / real(dvec_pointer(ii) + eps)) * real(ANK_physLSTol)
-#endif
-                      lambdaL = min(lambdaL, ratio)
+            linResMonitorTurb = resHist(kspIterations + 1) / resHist(1)
 
-                      ! increment by 4 because we want to skip momentum variables
-                      ii = ii + 4
+            if ((linResMonitorTurb .ge. ANK_rtol .and. &
+                 totalR > ANK_secondOrdSwitchTol * totalR0 .and. &
+                 linResOldTurb .le. ANK_rtol) &
+                !.or. LSFailed) then
+                !            .or. lambdaTurb .le. ANK_stepMin) then
+                .or. (lambdaTurb .eq. zero)) then
 
-                      ! check energy
-#ifndef USE_COMPLEX
-                      ! see the comment above for the difference between real and complex versions
-                      ratio = abs(wvec_pointer(ii)/(dvec_pointer(ii)+eps))*ANK_physLSTol
-#else
-                      ratio = abs(real(wvec_pointer(ii))/real(dvec_pointer(ii)+eps))* real(ANK_physLSTol)
-#endif
-                      lambdaL = min(lambdaL, ratio)
-                      ii = ii + 1
+                ! We should reform the PC since it took longer than we want,
+                ! or we need to adjust the CFL because the last update was bad,
+                ! or convergence since the last PC update was good enough and we
+                ! would benefit from re-calculating the PC.
+                ANK_iterTurb = 0
+            end if
 
-                      ! if coupled ank is used, nstate = nw and this loop is executed
-                      ! if no turbulence variables, this loop will be automatically skipped
-                      ! check turbulence variable
-#ifndef USE_COMPLEX
-                      ratio = (wvec_pointer(ii)/(dvec_pointer(ii)+eps))*ANK_physLSTolTurb
-#else
-                      ratio = ( real(wvec_pointer(ii))/real(dvec_pointer(ii)+eps))*real(ANK_physLSTolTurb)
-#endif
-                      ! if the ratio is less than min step, the update is either
-                      ! in the positive direction, therefore we do not clip it,
-                      ! or the update is very limiting, so we just clip the
-                      ! individual update for this cell.
-                      if (ratio .lt. ANK_stepFactor*ANK_stepMin) then
-                        ! The update was very limiting, so just clip this
-                        ! individual update and dont change the overall
-                        ! step size. To select the new update, instead of
-                        ! clipping to zero, we clip to 1 percent of the original.
-                        if (ratio .gt. zero) &
-                          dvec_pointer(ii) = wvec_pointer(ii)*ANK_physLSTolTurb
-
-                        ! Either case, set the ratio to one. Positive updates
-                        ! do not limit the step, negative updates below minimum
-                        ! step were already clipped.
-                        ratio = one
-                      end if
-                      lambdaL = min(lambdaL, ratio)
-                      ii = ii + 1
-
-                      ! TODO: Do we need physicality checks for the additional turbulence model variables?
-                     !  do l=nt1+1, nt2
-                     !     ii = ii + 1
-                     !  end do
-                      ! do this instead of the above loop for now...
-                      ! Will need to modify this if we want physicality check
-                      ! for the new turb model variables.
-                      ii = ii + (nt2-nt1)
-                   end do
-                end do
-             end do
-          end do
-       end do
-    end if
+            ! update the linear residual for next iteration
+            linResOldTurb = linResMonitorTurb
 
-    ! Restore the pointers to PETSc vectors
+            ! Update step monitor
+            ! stepMonitor = lambda
 
-    call VecRestoreArrayF90(wVec,wvec_pointer,ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+            ! Update the approximate iteration counter. The +1 is for the
+            ! residual evaluations.
+            ! approxTotalIts = approxTotalIts + feval + kspIterations
 
-    call VecRestoreArrayF90(deltaW,dvec_pointer,ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+            ! Print some info about the turbulence ksp
+            if (myid == 0 .and. ANK_turbDebug) then
+                Write (*, *) "LIN RES, ITER, INITRES, REASON, STEP", linResMonitorTurb, kspIterations, &
+                    reshist(1), reason, lambdaTurb
+            end if
 
-    ! Make sure that we did not get any NaN's in the process
-    if (myisnan(lambdaL)) then
-      lambdaL = zero
-    end if
+        end do
 
-    ! Finally, communicate the step size across processes and return
-    ! mpi allreduce is not defined for complex numbers with the min operation
-    ! so we will use the lambdaP_recv variable to receive
-    call mpi_allreduce(lambdaL, lambdaP_recv, 1_intType, MPI_DOUBLE, &
-         mpi_min, ADflow_comm_world, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+    end subroutine ANKTurbSolveKSP
+
+    subroutine ANKStep(firstCall)
+
+        use constants
+        use blockPointers, only: nDom, flowDoms, shockSensor, ib, jb, kb, p, w, gamma
+        use inputPhysics, only: equations
+        use inputIteration, only: L2conv
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use inputDiscretization, only: lumpedDiss, approxSA, orderturb
+        use iteration, only: approxTotalIts, totalR0, totalR, stepMonitor, linResMonitor, currentLevel, iterType
+        use utils, only: EChk, setPointers
+        use genericISNAN, only: myisnan
+        use turbAPI, only: turbSolveDDADI
+        use solverUtils, only: computeUTau
+        use adjointUtils, only: referenceShockSensor
+        use NKSolver, only: setRVec, getEwTol
+        use initializeFlow, only: setUniformFlow
+        use BCRoutines, only: applyAllBC, applyAllBC_block
+        use haloExchange, only: whalo2
+        use oversetData, only: oversetPresent
+        use flowVarRefState, only: nw, nwf, nt1, nt2, kPresent, pInfCorr
+        use flowUtils, only: computeLamViscosity
+        use turbUtils, only: computeEddyViscosity
+        use communication
+        use blockette, only: blocketteRes
+        implicit none
+
+        ! Input Variables
+        logical, intent(in) :: firstCall
+
+        ! Working Variables
+        integer(kind=intType) :: ierr, maxIt, kspIterations, nn, sps, reason, nHist, iter, feval, orderturbsave
+        integer(kind=intType) :: i, j, k
+        real(kind=realType) :: atol, val, v2, factK, gm1
+        real(kind=alwaysRealType) :: rtol, totalR_dummy, linearRes, norm
+        real(kind=alwaysRealType) :: resHist(ANK_maxIter + 1)
+        real(kind=alwaysRealType) :: unsteadyNorm, unsteadyNorm_old, rel_pcUpdateTol
+        logical :: correctForK, LSFailed
+
+        ! Enter this check if this is the first ANK step OR we are switching to the coupled ANK solver
+        if (firstCall .or. &
+            ((totalR .le. ANK_coupledSwitchTol * totalR0) .and. (.not. ANK_coupled) &
+             .and. (equations .eq. RANSEquations))) then
+
+            ! If this is a first call, we need to change the coupled switch
+            ! to the correct value.
+            if (firstCall) then
+
+                ! Check if we are above or below the coupled switch tolerance
+                if (totalR .le. ANK_coupledSwitchTol * totalR0 .and. equations .eq. RANSEquations) then
+                    ANK_coupled = .True.
+                else
+                    ANK_coupled = .False.
+                end if
 
-#ifndef USE_COMPLEX
-    lambdaP = lambdaP_recv
-#else
-    ! finally, as a safety check, purge the complex part of lambda
-    lambdaP = cmplx(lambdaP_recv, 0.0_realType)
-#endif
+                ! This is not a first call, and the only option left is that,
+                ! we may be switching from uncoupled to coupled
+            else
+                ANK_coupled = .True.
+            end if
 
-  end subroutine physicalityCheckANK
+            ! If we are in here, destroy the solver regardless,
+            ! and set up with the correct coupling mode.
+            call destroyANKSolver()
+            call setupANKSolver()
 
-  subroutine physicalityCheckANKTurb(lambdaP)
+            ! Copy the adflow 'w' into the petsc wVec
+            call setwVecANK(wVec, 1, nstate)
 
-    use constants
-    use blockPointers, only : ndom, il, jl, kl
-    use flowVarRefState, only : nw, nwf, nt1,nt2
-    use inputtimespectral, only : nTimeIntervalsSpectral
-    use utils, only : setPointers, EChk
-    use genericISNAN, only : myisnan
-    use communication, only : ADflow_comm_world
-    implicit none
+            ! Evaluate the residual before we start
+            call blocketteRes(useUpdateIntermed=.True.)
+            if (ANK_coupled) then
+                call setRvec(rVec)
+            else
+                call setRVecANK(rVec)
+            end if
 
-    ! input variable
-    real(kind=realType) , intent(inout) :: lambdaP
-
-    ! local variables
-    integer(kind=intType) :: ierr, nn, sps, i, j, k, l, ii
-    real(kind=realType), pointer :: wvec_pointer(:)
-    real(kind=realType), pointer :: dvec_pointer(:)
-    real(kind=alwaysRealType) :: lambdaL ! L is for local
-    real(kind=alwaysRealType) :: lambdaP_recv ! to receive the global step
-    real(kind=alwaysRealType) :: ratio
-
-    ! Determine the maximum step size that would yield
-    ! a maximum change of 10% in density, total energy,
-    ! and turbulence variable after a KSP solve.
-
-    ! Initialize the local step size as ANK_stepFactor
-    ! because the initial step is likely to be equal to this.
-    lambdaL = real(lambdaP)
-
-    ! First we need to read both the update and the state
-    ! from PETSc because the w in ADFlow currently contains
-    ! the state that is perturbed during the matrix-free
-    ! operations.
-
-    ! wVec contains the state vector
-    call VecGetArrayF90(wVecTurb,wvec_pointer,ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! deltaW contains the full update
-    call VecGetArrayF90(deltaWTurb,dvec_pointer,ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ii = 1
-    do nn=1, nDom
-        do sps=1, nTimeIntervalsSpectral
-            call setPointers(nn,1_intType,sps)
-            do k=2, kl
-                do j=2, jl
-                    do i=2, il
-                        ! multiply the ratios by 10 to check if the change in a
-                        ! variable is greater than 10% of the variable itself.
-
-                        ! needs to be modified
-                        ! if coupled ank is used, nstate = nw and this loop is executed
-                        ! if no turbulence variables, this loop will be automatically skipped
-                        ! check turbulence variable
-#ifndef USE_COMPLEX
-                        ratio = (wvec_pointer(ii)/(dvec_pointer(ii)+eps))* ANK_physLSTolTurb
-#else
-                        ratio = (real(wvec_pointer(ii))/real(dvec_pointer(ii)+eps))* real(ANK_physLSTolTurb)
-#endif
-                        ! if the ratio is less than min step, the update is either
-                        ! in the positive direction, therefore we do not clip it,
-                        ! or the update is very limiting, so we just clip the
-                        ! individual update for this cell.
-                        if (ratio .lt. ANK_stepFactor*ANK_stepMin) then
-                          ! The update was very limiting, so just clip this
-                          ! individual update and dont change the overall
-                          ! step size. To select the new update, instead of
-                          ! clipping to zero, we clip to 1 percent of the original.
-                          if (ratio .gt. zero) &
-                            dvec_pointer(ii) = wvec_pointer(ii)*ANK_physLSTolTurb
-
-                          ! Either case, set the ratio to one. Positive updates
-                          ! do not limit the step, negative updates below minimum
-                          ! step were already clipped.
-                          ratio = one
-                        end if
-                        lambdaL = min(lambdaL, ratio)
-                        ii = ii + 1
-
-                        ! TODO: Do we need physicality checks for the additional turbulence model variables?
-                        !  do l=nt1+1, nt2
-                        !     ii = ii + 1
-                        !  end do
-                        ! do this instead of the above loop for now...
-                        ! Will need to modify this if we want physicality check
-                        ! for the new turb model variables.
-                        ii = ii + (nt2-nt1)
-                    end do
-                end do
-            end do
-        end do
-    end do
+            ! Check if we are using the turb KSP
+            if ((.not. ANK_coupled) .and. (.not. ANK_useTurbDADI) .and. equations == RANSEquations) then
+                call setwVecANK(wVecTurb, nt1, nt2)
+                call setRVecANKTurb(rVecTurb)
+            end if
 
-    ! Restore the pointers to PETSc vectors
+            if (firstCall) then
+                ! Start with the selected fraction of the ANK_StepFactor
+                lambda = ANK_StepFactor
+                lambdaTurb = ANK_stepFactor
+
+                ! Initialize some variables
+                totalR_old = totalR ! Record the old residual for the first iteration
+                rtolLast = ANK_rtol ! Set the previous relative convergence tolerance for the first iteration
+                ANK_CFL = ANK_CFL0 ! only set the initial cfl for the first iteration
+                ANK_CFLMinBase = ANK_CFLMin0
+                totalR_pcUpdate = totalR ! only update the residual at last PC calculation for the first iteration
+                linResOld = zero
+                linResOldTurb = zero
+                ANK_iter = 0
+            end if
+        else
+            ANK_iter = ANK_iter + 1
+        end if
 
-    call VecRestoreArrayF90(wVecTurb,wvec_pointer,ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+        ! figure out if we want to scale the ANKPCUpdateTol
+        if (.not. ANK_coupled) then
+            rel_pcUpdateTol = ANK_pcUpdateTol
+        else
+            ! for coupled ANK, we dont want to update the PC as frequently,
+            ! so we reduce the relative tol by 4 orders of magnitude,
+            ! *if* we are converged past pc update cutoff wrt free stream already
+            if (totalR / totalR0 .lt. ANK_pcUpdateCutoff) then
+                rel_pcUpdateTol = ANK_pcUpdateTol * 1e-4_realType
+            else
+                ! if we are not that far down converged, use the option directly
+                rel_pcUpdateTol = ANK_pcUpdateTol
+            end if
+        end if
 
-    call VecRestoreArrayF90(deltaWTurb,dvec_pointer,ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+        ! Compute the norm of rVec, which is identical to the
+        ! norm of the unsteady residual vector.
+        call VecNorm(rVec, NORM_2, unsteadyNorm_old, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    ! Make sure that we did not get any NaN's in the process
-    if (myisnan(lambdaL)) then
-      lambdaL = zero
-    end if
+        ! Determine if if we need to form the Preconditioner
+        if (mod(ANK_iter, ANK_jacobianLag) == 0 .or. totalR / totalR_pcUpdate < rel_pcUpdateTol) then
 
-    ! Finally, communicate the step size across processes and return
-    ! mpi allreduce is not defined for complex numbers with the min operation
-    ! so we will use the lambdaP_recv variable to receive
-    call mpi_allreduce(lambdaL, lambdaP_recv, 1_intType, MPI_DOUBLE, &
-         mpi_min, ADflow_comm_world, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+            ! First of all, update the minimum cfl wrt the overall convergence
+            ANK_CFLMin = min(ANK_CFLLimit, ANK_CFLMinBase * (totalR0 / totalR)**ANK_CFLExponent)
 
-#ifndef USE_COMPLEX
-    lambdaP = lambdaP_recv
-#else
-    ! finally, as a safety check, purge the complex part of lambda
-    lambdaP = cmplx(lambdaP_recv, 0.0_realType)
-#endif
+            ! Update the CFL number depending on the outcome of the last iteration
+            if (lambda < ANK_stepMin * ANK_stepFactor) then
 
-  end subroutine physicalityCheckANKTurb
+                ! The step was too small, cut back the cfl
+                ANK_CFL = max(ANK_CFL * ANK_CFLCutback, ANK_CFLMin)
 
-  subroutine ANKTurbSolveKSP
+            else if (totalR < totalR_pcUpdate .and. lambda .ge. ANK_constCFLStep * ANK_stepFactor) then
 
-    ! This routine solves the turbulence model equation using
-    ! a similar approach to the main ank solver.
+                ! total residuals have decreased since the last cfl
+                ! change, or the step was large enough, we can ramp
+                ! the cfl up
+                ANK_CFL = max(min(ANK_CFL * ANK_CFLFactor** &
+                                  ((totalR_pcUpdate - totalR) / totalR_pcUpdate), ANK_CFLLimit), ANK_CFLMin)
 
-    use constants
-    use blockPointers, only : nDom, flowDoms
-    use inputIteration, only : L2conv
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use inputDiscretization, only : approxSA, orderturb
-    use iteration, only : approxTotalIts, totalR0, totalR, currentLevel
-    use utils, only : EChk, setPointers
-    use genericISNAN, only : myisnan
-    use solverUtils, only : computeUTau
-    use NKSolver, only : getEwTol
-    use BCRoutines, only : applyAllBC, applyAllBC_block
-    use haloExchange, only : whalo2
-    use oversetData, only : oversetPresent
-    use flowVarRefState, only : nw, nwf, nt1,nt2 , kPresent, pInfCorr
-    use communication
-    use blockette, only : blocketteRes
-    implicit none
+            else
 
-    ! Working Variables
-    integer(kind=intType) :: ierr, maxIt, kspIterations, nn, sps, reason, nHist, iter, feval, orderturbsave
-    integer(kind=intType) :: i,j,k,n
-    real(kind=realType) :: atol, val, v2, factK, gm1
-    real(kind=alwaysRealType) :: rtol, totalR_dummy, linearRes, norm
-    real(kind=alwaysRealType) :: resHist(ANK_maxIter+1)
-    real(kind=alwaysRealType) :: unsteadyNorm, unsteadyNorm_old
-    real(kind=alwaysRealType) :: linResMonitorTurb, totalRTurb
-    logical :: correctForK, LSFailed
+                ! The step was not small, but it was not large enough
+                ! to ramp up the cfl, so we keep it constant. Just
+                ! make sure that the cfl does not go below the
+                ! minimum value.
+                ANK_CFL = max(ANK_CFL, ANK_CFLMin)
 
-    ! Calculate the residuals and set rVecTurb before the first iteration
-    call blocketteRes(useFlowRes=.False.,useStoreWall=.False.)
-    call setRVecANKTurb(rVecTurb)
+            end if
 
-    do n = 1,ANK_nsubIterTurb
+            ! Record the total residuals when the PC is calculated.
+            totalR_pcUpdate = totalR
 
-        ! Compute the norm of rVecTurb, which is identical to the
-        ! norm of the unsteady residual vector.
-        call VecNorm(rVecTurb, NORM_2, totalRTurb, ierr)
-        call EChk(ierr, __FILE__, __LINE__)
+            ! Actually form the preconditioner and factorize it.
 
-        ! Determine if we need to form the Preconditioner
-        if (mod(ANK_iterTurb, ANK_jacobianLag) == 0) then
+            call FormJacobianANK()
+            if (totalR .le. ANK_secondOrdSwitchTol * totalR0) then
+                if (ANK_coupled) then
+                    iterType = "  *CSANK"
+                else
+                    iterType = "   *SANK"
+                end if
+            else
+                if (ANK_coupled) then
+                    iterType = "   *CANK"
+                else
+                    iterType = "    *ANK"
+                end if
+            end if
+            ANK_iter = 0
 
-            ! Actually form the preconditioner and factorize it.
-            if (myid .eq. 0 .and. ANK_turbDebug) &
-            write(*,*) "Re-doing turb PC"
-            call FormJacobianANKTurb()
+            ! Also update the turb PC bec. the CFL has changed
             ANK_iterTurb = 0
+        else
+            if (totalR .le. ANK_secondOrdSwitchTol * totalR0) then
+                if (ANK_coupled) then
+                    iterType = "   CSANK"
+                else
+                    iterType = "    SANK"
+                end if
+            else
+                if (ANK_coupled) then
+                    iterType = "    CANK"
+                else
+                    iterType = "     ANK"
+                end if
+            end if
         end if
 
-        ! Increment the iteration counter
-        ANK_iterTurb = ANK_iterTurb + 1
-
         ! Start with trying to take the full step set by the user.
 #ifndef USE_COMPLEX
-        lambdaTurb = ANK_StepFactor
+        lambda = ANK_StepFactor
 #else
         ! make sure we zero out the complex part of the step size
-        lambdaTurb = cmplx(real(ANK_StepFactor), 0.0_realType)
+        lambda = cmplx(real(ANK_StepFactor), 0.0_realType)
 #endif
 
         ! Dummy matrix assembly for the matrix-free matrix
-        call MatAssemblyBegin(dRdwTurb, MAT_FINAL_ASSEMBLY, ierr)
+        call MatAssemblyBegin(dRdw, MAT_FINAL_ASSEMBLY, ierr)
         call EChk(ierr, __FILE__, __LINE__)
-        call MatAssemblyEnd(dRdwTurb, MAT_FINAL_ASSEMBLY, ierr)
+        call MatAssemblyEnd(dRdw, MAT_FINAL_ASSEMBLY, ierr)
         call EChk(ierr, __FILE__, __LINE__)
 
-        if (totalR > ANK_secondOrdSwitchTol*totalR0) then
-            ! Save if second order turbulence is used, we will only use 1st order during ANK (only matters for the coupled solver)
+        ! ============== Flow Update =============
+
+        ! For the approximate solver, we need the approximate flux routines
+        ! We set the variables required for approximate fluxes here and they will be used
+        ! for the matrix-free matrix-vector product routines when the KSP solver calls it
+        ! Very important to set the variables back to their original values after each
+        ! KSP solve because we want actual flux functions when calculating residuals
+        if (totalR > ANK_secondOrdSwitchTol * totalR0) then
+            ! Setting lumped dissipation to true gives approximate fluxes
+            ANK_useDissApprox = .True.
+            lumpedDiss = .True.
             approxSA = .True.
+
+            ! Save the turbulence order, we will only use 1st order during ANK (only matters for the coupled solver)
             orderturbsave = orderturb
             orderturb = firstOrder
 
+            ! Calculate the shock sensor here because the approximate routines do not
+            call referenceShockSensor()
+
             ! Determine the relative convergence for the KSP solver
             rtol = ANK_rtol ! Just use the input relative tolerance for approximate fluxes
+
         else
             ! If the second order fluxes are used, Eisenstat-Walker algorithm to determine relateive
             ! convergence tolerance helps with performance.
@@ -3480,7 +3913,7 @@ subroutine ANKTurbSolveKSP
 
         ! also check if we are using approxSA always
         if (ANK_useApproxSA) &
-           approxSA = .True.
+            approxSA = .True.
 
         ! Record the total residual and relative convergence for next iteration
         totalR_old = totalR
@@ -3490,11 +3923,11 @@ subroutine ANKTurbSolveKSP
 #ifndef USE_COMPLEX
         ! in the real mode, we set the atol slightly lower than the target L2 convergence
         ! the reasoning for this is detailed in the NKStep subroutine
-        atol = totalR0*L2Conv*0.01_realType
+        atol = totalR0 * L2Conv * 0.01_realType
 #else
         ! in complex mode, we want to tightly solve the linear system every time
         ! again, see the NKStep subroutine for the explanation
-        atol = totalR0*L2Conv*1e-6_realType
+        atol = totalR0 * L2Conv * 1e-6_realType
 #endif
 
         ! Set the iteration limit to maxIt, determined by which fluxes are used.
@@ -3502,21 +3935,21 @@ subroutine ANKTurbSolveKSP
         ! Due to an outdated preconditioner, the KSP solve might take more iterations.
         ! If this happens, the preconditioner is re-computed and because of this,
         ! ANK iterations usually don't take more than 2 times number of ANK_subSpace size iterations
-        call KSPSetTolerances(ANK_KSPTurb, rtol, &
-        real(atol), real(ANK_divTol), ank_maxIter, ierr)
+        call KSPSetTolerances(ANK_KSP, rtol, &
+                              real(atol), real(ANK_divTol), ank_maxIter, ierr)
         call EChk(ierr, __FILE__, __LINE__)
 
-        call KSPSetResidualHistory(ANK_KSPTurb, resHist, ank_maxIter+1, PETSC_TRUE, ierr)
+        call KSPSetResidualHistory(ANK_KSP, resHist, ank_maxIter + 1, PETSC_TRUE, ierr)
         call EChk(ierr, __FILE__, __LINE__)
 
         ! Set the BaseVector of the matrix-free matrix:
-        call formFunction_mf_turb(ctx, wVecTurb, baseResTurb, ierr)
+        call formFunction_mf(ctx, wVec, baseRes, ierr)
         call EChk(ierr, __FILE__, __LINE__)
-        call MatMFFDSetBase(dRdWTurb, wVecTurb, baseResTurb, ierr)
+        call MatMFFDSetBase(dRdW, wVec, baseRes, ierr)
         call EChk(ierr, __FILE__, __LINE__)
 
         ! Actually do the Linear Krylov Solve
-        call KSPSolve(ANK_KSPTurb, rVecTurb, deltaWTurb, ierr)
+        call KSPSolve(ANK_KSP, rVec, deltaW, ierr)
 
         ! DON'T just check the error. We want to catch error code 72
         ! which is a floating point error. This is ok, we just reset and
@@ -3528,91 +3961,96 @@ subroutine ANKTurbSolveKSP
         end if
 
         ! Get the number of iterations from the KSP solver
-        call KSPGetIterationNumber(ANK_KSPTurb, kspIterations, ierr)
+        call KSPGetIterationNumber(ANK_KSP, kspIterations, ierr)
         call EChk(ierr, __FILE__, __LINE__)
 
-        call KSPGetConvergedReason(ANK_KSPTurb, reason, ierr)
+        call KSPGetConvergedReason(ANK_KSP, reason, ierr)
         call EChk(ierr, __FILE__, __LINE__)
 
         ! Return previously changed variables back to normal, VERY IMPORTANT
-        if (totalR > ANK_secondOrdSwitchTol*totalR0) then
-            ! Replace the second order turbulence option
-            orderturb = orderturbsave
+        if (totalR > ANK_secondOrdSwitchTol * totalR0) then
+            ! Set ANK_useDissApprox back to False to go back to using actual flux routines
+            ANK_useDissApprox = .False.
+            lumpedDiss = .False.
             approxSA = .False.
+
+            ! Replace turbulence order
+            orderturb = orderturbsave
+
         end if
 
         ! put back the approxsa flag if we were using it
         if (ANK_useApproxSA) &
-          approxSA = .False.
+            approxSA = .False.
 
-        ! Compute the maximum step that will limit the change
-        ! in SA variable to some user defined fraction.
-        call physicalityCheckANKTurb(lambdaTurb)
-        !if (myid .eq. 0) write(*,*)"physicality check lambda: ",lambdaTurb
-        !lambdaTurb = max(ANK_stepMin, lambdaTurb)
+        ! Compute the maximum step that will limit the change in pressure
+        ! and energy to some user defined fraction.
+        call physicalityCheckANK(lambda)
+        if (ANK_CFL .gt. ANK_CFLMin .and. lambda .lt. ANK_stepMin) &
+            lambda = zero
 
         ! Take the uodate after the physicality check.
-        call VecAXPY(wVecTurb, -lambdaTurb, deltaWTurb, ierr)
+        call VecAXPY(wVec, -lambda, deltaW, ierr)
         call EChk(ierr, __FILE__, __LINE__)
 
         ! Set the updated state variables
-        call setWANK(wVecTurb,nt1,nt2)
+        call setWANK(wVec, 1, nState)
 
         ! Compute the unsteady residuals. The actual residuals
         ! also get calculated in the process, and are stored in
         ! dw. Make sure to call setRVec/setRVecANK after this
         ! routine because rVec contains the unsteady residuals,
         ! and we need the steady residuals for the next iteration.
-        call computeUnsteadyResANKTurb(lambdaTurb)
+        call computeUnsteadyResANK(lambda)
 
         ! Count the number of of residual evaluations outside the KSP solve
         feval = 1_intType
 
         ! Check if the norm of the rVec is bad:
-        call VecNorm(rVecTurb, NORM_2, unsteadyNorm, ierr)
+        call VecNorm(rVec, NORM_2, unsteadyNorm, ierr)
         call EChk(ierr, __FILE__, __LINE__)
 
         ! initialize this outside the ls
         LSFailed = .False.
 
-        if ((unsteadyNorm > totalRTurb*ANK_unstdyLSTol .or. myisnan(unsteadyNorm))) then
+        if ((unsteadyNorm > unsteadyNorm_old * ANK_unstdyLSTol .or. myisnan(unsteadyNorm))) then
             ! The unsteady residual is too high or we have a NAN. Do a
             ! backtracking line search until we get a residual that is lower.
 
             LSFailed = .True.
 
             ! Restore the starting (old) w value by adding lamda*deltaW
-            call VecAXPY(wVecTurb, lambdaTurb, deltaWTurb, ierr)
+            call VecAXPY(wVec, lambda, deltaW, ierr)
             call EChk(ierr, __FILE__, __LINE__)
 
             ! Set the initial new lambda. This is working off the
             ! potentially already physically limited step.
-            lambdaTurb = 0.7_realType * lambdaTurb
+            lambda = 0.7_realType * lambda
 
-            backtrack: do iter=1, 12
+            backtrack: do iter = 1, 12
 
                 ! Apply the new step
-                call VecAXPY(wVecTurb, -lambdaTurb, deltaWTurb, ierr)
+                call VecAXPY(wVec, -lambda, deltaW, ierr)
                 call EChk(ierr, __FILE__, __LINE__)
 
                 ! Set and recompute
-                call setWANK(wVecTurb,nt1,nt2)
+                call setWANK(wVec, 1, nState)
 
                 ! Compute the unsteady residuals with the current step
-                call computeUnsteadyResANKTurb(lambdaTurb)
+                call computeUnsteadyResANK(lambda)
                 feval = feval + 1
 
-                call VecNorm(rVecTurb, NORM_2, unsteadyNorm, ierr)
+                call VecNorm(rVec, NORM_2, unsteadyNorm, ierr)
                 call EChk(ierr, __FILE__, __LINE__)
 
-                if (unsteadyNorm > totalRTurb*ANK_unstdyLSTol .or. myisnan(unsteadyNorm)) then
+                if (unsteadyNorm > unsteadyNorm_old * ANK_unstdyLSTol .or. myisnan(unsteadyNorm)) then
 
                     ! Restore back to the original wVec
-                    call VecAXPY(wVecTurb, lambdaTurb, deltaWTurb, ierr)
+                    call VecAXPY(wVec, lambda, deltaW, ierr)
                     call EChk(ierr, __FILE__, __LINE__)
 
                     ! Haven't backed off enough yet....keep going
-                    lambdaTurb = lambdaTurb * 0.7_realType
+                    lambda = lambda * 0.7_realType
                 else
                     ! We have succefssfully reduced the norm
                     LSFailed = .False.
@@ -3627,534 +4065,89 @@ subroutine ANKTurbSolveKSP
                     ! the cfl number is not already at the lower limit.  We
                     ! can cut the CFL back and try again. Set lambda to zero
                     ! to indicate we never took a step.
-                    lambdaTurb = zero
+                    lambda = zero
                 else
                     ! cfl is as low as it goes, try taking the step
                     ! anyway. We can't do  anything else
-                    call VecAXPY(wVecTurb, -lambdaTurb, deltaWTurb, ierr)
+                    call VecAXPY(wVec, -lambda, deltaW, ierr)
                     call EChk(ierr, __FILE__, __LINE__)
                 end if
 
                 ! Set the state vec and compute the new residual
-                call setWANK(wVecTurb,nt1,nt2)
-                call blocketteRes(useFlowRes=.False., &
-                                  useStoreWall=.False.)
+                call setWANK(wVec, 1, nState)
+                if (.not. ANK_coupled) then
+                    call blocketteRes(useTurbRes=.False., useStoreWall=.False.)
+                else
+                    call blocketteRes()
+                end if
                 feval = feval + 1
+            else
             end if
         end if
 
-        call setRvecANKTurb(rVecTurb)
+        ! ============== Turb Update =============
+        if ((.not. ANK_coupled) .and. equations == RANSEquations .and. lambda > zero) then
 
-        linResMonitorTurb = resHist(kspIterations+1)/resHist(1)
+            if (ANK_useTurbDADI) then
+                ! actually do the turbulence update
+                call computeUtau
+                call turbSolveDDADI
+            else
+                call ANKTurbSolveKSP
+            end if
+        end if
 
-        if ((linResMonitorTurb .ge. ANK_rtol .and. &
-            totalR > ANK_secondOrdSwitchTol*totalR0 .and.&
-            linResOldTurb .le. ANK_rtol) &
-            !.or. LSFailed) then
-!            .or. lambdaTurb .le. ANK_stepMin) then
-            .or. (lambdaTurb .eq. zero)) then
+        ! We need to now compute the residual for the next iteration.  We
+        ! also need the to update the update the time step and the
+        ! viscWall pointer stuff
+
+        call blocketteRes(useUpdateIntermed=.True.)
+
+        feval = feval + 1
+        if (ANK_coupled) then
+            call setRvec(rVec)
+        else
+            call setRVecANK(rVec)
+        end if
 
+        linResMonitor = resHist(kspIterations + 1) / resHist(1)
+
+        if ((linResMonitor .ge. ANK_rtol .and. &
+             totalR > ANK_secondOrdSwitchTol * totalR0 .and. &
+             linResOld .le. ANK_rtol) &
+            !.or. LSFailed) then
+            !.or. lambda .le. ANK_stepMin) then
+            .or. (lambda .eq. zero)) then
             ! We should reform the PC since it took longer than we want,
             ! or we need to adjust the CFL because the last update was bad,
             ! or convergence since the last PC update was good enough and we
             ! would benefit from re-calculating the PC.
-            ANK_iterTurb = 0
+            ANK_iter = -1
         end if
 
         ! update the linear residual for next iteration
-        linResOldTurb = linResMonitorTurb
+        linResOld = linResMonitor
 
         ! Update step monitor
-        ! stepMonitor = lambda
+        stepMonitor = lambda
+
+        ! Check if the linear solutions are failing.
+        ! If the lin res is above .5 or so, the solver
+        ! might stall, so we might be better off just
+        ! reducing the CFL and keep going. We Modify
+        ! the CFLMin by altering CFLMinBase.
+        if (linResMonitor .gt. ANK_linResMax) then
+            ! This will adjust MinBase such that we can halve the cfl
+            ! based on the current CFL.
+            ANK_CFLMinBase = ANK_CFLCutback * ANK_CFL * ((totalR / totalR0)**ANK_CFLExponent)
+            ! flags to refresh the Jacobian and cut back the CFL
+            ANK_iter = -1
+            lambda = zero
+        end if
 
         ! Update the approximate iteration counter. The +1 is for the
         ! residual evaluations.
-        ! approxTotalIts = approxTotalIts + feval + kspIterations
-
-                    ! Print some info about the turbulence ksp
-        if (myid == 0 .and. ANK_turbDebug) then
-          Write(*,*) "LIN RES, ITER, INITRES, REASON, STEP", linResMonitorTurb, kspIterations, &
-          reshist(1), reason, lambdaTurb
-        end if
-
-    end do
-
-  end subroutine ANKTurbSolveKSP
+        approxTotalIts = approxTotalIts + feval + kspIterations
 
-  subroutine ANKStep(firstCall)
-
-    use constants
-    use blockPointers, only : nDom, flowDoms, shockSensor, ib, jb, kb, p, w, gamma
-    use inputPhysics, only : equations
-    use inputIteration, only : L2conv
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use inputDiscretization, only : lumpedDiss, approxSA, orderturb
-    use iteration, only : approxTotalIts, totalR0, totalR, stepMonitor, linResMonitor, currentLevel, iterType
-    use utils, only : EChk, setPointers
-    use genericISNAN, only : myisnan
-    use turbAPI, only : turbSolveDDADI
-    use solverUtils, only : computeUTau
-    use adjointUtils, only : referenceShockSensor
-    use NKSolver, only : setRVec, getEwTol
-    use initializeFlow, only : setUniformFlow
-    use BCRoutines, only : applyAllBC, applyAllBC_block
-    use haloExchange, only : whalo2
-    use oversetData, only : oversetPresent
-    use flowVarRefState, only : nw, nwf, nt1,nt2 , kPresent, pInfCorr
-    use flowUtils, only : computeLamViscosity
-    use turbUtils, only : computeEddyViscosity
-    use communication
-    use blockette, only : blocketteRes
-    implicit none
-
-    ! Input Variables
-    logical, intent(in) :: firstCall
-
-    ! Working Variables
-    integer(kind=intType) :: ierr, maxIt, kspIterations, nn, sps, reason, nHist, iter, feval, orderturbsave
-    integer(kind=intType) :: i,j,k
-    real(kind=realType) :: atol, val, v2, factK, gm1
-    real(kind=alwaysRealType) :: rtol, totalR_dummy, linearRes, norm
-    real(kind=alwaysRealType) :: resHist(ANK_maxIter+1)
-    real(kind=alwaysRealType) :: unsteadyNorm, unsteadyNorm_old, rel_pcUpdateTol
-    logical :: correctForK, LSFailed
-
-    ! Enter this check if this is the first ANK step OR we are switching to the coupled ANK solver
-    if (firstCall .or. &
-       ((totalR .le. ANK_coupledSwitchTol * totalR0) .and. (.not. ANK_coupled) &
-        .and. (equations .eq. RANSEquations))) then
-
-       ! If this is a first call, we need to change the coupled switch
-       ! to the correct value.
-       if (firstCall) then
-
-         ! Check if we are above or below the coupled switch tolerance
-         if (totalR .le. ANK_coupledSwitchTol * totalR0 .and. equations .eq. RANSEquations) then
-           ANK_coupled = .True.
-         else
-           ANK_coupled = .False.
-         end if
-
-       ! This is not a first call, and the only option left is that,
-       ! we may be switching from uncoupled to coupled
-       else
-         ANK_coupled = .True.
-       end if
-
-       ! If we are in here, destroy the solver regardless,
-       ! and set up with the correct coupling mode.
-       call destroyANKSolver()
-       call setupANKSolver()
-
-       ! Copy the adflow 'w' into the petsc wVec
-       call setwVecANK(wVec,1,nstate)
-
-       ! Evaluate the residual before we start
-       call blocketteRes(useUpdateIntermed=.True.)
-       if (ANK_coupled) then
-          call setRvec(rVec)
-       else
-          call setRVecANK(rVec)
-       end if
-
-       ! Check if we are using the turb KSP
-       if ((.not. ANK_coupled) .and. (.not. ANK_useTurbDADI) .and. equations == RANSEquations) then
-          call setwVecANK(wVecTurb,nt1,nt2)
-          call setRVecANKTurb(rVecTurb)
-       end if
-
-       if (firstCall) then
-          ! Start with the selected fraction of the ANK_StepFactor
-          lambda = ANK_StepFactor
-          lambdaTurb = ANK_stepFactor
-
-          ! Initialize some variables
-          totalR_old = totalR ! Record the old residual for the first iteration
-          rtolLast = ANK_rtol ! Set the previous relative convergence tolerance for the first iteration
-          ANK_CFL = ANK_CFL0 ! only set the initial cfl for the first iteration
-          ANK_CFLMinBase = ANK_CFLMin0
-          totalR_pcUpdate = totalR ! only update the residual at last PC calculation for the first iteration
-          linResOld = zero
-          linResOldTurb=zero
-          ANK_iter = 0
-       end if
-    else
-       ANK_iter = ANK_iter + 1
-    end if
-
-    ! figure out if we want to scale the ANKPCUpdateTol
-    if (.not. ANK_coupled) then
-      rel_pcUpdateTol = ANK_pcUpdateTol
-    else
-      ! for coupled ANK, we dont want to update the PC as frequently,
-      ! so we reduce the relative tol by 4 orders of magnitude,
-      ! *if* we are converged past pc update cutoff wrt free stream already
-      if (totalR / totalR0 .lt. ANK_pcUpdateCutoff) then
-         rel_pcUpdateTol = ANK_pcUpdateTol * 1e-4_realType
-      else
-         ! if we are not that far down converged, use the option directly
-         rel_pcUpdateTol = ANK_pcUpdateTol
-      end if
-    end if
-
-    ! Compute the norm of rVec, which is identical to the
-    ! norm of the unsteady residual vector.
-    call VecNorm(rVec, NORM_2, unsteadyNorm_old, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Determine if if we need to form the Preconditioner
-    if (mod(ANK_iter, ANK_jacobianLag) == 0 .or. totalR/totalR_pcUpdate < rel_pcUpdateTol) then
-
-       ! First of all, update the minimum cfl wrt the overall convergence
-       ANK_CFLMin = min(ANK_CFLLimit, ANK_CFLMinBase*(totalR0/totalR)**ANK_CFLExponent)
-
-       ! Update the CFL number depending on the outcome of the last iteration
-       if (lambda < ANK_stepMin * ANK_stepFactor) then
-
-          ! The step was too small, cut back the cfl
-          ANK_CFL = max(ANK_CFL*ANK_CFLCutback, ANK_CFLMin)
-
-       else if (totalR < totalR_pcUpdate .and. lambda .ge. ANK_constCFLStep * ANK_stepFactor) then
-
-          ! total residuals have decreased since the last cfl
-          ! change, or the step was large enough, we can ramp
-          ! the cfl up
-          ANK_CFL = max(min(ANK_CFL * ANK_CFLFactor**&
-               ((totalR_pcUpdate-totalR)/totalR_pcUpdate), ANK_CFLLimit), ANK_CFLMin)
-
-       else
-
-          ! The step was not small, but it was not large enough
-          ! to ramp up the cfl, so we keep it constant. Just
-          ! make sure that the cfl does not go below the
-          ! minimum value.
-          ANK_CFL = max(ANK_CFL, ANK_CFLMin)
-
-       end if
-
-       ! Record the total residuals when the PC is calculated.
-       totalR_pcUpdate = totalR
-
-       ! Actually form the preconditioner and factorize it.
-
-       call FormJacobianANK()
-       if (totalR .le. ANK_secondOrdSwitchTol*totalR0) then
-           if (ANK_coupled) then
-               iterType = "  *CSANK"
-           else
-               iterType = "   *SANK"
-           end if
-       else
-           if (ANK_coupled) then
-               iterType = "   *CANK"
-           else
-               iterType = "    *ANK"
-           end if
-       end if
-       ANK_iter = 0
-
-       ! Also update the turb PC bec. the CFL has changed
-       ANK_iterTurb = 0
-    else
-       if (totalR .le. ANK_secondOrdSwitchTol*totalR0) then
-           if (ANK_coupled) then
-               iterType = "   CSANK"
-           else
-               iterType = "    SANK"
-           end if
-       else
-           if (ANK_coupled) then
-               iterType = "    CANK"
-           else
-               iterType = "     ANK"
-           end if
-       end if
-    end if
-
-    ! Start with trying to take the full step set by the user.
-#ifndef USE_COMPLEX
-    lambda = ANK_StepFactor
-#else
-    ! make sure we zero out the complex part of the step size
-    lambda = cmplx(real(ANK_StepFactor), 0.0_realType)
-#endif
-
-    ! Dummy matrix assembly for the matrix-free matrix
-    call MatAssemblyBegin(dRdw, MAT_FINAL_ASSEMBLY, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-    call MatAssemblyEnd(dRdw, MAT_FINAL_ASSEMBLY, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! ============== Flow Update =============
-
-    ! For the approximate solver, we need the approximate flux routines
-    ! We set the variables required for approximate fluxes here and they will be used
-    ! for the matrix-free matrix-vector product routines when the KSP solver calls it
-    ! Very important to set the variables back to their original values after each
-    ! KSP solve because we want actual flux functions when calculating residuals
-    if (totalR > ANK_secondOrdSwitchTol*totalR0) then
-       ! Setting lumped dissipation to true gives approximate fluxes
-       ANK_useDissApprox =.True.
-       lumpedDiss = .True.
-       approxSA = .True.
-
-       ! Save the turbulence order, we will only use 1st order during ANK (only matters for the coupled solver)
-       orderturbsave = orderturb
-       orderturb = firstOrder
-
-       ! Calculate the shock sensor here because the approximate routines do not
-       call referenceShockSensor()
-
-       ! Determine the relative convergence for the KSP solver
-       rtol = ANK_rtol ! Just use the input relative tolerance for approximate fluxes
-
-    else
-       ! If the second order fluxes are used, Eisenstat-Walker algorithm to determine relateive
-       ! convergence tolerance helps with performance.
-       totalR_dummy = totalR
-       call getEWTol(totalR_dummy, totalR_old, rtolLast, rtol)
-
-       ! Use the ANK rtol if E-W algorithm is not picking anything lower
-       rtol = min(ANK_rtol, rtol)
-    end if
-
-    ! also check if we are using approxSA always
-    if (ANK_useApproxSA) &
-      approxSA = .True.
-
-    ! Record the total residual and relative convergence for next iteration
-    totalR_old = totalR
-    rtolLast = rtol
-
-    ! Set all tolerances for linear solve:
-#ifndef USE_COMPLEX
-    ! in the real mode, we set the atol slightly lower than the target L2 convergence
-    ! the reasoning for this is detailed in the NKStep subroutine
-    atol = totalR0*L2Conv*0.01_realType
-#else
-    ! in complex mode, we want to tightly solve the linear system every time
-    ! again, see the NKStep subroutine for the explanation
-    atol = totalR0*L2Conv*1e-6_realType
-#endif
-
-    ! Set the iteration limit to maxIt, determined by which fluxes are used.
-    ! This is because ANK step require 0.1 convergence for stability during initial stages.
-    ! Due to an outdated preconditioner, the KSP solve might take more iterations.
-    ! If this happens, the preconditioner is re-computed and because of this,
-    ! ANK iterations usually don't take more than 2 times number of ANK_subSpace size iterations
-    call KSPSetTolerances(ANK_KSP, rtol, &
-         real(atol), real(ANK_divTol), ank_maxIter, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    call KSPSetResidualHistory(ANK_KSP, resHist, ank_maxIter+1, PETSC_TRUE, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Set the BaseVector of the matrix-free matrix:
-    call formFunction_mf(ctx, wVec, baseRes, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-    call MatMFFDSetBase(dRdW, wVec, baseRes, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Actually do the Linear Krylov Solve
-    call KSPSolve(ANK_KSP, rVec, deltaW, ierr)
-
-    ! DON'T just check the error. We want to catch error code 72
-    ! which is a floating point error. This is ok, we just reset and
-    ! keep going
-    if (ierr == 72) then
-       ! The convergence check will get the nan
-    else
-       call EChk(ierr, __FILE__, __LINE__)
-    end if
-
-    ! Get the number of iterations from the KSP solver
-    call KSPGetIterationNumber(ANK_KSP, kspIterations, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    call KSPGetConvergedReason(ANK_KSP, reason, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Return previously changed variables back to normal, VERY IMPORTANT
-    if (totalR > ANK_secondOrdSwitchTol*totalR0) then
-       ! Set ANK_useDissApprox back to False to go back to using actual flux routines
-       ANK_useDissApprox =.False.
-       lumpedDiss = .False.
-       approxSA = .False.
-
-       ! Replace turbulence order
-       orderturb = orderturbsave
-
-    end if
-
-   ! put back the approxsa flag if we were using it
-    if (ANK_useApproxSA) &
-      approxSA = .False.
-
-    ! Compute the maximum step that will limit the change in pressure
-    ! and energy to some user defined fraction.
-    call physicalityCheckANK(lambda)
-    if (ANK_CFL .gt. ANK_CFLMin .and. lambda .lt. ANK_stepMin) &
-        lambda = zero
-
-    ! Take the uodate after the physicality check.
-    call VecAXPY(wVec, -lambda, deltaW, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Set the updated state variables
-    call setWANK(wVec,1,nState)
-
-    ! Compute the unsteady residuals. The actual residuals
-    ! also get calculated in the process, and are stored in
-    ! dw. Make sure to call setRVec/setRVecANK after this
-    ! routine because rVec contains the unsteady residuals,
-    ! and we need the steady residuals for the next iteration.
-    call computeUnsteadyResANK(lambda)
-
-    ! Count the number of of residual evaluations outside the KSP solve
-    feval = 1_intType
-
-    ! Check if the norm of the rVec is bad:
-    call VecNorm(rVec, NORM_2, unsteadyNorm, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! initialize this outside the ls
-    LSFailed = .False.
-
-    if ((unsteadyNorm > unsteadyNorm_old*ANK_unstdyLSTol .or. myisnan(unsteadyNorm))) then
-       ! The unsteady residual is too high or we have a NAN. Do a
-       ! backtracking line search until we get a residual that is lower.
-
-       LSFailed = .True.
-
-       ! Restore the starting (old) w value by adding lamda*deltaW
-       call VecAXPY(wVec, lambda, deltaW, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-
-       ! Set the initial new lambda. This is working off the
-       ! potentially already physically limited step.
-       lambda = 0.7_realType * lambda
-
-       backtrack: do iter=1, 12
-
-          ! Apply the new step
-          call VecAXPY(wVec, -lambda, deltaW, ierr)
-          call EChk(ierr, __FILE__, __LINE__)
-
-          ! Set and recompute
-          call setWANK(wVec,1,nState)
-
-          ! Compute the unsteady residuals with the current step
-          call computeUnsteadyResANK(lambda)
-          feval = feval + 1
-
-          call VecNorm(rVec, NORM_2, unsteadyNorm, ierr)
-          call EChk(ierr, __FILE__, __LINE__)
-
-          if (unsteadyNorm > unsteadyNorm_old*ANK_unstdyLSTol .or. myisnan(unsteadyNorm)) then
-
-             ! Restore back to the original wVec
-             call VecAXPY(wVec, lambda, deltaW, ierr)
-             call EChk(ierr, __FILE__, __LINE__)
-
-             ! Haven't backed off enough yet....keep going
-             lambda = lambda * 0.7_realType
-          else
-             ! We have succefssfully reduced the norm
-             LSFailed = .False.
-             exit
-          end if
-       end do backtrack
-
-       if (LSFailed .or. myisnan(unsteadyNorm)) then
-          ! the line search wasn't much help.
-
-          if (ANK_CFL > ANK_CFLMin) then
-             ! the cfl number is not already at the lower limit.  We
-             ! can cut the CFL back and try again. Set lambda to zero
-             ! to indicate we never took a step.
-             lambda = zero
-          else
-             ! cfl is as low as it goes, try taking the step
-             ! anyway. We can't do  anything else
-             call VecAXPY(wVec, -lambda, deltaW, ierr)
-             call EChk(ierr, __FILE__, __LINE__)
-          end if
-
-          ! Set the state vec and compute the new residual
-          call setWANK(wVec,1,nState)
-          if (.not. ANK_coupled) then
-             call blocketteRes(useTurbRes=.False., useStoreWall=.False.)
-          else
-             call blocketteRes()
-          end if
-          feval = feval + 1
-       else
-       end if
-    end if
-
-    ! ============== Turb Update =============
-    if ((.not. ANK_coupled) .and. equations==RANSEquations .and. lambda > zero) then
-
-        if (ANK_useTurbDADI) then
-            ! actually do the turbulence update
-            call computeUtau
-            call turbSolveDDADI
-        else
-            call ANKTurbSolveKSP
-        end if
-    end if
-
-    ! We need to now compute the residual for the next iteration.  We
-    ! also need the to update the update the time step and the
-    ! viscWall pointer stuff
-
-    call blocketteRes(useUpdateIntermed=.True.)
-
-    feval = feval + 1
-    if (ANK_coupled) then
-       call setRvec(rVec)
-    else
-       call setRVecANK(rVec)
-    end if
-
-    linResMonitor = resHist(kspIterations+1)/resHist(1)
-
-    if ((linResMonitor .ge. ANK_rtol .and. &
-         totalR > ANK_secondOrdSwitchTol*totalR0 .and.&
-         linResOld .le. ANK_rtol) &
-         !.or. LSFailed) then
-         !.or. lambda .le. ANK_stepMin) then
-         .or. (lambda .eq. zero)) then
-       ! We should reform the PC since it took longer than we want,
-       ! or we need to adjust the CFL because the last update was bad,
-       ! or convergence since the last PC update was good enough and we
-       ! would benefit from re-calculating the PC.
-       ANK_iter = -1
-    end if
-
-    ! update the linear residual for next iteration
-    linResOld = linResMonitor
-
-    ! Update step monitor
-    stepMonitor = lambda
-
-    ! Check if the linear solutions are failing.
-    ! If the lin res is above .5 or so, the solver
-    ! might stall, so we might be better off just
-    ! reducing the CFL and keep going. We Modify
-    ! the CFLMin by altering CFLMinBase.
-    if (linResMonitor .gt. ANK_linResMax) then
-      ! This will adjust MinBase such that we can halve the cfl
-      ! based on the current CFL.
-      ANK_CFLMinBase = ANK_CFLCutback*ANK_CFL*((totalR/totalR0)**ANK_CFLExponent)
-      ! flags to refresh the Jacobian and cut back the CFL
-      ANK_iter = -1
-      lambda = zero
-    end if
-
-    ! Update the approximate iteration counter. The +1 is for the
-    ! residual evaluations.
-    approxTotalIts = approxTotalIts + feval + kspIterations
-
-  end subroutine ANKStep
+    end subroutine ANKStep
 end module ANKSolver
diff --git a/src/NKSolver/blockette.F90 b/src/NKSolver/blockette.F90
index 8293db443..df33e7723 100644
--- a/src/NKSolver/blockette.F90
+++ b/src/NKSolver/blockette.F90
@@ -1,6867 +1,6850 @@
 module blockette
 
-  use constants
-  ! This temporary module contains all cache-blocked code. It also
-  ! contains the statically allocated variables on which the blocked
-  ! code operates.
-
-  ! Dummy Block dimensions
-  integer(kind=intType), parameter :: BS=8
-  integer(kind=intType), parameter :: bbil=BS+1, bbjl=BS+1, bbkl=BS+1
-  integer(kind=intType), parameter :: bbie=BS+2, bbje=BS+2, bbke=BS+2
-  integer(kind=intType), parameter :: bbib=BS+3, bbjb=BS+3, bbkb=BS+3
-
-  ! Actual dimensions to execute
-  integer(kind=intType) :: nx, ny, nz, il, jl, kl, ie, je, ke, ib, jb, kb
-
-  ! Variables to track transferring variables between blockettes
-  integer(kind=intType) :: singleHaloStart, doubleHaloStart, nodeStart
-
-  ! Current indices into the original block
-  integer(kind=intType) :: ii, jj, kk
-
-  ! Double halos
-  real(kind=realType), dimension(0:bbib, 0:bbjb, 0:bbkb, 1:6) :: w
-  real(kind=realType), dimension(0:bbib, 0:bbjb, 0:bbkb) :: P, gamma
-  real(kind=realType), dimension(0:bbib, 0:bbjb, 0:bbkb) :: ss ! Entropy
-
-  ! Single halos
-  real(kind=realType), dimension(0:bbie, 0:bbje, 0:bbke, 3) :: x
-  real(kind=realType), dimension(1:bbie, 1:bbje, 1:bbke):: rlv, rev, vol, aa
-  real(kind=realType), dimension(1:bbie, 1:bbje, 1:bbke) :: radI, radJ, radK, dtl
-  real(kind=realType),dimension(1:bbie, 1:bbje, 1:bbke, 3) :: dss ! Shock sensor
-
-  ! No halos
-  real(kind=realType), dimension(2:bbil, 2:bbjl, 2:bbkl) :: volRef, d2wall
-  integer(kind=intType), dimension(2:bbil, 2:bbjl, 2:bbkl) :: iblank
-
-  ! Face Porosities
-  integer(kind=porType), dimension(1:bbil, 2:bbjl, 2:bbkl) :: porI
-  integer(kind=porType), dimension(2:bbil, 1:bbjl, 2:bbkl) :: porJ
-  integer(kind=porType), dimension(2:bbil, 2:bbjl, 1:bbkl) :: porK
-
-  ! Single halos (only owned cells significant)
-  real(kind=realType), dimension(1:bbie, 1:bbje, 1:bbke, 1:5) :: fw
-  real(kind=realType), dimension(1:bbie, 1:bbje, 1:bbke, 1:6) :: dw
-
-  ! Face projected areas
-  real(kind=realType), dimension(0:bbie, 1:bbje, 1:bbke, 3) :: sI
-  real(kind=realType), dimension(1:bbie, 0:bbje, 1:bbke, 3) :: sJ
-  real(kind=realType), dimension(1:bbie, 1:bbje, 0:bbke, 3) :: sK
-
-  ! Face velocities
-  real(kind=realType), dimension(0:bbie, 1:bbje, 1:bbke) :: sFaceI
-  real(kind=realType), dimension(1:bbie, 0:bbje, 1:bbke) :: sFaceJ
-  real(kind=realType), dimension(1:bbie, 1:bbje, 0:bbke) :: sFaceK
-
-  ! Nodal gradients
-  real(kind=realType), dimension(1:bbil, 1:bbjl, 1:bbkl) :: ux, uy, uz
-  real(kind=realType), dimension(1:bbil, 1:bbjl, 1:bbkl) :: vx, vy, vz
-  real(kind=realType), dimension(1:bbil, 1:bbjl, 1:bbkl) :: wx, wy, wz
-  real(kind=realType), dimension(1:bbil, 1:bbjl, 1:bbkl) :: qx, qy, qz
-
-  ! Make *all* of these variables tread-private
-  !$OMP THREADPRIVATE(nx, ny, nz, il, jl, kl, ie, je, ke, ib, jb, kb)
-  !$OMP THREADPRIVATE(w, p, gamma, ss, x, rlv, rev, vol, aa, radI, radJ, radK)
-  !$OMP THREADPRIVATE(dss, volRef, d2wall, iblank, porI, porJ, porK, fw, dw)
-  !$OMP THREADPRIVATE(sI, sJ, sK, ux, uy, uz, vx, vy, vz, wx, wy, wz, qx, qy, qz)
-contains
+    use constants
+    ! This temporary module contains all cache-blocked code. It also
+    ! contains the statically allocated variables on which the blocked
+    ! code operates.
 
-  subroutine blocketteRes(useDissApprox, useViscApprox, useUpdateIntermed, useFlowRes, useTurbRes, useSpatial, &
-       useStoreWall, famLists, funcValues, forces, bcDataNames, bcDataValues, bcDataFamLists)
+    ! Dummy Block dimensions
+    integer(kind=intType), parameter :: BS = 8
+    integer(kind=intType), parameter :: bbil = BS + 1, bbjl = BS + 1, bbkl = BS + 1
+    integer(kind=intType), parameter :: bbie = BS + 2, bbje = BS + 2, bbke = BS + 2
+    integer(kind=intType), parameter :: bbib = BS + 3, bbjb = BS + 3, bbkb = BS + 3
 
-    ! Copy the values from blockPointers (assumed set) into the
-    ! blockette
+    ! Actual dimensions to execute
+    integer(kind=intType) :: nx, ny, nz, il, jl, kl, ie, je, ke, ib, jb, kb
 
-    use constants
-    use block, only : nDom
-    use BCRoutines, only : applyallBC_block
-    use bcdata, only : setBCData, setBCDataFineGrid
-    use turbbcRoutines, only : applyallTurbBCthisblock, bcTurbTreatment
-    use inputPhysics , only : turbProd, equationMode, equations, turbModel
-    use inputDiscretization, only : lowSpeedPreconditioner, useApproxWallDistance, useBlockettes
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use flowUtils, only : computeLamViscosity, computePressureSimple, adjustInflowAngle
-    use flowVarRefState, only : nwf, nw, nt1, nt2
-    use initializeFlow, only : referenceState
-    use section, only: sections, nSections
-    use iteration, only : rFil, currentLevel
-    use haloExchange, only : exchangeCoor, whalo2
-    use wallDistance, only : updateWallDistancesQuickly
-    use utils, only : setPointers, EChk
-    use turbUtils, only : computeEddyViscosity
-    use residuals, only : sourceTerms_block
-    use surfaceIntegrations, only : getSolution
-    use adjointExtra, only : volume_block, metric_block, boundaryNormals, xhalo_block
-    use oversetData, only : oversetPresent
-    use inputOverset, only : oversetUpdateMode
-    use oversetCommUtilities, only : updateOversetConnectivity
-    use actuatorRegionData, only : nActuatorRegions
-    implicit none
-
-    ! Input/Output
-    logical, intent(in), optional :: useDissApprox, useViscApprox, useUpdateIntermed, useFlowRes
-    logical, intent(in), optional :: useTurbRes, useSpatial, useStoreWall
-    integer(kind=intType), optional, dimension(:, :), intent(in) :: famLists
-    real(kind=realType), optional, dimension(:, :), intent(out) :: funcValues
-    character, optional, dimension(:, :), intent(in) :: bcDataNames
-    real(kind=realType), optional, dimension(:), intent(in) :: bcDataValues
-    integer(kind=intType), optional, dimension(:, :) :: bcDataFamLists
-    real(kind=realType), intent(out), optional, dimension(:, :, :) :: forces
-
-    ! Misc
-    logical :: dissApprox, viscApprox, updateIntermed, flowRes, turbRes, spatial, storeWall
-    integer(kind=intType) :: nn, sps, fSize, lstart, lend, iRegion
-    real(kind=realType) ::  pLocal
-
-    ! Set the defaults. The default is to compute the full, exact,
-    ! RANS residual without updating the spatial values or the local
-    ! timeStep.
-    dissApprox = .False.
-    viscApprox = .False.
-    ! Update intermediate flag is to copy out intermediate variables
-    ! that are computed during the blockette residual computation from
-    ! blockette memory back to the main memory. These are the time
-    ! step, spectral radii for all cases, and nodal gradients and
-    ! speed of sound squared for viscous simulations. The regular
-    ! "block" residuals do not need to copy out these since they
-    ! are already computed in place. For the block residual, this
-    ! flag only determines if we update the time step along with
-    ! the spectral radii.
-    updateIntermed = .False.
-    flowRes = .True.
-    turbRes = .True.
-    spatial = .False.
-    storeWall = .True.
-
-    ! Parse the input variables
-    if (present(useDissApprox)) then
-       dissApprox = useDissApprox
-    end if
-
-    if (present(useViscApprox)) then
-       viscApprox = useViscApprox
-    end if
-
-    if (present(useUpdateIntermed)) then
-      updateIntermed = useUpdateIntermed
-    end if
-
-    if (present(useFlowRes)) then
-       flowRes = useFlowRes
-    end if
-
-    if (present(useTurbRes)) then
-       turbRes = useTurbRes
-    end if
-
-    if (present(useSpatial)) then
-       spatial = useSpatial
-    end if
-
-    if (present(useStoreWall)) then
-       storeWall = useStoreWall
-    end if
-
-    ! Spatial-only updates first
-    if (spatial) then
-       call adjustInflowAngle()
-
-       ! Update all the BCData
-       call referenceState
-       if (present(bcDataNames)) then
-          do sps=1,nTimeIntervalsSpectral
-             call setBCData(bcDataNames, bcDataValues, bcDataFamLists, sps, &
-                  size(bcDataValues), size(bcDataFamLIsts, 2))
-          end do
-          call setBCDataFineGrid(.true.)
-       end if
-
-       do sps=1, nTimeIntervalsSpectral
-          do nn=1, nDom
-             call setPointers(nn, currentLevel, sps)
-             call xhalo_block()
-          end do
-       end do
-
-       ! Now exchange the coordinates (fine level only)
-       call exchangecoor(1)
-
-       do sps=1, nTimeIntervalsSpectral
-          ! Update overset connectivity if necessary
-          if (oversetPresent .and. oversetUpdateMode == updateFast) then
-             call updateOversetConnectivity(1_intType, sps)
-          end if
-       end do
-    end if
-
-    ! Compute the required derived values and apply the BCs
-    do sps=1,nTimeIntervalsSpectral
-       do nn=1,nDom
-          call setPointers(nn, currentLevel, sps)
-
-          if (spatial) then
-             call volume_block
-             call metric_block
-             call boundaryNormals
-
-             if (equations == RANSEquations .and. useApproxWallDistance) then
-                call updateWallDistancesQuickly(nn, 1, sps)
-             end if
-          end if
-
-          ! Compute the pressures/viscositites
-          call computePressureSimple(.False.)
-
-          ! Compute Laminar/eddy viscosity if required
-          call computeLamViscosity(.False.)
-          call computeEddyViscosity(.False.)
-
-          ! Make sure to call the turb BC's first incase we need to
-          ! correct for K
-          if( equations == RANSEquations .and. turbRes) then
-             call BCTurbTreatment
-             call applyAllTurbBCthisblock(.True.)
-          end if
-          call applyAllBC_block(.True.)
-
-       end do
-    end do
-
-    ! Compute the ranges of the residuals we are dealing with:
-    if (flowRes .and. turbRes) then
-       lStart = 1
-       lEnd =  nw
-
-    else if (flowRes .and. (.not. turbRes)) then
-       lStart = 1
-       lEnd = nwf
-
-    else if ((.not. flowRes) .and. turbres) then
-       lStart = nt1
-       lEnd   = nt2
-    end if
-
-    ! Exchange values
-    call whalo2(1_intType, lStart, lEnd, .True., .True., .True.)
-
-    ! Need to re-apply the BCs. The reason is that BC halos behind
-    ! interpolated cells need to be recomputed with their new
-    ! interpolated values from actual compute cells. Only needed for
-    ! overset.
-    if (oversetPresent) then
-       do sps=1,nTimeIntervalsSpectral
-          do nn=1,nDom
-             call setPointers(nn, currentLevel, sps)
-             if( equations == RANSEquations .and. turbRes) then
-                call BCTurbTreatment
-                call applyAllTurbBCthisblock(.True.)
-             end if
-             call applyAllBC_block(.True.)
-          end do
-       end do
-    end if
-
-    ! Main loop for the residual...This is where the blockette magic happens.
-    spsLoop: do sps=1, nTimeIntervalsSpectral
-       blockLoop: do  nn=1, nDom
-          call setPointers(nn, currentLevel, sps)
-
-          rFil = one
-          blockettes: if (useBlockettes) then
-             call blocketteResCore(dissApprox, viscApprox, updateIntermed, flowRes, turbRes, storeWall)
-          else
-             call blockResCore(dissApprox, viscApprox, updateIntermed, flowRes, turbRes, storeWall, nn, sps)
-          end if blockettes
-
-          if (currentLevel == 1) then
-             do iRegion=1, nActuatorRegions
-                call sourceTerms_block(nn, .True., iRegion, pLocal)
-             end do
-          end if
-       end do blockLoop
-    end do spsLoop
-
-    ! Compute the final solution values
-    if (present(famLists)) then
-       call getSolution(famLists, funcValues)
-    end if
-
-    if (present(forces)) then
-       do sps=1, nTimeIntervalsSpectral
-          ! Now we can retrieve the forces/tractions for this spectral instance
-          fSize = size(forces, 2)
-          call getForces(forces(:, :, sps), fSize, sps)
-       end do
-    end if
-  end subroutine blocketteRes
-
-  subroutine blocketteResCore(dissApprox, viscApprox, updateIntermed, flowRes, turbRes, storeWall)
-
-    ! Main subroutine for computing the reisdual for the given block using blockettes
-    use constants
+    ! Variables to track transferring variables between blockettes
+    integer(kind=intType) :: singleHaloStart, doubleHaloStart, nodeStart
 
-    use constants
-    use blockPointers, only : &
-         bnx=>nx, bny=>ny, bnz=>nz, &
-         bil=>il, bjl=>jl, bkl=>kl, &
-         bie=>ie, bje=>je, bke=>ke, &
-         bib=>ib, bjb=>jb, bkb=>kb, &
-         bw=>w, bp=>p, bgamma=>gamma, &
-         bradi=>radi, bradj=>radj, bradk=>radk, &
-         bux=>ux, buy=>uy, buz=>uz, &
-         bvx=>vx, bvy=>vy, bvz=>vz, &
-         bwx=>wx, bwy=>wy, bwz=>wz, &
-         bqx=>qx, bqy=>qy, bqz=>qz, &
-         bx=>x, brlv=>rlv, brev=>rev, bvol=>vol, bVolRef=>volRef, bd2wall=>d2wall, &
-         biblank=>iblank, bPorI=>porI, bPorJ=>porJ, bPorK=>porK, bdw=>dw, bfw=>fw, &
-         bShockSensor=>shockSensor, &
-         bsi=>si, bsj=>sj, bsk=>sk, &
-         bsFaceI=>sFaceI, bsFaceJ=>sFaceJ, bsFaceK=>sFaceK , &
-         bdtl=>dtl, baa=>aa, &
-         addGridVelocities
-    use flowVarRefState, only : nwf, nw, viscous, nt1, nt2
-    use iteration, only : currentLevel
-    use inputPhysics , only : equationMode, equations, turbModel
-    use inputDiscretization, only : spaceDiscr
-    use utils, only : setPointers, EChk
-    use turbUtils, only : computeEddyViscosity
-    use oversetData, only : oversetPresent
-
-    implicit none
-
-    ! Input
-    logical, intent(in) :: dissApprox, viscApprox, updateIntermed, flowRes, turbRes, storeWall
-
-    ! Working:
-    integer(kind=intType) :: i, j, k, l, lStart, lEnd
-
-    ! Compute the ranges of the residuals we are dealing with:
-    if (flowRes .and. turbRes) then
-       lStart = 1
-       lEnd =  nw
-
-    else if (flowRes .and. (.not. turbRes)) then
-       lStart = 1
-       lEnd = nwf
-
-    else if ((.not. flowRes) .and. turbres) then
-       lStart = nt1
-       lEnd   = nt2
-    end if
-
-    ! Block loop over the owned cells
-    !$OMP parallel do private(i,j,k,l) collapse(2)
-    do kk=2, bkl, BS
-       do jj=2, bjl, BS
-          do ii=2, bil, BS
-
-             ! Determine the actual size this block will be and set
-             ! the sizes in the blockette module for each of the
-             ! subroutines.
-
-             nx = min(ii+BS-1, bil) - ii + 1
-             ny = min(jj+BS-1, bjl) - jj + 1
-             nz = min(kk+BS-1, bkl) - kk + 1
-
-             il = nx + 1; jl = ny + 1; kl = nz + 1
-             ie = nx + 2; je = ny + 2; ke = nz + 2
-             ib = nx + 3; jb = ny + 3; kb = nz + 3
-
-             firstBlockette: if (ii==2) then
-
-                ! First loop. Need to compute the extra stuff. Set
-                ! the generic starts and copy the extra
-                ! variables in to the starting slots
-                singleHaloStart = 1
-                doubleHaloStart = 0
-                nodeStart = 1
-
-                ! Double halos
-                do k=0, kb
-                   do j=0, jb
-                      do i=0, 3
-                         w(i,j,k,1:nw) = bw(i+ii-2, j+jj-2, k+kk-2, 1:nw)
-                         p(i,j,k) = bP(i+ii-2, j+jj-2, k+kk-2)
-                         gamma(i,j,k) = bgamma(i+ii-2, j+jj-2, k+kk-2)
-                         if (currentLevel == 1) then
-                            ss(i,j,k) = bShockSensor(i+ii-2, j+jj-2,k+kk-2)
-                         end if
-                      end do
-                   end do
-                end do
+    ! Current indices into the original block
+    integer(kind=intType) :: ii, jj, kk
 
-                ! Single halos
-                do k=1, ke
-                   do j=1, je
-                      do i=1, 2
-                         rlv(i,j,k) = brlv(i+ii-2, j+jj-2, k+kk-2)
-                         rev(i,j,k) = brev(i+ii-2, j+jj-2, k+kk-2)
-                         vol(i,j,k) = bvol(i+ii-2, j+jj-2, k+kk-2)
-                      end do
-                   end do
-                end do
+    ! Double halos
+    real(kind=realType), dimension(0:bbib, 0:bbjb, 0:bbkb, 1:6) :: w
+    real(kind=realType), dimension(0:bbib, 0:bbjb, 0:bbkb) :: P, gamma
+    real(kind=realType), dimension(0:bbib, 0:bbjb, 0:bbkb) :: ss ! Entropy
+
+    ! Single halos
+    real(kind=realType), dimension(0:bbie, 0:bbje, 0:bbke, 3) :: x
+    real(kind=realType), dimension(1:bbie, 1:bbje, 1:bbke) :: rlv, rev, vol, aa
+    real(kind=realType), dimension(1:bbie, 1:bbje, 1:bbke) :: radI, radJ, radK, dtl
+    real(kind=realType), dimension(1:bbie, 1:bbje, 1:bbke, 3) :: dss ! Shock sensor
+
+    ! No halos
+    real(kind=realType), dimension(2:bbil, 2:bbjl, 2:bbkl) :: volRef, d2wall
+    integer(kind=intType), dimension(2:bbil, 2:bbjl, 2:bbkl) :: iblank
+
+    ! Face Porosities
+    integer(kind=porType), dimension(1:bbil, 2:bbjl, 2:bbkl) :: porI
+    integer(kind=porType), dimension(2:bbil, 1:bbjl, 2:bbkl) :: porJ
+    integer(kind=porType), dimension(2:bbil, 2:bbjl, 1:bbkl) :: porK
+
+    ! Single halos (only owned cells significant)
+    real(kind=realType), dimension(1:bbie, 1:bbje, 1:bbke, 1:5) :: fw
+    real(kind=realType), dimension(1:bbie, 1:bbje, 1:bbke, 1:6) :: dw
+
+    ! Face projected areas
+    real(kind=realType), dimension(0:bbie, 1:bbje, 1:bbke, 3) :: sI
+    real(kind=realType), dimension(1:bbie, 0:bbje, 1:bbke, 3) :: sJ
+    real(kind=realType), dimension(1:bbie, 1:bbje, 0:bbke, 3) :: sK
+
+    ! Face velocities
+    real(kind=realType), dimension(0:bbie, 1:bbje, 1:bbke) :: sFaceI
+    real(kind=realType), dimension(1:bbie, 0:bbje, 1:bbke) :: sFaceJ
+    real(kind=realType), dimension(1:bbie, 1:bbje, 0:bbke) :: sFaceK
+
+    ! Nodal gradients
+    real(kind=realType), dimension(1:bbil, 1:bbjl, 1:bbkl) :: ux, uy, uz
+    real(kind=realType), dimension(1:bbil, 1:bbjl, 1:bbkl) :: vx, vy, vz
+    real(kind=realType), dimension(1:bbil, 1:bbjl, 1:bbkl) :: wx, wy, wz
+    real(kind=realType), dimension(1:bbil, 1:bbjl, 1:bbkl) :: qx, qy, qz
+
+    ! Make *all* of these variables tread-private
+    !$OMP THREADPRIVATE(nx, ny, nz, il, jl, kl, ie, je, ke, ib, jb, kb)
+    !$OMP THREADPRIVATE(w, p, gamma, ss, x, rlv, rev, vol, aa, radI, radJ, radK)
+    !$OMP THREADPRIVATE(dss, volRef, d2wall, iblank, porI, porJ, porK, fw, dw)
+    !$OMP THREADPRIVATE(sI, sJ, sK, ux, uy, uz, vx, vy, vz, wx, wy, wz, qx, qy, qz)
+contains
 
-                ! X
-                do k=0, ke
-                   do j=0, je
-                      do i=0, 1
-                         x(i,j,k,:) = bx(i+ii-2, j+jj-2, k+kk-2, :)
-                      end do
-                   end do
-                end do
-             else
-
-                ! Subsequent loop. We can save a bunch of work by
-                ! copying some of the pre-computed values from the
-                ! previous blockette to this blockette. Basically the
-                ! values that are at the "I end" get shuffled back to
-                ! the I-start. We *also* do this for some of the
-                ! intermediate variables that are costly to compute
-                ! like the nodal gradients, and spectral radius which
-                ! helps cut back on the amount of data duplication.
-
-                ! Important Note: This cell is not the first cell. If
-                ! this code is being executed, the previous blockette
-                ! was copied fully in the i direction.
-                ! Therefore, we can just copy the values from
-                ! the end of the blockette as it is allocated.
-                ! To do this, we ignore the dimensions of the "current"
-                ! blockette, and just take the baseline BS dimensions
-                ! as the current blockette might be partially filled
-                ! in the i direction.
-
-                singleHaloStart = 3
-                doubleHaloStart = 4
-                nodeStart = 2
-
-                ! Double halos
-                do k=0, kb
-                   do j=0, jb
-                      do i=0, 3
-                         w(i,j,k,1:nw) = w(BS+i, j, k, 1:nw)
-                         p(i,j,k) = p(BS+i, j, k)
-                         gamma(i,j,k) = gamma(BS+i, j, k)
-                         ss(i,j,k) = ss(BS+i, j, k)
-                      end do
-                   end do
+    subroutine blocketteRes(useDissApprox, useViscApprox, useUpdateIntermed, useFlowRes, useTurbRes, useSpatial, &
+                            useStoreWall, famLists, funcValues, forces, bcDataNames, bcDataValues, bcDataFamLists)
+
+        ! Copy the values from blockPointers (assumed set) into the
+        ! blockette
+
+        use constants
+        use block, only: nDom
+        use BCRoutines, only: applyallBC_block
+        use bcdata, only: setBCData, setBCDataFineGrid
+        use turbbcRoutines, only: applyallTurbBCthisblock, bcTurbTreatment
+        use inputPhysics, only: turbProd, equationMode, equations, turbModel
+        use inputDiscretization, only: lowSpeedPreconditioner, useApproxWallDistance, useBlockettes
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use flowUtils, only: computeLamViscosity, computePressureSimple, adjustInflowAngle
+        use flowVarRefState, only: nwf, nw, nt1, nt2
+        use initializeFlow, only: referenceState
+        use section, only: sections, nSections
+        use iteration, only: rFil, currentLevel
+        use haloExchange, only: exchangeCoor, whalo2
+        use wallDistance, only: updateWallDistancesQuickly
+        use utils, only: setPointers, EChk
+        use turbUtils, only: computeEddyViscosity
+        use residuals, only: sourceTerms_block
+        use surfaceIntegrations, only: getSolution
+        use adjointExtra, only: volume_block, metric_block, boundaryNormals, xhalo_block
+        use oversetData, only: oversetPresent
+        use inputOverset, only: oversetUpdateMode
+        use oversetCommUtilities, only: updateOversetConnectivity
+        use actuatorRegionData, only: nActuatorRegions
+        implicit none
+
+        ! Input/Output
+        logical, intent(in), optional :: useDissApprox, useViscApprox, useUpdateIntermed, useFlowRes
+        logical, intent(in), optional :: useTurbRes, useSpatial, useStoreWall
+        integer(kind=intType), optional, dimension(:, :), intent(in) :: famLists
+        real(kind=realType), optional, dimension(:, :), intent(out) :: funcValues
+        character, optional, dimension(:, :), intent(in) :: bcDataNames
+        real(kind=realType), optional, dimension(:), intent(in) :: bcDataValues
+        integer(kind=intType), optional, dimension(:, :) :: bcDataFamLists
+        real(kind=realType), intent(out), optional, dimension(:, :, :) :: forces
+
+        ! Misc
+        logical :: dissApprox, viscApprox, updateIntermed, flowRes, turbRes, spatial, storeWall
+        integer(kind=intType) :: nn, sps, fSize, lstart, lend, iRegion
+        real(kind=realType) :: pLocal
+
+        ! Set the defaults. The default is to compute the full, exact,
+        ! RANS residual without updating the spatial values or the local
+        ! timeStep.
+        dissApprox = .False.
+        viscApprox = .False.
+        ! Update intermediate flag is to copy out intermediate variables
+        ! that are computed during the blockette residual computation from
+        ! blockette memory back to the main memory. These are the time
+        ! step, spectral radii for all cases, and nodal gradients and
+        ! speed of sound squared for viscous simulations. The regular
+        ! "block" residuals do not need to copy out these since they
+        ! are already computed in place. For the block residual, this
+        ! flag only determines if we update the time step along with
+        ! the spectral radii.
+        updateIntermed = .False.
+        flowRes = .True.
+        turbRes = .True.
+        spatial = .False.
+        storeWall = .True.
+
+        ! Parse the input variables
+        if (present(useDissApprox)) then
+            dissApprox = useDissApprox
+        end if
+
+        if (present(useViscApprox)) then
+            viscApprox = useViscApprox
+        end if
+
+        if (present(useUpdateIntermed)) then
+            updateIntermed = useUpdateIntermed
+        end if
+
+        if (present(useFlowRes)) then
+            flowRes = useFlowRes
+        end if
+
+        if (present(useTurbRes)) then
+            turbRes = useTurbRes
+        end if
+
+        if (present(useSpatial)) then
+            spatial = useSpatial
+        end if
+
+        if (present(useStoreWall)) then
+            storeWall = useStoreWall
+        end if
+
+        ! Spatial-only updates first
+        if (spatial) then
+            call adjustInflowAngle()
+
+            ! Update all the BCData
+            call referenceState
+            if (present(bcDataNames)) then
+                do sps = 1, nTimeIntervalsSpectral
+                    call setBCData(bcDataNames, bcDataValues, bcDataFamLists, sps, &
+                                   size(bcDataValues), size(bcDataFamLIsts, 2))
                 end do
+                call setBCDataFineGrid(.true.)
+            end if
 
-                ! Single halos
-                do k=1, ke
-                   do j=1, je
-                      do i=1, 2
-                         rlv(i,j,k) = rlv(BS+i, j, k)
-                         rev(i,j,k) = rev(BS+i, j, k)
-                         vol(i,j,k) = vol(BS+i, j, k)
-
-                         ! Computed variables
-
-                         ! DONT Copy the spectral-radii. The loop that calculates
-                         ! spectral radii also calculates portion of the time step,
-                         ! so we don't want to mess with its boundaries to keep
-                         ! it simple.
-                         aa(i,j,k) = aa(BS+i, j, k)
-                         dss(i,j,k,:) = dss(BS+i, j, k, :)
-                      end do
-                   end do
+            do sps = 1, nTimeIntervalsSpectral
+                do nn = 1, nDom
+                    call setPointers(nn, currentLevel, sps)
+                    call xhalo_block()
                 end do
+            end do
 
-                ! X
-                do k=0, ke
-                   do j=0, je
-                      do i=0, 1
-                         x(i,j,k,:) = x(BS+i, j, k, :)
-                      end do
-                   end do
-                end do
+            ! Now exchange the coordinates (fine level only)
+            call exchangecoor(1)
 
-                ! Nodal gradients
-                do k=1, kl
-                   do j=1, jl
-                      ux(1, j, k) = ux(BS+1, j, k)
-                      uy(1, j, k) = uy(BS+1, j, k)
-                      uz(1, j, k) = uz(BS+1, j, k)
-
-                      vx(1, j, k) = vx(BS+1, j, k)
-                      vy(1, j, k) = vy(BS+1, j, k)
-                      vz(1, j, k) = vz(BS+1, j, k)
-
-                      wx(1, j, k) = wx(BS+1, j, k)
-                      wy(1, j, k) = wy(BS+1, j, k)
-                      wz(1, j, k) = wz(BS+1, j, k)
-
-                      qx(1, j, k) = qx(BS+1, j, k)
-                      qy(1, j, k) = qy(BS+1, j, k)
-                      qz(1, j, k) = qz(BS+1, j, k)
-                   end do
-                end do
-             end if firstBlockette
-
-             ! -------------------------------------
-             !      Fill in the remaining values
-             ! -------------------------------------
-
-             ! Double halos
-             do k=0, kb
-                do j=0, jb
-                   do i=4, ib
-                      w(i,j,k,1:nw) = bw(i+ii-2, j+jj-2, k+kk-2, 1:nw)
-                      p(i,j,k) = bP(i+ii-2, j+jj-2, k+kk-2)
-                      gamma(i,j,k) = bgamma(i+ii-2, j+jj-2, k+kk-2)
-                      if (currentLevel == 1) then
-                         ss(i,j,k) = bShockSensor(i+ii-2, j+jj-2,k+kk-2)
-                      end if
-                   end do
-                end do
-             end do
-
-             ! Single halos
-             do k=1, ke
-                do j=1, je
-                   do i=3, ie
-                      rlv(i,j,k) = brlv(i+ii-2, j+jj-2, k+kk-2)
-                      rev(i,j,k) = brev(i+ii-2, j+jj-2, k+kk-2)
-                      vol(i,j,k) = bvol(i+ii-2, j+jj-2, k+kk-2)
-                   end do
-                end do
-             end do
-
-             ! X
-             do k=0, ke
-                do j=0, je
-                   do i=2, ie
-                      x(i,j,k,:) = bx(i+ii-2, j+jj-2, k+kk-2, :)
-                   end do
-                end do
-             end do
-
-             ! No Halos (no change)
-             do k=2, kl
-                do j=2, jl
-                   do i=2, il
-                      iblank(i,j,k) = biblank(i+ii-2,j+jj-2,k+kk-2)
-                      if (equations .eq. ransequations) &
-                      d2wall(i,j,k) = bd2wall(i+ii-2,j+jj-2,k+kk-2)
-                      volRef(i,j,k) = bvolRef(i+ii-2,j+jj-2,k+kk-2)
-                   end do
-                end do
-             end do
-
-             ! Porosities (no change)
-             do k=2, kl
-                do j=2, jl
-                   do i=1, il
-                      porI(i,j,k) = bporI(i+ii-2,j+jj-2,k+kk-2)
-                   end do
-                end do
-             end do
+            do sps = 1, nTimeIntervalsSpectral
+                ! Update overset connectivity if necessary
+                if (oversetPresent .and. oversetUpdateMode == updateFast) then
+                    call updateOversetConnectivity(1_intType, sps)
+                end if
+            end do
+        end if
+
+        ! Compute the required derived values and apply the BCs
+        do sps = 1, nTimeIntervalsSpectral
+            do nn = 1, nDom
+                call setPointers(nn, currentLevel, sps)
+
+                if (spatial) then
+                    call volume_block
+                    call metric_block
+                    call boundaryNormals
+
+                    if (equations == RANSEquations .and. useApproxWallDistance) then
+                        call updateWallDistancesQuickly(nn, 1, sps)
+                    end if
+                end if
 
-             do k=2, kl
-                do j=1, jl
-                   do i=2, il
-                      PorJ(i,j,k) = bporJ(i+ii-2,j+jj-2,k+kk-2)
-                   end do
-                end do
-             end do
+                ! Compute the pressures/viscositites
+                call computePressureSimple(.False.)
 
-             do k=1, kl
-                do j=2, jl
-                   do i=2, il
-                      PorK(i,j,k) = bporK(i+ii-2,j+jj-2,k+kk-2)
-                   end do
-                end do
-             end do
-
-             ! Face velocities if necessary
-             if (addGridVelocities) then
-                do k=1, ke
-                   do j=1, je
-                      do i=0, ie
-                         sFaceI(i, j, k) = bsFaceI(ii+ii-2, j+jj-2, k+kk-2)
-                      end do
-                   end do
-                end do
+                ! Compute Laminar/eddy viscosity if required
+                call computeLamViscosity(.False.)
+                call computeEddyViscosity(.False.)
 
-                do k=1, ke
-                   do j=0, je
-                      do i=1, ie
-                         sFaceJ(i, j, k) = bsFaceJ(ii+ii-2, j+jj-2, k+kk-2)
-                      end do
-                   end do
+                ! Make sure to call the turb BC's first incase we need to
+                ! correct for K
+                if (equations == RANSEquations .and. turbRes) then
+                    call BCTurbTreatment
+                    call applyAllTurbBCthisblock(.True.)
+                end if
+                call applyAllBC_block(.True.)
+
+            end do
+        end do
+
+        ! Compute the ranges of the residuals we are dealing with:
+        if (flowRes .and. turbRes) then
+            lStart = 1
+            lEnd = nw
+
+        else if (flowRes .and. (.not. turbRes)) then
+            lStart = 1
+            lEnd = nwf
+
+        else if ((.not. flowRes) .and. turbres) then
+            lStart = nt1
+            lEnd = nt2
+        end if
+
+        ! Exchange values
+        call whalo2(1_intType, lStart, lEnd, .True., .True., .True.)
+
+        ! Need to re-apply the BCs. The reason is that BC halos behind
+        ! interpolated cells need to be recomputed with their new
+        ! interpolated values from actual compute cells. Only needed for
+        ! overset.
+        if (oversetPresent) then
+            do sps = 1, nTimeIntervalsSpectral
+                do nn = 1, nDom
+                    call setPointers(nn, currentLevel, sps)
+                    if (equations == RANSEquations .and. turbRes) then
+                        call BCTurbTreatment
+                        call applyAllTurbBCthisblock(.True.)
+                    end if
+                    call applyAllBC_block(.True.)
                 end do
+            end do
+        end if
 
-                do k=0, ke
-                   do j=1, je
-                      do i=1, ie
-                         sFaceK(i, j, k) = bsFaceK(ii+ii-2, j+jj-2, k+kk-2)
-                      end do
-                   end do
-                end do
-             else
-                sFaceI = zero
-                sFaceJ = zero
-                sFaceK = zero
-             end if
+        ! Main loop for the residual...This is where the blockette magic happens.
+        spsLoop: do sps = 1, nTimeIntervalsSpectral
+            blockLoop: do nn = 1, nDom
+                call setPointers(nn, currentLevel, sps)
 
-             ! Clear the viscous flux before we start.
-             fw = zero
+                rFil = one
+                blockettes: if (useBlockettes) then
+                    call blocketteResCore(dissApprox, viscApprox, updateIntermed, flowRes, turbRes, storeWall)
+                else
+                    call blockResCore(dissApprox, viscApprox, updateIntermed, flowRes, turbRes, storeWall, nn, sps)
+                end if blockettes
+
+                if (currentLevel == 1) then
+                    do iRegion = 1, nActuatorRegions
+                        call sourceTerms_block(nn, .True., iRegion, pLocal)
+                    end do
+                end if
+            end do blockLoop
+        end do spsLoop
+
+        ! Compute the final solution values
+        if (present(famLists)) then
+            call getSolution(famLists, funcValues)
+        end if
+
+        if (present(forces)) then
+            do sps = 1, nTimeIntervalsSpectral
+                ! Now we can retrieve the forces/tractions for this spectral instance
+                fSize = size(forces, 2)
+                call getForces(forces(:, :, sps), fSize, sps)
+            end do
+        end if
+    end subroutine blocketteRes
+
+    subroutine blocketteResCore(dissApprox, viscApprox, updateIntermed, flowRes, turbRes, storeWall)
+
+        ! Main subroutine for computing the reisdual for the given block using blockettes
+        use constants
+
+        use constants
+        use blockPointers, only: &
+            bnx => nx, bny => ny, bnz => nz, &
+            bil => il, bjl => jl, bkl => kl, &
+            bie => ie, bje => je, bke => ke, &
+            bib => ib, bjb => jb, bkb => kb, &
+            bw => w, bp => p, bgamma => gamma, &
+            bradi => radi, bradj => radj, bradk => radk, &
+            bux => ux, buy => uy, buz => uz, &
+            bvx => vx, bvy => vy, bvz => vz, &
+            bwx => wx, bwy => wy, bwz => wz, &
+            bqx => qx, bqy => qy, bqz => qz, &
+            bx => x, brlv => rlv, brev => rev, bvol => vol, bVolRef => volRef, bd2wall => d2wall, &
+            biblank => iblank, bPorI => porI, bPorJ => porJ, bPorK => porK, bdw => dw, bfw => fw, &
+            bShockSensor => shockSensor, &
+            bsi => si, bsj => sj, bsk => sk, &
+            bsFaceI => sFaceI, bsFaceJ => sFaceJ, bsFaceK => sFaceK, &
+            bdtl => dtl, baa => aa, &
+            addGridVelocities
+        use flowVarRefState, only: nwf, nw, viscous, nt1, nt2
+        use iteration, only: currentLevel
+        use inputPhysics, only: equationMode, equations, turbModel
+        use inputDiscretization, only: spaceDiscr
+        use utils, only: setPointers, EChk
+        use turbUtils, only: computeEddyViscosity
+        use oversetData, only: oversetPresent
+
+        implicit none
+
+        ! Input
+        logical, intent(in) :: dissApprox, viscApprox, updateIntermed, flowRes, turbRes, storeWall
+
+        ! Working:
+        integer(kind=intType) :: i, j, k, l, lStart, lEnd
+
+        ! Compute the ranges of the residuals we are dealing with:
+        if (flowRes .and. turbRes) then
+            lStart = 1
+            lEnd = nw
+
+        else if (flowRes .and. (.not. turbRes)) then
+            lStart = 1
+            lEnd = nwf
+
+        else if ((.not. flowRes) .and. turbres) then
+            lStart = nt1
+            lEnd = nt2
+        end if
+
+        ! Block loop over the owned cells
+        !$OMP parallel do private(i,j,k,l) collapse(2)
+        do kk = 2, bkl, BS
+            do jj = 2, bjl, BS
+                do ii = 2, bil, BS
+
+                    ! Determine the actual size this block will be and set
+                    ! the sizes in the blockette module for each of the
+                    ! subroutines.
+
+                    nx = min(ii + BS - 1, bil) - ii + 1
+                    ny = min(jj + BS - 1, bjl) - jj + 1
+                    nz = min(kk + BS - 1, bkl) - kk + 1
+
+                    il = nx + 1; jl = ny + 1; kl = nz + 1
+                    ie = nx + 2; je = ny + 2; ke = nz + 2
+                    ib = nx + 3; jb = ny + 3; kb = nz + 3
+
+                    firstBlockette: if (ii == 2) then
+
+                        ! First loop. Need to compute the extra stuff. Set
+                        ! the generic starts and copy the extra
+                        ! variables in to the starting slots
+                        singleHaloStart = 1
+                        doubleHaloStart = 0
+                        nodeStart = 1
+
+                        ! Double halos
+                        do k = 0, kb
+                            do j = 0, jb
+                                do i = 0, 3
+                                    w(i, j, k, 1:nw) = bw(i + ii - 2, j + jj - 2, k + kk - 2, 1:nw)
+                                    p(i, j, k) = bP(i + ii - 2, j + jj - 2, k + kk - 2)
+                                    gamma(i, j, k) = bgamma(i + ii - 2, j + jj - 2, k + kk - 2)
+                                    if (currentLevel == 1) then
+                                        ss(i, j, k) = bShockSensor(i + ii - 2, j + jj - 2, k + kk - 2)
+                                    end if
+                                end do
+                            end do
+                        end do
+
+                        ! Single halos
+                        do k = 1, ke
+                            do j = 1, je
+                                do i = 1, 2
+                                    rlv(i, j, k) = brlv(i + ii - 2, j + jj - 2, k + kk - 2)
+                                    rev(i, j, k) = brev(i + ii - 2, j + jj - 2, k + kk - 2)
+                                    vol(i, j, k) = bvol(i + ii - 2, j + jj - 2, k + kk - 2)
+                                end do
+                            end do
+                        end do
+
+                        ! X
+                        do k = 0, ke
+                            do j = 0, je
+                                do i = 0, 1
+                                    x(i, j, k, :) = bx(i + ii - 2, j + jj - 2, k + kk - 2, :)
+                                end do
+                            end do
+                        end do
+                    else
+
+                        ! Subsequent loop. We can save a bunch of work by
+                        ! copying some of the pre-computed values from the
+                        ! previous blockette to this blockette. Basically the
+                        ! values that are at the "I end" get shuffled back to
+                        ! the I-start. We *also* do this for some of the
+                        ! intermediate variables that are costly to compute
+                        ! like the nodal gradients, and spectral radius which
+                        ! helps cut back on the amount of data duplication.
+
+                        ! Important Note: This cell is not the first cell. If
+                        ! this code is being executed, the previous blockette
+                        ! was copied fully in the i direction.
+                        ! Therefore, we can just copy the values from
+                        ! the end of the blockette as it is allocated.
+                        ! To do this, we ignore the dimensions of the "current"
+                        ! blockette, and just take the baseline BS dimensions
+                        ! as the current blockette might be partially filled
+                        ! in the i direction.
+
+                        singleHaloStart = 3
+                        doubleHaloStart = 4
+                        nodeStart = 2
+
+                        ! Double halos
+                        do k = 0, kb
+                            do j = 0, jb
+                                do i = 0, 3
+                                    w(i, j, k, 1:nw) = w(BS + i, j, k, 1:nw)
+                                    p(i, j, k) = p(BS + i, j, k)
+                                    gamma(i, j, k) = gamma(BS + i, j, k)
+                                    ss(i, j, k) = ss(BS + i, j, k)
+                                end do
+                            end do
+                        end do
+
+                        ! Single halos
+                        do k = 1, ke
+                            do j = 1, je
+                                do i = 1, 2
+                                    rlv(i, j, k) = rlv(BS + i, j, k)
+                                    rev(i, j, k) = rev(BS + i, j, k)
+                                    vol(i, j, k) = vol(BS + i, j, k)
+
+                                    ! Computed variables
+
+                                    ! DONT Copy the spectral-radii. The loop that calculates
+                                    ! spectral radii also calculates portion of the time step,
+                                    ! so we don't want to mess with its boundaries to keep
+                                    ! it simple.
+                                    aa(i, j, k) = aa(BS + i, j, k)
+                                    dss(i, j, k, :) = dss(BS + i, j, k, :)
+                                end do
+                            end do
+                        end do
+
+                        ! X
+                        do k = 0, ke
+                            do j = 0, je
+                                do i = 0, 1
+                                    x(i, j, k, :) = x(BS + i, j, k, :)
+                                end do
+                            end do
+                        end do
 
-             ! Call the routines in order:
-             call metrics
-             call initRes(lStart, lEnd)
+                        ! Nodal gradients
+                        do k = 1, kl
+                            do j = 1, jl
+                                ux(1, j, k) = ux(BS + 1, j, k)
+                                uy(1, j, k) = uy(BS + 1, j, k)
+                                uz(1, j, k) = uz(BS + 1, j, k)
+
+                                vx(1, j, k) = vx(BS + 1, j, k)
+                                vy(1, j, k) = vy(BS + 1, j, k)
+                                vz(1, j, k) = vz(BS + 1, j, k)
+
+                                wx(1, j, k) = wx(BS + 1, j, k)
+                                wy(1, j, k) = wy(BS + 1, j, k)
+                                wz(1, j, k) = wz(BS + 1, j, k)
+
+                                qx(1, j, k) = qx(BS + 1, j, k)
+                                qy(1, j, k) = qy(BS + 1, j, k)
+                                qz(1, j, k) = qz(BS + 1, j, k)
+                            end do
+                        end do
+                    end if firstBlockette
+
+                    ! -------------------------------------
+                    !      Fill in the remaining values
+                    ! -------------------------------------
+
+                    ! Double halos
+                    do k = 0, kb
+                        do j = 0, jb
+                            do i = 4, ib
+                                w(i, j, k, 1:nw) = bw(i + ii - 2, j + jj - 2, k + kk - 2, 1:nw)
+                                p(i, j, k) = bP(i + ii - 2, j + jj - 2, k + kk - 2)
+                                gamma(i, j, k) = bgamma(i + ii - 2, j + jj - 2, k + kk - 2)
+                                if (currentLevel == 1) then
+                                    ss(i, j, k) = bShockSensor(i + ii - 2, j + jj - 2, k + kk - 2)
+                                end if
+                            end do
+                        end do
+                    end do
+
+                    ! Single halos
+                    do k = 1, ke
+                        do j = 1, je
+                            do i = 3, ie
+                                rlv(i, j, k) = brlv(i + ii - 2, j + jj - 2, k + kk - 2)
+                                rev(i, j, k) = brev(i + ii - 2, j + jj - 2, k + kk - 2)
+                                vol(i, j, k) = bvol(i + ii - 2, j + jj - 2, k + kk - 2)
+                            end do
+                        end do
+                    end do
+
+                    ! X
+                    do k = 0, ke
+                        do j = 0, je
+                            do i = 2, ie
+                                x(i, j, k, :) = bx(i + ii - 2, j + jj - 2, k + kk - 2, :)
+                            end do
+                        end do
+                    end do
+
+                    ! No Halos (no change)
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 2, il
+                                iblank(i, j, k) = biblank(i + ii - 2, j + jj - 2, k + kk - 2)
+                                if (equations .eq. ransequations) &
+                                    d2wall(i, j, k) = bd2wall(i + ii - 2, j + jj - 2, k + kk - 2)
+                                volRef(i, j, k) = bvolRef(i + ii - 2, j + jj - 2, k + kk - 2)
+                            end do
+                        end do
+                    end do
+
+                    ! Porosities (no change)
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 1, il
+                                porI(i, j, k) = bporI(i + ii - 2, j + jj - 2, k + kk - 2)
+                            end do
+                        end do
+                    end do
 
-             ! Compute turbulence residual for RANS equations
-             if( equations == RANSEquations .and. turbRes) then
+                    do k = 2, kl
+                        do j = 1, jl
+                            do i = 2, il
+                                PorJ(i, j, k) = bporJ(i + ii - 2, j + jj - 2, k + kk - 2)
+                            end do
+                        end do
+                    end do
 
-                ! Initialize only the Turblent Variables
-                !call unsteadyTurbSpectral_block(itu1, itu1, nn, sps)
+                    do k = 1, kl
+                        do j = 2, jl
+                            do i = 2, il
+                                PorK(i, j, k) = bporK(i + ii - 2, j + jj - 2, k + kk - 2)
+                            end do
+                        end do
+                    end do
+
+                    ! Face velocities if necessary
+                    if (addGridVelocities) then
+                        do k = 1, ke
+                            do j = 1, je
+                                do i = 0, ie
+                                    sFaceI(i, j, k) = bsFaceI(ii + ii - 2, j + jj - 2, k + kk - 2)
+                                end do
+                            end do
+                        end do
 
-                select case (turbModel)
+                        do k = 1, ke
+                            do j = 0, je
+                                do i = 1, ie
+                                    sFaceJ(i, j, k) = bsFaceJ(ii + ii - 2, j + jj - 2, k + kk - 2)
+                                end do
+                            end do
+                        end do
 
-                case (spalartAllmaras)
-                   call saSource
-                   call saAdvection
-                   !call unsteadyTurbTerm(1_intType, 1_intType, itu1-1, qq)
-                   call saViscous
-                   call saResScale
+                        do k = 0, ke
+                            do j = 1, je
+                                do i = 1, ie
+                                    sFaceK(i, j, k) = bsFaceK(ii + ii - 2, j + jj - 2, k + kk - 2)
+                                end do
+                            end do
+                        end do
+                    else
+                        sFaceI = zero
+                        sFaceJ = zero
+                        sFaceK = zero
+                    end if
+
+                    ! Clear the viscous flux before we start.
+                    fw = zero
+
+                    ! Call the routines in order:
+                    call metrics
+                    call initRes(lStart, lEnd)
+
+                    ! Compute turbulence residual for RANS equations
+                    if (equations == RANSEquations .and. turbRes) then
+
+                        ! Initialize only the Turblent Variables
+                        !call unsteadyTurbSpectral_block(itu1, itu1, nn, sps)
+
+                        select case (turbModel)
+
+                        case (spalartAllmaras)
+                            call saSource
+                            call saAdvection
+                            !call unsteadyTurbTerm(1_intType, 1_intType, itu1-1, qq)
+                            call saViscous
+                            call saResScale
+                        end select
+                    end if
+
+                    call timeStep(updateIntermed)
+
+                    if (flowRes) then
+                        call inviscidCentralFlux
+
+                        if (dissApprox) then
+                            select case (spaceDiscr)
+                            case (dissScalar)
+                                call inviscidDissFluxScalarApprox
+                            case (dissMatrix)
+                                call inviscidDissFluxMatrixApprox
+                            case (upwind)
+                                call inviscidUpwindFlux(.False.)
+                            end select
+                        else
+                            select case (spaceDiscr)
+                            case (dissScalar)
+                                call inviscidDissFluxScalar
+                            case (dissMatrix)
+                                call inviscidDissFluxMatrix
+                            case (upwind)
+                                call inviscidUpwindFlux(.True.)
+                            end select
+                        end if
+
+                        if (viscous) then
+                            call computeSpeedOfSoundSquared
+                            if (viscApprox) then
+                                call viscousFluxApprox
+                            else
+                                call allNodalGradients
+                                call viscousFlux(storeWall)
+                            end if
+                        end if
+
+                        call sumDwAndFw
+                    end if
+
+                    ! Now we can just set the part of dw we computed
+                    ! (owned cells only) and we're done!
+                    do l = lStart, lEnd
+                        do k = 2, kl
+                            do j = 2, jl
+                                do i = 2, il
+                                    bdw(i + ii - 2, j + jj - 2, k + kk - 2, l) = dw(i, j, k, l)
+                                end do
+                            end do
+                        end do
+                    end do
+
+                    ! Also copy out the intermediate variables if asked for them
+                    ! we need these to be updated in main memory because
+                    ! the reverse mode AD routines do use these variables.
+                    ! after every ANK and NK step, blocketteRes is called
+                    ! with updateIntermed = True, and it will update these
+                    ! arrays in main memory. The time step is required
+                    ! for the ANK and MG solver steps.
+                    intermed: if (updateIntermed) then
+                        ! time step
+                        do k = 2, kl
+                            do j = 2, jl
+                                do i = 2, il
+                                    bdtl(i + ii - 2, j + jj - 2, k + kk - 2) = dtl(i, j, k)
+                                end do
+                            end do
+                        end do
+
+                        ! Spectral radii
+                        do k = 1, ke
+                            do j = 1, je
+                                do i = 1, ie
+                                    bradi(i + ii - 2, j + jj - 2, k + kk - 2) = radi(i, j, k)
+                                    bradj(i + ii - 2, j + jj - 2, k + kk - 2) = radj(i, j, k)
+                                    bradk(i + ii - 2, j + jj - 2, k + kk - 2) = radk(i, j, k)
+                                end do
+                            end do
+                        end do
+
+                        ! need aa and nodal gradients if we have viscous fluxes
+                        visc: if (viscous .and. flowRes) then
+
+                            ! speed of sound squared
+                            do k = 1, ke
+                                do j = 1, je
+                                    do i = 1, ie
+                                        baa(i + ii - 2, j + jj - 2, k + kk - 2) = aa(i, j, k)
+                                    end do
+                                end do
+                            end do
+
+                            ! nodal gradients
+                            do k = 1, kl
+                                do j = 1, jl
+                                    do i = 1, il
+
+                                        bux(i + ii - 2, j + jj - 2, k + kk - 2) = ux(i, j, k)
+                                        buy(i + ii - 2, j + jj - 2, k + kk - 2) = uy(i, j, k)
+                                        buz(i + ii - 2, j + jj - 2, k + kk - 2) = uz(i, j, k)
+
+                                        bvx(i + ii - 2, j + jj - 2, k + kk - 2) = vx(i, j, k)
+                                        bvy(i + ii - 2, j + jj - 2, k + kk - 2) = vy(i, j, k)
+                                        bvz(i + ii - 2, j + jj - 2, k + kk - 2) = vz(i, j, k)
+
+                                        bwx(i + ii - 2, j + jj - 2, k + kk - 2) = wx(i, j, k)
+                                        bwy(i + ii - 2, j + jj - 2, k + kk - 2) = wy(i, j, k)
+                                        bwz(i + ii - 2, j + jj - 2, k + kk - 2) = wz(i, j, k)
+
+                                        bqx(i + ii - 2, j + jj - 2, k + kk - 2) = qx(i, j, k)
+                                        bqy(i + ii - 2, j + jj - 2, k + kk - 2) = qy(i, j, k)
+                                        bqz(i + ii - 2, j + jj - 2, k + kk - 2) = qz(i, j, k)
+
+                                    end do
+                                end do
+                            end do
+                        end if visc
+
+                    end if intermed
+
+                end do
+            end do
+        end do
+        !$OMP END PARALLEL DO
+    end subroutine blocketteResCore
+
+    subroutine blockResCore(dissApprox, viscApprox, updateIntermed, flowRes, turbRes, storeWall, nn, sps)
+
+        use constants
+        use fluxes, only: inviscidCentralFlux_block => inviscidCentralFlux, &
+                          inviscidDissFluxScalar_block => inviscidDissFluxScalar, &
+                          inviscidDissFluxMatrix_block => inviscidDissFluxMatrix, &
+                          inviscidUpwindFlux_block => inviscidUpwindFlux, &
+                          inviscidDissFluxScalarApprox_block => inviscidDissFluxScalarApprox, &
+                          inviscidDissFluxMatrixApprox_block => inviscidDissFluxMatrixApprox, &
+                          viscousFlux_block => viscousFlux, &
+                          viscousFluxApprox_block => viscousFluxApprox
+        use solverUtils, only: timeStep_block
+        use flowVarRefState, only: nwf, nw, viscous, nt1, nt2
+        use inputPhysics, only: equationMode, equations, turbModel
+        use residuals, only: initres_block
+        use sa, only: sa_block
+        use adjointExtra, only: sumDwAndFw_block => sumDwAndFw
+        use inputDiscretization, only: spaceDiscr
+        use flowUtils, only: allNodalGradients_block => allNodalGradients, &
+                             computeSpeedOfSoundSquared_block => computeSpeedOfSoundSquared
+
+        implicit none
+        ! Input
+        logical, intent(in) :: dissApprox, viscApprox, updateIntermed, flowRes, turbRes, storeWall
+        integer(kind=intType), intent(in) :: nn, sps
+
+        ! Working:
+        integer(kind=intType) :: i, j, k, lStart, lEnd
+
+        ! Compute the ranges of the residuals we are dealing with:
+        if (flowRes .and. turbRes) then
+            lStart = 1
+            lEnd = nw
+
+        else if (flowRes .and. (.not. turbRes)) then
+            lStart = 1
+            lEnd = nwf
+
+        else if ((.not. flowRes) .and. turbres) then
+            lStart = nt1
+            lEnd = nt2
+        end if
+
+        ! Compute time step
+        call timestep_block(.not. updateIntermed)
+
+        call initres_block(lStart, lEnd, nn, sps) ! Initialize only the Turblent Variables
+
+        fw = zero
+
+        ! Possible Turblent Equations
+        if (equations == RANSEquations .and. turbRes) then
+            ! Compute the skin-friction velocity (wall functions only)
+            !call computeUtau_block
+
+            ! Now call the selected turbulence model
+            select case (turbModel)
+            case (spalartAllmaras)
+                call sa_block(.true.)
+            end select
+        end if
+
+        if (flowRes) then
+
+            call inviscidCentralFlux_block
+            if (dissApprox) then
+                select case (spaceDiscr)
+                case (dissScalar)
+                    call inviscidDissFluxScalarApprox_block
+                case (dissMatrix)
+                    call inviscidDissFluxMatrixApprox_block
+                case (upwind)
+                    call inviscidUpwindFlux_block(.True.)
                 end select
-             endif
-
-             call timeStep(updateIntermed)
-
-             if (flowRes) then
-                call inviscidCentralFlux
-
-                if (dissApprox) then
-                   select case (spaceDiscr)
-                   case (dissScalar)
-                      call inviscidDissFluxScalarApprox
-                   case (dissMatrix)
-                      call inviscidDissFluxMatrixApprox
-                   case (upwind)
-                      call inviscidUpwindFlux(.False.)
-                   end select
+            else
+                select case (spaceDiscr)
+                case (dissScalar)
+                    call inviscidDissFluxScalar_block
+                case (dissMatrix)
+                    call inviscidDissFluxMatrix_block
+                case (upwind)
+                    call inviscidUpwindFlux_block(.True.)
+                end select
+            end if
+
+            if (viscous) then
+                call computeSpeedOfSoundSquared_block
+                if (viscApprox) then
+                    call viscousFluxApprox_block
                 else
-                   select case (spaceDiscr)
-                   case (dissScalar)
-                      call inviscidDissFluxScalar
-                   case (dissMatrix)
-                      call inviscidDissFluxMatrix
-                   case (upwind)
-                      call inviscidUpwindFlux(.True.)
-                   end select
+                    call allNodalGradients_block
+                    call viscousFlux_block
                 end if
+            end if
+
+            call sumDwAndFw_block
+        end if
+    end subroutine blockResCore
+
+    subroutine metrics
+        ! ---------------------------------------------
+        !              Metric computation
+        ! ---------------------------------------------
+
+        use constants
+        use blockPointers, only: rightHanded
+        implicit none
+
+        integer(kind=intType) :: i, j, k, l, m, n
+        real(kind=realType), dimension(3) :: v1, v2
+        real(kind=realType) :: fact
+
+        ! Projected areas of cell faces in the i direction.
+        if (rightHanded) then
+            fact = half
+        else
+            fact = -half
+        end if
+        do k = 1, ke
+            n = k - 1
+            do j = 1, je
+                m = j - 1
+                do i = 0, ie
+
+                    ! Determine the two diagonal vectors of the face.
+
+                    v1(1) = x(i, j, n, 1) - x(i, m, k, 1)
+                    v1(2) = x(i, j, n, 2) - x(i, m, k, 2)
+                    v1(3) = x(i, j, n, 3) - x(i, m, k, 3)
+
+                    v2(1) = x(i, j, k, 1) - x(i, m, n, 1)
+                    v2(2) = x(i, j, k, 2) - x(i, m, n, 2)
+                    v2(3) = x(i, j, k, 3) - x(i, m, n, 3)
+
+                    ! The face normal, which is the cross product of the two
+                    ! diagonal vectors times fact; remember that fact is
+                    ! either -0.5 or 0.5.
+
+                    si(i, j, k, 1) = fact * (v1(2) * v2(3) - v1(3) * v2(2))
+                    si(i, j, k, 2) = fact * (v1(3) * v2(1) - v1(1) * v2(3))
+                    si(i, j, k, 3) = fact * (v1(1) * v2(2) - v1(2) * v2(1))
 
-                if (viscous) then
-                   call computeSpeedOfSoundSquared
-                   if (viscApprox) then
-                      call viscousFluxApprox
-                   else
-                      call allNodalGradients
-                      call viscousFlux(storeWall)
-                   end if
-                end if
+                end do
+            end do
+        end do
+
+        ! Projected areas of cell faces in the j direction.
+
+        do k = 1, ke
+            n = k - 1
+            do j = 0, je
+                do i = 1, ie
+                    l = i - 1
+
+                    ! Determine the two diagonal vectors of the face.
+
+                    v1(1) = x(i, j, n, 1) - x(l, j, k, 1)
+                    v1(2) = x(i, j, n, 2) - x(l, j, k, 2)
+                    v1(3) = x(i, j, n, 3) - x(l, j, k, 3)
+
+                    v2(1) = x(l, j, n, 1) - x(i, j, k, 1)
+                    v2(2) = x(l, j, n, 2) - x(i, j, k, 2)
+                    v2(3) = x(l, j, n, 3) - x(i, j, k, 3)
+
+                    ! The face normal, which is the cross product of the two
+                    ! diagonal vectors times fact; remember that fact is
+                    ! either -0.5 or 0.5.
+
+                    sj(i, j, k, 1) = fact * (v1(2) * v2(3) - v1(3) * v2(2))
+                    sj(i, j, k, 2) = fact * (v1(3) * v2(1) - v1(1) * v2(3))
+                    sj(i, j, k, 3) = fact * (v1(1) * v2(2) - v1(2) * v2(1))
 
-                call sumDwAndFw
-             end if
-
-             ! Now we can just set the part of dw we computed
-             ! (owned cells only) and we're done!
-             do l=lStart, lEnd
-                do k=2, kl
-                   do j=2, jl
-                      do i=2, il
-                         bdw(i+ii-2,j+jj-2,k+kk-2,l) = dw(i,j,k,l)
-                      end do
-                   end do
                 end do
-             end do
-
-             ! Also copy out the intermediate variables if asked for them
-             ! we need these to be updated in main memory because
-             ! the reverse mode AD routines do use these variables.
-             ! after every ANK and NK step, blocketteRes is called
-             ! with updateIntermed = True, and it will update these
-             ! arrays in main memory. The time step is required
-             ! for the ANK and MG solver steps.
-             intermed: if (updateIntermed) then
-               ! time step
-               do k=2, kl
-                  do j=2, jl
-                     do i=2, il
-                        bdtl(i+ii-2, j+jj-2, k+kk-2) = dtl(i, j, k)
-                     end do
-                  end do
-               end do
-
-               ! Spectral radii
-               do k=1, ke
-                  do j=1, je
-                     do i=1, ie
-                        bradi(i+ii-2, j+jj-2, k+kk-2) = radi(i, j, k)
-                        bradj(i+ii-2, j+jj-2, k+kk-2) = radj(i, j, k)
-                        bradk(i+ii-2, j+jj-2, k+kk-2) = radk(i, j, k)
-                     end do
-                  end do
-               end do
-
-               ! need aa and nodal gradients if we have viscous fluxes
-               visc: if (viscous .and. flowRes) then
-
-                  ! speed of sound squared
-                  do k=1, ke
-                     do j=1, je
-                        do i=1, ie
-                           baa(i+ii-2, j+jj-2, k+kk-2) = aa(i, j, k)
-                        end do
-                     end do
-                  end do
+            end do
+        end do
 
-                  ! nodal gradients
-                  do k=1, kl
-                     do j=1, jl
-                        do i=1, il
+        ! Projected areas of cell faces in the k direction.
 
-                           bux(i+ii-2, j+jj-2, k+kk-2) = ux(i, j, k)
-                           buy(i+ii-2, j+jj-2, k+kk-2) = uy(i, j, k)
-                           buz(i+ii-2, j+jj-2, k+kk-2) = uz(i, j, k)
+        do k = 0, ke
+            do j = 1, je
+                m = j - 1
+                do i = 1, ie
+                    l = i - 1
 
-                           bvx(i+ii-2, j+jj-2, k+kk-2) = vx(i, j, k)
-                           bvy(i+ii-2, j+jj-2, k+kk-2) = vy(i, j, k)
-                           bvz(i+ii-2, j+jj-2, k+kk-2) = vz(i, j, k)
+                    ! Determine the two diagonal vectors of the face.
 
-                           bwx(i+ii-2, j+jj-2, k+kk-2) = wx(i, j, k)
-                           bwy(i+ii-2, j+jj-2, k+kk-2) = wy(i, j, k)
-                           bwz(i+ii-2, j+jj-2, k+kk-2) = wz(i, j, k)
+                    v1(1) = x(i, j, k, 1) - x(l, m, k, 1)
+                    v1(2) = x(i, j, k, 2) - x(l, m, k, 2)
+                    v1(3) = x(i, j, k, 3) - x(l, m, k, 3)
 
-                           bqx(i+ii-2, j+jj-2, k+kk-2) = qx(i, j, k)
-                           bqy(i+ii-2, j+jj-2, k+kk-2) = qy(i, j, k)
-                           bqz(i+ii-2, j+jj-2, k+kk-2) = qz(i, j, k)
+                    v2(1) = x(l, j, k, 1) - x(i, m, k, 1)
+                    v2(2) = x(l, j, k, 2) - x(i, m, k, 2)
+                    v2(3) = x(l, j, k, 3) - x(i, m, k, 3)
 
-                        end do
-                     end do
-                  end do
-               end if visc
+                    ! The face normal, which is the cross product of the two
+                    ! diagonal vectors times fact; remember that fact is
+                    ! either -0.5 or 0.5.
 
-             end if intermed
+                    sk(i, j, k, 1) = fact * (v1(2) * v2(3) - v1(3) * v2(2))
+                    sk(i, j, k, 2) = fact * (v1(3) * v2(1) - v1(1) * v2(3))
+                    sk(i, j, k, 3) = fact * (v1(1) * v2(2) - v1(2) * v2(1))
 
-          end do
-       end do
-    end do
-    !$OMP END PARALLEL DO
-  end subroutine blocketteResCore
+                end do
+            end do
+        end do
+    end subroutine metrics
+
+    subroutine initRes(varStart, varEnd)
+        ! ---------------------------------------------
+        !                     Init Res
+        ! ---------------------------------------------
+
+        use constants
+        implicit none
+
+        integer(kind=intType) :: varStart, varEnd
+        ! Obviously this needs to be more complex for the actual code.
+        dw(:, :, :, varStart:varEnd) = zero
+
+    end subroutine initRes
+
+    subroutine saSource
+        ! ---------------------------------------------
+        !                    SA Source Term
+        ! ---------------------------------------------
+
+        use constants
+        use paramTurb
+        use blockPointers, only: sectionID
+        use inputPhysics, only: useft2SA, useRotationSA, turbProd, equations
+        use inputDiscretization, only: approxSA
+        use section, only: sections
+        use sa, only: cv13, kar2Inv, cw36, cb3Inv
+        use flowvarRefState, only: timeRef
+
+        implicit none
+
+        ! Variables for sa Souce
+        real(kind=realType) :: fv1, fv2, ft2
+        real(kind=realType) :: sst, nu, dist2Inv, chi, chi2, chi3
+        real(kind=realType) :: rr, gg, gg6, termFw, fwSa, term1, term2
+        real(kind=realType) :: dfv1, dfv2, dft2, drr, dgg, dfw, sqrtProd
+        real(kind=realType) :: uux, uuy, uuz, vvx, vvy, vvz, wwx, wwy, wwz
+        real(kind=realType) :: div2, fact, sxx, syy, szz, sxy, sxz, syz
+        real(kind=realType) :: vortx, vorty, vortz
+        real(kind=realType) :: omegax, omegay, omegaz
+        real(kind=realType) :: strainMag2, prod
+        real(kind=realType), parameter :: xminn = 1.e-10_realType
+        real(kind=realType), parameter :: f23 = two * third
+        integer(kind=intType) :: i, j, k
+        real(kind=realType) :: term1Fact
+
+        ! Set model constants
+        cv13 = rsaCv1**3
+        kar2Inv = one / (rsaK**2)
+        cw36 = rsaCw3**6
+        cb3Inv = one / rsaCb3
+
+        ! set the approximate multiplier here
+        term1Fact = one
+        if (approxSA) term1Fact = zero
+
+        ! Determine the non-dimensional wheel speed of this block.
+
+        omegax = timeRef * sections(sectionID)%rotRate(1)
+        omegay = timeRef * sections(sectionID)%rotRate(2)
+        omegaz = timeRef * sections(sectionID)%rotRate(3)
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Compute the gradient of u in the cell center. Use is made
+                    ! of the fact that the surrounding normals sum up to zero,
+                    ! such that the cell i,j,k does not give a contribution.
+                    ! The gradient is scaled by the factor 2*vol.
+
+                    uux = w(i + 1, j, k, ivx) * si(i, j, k, 1) - w(i - 1, j, k, ivx) * si(i - 1, j, k, 1) &
+                          + w(i, j + 1, k, ivx) * sj(i, j, k, 1) - w(i, j - 1, k, ivx) * sj(i, j - 1, k, 1) &
+                          + w(i, j, k + 1, ivx) * sk(i, j, k, 1) - w(i, j, k - 1, ivx) * sk(i, j, k - 1, 1)
+                    uuy = w(i + 1, j, k, ivx) * si(i, j, k, 2) - w(i - 1, j, k, ivx) * si(i - 1, j, k, 2) &
+                          + w(i, j + 1, k, ivx) * sj(i, j, k, 2) - w(i, j - 1, k, ivx) * sj(i, j - 1, k, 2) &
+                          + w(i, j, k + 1, ivx) * sk(i, j, k, 2) - w(i, j, k - 1, ivx) * sk(i, j, k - 1, 2)
+                    uuz = w(i + 1, j, k, ivx) * si(i, j, k, 3) - w(i - 1, j, k, ivx) * si(i - 1, j, k, 3) &
+                          + w(i, j + 1, k, ivx) * sj(i, j, k, 3) - w(i, j - 1, k, ivx) * sj(i, j - 1, k, 3) &
+                          + w(i, j, k + 1, ivx) * sk(i, j, k, 3) - w(i, j, k - 1, ivx) * sk(i, j, k - 1, 3)
+
+                    ! Idem for the gradient of v.
+
+                    vvx = w(i + 1, j, k, ivy) * si(i, j, k, 1) - w(i - 1, j, k, ivy) * si(i - 1, j, k, 1) &
+                          + w(i, j + 1, k, ivy) * sj(i, j, k, 1) - w(i, j - 1, k, ivy) * sj(i, j - 1, k, 1) &
+                          + w(i, j, k + 1, ivy) * sk(i, j, k, 1) - w(i, j, k - 1, ivy) * sk(i, j, k - 1, 1)
+                    vvy = w(i + 1, j, k, ivy) * si(i, j, k, 2) - w(i - 1, j, k, ivy) * si(i - 1, j, k, 2) &
+                          + w(i, j + 1, k, ivy) * sj(i, j, k, 2) - w(i, j - 1, k, ivy) * sj(i, j - 1, k, 2) &
+                          + w(i, j, k + 1, ivy) * sk(i, j, k, 2) - w(i, j, k - 1, ivy) * sk(i, j, k - 1, 2)
+                    vvz = w(i + 1, j, k, ivy) * si(i, j, k, 3) - w(i - 1, j, k, ivy) * si(i - 1, j, k, 3) &
+                          + w(i, j + 1, k, ivy) * sj(i, j, k, 3) - w(i, j - 1, k, ivy) * sj(i, j - 1, k, 3) &
+                          + w(i, j, k + 1, ivy) * sk(i, j, k, 3) - w(i, j, k - 1, ivy) * sk(i, j, k - 1, 3)
+
+                    ! And for the gradient of w.
+
+                    wwx = w(i + 1, j, k, ivz) * si(i, j, k, 1) - w(i - 1, j, k, ivz) * si(i - 1, j, k, 1) &
+                          + w(i, j + 1, k, ivz) * sj(i, j, k, 1) - w(i, j - 1, k, ivz) * sj(i, j - 1, k, 1) &
+                          + w(i, j, k + 1, ivz) * sk(i, j, k, 1) - w(i, j, k - 1, ivz) * sk(i, j, k - 1, 1)
+                    wwy = w(i + 1, j, k, ivz) * si(i, j, k, 2) - w(i - 1, j, k, ivz) * si(i - 1, j, k, 2) &
+                          + w(i, j + 1, k, ivz) * sj(i, j, k, 2) - w(i, j - 1, k, ivz) * sj(i, j - 1, k, 2) &
+                          + w(i, j, k + 1, ivz) * sk(i, j, k, 2) - w(i, j, k - 1, ivz) * sk(i, j, k - 1, 2)
+                    wwz = w(i + 1, j, k, ivz) * si(i, j, k, 3) - w(i - 1, j, k, ivz) * si(i - 1, j, k, 3) &
+                          + w(i, j + 1, k, ivz) * sj(i, j, k, 3) - w(i, j - 1, k, ivz) * sj(i, j - 1, k, 3) &
+                          + w(i, j, k + 1, ivz) * sk(i, j, k, 3) - w(i, j, k - 1, ivz) * sk(i, j, k - 1, 3)
+
+                    ! Compute the components of the stress tensor.
+                    ! The combination of the current scaling of the velocity
+                    ! gradients (2*vol) and the definition of the stress tensor,
+                    ! leads to the factor 1/(4*vol).
+
+                    fact = fourth / vol(i, j, k)
+
+                    ! -- Calcs for strain --
+                    sxx = two * fact * uux
+                    syy = two * fact * vvy
+                    szz = two * fact * wwz
+
+                    sxy = fact * (uuy + vvx)
+                    sxz = fact * (uuz + wwx)
+                    syz = fact * (vvz + wwy)
+
+                    ! Compute 2/3 * divergence of velocity squared
+
+                    div2 = f23 * (sxx + syy + szz)**2
+
+                    ! Compute strain production term
+
+                    strainMag2 = two * (sxy**2 + sxz**2 + syz**2) &
+                                 + sxx**2 + syy**2 + szz**2
+
+                    ! -- Calcs for vorticity --
+
+                    ! Compute the three components of the vorticity vector.
+                    ! Substract the part coming from the rotating frame.
+
+                    vortx = two * fact * (wwy - vvz) - two * omegax
+                    vorty = two * fact * (uuz - wwx) - two * omegay
+                    vortz = two * fact * (vvx - uuy) - two * omegaz
+
+                    if (turbProd == strain) then
+                        sqrtProd = sqrt(max(two * strainMag2 - div2, eps))
+                    else
+                        sqrtProd = sqrt(vortx**2 + vorty**2 + vortz**2)
+                    end if
+
+                    ! Compute the laminar kinematic viscosity, the inverse of
+                    ! wall distance squared, the ratio chi (ratio of nuTilde
+                    ! and nu) and the functions fv1 and fv2. The latter corrects
+                    ! the production term near a viscous wall.
+
+                    nu = rlv(i, j, k) / w(i, j, k, irho)
+                    dist2Inv = one / (d2Wall(i, j, k)**2)
+                    chi = w(i, j, k, itu1) / nu
+                    chi2 = chi * chi
+                    chi3 = chi * chi2
+                    fv1 = chi3 / (chi3 + cv13)
+                    fv2 = one - chi / (one + chi * fv1)
+
+                    ! The function ft2, which is designed to keep a laminar
+                    ! solution laminar. When running in fully turbulent mode
+                    ! this function should be set to 0.0.
+
+                    ft2 = zero
+                    if (useft2SA) then
+                        ft2 = rsaCt3 * exp(-rsaCt4 * chi2)
+                    end if
+
+                    ! Correct the production term to account for the influence
+                    ! of the wall.
 
-  subroutine blockResCore(dissApprox, viscApprox, updateIntermed, flowRes, turbRes, storeWall, nn, sps)
+                    sst = sqrtProd + w(i, j, k, itu1) * fv2 * kar2Inv * dist2Inv
+
+                    ! Add rotation term (useRotationSA defined in inputParams.F90)
 
-    use constants
-    use fluxes, only : inviscidCentralFlux_block=>inviscidCentralFlux, &
-         inviscidDissFluxScalar_block=>inviscidDissFluxScalar, &
-         inviscidDissFluxMatrix_block=>inviscidDissFluxMatrix, &
-         inviscidUpwindFlux_block=>inviscidUpwindFlux, &
-         inviscidDissFluxScalarApprox_block=>inviscidDissFluxScalarApprox, &
-         inviscidDissFluxMatrixApprox_block=>inviscidDissFluxMatrixApprox, &
-         viscousFlux_block=>viscousFlux, &
-         viscousFluxApprox_block=>viscousFluxApprox
-    use solverUtils, only : timeStep_block
-    use flowVarRefState, only : nwf, nw, viscous, nt1, nt2
-    use inputPhysics , only : equationMode, equations, turbModel
-    use residuals, only : initres_block
-    use sa, only : sa_block
-    use adjointExtra, only : sumDwAndFw_block=>sumDwAndFw
-    use inputDiscretization, only : spaceDiscr
-    use flowUtils, only : allNodalGradients_block=>allNodalGradients, &
-         computeSpeedOfSoundSquared_block=>computeSpeedOfSoundSquared
-
-    implicit none
-    ! Input
-    logical, intent(in) :: dissApprox, viscApprox, updateIntermed, flowRes, turbRes, storeWall
-    integer(kind=intType), intent(in) :: nn, sps
-
-    ! Working:
-    integer(kind=intType) :: i, j, k, lStart, lEnd
-
-    ! Compute the ranges of the residuals we are dealing with:
-    if (flowRes .and. turbRes) then
-       lStart = 1
-       lEnd =  nw
-
-    else if (flowRes .and. (.not. turbRes)) then
-       lStart = 1
-       lEnd = nwf
-
-    else if ((.not. flowRes) .and. turbres) then
-       lStart = nt1
-       lEnd   = nt2
-    end if
-
-    ! Compute time step
-    call timestep_block(.not. updateIntermed)
-
-    call initres_block(lStart, lEnd, nn, sps) ! Initialize only the Turblent Variables
-
-    fw = zero
-
-    ! Possible Turblent Equations
-    if(equations == RANSEquations .and. turbRes) then
-       ! Compute the skin-friction velocity (wall functions only)
-       !call computeUtau_block
-
-       ! Now call the selected turbulence model
-       select case (turbModel)
-       case (spalartAllmaras)
-          call sa_block(.true.)
-       end select
-    endif
-
-    if (flowRes) then
-
-       call inviscidCentralFlux_block
-       if (dissApprox) then
-          select case (spaceDiscr)
-          case (dissScalar)
-             call inviscidDissFluxScalarApprox_block
-          case (dissMatrix)
-             call inviscidDissFluxMatrixApprox_block
-          case (upwind)
-             call inviscidUpwindFlux_block(.True.)
-          end select
-       else
-          select case (spaceDiscr)
-          case (dissScalar)
-             call inviscidDissFluxScalar_block
-          case (dissMatrix)
-             call inviscidDissFluxMatrix_block
-          case (upwind)
-             call inviscidUpwindFlux_block(.True.)
-          end select
-       end if
-
-       if (viscous) then
-          call computeSpeedOfSoundSquared_block
-          if (viscApprox) then
-             call viscousFluxApprox_block
-          else
-             call allNodalGradients_block
-             call viscousFlux_block
-          end if
-       end if
-
-       call sumDwAndFw_block
-    end if
-  end subroutine blockResCore
-
-  subroutine metrics
-    ! ---------------------------------------------
-    !              Metric computation
-    ! ---------------------------------------------
+                    if (useRotationSA) then
+                        sst = sst + rsaCrot * min(zero, sqrt(two * strainMag2))
+                    end if
 
-    use constants
-    use blockPointers, only : rightHanded
-    implicit none
+                    ! Make sure that this term remains positive
+                    ! (the function fv2 is negative between chi = 1 and 18.4,
+                    ! which can cause sst to go negative, which is undesirable).
 
-    integer(kind=intType) :: i, j, k, l, m, n
-    real(kind=realType), dimension(3) :: v1, v2
-    real(kind=realType) :: fact
+                    sst = max(sst, xminn)
 
-    ! Projected areas of cell faces in the i direction.
-    if (rightHanded) then
-       fact = half
-    else
-       fact = -half
-    end if
-    do k=1,ke
-       n = k -1
-       do j=1,je
-          m = j -1
-          do i=0,ie
+                    ! Compute the function fw. The argument rr is cut off at 10
+                    ! to avoid numerical problems. This is ok, because the
+                    ! asymptotical value of fw is then already reached.
 
-             ! Determine the two diagonal vectors of the face.
+                    rr = w(i, j, k, itu1) * kar2Inv * dist2Inv / sst
+                    rr = min(rr, 10.0_realType)
+                    gg = rr + rsaCw2 * (rr**6 - rr)
+                    gg6 = gg**6
+                    termFw = ((one + cw36) / (gg6 + cw36))**sixth
+                    fwSa = gg * termFw
 
-             v1(1) = x(i,j,n,1) - x(i,m,k,1)
-             v1(2) = x(i,j,n,2) - x(i,m,k,2)
-             v1(3) = x(i,j,n,3) - x(i,m,k,3)
+                    ! Compute the source term; some terms are saved for the
+                    ! linearization. The source term is stored in dvt.
 
-             v2(1) = x(i,j,k,1) - x(i,m,n,1)
-             v2(2) = x(i,j,k,2) - x(i,m,n,2)
-             v2(3) = x(i,j,k,3) - x(i,m,n,3)
+                    term1 = rsaCb1 * (one - ft2) * sqrtProd * term1Fact
+                    term2 = dist2Inv * (kar2Inv * rsaCb1 * ((one - ft2) * fv2 + ft2) &
+                                        - rsaCw1 * fwSa)
 
-             ! The face normal, which is the cross product of the two
-             ! diagonal vectors times fact; remember that fact is
-             ! either -0.5 or 0.5.
+                    dw(i, j, k, itu1) = dw(i, j, k, itu1) + (term1 + term2 * w(i, j, k, itu1)) * w(i, j, k, itu1)
 
-             si(i,j,k,1) = fact*(v1(2)*v2(3) - v1(3)*v2(2))
-             si(i,j,k,2) = fact*(v1(3)*v2(1) - v1(1)*v2(3))
-             si(i,j,k,3) = fact*(v1(1)*v2(2) - v1(2)*v2(1))
+                end do
+            end do
+        end do
+    end subroutine saSource
+
+    subroutine saViscous
+        ! ---------------------------------------------
+        !                    SA Viscous Term
+        ! ---------------------------------------------
+
+        use constants
+        use sa, only: cv13, kar2Inv, cw36, cb3Inv
+        use paramTurb
+        implicit none
+
+        ! Variables for sa Viscous
+        real(kind=realType) :: voli, volmi, volpi, xm, ym, zm, xp, yp, zp
+        real(kind=realType) :: xa, ya, za, ttm, ttp, cnud, cam, cap
+        real(kind=realType) :: nutm, nutp, num, nup, cdm, cdp
+        real(kind=realType) :: c1m, c1p, c10, b1, c1, d1, qs, nu
+        integer(Kind=intType) :: i, j, k
+
+        ! Set model constants
+        cv13 = rsaCv1**3
+        kar2Inv = one / (rsaK**2)
+        cw36 = rsaCw3**6
+        cb3Inv = one / rsaCb3
+
+        !
+        !       Viscous terms in k-direction.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Compute the metrics in zeta-direction, i.e. along the
+                    ! line k = constant.
+
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i, j, k - 1))
+                    volpi = two / (vol(i, j, k) + vol(i, j, k + 1))
+
+                    xm = sk(i, j, k - 1, 1) * volmi
+                    ym = sk(i, j, k - 1, 2) * volmi
+                    zm = sk(i, j, k - 1, 3) * volmi
+                    xp = sk(i, j, k, 1) * volpi
+                    yp = sk(i, j, k, 2) * volpi
+                    zp = sk(i, j, k, 3) * volpi
+
+                    xa = half * (sk(i, j, k, 1) + sk(i, j, k - 1, 1)) * voli
+                    ya = half * (sk(i, j, k, 2) + sk(i, j, k - 1, 2)) * voli
+                    za = half * (sk(i, j, k, 3) + sk(i, j, k - 1, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
+
+                    ! Computation of the viscous terms in zeta-direction; note
+                    ! that cross-derivatives are neglected, i.e. the mesh is
+                    ! assumed to be orthogonal.
+                    ! Furthermore, the grad(nu)**2 has been rewritten as
+                    ! div(nu grad(nu)) - nu div(grad nu) to enhance stability.
+                    ! The second derivative in zeta-direction is constructed as
+                    ! the central difference of the first order derivatives, i.e.
+                    ! d^2/dzeta^2 = d/dzeta (d/dzeta k+1/2 - d/dzeta k-1/2).
+                    ! In this way the metric can be taken into account.
+
+                    ! Compute the diffusion coefficients multiplying the nodes
+                    ! k+1, k and k-1 in the second derivative. Make sure that
+                    ! these coefficients are nonnegative.
+
+                    cnud = -rsaCb2 * w(i, j, k, itu1) * cb3Inv
+                    cam = ttm * cnud
+                    cap = ttp * cnud
+
+                    nutm = half * (w(i, j, k - 1, itu1) + w(i, j, k, itu1))
+                    nutp = half * (w(i, j, k + 1, itu1) + w(i, j, k, itu1))
+                    nu = rlv(i, j, k) / w(i, j, k, irho)
+                    num = half * (rlv(i, j, k - 1) / w(i, j, k - 1, irho) + nu)
+                    nup = half * (rlv(i, j, k + 1) / w(i, j, k + 1, irho) + nu)
+                    cdm = (num + (one + rsaCb2) * nutm) * ttm * cb3Inv
+                    cdp = (nup + (one + rsaCb2) * nutp) * ttp * cb3Inv
+
+                    c1m = max(cdm + cam, zero)
+                    c1p = max(cdp + cap, zero)
+                    c10 = c1m + c1p
+
+                    ! Update the residual for this cell and store the possible
+                    ! coefficients for the matrix in b1, c1 and d1.
+
+                    dw(i, j, k, itu1) = dw(i, j, k, itu1) + c1m * w(i, j, k - 1, itu1) &
+                                        - c10 * w(i, j, k, itu1) + c1p * w(i, j, k + 1, itu1)
+                end do
+            end do
+        end do
+        !
+        !       Viscous terms in j-direction.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Compute the metrics in eta-direction, i.e. along the
+                    ! line j = constant.
+
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i, j - 1, k))
+                    volpi = two / (vol(i, j, k) + vol(i, j + 1, k))
+
+                    xm = sj(i, j - 1, k, 1) * volmi
+                    ym = sj(i, j - 1, k, 2) * volmi
+                    zm = sj(i, j - 1, k, 3) * volmi
+                    xp = sj(i, j, k, 1) * volpi
+                    yp = sj(i, j, k, 2) * volpi
+                    zp = sj(i, j, k, 3) * volpi
+
+                    xa = half * (sj(i, j, k, 1) + sj(i, j - 1, k, 1)) * voli
+                    ya = half * (sj(i, j, k, 2) + sj(i, j - 1, k, 2)) * voli
+                    za = half * (sj(i, j, k, 3) + sj(i, j - 1, k, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
+
+                    ! Computation of the viscous terms in eta-direction; note
+                    ! that cross-derivatives are neglected, i.e. the mesh is
+                    ! assumed to be orthogonal.
+                    ! Furthermore, the grad(nu)**2 has been rewritten as
+                    ! div(nu grad(nu)) - nu div(grad nu) to enhance stability.
+                    ! The second derivative in eta-direction is constructed as
+                    ! the central difference of the first order derivatives, i.e.
+                    ! d^2/deta^2 = d/deta (d/deta j+1/2 - d/deta j-1/2).
+                    ! In this way the metric can be taken into account.
+
+                    ! Compute the diffusion coefficients multiplying the nodes
+                    ! j+1, j and j-1 in the second derivative. Make sure that
+                    ! these coefficients are nonnegative.
+
+                    cnud = -rsaCb2 * w(i, j, k, itu1) * cb3Inv
+                    cam = ttm * cnud
+                    cap = ttp * cnud
+
+                    nutm = half * (w(i, j - 1, k, itu1) + w(i, j, k, itu1))
+                    nutp = half * (w(i, j + 1, k, itu1) + w(i, j, k, itu1))
+                    nu = rlv(i, j, k) / w(i, j, k, irho)
+                    num = half * (rlv(i, j - 1, k) / w(i, j - 1, k, irho) + nu)
+                    nup = half * (rlv(i, j + 1, k) / w(i, j + 1, k, irho) + nu)
+                    cdm = (num + (one + rsaCb2) * nutm) * ttm * cb3Inv
+                    cdp = (nup + (one + rsaCb2) * nutp) * ttp * cb3Inv
+
+                    c1m = max(cdm + cam, zero)
+                    c1p = max(cdp + cap, zero)
+                    c10 = c1m + c1p
+
+                    ! Update the residual for this cell and store the possible
+                    ! coefficients for the matrix in b1, c1 and d1.
+
+                    dw(i, j, k, itu1) = dw(i, j, k, itu1) + c1m * w(i, j - 1, k, itu1) &
+                                        - c10 * w(i, j, k, itu1) + c1p * w(i, j + 1, k, itu1)
 
-          enddo
-       enddo
-    enddo
+                end do
+            end do
+        end do
+        !
+        !       Viscous terms in i-direction.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Compute the metrics in xi-direction, i.e. along the
+                    ! line i = constant.
+
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i - 1, j, k))
+                    volpi = two / (vol(i, j, k) + vol(i + 1, j, k))
+
+                    xm = si(i - 1, j, k, 1) * volmi
+                    ym = si(i - 1, j, k, 2) * volmi
+                    zm = si(i - 1, j, k, 3) * volmi
+                    xp = si(i, j, k, 1) * volpi
+                    yp = si(i, j, k, 2) * volpi
+                    zp = si(i, j, k, 3) * volpi
+
+                    xa = half * (si(i, j, k, 1) + si(i - 1, j, k, 1)) * voli
+                    ya = half * (si(i, j, k, 2) + si(i - 1, j, k, 2)) * voli
+                    za = half * (si(i, j, k, 3) + si(i - 1, j, k, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
+
+                    ! Computation of the viscous terms in xi-direction; note
+                    ! that cross-derivatives are neglected, i.e. the mesh is
+                    ! assumed to be orthogonal.
+                    ! Furthermore, the grad(nu)**2 has been rewritten as
+                    ! div(nu grad(nu)) - nu div(grad nu) to enhance stability.
+                    ! The second derivative in xi-direction is constructed as
+                    ! the central difference of the first order derivatives, i.e.
+                    ! d^2/dxi^2 = d/dxi (d/dxi i+1/2 - d/dxi i-1/2).
+                    ! In this way the metric can be taken into account.
+
+                    ! Compute the diffusion coefficients multiplying the nodes
+                    ! i+1, i and i-1 in the second derivative. Make sure that
+                    ! these coefficients are nonnegative.
+
+                    cnud = -rsaCb2 * w(i, j, k, itu1) * cb3Inv
+                    cam = ttm * cnud
+                    cap = ttp * cnud
+
+                    nutm = half * (w(i - 1, j, k, itu1) + w(i, j, k, itu1))
+                    nutp = half * (w(i + 1, j, k, itu1) + w(i, j, k, itu1))
+                    nu = rlv(i, j, k) / w(i, j, k, irho)
+                    num = half * (rlv(i - 1, j, k) / w(i - 1, j, k, irho) + nu)
+                    nup = half * (rlv(i + 1, j, k) / w(i + 1, j, k, irho) + nu)
+                    cdm = (num + (one + rsaCb2) * nutm) * ttm * cb3Inv
+                    cdp = (nup + (one + rsaCb2) * nutp) * ttp * cb3Inv
+
+                    c1m = max(cdm + cam, zero)
+                    c1p = max(cdp + cap, zero)
+                    c10 = c1m + c1p
+
+                    ! Update the residual for this cell and store the possible
+                    ! coefficients for the matrix in b1, c1 and d1.
+
+                    dw(i, j, k, itu1) = dw(i, j, k, itu1) + c1m * w(i - 1, j, k, itu1) &
+                                        - c10 * w(i, j, k, itu1) + c1p * w(i + 1, j, k, itu1)
+                end do
+            end do
+        end do
+    end subroutine saViscous
 
-    ! Projected areas of cell faces in the j direction.
+    subroutine saAdvection
+        ! ---------------------------------------------
+        !                SA Advection
+        ! ---------------------------------------------
+        use constants
+        use inputDiscretization, only: orderTurb
+        use iteration, only: groundlevel
+        use turbMod, only: secondOrd
+        implicit none
 
-    do k=1,ke
-       n = k -1
-       do j=0,je
-          do i=1,ie
-             l = i -1
+        ! Variables for sa Advection
+        real(kind=realType) :: uu, dwt, dwtm1, dwtp1, dwti, dwtj, dwtk, qs
+        real(kind=realType) :: voli, xa, ya, za
+        integer(kind=intType), parameter :: nAdv = 1
+        integer(kind=intType) :: offset, i, j, k, ii, jj
 
-             ! Determine the two diagonal vectors of the face.
+        ! Determine whether or not a second order discretization for the
+        ! advective terms must be used.
+        secondOrd = .false.
+        if (groundLevel == 1_intType .and. &
+            orderTurb == secondOrder) secondOrd = .true.
 
-             v1(1) = x(i,j,n,1) - x(l,j,k,1)
-             v1(2) = x(i,j,n,2) - x(l,j,k,2)
-             v1(3) = x(i,j,n,3) - x(l,j,k,3)
+        offset = itu1 - 1
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
 
-             v2(1) = x(l,j,n,1) - x(i,j,k,1)
-             v2(2) = x(l,j,n,2) - x(i,j,k,2)
-             v2(3) = x(l,j,n,3) - x(i,j,k,3)
+                    ! Compute the grid velocity if present.
+                    ! It is taken as the average of k and k-1,
 
-             ! The face normal, which is the cross product of the two
-             ! diagonal vectors times fact; remember that fact is
-             ! either -0.5 or 0.5.
+                    voli = half / vol(i, j, k)
+                    qs = (sFaceK(i, j, k) + sFaceK(i, j, k - 1)) * voli
 
-             sj(i,j,k,1) = fact*(v1(2)*v2(3) - v1(3)*v2(2))
-             sj(i,j,k,2) = fact*(v1(3)*v2(1) - v1(1)*v2(3))
-             sj(i,j,k,3) = fact*(v1(1)*v2(2) - v1(2)*v2(1))
+                    ! Compute the normal velocity, where the normal direction
+                    ! is taken as the average of faces k and k-1.
 
-          enddo
-       enddo
-    enddo
+                    xa = (sk(i, j, k, 1) + sk(i, j, k - 1, 1)) * voli
+                    ya = (sk(i, j, k, 2) + sk(i, j, k - 1, 2)) * voli
+                    za = (sk(i, j, k, 3) + sk(i, j, k - 1, 3)) * voli
 
-    ! Projected areas of cell faces in the k direction.
+                    uu = xa * w(i, j, k, ivx) + ya * w(i, j, k, ivy) + za * w(i, j, k, ivz) - qs
 
-    do k=0,ke
-       do j=1,je
-          m = j -1
-          do i=1,ie
-             l = i -1
+                    ! Determine the situation we are having here, i.e. positive
+                    ! or negative normal velocity.
 
-             ! Determine the two diagonal vectors of the face.
+                    velKdir: if (uu > zero) then
 
-             v1(1) = x(i,j,k,1) - x(l,m,k,1)
-             v1(2) = x(i,j,k,2) - x(l,m,k,2)
-             v1(3) = x(i,j,k,3) - x(l,m,k,3)
+                        ! Velocity has a component in positive k-direction.
+                        ! Loop over the number of advection equations.
 
-             v2(1) = x(l,j,k,1) - x(i,m,k,1)
-             v2(2) = x(l,j,k,2) - x(i,m,k,2)
-             v2(3) = x(l,j,k,3) - x(i,m,k,3)
+                        do ii = 1, nAdv
 
-             ! The face normal, which is the cross product of the two
-             ! diagonal vectors times fact; remember that fact is
-             ! either -0.5 or 0.5.
+                            ! Set the value of jj such that it corresponds to the
+                            ! turbulent entry in w.
 
-             sk(i,j,k,1) = fact*(v1(2)*v2(3) - v1(3)*v2(2))
-             sk(i,j,k,2) = fact*(v1(3)*v2(1) - v1(1)*v2(3))
-             sk(i,j,k,3) = fact*(v1(1)*v2(2) - v1(2)*v2(1))
+                            jj = ii + offset
 
-          enddo
-       enddo
-    enddo
-  end subroutine metrics
+                            ! Check whether a first or a second order discretization
+                            ! must be used.
 
-  subroutine initRes(varStart, varEnd)
-    ! ---------------------------------------------
-    !                     Init Res
-    ! ---------------------------------------------
+                            if (secondOrd) then
 
-    use constants
-    implicit none
+                                ! Second order; store the three differences for the
+                                ! discretization of the derivative in k-direction.
 
-    integer(kind=intType) :: varStart, varEnd
-    ! Obviously this needs to be more complex for the actual code.
-    dw(:, :, :, varStart:varEnd) = zero
+                                dwtm1 = w(i, j, k - 1, jj) - w(i, j, k - 2, jj)
+                                dwt = w(i, j, k, jj) - w(i, j, k - 1, jj)
+                                dwtp1 = w(i, j, k + 1, jj) - w(i, j, k, jj)
 
-  end subroutine initRes
+                                ! Construct the derivative in this cell center. This
+                                ! is the first order upwind derivative with two
+                                ! nonlinear corrections.
 
-  subroutine saSource
-    ! ---------------------------------------------
-    !                    SA Source Term
-    ! ---------------------------------------------
+                                dwtk = dwt
 
-    use constants
-    use paramTurb
-    use blockPointers, only : sectionID
-    use inputPhysics, only :useft2SA, useRotationSA, turbProd, equations
-    use inputDiscretization, only : approxSA
-    use section, only : sections
-    use sa, only : cv13, kar2Inv, cw36, cb3Inv
-    use flowvarRefState, only : timeRef
-
-    implicit none
-
-    ! Variables for sa Souce
-    real(kind=realType) :: fv1, fv2, ft2
-    real(kind=realType) :: sst, nu, dist2Inv, chi, chi2, chi3
-    real(kind=realType) :: rr, gg, gg6, termFw, fwSa, term1, term2
-    real(kind=realType) :: dfv1, dfv2, dft2, drr, dgg, dfw, sqrtProd
-    real(kind=realType) :: uux, uuy, uuz, vvx, vvy, vvz, wwx, wwy, wwz
-    real(kind=realType) :: div2, fact, sxx, syy, szz, sxy, sxz, syz
-    real(kind=realType) :: vortx, vorty, vortz
-    real(kind=realType) :: omegax, omegay, omegaz
-    real(kind=realType) :: strainMag2, prod
-    real(kind=realType), parameter :: xminn = 1.e-10_realType
-    real(kind=realType), parameter :: f23 = two*third
-    integer(kind=intType) :: i, j, k
-    real(kind=realType) :: term1Fact
-
-    ! Set model constants
-    cv13    = rsaCv1**3
-    kar2Inv = one/(rsaK**2)
-    cw36    = rsaCw3**6
-    cb3Inv  = one/rsaCb3
-
-    ! set the approximate multiplier here
-    term1Fact = one
-    if (approxSA) term1Fact = zero
-
-    ! Determine the non-dimensional wheel speed of this block.
-
-    omegax = timeRef*sections(sectionID)%rotRate(1)
-    omegay = timeRef*sections(sectionID)%rotRate(2)
-    omegaz = timeRef*sections(sectionID)%rotRate(3)
-    do k=2, kl
-       do j=2, jl
-          do i=2, il
-
-             ! Compute the gradient of u in the cell center. Use is made
-             ! of the fact that the surrounding normals sum up to zero,
-             ! such that the cell i,j,k does not give a contribution.
-             ! The gradient is scaled by the factor 2*vol.
-
-             uux = w(i+1,j,k,ivx)*si(i,j,k,1) - w(i-1,j,k,ivx)*si(i-1,j,k,1) &
-                  + w(i,j+1,k,ivx)*sj(i,j,k,1) - w(i,j-1,k,ivx)*sj(i,j-1,k,1) &
-                  + w(i,j,k+1,ivx)*sk(i,j,k,1) - w(i,j,k-1,ivx)*sk(i,j,k-1,1)
-             uuy = w(i+1,j,k,ivx)*si(i,j,k,2) - w(i-1,j,k,ivx)*si(i-1,j,k,2) &
-                  + w(i,j+1,k,ivx)*sj(i,j,k,2) - w(i,j-1,k,ivx)*sj(i,j-1,k,2) &
-                  + w(i,j,k+1,ivx)*sk(i,j,k,2) - w(i,j,k-1,ivx)*sk(i,j,k-1,2)
-             uuz = w(i+1,j,k,ivx)*si(i,j,k,3) - w(i-1,j,k,ivx)*si(i-1,j,k,3) &
-                  + w(i,j+1,k,ivx)*sj(i,j,k,3) - w(i,j-1,k,ivx)*sj(i,j-1,k,3) &
-                  + w(i,j,k+1,ivx)*sk(i,j,k,3) - w(i,j,k-1,ivx)*sk(i,j,k-1,3)
-
-             ! Idem for the gradient of v.
-
-             vvx = w(i+1,j,k,ivy)*si(i,j,k,1) - w(i-1,j,k,ivy)*si(i-1,j,k,1) &
-                  + w(i,j+1,k,ivy)*sj(i,j,k,1) - w(i,j-1,k,ivy)*sj(i,j-1,k,1) &
-                  + w(i,j,k+1,ivy)*sk(i,j,k,1) - w(i,j,k-1,ivy)*sk(i,j,k-1,1)
-             vvy = w(i+1,j,k,ivy)*si(i,j,k,2) - w(i-1,j,k,ivy)*si(i-1,j,k,2) &
-                  + w(i,j+1,k,ivy)*sj(i,j,k,2) - w(i,j-1,k,ivy)*sj(i,j-1,k,2) &
-                  + w(i,j,k+1,ivy)*sk(i,j,k,2) - w(i,j,k-1,ivy)*sk(i,j,k-1,2)
-             vvz = w(i+1,j,k,ivy)*si(i,j,k,3) - w(i-1,j,k,ivy)*si(i-1,j,k,3) &
-                  + w(i,j+1,k,ivy)*sj(i,j,k,3) - w(i,j-1,k,ivy)*sj(i,j-1,k,3) &
-                  + w(i,j,k+1,ivy)*sk(i,j,k,3) - w(i,j,k-1,ivy)*sk(i,j,k-1,3)
-
-             ! And for the gradient of w.
-
-             wwx = w(i+1,j,k,ivz)*si(i,j,k,1) - w(i-1,j,k,ivz)*si(i-1,j,k,1) &
-                  + w(i,j+1,k,ivz)*sj(i,j,k,1) - w(i,j-1,k,ivz)*sj(i,j-1,k,1) &
-                  + w(i,j,k+1,ivz)*sk(i,j,k,1) - w(i,j,k-1,ivz)*sk(i,j,k-1,1)
-             wwy = w(i+1,j,k,ivz)*si(i,j,k,2) - w(i-1,j,k,ivz)*si(i-1,j,k,2) &
-                  + w(i,j+1,k,ivz)*sj(i,j,k,2) - w(i,j-1,k,ivz)*sj(i,j-1,k,2) &
-                  + w(i,j,k+1,ivz)*sk(i,j,k,2) - w(i,j,k-1,ivz)*sk(i,j,k-1,2)
-             wwz = w(i+1,j,k,ivz)*si(i,j,k,3) - w(i-1,j,k,ivz)*si(i-1,j,k,3) &
-                  + w(i,j+1,k,ivz)*sj(i,j,k,3) - w(i,j-1,k,ivz)*sj(i,j-1,k,3) &
-                  + w(i,j,k+1,ivz)*sk(i,j,k,3) - w(i,j,k-1,ivz)*sk(i,j,k-1,3)
-
-             ! Compute the components of the stress tensor.
-             ! The combination of the current scaling of the velocity
-             ! gradients (2*vol) and the definition of the stress tensor,
-             ! leads to the factor 1/(4*vol).
-
-             fact = fourth/vol(i,j,k)
-
-             ! -- Calcs for strain --
-             sxx = two*fact*uux
-             syy = two*fact*vvy
-             szz = two*fact*wwz
-
-             sxy = fact*(uuy + vvx)
-             sxz = fact*(uuz + wwx)
-             syz = fact*(vvz + wwy)
-
-             ! Compute 2/3 * divergence of velocity squared
-
-             div2 = f23*(sxx+syy+szz)**2
-
-             ! Compute strain production term
-
-             strainMag2 = two*(sxy**2 + sxz**2 + syz**2) &
-                  +           sxx**2 + syy**2 + szz**2
-
-             ! -- Calcs for vorticity --
-
-             ! Compute the three components of the vorticity vector.
-             ! Substract the part coming from the rotating frame.
-
-             vortx = two*fact*(wwy - vvz) - two*omegax
-             vorty = two*fact*(uuz - wwx) - two*omegay
-             vortz = two*fact*(vvx - uuy) - two*omegaz
-
-             if (turbProd == strain) then
-                sqrtProd = sqrt(max(two*strainMag2-div2, eps))
-             else
-                sqrtProd = sqrt(vortx**2 + vorty**2 + vortz**2)
-             end if
-
-             ! Compute the laminar kinematic viscosity, the inverse of
-             ! wall distance squared, the ratio chi (ratio of nuTilde
-             ! and nu) and the functions fv1 and fv2. The latter corrects
-             ! the production term near a viscous wall.
-
-             nu       = rlv(i,j,k)/w(i,j,k,irho)
-             dist2Inv = one/(d2Wall(i,j,k)**2)
-             chi      = w(i,j,k,itu1)/nu
-             chi2     = chi*chi
-             chi3     = chi*chi2
-             fv1      = chi3/(chi3+cv13)
-             fv2      = one - chi/(one + chi*fv1)
-
-             ! The function ft2, which is designed to keep a laminar
-             ! solution laminar. When running in fully turbulent mode
-             ! this function should be set to 0.0.
-
-             ft2 = zero
-             if (useft2SA) then
-                ft2 = rsaCt3*exp(-rsaCt4*chi2)
-             end if
-
-             ! Correct the production term to account for the influence
-             ! of the wall.
-
-             sst = sqrtProd + w(i,j,k,itu1)*fv2*kar2Inv*dist2Inv
-
-             ! Add rotation term (useRotationSA defined in inputParams.F90)
-
-             if (useRotationSA) then
-                sst = sst + rsaCrot*min(zero,sqrt(two*strainMag2))
-             end if
+                                if (dwt * dwtp1 > zero) then
+                                    if (abs(dwt) < abs(dwtp1)) then
+                                        dwtk = dwtk + half * dwt
+                                    else
+                                        dwtk = dwtk + half * dwtp1
+                                    end if
+                                end if
+
+                                if (dwt * dwtm1 > zero) then
+                                    if (abs(dwt) < abs(dwtm1)) then
+                                        dwtk = dwtk - half * dwt
+                                    else
+                                        dwtk = dwtk - half * dwtm1
+                                    end if
+                                end if
 
-             ! Make sure that this term remains positive
-             ! (the function fv2 is negative between chi = 1 and 18.4,
-             ! which can cause sst to go negative, which is undesirable).
+                            else
 
-             sst = max(sst,xminn)
+                                ! 1st order upwind scheme.
 
-             ! Compute the function fw. The argument rr is cut off at 10
-             ! to avoid numerical problems. This is ok, because the
-             ! asymptotical value of fw is then already reached.
+                                dwtk = w(i, j, k, jj) - w(i, j, k - 1, jj)
 
-             rr     = w(i,j,k,itu1)*kar2Inv*dist2Inv/sst
-             rr     = min(rr,10.0_realType)
-             gg     = rr + rsaCw2*(rr**6 - rr)
-             gg6    = gg**6
-             termFw = ((one + cw36)/(gg6 + cw36))**sixth
-             fwSa   = gg*termFw
+                            end if
 
-             ! Compute the source term; some terms are saved for the
-             ! linearization. The source term is stored in dvt.
+                            ! Update the residual. The convective term must be
+                            ! substracted, because it appears on the other side of
+                            ! the equation as the source and viscous terms.
 
-             term1 = rsaCb1*(one-ft2)*sqrtProd*term1Fact
-             term2 = dist2Inv*(kar2Inv*rsaCb1*((one-ft2)*fv2 + ft2) &
-                  -           rsaCw1*fwSa)
+                            dw(i, j, k, itu1 + ii - 1) = dw(i, j, k, itu1 + ii - 1) - uu * dwtk
+                        end do
 
-             dw(i, j, k, itu1) = dw(i, j, k, itu1) + (term1 + term2*w(i,j,k,itu1))*w(i,j,k,itu1)
+                        else velKdir
 
-          enddo
-       enddo
-    enddo
-  end subroutine saSource
+                        ! Velocity has a component in negative k-direction.
+                        ! Loop over the number of advection equations
+                        do ii = 1, nAdv
 
-  subroutine saViscous
-    ! ---------------------------------------------
-    !                    SA Viscous Term
-    ! ---------------------------------------------
+                            ! Set the value of jj such that it corresponds to the
+                            ! turbulent entry in w.
 
-    use constants
-    use sa, only : cv13, kar2Inv, cw36, cb3Inv
-    use paramTurb
-    implicit none
-
-    ! Variables for sa Viscous
-    real(kind=realType) :: voli, volmi, volpi, xm, ym, zm, xp, yp, zp
-    real(kind=realType) :: xa, ya, za, ttm, ttp, cnud, cam, cap
-    real(kind=realType) :: nutm, nutp, num, nup, cdm, cdp
-    real(kind=realType) :: c1m, c1p, c10, b1, c1, d1, qs, nu
-    integer(Kind=intType) :: i, j, k
-
-    ! Set model constants
-    cv13    = rsaCv1**3
-    kar2Inv = one/(rsaK**2)
-    cw36    = rsaCw3**6
-    cb3Inv  = one/rsaCb3
-
-    !
-    !       Viscous terms in k-direction.
-    !
-    do k=2, kl
-       do j=2, jl
-          do i=2, il
-
-             ! Compute the metrics in zeta-direction, i.e. along the
-             ! line k = constant.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i,j,k-1))
-             volpi = two/(vol(i,j,k) + vol(i,j,k+1))
-
-             xm = sk(i,j,k-1,1)*volmi
-             ym = sk(i,j,k-1,2)*volmi
-             zm = sk(i,j,k-1,3)*volmi
-             xp = sk(i,j,k,  1)*volpi
-             yp = sk(i,j,k,  2)*volpi
-             zp = sk(i,j,k,  3)*volpi
-
-             xa  = half*(sk(i,j,k,1) + sk(i,j,k-1,1))*voli
-             ya  = half*(sk(i,j,k,2) + sk(i,j,k-1,2))*voli
-             za  = half*(sk(i,j,k,3) + sk(i,j,k-1,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             ! Computation of the viscous terms in zeta-direction; note
-             ! that cross-derivatives are neglected, i.e. the mesh is
-             ! assumed to be orthogonal.
-             ! Furthermore, the grad(nu)**2 has been rewritten as
-             ! div(nu grad(nu)) - nu div(grad nu) to enhance stability.
-             ! The second derivative in zeta-direction is constructed as
-             ! the central difference of the first order derivatives, i.e.
-             ! d^2/dzeta^2 = d/dzeta (d/dzeta k+1/2 - d/dzeta k-1/2).
-             ! In this way the metric can be taken into account.
-
-             ! Compute the diffusion coefficients multiplying the nodes
-             ! k+1, k and k-1 in the second derivative. Make sure that
-             ! these coefficients are nonnegative.
-
-             cnud = -rsaCb2*w(i,j,k,itu1)*cb3Inv
-             cam  =  ttm*cnud
-             cap  =  ttp*cnud
-
-             nutm = half*(w(i,j,k-1,itu1) + w(i,j,k,itu1))
-             nutp = half*(w(i,j,k+1,itu1) + w(i,j,k,itu1))
-             nu   = rlv(i,j,k)/w(i,j,k,irho)
-             num  = half*(rlv(i,j,k-1)/w(i,j,k-1,irho) + nu)
-             nup  = half*(rlv(i,j,k+1)/w(i,j,k+1,irho) + nu)
-             cdm  = (num + (one + rsaCb2)*nutm)*ttm*cb3Inv
-             cdp  = (nup + (one + rsaCb2)*nutp)*ttp*cb3Inv
-
-             c1m = max(cdm+cam, zero)
-             c1p = max(cdp+cap, zero)
-             c10 = c1m + c1p
-
-             ! Update the residual for this cell and store the possible
-             ! coefficients for the matrix in b1, c1 and d1.
-
-             dw(i,j,k,itu1) = dw(i,j,k,itu1)      + c1m*w(i,j,k-1,itu1) &
-                  - c10*w(i,j,k,itu1) + c1p*w(i,j,k+1,itu1)
-          end do
-       enddo
-    enddo
-    !
-    !       Viscous terms in j-direction.
-    !
-    do k=2, kl
-       do j=2, jl
-          do i=2, il
-
-             ! Compute the metrics in eta-direction, i.e. along the
-             ! line j = constant.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i,j-1,k))
-             volpi = two/(vol(i,j,k) + vol(i,j+1,k))
-
-             xm = sj(i,j-1,k,1)*volmi
-             ym = sj(i,j-1,k,2)*volmi
-             zm = sj(i,j-1,k,3)*volmi
-             xp = sj(i,j,  k,1)*volpi
-             yp = sj(i,j,  k,2)*volpi
-             zp = sj(i,j,  k,3)*volpi
-
-             xa  = half*(sj(i,j,k,1) + sj(i,j-1,k,1))*voli
-             ya  = half*(sj(i,j,k,2) + sj(i,j-1,k,2))*voli
-             za  = half*(sj(i,j,k,3) + sj(i,j-1,k,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             ! Computation of the viscous terms in eta-direction; note
-             ! that cross-derivatives are neglected, i.e. the mesh is
-             ! assumed to be orthogonal.
-             ! Furthermore, the grad(nu)**2 has been rewritten as
-             ! div(nu grad(nu)) - nu div(grad nu) to enhance stability.
-             ! The second derivative in eta-direction is constructed as
-             ! the central difference of the first order derivatives, i.e.
-             ! d^2/deta^2 = d/deta (d/deta j+1/2 - d/deta j-1/2).
-             ! In this way the metric can be taken into account.
-
-             ! Compute the diffusion coefficients multiplying the nodes
-             ! j+1, j and j-1 in the second derivative. Make sure that
-             ! these coefficients are nonnegative.
-
-             cnud = -rsaCb2*w(i,j,k,itu1)*cb3Inv
-             cam  =  ttm*cnud
-             cap  =  ttp*cnud
-
-             nutm = half*(w(i,j-1,k,itu1) + w(i,j,k,itu1))
-             nutp = half*(w(i,j+1,k,itu1) + w(i,j,k,itu1))
-             nu   = rlv(i,j,k)/w(i,j,k,irho)
-             num  = half*(rlv(i,j-1,k)/w(i,j-1,k,irho) + nu)
-             nup  = half*(rlv(i,j+1,k)/w(i,j+1,k,irho) + nu)
-             cdm  = (num + (one + rsaCb2)*nutm)*ttm*cb3Inv
-             cdp  = (nup + (one + rsaCb2)*nutp)*ttp*cb3Inv
-
-             c1m = max(cdm+cam, zero)
-             c1p = max(cdp+cap, zero)
-             c10 = c1m + c1p
-
-             ! Update the residual for this cell and store the possible
-             ! coefficients for the matrix in b1, c1 and d1.
-
-             dw(i,j,k,itu1) = dw(i,j,k,itu1)      + c1m*w(i,j-1,k,itu1) &
-                  - c10*w(i,j,k,itu1) + c1p*w(i,j+1,k,itu1)
-
-          enddo
-       enddo
-    enddo
-    !
-    !       Viscous terms in i-direction.
-    !
-    do k=2, kl
-       do j=2, jl
-          do i=2, il
-
-             ! Compute the metrics in xi-direction, i.e. along the
-             ! line i = constant.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i-1,j,k))
-             volpi = two/(vol(i,j,k) + vol(i+1,j,k))
-
-             xm = si(i-1,j,k,1)*volmi
-             ym = si(i-1,j,k,2)*volmi
-             zm = si(i-1,j,k,3)*volmi
-             xp = si(i,  j,k,1)*volpi
-             yp = si(i,  j,k,2)*volpi
-             zp = si(i,  j,k,3)*volpi
-
-             xa  = half*(si(i,j,k,1) + si(i-1,j,k,1))*voli
-             ya  = half*(si(i,j,k,2) + si(i-1,j,k,2))*voli
-             za  = half*(si(i,j,k,3) + si(i-1,j,k,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             ! Computation of the viscous terms in xi-direction; note
-             ! that cross-derivatives are neglected, i.e. the mesh is
-             ! assumed to be orthogonal.
-             ! Furthermore, the grad(nu)**2 has been rewritten as
-             ! div(nu grad(nu)) - nu div(grad nu) to enhance stability.
-             ! The second derivative in xi-direction is constructed as
-             ! the central difference of the first order derivatives, i.e.
-             ! d^2/dxi^2 = d/dxi (d/dxi i+1/2 - d/dxi i-1/2).
-             ! In this way the metric can be taken into account.
-
-             ! Compute the diffusion coefficients multiplying the nodes
-             ! i+1, i and i-1 in the second derivative. Make sure that
-             ! these coefficients are nonnegative.
-
-             cnud = -rsaCb2*w(i,j,k,itu1)*cb3Inv
-             cam  =  ttm*cnud
-             cap  =  ttp*cnud
-
-             nutm = half*(w(i-1,j,k,itu1) + w(i,j,k,itu1))
-             nutp = half*(w(i+1,j,k,itu1) + w(i,j,k,itu1))
-             nu   = rlv(i,j,k)/w(i,j,k,irho)
-             num  = half*(rlv(i-1,j,k)/w(i-1,j,k,irho) + nu)
-             nup  = half*(rlv(i+1,j,k)/w(i+1,j,k,irho) + nu)
-             cdm  = (num + (one + rsaCb2)*nutm)*ttm*cb3Inv
-             cdp  = (nup + (one + rsaCb2)*nutp)*ttp*cb3Inv
-
-             c1m = max(cdm+cam, zero)
-             c1p = max(cdp+cap, zero)
-             c10 = c1m + c1p
-
-             ! Update the residual for this cell and store the possible
-             ! coefficients for the matrix in b1, c1 and d1.
-
-             dw(i,j,k,itu1) = dw(i,j,k,itu1)      + c1m*w(i-1,j,k,itu1) &
-                  - c10*w(i,j,k,itu1) + c1p*w(i+1,j,k,itu1)
-          enddo
-       enddo
-    enddo
-  end subroutine saViscous
-
-  subroutine saAdvection
-    ! ---------------------------------------------
-    !                SA Advection
-    ! ---------------------------------------------
-    use constants
-    use inputDiscretization, only : orderTurb
-    use iteration, only : groundlevel
-    use turbMod, only : secondOrd
-    implicit none
+                            jj = ii + offset
+
+                            ! Check whether a first or a second order discretization
+                            ! must be used.
+
+                            if (secondOrd) then
+
+                                ! Store the three differences for the discretization of
+                                ! the derivative in k-direction.
+
+                                dwtm1 = w(i, j, k, jj) - w(i, j, k - 1, jj)
+                                dwt = w(i, j, k + 1, jj) - w(i, j, k, jj)
+                                dwtp1 = w(i, j, k + 2, jj) - w(i, j, k + 1, jj)
+
+                                ! Construct the derivative in this cell center. This is
+                                ! the first order upwind derivative with two nonlinear
+                                ! corrections.
 
-    ! Variables for sa Advection
-    real(kind=realType) :: uu, dwt, dwtm1, dwtp1, dwti, dwtj, dwtk, qs
-    real(kind=realType) :: voli, xa, ya, za
-    integer(kind=intType), parameter :: nAdv=1
-    integer(kind=intType) :: offset, i, j, k, ii, jj
+                                dwtk = dwt
 
-    ! Determine whether or not a second order discretization for the
-    ! advective terms must be used.
-    secondOrd = .false.
-    if(groundLevel == 1_intType .and. &
-         orderTurb == secondOrder) secondOrd = .true.
+                                if (dwt * dwtp1 > zero) then
+                                    if (abs(dwt) < abs(dwtp1)) then
+                                        dwtk = dwtk - half * dwt
+                                    else
+                                        dwtk = dwtk - half * dwtp1
+                                    end if
+                                end if
 
-    offset=itu1-1
-    do k=2, kl
-       do j=2, jl
-          do i=2, il
+                                if (dwt * dwtm1 > zero) then
+                                    if (abs(dwt) < abs(dwtm1)) then
+                                        dwtk = dwtk + half * dwt
+                                    else
+                                        dwtk = dwtk + half * dwtm1
+                                    end if
+                                end if
 
-             ! Compute the grid velocity if present.
-             ! It is taken as the average of k and k-1,
+                            else
 
-             voli = half/vol(i,j,k)
-             qs = (sFaceK(i,j,k) + sFaceK(i,j,k-1))*voli
+                                ! 1st order upwind scheme.
 
-             ! Compute the normal velocity, where the normal direction
-             ! is taken as the average of faces k and k-1.
+                                dwtk = w(i, j, k + 1, jj) - w(i, j, k, jj)
 
-             xa = (sk(i,j,k,1) + sk(i,j,k-1,1))*voli
-             ya = (sk(i,j,k,2) + sk(i,j,k-1,2))*voli
-             za = (sk(i,j,k,3) + sk(i,j,k-1,3))*voli
+                            end if
 
-             uu = xa*w(i,j,k,ivx) + ya*w(i,j,k,ivy) + za*w(i,j,k,ivz) - qs
+                            ! Update the residual. The convective term must be
+                            ! substracted, because it appears on the other side
+                            ! of the equation as the source and viscous terms.
 
-             ! Determine the situation we are having here, i.e. positive
-             ! or negative normal velocity.
+                            dw(i, j, k, itu1 + ii - 1) = dw(i, j, k, itu1 + ii - 1) - uu * dwtk
+                        end do
+                    end if velKdir
+                end do
+            end do
+        end do
+
+        !
+        !       Upwind discretization of the convective term in j (eta)
+        !       direction. Either the 1st order upwind or the second order
+        !       fully upwind interpolation scheme, kappa = -1, is used in
+        !       combination with the minmod limiter.
+        !       The possible grid velocity must be taken into account.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Compute the grid velocity if present.
+                    ! It is taken as the average of j and j-1,
+
+                    voli = half / vol(i, j, k)
+                    qs = (sFaceJ(i, j, k) + sFaceJ(i, j - 1, k)) * voli
+
+                    ! Compute the normal velocity, where the normal direction
+                    ! is taken as the average of faces j and j-1.
 
-             velKdir: if(uu > zero) then
+                    xa = (sj(i, j, k, 1) + sj(i, j - 1, k, 1)) * voli
+                    ya = (sj(i, j, k, 2) + sj(i, j - 1, k, 2)) * voli
+                    za = (sj(i, j, k, 3) + sj(i, j - 1, k, 3)) * voli
 
-                ! Velocity has a component in positive k-direction.
-                ! Loop over the number of advection equations.
+                    uu = xa * w(i, j, k, ivx) + ya * w(i, j, k, ivy) + za * w(i, j, k, ivz) - qs
 
-                do ii=1,nAdv
+                    ! Determine the situation we are having here, i.e. positive
+                    ! or negative normal velocity.
 
-                   ! Set the value of jj such that it corresponds to the
-                   ! turbulent entry in w.
+                    velJdir: if (uu > zero) then
 
-                   jj = ii + offset
+                        ! Velocity has a component in positive j-direction.
+                        ! Loop over the number of advection equations.
+                        do ii = 1, nAdv
 
-                   ! Check whether a first or a second order discretization
-                   ! must be used.
+                            ! Set the value of jj such that it corresponds to the
+                            ! turbulent entry in w.
 
-                   if( secondOrd ) then
+                            jj = ii + offset
 
-                      ! Second order; store the three differences for the
-                      ! discretization of the derivative in k-direction.
+                            ! Check whether a first or a second order discretization
+                            ! must be used.
 
-                      dwtm1 = w(i,j,k-1,jj) - w(i,j,k-2,jj)
-                      dwt   = w(i,j,k,  jj) - w(i,j,k-1,jj)
-                      dwtp1 = w(i,j,k+1,jj) - w(i,j,k,  jj)
+                            if (secondOrd) then
 
-                      ! Construct the derivative in this cell center. This
-                      ! is the first order upwind derivative with two
-                      ! nonlinear corrections.
+                                ! Second order; store the three differences for the
+                                ! discretization of the derivative in j-direction.
 
-                      dwtk = dwt
+                                dwtm1 = w(i, j - 1, k, jj) - w(i, j - 2, k, jj)
+                                dwt = w(i, j, k, jj) - w(i, j - 1, k, jj)
+                                dwtp1 = w(i, j + 1, k, jj) - w(i, j, k, jj)
 
-                      if(dwt*dwtp1 > zero) then
-                         if(abs(dwt) < abs(dwtp1)) then
-                            dwtk = dwtk + half*dwt
-                         else
-                            dwtk = dwtk + half*dwtp1
-                         endif
-                      endif
+                                ! Construct the derivative in this cell center. This is
+                                ! the first order upwind derivative with two nonlinear
+                                ! corrections.
 
-                      if(dwt*dwtm1 > zero) then
-                         if(abs(dwt) < abs(dwtm1)) then
-                            dwtk = dwtk - half*dwt
-                         else
-                            dwtk = dwtk - half*dwtm1
-                         endif
-                      endif
+                                dwtj = dwt
 
-                   else
+                                if (dwt * dwtp1 > zero) then
+                                    if (abs(dwt) < abs(dwtp1)) then
+                                        dwtj = dwtj + half * dwt
+                                    else
+                                        dwtj = dwtj + half * dwtp1
+                                    end if
+                                end if
 
-                      ! 1st order upwind scheme.
+                                if (dwt * dwtm1 > zero) then
+                                    if (abs(dwt) < abs(dwtm1)) then
+                                        dwtj = dwtj - half * dwt
+                                    else
+                                        dwtj = dwtj - half * dwtm1
+                                    end if
+                                end if
 
-                      dwtk = w(i,j,k,jj) - w(i,j,k-1,jj)
+                            else
 
-                   endif
+                                ! 1st order upwind scheme.
 
-                   ! Update the residual. The convective term must be
-                   ! substracted, because it appears on the other side of
-                   ! the equation as the source and viscous terms.
+                                dwtj = w(i, j, k, jj) - w(i, j - 1, k, jj)
 
-                   dw(i,j,k,itu1+ii-1) = dw(i,j,k,itu1+ii-1) - uu*dwtk
-                enddo
+                            end if
 
-             else velKdir
+                            ! Update the residual. The convective term must be
+                            ! substracted, because it appears on the other side of
+                            ! the equation as the source and viscous terms.
 
-                ! Velocity has a component in negative k-direction.
-                ! Loop over the number of advection equations
-                do ii=1,nAdv
+                            dw(i, j, k, itu1 + ii - 1) = dw(i, j, k, itu1 + ii - 1) - uu * dwtj
+                        end do
+
+                        else velJdir
+
+                        ! Velocity has a component in negative j-direction.
+                        ! Loop over the number of advection equations.
+                        do ii = 1, nAdv
 
-                   ! Set the value of jj such that it corresponds to the
-                   ! turbulent entry in w.
+                            ! Set the value of jj such that it corresponds to the
+                            ! turbulent entry in w.
 
-                   jj = ii + offset
+                            jj = ii + offset
 
-                   ! Check whether a first or a second order discretization
-                   ! must be used.
+                            ! Check whether a first or a second order discretization
+                            ! must be used.
 
-                   if( secondOrd ) then
+                            if (secondOrd) then
 
-                      ! Store the three differences for the discretization of
-                      ! the derivative in k-direction.
+                                ! Store the three differences for the discretization of
+                                ! the derivative in j-direction.
 
-                      dwtm1 = w(i,j,k,  jj) - w(i,j,k-1,jj)
-                      dwt   = w(i,j,k+1,jj) - w(i,j,k,  jj)
-                      dwtp1 = w(i,j,k+2,jj) - w(i,j,k+1,jj)
+                                dwtm1 = w(i, j, k, jj) - w(i, j - 1, k, jj)
+                                dwt = w(i, j + 1, k, jj) - w(i, j, k, jj)
+                                dwtp1 = w(i, j + 2, k, jj) - w(i, j + 1, k, jj)
 
-                      ! Construct the derivative in this cell center. This is
-                      ! the first order upwind derivative with two nonlinear
-                      ! corrections.
+                                ! Construct the derivative in this cell center. This is
+                                ! the first order upwind derivative with two nonlinear
+                                ! corrections.
 
-                      dwtk = dwt
+                                dwtj = dwt
 
-                      if(dwt*dwtp1 > zero) then
-                         if(abs(dwt) < abs(dwtp1)) then
-                            dwtk = dwtk - half*dwt
-                         else
-                            dwtk = dwtk - half*dwtp1
-                         endif
-                      endif
+                                if (dwt * dwtp1 > zero) then
+                                    if (abs(dwt) < abs(dwtp1)) then
+                                        dwtj = dwtj - half * dwt
+                                    else
+                                        dwtj = dwtj - half * dwtp1
+                                    end if
+                                end if
 
-                      if(dwt*dwtm1 > zero) then
-                         if(abs(dwt) < abs(dwtm1)) then
-                            dwtk = dwtk + half*dwt
-                         else
-                            dwtk = dwtk + half*dwtm1
-                         endif
-                      endif
+                                if (dwt * dwtm1 > zero) then
+                                    if (abs(dwt) < abs(dwtm1)) then
+                                        dwtj = dwtj + half * dwt
+                                    else
+                                        dwtj = dwtj + half * dwtm1
+                                    end if
+                                end if
 
-                   else
+                            else
 
-                      ! 1st order upwind scheme.
+                                ! 1st order upwind scheme.
 
-                      dwtk = w(i,j,k+1,jj) - w(i,j,k,jj)
+                                dwtj = w(i, j + 1, k, jj) - w(i, j, k, jj)
 
-                   endif
+                            end if
 
-                   ! Update the residual. The convective term must be
-                   ! substracted, because it appears on the other side
-                   ! of the equation as the source and viscous terms.
+                            ! Update the residual. The convective term must be
+                            ! substracted, because it appears on the other side
+                            ! of the equation as the source and viscous terms.
 
-                   dw(i,j,k,itu1+ii-1) = dw(i,j,k,itu1+ii-1) - uu*dwtk
+                            dw(i, j, k, itu1 + ii - 1) = dw(i, j, k, itu1 + ii - 1) - uu * dwtj
+                        end do
+                    end if velJdir
                 end do
-             endif velKdir
-          enddo
-       enddo
-    enddo
+            end do
+        end do
+        !
+        !       Upwind discretization of the convective term in i (xi)
+        !       direction. Either the 1st order upwind or the second order
+        !       fully upwind interpolation scheme, kappa = -1, is used in
+        !       combination with the minmod limiter.
+        !       The possible grid velocity must be taken into account.
+        !
+        qs = zero
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    ! Compute the grid velocity if present.
+                    ! It is taken as the average of i and i-1,
 
-    !
-    !       Upwind discretization of the convective term in j (eta)
-    !       direction. Either the 1st order upwind or the second order
-    !       fully upwind interpolation scheme, kappa = -1, is used in
-    !       combination with the minmod limiter.
-    !       The possible grid velocity must be taken into account.
-    !
-    do k=2, kl
-       do j=2, jl
-          do i=2, il
+                    voli = half / vol(i, j, k)
+                    qs = (sFaceI(i, j, k) + sFaceI(i - 1, j, k)) * voli
 
+                    ! Compute the normal velocity, where the normal direction
+                    ! is taken as the average of faces i and i-1.
 
-             ! Compute the grid velocity if present.
-             ! It is taken as the average of j and j-1,
+                    xa = (si(i, j, k, 1) + si(i - 1, j, k, 1)) * voli
+                    ya = (si(i, j, k, 2) + si(i - 1, j, k, 2)) * voli
+                    za = (si(i, j, k, 3) + si(i - 1, j, k, 3)) * voli
 
-             voli = half/vol(i,j,k)
-             qs = (sFaceJ(i,j,k) + sFaceJ(i,j-1,k))*voli
+                    uu = xa * w(i, j, k, ivx) + ya * w(i, j, k, ivy) + za * w(i, j, k, ivz) - qs
 
-             ! Compute the normal velocity, where the normal direction
-             ! is taken as the average of faces j and j-1.
+                    ! Determine the situation we are having here, i.e. positive
+                    ! or negative normal velocity.
 
-             xa = (sj(i,j,k,1) + sj(i,j-1,k,1))*voli
-             ya = (sj(i,j,k,2) + sj(i,j-1,k,2))*voli
-             za = (sj(i,j,k,3) + sj(i,j-1,k,3))*voli
+                    velIdir: if (uu > zero) then
 
-             uu = xa*w(i,j,k,ivx) + ya*w(i,j,k,ivy) + za*w(i,j,k,ivz) - qs
+                        ! Velocity has a component in positive i-direction.
+                        ! Loop over the number of advection equations.
+                        do ii = 1, nAdv
 
-             ! Determine the situation we are having here, i.e. positive
-             ! or negative normal velocity.
+                            ! Set the value of jj such that it corresponds to the
+                            ! turbulent entry in w.
 
-             velJdir: if(uu > zero) then
+                            jj = ii + offset
 
-                ! Velocity has a component in positive j-direction.
-                ! Loop over the number of advection equations.
-                do ii=1,nAdv
+                            ! Check whether a first or a second order discretization
+                            ! must be used.
 
-                   ! Set the value of jj such that it corresponds to the
-                   ! turbulent entry in w.
+                            if (secondOrd) then
 
-                   jj = ii + offset
+                                ! Second order; store the three differences for the
+                                ! discretization of the derivative in i-direction.
 
-                   ! Check whether a first or a second order discretization
-                   ! must be used.
+                                dwtm1 = w(i - 1, j, k, jj) - w(i - 2, j, k, jj)
+                                dwt = w(i, j, k, jj) - w(i - 1, j, k, jj)
+                                dwtp1 = w(i + 1, j, k, jj) - w(i, j, k, jj)
 
-                   if( secondOrd ) then
+                                ! Construct the derivative in this cell center. This is
+                                ! the first order upwind derivative with two nonlinear
+                                ! corrections.
 
-                      ! Second order; store the three differences for the
-                      ! discretization of the derivative in j-direction.
+                                dwti = dwt
 
-                      dwtm1 = w(i,j-1,k,jj) - w(i,j-2,k,jj)
-                      dwt   = w(i,j,  k,jj) - w(i,j-1,k,jj)
-                      dwtp1 = w(i,j+1,k,jj) - w(i,j,  k,jj)
+                                if (dwt * dwtp1 > zero) then
+                                    if (abs(dwt) < abs(dwtp1)) then
+                                        dwti = dwti + half * dwt
+                                    else
+                                        dwti = dwti + half * dwtp1
+                                    end if
+                                end if
 
-                      ! Construct the derivative in this cell center. This is
-                      ! the first order upwind derivative with two nonlinear
-                      ! corrections.
+                                if (dwt * dwtm1 > zero) then
+                                    if (abs(dwt) < abs(dwtm1)) then
+                                        dwti = dwti - half * dwt
+                                    else
+                                        dwti = dwti - half * dwtm1
+                                    end if
+                                end if
 
-                      dwtj = dwt
+                            else
 
-                      if(dwt*dwtp1 > zero) then
-                         if(abs(dwt) < abs(dwtp1)) then
-                            dwtj = dwtj + half*dwt
-                         else
-                            dwtj = dwtj + half*dwtp1
-                         endif
-                      endif
+                                ! 1st order upwind scheme.
 
-                      if(dwt*dwtm1 > zero) then
-                         if(abs(dwt) < abs(dwtm1)) then
-                            dwtj = dwtj - half*dwt
-                         else
-                            dwtj = dwtj - half*dwtm1
-                         endif
-                      endif
+                                dwti = w(i, j, k, jj) - w(i - 1, j, k, jj)
 
-                   else
+                            end if
 
-                      ! 1st order upwind scheme.
+                            ! Update the residual. The convective term must be
+                            ! substracted, because it appears on the other side of
+                            ! the equation as the source and viscous terms.
 
-                      dwtj = w(i,j,k,jj) - w(i,j-1,k,jj)
+                            dw(i, j, k, itu1 + ii - 1) = dw(i, j, k, itu1 + ii - 1) - uu * dwti
+                        end do
 
-                   endif
+                        else velIdir
 
-                   ! Update the residual. The convective term must be
-                   ! substracted, because it appears on the other side of
-                   ! the equation as the source and viscous terms.
+                        ! Velocity has a component in negative i-direction.
+                        ! Loop over the number of advection equations.
+                        do ii = 1, nAdv
 
-                   dw(i,j,k,itu1+ii-1) = dw(i,j,k,itu1+ii-1) - uu*dwtj
-                enddo
+                            ! Set the value of jj such that it corresponds to the
+                            ! turbulent entry in w.
 
-             else velJdir
+                            jj = ii + offset
 
-                ! Velocity has a component in negative j-direction.
-                ! Loop over the number of advection equations.
-                do ii=1,nAdv
+                            ! Check whether a first or a second order discretization
+                            ! must be used.
 
-                   ! Set the value of jj such that it corresponds to the
-                   ! turbulent entry in w.
+                            if (secondOrd) then
 
-                   jj = ii + offset
+                                ! Second order; store the three differences for the
+                                ! discretization of the derivative in i-direction.
 
-                   ! Check whether a first or a second order discretization
-                   ! must be used.
+                                dwtm1 = w(i, j, k, jj) - w(i - 1, j, k, jj)
+                                dwt = w(i + 1, j, k, jj) - w(i, j, k, jj)
+                                dwtp1 = w(i + 2, j, k, jj) - w(i + 1, j, k, jj)
 
-                   if( secondOrd ) then
+                                ! Construct the derivative in this cell center. This is
+                                ! the first order upwind derivative with two nonlinear
+                                ! corrections.
 
-                      ! Store the three differences for the discretization of
-                      ! the derivative in j-direction.
+                                dwti = dwt
 
-                      dwtm1 = w(i,j,  k,jj) - w(i,j-1,k,jj)
-                      dwt   = w(i,j+1,k,jj) - w(i,j,  k,jj)
-                      dwtp1 = w(i,j+2,k,jj) - w(i,j+1,k,jj)
+                                if (dwt * dwtp1 > zero) then
+                                    if (abs(dwt) < abs(dwtp1)) then
+                                        dwti = dwti - half * dwt
+                                    else
+                                        dwti = dwti - half * dwtp1
+                                    end if
+                                end if
 
-                      ! Construct the derivative in this cell center. This is
-                      ! the first order upwind derivative with two nonlinear
-                      ! corrections.
+                                if (dwt * dwtm1 > zero) then
+                                    if (abs(dwt) < abs(dwtm1)) then
+                                        dwti = dwti + half * dwt
+                                    else
+                                        dwti = dwti + half * dwtm1
+                                    end if
+                                end if
 
-                      dwtj = dwt
+                            else
 
-                      if(dwt*dwtp1 > zero) then
-                         if(abs(dwt) < abs(dwtp1)) then
-                            dwtj = dwtj - half*dwt
-                         else
-                            dwtj = dwtj - half*dwtp1
-                         endif
-                      endif
+                                ! 1st order upwind scheme.
 
-                      if(dwt*dwtm1 > zero) then
-                         if(abs(dwt) < abs(dwtm1)) then
-                            dwtj = dwtj + half*dwt
-                         else
-                            dwtj = dwtj + half*dwtm1
-                         endif
-                      endif
+                                dwti = w(i + 1, j, k, jj) - w(i, j, k, jj)
 
-                   else
+                            end if
 
-                      ! 1st order upwind scheme.
+                            ! Update the residual. The convective term must be
+                            ! substracted, because it appears on the other side
+                            ! of the equation as the source and viscous terms.
 
-                      dwtj = w(i,j+1,k,jj) - w(i,j,k,jj)
+                            dw(i, j, k, itu1 + ii - 1) = dw(i, j, k, itu1 + ii - 1) - uu * dwti
 
-                   endif
+                            ! Update the central jacobian. First the term which is
+                            ! always present, i.e. -uu.
+                        end do
 
-                   ! Update the residual. The convective term must be
-                   ! substracted, because it appears on the other side
-                   ! of the equation as the source and viscous terms.
+                    end if velIdir
+                end do
+            end do
+        end do
+    end subroutine saAdvection
+
+    subroutine saResScale
+
+        !
+        !  Multiply the residual by the volume and store this in dw; this
+        ! * is done for monitoring reasons only. The multiplication with the
+        ! * volume is present to be consistent with the flow residuals; also
+        !  the negative value is taken, again to be consistent with the
+        ! * flow equations. Also multiply by iblank so that no updates occur
+        !  in holes or the overset boundary.
+        use constants
+        implicit none
+
+        ! Local variables
+        integer(kind=intType) :: i, j, k, ii
+        real(kind=realType) :: rblank
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    rblank = max(real(iblank(i, j, k), realType), zero)
+                    dw(i, j, k, itu1) = -volRef(i, j, k) * dw(i, j, k, itu1) * rblank
+                end do
+            end do
+        end do
 
-                   dw(i,j,k,itu1+ii-1) = dw(i,j,k,itu1+ii-1) - uu*dwtj
-                enddo
-             endif velJdir
-          enddo
-       enddo
-    enddo
-    !
-    !       Upwind discretization of the convective term in i (xi)
-    !       direction. Either the 1st order upwind or the second order
-    !       fully upwind interpolation scheme, kappa = -1, is used in
-    !       combination with the minmod limiter.
-    !       The possible grid velocity must be taken into account.
-    !
-    qs = zero
-    do k=2, kl
-       do j=2, jl
-          do i=2, il
-             ! Compute the grid velocity if present.
-             ! It is taken as the average of i and i-1,
+    end subroutine saResScale
 
-             voli = half/vol(i,j,k)
-             qs = (sFaceI(i,j,k) + sFaceI(i-1,j,k))*voli
+    subroutine timeStep(updateDtl)
+        ! ---------------------------------------------
+        !               Spectral Radius
+        ! ---------------------------------------------
 
-             ! Compute the normal velocity, where the normal direction
-             ! is taken as the average of faces i and i-1.
+        use constants
+        use blockPointers, only: sectionID
+        use flowvarRefState, only: pInfCorr, rhoInf, gammaInf, viscous, timeRef
+        use inputPhysics, only: equationMode
+        use inputDiscretization, only: adis
+        use section, only: sections
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
 
-             xa = (si(i,j,k,1) + si(i-1,j,k,1))*voli
-             ya = (si(i,j,k,2) + si(i-1,j,k,2))*voli
-             za = (si(i,j,k,3) + si(i-1,j,k,3))*voli
+        implicit none
 
-             uu = xa*w(i,j,k,ivx) + ya*w(i,j,k,ivy) + za*w(i,j,k,ivz) - qs
+        ! Input
+        logical, intent(in), optional :: updateDtl
 
-             ! Determine the situation we are having here, i.e. positive
-             ! or negative normal velocity.
+        ! Local parameters.
+        real(kind=realType), parameter :: b = 2.0_realType
 
-             velIdir: if(uu > zero) then
+        ! Variables for spectral Radius
+        real(kind=realType) :: plim, rlim, clim2
+        real(kind=realType) :: cc2, qsi, qsj, qsk, sx, sy, sz, rmu
+        real(kind=realType) :: ri, rj, rk, rij, rjk, rki
+        real(kind=realType) :: vsi, vsj, vsk, rfl, dpi, dpj, dpk
+        real(kind=realType) :: sFace, tmp, uux, uuy, uuz
+        logical :: doScaling, updateDt
+        integer(kind=intType) :: i, j, k
 
-                ! Velocity has a component in positive i-direction.
-                ! Loop over the number of advection equations.
-                do ii=1,nAdv
+        updateDt = .False.
+        if (present(updateDtl)) then
+            updateDt = .True.
+        end if
 
-                   ! Set the value of jj such that it corresponds to the
-                   ! turbulent entry in w.
+        ! Set the value of plim. To be fully consistent this must have
+        ! the dimension of a pressure. Therefore a fraction of pInfCorr
+        ! is used. Idem for rlim; compute clim2 as well.
 
-                   jj = ii + offset
+        plim = 0.001_realType * pInfCorr
+        rlim = 0.001_realType * rhoInf
+        clim2 = 0.000001_realType * gammaInf * pInfCorr / rhoInf
+        doScaling = .True.
 
-                   ! Check whether a first or a second order discretization
-                   ! must be used.
+        ! Initialize sFace to zero. This value will be used if the
+        ! block is not moving.
 
-                   if( secondOrd ) then
+        sFace = zero
+        !
+        !           Inviscid contribution, depending on the preconditioner.
+        !           Compute the cell centered values of the spectral radii.
+        !
+        ! Note:  DON'T change the ranges for i. It will mess up dtl.
+        ! we don't copy the spectral-radii and dtl to keep the
+        ! code simple, therefore this loop needs the full single
+        ! halo range.
+        do k = 1, ke
+            do j = 1, je
+                do i = 1, ie
 
-                      ! Second order; store the three differences for the
-                      ! discretization of the derivative in i-direction.
+                    ! Compute the velocities and speed of sound squared.
 
-                      dwtm1 = w(i-1,j,k,jj) - w(i-2,j,k,jj)
-                      dwt   = w(i,  j,k,jj) - w(i-1,j,k,jj)
-                      dwtp1 = w(i+1,j,k,jj) - w(i,  j,k,jj)
+                    uux = w(i, j, k, ivx)
+                    uuy = w(i, j, k, ivy)
+                    uuz = w(i, j, k, ivz)
+                    cc2 = gamma(i, j, k) * p(i, j, k) / w(i, j, k, irho)
+                    cc2 = max(cc2, clim2)
 
-                      ! Construct the derivative in this cell center. This is
-                      ! the first order upwind derivative with two nonlinear
-                      ! corrections.
+                    ! Set the dot product of the grid velocity and the
+                    ! normal in i-direction for a moving face. To avoid
+                    ! a number of multiplications by 0.5 simply the sum
+                    ! is taken.
 
-                      dwti = dwt
+                    sFace = sFaceI(i - 1, j, k) + sFaceI(i, j, k)
 
-                      if(dwt*dwtp1 > zero) then
-                         if(abs(dwt) < abs(dwtp1)) then
-                            dwti = dwti + half*dwt
-                         else
-                            dwti = dwti + half*dwtp1
-                         endif
-                      endif
+                    ! Spectral radius in i-direction.
 
-                      if(dwt*dwtm1 > zero) then
-                         if(abs(dwt) < abs(dwtm1)) then
-                            dwti = dwti - half*dwt
-                         else
-                            dwti = dwti - half*dwtm1
-                         endif
-                      endif
+                    sx = si(i - 1, j, k, 1) + si(i, j, k, 1)
+                    sy = si(i - 1, j, k, 2) + si(i, j, k, 2)
+                    sz = si(i - 1, j, k, 3) + si(i, j, k, 3)
 
-                   else
+                    qsi = uux * sx + uuy * sy + uuz * sz - sFace
 
-                      ! 1st order upwind scheme.
+                    ri = half * (abs(qsi) &
+                                 + sqrt(cc2 * (sx**2 + sy**2 + sz**2)))
 
-                      dwti = w(i,j,k,jj) - w(i-1,j,k,jj)
+                    ! The grid velocity in j-direction.
+                    sFace = sFaceJ(i, j - 1, k) + sFaceJ(i, j, k)
 
-                   endif
+                    ! Spectral radius in j-direction.
 
-                   ! Update the residual. The convective term must be
-                   ! substracted, because it appears on the other side of
-                   ! the equation as the source and viscous terms.
+                    sx = sj(i, j - 1, k, 1) + sj(i, j, k, 1)
+                    sy = sj(i, j - 1, k, 2) + sj(i, j, k, 2)
+                    sz = sj(i, j - 1, k, 3) + sj(i, j, k, 3)
 
-                   dw(i,j,k,itu1+ii-1) = dw(i,j,k,itu1+ii-1) - uu*dwti
-                enddo
+                    qsj = uux * sx + uuy * sy + uuz * sz - sFace
 
-             else velIdir
+                    rj = half * (abs(qsj) &
+                                 + sqrt(cc2 * (sx**2 + sy**2 + sz**2)))
 
-                ! Velocity has a component in negative i-direction.
-                ! Loop over the number of advection equations.
-                do ii=1,nAdv
+                    ! The grid velocity in k-direction.
+                    sFace = sFaceK(i, j, k - 1) + sFaceK(i, j, k)
 
-                   ! Set the value of jj such that it corresponds to the
-                   ! turbulent entry in w.
+                    ! Spectral radius in k-direction.
 
-                   jj = ii + offset
+                    sx = sk(i, j, k - 1, 1) + sk(i, j, k, 1)
+                    sy = sk(i, j, k - 1, 2) + sk(i, j, k, 2)
+                    sz = sk(i, j, k - 1, 3) + sk(i, j, k, 3)
 
-                   ! Check whether a first or a second order discretization
-                   ! must be used.
+                    qsk = uux * sx + uuy * sy + uuz * sz - sFace
 
-                   if( secondOrd ) then
+                    rk = half * (abs(qsk) &
+                                 + sqrt(cc2 * (sx**2 + sy**2 + sz**2)))
 
-                      ! Second order; store the three differences for the
-                      ! discretization of the derivative in i-direction.
+                    ! Store in tdl if required
+                    if (updateDt) then
+                        dtl(i, j, k) = ri + rj + rk
+                    end if
 
-                      dwtm1 = w(i,  j,k,jj) - w(i-1,j,k,jj)
-                      dwt   = w(i+1,j,k,jj) - w(i,  j,k,jj)
-                      dwtp1 = w(i+2,j,k,jj) - w(i+1,j,k,jj)
+                    ! Avoid division by zero by clipping radi, radJ and
+                    ! radK.
 
-                      ! Construct the derivative in this cell center. This is
-                      ! the first order upwind derivative with two nonlinear
-                      ! corrections.
+                    ri = max(ri, eps)
+                    rj = max(rj, eps)
+                    rk = max(rk, eps)
 
-                      dwti = dwt
+                    ! Compute the scaling in the three coordinate
+                    ! directions.
 
-                      if(dwt*dwtp1 > zero) then
-                         if(abs(dwt) < abs(dwtp1)) then
-                            dwti = dwti - half*dwt
-                         else
-                            dwti = dwti - half*dwtp1
-                         endif
-                      endif
+                    rij = (ri / rj)**adis
+                    rjk = (rj / rk)**adis
+                    rki = (rk / ri)**adis
 
-                      if(dwt*dwtm1 > zero) then
-                         if(abs(dwt) < abs(dwtm1)) then
-                            dwti = dwti + half*dwt
-                         else
-                            dwti = dwti + half*dwtm1
-                         endif
-                      endif
+                    ! Create the scaled versions of the aspect ratios.
+                    ! Note that the multiplication is done with radi, radJ
+                    ! and radK, such that the influence of the clipping
+                    ! is negligible.
 
-                   else
+                    radi(i, j, k) = ri * (one + one / rij + rki)
+                    radJ(i, j, k) = rj * (one + one / rjk + rij)
+                    radK(i, j, k) = rk * (one + one / rki + rjk)
+                end do
+            end do
+        end do
 
-                      ! 1st order upwind scheme.
+        ! The rest is only necessary if the timeStep needs to be computed
+        if (updateDt) then
 
-                      dwti = w(i+1,j,k,jj) - w(i,j,k,jj)
+            viscousTerm: if (viscous) then
 
-                   endif
+                ! Loop over the owned cell centers.
 
-                   ! Update the residual. The convective term must be
-                   ! substracted, because it appears on the other side
-                   ! of the equation as the source and viscous terms.
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
 
-                   dw(i,j,k,itu1+ii-1) = dw(i,j,k,itu1+ii-1) - uu*dwti
+                            ! Compute the effective viscosity coefficient. The
+                            ! factor 0.5 is a combination of two things. In the
+                            ! standard central discretization of a second
+                            ! derivative there is a factor 2 multiplying the
+                            ! central node. However in the code below not the
+                            ! average but the sum of the left and the right face
+                            ! is taken and squared. This leads to a factor 4.
+                            ! Combining both effects leads to 0.5. Furthermore,
+                            ! it is divided by the volume and density to obtain
+                            ! the correct dimensions and multiplied by the
+                            ! non-dimensional factor factVis.
 
-                   ! Update the central jacobian. First the term which is
-                   ! always present, i.e. -uu.
-                enddo
+                            rmu = rlv(i, j, k)
+                            rmu = rmu + rev(i, j, k)
+                            rmu = half * rmu / (w(i, j, k, irho) * vol(i, j, k))
 
-             endif velIdir
-          enddo
-       enddo
-    enddo
-  end subroutine saAdvection
+                            ! Add the viscous contribution in i-direction to the
+                            ! (inverse) of the time step.
 
-  subroutine saResScale
+                            sx = si(i, j, k, 1) + si(i - 1, j, k, 1)
+                            sy = si(i, j, k, 2) + si(i - 1, j, k, 2)
+                            sz = si(i, j, k, 3) + si(i - 1, j, k, 3)
 
-    !
-    !  Multiply the residual by the volume and store this in dw; this
-    ! * is done for monitoring reasons only. The multiplication with the
-    ! * volume is present to be consistent with the flow residuals; also
-    !  the negative value is taken, again to be consistent with the
-    ! * flow equations. Also multiply by iblank so that no updates occur
-    !  in holes or the overset boundary.
-    use constants
-    implicit none
+                            vsi = rmu * (sx * sx + sy * sy + sz * sz)
+                            dtl(i, j, k) = dtl(i, j, k) + vsi
 
-    ! Local variables
-    integer(kind=intType) :: i,j,k,ii
-    real(kind=realType) :: rblank
+                            ! Add the viscous contribution in j-direction to the
+                            ! (inverse) of the time step.
 
-    do k=2, kl
-       do j=2, jl
-          do i=2, il
-             rblank = max(real(iblank(i,j,k), realType), zero)
-             dw(i,j,k,itu1) = -volRef(i,j,k)*dw(i,j,k,itu1)*rblank
-          enddo
-       enddo
-    enddo
+                            sx = sj(i, j, k, 1) + sj(i, j - 1, k, 1)
+                            sy = sj(i, j, k, 2) + sj(i, j - 1, k, 2)
+                            sz = sj(i, j, k, 3) + sj(i, j - 1, k, 3)
 
-  end subroutine saResScale
+                            vsj = rmu * (sx * sx + sy * sy + sz * sz)
+                            dtl(i, j, k) = dtl(i, j, k) + vsj
 
-  subroutine timeStep(updateDtl)
-    ! ---------------------------------------------
-    !               Spectral Radius
-    ! ---------------------------------------------
+                            ! Add the viscous contribution in k-direction to the
+                            ! (inverse) of the time step.
 
-    use constants
-    use blockPointers, only : sectionID
-    use flowvarRefState, only : pInfCorr, rhoInf, gammaInf, viscous, timeRef
-    use inputPhysics, only : equationMode
-    use inputDiscretization, only : adis
-    use section, only : sections
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
+                            sx = sk(i, j, k, 1) + sk(i, j, k - 1, 1)
+                            sy = sk(i, j, k, 2) + sk(i, j, k - 1, 2)
+                            sz = sk(i, j, k, 3) + sk(i, j, k - 1, 3)
 
-    implicit none
+                            vsk = rmu * (sx * sx + sy * sy + sz * sz)
+                            dtl(i, j, k) = dtl(i, j, k) + vsk
 
-    ! Input
-    logical, intent(in), optional :: updateDtl
+                        end do
+                    end do
+                end do
+            end if viscousTerm
 
-    ! Local parameters.
-    real(kind=realType), parameter :: b = 2.0_realType
+            ! For the spectral mode an additional term term must be
+            ! taken into account, which corresponds to the contribution
+            ! of the highest frequency.
 
-    ! Variables for spectral Radius
-    real(kind=realType) :: plim, rlim, clim2
-    real(kind=realType) :: cc2, qsi, qsj, qsk, sx, sy, sz, rmu
-    real(kind=realType) :: ri, rj, rk, rij, rjk, rki
-    real(kind=realType) :: vsi, vsj, vsk, rfl, dpi, dpj, dpk
-    real(kind=realType) :: sFace, tmp, uux, uuy, uuz
-    logical :: doScaling, updateDt
-    integer(kind=intType) :: i, j, k
+            if (equationMode == timeSpectral) then
 
-    updateDt = .False.
-    if (present(updateDtl)) then
-       updateDt = .True.
-    end if
+                tmp = nTimeIntervalsSpectral * pi * timeRef &
+                      / sections(sectionID)%timePeriod
 
-    ! Set the value of plim. To be fully consistent this must have
-    ! the dimension of a pressure. Therefore a fraction of pInfCorr
-    ! is used. Idem for rlim; compute clim2 as well.
+                ! Loop over the owned cell centers and add the term.
 
-    plim  = 0.001_realType*pInfCorr
-    rlim  = 0.001_realType*rhoInf
-    clim2 = 0.000001_realType*gammaInf*pInfCorr/rhoInf
-    doScaling = .True.
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            dtl(i, j, k) = dtl(i, j, k) + tmp * vol(i, j, k)
+                        end do
+                    end do
+                end do
 
-    ! Initialize sFace to zero. This value will be used if the
-    ! block is not moving.
+            end if
+
+            ! Currently the inverse of dt/vol is stored in dtl. Invert
+            ! this value such that the time step per unit cfl number is
+            ! stored and correct in cases of high gradients.
+
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+                        dpi = abs(p(i + 1, j, k) - two * p(i, j, k) + p(i - 1, j, k)) &
+                              / (p(i + 1, j, k) + two * p(i, j, k) + p(i - 1, j, k) + plim)
+                        dpj = abs(p(i, j + 1, k) - two * p(i, j, k) + p(i, j - 1, k)) &
+                              / (p(i, j + 1, k) + two * p(i, j, k) + p(i, j - 1, k) + plim)
+                        dpk = abs(p(i, j, k + 1) - two * p(i, j, k) + p(i, j, k - 1)) &
+                              / (p(i, j, k + 1) + two * p(i, j, k) + p(i, j, k - 1) + plim)
+                        rfl = one / (one + b * (dpi + dpj + dpk))
+
+                        dtl(i, j, k) = rfl / dtl(i, j, k)
+                    end do
+                end do
+            end do
+        end if
+
+    end subroutine timeStep
+
+    subroutine inviscidCentralFlux
+
+        ! ---------------------------------------------
+        !               Inviscid central flux
+        ! ---------------------------------------------
+        use constants
+        use blockPointers, only: blockIsMoving, nBkGlobal
+        use flowVarRefState, only: timeRef
+        use cgnsGrid, only: cgnsDoms
+        use inputPhysics, only: equationMode
+        implicit none
+
+        ! Variables for inviscid central flux
+        real(kind=realType) :: qsp, qsm, rqsp, rqsm, porVel, porFlux
+        real(kind=realType) :: pa, vnp, vnm, fs, sFace
+        integer(kind=intType) :: i, j, k
+        real(kind=realType) :: wwx, wwy, wwz, rvol
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 1, il
+
+                    ! Set the dot product of the grid velocity and the
+                    ! normal in i-direction for a moving face.
+
+                    sFace = sFaceI(i, j, k)
+
+                    ! Compute the normal velocities of the left and right state.
+
+                    vnp = w(i + 1, j, k, ivx) * sI(i, j, k, 1) &
+                          + w(i + 1, j, k, ivy) * sI(i, j, k, 2) &
+                          + w(i + 1, j, k, ivz) * sI(i, j, k, 3)
+                    vnm = w(i, j, k, ivx) * sI(i, j, k, 1) &
+                          + w(i, j, k, ivy) * sI(i, j, k, 2) &
+                          + w(i, j, k, ivz) * sI(i, j, k, 3)
+                    ! Set the values of the porosities for this face.
+                    ! porVel defines the porosity w.r.t. velocity;
+                    ! porFlux defines the porosity w.r.t. the entire flux.
+                    ! The latter is only zero for a discontinuous block
+                    ! boundary that must be treated conservatively.
+                    ! The default value of porFlux is 0.5, such that the
+                    ! correct central flux is scattered to both cells.
+                    ! In case of a boundFlux the normal velocity is set
+                    ! to sFace.
+
+                    porVel = one
+                    porFlux = half
+                    if (porI(i, j, k) == noFlux) porFlux = zero
+                    if (porI(i, j, k) == boundFlux) then
+                        porVel = zero
+                        vnp = sFace
+                        vnm = sFace
+                    end if
+
+                    ! Incorporate porFlux in porVel.
+
+                    porVel = porVel * porFlux
+
+                    ! Compute the normal velocities relative to the grid for
+                    ! the face as well as the mass fluxes.
+
+                    qsp = (vnp - sFace) * porVel
+                    qsm = (vnm - sFace) * porVel
+
+                    rqsp = qsp * w(i + 1, j, k, irho)
+                    rqsm = qsm * w(i, j, k, irho)
+
+                    ! Compute the sum of the pressure multiplied by porFlux.
+                    ! For the default value of porFlux, 0.5, this leads to
+                    ! the average pressure.
+
+                    pa = porFlux * (p(i + 1, j, k) + p(i, j, k))
+
+                    ! Compute the fluxes and scatter them to the cells
+                    ! i,j,k and i+1,j,k. Store the density flux in the
+                    ! mass flow of the appropriate sliding mesh interface.
+
+                    fs = rqsp + rqsm
+                    dw(i + 1, j, k, irho) = dw(i + 1, j, k, irho) - fs
+                    dw(i, j, k, irho) = dw(i, j, k, irho) + fs
+
+                    fs = rqsp * w(i + 1, j, k, ivx) + rqsm * w(i, j, k, ivx) &
+                         + pa * sI(i, j, k, 1)
+                    dw(i + 1, j, k, imx) = dw(i + 1, j, k, imx) - fs
+                    dw(i, j, k, imx) = dw(i, j, k, imx) + fs
+
+                    fs = rqsp * w(i + 1, j, k, ivy) + rqsm * w(i, j, k, ivy) &
+                         + pa * sI(i, j, k, 2)
+                    dw(i + 1, j, k, imy) = dw(i + 1, j, k, imy) - fs
+                    dw(i, j, k, imy) = dw(i, j, k, imy) + fs
+
+                    fs = rqsp * w(i + 1, j, k, ivz) + rqsm * w(i, j, k, ivz) &
+                         + pa * sI(i, j, k, 3)
+                    dw(i + 1, j, k, imz) = dw(i + 1, j, k, imz) - fs
+                    dw(i, j, k, imz) = dw(i, j, k, imz) + fs
+
+                    fs = qsp * w(i + 1, j, k, irhoE) + qsm * w(i, j, k, irhoE) &
+                         + porFlux * (vnp * p(i + 1, j, k) + vnm * p(i, j, k))
+                    dw(i + 1, j, k, irhoE) = dw(i + 1, j, k, irhoE) - fs
+                    dw(i, j, k, irhoE) = dw(i, j, k, irhoE) + fs
+                end do
+            end do
+        end do
+
+        do k = 2, kl
+            do j = 1, jl
+                do i = 2, il
+
+                    ! Set the dot product of the grid velocity and the
+                    ! normal in j-direction for a moving face.
+
+                    sFace = sFaceJ(i, j, k)
+
+                    ! Compute the normal velocities of the left and right state.
+
+                    vnp = w(i, j + 1, k, ivx) * sJ(i, j, k, 1) &
+                          + w(i, j + 1, k, ivy) * sJ(i, j, k, 2) &
+                          + w(i, j + 1, k, ivz) * sJ(i, j, k, 3)
+                    vnm = w(i, j, k, ivx) * sJ(i, j, k, 1) &
+                          + w(i, j, k, ivy) * sJ(i, j, k, 2) &
+                          + w(i, j, k, ivz) * sJ(i, j, k, 3)
+
+                    ! Set the values of the porosities for this face.
+                    ! porVel defines the porosity w.r.t. velocity;
+                    ! porFlux defines the porosity w.r.t. the entire flux.
+                    ! The latter is only zero for a discontinuous block
+                    ! boundary that must be treated conservatively.
+                    ! The default value of porFlux is 0.5, such that the
+                    ! correct central flux is scattered to both cells.
+                    ! In case of a boundFlux the normal velocity is set
+                    ! to sFace.
+
+                    porVel = one
+                    porFlux = half
+                    if (porJ(i, j, k) == noFlux) porFlux = zero
+                    if (porJ(i, j, k) == boundFlux) then
+                        porVel = zero
+                        vnp = sFace
+                        vnm = sFace
+                    end if
+
+                    ! Incorporate porFlux in porVel.
+
+                    porVel = porVel * porFlux
+
+                    ! Compute the normal velocities for the face as well as the
+                    ! mass fluxes.
+
+                    qsp = (vnp - sFace) * porVel
+                    qsm = (vnm - sFace) * porVel
+
+                    rqsp = qsp * w(i, j + 1, k, irho)
+                    rqsm = qsm * w(i, j, k, irho)
+
+                    ! Compute the sum of the pressure multiplied by porFlux.
+                    ! For the default value of porFlux, 0.5, this leads to
+                    ! the average pressure.
+
+                    pa = porFlux * (p(i, j + 1, k) + p(i, j, k))
+
+                    ! Compute the fluxes and scatter them to the cells
+                    ! i,j,k and i,j+1,k. Store the density flux in the
+                    ! mass flow of the appropriate sliding mesh interface.
+
+                    fs = rqsp + rqsm
+                    dw(i, j + 1, k, irho) = dw(i, j + 1, k, irho) - fs
+                    dw(i, j, k, irho) = dw(i, j, k, irho) + fs
+
+                    fs = rqsp * w(i, j + 1, k, ivx) + rqsm * w(i, j, k, ivx) &
+                         + pa * sJ(i, j, k, 1)
+                    dw(i, j + 1, k, imx) = dw(i, j + 1, k, imx) - fs
+                    dw(i, j, k, imx) = dw(i, j, k, imx) + fs
+
+                    fs = rqsp * w(i, j + 1, k, ivy) + rqsm * w(i, j, k, ivy) &
+                         + pa * sJ(i, j, k, 2)
+                    dw(i, j + 1, k, imy) = dw(i, j + 1, k, imy) - fs
+                    dw(i, j, k, imy) = dw(i, j, k, imy) + fs
+
+                    fs = rqsp * w(i, j + 1, k, ivz) + rqsm * w(i, j, k, ivz) &
+                         + pa * sJ(i, j, k, 3)
+                    dw(i, j + 1, k, imz) = dw(i, j + 1, k, imz) - fs
+                    dw(i, j, k, imz) = dw(i, j, k, imz) + fs
+
+                    fs = qsp * w(i, j + 1, k, irhoE) + qsm * w(i, j, k, irhoE) &
+                         + porFlux * (vnp * p(i, j + 1, k) + vnm * p(i, j, k))
+                    dw(i, j + 1, k, irhoE) = dw(i, j + 1, k, irhoE) - fs
+                    dw(i, j, k, irhoE) = dw(i, j, k, irhoE) + fs
+                end do
+            end do
+        end do
 
-    sFace = zero
-    !
-    !           Inviscid contribution, depending on the preconditioner.
-    !           Compute the cell centered values of the spectral radii.
-    !
-    ! Note:  DON'T change the ranges for i. It will mess up dtl.
-    ! we don't copy the spectral-radii and dtl to keep the
-    ! code simple, therefore this loop needs the full single
-    ! halo range.
-    do k=1,ke
-       do j=1,je
-          do i=1,ie
+        do k = 1, kl
+            do j = 2, jl
+                do i = 2, il
 
-             ! Compute the velocities and speed of sound squared.
+                    ! Set the dot product of the grid velocity and the
+                    ! normal in k-direction for a moving face.
 
-             uux  = w(i,j,k,ivx)
-             uuy  = w(i,j,k,ivy)
-             uuz  = w(i,j,k,ivz)
-             cc2 = gamma(i,j,k)*p(i,j,k)/w(i,j,k,irho)
-             cc2 = max(cc2,clim2)
+                    sFace = sFaceK(i, j, k)
 
-             ! Set the dot product of the grid velocity and the
-             ! normal in i-direction for a moving face. To avoid
-             ! a number of multiplications by 0.5 simply the sum
-             ! is taken.
+                    ! Compute the normal velocities of the left and right state.
 
-             sFace = sFaceI(i-1,j,k) + sFaceI(i,j,k)
+                    vnp = w(i, j, k + 1, ivx) * sK(i, j, k, 1) &
+                          + w(i, j, k + 1, ivy) * sK(i, j, k, 2) &
+                          + w(i, j, k + 1, ivz) * sK(i, j, k, 3)
+                    vnm = w(i, j, k, ivx) * sK(i, j, k, 1) &
+                          + w(i, j, k, ivy) * sK(i, j, k, 2) &
+                          + w(i, j, k, ivz) * sK(i, j, k, 3)
 
-             ! Spectral radius in i-direction.
+                    ! Set the values of the porosities for this face.
+                    ! porVel defines the porosity w.r.t. velocity;
+                    ! porFlux defines the porosity w.r.t. the entire flux.
+                    ! The latter is only zero for a discontinuous block
+                    ! block boundary that must be treated conservatively.
+                    ! The default value of porFlux is 0.5, such that the
+                    ! correct central flux is scattered to both cells.
+                    ! In case of a boundFlux the normal velocity is set
+                    ! to sFace.
 
-             sx = si(i-1,j,k,1) + si(i,j,k,1)
-             sy = si(i-1,j,k,2) + si(i,j,k,2)
-             sz = si(i-1,j,k,3) + si(i,j,k,3)
+                    porVel = one
+                    porFlux = half
 
-             qsi = uux*sx + uuy*sy + uuz*sz - sFace
+                    if (porK(i, j, k) == noFlux) porFlux = zero
+                    if (porK(i, j, k) == boundFlux) then
+                        porVel = zero
+                        vnp = sFace
+                        vnm = sFace
+                    end if
 
-             ri = half*(abs(qsi) &
-                  +       sqrt(cc2*(sx**2 + sy**2 + sz**2)))
+                    ! Incorporate porFlux in porVel.
 
-             ! The grid velocity in j-direction.
-             sFace = sFaceJ(i,j-1,k) + sFaceJ(i,j,k)
+                    porVel = porVel * porFlux
 
-             ! Spectral radius in j-direction.
+                    ! Compute the normal velocities for the face as well as the
+                    ! mass fluxes.
 
-             sx = sj(i,j-1,k,1) + sj(i,j,k,1)
-             sy = sj(i,j-1,k,2) + sj(i,j,k,2)
-             sz = sj(i,j-1,k,3) + sj(i,j,k,3)
+                    qsp = (vnp - sFace) * porVel
+                    qsm = (vnm - sFace) * porVel
 
-             qsj = uux*sx + uuy*sy + uuz*sz - sFace
+                    rqsp = qsp * w(i, j, k + 1, irho)
+                    rqsm = qsm * w(i, j, k, irho)
 
-             rj = half*(abs(qsj) &
-                  +       sqrt(cc2*(sx**2 + sy**2 + sz**2)))
+                    ! Compute the sum of the pressure multiplied by porFlux.
+                    ! For the default value of porFlux, 0.5, this leads to
+                    ! the average pressure.
 
-             ! The grid velocity in k-direction.
-             sFace = sFaceK(i,j,k-1) + sFaceK(i,j,k)
+                    pa = porFlux * (p(i, j, k + 1) + p(i, j, k))
 
-             ! Spectral radius in k-direction.
+                    ! Compute the fluxes and scatter them to the cells
+                    ! i,j,k and i,j,k+1. Store the density flux in the
+                    ! mass flow of the appropriate sliding mesh interface.
 
-             sx = sk(i,j,k-1,1) + sk(i,j,k,1)
-             sy = sk(i,j,k-1,2) + sk(i,j,k,2)
-             sz = sk(i,j,k-1,3) + sk(i,j,k,3)
+                    fs = rqsp + rqsm
+                    dw(i, j, k + 1, irho) = dw(i, j, k + 1, irho) - fs
+                    dw(i, j, k, irho) = dw(i, j, k, irho) + fs
 
-             qsk = uux*sx + uuy*sy + uuz*sz - sFace
+                    fs = rqsp * w(i, j, k + 1, ivx) + rqsm * w(i, j, k, ivx) &
+                         + pa * sK(i, j, k, 1)
+                    dw(i, j, k + 1, imx) = dw(i, j, k + 1, imx) - fs
+                    dw(i, j, k, imx) = dw(i, j, k, imx) + fs
 
-             rk = half*(abs(qsk) &
-                  +       sqrt(cc2*(sx**2 + sy**2 + sz**2)))
+                    fs = rqsp * w(i, j, k + 1, ivy) + rqsm * w(i, j, k, ivy) &
+                         + pa * sK(i, j, k, 2)
+                    dw(i, j, k + 1, imy) = dw(i, j, k + 1, imy) - fs
+                    dw(i, j, k, imy) = dw(i, j, k, imy) + fs
 
-             ! Store in tdl if required
-             if (updateDt) then
-                dtl(i,j,k) = ri + rj + rk
-             end if
+                    fs = rqsp * w(i, j, k + 1, ivz) + rqsm * w(i, j, k, ivz) &
+                         + pa * sK(i, j, k, 3)
+                    dw(i, j, k + 1, imz) = dw(i, j, k + 1, imz) - fs
+                    dw(i, j, k, imz) = dw(i, j, k, imz) + fs
 
-             ! Avoid division by zero by clipping radi, radJ and
-             ! radK.
+                    fs = qsp * w(i, j, k + 1, irhoE) + qsm * w(i, j, k, irhoE) &
+                         + porFlux * (vnp * p(i, j, k + 1) + vnm * p(i, j, k))
+                    dw(i, j, k + 1, irhoE) = dw(i, j, k + 1, irhoE) - fs
+                    dw(i, j, k, irhoE) = dw(i, j, k, irhoE) + fs
 
-             ri = max(ri, eps)
-             rj = max(rj, eps)
-             rk = max(rk, eps)
+                end do
+            end do
+        end do
+
+        rotation: if (blockIsMoving .and. equationMode == steady) then
+
+            ! Compute the three nonDimensional angular velocities.
+
+            wwx = timeRef * cgnsDoms(nbkGlobal)%rotRate(1)
+            wwy = timeRef * cgnsDoms(nbkGlobal)%rotRate(2)
+            wwz = timeRef * cgnsDoms(nbkGlobal)%rotRate(3)
+
+            ! Loop over the internal cells of this block to compute the
+            ! rotational terms for the momentum equations.
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+                        rvol = w(i, j, k, irho) * vol(i, j, k)
+                        dw(i, j, k, imx) = dw(i, j, k, imx) &
+                                           + rvol * (wwy * w(i, j, k, ivz) - wwz * w(i, j, k, ivy))
+                        dw(i, j, k, imy) = dw(i, j, k, imy) &
+                                           + rvol * (wwz * w(i, j, k, ivx) - wwx * w(i, j, k, ivz))
+                        dw(i, j, k, imz) = dw(i, j, k, imz) &
+                                           + rvol * (wwx * w(i, j, k, ivy) - wwy * w(i, j, k, ivx))
+                    end do
+                end do
+            end do
+        end if rotation
+
+    end subroutine inviscidCentralFlux
+
+    subroutine inviscidDissFluxMatrix
+        !
+        !       inviscidDissFluxMatrix computes the matrix artificial
+        !       dissipation term. Instead of the spectral radius, as used in
+        !       the scalar dissipation scheme, the absolute value of the flux
+        !       jacobian is used. This leads to a less diffusive and
+        !       consequently more accurate scheme. It is assumed that the
+        !       pointers in blockPointers already point to the correct block.
+        !
+        use constants
+        use flowVarRefState, only: pInfCorr
+        use inputDiscretization, only: vis2, vis4
+        use inputPhysics, only: equations
+        use iteration, only: rFil
+        use utils, only: getCorrectForK, myDim
+        implicit none
+        !
+        !      Local parameters.
+        !
+        real(kind=realType), parameter :: dpMax = 0.25_realType
+        real(kind=realType), parameter :: epsAcoustic = 0.25_realType
+        real(kind=realType), parameter :: epsShear = 0.025_realType
+        real(kind=realType), parameter :: omega = 0.5_realType
+        real(kind=realType), parameter :: oneMinOmega = one - omega
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, ind, ii
+
+        real(kind=realType) :: plim, sface
+        real(kind=realType) :: sfil, fis2, fis4
+        real(kind=realType) :: gammaAvg, gm1, ovgm1, gm53
+        real(kind=realType) :: ppor, rrad, dis2, dis4
+        real(kind=realType) :: dp1, dp2, tmp, fs
+        real(kind=realType) :: ddw1, ddw2, ddw3, ddw4, ddw5, ddw6
+        real(kind=realType) :: dr, dru, drv, drw, dre, drk, sx, sy, sz
+        real(kind=realType) :: uAvg, vAvg, wAvg, a2Avg, aAvg, hAvg
+        real(kind=realType) :: alphaAvg, unAvg, ovaAvg, ova2Avg
+        real(kind=realType) :: kAvg, lam1, lam2, lam3, area
+        real(kind=realType) :: abv1, abv2, abv3, abv4, abv5, abv6, abv7
+        logical :: correctForK
+
+        ! Set the value of plim. To be fully consistent this must have
+        ! the dimension of a pressure. Therefore a fraction of pInfCorr
+        ! is used.
+
+        plim = 0.001_realType * pInfCorr
+
+        ! Determine whether or not the total energy must be corrected
+        ! for the presence of the turbulent kinetic energy.
+
+        correctForK = getCorrectForK()
+
+        ! Initialize sface to zero. This value will be used if the
+        ! block is not moving.
+
+        sface = zero
+
+        ! Set a couple of constants for the scheme.
+
+        fis2 = rFil * vis2
+        fis4 = rFil * vis4
+        sfil = one - rFil
+
+        ! Initialize the dissipative residual to a certain times,
+        ! possibly zero, the previously stored value.
+
+        fw = sfil * fw
+
+        ! Compute the pressure sensor for each cell, in each direction:
+        do k = 1, ke
+            do j = 1, je
+                do i = singleHaloStart, ie
+                    dss(i, j, k, 1) = abs((p(i + 1, j, k) - two * p(i, j, k) + p(i - 1, j, k)) &
+                                          / (omega * (p(i + 1, j, k) + two * p(i, j, k) + p(i - 1, j, k)) &
+                                             + oneMinOmega * (abs(p(i + 1, j, k) - p(i, j, k)) &
+                                                              + abs(p(i, j, k) - p(i - 1, j, k))) + plim))
+
+                    dss(i, j, k, 2) = abs((p(i, j + 1, k) - two * p(i, j, k) + p(i, j - 1, k)) &
+                                          / (omega * (p(i, j + 1, k) + two * p(i, j, k) + p(i, j - 1, k)) &
+                                             + oneMinOmega * (abs(p(i, j + 1, k) - p(i, j, k)) &
+                                                              + abs(p(i, j, k) - p(i, j - 1, k))) + plim))
+
+                    dss(i, j, k, 3) = abs((p(i, j, k + 1) - two * p(i, j, k) + p(i, j, k - 1)) &
+                                          / (omega * (p(i, j, k + 1) + two * p(i, j, k) + p(i, j, k - 1)) &
+                                             + oneMinOmega * (abs(p(i, j, k + 1) - p(i, j, k)) &
+                                                              + abs(p(i, j, k) - p(i, j, k - 1))) + plim))
+                end do
+            end do
+        end do
+        !
+        !       Dissipative fluxes in the i-direction.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 1, il
+
+                    ! Compute the dissipation coefficients for this face.
+
+                    ppor = zero
+                    if (porI(i, j, k) == normalFlux) ppor = one
+                    dis2 = ppor * fis2 * min(dpMax, max(dss(i, j, k, 1), dss(i + 1, j, k, 1)))
+                    dis4 = dim(ppor * fis4, dis2)
+
+                    ! Construct the vector of the first and third differences
+                    ! multiplied by the appropriate constants.
+
+                    ddw1 = w(i + 1, j, k, irho) - w(i, j, k, irho)
+                    dr = dis2 * ddw1 &
+                         - dis4 * (w(i + 2, j, k, irho) - w(i - 1, j, k, irho) - three * ddw1)
+
+                    ddw2 = w(i + 1, j, k, irho) * w(i + 1, j, k, ivx) &
+                           - w(i, j, k, irho) * w(i, j, k, ivx)
+                    dru = dis2 * ddw2 &
+                          - dis4 * (w(i + 2, j, k, irho) * w(i + 2, j, k, ivx) &
+                                    - w(i - 1, j, k, irho) * w(i - 1, j, k, ivx) - three * ddw2)
+
+                    ddw3 = w(i + 1, j, k, irho) * w(i + 1, j, k, ivy) &
+                           - w(i, j, k, irho) * w(i, j, k, ivy)
+                    drv = dis2 * ddw3 &
+                          - dis4 * (w(i + 2, j, k, irho) * w(i + 2, j, k, ivy) &
+                                    - w(i - 1, j, k, irho) * w(i - 1, j, k, ivy) - three * ddw3)
+
+                    ddw4 = w(i + 1, j, k, irho) * w(i + 1, j, k, ivz) &
+                           - w(i, j, k, irho) * w(i, j, k, ivz)
+                    drw = dis2 * ddw4 &
+                          - dis4 * (w(i + 2, j, k, irho) * w(i + 2, j, k, ivz) &
+                                    - w(i - 1, j, k, irho) * w(i - 1, j, k, ivz) - three * ddw4)
+
+                    ddw5 = w(i + 1, j, k, irhoE) - w(i, j, k, irhoE)
+                    dre = dis2 * ddw5 &
+                          - dis4 * (w(i + 2, j, k, irhoE) - w(i - 1, j, k, irhoE) - three * ddw5)
+
+                    ! In case a k-equation is present, compute the difference
+                    ! of rhok and store the average value of k. If not present,
+                    ! set both these values to zero, such that later on no
+                    ! decision needs to be made anymore.
+                    drk = zero
+                    kAvg = zero
+
+                    if (correctForK) then
+                        ddw6 = w(i + 1, j, k, irho) * w(i + 1, j, k, itu1) &
+                               - w(i, j, k, irho) * w(i, j, k, itu1)
+                        drk = dis2 * ddw6 &
+                              - dis4 * (w(i + 2, j, k, irho) * w(i + 2, j, k, itu1) &
+                                        - w(i - 1, j, k, irho) * w(i - 1, j, k, itu1) - three * ddw6)
 
-             ! Compute the scaling in the three coordinate
-             ! directions.
+                        kAvg = half * (w(i, j, k, itu1) + w(i + 1, j, k, itu1))
+                    end if
 
-             rij = (ri/rj)**adis
-             rjk = (rj/rk)**adis
-             rki = (rk/ri)**adis
+                    ! Compute the average value of gamma and compute some
+                    ! expressions in which it occurs.
 
-             ! Create the scaled versions of the aspect ratios.
-             ! Note that the multiplication is done with radi, radJ
-             ! and radK, such that the influence of the clipping
-             ! is negligible.
+                    gammaAvg = half * (gamma(i + 1, j, k) + gamma(i, j, k))
+                    gm1 = gammaAvg - one
+                    ovgm1 = one / gm1
+                    gm53 = gammaAvg - five * third
 
-             radi(i,j,k) = ri*(one + one/rij + rki)
-             radJ(i,j,k) = rj*(one + one/rjk + rij)
-             radK(i,j,k) = rk*(one + one/rki + rjk)
-          end do
-       enddo
-    enddo
+                    ! Compute the average state at the interface.
 
+                    uAvg = half * (w(i + 1, j, k, ivx) + w(i, j, k, ivx))
+                    vAvg = half * (w(i + 1, j, k, ivy) + w(i, j, k, ivy))
+                    wAvg = half * (w(i + 1, j, k, ivz) + w(i, j, k, ivz))
+                    a2Avg = half * (gamma(i + 1, j, k) * p(i + 1, j, k) / w(i + 1, j, k, irho) &
+                                    + gamma(i, j, k) * p(i, j, k) / w(i, j, k, irho))
 
-    ! The rest is only necessary if the timeStep needs to be computed
-    if (updateDt) then
+                    area = sqrt(si(i, j, k, 1)**2 + si(i, j, k, 2)**2 + si(i, j, k, 3)**2)
+                    tmp = one / max(1.e-25_realType, area)
+                    sx = si(i, j, k, 1) * tmp
+                    sy = si(i, j, k, 2) * tmp
+                    sz = si(i, j, k, 3) * tmp
 
-       viscousTerm: if( viscous ) then
+                    alphaAvg = half * (uAvg**2 + vAvg**2 + wAvg**2)
+                    hAvg = alphaAvg + ovgm1 * (a2Avg - gm53 * kAvg)
+                    aAvg = sqrt(a2Avg)
+                    unAvg = uAvg * sx + vAvg * sy + wAvg * sz
+                    ovaAvg = one / aAvg
+                    ova2Avg = one / a2Avg
 
-          ! Loop over the owned cell centers.
+                    ! The mesh velocity if the face is moving. It must be
+                    ! divided by the area to obtain a true velocity.
 
-          do k=2,kl
-             do j=2,jl
-                do i=2,il
+                    sface = sFaceI(i, j, k) * tmp
 
-                   ! Compute the effective viscosity coefficient. The
-                   ! factor 0.5 is a combination of two things. In the
-                   ! standard central discretization of a second
-                   ! derivative there is a factor 2 multiplying the
-                   ! central node. However in the code below not the
-                   ! average but the sum of the left and the right face
-                   ! is taken and squared. This leads to a factor 4.
-                   ! Combining both effects leads to 0.5. Furthermore,
-                   ! it is divided by the volume and density to obtain
-                   ! the correct dimensions and multiplied by the
-                   ! non-dimensional factor factVis.
+                    ! Compute the absolute values of the three eigenvalues
+                    ! and make sure they don't become zero by cutting them
+                    ! off to a certain minimum.
 
-                   rmu = rlv(i,j,k)
-                   rmu = rmu + rev(i,j,k)
-                   rmu = half*rmu/(w(i,j,k,irho)*vol(i,j,k))
+                    lam1 = abs(unAvg - sface + aAvg)
+                    lam2 = abs(unAvg - sface - aAvg)
+                    lam3 = abs(unAvg - sface)
 
-                   ! Add the viscous contribution in i-direction to the
-                   ! (inverse) of the time step.
+                    rrad = lam3 + aAvg
 
-                   sx = si(i,j,k,1) + si(i-1,j,k,1)
-                   sy = si(i,j,k,2) + si(i-1,j,k,2)
-                   sz = si(i,j,k,3) + si(i-1,j,k,3)
+                    ! Multiply the eigenvalues by the area to obtain
+                    ! the correct values for the dissipation term.
 
-                   vsi        = rmu*(sx*sx + sy*sy + sz*sz)
-                   dtl(i,j,k) = dtl(i,j,k) + vsi
+                    lam1 = max(lam1, epsAcoustic * rrad) * area
+                    lam2 = max(lam2, epsAcoustic * rrad) * area
+                    lam3 = max(lam3, epsShear * rrad) * area
 
-                   ! Add the viscous contribution in j-direction to the
-                   ! (inverse) of the time step.
+                    ! Some abbreviations, which occur quite often in the
+                    ! dissipation terms.
 
-                   sx = sj(i,j,k,1) + sj(i,j-1,k,1)
-                   sy = sj(i,j,k,2) + sj(i,j-1,k,2)
-                   sz = sj(i,j,k,3) + sj(i,j-1,k,3)
+                    abv1 = half * (lam1 + lam2)
+                    abv2 = half * (lam1 - lam2)
+                    abv3 = abv1 - lam3
 
-                   vsj        = rmu*(sx*sx + sy*sy + sz*sz)
-                   dtl(i,j,k) = dtl(i,j,k) + vsj
+                    abv4 = gm1 * (alphaAvg * dr - uAvg * dru - vAvg * drv &
+                                  - wAvg * drw + dre) - gm53 * drk
+                    abv5 = sx * dru + sy * drv + sz * drw - unAvg * dr
 
-                   ! Add the viscous contribution in k-direction to the
-                   ! (inverse) of the time step.
+                    abv6 = abv3 * abv4 * ova2Avg + abv2 * abv5 * ovaAvg
+                    abv7 = abv2 * abv4 * ovaAvg + abv3 * abv5
 
-                   sx = sk(i,j,k,1) + sk(i,j,k-1,1)
-                   sy = sk(i,j,k,2) + sk(i,j,k-1,2)
-                   sz = sk(i,j,k,3) + sk(i,j,k-1,3)
+                    ! Compute and scatter the dissipative flux.
+                    ! Density.
 
-                   vsk        = rmu*(sx*sx + sy*sy + sz*sz)
-                   dtl(i,j,k) = dtl(i,j,k) + vsk
+                    fs = lam3 * dr + abv6
+                    fw(i + 1, j, k, irho) = fw(i + 1, j, k, irho) + fs
+                    fw(i, j, k, irho) = fw(i, j, k, irho) - fs
 
-                enddo
-             enddo
-          enddo
-       endif viscousTerm
+                    ! X-momentum.
 
+                    fs = lam3 * dru + uAvg * abv6 + sx * abv7
+                    fw(i + 1, j, k, imx) = fw(i + 1, j, k, imx) + fs
+                    fw(i, j, k, imx) = fw(i, j, k, imx) - fs
 
-       ! For the spectral mode an additional term term must be
-       ! taken into account, which corresponds to the contribution
-       ! of the highest frequency.
+                    ! Y-momentum.
 
-       if(equationMode == timeSpectral) then
+                    fs = lam3 * drv + vAvg * abv6 + sy * abv7
+                    fw(i + 1, j, k, imy) = fw(i + 1, j, k, imy) + fs
+                    fw(i, j, k, imy) = fw(i, j, k, imy) - fs
 
-          tmp = nTimeIntervalsSpectral*pi*timeRef &
-               / sections(sectionID)%timePeriod
+                    ! Z-momentum.
 
-          ! Loop over the owned cell centers and add the term.
+                    fs = lam3 * drw + wAvg * abv6 + sz * abv7
+                    fw(i + 1, j, k, imz) = fw(i + 1, j, k, imz) + fs
+                    fw(i, j, k, imz) = fw(i, j, k, imz) - fs
 
-          do k=2,kl
-             do j=2,jl
-                do i=2,il
-                   dtl(i,j,k) = dtl(i,j,k) + tmp*vol(i,j,k)
-                enddo
-             enddo
-          enddo
+                    ! Energy.
 
-       endif
+                    fs = lam3 * dre + hAvg * abv6 + unAvg * abv7
+                    fw(i + 1, j, k, irhoE) = fw(i + 1, j, k, irhoE) + fs
+                    fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - fs
 
-       ! Currently the inverse of dt/vol is stored in dtl. Invert
-       ! this value such that the time step per unit cfl number is
-       ! stored and correct in cases of high gradients.
+                end do
+            end do
+        end do
+        !
+        !       Dissipative fluxes in the j-direction.
+        !
+        do k = 2, kl
+            do j = 1, jl
+                do i = 2, il
+
+                    ! Compute the dissipation coefficients for this face.
+
+                    ppor = zero
+                    if (porJ(i, j, k) == normalFlux) ppor = one
+
+                    dis2 = ppor * fis2 * min(dpMax, max(dss(i, j, k, 2), dss(i, j + 1, k, 2)))
+                    dis4 = dim(ppor * fis4, dis2)
+
+                    ! Construct the vector of the first and third differences
+                    ! multiplied by the appropriate constants.
+
+                    ddw1 = w(i, j + 1, k, irho) - w(i, j, k, irho)
+                    dr = dis2 * ddw1 &
+                         - dis4 * (w(i, j + 2, k, irho) - w(i, j - 1, k, irho) - three * ddw1)
+
+                    ddw2 = w(i, j + 1, k, irho) * w(i, j + 1, k, ivx) &
+                           - w(i, j, k, irho) * w(i, j, k, ivx)
+                    dru = dis2 * ddw2 &
+                          - dis4 * (w(i, j + 2, k, irho) * w(i, j + 2, k, ivx) &
+                                    - w(i, j - 1, k, irho) * w(i, j - 1, k, ivx) - three * ddw2)
+
+                    ddw3 = w(i, j + 1, k, irho) * w(i, j + 1, k, ivy) &
+                           - w(i, j, k, irho) * w(i, j, k, ivy)
+                    drv = dis2 * ddw3 &
+                          - dis4 * (w(i, j + 2, k, irho) * w(i, j + 2, k, ivy) &
+                                    - w(i, j - 1, k, irho) * w(i, j - 1, k, ivy) - three * ddw3)
+
+                    ddw4 = w(i, j + 1, k, irho) * w(i, j + 1, k, ivz) &
+                           - w(i, j, k, irho) * w(i, j, k, ivz)
+                    drw = dis2 * ddw4 &
+                          - dis4 * (w(i, j + 2, k, irho) * w(i, j + 2, k, ivz) &
+                                    - w(i, j - 1, k, irho) * w(i, j - 1, k, ivz) - three * ddw4)
+
+                    ddw5 = w(i, j + 1, k, irhoE) - w(i, j, k, irhoE)
+                    dre = dis2 * ddw5 &
+                          - dis4 * (w(i, j + 2, k, irhoE) - w(i, j - 1, k, irhoE) - three * ddw5)
+
+                    ! In case a k-equation is present, compute the difference
+                    ! of rhok and store the average value of k. If not present,
+                    ! set both these values to zero, such that later on no
+                    ! decision needs to be made anymore.
+                    drk = zero
+                    kAvg = zero
 
-       do k=2,kl
-          do j=2,jl
-             do i=2,il
-                dpi = abs(p(i+1,j,k) - two*p(i,j,k) + p(i-1,j,k)) &
-                     /    (p(i+1,j,k) + two*p(i,j,k) + p(i-1,j,k) + plim)
-                dpj = abs(p(i,j+1,k) - two*p(i,j,k) + p(i,j-1,k)) &
-                     /    (p(i,j+1,k) + two*p(i,j,k) + p(i,j-1,k) + plim)
-                dpk = abs(p(i,j,k+1) - two*p(i,j,k) + p(i,j,k-1)) &
-                     /    (p(i,j,k+1) + two*p(i,j,k) + p(i,j,k-1) + plim)
-                rfl = one/(one + b*(dpi  +dpj  +dpk))
+                    if (correctForK) then
+                        ddw6 = w(i, j + 1, k, irho) * w(i, j + 1, k, itu1) &
+                               - w(i, j, k, irho) * w(i, j, k, itu1)
+                        drk = dis2 * ddw6 &
+                              - dis4 * (w(i, j + 2, k, irho) * w(i, j + 2, k, itu1) &
+                                        - w(i, j - 1, k, irho) * w(i, j - 1, k, itu1) - three * ddw6)
 
-                dtl(i,j,k) = rfl/dtl(i,j,k)
-             enddo
-          enddo
-       enddo
-    end if
+                        kAvg = half * (w(i, j, k, itu1) + w(i, j + 1, k, itu1))
+                    end if
 
+                    ! Compute the average value of gamma and compute some
+                    ! expressions in which it occurs.
 
-  end subroutine timeStep
+                    gammaAvg = half * (gamma(i, j + 1, k) + gamma(i, j, k))
+                    gm1 = gammaAvg - one
+                    ovgm1 = one / gm1
+                    gm53 = gammaAvg - five * third
 
-  subroutine inviscidCentralFlux
+                    ! Compute the average state at the interface.
 
-    ! ---------------------------------------------
-    !               Inviscid central flux
-    ! ---------------------------------------------
-    use constants
-    use blockPointers, only : blockIsMoving, nBkGlobal
-    use flowVarRefState, only : timeRef
-    use cgnsGrid, only : cgnsDoms
-    use inputPhysics, only : equationMode
-    implicit none
-
-    ! Variables for inviscid central flux
-    real(kind=realType) :: qsp, qsm, rqsp, rqsm, porVel, porFlux
-    real(kind=realType) :: pa, vnp, vnm, fs, sFace
-    integer(kind=intType) :: i, j, k
-    real(kind=realType) :: wwx, wwy, wwz, rvol
-
-    do k=2, kl
-       do j=2, jl
-          do i=1, il
-
-             ! Set the dot product of the grid velocity and the
-             ! normal in i-direction for a moving face.
-
-             sFace = sFaceI(i,j,k)
-
-             ! Compute the normal velocities of the left and right state.
-
-             vnp = w(i+1,j,k,ivx)*sI(i,j,k,1) &
-                  + w(i+1,j,k,ivy)*sI(i,j,k,2) &
-                  + w(i+1,j,k,ivz)*sI(i,j,k,3)
-             vnm = w(i,  j,k,ivx)*sI(i,j,k,1) &
-                  + w(i,  j,k,ivy)*sI(i,j,k,2) &
-                  + w(i,  j,k,ivz)*sI(i,j,k,3)
-             ! Set the values of the porosities for this face.
-             ! porVel defines the porosity w.r.t. velocity;
-             ! porFlux defines the porosity w.r.t. the entire flux.
-             ! The latter is only zero for a discontinuous block
-             ! boundary that must be treated conservatively.
-             ! The default value of porFlux is 0.5, such that the
-             ! correct central flux is scattered to both cells.
-             ! In case of a boundFlux the normal velocity is set
-             ! to sFace.
-
-             porVel  = one
-             porFlux = half
-             if(porI(i,j,k) == noFlux)    porFlux = zero
-             if(porI(i,j,k) == boundFlux) then
-                porVel = zero
-                vnp    = sFace
-                vnm    = sFace
-             endif
-
-             ! Incorporate porFlux in porVel.
-
-             porVel = porVel*porFlux
-
-             ! Compute the normal velocities relative to the grid for
-             ! the face as well as the mass fluxes.
-
-             qsp = (vnp -sFace)*porVel
-             qsm = (vnm -sFace)*porVel
-
-             rqsp = qsp*w(i+1,j,k,irho)
-             rqsm = qsm*w(i,  j,k,irho)
-
-             ! Compute the sum of the pressure multiplied by porFlux.
-             ! For the default value of porFlux, 0.5, this leads to
-             ! the average pressure.
-
-             pa = porFlux*(p(i+1,j,k) + p(i,j,k))
-
-             ! Compute the fluxes and scatter them to the cells
-             ! i,j,k and i+1,j,k. Store the density flux in the
-             ! mass flow of the appropriate sliding mesh interface.
-
-             fs = rqsp + rqsm
-             dw(i+1,j,k,irho) = dw(i+1,j,k,irho) - fs
-             dw(i,  j,k,irho) = dw(i,  j,k,irho) + fs
-
-             fs = rqsp*w(i+1,j,k,ivx) + rqsm*w(i,j,k,ivx) &
-                  + pa*sI(i,j,k,1)
-             dw(i+1,j,k,imx) = dw(i+1,j,k,imx) - fs
-             dw(i,  j,k,imx) = dw(i,  j,k,imx) + fs
-
-             fs = rqsp*w(i+1,j,k,ivy) + rqsm*w(i,j,k,ivy) &
-                  + pa*sI(i,j,k,2)
-             dw(i+1,j,k,imy) = dw(i+1,j,k,imy) - fs
-             dw(i,  j,k,imy) = dw(i,  j,k,imy) + fs
-
-             fs = rqsp*w(i+1,j,k,ivz) + rqsm*w(i,j,k,ivz) &
-                  + pa*sI(i,j,k,3)
-             dw(i+1,j,k,imz) = dw(i+1,j,k,imz) - fs
-             dw(i,  j,k,imz) = dw(i,  j,k,imz) + fs
-
-             fs = qsp*w(i+1,j,k,irhoE) + qsm*w(i,j,k,irhoE) &
-                  + porFlux*(vnp*p(i+1,j,k) + vnm*p(i,j,k))
-             dw(i+1,j,k,irhoE) = dw(i+1,j,k,irhoE) - fs
-             dw(i,  j,k,irhoE) = dw(i,  j,k,irhoE) + fs
-          enddo
-       enddo
-    enddo
-
-    do k=2,kl
-       do j=1,jl
-          do i=2,il
-
-             ! Set the dot product of the grid velocity and the
-             ! normal in j-direction for a moving face.
-
-             sFace = sFaceJ(i,j,k)
-
-             ! Compute the normal velocities of the left and right state.
-
-             vnp = w(i,j+1,k,ivx)*sJ(i,j,k,1) &
-                  + w(i,j+1,k,ivy)*sJ(i,j,k,2) &
-                  + w(i,j+1,k,ivz)*sJ(i,j,k,3)
-             vnm = w(i,j,  k,ivx)*sJ(i,j,k,1) &
-                  + w(i,j,  k,ivy)*sJ(i,j,k,2) &
-                  + w(i,j,  k,ivz)*sJ(i,j,k,3)
-
-             ! Set the values of the porosities for this face.
-             ! porVel defines the porosity w.r.t. velocity;
-             ! porFlux defines the porosity w.r.t. the entire flux.
-             ! The latter is only zero for a discontinuous block
-             ! boundary that must be treated conservatively.
-             ! The default value of porFlux is 0.5, such that the
-             ! correct central flux is scattered to both cells.
-             ! In case of a boundFlux the normal velocity is set
-             ! to sFace.
-
-             porVel  = one
-             porFlux = half
-             if(porJ(i,j,k) == noFlux)    porFlux = zero
-             if(porJ(i,j,k) == boundFlux) then
-                porVel = zero
-                vnp    = sFace
-                vnm    = sFace
-             endif
-
-             ! Incorporate porFlux in porVel.
-
-             porVel = porVel*porFlux
-
-             ! Compute the normal velocities for the face as well as the
-             ! mass fluxes.
-
-             qsp = (vnp - sFace)*porVel
-             qsm = (vnm - sFace)*porVel
-
-             rqsp = qsp*w(i,j+1,k,irho)
-             rqsm = qsm*w(i,j,  k,irho)
+                    uAvg = half * (w(i, j + 1, k, ivx) + w(i, j, k, ivx))
+                    vAvg = half * (w(i, j + 1, k, ivy) + w(i, j, k, ivy))
+                    wAvg = half * (w(i, j + 1, k, ivz) + w(i, j, k, ivz))
+                    a2Avg = half * (gamma(i, j + 1, k) * p(i, j + 1, k) / w(i, j + 1, k, irho) &
+                                    + gamma(i, j, k) * p(i, j, k) / w(i, j, k, irho))
 
-             ! Compute the sum of the pressure multiplied by porFlux.
-             ! For the default value of porFlux, 0.5, this leads to
-             ! the average pressure.
-
-             pa = porFlux*(p(i,j+1,k) + p(i,j,k))
-
-             ! Compute the fluxes and scatter them to the cells
-             ! i,j,k and i,j+1,k. Store the density flux in the
-             ! mass flow of the appropriate sliding mesh interface.
-
-             fs = rqsp + rqsm
-             dw(i,j+1,k,irho) = dw(i,j+1,k,irho) - fs
-             dw(i,j,  k,irho) = dw(i,j,  k,irho) + fs
-
-             fs = rqsp*w(i,j+1,k,ivx) + rqsm*w(i,j,k,ivx) &
-                  + pa*sJ(i,j,k,1)
-             dw(i,j+1,k,imx) = dw(i,j+1,k,imx) - fs
-             dw(i,j,  k,imx) = dw(i,j,  k,imx) + fs
-
-             fs = rqsp*w(i,j+1,k,ivy) + rqsm*w(i,j,k,ivy) &
-                  + pa*sJ(i,j,k,2)
-             dw(i,j+1,k,imy) = dw(i,j+1,k,imy) - fs
-             dw(i,j,  k,imy) = dw(i,j,  k,imy) + fs
-
-             fs = rqsp*w(i,j+1,k,ivz) + rqsm*w(i,j,k,ivz) &
-                  + pa*sJ(i,j,k,3)
-             dw(i,j+1,k,imz) = dw(i,j+1,k,imz) - fs
-             dw(i,j,  k,imz) = dw(i,j,  k,imz) + fs
-
-             fs = qsp*w(i,j+1,k,irhoE) + qsm*w(i,j,k,irhoE) &
-                  + porFlux*(vnp*p(i,j+1,k) + vnm*p(i,j,k))
-             dw(i,j+1,k,irhoE) = dw(i,j+1,k,irhoE) - fs
-             dw(i,j,  k,irhoE) = dw(i,j,  k,irhoE) + fs
-          enddo
-       enddo
-    enddo
-
-    do k=1,kl
-       do j=2,jl
-          do i=2,il
-
-             ! Set the dot product of the grid velocity and the
-             ! normal in k-direction for a moving face.
-
-             sFace = sFaceK(i,j,k)
-
-             ! Compute the normal velocities of the left and right state.
-
-             vnp = w(i,j,k+1,ivx)*sK(i,j,k,1) &
-                  + w(i,j,k+1,ivy)*sK(i,j,k,2) &
-                  + w(i,j,k+1,ivz)*sK(i,j,k,3)
-             vnm = w(i,j,k,  ivx)*sK(i,j,k,1) &
-                  + w(i,j,k,  ivy)*sK(i,j,k,2) &
-                  + w(i,j,k,  ivz)*sK(i,j,k,3)
-
-             ! Set the values of the porosities for this face.
-             ! porVel defines the porosity w.r.t. velocity;
-             ! porFlux defines the porosity w.r.t. the entire flux.
-             ! The latter is only zero for a discontinuous block
-             ! block boundary that must be treated conservatively.
-             ! The default value of porFlux is 0.5, such that the
-             ! correct central flux is scattered to both cells.
-             ! In case of a boundFlux the normal velocity is set
-             ! to sFace.
-
-             porVel  = one
-             porFlux = half
-
-             if(porK(i,j,k) == noFlux)    porFlux = zero
-             if(porK(i,j,k) == boundFlux) then
-                porVel = zero
-                vnp    = sFace
-                vnm    = sFace
-             endif
-
-             ! Incorporate porFlux in porVel.
-
-             porVel = porVel*porFlux
-
-             ! Compute the normal velocities for the face as well as the
-             ! mass fluxes.
-
-             qsp = (vnp - sFace)*porVel
-             qsm = (vnm - sFace)*porVel
-
-             rqsp = qsp*w(i,j,k+1,irho)
-             rqsm = qsm*w(i,j,k,  irho)
-
-             ! Compute the sum of the pressure multiplied by porFlux.
-             ! For the default value of porFlux, 0.5, this leads to
-             ! the average pressure.
-
-             pa = porFlux*(p(i,j,k+1) + p(i,j,k))
-
-             ! Compute the fluxes and scatter them to the cells
-             ! i,j,k and i,j,k+1. Store the density flux in the
-             ! mass flow of the appropriate sliding mesh interface.
-
-             fs = rqsp + rqsm
-             dw(i,j,k+1,irho) = dw(i,j,k+1,irho) - fs
-             dw(i,j,k,  irho) = dw(i,j,k,  irho) + fs
-
-             fs = rqsp*w(i,j,k+1,ivx) + rqsm*w(i,j,k,ivx) &
-                  + pa*sK(i,j,k,1)
-             dw(i,j,k+1,imx) = dw(i,j,k+1,imx) - fs
-             dw(i,j,k,  imx) = dw(i,j,k,  imx) + fs
-
-             fs = rqsp*w(i,j,k+1,ivy) + rqsm*w(i,j,k,ivy) &
-                  + pa*sK(i,j,k,2)
-             dw(i,j,k+1,imy) = dw(i,j,k+1,imy) - fs
-             dw(i,j,k,  imy) = dw(i,j,k,  imy) + fs
-
-             fs = rqsp*w(i,j,k+1,ivz) + rqsm*w(i,j,k,ivz) &
-                  + pa*sK(i,j,k,3)
-             dw(i,j,k+1,imz) = dw(i,j,k+1,imz) - fs
-             dw(i,j,k,  imz) = dw(i,j,k,  imz) + fs
-
-             fs = qsp*w(i,j,k+1,irhoE) + qsm*w(i,j,k,irhoE) &
-                  + porFlux*(vnp*p(i,j,k+1) + vnm*p(i,j,k))
-             dw(i,j,k+1,irhoE) = dw(i,j,k+1,irhoE) - fs
-             dw(i,j,k,  irhoE) = dw(i,j,k,  irhoE) + fs
-
-          enddo
-       enddo
-    enddo
-
-    rotation: if(blockIsMoving .and. equationMode == steady) then
-
-       ! Compute the three nonDimensional angular velocities.
-
-       wwx = timeRef*cgnsDoms(nbkGlobal)%rotRate(1)
-       wwy = timeRef*cgnsDoms(nbkGlobal)%rotRate(2)
-       wwz = timeRef*cgnsDoms(nbkGlobal)%rotRate(3)
-
-       ! Loop over the internal cells of this block to compute the
-       ! rotational terms for the momentum equations.
-       do k=2, kl
-          do j=2, jl
-             do i=2, il
-                rvol = w(i, j, k, irho)*vol(i, j, k)
-                dw(i,j,k,imx) = dw(i,j,k,imx) &
-                     + rvol*(wwy*w(i,j,k,ivz) - wwz*w(i,j,k,ivy))
-                dw(i,j,k,imy) = dw(i,j,k,imy) &
-                     + rvol*(wwz*w(i,j,k,ivx) - wwx*w(i,j,k,ivz))
-                dw(i,j,k,imz) = dw(i,j,k,imz) &
-                     + rvol*(wwx*w(i,j,k,ivy) - wwy*w(i,j,k,ivx))
-             enddo
-          end do
-       end do
-    endif rotation
-
-  end subroutine inviscidCentralFlux
-
-  subroutine inviscidDissFluxMatrix
-    !
-    !       inviscidDissFluxMatrix computes the matrix artificial
-    !       dissipation term. Instead of the spectral radius, as used in
-    !       the scalar dissipation scheme, the absolute value of the flux
-    !       jacobian is used. This leads to a less diffusive and
-    !       consequently more accurate scheme. It is assumed that the
-    !       pointers in blockPointers already point to the correct block.
-    !
-    use constants
-    use flowVarRefState, only : pInfCorr
-    use inputDiscretization, only: vis2, vis4
-    use inputPhysics, only : equations
-    use iteration, only : rFil
-    use utils, only : getCorrectForK, myDim
-    implicit none
-    !
-    !      Local parameters.
-    !
-    real(kind=realType), parameter :: dpMax        = 0.25_realType
-    real(kind=realType), parameter :: epsAcoustic  = 0.25_realType
-    real(kind=realType), parameter :: epsShear     = 0.025_realType
-    real(kind=realType), parameter :: omega        = 0.5_realType
-    real(kind=realType), parameter :: oneMinOmega = one - omega
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, ind, ii
-
-    real(kind=realType) :: plim, sface
-    real(kind=realType) :: sfil, fis2, fis4
-    real(kind=realType) :: gammaAvg, gm1, ovgm1, gm53
-    real(kind=realType) :: ppor, rrad, dis2, dis4
-    real(kind=realType) :: dp1, dp2, tmp, fs
-    real(kind=realType) :: ddw1, ddw2, ddw3, ddw4, ddw5, ddw6
-    real(kind=realType) :: dr, dru, drv, drw, dre, drk, sx, sy, sz
-    real(kind=realType) :: uAvg, vAvg, wAvg, a2Avg, aAvg, hAvg
-    real(kind=realType) :: alphaAvg, unAvg, ovaAvg, ova2Avg
-    real(kind=realType) :: kAvg, lam1, lam2, lam3, area
-    real(kind=realType) :: abv1, abv2, abv3, abv4, abv5, abv6, abv7
-    logical :: correctForK
-
-    ! Set the value of plim. To be fully consistent this must have
-    ! the dimension of a pressure. Therefore a fraction of pInfCorr
-    ! is used.
-
-    plim = 0.001_realType*pInfCorr
-
-    ! Determine whether or not the total energy must be corrected
-    ! for the presence of the turbulent kinetic energy.
-
-    correctForK = getCorrectForK()
-
-    ! Initialize sface to zero. This value will be used if the
-    ! block is not moving.
-
-    sface = zero
-
-    ! Set a couple of constants for the scheme.
-
-    fis2 = rFil*vis2
-    fis4 = rFil*vis4
-    sfil = one - rFil
-
-    ! Initialize the dissipative residual to a certain times,
-    ! possibly zero, the previously stored value.
-
-    fw = sfil*fw
-
-    ! Compute the pressure sensor for each cell, in each direction:
-    do k=1,ke
-       do j=1,je
-          do i=singleHaloStart,ie
-             dss(i,j,k,1) =abs((p(i+1,j,k) - two*p(i,j,k) + p(i-1,j,k))        &
-                  /     (omega*(p(i+1,j,k) + two*p(i,j,k) + p(i-1,j,k)) &
-                  +      oneMinOmega*(abs(p(i+1,j,k) - p(i,j,k))      &
-                  +                   abs(p(i,j,k) - p(i-1,j,k))) + plim))
-
-
-             dss(i,j,k,2) =abs((p(i,j+1,k) - two*p(i,j,k) + p(i,j-1,k))        &
-                  /     (omega*(p(i,j+1,k) + two*p(i,j,k) + p(i,j-1,k)) &
-                  +      oneMinOmega*(abs(p(i,j+1,k) - p(i,j,k))      &
-                  +                   abs(p(i,j,k) - p(i,j-1,k))) + plim))
-
-             dss(i,j,k,3) =  abs((p(i,j,k+1) - two*p(i,j,k) + p(i,j,k-1))        &
-                  /     (omega*(p(i,j,k+1) + two*p(i,j,k) + p(i,j,k-1)) &
-                  +      oneMinOmega*(abs(p(i,j,k+1) - p(i,j,k))      &
-                  +                   abs(p(i,j,k) - p(i,j,k-1))) + plim))
-          end do
-       end do
-    end do
-    !
-    !       Dissipative fluxes in the i-direction.
-    !
-    do k=2,kl
-       do j=2,jl
-          do i=1,il
-
-             ! Compute the dissipation coefficients for this face.
-
-             ppor = zero
-             if(porI(i,j,k) == normalFlux) ppor = one
-             dis2 = ppor*fis2*min(dpMax, max(dss(i,j,k,1), dss(i+1,j,k,1)))
-             dis4 = dim(ppor*fis4, dis2)
-
-             ! Construct the vector of the first and third differences
-             ! multiplied by the appropriate constants.
-
-             ddw1 = w(i+1,j,k,irho) - w(i,j,k,irho)
-             dr  = dis2*ddw1 &
-                  - dis4*(w(i+2,j,k,irho) - w(i-1,j,k,irho) - three*ddw1)
-
-             ddw2 = w(i+1,j,k,irho)*w(i+1,j,k,ivx) &
-                  - w(i,j,k,irho)*w(i,j,k,ivx)
-             dru = dis2*ddw2                             &
-                  - dis4*(w(i+2,j,k,irho)*w(i+2,j,k,ivx) &
-                  -       w(i-1,j,k,irho)*w(i-1,j,k,ivx) - three*ddw2)
-
-             ddw3 = w(i+1,j,k,irho)*w(i+1,j,k,ivy) &
-                  - w(i,j,k,irho)*w(i,j,k,ivy)
-             drv = dis2*ddw3                             &
-                  - dis4*(w(i+2,j,k,irho)*w(i+2,j,k,ivy) &
-                  -       w(i-1,j,k,irho)*w(i-1,j,k,ivy) - three*ddw3)
-
-             ddw4 = w(i+1,j,k,irho)*w(i+1,j,k,ivz) &
-                  - w(i,j,k,irho)*w(i,j,k,ivz)
-             drw = dis2*ddw4                             &
-                  - dis4*(w(i+2,j,k,irho)*w(i+2,j,k,ivz) &
-                  -       w(i-1,j,k,irho)*w(i-1,j,k,ivz) - three*ddw4)
-
-             ddw5 = w(i+1,j,k,irhoE) - w(i,j,k,irhoE)
-             dre = dis2*ddw5 &
-                  - dis4*(w(i+2,j,k,irhoE) - w(i-1,j,k,irhoE) - three*ddw5)
-
-             ! In case a k-equation is present, compute the difference
-             ! of rhok and store the average value of k. If not present,
-             ! set both these values to zero, such that later on no
-             ! decision needs to be made anymore.
-             drk   = zero
-             kAvg = zero
+                    area = sqrt(sj(i, j, k, 1)**2 + sj(i, j, k, 2)**2 + sj(i, j, k, 3)**2)
+                    tmp = one / max(1.e-25_realType, area)
+                    sx = sj(i, j, k, 1) * tmp
+                    sy = sj(i, j, k, 2) * tmp
+                    sz = sj(i, j, k, 3) * tmp
 
-             if( correctForK ) then
-                ddw6 = w(i+1,j,k,irho)*w(i+1,j,k,itu1) &
-                     - w(i,j,k,irho)*w(i,j,k,itu1)
-                drk = dis2*ddw6                              &
-                     - dis4*(w(i+2,j,k,irho)*w(i+2,j,k,itu1) &
-                     -       w(i-1,j,k,irho)*w(i-1,j,k,itu1) - three*ddw6)
+                    alphaAvg = half * (uAvg**2 + vAvg**2 + wAvg**2)
+                    hAvg = alphaAvg + ovgm1 * (a2Avg - gm53 * kAvg)
+                    aAvg = sqrt(a2Avg)
+                    unAvg = uAvg * sx + vAvg * sy + wAvg * sz
+                    ovaAvg = one / aAvg
+                    ova2Avg = one / a2Avg
 
-                kAvg = half*(w(i,j,k,itu1) + w(i+1,j,k,itu1))
-             endif
+                    ! The mesh velocity if the face is moving. It must be
+                    ! divided by the area to obtain a true velocity.
 
-             ! Compute the average value of gamma and compute some
-             ! expressions in which it occurs.
-
-             gammaAvg = half*(gamma(i+1,j,k) + gamma(i,j,k))
-             gm1      = gammaAvg - one
-             ovgm1    = one/gm1
-             gm53     = gammaAvg - five*third
-
-             ! Compute the average state at the interface.
-
-             uAvg  = half*(w(i+1,j,k,ivx) + w(i,j,k,ivx))
-             vAvg  = half*(w(i+1,j,k,ivy) + w(i,j,k,ivy))
-             wAvg  = half*(w(i+1,j,k,ivz) + w(i,j,k,ivz))
-             a2Avg = half*(gamma(i+1,j,k)*p(i+1,j,k)/w(i+1,j,k,irho) &
-                  +       gamma(i,  j,k)*p(i,  j,k)/w(i,  j,k,irho))
-
-             area = sqrt(si(i,j,k,1)**2 + si(i,j,k,2)**2 + si(i,j,k,3)**2)
-             tmp  = one/max(1.e-25_realType,area)
-             sx   = si(i,j,k,1)*tmp
-             sy   = si(i,j,k,2)*tmp
-             sz   = si(i,j,k,3)*tmp
-
-             alphaAvg = half*(uAvg**2 + vAvg**2 + wAvg**2)
-             hAvg     = alphaAvg + ovgm1*(a2Avg - gm53*kAvg)
-             aAvg     = sqrt(a2Avg)
-             unAvg    = uAvg*sx + vAvg*sy + wAvg*sz
-             ovaAvg   = one/aAvg
-             ova2Avg  = one/a2Avg
-
-             ! The mesh velocity if the face is moving. It must be
-             ! divided by the area to obtain a true velocity.
-
-             sface = sFaceI(i,j,k)*tmp
-
-             ! Compute the absolute values of the three eigenvalues
-             ! and make sure they don't become zero by cutting them
-             ! off to a certain minimum.
-
-             lam1 = abs(unAvg - sface + aAvg)
-             lam2 = abs(unAvg - sface - aAvg)
-             lam3 = abs(unAvg - sface)
-
-             rrad = lam3 + aAvg
+                    sface = sFaceJ(i, j, k) * tmp
 
-             ! Multiply the eigenvalues by the area to obtain
-             ! the correct values for the dissipation term.
+                    ! Compute the absolute values of the three eigenvalues
+                    ! and make sure they don't become zero by cutting them
+                    ! off to a certain minimum.
 
-             lam1 = max(lam1,epsAcoustic*rrad)*area
-             lam2 = max(lam2,epsAcoustic*rrad)*area
-             lam3 = max(lam3,epsShear*rrad)*area
-
-             ! Some abbreviations, which occur quite often in the
-             ! dissipation terms.
-
-             abv1 = half*(lam1 + lam2)
-             abv2 = half*(lam1 - lam2)
-             abv3 = abv1 - lam3
-
-             abv4 = gm1*(alphaAvg*dr - uAvg*dru -vAvg*drv &
-                  -      wAvg*drw + dre) - gm53*drk
-             abv5 = sx*dru + sy*drv + sz*drw - unAvg*dr
-
-             abv6 = abv3*abv4*ova2Avg + abv2*abv5*ovaAvg
-             abv7 = abv2*abv4*ovaAvg  + abv3*abv5
-
-             ! Compute and scatter the dissipative flux.
-             ! Density.
-
-             fs               = lam3*dr  + abv6
-             fw(i+1,j,k,irho) = fw(i+1,j,k,irho) + fs
-             fw(i,j,k,irho)   = fw(i,j,k,irho)   - fs
+                    lam1 = abs(unAvg - sface + aAvg)
+                    lam2 = abs(unAvg - sface - aAvg)
+                    lam3 = abs(unAvg - sface)
 
-             ! X-momentum.
-
-             fs              = lam3*dru + uAvg*abv6 + sx*abv7
-             fw(i+1,j,k,imx) = fw(i+1,j,k,imx) + fs
-             fw(i,j,k,imx)   = fw(i,j,k,imx)   - fs
+                    rrad = lam3 + aAvg
 
-             ! Y-momentum.
-
-             fs              = lam3*drv + vAvg*abv6 + sy*abv7
-             fw(i+1,j,k,imy) = fw(i+1,j,k,imy) + fs
-             fw(i,j,k,imy)   = fw(i,j,k,imy)   - fs
-
-             ! Z-momentum.
-
-             fs              = lam3*drw + wAvg*abv6 + sz*abv7
-             fw(i+1,j,k,imz) = fw(i+1,j,k,imz) + fs
-             fw(i,j,k,imz)   = fw(i,j,k,imz)   - fs
+                    ! Multiply the eigenvalues by the area to obtain
+                    ! the correct values for the dissipation term.
 
-             ! Energy.
-
-             fs                = lam3*dre + hAvg*abv6 + unAvg*abv7
-             fw(i+1,j,k,irhoE) = fw(i+1,j,k,irhoE) + fs
-             fw(i,j,k,irhoE)   = fw(i,j,k,irhoE)   - fs
+                    lam1 = max(lam1, epsAcoustic * rrad) * area
+                    lam2 = max(lam2, epsAcoustic * rrad) * area
+                    lam3 = max(lam3, epsShear * rrad) * area
 
-          end do
-       end do
-    end do
-    !
-    !       Dissipative fluxes in the j-direction.
-    !
-    do k=2,kl
-       do j=1,jl
-          do i=2,il
+                    ! Some abbreviations, which occur quite often in the
+                    ! dissipation terms.
 
+                    abv1 = half * (lam1 + lam2)
+                    abv2 = half * (lam1 - lam2)
+                    abv3 = abv1 - lam3
 
-             ! Compute the dissipation coefficients for this face.
+                    abv4 = gm1 * (alphaAvg * dr - uAvg * dru - vAvg * drv &
+                                  - wAvg * drw + dre) - gm53 * drk
+                    abv5 = sx * dru + sy * drv + sz * drw - unAvg * dr
 
-             ppor = zero
-             if(porJ(i,j,k) == normalFlux) ppor = one
+                    abv6 = abv3 * abv4 * ova2Avg + abv2 * abv5 * ovaAvg
+                    abv7 = abv2 * abv4 * ovaAvg + abv3 * abv5
 
-             dis2 = ppor*fis2*min(dpMax, max(dss(i,j,k,2), dss(i,j+1,k,2)))
-             dis4 = dim(ppor*fis4, dis2)
+                    ! Compute and scatter the dissipative flux.
+                    ! Density.
 
-             ! Construct the vector of the first and third differences
-             ! multiplied by the appropriate constants.
+                    fs = lam3 * dr + abv6
+                    fw(i, j + 1, k, irho) = fw(i, j + 1, k, irho) + fs
+                    fw(i, j, k, irho) = fw(i, j, k, irho) - fs
 
-             ddw1 = w(i,j+1,k,irho) - w(i,j,k,irho)
-             dr  = dis2*ddw1 &
-                  - dis4*(w(i,j+2,k,irho) - w(i,j-1,k,irho) - three*ddw1)
+                    ! X-momentum.
 
-             ddw2 = w(i,j+1,k,irho)*w(i,j+1,k,ivx) &
-                  - w(i,j,k,irho)*w(i,j,k,ivx)
-             dru = dis2*ddw2                             &
-                  - dis4*(w(i,j+2,k,irho)*w(i,j+2,k,ivx) &
-                  -       w(i,j-1,k,irho)*w(i,j-1,k,ivx) - three*ddw2)
+                    fs = lam3 * dru + uAvg * abv6 + sx * abv7
+                    fw(i, j + 1, k, imx) = fw(i, j + 1, k, imx) + fs
+                    fw(i, j, k, imx) = fw(i, j, k, imx) - fs
 
-             ddw3 = w(i,j+1,k,irho)*w(i,j+1,k,ivy) &
-                  - w(i,j,k,irho)*w(i,j,k,ivy)
-             drv = dis2*ddw3 &
-                  - dis4*(w(i,j+2,k,irho)*w(i,j+2,k,ivy) &
-                  -       w(i,j-1,k,irho)*w(i,j-1,k,ivy) - three*ddw3)
+                    ! Y-momentum.
 
-             ddw4 = w(i,j+1,k,irho)*w(i,j+1,k,ivz) &
-                  - w(i,j,k,irho)*w(i,j,k,ivz)
-             drw = dis2*ddw4 &
-                  - dis4*(w(i,j+2,k,irho)*w(i,j+2,k,ivz) &
-                  -       w(i,j-1,k,irho)*w(i,j-1,k,ivz) - three*ddw4)
+                    fs = lam3 * drv + vAvg * abv6 + sy * abv7
+                    fw(i, j + 1, k, imy) = fw(i, j + 1, k, imy) + fs
+                    fw(i, j, k, imy) = fw(i, j, k, imy) - fs
 
-             ddw5 = w(i,j+1,k,irhoE) - w(i,j,k,irhoE)
-             dre = dis2*ddw5 &
-                  - dis4*(w(i,j+2,k,irhoE) - w(i,j-1,k,irhoE) - three*ddw5)
+                    ! Z-momentum.
 
-             ! In case a k-equation is present, compute the difference
-             ! of rhok and store the average value of k. If not present,
-             ! set both these values to zero, such that later on no
-             ! decision needs to be made anymore.
-             drk   = zero
-             kAvg = zero
+                    fs = lam3 * drw + wAvg * abv6 + sz * abv7
+                    fw(i, j + 1, k, imz) = fw(i, j + 1, k, imz) + fs
+                    fw(i, j, k, imz) = fw(i, j, k, imz) - fs
 
-             if( correctForK ) then
-                ddw6 = w(i,j+1,k,irho)*w(i,j+1,k,itu1) &
-                     - w(i,j,k,irho)*w(i,j,k,itu1)
-                drk = dis2*ddw6                              &
-                     - dis4*(w(i,j+2,k,irho)*w(i,j+2,k,itu1) &
-                     -       w(i,j-1,k,irho)*w(i,j-1,k,itu1) - three*ddw6)
+                    ! Energy.
 
-                kAvg = half*(w(i,j,k,itu1) + w(i,j+1,k,itu1))
-             endif
+                    fs = lam3 * dre + hAvg * abv6 + unAvg * abv7
+                    fw(i, j + 1, k, irhoE) = fw(i, j + 1, k, irhoE) + fs
+                    fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - fs
 
-             ! Compute the average value of gamma and compute some
-             ! expressions in which it occurs.
+                end do
+            end do
+        end do
+        !
+        !       Dissipative fluxes in the k-direction.
+        !
+        do k = 1, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Compute the dissipation coefficients for this face.
+
+                    ppor = zero
+                    if (porK(i, j, k) == normalFlux) ppor = one
+
+                    dis2 = ppor * fis2 * min(dpMax, max(dss(i, j, k, 3), dss(i, j, k + 1, 3)))
+                    dis4 = dim(ppor * fis4, dis2)
+
+                    ! Construct the vector of the first and third differences
+                    ! multiplied by the appropriate constants.
+
+                    ddw1 = w(i, j, k + 1, irho) - w(i, j, k, irho)
+                    dr = dis2 * ddw1 &
+                         - dis4 * (w(i, j, k + 2, irho) - w(i, j, k - 1, irho) - three * ddw1)
+
+                    ddw2 = w(i, j, k + 1, irho) * w(i, j, k + 1, ivx) &
+                           - w(i, j, k, irho) * w(i, j, k, ivx)
+                    dru = dis2 * ddw2 &
+                          - dis4 * (w(i, j, k + 2, irho) * w(i, j, k + 2, ivx) &
+                                    - w(i, j, k - 1, irho) * w(i, j, k - 1, ivx) - three * ddw2)
+
+                    ddw3 = w(i, j, k + 1, irho) * w(i, j, k + 1, ivy) &
+                           - w(i, j, k, irho) * w(i, j, k, ivy)
+                    drv = dis2 * ddw3 &
+                          - dis4 * (w(i, j, k + 2, irho) * w(i, j, k + 2, ivy) &
+                                    - w(i, j, k - 1, irho) * w(i, j, k - 1, ivy) - three * ddw3)
+
+                    ddw4 = w(i, j, k + 1, irho) * w(i, j, k + 1, ivz) &
+                           - w(i, j, k, irho) * w(i, j, k, ivz)
+                    drw = dis2 * ddw4 &
+                          - dis4 * (w(i, j, k + 2, irho) * w(i, j, k + 2, ivz) &
+                                    - w(i, j, k - 1, irho) * w(i, j, k - 1, ivz) - three * ddw4)
+
+                    ddw5 = w(i, j, k + 1, irhoE) - w(i, j, k, irhoE)
+                    dre = dis2 * ddw5 &
+                          - dis4 * (w(i, j, k + 2, irhoE) - w(i, j, k - 1, irhoE) - three * ddw5)
+
+                    ! In case a k-equation is present, compute the difference
+                    ! of rhok and store the average value of k. If not present,
+                    ! set both these values to zero, such that later on no
+                    ! decision needs to be made anymore.
+                    drk = zero
+                    kAvg = zero
 
-             gammaAvg = half*(gamma(i,j+1,k) + gamma(i,j,k))
-             gm1      = gammaAvg - one
-             ovgm1    = one/gm1
-             gm53     = gammaAvg - five*third
+                    if (correctForK) then
+                        ddw6 = w(i, j, k + 1, irho) * w(i, j, k + 1, itu1) &
+                               - w(i, j, k, irho) * w(i, j, k, itu1)
+                        drk = dis2 * ddw6 &
+                              - dis4 * (w(i, j, k + 2, irho) * w(i, j, k + 2, itu1) &
+                                        - w(i, j, k - 1, irho) * w(i, j, k - 1, itu1) - three * ddw6)
 
-             ! Compute the average state at the interface.
+                        kAvg = half * (w(i, j, k + 1, itu1) + w(i, j, k, itu1))
+                    end if
 
-             uAvg  = half*(w(i,j+1,k,ivx) + w(i,j,k,ivx))
-             vAvg  = half*(w(i,j+1,k,ivy) + w(i,j,k,ivy))
-             wAvg  = half*(w(i,j+1,k,ivz) + w(i,j,k,ivz))
-             a2Avg = half*(gamma(i,j+1,k)*p(i,j+1,k)/w(i,j+1,k,irho) &
-                  +       gamma(i,j,  k)*p(i,j,  k)/w(i,j,  k,irho))
+                    ! Compute the average value of gamma and compute some
+                    ! expressions in which it occurs.
 
-             area = sqrt(sj(i,j,k,1)**2 + sj(i,j,k,2)**2 + sj(i,j,k,3)**2)
-             tmp  = one/max(1.e-25_realType,area)
-             sx   = sj(i,j,k,1)*tmp
-             sy   = sj(i,j,k,2)*tmp
-             sz   = sj(i,j,k,3)*tmp
+                    gammaAvg = half * (gamma(i, j, k + 1) + gamma(i, j, k))
+                    gm1 = gammaAvg - one
+                    ovgm1 = one / gm1
+                    gm53 = gammaAvg - five * third
 
-             alphaAvg = half*(uAvg**2 + vAvg**2 + wAvg**2)
-             hAvg     = alphaAvg + ovgm1*(a2Avg - gm53*kAvg)
-             aAvg     = sqrt(a2Avg)
-             unAvg    = uAvg*sx + vAvg*sy + wAvg*sz
-             ovaAvg   = one/aAvg
-             ova2Avg  = one/a2Avg
+                    ! Compute the average state at the interface.
 
-             ! The mesh velocity if the face is moving. It must be
-             ! divided by the area to obtain a true velocity.
-
-             sface = sFaceJ(i,j,k)*tmp
+                    uAvg = half * (w(i, j, k + 1, ivx) + w(i, j, k, ivx))
+                    vAvg = half * (w(i, j, k + 1, ivy) + w(i, j, k, ivy))
+                    wAvg = half * (w(i, j, k + 1, ivz) + w(i, j, k, ivz))
+                    a2Avg = half * (gamma(i, j, k + 1) * p(i, j, k + 1) / w(i, j, k + 1, irho) &
+                                    + gamma(i, j, k) * p(i, j, k) / w(i, j, k, irho))
 
-             ! Compute the absolute values of the three eigenvalues
-             ! and make sure they don't become zero by cutting them
-             ! off to a certain minimum.
-
-             lam1 = abs(unAvg - sface + aAvg)
-             lam2 = abs(unAvg - sface - aAvg)
-             lam3 = abs(unAvg - sface)
+                    area = sqrt(sk(i, j, k, 1)**2 + sk(i, j, k, 2)**2 + sk(i, j, k, 3)**2)
+                    tmp = one / max(1.e-25_realType, area)
+                    sx = sk(i, j, k, 1) * tmp
+                    sy = sk(i, j, k, 2) * tmp
+                    sz = sk(i, j, k, 3) * tmp
 
-             rrad = lam3 + aAvg
+                    alphaAvg = half * (uAvg**2 + vAvg**2 + wAvg**2)
+                    hAvg = alphaAvg + ovgm1 * (a2Avg - gm53 * kAvg)
+                    aAvg = sqrt(a2Avg)
+                    unAvg = uAvg * sx + vAvg * sy + wAvg * sz
+                    ovaAvg = one / aAvg
+                    ova2Avg = one / a2Avg
 
-             ! Multiply the eigenvalues by the area to obtain
-             ! the correct values for the dissipation term.
-
-             lam1 = max(lam1,epsAcoustic*rrad)*area
-             lam2 = max(lam2,epsAcoustic*rrad)*area
-             lam3 = max(lam3,epsShear*rrad)*area
-
-             ! Some abbreviations, which occur quite often in the
-             ! dissipation terms.
-
-             abv1 = half*(lam1 + lam2)
-             abv2 = half*(lam1 - lam2)
-             abv3 = abv1 - lam3
-
-             abv4 = gm1*(alphaAvg*dr - uAvg*dru -vAvg*drv &
-                  -      wAvg*drw + dre) - gm53*drk
-             abv5 = sx*dru + sy*drv + sz*drw - unAvg*dr
-
-             abv6 = abv3*abv4*ova2Avg + abv2*abv5*ovaAvg
-             abv7 = abv2*abv4*ovaAvg  + abv3*abv5
-
-             ! Compute and scatter the dissipative flux.
-             ! Density.
-
-             fs               = lam3*dr  + abv6
-             fw(i,j+1,k,irho) = fw(i,j+1,k,irho) + fs
-             fw(i,j,k,irho)   = fw(i,j,k,irho)   - fs
-
-             ! X-momentum.
-
-             fs              = lam3*dru + uAvg*abv6 + sx*abv7
-             fw(i,j+1,k,imx) = fw(i,j+1,k,imx) + fs
-             fw(i,j,k,imx)   = fw(i,j,k,imx)   - fs
-
-             ! Y-momentum.
-
-             fs              = lam3*drv + vAvg*abv6 + sy*abv7
-             fw(i,j+1,k,imy) = fw(i,j+1,k,imy) + fs
-             fw(i,j,k,imy)   = fw(i,j,k,imy)   - fs
-
-             ! Z-momentum.
-
-             fs              = lam3*drw + wAvg*abv6 + sz*abv7
-             fw(i,j+1,k,imz) = fw(i,j+1,k,imz) + fs
-             fw(i,j,k,imz)   = fw(i,j,k,imz)   - fs
-
-             ! Energy.
-
-             fs                = lam3*dre + hAvg*abv6 + unAvg*abv7
-             fw(i,j+1,k,irhoE) = fw(i,j+1,k,irhoE) + fs
-             fw(i,j,k,irhoE)   = fw(i,j,k,irhoE)   - fs
+                    ! The mesh velocity if the face is moving. It must be
+                    ! divided by the area to obtain a true velocity.
 
-          end do
-       end do
-    end do
-    !
-    !       Dissipative fluxes in the k-direction.
-    !
-    do k=1,kl
-       do j=2,jl
-          do i=2,il
+                    sface = sFaceK(i, j, k) * tmp
 
-             ! Compute the dissipation coefficients for this face.
+                    ! Compute the absolute values of the three eigenvalues
+                    ! and make sure they don't become zero by cutting them
+                    ! off to a certain minimum.
 
-             ppor = zero
-             if(porK(i,j,k) == normalFlux) ppor = one
+                    lam1 = abs(unAvg - sface + aAvg)
+                    lam2 = abs(unAvg - sface - aAvg)
+                    lam3 = abs(unAvg - sface)
 
-             dis2 = ppor*fis2*min(dpMax, max(dss(i,j,k,3), dss(i,j,k+1,3)))
-             dis4 = dim(ppor*fis4, dis2)
+                    rrad = lam3 + aAvg
 
-             ! Construct the vector of the first and third differences
-             ! multiplied by the appropriate constants.
+                    ! Multiply the eigenvalues by the area to obtain
+                    ! the correct values for the dissipation term.
 
-             ddw1 = w(i,j,k+1,irho) - w(i,j,k,irho)
-             dr  = dis2*ddw1 &
-                  - dis4*(w(i,j,k+2,irho) - w(i,j,k-1,irho) - three*ddw1)
+                    lam1 = max(lam1, epsAcoustic * rrad) * area
+                    lam2 = max(lam2, epsAcoustic * rrad) * area
+                    lam3 = max(lam3, epsShear * rrad) * area
 
-             ddw2 = w(i,j,k+1,irho)*w(i,j,k+1,ivx) &
-                  - w(i,j,k,irho)*w(i,j,k,ivx)
-             dru = dis2*ddw2                             &
-                  - dis4*(w(i,j,k+2,irho)*w(i,j,k+2,ivx) &
-                  -       w(i,j,k-1,irho)*w(i,j,k-1,ivx) - three*ddw2)
+                    ! Some abbreviations, which occur quite often in the
+                    ! dissipation terms.
 
-             ddw3 = w(i,j,k+1,irho)*w(i,j,k+1,ivy) &
-                  - w(i,j,k,irho)*w(i,j,k,ivy)
-             drv = dis2*ddw3 &
-                  - dis4*(w(i,j,k+2,irho)*w(i,j,k+2,ivy) &
-                  -       w(i,j,k-1,irho)*w(i,j,k-1,ivy) - three*ddw3)
+                    abv1 = half * (lam1 + lam2)
+                    abv2 = half * (lam1 - lam2)
+                    abv3 = abv1 - lam3
 
-             ddw4 = w(i,j,k+1,irho)*w(i,j,k+1,ivz) &
-                  - w(i,j,k,irho)*w(i,j,k,ivz)
-             drw = dis2*ddw4 &
-                  - dis4*(w(i,j,k+2,irho)*w(i,j,k+2,ivz) &
-                  -       w(i,j,k-1,irho)*w(i,j,k-1,ivz) - three*ddw4)
+                    abv4 = gm1 * (alphaAvg * dr - uAvg * dru - vAvg * drv &
+                                  - wAvg * drw + dre) - gm53 * drk
+                    abv5 = sx * dru + sy * drv + sz * drw - unAvg * dr
 
-             ddw5 = w(i,j,k+1,irhoE) - w(i,j,k,irhoE)
-             dre = dis2*ddw5 &
-                  - dis4*(w(i,j,k+2,irhoE) - w(i,j,k-1,irhoE) - three*ddw5)
+                    abv6 = abv3 * abv4 * ova2Avg + abv2 * abv5 * ovaAvg
+                    abv7 = abv2 * abv4 * ovaAvg + abv3 * abv5
 
-             ! In case a k-equation is present, compute the difference
-             ! of rhok and store the average value of k. If not present,
-             ! set both these values to zero, such that later on no
-             ! decision needs to be made anymore.
-             drk   = zero
-             kAvg = zero
+                    ! Compute and scatter the dissipative flux.
+                    ! Density.
 
-             if( correctForK ) then
-                ddw6 = w(i,j,k+1,irho)*w(i,j,k+1,itu1) &
-                     - w(i,j,k,irho)*w(i,j,k,itu1)
-                drk = dis2*ddw6                              &
-                     - dis4*(w(i,j,k+2,irho)*w(i,j,k+2,itu1) &
-                     -       w(i,j,k-1,irho)*w(i,j,k-1,itu1) - three*ddw6)
+                    fs = lam3 * dr + abv6
+                    fw(i, j, k + 1, irho) = fw(i, j, k + 1, irho) + fs
+                    fw(i, j, k, irho) = fw(i, j, k, irho) - fs
 
-                kAvg = half*(w(i,j,k+1,itu1) + w(i,j,k,itu1))
-             endif
+                    ! X-momentum.
 
-             ! Compute the average value of gamma and compute some
-             ! expressions in which it occurs.
+                    fs = lam3 * dru + uAvg * abv6 + sx * abv7
+                    fw(i, j, k + 1, imx) = fw(i, j, k + 1, imx) + fs
+                    fw(i, j, k, imx) = fw(i, j, k, imx) - fs
 
-             gammaAvg = half*(gamma(i,j,k+1) + gamma(i,j,k))
-             gm1      = gammaAvg - one
-             ovgm1    = one/gm1
-             gm53     = gammaAvg - five*third
+                    ! Y-momentum.
 
-             ! Compute the average state at the interface.
+                    fs = lam3 * drv + vAvg * abv6 + sy * abv7
+                    fw(i, j, k + 1, imy) = fw(i, j, k + 1, imy) + fs
+                    fw(i, j, k, imy) = fw(i, j, k, imy) - fs
 
-             uAvg  = half*(w(i,j,k+1,ivx) + w(i,j,k,ivx))
-             vAvg  = half*(w(i,j,k+1,ivy) + w(i,j,k,ivy))
-             wAvg  = half*(w(i,j,k+1,ivz) + w(i,j,k,ivz))
-             a2Avg = half*(gamma(i,j,k+1)*p(i,j,k+1)/w(i,j,k+1,irho) &
-                  +       gamma(i,j,k)  *p(i,j,k)  /w(i,j,k,  irho))
+                    ! Z-momentum.
 
-             area = sqrt(sk(i,j,k,1)**2 + sk(i,j,k,2)**2 + sk(i,j,k,3)**2)
-             tmp  = one/max(1.e-25_realType,area)
-             sx   = sk(i,j,k,1)*tmp
-             sy   = sk(i,j,k,2)*tmp
-             sz   = sk(i,j,k,3)*tmp
+                    fs = lam3 * drw + wAvg * abv6 + sz * abv7
+                    fw(i, j, k + 1, imz) = fw(i, j, k + 1, imz) + fs
+                    fw(i, j, k, imz) = fw(i, j, k, imz) - fs
 
-             alphaAvg = half*(uAvg**2 + vAvg**2 + wAvg**2)
-             hAvg     = alphaAvg + ovgm1*(a2Avg - gm53*kAvg)
-             aAvg     = sqrt(a2Avg)
-             unAvg    = uAvg*sx + vAvg*sy + wAvg*sz
-             ovaAvg   = one/aAvg
-             ova2Avg  = one/a2Avg
+                    ! Energy.
 
-             ! The mesh velocity if the face is moving. It must be
-             ! divided by the area to obtain a true velocity.
+                    fs = lam3 * dre + hAvg * abv6 + unAvg * abv7
+                    fw(i, j, k + 1, irhoE) = fw(i, j, k + 1, irhoE) + fs
+                    fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - fs
 
-             sface = sFaceK(i,j,k)*tmp
+                end do
+            end do
+        end do
+
+    end subroutine inviscidDissFluxMatrix
+
+    subroutine inviscidDissFluxScalar
+        ! ---------------------------------------------
+        !             Inviscid Diss Flux Scalar
+        ! ---------------------------------------------
+
+        use constants
+        use flowVarRefState, only: pInfCorr
+        use inputDiscretization, only: vis2, vis4
+        use inputPhysics, only: equations
+        use iteration, only: rFil
+        use flowVarRefState, only: gammaInf, pInfCorr, rhoInf
+        implicit none
+
+        ! Variables for inviscid diss flux scalar
+        real(kind=realType), parameter :: dssMax = 0.25_realType
+        real(kind=realType) :: sslim, rhoi
+        real(kind=realType) :: sfil, fis2, fis4
+        real(kind=realType) :: ppor, rrad, dis2, dis4, fs
+        real(kind=realType) :: ddw1, ddw2, ddw3, ddw4, ddw5
+        integer(kind=intType) :: i, j, k
+
+        ! Determine the variables used to compute the switch.
+        ! For the inviscid case this is the pressure; for the viscous
+        ! case it is the entropy.
+
+        select case (equations)
+        case (EulerEquations)
+
+            ! Inviscid case. Pressure switch is based on the pressure.
+            ! Also set the value of sslim. To be fully consistent this
+            ! must have the dimension of pressure and it is therefore
+            ! set to a fraction of the free stream value.
+
+            sslim = 0.001_realType * pInfCorr
+
+            ! Copy the pressure in ss. Only need the entries used in the
+            ! discretization, i.e. not including the corner halo's, but we'll
+            ! just copy all anyway.
+
+            ss = P
+            !===============================================================
+
+        case (NSEquations, RANSEquations)
+
+            ! Viscous case. Pressure switch is based on the entropy.
+            ! Also set the value of sslim. To be fully consistent this
+            ! must have the dimension of entropy and it is therefore
+            ! set to a fraction of the free stream value.
+
+            sslim = 0.001_realType * pInfCorr / (rhoInf**gammaInf)
+
+            ! Store the entropy in ss. See above.
+            do k = 0, kb
+                do j = 0, jb
+                    do i = doubleHaloStart, ib
+                        ss(i, j, k) = p(i, j, k) / (w(i, j, k, irho)**gamma(i, j, k))
+                    end do
+                end do
+            end do
+        end select
 
-             ! Compute the absolute values of the three eigenvalues
-             ! and make sure they don't become zero by cutting them
-             ! off to a certain minimum.
+        ! Compute the pressure sensor for each cell, in each direction:
+        do k = 1, ke
+            do j = 1, je
+                do i = singleHaloStart, ie
+                    dss(i, j, k, 1) = abs((ss(i + 1, j, k) - two * ss(i, j, k) + ss(i - 1, j, k)) &
+                                          / (ss(i + 1, j, k) + two * ss(i, j, k) + ss(i - 1, j, k) + sslim))
 
-             lam1 = abs(unAvg - sface + aAvg)
-             lam2 = abs(unAvg - sface - aAvg)
-             lam3 = abs(unAvg - sface)
+                    dss(i, j, k, 2) = abs((ss(i, j + 1, k) - two * ss(i, j, k) + ss(i, j - 1, k)) &
+                                          / (ss(i, j + 1, k) + two * ss(i, j, k) + ss(i, j - 1, k) + sslim))
 
-             rrad = lam3 + aAvg
+                    dss(i, j, k, 3) = abs((ss(i, j, k + 1) - two * ss(i, j, k) + ss(i, j, k - 1)) &
+                                          / (ss(i, j, k + 1) + two * ss(i, j, k) + ss(i, j, k - 1) + sslim))
+                end do
+            end do
+        end do
 
-             ! Multiply the eigenvalues by the area to obtain
-             ! the correct values for the dissipation term.
+        ! Set a couple of constants for the scheme.
 
-             lam1 = max(lam1,epsAcoustic*rrad)*area
-             lam2 = max(lam2,epsAcoustic*rrad)*area
-             lam3 = max(lam3,epsShear*rrad)*area
+        fis2 = rFil * vis2
+        fis4 = rFil * vis4
+        sfil = one - rFil
 
-             ! Some abbreviations, which occur quite often in the
-             ! dissipation terms.
+        ! Initialize the dissipative residual to a certain times,
+        ! possibly zero, the previously stored value. Owned cells
+        ! only, because the halo values do not matter.
 
-             abv1 = half*(lam1 + lam2)
-             abv2 = half*(lam1 - lam2)
-             abv3 = abv1 - lam3
+        fw = sfil * fw
+        !
+        !       Dissipative fluxes in the i-direction.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 1, il
 
-             abv4 = gm1*(alphaAvg*dr - uAvg*dru -vAvg*drv &
-                  -      wAvg*drw + dre) - gm53*drk
-             abv5 = sx*dru + sy*drv + sz*drw - unAvg*dr
+                    ! Compute the dissipation coefficients for this face.
 
-             abv6 = abv3*abv4*ova2Avg + abv2*abv5*ovaAvg
-             abv7 = abv2*abv4*ovaAvg  + abv3*abv5
+                    ppor = zero
+                    if (porI(i, j, k) == normalFlux) ppor = half
+                    rrad = ppor * (radI(i, j, k) + radI(i + 1, j, k))
 
-             ! Compute and scatter the dissipative flux.
-             ! Density.
+                    dis2 = fis2 * rrad * min(dssMax, max(dss(i, j, k, 1), dss(i + 1, j, k, 1)))
+                    dis4 = dim(fis4 * rrad, dis2)
 
-             fs               = lam3*dr  + abv6
-             fw(i,j,k+1,irho) = fw(i,j,k+1,irho) + fs
-             fw(i,j,k,irho)   = fw(i,j,k,irho)   - fs
+                    ! Compute and scatter the dissipative flux.
+                    ! Density. Store it in the mass flow of the
+                    ! appropriate sliding mesh interface.
 
-             ! X-momentum.
+                    ddw1 = w(i + 1, j, k, irho) - w(i, j, k, irho)
+                    fs = dis2 * ddw1 &
+                         - dis4 * (w(i + 2, j, k, irho) - w(i - 1, j, k, irho) - three * ddw1)
 
-             fs              = lam3*dru + uAvg*abv6 + sx*abv7
-             fw(i,j,k+1,imx) = fw(i,j,k+1,imx) + fs
-             fw(i,j,k,imx)   = fw(i,j,k,imx)   - fs
+                    fw(i + 1, j, k, irho) = fw(i + 1, j, k, irho) + fs
+                    fw(i, j, k, irho) = fw(i, j, k, irho) - fs
 
-             ! Y-momentum.
+                    ! X-momentum.
 
-             fs              = lam3*drv + vAvg*abv6 + sy*abv7
-             fw(i,j,k+1,imy) = fw(i,j,k+1,imy) + fs
-             fw(i,j,k,imy)   = fw(i,j,k,imy)   - fs
+                    ddw2 = w(i + 1, j, k, ivx) * w(i + 1, j, k, irho) - w(i, j, k, ivx) * w(i, j, k, irho)
+                    fs = dis2 * ddw2 &
+       - dis4 * (w(i + 2, j, k, ivx) * w(i + 2, j, k, irho) - w(i - 1, j, k, ivx) * w(i - 1, j, k, irho) - three * ddw2)
 
-             ! Z-momentum.
+                    fw(i + 1, j, k, imx) = fw(i + 1, j, k, imx) + fs
+                    fw(i, j, k, imx) = fw(i, j, k, imx) - fs
 
-             fs              = lam3*drw + wAvg*abv6 + sz*abv7
-             fw(i,j,k+1,imz) = fw(i,j,k+1,imz) + fs
-             fw(i,j,k,imz)   = fw(i,j,k,imz)   - fs
+                    ! Y-momentum.
 
-             ! Energy.
+                    ddw3 = w(i + 1, j, k, ivy) * w(i + 1, j, k, irho) - w(i, j, k, ivy) * w(i, j, k, irho)
+                    fs = dis2 * ddw3 &
+       - dis4 * (w(i + 2, j, k, ivy) * w(i + 2, j, k, irho) - w(i - 1, j, k, ivy) * w(i - 1, j, k, irho) - three * ddw3)
 
-             fs                = lam3*dre + hAvg*abv6 + unAvg*abv7
-             fw(i,j,k+1,irhoE) = fw(i,j,k+1,irhoE) + fs
-             fw(i,j,k,irhoE)   = fw(i,j,k,irhoE)   - fs
+                    fw(i + 1, j, k, imy) = fw(i + 1, j, k, imy) + fs
+                    fw(i, j, k, imy) = fw(i, j, k, imy) - fs
 
-          end do
-       end do
-    end do
+                    ! Z-momentum.
 
-  end subroutine inviscidDissFluxMatrix
+                    ddw4 = w(i + 1, j, k, ivz) * w(i + 1, j, k, irho) - w(i, j, k, ivz) * w(i, j, k, irho)
+                    fs = dis2 * ddw4 &
+       - dis4 * (w(i + 2, j, k, ivz) * w(i + 2, j, k, irho) - w(i - 1, j, k, ivz) * w(i - 1, j, k, irho) - three * ddw4)
 
+                    fw(i + 1, j, k, imz) = fw(i + 1, j, k, imz) + fs
+                    fw(i, j, k, imz) = fw(i, j, k, imz) - fs
 
-  subroutine inviscidDissFluxScalar
-    ! ---------------------------------------------
-    !             Inviscid Diss Flux Scalar
-    ! ---------------------------------------------
+                    ! Energy.
 
-    use constants
-    use flowVarRefState, only : pInfCorr
-    use inputDiscretization, only: vis2, vis4
-    use inputPhysics, only : equations
-    use iteration, only : rFil
-    use flowVarRefState, only : gammaInf, pInfCorr, rhoInf
-    implicit none
-
-    ! Variables for inviscid diss flux scalar
-    real(kind=realType), parameter :: dssMax = 0.25_realType
-    real(kind=realType) :: sslim, rhoi
-    real(kind=realType) :: sfil, fis2, fis4
-    real(kind=realType) :: ppor, rrad, dis2, dis4, fs
-    real(kind=realType) :: ddw1,ddw2,ddw3,ddw4,ddw5
-    integer(kind=intType) :: i, j, k
-
-    ! Determine the variables used to compute the switch.
-    ! For the inviscid case this is the pressure; for the viscous
-    ! case it is the entropy.
-
-    select case (equations)
-    case (EulerEquations)
-
-       ! Inviscid case. Pressure switch is based on the pressure.
-       ! Also set the value of sslim. To be fully consistent this
-       ! must have the dimension of pressure and it is therefore
-       ! set to a fraction of the free stream value.
+                    ddw5 = (w(i + 1, j, k, irhoE) + P(i + 1, j, K)) - (w(i, j, k, irhoE) + P(i, j, k))
+                    fs = dis2 * ddw5 &
+           - dis4 * ((w(i + 2, j, k, irhoE) + P(i + 2, j, k)) - (w(i - 1, j, k, irhoE) + P(i - 1, j, k)) - three * ddw5)
 
-       sslim = 0.001_realType*pInfCorr
+                    fw(i + 1, j, k, irhoE) = fw(i + 1, j, k, irhoE) + fs
+                    fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - fs
+                end do
+            end do
+        end do
+        !
+        !       Dissipative fluxes in the j-direction.
+        !
+        do k = 2, kl
+            do j = 1, jl
+                do i = 2, il
 
-       ! Copy the pressure in ss. Only need the entries used in the
-       ! discretization, i.e. not including the corner halo's, but we'll
-       ! just copy all anyway.
+                    ! Compute the dissipation coefficients for this face.
 
-       ss = P
-       !===============================================================
+                    ppor = zero
+                    if (porJ(i, j, k) == normalFlux) ppor = half
+                    rrad = ppor * (radJ(i, j, k) + radJ(i, j + 1, k))
 
-    case (NSEquations, RANSEquations)
+                    dis2 = fis2 * rrad * min(dssMax, max(dss(i, j, k, 2), dss(i, j + 1, k, 2)))
+                    dis4 = dim(fis4 * rrad, dis2)
 
-       ! Viscous case. Pressure switch is based on the entropy.
-       ! Also set the value of sslim. To be fully consistent this
-       ! must have the dimension of entropy and it is therefore
-       ! set to a fraction of the free stream value.
+                    ! Compute and scatter the dissipative flux.
+                    ! Density. Store it in the mass flow of the
+                    ! appropriate sliding mesh interface.
 
-       sslim = 0.001_realType*pInfCorr/(rhoInf**gammaInf)
+                    ddw1 = w(i, j + 1, k, irho) - w(i, j, k, irho)
+                    fs = dis2 * ddw1 &
+                         - dis4 * (w(i, j + 2, k, irho) - w(i, j - 1, k, irho) - three * ddw1)
 
-       ! Store the entropy in ss. See above.
-       do k=0, kb
-          do j=0, jb
-             do i=doubleHaloStart, ib
-                ss(i,j,k) = p(i,j,k)/(w(i,j,k,irho)**gamma(i,j,k))
-             end do
-          end do
-       end do
-    end select
-
-    ! Compute the pressure sensor for each cell, in each direction:
-    do k=1,ke
-       do j=1,je
-          do i=singleHaloStart,ie
-             dss(i,j,k,1) = abs((ss(i+1,j,k) - two*ss(i,j,k) + ss(i-1,j,k)) &
-                  /     (ss(i+1,j,k) + two*ss(i,j,k) + ss(i-1,j,k) + sslim))
-
-             dss(i,j,k,2) = abs((ss(i,j+1,k) - two*ss(i,j,k) + ss(i,j-1,k)) &
-                  /     (ss(i,j+1,k) + two*ss(i,j,k) + ss(i,j-1,k) + sslim))
-
-             dss(i,j,k,3) = abs((ss(i,j,k+1) - two*ss(i,j,k) + ss(i,j,k-1)) &
-                  /     (ss(i,j,k+1) + two*ss(i,j,k) + ss(i,j,k-1) + sslim))
-          end do
-       end do
-    end do
+                    fw(i, j + 1, k, irho) = fw(i, j + 1, k, irho) + fs
+                    fw(i, j, k, irho) = fw(i, j, k, irho) - fs
 
-    ! Set a couple of constants for the scheme.
-
-    fis2 = rFil*vis2
-    fis4 = rFil*vis4
-    sfil = one - rFil
+                    ! X-momentum.
 
-    ! Initialize the dissipative residual to a certain times,
-    ! possibly zero, the previously stored value. Owned cells
-    ! only, because the halo values do not matter.
+                    ddw2 = w(i, j + 1, k, ivx) * w(i, j + 1, k, irho) - w(i, j, k, ivx) * w(i, j, k, irho)
+                    fs = dis2 * ddw2 &
+       - dis4 * (w(i, j + 2, k, ivx) * w(i, j + 2, k, irho) - w(i, j - 1, k, ivx) * w(i, j - 1, k, irho) - three * ddw2)
 
-    fw = sfil*fw
-    !
-    !       Dissipative fluxes in the i-direction.
-    !
-    do k=2,kl
-       do j=2,jl
-          do i=1,il
+                    fw(i, j + 1, k, imx) = fw(i, j + 1, k, imx) + fs
+                    fw(i, j, k, imx) = fw(i, j, k, imx) - fs
 
-             ! Compute the dissipation coefficients for this face.
+                    ! Y-momentum.
 
-             ppor = zero
-             if(porI(i,j,k) == normalFlux) ppor = half
-             rrad = ppor*(radI(i,j,k) + radI(i+1,j,k))
+                    ddw3 = w(i, j + 1, k, ivy) * w(i, j + 1, k, irho) - w(i, j, k, ivy) * w(i, j, k, irho)
+                    fs = dis2 * ddw3 &
+       - dis4 * (w(i, j + 2, k, ivy) * w(i, j + 2, k, irho) - w(i, j - 1, k, ivy) * w(i, j - 1, k, irho) - three * ddw3)
 
-             dis2 = fis2*rrad*min(dssMax, max(dss(i,j,k,1), dss(i+1,j,k,1)))
-             dis4 = dim(fis4*rrad, dis2)
+                    fw(i, j + 1, k, imy) = fw(i, j + 1, k, imy) + fs
+                    fw(i, j, k, imy) = fw(i, j, k, imy) - fs
 
-             ! Compute and scatter the dissipative flux.
-             ! Density. Store it in the mass flow of the
-             ! appropriate sliding mesh interface.
+                    ! Z-momentum.
 
-             ddw1 = w(i+1,j,k,irho) - w(i,j,k,irho)
-             fs  = dis2*ddw1 &
-                  - dis4*(w(i+2,j,k,irho) - w(i-1,j,k,irho) - three*ddw1)
+                    ddw4 = w(i, j + 1, k, ivz) * w(i, j + 1, k, irho) - w(i, j, k, ivz) * w(i, j, k, irho)
+                    fs = dis2 * ddw4 &
+       - dis4 * (w(i, j + 2, k, ivz) * w(i, j + 2, k, irho) - w(i, j - 1, k, ivz) * w(i, j - 1, k, irho) - three * ddw4)
 
-             fw(i+1,j,k,irho) = fw(i+1,j,k,irho) + fs
-             fw(i,j,k,irho)   = fw(i,j,k,irho)   - fs
+                    fw(i, j + 1, k, imz) = fw(i, j + 1, k, imz) + fs
+                    fw(i, j, k, imz) = fw(i, j, k, imz) - fs
 
-             ! X-momentum.
+                    ! Energy.
 
-             ddw2 = w(i+1,j,k,ivx)*w(i+1,j,k,irho) - w(i,j,k,ivx)*w(i,j,k,irho)
-             fs  = dis2*ddw2 &
-                  - dis4*(w(i+2,j,k,ivx)*w(i+2,j,k,irho) - w(i-1,j,k,ivx)*w(i-1,j,k,irho) - three*ddw2)
+                    ddw5 = (w(i, j + 1, k, irhoE) + P(i, j + 1, k)) - (w(i, j, k, irhoE) + P(i, j, k))
+                    fs = dis2 * ddw5 &
+           - dis4 * ((w(i, j + 2, k, irhoE) + P(i, j + 2, k)) - (w(i, j - 1, k, irhoE) + P(i, j - 1, k)) - three * ddw5)
 
-             fw(i+1,j,k,imx) = fw(i+1,j,k,imx) + fs
-             fw(i,j,k,imx)   = fw(i,j,k,imx)   - fs
+                    fw(i, j + 1, k, irhoE) = fw(i, j + 1, k, irhoE) + fs
+                    fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - fs
+                end do
+            end do
+        end do
+        !
+        !       Dissipative fluxes in the k-direction.
+        !
+        do k = 1, kl
+            do j = 2, jl
+                do i = 2, il
 
-             ! Y-momentum.
+                    ! Compute the dissipation coefficients for this face.
 
-             ddw3 = w(i+1,j,k,ivy)*w(i+1,j,k,irho) - w(i,j,k,ivy)*w(i,j,k,irho)
-             fs  = dis2*ddw3 &
-                  - dis4*(w(i+2,j,k,ivy)*w(i+2,j,k,irho) - w(i-1,j,k,ivy)*w(i-1,j,k,irho) - three*ddw3)
+                    ppor = zero
+                    if (porK(i, j, k) == normalFlux) ppor = half
+                    rrad = ppor * (radK(i, j, k) + radK(i, j, k + 1))
 
-             fw(i+1,j,k,imy) = fw(i+1,j,k,imy) + fs
-             fw(i,j,k,imy)   = fw(i,j,k,imy)   - fs
+                    dis2 = fis2 * rrad * min(dssMax, max(dss(i, j, k, 3), dss(i, j, k + 1, 3)))
+                    dis4 = dim(fis4 * rrad, dis2)
 
-             ! Z-momentum.
+                    ! Compute and scatter the dissipative flux.
+                    ! Density. Store it in the mass flow of the
+                    ! appropriate sliding mesh interface.
 
-             ddw4 = w(i+1,j,k,ivz)*w(i+1,j,k,irho) - w(i,j,k,ivz)*w(i,j,k,irho)
-             fs  = dis2*ddw4 &
-                  - dis4*(w(i+2,j,k,ivz)*w(i+2,j,k,irho) - w(i-1,j,k,ivz)*w(i-1,j,k,irho) - three*ddw4)
+                    ddw1 = w(i, j, k + 1, irho) - w(i, j, k, irho)
+                    fs = dis2 * ddw1 &
+                         - dis4 * (w(i, j, k + 2, irho) - w(i, j, k - 1, irho) - three * ddw1)
 
-             fw(i+1,j,k,imz) = fw(i+1,j,k,imz) + fs
-             fw(i,j,k,imz)   = fw(i,j,k,imz)   - fs
+                    fw(i, j, k + 1, irho) = fw(i, j, k + 1, irho) + fs
+                    fw(i, j, k, irho) = fw(i, j, k, irho) - fs
 
-             ! Energy.
+                    ! X-momentum.
 
-             ddw5 = (w(i+1,j,k,irhoE) + P(i+1,j,K))- (w(i,j,k,irhoE) + P(i,j,k))
-             fs  = dis2*ddw5 &
-                  - dis4*((w(i+2,j,k,irhoE) + P(i+2,j,k)) - (w(i-1,j,k,irhoE) + P(i-1,j,k)) - three*ddw5)
+                    ddw2 = w(i, j, k + 1, ivx) * w(i, j, k + 1, irho) - w(i, j, k, ivx) * w(i, j, k, irho)
+                    fs = dis2 * ddw2 &
+       - dis4 * (w(i, j, k + 2, ivx) * w(i, j, k + 2, irho) - w(i, j, k - 1, ivx) * w(i, j, k - 1, irho) - three * ddw2)
 
-             fw(i+1,j,k,irhoE) = fw(i+1,j,k,irhoE) + fs
-             fw(i,j,k,irhoE)   = fw(i,j,k,irhoE)   - fs
-          end do
-       end do
-    end do
-    !
-    !       Dissipative fluxes in the j-direction.
-    !
-    do k=2,kl
-       do j=1,jl
-          do i=2,il
+                    fw(i, j, k + 1, imx) = fw(i, j, k + 1, imx) + fs
+                    fw(i, j, k, imx) = fw(i, j, k, imx) - fs
 
-             ! Compute the dissipation coefficients for this face.
+                    ! Y-momentum.
 
-             ppor = zero
-             if(porJ(i,j,k) == normalFlux) ppor = half
-             rrad = ppor*(radJ(i,j,k) + radJ(i,j+1,k))
+                    ddw3 = w(i, j, k + 1, ivy) * w(i, j, k + 1, irho) - w(i, j, k, ivy) * w(i, j, k, irho)
+                    fs = dis2 * ddw3 &
+       - dis4 * (w(i, j, k + 2, ivy) * w(i, j, k + 2, irho) - w(i, j, k - 1, ivy) * w(i, j, k - 1, irho) - three * ddw3)
 
-             dis2 = fis2*rrad*min(dssMax, max(dss(i,j,k,2),dss(i,j+1,k,2)))
-             dis4 = dim(fis4*rrad, dis2)
+                    fw(i, j, k + 1, imy) = fw(i, j, k + 1, imy) + fs
+                    fw(i, j, k, imy) = fw(i, j, k, imy) - fs
 
-             ! Compute and scatter the dissipative flux.
-             ! Density. Store it in the mass flow of the
-             ! appropriate sliding mesh interface.
+                    ! Z-momentum.
 
-             ddw1 = w(i,j+1,k,irho) - w(i,j,k,irho)
-             fs  = dis2*ddw1 &
-                  - dis4*(w(i,j+2,k,irho) - w(i,j-1,k,irho) - three*ddw1)
+                    ddw4 = w(i, j, k + 1, ivz) * w(i, j, k + 1, irho) - w(i, j, k, ivz) * w(i, j, k, irho)
+                    fs = dis2 * ddw4 &
+       - dis4 * (w(i, j, k + 2, ivz) * w(i, j, k + 2, irho) - w(i, j, k - 1, ivz) * w(i, j, k - 1, irho) - three * ddw4)
 
-             fw(i,j+1,k,irho) = fw(i,j+1,k,irho) + fs
-             fw(i,j,k,irho)   = fw(i,j,k,irho)   - fs
+                    fw(i, j, k + 1, imz) = fw(i, j, k + 1, imz) + fs
+                    fw(i, j, k, imz) = fw(i, j, k, imz) - fs
 
-             ! X-momentum.
+                    ! Energy.
 
-             ddw2 = w(i,j+1,k,ivx)*w(i,j+1,k,irho) - w(i,j,k,ivx)*w(i,j,k,irho)
-             fs  = dis2*ddw2 &
-                  - dis4*(w(i,j+2,k,ivx)*w(i,j+2,k,irho) - w(i,j-1,k,ivx)*w(i,j-1,k,irho) - three*ddw2)
+                    ddw5 = (w(i, j, k + 1, irhoE) + P(i, j, k + 1)) - (w(i, j, k, irhoE) + P(i, j, k))
+                    fs = dis2 * ddw5 &
+           - dis4 * ((w(i, j, k + 2, irhoE) + P(i, j, k + 2)) - (w(i, j, k - 1, irhoE) + P(i, j, k - 1)) - three * ddw5)
 
-             fw(i,j+1,k,imx) = fw(i,j+1,k,imx) + fs
-             fw(i,j,k,imx)   = fw(i,j,k,imx)   - fs
+                    fw(i, j, k + 1, irhoE) = fw(i, j, k + 1, irhoE) + fs
+                    fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - fs
+                end do
+            end do
+        end do
+    end subroutine inviscidDissFluxScalar
+
+    subroutine inviscidUpwindFlux(fineGrid)
+        !
+        !       inviscidUpwindFlux computes the artificial dissipation part of
+        !       the Euler fluxes by means of an approximate solution of the 1D
+        !       Riemann problem on the face. For first order schemes,
+        !       fineGrid == .false., the states in the cells are assumed to
+        !       be constant; for the second order schemes on the fine grid a
+        !       nonlinear reconstruction of the left and right state is done
+        !       for which several options exist.
+        !       It is assumed that the pointers in blockPointers already
+        !       point to the correct block.
+        !
+        use constants
+        use flowVarRefState, only: kPresent, nw, nwf, rgas, tref
+        use blockPointers, only: rotMatrixI, rotMatrixJ, rotMatrixK
+        use inputDiscretization, only: limiter, precond, riemann, &
+                                       riemannCoarse, orderTurb, kappaCoef
+        use inputPhysics, only: equations
+        use iteration, only: rFil, currentLevel, groundLevel
+        use utils, only: getCorrectForK, terminate
+        use flowUtils, only: eTot
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        logical, intent(in) :: fineGrid
+        !
+        !      Local variables.
+        !
+        integer(kind=porType) :: por
+
+        integer(kind=intType) :: nwInt
+        integer(kind=intType) :: i, j, k, ind
+        integer(kind=intType) :: limUsed, riemannUsed
+
+        real(kind=realType) :: sx, sy, sz, omk, opk, sFil, gammaFace
+        real(kind=realType) :: factMinmod, sFace
+
+        real(kind=realType), dimension(nw) :: left, right
+        real(kind=realType), dimension(nw) :: du1, du2, du3
+        real(kind=realType), dimension(nwf) :: flux
+
+        logical :: firstOrderK, correctForK, rotationalPeriodic
+
+        ! Check if the formulation for rotational periodic problems
+        ! must be used.
+
+        if (associated(rotMatrixI)) then
+            rotationalPeriodic = .true.
+        else
+            rotationalPeriodic = .false.
+        end if
+
+        ! Initialize the dissipative residual to a certain times,
+        ! possibly zero, the previously stored value. Owned cells
+        ! only, because the halo values do not matter.
+
+        sFil = one - rFil
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    fw(i, j, k, irho) = sFil * fw(i, j, k, irho)
+                    fw(i, j, k, imx) = sFil * fw(i, j, k, imx)
+                    fw(i, j, k, imy) = sFil * fw(i, j, k, imy)
+                    fw(i, j, k, imz) = sFil * fw(i, j, k, imz)
+                    fw(i, j, k, irhoE) = sFil * fw(i, j, k, irhoE)
+                end do
+            end do
+        end do
 
-             ! Y-momentum.
+        ! Determine whether or not the total energy must be corrected
+        ! for the presence of the turbulent kinetic energy.
+        correctForK = getCorrectForK()
 
-             ddw3 = w(i,j+1,k,ivy)*w(i,j+1,k,irho) - w(i,j,k,ivy)*w(i,j,k,irho)
-             fs  = dis2*ddw3 &
-                  - dis4*(w(i,j+2,k,ivy)*w(i,j+2,k,irho) - w(i,j-1,k,ivy)*w(i,j-1,k,irho) - three*ddw3)
+        ! Compute the factor used in the minmod limiter.
 
-             fw(i,j+1,k,imy) = fw(i,j+1,k,imy) + fs
-             fw(i,j,k,imy)   = fw(i,j,k,imy)   - fs
+        factMinmod = (three - kappaCoef) &
+                     / max(1.e-10_realType, one - kappaCoef)
 
-             ! Z-momentum.
+        ! Determine the limiter scheme to be used. On the fine grid the
+        ! user specified scheme is used; on the coarse grid a first order
+        ! scheme is computed.
 
-             ddw4 = w(i,j+1,k,ivz)*w(i,j+1,k,irho) - w(i,j,k,ivz)*w(i,j,k,irho)
-             fs  = dis2*ddw4 &
-                  - dis4*(w(i,j+2,k,ivz)*w(i,j+2,k,irho) - w(i,j-1,k,ivz)*w(i,j-1,k,irho) - three*ddw4)
+        limUsed = firstOrder
+        if (fineGrid) limUsed = limiter
 
-             fw(i,j+1,k,imz) = fw(i,j+1,k,imz) + fs
-             fw(i,j,k,imz)   = fw(i,j,k,imz)   - fs
+        ! Determine the riemann solver which must be used.
 
-             ! Energy.
+        riemannUsed = riemannCoarse
+        if (fineGrid) riemannUsed = riemann
 
-             ddw5 = (w(i,j+1,k,irhoE) + P(i,j+1,k)) - (w(i,j,k,irhoE) + P(i,j,k))
-             fs  = dis2*ddw5 &
-                  - dis4*((w(i,j+2,k,irhoE) + P(i,j+2,k)) - (w(i,j-1,k,irhoE) + P(i,j-1,k)) - three*ddw5)
+        ! Store 1-kappa and 1+kappa a bit easier and multiply it by 0.25.
 
-             fw(i,j+1,k,irhoE) = fw(i,j+1,k,irhoE) + fs
-             fw(i,j,k,irhoE)   = fw(i,j,k,irhoE)   - fs
-          end do
-       end do
-    end do
-    !
-    !       Dissipative fluxes in the k-direction.
-    !
-    do k=1,kl
-       do j=2,jl
-          do i=2,il
+        omk = fourth * (one - kappaCoef)
+        opk = fourth * (one + kappaCoef)
 
-             ! Compute the dissipation coefficients for this face.
+        ! Set the number of variables to be interpolated depending
+        ! whether or not a k-equation is present. If a k-equation is
+        ! present also set the logical firstOrderK. This indicates
+        ! whether or not only a first order approximation is to be used
+        ! for the turbulent kinetic energy.
 
-             ppor = zero
-             if(porK(i,j,k) == normalFlux) ppor = half
-             rrad = ppor*(radK(i,j,k) + radK(i,j,k+1))
+        if (correctForK) then
+            if (orderTurb == firstOrder) then
+                nwInt = nwf
+                firstOrderK = .true.
+            else
+                nwInt = itu1
+                firstOrderK = .false.
+            end if
+        else
+            nwInt = nwf
+            firstOrderK = .false.
+        end if
+        !
+        !       Flux computation. A distinction is made between first and
+        !       second order schemes to avoid the overhead for the first order
+        !       scheme.
+        !
+        orderTest: if (limUsed == firstOrder) then
+            !
+            !         First order reconstruction. The states in the cells are
+            !         constant. The left and right states are constructed easily.
+            !
+            ! Fluxes in the i-direction.
+
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 1, il
+
+                        ! Store the normal vector, the porosity and the
+                        ! mesh velocity if present.
+
+                        sx = si(i, j, k, 1); sy = si(i, j, k, 2); sz = si(i, j, k, 3)
+                        por = porI(i, j, k)
+                        sFace = sFaceI(i, j, k)
+
+                        ! Determine the left and right state.
+
+                        left(irho) = w(i, j, k, irho)
+                        left(ivx) = w(i, j, k, ivx)
+                        left(ivy) = w(i, j, k, ivy)
+                        left(ivz) = w(i, j, k, ivz)
+                        left(irhoE) = p(i, j, k)
+                        if (correctForK) left(itu1) = w(i, j, k, itu1)
+
+                        right(irho) = w(i + 1, j, k, irho)
+                        right(ivx) = w(i + 1, j, k, ivx)
+                        right(ivy) = w(i + 1, j, k, ivy)
+                        right(ivz) = w(i + 1, j, k, ivz)
+                        right(irhoE) = p(i + 1, j, k)
+                        if (correctForK) right(itu1) = w(i + 1, j, k, itu1)
+
+                        ! Compute the value of gamma on the face. Take an
+                        ! arithmetic average of the two states.
+
+                        gammaFace = half * (gamma(i, j, k) + gamma(i + 1, j, k))
+
+                        ! Compute the dissipative flux across the interface.
+
+                        call riemannFlux(left, right, flux)
+
+                        ! And scatter it to the left and right.
+
+                        fw(i, j, k, irho) = fw(i, j, k, irho) + flux(irho)
+                        fw(i, j, k, imx) = fw(i, j, k, imx) + flux(imx)
+                        fw(i, j, k, imy) = fw(i, j, k, imy) + flux(imy)
+                        fw(i, j, k, imz) = fw(i, j, k, imz) + flux(imz)
+                        fw(i, j, k, irhoE) = fw(i, j, k, irhoE) + flux(irhoE)
+
+                        fw(i + 1, j, k, irho) = fw(i + 1, j, k, irho) - flux(irho)
+                        fw(i + 1, j, k, imx) = fw(i + 1, j, k, imx) - flux(imx)
+                        fw(i + 1, j, k, imy) = fw(i + 1, j, k, imy) - flux(imy)
+                        fw(i + 1, j, k, imz) = fw(i + 1, j, k, imz) - flux(imz)
+                        fw(i + 1, j, k, irhoE) = fw(i + 1, j, k, irhoE) - flux(irhoE)
+
+                    end do
+                end do
+            end do
 
-             dis2 = fis2*rrad*min(dssMax, max(dss(i,j,k,3), dss(i,j,k+1,3)))
-             dis4 = dim(fis4*rrad, dis2)
+            ! Fluxes in j-direction.
 
-             ! Compute and scatter the dissipative flux.
-             ! Density. Store it in the mass flow of the
-             ! appropriate sliding mesh interface.
+            do k = 2, kl
+                do j = 1, jl
+                    do i = 2, il
 
-             ddw1 = w(i,j,k+1,irho) - w(i,j,k,irho)
-             fs  = dis2*ddw1 &
-                  - dis4*(w(i,j,k+2,irho) - w(i,j,k-1,irho) - three*ddw1)
+                        ! Store the normal vector, the porosity and the
+                        ! mesh velocity if present.
 
-             fw(i,j,k+1,irho) = fw(i,j,k+1,irho) + fs
-             fw(i,j,k,irho)   = fw(i,j,k,irho)   - fs
+                        sx = sj(i, j, k, 1); sy = sj(i, j, k, 2); sz = sj(i, j, k, 3)
+                        por = porJ(i, j, k)
+                        sFace = sFaceJ(i, j, k)
 
-             ! X-momentum.
+                        ! Determine the left and right state.
 
-             ddw2 = w(i,j,k+1,ivx)*w(i,j,k+1,irho) - w(i,j,k,ivx)*w(i,j,k,irho)
-             fs  = dis2*ddw2 &
-                  - dis4*(w(i,j,k+2,ivx)*w(i,j,k+2,irho) - w(i,j,k-1,ivx)*w(i,j,k-1,irho) - three*ddw2)
+                        left(irho) = w(i, j, k, irho)
+                        left(ivx) = w(i, j, k, ivx)
+                        left(ivy) = w(i, j, k, ivy)
+                        left(ivz) = w(i, j, k, ivz)
+                        left(irhoE) = p(i, j, k)
+                        if (correctForK) left(itu1) = w(i, j, k, itu1)
 
-             fw(i,j,k+1,imx) = fw(i,j,k+1,imx) + fs
-             fw(i,j,k,imx)   = fw(i,j,k,imx)   - fs
+                        right(irho) = w(i, j + 1, k, irho)
+                        right(ivx) = w(i, j + 1, k, ivx)
+                        right(ivy) = w(i, j + 1, k, ivy)
+                        right(ivz) = w(i, j + 1, k, ivz)
+                        right(irhoE) = p(i, j + 1, k)
+                        if (correctForK) right(itu1) = w(i, j + 1, k, itu1)
 
-             ! Y-momentum.
+                        ! Compute the value of gamma on the face. Take an
+                        ! arithmetic average of the two states.
 
-             ddw3 = w(i,j,k+1,ivy)*w(i,j,k+1,irho) - w(i,j,k,ivy)*w(i,j,k,irho)
-             fs  = dis2*ddw3 &
-                  - dis4*(w(i,j,k+2,ivy)*w(i,j,k+2,irho) - w(i,j,k-1,ivy)*w(i,j,k-1,irho) - three*ddw3)
+                        gammaFace = half * (gamma(i, j, k) + gamma(i, j + 1, k))
 
-             fw(i,j,k+1,imy) = fw(i,j,k+1,imy) + fs
-             fw(i,j,k,imy)   = fw(i,j,k,imy)   - fs
+                        ! Compute the dissipative flux across the interface.
 
-             ! Z-momentum.
+                        call riemannFlux(left, right, flux)
 
-             ddw4 = w(i,j,k+1,ivz)*w(i,j,k+1,irho) - w(i,j,k,ivz)*w(i,j,k,irho)
-             fs  = dis2*ddw4 &
-                  - dis4*(w(i,j,k+2,ivz)*w(i,j,k+2,irho) - w(i,j,k-1,ivz)*w(i,j,k-1,irho) - three*ddw4)
+                        ! And scatter it to the left and right.
 
-             fw(i,j,k+1,imz) = fw(i,j,k+1,imz) + fs
-             fw(i,j,k,imz)   = fw(i,j,k,imz)   - fs
+                        fw(i, j, k, irho) = fw(i, j, k, irho) + flux(irho)
+                        fw(i, j, k, imx) = fw(i, j, k, imx) + flux(imx)
+                        fw(i, j, k, imy) = fw(i, j, k, imy) + flux(imy)
+                        fw(i, j, k, imz) = fw(i, j, k, imz) + flux(imz)
+                        fw(i, j, k, irhoE) = fw(i, j, k, irhoE) + flux(irhoE)
 
-             ! Energy.
+                        fw(i, j + 1, k, irho) = fw(i, j + 1, k, irho) - flux(irho)
+                        fw(i, j + 1, k, imx) = fw(i, j + 1, k, imx) - flux(imx)
+                        fw(i, j + 1, k, imy) = fw(i, j + 1, k, imy) - flux(imy)
+                        fw(i, j + 1, k, imz) = fw(i, j + 1, k, imz) - flux(imz)
+                        fw(i, j + 1, k, irhoE) = fw(i, j + 1, k, irhoE) - flux(irhoE)
+                    end do
+                end do
+            end do
 
-             ddw5 = (w(i,j,k+1,irhoE) + P(i,j,k+1)) - (w(i,j,k,irhoE) + P(i,j,k))
-             fs  = dis2*ddw5 &
-                  - dis4*((w(i,j,k+2,irhoE) + P(i,j,k+2)) - (w(i,j,k-1,irhoE) + P(i,j,k-1)) - three*ddw5)
+            ! Fluxes in k-direction.
 
-             fw(i,j,k+1,irhoE) = fw(i,j,k+1,irhoE) + fs
-             fw(i,j,k,irhoE)   = fw(i,j,k,irhoE)   - fs
-          end do
-       end do
-    end do
-  end subroutine inviscidDissFluxScalar
+            do k = 1, kl
+                do j = 2, jl
+                    do i = 2, il
 
-  subroutine inviscidUpwindFlux(fineGrid)
-    !
-    !       inviscidUpwindFlux computes the artificial dissipation part of
-    !       the Euler fluxes by means of an approximate solution of the 1D
-    !       Riemann problem on the face. For first order schemes,
-    !       fineGrid == .false., the states in the cells are assumed to
-    !       be constant; for the second order schemes on the fine grid a
-    !       nonlinear reconstruction of the left and right state is done
-    !       for which several options exist.
-    !       It is assumed that the pointers in blockPointers already
-    !       point to the correct block.
-    !
-    use constants
-    use flowVarRefState, only : kPresent, nw, nwf, rgas, tref
-    use blockPointers, only : rotMatrixI, rotMatrixJ, rotMatrixK
-    use inputDiscretization, only: limiter, precond, riemann, &
-         riemannCoarse, orderTurb, kappaCoef
-    use inputPhysics, only : equations
-    use iteration, only : rFil, currentLevel, groundLevel
-    use utils, only : getCorrectForK, terminate
-    use flowUtils, only : eTot
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    logical, intent(in) :: fineGrid
-    !
-    !      Local variables.
-    !
-    integer(kind=porType) :: por
-
-    integer(kind=intType) :: nwInt
-    integer(kind=intType) :: i, j, k, ind
-    integer(kind=intType) :: limUsed, riemannUsed
-
-    real(kind=realType) :: sx, sy, sz, omk, opk, sFil, gammaFace
-    real(kind=realType) :: factMinmod, sFace
-
-    real(kind=realType), dimension(nw)  :: left, right
-    real(kind=realType), dimension(nw)  :: du1, du2, du3
-    real(kind=realType), dimension(nwf) :: flux
-
-    logical :: firstOrderK, correctForK, rotationalPeriodic
-
-    ! Check if the formulation for rotational periodic problems
-    ! must be used.
-
-    if( associated(rotMatrixI) ) then
-       rotationalPeriodic = .true.
-    else
-       rotationalPeriodic = .false.
-    endif
-
-    ! Initialize the dissipative residual to a certain times,
-    ! possibly zero, the previously stored value. Owned cells
-    ! only, because the halo values do not matter.
-
-    sFil = one - rFil
-
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-             fw(i,j,k,irho)  = sFil*fw(i,j,k,irho)
-             fw(i,j,k,imx)   = sFil*fw(i,j,k,imx)
-             fw(i,j,k,imy)   = sFil*fw(i,j,k,imy)
-             fw(i,j,k,imz)   = sFil*fw(i,j,k,imz)
-             fw(i,j,k,irhoE) = sFil*fw(i,j,k,irhoE)
-          enddo
-       enddo
-    enddo
-
-    ! Determine whether or not the total energy must be corrected
-    ! for the presence of the turbulent kinetic energy.
-    correctForK = getCorrectForK()
-
-    ! Compute the factor used in the minmod limiter.
-
-    factMinmod = (three-kappaCoef) &
-         / max(1.e-10_realType, one-kappaCoef)
-
-    ! Determine the limiter scheme to be used. On the fine grid the
-    ! user specified scheme is used; on the coarse grid a first order
-    ! scheme is computed.
-
-    limUsed = firstOrder
-    if( fineGrid ) limUsed = limiter
-
-    ! Determine the riemann solver which must be used.
-
-    riemannUsed = riemannCoarse
-    if( fineGrid ) riemannUsed = riemann
-
-    ! Store 1-kappa and 1+kappa a bit easier and multiply it by 0.25.
-
-    omk = fourth*(one - kappaCoef)
-    opk = fourth*(one + kappaCoef)
-
-    ! Set the number of variables to be interpolated depending
-    ! whether or not a k-equation is present. If a k-equation is
-    ! present also set the logical firstOrderK. This indicates
-    ! whether or not only a first order approximation is to be used
-    ! for the turbulent kinetic energy.
-
-    if( correctForK ) then
-       if(orderTurb == firstOrder) then
-          nwInt       = nwf
-          firstOrderK = .true.
-       else
-          nwInt       = itu1
-          firstOrderK = .false.
-       endif
-    else
-       nwInt       = nwf
-       firstOrderK = .false.
-    endif
-    !
-    !       Flux computation. A distinction is made between first and
-    !       second order schemes to avoid the overhead for the first order
-    !       scheme.
-    !
-    orderTest: if(limUsed == firstOrder) then
-       !
-       !         First order reconstruction. The states in the cells are
-       !         constant. The left and right states are constructed easily.
-       !
-       ! Fluxes in the i-direction.
-
-       do k=2,kl
-          do j=2,jl
-             do i=1,il
-
-                ! Store the normal vector, the porosity and the
-                ! mesh velocity if present.
-
-                sx = si(i,j,k,1); sy = si(i,j,k,2); sz = si(i,j,k,3)
-                por = porI(i,j,k)
-                sFace = sFaceI(i,j,k)
-
-                ! Determine the left and right state.
-
-                left(irho)  = w(i,j,k,irho)
-                left(ivx)   = w(i,j,k,ivx)
-                left(ivy)   = w(i,j,k,ivy)
-                left(ivz)   = w(i,j,k,ivz)
-                left(irhoE) = p(i,j,k)
-                if( correctForK ) left(itu1) = w(i,j,k,itu1)
-
-                right(irho)  = w(i+1,j,k,irho)
-                right(ivx)   = w(i+1,j,k,ivx)
-                right(ivy)   = w(i+1,j,k,ivy)
-                right(ivz)   = w(i+1,j,k,ivz)
-                right(irhoE) = p(i+1,j,k)
-                if( correctForK ) right(itu1) = w(i+1,j,k,itu1)
-
-                ! Compute the value of gamma on the face. Take an
-                ! arithmetic average of the two states.
-
-                gammaFace = half*(gamma(i,j,k) + gamma(i+1,j,k))
-
-                ! Compute the dissipative flux across the interface.
-
-                call riemannFlux(left, right, flux)
-
-                ! And scatter it to the left and right.
-
-                fw(i,j,k,irho)  = fw(i,j,k,irho)  + flux(irho)
-                fw(i,j,k,imx)   = fw(i,j,k,imx)   + flux(imx)
-                fw(i,j,k,imy)   = fw(i,j,k,imy)   + flux(imy)
-                fw(i,j,k,imz)   = fw(i,j,k,imz)   + flux(imz)
-                fw(i,j,k,irhoE) = fw(i,j,k,irhoE) + flux(irhoE)
-
-                fw(i+1,j,k,irho)  = fw(i+1,j,k,irho)  - flux(irho)
-                fw(i+1,j,k,imx)   = fw(i+1,j,k,imx)   - flux(imx)
-                fw(i+1,j,k,imy)   = fw(i+1,j,k,imy)   - flux(imy)
-                fw(i+1,j,k,imz)   = fw(i+1,j,k,imz)   - flux(imz)
-                fw(i+1,j,k,irhoE) = fw(i+1,j,k,irhoE) - flux(irhoE)
+                        ! Store the normal vector, the porosity and the
+                        ! mesh velocity if present.
 
-             enddo
-          enddo
-       enddo
-
-       ! Fluxes in j-direction.
-
-       do k=2,kl
-          do j=1,jl
-             do i=2,il
-
-                ! Store the normal vector, the porosity and the
-                ! mesh velocity if present.
-
-                sx = sj(i,j,k,1); sy = sj(i,j,k,2); sz = sj(i,j,k,3)
-                por = porJ(i,j,k)
-                sFace = sFaceJ(i,j,k)
-
-                ! Determine the left and right state.
-
-                left(irho)  = w(i,j,k,irho)
-                left(ivx)   = w(i,j,k,ivx)
-                left(ivy)   = w(i,j,k,ivy)
-                left(ivz)   = w(i,j,k,ivz)
-                left(irhoE) = p(i,j,k)
-                if( correctForK ) left(itu1) = w(i,j,k,itu1)
-
-                right(irho)  = w(i,j+1,k,irho)
-                right(ivx)   = w(i,j+1,k,ivx)
-                right(ivy)   = w(i,j+1,k,ivy)
-                right(ivz)   = w(i,j+1,k,ivz)
-                right(irhoE) = p(i,j+1,k)
-                if( correctForK ) right(itu1) = w(i,j+1,k,itu1)
-
-                ! Compute the value of gamma on the face. Take an
-                ! arithmetic average of the two states.
-
-                gammaFace = half*(gamma(i,j,k) + gamma(i,j+1,k))
-
-                ! Compute the dissipative flux across the interface.
-
-                call riemannFlux(left, right, flux)
+                        sx = sk(i, j, k, 1); sy = sk(i, j, k, 2); sz = sk(i, j, k, 3)
+                        por = porK(i, j, k)
+                        sFace = sFaceK(i, j, k)
 
-                ! And scatter it to the left and right.
+                        ! Determine the left and right state.
 
-                fw(i,j,k,irho)  = fw(i,j,k,irho)  + flux(irho)
-                fw(i,j,k,imx)   = fw(i,j,k,imx)   + flux(imx)
-                fw(i,j,k,imy)   = fw(i,j,k,imy)   + flux(imy)
-                fw(i,j,k,imz)   = fw(i,j,k,imz)   + flux(imz)
-                fw(i,j,k,irhoE) = fw(i,j,k,irhoE) + flux(irhoE)
+                        left(irho) = w(i, j, k, irho)
+                        left(ivx) = w(i, j, k, ivx)
+                        left(ivy) = w(i, j, k, ivy)
+                        left(ivz) = w(i, j, k, ivz)
+                        left(irhoE) = p(i, j, k)
+                        if (correctForK) left(itu1) = w(i, j, k, itu1)
 
-                fw(i,j+1,k,irho)  = fw(i,j+1,k,irho)  - flux(irho)
-                fw(i,j+1,k,imx)   = fw(i,j+1,k,imx)   - flux(imx)
-                fw(i,j+1,k,imy)   = fw(i,j+1,k,imy)   - flux(imy)
-                fw(i,j+1,k,imz)   = fw(i,j+1,k,imz)   - flux(imz)
-                fw(i,j+1,k,irhoE) = fw(i,j+1,k,irhoE) - flux(irhoE)
-             enddo
-          enddo
-       enddo
+                        right(irho) = w(i, j, k + 1, irho)
+                        right(ivx) = w(i, j, k + 1, ivx)
+                        right(ivy) = w(i, j, k + 1, ivy)
+                        right(ivz) = w(i, j, k + 1, ivz)
+                        right(irhoE) = p(i, j, k + 1)
+                        if (correctForK) right(itu1) = w(i, j, k + 1, itu1)
 
-       ! Fluxes in k-direction.
+                        ! Compute the value of gamma on the face. Take an
+                        ! arithmetic average of the two states.
 
-       do k=1,kl
-          do j=2,jl
-             do i=2,il
+                        gammaFace = half * (gamma(i, j, k) + gamma(i, j, k + 1))
 
-                ! Store the normal vector, the porosity and the
-                ! mesh velocity if present.
+                        ! Compute the dissipative flux across the interface.
 
-                sx = sk(i,j,k,1); sy = sk(i,j,k,2); sz = sk(i,j,k,3)
-                por = porK(i,j,k)
-                sFace = sFaceK(i,j,k)
+                        call riemannFlux(left, right, flux)
 
-                ! Determine the left and right state.
+                        ! And scatter it the left and right.
 
-                left(irho)  = w(i,j,k,irho)
-                left(ivx)   = w(i,j,k,ivx)
-                left(ivy)   = w(i,j,k,ivy)
-                left(ivz)   = w(i,j,k,ivz)
-                left(irhoE) = p(i,j,k)
-                if( correctForK ) left(itu1) = w(i,j,k,itu1)
+                        fw(i, j, k, irho) = fw(i, j, k, irho) + flux(irho)
+                        fw(i, j, k, imx) = fw(i, j, k, imx) + flux(imx)
+                        fw(i, j, k, imy) = fw(i, j, k, imy) + flux(imy)
+                        fw(i, j, k, imz) = fw(i, j, k, imz) + flux(imz)
+                        fw(i, j, k, irhoE) = fw(i, j, k, irhoE) + flux(irhoE)
 
-                right(irho)  = w(i,j,k+1,irho)
-                right(ivx)   = w(i,j,k+1,ivx)
-                right(ivy)   = w(i,j,k+1,ivy)
-                right(ivz)   = w(i,j,k+1,ivz)
-                right(irhoE) = p(i,j,k+1)
-                if( correctForK ) right(itu1) = w(i,j,k+1,itu1)
-
-                ! Compute the value of gamma on the face. Take an
-                ! arithmetic average of the two states.
-
-                gammaFace = half*(gamma(i,j,k) + gamma(i,j,k+1))
+                        fw(i, j, k + 1, irho) = fw(i, j, k + 1, irho) - flux(irho)
+                        fw(i, j, k + 1, imx) = fw(i, j, k + 1, imx) - flux(imx)
+                        fw(i, j, k + 1, imy) = fw(i, j, k + 1, imy) - flux(imy)
+                        fw(i, j, k + 1, imz) = fw(i, j, k + 1, imz) - flux(imz)
+                        fw(i, j, k + 1, irhoE) = fw(i, j, k + 1, irhoE) - flux(irhoE)
 
-                ! Compute the dissipative flux across the interface.
-
-                call riemannFlux(left, right, flux)
+                    end do
+                end do
+            end do
 
-                ! And scatter it the left and right.
+            !      ==================================================================
 
-                fw(i,j,k,irho)  = fw(i,j,k,irho)  + flux(irho)
-                fw(i,j,k,imx)   = fw(i,j,k,imx)   + flux(imx)
-                fw(i,j,k,imy)   = fw(i,j,k,imy)   + flux(imy)
-                fw(i,j,k,imz)   = fw(i,j,k,imz)   + flux(imz)
-                fw(i,j,k,irhoE) = fw(i,j,k,irhoE) + flux(irhoE)
+            else orderTest
 
-                fw(i,j,k+1,irho)  = fw(i,j,k+1,irho)  - flux(irho)
-                fw(i,j,k+1,imx)   = fw(i,j,k+1,imx)   - flux(imx)
-                fw(i,j,k+1,imy)   = fw(i,j,k+1,imy)   - flux(imy)
-                fw(i,j,k+1,imz)   = fw(i,j,k+1,imz)   - flux(imz)
-                fw(i,j,k+1,irhoE) = fw(i,j,k+1,irhoE) - flux(irhoE)
+            !      ==================================================================
+            !
+            !         Second order reconstruction of the left and right state.
+            !         The three differences used in the, possibly nonlinear,
+            !         interpolation are constructed here; the actual left and
+            !         right states, or at least the differences from the first
+            !         order interpolation, are computed in the subroutine
+            !         leftRightState.
+            !
+            ! Fluxes in the i-direction.
 
-             enddo
-          enddo
-       enddo
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 1, il
 
-       !      ==================================================================
+                        ! Store the three differences used in the interpolation
+                        ! in du1, du2, du3.
 
-    else orderTest
+                        du1(irho) = w(i, j, k, irho) - w(i - 1, j, k, irho)
+                        du2(irho) = w(i + 1, j, k, irho) - w(i, j, k, irho)
+                        du3(irho) = w(i + 2, j, k, irho) - w(i + 1, j, k, irho)
 
-       !      ==================================================================
-       !
-       !         Second order reconstruction of the left and right state.
-       !         The three differences used in the, possibly nonlinear,
-       !         interpolation are constructed here; the actual left and
-       !         right states, or at least the differences from the first
-       !         order interpolation, are computed in the subroutine
-       !         leftRightState.
-       !
-       ! Fluxes in the i-direction.
+                        du1(ivx) = w(i, j, k, ivx) - w(i - 1, j, k, ivx)
+                        du2(ivx) = w(i + 1, j, k, ivx) - w(i, j, k, ivx)
+                        du3(ivx) = w(i + 2, j, k, ivx) - w(i + 1, j, k, ivx)
 
-       do k=2,kl
-          do j=2,jl
-             do i=1,il
+                        du1(ivy) = w(i, j, k, ivy) - w(i - 1, j, k, ivy)
+                        du2(ivy) = w(i + 1, j, k, ivy) - w(i, j, k, ivy)
+                        du3(ivy) = w(i + 2, j, k, ivy) - w(i + 1, j, k, ivy)
 
-                ! Store the three differences used in the interpolation
-                ! in du1, du2, du3.
+                        du1(ivz) = w(i, j, k, ivz) - w(i - 1, j, k, ivz)
+                        du2(ivz) = w(i + 1, j, k, ivz) - w(i, j, k, ivz)
+                        du3(ivz) = w(i + 2, j, k, ivz) - w(i + 1, j, k, ivz)
 
-                du1(irho) = w(i,  j,k,irho) - w(i-1,j,k,irho)
-                du2(irho) = w(i+1,j,k,irho) - w(i,  j,k,irho)
-                du3(irho) = w(i+2,j,k,irho) - w(i+1,j,k,irho)
+                        du1(irhoE) = p(i, j, k) - p(i - 1, j, k)
+                        du2(irhoE) = p(i + 1, j, k) - p(i, j, k)
+                        du3(irhoE) = p(i + 2, j, k) - p(i + 1, j, k)
 
-                du1(ivx) = w(i,  j,k,ivx) - w(i-1,j,k,ivx)
-                du2(ivx) = w(i+1,j,k,ivx) - w(i,  j,k,ivx)
-                du3(ivx) = w(i+2,j,k,ivx) - w(i+1,j,k,ivx)
+                        if (correctForK) then
+                            du1(itu1) = w(i, j, k, itu1) - w(i - 1, j, k, itu1)
+                            du2(itu1) = w(i + 1, j, k, itu1) - w(i, j, k, itu1)
+                            du3(itu1) = w(i + 2, j, k, itu1) - w(i + 1, j, k, itu1)
+                        end if
 
-                du1(ivy) = w(i,  j,k,ivy) - w(i-1,j,k,ivy)
-                du2(ivy) = w(i+1,j,k,ivy) - w(i,  j,k,ivy)
-                du3(ivy) = w(i+2,j,k,ivy) - w(i+1,j,k,ivy)
+                        ! Compute the differences from the first order scheme.
 
-                du1(ivz) = w(i,  j,k,ivz) - w(i-1,j,k,ivz)
-                du2(ivz) = w(i+1,j,k,ivz) - w(i,  j,k,ivz)
-                du3(ivz) = w(i+2,j,k,ivz) - w(i+1,j,k,ivz)
+                        call leftRightState(du1, du2, du3, rotMatrixI, &
+                                            left, right)
 
-                du1(irhoE) = p(i,  j,k) - p(i-1,j,k)
-                du2(irhoE) = p(i+1,j,k) - p(i,  j,k)
-                du3(irhoE) = p(i+2,j,k) - p(i+1,j,k)
+                        ! Add the first order part to the currently stored
+                        ! differences, such that the correct state vector
+                        ! is stored.
 
-                if( correctForK ) then
-                   du1(itu1) = w(i,  j,k,itu1) - w(i-1,j,k,itu1)
-                   du2(itu1) = w(i+1,j,k,itu1) - w(i,  j,k,itu1)
-                   du3(itu1) = w(i+2,j,k,itu1) - w(i+1,j,k,itu1)
-                endif
+                        left(irho) = left(irho) + w(i, j, k, irho)
+                        left(ivx) = left(ivx) + w(i, j, k, ivx)
+                        left(ivy) = left(ivy) + w(i, j, k, ivy)
+                        left(ivz) = left(ivz) + w(i, j, k, ivz)
+                        left(irhoE) = left(irhoE) + p(i, j, k)
 
-                ! Compute the differences from the first order scheme.
+                        right(irho) = right(irho) + w(i + 1, j, k, irho)
+                        right(ivx) = right(ivx) + w(i + 1, j, k, ivx)
+                        right(ivy) = right(ivy) + w(i + 1, j, k, ivy)
+                        right(ivz) = right(ivz) + w(i + 1, j, k, ivz)
+                        right(irhoE) = right(irhoE) + p(i + 1, j, k)
 
-                call leftRightState(du1, du2, du3, rotMatrixI, &
-                     left, right)
+                        if (correctForK) then
+                            left(itu1) = left(itu1) + w(i, j, k, itu1)
+                            right(itu1) = right(itu1) + w(i + 1, j, k, itu1)
+                        end if
 
-                ! Add the first order part to the currently stored
-                ! differences, such that the correct state vector
-                ! is stored.
+                        ! Store the normal vector, the porosity and the
+                        ! mesh velocity if present.
 
-                left(irho)  = left(irho)  + w(i,j,k,irho)
-                left(ivx)   = left(ivx)   + w(i,j,k,ivx)
-                left(ivy)   = left(ivy)   + w(i,j,k,ivy)
-                left(ivz)   = left(ivz)   + w(i,j,k,ivz)
-                left(irhoE) = left(irhoE) + p(i,j,k)
+                        sx = si(i, j, k, 1); sy = si(i, j, k, 2); sz = si(i, j, k, 3)
+                        por = porI(i, j, k)
+                        sFace = sFaceI(i, j, k)
 
-                right(irho)  = right(irho)  + w(i+1,j,k,irho)
-                right(ivx)   = right(ivx)   + w(i+1,j,k,ivx)
-                right(ivy)   = right(ivy)   + w(i+1,j,k,ivy)
-                right(ivz)   = right(ivz)   + w(i+1,j,k,ivz)
-                right(irhoE) = right(irhoE) + p(i+1,j,k)
+                        ! Compute the value of gamma on the face. Take an
+                        ! arithmetic average of the two states.
 
-                if( correctForK ) then
-                   left(itu1)  = left(itu1)  + w(i,j,k,itu1)
-                   right(itu1) = right(itu1) + w(i+1,j,k,itu1)
-                endif
+                        gammaFace = half * (gamma(i, j, k) + gamma(i + 1, j, k))
 
-                ! Store the normal vector, the porosity and the
-                ! mesh velocity if present.
+                        ! Compute the dissipative flux across the interface.
 
-                sx = si(i,j,k,1); sy = si(i,j,k,2); sz = si(i,j,k,3)
-                por = porI(i,j,k)
-                sFace = sFaceI(i,j,k)
+                        call riemannFlux(left, right, flux)
 
-                ! Compute the value of gamma on the face. Take an
-                ! arithmetic average of the two states.
+                        ! And scatter it to the left and right.
 
-                gammaFace = half*(gamma(i,j,k) + gamma(i+1,j,k))
+                        fw(i, j, k, irho) = fw(i, j, k, irho) + flux(irho)
+                        fw(i, j, k, imx) = fw(i, j, k, imx) + flux(imx)
+                        fw(i, j, k, imy) = fw(i, j, k, imy) + flux(imy)
+                        fw(i, j, k, imz) = fw(i, j, k, imz) + flux(imz)
+                        fw(i, j, k, irhoE) = fw(i, j, k, irhoE) + flux(irhoE)
 
-                ! Compute the dissipative flux across the interface.
+                        fw(i + 1, j, k, irho) = fw(i + 1, j, k, irho) - flux(irho)
+                        fw(i + 1, j, k, imx) = fw(i + 1, j, k, imx) - flux(imx)
+                        fw(i + 1, j, k, imy) = fw(i + 1, j, k, imy) - flux(imy)
+                        fw(i + 1, j, k, imz) = fw(i + 1, j, k, imz) - flux(imz)
+                        fw(i + 1, j, k, irhoE) = fw(i + 1, j, k, irhoE) - flux(irhoE)
 
-                call riemannFlux(left, right, flux)
+                    end do
+                end do
+            end do
 
-                ! And scatter it to the left and right.
+            ! Fluxes in the j-direction.
 
-                fw(i,j,k,irho)  = fw(i,j,k,irho)  + flux(irho)
-                fw(i,j,k,imx)   = fw(i,j,k,imx)   + flux(imx)
-                fw(i,j,k,imy)   = fw(i,j,k,imy)   + flux(imy)
-                fw(i,j,k,imz)   = fw(i,j,k,imz)   + flux(imz)
-                fw(i,j,k,irhoE) = fw(i,j,k,irhoE) + flux(irhoE)
+            do k = 2, kl
+                do j = 1, jl
+                    do i = 2, il
 
-                fw(i+1,j,k,irho)  = fw(i+1,j,k,irho)  - flux(irho)
-                fw(i+1,j,k,imx)   = fw(i+1,j,k,imx)   - flux(imx)
-                fw(i+1,j,k,imy)   = fw(i+1,j,k,imy)   - flux(imy)
-                fw(i+1,j,k,imz)   = fw(i+1,j,k,imz)   - flux(imz)
-                fw(i+1,j,k,irhoE) = fw(i+1,j,k,irhoE) - flux(irhoE)
+                        ! Store the three differences used in the interpolation
+                        ! in du1, du2, du3.
 
-             enddo
-          enddo
-       enddo
+                        du1(irho) = w(i, j, k, irho) - w(i, j - 1, k, irho)
+                        du2(irho) = w(i, j + 1, k, irho) - w(i, j, k, irho)
+                        du3(irho) = w(i, j + 2, k, irho) - w(i, j + 1, k, irho)
 
-       ! Fluxes in the j-direction.
+                        du1(ivx) = w(i, j, k, ivx) - w(i, j - 1, k, ivx)
+                        du2(ivx) = w(i, j + 1, k, ivx) - w(i, j, k, ivx)
+                        du3(ivx) = w(i, j + 2, k, ivx) - w(i, j + 1, k, ivx)
 
-       do k=2,kl
-          do j=1,jl
-             do i=2,il
+                        du1(ivy) = w(i, j, k, ivy) - w(i, j - 1, k, ivy)
+                        du2(ivy) = w(i, j + 1, k, ivy) - w(i, j, k, ivy)
+                        du3(ivy) = w(i, j + 2, k, ivy) - w(i, j + 1, k, ivy)
 
-                ! Store the three differences used in the interpolation
-                ! in du1, du2, du3.
+                        du1(ivz) = w(i, j, k, ivz) - w(i, j - 1, k, ivz)
+                        du2(ivz) = w(i, j + 1, k, ivz) - w(i, j, k, ivz)
+                        du3(ivz) = w(i, j + 2, k, ivz) - w(i, j + 1, k, ivz)
 
-                du1(irho) = w(i,  j,k,irho) - w(i,j-1,k,irho)
-                du2(irho) = w(i,j+1,k,irho) - w(i,  j,k,irho)
-                du3(irho) = w(i,j+2,k,irho) - w(i,j+1,k,irho)
+                        du1(irhoE) = p(i, j, k) - p(i, j - 1, k)
+                        du2(irhoE) = p(i, j + 1, k) - p(i, j, k)
+                        du3(irhoE) = p(i, j + 2, k) - p(i, j + 1, k)
 
-                du1(ivx) = w(i,  j,k,ivx) - w(i,j-1,k,ivx)
-                du2(ivx) = w(i,j+1,k,ivx) - w(i,  j,k,ivx)
-                du3(ivx) = w(i,j+2,k,ivx) - w(i,j+1,k,ivx)
+                        if (correctForK) then
+                            du1(itu1) = w(i, j, k, itu1) - w(i, j - 1, k, itu1)
+                            du2(itu1) = w(i, j + 1, k, itu1) - w(i, j, k, itu1)
+                            du3(itu1) = w(i, j + 2, k, itu1) - w(i, j + 1, k, itu1)
+                        end if
 
-                du1(ivy) = w(i,  j,k,ivy) - w(i,j-1,k,ivy)
-                du2(ivy) = w(i,j+1,k,ivy) - w(i,  j,k,ivy)
-                du3(ivy) = w(i,j+2,k,ivy) - w(i,j+1,k,ivy)
+                        ! Compute the differences from the first order scheme.
 
-                du1(ivz) = w(i,  j,k,ivz) - w(i,j-1,k,ivz)
-                du2(ivz) = w(i,j+1,k,ivz) - w(i,  j,k,ivz)
-                du3(ivz) = w(i,j+2,k,ivz) - w(i,j+1,k,ivz)
+                        call leftRightState(du1, du2, du3, rotMatrixJ, &
+                                            left, right)
 
-                du1(irhoE) = p(i,  j,k) - p(i,j-1,k)
-                du2(irhoE) = p(i,j+1,k) - p(i,  j,k)
-                du3(irhoE) = p(i,j+2,k) - p(i,j+1,k)
+                        ! Add the first order part to the currently stored
+                        ! differences, such that the correct state vector
+                        ! is stored.
 
-                if( correctForK ) then
-                   du1(itu1) = w(i,  j,k,itu1) - w(i,j-1,k,itu1)
-                   du2(itu1) = w(i,j+1,k,itu1) - w(i,  j,k,itu1)
-                   du3(itu1) = w(i,j+2,k,itu1) - w(i,j+1,k,itu1)
-                endif
+                        left(irho) = left(irho) + w(i, j, k, irho)
+                        left(ivx) = left(ivx) + w(i, j, k, ivx)
+                        left(ivy) = left(ivy) + w(i, j, k, ivy)
+                        left(ivz) = left(ivz) + w(i, j, k, ivz)
+                        left(irhoE) = left(irhoE) + p(i, j, k)
 
-                ! Compute the differences from the first order scheme.
+                        right(irho) = right(irho) + w(i, j + 1, k, irho)
+                        right(ivx) = right(ivx) + w(i, j + 1, k, ivx)
+                        right(ivy) = right(ivy) + w(i, j + 1, k, ivy)
+                        right(ivz) = right(ivz) + w(i, j + 1, k, ivz)
+                        right(irhoE) = right(irhoE) + p(i, j + 1, k)
 
-                call leftRightState(du1, du2, du3, rotMatrixJ, &
-                     left, right)
+                        if (correctForK) then
+                            left(itu1) = left(itu1) + w(i, j, k, itu1)
+                            right(itu1) = right(itu1) + w(i, j + 1, k, itu1)
+                        end if
 
-                ! Add the first order part to the currently stored
-                ! differences, such that the correct state vector
-                ! is stored.
+                        ! Store the normal vector, the porosity and the
+                        ! mesh velocity if present.
 
-                left(irho)  = left(irho)  + w(i,j,k,irho)
-                left(ivx)   = left(ivx)   + w(i,j,k,ivx)
-                left(ivy)   = left(ivy)   + w(i,j,k,ivy)
-                left(ivz)   = left(ivz)   + w(i,j,k,ivz)
-                left(irhoE) = left(irhoE) + p(i,j,k)
+                        sx = sj(i, j, k, 1); sy = sj(i, j, k, 2); sz = sj(i, j, k, 3)
+                        por = porJ(i, j, k)
+                        sFace = sFaceJ(i, j, k)
 
-                right(irho)  = right(irho)  + w(i,j+1,k,irho)
-                right(ivx)   = right(ivx)   + w(i,j+1,k,ivx)
-                right(ivy)   = right(ivy)   + w(i,j+1,k,ivy)
-                right(ivz)   = right(ivz)   + w(i,j+1,k,ivz)
-                right(irhoE) = right(irhoE) + p(i,j+1,k)
+                        ! Compute the value of gamma on the face. Take an
+                        ! arithmetic average of the two states.
 
-                if( correctForK ) then
-                   left(itu1)  = left(itu1)  + w(i,j,k,itu1)
-                   right(itu1) = right(itu1) + w(i,j+1,k,itu1)
-                endif
+                        gammaFace = half * (gamma(i, j, k) + gamma(i, j + 1, k))
 
-                ! Store the normal vector, the porosity and the
-                ! mesh velocity if present.
+                        ! Compute the dissipative flux across the interface.
 
-                sx = sj(i,j,k,1); sy = sj(i,j,k,2); sz = sj(i,j,k,3)
-                por = porJ(i,j,k)
-                sFace = sFaceJ(i,j,k)
+                        call riemannFlux(left, right, flux)
 
-                ! Compute the value of gamma on the face. Take an
-                ! arithmetic average of the two states.
+                        ! And scatter it to the left and right.
 
-                gammaFace = half*(gamma(i,j,k) + gamma(i,j+1,k))
+                        fw(i, j, k, irho) = fw(i, j, k, irho) + flux(irho)
+                        fw(i, j, k, imx) = fw(i, j, k, imx) + flux(imx)
+                        fw(i, j, k, imy) = fw(i, j, k, imy) + flux(imy)
+                        fw(i, j, k, imz) = fw(i, j, k, imz) + flux(imz)
+                        fw(i, j, k, irhoE) = fw(i, j, k, irhoE) + flux(irhoE)
 
-                ! Compute the dissipative flux across the interface.
+                        fw(i, j + 1, k, irho) = fw(i, j + 1, k, irho) - flux(irho)
+                        fw(i, j + 1, k, imx) = fw(i, j + 1, k, imx) - flux(imx)
+                        fw(i, j + 1, k, imy) = fw(i, j + 1, k, imy) - flux(imy)
+                        fw(i, j + 1, k, imz) = fw(i, j + 1, k, imz) - flux(imz)
+                        fw(i, j + 1, k, irhoE) = fw(i, j + 1, k, irhoE) - flux(irhoE)
+                    end do
+                end do
+            end do
 
-                call riemannFlux(left, right, flux)
+            ! Fluxes in the k-direction.
 
-                ! And scatter it to the left and right.
+            do k = 1, kl
+                do j = 2, jl
+                    do i = 2, il
 
-                fw(i,j,k,irho)  = fw(i,j,k,irho)  + flux(irho)
-                fw(i,j,k,imx)   = fw(i,j,k,imx)   + flux(imx)
-                fw(i,j,k,imy)   = fw(i,j,k,imy)   + flux(imy)
-                fw(i,j,k,imz)   = fw(i,j,k,imz)   + flux(imz)
-                fw(i,j,k,irhoE) = fw(i,j,k,irhoE) + flux(irhoE)
+                        ! Store the three differences used in the interpolation
+                        ! in du1, du2, du3.
 
-                fw(i,j+1,k,irho)  = fw(i,j+1,k,irho)  - flux(irho)
-                fw(i,j+1,k,imx)   = fw(i,j+1,k,imx)   - flux(imx)
-                fw(i,j+1,k,imy)   = fw(i,j+1,k,imy)   - flux(imy)
-                fw(i,j+1,k,imz)   = fw(i,j+1,k,imz)   - flux(imz)
-                fw(i,j+1,k,irhoE) = fw(i,j+1,k,irhoE) - flux(irhoE)
-             enddo
-          enddo
-       enddo
+                        du1(irho) = w(i, j, k, irho) - w(i, j, k - 1, irho)
+                        du2(irho) = w(i, j, k + 1, irho) - w(i, j, k, irho)
+                        du3(irho) = w(i, j, k + 2, irho) - w(i, j, k + 1, irho)
 
-       ! Fluxes in the k-direction.
+                        du1(ivx) = w(i, j, k, ivx) - w(i, j, k - 1, ivx)
+                        du2(ivx) = w(i, j, k + 1, ivx) - w(i, j, k, ivx)
+                        du3(ivx) = w(i, j, k + 2, ivx) - w(i, j, k + 1, ivx)
 
-       do k=1,kl
-          do j=2,jl
-             do i=2,il
+                        du1(ivy) = w(i, j, k, ivy) - w(i, j, k - 1, ivy)
+                        du2(ivy) = w(i, j, k + 1, ivy) - w(i, j, k, ivy)
+                        du3(ivy) = w(i, j, k + 2, ivy) - w(i, j, k + 1, ivy)
 
-                ! Store the three differences used in the interpolation
-                ! in du1, du2, du3.
+                        du1(ivz) = w(i, j, k, ivz) - w(i, j, k - 1, ivz)
+                        du2(ivz) = w(i, j, k + 1, ivz) - w(i, j, k, ivz)
+                        du3(ivz) = w(i, j, k + 2, ivz) - w(i, j, k + 1, ivz)
 
-                du1(irho) = w(i,  j,k,irho) - w(i,j,k-1,irho)
-                du2(irho) = w(i,j,k+1,irho) - w(i,  j,k,irho)
-                du3(irho) = w(i,j,k+2,irho) - w(i,j,k+1,irho)
+                        du1(irhoE) = p(i, j, k) - p(i, j, k - 1)
+                        du2(irhoE) = p(i, j, k + 1) - p(i, j, k)
+                        du3(irhoE) = p(i, j, k + 2) - p(i, j, k + 1)
 
-                du1(ivx) = w(i,  j,k,ivx) - w(i,j,k-1,ivx)
-                du2(ivx) = w(i,j,k+1,ivx) - w(i,  j,k,ivx)
-                du3(ivx) = w(i,j,k+2,ivx) - w(i,j,k+1,ivx)
-
-                du1(ivy) = w(i,  j,k,ivy) - w(i,j,k-1,ivy)
-                du2(ivy) = w(i,j,k+1,ivy) - w(i,  j,k,ivy)
-                du3(ivy) = w(i,j,k+2,ivy) - w(i,j,k+1,ivy)
-
-                du1(ivz) = w(i,  j,k,ivz) - w(i,j,k-1,ivz)
-                du2(ivz) = w(i,j,k+1,ivz) - w(i,  j,k,ivz)
-                du3(ivz) = w(i,j,k+2,ivz) - w(i,j,k+1,ivz)
-
-                du1(irhoE) = p(i,  j,k) - p(i,j,k-1)
-                du2(irhoE) = p(i,j,k+1) - p(i,  j,k)
-                du3(irhoE) = p(i,j,k+2) - p(i,j,k+1)
-
-                if( correctForK ) then
-                   du1(itu1) = w(i,  j,k,itu1) - w(i,j,k-1,itu1)
-                   du2(itu1) = w(i,j,k+1,itu1) - w(i,  j,k,itu1)
-                   du3(itu1) = w(i,j,k+2,itu1) - w(i,j,k+1,itu1)
-                endif
+                        if (correctForK) then
+                            du1(itu1) = w(i, j, k, itu1) - w(i, j, k - 1, itu1)
+                            du2(itu1) = w(i, j, k + 1, itu1) - w(i, j, k, itu1)
+                            du3(itu1) = w(i, j, k + 2, itu1) - w(i, j, k + 1, itu1)
+                        end if
 
-                ! Compute the differences from the first order scheme.
+                        ! Compute the differences from the first order scheme.
 
-                call leftRightState(du1, du2, du3, rotMatrixK, &
-                     left, right)
+                        call leftRightState(du1, du2, du3, rotMatrixK, &
+                                            left, right)
 
-                ! Add the first order part to the currently stored
-                ! differences, such that the correct state vector
-                ! is stored.
+                        ! Add the first order part to the currently stored
+                        ! differences, such that the correct state vector
+                        ! is stored.
 
-                left(irho)  = left(irho)  + w(i,j,k,irho)
-                left(ivx)   = left(ivx)   + w(i,j,k,ivx)
-                left(ivy)   = left(ivy)   + w(i,j,k,ivy)
-                left(ivz)   = left(ivz)   + w(i,j,k,ivz)
-                left(irhoE) = left(irhoE) + p(i,j,k)
+                        left(irho) = left(irho) + w(i, j, k, irho)
+                        left(ivx) = left(ivx) + w(i, j, k, ivx)
+                        left(ivy) = left(ivy) + w(i, j, k, ivy)
+                        left(ivz) = left(ivz) + w(i, j, k, ivz)
+                        left(irhoE) = left(irhoE) + p(i, j, k)
 
-                right(irho)  = right(irho)  + w(i,j,k+1,irho)
-                right(ivx)   = right(ivx)   + w(i,j,k+1,ivx)
-                right(ivy)   = right(ivy)   + w(i,j,k+1,ivy)
-                right(ivz)   = right(ivz)   + w(i,j,k+1,ivz)
-                right(irhoE) = right(irhoE) + p(i,j,k+1)
+                        right(irho) = right(irho) + w(i, j, k + 1, irho)
+                        right(ivx) = right(ivx) + w(i, j, k + 1, ivx)
+                        right(ivy) = right(ivy) + w(i, j, k + 1, ivy)
+                        right(ivz) = right(ivz) + w(i, j, k + 1, ivz)
+                        right(irhoE) = right(irhoE) + p(i, j, k + 1)
 
-                if( correctForK ) then
-                   left(itu1)  = left(itu1)  + w(i,j,k,itu1)
-                   right(itu1) = right(itu1) + w(i,j,k+1,itu1)
-                endif
+                        if (correctForK) then
+                            left(itu1) = left(itu1) + w(i, j, k, itu1)
+                            right(itu1) = right(itu1) + w(i, j, k + 1, itu1)
+                        end if
 
-                ! Store the normal vector, the porosity and the
-                ! mesh velocity if present.
+                        ! Store the normal vector, the porosity and the
+                        ! mesh velocity if present.
 
-                sx = sk(i,j,k,1); sy = sk(i,j,k,2); sz = sk(i,j,k,3)
-                por = porK(i,j,k)
-                sFace = sFaceK(i,j,k)
+                        sx = sk(i, j, k, 1); sy = sk(i, j, k, 2); sz = sk(i, j, k, 3)
+                        por = porK(i, j, k)
+                        sFace = sFaceK(i, j, k)
 
-                ! Compute the value of gamma on the face. Take an
-                ! arithmetic average of the two states.
+                        ! Compute the value of gamma on the face. Take an
+                        ! arithmetic average of the two states.
 
-                gammaFace = half*(gamma(i,j,k) + gamma(i,j,k+1))
+                        gammaFace = half * (gamma(i, j, k) + gamma(i, j, k + 1))
 
-                ! Compute the dissipative flux across the interface.
+                        ! Compute the dissipative flux across the interface.
 
-                call riemannFlux(left, right, flux)
+                        call riemannFlux(left, right, flux)
 
-                ! And scatter it to the left and right.
+                        ! And scatter it to the left and right.
 
-                fw(i,j,k,irho)  = fw(i,j,k,irho)  + flux(irho)
-                fw(i,j,k,imx)   = fw(i,j,k,imx)   + flux(imx)
-                fw(i,j,k,imy)   = fw(i,j,k,imy)   + flux(imy)
-                fw(i,j,k,imz)   = fw(i,j,k,imz)   + flux(imz)
-                fw(i,j,k,irhoE) = fw(i,j,k,irhoE) + flux(irhoE)
+                        fw(i, j, k, irho) = fw(i, j, k, irho) + flux(irho)
+                        fw(i, j, k, imx) = fw(i, j, k, imx) + flux(imx)
+                        fw(i, j, k, imy) = fw(i, j, k, imy) + flux(imy)
+                        fw(i, j, k, imz) = fw(i, j, k, imz) + flux(imz)
+                        fw(i, j, k, irhoE) = fw(i, j, k, irhoE) + flux(irhoE)
 
-                fw(i,j,k+1,irho)  = fw(i,j,k+1,irho)  - flux(irho)
-                fw(i,j,k+1,imx)   = fw(i,j,k+1,imx)   - flux(imx)
-                fw(i,j,k+1,imy)   = fw(i,j,k+1,imy)   - flux(imy)
-                fw(i,j,k+1,imz)   = fw(i,j,k+1,imz)   - flux(imz)
-                fw(i,j,k+1,irhoE) = fw(i,j,k+1,irhoE) - flux(irhoE)
+                        fw(i, j, k + 1, irho) = fw(i, j, k + 1, irho) - flux(irho)
+                        fw(i, j, k + 1, imx) = fw(i, j, k + 1, imx) - flux(imx)
+                        fw(i, j, k + 1, imy) = fw(i, j, k + 1, imy) - flux(imy)
+                        fw(i, j, k + 1, imz) = fw(i, j, k + 1, imz) - flux(imz)
+                        fw(i, j, k + 1, irhoE) = fw(i, j, k + 1, irhoE) - flux(irhoE)
 
-             enddo
-          enddo
-       enddo
+                    end do
+                end do
+            end do
 
-    endif orderTest
+        end if orderTest
 
-    !      ==================================================================
+        !      ==================================================================
 
-  contains
+    contains
 
-    subroutine leftRightState(du1, du2, du3, rotMatrix, left, right)
-      !
-      !         leftRightState computes the differences in the left and
-      !         right state compared to the first order interpolation. For a
-      !         monotonic second order discretization the interpolations
-      !         need to be nonlinear. The linear second order scheme can be
-      !         stable (depending on the value of kappa), but it will have
-      !         oscillations near discontinuities.
-      !
-      implicit none
-      !
-      !        Local parameter.
-      !
-      real(kind=realType), parameter :: epsLim = 1.e-10_realType
-      !
-      !        Subroutine arguments.
-      !
-      real(kind=realType), dimension(:), intent(inout) :: du1, du2, du3
-      real(kind=realType), dimension(:), intent(out)   :: left, right
+        subroutine leftRightState(du1, du2, du3, rotMatrix, left, right)
+            !
+            !         leftRightState computes the differences in the left and
+            !         right state compared to the first order interpolation. For a
+            !         monotonic second order discretization the interpolations
+            !         need to be nonlinear. The linear second order scheme can be
+            !         stable (depending on the value of kappa), but it will have
+            !         oscillations near discontinuities.
+            !
+            implicit none
+            !
+            !        Local parameter.
+            !
+            real(kind=realType), parameter :: epsLim = 1.e-10_realType
+            !
+            !        Subroutine arguments.
+            !
+            real(kind=realType), dimension(:), intent(inout) :: du1, du2, du3
+            real(kind=realType), dimension(:), intent(out) :: left, right
 
-      real(kind=realType), dimension(:,:,:,:,:), pointer :: rotMatrix
-      !
-      !        Local variables.
-      !
-      integer(kind=intType) :: l
+            real(kind=realType), dimension(:, :, :, :, :), pointer :: rotMatrix
+            !
+            !        Local variables.
+            !
+            integer(kind=intType) :: l
 
-      real(kind=realType) :: rl1, rl2, rr1, rr2, tmp, dvx, dvy, dvz
+            real(kind=realType) :: rl1, rl2, rr1, rr2, tmp, dvx, dvy, dvz
 
-      real(kind=realType), dimension(3,3) :: rot
+            real(kind=realType), dimension(3, 3) :: rot
 
-      ! Check if the velocity components should be transformed to
-      ! the cylindrical frame.
+            ! Check if the velocity components should be transformed to
+            ! the cylindrical frame.
 
-      if( rotationalPeriodic ) then
+            if (rotationalPeriodic) then
 
-         ! Store the rotation matrix a bit easier. Note that the i,j,k
-         ! come from the main subroutine.
+                ! Store the rotation matrix a bit easier. Note that the i,j,k
+                ! come from the main subroutine.
 
-         rot(1,1) = rotMatrix(i+ii-2, j+jj-2,k+kk-2, 1,1)
-         rot(1,2) = rotMatrix(i+ii-2, j+jj-2,k+kk-2, 1,2)
-         rot(1,3) = rotMatrix(i+ii-2, j+jj-2,k+kk-2, 1,3)
+                rot(1, 1) = rotMatrix(i + ii - 2, j + jj - 2, k + kk - 2, 1, 1)
+                rot(1, 2) = rotMatrix(i + ii - 2, j + jj - 2, k + kk - 2, 1, 2)
+                rot(1, 3) = rotMatrix(i + ii - 2, j + jj - 2, k + kk - 2, 1, 3)
 
-         rot(2,1) = rotMatrix(i+ii-2, j+jj-2, k+kk-2, 2,1)
-         rot(2,2) = rotMatrix(i+ii-2, j+jj-2, k+kk-2, 2,2)
-         rot(2,3) = rotMatrix(i+ii-2, j+jj-2, k+kk-2, 2,3)
+                rot(2, 1) = rotMatrix(i + ii - 2, j + jj - 2, k + kk - 2, 2, 1)
+                rot(2, 2) = rotMatrix(i + ii - 2, j + jj - 2, k + kk - 2, 2, 2)
+                rot(2, 3) = rotMatrix(i + ii - 2, j + jj - 2, k + kk - 2, 2, 3)
 
-         rot(3,1) = rotMatrix(i+ii-2, j+jj-2, k+kk-2, 3,1)
-         rot(3,2) = rotMatrix(i+ii-2, j+jj-2, k+kk-2, 3,2)
-         rot(3,3) = rotMatrix(i+ii-2, j+jj-2, k+kk-2, 3,3)
+                rot(3, 1) = rotMatrix(i + ii - 2, j + jj - 2, k + kk - 2, 3, 1)
+                rot(3, 2) = rotMatrix(i + ii - 2, j + jj - 2, k + kk - 2, 3, 2)
+                rot(3, 3) = rotMatrix(i + ii - 2, j + jj - 2, k + kk - 2, 3, 3)
 
-         ! Apply the transformation to the velocity components
-         ! of du1, du2 and du3.
+                ! Apply the transformation to the velocity components
+                ! of du1, du2 and du3.
 
-         dvx = du1(ivx); dvy = du1(ivy); dvz = du1(ivz)
-         du1(ivx) = rot(1,1)*dvx + rot(1,2)*dvy + rot(1,3)*dvz
-         du1(ivy) = rot(2,1)*dvx + rot(2,2)*dvy + rot(2,3)*dvz
-         du1(ivz) = rot(3,1)*dvx + rot(3,2)*dvy + rot(3,3)*dvz
+                dvx = du1(ivx); dvy = du1(ivy); dvz = du1(ivz)
+                du1(ivx) = rot(1, 1) * dvx + rot(1, 2) * dvy + rot(1, 3) * dvz
+                du1(ivy) = rot(2, 1) * dvx + rot(2, 2) * dvy + rot(2, 3) * dvz
+                du1(ivz) = rot(3, 1) * dvx + rot(3, 2) * dvy + rot(3, 3) * dvz
 
-         dvx = du2(ivx); dvy = du2(ivy); dvz = du2(ivz)
-         du2(ivx) = rot(1,1)*dvx + rot(1,2)*dvy + rot(1,3)*dvz
-         du2(ivy) = rot(2,1)*dvx + rot(2,2)*dvy + rot(2,3)*dvz
-         du2(ivz) = rot(3,1)*dvx + rot(3,2)*dvy + rot(3,3)*dvz
+                dvx = du2(ivx); dvy = du2(ivy); dvz = du2(ivz)
+                du2(ivx) = rot(1, 1) * dvx + rot(1, 2) * dvy + rot(1, 3) * dvz
+                du2(ivy) = rot(2, 1) * dvx + rot(2, 2) * dvy + rot(2, 3) * dvz
+                du2(ivz) = rot(3, 1) * dvx + rot(3, 2) * dvy + rot(3, 3) * dvz
 
-         dvx = du3(ivx); dvy = du3(ivy); dvz = du3(ivz)
-         du3(ivx) = rot(1,1)*dvx + rot(1,2)*dvy + rot(1,3)*dvz
-         du3(ivy) = rot(2,1)*dvx + rot(2,2)*dvy + rot(2,3)*dvz
-         du3(ivz) = rot(3,1)*dvx + rot(3,2)*dvy + rot(3,3)*dvz
+                dvx = du3(ivx); dvy = du3(ivy); dvz = du3(ivz)
+                du3(ivx) = rot(1, 1) * dvx + rot(1, 2) * dvy + rot(1, 3) * dvz
+                du3(ivy) = rot(2, 1) * dvx + rot(2, 2) * dvy + rot(2, 3) * dvz
+                du3(ivz) = rot(3, 1) * dvx + rot(3, 2) * dvy + rot(3, 3) * dvz
 
-      endif
+            end if
 
-      ! Determine the limiter used.
+            ! Determine the limiter used.
 
-      select case (limUsed)
+            select case (limUsed)
 
-      case (noLimiter)
+            case (noLimiter)
 
-         ! Linear interpolation; no limiter.
-         ! Loop over the number of variables to be interpolated.
+                ! Linear interpolation; no limiter.
+                ! Loop over the number of variables to be interpolated.
 
-         do l=1,nwInt
-            left(l)  =  omk*du1(l) + opk*du2(l)
-            right(l) = -omk*du3(l) - opk*du2(l)
-         enddo
+                do l = 1, nwInt
+                    left(l) = omk * du1(l) + opk * du2(l)
+                    right(l) = -omk * du3(l) - opk * du2(l)
+                end do
 
-         !          ==============================================================
+                !          ==============================================================
 
-      case (vanAlbeda)
+            case (vanAlbeda)
 
-         ! Nonlinear interpolation using the van albeda limiter.
-         ! Loop over the number of variables to be interpolated.
+                ! Nonlinear interpolation using the van albeda limiter.
+                ! Loop over the number of variables to be interpolated.
 
-         do l=1,nwInt
+                do l = 1, nwInt
 
-            ! Compute the limiter argument rl1, rl2, rr1 and rr2.
-            ! Note the cut off to 0.0.
+                    ! Compute the limiter argument rl1, rl2, rr1 and rr2.
+                    ! Note the cut off to 0.0.
 
-            tmp = one/sign(max(abs(du2(l)),epsLim),du2(l))
-            rl1 = max(zero, &
-                 du2(l)/sign(max(abs(du1(l)),epsLim),du1(l)))
-            rl2 = max(zero,du1(l)*tmp)
+                    tmp = one / sign(max(abs(du2(l)), epsLim), du2(l))
+                    rl1 = max(zero, &
+                              du2(l) / sign(max(abs(du1(l)), epsLim), du1(l)))
+                    rl2 = max(zero, du1(l) * tmp)
 
-            rr1 = max(zero,du3(l)*tmp)
-            rr2 = max(zero, &
-                 du2(l)/sign(max(abs(du3(l)),epsLim),du3(l)))
+                    rr1 = max(zero, du3(l) * tmp)
+                    rr2 = max(zero, &
+                              du2(l) / sign(max(abs(du3(l)), epsLim), du3(l)))
 
-            ! Compute the corresponding limiter values.
+                    ! Compute the corresponding limiter values.
 
-            rl1 = rl1*(rl1 + one)/(rl1*rl1 + one)
-            rl2 = rl2*(rl2 + one)/(rl2*rl2 + one)
-            rr1 = rr1*(rr1 + one)/(rr1*rr1 + one)
-            rr2 = rr2*(rr2 + one)/(rr2*rr2 + one)
+                    rl1 = rl1 * (rl1 + one) / (rl1 * rl1 + one)
+                    rl2 = rl2 * (rl2 + one) / (rl2 * rl2 + one)
+                    rr1 = rr1 * (rr1 + one) / (rr1 * rr1 + one)
+                    rr2 = rr2 * (rr2 + one) / (rr2 * rr2 + one)
 
-            ! Compute the nonlinear corrections to the first order
-            ! scheme.
+                    ! Compute the nonlinear corrections to the first order
+                    ! scheme.
 
-            left(l)  =  omk*rl1*du1(l) + opk*rl2*du2(l)
-            right(l) = -opk*rr1*du2(l) - omk*rr2*du3(l)
+                    left(l) = omk * rl1 * du1(l) + opk * rl2 * du2(l)
+                    right(l) = -opk * rr1 * du2(l) - omk * rr2 * du3(l)
 
-         enddo
+                end do
 
-         !          ==============================================================
+                !          ==============================================================
 
-      case (minmod)
+            case (minmod)
 
-         ! Nonlinear interpolation using the minmod limiter.
-         ! Loop over the number of variables to be interpolated.
+                ! Nonlinear interpolation using the minmod limiter.
+                ! Loop over the number of variables to be interpolated.
 
-         do l=1,nwInt
+                do l = 1, nwInt
 
-            ! Compute the limiter argument rl1, rl2, rr1 and rr2.
-            ! Note the cut off to 0.0.
+                    ! Compute the limiter argument rl1, rl2, rr1 and rr2.
+                    ! Note the cut off to 0.0.
 
-            tmp = one/sign(max(abs(du2(l)),epsLim),du2(l))
-            rl1 = max(zero, &
-                 du2(l)/sign(max(abs(du1(l)),epsLim),du1(l)))
-            rl2 = max(zero,du1(l)*tmp)
+                    tmp = one / sign(max(abs(du2(l)), epsLim), du2(l))
+                    rl1 = max(zero, &
+                              du2(l) / sign(max(abs(du1(l)), epsLim), du1(l)))
+                    rl2 = max(zero, du1(l) * tmp)
 
-            rr1 = max(zero,du3(l)*tmp)
-            rr2 = max(zero, &
-                 du2(l)/sign(max(abs(du3(l)),epsLim),du3(l)))
+                    rr1 = max(zero, du3(l) * tmp)
+                    rr2 = max(zero, &
+                              du2(l) / sign(max(abs(du3(l)), epsLim), du3(l)))
 
-            ! Compute the corresponding limiter values.
+                    ! Compute the corresponding limiter values.
 
-            rl1 = min(one, factMinmod*rl1)
-            rl2 = min(one, factMinmod*rl2)
-            rr1 = min(one, factMinmod*rr1)
-            rr2 = min(one, factMinmod*rr2)
+                    rl1 = min(one, factMinmod * rl1)
+                    rl2 = min(one, factMinmod * rl2)
+                    rr1 = min(one, factMinmod * rr1)
+                    rr2 = min(one, factMinmod * rr2)
 
-            ! Compute the nonlinear corrections to the first order
-            ! scheme.
+                    ! Compute the nonlinear corrections to the first order
+                    ! scheme.
 
-            left(l)  =  omk*rl1*du1(l) + opk*rl2*du2(l)
-            right(l) = -opk*rr1*du2(l) - omk*rr2*du3(l)
+                    left(l) = omk * rl1 * du1(l) + opk * rl2 * du2(l)
+                    right(l) = -opk * rr1 * du2(l) - omk * rr2 * du3(l)
 
-         enddo
+                end do
 
-      end select
+            end select
 
-      ! In case only a first order scheme must be used for the
-      ! turbulent transport equations, set the correction for the
-      ! turbulent kinetic energy to 0.
+            ! In case only a first order scheme must be used for the
+            ! turbulent transport equations, set the correction for the
+            ! turbulent kinetic energy to 0.
 
-      if( firstOrderK ) then
-         left(itu1)  = zero
-         right(itu1) = zero
-      endif
+            if (firstOrderK) then
+                left(itu1) = zero
+                right(itu1) = zero
+            end if
 
-      ! For rotational periodic problems transform the velocity
-      ! differences back to Cartesian again. Note that now the
-      ! transpose of the rotation matrix must be used.
+            ! For rotational periodic problems transform the velocity
+            ! differences back to Cartesian again. Note that now the
+            ! transpose of the rotation matrix must be used.
 
-      if( rotationalPeriodic ) then
+            if (rotationalPeriodic) then
 
-         ! Left state.
+                ! Left state.
 
-         dvx = left(ivx); dvy = left(ivy); dvz = left(ivz)
-         left(ivx) = rot(1,1)*dvx + rot(2,1)*dvy + rot(3,1)*dvz
-         left(ivy) = rot(1,2)*dvx + rot(2,2)*dvy + rot(3,2)*dvz
-         left(ivz) = rot(1,3)*dvx + rot(2,3)*dvy + rot(3,3)*dvz
+                dvx = left(ivx); dvy = left(ivy); dvz = left(ivz)
+                left(ivx) = rot(1, 1) * dvx + rot(2, 1) * dvy + rot(3, 1) * dvz
+                left(ivy) = rot(1, 2) * dvx + rot(2, 2) * dvy + rot(3, 2) * dvz
+                left(ivz) = rot(1, 3) * dvx + rot(2, 3) * dvy + rot(3, 3) * dvz
 
-         ! Right state.
+                ! Right state.
 
-         dvx = right(ivx); dvy = right(ivy); dvz = right(ivz)
-         right(ivx) = rot(1,1)*dvx + rot(2,1)*dvy + rot(3,1)*dvz
-         right(ivy) = rot(1,2)*dvx + rot(2,2)*dvy + rot(3,2)*dvz
-         right(ivz) = rot(1,3)*dvx + rot(2,3)*dvy + rot(3,3)*dvz
+                dvx = right(ivx); dvy = right(ivy); dvz = right(ivz)
+                right(ivx) = rot(1, 1) * dvx + rot(2, 1) * dvy + rot(3, 1) * dvz
+                right(ivy) = rot(1, 2) * dvx + rot(2, 2) * dvy + rot(3, 2) * dvz
+                right(ivz) = rot(1, 3) * dvx + rot(2, 3) * dvy + rot(3, 3) * dvz
 
-      endif
+            end if
 
-    end subroutine leftRightState
+        end subroutine leftRightState
 
-    !        ================================================================
+        !        ================================================================
 
-    subroutine riemannFlux(left, right, flux)
-      !
-      !         riemannFlux computes the flux for the given face and left
-      !         and right states.
-      !
-      implicit none
-      !
-      !        Subroutine arguments.
-      !
-      real(kind=realType), dimension(*), intent(in)  :: left, right
-      real(kind=realType), dimension(*), intent(out) :: flux
-      !
-      !        Local variables.
-      !
-      real(kind=realType) :: porFlux, rFace
-      real(kind=realType) :: Etl, Etr, z1l, z1r, tmp
-      real(kind=realType) :: dr, dru, drv, drw, drE, drk
-      real(kind=realType) :: rAvg, uAvg, vAvg, wAvg, hAvg, kAvg
-      real(kind=realType) :: alphaAvg, a2Avg, aAvg, unAvg
-      real(kind=realType) :: ovaAvg, ova2Avg, area, Eta
-      real(kind=realType) :: gm1, gm53
-      real(kind=realType) :: lam1, lam2, lam3
-      real(kind=realType) :: abv1, abv2, abv3, abv4, abv5, abv6, abv7
-      real(kind=realType), dimension(2) :: ktmp
+        subroutine riemannFlux(left, right, flux)
+            !
+            !         riemannFlux computes the flux for the given face and left
+            !         and right states.
+            !
+            implicit none
+            !
+            !        Subroutine arguments.
+            !
+            real(kind=realType), dimension(*), intent(in) :: left, right
+            real(kind=realType), dimension(*), intent(out) :: flux
+            !
+            !        Local variables.
+            !
+            real(kind=realType) :: porFlux, rFace
+            real(kind=realType) :: Etl, Etr, z1l, z1r, tmp
+            real(kind=realType) :: dr, dru, drv, drw, drE, drk
+            real(kind=realType) :: rAvg, uAvg, vAvg, wAvg, hAvg, kAvg
+            real(kind=realType) :: alphaAvg, a2Avg, aAvg, unAvg
+            real(kind=realType) :: ovaAvg, ova2Avg, area, Eta
+            real(kind=realType) :: gm1, gm53
+            real(kind=realType) :: lam1, lam2, lam3
+            real(kind=realType) :: abv1, abv2, abv3, abv4, abv5, abv6, abv7
+            real(kind=realType), dimension(2) :: ktmp
 
-      ! Set the porosity for the flux. The default value, 0.5*rFil, is
-      ! a scaling factor where an rFil != 1 is taken into account.
+            ! Set the porosity for the flux. The default value, 0.5*rFil, is
+            ! a scaling factor where an rFil != 1 is taken into account.
 
-      porFlux = half*rFil
-      if(por == noFlux .or. por == boundFlux)    porFlux = zero
+            porFlux = half * rFil
+            if (por == noFlux .or. por == boundFlux) porFlux = zero
 
-      ! Abbreviate some expressions in which gamma occurs.
+            ! Abbreviate some expressions in which gamma occurs.
 
-      gm1  = gammaFace - one
-      gm53 = gammaFace - five*third
+            gm1 = gammaFace - one
+            gm53 = gammaFace - five * third
 
-      ! Determine which riemann solver must be solved.
+            ! Determine which riemann solver must be solved.
 
-      select case (riemannUsed)
+            select case (riemannUsed)
 
-      case (Roe)
+            case (Roe)
 
-         ! Determine the preconditioner used.
+                ! Determine the preconditioner used.
 
-         select case (precond)
+                select case (precond)
 
-         case (noPrecond)
+                case (noPrecond)
 
-            ! No preconditioner used. Use the Roe scheme of the
-            ! standard equations.
+                    ! No preconditioner used. Use the Roe scheme of the
+                    ! standard equations.
 
-            ! Compute the square root of the left and right densities
-            ! and the inverse of the sum.
+                    ! Compute the square root of the left and right densities
+                    ! and the inverse of the sum.
 
-            z1l = sqrt(left(irho))
-            z1r = sqrt(right(irho))
-            tmp = one/(z1l + z1r)
+                    z1l = sqrt(left(irho))
+                    z1r = sqrt(right(irho))
+                    tmp = one / (z1l + z1r)
 
-            ! Compute some variables depending whether or not a
-            ! k-equation is present.
+                    ! Compute some variables depending whether or not a
+                    ! k-equation is present.
 
-            if( correctForK ) then
+                    if (correctForK) then
 
-               ! Store the left and right kinetic energy in ktmp,
-               ! which is needed to compute the total energy.
+                        ! Store the left and right kinetic energy in ktmp,
+                        ! which is needed to compute the total energy.
 
-               ktmp(1) = left(itu1)
-               ktmp(2) = right(itu1)
+                        ktmp(1) = left(itu1)
+                        ktmp(2) = right(itu1)
 
-               ! Store the difference of the turbulent kinetic energy
-               ! per unit volume, i.e. the conserved variable.
+                        ! Store the difference of the turbulent kinetic energy
+                        ! per unit volume, i.e. the conserved variable.
 
-               drk = right(irho)*right(itu1) - left(irho)*left(itu1)
+                        drk = right(irho) * right(itu1) - left(irho) * left(itu1)
 
-               ! Compute the average turbulent energy per unit mass
-               ! using Roe averages.
+                        ! Compute the average turbulent energy per unit mass
+                        ! using Roe averages.
 
-               kAvg = tmp*(z1l*left(itu1) + z1r*right(itu1))
+                        kAvg = tmp * (z1l * left(itu1) + z1r * right(itu1))
 
-            else
+                    else
 
-               ! Set the difference of the turbulent kinetic energy
-               ! per unit volume and the averaged kinetic energy per
-               ! unit mass to zero.
+                        ! Set the difference of the turbulent kinetic energy
+                        ! per unit volume and the averaged kinetic energy per
+                        ! unit mass to zero.
 
-               drk  = 0.0
-               kAvg = 0.0
+                        drk = 0.0
+                        kAvg = 0.0
 
-            endif
+                    end if
 
-            ! Compute the total energy of the left and right state.
-            call etot(left(irho), left(ivx), left(ivy), left(ivz), &
-                 left(irhoe), ktmp(1), Etl, correctForK)
+                    ! Compute the total energy of the left and right state.
+                    call etot(left(irho), left(ivx), left(ivy), left(ivz), &
+                              left(irhoe), ktmp(1), Etl, correctForK)
 
-            call etot(right(irho), right(ivx), right(ivy), right(ivz), &
-                 right(irhoe), ktmp(2), Etr, correctForK)
+                    call etot(right(irho), right(ivx), right(ivy), right(ivz), &
+                              right(irhoe), ktmp(2), Etr, correctForK)
 
-            ! Compute the difference of the conservative mean
-            ! flow variables.
+                    ! Compute the difference of the conservative mean
+                    ! flow variables.
 
-            dr  = right(irho) - left(irho)
-            dru = right(irho)*right(ivx) - left(irho)*left(ivx)
-            drv = right(irho)*right(ivy) - left(irho)*left(ivy)
-            drw = right(irho)*right(ivz) - left(irho)*left(ivz)
-            drE = Etr - Etl
-
-            ! Compute the Roe average variables, which can be
-            ! computed directly from the average Roe vector.
+                    dr = right(irho) - left(irho)
+                    dru = right(irho) * right(ivx) - left(irho) * left(ivx)
+                    drv = right(irho) * right(ivy) - left(irho) * left(ivy)
+                    drw = right(irho) * right(ivz) - left(irho) * left(ivz)
+                    drE = Etr - Etl
 
-            rAvg = fourth*(z1r + z1l)**2
-            uAvg = tmp*(z1l*left(ivx) + z1r*right(ivx))
-            vAvg = tmp*(z1l*left(ivy) + z1r*right(ivy))
-            wAvg = tmp*(z1l*left(ivz) + z1r*right(ivz))
-            hAvg = tmp*((Etl+left(irhoE)) /z1l &
-                 +      (Etr+right(irhoE))/z1r)
-
-            ! Compute the unit vector and store the area of the
-            ! normal. Also compute the unit normal velocity of the face.
-
-            area  = sqrt(sx**2 + sy**2 + sz**2)
-            tmp   = one/max(1.e-25_realType,area)
-            sx    = sx*tmp
-            sy    = sy*tmp
-            sz    = sz*tmp
-            rFace = sFace*tmp
+                    ! Compute the Roe average variables, which can be
+                    ! computed directly from the average Roe vector.
 
-            ! Compute some dependent variables at the Roe
-            ! average state.
+                    rAvg = fourth * (z1r + z1l)**2
+                    uAvg = tmp * (z1l * left(ivx) + z1r * right(ivx))
+                    vAvg = tmp * (z1l * left(ivy) + z1r * right(ivy))
+                    wAvg = tmp * (z1l * left(ivz) + z1r * right(ivz))
+                    hAvg = tmp * ((Etl + left(irhoE)) / z1l &
+                                  + (Etr + right(irhoE)) / z1r)
 
-            alphaAvg = half*(uAvg**2 + vAvg**2 + wAvg**2)
-            a2Avg    = abs(gm1*(hAvg - alphaAvg) - gm53*kAvg)
-            aAvg     = sqrt(a2Avg)
-            unAvg    = uAvg*sx + vAvg*sy + wAvg*sz
+                    ! Compute the unit vector and store the area of the
+                    ! normal. Also compute the unit normal velocity of the face.
 
-            ovaAvg  = one/aAvg
-            ova2Avg = one/a2Avg
+                    area = sqrt(sx**2 + sy**2 + sz**2)
+                    tmp = one / max(1.e-25_realType, area)
+                    sx = sx * tmp
+                    sy = sy * tmp
+                    sz = sz * tmp
+                    rFace = sFace * tmp
 
-            ! Set for a boundary the normal velocity to rFace, the
-            ! normal velocity of the boundary.
+                    ! Compute some dependent variables at the Roe
+                    ! average state.
 
-            if(por == boundFlux) unAvg = rFace
+                    alphaAvg = half * (uAvg**2 + vAvg**2 + wAvg**2)
+                    a2Avg = abs(gm1 * (hAvg - alphaAvg) - gm53 * kAvg)
+                    aAvg = sqrt(a2Avg)
+                    unAvg = uAvg * sx + vAvg * sy + wAvg * sz
 
-            ! Compute the coefficient eta for the entropy correction.
-            ! At the moment a 1D entropy correction is used, which
-            ! removes expansion shocks. Although it also reduces the
-            ! carbuncle phenomenon, it does not remove it completely.
-            ! In other to do that a multi-dimensional entropy fix is
-            ! needed, see Sanders et. al, JCP, vol. 145, 1998,
-            ! pp. 511 - 537. Although relatively easy to implement,
-            ! an efficient implementation requires the storage of
-            ! all the left and right states, which is rather
-            ! expensive in terms of memory.
+                    ovaAvg = one / aAvg
+                    ova2Avg = one / a2Avg
 
-            eta = half*(abs((left(ivx) - right(ivx))*sx        &
-                 +           (left(ivy) - right(ivy))*sy        &
-                 +           (left(ivz) - right(ivz))*sz)       &
-                 +       abs(sqrt(gammaFace*left(irhoE)/left(irho)) &
-                 -           sqrt(gammaFace*right(irhoE)/right(irho))))
+                    ! Set for a boundary the normal velocity to rFace, the
+                    ! normal velocity of the boundary.
 
-            ! Compute the absolute values of the three eigenvalues.
+                    if (por == boundFlux) unAvg = rFace
 
-            lam1 = abs(unAvg - rFace + aAvg)
-            lam2 = abs(unAvg - rFace - aAvg)
-            lam3 = abs(unAvg - rFace)
+                    ! Compute the coefficient eta for the entropy correction.
+                    ! At the moment a 1D entropy correction is used, which
+                    ! removes expansion shocks. Although it also reduces the
+                    ! carbuncle phenomenon, it does not remove it completely.
+                    ! In other to do that a multi-dimensional entropy fix is
+                    ! needed, see Sanders et. al, JCP, vol. 145, 1998,
+                    ! pp. 511 - 537. Although relatively easy to implement,
+                    ! an efficient implementation requires the storage of
+                    ! all the left and right states, which is rather
+                    ! expensive in terms of memory.
 
-            ! Apply the entropy correction to the eigenvalues.
+                    eta = half * (abs((left(ivx) - right(ivx)) * sx &
+                                      + (left(ivy) - right(ivy)) * sy &
+                                      + (left(ivz) - right(ivz)) * sz) &
+                                  + abs(sqrt(gammaFace * left(irhoE) / left(irho)) &
+                                        - sqrt(gammaFace * right(irhoE) / right(irho))))
 
-            tmp = two*eta
-            if(lam1 < tmp) lam1 = eta + fourth*lam1*lam1/eta
-            if(lam2 < tmp) lam2 = eta + fourth*lam2*lam2/eta
-            if(lam3 < tmp) lam3 = eta + fourth*lam3*lam3/eta
+                    ! Compute the absolute values of the three eigenvalues.
 
-            ! Multiply the eigenvalues by the area to obtain
-            ! the correct values for the dissipation term.
+                    lam1 = abs(unAvg - rFace + aAvg)
+                    lam2 = abs(unAvg - rFace - aAvg)
+                    lam3 = abs(unAvg - rFace)
 
-            lam1 = lam1*area
-            lam2 = lam2*area
-            lam3 = lam3*area
+                    ! Apply the entropy correction to the eigenvalues.
 
-            ! Some abbreviations, which occur quite often in the
-            ! dissipation terms.
+                    tmp = two * eta
+                    if (lam1 < tmp) lam1 = eta + fourth * lam1 * lam1 / eta
+                    if (lam2 < tmp) lam2 = eta + fourth * lam2 * lam2 / eta
+                    if (lam3 < tmp) lam3 = eta + fourth * lam3 * lam3 / eta
 
-            abv1 = half*(lam1 + lam2)
-            abv2 = half*(lam1 - lam2)
-            abv3 = abv1 - lam3
+                    ! Multiply the eigenvalues by the area to obtain
+                    ! the correct values for the dissipation term.
 
-            abv4 = gm1*(alphaAvg*dr - uAvg*dru -vAvg*drv &
-                 -      wAvg*drw + drE) - gm53*drk
-            abv5 = sx*dru + sy*drv + sz*drw - unAvg*dr
+                    lam1 = lam1 * area
+                    lam2 = lam2 * area
+                    lam3 = lam3 * area
 
-            abv6 = abv3*abv4*ova2Avg + abv2*abv5*ovaAvg
-            abv7 = abv2*abv4*ovaAvg  + abv3*abv5
+                    ! Some abbreviations, which occur quite often in the
+                    ! dissipation terms.
+
+                    abv1 = half * (lam1 + lam2)
+                    abv2 = half * (lam1 - lam2)
+                    abv3 = abv1 - lam3
+
+                    abv4 = gm1 * (alphaAvg * dr - uAvg * dru - vAvg * drv &
+                                  - wAvg * drw + drE) - gm53 * drk
+                    abv5 = sx * dru + sy * drv + sz * drw - unAvg * dr
+
+                    abv6 = abv3 * abv4 * ova2Avg + abv2 * abv5 * ovaAvg
+                    abv7 = abv2 * abv4 * ovaAvg + abv3 * abv5
+
+                    ! Compute the dissipation term, -|a| (wr - wl), which is
+                    ! multiplied by porFlux. Note that porFlux is either
+                    ! 0.0 or 0.5*rFil.
+
+                    flux(irho) = -porFlux * (lam3 * dr + abv6)
+                    flux(imx) = -porFlux * (lam3 * dru + uAvg * abv6 &
+                                            + sx * abv7)
+                    flux(imy) = -porFlux * (lam3 * drv + vAvg * abv6 &
+                                            + sy * abv7)
+                    flux(imz) = -porFlux * (lam3 * drw + wAvg * abv6 &
+                                            + sz * abv7)
+                    flux(irhoE) = -porFlux * (lam3 * drE + hAvg * abv6 &
+                                              + unAvg * abv7)
+
+                    !          tmp = max(lam1,lam2,lam3)
+
+                    !          flux(irho)  = -porFlux*(tmp*dr)
+                    !          flux(imx)   = -porFlux*(tmp*dru)
+                    !          flux(imy)   = -porFlux*(tmp*drv)
+                    !          flux(imz)   = -porFlux*(tmp*drw)
+                    !          flux(irhoE) = -porFlux*(tmp*drE)
+
+                case (Turkel)
+                    call terminate( &
+                        "riemannFlux", &
+                        "Turkel preconditioner not implemented yet")
+
+                case (ChoiMerkle)
+                    call terminate("riemannFlux", &
+                                   "choi merkle preconditioner not implemented yet")
 
-            ! Compute the dissipation term, -|a| (wr - wl), which is
-            ! multiplied by porFlux. Note that porFlux is either
-            ! 0.0 or 0.5*rFil.
+                end select
 
-            flux(irho)  = -porFlux*(lam3*dr  + abv6)
-            flux(imx)   = -porFlux*(lam3*dru + uAvg*abv6 &
-                 + sx*abv7)
-            flux(imy)   = -porFlux*(lam3*drv + vAvg*abv6 &
-                 + sy*abv7)
-            flux(imz)   = -porFlux*(lam3*drw + wAvg*abv6 &
-                 + sz*abv7)
-            flux(irhoE) = -porFlux*(lam3*drE + hAvg*abv6 &
-                 + unAvg*abv7)
+            case (vanLeer)
+                call terminate("riemannFlux", "van leer fvs not implemented yet")
 
-            !          tmp = max(lam1,lam2,lam3)
+            case (ausmdv)
+                call terminate("riemannFlux", "ausmdv fvs not implemented yet")
 
-            !          flux(irho)  = -porFlux*(tmp*dr)
-            !          flux(imx)   = -porFlux*(tmp*dru)
-            !          flux(imy)   = -porFlux*(tmp*drv)
-            !          flux(imz)   = -porFlux*(tmp*drw)
-            !          flux(irhoE) = -porFlux*(tmp*drE)
+            end select
 
-         case (Turkel)
-            call terminate(&
-                 "riemannFlux",&
-                 "Turkel preconditioner not implemented yet")
+        end subroutine riemannFlux
 
-         case (ChoiMerkle)
-            call terminate("riemannFlux",&
-                 "choi merkle preconditioner not implemented yet")
+    end subroutine inviscidUpwindFlux
 
-         end select
+    subroutine inviscidDissFluxScalarApprox
+        ! ---------------------------------------------
+        !             Inviscid Diss Flux Scalar
+        ! ---------------------------------------------
 
-      case (vanLeer)
-         call terminate("riemannFlux", "van leer fvs not implemented yet")
+        use constants
+        use flowVarRefState, only: pInfCorr
+        use inputDiscretization, only: vis2, vis4, sigma
+        use inputPhysics, only: equations
+        use iteration, only: rFil
+        use flowVarRefState, only: gammaInf, pInfCorr, rhoInf
+        implicit none
 
-      case (ausmdv)
-         call terminate("riemannFlux","ausmdv fvs not implemented yet")
+        ! Variables for inviscid diss flux scalar
+        real(kind=realType), parameter :: dssMax = 0.25_realType
+        real(kind=realType) :: sslim, rhoi
+        real(kind=realType) :: sfil, fis2, fis4
+        real(kind=realType) :: ppor, rrad, dis2, dis4, fs
+        real(kind=realType) :: ddw
+        integer(kind=intType) :: i, j, k
+        select case (equations)
+        case (EulerEquations)
 
-      end select
+            ! Inviscid case. Pressure switch is based on the pressure.
+            ! Also set the value of sslim. To be fully consistent this
+            ! must have the dimension of pressure and it is therefore
+            ! set to a fraction of the free stream value.
 
-    end subroutine riemannFlux
+            sslim = 0.001_realType * pInfCorr
 
-  end subroutine inviscidUpwindFlux
+            !===============================================================
 
+        case (NSEquations, RANSEquations)
 
-  subroutine inviscidDissFluxScalarApprox
-    ! ---------------------------------------------
-    !             Inviscid Diss Flux Scalar
-    ! ---------------------------------------------
+            ! Viscous case. Pressure switch is based on the entropy.
+            ! Also set the value of sslim. To be fully consistent this
+            ! must have the dimension of entropy and it is therefore
+            ! set to a fraction of the free stream value.
 
-    use constants
-    use flowVarRefState, only : pInfCorr
-    use inputDiscretization, only: vis2, vis4, sigma
-    use inputPhysics, only : equations
-    use iteration, only : rFil
-    use flowVarRefState, only : gammaInf, pInfCorr, rhoInf
-    implicit none
+            sslim = 0.001_realType * pInfCorr / (rhoInf**gammaInf)
 
-    ! Variables for inviscid diss flux scalar
-    real(kind=realType), parameter :: dssMax = 0.25_realType
-    real(kind=realType) :: sslim, rhoi
-    real(kind=realType) :: sfil, fis2, fis4
-    real(kind=realType) :: ppor, rrad, dis2, dis4, fs
-    real(kind=realType) :: ddw
-    integer(kind=intType) :: i, j, k
-    select case (equations)
-    case (EulerEquations)
+        end select
 
-       ! Inviscid case. Pressure switch is based on the pressure.
-       ! Also set the value of sslim. To be fully consistent this
-       ! must have the dimension of pressure and it is therefore
-       ! set to a fraction of the free stream value.
+        ! Compute the pressure sensor for each cell, in each direction:
+        do k = 1, ke
+            do j = 1, je
+                do i = singleHaloStart, ie
+                    dss(i, j, k, 1) = abs((ss(i + 1, j, k) - two * ss(i, j, k) + ss(i - 1, j, k)) &
+                                          / (ss(i + 1, j, k) + two * ss(i, j, k) + ss(i - 1, j, k) + sslim))
 
-       sslim = 0.001_realType*pInfCorr
+                    dss(i, j, k, 2) = abs((ss(i, j + 1, k) - two * ss(i, j, k) + ss(i, j - 1, k)) &
+                                          / (ss(i, j + 1, k) + two * ss(i, j, k) + ss(i, j - 1, k) + sslim))
 
-       !===============================================================
+                    dss(i, j, k, 3) = abs((ss(i, j, k + 1) - two * ss(i, j, k) + ss(i, j, k - 1)) &
+                                          / (ss(i, j, k + 1) + two * ss(i, j, k) + ss(i, j, k - 1) + sslim))
+                end do
+            end do
+        end do
 
-    case (NSEquations, RANSEquations)
+        ! Set a couple of constants for the scheme.
+        fis2 = vis2
+        fis4 = vis4
+        !
+        !       Dissipative fluxes in the i-direction.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 1, il
 
-       ! Viscous case. Pressure switch is based on the entropy.
-       ! Also set the value of sslim. To be fully consistent this
-       ! must have the dimension of entropy and it is therefore
-       ! set to a fraction of the free stream value.
+                    ! Compute the dissipation coefficients for this face.
 
-       sslim = 0.001_realType*pInfCorr/(rhoInf**gammaInf)
+                    ppor = zero
+                    if (porI(i, j, k) == normalFlux) ppor = half
+                    rrad = ppor * (radI(i, j, k) + radI(i + 1, j, k))
 
-    end select
+                    dis2 = fis2 * rrad * min(dssMax, max(dss(i, j, k, 1), dss(i + 1, j, k, 1))) + sigma * fis4 * rrad
 
-    ! Compute the pressure sensor for each cell, in each direction:
-    do k=1,ke
-       do j=1,je
-          do i=singleHaloStart,ie
-             dss(i,j,k,1) = abs((ss(i+1,j,k) - two*ss(i,j,k) + ss(i-1,j,k)) &
-                  /     (ss(i+1,j,k) + two*ss(i,j,k) + ss(i-1,j,k) + sslim))
+                    ! Compute and scatter the dissipative flux.
+                    ! Density. Store it in the mass flow of the
+                    ! appropriate sliding mesh interface.
 
-             dss(i,j,k,2) = abs((ss(i,j+1,k) - two*ss(i,j,k) + ss(i,j-1,k)) &
-                  /     (ss(i,j+1,k) + two*ss(i,j,k) + ss(i,j-1,k) + sslim))
+                    ddw = w(i + 1, j, k, irho) - w(i, j, k, irho)
+                    fs = dis2 * ddw
 
-             dss(i,j,k,3) = abs((ss(i,j,k+1) - two*ss(i,j,k) + ss(i,j,k-1)) &
-                  /     (ss(i,j,k+1) + two*ss(i,j,k) + ss(i,j,k-1) + sslim))
-          end do
-       end do
-    end do
+                    fw(i + 1, j, k, irho) = fw(i + 1, j, k, irho) + fs
+                    fw(i, j, k, irho) = fw(i, j, k, irho) - fs
 
-    ! Set a couple of constants for the scheme.
-    fis2 = vis2
-    fis4 = vis4
-    !
-    !       Dissipative fluxes in the i-direction.
-    !
-    do k=2,kl
-       do j=2,jl
-          do i=1,il
+                    ! X-momentum.
 
-             ! Compute the dissipation coefficients for this face.
+                    ddw = w(i + 1, j, k, ivx) * w(i + 1, j, k, irho) - w(i, j, k, ivx) * w(i, j, k, irho)
+                    fs = dis2 * ddw
 
-             ppor = zero
-             if(porI(i,j,k) == normalFlux) ppor = half
-             rrad = ppor*(radI(i,j,k) + radI(i+1,j,k))
+                    fw(i + 1, j, k, imx) = fw(i + 1, j, k, imx) + fs
+                    fw(i, j, k, imx) = fw(i, j, k, imx) - fs
 
-             dis2 = fis2*rrad*min(dssMax, max(dss(i,j,k,1), dss(i+1,j,k,1))) + sigma*fis4*rrad
+                    ! Y-momentum.
 
-             ! Compute and scatter the dissipative flux.
-             ! Density. Store it in the mass flow of the
-             ! appropriate sliding mesh interface.
+                    ddw = w(i + 1, j, k, ivy) * w(i + 1, j, k, irho) - w(i, j, k, ivy) * w(i, j, k, irho)
+                    fs = dis2 * ddw
 
-             ddw = w(i+1,j,k,irho) - w(i,j,k,irho)
-             fs  = dis2*ddw
+                    fw(i + 1, j, k, imy) = fw(i + 1, j, k, imy) + fs
+                    fw(i, j, k, imy) = fw(i, j, k, imy) - fs
 
-             fw(i+1,j,k,irho) = fw(i+1,j,k,irho) + fs
-             fw(i,j,k,irho)   = fw(i,j,k,irho)   - fs
+                    ! Z-momentum.
+                    ddw = w(i + 1, j, k, ivz) * w(i + 1, j, k, irho) - w(i, j, k, ivz) * w(i, j, k, irho)
+                    fs = dis2 * ddw
 
-             ! X-momentum.
+                    fw(i + 1, j, k, imz) = fw(i + 1, j, k, imz) + fs
+                    fw(i, j, k, imz) = fw(i, j, k, imz) - fs
 
-             ddw = w(i+1,j,k,ivx)*w(i+1,j,k,irho) - w(i,j,k,ivx)*w(i,j,k,irho)
-             fs  = dis2*ddw
+                    ! Energy.
+                    ddw = (w(i + 1, j, k, irhoE) + P(i + 1, j, K)) - (w(i, j, k, irhoE) + P(i, j, k))
+                    fs = dis2 * ddw
 
-             fw(i+1,j,k,imx) = fw(i+1,j,k,imx) + fs
-             fw(i,j,k,imx)   = fw(i,j,k,imx)   - fs
+                    fw(i + 1, j, k, irhoE) = fw(i + 1, j, k, irhoE) + fs
+                    fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - fs
 
-             ! Y-momentum.
+                end do
+            end do
+        end do
+        !
+        !       Dissipative fluxes in the j-direction.
+        !
+        do k = 2, kl
+            do j = 1, jl
+                do i = 2, il
 
-             ddw = w(i+1,j,k,ivy)*w(i+1,j,k,irho) - w(i,j,k,ivy)*w(i,j,k,irho)
-             fs  = dis2*ddw
+                    ! Compute the dissipation coefficients for this face.
 
-             fw(i+1,j,k,imy) = fw(i+1,j,k,imy) + fs
-             fw(i,j,k,imy)   = fw(i,j,k,imy)   - fs
+                    ppor = zero
+                    if (porJ(i, j, k) == normalFlux) ppor = half
+                    rrad = ppor * (radJ(i, j, k) + radJ(i, j + 1, k))
 
-             ! Z-momentum.
-             ddw = w(i+1,j,k,ivz)*w(i+1,j,k,irho) - w(i,j,k,ivz)*w(i,j,k,irho)
-             fs  = dis2*ddw
+                    dis2 = fis2 * rrad * min(dssMax, max(dss(i, j, k, 2), dss(i, j + 1, k, 2))) + sigma * fis4 * rrad
 
-             fw(i+1,j,k,imz) = fw(i+1,j,k,imz) + fs
-             fw(i,j,k,imz)   = fw(i,j,k,imz)   - fs
+                    ! Compute and scatter the dissipative flux.
+                    ! Density. Store it in the mass flow of the
+                    ! appropriate sliding mesh interface.
 
-             ! Energy.
-             ddw = (w(i+1,j,k,irhoE) + P(i+1,j,K))- (w(i,j,k,irhoE) + P(i,j,k))
-             fs  = dis2*ddw
+                    ddw = w(i, j + 1, k, irho) - w(i, j, k, irho)
+                    fs = dis2 * ddw
 
-             fw(i+1,j,k,irhoE) = fw(i+1,j,k,irhoE) + fs
-             fw(i,j,k,irhoE)   = fw(i,j,k,irhoE)   - fs
+                    fw(i, j + 1, k, irho) = fw(i, j + 1, k, irho) + fs
+                    fw(i, j, k, irho) = fw(i, j, k, irho) - fs
 
-          end do
-       end do
-    end do
-    !
-    !       Dissipative fluxes in the j-direction.
-    !
-    do k=2,kl
-       do j=1,jl
-          do i=2,il
+                    ! X-momentum.
 
-             ! Compute the dissipation coefficients for this face.
+                    ddw = w(i, j + 1, k, ivx) * w(i, j + 1, k, irho) - w(i, j, k, ivx) * w(i, j, k, irho)
+                    fs = dis2 * ddw
 
-             ppor = zero
-             if(porJ(i,j,k) == normalFlux) ppor = half
-             rrad = ppor*(radJ(i,j,k) + radJ(i,j+1,k))
+                    fw(i, j + 1, k, imx) = fw(i, j + 1, k, imx) + fs
+                    fw(i, j, k, imx) = fw(i, j, k, imx) - fs
 
-             dis2 = fis2*rrad*min(dssMax, max(dss(i,j,k,2),dss(i,j+1,k,2))) +sigma*fis4*rrad
+                    ! Y-momentum.
 
-             ! Compute and scatter the dissipative flux.
-             ! Density. Store it in the mass flow of the
-             ! appropriate sliding mesh interface.
+                    ddw = w(i, j + 1, k, ivy) * w(i, j + 1, k, irho) - w(i, j, k, ivy) * w(i, j, k, irho)
+                    fs = dis2 * ddw
 
-             ddw = w(i,j+1,k,irho) - w(i,j,k,irho)
-             fs  = dis2*ddw
+                    fw(i, j + 1, k, imy) = fw(i, j + 1, k, imy) + fs
+                    fw(i, j, k, imy) = fw(i, j, k, imy) - fs
 
-             fw(i,j+1,k,irho) = fw(i,j+1,k,irho) + fs
-             fw(i,j,k,irho)   = fw(i,j,k,irho)   - fs
+                    ! Z-momentum.
 
-             ! X-momentum.
+                    ddw = w(i, j + 1, k, ivz) * w(i, j + 1, k, irho) - w(i, j, k, ivz) * w(i, j, k, irho)
+                    fs = dis2 * ddw
 
-             ddw = w(i,j+1,k,ivx)*w(i,j+1,k,irho) - w(i,j,k,ivx)*w(i,j,k,irho)
-             fs  = dis2*ddw
+                    fw(i, j + 1, k, imz) = fw(i, j + 1, k, imz) + fs
+                    fw(i, j, k, imz) = fw(i, j, k, imz) - fs
 
-             fw(i,j+1,k,imx) = fw(i,j+1,k,imx) + fs
-             fw(i,j,k,imx)   = fw(i,j,k,imx)   - fs
+                    ! Energy.
 
-             ! Y-momentum.
+                    ddw = (w(i, j + 1, k, irhoE) + P(i, j + 1, k)) - (w(i, j, k, irhoE) + P(i, j, k))
+                    fs = dis2 * ddw
 
-             ddw = w(i,j+1,k,ivy)*w(i,j+1,k,irho) - w(i,j,k,ivy)*w(i,j,k,irho)
-             fs  = dis2*ddw
+                    fw(i, j + 1, k, irhoE) = fw(i, j + 1, k, irhoE) + fs
+                    fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - fs
+                end do
+            end do
+        end do
+        !
+        !       Dissipative fluxes in the k-direction.
+        !
+        do k = 1, kl
+            do j = 2, jl
+                do i = 2, il
 
-             fw(i,j+1,k,imy) = fw(i,j+1,k,imy) + fs
-             fw(i,j,k,imy)   = fw(i,j,k,imy)   - fs
+                    ! Compute the dissipation coefficients for this face.
 
-             ! Z-momentum.
+                    ppor = zero
+                    if (porK(i, j, k) == normalFlux) ppor = half
+                    rrad = ppor * (radK(i, j, k) + radK(i, j, k + 1))
 
-             ddw = w(i,j+1,k,ivz)*w(i,j+1,k,irho) - w(i,j,k,ivz)*w(i,j,k,irho)
-             fs  = dis2*ddw
+                    dis2 = fis2 * rrad * min(dssMax, max(dss(i, j, k, 3), dss(i, j, k + 1, 3))) + sigma * fis4 * rrad
 
-             fw(i,j+1,k,imz) = fw(i,j+1,k,imz) + fs
-             fw(i,j,k,imz)   = fw(i,j,k,imz)   - fs
+                    ! Compute and scatter the dissipative flux.
+                    ! Density. Store it in the mass flow of the
+                    ! appropriate sliding mesh interface.
 
-             ! Energy.
+                    ddw = w(i, j, k + 1, irho) - w(i, j, k, irho)
+                    fs = dis2 * ddw
 
-             ddw = (w(i,j+1,k,irhoE) + P(i,j+1,k)) - (w(i,j,k,irhoE) + P(i,j,k))
-             fs  = dis2*ddw
+                    fw(i, j, k + 1, irho) = fw(i, j, k + 1, irho) + fs
+                    fw(i, j, k, irho) = fw(i, j, k, irho) - fs
 
-             fw(i,j+1,k,irhoE) = fw(i,j+1,k,irhoE) + fs
-             fw(i,j,k,irhoE)   = fw(i,j,k,irhoE)   - fs
-          end do
-       end do
-    end do
-    !
-    !       Dissipative fluxes in the k-direction.
-    !
-    do k=1,kl
-       do j=2,jl
-          do i=2,il
+                    ! X-momentum.
 
-             ! Compute the dissipation coefficients for this face.
+                    ddw = w(i, j, k + 1, ivx) * w(i, j, k + 1, irho) - w(i, j, k, ivx) * w(i, j, k, irho)
+                    fs = dis2 * ddw
 
-             ppor = zero
-             if(porK(i,j,k) == normalFlux) ppor = half
-             rrad = ppor*(radK(i,j,k) + radK(i,j,k+1))
+                    fw(i, j, k + 1, imx) = fw(i, j, k + 1, imx) + fs
+                    fw(i, j, k, imx) = fw(i, j, k, imx) - fs
 
-             dis2 = fis2*rrad*min(dssMax, max(dss(i,j,k,3), dss(i,j,k+1,3))) + sigma*fis4*rrad
+                    ! Y-momentum.
 
-             ! Compute and scatter the dissipative flux.
-             ! Density. Store it in the mass flow of the
-             ! appropriate sliding mesh interface.
+                    ddw = w(i, j, k + 1, ivy) * w(i, j, k + 1, irho) - w(i, j, k, ivy) * w(i, j, k, irho)
+                    fs = dis2 * ddw
 
-             ddw = w(i,j,k+1,irho) - w(i,j,k,irho)
-             fs  = dis2*ddw
+                    fw(i, j, k + 1, imy) = fw(i, j, k + 1, imy) + fs
+                    fw(i, j, k, imy) = fw(i, j, k, imy) - fs
 
-             fw(i,j,k+1,irho) = fw(i,j,k+1,irho) + fs
-             fw(i,j,k,irho)   = fw(i,j,k,irho)   - fs
+                    ! Z-momentum.
 
-             ! X-momentum.
+                    ddw = w(i, j, k + 1, ivz) * w(i, j, k + 1, irho) - w(i, j, k, ivz) * w(i, j, k, irho)
+                    fs = dis2 * ddw
 
-             ddw = w(i,j,k+1,ivx)*w(i,j,k+1,irho) - w(i,j,k,ivx)*w(i,j,k,irho)
-             fs  = dis2*ddw
+                    fw(i, j, k + 1, imz) = fw(i, j, k + 1, imz) + fs
+                    fw(i, j, k, imz) = fw(i, j, k, imz) - fs
 
-             fw(i,j,k+1,imx) = fw(i,j,k+1,imx) + fs
-             fw(i,j,k,imx)   = fw(i,j,k,imx)   - fs
+                    ! Energy.
+                    ddw = (w(i, j, k + 1, irhoE) + P(i, j, k + 1)) - (w(i, j, k, irhoE) + P(i, j, k))
+                    fs = dis2 * ddw
 
-             ! Y-momentum.
+                    fw(i, j, k + 1, irhoE) = fw(i, j, k + 1, irhoE) + fs
+                    fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - fs
+                end do
+            end do
+        end do
+    end subroutine inviscidDissFluxScalarApprox
+
+    subroutine inviscidDissFluxMatrixApprox
+        !
+        !       inviscidDissFluxMatrix computes the matrix artificial
+        !       dissipation term. Instead of the spectral radius, as used in
+        !       the scalar dissipation scheme, the absolute value of the flux
+        !       jacobian is used. This leads to a less diffusive and
+        !       consequently more accurate scheme. It is assumed that the
+        !       pointers in blockPointers already point to the correct block.
+        !
+        use constants
+        use flowVarRefState, only: pInfCorr
+        use inputDiscretization, only: vis2, vis4, sigma
+        use inputPhysics, only: equations
+        use iteration, only: rFil
+        use utils, only: getCorrectForK, myDim
+        implicit none
+        !
+        !      Local parameters.
+        !
+        real(kind=realType), parameter :: dpMax = 0.25_realType
+        real(kind=realType), parameter :: epsAcoustic = 0.25_realType
+        real(kind=realType), parameter :: epsShear = 0.025_realType
+        real(kind=realType), parameter :: omega = 0.5_realType
+        real(kind=realType), parameter :: oneMinOmega = one - omega
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, ind, ii
+
+        real(kind=realType) :: plim, sface
+        real(kind=realType) :: sfil, fis2, fis4
+        real(kind=realType) :: gammaAvg, gm1, ovgm1, gm53
+        real(kind=realType) :: ppor, rrad, dis2, dis4
+        real(kind=realType) :: dp1, dp2, tmp, fs
+        real(kind=realType) :: ddw, ddw6
+        real(kind=realType) :: dr, dru, drv, drw, dre, drk, sx, sy, sz
+        real(kind=realType) :: uAvg, vAvg, wAvg, a2Avg, aAvg, hAvg
+        real(kind=realType) :: alphaAvg, unAvg, ovaAvg, ova2Avg
+        real(kind=realType) :: kAvg, lam1, lam2, lam3, area
+        real(kind=realType) :: abv1, abv2, abv3, abv4, abv5, abv6, abv7
+        logical :: correctForK
+
+        ! Set the value of plim. To be fully consistent this must have
+        ! the dimension of a pressure. Therefore a fraction of pInfCorr
+        ! is used.
+
+        plim = 0.001_realType * pInfCorr
+
+        ! Determine whether or not the total energy must be corrected
+        ! for the presence of the turbulent kinetic energy.
+
+        correctForK = getCorrectForK()
+
+        ! Initialize sface to zero. This value will be used if the
+        ! block is not moving.
+
+        sface = zero
+
+        ! Set a couple of constants for the scheme.
+
+        fis2 = rFil * vis2
+        fis4 = rFil * vis4
+        sfil = one - rFil
+
+        ! Initialize the dissipative residual to a certain times,
+        ! possibly zero, the previously stored value.
+
+        fw = sfil * fw
+
+        ! Compute the pressure sensor for each cell, in each direction:
+        do k = 1, ke
+            do j = 1, je
+                do i = singleHaloStart, ie
+                    dss(i, j, k, 1) = abs((ss(i + 1, j, k) - two * ss(i, j, k) + ss(i - 1, j, k)) &
+                                          / (omega * (ss(i + 1, j, k) + two * ss(i, j, k) + ss(i - 1, j, k)) &
+                                             + oneMinOmega * (abs(ss(i + 1, j, k) - ss(i, j, k)) &
+                                                              + abs(ss(i, j, k) - ss(i - 1, j, k))) + plim))
+
+                    dss(i, j, k, 2) = abs((ss(i, j + 1, k) - two * ss(i, j, k) + ss(i, j - 1, k)) &
+                                          / (omega * (ss(i, j + 1, k) + two * ss(i, j, k) + ss(i, j - 1, k)) &
+                                             + oneMinOmega * (abs(ss(i, j + 1, k) - ss(i, j, k)) &
+                                                              + abs(ss(i, j, k) - ss(i, j - 1, k))) + plim))
+
+                    dss(i, j, k, 3) = abs((ss(i, j, k + 1) - two * ss(i, j, k) + ss(i, j, k - 1)) &
+                                          / (omega * (ss(i, j, k + 1) + two * ss(i, j, k) + ss(i, j, k - 1)) &
+                                             + oneMinOmega * (abs(ss(i, j, k + 1) - ss(i, j, k)) &
+                                                              + abs(ss(i, j, k) - ss(i, j, k - 1))) + plim))
+                end do
+            end do
+        end do
+        !
+        !       Dissipative fluxes in the i-direction.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 1, il
 
-             ddw = w(i,j,k+1,ivy)*w(i,j,k+1,irho) - w(i,j,k,ivy)*w(i,j,k,irho)
-             fs  = dis2*ddw
+                    ! Compute the dissipation coefficients for this face.
 
-             fw(i,j,k+1,imy) = fw(i,j,k+1,imy) + fs
-             fw(i,j,k,imy)   = fw(i,j,k,imy)   - fs
+                    ppor = zero
+                    if (porI(i, j, k) == normalFlux) ppor = one
 
-             ! Z-momentum.
+                    dis2 = fis2 * ppor * min(dpMax, max(dss(i, j, k, 1), dss(i + 1, j, k, 1))) &
+                           + sigma * fis4 * ppor
 
-             ddw = w(i,j,k+1,ivz)*w(i,j,k+1,irho) - w(i,j,k,ivz)*w(i,j,k,irho)
-             fs  = dis2*ddw
+                    ! Construct the vector of the first and third differences
+                    ! multiplied by the appropriate constants.
 
-             fw(i,j,k+1,imz) = fw(i,j,k+1,imz) + fs
-             fw(i,j,k,imz)   = fw(i,j,k,imz)   - fs
+                    ddw = w(i + 1, j, k, irho) - w(i, j, k, irho)
+                    dr = dis2 * ddw
 
-             ! Energy.
-             ddw = (w(i,j,k+1,irhoE) + P(i,j,k+1)) - (w(i,j,k,irhoE) + P(i,j,k))
-             fs  = dis2*ddw
+                    ddw = w(i + 1, j, k, irho) * w(i + 1, j, k, ivx) &
+                          - w(i, j, k, irho) * w(i, j, k, ivx)
+                    dru = dis2 * ddw
 
-             fw(i,j,k+1,irhoE) = fw(i,j,k+1,irhoE) + fs
-             fw(i,j,k,irhoE)   = fw(i,j,k,irhoE)   - fs
-          end do
-       end do
-    end do
-  end subroutine inviscidDissFluxScalarApprox
+                    ddw = w(i + 1, j, k, irho) * w(i + 1, j, k, ivy) &
+                          - w(i, j, k, irho) * w(i, j, k, ivy)
+                    drv = dis2 * ddw
 
-  subroutine inviscidDissFluxMatrixApprox
-    !
-    !       inviscidDissFluxMatrix computes the matrix artificial
-    !       dissipation term. Instead of the spectral radius, as used in
-    !       the scalar dissipation scheme, the absolute value of the flux
-    !       jacobian is used. This leads to a less diffusive and
-    !       consequently more accurate scheme. It is assumed that the
-    !       pointers in blockPointers already point to the correct block.
-    !
-    use constants
-    use flowVarRefState, only : pInfCorr
-    use inputDiscretization, only: vis2, vis4, sigma
-    use inputPhysics, only : equations
-    use iteration, only : rFil
-    use utils, only : getCorrectForK, myDim
-    implicit none
-    !
-    !      Local parameters.
-    !
-    real(kind=realType), parameter :: dpMax        = 0.25_realType
-    real(kind=realType), parameter :: epsAcoustic  = 0.25_realType
-    real(kind=realType), parameter :: epsShear     = 0.025_realType
-    real(kind=realType), parameter :: omega        = 0.5_realType
-    real(kind=realType), parameter :: oneMinOmega = one - omega
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, ind, ii
-
-    real(kind=realType) :: plim, sface
-    real(kind=realType) :: sfil, fis2, fis4
-    real(kind=realType) :: gammaAvg, gm1, ovgm1, gm53
-    real(kind=realType) :: ppor, rrad, dis2, dis4
-    real(kind=realType) :: dp1, dp2, tmp, fs
-    real(kind=realType) :: ddw, ddw6
-    real(kind=realType) :: dr, dru, drv, drw, dre, drk, sx, sy, sz
-    real(kind=realType) :: uAvg, vAvg, wAvg, a2Avg, aAvg, hAvg
-    real(kind=realType) :: alphaAvg, unAvg, ovaAvg, ova2Avg
-    real(kind=realType) :: kAvg, lam1, lam2, lam3, area
-    real(kind=realType) :: abv1, abv2, abv3, abv4, abv5, abv6, abv7
-    logical :: correctForK
-
-    ! Set the value of plim. To be fully consistent this must have
-    ! the dimension of a pressure. Therefore a fraction of pInfCorr
-    ! is used.
-
-    plim = 0.001_realType*pInfCorr
-
-    ! Determine whether or not the total energy must be corrected
-    ! for the presence of the turbulent kinetic energy.
-
-    correctForK = getCorrectForK()
-
-    ! Initialize sface to zero. This value will be used if the
-    ! block is not moving.
-
-    sface = zero
-
-    ! Set a couple of constants for the scheme.
-
-    fis2 = rFil*vis2
-    fis4 = rFil*vis4
-    sfil = one - rFil
-
-    ! Initialize the dissipative residual to a certain times,
-    ! possibly zero, the previously stored value.
-
-    fw = sfil*fw
-
-    ! Compute the pressure sensor for each cell, in each direction:
-    do k=1,ke
-       do j=1,je
-          do i=singleHaloStart,ie
-             dss(i,j,k,1) =abs((ss(i+1,j,k) - two*ss(i,j,k) + ss(i-1,j,k))        &
-                  /     (omega*(ss(i+1,j,k) + two*ss(i,j,k) + ss(i-1,j,k)) &
-                  +      oneMinOmega*(abs(ss(i+1,j,k) - ss(i,j,k))      &
-                  +                   abs(ss(i,j,k) - ss(i-1,j,k))) + plim))
-
-
-             dss(i,j,k,2) =abs((ss(i,j+1,k) - two*ss(i,j,k) + ss(i,j-1,k))        &
-                  /     (omega*(ss(i,j+1,k) + two*ss(i,j,k) + ss(i,j-1,k)) &
-                  +      oneMinOmega*(abs(ss(i,j+1,k) - ss(i,j,k))      &
-                  +                   abs(ss(i,j,k) - ss(i,j-1,k))) + plim))
-
-             dss(i,j,k,3) =  abs((ss(i,j,k+1) - two*ss(i,j,k) + ss(i,j,k-1))        &
-                  /     (omega*(ss(i,j,k+1) + two*ss(i,j,k) + ss(i,j,k-1)) &
-                  +      oneMinOmega*(abs(ss(i,j,k+1) - ss(i,j,k))      &
-                  +                   abs(ss(i,j,k) - ss(i,j,k-1))) + plim))
-          end do
-       end do
-    end do
-    !
-    !       Dissipative fluxes in the i-direction.
-    !
-    do k=2,kl
-       do j=2,jl
-          do i=1,il
-
-             ! Compute the dissipation coefficients for this face.
-
-             ppor = zero
-             if(porI(i,j,k) == normalFlux) ppor = one
-
-             dis2 = fis2*ppor*min(dpMax,max(dss(i,j,k,1),dss(i+1,j,k,1)))&
-                    +sigma*fis4*ppor
-
-             ! Construct the vector of the first and third differences
-             ! multiplied by the appropriate constants.
-
-             ddw = w(i+1,j,k,irho) - w(i,j,k,irho)
-             dr  = dis2*ddw
-
-             ddw = w(i+1,j,k,irho)*w(i+1,j,k,ivx) &
-                  - w(i,j,k,irho)*w(i,j,k,ivx)
-             dru = dis2*ddw
-
-             ddw = w(i+1,j,k,irho)*w(i+1,j,k,ivy) &
-                  - w(i,j,k,irho)*w(i,j,k,ivy)
-             drv = dis2*ddw
-
-             ddw = w(i+1,j,k,irho)*w(i+1,j,k,ivz) &
-                  - w(i,j,k,irho)*w(i,j,k,ivz)
-             drw = dis2*ddw
+                    ddw = w(i + 1, j, k, irho) * w(i + 1, j, k, ivz) &
+                          - w(i, j, k, irho) * w(i, j, k, ivz)
+                    drw = dis2 * ddw
 
-             ddw = w(i+1,j,k,irhoE) - w(i,j,k,irhoE)
-             dre = dis2*ddw
+                    ddw = w(i + 1, j, k, irhoE) - w(i, j, k, irhoE)
+                    dre = dis2 * ddw
 
-             ! In case a k-equation is present, compute the difference
-             ! of rhok and store the average value of k. If not present,
-             ! set both these values to zero, such that later on no
-             ! decision needs to be made anymore.
-             drk   = zero
-             kAvg = zero
+                    ! In case a k-equation is present, compute the difference
+                    ! of rhok and store the average value of k. If not present,
+                    ! set both these values to zero, such that later on no
+                    ! decision needs to be made anymore.
+                    drk = zero
+                    kAvg = zero
 
-             if( correctForK ) then
-                ddw6 = w(i+1,j,k,irho)*w(i+1,j,k,itu1) &
-                     - w(i,j,k,irho)*w(i,j,k,itu1)
-                drk = dis2*ddw6                              &
-                     - dis4*(w(i+2,j,k,irho)*w(i+2,j,k,itu1) &
-                     -       w(i-1,j,k,irho)*w(i-1,j,k,itu1) - three*ddw6)
+                    if (correctForK) then
+                        ddw6 = w(i + 1, j, k, irho) * w(i + 1, j, k, itu1) &
+                               - w(i, j, k, irho) * w(i, j, k, itu1)
+                        drk = dis2 * ddw6 &
+                              - dis4 * (w(i + 2, j, k, irho) * w(i + 2, j, k, itu1) &
+                                        - w(i - 1, j, k, irho) * w(i - 1, j, k, itu1) - three * ddw6)
 
-                kAvg = half*(w(i,j,k,itu1) + w(i+1,j,k,itu1))
-             endif
+                        kAvg = half * (w(i, j, k, itu1) + w(i + 1, j, k, itu1))
+                    end if
 
-             ! Compute the average value of gamma and compute some
-             ! expressions in which it occurs.
+                    ! Compute the average value of gamma and compute some
+                    ! expressions in which it occurs.
 
-             gammaAvg = half*(gamma(i+1,j,k) + gamma(i,j,k))
-             gm1      = gammaAvg - one
-             ovgm1    = one/gm1
-             gm53     = gammaAvg - five*third
+                    gammaAvg = half * (gamma(i + 1, j, k) + gamma(i, j, k))
+                    gm1 = gammaAvg - one
+                    ovgm1 = one / gm1
+                    gm53 = gammaAvg - five * third
 
-             ! Compute the average state at the interface.
+                    ! Compute the average state at the interface.
 
-             uAvg  = half*(w(i+1,j,k,ivx) + w(i,j,k,ivx))
-             vAvg  = half*(w(i+1,j,k,ivy) + w(i,j,k,ivy))
-             wAvg  = half*(w(i+1,j,k,ivz) + w(i,j,k,ivz))
-             a2Avg = half*(gamma(i+1,j,k)*p(i+1,j,k)/w(i+1,j,k,irho) &
-                  +       gamma(i,  j,k)*p(i,  j,k)/w(i,  j,k,irho))
+                    uAvg = half * (w(i + 1, j, k, ivx) + w(i, j, k, ivx))
+                    vAvg = half * (w(i + 1, j, k, ivy) + w(i, j, k, ivy))
+                    wAvg = half * (w(i + 1, j, k, ivz) + w(i, j, k, ivz))
+                    a2Avg = half * (gamma(i + 1, j, k) * p(i + 1, j, k) / w(i + 1, j, k, irho) &
+                                    + gamma(i, j, k) * p(i, j, k) / w(i, j, k, irho))
 
-             area = sqrt(si(i,j,k,1)**2 + si(i,j,k,2)**2 + si(i,j,k,3)**2)
-             tmp  = one/max(1.e-25_realType,area)
-             sx   = si(i,j,k,1)*tmp
-             sy   = si(i,j,k,2)*tmp
-             sz   = si(i,j,k,3)*tmp
+                    area = sqrt(si(i, j, k, 1)**2 + si(i, j, k, 2)**2 + si(i, j, k, 3)**2)
+                    tmp = one / max(1.e-25_realType, area)
+                    sx = si(i, j, k, 1) * tmp
+                    sy = si(i, j, k, 2) * tmp
+                    sz = si(i, j, k, 3) * tmp
 
-             alphaAvg = half*(uAvg**2 + vAvg**2 + wAvg**2)
-             hAvg     = alphaAvg + ovgm1*(a2Avg - gm53*kAvg)
-             aAvg     = sqrt(a2Avg)
-             unAvg    = uAvg*sx + vAvg*sy + wAvg*sz
-             ovaAvg   = one/aAvg
-             ova2Avg  = one/a2Avg
+                    alphaAvg = half * (uAvg**2 + vAvg**2 + wAvg**2)
+                    hAvg = alphaAvg + ovgm1 * (a2Avg - gm53 * kAvg)
+                    aAvg = sqrt(a2Avg)
+                    unAvg = uAvg * sx + vAvg * sy + wAvg * sz
+                    ovaAvg = one / aAvg
+                    ova2Avg = one / a2Avg
 
-             ! The mesh velocity if the face is moving. It must be
-             ! divided by the area to obtain a true velocity.
+                    ! The mesh velocity if the face is moving. It must be
+                    ! divided by the area to obtain a true velocity.
 
-             sface = sFaceI(i,j,k)*tmp
+                    sface = sFaceI(i, j, k) * tmp
 
-             ! Compute the absolute values of the three eigenvalues
-             ! and make sure they don't become zero by cutting them
-             ! off to a certain minimum.
+                    ! Compute the absolute values of the three eigenvalues
+                    ! and make sure they don't become zero by cutting them
+                    ! off to a certain minimum.
 
-             lam1 = abs(unAvg - sface + aAvg)
-             lam2 = abs(unAvg - sface - aAvg)
-             lam3 = abs(unAvg - sface)
+                    lam1 = abs(unAvg - sface + aAvg)
+                    lam2 = abs(unAvg - sface - aAvg)
+                    lam3 = abs(unAvg - sface)
 
-             rrad = lam3 + aAvg
+                    rrad = lam3 + aAvg
 
-             ! Multiply the eigenvalues by the area to obtain
-             ! the correct values for the dissipation term.
+                    ! Multiply the eigenvalues by the area to obtain
+                    ! the correct values for the dissipation term.
 
-             lam1 = max(lam1,epsAcoustic*rrad)*area
-             lam2 = max(lam2,epsAcoustic*rrad)*area
-             lam3 = max(lam3,epsShear*rrad)*area
+                    lam1 = max(lam1, epsAcoustic * rrad) * area
+                    lam2 = max(lam2, epsAcoustic * rrad) * area
+                    lam3 = max(lam3, epsShear * rrad) * area
 
-             ! Some abbreviations, which occur quite often in the
-             ! dissipation terms.
+                    ! Some abbreviations, which occur quite often in the
+                    ! dissipation terms.
 
-             abv1 = half*(lam1 + lam2)
-             abv2 = half*(lam1 - lam2)
-             abv3 = abv1 - lam3
+                    abv1 = half * (lam1 + lam2)
+                    abv2 = half * (lam1 - lam2)
+                    abv3 = abv1 - lam3
 
-             abv4 = gm1*(alphaAvg*dr - uAvg*dru -vAvg*drv &
-                  -      wAvg*drw + dre) - gm53*drk
-             abv5 = sx*dru + sy*drv + sz*drw - unAvg*dr
+                    abv4 = gm1 * (alphaAvg * dr - uAvg * dru - vAvg * drv &
+                                  - wAvg * drw + dre) - gm53 * drk
+                    abv5 = sx * dru + sy * drv + sz * drw - unAvg * dr
 
-             abv6 = abv3*abv4*ova2Avg + abv2*abv5*ovaAvg
-             abv7 = abv2*abv4*ovaAvg  + abv3*abv5
+                    abv6 = abv3 * abv4 * ova2Avg + abv2 * abv5 * ovaAvg
+                    abv7 = abv2 * abv4 * ovaAvg + abv3 * abv5
 
-             ! Compute and scatter the dissipative flux.
-             ! Density.
+                    ! Compute and scatter the dissipative flux.
+                    ! Density.
 
-             fs               = lam3*dr  + abv6
-             fw(i+1,j,k,irho) = fw(i+1,j,k,irho) + fs
-             fw(i,j,k,irho)   = fw(i,j,k,irho)   - fs
+                    fs = lam3 * dr + abv6
+                    fw(i + 1, j, k, irho) = fw(i + 1, j, k, irho) + fs
+                    fw(i, j, k, irho) = fw(i, j, k, irho) - fs
 
-             ! X-momentum.
+                    ! X-momentum.
 
-             fs              = lam3*dru + uAvg*abv6 + sx*abv7
-             fw(i+1,j,k,imx) = fw(i+1,j,k,imx) + fs
-             fw(i,j,k,imx)   = fw(i,j,k,imx)   - fs
+                    fs = lam3 * dru + uAvg * abv6 + sx * abv7
+                    fw(i + 1, j, k, imx) = fw(i + 1, j, k, imx) + fs
+                    fw(i, j, k, imx) = fw(i, j, k, imx) - fs
 
-             ! Y-momentum.
+                    ! Y-momentum.
 
-             fs              = lam3*drv + vAvg*abv6 + sy*abv7
-             fw(i+1,j,k,imy) = fw(i+1,j,k,imy) + fs
-             fw(i,j,k,imy)   = fw(i,j,k,imy)   - fs
+                    fs = lam3 * drv + vAvg * abv6 + sy * abv7
+                    fw(i + 1, j, k, imy) = fw(i + 1, j, k, imy) + fs
+                    fw(i, j, k, imy) = fw(i, j, k, imy) - fs
 
-             ! Z-momentum.
+                    ! Z-momentum.
 
-             fs              = lam3*drw + wAvg*abv6 + sz*abv7
-             fw(i+1,j,k,imz) = fw(i+1,j,k,imz) + fs
-             fw(i,j,k,imz)   = fw(i,j,k,imz)   - fs
+                    fs = lam3 * drw + wAvg * abv6 + sz * abv7
+                    fw(i + 1, j, k, imz) = fw(i + 1, j, k, imz) + fs
+                    fw(i, j, k, imz) = fw(i, j, k, imz) - fs
 
-             ! Energy.
+                    ! Energy.
 
-             fs                = lam3*dre + hAvg*abv6 + unAvg*abv7
-             fw(i+1,j,k,irhoE) = fw(i+1,j,k,irhoE) + fs
-             fw(i,j,k,irhoE)   = fw(i,j,k,irhoE)   - fs
+                    fs = lam3 * dre + hAvg * abv6 + unAvg * abv7
+                    fw(i + 1, j, k, irhoE) = fw(i + 1, j, k, irhoE) + fs
+                    fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - fs
 
-          end do
-       end do
-    end do
-    !
-    !       Dissipative fluxes in the j-direction.
-    !
-    do k=2,kl
-       do j=1,jl
-          do i=2,il
+                end do
+            end do
+        end do
+        !
+        !       Dissipative fluxes in the j-direction.
+        !
+        do k = 2, kl
+            do j = 1, jl
+                do i = 2, il
 
+                    ! Compute the dissipation coefficients for this face.
 
-             ! Compute the dissipation coefficients for this face.
+                    ppor = zero
+                    if (porJ(i, j, k) == normalFlux) ppor = one
 
-             ppor = zero
-             if(porJ(i,j,k) == normalFlux) ppor = one
+                    dis2 = fis2 * ppor * min(dpMax, max(dss(i, j, k, 2), dss(i, j + 1, k, 2))) &
+                           + sigma * fis4 * ppor
 
-             dis2 = fis2*ppor*min(dpMax,max(dss(i,j,k,2),dss(i,j+1,k,2)))&
-                    +sigma*fis4*ppor
+                    ! Construct the vector of the first and third differences
+                    ! multiplied by the appropriate constants.
 
-             ! Construct the vector of the first and third differences
-             ! multiplied by the appropriate constants.
+                    ddw = w(i, j + 1, k, irho) - w(i, j, k, irho)
+                    dr = dis2 * ddw
 
-             ddw = w(i,j+1,k,irho) - w(i,j,k,irho)
-             dr  = dis2*ddw
+                    ddw = w(i, j + 1, k, irho) * w(i, j + 1, k, ivx) &
+                          - w(i, j, k, irho) * w(i, j, k, ivx)
+                    dru = dis2 * ddw
 
-             ddw = w(i,j+1,k,irho)*w(i,j+1,k,ivx) &
-                  - w(i,j,k,irho)*w(i,j,k,ivx)
-             dru = dis2*ddw
+                    ddw = w(i, j + 1, k, irho) * w(i, j + 1, k, ivy) &
+                          - w(i, j, k, irho) * w(i, j, k, ivy)
+                    drv = dis2 * ddw
 
-             ddw = w(i,j+1,k,irho)*w(i,j+1,k,ivy) &
-                  - w(i,j,k,irho)*w(i,j,k,ivy)
-             drv = dis2*ddw
+                    ddw = w(i, j + 1, k, irho) * w(i, j + 1, k, ivz) &
+                          - w(i, j, k, irho) * w(i, j, k, ivz)
+                    drw = dis2 * ddw
 
-             ddw = w(i,j+1,k,irho)*w(i,j+1,k,ivz) &
-                  - w(i,j,k,irho)*w(i,j,k,ivz)
-             drw = dis2*ddw
+                    ddw = w(i, j + 1, k, irhoE) - w(i, j, k, irhoE)
+                    dre = dis2 * ddw
 
-             ddw = w(i,j+1,k,irhoE) - w(i,j,k,irhoE)
-             dre = dis2*ddw
+                    ! In case a k-equation is present, compute the difference
+                    ! of rhok and store the average value of k. If not present,
+                    ! set both these values to zero, such that later on no
+                    ! decision needs to be made anymore.
+                    drk = zero
+                    kAvg = zero
 
-             ! In case a k-equation is present, compute the difference
-             ! of rhok and store the average value of k. If not present,
-             ! set both these values to zero, such that later on no
-             ! decision needs to be made anymore.
-             drk   = zero
-             kAvg = zero
+                    if (correctForK) then
+                        ddw6 = w(i, j + 1, k, irho) * w(i, j + 1, k, itu1) &
+                               - w(i, j, k, irho) * w(i, j, k, itu1)
+                        drk = dis2 * ddw6 &
+                              - dis4 * (w(i, j + 2, k, irho) * w(i, j + 2, k, itu1) &
+                                        - w(i, j - 1, k, irho) * w(i, j - 1, k, itu1) - three * ddw6)
 
-             if( correctForK ) then
-                ddw6 = w(i,j+1,k,irho)*w(i,j+1,k,itu1) &
-                     - w(i,j,k,irho)*w(i,j,k,itu1)
-                drk = dis2*ddw6                              &
-                     - dis4*(w(i,j+2,k,irho)*w(i,j+2,k,itu1) &
-                     -       w(i,j-1,k,irho)*w(i,j-1,k,itu1) - three*ddw6)
+                        kAvg = half * (w(i, j, k, itu1) + w(i, j + 1, k, itu1))
+                    end if
 
-                kAvg = half*(w(i,j,k,itu1) + w(i,j+1,k,itu1))
-             endif
+                    ! Compute the average value of gamma and compute some
+                    ! expressions in which it occurs.
 
-             ! Compute the average value of gamma and compute some
-             ! expressions in which it occurs.
+                    gammaAvg = half * (gamma(i, j + 1, k) + gamma(i, j, k))
+                    gm1 = gammaAvg - one
+                    ovgm1 = one / gm1
+                    gm53 = gammaAvg - five * third
 
-             gammaAvg = half*(gamma(i,j+1,k) + gamma(i,j,k))
-             gm1      = gammaAvg - one
-             ovgm1    = one/gm1
-             gm53     = gammaAvg - five*third
+                    ! Compute the average state at the interface.
 
-             ! Compute the average state at the interface.
+                    uAvg = half * (w(i, j + 1, k, ivx) + w(i, j, k, ivx))
+                    vAvg = half * (w(i, j + 1, k, ivy) + w(i, j, k, ivy))
+                    wAvg = half * (w(i, j + 1, k, ivz) + w(i, j, k, ivz))
+                    a2Avg = half * (gamma(i, j + 1, k) * p(i, j + 1, k) / w(i, j + 1, k, irho) &
+                                    + gamma(i, j, k) * p(i, j, k) / w(i, j, k, irho))
 
-             uAvg  = half*(w(i,j+1,k,ivx) + w(i,j,k,ivx))
-             vAvg  = half*(w(i,j+1,k,ivy) + w(i,j,k,ivy))
-             wAvg  = half*(w(i,j+1,k,ivz) + w(i,j,k,ivz))
-             a2Avg = half*(gamma(i,j+1,k)*p(i,j+1,k)/w(i,j+1,k,irho) &
-                  +       gamma(i,j,  k)*p(i,j,  k)/w(i,j,  k,irho))
+                    area = sqrt(sj(i, j, k, 1)**2 + sj(i, j, k, 2)**2 + sj(i, j, k, 3)**2)
+                    tmp = one / max(1.e-25_realType, area)
+                    sx = sj(i, j, k, 1) * tmp
+                    sy = sj(i, j, k, 2) * tmp
+                    sz = sj(i, j, k, 3) * tmp
 
-             area = sqrt(sj(i,j,k,1)**2 + sj(i,j,k,2)**2 + sj(i,j,k,3)**2)
-             tmp  = one/max(1.e-25_realType,area)
-             sx   = sj(i,j,k,1)*tmp
-             sy   = sj(i,j,k,2)*tmp
-             sz   = sj(i,j,k,3)*tmp
+                    alphaAvg = half * (uAvg**2 + vAvg**2 + wAvg**2)
+                    hAvg = alphaAvg + ovgm1 * (a2Avg - gm53 * kAvg)
+                    aAvg = sqrt(a2Avg)
+                    unAvg = uAvg * sx + vAvg * sy + wAvg * sz
+                    ovaAvg = one / aAvg
+                    ova2Avg = one / a2Avg
 
-             alphaAvg = half*(uAvg**2 + vAvg**2 + wAvg**2)
-             hAvg     = alphaAvg + ovgm1*(a2Avg - gm53*kAvg)
-             aAvg     = sqrt(a2Avg)
-             unAvg    = uAvg*sx + vAvg*sy + wAvg*sz
-             ovaAvg   = one/aAvg
-             ova2Avg  = one/a2Avg
+                    ! The mesh velocity if the face is moving. It must be
+                    ! divided by the area to obtain a true velocity.
 
-             ! The mesh velocity if the face is moving. It must be
-             ! divided by the area to obtain a true velocity.
+                    sface = sFaceJ(i, j, k) * tmp
 
-             sface = sFaceJ(i,j,k)*tmp
+                    ! Compute the absolute values of the three eigenvalues
+                    ! and make sure they don't become zero by cutting them
+                    ! off to a certain minimum.
 
-             ! Compute the absolute values of the three eigenvalues
-             ! and make sure they don't become zero by cutting them
-             ! off to a certain minimum.
+                    lam1 = abs(unAvg - sface + aAvg)
+                    lam2 = abs(unAvg - sface - aAvg)
+                    lam3 = abs(unAvg - sface)
 
-             lam1 = abs(unAvg - sface + aAvg)
-             lam2 = abs(unAvg - sface - aAvg)
-             lam3 = abs(unAvg - sface)
+                    rrad = lam3 + aAvg
 
-             rrad = lam3 + aAvg
+                    ! Multiply the eigenvalues by the area to obtain
+                    ! the correct values for the dissipation term.
 
-             ! Multiply the eigenvalues by the area to obtain
-             ! the correct values for the dissipation term.
+                    lam1 = max(lam1, epsAcoustic * rrad) * area
+                    lam2 = max(lam2, epsAcoustic * rrad) * area
+                    lam3 = max(lam3, epsShear * rrad) * area
 
-             lam1 = max(lam1,epsAcoustic*rrad)*area
-             lam2 = max(lam2,epsAcoustic*rrad)*area
-             lam3 = max(lam3,epsShear*rrad)*area
+                    ! Some abbreviations, which occur quite often in the
+                    ! dissipation terms.
 
-             ! Some abbreviations, which occur quite often in the
-             ! dissipation terms.
+                    abv1 = half * (lam1 + lam2)
+                    abv2 = half * (lam1 - lam2)
+                    abv3 = abv1 - lam3
 
-             abv1 = half*(lam1 + lam2)
-             abv2 = half*(lam1 - lam2)
-             abv3 = abv1 - lam3
+                    abv4 = gm1 * (alphaAvg * dr - uAvg * dru - vAvg * drv &
+                                  - wAvg * drw + dre) - gm53 * drk
+                    abv5 = sx * dru + sy * drv + sz * drw - unAvg * dr
 
-             abv4 = gm1*(alphaAvg*dr - uAvg*dru -vAvg*drv &
-                  -      wAvg*drw + dre) - gm53*drk
-             abv5 = sx*dru + sy*drv + sz*drw - unAvg*dr
+                    abv6 = abv3 * abv4 * ova2Avg + abv2 * abv5 * ovaAvg
+                    abv7 = abv2 * abv4 * ovaAvg + abv3 * abv5
 
-             abv6 = abv3*abv4*ova2Avg + abv2*abv5*ovaAvg
-             abv7 = abv2*abv4*ovaAvg  + abv3*abv5
+                    ! Compute and scatter the dissipative flux.
+                    ! Density.
 
-             ! Compute and scatter the dissipative flux.
-             ! Density.
+                    fs = lam3 * dr + abv6
+                    fw(i, j + 1, k, irho) = fw(i, j + 1, k, irho) + fs
+                    fw(i, j, k, irho) = fw(i, j, k, irho) - fs
 
-             fs               = lam3*dr  + abv6
-             fw(i,j+1,k,irho) = fw(i,j+1,k,irho) + fs
-             fw(i,j,k,irho)   = fw(i,j,k,irho)   - fs
+                    ! X-momentum.
 
-             ! X-momentum.
+                    fs = lam3 * dru + uAvg * abv6 + sx * abv7
+                    fw(i, j + 1, k, imx) = fw(i, j + 1, k, imx) + fs
+                    fw(i, j, k, imx) = fw(i, j, k, imx) - fs
 
-             fs              = lam3*dru + uAvg*abv6 + sx*abv7
-             fw(i,j+1,k,imx) = fw(i,j+1,k,imx) + fs
-             fw(i,j,k,imx)   = fw(i,j,k,imx)   - fs
+                    ! Y-momentum.
 
-             ! Y-momentum.
+                    fs = lam3 * drv + vAvg * abv6 + sy * abv7
+                    fw(i, j + 1, k, imy) = fw(i, j + 1, k, imy) + fs
+                    fw(i, j, k, imy) = fw(i, j, k, imy) - fs
 
-             fs              = lam3*drv + vAvg*abv6 + sy*abv7
-             fw(i,j+1,k,imy) = fw(i,j+1,k,imy) + fs
-             fw(i,j,k,imy)   = fw(i,j,k,imy)   - fs
+                    ! Z-momentum.
 
-             ! Z-momentum.
+                    fs = lam3 * drw + wAvg * abv6 + sz * abv7
+                    fw(i, j + 1, k, imz) = fw(i, j + 1, k, imz) + fs
+                    fw(i, j, k, imz) = fw(i, j, k, imz) - fs
 
-             fs              = lam3*drw + wAvg*abv6 + sz*abv7
-             fw(i,j+1,k,imz) = fw(i,j+1,k,imz) + fs
-             fw(i,j,k,imz)   = fw(i,j,k,imz)   - fs
+                    ! Energy.
 
-             ! Energy.
+                    fs = lam3 * dre + hAvg * abv6 + unAvg * abv7
+                    fw(i, j + 1, k, irhoE) = fw(i, j + 1, k, irhoE) + fs
+                    fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - fs
 
-             fs                = lam3*dre + hAvg*abv6 + unAvg*abv7
-             fw(i,j+1,k,irhoE) = fw(i,j+1,k,irhoE) + fs
-             fw(i,j,k,irhoE)   = fw(i,j,k,irhoE)   - fs
+                end do
+            end do
+        end do
+        !
+        !       Dissipative fluxes in the k-direction.
+        !
+        do k = 1, kl
+            do j = 2, jl
+                do i = 2, il
 
-          end do
-       end do
-    end do
-    !
-    !       Dissipative fluxes in the k-direction.
-    !
-    do k=1,kl
-       do j=2,jl
-          do i=2,il
+                    ! Compute the dissipation coefficients for this face.
 
-             ! Compute the dissipation coefficients for this face.
+                    ppor = zero
+                    if (porK(i, j, k) == normalFlux) ppor = one
 
-             ppor = zero
-             if(porK(i,j,k) == normalFlux) ppor = one
+                    dis2 = fis2 * ppor * min(dpMax, max(dss(i, j, k, 3), dss(i, j, k + 1, 3))) &
+                           + sigma * fis4 * ppor
 
-             dis2 = fis2*ppor*min(dpMax,max(dss(i,j,k,3),dss(i,j,k+1,3)))&
-                    +sigma*fis4*ppor
+                    ! Construct the vector of the first and third differences
+                    ! multiplied by the appropriate constants.
 
-             ! Construct the vector of the first and third differences
-             ! multiplied by the appropriate constants.
+                    ddw = w(i, j, k + 1, irho) - w(i, j, k, irho)
+                    dr = dis2 * ddw
 
-             ddw = w(i,j,k+1,irho) - w(i,j,k,irho)
-             dr  = dis2*ddw
+                    ddw = w(i, j, k + 1, irho) * w(i, j, k + 1, ivx) &
+                          - w(i, j, k, irho) * w(i, j, k, ivx)
+                    dru = dis2 * ddw
 
-             ddw = w(i,j,k+1,irho)*w(i,j,k+1,ivx) &
-                  - w(i,j,k,irho)*w(i,j,k,ivx)
-             dru = dis2*ddw
+                    ddw = w(i, j, k + 1, irho) * w(i, j, k + 1, ivy) &
+                          - w(i, j, k, irho) * w(i, j, k, ivy)
+                    drv = dis2 * ddw
 
-             ddw = w(i,j,k+1,irho)*w(i,j,k+1,ivy) &
-                  - w(i,j,k,irho)*w(i,j,k,ivy)
-             drv = dis2*ddw
+                    ddw = w(i, j, k + 1, irho) * w(i, j, k + 1, ivz) &
+                          - w(i, j, k, irho) * w(i, j, k, ivz)
+                    drw = dis2 * ddw
 
-             ddw = w(i,j,k+1,irho)*w(i,j,k+1,ivz) &
-                  - w(i,j,k,irho)*w(i,j,k,ivz)
-             drw = dis2*ddw
+                    ddw = w(i, j, k + 1, irhoE) - w(i, j, k, irhoE)
+                    dre = dis2 * ddw
 
-             ddw = w(i,j,k+1,irhoE) - w(i,j,k,irhoE)
-             dre = dis2*ddw
+                    ! In case a k-equation is present, compute the difference
+                    ! of rhok and store the average value of k. If not present,
+                    ! set both these values to zero, such that later on no
+                    ! decision needs to be made anymore.
+                    drk = zero
+                    kAvg = zero
 
-             ! In case a k-equation is present, compute the difference
-             ! of rhok and store the average value of k. If not present,
-             ! set both these values to zero, such that later on no
-             ! decision needs to be made anymore.
-             drk   = zero
-             kAvg = zero
+                    if (correctForK) then
+                        ddw6 = w(i, j, k + 1, irho) * w(i, j, k + 1, itu1) &
+                               - w(i, j, k, irho) * w(i, j, k, itu1)
+                        drk = dis2 * ddw6 &
+                              - dis4 * (w(i, j, k + 2, irho) * w(i, j, k + 2, itu1) &
+                                        - w(i, j, k - 1, irho) * w(i, j, k - 1, itu1) - three * ddw6)
 
-             if( correctForK ) then
-                ddw6 = w(i,j,k+1,irho)*w(i,j,k+1,itu1) &
-                     - w(i,j,k,irho)*w(i,j,k,itu1)
-                drk = dis2*ddw6                              &
-                     - dis4*(w(i,j,k+2,irho)*w(i,j,k+2,itu1) &
-                     -       w(i,j,k-1,irho)*w(i,j,k-1,itu1) - three*ddw6)
+                        kAvg = half * (w(i, j, k + 1, itu1) + w(i, j, k, itu1))
+                    end if
 
-                kAvg = half*(w(i,j,k+1,itu1) + w(i,j,k,itu1))
-             endif
+                    ! Compute the average value of gamma and compute some
+                    ! expressions in which it occurs.
 
-             ! Compute the average value of gamma and compute some
-             ! expressions in which it occurs.
+                    gammaAvg = half * (gamma(i, j, k + 1) + gamma(i, j, k))
+                    gm1 = gammaAvg - one
+                    ovgm1 = one / gm1
+                    gm53 = gammaAvg - five * third
 
-             gammaAvg = half*(gamma(i,j,k+1) + gamma(i,j,k))
-             gm1      = gammaAvg - one
-             ovgm1    = one/gm1
-             gm53     = gammaAvg - five*third
+                    ! Compute the average state at the interface.
 
-             ! Compute the average state at the interface.
+                    uAvg = half * (w(i, j, k + 1, ivx) + w(i, j, k, ivx))
+                    vAvg = half * (w(i, j, k + 1, ivy) + w(i, j, k, ivy))
+                    wAvg = half * (w(i, j, k + 1, ivz) + w(i, j, k, ivz))
+                    a2Avg = half * (gamma(i, j, k + 1) * p(i, j, k + 1) / w(i, j, k + 1, irho) &
+                                    + gamma(i, j, k) * p(i, j, k) / w(i, j, k, irho))
 
-             uAvg  = half*(w(i,j,k+1,ivx) + w(i,j,k,ivx))
-             vAvg  = half*(w(i,j,k+1,ivy) + w(i,j,k,ivy))
-             wAvg  = half*(w(i,j,k+1,ivz) + w(i,j,k,ivz))
-             a2Avg = half*(gamma(i,j,k+1)*p(i,j,k+1)/w(i,j,k+1,irho) &
-                  +       gamma(i,j,k)  *p(i,j,k)  /w(i,j,k,  irho))
+                    area = sqrt(sk(i, j, k, 1)**2 + sk(i, j, k, 2)**2 + sk(i, j, k, 3)**2)
+                    tmp = one / max(1.e-25_realType, area)
+                    sx = sk(i, j, k, 1) * tmp
+                    sy = sk(i, j, k, 2) * tmp
+                    sz = sk(i, j, k, 3) * tmp
 
-             area = sqrt(sk(i,j,k,1)**2 + sk(i,j,k,2)**2 + sk(i,j,k,3)**2)
-             tmp  = one/max(1.e-25_realType,area)
-             sx   = sk(i,j,k,1)*tmp
-             sy   = sk(i,j,k,2)*tmp
-             sz   = sk(i,j,k,3)*tmp
+                    alphaAvg = half * (uAvg**2 + vAvg**2 + wAvg**2)
+                    hAvg = alphaAvg + ovgm1 * (a2Avg - gm53 * kAvg)
+                    aAvg = sqrt(a2Avg)
+                    unAvg = uAvg * sx + vAvg * sy + wAvg * sz
+                    ovaAvg = one / aAvg
+                    ova2Avg = one / a2Avg
 
-             alphaAvg = half*(uAvg**2 + vAvg**2 + wAvg**2)
-             hAvg     = alphaAvg + ovgm1*(a2Avg - gm53*kAvg)
-             aAvg     = sqrt(a2Avg)
-             unAvg    = uAvg*sx + vAvg*sy + wAvg*sz
-             ovaAvg   = one/aAvg
-             ova2Avg  = one/a2Avg
+                    ! The mesh velocity if the face is moving. It must be
+                    ! divided by the area to obtain a true velocity.
 
-             ! The mesh velocity if the face is moving. It must be
-             ! divided by the area to obtain a true velocity.
+                    sface = sFaceK(i, j, k) * tmp
 
-             sface = sFaceK(i,j,k)*tmp
+                    ! Compute the absolute values of the three eigenvalues
+                    ! and make sure they don't become zero by cutting them
+                    ! off to a certain minimum.
 
-             ! Compute the absolute values of the three eigenvalues
-             ! and make sure they don't become zero by cutting them
-             ! off to a certain minimum.
+                    lam1 = abs(unAvg - sface + aAvg)
+                    lam2 = abs(unAvg - sface - aAvg)
+                    lam3 = abs(unAvg - sface)
 
-             lam1 = abs(unAvg - sface + aAvg)
-             lam2 = abs(unAvg - sface - aAvg)
-             lam3 = abs(unAvg - sface)
+                    rrad = lam3 + aAvg
 
-             rrad = lam3 + aAvg
+                    ! Multiply the eigenvalues by the area to obtain
+                    ! the correct values for the dissipation term.
 
-             ! Multiply the eigenvalues by the area to obtain
-             ! the correct values for the dissipation term.
+                    lam1 = max(lam1, epsAcoustic * rrad) * area
+                    lam2 = max(lam2, epsAcoustic * rrad) * area
+                    lam3 = max(lam3, epsShear * rrad) * area
 
-             lam1 = max(lam1,epsAcoustic*rrad)*area
-             lam2 = max(lam2,epsAcoustic*rrad)*area
-             lam3 = max(lam3,epsShear*rrad)*area
+                    ! Some abbreviations, which occur quite often in the
+                    ! dissipation terms.
 
-             ! Some abbreviations, which occur quite often in the
-             ! dissipation terms.
+                    abv1 = half * (lam1 + lam2)
+                    abv2 = half * (lam1 - lam2)
+                    abv3 = abv1 - lam3
 
-             abv1 = half*(lam1 + lam2)
-             abv2 = half*(lam1 - lam2)
-             abv3 = abv1 - lam3
+                    abv4 = gm1 * (alphaAvg * dr - uAvg * dru - vAvg * drv &
+                                  - wAvg * drw + dre) - gm53 * drk
+                    abv5 = sx * dru + sy * drv + sz * drw - unAvg * dr
 
-             abv4 = gm1*(alphaAvg*dr - uAvg*dru -vAvg*drv &
-                  -      wAvg*drw + dre) - gm53*drk
-             abv5 = sx*dru + sy*drv + sz*drw - unAvg*dr
+                    abv6 = abv3 * abv4 * ova2Avg + abv2 * abv5 * ovaAvg
+                    abv7 = abv2 * abv4 * ovaAvg + abv3 * abv5
 
-             abv6 = abv3*abv4*ova2Avg + abv2*abv5*ovaAvg
-             abv7 = abv2*abv4*ovaAvg  + abv3*abv5
+                    ! Compute and scatter the dissipative flux.
+                    ! Density.
 
-             ! Compute and scatter the dissipative flux.
-             ! Density.
+                    fs = lam3 * dr + abv6
+                    fw(i, j, k + 1, irho) = fw(i, j, k + 1, irho) + fs
+                    fw(i, j, k, irho) = fw(i, j, k, irho) - fs
 
-             fs               = lam3*dr  + abv6
-             fw(i,j,k+1,irho) = fw(i,j,k+1,irho) + fs
-             fw(i,j,k,irho)   = fw(i,j,k,irho)   - fs
+                    ! X-momentum.
 
-             ! X-momentum.
+                    fs = lam3 * dru + uAvg * abv6 + sx * abv7
+                    fw(i, j, k + 1, imx) = fw(i, j, k + 1, imx) + fs
+                    fw(i, j, k, imx) = fw(i, j, k, imx) - fs
 
-             fs              = lam3*dru + uAvg*abv6 + sx*abv7
-             fw(i,j,k+1,imx) = fw(i,j,k+1,imx) + fs
-             fw(i,j,k,imx)   = fw(i,j,k,imx)   - fs
+                    ! Y-momentum.
 
-             ! Y-momentum.
+                    fs = lam3 * drv + vAvg * abv6 + sy * abv7
+                    fw(i, j, k + 1, imy) = fw(i, j, k + 1, imy) + fs
+                    fw(i, j, k, imy) = fw(i, j, k, imy) - fs
 
-             fs              = lam3*drv + vAvg*abv6 + sy*abv7
-             fw(i,j,k+1,imy) = fw(i,j,k+1,imy) + fs
-             fw(i,j,k,imy)   = fw(i,j,k,imy)   - fs
+                    ! Z-momentum.
 
-             ! Z-momentum.
+                    fs = lam3 * drw + wAvg * abv6 + sz * abv7
+                    fw(i, j, k + 1, imz) = fw(i, j, k + 1, imz) + fs
+                    fw(i, j, k, imz) = fw(i, j, k, imz) - fs
 
-             fs              = lam3*drw + wAvg*abv6 + sz*abv7
-             fw(i,j,k+1,imz) = fw(i,j,k+1,imz) + fs
-             fw(i,j,k,imz)   = fw(i,j,k,imz)   - fs
+                    ! Energy.
 
-             ! Energy.
+                    fs = lam3 * dre + hAvg * abv6 + unAvg * abv7
+                    fw(i, j, k + 1, irhoE) = fw(i, j, k + 1, irhoE) + fs
+                    fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - fs
 
-             fs                = lam3*dre + hAvg*abv6 + unAvg*abv7
-             fw(i,j,k+1,irhoE) = fw(i,j,k+1,irhoE) + fs
-             fw(i,j,k,irhoE)   = fw(i,j,k,irhoE)   - fs
+                end do
+            end do
+        end do
+
+    end subroutine inviscidDissFluxMatrixApprox
+
+    subroutine computeSpeedOfSoundSquared
+        ! ---------------------------------------------
+        !           Compute the speed of sound squared
+        ! ---------------------------------------------
+        use constants
+        use utils, only: getCorrectForK
+        implicit none
+
+        ! Variables for speed of sound
+        logical :: correctForK
+        real(kind=realType) :: pp
+        real(kind=realType), parameter :: twoThird = two * third
+        integer(kind=intType) :: i, j, k
+
+        ! Determine if we need to correct for K
+        correctForK = getCorrectForK()
+
+        if (correctForK) then
+            do k = 1, ke
+                do j = 1, je
+                    do i = singleHaloStart, ie
+                        pp = p(i, j, k) - twoThird * w(i, j, k, irho) * w(i, j, k, itu1)
+                        aa(i, j, k) = gamma(i, j, k) * pp / w(i, j, k, irho)
+                    end do
+                end do
+            end do
+        else
+            do k = 1, ke
+                do j = 1, je
+                    do i = singleHaloStart, ie
+                        aa(i, j, k) = gamma(i, j, k) * p(i, j, k) / w(i, j, k, irho)
+                    end do
+                end do
+            end do
+        end if
+    end subroutine computeSpeedOfSoundSquared
+
+    subroutine allNodalGradients
+
+        ! ---------------------------------------------
+        !           Compute nodal gradients
+        ! ---------------------------------------------
+        use constants
+        implicit none
+
+        ! Variables for nodal gradients
+        real(kind=realType) :: a2, oVol, uBar, vBar, wBar, sx, sy, sz
+        integer(kind=intType) :: i, j, k
+
+        ! Zero just the required part of the nodal gradients since the
+        ! first value may be useful.
+        ux(nodeStart:, :, :) = zero
+        uy(nodeStart:, :, :) = zero
+        uz(nodeStart:, :, :) = zero
+
+        vx(nodeStart:, :, :) = zero
+        vy(nodeStart:, :, :) = zero
+        vz(nodeStart:, :, :) = zero
+
+        wx(nodeStart:, :, :) = zero
+        wy(nodeStart:, :, :) = zero
+        wz(nodeStart:, :, :) = zero
+
+        qx(nodeStart:, :, :) = zero
+        qy(nodeStart:, :, :) = zero
+        qz(nodeStart:, :, :) = zero
+
+        ! First part. Contribution in the k-direction.
+        ! The contribution is scattered to both the left and right node
+        ! in k-direction.
+
+        do k = 1, ke
+            do j = 1, jl
+                do i = nodeStart, il
+
+                    ! Compute 8 times the average normal for this part of
+                    ! the control volume. The factor 8 is taken care of later
+                    ! on when the division by the volume takes place.
+
+                    sx = sk(i, j, k - 1, 1) + sk(i + 1, j, k - 1, 1) &
+                         + sk(i, j + 1, k - 1, 1) + sk(i + 1, j + 1, k - 1, 1) &
+                         + sk(i, j, k, 1) + sk(i + 1, j, k, 1) &
+                         + sk(i, j + 1, k, 1) + sk(i + 1, j + 1, k, 1)
+                    sy = sk(i, j, k - 1, 2) + sk(i + 1, j, k - 1, 2) &
+                         + sk(i, j + 1, k - 1, 2) + sk(i + 1, j + 1, k - 1, 2) &
+                         + sk(i, j, k, 2) + sk(i + 1, j, k, 2) &
+                         + sk(i, j + 1, k, 2) + sk(i + 1, j + 1, k, 2)
+                    sz = sk(i, j, k - 1, 3) + sk(i + 1, j, k - 1, 3) &
+                         + sk(i, j + 1, k - 1, 3) + sk(i + 1, j + 1, k - 1, 3) &
+                         + sk(i, j, k, 3) + sk(i + 1, j, k, 3) &
+                         + sk(i, j + 1, k, 3) + sk(i + 1, j + 1, k, 3)
+
+                    ! Compute the average velocities and speed of sound squared
+                    ! for this integration point. Node that these variables are
+                    ! stored in w(ivx), w(ivy), w(ivz) and p.
+
+                    ubar = fourth * (w(i, j, k, ivx) + w(i + 1, j, k, ivx) &
+                                     + w(i, j + 1, k, ivx) + w(i + 1, j + 1, k, ivx))
+                    vbar = fourth * (w(i, j, k, ivy) + w(i + 1, j, k, ivy) &
+                                     + w(i, j + 1, k, ivy) + w(i + 1, j + 1, k, ivy))
+                    wbar = fourth * (w(i, j, k, ivz) + w(i + 1, j, k, ivz) &
+                                     + w(i, j + 1, k, ivz) + w(i + 1, j + 1, k, ivz))
+
+                    a2 = fourth * (aa(i, j, k) + aa(i + 1, j, k) + aa(i, j + 1, k) + aa(i + 1, j + 1, k))
+
+                    ! Add the contributions to the surface integral to the node
+                    ! j-1 and substract it from the node j. For the heat flux it
+                    ! is reversed, because the negative of the gradient of the
+                    ! speed of sound must be computed.
+
+                    if (k > 1) then
+                        ux(i, j, k - 1) = ux(i, j, k - 1) + ubar * sx
+                        uy(i, j, k - 1) = uy(i, j, k - 1) + ubar * sy
+                        uz(i, j, k - 1) = uz(i, j, k - 1) + ubar * sz
+
+                        vx(i, j, k - 1) = vx(i, j, k - 1) + vbar * sx
+                        vy(i, j, k - 1) = vy(i, j, k - 1) + vbar * sy
+                        vz(i, j, k - 1) = vz(i, j, k - 1) + vbar * sz
+
+                        wx(i, j, k - 1) = wx(i, j, k - 1) + wbar * sx
+                        wy(i, j, k - 1) = wy(i, j, k - 1) + wbar * sy
+                        wz(i, j, k - 1) = wz(i, j, k - 1) + wbar * sz
+
+                        qx(i, j, k - 1) = qx(i, j, k - 1) - a2 * sx
+                        qy(i, j, k - 1) = qy(i, j, k - 1) - a2 * sy
+                        qz(i, j, k - 1) = qz(i, j, k - 1) - a2 * sz
+                    end if
+
+                    if (k < ke) then
+                        ux(i, j, k) = ux(i, j, k) - ubar * sx
+                        uy(i, j, k) = uy(i, j, k) - ubar * sy
+                        uz(i, j, k) = uz(i, j, k) - ubar * sz
+
+                        vx(i, j, k) = vx(i, j, k) - vbar * sx
+                        vy(i, j, k) = vy(i, j, k) - vbar * sy
+                        vz(i, j, k) = vz(i, j, k) - vbar * sz
+
+                        wx(i, j, k) = wx(i, j, k) - wbar * sx
+                        wy(i, j, k) = wy(i, j, k) - wbar * sy
+                        wz(i, j, k) = wz(i, j, k) - wbar * sz
+
+                        qx(i, j, k) = qx(i, j, k) + a2 * sx
+                        qy(i, j, k) = qy(i, j, k) + a2 * sy
+                        qz(i, j, k) = qz(i, j, k) + a2 * sz
+                    end if
+                end do
+            end do
+        end do
+
+        ! Second part. Contribution in the j-direction.
+        ! The contribution is scattered to both the left and right node
+        ! in j-direction.
+
+        do k = 1, kl
+            do j = 1, je
+                do i = nodeStart, il
+
+                    ! Compute 8 times the average normal for this part of
+                    ! the control volume. The factor 8 is taken care of later
+                    ! on when the division by the volume takes place.
+
+                    sx = sj(i, j - 1, k, 1) + sj(i + 1, j - 1, k, 1) &
+                         + sj(i, j - 1, k + 1, 1) + sj(i + 1, j - 1, k + 1, 1) &
+                         + sj(i, j, k, 1) + sj(i + 1, j, k, 1) &
+                         + sj(i, j, k + 1, 1) + sj(i + 1, j, k + 1, 1)
+                    sy = sj(i, j - 1, k, 2) + sj(i + 1, j - 1, k, 2) &
+                         + sj(i, j - 1, k + 1, 2) + sj(i + 1, j - 1, k + 1, 2) &
+                         + sj(i, j, k, 2) + sj(i + 1, j, k, 2) &
+                         + sj(i, j, k + 1, 2) + sj(i + 1, j, k + 1, 2)
+                    sz = sj(i, j - 1, k, 3) + sj(i + 1, j - 1, k, 3) &
+                         + sj(i, j - 1, k + 1, 3) + sj(i + 1, j - 1, k + 1, 3) &
+                         + sj(i, j, k, 3) + sj(i + 1, j, k, 3) &
+                         + sj(i, j, k + 1, 3) + sj(i + 1, j, k + 1, 3)
+
+                    ! Compute the average velocities and speed of sound squared
+                    ! for this integration point. Node that these variables are
+                    ! stored in w(ivx), w(ivy), w(ivz) and p.
+
+                    ubar = fourth * (w(i, j, k, ivx) + w(i + 1, j, k, ivx) &
+                                     + w(i, j, k + 1, ivx) + w(i + 1, j, k + 1, ivx))
+                    vbar = fourth * (w(i, j, k, ivy) + w(i + 1, j, k, ivy) &
+                                     + w(i, j, k + 1, ivy) + w(i + 1, j, k + 1, ivy))
+                    wbar = fourth * (w(i, j, k, ivz) + w(i + 1, j, k, ivz) &
+                                     + w(i, j, k + 1, ivz) + w(i + 1, j, k + 1, ivz))
+
+                    a2 = fourth * (aa(i, j, k) + aa(i + 1, j, k) + aa(i, j, k + 1) + aa(i + 1, j, k + 1))
+
+                    ! Add the contributions to the surface integral to the node
+                    ! j-1 and substract it from the node j. For the heat flux it
+                    ! is reversed, because the negative of the gradient of the
+                    ! speed of sound must be computed.
+
+                    if (j > 1) then
+                        ux(i, j - 1, k) = ux(i, j - 1, k) + ubar * sx
+                        uy(i, j - 1, k) = uy(i, j - 1, k) + ubar * sy
+                        uz(i, j - 1, k) = uz(i, j - 1, k) + ubar * sz
+
+                        vx(i, j - 1, k) = vx(i, j - 1, k) + vbar * sx
+                        vy(i, j - 1, k) = vy(i, j - 1, k) + vbar * sy
+                        vz(i, j - 1, k) = vz(i, j - 1, k) + vbar * sz
+
+                        wx(i, j - 1, k) = wx(i, j - 1, k) + wbar * sx
+                        wy(i, j - 1, k) = wy(i, j - 1, k) + wbar * sy
+                        wz(i, j - 1, k) = wz(i, j - 1, k) + wbar * sz
+
+                        qx(i, j - 1, k) = qx(i, j - 1, k) - a2 * sx
+                        qy(i, j - 1, k) = qy(i, j - 1, k) - a2 * sy
+                        qz(i, j - 1, k) = qz(i, j - 1, k) - a2 * sz
+                    end if
+
+                    if (j < je) then
+                        ux(i, j, k) = ux(i, j, k) - ubar * sx
+                        uy(i, j, k) = uy(i, j, k) - ubar * sy
+                        uz(i, j, k) = uz(i, j, k) - ubar * sz
+
+                        vx(i, j, k) = vx(i, j, k) - vbar * sx
+                        vy(i, j, k) = vy(i, j, k) - vbar * sy
+                        vz(i, j, k) = vz(i, j, k) - vbar * sz
+
+                        wx(i, j, k) = wx(i, j, k) - wbar * sx
+                        wy(i, j, k) = wy(i, j, k) - wbar * sy
+                        wz(i, j, k) = wz(i, j, k) - wbar * sz
+
+                        qx(i, j, k) = qx(i, j, k) + a2 * sx
+                        qy(i, j, k) = qy(i, j, k) + a2 * sy
+                        qz(i, j, k) = qz(i, j, k) + a2 * sz
+                    end if
+                end do
+            end do
+        end do
+        !
+        ! Third part. Contribution in the i-direction.
+        ! The contribution is scattered to both the left and right node
+        ! in i-direction.
+        !
+        do k = 1, kl
+            do j = 1, jl
+                do i = nodeStart, ie
+
+                    ! Compute 8 times the average normal for this part of
+                    ! the control volume. The factor 8 is taken care of later
+                    ! on when the division by the volume takes place.
+
+                    sx = si(i - 1, j, k, 1) + si(i - 1, j + 1, k, 1) &
+                         + si(i - 1, j, k + 1, 1) + si(i - 1, j + 1, k + 1, 1) &
+                         + si(i, j, k, 1) + si(i, j + 1, k, 1) &
+                         + si(i, j, k + 1, 1) + si(i, j + 1, k + 1, 1)
+                    sy = si(i - 1, j, k, 2) + si(i - 1, j + 1, k, 2) &
+                         + si(i - 1, j, k + 1, 2) + si(i - 1, j + 1, k + 1, 2) &
+                         + si(i, j, k, 2) + si(i, j + 1, k, 2) &
+                         + si(i, j, k + 1, 2) + si(i, j + 1, k + 1, 2)
+                    sz = si(i - 1, j, k, 3) + si(i - 1, j + 1, k, 3) &
+                         + si(i - 1, j, k + 1, 3) + si(i - 1, j + 1, k + 1, 3) &
+                         + si(i, j, k, 3) + si(i, j + 1, k, 3) &
+                         + si(i, j, k + 1, 3) + si(i, j + 1, k + 1, 3)
+
+                    ! Compute the average velocities and speed of sound squared
+                    ! for this integration point. Node that these variables are
+                    ! stored in w(ivx), w(ivy), w(ivz) and p.
+
+                    ubar = fourth * (w(i, j, k, ivx) + w(i, j + 1, k, ivx) &
+                                     + w(i, j, k + 1, ivx) + w(i, j + 1, k + 1, ivx))
+                    vbar = fourth * (w(i, j, k, ivy) + w(i, j + 1, k, ivy) &
+                                     + w(i, j, k + 1, ivy) + w(i, j + 1, k + 1, ivy))
+                    wbar = fourth * (w(i, j, k, ivz) + w(i, j + 1, k, ivz) &
+                                     + w(i, j, k + 1, ivz) + w(i, j + 1, k + 1, ivz))
+
+                    a2 = fourth * (aa(i, j, k) + aa(i, j + 1, k) + aa(i, j, k + 1) + aa(i, j + 1, k + 1))
+
+                    ! Add the contributions to the surface integral to the node
+                    ! j-1 and substract it from the node j. For the heat flux it
+                    ! is reversed, because the negative of the gradient of the
+                    ! speed of sound must be computed.
+
+                    if (i > nodeStart) then
+                        ux(i - 1, j, k) = ux(i - 1, j, k) + ubar * sx
+                        uy(i - 1, j, k) = uy(i - 1, j, k) + ubar * sy
+                        uz(i - 1, j, k) = uz(i - 1, j, k) + ubar * sz
+
+                        vx(i - 1, j, k) = vx(i - 1, j, k) + vbar * sx
+                        vy(i - 1, j, k) = vy(i - 1, j, k) + vbar * sy
+                        vz(i - 1, j, k) = vz(i - 1, j, k) + vbar * sz
+
+                        wx(i - 1, j, k) = wx(i - 1, j, k) + wbar * sx
+                        wy(i - 1, j, k) = wy(i - 1, j, k) + wbar * sy
+                        wz(i - 1, j, k) = wz(i - 1, j, k) + wbar * sz
+
+                        qx(i - 1, j, k) = qx(i - 1, j, k) - a2 * sx
+                        qy(i - 1, j, k) = qy(i - 1, j, k) - a2 * sy
+                        qz(i - 1, j, k) = qz(i - 1, j, k) - a2 * sz
+                    end if
+
+                    if (i < ie) then
+                        ux(i, j, k) = ux(i, j, k) - ubar * sx
+                        uy(i, j, k) = uy(i, j, k) - ubar * sy
+                        uz(i, j, k) = uz(i, j, k) - ubar * sz
+
+                        vx(i, j, k) = vx(i, j, k) - vbar * sx
+                        vy(i, j, k) = vy(i, j, k) - vbar * sy
+                        vz(i, j, k) = vz(i, j, k) - vbar * sz
+
+                        wx(i, j, k) = wx(i, j, k) - wbar * sx
+                        wy(i, j, k) = wy(i, j, k) - wbar * sy
+                        wz(i, j, k) = wz(i, j, k) - wbar * sz
+
+                        qx(i, j, k) = qx(i, j, k) + a2 * sx
+                        qy(i, j, k) = qy(i, j, k) + a2 * sy
+                        qz(i, j, k) = qz(i, j, k) + a2 * sz
+                    end if
+                end do
+            end do
+        end do
 
-          end do
-       end do
-    end do
+        ! Divide by 8 times the volume to obtain the correct gradients.
 
-  end subroutine inviscidDissFluxMatrixApprox
+        do k = 1, kl
+            do j = 1, jl
+                do i = nodeStart, il
 
+                    ! Compute the inverse of 8 times the volume for this node.
 
-  subroutine computeSpeedOfSoundSquared
-    ! ---------------------------------------------
-    !           Compute the speed of sound squared
-    ! ---------------------------------------------
-    use constants
-    use utils, only : getCorrectForK
-    implicit none
-
-    ! Variables for speed of sound
-    logical :: correctForK
-    real(kind=realType) :: pp
-    real(kind=realType), parameter :: twoThird = two*third
-    integer(kind=intType) :: i, j, k
-
-    ! Determine if we need to correct for K
-    correctForK = getCorrectForK()
-
-    if (correctForK) then
-       do k=1,ke
-          do j=1,je
-             do i=singleHaloStart,ie
-                pp = p(i,j,k) - twoThird*w(i,j,k,irho)*w(i,j,k,itu1)
-                aa(i,j,k) = gamma(i,j,k)*pp/w(i,j,k,irho)
-             enddo
-          enddo
-       enddo
-    else
-       do k=1,ke
-          do j=1,je
-             do i=singleHaloStart,ie
-                aa(i,j,k) = gamma(i,j,k)*p(i,j,k)/w(i,j,k,irho)
-             enddo
-          enddo
-       enddo
-    end if
-  end subroutine computeSpeedOfSoundSquared
-
-  subroutine allNodalGradients
-
-    ! ---------------------------------------------
-    !           Compute nodal gradients
-    ! ---------------------------------------------
-    use constants
-    implicit none
-
-    ! Variables for nodal gradients
-    real(kind=realType) :: a2, oVol, uBar, vBar, wBar, sx, sy, sz
-    integer(kind=intType) :: i, j, k
-
-    ! Zero just the required part of the nodal gradients since the
-    ! first value may be useful.
-    ux(nodeStart:, :, :) = zero
-    uy(nodeStart:, :, :) = zero
-    uz(nodeStart:, :, :) = zero
-
-    vx(nodeStart:, :, :) = zero
-    vy(nodeStart:, :, :) = zero
-    vz(nodeStart:, :, :) = zero
-
-    wx(nodeStart:, :, :) = zero
-    wy(nodeStart:, :, :) = zero
-    wz(nodeStart:, :, :) = zero
-
-    qx(nodeStart:, :, :) = zero
-    qy(nodeStart:, :, :) = zero
-    qz(nodeStart:, :, :) = zero
-
-    ! First part. Contribution in the k-direction.
-    ! The contribution is scattered to both the left and right node
-    ! in k-direction.
-
-    do k=1, ke
-       do j=1, jl
-          do i=nodeStart, il
-
-             ! Compute 8 times the average normal for this part of
-             ! the control volume. The factor 8 is taken care of later
-             ! on when the division by the volume takes place.
-
-             sx = sk(i,j,k-1,  1) + sk(i+1,j,k-1,  1) &
-                  + sk(i,j+1,k-1,1) + sk(i+1,j+1,k-1,1) &
-                  + sk(i,j,  k,  1) + sk(i+1,j,  k,  1) &
-                  + sk(i,j+1,k  ,1) + sk(i+1,j+1,k  ,1)
-             sy = sk(i,j,k-1,  2) + sk(i+1,j,k-1,  2) &
-                  + sk(i,j+1,k-1,2) + sk(i+1,j+1,k-1,2) &
-                  + sk(i,j,  k,  2) + sk(i+1,j,  k,  2) &
-                  + sk(i,j+1,k  ,2) + sk(i+1,j+1,k  ,2)
-             sz = sk(i,j,k-1,  3) + sk(i+1,j,k-1,  3) &
-                  + sk(i,j+1,k-1,3) + sk(i+1,j+1,k-1,3) &
-                  + sk(i,j,  k,  3) + sk(i+1,j,  k,  3) &
-                  + sk(i,j+1,k  ,3) + sk(i+1,j+1,k  ,3)
-
-             ! Compute the average velocities and speed of sound squared
-             ! for this integration point. Node that these variables are
-             ! stored in w(ivx), w(ivy), w(ivz) and p.
-
-             ubar = fourth*(w(i,j,  k,ivx) + w(i+1,j,  k,ivx) &
-                  +         w(i,j+1,k,ivx) + w(i+1,j+1,k,ivx))
-             vbar = fourth*(w(i,j,  k,ivy) + w(i+1,j,  k,ivy) &
-                  +         w(i,j+1,k,ivy) + w(i+1,j+1,k,ivy))
-             wbar = fourth*(w(i,j,  k,ivz) + w(i+1,j,  k,ivz) &
-                  +         w(i,j+1,k,ivz) + w(i+1,j+1,k,ivz))
-
-             a2 = fourth*(aa(i,j,k) + aa(i+1,j,k) + aa(i,j+1,k) + aa(i+1,j+1,k))
-
-             ! Add the contributions to the surface integral to the node
-             ! j-1 and substract it from the node j. For the heat flux it
-             ! is reversed, because the negative of the gradient of the
-             ! speed of sound must be computed.
-
-             if(k > 1) then
-                ux(i,j,k-1) = ux(i,j,k-1) + ubar*sx
-                uy(i,j,k-1) = uy(i,j,k-1) + ubar*sy
-                uz(i,j,k-1) = uz(i,j,k-1) + ubar*sz
-
-                vx(i,j,k-1) = vx(i,j,k-1) + vbar*sx
-                vy(i,j,k-1) = vy(i,j,k-1) + vbar*sy
-                vz(i,j,k-1) = vz(i,j,k-1) + vbar*sz
-
-                wx(i,j,k-1) = wx(i,j,k-1) + wbar*sx
-                wy(i,j,k-1) = wy(i,j,k-1) + wbar*sy
-                wz(i,j,k-1) = wz(i,j,k-1) + wbar*sz
-
-                qx(i,j,k-1) = qx(i,j,k-1) - a2*sx
-                qy(i,j,k-1) = qy(i,j,k-1) - a2*sy
-                qz(i,j,k-1) = qz(i,j,k-1) - a2*sz
-             endif
-
-             if(k < ke) then
-                ux(i,j,k) = ux(i,j,k) - ubar*sx
-                uy(i,j,k) = uy(i,j,k) - ubar*sy
-                uz(i,j,k) = uz(i,j,k) - ubar*sz
-
-                vx(i,j,k) = vx(i,j,k) - vbar*sx
-                vy(i,j,k) = vy(i,j,k) - vbar*sy
-                vz(i,j,k) = vz(i,j,k) - vbar*sz
-
-                wx(i,j,k) = wx(i,j,k) - wbar*sx
-                wy(i,j,k) = wy(i,j,k) - wbar*sy
-                wz(i,j,k) = wz(i,j,k) - wbar*sz
-
-                qx(i,j,k) = qx(i,j,k) + a2*sx
-                qy(i,j,k) = qy(i,j,k) + a2*sy
-                qz(i,j,k) = qz(i,j,k) + a2*sz
-             endif
-          end do
-       enddo
-    enddo
-
-
-    ! Second part. Contribution in the j-direction.
-    ! The contribution is scattered to both the left and right node
-    ! in j-direction.
-
-    do k=1, kl
-       do j=1, je
-          do i=nodeStart, il
-
-             ! Compute 8 times the average normal for this part of
-             ! the control volume. The factor 8 is taken care of later
-             ! on when the division by the volume takes place.
-
-             sx = sj(i,j-1,k,  1) + sj(i+1,j-1,k,  1) &
-                  + sj(i,j-1,k+1,1) + sj(i+1,j-1,k+1,1) &
-                  + sj(i,j,  k,  1) + sj(i+1,j,  k,  1) &
-                  + sj(i,j,  k+1,1) + sj(i+1,j,  k+1,1)
-             sy = sj(i,j-1,k,  2) + sj(i+1,j-1,k,  2) &
-                  + sj(i,j-1,k+1,2) + sj(i+1,j-1,k+1,2) &
-                  + sj(i,j,  k,  2) + sj(i+1,j,  k,  2) &
-                  + sj(i,j,  k+1,2) + sj(i+1,j,  k+1,2)
-             sz = sj(i,j-1,k,  3) + sj(i+1,j-1,k,  3) &
-                  + sj(i,j-1,k+1,3) + sj(i+1,j-1,k+1,3) &
-                  + sj(i,j,  k,  3) + sj(i+1,j,  k,  3) &
-                  + sj(i,j,  k+1,3) + sj(i+1,j,  k+1,3)
-
-             ! Compute the average velocities and speed of sound squared
-             ! for this integration point. Node that these variables are
-             ! stored in w(ivx), w(ivy), w(ivz) and p.
-
-             ubar = fourth*(w(i,j,k,  ivx) + w(i+1,j,k,  ivx) &
-                  +         w(i,j,k+1,ivx) + w(i+1,j,k+1,ivx))
-             vbar = fourth*(w(i,j,k,  ivy) + w(i+1,j,k,  ivy) &
-                  +         w(i,j,k+1,ivy) + w(i+1,j,k+1,ivy))
-             wbar = fourth*(w(i,j,k,  ivz) + w(i+1,j,k,  ivz) &
-                  +         w(i,j,k+1,ivz) + w(i+1,j,k+1,ivz))
-
-             a2 = fourth*(aa(i,j,k) + aa(i+1,j,k) + aa(i,j,k+1) + aa(i+1,j,k+1))
-
-             ! Add the contributions to the surface integral to the node
-             ! j-1 and substract it from the node j. For the heat flux it
-             ! is reversed, because the negative of the gradient of the
-             ! speed of sound must be computed.
-
-             if(j > 1) then
-                ux(i,j-1,k) = ux(i,j-1,k) + ubar*sx
-                uy(i,j-1,k) = uy(i,j-1,k) + ubar*sy
-                uz(i,j-1,k) = uz(i,j-1,k) + ubar*sz
-
-                vx(i,j-1,k) = vx(i,j-1,k) + vbar*sx
-                vy(i,j-1,k) = vy(i,j-1,k) + vbar*sy
-                vz(i,j-1,k) = vz(i,j-1,k) + vbar*sz
-
-                wx(i,j-1,k) = wx(i,j-1,k) + wbar*sx
-                wy(i,j-1,k) = wy(i,j-1,k) + wbar*sy
-                wz(i,j-1,k) = wz(i,j-1,k) + wbar*sz
-
-                qx(i,j-1,k) = qx(i,j-1,k) - a2*sx
-                qy(i,j-1,k) = qy(i,j-1,k) - a2*sy
-                qz(i,j-1,k) = qz(i,j-1,k) - a2*sz
-             endif
-
-             if(j < je) then
-                ux(i,j,k) = ux(i,j,k) - ubar*sx
-                uy(i,j,k) = uy(i,j,k) - ubar*sy
-                uz(i,j,k) = uz(i,j,k) - ubar*sz
-
-                vx(i,j,k) = vx(i,j,k) - vbar*sx
-                vy(i,j,k) = vy(i,j,k) - vbar*sy
-                vz(i,j,k) = vz(i,j,k) - vbar*sz
-
-                wx(i,j,k) = wx(i,j,k) - wbar*sx
-                wy(i,j,k) = wy(i,j,k) - wbar*sy
-                wz(i,j,k) = wz(i,j,k) - wbar*sz
-
-                qx(i,j,k) = qx(i,j,k) + a2*sx
-                qy(i,j,k) = qy(i,j,k) + a2*sy
-                qz(i,j,k) = qz(i,j,k) + a2*sz
-             endif
-          end do
-       enddo
-    enddo
-    !
-    ! Third part. Contribution in the i-direction.
-    ! The contribution is scattered to both the left and right node
-    ! in i-direction.
-    !
-    do k=1,kl
-       do j=1,jl
-          do i=nodeStart,ie
-
-             ! Compute 8 times the average normal for this part of
-             ! the control volume. The factor 8 is taken care of later
-             ! on when the division by the volume takes place.
-
-             sx = si(i-1,j,k,  1) + si(i-1,j+1,k,  1) &
-                  + si(i-1,j,k+1,1) + si(i-1,j+1,k+1,1) &
-                  + si(i,  j,k,  1) + si(i,  j+1,k,  1) &
-                  + si(i,  j,k+1,1) + si(i,  j+1,k+1,1)
-             sy = si(i-1,j,k,  2) + si(i-1,j+1,k,  2) &
-                  + si(i-1,j,k+1,2) + si(i-1,j+1,k+1,2) &
-                  + si(i,  j,k,  2) + si(i,  j+1,k,  2) &
-                  + si(i,  j,k+1,2) + si(i,  j+1,k+1,2)
-             sz = si(i-1,j,k,  3) + si(i-1,j+1,k,  3) &
-                  + si(i-1,j,k+1,3) + si(i-1,j+1,k+1,3) &
-                  + si(i,  j,k,  3) + si(i,  j+1,k,  3) &
-                  + si(i,  j,k+1,3) + si(i,  j+1,k+1,3)
-
-             ! Compute the average velocities and speed of sound squared
-             ! for this integration point. Node that these variables are
-             ! stored in w(ivx), w(ivy), w(ivz) and p.
-
-             ubar = fourth*(w(i,j,k,  ivx) + w(i,j+1,k,  ivx) &
-                  +         w(i,j,k+1,ivx) + w(i,j+1,k+1,ivx))
-             vbar = fourth*(w(i,j,k,  ivy) + w(i,j+1,k,  ivy) &
-                  +         w(i,j,k+1,ivy) + w(i,j+1,k+1,ivy))
-             wbar = fourth*(w(i,j,k,  ivz) + w(i,j+1,k,  ivz) &
-                  +         w(i,j,k+1,ivz) + w(i,j+1,k+1,ivz))
-
-             a2 = fourth*(aa(i,j,k) + aa(i,j+1,k) + aa(i,j,k+1) + aa(i,j+1,k+1))
-
-             ! Add the contributions to the surface integral to the node
-             ! j-1 and substract it from the node j. For the heat flux it
-             ! is reversed, because the negative of the gradient of the
-             ! speed of sound must be computed.
-
-             if(i > nodeStart) then
-                ux(i-1,j,k) = ux(i-1,j,k) + ubar*sx
-                uy(i-1,j,k) = uy(i-1,j,k) + ubar*sy
-                uz(i-1,j,k) = uz(i-1,j,k) + ubar*sz
-
-                vx(i-1,j,k) = vx(i-1,j,k) + vbar*sx
-                vy(i-1,j,k) = vy(i-1,j,k) + vbar*sy
-                vz(i-1,j,k) = vz(i-1,j,k) + vbar*sz
-
-                wx(i-1,j,k) = wx(i-1,j,k) + wbar*sx
-                wy(i-1,j,k) = wy(i-1,j,k) + wbar*sy
-                wz(i-1,j,k) = wz(i-1,j,k) + wbar*sz
-
-                qx(i-1,j,k) = qx(i-1,j,k) - a2*sx
-                qy(i-1,j,k) = qy(i-1,j,k) - a2*sy
-                qz(i-1,j,k) = qz(i-1,j,k) - a2*sz
-             endif
-
-             if(i < ie) then
-                ux(i,j,k) = ux(i,j,k) - ubar*sx
-                uy(i,j,k) = uy(i,j,k) - ubar*sy
-                uz(i,j,k) = uz(i,j,k) - ubar*sz
-
-                vx(i,j,k) = vx(i,j,k) - vbar*sx
-                vy(i,j,k) = vy(i,j,k) - vbar*sy
-                vz(i,j,k) = vz(i,j,k) - vbar*sz
-
-                wx(i,j,k) = wx(i,j,k) - wbar*sx
-                wy(i,j,k) = wy(i,j,k) - wbar*sy
-                wz(i,j,k) = wz(i,j,k) - wbar*sz
-
-                qx(i,j,k) = qx(i,j,k) + a2*sx
-                qy(i,j,k) = qy(i,j,k) + a2*sy
-                qz(i,j,k) = qz(i,j,k) + a2*sz
-             endif
-          enddo
-       enddo
-    enddo
-
-    ! Divide by 8 times the volume to obtain the correct gradients.
-
-    do k=1,kl
-       do j=1,jl
-          do i=nodeStart,il
-
-             ! Compute the inverse of 8 times the volume for this node.
-
-             oVol = one/(vol(i,  j,  k) + vol(i,  j,  k+1) &
-                  +      vol(i+1,j,  k) + vol(i+1,j,  k+1) &
-                  +      vol(i,  j+1,k) + vol(i,  j+1,k+1) &
-                  +      vol(i+1,j+1,k) + vol(i+1,j+1,k+1))
-
-             ! Compute the correct velocity gradients and "unit" heat
-             ! fluxes. The velocity gradients are stored in ux, etc.
-
-             ux(i,j,k) = ux(i,j,k)*oVol
-             uy(i,j,k) = uy(i,j,k)*oVol
-             uz(i,j,k) = uz(i,j,k)*oVol
-
-             vx(i,j,k) = vx(i,j,k)*oVol
-             vy(i,j,k) = vy(i,j,k)*oVol
-             vz(i,j,k) = vz(i,j,k)*oVol
-
-             wx(i,j,k) = wx(i,j,k)*oVol
-             wy(i,j,k) = wy(i,j,k)*oVol
-             wz(i,j,k) = wz(i,j,k)*oVol
-
-             qx(i,j,k) = qx(i,j,k)*oVol
-             qy(i,j,k) = qy(i,j,k)*oVol
-             qz(i,j,k) = qz(i,j,k)*oVol
-          end do
-       enddo
-    enddo
-  end subroutine allNodalGradients
-
-  subroutine viscousFlux(storeWallTensor)
-    ! ---------------------------------------------
-    !                   Viscous Flux
-    ! ---------------------------------------------
+                    oVol = one / (vol(i, j, k) + vol(i, j, k + 1) &
+                                  + vol(i + 1, j, k) + vol(i + 1, j, k + 1) &
+                                  + vol(i, j + 1, k) + vol(i, j + 1, k + 1) &
+                                  + vol(i + 1, j + 1, k) + vol(i + 1, j + 1, k + 1))
 
-    use constants
-    use inputPhysics, only : useQCR, prandtl, prandtlturb
-    use flowvarRefState, only : eddyModel
-    use iteration, only : rFil
-    use blockPointers, only : bil => il, bjl=>jl, bkl=>kl, &
-         viscIminPointer, viscImaxPointer, viscSubFace, &
-         viscJminPointer, viscJmaxPointer, &
-         viscKminPointer, viscKmaxPointer
-    implicit none
-
-    ! Input
-    logical, intent(in), optional :: storeWallTensor
-
-    ! Variables for viscous flux
-    real(kind=realType) :: rFilv, por, mul, mue, mut, heatCoef
-    real(kind=realType) :: gm1, factLamHeat, factTurbHeat
-    real(kind=realType) :: u_x, u_y, u_z, v_x, v_y, v_z, w_x, w_y, w_z
-    real(kind=realType) :: q_x, q_y, q_z
-    real(kind=realType) :: corr, ssx, ssy, ssz, fracDiv, snrm
-    real(kind=realType) :: tauxx, tauyy, tauzz
-    real(kind=realType) :: tauxy, tauxz, tauyz
-    real(kind=realType) :: tauxxS, tauyyS, tauzzS
-    real(kind=realType) :: tauxyS, tauxzS, tauyzS
-    real(kind=realType) :: ubar, vbar, wbar
-    real(kind=realType) :: exx, eyy, ezz
-    real(kind=realType) :: exy, exz, eyz
-    real(kind=realType) :: Wxx, Wyy, Wzz
-    real(kind=realType) :: Wxy, Wxz, Wyz, Wyx, Wzx, Wzy
-    real(kind=realType) :: den, Ccr1
-    real(kind=realType) :: fmx, fmy, fmz, frhoE, fact
-    integer(kind=intType) :: i, j, k, io, jo, ko
-    real(kind=realType), parameter :: xminn = 1.e-10_realType
-    real(kind=realType), parameter :: twoThird = two*third
-    real(kind=realType), dimension(9, 2:max(il,jl), 2: max(jl,kl), 2) :: tmpStore
-
-    logical :: storeWall
-
-    storeWall = .False.
-    if (present(storeWallTensor)) then
-       storeWall = storeWallTensor
-    end if
-
-    ! Set QCR parameters
-    Ccr1 = 0.3_realType
-    rFilv = rFil
-
-    ! The diagonals of the vorticity tensor components are always zero
-    Wxx = zero
-    Wyy = zero
-    Wzz = zero
-    !
-    !         viscous fluxes in the k-direction.
-    !
-    mue = zero
-    do k=1,kl
-       do j=2,jl
-          do i=2,il
-
-
-             ! Set the value of the porosity. If not zero, it is set
-             ! to average the eddy-viscosity and to take the factor
-             ! rFilv into account.
-
-             por = half*rFilv
-             if(porK(i,j,k) == noFlux) por = zero
-
-             ! Compute the laminar and (if present) the eddy viscosities
-             ! multiplied by the porosity. Compute the factor in front of
-             ! the gradients of the speed of sound squared for the heat
-             ! flux.
-
-             mul = por*(rlv(i,j,k) + rlv(i,j,k+1))
-             mue = por*(rev(i,j,k) + rev(i,j,k+1))
-             mut = mul + mue
-
-             gm1          = half*(gamma(i,j,k) + gamma(i,j,k+1)) - one
-             factLamHeat  = one/(prandtl*gm1)
-             factTurbHeat = one/(prandtlTurb*gm1)
-
-             heatCoef = mul*factLamHeat + mue*factTurbHeat
-
-             ! Compute the gradients at the face by averaging the four
-             ! nodal values.
-
-             u_x = fourth*(ux(i-1,j-1,k) + ux(i,j-1,k) &
-                  +         ux(i-1,j,  k) + ux(i,j,  k))
-             u_y = fourth*(uy(i-1,j-1,k) + uy(i,j-1,k) &
-                  +         uy(i-1,j,  k) + uy(i,j,  k))
-             u_z = fourth*(uz(i-1,j-1,k) + uz(i,j-1,k) &
-                  +         uz(i-1,j,  k) + uz(i,j,  k))
-
-             v_x = fourth*(vx(i-1,j-1,k) + vx(i,j-1,k) &
-                  +         vx(i-1,j,  k) + vx(i,j,  k))
-             v_y = fourth*(vy(i-1,j-1,k) + vy(i,j-1,k) &
-                  +         vy(i-1,j,  k) + vy(i,j,  k))
-             v_z = fourth*(vz(i-1,j-1,k) + vz(i,j-1,k) &
-                  +         vz(i-1,j,  k) + vz(i,j,  k))
-
-             w_x = fourth*(wx(i-1,j-1,k) + wx(i,j-1,k) &
-                  +         wx(i-1,j,  k) + wx(i,j,  k))
-             w_y = fourth*(wy(i-1,j-1,k) + wy(i,j-1,k) &
-                  +         wy(i-1,j,  k) + wy(i,j,  k))
-             w_z = fourth*(wz(i-1,j-1,k) + wz(i,j-1,k) &
-                  +         wz(i-1,j,  k) + wz(i,j,  k))
-
-             q_x = fourth*(qx(i-1,j-1,k) + qx(i,j-1,k) &
-                  +         qx(i-1,j,  k) + qx(i,j,  k))
-             q_y = fourth*(qy(i-1,j-1,k) + qy(i,j-1,k) &
-                  +         qy(i-1,j,  k) + qy(i,j,  k))
-             q_z = fourth*(qz(i-1,j-1,k) + qz(i,j-1,k) &
-                  +         qz(i-1,j,  k) + qz(i,j,  k))
-
-
-             ! The gradients in the normal direction are corrected, such
-             ! that no averaging takes places here.
-             ! First determine the vector in the direction from the
-             ! cell center k to cell center k+1.
-
-             ssx = eighth*(x(i-1,j-1,k+1,1) - x(i-1,j-1,k-1,1) &
-                  +         x(i-1,j,  k+1,1) - x(i-1,j,  k-1,1) &
-                  +         x(i,  j-1,k+1,1) - x(i,  j-1,k-1,1) &
-                  +         x(i,  j,  k+1,1) - x(i,  j,  k-1,1))
-             ssy = eighth*(x(i-1,j-1,k+1,2) - x(i-1,j-1,k-1,2) &
-                  +         x(i-1,j,  k+1,2) - x(i-1,j,  k-1,2) &
-                  +         x(i,  j-1,k+1,2) - x(i,  j-1,k-1,2) &
-                  +         x(i,  j,  k+1,2) - x(i,  j,  k-1,2))
-             ssz = eighth*(x(i-1,j-1,k+1,3) - x(i-1,j-1,k-1,3) &
-                  +         x(i-1,j,  k+1,3) - x(i-1,j,  k-1,3) &
-                  +         x(i,  j-1,k+1,3) - x(i,  j-1,k-1,3) &
-                  +         x(i,  j,  k+1,3) - x(i,  j,  k-1,3))
-
-             ! Determine the length of this vector and create the
-             ! unit normal.
-
-             snrm  = one/sqrt(ssx*ssx + ssy*ssy + ssz*ssz)
-             ssx = snrm*ssx
-             ssy = snrm*ssy
-             ssz = snrm*ssz
-
-             ! Correct the gradients.
-
-             corr = u_x*ssx + u_y*ssy + u_z*ssz        &
-                  - (w(i,j,k+1,ivx) - w(i,j,k,ivx))*snrm
-             u_x  = u_x - corr*ssx
-             u_y  = u_y - corr*ssy
-             u_z  = u_z - corr*ssz
-
-             corr = v_x*ssx + v_y*ssy + v_z*ssz        &
-                  - (w(i,j,k+1,ivy) - w(i,j,k,ivy))*snrm
-             v_x  = v_x - corr*ssx
-             v_y  = v_y - corr*ssy
-             v_z  = v_z - corr*ssz
-
-             corr = w_x*ssx + w_y*ssy + w_z*ssz        &
-                  - (w(i,j,k+1,ivz) - w(i,j,k,ivz))*snrm
-             w_x  = w_x - corr*ssx
-             w_y  = w_y - corr*ssy
-             w_z  = w_z - corr*ssz
-
-             corr = q_x*ssx + q_y*ssy + q_z*ssz &
-                  + (aa(i,j,k+1) - aa(i,j,k))*snrm
-             q_x  = q_x - corr*ssx
-             q_y  = q_y - corr*ssy
-             q_z  = q_z - corr*ssz
-
-             ! Compute the stress tensor and the heat flux vector.
-             ! We remove the viscosity from the stress tensor (tau)
-             ! to define tauS since we still need to separate between
-             ! laminar and turbulent stress for QCR.
-             ! Therefore, laminar tau = mue*tauS, turbulent
-             ! tau = mue*tauS, and total tau = mut*tauS.
-
-             fracDiv = twoThird*(u_x + v_y + w_z)
-
-             tauxxS = two*u_x - fracDiv
-             tauyyS = two*v_y - fracDiv
-             tauzzS = two*w_z - fracDiv
-
-             tauxyS = u_y + v_x
-             tauxzS = u_z + w_x
-             tauyzS = v_z + w_y
-
-             q_x = heatCoef*q_x
-             q_y = heatCoef*q_y
-             q_z = heatCoef*q_z
-
-             ! Add QCR corrections if necessary
-             if (useQCR) then
-
-                ! In the QCR formulation, we add an extra term to the turbulent stress tensor:
-                !
-                ! tau_ij,QCR = tau_ij - e_ij
-                !
-                ! where, according to TMR website (http://turbmodels.larc.nasa.gov/spalart.html):
-                !
-                ! e_ij = Ccr1*(O_ik*tau_jk + O_jk*tau_ik)
-                !
-                ! We are computing O_ik as follows:
-                !
-                ! O_ik = 2*W_ik/den
-                !
-                ! Remember that the tau_ij in e_ij should use only the eddy viscosity!
-
-                ! Compute denominator
-                den = sqrt(u_x*u_x + u_y*u_y + u_z*u_z + &
-                     v_x*v_x + v_y*v_y + v_z*v_z + &
-                     w_x*w_x + w_y*w_y + w_z*w_z)
-
-                ! Denominator should be limited to avoid division by zero in regions with
-                ! no gradients
-                den = max(den, xminn)
-
-                ! Compute factor that will multiply all tensor components.
-                ! Here we add the eddy viscosity that should multiply the stress tensor (tau)
-                ! components as well.
-                fact = mue*Ccr1/den
-
-                ! Compute off-diagonal terms of vorticity tensor (we will ommit the 1/2)
-                ! The diagonals of the vorticity tensor components are always zero
-                Wxy = u_y - v_x
-                Wxz = u_z - w_x
-                Wyz = v_z - w_y
-                Wyx = -Wxy
-                Wzx = -Wxz
-                Wzy = -Wyz
-
-                ! Compute the extra terms of the Boussinesq relation
-                exx = fact*(Wxy*tauxyS + Wxz*tauxzS)*two
-                eyy = fact*(Wyx*tauxyS + Wyz*tauyzS)*two
-                ezz = fact*(Wzx*tauxzS + Wzy*tauyzS)*two
-
-                exy = fact*(Wxy*tauyyS + Wxz*tauyzS + &
-                     Wyx*tauxxS + Wyz*tauxzS)
-                exz = fact*(Wxy*tauyzS + Wxz*tauzzS + &
-                     Wzx*tauxxS + Wzy*tauxyS)
-                eyz = fact*(Wyx*tauxzS + Wyz*tauzzS + &
-                     Wzx*tauxyS + Wzy*tauyyS)
-
-                ! Apply the total viscosity to the stress tensor and add extra terms
-                tauxx = mut*tauxxS - exx
-                tauyy = mut*tauyyS - eyy
-                tauzz = mut*tauzzS - ezz
-                tauxy = mut*tauxyS - exy
-                tauxz = mut*tauxzS - exz
-                tauyz = mut*tauyzS - eyz
-
-             else
-
-                ! Just apply the total viscosity to the stress tensor
-                tauxx = mut*tauxxS
-                tauyy = mut*tauyyS
-                tauzz = mut*tauzzS
-                tauxy = mut*tauxyS
-                tauxz = mut*tauxzS
-                tauyz = mut*tauyzS
-
-             end if
-
-             ! Compute the average velocities for the face. Remember that
-             ! the velocities are stored and not the momentum.
-
-             ubar = half*(w(i,j,k,ivx) + w(i,j,k+1,ivx))
-             vbar = half*(w(i,j,k,ivy) + w(i,j,k+1,ivy))
-             wbar = half*(w(i,j,k,ivz) + w(i,j,k+1,ivz))
-
-             ! Compute the viscous fluxes for this k-face.
-
-             fmx   = tauxx*sk(i,j,k,1) + tauxy*sk(i,j,k,2) &
-                  + tauxz*sk(i,j,k,3)
-             fmy   = tauxy*sk(i,j,k,1) + tauyy*sk(i,j,k,2) &
-                  + tauyz*sk(i,j,k,3)
-             fmz   = tauxz*sk(i,j,k,1) + tauyz*sk(i,j,k,2) &
-                  + tauzz*sk(i,j,k,3)
-             frhoE =         (ubar*tauxx + vbar*tauxy + wbar*tauxz)*sk(i,j,k,1)
-             frhoE = frhoE + (ubar*tauxy + vbar*tauyy + wbar*tauyz)*sk(i,j,k,2)
-             frhoE = frhoE + (ubar*tauxz + vbar*tauyz + wbar*tauzz)*sk(i,j,k,3)
-             frhoE = frhoE -  q_x*sk(i,j,k,1) - q_y*sk(i,j,k,2) - q_z*sk(i,j,k,3)
-
-             ! Update the residuals of cell k and k+1.
-
-             fw(i,j,k,imx)   = fw(i,j,k,imx)   - fmx
-             fw(i,j,k,imy)   = fw(i,j,k,imy)   - fmy
-             fw(i,j,k,imz)   = fw(i,j,k,imz)   - fmz
-             fw(i,j,k,irhoE) = fw(i,j,k,irhoE) - frhoE
-
-             fw(i,j,k+1,imx)   = fw(i,j,k+1,imx)   + fmx
-             fw(i,j,k+1,imy)   = fw(i,j,k+1,imy)   + fmy
-             fw(i,j,k+1,imz)   = fw(i,j,k+1,imz)   + fmz
-             fw(i,j,k+1,irhoE) = fw(i,j,k+1,irhoE) + frhoE
-
-             ! Temporarily store the shear stress and heat flux, even
-             ! if we won't need it. This can still vectorize
-
-             if (k == 1) then
-                tmpStore(1, i, j, 1) = tauxx
-                tmpStore(2, i, j, 1) = tauyy
-                tmpStore(3, i, j, 1) = tauzz
-                tmpStore(4, i, j, 1) = tauxy
-                tmpStore(5, i, j, 1) = tauxz
-                tmpStore(6, i, j, 1) = tauyz
-
-                tmpStore(7, i, j, 1) = q_x
-                tmpStore(8, i, j, 1) = q_y
-                tmpStore(9, i, j, 1) = q_z
-             end if
-
-             if (k == kl) then
-                tmpStore(1, i, j, 2) = tauxx
-                tmpStore(2, i, j, 2) = tauyy
-                tmpStore(3, i, j, 2) = tauzz
-                tmpStore(4, i, j, 2) = tauxy
-                tmpStore(5, i, j, 2) = tauxz
-                tmpStore(6, i, j, 2) = tauyz
-
-                tmpStore(7, i, j, 2) = q_x
-                tmpStore(8, i, j, 2) = q_y
-                tmpStore(9, i, j, 2) = q_z
-             end if
-
-          end do
-       enddo
-    end do
-
-    ! Save into the subface if necessary
-    if (storeWall) then
-
-       origKMin: if (kk-1 == 1) then
-          do j=2, jl
-             do i=2, il
-                io = i + ii - 2
-                jo = j + jj - 2
-
-                if (viscKminPointer(io, jo) > 0) then
-                   viscSubface(viscKminPointer(io, jo))%tau(io, jo, :) = tmpStore(1:6, i, j, 1)
-                   viscSubface(viscKminPointer(io, jo))%q(io, jo, :) = tmpStore(7:9, i, j, 1)
-                endif
-             end do
-          end do
-       end if origKMin
-
-       origKMax: if (kk + nz - 1 == bkl) then
-          do j=2, jl
-             do i=2, il
-                io = i + ii - 2
-                jo = j + jj - 2
-                if(viscKmaxPointer(io, jo) > 0) then
-                   viscSubface(viscKmaxPointer(io, jo))%tau(io, jo, :) = tmpStore(1:6, i, j, 2)
-                   viscSubface(viscKmaxPointer(io, jo))%q(io, jo, :) = tmpStore(7:9, i, j, 2)
-                endif
-             end do
-          end do
-       end if origKMax
-    end if
-    !
-    !         Viscous fluxes in the j-direction.
-    !
-    do k=2,kl
-       do j=1,jl
-          do i=2,il
-
-
-             ! Set the value of the porosity. If not zero, it is set
-             ! to average the eddy-viscosity and to take the factor
-             ! rFilv into account.
-
-             por = half*rFilv
-             if(porJ(i,j,k) == noFlux) por = zero
-
-             ! Compute the laminar and (if present) the eddy viscosities
-             ! multiplied by the porosity. Compute the factor in front of
-             ! the gradients of the speed of sound squared for the heat
-             ! flux.
-
-             mul = por*(rlv(i,j,k) + rlv(i,j+1,k))
-             mue = por*(rev(i,j,k) + rev(i,j+1,k))
-             mut = mul + mue
-
-             gm1          = half*(gamma(i,j,k) + gamma(i,j+1,k)) - one
-             factLamHeat  = one/(prandtl*gm1)
-             factTurbHeat = one/(prandtlTurb*gm1)
-
-             heatCoef = mul*factLamHeat + mue*factTurbHeat
-
-             ! Compute the gradients at the face by averaging the four
-             ! nodal values.
-
-             u_x = fourth*(ux(i-1,j,k-1) + ux(i,j,k-1) &
-                  +        ux(i-1,j,k  ) + ux(i,j,k  ))
-             u_y = fourth*(uy(i-1,j,k-1) + uy(i,j,k-1) &
-                  +        uy(i-1,j,k  ) + uy(i,j,k  ))
-             u_z = fourth*(uz(i-1,j,k-1) + uz(i,j,k-1) &
-                  +        uz(i-1,j,k  ) + uz(i,j,k  ))
-
-             v_x = fourth*(vx(i-1,j,k-1) + vx(i,j,k-1) &
-                  +        vx(i-1,j,k  ) + vx(i,j,k  ))
-             v_y = fourth*(vy(i-1,j,k-1) + vy(i,j,k-1) &
-                  +        vy(i-1,j,k  ) + vy(i,j,k  ))
-             v_z = fourth*(vz(i-1,j,k-1) + vz(i,j,k-1) &
-                  +        vz(i-1,j,k  ) + vz(i,j,k  ))
-
-             w_x = fourth*(wx(i-1,j,k-1) + wx(i,j,k-1) &
-                  +        wx(i-1,j,k  ) + wx(i,j,k  ))
-             w_y = fourth*(wy(i-1,j,k-1) + wy(i,j,k-1) &
-                  +        wy(i-1,j,k  ) + wy(i,j,k  ))
-             w_z = fourth*(wz(i-1,j,k-1) + wz(i,j,k-1) &
-                  +        wz(i-1,j,k  ) + wz(i,j,k  ))
-
-             q_x = fourth*(qx(i-1,j,k-1) + qx(i,j,k-1) &
-                  +        qx(i-1,j,k  ) + qx(i,j,k  ))
-             q_y = fourth*(qy(i-1,j,k-1) + qy(i,j,k-1) &
-                  +        qy(i-1,j,k  ) + qy(i,j,k  ))
-             q_z = fourth*(qz(i-1,j,k-1) + qz(i,j,k-1) &
-                  +        qz(i-1,j,k  ) + qz(i,j,k  ))
-
-             ! The gradients in the normal direction are corrected, such
-             ! that no averaging takes places here.
-             ! First determine the vector in the direction from the
-             ! cell center j to cell center j+1.
-
-             ssx = eighth*(x(i-1,j+1,k-1,1) - x(i-1,j-1,k-1,1) &
-                  +         x(i-1,j+1,k,  1) - x(i-1,j-1,k,  1) &
-                  +         x(i,  j+1,k-1,1) - x(i,  j-1,k-1,1) &
-                  +         x(i,  j+1,k,  1) - x(i,  j-1,k,  1))
-             ssy = eighth*(x(i-1,j+1,k-1,2) - x(i-1,j-1,k-1,2) &
-                  +         x(i-1,j+1,k,  2) - x(i-1,j-1,k,  2) &
-                  +         x(i,  j+1,k-1,2) - x(i,  j-1,k-1,2) &
-                  +         x(i,  j+1,k,  2) - x(i,  j-1,k,  2))
-             ssz = eighth*(x(i-1,j+1,k-1,3) - x(i-1,j-1,k-1,3) &
-                  +         x(i-1,j+1,k,  3) - x(i-1,j-1,k,  3) &
-                  +         x(i,  j+1,k-1,3) - x(i,  j-1,k-1,3) &
-                  +         x(i,  j+1,k,  3) - x(i,  j-1,k,  3))
-
-             ! Determine the length of this vector and create the
-             ! unit normal.
-
-             snrm  = one/sqrt(ssx*ssx + ssy*ssy + ssz*ssz)
-             ssx = snrm*ssx
-             ssy = snrm*ssy
-             ssz = snrm*ssz
-
-             ! Correct the gradients.
-
-             corr = u_x*ssx + u_y*ssy + u_z*ssz        &
-                  - (w(i,j+1,k,ivx) - w(i,j,k,ivx))*snrm
-             u_x  = u_x - corr*ssx
-             u_y  = u_y - corr*ssy
-             u_z  = u_z - corr*ssz
-
-             corr = v_x*ssx + v_y*ssy + v_z*ssz        &
-                  - (w(i,j+1,k,ivy) - w(i,j,k,ivy))*snrm
-             v_x  = v_x - corr*ssx
-             v_y  = v_y - corr*ssy
-             v_z  = v_z - corr*ssz
-
-             corr = w_x*ssx + w_y*ssy + w_z*ssz        &
-                  - (w(i,j+1,k,ivz) - w(i,j,k,ivz))*snrm
-             w_x  = w_x - corr*ssx
-             w_y  = w_y - corr*ssy
-             w_z  = w_z - corr*ssz
-
-             corr = q_x*ssx + q_y*ssy + q_z*ssz &
-                  + (aa(i,j+1,k) - aa(i,j,k))*snrm
-             q_x  = q_x - corr*ssx
-             q_y  = q_y - corr*ssy
-             q_z  = q_z - corr*ssz
-
-             ! Compute the stress tensor and the heat flux vector.
-             ! We remove the viscosity from the stress tensor (tau)
-             ! to define tauS since we still need to separate between
-             ! laminar and turbulent stress for QCR.
-             ! Therefore, laminar tau = mue*tauS, turbulent
-             ! tau = mue*tauS, and total tau = mut*tauS.
-
-             fracDiv = twoThird*(u_x + v_y + w_z)
-
-             tauxxS = two*u_x - fracDiv
-             tauyyS = two*v_y - fracDiv
-             tauzzS = two*w_z - fracDiv
-
-             tauxyS = u_y + v_x
-             tauxzS = u_z + w_x
-             tauyzS = v_z + w_y
-
-             q_x = heatCoef*q_x
-             q_y = heatCoef*q_y
-             q_z = heatCoef*q_z
-
-             ! Add QCR corrections if necessary
-             if (useQCR) then
-
-                ! In the QCR formulation, we add an extra term to the turbulent stress tensor:
-                !
-                ! tau_ij,QCR = tau_ij - e_ij
-                !
-                ! where, according to TMR website (http://turbmodels.larc.nasa.gov/spalart.html):
-                !
-                ! e_ij = Ccr1*(O_ik*tau_jk + O_jk*tau_ik)
-                !
-                ! We are computing O_ik as follows:
-                !
-                ! O_ik = 2*W_ik/den
-                !
-                ! Remember that the tau_ij in e_ij should use only the eddy viscosity!
-
-                ! Compute denominator
-                den = sqrt(u_x*u_x + u_y*u_y + u_z*u_z + &
-                     v_x*v_x + v_y*v_y + v_z*v_z + &
-                     w_x*w_x + w_y*w_y + w_z*w_z)
-
-                ! Denominator should be limited to avoid division by zero in regions with
-                ! no gradients
-                den = max(den, xminn)
-
-                ! Compute factor that will multiply all tensor components.
-                ! Here we add the eddy viscosity that should multiply the stress tensor (tau)
-                ! components as well.
-                fact = mue*Ccr1/den
-
-                ! Compute off-diagonal terms of vorticity tensor (we will ommit the 1/2)
-                ! The diagonals of the vorticity tensor components are always zero
-                Wxy = u_y - v_x
-                Wxz = u_z - w_x
-                Wyz = v_z - w_y
-                Wyx = -Wxy
-                Wzx = -Wxz
-                Wzy = -Wyz
-
-                ! Compute the extra terms of the Boussinesq relation
-                exx = fact*(Wxy*tauxyS + Wxz*tauxzS)*two
-                eyy = fact*(Wyx*tauxyS + Wyz*tauyzS)*two
-                ezz = fact*(Wzx*tauxzS + Wzy*tauyzS)*two
-
-                exy = fact*(Wxy*tauyyS + Wxz*tauyzS + &
-                     Wyx*tauxxS + Wyz*tauxzS)
-                exz = fact*(Wxy*tauyzS + Wxz*tauzzS + &
-                     Wzx*tauxxS + Wzy*tauxyS)
-                eyz = fact*(Wyx*tauxzS + Wyz*tauzzS + &
-                     Wzx*tauxyS + Wzy*tauyyS)
-
-                ! Apply the total viscosity to the stress tensor and add extra terms
-                tauxx = mut*tauxxS - exx
-                tauyy = mut*tauyyS - eyy
-                tauzz = mut*tauzzS - ezz
-                tauxy = mut*tauxyS - exy
-                tauxz = mut*tauxzS - exz
-                tauyz = mut*tauyzS - eyz
-
-             else
-
-                ! Just apply the total viscosity to the stress tensor
-                tauxx = mut*tauxxS
-                tauyy = mut*tauyyS
-                tauzz = mut*tauzzS
-                tauxy = mut*tauxyS
-                tauxz = mut*tauxzS
-                tauyz = mut*tauyzS
-
-             end if
-
-             ! Compute the average velocities for the face. Remember that
-             ! the velocities are stored and not the momentum.
-
-             ubar = half*(w(i,j,k,ivx) + w(i,j+1,k,ivx))
-             vbar = half*(w(i,j,k,ivy) + w(i,j+1,k,ivy))
-             wbar = half*(w(i,j,k,ivz) + w(i,j+1,k,ivz))
-
-             ! Compute the viscous fluxes for this j-face.
-
-             fmx   = tauxx*sj(i,j,k,1) + tauxy*sj(i,j,k,2) &
-                  + tauxz*sj(i,j,k,3)
-             fmy   = tauxy*sj(i,j,k,1) + tauyy*sj(i,j,k,2) &
-                  + tauyz*sj(i,j,k,3)
-             fmz   = tauxz*sj(i,j,k,1) + tauyz*sj(i,j,k,2) &
-                  + tauzz*sj(i,j,k,3)
-             frhoE = (ubar*tauxx + vbar*tauxy + wbar*tauxz)*sj(i,j,k,1) &
-                  + (ubar*tauxy + vbar*tauyy + wbar*tauyz)*sj(i,j,k,2) &
-                  + (ubar*tauxz + vbar*tauyz + wbar*tauzz)*sj(i,j,k,3) &
-                  - q_x*sj(i,j,k,1) - q_y*sj(i,j,k,2) - q_z*sj(i,j,k,3)
-
-             ! Update the residuals of cell j and j+1.
-
-             fw(i,j,k,imx)   = fw(i,j,k,imx)   - fmx
-             fw(i,j,k,imy)   = fw(i,j,k,imy)   - fmy
-             fw(i,j,k,imz)   = fw(i,j,k,imz)   - fmz
-             fw(i,j,k,irhoE) = fw(i,j,k,irhoE) - frhoE
-
-             fw(i,j+1,k,imx)   = fw(i,j+1,k,imx)   + fmx
-             fw(i,j+1,k,imy)   = fw(i,j+1,k,imy)   + fmy
-             fw(i,j+1,k,imz)   = fw(i,j+1,k,imz)   + fmz
-             fw(i,j+1,k,irhoE) = fw(i,j+1,k,irhoE) + frhoE
-
-             ! Temporarily store the shear stress and heat flux, even
-             ! if we won't need it. This can still vectorize
-
-             if (j == 1) then
-                tmpStore(1, i, k, 1) = tauxx
-                tmpStore(2, i, k, 1) = tauyy
-                tmpStore(3, i, k, 1) = tauzz
-                tmpStore(4, i, k, 1) = tauxy
-                tmpStore(5, i, k, 1) = tauxz
-                tmpStore(6, i, k, 1) = tauyz
-
-                tmpStore(7, i, k, 1) = q_x
-                tmpStore(8, i, k, 1) = q_y
-                tmpStore(9, i, k, 1) = q_z
-             end if
-
-             if (j == jl) then
-                tmpStore(1, i, k, 2) = tauxx
-                tmpStore(2, i, k, 2) = tauyy
-                tmpStore(3, i, k, 2) = tauzz
-                tmpStore(4, i, k, 2) = tauxy
-                tmpStore(5, i, k, 2) = tauxz
-                tmpStore(6, i, k, 2) = tauyz
-
-                tmpStore(7, i, k, 2) = q_x
-                tmpStore(8, i, k, 2) = q_y
-                tmpStore(9, i, k, 2) = q_z
-             end if
-          enddo
-       enddo
-    enddo
-    ! Save into the subface if necessary
-    if (storeWall) then
-       origJMin: if (jj-1 == 1) then
-          do k=2, kl
-             do i=2, il
-                io = i + ii - 2
-                ko = k + kk - 2
-
-                if (viscJminPointer(io, ko) > 0) then
-                   viscSubface(viscJminPointer(io, ko))%tau(io, ko, :) = tmpStore(1:6, i, k, 1)
-                   viscSubface(viscJminPointer(io, ko))%q(io, ko, :) = tmpStore(7:9, i, k, 1)
-                endif
-             end do
-          end do
-       end if origJMin
-
-       origJMax: if (jj + ny - 1 == bjl) then
-          do k=2, kl
-             do i=2, il
-                io = i + ii - 2
-                ko = k + kk - 2
-                if(viscJmaxPointer(io, ko) > 0) then
-                   viscSubface(viscJmaxPointer(io, ko))%tau(io, ko, :) = tmpStore(1:6, i, k, 2)
-                   viscSubface(viscJmaxPointer(io, ko))%q(io, ko, :) = tmpStore(7:9, i, k, 2)
-                endif
-             end do
-          end do
-       end if origJMax
-    end if
-    !
-    !         Viscous fluxes in the i-direction.
-    !
-    do k=2, kl
-       do j=2, jl
-          do i=1, il
-             ! Set the value of the porosity. If not zero, it is set
-             ! to average the eddy-viscosity and to take the factor
-             ! rFilv into account.
-
-             por = half*rFilv
-             if(porI(i,j,k) == noFlux) por = zero
-
-             ! Compute the laminar and (if present) the eddy viscosities
-             ! multiplied the porosity. Compute the factor in front of
-             ! the gradients of the speed of sound squared for the heat
-             ! flux.
-
-             mul = por*(rlv(i,j,k) + rlv(i+1,j,k))
-             mue = por*(rev(i,j,k) + rev(i+1,j,k))
-             mut = mul + mue
-
-             gm1          = half*(gamma(i,j,k) + gamma(i+1,j,k)) - one
-             factLamHeat  = one/(prandtl*gm1)
-             factTurbHeat = one/(prandtlTurb*gm1)
-
-             heatCoef = mul*factLamHeat + mue*factTurbHeat
-
-             ! Compute the gradients at the face by averaging the four
-             ! nodal values.
-
-             u_x = fourth*(ux(i,j-1,k-1) + ux(i,j,k-1) &
-                  +        ux(i,j-1,k  ) + ux(i,j,k  ))
-             u_y = fourth*(uy(i,j-1,k-1) + uy(i,j,k-1) &
-                  +        uy(i,j-1,k  ) + uy(i,j,k  ))
-             u_z = fourth*(uz(i,j-1,k-1) + uz(i,j,k-1) &
-                  +        uz(i,j-1,k  ) + uz(i,j,k  ))
-
-             v_x = fourth*(vx(i,j-1,k-1) + vx(i,j,k-1) &
-                  +        vx(i,j-1,k  ) + vx(i,j,k  ))
-             v_y = fourth*(vy(i,j-1,k-1) + vy(i,j,k-1) &
-                  +        vy(i,j-1,k  ) + vy(i,j,k  ))
-             v_z = fourth*(vz(i,j-1,k-1) + vz(i,j,k-1) &
-                  +        vz(i,j-1,k  ) + vz(i,j,k  ))
-
-             w_x = fourth*(wx(i,j-1,k-1) + wx(i,j,k-1) &
-                  +        wx(i,j-1,k  ) + wx(i,j,k  ))
-             w_y = fourth*(wy(i,j-1,k-1) + wy(i,j,k-1) &
-                  +        wy(i,j-1,k  ) + wy(i,j,k  ))
-             w_z = fourth*(wz(i,j-1,k-1) + wz(i,j,k-1) &
-                  +        wz(i,j-1,k  ) + wz(i,j,k  ))
-
-             q_x = fourth*(qx(i,j-1,k-1) + qx(i,j,k-1) &
-                  +        qx(i,j-1,k  ) + qx(i,j,k  ))
-             q_y = fourth*(qy(i,j-1,k-1) + qy(i,j,k-1) &
-                  +        qy(i,j-1,k  ) + qy(i,j,k  ))
-             q_z = fourth*(qz(i,j-1,k-1) + qz(i,j,k-1) &
-                  +        qz(i,j-1,k  ) + qz(i,j,k  ))
-
-             ! The gradients in the normal direction are corrected, such
-             ! that no averaging takes places here.
-             ! First determine the vector in the direction from the
-             ! cell center i to cell center i+1.
-
-             ssx = eighth*(x(i+1,j-1,k-1,1) - x(i-1,j-1,k-1,1) &
-                  +         x(i+1,j-1,k,  1) - x(i-1,j-1,k,  1) &
-                  +         x(i+1,j,  k-1,1) - x(i-1,j,  k-1,1) &
-                  +         x(i+1,j,  k,  1) - x(i-1,j,  k,  1))
-             ssy = eighth*(x(i+1,j-1,k-1,2) - x(i-1,j-1,k-1,2) &
-                  +         x(i+1,j-1,k,  2) - x(i-1,j-1,k,  2) &
-                  +         x(i+1,j,  k-1,2) - x(i-1,j,  k-1,2) &
-                  +         x(i+1,j,  k,  2) - x(i-1,j,  k,  2))
-             ssz = eighth*(x(i+1,j-1,k-1,3) - x(i-1,j-1,k-1,3) &
-                  +         x(i+1,j-1,k,  3) - x(i-1,j-1,k,  3) &
-                  +         x(i+1,j,  k-1,3) - x(i-1,j,  k-1,3) &
-                  +         x(i+1,j,  k,  3) - x(i-1,j,  k,  3))
-
-             ! Determine the length of this vector and create the
-             ! unit normal.
-
-             snrm  = one/sqrt(ssx*ssx + ssy*ssy + ssz*ssz)
-             ssx = snrm*ssx
-             ssy = snrm*ssy
-             ssz = snrm*ssz
-
-             ! Correct the gradients.
-
-             corr = u_x*ssx + u_y*ssy + u_z*ssz        &
-                  - (w(i+1,j,k,ivx) - w(i,j,k,ivx))*snrm
-             u_x  = u_x - corr*ssx
-             u_y  = u_y - corr*ssy
-             u_z  = u_z - corr*ssz
-
-             corr = v_x*ssx + v_y*ssy + v_z*ssz        &
-                  - (w(i+1,j,k,ivy) - w(i,j,k,ivy))*snrm
-             v_x  = v_x - corr*ssx
-             v_y  = v_y - corr*ssy
-             v_z  = v_z - corr*ssz
-
-             corr = w_x*ssx + w_y*ssy + w_z*ssz        &
-                  - (w(i+1,j,k,ivz) - w(i,j,k,ivz))*snrm
-             w_x  = w_x - corr*ssx
-             w_y  = w_y - corr*ssy
-             w_z  = w_z - corr*ssz
-
-             corr = q_x*ssx + q_y*ssy + q_z*ssz &
-                  + (aa(i+1,j,k) - aa(i,j,k))*snrm
-             q_x  = q_x - corr*ssx
-             q_y  = q_y - corr*ssy
-             q_z  = q_z - corr*ssz
-
-             ! Compute the stress tensor and the heat flux vector.
-             ! We remove the viscosity from the stress tensor (tau)
-             ! to define tauS since we still need to separate between
-             ! laminar and turbulent stress for QCR.
-             ! Therefore, laminar tau = mue*tauS, turbulent
-             ! tau = mue*tauS, and total tau = mut*tauS.
-
-             fracDiv = twoThird*(u_x + v_y + w_z)
-
-             tauxxS = two*u_x - fracDiv
-             tauyyS = two*v_y - fracDiv
-             tauzzS = two*w_z - fracDiv
-
-             tauxyS = u_y + v_x
-             tauxzS = u_z + w_x
-             tauyzS = v_z + w_y
-
-             q_x = heatCoef*q_x
-             q_y = heatCoef*q_y
-             q_z = heatCoef*q_z
-
-             ! Add QCR corrections if necessary
-             if (useQCR) then
-
-                ! In the QCR formulation, we add an extra term to the turbulent stress tensor:
-                !
-                ! tau_ij,QCR = tau_ij - e_ij
-                !
-                ! where, according to TMR website (http://turbmodels.larc.nasa.gov/spalart.html):
-                !
-                ! e_ij = Ccr1*(O_ik*tau_jk + O_jk*tau_ik)
-                !
-                ! We are computing O_ik as follows:
-                !
-                ! O_ik = 2*W_ik/den
-                !
-                ! Remember that the tau_ij in e_ij should use only the eddy viscosity!
-
-                ! Compute denominator
-                den = sqrt(u_x*u_x + u_y*u_y + u_z*u_z + &
-                     v_x*v_x + v_y*v_y + v_z*v_z + &
-                     w_x*w_x + w_y*w_y + w_z*w_z)
-
-                ! Denominator should be limited to avoid division by zero in regions with
-                ! no gradients
-                den = max(den, xminn)
-
-                ! Compute factor that will multiply all tensor components.
-                ! Here we add the eddy viscosity that should multiply the stress tensor (tau)
-                ! components as well.
-                fact = mue*Ccr1/den
-
-                ! Compute off-diagonal terms of vorticity tensor (we will ommit the 1/2)
-                ! The diagonals of the vorticity tensor components are always zero
-                Wxy = u_y - v_x
-                Wxz = u_z - w_x
-                Wyz = v_z - w_y
-                Wyx = -Wxy
-                Wzx = -Wxz
-                Wzy = -Wyz
-
-                ! Compute the extra terms of the Boussinesq relation
-                exx = fact*(Wxy*tauxyS + Wxz*tauxzS)*two
-                eyy = fact*(Wyx*tauxyS + Wyz*tauyzS)*two
-                ezz = fact*(Wzx*tauxzS + Wzy*tauyzS)*two
-
-                exy = fact*(Wxy*tauyyS + Wxz*tauyzS + &
-                     Wyx*tauxxS + Wyz*tauxzS)
-                exz = fact*(Wxy*tauyzS + Wxz*tauzzS + &
-                     Wzx*tauxxS + Wzy*tauxyS)
-                eyz = fact*(Wyx*tauxzS + Wyz*tauzzS + &
-                     Wzx*tauxyS + Wzy*tauyyS)
-
-                ! Apply the total viscosity to the stress tensor and add extra terms
-                tauxx = mut*tauxxS - exx
-                tauyy = mut*tauyyS - eyy
-                tauzz = mut*tauzzS - ezz
-                tauxy = mut*tauxyS - exy
-                tauxz = mut*tauxzS - exz
-                tauyz = mut*tauyzS - eyz
-
-             else
-
-                ! Just apply the total viscosity to the stress tensor
-                tauxx = mut*tauxxS
-                tauyy = mut*tauyyS
-                tauzz = mut*tauzzS
-                tauxy = mut*tauxyS
-                tauxz = mut*tauxzS
-                tauyz = mut*tauyzS
-
-             end if
-
-             ! Compute the average velocities for the face. Remember that
-             ! the velocities are stored and not the momentum.
-
-             ubar = half*(w(i,j,k,ivx) + w(i+1,j,k,ivx))
-             vbar = half*(w(i,j,k,ivy) + w(i+1,j,k,ivy))
-             wbar = half*(w(i,j,k,ivz) + w(i+1,j,k,ivz))
-
-             ! Compute the viscous fluxes for this i-face.
-
-             fmx   = tauxx*si(i,j,k,1) + tauxy*si(i,j,k,2) &
-                  + tauxz*si(i,j,k,3)
-             fmy   = tauxy*si(i,j,k,1) + tauyy*si(i,j,k,2) &
-                  + tauyz*si(i,j,k,3)
-             fmz   = tauxz*si(i,j,k,1) + tauyz*si(i,j,k,2) &
-                  + tauzz*si(i,j,k,3)
-             frhoE = (ubar*tauxx + vbar*tauxy + wbar*tauxz)*si(i,j,k,1) &
-                  + (ubar*tauxy + vbar*tauyy + wbar*tauyz)*si(i,j,k,2) &
-                  + (ubar*tauxz + vbar*tauyz + wbar*tauzz)*si(i,j,k,3) &
-                  - q_x*si(i,j,k,1) - q_y*si(i,j,k,2) - q_z*si(i,j,k,3)
-
-             ! Update the residuals of cell i and i+1.
-             fw(i+1,j,k,imx)   = fw(i+1,j,k,imx)   + fmx
-             fw(i+1,j,k,imy)   = fw(i+1,j,k,imy)   + fmy
-             fw(i+1,j,k,imz)   = fw(i+1,j,k,imz)   + fmz
-             fw(i+1,j,k,irhoE) = fw(i+1,j,k,irhoE) + frhoE
-
-             fw(i,j,k,imx)   = fw(i,j,k,imx)   - fmx
-             fw(i,j,k,imy)   = fw(i,j,k,imy)   - fmy
-             fw(i,j,k,imz)   = fw(i,j,k,imz)   - fmz
-             fw(i,j,k,irhoE) = fw(i,j,k,irhoE) - frhoE
-
-
-             ! Temporarily store the shear stress and heat flux, even
-             ! if we won't need it. This can still vectorize
-
-             if (i == 1) then
-                tmpStore(1, j, k, 1) = tauxx
-                tmpStore(2, j, k, 1) = tauyy
-                tmpStore(3, j, k, 1) = tauzz
-                tmpStore(4, j, k, 1) = tauxy
-                tmpStore(5, j, k, 1) = tauxz
-                tmpStore(6, j, k, 1) = tauyz
-
-                tmpStore(7, j, k, 1) = q_x
-                tmpStore(8, j, k, 1) = q_y
-                tmpStore(9, j, k, 1) = q_z
-             end if
-
-             if (i == il) then
-                tmpStore(1, j, k, 2) = tauxx
-                tmpStore(2, j, k, 2) = tauyy
-                tmpStore(3, j, k, 2) = tauzz
-                tmpStore(4, j, k, 2) = tauxy
-                tmpStore(5, j, k, 2) = tauxz
-                tmpStore(6, j, k, 2) = tauyz
-
-                tmpStore(7, j, k, 2) = q_x
-                tmpStore(8, j, k, 2) = q_y
-                tmpStore(9, j, k, 2) = q_z
-             end if
-          enddo
-       enddo
-    enddo
-    ! Save into the subface if necessary
-    if (storeWall) then
-       origIMin: if (ii-1 == 1) then
-          do k=2, kl
-             do j=2, jl
-                jo = j + jj - 2
-                ko = k + kk - 2
-
-                if (viscIminPointer(jo, ko) > 0) then
-                   viscSubface(viscIminPointer(jo, ko))%tau(jo, ko, :) = tmpStore(1:6, j, k, 1)
-                   viscSubface(viscIminPointer(jo, ko))%q(jo, ko, :) = tmpStore(7:9, j, k, 1)
-                endif
-             end do
-          end do
-       end if origIMin
-
-       origIMax: if (ii + nx - 1 == bil) then
-          do k=2, kl
-             do j=2, jl
-                jo = j + jj - 2
-                ko = k + kk - 2
-                if(viscImaxPointer(jo, ko) > 0) then
-                   viscSubface(viscImaxPointer(jo, ko))%tau(jo, ko, :) = tmpStore(1:6, j, k, 2)
-                   viscSubface(viscImaxPointer(jo, ko))%q(jo, ko, :) = tmpStore(7:9, j, k, 2)
-                endif
-             end do
-          end do
-       end if origIMax
-    end if
-  end subroutine viscousFlux
-
-  subroutine viscousFluxApprox
+                    ! Compute the correct velocity gradients and "unit" heat
+                    ! fluxes. The velocity gradients are stored in ux, etc.
 
-    use constants
-    use flowVarRefState
-    use inputPhysics
-    use iteration
-    implicit none
-    !
-    !      Local parameter.
-    !
-    real(kind=realType), parameter :: twoThird = two*third
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k
-    integer(kind=intType) :: ii, jj, kk
+                    ux(i, j, k) = ux(i, j, k) * oVol
+                    uy(i, j, k) = uy(i, j, k) * oVol
+                    uz(i, j, k) = uz(i, j, k) * oVol
 
-    real(kind=realType) :: rFilv, por, mul, mue, mut, heatCoef
-    real(kind=realType) :: gm1, factLamHeat, factTurbHeat
-    real(kind=realType) :: u_x, u_y, u_z, v_x, v_y, v_z, w_x, w_y, w_z
-    real(kind=realType) :: q_x, q_y, q_z, ubar, vbar, wbar
-    real(kind=realType) :: corr, ssx, ssy, ssz, ss, fracDiv
-    real(kind=realType) :: tauxx, tauyy, tauzz
-    real(kind=realType) :: tauxy, tauxz, tauyz
-    real(kind=realType) :: fmx, fmy, fmz, frhoE
-    real(kind=realType) :: dd
-    logical :: correctForK
-
-    mue = zero
-    rFilv = rFil
-
-    ! Viscous fluxes in the I-direction
-
-    do k=2,kl
-       do j=2,jl
-          do i=1,il
-
-             ! Compute the vector from the center of cell i to cell i+1
-             ssx = eighth*(x(i+1,j-1,k-1,1) - x(i-1,j-1,k-1,1) &
-                  +         x(i+1,j-1,k,  1) - x(i-1,j-1,k,  1) &
-                  +         x(i+1,j,  k-1,1) - x(i-1,j,  k-1,1) &
-                  +         x(i+1,j,  k,  1) - x(i-1,j,  k,  1))
-             ssy = eighth*(x(i+1,j-1,k-1,2) - x(i-1,j-1,k-1,2) &
-                  +         x(i+1,j-1,k,  2) - x(i-1,j-1,k,  2) &
-                  +         x(i+1,j,  k-1,2) - x(i-1,j,  k-1,2) &
-                  +         x(i+1,j,  k,  2) - x(i-1,j,  k,  2))
-             ssz = eighth*(x(i+1,j-1,k-1,3) - x(i-1,j-1,k-1,3) &
-                  +         x(i+1,j-1,k,  3) - x(i-1,j-1,k,  3) &
-                  +         x(i+1,j,  k-1,3) - x(i-1,j,  k-1,3) &
-                  +         x(i+1,j,  k,  3) - x(i-1,j,  k,  3))
-
-             ! And determine one/ length of vector squared
-             ss  = one/(ssx*ssx + ssy*ssy + ssz*ssz)
-             ssx = ss*ssx
-             ssy = ss*ssy
-             ssz = ss*ssz
-
-             ! Now compute each gradient
-             dd = w(i+1, j, k, ivx)-w(i, j, k, ivx)
-             u_x = dd*ssx
-             u_y = dd*ssy
-             u_z = dd*ssz
-
-             dd = w(i+1, j, k, ivy)-w(i, j, k, ivy)
-             v_x = dd*ssx
-             v_y = dd*ssy
-             v_z = dd*ssz
-
-             dd = w(i+1, j, k, ivz)-w(i, j, k, ivz)
-             w_x = dd*ssx
-             w_y = dd*ssy
-             w_z = dd*ssz
-
-             dd = aa(i+1, j, k)-aa(i, j, k)
-             q_x = -dd*ssx
-             q_y = -dd*ssy
-             q_z = -dd*ssz
-
-             por = half*rFilv
-             if(porI(i,j,k) == noFlux) por = zero
-
-             ! Compute the laminar and (if present) the eddy viscosities
-             ! multiplied by the porosity. Compute the factor in front of
-             ! the gradients of the speed of sound squared for the heat
-             ! flux.
-
-             mul = por*(rlv(i,j,k) + rlv(i+1,j,k))
-             mue = por*(rev(i,j,k) + rev(i+1,j,k))
-             mut = mul + mue
-
-             gm1          = half*(gamma(i,j,k) +gamma(i+1,j,k))- one
-             factLamHeat  = one/(prandtl*gm1)
-             factTurbHeat = one/(prandtlTurb*gm1)
-
-             heatCoef = mul*factLamHeat + mue*factTurbHeat
-
-             ! Compute the stress tensor and the heat flux vector.
-
-             fracDiv = twoThird*(u_x + v_y + w_z)
-
-             tauxx = mut*(two*u_x - fracDiv)
-             tauyy = mut*(two*v_y - fracDiv)
-             tauzz = mut*(two*w_z - fracDiv)
-
-             tauxy = mut*(u_y + v_x)
-             tauxz = mut*(u_z + w_x)
-             tauyz = mut*(v_z + w_y)
-
-             q_x = heatCoef*q_x
-             q_y = heatCoef*q_y
-             q_z = heatCoef*q_z
-
-             ! Compute the average velocities for the face. Remember that
-             ! the velocities are stored and not the momentum.
-
-             ubar = half*(w(i,j,k,ivx) + w(i+1,j,k,ivx))
-             vbar = half*(w(i,j,k,ivy) + w(i+1,j,k,ivy))
-             wbar = half*(w(i,j,k,ivz) + w(i+1,j,k,ivz))
-
-             ! Compute the viscous fluxes for this i-face.
-
-             fmx   = tauxx*si(i,j,k,1) + tauxy*si(i,j,k,2) + tauxz*si(i,j,k,3)
-             fmy   = tauxy*si(i,j,k,1) + tauyy*si(i,j,k,2) + tauyz*si(i,j,k,3)
-             fmz   = tauxz*si(i,j,k,1) + tauyz*si(i,j,k,2) + tauzz*si(i,j,k,3)
-             frhoE = (ubar*tauxx + vbar*tauxy + wbar*tauxz)*si(i,j,k,1) &
-                  + (ubar*tauxy + vbar*tauyy + wbar*tauyz)*si(i,j,k,2) &
-                  + (ubar*tauxz + vbar*tauyz + wbar*tauzz)*si(i,j,k,3) &
-                  - q_x*si(i,j,k,1) - q_y*si(i,j,k,2) - q_z*si(i,j,k,3)
-
-             ! Update the residuals of cell i and i+1.
-             fw(i+1,j,k,imx)   = fw(i+1,j,k,imx)   + fmx
-             fw(i+1,j,k,imy)   = fw(i+1,j,k,imy)   + fmy
-             fw(i+1,j,k,imz)   = fw(i+1,j,k,imz)   + fmz
-             fw(i+1,j,k,irhoE) = fw(i+1,j,k,irhoE) + frhoE
-
-             fw(i,j,k,imx)   = fw(i,j,k,imx)   - fmx
-             fw(i,j,k,imy)   = fw(i,j,k,imy)   - fmy
-             fw(i,j,k,imz)   = fw(i,j,k,imz)   - fmz
-             fw(i,j,k,irhoE) = fw(i,j,k,irhoE) - frhoE
-
-
-          end do
-       end do
-    end do
-
-    ! Viscous fluxes in the J-direction
-
-    do k=2,kl
-       do j=1,jl
-          do i=2,il
-
-             ! Compute the vector from the center of cell j to cell j+1
-             ssx = eighth*(x(i-1,j+1,k-1,1) - x(i-1,j-1,k-1,1) &
-                  +         x(i-1,j+1,k,  1) - x(i-1,j-1,k,  1) &
-                  +         x(i,  j+1,k-1,1) - x(i,  j-1,k-1,1) &
-                  +         x(i,  j+1,k,  1) - x(i,  j-1,k,  1))
-             ssy = eighth*(x(i-1,j+1,k-1,2) - x(i-1,j-1,k-1,2) &
-                  +         x(i-1,j+1,k,  2) - x(i-1,j-1,k,  2) &
-                  +         x(i,  j+1,k-1,2) - x(i,  j-1,k-1,2) &
-                  +         x(i,  j+1,k,  2) - x(i,  j-1,k,  2))
-             ssz = eighth*(x(i-1,j+1,k-1,3) - x(i-1,j-1,k-1,3) &
-                  +         x(i-1,j+1,k,  3) - x(i-1,j-1,k,  3) &
-                  +         x(i,  j+1,k-1,3) - x(i,  j-1,k-1,3) &
-                  +         x(i,  j+1,k,  3) - x(i,  j-1,k,  3))
-
-             ! And determine one/ length of vector squared
-             ss  = one/(ssx*ssx + ssy*ssy + ssz*ssz)
-             ssx = ss*ssx
-             ssy = ss*ssy
-             ssz = ss*ssz
-
-             ! Now compute each gradient
-             dd = w(i, j+1, k, ivx)-w(i, j, k, ivx)
-             u_x = dd*ssx
-             u_y = dd*ssy
-             u_z = dd*ssz
-
-             dd = w(i, j+1, k, ivy)-w(i, j, k, ivy)
-             v_x = dd*ssx
-             v_y = dd*ssy
-             v_z = dd*ssz
-
-             dd = w(i, j+1, k, ivz)-w(i, j, k, ivz)
-             w_x = dd*ssx
-             w_y = dd*ssy
-             w_z = dd*ssz
-
-             dd = aa(i, j+1, k)-aa(i, j, k)
-             q_x = -dd*ssx
-             q_y = -dd*ssy
-             q_z = -dd*ssz
-
-             por = half*rFilv
-             if(porJ(i,j,k) == noFlux) por = zero
-
-             ! Compute the laminar and (if present) the eddy viscosities
-             ! multiplied by the porosity. Compute the factor in front of
-             ! the gradients of the speed of sound squared for the heat
-             ! flux.
-
-             mul = por*(rlv(i,j,k) + rlv(i,j+1,k))
-             mue = por*(rev(i,j,k) + rev(i,j+1,k))
-             mut = mul + mue
-
-             gm1          = half*(gamma(i,j,k) + gamma(i,j+1,k)) - one
-             factLamHeat  = one/(prandtl*gm1)
-             factTurbHeat = one/(prandtlTurb*gm1)
-
-             heatCoef = mul*factLamHeat + mue*factTurbHeat
-
-             ! Compute the stress tensor and the heat flux vector.
-
-             fracDiv = twoThird*(u_x + v_y + w_z)
-
-             tauxx = mut*(two*u_x - fracDiv)
-             tauyy = mut*(two*v_y - fracDiv)
-             tauzz = mut*(two*w_z - fracDiv)
-
-             tauxy = mut*(u_y + v_x)
-             tauxz = mut*(u_z + w_x)
-             tauyz = mut*(v_z + w_y)
-
-             q_x = heatCoef*q_x
-             q_y = heatCoef*q_y
-             q_z = heatCoef*q_z
-
-             ! Compute the average velocities for the face. Remember that
-             ! the velocities are stored and not the momentum.
-
-             ubar = half*(w(i,j,k,ivx) + w(i,j+1,k,ivx))
-             vbar = half*(w(i,j,k,ivy) + w(i,j+1,k,ivy))
-             wbar = half*(w(i,j,k,ivz) + w(i,j+1,k,ivz))
-
-             ! Compute the viscous fluxes for this j-face.
-
-             fmx   = tauxx*sj(i,j,k,1) + tauxy*sj(i,j,k,2) + tauxz*sj(i,j,k,3)
-             fmy   = tauxy*sj(i,j,k,1) + tauyy*sj(i,j,k,2) + tauyz*sj(i,j,k,3)
-             fmz   = tauxz*sj(i,j,k,1) + tauyz*sj(i,j,k,2) + tauzz*sj(i,j,k,3)
-             frhoE = (ubar*tauxx + vbar*tauxy + wbar*tauxz)*sj(i,j,k,1) &
-                  + (ubar*tauxy + vbar*tauyy + wbar*tauyz)*sj(i,j,k,2) &
-                  + (ubar*tauxz + vbar*tauyz + wbar*tauzz)*sj(i,j,k,3) &
-                  - q_x*sj(i,j,k,1) - q_y*sj(i,j,k,2) - q_z*sj(i,j,k,3)
-
-             ! Update the residuals of cell j and j+1.
-
-             fw(i,j,k,imx)   = fw(i,j,k,imx)   - fmx
-             fw(i,j,k,imy)   = fw(i,j,k,imy)   - fmy
-             fw(i,j,k,imz)   = fw(i,j,k,imz)   - fmz
-             fw(i,j,k,irhoE) = fw(i,j,k,irhoE) - frhoE
-
-             fw(i,j+1,k,imx)   = fw(i,j+1,k,imx)   + fmx
-             fw(i,j+1,k,imy)   = fw(i,j+1,k,imy)   + fmy
-             fw(i,j+1,k,imz)   = fw(i,j+1,k,imz)   + fmz
-             fw(i,j+1,k,irhoE) = fw(i,j+1,k,irhoE) + frhoE
-
-          end do
-       end do
-    end do
-
-    ! Viscous fluxes in the K-direction
-
-    do k=1,kl
-       do j=2,jl
-          do i=2,il
-
-             ! Compute the vector from the center of cell k to cell k+1
-             ssx = eighth*(x(i-1,j-1,k+1,1) - x(i-1,j-1,k-1,1) &
-                  +         x(i-1,j,  k+1,1) - x(i-1,j,  k-1,1) &
-                  +         x(i,  j-1,k+1,1) - x(i,  j-1,k-1,1) &
-                  +         x(i,  j,  k+1,1) - x(i,  j,  k-1,1))
-             ssy = eighth*(x(i-1,j-1,k+1,2) - x(i-1,j-1,k-1,2) &
-                  +         x(i-1,j,  k+1,2) - x(i-1,j,  k-1,2) &
-                  +         x(i,  j-1,k+1,2) - x(i,  j-1,k-1,2) &
-                  +         x(i,  j,  k+1,2) - x(i,  j,  k-1,2))
-             ssz = eighth*(x(i-1,j-1,k+1,3) - x(i-1,j-1,k-1,3) &
-                  +         x(i-1,j,  k+1,3) - x(i-1,j,  k-1,3) &
-                  +         x(i,  j-1,k+1,3) - x(i,  j-1,k-1,3) &
-                  +         x(i,  j,  k+1,3) - x(i,  j,  k-1,3))
-             ! And determine one/ length of vector squared
-             ss  = one/(ssx*ssx + ssy*ssy + ssz*ssz)
-             ssx = ss*ssx
-             ssy = ss*ssy
-             ssz = ss*ssz
-
-             ! Now compute each gradient
-             dd = w(i, j, k+1, ivx)-w(i, j, k, ivx)
-             u_x = dd*ssx
-             u_y = dd*ssy
-             u_z = dd*ssz
-
-             dd = w(i, j, k+1, ivy)-w(i, j, k, ivy)
-             v_x = dd*ssx
-             v_y = dd*ssy
-             v_z = dd*ssz
-
-             dd = w(i, j, k+1, ivz)-w(i, j, k, ivz)
-             w_x = dd*ssx
-             w_y = dd*ssy
-             w_z = dd*ssz
-
-             dd = aa(i, j, k+1)-aa(i, j, k)
-             q_x = -dd*ssx
-             q_y = -dd*ssy
-             q_z = -dd*ssz
-
-             por = half*rFilv
-             if(porK(i,j,k) == noFlux) por = zero
-
-             ! Compute the laminar and (if present) the eddy viscosities
-             ! multiplied by the porosity. Compute the factor in front of
-             ! the gradients of the speed of sound squared for the heat
-             ! flux.
-
-             mul = por*(rlv(i,j,k) + rlv(i,j,k+1))
-             mue = por*(rev(i,j,k) + rev(i,j,k+1))
-             mut = mul + mue
-
-             gm1          = half*(gamma(i,j,k) + gamma(i,j,k+1)) - one
-             factLamHeat  = one/(prandtl*gm1)
-             factTurbHeat = one/(prandtlTurb*gm1)
-
-             heatCoef = mul*factLamHeat + mue*factTurbHeat
-
-             ! Compute the stress tensor and the heat flux vector.
-
-             fracDiv = twoThird*(u_x + v_y + w_z)
-
-             tauxx = mut*(two*u_x - fracDiv)
-             tauyy = mut*(two*v_y - fracDiv)
-             tauzz = mut*(two*w_z - fracDiv)
-
-             tauxy = mut*(u_y + v_x)
-             tauxz = mut*(u_z + w_x)
-             tauyz = mut*(v_z + w_y)
-
-             q_x = heatCoef*q_x
-             q_y = heatCoef*q_y
-             q_z = heatCoef*q_z
-
-             ! Compute the average velocities for the face. Remember that
-             ! the velocities are stored and not the momentum.
-
-             ubar = half*(w(i,j,k,ivx) + w(i,j,k+1,ivx))
-             vbar = half*(w(i,j,k,ivy) + w(i,j,k+1,ivy))
-             wbar = half*(w(i,j,k,ivz) + w(i,j,k+1,ivz))
-
-             ! Compute the viscous fluxes for this j-face.
-
-             fmx   = tauxx*sk(i,j,k,1) + tauxy*sk(i,j,k,2) + tauxz*sk(i,j,k,3)
-             fmy   = tauxy*sk(i,j,k,1) + tauyy*sk(i,j,k,2) + tauyz*sk(i,j,k,3)
-             fmz   = tauxz*sk(i,j,k,1) + tauyz*sk(i,j,k,2) + tauzz*sk(i,j,k,3)
-             frhoE = (ubar*tauxx + vbar*tauxy + wbar*tauxz)*sk(i,j,k,1) &
-                  + (ubar*tauxy + vbar*tauyy + wbar*tauyz)*sk(i,j,k,2) &
-                  + (ubar*tauxz + vbar*tauyz + wbar*tauzz)*sk(i,j,k,3) &
-                  - q_x*sk(i,j,k,1) - q_y*sk(i,j,k,2) - q_z*sk(i,j,k,3)
-
-             ! Update the residuals of cell j and j+1.
-
-             fw(i,j,k,imx)   = fw(i,j,k,imx)   - fmx
-             fw(i,j,k,imy)   = fw(i,j,k,imy)   - fmy
-             fw(i,j,k,imz)   = fw(i,j,k,imz)   - fmz
-             fw(i,j,k,irhoE) = fw(i,j,k,irhoE) - frhoE
-
-             fw(i,j,k+1,imx)   = fw(i,j,k+1,imx)   + fmx
-             fw(i,j,k+1,imy)   = fw(i,j,k+1,imy)   + fmy
-             fw(i,j,k+1,imz)   = fw(i,j,k+1,imz)   + fmz
-             fw(i,j,k+1,irhoE) = fw(i,j,k+1,irhoE) + frhoE
-
-          end do
-       end do
-    end do
-  end subroutine viscousFluxApprox
-
-  subroutine sumDwandFw
-
-    ! ---------------------------------------------
-    !                 Sum dw and fw/res scale
-    ! ---------------------------------------------
-    use constants
-    use flowVarRefState, only : nw, nwf, nt1, nt2
-    use inputIteration, only : turbResScale
-    implicit none
-
-    ! Variables for final summing
-    integer(kind=intType) :: nTurb, i, j, k, l
-    real(kind=realType) :: oVol, rBlank
-
-    nTurb = nt2-nt1+1
-    do l=1,nwf
-       do k=2, kl
-          do j=2, jl
-             do i=2, il
-                rblank = max(real(iblank(i,j,k), realType), zero)
-                dw(i, j, k, l) = (dw(i, j, k, l) + fw(i, j, k, l))*rBlank
-             end do
-          end do
-       end do
-    end do
-  end subroutine sumDwandFw
-
-  subroutine resScale
+                    vx(i, j, k) = vx(i, j, k) * oVol
+                    vy(i, j, k) = vy(i, j, k) * oVol
+                    vz(i, j, k) = vz(i, j, k) * oVol
 
-    use constants
-    use flowVarRefState, only : nwf, nt1, nt2
-    use inputIteration, only : turbResScale
-    implicit none
-
-    ! Local Variables
-    integer(kind=intType) :: i, j, k, ii, nTurb
-    real(kind=realType) :: ovol
-
-    ! Divide through by the reference volume
-    nTurb = nt2-nt1+1
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-
-             oVol = one/volRef(i,j,k)
-             dw(i, j, k, 1:nwf) = dw(i,j, k, 1:nwf)* ovol
-             dw(i, j, k, nt1:nt2) = dw(i, j, k, nt1:nt2) * ovol * turbResScale(1:nTurb)
-          enddo
-       enddo
-    enddo
-
-  end subroutine resScale
+                    wx(i, j, k) = wx(i, j, k) * oVol
+                    wy(i, j, k) = wy(i, j, k) * oVol
+                    wz(i, j, k) = wz(i, j, k) * oVol
+
+                    qx(i, j, k) = qx(i, j, k) * oVol
+                    qy(i, j, k) = qy(i, j, k) * oVol
+                    qz(i, j, k) = qz(i, j, k) * oVol
+                end do
+            end do
+        end do
+    end subroutine allNodalGradients
+
+    subroutine viscousFlux(storeWallTensor)
+        ! ---------------------------------------------
+        !                   Viscous Flux
+        ! ---------------------------------------------
+
+        use constants
+        use inputPhysics, only: useQCR, prandtl, prandtlturb
+        use flowvarRefState, only: eddyModel
+        use iteration, only: rFil
+        use blockPointers, only: bil => il, bjl => jl, bkl => kl, &
+                                 viscIminPointer, viscImaxPointer, viscSubFace, &
+                                 viscJminPointer, viscJmaxPointer, &
+                                 viscKminPointer, viscKmaxPointer
+        implicit none
+
+        ! Input
+        logical, intent(in), optional :: storeWallTensor
+
+        ! Variables for viscous flux
+        real(kind=realType) :: rFilv, por, mul, mue, mut, heatCoef
+        real(kind=realType) :: gm1, factLamHeat, factTurbHeat
+        real(kind=realType) :: u_x, u_y, u_z, v_x, v_y, v_z, w_x, w_y, w_z
+        real(kind=realType) :: q_x, q_y, q_z
+        real(kind=realType) :: corr, ssx, ssy, ssz, fracDiv, snrm
+        real(kind=realType) :: tauxx, tauyy, tauzz
+        real(kind=realType) :: tauxy, tauxz, tauyz
+        real(kind=realType) :: tauxxS, tauyyS, tauzzS
+        real(kind=realType) :: tauxyS, tauxzS, tauyzS
+        real(kind=realType) :: ubar, vbar, wbar
+        real(kind=realType) :: exx, eyy, ezz
+        real(kind=realType) :: exy, exz, eyz
+        real(kind=realType) :: Wxx, Wyy, Wzz
+        real(kind=realType) :: Wxy, Wxz, Wyz, Wyx, Wzx, Wzy
+        real(kind=realType) :: den, Ccr1
+        real(kind=realType) :: fmx, fmy, fmz, frhoE, fact
+        integer(kind=intType) :: i, j, k, io, jo, ko
+        real(kind=realType), parameter :: xminn = 1.e-10_realType
+        real(kind=realType), parameter :: twoThird = two * third
+        real(kind=realType), dimension(9, 2:max(il, jl), 2:max(jl, kl), 2) :: tmpStore
+
+        logical :: storeWall
+
+        storeWall = .False.
+        if (present(storeWallTensor)) then
+            storeWall = storeWallTensor
+        end if
+
+        ! Set QCR parameters
+        Ccr1 = 0.3_realType
+        rFilv = rFil
+
+        ! The diagonals of the vorticity tensor components are always zero
+        Wxx = zero
+        Wyy = zero
+        Wzz = zero
+        !
+        !         viscous fluxes in the k-direction.
+        !
+        mue = zero
+        do k = 1, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Set the value of the porosity. If not zero, it is set
+                    ! to average the eddy-viscosity and to take the factor
+                    ! rFilv into account.
+
+                    por = half * rFilv
+                    if (porK(i, j, k) == noFlux) por = zero
+
+                    ! Compute the laminar and (if present) the eddy viscosities
+                    ! multiplied by the porosity. Compute the factor in front of
+                    ! the gradients of the speed of sound squared for the heat
+                    ! flux.
+
+                    mul = por * (rlv(i, j, k) + rlv(i, j, k + 1))
+                    mue = por * (rev(i, j, k) + rev(i, j, k + 1))
+                    mut = mul + mue
+
+                    gm1 = half * (gamma(i, j, k) + gamma(i, j, k + 1)) - one
+                    factLamHeat = one / (prandtl * gm1)
+                    factTurbHeat = one / (prandtlTurb * gm1)
+
+                    heatCoef = mul * factLamHeat + mue * factTurbHeat
+
+                    ! Compute the gradients at the face by averaging the four
+                    ! nodal values.
+
+                    u_x = fourth * (ux(i - 1, j - 1, k) + ux(i, j - 1, k) &
+                                    + ux(i - 1, j, k) + ux(i, j, k))
+                    u_y = fourth * (uy(i - 1, j - 1, k) + uy(i, j - 1, k) &
+                                    + uy(i - 1, j, k) + uy(i, j, k))
+                    u_z = fourth * (uz(i - 1, j - 1, k) + uz(i, j - 1, k) &
+                                    + uz(i - 1, j, k) + uz(i, j, k))
+
+                    v_x = fourth * (vx(i - 1, j - 1, k) + vx(i, j - 1, k) &
+                                    + vx(i - 1, j, k) + vx(i, j, k))
+                    v_y = fourth * (vy(i - 1, j - 1, k) + vy(i, j - 1, k) &
+                                    + vy(i - 1, j, k) + vy(i, j, k))
+                    v_z = fourth * (vz(i - 1, j - 1, k) + vz(i, j - 1, k) &
+                                    + vz(i - 1, j, k) + vz(i, j, k))
+
+                    w_x = fourth * (wx(i - 1, j - 1, k) + wx(i, j - 1, k) &
+                                    + wx(i - 1, j, k) + wx(i, j, k))
+                    w_y = fourth * (wy(i - 1, j - 1, k) + wy(i, j - 1, k) &
+                                    + wy(i - 1, j, k) + wy(i, j, k))
+                    w_z = fourth * (wz(i - 1, j - 1, k) + wz(i, j - 1, k) &
+                                    + wz(i - 1, j, k) + wz(i, j, k))
+
+                    q_x = fourth * (qx(i - 1, j - 1, k) + qx(i, j - 1, k) &
+                                    + qx(i - 1, j, k) + qx(i, j, k))
+                    q_y = fourth * (qy(i - 1, j - 1, k) + qy(i, j - 1, k) &
+                                    + qy(i - 1, j, k) + qy(i, j, k))
+                    q_z = fourth * (qz(i - 1, j - 1, k) + qz(i, j - 1, k) &
+                                    + qz(i - 1, j, k) + qz(i, j, k))
+
+                    ! The gradients in the normal direction are corrected, such
+                    ! that no averaging takes places here.
+                    ! First determine the vector in the direction from the
+                    ! cell center k to cell center k+1.
+
+                    ssx = eighth * (x(i - 1, j - 1, k + 1, 1) - x(i - 1, j - 1, k - 1, 1) &
+                                    + x(i - 1, j, k + 1, 1) - x(i - 1, j, k - 1, 1) &
+                                    + x(i, j - 1, k + 1, 1) - x(i, j - 1, k - 1, 1) &
+                                    + x(i, j, k + 1, 1) - x(i, j, k - 1, 1))
+                    ssy = eighth * (x(i - 1, j - 1, k + 1, 2) - x(i - 1, j - 1, k - 1, 2) &
+                                    + x(i - 1, j, k + 1, 2) - x(i - 1, j, k - 1, 2) &
+                                    + x(i, j - 1, k + 1, 2) - x(i, j - 1, k - 1, 2) &
+                                    + x(i, j, k + 1, 2) - x(i, j, k - 1, 2))
+                    ssz = eighth * (x(i - 1, j - 1, k + 1, 3) - x(i - 1, j - 1, k - 1, 3) &
+                                    + x(i - 1, j, k + 1, 3) - x(i - 1, j, k - 1, 3) &
+                                    + x(i, j - 1, k + 1, 3) - x(i, j - 1, k - 1, 3) &
+                                    + x(i, j, k + 1, 3) - x(i, j, k - 1, 3))
+
+                    ! Determine the length of this vector and create the
+                    ! unit normal.
+
+                    snrm = one / sqrt(ssx * ssx + ssy * ssy + ssz * ssz)
+                    ssx = snrm * ssx
+                    ssy = snrm * ssy
+                    ssz = snrm * ssz
+
+                    ! Correct the gradients.
+
+                    corr = u_x * ssx + u_y * ssy + u_z * ssz &
+                           - (w(i, j, k + 1, ivx) - w(i, j, k, ivx)) * snrm
+                    u_x = u_x - corr * ssx
+                    u_y = u_y - corr * ssy
+                    u_z = u_z - corr * ssz
+
+                    corr = v_x * ssx + v_y * ssy + v_z * ssz &
+                           - (w(i, j, k + 1, ivy) - w(i, j, k, ivy)) * snrm
+                    v_x = v_x - corr * ssx
+                    v_y = v_y - corr * ssy
+                    v_z = v_z - corr * ssz
+
+                    corr = w_x * ssx + w_y * ssy + w_z * ssz &
+                           - (w(i, j, k + 1, ivz) - w(i, j, k, ivz)) * snrm
+                    w_x = w_x - corr * ssx
+                    w_y = w_y - corr * ssy
+                    w_z = w_z - corr * ssz
+
+                    corr = q_x * ssx + q_y * ssy + q_z * ssz &
+                           + (aa(i, j, k + 1) - aa(i, j, k)) * snrm
+                    q_x = q_x - corr * ssx
+                    q_y = q_y - corr * ssy
+                    q_z = q_z - corr * ssz
+
+                    ! Compute the stress tensor and the heat flux vector.
+                    ! We remove the viscosity from the stress tensor (tau)
+                    ! to define tauS since we still need to separate between
+                    ! laminar and turbulent stress for QCR.
+                    ! Therefore, laminar tau = mue*tauS, turbulent
+                    ! tau = mue*tauS, and total tau = mut*tauS.
+
+                    fracDiv = twoThird * (u_x + v_y + w_z)
+
+                    tauxxS = two * u_x - fracDiv
+                    tauyyS = two * v_y - fracDiv
+                    tauzzS = two * w_z - fracDiv
+
+                    tauxyS = u_y + v_x
+                    tauxzS = u_z + w_x
+                    tauyzS = v_z + w_y
+
+                    q_x = heatCoef * q_x
+                    q_y = heatCoef * q_y
+                    q_z = heatCoef * q_z
+
+                    ! Add QCR corrections if necessary
+                    if (useQCR) then
+
+                        ! In the QCR formulation, we add an extra term to the turbulent stress tensor:
+                        !
+                        ! tau_ij,QCR = tau_ij - e_ij
+                        !
+                        ! where, according to TMR website (http://turbmodels.larc.nasa.gov/spalart.html):
+                        !
+                        ! e_ij = Ccr1*(O_ik*tau_jk + O_jk*tau_ik)
+                        !
+                        ! We are computing O_ik as follows:
+                        !
+                        ! O_ik = 2*W_ik/den
+                        !
+                        ! Remember that the tau_ij in e_ij should use only the eddy viscosity!
+
+                        ! Compute denominator
+                        den = sqrt(u_x * u_x + u_y * u_y + u_z * u_z + &
+                                   v_x * v_x + v_y * v_y + v_z * v_z + &
+                                   w_x * w_x + w_y * w_y + w_z * w_z)
+
+                        ! Denominator should be limited to avoid division by zero in regions with
+                        ! no gradients
+                        den = max(den, xminn)
+
+                        ! Compute factor that will multiply all tensor components.
+                        ! Here we add the eddy viscosity that should multiply the stress tensor (tau)
+                        ! components as well.
+                        fact = mue * Ccr1 / den
+
+                        ! Compute off-diagonal terms of vorticity tensor (we will ommit the 1/2)
+                        ! The diagonals of the vorticity tensor components are always zero
+                        Wxy = u_y - v_x
+                        Wxz = u_z - w_x
+                        Wyz = v_z - w_y
+                        Wyx = -Wxy
+                        Wzx = -Wxz
+                        Wzy = -Wyz
+
+                        ! Compute the extra terms of the Boussinesq relation
+                        exx = fact * (Wxy * tauxyS + Wxz * tauxzS) * two
+                        eyy = fact * (Wyx * tauxyS + Wyz * tauyzS) * two
+                        ezz = fact * (Wzx * tauxzS + Wzy * tauyzS) * two
+
+                        exy = fact * (Wxy * tauyyS + Wxz * tauyzS + &
+                                      Wyx * tauxxS + Wyz * tauxzS)
+                        exz = fact * (Wxy * tauyzS + Wxz * tauzzS + &
+                                      Wzx * tauxxS + Wzy * tauxyS)
+                        eyz = fact * (Wyx * tauxzS + Wyz * tauzzS + &
+                                      Wzx * tauxyS + Wzy * tauyyS)
+
+                        ! Apply the total viscosity to the stress tensor and add extra terms
+                        tauxx = mut * tauxxS - exx
+                        tauyy = mut * tauyyS - eyy
+                        tauzz = mut * tauzzS - ezz
+                        tauxy = mut * tauxyS - exy
+                        tauxz = mut * tauxzS - exz
+                        tauyz = mut * tauyzS - eyz
+
+                    else
+
+                        ! Just apply the total viscosity to the stress tensor
+                        tauxx = mut * tauxxS
+                        tauyy = mut * tauyyS
+                        tauzz = mut * tauzzS
+                        tauxy = mut * tauxyS
+                        tauxz = mut * tauxzS
+                        tauyz = mut * tauyzS
+
+                    end if
+
+                    ! Compute the average velocities for the face. Remember that
+                    ! the velocities are stored and not the momentum.
+
+                    ubar = half * (w(i, j, k, ivx) + w(i, j, k + 1, ivx))
+                    vbar = half * (w(i, j, k, ivy) + w(i, j, k + 1, ivy))
+                    wbar = half * (w(i, j, k, ivz) + w(i, j, k + 1, ivz))
+
+                    ! Compute the viscous fluxes for this k-face.
+
+                    fmx = tauxx * sk(i, j, k, 1) + tauxy * sk(i, j, k, 2) &
+                          + tauxz * sk(i, j, k, 3)
+                    fmy = tauxy * sk(i, j, k, 1) + tauyy * sk(i, j, k, 2) &
+                          + tauyz * sk(i, j, k, 3)
+                    fmz = tauxz * sk(i, j, k, 1) + tauyz * sk(i, j, k, 2) &
+                          + tauzz * sk(i, j, k, 3)
+                    frhoE = (ubar * tauxx + vbar * tauxy + wbar * tauxz) * sk(i, j, k, 1)
+                    frhoE = frhoE + (ubar * tauxy + vbar * tauyy + wbar * tauyz) * sk(i, j, k, 2)
+                    frhoE = frhoE + (ubar * tauxz + vbar * tauyz + wbar * tauzz) * sk(i, j, k, 3)
+                    frhoE = frhoE - q_x * sk(i, j, k, 1) - q_y * sk(i, j, k, 2) - q_z * sk(i, j, k, 3)
+
+                    ! Update the residuals of cell k and k+1.
+
+                    fw(i, j, k, imx) = fw(i, j, k, imx) - fmx
+                    fw(i, j, k, imy) = fw(i, j, k, imy) - fmy
+                    fw(i, j, k, imz) = fw(i, j, k, imz) - fmz
+                    fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - frhoE
+
+                    fw(i, j, k + 1, imx) = fw(i, j, k + 1, imx) + fmx
+                    fw(i, j, k + 1, imy) = fw(i, j, k + 1, imy) + fmy
+                    fw(i, j, k + 1, imz) = fw(i, j, k + 1, imz) + fmz
+                    fw(i, j, k + 1, irhoE) = fw(i, j, k + 1, irhoE) + frhoE
+
+                    ! Temporarily store the shear stress and heat flux, even
+                    ! if we won't need it. This can still vectorize
+
+                    if (k == 1) then
+                        tmpStore(1, i, j, 1) = tauxx
+                        tmpStore(2, i, j, 1) = tauyy
+                        tmpStore(3, i, j, 1) = tauzz
+                        tmpStore(4, i, j, 1) = tauxy
+                        tmpStore(5, i, j, 1) = tauxz
+                        tmpStore(6, i, j, 1) = tauyz
+
+                        tmpStore(7, i, j, 1) = q_x
+                        tmpStore(8, i, j, 1) = q_y
+                        tmpStore(9, i, j, 1) = q_z
+                    end if
+
+                    if (k == kl) then
+                        tmpStore(1, i, j, 2) = tauxx
+                        tmpStore(2, i, j, 2) = tauyy
+                        tmpStore(3, i, j, 2) = tauzz
+                        tmpStore(4, i, j, 2) = tauxy
+                        tmpStore(5, i, j, 2) = tauxz
+                        tmpStore(6, i, j, 2) = tauyz
+
+                        tmpStore(7, i, j, 2) = q_x
+                        tmpStore(8, i, j, 2) = q_y
+                        tmpStore(9, i, j, 2) = q_z
+                    end if
+
+                end do
+            end do
+        end do
+
+        ! Save into the subface if necessary
+        if (storeWall) then
+
+            origKMin: if (kk - 1 == 1) then
+                do j = 2, jl
+                    do i = 2, il
+                        io = i + ii - 2
+                        jo = j + jj - 2
+
+                        if (viscKminPointer(io, jo) > 0) then
+                            viscSubface(viscKminPointer(io, jo))%tau(io, jo, :) = tmpStore(1:6, i, j, 1)
+                            viscSubface(viscKminPointer(io, jo))%q(io, jo, :) = tmpStore(7:9, i, j, 1)
+                        end if
+                    end do
+                end do
+            end if origKMin
+
+            origKMax: if (kk + nz - 1 == bkl) then
+                do j = 2, jl
+                    do i = 2, il
+                        io = i + ii - 2
+                        jo = j + jj - 2
+                        if (viscKmaxPointer(io, jo) > 0) then
+                            viscSubface(viscKmaxPointer(io, jo))%tau(io, jo, :) = tmpStore(1:6, i, j, 2)
+                            viscSubface(viscKmaxPointer(io, jo))%q(io, jo, :) = tmpStore(7:9, i, j, 2)
+                        end if
+                    end do
+                end do
+            end if origKMax
+        end if
+        !
+        !         Viscous fluxes in the j-direction.
+        !
+        do k = 2, kl
+            do j = 1, jl
+                do i = 2, il
+
+                    ! Set the value of the porosity. If not zero, it is set
+                    ! to average the eddy-viscosity and to take the factor
+                    ! rFilv into account.
+
+                    por = half * rFilv
+                    if (porJ(i, j, k) == noFlux) por = zero
+
+                    ! Compute the laminar and (if present) the eddy viscosities
+                    ! multiplied by the porosity. Compute the factor in front of
+                    ! the gradients of the speed of sound squared for the heat
+                    ! flux.
+
+                    mul = por * (rlv(i, j, k) + rlv(i, j + 1, k))
+                    mue = por * (rev(i, j, k) + rev(i, j + 1, k))
+                    mut = mul + mue
+
+                    gm1 = half * (gamma(i, j, k) + gamma(i, j + 1, k)) - one
+                    factLamHeat = one / (prandtl * gm1)
+                    factTurbHeat = one / (prandtlTurb * gm1)
+
+                    heatCoef = mul * factLamHeat + mue * factTurbHeat
+
+                    ! Compute the gradients at the face by averaging the four
+                    ! nodal values.
+
+                    u_x = fourth * (ux(i - 1, j, k - 1) + ux(i, j, k - 1) &
+                                    + ux(i - 1, j, k) + ux(i, j, k))
+                    u_y = fourth * (uy(i - 1, j, k - 1) + uy(i, j, k - 1) &
+                                    + uy(i - 1, j, k) + uy(i, j, k))
+                    u_z = fourth * (uz(i - 1, j, k - 1) + uz(i, j, k - 1) &
+                                    + uz(i - 1, j, k) + uz(i, j, k))
+
+                    v_x = fourth * (vx(i - 1, j, k - 1) + vx(i, j, k - 1) &
+                                    + vx(i - 1, j, k) + vx(i, j, k))
+                    v_y = fourth * (vy(i - 1, j, k - 1) + vy(i, j, k - 1) &
+                                    + vy(i - 1, j, k) + vy(i, j, k))
+                    v_z = fourth * (vz(i - 1, j, k - 1) + vz(i, j, k - 1) &
+                                    + vz(i - 1, j, k) + vz(i, j, k))
+
+                    w_x = fourth * (wx(i - 1, j, k - 1) + wx(i, j, k - 1) &
+                                    + wx(i - 1, j, k) + wx(i, j, k))
+                    w_y = fourth * (wy(i - 1, j, k - 1) + wy(i, j, k - 1) &
+                                    + wy(i - 1, j, k) + wy(i, j, k))
+                    w_z = fourth * (wz(i - 1, j, k - 1) + wz(i, j, k - 1) &
+                                    + wz(i - 1, j, k) + wz(i, j, k))
+
+                    q_x = fourth * (qx(i - 1, j, k - 1) + qx(i, j, k - 1) &
+                                    + qx(i - 1, j, k) + qx(i, j, k))
+                    q_y = fourth * (qy(i - 1, j, k - 1) + qy(i, j, k - 1) &
+                                    + qy(i - 1, j, k) + qy(i, j, k))
+                    q_z = fourth * (qz(i - 1, j, k - 1) + qz(i, j, k - 1) &
+                                    + qz(i - 1, j, k) + qz(i, j, k))
+
+                    ! The gradients in the normal direction are corrected, such
+                    ! that no averaging takes places here.
+                    ! First determine the vector in the direction from the
+                    ! cell center j to cell center j+1.
+
+                    ssx = eighth * (x(i - 1, j + 1, k - 1, 1) - x(i - 1, j - 1, k - 1, 1) &
+                                    + x(i - 1, j + 1, k, 1) - x(i - 1, j - 1, k, 1) &
+                                    + x(i, j + 1, k - 1, 1) - x(i, j - 1, k - 1, 1) &
+                                    + x(i, j + 1, k, 1) - x(i, j - 1, k, 1))
+                    ssy = eighth * (x(i - 1, j + 1, k - 1, 2) - x(i - 1, j - 1, k - 1, 2) &
+                                    + x(i - 1, j + 1, k, 2) - x(i - 1, j - 1, k, 2) &
+                                    + x(i, j + 1, k - 1, 2) - x(i, j - 1, k - 1, 2) &
+                                    + x(i, j + 1, k, 2) - x(i, j - 1, k, 2))
+                    ssz = eighth * (x(i - 1, j + 1, k - 1, 3) - x(i - 1, j - 1, k - 1, 3) &
+                                    + x(i - 1, j + 1, k, 3) - x(i - 1, j - 1, k, 3) &
+                                    + x(i, j + 1, k - 1, 3) - x(i, j - 1, k - 1, 3) &
+                                    + x(i, j + 1, k, 3) - x(i, j - 1, k, 3))
+
+                    ! Determine the length of this vector and create the
+                    ! unit normal.
+
+                    snrm = one / sqrt(ssx * ssx + ssy * ssy + ssz * ssz)
+                    ssx = snrm * ssx
+                    ssy = snrm * ssy
+                    ssz = snrm * ssz
+
+                    ! Correct the gradients.
+
+                    corr = u_x * ssx + u_y * ssy + u_z * ssz &
+                           - (w(i, j + 1, k, ivx) - w(i, j, k, ivx)) * snrm
+                    u_x = u_x - corr * ssx
+                    u_y = u_y - corr * ssy
+                    u_z = u_z - corr * ssz
+
+                    corr = v_x * ssx + v_y * ssy + v_z * ssz &
+                           - (w(i, j + 1, k, ivy) - w(i, j, k, ivy)) * snrm
+                    v_x = v_x - corr * ssx
+                    v_y = v_y - corr * ssy
+                    v_z = v_z - corr * ssz
+
+                    corr = w_x * ssx + w_y * ssy + w_z * ssz &
+                           - (w(i, j + 1, k, ivz) - w(i, j, k, ivz)) * snrm
+                    w_x = w_x - corr * ssx
+                    w_y = w_y - corr * ssy
+                    w_z = w_z - corr * ssz
+
+                    corr = q_x * ssx + q_y * ssy + q_z * ssz &
+                           + (aa(i, j + 1, k) - aa(i, j, k)) * snrm
+                    q_x = q_x - corr * ssx
+                    q_y = q_y - corr * ssy
+                    q_z = q_z - corr * ssz
+
+                    ! Compute the stress tensor and the heat flux vector.
+                    ! We remove the viscosity from the stress tensor (tau)
+                    ! to define tauS since we still need to separate between
+                    ! laminar and turbulent stress for QCR.
+                    ! Therefore, laminar tau = mue*tauS, turbulent
+                    ! tau = mue*tauS, and total tau = mut*tauS.
+
+                    fracDiv = twoThird * (u_x + v_y + w_z)
+
+                    tauxxS = two * u_x - fracDiv
+                    tauyyS = two * v_y - fracDiv
+                    tauzzS = two * w_z - fracDiv
+
+                    tauxyS = u_y + v_x
+                    tauxzS = u_z + w_x
+                    tauyzS = v_z + w_y
+
+                    q_x = heatCoef * q_x
+                    q_y = heatCoef * q_y
+                    q_z = heatCoef * q_z
+
+                    ! Add QCR corrections if necessary
+                    if (useQCR) then
+
+                        ! In the QCR formulation, we add an extra term to the turbulent stress tensor:
+                        !
+                        ! tau_ij,QCR = tau_ij - e_ij
+                        !
+                        ! where, according to TMR website (http://turbmodels.larc.nasa.gov/spalart.html):
+                        !
+                        ! e_ij = Ccr1*(O_ik*tau_jk + O_jk*tau_ik)
+                        !
+                        ! We are computing O_ik as follows:
+                        !
+                        ! O_ik = 2*W_ik/den
+                        !
+                        ! Remember that the tau_ij in e_ij should use only the eddy viscosity!
+
+                        ! Compute denominator
+                        den = sqrt(u_x * u_x + u_y * u_y + u_z * u_z + &
+                                   v_x * v_x + v_y * v_y + v_z * v_z + &
+                                   w_x * w_x + w_y * w_y + w_z * w_z)
+
+                        ! Denominator should be limited to avoid division by zero in regions with
+                        ! no gradients
+                        den = max(den, xminn)
+
+                        ! Compute factor that will multiply all tensor components.
+                        ! Here we add the eddy viscosity that should multiply the stress tensor (tau)
+                        ! components as well.
+                        fact = mue * Ccr1 / den
+
+                        ! Compute off-diagonal terms of vorticity tensor (we will ommit the 1/2)
+                        ! The diagonals of the vorticity tensor components are always zero
+                        Wxy = u_y - v_x
+                        Wxz = u_z - w_x
+                        Wyz = v_z - w_y
+                        Wyx = -Wxy
+                        Wzx = -Wxz
+                        Wzy = -Wyz
+
+                        ! Compute the extra terms of the Boussinesq relation
+                        exx = fact * (Wxy * tauxyS + Wxz * tauxzS) * two
+                        eyy = fact * (Wyx * tauxyS + Wyz * tauyzS) * two
+                        ezz = fact * (Wzx * tauxzS + Wzy * tauyzS) * two
+
+                        exy = fact * (Wxy * tauyyS + Wxz * tauyzS + &
+                                      Wyx * tauxxS + Wyz * tauxzS)
+                        exz = fact * (Wxy * tauyzS + Wxz * tauzzS + &
+                                      Wzx * tauxxS + Wzy * tauxyS)
+                        eyz = fact * (Wyx * tauxzS + Wyz * tauzzS + &
+                                      Wzx * tauxyS + Wzy * tauyyS)
+
+                        ! Apply the total viscosity to the stress tensor and add extra terms
+                        tauxx = mut * tauxxS - exx
+                        tauyy = mut * tauyyS - eyy
+                        tauzz = mut * tauzzS - ezz
+                        tauxy = mut * tauxyS - exy
+                        tauxz = mut * tauxzS - exz
+                        tauyz = mut * tauyzS - eyz
+
+                    else
+
+                        ! Just apply the total viscosity to the stress tensor
+                        tauxx = mut * tauxxS
+                        tauyy = mut * tauyyS
+                        tauzz = mut * tauzzS
+                        tauxy = mut * tauxyS
+                        tauxz = mut * tauxzS
+                        tauyz = mut * tauyzS
+
+                    end if
+
+                    ! Compute the average velocities for the face. Remember that
+                    ! the velocities are stored and not the momentum.
+
+                    ubar = half * (w(i, j, k, ivx) + w(i, j + 1, k, ivx))
+                    vbar = half * (w(i, j, k, ivy) + w(i, j + 1, k, ivy))
+                    wbar = half * (w(i, j, k, ivz) + w(i, j + 1, k, ivz))
+
+                    ! Compute the viscous fluxes for this j-face.
+
+                    fmx = tauxx * sj(i, j, k, 1) + tauxy * sj(i, j, k, 2) &
+                          + tauxz * sj(i, j, k, 3)
+                    fmy = tauxy * sj(i, j, k, 1) + tauyy * sj(i, j, k, 2) &
+                          + tauyz * sj(i, j, k, 3)
+                    fmz = tauxz * sj(i, j, k, 1) + tauyz * sj(i, j, k, 2) &
+                          + tauzz * sj(i, j, k, 3)
+                    frhoE = (ubar * tauxx + vbar * tauxy + wbar * tauxz) * sj(i, j, k, 1) &
+                            + (ubar * tauxy + vbar * tauyy + wbar * tauyz) * sj(i, j, k, 2) &
+                            + (ubar * tauxz + vbar * tauyz + wbar * tauzz) * sj(i, j, k, 3) &
+                            - q_x * sj(i, j, k, 1) - q_y * sj(i, j, k, 2) - q_z * sj(i, j, k, 3)
+
+                    ! Update the residuals of cell j and j+1.
+
+                    fw(i, j, k, imx) = fw(i, j, k, imx) - fmx
+                    fw(i, j, k, imy) = fw(i, j, k, imy) - fmy
+                    fw(i, j, k, imz) = fw(i, j, k, imz) - fmz
+                    fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - frhoE
+
+                    fw(i, j + 1, k, imx) = fw(i, j + 1, k, imx) + fmx
+                    fw(i, j + 1, k, imy) = fw(i, j + 1, k, imy) + fmy
+                    fw(i, j + 1, k, imz) = fw(i, j + 1, k, imz) + fmz
+                    fw(i, j + 1, k, irhoE) = fw(i, j + 1, k, irhoE) + frhoE
+
+                    ! Temporarily store the shear stress and heat flux, even
+                    ! if we won't need it. This can still vectorize
+
+                    if (j == 1) then
+                        tmpStore(1, i, k, 1) = tauxx
+                        tmpStore(2, i, k, 1) = tauyy
+                        tmpStore(3, i, k, 1) = tauzz
+                        tmpStore(4, i, k, 1) = tauxy
+                        tmpStore(5, i, k, 1) = tauxz
+                        tmpStore(6, i, k, 1) = tauyz
+
+                        tmpStore(7, i, k, 1) = q_x
+                        tmpStore(8, i, k, 1) = q_y
+                        tmpStore(9, i, k, 1) = q_z
+                    end if
+
+                    if (j == jl) then
+                        tmpStore(1, i, k, 2) = tauxx
+                        tmpStore(2, i, k, 2) = tauyy
+                        tmpStore(3, i, k, 2) = tauzz
+                        tmpStore(4, i, k, 2) = tauxy
+                        tmpStore(5, i, k, 2) = tauxz
+                        tmpStore(6, i, k, 2) = tauyz
+
+                        tmpStore(7, i, k, 2) = q_x
+                        tmpStore(8, i, k, 2) = q_y
+                        tmpStore(9, i, k, 2) = q_z
+                    end if
+                end do
+            end do
+        end do
+        ! Save into the subface if necessary
+        if (storeWall) then
+            origJMin: if (jj - 1 == 1) then
+                do k = 2, kl
+                    do i = 2, il
+                        io = i + ii - 2
+                        ko = k + kk - 2
+
+                        if (viscJminPointer(io, ko) > 0) then
+                            viscSubface(viscJminPointer(io, ko))%tau(io, ko, :) = tmpStore(1:6, i, k, 1)
+                            viscSubface(viscJminPointer(io, ko))%q(io, ko, :) = tmpStore(7:9, i, k, 1)
+                        end if
+                    end do
+                end do
+            end if origJMin
+
+            origJMax: if (jj + ny - 1 == bjl) then
+                do k = 2, kl
+                    do i = 2, il
+                        io = i + ii - 2
+                        ko = k + kk - 2
+                        if (viscJmaxPointer(io, ko) > 0) then
+                            viscSubface(viscJmaxPointer(io, ko))%tau(io, ko, :) = tmpStore(1:6, i, k, 2)
+                            viscSubface(viscJmaxPointer(io, ko))%q(io, ko, :) = tmpStore(7:9, i, k, 2)
+                        end if
+                    end do
+                end do
+            end if origJMax
+        end if
+        !
+        !         Viscous fluxes in the i-direction.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 1, il
+                    ! Set the value of the porosity. If not zero, it is set
+                    ! to average the eddy-viscosity and to take the factor
+                    ! rFilv into account.
+
+                    por = half * rFilv
+                    if (porI(i, j, k) == noFlux) por = zero
+
+                    ! Compute the laminar and (if present) the eddy viscosities
+                    ! multiplied the porosity. Compute the factor in front of
+                    ! the gradients of the speed of sound squared for the heat
+                    ! flux.
+
+                    mul = por * (rlv(i, j, k) + rlv(i + 1, j, k))
+                    mue = por * (rev(i, j, k) + rev(i + 1, j, k))
+                    mut = mul + mue
+
+                    gm1 = half * (gamma(i, j, k) + gamma(i + 1, j, k)) - one
+                    factLamHeat = one / (prandtl * gm1)
+                    factTurbHeat = one / (prandtlTurb * gm1)
+
+                    heatCoef = mul * factLamHeat + mue * factTurbHeat
+
+                    ! Compute the gradients at the face by averaging the four
+                    ! nodal values.
+
+                    u_x = fourth * (ux(i, j - 1, k - 1) + ux(i, j, k - 1) &
+                                    + ux(i, j - 1, k) + ux(i, j, k))
+                    u_y = fourth * (uy(i, j - 1, k - 1) + uy(i, j, k - 1) &
+                                    + uy(i, j - 1, k) + uy(i, j, k))
+                    u_z = fourth * (uz(i, j - 1, k - 1) + uz(i, j, k - 1) &
+                                    + uz(i, j - 1, k) + uz(i, j, k))
+
+                    v_x = fourth * (vx(i, j - 1, k - 1) + vx(i, j, k - 1) &
+                                    + vx(i, j - 1, k) + vx(i, j, k))
+                    v_y = fourth * (vy(i, j - 1, k - 1) + vy(i, j, k - 1) &
+                                    + vy(i, j - 1, k) + vy(i, j, k))
+                    v_z = fourth * (vz(i, j - 1, k - 1) + vz(i, j, k - 1) &
+                                    + vz(i, j - 1, k) + vz(i, j, k))
+
+                    w_x = fourth * (wx(i, j - 1, k - 1) + wx(i, j, k - 1) &
+                                    + wx(i, j - 1, k) + wx(i, j, k))
+                    w_y = fourth * (wy(i, j - 1, k - 1) + wy(i, j, k - 1) &
+                                    + wy(i, j - 1, k) + wy(i, j, k))
+                    w_z = fourth * (wz(i, j - 1, k - 1) + wz(i, j, k - 1) &
+                                    + wz(i, j - 1, k) + wz(i, j, k))
+
+                    q_x = fourth * (qx(i, j - 1, k - 1) + qx(i, j, k - 1) &
+                                    + qx(i, j - 1, k) + qx(i, j, k))
+                    q_y = fourth * (qy(i, j - 1, k - 1) + qy(i, j, k - 1) &
+                                    + qy(i, j - 1, k) + qy(i, j, k))
+                    q_z = fourth * (qz(i, j - 1, k - 1) + qz(i, j, k - 1) &
+                                    + qz(i, j - 1, k) + qz(i, j, k))
+
+                    ! The gradients in the normal direction are corrected, such
+                    ! that no averaging takes places here.
+                    ! First determine the vector in the direction from the
+                    ! cell center i to cell center i+1.
+
+                    ssx = eighth * (x(i + 1, j - 1, k - 1, 1) - x(i - 1, j - 1, k - 1, 1) &
+                                    + x(i + 1, j - 1, k, 1) - x(i - 1, j - 1, k, 1) &
+                                    + x(i + 1, j, k - 1, 1) - x(i - 1, j, k - 1, 1) &
+                                    + x(i + 1, j, k, 1) - x(i - 1, j, k, 1))
+                    ssy = eighth * (x(i + 1, j - 1, k - 1, 2) - x(i - 1, j - 1, k - 1, 2) &
+                                    + x(i + 1, j - 1, k, 2) - x(i - 1, j - 1, k, 2) &
+                                    + x(i + 1, j, k - 1, 2) - x(i - 1, j, k - 1, 2) &
+                                    + x(i + 1, j, k, 2) - x(i - 1, j, k, 2))
+                    ssz = eighth * (x(i + 1, j - 1, k - 1, 3) - x(i - 1, j - 1, k - 1, 3) &
+                                    + x(i + 1, j - 1, k, 3) - x(i - 1, j - 1, k, 3) &
+                                    + x(i + 1, j, k - 1, 3) - x(i - 1, j, k - 1, 3) &
+                                    + x(i + 1, j, k, 3) - x(i - 1, j, k, 3))
+
+                    ! Determine the length of this vector and create the
+                    ! unit normal.
+
+                    snrm = one / sqrt(ssx * ssx + ssy * ssy + ssz * ssz)
+                    ssx = snrm * ssx
+                    ssy = snrm * ssy
+                    ssz = snrm * ssz
+
+                    ! Correct the gradients.
+
+                    corr = u_x * ssx + u_y * ssy + u_z * ssz &
+                           - (w(i + 1, j, k, ivx) - w(i, j, k, ivx)) * snrm
+                    u_x = u_x - corr * ssx
+                    u_y = u_y - corr * ssy
+                    u_z = u_z - corr * ssz
+
+                    corr = v_x * ssx + v_y * ssy + v_z * ssz &
+                           - (w(i + 1, j, k, ivy) - w(i, j, k, ivy)) * snrm
+                    v_x = v_x - corr * ssx
+                    v_y = v_y - corr * ssy
+                    v_z = v_z - corr * ssz
+
+                    corr = w_x * ssx + w_y * ssy + w_z * ssz &
+                           - (w(i + 1, j, k, ivz) - w(i, j, k, ivz)) * snrm
+                    w_x = w_x - corr * ssx
+                    w_y = w_y - corr * ssy
+                    w_z = w_z - corr * ssz
+
+                    corr = q_x * ssx + q_y * ssy + q_z * ssz &
+                           + (aa(i + 1, j, k) - aa(i, j, k)) * snrm
+                    q_x = q_x - corr * ssx
+                    q_y = q_y - corr * ssy
+                    q_z = q_z - corr * ssz
+
+                    ! Compute the stress tensor and the heat flux vector.
+                    ! We remove the viscosity from the stress tensor (tau)
+                    ! to define tauS since we still need to separate between
+                    ! laminar and turbulent stress for QCR.
+                    ! Therefore, laminar tau = mue*tauS, turbulent
+                    ! tau = mue*tauS, and total tau = mut*tauS.
+
+                    fracDiv = twoThird * (u_x + v_y + w_z)
+
+                    tauxxS = two * u_x - fracDiv
+                    tauyyS = two * v_y - fracDiv
+                    tauzzS = two * w_z - fracDiv
+
+                    tauxyS = u_y + v_x
+                    tauxzS = u_z + w_x
+                    tauyzS = v_z + w_y
+
+                    q_x = heatCoef * q_x
+                    q_y = heatCoef * q_y
+                    q_z = heatCoef * q_z
+
+                    ! Add QCR corrections if necessary
+                    if (useQCR) then
+
+                        ! In the QCR formulation, we add an extra term to the turbulent stress tensor:
+                        !
+                        ! tau_ij,QCR = tau_ij - e_ij
+                        !
+                        ! where, according to TMR website (http://turbmodels.larc.nasa.gov/spalart.html):
+                        !
+                        ! e_ij = Ccr1*(O_ik*tau_jk + O_jk*tau_ik)
+                        !
+                        ! We are computing O_ik as follows:
+                        !
+                        ! O_ik = 2*W_ik/den
+                        !
+                        ! Remember that the tau_ij in e_ij should use only the eddy viscosity!
+
+                        ! Compute denominator
+                        den = sqrt(u_x * u_x + u_y * u_y + u_z * u_z + &
+                                   v_x * v_x + v_y * v_y + v_z * v_z + &
+                                   w_x * w_x + w_y * w_y + w_z * w_z)
+
+                        ! Denominator should be limited to avoid division by zero in regions with
+                        ! no gradients
+                        den = max(den, xminn)
+
+                        ! Compute factor that will multiply all tensor components.
+                        ! Here we add the eddy viscosity that should multiply the stress tensor (tau)
+                        ! components as well.
+                        fact = mue * Ccr1 / den
+
+                        ! Compute off-diagonal terms of vorticity tensor (we will ommit the 1/2)
+                        ! The diagonals of the vorticity tensor components are always zero
+                        Wxy = u_y - v_x
+                        Wxz = u_z - w_x
+                        Wyz = v_z - w_y
+                        Wyx = -Wxy
+                        Wzx = -Wxz
+                        Wzy = -Wyz
+
+                        ! Compute the extra terms of the Boussinesq relation
+                        exx = fact * (Wxy * tauxyS + Wxz * tauxzS) * two
+                        eyy = fact * (Wyx * tauxyS + Wyz * tauyzS) * two
+                        ezz = fact * (Wzx * tauxzS + Wzy * tauyzS) * two
+
+                        exy = fact * (Wxy * tauyyS + Wxz * tauyzS + &
+                                      Wyx * tauxxS + Wyz * tauxzS)
+                        exz = fact * (Wxy * tauyzS + Wxz * tauzzS + &
+                                      Wzx * tauxxS + Wzy * tauxyS)
+                        eyz = fact * (Wyx * tauxzS + Wyz * tauzzS + &
+                                      Wzx * tauxyS + Wzy * tauyyS)
+
+                        ! Apply the total viscosity to the stress tensor and add extra terms
+                        tauxx = mut * tauxxS - exx
+                        tauyy = mut * tauyyS - eyy
+                        tauzz = mut * tauzzS - ezz
+                        tauxy = mut * tauxyS - exy
+                        tauxz = mut * tauxzS - exz
+                        tauyz = mut * tauyzS - eyz
+
+                    else
+
+                        ! Just apply the total viscosity to the stress tensor
+                        tauxx = mut * tauxxS
+                        tauyy = mut * tauyyS
+                        tauzz = mut * tauzzS
+                        tauxy = mut * tauxyS
+                        tauxz = mut * tauxzS
+                        tauyz = mut * tauyzS
+
+                    end if
+
+                    ! Compute the average velocities for the face. Remember that
+                    ! the velocities are stored and not the momentum.
+
+                    ubar = half * (w(i, j, k, ivx) + w(i + 1, j, k, ivx))
+                    vbar = half * (w(i, j, k, ivy) + w(i + 1, j, k, ivy))
+                    wbar = half * (w(i, j, k, ivz) + w(i + 1, j, k, ivz))
+
+                    ! Compute the viscous fluxes for this i-face.
+
+                    fmx = tauxx * si(i, j, k, 1) + tauxy * si(i, j, k, 2) &
+                          + tauxz * si(i, j, k, 3)
+                    fmy = tauxy * si(i, j, k, 1) + tauyy * si(i, j, k, 2) &
+                          + tauyz * si(i, j, k, 3)
+                    fmz = tauxz * si(i, j, k, 1) + tauyz * si(i, j, k, 2) &
+                          + tauzz * si(i, j, k, 3)
+                    frhoE = (ubar * tauxx + vbar * tauxy + wbar * tauxz) * si(i, j, k, 1) &
+                            + (ubar * tauxy + vbar * tauyy + wbar * tauyz) * si(i, j, k, 2) &
+                            + (ubar * tauxz + vbar * tauyz + wbar * tauzz) * si(i, j, k, 3) &
+                            - q_x * si(i, j, k, 1) - q_y * si(i, j, k, 2) - q_z * si(i, j, k, 3)
+
+                    ! Update the residuals of cell i and i+1.
+                    fw(i + 1, j, k, imx) = fw(i + 1, j, k, imx) + fmx
+                    fw(i + 1, j, k, imy) = fw(i + 1, j, k, imy) + fmy
+                    fw(i + 1, j, k, imz) = fw(i + 1, j, k, imz) + fmz
+                    fw(i + 1, j, k, irhoE) = fw(i + 1, j, k, irhoE) + frhoE
+
+                    fw(i, j, k, imx) = fw(i, j, k, imx) - fmx
+                    fw(i, j, k, imy) = fw(i, j, k, imy) - fmy
+                    fw(i, j, k, imz) = fw(i, j, k, imz) - fmz
+                    fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - frhoE
+
+                    ! Temporarily store the shear stress and heat flux, even
+                    ! if we won't need it. This can still vectorize
+
+                    if (i == 1) then
+                        tmpStore(1, j, k, 1) = tauxx
+                        tmpStore(2, j, k, 1) = tauyy
+                        tmpStore(3, j, k, 1) = tauzz
+                        tmpStore(4, j, k, 1) = tauxy
+                        tmpStore(5, j, k, 1) = tauxz
+                        tmpStore(6, j, k, 1) = tauyz
+
+                        tmpStore(7, j, k, 1) = q_x
+                        tmpStore(8, j, k, 1) = q_y
+                        tmpStore(9, j, k, 1) = q_z
+                    end if
+
+                    if (i == il) then
+                        tmpStore(1, j, k, 2) = tauxx
+                        tmpStore(2, j, k, 2) = tauyy
+                        tmpStore(3, j, k, 2) = tauzz
+                        tmpStore(4, j, k, 2) = tauxy
+                        tmpStore(5, j, k, 2) = tauxz
+                        tmpStore(6, j, k, 2) = tauyz
+
+                        tmpStore(7, j, k, 2) = q_x
+                        tmpStore(8, j, k, 2) = q_y
+                        tmpStore(9, j, k, 2) = q_z
+                    end if
+                end do
+            end do
+        end do
+        ! Save into the subface if necessary
+        if (storeWall) then
+            origIMin: if (ii - 1 == 1) then
+                do k = 2, kl
+                    do j = 2, jl
+                        jo = j + jj - 2
+                        ko = k + kk - 2
+
+                        if (viscIminPointer(jo, ko) > 0) then
+                            viscSubface(viscIminPointer(jo, ko))%tau(jo, ko, :) = tmpStore(1:6, j, k, 1)
+                            viscSubface(viscIminPointer(jo, ko))%q(jo, ko, :) = tmpStore(7:9, j, k, 1)
+                        end if
+                    end do
+                end do
+            end if origIMin
+
+            origIMax: if (ii + nx - 1 == bil) then
+                do k = 2, kl
+                    do j = 2, jl
+                        jo = j + jj - 2
+                        ko = k + kk - 2
+                        if (viscImaxPointer(jo, ko) > 0) then
+                            viscSubface(viscImaxPointer(jo, ko))%tau(jo, ko, :) = tmpStore(1:6, j, k, 2)
+                            viscSubface(viscImaxPointer(jo, ko))%q(jo, ko, :) = tmpStore(7:9, j, k, 2)
+                        end if
+                    end do
+                end do
+            end if origIMax
+        end if
+    end subroutine viscousFlux
+
+    subroutine viscousFluxApprox
+
+        use constants
+        use flowVarRefState
+        use inputPhysics
+        use iteration
+        implicit none
+        !
+        !      Local parameter.
+        !
+        real(kind=realType), parameter :: twoThird = two * third
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k
+        integer(kind=intType) :: ii, jj, kk
+
+        real(kind=realType) :: rFilv, por, mul, mue, mut, heatCoef
+        real(kind=realType) :: gm1, factLamHeat, factTurbHeat
+        real(kind=realType) :: u_x, u_y, u_z, v_x, v_y, v_z, w_x, w_y, w_z
+        real(kind=realType) :: q_x, q_y, q_z, ubar, vbar, wbar
+        real(kind=realType) :: corr, ssx, ssy, ssz, ss, fracDiv
+        real(kind=realType) :: tauxx, tauyy, tauzz
+        real(kind=realType) :: tauxy, tauxz, tauyz
+        real(kind=realType) :: fmx, fmy, fmz, frhoE
+        real(kind=realType) :: dd
+        logical :: correctForK
+
+        mue = zero
+        rFilv = rFil
+
+        ! Viscous fluxes in the I-direction
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 1, il
+
+                    ! Compute the vector from the center of cell i to cell i+1
+                    ssx = eighth * (x(i + 1, j - 1, k - 1, 1) - x(i - 1, j - 1, k - 1, 1) &
+                                    + x(i + 1, j - 1, k, 1) - x(i - 1, j - 1, k, 1) &
+                                    + x(i + 1, j, k - 1, 1) - x(i - 1, j, k - 1, 1) &
+                                    + x(i + 1, j, k, 1) - x(i - 1, j, k, 1))
+                    ssy = eighth * (x(i + 1, j - 1, k - 1, 2) - x(i - 1, j - 1, k - 1, 2) &
+                                    + x(i + 1, j - 1, k, 2) - x(i - 1, j - 1, k, 2) &
+                                    + x(i + 1, j, k - 1, 2) - x(i - 1, j, k - 1, 2) &
+                                    + x(i + 1, j, k, 2) - x(i - 1, j, k, 2))
+                    ssz = eighth * (x(i + 1, j - 1, k - 1, 3) - x(i - 1, j - 1, k - 1, 3) &
+                                    + x(i + 1, j - 1, k, 3) - x(i - 1, j - 1, k, 3) &
+                                    + x(i + 1, j, k - 1, 3) - x(i - 1, j, k - 1, 3) &
+                                    + x(i + 1, j, k, 3) - x(i - 1, j, k, 3))
+
+                    ! And determine one/ length of vector squared
+                    ss = one / (ssx * ssx + ssy * ssy + ssz * ssz)
+                    ssx = ss * ssx
+                    ssy = ss * ssy
+                    ssz = ss * ssz
+
+                    ! Now compute each gradient
+                    dd = w(i + 1, j, k, ivx) - w(i, j, k, ivx)
+                    u_x = dd * ssx
+                    u_y = dd * ssy
+                    u_z = dd * ssz
+
+                    dd = w(i + 1, j, k, ivy) - w(i, j, k, ivy)
+                    v_x = dd * ssx
+                    v_y = dd * ssy
+                    v_z = dd * ssz
+
+                    dd = w(i + 1, j, k, ivz) - w(i, j, k, ivz)
+                    w_x = dd * ssx
+                    w_y = dd * ssy
+                    w_z = dd * ssz
+
+                    dd = aa(i + 1, j, k) - aa(i, j, k)
+                    q_x = -dd * ssx
+                    q_y = -dd * ssy
+                    q_z = -dd * ssz
+
+                    por = half * rFilv
+                    if (porI(i, j, k) == noFlux) por = zero
+
+                    ! Compute the laminar and (if present) the eddy viscosities
+                    ! multiplied by the porosity. Compute the factor in front of
+                    ! the gradients of the speed of sound squared for the heat
+                    ! flux.
+
+                    mul = por * (rlv(i, j, k) + rlv(i + 1, j, k))
+                    mue = por * (rev(i, j, k) + rev(i + 1, j, k))
+                    mut = mul + mue
+
+                    gm1 = half * (gamma(i, j, k) + gamma(i + 1, j, k)) - one
+                    factLamHeat = one / (prandtl * gm1)
+                    factTurbHeat = one / (prandtlTurb * gm1)
+
+                    heatCoef = mul * factLamHeat + mue * factTurbHeat
+
+                    ! Compute the stress tensor and the heat flux vector.
+
+                    fracDiv = twoThird * (u_x + v_y + w_z)
+
+                    tauxx = mut * (two * u_x - fracDiv)
+                    tauyy = mut * (two * v_y - fracDiv)
+                    tauzz = mut * (two * w_z - fracDiv)
+
+                    tauxy = mut * (u_y + v_x)
+                    tauxz = mut * (u_z + w_x)
+                    tauyz = mut * (v_z + w_y)
+
+                    q_x = heatCoef * q_x
+                    q_y = heatCoef * q_y
+                    q_z = heatCoef * q_z
+
+                    ! Compute the average velocities for the face. Remember that
+                    ! the velocities are stored and not the momentum.
+
+                    ubar = half * (w(i, j, k, ivx) + w(i + 1, j, k, ivx))
+                    vbar = half * (w(i, j, k, ivy) + w(i + 1, j, k, ivy))
+                    wbar = half * (w(i, j, k, ivz) + w(i + 1, j, k, ivz))
+
+                    ! Compute the viscous fluxes for this i-face.
+
+                    fmx = tauxx * si(i, j, k, 1) + tauxy * si(i, j, k, 2) + tauxz * si(i, j, k, 3)
+                    fmy = tauxy * si(i, j, k, 1) + tauyy * si(i, j, k, 2) + tauyz * si(i, j, k, 3)
+                    fmz = tauxz * si(i, j, k, 1) + tauyz * si(i, j, k, 2) + tauzz * si(i, j, k, 3)
+                    frhoE = (ubar * tauxx + vbar * tauxy + wbar * tauxz) * si(i, j, k, 1) &
+                            + (ubar * tauxy + vbar * tauyy + wbar * tauyz) * si(i, j, k, 2) &
+                            + (ubar * tauxz + vbar * tauyz + wbar * tauzz) * si(i, j, k, 3) &
+                            - q_x * si(i, j, k, 1) - q_y * si(i, j, k, 2) - q_z * si(i, j, k, 3)
+
+                    ! Update the residuals of cell i and i+1.
+                    fw(i + 1, j, k, imx) = fw(i + 1, j, k, imx) + fmx
+                    fw(i + 1, j, k, imy) = fw(i + 1, j, k, imy) + fmy
+                    fw(i + 1, j, k, imz) = fw(i + 1, j, k, imz) + fmz
+                    fw(i + 1, j, k, irhoE) = fw(i + 1, j, k, irhoE) + frhoE
+
+                    fw(i, j, k, imx) = fw(i, j, k, imx) - fmx
+                    fw(i, j, k, imy) = fw(i, j, k, imy) - fmy
+                    fw(i, j, k, imz) = fw(i, j, k, imz) - fmz
+                    fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - frhoE
+
+                end do
+            end do
+        end do
+
+        ! Viscous fluxes in the J-direction
+
+        do k = 2, kl
+            do j = 1, jl
+                do i = 2, il
+
+                    ! Compute the vector from the center of cell j to cell j+1
+                    ssx = eighth * (x(i - 1, j + 1, k - 1, 1) - x(i - 1, j - 1, k - 1, 1) &
+                                    + x(i - 1, j + 1, k, 1) - x(i - 1, j - 1, k, 1) &
+                                    + x(i, j + 1, k - 1, 1) - x(i, j - 1, k - 1, 1) &
+                                    + x(i, j + 1, k, 1) - x(i, j - 1, k, 1))
+                    ssy = eighth * (x(i - 1, j + 1, k - 1, 2) - x(i - 1, j - 1, k - 1, 2) &
+                                    + x(i - 1, j + 1, k, 2) - x(i - 1, j - 1, k, 2) &
+                                    + x(i, j + 1, k - 1, 2) - x(i, j - 1, k - 1, 2) &
+                                    + x(i, j + 1, k, 2) - x(i, j - 1, k, 2))
+                    ssz = eighth * (x(i - 1, j + 1, k - 1, 3) - x(i - 1, j - 1, k - 1, 3) &
+                                    + x(i - 1, j + 1, k, 3) - x(i - 1, j - 1, k, 3) &
+                                    + x(i, j + 1, k - 1, 3) - x(i, j - 1, k - 1, 3) &
+                                    + x(i, j + 1, k, 3) - x(i, j - 1, k, 3))
+
+                    ! And determine one/ length of vector squared
+                    ss = one / (ssx * ssx + ssy * ssy + ssz * ssz)
+                    ssx = ss * ssx
+                    ssy = ss * ssy
+                    ssz = ss * ssz
+
+                    ! Now compute each gradient
+                    dd = w(i, j + 1, k, ivx) - w(i, j, k, ivx)
+                    u_x = dd * ssx
+                    u_y = dd * ssy
+                    u_z = dd * ssz
+
+                    dd = w(i, j + 1, k, ivy) - w(i, j, k, ivy)
+                    v_x = dd * ssx
+                    v_y = dd * ssy
+                    v_z = dd * ssz
+
+                    dd = w(i, j + 1, k, ivz) - w(i, j, k, ivz)
+                    w_x = dd * ssx
+                    w_y = dd * ssy
+                    w_z = dd * ssz
+
+                    dd = aa(i, j + 1, k) - aa(i, j, k)
+                    q_x = -dd * ssx
+                    q_y = -dd * ssy
+                    q_z = -dd * ssz
+
+                    por = half * rFilv
+                    if (porJ(i, j, k) == noFlux) por = zero
+
+                    ! Compute the laminar and (if present) the eddy viscosities
+                    ! multiplied by the porosity. Compute the factor in front of
+                    ! the gradients of the speed of sound squared for the heat
+                    ! flux.
+
+                    mul = por * (rlv(i, j, k) + rlv(i, j + 1, k))
+                    mue = por * (rev(i, j, k) + rev(i, j + 1, k))
+                    mut = mul + mue
+
+                    gm1 = half * (gamma(i, j, k) + gamma(i, j + 1, k)) - one
+                    factLamHeat = one / (prandtl * gm1)
+                    factTurbHeat = one / (prandtlTurb * gm1)
+
+                    heatCoef = mul * factLamHeat + mue * factTurbHeat
+
+                    ! Compute the stress tensor and the heat flux vector.
+
+                    fracDiv = twoThird * (u_x + v_y + w_z)
+
+                    tauxx = mut * (two * u_x - fracDiv)
+                    tauyy = mut * (two * v_y - fracDiv)
+                    tauzz = mut * (two * w_z - fracDiv)
+
+                    tauxy = mut * (u_y + v_x)
+                    tauxz = mut * (u_z + w_x)
+                    tauyz = mut * (v_z + w_y)
+
+                    q_x = heatCoef * q_x
+                    q_y = heatCoef * q_y
+                    q_z = heatCoef * q_z
+
+                    ! Compute the average velocities for the face. Remember that
+                    ! the velocities are stored and not the momentum.
+
+                    ubar = half * (w(i, j, k, ivx) + w(i, j + 1, k, ivx))
+                    vbar = half * (w(i, j, k, ivy) + w(i, j + 1, k, ivy))
+                    wbar = half * (w(i, j, k, ivz) + w(i, j + 1, k, ivz))
+
+                    ! Compute the viscous fluxes for this j-face.
+
+                    fmx = tauxx * sj(i, j, k, 1) + tauxy * sj(i, j, k, 2) + tauxz * sj(i, j, k, 3)
+                    fmy = tauxy * sj(i, j, k, 1) + tauyy * sj(i, j, k, 2) + tauyz * sj(i, j, k, 3)
+                    fmz = tauxz * sj(i, j, k, 1) + tauyz * sj(i, j, k, 2) + tauzz * sj(i, j, k, 3)
+                    frhoE = (ubar * tauxx + vbar * tauxy + wbar * tauxz) * sj(i, j, k, 1) &
+                            + (ubar * tauxy + vbar * tauyy + wbar * tauyz) * sj(i, j, k, 2) &
+                            + (ubar * tauxz + vbar * tauyz + wbar * tauzz) * sj(i, j, k, 3) &
+                            - q_x * sj(i, j, k, 1) - q_y * sj(i, j, k, 2) - q_z * sj(i, j, k, 3)
+
+                    ! Update the residuals of cell j and j+1.
+
+                    fw(i, j, k, imx) = fw(i, j, k, imx) - fmx
+                    fw(i, j, k, imy) = fw(i, j, k, imy) - fmy
+                    fw(i, j, k, imz) = fw(i, j, k, imz) - fmz
+                    fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - frhoE
+
+                    fw(i, j + 1, k, imx) = fw(i, j + 1, k, imx) + fmx
+                    fw(i, j + 1, k, imy) = fw(i, j + 1, k, imy) + fmy
+                    fw(i, j + 1, k, imz) = fw(i, j + 1, k, imz) + fmz
+                    fw(i, j + 1, k, irhoE) = fw(i, j + 1, k, irhoE) + frhoE
+
+                end do
+            end do
+        end do
+
+        ! Viscous fluxes in the K-direction
+
+        do k = 1, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Compute the vector from the center of cell k to cell k+1
+                    ssx = eighth * (x(i - 1, j - 1, k + 1, 1) - x(i - 1, j - 1, k - 1, 1) &
+                                    + x(i - 1, j, k + 1, 1) - x(i - 1, j, k - 1, 1) &
+                                    + x(i, j - 1, k + 1, 1) - x(i, j - 1, k - 1, 1) &
+                                    + x(i, j, k + 1, 1) - x(i, j, k - 1, 1))
+                    ssy = eighth * (x(i - 1, j - 1, k + 1, 2) - x(i - 1, j - 1, k - 1, 2) &
+                                    + x(i - 1, j, k + 1, 2) - x(i - 1, j, k - 1, 2) &
+                                    + x(i, j - 1, k + 1, 2) - x(i, j - 1, k - 1, 2) &
+                                    + x(i, j, k + 1, 2) - x(i, j, k - 1, 2))
+                    ssz = eighth * (x(i - 1, j - 1, k + 1, 3) - x(i - 1, j - 1, k - 1, 3) &
+                                    + x(i - 1, j, k + 1, 3) - x(i - 1, j, k - 1, 3) &
+                                    + x(i, j - 1, k + 1, 3) - x(i, j - 1, k - 1, 3) &
+                                    + x(i, j, k + 1, 3) - x(i, j, k - 1, 3))
+                    ! And determine one/ length of vector squared
+                    ss = one / (ssx * ssx + ssy * ssy + ssz * ssz)
+                    ssx = ss * ssx
+                    ssy = ss * ssy
+                    ssz = ss * ssz
+
+                    ! Now compute each gradient
+                    dd = w(i, j, k + 1, ivx) - w(i, j, k, ivx)
+                    u_x = dd * ssx
+                    u_y = dd * ssy
+                    u_z = dd * ssz
+
+                    dd = w(i, j, k + 1, ivy) - w(i, j, k, ivy)
+                    v_x = dd * ssx
+                    v_y = dd * ssy
+                    v_z = dd * ssz
+
+                    dd = w(i, j, k + 1, ivz) - w(i, j, k, ivz)
+                    w_x = dd * ssx
+                    w_y = dd * ssy
+                    w_z = dd * ssz
+
+                    dd = aa(i, j, k + 1) - aa(i, j, k)
+                    q_x = -dd * ssx
+                    q_y = -dd * ssy
+                    q_z = -dd * ssz
+
+                    por = half * rFilv
+                    if (porK(i, j, k) == noFlux) por = zero
+
+                    ! Compute the laminar and (if present) the eddy viscosities
+                    ! multiplied by the porosity. Compute the factor in front of
+                    ! the gradients of the speed of sound squared for the heat
+                    ! flux.
+
+                    mul = por * (rlv(i, j, k) + rlv(i, j, k + 1))
+                    mue = por * (rev(i, j, k) + rev(i, j, k + 1))
+                    mut = mul + mue
+
+                    gm1 = half * (gamma(i, j, k) + gamma(i, j, k + 1)) - one
+                    factLamHeat = one / (prandtl * gm1)
+                    factTurbHeat = one / (prandtlTurb * gm1)
+
+                    heatCoef = mul * factLamHeat + mue * factTurbHeat
+
+                    ! Compute the stress tensor and the heat flux vector.
+
+                    fracDiv = twoThird * (u_x + v_y + w_z)
+
+                    tauxx = mut * (two * u_x - fracDiv)
+                    tauyy = mut * (two * v_y - fracDiv)
+                    tauzz = mut * (two * w_z - fracDiv)
+
+                    tauxy = mut * (u_y + v_x)
+                    tauxz = mut * (u_z + w_x)
+                    tauyz = mut * (v_z + w_y)
+
+                    q_x = heatCoef * q_x
+                    q_y = heatCoef * q_y
+                    q_z = heatCoef * q_z
+
+                    ! Compute the average velocities for the face. Remember that
+                    ! the velocities are stored and not the momentum.
+
+                    ubar = half * (w(i, j, k, ivx) + w(i, j, k + 1, ivx))
+                    vbar = half * (w(i, j, k, ivy) + w(i, j, k + 1, ivy))
+                    wbar = half * (w(i, j, k, ivz) + w(i, j, k + 1, ivz))
+
+                    ! Compute the viscous fluxes for this j-face.
+
+                    fmx = tauxx * sk(i, j, k, 1) + tauxy * sk(i, j, k, 2) + tauxz * sk(i, j, k, 3)
+                    fmy = tauxy * sk(i, j, k, 1) + tauyy * sk(i, j, k, 2) + tauyz * sk(i, j, k, 3)
+                    fmz = tauxz * sk(i, j, k, 1) + tauyz * sk(i, j, k, 2) + tauzz * sk(i, j, k, 3)
+                    frhoE = (ubar * tauxx + vbar * tauxy + wbar * tauxz) * sk(i, j, k, 1) &
+                            + (ubar * tauxy + vbar * tauyy + wbar * tauyz) * sk(i, j, k, 2) &
+                            + (ubar * tauxz + vbar * tauyz + wbar * tauzz) * sk(i, j, k, 3) &
+                            - q_x * sk(i, j, k, 1) - q_y * sk(i, j, k, 2) - q_z * sk(i, j, k, 3)
+
+                    ! Update the residuals of cell j and j+1.
+
+                    fw(i, j, k, imx) = fw(i, j, k, imx) - fmx
+                    fw(i, j, k, imy) = fw(i, j, k, imy) - fmy
+                    fw(i, j, k, imz) = fw(i, j, k, imz) - fmz
+                    fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - frhoE
+
+                    fw(i, j, k + 1, imx) = fw(i, j, k + 1, imx) + fmx
+                    fw(i, j, k + 1, imy) = fw(i, j, k + 1, imy) + fmy
+                    fw(i, j, k + 1, imz) = fw(i, j, k + 1, imz) + fmz
+                    fw(i, j, k + 1, irhoE) = fw(i, j, k + 1, irhoE) + frhoE
+
+                end do
+            end do
+        end do
+    end subroutine viscousFluxApprox
+
+    subroutine sumDwandFw
+
+        ! ---------------------------------------------
+        !                 Sum dw and fw/res scale
+        ! ---------------------------------------------
+        use constants
+        use flowVarRefState, only: nw, nwf, nt1, nt2
+        use inputIteration, only: turbResScale
+        implicit none
+
+        ! Variables for final summing
+        integer(kind=intType) :: nTurb, i, j, k, l
+        real(kind=realType) :: oVol, rBlank
+
+        nTurb = nt2 - nt1 + 1
+        do l = 1, nwf
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+                        rblank = max(real(iblank(i, j, k), realType), zero)
+                        dw(i, j, k, l) = (dw(i, j, k, l) + fw(i, j, k, l)) * rBlank
+                    end do
+                end do
+            end do
+        end do
+    end subroutine sumDwandFw
+
+    subroutine resScale
+
+        use constants
+        use flowVarRefState, only: nwf, nt1, nt2
+        use inputIteration, only: turbResScale
+        implicit none
+
+        ! Local Variables
+        integer(kind=intType) :: i, j, k, ii, nTurb
+        real(kind=realType) :: ovol
+
+        ! Divide through by the reference volume
+        nTurb = nt2 - nt1 + 1
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    oVol = one / volRef(i, j, k)
+                    dw(i, j, k, 1:nwf) = dw(i, j, k, 1:nwf) * ovol
+                    dw(i, j, k, nt1:nt2) = dw(i, j, k, nt1:nt2) * ovol * turbResScale(1:nTurb)
+                end do
+            end do
+        end do
+
+    end subroutine resScale
 
 end module blockette
diff --git a/src/adjoint/ADFirstAidKit/tapenade_dp_test.f90 b/src/adjoint/ADFirstAidKit/tapenade_dp_test.f90
index 039989029..d2e9a9c4a 100644
--- a/src/adjoint/ADFirstAidKit/tapenade_dp_test.f90
+++ b/src/adjoint/ADFirstAidKit/tapenade_dp_test.f90
@@ -1,179 +1,179 @@
 MODULE TAPENADE_DP_TEST
 
-  IMPLICIT NONE
-  REAL*8 fwdarray(1000), revarray(1000)
-  CHARACTER*20 dppoints(0:1000)
-  INTEGER :: indexinarray = 1
-  INTEGER :: maxindexinarray = 0
-  REAL*8 :: dotp=0.0
-  INTEGER :: unitcallcount = 0
-  INTEGER :: dptriggercount = 1
-  REAL*8 :: dpdelta
+    IMPLICIT NONE
+    REAL * 8 fwdarray(1000), revarray(1000)
+    CHARACTER * 20 dppoints(0:1000)
+    INTEGER :: indexinarray = 1
+    INTEGER :: maxindexinarray = 0
+    REAL*8 :: dotp = 0.0
+    INTEGER :: unitcallcount = 0
+    INTEGER :: dptriggercount = 1
+    REAL*8 :: dpdelta
 
 CONTAINS
 
-  subroutine DPFWDUPDATECOUNT(unit)
-    CHARACTER unit*(*)
-    unitcallcount = unitcallcount + 1
-  end subroutine DPFWDUPDATECOUNT
-
-  subroutine DPREVUPDATECOUNT(unit)
-    CHARACTER unit*(*)
-    unitcallcount = unitcallcount - 1
-  end subroutine DPREVUPDATECOUNT
-
-  LOGICAL FUNCTION DPFWDTESTCOUNT(unit, pos)
-    CHARACTER unit*(*)
-    INTEGER pos
-    DPFWDTESTCOUNT = (unitcallcount.EQ.dptriggercount)
-  end FUNCTION DPFWDTESTCOUNT
-
-  LOGICAL FUNCTION DPREVTESTCOUNT(unit, pos)
-    CHARACTER unit*(*)
-    INTEGER pos
-    DPREVTESTCOUNT = (unitcallcount.EQ.dptriggercount)
-  end FUNCTION DPREVTESTCOUNT
-
-  subroutine DPFWDREAL8(xd)
-    REAL*8 xd
-    call DPPUSHREAL8(xd)
-    dotp = dotp + xd*xd
-  end subroutine DPFWDREAL8
-
-  subroutine DPFWDREAL8ARRAY(xd,sz)
-    INTEGER sz,i
-    REAL*8 xd(sz)
-    call DPPUSHREAL8ARRAY(xd,sz)
-    do i=1,sz
-       dotp = dotp + xd(i)*xd(i)
-    enddo
-  end subroutine DPFWDREAL8ARRAY
-
-  subroutine DPFWDREAL4(xd)
-    REAL xd
-    call DPPUSHREAL4(xd)
-    dotp = dotp + xd*xd
-  end subroutine DPFWDREAL4
-
-  subroutine DPFWDREAL4ARRAY(xd,sz)
-    INTEGER sz,i
-    REAL xd(sz)
-    call DPPUSHREAL4ARRAY(xd,sz)
-    do i=1,sz
-       dotp = dotp + xd(i)*xd(i)
-    enddo
-  end subroutine DPFWDREAL4ARRAY
-
-  subroutine DPCUTFWDREAL8(xd)
-    REAL*8 xd
-    xd = 0.0
-    call DPPUSHREAL8(xd)
-  end subroutine DPCUTFWDREAL8
-
-  subroutine DPCUTFWDREAL8ARRAY(xd,sz)
-    INTEGER sz,i
-    REAL*8 xd(sz)
-    do i=1,sz
-       xd(i) = 0.0
-    enddo
-    call DPPUSHREAL8ARRAY(xd,sz)
-  end subroutine DPCUTFWDREAL8ARRAY
-
-  subroutine DPREVREAL8(xb)
-    REAL*8 xb, xd
-    call DPPOPREAL8(xd)
-    dotp = dotp + xd*xb
-    xb = xd
-  end subroutine DPREVREAL8
-
-  subroutine DPREVREAL8ARRAY(xb,sz)
-    INTEGER sz,i
-    REAL*8 xb(sz), xd(100000)
-    call DPPOPREAL8ARRAY(xd,sz)
-    do i=1,sz
-       dotp = dotp + xd(i)*xb(i)
-       xb(i) = xd(i)
-    enddo
-  end subroutine DPREVREAL8ARRAY
-
-  subroutine DPREVREAL4(xb)
-    REAL xb, xd
-    call DPPOPREAL4(xd)
-    dotp = dotp + xd*xb
-    xb = xd
-  end subroutine DPREVREAL4
-
-  subroutine DPREVREAL4ARRAY(xb,sz)
-    INTEGER sz,i
-    REAL xb(sz), xd(100000)
-    call DPPOPREAL4ARRAY(xd,sz)
-    do i=1,sz
-       dotp = dotp + xd(i)*xb(i)
-       xb(i) = xd(i)
-    enddo
-  end subroutine DPREVREAL4ARRAY
-
-  subroutine DPFWDINITDISPLAY(point)
-    CHARACTER point*(*)
-    dotp = 0.0
-    dppoints(0)=point
-    print *, point,' call#',unitcallcount
-  end subroutine DPFWDINITDISPLAY
-
-  subroutine DPREVINITDISPLAY(point)
-    CHARACTER point*(*)
-    dotp = 0.0
-    print *, point,' call#',unitcallcount
-  end subroutine DPREVINITDISPLAY
-
-  subroutine DPFWDDISPLAY(point)
-    CHARACTER point*(*)
-    print *, 'DotProduct = ', dotp
-    dppoints(indexinarray)=point
-    fwdarray(indexinarray)=dotp
-    maxindexinarray = indexinarray
-    indexinarray = indexinarray+1
-    dotp = 0.0
-    print *, point,' call#',unitcallcount
-  end subroutine DPFWDDISPLAY
-
-  subroutine DPREVDISPLAY(point)
-    CHARACTER point*(*)
-    print *, 'DotProduct = ', dotp
-    indexinarray = indexinarray-1
-    revarray(indexinarray)=dotp
-    dotp = 0.0
-    print *, point,' call#',unitcallcount
-  end subroutine DPREVDISPLAY
-
-  subroutine DPPRINTSUMMARY()
-    INTEGER i
-    print *, 'Dot Product test summary:'
-    print *,dppoints(0)
-    do i=1,maxindexinarray,1
-       if (.not.(fwdarray(i).eq.revarray(i))) then
-          print *,fwdarray(i)
-          print *,revarray(i)
-       else
-          print *,fwdarray(i),'OK!'
-       endif
-       print *,dppoints(i)
-    enddo
-  end subroutine DPPRINTSUMMARY
-
-  subroutine DPPRINTPARTIALSUM(point)
-    CHARACTER point*(*)
-    print *, 'Partial sum at ',point,': ',dotp
-  end subroutine DPPRINTPARTIALSUM
-
-  subroutine DPINITDELTA()
-    dpdelta = 0.0_8
-  end subroutine DPINITDELTA
-
-  subroutine DPPRINTDELTA(point)
-    CHARACTER point*(*)
-    print *, 'Delta at ',point,': ',dotp-dpdelta
-    dpdelta = dotp
-  end subroutine DPPRINTDELTA
+    subroutine DPFWDUPDATECOUNT(unit)
+        CHARACTER unit * (*)
+        unitcallcount = unitcallcount + 1
+    end subroutine DPFWDUPDATECOUNT
+
+    subroutine DPREVUPDATECOUNT(unit)
+        CHARACTER unit * (*)
+        unitcallcount = unitcallcount - 1
+    end subroutine DPREVUPDATECOUNT
+
+    LOGICAL FUNCTION DPFWDTESTCOUNT(unit, pos)
+        CHARACTER unit * (*)
+        INTEGER pos
+        DPFWDTESTCOUNT = (unitcallcount .EQ. dptriggercount)
+    end FUNCTION DPFWDTESTCOUNT
+
+    LOGICAL FUNCTION DPREVTESTCOUNT(unit, pos)
+        CHARACTER unit * (*)
+        INTEGER pos
+        DPREVTESTCOUNT = (unitcallcount .EQ. dptriggercount)
+    end FUNCTION DPREVTESTCOUNT
+
+    subroutine DPFWDREAL8(xd)
+        REAL * 8 xd
+        call DPPUSHREAL8(xd)
+        dotp = dotp + xd * xd
+    end subroutine DPFWDREAL8
+
+    subroutine DPFWDREAL8ARRAY(xd, sz)
+        INTEGER sz, i
+        REAL * 8 xd(sz)
+        call DPPUSHREAL8ARRAY(xd, sz)
+        do i = 1, sz
+            dotp = dotp + xd(i) * xd(i)
+        end do
+    end subroutine DPFWDREAL8ARRAY
+
+    subroutine DPFWDREAL4(xd)
+        REAL xd
+        call DPPUSHREAL4(xd)
+        dotp = dotp + xd * xd
+    end subroutine DPFWDREAL4
+
+    subroutine DPFWDREAL4ARRAY(xd, sz)
+        INTEGER sz, i
+        REAL xd(sz)
+        call DPPUSHREAL4ARRAY(xd, sz)
+        do i = 1, sz
+            dotp = dotp + xd(i) * xd(i)
+        end do
+    end subroutine DPFWDREAL4ARRAY
+
+    subroutine DPCUTFWDREAL8(xd)
+        REAL * 8 xd
+        xd = 0.0
+        call DPPUSHREAL8(xd)
+    end subroutine DPCUTFWDREAL8
+
+    subroutine DPCUTFWDREAL8ARRAY(xd, sz)
+        INTEGER sz, i
+        REAL * 8 xd(sz)
+        do i = 1, sz
+            xd(i) = 0.0
+        end do
+        call DPPUSHREAL8ARRAY(xd, sz)
+    end subroutine DPCUTFWDREAL8ARRAY
+
+    subroutine DPREVREAL8(xb)
+        REAL * 8 xb, xd
+        call DPPOPREAL8(xd)
+        dotp = dotp + xd * xb
+        xb = xd
+    end subroutine DPREVREAL8
+
+    subroutine DPREVREAL8ARRAY(xb, sz)
+        INTEGER sz, i
+        REAL * 8 xb(sz), xd(100000)
+        call DPPOPREAL8ARRAY(xd, sz)
+        do i = 1, sz
+            dotp = dotp + xd(i) * xb(i)
+            xb(i) = xd(i)
+        end do
+    end subroutine DPREVREAL8ARRAY
+
+    subroutine DPREVREAL4(xb)
+        REAL xb, xd
+        call DPPOPREAL4(xd)
+        dotp = dotp + xd * xb
+        xb = xd
+    end subroutine DPREVREAL4
+
+    subroutine DPREVREAL4ARRAY(xb, sz)
+        INTEGER sz, i
+        REAL xb(sz), xd(100000)
+        call DPPOPREAL4ARRAY(xd, sz)
+        do i = 1, sz
+            dotp = dotp + xd(i) * xb(i)
+            xb(i) = xd(i)
+        end do
+    end subroutine DPREVREAL4ARRAY
+
+    subroutine DPFWDINITDISPLAY(point)
+        CHARACTER point * (*)
+        dotp = 0.0
+        dppoints(0) = point
+        print *, point, ' call#', unitcallcount
+    end subroutine DPFWDINITDISPLAY
+
+    subroutine DPREVINITDISPLAY(point)
+        CHARACTER point * (*)
+        dotp = 0.0
+        print *, point, ' call#', unitcallcount
+    end subroutine DPREVINITDISPLAY
+
+    subroutine DPFWDDISPLAY(point)
+        CHARACTER point * (*)
+        print *, 'DotProduct = ', dotp
+        dppoints(indexinarray) = point
+        fwdarray(indexinarray) = dotp
+        maxindexinarray = indexinarray
+        indexinarray = indexinarray + 1
+        dotp = 0.0
+        print *, point, ' call#', unitcallcount
+    end subroutine DPFWDDISPLAY
+
+    subroutine DPREVDISPLAY(point)
+        CHARACTER point * (*)
+        print *, 'DotProduct = ', dotp
+        indexinarray = indexinarray - 1
+        revarray(indexinarray) = dotp
+        dotp = 0.0
+        print *, point, ' call#', unitcallcount
+    end subroutine DPREVDISPLAY
+
+    subroutine DPPRINTSUMMARY()
+        INTEGER i
+        print *, 'Dot Product test summary:'
+        print *, dppoints(0)
+        do i = 1, maxindexinarray, 1
+            if (.not. (fwdarray(i) .eq. revarray(i))) then
+                print *, fwdarray(i)
+                print *, revarray(i)
+            else
+                print *, fwdarray(i), 'OK!'
+            end if
+            print *, dppoints(i)
+        end do
+    end subroutine DPPRINTSUMMARY
+
+    subroutine DPPRINTPARTIALSUM(point)
+        CHARACTER point * (*)
+        print *, 'Partial sum at ', point, ': ', dotp
+    end subroutine DPPRINTPARTIALSUM
+
+    subroutine DPINITDELTA()
+        dpdelta = 0.0_8
+    end subroutine DPINITDELTA
+
+    subroutine DPPRINTDELTA(point)
+        CHARACTER point * (*)
+        print *, 'Delta at ', point, ': ', dotp - dpdelta
+        dpdelta = dotp
+    end subroutine DPPRINTDELTA
 
 END MODULE TAPENADE_DP_TEST
diff --git a/src/adjoint/adjointAPI.F90 b/src/adjoint/adjointAPI.F90
index 24888542b..307b901d2 100644
--- a/src/adjoint/adjointAPI.F90
+++ b/src/adjoint/adjointAPI.F90
@@ -1,1257 +1,1249 @@
 module adjointAPI
 
-  use constants, only: realType, intType, alwaysRealType, adflow_real, mpi_max, mpi_sum, mpi_double_precision, &
-   mpi_integer, mpi_double_complex, mpi_wtime, maxStringLen, one, zero, NSEquations, RANSEquations
+    use constants, only: realType, intType, alwaysRealType, adflow_real, mpi_max, mpi_sum, mpi_double_precision, &
+                         mpi_integer, mpi_double_complex, mpi_wtime, maxStringLen, one, zero, NSEquations, RANSEquations
 
-  character(len=maxStringLen) :: timeFormat = "(A, 1X, F8.2)"
-  character(len=maxStringLen) :: exitFormat = "(1X, A, 1X, I5, 1X, A)"
+    character(len=maxStringLen) :: timeFormat = "(A, 1X, F8.2)"
+    character(len=maxStringLen) :: exitFormat = "(1X, A, 1X, I5, 1X, A)"
 
 contains
 #ifndef USE_COMPLEX
-  subroutine computeMatrixFreeProductFwd(xvdot, extradot, wdot, bcDataValuesdot, useSpatial, &
-       useState, famLists, bcDataNames, bcDataValues, bcDataFamLists, bcVarsEmpty, dwdot, funcsDot, fDot, &
-       costSize, fSize, nTime)
-
-    ! This is the main matrix-free forward mode computation
-    use adjointvars
-    use blockPointers, only : nDom
-    use communication, only : adflow_comm_world
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use inputPhysics, only :pointRefd, alphad, betad, equations, machCoefd, &
-         machd, machGridd, rgasdimd
-    use iteration, only : currentLevel, groundLevel
-    use flowVarRefState, only : pInfDimd, rhoInfDimd, TinfDimd
-    use adjointUtils, only : allocDerivativeValues, zeroADSeeds
-    use masterRoutines, only : master_d
-    implicit none
-
-    ! Input Variables
-    real(kind=realType), dimension(:), intent(in) :: xvdot
-    real(kind=realType), dimension(:), intent(in) :: extradot
-    real(kind=realType), dimension(:), intent(in) :: wdot
-    logical, intent(in) :: useSpatial, useState
-    integer(kind=intType), dimension(:, :) :: famLists
-    integer(kind=intType) :: costSize, fSize, nTime
-
-    character, dimension(:, :), intent(in) :: bcDataNames
-    real(kind=realType), dimension(:), intent(in) :: bcDataValues, bcDataValuesDot
-    integer(kind=intType), dimension(:, :) :: bcDataFamLists
-    logical, intent(in) :: BCVarsEmpty
-
-    ! Ouput Variables
-    real(kind=realType), dimension(size(wdot)), intent(out) :: dwDot
-    real(kind=realType), dimension(costSize, size(famLists,1)), intent(out) :: funcsDot
-    real(kind=realType), dimension(3, fSize, nTime), intent(out) :: fDot
-
-    ! Working Variables
-    integer(kind=intType) :: nn,sps, level
-    real(kind=realType), dimension(costSize, size(famLists,1)) :: funcs
-
-    ! Need to trick the residual evalution to use coupled (mean flow and
-    ! turbulent) together.
-    level = 1
-    currentLevel = level
-    groundLevel = level
-
-    ! Allocate the memory we need for derivatives if not done so
-    ! already. Note this isn't deallocated until the adflow is
-    ! destroyed.
-    if (.not. derivVarsAllocated) then
-       call allocDerivativeValues(level)
-    end if
-
-    ! Zero all AD seesd.
-    do nn=1,nDom
-       do sps=1,nTimeIntervalsSpectral
-          call zeroADSeeds(nn,level, sps)
-       end do
-    end do
-
-    ! Set the extra seeds now do the extra ones. Note that we are assuming the
-    ! machNumber used for the coefficients follows the Mach number,
-    ! not the grid mach number.
-    alphad = extraDot(iAlpha)
-    betad = extraDot(iBeta)
-    machd = extraDot(iMach)
-    machCoefd = extraDot(iMach)
-    machGridd = extraDot(iMachGrid)
-    PinfDimd = extraDot(iPressure)
-    rhoinfDimd = extraDot(iDensity)
-    tinfdimd = extraDot(iTemperature)
-    pointrefd(1) = extraDot(iPointRefX)
-    pointrefd(2) = extraDot(iPointRefY)
-    pointrefd(3) = extraDot(iPointRefZ)
-    rgasdimd = zero
-
-    ! Run the super-dee-duper master forward rotuine
-   if (bcVarsEmpty) then
-      call master_d(wDot, xVDot, fDot, dwDot, famLists, funcs, funcsDot)
-   else
-      call master_d(wDot, xVDot, fDot, dwDot, &
-           famLists, funcs, funcsDot, bcDataNames, bcDataValues, bcDataValuesdot, bcDataFamLists)
-    end if
-
-  end subroutine computeMatrixFreeProductFwd
-
-  subroutine computeMatrixFreeProductBwd(dwbar, funcsBar, fbar, useSpatial, useState, xvbar, &
-       extrabar, wbar, spatialSize, extraSize, stateSize, famLists, &
-       bcDataNames, bcDataValues, bcDataValuesbar, bcDataFamLists, BCVarsEmpty)
-    use communication, only : adflow_comm_world
-    use blockPointers, only : nDom, dwd, il, jl, kl
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use inputPhysics, only : equations
-    use iteration, only : currentLevel, groundLevel
-    use flowVarRefState, only : nw, nwf
-    use inputAdjoint, only : frozenTurbulence
-    use ADjointPETSc, only : x_like, psi_like3
-    use adjointvars, only : derivVarsAllocated
-    use utils, only : setPointers_d, EChk
-    use adjointUtils, only : allocDerivativeValues, zeroADSeeds
-    use masterRoutines, only : master_b
-    implicit none
-
-    ! Input Variables
-    integer(kind=intType), intent(in) :: stateSize, extraSize, spatialSize
-    real(kind=realType), dimension(:), intent(in) :: dwbar
-    real(kind=realType), dimension(:, :), intent(in) :: funcsBar
-    real(kind=realType), dimension(:, :, :) :: fBar
-    logical, intent(in) :: useSpatial, useState
-    integer(kind=intType), intent(in) :: famLists(:, :)
-    character, dimension(:, :), intent(in) :: bcDataNames
-    real(kind=realType), dimension(:), intent(in) :: bcDataValues
-    integer(kind=intType), dimension(:, :) :: bcDataFamLists
-    logical, intent(in) :: BCVarsEmpty
-
-    ! Ouput Variables
-    real(kind=realType), dimension(stateSize), intent(out) :: wbar
-    real(kind=realType), dimension(extraSize), intent(out) :: extrabar
-    real(kind=realType), dimension(spatialSize), intent(out) :: xvbar
-    real(kind=realType), dimension(size(bcDataValues)), intent(out) :: bcDataValuesbar
-
-    ! Working variables
-    integer(kind=intType) :: nn, sps, i, j, k, l, ii, level, nState, mm
-    logical :: resetToRans
-    real(kind=realType), dimension(size(funcsBar,1), size(funcsBar, 2)) :: funcs
-
-    ! Setup number of state variable based on turbulence assumption
-    if ( frozenTurbulence ) then
-       nState = nwf
-    else
-       nState = nw
-    endif
-
-    ! Need to trick the residual evalution to use coupled (mean flow and
-    ! turbulent) together.
-    level = 1
-    currentLevel = level
-    groundLevel = level
-
-    ! Determine if we want to use frozenTurbulent Adjoint
-    resetToRANS = .False.
-    if (frozenTurbulence .and. equations == RANSEquations) then
-       equations = NSEquations
-       resetToRANS = .True.
-    end if
-
-    ! Allocate the memory for reverse
-    if (.not. derivVarsAllocated) then
-       call allocDerivativeValues(level)
-    end if
-    do nn=1,nDom
-       do sps=1,nTimeIntervalsSpectral
-         call zeroADSeeds(nn,level, sps)
-       end do
-    end do
-
-    if (bcVarsEmpty) then
-       call master_b(wbar, xvbar, extraBar, fBar, dwbar, nState, famLists, &
-            funcs, funcsBar)
-    else
-       call master_b(wbar, xvbar, extraBar, fBar, dwbar, nState, famLists, &
-            funcs, funcsBar, bcDataNames, bcDataValues, bcDataValuesbar, bcDataFamLists)
-    end if
-
-    ! Reset the correct equation parameters if we were useing the frozen
-    ! Turbulent
-    if (resetToRANS) then
-       equations = RANSEquations
-    end if
-
-  end subroutine computeMatrixFreeProductBwd
-
-  subroutine computeMatrixFreeProductBwdFast(dwbar, wbar, stateSize)
-    ! This is the "Fast" ie. State variable only version of the reverse
-    ! mode computation. It is intended to compute dRdw^T product
-    ! ONLY. The main purpose is for fast matrix-vector products for the
-    ! actual adjoint solve.
-    use inputPhysics, only : equations
-    use inputAdjoint, only : frozenTurbulence
-    use flowVarRefState, only : nw, nwf
-    use iteration, only : currentLevel, groundLevel
-    use masterRoutines, only : master_state_b, master_b
-    use adjointvars, only : derivVarsAllocated
-    use blockpointers
-    use inputtimespectral
-    use adjointutils
-    implicit none
-
-    ! Input Variables
-    integer(kind=intType), intent(in) :: stateSize
-    real(kind=realType), dimension(stateSize), intent(in) :: dwbar
-
-    ! Ouput Variables
-    real(kind=realType), dimension(stateSize), intent(out) :: wbar
-
-
-    real(kind=realType), dimension(:), allocatable :: extrabar
-    real(kind=realType), dimension(:), allocatable :: xvbar
-    real(kind=realType), dimension(:, :, :), allocatable :: fBar
-
-
-    ! Working variables
-    integer(kind=intType) :: nState, level, nn, sps
-    logical :: resetToRans
-
-    ! Setup number of state variable based on turbulence assumption
-    if ( frozenTurbulence ) then
-       nState = nwf
-    else
-       nState = nw
-    endif
-
-    ! Assembling matrix on coarser levels is not entirely implemented yet.
-    level = 1
-    currentLevel = level
-    groundLevel = level
-
-    ! Determine if we want to use frozenTurbulent Adjoint
-    resetToRANS = .False.
-    if (frozenTurbulence .and. equations == RANSEquations) then
-       equations = NSEquations
-       resetToRANS = .True.
-    end if
-
-    ! Note: The calling routine is responsible for ensuring that the
-    ! derivative values are allocated AND ZEROED! This routine makes use
-    ! of the fact that only wbar needs to be zeroed since all other
-    ! required seeds are zeroed in the individual fast routines. This is
-    ! slightly unsafe, but it necessary for speed.
-
-    ! Allocate the memory for reverse
-    if (.not. derivVarsAllocated) then
-      call allocDerivativeValues(level)
-    end if
-     do nn=1,nDom
-       do sps=1,nTimeIntervalsSpectral
-          call zeroADSeeds(nn,level, sps)
-       end do
-    end do
-    ! allocate(xvbar(1000000), extraBar(100), fBar(3, 1466, 1))
-    ! extraBar = zero
-    ! xvbar = zero
-    ! fbar = zero
-    call master_state_b(wBar, dwBar, nState)
-    !call master_b(wbar, xvbar, extraBar, fBar, dwBar, nstate)
-
-    ! Reset the correct equation parameters if we are using the frozen
-    ! Turbulent
-    if (resetToRANS) then
-       equations = RANSEquations
-    end if
-  end subroutine computeMatrixFreeProductBwdFast
+    subroutine computeMatrixFreeProductFwd(xvdot, extradot, wdot, bcDataValuesdot, useSpatial, &
+                    useState, famLists, bcDataNames, bcDataValues, bcDataFamLists, bcVarsEmpty, dwdot, funcsDot, fDot, &
+                                           costSize, fSize, nTime)
+
+        ! This is the main matrix-free forward mode computation
+        use adjointvars
+        use blockPointers, only: nDom
+        use communication, only: adflow_comm_world
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use inputPhysics, only: pointRefd, alphad, betad, equations, machCoefd, &
+                                machd, machGridd, rgasdimd
+        use iteration, only: currentLevel, groundLevel
+        use flowVarRefState, only: pInfDimd, rhoInfDimd, TinfDimd
+        use adjointUtils, only: allocDerivativeValues, zeroADSeeds
+        use masterRoutines, only: master_d
+        implicit none
+
+        ! Input Variables
+        real(kind=realType), dimension(:), intent(in) :: xvdot
+        real(kind=realType), dimension(:), intent(in) :: extradot
+        real(kind=realType), dimension(:), intent(in) :: wdot
+        logical, intent(in) :: useSpatial, useState
+        integer(kind=intType), dimension(:, :) :: famLists
+        integer(kind=intType) :: costSize, fSize, nTime
+
+        character, dimension(:, :), intent(in) :: bcDataNames
+        real(kind=realType), dimension(:), intent(in) :: bcDataValues, bcDataValuesDot
+        integer(kind=intType), dimension(:, :) :: bcDataFamLists
+        logical, intent(in) :: BCVarsEmpty
+
+        ! Ouput Variables
+        real(kind=realType), dimension(size(wdot)), intent(out) :: dwDot
+        real(kind=realType), dimension(costSize, size(famLists, 1)), intent(out) :: funcsDot
+        real(kind=realType), dimension(3, fSize, nTime), intent(out) :: fDot
+
+        ! Working Variables
+        integer(kind=intType) :: nn, sps, level
+        real(kind=realType), dimension(costSize, size(famLists, 1)) :: funcs
+
+        ! Need to trick the residual evalution to use coupled (mean flow and
+        ! turbulent) together.
+        level = 1
+        currentLevel = level
+        groundLevel = level
+
+        ! Allocate the memory we need for derivatives if not done so
+        ! already. Note this isn't deallocated until the adflow is
+        ! destroyed.
+        if (.not. derivVarsAllocated) then
+            call allocDerivativeValues(level)
+        end if
+
+        ! Zero all AD seesd.
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call zeroADSeeds(nn, level, sps)
+            end do
+        end do
+
+        ! Set the extra seeds now do the extra ones. Note that we are assuming the
+        ! machNumber used for the coefficients follows the Mach number,
+        ! not the grid mach number.
+        alphad = extraDot(iAlpha)
+        betad = extraDot(iBeta)
+        machd = extraDot(iMach)
+        machCoefd = extraDot(iMach)
+        machGridd = extraDot(iMachGrid)
+        PinfDimd = extraDot(iPressure)
+        rhoinfDimd = extraDot(iDensity)
+        tinfdimd = extraDot(iTemperature)
+        pointrefd(1) = extraDot(iPointRefX)
+        pointrefd(2) = extraDot(iPointRefY)
+        pointrefd(3) = extraDot(iPointRefZ)
+        rgasdimd = zero
+
+        ! Run the super-dee-duper master forward rotuine
+        if (bcVarsEmpty) then
+            call master_d(wDot, xVDot, fDot, dwDot, famLists, funcs, funcsDot)
+        else
+            call master_d(wDot, xVDot, fDot, dwDot, &
+                          famLists, funcs, funcsDot, bcDataNames, bcDataValues, bcDataValuesdot, bcDataFamLists)
+        end if
+
+    end subroutine computeMatrixFreeProductFwd
+
+    subroutine computeMatrixFreeProductBwd(dwbar, funcsBar, fbar, useSpatial, useState, xvbar, &
+                                           extrabar, wbar, spatialSize, extraSize, stateSize, famLists, &
+                                           bcDataNames, bcDataValues, bcDataValuesbar, bcDataFamLists, BCVarsEmpty)
+        use communication, only: adflow_comm_world
+        use blockPointers, only: nDom, dwd, il, jl, kl
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use inputPhysics, only: equations
+        use iteration, only: currentLevel, groundLevel
+        use flowVarRefState, only: nw, nwf
+        use inputAdjoint, only: frozenTurbulence
+        use ADjointPETSc, only: x_like, psi_like3
+        use adjointvars, only: derivVarsAllocated
+        use utils, only: setPointers_d, EChk
+        use adjointUtils, only: allocDerivativeValues, zeroADSeeds
+        use masterRoutines, only: master_b
+        implicit none
+
+        ! Input Variables
+        integer(kind=intType), intent(in) :: stateSize, extraSize, spatialSize
+        real(kind=realType), dimension(:), intent(in) :: dwbar
+        real(kind=realType), dimension(:, :), intent(in) :: funcsBar
+        real(kind=realType), dimension(:, :, :) :: fBar
+        logical, intent(in) :: useSpatial, useState
+        integer(kind=intType), intent(in) :: famLists(:, :)
+        character, dimension(:, :), intent(in) :: bcDataNames
+        real(kind=realType), dimension(:), intent(in) :: bcDataValues
+        integer(kind=intType), dimension(:, :) :: bcDataFamLists
+        logical, intent(in) :: BCVarsEmpty
+
+        ! Ouput Variables
+        real(kind=realType), dimension(stateSize), intent(out) :: wbar
+        real(kind=realType), dimension(extraSize), intent(out) :: extrabar
+        real(kind=realType), dimension(spatialSize), intent(out) :: xvbar
+        real(kind=realType), dimension(size(bcDataValues)), intent(out) :: bcDataValuesbar
+
+        ! Working variables
+        integer(kind=intType) :: nn, sps, i, j, k, l, ii, level, nState, mm
+        logical :: resetToRans
+        real(kind=realType), dimension(size(funcsBar, 1), size(funcsBar, 2)) :: funcs
+
+        ! Setup number of state variable based on turbulence assumption
+        if (frozenTurbulence) then
+            nState = nwf
+        else
+            nState = nw
+        end if
+
+        ! Need to trick the residual evalution to use coupled (mean flow and
+        ! turbulent) together.
+        level = 1
+        currentLevel = level
+        groundLevel = level
+
+        ! Determine if we want to use frozenTurbulent Adjoint
+        resetToRANS = .False.
+        if (frozenTurbulence .and. equations == RANSEquations) then
+            equations = NSEquations
+            resetToRANS = .True.
+        end if
+
+        ! Allocate the memory for reverse
+        if (.not. derivVarsAllocated) then
+            call allocDerivativeValues(level)
+        end if
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call zeroADSeeds(nn, level, sps)
+            end do
+        end do
+
+        if (bcVarsEmpty) then
+            call master_b(wbar, xvbar, extraBar, fBar, dwbar, nState, famLists, &
+                          funcs, funcsBar)
+        else
+            call master_b(wbar, xvbar, extraBar, fBar, dwbar, nState, famLists, &
+                          funcs, funcsBar, bcDataNames, bcDataValues, bcDataValuesbar, bcDataFamLists)
+        end if
+
+        ! Reset the correct equation parameters if we were useing the frozen
+        ! Turbulent
+        if (resetToRANS) then
+            equations = RANSEquations
+        end if
+
+    end subroutine computeMatrixFreeProductBwd
+
+    subroutine computeMatrixFreeProductBwdFast(dwbar, wbar, stateSize)
+        ! This is the "Fast" ie. State variable only version of the reverse
+        ! mode computation. It is intended to compute dRdw^T product
+        ! ONLY. The main purpose is for fast matrix-vector products for the
+        ! actual adjoint solve.
+        use inputPhysics, only: equations
+        use inputAdjoint, only: frozenTurbulence
+        use flowVarRefState, only: nw, nwf
+        use iteration, only: currentLevel, groundLevel
+        use masterRoutines, only: master_state_b, master_b
+        use adjointvars, only: derivVarsAllocated
+        use blockpointers
+        use inputtimespectral
+        use adjointutils
+        implicit none
+
+        ! Input Variables
+        integer(kind=intType), intent(in) :: stateSize
+        real(kind=realType), dimension(stateSize), intent(in) :: dwbar
+
+        ! Ouput Variables
+        real(kind=realType), dimension(stateSize), intent(out) :: wbar
+
+        real(kind=realType), dimension(:), allocatable :: extrabar
+        real(kind=realType), dimension(:), allocatable :: xvbar
+        real(kind=realType), dimension(:, :, :), allocatable :: fBar
+
+        ! Working variables
+        integer(kind=intType) :: nState, level, nn, sps
+        logical :: resetToRans
+
+        ! Setup number of state variable based on turbulence assumption
+        if (frozenTurbulence) then
+            nState = nwf
+        else
+            nState = nw
+        end if
+
+        ! Assembling matrix on coarser levels is not entirely implemented yet.
+        level = 1
+        currentLevel = level
+        groundLevel = level
+
+        ! Determine if we want to use frozenTurbulent Adjoint
+        resetToRANS = .False.
+        if (frozenTurbulence .and. equations == RANSEquations) then
+            equations = NSEquations
+            resetToRANS = .True.
+        end if
+
+        ! Note: The calling routine is responsible for ensuring that the
+        ! derivative values are allocated AND ZEROED! This routine makes use
+        ! of the fact that only wbar needs to be zeroed since all other
+        ! required seeds are zeroed in the individual fast routines. This is
+        ! slightly unsafe, but it necessary for speed.
+
+        ! Allocate the memory for reverse
+        if (.not. derivVarsAllocated) then
+            call allocDerivativeValues(level)
+        end if
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call zeroADSeeds(nn, level, sps)
+            end do
+        end do
+        ! allocate(xvbar(1000000), extraBar(100), fBar(3, 1466, 1))
+        ! extraBar = zero
+        ! xvbar = zero
+        ! fbar = zero
+        call master_state_b(wBar, dwBar, nState)
+        !call master_b(wbar, xvbar, extraBar, fBar, dwBar, nstate)
+
+        ! Reset the correct equation parameters if we are using the frozen
+        ! Turbulent
+        if (resetToRANS) then
+            equations = RANSEquations
+        end if
+    end subroutine computeMatrixFreeProductBwdFast
 
+#endif
+    ! if def for complex
+
+    subroutine solveAdjointForRHS(inVec, outVec, nDOF, relativeTolerance)
+
+        use ADJointPETSc
+        use inputADjoint
+        use adjointvars
+        use killsignals
+        use blockPointers
+        use inputTimeSpectral
+        use utils, only: EChk
+        use adjointUtils, only: allocDerivativeValues, zeroADSeeds
+        implicit none
+
+        ! Input Variables
+        integer(kind=intType), intent(in) :: nDOF
+        real(kind=realType), dimension(ndof), intent(in) :: inVec
+        real(kind=realType), dimension(ndof), intent(out) :: outVec
+        real(kind=realType), intent(in) :: relativeTolerance
+        integer(kind=intTYpe) :: adjointConvergedReason
+        ! Working variables
+        integer(kind=intType) :: ierr, nn, sps
+        real(kind=realType) :: val
+
+#ifndef USE_COMPLEX
+
+        ! Place the arrays
+        call VecPlaceArray(psi_like1, inVec, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecPlaceArray(psi_like2, outVec, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Zero out initial solution
+        call VecSet(psi_like2, zero, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Reset normType to default in case the globalPrecon function has been called
+        ! by the user.-1 is KSP_NORM_DEFAULT, which isn't in the fortran
+        ! header for some reason
+        call KSPSetNormType(adjointKSP, -1, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Set desired realtive tolerance
+        call KSPSetTolerances(adjointKSP, relativeTolerance, adjAbsTol, adjDivTol, &
+                              adjMaxIter, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Make sure the derivative memory is allocated and zeroed.
+        if (.not. derivVarsAllocated) then
+            call allocDerivativeValues(1_intType)
+        end if
+
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call zeroADSeeds(nn, 1_intType, sps)
+            end do
+        end do
+
+        ! Solve (remember this is actually a transpose solve)
+        call KSPSolve(adjointKSP, psi_like1, psi_like2, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call KSPGetConvergedReason(adjointKSP, adjointConvergedReason, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        if (adjointConvergedReason == KSP_CONVERGED_RTOL .or. &
+            adjointConvergedReason == KSP_CONVERGED_ATOL .or. &
+            adjointConvergedReason == KSP_CONVERGED_HAPPY_BREAKDOWN) then
+            adjointFailed = .False.
+        else
+            adjointFailed = .True.
+        end if
+
+        ! Rest arrays
+        call VecResetArray(psi_like1, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecResetArray(psi_like2, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
 #endif
-  ! if def for complex
-
-  subroutine solveAdjointForRHS(inVec, outVec, nDOF, relativeTolerance)
-
-    use ADJointPETSc
-    use inputADjoint
-    use adjointvars
-    use killsignals
-    use blockPointers
-    use inputTimeSpectral
-    use utils, only : EChk
-    use adjointUtils, only : allocDerivativeValues, zeroADSeeds
-    implicit none
-
-    ! Input Variables
-    integer(kind=intType), intent(in) :: nDOF
-    real(kind=realType), dimension(ndof), intent(in) :: inVec
-    real(kind=realType), dimension(ndof), intent(out) :: outVec
-    real(kind=realType), intent(in) :: relativeTolerance
-    integer(kind=intTYpe) :: adjointConvergedReason
-    ! Working variables
-    integer(kind=intType) :: ierr, nn, sps
-    real(kind=realType) :: val
+
+    end subroutine solveAdjointForRHS
+
+    subroutine solveDirectForRHS(inVec, outVec, nDOF, relativeTolerance)
+
+        use ADJointPETSc
+        use inputADjoint
+        use adjointVars
+        use killsignals
+        use blockPointers
+        use inputTimeSpectral
+        use utils, only: EChk
+        use adjointUtils, only: allocDerivativeValues, zeroADSeeds
+        implicit none
+
+        ! Input Variables
+        integer(kind=intType), intent(in) :: nDOF
+        real(kind=realType), dimension(ndof), intent(in) :: inVec
+        real(kind=realType), dimension(ndof), intent(out) :: outVec
+        real(kind=realType), intent(in) :: relativeTolerance
+        integer(kind=intTYpe) :: adjointConvergedReason
+        ! Working variables
+        integer(kind=intType) :: ierr, nn, sps
 
 #ifndef USE_COMPLEX
 
-    ! Place the arrays
-    call VecPlaceArray(psi_like1, inVec, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    call VecPlaceArray(psi_like2, outVec, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! Zero out initial solution
-    call VecSet(psi_like2, zero, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! Reset normType to default in case the globalPrecon function has been called
-    ! by the user.-1 is KSP_NORM_DEFAULT, which isn't in the fortran
-    ! header for some reason
-    call KSPSetNormType(adjointKSP, -1, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Set desired realtive tolerance
-    call KSPSetTolerances(adjointKSP, relativeTolerance, adjAbsTol, adjDivTol, &
-         adjMaxIter, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Make sure the derivative memory is allocated and zeroed.
-    if (.not. derivVarsAllocated) then
-       call allocDerivativeValues(1_intType)
-    end if
-
-    do nn=1,nDom
-       do sps=1,nTimeIntervalsSpectral
-          call zeroADSeeds(nn, 1_intType, sps)
-       end do
-    end do
-
-    ! Solve (remember this is actually a transpose solve)
-    call KSPSolve(adjointKSP, psi_like1, psi_like2, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    call KSPGetConvergedReason(adjointKSP, adjointConvergedReason, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    if (adjointConvergedReason ==  KSP_CONVERGED_RTOL .or. &
-         adjointConvergedReason ==  KSP_CONVERGED_ATOL .or. &
-         adjointConvergedReason ==  KSP_CONVERGED_HAPPY_BREAKDOWN) then
-       adjointFailed = .False.
-    else
-       adjointFailed = .True.
-    end if
-
-    ! Rest arrays
-    call VecResetArray(psi_like1,  ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    call VecResetArray(psi_like2,  ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+        ! Place the arrays
+        call VecPlaceArray(psi_like1, inVec, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecPlaceArray(psi_like2, outVec, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Zero out initial solution
+        call VecSet(psi_like2, zero, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Reset normType to default in case the globalPrecon function has been called
+        ! by the user.-1 is KSP_NORM_DEFAULT, which isn't in the fortran
+        ! header for some reason
+        call KSPSetNormType(adjointKSP, -1, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Set desired realtive tolerance
+        call KSPSetTolerances(adjointKSP, relativeTolerance, adjAbsTol, adjDivTol, &
+                              adjMaxIter, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Make sure the derivative memory is allocated and zeroed.
+        if (.not. derivVarsAllocated) then
+            call allocDerivativeValues(1_intType)
+        end if
+
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call zeroADSeeds(nn, 1_intType, sps)
+            end do
+        end do
+
+        ! Solve (this is the transpose solve of a transpose matrix, so it's direct)
+        call KSPSolveTranspose(adjointKSP, psi_like1, psi_like2, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call KSPGetConvergedReason(adjointKSP, adjointConvergedReason, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        if (adjointConvergedReason == KSP_CONVERGED_RTOL .or. &
+            adjointConvergedReason == KSP_CONVERGED_ATOL .or. &
+            adjointConvergedReason == KSP_CONVERGED_HAPPY_BREAKDOWN) then
+            adjointFailed = .False.
+        else
+            adjointFailed = .True.
+        end if
+
+        ! Rest arrays
+        call VecResetArray(psi_like1, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecResetArray(psi_like2, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
 #endif
 
-  end subroutine solveAdjointForRHS
+    end subroutine solveDirectForRHS
 
-  subroutine solveDirectForRHS(inVec, outVec, nDOF, relativeTolerance)
+    subroutine saveADjointMatrix(fileName)
 
+        use ADjointPETSc, only: drdwt
+        use communication, only: adflow_comm_world
+        use utils, only: EChk
+#include <petsc/finclude/petsc.h>
+        use petsc
+        implicit none
 
-    use ADJointPETSc
-    use inputADjoint
-    use adjointVars
-    use killsignals
-    use blockPointers
-    use inputTimeSpectral
-    use utils, only : EChk
-    use adjointUtils, only : allocDerivativeValues, zeroADSeeds
-    implicit none
+        ! Input params
+        character(len=*), intent(in) :: fileName
 
-    ! Input Variables
-    integer(kind=intType), intent(in) :: nDOF
-    real(kind=realType), dimension(ndof), intent(in) :: inVec
-    real(kind=realType), dimension(ndof), intent(out) :: outVec
-    real(kind=realType), intent(in) :: relativeTolerance
-    integer(kind=intTYpe) :: adjointConvergedReason
-    ! Working variables
-    integer(kind=intType) :: ierr, nn, sps
+        ! Working parameters
+        PetscViewer binViewer
+        integer(kind=intType) :: ierr
 
-#ifndef USE_COMPLEX
+        call PetscViewerBinaryOpen(adflow_comm_world, fileName, FILE_MODE_WRITE, binViewer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    ! Place the arrays
-    call VecPlaceArray(psi_like1, inVec, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    call VecPlaceArray(psi_like2, outVec, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! Zero out initial solution
-    call VecSet(psi_like2, zero, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! Reset normType to default in case the globalPrecon function has been called
-    ! by the user.-1 is KSP_NORM_DEFAULT, which isn't in the fortran
-    ! header for some reason
-    call KSPSetNormType(adjointKSP, -1, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Set desired realtive tolerance
-    call KSPSetTolerances(adjointKSP, relativeTolerance, adjAbsTol, adjDivTol, &
-         adjMaxIter, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Make sure the derivative memory is allocated and zeroed.
-    if (.not. derivVarsAllocated) then
-       call allocDerivativeValues(1_intType)
-    end if
-
-    do nn=1,nDom
-       do sps=1,nTimeIntervalsSpectral
-          call zeroADSeeds(nn, 1_intType, sps)
-       end do
-    end do
-
-    ! Solve (this is the transpose solve of a transpose matrix, so it's direct)
-    call KSPSolveTranspose(adjointKSP, psi_like1, psi_like2, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    call KSPGetConvergedReason(adjointKSP, adjointConvergedReason, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    if (adjointConvergedReason ==  KSP_CONVERGED_RTOL .or. &
-         adjointConvergedReason ==  KSP_CONVERGED_ATOL .or. &
-         adjointConvergedReason ==  KSP_CONVERGED_HAPPY_BREAKDOWN) then
-       adjointFailed = .False.
-    else
-       adjointFailed = .True.
-    end if
-
-    ! Rest arrays
-    call VecResetArray(psi_like1, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    call VecResetArray(psi_like2, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+        call MatView(dRdwT, binViewer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-#endif
+        call PetscViewerDestroy(binViewer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-  end subroutine solveDirectForRHS
+    end subroutine saveADjointMatrix
 
-  subroutine saveADjointMatrix(fileName)
+    subroutine saveAdjointPC(fileName)
 
-    use ADjointPETSc, only: drdwt
-    use communication, only : adflow_comm_world
-    use utils, only : EChk
+        use ADjointPETSc, only: drdwpret
+        use communication, only: adflow_comm_world
+        use utils, only: EChk
 #include <petsc/finclude/petsc.h>
-    use petsc
-    implicit none
+        use petsc
+        implicit none
 
-    ! Input params
-    character(len=*), intent(in) :: fileName
+        ! Input params
+        character(len=*), intent(in) :: fileName
 
-    ! Working parameters
-    PetscViewer binViewer
-    integer(kind=intType) :: ierr
+        ! Working parameters
+        PetscViewer binViewer
+        integer(kind=intType) :: ierr
 
-    call PetscViewerBinaryOpen(adflow_comm_world, fileName, FILE_MODE_WRITE, binViewer, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        call PetscViewerBinaryOpen(adflow_comm_world, fileName, FILE_MODE_WRITE, binViewer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    call MatView(dRdwT, binViewer, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        call MatView(dRdwPreT, binViewer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    call PetscViewerDestroy(binViewer,ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        call PetscViewerDestroy(binViewer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-  end subroutine saveADjointMatrix
+    end subroutine saveAdjointPC
 
-  subroutine saveAdjointPC(fileName)
+    subroutine saveAdjointRHS(RHS, fileName, nstate)
 
-    use ADjointPETSc, only: drdwpret
-    use communication, only : adflow_comm_world
-    use utils, only : EChk
+        use ADjointPETSc, only: psi_like1
+        use communication, only: adflow_comm_world
+        use utils, only: EChk
 #include <petsc/finclude/petsc.h>
-    use petsc
-    implicit none
+        use petsc
+        implicit none
+
+        ! Input params
+        character(len=*), intent(in) :: fileName
+        integer(kind=intType) :: nstate
+        real(kind=realType), dimension(nState) :: RHS
+
+        ! Working parameters
+        PetscViewer binViewer
+        integer(kind=intType) :: ierr
+
+        ! Dump RHS into psi_like1
+        call VecPlaceArray(psi_like1, RHS, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call PetscViewerBinaryOpen(adflow_comm_world, fileName, FILE_MODE_WRITE, binViewer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecView(psi_like1, binViewer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call PetscViewerDestroy(binViewer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecResetArray(psi_like1, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+    end subroutine saveAdjointRHS
+
+    subroutine spectralPrecscribedMotion(input, nin, dXv, nout)
+
+        use blockPointers, only: il, jl, kl, nDom
+        use section, only: sections, nSections
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use monitor, only: timeUnsteadyRestart, timeUnsteady
+        use utils, only: setPointers, rotMatrixRigidBody
+        implicit none
+        ! Input/Output Variables
+        integer(kind=intType), intent(in) :: nin, nout
+        real(kind=realType), intent(out) :: dXv(nout)
+        real(kind=realType), intent(in) :: input(nin)
+
+        ! Local Variables
+        integer(kind=intType) :: ierr, sps, i, nn, mm, counter0, counter1
+        integer(kind=intType) :: nodes_on_block, cum_nodes_on_block
+        real(kind=realType), dimension(3) :: rotationPoint, r
+        real(kind=realType), dimension(3, 3) :: rotationMatrix
+        real(kind=realType) :: t(nSections), dt(nSections)
+        real(kind=realType) :: tOld, tNew, pt(3)
+        real(kind=realType), pointer :: xvec_pointer(:)
+        real(kind=realType) :: time(3)
+
+        !       For the TimeSpectral case, we need to include    *
+        !      the operation that rotates the base grid to each time instance
+        !      This is basically the reverse of the operation that is done in
+        !      setGrid.f90
+        !      The operation in setGrid.f90 is the following
+        !      X_sps = M(X - rotPoint) + rotPoint
+        !      where
+        !      X_sps is the set of coordinates at each time instance
+        !      M is the rotation matrix calculated by rotMatrixRigidBody
+        !      rotPoint is the point about which the motion takes place
+        !      It is easy to see dX_sps/dX = M
+        !      What we are actually computing is the following:
+        !                 T          T
+        !        /dX_sps \ /   dR   \
+        !        |-------| |------- |  psi
+        !        \  dX   / \ dX_sps /
+
+        ! Zero dXv for time spectral case since we add to array.
+        dXv = zero
+
+        do nn = 1, nSections
+            dt(nn) = sections(nn)%timePeriod &
+                     / real(nTimeIntervalsSpectral, realType)
+        end do
+
+        timeUnsteady = zero
+        counter0 = 0
+        cum_nodes_on_block = 0
+        ! The nDom loop followed by the sps loop is required to follow
+        ! the globalNode ordering such that we can use the pointer from
+        ! vecGetArrayF90
+
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+
+                call setPointers(nn, 1, sps)
+                nodes_on_block = il * jl * kl
+
+                do mm = 1, nSections
+                    t(mm) = (sps - 1) * dt(mm)
+                end do
 
-    ! Input params
-    character(len=*), intent(in) :: fileName
+                ! Compute the displacements due to the rigid motion of the mesh.
 
-    ! Working parameters
-    PetscViewer binViewer
-    integer(kind=intType) :: ierr
+                tNew = timeUnsteady + timeUnsteadyRestart
+                tOld = tNew - t(1)
 
-    call PetscViewerBinaryOpen(adflow_comm_world, fileName, FILE_MODE_WRITE, binViewer, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+                call rotMatrixRigidBody(tNew, tOld, rotationMatrix, rotationPoint)
 
-    call MatView(dRdwPreT, binViewer, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+                ! Take rotation Matrix Transpose
+                rotationMatrix = transpose(rotationMatrix)
 
-    call PetscViewerDestroy(binViewer,ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+                counter1 = cum_nodes_on_block
 
-  end subroutine saveAdjointPC
+                ! Loop over the localally owned nodes:
+                do i = 1, nodes_on_block
+                    pt = (/input(3 * counter0 + 1), &
+                           input(3 * counter0 + 2), &
+                           input(3 * counter0 + 3)/)
 
-  subroutine saveAdjointRHS(RHS, fileName, nstate)
+                    dXv(3 * counter1 + 1:3 * counter1 + 3) = &
+                        dXv(3 * counter1 + 1:3 * counter1 + 3) + &
+                        matmul(rotationMatrix, pt)
 
-    use ADjointPETSc, only: psi_like1
-    use communication, only : adflow_comm_world
-    use utils, only : EChk
-#include <petsc/finclude/petsc.h>
-    use petsc
-    implicit none
-
-    ! Input params
-    character(len=*), intent(in) :: fileName
-    integer(kind=intType) :: nstate
-    real(kind=realType), dimension(nState) :: RHS
-
-    ! Working parameters
-    PetscViewer binViewer
-    integer(kind=intType) :: ierr
-
-    ! Dump RHS into psi_like1
-    call VecPlaceArray(psi_like1, RHS, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    call PetscViewerBinaryOpen(adflow_comm_world, fileName, FILE_MODE_WRITE, binViewer, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    call VecView(psi_like1, binViewer, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    call PetscViewerDestroy(binViewer,ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    call VecResetArray(psi_like1, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-  end subroutine saveAdjointRHS
-
-
-  subroutine spectralPrecscribedMotion(input, nin, dXv, nout)
-
-    use blockPointers, only : il, jl, kl, nDom
-    use section, only : sections, nSections
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use monitor , only : timeUnsteadyRestart, timeUnsteady
-    use utils, only : setPointers, rotMatrixRigidBody
-    implicit none
-    ! Input/Output Variables
-    integer(kind=intType), intent(in) :: nin, nout
-    real(kind=realType), intent(out)  :: dXv(nout)
-    real(kind=realType), intent(in)   :: input(nin)
-
-    ! Local Variables
-    integer(kind=intType) :: ierr, sps, i, nn, mm, counter0, counter1
-    integer(kind=intType) :: nodes_on_block, cum_nodes_on_block
-    real(kind=realType), dimension(3)   :: rotationPoint, r
-    real(kind=realType), dimension(3, 3) :: rotationMatrix
-    real(kind=realType) :: t(nSections), dt(nSections)
-    real(kind=realType) :: tOld, tNew, pt(3)
-    real(kind=realType), pointer :: xvec_pointer(:)
-    real(kind=realType) :: time(3)
-
-    !       For the TimeSpectral case, we need to include    *
-    !      the operation that rotates the base grid to each time instance
-    !      This is basically the reverse of the operation that is done in
-    !      setGrid.f90
-    !      The operation in setGrid.f90 is the following
-    !      X_sps = M(X - rotPoint) + rotPoint
-    !      where
-    !      X_sps is the set of coordinates at each time instance
-    !      M is the rotation matrix calculated by rotMatrixRigidBody
-    !      rotPoint is the point about which the motion takes place
-    !      It is easy to see dX_sps/dX = M
-    !      What we are actually computing is the following:
-    !                 T          T
-    !        /dX_sps \ /   dR   \
-    !        |-------| |------- |  psi
-    !        \  dX   / \ dX_sps /
-
-    ! Zero dXv for time spectral case since we add to array.
-    dXv = zero
-
-    do nn=1, nSections
-       dt(nn) = sections(nn)%timePeriod &
-            / real(nTimeIntervalsSpectral, realType)
-    enddo
-
-    timeUnsteady = zero
-    counter0 = 0
-    cum_nodes_on_block = 0
-    ! The nDom loop followed by the sps loop is required to follow
-    ! the globalNode ordering such that we can use the pointer from
-    ! vecGetArrayF90
-
-    do nn=1, nDom
-       do sps = 1, nTimeIntervalsSpectral
-
-          call setPointers(nn, 1, sps)
-          nodes_on_block = il*jl*kl
-
-          do mm=1, nSections
-             t(mm) = (sps-1)*dt(mm)
-          enddo
-
-          ! Compute the displacements due to the rigid motion of the mesh.
-
-          tNew = timeUnsteady + timeUnsteadyRestart
-          tOld = tNew - t(1)
-
-          call rotMatrixRigidBody(tNew, tOld, rotationMatrix, rotationPoint)
-
-          ! Take rotation Matrix Transpose
-          rotationMatrix = transpose(rotationMatrix)
-
-          counter1 = cum_nodes_on_block
-
-          ! Loop over the localally owned nodes:
-          do i=1, nodes_on_block
-             pt = (/input(3*counter0+1), &
-                  input(3*counter0+2), &
-                  input(3*counter0+3)/)
-
-             dXv(3*counter1+1:3*counter1+3) = &
-                  dXv(3*counter1+1:3*counter1+3) + &
-                  matmul(rotationMatrix, pt)
-
-             counter0 = counter0 + 1
-             counter1 = counter1 + 1
-          end do
-
-       end do
-       ! Increment the cumulative number of nodes by the nodes on the
-       ! block we just did
-       cum_nodes_on_block = cum_nodes_on_block + nodes_on_block
-    end do
-
-  end subroutine spectralPrecscribedMotion
-
-  subroutine setupAllResidualMatricesfwd
-
-    use ADjointPETSc, only : dRdwT
-    use communication, only : adflow_comm_world, myid
-    use inputADjoint, only : frozenTurbulence, useMatrixFreedRdw
-    use adjointUtils, only : setupStateResidualMatrix
-    use utils, only : EChk
-    implicit none
-
-    logical :: useAD, useTranspose, usePC, useObjective
-    real(kind=realType) :: timeAdjLocal, timeAdj, time(2)
-    integer(kind=intType) :: ierr
-
-    ! If we are assembling matrices...we ned to assemble the
-    ! 'transpose', with 'AD', we want the exact matrix not the 'PC',
-    ! and will compute objective RHS
-    useAD = .True.
-    usePC = .False.
-    useTranspose = .True.
-    useObjective = .True.
-
-    if (.not. useMatrixFreedRdw) then
-       if( myid ==0 ) then
-          write(*, "(A)") "Assembling State Residual Matrix in Forward mode..."
-       end if
-       time(1) = mpi_wtime()
-       call setupStateResidualMatrix(drdwT, useAD, usePC, useTranspose, &
-            useObjective, frozenTurbulence, 1_intType)
-       time(2) = mpi_wtime()
-       timeAdjLocal = time(2)-time(1)
-
-       call mpi_reduce(timeAdjLocal, timeAdj, 1, adflow_real, &
-            mpi_max, 0, ADFLOW_COMM_WORLD, ierr)
-       call EChk(ierr,  __FILE__, __LINE__)
-
-       if(myid ==0)  then
-          write(*, timeFormat) "Assembling State Residaul Matrices Fwd time (s) = ", timeAdj
-       end if
-    end if
-
-  end subroutine setupAllResidualMatricesfwd
-
-  subroutine solveAdjoint(RHS, psi, checkSolution, nState)
-    !
-    !      Solve the linear discrete ADjoint system of equations
-    !          [dR/dW]T . psi = {RHS}
-    !      using preconditioned GMRES provided by PETSc. The values in psi
-    !      are significant as they are used as the inital guess.
-    !
-
-    use ADjointPETSc, only : dRdwT, psi_like1, psi_like2, adjointKSP, &
-         adjResInit, adjResStart, adjResFinal
-
-    use killsignals, only : adjointFailed
-    use inputADjoint, only : adjAbsTol, adjDivTol, adjMaxIter, adjRelTol, &
-         adjRelTolRel, adjMaxL2Dev, printTiming
-    use adjointVars, only: derivVarsAllocated
-    use communication, only : myid, adflow_comm_world
-    use blockPointers, only : nDom
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use adjointUtils, only : allocDerivativeValues, zeroADSeeds
-    use utils, only : EChk
+                    counter0 = counter0 + 1
+                    counter1 = counter1 + 1
+                end do
+
+            end do
+            ! Increment the cumulative number of nodes by the nodes on the
+            ! block we just did
+            cum_nodes_on_block = cum_nodes_on_block + nodes_on_block
+        end do
+
+    end subroutine spectralPrecscribedMotion
+
+    subroutine setupAllResidualMatricesfwd
+
+        use ADjointPETSc, only: dRdwT
+        use communication, only: adflow_comm_world, myid
+        use inputADjoint, only: frozenTurbulence, useMatrixFreedRdw
+        use adjointUtils, only: setupStateResidualMatrix
+        use utils, only: EChk
+        implicit none
+
+        logical :: useAD, useTranspose, usePC, useObjective
+        real(kind=realType) :: timeAdjLocal, timeAdj, time(2)
+        integer(kind=intType) :: ierr
+
+        ! If we are assembling matrices...we ned to assemble the
+        ! 'transpose', with 'AD', we want the exact matrix not the 'PC',
+        ! and will compute objective RHS
+        useAD = .True.
+        usePC = .False.
+        useTranspose = .True.
+        useObjective = .True.
+
+        if (.not. useMatrixFreedRdw) then
+            if (myid == 0) then
+                write (*, "(A)") "Assembling State Residual Matrix in Forward mode..."
+            end if
+            time(1) = mpi_wtime()
+            call setupStateResidualMatrix(drdwT, useAD, usePC, useTranspose, &
+                                          useObjective, frozenTurbulence, 1_intType)
+            time(2) = mpi_wtime()
+            timeAdjLocal = time(2) - time(1)
+
+            call mpi_reduce(timeAdjLocal, timeAdj, 1, adflow_real, &
+                            mpi_max, 0, ADFLOW_COMM_WORLD, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            if (myid == 0) then
+                write (*, timeFormat) "Assembling State Residaul Matrices Fwd time (s) = ", timeAdj
+            end if
+        end if
+
+    end subroutine setupAllResidualMatricesfwd
+
+    subroutine solveAdjoint(RHS, psi, checkSolution, nState)
+        !
+        !      Solve the linear discrete ADjoint system of equations
+        !          [dR/dW]T . psi = {RHS}
+        !      using preconditioned GMRES provided by PETSc. The values in psi
+        !      are significant as they are used as the inital guess.
+        !
+
+        use ADjointPETSc, only: dRdwT, psi_like1, psi_like2, adjointKSP, &
+                                adjResInit, adjResStart, adjResFinal
+
+        use killsignals, only: adjointFailed
+        use inputADjoint, only: adjAbsTol, adjDivTol, adjMaxIter, adjRelTol, &
+                                adjRelTolRel, adjMaxL2Dev, printTiming
+        use adjointVars, only: derivVarsAllocated
+        use communication, only: myid, adflow_comm_world
+        use blockPointers, only: nDom
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use adjointUtils, only: allocDerivativeValues, zeroADSeeds
+        use utils, only: EChk
 #include <petsc/finclude/petsc.h>
-    use petsc
-    implicit none
-
-    ! Input Parameters
-    integer(kind=intType) :: nState
-    real(kind=realType), dimension(nState) :: RHS, psi
-    logical :: checkSolution
-    !
-    !     Local variables.
-    real(kind=alwaysRealType)   :: norm
-    real(kind=alwaysRealType), dimension(2) :: time
-    real(kind=alwaysRealType)               :: timeAdjLocal, timeAdj
-    real(kind=alwaysRealType) :: l2abs, l2rel
-    integer(kind=intType) :: ierr, nn, sps
-    integer(kind=intType) :: adjConvIts
-    KSPConvergedReason adjointConvergedReason
-    Vec adjointRes, RHSVec
+        use petsc
+        implicit none
+
+        ! Input Parameters
+        integer(kind=intType) :: nState
+        real(kind=realType), dimension(nState) :: RHS, psi
+        logical :: checkSolution
+        !
+        !     Local variables.
+        real(kind=alwaysRealType) :: norm
+        real(kind=alwaysRealType), dimension(2) :: time
+        real(kind=alwaysRealType) :: timeAdjLocal, timeAdj
+        real(kind=alwaysRealType) :: l2abs, l2rel
+        integer(kind=intType) :: ierr, nn, sps
+        integer(kind=intType) :: adjConvIts
+        KSPConvergedReason adjointConvergedReason
+        Vec adjointRes, RHSVec
 
 #ifndef USE_COMPLEX
-    ! Send some feedback to screen.
-
-    if(myid ==0 .and. printTiming)  &
-         write(*, "(A)") "Solving ADjoint Transpose with PETSc..."
+        ! Send some feedback to screen.
+
+        if (myid == 0 .and. printTiming) &
+            write (*, "(A)") "Solving ADjoint Transpose with PETSc..."
 
-    call cpu_time(time(1))
+        call cpu_time(time(1))
 
-    ! Make sure the derivative memory is allocated and zeroed.
-    if (.not. derivVarsAllocated) then
-       call allocDerivativeValues(1_intType)
-    end if
+        ! Make sure the derivative memory is allocated and zeroed.
+        if (.not. derivVarsAllocated) then
+            call allocDerivativeValues(1_intType)
+        end if
 
-    do nn=1,nDom
-       do sps=1,nTimeIntervalsSpectral
-          call zeroADSeeds(nn, 1_intType, sps)
-       end do
-    end do
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call zeroADSeeds(nn, 1_intType, sps)
+            end do
+        end do
 
-    ! Dump psi into psi_like1 and RHS into psi_like2
-    call VecPlaceArray(psi_like1, psi, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+        ! Dump psi into psi_like1 and RHS into psi_like2
+        call VecPlaceArray(psi_like1, psi, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    call VecPlaceArray(psi_like2, RHS, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+        call VecPlaceArray(psi_like2, RHS, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    call VecDuplicate(psi_like1, adjointRes, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-    if (checkSolution) then
-       call VecDuplicate(psi_like1, RHSVec, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       call vecCopy(psi_like2, RHSVec, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-    end if
-
-    ! Get the RHS norm....this is the 'init' norm:
-    call VecNorm(psi_like2, NORM_2, adjResInit, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! Get Current Residual -- we always solve for the delta
-    call MatMult(dRdWT, psi_like1, adjointRes, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! AdjointRes = AdjointRes - adjointRHS
-    call VecAXPY(adjointRes, -one, psi_like2, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+        call VecDuplicate(psi_like1, adjointRes, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+        if (checkSolution) then
+            call VecDuplicate(psi_like1, RHSVec, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            call vecCopy(psi_like2, RHSVec, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+        end if
+
+        ! Get the RHS norm....this is the 'init' norm:
+        call VecNorm(psi_like2, NORM_2, adjResInit, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Get Current Residual -- we always solve for the delta
+        call MatMult(dRdWT, psi_like1, adjointRes, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! AdjointRes = AdjointRes - adjointRHS
+        call VecAXPY(adjointRes, -one, psi_like2, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    ! Norm of adjoint Residual
-    call VecNorm(adjointRes, NORM_2, adjResStart,ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+        ! Norm of adjoint Residual
+        call VecNorm(adjointRes, NORM_2, adjResStart, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    ! The way we use tolerances are as follows: The residual must
-    ! statify:
-    ! res < adjRelTol * adjResInit OR
-    ! res < adjRelTolRel * adjResStart OR
-    ! res < adjAbsTol
-
-    ! L2Abs is used to stipulate an exit criteria for adjreltolrel
-    L2abs = adjResStart * adjreltolrel
-
-    ! If L2Abs is less that what we actually want as the absolute
-    ! tolerance, clip it
-    if (L2Abs < adjAbsTol) then
-       L2abs = adjabstol
-    end if
-
-    ! L2Rel is a little tricky since if the start residual is *larger*
-    ! than the init residual, it won't converge enough. While this seems
-    ! strange this is *always* the case for restarted RANS-based
-    ! adjoints.
-    L2Rel = (adjReltol * adjResInit) / adjResStart
-
-    ! We need to clip L2Rel such that it can never be greater than one.
-    L2Rel = min(L2Rel, 0.9)
-
-    ! Reset normType to default in case the globalPrecon function has been called
-    ! by the user. -1 is KSP_NORM_DEFAULT, which isn't in the fortran
-    ! header for some reason
-    call KSPSetNormType(adjointKSP, -1, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Set the tolerances
-    call KSPSetTolerances(adjointKSP, L2Rel, L2Abs, adjDivTol, &
-         adjMaxIter, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! Solve the update (psi_like2)
-    call KSPSolve(adjointKSP, adjointRes, psi_like2, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! Now compute the update to psi_like1 (psi)
-    call VecAXPY(psi_like1, -one, psi_like2, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    if (checkSolution) then
-
-       ! Get new time and compute the elapsed time.
-       call cpu_time(time(2))
-       timeAdjLocal = time(2)-time(1)
-
-       ! Determine the maximum time using MPI reduce
-       ! with operation mpi_max.
-
-       call mpi_reduce(timeAdjLocal, timeAdj, 1, adflow_real, mpi_max, 0, ADFLOW_COMM_WORLD, ierr)
-
-       call MatMult(dRdWT, psi_like1, adjointRes, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       call VecAXPY(adjointRes, -one, RHSVec, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       call VecNorm(adjointRes, NORM_2, norm,ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-       adjResFinal = norm
-
-       call KSPGetIterationNumber(adjointKSP,adjConvIts,ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ! Use the root processor to display the output summary, such as
-       ! the norm of error and the number of iterations
-
-       if( myid ==0 .and. printTiming) then
-          write(*, timeFormat) "Solving ADjoint Transpose with PETSc time (s) =", timeAdj
-          write(*, "(1X, A, 1X, ES10.4, 4X, A, 1X, I4)") "Norm of error =",norm,"Iterations =",adjConvIts
-          write(*,*) "------------------------------------------------"
-          if( adjConvIts.lt.0 ) then
-             write(*, exitFormat) "PETSc solver diverged after", -adjConvIts, "iterations..."
-          else
-            write(*, exitFormat) "PETSc solver converged after", adjConvIts, "iterations."
-          endif
-          write(*,*) "------------------------------------------------"
-       endif
-
-       call VecDestroy(RHSVec, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-    end if
-
-    ! Destroy the temporary vector and reset the arrays
-    call VecDestroy(adjointRes, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    call VecResetArray(psi_like1, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    call VecResetArray(psi_like2, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! Get the petsc converged reason and set the fail flag
-    call KSPGetConvergedReason(adjointKSP, adjointConvergedReason,ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    if (adjointConvergedReason ==  KSP_CONVERGED_RTOL .or. &
-         adjointConvergedReason ==  KSP_CONVERGED_ATOL .or. &
-         adjointConvergedReason ==  KSP_CONVERGED_HAPPY_BREAKDOWN .or. &
-         adjResFinal / adjResStart < L2Rel * adjMaxL2Dev) then
-       adjointFailed = .False.
-    else
-       adjointFailed = .True.
-    end if
+        ! The way we use tolerances are as follows: The residual must
+        ! statify:
+        ! res < adjRelTol * adjResInit OR
+        ! res < adjRelTolRel * adjResStart OR
+        ! res < adjAbsTol
+
+        ! L2Abs is used to stipulate an exit criteria for adjreltolrel
+        L2abs = adjResStart * adjreltolrel
+
+        ! If L2Abs is less that what we actually want as the absolute
+        ! tolerance, clip it
+        if (L2Abs < adjAbsTol) then
+            L2abs = adjabstol
+        end if
+
+        ! L2Rel is a little tricky since if the start residual is *larger*
+        ! than the init residual, it won't converge enough. While this seems
+        ! strange this is *always* the case for restarted RANS-based
+        ! adjoints.
+        L2Rel = (adjReltol * adjResInit) / adjResStart
+
+        ! We need to clip L2Rel such that it can never be greater than one.
+        L2Rel = min(L2Rel, 0.9)
+
+        ! Reset normType to default in case the globalPrecon function has been called
+        ! by the user. -1 is KSP_NORM_DEFAULT, which isn't in the fortran
+        ! header for some reason
+        call KSPSetNormType(adjointKSP, -1, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Set the tolerances
+        call KSPSetTolerances(adjointKSP, L2Rel, L2Abs, adjDivTol, &
+                              adjMaxIter, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Solve the update (psi_like2)
+        call KSPSolve(adjointKSP, adjointRes, psi_like2, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Now compute the update to psi_like1 (psi)
+        call VecAXPY(psi_like1, -one, psi_like2, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        if (checkSolution) then
+
+            ! Get new time and compute the elapsed time.
+            call cpu_time(time(2))
+            timeAdjLocal = time(2) - time(1)
+
+            ! Determine the maximum time using MPI reduce
+            ! with operation mpi_max.
+
+            call mpi_reduce(timeAdjLocal, timeAdj, 1, adflow_real, mpi_max, 0, ADFLOW_COMM_WORLD, ierr)
+
+            call MatMult(dRdWT, psi_like1, adjointRes, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            call VecAXPY(adjointRes, -one, RHSVec, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            call VecNorm(adjointRes, NORM_2, norm, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+            adjResFinal = norm
+
+            call KSPGetIterationNumber(adjointKSP, adjConvIts, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Use the root processor to display the output summary, such as
+            ! the norm of error and the number of iterations
+
+            if (myid == 0 .and. printTiming) then
+                write (*, timeFormat) "Solving ADjoint Transpose with PETSc time (s) =", timeAdj
+                write (*, "(1X, A, 1X, ES10.4, 4X, A, 1X, I4)") "Norm of error =", norm, "Iterations =", adjConvIts
+                write (*, *) "------------------------------------------------"
+                if (adjConvIts .lt. 0) then
+                    write (*, exitFormat) "PETSc solver diverged after", -adjConvIts, "iterations..."
+                else
+                    write (*, exitFormat) "PETSc solver converged after", adjConvIts, "iterations."
+                end if
+                write (*, *) "------------------------------------------------"
+            end if
+
+            call VecDestroy(RHSVec, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+        end if
+
+        ! Destroy the temporary vector and reset the arrays
+        call VecDestroy(adjointRes, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecResetArray(psi_like1, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecResetArray(psi_like2, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Get the petsc converged reason and set the fail flag
+        call KSPGetConvergedReason(adjointKSP, adjointConvergedReason, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        if (adjointConvergedReason == KSP_CONVERGED_RTOL .or. &
+            adjointConvergedReason == KSP_CONVERGED_ATOL .or. &
+            adjointConvergedReason == KSP_CONVERGED_HAPPY_BREAKDOWN .or. &
+            adjResFinal / adjResStart < L2Rel * adjMaxL2Dev) then
+            adjointFailed = .False.
+        else
+            adjointFailed = .True.
+        end if
 
 #endif
 
-  end subroutine solveAdjoint
+    end subroutine solveAdjoint
 
-  subroutine setupPETScKsp
+    subroutine setupPETScKsp
 
-    use ADjointPETSc, only: drdwpret, drdwt, adjointKSP
-    use inputADjoint
-    use utils, only : ECHk, terminate
-    use adjointUtils, only : mykspmonitor
-    use adjointUtils, only : setupStateResidualMatrix, setupStandardKSP, setupStandardMultigrid
-    use communication
-    use agmg, only : setupShellPC, destroyShellPC, applyShellPC
+        use ADjointPETSc, only: drdwpret, drdwt, adjointKSP
+        use inputADjoint
+        use utils, only: ECHk, terminate
+        use adjointUtils, only: mykspmonitor
+        use adjointUtils, only: setupStateResidualMatrix, setupStandardKSP, setupStandardMultigrid
+        use communication
+        use agmg, only: setupShellPC, destroyShellPC, applyShellPC
 #include <petsc/finclude/petsc.h>
-    use petsc
-    implicit none
-
-    !     Local variables.
-    logical :: useAD, usePC, useTranspose, useObjective, useCoarseMats
-    integer(kind=intType) :: ierr
-    real(kind=realType) :: timeA
-    PC shellPC
-    if (ApproxPC)then
-       !setup the approximate PC Matrix
-       useAD = ADPC
-       useTranspose = .True.
-       usePC = .True.
-       useObjective = .False.
-       useCoarseMats = .False.
-       if (preCondType == 'mg') then
-          useCoarseMats = .True.
-       end if
-
-       call setupStateResidualMatrix(drdwpret, useAD, usePC, useTranspose, &
-            useObjective, frozenTurbulence, 1_intType, useCoarseMats=useCoarseMats)
-
-       call KSPSetOperators(adjointKSP, dRdwT, dRdWPreT, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-
-    else
-       ! Use the exact jacobian.  Here the matrix that defines the
-       ! linear system also serves as the preconditioning matrix. This
-       ! is only valid if useMatrixFree is flase.
-       if (useMatrixfreedRdw) then
-          call terminate("setupPETScKSP", "useMatrixFreedRdW option cannot be true when the approxPC option is False")
-       end if
-       call KSPSetOperators(adjointKSP, dRdWt, dRdWT, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-    end if
-
-    if (PreCondType == 'asm') then
-       ! Run the super-dee-duper function to setup the ksp object:
-
-       call setupStandardKSP(adjointKSP, ADjointSolverType, adjRestart, adjointpcside, &
-            PreCondType, overlap, outerPreConIts, localPCType, &
-            matrixOrdering, FillLevel, innerPreConIts)
-    else if (PreCondType == 'mg') then
-
-       call setupStandardMultigrid(adjointKSP, ADjointSolverType, adjRestart, &
-            adjointPCSide, overlap, outerPreconIts, matrixOrdering,  fillLevel)
-    end if
-
-    ! Setup monitor if necessary:
-    if (setMonitor) then
-       ! PETSC_NULL_CONTEXT doesn't exit...
-       call KSPMonitorSet(adjointKSP, MyKSPMonitor, PETSC_NULL_FUNCTION, &
-            PETSC_NULL_FUNCTION, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-    endif
-
-  end subroutine setupPETScKsp
-
-  subroutine saveCellCenters(fileName)
-
-    use blockPointers
-    use iteration
-    use inputTimeSpectral
-    use adjointVars, only: nCellsLocal
-    use communication
-    use utils, only : setPointers, EChk
+        use petsc
+        implicit none
+
+        !     Local variables.
+        logical :: useAD, usePC, useTranspose, useObjective, useCoarseMats
+        integer(kind=intType) :: ierr
+        real(kind=realType) :: timeA
+        PC shellPC
+        if (ApproxPC) then
+            !setup the approximate PC Matrix
+            useAD = ADPC
+            useTranspose = .True.
+            usePC = .True.
+            useObjective = .False.
+            useCoarseMats = .False.
+            if (preCondType == 'mg') then
+                useCoarseMats = .True.
+            end if
+
+            call setupStateResidualMatrix(drdwpret, useAD, usePC, useTranspose, &
+                                          useObjective, frozenTurbulence, 1_intType, useCoarseMats=useCoarseMats)
+
+            call KSPSetOperators(adjointKSP, dRdwT, dRdWPreT, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+        else
+            ! Use the exact jacobian.  Here the matrix that defines the
+            ! linear system also serves as the preconditioning matrix. This
+            ! is only valid if useMatrixFree is flase.
+            if (useMatrixfreedRdw) then
+            call terminate("setupPETScKSP", "useMatrixFreedRdW option cannot be true when the approxPC option is False")
+            end if
+            call KSPSetOperators(adjointKSP, dRdWt, dRdWT, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+        end if
+
+        if (PreCondType == 'asm') then
+            ! Run the super-dee-duper function to setup the ksp object:
+
+            call setupStandardKSP(adjointKSP, ADjointSolverType, adjRestart, adjointpcside, &
+                                  PreCondType, overlap, outerPreConIts, localPCType, &
+                                  matrixOrdering, FillLevel, innerPreConIts)
+        else if (PreCondType == 'mg') then
+
+            call setupStandardMultigrid(adjointKSP, ADjointSolverType, adjRestart, &
+                                        adjointPCSide, overlap, outerPreconIts, matrixOrdering, fillLevel)
+        end if
+
+        ! Setup monitor if necessary:
+        if (setMonitor) then
+            ! PETSC_NULL_CONTEXT doesn't exit...
+            call KSPMonitorSet(adjointKSP, MyKSPMonitor, PETSC_NULL_FUNCTION, &
+                               PETSC_NULL_FUNCTION, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+        end if
+
+    end subroutine setupPETScKsp
+
+    subroutine saveCellCenters(fileName)
+
+        use blockPointers
+        use iteration
+        use inputTimeSpectral
+        use adjointVars, only: nCellsLocal
+        use communication
+        use utils, only: setPointers, EChk
 #include <petsc/finclude/petsc.h>
-    use petsc
-    implicit none
-
-    ! Input params
-    character(len=*), intent(in) :: fileName
-
-    ! Working parameters
-    PetscViewer binViewer
-    integer(kind=intType) :: nn,sps,i,j,k,n
-    integer(kind=intType) :: ierr, iRow, level
-    real(kind=realType),dimension(3)::cellCenter
-
-    Vec cellCenters
-
-    call VecCreateMPI(ADFLOW_COMM_WORLD, nCellsLocal(1)*3, &
-         PETSC_DETERMINE, cellCenters, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-    call VecSetBlockSize(cellCenters, 3, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! compute and store the cell centers
-    level = 1
-    domainLoop: do nn=1, nDom
-       spectralLoop: do sps=1, nTimeIntervalsSpectral
-          call setPointers(nn, level, sps)
-          do k=2,kl
-             do j=2,jl
-                do i=2,il
-                   iRow = flowDoms(nn, level, sps)%globalCell(i, j, k)
-                   ! The location of the cell center is determined
-                   ! by averaging the cell coordinates.
-                   do n=1,3
-                      cellCenter(n) = (x(i-1,j-1,k-1,n) + x(i,j-1,k-1,n)  &
-                           +  x(i-1,j,  k-1,n) + x(i,j,  k-1,n)  &
-                           +  x(i-1,j-1,k,  n) + x(i,j-1,k,  n)  &
-                           +  x(i-1,j,  k,  n) + x(i,j,  k,  n))/8
-                   end do
-                   call VecSetValues(cellCenters, 1, [iRow], [cellCenter], INSERT_VALUES, ierr)
-                   call EChk(ierr, __FILE__, __LINE__)
+        use petsc
+        implicit none
+
+        ! Input params
+        character(len=*), intent(in) :: fileName
+
+        ! Working parameters
+        PetscViewer binViewer
+        integer(kind=intType) :: nn, sps, i, j, k, n
+        integer(kind=intType) :: ierr, iRow, level
+        real(kind=realType), dimension(3) :: cellCenter
+
+        Vec cellCenters
+
+        call VecCreateMPI(ADFLOW_COMM_WORLD, nCellsLocal(1) * 3, &
+                          PETSC_DETERMINE, cellCenters, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+        call VecSetBlockSize(cellCenters, 3, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! compute and store the cell centers
+        level = 1
+        domainLoop: do nn = 1, nDom
+            spectralLoop: do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, level, sps)
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            iRow = flowDoms(nn, level, sps)%globalCell(i, j, k)
+                            ! The location of the cell center is determined
+                            ! by averaging the cell coordinates.
+                            do n = 1, 3
+                                cellCenter(n) = (x(i - 1, j - 1, k - 1, n) + x(i, j - 1, k - 1, n) &
+                                                 + x(i - 1, j, k - 1, n) + x(i, j, k - 1, n) &
+                                                 + x(i - 1, j - 1, k, n) + x(i, j - 1, k, n) &
+                                                 + x(i - 1, j, k, n) + x(i, j, k, n)) / 8
+                            end do
+                            call VecSetValues(cellCenters, 1, [iRow], [cellCenter], INSERT_VALUES, ierr)
+                            call EChk(ierr, __FILE__, __LINE__)
+                        end do
+                    end do
                 end do
-             end do
-          end do
-       end do spectralLoop
-    end do domainLoop
+            end do spectralLoop
+        end do domainLoop
 
-    ! PETSc Matrix Assembly begin
-    call VecAssemblyBegin(cellCenters, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        ! PETSc Matrix Assembly begin
+        call VecAssemblyBegin(cellCenters, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    call VecAssemblyEnd  (cellCenters, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        call VecAssemblyEnd(cellCenters, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    call PetscViewerBinaryOpen(adflow_comm_world, fileName, FILE_MODE_WRITE, binViewer, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        call PetscViewerBinaryOpen(adflow_comm_world, fileName, FILE_MODE_WRITE, binViewer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    call VecView(cellCenters, binViewer, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        call VecView(cellCenters, binViewer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    call PetscViewerDestroy(binViewer,ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        call PetscViewerDestroy(binViewer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-  end subroutine saveCellCenters
+    end subroutine saveCellCenters
 
- subroutine dRdwTMatMult(A, vecX,  vecY, ierr)
+    subroutine dRdwTMatMult(A, vecX, vecY, ierr)
 
-    ! PETSc user-defied call back function for computing the product of
-    ! dRdwT with a vector. Here we just call the much more broadly
-    ! useful routine computeMatrixFreeProductBwdFast()
+        ! PETSc user-defied call back function for computing the product of
+        ! dRdwT with a vector. Here we just call the much more broadly
+        ! useful routine computeMatrixFreeProductBwdFast()
 
-    use communication
-    use blockPointers
-    use iteration
-    use flowVarRefState
-    use inputAdjoint
-    use ADjointVars
-    use inputTimeSpectral
-    use utils, only : EChk
+        use communication
+        use blockPointers
+        use iteration
+        use flowVarRefState
+        use inputAdjoint
+        use ADjointVars
+        use inputTimeSpectral
+        use utils, only: EChk
 #include <petsc/finclude/petsc.h>
-    use petsc
-    implicit none
-
+        use petsc
+        implicit none
 
-    ! PETSc Arguments
-    Mat   A
-    Vec   vecX, vecY
-    integer(kind=intType) ::ierr
+        ! PETSc Arguments
+        Mat A
+        Vec vecX, vecY
+        integer(kind=intType) :: ierr
 
-    real(kind=realType), pointer :: dwb_pointer(:)
-    real(kind=realType), pointer :: wb_pointer(:)
+        real(kind=realType), pointer :: dwb_pointer(:)
+        real(kind=realType), pointer :: wb_pointer(:)
 
 #ifndef USE_COMPLEX
-    call VecGetArrayReadF90(vecX, dwb_pointer, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+        call VecGetArrayReadF90(vecX, dwb_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    call VecGetArrayF90(VecY, wb_pointer, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+        call VecGetArrayF90(VecY, wb_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    call computeMatrixFreeProductBwdFast(dwb_pointer, wb_pointer, size(wb_pointer))
+        call computeMatrixFreeProductBwdFast(dwb_pointer, wb_pointer, size(wb_pointer))
 
-    call VecRestoreArrayF90(vecX, dwb_pointer, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+        call VecRestoreArrayF90(vecX, dwb_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    ierr = 0
+        ierr = 0
 #endif
 
-  end subroutine dRdwTMatMult
-
-  subroutine dRdwMatMult(A, vecX,  vecY, ierr)
-
-    ! PETSc user-defied call back function for computing the product of
-    ! dRdw with a vector. Here we just call the much more broadly
-    ! useful routine computeMatrixFreeProductFwd()
-
-    use communication
-    use blockPointers
-    use iteration
-    use flowVarRefState
-    use inputAdjoint
-    use ADjointVars
-    use inputTimeSpectral
-    use surfaceFamilies, only : fullFamList, BCFamGroups
-    use utils, only : EChk
-    use surfaceUtils, only : getSurfaceSize
-    use adjointUtils, only : allocDerivativeValues, zeroADSeeds
+    end subroutine dRdwTMatMult
+
+    subroutine dRdwMatMult(A, vecX, vecY, ierr)
+
+        ! PETSc user-defied call back function for computing the product of
+        ! dRdw with a vector. Here we just call the much more broadly
+        ! useful routine computeMatrixFreeProductFwd()
+
+        use communication
+        use blockPointers
+        use iteration
+        use flowVarRefState
+        use inputAdjoint
+        use ADjointVars
+        use inputTimeSpectral
+        use surfaceFamilies, only: fullFamList, BCFamGroups
+        use utils, only: EChk
+        use surfaceUtils, only: getSurfaceSize
+        use adjointUtils, only: allocDerivativeValues, zeroADSeeds
 #ifndef USE_COMPLEX
-    use masterRoutines, only : master_d
+        use masterRoutines, only: master_d
 #endif
 #include <petsc/finclude/petsc.h>
-    use petsc
-    implicit none
-
-    ! PETSc Arguments
-    Mat   A
-    Vec   vecX, vecY
-    integer(kind=intType) ::ierr
-
-    real(kind=realType), pointer :: wd_pointer(:)
-    real(kind=realType), pointer :: dwd_pointer(:)
-    integer(kind=intType) :: stateSize, costSize, fSize, fSIzeCell, spatialSize
-    real(kind=realType), dimension(:), allocatable :: Xvdot
-    real(kind=realType), dimension(:, :, :), allocatable :: fDot
-    integer(kind=intType) :: nn, sps
-    integer(kind=intType), dimension(:), pointer :: walLFamList
+        use petsc
+        implicit none
+
+        ! PETSc Arguments
+        Mat A
+        Vec vecX, vecY
+        integer(kind=intType) :: ierr
+
+        real(kind=realType), pointer :: wd_pointer(:)
+        real(kind=realType), pointer :: dwd_pointer(:)
+        integer(kind=intType) :: stateSize, costSize, fSize, fSIzeCell, spatialSize
+        real(kind=realType), dimension(:), allocatable :: Xvdot
+        real(kind=realType), dimension(:, :, :), allocatable :: fDot
+        integer(kind=intType) :: nn, sps
+        integer(kind=intType), dimension(:), pointer :: walLFamList
 
 #ifndef USE_COMPLEX
 
-    call VecGetArrayReadF90(vecX, wd_pointer, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+        call VecGetArrayReadF90(vecX, wd_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    call VecGetArrayF90(VecY, dwd_pointer, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+        call VecGetArrayF90(VecY, dwd_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    if (.not. derivVarsAllocated) then
-       call allocDerivativeValues(1)
-    end if
+        if (.not. derivVarsAllocated) then
+            call allocDerivativeValues(1)
+        end if
 
-    ! Zero all AD seesd.
-    do nn=1,nDom
-       do sps=1,nTimeIntervalsSpectral
-          call zeroADSeeds(nn, 1, sps)
-       end do
-    end do
+        ! Zero all AD seesd.
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call zeroADSeeds(nn, 1, sps)
+            end do
+        end do
 
-    wallFamList => BCFamGroups(iBCGroupWalls)%famList
-    call getSurfaceSize(fSize, fSizeCell, wallFamList, size(wallFamList), .True.)
-    spatialSize =  3 * nNodesLocal(1_intType)*nTimeIntervalsSpectral
+        wallFamList => BCFamGroups(iBCGroupWalls)%famList
+        call getSurfaceSize(fSize, fSizeCell, wallFamList, size(wallFamList), .True.)
+        spatialSize = 3 * nNodesLocal(1_intType) * nTimeIntervalsSpectral
 
-    allocate(xvdot(spatialSize))
-    allocate(fdot(3, fSize, nTimeIntervalsSpectral))
+        allocate (xvdot(spatialSize))
+        allocate (fdot(3, fSize, nTimeIntervalsSpectral))
 
-    xvdot = zero
-    fdot = zero
+        xvdot = zero
+        fdot = zero
 
-    call master_d(wd_pointer, xvDot, fDot, dwd_pointer)
+        call master_d(wd_pointer, xvDot, fDot, dwd_pointer)
 
-    deallocate(xvDot, Fdot)
+        deallocate (xvDot, Fdot)
 
-    call VecRestoreArrayReadF90(vecX, wd_pointer, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+        call VecRestoreArrayReadF90(vecX, wd_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    call VecRestoreArrayF90(VecY, dwd_pointer, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+        call VecRestoreArrayF90(VecY, dwd_pointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    ierr = 0
+        ierr = 0
 #endif
-  end subroutine dRdwMatMult
-
-  subroutine createPETScVars
-    !
-    !      Create the matrices/vectors that are required for the adjoint
-    !
-    use ADjointPETSc, only: dRdwT, dRdwPreT, &
-         adjointKSP, matfreectx, x_like, psi_like1, adjointPETScVarsAllocated
-    use ADjointVars
-    use communication, only : adflow_comm_world, myid
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use flowVarRefState, only : nwf, nw, viscous
-    use inputADjoint, only : approxPC, frozenTurbulence, useMatrixFreedRdw, viscPC
-    use stencils, only : N_visc_drdw, n_euler_drdw, visc_drdw_stencil,  euler_drdw_stencil, &
-         visc_drdw_stencil, visc_pc_stencil, N_visc_PC, N_euler_PC, euler_PC_stencil
-    use utils, only : EChk, setPointers
-    use adjointUtils, only : myMatCreate, destroyPETScVars, statePreAllocation
-    use agmg, only : setupAGMG
+    end subroutine dRdwMatMult
+
+    subroutine createPETScVars
+        !
+        !      Create the matrices/vectors that are required for the adjoint
+        !
+        use ADjointPETSc, only: dRdwT, dRdwPreT, &
+                                adjointKSP, matfreectx, x_like, psi_like1, adjointPETScVarsAllocated
+        use ADjointVars
+        use communication, only: adflow_comm_world, myid
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use flowVarRefState, only: nwf, nw, viscous
+        use inputADjoint, only: approxPC, frozenTurbulence, useMatrixFreedRdw, viscPC
+        use stencils, only: N_visc_drdw, n_euler_drdw, visc_drdw_stencil, euler_drdw_stencil, &
+                            visc_drdw_stencil, visc_pc_stencil, N_visc_PC, N_euler_PC, euler_PC_stencil
+        use utils, only: EChk, setPointers
+        use adjointUtils, only: myMatCreate, destroyPETScVars, statePreAllocation
+        use agmg, only: setupAGMG
 #include <petsc/finclude/petsc.h>
-    use petsc
-    implicit none
-
-    !     Local variables.
-    integer(kind=intType)  :: nDimW, nDimX
-    integer(kind=intType) :: i, n_stencil, nState
-    integer(kind=intType), dimension(:), allocatable :: nnzDiagonal, nnzOffDiag
-    integer(kind=intType), dimension(:), allocatable :: nnzDiagonal2, nnzOffDiag2
-    integer(kind=intType), dimension(:, :), pointer :: stencil
-    integer(kind=intType) :: level, ierr, nlevels
-    integer(kind=intType) :: rows(4), iCol, nn, sps, ii
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, iDim, iStride, j, mm
-    integer(kind=intType) :: npts, ncells, nTS
-
-    ! Destroy variables if they already exist
-    call destroyPETScVars()
-
-    ! DETERMINE ALL SIZES HERE!
-    if ( frozenTurbulence ) then
-       nState = nwf
-    else
-       nState = nw
-    endif
-
-    nDimW = nState * nCellsLocal(1_intType)*nTimeIntervalsSpectral
-    nDimX = 3 * nNodesLocal(1_intType)*nTimeIntervalsSpectral
-
-
-    if (.not. useMatrixFreedRdw) then
-       ! Setup matrix-based dRdwT
-       allocate(nnzDiagonal(nCellsLocal(1_intType)*nTimeIntervalsSpectral), &
-            nnzOffDiag(nCellsLocal(1_intType)*nTimeIntervalsSpectral) )
-
-       if (viscous) then
-          n_stencil = N_visc_drdw
-          stencil => visc_drdw_stencil
-       else
-          n_stencil = N_euler_drdw
-          stencil => euler_drdw_stencil
-       end if
-
-       level = 1
-
-       call statePreAllocation(nnzDiagonal, nnzOffDiag, nDimW/nState, stencil, n_stencil, &
-            level, .true.)
-       call myMatCreate(dRdwT, nState, nDimW, nDimW, nnzDiagonal, nnzOffDiag, &
-            __FILE__, __LINE__)
-
-       call matSetOption(dRdwT, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-
-       deallocate(nnzDiagonal, nnzOffDiag)
-    else
-       ! Setup matrix-free dRdwT
-       call MatCreateShell(ADFLOW_COMM_WORLD, nDimW, nDimW, PETSC_DETERMINE, &
-            PETSC_DETERMINE, matfreectx, dRdwT, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-
-       ! Set the shell operation for doing matrix vector multiplies
-       call MatShellSetOperation(dRdwT, MATOP_MULT, dRdwTMatMult, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-
-       ! Set the shell operation for doing TRNASPOSE matrix vector
-       ! multiplies
-       call MatShellSetOperation(dRdwT, MATOP_MULT_TRANSPOSE, dRdwMatMult, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-
-       call MatSetup(dRdwT, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-    end if
-
-    ! Create the approxPC if required
-    if (ApproxPC) then
-       ! ------------------- Determine Preallocation for dRdwPre -------------
-       allocate(nnzDiagonal(nCellsLocal(1_intType)*nTimeIntervalsSpectral), &
-            nnzOffDiag(nCellsLocal(1_intType)*nTimeIntervalsSpectral) )
-
-       if (viscous .and. viscPC) then
-          stencil => visc_pc_stencil
-          n_stencil = N_visc_pc
-       else
-          stencil => euler_pc_stencil
-          n_stencil = N_euler_pc
-       end if
-
-       level = 1
-       call statePreAllocation(nnzDiagonal, nnzOffDiag, nDimW/nState, stencil, n_stencil, &
-            level, .true.)
-       call myMatCreate(dRdwPreT, nState, nDimW, nDimW, nnzDiagonal, nnzOffDiag, &
-            __FILE__, __LINE__)
-
-       call matSetOption(dRdwPreT, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-
-       deallocate(nnzDiagonal, nnzOffDiag)
-    end if
-
-
-    call setupAGMG(drdwpret, nDimW/nState, nState)
-
-    ! Create the KSP Object
-    call KSPCreate(ADFLOW_COMM_WORLD, adjointKSP, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    adjointPETScVarsAllocated = .True.
-  end subroutine createPETScVars
+        use petsc
+        implicit none
+
+        !     Local variables.
+        integer(kind=intType) :: nDimW, nDimX
+        integer(kind=intType) :: i, n_stencil, nState
+        integer(kind=intType), dimension(:), allocatable :: nnzDiagonal, nnzOffDiag
+        integer(kind=intType), dimension(:), allocatable :: nnzDiagonal2, nnzOffDiag2
+        integer(kind=intType), dimension(:, :), pointer :: stencil
+        integer(kind=intType) :: level, ierr, nlevels
+        integer(kind=intType) :: rows(4), iCol, nn, sps, ii
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, iDim, iStride, j, mm
+        integer(kind=intType) :: npts, ncells, nTS
+
+        ! Destroy variables if they already exist
+        call destroyPETScVars()
+
+        ! DETERMINE ALL SIZES HERE!
+        if (frozenTurbulence) then
+            nState = nwf
+        else
+            nState = nw
+        end if
+
+        nDimW = nState * nCellsLocal(1_intType) * nTimeIntervalsSpectral
+        nDimX = 3 * nNodesLocal(1_intType) * nTimeIntervalsSpectral
+
+        if (.not. useMatrixFreedRdw) then
+            ! Setup matrix-based dRdwT
+            allocate (nnzDiagonal(nCellsLocal(1_intType) * nTimeIntervalsSpectral), &
+                      nnzOffDiag(nCellsLocal(1_intType) * nTimeIntervalsSpectral))
+
+            if (viscous) then
+                n_stencil = N_visc_drdw
+                stencil => visc_drdw_stencil
+            else
+                n_stencil = N_euler_drdw
+                stencil => euler_drdw_stencil
+            end if
+
+            level = 1
+
+            call statePreAllocation(nnzDiagonal, nnzOffDiag, nDimW / nState, stencil, n_stencil, &
+                                    level, .true.)
+            call myMatCreate(dRdwT, nState, nDimW, nDimW, nnzDiagonal, nnzOffDiag, &
+                             __FILE__, __LINE__)
+
+            call matSetOption(dRdwT, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            deallocate (nnzDiagonal, nnzOffDiag)
+        else
+            ! Setup matrix-free dRdwT
+            call MatCreateShell(ADFLOW_COMM_WORLD, nDimW, nDimW, PETSC_DETERMINE, &
+                                PETSC_DETERMINE, matfreectx, dRdwT, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Set the shell operation for doing matrix vector multiplies
+            call MatShellSetOperation(dRdwT, MATOP_MULT, dRdwTMatMult, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Set the shell operation for doing TRNASPOSE matrix vector
+            ! multiplies
+            call MatShellSetOperation(dRdwT, MATOP_MULT_TRANSPOSE, dRdwMatMult, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            call MatSetup(dRdwT, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+        end if
+
+        ! Create the approxPC if required
+        if (ApproxPC) then
+            ! ------------------- Determine Preallocation for dRdwPre -------------
+            allocate (nnzDiagonal(nCellsLocal(1_intType) * nTimeIntervalsSpectral), &
+                      nnzOffDiag(nCellsLocal(1_intType) * nTimeIntervalsSpectral))
+
+            if (viscous .and. viscPC) then
+                stencil => visc_pc_stencil
+                n_stencil = N_visc_pc
+            else
+                stencil => euler_pc_stencil
+                n_stencil = N_euler_pc
+            end if
+
+            level = 1
+            call statePreAllocation(nnzDiagonal, nnzOffDiag, nDimW / nState, stencil, n_stencil, &
+                                    level, .true.)
+            call myMatCreate(dRdwPreT, nState, nDimW, nDimW, nnzDiagonal, nnzOffDiag, &
+                             __FILE__, __LINE__)
+
+            call matSetOption(dRdwPreT, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            deallocate (nnzDiagonal, nnzOffDiag)
+        end if
+
+        call setupAGMG(drdwpret, nDimW / nState, nState)
+
+        ! Create the KSP Object
+        call KSPCreate(ADFLOW_COMM_WORLD, adjointKSP, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        adjointPETScVarsAllocated = .True.
+    end subroutine createPETScVars
 
 end module adjointAPI
diff --git a/src/adjoint/adjointDebug.F90 b/src/adjoint/adjointDebug.F90
index 8dea83c29..90aca5b51 100644
--- a/src/adjoint/adjointDebug.F90
+++ b/src/adjoint/adjointDebug.F90
@@ -6,30 +6,30 @@ module adjointDebug
 
 #ifndef USE_COMPLEX
 
-    subroutine computeMatrixFreeProductFwdFD(xvdot, extradot, wdot, bcDataValuesdot,&
-        useSpatial, useState, famLists,&
-        bcDataNames, bcDataValues, bcDataFamLists, bcVarsEmpty,&
-        dwdot, funcsDot, fDot, &
-        costSize, fSize, nTime, h)
+    subroutine computeMatrixFreeProductFwdFD(xvdot, extradot, wdot, bcDataValuesdot, &
+                                             useSpatial, useState, famLists, &
+                                             bcDataNames, bcDataValues, bcDataFamLists, bcVarsEmpty, &
+                                             dwdot, funcsDot, fDot, &
+                                             costSize, fSize, nTime, h)
 
         ! This routine is used to debug master_d. It uses the forward seeds to set perturbations
         ! and then computes the value of the derivatives using forward finite diffenece
 
         use constants
         use adjointvars
-        use blockPointers, only : nDom
-        use communication, only : adflow_comm_world
-        use inputTimeSpectral, only : nTimeIntervalsSpectral
-        use inputPhysics, only :pointRef, alpha, beta, equations, machCoef, &
-            mach, machGrid, rgasdim
-        use iteration, only : currentLevel, groundLevel
-        use flowVarRefState, only : pInfDim, rhoInfDim, TinfDim
-        use blockPointers, only : nDom, il, jl, kl, wd, x, w, dw, dwd, nBocos, nViscBocos
-
-        use adjointUtils, only : allocDerivativeValues, zeroADSeeds
-        use masterRoutines, only : master
-        use utils, only : isWallType, setPointers, setPointers_d, EChk
-        use flowVarRefState, only : nw, nwf
+        use blockPointers, only: nDom
+        use communication, only: adflow_comm_world
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use inputPhysics, only: pointRef, alpha, beta, equations, machCoef, &
+                                mach, machGrid, rgasdim
+        use iteration, only: currentLevel, groundLevel
+        use flowVarRefState, only: pInfDim, rhoInfDim, TinfDim
+        use blockPointers, only: nDom, il, jl, kl, wd, x, w, dw, dwd, nBocos, nViscBocos
+
+        use adjointUtils, only: allocDerivativeValues, zeroADSeeds
+        use masterRoutines, only: master
+        use utils, only: isWallType, setPointers, setPointers_d, EChk
+        use flowVarRefState, only: nw, nwf
         use wallDistanceData, only: xSurf, xSurfVec
         use wallDistance, only: updateXSurf
         implicit none
@@ -45,7 +45,7 @@ subroutine computeMatrixFreeProductFwdFD(xvdot, extradot, wdot, bcDataValuesdot,
         logical, intent(in) :: useSpatial, useState
         integer(kind=intType), dimension(:, :) :: famLists
         integer(kind=intType) :: costSize, fSize, nTime
-        
+
         character, dimension(:, :), intent(in) :: bcDataNames
         real(kind=realType), dimension(:), intent(inout) :: bcDataValues, bcDataValuesDot
         integer(kind=intType), dimension(:, :) :: bcDataFamLists
@@ -59,28 +59,23 @@ subroutine computeMatrixFreeProductFwdFD(xvdot, extradot, wdot, bcDataValuesdot,
         !
         ! Output derivative seeds
         real(kind=realType), dimension(size(wDot)), intent(out) :: dwDot
-        real(kind=realType), dimension(costSize, size(famLists,1)), intent(out) :: funcsDot
+        real(kind=realType), dimension(costSize, size(famLists, 1)), intent(out) :: funcsDot
         real(kind=realType), dimension(3, fSize, nTime), intent(out) :: fDot
 
         !
         ! Working Variables
         !
-        integer(kind=intType) :: nn,sps, level
-        integer(kind=intType) :: ierr, mm,i,j,k, l,  ii, jj, iRegion
-
-        real(kind=realType), dimension(costSize, size(famLists,1)) :: funcs
+        integer(kind=intType) :: nn, sps, level
+        integer(kind=intType) :: ierr, mm, i, j, k, l, ii, jj, iRegion
 
+        real(kind=realType), dimension(costSize, size(famLists, 1)) :: funcs
 
         ! Input Arguments for master:
-        real(kind=realType), dimension(costSize, size(famLists,1)) :: funcValues
+        real(kind=realType), dimension(costSize, size(famLists, 1)) :: funcValues
         real(kind=realType), dimension(:, :, :), allocatable :: forces
 
-
         fSize = size(fDot, 2)
-        allocate(forces(3, fSize, nTimeIntervalsSpectral))
-
-
-
+        allocate (forces(3, fSize, nTimeIntervalsSpectral))
 
         ! Need to trick the residual evalution to use coupled (mean flow and
         ! turbulent) together.
@@ -98,13 +93,12 @@ subroutine computeMatrixFreeProductFwdFD(xvdot, extradot, wdot, bcDataValuesdot,
         end if
 
         ! Zero all AD seesd.
-        do nn=1,nDom
-            do sps=1,nTimeIntervalsSpectral
-            call zeroADSeeds(nn,level, sps)
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call zeroADSeeds(nn, level, sps)
             end do
         end do
 
-
         ! ----------------------------- Run Master ---------------------------------
         ! Run the super-dee-duper master rotuine
         if (bcVarsEmpty) then
@@ -118,26 +112,25 @@ subroutine computeMatrixFreeProductFwdFD(xvdot, extradot, wdot, bcDataValuesdot,
                         forces, &
                         bcDataNames, bcDataValues, bcDataFamLists)
         end if
-      
 
         ! Copy base val (f) into variables for the final vals (f(x+dx) - f)/dx
-        ! we add the negative sign here instead of doing it later 
-                    
-        ii =0
-        do nn=1, nDom
-            do sps=1,nTimeIntervalsSpectral
-            call setPointers_d(nn, 1, sps)
-            do k=2, kl
-                do j=2, jl
-                    do i=2, il
-                        do l=1, nw
-                        ii = ii + 1
-                        dwd(i,j,k,l) = -dw(i,j,k,l)
+        ! we add the negative sign here instead of doing it later
+
+        ii = 0
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers_d(nn, 1, sps)
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            do l = 1, nw
+                                ii = ii + 1
+                                dwd(i, j, k, l) = -dw(i, j, k, l)
+                            end do
                         end do
                     end do
                 end do
             end do
-            end do
         end do
 
         fDot = -forces
@@ -148,59 +141,56 @@ subroutine computeMatrixFreeProductFwdFD(xvdot, extradot, wdot, bcDataValuesdot,
         ! machNumber used for the coefficients follows the Mach number,
         ! not the grid mach number.
 
-
-        alpha = alpha + h*extraDot(iAlpha)
-        beta = beta + h*extraDot(iBeta)
-        mach = mach + h*extraDot(iMach)
-        machCoef = machCoef + h*extraDot(iMach)
-        machGrid = machGrid + h*extraDot(iMachGrid)
-        PinfDim = PinfDim + h*extraDot(iPressure)
-        rhoinfDim = rhoinfDim + h*extraDot(iDensity)
-        tinfdim = tinfdim + h*extraDot(iTemperature)
-        pointref(1) = pointref(1) + h*extraDot(iPointRefX)
-        pointref(2) = pointref(2) + h*extraDot(iPointRefY)
-        pointref(3) = pointref(3) + h*extraDot(iPointRefZ)
-        rgasdim = rgasdim + h*zero
-
+        alpha = alpha + h * extraDot(iAlpha)
+        beta = beta + h * extraDot(iBeta)
+        mach = mach + h * extraDot(iMach)
+        machCoef = machCoef + h * extraDot(iMach)
+        machGrid = machGrid + h * extraDot(iMachGrid)
+        PinfDim = PinfDim + h * extraDot(iPressure)
+        rhoinfDim = rhoinfDim + h * extraDot(iDensity)
+        tinfdim = tinfdim + h * extraDot(iTemperature)
+        pointref(1) = pointref(1) + h * extraDot(iPointRefX)
+        pointref(2) = pointref(2) + h * extraDot(iPointRefY)
+        pointref(3) = pointref(3) + h * extraDot(iPointRefZ)
+        rgasdim = rgasdim + h * zero
 
         ! Set the provided w and x seeds:
         ii = 0
         jj = 0
-        domainLoop1: do nn=1,nDom
-            spectalLoop1: do sps=1,nTimeIntervalsSpectral
-            call setPointers(nn, 1, sps)
-            do k=1, kl
-                do j=1,jl
-                    do i=1,il
-                        do l=1,3
-                        ii = ii + 1
-                        x(i, j, k, l) = x(i, j, k, l) +  xvDot(ii)* h
+        domainLoop1: do nn = 1, nDom
+            spectalLoop1: do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, 1, sps)
+                do k = 1, kl
+                    do j = 1, jl
+                        do i = 1, il
+                            do l = 1, 3
+                                ii = ii + 1
+                                x(i, j, k, l) = x(i, j, k, l) + xvDot(ii) * h
+                            end do
                         end do
                     end do
                 end do
-            end do
-            do k=2, kl
-                do j=2,jl
-                    do i=2,il
-                        do l = 1, nw
-                        jj = jj + 1
-                        w(i, j, k, l) = w(i, j, k, l) +  wDot(jj)*h
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            do l = 1, nw
+                                jj = jj + 1
+                                w(i, j, k, l) = w(i, j, k, l) + wDot(jj) * h
+                            end do
                         end do
                     end do
                 end do
-            end do
             end do spectalLoop1
         end do domainLoop1
 
-        bcDataValues = bcDataValues + bcDataValuesdot*h
+        bcDataValues = bcDataValues + bcDataValuesdot * h
 
         ! The xvolume that is set by used by update Xsurf is only allocated with
         ! rans
-        if(equations == RANSEquations) then
+        if (equations == RANSEquations) then
             call updateXSurf(level)
         end if
 
-
         ! ----------------------------- Run Master ---------------------------------
         ! Run the super-dee-duper master rotuine
         if (bcVarsEmpty) then
@@ -221,87 +211,79 @@ subroutine computeMatrixFreeProductFwdFD(xvdot, extradot, wdot, bcDataValuesdot,
         ! machNumber used for the coefficients follows the Mach number,
         ! not the grid mach number.
 
-
-        alpha = alpha - h*extraDot(iAlpha)
-        beta = beta - h*extraDot(iBeta)
-        mach = mach - h*extraDot(iMach)
-        machCoef = machCoef - h*extraDot(iMach)
-        machGrid = machGrid - h*extraDot(iMachGrid)
-        PinfDim = PinfDim - h*extraDot(iPressure)
-        rhoinfDim = rhoinfDim - h*extraDot(iDensity)
-        tinfdim = tinfdim - h*extraDot(iTemperature)
-        pointref(1) = pointref(1) - h*extraDot(iPointRefX)
-        pointref(2) = pointref(2) - h*extraDot(iPointRefY)
-        pointref(3) = pointref(3) - h*extraDot(iPointRefZ)
-        rgasdim = rgasdim - h*zero
-
-
-
+        alpha = alpha - h * extraDot(iAlpha)
+        beta = beta - h * extraDot(iBeta)
+        mach = mach - h * extraDot(iMach)
+        machCoef = machCoef - h * extraDot(iMach)
+        machGrid = machGrid - h * extraDot(iMachGrid)
+        PinfDim = PinfDim - h * extraDot(iPressure)
+        rhoinfDim = rhoinfDim - h * extraDot(iDensity)
+        tinfdim = tinfdim - h * extraDot(iTemperature)
+        pointref(1) = pointref(1) - h * extraDot(iPointRefX)
+        pointref(2) = pointref(2) - h * extraDot(iPointRefY)
+        pointref(3) = pointref(3) - h * extraDot(iPointRefZ)
+        rgasdim = rgasdim - h * zero
 
         ! Copy out the residual derivative into the provided dwDot and remove the
         ! perturbation
         ii = 0
         jj = 0
-        do nn=1, nDom
-            do sps=1,nTimeIntervalsSpectral
-            call setPointers_d(nn, 1, sps)
-            do k=1, kl
-                do j=1,jl
-                    do i=1,il
-                        do l=1,3
-                        ii = ii + 1
-                        x(i, j, k, l) = x(i, j, k, l) -  xvDot(ii)* h
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers_d(nn, 1, sps)
+                do k = 1, kl
+                    do j = 1, jl
+                        do i = 1, il
+                            do l = 1, 3
+                                ii = ii + 1
+                                x(i, j, k, l) = x(i, j, k, l) - xvDot(ii) * h
+                            end do
                         end do
                     end do
                 end do
-            end do
-            do k=2, kl
-                do j=2, jl
-                    do i=2, il
-                        do l=1, nw
-                        jj = jj + 1
-                        w(i, j, k, l) = w(i, j, k, l) -  wDot(jj)*h
-                        dwd(i,j,k,l) = (dwd(i,j,k,l) + dw(i,j,k,l))/h
-                        dwDot(jj) = dwd(i,j,k,l) ! copy values to output
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            do l = 1, nw
+                                jj = jj + 1
+                                w(i, j, k, l) = w(i, j, k, l) - wDot(jj) * h
+                                dwd(i, j, k, l) = (dwd(i, j, k, l) + dw(i, j, k, l)) / h
+                                dwDot(jj) = dwd(i, j, k, l) ! copy values to output
+                            end do
                         end do
                     end do
                 end do
-            end do
 
             end do
         end do
 
-
-        if(equations == RANSEquations) then
+        if (equations == RANSEquations) then
             call updateXSurf(level)
         end if
-        bcDataValues = bcDataValues - bcDataValuesdot*h
+        bcDataValues = bcDataValues - bcDataValuesdot * h
 
-        
-        fDot = (fDot + forces)/h
-        funcsDot = (funcsDot + funcValues)/h
+        fDot = (fDot + forces) / h
+        funcsDot = (funcsDot + funcValues) / h
 
     end subroutine computeMatrixFreeProductFwdFD
 
-
     subroutine printADSeeds(nn, level, sps)
 !DIR$ NOOPTIMIZE
         ! this routine is used for debugging master_d, and master_b.
         ! it prints all the AD seeds used
         ! it is useful to save the output and compare it with a diff tool
 
-
         use constants
-        use block, only : flowDomsd, flowDoms
+        use block, only: flowDomsd, flowDoms
         use blockPointers
         use inputTimeSpectral
         use flowVarRefState
         use inputPhysics
         use BCPointers_b
         use communication
-        use oversetData, only : oversetPresent
-        use cgnsGrid, only : cgnsDoms, cgnsDomsd, cgnsNDom
-        use actuatorRegionData, only : nActuatorRegions, actuatorRegionsd
+        use oversetData, only: oversetPresent
+        use cgnsGrid, only: cgnsDoms, cgnsDomsd, cgnsNDom
+        use actuatorRegionData, only: nActuatorRegions, actuatorRegionsd
         implicit none
 
         ! Input parameters
@@ -310,306 +292,304 @@ subroutine printADSeeds(nn, level, sps)
         ! Working parameters
         integer(kind=intType) :: mm, i, iDom
         integer(kind=intType) :: iBoco, iData, iDirichlet
-        write(*,*) 'ADSeeds for block', nn, ' at level ', level, ' at sps ', sps
-        write(*,*) 'd2wall ', minval(flowDomsd(nn, level, sps)%d2wall), &
-                        maxval(flowDomsd(nn, level, sps)%d2wall), &
-                        norm2(flowDomsd(nn, level, sps)%d2wall)
-        write(*,*) 'x ', minval(flowDomsd(nn, level, sps)%x), &
-                        maxval(flowDomsd(nn, level, sps)%x), &
-                        norm2(flowDomsd(nn, level, sps)%x)
-        write(*,*) 'si ', minval(flowDomsd(nn, level, sps)%si), &
-                        maxval(flowDomsd(nn, level, sps)%si), &
-                        norm2(flowDomsd(nn, level, sps)%si)
-        write(*,*) 'sj ', minval(flowDomsd(nn, level, sps)%sj), &
-                        maxval(flowDomsd(nn, level, sps)%sj), &
-                        norm2(flowDomsd(nn, level, sps)%sj)
-        write(*,*) 'sk ', minval(flowDomsd(nn, level, sps)%sk), &
-                        maxval(flowDomsd(nn, level, sps)%sk), &
-                        norm2(flowDomsd(nn, level, sps)%sk)
-        write(*,*) 'vol ', minval(flowDomsd(nn, level, sps)%vol), &
-                        maxval(flowDomsd(nn, level, sps)%vol), &
-                        norm2(flowDomsd(nn, level, sps)%vol)
-
-        write(*,*) 's ', minval(flowDomsd(nn, level, sps)%s), &
-                        maxval(flowDomsd(nn, level, sps)%s), &
-                        norm2(flowDomsd(nn, level, sps)%s)
-        write(*,*) 'sFaceI ', minval(flowDomsd(nn, level, sps)%sFaceI), &
-                        maxval(flowDomsd(nn, level, sps)%sFaceI), &
-                        norm2(flowDomsd(nn, level, sps)%sFaceI)
-        write(*,*) 'sFaceJ ', minval(flowDomsd(nn, level, sps)%sFaceJ), &
-                        maxval(flowDomsd(nn, level, sps)%sFaceJ), &
-                        norm2(flowDomsd(nn, level, sps)%sFaceJ)
-        write(*,*) 'sFaceK ', minval(flowDomsd(nn, level, sps)%sFaceK), &
-                        maxval(flowDomsd(nn, level, sps)%sFaceK), &
-                        norm2(flowDomsd(nn, level, sps)%sFaceK)
-
-        write(*,*) 'w ', minval(flowDomsd(nn, level, sps)%w), &
-                        maxval(flowDomsd(nn, level, sps)%w), &
-                        norm2(flowDomsd(nn, level, sps)%w)
-        write(*,*) 'dw ', minval(flowDomsd(nn, level, sps)%dw), &
-                        maxval(flowDomsd(nn, level, sps)%dw), &
-                        norm2(flowDomsd(nn, level, sps)%dw)
-        write(*,*) 'fw ', minval(flowDomsd(nn, level, sps)%fw), &
-                        maxval(flowDomsd(nn, level, sps)%fw), &
-                        norm2(flowDomsd(nn, level, sps)%fw)
-        write(*,*) 'scratch ', minval(flowDomsd(nn, level, sps)%scratch), &
-                        maxval(flowDomsd(nn, level, sps)%scratch), &
-                        norm2(flowDomsd(nn, level, sps)%scratch)
-
-        write(*,*) 'p ', minval(flowDomsd(nn, level, sps)%p), &
-                        maxval(flowDomsd(nn, level, sps)%p), &
-                        norm2(flowDomsd(nn, level, sps)%p)
-        write(*,*) 'gamma ', minval(flowDomsd(nn, level, sps)%gamma), &
-                        maxval(flowDomsd(nn, level, sps)%gamma), &
-                        norm2(flowDomsd(nn, level, sps)%gamma)
-        write(*,*) 'aa ', minval(flowDomsd(nn, level, sps)%aa), &
-                        maxval(flowDomsd(nn, level, sps)%aa), &
-                        norm2(flowDomsd(nn, level, sps)%aa)
-
-        write(*,*) 'rlv ', minval(flowDomsd(nn, level, sps)%rlv), &
-                        maxval(flowDomsd(nn, level, sps)%rlv), &
-                        norm2(flowDomsd(nn, level, sps)%rlv)
-        write(*,*) 'rev ', minval(flowDomsd(nn, level, sps)%rev), &
-                        maxval(flowDomsd(nn, level, sps)%rev), &
-                        norm2(flowDomsd(nn, level, sps)%rev)
-
-        write(*,*) 'radI ', minval(flowDomsd(nn, level, sps)%radI), &
-                        maxval(flowDomsd(nn, level, sps)%radI), &
-                        norm2(flowDomsd(nn, level, sps)%radI)
-        write(*,*) 'radJ ', minval(flowDomsd(nn, level, sps)%radJ), &
-                        maxval(flowDomsd(nn, level, sps)%radJ), &
-                        norm2(flowDomsd(nn, level, sps)%radJ)
-        write(*,*) 'radK ', minval(flowDomsd(nn, level, sps)%radK), &
-                        maxval(flowDomsd(nn, level, sps)%radK), &
-                        norm2(flowDomsd(nn, level, sps)%radK)
-
-        write(*,*) 'ux ', minval(flowDomsd(nn, level, sps)%ux), &
-                        maxval(flowDomsd(nn, level, sps)%ux), &
-                        norm2(flowDomsd(nn, level, sps)%ux)
-        write(*,*) 'uy ', minval(flowDomsd(nn, level, sps)%uy), &
-                        maxval(flowDomsd(nn, level, sps)%uy), &
-                        norm2(flowDomsd(nn, level, sps)%uy)
-        write(*,*) 'uz ', minval(flowDomsd(nn, level, sps)%uz), &
-                        maxval(flowDomsd(nn, level, sps)%uz), &
-                        norm2(flowDomsd(nn, level, sps)%uz)
-        write(*,*) 'vx ', minval(flowDomsd(nn, level, sps)%vx), &
-                        maxval(flowDomsd(nn, level, sps)%vx), &
-                        norm2(flowDomsd(nn, level, sps)%vx)
-        write(*,*) 'vy ', minval(flowDomsd(nn, level, sps)%vy), &
-                        maxval(flowDomsd(nn, level, sps)%vy), &
-                        norm2(flowDomsd(nn, level, sps)%vy)
-        write(*,*) 'vz ', minval(flowDomsd(nn, level, sps)%vz), &
-                        maxval(flowDomsd(nn, level, sps)%vz), &
-                        norm2(flowDomsd(nn, level, sps)%vz)
-        write(*,*) 'wx ', minval(flowDomsd(nn, level, sps)%wx), &
-                        maxval(flowDomsd(nn, level, sps)%wx), &
-                        norm2(flowDomsd(nn, level, sps)%wx)
-        write(*,*) 'wy ', minval(flowDomsd(nn, level, sps)%wy), &
-                        maxval(flowDomsd(nn, level, sps)%wy), &
-                        norm2(flowDomsd(nn, level, sps)%wy)
-        write(*,*) 'wz ', minval(flowDomsd(nn, level, sps)%wz), &
-                        maxval(flowDomsd(nn, level, sps)%wz), &
-                        norm2(flowDomsd(nn, level, sps)%wz)
-        write(*,*) 'qx ', minval(flowDomsd(nn, level, sps)%qx), &
-                        maxval(flowDomsd(nn, level, sps)%qx), &
-                        norm2(flowDomsd(nn, level, sps)%qx)
-        write(*,*) 'qy ', minval(flowDomsd(nn, level, sps)%qy), &
-                        maxval(flowDomsd(nn, level, sps)%qy), &
-                        norm2(flowDomsd(nn, level, sps)%qy)
-        write(*,*) 'qz ', minval(flowDomsd(nn, level, sps)%qz), &
-                        maxval(flowDomsd(nn, level, sps)%qz), &
-                        norm2(flowDomsd(nn, level, sps)%qz)
-
-        write(*,*) 'bmti1 ',minval(flowDomsd(nn, level, sps)%bmti1), &
-                                maxval(flowDomsd(nn, level, sps)%bmti1), &
-                                norm2(flowDomsd(nn, level, sps)%bmti1)
-        write(*,*) 'bmti2 ',minval(flowDomsd(nn, level, sps)%bmti2), &
-                                maxval(flowDomsd(nn, level, sps)%bmti2), &
-                                norm2(flowDomsd(nn, level, sps)%bmti2)
-        write(*,*) 'bmtj1 ',minval(flowDomsd(nn, level, sps)%bmtj1), &
-                                maxval(flowDomsd(nn, level, sps)%bmtj1), &
-                                norm2(flowDomsd(nn, level, sps)%bmtj1)
-        write(*,*) 'bmtj2 ',minval(flowDomsd(nn, level, sps)%bmtj2), &
-                                maxval(flowDomsd(nn, level, sps)%bmtj2), &
-                                norm2(flowDomsd(nn, level, sps)%bmtj2)
-        write(*,*) 'bmtk1 ',minval(flowDomsd(nn, level, sps)%bmtk1), &
-                                maxval(flowDomsd(nn, level, sps)%bmtk1), &
-                                norm2(flowDomsd(nn, level, sps)%bmtk1)
-        write(*,*) 'bmtk2 ',minval(flowDomsd(nn, level, sps)%bmtk2), &
-                                maxval(flowDomsd(nn, level, sps)%bmtk2), &
-                                norm2(flowDomsd(nn, level, sps)%bmtk2)
-        write(*,*) 'bvti1 ',minval(flowDomsd(nn, level, sps)%bvti1), &
-                                maxval(flowDomsd(nn, level, sps)%bvti1), &
-                                norm2(flowDomsd(nn, level, sps)%bvti1)
-        write(*,*) 'bvti2 ',minval(flowDomsd(nn, level, sps)%bvti2), &
-                                maxval(flowDomsd(nn, level, sps)%bvti2), &
-                                norm2(flowDomsd(nn, level, sps)%bvti2)
-        write(*,*) 'bvtj1 ',minval(flowDomsd(nn, level, sps)%bvtj1), &
-                                maxval(flowDomsd(nn, level, sps)%bvtj1), &
-                                norm2(flowDomsd(nn, level, sps)%bvtj1)
-        write(*,*) 'bvtj2 ',minval(flowDomsd(nn, level, sps)%bvtj2), &
-                                maxval(flowDomsd(nn, level, sps)%bvtj2), &
-                                norm2(flowDomsd(nn, level, sps)%bvtj2)
-        write(*,*) 'bvtk1 ',minval(flowDomsd(nn, level, sps)%bvtk1), &
-                                maxval(flowDomsd(nn, level, sps)%bvtk1), &
-                                norm2(flowDomsd(nn, level, sps)%bvtk1)
-        write(*,*) 'bvtk2 ',minval(flowDomsd(nn, level, sps)%bvtk2), &
-                                maxval(flowDomsd(nn, level, sps)%bvtk2), &
-                                norm2(flowDomsd(nn, level, sps)%bvtk2)
-
-        bocoLoop: do mm=1, flowDoms(nn, level, sps)%nBocos
-            write(*,*) 'mm', mm, 'BCData(mm)%norm',minval(flowDomsd(nn, level, sps)%BCData(mm)%norm), &
-                                    maxval(flowDomsd(nn, level, sps)%BCData(mm)%norm), &
-                                    norm2(flowDomsd(nn, level, sps)%BCData(mm)%norm)
-            write(*,*) 'mm', mm, 'bcData(mm)%rface ',minval(flowDomsd(nn, level, sps)%bcData(mm)%rface), &
-                                    maxval(flowDomsd(nn, level, sps)%bcData(mm)%rface), &
-                                    norm2(flowDomsd(nn, level, sps)%bcData(mm)%rface)
-            write(*,*) 'mm', mm, 'bcData(mm)%Fv ',minval(flowDomsd(nn, level, sps)%bcData(mm)%Fv), &
-                                    maxval(flowDomsd(nn, level, sps)%bcData(mm)%Fv), &
-                                    norm2(flowDomsd(nn, level, sps)%bcData(mm)%Fv)
-            write(*,*) 'mm', mm, 'bcData(mm)%Fp ',minval(flowDomsd(nn, level, sps)%bcData(mm)%Fp), &
-                                    maxval(flowDomsd(nn, level, sps)%bcData(mm)%Fp), &
-                                    norm2(flowDomsd(nn, level, sps)%bcData(mm)%Fp)
-            write(*,*) 'mm', mm, 'bcData(mm)%Tv ',minval(flowDomsd(nn, level, sps)%bcData(mm)%Tv), &
-                                    maxval(flowDomsd(nn, level, sps)%bcData(mm)%Tv), &
-                                    norm2(flowDomsd(nn, level, sps)%bcData(mm)%Tv)
-            write(*,*) 'mm', mm, 'bcData(mm)%Tp ',minval(flowDomsd(nn, level, sps)%bcData(mm)%Tp), &
-                                    maxval(flowDomsd(nn, level, sps)%bcData(mm)%Tp), &
-                                    norm2(flowDomsd(nn, level, sps)%bcData(mm)%Tp)
-            write(*,*) 'mm', mm, 'bcData(mm)%area ',minval(flowDomsd(nn, level, sps)%bcData(mm)%area), &
-                                    maxval(flowDomsd(nn, level, sps)%bcData(mm)%area), &
-                                    norm2(flowDomsd(nn, level, sps)%bcData(mm)%area)
-            write(*,*) 'mm', mm, 'BCData(mm)%uSlip ',minval(flowDomsd(nn, level, sps)%BCData(mm)%uSlip), &
-                                    maxval(flowDomsd(nn, level, sps)%BCData(mm)%uSlip), &
-                                    norm2(flowDomsd(nn, level, sps)%BCData(mm)%uSlip)
-            write(*,*) 'mm', mm, 'BCData(mm)%TNS_Wall ',minval(flowDomsd(nn, level, sps)%BCData(mm)%TNS_Wall), &
-                                    maxval(flowDomsd(nn, level, sps)%BCData(mm)%TNS_Wall), &
-                                    norm2(flowDomsd(nn, level, sps)%BCData(mm)%TNS_Wall)
-            write(*,*) 'mm', mm, 'BCData(mm)%ptInlet ',minval(flowDomsd(nn, level, sps)%BCData(mm)%ptInlet), &
-                                    maxval(flowDomsd(nn, level, sps)%BCData(mm)%ptInlet), &
-                                    norm2(flowDomsd(nn, level, sps)%BCData(mm)%ptInlet)
-            write(*,*) 'mm', mm, 'BCData(mm)%htInlet ',minval(flowDomsd(nn, level, sps)%BCData(mm)%htInlet), &
-                                    maxval(flowDomsd(nn, level, sps)%BCData(mm)%htInlet), &
-                                    norm2(flowDomsd(nn, level, sps)%BCData(mm)%htInlet)
-            write(*,*) 'mm', mm, 'BCData(mm)%ttInlet ',minval(flowDomsd(nn, level, sps)%BCData(mm)%ttInlet), &
-                                    maxval(flowDomsd(nn, level, sps)%BCData(mm)%ttInlet), &
-                                    norm2(flowDomsd(nn, level, sps)%BCData(mm)%ttInlet)
-            write(*,*) 'mm', mm, 'BCData(mm)%turbInlet ',minval(flowDomsd(nn, level, sps)%BCData(mm)%turbInlet), &
-                                    maxval(flowDomsd(nn, level, sps)%BCData(mm)%turbInlet), &
-                                    norm2(flowDomsd(nn, level, sps)%BCData(mm)%turbInlet)
-            write(*,*) 'mm', mm, 'BCData(mm)%ps ',minval(flowDomsd(nn, level, sps)%BCData(mm)%ps), &
-                                    maxval(flowDomsd(nn, level, sps)%BCData(mm)%ps), &
-                                    norm2(flowDomsd(nn, level, sps)%BCData(mm)%ps)
+        write (*, *) 'ADSeeds for block', nn, ' at level ', level, ' at sps ', sps
+        write (*, *) 'd2wall ', minval(flowDomsd(nn, level, sps)%d2wall), &
+            maxval(flowDomsd(nn, level, sps)%d2wall), &
+            norm2(flowDomsd(nn, level, sps)%d2wall)
+        write (*, *) 'x ', minval(flowDomsd(nn, level, sps)%x), &
+            maxval(flowDomsd(nn, level, sps)%x), &
+            norm2(flowDomsd(nn, level, sps)%x)
+        write (*, *) 'si ', minval(flowDomsd(nn, level, sps)%si), &
+            maxval(flowDomsd(nn, level, sps)%si), &
+            norm2(flowDomsd(nn, level, sps)%si)
+        write (*, *) 'sj ', minval(flowDomsd(nn, level, sps)%sj), &
+            maxval(flowDomsd(nn, level, sps)%sj), &
+            norm2(flowDomsd(nn, level, sps)%sj)
+        write (*, *) 'sk ', minval(flowDomsd(nn, level, sps)%sk), &
+            maxval(flowDomsd(nn, level, sps)%sk), &
+            norm2(flowDomsd(nn, level, sps)%sk)
+        write (*, *) 'vol ', minval(flowDomsd(nn, level, sps)%vol), &
+            maxval(flowDomsd(nn, level, sps)%vol), &
+            norm2(flowDomsd(nn, level, sps)%vol)
+
+        write (*, *) 's ', minval(flowDomsd(nn, level, sps)%s), &
+            maxval(flowDomsd(nn, level, sps)%s), &
+            norm2(flowDomsd(nn, level, sps)%s)
+        write (*, *) 'sFaceI ', minval(flowDomsd(nn, level, sps)%sFaceI), &
+            maxval(flowDomsd(nn, level, sps)%sFaceI), &
+            norm2(flowDomsd(nn, level, sps)%sFaceI)
+        write (*, *) 'sFaceJ ', minval(flowDomsd(nn, level, sps)%sFaceJ), &
+            maxval(flowDomsd(nn, level, sps)%sFaceJ), &
+            norm2(flowDomsd(nn, level, sps)%sFaceJ)
+        write (*, *) 'sFaceK ', minval(flowDomsd(nn, level, sps)%sFaceK), &
+            maxval(flowDomsd(nn, level, sps)%sFaceK), &
+            norm2(flowDomsd(nn, level, sps)%sFaceK)
+
+        write (*, *) 'w ', minval(flowDomsd(nn, level, sps)%w), &
+            maxval(flowDomsd(nn, level, sps)%w), &
+            norm2(flowDomsd(nn, level, sps)%w)
+        write (*, *) 'dw ', minval(flowDomsd(nn, level, sps)%dw), &
+            maxval(flowDomsd(nn, level, sps)%dw), &
+            norm2(flowDomsd(nn, level, sps)%dw)
+        write (*, *) 'fw ', minval(flowDomsd(nn, level, sps)%fw), &
+            maxval(flowDomsd(nn, level, sps)%fw), &
+            norm2(flowDomsd(nn, level, sps)%fw)
+        write (*, *) 'scratch ', minval(flowDomsd(nn, level, sps)%scratch), &
+            maxval(flowDomsd(nn, level, sps)%scratch), &
+            norm2(flowDomsd(nn, level, sps)%scratch)
+
+        write (*, *) 'p ', minval(flowDomsd(nn, level, sps)%p), &
+            maxval(flowDomsd(nn, level, sps)%p), &
+            norm2(flowDomsd(nn, level, sps)%p)
+        write (*, *) 'gamma ', minval(flowDomsd(nn, level, sps)%gamma), &
+            maxval(flowDomsd(nn, level, sps)%gamma), &
+            norm2(flowDomsd(nn, level, sps)%gamma)
+        write (*, *) 'aa ', minval(flowDomsd(nn, level, sps)%aa), &
+            maxval(flowDomsd(nn, level, sps)%aa), &
+            norm2(flowDomsd(nn, level, sps)%aa)
+
+        write (*, *) 'rlv ', minval(flowDomsd(nn, level, sps)%rlv), &
+            maxval(flowDomsd(nn, level, sps)%rlv), &
+            norm2(flowDomsd(nn, level, sps)%rlv)
+        write (*, *) 'rev ', minval(flowDomsd(nn, level, sps)%rev), &
+            maxval(flowDomsd(nn, level, sps)%rev), &
+            norm2(flowDomsd(nn, level, sps)%rev)
+
+        write (*, *) 'radI ', minval(flowDomsd(nn, level, sps)%radI), &
+            maxval(flowDomsd(nn, level, sps)%radI), &
+            norm2(flowDomsd(nn, level, sps)%radI)
+        write (*, *) 'radJ ', minval(flowDomsd(nn, level, sps)%radJ), &
+            maxval(flowDomsd(nn, level, sps)%radJ), &
+            norm2(flowDomsd(nn, level, sps)%radJ)
+        write (*, *) 'radK ', minval(flowDomsd(nn, level, sps)%radK), &
+            maxval(flowDomsd(nn, level, sps)%radK), &
+            norm2(flowDomsd(nn, level, sps)%radK)
+
+        write (*, *) 'ux ', minval(flowDomsd(nn, level, sps)%ux), &
+            maxval(flowDomsd(nn, level, sps)%ux), &
+            norm2(flowDomsd(nn, level, sps)%ux)
+        write (*, *) 'uy ', minval(flowDomsd(nn, level, sps)%uy), &
+            maxval(flowDomsd(nn, level, sps)%uy), &
+            norm2(flowDomsd(nn, level, sps)%uy)
+        write (*, *) 'uz ', minval(flowDomsd(nn, level, sps)%uz), &
+            maxval(flowDomsd(nn, level, sps)%uz), &
+            norm2(flowDomsd(nn, level, sps)%uz)
+        write (*, *) 'vx ', minval(flowDomsd(nn, level, sps)%vx), &
+            maxval(flowDomsd(nn, level, sps)%vx), &
+            norm2(flowDomsd(nn, level, sps)%vx)
+        write (*, *) 'vy ', minval(flowDomsd(nn, level, sps)%vy), &
+            maxval(flowDomsd(nn, level, sps)%vy), &
+            norm2(flowDomsd(nn, level, sps)%vy)
+        write (*, *) 'vz ', minval(flowDomsd(nn, level, sps)%vz), &
+            maxval(flowDomsd(nn, level, sps)%vz), &
+            norm2(flowDomsd(nn, level, sps)%vz)
+        write (*, *) 'wx ', minval(flowDomsd(nn, level, sps)%wx), &
+            maxval(flowDomsd(nn, level, sps)%wx), &
+            norm2(flowDomsd(nn, level, sps)%wx)
+        write (*, *) 'wy ', minval(flowDomsd(nn, level, sps)%wy), &
+            maxval(flowDomsd(nn, level, sps)%wy), &
+            norm2(flowDomsd(nn, level, sps)%wy)
+        write (*, *) 'wz ', minval(flowDomsd(nn, level, sps)%wz), &
+            maxval(flowDomsd(nn, level, sps)%wz), &
+            norm2(flowDomsd(nn, level, sps)%wz)
+        write (*, *) 'qx ', minval(flowDomsd(nn, level, sps)%qx), &
+            maxval(flowDomsd(nn, level, sps)%qx), &
+            norm2(flowDomsd(nn, level, sps)%qx)
+        write (*, *) 'qy ', minval(flowDomsd(nn, level, sps)%qy), &
+            maxval(flowDomsd(nn, level, sps)%qy), &
+            norm2(flowDomsd(nn, level, sps)%qy)
+        write (*, *) 'qz ', minval(flowDomsd(nn, level, sps)%qz), &
+            maxval(flowDomsd(nn, level, sps)%qz), &
+            norm2(flowDomsd(nn, level, sps)%qz)
+
+        write (*, *) 'bmti1 ', minval(flowDomsd(nn, level, sps)%bmti1), &
+            maxval(flowDomsd(nn, level, sps)%bmti1), &
+            norm2(flowDomsd(nn, level, sps)%bmti1)
+        write (*, *) 'bmti2 ', minval(flowDomsd(nn, level, sps)%bmti2), &
+            maxval(flowDomsd(nn, level, sps)%bmti2), &
+            norm2(flowDomsd(nn, level, sps)%bmti2)
+        write (*, *) 'bmtj1 ', minval(flowDomsd(nn, level, sps)%bmtj1), &
+            maxval(flowDomsd(nn, level, sps)%bmtj1), &
+            norm2(flowDomsd(nn, level, sps)%bmtj1)
+        write (*, *) 'bmtj2 ', minval(flowDomsd(nn, level, sps)%bmtj2), &
+            maxval(flowDomsd(nn, level, sps)%bmtj2), &
+            norm2(flowDomsd(nn, level, sps)%bmtj2)
+        write (*, *) 'bmtk1 ', minval(flowDomsd(nn, level, sps)%bmtk1), &
+            maxval(flowDomsd(nn, level, sps)%bmtk1), &
+            norm2(flowDomsd(nn, level, sps)%bmtk1)
+        write (*, *) 'bmtk2 ', minval(flowDomsd(nn, level, sps)%bmtk2), &
+            maxval(flowDomsd(nn, level, sps)%bmtk2), &
+            norm2(flowDomsd(nn, level, sps)%bmtk2)
+        write (*, *) 'bvti1 ', minval(flowDomsd(nn, level, sps)%bvti1), &
+            maxval(flowDomsd(nn, level, sps)%bvti1), &
+            norm2(flowDomsd(nn, level, sps)%bvti1)
+        write (*, *) 'bvti2 ', minval(flowDomsd(nn, level, sps)%bvti2), &
+            maxval(flowDomsd(nn, level, sps)%bvti2), &
+            norm2(flowDomsd(nn, level, sps)%bvti2)
+        write (*, *) 'bvtj1 ', minval(flowDomsd(nn, level, sps)%bvtj1), &
+            maxval(flowDomsd(nn, level, sps)%bvtj1), &
+            norm2(flowDomsd(nn, level, sps)%bvtj1)
+        write (*, *) 'bvtj2 ', minval(flowDomsd(nn, level, sps)%bvtj2), &
+            maxval(flowDomsd(nn, level, sps)%bvtj2), &
+            norm2(flowDomsd(nn, level, sps)%bvtj2)
+        write (*, *) 'bvtk1 ', minval(flowDomsd(nn, level, sps)%bvtk1), &
+            maxval(flowDomsd(nn, level, sps)%bvtk1), &
+            norm2(flowDomsd(nn, level, sps)%bvtk1)
+        write (*, *) 'bvtk2 ', minval(flowDomsd(nn, level, sps)%bvtk2), &
+            maxval(flowDomsd(nn, level, sps)%bvtk2), &
+            norm2(flowDomsd(nn, level, sps)%bvtk2)
+
+        bocoLoop: do mm = 1, flowDoms(nn, level, sps)%nBocos
+            write (*, *) 'mm', mm, 'BCData(mm)%norm', minval(flowDomsd(nn, level, sps)%BCData(mm)%norm), &
+                maxval(flowDomsd(nn, level, sps)%BCData(mm)%norm), &
+                norm2(flowDomsd(nn, level, sps)%BCData(mm)%norm)
+            write (*, *) 'mm', mm, 'bcData(mm)%rface ', minval(flowDomsd(nn, level, sps)%bcData(mm)%rface), &
+                maxval(flowDomsd(nn, level, sps)%bcData(mm)%rface), &
+                norm2(flowDomsd(nn, level, sps)%bcData(mm)%rface)
+            write (*, *) 'mm', mm, 'bcData(mm)%Fv ', minval(flowDomsd(nn, level, sps)%bcData(mm)%Fv), &
+                maxval(flowDomsd(nn, level, sps)%bcData(mm)%Fv), &
+                norm2(flowDomsd(nn, level, sps)%bcData(mm)%Fv)
+            write (*, *) 'mm', mm, 'bcData(mm)%Fp ', minval(flowDomsd(nn, level, sps)%bcData(mm)%Fp), &
+                maxval(flowDomsd(nn, level, sps)%bcData(mm)%Fp), &
+                norm2(flowDomsd(nn, level, sps)%bcData(mm)%Fp)
+            write (*, *) 'mm', mm, 'bcData(mm)%Tv ', minval(flowDomsd(nn, level, sps)%bcData(mm)%Tv), &
+                maxval(flowDomsd(nn, level, sps)%bcData(mm)%Tv), &
+                norm2(flowDomsd(nn, level, sps)%bcData(mm)%Tv)
+            write (*, *) 'mm', mm, 'bcData(mm)%Tp ', minval(flowDomsd(nn, level, sps)%bcData(mm)%Tp), &
+                maxval(flowDomsd(nn, level, sps)%bcData(mm)%Tp), &
+                norm2(flowDomsd(nn, level, sps)%bcData(mm)%Tp)
+            write (*, *) 'mm', mm, 'bcData(mm)%area ', minval(flowDomsd(nn, level, sps)%bcData(mm)%area), &
+                maxval(flowDomsd(nn, level, sps)%bcData(mm)%area), &
+                norm2(flowDomsd(nn, level, sps)%bcData(mm)%area)
+            write (*, *) 'mm', mm, 'BCData(mm)%uSlip ', minval(flowDomsd(nn, level, sps)%BCData(mm)%uSlip), &
+                maxval(flowDomsd(nn, level, sps)%BCData(mm)%uSlip), &
+                norm2(flowDomsd(nn, level, sps)%BCData(mm)%uSlip)
+            write (*, *) 'mm', mm, 'BCData(mm)%TNS_Wall ', minval(flowDomsd(nn, level, sps)%BCData(mm)%TNS_Wall), &
+                maxval(flowDomsd(nn, level, sps)%BCData(mm)%TNS_Wall), &
+                norm2(flowDomsd(nn, level, sps)%BCData(mm)%TNS_Wall)
+            write (*, *) 'mm', mm, 'BCData(mm)%ptInlet ', minval(flowDomsd(nn, level, sps)%BCData(mm)%ptInlet), &
+                maxval(flowDomsd(nn, level, sps)%BCData(mm)%ptInlet), &
+                norm2(flowDomsd(nn, level, sps)%BCData(mm)%ptInlet)
+            write (*, *) 'mm', mm, 'BCData(mm)%htInlet ', minval(flowDomsd(nn, level, sps)%BCData(mm)%htInlet), &
+                maxval(flowDomsd(nn, level, sps)%BCData(mm)%htInlet), &
+                norm2(flowDomsd(nn, level, sps)%BCData(mm)%htInlet)
+            write (*, *) 'mm', mm, 'BCData(mm)%ttInlet ', minval(flowDomsd(nn, level, sps)%BCData(mm)%ttInlet), &
+                maxval(flowDomsd(nn, level, sps)%BCData(mm)%ttInlet), &
+                norm2(flowDomsd(nn, level, sps)%BCData(mm)%ttInlet)
+            write (*, *) 'mm', mm, 'BCData(mm)%turbInlet ', minval(flowDomsd(nn, level, sps)%BCData(mm)%turbInlet), &
+                maxval(flowDomsd(nn, level, sps)%BCData(mm)%turbInlet), &
+                norm2(flowDomsd(nn, level, sps)%BCData(mm)%turbInlet)
+            write (*, *) 'mm', mm, 'BCData(mm)%ps ', minval(flowDomsd(nn, level, sps)%BCData(mm)%ps), &
+                maxval(flowDomsd(nn, level, sps)%BCData(mm)%ps), &
+                norm2(flowDomsd(nn, level, sps)%BCData(mm)%ps)
 
         end do bocoLoop
 
-
-        viscbocoLoop: do mm=1,flowDoms(nn, level, sps)%nViscBocos
-            write(*,*) 'mm', mm, 'viscSubface(mm)%tau ',minval(flowDomsd(nn, level, sps)%viscSubface(mm)%tau), &
-                                    maxval(flowDomsd(nn, level, sps)%viscSubface(mm)%tau), &
-                                    norm2(flowDomsd(nn, level, sps)%viscSubface(mm)%tau)
-            write(*,*) 'mm', mm, 'viscSubface(mm)%q ',minval(flowDomsd(nn, level, sps)%viscSubface(mm)%q), &
-                                    maxval(flowDomsd(nn, level, sps)%viscSubface(mm)%q), &
-                                    norm2(flowDomsd(nn, level, sps)%viscSubface(mm)%q)
+        viscbocoLoop: do mm = 1, flowDoms(nn, level, sps)%nViscBocos
+            write (*, *) 'mm', mm, 'viscSubface(mm)%tau ', minval(flowDomsd(nn, level, sps)%viscSubface(mm)%tau), &
+                maxval(flowDomsd(nn, level, sps)%viscSubface(mm)%tau), &
+                norm2(flowDomsd(nn, level, sps)%viscSubface(mm)%tau)
+            write (*, *) 'mm', mm, 'viscSubface(mm)%q ', minval(flowDomsd(nn, level, sps)%viscSubface(mm)%q), &
+                maxval(flowDomsd(nn, level, sps)%viscSubface(mm)%q), &
+                norm2(flowDomsd(nn, level, sps)%viscSubface(mm)%q)
         end do viscbocoLoop
 
         ! For overset, the weights may be active in the comm structure. We
         ! need to zero them before we can accumulate.
         if (oversetPresent) then
             ! Pointers to the overset comms to make it easier to read
-            sends: do i=1,commPatternOverset(level, sps)%nProcSend
-            write(*,*) 'commPatternOverset(level, sps)%sendList(i)%interpd ',&
-                        minval(commPatternOverset(level, sps)%sendList(i)%interpd), &
-                        maxval(commPatternOverset(level, sps)%sendList(i)%interpd), &
-                        norm2(commPatternOverset(level, sps)%sendList(i)%interpd)
+            sends: do i = 1, commPatternOverset(level, sps)%nProcSend
+                write (*, *) 'commPatternOverset(level, sps)%sendList(i)%interpd ', &
+                    minval(commPatternOverset(level, sps)%sendList(i)%interpd), &
+                    maxval(commPatternOverset(level, sps)%sendList(i)%interpd), &
+                    norm2(commPatternOverset(level, sps)%sendList(i)%interpd)
             end do sends
-            write(*,*) 'internalOverset(level, sps)%donorInterpd ',minval(internalOverset(level, sps)%donorInterpd), &
-                                    maxval(internalOverset(level, sps)%donorInterpd), &
-                                    norm2(internalOverset(level, sps)%donorInterpd)
+           write (*, *) 'internalOverset(level, sps)%donorInterpd ', minval(internalOverset(level, sps)%donorInterpd), &
+                maxval(internalOverset(level, sps)%donorInterpd), &
+                norm2(internalOverset(level, sps)%donorInterpd)
         end if
 
-        write(*,*) 'alphad ',alphad
-        write(*,*) 'betad ',betad
-        write(*,*) 'machd ',machd
-        write(*,*) 'machGridd ',machGridd
-        write(*,*) 'machCoefd ',machCoefd
-        write(*,*) 'pinfdimd ',pinfdimd
-        write(*,*) 'tinfdimd ',tinfdimd
-        write(*,*) 'rhoinfdimd ',rhoinfdimd
-        write(*,*) 'rgasdimd ',rgasdimd
-        write(*,*) 'pointrefd ',pointrefd
-        write(*,*) 'prefd ',prefd
-        write(*,*) 'rhoRefd ',rhoRefd
-        write(*,*) 'Trefd ',Trefd
-        write(*,*) 'murefd ',murefd
-        write(*,*) 'urefd ',urefd
-        write(*,*) 'hrefd ',hrefd
-        write(*,*) 'timerefd ',timerefd
-        write(*,*) 'pinfd ',pinfd
-        write(*,*) 'pinfCorrd ',pinfCorrd
-        write(*,*) 'rhoinfd ',rhoinfd
-        write(*,*) 'uinfd ',uinfd
-        write(*,*) 'rgasd ',rgasd
-        write(*,*) 'muinfd ',muinfd
-        write(*,*) 'gammainfd ',gammainfd
-        write(*,*) 'winfd ',winfd
-        write(*,*) 'veldirfreestreamd ',veldirfreestreamd
-        write(*,*) 'liftdirectiond ',liftdirectiond
-        write(*,*) 'dragdirectiond ',dragdirectiond
+        write (*, *) 'alphad ', alphad
+        write (*, *) 'betad ', betad
+        write (*, *) 'machd ', machd
+        write (*, *) 'machGridd ', machGridd
+        write (*, *) 'machCoefd ', machCoefd
+        write (*, *) 'pinfdimd ', pinfdimd
+        write (*, *) 'tinfdimd ', tinfdimd
+        write (*, *) 'rhoinfdimd ', rhoinfdimd
+        write (*, *) 'rgasdimd ', rgasdimd
+        write (*, *) 'pointrefd ', pointrefd
+        write (*, *) 'prefd ', prefd
+        write (*, *) 'rhoRefd ', rhoRefd
+        write (*, *) 'Trefd ', Trefd
+        write (*, *) 'murefd ', murefd
+        write (*, *) 'urefd ', urefd
+        write (*, *) 'hrefd ', hrefd
+        write (*, *) 'timerefd ', timerefd
+        write (*, *) 'pinfd ', pinfd
+        write (*, *) 'pinfCorrd ', pinfCorrd
+        write (*, *) 'rhoinfd ', rhoinfd
+        write (*, *) 'uinfd ', uinfd
+        write (*, *) 'rgasd ', rgasd
+        write (*, *) 'muinfd ', muinfd
+        write (*, *) 'gammainfd ', gammainfd
+        write (*, *) 'winfd ', winfd
+        write (*, *) 'veldirfreestreamd ', veldirfreestreamd
+        write (*, *) 'liftdirectiond ', liftdirectiond
+        write (*, *) 'dragdirectiond ', dragdirectiond
 
         ! Zero all the reverse seeds in the dirichlet input arrays
-        do iDom=1, cgnsNDom
-            do iBoco=1, cgnsDoms(iDom)%nBocos
-            if (associated(cgnsDoms(iDom)%bocoInfo(iBoco)%dataSet)) then
-                do iData=1, size(cgnsDoms(iDom)%bocoInfo(iBoco)%dataSet)
-                    if (associated(cgnsDoms(iDom)%bocoInfo(iBoco)%dataSet(iData)%dirichletArrays)) then
-                        do iDirichlet = 1, size(cgnsDoms(iDom)%bocoInfo(iBoco)%dataSet(iData)%dirichletArrays)
-                        write(*,*) iDom, iBoco, iData, iDirichlet, 'dataArr(:) '&
-                        ,cgnsDomsd(iDom)%bocoInfo(iBoco)%dataSet(iData)%dirichletArrays(iDirichlet)%dataArr(:)
-                        end do
-                    end if
-                end do
-            end if
+        do iDom = 1, cgnsNDom
+            do iBoco = 1, cgnsDoms(iDom)%nBocos
+                if (associated(cgnsDoms(iDom)%bocoInfo(iBoco)%dataSet)) then
+                    do iData = 1, size(cgnsDoms(iDom)%bocoInfo(iBoco)%dataSet)
+                        if (associated(cgnsDoms(iDom)%bocoInfo(iBoco)%dataSet(iData)%dirichletArrays)) then
+                            do iDirichlet = 1, size(cgnsDoms(iDom)%bocoInfo(iBoco)%dataSet(iData)%dirichletArrays)
+                                write (*, *) iDom, iBoco, iData, iDirichlet, 'dataArr(:) ' &
+                                 , cgnsDomsd(iDom)%bocoInfo(iBoco)%dataSet(iData)%dirichletArrays(iDirichlet)%dataArr(:)
+                            end do
+                        end if
+                    end do
+                end if
             end do
         end do
 
         ! And the reverse seeds in the actuator zones
-        do i=1, nActuatorRegions
-            write(*,*) 'actuatorRegionsd(i)%Force ',actuatorRegionsd(i)%force
-            write(*,*) 'actuatorRegionsd(i)%Torque ',actuatorRegionsd(i)%torque
+        do i = 1, nActuatorRegions
+            write (*, *) 'actuatorRegionsd(i)%Force ', actuatorRegionsd(i)%force
+            write (*, *) 'actuatorRegionsd(i)%Torque ', actuatorRegionsd(i)%torque
         end do
 
     end subroutine printADSeeds
 
-
 #else
 
     subroutine computeMatrixFreeProductFwdCS(xvdot, extradot, wdot, bcDataValuesdot, &
-                                            useSpatial, useState, famLists,&
-                                            bcDataNames, bcDataValues, bcDataFamLists, bcVarsEmpty,&
-                                            dwdot, funcsDot, fDot, &
-                                            costSize, fSize, nTime, h_mag)
+                                             useSpatial, useState, famLists, &
+                                             bcDataNames, bcDataValues, bcDataFamLists, bcVarsEmpty, &
+                                             dwdot, funcsDot, fDot, &
+                                             costSize, fSize, nTime, h_mag)
 
         ! This routine is used to debug master_d. It uses the forward seeds to set perturbations
         ! and then computes the value of the derivatives using forward complex step
 
         use constants
         use adjointvars
-        use blockPointers, only : nDom
-        use communication, only : adflow_comm_world
-        use inputTimeSpectral, only : nTimeIntervalsSpectral
-        use inputPhysics, only :pointRef, alpha, beta, equations, machCoef, &
-            mach, machGrid, rgasdim
-        use iteration, only : currentLevel, groundLevel
-        use flowVarRefState, only : pInfDim, rhoInfDim, TinfDim
-        use blockPointers, only : nDom, il, jl, kl, wd, x, w, dw, dwd, nBocos, nViscBocos
-
-        use adjointUtils, only : allocDerivativeValues, zeroADSeeds
-        use masterRoutines, only : master
-        use utils, only : isWallType, setPointers, setPointers_d, EChk
-        use flowVarRefState, only : nw, nwf
+        use blockPointers, only: nDom
+        use communication, only: adflow_comm_world
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use inputPhysics, only: pointRef, alpha, beta, equations, machCoef, &
+                                mach, machGrid, rgasdim
+        use iteration, only: currentLevel, groundLevel
+        use flowVarRefState, only: pInfDim, rhoInfDim, TinfDim
+        use blockPointers, only: nDom, il, jl, kl, wd, x, w, dw, dwd, nBocos, nViscBocos
+
+        use adjointUtils, only: allocDerivativeValues, zeroADSeeds
+        use masterRoutines, only: master
+        use utils, only: isWallType, setPointers, setPointers_d, EChk
+        use flowVarRefState, only: nw, nwf
         use wallDistanceData, only: xSurf, xSurfVec
-        use wallDistance, only : updateXSurf
+        use wallDistance, only: updateXSurf
 
         implicit none
 
@@ -618,7 +598,6 @@ subroutine computeMatrixFreeProductFwdCS(xvdot, extradot, wdot, bcDataValuesdot,
         complex(kind=realType), dimension(:), intent(in) :: extradot
         complex(kind=realType), dimension(:), intent(in) :: wdot
 
-
         logical, intent(in) :: useSpatial, useState
         integer(kind=intType), dimension(:, :) :: famLists
         integer(kind=intType) :: costSize, fSize, nTime
@@ -631,36 +610,30 @@ subroutine computeMatrixFreeProductFwdCS(xvdot, extradot, wdot, bcDataValuesdot,
         ! step parameters
         real(kind=alwaysRealType), intent(in) :: h_mag ! step size for step
 
-
         ! Ouput Variables
         complex(kind=realType), dimension(size(wdot)), intent(out) :: dwDot
-        complex(kind=realType), dimension(costSize, size(famLists,1)), intent(out) :: funcsDot
+        complex(kind=realType), dimension(costSize, size(famLists, 1)), intent(out) :: funcsDot
         complex(kind=realType), dimension(3, fSize, nTime), intent(out) :: fDot
 
         ! Working Variables
-        integer(kind=intType) :: nn,sps, level
-        integer(kind=intType) :: ierr, mm,i,j,k, l,  ii, jj, iRegion
-
-        complex(kind=realType), dimension(costSize, size(famLists,1)) :: funcs
+        integer(kind=intType) :: nn, sps, level
+        integer(kind=intType) :: ierr, mm, i, j, k, l, ii, jj, iRegion
 
+        complex(kind=realType), dimension(costSize, size(famLists, 1)) :: funcs
 
         ! Input Arguments for master:
-        complex(kind=realType), dimension(costSize, size(famLists,1)) :: funcValues
+        complex(kind=realType), dimension(costSize, size(famLists, 1)) :: funcValues
 
-
-            ! Working Variables
+        ! Working Variables
         complex(kind=realType), dimension(:, :, :), allocatable :: forces
         complex(kind=realType) :: h ! step size for Finite Difference
 
         ! note that h_mag does not have to be complex
-        ! it is just the magnitude of the complex perturbation 
+        ! it is just the magnitude of the complex perturbation
         h = cmplx(0, h_mag)
 
         fSize = size(fDot, 2)
-        allocate(forces(3, fSize, nTimeIntervalsSpectral))
-
-
-
+        allocate (forces(3, fSize, nTimeIntervalsSpectral))
 
         ! Need to trick the residual evalution to use coupled (mean flow and
         ! turbulent) together.
@@ -676,9 +649,9 @@ subroutine computeMatrixFreeProductFwdCS(xvdot, extradot, wdot, bcDataValuesdot,
         end if
 
         ! Zero all AD seesd.
-        do nn=1,nDom
-            do sps=1,nTimeIntervalsSpectral
-            call zeroADSeeds(nn,level, sps)
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call zeroADSeeds(nn, level, sps)
             end do
         end do
 
@@ -686,54 +659,52 @@ subroutine computeMatrixFreeProductFwdCS(xvdot, extradot, wdot, bcDataValuesdot,
         ! machNumber used for the coefficients follows the Mach number,
         ! not the grid mach number.
 
-        alpha = alpha + h*extraDot(iAlpha)
-        beta = beta + h*extraDot(iBeta)
-        mach = mach + h*extraDot(iMach)
-        machCoef = machCoef + h*extraDot(iMach)
-        machGrid = machGrid + h*extraDot(iMachGrid)
-        PinfDim = PinfDim + h*extraDot(iPressure)
-        rhoinfDim = rhoinfDim + h*extraDot(iDensity)
-        tinfdim = tinfdim + h*extraDot(iTemperature)
-        pointref(1) = pointref(1) + h*extraDot(iPointRefX)
-        pointref(2) = pointref(2) + h*extraDot(iPointRefY)
-        pointref(3) = pointref(3) + h*extraDot(iPointRefZ)
-        rgasdim = rgasdim + h*zero
-
+        alpha = alpha + h * extraDot(iAlpha)
+        beta = beta + h * extraDot(iBeta)
+        mach = mach + h * extraDot(iMach)
+        machCoef = machCoef + h * extraDot(iMach)
+        machGrid = machGrid + h * extraDot(iMachGrid)
+        PinfDim = PinfDim + h * extraDot(iPressure)
+        rhoinfDim = rhoinfDim + h * extraDot(iDensity)
+        tinfdim = tinfdim + h * extraDot(iTemperature)
+        pointref(1) = pointref(1) + h * extraDot(iPointRefX)
+        pointref(2) = pointref(2) + h * extraDot(iPointRefY)
+        pointref(3) = pointref(3) + h * extraDot(iPointRefZ)
+        rgasdim = rgasdim + h * zero
 
         !  --------------------- apply the perturbations ----------------------------
         ! Set the provided w and x seeds:
         ii = 0
         jj = 0
-        domainLoop1: do nn=1,nDom
-            spectalLoop1: do sps=1,nTimeIntervalsSpectral
-            call setPointers(nn, 1, sps)
-            do k=1, kl
-                do j=1,jl
-                    do i=1,il
-                        do l=1,3
-                        ii = ii + 1
-                        x(i, j, k, l) = x(i, j, k, l) +  xvdot(ii)*h
+        domainLoop1: do nn = 1, nDom
+            spectalLoop1: do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, 1, sps)
+                do k = 1, kl
+                    do j = 1, jl
+                        do i = 1, il
+                            do l = 1, 3
+                                ii = ii + 1
+                                x(i, j, k, l) = x(i, j, k, l) + xvdot(ii) * h
+                            end do
                         end do
                     end do
                 end do
-            end do
-            do k=2, kl
-                do j=2,jl
-                    do i=2,il
-                        do l = 1, nw
-                        jj = jj + 1
-                        w(i, j, k, l) = w(i, j, k, l) +  wDot(jj)*h
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            do l = 1, nw
+                                jj = jj + 1
+                                w(i, j, k, l) = w(i, j, k, l) + wDot(jj) * h
+                            end do
                         end do
                     end do
                 end do
-            end do
             end do spectalLoop1
         end do domainLoop1
 
-        bcDataValues = bcDataValues + bcDataValuesdot*h
-
+        bcDataValues = bcDataValues + bcDataValuesdot * h
 
-        if(equations == RANSEquations) then
+        if (equations == RANSEquations) then
             call updateXSurf(level)
         end if
 
@@ -749,62 +720,60 @@ subroutine computeMatrixFreeProductFwdCS(xvdot, extradot, wdot, bcDataValuesdot,
                         forces, &
                         bcDataNames, bcDataValues, bcDataFamLists)
         end if
-                    
-                    
+
         ! Copy out the residual derivative into the provided dwDot and remove the
         ! perturbation
         ii = 0
         jj = 0
-        do nn=1, nDom
-            do sps=1,nTimeIntervalsSpectral
-            call setPointers_d(nn, 1, sps)
-            do k=1, kl
-                do j=1,jl
-                    do i=1,il
-                        do l=1,3
-                        ii = ii + 1
-                        x(i, j, k, l) = x(i, j, k, l) -  xvdot(ii)* h
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers_d(nn, 1, sps)
+                do k = 1, kl
+                    do j = 1, jl
+                        do i = 1, il
+                            do l = 1, 3
+                                ii = ii + 1
+                                x(i, j, k, l) = x(i, j, k, l) - xvdot(ii) * h
+                            end do
                         end do
                     end do
                 end do
-            end do
-            do k=2, kl
-                do j=2, jl
-                    do i=2, il
-                        do l=1, nw
-                        jj = jj + 1
-                        w(i, j, k, l) = w(i, j, k, l) -  wDot(jj)*h
-                        dwd(i,j,k,l) = aimag(dw(i,j,k,l))/aimag(h)
-                        dwdot(jj) = dwd(i,j,k,l) ! copy values to output
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            do l = 1, nw
+                                jj = jj + 1
+                                w(i, j, k, l) = w(i, j, k, l) - wDot(jj) * h
+                                dwd(i, j, k, l) = aimag(dw(i, j, k, l)) / aimag(h)
+                                dwdot(jj) = dwd(i, j, k, l) ! copy values to output
+                            end do
                         end do
                     end do
                 end do
-            end do
 
             end do
         end do
 
-        alpha = alpha - h*extraDot(iAlpha)
-        beta = beta - h*extraDot(iBeta)
-        mach = mach - h*extraDot(iMach)
-        machCoef = machCoef - h*extraDot(iMach)
-        machGrid = machGrid - h*extraDot(iMachGrid)
-        PinfDim = PinfDim - h*extraDot(iPressure)
-        rhoinfDim = rhoinfDim - h*extraDot(iDensity)
-        tinfdim = tinfdim - h*extraDot(iTemperature)
-        pointref(1) = pointref(1) - h*extraDot(iPointRefX)
-        pointref(2) = pointref(2) - h*extraDot(iPointRefY)
-        pointref(3) = pointref(3) - h*extraDot(iPointRefZ)
-        rgasdim = rgasdim - h*zero
-        
-        bcDataValues = bcDataValues - bcDataValuesdot*h
-        if(equations == RANSEquations) then
+        alpha = alpha - h * extraDot(iAlpha)
+        beta = beta - h * extraDot(iBeta)
+        mach = mach - h * extraDot(iMach)
+        machCoef = machCoef - h * extraDot(iMach)
+        machGrid = machGrid - h * extraDot(iMachGrid)
+        PinfDim = PinfDim - h * extraDot(iPressure)
+        rhoinfDim = rhoinfDim - h * extraDot(iDensity)
+        tinfdim = tinfdim - h * extraDot(iTemperature)
+        pointref(1) = pointref(1) - h * extraDot(iPointRefX)
+        pointref(2) = pointref(2) - h * extraDot(iPointRefY)
+        pointref(3) = pointref(3) - h * extraDot(iPointRefZ)
+        rgasdim = rgasdim - h * zero
+
+        bcDataValues = bcDataValues - bcDataValuesdot * h
+        if (equations == RANSEquations) then
             call updateXSurf(level)
-        endif
-
-        fDot = aimag(forces)/aimag(h)
-        funcsDot = aimag(funcValues)/aimag(h)
+        end if
 
+        fDot = aimag(forces) / aimag(h)
+        funcsDot = aimag(funcValues) / aimag(h)
 
     end subroutine computeMatrixFreeProductFwdCS
 
diff --git a/src/adjoint/adjointExtra.F90 b/src/adjoint/adjointExtra.F90
index b20f84df4..71ef9e328 100644
--- a/src/adjoint/adjointExtra.F90
+++ b/src/adjoint/adjointExtra.F90
@@ -2,665 +2,660 @@ module adjointExtra
 
 contains
 
-  subroutine volume_block
-
-    ! This is COPY of metric.f90. It was necessary to copy this file
-    ! since there is debugging stuff in the original that is not
-    ! necessary for AD.
-    use constants
-    use blockPointers
-    use cgnsGrid
-    use communication
-    use inputTimeSpectral
-
-    implicit none
-    !
-    !      Local parameter.
-    !
-    real(kind=realType), parameter :: thresVolume = 1.e-2_realType
-    real(kind=realType), parameter :: haloCellRatio = 1e-10_realType
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, n, m, l, ii
-    integer(kind=intType) :: mm
-    real(kind=realType) :: fact, mult
-    real(kind=realType) :: xp, yp, zp, vp1, vp2, vp3, vp4, vp5, vp6
-    real(kind=realType) :: xxp, yyp, zzp
-    real(kind=realType), dimension(3) :: v1, v2
-
-
-
-    ! Compute the volumes. The hexahedron is split into 6 pyramids
-    ! whose volumes are computed. The volume is positive for a
-    ! right handed block.
-    ! Initialize the volumes to zero. The reasons is that the second
-    ! level halo's must be initialized to zero and for convenience
-    ! all the volumes are set to zero.
-
-    vol = zero
-
-    do k=1, ke
-       n = k - 1
-       do j=1, je
-          m = j - 1
-          do i=1, ie
-
-             l = i - 1
-
-             ! Compute the coordinates of the center of gravity.
-
-             xp = eighth*(x(i,j,k,1) + x(i,m,k,1) &
-                  +         x(i,m,n,1) + x(i,j,n,1) &
-                  +         x(l,j,k,1) + x(l,m,k,1) &
-                  +         x(l,m,n,1) + x(l,j,n,1))
-             yp = eighth*(x(i,j,k,2) + x(i,m,k,2) &
-                  +         x(i,m,n,2) + x(i,j,n,2) &
-                  +         x(l,j,k,2) + x(l,m,k,2) &
-                  +         x(l,m,n,2) + x(l,j,n,2))
-             zp = eighth*(x(i,j,k,3) + x(i,m,k,3) &
-                  +         x(i,m,n,3) + x(i,j,n,3) &
-                  +         x(l,j,k,3) + x(l,m,k,3) &
-                  +         x(l,m,n,3) + x(l,j,n,3))
-
-
-             ! Compute the volumes of the 6 sub pyramids. The
-             ! arguments of volpym must be such that for a (regular)
-             ! right handed hexahedron all volumes are positive.
-
-             call volpym(x(i,j,k,1), x(i,j,k,2), x(i,j,k,3), &
-                  x(i,j,n,1), x(i,j,n,2), x(i,j,n,3), &
-                  x(i,m,n,1), x(i,m,n,2), x(i,m,n,3), &
-                  x(i,m,k,1), x(i,m,k,2), x(i,m,k,3),vp1)
-
-             call volpym(x(l,j,k,1), x(l,j,k,2), x(l,j,k,3), &
-                  x(l,m,k,1), x(l,m,k,2), x(l,m,k,3), &
-                  x(l,m,n,1), x(l,m,n,2), x(l,m,n,3), &
-                  x(l,j,n,1), x(l,j,n,2), x(l,j,n,3),vp2)
-
-             call volpym(x(i,j,k,1), x(i,j,k,2), x(i,j,k,3), &
-                  x(l,j,k,1), x(l,j,k,2), x(l,j,k,3), &
-                  x(l,j,n,1), x(l,j,n,2), x(l,j,n,3), &
-                  x(i,j,n,1), x(i,j,n,2), x(i,j,n,3),vp3)
-
-             call volpym(x(i,m,k,1), x(i,m,k,2), x(i,m,k,3), &
-                  x(i,m,n,1), x(i,m,n,2), x(i,m,n,3), &
-                  x(l,m,n,1), x(l,m,n,2), x(l,m,n,3), &
-                  x(l,m,k,1), x(l,m,k,2), x(l,m,k,3),vp4)
-
-             call volpym(x(i,j,k,1), x(i,j,k,2), x(i,j,k,3), &
-                  x(i,m,k,1), x(i,m,k,2), x(i,m,k,3), &
-                  x(l,m,k,1), x(l,m,k,2), x(l,m,k,3), &
-                  x(l,j,k,1), x(l,j,k,2), x(l,j,k,3),vp5)
-
-             call volpym(x(i,j,n,1), x(i,j,n,2), x(i,j,n,3), &
-                  x(l,j,n,1), x(l,j,n,2), x(l,j,n,3), &
-                  x(l,m,n,1), x(l,m,n,2), x(l,m,n,3), &
-                  x(i,m,n,1), x(i,m,n,2), x(i,m,n,3),vp6)
-
-             ! Set the volume to 1/6 of the sum of the volumes of the
-             ! pyramid. Remember that volpym computes 6 times the
-             ! volume.
-
-             vol(i,j,k) = sixth*(vp1 + vp2 + vp3 + vp4 + vp5 + vp6)
-
-             ! Set the volume to the absolute value.
-             vol(i, j, k) = abs(vol(i, j, k))
-          enddo
-       enddo
-    enddo
-
-    ! Some additional safety stuff for halo volumes.
-
-    do k=2,kl
-       do j=2,jl
-          if(vol(1, j,k)/vol(2, j, k) < haloCellRatio) then
-             vol(1, j,k) = vol(2, j,k)
-          end if
-          if(vol(ie,j,k)/vol(il,j,k)  < haloCellRatio) then
-             vol(ie,j,k) = vol(il,j,k)
-          end if
-       enddo
-    enddo
-
-    do k=2,kl
-       do i=1,ie
-          if(vol(i,1, k)/vol(i,2,k) < haloCellRatio) then
-             vol(i,1, k) = vol(i,2, k)
-          end if
-          if(vol(i,je,k)/voL(i,jl,k) < haloCellRatio) then
-             vol(i,je,k) = vol(i,jl,k)
-          end if
-       enddo
-    enddo
-
-    do j=1,je
-       do i=1,ie
-          if(vol(i,j,1)/vol(i,j,2)  < haloCellRatio) then
-             vol(i,j,1)  = vol(i,j,2)
-          end if
-          if(vol(i,j,ke)/vol(i,j,kl) < haloCellRatio) then
-             vol(i,j,ke) = vol(i,j,kl)
-          end if
-       enddo
-    enddo
-
-
-  contains
-
-    subroutine volpym(xa,ya,za,xb,yb,zb,xc,yc,zc,xd,yd,zd,volume)
-      !
-      !         volpym computes 6 times the volume of a pyramid. Node p,
-      !         whose coordinates are set in the subroutine metric itself,
-      !         is the top node and a-b-c-d is the quadrilateral surface.
-      !         It is assumed that the cross product vCa * vDb points in
-      !         the direction of the top node. Here vCa is the diagonal
-      !         running from node c to node a and vDb the diagonal from
-      !         node d to node b.
-      !
-      use precision
-      implicit none
-      !
-      !        Function type.
-      !
-      real(kind=realType) :: volume
-      !
-      !        Function arguments.
-      !
-      real(kind=realType), intent(in) :: xa, ya, za, xb, yb, zb
-      real(kind=realType), intent(in) :: xc, yc, zc, xd, yd, zd
-
-      volume = (xp - fourth*(xa + xb  + xc + xd))              &
-           * ((ya - yc)*(zb - zd) - (za - zc)*(yb - yd))   + &
-           (yp - fourth*(ya + yb  + yc + yd))              &
-           * ((za - zc)*(xb - xd) - (xa - xc)*(zb - zd))   + &
-           (zp - fourth*(za + zb  + zc + zd))              &
-           * ((xa - xc)*(yb - yd) - (ya - yc)*(xb - xd))
-
-    end subroutine volpym
-  end subroutine volume_block
-
-  subroutine metric_block
-    use constants
-    use blockPointers
-    implicit none
-
-    ! Local variables.
-    integer(kind=intType) :: i, j, k, n, m, l, ii
-    real(kind=realType) :: fact
-    real(kind=realType) :: xxp, yyp, zzp
-    real(kind=realType), dimension(3) :: v1, v2
-
-    ! Set the factor in the surface normals computation. For a
-    ! left handed block this factor is negative, such that the
-    ! normals still point in the direction of increasing index.
-    ! The formulae used later on assume a right handed block
-    ! and fact is used to correct this for a left handed block,
-    ! as well as the scaling factor of 0.5
-
-    if (rightHanded) then
-       fact =  half
-    else
-       fact = -half
-    endif
-
-    !
-    !  Computation of the face normals in i-, j- and k-direction.
-    !  Formula's are valid for a right handed block; for a left
-    !  handed block the correct orientation is obtained via fact.
-    !  The normals point in the direction of increasing index.
-    !  The absolute value of fact is 0.5, because the cross
-    !  product of the two diagonals is twice the normal vector.
-    !  Note that also the normals of the first level halo cells
-    !  are computed. These are needed for the viscous fluxes.
-    !
-    ! Projected areas of cell faces in the i direction.
-    !$AD II-LOOP
-    do ii=0,ke*je*(ie+1)-1
-       i = mod(ii, ie+1) + 0 ! 0:ie
-       j = mod(ii/(ie+1), je) + 1 !1:je
-       k = ii/((ie+1)*je) + 1 !1:ke
-
-       n = k -1
-       m = j -1
-
-       ! Determine the two diagonal vectors of the face.
-
-       v1(1) = x(i,j,n,1) - x(i,m,k,1)
-       v1(2) = x(i,j,n,2) - x(i,m,k,2)
-       v1(3) = x(i,j,n,3) - x(i,m,k,3)
-
-       v2(1) = x(i,j,k,1) - x(i,m,n,1)
-       v2(2) = x(i,j,k,2) - x(i,m,n,2)
-       v2(3) = x(i,j,k,3) - x(i,m,n,3)
-
-       ! The face normal, which is the cross product of the two
-       ! diagonal vectors times fact; remember that fact is
-       ! either -0.5 or 0.5.
-
-       si(i,j,k,1) = fact*(v1(2)*v2(3) - v1(3)*v2(2))
-       si(i,j,k,2) = fact*(v1(3)*v2(1) - v1(1)*v2(3))
-       si(i,j,k,3) = fact*(v1(1)*v2(2) - v1(2)*v2(1))
-    enddo
-
-
-    ! Projected areas of cell faces in the j direction
-    !$AD II-LOOP
-    do ii=0,ke*(je+1)*ie-1
-       i = mod(ii, ie) + 1 ! 1:ie
-       j = mod(ii/ie, je+1) + 0 !0:je
-       k = ii/(ie*(je+1)) + 1 !1:ke
-       n = k -1
-       l = i -1
-
-       ! Determine the two diagonal vectors of the face.
-
-       v1(1) = x(i,j,n,1) - x(l,j,k,1)
-       v1(2) = x(i,j,n,2) - x(l,j,k,2)
-       v1(3) = x(i,j,n,3) - x(l,j,k,3)
-
-       v2(1) = x(l,j,n,1) - x(i,j,k,1)
-       v2(2) = x(l,j,n,2) - x(i,j,k,2)
-       v2(3) = x(l,j,n,3) - x(i,j,k,3)
-
-       ! The face normal, which is the cross product of the two
-       ! diagonal vectors times fact; remember that fact is
-       ! either -0.5 or 0.5.
-
-       sj(i,j,k,1) = fact*(v1(2)*v2(3) - v1(3)*v2(2))
-       sj(i,j,k,2) = fact*(v1(3)*v2(1) - v1(1)*v2(3))
-       sj(i,j,k,3) = fact*(v1(1)*v2(2) - v1(2)*v2(1))
-
-    enddo
-
-    ! Projected areas of cell faces in the k direction.
-    !$AD II-LOOP
-    do ii=0,(ke+1)*je*ie-1
-       i = mod(ii, ie) + 1 ! 1:ie
-       j = mod(ii/ie, je) + 1 !1:je
-       k = ii/(ie*je) + 0 !0:ke
-       m = j -1
-       l = i -1
-
-       ! Determine the two diagonal vectors of the face.
-
-       v1(1) = x(i,j,k,1) - x(l,m,k,1)
-       v1(2) = x(i,j,k,2) - x(l,m,k,2)
-       v1(3) = x(i,j,k,3) - x(l,m,k,3)
-
-       v2(1) = x(l,j,k,1) - x(i,m,k,1)
-       v2(2) = x(l,j,k,2) - x(i,m,k,2)
-       v2(3) = x(l,j,k,3) - x(i,m,k,3)
-
-       ! The face normal, which is the cross product of the two
-       ! diagonal vectors times fact; remember that fact is
-       ! either -0.5 or 0.5.
-
-       sk(i,j,k,1) = fact*(v1(2)*v2(3) - v1(3)*v2(2))
-       sk(i,j,k,2) = fact*(v1(3)*v2(1) - v1(1)*v2(3))
-       sk(i,j,k,3) = fact*(v1(1)*v2(2) - v1(2)*v2(1))
-    enddo
-  end subroutine metric_block
-
-  subroutine boundaryNormals
-
-    !  The unit normals on the boundary faces. These always point
-    !  out of the domain, so a multiplication by -1 is needed for
-    !  the iMin, jMin and kMin boundaries.
-    !
-    use constants
-    use blockPointers
-    use cgnsGrid
-    use communication
-    use inputTimeSpectral
-    implicit none
-
-    ! Local variables.
-    integer(kind=intType) :: i, j, ii
-    integer(kind=intType) :: mm
-    real(kind=realType) :: fact, mult
-    real(kind=realType) :: xxp, yyp, zzp
-
-    !Loop over the boundary subfaces of this block.
-    !$AD II-LOOP
-    bocoLoop: do mm=1,nBocos
-
-       ! Loop over the boundary faces of the subface.
-       !$AD II-LOOP
-       do ii=0,(BCData(mm)%jcEnd - bcData(mm)%jcBeg + 1)*(BCData(mm)%icEnd - BCData(mm)%icBeg + 1) - 1
-          i = mod(ii, (BCData(mm)%icEnd - BCData(mm)%icBeg + 1)) + BCData(mm)%icBeg
-          j = ii/(BCData(mm)%icEnd - BCData(mm)%icBeg + 1) + BCData(mm)%jcBeg
-
-          select case (BCFaceID(mm))
-          case (iMin)
-             mult = -one
-             xxp = si(1,i,j,1);  yyp = si(1,i,j,2);  zzp = si(1,i,j,3)
-          case (iMax)
-             mult = one
-             xxp = si(il,i,j,1);  yyp = si(il,i,j,2);  zzp = si(il,i,j,3)
-          case (jMin)
-             mult = -one
-             xxp = sj(i,1,j,1);  yyp = sj(i,1,j,2);  zzp = sj(i,1,j,3)
-          case (jMax)
-             mult = one
-             xxp = sj(i,jl,j,1);  yyp = sj(i,jl,j,2);  zzp = sj(i,jl,j,3)
-          case (kMin)
-             mult = -one
-             xxp = sk(i,j,1,1);  yyp = sk(i,j,1,2);  zzp = sk(i,j,1,3)
-          case (kMax)
-             mult = one
-             xxp = sk(i,j,kl,1);  yyp = sk(i,j,kl,2);  zzp = sk(i,j,kl,3)
-          end select
-
-          ! Compute the inverse of the length of the normal vector
-          ! and possibly correct for inward pointing.
-
-          fact = sqrt(xxp*xxp + yyp*yyp + zzp*zzp)
-          if(fact > zero) fact = mult/fact
-
-          ! Compute the unit normal.
-
-          BCData(mm)%norm(i,j,1) = fact*xxp
-          BCData(mm)%norm(i,j,2) = fact*yyp
-          BCData(mm)%norm(i,j,3) = fact*zzp
-       end do
-    enddo bocoLoop
-  end subroutine boundaryNormals
-
-  subroutine xhalo_block
-    !
-    !       xhalo determines the coordinates of the nodal halo's.
-    !       First it sets all halo coordinates by simple extrapolation,
-    !       then the symmetry planes are treated (also the unit normal of
-    !       symmetry planes are determined) and finally an exchange is
-    !       made for the internal halo's.
-    !
-    use constants
-    use blockPointers
-    use communication
-    use inputTimeSpectral
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: mm, i, j, k
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, iiMax, jjMax
-    logical err
-    real(kind=realType) :: length, dot
-    real(kind=realType), dimension(3) :: v1, v2, norm
-
-    ! Extrapolation in i-direction.
-
-    do k=1,kl
-       do j=1,jl
-          x(0,j,k,1) = two*x(1,j,k,1) - x(2,j,k,1)
-          x(0,j,k,2) = two*x(1,j,k,2) - x(2,j,k,2)
-          x(0,j,k,3) = two*x(1,j,k,3) - x(2,j,k,3)
-
-          x(ie,j,k,1) = two*x(il,j,k,1) - x(nx,j,k,1)
-          x(ie,j,k,2) = two*x(il,j,k,2) - x(nx,j,k,2)
-          x(ie,j,k,3) = two*x(il,j,k,3) - x(nx,j,k,3)
-       enddo
-    enddo
-
-    ! Extrapolation in j-direction.
-
-    do k=1,kl
-       do i=0,ie
-          x(i,0,k,1) = two*x(i,1,k,1) - x(i,2,k,1)
-          x(i,0,k,2) = two*x(i,1,k,2) - x(i,2,k,2)
-          x(i,0,k,3) = two*x(i,1,k,3) - x(i,2,k,3)
-
-          x(i,je,k,1) = two*x(i,jl,k,1) - x(i,ny,k,1)
-          x(i,je,k,2) = two*x(i,jl,k,2) - x(i,ny,k,2)
-          x(i,je,k,3) = two*x(i,jl,k,3) - x(i,ny,k,3)
-       enddo
-    enddo
-
-    ! Extrapolation in k-direction.
-
-    do j=0,je
-       do i=0,ie
-          x(i,j,0,1) = two*x(i,j,1,1) - x(i,j,2,1)
-          x(i,j,0,2) = two*x(i,j,1,2) - x(i,j,2,2)
-          x(i,j,0,3) = two*x(i,j,1,3) - x(i,j,2,3)
-
-          x(i,j,ke,1) = two*x(i,j,kl,1) - x(i,j,nz,1)
-          x(i,j,ke,2) = two*x(i,j,kl,2) - x(i,j,nz,2)
-          x(i,j,ke,3) = two*x(i,j,kl,3) - x(i,j,nz,3)
-       enddo
-    enddo
-    !
-    !           Mirror the halo coordinates adjacent to the symmetry
-    !           planes
-    !
-    ! Loop over boundary subfaces.
-
-    loopBocos: do mm=1,nBocos
-
-       ! The actual correction of the coordinates only takes
-       ! place for symmetry planes.
-
-       testSymmetry: if(BCType(mm) == Symm) then
-
-          ! Set some variables, depending on the block face on
-          ! which the subface is located.
-          norm(1) = bcData(mm)%symNorm(1)
-          norm(2) = bcData(mm)%symNorm(2)
-          norm(3) = bcData(mm)%symNorm(3)
-
-          length = sqrt(norm(1)**2 + norm(2)**2 + norm(3)**2)
-
-          ! Compute the unit normal of the subface.
-
-          norm(1) = norm(1)/length
-          norm(2) = norm(2)/length
-          norm(3) = norm(3)/length
-          ! See xhalo_block for comments for below:
-          testSingular: if(length > eps) then
-
-             select case (BCFaceID(mm))
-             case (iMin)
-                iBeg = jnBeg(mm); iEnd = jnEnd(mm); iiMax = jl
-                jBeg = knBeg(mm); jEnd = knEnd(mm); jjMax = kl
-
-                if(iBeg == 1)     iBeg = 0
-                if(iEnd == iiMax) iEnd = iiMax + 1
-
-                if(jBeg == 1)     jBeg = 0
-                if(jEnd == jjMax) jEnd = jjMax + 1
-
-                do j=jBeg,jEnd
-                   do i=iBeg,iEnd
-                      v1(1) = x(1, i,j,1) - x(2, i,j,1)
-                      v1(2) = x(1, i,j,2) - x(2, i,j,2)
-                      v1(3) = x(1, i,j,3) - x(2, i,j,3)
-                      dot = two*(v1(1)*norm(1) + v1(2)*norm(2) &
-                           +      v1(3)*norm(3))
-                      x(0,i,j,1) = x(2,i,j,1) + dot*norm(1)
-                      x(0,i,j,2) = x(2,i,j,2) + dot*norm(2)
-                      x(0,i,j,3) = x(2,i,j,3) + dot*norm(3)
-                   enddo
-                enddo
-
-             case (iMax)
-                iBeg = jnBeg(mm); iEnd = jnEnd(mm); iiMax = jl
-                jBeg = knBeg(mm); jEnd = knEnd(mm); jjMax = kl
-
-                if(iBeg == 1)     iBeg = 0
-                if(iEnd == iiMax) iEnd = iiMax + 1
-
-                if(jBeg == 1)     jBeg = 0
-                if(jEnd == jjMax) jEnd = jjMax + 1
-
-                do j=jBeg,jEnd
-                   do i=iBeg,iEnd
-                      v1(1) = x(il, i,j,1) - x(nx, i,j,1)
-                      v1(2) = x(il, i,j,2) - x(nx, i,j,2)
-                      v1(3) = x(il, i,j,3) - x(nx, i,j,3)
-                      dot = two*(v1(1)*norm(1) + v1(2)*norm(2) &
-                           +      v1(3)*norm(3))
-                      x(ie,i,j,1) = x(nx,i,j,1) + dot*norm(1)
-                      x(ie,i,j,2) = x(nx,i,j,2) + dot*norm(2)
-                      x(ie,i,j,3) = x(nx,i,j,3) + dot*norm(3)
-                   enddo
-                enddo
-
-             case (jMin)
-                iBeg = inBeg(mm); iEnd = inEnd(mm); iiMax = il
-                jBeg = knBeg(mm); jEnd = knEnd(mm); jjMax = kl
-
-                if(iBeg == 1)     iBeg = 0
-                if(iEnd == iiMax) iEnd = iiMax + 1
-
-                if(jBeg == 1)     jBeg = 0
-                if(jEnd == jjMax) jEnd = jjMax + 1
-
-                do j=jBeg,jEnd
-                   do i=iBeg,iEnd
-                      v1(1) = x(i,1,j,1) - x(i,2,j,1)
-                      v1(2) = x(i,1,j,2) - x(i,2,j,2)
-                      v1(3) = x(i,1,j,3) - x(i,2,j,3)
-                      dot = two*(v1(1)*norm(1) + v1(2)*norm(2) &
-                           +      v1(3)*norm(3))
-                      x(i,0,j,1) = x(i,2,j,1) + dot*norm(1)
-                      x(i,0,j,2) = x(i,2,j,2) + dot*norm(2)
-                      x(i,0,j,3) = x(i,2,j,3) + dot*norm(3)
-                   enddo
-                enddo
-
-             case (jMax)
-                iBeg = inBeg(mm); iEnd = inEnd(mm); iiMax = il
-                jBeg = knBeg(mm); jEnd = knEnd(mm); jjMax = kl
-
-                if(iBeg == 1)     iBeg = 0
-                if(iEnd == iiMax) iEnd = iiMax + 1
-
-                if(jBeg == 1)     jBeg = 0
-                if(jEnd == jjMax) jEnd = jjMax + 1
-
-                do j=jBeg,jEnd
-                   do i=iBeg,iEnd
-                      v1(1) = x(i,jl,j,1) - x(i,ny,j,1)
-                      v1(2) = x(i,jl,j,2) - x(i,ny,j,2)
-                      v1(3) = x(i,jl,j,3) - x(i,ny,j,3)
-                      dot = two*(v1(1)*norm(1) + v1(2)*norm(2) &
-                           +      v1(3)*norm(3))
-                      x(i,je,j,1) = x(i,ny,j,1) + dot*norm(1)
-                      x(i,je,j,2) = x(i,ny,j,2) + dot*norm(2)
-                      x(i,je,j,3) = x(i,ny,j,3) + dot*norm(3)
-                   enddo
-                enddo
-
-             case (kMin)
-                iBeg = inBeg(mm); iEnd = inEnd(mm); iiMax = il
-                jBeg = jnBeg(mm); jEnd = jnEnd(mm); jjMax = jl
-
-                if(iBeg == 1)     iBeg = 0
-                if(iEnd == iiMax) iEnd = iiMax + 1
-
-                if(jBeg == 1)     jBeg = 0
-                if(jEnd == jjMax) jEnd = jjMax + 1
-
-                do j=jBeg,jEnd
-                   do i=iBeg,iEnd
-                      v1(1) = x(i,j,1,1) - x(i,j,2,1)
-                      v1(2) = x(i,j,1,2) - x(i,j,2,2)
-                      v1(3) = x(i,j,1,3) - x(i,j,2,3)
-                      dot = two*(v1(1)*norm(1) + v1(2)*norm(2) &
-                           +      v1(3)*norm(3))
-                      x(i,j,0,1) = x(i,j,2,1) + dot*norm(1)
-                      x(i,j,0,2) = x(i,j,2,2) + dot*norm(2)
-                      x(i,j,0,3) = x(i,j,2,3) + dot*norm(3)
-                   enddo
-                enddo
-
-             case (kMax)
-                iBeg = inBeg(mm); iEnd = inEnd(mm); iiMax = il
-                jBeg = jnBeg(mm); jEnd = jnEnd(mm); jjMax = jl
-
-                if(iBeg == 1)     iBeg = 0
-                if(iEnd == iiMax) iEnd = iiMax + 1
-
-                if(jBeg == 1)     jBeg = 0
-                if(jEnd == jjMax) jEnd = jjMax + 1
-
-                do j=jBeg,jEnd
-                   do i=iBeg,iEnd
-                      v1(1) = x(i,j,kl,1) - x(i,j,nz,1)
-                      v1(2) = x(i,j,kl,2) - x(i,j,nz,2)
-                      v1(3) = x(i,j,kl,3) - x(i,j,nz,3)
-                      dot = two*(v1(1)*norm(1) + v1(2)*norm(2) &
-                           +      v1(3)*norm(3))
-                      x(i,j,ke,1) = x(i,j,nz,1) + dot*norm(1)
-                      x(i,j,ke,2) = x(i,j,nz,2) + dot*norm(2)
-                      x(i,j,ke,3) = x(i,j,nz,3) + dot*norm(3)
-                   enddo
-                enddo
-             end select
-          endif testSingular
-       end if testSymmetry
-    enddo loopBocos
-  end subroutine xhalo_block
-
-  subroutine resScale
-
-    use constants
-    use blockPointers, only : il, jl, kl, nx, ny, nz, volRef, dw
-    use flowVarRefState, only : nwf, nt1, nt2
-    use inputIteration, only : turbResScale
-    implicit none
-
-    ! Local Variables
-    integer(kind=intType) :: i, j, k, ii, nTurb
-    real(kind=realType) :: ovol
-
-    ! Divide through by the reference volume
-    nTurb = nt2-nt1+1
+    subroutine volume_block
+
+        ! This is COPY of metric.f90. It was necessary to copy this file
+        ! since there is debugging stuff in the original that is not
+        ! necessary for AD.
+        use constants
+        use blockPointers
+        use cgnsGrid
+        use communication
+        use inputTimeSpectral
+
+        implicit none
+        !
+        !      Local parameter.
+        !
+        real(kind=realType), parameter :: thresVolume = 1.e-2_realType
+        real(kind=realType), parameter :: haloCellRatio = 1e-10_realType
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, n, m, l, ii
+        integer(kind=intType) :: mm
+        real(kind=realType) :: fact, mult
+        real(kind=realType) :: xp, yp, zp, vp1, vp2, vp3, vp4, vp5, vp6
+        real(kind=realType) :: xxp, yyp, zzp
+        real(kind=realType), dimension(3) :: v1, v2
+
+        ! Compute the volumes. The hexahedron is split into 6 pyramids
+        ! whose volumes are computed. The volume is positive for a
+        ! right handed block.
+        ! Initialize the volumes to zero. The reasons is that the second
+        ! level halo's must be initialized to zero and for convenience
+        ! all the volumes are set to zero.
+
+        vol = zero
+
+        do k = 1, ke
+            n = k - 1
+            do j = 1, je
+                m = j - 1
+                do i = 1, ie
+
+                    l = i - 1
+
+                    ! Compute the coordinates of the center of gravity.
+
+                    xp = eighth * (x(i, j, k, 1) + x(i, m, k, 1) &
+                                   + x(i, m, n, 1) + x(i, j, n, 1) &
+                                   + x(l, j, k, 1) + x(l, m, k, 1) &
+                                   + x(l, m, n, 1) + x(l, j, n, 1))
+                    yp = eighth * (x(i, j, k, 2) + x(i, m, k, 2) &
+                                   + x(i, m, n, 2) + x(i, j, n, 2) &
+                                   + x(l, j, k, 2) + x(l, m, k, 2) &
+                                   + x(l, m, n, 2) + x(l, j, n, 2))
+                    zp = eighth * (x(i, j, k, 3) + x(i, m, k, 3) &
+                                   + x(i, m, n, 3) + x(i, j, n, 3) &
+                                   + x(l, j, k, 3) + x(l, m, k, 3) &
+                                   + x(l, m, n, 3) + x(l, j, n, 3))
+
+                    ! Compute the volumes of the 6 sub pyramids. The
+                    ! arguments of volpym must be such that for a (regular)
+                    ! right handed hexahedron all volumes are positive.
+
+                    call volpym(x(i, j, k, 1), x(i, j, k, 2), x(i, j, k, 3), &
+                                x(i, j, n, 1), x(i, j, n, 2), x(i, j, n, 3), &
+                                x(i, m, n, 1), x(i, m, n, 2), x(i, m, n, 3), &
+                                x(i, m, k, 1), x(i, m, k, 2), x(i, m, k, 3), vp1)
+
+                    call volpym(x(l, j, k, 1), x(l, j, k, 2), x(l, j, k, 3), &
+                                x(l, m, k, 1), x(l, m, k, 2), x(l, m, k, 3), &
+                                x(l, m, n, 1), x(l, m, n, 2), x(l, m, n, 3), &
+                                x(l, j, n, 1), x(l, j, n, 2), x(l, j, n, 3), vp2)
+
+                    call volpym(x(i, j, k, 1), x(i, j, k, 2), x(i, j, k, 3), &
+                                x(l, j, k, 1), x(l, j, k, 2), x(l, j, k, 3), &
+                                x(l, j, n, 1), x(l, j, n, 2), x(l, j, n, 3), &
+                                x(i, j, n, 1), x(i, j, n, 2), x(i, j, n, 3), vp3)
+
+                    call volpym(x(i, m, k, 1), x(i, m, k, 2), x(i, m, k, 3), &
+                                x(i, m, n, 1), x(i, m, n, 2), x(i, m, n, 3), &
+                                x(l, m, n, 1), x(l, m, n, 2), x(l, m, n, 3), &
+                                x(l, m, k, 1), x(l, m, k, 2), x(l, m, k, 3), vp4)
+
+                    call volpym(x(i, j, k, 1), x(i, j, k, 2), x(i, j, k, 3), &
+                                x(i, m, k, 1), x(i, m, k, 2), x(i, m, k, 3), &
+                                x(l, m, k, 1), x(l, m, k, 2), x(l, m, k, 3), &
+                                x(l, j, k, 1), x(l, j, k, 2), x(l, j, k, 3), vp5)
+
+                    call volpym(x(i, j, n, 1), x(i, j, n, 2), x(i, j, n, 3), &
+                                x(l, j, n, 1), x(l, j, n, 2), x(l, j, n, 3), &
+                                x(l, m, n, 1), x(l, m, n, 2), x(l, m, n, 3), &
+                                x(i, m, n, 1), x(i, m, n, 2), x(i, m, n, 3), vp6)
+
+                    ! Set the volume to 1/6 of the sum of the volumes of the
+                    ! pyramid. Remember that volpym computes 6 times the
+                    ! volume.
+
+                    vol(i, j, k) = sixth * (vp1 + vp2 + vp3 + vp4 + vp5 + vp6)
+
+                    ! Set the volume to the absolute value.
+                    vol(i, j, k) = abs(vol(i, j, k))
+                end do
+            end do
+        end do
+
+        ! Some additional safety stuff for halo volumes.
+
+        do k = 2, kl
+            do j = 2, jl
+                if (vol(1, j, k) / vol(2, j, k) < haloCellRatio) then
+                    vol(1, j, k) = vol(2, j, k)
+                end if
+                if (vol(ie, j, k) / vol(il, j, k) < haloCellRatio) then
+                    vol(ie, j, k) = vol(il, j, k)
+                end if
+            end do
+        end do
+
+        do k = 2, kl
+            do i = 1, ie
+                if (vol(i, 1, k) / vol(i, 2, k) < haloCellRatio) then
+                    vol(i, 1, k) = vol(i, 2, k)
+                end if
+                if (vol(i, je, k) / voL(i, jl, k) < haloCellRatio) then
+                    vol(i, je, k) = vol(i, jl, k)
+                end if
+            end do
+        end do
+
+        do j = 1, je
+            do i = 1, ie
+                if (vol(i, j, 1) / vol(i, j, 2) < haloCellRatio) then
+                    vol(i, j, 1) = vol(i, j, 2)
+                end if
+                if (vol(i, j, ke) / vol(i, j, kl) < haloCellRatio) then
+                    vol(i, j, ke) = vol(i, j, kl)
+                end if
+            end do
+        end do
+
+    contains
+
+        subroutine volpym(xa, ya, za, xb, yb, zb, xc, yc, zc, xd, yd, zd, volume)
+            !
+            !         volpym computes 6 times the volume of a pyramid. Node p,
+            !         whose coordinates are set in the subroutine metric itself,
+            !         is the top node and a-b-c-d is the quadrilateral surface.
+            !         It is assumed that the cross product vCa * vDb points in
+            !         the direction of the top node. Here vCa is the diagonal
+            !         running from node c to node a and vDb the diagonal from
+            !         node d to node b.
+            !
+            use precision
+            implicit none
+            !
+            !        Function type.
+            !
+            real(kind=realType) :: volume
+            !
+            !        Function arguments.
+            !
+            real(kind=realType), intent(in) :: xa, ya, za, xb, yb, zb
+            real(kind=realType), intent(in) :: xc, yc, zc, xd, yd, zd
+
+            volume = (xp - fourth * (xa + xb + xc + xd)) &
+                     * ((ya - yc) * (zb - zd) - (za - zc) * (yb - yd)) + &
+                     (yp - fourth * (ya + yb + yc + yd)) &
+                     * ((za - zc) * (xb - xd) - (xa - xc) * (zb - zd)) + &
+                     (zp - fourth * (za + zb + zc + zd)) &
+                     * ((xa - xc) * (yb - yd) - (ya - yc) * (xb - xd))
+
+        end subroutine volpym
+    end subroutine volume_block
+
+    subroutine metric_block
+        use constants
+        use blockPointers
+        implicit none
+
+        ! Local variables.
+        integer(kind=intType) :: i, j, k, n, m, l, ii
+        real(kind=realType) :: fact
+        real(kind=realType) :: xxp, yyp, zzp
+        real(kind=realType), dimension(3) :: v1, v2
+
+        ! Set the factor in the surface normals computation. For a
+        ! left handed block this factor is negative, such that the
+        ! normals still point in the direction of increasing index.
+        ! The formulae used later on assume a right handed block
+        ! and fact is used to correct this for a left handed block,
+        ! as well as the scaling factor of 0.5
+
+        if (rightHanded) then
+            fact = half
+        else
+            fact = -half
+        end if
+
+        !
+        !  Computation of the face normals in i-, j- and k-direction.
+        !  Formula's are valid for a right handed block; for a left
+        !  handed block the correct orientation is obtained via fact.
+        !  The normals point in the direction of increasing index.
+        !  The absolute value of fact is 0.5, because the cross
+        !  product of the two diagonals is twice the normal vector.
+        !  Note that also the normals of the first level halo cells
+        !  are computed. These are needed for the viscous fluxes.
+        !
+        ! Projected areas of cell faces in the i direction.
+        !$AD II-LOOP
+        do ii = 0, ke * je * (ie + 1) - 1
+            i = mod(ii, ie + 1) + 0 ! 0:ie
+            j = mod(ii / (ie + 1), je) + 1 !1:je
+            k = ii / ((ie + 1) * je) + 1 !1:ke
+
+            n = k - 1
+            m = j - 1
+
+            ! Determine the two diagonal vectors of the face.
+
+            v1(1) = x(i, j, n, 1) - x(i, m, k, 1)
+            v1(2) = x(i, j, n, 2) - x(i, m, k, 2)
+            v1(3) = x(i, j, n, 3) - x(i, m, k, 3)
+
+            v2(1) = x(i, j, k, 1) - x(i, m, n, 1)
+            v2(2) = x(i, j, k, 2) - x(i, m, n, 2)
+            v2(3) = x(i, j, k, 3) - x(i, m, n, 3)
+
+            ! The face normal, which is the cross product of the two
+            ! diagonal vectors times fact; remember that fact is
+            ! either -0.5 or 0.5.
+
+            si(i, j, k, 1) = fact * (v1(2) * v2(3) - v1(3) * v2(2))
+            si(i, j, k, 2) = fact * (v1(3) * v2(1) - v1(1) * v2(3))
+            si(i, j, k, 3) = fact * (v1(1) * v2(2) - v1(2) * v2(1))
+        end do
+
+        ! Projected areas of cell faces in the j direction
+        !$AD II-LOOP
+        do ii = 0, ke * (je + 1) * ie - 1
+            i = mod(ii, ie) + 1 ! 1:ie
+            j = mod(ii / ie, je + 1) + 0 !0:je
+            k = ii / (ie * (je + 1)) + 1 !1:ke
+            n = k - 1
+            l = i - 1
+
+            ! Determine the two diagonal vectors of the face.
+
+            v1(1) = x(i, j, n, 1) - x(l, j, k, 1)
+            v1(2) = x(i, j, n, 2) - x(l, j, k, 2)
+            v1(3) = x(i, j, n, 3) - x(l, j, k, 3)
+
+            v2(1) = x(l, j, n, 1) - x(i, j, k, 1)
+            v2(2) = x(l, j, n, 2) - x(i, j, k, 2)
+            v2(3) = x(l, j, n, 3) - x(i, j, k, 3)
+
+            ! The face normal, which is the cross product of the two
+            ! diagonal vectors times fact; remember that fact is
+            ! either -0.5 or 0.5.
+
+            sj(i, j, k, 1) = fact * (v1(2) * v2(3) - v1(3) * v2(2))
+            sj(i, j, k, 2) = fact * (v1(3) * v2(1) - v1(1) * v2(3))
+            sj(i, j, k, 3) = fact * (v1(1) * v2(2) - v1(2) * v2(1))
+
+        end do
+
+        ! Projected areas of cell faces in the k direction.
+        !$AD II-LOOP
+        do ii = 0, (ke + 1) * je * ie - 1
+            i = mod(ii, ie) + 1 ! 1:ie
+            j = mod(ii / ie, je) + 1 !1:je
+            k = ii / (ie * je) + 0 !0:ke
+            m = j - 1
+            l = i - 1
+
+            ! Determine the two diagonal vectors of the face.
+
+            v1(1) = x(i, j, k, 1) - x(l, m, k, 1)
+            v1(2) = x(i, j, k, 2) - x(l, m, k, 2)
+            v1(3) = x(i, j, k, 3) - x(l, m, k, 3)
+
+            v2(1) = x(l, j, k, 1) - x(i, m, k, 1)
+            v2(2) = x(l, j, k, 2) - x(i, m, k, 2)
+            v2(3) = x(l, j, k, 3) - x(i, m, k, 3)
+
+            ! The face normal, which is the cross product of the two
+            ! diagonal vectors times fact; remember that fact is
+            ! either -0.5 or 0.5.
+
+            sk(i, j, k, 1) = fact * (v1(2) * v2(3) - v1(3) * v2(2))
+            sk(i, j, k, 2) = fact * (v1(3) * v2(1) - v1(1) * v2(3))
+            sk(i, j, k, 3) = fact * (v1(1) * v2(2) - v1(2) * v2(1))
+        end do
+    end subroutine metric_block
+
+    subroutine boundaryNormals
+
+        !  The unit normals on the boundary faces. These always point
+        !  out of the domain, so a multiplication by -1 is needed for
+        !  the iMin, jMin and kMin boundaries.
+        !
+        use constants
+        use blockPointers
+        use cgnsGrid
+        use communication
+        use inputTimeSpectral
+        implicit none
+
+        ! Local variables.
+        integer(kind=intType) :: i, j, ii
+        integer(kind=intType) :: mm
+        real(kind=realType) :: fact, mult
+        real(kind=realType) :: xxp, yyp, zzp
+
+        !Loop over the boundary subfaces of this block.
+        !$AD II-LOOP
+        bocoLoop: do mm = 1, nBocos
+
+            ! Loop over the boundary faces of the subface.
+            !$AD II-LOOP
+            do ii = 0, (BCData(mm)%jcEnd - bcData(mm)%jcBeg + 1) * (BCData(mm)%icEnd - BCData(mm)%icBeg + 1) - 1
+                i = mod(ii, (BCData(mm)%icEnd - BCData(mm)%icBeg + 1)) + BCData(mm)%icBeg
+                j = ii / (BCData(mm)%icEnd - BCData(mm)%icBeg + 1) + BCData(mm)%jcBeg
+
+                select case (BCFaceID(mm))
+                case (iMin)
+                    mult = -one
+                    xxp = si(1, i, j, 1); yyp = si(1, i, j, 2); zzp = si(1, i, j, 3)
+                case (iMax)
+                    mult = one
+                    xxp = si(il, i, j, 1); yyp = si(il, i, j, 2); zzp = si(il, i, j, 3)
+                case (jMin)
+                    mult = -one
+                    xxp = sj(i, 1, j, 1); yyp = sj(i, 1, j, 2); zzp = sj(i, 1, j, 3)
+                case (jMax)
+                    mult = one
+                    xxp = sj(i, jl, j, 1); yyp = sj(i, jl, j, 2); zzp = sj(i, jl, j, 3)
+                case (kMin)
+                    mult = -one
+                    xxp = sk(i, j, 1, 1); yyp = sk(i, j, 1, 2); zzp = sk(i, j, 1, 3)
+                case (kMax)
+                    mult = one
+                    xxp = sk(i, j, kl, 1); yyp = sk(i, j, kl, 2); zzp = sk(i, j, kl, 3)
+                end select
+
+                ! Compute the inverse of the length of the normal vector
+                ! and possibly correct for inward pointing.
+
+                fact = sqrt(xxp * xxp + yyp * yyp + zzp * zzp)
+                if (fact > zero) fact = mult / fact
+
+                ! Compute the unit normal.
+
+                BCData(mm)%norm(i, j, 1) = fact * xxp
+                BCData(mm)%norm(i, j, 2) = fact * yyp
+                BCData(mm)%norm(i, j, 3) = fact * zzp
+            end do
+        end do bocoLoop
+    end subroutine boundaryNormals
+
+    subroutine xhalo_block
+        !
+        !       xhalo determines the coordinates of the nodal halo's.
+        !       First it sets all halo coordinates by simple extrapolation,
+        !       then the symmetry planes are treated (also the unit normal of
+        !       symmetry planes are determined) and finally an exchange is
+        !       made for the internal halo's.
+        !
+        use constants
+        use blockPointers
+        use communication
+        use inputTimeSpectral
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: mm, i, j, k
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, iiMax, jjMax
+        logical err
+        real(kind=realType) :: length, dot
+        real(kind=realType), dimension(3) :: v1, v2, norm
+
+        ! Extrapolation in i-direction.
+
+        do k = 1, kl
+            do j = 1, jl
+                x(0, j, k, 1) = two * x(1, j, k, 1) - x(2, j, k, 1)
+                x(0, j, k, 2) = two * x(1, j, k, 2) - x(2, j, k, 2)
+                x(0, j, k, 3) = two * x(1, j, k, 3) - x(2, j, k, 3)
+
+                x(ie, j, k, 1) = two * x(il, j, k, 1) - x(nx, j, k, 1)
+                x(ie, j, k, 2) = two * x(il, j, k, 2) - x(nx, j, k, 2)
+                x(ie, j, k, 3) = two * x(il, j, k, 3) - x(nx, j, k, 3)
+            end do
+        end do
+
+        ! Extrapolation in j-direction.
+
+        do k = 1, kl
+            do i = 0, ie
+                x(i, 0, k, 1) = two * x(i, 1, k, 1) - x(i, 2, k, 1)
+                x(i, 0, k, 2) = two * x(i, 1, k, 2) - x(i, 2, k, 2)
+                x(i, 0, k, 3) = two * x(i, 1, k, 3) - x(i, 2, k, 3)
+
+                x(i, je, k, 1) = two * x(i, jl, k, 1) - x(i, ny, k, 1)
+                x(i, je, k, 2) = two * x(i, jl, k, 2) - x(i, ny, k, 2)
+                x(i, je, k, 3) = two * x(i, jl, k, 3) - x(i, ny, k, 3)
+            end do
+        end do
+
+        ! Extrapolation in k-direction.
+
+        do j = 0, je
+            do i = 0, ie
+                x(i, j, 0, 1) = two * x(i, j, 1, 1) - x(i, j, 2, 1)
+                x(i, j, 0, 2) = two * x(i, j, 1, 2) - x(i, j, 2, 2)
+                x(i, j, 0, 3) = two * x(i, j, 1, 3) - x(i, j, 2, 3)
+
+                x(i, j, ke, 1) = two * x(i, j, kl, 1) - x(i, j, nz, 1)
+                x(i, j, ke, 2) = two * x(i, j, kl, 2) - x(i, j, nz, 2)
+                x(i, j, ke, 3) = two * x(i, j, kl, 3) - x(i, j, nz, 3)
+            end do
+        end do
+        !
+        !           Mirror the halo coordinates adjacent to the symmetry
+        !           planes
+        !
+        ! Loop over boundary subfaces.
+
+        loopBocos: do mm = 1, nBocos
+
+            ! The actual correction of the coordinates only takes
+            ! place for symmetry planes.
+
+            testSymmetry: if (BCType(mm) == Symm) then
+
+                ! Set some variables, depending on the block face on
+                ! which the subface is located.
+                norm(1) = bcData(mm)%symNorm(1)
+                norm(2) = bcData(mm)%symNorm(2)
+                norm(3) = bcData(mm)%symNorm(3)
+
+                length = sqrt(norm(1)**2 + norm(2)**2 + norm(3)**2)
+
+                ! Compute the unit normal of the subface.
+
+                norm(1) = norm(1) / length
+                norm(2) = norm(2) / length
+                norm(3) = norm(3) / length
+                ! See xhalo_block for comments for below:
+                testSingular: if (length > eps) then
+
+                    select case (BCFaceID(mm))
+                    case (iMin)
+                        iBeg = jnBeg(mm); iEnd = jnEnd(mm); iiMax = jl
+                        jBeg = knBeg(mm); jEnd = knEnd(mm); jjMax = kl
+
+                        if (iBeg == 1) iBeg = 0
+                        if (iEnd == iiMax) iEnd = iiMax + 1
+
+                        if (jBeg == 1) jBeg = 0
+                        if (jEnd == jjMax) jEnd = jjMax + 1
+
+                        do j = jBeg, jEnd
+                            do i = iBeg, iEnd
+                                v1(1) = x(1, i, j, 1) - x(2, i, j, 1)
+                                v1(2) = x(1, i, j, 2) - x(2, i, j, 2)
+                                v1(3) = x(1, i, j, 3) - x(2, i, j, 3)
+                                dot = two * (v1(1) * norm(1) + v1(2) * norm(2) &
+                                             + v1(3) * norm(3))
+                                x(0, i, j, 1) = x(2, i, j, 1) + dot * norm(1)
+                                x(0, i, j, 2) = x(2, i, j, 2) + dot * norm(2)
+                                x(0, i, j, 3) = x(2, i, j, 3) + dot * norm(3)
+                            end do
+                        end do
+
+                    case (iMax)
+                        iBeg = jnBeg(mm); iEnd = jnEnd(mm); iiMax = jl
+                        jBeg = knBeg(mm); jEnd = knEnd(mm); jjMax = kl
+
+                        if (iBeg == 1) iBeg = 0
+                        if (iEnd == iiMax) iEnd = iiMax + 1
+
+                        if (jBeg == 1) jBeg = 0
+                        if (jEnd == jjMax) jEnd = jjMax + 1
+
+                        do j = jBeg, jEnd
+                            do i = iBeg, iEnd
+                                v1(1) = x(il, i, j, 1) - x(nx, i, j, 1)
+                                v1(2) = x(il, i, j, 2) - x(nx, i, j, 2)
+                                v1(3) = x(il, i, j, 3) - x(nx, i, j, 3)
+                                dot = two * (v1(1) * norm(1) + v1(2) * norm(2) &
+                                             + v1(3) * norm(3))
+                                x(ie, i, j, 1) = x(nx, i, j, 1) + dot * norm(1)
+                                x(ie, i, j, 2) = x(nx, i, j, 2) + dot * norm(2)
+                                x(ie, i, j, 3) = x(nx, i, j, 3) + dot * norm(3)
+                            end do
+                        end do
+
+                    case (jMin)
+                        iBeg = inBeg(mm); iEnd = inEnd(mm); iiMax = il
+                        jBeg = knBeg(mm); jEnd = knEnd(mm); jjMax = kl
+
+                        if (iBeg == 1) iBeg = 0
+                        if (iEnd == iiMax) iEnd = iiMax + 1
+
+                        if (jBeg == 1) jBeg = 0
+                        if (jEnd == jjMax) jEnd = jjMax + 1
+
+                        do j = jBeg, jEnd
+                            do i = iBeg, iEnd
+                                v1(1) = x(i, 1, j, 1) - x(i, 2, j, 1)
+                                v1(2) = x(i, 1, j, 2) - x(i, 2, j, 2)
+                                v1(3) = x(i, 1, j, 3) - x(i, 2, j, 3)
+                                dot = two * (v1(1) * norm(1) + v1(2) * norm(2) &
+                                             + v1(3) * norm(3))
+                                x(i, 0, j, 1) = x(i, 2, j, 1) + dot * norm(1)
+                                x(i, 0, j, 2) = x(i, 2, j, 2) + dot * norm(2)
+                                x(i, 0, j, 3) = x(i, 2, j, 3) + dot * norm(3)
+                            end do
+                        end do
+
+                    case (jMax)
+                        iBeg = inBeg(mm); iEnd = inEnd(mm); iiMax = il
+                        jBeg = knBeg(mm); jEnd = knEnd(mm); jjMax = kl
+
+                        if (iBeg == 1) iBeg = 0
+                        if (iEnd == iiMax) iEnd = iiMax + 1
+
+                        if (jBeg == 1) jBeg = 0
+                        if (jEnd == jjMax) jEnd = jjMax + 1
+
+                        do j = jBeg, jEnd
+                            do i = iBeg, iEnd
+                                v1(1) = x(i, jl, j, 1) - x(i, ny, j, 1)
+                                v1(2) = x(i, jl, j, 2) - x(i, ny, j, 2)
+                                v1(3) = x(i, jl, j, 3) - x(i, ny, j, 3)
+                                dot = two * (v1(1) * norm(1) + v1(2) * norm(2) &
+                                             + v1(3) * norm(3))
+                                x(i, je, j, 1) = x(i, ny, j, 1) + dot * norm(1)
+                                x(i, je, j, 2) = x(i, ny, j, 2) + dot * norm(2)
+                                x(i, je, j, 3) = x(i, ny, j, 3) + dot * norm(3)
+                            end do
+                        end do
+
+                    case (kMin)
+                        iBeg = inBeg(mm); iEnd = inEnd(mm); iiMax = il
+                        jBeg = jnBeg(mm); jEnd = jnEnd(mm); jjMax = jl
+
+                        if (iBeg == 1) iBeg = 0
+                        if (iEnd == iiMax) iEnd = iiMax + 1
+
+                        if (jBeg == 1) jBeg = 0
+                        if (jEnd == jjMax) jEnd = jjMax + 1
+
+                        do j = jBeg, jEnd
+                            do i = iBeg, iEnd
+                                v1(1) = x(i, j, 1, 1) - x(i, j, 2, 1)
+                                v1(2) = x(i, j, 1, 2) - x(i, j, 2, 2)
+                                v1(3) = x(i, j, 1, 3) - x(i, j, 2, 3)
+                                dot = two * (v1(1) * norm(1) + v1(2) * norm(2) &
+                                             + v1(3) * norm(3))
+                                x(i, j, 0, 1) = x(i, j, 2, 1) + dot * norm(1)
+                                x(i, j, 0, 2) = x(i, j, 2, 2) + dot * norm(2)
+                                x(i, j, 0, 3) = x(i, j, 2, 3) + dot * norm(3)
+                            end do
+                        end do
+
+                    case (kMax)
+                        iBeg = inBeg(mm); iEnd = inEnd(mm); iiMax = il
+                        jBeg = jnBeg(mm); jEnd = jnEnd(mm); jjMax = jl
+
+                        if (iBeg == 1) iBeg = 0
+                        if (iEnd == iiMax) iEnd = iiMax + 1
+
+                        if (jBeg == 1) jBeg = 0
+                        if (jEnd == jjMax) jEnd = jjMax + 1
+
+                        do j = jBeg, jEnd
+                            do i = iBeg, iEnd
+                                v1(1) = x(i, j, kl, 1) - x(i, j, nz, 1)
+                                v1(2) = x(i, j, kl, 2) - x(i, j, nz, 2)
+                                v1(3) = x(i, j, kl, 3) - x(i, j, nz, 3)
+                                dot = two * (v1(1) * norm(1) + v1(2) * norm(2) &
+                                             + v1(3) * norm(3))
+                                x(i, j, ke, 1) = x(i, j, nz, 1) + dot * norm(1)
+                                x(i, j, ke, 2) = x(i, j, nz, 2) + dot * norm(2)
+                                x(i, j, ke, 3) = x(i, j, nz, 3) + dot * norm(3)
+                            end do
+                        end do
+                    end select
+                end if testSingular
+            end if testSymmetry
+        end do loopBocos
+    end subroutine xhalo_block
+
+    subroutine resScale
+
+        use constants
+        use blockPointers, only: il, jl, kl, nx, ny, nz, volRef, dw
+        use flowVarRefState, only: nwf, nt1, nt2
+        use inputIteration, only: turbResScale
+        implicit none
+
+        ! Local Variables
+        integer(kind=intType) :: i, j, k, ii, nTurb
+        real(kind=realType) :: ovol
+
+        ! Divide through by the reference volume
+        nTurb = nt2 - nt1 + 1
 #ifdef TAPENADE_REVERSE
-    !$AD II-LOOP
-    do ii=0,nx*ny*nz-1
-       i = mod(ii, nx) + 2
-       j = mod(ii/nx, ny) + 2
-       k = ii/(nx*ny) + 2
+        !$AD II-LOOP
+        do ii = 0, nx * ny * nz - 1
+            i = mod(ii, nx) + 2
+            j = mod(ii / nx, ny) + 2
+            k = ii / (nx * ny) + 2
 #else
-       do k=2,kl
-          do j=2,jl
-             do i=2,il
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
 #endif
-                oVol = one/volRef(i,j,k)
-                dw(i, j, k, 1:nwf) = dw(i,j, k, 1:nwf)* ovol
-                dw(i, j, k, nt1:nt2) = dw(i, j, k, nt1:nt2) * ovol * turbResScale(1:nTurb)
+                        oVol = one / volRef(i, j, k)
+                        dw(i, j, k, 1:nwf) = dw(i, j, k, 1:nwf) * ovol
+                        dw(i, j, k, nt1:nt2) = dw(i, j, k, nt1:nt2) * ovol * turbResScale(1:nTurb)
 #ifdef TAPENADE_REVERSE
-             end do
+                    end do
 #else
-          enddo
-       enddo
-    enddo
+                end do
+            end do
+        end do
 #endif
-  end subroutine resScale
-
-  subroutine sumDwAndFw
-
-    use constants
-    use blockPointers, only :il, jl, kl, dw, fw, iBlank
-    use flowVarRefState, only : nwf
-
-    implicit none
-
-    ! Local Variables
-    integer(kind=intType) :: i, j, k, l
-
-    do l=1, nwf
-       do k=2, kl
-          do j=2, jl
-             do i=2, il
-                dw(i,j,k,l) = (dw(i,j,k,l) + fw(i,j,k,l)) &
-                     * max(real(iblank(i,j,k), realType), zero)
-             end do
-          end do
-       end do
-    end do
-  end subroutine sumDwAndFw
+    end subroutine resScale
+
+    subroutine sumDwAndFw
+
+        use constants
+        use blockPointers, only: il, jl, kl, dw, fw, iBlank
+        use flowVarRefState, only: nwf
+
+        implicit none
+
+        ! Local Variables
+        integer(kind=intType) :: i, j, k, l
+
+        do l = 1, nwf
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+                        dw(i, j, k, l) = (dw(i, j, k, l) + fw(i, j, k, l)) &
+                                         * max(real(iblank(i, j, k), realType), zero)
+                    end do
+                end do
+            end do
+        end do
+    end subroutine sumDwAndFw
 
 end module adjointExtra
diff --git a/src/adjoint/adjointUtils.F90 b/src/adjoint/adjointUtils.F90
index f5347c7d5..7041519fd 100644
--- a/src/adjoint/adjointUtils.F90
+++ b/src/adjoint/adjointUtils.F90
@@ -4,2342 +4,2336 @@ module adjointUtils
 
 contains
 
-  subroutine setupStateResidualMatrix(matrix, useAD, usePC, useTranspose, &
-       useObjective, frozenTurb, level, useTurbOnly, useCoarseMats)
-
-    !      Compute the state derivative matrix using a forward mode calc
-    !      There are three different flags that determine how this
-    !      routine is run:
-    !      useAD: if True, AD is used for derivative calculation, if
-    !             False, FD is used.
-    !      usePC: if True, the reduced 1st order stencil with dissipation
-    !             lumping is assembled instead of the actual exact
-    !             full stencil jacobian
-    !      useTranspose: If true, the transpose of dRdw is assembled.
-    !                    For use with the adjoint this must be true.
-    !      useObjective: If true, the force matrix is assembled
-    !      level : What level to use to form the matrix. Level 1 is
-    !              always the finest level
-    !
-    use block, only : flowDomsd
-    use blockPointers
-    use inputDiscretization
-    use inputTimeSpectral
-    use inputPhysics
-    use iteration
-    use flowVarRefState
-    use inputAdjoint
-    use stencils
-    use diffSizes
-    use communication
-    use adjointVars
-    use turbMod
-    use surfaceFamilies, only : fullFamList
-    use oversetUtilities, only : fracToWeights
-    use utils, only : EChk, setPointers, getDirAngle, setPointers_d
-    use haloExchange, only : whalo2
-    use masterRoutines, only : block_res_state, master
-    use agmg, only : agmgLevels, A, coarseIndices, coarseOversetIndices
+    subroutine setupStateResidualMatrix(matrix, useAD, usePC, useTranspose, &
+                                        useObjective, frozenTurb, level, useTurbOnly, useCoarseMats)
+
+        !      Compute the state derivative matrix using a forward mode calc
+        !      There are three different flags that determine how this
+        !      routine is run:
+        !      useAD: if True, AD is used for derivative calculation, if
+        !             False, FD is used.
+        !      usePC: if True, the reduced 1st order stencil with dissipation
+        !             lumping is assembled instead of the actual exact
+        !             full stencil jacobian
+        !      useTranspose: If true, the transpose of dRdw is assembled.
+        !                    For use with the adjoint this must be true.
+        !      useObjective: If true, the force matrix is assembled
+        !      level : What level to use to form the matrix. Level 1 is
+        !              always the finest level
+        !
+        use block, only: flowDomsd
+        use blockPointers
+        use inputDiscretization
+        use inputTimeSpectral
+        use inputPhysics
+        use iteration
+        use flowVarRefState
+        use inputAdjoint
+        use stencils
+        use diffSizes
+        use communication
+        use adjointVars
+        use turbMod
+        use surfaceFamilies, only: fullFamList
+        use oversetUtilities, only: fracToWeights
+        use utils, only: EChk, setPointers, getDirAngle, setPointers_d
+        use haloExchange, only: whalo2
+        use masterRoutines, only: block_res_state, master
+        use agmg, only: agmgLevels, A, coarseIndices, coarseOversetIndices
 #ifndef USE_COMPLEX
-    use masterRoutines, only : block_res_state_d
+        use masterRoutines, only: block_res_state_d
 #endif
 #include <petsc/finclude/petsc.h>
-    use petsc
-    implicit none
-
-    ! PETSc Matrix Variable
-    Mat :: matrix
-
-    ! Input Variables
-    logical, intent(in) :: useAD, usePC, useTranspose, useObjective, frozenTurb
-    logical, intent(in), optional :: useTurbOnly, useCoarseMats
-    integer(kind=intType), intent(in) :: level
-
-    ! Local variables.
-    integer(kind=intType) :: ierr, nn, sps, sps2, i, j, k, l, ll, ii, jj, kk
-    integer(kind=intType) :: nColor, iColor, jColor, irow, icol, fmDim, frow
-    integer(kind=intType) :: nTransfer, nState, lStart, lEnd, tmp, icount, cols(8), nCol
-    integer(kind=intType) :: n_stencil, i_stencil, m, iFringe, fInd, lvl, orderturbsave
-    integer(kind=intType), dimension(:, :), pointer :: stencil
-    real(kind=alwaysRealType) :: delta_x, one_over_dx
-    real(kind=realType) :: weights(8)
-    real(kind=realType), dimension(:,:), allocatable :: blk
-    integer(kind=intType), dimension(2:10) :: coarseRows
-    integer(kind=intType), dimension(8, 2:10) :: coarseCols
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, mm, colInd
-    logical :: resetToRANS, turbOnly, flowRes, turbRes, buildCoarseMats
-
-    ! Determine if we are assembling a turb only PC
-    turbOnly = .False.
-    if (present(useTurbOnly)) then
-       turbOnly = useTurbOnly
-    end if
-
-    buildCoarseMats = .False.
-    if (present(useCoarseMats)) then
-       buildCoarseMats = useCoarseMats
-    end if
-
-    if (turbOnly) then
-       ! we are making a PC for turbulence only KSP
-       flowRes = .False.
-       turbRes = .True.
-       lStart = nt1
-       lEnd = nt2
-       nState = nt2 - nt1 + 1
-    else
-       ! We are making a "matrix" for either NK or adjoint
-       ! Setup number of state variable based on turbulence assumption
-       if ( frozenTurb ) then
-          flowRes = .True.
-          turbRes = .False.
-          lStart = 1
-          lEnd =nwf
-          nState = nwf
-       else
-          flowRes = .True.
-          turbRes = .True.
-          lStart = 1
-          lEnd =nw
-          nState = nw
-       endif
-    end if
-
-    ! Generic block to use while setting values
-    allocate(blk(nState, nState))
-
-    ! Exchange data and call the residual to make sure its up to date
-    ! withe current w
-    call whalo2(1_intType, 1_intType, nw, .True., .True., .True.)
-
-    ! This routine will not use the extra variables to block_res or the
-    ! extra outputs, so we must zero them here
-    alphad = zero
-    betad  = zero
-    machd  = zero
-    machGridd = zero
-    machcoefd = zero
-    pointRefd  = zero
-    lengthRefd = zero
-    pinfdimd = zero
-    rhoinfdimd = zero
-    tinfdimd = zero
-
-    rkStage = 0
-
-    ! Zero out the matrix before we start
-    call MatZeroEntries(matrix, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Set the diagonal to 1 if the cell is not a compute cell:
-
-    ! Make an identity block
-    blk = zero
-    do i=1, nState
-       blk(i,i) = one
-    end do
-
-    do nn=1,nDom
-       do sps=1,nTimeIntervalsSpectral
-          call setPointers(nn, level, sps)
-          do k=2, kl
-             do j=2, jl
-                do i=2, il
-                   if (iblank(i, j, k) /= 1) then
-                      iRow = flowDoms(nn, level, sps)%globalCell(i, j, k)
-                      cols(1) = irow
-                      nCol = 1
-
-                      if (buildCoarseMats) then
-                         do lvl=1, agmgLevels-1
-                            coarseRows(lvl+1) = coarseIndices(nn, lvl)%arr(i, j, k)
-                            coarseCols(1, lvl+1) = coarseRows(lvl+1)
-                         end do
-                      end if
-
-                      call setBlock(blk)
-                   end if
+        use petsc
+        implicit none
+
+        ! PETSc Matrix Variable
+        Mat :: matrix
+
+        ! Input Variables
+        logical, intent(in) :: useAD, usePC, useTranspose, useObjective, frozenTurb
+        logical, intent(in), optional :: useTurbOnly, useCoarseMats
+        integer(kind=intType), intent(in) :: level
+
+        ! Local variables.
+        integer(kind=intType) :: ierr, nn, sps, sps2, i, j, k, l, ll, ii, jj, kk
+        integer(kind=intType) :: nColor, iColor, jColor, irow, icol, fmDim, frow
+        integer(kind=intType) :: nTransfer, nState, lStart, lEnd, tmp, icount, cols(8), nCol
+        integer(kind=intType) :: n_stencil, i_stencil, m, iFringe, fInd, lvl, orderturbsave
+        integer(kind=intType), dimension(:, :), pointer :: stencil
+        real(kind=alwaysRealType) :: delta_x, one_over_dx
+        real(kind=realType) :: weights(8)
+        real(kind=realType), dimension(:, :), allocatable :: blk
+        integer(kind=intType), dimension(2:10) :: coarseRows
+        integer(kind=intType), dimension(8, 2:10) :: coarseCols
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, mm, colInd
+        logical :: resetToRANS, turbOnly, flowRes, turbRes, buildCoarseMats
+
+        ! Determine if we are assembling a turb only PC
+        turbOnly = .False.
+        if (present(useTurbOnly)) then
+            turbOnly = useTurbOnly
+        end if
+
+        buildCoarseMats = .False.
+        if (present(useCoarseMats)) then
+            buildCoarseMats = useCoarseMats
+        end if
+
+        if (turbOnly) then
+            ! we are making a PC for turbulence only KSP
+            flowRes = .False.
+            turbRes = .True.
+            lStart = nt1
+            lEnd = nt2
+            nState = nt2 - nt1 + 1
+        else
+            ! We are making a "matrix" for either NK or adjoint
+            ! Setup number of state variable based on turbulence assumption
+            if (frozenTurb) then
+                flowRes = .True.
+                turbRes = .False.
+                lStart = 1
+                lEnd = nwf
+                nState = nwf
+            else
+                flowRes = .True.
+                turbRes = .True.
+                lStart = 1
+                lEnd = nw
+                nState = nw
+            end if
+        end if
+
+        ! Generic block to use while setting values
+        allocate (blk(nState, nState))
+
+        ! Exchange data and call the residual to make sure its up to date
+        ! withe current w
+        call whalo2(1_intType, 1_intType, nw, .True., .True., .True.)
+
+        ! This routine will not use the extra variables to block_res or the
+        ! extra outputs, so we must zero them here
+        alphad = zero
+        betad = zero
+        machd = zero
+        machGridd = zero
+        machcoefd = zero
+        pointRefd = zero
+        lengthRefd = zero
+        pinfdimd = zero
+        rhoinfdimd = zero
+        tinfdimd = zero
+
+        rkStage = 0
+
+        ! Zero out the matrix before we start
+        call MatZeroEntries(matrix, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Set the diagonal to 1 if the cell is not a compute cell:
+
+        ! Make an identity block
+        blk = zero
+        do i = 1, nState
+            blk(i, i) = one
+        end do
+
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, level, sps)
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            if (iblank(i, j, k) /= 1) then
+                                iRow = flowDoms(nn, level, sps)%globalCell(i, j, k)
+                                cols(1) = irow
+                                nCol = 1
+
+                                if (buildCoarseMats) then
+                                    do lvl = 1, agmgLevels - 1
+                                        coarseRows(lvl + 1) = coarseIndices(nn, lvl)%arr(i, j, k)
+                                        coarseCols(1, lvl + 1) = coarseRows(lvl + 1)
+                                    end do
+                                end if
+
+                                call setBlock(blk)
+                            end if
+                        end do
+                    end do
                 end do
-             end do
-          end do
-       end do
-    end do
-
-    ! Set a pointer to the correct set of stencil depending on if we are
-    ! using the first order stencil or the full jacobian
-
-    if (usePC) then
-       if (viscous .and. viscPC) then
-          stencil => visc_pc_stencil
-          n_stencil = N_visc_pc
-       else
-          stencil => euler_pc_stencil
-          n_stencil = N_euler_pc
-       end if
-
-       ! Very important to use only Second-Order dissipation for PC
-       lumpedDiss=.True.
-       ! also use first order advection terms for turbulence
-       orderturbsave = orderturb
-       orderturb = firstOrder
-    else
-       if (viscous) then
-          stencil => visc_drdw_stencil
-          n_stencil = N_visc_drdw
-       else
-          stencil => euler_drdw_stencil
-          n_stencil = N_euler_drdw
-       end if
-    end if
-
-    ! Need to trick the residual evalution to use coupled (mean flow and
-    ! turbulent) together.
-
-    ! If we want to do the matrix on a coarser level, we must first
-    ! restrict the fine grid solutions, since it is possible the
-    ! NKsolver was used an the coarse grid solutions are (very!) out of
-    ! date.
-
-    ! Assembling matrix on coarser levels is not entirely implemented yet.
-    currentLevel = level
-    groundLevel = level
-
-    ! Set delta_x
-    delta_x = 1e-9_realType
-    one_over_dx = one/delta_x
-    rkStage = 0
-
-    ! Determine if we want to use frozenTurbulent Adjoint
-    resetToRANS = .False.
-    if (frozenTurb .and. equations == RANSEquations) then
-       equations = NSEquations
-       resetToRANS = .True.
-    end if
-
-    ! Allocate the additional memory we need for doing forward mode AD
-    !  derivatives and copy any required reference values:
-    if (.not. derivVarsAllocated .and. useAD) then
-       call allocDerivativeValues(level)
-    end if
-
-    ! For AD the initial seeds must all be zeroed.
-    if (useAD) then
-       do nn=1, nDom
-          do sps=1, nTimeIntervalsSpectral
-             call setPointers(nn, level, sps)
-             call zeroADSeeds(nn, level, sps)
-          end do
-       end do
-    end if
-
-    do nn=1, nDom
-       do sps=1, nTimeIntervalsSpectral
-          call setPointers(nn, level, sps)
-
-          ! Allocate some extra routines used only for assembly
-          allocate(&
-               flowDoms(nn, level, sps)%dw_deriv(2:il, 2:jl, 2:kl, 1:nw, 1:nw), &
-               flowDoms(nn, level, sps)%wtmp(0:ib,0:jb,0:kb,1:nw), &
-               flowDoms(nn, level, sps)%dwtmp(0:ib,0:jb,0:kb,1:nw), &
-               flowDoms(nn, level, sps)%dwtmp2(0:ib,0:jb,0:kb,1:nw), &
-          stat=ierr)
-          call EChk(ierr, __FILE__, __LINE__)
-
-          if (sps == 1) then
-             allocate(flowDoms(nn, level, sps)%color(0:ib, 0:jb, 0:kb), stat=ierr)
-             call EChk(ierr, __FILE__, __LINE__)
-          end if
-       end do
-    end do
-
-    ! For the PC we don't linearize the shock sensor so it must be
-    ! computed here.
-
-    if (usePC) then
-       call referenceShockSensor
-    end if
-
-    ! For FD, the initial reference values must be computed and stored.
-    if (.not. useAD) then
-       call setFDReference(level)
-    end if
-
-    ! Master Domain Loop
-    domainLoopAD: do nn=1, nDom
-
-       ! Set pointers to the first timeInstance...just to getSizes
-       call setPointers(nn, level, 1)
-       ! Set unknown sizes in diffSizes for AD routine
-       ISIZE1OFDrfbcdata = nBocos
-       ISIZE1OFDrfviscsubface = nViscBocos
-
-       ! Setup the coloring for this block depending on if its
-       ! drdw or a PC
-
-       ! List of all Coloring Routines:
-       !   Debugging Colorings Below:
-       !       call setup_3x3x3_coloring(nn, level,  nColor)
-       !       call setup_5x5x5_coloring(nn, level,  nColor)
-       !       call setup_BF_coloring(nn, level,  nColor)
-       !   Regular:
-       !       call setup_PC_coloring(nn, level,  nColor)
-       !       call setup_dRdw_euler_coloring(nn, level,  nColor)
-       !       call setup_dRdw_visc_coloring(nn, level,  nColor)
-
-       if (usePC) then
-          if (viscous .and. viscPC) then
-             call setup_3x3x3_coloring(nn, level,  nColor) ! dense 3x3x3 coloring
-          else
-             call setup_PC_coloring(nn, level,  nColor) ! Euler Colorings
-          end if
-       else
-          if (viscous) then
-             !call setup_5x5x5_coloring(nn, level,  nColor)
-             call setup_dRdw_visc_coloring(nn, level,  nColor)! Viscous/RANS
-          else
-             call setup_dRdw_euler_coloring(nn, level,  nColor) ! Euler Colorings
-          end if
-       end if
-
-       spectralLoop: do sps=1, nTimeIntervalsSpectral
-          ! Set pointers and (possibly derivative pointers)
-          if (useAD) then
-             call setPointers_d(nn, level, sps)
-          else
-             call setPointers(nn, level, sps)
-          end if
-
-          ! Do Coloring and perturb states
-          colorLoop: do iColor = 1, nColor
-             do sps2 = 1, nTimeIntervalsSpectral
-                flowDoms(nn, 1, sps2)%dw_deriv(:, :, :, :, :) = zero
-             end do
-
-             ! Master State Loop
-             stateLoop: do l=lStart, lEnd
-
-                ! Reset All States and possibe AD seeds
-                do sps2 = 1, nTimeIntervalsSpectral
-                   if (.not. useAD) then
-                      do ll=1,nw
-                         do k=0,kb
-                            do j=0,jb
-                               do i=0,ib
-                                  flowDoms(nn, level, sps2)%w(i,j,k,ll) =  flowDoms(nn, 1, sps2)%wtmp(i,j,k,ll)
-                               end do
-                            end do
-                         end do
-                      end do
-                   end if
+            end do
+        end do
+
+        ! Set a pointer to the correct set of stencil depending on if we are
+        ! using the first order stencil or the full jacobian
+
+        if (usePC) then
+            if (viscous .and. viscPC) then
+                stencil => visc_pc_stencil
+                n_stencil = N_visc_pc
+            else
+                stencil => euler_pc_stencil
+                n_stencil = N_euler_pc
+            end if
 
-                   if (useAD) then
-                      flowdomsd(nn, 1, sps2)%w = zero ! This is actually w seed
-                   end if
+            ! Very important to use only Second-Order dissipation for PC
+            lumpedDiss = .True.
+            ! also use first order advection terms for turbulence
+            orderturbsave = orderturb
+            orderturb = firstOrder
+        else
+            if (viscous) then
+                stencil => visc_drdw_stencil
+                n_stencil = N_visc_drdw
+            else
+                stencil => euler_drdw_stencil
+                n_stencil = N_euler_drdw
+            end if
+        end if
+
+        ! Need to trick the residual evalution to use coupled (mean flow and
+        ! turbulent) together.
+
+        ! If we want to do the matrix on a coarser level, we must first
+        ! restrict the fine grid solutions, since it is possible the
+        ! NKsolver was used an the coarse grid solutions are (very!) out of
+        ! date.
+
+        ! Assembling matrix on coarser levels is not entirely implemented yet.
+        currentLevel = level
+        groundLevel = level
+
+        ! Set delta_x
+        delta_x = 1e-9_realType
+        one_over_dx = one / delta_x
+        rkStage = 0
+
+        ! Determine if we want to use frozenTurbulent Adjoint
+        resetToRANS = .False.
+        if (frozenTurb .and. equations == RANSEquations) then
+            equations = NSEquations
+            resetToRANS = .True.
+        end if
+
+        ! Allocate the additional memory we need for doing forward mode AD
+        !  derivatives and copy any required reference values:
+        if (.not. derivVarsAllocated .and. useAD) then
+            call allocDerivativeValues(level)
+        end if
+
+        ! For AD the initial seeds must all be zeroed.
+        if (useAD) then
+            do nn = 1, nDom
+                do sps = 1, nTimeIntervalsSpectral
+                    call setPointers(nn, level, sps)
+                    call zeroADSeeds(nn, level, sps)
                 end do
+            end do
+        end if
+
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, level, sps)
+
+                ! Allocate some extra routines used only for assembly
+                allocate ( &
+                    flowDoms(nn, level, sps)%dw_deriv(2:il, 2:jl, 2:kl, 1:nw, 1:nw), &
+                    flowDoms(nn, level, sps)%wtmp(0:ib, 0:jb, 0:kb, 1:nw), &
+                    flowDoms(nn, level, sps)%dwtmp(0:ib, 0:jb, 0:kb, 1:nw), &
+                    flowDoms(nn, level, sps)%dwtmp2(0:ib, 0:jb, 0:kb, 1:nw), &
+                    stat=ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                if (sps == 1) then
+                    allocate (flowDoms(nn, level, sps)%color(0:ib, 0:jb, 0:kb), stat=ierr)
+                    call EChk(ierr, __FILE__, __LINE__)
+                end if
+            end do
+        end do
+
+        ! For the PC we don't linearize the shock sensor so it must be
+        ! computed here.
+
+        if (usePC) then
+            call referenceShockSensor
+        end if
+
+        ! For FD, the initial reference values must be computed and stored.
+        if (.not. useAD) then
+            call setFDReference(level)
+        end if
+
+        ! Master Domain Loop
+        domainLoopAD: do nn = 1, nDom
+
+            ! Set pointers to the first timeInstance...just to getSizes
+            call setPointers(nn, level, 1)
+            ! Set unknown sizes in diffSizes for AD routine
+            ISIZE1OFDrfbcdata = nBocos
+            ISIZE1OFDrfviscsubface = nViscBocos
+
+            ! Setup the coloring for this block depending on if its
+            ! drdw or a PC
+
+            ! List of all Coloring Routines:
+            !   Debugging Colorings Below:
+            !       call setup_3x3x3_coloring(nn, level,  nColor)
+            !       call setup_5x5x5_coloring(nn, level,  nColor)
+            !       call setup_BF_coloring(nn, level,  nColor)
+            !   Regular:
+            !       call setup_PC_coloring(nn, level,  nColor)
+            !       call setup_dRdw_euler_coloring(nn, level,  nColor)
+            !       call setup_dRdw_visc_coloring(nn, level,  nColor)
+
+            if (usePC) then
+                if (viscous .and. viscPC) then
+                    call setup_3x3x3_coloring(nn, level, nColor) ! dense 3x3x3 coloring
+                else
+                    call setup_PC_coloring(nn, level, nColor) ! Euler Colorings
+                end if
+            else
+                if (viscous) then
+                    !call setup_5x5x5_coloring(nn, level,  nColor)
+                    call setup_dRdw_visc_coloring(nn, level, nColor)! Viscous/RANS
+                else
+                    call setup_dRdw_euler_coloring(nn, level, nColor) ! Euler Colorings
+                end if
+            end if
 
-                ! Peturb w or set AD Seed according to iColor. Note:
-                ! Do NOT try to putt he useAD if check inside the
-                ! color if check. ifort barfs hard-core on that and it
-                ! segfaults with AVX2
+            spectralLoop: do sps = 1, nTimeIntervalsSpectral
+                ! Set pointers and (possibly derivative pointers)
                 if (useAD) then
-                   do k=0, kb
-                      do j=0, jb
-                         do i=0, ib
-                            if (flowdoms(nn, 1, 1)%color(i, j, k) == icolor) then
-                               flowDomsd(nn, 1, sps)%w(i, j, k, l) = one
-                            end if
-                         end do
-                      end do
-                   end do
+                    call setPointers_d(nn, level, sps)
                 else
-                   do k=0, kb
-                      do j=0, jb
-                         do i=0, ib
-                            if (flowdoms(nn, 1, 1)%color(i, j, k) == icolor) then
-                               w(i, j, k, l) = w(i, j, k, l) + delta_x
-                            end if
-                         end do
-                      end do
-                   end do
+                    call setPointers(nn, level, sps)
                 end if
 
-                ! Run Block-based residual
-                if (useAD) then
+                ! Do Coloring and perturb states
+                colorLoop: do iColor = 1, nColor
+                    do sps2 = 1, nTimeIntervalsSpectral
+                        flowDoms(nn, 1, sps2)%dw_deriv(:, :, :, :, :) = zero
+                    end do
+
+                    ! Master State Loop
+                    stateLoop: do l = lStart, lEnd
+
+                        ! Reset All States and possibe AD seeds
+                        do sps2 = 1, nTimeIntervalsSpectral
+                            if (.not. useAD) then
+                                do ll = 1, nw
+                                    do k = 0, kb
+                                        do j = 0, jb
+                                            do i = 0, ib
+                                      flowDoms(nn, level, sps2)%w(i, j, k, ll) = flowDoms(nn, 1, sps2)%wtmp(i, j, k, ll)
+                                            end do
+                                        end do
+                                    end do
+                                end do
+                            end if
+
+                            if (useAD) then
+                                flowdomsd(nn, 1, sps2)%w = zero ! This is actually w seed
+                            end if
+                        end do
+
+                        ! Peturb w or set AD Seed according to iColor. Note:
+                        ! Do NOT try to putt he useAD if check inside the
+                        ! color if check. ifort barfs hard-core on that and it
+                        ! segfaults with AVX2
+                        if (useAD) then
+                            do k = 0, kb
+                                do j = 0, jb
+                                    do i = 0, ib
+                                        if (flowdoms(nn, 1, 1)%color(i, j, k) == icolor) then
+                                            flowDomsd(nn, 1, sps)%w(i, j, k, l) = one
+                                        end if
+                                    end do
+                                end do
+                            end do
+                        else
+                            do k = 0, kb
+                                do j = 0, jb
+                                    do i = 0, ib
+                                        if (flowdoms(nn, 1, 1)%color(i, j, k) == icolor) then
+                                            w(i, j, k, l) = w(i, j, k, l) + delta_x
+                                        end if
+                                    end do
+                                end do
+                            end do
+                        end if
+
+                        ! Run Block-based residual
+                        if (useAD) then
 #ifndef USE_COMPLEX
-                   call block_res_state_d(nn, sps)
+                            call block_res_state_d(nn, sps)
 #else
-                   print *, 'Forward AD routines are not complexified'
-                   stop
+                            print *, 'Forward AD routines are not complexified'
+                            stop
 #endif
-                else
-                   call block_res_state(nn, sps, useFlowRes=flowRes, useTurbRes=turbRes)
-                end if
+                        else
+                            call block_res_state(nn, sps, useFlowRes=flowRes, useTurbRes=turbRes)
+                        end if
 
-                ! Set the computed residual in dw_deriv. If using FD,
-                ! actually do the FD calculation if AD, just copy out dw
-                ! in flowdomsd
-
-                ! Compute/Copy all derivatives
-                do sps2 = 1, nTimeIntervalsSpectral
-                   do ll=lStart, lEnd
-                      do k=2, kl
-                         do j=2, jl
-                            do i=2, il
-                               if (useAD) then
-                                  flowDoms(nn, 1, sps2)%dw_deriv(i, j, k, ll, l) = &
-                                       flowdomsd(nn, 1, sps2)%dw(i, j, k, ll)
-                               else
-                                  if (sps2 == sps) then
-                                     ! If the peturbation is on this
-                                     ! instance, we've computed the spatial
-                                     ! contribution so subtrace dwtmp
-
-                                     flowDoms(nn, 1, sps2)%dw_deriv(i, j, k, ll, l) = &
-                                          one_over_dx * &
-                                          (flowDoms(nn, 1, sps2)%dw(i, j, k, ll) - &
-                                          flowDoms(nn, 1, sps2)%dwtmp(i, j, k, ll))
-                                  else
-                                     ! If the peturbation is on an off
-                                     ! instance, only subtract dwtmp2
-                                     ! which is the reference result
-                                     ! after initres
-
-                                     flowDoms(nn, 1, sps2)%dw_deriv(i, j, k, ll, l) = &
-                                          one_over_dx*(&
-                                          flowDoms(nn, 1, sps2)%dw(i, j, k, ll) - &
-                                          flowDoms(nn, 1, sps2)%dwtmp2(i, j, k, ll))
-                                  end if
-                               end if
+                        ! Set the computed residual in dw_deriv. If using FD,
+                        ! actually do the FD calculation if AD, just copy out dw
+                        ! in flowdomsd
+
+                        ! Compute/Copy all derivatives
+                        do sps2 = 1, nTimeIntervalsSpectral
+                            do ll = lStart, lEnd
+                                do k = 2, kl
+                                    do j = 2, jl
+                                        do i = 2, il
+                                            if (useAD) then
+                                                flowDoms(nn, 1, sps2)%dw_deriv(i, j, k, ll, l) = &
+                                                    flowdomsd(nn, 1, sps2)%dw(i, j, k, ll)
+                                            else
+                                                if (sps2 == sps) then
+                                                    ! If the peturbation is on this
+                                                    ! instance, we've computed the spatial
+                                                    ! contribution so subtrace dwtmp
+
+                                                    flowDoms(nn, 1, sps2)%dw_deriv(i, j, k, ll, l) = &
+                                                        one_over_dx * &
+                                                        (flowDoms(nn, 1, sps2)%dw(i, j, k, ll) - &
+                                                         flowDoms(nn, 1, sps2)%dwtmp(i, j, k, ll))
+                                                else
+                                                    ! If the peturbation is on an off
+                                                    ! instance, only subtract dwtmp2
+                                                    ! which is the reference result
+                                                    ! after initres
+
+                                                    flowDoms(nn, 1, sps2)%dw_deriv(i, j, k, ll, l) = &
+                                                        one_over_dx * ( &
+                                                        flowDoms(nn, 1, sps2)%dw(i, j, k, ll) - &
+                                                        flowDoms(nn, 1, sps2)%dwtmp2(i, j, k, ll))
+                                                end if
+                                            end if
+                                        end do
+                                    end do
+                                end do
                             end do
-                         end do
-                      end do
-                   end do
-                end do
-             end do stateLoop
-
-             ! Set derivatives by block in "matrix" after we've peturbed
-             ! all states in "color"
-
-             kLoop: do k=0, kb
-                jLoop: do j=0, jb
-                   iLoop: do i=0, ib
-                      colBlank: if (flowDoms(nn, level, sps)%iblank(i, j, k) == 1 .or. &
-                           flowDoms(nn, level, sps)%iBlank(i, j, k) == -1) then
-
-                         ! If the cell we perturned ('iCol') is an
-                         ! interpolated cell, we don't actually use
-                         ! iCol, rather we use the 8 real donors that
-                         ! comprise the cell's value.
-                         if (flowDoms(nn, level, sps)%iblank(i, j, k) == 1) then
-                            cols(1) = flowDoms(nn, level, sps)%globalCell(i, j, k)
-                            nCol = 1
-
-                            if (buildCoarseMats) then
-                               do lvl=1, agmgLevels-1
-                                  coarseCols(1, lvl+1) = coarseIndices(nn, lvl)%arr(i, j, k)
-                               end do
-                            end if
+                        end do
+                    end do stateLoop
+
+                    ! Set derivatives by block in "matrix" after we've peturbed
+                    ! all states in "color"
+
+                    kLoop: do k = 0, kb
+                        jLoop: do j = 0, jb
+                            iLoop: do i = 0, ib
+                                colBlank: if (flowDoms(nn, level, sps)%iblank(i, j, k) == 1 .or. &
+                                              flowDoms(nn, level, sps)%iBlank(i, j, k) == -1) then
+
+                                    ! If the cell we perturned ('iCol') is an
+                                    ! interpolated cell, we don't actually use
+                                    ! iCol, rather we use the 8 real donors that
+                                    ! comprise the cell's value.
+                                    if (flowDoms(nn, level, sps)%iblank(i, j, k) == 1) then
+                                        cols(1) = flowDoms(nn, level, sps)%globalCell(i, j, k)
+                                        nCol = 1
+
+                                        if (buildCoarseMats) then
+                                            do lvl = 1, agmgLevels - 1
+                                                coarseCols(1, lvl + 1) = coarseIndices(nn, lvl)%arr(i, j, k)
+                                            end do
+                                        end if
+
+                                    else
+                                        do m = 1, 8
+                                            cols(m) = flowDoms(nn, level, sps)%gInd(m, i, j, k)
+
+                                            if (buildCoarseMats) then
+                                                do lvl = 1, agmgLevels - 1
+                                                  coarseCols(m, lvl + 1) = coarseOversetIndices(nn, lvl)%arr(m, i, j, k)
+                                                end do
+                                            end if
+                                        end do
+
+                                        fInd = fringePtr(1, i, j, k)
+                                        call fracToWeights(flowDoms(nn, level, sps)%fringes(fInd)%donorFrac, &
+                                                           weights)
+                                        nCol = 8
+                                    end if
+
+                                    colorCheck: if (flowdoms(nn, 1, 1)%color(i, j, k) == icolor) then
+
+                                        ! i, j, k are now the "Center" cell that we
+                                        ! actually petrubed. From knowledge of the
+                                        ! stencil, we can simply take this cell and
+                                        ! using the stencil, set the values around it
+                                        ! in PETSc
+
+                                        stencilLoop: do i_stencil = 1, n_stencil
+                                            ii = stencil(i_stencil, 1)
+                                            jj = stencil(i_stencil, 2)
+                                            kk = stencil(i_stencil, 3)
+
+                                            ! Check to see if the cell in this
+                                            ! sentcil is on a physical cell, not a
+                                            ! halo/BC halo
+                                            onBlock: if (i + ii >= 2 .and. i + ii <= il .and. &
+                                                         j + jj >= 2 .and. j + jj <= jl .and. &
+                                                         k + kk >= 2 .and. k + kk <= kl) then
+
+                                                irow = flowDoms(nn, level, sps)%globalCell( &
+                                                       i + ii, j + jj, k + kk)
+
+                                                if (buildCoarseMats) then
+                                                    do lvl = 1, agmgLevels - 1
+                                                coarseRows(lvl + 1) = coarseIndices(nn, lvl)%arr(i + ii, j + jj, k + kk)
+                                                    end do
+                                                end if
+
+                                        rowBlank: if (flowDoms(nn, level, sps)%iBlank(i + ii, j + jj, k + kk) == 1) then
+
+                                                    centerCell: if (ii == 0 .and. jj == 0 .and. kk == 0) then
+                                                        useDiagPC: if (usePC .and. useDiagTSPC) then
+                                                            ! If we're doing the PC and we want
+                                                            ! to use TS diagonal form, only set
+                                                            ! values for on-time insintance
+                                                           blk = flowDoms(nn, 1, sps)%dw_deriv(i + ii, j + jj, k + kk, &
+                                                                                               lStart:lEnd, lStart:lEnd)
+                                                            call setBlock(blk)
+                                                        else
+                                                            ! Otherwise loop over spectral
+                                                            ! instances and set all.
+                                                            do sps2 = 1, nTimeIntervalsSpectral
+                                                                irow = flowDoms(nn, level, sps2)% &
+                                                                       globalCell(i + ii, j + jj, k + kk)
+                                                          blk = flowDoms(nn, 1, sps2)%dw_deriv(i + ii, j + jj, k + kk, &
+                                                                                               lStart:lEnd, lStart:lEnd)
+                                                                call setBlock(blk)
+                                                            end do
+                                                        end if useDiagPC
+                                                    else
+                                                        ! ALl other cells just set.
+                                                        blk = flowDoms(nn, 1, sps)%dw_deriv(i + ii, j + jj, k + kk, &
+                                                                                            lStart:lEnd, lStart:lEnd)
+                                                        call setBlock(blk)
+                                                    end if centerCell
+                                                end if rowBlank
+                                            end if onBlock
+                                        end do stencilLoop
+                                    end if colorCheck
+                                end if colBlank
+                            end do iLoop
+                        end do jLoop
+                    end do kLoop
+                end do colorLoop
+            end do spectralLoop
+        end do domainLoopAD
+
+        ! PETSc Matrix Assembly begin
+        call MatAssemblyBegin(matrix, MAT_FINAL_ASSEMBLY, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        if (buildCoarseMats) then
+            do lvl = 2, agmgLevels
+                call MatAssemblyBegin(A(lvl), MAT_FINAL_ASSEMBLY, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+            end do
+        end if
+
+        ! Maybe we can do something useful while the communication happens?
+        ! Deallocate the temporary memory used in this routine.
+
+        ! Deallocate and reset values
+        if (.not. useAD) then
+            call resetFDReference(level)
+        end if
+
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                deallocate ( &
+                    flowDoms(nn, 1, sps)%dw_deriv, &
+                    flowDoms(nn, 1, sps)%wTmp, &
+                    flowDoms(nn, 1, sps)%dwTmp, &
+                    flowDoms(nn, 1, sps)%dwTmp2)
+                if (sps == 1) then
+                    deallocate (flowDoms(nn, 1, 1)%color)
+                end if
+            end do
+        end do
 
-                         else
-                            do m=1,8
-                               cols(m) = flowDoms(nn, level, sps)%gInd(m, i, j, k)
+        ! Return dissipation Parameters to normal -> VERY VERY IMPORTANT
+        if (usePC) then
+            lumpedDiss = .False.
+            ! also recover the turbulence advection order
+            orderturb = orderturbsave
+        end if
 
-                               if (buildCoarseMats) then
-                                  do lvl=1, agmgLevels-1
-                                     coarseCols(m, lvl+1) = coarseOversetIndices(nn, lvl)%arr(m, i, j, k)
-                                  end do
-                               end if
-                            end do
+        ! Reset the correct equation parameters if we were useing the frozen
+        ! Turbulent
+        if (resetToRANS) then
+            equations = RANSEquations
+        end if
 
-                            fInd = fringePtr(1,i,j,k)
-                            call fracToWeights(flowDoms(nn, level, sps)%fringes(fInd)%donorFrac, &
-                                 weights)
-                            nCol = 8
-                         end if
-
-                         colorCheck: if (flowdoms(nn, 1, 1)%color(i, j, k) == icolor) then
-
-                            ! i, j, k are now the "Center" cell that we
-                            ! actually petrubed. From knowledge of the
-                            ! stencil, we can simply take this cell and
-                            ! using the stencil, set the values around it
-                            ! in PETSc
-
-                            stencilLoop: do i_stencil=1, n_stencil
-                               ii = stencil(i_stencil, 1)
-                               jj = stencil(i_stencil, 2)
-                               kk = stencil(i_stencil, 3)
-
-                               ! Check to see if the cell in this
-                               ! sentcil is on a physical cell, not a
-                               ! halo/BC halo
-                               onBlock: if ( i+ii >= 2 .and. i+ii <= il .and. &
-                                    j+jj >= 2 .and. j+jj <= jl .and. &
-                                    k+kk >= 2 .and. k+kk <= kl) then
-
-                                  irow = flowDoms(nn, level, sps)%globalCell(&
-                                       i+ii, j+jj, k+kk)
-
-                                  if (buildCoarseMats) then
-                                     do lvl=1, agmgLevels-1
-                                        coarseRows(lvl+1) = coarseIndices(nn, lvl)%arr(i+ii, j+jj, k+kk)
-                                     end do
-                                  end if
-
-                                  rowBlank: if (flowDoms(nn, level, sps)%iBlank(i+ii, j+jj, k+kk) == 1) then
-
-                                     centerCell: if ( ii == 0 .and. jj == 0  .and. kk == 0) then
-                                        useDiagPC: if (usePC .and. useDiagTSPC) then
-                                           ! If we're doing the PC and we want
-                                           ! to use TS diagonal form, only set
-                                           ! values for on-time insintance
-                                           blk = flowDoms(nn, 1, sps)%dw_deriv(i+ii, j+jj, k+kk, &
-                                                lStart:lEnd, lStart:lEnd)
-                                           call setBlock(blk)
-                                        else
-                                           ! Otherwise loop over spectral
-                                           ! instances and set all.
-                                           do sps2=1, nTimeIntervalsSpectral
-                                              irow = flowDoms(nn, level, sps2)%&
-                                                   globalCell(i+ii, j+jj, k+kk)
-                                              blk = flowDoms(nn, 1, sps2)%dw_deriv(i+ii, j+jj, k+kk, &
-                                                   lStart:lEnd, lStart:lEnd)
-                                              call setBlock(blk)
-                                           end do
-                                        end if useDiagPC
-                                     else
-                                        ! ALl other cells just set.
-                                        blk = flowDoms(nn, 1, sps)%dw_deriv(i+ii, j+jj, k+kk, &
-                                             lStart:lEnd, lStart:lEnd)
-                                        call setBlock(blk)
-                                     end if centerCell
-                                  end if rowBlank
-                               end if onBlock
-                            end do stencilLoop
-                         end if colorCheck
-                      end if colBlank
-                   end do iLoop
-                end do jLoop
-             end do kLoop
-          end do colorLoop
-       end do spectralLoop
-    end do domainLoopAD
-
-    ! PETSc Matrix Assembly begin
-    call MatAssemblyBegin(matrix, MAT_FINAL_ASSEMBLY, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    if (buildCoarseMats) then
-       do lvl=2, agmgLevels
-          call MatAssemblyBegin(A(lvl), MAT_FINAL_ASSEMBLY, ierr)
-          call EChk(ierr, __FILE__, __LINE__)
-       end do
-    end if
-
-    ! Maybe we can do something useful while the communication happens?
-    ! Deallocate the temporary memory used in this routine.
-
-    ! Deallocate and reset values
-    if (.not. useAD) then
-       call resetFDReference(level)
-    end if
-
-    do nn=1, nDom
-       do sps=1, nTimeIntervalsSpectral
-          deallocate(&
-               flowDoms(nn, 1, sps)%dw_deriv, &
-               flowDoms(nn, 1, sps)%wTmp, &
-               flowDoms(nn, 1, sps)%dwTmp, &
-               flowDoms(nn, 1, sps)%dwTmp2)
-          if (sps==1) then
-             deallocate(flowDoms(nn, 1, 1)%color)
-          end if
-       end do
-    end do
-
-    ! Return dissipation Parameters to normal -> VERY VERY IMPORTANT
-    if (usePC) then
-       lumpedDiss = .False.
-       ! also recover the turbulence advection order
-       orderturb = orderturbsave
-    end if
-
-    ! Reset the correct equation parameters if we were useing the frozen
-    ! Turbulent
-    if (resetToRANS) then
-       equations = RANSEquations
-    end if
-
-    deallocate(blk)
-
-    ! Complete the matrix assembly.
-    call MatAssemblyEnd  (matrix, MAT_FINAL_ASSEMBLY, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    if (buildCoarseMats) then
-       do lvl=2, agmgLevels
-          call MatAssemblyEnd(A(lvl), MAT_FINAL_ASSEMBLY, ierr)
-          call EChk(ierr, __FILE__, __LINE__)
-       end do
-    end if
-
-    call MatSetOption(matrix, MAT_NEW_NONZERO_LOCATIONS, PETSC_FALSE, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! ================= Important ===================
-
-    ! We must run the residual computation to make sure that all
-    ! intermediate variables are up to date. We can just call master
-    ! for this. No need to recompute spatial terms.
-    call master(.false.)
-
-  contains
-
-    subroutine setBlock(blk)
-      ! Sets a block at irow, icol, if useTranspose is False
-      ! Sets a block at icol, irow with transpose of blk if useTranspose is True
-
-      use genericISNAN, only : myisnan
-      implicit none
-      real(kind=realType), dimension(nState, nState) :: blk
-
-      ! local variables
-      integer(kind=intType) :: i, j, tmp, iRowSet, iColSet
-      logical :: zeroFlag
-      zeroFlag = .False.
+        deallocate (blk)
+
+        ! Complete the matrix assembly.
+        call MatAssemblyEnd(matrix, MAT_FINAL_ASSEMBLY, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        if (buildCoarseMats) then
+            do lvl = 2, agmgLevels
+                call MatAssemblyEnd(A(lvl), MAT_FINAL_ASSEMBLY, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+            end do
+        end if
+
+        call MatSetOption(matrix, MAT_NEW_NONZERO_LOCATIONS, PETSC_FALSE, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! ================= Important ===================
+
+        ! We must run the residual computation to make sure that all
+        ! intermediate variables are up to date. We can just call master
+        ! for this. No need to recompute spatial terms.
+        call master(.false.)
+
+    contains
+
+        subroutine setBlock(blk)
+            ! Sets a block at irow, icol, if useTranspose is False
+            ! Sets a block at icol, irow with transpose of blk if useTranspose is True
+
+            use genericISNAN, only: myisnan
+            implicit none
+            real(kind=realType), dimension(nState, nState) :: blk
+
+            ! local variables
+            integer(kind=intType) :: i, j, tmp, iRowSet, iColSet
+            logical :: zeroFlag
+            zeroFlag = .False.
 
 #ifndef USE_COMPLEX
-      ! Check if the blk is all zeros
+            ! Check if the blk is all zeros
 
-      zeroFlag = .True.
-      do i = 1, nState
-         do j = 1, nState
-            if ( .not. blk(i,j) == zero) then
-               zeroFlag = .False.
+            zeroFlag = .True.
+            do i = 1, nState
+                do j = 1, nState
+                    if (.not. blk(i, j) == zero) then
+                        zeroFlag = .False.
+                    end if
+                end do
+            end do
+
+            ! Check if the blk has nan
+            if (myisnan(sum(blk))) then
+                print *, 'Bad Block:', blk
+                print *, 'irow:', irow
+                print *, 'icol', cols(1:ncol)
+                print *, 'nn:', nn
+                print *, 'ijk:', i, j, k
+                call EChk(1, __FILE__, __LINE__)
             end if
-         end do
-      end do
-
-      ! Check if the blk has nan
-      if (myisnan(sum(blk))) then
-         print *,'Bad Block:',blk
-         print *,'irow:',irow
-         print *,'icol',cols(1:ncol)
-         print *,'nn:',nn
-         print *,'ijk:',i,j,k
-         call EChk(1, __FILE__, __LINE__)
-      end if
 #endif
 
-      if (.not. zeroFlag) then
-         if (nCol == 1) then
-            if (useTranspose) then
-               blk = transpose(blk)
-               call MatSetValuesBlocked(matrix, 1, cols(1), 1, irow, blk, &
-                    ADD_VALUES, ierr)
-               call EChk(ierr, __FILE__, __LINE__)
-            else
-               call MatSetValuesBlocked(matrix, 1, irow, 1, cols(1), blk, &
-                    ADD_VALUES, ierr)
-               call EChk(ierr, __FILE__, __LINE__)
-            end if
-         else
-            if (useTranspose) then
-               blk = transpose(blk)
-               do m=1, ncol
-                  if (cols(m) >= 0) then
-                     call MatSetValuesBlocked(matrix, 1, cols(m), 1, irow, blk*weights(m), &
-                          ADD_VALUES, ierr)
-                     call EChk(ierr, __FILE__, __LINE__)
-                  end if
-               end do
-            else
-               do m=1, ncol
-                  if (cols(m) >= 0) then
-                     call MatSetValuesBlocked(matrix, 1, irow, 1, cols(m), blk*weights(m), &
-                          ADD_VALUES, ierr)
-                     call EChk(ierr, __FILE__, __LINE__)
-                  end if
-               end do
+            if (.not. zeroFlag) then
+                if (nCol == 1) then
+                    if (useTranspose) then
+                        blk = transpose(blk)
+                        call MatSetValuesBlocked(matrix, 1, cols(1), 1, irow, blk, &
+                                                 ADD_VALUES, ierr)
+                        call EChk(ierr, __FILE__, __LINE__)
+                    else
+                        call MatSetValuesBlocked(matrix, 1, irow, 1, cols(1), blk, &
+                                                 ADD_VALUES, ierr)
+                        call EChk(ierr, __FILE__, __LINE__)
+                    end if
+                else
+                    if (useTranspose) then
+                        blk = transpose(blk)
+                        do m = 1, ncol
+                            if (cols(m) >= 0) then
+                                call MatSetValuesBlocked(matrix, 1, cols(m), 1, irow, blk * weights(m), &
+                                                         ADD_VALUES, ierr)
+                                call EChk(ierr, __FILE__, __LINE__)
+                            end if
+                        end do
+                    else
+                        do m = 1, ncol
+                            if (cols(m) >= 0) then
+                                call MatSetValuesBlocked(matrix, 1, irow, 1, cols(m), blk * weights(m), &
+                                                         ADD_VALUES, ierr)
+                                call EChk(ierr, __FILE__, __LINE__)
+                            end if
+                        end do
+                    end if
+                end if
+
+                ! Extension for setting coarse grids:
+                if (buildCoarseMats) then
+                    if (nCol == 1) then
+                        do lvl = 2, agmgLevels
+                            if (useTranspose) then
+                                ! Loop over the coarser levels
+                                call MatSetValuesBlocked(A(lvl), 1, coarseCols(1, lvl), 1, coarseRows(lvl), &
+                                                         blk, ADD_VALUES, ierr)
+                            else
+                                call MatSetValuesBlocked(A(lvl), 1, coarseRows(lvl), 1, coarseCols(1, lvl), &
+                                                         blk, ADD_VALUES, ierr)
+                            end if
+                        end do
+                    else
+                        do m = 1, nCol
+                            do lvl = 2, agmgLevels
+                                if (coarseCols(m, lvl) >= 0) then
+                                    if (useTranspose) then
+                                        ! Loop over the coarser levels
+                                        call MatSetValuesBlocked(A(lvl), 1, coarseCols(m, lvl), 1, coarseRows(lvl), &
+                                                                 blk * weights(m), ADD_VALUES, ierr)
+                                    else
+                                        call MatSetValuesBlocked(A(lvl), 1, coarseRows(lvl), 1, coarseCols(m, lvl), &
+                                                                 blk * weights(m), ADD_VALUES, ierr)
+                                    end if
+                                end if
+                            end do
+                        end do
+                    end if
+                end if
             end if
-         end if
-
-         ! Extension for setting coarse grids:
-         if (buildCoarseMats) then
-            if (nCol == 1) then
-               do lvl=2, agmgLevels
-                  if (useTranspose) then
-                     ! Loop over the coarser levels
-                     call MatSetValuesBlocked(A(lvl), 1, coarseCols(1, lvl), 1, coarseRows(lvl), &
-                          blk, ADD_VALUES, ierr)
-                  else
-                     call MatSetValuesBlocked(A(lvl), 1, coarseRows(lvl), 1, coarseCols(1, lvl), &
-                          blk, ADD_VALUES, ierr)
-                  end if
-               end do
-            else
-               do m=1, nCol
-                  do lvl=2, agmgLevels
-                     if (coarseCols(m, lvl) >= 0) then
-                        if (useTranspose) then
-                           ! Loop over the coarser levels
-                           call MatSetValuesBlocked(A(lvl), 1, coarseCols(m, lvl), 1, coarseRows(lvl), &
-                                blk*weights(m), ADD_VALUES, ierr)
-                        else
-                           call MatSetValuesBlocked(A(lvl), 1, coarseRows(lvl), 1, coarseCols(m, lvl), &
-                                blk*weights(m), ADD_VALUES, ierr)
+        end subroutine setBlock
+    end subroutine setupStateResidualMatrix
+
+    subroutine allocDerivativeValues(level)
+
+        use constants
+        use block, only: flowDoms, flowDomsd
+        use blockPointers, only: nDom, il, jl, kl, ie, je, ke, ib, jb, kb, BCData, &
+                                 nBOcos, nViscBocos
+        use inputtimespectral, only: nTimeIntervalsSpectral
+        use flowvarrefstate, only: winf, winfd, nw, nt1, nt2
+        use inputDiscretization, only: useApproxWallDistance
+        use inputPhysics, only: wallDistanceNeeded
+        use communication, only: adflow_comm_world
+        use wallDistanceData, only: xSurfVec, xSurfVecd!, PETSC_DETERMINE
+        use BCPointers_b
+        use adjointVars, only: derivVarsAllocated
+        use utils, only: EChk, setPointers, getDirAngle
+        use cgnsGrid, only: cgnsDoms, cgnsDomsd, cgnsNDom
+        implicit none
+
+        ! Input parameters
+        integer(kind=intType) :: level
+
+        ! Local variables
+        integer(kind=intType) :: sps, ierr, i, j, k, l, mm, nn
+        integer(kind=intType) :: iBeg, jBeg, iStop, jStop, isizemax, jsizemax
+        integer(kind=intType) :: inBeg, jnBeg, inStop, jnStop
+        integer(kind=intType) :: massShape(2), max_face_size
+        integer(kind=intType) :: iBoco, nDataset, iData, nDirichlet, iDirichlet, nArray
+        ! First create the derivative flowdoms structure flowDomsd. Note we
+        ! only allocate information for the finest grid.
+
+        allocate (flowDomsd(nDom, 1, nTimeIntervalsSpectral), stat=ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! winfd hasn't be allocated so we'll do it here
+        allocate (winfd(size(winf)))
+
+        ! If we are not using RANS with walDistance create a dummy xSurfVec
+        ! since one does not yet exist
+        if (.not. wallDistanceNeeded .or. .not. useApproxWallDistance) then
+            do sps = 1, nTimeIntervalsSpectral
+                call VecCreateMPI(ADFLOW_COMM_WORLD, 1, PETSC_DETERMINE, xSurfVec(1, sps), ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+            end do
+        end if
+
+        ! Duplicate the PETSc Xsurf Vec, but only on the first level:
+        allocate (xSurfVecd(nTimeIntervalsSpectral))
+        do sps = 1, nTimeIntervalsSpectral
+            call VecDuplicate(xSurfVec(1, sps), xSurfVecd(sps), ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+        end do
+
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, level, sps)
+
+                ! Allocate d2wall if not already done so
+                if (.not. associated(flowDoms(nn, 1, sps)%d2wall)) then
+                    allocate (flowDoms(nn, 1, sps)%d2wall(2:il, 2:jl, 2:kl))
+                    call EChk(ierr, __FILE__, __LINE__)
+                end if
+
+                ! Now allocate all valus that have a differentiable
+                ! dependence.
+                allocate ( &
+                    flowDomsd(nn, level, sps)%x(0:ie, 0:je, 0:ke, 3), &
+                    flowDomsd(nn, level, sps)%vol(0:ib, 0:jb, 0:kb), &
+                    flowDomsd(nn, level, sps)%si(0:ie, 1:je, 1:ke, 3), &
+                    flowDomsd(nn, level, sps)%sj(1:ie, 0:je, 1:ke, 3), &
+                    flowDomsd(nn, level, sps)%sk(1:ie, 1:je, 0:ke, 3), &
+                    flowDomsd(nn, level, sps)%rotMatrixI(il, 2:jl, 2:kl, 3, 3), &
+                    flowDomsd(nn, level, sps)%rotMatrixJ(2:il, jl, 2:kl, 3, 3), &
+                    flowDomsd(nn, level, sps)%rotMatrixK(2:il, 2:jl, kl, 3, 3), &
+                    flowDomsd(nn, level, sps)%s(ie, je, ke, 3), &
+                    flowDomsd(nn, level, sps)%sFaceI(0:ie, je, ke), &
+                    flowDomsd(nn, level, sps)%sFaceJ(ie, 0:je, ke), &
+                    flowDomsd(nn, level, sps)%sFaceK(ie, je, 0:ke), &
+                    flowDomsd(nn, level, sps)%w(0:ib, 0:jb, 0:kb, 1:nw), &
+                    flowDomsd(nn, level, sps)%dw(0:ib, 0:jb, 0:kb, 1:nw), &
+                    flowDomsd(nn, level, sps)%fw(0:ib, 0:jb, 0:kb, 1:nw), &
+                    flowDomsd(nn, level, sps)%scratch(0:ib, 0:jb, 0:kb, 5), &
+                    flowDomsd(nn, level, sps)%p(0:ib, 0:jb, 0:kb), &
+                    flowDomsd(nn, level, sps)%gamma(0:ib, 0:jb, 0:kb), &
+                    flowDomsd(nn, level, sps)%aa(0:ib, 0:jb, 0:kb), &
+                    flowDomsd(nn, level, sps)%ux(il, jl, kl), &
+                    flowDomsd(nn, level, sps)%uy(il, jl, kl), &
+                    flowDomsd(nn, level, sps)%uz(il, jl, kl), &
+                    flowDomsd(nn, level, sps)%vx(il, jl, kl), &
+                    flowDomsd(nn, level, sps)%vy(il, jl, kl), &
+                    flowDomsd(nn, level, sps)%vz(il, jl, kl), &
+                    flowDomsd(nn, level, sps)%wx(il, jl, kl), &
+                    flowDomsd(nn, level, sps)%wy(il, jl, kl), &
+                    flowDomsd(nn, level, sps)%wz(il, jl, kl), &
+                    flowDomsd(nn, level, sps)%qx(il, jl, kl), &
+                    flowDomsd(nn, level, sps)%qy(il, jl, kl), &
+                    flowDomsd(nn, level, sps)%qz(il, jl, kl), &
+                    flowDomsd(nn, level, sps)%rlv(0:ib, 0:jb, 0:kb), &
+                    flowDomsd(nn, level, sps)%rev(0:ib, 0:jb, 0:kb), &
+                    flowDomsd(nn, level, sps)%dtl(1:ie, 1:je, 1:ke), &
+                    flowDomsd(nn, level, sps)%radI(1:ie, 1:je, 1:ke), &
+                    flowDomsd(nn, level, sps)%radJ(1:ie, 1:je, 1:ke), &
+                    flowDomsd(nn, level, sps)%radK(1:ie, 1:je, 1:ke), &
+                    flowDomsd(nn, level, sps)%BCData(nBocos), &
+                    flowDomsd(nn, level, sps)%bmti1(je, ke, nt1:nt2, nt1:nt2), &
+                    flowDomsd(nn, level, sps)%bmti2(je, ke, nt1:nt2, nt1:nt2), &
+                    flowDomsd(nn, level, sps)%bmtj1(ie, ke, nt1:nt2, nt1:nt2), &
+                    flowDomsd(nn, level, sps)%bmtj2(ie, ke, nt1:nt2, nt1:nt2), &
+                    flowDomsd(nn, level, sps)%bmtk1(ie, je, nt1:nt2, nt1:nt2), &
+                    flowDomsd(nn, level, sps)%bmtk2(ie, je, nt1:nt2, nt1:nt2), &
+                    flowDomsd(nn, level, sps)%bvti1(je, ke, nt1:nt2), &
+                    flowDomsd(nn, level, sps)%bvti2(je, ke, nt1:nt2), &
+                    flowDomsd(nn, level, sps)%bvtj1(ie, ke, nt1:nt2), &
+                    flowDomsd(nn, level, sps)%bvtj2(ie, ke, nt1:nt2), &
+                    flowDomsd(nn, level, sps)%bvtk1(ie, je, nt1:nt2), &
+                    flowDomsd(nn, level, sps)%bvtk2(ie, je, nt1:nt2), &
+                    flowDomsd(nn, level, sps)%d2Wall(2:il, 2:jl, 2:kl), &
+                    stat=ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                allocate (flowDomsd(nn, level, sps)%viscSubface(nviscBocos), &
+                          stat=ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                ! Set the number of bocos/viscbocs
+                flowdomsd(nn, level, sps)%nBocos = flowdoms(nn, level, sps)%nbocos
+                flowDomsd(nn, level, sps)%nViscBocos = flowDoms(nn, level, sps)%nViscBocos
+                bocoLoop: do mm = 1, nBocos
+
+                    ! Store the cell range of the boundary subface
+                    ! a bit easier.
+
+                    iBeg = BCData(mm)%icbeg; iStop = BCData(mm)%icend
+                    jBeg = BCData(mm)%jcbeg; jStop = BCData(mm)%jcend
+
+                    inBeg = BCData(mm)%inBeg; inStop = BCData(mm)%inEnd
+                    jnBeg = BCdata(mm)%jnBeg; jnStop = BCData(mm)%jnEnd
+                    allocate ( &
+                        flowDomsd(nn, level, sps)%BCData(mm)%norm(iBeg:iStop, jBeg:jStop, 3), &
+                        flowDomsd(nn, level, sps)%BCData(mm)%rface(iBeg:iStop, jBeg:jStop), &
+                        flowDomsd(nn, level, sps)%BCData(mm)%Fp(inBeg + 1:inStop, jnBeg + 1:jnStop, 3), &
+                        flowDomsd(nn, level, sps)%BCData(mm)%Fv(inBeg + 1:inStop, jnBeg + 1:jnStop, 3), &
+                        flowDomsd(nn, level, sps)%BCData(mm)%Tp(inBeg:inStop, jnBeg:jnStop, 3), &
+                        flowDomsd(nn, level, sps)%BCData(mm)%Tv(inBeg:inStop, jnBeg:jnStop, 3), &
+                        flowDomsd(nn, level, sps)%BCData(mm)%F(inBeg:inStop, jnBeg:jnStop, 3), &
+                        flowDomsd(nn, level, sps)%BCData(mm)%T(inBeg:inStop, jnBeg:jnStop, 3), &
+                        flowDomsd(nn, level, sps)%BCData(mm)%area(inBeg + 1:inStop, jnBeg + 1:jnStop), &
+                        flowDomsd(nn, level, sps)%BCData(mm)%uSlip(iBeg:iStop, jBeg:jStop, 3), &
+                        flowDomsd(nn, level, sps)%BCData(mm)%TNS_Wall(iBeg:iStop, jBeg:jStop), &
+                        flowDomsd(nn, level, sps)%BCData(mm)%ptInlet(iBeg:iStop, jBeg:jStop), &
+                        flowDomsd(nn, level, sps)%BCData(mm)%htInlet(iBeg:iStop, jBeg:jStop), &
+                        flowDomsd(nn, level, sps)%BCData(mm)%ttInlet(iBeg:iStop, jBeg:jStop), &
+                        flowDomsd(nn, level, sps)%BCData(mm)%turbInlet(iBeg:iStop, jBeg:jStop, nt1:nt2), &
+                        flowDomsd(nn, level, sps)%BCData(mm)%ps(iBeg:iStop, jBeg:jStop), stat=ierr)
+
+                    call EChk(ierr, __FILE__, __LINE__)
+                end do bocoLoop
+
+                viscbocoLoop: do mm = 1, nviscBocos
+
+                    iBeg = BCData(mm)%inBeg + 1; iStop = BCData(mm)%inEnd
+                    jBeg = BCData(mm)%jnBeg + 1; jStop = BCData(mm)%jnEnd
+
+                    allocate ( &
+                        flowDomsd(nn, level, sps)%viscSubface(mm)%tau(iBeg:iStop, jBeg:jStop, 6), &
+                        flowDomsd(nn, level, sps)%viscSubface(mm)%q(iBeg:iStop, jBeg:jStop, 6), &
+                        stat=ierr)
+                    call EChk(ierr, __FILE__, __LINE__)
+                end do viscbocoLoop
+            end do
+        end do
+
+        ! Allocate the derivatives values for the CGNS data structure used
+        ! to store boundary condition values
+        do nn = 1, cgnsNDom
+            nBocos = cgnsDoms(nn)%nBocos
+            allocate (cgnsDomsd(nn)%bocoInfo(nBocos))
+            do iBoco = 1, nBocos
+                if (associated(cgnsDoms(nn)%bocoInfo(iBoco)%dataSet)) then
+                    nDataSet = size(cgnsDoms(nn)%bocoInfo(iBoco)%dataSet)
+                    allocate (cgnsDomsd(nn)%bocoInfo(iBoco)%dataSet(nDataSet))
+
+                    do iData = 1, nDataSet
+                        if (associated(cgnsDoms(nn)%bocoInfo(iBoco)%dataSet(iData)%dirichletArrays)) then
+                            nDirichlet = size(cgnsDoms(nn)%bocoInfo(iBoco)%dataSet(iData)%dirichletArrays)
+                            allocate (cgnsDomsd(nn)%bocoInfo(iBoco)%dataSet(iData)%dirichletArrays(nDirichlet))
+
+                            do iDirichlet = 1, nDirichlet
+                          nArray = size(cgnsDoms(nn)%bocoInfo(iBoco)%dataSet(iData)%dirichletArrays(iDirichlet)%dataArr)
+                     allocate (cgnsDomsd(nn)%bocoInfo(iBoco)%dataSet(iData)%dirichletArrays(iDirichlet)%dataArr(nArray))
+                         cgnsDomsd(nn)%bocoInfo(iBoco)%dataSet(iData)%dirichletArrays(iDirichlet)%dataArr(nArray) = zero
+                            end do
                         end if
-                     end if
-                  end do
-               end do
-            end if
-         end if
-      end if
-    end subroutine setBlock
-  end subroutine setupStateResidualMatrix
-
-  subroutine allocDerivativeValues(level)
-
-    use constants
-    use block, only : flowDoms, flowDomsd
-    use blockPointers, only : nDom, il, jl, kl, ie, je, ke, ib, jb, kb, BCData, &
-         nBOcos, nViscBocos
-    use inputtimespectral, only : nTimeIntervalsSpectral
-    use flowvarrefstate, only : winf, winfd, nw, nt1, nt2
-    use inputDiscretization, only : useApproxWallDistance
-    use inputPhysics, only : wallDistanceNeeded
-    use communication, only : adflow_comm_world
-    use wallDistanceData, only : xSurfVec, xSurfVecd!, PETSC_DETERMINE
-    use BCPointers_b
-    use adjointVars, only : derivVarsAllocated
-    use utils, only : EChk, setPointers, getDirAngle
-    use cgnsGrid, only : cgnsDoms, cgnsDomsd, cgnsNDom
-    implicit none
-
-    ! Input parameters
-    integer(kind=intType) :: level
-
-    ! Local variables
-    integer(kind=intType) :: sps,ierr,i,j,k,l, mm, nn
-    integer(kind=intType) :: iBeg, jBeg, iStop, jStop, isizemax, jsizemax
-    integer(kind=intType) :: inBeg, jnBeg, inStop, jnStop
-    integer(kind=intType) :: massShape(2), max_face_size
-    integer(kind=intType) :: iBoco, nDataset, iData, nDirichlet, iDirichlet, nArray
-    ! First create the derivative flowdoms structure flowDomsd. Note we
-    ! only allocate information for the finest grid.
-
-    allocate(flowDomsd(nDom, 1, nTimeIntervalsSpectral), stat=ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! winfd hasn't be allocated so we'll do it here
-    allocate(winfd(size(winf)))
-
-    ! If we are not using RANS with walDistance create a dummy xSurfVec
-    ! since one does not yet exist
-    if (.not. wallDistanceNeeded .or. .not. useApproxWallDistance) then
-       do sps=1, nTimeIntervalsSpectral
-          call VecCreateMPI(ADFLOW_COMM_WORLD, 1, PETSC_DETERMINE, xSurfVec(1, sps), ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-       end do
-    end if
-
-    ! Duplicate the PETSc Xsurf Vec, but only on the first level:
-    allocate(xSurfVecd(nTimeIntervalsSpectral))
-    do sps=1, nTimeIntervalsSpectral
-       call VecDuplicate(xSurfVec(1, sps), xSurfVecd(sps), ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-    end do
-
-    do nn=1, nDom
-       do sps=1, nTimeIntervalsSpectral
-          call setPointers(nn, level, sps)
-
-          ! Allocate d2wall if not already done so
-          if (.not. associated(flowDoms(nn, 1, sps)%d2wall)) then
-             allocate(flowDoms(nn, 1, sps)%d2wall(2:il, 2:jl, 2:kl))
-             call EChk(ierr,__FILE__,__LINE__)
-          end if
-
-          ! Now allocate all valus that have a differentiable
-          ! dependence.
-          allocate(&
-               flowDomsd(nn, level, sps)%x(0:ie,0:je,0:ke,3), &
-               flowDomsd(nn, level, sps)%vol(0:ib,0:jb,0:kb), &
-               flowDomsd(nn, level, sps)%si(0:ie,1:je,1:ke,3), &
-               flowDomsd(nn, level, sps)%sj(1:ie,0:je,1:ke,3), &
-               flowDomsd(nn, level, sps)%sk(1:ie,1:je,0:ke,3), &
-               flowDomsd(nn, level, sps)%rotMatrixI(il,2:jl,2:kl,3,3), &
-               flowDomsd(nn, level, sps)%rotMatrixJ(2:il,jl,2:kl,3,3), &
-               flowDomsd(nn, level, sps)%rotMatrixK(2:il,2:jl,kl,3,3), &
-               flowDomsd(nn, level, sps)%s(ie,je,ke,3),      &
-               flowDomsd(nn, level, sps)%sFaceI(0:ie,je,ke), &
-               flowDomsd(nn, level, sps)%sFaceJ(ie,0:je,ke), &
-               flowDomsd(nn, level, sps)%sFaceK(ie,je,0:ke), &
-               flowDomsd(nn, level, sps)%w (0:ib,0:jb,0:kb,1:nw), &
-               flowDomsd(nn, level, sps)%dw(0:ib,0:jb,0:kb,1:nw), &
-               flowDomsd(nn, level, sps)%fw(0:ib,0:jb,0:kb,1:nw), &
-               flowDomsd(nn, level, sps)%scratch(0:ib,0:jb,0:kb,5), &
-               flowDomsd(nn, level, sps)%p(0:ib,0:jb,0:kb), &
-               flowDomsd(nn, level, sps)%gamma(0:ib,0:jb,0:kb),  &
-               flowDomsd(nn, level, sps)%aa(0:ib,0:jb,0:kb), &
-               flowDomsd(nn, level, sps)%ux(il,jl,kl), &
-               flowDomsd(nn, level, sps)%uy(il,jl,kl), &
-               flowDomsd(nn, level, sps)%uz(il,jl,kl), &
-               flowDomsd(nn, level, sps)%vx(il,jl,kl), &
-               flowDomsd(nn, level, sps)%vy(il,jl,kl), &
-               flowDomsd(nn, level, sps)%vz(il,jl,kl), &
-               flowDomsd(nn, level, sps)%wx(il,jl,kl), &
-               flowDomsd(nn, level, sps)%wy(il,jl,kl), &
-               flowDomsd(nn, level, sps)%wz(il,jl,kl), &
-               flowDomsd(nn, level, sps)%qx(il,jl,kl), &
-               flowDomsd(nn, level, sps)%qy(il,jl,kl), &
-               flowDomsd(nn, level, sps)%qz(il,jl,kl), &
-               flowDomsd(nn, level, sps)%rlv(0:ib,0:jb,0:kb), &
-               flowDomsd(nn, level, sps)%rev(0:ib,0:jb,0:kb), &
-               flowDomsd(nn, level, sps)%dtl(1:ie,1:je,1:ke), &
-               flowDomsd(nn, level, sps)%radI(1:ie,1:je,1:ke), &
-               flowDomsd(nn, level, sps)%radJ(1:ie,1:je,1:ke), &
-               flowDomsd(nn, level, sps)%radK(1:ie,1:je,1:ke), &
-               flowDomsd(nn, level, sps)%BCData(nBocos), &
-               flowDomsd(nn, level, sps)%bmti1(je,ke,nt1:nt2,nt1:nt2), &
-               flowDomsd(nn, level, sps)%bmti2(je,ke,nt1:nt2,nt1:nt2), &
-               flowDomsd(nn, level, sps)%bmtj1(ie,ke,nt1:nt2,nt1:nt2), &
-               flowDomsd(nn, level, sps)%bmtj2(ie,ke,nt1:nt2,nt1:nt2), &
-               flowDomsd(nn, level, sps)%bmtk1(ie,je,nt1:nt2,nt1:nt2), &
-               flowDomsd(nn, level, sps)%bmtk2(ie,je,nt1:nt2,nt1:nt2), &
-               flowDomsd(nn, level, sps)%bvti1(je,ke,nt1:nt2), &
-               flowDomsd(nn, level, sps)%bvti2(je,ke,nt1:nt2), &
-               flowDomsd(nn, level, sps)%bvtj1(ie,ke,nt1:nt2), &
-               flowDomsd(nn, level, sps)%bvtj2(ie,ke,nt1:nt2), &
-               flowDomsd(nn, level, sps)%bvtk1(ie,je,nt1:nt2), &
-               flowDomsd(nn, level, sps)%bvtk2(ie,je,nt1:nt2), &
-               flowDomsd(nn, level, sps)%d2Wall(2:il,2:jl,2:kl), &
-               stat=ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          allocate(flowDomsd(nn, level, sps)%viscSubface(nviscBocos), &
-               stat=ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          ! Set the number of bocos/viscbocs
-          flowdomsd(nn, level, sps)%nBocos = flowdoms(nn, level, sps)%nbocos
-          flowDomsd(nn, level, sps)%nViscBocos = flowDoms(nn, level, sps)%nViscBocos
-          bocoLoop: do mm=1,nBocos
-
-             ! Store the cell range of the boundary subface
-             ! a bit easier.
-
-             iBeg = BCData(mm)%icbeg; iStop = BCData(mm)%icend
-             jBeg = BCData(mm)%jcbeg; jStop = BCData(mm)%jcend
-
-             inBeg = BCData(mm)%inBeg; inStop = BCData(mm)%inEnd
-             jnBeg = BCdata(mm)%jnBeg; jnStop = BCData(mm)%jnEnd
-             allocate(&
-                  flowDomsd(nn, level, sps)%BCData(mm)%norm(iBeg:iStop,jBeg:jStop,3), &
-                  flowDomsd(nn, level, sps)%BCData(mm)%rface(iBeg:iStop,jBeg:jStop), &
-                  flowDomsd(nn, level, sps)%BCData(mm)%Fp(inBeg+1:inStop, jnBeg+1:jnStop, 3),&
-                  flowDomsd(nn, level, sps)%BCData(mm)%Fv(inBeg+1:inStop, jnBeg+1:jnStop, 3),&
-                  flowDomsd(nn, level, sps)%BCData(mm)%Tp(inBeg:inStop, jnBeg:jnStop, 3),&
-                  flowDomsd(nn, level, sps)%BCData(mm)%Tv(inBeg:inStop, jnBeg:jnStop, 3),&
-                  flowDomsd(nn, level, sps)%BCData(mm)%F(inBeg:inStop, jnBeg:jnStop, 3),&
-                  flowDomsd(nn, level, sps)%BCData(mm)%T(inBeg:inStop, jnBeg:jnStop, 3),&
-                  flowDomsd(nn, level, sps)%BCData(mm)%area(inBeg+1:inStop, jnBeg+1:jnStop), &
-                  flowDomsd(nn, level, sps)%BCData(mm)%uSlip(iBeg:iStop,jBeg:jStop,3), &
-                  flowDomsd(nn, level, sps)%BCData(mm)%TNS_Wall(iBeg:iStop,jBeg:jStop), &
-                  flowDomsd(nn, level, sps)%BCData(mm)%ptInlet(iBeg:iStop,jBeg:jStop), &
-                  flowDomsd(nn, level, sps)%BCData(mm)%htInlet(iBeg:iStop,jBeg:jStop), &
-                  flowDomsd(nn, level, sps)%BCData(mm)%ttInlet(iBeg:iStop,jBeg:jStop), &
-                  flowDomsd(nn, level, sps)%BCData(mm)%turbInlet(iBeg:iStop,jBeg:jStop,nt1:nt2), &
-                  flowDomsd(nn, level, sps)%BCData(mm)%ps(iBeg:iStop,jBeg:jStop), stat=ierr)
-
-             call EChk(ierr,__FILE__,__LINE__)
-          end do bocoLoop
-
-          viscbocoLoop: do mm=1,nviscBocos
-
-             iBeg = BCData(mm)%inBeg + 1; iStop = BCData(mm)%inEnd
-             jBeg = BCData(mm)%jnBeg + 1; jStop = BCData(mm)%jnEnd
-
-             allocate(&
-                  flowDomsd(nn, level, sps)%viscSubface(mm)%tau(iBeg:iStop,jBeg:jStop,6), &
-                  flowDomsd(nn, level, sps)%viscSubface(mm)%q(iBeg:iStop,jBeg:jStop,6), &
-                  stat=ierr)
-             call EChk(ierr,__FILE__,__LINE__)
-          enddo viscbocoLoop
-       end do
-    end do
-
-    ! Allocate the derivatives values for the CGNS data structure used
-    ! to store boundary condition values
-    do nn=1, cgnsNDom
-       nBocos = cgnsDoms(nn)%nBocos
-       allocate(cgnsDomsd(nn)%bocoInfo(nBocos))
-       do iBoco = 1,nBocos
-          if (associated(cgnsDoms(nn)%bocoInfo(iBoco)%dataSet)) then
-             nDataSet = size(cgnsDoms(nn)%bocoInfo(iBoco)%dataSet)
-             allocate(cgnsDomsd(nn)%bocoInfo(iBoco)%dataSet(nDataSet))
-
-             do iData=1, nDataSet
-                if (associated(cgnsDoms(nn)%bocoInfo(iBoco)%dataSet(iData)%dirichletArrays)) then
-                   nDirichlet = size(cgnsDoms(nn)%bocoInfo(iBoco)%dataSet(iData)%dirichletArrays)
-                   allocate(cgnsDomsd(nn)%bocoInfo(iBoco)%dataSet(iData)%dirichletArrays(nDirichlet))
-
-                   do iDirichlet = 1, nDirichlet
-                      nArray = size(cgnsDoms(nn)%bocoInfo(iBoco)%dataSet(iData)%dirichletArrays(iDirichlet)%dataArr)
-                      allocate(cgnsDomsd(nn)%bocoInfo(iBoco)%dataSet(iData)%dirichletArrays(iDirichlet)%dataArr(nArray))
-                      cgnsDomsd(nn)%bocoInfo(iBoco)%dataSet(iData)%dirichletArrays(iDirichlet)%dataArr(nArray) = zero
-                   end do
+                    end do
                 end if
-             end do
-          end if
-       end do
-    end do
-
-    derivVarsAllocated = .True.
-  end subroutine allocDerivativeValues
-
-  subroutine zeroADSeeds(nn, level, sps)
-
-    use constants
-    use block, only : flowDomsd, flowDoms
-    use blockPointers
-    use inputTimeSpectral
-    use flowVarRefState
-    use inputPhysics
-    use BCPointers_b
-    use communication
-    use oversetData, only : oversetPresent
-    use cgnsGrid, only : cgnsDoms, cgnsDomsd, cgnsNDom
-    use actuatorRegionData, only : nActuatorRegions, actuatorRegionsd
-    implicit none
-
-    ! Input parameters
-    integer(kind=intType) :: nn, level, sps
-
-    ! Working parameters
-    integer(kind=intType) :: mm, i, iDom
-    integer(kind=intType) :: iBoco, iData, iDirichlet
-    flowDomsd(nn, level, sps)%d2wall = zero
-    flowDomsd(nn, level, sps)%x = zero
-    flowDomsd(nn, level, sps)%si = zero
-    flowDomsd(nn, level, sps)%sj = zero
-    flowDomsd(nn, level, sps)%sk = zero
-    flowDomsd(nn, level, sps)%vol = zero
-
-    flowDomsd(nn, level, sps)%s = zero
-    flowDomsd(nn, level, sps)%sFaceI = zero
-    flowDomsd(nn, level, sps)%sFaceJ = zero
-    flowDomsd(nn, level, sps)%sFaceK = zero
-
-    flowDomsd(nn, level, sps)%w = zero
-    flowDomsd(nn, level, sps)%dw = zero
-    flowDomsd(nn, level, sps)%fw = zero
-    flowDomsd(nn, level, sps)%scratch = zero
-
-    flowDomsd(nn, level, sps)%p = zero
-    flowDomsd(nn, level, sps)%gamma = zero
-    flowDomsd(nn, level, sps)%aa = zero
-
-    flowDomsd(nn, level, sps)%rlv = zero
-    flowDomsd(nn, level, sps)%rev = zero
-
-    flowDomsd(nn, level, sps)%dtl = zero
-
-    flowDomsd(nn, level, sps)%radI = zero
-    flowDomsd(nn, level, sps)%radJ = zero
-    flowDomsd(nn, level, sps)%radK = zero
-
-    flowDomsd(nn, level, sps)%ux = zero
-    flowDomsd(nn, level, sps)%uy = zero
-    flowDomsd(nn, level, sps)%uz = zero
-    flowDomsd(nn, level, sps)%vx = zero
-    flowDomsd(nn, level, sps)%vy = zero
-    flowDomsd(nn, level, sps)%vz = zero
-    flowDomsd(nn, level, sps)%wx = zero
-    flowDomsd(nn, level, sps)%wy = zero
-    flowDomsd(nn, level, sps)%wz = zero
-    flowDomsd(nn, level, sps)%qx = zero
-    flowDomsd(nn, level, sps)%qy = zero
-    flowDomsd(nn, level, sps)%qz = zero
-
-    flowDomsd(nn, level, sps)%bmti1 = zero
-    flowDomsd(nn, level, sps)%bmti2 = zero
-    flowDomsd(nn, level, sps)%bmtj1 = zero
-    flowDomsd(nn, level, sps)%bmtj2 = zero
-    flowDomsd(nn, level, sps)%bmtk1 = zero
-    flowDomsd(nn, level, sps)%bmtk2 = zero
-    flowDomsd(nn, level, sps)%bvti1 = zero
-    flowDomsd(nn, level, sps)%bvti2 = zero
-    flowDomsd(nn, level, sps)%bvtj1 = zero
-    flowDomsd(nn, level, sps)%bvtj2 = zero
-    flowDomsd(nn, level, sps)%bvtk1 = zero
-    flowDomsd(nn, level, sps)%bvtk2 = zero
-
-    bocoLoop: do mm=1, flowDoms(nn, level, sps)%nBocos
-       flowDomsd(nn, level, sps)%BCData(mm)%norm= zero
-       flowDomsd(nn, level, sps)%bcData(mm)%rface = zero
-       flowDomsd(nn, level, sps)%bcData(mm)%Fv = zero
-       flowDomsd(nn, level, sps)%bcData(mm)%Fp = zero
-       flowDomsd(nn, level, sps)%bcData(mm)%Tv = zero
-       flowDomsd(nn, level, sps)%bcData(mm)%Tp = zero
-       flowDomsd(nn, level, sps)%bcData(mm)%area = zero
-       flowDomsd(nn, level, sps)%BCData(mm)%uSlip = zero
-       flowDomsd(nn, level, sps)%BCData(mm)%TNS_Wall = zero
-       flowDomsd(nn, level, sps)%BCData(mm)%ptInlet = zero
-       flowDomsd(nn, level, sps)%BCData(mm)%htInlet = zero
-       flowDomsd(nn, level, sps)%BCData(mm)%ttInlet = zero
-       flowDomsd(nn, level, sps)%BCData(mm)%turbInlet = zero
-       flowDomsd(nn, level, sps)%BCData(mm)%ps = zero
-    end do bocoLoop
-
-
-    viscbocoLoop: do mm=1,flowDoms(nn, level, sps)%nViscBocos
-       flowDomsd(nn, level, sps)%viscSubface(mm)%tau = zero
-       flowDomsd(nn, level, sps)%viscSubface(mm)%q = zero
-    end do viscbocoLoop
-
-    ! For overset, the weights may be active in the comm structure. We
-    ! need to zero them before we can accumulate.
-    if (oversetPresent) then
-       ! Pointers to the overset comms to make it easier to read
-       sends: do i=1,commPatternOverset(level, sps)%nProcSend
-          commPatternOverset(level, sps)%sendList(i)%interpd = zero
-       end do sends
-       internalOverset(level, sps)%donorInterpd = zero
-    end if
-
-    alphad = zero
-    betad = zero
-    machd = zero
-    machGridd = zero
-    machCoefd = zero
-    pinfdimd = zero
-    tinfdimd = zero
-    rhoinfdimd = zero
-    rgasdimd = zero
-    pointrefd = zero
-    prefd = zero
-    rhoRefd = zero
-    Trefd = zero
-    murefd = zero
-    urefd = zero
-    hrefd = zero
-    timerefd = zero
-    pinfd = zero
-    pinfCorrd = zero
-    rhoinfd = zero
-    uinfd = zero
-    rgasd = zero
-    muinfd = zero
-    gammainfd = zero
-    winfd = zero
-    veldirfreestreamd = zero
-    liftdirectiond = zero
-    dragdirectiond = zero
-
-    ! Zero all the reverse seeds in the dirichlet input arrays
-    do iDom=1, cgnsNDom
-       do iBoco=1, cgnsDoms(iDom)%nBocos
-          if (associated(cgnsDoms(iDom)%bocoInfo(iBoco)%dataSet)) then
-             do iData=1, size(cgnsDoms(iDom)%bocoInfo(iBoco)%dataSet)
-                if (associated(cgnsDoms(iDom)%bocoInfo(iBoco)%dataSet(iData)%dirichletArrays)) then
-                   do iDirichlet = 1, size(cgnsDoms(iDom)%bocoInfo(iBoco)%dataSet(iData)%dirichletArrays)
-                      cgnsDomsd(iDom)%bocoInfo(iBoco)%dataSet(iData)%dirichletArrays(iDirichlet)%dataArr(:) = zero
-                   end do
+            end do
+        end do
+
+        derivVarsAllocated = .True.
+    end subroutine allocDerivativeValues
+
+    subroutine zeroADSeeds(nn, level, sps)
+
+        use constants
+        use block, only: flowDomsd, flowDoms
+        use blockPointers
+        use inputTimeSpectral
+        use flowVarRefState
+        use inputPhysics
+        use BCPointers_b
+        use communication
+        use oversetData, only: oversetPresent
+        use cgnsGrid, only: cgnsDoms, cgnsDomsd, cgnsNDom
+        use actuatorRegionData, only: nActuatorRegions, actuatorRegionsd
+        implicit none
+
+        ! Input parameters
+        integer(kind=intType) :: nn, level, sps
+
+        ! Working parameters
+        integer(kind=intType) :: mm, i, iDom
+        integer(kind=intType) :: iBoco, iData, iDirichlet
+        flowDomsd(nn, level, sps)%d2wall = zero
+        flowDomsd(nn, level, sps)%x = zero
+        flowDomsd(nn, level, sps)%si = zero
+        flowDomsd(nn, level, sps)%sj = zero
+        flowDomsd(nn, level, sps)%sk = zero
+        flowDomsd(nn, level, sps)%vol = zero
+
+        flowDomsd(nn, level, sps)%s = zero
+        flowDomsd(nn, level, sps)%sFaceI = zero
+        flowDomsd(nn, level, sps)%sFaceJ = zero
+        flowDomsd(nn, level, sps)%sFaceK = zero
+
+        flowDomsd(nn, level, sps)%w = zero
+        flowDomsd(nn, level, sps)%dw = zero
+        flowDomsd(nn, level, sps)%fw = zero
+        flowDomsd(nn, level, sps)%scratch = zero
+
+        flowDomsd(nn, level, sps)%p = zero
+        flowDomsd(nn, level, sps)%gamma = zero
+        flowDomsd(nn, level, sps)%aa = zero
+
+        flowDomsd(nn, level, sps)%rlv = zero
+        flowDomsd(nn, level, sps)%rev = zero
+
+        flowDomsd(nn, level, sps)%dtl = zero
+
+        flowDomsd(nn, level, sps)%radI = zero
+        flowDomsd(nn, level, sps)%radJ = zero
+        flowDomsd(nn, level, sps)%radK = zero
+
+        flowDomsd(nn, level, sps)%ux = zero
+        flowDomsd(nn, level, sps)%uy = zero
+        flowDomsd(nn, level, sps)%uz = zero
+        flowDomsd(nn, level, sps)%vx = zero
+        flowDomsd(nn, level, sps)%vy = zero
+        flowDomsd(nn, level, sps)%vz = zero
+        flowDomsd(nn, level, sps)%wx = zero
+        flowDomsd(nn, level, sps)%wy = zero
+        flowDomsd(nn, level, sps)%wz = zero
+        flowDomsd(nn, level, sps)%qx = zero
+        flowDomsd(nn, level, sps)%qy = zero
+        flowDomsd(nn, level, sps)%qz = zero
+
+        flowDomsd(nn, level, sps)%bmti1 = zero
+        flowDomsd(nn, level, sps)%bmti2 = zero
+        flowDomsd(nn, level, sps)%bmtj1 = zero
+        flowDomsd(nn, level, sps)%bmtj2 = zero
+        flowDomsd(nn, level, sps)%bmtk1 = zero
+        flowDomsd(nn, level, sps)%bmtk2 = zero
+        flowDomsd(nn, level, sps)%bvti1 = zero
+        flowDomsd(nn, level, sps)%bvti2 = zero
+        flowDomsd(nn, level, sps)%bvtj1 = zero
+        flowDomsd(nn, level, sps)%bvtj2 = zero
+        flowDomsd(nn, level, sps)%bvtk1 = zero
+        flowDomsd(nn, level, sps)%bvtk2 = zero
+
+        bocoLoop: do mm = 1, flowDoms(nn, level, sps)%nBocos
+            flowDomsd(nn, level, sps)%BCData(mm)%norm = zero
+            flowDomsd(nn, level, sps)%bcData(mm)%rface = zero
+            flowDomsd(nn, level, sps)%bcData(mm)%Fv = zero
+            flowDomsd(nn, level, sps)%bcData(mm)%Fp = zero
+            flowDomsd(nn, level, sps)%bcData(mm)%Tv = zero
+            flowDomsd(nn, level, sps)%bcData(mm)%Tp = zero
+            flowDomsd(nn, level, sps)%bcData(mm)%area = zero
+            flowDomsd(nn, level, sps)%BCData(mm)%uSlip = zero
+            flowDomsd(nn, level, sps)%BCData(mm)%TNS_Wall = zero
+            flowDomsd(nn, level, sps)%BCData(mm)%ptInlet = zero
+            flowDomsd(nn, level, sps)%BCData(mm)%htInlet = zero
+            flowDomsd(nn, level, sps)%BCData(mm)%ttInlet = zero
+            flowDomsd(nn, level, sps)%BCData(mm)%turbInlet = zero
+            flowDomsd(nn, level, sps)%BCData(mm)%ps = zero
+        end do bocoLoop
+
+        viscbocoLoop: do mm = 1, flowDoms(nn, level, sps)%nViscBocos
+            flowDomsd(nn, level, sps)%viscSubface(mm)%tau = zero
+            flowDomsd(nn, level, sps)%viscSubface(mm)%q = zero
+        end do viscbocoLoop
+
+        ! For overset, the weights may be active in the comm structure. We
+        ! need to zero them before we can accumulate.
+        if (oversetPresent) then
+            ! Pointers to the overset comms to make it easier to read
+            sends: do i = 1, commPatternOverset(level, sps)%nProcSend
+                commPatternOverset(level, sps)%sendList(i)%interpd = zero
+            end do sends
+            internalOverset(level, sps)%donorInterpd = zero
+        end if
+
+        alphad = zero
+        betad = zero
+        machd = zero
+        machGridd = zero
+        machCoefd = zero
+        pinfdimd = zero
+        tinfdimd = zero
+        rhoinfdimd = zero
+        rgasdimd = zero
+        pointrefd = zero
+        prefd = zero
+        rhoRefd = zero
+        Trefd = zero
+        murefd = zero
+        urefd = zero
+        hrefd = zero
+        timerefd = zero
+        pinfd = zero
+        pinfCorrd = zero
+        rhoinfd = zero
+        uinfd = zero
+        rgasd = zero
+        muinfd = zero
+        gammainfd = zero
+        winfd = zero
+        veldirfreestreamd = zero
+        liftdirectiond = zero
+        dragdirectiond = zero
+
+        ! Zero all the reverse seeds in the dirichlet input arrays
+        do iDom = 1, cgnsNDom
+            do iBoco = 1, cgnsDoms(iDom)%nBocos
+                if (associated(cgnsDoms(iDom)%bocoInfo(iBoco)%dataSet)) then
+                    do iData = 1, size(cgnsDoms(iDom)%bocoInfo(iBoco)%dataSet)
+                        if (associated(cgnsDoms(iDom)%bocoInfo(iBoco)%dataSet(iData)%dirichletArrays)) then
+                            do iDirichlet = 1, size(cgnsDoms(iDom)%bocoInfo(iBoco)%dataSet(iData)%dirichletArrays)
+                            cgnsDomsd(iDom)%bocoInfo(iBoco)%dataSet(iData)%dirichletArrays(iDirichlet)%dataArr(:) = zero
+                            end do
+                        end if
+                    end do
                 end if
-             end do
-          end if
-       end do
-    end do
-
-    ! And the reverse seeds in the actuator zones
-    do i=1, nActuatorRegions
-       actuatorRegionsd(i)%force = zero
-       actuatorRegionsd(i)%torque = zero
-       actuatorRegionsd(i)%heat = zero
-    end do
-
-  end subroutine zeroADSeeds
-  ! This is a special function that is sued to dealloc derivative values
-  ! in blockpointers_d for use with the AD code.
-
-  ! This routine setups "coloring" stencils for various sizes
+            end do
+        end do
+
+        ! And the reverse seeds in the actuator zones
+        do i = 1, nActuatorRegions
+            actuatorRegionsd(i)%force = zero
+            actuatorRegionsd(i)%torque = zero
+            actuatorRegionsd(i)%heat = zero
+        end do
+
+    end subroutine zeroADSeeds
+    ! This is a special function that is sued to dealloc derivative values
+    ! in blockpointers_d for use with the AD code.
+
+    ! This routine setups "coloring" stencils for various sizes
+
+    subroutine setup_PC_coloring(nn, level, nColor)
+
+        use constants
+        use blockPointers, only: flowDoms, ib, jb, kb
+        use utils, only: setPointers
+        implicit none
+
+        ! Input parameters
+        integer(kind=intType), intent(in) :: nn, level
+
+        ! Output parameters
+        integer(kind=intTYpe), intent(out) :: nColor
+
+        ! Working
+        integer(kind=intType) :: i, j, k
+
+        call setPointers(nn, level, 1)
+        !DIR$ NOVECTOR
+        do k = 0, kb
+            do j = 0, jb
+                do i = 0, ib
+                    ! Add the extra one for 1-based numbering (as opposed to zero-based)
+                    flowDoms(nn, level, 1)%color(i, j, k) = &
+                        mod(i + 5 * j + 4 * k, 7) + 1
+                end do
+            end do
+        end do
+
+        nColor = 7
 
-  subroutine setup_PC_coloring(nn, level, nColor)
+    end subroutine setup_PC_coloring
 
-    use constants
-    use blockPointers, only : flowDoms, ib, jb, kb
-    use utils, only : setPointers
-    implicit none
+    subroutine setup_dRdw_euler_coloring(nn, level, nColor)
 
-    ! Input parameters
-    integer(kind=intType), intent(in) :: nn, level
-
-    ! Output parameters
-    integer(kind=intTYpe), intent(out) :: nColor
+        use constants
+        use blockPointers, only: flowDoms, ib, jb, kb
+        use utils, only: setPointers
+        implicit none
 
-    ! Working
-    integer(kind=intType) :: i, j, k
+        ! Input parameters
+        integer(kind=intType), intent(in) :: nn, level
 
-    call setPointers(nn, level, 1)
-    !DIR$ NOVECTOR
-    do k=0, kb
-       do j=0, jb
-          do i=0, ib
-             ! Add the extra one for 1-based numbering (as opposed to zero-based)
-             flowDoms(nn, level, 1)%color(i, j, k) = &
-                  mod(i + 5*j + 4*k, 7) + 1
-          end do
-       end do
-    end do
-
-    nColor = 7
-
-  end subroutine setup_PC_coloring
-
-  subroutine setup_dRdw_euler_coloring(nn, level, nColor)
-
-    use constants
-    use blockPointers, only : flowDoms, ib, jb, kb
-    use utils, only : setPointers
-    implicit none
+        ! Output parameters
+        integer(kind=intTYpe), intent(out) :: nColor
 
-    ! Input parameters
-    integer(kind=intType), intent(in) :: nn, level
-
-    ! Output parameters
-    integer(kind=intTYpe), intent(out) :: nColor
+        ! Working
+        integer(kind=intType) :: i, j, k
 
-    ! Working
-    integer(kind=intType) :: i, j, k
+        call setPointers(nn, level, 1)
+        !DIR$ NOVECTOR
+        do k = 0, kb
+            do j = 0, jb
+                do i = 0, ib
+                    ! Add the extra one for 1-based numbering (as opposed to zero-based)
+                    flowDoms(nn, level, 1)%color(i, j, k) = &
+                        mod(i + 3 * j + 4 * k, 13) + 1
+                end do
+            end do
+        end do
 
-    call setPointers(nn, level, 1)
-    !DIR$ NOVECTOR
-    do k=0, kb
-       do j=0, jb
-          do i=0, ib
-             ! Add the extra one for 1-based numbering (as opposed to zero-based)
-             flowDoms(nn, level, 1)%color(i, j, k) = &
-                  mod( i + 3*j + 4*k , 13) + 1
-          end do
-       end do
-    end do
-
-    nColor = 13
-
-  end subroutine setup_dRdw_euler_coloring
-
-  subroutine setup_dRdw_visc_coloring(nn, level, nColor)
-
-    use constants
-    use blockPointers, only : flowDoms, ib, jb, kb
-    use utils, only : setPointers
-    implicit none
-
-    ! Input parameters
-    integer(kind=intType), intent(in) :: nn, level
+        nColor = 13
 
-    ! Output parameters
-    integer(kind=intTYpe), intent(out) :: nColor
+    end subroutine setup_dRdw_euler_coloring
 
-    ! Working
-    integer(kind=intType) :: i, j, k
+    subroutine setup_dRdw_visc_coloring(nn, level, nColor)
 
-    call setPointers(nn, level, 1) ! Just to get the correct sizes
-    !DIR$ NOVECTOR
-    do k=0, kb
-       do j=0, jb
-          do i=0, ib
-             ! Add the extra one for 1-based numbering (as opposed to zero-based)
-             flowDoms(nn, level, 1)%color(i,j,k) = &
-                  mod( i + 19*j + 11*k ,35) + 1
-          end do
-       end do
-    end do
+        use constants
+        use blockPointers, only: flowDoms, ib, jb, kb
+        use utils, only: setPointers
+        implicit none
 
-    nColor = 35
+        ! Input parameters
+        integer(kind=intType), intent(in) :: nn, level
 
-  end subroutine setup_dRdw_visc_coloring
+        ! Output parameters
+        integer(kind=intTYpe), intent(out) :: nColor
 
-  ! -------------------------------------------------------------
-  !                   Debugging Color Colorings
-  ! -------------------------------------------------------------
+        ! Working
+        integer(kind=intType) :: i, j, k
 
-  subroutine setup_3x3x3_coloring(nn, level, nColor)
+        call setPointers(nn, level, 1) ! Just to get the correct sizes
+        !DIR$ NOVECTOR
+        do k = 0, kb
+            do j = 0, jb
+                do i = 0, ib
+                    ! Add the extra one for 1-based numbering (as opposed to zero-based)
+                    flowDoms(nn, level, 1)%color(i, j, k) = &
+                        mod(i + 19 * j + 11 * k, 35) + 1
+                end do
+            end do
+        end do
 
-    use constants
-    use blockPointers, only : flowDoms, ib, jb, kb
-    use utils, only : setPointers
-    implicit none
+        nColor = 35
 
-    ! This is a dense 3x3x3 cube for debugging only
-    ! Input parameters
-    integer(kind=intType), intent(in) :: nn, level
+    end subroutine setup_dRdw_visc_coloring
 
-    ! Output parameters
-    integer(kind=intTYpe), intent(out) :: nColor
+    ! -------------------------------------------------------------
+    !                   Debugging Color Colorings
+    ! -------------------------------------------------------------
 
-    ! Working
-    integer(kind=intType) :: i, j, k, modi, modj, modk
-
-    call setPointers(nn, level, 1)
-    !DIR$ NOVECTOR
-    do k=0, kb
-       do j=0, jb
-          do i=0, ib
-             ! Add the extra one for 1-based numbering (as opposed to zero-based)
-             modi = mod(i, 3)
-             modj = mod(j, 3)
-             modk = mod(k, 3)
+    subroutine setup_3x3x3_coloring(nn, level, nColor)
 
-             flowDoms(nn, level, 1)%color(i, j, k) = modi + 3*modj + 9*modk + 1
+        use constants
+        use blockPointers, only: flowDoms, ib, jb, kb
+        use utils, only: setPointers
+        implicit none
 
-          end do
-       end do
-    end do
+        ! This is a dense 3x3x3 cube for debugging only
+        ! Input parameters
+        integer(kind=intType), intent(in) :: nn, level
 
-    nColor = 27
-  end subroutine setup_3x3x3_coloring
+        ! Output parameters
+        integer(kind=intTYpe), intent(out) :: nColor
 
-  subroutine setup_5x5x5_coloring(nn, level, nColor)
+        ! Working
+        integer(kind=intType) :: i, j, k, modi, modj, modk
 
-    use constants
-    use blockPointers, only : flowDoms, ib, jb, kb
-    use utils, only : setPointers
-    implicit none
+        call setPointers(nn, level, 1)
+        !DIR$ NOVECTOR
+        do k = 0, kb
+            do j = 0, jb
+                do i = 0, ib
+                    ! Add the extra one for 1-based numbering (as opposed to zero-based)
+                    modi = mod(i, 3)
+                    modj = mod(j, 3)
+                    modk = mod(k, 3)
 
-    ! This is a dense 5x5x5 cube for debugging only
-    ! Input parameters
-    integer(kind=intType), intent(in) :: nn, level
+                    flowDoms(nn, level, 1)%color(i, j, k) = modi + 3 * modj + 9 * modk + 1
 
-    ! Output parameters
-    integer(kind=intTYpe), intent(out) :: nColor
+                end do
+            end do
+        end do
 
-    ! Working
-    integer(kind=intType) :: i, j, k, modi, modj, modk
+        nColor = 27
+    end subroutine setup_3x3x3_coloring
 
-    call setPointers(nn, level, 1)
-    !DIR$ NOVECTOR
-    do k=0, kb
-       do j=0, jb
-          do i=0, ib
-             ! Add the extra one for 1-based numbering (as opposed to zero-based)
-             modi = mod(i, 5)
-             modj = mod(j, 5)
-             modk = mod(k, 5)
+    subroutine setup_5x5x5_coloring(nn, level, nColor)
 
-             flowDoms(nn, level, 1)%color(i, j, k) = modi + 5*modj + 25*modk + 1
+        use constants
+        use blockPointers, only: flowDoms, ib, jb, kb
+        use utils, only: setPointers
+        implicit none
 
-          end do
-       end do
-    end do
+        ! This is a dense 5x5x5 cube for debugging only
+        ! Input parameters
+        integer(kind=intType), intent(in) :: nn, level
 
-    nColor = 125
-  end subroutine setup_5x5x5_coloring
+        ! Output parameters
+        integer(kind=intTYpe), intent(out) :: nColor
 
-  subroutine setup_BF_coloring(nn, level, nColor)
+        ! Working
+        integer(kind=intType) :: i, j, k, modi, modj, modk
 
-    use constants
-    use blockPointers, only : flowDoms, ib, jb, kb
-    use utils, only : setPointers
-    implicit none
+        call setPointers(nn, level, 1)
+        !DIR$ NOVECTOR
+        do k = 0, kb
+            do j = 0, jb
+                do i = 0, ib
+                    ! Add the extra one for 1-based numbering (as opposed to zero-based)
+                    modi = mod(i, 5)
+                    modj = mod(j, 5)
+                    modk = mod(k, 5)
 
-    ! Input parameters
-    integer(kind=intType), intent(in) :: nn, level
+                    flowDoms(nn, level, 1)%color(i, j, k) = modi + 5 * modj + 25 * modk + 1
 
-    ! Output parameters
-    integer(kind=intTYpe), intent(out) :: nColor
+                end do
+            end do
+        end do
 
-    ! Working
-    integer(kind=intType) :: i, j, k
+        nColor = 125
+    end subroutine setup_5x5x5_coloring
 
-    ! This is a REALLY brute force coloring for debugging
+    subroutine setup_BF_coloring(nn, level, nColor)
 
-    call setPointers(nn, level, 1)
-    !DIR$ NOVECTOR
-    do k=0, kb
-       do j=0, jb
-          do i=0, ib
-             ! Add the extra one for 1-based numbering (as opposed to zero-based)
+        use constants
+        use blockPointers, only: flowDoms, ib, jb, kb
+        use utils, only: setPointers
+        implicit none
 
-             flowDoms(nn, level, 1)%color(i, j, k) = i + j*(ib+1) + k*((ib+1)*(jb+1)) + 1
-          end do
-       end do
-    end do
+        ! Input parameters
+        integer(kind=intType), intent(in) :: nn, level
 
-    nColor = (ib+1)*(jb+1)*(kb+1)
-  end subroutine setup_BF_coloring
+        ! Output parameters
+        integer(kind=intTYpe), intent(out) :: nColor
 
+        ! Working
+        integer(kind=intType) :: i, j, k
 
-  subroutine myMatCreate(matrix, blockSize, m, n, nnzDiagonal, nnzOffDiag, &
-       file, line)
-    ! Function to create petsc matrix to make stuff a little cleaner in
-    ! the code above. Also, PETSc always thinks is a good idea to
-    ! RANDOMLY change syntax between versions so this way there is only
-    ! one place to make a change based on petsc version.
+        ! This is a REALLY brute force coloring for debugging
 
-    use constants
-    use communication, only : adflow_comm_world
-    use utils, only : EChk, setPointers
+        call setPointers(nn, level, 1)
+        !DIR$ NOVECTOR
+        do k = 0, kb
+            do j = 0, jb
+                do i = 0, ib
+                    ! Add the extra one for 1-based numbering (as opposed to zero-based)
+
+                    flowDoms(nn, level, 1)%color(i, j, k) = i + j * (ib + 1) + k * ((ib + 1) * (jb + 1)) + 1
+                end do
+            end do
+        end do
+
+        nColor = (ib + 1) * (jb + 1) * (kb + 1)
+    end subroutine setup_BF_coloring
+
+    subroutine myMatCreate(matrix, blockSize, m, n, nnzDiagonal, nnzOffDiag, &
+                           file, line)
+        ! Function to create petsc matrix to make stuff a little cleaner in
+        ! the code above. Also, PETSc always thinks is a good idea to
+        ! RANDOMLY change syntax between versions so this way there is only
+        ! one place to make a change based on petsc version.
+
+        use constants
+        use communication, only: adflow_comm_world
+        use utils, only: EChk, setPointers
 #include <petsc/finclude/petsc.h>
-    use petsc
-    implicit none
-
-    Mat matrix
-    integer(kind=intType), intent(in) :: blockSize, m, n
-    integer(kind=intType), intent(in), dimension(*) :: nnzDiagonal, nnzOffDiag
-    character(len=*) :: file
-    integer(kind=intType) :: ierr, line
-    ! if (blockSize > 1) then
-       call MatCreateBAIJ(ADFLOW_COMM_WORLD, blockSize, &
-            m, n, PETSC_DETERMINE, PETSC_DETERMINE, &
-            0, nnzDiagonal, 0, nnzOffDiag, matrix, ierr)
-    ! else
-       ! call MatCreateAIJ(ADFLOW_COMM_WORLD,&
-            ! m, n, PETSC_DETERMINE, PETSC_DETERMINE, &
-            ! 0, nnzDiagonal, 0, nnzOffDiag, matrix, ierr)
-       call EChk(ierr, file, line)
-    ! end if
-
-    ! Warning: The array values is logically two-dimensional,
-    ! containing the values that are to be inserted. By default the
-    ! values are given in row major order, which is the opposite of
-    ! the Fortran convention, meaning that the value to be put in row
-    ! idxm[i] and column idxn[j] is located in values[i*n+j]. To allow
-    ! the insertion of values in column major order, one can call the
-    ! command MatSetOption(Mat A, MAT COLUMN ORIENTED);
-
-    call MatSetOption(matrix, MAT_ROW_ORIENTED, PETSC_FALSE, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    call MatSetOption(matrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-  end subroutine myMatCreate
-
-  subroutine MyKSPMonitor(myKsp, n, rnorm, dummy, ierr)
-    !
-    !      This is a user-defined routine for monitoring the KSP
-    !      iterative solvers. Instead of outputing the L2-norm at every
-    !      iteration (default PETSc monitor), it only does it every
-    !      'adjMonStep' iterations.
-    !
-    use ADjointPETSc
-    use inputADjoint
-    use communication
-    implicit none
-    !
-    !     Subroutine arguments.
-    !
-    ! myKsp - Iterative context
-    ! n     - Iteration number
-    ! rnorm - 2-norm (preconditioned) residual value
-    ! dummy - Optional user-defined monitor context (unused here)
-    ! ierr  - Return error code
-
-    real(kind=realType), pointer, dimension(:, :) :: myKsp
-    integer(kind=intType) :: n, dummy, ierr
-    real(kind=realType)   :: rnorm
-
-    ! Write the residual norm to stdout every adjMonStep iterations.
-
-    if(mod(n, adjMonStep) ==0 ) then
-       if( myid==0 ) write(*, "(I4, 1X, A, 1X, ES16.10)") n, 'KSP Residual norm', rnorm
-    end if
-
-    ierr = 0
-
-  end subroutine MyKSPMonitor
-
-  subroutine setupStandardKSP(kspObject, kspObjectType, gmresRestart, preConSide, &
-       globalPCType, ASMOverlap, globalPreConIts, localPCType, &
-       localMatrixOrdering, localFillLevel, localPreConIts)
-
-    ! This function sets up the supplied kspObject in the followin
-    ! specific fashion. The reason this setup is in
-    ! its own function is that it is used in the following places:
-    ! 1. Setting up the preconditioner to use for the NKsolver
-    ! 2. Setting up the preconditioner to use for the adjoint solver
-    ! 3. Setting up the smoothers on the coarse multigrid levels.
-    !
-    ! The hierarchy of the setup is:
-    !  kspObject --> Supplied KSP object
-    !  |
-    !  --> master_PC --> Preconditioner type set to KSP
-    !      |
-    !      --> master_PC_KSP --> KSP type set to Richardson with 'globalPreConIts'
-    !          |
-    !           --> globalPC --> PC type set to 'globalPCType'
-    !               |            Usually Additive Schwarz and overlap is set
-    !               |            with 'ASMOverlap'. Use 0 to get BlockJacobi
-    !               |
-    !               --> subKSP --> KSP type set to Richardon with 'LocalPreConIts'
-    !                   |
-    !                   --> subPC -->  PC type set to 'loclaPCType'.
-    !                                  Usually ILU. 'localFillLevel' is
-    !                                  set and 'localMatrixOrder' is used.
-    !
-    ! Note that if globalPreConIts=1 then maser_PC_KSP is NOT created and master_PC=globalPC
-    ! and if localPreConIts=1 then subKSP is set to preOnly.
-    use constants
-    use utils, only : ECHk
-    use inputADjoint, only : GMRESOrthogType
+        use petsc
+        implicit none
+
+        Mat matrix
+        integer(kind=intType), intent(in) :: blockSize, m, n
+        integer(kind=intType), intent(in), dimension(*) :: nnzDiagonal, nnzOffDiag
+        character(len=*) :: file
+        integer(kind=intType) :: ierr, line
+        ! if (blockSize > 1) then
+        call MatCreateBAIJ(ADFLOW_COMM_WORLD, blockSize, &
+                           m, n, PETSC_DETERMINE, PETSC_DETERMINE, &
+                           0, nnzDiagonal, 0, nnzOffDiag, matrix, ierr)
+        ! else
+        ! call MatCreateAIJ(ADFLOW_COMM_WORLD,&
+        ! m, n, PETSC_DETERMINE, PETSC_DETERMINE, &
+        ! 0, nnzDiagonal, 0, nnzOffDiag, matrix, ierr)
+        call EChk(ierr, file, line)
+        ! end if
+
+        ! Warning: The array values is logically two-dimensional,
+        ! containing the values that are to be inserted. By default the
+        ! values are given in row major order, which is the opposite of
+        ! the Fortran convention, meaning that the value to be put in row
+        ! idxm[i] and column idxn[j] is located in values[i*n+j]. To allow
+        ! the insertion of values in column major order, one can call the
+        ! command MatSetOption(Mat A, MAT COLUMN ORIENTED);
+
+        call MatSetOption(matrix, MAT_ROW_ORIENTED, PETSC_FALSE, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call MatSetOption(matrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+    end subroutine myMatCreate
+
+    subroutine MyKSPMonitor(myKsp, n, rnorm, dummy, ierr)
+        !
+        !      This is a user-defined routine for monitoring the KSP
+        !      iterative solvers. Instead of outputing the L2-norm at every
+        !      iteration (default PETSc monitor), it only does it every
+        !      'adjMonStep' iterations.
+        !
+        use ADjointPETSc
+        use inputADjoint
+        use communication
+        implicit none
+        !
+        !     Subroutine arguments.
+        !
+        ! myKsp - Iterative context
+        ! n     - Iteration number
+        ! rnorm - 2-norm (preconditioned) residual value
+        ! dummy - Optional user-defined monitor context (unused here)
+        ! ierr  - Return error code
+
+        real(kind=realType), pointer, dimension(:, :) :: myKsp
+        integer(kind=intType) :: n, dummy, ierr
+        real(kind=realType) :: rnorm
+
+        ! Write the residual norm to stdout every adjMonStep iterations.
+
+        if (mod(n, adjMonStep) == 0) then
+            if (myid == 0) write (*, "(I4, 1X, A, 1X, ES16.10)") n, 'KSP Residual norm', rnorm
+        end if
+
+        ierr = 0
+
+    end subroutine MyKSPMonitor
+
+    subroutine setupStandardKSP(kspObject, kspObjectType, gmresRestart, preConSide, &
+                                globalPCType, ASMOverlap, globalPreConIts, localPCType, &
+                                localMatrixOrdering, localFillLevel, localPreConIts)
+
+        ! This function sets up the supplied kspObject in the followin
+        ! specific fashion. The reason this setup is in
+        ! its own function is that it is used in the following places:
+        ! 1. Setting up the preconditioner to use for the NKsolver
+        ! 2. Setting up the preconditioner to use for the adjoint solver
+        ! 3. Setting up the smoothers on the coarse multigrid levels.
+        !
+        ! The hierarchy of the setup is:
+        !  kspObject --> Supplied KSP object
+        !  |
+        !  --> master_PC --> Preconditioner type set to KSP
+        !      |
+        !      --> master_PC_KSP --> KSP type set to Richardson with 'globalPreConIts'
+        !          |
+        !           --> globalPC --> PC type set to 'globalPCType'
+        !               |            Usually Additive Schwarz and overlap is set
+        !               |            with 'ASMOverlap'. Use 0 to get BlockJacobi
+        !               |
+        !               --> subKSP --> KSP type set to Richardon with 'LocalPreConIts'
+        !                   |
+        !                   --> subPC -->  PC type set to 'loclaPCType'.
+        !                                  Usually ILU. 'localFillLevel' is
+        !                                  set and 'localMatrixOrder' is used.
+        !
+        ! Note that if globalPreConIts=1 then maser_PC_KSP is NOT created and master_PC=globalPC
+        ! and if localPreConIts=1 then subKSP is set to preOnly.
+        use constants
+        use utils, only: ECHk
+        use inputADjoint, only: GMRESOrthogType
 #include <petsc/finclude/petsc.h>
-    use petsc
-    implicit none
-
-    ! Input Params
-    KSP kspObject
-    character(len=*), intent(in) :: kspObjectType, preConSide
-    character(len=*), intent(in) :: globalPCType, localPCType
-    character(len=*), intent(in) :: localMatrixOrdering
-    integer(kind=intType), intent(in) :: ASMOverlap, localFillLevel, gmresRestart
-    integer(kind=intType), intent(in) :: globalPreConIts, localPreConIts
-
-    ! Working Variables
-    PC  master_PC, globalPC, subpc
-    KSP master_PC_KSP, subksp
-    integer(kind=intType) :: nlocal, first, ierr
-
-
-    ! First, KSPSetFromOptions MUST be called
-    call KSPSetFromOptions(kspObject, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Set the type of solver to use:
-    call KSPSetType(kspObject, kspObjectType, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! If we're using GMRES set the possible gmres restart
-    call KSPGMRESSetRestart(kspObject, gmresRestart, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Set the orthogonalization method for GMRES
-    select case (GMRESOrthogType)
-       case ('modified_gram_schmidt')
-          ! Use modified Gram-Schmidt
-          call KSPGMRESSetOrthogonalization(kspObject, KSPGMRESModifiedGramSchmidtOrthogonalization, ierr)
-       case ('cgs_never_refine')
-          ! Use classical Gram-Schmidt with no refinement
-          call KSPGMRESSetCGSRefinementType(kspObject, KSP_GMRES_CGS_REFINE_NEVER, ierr)
-       case ('cgs_refine_if_needed')
-          ! Use classical Gram-Schmidt with refinement if needed
-          call KSPGMRESSetCGSRefinementType(kspObject, KSP_GMRES_CGS_REFINE_IFNEEDED, ierr)
-       case ('cgs_always_refine')
-          ! Use classical Gram-Schmidt with refinement at every iteration
-          call KSPGMRESSetCGSRefinementType(kspObject, KSP_GMRES_CGS_REFINE_ALWAYS, ierr)
-    end select
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Set the preconditioner side from option:
-    if (trim(preConSide) == 'right') then
-       call KSPSetPCSide(kspObject, PC_RIGHT, ierr)
-    else
-       call KSPSetPCSide(kspObject, PC_LEFT, ierr)
-    end if
-    call EChk(ierr, __FILE__, __LINE__)
-
-    if (trim(kspObjectType) == 'richardson') then
-       call KSPSetPCSide(kspObject, PC_LEFT, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-    end if
-
-    ! Since there is an extraneous matMult required when using the
-    ! richardson precondtiter with only 1 iteration, only use it we need
-    ! to do more than 1 iteration.
-    if (globalPreConIts > 1) then
-       ! Extract preconditioning context for main KSP solver: (master_PC)
-       call KSPGetPC(kspObject, master_PC, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-
-       ! Set the type of master_PC to ksp. This lets us do multiple
-       ! iterations of preconditioner application
-       call PCSetType(master_PC, 'ksp', ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-
-       ! Get the ksp context from master_PC which is the actual preconditioner:
-       call PCKSPGetKSP(master_PC, master_PC_KSP, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-
-       ! master_PC_KSP type will always be of type richardson. If the
-       ! number  of iterations is set to 1, this ksp object is transparent.
-
-       call KSPSetType(master_PC_KSP, 'richardson', ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-
-       ! Important to set the norm-type to None for efficiency.
-       call kspsetnormtype(master_PC_KSP, KSP_NORM_NONE, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-
-       ! Do one iteration of the outer ksp preconditioners. Note the
-       ! tolerances are unsued since we have set KSP_NORM_NON
-       call KSPSetTolerances(master_PC_KSP, PETSC_DEFAULT_REAL, &
-            PETSC_DEFAULT_REAL, PETSC_DEFAULT_REAL, &
-            globalPreConIts, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-
-       ! Get the 'preconditioner for master_PC_KSP, called 'globalPC'. This
-       ! preconditioner is potentially run multiple times.
-       call KSPgetPC(master_PC_KSP, globalPC, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-    else
-       ! Just pull out the pc-object if we are not using kspRichardson
-       call KSPGetPC(kspObject, globalPC, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-    end if
-
-    ! Set the type of 'globalPC'. This will almost always be additive Schwarz
-    call PCSetType(globalPC, 'asm', ierr)!globalPCType, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Set the overlap required
-    call PCASMSetOverlap(globalPC, ASMOverlap, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    !Setup the main ksp context before extracting the subdomains
-    call KSPSetUp(kspObject, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Extract the ksp objects for each subdomain
-    call PCASMGetSubKSP(globalPC, nlocal, first, subksp, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Since there is an extraneous matMult required when using the
-    ! richardson precondtiter with only 1 iteration, only use it we need
-    ! to do more than 1 iteration.
-    if (localPreConIts > 1) then
-       ! This 'subksp' object will ALSO be of type richardson so we can do
-       ! multiple iterations on the sub-domains
-       call KSPSetType(subksp, 'richardson', ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-
-       ! Set the number of iterations to do on local blocks. Tolerances are ignored.
-
-       call KSPSetTolerances(subksp, PETSC_DEFAULT_REAL, &
-            PETSC_DEFAULT_REAL, PETSC_DEFAULT_REAL, &
-            localPreConIts, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-
-       ! Again, norm_type is NONE since we don't want to check error
-       call kspsetnormtype(subksp, KSP_NORM_NONE, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-    else
-       call KSPSetType(subksp, 'preonly', ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-    end if
-
-    ! Extract the preconditioner for subksp object.
-    call KSPGetPC(subksp, subpc, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! The subpc type will almost always be ILU
-    call PCSetType(subpc, localPCType, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Setup the matrix ordering for the subpc object:
-    call PCFactorSetMatOrderingtype(subpc, localMatrixOrdering, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Set the ILU parameters
-    call PCFactorSetLevels(subpc, localFillLevel , ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-  end subroutine setupStandardKSP
-
-  subroutine setupStandardMultigrid(kspObject, kspObjectType, gmresRestart, &
-       preConSide, ASMoverlap, outerPreconIts, localMatrixOrdering, fillLevel)
-
-    ! and if localPreConIts=1 then subKSP is set to preOnly.
-    use constants
-    use utils, only : ECHk
-    use agmg, only : agmgOuterIts, agmgASMOverlap, agmgFillLevel, agmgMatrixOrdering, &
-         setupShellPC, destroyShellPC, applyShellPC
+        use petsc
+        implicit none
+
+        ! Input Params
+        KSP kspObject
+        character(len=*), intent(in) :: kspObjectType, preConSide
+        character(len=*), intent(in) :: globalPCType, localPCType
+        character(len=*), intent(in) :: localMatrixOrdering
+        integer(kind=intType), intent(in) :: ASMOverlap, localFillLevel, gmresRestart
+        integer(kind=intType), intent(in) :: globalPreConIts, localPreConIts
+
+        ! Working Variables
+        PC master_PC, globalPC, subpc
+        KSP master_PC_KSP, subksp
+        integer(kind=intType) :: nlocal, first, ierr
+
+        ! First, KSPSetFromOptions MUST be called
+        call KSPSetFromOptions(kspObject, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Set the type of solver to use:
+        call KSPSetType(kspObject, kspObjectType, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! If we're using GMRES set the possible gmres restart
+        call KSPGMRESSetRestart(kspObject, gmresRestart, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Set the orthogonalization method for GMRES
+        select case (GMRESOrthogType)
+        case ('modified_gram_schmidt')
+            ! Use modified Gram-Schmidt
+            call KSPGMRESSetOrthogonalization(kspObject, KSPGMRESModifiedGramSchmidtOrthogonalization, ierr)
+        case ('cgs_never_refine')
+            ! Use classical Gram-Schmidt with no refinement
+            call KSPGMRESSetCGSRefinementType(kspObject, KSP_GMRES_CGS_REFINE_NEVER, ierr)
+        case ('cgs_refine_if_needed')
+            ! Use classical Gram-Schmidt with refinement if needed
+            call KSPGMRESSetCGSRefinementType(kspObject, KSP_GMRES_CGS_REFINE_IFNEEDED, ierr)
+        case ('cgs_always_refine')
+            ! Use classical Gram-Schmidt with refinement at every iteration
+            call KSPGMRESSetCGSRefinementType(kspObject, KSP_GMRES_CGS_REFINE_ALWAYS, ierr)
+        end select
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Set the preconditioner side from option:
+        if (trim(preConSide) == 'right') then
+            call KSPSetPCSide(kspObject, PC_RIGHT, ierr)
+        else
+            call KSPSetPCSide(kspObject, PC_LEFT, ierr)
+        end if
+        call EChk(ierr, __FILE__, __LINE__)
+
+        if (trim(kspObjectType) == 'richardson') then
+            call KSPSetPCSide(kspObject, PC_LEFT, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+        end if
+
+        ! Since there is an extraneous matMult required when using the
+        ! richardson precondtiter with only 1 iteration, only use it we need
+        ! to do more than 1 iteration.
+        if (globalPreConIts > 1) then
+            ! Extract preconditioning context for main KSP solver: (master_PC)
+            call KSPGetPC(kspObject, master_PC, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Set the type of master_PC to ksp. This lets us do multiple
+            ! iterations of preconditioner application
+            call PCSetType(master_PC, 'ksp', ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Get the ksp context from master_PC which is the actual preconditioner:
+            call PCKSPGetKSP(master_PC, master_PC_KSP, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! master_PC_KSP type will always be of type richardson. If the
+            ! number  of iterations is set to 1, this ksp object is transparent.
+
+            call KSPSetType(master_PC_KSP, 'richardson', ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Important to set the norm-type to None for efficiency.
+            call kspsetnormtype(master_PC_KSP, KSP_NORM_NONE, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Do one iteration of the outer ksp preconditioners. Note the
+            ! tolerances are unsued since we have set KSP_NORM_NON
+            call KSPSetTolerances(master_PC_KSP, PETSC_DEFAULT_REAL, &
+                                  PETSC_DEFAULT_REAL, PETSC_DEFAULT_REAL, &
+                                  globalPreConIts, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Get the 'preconditioner for master_PC_KSP, called 'globalPC'. This
+            ! preconditioner is potentially run multiple times.
+            call KSPgetPC(master_PC_KSP, globalPC, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+        else
+            ! Just pull out the pc-object if we are not using kspRichardson
+            call KSPGetPC(kspObject, globalPC, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+        end if
+
+        ! Set the type of 'globalPC'. This will almost always be additive Schwarz
+        call PCSetType(globalPC, 'asm', ierr)!globalPCType, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Set the overlap required
+        call PCASMSetOverlap(globalPC, ASMOverlap, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        !Setup the main ksp context before extracting the subdomains
+        call KSPSetUp(kspObject, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Extract the ksp objects for each subdomain
+        call PCASMGetSubKSP(globalPC, nlocal, first, subksp, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Since there is an extraneous matMult required when using the
+        ! richardson precondtiter with only 1 iteration, only use it we need
+        ! to do more than 1 iteration.
+        if (localPreConIts > 1) then
+            ! This 'subksp' object will ALSO be of type richardson so we can do
+            ! multiple iterations on the sub-domains
+            call KSPSetType(subksp, 'richardson', ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Set the number of iterations to do on local blocks. Tolerances are ignored.
+
+            call KSPSetTolerances(subksp, PETSC_DEFAULT_REAL, &
+                                  PETSC_DEFAULT_REAL, PETSC_DEFAULT_REAL, &
+                                  localPreConIts, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Again, norm_type is NONE since we don't want to check error
+            call kspsetnormtype(subksp, KSP_NORM_NONE, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+        else
+            call KSPSetType(subksp, 'preonly', ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+        end if
+
+        ! Extract the preconditioner for subksp object.
+        call KSPGetPC(subksp, subpc, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! The subpc type will almost always be ILU
+        call PCSetType(subpc, localPCType, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Setup the matrix ordering for the subpc object:
+        call PCFactorSetMatOrderingtype(subpc, localMatrixOrdering, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Set the ILU parameters
+        call PCFactorSetLevels(subpc, localFillLevel, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+    end subroutine setupStandardKSP
+
+    subroutine setupStandardMultigrid(kspObject, kspObjectType, gmresRestart, &
+                                      preConSide, ASMoverlap, outerPreconIts, localMatrixOrdering, fillLevel)
+
+        ! and if localPreConIts=1 then subKSP is set to preOnly.
+        use constants
+        use utils, only: ECHk
+        use agmg, only: agmgOuterIts, agmgASMOverlap, agmgFillLevel, agmgMatrixOrdering, &
+                        setupShellPC, destroyShellPC, applyShellPC
 #include <petsc/finclude/petsc.h>
-    use petsc
-    implicit none
+        use petsc
+        implicit none
+
+        ! Input Params
+        KSP kspObject
+        character(len=*), intent(in) :: kspObjectType, preConSide
+        character(len=*), intent(in) :: localMatrixOrdering
+        integer(kind=intType), intent(in) :: ASMOverlap, fillLevel, gmresRestart
+        integer(kind=intType), intent(in) :: outerPreconIts
+
+        ! Working Variables
+        PC shellPC
+        integer(kind=intType) :: ierr
+
+        call KSPSetType(kspObject, kspObjectType, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Set the preconditioner side from option:
+        if (trim(preConSide) == 'right') then
+            call KSPSetPCSide(kspObject, PC_RIGHT, ierr)
+        else
+            call KSPSetPCSide(kspObject, PC_LEFT, ierr)
+        end if
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call KSPGMRESSetRestart(kspObject, gmresRestart, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    ! Input Params
-    KSP kspObject
-    character(len=*), intent(in) :: kspObjectType, preConSide
-    character(len=*), intent(in) :: localMatrixOrdering
-    integer(kind=intType), intent(in) :: ASMOverlap, fillLevel, gmresRestart
-    integer(kind=intType), intent(in) :: outerPreconIts
+        call KSPGetPC(kspObject, shellPC, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    ! Working Variables
-    PC  shellPC
-    integer(kind=intType) :: ierr
+        call PCSetType(shellPC, PCSHELL, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    call KSPSetType(kspObject, kspObjectType, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        call PCShellSetSetup(shellPC, setupShellPC, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    ! Set the preconditioner side from option:
-    if (trim(preConSide) == 'right') then
-       call KSPSetPCSide(kspObject, PC_RIGHT, ierr)
-    else
-       call KSPSetPCSide(kspObject, PC_LEFT, ierr)
-    end if
-    call EChk(ierr, __FILE__, __LINE__)
-
-    call KSPGMRESSetRestart(kspObject, gmresRestart, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    call KSPGetPC(kspObject, shellPC, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    call PCSetType(shellPC, PCSHELL, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    call PCShellSetSetup(shellPC, setupShellPC, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    call PCShellSetDestroy(shellPC, destroyShellPC, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    call PCShellSetApply(shellPC, applyShellPC, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Just save the remaining pieces ofinformation in the agmg module.
-    agmgOuterIts = outerPreConIts
-    agmgASMOverlap = asmOverlap
-    agmgFillLevel = fillLevel
-    agmgMatrixOrdering = localMatrixOrdering
-  end subroutine setupStandardMultigrid
-
-  subroutine destroyPETScVars
-
-    use constants
-    use ADjointPETSc, only : dRdWT, dRdwPreT, adjointKSP, adjointPETScVarsAllocated
-    use inputAdjoint, only : approxPC
-    use agmg, only : destroyAGMG
-    use utils, only : EChk
-    implicit none
-
-    integer(kind=intType) ::  ierr
-
-    if (adjointPETScVarsAllocated) then
-
-       ! Matrices
-       call MatDestroy(dRdWT, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       if (ApproxPC) then
-          call MatDestroy(dRdWPreT, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-       end if
-
-       call KSPDestroy(adjointKSP, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       call destroyAGMG()
-
-       adjointPETScVarsAllocated = .False.
-    end if
-
-  end subroutine destroyPETScVars
-
-  subroutine initializePETSc
-
-    ! Call the C-version of the petsc initialize routine
-
-    use ADjointPETSc, only : petsc_comm_world
-    use communication, only : adflow_comm_world
-    implicit none
-
-    PETSC_COMM_WORLD= ADFLOW_COMM_WORLD
-    call initPETScWrap()
-
-  end subroutine initializePETSc
-
-  subroutine finalizePETSc
-    !
-    !      Finalize PETSc by calling the appropriate routine
-    !      PetscFinalize provided in the PETSc library. This
-    !      automatically calls MPI_Finalize().
-    !
-    use ADjointPETSc, only : PETScIerr
-    implicit none
-    call PetscFinalize(PETScIerr)
-  end subroutine finalizePETSc
-
-subroutine statePreAllocation(onProc, offProc, wSize, stencil, N_stencil, &
-     level, transposed)
-
-  ! This is a generic function that determines the correct
-  ! pre-allocation for on and off processor parts of the TRANSPOSED
-  ! matrix. With overset, it is quite tricky to determine the
-  ! transpose sparsity structure exactly, so we use an alternative
-  ! approach. We proceed to determine the non-zeros of the untranposed
-  ! matrix, but instead of assigning a non-zero the row we're looping
-  ! over, we assign it to the column, which will become a row in the
-  ! tranposed matrix. Since this requires communication we use a petsc
-  ! vector for doing off processor values. This is not strictly
-  ! correct since we will be using the real values as floats, but
-  ! since the number of non-zeros per row is always going to be
-  ! bounded, we don't have to worry about the integer/floating point
-  ! conversions.
-
-  use constants
-  use blockPointers, only : nDom, il, jl, kl, fringes, flowDoms, globalCell, &
-       iBlank, gInd
-  use communication, only : adflow_comm_world
-  use inputTimeSpectral , only : nTimeIntervalsSpectral
-  use utils, only : setPointers, EChk
-  use sorting, only : unique
+        call PCShellSetDestroy(shellPC, destroyShellPC, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call PCShellSetApply(shellPC, applyShellPC, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Just save the remaining pieces ofinformation in the agmg module.
+        agmgOuterIts = outerPreConIts
+        agmgASMOverlap = asmOverlap
+        agmgFillLevel = fillLevel
+        agmgMatrixOrdering = localMatrixOrdering
+    end subroutine setupStandardMultigrid
+
+    subroutine destroyPETScVars
+
+        use constants
+        use ADjointPETSc, only: dRdWT, dRdwPreT, adjointKSP, adjointPETScVarsAllocated
+        use inputAdjoint, only: approxPC
+        use agmg, only: destroyAGMG
+        use utils, only: EChk
+        implicit none
+
+        integer(kind=intType) :: ierr
+
+        if (adjointPETScVarsAllocated) then
+
+            ! Matrices
+            call MatDestroy(dRdWT, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            if (ApproxPC) then
+                call MatDestroy(dRdWPreT, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+            end if
+
+            call KSPDestroy(adjointKSP, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            call destroyAGMG()
+
+            adjointPETScVarsAllocated = .False.
+        end if
+
+    end subroutine destroyPETScVars
+
+    subroutine initializePETSc
+
+        ! Call the C-version of the petsc initialize routine
+
+        use ADjointPETSc, only: petsc_comm_world
+        use communication, only: adflow_comm_world
+        implicit none
+
+        PETSC_COMM_WORLD = ADFLOW_COMM_WORLD
+        call initPETScWrap()
+
+    end subroutine initializePETSc
+
+    subroutine finalizePETSc
+        !
+        !      Finalize PETSc by calling the appropriate routine
+        !      PetscFinalize provided in the PETSc library. This
+        !      automatically calls MPI_Finalize().
+        !
+        use ADjointPETSc, only: PETScIerr
+        implicit none
+        call PetscFinalize(PETScIerr)
+    end subroutine finalizePETSc
+
+    subroutine statePreAllocation(onProc, offProc, wSize, stencil, N_stencil, &
+                                  level, transposed)
+
+        ! This is a generic function that determines the correct
+        ! pre-allocation for on and off processor parts of the TRANSPOSED
+        ! matrix. With overset, it is quite tricky to determine the
+        ! transpose sparsity structure exactly, so we use an alternative
+        ! approach. We proceed to determine the non-zeros of the untranposed
+        ! matrix, but instead of assigning a non-zero the row we're looping
+        ! over, we assign it to the column, which will become a row in the
+        ! tranposed matrix. Since this requires communication we use a petsc
+        ! vector for doing off processor values. This is not strictly
+        ! correct since we will be using the real values as floats, but
+        ! since the number of non-zeros per row is always going to be
+        ! bounded, we don't have to worry about the integer/floating point
+        ! conversions.
+
+        use constants
+        use blockPointers, only: nDom, il, jl, kl, fringes, flowDoms, globalCell, &
+                                 iBlank, gInd
+        use communication, only: adflow_comm_world
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use utils, only: setPointers, EChk
+        use sorting, only: unique
 #include <petsc/finclude/petsc.h>
-  use petsc
-  implicit none
-
-  ! Subroutine Arguments
-  integer(kind=intType), intent(in)  :: wSize
-  integer(kind=intType), intent(in)  :: N_stencil
-  integer(kind=intType), intent(in)  :: stencil(N_stencil, 3)
-  integer(kind=intType), intent(out) :: onProc(wSize), offProc(wSize)
-  integer(kind=intType), intent(in)  :: level
-  logical, intent(in) :: transposed
-
-  ! Local Variables
-  integer(kind=intType) :: nn, i, j, k, sps, ii, jj, kk, iii, jjj, kkk, n, m, gc
-  integer(kind=intType) :: iRowStart, iRowEnd, ierr, fInd
-  integer(kind=intType), dimension((N_stencil-1)*8+1) :: cellBuffer, dummy
-  Vec offProcVec
-  logical :: overset
-  real(kind=realType), pointer :: tmpPointer(:)
-
-
-  call vecCreateMPI(adflow_comm_world, wSize, PETSC_DETERMINE, offProcVec, ierr)
-  call EChk(ierr, __FILE__, __LINE__)
-
-
-  ! Zero the cell movement counter
-  ii = 0
-
-  ! Set the onProc values for each cell to the number of "OFF" time
-  ! spectral instances. The "on" spectral instances are accounted for
-  ! in the stencil
-  onProc(:) = nTimeIntervalsSpectral-1
-  offProc(:) = 0
-  ! Determine the range of onProc in dRdwT
-  iRowStart = flowDoms(1, 1, 1)%globalCell(2,2,2)
-  call setPointers(nDom, 1, nTimeIntervalsSpectral)
-  iRowEnd   = flowDoms(nDom, 1, nTimeIntervalsSpectral)%globalCell(il, jl, kl)
-
-  do nn=1, nDom
-     do sps=1, nTimeIntervalsSpectral
-        call setPointers(nn, level, sps)
-        ! Loop over each real cell
-        do k=2, kl
-           do j=2, jl
-              do i=2, il
-
-                 ! Increment the running ii counter ONLY for each each
-                 ! movement of center cell
-                 ii = ii + 1
-
-                 ! Reset the running tally of the number of neighbours
-                 n = 0
-
-                 blankedTest: if (iblank(i, j, k) == 1) then
-
-                    ! Short-cut flag for cells without interpolated
-                    ! cells in it's stencil
-                    overset = .False.
-
-                    ! Loop over the cells in the provided stencil:
-                    do jj=1, N_stencil
-
-                       ! Determine the cell we are dealing with
-                       iii = stencil(jj, 1) + i
-                       jjj = stencil(jj, 2) + j
-                       kkk = stencil(jj, 3) + k
-
-                       ! Index of the cell we are dealing with. Make
-                       ! code easier to read
-                       gc = globalCell(iii, jjj, kkk)
-
-                       ! Check if the cell in question is a fringe or not:
-                       if (iblank(iii, jjj, kkk) == 1) then
-                          ! regular cell, add to our list, if it is
-                          ! not a boundary
-                          if (gc >= 0) then
-                             n = n + 1
-                             cellBuffer(n) = gc
-                          end if
-
-                       else if (iblank(iii, jjj, kkk) == -1) then
-                          ! Fringe cell. What we do here is loop over
-                          ! the donors for this cell and add any
-                          ! entries that are real cells
-                          overset = .True.
-                          do kk=1,8
-                             gc = gInd(kk, iii, jjj, kkk)
-                             if (gc >= 0) then
-                                n = n + 1
-                                cellBuffer(n) = gc
-                             end if
-                          end do
-                       end if
+        use petsc
+        implicit none
+
+        ! Subroutine Arguments
+        integer(kind=intType), intent(in) :: wSize
+        integer(kind=intType), intent(in) :: N_stencil
+        integer(kind=intType), intent(in) :: stencil(N_stencil, 3)
+        integer(kind=intType), intent(out) :: onProc(wSize), offProc(wSize)
+        integer(kind=intType), intent(in) :: level
+        logical, intent(in) :: transposed
+
+        ! Local Variables
+        integer(kind=intType) :: nn, i, j, k, sps, ii, jj, kk, iii, jjj, kkk, n, m, gc
+        integer(kind=intType) :: iRowStart, iRowEnd, ierr, fInd
+        integer(kind=intType), dimension((N_stencil - 1)*8 + 1) :: cellBuffer, dummy
+        Vec offProcVec
+        logical :: overset
+        real(kind=realType), pointer :: tmpPointer(:)
+
+        call vecCreateMPI(adflow_comm_world, wSize, PETSC_DETERMINE, offProcVec, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Zero the cell movement counter
+        ii = 0
+
+        ! Set the onProc values for each cell to the number of "OFF" time
+        ! spectral instances. The "on" spectral instances are accounted for
+        ! in the stencil
+        onProc(:) = nTimeIntervalsSpectral - 1
+        offProc(:) = 0
+        ! Determine the range of onProc in dRdwT
+        iRowStart = flowDoms(1, 1, 1)%globalCell(2, 2, 2)
+        call setPointers(nDom, 1, nTimeIntervalsSpectral)
+        iRowEnd = flowDoms(nDom, 1, nTimeIntervalsSpectral)%globalCell(il, jl, kl)
+
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, level, sps)
+                ! Loop over each real cell
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+
+                            ! Increment the running ii counter ONLY for each each
+                            ! movement of center cell
+                            ii = ii + 1
+
+                            ! Reset the running tally of the number of neighbours
+                            n = 0
+
+                            blankedTest: if (iblank(i, j, k) == 1) then
+
+                                ! Short-cut flag for cells without interpolated
+                                ! cells in it's stencil
+                                overset = .False.
+
+                                ! Loop over the cells in the provided stencil:
+                                do jj = 1, N_stencil
+
+                                    ! Determine the cell we are dealing with
+                                    iii = stencil(jj, 1) + i
+                                    jjj = stencil(jj, 2) + j
+                                    kkk = stencil(jj, 3) + k
+
+                                    ! Index of the cell we are dealing with. Make
+                                    ! code easier to read
+                                    gc = globalCell(iii, jjj, kkk)
+
+                                    ! Check if the cell in question is a fringe or not:
+                                    if (iblank(iii, jjj, kkk) == 1) then
+                                        ! regular cell, add to our list, if it is
+                                        ! not a boundary
+                                        if (gc >= 0) then
+                                            n = n + 1
+                                            cellBuffer(n) = gc
+                                        end if
+
+                                    else if (iblank(iii, jjj, kkk) == -1) then
+                                        ! Fringe cell. What we do here is loop over
+                                        ! the donors for this cell and add any
+                                        ! entries that are real cells
+                                        overset = .True.
+                                        do kk = 1, 8
+                                            gc = gInd(kk, iii, jjj, kkk)
+                                            if (gc >= 0) then
+                                                n = n + 1
+                                                cellBuffer(n) = gc
+                                            end if
+                                        end do
+                                    end if
+                                end do
+
+                                ! We have now added 'n' cells to our buffer. For
+                                ! the overset interpolation case, it is possible
+                                ! (actually highly likely) that the same donor
+                                ! cells are used in multiple fringes. To avoid
+                                ! allocating more space than necessary, we
+                                ! unique-ify the values, producing 'm' unique
+                                ! values. If overset wasn't present, we can be
+                                ! sure that m=n and we simply don't do the unique
+                                ! operation.
+
+                                if (overset) then
+                                    call unique(cellBuffer, n, m, dummy)
+                                else
+                                    m = n
+                                end if
+
+                                ! -------------------- Non-transposed code ----------------
+                                if (.not. transposed) then
+                                    ! Now we loop over the total number of
+                                    ! (unique) neighbours we have and assign them
+                                    ! to either an on-proc or an off-proc entry:
+                                    do jj = 1, m
+                                        gc = cellBuffer(jj)
+
+                                        if (gc >= irowStart .and. gc <= iRowEnd) then
+                                            onProc(ii) = onProc(ii) + 1
+                                        else
+                                            offProc(ii) = offProc(ii) + 1
+                                        end if
+                                    end do
+                                else
+                                    ! -------------------- Ttransposed code ----------------
+
+                                    ! Now we ALSO loop over the total number of
+                                    ! (unique) neighbours. However, instead of
+                                    ! adding to the non-zeros to the on/offproc for
+                                    ! row 'ii', we add them to the column index
+                                    ! which will be the row index for the
+                                    ! transposed matrix.
+                                    do jj = 1, m
+                                        gc = cellBuffer(jj)
+
+                                        if (gc >= irowStart .and. gc <= iRowEnd) then
+                                            ! On processor values can be dealt with
+                                            ! directly since the diagonal part is square.
+                                            onProc(gc - iRowStart + 1) = onProc(gc - iRowStart + 1) + 1
+                                        else
+                                            ! The offproc values need to be sent to
+                                            ! the other processors and summed.
+                                            call VecSetValue(offProcVec, gc, real(1), ADD_VALUES, ierr)
+                                            call EChk(ierr, __FILE__, __LINE__)
+                                        end if
+                                    end do
+                                end if
+                            else
+                                ! Blanked and interpolated cells only need a single
+                                ! non-zero per row for the identity on the diagonal.
+                                onProc(ii) = onProc(ii) + 1
+                            end if blankedTest
+                        end do ! I loop
+                    end do ! J loop
+                end do ! K loop
+            end do ! sps loop
+        end do ! Domain Loop
+
+        ! Assemble the offproc vector. This doesn't take any time for the
+        ! non-transposed operation.
+        call VecAssemblyBegin(offProcVec, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecAssemblyEnd(offProcVec, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        if (transposed) then
+            ! Pull the local vector out and convert it back to integers.
+            call VecGetArrayF90(offProcVec, tmpPointer, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+            do i = 1, wSize
+                offProc(i) = int(tmpPointer(i) + half) ! Make sure, say 14.99999 is 15.
+            end do
+
+            call VecRestoreArrayF90(offProcVec, tmpPointer, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+        end if
+
+        ! Done with the temporary offProcVec
+        call vecDestroy(offProcVec, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+    end subroutine statePreAllocation
+    subroutine referenceShockSensor
+
+        ! Compute the reference shock sensor for PC computations
+        use constants
+        use blockPointers, only: ib, jb, kb, il, jl, kl, ie, je, ke, shockSensor, &
+                                 w, gamma, p, nDom, flowDoms
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use inputPhysics, only: equations
+        use inputDiscretization, only: spaceDiscr
+        use utils, only: setPointers, EChk
+        implicit none
+
+        ! Working variables
+        integer(kind=intType) :: nn, level, sps, i, j, k
+
+        level = 1
+
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, level, sps)
+
+                if (equations == EulerEquations .or. spaceDiscr == dissMatrix) then
+                    !shockSensor is Pressure
+                    do k = 0, kb
+                        do j = 0, jb
+                            do i = 0, ib
+                                shockSensor(i, j, k) = P(i, j, k)
+                            end do
+                        end do
+                    end do
+                else
+                    ! Enthalpy is used instead
+                    do k = 0, kb
+                        do j = 2, jl
+                            do i = 2, il
+                                shockSensor(i, j, k) = p(i, j, k) / (w(i, j, k, irho)**gamma(i, j, k))
+                            end do
+                        end do
                     end do
 
-                    ! We have now added 'n' cells to our buffer. For
-                    ! the overset interpolation case, it is possible
-                    ! (actually highly likely) that the same donor
-                    ! cells are used in multiple fringes. To avoid
-                    ! allocating more space than necessary, we
-                    ! unique-ify the values, producing 'm' unique
-                    ! values. If overset wasn't present, we can be
-                    ! sure that m=n and we simply don't do the unique
-                    ! operation.
-
-                    if (overset) then
-                       call unique(cellBuffer, n, m, dummy)
-                    else
-                       m = n
-                    end if
+                    do k = 2, kl
+                        do j = 2, jl
+                            shockSensor(0, j, k) = p(0, j, k) / (w(0, j, k, irho)**gamma(0, j, k))
+                            shockSensor(1, j, k) = p(1, j, k) / (w(1, j, k, irho)**gamma(1, j, k))
+                            shockSensor(ie, j, k) = p(ie, j, k) / (w(ie, j, k, irho)**gamma(ie, j, k))
+                            shockSensor(ib, j, k) = p(ib, j, k) / (w(ib, j, k, irho)**gamma(ib, j, k))
+                        end do
+                    end do
 
-                    ! -------------------- Non-transposed code ----------------
-                    if (.not. transposed) then
-                       ! Now we loop over the total number of
-                       ! (unique) neighbours we have and assign them
-                       ! to either an on-proc or an off-proc entry:
-                       do jj=1, m
-                          gc = cellBuffer(jj)
-
-                          if (gc >= irowStart .and. gc <= iRowEnd) then
-                             onProc(ii) = onProc(ii) + 1
-                          else
-                             offProc(ii) = offProc(ii) + 1
-                          end if
-                       end do
-                    else
-                       ! -------------------- Ttransposed code ----------------
-
-                       ! Now we ALSO loop over the total number of
-                       ! (unique) neighbours. However, instead of
-                       ! adding to the non-zeros to the on/offproc for
-                       ! row 'ii', we add them to the column index
-                       ! which will be the row index for the
-                       ! transposed matrix.
-                       do jj=1, m
-                          gc = cellBuffer(jj)
-
-                          if (gc >= irowStart .and. gc <= iRowEnd) then
-                             ! On processor values can be dealt with
-                             ! directly since the diagonal part is square.
-                             onProc(gc-iRowStart + 1) = onProc(gc-iRowStart+1)  +1
-                          else
-                             ! The offproc values need to be sent to
-                             ! the other processors and summed.
-                             call VecSetValue(offProcVec, gc, real(1), ADD_VALUES, ierr)
-                             call EChk(ierr, __FILE__, __LINE__)
-                          end if
-                       end do
-                    end if
-                 else
-                    ! Blanked and interpolated cells only need a single
-                    ! non-zero per row for the identity on the diagonal.
-                    onProc(ii) = onProc(ii) + 1
-                 end if blankedTest
-              end do ! I loop
-           end do ! J loop
-        end do ! K loop
-     end do ! sps loop
-  end do ! Domain Loop
-
-  ! Assemble the offproc vector. This doesn't take any time for the
-  ! non-transposed operation.
-  call VecAssemblyBegin(offProcVec, ierr)
-  call EChk(ierr, __FILE__, __LINE__)
-
-  call VecAssemblyEnd(offProcVec, ierr)
-  call EChk(ierr, __FILE__, __LINE__)
-
-  if (transposed) then
-     ! Pull the local vector out and convert it back to integers.
-     call VecGetArrayF90(offProcVec, tmpPointer, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
-     do i=1,wSize
-        offProc(i) = int(tmpPointer(i) + half) ! Make sure, say 14.99999 is 15.
-     end do
-
-     call VecRestoreArrayF90(offProcVec, tmpPointer, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
-  end if
-
-  ! Done with the temporary offProcVec
-  call vecDestroy(offProcVec, ierr)
-  call EChk(ierr, __FILE__, __LINE__)
-
-end subroutine statePreAllocation
-  subroutine referenceShockSensor
-
-    ! Compute the reference shock sensor for PC computations
-    use constants
-    use blockPointers, only : ib, jb, kb, il, jl, kl, ie, je, ke, shockSensor, &
-         w, gamma, p, nDom, flowDoms
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use inputPhysics, only : equations
-    use inputDiscretization, only : spaceDiscr
-    use utils, only : setPointers, EChk
-    implicit none
-
-    ! Working variables
-    integer(kind=intType) :: nn, level, sps, i, j, k
-
-    level = 1
-
-    do nn=1, nDom
-       do sps=1, nTimeIntervalsSpectral
-          call setPointers(nn, level, sps)
-
-          if (equations == EulerEquations .or. spaceDiscr == dissMatrix) then
-             !shockSensor is Pressure
-             do k=0, kb
-                do j=0, jb
-                   do i=0, ib
-                      shockSensor(i,j,k) = P(i,j,k)
-                   end do
-                end do
-             end do
-          else
-             ! Enthalpy is used instead
-             do k=0, kb
-                do j=2, jl
-                   do i=2, il
-                      shockSensor(i, j, k) = p(i, j, k)/(w(i, j, k, irho)**gamma(i, j, k))
-                   enddo
-                enddo
-             enddo
-
-             do k=2, kl
-                do j=2, jl
-                   shockSensor(0,  j, k) = p(0,  j, k)/(w(0,  j, k, irho)**gamma(0,  j, k))
-                   shockSensor(1,  j, k) = p(1,  j, k)/(w(1,  j, k, irho)**gamma(1,  j, k))
-                   shockSensor(ie, j, k) = p(ie, j, k)/(w(ie, j, k, irho)**gamma(ie, j, k))
-                   shockSensor(ib, j, k) = p(ib, j, k)/(w(ib, j, k, irho)**gamma(ib, j, k))
-                enddo
-             enddo
-
-             do k=2, kl
-                do i=2, il
-                   shockSensor(i, 0,  k) = p(i, 0,  k)/(w(i, 0,  k, irho)**gamma(i, 0,  k))
-                   shockSensor(i, 1,  k) = p(i, 1,  k)/(w(i, 1,  k, irho)**gamma(i, 1,  k))
-                   shockSensor(i, je, k) = p(i, je, k)/(w(i, je, k, irho)**gamma(i, je, k))
-                   shockSensor(i, jb, k) = p(i, jb, k)/(w(i, jb, k, irho)**gamma(i, jb, k))
-                enddo
-             enddo
-          end if
-       end do
-    end do
-  end subroutine referenceShockSensor
-
-  subroutine setFDReference(level)
-    use constants
-    use blockPointers, only : nDom, flowDoms, ib, jb, kb, il, jl, kl, &
-         shockSensor, w, volRef, dw
-    use inputPhysics, only : liftDirection, velDirFreeStream
-    use flowVarRefState, only : nw, nwf
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use utils, only : EChk, setPointers, getDirAngle
-    use residuals, only : initRes_block
-    use masterRoutines, only : block_res_state
-
-    implicit none
-
-    ! Input Parameters
-    integer(kind=intType) :: level
-    ! Working Parameters
-    integer(kind=intType) :: i, j, k, l, nn, sps
-
-    ! Compute the reference values for doing jacobian with FD
-    do nn=1, nDom
-       do sps=1, nTimeIntervalsSpectral
-
-          call setPointers(nn, level, sps)
-          call block_res_state(nn, sps)
-          ! Set the values
-          do l=1, nw
-             do k=0, kb
-                do j=0, jb
-                   do i=0, ib
-                      flowdoms(nn, 1, sps)%wtmp(i,j,k,l)  = w(i, j, k, l)
-                      flowdoms(nn, 1, sps)%dwtmp(i, j, k, l) = dw(i, j, k, l)
-                   end do
+                    do k = 2, kl
+                        do i = 2, il
+                            shockSensor(i, 0, k) = p(i, 0, k) / (w(i, 0, k, irho)**gamma(i, 0, k))
+                            shockSensor(i, 1, k) = p(i, 1, k) / (w(i, 1, k, irho)**gamma(i, 1, k))
+                            shockSensor(i, je, k) = p(i, je, k) / (w(i, je, k, irho)**gamma(i, je, k))
+                            shockSensor(i, jb, k) = p(i, jb, k) / (w(i, jb, k, irho)**gamma(i, jb, k))
+                        end do
+                    end do
+                end if
+            end do
+        end do
+    end subroutine referenceShockSensor
+
+    subroutine setFDReference(level)
+        use constants
+        use blockPointers, only: nDom, flowDoms, ib, jb, kb, il, jl, kl, &
+                                 shockSensor, w, volRef, dw
+        use inputPhysics, only: liftDirection, velDirFreeStream
+        use flowVarRefState, only: nw, nwf
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use utils, only: EChk, setPointers, getDirAngle
+        use residuals, only: initRes_block
+        use masterRoutines, only: block_res_state
+
+        implicit none
+
+        ! Input Parameters
+        integer(kind=intType) :: level
+        ! Working Parameters
+        integer(kind=intType) :: i, j, k, l, nn, sps
+
+        ! Compute the reference values for doing jacobian with FD
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+
+                call setPointers(nn, level, sps)
+                call block_res_state(nn, sps)
+                ! Set the values
+                do l = 1, nw
+                    do k = 0, kb
+                        do j = 0, jb
+                            do i = 0, ib
+                                flowdoms(nn, 1, sps)%wtmp(i, j, k, l) = w(i, j, k, l)
+                                flowdoms(nn, 1, sps)%dwtmp(i, j, k, l) = dw(i, j, k, l)
+                            end do
+                        end do
+                    end do
                 end do
-             end do
-          end do
 
-          call initRes_block(1, nwf, nn, sps)
+                call initRes_block(1, nwf, nn, sps)
 
-          ! Note: we have to divide by the volume for dwtmp2 since
-          ! normally, dw would have been mulitpiled by 1/Vol in block_res_state
+                ! Note: we have to divide by the volume for dwtmp2 since
+                ! normally, dw would have been mulitpiled by 1/Vol in block_res_state
 
-          do l=1, nw
-             do k=0, kb
-                do j=0, jb
-                   do i=0, ib
-                      flowdoms(nn, 1, sps)%dwtmp2(i, j, k, l) = &
-                           dw(i, j, k, l)/volRef(i, j, k)
-                   end do
+                do l = 1, nw
+                    do k = 0, kb
+                        do j = 0, jb
+                            do i = 0, ib
+                                flowdoms(nn, 1, sps)%dwtmp2(i, j, k, l) = &
+                                    dw(i, j, k, l) / volRef(i, j, k)
+                            end do
+                        end do
+                    end do
                 end do
-             end do
-          end do
-       end do
-    end do
-  end subroutine setFDReference
-
-  subroutine resetFDReference(level)
-
-    use constants
-    use blockPointers, only : nDom, flowDoms, ib, jb, kb, w, dw
-    use flowVarRefState, only : nw, nwf
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use utils, only : setPointers
-    implicit none
-
-    ! Input Parameters
-    integer(kind=intType) :: level
-
-    ! Working Parameters
-    integer(kind=intType) :: i, j, k, l, nn, sps
-    real(kind=realType) :: sepSensor, Cavitation, axisMoment
-
-    do nn=1, nDom
-       do sps=1, nTimeIntervalsSpectral
-          call setPointers(nn, level, sps)
-          ! Reset w and dw
-          do l=1, nw
-             do k=0, kb
-                do j=0, jb
-                   do i=0, ib
-                      w(i, j, k, l) = flowdoms(nn, 1, sps)%wtmp(i, j, k, l)
-                      dw(i, j, k, l) = flowdoms(nn, 1, sps)%dwtmp(i, j, k, l)
-                   end do
+            end do
+        end do
+    end subroutine setFDReference
+
+    subroutine resetFDReference(level)
+
+        use constants
+        use blockPointers, only: nDom, flowDoms, ib, jb, kb, w, dw
+        use flowVarRefState, only: nw, nwf
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use utils, only: setPointers
+        implicit none
+
+        ! Input Parameters
+        integer(kind=intType) :: level
+
+        ! Working Parameters
+        integer(kind=intType) :: i, j, k, l, nn, sps
+        real(kind=realType) :: sepSensor, Cavitation, axisMoment
+
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, level, sps)
+                ! Reset w and dw
+                do l = 1, nw
+                    do k = 0, kb
+                        do j = 0, jb
+                            do i = 0, ib
+                                w(i, j, k, l) = flowdoms(nn, 1, sps)%wtmp(i, j, k, l)
+                                dw(i, j, k, l) = flowdoms(nn, 1, sps)%dwtmp(i, j, k, l)
+                            end do
+                        end do
+                    end do
                 end do
-             end do
-          end do
-       end do
-    end do
-  end subroutine resetFDReference
-
-  subroutine setDiffSizes
-    !
-    !       This routine set the sizes for the pointers that will be
-    !       used in the forward debug mode and reverse mode AD.
-    !
-    use constants
-    use blockPointers, only : flowDoms, ib, jb, kb, ie, je, ke, ib, jb, ke, &
-         nBocos, nViscBocos, nDom
-    use flowVarRefState, only : nw, nwf, nt1, nt2
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use inputPhysics, only : equations
-    use diffSizes
-    implicit none
-
-    ! local variables
-    integer(kind=intType) :: nLevels
-
-    ! Compute nlevels
-    nLevels = ubound(flowDoms, 2)
-
-
-    ! Set the size for dynamic pointers to zero for debug purpose
-    ! bcdata%norm
-    ISIZE1OFDrfDrfbcdata_norm = 0
-    ISIZE2OFDrfDrfbcdata_norm = 0
-    ISIZE3OFDrfDrfbcdata_norm = 0
-
-    ! bcdata%rface
-    ISIZE1OFDrfDrfbcdata_rface = 0
-    ISIZE2OFDrfDrfbcdata_rface = 0
-
-    ! bcdata%m
-    ISIZE1OFDrfDrfbcdata_m = 0
-    ISIZE2OFDrfDrfbcdata_m = 0
-    ISIZE3OFDrfDrfbcdata_m = 0
-
-    ! bcdata%fp
-    ISIZE1OFDrfDrfbcdata_fp = 0
-    ISIZE2OFDrfDrfbcdata_fp = 0
-    ISIZE3OFDrfDrfbcdata_fp = 0
-
-    ! bcdata%fv
-    ISIZE1OFDrfDrfbcdata_fv = 0
-    ISIZE2OFDrfDrfbcdata_fv = 0
-    ISIZE3OFDrfDrfbcdata_fv = 0
-
-    ! bcdata%fp
-    ISIZE1OFDrfDrfbcdata_oarea = 0
-    ISIZE2OFDrfDrfbcdata_oarea = 0
-    ISIZE3OFDrfDrfbcdata_oarea = 0
-
-    ! sepSensor
-    ISIZE1OFDrfDrfbcdata_sepSensor = 0
-    ISIZE2OFDrfDrfbcdata_sepSensor = 0
-
-    ! Cavitation
-    ISIZE1OFDrfDrfbcdata_Cavitation = 0
-    ISIZE2OFDrfDrfbcdata_Cavitation = 0
-
-    ! AxisMoment
-    ISIZE1OFDrfDrfbcdata_axisMoment = 0
-    ISIZE2OFDrfDrfbcdata_axisMoment = 0
-
-    ! viscsubface%tau
-    ISIZE1OFDrfDrfviscsubface_tau = 0
-    ISIZE2OFDrfDrfviscsubface_tau = 0
-    ISIZE3OFDrfDrfviscsubface_tau = 0
-
-    ! prod
-    ISIZE1OFDrfprod = 0
-    ISIZE2OFDrfprod = 0
-    ISIZE3OFDrfprod = 0
-
-    ! vort
-    ISIZE1OFDrfvort = 0
-    ISIZE2OFDrfvort = 0
-    ISIZE3OFDrfvort = 0
-
-    ! dvt
-    ISIZE1OFDrfdvt = 0
-    ISIZE2OFDrfdvt = 0
-    ISIZE3OFDrfdvt = 0
-    ISIZE4OFDrfdvt = 0
-
-    ! vol
-    ISIZE1OFDrfvol = 0
-    ISIZE2OFDrfvol = 0
-    ISIZE3OFDrfvol = 0
-
-    ! rho,etot,u,v,w,p,k
-    ISIZE1OFrho = 0
-    ISIZE1OFetot = 0
-    ISIZE1OFu = 0
-    ISIZE1OFv = 0
-    ISIZE1OFw = 0
-    ISIZE1OFp = 0
-    ISIZE1OFk = 0
-
-    ! Du1, Du2, Du3
-    ISIZE1OFDu1 = 0
-    ISIZE1OFDu2 = 0
-    ISIZE1OFDu3 = 0
-
-    ! Left, Right, Flux
-    ISIZE1OFLeft = 0
-    ISIZE1OFRight = 0
-    ISIZE1OFFlux = 0
-
-    ! bcdata
-    ISIZE1OFDrfbcdata = 0!nbocos
-
-    ! s
-    ISIZE1OFDrfs = 0!ie
-    ISIZE2OFDrfs = je
-    ISIZE3OFDrfs = ke
-    ISIZE4OFDrfs = 3
-
-    ! sfacei
-    ISIZE3OFDrfsfaceI = 0!ie + 1
-    ISIZE2OFDrfsfaceI = je
-    ISIZE1OFDrfsfaceI = ke
-
-    ! sfacej
-    ISIZE3OFDrfsfaceJ = 0!ie
-    ISIZE2OFDrfsfaceJ = je + 1
-    ISIZE1OFDrfsfaceJ = ke
-
-    ! sfacek
-    ISIZE3OFDrfsfaceK = 0!ie
-    ISIZE2OFDrfsfaceK = je
-    ISIZE1OFDrfsfaceK = ke + 1
-
-    ! Define size for the pointers
-    ! flowdoms
-    ISIZE1OFDrfflowdoms = nDom
-    ISIZE2OFDrfflowdoms = nLevels
-    ISIZE3OFDrfflowdoms = nTimeIntervalsSpectral
-
-    !viscSubface
-    ISIZE1OFDrfviscsubface = nViscBocos
-    ISIZE1OFDrfflowdoms_bcdata = nBocos
-
-    ! x
-    ISIZE4OFDrfx = 3
-    ISIZE1OFDrfx = ie + 1
-    ISIZE2OFDrfx = je + 1
-    ISIZE3OFDrfx = ke + 1
-
-    ! flowdoms_x
-    ISIZE4OFDRFFLOWDOMS_X = 3
-    ISIZE1OFDRFFLOWDOMS_X = ie + 1
-    ISIZE2OFDRFFLOWDOMS_X = je + 1
-    ISIZE3OFDRFFLOWDOMS_X = ke + 1
-
-    if ( equations == RANSEquations ) then
-       ! rev
-       ISIZE1OFDrfrev = ib + 1
-       ISIZE2OFDrfrev = jb + 1
-       ISIZE3OFDrfrev = kb + 1
-    else
-       ! rev
-       ISIZE1OFDrfrev = 0
-       ISIZE2OFDrfrev = 0
-       ISIZE3OFDrfrev = 0
-    end if
-
-    ! rlv
-    ISIZE1OFDrfrlv = ib + 1
-    ISIZE2OFDrfrlv = jb + 1
-    ISIZE3OFDrfrlv = kb + 1
-
-    ! w
-    ISIZE4OFDrfw = nw
-    ISIZE1OFDrfw = ib + 1
-    ISIZE2OFDrfw = jb + 1
-    ISIZE3OFDrfw = kb + 1
-
-    ! flowdoms_x
-    ISIZE4OFDRFFLOWDOMS_W = nw
-    ISIZE1OFDRFFLOWDOMS_W = ib + 1
-    ISIZE2OFDRFFLOWDOMS_W = jb + 1
-    ISIZE3OFDRFFLOWDOMS_W = kb + 1
-
-    ! flowdoms_dw
-    ISIZE4OFDRFFLOWDOMS_dw = nw
-    ISIZE1OFDRFFLOWDOMS_dw = ib + 1
-    ISIZE2OFDRFFLOWDOMS_dw = jb + 1
-    ISIZE3OFDRFFLOWDOMS_dw = kb + 1
-
-    ! flowdoms_vol
-    ISIZE1OFDRFFLOWDOMS_vol = ib + 1
-    ISIZE2OFDRFFLOWDOMS_vol = jb + 1
-    ISIZE3OFDRFFLOWDOMS_vol = kb + 1
-
-    ! fw
-    ISIZE4OFDrffw = nwf
-    ISIZE1OFDrffw = ib + 1
-    ISIZE2OFDrffw = jb + 1
-    ISIZE3OFDrffw = kb + 1
-
-    ! dw
-    ISIZE4OFDrfdw = nw
-    ISIZE1OFDrfdw = ib + 1
-    ISIZE2OFDrfdw = jb + 1
-    ISIZE3OFDrfdw = kb + 1
-
-    ! p
-    ISIZE1OFDrfp = ib + 1
-    ISIZE2OFDrfp = jb + 1
-    ISIZE3OFDrfp = kb + 1
-
-    ! gamma
-    ISIZE1OFDrfgamma = ib + 1
-    ISIZE2OFDrfgamma = jb + 1
-    ISIZE3OFDrfgamma = kb + 1
-
-    ! dtl
-    ISIZE1OFDrfdtl = ie
-    ISIZE2OFDrfdtl = je
-    ISIZE3OFDrfdtl = ke
-
-    ! radI
-    ISIZE1OFDrfradI = ie
-    ISIZE2OFDrfradI = je
-    ISIZE3OFDrfradI = ke
-
-    ! radJ
-    ISIZE1OFDrfradJ = ie
-    ISIZE2OFDrfradJ = je
-    ISIZE3OFDrfradJ = ke
-
-    ! radK
-    ISIZE1OFDrfradK = ie
-    ISIZE2OFDrfradK = je
-    ISIZE3OFDrfradK = ke
-
-    ! sI
-    ISIZE1OFDRFsI = ie + 1
-    ISIZE2OFDRFsI = je
-    ISIZE3OFDRFsI = ke
-    ISIZE4OFDRFsI = 3
-
-    ! sJ
-    ISIZE1OFDRFsJ = ie
-    ISIZE2OFDRFsJ = je + 1
-    ISIZE3OFDRFsJ = ke
-    ISIZE4OFDRFsJ = 3
-
-    ! sK
-    ISIZE1OFDRFsK = ie
-    ISIZE2OFDRFsK = je
-    ISIZE3OFDRFsK = ke + 1
-    ISIZE4OFDRFsK = 3
-
-    !bmti1
-    ISIZE1OFDrfbmti1 = je
-    ISIZE2OFDrfbmti1 = ke
-    ISIZE3OFDrfbmti1 = nt2 - nt1 + 1
-    ISIZE4OFDrfbmti1 = nt2 - nt1 + 1
-
-    !bmti2
-    ISIZE1OFDrfbmti2 = je
-    ISIZE2OFDrfbmti2 = ke
-    ISIZE3OFDrfbmti2 = nt2 - nt1 + 1
-    ISIZE4OFDrfbmti2 = nt2 - nt1 + 1
-
-    !bmtj1
-    ISIZE1OFDrfbmtj1 = ie
-    ISIZE2OFDrfbmtj1 = ke
-    ISIZE3OFDrfbmtj1 = nt2 - nt1 + 1
-    ISIZE4OFDrfbmtj1 = nt2 - nt1 + 1
-
-    !bmtj2
-    ISIZE1OFDrfbmtj2 = ie
-    ISIZE2OFDrfbmtj2 = ke
-    ISIZE3OFDrfbmtj2 = nt2 - nt1 + 1
-    ISIZE4OFDrfbmtj2 = nt2 - nt1 + 1
-
-    !bmtk1
-    ISIZE1OFDrfbmtk1 = ie
-    ISIZE2OFDrfbmtk1 = je
-    ISIZE3OFDrfbmtk1 = nt2 - nt1 + 1
-    ISIZE4OFDrfbmtk1 = nt2 - nt1 + 1
-
-    !bmtk2
-    ISIZE1OFDrfbmtk2 = ie
-    ISIZE2OFDrfbmtk2 = je
-    ISIZE3OFDrfbmtk2 = nt2 - nt1 + 1
-    ISIZE4OFDrfbmtk2 = nt2 - nt1 + 1
-
-    !bvti1
-    ISIZE1OFDrfbvti1 = je
-    ISIZE2OFDrfbvti1 = ke
-    ISIZE3OFDrfbvti1 = nt2 - nt1 + 1
-
-    !bvti2
-    ISIZE1OFDrfbvti2 = je
-    ISIZE2OFDrfbvti2 = ke
-    ISIZE3OFDrfbvti2 = nt2 - nt1 + 1
-    !bvti1
-    ISIZE1OFDrfbvti1 = je
-    ISIZE2OFDrfbvti1 = ke
-    ISIZE3OFDrfbvti1 = nt2 - nt1 + 1
-
-    !bvti2
-    ISIZE1OFDrfbvti2 = je
-    ISIZE2OFDrfbvti2 = ke
-    ISIZE3OFDrfbvti2 = nt2 - nt1 + 1
-
-    !bvtk1
-    ISIZE1OFDrfbvtk1 = ie
-    ISIZE2OFDrfbvtk1 = je
-    ISIZE3OFDrfbvtk1 = nt2 - nt1 + 1
-
-    !bvtk2
-    ISIZE1OFDrfbvtk2 = ie
-    ISIZE2OFDrfbvtk2 = je
-    ISIZE3OFDrfbvtk2 = nt2 - nt1 + 1
-
-  end subroutine setDiffSizes
+            end do
+        end do
+    end subroutine resetFDReference
+
+    subroutine setDiffSizes
+        !
+        !       This routine set the sizes for the pointers that will be
+        !       used in the forward debug mode and reverse mode AD.
+        !
+        use constants
+        use blockPointers, only: flowDoms, ib, jb, kb, ie, je, ke, ib, jb, ke, &
+                                 nBocos, nViscBocos, nDom
+        use flowVarRefState, only: nw, nwf, nt1, nt2
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use inputPhysics, only: equations
+        use diffSizes
+        implicit none
+
+        ! local variables
+        integer(kind=intType) :: nLevels
+
+        ! Compute nlevels
+        nLevels = ubound(flowDoms, 2)
+
+        ! Set the size for dynamic pointers to zero for debug purpose
+        ! bcdata%norm
+        ISIZE1OFDrfDrfbcdata_norm = 0
+        ISIZE2OFDrfDrfbcdata_norm = 0
+        ISIZE3OFDrfDrfbcdata_norm = 0
+
+        ! bcdata%rface
+        ISIZE1OFDrfDrfbcdata_rface = 0
+        ISIZE2OFDrfDrfbcdata_rface = 0
+
+        ! bcdata%m
+        ISIZE1OFDrfDrfbcdata_m = 0
+        ISIZE2OFDrfDrfbcdata_m = 0
+        ISIZE3OFDrfDrfbcdata_m = 0
+
+        ! bcdata%fp
+        ISIZE1OFDrfDrfbcdata_fp = 0
+        ISIZE2OFDrfDrfbcdata_fp = 0
+        ISIZE3OFDrfDrfbcdata_fp = 0
+
+        ! bcdata%fv
+        ISIZE1OFDrfDrfbcdata_fv = 0
+        ISIZE2OFDrfDrfbcdata_fv = 0
+        ISIZE3OFDrfDrfbcdata_fv = 0
+
+        ! bcdata%fp
+        ISIZE1OFDrfDrfbcdata_oarea = 0
+        ISIZE2OFDrfDrfbcdata_oarea = 0
+        ISIZE3OFDrfDrfbcdata_oarea = 0
+
+        ! sepSensor
+        ISIZE1OFDrfDrfbcdata_sepSensor = 0
+        ISIZE2OFDrfDrfbcdata_sepSensor = 0
+
+        ! Cavitation
+        ISIZE1OFDrfDrfbcdata_Cavitation = 0
+        ISIZE2OFDrfDrfbcdata_Cavitation = 0
+
+        ! AxisMoment
+        ISIZE1OFDrfDrfbcdata_axisMoment = 0
+        ISIZE2OFDrfDrfbcdata_axisMoment = 0
+
+        ! viscsubface%tau
+        ISIZE1OFDrfDrfviscsubface_tau = 0
+        ISIZE2OFDrfDrfviscsubface_tau = 0
+        ISIZE3OFDrfDrfviscsubface_tau = 0
+
+        ! prod
+        ISIZE1OFDrfprod = 0
+        ISIZE2OFDrfprod = 0
+        ISIZE3OFDrfprod = 0
+
+        ! vort
+        ISIZE1OFDrfvort = 0
+        ISIZE2OFDrfvort = 0
+        ISIZE3OFDrfvort = 0
+
+        ! dvt
+        ISIZE1OFDrfdvt = 0
+        ISIZE2OFDrfdvt = 0
+        ISIZE3OFDrfdvt = 0
+        ISIZE4OFDrfdvt = 0
+
+        ! vol
+        ISIZE1OFDrfvol = 0
+        ISIZE2OFDrfvol = 0
+        ISIZE3OFDrfvol = 0
+
+        ! rho,etot,u,v,w,p,k
+        ISIZE1OFrho = 0
+        ISIZE1OFetot = 0
+        ISIZE1OFu = 0
+        ISIZE1OFv = 0
+        ISIZE1OFw = 0
+        ISIZE1OFp = 0
+        ISIZE1OFk = 0
+
+        ! Du1, Du2, Du3
+        ISIZE1OFDu1 = 0
+        ISIZE1OFDu2 = 0
+        ISIZE1OFDu3 = 0
+
+        ! Left, Right, Flux
+        ISIZE1OFLeft = 0
+        ISIZE1OFRight = 0
+        ISIZE1OFFlux = 0
+
+        ! bcdata
+        ISIZE1OFDrfbcdata = 0!nbocos
+
+        ! s
+        ISIZE1OFDrfs = 0!ie
+        ISIZE2OFDrfs = je
+        ISIZE3OFDrfs = ke
+        ISIZE4OFDrfs = 3
+
+        ! sfacei
+        ISIZE3OFDrfsfaceI = 0!ie + 1
+        ISIZE2OFDrfsfaceI = je
+        ISIZE1OFDrfsfaceI = ke
+
+        ! sfacej
+        ISIZE3OFDrfsfaceJ = 0!ie
+        ISIZE2OFDrfsfaceJ = je + 1
+        ISIZE1OFDrfsfaceJ = ke
+
+        ! sfacek
+        ISIZE3OFDrfsfaceK = 0!ie
+        ISIZE2OFDrfsfaceK = je
+        ISIZE1OFDrfsfaceK = ke + 1
+
+        ! Define size for the pointers
+        ! flowdoms
+        ISIZE1OFDrfflowdoms = nDom
+        ISIZE2OFDrfflowdoms = nLevels
+        ISIZE3OFDrfflowdoms = nTimeIntervalsSpectral
+
+        !viscSubface
+        ISIZE1OFDrfviscsubface = nViscBocos
+        ISIZE1OFDrfflowdoms_bcdata = nBocos
+
+        ! x
+        ISIZE4OFDrfx = 3
+        ISIZE1OFDrfx = ie + 1
+        ISIZE2OFDrfx = je + 1
+        ISIZE3OFDrfx = ke + 1
+
+        ! flowdoms_x
+        ISIZE4OFDRFFLOWDOMS_X = 3
+        ISIZE1OFDRFFLOWDOMS_X = ie + 1
+        ISIZE2OFDRFFLOWDOMS_X = je + 1
+        ISIZE3OFDRFFLOWDOMS_X = ke + 1
+
+        if (equations == RANSEquations) then
+            ! rev
+            ISIZE1OFDrfrev = ib + 1
+            ISIZE2OFDrfrev = jb + 1
+            ISIZE3OFDrfrev = kb + 1
+        else
+            ! rev
+            ISIZE1OFDrfrev = 0
+            ISIZE2OFDrfrev = 0
+            ISIZE3OFDrfrev = 0
+        end if
+
+        ! rlv
+        ISIZE1OFDrfrlv = ib + 1
+        ISIZE2OFDrfrlv = jb + 1
+        ISIZE3OFDrfrlv = kb + 1
+
+        ! w
+        ISIZE4OFDrfw = nw
+        ISIZE1OFDrfw = ib + 1
+        ISIZE2OFDrfw = jb + 1
+        ISIZE3OFDrfw = kb + 1
+
+        ! flowdoms_x
+        ISIZE4OFDRFFLOWDOMS_W = nw
+        ISIZE1OFDRFFLOWDOMS_W = ib + 1
+        ISIZE2OFDRFFLOWDOMS_W = jb + 1
+        ISIZE3OFDRFFLOWDOMS_W = kb + 1
+
+        ! flowdoms_dw
+        ISIZE4OFDRFFLOWDOMS_dw = nw
+        ISIZE1OFDRFFLOWDOMS_dw = ib + 1
+        ISIZE2OFDRFFLOWDOMS_dw = jb + 1
+        ISIZE3OFDRFFLOWDOMS_dw = kb + 1
+
+        ! flowdoms_vol
+        ISIZE1OFDRFFLOWDOMS_vol = ib + 1
+        ISIZE2OFDRFFLOWDOMS_vol = jb + 1
+        ISIZE3OFDRFFLOWDOMS_vol = kb + 1
+
+        ! fw
+        ISIZE4OFDrffw = nwf
+        ISIZE1OFDrffw = ib + 1
+        ISIZE2OFDrffw = jb + 1
+        ISIZE3OFDrffw = kb + 1
+
+        ! dw
+        ISIZE4OFDrfdw = nw
+        ISIZE1OFDrfdw = ib + 1
+        ISIZE2OFDrfdw = jb + 1
+        ISIZE3OFDrfdw = kb + 1
+
+        ! p
+        ISIZE1OFDrfp = ib + 1
+        ISIZE2OFDrfp = jb + 1
+        ISIZE3OFDrfp = kb + 1
+
+        ! gamma
+        ISIZE1OFDrfgamma = ib + 1
+        ISIZE2OFDrfgamma = jb + 1
+        ISIZE3OFDrfgamma = kb + 1
+
+        ! dtl
+        ISIZE1OFDrfdtl = ie
+        ISIZE2OFDrfdtl = je
+        ISIZE3OFDrfdtl = ke
+
+        ! radI
+        ISIZE1OFDrfradI = ie
+        ISIZE2OFDrfradI = je
+        ISIZE3OFDrfradI = ke
+
+        ! radJ
+        ISIZE1OFDrfradJ = ie
+        ISIZE2OFDrfradJ = je
+        ISIZE3OFDrfradJ = ke
+
+        ! radK
+        ISIZE1OFDrfradK = ie
+        ISIZE2OFDrfradK = je
+        ISIZE3OFDrfradK = ke
+
+        ! sI
+        ISIZE1OFDRFsI = ie + 1
+        ISIZE2OFDRFsI = je
+        ISIZE3OFDRFsI = ke
+        ISIZE4OFDRFsI = 3
+
+        ! sJ
+        ISIZE1OFDRFsJ = ie
+        ISIZE2OFDRFsJ = je + 1
+        ISIZE3OFDRFsJ = ke
+        ISIZE4OFDRFsJ = 3
+
+        ! sK
+        ISIZE1OFDRFsK = ie
+        ISIZE2OFDRFsK = je
+        ISIZE3OFDRFsK = ke + 1
+        ISIZE4OFDRFsK = 3
+
+        !bmti1
+        ISIZE1OFDrfbmti1 = je
+        ISIZE2OFDrfbmti1 = ke
+        ISIZE3OFDrfbmti1 = nt2 - nt1 + 1
+        ISIZE4OFDrfbmti1 = nt2 - nt1 + 1
+
+        !bmti2
+        ISIZE1OFDrfbmti2 = je
+        ISIZE2OFDrfbmti2 = ke
+        ISIZE3OFDrfbmti2 = nt2 - nt1 + 1
+        ISIZE4OFDrfbmti2 = nt2 - nt1 + 1
+
+        !bmtj1
+        ISIZE1OFDrfbmtj1 = ie
+        ISIZE2OFDrfbmtj1 = ke
+        ISIZE3OFDrfbmtj1 = nt2 - nt1 + 1
+        ISIZE4OFDrfbmtj1 = nt2 - nt1 + 1
+
+        !bmtj2
+        ISIZE1OFDrfbmtj2 = ie
+        ISIZE2OFDrfbmtj2 = ke
+        ISIZE3OFDrfbmtj2 = nt2 - nt1 + 1
+        ISIZE4OFDrfbmtj2 = nt2 - nt1 + 1
+
+        !bmtk1
+        ISIZE1OFDrfbmtk1 = ie
+        ISIZE2OFDrfbmtk1 = je
+        ISIZE3OFDrfbmtk1 = nt2 - nt1 + 1
+        ISIZE4OFDrfbmtk1 = nt2 - nt1 + 1
+
+        !bmtk2
+        ISIZE1OFDrfbmtk2 = ie
+        ISIZE2OFDrfbmtk2 = je
+        ISIZE3OFDrfbmtk2 = nt2 - nt1 + 1
+        ISIZE4OFDrfbmtk2 = nt2 - nt1 + 1
+
+        !bvti1
+        ISIZE1OFDrfbvti1 = je
+        ISIZE2OFDrfbvti1 = ke
+        ISIZE3OFDrfbvti1 = nt2 - nt1 + 1
+
+        !bvti2
+        ISIZE1OFDrfbvti2 = je
+        ISIZE2OFDrfbvti2 = ke
+        ISIZE3OFDrfbvti2 = nt2 - nt1 + 1
+        !bvti1
+        ISIZE1OFDrfbvti1 = je
+        ISIZE2OFDrfbvti1 = ke
+        ISIZE3OFDrfbvti1 = nt2 - nt1 + 1
+
+        !bvti2
+        ISIZE1OFDrfbvti2 = je
+        ISIZE2OFDrfbvti2 = ke
+        ISIZE3OFDrfbvti2 = nt2 - nt1 + 1
+
+        !bvtk1
+        ISIZE1OFDrfbvtk1 = ie
+        ISIZE2OFDrfbvtk1 = je
+        ISIZE3OFDrfbvtk1 = nt2 - nt1 + 1
+
+        !bvtk2
+        ISIZE1OFDrfbvtk2 = ie
+        ISIZE2OFDrfbvtk2 = je
+        ISIZE3OFDrfbvtk2 = nt2 - nt1 + 1
+
+    end subroutine setDiffSizes
 
 end module adjointUtils
diff --git a/src/adjoint/fortranPC.F90 b/src/adjoint/fortranPC.F90
index d68d5b845..e82512e94 100644
--- a/src/adjoint/fortranPC.F90
+++ b/src/adjoint/fortranPC.F90
@@ -5,952 +5,945 @@
 ! not production ready at all.
 
 module fortranPC
-  contains
-  subroutine allocPCMem(level)
+contains
+    subroutine allocPCMem(level)
 
-    ! This routine allocates memory for the fortran-based PC. It is
-    ! currently not used anywhere, but it become useful in the future.
+        ! This routine allocates memory for the fortran-based PC. It is
+        ! currently not used anywhere, but it become useful in the future.
 
-    use constants
-    use blockPointers, only : nDom, nx, ny, nz, il, jl, kl, ie, je, ke, flowDoms
-    use inputTimeSpectral, onlY : nTimeIntervalsSpectral
-    use flowVarRefState, only : nw
-    use utils, only : EChk, setPointers
-    implicit none
+        use constants
+        use blockPointers, only: nDom, nx, ny, nz, il, jl, kl, ie, je, ke, flowDoms
+        use inputTimeSpectral, onlY: nTimeIntervalsSpectral
+        use flowVarRefState, only: nw
+        use utils, only: EChk, setPointers
+        implicit none
 
-    integer(kind=intType), intent(in) :: level
-    integer(kind=intType) :: nn, sps
+        integer(kind=intType), intent(in) :: level
+        integer(kind=intType) :: nn, sps
 
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, level, sps)
 
-    do nn=1, nDom
-       do sps=1, nTimeIntervalsSpectral
-          call setPointers(nn, level, sps)
+                if (.not. associated(flowDoms(nn, level, sps)%PCMat)) then
 
-          if (.not. associated(flowDoms(nn, level, sps)%PCMat)) then
+                    allocate (flowDoms(nn, level, sps)%PCMat(nw, 7 * nw, 2:il, 2:jl, 2:kl))
 
-             allocate(flowDoms(nn, level, sps)%PCMat(nw, 7*nw, 2:il, 2:jl, 2:kl))
+                    ! I direciton data
+                    allocate (flowDoms(nn, level, sps)%i_D_Fact(nw, nx * nw, 2:jl, 2:kl))
+                    allocate (flowDoms(nn, level, sps)%i_L_Fact(nw, (nx - 1) * nw, 2:jl, 2:kl))
+                    allocate (flowDoms(nn, level, sps)%i_U_Fact(nw, (nx - 1) * nw, 2:jl, 2:kl))
+                    allocate (flowDoms(nn, level, sps)%i_U2_Fact(nw, (nx - 2) * nw, 2:jl, 2:kl))
 
-             ! I direciton data
-             allocate(flowDoms(nn, level, sps)%i_D_Fact(nw, nx*nw, 2:jl, 2:kl))
-             allocate(flowDoms(nn, level, sps)%i_L_Fact(nw, (nx-1)*nw, 2:jl, 2:kl))
-             allocate(flowDoms(nn, level, sps)%i_U_Fact(nw, (nx-1)*nw, 2:jl, 2:kl))
-             allocate(flowDoms(nn, level, sps)%i_U2_Fact(nw, (nx-2)*nw, 2:jl, 2:kl))
+                    ! J direction data
+                    allocate (flowDoms(nn, level, sps)%j_D_Fact(nw, ny * nw, 2:il, 2:kl))
+                    allocate (flowDoms(nn, level, sps)%j_L_Fact(nw, (ny - 1) * nw, 2:il, 2:kl))
+                    allocate (flowDoms(nn, level, sps)%j_U_Fact(nw, (ny - 1) * nw, 2:il, 2:kl))
+                    allocate (flowDoms(nn, level, sps)%j_U2_Fact(nw, (ny - 2) * nw, 2:il, 2:kl))
 
-             ! J direction data
-             allocate(flowDoms(nn, level, sps)%j_D_Fact(nw, ny*nw, 2:il, 2:kl))
-             allocate(flowDoms(nn, level, sps)%j_L_Fact(nw, (ny-1)*nw, 2:il, 2:kl))
-             allocate(flowDoms(nn, level, sps)%j_U_Fact(nw, (ny-1)*nw, 2:il, 2:kl))
-             allocate(flowDoms(nn, level, sps)%j_U2_Fact(nw, (ny-2)*nw, 2:il, 2:kl))
+                    ! K direciton data
+                    allocate (flowDoms(nn, level, sps)%k_D_Fact(nw, nz * nw, 2:il, 2:jl))
+                    allocate (flowDoms(nn, level, sps)%k_L_Fact(nw, (nz - 1) * nw, 2:il, 2:jl))
+                    allocate (flowDoms(nn, level, sps)%k_U_Fact(nw, (nz - 1) * nw, 2:il, 2:jl))
+                    allocate (flowDoms(nn, level, sps)%k_U2_Fact(nw, (nz - 2) * nw, 2:il, 2:jl))
 
-             ! K direciton data
-             allocate(flowDoms(nn, level, sps)%k_D_Fact(nw, nz*nw, 2:il, 2:jl))
-             allocate(flowDoms(nn, level, sps)%k_L_Fact(nw, (nz-1)*nw, 2:il, 2:jl))
-             allocate(flowDoms(nn, level, sps)%k_U_Fact(nw, (nz-1)*nw, 2:il, 2:jl))
-             allocate(flowDoms(nn, level, sps)%k_U2_Fact(nw, (nz-2)*nw, 2:il, 2:jl))
+                    ! iPIV arrays
+                    allocate (flowDoms(nn, level, sps)%i_ipiv(nw, nx, 2:jl, 2:kl))
+                    allocate (flowDoms(nn, level, sps)%j_ipiv(nw, ny, 2:il, 2:kl))
+                    allocate (flowDoms(nn, level, sps)%k_ipiv(nw, nz, 2:il, 2:jl))
 
-             ! iPIV arrays
-             allocate(flowDoms(nn, level, sps)%i_ipiv(nw, nx, 2:jl, 2:kl))
-             allocate(flowDoms(nn, level, sps)%j_ipiv(nw, ny, 2:il, 2:kl))
-             allocate(flowDoms(nn, level, sps)%k_ipiv(nw, nz, 2:il, 2:jl))
-
-             ! Vectors
-             allocate(flowDoms(nn, level, sps)%pcVec1(nw, 1:ie, 1:je, 1:ke))
-             allocate(flowDoms(nn, level, sps)%pcVec2(nw, 1:ie, 1:je, 1:ke))
-
-          end if
-       end do
-    end do
-  end subroutine allocPCMem
+                    ! Vectors
+                    allocate (flowDoms(nn, level, sps)%pcVec1(nw, 1:ie, 1:je, 1:ke))
+                    allocate (flowDoms(nn, level, sps)%pcVec2(nw, 1:ie, 1:je, 1:ke))
 
+                end if
+            end do
+        end do
+    end subroutine allocPCMem
 
-  subroutine setupPCMatrix(useAD,  useTranspose, frozenTurb, level)
+    subroutine setupPCMatrix(useAD, useTranspose, frozenTurb, level)
 #ifndef USE_NO_PETSC
 
-    ! This routine generates a fortran form of the PCmatrix. It is
-    ! currently not used anywhere, but it become useful in the future.
-    use block, only : flowDomsd, flowDoms
-    use blockPointers
-    use inputDiscretization
-    use inputTimeSpectral
-    use inputPhysics
-    use iteration
-    use flowVarRefState
-    use inputAdjoint
-    use stencils
-    use diffSizes
-    use communication
-    use adjointVars
-    use turbMod
-    use utils, only : setPointers, EChk, getDirAngle, setPointers_d
-    use haloExchange, only : whalo2
-    use masterRoutines, only : block_res_state
-    use adjointUtils, only : zeroADSeeds
+        ! This routine generates a fortran form of the PCmatrix. It is
+        ! currently not used anywhere, but it become useful in the future.
+        use block, only: flowDomsd, flowDoms
+        use blockPointers
+        use inputDiscretization
+        use inputTimeSpectral
+        use inputPhysics
+        use iteration
+        use flowVarRefState
+        use inputAdjoint
+        use stencils
+        use diffSizes
+        use communication
+        use adjointVars
+        use turbMod
+        use utils, only: setPointers, EChk, getDirAngle, setPointers_d
+        use haloExchange, only: whalo2
+        use masterRoutines, only: block_res_state
+        use adjointUtils, only: zeroADSeeds
 #ifndef USE_COMPLEX
-    use masterRoutines, only : block_res_state_d
+        use masterRoutines, only: block_res_state_d
 #endif
-    implicit none
+        implicit none
 
-    ! Input Variables
-    logical, intent(in) :: useAD,  useTranspose, frozenTurb
-    integer(kind=intType), intent(in) :: level
+        ! Input Variables
+        logical, intent(in) :: useAD, useTranspose, frozenTurb
+        integer(kind=intType), intent(in) :: level
 
-    ! Local variables.
-    integer(kind=intType) :: ierr, nn, sps, sps2, i, j, k, l, ll, ii, jj, kk
-    integer(kind=intType) :: nColor, iColor, jColor, irow, icol, fmDim, frow
-    integer(kind=intType) :: nTransfer, nState, tmp, icount
-    integer(kind=intType) :: n_stencil, i_stencil, ind1, orderturbsave
-    integer(kind=intType), dimension(:, :), pointer :: stencil
-    real(kind=realType) :: delta_x, one_over_dx
-    real(kind=realType) :: delta_x_turb, one_over_dx_turb
+        ! Local variables.
+        integer(kind=intType) :: ierr, nn, sps, sps2, i, j, k, l, ll, ii, jj, kk
+        integer(kind=intType) :: nColor, iColor, jColor, irow, icol, fmDim, frow
+        integer(kind=intType) :: nTransfer, nState, tmp, icount
+        integer(kind=intType) :: n_stencil, i_stencil, ind1, orderturbsave
+        integer(kind=intType), dimension(:, :), pointer :: stencil
+        real(kind=realType) :: delta_x, one_over_dx
+        real(kind=realType) :: delta_x_turb, one_over_dx_turb
 
 #ifdef USE_COMPLEX
-    complex(kind=realType), dimension(:,:), allocatable :: blk
+        complex(kind=realType), dimension(:, :), allocatable :: blk
 #else
-    real(kind=realType), dimension(:,:), allocatable :: blk
+        real(kind=realType), dimension(:, :), allocatable :: blk
 #endif
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, mm, colInd
-    logical :: resetToRANS,  splitMat
-    real :: val
-
-    ! Setup number of state variable based on turbulence assumption
-    if ( frozenTurb ) then
-       nState = nwf
-    else
-       nState = nw
-    endif
-
-    ! Generic block to use while setting values
-    allocate(blk(nState, nState))
-
-    ! Exchange data and call the residual to make sure its up to date
-    ! withe current w
-    call whalo2(1_intType, 1_intType, nw, .True., .True., .True.)
-
-    ! This routine will not use the extra variables to block_res or the
-    ! extra outputs, so we must zero them here
-    alphad = zero
-    betad  = zero
-    machd  = zero
-    machGridd = zero
-    machcoefd = zero
-    pointRefd  = zero
-    lengthRefd = zero
-    pinfdimd = zero
-    tinfdimd = zero
-    rhoinfdimd = zero
-
-
-    ! Set a pointer to the correct set of stencil depending on if we are
-    ! using the first order stencil or the full jacobian
-
-    stencil => euler_pc_stencil
-    n_stencil = N_euler_pc
-
-    ! Very important to use only Second-Order dissipation for PC
-    lumpedDiss=.True.
-    ! also use first order advection terms for turbulence
-    orderturbsave = orderturb
-    orderturb = firstOrder
-
-    ! Need to trick the residual evalution to use coupled (mean flow and
-    ! turbulent) together.
-
-    ! If we want to do the matrix on a coarser level, we must first
-    ! restrict the fine grid solutions, since it is possible the
-    ! NKsolver was used an the coarse grid solutions are (very!) out of
-    ! date.
-
-    ! Assembling matrix on coarser levels is not entirely implemented yet.
-    currentLevel = level
-    groundLevel = level
-
-    ! Set delta_x
-    delta_x = 1e-9_realType
-    one_over_dx = one/delta_x
-
-
-    delta_x_turb = 1e-14
-    one_over_dx_turb = one/delta_x_turb
-
-    rkStage = 0
-
-    ! Determine if we want to use frozenTurbulent Adjoint
-    resetToRANS = .False.
-    if (frozenTurb .and. equations == RANSEquations) then
-       equations = NSEquations
-       resetToRANS = .True.
-    end if
-
-    ! Allocate the additional memory we need for doing forward mode AD
-    !  derivatives and copy any required reference values:
-
-    call allocPCMem(level)
-    if (.not. derivVarsAllocated .and. useAD) then
-       call alloc_derivative_values(level)
-    end if
-
-    ! For AD the initial seeds must all be zeroed.
-    if (useAD) then
-       do nn=1, nDom
-          do sps=1, nTimeIntervalsSpectral
-             call setPointers(nn, level, sps)
-             call zeroADSeeds(nn, level, sps)
-          end do
-       end do
-    end if
-
-    do nn=1, nDom
-       do sps=1, nTimeIntervalsSpectral
-          call setPointers(nn, level, sps)
-
-          ! Allocate temporary space only needed while assembling.
-          allocate(flowDoms(nn, 1, sps)%dw_deriv(2:il, 2:jl, 2:kl, 1:nw, 1:nw), stat=ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          allocate(flowDoms(nn, 1, sps)%wtmp(0:ib,0:jb,0:kb,1:nw),stat=ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          allocate(flowDoms(nn, 1, sps)%dwtmp(0:ib,0:jb,0:kb,1:nw),stat=ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          allocate(flowDoms(nn, 1, sps)%dwtmp2(0:ib,0:jb,0:kb,1:nw),stat=ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          ! Only need 1 set of colors on the first sps instance.
-          if (sps == 1) then
-             allocate(flowDoms(nn, 1, 1)%color(0:ib, 0:jb, 0:kb), stat=ierr)
-             call EChk(ierr,__FILE__,__LINE__)
-          end if
-
-          ! Zero the matrix
-          pcMat = zero
-
-       end do
-    end do
-
-    ! For the PC we don't linearize the shock sensor so it must be
-    ! computed here.
-    call referenceShockSensor
-
-    ! For FD, the initial reference values must be computed and stored.
-    if (.not. useAD) then
-       call setFDReference(level)
-    end if
-
-    ! Master Domain Loop
-    domainLoopAD: do nn=1, nDom
-
-       ! Set pointers to the first timeInstance...just to getSizes
-       call setPointers(nn, level, 1)
-       ! Set unknown sizes in diffSizes for AD routine
-       ISIZE1OFDrfbcdata = nBocos
-       ISIZE1OFDrfviscsubface = nViscBocos
-
-       call setup_PC_coloring(nn, level,  nColor) ! Euler Colorings
-
-       spectralLoop: do sps=1, nTimeIntervalsSpectral
-          ! Set pointers and (possibly derivative pointers)
-          if (useAD) then
-             call setPointers_d(nn, level, sps)
-          else
-             call setPointers(nn, level, sps)
-          end if
-
-          ! Do Coloring and perturb states
-          colorLoop: do iColor = 1, nColor
-             do sps2 = 1, nTimeIntervalsSpectral
-                flowDoms(nn, 1, sps2)%dw_deriv(:, :, :, :, :) = zero
-             end do
-
-             ! Master State Loop
-             stateLoop: do l=1, nState
-
-                ! Reset All States and possibe AD seeds
-                do sps2 = 1, nTimeIntervalsSpectral
-                   if (.not. useAD) then
-                      do ll=1,nw
-                         do k=0,kb
-                            do j=0,jb
-                               do i=0,ib
-                                  flowDoms(nn, level, sps2)%w(i,j,k,ll) =  flowDoms(nn, 1, sps2)%wtmp(i,j,k,ll)
-                               end do
-                            end do
-                         end do
-                      end do
-                   end if
-
-                   if (useAD) then
-                      flowdomsd(nn, 1, sps2)%w = zero ! This is actually w seed
-                   end if
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, mm, colInd
+        logical :: resetToRANS, splitMat
+        real :: val
+
+        ! Setup number of state variable based on turbulence assumption
+        if (frozenTurb) then
+            nState = nwf
+        else
+            nState = nw
+        end if
+
+        ! Generic block to use while setting values
+        allocate (blk(nState, nState))
+
+        ! Exchange data and call the residual to make sure its up to date
+        ! withe current w
+        call whalo2(1_intType, 1_intType, nw, .True., .True., .True.)
+
+        ! This routine will not use the extra variables to block_res or the
+        ! extra outputs, so we must zero them here
+        alphad = zero
+        betad = zero
+        machd = zero
+        machGridd = zero
+        machcoefd = zero
+        pointRefd = zero
+        lengthRefd = zero
+        pinfdimd = zero
+        tinfdimd = zero
+        rhoinfdimd = zero
+
+        ! Set a pointer to the correct set of stencil depending on if we are
+        ! using the first order stencil or the full jacobian
+
+        stencil => euler_pc_stencil
+        n_stencil = N_euler_pc
+
+        ! Very important to use only Second-Order dissipation for PC
+        lumpedDiss = .True.
+        ! also use first order advection terms for turbulence
+        orderturbsave = orderturb
+        orderturb = firstOrder
+
+        ! Need to trick the residual evalution to use coupled (mean flow and
+        ! turbulent) together.
+
+        ! If we want to do the matrix on a coarser level, we must first
+        ! restrict the fine grid solutions, since it is possible the
+        ! NKsolver was used an the coarse grid solutions are (very!) out of
+        ! date.
+
+        ! Assembling matrix on coarser levels is not entirely implemented yet.
+        currentLevel = level
+        groundLevel = level
+
+        ! Set delta_x
+        delta_x = 1e-9_realType
+        one_over_dx = one / delta_x
+
+        delta_x_turb = 1e-14
+        one_over_dx_turb = one / delta_x_turb
+
+        rkStage = 0
+
+        ! Determine if we want to use frozenTurbulent Adjoint
+        resetToRANS = .False.
+        if (frozenTurb .and. equations == RANSEquations) then
+            equations = NSEquations
+            resetToRANS = .True.
+        end if
+
+        ! Allocate the additional memory we need for doing forward mode AD
+        !  derivatives and copy any required reference values:
+
+        call allocPCMem(level)
+        if (.not. derivVarsAllocated .and. useAD) then
+            call alloc_derivative_values(level)
+        end if
+
+        ! For AD the initial seeds must all be zeroed.
+        if (useAD) then
+            do nn = 1, nDom
+                do sps = 1, nTimeIntervalsSpectral
+                    call setPointers(nn, level, sps)
+                    call zeroADSeeds(nn, level, sps)
                 end do
+            end do
+        end if
 
-                ! Peturb w or set AD Seed according to iColor
-                do k=0, kb
-                   do j=0, jb
-                      do i=0, ib
-                         if (flowdoms(nn, 1, 1)%color(i, j, k) == icolor) then
-                            if (useAD) then
-                               flowDomsd(nn, 1, sps)%w(i, j, k, l) = one
-                            else
-                               if (l <= nwf) then
-                                  w(i, j, k, l) = w(i, j, k, l) + delta_x
-                               else
-                                  w(i, j, k, l) = w(i, j, k, l) + delta_x_turb
-                               end if
-                            end if
-                         end if
-                      end do
-                   end do
-                end do
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, level, sps)
+
+                ! Allocate temporary space only needed while assembling.
+                allocate (flowDoms(nn, 1, sps)%dw_deriv(2:il, 2:jl, 2:kl, 1:nw, 1:nw), stat=ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                allocate (flowDoms(nn, 1, sps)%wtmp(0:ib, 0:jb, 0:kb, 1:nw), stat=ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                allocate (flowDoms(nn, 1, sps)%dwtmp(0:ib, 0:jb, 0:kb, 1:nw), stat=ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                allocate (flowDoms(nn, 1, sps)%dwtmp2(0:ib, 0:jb, 0:kb, 1:nw), stat=ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                ! Only need 1 set of colors on the first sps instance.
+                if (sps == 1) then
+                    allocate (flowDoms(nn, 1, 1)%color(0:ib, 0:jb, 0:kb), stat=ierr)
+                    call EChk(ierr, __FILE__, __LINE__)
+                end if
+
+                ! Zero the matrix
+                pcMat = zero
+
+            end do
+        end do
+
+        ! For the PC we don't linearize the shock sensor so it must be
+        ! computed here.
+        call referenceShockSensor
+
+        ! For FD, the initial reference values must be computed and stored.
+        if (.not. useAD) then
+            call setFDReference(level)
+        end if
+
+        ! Master Domain Loop
+        domainLoopAD: do nn = 1, nDom
+
+            ! Set pointers to the first timeInstance...just to getSizes
+            call setPointers(nn, level, 1)
+            ! Set unknown sizes in diffSizes for AD routine
+            ISIZE1OFDrfbcdata = nBocos
+            ISIZE1OFDrfviscsubface = nViscBocos
 
-                ! Run Block-based residual
+            call setup_PC_coloring(nn, level, nColor) ! Euler Colorings
+
+            spectralLoop: do sps = 1, nTimeIntervalsSpectral
+                ! Set pointers and (possibly derivative pointers)
                 if (useAD) then
-#ifndef USE_COMPLEX
-                   call block_res_state_d(nn, sps)
-#else
-                   print *, 'Forward AD routines are not complexified'
-                   stop
-#endif
+                    call setPointers_d(nn, level, sps)
                 else
-                   call block_res(nn, sps)
+                    call setPointers(nn, level, sps)
                 end if
 
-                ! Set the computed residual in dw_deriv. If using FD,
-                ! actually do the FD calculation if AD, just copy out dw
-                ! in flowdomsd
-
-                ! Compute/Copy all derivatives
-                do sps2 = 1, nTimeIntervalsSpectral
-                   do ll=1, nState
-                      do k=2, kl
-                         do j=2, jl
-                            do i=2, il
-                               if (useAD) then
-                                  flowDoms(nn, 1, sps2)%dw_deriv(i, j, k, ll, l) = &
-                                       flowdomsd(nn, 1, sps2)%dw(i, j, k, ll)
-                               else
-                                  if (sps2 == sps) then
-                                     ! If the peturbation is on this
-                                     ! instance, we've computed the spatial
-                                     ! contribution so subtrace dwtmp
-
-                                     if (l <= nwf) then
-                                        flowDoms(nn, 1, sps2)%dw_deriv(i, j, k, ll, l) = &
-                                             one_over_dx * &
-                                             (flowDoms(nn, 1, sps2)%dw(i, j, k, ll) - &
-                                             flowDoms(nn, 1, sps2)%dwtmp(i, j, k, ll))
-                                     else
-                                        flowDoms(nn, 1, sps2)%dw_deriv(i, j, k, ll, l) = &
-                                             one_over_dx_turb * &
-                                             (flowDoms(nn, 1, sps2)%dw(i, j, k, ll) - &
-                                             flowDoms(nn, 1, sps2)%dwtmp(i, j, k, ll))
-                                     end if
-                                  else
-                                     ! If the peturbation is on an off
-                                     ! instance, only subtract dwtmp2
-                                     ! which is the reference result
-                                     ! after initres
-
-                                     flowDoms(nn, 1, sps2)%dw_deriv(i, j, k, ll, l) = &
-                                          one_over_dx*(&
-                                          flowDoms(nn, 1, sps2)%dw(i, j, k, ll) - &
-                                          flowDoms(nn, 1, sps2)%dwtmp2(i, j, k, ll))
-                                  end if
-                               end if
-                            end do
-                         end do
-                      end do
-                   end do
-                end do
-             end do stateLoop
-
-             ! Set derivatives by block in "matrix" after we've peturbed
-             ! all states in "color"
-
-             kLoop: do k=0, kb
-                jLoop: do j=0, jb
-                   iLoop: do i=0, ib
-                      if (flowdoms(nn, 1, 1)%color(i, j, k) == icolor .and. globalCell(i,j,k)>=0) then
-
-                         ! Diagonal block is easy.
-                         if (onBlock(i, j, k)) then
-                            PCMat(:, 1:nw, i, j, k) = flowDoms(nn, 1, sps)%dw_deriv(i, j, k, 1:nstate, 1:nstate)
-                         end if
-
-
-                         if (onBlock(i-1, j, k)) then
-                            PCMat(:, 2*nw+1:3*nw, i-1, j, k) = flowDoms(nn, 1, sps)%dw_deriv(i-1, j, k, 1:nstate, 1:nstate)
-                         end if
-
-                         if (onBlock(i+1, j, k)) then
-                            PCMat(:, 1*nw+1:2*nw, i+1, j, k) = flowDoms(nn, 1, sps)%dw_deriv(i+1, j, k, 1:nstate, 1:nstate)
-                         end if
-
-                         if (onBlock(i, j-1, k)) then
-                            PCMat(:, 4*nw+1:5*nw, i, j-1, k) = flowDoms(nn, 1, sps)%dw_deriv(i, j-1, k, 1:nstate, 1:nstate)
-                         end if
-
-                         if (onBlock(i, j+1, k)) then
-                            PCMat(:, 3*nw+1:4*nw, i, j+1, k) = flowDoms(nn, 1, sps)%dw_deriv(i, j+1, k, 1:nstate, 1:nstate)
-                         end if
-
-                         if (onBlock(i, j, k-1)) then
-                            PCMat(:, 6*nw+1:7*nw, i, j, k-1) = flowDoms(nn, 1, sps)%dw_deriv(i, j, k-1, 1:nstate, 1:nstate)
-                         end if
-
-                         if (onBlock(i, j, k+1)) then
-                            PCMat(:, 5*nw+1:6*nw, i, j, k+1) = flowDoms(nn, 1, sps)%dw_deriv(i, j, k+1, 1:nstate, 1:nstate)
-                         end if
-                      end if
-                   end do iLoop
-                end do jLoop
-             end do kLoop
-          end do colorLoop
-       end do spectralLoop
-    end do domainLoopAD
-
-    ! Maybe we can do something useful while the communication happens?
-    ! Deallocate the temporary memory used in this routine.
-
-    ! Deallocate and reset values
-    if (.not. useAD) then
-       call resetFDReference(level)
-    end if
-
-    do nn=1, nDom
-       do sps=1, nTimeIntervalsSpectral
-          deallocate(&
-               flowDoms(nn, 1, sps)%dw_deriv, &
-               flowDoms(nn, 1, sps)%wTmp, &
-               flowDoms(nn, 1, sps)%dwTmp, &
-               flowDoms(nn, 1, sps)%dwTmp2)
-          if (sps == 1) then
-             deallocate(flowDoms(nn, 1, sps)%color)
-          end if
-       end do
-    end do
-
-    ! Return dissipation Parameters to normal -> VERY VERY IMPORTANT
-    lumpedDiss = .False.
-    orderturb = orderturbsave
-
-    ! Reset the correct equation parameters if we were useing the frozen
-    ! Turbulent
-    if (resetToRANS) then
-       equations = RANSEquations
-    end if
-
-    deallocate(blk)
-#endif
-  contains
-    function onBlock(i, j, k)
+                ! Do Coloring and perturb states
+                colorLoop: do iColor = 1, nColor
+                    do sps2 = 1, nTimeIntervalsSpectral
+                        flowDoms(nn, 1, sps2)%dw_deriv(:, :, :, :, :) = zero
+                    end do
+
+                    ! Master State Loop
+                    stateLoop: do l = 1, nState
+
+                        ! Reset All States and possibe AD seeds
+                        do sps2 = 1, nTimeIntervalsSpectral
+                            if (.not. useAD) then
+                                do ll = 1, nw
+                                    do k = 0, kb
+                                        do j = 0, jb
+                                            do i = 0, ib
+                                      flowDoms(nn, level, sps2)%w(i, j, k, ll) = flowDoms(nn, 1, sps2)%wtmp(i, j, k, ll)
+                                            end do
+                                        end do
+                                    end do
+                                end do
+                            end if
 
-      use precision
-      implicit none
+                            if (useAD) then
+                                flowdomsd(nn, 1, sps2)%w = zero ! This is actually w seed
+                            end if
+                        end do
+
+                        ! Peturb w or set AD Seed according to iColor
+                        do k = 0, kb
+                            do j = 0, jb
+                                do i = 0, ib
+                                    if (flowdoms(nn, 1, 1)%color(i, j, k) == icolor) then
+                                        if (useAD) then
+                                            flowDomsd(nn, 1, sps)%w(i, j, k, l) = one
+                                        else
+                                            if (l <= nwf) then
+                                                w(i, j, k, l) = w(i, j, k, l) + delta_x
+                                            else
+                                                w(i, j, k, l) = w(i, j, k, l) + delta_x_turb
+                                            end if
+                                        end if
+                                    end if
+                                end do
+                            end do
+                        end do
 
-      integer(kind=intType), intent(in) :: i, j, k
-      logical :: onBlock
+                        ! Run Block-based residual
+                        if (useAD) then
+#ifndef USE_COMPLEX
+                            call block_res_state_d(nn, sps)
+#else
+                            print *, 'Forward AD routines are not complexified'
+                            stop
+#endif
+                        else
+                            call block_res(nn, sps)
+                        end if
+
+                        ! Set the computed residual in dw_deriv. If using FD,
+                        ! actually do the FD calculation if AD, just copy out dw
+                        ! in flowdomsd
+
+                        ! Compute/Copy all derivatives
+                        do sps2 = 1, nTimeIntervalsSpectral
+                            do ll = 1, nState
+                                do k = 2, kl
+                                    do j = 2, jl
+                                        do i = 2, il
+                                            if (useAD) then
+                                                flowDoms(nn, 1, sps2)%dw_deriv(i, j, k, ll, l) = &
+                                                    flowdomsd(nn, 1, sps2)%dw(i, j, k, ll)
+                                            else
+                                                if (sps2 == sps) then
+                                                    ! If the peturbation is on this
+                                                    ! instance, we've computed the spatial
+                                                    ! contribution so subtrace dwtmp
+
+                                                    if (l <= nwf) then
+                                                        flowDoms(nn, 1, sps2)%dw_deriv(i, j, k, ll, l) = &
+                                                            one_over_dx * &
+                                                            (flowDoms(nn, 1, sps2)%dw(i, j, k, ll) - &
+                                                             flowDoms(nn, 1, sps2)%dwtmp(i, j, k, ll))
+                                                    else
+                                                        flowDoms(nn, 1, sps2)%dw_deriv(i, j, k, ll, l) = &
+                                                            one_over_dx_turb * &
+                                                            (flowDoms(nn, 1, sps2)%dw(i, j, k, ll) - &
+                                                             flowDoms(nn, 1, sps2)%dwtmp(i, j, k, ll))
+                                                    end if
+                                                else
+                                                    ! If the peturbation is on an off
+                                                    ! instance, only subtract dwtmp2
+                                                    ! which is the reference result
+                                                    ! after initres
+
+                                                    flowDoms(nn, 1, sps2)%dw_deriv(i, j, k, ll, l) = &
+                                                        one_over_dx * ( &
+                                                        flowDoms(nn, 1, sps2)%dw(i, j, k, ll) - &
+                                                        flowDoms(nn, 1, sps2)%dwtmp2(i, j, k, ll))
+                                                end if
+                                            end if
+                                        end do
+                                    end do
+                                end do
+                            end do
+                        end do
+                    end do stateLoop
+
+                    ! Set derivatives by block in "matrix" after we've peturbed
+                    ! all states in "color"
+
+                    kLoop: do k = 0, kb
+                        jLoop: do j = 0, jb
+                            iLoop: do i = 0, ib
+                                if (flowdoms(nn, 1, 1)%color(i, j, k) == icolor .and. globalCell(i, j, k) >= 0) then
+
+                                    ! Diagonal block is easy.
+                                    if (onBlock(i, j, k)) then
+                                    PCMat(:, 1:nw, i, j, k) = flowDoms(nn, 1, sps)%dw_deriv(i, j, k, 1:nstate, 1:nstate)
+                                    end if
+
+                                    if (onBlock(i - 1, j, k)) then
+               PCMat(:, 2 * nw + 1:3 * nw, i - 1, j, k) = flowDoms(nn, 1, sps)%dw_deriv(i - 1, j, k, 1:nstate, 1:nstate)
+                                    end if
+
+                                    if (onBlock(i + 1, j, k)) then
+               PCMat(:, 1 * nw + 1:2 * nw, i + 1, j, k) = flowDoms(nn, 1, sps)%dw_deriv(i + 1, j, k, 1:nstate, 1:nstate)
+                                    end if
+
+                                    if (onBlock(i, j - 1, k)) then
+               PCMat(:, 4 * nw + 1:5 * nw, i, j - 1, k) = flowDoms(nn, 1, sps)%dw_deriv(i, j - 1, k, 1:nstate, 1:nstate)
+                                    end if
+
+                                    if (onBlock(i, j + 1, k)) then
+               PCMat(:, 3 * nw + 1:4 * nw, i, j + 1, k) = flowDoms(nn, 1, sps)%dw_deriv(i, j + 1, k, 1:nstate, 1:nstate)
+                                    end if
+
+                                    if (onBlock(i, j, k - 1)) then
+               PCMat(:, 6 * nw + 1:7 * nw, i, j, k - 1) = flowDoms(nn, 1, sps)%dw_deriv(i, j, k - 1, 1:nstate, 1:nstate)
+                                    end if
+
+                                    if (onBlock(i, j, k + 1)) then
+               PCMat(:, 5 * nw + 1:6 * nw, i, j, k + 1) = flowDoms(nn, 1, sps)%dw_deriv(i, j, k + 1, 1:nstate, 1:nstate)
+                                    end if
+                                end if
+                            end do iLoop
+                        end do jLoop
+                    end do kLoop
+                end do colorLoop
+            end do spectralLoop
+        end do domainLoopAD
+
+        ! Maybe we can do something useful while the communication happens?
+        ! Deallocate the temporary memory used in this routine.
+
+        ! Deallocate and reset values
+        if (.not. useAD) then
+            call resetFDReference(level)
+        end if
+
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                deallocate ( &
+                    flowDoms(nn, 1, sps)%dw_deriv, &
+                    flowDoms(nn, 1, sps)%wTmp, &
+                    flowDoms(nn, 1, sps)%dwTmp, &
+                    flowDoms(nn, 1, sps)%dwTmp2)
+                if (sps == 1) then
+                    deallocate (flowDoms(nn, 1, sps)%color)
+                end if
+            end do
+        end do
 
-      if (i >= 2 .and. i <= il .and. j >= 2 .and. j<= jl .and. k >= 2 .and. k <= kl) then
-         onBlock = .True.
-      else
-         onBlock = .False.
-      end if
+        ! Return dissipation Parameters to normal -> VERY VERY IMPORTANT
+        lumpedDiss = .False.
+        orderturb = orderturbsave
 
-    end function onBlock
+        ! Reset the correct equation parameters if we were useing the frozen
+        ! Turbulent
+        if (resetToRANS) then
+            equations = RANSEquations
+        end if
 
-    function getIndex(i, j, k)
+        deallocate (blk)
+#endif
+    contains
+        function onBlock(i, j, k)
 
-      use precision
-      implicit none
+            use precision
+            implicit none
 
-      integer(kind=intType), intent(in) :: i, j, k
-      integer(kind=intType) :: getIndex
-
-      getIndex = ((k-2)*nx*ny + (j-2)*nx + (i-2))*nw  + 1
+            integer(kind=intType), intent(in) :: i, j, k
+            logical :: onBlock
 
-    end function getIndex
-
-  end subroutine setupPCMatrix
-  subroutine testpc()
-
-    ! use constants
-    ! !use communication
-    ! !use adjointPETSc
-    ! use adjointVars
-    ! use flowvarrefstate
-    ! use inputtimespectral
-    ! use blockPointers
-    ! use utils, only : Echk
-    ! use adjointUtils, only : setupStateResidualMatrix
-    ! implicit none
+            if (i >= 2 .and. i <= il .and. j >= 2 .and. j <= jl .and. k >= 2 .and. k <= kl) then
+                onBlock = .True.
+            else
+                onBlock = .False.
+            end if
 
-    ! real(kind=alwaysRealType) , dimension(:), allocatable :: v1, v2
-    ! real(kind=realType) :: timeA, timeB, timeC, val
-    ! integer(kind=intType) :: nDimw, ierr, i, nn, sps, j, k, ii, jj, kk, info
-    ! logical :: useAD
-
-    ! useAD = .True.
-    ! call setupPCMatrix(useAD, .False., .False., 1)
-
-    ! ! Now create all the factors
-    ! call createPETscVars
-    ! call setupStateResidualMatrix(drdwpret, usead, .True. , .False., &
-    !      .False., .False., 1)
-
-    ! nDimW = nw * nCellsLocal(1_intType)*nTimeIntervalsSpectral
-    ! allocate(v1(nDimw), v2(nDimW))
-
-    ! ! Set random into V1
-    ! call random_number(v1)
-
-    ! ! Dump psi into psi_like1 and RHS into psi_like2
-    ! call VecPlaceArray(psi_like1, v1, ierr)
-    ! call EChk(ierr,__FILE__,__LINE__)
-
-    ! call VecPlaceArray(psi_like2, v2, ierr)
-    ! call EChk(ierr,__FILE__,__LINE__)
-
-    ! call mpi_barrier(adflow_comm_world, ierr)
-    ! timeA = mpi_wtime()
-    ! call matMult(drdwpret, psi_like1, psi_like2, ierr)
-    ! call EChk(ierr,__FILE__,__LINE__)
-    ! timeB =mpi_wtime()
-
-    ! print *,'Petsc Time:', myid, timeB-timeA
-    ! call vecNorm(psi_like2, NORM_2, val, ierr)
-    ! print *,'PETsc Norm:', val
-
-
-    ! call mpi_barrier(adflow_comm_world, ierr)
-    ! timeA = mpi_wtime()
-    ! call PCMatMult(drdwpret, psi_like1, psi_like2, ierr)
-    ! timeB = mpi_wtime()
-
-    ! print *,'My Time:', myid, timeB-timeA
-    ! call vecNorm(psi_like2, NORM_2, val, ierr)
-    ! print *,'My Norm:', val
-    ! call VecResetArray(psi_like1, ierr)
-    ! call VecResetArray(psi_like2, ierr)
-  end subroutine testpc
-
-  subroutine factorPCMatrix()
-
-    use communication
-    use adjointPETSc
-    use adjointVars
-    use flowvarrefstate
-    use inputtimespectral
-    use blockPointers
-    use utils, only : Echk, setPointers
-    implicit none
-
-    ! integer(kind=intType) :: ierr, i, nn, sps, j, k, ii, jj, kk, info, timeA, timeB
-
-    ! timeA = mpi_wtime()
-    ! do nn=1,nDom
-    !    do sps=1,nTimeIntervalsSpectral
-    !       call setPointers(nn, 1, sps)
-
-    !       ! ========= I-Lines ============
-    !       do k=2, kl
-    !          do j=2, jl
-
-    !             ! Copy data from PCMat
-    !             ii = 0
-    !             jj = 0
-    !             kk = 0
-    !             do i=2, il
-    !                i_D_fact(:, nw*ii+1:nw*(ii+1), j, k) = PCMat(:,      1:1*nw, i, j, k)
-    !                ii = ii + 1
-
-    !                if (i > 2) then
-    !                   i_L_fact(:, nw*jj+1:nw*(jj+1), j, k) = PCMat(:, 1*nw+1:2*nw, i, j, k)
-    !                   jj = jj + 1
-    !                end if
-
-    !                if (i < il) then
-    !                   i_U_fact(:, nw*kk+1:nw*(kk+1), j, k) = PCMat(:, 2*nw+1:3*nw, i, j, k)
-    !                   kk = kk + 1
-    !                end if
-
-    !             end do
-
-    !             ! ! Perform factorization
-    !             ! call dgeblttrf(nx, nw, i_D_fact(:, :, j, k), i_L_fact(:, :, j, k), &
-    !             !      i_U_fact(:, :, j, k), i_U2_fact(:, :, j, k), i_ipiv(:, :, j, k), info)
-
-    !          end do
-    !       end do
-
-    !       ! ========= J-Lines ============
-    !       do k=2, kl
-    !          do i=2, il
-
-    !             ! Copy data from PCMat
-    !             ii = 0
-    !             jj = 0
-    !             kk = 0
-    !             do j=2, jl
-    !                j_D_fact(:, nw*ii+1:nw*(ii+1), i, k) = PCMat(:,      1:1*nw, i, j, k)
-    !                ii = ii + 1
-
-    !                if (j > 2) then
-    !                   j_L_fact(:, nw*jj+1:nw*(jj+1), i, k) = PCMat(:, 3*nw+1:4*nw, i, j, k)
-    !                   jj = jj + 1
-    !                end if
-
-    !                if (j < jl) then
-    !                   j_U_fact(:, nw*kk+1:nw*(kk+1), i, k) = PCMat(:, 4*nw+1:5*nw, i, j, k)
-    !                   kk =kk + 1
-    !                end if
-    !             end do
-
-    !             ! ! Perform factorization
-    !             ! call dgeblttrf(ny, nw, j_D_fact(:, :, i, k), j_L_fact(:, :, i, k), &
-    !             !      j_U_fact(:, :, i, k), j_U2_fact(:, :, i, k), j_ipiv(:, :, i, k), info)
-
-    !          end do
-    !       end do
-
-    !       ! ========= k-Lines ============
-    !       do j=2, jl
-    !          do i=2, il
-
-    !             ! Copy data from PCMat
-    !             ii = 0
-    !             jj = 0
-    !             kk = 0
-    !             do k=2, kl
-    !                k_D_fact(:, nw*ii+1:nw*(ii+1), i, j) = PCMat(:,      1:1*nw, i, j, k)
-    !                ii = ii + 1
-
-    !                if (k > 2) then
-    !                   k_L_fact(:, nw*jj+1:nw*(jj+1), i, j) = PCMat(:, 5*nw+1:6*nw, i, j, k)
-    !                   jj = jj + 1
-    !                end if
-
-    !                if (k < kl) then
-    !                   k_U_fact(:, nw*kk+1:nw*(kk+1), i, j) = PCMat(:, 6*nw+1:7*nw, i, j, k)
-    !                   kk =kk + 1
-    !                end if
-    !             end do
-
-    !             ! ! Perform factorization
-    !             ! call dgeblttrf(nz, nw, k_D_fact(:, :, i, j), k_L_fact(:, :, i,j), &
-    !             !      k_U_fact(:, :, i, j), k_U2_fact(:, :, i, j), k_ipiv(:, :, i, j), info)
-
-    !          end do
-    !       end do
-    !    end do
-    ! end do
-
-  end subroutine factorPCMatrix
-
-  subroutine PCMatMult(A, vecX,  vecY, ierr)
-
-    ! PETSc user-defied call back function for computing the product of
-    ! PCMat with a vector.
-
-    use constants
-    use communication
-    use blockPointers
-    use iteration
-    use flowVarRefState
-    use inputAdjoint
-    use ADjointVars
-    use inputTimeSpectral
-    use utils, only : EChk, setPointers
+        end function onBlock
+
+        function getIndex(i, j, k)
+
+            use precision
+            implicit none
+
+            integer(kind=intType), intent(in) :: i, j, k
+            integer(kind=intType) :: getIndex
+
+            getIndex = ((k - 2) * nx * ny + (j - 2) * nx + (i - 2)) * nw + 1
+
+        end function getIndex
+
+    end subroutine setupPCMatrix
+    subroutine testpc()
+
+        ! use constants
+        ! !use communication
+        ! !use adjointPETSc
+        ! use adjointVars
+        ! use flowvarrefstate
+        ! use inputtimespectral
+        ! use blockPointers
+        ! use utils, only : Echk
+        ! use adjointUtils, only : setupStateResidualMatrix
+        ! implicit none
+
+        ! real(kind=alwaysRealType) , dimension(:), allocatable :: v1, v2
+        ! real(kind=realType) :: timeA, timeB, timeC, val
+        ! integer(kind=intType) :: nDimw, ierr, i, nn, sps, j, k, ii, jj, kk, info
+        ! logical :: useAD
+
+        ! useAD = .True.
+        ! call setupPCMatrix(useAD, .False., .False., 1)
+
+        ! ! Now create all the factors
+        ! call createPETscVars
+        ! call setupStateResidualMatrix(drdwpret, usead, .True. , .False., &
+        !      .False., .False., 1)
+
+        ! nDimW = nw * nCellsLocal(1_intType)*nTimeIntervalsSpectral
+        ! allocate(v1(nDimw), v2(nDimW))
+
+        ! ! Set random into V1
+        ! call random_number(v1)
+
+        ! ! Dump psi into psi_like1 and RHS into psi_like2
+        ! call VecPlaceArray(psi_like1, v1, ierr)
+        ! call EChk(ierr,__FILE__,__LINE__)
+
+        ! call VecPlaceArray(psi_like2, v2, ierr)
+        ! call EChk(ierr,__FILE__,__LINE__)
+
+        ! call mpi_barrier(adflow_comm_world, ierr)
+        ! timeA = mpi_wtime()
+        ! call matMult(drdwpret, psi_like1, psi_like2, ierr)
+        ! call EChk(ierr,__FILE__,__LINE__)
+        ! timeB =mpi_wtime()
+
+        ! print *,'Petsc Time:', myid, timeB-timeA
+        ! call vecNorm(psi_like2, NORM_2, val, ierr)
+        ! print *,'PETsc Norm:', val
+
+        ! call mpi_barrier(adflow_comm_world, ierr)
+        ! timeA = mpi_wtime()
+        ! call PCMatMult(drdwpret, psi_like1, psi_like2, ierr)
+        ! timeB = mpi_wtime()
+
+        ! print *,'My Time:', myid, timeB-timeA
+        ! call vecNorm(psi_like2, NORM_2, val, ierr)
+        ! print *,'My Norm:', val
+        ! call VecResetArray(psi_like1, ierr)
+        ! call VecResetArray(psi_like2, ierr)
+    end subroutine testpc
+
+    subroutine factorPCMatrix()
+
+        use communication
+        use adjointPETSc
+        use adjointVars
+        use flowvarrefstate
+        use inputtimespectral
+        use blockPointers
+        use utils, only: Echk, setPointers
+        implicit none
+
+        ! integer(kind=intType) :: ierr, i, nn, sps, j, k, ii, jj, kk, info, timeA, timeB
+
+        ! timeA = mpi_wtime()
+        ! do nn=1,nDom
+        !    do sps=1,nTimeIntervalsSpectral
+        !       call setPointers(nn, 1, sps)
+
+        !       ! ========= I-Lines ============
+        !       do k=2, kl
+        !          do j=2, jl
+
+        !             ! Copy data from PCMat
+        !             ii = 0
+        !             jj = 0
+        !             kk = 0
+        !             do i=2, il
+        !                i_D_fact(:, nw*ii+1:nw*(ii+1), j, k) = PCMat(:,      1:1*nw, i, j, k)
+        !                ii = ii + 1
+
+        !                if (i > 2) then
+        !                   i_L_fact(:, nw*jj+1:nw*(jj+1), j, k) = PCMat(:, 1*nw+1:2*nw, i, j, k)
+        !                   jj = jj + 1
+        !                end if
+
+        !                if (i < il) then
+        !                   i_U_fact(:, nw*kk+1:nw*(kk+1), j, k) = PCMat(:, 2*nw+1:3*nw, i, j, k)
+        !                   kk = kk + 1
+        !                end if
+
+        !             end do
+
+        !             ! ! Perform factorization
+        !             ! call dgeblttrf(nx, nw, i_D_fact(:, :, j, k), i_L_fact(:, :, j, k), &
+        !             !      i_U_fact(:, :, j, k), i_U2_fact(:, :, j, k), i_ipiv(:, :, j, k), info)
+
+        !          end do
+        !       end do
+
+        !       ! ========= J-Lines ============
+        !       do k=2, kl
+        !          do i=2, il
+
+        !             ! Copy data from PCMat
+        !             ii = 0
+        !             jj = 0
+        !             kk = 0
+        !             do j=2, jl
+        !                j_D_fact(:, nw*ii+1:nw*(ii+1), i, k) = PCMat(:,      1:1*nw, i, j, k)
+        !                ii = ii + 1
+
+        !                if (j > 2) then
+        !                   j_L_fact(:, nw*jj+1:nw*(jj+1), i, k) = PCMat(:, 3*nw+1:4*nw, i, j, k)
+        !                   jj = jj + 1
+        !                end if
+
+        !                if (j < jl) then
+        !                   j_U_fact(:, nw*kk+1:nw*(kk+1), i, k) = PCMat(:, 4*nw+1:5*nw, i, j, k)
+        !                   kk =kk + 1
+        !                end if
+        !             end do
+
+        !             ! ! Perform factorization
+        !             ! call dgeblttrf(ny, nw, j_D_fact(:, :, i, k), j_L_fact(:, :, i, k), &
+        !             !      j_U_fact(:, :, i, k), j_U2_fact(:, :, i, k), j_ipiv(:, :, i, k), info)
+
+        !          end do
+        !       end do
+
+        !       ! ========= k-Lines ============
+        !       do j=2, jl
+        !          do i=2, il
+
+        !             ! Copy data from PCMat
+        !             ii = 0
+        !             jj = 0
+        !             kk = 0
+        !             do k=2, kl
+        !                k_D_fact(:, nw*ii+1:nw*(ii+1), i, j) = PCMat(:,      1:1*nw, i, j, k)
+        !                ii = ii + 1
+
+        !                if (k > 2) then
+        !                   k_L_fact(:, nw*jj+1:nw*(jj+1), i, j) = PCMat(:, 5*nw+1:6*nw, i, j, k)
+        !                   jj = jj + 1
+        !                end if
+
+        !                if (k < kl) then
+        !                   k_U_fact(:, nw*kk+1:nw*(kk+1), i, j) = PCMat(:, 6*nw+1:7*nw, i, j, k)
+        !                   kk =kk + 1
+        !                end if
+        !             end do
+
+        !             ! ! Perform factorization
+        !             ! call dgeblttrf(nz, nw, k_D_fact(:, :, i, j), k_L_fact(:, :, i,j), &
+        !             !      k_U_fact(:, :, i, j), k_U2_fact(:, :, i, j), k_ipiv(:, :, i, j), info)
+
+        !          end do
+        !       end do
+        !    end do
+        ! end do
+
+    end subroutine factorPCMatrix
+
+    subroutine PCMatMult(A, vecX, vecY, ierr)
+
+        ! PETSc user-defied call back function for computing the product of
+        ! PCMat with a vector.
+
+        use constants
+        use communication
+        use blockPointers
+        use iteration
+        use flowVarRefState
+        use inputAdjoint
+        use ADjointVars
+        use inputTimeSpectral
+        use utils, only: EChk, setPointers
 #include <petsc/finclude/petsc.h>
-    use petsc
-    implicit none
-
-    ! PETSc Arguments
-    Mat   A
-    Vec   vecX, vecY
-    integer(kind=intType) ::ierr, i, j, k, l, nn, sps, ii
-    real(kind=realType) :: sum1, sum2, sum3, sum4, sum5
-    real(kind=realType) :: x1, x2, x3, x4, x5
-    real(kind=realType), pointer :: yPtr(:)
-    real(kind=realType) ::  RHS(nw*7)
-    ! We first have to distribute xPtr into the halo cells for the local
-    ! products.
-    print *,'calling pcmatmult'
-    call setPCVec(vecX)
-
-    call VecGetArrayF90(vecY, yPtr, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! Now we can compute the acutal matrix vector product.
-    ii = 1
-    do nn=1, nDom
-       do sps=1, nTimeIntervalsSpectral
-          call setPointers(nn, 1, sps)
-
-          do k=2, kl
-             do j=2, jl
-                do i=2, il
-
-                   ! Fill up the RHS
-
-                   rhs(0*nw+1:1*nw) = PCVec1(:, i  , j, k)
-                   rhs(1*nw+1:2*nw) = PCVec1(:, i-1, j, k)
-                   rhs(2*nw+1:3*nw) = PCVec1(:, i+1, j, k)
-                   rhs(3*nw+1:4*nw) = PCVec1(:, i, j-1, k)
-                   rhs(4*nw+1:5*nw) = PCVec1(:, i, j+1, k)
-                   rhs(5*nw+1:6*nw) = PCVec1(:, i, j, k-1)
-                   rhs(6*nw+1:7*nw) = PCVec1(:, i, j, k+1)
-
-                   ! Call blass mat-vec. We can dump the result directly
-                   ! into yPtr. There doesn't appear to be much
-                   ! difference between the fortran matmul and blas for
-                   ! these sized operations.
-
-                   !yPtr(ii:ii+nw) = matmul(PCMat(:, :, i, j, k), rhs)
-                   call DGEMV('n', nw, nw*7, one, PCMat(:, :, i, j, k), nw, rhs, 1, zero, yPtr(ii), 1)
-
-                   ii = ii + nw
-
+        use petsc
+        implicit none
+
+        ! PETSc Arguments
+        Mat A
+        Vec vecX, vecY
+        integer(kind=intType) :: ierr, i, j, k, l, nn, sps, ii
+        real(kind=realType) :: sum1, sum2, sum3, sum4, sum5
+        real(kind=realType) :: x1, x2, x3, x4, x5
+        real(kind=realType), pointer :: yPtr(:)
+        real(kind=realType) :: RHS(nw * 7)
+        ! We first have to distribute xPtr into the halo cells for the local
+        ! products.
+        print *, 'calling pcmatmult'
+        call setPCVec(vecX)
+
+        call VecGetArrayF90(vecY, yPtr, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Now we can compute the acutal matrix vector product.
+        ii = 1
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, 1, sps)
+
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+
+                            ! Fill up the RHS
+
+                            rhs(0 * nw + 1:1 * nw) = PCVec1(:, i, j, k)
+                            rhs(1 * nw + 1:2 * nw) = PCVec1(:, i - 1, j, k)
+                            rhs(2 * nw + 1:3 * nw) = PCVec1(:, i + 1, j, k)
+                            rhs(3 * nw + 1:4 * nw) = PCVec1(:, i, j - 1, k)
+                            rhs(4 * nw + 1:5 * nw) = PCVec1(:, i, j + 1, k)
+                            rhs(5 * nw + 1:6 * nw) = PCVec1(:, i, j, k - 1)
+                            rhs(6 * nw + 1:7 * nw) = PCVec1(:, i, j, k + 1)
+
+                            ! Call blass mat-vec. We can dump the result directly
+                            ! into yPtr. There doesn't appear to be much
+                            ! difference between the fortran matmul and blas for
+                            ! these sized operations.
+
+                            !yPtr(ii:ii+nw) = matmul(PCMat(:, :, i, j, k), rhs)
+                            call DGEMV('n', nw, nw * 7, one, PCMat(:, :, i, j, k), nw, rhs, 1, zero, yPtr(ii), 1)
+
+                            ii = ii + nw
+
+                        end do
+                    end do
                 end do
-             end do
-          end do
-       end do
-    end do
+            end do
+        end do
 
-    call VecRestoreArrayF90(vecY, yPtr, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+        call VecRestoreArrayF90(vecY, yPtr, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    ierr = 0
+        ierr = 0
 
-  end subroutine PCMatMult
+    end subroutine PCMatMult
 
-  subroutine myShellPCApply(pc, vecX, vecY, ierr)
+    subroutine myShellPCApply(pc, vecX, vecY, ierr)
 
-    use precision
-    use blockPointers
-    use inputTimeSpectral
-    use flowvarrefstate
-    use communication
+        use precision
+        use blockPointers
+        use inputTimeSpectral
+        use flowvarrefstate
+        use communication
 #include <petsc/finclude/petsc.h>
-    use petsc
-    implicit none
-
-    ! PETSc Arguments
-    PC   pc
-    Vec   vecX, vecY
-    integer(kind=intType) :: ierr, info, i, j, k, ii, nn, sps, ipiv(nw)
-    real(kind=realType) :: blk(nw, nw), rhs(nw)
-    real(kind=realType), pointer :: yPtr(:)
+        use petsc
+        implicit none
 
+        ! PETSc Arguments
+        PC pc
+        Vec vecX, vecY
+        integer(kind=intType) :: ierr, info, i, j, k, ii, nn, sps, ipiv(nw)
+        real(kind=realType) :: blk(nw, nw), rhs(nw)
+        real(kind=realType), pointer :: yPtr(:)
 
-    ! First copy X to Y. This way we will continually transform vecY
-    ! into the preconditioned vector.
-    call vecCopy(vecX, vecY, ierr)
+        ! First copy X to Y. This way we will continually transform vecY
+        ! into the preconditioned vector.
+        call vecCopy(vecX, vecY, ierr)
 
-    call VecGetArrayF90(vecY, yPtr, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+        call VecGetArrayF90(vecY, yPtr, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    ! Do something useful here....
+        ! Do something useful here....
 
-    call VecRestoreArrayF90(vecY, yPtr, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+        call VecRestoreArrayF90(vecY, yPtr, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-  end subroutine myShellPCApply
+    end subroutine myShellPCApply
 
-  subroutine setPCVec(vecX)
+    subroutine setPCVec(vecX)
 
-    use constants
-    use communication
-    use blockPointers
-    use inputTimeSpectral
-    use flowVarRefState
-    use utils, only : EChk, setPointers
+        use constants
+        use communication
+        use blockPointers
+        use inputTimeSpectral
+        use flowVarRefState
+        use utils, only: EChk, setPointers
 #include <petsc/finclude/petsc.h>
-    use petsc
-    implicit none
+        use petsc
+        implicit none
 
-    ! PETSc Arguments
-    Vec   vecX
-    ! integer(kind=intType) ::ierr, i, j, k, l, nn, mm, sps, nVar, size, index
-    ! integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2, ii, jj, procID
-    ! real(kind=realType), pointer :: xPtr(:)
-    ! type(commType) :: commPattern
-    ! type(internalCommType) :: internal
-    ! integer, dimension(mpi_status_size) :: mpiStatus
+        ! PETSc Arguments
+        Vec vecX
+        ! integer(kind=intType) ::ierr, i, j, k, l, nn, mm, sps, nVar, size, index
+        ! integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2, ii, jj, procID
+        ! real(kind=realType), pointer :: xPtr(:)
+        ! type(commType) :: commPattern
+        ! type(internalCommType) :: internal
+        ! integer, dimension(mpi_status_size) :: mpiStatus
 
-    ! call VecGetArrayReadF90(vecX, xPtr, ierr)
-    ! call EChk(ierr,__FILE__,__LINE__)
+        ! call VecGetArrayReadF90(vecX, xPtr, ierr)
+        ! call EChk(ierr,__FILE__,__LINE__)
 
-    ! ! First set all the owned cells...this is basically just a straight
-    ! ! copy in order
-    ! ii = 0
-    ! do nn=1, nDom
-    !    do sps=1, nTimeIntervalsSpectral
-    !       call setPointers(nn, 1, sps)
-    !       do k=2, kl
-    !          do j=2, jl
-    !             do i=2, il
-    !                do l=1, nw
-    !                   ii = ii + 1
-    !                   flowDoms(nn, 1, sps)%PCVec1(l, i, j, k) = xPtr(ii)
-    !                end do
-    !             end do
-    !          end do
-    !       end do
-    !    end do
-    ! end do
+        ! ! First set all the owned cells...this is basically just a straight
+        ! ! copy in order
+        ! ii = 0
+        ! do nn=1, nDom
+        !    do sps=1, nTimeIntervalsSpectral
+        !       call setPointers(nn, 1, sps)
+        !       do k=2, kl
+        !          do j=2, jl
+        !             do i=2, il
+        !                do l=1, nw
+        !                   ii = ii + 1
+        !                   flowDoms(nn, 1, sps)%PCVec1(l, i, j, k) = xPtr(ii)
+        !                end do
+        !             end do
+        !          end do
+        !       end do
+        !    end do
+        ! end do
 
-    ! ! Done with the vecX.
-    ! call VecRestorearrayReadF90(vecX, xPtr, ierr)
-    ! call EChk(ierr,__FILE__,__LINE__)
+        ! ! Done with the vecX.
+        ! call VecRestorearrayReadF90(vecX, xPtr, ierr)
+        ! call EChk(ierr,__FILE__,__LINE__)
 
-    ! ! Now we do a custom halo exchange. We can use the same pattern as
-    ! ! whlo, but the ordering of the unknowns is different so we do our
-    ! ! own.
+        ! ! Now we do a custom halo exchange. We can use the same pattern as
+        ! ! whlo, but the ordering of the unknowns is different so we do our
+        ! ! own.
 
-    ! nVar = nw
-    ! internal = internalCell_1st(1)
-    ! commPattern = commPatternCell_1st(1)
+        ! nVar = nw
+        ! internal = internalCell_1st(1)
+        ! commPattern = commPatternCell_1st(1)
 
-    ! spectralModes: do mm=1,nTimeIntervalsSpectral
+        ! spectralModes: do mm=1,nTimeIntervalsSpectral
 
-    !    ! Send the variables. The data is first copied into
-    !    ! the send buffer after which the buffer is sent asap.
+        !    ! Send the variables. The data is first copied into
+        !    ! the send buffer after which the buffer is sent asap.
 
-    !    ii = 1
-    !    sends: do i=1,commPattern%nProcSend
+        !    ii = 1
+        !    sends: do i=1,commPattern%nProcSend
 
-    !       ! Store the processor id and the size of the message
-    !       ! a bit easier.
+        !       ! Store the processor id and the size of the message
+        !       ! a bit easier.
 
-    !       procID = commPattern%sendProc(i)
-    !       size    = nVar*commPattern%nsend(i)
+        !       procID = commPattern%sendProc(i)
+        !       size    = nVar*commPattern%nsend(i)
 
-    !       ! Copy the data in the correct part of the send buffer.
+        !       ! Copy the data in the correct part of the send buffer.
 
-    !       jj = ii
-    !       do j=1,commPattern%nsend(i)
+        !       jj = ii
+        !       do j=1,commPattern%nsend(i)
 
-    !          ! Store the block id and the indices of the donor
-    !          ! a bit easier.
+        !          ! Store the block id and the indices of the donor
+        !          ! a bit easier.
 
-    !          d1 = commPattern%sendList(i)%block(j)
-    !          i1 = commPattern%sendList(i)%indices(j,1)
-    !          j1 = commPattern%sendList(i)%indices(j,2)
-    !          k1 = commPattern%sendList(i)%indices(j,3)
+        !          d1 = commPattern%sendList(i)%block(j)
+        !          i1 = commPattern%sendList(i)%indices(j,1)
+        !          j1 = commPattern%sendList(i)%indices(j,2)
+        !          k1 = commPattern%sendList(i)%indices(j,3)
 
-    !          ! Copy the given range of the working variables for
-    !          ! this cell in the buffer. Update the counter jj.
+        !          ! Copy the given range of the working variables for
+        !          ! this cell in the buffer. Update the counter jj.
 
-    !          do k=1, nw
-    !             sendBuffer(jj) = flowDoms(d1,1,mm)%PCVec1(k, i1, j1, k1)
-    !             jj = jj + 1
-    !          enddo
+        !          do k=1, nw
+        !             sendBuffer(jj) = flowDoms(d1,1,mm)%PCVec1(k, i1, j1, k1)
+        !             jj = jj + 1
+        !          enddo
 
-    !       enddo
+        !       enddo
 
-    !       ! Send the data.
+        !       ! Send the data.
 
-    !       call mpi_isend(sendBuffer(ii), size, adflow_real, procID,  &
-    !            procID, ADflow_comm_world, sendRequests(i), &
-    !            ierr)
+        !       call mpi_isend(sendBuffer(ii), size, adflow_real, procID,  &
+        !            procID, ADflow_comm_world, sendRequests(i), &
+        !            ierr)
 
-    !       ! Set ii to jj for the next processor.
+        !       ! Set ii to jj for the next processor.
 
-    !       ii = jj
+        !       ii = jj
 
-    !    enddo sends
+        !    enddo sends
 
-    !    ! Post the nonblocking receives.
+        !    ! Post the nonblocking receives.
 
-    !    ii = 1
-    !    receives: do i=1,commPattern%nProcRecv
+        !    ii = 1
+        !    receives: do i=1,commPattern%nProcRecv
 
-    !       ! Store the processor id and the size of the message
-    !       ! a bit easier.
+        !       ! Store the processor id and the size of the message
+        !       ! a bit easier.
 
-    !       procID = commPattern%recvProc(i)
-    !       size    = nVar*commPattern%nrecv(i)
+        !       procID = commPattern%recvProc(i)
+        !       size    = nVar*commPattern%nrecv(i)
 
-    !       ! Post the receive.
+        !       ! Post the receive.
 
-    !       call mpi_irecv(recvBuffer(ii), size, adflow_real, procID, &
-    !            myID, ADflow_comm_world, recvRequests(i), ierr)
+        !       call mpi_irecv(recvBuffer(ii), size, adflow_real, procID, &
+        !            myID, ADflow_comm_world, recvRequests(i), ierr)
 
-    !       ! And update ii.
+        !       ! And update ii.
 
-    !       ii = ii + size
+        !       ii = ii + size
 
-    !    enddo receives
+        !    enddo receives
 
-    !    ! Copy the local data.
+        !    ! Copy the local data.
 
-    !    localCopy: do i=1,internal%ncopy
+        !    localCopy: do i=1,internal%ncopy
 
-    !       ! Store the block and the indices of the donor a bit easier.
+        !       ! Store the block and the indices of the donor a bit easier.
 
-    !       d1 = internal%donorBlock(i)
-    !       i1 = internal%donorIndices(i,1)
-    !       j1 = internal%donorIndices(i,2)
-    !       k1 = internal%donorIndices(i,3)
+        !       d1 = internal%donorBlock(i)
+        !       i1 = internal%donorIndices(i,1)
+        !       j1 = internal%donorIndices(i,2)
+        !       k1 = internal%donorIndices(i,3)
 
-    !       ! Idem for the halo's.
+        !       ! Idem for the halo's.
 
-    !       d2 = internal%haloBlock(i)
-    !       i2 = internal%haloIndices(i,1)
-    !       j2 = internal%haloIndices(i,2)
-    !       k2 = internal%haloIndices(i,3)
+        !       d2 = internal%haloBlock(i)
+        !       i2 = internal%haloIndices(i,1)
+        !       j2 = internal%haloIndices(i,2)
+        !       k2 = internal%haloIndices(i,3)
 
-    !       ! Copy the given range of working variables.
+        !       ! Copy the given range of working variables.
 
-    !       do k=1, nw
-    !          flowDoms(d2,1,mm)%PCVec1(k, i2, j2, k2) = &
-    !               flowDoms(d1,1,mm)%PCVec1(k, i1, j1, k1)
-    !       enddo
+        !       do k=1, nw
+        !          flowDoms(d2,1,mm)%PCVec1(k, i2, j2, k2) = &
+        !               flowDoms(d1,1,mm)%PCVec1(k, i1, j1, k1)
+        !       enddo
 
-    !    enddo localCopy
+        !    enddo localCopy
 
-    !    ! Complete the nonblocking receives in an arbitrary sequence and
-    !    ! copy the variables from the buffer into the halo's.
+        !    ! Complete the nonblocking receives in an arbitrary sequence and
+        !    ! copy the variables from the buffer into the halo's.
 
-    !    size = commPattern%nProcRecv
-    !    completeRecvs: do i=1,commPattern%nProcRecv
+        !    size = commPattern%nProcRecv
+        !    completeRecvs: do i=1,commPattern%nProcRecv
 
-    !       ! Complete any of the requests.
+        !       ! Complete any of the requests.
 
-    !       call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
+        !       call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
 
-    !       ! Copy the data just arrived in the halo's.
+        !       ! Copy the data just arrived in the halo's.
 
-    !       ii = index
-    !       jj = nVar*commPattern%nrecvCum(ii-1)
-    !       do j=1,commPattern%nrecv(ii)
+        !       ii = index
+        !       jj = nVar*commPattern%nrecvCum(ii-1)
+        !       do j=1,commPattern%nrecv(ii)
 
-    !          ! Store the block and the indices of the halo a bit easier.
+        !          ! Store the block and the indices of the halo a bit easier.
 
-    !          d2 = commPattern%recvList(ii)%block(j)
-    !          i2 = commPattern%recvList(ii)%indices(j,1)
-    !          j2 = commPattern%recvList(ii)%indices(j,2)
-    !          k2 = commPattern%recvList(ii)%indices(j,3)
+        !          d2 = commPattern%recvList(ii)%block(j)
+        !          i2 = commPattern%recvList(ii)%indices(j,1)
+        !          j2 = commPattern%recvList(ii)%indices(j,2)
+        !          k2 = commPattern%recvList(ii)%indices(j,3)
 
-    !          do k=1, nw
-    !             jj = jj + 1
-    !             flowDoms(d2,1,mm)%PCVec1(k, i2, j2, k2) = recvBuffer(jj)
-    !          enddo
+        !          do k=1, nw
+        !             jj = jj + 1
+        !             flowDoms(d2,1,mm)%PCVec1(k, i2, j2, k2) = recvBuffer(jj)
+        !          enddo
 
-    !       enddo
+        !       enddo
 
-    !    enddo completeRecvs
+        !    enddo completeRecvs
 
-    !    ! Complete the nonblocking sends.
+        !    ! Complete the nonblocking sends.
 
-    !    size = commPattern%nProcSend
-    !    do i=1,commPattern%nProcSend
-    !       call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
-    !    enddo
+        !    size = commPattern%nProcSend
+        !    do i=1,commPattern%nProcSend
+        !       call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
+        !    enddo
 
-    ! enddo spectralModes
+        ! enddo spectralModes
 
-  end subroutine setPCVec
+    end subroutine setPCVec
 
 end module fortranPC
diff --git a/src/adjoint/masterRoutines.F90 b/src/adjoint/masterRoutines.F90
index 6d4781699..d5691f6fa 100644
--- a/src/adjoint/masterRoutines.F90
+++ b/src/adjoint/masterRoutines.F90
@@ -1,1337 +1,1333 @@
 module masterRoutines
 contains
-  subroutine master(useSpatial, famLists, funcValues, forces, &
-       bcDataNames, bcDataValues, bcDataFamLists)
-
-    use constants
-    use communication, only : adflow_comm_world
-    use BCRoutines, only : applyallBC_block
-    use bcdata, only : setBCData, setBCDataFineGrid
-    use turbbcRoutines, only : applyallTurbBCthisblock, bcTurbTreatment
-    use iteration, only : currentLevel
-    use inputAdjoint,  only : viscPC
-    use flowVarRefState, only : nwf, nw
-    use blockPointers, only : nDom, il, jl, kl
-    use flowVarRefState, only : viscous
-    use inputPhysics , only : turbProd, equationMode, equations, turbModel
-    use inputDiscretization, only : lowSpeedPreconditioner, lumpedDiss, spaceDiscr, useAPproxWallDistance
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use initializeFlow, only : referenceState
-    use section, only: sections, nSections
-    use monitor, only : timeUnsteadyRestart
-    use sa, only : saSource, saViscous, saResScale, qq
-    use haloExchange, only : exchangeCoor, whalo2
-    use wallDistance, only : updateWallDistancesQuickly
-    use solverUtils, only : timeStep_block
-    use flowUtils, only : allNodalGradients, computeLamViscosity, computePressureSimple, &
-         computeSpeedOfSoundSquared, adjustInflowAngle
-    use fluxes, only : inviscidDissFluxScalarApprox, inviscidDissFluxMatrixApprox, &
-         inviscidUpwindFlux, inviscidDissFluxScalar, inviscidDissFluxMatrix, &
-         viscousFlux, viscousFluxApprox, inviscidCentralFlux
-    use utils, only : setPointers, EChk
-    use turbUtils, only : turbAdvection, computeEddyViscosity
-    use residuals, only : initRes_block, sourceTerms_block
-    use surfaceIntegrations, only : getSolution
-    use adjointExtra, only : volume_block, metric_block, boundaryNormals,&
-         xhalo_block, sumdwandfw, resScale
-    use oversetData, only : oversetPresent
-    use inputOverset, only : oversetUpdateMode
-    use oversetCommUtilities, only : updateOversetConnectivity
-    use actuatorRegionData, only : nActuatorRegions
-    use wallDistanceData, only : xSurfVec, xSurf
-
-    implicit none
-
-    ! Input Arguments:
-    logical, intent(in) :: useSpatial
-    integer(kind=intType), optional, dimension(:, :), intent(in) :: famLists
-    real(kind=realType), optional, dimension(:, :), intent(out) :: funcValues
-    character, optional, dimension(:, :), intent(in) :: bcDataNames
-    real(kind=realType), optional, dimension(:), intent(in) :: bcDataValues
-    integer(kind=intType), optional, dimension(:, :) :: bcDataFamLists
-
-    ! Output Variables
-    real(kind=realType), intent(out), optional, dimension(:, :, :) :: forces
-
-    ! Working Variables
-    integer(kind=intType) :: ierr, nn, sps, fSize, iRegion
-    real(kind=realType), dimension(nSections) :: time
-    real(kind=realType) :: dummyReal
-
-    if (useSpatial) then
-       call adjustInflowAngle()
-
-       ! Update all the BCData
-       call referenceState
-       if (present(bcDataNames)) then
-          do sps=1,nTimeIntervalsSpectral
-             call setBCData(bcDataNames, bcDataValues, bcDataFamLists, sps, &
-                  size(bcDataValues), size(bcDataFamLIsts, 2))
-          end do
-          call setBCDataFineGrid(.true.)
-       end if
-
-        do sps=1,nTimeIntervalsSpectral
-          do nn=1,nDom
-             call setPointers(nn, 1, sps)
-             call xhalo_block()
-          end do
-       end do
-
-       ! Now exchange the coordinates (fine level only)
-       call exchangecoor(1)
-
-       do sps=1, nTimeIntervalsSpectral
-          ! Update overset connectivity if necessary
-          if (oversetPresent .and. oversetUpdateMode == updateFast) then
-             call updateOversetConnectivity(1_intType, sps)
-          end if
-       end do
-    end if
-
-    do sps=1,nTimeIntervalsSpectral
-       do nn=1,nDom
-          call setPointers(nn, 1, sps)
-
-          if (useSpatial) then
-
-             call VecGetArrayF90(xSurfVec(1, sps), xSurf, ierr)
-             call EChk(ierr,__FILE__,__LINE__)
-
-             call volume_block
-             call metric_block
-             call boundaryNormals
-
-             if (equations == RANSEquations .and. useApproxWallDistance) then
-                call updateWallDistancesQuickly(nn, 1, sps)
-             end if
-
-            ! These arrays need to be restored before we can move to the next spectral instance.
-             call VecRestoreArrayF90(xSurfVec(1, sps), xSurf, ierr)
-             call EChk(ierr,__FILE__,__LINE__)
+    subroutine master(useSpatial, famLists, funcValues, forces, &
+                      bcDataNames, bcDataValues, bcDataFamLists)
+
+        use constants
+        use communication, only: adflow_comm_world
+        use BCRoutines, only: applyallBC_block
+        use bcdata, only: setBCData, setBCDataFineGrid
+        use turbbcRoutines, only: applyallTurbBCthisblock, bcTurbTreatment
+        use iteration, only: currentLevel
+        use inputAdjoint, only: viscPC
+        use flowVarRefState, only: nwf, nw
+        use blockPointers, only: nDom, il, jl, kl
+        use flowVarRefState, only: viscous
+        use inputPhysics, only: turbProd, equationMode, equations, turbModel
+        use inputDiscretization, only: lowSpeedPreconditioner, lumpedDiss, spaceDiscr, useAPproxWallDistance
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use initializeFlow, only: referenceState
+        use section, only: sections, nSections
+        use monitor, only: timeUnsteadyRestart
+        use sa, only: saSource, saViscous, saResScale, qq
+        use haloExchange, only: exchangeCoor, whalo2
+        use wallDistance, only: updateWallDistancesQuickly
+        use solverUtils, only: timeStep_block
+        use flowUtils, only: allNodalGradients, computeLamViscosity, computePressureSimple, &
+                             computeSpeedOfSoundSquared, adjustInflowAngle
+        use fluxes, only: inviscidDissFluxScalarApprox, inviscidDissFluxMatrixApprox, &
+                          inviscidUpwindFlux, inviscidDissFluxScalar, inviscidDissFluxMatrix, &
+                          viscousFlux, viscousFluxApprox, inviscidCentralFlux
+        use utils, only: setPointers, EChk
+        use turbUtils, only: turbAdvection, computeEddyViscosity
+        use residuals, only: initRes_block, sourceTerms_block
+        use surfaceIntegrations, only: getSolution
+        use adjointExtra, only: volume_block, metric_block, boundaryNormals, &
+                                xhalo_block, sumdwandfw, resScale
+        use oversetData, only: oversetPresent
+        use inputOverset, only: oversetUpdateMode
+        use oversetCommUtilities, only: updateOversetConnectivity
+        use actuatorRegionData, only: nActuatorRegions
+        use wallDistanceData, only: xSurfVec, xSurf
+
+        implicit none
+
+        ! Input Arguments:
+        logical, intent(in) :: useSpatial
+        integer(kind=intType), optional, dimension(:, :), intent(in) :: famLists
+        real(kind=realType), optional, dimension(:, :), intent(out) :: funcValues
+        character, optional, dimension(:, :), intent(in) :: bcDataNames
+        real(kind=realType), optional, dimension(:), intent(in) :: bcDataValues
+        integer(kind=intType), optional, dimension(:, :) :: bcDataFamLists
+
+        ! Output Variables
+        real(kind=realType), intent(out), optional, dimension(:, :, :) :: forces
+
+        ! Working Variables
+        integer(kind=intType) :: ierr, nn, sps, fSize, iRegion
+        real(kind=realType), dimension(nSections) :: time
+        real(kind=realType) :: dummyReal
+
+        if (useSpatial) then
+            call adjustInflowAngle()
+
+            ! Update all the BCData
+            call referenceState
+            if (present(bcDataNames)) then
+                do sps = 1, nTimeIntervalsSpectral
+                    call setBCData(bcDataNames, bcDataValues, bcDataFamLists, sps, &
+                                   size(bcDataValues), size(bcDataFamLIsts, 2))
+                end do
+                call setBCDataFineGrid(.true.)
+            end if
 
+            do sps = 1, nTimeIntervalsSpectral
+                do nn = 1, nDom
+                    call setPointers(nn, 1, sps)
+                    call xhalo_block()
+                end do
+            end do
+
+            ! Now exchange the coordinates (fine level only)
+            call exchangecoor(1)
+
+            do sps = 1, nTimeIntervalsSpectral
+                ! Update overset connectivity if necessary
+                if (oversetPresent .and. oversetUpdateMode == updateFast) then
+                    call updateOversetConnectivity(1_intType, sps)
+                end if
+            end do
+        end if
+
+        do sps = 1, nTimeIntervalsSpectral
+            do nn = 1, nDom
+                call setPointers(nn, 1, sps)
+
+                if (useSpatial) then
+
+                    call VecGetArrayF90(xSurfVec(1, sps), xSurf, ierr)
+                    call EChk(ierr, __FILE__, __LINE__)
+
+                    call volume_block
+                    call metric_block
+                    call boundaryNormals
+
+                    if (equations == RANSEquations .and. useApproxWallDistance) then
+                        call updateWallDistancesQuickly(nn, 1, sps)
+                    end if
+
+                    ! These arrays need to be restored before we can move to the next spectral instance.
+                    call VecRestoreArrayF90(xSurfVec(1, sps), xSurf, ierr)
+                    call EChk(ierr, __FILE__, __LINE__)
+
+                end if
+
+                ! Compute the pressures/viscositites
+                call computePressureSimple(.False.)
+
+                ! Compute Laminar/eddy viscosity if required
+                call computeLamViscosity(.False.)
+                call computeEddyViscosity(.False.)
+
+                ! Make sure to call the turb BC's first incase we need to
+                ! correct for K
+                if (equations == RANSequations) then
+                    call BCTurbTreatment
+                    call applyAllTurbBCthisblock(.True.)
+                end if
+                call applyAllBC_block(.True.)
+            end do
+        end do
+
+        ! Exchange values
+        call whalo2(currentLevel, 1_intType, nw, .True., .True., .True.)
+
+        ! Need to re-apply the BCs. The reason is that BC halos behind
+        ! interpolated cells need to be recomputed with their new
+        ! interpolated values from actual compute cells. Only needed for
+        ! overset.
+        if (oversetPresent) then
+            do sps = 1, nTimeIntervalsSpectral
+                do nn = 1, nDom
+                    call setPointers(nn, 1, sps)
+                    if (equations == RANSequations) then
+                        call BCTurbTreatment
+                        call applyAllTurbBCthisblock(.True.)
+                    end if
+                    call applyAllBC_block(.True.)
+                end do
+            end do
+        end if
+
+        ! Main loop for the residual
+        do sps = 1, nTimeIntervalsSpectral
+            do nn = 1, nDom
+                call setPointers(nn, 1, sps)
+                call initRes_block(1, nw, nn, sps)
+                do iRegion = 1, nActuatorRegions
+                    call sourceTerms_block(nn, .True., iRegion, dummyReal)
+                end do
 
+                call timeStep_block(.false.)
+
+                ! Compute turbulence residual for RANS equations
+                if (equations == RANSEquations) then
+
+                    ! Initialize only the Turblent Variables
+                    !call unsteadyTurbSpectral_block(itu1, itu1, nn, sps)
+
+                    select case (turbModel)
+
+                    case (spalartAllmaras)
+                        allocate (qq(2:il, 2:jl, 2:kl))
+                        call saSource
+                        call turbAdvection(1_intType, 1_intType, itu1 - 1, qq)
+                        !call unsteadyTurbTerm(1_intType, 1_intType, itu1-1, qq)
+                        call saViscous
+                        call saResScale
+                        deallocate (qq)
+                    end select
+                end if
+
+                ! Compute the mean flow residuals
+                call inviscidCentralFlux
+                if (lumpedDiss) then
+                    select case (spaceDiscr)
+                    case (dissScalar)
+                        call inviscidDissFluxScalarApprox
+                    case (dissMatrix)
+                        call inviscidDissFluxMatrixApprox
+                    case (upwind)
+                        call inviscidUpwindFlux(.True.)
+                    end select
+                else
+                    select case (spaceDiscr)
+                    case (dissScalar)
+                        call inviscidDissFluxScalar
+                    case (dissMatrix)
+                        call inviscidDissFluxMatrix
+                    case (upwind)
+                        call inviscidUpwindFlux(.True.)
+                    end select
+                end if
+
+                if (viscous) then
+                    call computeSpeedOfSoundSquared
+                    if (.not. lumpedDiss .or. viscPC) then
+                        call allNodalGradients
+                        call viscousFlux
+                    else
+                        call viscousFluxApprox
+                    end if
+                end if
+                call sumDwAndFw
+                ! if (lowSpeedPreconditioner) then
+                !    call applyLowSpeedPreconditioner
+                ! end if
+                call resScale
+            end do
+        end do
+
+        ! Compute the final solution values
+        if (present(famLists)) then
+            call getSolution(famLists, funcValues)
+        end if
+
+        do sps = 1, nTimeIntervalsSpectral
+            if (present(forces)) then
+                ! Now we can retrieve the forces/tractions for this spectral instance
+                fSize = size(forces, 2)
+                call getForces(forces(:, :, sps), fSize, sps)
             end if
+        end do
 
-          ! Compute the pressures/viscositites
-          call computePressureSimple(.False.)
-
-          ! Compute Laminar/eddy viscosity if required
-          call computeLamViscosity(.False.)
-          call computeEddyViscosity(.False.)
-
-          ! Make sure to call the turb BC's first incase we need to
-          ! correct for K
-          if (equations == RANSequations) then
-             call BCTurbTreatment
-             call applyAllTurbBCthisblock(.True.)
-          end if
-          call applyAllBC_block(.True.)
-       end do
-    end do
-
-    ! Exchange values
-    call whalo2(currentLevel, 1_intType, nw, .True., .True., .True.)
-
-    ! Need to re-apply the BCs. The reason is that BC halos behind
-    ! interpolated cells need to be recomputed with their new
-    ! interpolated values from actual compute cells. Only needed for
-    ! overset.
-    if (oversetPresent) then
-       do sps=1,nTimeIntervalsSpectral
-          do nn=1,nDom
-             call setPointers(nn, 1, sps)
-             if (equations == RANSequations) then
-                call BCTurbTreatment
-                call applyAllTurbBCthisblock(.True.)
-             end if
-             call applyAllBC_block(.True.)
-          end do
-       end do
-    end if
-
-    ! Main loop for the residual
-    do sps=1, nTimeIntervalsSpectral
-       do nn=1, nDom
-          call setPointers(nn, 1, sps)
-          call initRes_block(1, nw, nn, sps)
-          do iRegion=1, nActuatorRegions
-             call sourceTerms_block(nn, .True., iRegion, dummyReal)
-          end do
-
-          call timeStep_block(.false.)
-
-          ! Compute turbulence residual for RANS equations
-          if( equations == RANSEquations) then
-
-             ! Initialize only the Turblent Variables
-             !call unsteadyTurbSpectral_block(itu1, itu1, nn, sps)
-
-             select case (turbModel)
-
-             case (spalartAllmaras)
-                allocate(qq(2:il, 2:jl, 2:kl))
-                call saSource
-                call turbAdvection(1_intType, 1_intType, itu1-1, qq)
-                !call unsteadyTurbTerm(1_intType, 1_intType, itu1-1, qq)
-                call saViscous
-                call saResScale
-                deallocate(qq)
-             end select
-          endif
-
-          ! Compute the mean flow residuals
-          call inviscidCentralFlux
-          if (lumpedDiss) then
-             select case (spaceDiscr)
-             case (dissScalar)
-                call inviscidDissFluxScalarApprox
-             case (dissMatrix)
-                call inviscidDissFluxMatrixApprox
-             case (upwind)
-                call inviscidUpwindFlux(.True.)
-             end select
-          else
-             select case (spaceDiscr)
-             case (dissScalar)
-                call inviscidDissFluxScalar
-             case (dissMatrix)
-                call inviscidDissFluxMatrix
-             case (upwind)
-                call inviscidUpwindFlux(.True.)
-             end select
-          end if
-
-          if (viscous) then
-             call computeSpeedOfSoundSquared
-             if (.not. lumpedDiss .or. viscPC) then
-                call allNodalGradients
-                call viscousFlux
-             else
-                call viscousFluxApprox
-             end if
-          end if
-          call sumDwAndFw
-          ! if (lowSpeedPreconditioner) then
-          !    call applyLowSpeedPreconditioner
-          ! end if
-          call resScale
-       end do
-    end do
-
-    ! Compute the final solution values
-    if (present(famLists)) then
-       call getSolution(famLists, funcValues)
-    end if
-
-    do sps=1, nTimeIntervalsSpectral
-       if (present(forces)) then
-          ! Now we can retrieve the forces/tractions for this spectral instance
-          fSize = size(forces, 2)
-          call getForces(forces(:, :, sps), fSize, sps)
-       end if
-    end do
-
-  end subroutine master
+    end subroutine master
 #ifndef USE_COMPLEX
-  subroutine master_d(wdot, xdot, forcesDot, dwDot, famLists, funcValues, funcValuesd, &
-       bcDataNames, bcDataValues, bcDataValuesd, bcDataFamLists)
-
-    use constants
-    use diffsizes, only :  ISIZE1OFDrfbcdata, ISIZE1OFDrfviscsubface
-    use communication, only : adflow_comm_world, myID
-    use iteration, only : currentLevel
-    use BCExtra_d, only : applyAllBC_Block_d
-    use inputAdjoint,  only : viscPC
-    use flowVarRefState, only : nw, nwf
-    use blockPointers, only : nDom, il, jl, kl, wd, xd, dw, dwd, nBocos, nViscBocos
-    use flowVarRefState, only : viscous, timerefd
-    use inputPhysics , only : turbProd, equationMode, equations, turbModel, wallDistanceNeeded
-    use inputDiscretization, only : lowSpeedPreconditioner, lumpedDiss, spaceDiscr, useAPproxWallDistance
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use section, only: sections, nSections
-    use monitor, only : timeUnsteadyRestart
-    use utils, only : isWallType, setPointers, setPointers_d, EChk
-    use sa_d, only : saSource_d, saViscous_d, saResScale_d, qq
-    use turbutils_d, only : turbAdvection_d, computeEddyViscosity_d
-    use fluxes_d, only :inviscidDissFluxScalarApprox_d, inviscidDissFluxMatrixApprox_d, &
-         inviscidUpwindFlux_d, inviscidDissFluxScalar_d, inviscidDissFluxMatrix_d, &
-         inviscidUpwindFlux_d, viscousFlux_d, viscousFluxApprox_d, inviscidCentralFlux_d
-    use residuals_d, only : sourceTerms_block_d, initres_block_d
-    use adjointPETSc, only : x_like
-    use haloExchange, only : whalo2_d, exchangeCoor_d, exchangeCoor, whalo2
-    use wallDistance_d, only : updateWallDistancesQuickly_d
-    use wallDistanceData, only : xSurfVec, xSurfVecd, xSurf, xSurfd, wallScatter
-    use flowutils_d, only : computePressureSimple_d, computeLamViscosity_d, &
-         computeSpeedOfSoundSquared_d, allNodalGradients_d, adjustInflowAngle_d
-    use solverutils_d, only : timeStep_Block_d, gridvelocitiesfinelevel_block_d, slipvelocitiesfinelevel_block_d, &
-        normalvelocities_block_d
-    use turbbcroutines_d, only : applyAllTurbBCthisblock_d,  bcTurbTreatment_d
-    use initializeflow_d, only : referenceState_d
-    use surfaceIntegrations, only : getSolution_d
-    use adjointExtra_d, only : xhalo_block_d, volume_block_d, metric_BLock_d, boundarynormals_d
-    use adjointextra_d, only : resscale_D, sumdwandfw_d
-    use bcdata, only : setBCData_d, setBCDataFineGrid_d
-    use oversetData, only : oversetPresent
-    use inputOverset, only : oversetUpdateMode
-    use oversetCommUtilities, only : updateOversetConnectivity_d
-    use actuatorRegionData, only : nActuatorRegions
+    subroutine master_d(wdot, xdot, forcesDot, dwDot, famLists, funcValues, funcValuesd, &
+                        bcDataNames, bcDataValues, bcDataValuesd, bcDataFamLists)
+
+        use constants
+        use diffsizes, only: ISIZE1OFDrfbcdata, ISIZE1OFDrfviscsubface
+        use communication, only: adflow_comm_world, myID
+        use iteration, only: currentLevel
+        use BCExtra_d, only: applyAllBC_Block_d
+        use inputAdjoint, only: viscPC
+        use flowVarRefState, only: nw, nwf
+        use blockPointers, only: nDom, il, jl, kl, wd, xd, dw, dwd, nBocos, nViscBocos
+        use flowVarRefState, only: viscous, timerefd
+        use inputPhysics, only: turbProd, equationMode, equations, turbModel, wallDistanceNeeded
+        use inputDiscretization, only: lowSpeedPreconditioner, lumpedDiss, spaceDiscr, useAPproxWallDistance
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use section, only: sections, nSections
+        use monitor, only: timeUnsteadyRestart
+        use utils, only: isWallType, setPointers, setPointers_d, EChk
+        use sa_d, only: saSource_d, saViscous_d, saResScale_d, qq
+        use turbutils_d, only: turbAdvection_d, computeEddyViscosity_d
+        use fluxes_d, only: inviscidDissFluxScalarApprox_d, inviscidDissFluxMatrixApprox_d, &
+                            inviscidUpwindFlux_d, inviscidDissFluxScalar_d, inviscidDissFluxMatrix_d, &
+                            inviscidUpwindFlux_d, viscousFlux_d, viscousFluxApprox_d, inviscidCentralFlux_d
+        use residuals_d, only: sourceTerms_block_d, initres_block_d
+        use adjointPETSc, only: x_like
+        use haloExchange, only: whalo2_d, exchangeCoor_d, exchangeCoor, whalo2
+        use wallDistance_d, only: updateWallDistancesQuickly_d
+        use wallDistanceData, only: xSurfVec, xSurfVecd, xSurf, xSurfd, wallScatter
+        use flowutils_d, only: computePressureSimple_d, computeLamViscosity_d, &
+                               computeSpeedOfSoundSquared_d, allNodalGradients_d, adjustInflowAngle_d
+        use solverutils_d, only: timeStep_Block_d, gridvelocitiesfinelevel_block_d, slipvelocitiesfinelevel_block_d, &
+                                 normalvelocities_block_d
+        use turbbcroutines_d, only: applyAllTurbBCthisblock_d, bcTurbTreatment_d
+        use initializeflow_d, only: referenceState_d
+        use surfaceIntegrations, only: getSolution_d
+        use adjointExtra_d, only: xhalo_block_d, volume_block_d, metric_BLock_d, boundarynormals_d
+        use adjointextra_d, only: resscale_D, sumdwandfw_d
+        use bcdata, only: setBCData_d, setBCDataFineGrid_d
+        use oversetData, only: oversetPresent
+        use inputOverset, only: oversetUpdateMode
+        use oversetCommUtilities, only: updateOversetConnectivity_d
+        use actuatorRegionData, only: nActuatorRegions
 #include <petsc/finclude/petsc.h>
-    use petsc
-    implicit none
-
-    ! Input Arguments:
-    real(kind=realType), intent(in), dimension(:) :: wDot, xDot
-    integer(kind=intType), optional, dimension(:, :), intent(in) :: famLists
-    real(kind=realType), optional, dimension(:, :), intent(out) :: funcValues, funcValuesd
-    character, optional, dimension(:, :), intent(in) :: bcDataNames
-    real(kind=realType), optional, dimension(:), intent(in) :: bcDataValues, bcDataValuesd
-    integer(kind=intType), optional, dimension(:, :) :: bcDataFamLists
-
-    ! Output variables:
-    real(kind=realType), intent(out), dimension(:) :: dwDot
-    real(kind=realType), intent(out), dimension(:, :, :) :: forcesDot
-
-    ! Working Variables
-    real(kind=realType), dimension(:, :, :), allocatable :: forces
-    integer(kind=intType) :: ierr, nn, sps, mm,i,j,k, l, fSize, ii, jj, iRegion
-    real(kind=realType), dimension(nSections) :: time
-    real(kind=realType) :: dummyReal, dummyReald
-
-    logical :: useOldCoor ! for adjointextra_d() functions
-    useOldCoor = .FALSE.
-
-    fSize = size(forcesDot, 2)
-    allocate(forces(3, fSize, nTimeIntervalsSpectral))
-
-    call VecPlaceArray(x_like, xdot, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Set the provided w and x seeds:
-    ii = 0
-    jj = 0
-    domainLoop1: do nn=1,nDom
-       spectalLoop1: do sps=1,nTimeIntervalsSpectral
-          call setPointers_d(nn, 1, sps)
-          do k=1, kl
-             do j=1,jl
-                do i=1,il
-                   do l=1,3
-                      ii = ii + 1
-                      xd(i, j, k, l) = xdot(ii)
-                   end do
+        use petsc
+        implicit none
+
+        ! Input Arguments:
+        real(kind=realType), intent(in), dimension(:) :: wDot, xDot
+        integer(kind=intType), optional, dimension(:, :), intent(in) :: famLists
+        real(kind=realType), optional, dimension(:, :), intent(out) :: funcValues, funcValuesd
+        character, optional, dimension(:, :), intent(in) :: bcDataNames
+        real(kind=realType), optional, dimension(:), intent(in) :: bcDataValues, bcDataValuesd
+        integer(kind=intType), optional, dimension(:, :) :: bcDataFamLists
+
+        ! Output variables:
+        real(kind=realType), intent(out), dimension(:) :: dwDot
+        real(kind=realType), intent(out), dimension(:, :, :) :: forcesDot
+
+        ! Working Variables
+        real(kind=realType), dimension(:, :, :), allocatable :: forces
+        integer(kind=intType) :: ierr, nn, sps, mm, i, j, k, l, fSize, ii, jj, iRegion
+        real(kind=realType), dimension(nSections) :: time
+        real(kind=realType) :: dummyReal, dummyReald
+
+        logical :: useOldCoor ! for adjointextra_d() functions
+        useOldCoor = .FALSE.
+
+        fSize = size(forcesDot, 2)
+        allocate (forces(3, fSize, nTimeIntervalsSpectral))
+
+        call VecPlaceArray(x_like, xdot, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Set the provided w and x seeds:
+        ii = 0
+        jj = 0
+        domainLoop1: do nn = 1, nDom
+            spectalLoop1: do sps = 1, nTimeIntervalsSpectral
+                call setPointers_d(nn, 1, sps)
+                do k = 1, kl
+                    do j = 1, jl
+                        do i = 1, il
+                            do l = 1, 3
+                                ii = ii + 1
+                                xd(i, j, k, l) = xdot(ii)
+                            end do
+                        end do
+                    end do
                 end do
-             end do
-          end do
-          do k=2, kl
-             do j=2,jl
-                do i=2,il
-                   do l = 1, nw
-                      jj = jj + 1
-                      wd(i, j, k, l) = wDot(jj)
-                   end do
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            do l = 1, nw
+                                jj = jj + 1
+                                wd(i, j, k, l) = wDot(jj)
+                            end do
+                        end do
+                    end do
                 end do
-             end do
-          end do
-       end do spectalLoop1
-    end do domainLoop1
-
-    do sps=1,nTimeIntervalsSpectral
-       do nn=1,nDom
-          call setPointers_d(nn, 1, sps)
-          call xhalo_block_d()
-       end do
-    end do
-
-    ! Now exchange the coordinates. Note that we *must* exhchange the
-    ! actual coordinates as well becuase xhao_block overwrites all
-    ! halo nodes and exchange coor corrects them.
-    call exchangecoor_d(1)
-    call exchangecoor(1)
-
-    do sps=1, nTimeIntervalsSpectral
-       ! Update overset connectivity if necessary
-       if (oversetPresent) then
-          if (oversetUpdateMode == updateFast) then
-             call updateOversetConnectivity_d(1_intType, sps)
-          else if (oversetUpdateMode == updateFull) then
-             if (myID == 0) then
-                print *,'Full overset update derivatives not implemented'
-             end if
-          end if
-       end if
-    end do
-
-    ! Now set the xsurfd contribution from the full x perturbation.
-    ! scatter from the global seed (in x_like) to xSurfVecd...but only
-    ! if wallDistances were used
-    if (wallDistanceNeeded .and. useApproxWallDistance) then
-       do sps=1, nTimeIntervalsSpectral
-          call VecScatterBegin(wallScatter(1, sps), x_like, xSurfVecd(sps), INSERT_VALUES, SCATTER_FORWARD, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          call VecScatterEnd(wallScatter(1, sps), x_like, xSurfVecd(sps),  INSERT_VALUES, SCATTER_FORWARD, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-       end do
-    end if
-
-    call adjustInflowAngle_d
-    call referenceState_d
-    if (present(bcDataNames)) then
-       do sps=1,nTimeIntervalsSpectral
-          call setBCData_d(bcDataNames, bcDataValues, bcDataValuesd, &
-               bcDataFamLists, sps, size(bcDataValues), size(bcDataFamLists, 2))
-       end do
-       call setBCDataFineGrid_d(.true.)
-    end if
-
-    do sps=1,nTimeIntervalsSpectral
-       do nn=1,nDom
-
-          call setPointers_d(nn, 1, sps)
-          ISIZE1OFDrfbcdata = nBocos
-          ISIZE1OFDrfviscsubface = nViscBocos
-
-          ! Get the pointers from the petsc vector for the surface
-          ! perturbation for wall distance.
-          call VecGetArrayF90(xSurfVec(1, sps), xSurf, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          ! And it's derivative
-          call VecGetArrayF90(xSurfVecd(sps), xSurfd, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          call volume_block_d()
-          call metric_block_d()
-          call boundaryNormals_d()
-
-          time = timeunsteadyrestart
-          if (equationmode .eq. timespectral) then
-             do mm=1,nsections
-                time(mm) = time(mm) + (sps-1)*sections(mm)%timeperiod/real(&
-                     &         ntimeintervalsspectral, realtype)
-             end do
-          end if
-
-          call gridvelocitiesfinelevel_block_d(useoldcoor, time, sps, nn)
-          ! required for ts
-          call normalvelocities_block_d(sps)
-          ! required for ts
-          call slipvelocitiesfinelevel_block_d(useoldcoor, time, sps, nn)
-
-
-          if (equations == RANSEquations .and. useApproxWallDistance) then
-             call updateWallDistancesQuickly_d(nn, 1, sps)
-          end if
-
-          call computePressureSimple_d(.False.)
-          call computeLamViscosity_d(.False.)
-          call computeEddyViscosity_d(.False.)
-
-          ! Make sure to call the turb BC's first incase we need to
-          ! correct for K
-          if (equations == RANSequations) then
-             call BCTurbTreatment_d
-             call applyAllTurbBCthisblock_d(.True.)
-          end if
-
-          call applyAllBC_block_d(.True.)
-
-          ! These arrays need to be restored before we can move to the next spectral instance.
-          call VecRestoreArrayF90(xSurfVec(1, sps), xSurf, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          ! And it's derivative
-          call VecRestoreArrayF90(xSurfVecd(sps), xSurfd, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-       end do
-    end do
-
-    ! Just exchange the derivative values.
-    call whalo2_d(1, 1, nw, .True., .True., .True.)
-
-    ! Need to re-apply the BCs. The reason is that BC halos behind
-    ! interpolated cells need to be recomputed with their new
-    ! interpolated values from actual compute cells. Only needed for
-    ! overset.
-    if (oversetPresent) then
-       do sps=1,nTimeIntervalsSpectral
-          do nn=1,nDom
-             call setPointers_d(nn, 1, sps)
-             if (equations == RANSequations) then
-                call BCTurbTreatment_d
-                call applyAllTurbBCthisblock_d(.True.)
-             end if
-             call applyAllBC_block_d(.True.)
-          end do
-       end do
-    end if
-
-    do sps=1, nTimeIntervalsSpectral
-       do nn=1, nDom
-          call setPointers_d(nn, 1, sps)
-          ISIZE1OFDrfbcdata = nBocos
-          ISIZE1OFDrfviscsubface = nViscBocos
-
-          ! initalize the residuals for this block
-          call initRes_block_d(1, nw, nn, sps)
-
-          ! Compute any source terms
-          do iRegion=1, nActuatorRegions
-             call sourceTerms_block_d(nn, .True. , iRegion, dummyReal, dummyReald)
-          end do
-
-          call timeStep_block_d(.false.)
-
-          !Compute turbulence residual for RANS equations
-          if( equations == RANSEquations) then
-             !call unsteadyTurbSpectral_block(itu1, itu1, nn, sps)
-
-             select case (turbModel)
-             case (spalartAllmaras)
-                call saSource_d
-                call turbAdvection_d(1_intType, 1_intType, itu1-1, qq)
+            end do spectalLoop1
+        end do domainLoop1
+
+        do sps = 1, nTimeIntervalsSpectral
+            do nn = 1, nDom
+                call setPointers_d(nn, 1, sps)
+                call xhalo_block_d()
+            end do
+        end do
+
+        ! Now exchange the coordinates. Note that we *must* exhchange the
+        ! actual coordinates as well becuase xhao_block overwrites all
+        ! halo nodes and exchange coor corrects them.
+        call exchangecoor_d(1)
+        call exchangecoor(1)
+
+        do sps = 1, nTimeIntervalsSpectral
+            ! Update overset connectivity if necessary
+            if (oversetPresent) then
+                if (oversetUpdateMode == updateFast) then
+                    call updateOversetConnectivity_d(1_intType, sps)
+                else if (oversetUpdateMode == updateFull) then
+                    if (myID == 0) then
+                        print *, 'Full overset update derivatives not implemented'
+                    end if
+                end if
+            end if
+        end do
+
+        ! Now set the xsurfd contribution from the full x perturbation.
+        ! scatter from the global seed (in x_like) to xSurfVecd...but only
+        ! if wallDistances were used
+        if (wallDistanceNeeded .and. useApproxWallDistance) then
+            do sps = 1, nTimeIntervalsSpectral
+                call VecScatterBegin(wallScatter(1, sps), x_like, xSurfVecd(sps), INSERT_VALUES, SCATTER_FORWARD, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                call VecScatterEnd(wallScatter(1, sps), x_like, xSurfVecd(sps), INSERT_VALUES, SCATTER_FORWARD, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+            end do
+        end if
+
+        call adjustInflowAngle_d
+        call referenceState_d
+        if (present(bcDataNames)) then
+            do sps = 1, nTimeIntervalsSpectral
+                call setBCData_d(bcDataNames, bcDataValues, bcDataValuesd, &
+                                 bcDataFamLists, sps, size(bcDataValues), size(bcDataFamLists, 2))
+            end do
+            call setBCDataFineGrid_d(.true.)
+        end if
+
+        do sps = 1, nTimeIntervalsSpectral
+            do nn = 1, nDom
+
+                call setPointers_d(nn, 1, sps)
+                ISIZE1OFDrfbcdata = nBocos
+                ISIZE1OFDrfviscsubface = nViscBocos
+
+                ! Get the pointers from the petsc vector for the surface
+                ! perturbation for wall distance.
+                call VecGetArrayF90(xSurfVec(1, sps), xSurf, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                ! And it's derivative
+                call VecGetArrayF90(xSurfVecd(sps), xSurfd, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                call volume_block_d()
+                call metric_block_d()
+                call boundaryNormals_d()
+
+                time = timeunsteadyrestart
+                if (equationmode .eq. timespectral) then
+                    do mm = 1, nsections
+                        time(mm) = time(mm) + (sps - 1) * sections(mm)%timeperiod / real(&
+                             &         ntimeintervalsspectral, realtype)
+                    end do
+                end if
+
+                call gridvelocitiesfinelevel_block_d(useoldcoor, time, sps, nn)
+                ! required for ts
+                call normalvelocities_block_d(sps)
+                ! required for ts
+                call slipvelocitiesfinelevel_block_d(useoldcoor, time, sps, nn)
+
+                if (equations == RANSEquations .and. useApproxWallDistance) then
+                    call updateWallDistancesQuickly_d(nn, 1, sps)
+                end if
+
+                call computePressureSimple_d(.False.)
+                call computeLamViscosity_d(.False.)
+                call computeEddyViscosity_d(.False.)
+
+                ! Make sure to call the turb BC's first incase we need to
+                ! correct for K
+                if (equations == RANSequations) then
+                    call BCTurbTreatment_d
+                    call applyAllTurbBCthisblock_d(.True.)
+                end if
+
+                call applyAllBC_block_d(.True.)
+
+                ! These arrays need to be restored before we can move to the next spectral instance.
+                call VecRestoreArrayF90(xSurfVec(1, sps), xSurf, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                ! And it's derivative
+                call VecRestoreArrayF90(xSurfVecd(sps), xSurfd, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+            end do
+        end do
+
+        ! Just exchange the derivative values.
+        call whalo2_d(1, 1, nw, .True., .True., .True.)
+
+        ! Need to re-apply the BCs. The reason is that BC halos behind
+        ! interpolated cells need to be recomputed with their new
+        ! interpolated values from actual compute cells. Only needed for
+        ! overset.
+        if (oversetPresent) then
+            do sps = 1, nTimeIntervalsSpectral
+                do nn = 1, nDom
+                    call setPointers_d(nn, 1, sps)
+                    if (equations == RANSequations) then
+                        call BCTurbTreatment_d
+                        call applyAllTurbBCthisblock_d(.True.)
+                    end if
+                    call applyAllBC_block_d(.True.)
+                end do
+            end do
+        end if
+
+        do sps = 1, nTimeIntervalsSpectral
+            do nn = 1, nDom
+                call setPointers_d(nn, 1, sps)
+                ISIZE1OFDrfbcdata = nBocos
+                ISIZE1OFDrfviscsubface = nViscBocos
+
+                ! initalize the residuals for this block
+                call initRes_block_d(1, nw, nn, sps)
+
+                ! Compute any source terms
+                do iRegion = 1, nActuatorRegions
+                    call sourceTerms_block_d(nn, .True., iRegion, dummyReal, dummyReald)
+                end do
+
+                call timeStep_block_d(.false.)
+
+                !Compute turbulence residual for RANS equations
+                if (equations == RANSEquations) then
+                    !call unsteadyTurbSpectral_block(itu1, itu1, nn, sps)
+
+                    select case (turbModel)
+                    case (spalartAllmaras)
+                        call saSource_d
+                        call turbAdvection_d(1_intType, 1_intType, itu1 - 1, qq)
                 !!call unsteadyTurbTerm_d(1_intType, 1_intType, itu1-1, qq)
+                        call saViscous_d
+                        call saResScale_d
+                    end select
+                end if
+
+                ! compute the mean flow residual
+                call inviscidCentralFlux_d
+
+                if (lumpedDiss) then
+                    select case (spaceDiscr)
+                    case (dissScalar)
+                        call inviscidDissFluxScalarApprox_d
+                    case (dissMatrix)
+                        call inviscidDissFluxMatrixApprox_d
+                    case (upwind)
+                        call inviscidUpwindFlux_d(.True.)
+                    end select
+                else
+                    select case (spaceDiscr)
+                    case (dissScalar)
+                        call inviscidDissFluxScalar_d
+                    case (dissMatrix)
+                        call inviscidDissFluxMatrix_d
+                    case (upwind)
+                        call inviscidUpwindFlux_d(.True.)
+                    end select
+                end if
+
+                if (viscous) then
+                    call computeSpeedOfSoundSquared_d
+                    if (.not. lumpedDiss .or. viscPC) then
+                        call allNodalGradients_d
+                        call viscousFlux_d
+                    else
+                        call viscousFluxApprox_d
+                    end if
+                end if
+
+                ! if (lowSpeedPreconditioner) then
+                !    call applyLowSpeedPreconditioner_d
+                ! end if
+                call sumDwAndFw_d
+                call resscale_d
+            end do
+        end do
+
+        ! Compute final solution values
+        if (present(famLists)) then
+            call getSolution_d(famLists, funcValues, funcValuesd)
+        end if
+
+        do sps = 1, nTimeIntervalsSpectral
+            call getForces_d(forces(:, :, sps), forcesDot(:, :, sps), fSize, sps)
+        end do
+
+        ! Copy out the residual derivative into the provided dwDot
+        ii = 0
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers_d(nn, 1, sps)
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            do l = 1, nw
+                                ii = ii + 1
+                                dwDot(ii) = dwd(i, j, k, l)
+                            end do
+                        end do
+                    end do
+                end do
+            end do
+        end do
+        call VecResetArray(x_like, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+        deallocate (forces)
+    end subroutine master_d
+
+    subroutine master_b(wbar, xbar, extraBar, forcesBar, dwBar, nState, famLists, &
+                        funcValues, funcValuesd, bcDataNames, bcDataValues, bcDataValuesd, bcDataFamLists)
+
+        ! This is the main reverse mode differentiaion of master. It
+        ! compute the reverse mode sensitivity of *all* outputs with
+        ! respect to *all* inputs. Anything that needs to be
+        ! differentiated for the adjoint method should be included in this
+        ! function. This routine is written by hand, assembling the various
+        ! individually differentiated tapenade routines.
+
+        use constants
+        use adjointVars, only: iAlpha, iBeta, iMach, iMachGrid, iTemperature, iDensity, &
+                               iPointrefX, iPointRefY, iPointRefZ, iPressure
+        use communication, only: adflow_comm_world, myid
+        use iteration, only: currentLevel
+        use inputAdjoint, only: viscPC
+        use fluxes, only: viscousFlux
+        use flowVarRefState, only: nw, nwf, viscous, pInfDimd, rhoInfDimd, TinfDimd
+        use blockPointers, only: nDom, il, jl, kl, wd, xd, dw, dwd
+        use inputPhysics, only: pointRefd, alphad, betad, equations, machCoefd, &
+                                machd, machGridd, rgasdimd, equationMode, turbModel, wallDistanceNeeded
+        use inputDiscretization, only: lowSpeedPreconditioner, lumpedDiss, spaceDiscr, useAPproxWallDistance
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use inputAdjoint, only: frozenTurbulence
+        use utils, only: isWallType, setPointers_b, EChk
+        use adjointPETSc, only: x_like
+        use haloExchange, only: whalo2_b, exchangeCoor_b, exchangeCoor, whalo2
+        use wallDistanceData, only: xSurfVec, xSurfVecd, xSurf, xSurfd, wallScatter
+        use surfaceIntegrations, only: getSolution_b
+        use flowUtils, only: fixAllNodalGradientsFromAD
+        use adjointextra_b, only: resscale_B, sumdwandfw_b
+        use adjointExtra_b, only: xhalo_block_b, volume_block_b, metric_block_b, boundarynormals_b
+        use flowutils_b, only: computePressureSimple_b, computeLamViscosity_b, &
+                               computeSpeedOfSoundSquared_b, allNodalGradients_b, adjustInflowAngle_b
+        use solverutils_b, only: timeStep_Block_b, gridvelocitiesfinelevel_block_b, slipvelocitiesfinelevel_block_b, &
+                                 normalvelocities_block_b
+        use turbbcroutines_b, only: applyAllTurbBCthisblock_b, bcTurbTreatment_b
+        use initializeflow_b, only: referenceState_b
+        use wallDistance_b, only: updateWallDistancesQuickly_b
+        use sa_b, only: saSource_b, saViscous_b, saResScale_b, qq
+        use turbutils_b, only: turbAdvection_b, computeEddyViscosity_b
+        use residuals_b, only: sourceTerms_block_b, initRes_block_b
+        use fluxes_b, only: inviscidUpwindFlux_b, inviscidDissFluxScalar_b, &
+                            inviscidDissFluxMatrix_b, viscousFlux_b, inviscidCentralFlux_b
+        use BCExtra_b, only: applyAllBC_Block_b
+        use bcdata, only: setBCData_b, setBCDataFineGrid_b
+        use oversetData, only: oversetPresent
+        use inputOverset, only: oversetUpdateMode
+        use oversetCommUtilities, only: updateOversetConnectivity_b
+        use BCRoutines, only: applyAllBC_block
+        use actuatorRegionData, only: nActuatorRegions
+        use monitor, only: timeUnsteadyRestart
+        use section, only: sections, nSections ! used in time-declaration
+
+#include <petsc/finclude/petsc.h>
+        use petsc
+        implicit none
+
+        ! Input variables:
+        real(kind=realType), intent(in), dimension(:) :: dwBar
+        real(kind=realType), intent(in), dimension(:, :, :) :: forcesBar
+        integer(kind=intType), intent(in) :: nState
+        integer(kind=intType), optional, dimension(:, :), intent(in) :: famLists
+        real(kind=realType), optional, dimension(:, :) :: funcValues, funcValuesd
+        character, optional, dimension(:, :), intent(in) :: bcDataNames
+        real(kind=realType), optional, dimension(:), intent(in) :: bcDataValues
+        integer(kind=intType), optional, dimension(:, :) :: bcDataFamLists
+
+        ! Output Arguments:
+        real(kind=realType), optional, intent(out), dimension(:) :: wBar, xBar, extraBar
+        real(kind=realType), optional, dimension(:), intent(out) :: bcDataValuesd
+
+        ! Working Variables
+        integer(kind=intType) :: ierr, nn, sps, mm, i, j, k, l, fSize, ii, jj, level, iRegion
+        real(kind=realType), dimension(:), allocatable :: extraLocalBar, bcDataValuesdLocal
+        real(kind=realType) :: dummyReal, dummyReald
+        logical :: resetToRans
+        real(kind=realType), dimension(nSections) :: time
+        logical :: useOldCoor ! for solverutils_b() functions
+        useOldCoor = .FALSE.
+
+        ! extraLocalBar accumulates the seeds onto the extra variables
+        allocate (extraLocalBar(size(extrabar)))
+        extraLocalBar = zero
+
+        ! Place the output spatial seed into the temporary petsc x-like
+        ! vector.
+        xBar = zero
+        call VecPlaceArray(x_like, xBar, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Set the residual seeds.
+        ii = 0
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+
+                ! Set pointers and derivative pointers
+                call setPointers_b(nn, 1, sps)
+
+                ! Set the dw seeds
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            do l = 1, nState
+                                ii = ii + 1
+                                dwd(i, j, k, l) = dwbar(ii)
+                            end do
+                        end do
+                    end do
+                end do
+            end do
+        end do
+
+        forceSpsLoop: do sps = 1, nTimeIntervalsSpectral
+            fSize = size(forcesBar, 2)
+            call getForces_b(forcesBar(:, :, sps), fSize, sps)
+        end do forceSpsLoop
+
+        ! Call the final getSolution_b routine
+        if (present(famLists)) then
+            call getSolution_b(famLists, funcValues, funcValuesd)
+        end if
+
+        spsLoop1: do sps = 1, nTimeIntervalsSpectral
+
+            domainLoop1: do nn = 1, nDom
+                call setPointers_b(nn, 1, sps)
+
+                ! Now we start running back through the main residual code:
+                call resScale_b
+                call sumDwAndFw_b
+
+                ! if (lowSpeedPreconditioner) then
+                !    call applyLowSpeedPreconditioner_b
+                ! end if
+
+                ! Note that master_b does not include the first order flux
+                ! approxation codes as those are never needed in reverse.
+                if (viscous) then
+                    call viscousFlux_b
+                    call allNodalGradients_b
+                    call computeSpeedOfSoundSquared_b
+                end if
+
+                ! So the all nodal gradients doesnt' perform the final
+                ! scaling by the volume since it isn't necessary for the
+                ! derivative. We have a special routine to fix that.
+                if (viscous) then
+                    call fixAllNodalGradientsFromAD()
+                    call viscousFlux
+                end if
+
+                select case (spaceDiscr)
+                case (dissScalar)
+                    call inviscidDissFluxScalar_b
+                case (dissMatrix)
+                    call inviscidDissFluxMatrix_b
+                case (upwind)
+                    call inviscidUpwindFlux_b(.True.)
+                end select
+
+                call inviscidCentralFlux_b
+                ! Compute turbulence residual for RANS equations
+                if (equations == RANSEquations) then
+                    select case (turbModel)
+                    case (spalartAllmaras)
+                        call saResScale_b
+                        call saViscous_b
+                        !call unsteadyTurbTerm_b(1_intType, 1_intType, itu1-1, qq)
+                        call turbAdvection_b(1_intType, 1_intType, itu1 - 1, qq)
+                        ! turbAdvection_b zeros the faceid. This should be ok since
+                        ! it presumably is the last call in master using faceid and
+                        ! therefore should be the first call in master_b to use faceid
+                        call saSource_b
+                    end select
+
+                    !call unsteadyTurbSpectral_block_b(itu1, itu1, nn, sps)
+                end if
+
+                call timeStep_block_b(.false.)
+
+                ! Just to be safe, zero the pLocald value...should not matter
+                dummyReald = zero
+                do iRegion = 1, nActuatorRegions
+                    call sourceTerms_block_b(nn, .True., iRegion, dummyReal, dummyReald)
+                end do
+
+                call initRes_block_b(1, nw, nn, sps)
+            end do domainLoop1
+        end do spsLoop1
+
+        ! Need to re-apply the BCs. The reason is that BC halos behind
+        ! interpolated cells need to be recomputed with their new
+        ! interpolated values from actual compute cells. Only needed for
+        ! overset.
+        if (oversetPresent) then
+            do sps = 1, nTimeIntervalsSpectral
+                do nn = 1, nDom
+                    call setPointers_b(nn, 1, sps)
+                    call applyAllBC_block_b(.True.)
+                    call applyAllBC_block(.true.)
+
+                    if (equations == RANSequations) then
+                        call applyAllTurbBCThisBlock_b(.True.)
+                        call bcTurbTreatment_b
+                    end if
+                end do
+            end do
+        end if
+
+        ! Exchange the adjoint values.
+        call whalo2_b(currentLevel, 1_intType, nw, .True., .True., .True.)
+
+        spsLoop2: do sps = 1, nTimeIntervalsSpectral
+
+            ! Get the pointers from the petsc vector for the wall
+            ! surface and it's accumulation. Only necessary for wall
+            ! distance.
+            call VecGetArrayF90(xSurfVec(1, sps), xSurf, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! And it's derivative
+            call VecGetArrayF90(xSurfVecd(sps), xSurfd, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            !Zero the accumulation vector on a per-time-spectral instance basis
+            xSurfd = zero
+
+            domainLoop2: do nn = 1, nDom
+                call setPointers_b(nn, 1, sps)
+                call applyAllBC_block_b(.True.)
+
+                ! Run the forward application of the BCs. The reason is that
+                ! the reverse application of the BCs can result in
+                ! inconsisent values in the halos. This has only been
+                ! observed to caluse problems with hot subsonic flow in an
+                ! engine core. This is the same reason we need the applyBCs
+                ! after the _b version above.
+                call applyAllBC_block(.true.)
+
+                if (equations == RANSequations) then
+                    call applyAllTurbBCThisBlock_b(.True.)
+                    call bcTurbTreatment_b
+                end if
+                call computeEddyViscosity_b(.false.)
+                call computeLamViscosity_b(.false.)
+                call computePressureSimple_b(.false.)
+
+                if (equations == RANSEquations .and. useApproxWallDistance) then
+                    call updateWallDistancesQuickly_b(nn, 1, sps)
+                end if
+
+                ! Here we insert the functions related to
+                ! rotational (mesh movement) setup
+                time = timeunsteadyrestart
+                if (equationmode .eq. timespectral) then
+                    do mm = 1, nsections
+                        time(mm) = time(mm) + (sps - 1) * sections(mm)%timeperiod / real(&
+                             &         ntimeintervalsspectral, realtype)
+                    end do
+                end if
+
+                call slipvelocitiesfinelevel_block_b(useoldcoor, time, sps, nn)
+                call normalvelocities_block_b(sps)
+                call gridvelocitiesfinelevel_block_b(useoldcoor, time, sps, nn)
+
+                call boundaryNormals_b
+                call metric_block_b
+                call volume_block_b
+
+            end do domainLoop2
+
+            ! Restore the petsc pointers.
+            call VecGetArrayF90(xSurfVec(1, sps), xSurf, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! And it's derivative
+            call VecGetArrayF90(xSurfVecd(sps), xSurfd, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Now accumulate the xsurfd accumulation by using the wall scatter
+            ! in reverse.
+            if (wallDistanceNeeded .and. useApproxWallDistance) then
+
+                call VecScatterBegin(wallScatter(1, sps), xSurfVecd(sps), x_like, ADD_VALUES, SCATTER_REVERSE, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                call VecScatterEnd(wallScatter(1, sps), xSurfVecd(sps), x_like, ADD_VALUES, SCATTER_REVERSE, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+            end if
+        end do spsLoop2
+
+        if (present(bcDataNames)) then
+            allocate (bcDataValuesdLocal(size(bcDataValuesd)))
+            bcDataValuesdLocal = zero
+            call setBCDataFineGrid_b(.true.)
+            do sps = 1, nTimeIntervalsSpectral
+                call setBCData_b(bcDataNames, bcDataValues, bcDataValuesdLocal, bcDataFamLists, &
+                                 sps, size(bcDataValues), size(bcDataFamLIsts, 2))
+            end do
+            ! Reverse seeds need to accumulated across all processors:
+            call mpi_allreduce(bcDataValuesdLocal, bcDataValuesd, size(bcDataValuesd), adflow_real, &
+                               mpi_sum, ADflow_comm_world, ierr)
+            deallocate (bcDataValuesdLocal)
+        end if
+        call referenceState_b
+        call adjustInflowAngle_b
+
+        do sps = 1, nTimeIntervalsSpectral
+            ! Update overset connectivity if necessary
+            if (oversetPresent) then
+                if (oversetUpdateMode == updateFast) then
+                    call updateOversetConnectivity_b(1_intType, sps)
+                else if (oversetUpdateMode == updateFull) then
+                    if (myID == 0) then
+                        print *, 'Full overset update derivatives not implemented'
+                    end if
+                end if
+            end if
+        end do
+        ! Now the adjoint of the coordinate exhcange
+        call exchangecoor_b(1)
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers_b(nn, 1, sps)
+                call xhalo_block_b()
+            end do
+        end do
+
+        call VecResetArray(x_like, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! =========================================
+        !            End of reverse pass
+        ! =========================================
+
+        ! Store the extra derivatives
+        extraLocalBar(iAlpha) = alphad
+        extraLocalBar(iBeta) = betad
+        extraLocalBar(iMach) = machd + machcoefd
+        extraLocalBar(iMachGrid) = machgridd
+        extraLocalBar(iPressure) = pinfdimd
+        extraLocalBar(iTemperature) = tinfdimd
+        extraLocalBar(iDensity) = rhoinfdimd
+        extraLocalBar(iPointRefX) = pointrefd(1)
+        extraLocalBar(iPointRefY) = pointrefd(2)
+        extraLocalBar(iPointRefZ) = pointrefd(3)
+
+        ! Finally put the output seeds into the provided vectors.
+        ii = 0
+        jj = 0
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+
+                ! Set pointers and derivative pointers
+                call setPointers_b(nn, 1, sps)
+                ! Set the wbar accumulation
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            do l = 1, nState
+                                ii = ii + 1
+                                wbar(ii) = wd(i, j, k, l)
+                            end do
+                        end do
+                    end do
+                end do
+
+                ! Set the xvbar accumulation. Note that this must be a sum,
+                ! becuase we may already have wall distance accumulation
+                ! from the wallScatter directly into xbar (through x_like).
+                do k = 1, kl
+                    do j = 1, jl
+                        do i = 1, il
+                            do l = 1, 3
+                                jj = jj + 1
+                                xbar(jj) = xbar(jj) + xd(i, j, k, l)
+                            end do
+                        end do
+                    end do
+                end do
+            end do
+        end do
+
+        ! Finally get the full contribution of the extra variables by
+        ! summing all local contributions.
+        extraBar = zero
+        call mpi_allreduce(extraLocalBar, extraBar, size(extraBar), adflow_real, &
+                           mpi_sum, ADflow_comm_world, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+    end subroutine master_b
+
+    subroutine master_state_b(wbar, dwBar, nState)
+
+        ! This is specialized form of master that *ONLY* computes drdw
+        ! products. It uses a few specialzed routines that are
+        ! differentiated without including spatial dependencies. This
+        ! results in slightly faster code. This specialization is
+        ! justififed since this routine is needed for the transpose
+        ! matrix-vector products in solving the adjoint system and thus
+        ! this routine is called several orders of magniutde more than
+        ! master_b. This routine has to be fast!
+
+        use constants
+        use iteration, only: currentLevel
+        use flowVarRefState, only: nw, viscous
+        use blockPointers, only: nDom, il, jl, kl, wd, dwd, iblank
+        use inputPhysics, only: equationMode, turbModel, equations
+        use inputDiscretization, only: lowSpeedPreconditioner, spaceDiscr
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use utils, only: setPointers_d
+        use haloExchange, only: whalo2_b
+        use flowUtils, only: fixAllNodalGradientsFromAD
+        use adjointextra_b, only: resscale_B, sumdwandfw_b
+        use flowutils_b, only: computePressureSimple_b, computeLamViscosity_b, &
+                               computeSpeedOfSoundSquared_b
+        use turbbcroutines_b, only: applyAllTurbBCthisblock_b, bcTurbTreatment_b
+        use turbUtils_b, only: computeEddyViscosity_b
+        use BCExtra_b, only: applyAllBC_Block_b
+
+        use sa_fast_b, only: saresscale_fast_b, saviscous_fast_b, &
+                             sasource_fast_b, qq
+        use turbutils_fast_b, only: turbAdvection_fast_b
+        use fluxes_fast_b, only: inviscidUpwindFlux_fast_b, inviscidDissFluxScalar_fast_b, &
+                                 inviscidDissFluxMatrix_fast_b, viscousFlux_fast_b, inviscidCentralFlux_fast_b
+        use solverutils_fast_b, only: timeStep_block_fast_b
+        use flowutils_fast_b, only: allnodalgradients_fast_b
+        use residuals_fast_b, only: sourceTerms_block_fast_b, initRes_block_fast_b
+        use oversetData, only: oversetPresent
+        use bcroutines, only: applyallbc_block
+        use actuatorRegionData, only: nActuatorRegions
+        implicit none
+
+        ! Input variables:
+        real(kind=realType), intent(in), dimension(:) :: dwBar
+        integer(kind=intType), intent(in) :: nState
+
+        ! Input Arguments:
+        real(kind=realType), intent(out), dimension(:) :: wBar
+
+        ! Working Variables
+        integer(kind=intType) :: ierr, nn, sps, mm, i, j, k, l, fSize, ii, jj, level, iRegion
+        real(kind=realType) :: dummyReal
+
+        ! Set the residual seeds.
+        ii = 0
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers_d(nn, 1, sps)
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            do l = 1, nState
+                                ii = ii + 1
+                                dwd(i, j, k, l) = dwbar(ii)
+                            end do
+                        end do
+                    end do
+                end do
+            end do
+        end do
+
+        ! ============================================
+        !  reverse the order of calls from master
+        ! ============================================
+
+        spsLoop1: do sps = 1, nTimeIntervalsSpectral
+            domainLoop1: do nn = 1, nDom
+                call setPointers_d(nn, 1, sps)
+
+                ! Now we start running back through the main residual code:
+                call resScale_b
+                call sumDwAndFw_b
+
+                ! if (lowSpeedPreconditioner) then
+                !    call applyLowSpeedPreconditioner_b
+                ! end if
+
+                ! Note that master_b does not include the approximation codes
+                ! as those are never needed in reverse.
+                if (viscous) then
+                    call viscousFlux_fast_b
+                    call allNodalGradients_fast_b
+                    call computeSpeedOfSoundSquared_b
+                end if
+
+                select case (spaceDiscr)
+                case (dissScalar)
+                    call inviscidDissFluxScalar_fast_b
+                case (dissMatrix)
+                    call inviscidDissFluxMatrix_fast_b
+                case (upwind)
+                    call inviscidUpwindFlux_fast_b(.True.)
+                end select
+
+                call inviscidCentralFlux_fast_b
+
+                ! Compute turbulence residual for RANS equations
+                if (equations == RANSEquations) then
+                    select case (turbModel)
+                    case (spalartAllmaras)
+                        call saResScale_fast_b
+                        call saViscous_fast_b
+                        !call unsteadyTurbTerm_b(1_intType, 1_intType, itu1-1, qq)
+                        call turbAdvection_fast_b(1_intType, 1_intType, itu1 - 1, qq)
+                        call saSource_fast_b
+                    end select
+
+                    !call unsteadyTurbSpectral_block_b(itu1, itu1, nn, sps)
+                end if
+
+                call timeStep_block_fast_b(.false.)
+                do iRegion = 1, nActuatorRegions
+                    call sourceTerms_block_fast_b(nn, .True., iRegion, dummyReal)
+                end do
+
+                call initRes_block_fast_b(1, nw, nn, sps)
+            end do domainLoop1
+        end do spsLoop1
+
+        ! Need to re-apply the BCs. The reason is that BC halos behind
+        ! interpolated cells need to be recomputed with their new
+        ! interpolated values from actual compute cells. Only needed for
+        ! overset.
+        if (oversetPresent) then
+            do sps = 1, nTimeIntervalsSpectral
+                do nn = 1, nDom
+                    call setPointers_d(nn, 1, sps)
+                    call applyAllBC_block_b(.True.)
+                    call applyAllBC_block(.true.)
+
+                    if (equations == RANSequations) then
+                        call applyAllTurbBCThisBlock_b(.True.)
+                        call bcTurbTreatment_b
+                    end if
+                end do
+            end do
+        end if
+
+        ! Exchange the adjoint values.
+        call whalo2_b(currentLevel, 1_intType, nw, .True., .True., .True.)
+
+        spsLoop2: do sps = 1, nTimeIntervalsSpectral
+            domainLoop2: do nn = 1, nDom
+                call setPointers_d(nn, 1, sps)
+
+                call applyAllBC_block_b(.True.)
+                call applyAllBC_block(.true.)
+
+                if (equations == RANSequations) then
+                    call applyAllTurbBCThisBlock_b(.True.)
+                    call bcTurbTreatment_b
+                end if
+
+                call computeEddyViscosity_b(.false.)
+                call computeLamViscosity_b(.false.)
+                call computePressureSimple_b(.false.)
+
+            end do domainLoop2
+        end do spsLoop2
+
+        ! Finally put the output seeds into wbar
+        ii = 0
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers_d(nn, 1, sps)
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            do l = 1, nState
+                                ii = ii + 1
+                                wbar(ii) = wd(i, j, k, l)!*max(real(iblank(i,j,k)), zero)
+                            end do
+                        end do
+                    end do
+                end do
+            end do
+        end do
+    end subroutine master_state_b
+#endif
+    subroutine block_res_state(nn, sps, useFlowRes, useTurbRes)
+
+        ! This is a special state-only routine used only for finite
+        ! differce computations of the jacobian
+        use constants
+        use BCRoutines, only: applyAllBC_Block
+        use inputAdjoint, only: viscPC
+        use blockPointers, only: nDom, wd, xd, dw, il, jl, kl
+        use flowVarRefState, only: viscous
+        use inputPhysics, only: equations, turbModel
+        use inputDiscretization, only: lowSpeedPreconditioner, lumpedDiss, spaceDiscr
+        use utils, only: setPointers, EChk
+        use residuals, only: sourceTerms_block
+        use turbutils, only: computeEddyViscosity
+        use turbbcroutines, only: applyAllTurbBCthisblock, bcTurbTreatment
+        use adjointExtra, only: sumdwandfw, resScale
+        use inputDiscretization, only: useBlockettes
+        use blockette, only: blocketteResCore, blockResCore
+        use flowUtils, only: computeLamViscosity, computePressureSimple
+        use actuatorRegionData, only: nActuatorRegions
+        implicit none
+
+        ! Input Arguments:
+        integer(kind=intType), intent(in) :: nn, sps
+        logical, optional, intent(in) :: useFlowRes, useTurbRes
+
+        ! Working Variables
+        integer(kind=intType) :: ierr, mm, i, j, k, l, fSize, ii, jj, iRegion
+        real(kind=realType) :: pLocal
+        logical :: dissApprox, viscApprox, updateIntermed, flowRes, turbRes, storeWall
+
+        flowRes = .True.
+        if (present(useFlowRes)) then
+            flowRes = useFlowRes
+        end if
+
+        turbRes = .True.
+        if (present(useTurbRes)) then
+            turbRes = useTurbRes
+        end if
+
+        call computePressureSimple(.True.)
+        call computeLamViscosity(.True.)
+        call computeEddyViscosity(.True.)
+
+        ! Make sure to call the turb BC's first incase we need to
+        ! correct for K
+        if (equations == RANSequations) then
+            call BCTurbTreatment
+            call applyAllTurbBCthisblock(.True.)
+        end if
+
+        call applyAllBC_block(.True.)
+
+        ! Set the flags:
+        dissApprox = lumpedDiss
+        viscApprox = lumpedDiss
+        updateIntermed = .False.
+        storeWall = .True.
+        blockettes: if (useBlockettes) then
+            call blocketteResCore(dissApprox, viscApprox, updateIntermed, flowRes, turbRes, storeWall)
+        else
+            call blockResCore(dissApprox, viscApprox, updateIntermed, flowRes, turbRes, storeWall, nn, sps)
+        end if blockettes
+        do iRegion = 1, nActuatorRegions
+            call sourceTerms_block(nn, .True., iRegion, pLocal)
+        end do
+        call resscale
+
+    end subroutine block_res_state
+#ifndef USE_COMPLEX
+    subroutine block_res_state_d(nn, sps)
+
+        ! This is a special state-only forward mode linearization
+        ! computation used to assemble the jacobian.
+        use constants
+        use BCExtra_d, only: applyAllBC_Block_d
+        use inputAdjoint, only: viscPC
+        use blockPointers, only: nDom, wd, xd, dw, dwd
+        use flowVarRefState, only: viscous
+        use inputPhysics, only: equations, turbModel
+        use inputDiscretization, only: lowSpeedPreconditioner, lumpedDiss, spaceDiscr
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use utils, only: setPointers_d, EChk
+        use sa_d, only: saSource_d, saViscous_d, saResScale_d, qq
+        use turbutils_d, only: turbAdvection_d, computeEddyViscosity_d
+        use fluxes_d, only: inviscidDissFluxScalarApprox_d, inviscidDissFluxMatrixApprox_d, &
+                            inviscidUpwindFlux_d, inviscidDissFluxScalar_d, inviscidDissFluxMatrix_d, &
+                            inviscidUpwindFlux_d, viscousFlux_d, viscousFluxApprox_d, inviscidCentralFlux_d
+        use flowutils_d, only: computePressureSimple_d, computeLamViscosity_d, &
+                               computeSpeedOfSoundSquared_d, allNodalGradients_d
+        use solverutils_d, only: timeStep_Block_d
+        use turbbcroutines_d, only: applyAllTurbBCthisblock_d, bcTurbTreatment_d
+        use adjointextra_d, only: resscale_D, sumdwandfw_d
+        use residuals_d, only: sourceterms_block_d
+        use actuatorRegionData, only: nActuatorRegions
+        implicit none
+
+        ! Input Arguments:
+        integer(kind=intType), intent(in) :: nn, sps
+
+        ! Working Variables
+        integer(kind=intType) :: ierr, mm, i, j, k, l, fSize, ii, jj, iRegion
+        real(kind=realType) :: dummyReal, dummyReald
+
+        call computePressureSimple_d(.True.)
+        call computeLamViscosity_d(.True.)
+        call computeEddyViscosity_d(.True.)
+
+        ! Make sure to call the turb BC's first incase we need to
+        ! correct for K
+        if (equations == RANSequations) then
+            call BCTurbTreatment_d
+            call applyAllTurbBCthisblock_d(.True.)
+        end if
+
+        call applyAllBC_block_d(.True.)
+        call timeStep_block_d(.false.)
+
+        dw = zero ! These two lines are init_res
+        dwd = zero
+
+        ! Compute any source terms
+        do iRegion = 1, nActuatorRegions
+            call sourceTerms_block_d(nn, .True., iRegion, dummyReal, dummyReald)
+        end do
+
+        !Compute turbulence residual for RANS equations
+        if (equations == RANSEquations) then
+            !call unsteadyTurbSpectral_block(itu1, itu1, nn, sps)
+
+            select case (turbModel)
+            case (spalartAllmaras)
+                call saSource_d
+                call turbAdvection_d(1_intType, 1_intType, itu1 - 1, qq)
+          !!call unsteadyTurbTerm_d(1_intType, 1_intType, itu1-1, qq)
                 call saViscous_d
                 call saResScale_d
-             end select
-          end if
+            end select
+        end if
 
-          ! compute the mean flow residual
-          call inviscidCentralFlux_d
+        ! compute the mean flow residual
+        call inviscidCentralFlux_d
 
-          if (lumpedDiss) then
-             select case (spaceDiscr)
-             case (dissScalar)
+        if (lumpedDiss) then
+            select case (spaceDiscr)
+            case (dissScalar)
                 call inviscidDissFluxScalarApprox_d
-             case (dissMatrix)
+            case (dissMatrix)
                 call inviscidDissFluxMatrixApprox_d
-             case (upwind)
+            case (upwind)
                 call inviscidUpwindFlux_d(.True.)
-             end select
-          else
-             select case (spaceDiscr)
-             case (dissScalar)
+            end select
+        else
+            select case (spaceDiscr)
+            case (dissScalar)
                 call inviscidDissFluxScalar_d
-             case (dissMatrix)
+            case (dissMatrix)
                 call inviscidDissFluxMatrix_d
-             case (upwind)
+            case (upwind)
                 call inviscidUpwindFlux_d(.True.)
-             end select
-          end if
+            end select
+        end if
 
-          if (viscous) then
-             call computeSpeedOfSoundSquared_d
-             if (.not. lumpedDiss .or. viscPC) then
+        if (viscous) then
+            call computeSpeedOfSoundSquared_d
+            if (.not. lumpedDiss .or. viscPC) then
                 call allNodalGradients_d
                 call viscousFlux_d
-             else
+            else
                 call viscousFluxApprox_d
-             end if
-          end if
-
-          ! if (lowSpeedPreconditioner) then
-          !    call applyLowSpeedPreconditioner_d
-          ! end if
-          call sumDwAndFw_d
-          call resscale_d
-       end do
-    end do
-
-    ! Compute final solution values
-    if (present(famLists)) then
-       call getSolution_d(famLists, funcValues, funcValuesd)
-    end if
-
-    do sps=1, nTimeIntervalsSpectral
-       call getForces_d(forces(:, :, sps), forcesDot(:, :, sps), fSize, sps)
-    end do
-
-    ! Copy out the residual derivative into the provided dwDot
-    ii =0
-    do nn=1, nDom
-       do sps=1,nTimeIntervalsSpectral
-          call setPointers_d(nn, 1, sps)
-          do k=2, kl
-             do j=2, jl
-                do i=2, il
-                   do l=1, nw
-                      ii = ii + 1
-                      dwDot(ii) = dwd(i,j,k,l)
-                   end do
-                end do
-             end do
-          end do
-       end do
-    end do
-    call VecResetArray(x_like, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-    deallocate(forces)
-  end subroutine master_d
-
-  subroutine master_b(wbar, xbar, extraBar, forcesBar, dwBar, nState, famLists, &
-       funcValues, funcValuesd, bcDataNames, bcDataValues, bcDataValuesd, bcDataFamLists)
-
-    ! This is the main reverse mode differentiaion of master. It
-    ! compute the reverse mode sensitivity of *all* outputs with
-    ! respect to *all* inputs. Anything that needs to be
-    ! differentiated for the adjoint method should be included in this
-    ! function. This routine is written by hand, assembling the various
-    ! individually differentiated tapenade routines.
-
-    use constants
-    use adjointVars, only : iAlpha, iBeta, iMach, iMachGrid, iTemperature, iDensity, &
-         iPointrefX, iPointRefY, iPointRefZ, iPressure
-    use communication, only : adflow_comm_world, myid
-    use iteration, only : currentLevel
-    use inputAdjoint,  only : viscPC
-    use fluxes, only : viscousFlux
-    use flowVarRefState, only : nw, nwf, viscous,pInfDimd, rhoInfDimd, TinfDimd
-    use blockPointers, only : nDom, il, jl, kl, wd, xd, dw, dwd
-    use inputPhysics, only :pointRefd, alphad, betad, equations, machCoefd, &
-         machd, machGridd, rgasdimd, equationMode, turbModel, wallDistanceNeeded
-    use inputDiscretization, only : lowSpeedPreconditioner, lumpedDiss, spaceDiscr, useAPproxWallDistance
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use inputAdjoint, only : frozenTurbulence
-    use utils, only : isWallType, setPointers_b, EChk
-    use adjointPETSc, only : x_like
-    use haloExchange, only : whalo2_b, exchangeCoor_b, exchangeCoor, whalo2
-    use wallDistanceData, only : xSurfVec, xSurfVecd, xSurf, xSurfd, wallScatter
-    use surfaceIntegrations, only : getSolution_b
-    use flowUtils, only : fixAllNodalGradientsFromAD
-    use adjointextra_b, only : resscale_B, sumdwandfw_b
-    use adjointExtra_b, only : xhalo_block_b, volume_block_b, metric_block_b, boundarynormals_b
-    use flowutils_b, only : computePressureSimple_b, computeLamViscosity_b, &
-         computeSpeedOfSoundSquared_b, allNodalGradients_b, adjustInflowAngle_b
-    use solverutils_b, only : timeStep_Block_b, gridvelocitiesfinelevel_block_b, slipvelocitiesfinelevel_block_b, &
-         normalvelocities_block_b
-    use turbbcroutines_b, only : applyAllTurbBCthisblock_b,  bcTurbTreatment_b
-    use initializeflow_b, only : referenceState_b
-    use wallDistance_b, only : updateWallDistancesQuickly_b
-    use sa_b, only : saSource_b, saViscous_b, saResScale_b, qq
-    use turbutils_b, only : turbAdvection_b, computeEddyViscosity_b
-    use residuals_b, only : sourceTerms_block_b, initRes_block_b
-    use fluxes_b, only :inviscidUpwindFlux_b, inviscidDissFluxScalar_b, &
-         inviscidDissFluxMatrix_b, viscousFlux_b, inviscidCentralFlux_b
-    use BCExtra_b, only : applyAllBC_Block_b
-    use bcdata, only : setBCData_b, setBCDataFineGrid_b
-    use oversetData, only : oversetPresent
-    use inputOverset, only : oversetUpdateMode
-    use oversetCommUtilities, only : updateOversetConnectivity_b
-    use BCRoutines, only : applyAllBC_block
-    use actuatorRegionData, only : nActuatorRegions
-    use monitor, only : timeUnsteadyRestart
-    use section, only: sections,nSections ! used in time-declaration
-
-#include <petsc/finclude/petsc.h>
-    use petsc
-    implicit none
-
-    ! Input variables:
-    real(kind=realType), intent(in), dimension(:) :: dwBar
-    real(kind=realType), intent(in), dimension(:, :, :) :: forcesBar
-    integer(kind=intType), intent(in) :: nState
-    integer(kind=intType), optional, dimension(:, :), intent(in) :: famLists
-    real(kind=realType), optional, dimension(:, :) :: funcValues, funcValuesd
-    character, optional, dimension(:, :), intent(in) :: bcDataNames
-    real(kind=realType), optional, dimension(:), intent(in) :: bcDataValues
-    integer(kind=intType), optional, dimension(:, :) :: bcDataFamLists
-
-    ! Output Arguments:
-    real(kind=realType), optional, intent(out), dimension(:) :: wBar, xBar, extraBar
-    real(kind=realType), optional, dimension(:), intent(out) :: bcDataValuesd
-
-    ! Working Variables
-    integer(kind=intType) :: ierr, nn, sps, mm,i,j,k, l, fSize, ii, jj,  level, iRegion
-    real(kind=realType), dimension(:), allocatable :: extraLocalBar, bcDataValuesdLocal
-    real(kind=realType) :: dummyReal, dummyReald
-    logical ::resetToRans
-    real(kind=realType), dimension(nSections) :: time
-    logical :: useOldCoor ! for solverutils_b() functions
-    useOldCoor = .FALSE.
-
-    ! extraLocalBar accumulates the seeds onto the extra variables
-    allocate(extraLocalBar(size(extrabar)))
-    extraLocalBar = zero
-
-    ! Place the output spatial seed into the temporary petsc x-like
-    ! vector.
-    xBar = zero
-    call VecPlaceArray(x_like, xBar, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Set the residual seeds.
-    ii = 0
-    do nn=1,nDom
-       do sps=1,nTimeIntervalsSpectral
-
-          ! Set pointers and derivative pointers
-          call setPointers_b(nn, 1, sps)
-
-          ! Set the dw seeds
-          do k=2, kl
-             do j=2, jl
-                do i=2, il
-                   do l=1, nState
-                      ii = ii + 1
-                      dwd(i, j, k, l) = dwbar(ii)
-                   end do
-                end do
-             end do
-          end do
-       end do
-    end do
-
-    forceSpsLoop: do sps=1, nTimeIntervalsSpectral
-       fSize = size(forcesBar, 2)
-       call getForces_b(forcesBar(:, :, sps), fSize, sps)
-    end do forceSpsLoop
-
-    ! Call the final getSolution_b routine
-    if (present(famLists)) then
-       call getSolution_b(famLists, funcValues, funcValuesd)
-    end if
-
-    spsLoop1: do sps=1, nTimeIntervalsSpectral
-
-       domainLoop1: do nn=1, nDom
-          call setPointers_b(nn, 1, sps)
-
-          ! Now we start running back through the main residual code:
-          call resScale_b
-          call sumDwAndFw_b
-
-          ! if (lowSpeedPreconditioner) then
-          !    call applyLowSpeedPreconditioner_b
-          ! end if
-
-          ! Note that master_b does not include the first order flux
-          ! approxation codes as those are never needed in reverse.
-          if (viscous) then
-             call viscousFlux_b
-             call allNodalGradients_b
-             call computeSpeedOfSoundSquared_b
-          end if
-
-          ! So the all nodal gradients doesnt' perform the final
-          ! scaling by the volume since it isn't necessary for the
-          ! derivative. We have a special routine to fix that.
-          if (viscous) then
-             call fixAllNodalGradientsFromAD()
-             call viscousFlux
-          end if
-
-          select case (spaceDiscr)
-          case (dissScalar)
-             call inviscidDissFluxScalar_b
-          case (dissMatrix)
-             call inviscidDissFluxMatrix_b
-          case (upwind)
-             call inviscidUpwindFlux_b(.True.)
-          end select
-
-          call inviscidCentralFlux_b
-          ! Compute turbulence residual for RANS equations
-          if( equations == RANSEquations) then
-             select case (turbModel)
-             case (spalartAllmaras)
-                call saResScale_b
-                call saViscous_b
-                !call unsteadyTurbTerm_b(1_intType, 1_intType, itu1-1, qq)
-                call turbAdvection_b(1_intType, 1_intType, itu1-1, qq)
-                ! turbAdvection_b zeros the faceid. This should be ok since
-                ! it presumably is the last call in master using faceid and
-                ! therefore should be the first call in master_b to use faceid
-                call saSource_b
-             end select
-
-             !call unsteadyTurbSpectral_block_b(itu1, itu1, nn, sps)
-          end if
-
-          call timeStep_block_b(.false.)
-
-          ! Just to be safe, zero the pLocald value...should not matter
-          dummyReald = zero
-          do iRegion=1, nActuatorRegions
-             call sourceTerms_block_b(nn, .True. , iRegion, dummyReal, dummyReald)
-          end do
-
-          call initRes_block_b(1, nw, nn, sps)
-       end do domainLoop1
-    end do spsLoop1
-
-    ! Need to re-apply the BCs. The reason is that BC halos behind
-    ! interpolated cells need to be recomputed with their new
-    ! interpolated values from actual compute cells. Only needed for
-    ! overset.
-    if (oversetPresent) then
-       do sps=1, nTimeIntervalsSpectral
-          do nn=1,nDom
-             call setPointers_b(nn, 1, sps)
-             call applyAllBC_block_b(.True.)
-             call applyAllBC_block(.true.)
-
-             if (equations == RANSequations) then
-                call applyAllTurbBCThisBlock_b(.True.)
-                call bcTurbTreatment_b
-             end if
-          end do
-       end do
-    end if
-
-    ! Exchange the adjoint values.
-    call whalo2_b(currentLevel, 1_intType, nw, .True., .True., .True.)
-
-    spsLoop2: do sps=1,nTimeIntervalsSpectral
-
-       ! Get the pointers from the petsc vector for the wall
-       ! surface and it's accumulation. Only necessary for wall
-       ! distance.
-       call VecGetArrayF90(xSurfVec(1, sps), xSurf, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ! And it's derivative
-       call VecGetArrayF90(xSurfVecd(sps), xSurfd, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       !Zero the accumulation vector on a per-time-spectral instance basis
-       xSurfd = zero
-
-       domainLoop2: do nn=1,nDom
-          call setPointers_b(nn, 1, sps)
-          call applyAllBC_block_b(.True.)
-
-          ! Run the forward application of the BCs. The reason is that
-          ! the reverse application of the BCs can result in
-          ! inconsisent values in the halos. This has only been
-          ! observed to caluse problems with hot subsonic flow in an
-          ! engine core. This is the same reason we need the applyBCs
-          ! after the _b version above.
-          call applyAllBC_block(.true.)
-
-          if (equations == RANSequations) then
-             call applyAllTurbBCThisBlock_b(.True.)
-             call bcTurbTreatment_b
-          end if
-          call computeEddyViscosity_b(.false.)
-          call computeLamViscosity_b(.false.)
-          call computePressureSimple_b(.false.)
-
-          if (equations == RANSEquations .and. useApproxWallDistance) then
-             call updateWallDistancesQuickly_b(nn, 1, sps)
-          end if
-
-          ! Here we insert the functions related to
-          ! rotational (mesh movement) setup
-          time = timeunsteadyrestart
-          if (equationmode .eq. timespectral) then
-             do mm=1,nsections
-                time(mm) = time(mm) + (sps-1)*sections(mm)%timeperiod/real(&
-                     &         ntimeintervalsspectral, realtype)
-             end do
-          end if
-
-          call slipvelocitiesfinelevel_block_b(useoldcoor, time, sps, nn)
-          call normalvelocities_block_b(sps)
-          call gridvelocitiesfinelevel_block_b(useoldcoor, time, sps, nn)
-
-          call boundaryNormals_b
-          call metric_block_b
-          call volume_block_b
-
-       end do domainLoop2
-
-       ! Restore the petsc pointers.
-       call VecGetArrayF90(xSurfVec(1, sps), xSurf, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ! And it's derivative
-       call VecGetArrayF90(xSurfVecd(sps), xSurfd, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ! Now accumulate the xsurfd accumulation by using the wall scatter
-       ! in reverse.
-       if (wallDistanceNeeded .and. useApproxWallDistance) then
-
-          call VecScatterBegin(wallScatter(1, sps), xSurfVecd(sps), x_like, ADD_VALUES, SCATTER_REVERSE, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          call VecScatterEnd(wallScatter(1, sps), xSurfVecd(sps), x_like, ADD_VALUES, SCATTER_REVERSE, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-       end if
-    end do spsLoop2
-
-
-    if (present(bcDataNames)) then
-       allocate(bcDataValuesdLocal(size(bcDataValuesd)))
-       bcDataValuesdLocal = zero
-       call setBCDataFineGrid_b(.true.)
-       do sps=1, nTimeIntervalsSpectral
-          call setBCData_b(bcDataNames, bcDataValues, bcDataValuesdLocal, bcDataFamLists, &
-               sps, size(bcDataValues), size(bcDataFamLIsts, 2))
-       end do
-       ! Reverse seeds need to accumulated across all processors:
-       call mpi_allreduce(bcDataValuesdLocal, bcDataValuesd, size(bcDataValuesd), adflow_real, &
-            mpi_sum, ADflow_comm_world, ierr)
-       deallocate(bcDataValuesdLocal)
-    end if
-    call referenceState_b
-    call adjustInflowAngle_b
-
-    do sps=1, nTimeIntervalsSpectral
-       ! Update overset connectivity if necessary
-       if (oversetPresent) then
-          if (oversetUpdateMode == updateFast) then
-             call updateOversetConnectivity_b(1_intType, sps)
-          else if (oversetUpdateMode == updateFull) then
-             if (myID == 0) then
-                print *,'Full overset update derivatives not implemented'
-             end if
-          end if
-       end if
-    end do
-    ! Now the adjoint of the coordinate exhcange
-    call exchangecoor_b(1)
-    do nn=1,nDom
-       do sps=1,nTimeIntervalsSpectral
-          call setPointers_b(nn, 1, sps)
-          call xhalo_block_b()
-       end do
-    end do
-
-    call VecResetArray(x_like, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! =========================================
-    !            End of reverse pass
-    ! =========================================
-
-    ! Store the extra derivatives
-    extraLocalBar(iAlpha) = alphad
-    extraLocalBar(iBeta) =  betad
-    extraLocalBar(iMach) =  machd + machcoefd
-    extraLocalBar(iMachGrid) = machgridd
-    extraLocalBar(iPressure) =  pinfdimd
-    extraLocalBar(iTemperature) = tinfdimd
-    extraLocalBar(iDensity) =  rhoinfdimd
-    extraLocalBar(iPointRefX) = pointrefd(1)
-    extraLocalBar(iPointRefY) = pointrefd(2)
-    extraLocalBar(iPointRefZ) = pointrefd(3)
-
-    ! Finally put the output seeds into the provided vectors.
-    ii = 0
-    jj = 0
-    do nn=1,nDom
-       do sps=1,nTimeIntervalsSpectral
-
-          ! Set pointers and derivative pointers
-          call setPointers_b(nn, 1, sps)
-          ! Set the wbar accumulation
-          do k=2, kl
-             do j=2, jl
-                do i=2, il
-                   do l=1, nState
-                      ii = ii + 1
-                      wbar(ii) = wd(i, j, k, l)
-                   end do
-                end do
-             end do
-          end do
-
-          ! Set the xvbar accumulation. Note that this must be a sum,
-          ! becuase we may already have wall distance accumulation
-          ! from the wallScatter directly into xbar (through x_like).
-          do k=1, kl
-             do j=1, jl
-                do i=1, il
-                   do l=1, 3
-                      jj = jj + 1
-                      xbar(jj) = xbar(jj) + xd(i, j, k, l)
-                   end do
-                end do
-             end do
-          end do
-       end do
-    end do
-
-    ! Finally get the full contribution of the extra variables by
-    ! summing all local contributions.
-    extraBar = zero
-    call mpi_allreduce(extraLocalBar, extraBar, size(extraBar), adflow_real, &
-         mpi_sum, ADflow_comm_world, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-  end subroutine master_b
-
-  subroutine master_state_b(wbar, dwBar, nState)
-
-    ! This is specialized form of master that *ONLY* computes drdw
-    ! products. It uses a few specialzed routines that are
-    ! differentiated without including spatial dependencies. This
-    ! results in slightly faster code. This specialization is
-    ! justififed since this routine is needed for the transpose
-    ! matrix-vector products in solving the adjoint system and thus
-    ! this routine is called several orders of magniutde more than
-    ! master_b. This routine has to be fast!
-
-    use constants
-    use iteration, only : currentLevel
-    use flowVarRefState, only : nw, viscous
-    use blockPointers, only : nDom, il, jl, kl, wd, dwd, iblank
-    use inputPhysics, only : equationMode, turbModel, equations
-    use inputDiscretization, only : lowSpeedPreconditioner, spaceDiscr
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use utils, only : setPointers_d
-    use haloExchange, only : whalo2_b
-    use flowUtils, only : fixAllNodalGradientsFromAD
-    use adjointextra_b, only : resscale_B, sumdwandfw_b
-    use flowutils_b, only : computePressureSimple_b, computeLamViscosity_b, &
-         computeSpeedOfSoundSquared_b
-    use turbbcroutines_b, only : applyAllTurbBCthisblock_b,  bcTurbTreatment_b
-    use turbUtils_b, only : computeEddyViscosity_b
-    use BCExtra_b, only : applyAllBC_Block_b
-
-    use sa_fast_b, only : saresscale_fast_b, saviscous_fast_b, &
-         sasource_fast_b, qq
-    use turbutils_fast_b, only : turbAdvection_fast_b
-    use fluxes_fast_b, only :inviscidUpwindFlux_fast_b, inviscidDissFluxScalar_fast_b, &
-         inviscidDissFluxMatrix_fast_b, viscousFlux_fast_b, inviscidCentralFlux_fast_b
-    use solverutils_fast_b, only : timeStep_block_fast_b
-    use flowutils_fast_b, only : allnodalgradients_fast_b
-    use residuals_fast_b, only : sourceTerms_block_fast_b, initRes_block_fast_b
-    use oversetData, only : oversetPresent
-    use bcroutines, only : applyallbc_block
-    use actuatorRegionData, only : nActuatorRegions
-    implicit none
-
-    ! Input variables:
-    real(kind=realType), intent(in), dimension(:) :: dwBar
-    integer(kind=intType), intent(in) :: nState
-
-    ! Input Arguments:
-    real(kind=realType), intent(out), dimension(:) :: wBar
-
-    ! Working Variables
-    integer(kind=intType) :: ierr, nn, sps, mm,i,j,k, l, fSize, ii, jj,  level, iRegion
-    real(kind=realType) :: dummyReal
-
-    ! Set the residual seeds.
-    ii = 0
-    do nn=1,nDom
-       do sps=1,nTimeIntervalsSpectral
-          call setPointers_d(nn, 1, sps)
-          do k=2, kl
-             do j=2, jl
-                do i=2, il
-                   do l=1, nState
-                      ii = ii + 1
-                      dwd(i, j, k, l) = dwbar(ii)
-                   end do
-                end do
-             end do
-          end do
-       end do
-    end do
-
-    ! ============================================
-    !  reverse the order of calls from master
-    ! ============================================
-
-    spsLoop1: do sps=1, nTimeIntervalsSpectral
-       domainLoop1: do nn=1, nDom
-          call setPointers_d(nn, 1, sps)
-
-          ! Now we start running back through the main residual code:
-          call resScale_b
-          call sumDwAndFw_b
-
-          ! if (lowSpeedPreconditioner) then
-          !    call applyLowSpeedPreconditioner_b
-          ! end if
-
-          ! Note that master_b does not include the approximation codes
-          ! as those are never needed in reverse.
-          if (viscous) then
-             call viscousFlux_fast_b
-             call allNodalGradients_fast_b
-             call computeSpeedOfSoundSquared_b
-          end if
-
-          select case (spaceDiscr)
-          case (dissScalar)
-             call inviscidDissFluxScalar_fast_b
-          case (dissMatrix)
-             call inviscidDissFluxMatrix_fast_b
-          case (upwind)
-             call inviscidUpwindFlux_fast_b(.True.)
-          end select
-
-          call inviscidCentralFlux_fast_b
-
-          ! Compute turbulence residual for RANS equations
-          if( equations == RANSEquations) then
-             select case (turbModel)
-             case (spalartAllmaras)
-                call saResScale_fast_b
-                call saViscous_fast_b
-                !call unsteadyTurbTerm_b(1_intType, 1_intType, itu1-1, qq)
-                call turbAdvection_fast_b(1_intType, 1_intType, itu1-1, qq)
-                call saSource_fast_b
-             end select
-
-             !call unsteadyTurbSpectral_block_b(itu1, itu1, nn, sps)
-          end if
-
-          call timeStep_block_fast_b(.false.)
-          do iRegion=1, nActuatorRegions
-             call sourceTerms_block_fast_b(nn, .True. , iRegion, dummyReal)
-          end do
-
-          call initRes_block_fast_b(1, nw, nn, sps)
-       end do domainLoop1
-    end do spsLoop1
-
-    ! Need to re-apply the BCs. The reason is that BC halos behind
-    ! interpolated cells need to be recomputed with their new
-    ! interpolated values from actual compute cells. Only needed for
-    ! overset.
-    if (oversetPresent) then
-       do sps=1, nTimeIntervalsSpectral
-          do nn=1,nDom
-             call setPointers_d(nn, 1, sps)
-             call applyAllBC_block_b(.True.)
-             call applyAllBC_block(.true.)
-
-             if (equations == RANSequations) then
-                call applyAllTurbBCThisBlock_b(.True.)
-                call bcTurbTreatment_b
-             end if
-          end do
-       end do
-    end if
-
-    ! Exchange the adjoint values.
-    call whalo2_b(currentLevel, 1_intType, nw, .True., .True., .True.)
-
-    spsLoop2: do sps=1,nTimeIntervalsSpectral
-       domainLoop2: do nn=1,nDom
-          call setPointers_d(nn, 1, sps)
-
-          call applyAllBC_block_b(.True.)
-          call applyAllBC_block(.true.)
-
-          if (equations == RANSequations) then
-             call applyAllTurbBCThisBlock_b(.True.)
-             call bcTurbTreatment_b
-          end if
-
-          call computeEddyViscosity_b(.false.)
-          call computeLamViscosity_b(.false.)
-          call computePressureSimple_b(.false.)
-
-       end do domainLoop2
-    end do spsLoop2
-
-    ! Finally put the output seeds into wbar
-    ii = 0
-    do nn=1,nDom
-       do sps=1,nTimeIntervalsSpectral
-          call setPointers_d(nn, 1, sps)
-          do k=2, kl
-             do j=2, jl
-                do i=2, il
-                   do l=1, nState
-                      ii = ii + 1
-                      wbar(ii) = wd(i, j, k, l)!*max(real(iblank(i,j,k)), zero)
-                   end do
-                end do
-             end do
-          end do
-       end do
-    end do
-  end subroutine master_state_b
-#endif
-  subroutine block_res_state(nn, sps, useFlowRes, useTurbRes)
-
-    ! This is a special state-only routine used only for finite
-    ! differce computations of the jacobian
-    use constants
-    use BCRoutines, only : applyAllBC_Block
-    use inputAdjoint,  only : viscPC
-    use blockPointers, only : nDom, wd, xd, dw, il, jl, kl
-    use flowVarRefState, only : viscous
-    use inputPhysics, only : equations, turbModel
-    use inputDiscretization, only : lowSpeedPreconditioner, lumpedDiss, spaceDiscr
-    use utils, only :  setPointers, EChk
-    use residuals, only : sourceTerms_block
-    use turbutils, only : computeEddyViscosity
-    use turbbcroutines, only : applyAllTurbBCthisblock,  bcTurbTreatment
-    use adjointExtra, only :  sumdwandfw, resScale
-    use inputDiscretization, only : useBlockettes
-    use blockette, only : blocketteResCore, blockResCore
-    use flowUtils, only : computeLamViscosity, computePressureSimple
-    use actuatorRegionData, only : nActuatorRegions
-    implicit none
-
-    ! Input Arguments:
-    integer(kind=intType), intent(in) :: nn, sps
-    logical, optional ,intent(in) :: useFlowRes, useTurbRes
-
-    ! Working Variables
-    integer(kind=intType) :: ierr, mm,i,j,k, l, fSize, ii, jj, iRegion
-    real(kind=realType) ::  pLocal
-    logical :: dissApprox, viscApprox, updateIntermed, flowRes, turbRes, storeWall
-
-    flowRes = .True.
-    if (present(useFlowRes)) then
-       flowRes = useFlowRes
-    end if
-
-    turbRes = .True.
-    if (present(useTurbRes)) then
-       turbRes = useTurbRes
-    end if
-
-    call computePressureSimple(.True.)
-    call computeLamViscosity(.True.)
-    call computeEddyViscosity(.True.)
-
-
-    ! Make sure to call the turb BC's first incase we need to
-    ! correct for K
-    if (equations == RANSequations) then
-       call BCTurbTreatment
-       call applyAllTurbBCthisblock(.True.)
-    end if
-
-    call applyAllBC_block(.True.)
-
-    ! Set the flags:
-    dissApprox = lumpedDiss
-    viscApprox = lumpedDiss
-    updateIntermed = .False.
-    storeWall = .True.
-    blockettes: if (useBlockettes) then
-       call blocketteResCore(dissApprox, viscApprox, updateIntermed, flowRes, turbRes, storeWall)
-    else
-       call blockResCore(dissApprox, viscApprox, updateIntermed, flowRes, turbRes, storeWall, nn, sps)
-    end if blockettes
-    do iRegion=1, nActuatorRegions
-       call sourceTerms_block(nn, .True., iRegion, pLocal)
-    end do
-    call resscale
-
-  end subroutine block_res_state
-#ifndef USE_COMPLEX
-  subroutine block_res_state_d(nn, sps)
-
-    ! This is a special state-only forward mode linearization
-    ! computation used to assemble the jacobian.
-    use constants
-    use BCExtra_d, only : applyAllBC_Block_d
-    use inputAdjoint,  only : viscPC
-    use blockPointers, only : nDom, wd, xd, dw, dwd
-    use flowVarRefState, only : viscous
-    use inputPhysics, only : equations, turbModel
-    use inputDiscretization, only : lowSpeedPreconditioner, lumpedDiss, spaceDiscr
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use utils, only : setPointers_d, EChk
-    use sa_d, only : saSource_d, saViscous_d, saResScale_d, qq
-    use turbutils_d, only : turbAdvection_d, computeEddyViscosity_d
-    use fluxes_d, only :inviscidDissFluxScalarApprox_d, inviscidDissFluxMatrixApprox_d, &
-         inviscidUpwindFlux_d, inviscidDissFluxScalar_d, inviscidDissFluxMatrix_d, &
-         inviscidUpwindFlux_d, viscousFlux_d, viscousFluxApprox_d, inviscidCentralFlux_d
-    use flowutils_d, only : computePressureSimple_d, computeLamViscosity_d, &
-         computeSpeedOfSoundSquared_d, allNodalGradients_d
-    use solverutils_d, only : timeStep_Block_d
-    use turbbcroutines_d, only : applyAllTurbBCthisblock_d,  bcTurbTreatment_d
-    use adjointextra_d, only : resscale_D, sumdwandfw_d
-    use residuals_d, only: sourceterms_block_d
-    use actuatorRegionData, only : nActuatorRegions
-    implicit none
-
-    ! Input Arguments:
-    integer(kind=intType), intent(in) :: nn, sps
-
-    ! Working Variables
-    integer(kind=intType) :: ierr, mm,i,j,k, l, fSize, ii, jj, iRegion
-    real(kind=realType) :: dummyReal, dummyReald
-
-    call computePressureSimple_d(.True.)
-    call computeLamViscosity_d(.True.)
-    call computeEddyViscosity_d(.True.)
-
-    ! Make sure to call the turb BC's first incase we need to
-    ! correct for K
-    if (equations == RANSequations) then
-       call BCTurbTreatment_d
-       call applyAllTurbBCthisblock_d(.True.)
-    end if
-
-    call applyAllBC_block_d(.True.)
-    call timeStep_block_d(.false.)
-
-    dw = zero ! These two lines are init_res
-    dwd = zero
-
-    ! Compute any source terms
-    do iRegion=1, nActuatorRegions
-       call sourceTerms_block_d(nn, .True. , iRegion, dummyReal, dummyReald)
-    end do
-
-    !Compute turbulence residual for RANS equations
-    if( equations == RANSEquations) then
-       !call unsteadyTurbSpectral_block(itu1, itu1, nn, sps)
-
-       select case (turbModel)
-       case (spalartAllmaras)
-          call saSource_d
-          call turbAdvection_d(1_intType, 1_intType, itu1-1, qq)
-          !!call unsteadyTurbTerm_d(1_intType, 1_intType, itu1-1, qq)
-          call saViscous_d
-          call saResScale_d
-       end select
-    end if
-
-    ! compute the mean flow residual
-    call inviscidCentralFlux_d
-
-    if (lumpedDiss) then
-       select case (spaceDiscr)
-       case (dissScalar)
-          call inviscidDissFluxScalarApprox_d
-       case (dissMatrix)
-          call inviscidDissFluxMatrixApprox_d
-       case (upwind)
-          call inviscidUpwindFlux_d(.True.)
-       end select
-    else
-       select case (spaceDiscr)
-       case (dissScalar)
-          call inviscidDissFluxScalar_d
-       case (dissMatrix)
-          call inviscidDissFluxMatrix_d
-       case (upwind)
-          call inviscidUpwindFlux_d(.True.)
-       end select
-    end if
-
-    if (viscous) then
-       call computeSpeedOfSoundSquared_d
-       if (.not. lumpedDiss .or. viscPC) then
-          call allNodalGradients_d
-          call viscousFlux_d
-       else
-          call viscousFluxApprox_d
-       end if
-    end if
-
-    ! if (lowSpeedPreconditioner) then
-    !    call applyLowSpeedPreconditioner_d
-    ! end if
-    call sumDwAndFw_d
-    call resscale_d
-  end subroutine block_res_state_d
+            end if
+        end if
+
+        ! if (lowSpeedPreconditioner) then
+        !    call applyLowSpeedPreconditioner_d
+        ! end if
+        call sumDwAndFw_d
+        call resscale_d
+    end subroutine block_res_state_d
 #endif
 
 end module masterRoutines
diff --git a/src/adjoint/outputForward/residuals_d.f90 b/src/adjoint/outputForward/residuals_d.f90
index 160617e7c..27d307da4 100644
--- a/src/adjoint/outputForward/residuals_d.f90
+++ b/src/adjoint/outputForward/residuals_d.f90
@@ -176,13 +176,13 @@ subroutine residual_block()
 !    compute speed of sound
             arg1 = gamma(i, j, k)*p(i, j, k)/w(i, j, k, irho)
             sos = sqrt(arg1)
-! compute velocities without rho from state vector 
+! compute velocities without rho from state vector
 !      (w is pointer.. see type blocktype setup in block.f90)
-!      w(0:ib,0:jb,0:kb,1:nw) is allocated in block.f90 
-!      these are per definition nw=[rho,u,v,w,rhoee] 
-!      so the velocity is simply just taken out below... 
-!      we do not have to divide with rho since it is already 
-!      without rho... 
+!      w(0:ib,0:jb,0:kb,1:nw) is allocated in block.f90
+!      these are per definition nw=[rho,u,v,w,rhoee]
+!      so the velocity is simply just taken out below...
+!      we do not have to divide with rho since it is already
+!      without rho...
 ! ivx: l. 60 in constants.f90
             velxrho = w(i, j, k, ivx)
             velyrho = w(i, j, k, ivy)
@@ -190,21 +190,21 @@ subroutine residual_block()
             q = velxrho**2 + velyrho**2 + velzrho**2
             result1 = sqrt(q)
             resm = result1/sos
-! resm above is used as m_a (thesis) and m (paper 2015) 
-! and is the free stream mach number 
-! see routine setup above: 
-! l. 30: real(kind=realtype), parameter :: k1 = 1.05_realtype 
-! random given number for k2: 
-! l. 31: real(kind=realtype), parameter :: k2 = 0.6_realtype 
-! mach number preconditioner activation for k3: 
-! l. 32: real(kind=realtype), parameter :: m0 = 0.2_realtype 
-! 
-!    compute k3 
-! eq. 2.7 in garg 2015. k1, m0 and resm are scalars 
-! 
-! unfortunately, garg has switched the k1 and k3 here in the 
-! code. in both paper and thesis it is k3 that is used to det- 
-! ermine k1 below 
+! resm above is used as m_a (thesis) and m (paper 2015)
+! and is the free stream mach number
+! see routine setup above:
+! l. 30: real(kind=realtype), parameter :: k1 = 1.05_realtype
+! random given number for k2:
+! l. 31: real(kind=realtype), parameter :: k2 = 0.6_realtype
+! mach number preconditioner activation for k3:
+! l. 32: real(kind=realtype), parameter :: m0 = 0.2_realtype
+!
+!    compute k3
+! eq. 2.7 in garg 2015. k1, m0 and resm are scalars
+!
+! unfortunately, garg has switched the k1 and k3 here in the
+! code. in both paper and thesis it is k3 that is used to det-
+! ermine k1 below
 !
 !    compute k3
             k3 = k1*(1+(1-k1*m0**2)*resm**2/(k1*m0**4))
@@ -219,9 +219,9 @@ subroutine residual_block()
             else
               betamr2 = x1
             end if
-! above, the winf is the free stream velocity 
-! 
-! should this first line's first element have sos^4 or sos^2  
+! above, the winf is the free stream velocity
+!
+! should this first line's first element have sos^4 or sos^2
             a11 = betamr2*(1/sos**4)
             a12 = zero
             a13 = zero
diff --git a/src/adjoint/outputForward/solverutils_d.f90 b/src/adjoint/outputForward/solverutils_d.f90
index 110f730d5..cebdc4e5c 100644
--- a/src/adjoint/outputForward/solverutils_d.f90
+++ b/src/adjoint/outputForward/solverutils_d.f90
@@ -1021,8 +1021,8 @@ subroutine gridvelocitiesfinelevel_block_d(useoldcoor, t, sps, nn)
 !             normal grid velocities of the faces.
 !
 ! loop over the three directions.
-! the original code is elegant but the tapenade has a difficult time 
-! to understand it. thus, we unfold it and make it easier for the 
+! the original code is elegant but the tapenade has a difficult time
+! to understand it. thus, we unfold it and make it easier for the
 ! tapenade.
 ! i-direction
         do k=1,ke
@@ -1319,8 +1319,8 @@ subroutine gridvelocitiesfinelevel_block(useoldcoor, t, sps, nn)
 !             normal grid velocities of the faces.
 !
 ! loop over the three directions.
-! the original code is elegant but the tapenade has a difficult time 
-! to understand it. thus, we unfold it and make it easier for the 
+! the original code is elegant but the tapenade has a difficult time
+! to understand it. thus, we unfold it and make it easier for the
 ! tapenade.
 ! i-direction
         do k=1,ke
diff --git a/src/adjoint/outputReverse/residuals_b.f90 b/src/adjoint/outputReverse/residuals_b.f90
index 62ba9a3cb..91c8f07d7 100644
--- a/src/adjoint/outputReverse/residuals_b.f90
+++ b/src/adjoint/outputReverse/residuals_b.f90
@@ -173,34 +173,34 @@ subroutine residual_block()
           do i=2,il
 !    compute speed of sound
             sos = sqrt(gamma(i, j, k)*p(i, j, k)/w(i, j, k, irho))
-! compute velocities without rho from state vector 
+! compute velocities without rho from state vector
 !      (w is pointer.. see type blocktype setup in block.f90)
-!      w(0:ib,0:jb,0:kb,1:nw) is allocated in block.f90 
-!      these are per definition nw=[rho,u,v,w,rhoee] 
-!      so the velocity is simply just taken out below... 
-!      we do not have to divide with rho since it is already 
-!      without rho... 
+!      w(0:ib,0:jb,0:kb,1:nw) is allocated in block.f90
+!      these are per definition nw=[rho,u,v,w,rhoee]
+!      so the velocity is simply just taken out below...
+!      we do not have to divide with rho since it is already
+!      without rho...
 ! ivx: l. 60 in constants.f90
             velxrho = w(i, j, k, ivx)
             velyrho = w(i, j, k, ivy)
             velzrho = w(i, j, k, ivz)
             q = velxrho**2 + velyrho**2 + velzrho**2
             resm = sqrt(q)/sos
-! resm above is used as m_a (thesis) and m (paper 2015) 
-! and is the free stream mach number 
-! see routine setup above: 
-! l. 30: real(kind=realtype), parameter :: k1 = 1.05_realtype 
-! random given number for k2: 
-! l. 31: real(kind=realtype), parameter :: k2 = 0.6_realtype 
-! mach number preconditioner activation for k3: 
-! l. 32: real(kind=realtype), parameter :: m0 = 0.2_realtype 
-! 
-!    compute k3 
-! eq. 2.7 in garg 2015. k1, m0 and resm are scalars 
-! 
-! unfortunately, garg has switched the k1 and k3 here in the 
-! code. in both paper and thesis it is k3 that is used to det- 
-! ermine k1 below 
+! resm above is used as m_a (thesis) and m (paper 2015)
+! and is the free stream mach number
+! see routine setup above:
+! l. 30: real(kind=realtype), parameter :: k1 = 1.05_realtype
+! random given number for k2:
+! l. 31: real(kind=realtype), parameter :: k2 = 0.6_realtype
+! mach number preconditioner activation for k3:
+! l. 32: real(kind=realtype), parameter :: m0 = 0.2_realtype
+!
+!    compute k3
+! eq. 2.7 in garg 2015. k1, m0 and resm are scalars
+!
+! unfortunately, garg has switched the k1 and k3 here in the
+! code. in both paper and thesis it is k3 that is used to det-
+! ermine k1 below
 !
 !    compute k3
             k3 = k1*(1+(1-k1*m0**2)*resm**2/(k1*m0**4))
@@ -215,9 +215,9 @@ subroutine residual_block()
             else
               betamr2 = x1
             end if
-! above, the winf is the free stream velocity 
-! 
-! should this first line's first element have sos^4 or sos^2  
+! above, the winf is the free stream velocity
+!
+! should this first line's first element have sos^4 or sos^2
             a11 = betamr2*(1/sos**4)
             a12 = zero
             a13 = zero
diff --git a/src/adjoint/outputReverse/solverutils_b.f90 b/src/adjoint/outputReverse/solverutils_b.f90
index 99508bc75..8df9bfb8f 100644
--- a/src/adjoint/outputReverse/solverutils_b.f90
+++ b/src/adjoint/outputReverse/solverutils_b.f90
@@ -1352,8 +1352,8 @@ subroutine gridvelocitiesfinelevel_block_b(useoldcoor, t, sps, nn)
 !             normal grid velocities of the faces.
 !
 ! loop over the three directions.
-! the original code is elegant but the tapenade has a difficult time 
-! to understand it. thus, we unfold it and make it easier for the 
+! the original code is elegant but the tapenade has a difficult time
+! to understand it. thus, we unfold it and make it easier for the
 ! tapenade.
 ! i-direction
         do k=1,ke
@@ -1965,8 +1965,8 @@ subroutine gridvelocitiesfinelevel_block(useoldcoor, t, sps, nn)
 !             normal grid velocities of the faces.
 !
 ! loop over the three directions.
-! the original code is elegant but the tapenade has a difficult time 
-! to understand it. thus, we unfold it and make it easier for the 
+! the original code is elegant but the tapenade has a difficult time
+! to understand it. thus, we unfold it and make it easier for the
 ! tapenade.
 ! i-direction
         do k=1,ke
diff --git a/src/adjoint/outputReverseFast/residuals_fast_b.f90 b/src/adjoint/outputReverseFast/residuals_fast_b.f90
index ee9f38610..bbda30651 100644
--- a/src/adjoint/outputReverseFast/residuals_fast_b.f90
+++ b/src/adjoint/outputReverseFast/residuals_fast_b.f90
@@ -173,34 +173,34 @@ subroutine residual_block()
           do i=2,il
 !    compute speed of sound
             sos = sqrt(gamma(i, j, k)*p(i, j, k)/w(i, j, k, irho))
-! compute velocities without rho from state vector 
+! compute velocities without rho from state vector
 !      (w is pointer.. see type blocktype setup in block.f90)
-!      w(0:ib,0:jb,0:kb,1:nw) is allocated in block.f90 
-!      these are per definition nw=[rho,u,v,w,rhoee] 
-!      so the velocity is simply just taken out below... 
-!      we do not have to divide with rho since it is already 
-!      without rho... 
+!      w(0:ib,0:jb,0:kb,1:nw) is allocated in block.f90
+!      these are per definition nw=[rho,u,v,w,rhoee]
+!      so the velocity is simply just taken out below...
+!      we do not have to divide with rho since it is already
+!      without rho...
 ! ivx: l. 60 in constants.f90
             velxrho = w(i, j, k, ivx)
             velyrho = w(i, j, k, ivy)
             velzrho = w(i, j, k, ivz)
             q = velxrho**2 + velyrho**2 + velzrho**2
             resm = sqrt(q)/sos
-! resm above is used as m_a (thesis) and m (paper 2015) 
-! and is the free stream mach number 
-! see routine setup above: 
-! l. 30: real(kind=realtype), parameter :: k1 = 1.05_realtype 
-! random given number for k2: 
-! l. 31: real(kind=realtype), parameter :: k2 = 0.6_realtype 
-! mach number preconditioner activation for k3: 
-! l. 32: real(kind=realtype), parameter :: m0 = 0.2_realtype 
-! 
-!    compute k3 
-! eq. 2.7 in garg 2015. k1, m0 and resm are scalars 
-! 
-! unfortunately, garg has switched the k1 and k3 here in the 
-! code. in both paper and thesis it is k3 that is used to det- 
-! ermine k1 below 
+! resm above is used as m_a (thesis) and m (paper 2015)
+! and is the free stream mach number
+! see routine setup above:
+! l. 30: real(kind=realtype), parameter :: k1 = 1.05_realtype
+! random given number for k2:
+! l. 31: real(kind=realtype), parameter :: k2 = 0.6_realtype
+! mach number preconditioner activation for k3:
+! l. 32: real(kind=realtype), parameter :: m0 = 0.2_realtype
+!
+!    compute k3
+! eq. 2.7 in garg 2015. k1, m0 and resm are scalars
+!
+! unfortunately, garg has switched the k1 and k3 here in the
+! code. in both paper and thesis it is k3 that is used to det-
+! ermine k1 below
 !
 !    compute k3
             k3 = k1*(1+(1-k1*m0**2)*resm**2/(k1*m0**4))
@@ -215,9 +215,9 @@ subroutine residual_block()
             else
               betamr2 = x1
             end if
-! above, the winf is the free stream velocity 
-! 
-! should this first line's first element have sos^4 or sos^2  
+! above, the winf is the free stream velocity
+!
+! should this first line's first element have sos^4 or sos^2
             a11 = betamr2*(1/sos**4)
             a12 = zero
             a13 = zero
diff --git a/src/adjoint/outputReverseFast/solverutils_fast_b.f90 b/src/adjoint/outputReverseFast/solverutils_fast_b.f90
index b91397e80..79cffc411 100644
--- a/src/adjoint/outputReverseFast/solverutils_fast_b.f90
+++ b/src/adjoint/outputReverseFast/solverutils_fast_b.f90
@@ -771,8 +771,8 @@ subroutine gridvelocitiesfinelevel_block(useoldcoor, t, sps, nn)
 !             normal grid velocities of the faces.
 !
 ! loop over the three directions.
-! the original code is elegant but the tapenade has a difficult time 
-! to understand it. thus, we unfold it and make it easier for the 
+! the original code is elegant but the tapenade has a difficult time
+! to understand it. thus, we unfold it and make it easier for the
 ! tapenade.
 ! i-direction
         do k=1,ke
diff --git a/src/bcdata/BCData.F90 b/src/bcdata/BCData.F90
index 8fc7e97db..d63945591 100644
--- a/src/bcdata/BCData.F90
+++ b/src/bcdata/BCData.F90
@@ -1531,7 +1531,7 @@ subroutine setBCData(bcDataNamesIn, bcDataIn, famLists, sps, &
                             call setBCVarNamesAdiabaticWall
                             call errorCheckbcDataNamesIn("NSWallAdiabatic", bcDataNamesIn)
                         case (NSWallIsothermal)
-                            call setBCVarNamesSupersonicInflow  ! possible bug?
+                            call setBCVarNamesSupersonicInflow
                             call errorCheckbcDataNamesIn("NSWallIsothermal", bcDataNamesIn)
                         case (SupersonicInflow)
                             call setBCVarNamesSupersonicInflow
@@ -1740,7 +1740,7 @@ subroutine setBCData_b(bcDataNamesIn, bcDataIn, bcDataInd, famLists, sps, &
                             call setBCVarNamesAdiabaticWall
                             call errorCheckbcDataNamesIn("NSWallAdiabatic", bcDataNamesIn)
                         case (NSWallIsothermal)
-                            call setBCVarNamesSupersonicInflow  ! possible bug?
+                            call setBCVarNamesSupersonicInflow
                             call errorCheckbcDataNamesIn("NSWallIsothermal", bcDataNamesIn)
                         case (SupersonicInflow)
                             call setBCVarNamesSupersonicInflow
diff --git a/src/f2py/adflow.pyf b/src/f2py/adflow.pyf
index 9f102fde3..70ad7633c 100644
--- a/src/f2py/adflow.pyf
+++ b/src/f2py/adflow.pyf
@@ -158,19 +158,19 @@ python module libadflow
          end subroutine setdefaultvalues
 
          subroutine monitorvariables(variables) ! in :test:monitorVariables.f90
-           character*(*) intent(inout) :: variables
+           character*(*) intent(in) :: variables
          end subroutine monitorvariables
 
          subroutine surfacevariables(variables) ! in :test:surfaceVariables.f90
-           character*(*) intent(inout) :: variables
+           character*(*) intent(in) :: variables
          end subroutine surfacevariables
 
          subroutine volumevariables(variables) ! in :test:volumeVariables.f90
-           character*(*) intent(inout) :: variables
+           character*(*) intent(in) :: variables
          end subroutine volumevariables
 
          subroutine isovariables(variables) ! in :test:volumeVariables.f90
-           character*(*) intent(inout) :: variables
+           character*(*) intent(in) :: variables
          end subroutine isovariables
 
          subroutine initializeisosurfacevariables(values,nvalues) ! in :test:setIsoSurfaceValues.F90
diff --git a/src/initFlow/initializeFlow.F90 b/src/initFlow/initializeFlow.F90
index 8c19ef596..f5b11ced8 100644
--- a/src/initFlow/initializeFlow.F90
+++ b/src/initFlow/initializeFlow.F90
@@ -1,3489 +1,3472 @@
 module initializeFlow
 
-  use constants, only : intType, realType, maxStringLen
+    use constants, only: intType, realType, maxStringLen
 
-  implicit none
-  save
+    implicit none
+    save
 
 contains
 
-  subroutine referenceState
-    !
-    !       The original version has been nuked since the computations are
-    !       no longer necessary when calling from python
-    !       This is the most compliclated routine in all of ADflow. It is
-    !       stupidly complicated. This is most likely the reason your
-    !       derivatives are wrong. You don't understand this routine
-    !       and its effects.
-    !       This routine *requries* the following as input:
-    !       Mach, pInfDim, TInfDim, rhoInfDim, rGasDim (machCoef non-SA
-    !        turbulence only)
-    !       Optionally, pRef, rhoRef and Tref are used if they are
-    !       are non-negative. This only happens when you want the equations
-    !       normalized by values other than the freestream
-    !      * This routine computes as output:
-    !      *   muInfDim, (unused anywhere in code)
-    !         pRef, rhoRef, Tref, muRef, timeRef ('dimensional' reference)
-    !         pInf, pInfCorr, rhoInf, uInf, rGas, muInf, gammaInf and wInf
-    !         (Non-dimensionalized values used in actual computations)
-    !
-    use constants
-    use paramTurb
-    use inputPhysics, only : equations, Mach, machCoef, &
-         muSuthDim, TSuthDim, velDirFreeStream, &
-         rGasDim, SSuthDim, eddyVisInfRatio, turbModel, turbIntensityInf
-    use flowVarRefState, only : pInfDim, TinfDim, rhoInfDim,  &
-         muInfDim, &
-         pRef, rhoRef, Tref, muRef, timeRef, uRef, hRef, &
-         pInf, pInfCorr, rhoInf, uInf, rGas, muInf, gammaInf, wInf, &
-         nw, nwf, kPresent, wInf
-    use flowUtils, only : computeGamma, eTot
-    use turbUtils, only : saNuKnownEddyRatio
-    implicit none
-
-    integer(kind=intType) :: sps, nn, mm, ierr
-    real(kind=realType) :: gm1, ratio
-    real(kind=realType) :: nuInf, ktmp, uInf2
-    real(kind=realType) :: vinf, zinf, tmp1(1), tmp2(1)
-
-    ! Compute the dimensional viscosity from Sutherland's law
-    muInfDim = muSuthDim &
-         * ((TSuthDim + SSuthDim)/(TInfDim + SSuthDim)) &
-         * ((TInfDim/TSuthDim)**1.5_realType)
-
-    ! Set the reference values. They *COULD* be different from the
-    ! free-stream values for an internal flow simulation. For now,
-    ! we just use the actual free stream values.
-    pref = PInfDim
-    tref = TInfDim
-    rhoref = rhoInfDim
-
-    ! Compute the value of muRef, such that the nonDimensional
-    ! equations are identical to the dimensional ones.
-    ! Note that in the non-dimensionalization of muRef there is
-    ! a reference length. However this reference length is 1.0
-    ! in this code, because the coordinates are converted to
-    ! meters.
-
-    muRef = sqrt(pRef*rhoRef)
-
-    ! Compute timeRef for a correct nonDimensionalization of the
-    ! unsteady equations. Some story as for the reference viscosity
-    ! concerning the reference length.
-
-    timeRef = sqrt(rhoRef/pRef)
-    hRef = pRef/rhoRef
-    uRef = sqrt(hRef)
-
-    ! Compute the nonDimensional pressure, density, velocity,
-    ! viscosity and gas constant.
-
-    pInf   = pInfDim/pRef
-    rhoInf = rhoInfDim/rhoRef
-    uInf   = Mach*sqrt(gammaInf*pInf/rhoInf)
-    RGas   = RGasDim*rhoRef*TRef/pRef
-    muInf  = muInfDim/muRef
-    tmp1(1) = TinfDim
-    call computeGamma(tmp1, tmp2, 1)
-    gammaInf = tmp2(1)
-
-    ! ----------------------------------------
-    !      Compute the final wInf
-    ! ----------------------------------------
-
-    ! Allocate the memory for wInf if necessary
+    subroutine referenceState
+        !
+        !       The original version has been nuked since the computations are
+        !       no longer necessary when calling from python
+        !       This is the most compliclated routine in all of ADflow. It is
+        !       stupidly complicated. This is most likely the reason your
+        !       derivatives are wrong. You don't understand this routine
+        !       and its effects.
+        !       This routine *requries* the following as input:
+        !       Mach, pInfDim, TInfDim, rhoInfDim, rGasDim (machCoef non-SA
+        !        turbulence only)
+        !       Optionally, pRef, rhoRef and Tref are used if they are
+        !       are non-negative. This only happens when you want the equations
+        !       normalized by values other than the freestream
+        !      * This routine computes as output:
+        !      *   muInfDim, (unused anywhere in code)
+        !         pRef, rhoRef, Tref, muRef, timeRef ('dimensional' reference)
+        !         pInf, pInfCorr, rhoInf, uInf, rGas, muInf, gammaInf and wInf
+        !         (Non-dimensionalized values used in actual computations)
+        !
+        use constants
+        use paramTurb
+        use inputPhysics, only: equations, Mach, machCoef, &
+                                muSuthDim, TSuthDim, velDirFreeStream, &
+                                rGasDim, SSuthDim, eddyVisInfRatio, turbModel, turbIntensityInf
+        use flowVarRefState, only: pInfDim, TinfDim, rhoInfDim, &
+                                   muInfDim, &
+                                   pRef, rhoRef, Tref, muRef, timeRef, uRef, hRef, &
+                                   pInf, pInfCorr, rhoInf, uInf, rGas, muInf, gammaInf, wInf, &
+                                   nw, nwf, kPresent, wInf
+        use flowUtils, only: computeGamma, eTot
+        use turbUtils, only: saNuKnownEddyRatio
+        implicit none
+
+        integer(kind=intType) :: sps, nn, mm, ierr
+        real(kind=realType) :: gm1, ratio
+        real(kind=realType) :: nuInf, ktmp, uInf2
+        real(kind=realType) :: vinf, zinf, tmp1(1), tmp2(1)
+
+        ! Compute the dimensional viscosity from Sutherland's law
+        muInfDim = muSuthDim &
+                   * ((TSuthDim + SSuthDim) / (TInfDim + SSuthDim)) &
+                   * ((TInfDim / TSuthDim)**1.5_realType)
+
+        ! Set the reference values. They *COULD* be different from the
+        ! free-stream values for an internal flow simulation. For now,
+        ! we just use the actual free stream values.
+        pref = PInfDim
+        tref = TInfDim
+        rhoref = rhoInfDim
+
+        ! Compute the value of muRef, such that the nonDimensional
+        ! equations are identical to the dimensional ones.
+        ! Note that in the non-dimensionalization of muRef there is
+        ! a reference length. However this reference length is 1.0
+        ! in this code, because the coordinates are converted to
+        ! meters.
+
+        muRef = sqrt(pRef * rhoRef)
+
+        ! Compute timeRef for a correct nonDimensionalization of the
+        ! unsteady equations. Some story as for the reference viscosity
+        ! concerning the reference length.
+
+        timeRef = sqrt(rhoRef / pRef)
+        hRef = pRef / rhoRef
+        uRef = sqrt(hRef)
+
+        ! Compute the nonDimensional pressure, density, velocity,
+        ! viscosity and gas constant.
+
+        pInf = pInfDim / pRef
+        rhoInf = rhoInfDim / rhoRef
+        uInf = Mach * sqrt(gammaInf * pInf / rhoInf)
+        RGas = RGasDim * rhoRef * TRef / pRef
+        muInf = muInfDim / muRef
+        tmp1(1) = TinfDim
+        call computeGamma(tmp1, tmp2, 1)
+        gammaInf = tmp2(1)
+
+        ! ----------------------------------------
+        !      Compute the final wInf
+        ! ----------------------------------------
+
+        ! Allocate the memory for wInf if necessary
 #ifndef USE_TAPENADE
-    if( allocated(wInf)) deallocate(wInf)
-    allocate(wInf(nw), stat=ierr)
+        if (allocated(wInf)) deallocate (wInf)
+        allocate (wInf(nw), stat=ierr)
 #endif
 
-    ! zero out the winf first
-    wInf(:) = zero
-
-    ! Set the reference value of the flow variables, except the total
-    ! energy. This will be computed at the end of this routine.
-
-    wInf(irho) = rhoInf
-    wInf(ivx)  = uInf*velDirFreestream(1)
-    wInf(ivy)  = uInf*velDirFreestream(2)
-    wInf(ivz)  = uInf*velDirFreestream(3)
+        ! zero out the winf first
+        wInf(:) = zero
 
-    ! Compute the velocity squared based on MachCoef. This gives a
-    ! better indication of the 'speed' of the flow so the turubulence
-    ! intensity ration is more meaningful especially for moving
-    ! geometries. (Not used in SA model)
+        ! Set the reference value of the flow variables, except the total
+        ! energy. This will be computed at the end of this routine.
 
-    uInf2 = MachCoef*MachCoef*gammaInf*pInf/rhoInf
+        wInf(irho) = rhoInf
+        wInf(ivx) = uInf * velDirFreestream(1)
+        wInf(ivy) = uInf * velDirFreestream(2)
+        wInf(ivz) = uInf * velDirFreestream(3)
 
-    ! Set the turbulent variables if transport variables are to be
-    ! solved. We should be checking for RANS equations here,
-    ! however, this code is included in block res. The issue is
-    ! that for frozen turbulence (or ANK jacobian) we call the
-    ! block_res with equationType set to Laminar even though we are
-    ! actually solving the rans equations. The issue is that, the
-    ! freestream turb variables will be changed to zero, thus
-    ! changing the solution. Insteady we check if nw > nwf which
-    ! will accomplish the same thing.
+        ! Compute the velocity squared based on MachCoef. This gives a
+        ! better indication of the 'speed' of the flow so the turubulence
+        ! intensity ration is more meaningful especially for moving
+        ! geometries. (Not used in SA model)
 
-    if(nw > nwf) then
+        uInf2 = MachCoef * MachCoef * gammaInf * pInf / rhoInf
 
-       nuInf  = muInf/rhoInf
+        ! Set the turbulent variables if transport variables are to be
+        ! solved. We should be checking for RANS equations here,
+        ! however, this code is included in block res. The issue is
+        ! that for frozen turbulence (or ANK jacobian) we call the
+        ! block_res with equationType set to Laminar even though we are
+        ! actually solving the rans equations. The issue is that, the
+        ! freestream turb variables will be changed to zero, thus
+        ! changing the solution. Insteady we check if nw > nwf which
+        ! will accomplish the same thing.
 
-       select case(turbModel)
+        if (nw > nwf) then
 
-       case (spalartAllmaras, spalartAllmarasEdwards)
+            nuInf = muInf / rhoInf
 
-          wInf(itu1) = saNuKnownEddyRatio(eddyVisInfRatio, nuInf)
+            select case (turbModel)
 
-          !=============================================================
+            case (spalartAllmaras, spalartAllmarasEdwards)
 
-       case (komegaWilcox, komegaModified, menterSST)
+                wInf(itu1) = saNuKnownEddyRatio(eddyVisInfRatio, nuInf)
 
-          wInf(itu1) = 1.5_realType*uInf2*turbIntensityInf**2
-          wInf(itu2) = wInf(itu1)/(eddyVisInfRatio*nuInf)
+                !=============================================================
 
-          !=============================================================
+            case (komegaWilcox, komegaModified, menterSST)
 
-       case (ktau)
+                wInf(itu1) = 1.5_realType * uInf2 * turbIntensityInf**2
+                wInf(itu2) = wInf(itu1) / (eddyVisInfRatio * nuInf)
 
-          wInf(itu1) = 1.5_realType*uInf2*turbIntensityInf**2
-          wInf(itu2) = eddyVisInfRatio*nuInf/wInf(itu1)
+                !=============================================================
 
-          !=============================================================
+            case (ktau)
 
-       case (v2f)
+                wInf(itu1) = 1.5_realType * uInf2 * turbIntensityInf**2
+                wInf(itu2) = eddyVisInfRatio * nuInf / wInf(itu1)
 
-          wInf(itu1) = 1.5_realType*uInf2*turbIntensityInf**2
-          wInf(itu2) = 0.09_realType*wInf(itu1)**2 &
-               / (eddyVisInfRatio*nuInf)
-          wInf(itu3) = 0.666666_realType*wInf(itu1)
-          wInf(itu4) = 0.0_realType
+                !=============================================================
 
-       end select
+            case (v2f)
 
-    endif
+                wInf(itu1) = 1.5_realType * uInf2 * turbIntensityInf**2
+                wInf(itu2) = 0.09_realType * wInf(itu1)**2 &
+                             / (eddyVisInfRatio * nuInf)
+                wInf(itu3) = 0.666666_realType * wInf(itu1)
+                wInf(itu4) = 0.0_realType
 
-    ! Set the value of pInfCorr. In case a k-equation is present
-    ! add 2/3 times rho*k.
+            end select
 
-    pInfCorr = pInf
-    if( kPresent ) pInfCorr = pInf + two*third*rhoInf*wInf(itu1)
+        end if
 
-    ! Compute the free stream total energy.
+        ! Set the value of pInfCorr. In case a k-equation is present
+        ! add 2/3 times rho*k.
 
-    ktmp = zero
-    if( kPresent ) ktmp = wInf(itu1)
-    vInf = zero
-    zInf = zero
-    call etot(rhoInf, uInf, vInf, zInf, pInfCorr, ktmp, &
-         wInf(irhoE), kPresent)
+        pInfCorr = pInf
+        if (kPresent) pInfCorr = pInf + two * third * rhoInf * wInf(itu1)
 
-  end subroutine referenceState
+        ! Compute the free stream total energy.
 
+        ktmp = zero
+        if (kPresent) ktmp = wInf(itu1)
+        vInf = zero
+        zInf = zero
+        call etot(rhoInf, uInf, vInf, zInf, pInfCorr, ktmp, &
+                  wInf(irhoE), kPresent)
 
+    end subroutine referenceState
 
-
-  ! ----------------------------------------------------------------------
-  !                                                                      |
-  !                    No Tapenade Routine below this line               |
-  !                                                                      |
-  ! ----------------------------------------------------------------------
+    ! ----------------------------------------------------------------------
+    !                                                                      |
+    !                    No Tapenade Routine below this line               |
+    !                                                                      |
+    ! ----------------------------------------------------------------------
 
 #ifndef  USE_TAPENADE
-  subroutine infChangeCorrection(oldWinf)
-    ! Adjust the flow states to a change in wInf
-    use constants
-    use blockPointers, only : il, jl, kl, w, nDom, d2wall
-    use flowVarRefState, only : wInf, nwf, nw
-    use inputPhysics , only : equations
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use haloExchange, only : whalo2
-    use flowUtils, only : adjustInflowAngle
-    use oversetData, only : oversetPresent
-    use iteration, only : currentLevel
-    use turbbcRoutines, only : applyallTurbBCthisblock, bcTurbTreatment
-    use BCRoutines, only : applyallBC_block
-    use utils, only : setPointers, mynorm2
-    implicit none
-
-    real(kind=realType), intent(in), dimension(nwf) :: oldWinf
-    integer(kind=intType) :: sps, nn, i, j, k, l
-    real(kind=realType) :: deltaWinf(nwf)
-
-    ! Make sure we have the updated wInf
-    call adjustInflowAngle()
-    call referenceState
-
-    deltaWinf = Winf(1:nwf) - oldWinf(1:nwf)
-
-    if (mynorm2(deltaWinf) < 1e-12) then
-       ! The change deltaWinf is so small, (or zero) don't do the
-       ! update and just return. This will save some time when the
-       ! solver is called with the same AP conditions multiple times,
-       ! such as during a GS AS solution
-
-       return
-    end if
-
-    ! Loop over all the blocks, adding the subtracting off the oldWinf
-    do sps=1, nTimeIntervalsSpectral
-       do nn=1, nDom
-          call setPointers(nn, 1_intType, sps)
-          do k=2, kl
-             do j=2, jl
-                do i=2, il
-                   do l=1, nwf
-                      w(i, j, k, l) = w(i, j, k, l) + deltaWinf(l)
-                   end do
+    subroutine infChangeCorrection(oldWinf)
+        ! Adjust the flow states to a change in wInf
+        use constants
+        use blockPointers, only: il, jl, kl, w, nDom, d2wall
+        use flowVarRefState, only: wInf, nwf, nw
+        use inputPhysics, only: equations
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use haloExchange, only: whalo2
+        use flowUtils, only: adjustInflowAngle
+        use oversetData, only: oversetPresent
+        use iteration, only: currentLevel
+        use turbbcRoutines, only: applyallTurbBCthisblock, bcTurbTreatment
+        use BCRoutines, only: applyallBC_block
+        use utils, only: setPointers, mynorm2
+        implicit none
+
+        real(kind=realType), intent(in), dimension(nwf) :: oldWinf
+        integer(kind=intType) :: sps, nn, i, j, k, l
+        real(kind=realType) :: deltaWinf(nwf)
+
+        ! Make sure we have the updated wInf
+        call adjustInflowAngle()
+        call referenceState
+
+        deltaWinf = Winf(1:nwf) - oldWinf(1:nwf)
+
+        if (mynorm2(deltaWinf) < 1e-12) then
+            ! The change deltaWinf is so small, (or zero) don't do the
+            ! update and just return. This will save some time when the
+            ! solver is called with the same AP conditions multiple times,
+            ! such as during a GS AS solution
+
+            return
+        end if
+
+        ! Loop over all the blocks, adding the subtracting off the oldWinf
+        do sps = 1, nTimeIntervalsSpectral
+            do nn = 1, nDom
+                call setPointers(nn, 1_intType, sps)
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            do l = 1, nwf
+                                w(i, j, k, l) = w(i, j, k, l) + deltaWinf(l)
+                            end do
+                        end do
+                    end do
                 end do
-             end do
-          end do
-          call applyAllBC_block(.True.)
-       end do
-    end do
-
-    ! Exchange values
-    call whalo2(currentLevel, 1_intType, nw, .True., .True., .True.)
-
-    ! Need to re-apply the BCs. The reason is that BC halos behind
-    ! interpolated cells need to be recomputed with their new
-    ! interpolated values from actual compute cells. Only needed for
-    ! overset.
-    if (oversetPresent) then
-       do sps=1,nTimeIntervalsSpectral
-          do nn=1,nDom
-             call setPointers(nn, 1, sps)
-             if (equations == RANSequations) then
-                call BCTurbTreatment
-                call applyAllTurbBCthisblock(.True.)
-             end if
-             call applyAllBC_block(.True.)
-          end do
-       end do
-    end if
-
-end subroutine infChangeCorrection
-
-
-  ! Section out the BCdata setup so that it can by called from python when needed
-  subroutine updateBCDataAllLevels()
-    ! sets the prescribed boundary data from the CGNS arrays
-
-    use constants
-    use iteration, only : groundLevel
-    use bcdata, only : setbcdataFineGrid, setBCDataCoarseGrid
-    implicit none
-
-    ! Allocate the memory for the prescribed boundary data at the
-    ! boundary faces and determine the data for both the fine grid.
-
-    groundLevel = 1
-
-    ! Determine the reference state.
-    call referenceState
-
-    call setBCDataFineGrid(.true.)
-
-    ! Determine the prescribed data on the coarse grid levels
-    ! by interpolation.
-#ifndef USE_TAPENADE
-    call setBCDataCoarseGrid
-#endif
-
-  end subroutine updateBCDataAllLevels
-
-  subroutine initFlow
-    !
-    !       initFlow allocates the
-    !       memory for and initializes the flow variables. In case a
-    !       restart is performed the owned variables are read from the
-    !       previous solution file(s).
-    !
-    use constants
-    use block, only : flowDoms
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use variableReading, only : halosRead
-
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: sps, level, nLevels
+                call applyAllBC_block(.True.)
+            end do
+        end do
+
+        ! Exchange values
+        call whalo2(currentLevel, 1_intType, nw, .True., .True., .True.)
+
+        ! Need to re-apply the BCs. The reason is that BC halos behind
+        ! interpolated cells need to be recomputed with their new
+        ! interpolated values from actual compute cells. Only needed for
+        ! overset.
+        if (oversetPresent) then
+            do sps = 1, nTimeIntervalsSpectral
+                do nn = 1, nDom
+                    call setPointers(nn, 1, sps)
+                    if (equations == RANSequations) then
+                        call BCTurbTreatment
+                        call applyAllTurbBCthisblock(.True.)
+                    end if
+                    call applyAllBC_block(.True.)
+                end do
+            end do
+        end if
 
-    ! Determine the number of multigrid levels.
+    end subroutine infChangeCorrection
 
-    nLevels = ubound(flowDoms,2)
+    ! Section out the BCdata setup so that it can by called from python when needed
+    subroutine updateBCDataAllLevels()
+        ! sets the prescribed boundary data from the CGNS arrays
 
-    ! As some boundary conditions can be treated in multiple ways,
-    ! some memory allocated must be released again.
+        use constants
+        use iteration, only: groundLevel
+        use bcdata, only: setbcdataFineGrid, setBCDataCoarseGrid
+        implicit none
 
-     call releaseExtraMemBcs
+        ! Allocate the memory for the prescribed boundary data at the
+        ! boundary faces and determine the data for both the fine grid.
 
-    ! Determine for the time spectral mode the matrices for the
-    ! time derivatives.
-    call timeSpectralMatrices
+        groundLevel = 1
 
-    ! Loop over the number of spectral solutions to allocate
-    ! the memory for the w-variables and p on the fine grid.
-    do sps=1,nTimeIntervalsSpectral
-       call allocMemFlovarPart1(sps, 1_intType)
-    enddo
+        ! Determine the reference state.
+        call referenceState
 
-    ! Allocate the memory for the solution variables on the coarse
-    ! grid levels and the memory for the dependent flow variables,
-    ! residuals, etc, on all multigrid levels.
-    do sps=1,nTimeIntervalsSpectral
-       call allocMemFlovarPart2(sps, 1_intType)
+        call setBCDataFineGrid(.true.)
 
-       do level=2,nLevels
-          call allocMemFlovarPart1(sps, level)
-          call allocMemFlovarPart2(sps, level)
-       enddo
-    enddo
+        ! Determine the prescribed data on the coarse grid levels
+        ! by interpolation.
+#ifndef USE_TAPENADE
+        call setBCDataCoarseGrid
+#endif
 
-    ! Initialize free stream field
-    call initFlowfield
+    end subroutine updateBCDataAllLevels
+
+    subroutine initFlow
+        !
+        !       initFlow allocates the
+        !       memory for and initializes the flow variables. In case a
+        !       restart is performed the owned variables are read from the
+        !       previous solution file(s).
+        !
+        use constants
+        use block, only: flowDoms
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use variableReading, only: halosRead
+
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: sps, level, nLevels
+
+        ! Determine the number of multigrid levels.
+
+        nLevels = ubound(flowDoms, 2)
+
+        ! As some boundary conditions can be treated in multiple ways,
+        ! some memory allocated must be released again.
+
+        call releaseExtraMemBcs
+
+        ! Determine for the time spectral mode the matrices for the
+        ! time derivatives.
+        call timeSpectralMatrices
+
+        ! Loop over the number of spectral solutions to allocate
+        ! the memory for the w-variables and p on the fine grid.
+        do sps = 1, nTimeIntervalsSpectral
+            call allocMemFlovarPart1(sps, 1_intType)
+        end do
+
+        ! Allocate the memory for the solution variables on the coarse
+        ! grid levels and the memory for the dependent flow variables,
+        ! residuals, etc, on all multigrid levels.
+        do sps = 1, nTimeIntervalsSpectral
+            call allocMemFlovarPart2(sps, 1_intType)
+
+            do level = 2, nLevels
+                call allocMemFlovarPart1(sps, level)
+                call allocMemFlovarPart2(sps, level)
+            end do
+        end do
+
+        ! Initialize free stream field
+        call initFlowfield
+
+        ! Initialize the dependent flow variables and the halo values.
+
+        call initDepvarAndHalos(halosRead)
+
+    end subroutine initFlow
+
+    subroutine allocMemFlovarPart1(sps, level)
+        !
+        !       allocMemFlovarPart1 allocates the memory for the flow
+        !       variables w and p for all the blocks on the given multigrid
+        !       level and spectral solution sps.
+        !
+        use constants
+        use block, only: flowDoms, nDOm
+        use flowVarRefState, only: nw, nwf, nt1, nt2
+        use inputPhysics, only: equationMode, gammaConstant
+        use inputUnsteady, only: timeIntegrationScheme
+        use inputIteration, only: mgStartLevel, turbTreatment
+        use iteration, only: nOldLevels
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: sps, level
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: nn
+        integer(kind=intType) :: il, jl, kl, ie, je, ke, ib, jb, kb
+
+        ! Loop over the domains.
+
+        domains: do nn = 1, nDom
+
+            ! Store some dimensions a bit easier.
+
+            il = flowDoms(nn, level, sps)%il
+            jl = flowDoms(nn, level, sps)%jl
+            kl = flowDoms(nn, level, sps)%kl
+
+            ie = flowDoms(nn, level, sps)%ie
+            je = flowDoms(nn, level, sps)%je
+            ke = flowDoms(nn, level, sps)%ke
+
+            ib = flowDoms(nn, level, sps)%ib
+            jb = flowDoms(nn, level, sps)%jb
+            kb = flowDoms(nn, level, sps)%kb
+
+            ! Allocate the memory for the independent variables.
+            ! Memory is allocated for the turbulent variables (if any) if
+            ! the current level is smaller or equal to the multigrid start
+            ! level or if the turbulent transport equations are solved in
+            ! a coupled manner.
+
+            if (level <= mgStartlevel .or. turbTreatment == coupled) then
+                allocate (flowDoms(nn, level, sps)%w(0:ib, 0:jb, 0:kb, 1:nw), &
+                          stat=ierr)
+            else
+                allocate (flowDoms(nn, level, sps)%w(0:ib, 0:jb, 0:kb, 1:nwf), &
+                          stat=ierr)
+            end if
+            if (ierr /= 0) &
+                call terminate("allocMemFlovarPart1", &
+                               "Memory allocation failure for w")
+
+            ! Alloc mem for nodal gradients
+            allocate (flowDoms(nn, level, sps)%ux(il, jl, kl), stat=ierr)
+            allocate (flowDoms(nn, level, sps)%uy(il, jl, kl), stat=ierr)
+            allocate (flowDoms(nn, level, sps)%uz(il, jl, kl), stat=ierr)
+
+            allocate (flowDoms(nn, level, sps)%vx(il, jl, kl), stat=ierr)
+            allocate (flowDoms(nn, level, sps)%vy(il, jl, kl), stat=ierr)
+            allocate (flowDoms(nn, level, sps)%vz(il, jl, kl), stat=ierr)
+
+            allocate (flowDoms(nn, level, sps)%wx(il, jl, kl), stat=ierr)
+            allocate (flowDoms(nn, level, sps)%wy(il, jl, kl), stat=ierr)
+            allocate (flowDoms(nn, level, sps)%wz(il, jl, kl), stat=ierr)
+
+            allocate (flowDoms(nn, level, sps)%qx(il, jl, kl), stat=ierr)
+            allocate (flowDoms(nn, level, sps)%qy(il, jl, kl), stat=ierr)
+            allocate (flowDoms(nn, level, sps)%qz(il, jl, kl), stat=ierr)
+
+            ! Allocate memory for the pressure.
+            allocate (flowDoms(nn, level, sps)%p(0:ib, 0:jb, 0:kb), stat=ierr)
+            if (ierr /= 0) &
+                call terminate("allocMemFlovarPart1", &
+                               "Memory allocation failure for p")
+
+            ! Allocate memory for the speed of sound squared
+            allocate (flowDoms(nn, level, sps)%aa(0:ib, 0:jb, 0:kb), stat=ierr)
+            if (ierr /= 0) &
+                call terminate("allocMemFlovarPart1", &
+                               "Memory allocation failure for p")
+
+            ! The eddy viscosity for eddy viscosity models.
+            ! Although a dependent variable, it is allocated on all grid
+            ! levels, because the eddy viscosity might be frozen in the
+            ! multigrid.
+
+            ! Always allocate rev due to reverse mode AD- Peter Lyu. Also
+            ! zero so that it doesn't affect laminar cases.
+            allocate (flowDoms(nn, level, sps)%rev(0:ib, 0:jb, 0:kb), &
+                      stat=ierr)
+            flowDoms(nn, level, sps)%rev = zero
+            if (ierr /= 0) &
+                call terminate("allocMemFlovarPart1", &
+                               "Memory allocation failure for rev")
+            !endif
+
+            ! If this is the finest grid some more memory must be allocated.
+
+            fineLevelTest: if (level == 1) then
+
+                ! Allocate the memory for gamma and initialize it to
+                ! the constant gamma value.
+
+                allocate (flowDoms(nn, level, sps)%gamma(0:ib, 0:jb, 0:kb), &
+                          stat=ierr)
+                if (ierr /= 0) &
+                    call terminate("allocMemFlovarPart1", &
+                                   "Memory allocation failure for gamma.")
+
+                flowDoms(nn, level, sps)%gamma = gammaConstant
+
+                ! The laminar viscosity for viscous computations.
+                ! Always allocate rlv due to reverse mode - Peter Lyu
+                !if( viscous ) then
+                allocate (flowDoms(nn, level, sps)%rlv(0:ib, 0:jb, 0:kb), &
+                          stat=ierr)
+                if (ierr /= 0) &
+                    call terminate("allocMemFlovarPart1", &
+                                   "Memory allocation failure for rlv")
+                !endif
+
+                ! The state vectors in the past for unsteady computations.
+
+                if (equationMode == unsteady .and. &
+                    timeIntegrationScheme == BDF) then
+                    allocate ( &
+                        flowDoms(nn, level, sps)%wOld(nOldLevels, 2:il, 2:jl, 2:kl, nw), &
+                        stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("allocMemFlovarPart1", &
+                                       "Memory allocation failure for wOld")
+
+                    ! Initialize wOld to zero, such that it is initialized.
+                    ! The actual values do not matter.
+
+                    flowDoms(nn, level, sps)%wOld = zero
+
+                    ! Added by HDN
+                else if (equationMode == unsteady .and. &
+                         timeIntegrationScheme == MD) then
+                    allocate ( &
+                        flowDoms(nn, level, sps)%wOld(nOldLevels, 2:il, 2:jl, 2:kl, nw), &
+                        stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("allocMemFlovarPart1", &
+                                       "Memory allocation failure for wOld")
+
+                    flowDoms(nn, level, sps)%wOld = zero
+                end if
+
+                ! If this is the 1st spectral solution (note that we are
+                ! already on the finest grid) and the rans equations are
+                ! solved, allocate the memory for the arrays used for the
+                ! implicit boundary condition treatment. Normally this should
+                ! only be allocated for RANS but the derivative calcs require
+                ! these be allocated.
+
+                sps1RansTest: if (sps == 1) then
+                    allocate (flowDoms(nn, level, sps)%bmti1(je, ke, nt1:nt2, nt1:nt2), &
+                              flowDoms(nn, level, sps)%bmti2(je, ke, nt1:nt2, nt1:nt2), &
+                              flowDoms(nn, level, sps)%bmtj1(ie, ke, nt1:nt2, nt1:nt2), &
+                              flowDoms(nn, level, sps)%bmtj2(ie, ke, nt1:nt2, nt1:nt2), &
+                              flowDoms(nn, level, sps)%bmtk1(ie, je, nt1:nt2, nt1:nt2), &
+                              flowDoms(nn, level, sps)%bmtk2(ie, je, nt1:nt2, nt1:nt2), &
+                              flowDoms(nn, level, sps)%bvti1(je, ke, nt1:nt2), &
+                              flowDoms(nn, level, sps)%bvti2(je, ke, nt1:nt2), &
+                              flowDoms(nn, level, sps)%bvtj1(ie, ke, nt1:nt2), &
+                              flowDoms(nn, level, sps)%bvtj2(ie, ke, nt1:nt2), &
+                              flowDoms(nn, level, sps)%bvtk1(ie, je, nt1:nt2), &
+                              flowDoms(nn, level, sps)%bvtk2(ie, je, nt1:nt2), &
+                              stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("allocMemFlovarPart1", &
+                                       "Memory allocation failure for bmti1, etc")
+
+                end if sps1RansTest
+            end if fineLevelTest
+
+        end do domains
+
+    end subroutine allocMemFlovarPart1
+
+    !      ==================================================================
+
+    subroutine allocMemFlovarPart2(sps, level)
+        !
+        !       AllocMemFlovarPart2 allocates the memory for the dependent
+        !       flow variables and iteration variables for all the blocks on
+        !       the given multigrid level and spectral solution sps. Some
+        !       variables are only allocated on the coarser grids, e.g. the
+        !       multigrid forcing terms and the state vector upon entrance on
+        !       the mg level. Other variables are only allocated on the finest
+        !       mesh. These are typically dependent variables like laminar
+        !       viscosity, or residuals, time step, etc. Exceptions are
+        !       pressure and eddy viscosity. Although these are dependent
+        !       variables, they are allocated on all grid levels.
+        !
+        use block
+        use constants
+        use flowVarRefState
+        use inputPhysics
+        use inputDiscretization
+        use inputIteration
+        use inputUnsteady
+        use iteration
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: sps, level
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: nn, mm
+        integer(kind=intType) :: il, jl, kl, ie, je, ke, ib, jb, kb
+
+        ! Loop over the domains.
+
+        domains: do nn = 1, nDom
+
+            ! Store some dimensions a bit easier.
+
+            il = flowDoms(nn, level, sps)%il
+            jl = flowDoms(nn, level, sps)%jl
+            kl = flowDoms(nn, level, sps)%kl
+
+            ie = flowDoms(nn, level, sps)%ie
+            je = flowDoms(nn, level, sps)%je
+            ke = flowDoms(nn, level, sps)%ke
+
+            ib = flowDoms(nn, level, sps)%ib
+            jb = flowDoms(nn, level, sps)%jb
+            kb = flowDoms(nn, level, sps)%kb
+
+            ! Block is moving. Allocate the memory for s, sFaceI,
+            ! sFaceJ and sFaceK.
+
+            allocate (flowDoms(nn, level, sps)%s(ie, je, ke, 3), &
+                      flowDoms(nn, level, sps)%sFaceI(0:ie, je, ke), &
+                      flowDoms(nn, level, sps)%sFaceJ(ie, 0:je, ke), &
+                      flowDoms(nn, level, sps)%sFaceK(ie, je, 0:ke), stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("allocMemFlovarPart2", &
+                 "Memory allocation failure for s, &
+                 &sFaceI, sFaceJ and sFaceK.")
+
+            ! Extra face velocities for ALE
+            if (equationMode == unSteady .and. useALE) then
+                allocate ( &
+                    flowDoms(nn, level, sps)%sVeloIALE(0:ie, je, ke, 3), &
+                    flowDoms(nn, level, sps)%sVeloJALE(ie, 0:je, ke, 3), &
+                    flowDoms(nn, level, sps)%sVeloKALE(ie, je, 0:ke, 3), &
+                    flowDoms(nn, level, sps)%sFaceIALE(0:nALEsteps, 0:ie, je, ke), &
+                    flowDoms(nn, level, sps)%sFaceJALE(0:nALEsteps, ie, 0:je, ke), &
+                    flowDoms(nn, level, sps)%sFaceKALE(0:nALEsteps, ie, je, 0:ke), stat=ierr)
+                if (ierr /= 0) &
+                     call terminate("allocMemFlovarPart2", &
+                     "Memory allocation failure for &
+                     &sVeloIALE, sVeloJALE and sVeloKALE; &
+                     &sFaceIALE, sFaceJALE and sFaceKALE.")
+            end if
+
+            ! Test if we are on the finest mesh.
+
+            fineLevelTest: if (level == 1) then
+
+                ! Allocate the memory that must always be allocated.
+
+                allocate ( &
+                    flowDoms(nn, level, sps)%dw(0:ib, 0:jb, 0:kb, 1:nw), &
+                    flowDoms(nn, level, sps)%fw(0:ib, 0:jb, 0:kb, 1:nwf), &
+                    flowDoms(nn, level, sps)%dtl(1:ie, 1:je, 1:ke), &
+                    flowDoms(nn, level, sps)%radI(1:ie, 1:je, 1:ke), &
+                    flowDoms(nn, level, sps)%radJ(1:ie, 1:je, 1:ke), &
+                    flowDoms(nn, level, sps)%radK(1:ie, 1:je, 1:ke), &
+                    flowDoms(nn, level, sps)%scratch(0:ib, 0:jb, 0:kb, 10), &
+                    flowDoms(nn, level, sps)%shockSensor(0:ib, 0:jb, 0:kb), &
+                    stat=ierr)
+                if (ierr /= 0) &
+                     call terminate("allocMemFlovarPart2", &
+                     "Memory allocation failure for dw, fw, dwOld, fwOld, &
+                     &gamma, dtl and the spectral radii.")
+
+                ! Initialize dw and fw to zero.
+
+                flowDoms(nn, level, sps)%dw = zero
+                flowDoms(nn, level, sps)%fw = zero
+
+                ! Extra variables for ALE
+                if (equationMode == unSteady .and. useALE) then
+                    allocate ( &
+                        flowDoms(nn, level, sps)%dwALE(0:nALEsteps, 0:ib, 0:jb, 0:kb, 1:nw), &
+                        flowDoms(nn, level, sps)%fwALE(0:nALEsteps, 0:ib, 0:jb, 0:kb, 1:nwf), &
+                        stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("allocMemFlovarPart2", &
+                                       "Memory allocation failure for dwALE, fwALE.")
+
+                    flowDoms(nn, level, sps)%dwALE = zero
+                    flowDoms(nn, level, sps)%fwALE = zero
+                end if
+
+                ! Allocate the memory for the zeroth runge kutta stage
+                allocate (flowDoms(nn, level, sps)%wn(2:il, 2:jl, 2:kl, 1:nwf), &
+                          flowDoms(nn, level, sps)%pn(2:il, 2:jl, 2:kl), stat=ierr)
+                if (ierr /= 0) &
+                    call terminate("allocMemFlovarPart2", &
+                                   "Memory allocation failure for wn and pn")
+
+                ! For unsteady mode using Runge-Kutta schemes allocate the
+                ! memory for dwOldRK.
+
+                if (equationMode == unsteady .and. &
+                    timeIntegrationScheme == explicitRK) then
+
+                    mm = nRKStagesUnsteady - 1
+                    allocate (flowDoms(nn, level, sps)%dwOldRK(mm, il, jl, kl, nw), &
+                              stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("allocMemFlovarPart2", &
+                                       "Memory allocation failure for dwOldRK.")
+                end if
+
+                else fineLevelTest
+
+                ! Coarser level. Allocate the memory for the multigrid
+                ! forcing term and the state variables upon entry.
+
+                allocate (flowDoms(nn, level, sps)%p1(1:ie, 1:je, 1:ke), &
+                          flowDoms(nn, level, sps)%w1(1:ie, 1:je, 1:ke, 1:nwf), &
+                          flowDoms(nn, level, sps)%wr(2:il, 2:jl, 2:kl, 1:nwf), &
+                          stat=ierr)
+                if (ierr /= 0) &
+                     call terminate("allocMemFlovarPart2", &
+                     "Memory allocation failure for p1, w1 &
+                     &and wr")
+
+                ! Initialize w1 and p1 to zero, just that they
+                ! are initialized.
+
+                flowDoms(nn, level, sps)%p1 = zero
+                flowDoms(nn, level, sps)%w1 = zero
+
+            end if fineLevelTest
+
+        end do domains
+
+    end subroutine allocMemFlovarPart2
+
+    subroutine allocRestartFiles(nFiles)
+        !
+        !          Allocate memory for the restartfles                         *
+        !          The array is populated from Python using setRestartFiles
+        !          If memeory has been allocated for the array there exist at
+        !          least one element in the array.
+        !
+        use constants
+        use inputIO, only: restartFiles
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine argument.
+        !
+        integer(kind=intType) :: nFiles
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        if (allocated(restartFiles)) then
+            deallocate (restartFiles)
+        end if
+
+        allocate (restartFiles(nFiles), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("allocRestartFiles", &
+                           "Memory allocation failure for restartFiles")
+
+        ! Zero Array with empty strings
+        restartFiles = ""
+
+    end subroutine allocRestartFiles
+
+    subroutine copySpectralSolution
+        !
+        !       copySpectralSolution copies the solution of the 1st spectral
+        !       solution to all spectral solutions. This typically occurs when
+        !       a for the spectral mode a restart is made from a steady or an
+        !       unsteady solution. Possible rotation effects are taken into
+        !       account for the velocity components.
+        !
+        use constants
+        use block, only: flowDoms, nDom
+        use flowVarRefState, only: nw
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use IOModule, only: IOVar
+        use monitor, only: timeUnsteadyRestart
+        use section, only: sections, nSections
+        use utils, only: rotMatrixRigidBody
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: sps, spsm1, mm, nn, i, j, k, l
+
+        real(kind=realType) :: dt, tnew, told, tmp
+        real(kind=realType) :: theta, cosTheta, sinTheta
+
+        real(kind=realType), dimension(3) :: rotPoint, uu
+        real(kind=realType), dimension(3) :: xt, yt, zt
+        real(kind=realType), dimension(3, 3) :: rotMat
+
+        real(kind=realType), dimension(nSections, 3, 3) :: rotMatSec
+
+        ! Determine the rotation matrix from one spectral solution to the
+        ! other for every section.
+
+        sectionLoop: do nn = 1, nSections
+
+            ! Test if the section is rotating.
+
+            testRotating: if (sections(nn)%rotating) then
+
+                ! Section is rotating. Determine the angle between the
+                ! spectral solutions and its sine and cosine.
+
+                i = sections(nn)%nSlices * nTimeIntervalsSpectral
+                theta = two * pi / real(i, realType)
+                cosTheta = cos(theta)
+                sinTheta = sin(theta)
+
+                ! Transform to a frame where the xt-axis points in the
+                ! direction of the rotation vector.
+
+                xt(1) = sections(nn)%rotAxis(1)
+                xt(2) = sections(nn)%rotAxis(2)
+                xt(3) = sections(nn)%rotAxis(3)
+
+                ! Construct the yt axis. It does not matter exactly as long
+                ! as it is normal to xt.
+
+                if (abs(xt(2)) < 0.707107_realType) then
+                    yt(1) = zero
+                    yt(2) = one
+                    yt(3) = zero
+                else
+                    yt(1) = zero
+                    yt(2) = zero
+                    yt(3) = one
+                end if
+
+                ! Make sure that yt is normal to xt.
+
+                tmp = xt(1) * yt(1) + xt(2) * yt(2) + xt(3) * yt(3)
+                yt(1) = yt(1) - tmp * xt(1)
+                yt(2) = yt(2) - tmp * xt(2)
+                yt(3) = yt(3) - tmp * xt(3)
+
+                ! And create a unit vector.
+
+                tmp = one / sqrt(yt(1)**2 + yt(2)**2 + yt(3)**2)
+                yt(1) = tmp * yt(1)
+                yt(2) = tmp * yt(2)
+                yt(3) = tmp * yt(3)
+
+                ! Create the vector zt by taking the cross product xt*yt.
+
+                zt(1) = xt(2) * yt(3) - xt(3) * yt(2)
+                zt(2) = xt(3) * yt(1) - xt(1) * yt(3)
+                zt(3) = xt(1) * yt(2) - xt(2) * yt(1)
+
+                ! The rotation matrix in the xt,yt,zt frame is given by
+                !
+                ! R = | 1      0           0      |
+                !     | 0  cos(theta) -sin(theta) |
+                !     | 0  sin(theta)  cos(theta) |
+                !
+                ! The rotation matrix in the standard cartesian frame is then
+                ! given by t * r * t^t, where the colums of the transformation
+                ! matrix t are the unit vectors xt,yt,zt. One can easily check
+                ! this by checking rotation around the y- and z-axis. The
+                ! result of this is the expression below.
+
+                rotMatSec(nn, 1, 1) = xt(1) * xt(1) &
+                                      + cosTheta * (yt(1) * yt(1) + zt(1) * zt(1))
+                rotMatSec(nn, 1, 2) = xt(1) * xt(2) &
+                                      + cosTheta * (yt(1) * yt(2) + zt(1) * zt(2)) &
+                                      - sinTheta * (yt(1) * zt(2) - yt(2) * zt(1))
+                rotMatSec(nn, 1, 3) = xt(1) * xt(3) &
+                                      + cosTheta * (yt(1) * yt(3) + zt(1) * zt(3)) &
+                                      - sinTheta * (yt(1) * zt(3) - yt(3) * zt(1))
+
+                rotMatSec(nn, 2, 1) = xt(1) * xt(2) &
+                                      + cosTheta * (yt(1) * yt(2) + zt(1) * zt(2)) &
+                                      + sinTheta * (yt(1) * zt(2) - yt(2) * zt(1))
+                rotMatSec(nn, 2, 2) = xt(2) * xt(2) &
+                                      + cosTheta * (yt(2) * yt(2) + zt(2) * zt(2))
+                rotMatSec(nn, 2, 3) = xt(2) * xt(3) &
+                                      + cosTheta * (yt(2) * yt(3) + zt(2) * zt(3)) &
+                                      - sinTheta * (yt(2) * zt(3) - yt(3) * zt(2))
 
-    ! Initialize the dependent flow variables and the halo values.
+                rotMatSec(nn, 3, 1) = xt(1) * xt(3) &
+                                      + cosTheta * (yt(1) * yt(3) + zt(1) * zt(3)) &
+                                      + sinTheta * (yt(1) * zt(3) - yt(3) * zt(1))
+                rotMatSec(nn, 3, 2) = xt(2) * xt(3) &
+                                      + cosTheta * (yt(2) * yt(3) + zt(2) * zt(3)) &
+                                      + sinTheta * (yt(2) * zt(3) - yt(3) * zt(2))
+                rotMatSec(nn, 3, 3) = xt(3) * xt(3) &
+                                      + cosTheta * (yt(3) * yt(3) + zt(3) * zt(3))
 
-    call initDepvarAndHalos(halosRead)
+                else testRotating
 
-  end subroutine initFlow
+                ! Section is not rotating. Set the rotation matrix to the
+                ! identity matrix.
 
+                rotMatSec(nn, 1, 1) = one
+                rotMatSec(nn, 1, 2) = zero
+                rotMatSec(nn, 1, 3) = zero
 
-  subroutine allocMemFlovarPart1(sps,level)
-    !
-    !       allocMemFlovarPart1 allocates the memory for the flow
-    !       variables w and p for all the blocks on the given multigrid
-    !       level and spectral solution sps.
-    !
-    use constants
-    use block, only : flowDoms, nDOm
-    use flowVarRefState, only : nw, nwf, nt1, nt2
-    use inputPhysics, only : equationMode, gammaConstant
-    use inputUnsteady, only :timeIntegrationScheme
-    use inputIteration, only : mgStartLevel, turbTreatment
-    use iteration, only : nOldLevels
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: sps, level
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: nn
-    integer(kind=intType) :: il, jl, kl, ie, je, ke, ib, jb, kb
-
-    ! Loop over the domains.
-
-    domains: do nn=1,nDom
-
-       ! Store some dimensions a bit easier.
-
-       il = flowDoms(nn,level,sps)%il
-       jl = flowDoms(nn,level,sps)%jl
-       kl = flowDoms(nn,level,sps)%kl
-
-       ie = flowDoms(nn,level,sps)%ie
-       je = flowDoms(nn,level,sps)%je
-       ke = flowDoms(nn,level,sps)%ke
-
-       ib = flowDoms(nn,level,sps)%ib
-       jb = flowDoms(nn,level,sps)%jb
-       kb = flowDoms(nn,level,sps)%kb
-
-       ! Allocate the memory for the independent variables.
-       ! Memory is allocated for the turbulent variables (if any) if
-       ! the current level is smaller or equal to the multigrid start
-       ! level or if the turbulent transport equations are solved in
-       ! a coupled manner.
-
-       if(level <= mgStartlevel .or. turbTreatment == coupled) then
-          allocate(flowDoms(nn,level,sps)%w(0:ib,0:jb,0:kb,1:nw), &
-               stat=ierr)
-       else
-          allocate(flowDoms(nn,level,sps)%w(0:ib,0:jb,0:kb,1:nwf), &
-               stat=ierr)
-       endif
-       if(ierr /= 0)                           &
-            call terminate("allocMemFlovarPart1", &
-            "Memory allocation failure for w")
-
-       ! Alloc mem for nodal gradients
-       allocate(flowDoms(nn,level,sps)%ux(il,jl,kl), stat=ierr)
-       allocate(flowDoms(nn,level,sps)%uy(il,jl,kl), stat=ierr)
-       allocate(flowDoms(nn,level,sps)%uz(il,jl,kl), stat=ierr)
-
-       allocate(flowDoms(nn,level,sps)%vx(il,jl,kl), stat=ierr)
-       allocate(flowDoms(nn,level,sps)%vy(il,jl,kl), stat=ierr)
-       allocate(flowDoms(nn,level,sps)%vz(il,jl,kl), stat=ierr)
-
-       allocate(flowDoms(nn,level,sps)%wx(il,jl,kl), stat=ierr)
-       allocate(flowDoms(nn,level,sps)%wy(il,jl,kl), stat=ierr)
-       allocate(flowDoms(nn,level,sps)%wz(il,jl,kl), stat=ierr)
-
-       allocate(flowDoms(nn,level,sps)%qx(il,jl,kl), stat=ierr)
-       allocate(flowDoms(nn,level,sps)%qy(il,jl,kl), stat=ierr)
-       allocate(flowDoms(nn,level,sps)%qz(il,jl,kl), stat=ierr)
-
-       ! Allocate memory for the pressure.
-       allocate(flowDoms(nn,level,sps)%p(0:ib,0:jb,0:kb), stat=ierr)
-       if(ierr /= 0)                           &
-            call terminate("allocMemFlovarPart1", &
-            "Memory allocation failure for p")
-
-       ! Allocate memory for the speed of sound squared
-       allocate(flowDoms(nn,level,sps)%aa(0:ib,0:jb,0:kb), stat=ierr)
-       if(ierr /= 0)                           &
-            call terminate("allocMemFlovarPart1", &
-            "Memory allocation failure for p")
-
-       ! The eddy viscosity for eddy viscosity models.
-       ! Although a dependent variable, it is allocated on all grid
-       ! levels, because the eddy viscosity might be frozen in the
-       ! multigrid.
-
-       ! Always allocate rev due to reverse mode AD- Peter Lyu. Also
-       ! zero so that it doesn't affect laminar cases.
-       allocate(flowDoms(nn,level,sps)%rev(0:ib,0:jb,0:kb), &
-            stat=ierr)
-       flowDoms(nn, level, sps)%rev = zero
-       if(ierr /= 0)                           &
-            call terminate("allocMemFlovarPart1", &
-            "Memory allocation failure for rev")
-       !endif
-
-       ! If this is the finest grid some more memory must be allocated.
-
-       fineLevelTest: if(level == 1) then
-
-          ! Allocate the memory for gamma and initialize it to
-          ! the constant gamma value.
-
-          allocate(flowDoms(nn,level,sps)%gamma(0:ib,0:jb,0:kb),    &
-               stat=ierr)
-          if(ierr /= 0)                           &
-               call terminate("allocMemFlovarPart1", &
-               "Memory allocation failure for gamma.")
-
-          flowDoms(nn,level,sps)%gamma = gammaConstant
-
-          ! The laminar viscosity for viscous computations.
-          ! Always allocate rlv due to reverse mode - Peter Lyu
-          !if( viscous ) then
-          allocate(flowDoms(nn,level,sps)%rlv(0:ib,0:jb,0:kb), &
-               stat=ierr)
-          if(ierr /= 0)                           &
-               call terminate("allocMemFlovarPart1", &
-               "Memory allocation failure for rlv")
-          !endif
-
-          ! The state vectors in the past for unsteady computations.
-
-          if(equationMode          == unsteady .and. &
-               timeIntegrationScheme == BDF) then
-             allocate( &
-                  flowDoms(nn,level,sps)%wOld(nOldLevels,2:il,2:jl,2:kl,nw), &
-                  stat=ierr)
-             if(ierr /= 0)                           &
-                  call terminate("allocMemFlovarPart1", &
-                  "Memory allocation failure for wOld")
+                rotMatSec(nn, 2, 1) = zero
+                rotMatSec(nn, 2, 2) = one
+                rotMatSec(nn, 2, 3) = zero
+
+                rotMatSec(nn, 3, 1) = zero
+                rotMatSec(nn, 3, 2) = zero
+                rotMatSec(nn, 3, 3) = one
+
+            end if testRotating
+
+        end do sectionLoop
+
+        ! Initialize told to timeUnsteadyRestart. This takes the
+        ! possibility into account that the spectral mode is restarted
+        ! from an unsteady computation. Although not likely this
+        ! possibility is allowed and should therefore be taken into
+        ! account. Anyway told corresponds to the time of the 1st
+        ! spectral solution. Also determine the time step between the
+        ! spectral solutions. Both told and dt are only used to determine
+        ! the rigid body motion and if these are specified it is assumed
+        ! that there is only one section present in the grid.
+
+        told = timeUnsteadyRestart
+        dt = sections(1)%timePeriod &
+             / real(nTimeIntervalsSpectral, realType)
+
+        ! Loop over the number of spectral modes, starting at 2.
+
+        spectralLoop: do sps = 2, nTimeIntervalsSpectral
+
+            ! Determine the corresponding time for this spectral solution
+            ! and store sps - 1 a bit easier.
+
+            tnew = told + dt
+            spsm1 = sps - 1
+
+            ! Determine the rotation matrix and rotation point between the
+            ! told and tnew for the rigid body rotation of the entire mesh.
+            ! The rotation point is not needed for the transformation of the
+            ! velocities, but rotMatrixRigidBody happens to compute it.
+
+            call rotMatrixRigidBody(tnew, told, rotMat, rotPoint)
+
+            ! Loop over the local number of blocks.
+
+            domains: do nn = 1, nDom
+
+                ! Store the section ID of this block a bit easier in mm.
+
+                mm = flowDoms(nn, 1, 1)%sectionId
+
+                ! Loop over the owned cells of this block. As the number of
+                ! cells is identical for all spectral solutions, it does not
+                ! matter which mode is taken for the upper dimensions.
+
+                do k = 2, flowDoms(nn, 1, 1)%kl
+                    do j = 2, flowDoms(nn, 1, 1)%jl
+                        do i = 2, flowDoms(nn, 1, 1)%il
 
-             ! Initialize wOld to zero, such that it is initialized.
-             ! The actual values do not matter.
+                            ! Step 1. Copy the solution variables w from
+                            ! the previous spectral solution.
 
-             flowDoms(nn,level,sps)%wOld = zero
+                            do l = 1, nw
+                                IOVar(nn, sps)%w(i, j, k, l) = IOVar(nn, spsm1)%w(i, j, k, l)
+                            end do
 
-             ! Added by HDN
-          else if ( equationMode == unsteady .and. &
-               timeIntegrationScheme == MD) then
-             allocate( &
-                  flowDoms(nn,level,sps)%wOld(nOldLevels,2:il,2:jl,2:kl,nw), &
-                  stat=ierr)
-             if(ierr /= 0)                           &
-                  call terminate("allocMemFlovarPart1", &
-                  "Memory allocation failure for wOld")
-
-             flowDoms(nn,level,sps)%wOld = zero
-          endif
-
-          ! If this is the 1st spectral solution (note that we are
-          ! already on the finest grid) and the rans equations are
-          ! solved, allocate the memory for the arrays used for the
-          ! implicit boundary condition treatment. Normally this should
-          ! only be allocated for RANS but the derivative calcs require
-          ! these be allocated.
-
-          sps1RansTest: if(sps == 1) then
-             allocate(flowDoms(nn,level,sps)%bmti1(je,ke,nt1:nt2,nt1:nt2), &
-                  flowDoms(nn,level,sps)%bmti2(je,ke,nt1:nt2,nt1:nt2), &
-                  flowDoms(nn,level,sps)%bmtj1(ie,ke,nt1:nt2,nt1:nt2), &
-                  flowDoms(nn,level,sps)%bmtj2(ie,ke,nt1:nt2,nt1:nt2), &
-                  flowDoms(nn,level,sps)%bmtk1(ie,je,nt1:nt2,nt1:nt2), &
-                  flowDoms(nn,level,sps)%bmtk2(ie,je,nt1:nt2,nt1:nt2), &
-                  flowDoms(nn,level,sps)%bvti1(je,ke,nt1:nt2), &
-                  flowDoms(nn,level,sps)%bvti2(je,ke,nt1:nt2), &
-                  flowDoms(nn,level,sps)%bvtj1(ie,ke,nt1:nt2), &
-                  flowDoms(nn,level,sps)%bvtj2(ie,ke,nt1:nt2), &
-                  flowDoms(nn,level,sps)%bvtk1(ie,je,nt1:nt2), &
-                  flowDoms(nn,level,sps)%bvtk2(ie,je,nt1:nt2), &
-                  stat=ierr)
-             if(ierr /= 0)                           &
-                  call terminate("allocMemFlovarPart1", &
-                  "Memory allocation failure for bmti1, etc")
-
-          endif sps1RansTest
-       endif fineLevelTest
-
-    enddo domains
-
-  end subroutine allocMemFlovarPart1
-
-  !      ==================================================================
-
-  subroutine allocMemFlovarPart2(sps, level)
-    !
-    !       AllocMemFlovarPart2 allocates the memory for the dependent
-    !       flow variables and iteration variables for all the blocks on
-    !       the given multigrid level and spectral solution sps. Some
-    !       variables are only allocated on the coarser grids, e.g. the
-    !       multigrid forcing terms and the state vector upon entrance on
-    !       the mg level. Other variables are only allocated on the finest
-    !       mesh. These are typically dependent variables like laminar
-    !       viscosity, or residuals, time step, etc. Exceptions are
-    !       pressure and eddy viscosity. Although these are dependent
-    !       variables, they are allocated on all grid levels.
-    !
-    use block
-    use constants
-    use flowVarRefState
-    use inputPhysics
-    use inputDiscretization
-    use inputIteration
-    use inputUnsteady
-    use iteration
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: sps, level
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: nn, mm
-    integer(kind=intType) :: il, jl, kl, ie, je, ke, ib, jb, kb
-
-    ! Loop over the domains.
-
-    domains: do nn=1,nDom
-
-       ! Store some dimensions a bit easier.
-
-       il = flowDoms(nn,level,sps)%il
-       jl = flowDoms(nn,level,sps)%jl
-       kl = flowDoms(nn,level,sps)%kl
-
-       ie = flowDoms(nn,level,sps)%ie
-       je = flowDoms(nn,level,sps)%je
-       ke = flowDoms(nn,level,sps)%ke
-
-       ib = flowDoms(nn,level,sps)%ib
-       jb = flowDoms(nn,level,sps)%jb
-       kb = flowDoms(nn,level,sps)%kb
-
-       ! Block is moving. Allocate the memory for s, sFaceI,
-       ! sFaceJ and sFaceK.
-
-       allocate(flowDoms(nn,level,sps)%s(ie,je,ke,3),  &
-            flowDoms(nn,level,sps)%sFaceI(0:ie,je,ke), &
-            flowDoms(nn,level,sps)%sFaceJ(ie,0:je,ke), &
-            flowDoms(nn,level,sps)%sFaceK(ie,je,0:ke), stat=ierr)
-       if(ierr /= 0)                              &
-            call terminate("allocMemFlovarPart2", &
-            "Memory allocation failure for s, &
-            &sFaceI, sFaceJ and sFaceK.")
-
-       ! Extra face velocities for ALE
-       if (equationMode == unSteady .and. useALE) then
-          allocate( &
-               flowDoms(nn,level,sps)%sVeloIALE(0:ie,je,ke,3), &
-               flowDoms(nn,level,sps)%sVeloJALE(ie,0:je,ke,3), &
-               flowDoms(nn,level,sps)%sVeloKALE(ie,je,0:ke,3), &
-               flowDoms(nn,level,sps)%sFaceIALE(0:nALEsteps,0:ie,je,ke), &
-               flowDoms(nn,level,sps)%sFaceJALE(0:nALEsteps,ie,0:je,ke), &
-               flowDoms(nn,level,sps)%sFaceKALE(0:nALEsteps,ie,je,0:ke), stat=ierr)
-          if(ierr /= 0)                              &
-               call terminate("allocMemFlovarPart2", &
-               "Memory allocation failure for &
-               &sVeloIALE, sVeloJALE and sVeloKALE; &
-               &sFaceIALE, sFaceJALE and sFaceKALE.")
-       end if
-
-
-       ! Test if we are on the finest mesh.
-
-       fineLevelTest: if(level == 1) then
-
-          ! Allocate the memory that must always be allocated.
-
-          allocate( &
-               flowDoms(nn,level,sps)%dw(0:ib,0:jb,0:kb,1:nw),  &
-               flowDoms(nn,level,sps)%fw(0:ib,0:jb,0:kb,1:nwf), &
-               flowDoms(nn,level,sps)%dtl(1:ie,1:je,1:ke),      &
-               flowDoms(nn,level,sps)%radI(1:ie,1:je,1:ke),     &
-               flowDoms(nn,level,sps)%radJ(1:ie,1:je,1:ke),     &
-               flowDoms(nn,level,sps)%radK(1:ie,1:je,1:ke),     &
-               flowDoms(nn,level,sps)%scratch(0:ib,0:jb,0:kb,10), &
-               flowDoms(nn,level,sps)%shockSensor(0:ib, 0:jb, 0:kb), &
-               stat=ierr)
-          if(ierr /= 0)                              &
-               call terminate("allocMemFlovarPart2", &
-               "Memory allocation failure for dw, fw, dwOld, fwOld, &
-               &gamma, dtl and the spectral radii.")
-
-          ! Initialize dw and fw to zero.
-
-          flowDoms(nn,level,sps)%dw = zero
-          flowDoms(nn,level,sps)%fw = zero
-
-
-          ! Extra variables for ALE
-          if (equationMode == unSteady .and. useALE) then
-             allocate( &
-                  flowDoms(nn,level,sps)%dwALE(0:nALEsteps,0:ib,0:jb,0:kb,1:nw),  &
-                  flowDoms(nn,level,sps)%fwALE(0:nALEsteps,0:ib,0:jb,0:kb,1:nwf), &
-                  stat=ierr)
-             if(ierr /= 0)                              &
-                  call terminate("allocMemFlovarPart2", &
-                  "Memory allocation failure for dwALE, fwALE.")
+                            ! Step 2. Apply the rigid body motion rotation matrix
+                            ! to the velocity. Use uu as a temporary storage.
 
-             flowDoms(nn,level,sps)%dwALE = zero
-             flowDoms(nn,level,sps)%fwALE = zero
-          end if
+                            uu(1) = rotMat(1, 1) * IOVar(nn, sps)%w(i, j, k, ivx) &
+                                    + rotMat(1, 2) * IOVar(nn, sps)%w(i, j, k, ivy) &
+                                    + rotMat(1, 3) * IOVar(nn, sps)%w(i, j, k, ivz)
 
-          ! Allocate the memory for the zeroth runge kutta stage
-          allocate(flowDoms(nn,level,sps)%wn(2:il,2:jl,2:kl,1:nwf), &
-               flowDoms(nn,level,sps)%pn(2:il,2:jl,2:kl), stat=ierr)
-          if(ierr /= 0)                              &
-               call terminate("allocMemFlovarPart2", &
-               "Memory allocation failure for wn and pn")
+                            uu(2) = rotMat(2, 1) * IOVar(nn, sps)%w(i, j, k, ivx) &
+                                    + rotMat(2, 2) * IOVar(nn, sps)%w(i, j, k, ivy) &
+                                    + rotMat(2, 3) * IOVar(nn, sps)%w(i, j, k, ivz)
 
-          ! For unsteady mode using Runge-Kutta schemes allocate the
-          ! memory for dwOldRK.
+                            uu(3) = rotMat(3, 1) * IOVar(nn, sps)%w(i, j, k, ivx) &
+                                    + rotMat(3, 2) * IOVar(nn, sps)%w(i, j, k, ivy) &
+                                    + rotMat(3, 3) * IOVar(nn, sps)%w(i, j, k, ivz)
 
-          if(equationMode          == unsteady .and. &
-               timeIntegrationScheme == explicitRK) then
+                            ! Step 3. Apply the rotation matrix of the section to
+                            ! the velocity.
 
-             mm = nRKStagesUnsteady - 1
-             allocate(flowDoms(nn,level,sps)%dwOldRK(mm,il,jl,kl,nw), &
-                  stat=ierr)
-             if(ierr /= 0) &
-                  call terminate("allocMemFlovarPart2", &
-                  "Memory allocation failure for dwOldRK.")
-          endif
-
-       else fineLevelTest
-
-          ! Coarser level. Allocate the memory for the multigrid
-          ! forcing term and the state variables upon entry.
-
-          allocate(flowDoms(nn,level,sps)%p1(1:ie,1:je,1:ke),          &
-               flowDoms(nn,level,sps)%w1(1:ie,1:je,1:ke,1:nwf), &
-               flowDoms(nn,level,sps)%wr(2:il,2:jl,2:kl,1:nwf), &
-               stat=ierr)
-          if(ierr /= 0)                              &
-               call terminate("allocMemFlovarPart2", &
-               "Memory allocation failure for p1, w1 &
-               &and wr")
-
-          ! Initialize w1 and p1 to zero, just that they
-          ! are initialized.
-
-          flowDoms(nn,level,sps)%p1 = zero
-          flowDoms(nn,level,sps)%w1 = zero
-
-       endif fineLevelTest
-
-    enddo domains
-
-  end subroutine allocMemFlovarPart2
-
-  subroutine allocRestartFiles(nFiles)
-    !
-    !          Allocate memory for the restartfles                         *
-    !          The array is populated from Python using setRestartFiles
-    !          If memeory has been allocated for the array there exist at
-    !          least one element in the array.
-    !
-    use constants
-    use inputIO, only : restartFiles
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine argument.
-    !
-    integer(kind=intType) :: nFiles
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    if (allocated(restartFiles)) then
-       deallocate(restartFiles)
-    end if
-
-    allocate(restartFiles(nFiles), stat=ierr)
-    if(ierr /= 0)                          &
-         call terminate("allocRestartFiles", &
-         "Memory allocation failure for restartFiles")
-
-    ! Zero Array with empty strings
-    restartFiles = ""
-
-  end subroutine allocRestartFiles
-
-  subroutine copySpectralSolution
-    !
-    !       copySpectralSolution copies the solution of the 1st spectral
-    !       solution to all spectral solutions. This typically occurs when
-    !       a for the spectral mode a restart is made from a steady or an
-    !       unsteady solution. Possible rotation effects are taken into
-    !       account for the velocity components.
-    !
-    use constants
-    use block, only : flowDoms, nDom
-    use flowVarRefState, only : nw
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use IOModule, only : IOVar
-    use monitor, only: timeUnsteadyRestart
-    use section, only : sections, nSections
-    use utils, only : rotMatrixRigidBody
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: sps, spsm1, mm, nn, i, j, k, l
-
-    real(kind=realType) :: dt, tnew, told, tmp
-    real(kind=realType) :: theta, cosTheta, sinTheta
-
-    real(kind=realType), dimension(3)   :: rotPoint, uu
-    real(kind=realType), dimension(3)   :: xt, yt, zt
-    real(kind=realType), dimension(3,3) :: rotMat
-
-    real(kind=realType), dimension(nSections,3,3) :: rotMatSec
-
-    ! Determine the rotation matrix from one spectral solution to the
-    ! other for every section.
-
-    sectionLoop: do nn=1,nSections
-
-       ! Test if the section is rotating.
-
-       testRotating: if( sections(nn)%rotating ) then
-
-          ! Section is rotating. Determine the angle between the
-          ! spectral solutions and its sine and cosine.
-
-          i = sections(nn)%nSlices*nTimeIntervalsSpectral
-          theta     = two*pi/real(i,realType)
-          cosTheta = cos(theta)
-          sinTheta = sin(theta)
-
-          ! Transform to a frame where the xt-axis points in the
-          ! direction of the rotation vector.
-
-          xt(1) = sections(nn)%rotAxis(1)
-          xt(2) = sections(nn)%rotAxis(2)
-          xt(3) = sections(nn)%rotAxis(3)
-
-          ! Construct the yt axis. It does not matter exactly as long
-          ! as it is normal to xt.
-
-          if(abs(xt(2)) < 0.707107_realType) then
-             yt(1) = zero
-             yt(2) = one
-             yt(3) = zero
-          else
-             yt(1) = zero
-             yt(2) = zero
-             yt(3) = one
-          endif
-
-          ! Make sure that yt is normal to xt.
-
-          tmp   = xt(1)*yt(1) + xt(2)*yt(2) + xt(3)*yt(3)
-          yt(1) = yt(1) - tmp*xt(1)
-          yt(2) = yt(2) - tmp*xt(2)
-          yt(3) = yt(3) - tmp*xt(3)
-
-          ! And create a unit vector.
-
-          tmp   = one/sqrt(yt(1)**2 + yt(2)**2 + yt(3)**2)
-          yt(1) = tmp*yt(1)
-          yt(2) = tmp*yt(2)
-          yt(3) = tmp*yt(3)
-
-          ! Create the vector zt by taking the cross product xt*yt.
+                            IOVar(nn, sps)%w(i, j, k, ivx) = rotMatSec(mm, 1, 1) * uu(1) &
+                                                             + rotMatSec(mm, 1, 2) * uu(2) &
+                                                             + rotMatSec(mm, 1, 3) * uu(3)
 
-          zt(1) = xt(2)*yt(3) - xt(3)*yt(2)
-          zt(2) = xt(3)*yt(1) - xt(1)*yt(3)
-          zt(3) = xt(1)*yt(2) - xt(2)*yt(1)
+                            IOVar(nn, sps)%w(i, j, k, ivy) = rotMatSec(mm, 2, 1) * uu(1) &
+                                                             + rotMatSec(mm, 2, 2) * uu(2) &
+                                                             + rotMatSec(mm, 2, 3) * uu(3)
 
-          ! The rotation matrix in the xt,yt,zt frame is given by
-          !
-          ! R = | 1      0           0      |
-          !     | 0  cos(theta) -sin(theta) |
-          !     | 0  sin(theta)  cos(theta) |
-          !
-          ! The rotation matrix in the standard cartesian frame is then
-          ! given by t * r * t^t, where the colums of the transformation
-          ! matrix t are the unit vectors xt,yt,zt. One can easily check
-          ! this by checking rotation around the y- and z-axis. The
-          ! result of this is the expression below.
+                            IOVar(nn, sps)%w(i, j, k, ivz) = rotMatSec(mm, 3, 1) * uu(1) &
+                                                             + rotMatSec(mm, 3, 2) * uu(2) &
+                                                             + rotMatSec(mm, 3, 3) * uu(3)
+                        end do
+                    end do
+                end do
 
-          rotMatSec(nn,1,1) = xt(1)*xt(1)                          &
-               + cosTheta*(yt(1)*yt(1) + zt(1)*zt(1))
-          rotMatSec(nn,1,2) = xt(1)*xt(2)                          &
-               + cosTheta*(yt(1)*yt(2) + zt(1)*zt(2)) &
-               - sinTheta*(yt(1)*zt(2) - yt(2)*zt(1))
-          rotMatSec(nn,1,3) = xt(1)*xt(3)                          &
-               + cosTheta*(yt(1)*yt(3) + zt(1)*zt(3)) &
-               - sinTheta*(yt(1)*zt(3) - yt(3)*zt(1))
+            end do domains
 
-          rotMatSec(nn,2,1) = xt(1)*xt(2)                          &
-               + cosTheta*(yt(1)*yt(2) + zt(1)*zt(2)) &
-               + sinTheta*(yt(1)*zt(2) - yt(2)*zt(1))
-          rotMatSec(nn,2,2) = xt(2)*xt(2)                          &
-               + cosTheta*(yt(2)*yt(2) + zt(2)*zt(2))
-          rotMatSec(nn,2,3) = xt(2)*xt(3)                          &
-               + cosTheta*(yt(2)*yt(3) + zt(2)*zt(3)) &
-               - sinTheta*(yt(2)*zt(3) - yt(3)*zt(2))
+            ! Set told to tnew for the next spectral solution.
 
-          rotMatSec(nn,3,1) = xt(1)*xt(3)                          &
-               + cosTheta*(yt(1)*yt(3) + zt(1)*zt(3)) &
-               + sinTheta*(yt(1)*zt(3) - yt(3)*zt(1))
-          rotMatSec(nn,3,2) = xt(2)*xt(3)                          &
-               + cosTheta*(yt(2)*yt(3) + zt(2)*zt(3)) &
-               + sinTheta*(yt(2)*zt(3) - yt(3)*zt(2))
-          rotMatSec(nn,3,3) = xt(3)*xt(3)                          &
-               + cosTheta*(yt(3)*yt(3) + zt(3)*zt(3))
+            told = tnew
 
-       else testRotating
+        end do spectralLoop
 
-          ! Section is not rotating. Set the rotation matrix to the
-          ! identity matrix.
+    end subroutine copySpectralSolution
 
-          rotMatSec(nn,1,1) = one
-          rotMatSec(nn,1,2) = zero
-          rotMatSec(nn,1,3) = zero
+    subroutine determineSolFileNames
+        !
+        !       determineSolFileNames determines the number and names of the
+        !       files that contain the solutions. For steady computations only
+        !       one file must be present. For unsteady the situation is a
+        !       little more complicated. It is attempted to read as many
+        !       solutions as needed for a consistent restart. If not possible
+        !       as many as possible solutions are read. For an unsteady
+        !       computation the order will be reduced; for time spectral mode
+        !       the solution will be interpolated.
+        !
+        use constants
+        use communication, only: myID
+        use inputIO, only: restartFiles
+        use inputPhysics, only: equationMode
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use iteration, only: nOldSolAvail, oldSolWritten, nOldLevels
+        use variableReading, only: solFiles, nSolsRead, interpolSpectral, copySpectral
+        use utils, only: terminate
+        implicit none
+        !
+        !      Local variables
+        !
+        integer :: ierr
 
-          rotMatSec(nn,2,1) = zero
-          rotMatSec(nn,2,2) = one
-          rotMatSec(nn,2,3) = zero
+        integer(kind=intType) :: ii, nn
 
-          rotMatSec(nn,3,1) = zero
-          rotMatSec(nn,3,2) = zero
-          rotMatSec(nn,3,3) = one
+        character(len=7) :: integerString
+        character(len=maxStringLen) :: tmpName
 
-       endif testRotating
+        ! Initialize copySpectral and interpolSpectral to .false.
 
-    enddo sectionLoop
+        copySpectral = .false.
+        interpolSpectral = .false.
 
-    ! Initialize told to timeUnsteadyRestart. This takes the
-    ! possibility into account that the spectral mode is restarted
-    ! from an unsteady computation. Although not likely this
-    ! possibility is allowed and should therefore be taken into
-    ! account. Anyway told corresponds to the time of the 1st
-    ! spectral solution. Also determine the time step between the
-    ! spectral solutions. Both told and dt are only used to determine
-    ! the rigid body motion and if these are specified it is assumed
-    ! that there is only one section present in the grid.
-
-    told = timeUnsteadyRestart
-    dt   = sections(1)%timePeriod &
-         / real(nTimeIntervalsSpectral,realType)
-
-    ! Loop over the number of spectral modes, starting at 2.
-
-    spectralLoop: do sps=2,nTimeIntervalsSpectral
-
-       ! Determine the corresponding time for this spectral solution
-       ! and store sps - 1 a bit easier.
-
-       tnew  = told + dt
-       spsm1 = sps - 1
-
-       ! Determine the rotation matrix and rotation point between the
-       ! told and tnew for the rigid body rotation of the entire mesh.
-       ! The rotation point is not needed for the transformation of the
-       ! velocities, but rotMatrixRigidBody happens to compute it.
-
-       call rotMatrixRigidBody(tnew, told, rotMat, rotPoint)
-
-       ! Loop over the local number of blocks.
-
-       domains: do nn=1,nDom
-
-          ! Store the section ID of this block a bit easier in mm.
-
-          mm = flowDoms(nn,1,1)%sectionId
-
-          ! Loop over the owned cells of this block. As the number of
-          ! cells is identical for all spectral solutions, it does not
-          ! matter which mode is taken for the upper dimensions.
-
-          do k=2,flowDoms(nn,1,1)%kl
-             do j=2,flowDoms(nn,1,1)%jl
-                do i=2,flowDoms(nn,1,1)%il
-
-                   ! Step 1. Copy the solution variables w from
-                   ! the previous spectral solution.
-
-                   do l=1,nw
-                      IOVar(nn,sps)%w(i,j,k,l) = IOVar(nn,spsm1)%w(i,j,k,l)
-                   enddo
-
-                   ! Step 2. Apply the rigid body motion rotation matrix
-                   ! to the velocity. Use uu as a temporary storage.
-
-                   uu(1) = rotMat(1,1)*IOVar(nn,sps)%w(i,j,k,ivx) &
-                        + rotMat(1,2)*IOVar(nn,sps)%w(i,j,k,ivy) &
-                        + rotMat(1,3)*IOVar(nn,sps)%w(i,j,k,ivz)
+        ! Determine the desired number of files to be read. This depends
+        ! on the equation mode we have to solve for. Also set the
+        ! corresponding file names.
 
-                   uu(2) = rotMat(2,1)*IOVar(nn,sps)%w(i,j,k,ivx) &
-                        + rotMat(2,2)*IOVar(nn,sps)%w(i,j,k,ivy) &
-                        + rotMat(2,3)*IOVar(nn,sps)%w(i,j,k,ivz)
+        select case (equationMode)
 
-                   uu(3) = rotMat(3,1)*IOVar(nn,sps)%w(i,j,k,ivx) &
-                        + rotMat(3,2)*IOVar(nn,sps)%w(i,j,k,ivy) &
-                        + rotMat(3,3)*IOVar(nn,sps)%w(i,j,k,ivz)
+        case (steady)
 
-                   ! Step 3. Apply the rotation matrix of the section to
-                   ! the velocity.
+            ! Steady computation. Only one solution needs to be read.
+            ! In case a list of restart files were provided in the python
+            ! script, we force a read from only the first solution file.
 
-                   IOVar(nn,sps)%w(i,j,k,ivx) = rotMatSec(mm,1,1)*uu(1) &
-                        + rotMatSec(mm,1,2)*uu(2) &
-                        + rotMatSec(mm,1,3)*uu(3)
+            nSolsRead = 1
+            allocate (solFiles(nSolsRead), stat=ierr)
+            if (ierr /= 0) &
+                call terminate("determineSolFileNames", &
+                               "Memory allocation failure for solFiles")
 
-                   IOVar(nn,sps)%w(i,j,k,ivy) = rotMatSec(mm,2,1)*uu(1) &
-                        + rotMatSec(mm,2,2)*uu(2) &
-                        + rotMatSec(mm,2,3)*uu(3)
+            solFiles(1) = restartFiles(1)
 
-                   IOVar(nn,sps)%w(i,j,k,ivz) = rotMatSec(mm,3,1)*uu(1) &
-                        + rotMatSec(mm,3,2)*uu(2) &
-                        + rotMatSec(mm,3,3)*uu(3)
-                enddo
-             enddo
-          enddo
+            ! Check if the files can be opened, exit if that fails.
+            call checkSolFileNames()
 
-       enddo domains
+            !===============================================================
 
-       ! Set told to tnew for the next spectral solution.
+        case (unsteady)
 
-       told = tnew
+            ! Unsteady computation. For a consistent restart nOldLevels
+            ! solutions must be read. All restart files are provided explicitly
+            ! from python script.
+            call setSolFileNames()
 
-    enddo spectralLoop
+            ! Check if the files can be opened, exit if that fails.
+            call checkSolFileNames()
 
-  end subroutine copySpectralSolution
+            ! Set nOldSolAvail to nSolsRead and check if a consistent
+            ! restart can be made. If not, processor 0 prints a warning.
 
-  subroutine determineSolFileNames
-    !
-    !       determineSolFileNames determines the number and names of the
-    !       files that contain the solutions. For steady computations only
-    !       one file must be present. For unsteady the situation is a
-    !       little more complicated. It is attempted to read as many
-    !       solutions as needed for a consistent restart. If not possible
-    !       as many as possible solutions are read. For an unsteady
-    !       computation the order will be reduced; for time spectral mode
-    !       the solution will be interpolated.
-    !
-    use constants
-    use communication, only : myID
-    use inputIO, only : restartFiles
-    use inputPhysics, only : equationMode
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use iteration, only : nOldSolAvail, oldSolWritten, nOldLevels
-    use variableReading, only : solFiles, nSolsRead, interpolSpectral, copySpectral
-    use utils, only : terminate
-    implicit none
-    !
-    !      Local variables
-    !
-    integer :: ierr
+            nOldSolAvail = nSolsRead
+            if (myID == 0 .and. nOldSolAvail < nOldLevels) then
 
-    integer(kind=intType) :: ii, nn
+                print "(a)", "#"
+                print "(a)", "#                 Warning"
+                print "(a)", "# Not enough data found for a consistent &
+                     &time accurate restart."
+                print "(a)", "# Order is reduced in the first time steps &
+                     &until enough data is available again."
+                print "(a)", "#"
 
-    character(len=7)            :: integerString
-    character(len=maxStringLen) :: tmpName
+            end if
 
-    ! Initialize copySpectral and interpolSpectral to .false.
+            ! Set the logicals oldSolWritten, such that nothing is
+            ! overwritten when solution files are dumped for this
+            ! computation.
 
-    copySpectral     = .false.
-    interpolSpectral = .false.
+            ii = min(nSolsRead, nOldLevels - 1)
+            do nn = 1, ii
+                oldSolWritten(nn) = .true.
+            end do
+
+            !===============================================================
+
+        case (timeSpectral)
+
+            ! Time spectral computation. For a consistent restart
+            ! nTimeIntervalsSpectral solutions must be read. First
+            ! determine the the restart files.
+            call setSolFileNames()
+
+            ! Check if the files can be opened, exit if that fails.
+            call checkSolFileNames()
+
+            ! Check whether or not the spectral solution must be copied
+            ! or interpolated.
+
+            if (nSolsRead == 1) then
+                copySpectral = .true.
+            else if (nSolsRead /= nTimeIntervalsSpectral) then
+                interpolSpectral = .true.
+            end if
 
-    ! Determine the desired number of files to be read. This depends
-    ! on the equation mode we have to solve for. Also set the
-    ! corresponding file names.
+        end select
 
-    select case(equationMode)
+    end subroutine determineSolFileNames
 
-    case (steady)
+    subroutine setSolFileNames
+        !
+        !       setSolFileNames allocates and set the solution files that
+        !       will be read and loaded in the restart
+        !
+        use communication
+        use inputIO
+        use utils, only: terminate
+        use variableReading, only: solFiles, nSolsRead
+        implicit none
+        !
+        !      Local variables
+        !
+        integer :: ierr
+        integer(kind=intType) :: nn
 
-       ! Steady computation. Only one solution needs to be read.
-       ! In case a list of restart files were provided in the python
-       ! script, we force a read from only the first solution file.
+        ! The length of the array provided gives the number of nSolsRead.
+        nSolsRead = SIZE(restartFiles, 1)
 
-       nSolsRead = 1
-       allocate(solFiles(nSolsRead), stat=ierr)
-       if(ierr /= 0)                              &
+        ! Allocate the memory for the file names and set them.
+        allocate (solFiles(nSolsRead), stat=ierr)
+        if (ierr /= 0) &
             call terminate("determineSolFileNames", &
-            "Memory allocation failure for solFiles")
-
-       solFiles(1) = restartFiles(1)
-
-       ! Check if the files can be opened, exit if that fails.
-       call checkSolFileNames()
-
-       !===============================================================
-
-    case (unsteady)
-
-       ! Unsteady computation. For a consistent restart nOldLevels
-       ! solutions must be read. All restart files are provided explicitly
-       ! from python script.
-       call setSolFileNames()
-
-       ! Check if the files can be opened, exit if that fails.
-       call checkSolFileNames()
-
-       ! Set nOldSolAvail to nSolsRead and check if a consistent
-       ! restart can be made. If not, processor 0 prints a warning.
-
-       nOldSolAvail = nSolsRead
-       if(myID == 0 .and. nOldSolAvail < nOldLevels) then
-
-          print "(a)", "#"
-          print "(a)", "#                 Warning"
-          print "(a)", "# Not enough data found for a consistent &
-               &time accurate restart."
-          print "(a)", "# Order is reduced in the first time steps &
-               &until enough data is available again."
-          print "(a)", "#"
-
-       endif
-
-       ! Set the logicals oldSolWritten, such that nothing is
-       ! overwritten when solution files are dumped for this
-       ! computation.
-
-       ii = min(nSolsRead,nOldLevels-1)
-       do nn=1,ii
-          oldSolWritten(nn) = .true.
-       enddo
-
-       !===============================================================
-
-    case (timeSpectral)
-
-       ! Time spectral computation. For a consistent restart
-       ! nTimeIntervalsSpectral solutions must be read. First
-       ! determine the the restart files.
-       call setSolFileNames()
-
-       ! Check if the files can be opened, exit if that fails.
-       call checkSolFileNames()
-
-       ! Check whether or not the spectral solution must be copied
-       ! or interpolated.
-
-       if(nSolsRead == 1) then
-          copySpectral = .true.
-       else if(nSolsRead /= nTimeIntervalsSpectral) then
-          interpolSpectral = .true.
-       endif
-
-    end select
-
-  end subroutine determineSolFileNames
-
-  subroutine setSolFileNames
-    !
-    !       setSolFileNames allocates and set the solution files that
-    !       will be read and loaded in the restart
-    !
-    use communication
-    use inputIO
-    use utils, only : terminate
-    use variableReading, only : solFiles, nSolsRead
-    implicit none
-    !
-    !      Local variables
-    !
-    integer :: ierr
-    integer(kind=intType) :: nn
-
-
-    ! The length of the array provided gives the number of nSolsRead.
-    nSolsRead = SIZE(restartFiles,1)
-
-    ! Allocate the memory for the file names and set them.
-    allocate(solFiles(nSolsRead), stat=ierr)
-    if(ierr /= 0)                             &
-         call terminate("determineSolFileNames", &
-         "Memory allocation failure for solFiles")
-
-    do nn=1,nSolsRead
-       solFiles(nn) = restartFiles(nn)
-    enddo
-
-  end subroutine setSolFileNames
-
-
-
-  subroutine checkSolFileNames
-    !
-    !       checkSolFileNames will check if the provided restart files
-    !       are readable on disk. If not readable return fail, if readable
-    !       message will be printed to let the user know that restart file
-    !       will be tried to read.
-    !
-    use communication
-    use utils, only : terminate
-    use variableReading, only : solFiles, nSolsRead
-    use commonFormats, only : strings
-    implicit none
-    !
-    !      Local variables
-    !
-    integer :: ierr
-    character(len=maxStringLen) :: errorMessage
-    integer(kind=intType) :: nn
-
-    do nn=1,nSolsRead
-       open(unit=21,file=solFiles(nn),status="old",iostat=ierr)
-       if(ierr /= 0) then
-          write(errorMessage,*) "Restart file ", trim(solFiles(nn)), &
-               " could not be opened for reading"
-          call terminate("checkSolFileNames", errorMessage)
-          exit
-       end if
-       close(unit=21)
-
-       if(myID == 0) then
-          print "(a)", "#"
-          write (*, strings) "# Found restart file: ", trim(solFiles(nn))
-          print "(a)", "#"
-       end if
-    enddo
-
-  end subroutine checkSolFileNames
-
-  subroutine initDepvarAndHalos(halosRead)
-    !
-    !       InitDepvarAndHalos computes the dependent flow variables,
-    !       like viscosities, and initializes the halo cells by applying
-    !       the boundary conditions and exchanging the internal halo's.
-    !       This is all done on the start level grid.
-    !
-    use blockPointers
-    use flowVarRefState
-    use inputIO
-    use inputIteration
-    use inputPhysics
-    use inputTimeSpectral
-    use iteration
-    use monitor
-    use section
-    use utils, only : setPointers
-    use haloExchange, only : whalo2
-    use turbUtils, only : computeEddyViscosity
-    use turbBCRoutines, only :applyAllTurbBC
-    use solverUtils, only : gridVelocitiesFineLevel, gridVelocitiesCoarseLevels, &
-         normalVelocitiesAllLevels, slipVelocitiesFineLevel, slipVelocitiesCoarseLevels
-    use flowUtils, only : computeLamViscosity
-    use BCRoutines, only : applyAllBC
-    use residuals, only : residual
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    logical, intent(in) :: halosRead
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn, mm
-    real(kind=realType)   :: relaxBleedsOr
-
-    real(kind=realType), dimension(nSections) :: t
-
-    logical :: initBleeds
-
-    ! Set the logical whether or not to initialize the prescribed
-    ! data for the bleed regions. If the halos were read the bleeds
-    ! have been initialized already and nothing needs to be done.
-
-    if( halosRead ) then
-       initBleeds = .false.
-    else
-       initBleeds = .true.
-    endif
-
-    ! Compute the face velocities and for viscous walls the slip
-    ! velocities. This is done for all the mesh levels.
-
-    currentLevel = 1
-    groundLevel  = 1
-
-    do mm=1,nTimeIntervalsSpectral
-
-       ! Compute the time, which corresponds to this spectral solution.
-       ! For steady and unsteady mode this is simply the restart time;
-       ! for the spectral mode the periodic time must be taken into
-       ! account, which can be different for every section.
-
-       t = timeUnsteadyRestart
-
-       if(equationMode == timeSpectral) then
-          do nn=1,nSections
-             t(nn) = t(nn) + (mm-1)*sections(nn)%timePeriod &
-                  /         real(nTimeIntervalsSpectral,realType)
-          enddo
-       endif
-       call gridVelocitiesFineLevel(.false., t, mm)
-       call gridVelocitiesCoarseLevels(mm)
-       call normalVelocitiesAllLevels(mm)
-
-       call slipVelocitiesFineLevel(.false., t, mm)
-       call slipVelocitiesCoarseLevels(mm)
-
-    enddo
-
-    ! Loop over the number of spectral solutions and blocks
-    ! to compute the laminar viscosity.
-
-    do mm=1,nTimeIntervalsSpectral
-       do nn=1,nDom
-          call setPointers(nn,mgStartlevel,mm)
-          call computeLamViscosity(.False.)
-       enddo
-    enddo
-
-    ! Exchange the solution on the multigrid start level.
-    ! It is possible that the halo values are needed for the boundary
-    ! conditions. Viscosities are not exchanged.
-
-    call whalo2(mgStartlevel, 1_intType, nw, .true., .true., &
-         .false.)
-
-    ! Apply all flow boundary conditions to be sure that the halo's
-    ! contain the correct values. These might be needed to compute
-    ! the eddy-viscosity. Also the data for the outflow bleeds
-    ! is determined.
-
-    currentLevel = mgStartlevel
-    groundLevel  = mgStartlevel
-
-    call applyAllBC(.true.)
-
-    ! Loop over the number of spectral solutions and blocks
-    ! to compute the eddy viscosities.
-
-    do mm=1,nTimeIntervalsSpectral
-       do nn=1,nDom
-
-          ! Set the pointers for this block.
-
-          call setPointers(nn,mgStartlevel,mm)
-
-          ! Compute the eddy viscosity for rans computations using
-          ! an eddy viscosity model.
-
-          call computeEddyViscosity(.False.)
-
-          ! In case of a rans computation and no restart, initialize
-          ! the turbulent variables a bit better for some turbulence
-          ! models.
-
-          ! if(equations == RANSEquations .and. (.not. restart)) then
-          !
-          !   select case (turbModel)
-          !
-          !     case (komegaWilcox, komegaModified, menterSST)
-          !       call initKOmega(0_intType)
-          !       call computeEddyViscosity
-          !
-          !   end select
-          !
-          ! endif
-
-       enddo
-    enddo
-
-    ! Exchange the laminar and eddy viscosities.
-
-    call whalo2(mgStartlevel, 1_intType, 0_intType, .false., &
-         .false., .true.)
-
-    if (equations == RANSEquations) then
-       call applyAllTurbBC(.true.)
-    end if
-    call applyAllBC(.true.)
-
-    ! Exchange the solution for the second time to be sure that all
-    ! halo's are initialized correctly. As this is the initialization
-    ! phase, this is not critical.
-
-    call whalo2(mgStartlevel, 1_intType, nw, .true., .true., &
-         .true.)
-
-  end subroutine initDepvarAndHalos
-
-  subroutine initFlowRestart
-    !
-    !       initFlowRestart loads restart information from the restart
-    !       file into the state variables.
-    !
-    use constants
-    use IOModule, only : IOVar
-    use variableReading, only : solFiles, interpolSpectral, copySpectral, &
-         halosRead
-    use utils, only : terminate
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: ierr
-
-    ! Initialize halosRead to .false. This will be overwritten
-    ! if halo values are read during the restart.
-
-    halosRead = .false.
-
-    ! Determine the number and names of the solution files and
-    ! determine the contents of IOVar, the data structure to
-    ! generalize the IO.
-
-    call determineSolFileNames
-    call setIOVar
+                           "Memory allocation failure for solFiles")
+
+        do nn = 1, nSolsRead
+            solFiles(nn) = restartFiles(nn)
+        end do
+
+    end subroutine setSolFileNames
+
+    subroutine checkSolFileNames
+        !
+        !       checkSolFileNames will check if the provided restart files
+        !       are readable on disk. If not readable return fail, if readable
+        !       message will be printed to let the user know that restart file
+        !       will be tried to read.
+        !
+        use communication
+        use utils, only: terminate
+        use variableReading, only: solFiles, nSolsRead
+        use commonFormats, only: strings
+        implicit none
+        !
+        !      Local variables
+        !
+        integer :: ierr
+        character(len=maxStringLen) :: errorMessage
+        integer(kind=intType) :: nn
+
+        do nn = 1, nSolsRead
+            open (unit=21, file=solFiles(nn), status="old", iostat=ierr)
+            if (ierr /= 0) then
+                write (errorMessage, *) "Restart file ", trim(solFiles(nn)), &
+                    " could not be opened for reading"
+                call terminate("checkSolFileNames", errorMessage)
+                exit
+            end if
+            close (unit=21)
+
+            if (myID == 0) then
+                print "(a)", "#"
+                write (*, strings) "# Found restart file: ", trim(solFiles(nn))
+                print "(a)", "#"
+            end if
+        end do
+
+    end subroutine checkSolFileNames
+
+    subroutine initDepvarAndHalos(halosRead)
+        !
+        !       InitDepvarAndHalos computes the dependent flow variables,
+        !       like viscosities, and initializes the halo cells by applying
+        !       the boundary conditions and exchanging the internal halo's.
+        !       This is all done on the start level grid.
+        !
+        use blockPointers
+        use flowVarRefState
+        use inputIO
+        use inputIteration
+        use inputPhysics
+        use inputTimeSpectral
+        use iteration
+        use monitor
+        use section
+        use utils, only: setPointers
+        use haloExchange, only: whalo2
+        use turbUtils, only: computeEddyViscosity
+        use turbBCRoutines, only: applyAllTurbBC
+        use solverUtils, only: gridVelocitiesFineLevel, gridVelocitiesCoarseLevels, &
+                               normalVelocitiesAllLevels, slipVelocitiesFineLevel, slipVelocitiesCoarseLevels
+        use flowUtils, only: computeLamViscosity
+        use BCRoutines, only: applyAllBC
+        use residuals, only: residual
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        logical, intent(in) :: halosRead
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn, mm
+        real(kind=realType) :: relaxBleedsOr
+
+        real(kind=realType), dimension(nSections) :: t
+
+        logical :: initBleeds
+
+        ! Set the logical whether or not to initialize the prescribed
+        ! data for the bleed regions. If the halos were read the bleeds
+        ! have been initialized already and nothing needs to be done.
+
+        if (halosRead) then
+            initBleeds = .false.
+        else
+            initBleeds = .true.
+        end if
+
+        ! Compute the face velocities and for viscous walls the slip
+        ! velocities. This is done for all the mesh levels.
+
+        currentLevel = 1
+        groundLevel = 1
+
+        do mm = 1, nTimeIntervalsSpectral
+
+            ! Compute the time, which corresponds to this spectral solution.
+            ! For steady and unsteady mode this is simply the restart time;
+            ! for the spectral mode the periodic time must be taken into
+            ! account, which can be different for every section.
+
+            t = timeUnsteadyRestart
+
+            if (equationMode == timeSpectral) then
+                do nn = 1, nSections
+                    t(nn) = t(nn) + (mm - 1) * sections(nn)%timePeriod &
+                            / real(nTimeIntervalsSpectral, realType)
+                end do
+            end if
+            call gridVelocitiesFineLevel(.false., t, mm)
+            call gridVelocitiesCoarseLevels(mm)
+            call normalVelocitiesAllLevels(mm)
+
+            call slipVelocitiesFineLevel(.false., t, mm)
+            call slipVelocitiesCoarseLevels(mm)
+
+        end do
 
-    ! Determine the format of the files and read them.
-    ! Note that halosRead is possibly overwritten in the
-    ! folloing select case statement below
+        ! Loop over the number of spectral solutions and blocks
+        ! to compute the laminar viscosity.
 
-    call readRestartFile()
+        do mm = 1, nTimeIntervalsSpectral
+            do nn = 1, nDom
+                call setPointers(nn, mgStartlevel, mm)
+                call computeLamViscosity(.False.)
+            end do
+        end do
 
-    ! Copy or interpolate the spectral solution, if needed.
+        ! Exchange the solution on the multigrid start level.
+        ! It is possible that the halo values are needed for the boundary
+        ! conditions. Viscosities are not exchanged.
 
-    if( copySpectral )     call copySpectralSolution
-    if( interpolSpectral ) call interpolateSpectralSolution
+        call whalo2(mgStartlevel, 1_intType, nw, .true., .true., &
+                    .false.)
 
-    ! Release the memory of the file names and IOVar.
+        ! Apply all flow boundary conditions to be sure that the halo's
+        ! contain the correct values. These might be needed to compute
+        ! the eddy-viscosity. Also the data for the outflow bleeds
+        ! is determined.
 
-    deallocate(solFiles, IOVar, stat=ierr)
-    if(ierr /= 0)                &
-         call terminate("initFlow", &
-         "Deallocation failure for solFiles and IOVar")
+        currentLevel = mgStartlevel
+        groundLevel = mgStartlevel
 
-    ! At the moment the pressure is stored at the location of the
-    ! total energy. Copy the pressure to its own arrays and
-    ! compute the total energy.
+        call applyAllBC(.true.)
 
-    call setPressureAndComputeEnergy(halosRead)
+        ! Loop over the number of spectral solutions and blocks
+        ! to compute the eddy viscosities.
 
-    ! Initialize the halo cells if a restart is performed and the
-    ! entire flow field if this is not the case.
+        do mm = 1, nTimeIntervalsSpectral
+            do nn = 1, nDom
 
-    call initializeHalos(halosRead)
+                ! Set the pointers for this block.
 
-    ! Initialize the dependent flow variables and the halo values.
-    call initDepvarAndHalos(halosRead)
+                call setPointers(nn, mgStartlevel, mm)
 
-  end subroutine initFlowRestart
+                ! Compute the eddy viscosity for rans computations using
+                ! an eddy viscosity model.
 
-  subroutine initFlowfield
-    !
-    !       initFlowfield initializes the flow field to a uniform flow on
-    !       the start level grid. Exception may be some turbulence
-    !       variables, which are initialized a bit smarter.
-    !
-    use constants
-    use communication, only : myID
-    use inputIteration, only : nCycles, nSGStartup
-    use inputPhysics, only : equationMode
-    use inputUnsteady, only: nTimeStepsFine
-    use iteration, only : nOldSolAvail
-    use monitor, only : nTimeStepsRestart, timeUnsteadyRestart
-    use utils, only : allocConvArrays, allocTimeArrays
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn
-
-    ! Initialize nTimeStepsRestart to 0 (no restart
-    ! is performed) and allocate the memory for the arrays to store
-    ! convergence history. This allocation is only to be done by
-    ! processor 0. For an unsteady computation the entire convergence
-    ! history is stored and the values after every time step is
-    ! stored in a separate array.
-
-    nTimeStepsRestart   = 0
-    timeUnsteadyRestart = zero
-
-    nn = nsgStartup + nCycles
-    if(equationMode == unsteady) nn = nTimeStepsFine*nn
-    if(myID == 0) call allocConvArrays(nn)
-    if(equationMode == unsteady .and. myId == 0) &
-         call allocTimeArrays(nTimeStepsFine)
-
-    ! Set nOldSolAvail to 1, to indicate that an unsteady
-    ! computation should be started with a lower order
-    ! time integration scheme, because not enough states
-    ! in the past are available.
-
-    nOldSolAvail = 1
-
-    ! Initialize the flow field to uniform flow.
-
-    call setUniformFlow
-
-  end subroutine initFlowfield
-
-  subroutine initializeHalos(halosRead)
-    !
-    !       initializeHalos sets the flow variables in the halo cells
-    !       using a constant extrapolation. If the halos are read only the
-    !       second halos are initialized, otherwise both.
-    !
-    use constants
-    use blockPointers, only : nDom, w, p, rlv, ib, jb, kb, &
-         il, ie, jl, je, kl, ke, rev
-    use flowVarRefState, only : viscous, nw, eddyModel, muInf
-    use inputIteration, only : mgStartLevel
-    use inputPhysics, only : eddyVisInfRatio
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use utils, only : setPointers
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    logical, intent(in) :: halosRead
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn, mm, i, j, k, l
-    integer(kind=intType) :: jj, kk
+                call computeEddyViscosity(.False.)
 
-    ! Loop over the number of spectral solutions and blocks.
+                ! In case of a rans computation and no restart, initialize
+                ! the turbulent variables a bit better for some turbulence
+                ! models.
 
-    spectralLoop: do mm=1,nTimeIntervalsSpectral
-       domains: do nn=1,nDom
+                ! if(equations == RANSEquations .and. (.not. restart)) then
+                !
+                !   select case (turbModel)
+                !
+                !     case (komegaWilcox, komegaModified, menterSST)
+                !       call initKOmega(0_intType)
+                !       call computeEddyViscosity
+                !
+                !   end select
+                !
+                ! endif
 
-          ! Set the pointers for this block.
+            end do
+        end do
 
-          call setPointers(nn,mgStartlevel,mm)
+        ! Exchange the laminar and eddy viscosities.
 
-          ! Determine the situation we are dealing with.
+        call whalo2(mgStartlevel, 1_intType, 0_intType, .false., &
+                    .false., .true.)
+
+        if (equations == RANSEquations) then
+            call applyAllTurbBC(.true.)
+        end if
+        call applyAllBC(.true.)
+
+        ! Exchange the solution for the second time to be sure that all
+        ! halo's are initialized correctly. As this is the initialization
+        ! phase, this is not critical.
+
+        call whalo2(mgStartlevel, 1_intType, nw, .true., .true., &
+                    .true.)
+
+    end subroutine initDepvarAndHalos
+
+    subroutine initFlowRestart
+        !
+        !       initFlowRestart loads restart information from the restart
+        !       file into the state variables.
+        !
+        use constants
+        use IOModule, only: IOVar
+        use variableReading, only: solFiles, interpolSpectral, copySpectral, &
+                                   halosRead
+        use utils, only: terminate
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: ierr
 
-          testHalosRead: if( halosRead ) then
+        ! Initialize halosRead to .false. This will be overwritten
+        ! if halo values are read during the restart.
+
+        halosRead = .false.
+
+        ! Determine the number and names of the solution files and
+        ! determine the contents of IOVar, the data structure to
+        ! generalize the IO.
+
+        call determineSolFileNames
+        call setIOVar
+
+        ! Determine the format of the files and read them.
+        ! Note that halosRead is possibly overwritten in the
+        ! folloing select case statement below
+
+        call readRestartFile()
+
+        ! Copy or interpolate the spectral solution, if needed.
+
+        if (copySpectral) call copySpectralSolution
+        if (interpolSpectral) call interpolateSpectralSolution
+
+        ! Release the memory of the file names and IOVar.
+
+        deallocate (solFiles, IOVar, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("initFlow", &
+                           "Deallocation failure for solFiles and IOVar")
+
+        ! At the moment the pressure is stored at the location of the
+        ! total energy. Copy the pressure to its own arrays and
+        ! compute the total energy.
+
+        call setPressureAndComputeEnergy(halosRead)
+
+        ! Initialize the halo cells if a restart is performed and the
+        ! entire flow field if this is not the case.
+
+        call initializeHalos(halosRead)
+
+        ! Initialize the dependent flow variables and the halo values.
+        call initDepvarAndHalos(halosRead)
+
+    end subroutine initFlowRestart
+
+    subroutine initFlowfield
+        !
+        !       initFlowfield initializes the flow field to a uniform flow on
+        !       the start level grid. Exception may be some turbulence
+        !       variables, which are initialized a bit smarter.
+        !
+        use constants
+        use communication, only: myID
+        use inputIteration, only: nCycles, nSGStartup
+        use inputPhysics, only: equationMode
+        use inputUnsteady, only: nTimeStepsFine
+        use iteration, only: nOldSolAvail
+        use monitor, only: nTimeStepsRestart, timeUnsteadyRestart
+        use utils, only: allocConvArrays, allocTimeArrays
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn
+
+        ! Initialize nTimeStepsRestart to 0 (no restart
+        ! is performed) and allocate the memory for the arrays to store
+        ! convergence history. This allocation is only to be done by
+        ! processor 0. For an unsteady computation the entire convergence
+        ! history is stored and the values after every time step is
+        ! stored in a separate array.
+
+        nTimeStepsRestart = 0
+        timeUnsteadyRestart = zero
+
+        nn = nsgStartup + nCycles
+        if (equationMode == unsteady) nn = nTimeStepsFine * nn
+        if (myID == 0) call allocConvArrays(nn)
+        if (equationMode == unsteady .and. myId == 0) &
+            call allocTimeArrays(nTimeStepsFine)
+
+        ! Set nOldSolAvail to 1, to indicate that an unsteady
+        ! computation should be started with a lower order
+        ! time integration scheme, because not enough states
+        ! in the past are available.
+
+        nOldSolAvail = 1
+
+        ! Initialize the flow field to uniform flow.
 
-             ! The first layer of halo cells have been read. Initialize
-             ! the second layer from the first layer.
+        call setUniformFlow
 
-             ! Halo cells in the i-direction.
+    end subroutine initFlowfield
 
-             do k=0,kb
-                kk = max(1_intType,min(k,ke))
-                do j=0,jb
-                   jj = max(1_intType,min(j,je))
+    subroutine initializeHalos(halosRead)
+        !
+        !       initializeHalos sets the flow variables in the halo cells
+        !       using a constant extrapolation. If the halos are read only the
+        !       second halos are initialized, otherwise both.
+        !
+        use constants
+        use blockPointers, only: nDom, w, p, rlv, ib, jb, kb, &
+                                 il, ie, jl, je, kl, ke, rev
+        use flowVarRefState, only: viscous, nw, eddyModel, muInf
+        use inputIteration, only: mgStartLevel
+        use inputPhysics, only: eddyVisInfRatio
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use utils, only: setPointers
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        logical, intent(in) :: halosRead
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn, mm, i, j, k, l
+        integer(kind=intType) :: jj, kk
 
-                   do l=1,nw
-                      w(0, j,k,l) = w(1, jj,kk,l)
-                      w(ib,j,k,l) = w(ie,jj,kk,l)
-                   enddo
+        ! Loop over the number of spectral solutions and blocks.
+
+        spectralLoop: do mm = 1, nTimeIntervalsSpectral
+            domains: do nn = 1, nDom
+
+                ! Set the pointers for this block.
+
+                call setPointers(nn, mgStartlevel, mm)
+
+                ! Determine the situation we are dealing with.
+
+                testHalosRead: if (halosRead) then
+
+                    ! The first layer of halo cells have been read. Initialize
+                    ! the second layer from the first layer.
+
+                    ! Halo cells in the i-direction.
+
+                    do k = 0, kb
+                        kk = max(1_intType, min(k, ke))
+                        do j = 0, jb
+                            jj = max(1_intType, min(j, je))
+
+                            do l = 1, nw
+                                w(0, j, k, l) = w(1, jj, kk, l)
+                                w(ib, j, k, l) = w(ie, jj, kk, l)
+                            end do
+
+                            p(0, j, k) = p(1, jj, kk)
+                            p(ib, j, k) = p(ie, jj, kk)
+
+                        end do
+                    end do
+
+                    ! Halo cells in j-direction. Note that the i-halo's have
+                    ! already been set.
+
+                    do k = 0, kb
+                        kk = max(1_intType, min(k, ke))
+                        do i = 1, ie
 
-                   p(0, j,k) = p(1, jj,kk)
-                   p(ib,j,k) = p(ie,jj,kk)
+                            do l = 1, nw
+                                w(i, 0, k, l) = w(i, 1, kk, l)
+                                w(i, jb, k, l) = w(i, je, kk, l)
+                            end do
 
-                enddo
-             enddo
+                            p(i, 0, k) = p(i, 1, kk)
+                            p(i, jb, k) = p(i, je, kk)
 
-             ! Halo cells in j-direction. Note that the i-halo's have
-             ! already been set.
+                        end do
+                    end do
 
-             do k=0,kb
-                kk = max(1_intType,min(k,ke))
-                do i=1,ie
+                    ! Halo cells in k-direction. Note that the halo's in both
+                    ! i and j direction have already been set.
 
-                   do l=1,nw
-                      w(i,0, k,l) = w(i,1, kk,l)
-                      w(i,jb,k,l) = w(i,je,kk,l)
-                   enddo
+                    do j = 1, je
+                        do i = 1, ie
 
-                   p(i,0 ,k) = p(i,1, kk)
-                   p(i,jb,k) = p(i,je,kk)
+                            do l = 1, nw
+                                w(i, j, 0, l) = w(i, j, 1, l)
+                                w(i, j, kb, l) = w(i, j, ke, l)
+                            end do
+
+                            p(i, j, 0) = p(i, j, 1)
+                            p(i, j, kb) = p(i, j, ke)
+
+                        end do
+                    end do
+
+                    else testHalosRead
+
+                    ! No halo cells have been read. Initialize both layers
+                    ! using the internal value.
+
+                    ! Halo cells in the i-direction.
+
+                    do k = 0, kb
+                        kk = max(2_intType, min(k, kl))
+                        do j = 0, jb
+                            jj = max(2_intType, min(j, jl))
+
+                            do l = 1, nw
+                                w(0, j, k, l) = w(2, jj, kk, l)
+                                w(1, j, k, l) = w(2, jj, kk, l)
+                                w(ie, j, k, l) = w(il, jj, kk, l)
+                                w(ib, j, k, l) = w(il, jj, kk, l)
+                            end do
 
-                enddo
-             enddo
+                            p(0, j, k) = p(2, jj, kk)
+                            p(1, j, k) = p(2, jj, kk)
+                            p(ie, j, k) = p(il, jj, kk)
+                            p(ib, j, k) = p(il, jj, kk)
 
-             ! Halo cells in k-direction. Note that the halo's in both
-             ! i and j direction have already been set.
+                        end do
+                    end do
 
-             do j=1,je
-                do i=1,ie
+                    ! Halo cells in j-direction. Note that the i-halo's have
+                    ! already been set.
 
-                   do l=1,nw
-                      w(i,j,0, l) = w(i,j,1, l)
-                      w(i,j,kb,l) = w(i,j,ke,l)
-                   enddo
+                    do k = 0, kb
+                        kk = max(2_intType, min(k, kl))
+                        do i = 2, il
 
-                   p(i,j,0)  = p(i,j,1)
-                   p(i,j,kb) = p(i,j,ke)
+                            do l = 1, nw
+                                w(i, 0, k, l) = w(i, 2, kk, l)
+                                w(i, 1, k, l) = w(i, 2, kk, l)
+                                w(i, je, k, l) = w(i, jl, kk, l)
+                                w(i, jb, k, l) = w(i, jl, kk, l)
+                            end do
 
-                enddo
-             enddo
+                            p(i, 0, k) = p(i, 2, kk)
+                            p(i, 1, k) = p(i, 2, kk)
+                            p(i, je, k) = p(i, jl, kk)
+                            p(i, jb, k) = p(i, jl, kk)
 
-          else testHalosRead
+                        end do
+                    end do
 
-             ! No halo cells have been read. Initialize both layers
-             ! using the internal value.
+                    ! Halo cells in k-direction. Note that the halo's in both
+                    ! i and j direction have already been set.
 
-             ! Halo cells in the i-direction.
+                    do j = 2, jl
+                        do i = 2, il
 
-             do k=0,kb
-                kk = max(2_intType,min(k,kl))
-                do j=0,jb
-                   jj = max(2_intType,min(j,jl))
+                            do l = 1, nw
+                                w(i, j, 0, l) = w(i, j, 2, l)
+                                w(i, j, 1, l) = w(i, j, 2, l)
+                                w(i, j, ke, l) = w(i, j, kl, l)
+                                w(i, j, kb, l) = w(i, j, kl, l)
+                            end do
 
-                   do l=1,nw
-                      w(0, j,k,l) = w(2, jj,kk,l)
-                      w(1, j,k,l) = w(2, jj,kk,l)
-                      w(ie,j,k,l) = w(il,jj,kk,l)
-                      w(ib,j,k,l) = w(il,jj,kk,l)
-                   enddo
+                            p(i, j, 0) = p(i, j, 2)
+                            p(i, j, 1) = p(i, j, 2)
+                            p(i, j, ke) = p(i, j, kl)
+                            p(i, j, kb) = p(i, j, kl)
 
-                   p(0, j,k) = p(2, jj,kk)
-                   p(1, j,k) = p(2, jj,kk)
-                   p(ie,j,k) = p(il,jj,kk)
-                   p(ib,j,k) = p(il,jj,kk)
+                        end do
+                    end do
 
-                enddo
-             enddo
+                end if testHalosRead
 
-             ! Halo cells in j-direction. Note that the i-halo's have
-             ! already been set.
+                ! Initialize the laminar and eddy viscosity, if appropriate,
+                ! such that no uninitialized memory is present.
+                ! As the viscosities are dependent variables their values
+                ! are not read during a restart.
 
-             do k=0,kb
-                kk = max(2_intType,min(k,kl))
-                do i=2,il
+                if (viscous) rlv = muInf
+                if (eddyModel) rev = eddyVisInfRatio * muInf
 
-                   do l=1,nw
-                      w(i,0, k,l) = w(i,2, kk,l)
-                      w(i,1, k,l) = w(i,2, kk,l)
-                      w(i,je,k,l) = w(i,jl,kk,l)
-                      w(i,jb,k,l) = w(i,jl,kk,l)
-                   enddo
-
-                   p(i,0 ,k) = p(i,2, kk)
-                   p(i,1 ,k) = p(i,2, kk)
-                   p(i,je,k) = p(i,jl,kk)
-                   p(i,jb,k) = p(i,jl,kk)
-
-                enddo
-             enddo
-
-             ! Halo cells in k-direction. Note that the halo's in both
-             ! i and j direction have already been set.
-
-             do j=2,jl
-                do i=2,il
-
-                   do l=1,nw
-                      w(i,j,0, l) = w(i,j,2, l)
-                      w(i,j,1, l) = w(i,j,2, l)
-                      w(i,j,ke,l) = w(i,j,kl,l)
-                      w(i,j,kb,l) = w(i,j,kl,l)
-                   enddo
-
-                   p(i,j,0)  = p(i,j,2)
-                   p(i,j,1)  = p(i,j,2)
-                   p(i,j,ke) = p(i,j,kl)
-                   p(i,j,kb) = p(i,j,kl)
-
-                enddo
-             enddo
-
-          endif testHalosRead
-
-          ! Initialize the laminar and eddy viscosity, if appropriate,
-          ! such that no uninitialized memory is present.
-          ! As the viscosities are dependent variables their values
-          ! are not read during a restart.
-
-          if( viscous )   rlv = muInf
-          if( eddyModel ) rev = eddyVisInfRatio*muInf
-
-       enddo domains
-    enddo spectralLoop
-
-  end subroutine initializeHalos
-  subroutine interpolateSpectralSolution
-    !
-    !       interpolateSpectralSolution uses a spectral interpolation to
-    !       determine the initialization of the flow solution.
-    !       The solution is interpolated from the solution read, which
-    !       contains a different number of time instances and is stored in
-    !       IOVar()%w. This variable can be found in IOModule.
-    !
-    use constants
-    use blockPointers, only : w, il, jl, kl, nDom, sectionID
-    use flowVarRefState, only : nw
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use IOModule, only : IOVar
-    use section, only: sections, nSections
-    use utils, only : setPointers, terminate, spectralInterpolCoef
-    use variableReading, only : nSolsRead
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
+            end do domains
+        end do spectralLoop
 
-    integer(kind=intType) :: jj, nn, ll, sps, i, j, k, l
+    end subroutine initializeHalos
+    subroutine interpolateSpectralSolution
+        !
+        !       interpolateSpectralSolution uses a spectral interpolation to
+        !       determine the initialization of the flow solution.
+        !       The solution is interpolated from the solution read, which
+        !       contains a different number of time instances and is stored in
+        !       IOVar()%w. This variable can be found in IOModule.
+        !
+        use constants
+        use blockPointers, only: w, il, jl, kl, nDom, sectionID
+        use flowVarRefState, only: nw
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use IOModule, only: IOVar
+        use section, only: sections, nSections
+        use utils, only: setPointers, terminate, spectralInterpolCoef
+        use variableReading, only: nSolsRead
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: jj, nn, ll, sps, i, j, k, l
+
+        real(kind=realType) :: t
 
-    real(kind=realType) :: t
+        real(kind=realType), dimension(nSolsRead) :: alpScal
+        real(kind=realType), dimension(nSections, nSolsRead, 3, 3) :: alpMat
 
-    real(kind=realType), dimension(nSolsRead) :: alpScal
-    real(kind=realType), dimension(nSections,nSolsRead,3,3) :: alpMat
+        ! Loop over the number of spectral solutions to be interpolated.
 
-    ! Loop over the number of spectral solutions to be interpolated.
+        spectralLoop: do sps = 1, nTimeIntervalsSpectral
 
-    spectralLoop: do sps=1,nTimeIntervalsSpectral
+            ! Determine the ratio of the time of this solution and the
+            ! periodic time.
 
-       ! Determine the ratio of the time of this solution and the
-       ! periodic time.
+            nn = sps - 1
+            t = real(nn, realType) / real(nTimeIntervalsSpectral, realType)
 
-       nn = sps - 1
-       t  = real(nn,realType)/real(nTimeIntervalsSpectral,realType)
+            ! Determine the interpolation coefficients for both the scalar
+            ! and the vector quantities.
 
-       ! Determine the interpolation coefficients for both the scalar
-       ! and the vector quantities.
+            call spectralInterpolCoef(nSolsRead, t, alpScal, alpMat)
 
-       call spectralInterpolCoef(nSolsRead, t, alpScal, alpMat)
+            ! Loop over the local number of blocks.
 
-       ! Loop over the local number of blocks.
+            domains: do nn = 1, nDom
 
-       domains: do nn=1,nDom
+                ! Set the pointers for this block to the finest grid level.
 
-          ! Set the pointers for this block to the finest grid level.
+                call setPointers(nn, 1_intType, sps)
 
-          call setPointers(nn, 1_intType, sps)
+                ! Loop over the number of variables to be interpolated.
 
-          ! Loop over the number of variables to be interpolated.
+                varLoop: do l = 1, nw
 
-          varLoop: do l=1,nw
+                    ! Check if this is a velocity variable.
+
+                    velTest: if (l == ivx .or. l == ivy .or. l == ivz) then
+
+                        ! Velocity variable. Set ll, which is the row in the
+                        ! matrix coefficients.
 
-             ! Check if this is a velocity variable.
+                        select case (l)
+                        case (ivx)
+                            ll = 1
+                        case (ivy)
+                            ll = 2
+                        case (ivz)
+                            ll = 3
+                        end select
 
-             velTest: if(l == ivx .or. l == ivy .or. l == ivz) then
+                        ! Loop over the owned cells to interpolate the variable.
 
-                ! Velocity variable. Set ll, which is the row in the
-                ! matrix coefficients.
+                        do k = 2, kl
+                            do j = 2, jl
+                                do i = 2, il
 
-                select case (l)
-                case (ivx)
-                   ll = 1
-                case (ivy)
-                   ll = 2
-                case (ivz)
-                   ll = 3
-                end select
+                                    ! Initialization to zero and loop over the number of
+                                    ! spectral solutions used in the interpolation and
+                                    ! update the variable accordingly. Note that for the
+                                    ! vector variables the matrix coefficients must be
+                                    ! used; these matrices can be different for the
+                                    ! sections present in the grid.
 
-                ! Loop over the owned cells to interpolate the variable.
+                                    w(i, j, k, l) = zero
+                                    do jj = 1, nSolsRead
+                                        w(i, j, k, l) = w(i, j, k, l) &
+                                                        + alpMat(sectionID, jj, ll, 1) &
+                                                        * IOVar(nn, jj)%w(i, j, k, ivx) &
+                                                        + alpMat(sectionID, jj, ll, 2) &
+                                                        * IOVar(nn, jj)%w(i, j, k, ivy) &
+                                                        + alpMat(sectionID, jj, ll, 3) &
+                                                        * IOVar(nn, jj)%w(i, j, k, ivz)
+                                    end do
 
-                do k=2,kl
-                   do j=2,jl
-                      do i=2,il
+                                end do
+                            end do
+                        end do
 
-                         ! Initialization to zero and loop over the number of
-                         ! spectral solutions used in the interpolation and
-                         ! update the variable accordingly. Note that for the
-                         ! vector variables the matrix coefficients must be
-                         ! used; these matrices can be different for the
-                         ! sections present in the grid.
+                        else velTest
 
-                         w(i,j,k,l) = zero
-                         do jj=1,nSolsRead
-                            w(i,j,k,l) = w(i,j,k,l)                &
-                                 + alpMat(sectionID,jj,ll,1) &
-                                 * IOVar(nn,jj)%w(i,j,k,ivx) &
-                                 + alpMat(sectionID,jj,ll,2) &
-                                 * IOVar(nn,jj)%w(i,j,k,ivy) &
-                                 + alpMat(sectionID,jj,ll,3) &
-                                 * IOVar(nn,jj)%w(i,j,k,ivz)
-                         enddo
+                        ! Scalar variable.
+                        ! Loop over the owned cells to interpolate the variable.
 
-                      enddo
-                   enddo
-                enddo
+                        do k = 2, kl
+                            do j = 2, jl
+                                do i = 2, il
 
-             else velTest
+                                    ! Initialization to zero and loop over the number of
+                                    ! spectral solutions used in the interpolation and
+                                    ! update the variable accordingly.
 
-                ! Scalar variable.
-                ! Loop over the owned cells to interpolate the variable.
+                                    w(i, j, k, l) = zero
+                                    do jj = 1, nSolsRead
+                                        w(i, j, k, l) = w(i, j, k, l) &
+                                                        + alpScal(jj) * IOVar(nn, jj)%w(i, j, k, l)
+                                    end do
 
-                do k=2,kl
-                   do j=2,jl
-                      do i=2,il
+                                end do
+                            end do
+                        end do
 
-                         ! Initialization to zero and loop over the number of
-                         ! spectral solutions used in the interpolation and
-                         ! update the variable accordingly.
+                    end if velTest
 
-                         w(i,j,k,l) = zero
-                         do jj=1,nSolsRead
-                            w(i,j,k,l) = w(i,j,k,l)  &
-                                 + alpScal(jj)*IOVar(nn,jj)%w(i,j,k,l)
-                         enddo
+                end do varLoop
 
-                      enddo
-                   enddo
-                enddo
+            end do domains
 
-             endif velTest
+        end do spectralLoop
 
-          enddo varLoop
+        ! Release the memory of w of IOVar.
 
-       enddo domains
+        do sps = 1, nSolsRead
+            do nn = 1, nDom
+                deallocate (IOVar(nn, sps)%w, stat=ierr)
+                if (ierr /= 0) &
+                    call terminate("interpolateSpectralSolution", &
+                                   "Deallocation failure for w.")
+            end do
+        end do
 
-    enddo spectralLoop
+    end subroutine interpolateSpectralSolution
 
-    ! Release the memory of w of IOVar.
+    subroutine releaseExtraMemBCs
+        !
+        !       releaseExtraMemBCs releases the extra memory allocated in
+        !       allocMemBcdata. This additional memory was allocated, such
+        !       that alternative boundary condition treatments can be handled
+        !       in setBCDataFineGrid.
+        !
+        use constants
+        use blockPointers, only: flowDoms, nDom, BCType, BCData, nBocos
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use utils, only: setPointers, terminate
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: mm, nn, sps, level, nLevels, ii
+
+        ! Determine the number of multigrid levels.
+
+        nLevels = ubound(flowDoms, 2)
+
+        ! Loop over the number of multigrid level, spectral solutions
+        ! and local blocks.
+
+        levelLoop: do level = 1, nLevels
+            spectralLoop: do sps = 1, nTimeIntervalsSpectral
+                domainsLoop: do nn = 1, nDom
+
+                    ! Have the pointers in blockPointers point to the
+                    ! current block to make everything more readable.
+
+                    call setPointers(nn, level, sps)
+
+                    ! Loop over the number of boundary subfaces for this block.
+
+                    bocoLoop: do mm = 1, nBocos
+
+                        ! Determine the boundary condition we are having here.
+
+                        inflowType:select case(BCType(mm))
+
+                        case (SubsonicInflow)
+
+                        ! Subsonic inflow. Determine the boundary condition
+                        ! treatment and release the accordingly. Note that
+                        ! the boundary condition treatment of the finest mesh
+                        ! must be used, because this data is not available yet
+                        ! on the coarse grid.
 
-    do sps=1,nSolsRead
-       do nn=1,nDom
-          deallocate(IOVar(nn,sps)%w, stat=ierr)
-          if(ierr /= 0) &
-               call terminate("interpolateSpectralSolution", &
-               "Deallocation failure for w.")
-       enddo
-    enddo
+                        ii = &
+                            flowDoms(nn, 1, sps)%BCData(mm)%subsonicInletTreatment
 
-  end subroutine interpolateSpectralSolution
+                        select case (ii)
 
-  subroutine releaseExtraMemBCs
-    !
-    !       releaseExtraMemBCs releases the extra memory allocated in
-    !       allocMemBcdata. This additional memory was allocated, such
-    !       that alternative boundary condition treatments can be handled
-    !       in setBCDataFineGrid.
-    !
-    use constants
-    use blockPointers, only : flowDoms, nDom, BCType, BCData, nBocos
-    use inputTimeSpectral, only: nTimeIntervalsSpectral
-    use utils, only : setPointers, terminate
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
+                        case (totalConditions)
+
+                            ! Total conditions used. Release the memory for
+                            ! the density and velocities and nullify their
+                            ! pointers.
+
+                            deallocate (BCData(mm)%rho, BCData(mm)%velx, &
+                                        BCData(mm)%vely, BCData(mm)%velz, &
+                                        stat=ierr)
+                            if (ierr /= 0) &
+                                 call terminate("releaseExtraMemBCs", &
+                                 "Deallocation failure for rho, &
+                                 &velx, vely and velz")
+
+                            nullify (BCData(mm)%rho)
+                            nullify (BCData(mm)%velx)
+                            nullify (BCData(mm)%vely)
+                            nullify (BCData(mm)%velz)
+                            !===================================================
+
+                        case (massFlow)
 
-    integer(kind=intType) :: mm, nn, sps, level, nLevels, ii
+                            ! Full velocity vector and density prescribed at
+                            ! inlet boundaries. Release the memory for the
+                            ! total conditions and flow directions and nullify
+                            ! their pointers.
+
+                            deallocate (BCData(mm)%ptInlet, &
+                                        BCData(mm)%ttInlet, &
+                                        BCData(mm)%htInlet, &
+                                        BCData(mm)%flowXdirInlet, &
+                                        BCData(mm)%flowYdirInlet, &
+                                        BCData(mm)%flowZdirInlet, stat=ierr)
+                            if (ierr /= 0) &
+                                 call terminate("releaseExtraMemBCs", &
+                                 "Deallocation failure for the &
+                                 &total conditions.")
+
+                            nullify (BCData(mm)%ptInlet)
+                            nullify (BCData(mm)%ttInlet)
+                            nullify (BCData(mm)%htInlet)
+                            nullify (BCData(mm)%flowXdirInlet)
+                            nullify (BCData(mm)%flowYdirInlet)
+                            nullify (BCData(mm)%flowZdirInlet)
+
+                        end select
+
+                        end select inflowType
+
+                    end do bocoLoop
+                end do domainsLoop
+            end do spectralLoop
+        end do levelLoop
 
-    ! Determine the number of multigrid levels.
+    end subroutine releaseExtraMemBCs
 
-    nLevels = ubound(flowDoms,2)
+    subroutine setIOVar
+        !
+        !       setIOVar allocates the memory for the derived data type IOVar,
+        !       which is simplifies the reading. If an interpolation must be
+        !       performed for the time spectral method also the solution of
+        !       this IO type is allocated. For all other cases the pointers of
+        !       IOVar are set the the appropriate entries of flowDoms, with
+        !       possible offset due to the usage of pointers.
+        !
+        use constants
+        use block, only: flowDoms, nDom
+        use flowVarRefState, only: nw
+        use inputPhysics, only: equationMode
+        use IOModule, only: IOVar
+        use utils, only: terminate
+        use variableReading, only: interpolSpectral, halosRead, nSolsRead
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: nn, mm, il, jl, kl
+
+        ! Allocate the memory for IOVar.
+
+        allocate (IOVar(nDom, nSolsRead), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("setIOVar", &
+                           "Memory allocation failure for solRead")
+
+        ! Determine the equation mode we are solving and set the pointers
+        ! of coorRead accordingly, or even allocate the memory, if needed.
+
+        select case (equationMode)
+
+        case (steady)
+
+            ! Steady computation. Only one solution needs to be read.
+            ! Loop over the number of blocks and set the pointers.
+            ! No pointer offset is needed.
+
+            do nn = 1, nDom
+                IOVar(nn, 1)%pointerOffset = 0
+                IOVar(nn, 1)%w => flowDoms(nn, 1, 1)%w(1:, 1:, 1:, :)
+            end do
+
+            !===============================================================
+
+        case (unsteady)
+
+            ! Unsteady computation. The first solution should be stored in
+            ! w. For the others the pointers point to wOld. As the
+            ! starting indices of wOld are 2, a pointer shift takes place
+            ! here. I know this is a pain in the butt, but that's what
+            ! we have to live with.
+
+            do nn = 1, nDom
+                IOVar(nn, 1)%pointerOffset = 0
+                IOVar(nn, 1)%w => flowDoms(nn, 1, 1)%w(1:, 1:, 1:, :)
+
+                do mm = 2, nSolsRead
+                    IOVar(nn, mm)%pointerOffset = -1
+                    IOVar(nn, mm)%w => flowDoms(nn, 1, 1)%wOld(mm - 1, 2:, 2:, 2:, :)
+                end do
+            end do
 
-    ! Loop over the number of multigrid level, spectral solutions
-    ! and local blocks.
+            !===============================================================
 
-    levelLoop: do level=1,nLevels
-       spectralLoop: do sps=1,nTimeIntervalsSpectral
-          domainsLoop: do nn=1,nDom
-
-             ! Have the pointers in blockPointers point to the
-             ! current block to make everything more readable.
-
-             call setPointers(nn, level, sps)
-
-             ! Loop over the number of boundary subfaces for this block.
-
-             bocoLoop: do mm=1,nBocos
-
-                ! Determine the boundary condition we are having here.
-
-                inflowType: select case (BCType(mm))
-
-                case (SubsonicInflow)
-
-                   ! Subsonic inflow. Determine the boundary condition
-                   ! treatment and release the accordingly. Note that
-                   ! the boundary condition treatment of the finest mesh
-                   ! must be used, because this data is not available yet
-                   ! on the coarse grid.
-
-                   ii = &
-                        flowDoms(nn,1,sps)%BCData(mm)%subsonicInletTreatment
-
-                   select case (ii)
-
-                   case (totalConditions)
-
-                      ! Total conditions used. Release the memory for
-                      ! the density and velocities and nullify their
-                      ! pointers.
-
-                      deallocate(BCData(mm)%rho,  BCData(mm)%velx, &
-                           BCData(mm)%vely, BCData(mm)%velz, &
-                           stat=ierr)
-                      if(ierr /= 0) &
-                           call terminate("releaseExtraMemBCs", &
-                           "Deallocation failure for rho, &
-                           &velx, vely and velz")
-
-                      nullify(BCData(mm)%rho)
-                      nullify(BCData(mm)%velx)
-                      nullify(BCData(mm)%vely)
-                      nullify(BCData(mm)%velz)
-                      !===================================================
-
-                   case (massFlow)
-
-                      ! Full velocity vector and density prescribed at
-                      ! inlet boundaries. Release the memory for the
-                      ! total conditions and flow directions and nullify
-                      ! their pointers.
-
-                      deallocate(BCData(mm)%ptInlet,        &
-                           BCData(mm)%ttInlet,        &
-                           BCData(mm)%htInlet,        &
-                           BCData(mm)%flowXdirInlet, &
-                           BCData(mm)%flowYdirInlet, &
-                           BCData(mm)%flowZdirInlet, stat=ierr)
-                      if(ierr /= 0) &
-                           call terminate("releaseExtraMemBCs", &
-                           "Deallocation failure for the &
-                           &total conditions.")
-
-                      nullify(BCData(mm)%ptInlet)
-                      nullify(BCData(mm)%ttInlet)
-                      nullify(BCData(mm)%htInlet)
-                      nullify(BCData(mm)%flowXdirInlet)
-                      nullify(BCData(mm)%flowYdirInlet)
-                      nullify(BCData(mm)%flowZdirInlet)
-
-                   end select
-
-                end select inflowType
-
-             enddo bocoLoop
-          enddo domainsLoop
-       enddo spectralLoop
-    enddo levelLoop
-
-  end subroutine releaseExtraMemBCs
-
-  subroutine setIOVar
-    !
-    !       setIOVar allocates the memory for the derived data type IOVar,
-    !       which is simplifies the reading. If an interpolation must be
-    !       performed for the time spectral method also the solution of
-    !       this IO type is allocated. For all other cases the pointers of
-    !       IOVar are set the the appropriate entries of flowDoms, with
-    !       possible offset due to the usage of pointers.
-    !
-    use constants
-    use block, only : flowDoms, nDom
-    use flowVarRefState, only : nw
-    use inputPhysics, only : equationMode
-    use IOModule, only : IOVar
-    use utils, only : terminate
-    use variableReading, only : interpolSpectral, halosRead, nSolsRead
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
+        case (timeSpectral)
 
-    integer(kind=intType) :: nn, mm, il, jl, kl
+            ! Time spectral mode. A further check is required.
 
-    ! Allocate the memory for IOVar.
+            testAllocSolRead: if (interpolSpectral) then
 
-    allocate(IOVar(nDom,nSolsRead), stat=ierr)
-    if(ierr /= 0)                &
-         call terminate("setIOVar", &
-         "Memory allocation failure for solRead")
+                ! A restart is performed using a different number of time
+                ! instances than the previous computation. Consequently the
+                ! solutions will be interpolated later on. Hence some
+                ! additional storage is required for the solutions and thus
+                ! the w variables of IOVar are allocated. No halo data is
+                ! needed here. No pointer offset either due to the explicit
+                ! allocation.
 
-    ! Determine the equation mode we are solving and set the pointers
-    ! of coorRead accordingly, or even allocate the memory, if needed.
+                do nn = 1, nDom
+                    il = flowDoms(nn, 1, 1)%il
+                    jl = flowDoms(nn, 1, 1)%jl
+                    kl = flowDoms(nn, 1, 1)%kl
 
-    select case(equationMode)
+                    do mm = 1, nSolsRead
+                        IOVar(nn, mm)%pointerOffset = 0
 
-    case (steady)
+                        allocate (IOVar(nn, mm)%w(2:il, 2:jl, 2:kl, nw), stat=ierr)
+                        if (ierr /= 0) &
+                            call terminate("setIOVar", &
+                                           "Memory allocation failure for w")
+                    end do
 
-       ! Steady computation. Only one solution needs to be read.
-       ! Loop over the number of blocks and set the pointers.
-       ! No pointer offset is needed.
+                end do
 
-       do nn=1,nDom
-          IOVar(nn,1)%pointerOffset = 0
-          IOVar(nn,1)%w => flowDoms(nn,1,1)%w(1:,1:,1:,:)
-       enddo
+                else testAllocSolRead
 
-       !===============================================================
+                ! A restart is made using either 1 solution or the correct
+                ! number of solution instances. In both cases simply set
+                ! the pointers of IOVar. No pointer offset is needed.
 
-    case (unsteady)
+                do nn = 1, nDom
+                    do mm = 1, nSolsRead
+                        IOVar(nn, mm)%pointerOffset = 0
+                        IOVar(nn, mm)%w => flowDoms(nn, 1, mm)%w(1:, 1:, 1:, :)
+                    end do
+                end do
 
-       ! Unsteady computation. The first solution should be stored in
-       ! w. For the others the pointers point to wOld. As the
-       ! starting indices of wOld are 2, a pointer shift takes place
-       ! here. I know this is a pain in the butt, but that's what
-       ! we have to live with.
+            end if testAllocSolRead
+
+        end select
+
+    end subroutine setIOVar
+
+    subroutine setPressureAndComputeEnergy(halosRead)
+        !
+        !       Due to the usage of the variable IOVar, which generalizes the
+        !       IO and leads to reuse of code, currently the pressure is
+        !       stored at the position of rhoE. In this routine that data is
+        !       copied to the pressure array and the total energy is computed.
+        !       Note that this routine is only called when a restart is done.
+        !
+        use constants
+        use blockPointers, only: nDom, p, w, il, jl, kl
+        use flowVarRefState, only: kPresent
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use utils, only: setPointers
+        use flowUtils, only: computeEtotBlock
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        logical, intent(in) :: halosRead
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: sps, nn, nHalo
+        integer(kind=intType) :: i, j, k
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
+
+        ! Set the value of nHalo, depending whether or not the halo cells
+        ! have been read from the restart file.
+
+        nHalo = 0
+        if (halosRead) nHalo = 1
+
+        ! Loop over the number of time instances and the local blocks.
+        ! As this routine is only called when a restart is performed,
+        ! the MG start level is the finest level.
+
+        do sps = 1, nTimeIntervalsSpectral
+            do nn = 1, nDom
+
+                ! Set the pointers to the correct block. As this routine is
+                ! only called when a restart is performed, the MG start level
+                ! is the finest level.
+
+                call setPointers(nn, 1_intType, sps)
+
+                ! Determine the range for which the pressure must be computed.
+
+                iBeg = 2 - nHalo; jBeg = 2 - nHalo; kBeg = 2 - nHalo
+                iEnd = il + nHalo; jEnd = jl + nHalo; kEnd = kl + nHalo
+
+                ! Copy the pressure for the required cells.
+
+                do k = kBeg, kEnd
+                    do j = jBeg, jEnd
+                        do i = iBeg, iEnd
+                            p(i, j, k) = w(i, j, k, irhoE)
+                        end do
+                    end do
+                end do
 
-       do nn=1,nDom
-          IOVar(nn,1)%pointerOffset = 0
-          IOVar(nn,1)%w => flowDoms(nn,1,1)%w(1:,1:,1:,:)
+                ! Compute the total energy as well.
+
+                call computeEtotBlock(iBeg, iEnd, jBeg, jEnd, kBeg, kEnd, kPresent)
+            end do
+        end do
+
+    end subroutine setPressureAndComputeEnergy
+    subroutine setRestartFiles(fileName, i)
+        !
+        !          Populates the restartfiles
+        !          The array is populated from Python using setRestartFiles
+        !
+        use constants
+        use inputIO, only: restartFiles
+        implicit none
+        !
+        !      Subroutine argument.
+        !
+        character(len=*), intent(inout) :: fileName
+        integer(kind=intType) :: i
+
+        restartFiles(i) = fileName
+
+    end subroutine setRestartFiles
+
+    subroutine setUniformFlow
+        !
+        !       setUniformFlow set the flow variables of all local blocks on
+        !       the start level to the uniform flow field.
+        !
+        use constants
+        use blockPointers, only: w, dw, fw, flowDoms, ib, jb, kb, &
+                                 rev, rlv, nDom, BCData, p
+        use communication
+        use flowVarRefState, only: eddyModel, viscous, muInf, nw, nwf, &
+                                   pInfCorr, wInf
+        use inputIteration, only: mgStartLevel
+        use inputPhysics, only: equationMode, flowType, eddyVisInfRatio
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use utils, only: setPointers
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: nn, mm, i, j, k, l
+
+        real(kind=realType) :: tmp
+
+        real(kind=realType), dimension(3) :: dirLoc, dirGlob
+
+        ! Loop over the number of spectral solutions and blocks.
+        spectralLoop: do mm = 1, nTimeIntervalsSpectral
+            domains: do nn = 1, nDom
+
+                ! Set the pointers for this block.
+
+                call setPointers(nn, mgStartlevel, mm)
+
+                ! Set the w-variables to the ones of the uniform flow field.
+                do l = 1, nw
+                    do k = 0, kb
+                        do j = 0, jb
+                            do i = 0, ib
+                                w(i, j, k, l) = wInf(l)
+                                dw(i, j, k, l) = zero
+                            end do
+                        end do
+                    end do
+                end do
+                !set this here for a reinitialize flow to eliminate possible NAN's
+                do l = 1, nwf
+                    do k = 0, kb
+                        do j = 0, jb
+                            do i = 0, ib
+                                fw(i, j, k, l) = zero
+                            end do
+                        end do
+                    end do
+                end do
 
-          do mm=2,nSolsRead
-             IOVar(nn,mm)%pointerOffset = -1
-             IOVar(nn,mm)%w => flowDoms(nn,1,1)%wOld(mm-1,2:,2:,2:,:)
-          enddo
-       enddo
+                ! Set the pressure.
 
-       !===============================================================
+                p = pInfCorr
 
-    case (timeSpectral)
+                ! Initialize the laminar and eddy viscosity, if appropriate,
+                ! such that no uninitialized memory is present.
 
-       ! Time spectral mode. A further check is required.
+                if (viscous) rlv = muInf
+                if (eddyModel) rev = eddyVisInfRatio * muInf
 
-       testAllocSolRead: if( interpolSpectral ) then
+            end do domains
+        end do spectralLoop
 
-          ! A restart is performed using a different number of time
-          ! instances than the previous computation. Consequently the
-          ! solutions will be interpolated later on. Hence some
-          ! additional storage is required for the solutions and thus
-          ! the w variables of IOVar are allocated. No halo data is
-          ! needed here. No pointer offset either due to the explicit
-          ! allocation.
+        ! Correct for the time spectral method in combination with an
+        ! internal flow computation the velocity direction.
+        ! It is possible that the prescribed direction is different
+        ! for every time instance.
 
-          do nn=1,nDom
-             il = flowDoms(nn,1,1)%il
-             jl = flowDoms(nn,1,1)%jl
-             kl = flowDoms(nn,1,1)%kl
+        testCorrection: if (equationMode == timeSpectral .and. &
+                            flowType == internalFlow) then
 
-             do mm=1,nSolsRead
-                IOVar(nn,mm)%pointerOffset = 0
+            ! Loop over the number of spectral solutions.
 
-                allocate(IOVar(nn,mm)%w(2:il,2:jl,2:kl,nw), stat=ierr)
-                if(ierr /= 0)                &
-                     call terminate("setIOVar", &
-                     "Memory allocation failure for w")
-             enddo
+            spectralLoopCorr: do mm = 1, nTimeIntervalsSpectral
 
-          enddo
+                ! Initialize the local direction to zero. In the loop
+                ! below this direction will be accumulated.
 
-       else testAllocSolRead
+                dirLoc = zero
 
-          ! A restart is made using either 1 solution or the correct
-          ! number of solution instances. In both cases simply set
-          ! the pointers of IOVar. No pointer offset is needed.
+                ! Loop over the number of blocks and determine the average
+                ! velocity direction prescribed at the inlets.
 
-          do nn=1,nDom
-             do mm=1,nSolsRead
-                IOVar(nn,mm)%pointerOffset = 0
-                IOVar(nn,mm)%w => flowDoms(nn,1,mm)%w(1:,1:,1:,:)
-             enddo
-          enddo
+                domainLoop1: do nn = 1, nDom
 
-       endif testAllocSolRead
+                    ! Set the pointer for the BCData to make the code more
+                    ! readable.
 
-    end select
+                    BCData => flowDoms(nn, mgStartlevel, mm)%BCData
 
-  end subroutine setIOVar
+                    ! Loop over the number of boundary subfaces and update
+                    ! the local prescribed velocity direction.
 
-  subroutine setPressureAndComputeEnergy(halosRead)
-    !
-    !       Due to the usage of the variable IOVar, which generalizes the
-    !       IO and leads to reuse of code, currently the pressure is
-    !       stored at the position of rhoE. In this routine that data is
-    !       copied to the pressure array and the total energy is computed.
-    !       Note that this routine is only called when a restart is done.
-    !
-    use constants
-    use blockPointers, only : nDom, p, w, il, jl, kl
-    use flowVarRefState, only : kPresent
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use utils, only : setPointers
-    use flowUtils, only : computeEtotBlock
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    logical, intent(in) :: halosRead
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: sps, nn, nHalo
-    integer(kind=intType) :: i, j, k
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
-
-    ! Set the value of nHalo, depending whether or not the halo cells
-    ! have been read from the restart file.
-
-    nHalo = 0
-    if( halosRead ) nHalo = 1
-
-    ! Loop over the number of time instances and the local blocks.
-    ! As this routine is only called when a restart is performed,
-    ! the MG start level is the finest level.
-
-    do sps=1,nTimeIntervalsSpectral
-       do nn=1,nDom
-
-          ! Set the pointers to the correct block. As this routine is
-          ! only called when a restart is performed, the MG start level
-          ! is the finest level.
-
-          call setPointers(nn,1_intType,sps)
-
-          ! Determine the range for which the pressure must be computed.
-
-          iBeg = 2 -nHalo; jBeg =  2-nHalo; kBeg =  2-nHalo
-          iEnd = il+nHalo; jEnd = jl+nHalo; kEnd = kl+nHalo
-
-          ! Copy the pressure for the required cells.
-
-          do k=kBeg,kEnd
-             do j=jBeg,jEnd
-                do i=iBeg,iEnd
-                   p(i,j,k) = w(i,j,k,irhoE)
-                enddo
-             enddo
-          enddo
-
-          ! Compute the total energy as well.
-
-          call computeEtotBlock(iBeg, iEnd, jBeg, jEnd, kBeg, kEnd, kPresent)
-       enddo
-    enddo
-
-  end subroutine setPressureAndComputeEnergy
-  subroutine setRestartFiles(fileName, i)
-    !
-    !          Populates the restartfiles
-    !          The array is populated from Python using setRestartFiles
-    !
-    use constants
-    use inputIO, only : restartFiles
-    implicit none
-    !
-    !      Subroutine argument.
-    !
-    character(len=*), intent(inout) :: fileName
-    integer(kind=intType) :: i
-
-
-
-    restartFiles(i) = fileName
-
-  end subroutine setRestartFiles
-
-  subroutine setUniformFlow
-    !
-    !       setUniformFlow set the flow variables of all local blocks on
-    !       the start level to the uniform flow field.
-    !
-    use constants
-    use blockPointers, only : w, dw, fw, flowDoms, ib, jb, kb, &
-         rev, rlv, nDom, BCData, p
-    use communication
-    use flowVarRefState, only : eddyModel, viscous, muInf, nw, nwf, &
-         pInfCorr, wInf
-    use inputIteration, only : mgStartLevel
-    use inputPhysics, only : equationMode, flowType, eddyVisInfRatio
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use utils, only : setPointers
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
+                    do l = 1, flowDoms(nn, mgStartlevel, mm)%nBocos
+                        call velMagnAndDirectionSubface(tmp, dirLoc, BCData, l)
+                    end do
+                end do domainLoop1
 
-    integer(kind=intType) :: nn, mm, i, j, k, l
+                ! Determine the sum of dirLoc and create a unit vector
+                ! for the global direction.
 
-    real(kind=realType) :: tmp
+                call mpi_allreduce(dirLoc, dirGlob, 3, adflow_real, mpi_sum, &
+                                   ADflow_comm_world, ierr)
 
-    real(kind=realType), dimension(3) :: dirLoc, dirGlob
+                tmp = one / max(eps, sqrt(dirGlob(1)**2 &
+                                          + dirGlob(2)**2 &
+                                          + dirGlob(3)**2))
+                dirGlob(1) = tmp * dirGlob(1)
+                dirGlob(2) = tmp * dirGlob(2)
+                dirGlob(3) = tmp * dirGlob(3)
 
-    ! Loop over the number of spectral solutions and blocks.
-    spectralLoop: do mm=1,nTimeIntervalsSpectral
-       domains: do nn=1,nDom
+                ! Loop again over the local domains and correct the
+                ! velocity direction.
 
-          ! Set the pointers for this block.
+                domainsLoop2: do nn = 1, nDom
 
-          call setPointers(nn,mgStartlevel,mm)
+                    ! Set the pointers for this block.
 
-          ! Set the w-variables to the ones of the uniform flow field.
-          do l=1,nw
-             do k=0,kb
-                do j=0,jb
-                   do i=0,ib
-                      w(i,j,k,l) = wInf(l)
-                      dw(i,j,k,l) = zero
-                   enddo
-                enddo
-             enddo
-          enddo
-          !set this here for a reinitialize flow to eliminate possible NAN's
-          do l=1,nwf
-             do k=0,kb
-                do j=0,jb
-                   do i=0,ib
-                      fw(i,j,k,l) = zero
-                   enddo
-                enddo
-             enddo
-          enddo
+                    call setPointers(nn, mgStartlevel, mm)
 
-          ! Set the pressure.
+                    do k = 0, kb
+                        do j = 0, jb
+                            do i = 0, ib
+                                tmp = sqrt(w(i, j, k, ivx)**2 &
+                                           + w(i, j, k, ivy)**2 &
+                                           + w(i, j, k, ivz)**2)
+                                w(i, j, k, ivx) = tmp * dirGlob(1)
+                                w(i, j, k, ivy) = tmp * dirGlob(2)
+                                w(i, j, k, ivz) = tmp * dirGlob(3)
+                            end do
+                        end do
+                    end do
 
-          p = pInfCorr
+                end do domainsLoop2
+            end do spectralLoopCorr
 
-          ! Initialize the laminar and eddy viscosity, if appropriate,
-          ! such that no uninitialized memory is present.
+        end if testCorrection
 
-          if( viscous )   rlv = muInf
-          if( eddyModel ) rev = eddyVisInfRatio*muInf
+    end subroutine setUniformFlow
 
-       enddo domains
-    enddo spectralLoop
+    !=================================================================
 
-    ! Correct for the time spectral method in combination with an
-    ! internal flow computation the velocity direction.
-    ! It is possible that the prescribed direction is different
-    ! for every time instance.
+    subroutine velMagnAndDirectionSubface(vmag, dir, BCData, mm)
+        !
+        !       VelMagnAndDirectionSubface determines the maximum value
+        !       of the magnitude of the velocity as well as the sum of the
+        !       flow directions for the currently active subface.
+        !
+        use constants
+        use block
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: mm
 
-    testCorrection: if(equationMode == timeSpectral .and. &
-         flowType     == internalFlow) then
+        real(kind=realType), intent(out) :: vmag
+        real(kind=realType), dimension(3), intent(inout) :: dir
 
-       ! Loop over the number of spectral solutions.
+        type(BCDataType), dimension(:), pointer :: BCData
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j
+        real(kind=realType) :: vel
 
-       spectralLoopCorr: do mm=1,nTimeIntervalsSpectral
+        ! Initialize vmag to -1.0.
 
-          ! Initialize the local direction to zero. In the loop
-          ! below this direction will be accumulated.
+        vmag = -one
 
-          dirLoc = zero
+        ! Check if the velocity is prescribed.
 
-          ! Loop over the number of blocks and determine the average
-          ! velocity direction prescribed at the inlets.
+        if (associated(BCData(mm)%velx) .and. &
+            associated(BCData(mm)%vely) .and. &
+            associated(BCData(mm)%velz)) then
 
-          domainLoop1: do nn=1,nDom
+            ! Loop over the owned faces of the subface. As the cell range
+            ! may contain halo values, the nodal range is used.
 
-             ! Set the pointer for the BCData to make the code more
-             ! readable.
+            do j = (BCData(mm)%jnBeg + 1), BCData(mm)%jnEnd
+                do i = (BCData(mm)%inBeg + 1), BCData(mm)%inEnd
 
-             BCData => flowDoms(nn,mgStartlevel,mm)%BCData
+                    ! Compute the magnitude of the velocity and compare it
+                    ! with the current maximum. Store the maximum of the two.
 
-             ! Loop over the number of boundary subfaces and update
-             ! the local prescribed velocity direction.
+                    vel = sqrt(BCData(mm)%velx(i, j)**2 &
+                               + BCData(mm)%vely(i, j)**2 &
+                               + BCData(mm)%velz(i, j)**2)
+                    vmag = max(vmag, vel)
 
-             do l=1,flowDoms(nn,mgStartlevel,mm)%nBocos
-                call velMagnAndDirectionSubface(tmp, dirLoc, BCData, l)
-             enddo
-          enddo domainLoop1
+                    ! Compute the unit vector of the velocity and add it to dir.
 
-          ! Determine the sum of dirLoc and create a unit vector
-          ! for the global direction.
+                    vel = one / max(eps, vel)
+                    dir(1) = dir(1) + vel * BCData(mm)%velx(i, j)
+                    dir(2) = dir(2) + vel * BCData(mm)%vely(i, j)
+                    dir(3) = dir(3) + vel * BCData(mm)%velz(i, j)
 
-          call mpi_allreduce(dirLoc, dirGlob, 3, adflow_real, mpi_sum, &
-               ADflow_comm_world, ierr)
+                end do
+            end do
+        end if
 
-          tmp         = one/max(eps,sqrt(dirGlob(1)**2 &
-               +                  dirGlob(2)**2 &
-               +                  dirGlob(3)**2))
-          dirGlob(1) = tmp*dirGlob(1)
-          dirGlob(2) = tmp*dirGlob(2)
-          dirGlob(3) = tmp*dirGlob(3)
+        ! Check if the velocity direction is prescribed.
 
-          ! Loop again over the local domains and correct the
-          ! velocity direction.
+        if (associated(BCData(mm)%flowXdirInlet) .and. &
+            associated(BCData(mm)%flowYdirInlet) .and. &
+            associated(BCData(mm)%flowZdirInlet)) then
 
-          domainsLoop2: do nn=1,nDom
+            ! Add the unit vectors to dir by looping over the owned
+            ! faces of the subfaces. Again the nodal range must be
+            ! used for this.
 
-             ! Set the pointers for this block.
+            do j = (BCData(mm)%jnBeg + 1), BCData(mm)%jnEnd
+                do i = (BCData(mm)%inBeg + 1), BCData(mm)%inEnd
 
-             call setPointers(nn,mgStartlevel,mm)
+                    dir(1) = dir(1) + BCData(mm)%flowXdirInlet(i, j)
+                    dir(2) = dir(2) + BCData(mm)%flowYdirInlet(i, j)
+                    dir(3) = dir(3) + BCData(mm)%flowZdirInlet(i, j)
 
-             do k=0,kb
-                do j=0,jb
-                   do i=0,ib
-                      tmp          = sqrt(w(i,j,k,ivx)**2 &
-                           +      w(i,j,k,ivy)**2 &
-                           +      w(i,j,k,ivz)**2)
-                      w(i,j,k,ivx) = tmp*dirGlob(1)
-                      w(i,j,k,ivy) = tmp*dirGlob(2)
-                      w(i,j,k,ivz) = tmp*dirGlob(3)
-                   enddo
-                enddo
-             enddo
+                end do
+            end do
 
-          enddo domainsLoop2
-       enddo spectralLoopCorr
+        end if
 
-    endif testCorrection
+    end subroutine velMagnAndDirectionSubface
+    subroutine timeSpectralCoef(coefSpectral, matrixCoefSpectral, &
+                                diagMatCoefSpectral)
+        !
+        !       timeSpectralCoef computes the time integration coefficients
+        !       for the time spectral method. As it is possible that sections
+        !       have different periodic times these coefficients are
+        !       determined for all the sections. For vector quantities, such
+        !       as momentum, these coefficients can also be different due to
+        !       rotation and the fact that only a part of the wheel is
+        !       simulated.
+        !
+        use constants
+        use flowVarRefState, only: timeRef
+        use inputTimeSpectral, only: nTimeIntervalsSpectral, rotMatrixSpectral
+        use section, only: nSections, sections
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        real(kind=realType), &
+            dimension(nSections, nTimeIntervalsSpectral - 1), &
+            intent(out) :: coefSpectral
+        real(kind=realType), &
+            dimension(nSections, nTimeIntervalsSpectral - 1, 3, 3), &
+            intent(out) :: matrixCoefSpectral
+        real(kind=realType), dimension(nSections, 3, 3), &
+            intent(out) :: diagMatCoefSpectral
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: pp, nn, mm, ii, i, j, ntot
+        real(kind=realType) :: coef, dAngle, angle, fact, slicesFact
 
-  end subroutine setUniformFlow
+        real(kind=realType), dimension(3, 3) :: rotMat, tmp
 
-  !=================================================================
+        ! Loop over the number of sections.
 
-  subroutine velMagnAndDirectionSubface(vmag, dir, BCData, mm)
-    !
-    !       VelMagnAndDirectionSubface determines the maximum value
-    !       of the magnitude of the velocity as well as the sum of the
-    !       flow directions for the currently active subface.
-    !
-    use constants
-    use block
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: mm
-
-    real(kind=realType), intent(out) :: vmag
-    real(kind=realType), dimension(3), intent(inout) :: dir
-
-    type(BCDataType), dimension(:), pointer :: BCData
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j
-    real(kind=realType)   :: vel
-
-    ! Initialize vmag to -1.0.
-
-    vmag = -one
-
-    ! Check if the velocity is prescribed.
-
-    if( associated(BCData(mm)%velx) .and. &
-         associated(BCData(mm)%vely) .and. &
-         associated(BCData(mm)%velz) ) then
-
-       ! Loop over the owned faces of the subface. As the cell range
-       ! may contain halo values, the nodal range is used.
-
-       do j=(BCData(mm)%jnBeg+1),BCData(mm)%jnEnd
-          do i=(BCData(mm)%inBeg+1),BCData(mm)%inEnd
-
-             ! Compute the magnitude of the velocity and compare it
-             ! with the current maximum. Store the maximum of the two.
-
-             vel  = sqrt(BCData(mm)%velx(i,j)**2 &
-                  +      BCData(mm)%vely(i,j)**2 &
-                  +      BCData(mm)%velz(i,j)**2)
-             vmag = max(vmag, vel)
-
-             ! Compute the unit vector of the velocity and add it to dir.
-
-             vel    = one/max(eps,vel)
-             dir(1) = dir(1) + vel*BCData(mm)%velx(i,j)
-             dir(2) = dir(2) + vel*BCData(mm)%vely(i,j)
-             dir(3) = dir(3) + vel*BCData(mm)%velz(i,j)
-
-          enddo
-       enddo
-    endif
-
-    ! Check if the velocity direction is prescribed.
-
-    if( associated(BCData(mm)%flowXdirInlet) .and. &
-         associated(BCData(mm)%flowYdirInlet) .and. &
-         associated(BCData(mm)%flowZdirInlet) ) then
-
-       ! Add the unit vectors to dir by looping over the owned
-       ! faces of the subfaces. Again the nodal range must be
-       ! used for this.
-
-       do j=(BCData(mm)%jnBeg+1),BCData(mm)%jnEnd
-          do i=(BCData(mm)%inBeg+1),BCData(mm)%inEnd
-
-             dir(1) = dir(1) + BCData(mm)%flowXdirInlet(i,j)
-             dir(2) = dir(2) + BCData(mm)%flowYdirInlet(i,j)
-             dir(3) = dir(3) + BCData(mm)%flowZdirInlet(i,j)
-
-          enddo
-       enddo
-
-    endif
-
-  end subroutine velMagnAndDirectionSubface
-  subroutine timeSpectralCoef(coefSpectral, matrixCoefSpectral, &
-       diagMatCoefSpectral)
-    !
-    !       timeSpectralCoef computes the time integration coefficients
-    !       for the time spectral method. As it is possible that sections
-    !       have different periodic times these coefficients are
-    !       determined for all the sections. For vector quantities, such
-    !       as momentum, these coefficients can also be different due to
-    !       rotation and the fact that only a part of the wheel is
-    !       simulated.
-    !
-    use constants
-    use flowVarRefState, only : timeRef
-    use inputTimeSpectral, only : nTimeIntervalsSpectral, rotMatrixSpectral
-    use section, only : nSections, sections
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    real(kind=realType),                               &
-         dimension(nSections,nTimeIntervalsSpectral-1), &
-         intent(out) :: coefSpectral
-    real(kind=realType),                                    &
-         dimension(nSections,nTimeIntervalsSpectral-1,3,3), &
-         intent(out) :: matrixCoefSpectral
-    real(kind=realType), dimension(nSections,3,3), &
-         intent(out) :: diagMatCoefSpectral
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: pp, nn, mm, ii, i, j, ntot
-    real(kind=realType)   :: coef, dAngle, angle, fact, slicesFact
+        sectionLoop: do mm = 1, nSections
 
-    real(kind=realType), dimension(3,3) :: rotMat, tmp
+            ! Initialize dAngle (smallest angle in the cotangent function)
+            ! and coef, which is the multiplication factor in front of the
+            ! cotangent/cosecant function. Coef is a combination of the 1/2
+            ! and the 2*pi/timePeriod
 
-    ! Loop over the number of sections.
+            dAngle = pi / real(nTimeIntervalsSpectral, realType)
+            coef = pi * timeRef / sections(mm)%timePeriod
 
-    sectionLoop: do mm=1,nSections
+            ! Computation of the scalar coefficients.
 
-       ! Initialize dAngle (smallest angle in the cotangent function)
-       ! and coef, which is the multiplication factor in front of the
-       ! cotangent/cosecant function. Coef is a combination of the 1/2
-       ! and the 2*pi/timePeriod
+            scalarLoop: do nn = 1, (nTimeIntervalsSpectral - 1)
 
-       dAngle = pi/real(nTimeIntervalsSpectral,realType)
-       coef   = pi*timeRef/sections(mm)%timePeriod
+                angle = nn * dAngle
 
-       ! Computation of the scalar coefficients.
+                ! The coefficient for an odd and even number of time
+                ! instances are different; the former is 1/sin, the
+                ! latter cos/sin or 1/tan.
 
-       scalarLoop: do nn=1,(nTimeIntervalsSpectral-1)
+                coefSpectral(mm, nn) = coef / sin(angle)
 
-          angle = nn*dAngle
+                if (mod(nTimeIntervalsSpectral, 2_intType) == 0) &
+                    coefSpectral(mm, nn) = coefSpectral(mm, nn) * cos(angle)
 
-          ! The coefficient for an odd and even number of time
-          ! instances are different; the former is 1/sin, the
-          ! latter cos/sin or 1/tan.
+                ! Negate coef for the next spectral coefficient.
 
-          coefSpectral(mm,nn) = coef/sin(angle)
+                coef = -coef
 
-          if (mod(nTimeIntervalsSpectral,2_intType) == 0) &
-               coefSpectral(mm,nn) = coefSpectral(mm,nn)*cos(angle)
+            end do scalarLoop
 
-          ! Negate coef for the next spectral coefficient.
+            ! Initialize dAngle to the smallest angle in the cotangent
+            ! or cosecant function. Now this angle is for the entire wheel,
+            ! i.e. the number of slices must be taken into account.
 
-          coef = -coef
+            ntot = nTimeIntervalsSpectral * sections(mm)%nSlices
+            dAngle = pi / real(ntot, realType)
 
-       enddo scalarLoop
+            ! Initialize the rotation matrix to the unity matrix.
 
-       ! Initialize dAngle to the smallest angle in the cotangent
-       ! or cosecant function. Now this angle is for the entire wheel,
-       ! i.e. the number of slices must be taken into account.
+            rotMat(1, 1) = one; rotMat(1, 2) = zero; rotMat(1, 3) = zero
+            rotMat(2, 1) = zero; rotMat(2, 2) = one; rotMat(2, 3) = zero
+            rotMat(3, 1) = zero; rotMat(3, 2) = zero; rotMat(3, 3) = one
 
-       ntot   = nTimeIntervalsSpectral*sections(mm)%nSlices
-       dAngle = pi/real(ntot,realType)
+            ! Loop over the number of spectral coefficient to initialize the
+            ! matrix coefficients; this is basically pp == 0 in the loop
+            ! over the number slices. Use is made of the fact that the
+            ! rotation matrix is the identity for pp == 0.
+            ! coef changes sign at every time instance
 
-       ! Initialize the rotation matrix to the unity matrix.
+            slicesFact = one / real(sections(mm)%nSlices, realType)
+            fact = one
 
-       rotMat(1,1) = one;  rotMat(1,2) = zero; rotMat(1,3) = zero
-       rotMat(2,1) = zero; rotMat(2,2) = one;  rotMat(2,3) = zero
-       rotMat(3,1) = zero; rotMat(3,2) = zero; rotMat(3,3) = one
+            do nn = 1, (nTimeIntervalsSpectral - 1)
 
-       ! Loop over the number of spectral coefficient to initialize the
-       ! matrix coefficients; this is basically pp == 0 in the loop
-       ! over the number slices. Use is made of the fact that the
-       ! rotation matrix is the identity for pp == 0.
-       ! coef changes sign at every time instance
+                ! Determine the scalar coefficient. This value depends now
+                ! whether the total number of time instances in the wheel is
+                ! odd or even.
 
-       slicesFact = one/real(sections(mm)%nSlices,realType)
-       fact = one
+                angle = nn * dAngle
+                coef = one / sin(angle)
 
-       do nn=1,(nTimeIntervalsSpectral-1)
+                if (mod(ntot, 2_intType) == 0) &
+                    coef = coef * cos(angle)
 
-          ! Determine the scalar coefficient. This value depends now
-          ! whether the total number of time instances in the wheel is
-          ! odd or even.
+                coef = coef * fact * slicesFact
 
-          angle = nn*dAngle
-          coef  = one/sin(angle)
+                ! The first part of matrixCoefSpectral is a diagonal matrix,
+                ! because this indicates the contribution of the current
+                ! slice to the time derivative.
 
-          if (mod(ntot,2_intType) == 0) &
-               coef = coef*cos(angle)
+                matrixCoefSpectral(mm, nn, 1, 1) = coef
+                matrixCoefSpectral(mm, nn, 1, 2) = zero
+                matrixCoefSpectral(mm, nn, 1, 3) = zero
 
-          coef = coef*fact*slicesFact
+                matrixCoefSpectral(mm, nn, 2, 1) = zero
+                matrixCoefSpectral(mm, nn, 2, 2) = coef
+                matrixCoefSpectral(mm, nn, 2, 3) = zero
 
-          ! The first part of matrixCoefSpectral is a diagonal matrix,
-          ! because this indicates the contribution of the current
-          ! slice to the time derivative.
+                matrixCoefSpectral(mm, nn, 3, 1) = zero
+                matrixCoefSpectral(mm, nn, 3, 2) = zero
+                matrixCoefSpectral(mm, nn, 3, 3) = coef
 
-          matrixCoefSpectral(mm,nn,1,1) = coef
-          matrixCoefSpectral(mm,nn,1,2) = zero
-          matrixCoefSpectral(mm,nn,1,3) = zero
+                fact = -fact
 
-          matrixCoefSpectral(mm,nn,2,1) = zero
-          matrixCoefSpectral(mm,nn,2,2) = coef
-          matrixCoefSpectral(mm,nn,2,3) = zero
+            end do
 
-          matrixCoefSpectral(mm,nn,3,1) = zero
-          matrixCoefSpectral(mm,nn,3,2) = zero
-          matrixCoefSpectral(mm,nn,3,3) = coef
+            ! Initialize diagMatCoefSpectral to zero, because the
+            ! starting index in the loop over the number of slices -1 is
+            ! 1, i.e. the slice where the actual computation takes places
+            ! does not contribute to diagMatCoefSpectral.
 
-          fact = -fact
+            do j = 1, 3
+                do i = 1, 3
+                    diagMatCoefSpectral(mm, i, j) = zero
+                end do
+            end do
 
-       enddo
+            ! Loop over the additional slices which complete an entire
+            ! revolution. To be able to compute the coefficients a bit
+            ! easier the loop runs from 1 to nSlices-1 and not from
+            ! 2 to nSlices.
 
-       ! Initialize diagMatCoefSpectral to zero, because the
-       ! starting index in the loop over the number of slices -1 is
-       ! 1, i.e. the slice where the actual computation takes places
-       ! does not contribute to diagMatCoefSpectral.
+            slicesLoop: do pp = 1, (sections(mm)%nSlices - 1)
 
-       do j=1,3
-          do i=1,3
-             diagMatCoefSpectral(mm,i,j) = zero
-          enddo
-       enddo
+                ! Compute the rotation matrix for this slice. This is the
+                ! old one multiplied by the transformation matrix going from
+                ! one slices to the next. Use tmp as temporary storage.
 
-       ! Loop over the additional slices which complete an entire
-       ! revolution. To be able to compute the coefficients a bit
-       ! easier the loop runs from 1 to nSlices-1 and not from
-       ! 2 to nSlices.
+                do j = 1, 3
+                    do i = 1, 3
+                        tmp(i, j) = rotMatrixSpectral(mm, i, 1) * rotMat(1, j) &
+                                    + rotMatrixSpectral(mm, i, 2) * rotMat(2, j) &
+                                    + rotMatrixSpectral(mm, i, 3) * rotMat(3, j)
+                    end do
+                end do
 
-       slicesLoop: do pp=1,(sections(mm)%nSlices-1)
+                rotMat = tmp
 
-          ! Compute the rotation matrix for this slice. This is the
-          ! old one multiplied by the transformation matrix going from
-          ! one slices to the next. Use tmp as temporary storage.
+                slicesFact = one / real(sections(mm)%nSlices, realType)
 
-          do j=1,3
-             do i=1,3
-                tmp(i,j) = rotMatrixSpectral(mm,i,1)*rotMat(1,j) &
-                     + rotMatrixSpectral(mm,i,2)*rotMat(2,j) &
-                     + rotMatrixSpectral(mm,i,3)*rotMat(3,j)
-             enddo
-          enddo
+                ! Loop over the number of spectral coefficients and update
+                ! matrixCoefSpectral. The multiplication with (-1)**nn
+                ! takes place here too.
 
-          rotMat = tmp
+                ! Multiply also by the term (-1)**(pN+1)
 
-          slicesFact = one/real(sections(mm)%nSlices,realType)
+                fact = one
+                if (mod(pp * nTimeIntervalsSpectral, 2_intType) /= 0) &
+                    fact = -one
+                slicesFact = fact * slicesFact
 
-          ! Loop over the number of spectral coefficients and update
-          ! matrixCoefSpectral. The multiplication with (-1)**nn
-          ! takes place here too.
+                fact = one
+                ii = pp * nTimeIntervalsSpectral
+                do nn = 1, (nTimeIntervalsSpectral - 1)
 
-          ! Multiply also by the term (-1)**(pN+1)
+                    ! Compute the coefficient multiplying the rotation matrix.
+                    ! Again make a distinction between an odd and an even
+                    ! number of time instances for the entire wheel.
 
-          fact = one
-          if (mod(pp*nTimeIntervalsSpectral,2_intType) /= 0) &
-               fact = -one
-          slicesFact = fact*slicesFact
+                    angle = (nn + ii) * dAngle
+                    coef = one / sin(angle)
 
-          fact = one
-          ii = pp*nTimeIntervalsSpectral
-          do nn=1,(nTimeIntervalsSpectral-1)
+                    if (mod(ntot, 2_intType) == 0) &
+                        coef = coef * cos(angle)
 
-             ! Compute the coefficient multiplying the rotation matrix.
-             ! Again make a distinction between an odd and an even
-             ! number of time instances for the entire wheel.
+                    coef = coef * fact * slicesFact
 
-             angle = (nn+ii)*dAngle
-             coef  = one/sin(angle)
+                    ! Update matrixCoefSpectral.
 
-             if (mod(ntot,2_intType) == 0) &
-                  coef = coef*cos(angle)
+                    do j = 1, 3
+                        do i = 1, 3
+                            matrixCoefSpectral(mm, nn, i, j) = &
+                                matrixCoefSpectral(mm, nn, i, j) + coef * rotMat(i, j)
+                        end do
+                    end do
 
-             coef = coef*fact*slicesFact
+                    fact = -fact
 
-             ! Update matrixCoefSpectral.
+                end do
 
-             do j=1,3
-                do i=1,3
-                   matrixCoefSpectral(mm,nn,i,j) = &
-                        matrixCoefSpectral(mm,nn,i,j) + coef*rotMat(i,j)
-                enddo
-             enddo
+                ! Update diagMatCoefSpectral. Also here the distinction
+                ! between odd and even number of time instances.
 
-             fact = -fact
+                angle = ii * dAngle
+                coef = one / sin(angle)
 
-          enddo
+                if (mod(ntot, 2_intType) == 0) &
+                    coef = coef * cos(angle)
 
-          ! Update diagMatCoefSpectral. Also here the distinction
-          ! between odd and even number of time instances.
+                coef = coef * slicesFact
 
-          angle = ii*dAngle
-          coef  = one/sin(angle)
+                do j = 1, 3
+                    do i = 1, 3
+                        diagMatCoefSpectral(mm, i, j) = &
+                            diagMatCoefSpectral(mm, i, j) - coef * rotMat(i, j)
+                    end do
+                end do
 
-          if (mod(ntot,2_intType) == 0) &
-               coef = coef*cos(angle)
+            end do slicesLoop
 
-          coef = coef*slicesFact
+            ! The matrix coefficients must be multiplied by the leading
+            ! coefficient, which depends on the actual periodic time.
 
-          do j=1,3
-             do i=1,3
-                diagMatCoefSpectral(mm,i,j) = &
-                     diagMatCoefSpectral(mm,i,j) - coef*rotMat(i,j)
-             enddo
-          enddo
+            coef = pi * timeRef / sections(mm)%timePeriod
 
-       enddo slicesLoop
+            do j = 1, 3
+                do i = 1, 3
+                    diagMatCoefSpectral(mm, i, j) = &
+                        coef * diagMatCoefSpectral(mm, i, j)
+                end do
+            end do
 
-       ! The matrix coefficients must be multiplied by the leading
-       ! coefficient, which depends on the actual periodic time.
+            do nn = 1, (nTimeIntervalsSpectral - 1)
 
-       coef = pi*timeRef/sections(mm)%timePeriod
+                do j = 1, 3
+                    do i = 1, 3
+                        matrixCoefSpectral(mm, nn, i, j) = &
+                            coef * matrixCoefSpectral(mm, nn, i, j)
+                    end do
+                end do
 
-       do j=1,3
-          do i=1,3
-             diagMatCoefSpectral(mm,i,j) = &
-                  coef*diagMatCoefSpectral(mm,i,j)
-          enddo
-       enddo
+            end do
 
-       do nn=1,(nTimeIntervalsSpectral-1)
+        end do sectionLoop
 
-          do j=1,3
-             do i=1,3
-                matrixCoefSpectral(mm,nn,i,j) = &
-                     coef*matrixCoefSpectral(mm,nn,i,j)
-             enddo
-          enddo
+    end subroutine timeSpectralCoef
 
-       end do
+    subroutine timeSpectralMatrices
+        !
+        !       timeSpectralMatrices computes the matrices for the time
+        !       derivative of the time spectral method for all sections. For
+        !       scalar quantities these matrices only differ if sections have
+        !       different periodic times. For vector quantities, such as
+        !       momentum, these matrices can be different depending on whether
+        !       the section is rotating or not and the number of slices
+        !       present.
+        !
+        use constants
+        use inputPhysics, only: equationMode
+        use inputTimeSpectral, only: nTimeIntervalsSpectral, dscalar, dvector, &
+                                     rotMatrixSpectral
+        use section, only: nSections
+        use utils, only: terminate
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
 
-    enddo sectionLoop
+        integer(kind=intType) :: nn, mm, ll, kk, ii
+        integer(kind=intType) :: i, j
 
-  end subroutine timeSpectralCoef
+        real(kind=realType), dimension(3, 3) :: tmpMat
 
-  subroutine timeSpectralMatrices
-    !
-    !       timeSpectralMatrices computes the matrices for the time
-    !       derivative of the time spectral method for all sections. For
-    !       scalar quantities these matrices only differ if sections have
-    !       different periodic times. For vector quantities, such as
-    !       momentum, these matrices can be different depending on whether
-    !       the section is rotating or not and the number of slices
-    !       present.
-    !
-    use constants
-    use inputPhysics, only : equationMode
-    use inputTimeSpectral, only : nTimeIntervalsSpectral, dscalar, dvector, &
-         rotMatrixSpectral
-    use section, only: nSections
-    use utils, only : terminate
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
+        real(kind=realType), dimension(:, :), allocatable :: coefSpectral
+        real(kind=realType), dimension(:, :, :, :), allocatable :: &
+            matrixCoefSpectral
+        real(kind=realType), dimension(:, :, :), allocatable :: &
+            diagMatCoefSpectral
+        !
+        ! This routine is only used for the spectral solutions. Return
+        ! immediately if a different mode is solved.
 
-    integer(kind=intType) :: nn, mm, ll, kk, ii
-    integer(kind=intType) :: i, j
+        if (equationMode /= timeSpectral) return
 
-    real(kind=realType), dimension(3,3) :: tmpMat
+        ! Allocate the memory for the matrices as well as the help
+        ! variables needed to construct these matrices.
 
-    real(kind=realType), dimension(:,:), allocatable :: coefSpectral
-    real(kind=realType), dimension(:,:,:,:), allocatable :: &
-         matrixCoefSpectral
-    real(kind=realType), dimension(:,:,:), allocatable :: &
-         diagMatCoefSpectral
-    !
-    ! This routine is only used for the spectral solutions. Return
-    ! immediately if a different mode is solved.
+        !added to allow second call in mdUpdateRoutines
+        if (allocated(dscalar)) deallocate (dscalar)
+        if (allocated(dvector)) deallocate (dvector)
 
-    if(equationMode /= timeSpectral) return
+        nn = nTimeIntervalsSpectral
+        mm = 3 * nn
+        kk = nn - 1
 
-    ! Allocate the memory for the matrices as well as the help
-    ! variables needed to construct these matrices.
+        allocate (dscalar(nSections, nn, nn), &
+                  dvector(nSections, mm, mm), &
+                  coefSpectral(nSections, kk), &
+                  matrixCoefSpectral(nSections, kk, 3, 3), &
+                  diagMatCoefSpectral(nSections, 3, 3), stat=ierr)
+        if (ierr /= 0) &
+             call terminate("timeSpectralMatrices", &
+             "Memory allocation failure for the matrices of &
+             &the spectral time derivatives.")
 
-    !added to allow second call in mdUpdateRoutines
-    if( allocated(dscalar))deallocate(dscalar)
-    if( allocated(dvector))deallocate(dvector)
+        ! Determine the help variables needed to construct the
+        ! actual matrices.
 
-    nn = nTimeIntervalsSpectral
-    mm = 3*nn
-    kk = nn - 1
+        call timeSpectralCoef(coefSpectral, matrixCoefSpectral, &
+                              diagMatCoefSpectral)
+        !
+        !       Determine the time derivative matrices for the sections.
+        !
+        ! Loop over the number of sections.
 
+        sectionLoop: do ii = 1, nSections
+            !
+            !         Matrix for scalar quantities.
+            !
+            ! Loop over the number of rows.
 
+            do nn = 1, nTimeIntervalsSpectral
 
-    allocate(dscalar(nSections,nn,nn),             &
-         dvector(nSections,mm,mm),             &
-         coefSpectral(nSections,kk),           &
-         matrixCoefSpectral(nSections,kk,3,3), &
-         diagMatCoefSpectral(nSections,3,3), stat=ierr)
-    if(ierr /= 0)                              &
-         call terminate("timeSpectralMatrices", &
-         "Memory allocation failure for the matrices of &
-         &the spectral time derivatives.")
+                ! Set the diagonal element to zero, i.e. there is no
+                ! contribution to the own time derivative.
 
-    ! Determine the help variables needed to construct the
-    ! actual matrices.
+                dscalar(ii, nn, nn) = zero
 
-    call timeSpectralCoef(coefSpectral, matrixCoefSpectral, &
-         diagMatCoefSpectral)
-    !
-    !       Determine the time derivative matrices for the sections.
-    !
-    ! Loop over the number of sections.
+                ! Loop over the rest of the columns.
 
-    sectionLoop: do ii=1,nSections
-       !
-       !         Matrix for scalar quantities.
-       !
-       ! Loop over the number of rows.
+                do mm = 1, (nTimeIntervalsSpectral - 1)
 
-       do nn=1,nTimeIntervalsSpectral
+                    ! Determine the corresponding column index.
 
-          ! Set the diagonal element to zero, i.e. there is no
-          ! contribution to the own time derivative.
+                    ll = nn + mm
+                    if (ll > nTimeIntervalsSpectral) &
+                        ll = ll - nTimeIntervalsSpectral
 
-          dscalar(ii,nn,nn) = zero
+                    ! Store the corresponding coefficient in dscalar.
 
-          ! Loop over the rest of the columns.
+                    dscalar(ii, nn, ll) = coefSpectral(ii, mm)
 
-          do mm=1,(nTimeIntervalsSpectral - 1)
+                end do
+            end do
+            !
+            !         Matrices for vector quantities.
+            !
+            ! Loop over the number of time intervals; the number of rows
+            ! is 3 times this number.
+
+            rowLoop: do nn = 1, nTimeIntervalsSpectral
+
+                ! Initialize the diagonal block to diagMatCoefSpectral,
+                ! the additional diagonal entry needed for the rotational
+                ! periodicity.
+
+                kk = 3 * (nn - 1)
+                do j = 1, 3
+                    do i = 1, 3
+                        dvector(ii, kk + i, kk + j) = diagMatCoefSpectral(ii, i, j)
+                    end do
+                end do
 
-             ! Determine the corresponding column index.
+                ! Loop over the other time intervals, which contribute to
+                ! the time derivative.
+
+                columnLoop: do mm = 1, (nTimeIntervalsSpectral - 1)
+
+                    ! Determine the corresponding column index and check the
+                    ! situation we are having here.
+
+                    ll = nn + mm
+                    if (ll > nTimeIntervalsSpectral) then
+
+                        ! Index is outside the range and a shift must be applied.
+
+                        ll = ll - nTimeIntervalsSpectral
+
+                        ! The vector must be rotated. This effect is incorporated
+                        ! directly in the matrix of time derivatives.
+
+                        do j = 1, 3
+                            do i = 1, 3
+                                tmpMat(i, j) = matrixCoefSpectral(ii, mm, i, 1) &
+                                               * rotMatrixSpectral(ii, 1, j) &
+                                               + matrixCoefSpectral(ii, mm, i, 2) &
+                                               * rotMatrixSpectral(ii, 2, j) &
+                                               + matrixCoefSpectral(ii, mm, i, 3) &
+                                               * rotMatrixSpectral(ii, 3, j)
+                            end do
+                        end do
+
+                    else
+
+                        ! Index is in the range. Copy the matrix coefficient
+                        ! into tmpMat.
+
+                        do j = 1, 3
+                            do i = 1, 3
+                                tmpMat(i, j) = matrixCoefSpectral(ii, mm, i, j)
+                            end do
+                        end do
+
+                    end if
+
+                    ! Determine the offset for the column index and store
+                    ! this submatrix in the correct place of dvector.
+
+                    ll = 3 * (ll - 1)
+                    do j = 1, 3
+                        do i = 1, 3
+                            dvector(ii, kk + i, ll + j) = tmpMat(i, j)
+                        end do
+                    end do
+
+                end do columnLoop
+            end do rowLoop
+        end do sectionLoop
+
+        ! Release the memory of the help variables needed to construct
+        ! the matrices of the time derivatives.
+
+        deallocate (coefSpectral, matrixCoefSpectral, &
+                    diagMatCoefSpectral, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("timeSpectralMatrices", &
+                           "Deallocation failure for the help variables.")
+
+    end subroutine timeSpectralMatrices
+
+    subroutine readRestartFile()
+        !
+        !       readRestartFile reads the fine grid solution(s) from the
+        !       restart file(s). If the restart file(s) do not correspond to
+        !       the current mesh, the solution(s) are interpolated onto this
+        !       mesh. It is also allowed to change boundary conditions, e.g.
+        !       an alpha and/or Mach sweep is possible. Furthermore there is
+        !       some support when starting from a different turbulence model,
+        !       although this should be used with care.
+        !
+        use constants
+        use cgnsGrid
+        use su_cgns
+        use variableReading ! Full import since we need basically everything
+        use blockPointers, only: iBegOr, jBegOr, kBegOr, il, jl, kl, nDom, &
+                                 nBKGlobal, nx, ny, nz
+        use communication, only: adflow_comm_world, myid
+        use inputPhysics, only: equationMode
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use monitor, only: nTimeStepsRestart, timeUnsteadyRestart
+        use utils, only: terminate, setPointers
+        use sorting, only: bsearchStrings
+        use commonFormats, only: strings, stringInt1
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: nZones, cellDim, physDim, ierr, nSols
 
-             ll = nn + mm
-             if(ll > nTimeIntervalsSpectral) &
-                  ll = ll - nTimeIntervalsSpectral
+        integer(cgsize_t), dimension(9) :: sizes
+        integer, dimension(9) :: rindSizes
+        integer, dimension(nSolsRead) :: fileIDs
 
-             ! Store the corresponding coefficient in dscalar.
+        integer(kind=intType) :: ii, jj, nn
+        integer(kind=intType) :: nTypeMismatch
+        integer(kind=intType) :: nHiMin, nHjMin, nHkMin
+        integer(kind=intType) :: nHiMax, nHjMax, nHkMax
 
-             dscalar(ii,nn,ll) = coefSpectral(ii,mm)
+        character(len=7) :: integerString
+        character(len=maxCGNSNameLen) :: cgnsName
+        character(len=2*maxStringLen) :: errorMessage
+
+        ! Initialize halosRead to .true. This will be overwritten if
+        ! there is at least one block present for which the halo data
+        ! cannot be read.
+
+        halosRead = .true.
+
+        ! Initialize nTypeMismatch to 0.
+
+        nTypeMismatch = 0
+
+        ! Loop over the number of files to be read and open them.
 
-          enddo
-       enddo
-       !
-       !         Matrices for vector quantities.
-       !
-       ! Loop over the number of time intervals; the number of rows
-       ! is 3 times this number.
-
-       rowLoop: do nn=1,nTimeIntervalsSpectral
-
-          ! Initialize the diagonal block to diagMatCoefSpectral,
-          ! the additional diagonal entry needed for the rotational
-          ! periodicity.
-
-          kk = 3*(nn-1)
-          do j=1,3
-             do i=1,3
-                dvector(ii,kk+i,kk+j) = diagMatCoefSpectral(ii,i,j)
-             enddo
-          enddo
-
-          ! Loop over the other time intervals, which contribute to
-          ! the time derivative.
-
-          columnLoop: do mm=1,(nTimeIntervalsSpectral - 1)
-
-             ! Determine the corresponding column index and check the
-             ! situation we are having here.
-
-             ll = nn + mm
-             if(ll > nTimeIntervalsSpectral) then
-
-                ! Index is outside the range and a shift must be applied.
-
-                ll = ll - nTimeIntervalsSpectral
-
-                ! The vector must be rotated. This effect is incorporated
-                ! directly in the matrix of time derivatives.
-
-                do j=1,3
-                   do i=1,3
-                      tmpMat(i,j) = matrixCoefSpectral(ii,mm,i,1) &
-                           * rotMatrixSpectral(ii,1,j)     &
-                           + matrixCoefSpectral(ii,mm,i,2) &
-                           * rotMatrixSpectral(ii,2,j)     &
-                           + matrixCoefSpectral(ii,mm,i,3) &
-                           * rotMatrixSpectral(ii,3,j)
-                   enddo
-                enddo
-
-             else
-
-                ! Index is in the range. Copy the matrix coefficient
-                ! into tmpMat.
-
-                do j=1,3
-                   do i=1,3
-                      tmpMat(i,j) = matrixCoefSpectral(ii,mm,i,j)
-                   enddo
-                enddo
-
-             endif
-
-             ! Determine the offset for the column index and store
-             ! this submatrix in the correct place of dvector.
-
-             ll = 3*(ll-1)
-             do j=1,3
-                do i=1,3
-                   dvector(ii,kk+i,ll+j) = tmpMat(i,j)
-                enddo
-             enddo
-
-          enddo columnLoop
-       enddo rowLoop
-    enddo sectionLoop
-
-    ! Release the memory of the help variables needed to construct
-    ! the matrices of the time derivatives.
-
-    deallocate(coefSpectral, matrixCoefSpectral, &
-         diagMatCoefSpectral, stat=ierr)
-    if(ierr /= 0)                            &
-         call terminate("timeSpectralMatrices", &
-         "Deallocation failure for the help variables.")
-
-  end subroutine timeSpectralMatrices
-
-
-  subroutine readRestartFile()
-    !
-    !       readRestartFile reads the fine grid solution(s) from the
-    !       restart file(s). If the restart file(s) do not correspond to
-    !       the current mesh, the solution(s) are interpolated onto this
-    !       mesh. It is also allowed to change boundary conditions, e.g.
-    !       an alpha and/or Mach sweep is possible. Furthermore there is
-    !       some support when starting from a different turbulence model,
-    !       although this should be used with care.
-    !
-    use constants
-    use cgnsGrid
-    use su_cgns
-    use variableReading ! Full import since we need basically everything
-    use blockPointers, only: iBegOr, jBegOr, kBegOr, il, jl, kl, nDom, &
-         nBKGlobal, nx, ny, nz
-    use communication, only : adflow_comm_world, myid
-    use inputPhysics, only : equationMode
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use monitor, only : nTimeStepsRestart, timeUnsteadyRestart
-    use utils, only : terminate, setPointers
-    use sorting, only : bsearchStrings
-    use commonFormats, only : strings, stringInt1
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: nZones, cellDim, physDim, ierr, nSols
+        fileOpenLoop: do solID = 1, nSolsRead
 
-    integer(cgsize_t), dimension(9) :: sizes
-    integer, dimension(9) :: rindSizes
-    integer, dimension(nSolsRead) :: fileIDs
+            ! Open the restart file for reading.
 
-    integer(kind=intType) :: ii, jj, nn
-    integer(kind=intType) :: nTypeMismatch
-    integer(kind=intType) :: nHiMin, nHjMin, nHkMin
-    integer(kind=intType) :: nHiMax, nHjMax, nHkMax
+            call cg_open_f(solFiles(solID), mode_read, cgnsInd, ierr)
+            if (ierr /= all_ok) then
+                write (errorMessage, *) "File ", trim(solFiles(solID)), &
+                    " could not be opened for reading"
+                call terminate("readRestartFile", errorMessage)
+            end if
 
-    character(len=7)              :: integerString
-    character(len=maxCGNSNameLen) :: cgnsName
-    character(len=2*maxStringLen) :: errorMessage
+            fileIDs(solID) = cgnsInd
 
-    ! Initialize halosRead to .true. This will be overwritten if
-    ! there is at least one block present for which the halo data
-    ! cannot be read.
+            ! Determine the number of bases in the cgns file.
+            ! This must be at least 1.
 
-    halosRead = .true.
+            call cg_nbases_f(cgnsInd, cgnsBase, ierr)
+            if (ierr /= all_ok) &
+                call terminate("readRestartFile", &
+                               "Something wrong when calling cg_nbases_f")
 
-    ! Initialize nTypeMismatch to 0.
+            if (CGNSBase < 1) then
+                write (errorMessage, *) "CGNS file ", trim(solFiles(solID)), &
+                    " does not contain a base"
+                call terminate("readRestartFile", errorMessage)
+            end if
 
-    nTypeMismatch = 0
+            ! Only data from the first base is read. Information from
+            ! higher bases is ignored.
 
-    ! Loop over the number of files to be read and open them.
+            cgnsBase = 1
 
-    fileOpenLoop: do solID=1,nSolsRead
+            ! Read the cell and physical dimensions as well as the name for
+            ! this base.
 
-       ! Open the restart file for reading.
+            call cg_base_read_f(cgnsInd, cgnsBase, cgnsName, cellDim, &
+                                physDim, ierr)
+            if (ierr /= all_ok) &
+                call terminate("readRestartFile", &
+                               "Something wrong when calling cg_base_read_f")
 
-       call cg_open_f(solFiles(solID), mode_read, cgnsInd, ierr)
-       if(ierr /= all_ok) then
-          write(errorMessage,*) "File ", trim(solFiles(solID)), &
-               " could not be opened for reading"
-          call terminate("readRestartFile", errorMessage)
-       endif
+            ! Check the cell and physical dimensions. Both must be 3 for
+            ! this code to work.
 
-       fileIDs(solID) = cgnsInd
+            if (cellDim /= 3 .or. physDim /= 3) then
+                write (errorMessage, stringInt1) "Both the number of cell and physical dimensions should be 3, not ", &
+                    cellDim, " and ", physDim
+                call terminate("readRestartFile", errorMessage)
+            end if
 
-       ! Determine the number of bases in the cgns file.
-       ! This must be at least 1.
+        end do fileOpenLoop
 
-       call cg_nbases_f(cgnsInd, cgnsBase, ierr)
-       if(ierr /= all_ok)                  &
-            call terminate("readRestartFile", &
-            "Something wrong when calling cg_nbases_f")
+        ! Broadcast nTimeStepsRestart and timeUnsteadyRestart to all
+        ! processors. These values are needed to perform a consistent
+        ! unsteady restart.
 
-       if(CGNSBase < 1) then
-          write(errorMessage,*) "CGNS file ", trim(solFiles(solID)), &
-               " does not contain a base"
-          call terminate("readRestartFile", errorMessage)
-       endif
+        call mpi_bcast(nTimeStepsRestart, 1, adflow_integer, 0, &
+                       ADflow_comm_world, ierr)
+        call mpi_bcast(timeUnsteadyRestart, 1, adflow_real, 0, &
+                       ADflow_comm_world, ierr)
 
-       ! Only data from the first base is read. Information from
-       ! higher bases is ignored.
+        ! Get the scaling factors for density, pressure and velocity
+        ! by reading the reference state.
 
-       cgnsBase = 1
+        call scaleFactors(fileIDs)
 
-       ! Read the cell and physical dimensions as well as the name for
-       ! this base.
+        ! Loop over the number of files to be read and read the solution.
 
-       call cg_base_read_f(cgnsInd, cgnsBase, cgnsName, cellDim, &
-            physDim, ierr)
-       if(ierr /= all_ok)                  &
-            call terminate("readRestartFile", &
-            "Something wrong when calling cg_base_read_f")
+        solLoop: do solID = 1, nSolsRead
 
-       ! Check the cell and physical dimensions. Both must be 3 for
-       ! this code to work.
+            ! Store the file index a bit easier and set the base to 1.
 
-       if(cellDim /= 3 .or. physDim /= 3) then
-          write(errorMessage, stringInt1) "Both the number of cell and physical dimensions should be 3, not ", &
-            cellDim," and ", physDim
-          call terminate("readRestartFile", errorMessage)
-       endif
+            cgnsInd = fileIDs(solID)
+            cgnsBase = 1
 
-    enddo fileOpenLoop
+            ! Determine the number of zones (blocks) in the restart file
+            ! and check if this is identical to the number in the grid file.
 
+            call cg_nzones_f(cgnsInd, cgnsBase, nZones, ierr)
+            if (ierr /= all_ok) &
+                call terminate("readRestartFile", &
+                               "Something wrong when calling cg_nzones_f")
 
-    ! Broadcast nTimeStepsRestart and timeUnsteadyRestart to all
-    ! processors. These values are needed to perform a consistent
-    ! unsteady restart.
+            if (nZones /= cgnsNdom) &
+                 call terminate("readRestartFile", &
+                 "Number of blocks in grid file and restart &
+                 &file differ")
 
-    call mpi_bcast(nTimeStepsRestart, 1, adflow_integer, 0, &
-         ADflow_comm_world, ierr)
-    call mpi_bcast(timeUnsteadyRestart, 1, adflow_real, 0, &
-         ADflow_comm_world, ierr)
+            ! Create a sorted version of the zone names of the restart file
+            ! and store its corresponding zone numbers in zoneNumbers.
 
-    ! Get the scaling factors for density, pressure and velocity
-    ! by reading the reference state.
+            call getSortedZoneNumbers
 
-    call scaleFactors(fileIDs)
+            ! Loop over the number of blocks stored on this processor.
 
-    ! Loop over the number of files to be read and read the solution.
+            domains: do nn = 1, nDom
 
-    solLoop: do solID=1,nSolsRead
+                ! Set the pointers for this block. Make sure that the
+                ! correct data is set.
 
-       ! Store the file index a bit easier and set the base to 1.
+                ii = min(solID, nTimeIntervalsSpectral)
+                call setPointers(nn, 1_intType, ii)
 
-       cgnsInd  = fileIDs(solID)
-       cgnsBase = 1
+                ! Store the zone name of the original grid a bit easier.
 
-       ! Determine the number of zones (blocks) in the restart file
-       ! and check if this is identical to the number in the grid file.
+                cgnsName = cgnsDoms(nbkGlobal)%zoneName
 
-       call cg_nzones_f(cgnsInd, cgnsBase, nZones, ierr)
-       if(ierr /= all_ok)                  &
-            call terminate("readRestartFile", &
-            "Something wrong when calling cg_nzones_f")
+                ! Search in the sorted zone names of the restart file for
+                ! cgnsName. The name must be found; otherwise the restart
+                ! is pointless. If found, the zone number is set accordingly.
 
-       if(nZones /= cgnsNdom)              &
-            call terminate("readRestartFile", &
-            "Number of blocks in grid file and restart &
-            &file differ")
+                jj = bsearchStrings(cgnsname, zoneNames)
+                if (jj == 0) then
+                    write (errorMessage, *) "Zone name ", trim(cgnsName), &
+                        " not found in restart file ", &
+                        trim(solFiles(solID))
+                    call terminate("readRestartFile", errorMessage)
+                else
+                    jj = zoneNumbers(jj)
+                end if
 
-       ! Create a sorted version of the zone names of the restart file
-       ! and store its corresponding zone numbers in zoneNumbers.
+                cgnsZone = jj
 
-       call getSortedZoneNumbers
+                ! Determine the dimensions of the zone and check if these are
+                ! identical to the dimensions of the block in the grid file.
 
-       ! Loop over the number of blocks stored on this processor.
+                call cg_zone_read_f(cgnsInd, cgnsBase, cgnsZone, &
+                                    cgnsname, sizes, ierr)
+                if (ierr /= all_ok) &
+                     call terminate("readRestartFile", &
+                     "Something wrong when calling &
+                     &cg_zone_read_f")
 
-       domains: do nn=1,nDom
+                if (cgnsDoms(nbkGlobal)%il /= sizes(1) .or. &
+                     cgnsDoms(nbkGlobal)%jl /= sizes(2) .or. &
+                     cgnsDoms(nbkGlobal)%kl /= sizes(3)) &
+                     call terminate("readRestartFile", &
+                     "Corresponding zones in restart file and &
+                     &grid file have different dimensions")
 
-          ! Set the pointers for this block. Make sure that the
-          ! correct data is set.
+                ! Determine the number of flow solutions in this zone and
+                ! check if there is a solution stored.
 
-          ii = min(solID,nTimeIntervalsSpectral)
-          call setPointers(nn, 1_intType, ii)
+                call cg_nsols_f(cgnsInd, cgnsBase, cgnsZone, nSols, ierr)
+                if (ierr /= all_ok) &
+                    call terminate("readRestartFile", &
+                                   "Something wrong when calling cg_nsols_f")
 
-          ! Store the zone name of the original grid a bit easier.
+                if (nSols == 0) &
+                    call terminate("readRestartFile", &
+                                   "No solution present in restart file")
 
-          cgnsName = cgnsDoms(nbkGlobal)%zoneName
+                ! Check for multiple solutions. A distinction is needed for
+                ! overset cases because there will be an extra solution node
+                ! for the nodal iblanks.
 
-          ! Search in the sorted zone names of the restart file for
-          ! cgnsName. The name must be found; otherwise the restart
-          ! is pointless. If found, the zone number is set accordingly.
+                if ((nSols > 1) .or. nSols > 2) &
+                    call terminate("readRestartFile", &
+                                   "Multiple solutions present in restart file")
 
-          jj = bsearchStrings(cgnsname, zoneNames)
-          if(jj == 0) then
-             write(errorMessage,*) "Zone name ", trim(cgnsName),  &
-                  " not found in restart file ", &
-                  trim(solFiles(solID))
-             call terminate("readRestartFile", errorMessage)
-          else
-             jj = zoneNumbers(jj)
-          endif
+                ! Determine the location of the solution variables. A loop is
+                ! done over the solution nodes which is either 1 or 2. In the
+                ! latter case, pick the node not named "Nodal Blanks".
 
-          cgnsZone = jj
+                do cgnsSol = 1, nSols
+                    call cg_sol_info_f(cgnsInd, cgnsBase, cgnsZone, cgnsSol, &
+                                       cgnsName, location, ierr)
+                    if (ierr /= all_ok) &
+                         call terminate("readRestartFile", &
+                         "Something wrong when calling &
+                         &cg_sol_info_f")
 
-          ! Determine the dimensions of the zone and check if these are
-          ! identical to the dimensions of the block in the grid file.
+                    if (trim(cgnsName) /= "Nodal Blanks") exit
+                end do
 
-          call cg_zone_read_f(cgnsInd, cgnsBase, cgnsZone, &
-               cgnsname, sizes, ierr)
-          if(ierr /= all_ok)                  &
-               call terminate("readRestartFile", &
-               "Something wrong when calling &
-               &cg_zone_read_f")
+                ! Determine the rind info.
+
+                call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", &
+                               cgnsZone, "FlowSolution_t", cgnsSol, "end")
+                if (ierr /= all_ok) &
+                    call terminate("readRestartFile", &
+                                   "Something wrong when calling cg_goto_f")
+
+                call cg_rind_read_f(rindSizes, ierr)
+                if (ierr /= all_ok) &
+                     call terminate("readRestartFile", &
+                     "Something wrong when calling &
+                     &cg_rind_read_f")
+
+                ! Check if halo's are present. If not, set halosRead to .false.
+                ! This only needs to be done if this is not an older state for
+                ! an unsteady computation.
+
+                if (solID == 1 .or. equationMode == timeSpectral) then
+                    if (rindSizes(1) == 0 .or. rindSizes(2) == 0 .or. rindSizes(3) == 0 .or. &
+                        rindSizes(4) == 0 .or. rindSizes(5) == 0 .or. rindSizes(6) == 0) &
+                        halosRead = .false.
+                end if
+
+                ! Initialize the number of halo cells to read to 0.
+
+                nHiMin = 0; nHjMin = 0; nHkMin = 0
+                nHiMax = 0; nHjMax = 0; nHkMax = 0
+
+                ! Determine the range which must be read. A few things must be
+                ! taken into account: - in iBegor, iEndor, etc. The nodal
+                !                       range is stored. As in CGNS the cell
+                !                       range start at 1, 1 must be subtracted
+                !                       from the upper bound.
+                !                     - the rind info must be taken into
+                !                       account, because only the upper bound
+                !                       is changed in cgns; the lower bound
+                !                       remains 1.
+                !                     - in case the solution is stored in the
+                !                       vertices one extra variable in each
+                !                       direction is read. An averaging will
+                !                       take place to obtain cell centered
+                !                       values.
+                ! Also when vertex data is present, set halosRead to .false.,
+                ! because it is not possible to determine the halos.
 
-          if(cgnsDoms(nbkGlobal)%il /= sizes(1) .or. &
-               cgnsDoms(nbkGlobal)%jl /= sizes(2) .or. &
-               cgnsDoms(nbkGlobal)%kl /= sizes(3))     &
-               call terminate("readRestartFile", &
-               "Corresponding zones in restart file and &
-               &grid file have different dimensions")
+                if (location == CellCenter) then
+
+                    ! Correct the number of halo cells to be read.
+                    ! Only if this is not an older state in time for an
+                    ! unsteady computation.
 
-          ! Determine the number of flow solutions in this zone and
-          ! check if there is a solution stored.
+                    if (solID == 1 .or. equationMode == timeSpectral) then
+                        if (rindSizes(1) > 0) nHiMin = 1; if (rindSizes(2) > 0) nHiMax = 1
+                        if (rindSizes(3) > 0) nHjMin = 1; if (rindSizes(4) > 0) nHjMax = 1
+                        if (rindSizes(5) > 0) nHkMin = 1; if (rindSizes(6) > 0) nHkMax = 1
+                    end if
+
+                    ! Set the cell range to be read from the CGNS file.
+
+                    rangeMin(1) = iBegOr - nHiMin
+                    rangeMin(2) = jBegOr - nHjMin
+                    rangeMin(3) = kBegOr - nHkMin
 
-          call cg_nsols_f(cgnsInd, cgnsBase, cgnsZone, nSols, ierr)
-          if(ierr /= all_ok)                  &
-               call terminate("readRestartFile", &
-               "Something wrong when calling cg_nsols_f")
+                    rangeMax(1) = rangeMin(1) + nx - 1 + nHiMin + nHiMax
+                    rangeMax(2) = rangeMin(2) + ny - 1 + nHjMin + nHjMax
+                    rangeMax(3) = rangeMin(3) + nz - 1 + nHkMin + nHkMax
+
+                else if (location == Vertex) then
 
-          if(nSols == 0)                      &
-               call terminate("readRestartFile", &
-               "No solution present in restart file")
+                    ! Set the nodal range such that enough info is present
+                    ! to average the nodal data to create the cell centered
+                    ! data in the owned cells. No halo cells will be
+                    ! initialized.
+
+                    halosRead = .false.
 
-          ! Check for multiple solutions. A distinction is needed for
-          ! overset cases because there will be an extra solution node
-          ! for the nodal iblanks.
+                    rangeMin(1) = iBegor
+                    rangeMin(2) = jBegor
+                    rangeMin(3) = kBegor
 
-          if((nSols > 1) .or. nSols > 2) &
-               call terminate("readRestartFile", &
-               "Multiple solutions present in restart file")
+                    rangeMax(1) = rangeMin(1) + nx
+                    rangeMax(2) = rangeMin(2) + ny
+                    rangeMax(3) = rangeMin(3) + nz
+                else
+                    call terminate("readRestartFile", &
+                         "Only CellCenter or Vertex data allowed in &
+                         &restart file")
+                end if
 
-          ! Determine the location of the solution variables. A loop is
-          ! done over the solution nodes which is either 1 or 2. In the
-          ! latter case, pick the node not named "Nodal Blanks".
-
-          do cgnsSol=1,nSols
-             call cg_sol_info_f(cgnsInd, cgnsBase, cgnsZone, cgnsSol, &
-                  cgnsName, location, ierr)
-             if(ierr /= all_ok)                  &
-                  call terminate("readRestartFile", &
-                  "Something wrong when calling &
-                  &cg_sol_info_f")
-
-             if (trim(cgnsName) /= "Nodal Blanks") exit
-          end do
-
-          ! Determine the rind info.
-
-          call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", &
-               cgnsZone, "FlowSolution_t", cgnsSol, "end")
-          if(ierr /= all_ok)                  &
-               call terminate("readRestartFile", &
-               "Something wrong when calling cg_goto_f")
-
-          call cg_rind_read_f(rindSizes, ierr)
-          if(ierr /= all_ok)                  &
-               call terminate("readRestartFile", &
-               "Something wrong when calling &
-               &cg_rind_read_f")
-
-          ! Check if halo's are present. If not, set halosRead to .false.
-          ! This only needs to be done if this is not an older state for
-          ! an unsteady computation.
-
-          if(solID == 1 .or. equationMode == timeSpectral) then
-             if(rindSizes(1) == 0 .or. rindSizes(2) == 0 .or. rindSizes(3) == 0 .or. &
-                  rindSizes(4) == 0 .or. rindSizes(5) == 0 .or. rindSizes(6) == 0)     &
-                  halosRead = .false.
-          endif
-
-          ! Initialize the number of halo cells to read to 0.
-
-          nHiMin = 0; nHjMin = 0; nHkMin = 0
-          nHiMax = 0; nHjMax = 0; nHkMax = 0
-
-          ! Determine the range which must be read. A few things must be
-          ! taken into account: - in iBegor, iEndor, etc. The nodal
-          !                       range is stored. As in CGNS the cell
-          !                       range start at 1, 1 must be subtracted
-          !                       from the upper bound.
-          !                     - the rind info must be taken into
-          !                       account, because only the upper bound
-          !                       is changed in cgns; the lower bound
-          !                       remains 1.
-          !                     - in case the solution is stored in the
-          !                       vertices one extra variable in each
-          !                       direction is read. An averaging will
-          !                       take place to obtain cell centered
-          !                       values.
-          ! Also when vertex data is present, set halosRead to .false.,
-          ! because it is not possible to determine the halos.
-
-          if(location == CellCenter) then
-
-             ! Correct the number of halo cells to be read.
-             ! Only if this is not an older state in time for an
-             ! unsteady computation.
-
-             if(solID == 1 .or. equationMode == timeSpectral) then
-                if(rindSizes(1) > 0) nHiMin = 1; if(rindSizes(2) > 0) nHiMax = 1
-                if(rindSizes(3) > 0) nHjMin = 1; if(rindSizes(4) > 0) nHjMax = 1
-                if(rindSizes(5) > 0) nHkMin = 1; if(rindSizes(6) > 0) nHkMax = 1
-             endif
-
-             ! Set the cell range to be read from the CGNS file.
-
-             rangeMin(1) = iBegOr - nHiMin
-             rangeMin(2) = jBegOr - nHjMin
-             rangeMin(3) = kBegOr - nHkMin
-
-             rangeMax(1) = rangeMin(1) + nx-1 + nHiMin + nHiMax
-             rangeMax(2) = rangeMin(2) + ny-1 + nHjMin + nHjMax
-             rangeMax(3) = rangeMin(3) + nz-1 + nHkMin + nHkMax
-
-          else if(location == Vertex) then
-
-             ! Set the nodal range such that enough info is present
-             ! to average the nodal data to create the cell centered
-             ! data in the owned cells. No halo cells will be
-             ! initialized.
-
-             halosRead   = .false.
-
-             rangeMin(1) = iBegor
-             rangeMin(2) = jBegor
-             rangeMin(3) = kBegor
+                ! Allocate the memory for buffer, needed to store the variable
+                ! to be read, and bufferVertex in case the solution is stored
+                ! in the vertices.
 
-             rangeMax(1) = rangeMin(1) + nx
-             rangeMax(2) = rangeMin(2) + ny
-             rangeMax(3) = rangeMin(3) + nz
-          else
-             call terminate("readRestartFile", &
-                  "Only CellCenter or Vertex data allowed in &
-                  &restart file")
-          endif
-
-          ! Allocate the memory for buffer, needed to store the variable
-          ! to be read, and bufferVertex in case the solution is stored
-          ! in the vertices.
+                allocate (buffer(2 - nHiMin:il + nHiMax, &
+                                 2 - nHjMin:jl + nHjMax, &
+                                 2 - nHkMin:kl + nHkMax), stat=ierr)
+                if (ierr /= 0) &
+                    call terminate("readRestartFile", &
+                                   "Memory allocation failure for buffer")
+
+                if (location == Vertex) then
+                    allocate (bufferVertex(1:il, 1:jl, 1:kl), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("readRestartFile", &
+                                       "Memory allocation failure for bufferVertex")
+                end if
+
+                ! Create a sorted version of the variable names and store the
+                ! corresponding type in varTypes.
+
+                call getSortedVarNumbers
+
+                ! Read the density and the turbulence variables.
+
+                call readDensity(nTypeMismatch)
+                call readTurbvar(nTypeMismatch)
+
+                ! Read the other variables, depending on the situation.
+
+                testPrim: if (solID == 1 .or. equationMode == timeSpectral) then
+
+                    ! Either the first solution or time spectral mode. Read
+                    ! the primitive variables from the restart file.
+
+                    call readXvelocity(nTypeMismatch)
+                    call readYvelocity(nTypeMismatch)
+                    call readZvelocity(nTypeMismatch)
+                    call readPressure(nTypeMismatch)
+
+                    else testPrim
+
+                    ! Old solution in unsteady mode. Read the conservative
+                    ! variables.
+
+                    call readXmomentum(nTypeMismatch)
+                    call readYmomentum(nTypeMismatch)
+                    call readZmomentum(nTypeMismatch)
+                    call readEnergy(nTypeMismatch)
 
-          allocate(buffer(2-nHiMin:il+nHiMax, &
-               2-nHjMin:jl+nHjMax, &
-               2-nHkMin:kl+nHkMax), stat=ierr)
-          if(ierr /= 0)                       &
-               call terminate("readRestartFile", &
-               "Memory allocation failure for buffer")
+                end if testPrim
 
-          if(location == Vertex) then
-             allocate(bufferVertex(1:il,1:jl,1:kl), stat=ierr)
-             if(ierr /= 0)                       &
-                  call terminate("readRestartFile", &
-                  "Memory allocation failure for bufferVertex")
-          endif
-
-          ! Create a sorted version of the variable names and store the
-          ! corresponding type in varTypes.
-
-          call getSortedVarNumbers
-
-          ! Read the density and the turbulence variables.
-
-          call readDensity(nTypeMismatch)
-          call readTurbvar(nTypeMismatch)
-
-          ! Read the other variables, depending on the situation.
-
-          testPrim: if(solID == 1 .or. equationMode == timeSpectral) then
-
-             ! Either the first solution or time spectral mode. Read
-             ! the primitive variables from the restart file.
-
-             call readXvelocity(nTypeMismatch)
-             call readYvelocity(nTypeMismatch)
-             call readZvelocity(nTypeMismatch)
-             call readPressure(nTypeMismatch)
-
-          else testPrim
-
-             ! Old solution in unsteady mode. Read the conservative
-             ! variables.
-
-             call readXmomentum(nTypeMismatch)
-             call readYmomentum(nTypeMismatch)
-             call readZmomentum(nTypeMismatch)
-             call readEnergy(nTypeMismatch)
+                ! Release the memory of buffer, varNames and varTypes.
 
-          endif testPrim
+                deallocate (buffer, varNames, varTypes, stat=ierr)
+                if (ierr /= 0) &
+                     call terminate("readRestartFile", &
+                     "Deallocation error for buffer, varNames &
+                     &and varTypes.")
 
-          ! Release the memory of buffer, varNames and varTypes.
+                ! In case bufferVertex is allocated, release it.
 
-          deallocate(buffer, varNames, varTypes, stat=ierr)
-          if(ierr /= 0)                       &
-               call terminate("readRestartFile", &
-               "Deallocation error for buffer, varNames &
-               &and varTypes.")
+                if (location == Vertex) then
+                    deallocate (bufferVertex, stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("readRestartFile", &
+                                       "Deallocation error for bufferVertex")
+                end if
 
-          ! In case bufferVertex is allocated, release it.
+            end do domains
 
-          if(location == Vertex) then
-             deallocate(bufferVertex, stat=ierr)
-             if(ierr /= 0)                       &
-                  call terminate("readRestartFile", &
-                  "Deallocation error for bufferVertex")
-          endif
+            ! Release the memory of zoneNames and zoneNumbers.
 
-       enddo domains
+            deallocate (zoneNames, zoneNumbers, stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("readRestartFile", &
+                 "Deallocation failure for zoneNames &
+                 &and zoneNumbers.")
 
-       ! Release the memory of zoneNames and zoneNumbers.
+            ! Close the cgns solution file.
 
-       deallocate(zoneNames, zoneNumbers, stat=ierr)
-       if(ierr /= 0)                       &
-            call terminate("readRestartFile", &
-            "Deallocation failure for zoneNames &
-            &and zoneNumbers.")
+            call cg_close_f(cgnsInd, ierr)
+            if (ierr /= all_ok) &
+                call terminate("readRestartFile", &
+                               "Something wrong when calling cg_close_f")
 
-       ! Close the cgns solution file.
+        end do solLoop
 
-       call cg_close_f(cgnsInd, ierr)
-       if(ierr /= all_ok)                  &
-            call terminate("readRestartFile", &
-            "Something wrong when calling cg_close_f")
-
-    enddo solLoop
+        ! Determine the global sum of nTypeMismatch; the result only
+        ! needs to be known on processor 0. Use ii as the global buffer
+        ! to store the result. If a type mismatch occured,
+        ! print a warning.
 
+        call mpi_reduce(nTypeMismatch, ii, 1, adflow_integer, &
+                        mpi_sum, 0, ADflow_comm_world, ierr)
+        if (myID == 0 .and. ii > 0) then
 
-    ! Determine the global sum of nTypeMismatch; the result only
-    ! needs to be known on processor 0. Use ii as the global buffer
-    ! to store the result. If a type mismatch occured,
-    ! print a warning.
+            write (integerString, "(i6)") ii
+            integerString = adjustl(integerString)
 
-    call mpi_reduce(nTypeMismatch, ii, 1, adflow_integer, &
-         mpi_sum, 0, ADflow_comm_world, ierr)
-    if(myID == 0 .and. ii > 0) then
+            print "(a)", "#"
+            print "(a)", "#                      Warning"
+            print strings, "# ", trim(integerString), " type mismatches occured when reading the solution of the blocks"
+            print "(a)", "#"
+        end if
 
-       write(integerString,"(i6)") ii
-       integerString = adjustl(integerString)
+    end subroutine readRestartFile
 
-       print "(a)", "#"
-       print "(a)", "#                      Warning"
-       print strings, "# ", trim(integerString)," type mismatches occured when reading the solution of the blocks"
-       print "(a)", "#"
-    endif
+    subroutine getSortedZoneNumbers
+        !
+        !       getSortedZoneNumbers reads the names of the zones of the
+        !       cgns file given by cgnsInd and cgnsBase. Afterwards the
+        !       zonenames are sorted in increasing order, such that a binary
+        !       search algorithm can be employed. The original zone numbers
+        !       are stored in zoneNumbers.
+        !       If the zone contains a link to a zone containing the
+        !       coordinates the name of the linked zone is taken.
+        !
 
-  end subroutine readRestartFile
+        use constants
+        use cgnsGrid, only: cgnsNDom
+        use su_cgns
+        use variableReading, only: zoneNames, zoneNumbers, cgnsInd, cgnsBase
+        use sorting, only: qsortStrings, bsearchStrings
+        use utils, only: terminate
+        use commonFormats, only: strings
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+        integer :: zone, zonetype, ncoords, pathLength
+        integer :: pos
 
-  subroutine getSortedZoneNumbers
-    !
-    !       getSortedZoneNumbers reads the names of the zones of the
-    !       cgns file given by cgnsInd and cgnsBase. Afterwards the
-    !       zonenames are sorted in increasing order, such that a binary
-    !       search algorithm can be employed. The original zone numbers
-    !       are stored in zoneNumbers.
-    !       If the zone contains a link to a zone containing the
-    !       coordinates the name of the linked zone is taken.
-    !
+        integer(kind=cgsize_t), dimension(9) :: sizesBlock
 
-    use constants
-    use cgnsGrid, only : cgnsNDom
-    use su_cgns
-    use variableReading, only : zoneNames, zoneNumbers, cgnsInd, cgnsBase
-    use sorting, only : qsortStrings, bsearchStrings
-    use utils, only : terminate
-    use commonFormats, only : strings
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-    integer :: zone, zonetype, ncoords, pathLength
-    integer :: pos
+        integer(kind=intType) :: nn, ii
 
-    integer(kind=cgsize_t), dimension(9) :: sizesBlock
+        character(len=maxStringLen) :: errorMessage, linkPath
+        character(len=maxCGNSNameLen), dimension(cgnsNdom) :: tmpNames
 
-    integer(kind=intType) :: nn, ii
+        logical :: nameFound
 
-    character(len=maxStringLen) :: errorMessage, linkPath
-    character(len=maxCGNSNameLen), dimension(cgnsNdom) :: tmpNames
+        character(len=7) :: int1String, int2String
 
-    logical :: nameFound
+        ! Allocate the memory for zoneNames and zoneNumbers.
 
-    character(len=7) :: int1String, int2String
+        allocate (zoneNames(cgnsNdom), zoneNumbers(cgnsNdom), stat=ierr)
+        if (ierr /= 0) &
+             call terminate("getSortedZoneNumbers", &
+             "Memory allocation failure for zoneNames &
+             &and zoneNumbers")
 
-    ! Allocate the memory for zoneNames and zoneNumbers.
+        ! Loop over the number of zones in the file.
 
-    allocate(zoneNames(cgnsNdom), zoneNumbers(cgnsNdom), stat=ierr)
-    if(ierr /= 0)                            &
-         call terminate("getSortedZoneNumbers", &
-         "Memory allocation failure for zoneNames &
-         &and zoneNumbers")
+        cgnsDomains: do nn = 1, cgnsNdom
 
-    ! Loop over the number of zones in the file.
+            ! Initialize nameFound to .false.
 
-    cgnsDomains: do nn=1,cgnsNdom
+            nameFound = .false.
 
-       ! Initialize nameFound to .false.
+            ! Check if the zone is structured.
 
-       nameFound = .false.
+            zone = nn
+            call cg_zone_type_f(cgnsInd, cgnsBase, zone, zonetype, ierr)
+            if (ierr /= all_ok) &
+                call terminate("getSortedZoneNumbers", &
+                               "Something wrong when calling cg_zone_type_f")
 
-       ! Check if the zone is structured.
+            if (zonetype /= structured) then
 
-       zone = nn
-       call cg_zone_type_f(cgnsInd, cgnsBase, zone, zonetype, ierr)
-       if(ierr /= all_ok)                       &
-            call terminate("getSortedZoneNumbers", &
-            "Something wrong when calling cg_zone_type_f")
+                write (int1String, "(i7)") cgnsBase
+                int1String = adjustl(int1String)
+                write (int2String, "(i7)") zone
+                int2String = adjustl(int2String)
 
-       if(zonetype /= structured) then
+                write (errorMessage, strings) "Base ", trim(int1String), ": Zone ", trim(int2String), &
+                    " of the cgns restart file is not structured"
+                call terminate("getSortedZoneNumbers", errorMessage)
 
-          write(int1String,"(i7)") cgnsBase
-          int1String = adjustl(int1String)
-          write(int2String,"(i7)") zone
-          int2String = adjustl(int2String)
+            end if
 
-          write(errorMessage, strings) "Base ", trim(int1String),": Zone ", trim(int2String), &
-            " of the cgns restart file is not structured"
-          call terminate("getSortedZoneNumbers", errorMessage)
+            ! Determine the number of grid coordinates of this zone.
 
-       endif
+            call cg_ncoords_f(cgnsInd, cgnsBase, zone, ncoords, ierr)
+            if (ierr /= all_ok) &
+                call terminate("getSortedZoneNumbers", &
+                               "Something wrong when calling cg_ncoords_f")
 
-       ! Determine the number of grid coordinates of this zone.
+            ! If ncoords == 3, there are coordinates present. Then check if
+            ! it is a link. If so, take the zone name of the link.
 
-       call cg_ncoords_f(cgnsInd, cgnsBase, zone, ncoords, ierr)
-       if(ierr /= all_ok)                       &
-            call terminate("getSortedZoneNumbers", &
-            "Something wrong when calling cg_ncoords_f")
+            if (ncoords == 3) then
 
-       ! If ncoords == 3, there are coordinates present. Then check if
-       ! it is a link. If so, take the zone name of the link.
+                ! Go to the coordinates node.
 
-       if(ncoords == 3) then
+                call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", zone, &
+                               "GridCoordinates_t", 1, "end")
+                if (ierr /= all_ok) &
+                    call terminate("getSortedZoneNumbers", &
+                                   "Something wrong when calling cg_goto_f")
 
-          ! Go to the coordinates node.
+                ! Check if this node is a link.
 
-          call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", zone, &
-               "GridCoordinates_t", 1, "end")
-          if(ierr /= all_ok)                       &
-               call terminate("getSortedZoneNumbers", &
-               "Something wrong when calling cg_goto_f")
+                call cg_is_link_f(pathLength, ierr)
+                if (ierr /= all_ok) &
+                    call terminate("getSortedZoneNumbers", &
+                                   "Something wrong when calling cg_is_link_f")
 
-          ! Check if this node is a link.
+                if (pathLength > 0) then
 
-          call cg_is_link_f(pathLength, ierr)
-          if(ierr /= all_ok)                       &
-               call terminate("getSortedZoneNumbers", &
-               "Something wrong when calling cg_is_link_f")
+                    ! Determine the name of the linkPath.
 
-          if(pathLength > 0) then
+                    call cg_link_read_f(errorMessage, linkPath, ierr)
+                    if (ierr /= all_ok) &
+                         call terminate("getSortedZoneNumbers", &
+                         "Something wrong when calling &
+                         &cg_link_read_f")
 
-             ! Determine the name of the linkPath.
+                    ! Find the zone name.
+                    ! Find, starting from the back, the forward slash.
 
-             call cg_link_read_f(errorMessage, linkPath, ierr)
-             if(ierr /= all_ok)                       &
-                  call terminate("getSortedZoneNumbers", &
-                  "Something wrong when calling &
-                  &cg_link_read_f")
+                    pos = index(linkPath, "/", .true.)
+                    if (pos > 0) then
+                        linkPath = linkPath(:pos - 1)
 
-             ! Find the zone name.
-             ! Find, starting from the back, the forward slash.
+                        ! Find the next forward slash from the back and
+                        ! remove the leading part from the path name.
 
-             pos = index(linkPath, "/", .true.)
-             if(pos > 0) then
-                linkPath = linkPath(:pos-1)
+                        pos = index(linkPath, "/", .true.)
+                        if (pos > 0) linkPath = linkPath(pos + 1:)
+                    end if
 
-                ! Find the next forward slash from the back and
-                ! remove the leading part from the path name.
+                    ! Create the zone name and set nameFound to .true..
 
-                pos = index(linkPath, "/", .true.)
-                if(pos > 0) linkPath = linkPath(pos+1:)
-             endif
+                    linkPath = adjustl(linkPath)
+                    zoneNames(nn) = trim(linkPath)
+                    nameFound = .true.
 
-             ! Create the zone name and set nameFound to .true..
+                end if
+            end if
 
-             linkPath      = adjustl(linkPath)
-             zoneNames(nn) = trim(linkPath)
-             nameFound     = .true.
+            ! If no name was yet found set it to the name of the
+            ! current zone.
 
-          endif
-       endif
+            if (.not. nameFound) then
+                call cg_zone_read_f(cgnsInd, cgnsBase, zone, &
+                                    zoneNames(nn), sizesBlock, ierr)
+                if (ierr /= all_ok) &
+                     call terminate("getSortedZoneNumbers", &
+                     "Something wrong when calling &
+                     &cg_zone_read_f")
+            end if
 
-       ! If no name was yet found set it to the name of the
-       ! current zone.
+        end do cgnsDomains
 
-       if(.not. nameFound) then
-          call cg_zone_read_f(cgnsInd, cgnsBase, zone, &
-               zoneNames(nn), sizesBlock, ierr)
-          if(ierr /= all_ok)                       &
-               call terminate("getSortedZoneNumbers", &
-               "Something wrong when calling &
-               &cg_zone_read_f")
-       endif
+        ! Set tmpNames to zoneNames and sort the latter
+        ! in increasing order.
 
-    enddo cgnsDomains
+        do nn = 1, cgnsNdom
+            tmpNames(nn) = zoneNames(nn)
+        end do
 
-    ! Set tmpNames to zoneNames and sort the latter
-    ! in increasing order.
+        ! Sort zoneNames in increasing order.
 
-    do nn=1,cgnsNdom
-       tmpNames(nn) = zoneNames(nn)
-    enddo
+        call qsortStrings(zoneNames, cgnsNdom)
 
-    ! Sort zoneNames in increasing order.
+        ! Initialize zoneNumbers to -1. This serves as a check during
+        ! the search.
 
-    call qsortStrings(zoneNames, cgnsNdom)
+        do nn = 1, cgnsNdom
+            zoneNumbers(nn) = -1
+        end do
 
-    ! Initialize zoneNumbers to -1. This serves as a check during
-    ! the search.
+        ! Find the original zone numbers for the sorted zone names.
 
-    do nn=1,cgnsNdom
-       zoneNumbers(nn) = -1
-    enddo
+        do nn = 1, cgnsNdom
+            ii = bsearchStrings(tmpNames(nn), zoneNames)
 
-    ! Find the original zone numbers for the sorted zone names.
+            ! Check if the zone number is not already taken. If this is the
+            ! case, this means that two identical zone names are present.
 
-    do nn=1,cgnsNdom
-       ii = bsearchStrings(tmpNames(nn), zoneNames)
+            if (zoneNumbers(ii) /= -1) &
+                 call terminate("getSortedZoneNumbers", &
+                 "Error occurs only when two identical zone &
+                 &names are present")
 
-       ! Check if the zone number is not already taken. If this is the
-       ! case, this means that two identical zone names are present.
+            ! And set the zone number.
 
-       if(zoneNumbers(ii) /= -1)                &
-            call terminate("getSortedZoneNumbers", &
-            "Error occurs only when two identical zone &
-            &names are present")
+            zoneNumbers(ii) = nn
+        end do
 
-       ! And set the zone number.
+    end subroutine getSortedZoneNumbers
 
-       zoneNumbers(ii) = nn
-    enddo
+    subroutine getSortedVarNumbers
+        !
+        !       getSortedVarNumbers reads the names of variables stored in
+        !       the given solution node of the cgns file, indicated by
+        !       cgnsInd, cgnsBase and cgnsZone. Afterwards the variable
+        !       names are sorted in increasing order, such that they can be
+        !       used in a binary search. Their original variable number and
+        !       type is stored.
+        !
+        use constants
+        use su_cgns
+        use variableReading, only: varTypes, varNames, cgnsBase, cgnsInd, &
+                                   cgnsZone, cgnsSol, nVar
+        use sorting, only: qsortStrings, bsearchStrings
+        use utils, only: terminate
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: i, ierr
+        integer, dimension(:), allocatable :: tmpTypes
 
-  end subroutine getSortedZoneNumbers
+        integer(kind=intType) :: nn, ii
 
-  subroutine getSortedVarNumbers
-    !
-    !       getSortedVarNumbers reads the names of variables stored in
-    !       the given solution node of the cgns file, indicated by
-    !       cgnsInd, cgnsBase and cgnsZone. Afterwards the variable
-    !       names are sorted in increasing order, such that they can be
-    !       used in a binary search. Their original variable number and
-    !       type is stored.
-    !
-    use constants
-    use su_cgns
-    use variableReading, only : varTypes, varNames, cgnsBase, cgnsInd, &
-         cgnsZone, cgnsSol, nVar
-    use sorting, only : qsortStrings, bsearchStrings
-    use utils, only : terminate
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: i, ierr
-    integer, dimension(:), allocatable :: tmpTypes
+        integer(kind=intType), dimension(:), allocatable :: varNumbers
 
-    integer(kind=intType) :: nn, ii
+        character(len=maxCGNSNameLen), allocatable, dimension(:) :: &
+            tmpNames
 
-    integer(kind=intType), dimension(:), allocatable :: varNumbers
+        ! Determine the number of solution variables stored.
 
-    character(len=maxCGNSNameLen), allocatable, dimension(:) :: &
-         tmpNames
+        call cg_nfields_f(cgnsInd, cgnsBase, cgnsZone, cgnsSol, &
+                          nVar, ierr)
+        if (ierr /= all_ok) &
+            call terminate("getSortedVarNumbers", &
+                           "Something wrong when calling cg_nfield_f")
 
-    ! Determine the number of solution variables stored.
+        ! Allocate the memory for varnames, vartypes and varnumber
 
-    call cg_nfields_f(cgnsInd, cgnsBase, cgnsZone, cgnsSol, &
-         nVar, ierr)
-    if(ierr /= all_ok)                      &
-         call terminate("getSortedVarNumbers", &
-         "Something wrong when calling cg_nfield_f")
+        allocate (varNames(nVar), varTypes(nVar), varNumbers(nVar), &
+                  stat=ierr)
+        if (ierr /= 0) &
+            call terminate("getSortedVarNumbers", &
+                           "Memory allocation failure for varNames, etc.")
 
-    ! Allocate the memory for varnames, vartypes and varnumber
+        ! Loop over the number of variables and store their names and
+        ! types.
 
-    allocate(varNames(nVar), varTypes(nVar), varNumbers(nVar), &
-         stat=ierr)
-    if(ierr /= 0)                           &
-         call terminate("getSortedVarNumbers", &
-         "Memory allocation failure for varNames, etc.")
+        do i = 1, nVar
+            call cg_field_info_f(cgnsInd, cgnsBase, cgnsZone, cgnsSol, &
+                                 i, varTypes(i), varNames(i), ierr)
+            if (ierr /= 0) &
+                call terminate("getSortedVarNumbers", &
+                               "Something wrong when calling cg_field_info_f")
+        end do
 
-    ! Loop over the number of variables and store their names and
-    ! types.
+        ! Allocate the memory for tmpTypes and tmpNames and initialize
+        ! their values.
 
-    do i=1,nVar
-       call cg_field_info_f(cgnsInd, cgnsBase, cgnsZone, cgnsSol, &
-            i, varTypes(i), varNames(i), ierr)
-       if(ierr /= 0)                           &
+        allocate (tmpTypes(nVar), tmpNames(nVar), stat=ierr)
+        if (ierr /= 0) &
             call terminate("getSortedVarNumbers", &
-            "Something wrong when calling cg_field_info_f")
-    enddo
+                           "Memory allocation failure for tmp variables")
 
-    ! Allocate the memory for tmpTypes and tmpNames and initialize
-    ! their values.
+        do i = 1, nVar
+            tmpTypes(i) = varTypes(i)
+            tmpNames(i) = varNames(i)
+        end do
 
-    allocate(tmpTypes(nVar), tmpNames(nVar), stat=ierr)
-    if(ierr /= 0)                           &
-         call terminate("getSortedVarNumbers", &
-         "Memory allocation failure for tmp variables")
+        ! Sort varNames in increasing order.
 
-    do i=1,nVar
-       tmpTypes(i) = varTypes(i)
-       tmpNames(i) = varNames(i)
-    enddo
+        nn = nVar
+        call qsortStrings(varNames, nn)
 
-    ! Sort varNames in increasing order.
+        ! Initialize varNumbers to -1. This serves as a check during
+        ! the search.
 
-    nn = nVar
-    call qsortStrings(varNames, nn)
+        do i = 1, nVar
+            varNumbers(i) = -1
+        end do
 
-    ! Initialize varNumbers to -1. This serves as a check during
-    ! the search.
+        ! Find the original types and numbers for the just sorted
+        ! variable names.
 
-    do i=1,nVar
-       varNumbers(i) = -1
-    enddo
+        do i = 1, nVar
+            ii = bsearchStrings(tmpNames(i), varNames)
 
-    ! Find the original types and numbers for the just sorted
-    ! variable names.
+            ! Check if the variable number is not already taken. If this is
+            ! the case, this means that two identical var names are present.
 
-    do i=1,nVar
-       ii = bsearchStrings(tmpNames(i), varNames)
+            if (varNumbers(ii) /= -1) &
+                 call terminate("getSortedVarNumbers", &
+                 "Error occurs only when two identical &
+                 &variable names are present")
 
-       ! Check if the variable number is not already taken. If this is
-       ! the case, this means that two identical var names are present.
+            ! And set the variable number and type.
 
-       if(varNumbers(ii) /= -1)                &
-            call terminate("getSortedVarNumbers", &
-            "Error occurs only when two identical &
-            &variable names are present")
-
-       ! And set the variable number and type.
+            varNumbers(ii) = i
+            varTypes(ii) = tmpTypes(i)
+        end do
 
-       varNumbers(ii) = i
-       varTypes(ii)   = tmpTypes(i)
-    enddo
+        ! Release the memory of varNumbers, tmpNames and tmpTypes.
 
-    ! Release the memory of varNumbers, tmpNames and tmpTypes.
-
-    deallocate(varNumbers, tmpTypes, tmpNames, stat=ierr)
-    if(ierr /= 0)                           &
-         call terminate("getSortedVarNumbers", &
-         "Deallocation error for tmp variables")
+        deallocate (varNumbers, tmpTypes, tmpNames, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("getSortedVarNumbers", &
+                           "Deallocation error for tmp variables")
 
-  end subroutine getSortedVarNumbers
+    end subroutine getSortedVarNumbers
 #endif
 end module initializeFlow
diff --git a/src/initFlow/variableReading.F90 b/src/initFlow/variableReading.F90
index 25e8c3b04..5fb30873c 100644
--- a/src/initFlow/variableReading.F90
+++ b/src/initFlow/variableReading.F90
@@ -1,2719 +1,2713 @@
 module variableReading
 
-  use constants, only : intType, maxCGNSNameLen, cgnsRealType, realType, maxStringLen
-  use su_cgns, only : cgsize_t
-  ! halosRead:Determines if the halos where read or not.
-  logical :: halosRead
-
-  ! cgnsInd:  File index of the CGNS file.
-  ! cgnsBase: Base of the CGNS file, always set to 1.
-  ! cgnsZone: Zone ID in the CGNS file.
-  ! cgnsSol:  Solution ID in the zone ID of the CGNS file.
-  ! location: Location where the variables are stored in CGNS.
-  !           Supported possibilities are Vertex and CellCentered.
-
-  integer :: cgnsInd, cgnsBase, cgnsZone, cgnsSol, location
-
-  ! zoneNumbers: Corresponding zoneNumbers of the sorted
-  !              zoneNames.
-  ! zoneNames:   Zone names, sorted in increasing order, of the
-  !              zones in the CGNS restart file.
-  ! varNames: Variable names, sorted in increasing order,
-  !           of the variables.
-
-  integer(kind=intType), allocatable, dimension(:) :: zoneNumbers
-  character(len=maxCGNSNameLen), allocatable, dimension(:) :: zoneNames
-  character(len=maxCGNSNameLen), allocatable, dimension(:) :: varNames
-
-  ! rangeMin(3):    Lower index in i, j and k direction of the
-  !                 range to be read.
-  ! rangeMax(3):    Upper index in i, j and k direction of the
-  !                 range to be read.
-  integer(kind=cgsize_t), dimension(3) :: rangeMin, rangeMax
-
-  ! nVar:     Number of variables stored in the solution file.
-  ! solID:    Loop variables for the number of solutions to be read.
-
-  integer :: nVar
-  integer(kind=intType) :: solID
-
-  ! interpolSpectral:     Whether or not to interpolate the
-  !                       coordinates/solutions for the time
-  !                       spectral mode.
-  ! copySpectral:         Whether or not to copy the solutions
-  !                       for the time spectral mode.
-  logical ::               interpolSpectral, copySpectral
-
-
-
-  ! rhoScale: Scale factor for the density.
-  ! velScale: Scale factor for the velocity.
-  ! pScale:   Scale factor for the pressure.
-  ! muScale:  Scale factor for the molecular viscosity.
-
-  real(kind=realType) :: rhoScale, velScale, pScale, muScale
-
-  ! nSolsRead:            Number of solution files to read.
-  ! solFiles(nSolsRead):  Names of the solution files to be read.
-
-  integer(kind=intType) :: nSolsRead
-  character(len=maxStringLen), dimension(:), allocatable :: solFiles
-
-  ! varTypes(nVar): Variable types of the variables stored.
-  integer, allocatable, dimension(:) :: varTypes
-
-
-  ! buffer(2:il,2:jl,2:kl): Buffer to read and store the cell
-  !                         centered values.
-  ! bufferVertex(:):        Additional buffer needed to read
-  !                         vertex data and transform them into
-  !                         cell centered values.
-
-  real(kind=cgnsRealType), dimension(:,:,:), allocatable :: buffer
-  real(kind=cgnsRealType), dimension(:,:,:), allocatable :: bufferVertex
-
+    use constants, only: intType, maxCGNSNameLen, cgnsRealType, realType, maxStringLen
+    use su_cgns, only: cgsize_t
+    ! halosRead:Determines if the halos where read or not.
+    logical :: halosRead
+
+    ! cgnsInd:  File index of the CGNS file.
+    ! cgnsBase: Base of the CGNS file, always set to 1.
+    ! cgnsZone: Zone ID in the CGNS file.
+    ! cgnsSol:  Solution ID in the zone ID of the CGNS file.
+    ! location: Location where the variables are stored in CGNS.
+    !           Supported possibilities are Vertex and CellCentered.
+
+    integer :: cgnsInd, cgnsBase, cgnsZone, cgnsSol, location
+
+    ! zoneNumbers: Corresponding zoneNumbers of the sorted
+    !              zoneNames.
+    ! zoneNames:   Zone names, sorted in increasing order, of the
+    !              zones in the CGNS restart file.
+    ! varNames: Variable names, sorted in increasing order,
+    !           of the variables.
+
+    integer(kind=intType), allocatable, dimension(:) :: zoneNumbers
+    character(len=maxCGNSNameLen), allocatable, dimension(:) :: zoneNames
+    character(len=maxCGNSNameLen), allocatable, dimension(:) :: varNames
+
+    ! rangeMin(3):    Lower index in i, j and k direction of the
+    !                 range to be read.
+    ! rangeMax(3):    Upper index in i, j and k direction of the
+    !                 range to be read.
+    integer(kind=cgsize_t), dimension(3) :: rangeMin, rangeMax
+
+    ! nVar:     Number of variables stored in the solution file.
+    ! solID:    Loop variables for the number of solutions to be read.
+
+    integer :: nVar
+    integer(kind=intType) :: solID
+
+    ! interpolSpectral:     Whether or not to interpolate the
+    !                       coordinates/solutions for the time
+    !                       spectral mode.
+    ! copySpectral:         Whether or not to copy the solutions
+    !                       for the time spectral mode.
+    logical :: interpolSpectral, copySpectral
+
+    ! rhoScale: Scale factor for the density.
+    ! velScale: Scale factor for the velocity.
+    ! pScale:   Scale factor for the pressure.
+    ! muScale:  Scale factor for the molecular viscosity.
+
+    real(kind=realType) :: rhoScale, velScale, pScale, muScale
+
+    ! nSolsRead:            Number of solution files to read.
+    ! solFiles(nSolsRead):  Names of the solution files to be read.
+
+    integer(kind=intType) :: nSolsRead
+    character(len=maxStringLen), dimension(:), allocatable :: solFiles
+
+    ! varTypes(nVar): Variable types of the variables stored.
+    integer, allocatable, dimension(:) :: varTypes
+
+    ! buffer(2:il,2:jl,2:kl): Buffer to read and store the cell
+    !                         centered values.
+    ! bufferVertex(:):        Additional buffer needed to read
+    !                         vertex data and transform them into
+    !                         cell centered values.
+
+    real(kind=cgnsRealType), dimension(:, :, :), allocatable :: buffer
+    real(kind=cgnsRealType), dimension(:, :, :), allocatable :: bufferVertex
 
 contains
-  subroutine readDensity(nTypeMismatch)
-    !
-    !       readDensity reads the density from the given place in the
-    !       cgns file. If the density itself is not stored (unlikely),
-    !       then it is tried to construct the density from other
-    !       variables. If this is not possible an error message is printed
-    !       and the program will stop.
-    !       It is assumed that the pointers in blockPointers already
-    !       point to the correct block.
-    !
-    use constants
-    use cgnsNames
-    use blockPointers, only : w, nbklocal
-    use IOModule, only : IOVar
-    use utils, only : setCGNSRealType, terminate
-    use sorting, only : bsearchStrings
-    implicit none
-    !
-    !      Subroutine argument.
-    !
-    integer(kind=intType), intent(inout) :: nTypeMismatch
-    !
-    !      Local variables
-    !
-    integer :: realTypeCGNS
-
-    integer(kind=intType) :: i, j, k, nn, po, ip, jp, kp
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
-
-    ! Set the cell range to be copied from the buffer.
-
-    iBeg = lbound(buffer,1); iEnd = ubound(buffer,1)
-    jBeg = lbound(buffer,2); jEnd = ubound(buffer,2)
-    kBeg = lbound(buffer,3); kEnd = ubound(buffer,3)
-
-    ! Set the cgns real type and abbreviate the solution variable and
-    ! the pointer offset to improve readability.
-
-    realTypeCGNS = setCGNSRealType()
-
-    po = IOVar(nbkLocal,solID)%pointerOffset
-    w => IOVar(nbkLocal,solID)%w
-
-    ! Find out if the density is present in the solution file.
-
-    nn = bsearchStrings(cgnsDensity, varNames)
-    if(nn > 0) then
-
-       ! Density is present. First determine whether or not a type
-       ! mismatch occurs. If so, update nTypeMismatch.
-
-       if(realTypeCGNS /= varTypes(nn)) &
-            nTypeMismatch = nTypeMismatch + 1
-
-       ! Read the density from the restart file and store it in buffer.
-
-       call readRestartVariable(varNames(nn))
-
-       ! Copy the variables from buffer into w. Multiply by the
-       ! scaling factor to obtain to correct nondimensional value and
-       ! take the possible pointer offset into account.
-
-       do k=kBeg,kEnd
-          kp = k+po
-          do j=jBeg,jEnd
-             jp = j+po
-             do i=iBeg,iEnd
-                ip = i+po
-                w(ip,jp,kp,irho) = buffer(i,j,k)*rhoScale
-             enddo
-          enddo
-       enddo
-
-       ! Density is read, so a return can be made.
-
-       return
-
-    endif
+    subroutine readDensity(nTypeMismatch)
+        !
+        !       readDensity reads the density from the given place in the
+        !       cgns file. If the density itself is not stored (unlikely),
+        !       then it is tried to construct the density from other
+        !       variables. If this is not possible an error message is printed
+        !       and the program will stop.
+        !       It is assumed that the pointers in blockPointers already
+        !       point to the correct block.
+        !
+        use constants
+        use cgnsNames
+        use blockPointers, only: w, nbklocal
+        use IOModule, only: IOVar
+        use utils, only: setCGNSRealType, terminate
+        use sorting, only: bsearchStrings
+        implicit none
+        !
+        !      Subroutine argument.
+        !
+        integer(kind=intType), intent(inout) :: nTypeMismatch
+        !
+        !      Local variables
+        !
+        integer :: realTypeCGNS
+
+        integer(kind=intType) :: i, j, k, nn, po, ip, jp, kp
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
+
+        ! Set the cell range to be copied from the buffer.
+
+        iBeg = lbound(buffer, 1); iEnd = ubound(buffer, 1)
+        jBeg = lbound(buffer, 2); jEnd = ubound(buffer, 2)
+        kBeg = lbound(buffer, 3); kEnd = ubound(buffer, 3)
+
+        ! Set the cgns real type and abbreviate the solution variable and
+        ! the pointer offset to improve readability.
+
+        realTypeCGNS = setCGNSRealType()
+
+        po = IOVar(nbkLocal, solID)%pointerOffset
+        w => IOVar(nbkLocal, solID)%w
+
+        ! Find out if the density is present in the solution file.
+
+        nn = bsearchStrings(cgnsDensity, varNames)
+        if (nn > 0) then
+
+            ! Density is present. First determine whether or not a type
+            ! mismatch occurs. If so, update nTypeMismatch.
+
+            if (realTypeCGNS /= varTypes(nn)) &
+                nTypeMismatch = nTypeMismatch + 1
+
+            ! Read the density from the restart file and store it in buffer.
+
+            call readRestartVariable(varNames(nn))
+
+            ! Copy the variables from buffer into w. Multiply by the
+            ! scaling factor to obtain to correct nondimensional value and
+            ! take the possible pointer offset into account.
+
+            do k = kBeg, kEnd
+                kp = k + po
+                do j = jBeg, jEnd
+                    jp = j + po
+                    do i = iBeg, iEnd
+                        ip = i + po
+                        w(ip, jp, kp, irho) = buffer(i, j, k) * rhoScale
+                    end do
+                end do
+            end do
+
+            ! Density is read, so a return can be made.
 
-    ! Not able to determine the density.
-    ! Print an error message and exit.
+            return
 
-    call terminate("readDensity", &
-         "Not able to retrieve density from the &
-         &variables in the restart file.")
+        end if
 
-  end subroutine readDensity
+        ! Not able to determine the density.
+        ! Print an error message and exit.
 
-  subroutine readEnergy(nTypeMismatch)
-    !
-    !       readEnergy reads the energy variable from the given place in
-    !       the cgns file. If the energy is not stored then it is tried to
-    !       construct it from the pressure, density and velocities. If it
-    !       is not possible to create the energy an error message is
-    !       printed and the program will stop. It is assumed that the
-    !       pointers in blockPointers already point to the correct block.
-    !
-    use constants
-    use cgnsNames
-    use blockPointers, only : w, nbklocal
-    use IOModule, only : IOVar
-    use utils, only : setCGNSRealType, terminate
-    use sorting, only : bsearchStrings
-    use flowUtils, only : eTot
-    use flowVarRefState, only : kPresent
-    implicit none
-    !
-    !      Subroutine argument.
-    !
-    integer(kind=intType), intent(inout) :: nTypeMismatch
-    !
-    !      Local variables
-    !
-    integer :: realTypeCGNS
+        call terminate("readDensity", &
+             "Not able to retrieve density from the &
+             &variables in the restart file.")
 
-    integer(kind=intType) :: i, j, k, nn, po, ip, jp, kp
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
+    end subroutine readDensity
 
-    real(kind=realType) :: vvx, vvy, vvz, dummyK, pres, rhoInv
+    subroutine readEnergy(nTypeMismatch)
+        !
+        !       readEnergy reads the energy variable from the given place in
+        !       the cgns file. If the energy is not stored then it is tried to
+        !       construct it from the pressure, density and velocities. If it
+        !       is not possible to create the energy an error message is
+        !       printed and the program will stop. It is assumed that the
+        !       pointers in blockPointers already point to the correct block.
+        !
+        use constants
+        use cgnsNames
+        use blockPointers, only: w, nbklocal
+        use IOModule, only: IOVar
+        use utils, only: setCGNSRealType, terminate
+        use sorting, only: bsearchStrings
+        use flowUtils, only: eTot
+        use flowVarRefState, only: kPresent
+        implicit none
+        !
+        !      Subroutine argument.
+        !
+        integer(kind=intType), intent(inout) :: nTypeMismatch
+        !
+        !      Local variables
+        !
+        integer :: realTypeCGNS
 
-    ! Set the cell range to be copied from the buffer.
+        integer(kind=intType) :: i, j, k, nn, po, ip, jp, kp
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
 
-    iBeg = lbound(buffer,1); iEnd = ubound(buffer,1)
-    jBeg = lbound(buffer,2); jEnd = ubound(buffer,2)
-    kBeg = lbound(buffer,3); kEnd = ubound(buffer,3)
+        real(kind=realType) :: vvx, vvy, vvz, dummyK, pres, rhoInv
 
-    ! Set the cgns real type and abbreviate the solution variable and
-    ! the pointer offset to improve readability.
+        ! Set the cell range to be copied from the buffer.
 
-    realTypeCGNS = setCGNSRealType()
+        iBeg = lbound(buffer, 1); iEnd = ubound(buffer, 1)
+        jBeg = lbound(buffer, 2); jEnd = ubound(buffer, 2)
+        kBeg = lbound(buffer, 3); kEnd = ubound(buffer, 3)
 
-    po = IOVar(nbkLocal,solID)%pointerOffset
-    w => IOVar(nbkLocal,solID)%w
+        ! Set the cgns real type and abbreviate the solution variable and
+        ! the pointer offset to improve readability.
 
-    ! Find out if the total energy is present in the solution file.
+        realTypeCGNS = setCGNSRealType()
 
-    nn = bsearchStrings(cgnsEnergy, varNames)
+        po = IOVar(nbkLocal, solID)%pointerOffset
+        w => IOVar(nbkLocal, solID)%w
 
-    testRhoEPresent: if(nn > 0) then
+        ! Find out if the total energy is present in the solution file.
 
-       ! Total energy is present. First determine whether or not a type
-       ! mismatch occurs. If so, update nTypeMismatch.
+        nn = bsearchStrings(cgnsEnergy, varNames)
 
-       if(realTypeCGNS /= varTypes(nn)) &
-            nTypeMismatch = nTypeMismatch + 1
+        testRhoEPresent: if (nn > 0) then
 
-       ! Read the energy from the restart file and store it in buffer.
+            ! Total energy is present. First determine whether or not a type
+            ! mismatch occurs. If so, update nTypeMismatch.
 
-       call readRestartVariable(varNames(nn))
+            if (realTypeCGNS /= varTypes(nn)) &
+                nTypeMismatch = nTypeMismatch + 1
 
-       ! Copy the variables from buffer into w. Multiply by the scaling
-       ! factor to obtain to correct non-dimensional value and take the
-       ! possible pointer offset into account.
+            ! Read the energy from the restart file and store it in buffer.
 
-       do k=kBeg,kEnd
-          kp = k+po
-          do j=jBeg,jEnd
-             jp = j+po
-             do i=iBeg,iEnd
-                ip = i+po
-                w(ip,jp,kp,irhoE) = buffer(i,j,k)*pScale
-             enddo
-          enddo
-       enddo
-
-       ! Energy has been read, so a return can be made.
-
-       return
-
-    endif testRhoEPresent
-
-    ! Total energy is not present. Check for the pressure.
-
-    nn = bsearchStrings(cgnsPressure, varNames)
-
-    testPressure: if(nn > 0) then
-
-       ! Pressure is present. First determine whether or not a type
-       ! mismatch occurs. If so, update nTypeMismatch.
-
-       if(realTypeCGNS /= varTypes(nn)) &
-            nTypeMismatch = nTypeMismatch + 1
-
-       ! Read the pressure from the restart file and store it in buffer.
-
-       call readRestartVariable(varNames(nn))
-
-       ! Compute the total energy. This depends whether or not
-       ! a turbulent kinetic energy is present. Take the possible
-       ! pointer offset into account.
-       ! As this routine is only called to construct the states in
-       ! the past for a time accurate computation, the momentum is
-       ! stored and not the velocity.
-
-       if( kPresent ) then
-
-          do k=kBeg,kEnd
-             kp = k+po
-             do j=jBeg,jEnd
-                jp = j+po
-                do i=iBeg,iEnd
-                   ip = i+po
-                   rhoInv = one/w(ip,jp,kp,irho)
-                   vvx     = w(ip,jp,kp,imx)*rhoInv
-                   vvy     = w(ip,jp,kp,imy)*rhoInv
-                   vvz     = w(ip,jp,kp,imz)*rhoInv
-                   pres   = buffer(i,j,k)*pScale
-                   call etot(w(ip,jp,kp,irho), vvx, vvy, vvz, pres,  &
-                        w(ip,jp,kp,itu1), w(ip,jp,kp,irhoE), &
-                        kPresent)
-                enddo
-             enddo
-          enddo
+            call readRestartVariable(varNames(nn))
 
-       else
+            ! Copy the variables from buffer into w. Multiply by the scaling
+            ! factor to obtain to correct non-dimensional value and take the
+            ! possible pointer offset into account.
 
-          dummyK = zero
+            do k = kBeg, kEnd
+                kp = k + po
+                do j = jBeg, jEnd
+                    jp = j + po
+                    do i = iBeg, iEnd
+                        ip = i + po
+                        w(ip, jp, kp, irhoE) = buffer(i, j, k) * pScale
+                    end do
+                end do
+            end do
+
+            ! Energy has been read, so a return can be made.
+
+            return
+
+        end if testRhoEPresent
+
+        ! Total energy is not present. Check for the pressure.
+
+        nn = bsearchStrings(cgnsPressure, varNames)
+
+        testPressure: if (nn > 0) then
+
+            ! Pressure is present. First determine whether or not a type
+            ! mismatch occurs. If so, update nTypeMismatch.
+
+            if (realTypeCGNS /= varTypes(nn)) &
+                nTypeMismatch = nTypeMismatch + 1
+
+            ! Read the pressure from the restart file and store it in buffer.
+
+            call readRestartVariable(varNames(nn))
+
+            ! Compute the total energy. This depends whether or not
+            ! a turbulent kinetic energy is present. Take the possible
+            ! pointer offset into account.
+            ! As this routine is only called to construct the states in
+            ! the past for a time accurate computation, the momentum is
+            ! stored and not the velocity.
+
+            if (kPresent) then
+
+                do k = kBeg, kEnd
+                    kp = k + po
+                    do j = jBeg, jEnd
+                        jp = j + po
+                        do i = iBeg, iEnd
+                            ip = i + po
+                            rhoInv = one / w(ip, jp, kp, irho)
+                            vvx = w(ip, jp, kp, imx) * rhoInv
+                            vvy = w(ip, jp, kp, imy) * rhoInv
+                            vvz = w(ip, jp, kp, imz) * rhoInv
+                            pres = buffer(i, j, k) * pScale
+                            call etot(w(ip, jp, kp, irho), vvx, vvy, vvz, pres, &
+                                      w(ip, jp, kp, itu1), w(ip, jp, kp, irhoE), &
+                                      kPresent)
+                        end do
+                    end do
+                end do
 
-          do k=kBeg,kEnd
-             kp = k+po
-             do j=jBeg,jEnd
-                jp = j+po
-                do i=iBeg,iEnd
-                   ip = i+po
-                   rhoInv = one/w(ip,jp,kp,irho)
-                   vvx     = w(ip,jp,kp,imx)*rhoInv
-                   vvy     = w(ip,jp,kp,imy)*rhoInv
-                   vvz     = w(ip,jp,kp,imz)*rhoInv
-                   pres   = buffer(i,j,k)*pScale
-                   call etot(w(ip,jp,kp,irho), vvx, vvy, vvz, pres, &
-                        dummyK, w(ip,jp,kp,irhoE), kPresent)
-                enddo
-             enddo
-          enddo
+            else
 
-       endif
+                dummyK = zero
 
-       ! Energy has been created. So a return can be made.
+                do k = kBeg, kEnd
+                    kp = k + po
+                    do j = jBeg, jEnd
+                        jp = j + po
+                        do i = iBeg, iEnd
+                            ip = i + po
+                            rhoInv = one / w(ip, jp, kp, irho)
+                            vvx = w(ip, jp, kp, imx) * rhoInv
+                            vvy = w(ip, jp, kp, imy) * rhoInv
+                            vvz = w(ip, jp, kp, imz) * rhoInv
+                            pres = buffer(i, j, k) * pScale
+                            call etot(w(ip, jp, kp, irho), vvx, vvy, vvz, pres, &
+                                      dummyK, w(ip, jp, kp, irhoE), kPresent)
+                        end do
+                    end do
+                end do
 
-       return
-
-    endif testPressure
+            end if
 
-    ! Energy could not be created. Terminate.
+            ! Energy has been created. So a return can be made.
 
-    call terminate("readEnergy", &
-         "Energy could not be created")
+            return
+
+        end if testPressure
 
-  end subroutine readEnergy
+        ! Energy could not be created. Terminate.
 
-  subroutine readPressure(nTypeMismatch)
-    !
-    !       readPressure reads the pressure variable from the given place
-    !       in the cgns file. If the pressure itself is not present it is
-    !       tried to construct if from other variables. In that case it is
-    !       assumed that the density, velocity and turbulent variables are
-    !       already stored in the pointer variable w.
-    !       If it is not possible to create the pressure an error message
-    !       is printed and the program will stop.
-    !       It is assumed that the pointers in blockPointers already
-    !       point to the correct block.
-    !
-    use constants
-    use cgnsNames
-    use blockPointers, only : w, nbklocal
-    use IOModule, only : IOVar
-    use utils, only : setCGNSRealType, terminate
-    use flowUtils, only : computePressure
-    use sorting, only : bsearchStrings
-    implicit none
-    !
-    !      Subroutine argument.
-    !
-    integer(kind=intType), intent(inout) :: nTypeMismatch
-    !
-    !      Local variables
-    !
-    integer :: realTypeCGNS
-
-    integer(kind=intType) :: i, j, k, nn, po, ip, jp, kp
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
-
-    ! Set the cell range to be copied from the buffer.
-
-    iBeg = lbound(buffer,1); iEnd = ubound(buffer,1)
-    jBeg = lbound(buffer,2); jEnd = ubound(buffer,2)
-    kBeg = lbound(buffer,3); kEnd = ubound(buffer,3)
-
-    ! Set the cgns real type and abbreviate the solution variable and
-    ! the pointer offset to improve readability.
-
-    realTypeCGNS = setCGNSRealType()
-
-    po = IOVar(nbkLocal,solID)%pointerOffset
-    w => IOVar(nbkLocal,solID)%w
-
-    ! Find out if the pressure is present in the solution file.
-
-    nn = bsearchStrings(cgnsPressure, varNames)
-    if(nn > 0) then
-
-       ! Pressure is present. First determine whether or not a type
-       ! mismatch occurs. If so, update nTypeMismatch.
-
-       if(realTypeCGNS /= varTypes(nn)) &
-            nTypeMismatch = nTypeMismatch + 1
-
-       ! Read the pressure from the restart file and store
-       ! it in buffer.
-
-       call readRestartVariable(varNames(nn))
-
-       ! Copy the variables from buffer into the position of rhoE
-       ! in w. Multiply by the pressure scale factor to obtain the
-       ! correct nondimensional value and take the possible pointer
-       ! offset into account.
-
-       do k=kBeg,kEnd
-          kp = k+po
-          do j=jBeg,jEnd
-             jp = j+po
-             do i=iBeg,iEnd
-                ip = i+po
-                w(ip,jp,kp,irhoE) = buffer(i,j,k)*pScale
-             enddo
-          enddo
-       enddo
+        call terminate("readEnergy", &
+                       "Energy could not be created")
 
-       ! Pressure is read, so a return can be made.
+    end subroutine readEnergy
 
-       return
+    subroutine readPressure(nTypeMismatch)
+        !
+        !       readPressure reads the pressure variable from the given place
+        !       in the cgns file. If the pressure itself is not present it is
+        !       tried to construct if from other variables. In that case it is
+        !       assumed that the density, velocity and turbulent variables are
+        !       already stored in the pointer variable w.
+        !       If it is not possible to create the pressure an error message
+        !       is printed and the program will stop.
+        !       It is assumed that the pointers in blockPointers already
+        !       point to the correct block.
+        !
+        use constants
+        use cgnsNames
+        use blockPointers, only: w, nbklocal
+        use IOModule, only: IOVar
+        use utils, only: setCGNSRealType, terminate
+        use flowUtils, only: computePressure
+        use sorting, only: bsearchStrings
+        implicit none
+        !
+        !      Subroutine argument.
+        !
+        integer(kind=intType), intent(inout) :: nTypeMismatch
+        !
+        !      Local variables
+        !
+        integer :: realTypeCGNS
+
+        integer(kind=intType) :: i, j, k, nn, po, ip, jp, kp
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
+
+        ! Set the cell range to be copied from the buffer.
+
+        iBeg = lbound(buffer, 1); iEnd = ubound(buffer, 1)
+        jBeg = lbound(buffer, 2); jEnd = ubound(buffer, 2)
+        kBeg = lbound(buffer, 3); kEnd = ubound(buffer, 3)
+
+        ! Set the cgns real type and abbreviate the solution variable and
+        ! the pointer offset to improve readability.
+
+        realTypeCGNS = setCGNSRealType()
+
+        po = IOVar(nbkLocal, solID)%pointerOffset
+        w => IOVar(nbkLocal, solID)%w
+
+        ! Find out if the pressure is present in the solution file.
+
+        nn = bsearchStrings(cgnsPressure, varNames)
+        if (nn > 0) then
+
+            ! Pressure is present. First determine whether or not a type
+            ! mismatch occurs. If so, update nTypeMismatch.
+
+            if (realTypeCGNS /= varTypes(nn)) &
+                nTypeMismatch = nTypeMismatch + 1
+
+            ! Read the pressure from the restart file and store
+            ! it in buffer.
+
+            call readRestartVariable(varNames(nn))
+
+            ! Copy the variables from buffer into the position of rhoE
+            ! in w. Multiply by the pressure scale factor to obtain the
+            ! correct nondimensional value and take the possible pointer
+            ! offset into account.
+
+            do k = kBeg, kEnd
+                kp = k + po
+                do j = jBeg, jEnd
+                    jp = j + po
+                    do i = iBeg, iEnd
+                        ip = i + po
+                        w(ip, jp, kp, irhoE) = buffer(i, j, k) * pScale
+                    end do
+                end do
+            end do
 
-    endif
+            ! Pressure is read, so a return can be made.
 
-    ! Pressure is not present. Check for the total energy.
+            return
 
-    nn = bsearchStrings(cgnsEnergy, varNames)
-    if(nn > 0) then
+        end if
 
-       ! Total energy is present. First determine whether or not a type
-       ! mismatch occurs. If so, update nTypeMismatch.
+        ! Pressure is not present. Check for the total energy.
 
-       if(realTypeCGNS /= varTypes(nn)) &
-            nTypeMismatch = nTypeMismatch + 1
+        nn = bsearchStrings(cgnsEnergy, varNames)
+        if (nn > 0) then
 
-       ! Read the total energy from the restart file and store
-       ! it in buffer.
+            ! Total energy is present. First determine whether or not a type
+            ! mismatch occurs. If so, update nTypeMismatch.
 
-       call readRestartVariable(varNames(nn))
+            if (realTypeCGNS /= varTypes(nn)) &
+                nTypeMismatch = nTypeMismatch + 1
 
-       ! Copy the variables from buffer into w. Multiply by the
-       ! pressure scale factor to obtain the correct nondimensional
-       ! value and take the possible pointer offset into account.
+            ! Read the total energy from the restart file and store
+            ! it in buffer.
 
-       do k=kBeg,kEnd
-          kp = k+po
-          do j=jBeg,jEnd
-             jp = j+po
-             do i=iBeg,iEnd
-                ip = i+po
-                w(ip,jp,kp,irhoE) = buffer(i,j,k)*pScale
-             enddo
-          enddo
-       enddo
+            call readRestartVariable(varNames(nn))
 
-       ! Compute the pressure from energy, density and velocities.
-       ! This will still be stored in the irhoE position of w.
+            ! Copy the variables from buffer into w. Multiply by the
+            ! pressure scale factor to obtain the correct nondimensional
+            ! value and take the possible pointer offset into account.
 
-       call computePressure(iBeg,iEnd,jBeg,jEnd,kBeg,kEnd,po)
+            do k = kBeg, kEnd
+                kp = k + po
+                do j = jBeg, jEnd
+                    jp = j + po
+                    do i = iBeg, iEnd
+                        ip = i + po
+                        w(ip, jp, kp, irhoE) = buffer(i, j, k) * pScale
+                    end do
+                end do
+            end do
 
-       ! Pressure is constructed, so a return can be made.
+            ! Compute the pressure from energy, density and velocities.
+            ! This will still be stored in the irhoE position of w.
 
-       return
+            call computePressure(iBeg, iEnd, jBeg, jEnd, kBeg, kEnd, po)
 
-    endif
+            ! Pressure is constructed, so a return can be made.
 
-    ! Not able to determine the pressure.
-    ! Print an error message and exit.
+            return
 
-    call terminate("readPressure", &
-         "Not able to retrieve the pressure from &
-         &the variables in the restart file.")
+        end if
 
-  end subroutine readPressure
+        ! Not able to determine the pressure.
+        ! Print an error message and exit.
 
-  subroutine readTurbEddyVis(nTypeMismatch, eddyVisPresent)
-    !
-    !       readTurbEddyVis tries to read the eddy viscosity from the
-    !       restart file.
-    !
-    use constants
-    use cgnsNames
-    use blockPointers, only : w, nbklocal, il, jl, kl, rev
-    use utils, only : setCGNSRealType, terminate
-    use sorting, only : bsearchStrings
-    use flowVarRefState, only : muInf
-    use inputPhysics, only : eddyVisInfRatio
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(inout) :: nTypeMismatch
-    logical, intent(out)                 :: eddyVisPresent
-    !
-    !      Local variables.
-    !
-    integer :: realTypeCGNS
+        call terminate("readPressure", &
+             "Not able to retrieve the pressure from &
+             &the variables in the restart file.")
 
-    integer(kind=intType) :: i, j, k, nn
+    end subroutine readPressure
 
-    ! Set the cgns real type
+    subroutine readTurbEddyVis(nTypeMismatch, eddyVisPresent)
+        !
+        !       readTurbEddyVis tries to read the eddy viscosity from the
+        !       restart file.
+        !
+        use constants
+        use cgnsNames
+        use blockPointers, only: w, nbklocal, il, jl, kl, rev
+        use utils, only: setCGNSRealType, terminate
+        use sorting, only: bsearchStrings
+        use flowVarRefState, only: muInf
+        use inputPhysics, only: eddyVisInfRatio
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(inout) :: nTypeMismatch
+        logical, intent(out) :: eddyVisPresent
+        !
+        !      Local variables.
+        !
+        integer :: realTypeCGNS
 
-    realTypeCGNS = setCGNSRealType()
+        integer(kind=intType) :: i, j, k, nn
 
-    ! Check if the eddy viscosity is present. If so, read it.
+        ! Set the cgns real type
 
-    nn = bsearchStrings(cgnsEddy, varNames)
+        realTypeCGNS = setCGNSRealType()
 
-    if(nn > 0) then
+        ! Check if the eddy viscosity is present. If so, read it.
 
-       ! Eddy viscosity is present. Determine if a type mismatch
-       ! occured, read the buffer from the file and set
-       ! eddyVisPresent to .true.
+        nn = bsearchStrings(cgnsEddy, varNames)
 
-       if(realTypeCGNS /= varTypes(nn)) &
-            nTypeMismatch = nTypeMismatch + 1
+        if (nn > 0) then
 
-       call readRestartVariable(varNames(nn))
+            ! Eddy viscosity is present. Determine if a type mismatch
+            ! occured, read the buffer from the file and set
+            ! eddyVisPresent to .true.
 
-       eddyVisPresent = .true.
+            if (realTypeCGNS /= varTypes(nn)) &
+                nTypeMismatch = nTypeMismatch + 1
 
-       ! Scale the eddy viscosity such that it contains the
-       ! correct nonDimensional value.
+            call readRestartVariable(varNames(nn))
 
-       do k=2,kl
-          do j=2,jl
-             do i=2,il
-                rev(i,j,k) = muScale*buffer(i,j,k)
-             enddo
-          enddo
-       enddo
+            eddyVisPresent = .true.
 
-       ! Eddy viscosity has been read, so make a return.
+            ! Scale the eddy viscosity such that it contains the
+            ! correct nonDimensional value.
 
-       return
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+                        rev(i, j, k) = muScale * buffer(i, j, k)
+                    end do
+                end do
+            end do
 
-    endif
+            ! Eddy viscosity has been read, so make a return.
 
-    ! Eddy viscosity is not present. Check if the eddy viscosity
-    ! ratio is present. If so read it and construct the eddy
-    ! viscosity from it.
+            return
 
-    nn = bsearchStrings(cgnsEddyRatio, varNames)
+        end if
 
-    if(nn > 0) then
+        ! Eddy viscosity is not present. Check if the eddy viscosity
+        ! ratio is present. If so read it and construct the eddy
+        ! viscosity from it.
 
-       ! Eddy viscosity ratio is present. Determine if a type
-       ! mismatch occured, read the buffer from the file and set
-       ! eddyVisPresent to .true.
+        nn = bsearchStrings(cgnsEddyRatio, varNames)
 
-       if(realTypeCGNS /= varTypes(nn)) &
-            nTypeMismatch = nTypeMismatch + 1
+        if (nn > 0) then
 
-       call readRestartVariable(varNames(nn))
+            ! Eddy viscosity ratio is present. Determine if a type
+            ! mismatch occured, read the buffer from the file and set
+            ! eddyVisPresent to .true.
 
-       eddyVisPresent = .true.
+            if (realTypeCGNS /= varTypes(nn)) &
+                nTypeMismatch = nTypeMismatch + 1
 
-       ! Multiply the eddy viscosity by the laminar viscosity such
-       ! that it contains the correct nonDimensional value.
-       ! As the laminar viscosity is not yet know, use the free
-       ! stream viscosity.
+            call readRestartVariable(varNames(nn))
 
-       do k=2,kl
-          do j=2,jl
-             do i=2,il
-                rev(i,j,k) = muInf*buffer(i,j,k)
-             enddo
-          enddo
-       enddo
+            eddyVisPresent = .true.
 
-       ! Eddy viscosity has been read, so make a return.
+            ! Multiply the eddy viscosity by the laminar viscosity such
+            ! that it contains the correct nonDimensional value.
+            ! As the laminar viscosity is not yet know, use the free
+            ! stream viscosity.
 
-       return
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+                        rev(i, j, k) = muInf * buffer(i, j, k)
+                    end do
+                end do
+            end do
 
-    endif
+            ! Eddy viscosity has been read, so make a return.
 
-    ! Eddy viscosity cannot be constructed. Set it to the
-    ! free stream eddy viscosity.
+            return
 
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-             rev(i,j,k) = muInf*eddyVisInfRatio
-          enddo
-       enddo
-    enddo
+        end if
 
-    ! Eddy viscosity is not present, so set it to .false.
+        ! Eddy viscosity cannot be constructed. Set it to the
+        ! free stream eddy viscosity.
 
-    eddyVisPresent = .false.
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    rev(i, j, k) = muInf * eddyVisInfRatio
+                end do
+            end do
+        end do
 
-  end subroutine readTurbEddyVis
+        ! Eddy viscosity is not present, so set it to .false.
 
-  subroutine readTurbKwType(nTypeMismatch)
-    !
-    !       readTurbKwType reads or constructs the k and omega values
-    !       for two-equations turbulence models of the k-omega type.
-    !       If no information could be retrieved some engineering guess of
-    !       the turbulent variables is made.
-    !
-    use constants
-    use cgnsNames
-    use communication, only : myid
-    use blockPointers, only : w, nbklocal, il, jl, kl, rlv, rev
-    use IOModule, only : IOVar
-    use utils, only : setCGNSRealType, terminate
-    use sorting, only : bsearchStrings
-    use flowVarRefState, only : muInf
-    use inputPhysics, only : turbModel
-    use turbUtils, only : initKOmega
-    implicit none
-    !
-    !      Subroutine argument.
-    !
-    integer(kind=intType), intent(inout) :: nTypeMismatch
-    !
-    !      Local variables.
-    !
-    integer :: realTypeCGNS
+        eddyVisPresent = .false.
 
-    integer(kind=intType) :: i, j, k, nn, po, ip, jp, kp
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
+    end subroutine readTurbEddyVis
 
-    real(kind=realType) :: nuScale, kScale, omegaScale, val
+    subroutine readTurbKwType(nTypeMismatch)
+        !
+        !       readTurbKwType reads or constructs the k and omega values
+        !       for two-equations turbulence models of the k-omega type.
+        !       If no information could be retrieved some engineering guess of
+        !       the turbulent variables is made.
+        !
+        use constants
+        use cgnsNames
+        use communication, only: myid
+        use blockPointers, only: w, nbklocal, il, jl, kl, rlv, rev
+        use IOModule, only: IOVar
+        use utils, only: setCGNSRealType, terminate
+        use sorting, only: bsearchStrings
+        use flowVarRefState, only: muInf
+        use inputPhysics, only: turbModel
+        use turbUtils, only: initKOmega
+        implicit none
+        !
+        !      Subroutine argument.
+        !
+        integer(kind=intType), intent(inout) :: nTypeMismatch
+        !
+        !      Local variables.
+        !
+        integer :: realTypeCGNS
 
-    logical :: turbKPresent, omegaPresent, eddyVisPresent
+        integer(kind=intType) :: i, j, k, nn, po, ip, jp, kp
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
 
-    ! Set the cell range to be copied from the buffer.
+        real(kind=realType) :: nuScale, kScale, omegaScale, val
 
-    iBeg = lbound(buffer,1); iEnd = ubound(buffer,1)
-    jBeg = lbound(buffer,2); jEnd = ubound(buffer,2)
-    kBeg = lbound(buffer,3); kEnd = ubound(buffer,3)
+        logical :: turbKPresent, omegaPresent, eddyVisPresent
 
-    ! Set the cgns real type and abbreviate the solution variable and
-    ! the pointer offset to improve readability.
+        ! Set the cell range to be copied from the buffer.
 
-    realTypeCGNS = setCGNSRealType()
+        iBeg = lbound(buffer, 1); iEnd = ubound(buffer, 1)
+        jBeg = lbound(buffer, 2); jEnd = ubound(buffer, 2)
+        kBeg = lbound(buffer, 3); kEnd = ubound(buffer, 3)
 
-    po = IOVar(nbkLocal,solID)%pointerOffset
-    w => IOVar(nbkLocal,solID)%w
+        ! Set the cgns real type and abbreviate the solution variable and
+        ! the pointer offset to improve readability.
 
-    ! Compute the scales for nu, k and omega.
+        realTypeCGNS = setCGNSRealType()
 
-    nuScale    = muScale/rhoScale
-    kScale     = velScale**2
-    omegaScale = kScale/nuScale
+        po = IOVar(nbkLocal, solID)%pointerOffset
+        w => IOVar(nbkLocal, solID)%w
 
-    ! First check if k is present.
+        ! Compute the scales for nu, k and omega.
 
-    turbKPresent = .false.
+        nuScale = muScale / rhoScale
+        kScale = velScale**2
+        omegaScale = kScale / nuScale
 
-    nn = bsearchStrings(cgnsTurbK, varNames)
+        ! First check if k is present.
 
-    if(nn > 0) then
+        turbKPresent = .false.
 
-       ! K is present. First determine whether or not a type
-       ! mismatch occurs. If so, update nTypeMismatch.
+        nn = bsearchStrings(cgnsTurbK, varNames)
 
-       if(realTypeCGNS /= varTypes(nn)) &
-            nTypeMismatch = nTypeMismatch + 1
+        if (nn > 0) then
 
-       ! Read k from the restart file and store it in buffer.
+            ! K is present. First determine whether or not a type
+            ! mismatch occurs. If so, update nTypeMismatch.
 
-       call readRestartVariable(varNames(nn))
+            if (realTypeCGNS /= varTypes(nn)) &
+                nTypeMismatch = nTypeMismatch + 1
 
-       ! Copy the variables from buffer into w. Multiply by the scale
-       ! factor to obtain the correct non-dimensional value and take
-       ! the possible pointer offset into account.
+            ! Read k from the restart file and store it in buffer.
 
-       do k=kBeg,kEnd
-          kp = k+po
-          do j=jBeg,jEnd
-             jp = j+po
-             do i=iBeg,iEnd
-                ip = i+po
-                w(ip,jp,kp,itu1) = kScale*buffer(i,j,k)
-             enddo
-          enddo
-       enddo
+            call readRestartVariable(varNames(nn))
 
-       ! Set turbKPresent to .true.
+            ! Copy the variables from buffer into w. Multiply by the scale
+            ! factor to obtain the correct non-dimensional value and take
+            ! the possible pointer offset into account.
 
-       turbKPresent = .true.
+            do k = kBeg, kEnd
+                kp = k + po
+                do j = jBeg, jEnd
+                    jp = j + po
+                    do i = iBeg, iEnd
+                        ip = i + po
+                        w(ip, jp, kp, itu1) = kScale * buffer(i, j, k)
+                    end do
+                end do
+            end do
 
-    endif
+            ! Set turbKPresent to .true.
 
-    ! Check if omega is present.
+            turbKPresent = .true.
 
-    omegaPresent = .false.
+        end if
 
-    nn = bsearchStrings(cgnsTurbOmega, varNames)
+        ! Check if omega is present.
 
-    if(nn > 0) then
+        omegaPresent = .false.
 
-       ! Omega is present. First determine whether or not a type
-       ! mismatch occurs. If so, update nTypeMismatch.
+        nn = bsearchStrings(cgnsTurbOmega, varNames)
 
-       if(realTypeCGNS /= varTypes(nn)) &
-            nTypeMismatch = nTypeMismatch + 1
+        if (nn > 0) then
 
-       ! Read omega from the restart file and store it in buffer.
+            ! Omega is present. First determine whether or not a type
+            ! mismatch occurs. If so, update nTypeMismatch.
 
-       call readRestartVariable(varNames(nn))
+            if (realTypeCGNS /= varTypes(nn)) &
+                nTypeMismatch = nTypeMismatch + 1
 
-       ! Copy the variables from buffer into w. Multiply by the scale
-       ! factor to obtain the correct non-dimensional value and take
-       ! the possible pointer offset into account.
+            ! Read omega from the restart file and store it in buffer.
 
-       do k=kBeg,kEnd
-          kp = k+po
-          do j=jBeg,jEnd
-             jp = j+po
-             do i=iBeg,iEnd
-                ip = i+po
-                w(ip,jp,kp,itu2) = omegaScale*buffer(i,j,k)
-             enddo
-          enddo
-       enddo
+            call readRestartVariable(varNames(nn))
 
-       ! Set omegaPresent to .true.
+            ! Copy the variables from buffer into w. Multiply by the scale
+            ! factor to obtain the correct non-dimensional value and take
+            ! the possible pointer offset into account.
 
-       omegaPresent = .true.
+            do k = kBeg, kEnd
+                kp = k + po
+                do j = jBeg, jEnd
+                    jp = j + po
+                    do i = iBeg, iEnd
+                        ip = i + po
+                        w(ip, jp, kp, itu2) = omegaScale * buffer(i, j, k)
+                    end do
+                end do
+            end do
 
-    endif
+            ! Set omegaPresent to .true.
 
-    ! If omega is not present, check if tau is present and
-    ! initialize omega accordingly.
+            omegaPresent = .true.
 
-    if(.not. omegaPresent) then
+        end if
 
-       nn = bsearchStrings(cgnsTurbTau, varNames)
+        ! If omega is not present, check if tau is present and
+        ! initialize omega accordingly.
 
-       if(nn > 0) then
+        if (.not. omegaPresent) then
 
-          ! Tau is present. First determine whether or not a type
-          ! mismatch occurs. If so, update nTypeMismatch.
+            nn = bsearchStrings(cgnsTurbTau, varNames)
 
-          if(realTypeCGNS /= varTypes(nn)) &
-               nTypeMismatch = nTypeMismatch + 1
+            if (nn > 0) then
 
-          ! Read tau from the restart file and store it in buffer.
+                ! Tau is present. First determine whether or not a type
+                ! mismatch occurs. If so, update nTypeMismatch.
 
-          call readRestartVariable(varNames(nn))
+                if (realTypeCGNS /= varTypes(nn)) &
+                    nTypeMismatch = nTypeMismatch + 1
 
-          ! Transform tau to omega and copy the variables from buffer
-          ! into w. Multiply by the scale factor to obtain the correct
-          ! non-dimensional value and take the possible pointer offset
-          ! into account.
+                ! Read tau from the restart file and store it in buffer.
 
-          do k=kBeg,kEnd
-             kp = k+po
-             do j=jBeg,jEnd
-                jp = j+po
-                do i=iBeg,iEnd
-                   ip = i+po
+                call readRestartVariable(varNames(nn))
 
-                   val = buffer(i,j,k)
-                   w(ip,jp,kp,itu2) = omegaScale/max(eps,val)
-                enddo
-             enddo
-          enddo
+                ! Transform tau to omega and copy the variables from buffer
+                ! into w. Multiply by the scale factor to obtain the correct
+                ! non-dimensional value and take the possible pointer offset
+                ! into account.
 
-          ! Set omegaPresent to .true.
+                do k = kBeg, kEnd
+                    kp = k + po
+                    do j = jBeg, jEnd
+                        jp = j + po
+                        do i = iBeg, iEnd
+                            ip = i + po
 
-          omegaPresent = .true.
+                            val = buffer(i, j, k)
+                            w(ip, jp, kp, itu2) = omegaScale / max(eps, val)
+                        end do
+                    end do
+                end do
 
-       endif
+                ! Set omegaPresent to .true.
 
-    endif
+                omegaPresent = .true.
 
-    ! Check if both variables were present.
-    ! If so go to the check to transform omega to tau.
+            end if
 
-    if(turbKPresent .and. omegaPresent) goto 1001
+        end if
 
-    ! K and omega are not both present. It is tried to construct
-    ! their values with the information that is present.
+        ! Check if both variables were present.
+        ! If so go to the check to transform omega to tau.
 
-    ! Try to read the eddy viscosity.
+        if (turbKPresent .and. omegaPresent) goto 1001
 
-    call readTurbEddyVis(nTypeMismatch, eddyVisPresent)
+        ! K and omega are not both present. It is tried to construct
+        ! their values with the information that is present.
 
-    ! The eddy viscosity is either known or still initialized
-    ! to the free stream value. In any case determine the
-    ! situation we are dealing with and try to initialize k and
-    ! omega accordingly.
+        ! Try to read the eddy viscosity.
 
-    if( turbKPresent ) then
+        call readTurbEddyVis(nTypeMismatch, eddyVisPresent)
 
-       ! K is present. Compute omega using the eddy viscosity.
-       ! Assume that the standard k-omega formula is also valid
-       ! for the SST-model.
-       ! Take the possible pointer offset into account.
+        ! The eddy viscosity is either known or still initialized
+        ! to the free stream value. In any case determine the
+        ! situation we are dealing with and try to initialize k and
+        ! omega accordingly.
 
-       do k=kBeg,kEnd
-          kp = k+po
-          do j=jBeg,jEnd
-             jp = j+po
-             do i=iBeg,iEnd
-                ip = i+po
-                w(ip,jp,kp,itu2) = w(ip,jp,kp,irho)*w(ip,jp,kp,itu1) &
-                     / rev(i,j,k)
-             enddo
-          enddo
-       enddo
+        if (turbKPresent) then
 
-       ! Print a warning that omega was not present and has been
-       ! constructed. Only processor 0 does this for block 1.
+            ! K is present. Compute omega using the eddy viscosity.
+            ! Assume that the standard k-omega formula is also valid
+            ! for the SST-model.
+            ! Take the possible pointer offset into account.
 
-       if((myID == 0) .and. (nbkLocal == 1)) then
+            do k = kBeg, kEnd
+                kp = k + po
+                do j = jBeg, jEnd
+                    jp = j + po
+                    do i = iBeg, iEnd
+                        ip = i + po
+                        w(ip, jp, kp, itu2) = w(ip, jp, kp, irho) * w(ip, jp, kp, itu1) &
+                                              / rev(i, j, k)
+                    end do
+                end do
+            end do
 
-          print "(a)", "#"
-          print "(a)", "#                 Warning"
-          print "(a)", "# Omega is not present in the restart file."
-          if( eddyVisPresent ) then
-             print "(a)", "# It is initialized using the turbulent &
-                  &kinetic energy and eddy viscosity."
-          else
-             print "(a)", "# It is initialized using the turbulent &
-                  &kinetic energy and free stream eddy &
-                  &viscosity."
-          endif
-          print "(a)", "#"
+            ! Print a warning that omega was not present and has been
+            ! constructed. Only processor 0 does this for block 1.
 
-       endif
+            if ((myID == 0) .and. (nbkLocal == 1)) then
 
-       ! K and omega are initialized.
-       ! Go to the check to transform omega to tau.
+                print "(a)", "#"
+                print "(a)", "#                 Warning"
+                print "(a)", "# Omega is not present in the restart file."
+                if (eddyVisPresent) then
+                    print "(a)", "# It is initialized using the turbulent &
+                         &kinetic energy and eddy viscosity."
+                else
+                    print "(a)", "# It is initialized using the turbulent &
+                         &kinetic energy and free stream eddy &
+                         &viscosity."
+                end if
+                print "(a)", "#"
 
-       goto 1001
+            end if
 
-    endif
+            ! K and omega are initialized.
+            ! Go to the check to transform omega to tau.
 
-    if( omegaPresent ) then
+            goto 1001
 
-       ! Omega is present. Compute k using the eddy viscosity.
-       ! Assume that the standard k-omega formula is also valid
-       ! for the SST-model.
-       ! Take the possible pointer offset into account.
-
-       do k=kBeg,kEnd
-          kp = k+po
-          do j=jBeg,jEnd
-             jp = j+po
-             do i=iBeg,iEnd
-                ip = i+po
-                w(ip,jp,kp,itu1) = rev(i,j,k)*w(ip,jp,kp,itu2) &
-                     / w(ip,jp,kp,irho)
-             enddo
-          enddo
-       enddo
-
-       ! Print a warning that k was not present and has been
-       ! constructed. Only processor 0 does this for block 1.
-
-       if((myID == 0) .and. (nbkLocal == 1)) then
-
-          print "(a)", "#"
-          print "(a)", "#                 Warning"
-          print "(a)", "# Turbulent kinetic energy is not present &
-               &in the restart file."
-          if( eddyVisPresent ) then
-             print "(a)", "# It is initialized using omega and &
-                  &the eddy viscosity."
-          else
-             print "(a)", "# It is initialized using omega and &
-                  &the free stream eddy viscosity."
-          endif
-          print "(a)", "#"
-
-       endif
-
-       ! K and omega are initialized.
-       ! Go to the check to transform omega to tau.
-
-       goto 1001
-
-    endif
-
-    ! Both k and omega are not present. Use a guess for omega
-    ! and compute k using the known value of the eddy viscosity.
-    ! As the laminar viscosity is not yet known, set it to the
-    ! free-stream value.
-
-    rlv = muInf
-    call initKOmega(po)
-
-    ! Print a warning that both k and omega are not present in
-    ! the restart file. Only processor 0 does this for block 1.
-
-    if((myID == 0) .and. (nbkLocal == 1)) then
-
-       print "(a)", "#"
-       print "(a)", "#                 Warning"
-       print "(a)", "# The turbulent kinetic energy and omega are &
-            &not present in the restart file."
-       if( eddyVisPresent ) then
-          print "(a)", "# They have been initialized using the &
-               &eddy viscosity."
-       else
-          print "(a)", "# The default initialization has been used."
-       endif
-
-       print "(a)", "#"
-
-    endif
-
-    ! For the k-tau model omega must be transformed to tau.
-    ! Take the possible pointer offset into account.
-
-1001 select case (turbModel)
-
-    case (ktau)
-
-       do k=kBeg,kEnd
-          kp = k+po
-          do j=jBeg,jEnd
-             jp = j+po
-             do i=iBeg,iEnd
-                ip = i+po
-                w(ip,jp,kp,itu2) = one/w(ip,jp,kp,itu2)
-             enddo
-          enddo
-       enddo
+        end if
 
-    end select
+        if (omegaPresent) then
 
-  end subroutine readTurbKwType
+            ! Omega is present. Compute k using the eddy viscosity.
+            ! Assume that the standard k-omega formula is also valid
+            ! for the SST-model.
+            ! Take the possible pointer offset into account.
+
+            do k = kBeg, kEnd
+                kp = k + po
+                do j = jBeg, jEnd
+                    jp = j + po
+                    do i = iBeg, iEnd
+                        ip = i + po
+                        w(ip, jp, kp, itu1) = rev(i, j, k) * w(ip, jp, kp, itu2) &
+                                              / w(ip, jp, kp, irho)
+                    end do
+                end do
+            end do
+
+            ! Print a warning that k was not present and has been
+            ! constructed. Only processor 0 does this for block 1.
+
+            if ((myID == 0) .and. (nbkLocal == 1)) then
+
+                print "(a)", "#"
+                print "(a)", "#                 Warning"
+                print "(a)", "# Turbulent kinetic energy is not present &
+                     &in the restart file."
+                if (eddyVisPresent) then
+                    print "(a)", "# It is initialized using omega and &
+                         &the eddy viscosity."
+                else
+                    print "(a)", "# It is initialized using omega and &
+                         &the free stream eddy viscosity."
+                end if
+                print "(a)", "#"
+
+            end if
+
+            ! K and omega are initialized.
+            ! Go to the check to transform omega to tau.
+
+            goto 1001
+
+        end if
+
+        ! Both k and omega are not present. Use a guess for omega
+        ! and compute k using the known value of the eddy viscosity.
+        ! As the laminar viscosity is not yet known, set it to the
+        ! free-stream value.
+
+        rlv = muInf
+        call initKOmega(po)
+
+        ! Print a warning that both k and omega are not present in
+        ! the restart file. Only processor 0 does this for block 1.
+
+        if ((myID == 0) .and. (nbkLocal == 1)) then
+
+            print "(a)", "#"
+            print "(a)", "#                 Warning"
+            print "(a)", "# The turbulent kinetic energy and omega are &
+                 &not present in the restart file."
+            if (eddyVisPresent) then
+                print "(a)", "# They have been initialized using the &
+                     &eddy viscosity."
+            else
+                print "(a)", "# The default initialization has been used."
+            end if
+
+            print "(a)", "#"
+
+        end if
+
+        ! For the k-tau model omega must be transformed to tau.
+        ! Take the possible pointer offset into account.
+
+1001    select case (turbModel)
+
+        case (ktau)
+
+            do k = kBeg, kEnd
+                kp = k + po
+                do j = jBeg, jEnd
+                    jp = j + po
+                    do i = iBeg, iEnd
+                        ip = i + po
+                        w(ip, jp, kp, itu2) = one / w(ip, jp, kp, itu2)
+                    end do
+                end do
+            end do
 
-  subroutine readTurbSA(nTypeMismatch)
-    !
-    !       readTurbSA reads or constructs the nu tilde transport
-    !       variable for the Spalart-Allmaras type turbulence models.
-    !       If no information could be retrieved some engineering guess of
-    !       the turbulent variables is made.
-    !
-    use constants
-    use cgnsNames
-    use communication, only : myid
-    use blockPointers, only : w, nbklocal, rlv, rev
-    use IOModule, only : IOVar
-    use utils, only : setCGNSRealType, terminate
-    use sorting, only : bsearchStrings
-    use flowVarRefState, only : muInf, wInf
-    use turbUtils, only : saNuKnownEddyRatio
-    implicit none
-    !
-    !      Subroutine argument.
-    !
-    integer(kind=intType), intent(inout) :: nTypeMismatch
-    !
-    !      Local variables.
-    !
-    integer :: realTypeCGNS
+        end select
 
-    integer(kind=intType) :: i, j, k, nn, po, ip, jp, kp
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
+    end subroutine readTurbKwType
 
-    real(kind=realType) :: nuScale, ratio, nu
+    subroutine readTurbSA(nTypeMismatch)
+        !
+        !       readTurbSA reads or constructs the nu tilde transport
+        !       variable for the Spalart-Allmaras type turbulence models.
+        !       If no information could be retrieved some engineering guess of
+        !       the turbulent variables is made.
+        !
+        use constants
+        use cgnsNames
+        use communication, only: myid
+        use blockPointers, only: w, nbklocal, rlv, rev
+        use IOModule, only: IOVar
+        use utils, only: setCGNSRealType, terminate
+        use sorting, only: bsearchStrings
+        use flowVarRefState, only: muInf, wInf
+        use turbUtils, only: saNuKnownEddyRatio
+        implicit none
+        !
+        !      Subroutine argument.
+        !
+        integer(kind=intType), intent(inout) :: nTypeMismatch
+        !
+        !      Local variables.
+        !
+        integer :: realTypeCGNS
 
-    logical :: eddyVisPresent
+        integer(kind=intType) :: i, j, k, nn, po, ip, jp, kp
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
 
-    ! Set the cell range to be copied from the buffer.
+        real(kind=realType) :: nuScale, ratio, nu
 
-    iBeg = lbound(buffer,1); iEnd = ubound(buffer,1)
-    jBeg = lbound(buffer,2); jEnd = ubound(buffer,2)
-    kBeg = lbound(buffer,3); kEnd = ubound(buffer,3)
+        logical :: eddyVisPresent
 
-    ! Set the cgns real type and abbreviate the solution variable and
-    ! the pointer offset to improve readability.
-    ! Also compute the kinematic viscosity scale.
+        ! Set the cell range to be copied from the buffer.
 
-    realTypeCGNS = setCGNSRealType()
+        iBeg = lbound(buffer, 1); iEnd = ubound(buffer, 1)
+        jBeg = lbound(buffer, 2); jEnd = ubound(buffer, 2)
+        kBeg = lbound(buffer, 3); kEnd = ubound(buffer, 3)
 
-    po = IOVar(nbkLocal,solID)%pointerOffset
-    w => IOVar(nbkLocal,solID)%w
+        ! Set the cgns real type and abbreviate the solution variable and
+        ! the pointer offset to improve readability.
+        ! Also compute the kinematic viscosity scale.
 
-    nuScale = muScale/rhoScale
+        realTypeCGNS = setCGNSRealType()
 
-    ! Check if the nu tilde variable is present.
+        po = IOVar(nbkLocal, solID)%pointerOffset
+        w => IOVar(nbkLocal, solID)%w
 
-    nn = bsearchStrings(cgnsTurbSANu, varNames)
+        nuScale = muScale / rhoScale
 
-    nuTildePresent: if(nn > 0) then
+        ! Check if the nu tilde variable is present.
 
-       ! Nu tilde is present. First determine whether or not a type
-       ! mismatch occurs. If so, update nTypeMismatch.
+        nn = bsearchStrings(cgnsTurbSANu, varNames)
 
-       if(realTypeCGNS /= varTypes(nn)) &
-            nTypeMismatch = nTypeMismatch + 1
+        nuTildePresent: if (nn > 0) then
 
-       ! Read nu tilde from the restart file and store
-       ! it in buffer.
+            ! Nu tilde is present. First determine whether or not a type
+            ! mismatch occurs. If so, update nTypeMismatch.
 
-       call readRestartVariable(varNames(nn))
+            if (realTypeCGNS /= varTypes(nn)) &
+                nTypeMismatch = nTypeMismatch + 1
 
-       ! Copy the variables from buffer into w and take the possible
-       ! pointer offset into account.
+            ! Read nu tilde from the restart file and store
+            ! it in buffer.
 
-       do k=kBeg,kEnd
-          kp = k+po
-          do j=jBeg,jEnd
-             jp = j+po
-             do i=iBeg,iEnd
-                ip = i+po
-                w(ip,jp,kp,itu1) = nuScale*buffer(i,j,k)
-             enddo
-          enddo
-       enddo
+            call readRestartVariable(varNames(nn))
 
-       ! Variable is read, so a return can be made.
+            ! Copy the variables from buffer into w and take the possible
+            ! pointer offset into account.
 
-       return
+            do k = kBeg, kEnd
+                kp = k + po
+                do j = jBeg, jEnd
+                    jp = j + po
+                    do i = iBeg, iEnd
+                        ip = i + po
+                        w(ip, jp, kp, itu1) = nuScale * buffer(i, j, k)
+                    end do
+                end do
+            end do
 
-    endif nuTildePresent
-
-    ! NuTilde is not present. Try to construct the eddy viscosity.
-
-    call readTurbEddyVis(nTypeMismatch, eddyVisPresent)
-
-    ! Check if the eddy viscosity has been constructed.
-
-    eddyPresent: if( eddyVisPresent ) then
-
-       ! Eddy viscosity is present. As the laminar viscosity is not
-       ! yet known, set it to the free-stream value.
-
-       rlv = muInf
-
-       ! Compute nuTilde from the known ratio of eddy and laminar
-       ! viscosity. Take the possible pointer offset into account.
-
-       do k=kBeg,kEnd
-          kp = k+po
-          do j=jBeg,jEnd
-             jp = j+po
-             do i=iBeg,iEnd
-                ip = i+po
+            ! Variable is read, so a return can be made.
 
-                ! Compute the eddy viscosity ratio and the laminar
-                ! kinematic viscosity and call the function to
-                ! compute the nu tilde variable.
+            return
 
-                ratio            = rev(i,j,k)/rlv(i,j,k)
-                nu               = rlv(i,j,k)/w(ip,jp,kp,irho)
-                w(ip,jp,kp,itu1) = saNuKnownEddyRatio(ratio, nu)
+        end if nuTildePresent
+
+        ! NuTilde is not present. Try to construct the eddy viscosity.
+
+        call readTurbEddyVis(nTypeMismatch, eddyVisPresent)
+
+        ! Check if the eddy viscosity has been constructed.
+
+        eddyPresent: if (eddyVisPresent) then
+
+            ! Eddy viscosity is present. As the laminar viscosity is not
+            ! yet known, set it to the free-stream value.
+
+            rlv = muInf
+
+            ! Compute nuTilde from the known ratio of eddy and laminar
+            ! viscosity. Take the possible pointer offset into account.
+
+            do k = kBeg, kEnd
+                kp = k + po
+                do j = jBeg, jEnd
+                    jp = j + po
+                    do i = iBeg, iEnd
+                        ip = i + po
 
-             enddo
-          enddo
-       enddo
+                        ! Compute the eddy viscosity ratio and the laminar
+                        ! kinematic viscosity and call the function to
+                        ! compute the nu tilde variable.
 
-       ! Print a warning that nu tilde has been constructed and
-       ! not read. Only processor 0 does this for block 1.
+                        ratio = rev(i, j, k) / rlv(i, j, k)
+                        nu = rlv(i, j, k) / w(ip, jp, kp, irho)
+                        w(ip, jp, kp, itu1) = saNuKnownEddyRatio(ratio, nu)
 
-       if((myID == 0) .and. (nbkLocal == 1)) then
+                    end do
+                end do
+            end do
 
-          print "(a)", "#"
-          print "(a)", "#                 Warning"
-          print "(a)", "# Nu tilde for Spalart-Allmaras model not &
-               &present in the restart file."
-          print "(a)", "# Variable has been reconstructed from &
-               &the eddy viscosity ratio."
-          print "(a)", "#"
+            ! Print a warning that nu tilde has been constructed and
+            ! not read. Only processor 0 does this for block 1.
 
-       endif
+            if ((myID == 0) .and. (nbkLocal == 1)) then
 
-       ! Variable is constructed, so a return can be made.
+                print "(a)", "#"
+                print "(a)", "#                 Warning"
+                print "(a)", "# Nu tilde for Spalart-Allmaras model not &
+                     &present in the restart file."
+                print "(a)", "# Variable has been reconstructed from &
+                     &the eddy viscosity ratio."
+                print "(a)", "#"
 
-       return
+            end if
 
-    endif eddyPresent
+            ! Variable is constructed, so a return can be made.
 
-    ! No turbulence info is present in the restart file.
-    ! Initialize nu tilde to the free stream value.
-    ! Take the possible pointer offset into account.
+            return
 
-    do k=kBeg,kEnd
-       kp = k+po
-       do j=jBeg,jEnd
-          jp = j+po
-          do i=iBeg,iEnd
-             ip = i+po
-             w(ip,jp,kp,itu1) = wInf(itu1)
-          enddo
-       enddo
-    enddo
+        end if eddyPresent
 
-    ! Print a warning that nu tilde has been set to the
-    ! free stream values. Only processor 0 does this for block 1.
+        ! No turbulence info is present in the restart file.
+        ! Initialize nu tilde to the free stream value.
+        ! Take the possible pointer offset into account.
 
-    if((myID == 0) .and. (nbkLocal == 1)) then
+        do k = kBeg, kEnd
+            kp = k + po
+            do j = jBeg, jEnd
+                jp = j + po
+                do i = iBeg, iEnd
+                    ip = i + po
+                    w(ip, jp, kp, itu1) = wInf(itu1)
+                end do
+            end do
+        end do
 
-       print "(a)", "#"
-       print "(a)", "#                 Warning"
-       print "(a)", "# No turbulent info present in the restart file."
-       print "(a)", "# Nu tilde for Spalart-Allmaras model has &
-            &been set to the free stream value."
-       print "(a)", "#"
+        ! Print a warning that nu tilde has been set to the
+        ! free stream values. Only processor 0 does this for block 1.
 
-    endif
+        if ((myID == 0) .and. (nbkLocal == 1)) then
 
-  end subroutine readTurbSA
-  subroutine readTurbV2f(nTypeMismatch)
-    !
-    !       readTurbV2f reads or constructs the four transport variables
-    !       for the v2f model. If no information could be retrieved some
-    !       engineering guess of the turbulent variables is made.
-    !
-    use constants
-    use cgnsNames
-    use communication, only : myid
-    use blockPointers, only : w, nbklocal
-    use IOModule, only : IOVar
-    use utils, only : setCGNSRealType, terminate
-    use sorting, only : bsearchStrings
-    use flowVarRefState, only : wInf, nt1, nt2
-    implicit none
-    !
-    !      Subroutine argument.
-    !
-    integer(kind=intType), intent(inout) :: nTypeMismatch
-    !
-    !      Local variables.
-    !
-    integer :: realTypeCGNS, itu
-    integer, dimension(4) :: indW
+            print "(a)", "#"
+            print "(a)", "#                 Warning"
+            print "(a)", "# No turbulent info present in the restart file."
+            print "(a)", "# Nu tilde for Spalart-Allmaras model has &
+                 &been set to the free stream value."
+            print "(a)", "#"
 
-    integer(kind=intType) :: i, j, k, ii, nn, po, ip, jp, kp
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
-
-    real(kind=realType) :: nuScale, kScale, epsScale, fScale
+        end if
 
-    real(kind=realType), dimension(4) :: turbScale
+    end subroutine readTurbSA
+    subroutine readTurbV2f(nTypeMismatch)
+        !
+        !       readTurbV2f reads or constructs the four transport variables
+        !       for the v2f model. If no information could be retrieved some
+        !       engineering guess of the turbulent variables is made.
+        !
+        use constants
+        use cgnsNames
+        use communication, only: myid
+        use blockPointers, only: w, nbklocal
+        use IOModule, only: IOVar
+        use utils, only: setCGNSRealType, terminate
+        use sorting, only: bsearchStrings
+        use flowVarRefState, only: wInf, nt1, nt2
+        implicit none
+        !
+        !      Subroutine argument.
+        !
+        integer(kind=intType), intent(inout) :: nTypeMismatch
+        !
+        !      Local variables.
+        !
+        integer :: realTypeCGNS, itu
+        integer, dimension(4) :: indW
 
-    character(len=maxCGNSNameLen), dimension(4) :: namesVar
+        integer(kind=intType) :: i, j, k, ii, nn, po, ip, jp, kp
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
+
+        real(kind=realType) :: nuScale, kScale, epsScale, fScale
 
-    ! Set the cell range to be copied from the buffer.
+        real(kind=realType), dimension(4) :: turbScale
 
-    iBeg = lbound(buffer,1); iEnd = ubound(buffer,1)
-    jBeg = lbound(buffer,2); jEnd = ubound(buffer,2)
-    kBeg = lbound(buffer,3); kEnd = ubound(buffer,3)
+        character(len=maxCGNSNameLen), dimension(4) :: namesVar
 
-    ! Set the cgns real type and abbreviate the solution variable and
-    ! the pointer offset to improve readability.
+        ! Set the cell range to be copied from the buffer.
 
-    realTypeCGNS = setCGNSRealType()
+        iBeg = lbound(buffer, 1); iEnd = ubound(buffer, 1)
+        jBeg = lbound(buffer, 2); jEnd = ubound(buffer, 2)
+        kBeg = lbound(buffer, 3); kEnd = ubound(buffer, 3)
 
-    po = IOVar(nbkLocal,solID)%pointerOffset
-    w => IOVar(nbkLocal,solID)%w
+        ! Set the cgns real type and abbreviate the solution variable and
+        ! the pointer offset to improve readability.
 
-    ! Set the names and indices for the four variables.
+        realTypeCGNS = setCGNSRealType()
 
-    indW(1) = itu1; namesVar(1) = cgnsTurbK
-    indW(2) = itu2; namesVar(2) = cgnsTurbEpsilon
-    indW(3) = itu3; namesVar(3) = cgnsTurbV2
-    indW(4) = itu4; namesVar(4) = cgnsTurbF
-
-    ! Compute the scales for nu, k, epsilon and f; v2 has the same
-    ! scaling as k.
-
-    nuScale  = muScale/rhoScale
-    kScale   = velScale**2
-    fScale   = kScale/nuScale
-    epsScale = kScale*fScale
-
-    turbScale(1) = kScale
-    turbScale(2) = epsScale
-    turbScale(3) = kScale
-    turbScale(4) = fScale
-
-    ! Loop over the four variables of the v2f model.
-
-    varLoop: do ii=1,4
-
-       ! Find the index of the variable in the solution file and check
-       ! if it is present. If not exit the loop.
-
-       nn = bsearchStrings(namesVar(ii), varNames)
-
-       if(nn == 0) exit
-
-       ! Variable is present. First determine whether or not a type
-       ! mismatch occurs. If so, update nTypeMismatch.
+        po = IOVar(nbkLocal, solID)%pointerOffset
+        w => IOVar(nbkLocal, solID)%w
 
-       if(realTypeCGNS /= varTypes(nn)) &
-            nTypeMismatch = nTypeMismatch + 1
+        ! Set the names and indices for the four variables.
 
-       ! Read the variable from the restart file and store
-       ! it in buffer.
+        indW(1) = itu1; namesVar(1) = cgnsTurbK
+        indW(2) = itu2; namesVar(2) = cgnsTurbEpsilon
+        indW(3) = itu3; namesVar(3) = cgnsTurbV2
+        indW(4) = itu4; namesVar(4) = cgnsTurbF
+
+        ! Compute the scales for nu, k, epsilon and f; v2 has the same
+        ! scaling as k.
+
+        nuScale = muScale / rhoScale
+        kScale = velScale**2
+        fScale = kScale / nuScale
+        epsScale = kScale * fScale
+
+        turbScale(1) = kScale
+        turbScale(2) = epsScale
+        turbScale(3) = kScale
+        turbScale(4) = fScale
+
+        ! Loop over the four variables of the v2f model.
+
+        varLoop: do ii = 1, 4
+
+            ! Find the index of the variable in the solution file and check
+            ! if it is present. If not exit the loop.
+
+            nn = bsearchStrings(namesVar(ii), varNames)
+
+            if (nn == 0) exit
+
+            ! Variable is present. First determine whether or not a type
+            ! mismatch occurs. If so, update nTypeMismatch.
 
-       call readRestartVariable(varNames(nn))
+            if (realTypeCGNS /= varTypes(nn)) &
+                nTypeMismatch = nTypeMismatch + 1
 
-       ! Copy the variables from buffer into w.
-       ! Take the possible pointer offset into account.
+            ! Read the variable from the restart file and store
+            ! it in buffer.
 
-       itu = indW(ii)
+            call readRestartVariable(varNames(nn))
 
-       do k=kBeg,kEnd
-          kp = k+po
-          do j=jBeg,jEnd
-             jp = j+po
-             do i=iBeg,iEnd
-                ip = i+po
-                w(ip,jp,kp,itu) = turbScale(ii)*buffer(i,j,k)
-             enddo
-          enddo
-       enddo
+            ! Copy the variables from buffer into w.
+            ! Take the possible pointer offset into account.
 
-    enddo varLoop
+            itu = indW(ii)
 
-    ! Check if all variables were present. If not, set all turbulence
-    ! variables to the free-stream values.
+            do k = kBeg, kEnd
+                kp = k + po
+                do j = jBeg, jEnd
+                    jp = j + po
+                    do i = iBeg, iEnd
+                        ip = i + po
+                        w(ip, jp, kp, itu) = turbScale(ii) * buffer(i, j, k)
+                    end do
+                end do
+            end do
 
-    testPresent: if(ii <= 4) then
+        end do varLoop
 
-       ! Not all variables are present. Set all 4 to the free-stream
-       ! values. Take the possible pointer offset into account.
+        ! Check if all variables were present. If not, set all turbulence
+        ! variables to the free-stream values.
 
-       do ii=nt1,nt2
-          do k=kBeg,kEnd
-             kp = k+po
-             do j=jBeg,jEnd
-                jp = j+po
-                do i=iBeg,iEnd
-                   ip = i+po
-                   w(ip,jp,kp,ii) = wInf(ii)
-                enddo
-             enddo
-          enddo
-       enddo
+        testPresent: if (ii <= 4) then
 
-       ! Print a warning that the turbulence has been initialized to
-       ! the free-stream. Only processor 0 does this for block 1.
+            ! Not all variables are present. Set all 4 to the free-stream
+            ! values. Take the possible pointer offset into account.
 
-       if((myID == 0) .and. (nbkLocal == 1)) then
+            do ii = nt1, nt2
+                do k = kBeg, kEnd
+                    kp = k + po
+                    do j = jBeg, jEnd
+                        jp = j + po
+                        do i = iBeg, iEnd
+                            ip = i + po
+                            w(ip, jp, kp, ii) = wInf(ii)
+                        end do
+                    end do
+                end do
+            end do
 
-          print "(a)", "#"
-          print "(a)", "#                 Warning"
-          print "(a)", "# Not all turbulence variables are present &
-               &for the v2f model."
-          print "(a)", "# They have been initialized to the free &
-               &stream values."
-          print "(a)", "#"
+            ! Print a warning that the turbulence has been initialized to
+            ! the free-stream. Only processor 0 does this for block 1.
 
-       endif
+            if ((myID == 0) .and. (nbkLocal == 1)) then
 
-    endif testPresent
+                print "(a)", "#"
+                print "(a)", "#                 Warning"
+                print "(a)", "# Not all turbulence variables are present &
+                     &for the v2f model."
+                print "(a)", "# They have been initialized to the free &
+                     &stream values."
+                print "(a)", "#"
 
-  end subroutine readTurbV2f
+            end if
 
-  subroutine readTurbvar(nTypeMismatch)
-    !
-    !       readTurbvar controls the reading of the turbulent variables
-    !       for a restart. It calls the routine, which corresponds to the
-    !       turbulence model used.
-    !
-    use constants
-    use communication, only : myid, adflow_comm_world
-    use inputPhysics, only : equations, turbModel
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine argument.
-    !
-    integer(kind=intType), intent(inout) :: nTypeMismatch
-    !
-    !      Local variables.
-    !
-    integer :: ierr
+        end if testPresent
 
-    ! Check if the rans equations must be solved. If not return.
+    end subroutine readTurbV2f
 
-    if(equations /= RANSEquations) return
+    subroutine readTurbvar(nTypeMismatch)
+        !
+        !       readTurbvar controls the reading of the turbulent variables
+        !       for a restart. It calls the routine, which corresponds to the
+        !       turbulence model used.
+        !
+        use constants
+        use communication, only: myid, adflow_comm_world
+        use inputPhysics, only: equations, turbModel
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine argument.
+        !
+        integer(kind=intType), intent(inout) :: nTypeMismatch
+        !
+        !      Local variables.
+        !
+        integer :: ierr
 
-    ! Determine the turbulence model to be used and call the
-    ! appropriate subroutine.
+        ! Check if the rans equations must be solved. If not return.
 
-    select case (turbModel)
+        if (equations /= RANSEquations) return
 
-    case (spalartAllmaras, spalartAllmarasEdwards)
-       call readTurbSA(nTypeMismatch)
-
-       ! !===============================================================
-
-       ! case (komegaWilcox, komegaModified, menterSST, ktau)
-       !   call readTurbKwType(nTypeMismatch)
+        ! Determine the turbulence model to be used and call the
+        ! appropriate subroutine.
 
-       ! !===============================================================
+        select case (turbModel)
 
-       ! case (v2f)
-       !   call readTurbV2f(nTypeMismatch)
+        case (spalartAllmaras, spalartAllmarasEdwards)
+            call readTurbSA(nTypeMismatch)
+
+            ! !===============================================================
+
+            ! case (komegaWilcox, komegaModified, menterSST, ktau)
+            !   call readTurbKwType(nTypeMismatch)
 
-       !===============================================================
+            ! !===============================================================
 
-    case default
-       if(myID == 0) &
-            call terminate("readTurbvar", "Restart not implemented &
-            &for this turbulence model.")
-       call mpi_barrier(ADflow_comm_world, ierr)
+            ! case (v2f)
+            !   call readTurbV2f(nTypeMismatch)
 
-    end select
+            !===============================================================
 
-  end subroutine readTurbvar
+        case default
+            if (myID == 0) &
+                 call terminate("readTurbvar", "Restart not implemented &
+                 &for this turbulence model.")
+            call mpi_barrier(ADflow_comm_world, ierr)
 
-  subroutine readXmomentum(nTypeMismatch)
-    !
-    !       readXmomentum reads the x-momentum variable from the given
-    !       place in the cgns file. If the x-momentum itself is not stored
-    !       then it is tried to construct it from the x-velocity and
-    !       density; it is assumed that the latter is already stored in
-    !       the pointer variable w.
-    !       If it is not possible to create the x-velocity an error
-    !       message is printed and the program will stop.
-    !       It is assumed that the pointers in blockPointers already
-    !       point to the correct block.
-    !
-    use constants
-    use cgnsNames
-    use blockPointers, only : w, nbklocal
-    use IOModule, only : IOVar
-    use utils, only : setCGNSRealType, terminate
-    use sorting, only : bsearchStrings
+        end select
 
-    implicit none
-    !
-    !      Subroutine argument.
-    !
-    integer(kind=intType), intent(inout) :: nTypeMismatch
-    !
-    !      Local variables
-    !
-    integer :: realTypeCGNS
+    end subroutine readTurbvar
 
-    integer(kind=intType) :: i, j, k, nn, mm, po, ip, jp, kp
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
+    subroutine readXmomentum(nTypeMismatch)
+        !
+        !       readXmomentum reads the x-momentum variable from the given
+        !       place in the cgns file. If the x-momentum itself is not stored
+        !       then it is tried to construct it from the x-velocity and
+        !       density; it is assumed that the latter is already stored in
+        !       the pointer variable w.
+        !       If it is not possible to create the x-velocity an error
+        !       message is printed and the program will stop.
+        !       It is assumed that the pointers in blockPointers already
+        !       point to the correct block.
+        !
+        use constants
+        use cgnsNames
+        use blockPointers, only: w, nbklocal
+        use IOModule, only: IOVar
+        use utils, only: setCGNSRealType, terminate
+        use sorting, only: bsearchStrings
 
-    real(kind=realType) :: momScale
+        implicit none
+        !
+        !      Subroutine argument.
+        !
+        integer(kind=intType), intent(inout) :: nTypeMismatch
+        !
+        !      Local variables
+        !
+        integer :: realTypeCGNS
 
-    ! Set the cell range to be copied from the buffer.
+        integer(kind=intType) :: i, j, k, nn, mm, po, ip, jp, kp
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
 
-    iBeg = lbound(buffer,1); iEnd = ubound(buffer,1)
-    jBeg = lbound(buffer,2); jEnd = ubound(buffer,2)
-    kBeg = lbound(buffer,3); kEnd = ubound(buffer,3)
+        real(kind=realType) :: momScale
 
-    ! Compute the momentum scaling factor, set the cgns real type and
-    ! abbreviate the solution variable and the pointer offset to
-    ! improve readability.
+        ! Set the cell range to be copied from the buffer.
 
-    momScale     = rhoScale*velScale
-    realTypeCGNS = setCGNSRealType()
+        iBeg = lbound(buffer, 1); iEnd = ubound(buffer, 1)
+        jBeg = lbound(buffer, 2); jEnd = ubound(buffer, 2)
+        kBeg = lbound(buffer, 3); kEnd = ubound(buffer, 3)
 
-    po = IOVar(nbkLocal,solID)%pointerOffset
-    w => IOVar(nbkLocal,solID)%w
+        ! Compute the momentum scaling factor, set the cgns real type and
+        ! abbreviate the solution variable and the pointer offset to
+        ! improve readability.
 
-    ! Find out if the X-momentum is present in the solution file.
+        momScale = rhoScale * velScale
+        realTypeCGNS = setCGNSRealType()
 
-    nn = bsearchStrings(cgnsMomX, varNames)
+        po = IOVar(nbkLocal, solID)%pointerOffset
+        w => IOVar(nbkLocal, solID)%w
 
-    testMxPresent: if(nn > 0) then
+        ! Find out if the X-momentum is present in the solution file.
 
-       ! X-momentum is present. First determine whether or not a type
-       ! mismatch occurs. If so, update nTypeMismatch.
+        nn = bsearchStrings(cgnsMomX, varNames)
 
-       if(realTypeCGNS /= varTypes(nn)) &
-            nTypeMismatch = nTypeMismatch + 1
+        testMxPresent: if (nn > 0) then
 
-       ! Read the x-momentum from the restart file and store it in buffer.
+            ! X-momentum is present. First determine whether or not a type
+            ! mismatch occurs. If so, update nTypeMismatch.
 
-       call readRestartVariable(varNames(nn))
+            if (realTypeCGNS /= varTypes(nn)) &
+                nTypeMismatch = nTypeMismatch + 1
 
-       ! Copy the variables from buffer into w. Multiply by the scale
-       ! factor to obtain the correct non-dimensional value and take
-       ! the possible pointer offset into account.
+            ! Read the x-momentum from the restart file and store it in buffer.
 
-       do k=kBeg,kEnd
-          kp = k+po
-          do j=jBeg,jEnd
-             jp = j+po
-             do i=iBeg,iEnd
-                ip = i+po
-                w(ip,jp,kp,imx) = buffer(i,j,k)*momScale
-             enddo
-          enddo
-       enddo
+            call readRestartVariable(varNames(nn))
 
-       ! X-momentum is read, so a return can be made.
+            ! Copy the variables from buffer into w. Multiply by the scale
+            ! factor to obtain the correct non-dimensional value and take
+            ! the possible pointer offset into account.
 
-       return
+            do k = kBeg, kEnd
+                kp = k + po
+                do j = jBeg, jEnd
+                    jp = j + po
+                    do i = iBeg, iEnd
+                        ip = i + po
+                        w(ip, jp, kp, imx) = buffer(i, j, k) * momScale
+                    end do
+                end do
+            end do
 
-    endif testMxPresent
+            ! X-momentum is read, so a return can be made.
 
-    ! X-momentum is not present. Check for x-velocity.
+            return
 
-    nn = bsearchStrings(cgnsVelX, varNames)
+        end if testMxPresent
 
-    testVxPresent: if(nn > 0) then
+        ! X-momentum is not present. Check for x-velocity.
 
-       ! X-velocity is present. First determine whether or not a type
-       ! mismatch occurs. If so, update nTypeMismatch.
+        nn = bsearchStrings(cgnsVelX, varNames)
 
-       if(realTypeCGNS /= varTypes(nn)) &
-            nTypeMismatch = nTypeMismatch + 1
+        testVxPresent: if (nn > 0) then
 
-       ! Read the x-velocity from the restart file and store it in buffer.
+            ! X-velocity is present. First determine whether or not a type
+            ! mismatch occurs. If so, update nTypeMismatch.
 
-       call readRestartVariable(varNames(nn))
+            if (realTypeCGNS /= varTypes(nn)) &
+                nTypeMismatch = nTypeMismatch + 1
 
-       ! Copy the variables from buffer into w. Multiply by the
-       ! density and velocity scaling factor to obtain to correct
-       ! non-dimensional value. Take the possible pointer offset
-       ! into account.
+            ! Read the x-velocity from the restart file and store it in buffer.
 
-       do k=kBeg,kEnd
-          kp = k+po
-          do j=jBeg,jEnd
-             jp = j+po
-             do i=iBeg,iEnd
-                ip = i+po
-                w(ip,jp,kp,imx) = buffer(i,j,k)*w(ip,jp,kp,irho)*velScale
-             enddo
-          enddo
-       enddo
+            call readRestartVariable(varNames(nn))
 
-       ! X-momentum is constructed, so a return can be made.
+            ! Copy the variables from buffer into w. Multiply by the
+            ! density and velocity scaling factor to obtain to correct
+            ! non-dimensional value. Take the possible pointer offset
+            ! into account.
 
-       return
+            do k = kBeg, kEnd
+                kp = k + po
+                do j = jBeg, jEnd
+                    jp = j + po
+                    do i = iBeg, iEnd
+                        ip = i + po
+                        w(ip, jp, kp, imx) = buffer(i, j, k) * w(ip, jp, kp, irho) * velScale
+                    end do
+                end do
+            end do
 
-    endif testVxPresent
+            ! X-momentum is constructed, so a return can be made.
 
-    ! X-momentum could not be created. Terminate.
+            return
 
-    call terminate("readXmomentum", &
-         "X-Momentum could not be created")
+        end if testVxPresent
 
-  end subroutine readXmomentum
+        ! X-momentum could not be created. Terminate.
 
-  subroutine readXvelocity(nTypeMismatch)
-    !
-    !       readXvelocity reads the x-velocity variable from the given
-    !       place in the cgns file. If the x-velocity itself is not stored
-    !       then it is tried to construct it from the x-momentum and
-    !       density; it is assumed that the latter is already stored in
-    !       the pointer variable w.
-    !       If it is not possible to create the x-velocity an error
-    !       message is printed and the program will stop.
-    !       It is assumed that the pointers in blockPointers already
-    !       point to the correct block.
-    !
-    use constants
-    use cgnsNames
-    use blockPointers, only : w, nbklocal
-    use IOModule, only : IOVar
-    use utils, only : setCGNSRealType, terminate
-    use sorting, only : bsearchStrings
-    !
-    !      Subroutine argument.
-    !
-    integer(kind=intType), intent(inout) :: nTypeMismatch
-    !
-    !      Local variables
-    !
-    integer :: realTypeCGNS
+        call terminate("readXmomentum", &
+                       "X-Momentum could not be created")
 
-    integer(kind=intType) :: i, j, k, nn, po, ip, jp, kp
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
+    end subroutine readXmomentum
 
-    real(kind=realType) :: scale
+    subroutine readXvelocity(nTypeMismatch)
+        !
+        !       readXvelocity reads the x-velocity variable from the given
+        !       place in the cgns file. If the x-velocity itself is not stored
+        !       then it is tried to construct it from the x-momentum and
+        !       density; it is assumed that the latter is already stored in
+        !       the pointer variable w.
+        !       If it is not possible to create the x-velocity an error
+        !       message is printed and the program will stop.
+        !       It is assumed that the pointers in blockPointers already
+        !       point to the correct block.
+        !
+        use constants
+        use cgnsNames
+        use blockPointers, only: w, nbklocal
+        use IOModule, only: IOVar
+        use utils, only: setCGNSRealType, terminate
+        use sorting, only: bsearchStrings
+        !
+        !      Subroutine argument.
+        !
+        integer(kind=intType), intent(inout) :: nTypeMismatch
+        !
+        !      Local variables
+        !
+        integer :: realTypeCGNS
 
-    ! Set the cell range to be copied from the buffer.
+        integer(kind=intType) :: i, j, k, nn, po, ip, jp, kp
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
 
-    iBeg = lbound(buffer,1); iEnd = ubound(buffer,1)
-    jBeg = lbound(buffer,2); jEnd = ubound(buffer,2)
-    kBeg = lbound(buffer,3); kEnd = ubound(buffer,3)
+        real(kind=realType) :: scale
 
-    ! Set the cgns real type and abbreviate the solution variable and
-    ! the pointer offset to improve readability.
+        ! Set the cell range to be copied from the buffer.
 
-    realTypeCGNS = setCGNSRealType()
+        iBeg = lbound(buffer, 1); iEnd = ubound(buffer, 1)
+        jBeg = lbound(buffer, 2); jEnd = ubound(buffer, 2)
+        kBeg = lbound(buffer, 3); kEnd = ubound(buffer, 3)
 
-    po = IOVar(nbkLocal,solID)%pointerOffset
-    w => IOVar(nbkLocal,solID)%w
+        ! Set the cgns real type and abbreviate the solution variable and
+        ! the pointer offset to improve readability.
 
-    ! Find out if the x-velocity is present in the solution file.
+        realTypeCGNS = setCGNSRealType()
 
-    nn = bsearchStrings(cgnsVelX, varNames)
-    if(nn > 0) then
+        po = IOVar(nbkLocal, solID)%pointerOffset
+        w => IOVar(nbkLocal, solID)%w
 
-       ! X-velocity is present. First determine whether or not a type
-       ! mismatch occurs. If so, update nTypeMismatch.
+        ! Find out if the x-velocity is present in the solution file.
 
-       if(realTypeCGNS /= varTypes(nn)) &
-            nTypeMismatch = nTypeMismatch + 1
-
-       ! Read the x-velocity from the restart file and store it
-       ! in buffer.
-
-       call readRestartVariable(varNames(nn))
-
-       ! Copy the variables from buffer into w. Multiply by the scale
-       ! factor to obtain the correct nondimensional value and take
-       ! the possible pointer offset into account.
-
-       do k=kBeg,kEnd
-          kp = k+po
-          do j=jBeg,jEnd
-             jp = j+po
-             do i=iBeg,iEnd
-                ip = i+po
-                w(ip,jp,kp,ivx) = buffer(i,j,k)*velScale
-             enddo
-          enddo
-       enddo
-
-       ! X-velocity is read, so a return can be made.
-
-       return
-
-    endif
+        nn = bsearchStrings(cgnsVelX, varNames)
+        if (nn > 0) then
 
-    ! X-velocity not present. Check for x-momentum.
+            ! X-velocity is present. First determine whether or not a type
+            ! mismatch occurs. If so, update nTypeMismatch.
 
-    nn = bsearchStrings(cgnsMomX, varNames)
-    if(nn > 0) then
+            if (realTypeCGNS /= varTypes(nn)) &
+                nTypeMismatch = nTypeMismatch + 1
+
+            ! Read the x-velocity from the restart file and store it
+            ! in buffer.
+
+            call readRestartVariable(varNames(nn))
+
+            ! Copy the variables from buffer into w. Multiply by the scale
+            ! factor to obtain the correct nondimensional value and take
+            ! the possible pointer offset into account.
+
+            do k = kBeg, kEnd
+                kp = k + po
+                do j = jBeg, jEnd
+                    jp = j + po
+                    do i = iBeg, iEnd
+                        ip = i + po
+                        w(ip, jp, kp, ivx) = buffer(i, j, k) * velScale
+                    end do
+                end do
+            end do
+
+            ! X-velocity is read, so a return can be made.
+
+            return
+
+        end if
 
-       ! X-momentum is present. First determine whether or not a type
-       ! mismatch occurs. If so, update nTypeMismatch.
+        ! X-velocity not present. Check for x-momentum.
 
-       if(realTypeCGNS /= varTypes(nn)) &
-            nTypeMismatch = nTypeMismatch + 1
+        nn = bsearchStrings(cgnsMomX, varNames)
+        if (nn > 0) then
 
-       ! Read the x-momentum from the restart file and store
-       ! it in buffer.
+            ! X-momentum is present. First determine whether or not a type
+            ! mismatch occurs. If so, update nTypeMismatch.
 
-       call readRestartVariable(varNames(nn))
+            if (realTypeCGNS /= varTypes(nn)) &
+                nTypeMismatch = nTypeMismatch + 1
 
-       ! Construct the x-velocity; it is assumed that the density is
-       ! already stored in w. Multiply by the momentum scale factor
-       ! to obtain the correct nondimensional value and take the
-       ! possible pointer offset into account.
+            ! Read the x-momentum from the restart file and store
+            ! it in buffer.
 
-       scale = rhoScale*velScale
+            call readRestartVariable(varNames(nn))
 
-       do k=kBeg,kEnd
-          kp = k+po
-          do j=jBeg,jEnd
-             jp = j+po
-             do i=iBeg,iEnd
-                ip = i+po
-                w(ip,jp,kp,ivx) = buffer(i,j,k)*scale/w(ip,jp,kp,irho)
-             enddo
-          enddo
-       enddo
+            ! Construct the x-velocity; it is assumed that the density is
+            ! already stored in w. Multiply by the momentum scale factor
+            ! to obtain the correct nondimensional value and take the
+            ! possible pointer offset into account.
 
-       ! X-velocity is constructed, so a return can be made.
+            scale = rhoScale * velScale
 
-       return
+            do k = kBeg, kEnd
+                kp = k + po
+                do j = jBeg, jEnd
+                    jp = j + po
+                    do i = iBeg, iEnd
+                        ip = i + po
+                        w(ip, jp, kp, ivx) = buffer(i, j, k) * scale / w(ip, jp, kp, irho)
+                    end do
+                end do
+            end do
 
-    endif
+            ! X-velocity is constructed, so a return can be made.
 
-    ! Not able to determine the x-velocity.
-    ! Print an error message and exit.
+            return
 
-    call terminate("readXvelocity", &
-         "Not able to retrieve x-velocity from the &
-         &variables in the restart file.")
+        end if
 
-  end subroutine readXvelocity
+        ! Not able to determine the x-velocity.
+        ! Print an error message and exit.
 
-  subroutine readYmomentum(nTypeMismatch)
-    !
-    !       readYmomentum reads the y-momentum variable from the given
-    !       place in the cgns file. If the y-momentum itself is not stored
-    !       then it is tried to construct it from the y-velocity and
-    !       density; it is assumed that the latter is already stored in
-    !       the pointer variable w.
-    !       If it is not possible to create the y-velocity an error
-    !       message is printed and the program will stop.
-    !       It is assumed that the pointers in blockPointers already
-    !       point to the correct block.
-    !
-    use constants
-    use cgnsNames
-    use blockPointers, only : w, nbklocal
-    use IOModule, only : IOVar
-    use utils, only : setCGNSRealType, terminate
-    use sorting, only : bsearchStrings
-    implicit none
-    !
-    !      Subroutine argument.
-    !
-    integer(kind=intType), intent(inout) :: nTypeMismatch
-    !
-    !      Local variables
-    !
-    integer :: realTypeCGNS
+        call terminate("readXvelocity", &
+             "Not able to retrieve x-velocity from the &
+             &variables in the restart file.")
 
-    integer(kind=intType) :: i, j, k, nn, po, ip, jp, kp
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
+    end subroutine readXvelocity
 
-    real(kind=realType) :: momScale
+    subroutine readYmomentum(nTypeMismatch)
+        !
+        !       readYmomentum reads the y-momentum variable from the given
+        !       place in the cgns file. If the y-momentum itself is not stored
+        !       then it is tried to construct it from the y-velocity and
+        !       density; it is assumed that the latter is already stored in
+        !       the pointer variable w.
+        !       If it is not possible to create the y-velocity an error
+        !       message is printed and the program will stop.
+        !       It is assumed that the pointers in blockPointers already
+        !       point to the correct block.
+        !
+        use constants
+        use cgnsNames
+        use blockPointers, only: w, nbklocal
+        use IOModule, only: IOVar
+        use utils, only: setCGNSRealType, terminate
+        use sorting, only: bsearchStrings
+        implicit none
+        !
+        !      Subroutine argument.
+        !
+        integer(kind=intType), intent(inout) :: nTypeMismatch
+        !
+        !      Local variables
+        !
+        integer :: realTypeCGNS
 
-    ! Set the cell range to be copied from the buffer.
+        integer(kind=intType) :: i, j, k, nn, po, ip, jp, kp
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
 
-    iBeg = lbound(buffer,1); iEnd = ubound(buffer,1)
-    jBeg = lbound(buffer,2); jEnd = ubound(buffer,2)
-    kBeg = lbound(buffer,3); kEnd = ubound(buffer,3)
+        real(kind=realType) :: momScale
 
-    ! Compute the momentum scaling factor, set the cgns real type and
-    ! abbreviate the solution variable and the pointer offset to
-    ! improve readability.
+        ! Set the cell range to be copied from the buffer.
 
-    momScale     = rhoScale*velScale
-    realTypeCGNS = setCGNSRealType()
+        iBeg = lbound(buffer, 1); iEnd = ubound(buffer, 1)
+        jBeg = lbound(buffer, 2); jEnd = ubound(buffer, 2)
+        kBeg = lbound(buffer, 3); kEnd = ubound(buffer, 3)
 
-    po = IOVar(nbkLocal,solID)%pointerOffset
-    w => IOVar(nbkLocal,solID)%w
+        ! Compute the momentum scaling factor, set the cgns real type and
+        ! abbreviate the solution variable and the pointer offset to
+        ! improve readability.
 
-    ! Find out if the Y-momentum is present in the solution file.
+        momScale = rhoScale * velScale
+        realTypeCGNS = setCGNSRealType()
 
-    nn = bsearchStrings(cgnsMomY, varNames)
+        po = IOVar(nbkLocal, solID)%pointerOffset
+        w => IOVar(nbkLocal, solID)%w
 
-    testMyPresent: if(nn > 0) then
+        ! Find out if the Y-momentum is present in the solution file.
 
-       ! Y-momentum is present. First determine whether or not a type
-       ! mismatch occurs. If so, update nTypeMismatch.
-
-       if(realTypeCGNS /= varTypes(nn)) &
-            nTypeMismatch = nTypeMismatch + 1
-
-       ! Read the y-momentum from the restart file and store it in buffer.
-
-       call readRestartVariable(varNames(nn))
+        nn = bsearchStrings(cgnsMomY, varNames)
 
-       ! Copy the variables from buffer into w. Multiply by the scale
-       ! factor to obtain the correct non-dimensional value and take
-       ! the possible pointer offset into account.
-
-       do k=kBeg,kEnd
-          kp = k+po
-          do j=jBeg,jEnd
-             jp = j+po
-             do i=iBeg,iEnd
-                ip = i+po
-                w(ip,jp,kp,imy) = buffer(i,j,k)*momScale
-             enddo
-          enddo
-       enddo
+        testMyPresent: if (nn > 0) then
 
-       ! Y-momentum is read, so a return can be made.
+            ! Y-momentum is present. First determine whether or not a type
+            ! mismatch occurs. If so, update nTypeMismatch.
+
+            if (realTypeCGNS /= varTypes(nn)) &
+                nTypeMismatch = nTypeMismatch + 1
+
+            ! Read the y-momentum from the restart file and store it in buffer.
+
+            call readRestartVariable(varNames(nn))
 
-       return
+            ! Copy the variables from buffer into w. Multiply by the scale
+            ! factor to obtain the correct non-dimensional value and take
+            ! the possible pointer offset into account.
+
+            do k = kBeg, kEnd
+                kp = k + po
+                do j = jBeg, jEnd
+                    jp = j + po
+                    do i = iBeg, iEnd
+                        ip = i + po
+                        w(ip, jp, kp, imy) = buffer(i, j, k) * momScale
+                    end do
+                end do
+            end do
 
-    endif testMyPresent
+            ! Y-momentum is read, so a return can be made.
 
-    ! Y-momentum is not present. Check for y-velocity.
+            return
 
-    nn = bsearchStrings(cgnsVelY, varNames)
+        end if testMyPresent
 
-    testVyPresent: if(nn > 0) then
+        ! Y-momentum is not present. Check for y-velocity.
 
-       ! Y-velocity is present. First determine whether or not a type
-       ! mismatch occurs. If so, update nTypeMismatch.
+        nn = bsearchStrings(cgnsVelY, varNames)
 
-       if(realTypeCGNS /= varTypes(nn)) &
-            nTypeMismatch = nTypeMismatch + 1
+        testVyPresent: if (nn > 0) then
 
-       ! Read the y-velocity from the restart file and store it in buffer.
+            ! Y-velocity is present. First determine whether or not a type
+            ! mismatch occurs. If so, update nTypeMismatch.
 
-       call readRestartVariable(varNames(nn))
+            if (realTypeCGNS /= varTypes(nn)) &
+                nTypeMismatch = nTypeMismatch + 1
 
-       ! Copy the variables from buffer into w. Multiply by the
-       ! density and velocity scaling factor to obtain to correct
-       ! non-dimensional value. Take the possible pointer offset
-       ! into account.
+            ! Read the y-velocity from the restart file and store it in buffer.
 
-       do k=kBeg,kEnd
-          kp = k+po
-          do j=jBeg,jEnd
-             jp = j+po
-             do i=iBeg,iEnd
-                ip = i+po
-                w(ip,jp,kp,imy) = buffer(i,j,k)*w(ip,jp,kp,irho)*velScale
-             enddo
-          enddo
-       enddo
+            call readRestartVariable(varNames(nn))
 
-       ! Y-momentum is constructed, so a return can be made.
+            ! Copy the variables from buffer into w. Multiply by the
+            ! density and velocity scaling factor to obtain to correct
+            ! non-dimensional value. Take the possible pointer offset
+            ! into account.
 
-       return
+            do k = kBeg, kEnd
+                kp = k + po
+                do j = jBeg, jEnd
+                    jp = j + po
+                    do i = iBeg, iEnd
+                        ip = i + po
+                        w(ip, jp, kp, imy) = buffer(i, j, k) * w(ip, jp, kp, irho) * velScale
+                    end do
+                end do
+            end do
 
-    endif testVyPresent
+            ! Y-momentum is constructed, so a return can be made.
 
-    ! Y-momentum could not be created. Terminate.
+            return
 
-    call terminate("readYmomentum", &
-         "Y-Momentum could not be created")
+        end if testVyPresent
 
-  end subroutine readYmomentum
+        ! Y-momentum could not be created. Terminate.
 
-  subroutine readYvelocity(nTypeMismatch)
-    !
-    !       readYvelocity reads the y-velocity variable from the given
-    !       place in the cgns file. If the y-velocity itself is not stored
-    !       then it is tried to construct it from the y-momentum and
-    !       density; it is assumed that the latter is already stored in
-    !       the pointer variable w.
-    !       If it is not possible to create the y-velocity an error
-    !       message is printed and the program will stop.
-    !       It is assumed that the pointers in blockPointers already
-    !       point to the correct block.
-    !
-    use constants
-    use cgnsNames
-    use blockPointers, only : w, nbklocal
-    use IOModule, only : IOVar
-    use utils, only : setCGNSRealType, terminate
-    use sorting, only : bsearchStrings
+        call terminate("readYmomentum", &
+                       "Y-Momentum could not be created")
 
-    implicit none
-    !
-    !      Subroutine argument.
-    !
-    integer(kind=intType), intent(inout) :: nTypeMismatch
-    !
-    !      Local variables
-    !
-    integer :: realTypeCGNS
+    end subroutine readYmomentum
 
-    integer(kind=intType) :: i, j, k, nn, po, ip, jp, kp
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
+    subroutine readYvelocity(nTypeMismatch)
+        !
+        !       readYvelocity reads the y-velocity variable from the given
+        !       place in the cgns file. If the y-velocity itself is not stored
+        !       then it is tried to construct it from the y-momentum and
+        !       density; it is assumed that the latter is already stored in
+        !       the pointer variable w.
+        !       If it is not possible to create the y-velocity an error
+        !       message is printed and the program will stop.
+        !       It is assumed that the pointers in blockPointers already
+        !       point to the correct block.
+        !
+        use constants
+        use cgnsNames
+        use blockPointers, only: w, nbklocal
+        use IOModule, only: IOVar
+        use utils, only: setCGNSRealType, terminate
+        use sorting, only: bsearchStrings
 
-    real(kind=realType) :: scale
+        implicit none
+        !
+        !      Subroutine argument.
+        !
+        integer(kind=intType), intent(inout) :: nTypeMismatch
+        !
+        !      Local variables
+        !
+        integer :: realTypeCGNS
 
-    ! Set the cell range to be copied from the buffer.
+        integer(kind=intType) :: i, j, k, nn, po, ip, jp, kp
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
 
-    iBeg = lbound(buffer,1); iEnd = ubound(buffer,1)
-    jBeg = lbound(buffer,2); jEnd = ubound(buffer,2)
-    kBeg = lbound(buffer,3); kEnd = ubound(buffer,3)
+        real(kind=realType) :: scale
 
-    ! Set the cgns real type and abbreviate the solution variable and
-    ! the pointer offset to improve readability.
+        ! Set the cell range to be copied from the buffer.
 
-    realTypeCGNS = setCGNSRealType()
+        iBeg = lbound(buffer, 1); iEnd = ubound(buffer, 1)
+        jBeg = lbound(buffer, 2); jEnd = ubound(buffer, 2)
+        kBeg = lbound(buffer, 3); kEnd = ubound(buffer, 3)
 
-    po = IOVar(nbkLocal,solID)%pointerOffset
-    w => IOVar(nbkLocal,solID)%w
+        ! Set the cgns real type and abbreviate the solution variable and
+        ! the pointer offset to improve readability.
 
-    ! Find out if the y-velocity is present in the solution file.
+        realTypeCGNS = setCGNSRealType()
 
-    nn = bsearchStrings(cgnsVelY, varNames)
-    if(nn > 0) then
+        po = IOVar(nbkLocal, solID)%pointerOffset
+        w => IOVar(nbkLocal, solID)%w
 
-       ! Y-velocity is present. First determine whether or not a type
-       ! mismatch occurs. If so, update nTypeMismatch.
-
-       if(realTypeCGNS /= varTypes(nn)) &
-            nTypeMismatch = nTypeMismatch + 1
-
-       ! Read the y-velocity from the restart file and store it
-       ! in buffer.
-
-       call readRestartVariable(varNames(nn))
-
-       ! Copy the variables from buffer into w. Multiply by the scale
-       ! factor to obtain the correct nondimensional value and take
-       ! the possible pointer offset into account.
-
-       do k=kBeg,kEnd
-          kp = k+po
-          do j=jBeg,jEnd
-             jp = j+po
-             do i=iBeg,iEnd
-                ip = i+po
-                w(ip,jp,kp,ivy) = buffer(i,j,k)*velScale
-             enddo
-          enddo
-       enddo
-
-       ! Y-velocity is read, so a return can be made.
-
-       return
-
-    endif
+        ! Find out if the y-velocity is present in the solution file.
 
-    ! Y-velocity not present. Check for y-momentum.
+        nn = bsearchStrings(cgnsVelY, varNames)
+        if (nn > 0) then
 
-    nn = bsearchStrings(cgnsMomY, varNames)
-    if(nn > 0) then
+            ! Y-velocity is present. First determine whether or not a type
+            ! mismatch occurs. If so, update nTypeMismatch.
+
+            if (realTypeCGNS /= varTypes(nn)) &
+                nTypeMismatch = nTypeMismatch + 1
+
+            ! Read the y-velocity from the restart file and store it
+            ! in buffer.
+
+            call readRestartVariable(varNames(nn))
+
+            ! Copy the variables from buffer into w. Multiply by the scale
+            ! factor to obtain the correct nondimensional value and take
+            ! the possible pointer offset into account.
+
+            do k = kBeg, kEnd
+                kp = k + po
+                do j = jBeg, jEnd
+                    jp = j + po
+                    do i = iBeg, iEnd
+                        ip = i + po
+                        w(ip, jp, kp, ivy) = buffer(i, j, k) * velScale
+                    end do
+                end do
+            end do
+
+            ! Y-velocity is read, so a return can be made.
+
+            return
+
+        end if
 
-       ! Y-momentum is present. First determine whether or not a type
-       ! mismatch occurs. If so, update nTypeMismatch.
+        ! Y-velocity not present. Check for y-momentum.
 
-       if(realTypeCGNS /= varTypes(nn)) &
-            nTypeMismatch = nTypeMismatch + 1
+        nn = bsearchStrings(cgnsMomY, varNames)
+        if (nn > 0) then
 
-       ! Read the y-momentum from the restart file and store
-       ! it in buffer.
+            ! Y-momentum is present. First determine whether or not a type
+            ! mismatch occurs. If so, update nTypeMismatch.
 
-       call readRestartVariable(varNames(nn))
-
-       ! Construct the y-velocity; it is assumed that the density is
-       ! already stored in w. Multiply by the momentum scale factor
-       ! to obtain the correct nondimensional value and take the
-       ! possible pointer offset into account.
+            if (realTypeCGNS /= varTypes(nn)) &
+                nTypeMismatch = nTypeMismatch + 1
 
-       scale = rhoScale*velScale
+            ! Read the y-momentum from the restart file and store
+            ! it in buffer.
 
-       do k=kBeg,kEnd
-          kp = k+po
-          do j=jBeg,jEnd
-             jp = j+po
-             do i=iBeg,iEnd
-                ip = i+po
-                w(ip,jp,kp,ivy) = buffer(i,j,k)*scale/w(ip,jp,kp,irho)
-             enddo
-          enddo
-       enddo
+            call readRestartVariable(varNames(nn))
+
+            ! Construct the y-velocity; it is assumed that the density is
+            ! already stored in w. Multiply by the momentum scale factor
+            ! to obtain the correct nondimensional value and take the
+            ! possible pointer offset into account.
 
-       ! Y-velocity is constructed, so a return can be made.
+            scale = rhoScale * velScale
 
-       return
+            do k = kBeg, kEnd
+                kp = k + po
+                do j = jBeg, jEnd
+                    jp = j + po
+                    do i = iBeg, iEnd
+                        ip = i + po
+                        w(ip, jp, kp, ivy) = buffer(i, j, k) * scale / w(ip, jp, kp, irho)
+                    end do
+                end do
+            end do
 
-    endif
+            ! Y-velocity is constructed, so a return can be made.
 
-    ! Not able to determine the y-velocity.
-    ! Print an error message and exit.
+            return
 
-    call terminate("readYvelocity", &
-         "Not able to retrieve y-velocity from the &
-         &variables in the restart file.")
+        end if
 
-  end subroutine readYvelocity
-  subroutine readZmomentum(nTypeMismatch)
-    !
-    !       readZmomentum reads the z-momentum variable from the given
-    !       place in the cgns file. If the z-momentum itself is not stored
-    !       then it is tried to construct it from the z-velocity and
-    !       density; it is assumed that the latter is already stored in
-    !       the pointer variable w.
-    !       If it is not possible to create the z-velocity an error
-    !       message is printed and the program will stop.
-    !       It is assumed that the pointers in blockPointers already
-    !       point to the correct block.
-    !
-    use constants
-    use cgnsNames
-    use blockPointers, only : w, nbklocal
-    use IOModule, only : IOVar
-    use utils, only : setCGNSRealType, terminate
-    use sorting, only : bsearchStrings
-    implicit none
-    !
-    !      Subroutine argument.
-    !
-    integer(kind=intType), intent(inout) :: nTypeMismatch
-    !
-    !      Local variables
-    !
-    integer :: realTypeCGNS
+        ! Not able to determine the y-velocity.
+        ! Print an error message and exit.
 
-    integer(kind=intType) :: i, j, k, nn, po, ip, jp, kp
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
+        call terminate("readYvelocity", &
+             "Not able to retrieve y-velocity from the &
+             &variables in the restart file.")
 
-    real(kind=realType) :: momScale
+    end subroutine readYvelocity
+    subroutine readZmomentum(nTypeMismatch)
+        !
+        !       readZmomentum reads the z-momentum variable from the given
+        !       place in the cgns file. If the z-momentum itself is not stored
+        !       then it is tried to construct it from the z-velocity and
+        !       density; it is assumed that the latter is already stored in
+        !       the pointer variable w.
+        !       If it is not possible to create the z-velocity an error
+        !       message is printed and the program will stop.
+        !       It is assumed that the pointers in blockPointers already
+        !       point to the correct block.
+        !
+        use constants
+        use cgnsNames
+        use blockPointers, only: w, nbklocal
+        use IOModule, only: IOVar
+        use utils, only: setCGNSRealType, terminate
+        use sorting, only: bsearchStrings
+        implicit none
+        !
+        !      Subroutine argument.
+        !
+        integer(kind=intType), intent(inout) :: nTypeMismatch
+        !
+        !      Local variables
+        !
+        integer :: realTypeCGNS
 
-    ! Set the cell range to be copied from the buffer.
+        integer(kind=intType) :: i, j, k, nn, po, ip, jp, kp
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
 
-    iBeg = lbound(buffer,1); iEnd = ubound(buffer,1)
-    jBeg = lbound(buffer,2); jEnd = ubound(buffer,2)
-    kBeg = lbound(buffer,3); kEnd = ubound(buffer,3)
+        real(kind=realType) :: momScale
 
-    ! Compute the momentum scaling factor, set the cgns real type and
-    ! abbreviate the solution variable and the pointer offset to
-    ! improve readability.
+        ! Set the cell range to be copied from the buffer.
 
-    momScale     = rhoScale*velScale
-    realTypeCGNS = setCGNSRealType()
+        iBeg = lbound(buffer, 1); iEnd = ubound(buffer, 1)
+        jBeg = lbound(buffer, 2); jEnd = ubound(buffer, 2)
+        kBeg = lbound(buffer, 3); kEnd = ubound(buffer, 3)
 
-    po = IOVar(nbkLocal,solID)%pointerOffset
-    w => IOVar(nbkLocal,solID)%w
+        ! Compute the momentum scaling factor, set the cgns real type and
+        ! abbreviate the solution variable and the pointer offset to
+        ! improve readability.
 
-    ! Find out if the Z-momentum is present in the solution file.
-
-    nn = bsearchStrings(cgnsMomZ, varNames)
-
-    testMzPresent: if(nn > 0) then
-
-       ! Z-momentum is present. First determine whether or not a type
-       ! mismatch occurs. If so, update nTypeMismatch.
-
-       if(realTypeCGNS /= varTypes(nn)) &
-            nTypeMismatch = nTypeMismatch + 1
-
-       ! Read the z-momentum from the restart file and store it in buffer.
-
-       call readRestartVariable(varNames(nn))
-
-       ! Copy the variables from buffer into w. Multiply by the scale
-       ! factor to obtain the correct non-dimensional value and take
-       ! the possible pointer offset into account.
-
-       do k=kBeg,kEnd
-          kp = k+po
-          do j=jBeg,jEnd
-             jp = j+po
-             do i=iBeg,iEnd
-                ip = i+po
-                w(ip,jp,kp,imz) = buffer(i,j,k)*momScale
-             enddo
-          enddo
-       enddo
-
-       ! Z-momentum is read, so a return can be made.
+        momScale = rhoScale * velScale
+        realTypeCGNS = setCGNSRealType()
 
-       return
+        po = IOVar(nbkLocal, solID)%pointerOffset
+        w => IOVar(nbkLocal, solID)%w
 
-    endif testMzPresent
+        ! Find out if the Z-momentum is present in the solution file.
+
+        nn = bsearchStrings(cgnsMomZ, varNames)
+
+        testMzPresent: if (nn > 0) then
+
+            ! Z-momentum is present. First determine whether or not a type
+            ! mismatch occurs. If so, update nTypeMismatch.
+
+            if (realTypeCGNS /= varTypes(nn)) &
+                nTypeMismatch = nTypeMismatch + 1
+
+            ! Read the z-momentum from the restart file and store it in buffer.
+
+            call readRestartVariable(varNames(nn))
+
+            ! Copy the variables from buffer into w. Multiply by the scale
+            ! factor to obtain the correct non-dimensional value and take
+            ! the possible pointer offset into account.
+
+            do k = kBeg, kEnd
+                kp = k + po
+                do j = jBeg, jEnd
+                    jp = j + po
+                    do i = iBeg, iEnd
+                        ip = i + po
+                        w(ip, jp, kp, imz) = buffer(i, j, k) * momScale
+                    end do
+                end do
+            end do
+
+            ! Z-momentum is read, so a return can be made.
 
-    ! Z-momentum is not present. Check for z-velocity.
+            return
 
-    nn = bsearchStrings(cgnsVelZ, varNames)
+        end if testMzPresent
 
-    testVzPresent: if(nn > 0) then
+        ! Z-momentum is not present. Check for z-velocity.
 
-       ! Z-velocity is present. First determine whether or not a type
-       ! mismatch occurs. If so, update nTypeMismatch.
+        nn = bsearchStrings(cgnsVelZ, varNames)
 
-       if(realTypeCGNS /= varTypes(nn)) &
-            nTypeMismatch = nTypeMismatch + 1
+        testVzPresent: if (nn > 0) then
 
-       ! Read the z-velocity from the restart file and store it in buffer.
-
-       call readRestartVariable(varNames(nn))
-
-       ! Copy the variables from buffer into w. Multiply by the
-       ! density and velocity scaling factor to obtain to correct
-       ! non-dimensional value. Take the possible pointer offset
-       ! into account.
-
-       do k=kBeg,kEnd
-          kp = k+po
-          do j=jBeg,jEnd
-             jp = j+po
-             do i=iBeg,iEnd
-                ip = i+po
-                w(ip,jp,kp,imz) = buffer(i,j,k)*w(ip,jp,kp,irho)*velScale
-             enddo
-          enddo
-       enddo
+            ! Z-velocity is present. First determine whether or not a type
+            ! mismatch occurs. If so, update nTypeMismatch.
 
-       ! Z-momentum is constructed, so a return can be made.
+            if (realTypeCGNS /= varTypes(nn)) &
+                nTypeMismatch = nTypeMismatch + 1
 
-       return
+            ! Read the z-velocity from the restart file and store it in buffer.
+
+            call readRestartVariable(varNames(nn))
+
+            ! Copy the variables from buffer into w. Multiply by the
+            ! density and velocity scaling factor to obtain to correct
+            ! non-dimensional value. Take the possible pointer offset
+            ! into account.
+
+            do k = kBeg, kEnd
+                kp = k + po
+                do j = jBeg, jEnd
+                    jp = j + po
+                    do i = iBeg, iEnd
+                        ip = i + po
+                        w(ip, jp, kp, imz) = buffer(i, j, k) * w(ip, jp, kp, irho) * velScale
+                    end do
+                end do
+            end do
 
-    endif testVzPresent
+            ! Z-momentum is constructed, so a return can be made.
 
-    ! Z-momentum could not be created. Terminate.
+            return
 
-    call terminate("readZmomentum", &
-         "Z-Momentum could not be created")
+        end if testVzPresent
 
-  end subroutine readZmomentum
-  subroutine readZvelocity(nTypeMismatch)
-    !
-    !       readZvelocity reads the z-velocity variable from the given
-    !       place in the cgns file. If the z-velocity itself is not stored
-    !       then it is tried to construct it from the z-momentum and
-    !       density; it is assumed that the latter is already stored in
-    !       the pointer variable w.
-    !       If it is not possible to create the z-velocity an error
-    !       message is printed and the program will stop.
-    !       It is assumed that the pointers in blockPointers already
-    !       point to the correct block.
-    !
-    use constants
-    use cgnsNames
-    use blockPointers, only : w, nbklocal
-    use IOModule, only : IOVar
-    use utils, only : setCGNSRealType, terminate
-    use sorting, only : bsearchStrings
-    implicit none
-    !
-    !      Subroutine argument.
-    !
-    integer(kind=intType), intent(inout) :: nTypeMismatch
-    !
-    !      Local variables
-    !
-    integer :: realTypeCGNS
+        ! Z-momentum could not be created. Terminate.
 
-    integer(kind=intType) :: i, j, k, nn, po, ip, jp, kp
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
+        call terminate("readZmomentum", &
+                       "Z-Momentum could not be created")
 
-    real(kind=realType) :: scale
+    end subroutine readZmomentum
+    subroutine readZvelocity(nTypeMismatch)
+        !
+        !       readZvelocity reads the z-velocity variable from the given
+        !       place in the cgns file. If the z-velocity itself is not stored
+        !       then it is tried to construct it from the z-momentum and
+        !       density; it is assumed that the latter is already stored in
+        !       the pointer variable w.
+        !       If it is not possible to create the z-velocity an error
+        !       message is printed and the program will stop.
+        !       It is assumed that the pointers in blockPointers already
+        !       point to the correct block.
+        !
+        use constants
+        use cgnsNames
+        use blockPointers, only: w, nbklocal
+        use IOModule, only: IOVar
+        use utils, only: setCGNSRealType, terminate
+        use sorting, only: bsearchStrings
+        implicit none
+        !
+        !      Subroutine argument.
+        !
+        integer(kind=intType), intent(inout) :: nTypeMismatch
+        !
+        !      Local variables
+        !
+        integer :: realTypeCGNS
 
-    ! Set the cell range to be copied from the buffer.
+        integer(kind=intType) :: i, j, k, nn, po, ip, jp, kp
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
 
-    iBeg = lbound(buffer,1); iEnd = ubound(buffer,1)
-    jBeg = lbound(buffer,2); jEnd = ubound(buffer,2)
-    kBeg = lbound(buffer,3); kEnd = ubound(buffer,3)
+        real(kind=realType) :: scale
 
-    ! Set the cgns real type and abbreviate the solution variable and
-    ! the pointer offset to improve readability.
+        ! Set the cell range to be copied from the buffer.
 
-    realTypeCGNS = setCGNSRealType()
+        iBeg = lbound(buffer, 1); iEnd = ubound(buffer, 1)
+        jBeg = lbound(buffer, 2); jEnd = ubound(buffer, 2)
+        kBeg = lbound(buffer, 3); kEnd = ubound(buffer, 3)
 
-    po = IOVar(nbkLocal,solID)%pointerOffset
-    w => IOVar(nbkLocal,solID)%w
+        ! Set the cgns real type and abbreviate the solution variable and
+        ! the pointer offset to improve readability.
 
-    ! Find out if the z-velocity is present in the solution file.
+        realTypeCGNS = setCGNSRealType()
 
-    nn = bsearchStrings(cgnsVelZ, varNames)
-    if(nn > 0) then
+        po = IOVar(nbkLocal, solID)%pointerOffset
+        w => IOVar(nbkLocal, solID)%w
 
-       ! Z-velocity is present. First determine whether or not a type
-       ! mismatch occurs. If so, update nTypeMismatch.
+        ! Find out if the z-velocity is present in the solution file.
 
-       if(realTypeCGNS /= varTypes(nn)) &
-            nTypeMismatch = nTypeMismatch + 1
-
-       ! Read the z-velocity from the restart file and store it
-       ! in buffer.
-
-       call readRestartVariable(varNames(nn))
-
-       ! Copy the variables from buffer into w. Multiply by the scale
-       ! factor to obtain the correct nondimensional value and take
-       ! the possible pointer offset into account.
-
-       do k=kBeg,kEnd
-          kp = k+po
-          do j=jBeg,jEnd
-             jp = j+po
-             do i=iBeg,iEnd
-                ip = i+po
-                w(ip,jp,kp,ivz) = buffer(i,j,k)*velScale
-             enddo
-          enddo
-       enddo
-
-       ! Z-velocity is read, so a return can be made.
-
-       return
+        nn = bsearchStrings(cgnsVelZ, varNames)
+        if (nn > 0) then
 
-    endif
+            ! Z-velocity is present. First determine whether or not a type
+            ! mismatch occurs. If so, update nTypeMismatch.
 
-    ! Z-velocity not present. Check for z-momentum.
+            if (realTypeCGNS /= varTypes(nn)) &
+                nTypeMismatch = nTypeMismatch + 1
+
+            ! Read the z-velocity from the restart file and store it
+            ! in buffer.
+
+            call readRestartVariable(varNames(nn))
+
+            ! Copy the variables from buffer into w. Multiply by the scale
+            ! factor to obtain the correct nondimensional value and take
+            ! the possible pointer offset into account.
+
+            do k = kBeg, kEnd
+                kp = k + po
+                do j = jBeg, jEnd
+                    jp = j + po
+                    do i = iBeg, iEnd
+                        ip = i + po
+                        w(ip, jp, kp, ivz) = buffer(i, j, k) * velScale
+                    end do
+                end do
+            end do
+
+            ! Z-velocity is read, so a return can be made.
+
+            return
 
-    nn = bsearchStrings(cgnsMomZ, varNames)
-    if(nn > 0) then
+        end if
 
-       ! Z-momentum is present. First determine whether or not a type
-       ! mismatch occurs. If so, update nTypeMismatch.
+        ! Z-velocity not present. Check for z-momentum.
 
-       if(realTypeCGNS /= varTypes(nn)) &
-            nTypeMismatch = nTypeMismatch + 1
-
-       ! Read the z-momentum from the restart file and store
-       ! it in buffer.
+        nn = bsearchStrings(cgnsMomZ, varNames)
+        if (nn > 0) then
 
-       call readRestartVariable(varNames(nn))
-
-       ! Construct the z-velocity; it is assumed that the density is
-       ! already stored in w. Multiply by the momentum scale factor
-       ! to obtain the correct nondimensional value and take the
-       ! possible pointer offset into account.
+            ! Z-momentum is present. First determine whether or not a type
+            ! mismatch occurs. If so, update nTypeMismatch.
 
-       scale = rhoScale*velScale
+            if (realTypeCGNS /= varTypes(nn)) &
+                nTypeMismatch = nTypeMismatch + 1
+
+            ! Read the z-momentum from the restart file and store
+            ! it in buffer.
 
-       do k=kBeg,kEnd
-          kp = k+po
-          do j=jBeg,jEnd
-             jp = j+po
-             do i=iBeg,iEnd
-                ip = i+po
-                w(ip,jp,kp,ivz) = buffer(i,j,k)*scale/w(ip,jp,kp,irho)
-             enddo
-          enddo
-       enddo
+            call readRestartVariable(varNames(nn))
+
+            ! Construct the z-velocity; it is assumed that the density is
+            ! already stored in w. Multiply by the momentum scale factor
+            ! to obtain the correct nondimensional value and take the
+            ! possible pointer offset into account.
 
-       ! Z-velocity is constructed, so a return can be made.
+            scale = rhoScale * velScale
 
-       return
+            do k = kBeg, kEnd
+                kp = k + po
+                do j = jBeg, jEnd
+                    jp = j + po
+                    do i = iBeg, iEnd
+                        ip = i + po
+                        w(ip, jp, kp, ivz) = buffer(i, j, k) * scale / w(ip, jp, kp, irho)
+                    end do
+                end do
+            end do
 
-    endif
+            ! Z-velocity is constructed, so a return can be made.
 
-    ! Not able to determine the z-velocity.
-    ! Print an error message and exit.
+            return
 
-    call terminate("readZvelocity", &
-         "Not able to retrieve z-velocity from the &
-         &variables in the restart file.")
+        end if
 
-  end subroutine readZvelocity
+        ! Not able to determine the z-velocity.
+        ! Print an error message and exit.
 
-  subroutine readTimeHistory(fileIDs)
-    !
-    !       readTimeHistory attempts to read the time history of an
-    !       unsteady computation from the given cgns restart file.
-    !       If present it will be stored in the arrays timeArray and
-    !       timeDataArray, for which memory is allocated.
-    !
-    use constants
-    use cgnsNames
-    use su_cgns
-    use inputUnsteady, only : nTimeStepsFine
-    use monitor, only : timeDataArray, nMon, nTimeStepsRestart, &
-         timeUnsteadyRestart, monNames, timeArray
-    use sorting, only : qsortStrings, bsearchStrings
-    use utils, only : setCGNSRealType, terminate, allocTimeArrays
-    use commonFormats, only : strings
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer, dimension(nSolsRead), intent(in) :: fileIDs
+        call terminate("readZvelocity", &
+             "Not able to retrieve z-velocity from the &
+             &variables in the restart file.")
 
-    !
-    !      Local variables.
-    !
-    integer :: ierr, realTypeCGNS, dummyInt
-    integer :: i, nConv, nDim
-    integer(kind=cgsize_t) :: nSize(1)
+    end subroutine readZvelocity
 
-    integer(kind=intType) :: j, ii, nn
+    subroutine readTimeHistory(fileIDs)
+        !
+        !       readTimeHistory attempts to read the time history of an
+        !       unsteady computation from the given cgns restart file.
+        !       If present it will be stored in the arrays timeArray and
+        !       timeDataArray, for which memory is allocated.
+        !
+        use constants
+        use cgnsNames
+        use su_cgns
+        use inputUnsteady, only: nTimeStepsFine
+        use monitor, only: timeDataArray, nMon, nTimeStepsRestart, &
+                           timeUnsteadyRestart, monNames, timeArray
+        use sorting, only: qsortStrings, bsearchStrings
+        use utils, only: setCGNSRealType, terminate, allocTimeArrays
+        use commonFormats, only: strings
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer, dimension(nSolsRead), intent(in) :: fileIDs
 
-    integer(kind=intType), dimension(:), allocatable :: ind
+        !
+        !      Local variables.
+        !
+        integer :: ierr, realTypeCGNS, dummyInt
+        integer :: i, nConv, nDim
+        integer(kind=cgsize_t) :: nSize(1)
 
-    character(len=maxCGNSNameLen) :: cgnsName
-    character(len=maxCGNSNameLen), allocatable, dimension(:) :: &
-         convNames, tmpNames
+        integer(kind=intType) :: j, ii, nn
 
-    logical :: allConvInfo
+        integer(kind=intType), dimension(:), allocatable :: ind
 
-    ! Store the file ID and the base a bit easier. Note that the time
-    ! history only needs to be present in the first solution file.
+        character(len=maxCGNSNameLen) :: cgnsName
+        character(len=maxCGNSNameLen), allocatable, dimension(:) :: &
+            convNames, tmpNames
 
-    cgnsInd  = fileIDs(1)
-    cgnsBase = 1
+        logical :: allConvInfo
 
-    ! Set the cgns real type.
+        ! Store the file ID and the base a bit easier. Note that the time
+        ! history only needs to be present in the first solution file.
 
-    realTypeCGNS = setCGNSRealType()
+        cgnsInd = fileIDs(1)
+        cgnsBase = 1
 
-    ! Check if the time history is present by trying to read it.
+        ! Set the cgns real type.
 
-    call cg_biter_read_f(cgnsInd, cgnsBase, cgnsName, &
-         dummyInt, ierr)
-    if(ierr /= all_ok) then
+        realTypeCGNS = setCGNSRealType()
 
-       ! No time history present. Set nTimeStepsRestart and
-       ! timeUnsteadyRestart to zero, allocate the memory for the
-       ! time history of the monitoring variables, print a warning
-       ! and return.
+        ! Check if the time history is present by trying to read it.
 
-       nTimeStepsRestart   = 0
-       timeUnsteadyRestart = zero
+        call cg_biter_read_f(cgnsInd, cgnsBase, cgnsName, &
+                             dummyInt, ierr)
+        if (ierr /= all_ok) then
 
-       call allocTimeArrays(nTimeStepsFine)
+            ! No time history present. Set nTimeStepsRestart and
+            ! timeUnsteadyRestart to zero, allocate the memory for the
+            ! time history of the monitoring variables, print a warning
+            ! and return.
 
-       print "(a)", "#"
-       print "(a)", "#                 Warning"
-       print "(a)", "# No time history found in restart file."
-       print "(a)", "# Starting at timestep 1 on the finest level."
-       print "(a)", "#"
+            nTimeStepsRestart = 0
+            timeUnsteadyRestart = zero
 
-       return
+            call allocTimeArrays(nTimeStepsFine)
 
-    endif
+            print "(a)", "#"
+            print "(a)", "#                 Warning"
+            print "(a)", "# No time history found in restart file."
+            print "(a)", "# Starting at timestep 1 on the finest level."
+            print "(a)", "#"
 
-    ! Store the number of old time levels.
+            return
 
-    nTimeStepsRestart = dummyInt
+        end if
 
-    ! Go to the place in the cgns file where the time history
-    ! should be stored.
+        ! Store the number of old time levels.
 
-    call cg_goto_f(cgnsInd, cgnsBase, ierr, &
-         "BaseIterativeData_t", 1, "end")
-    if(ierr /= all_ok)                  &
-         call terminate("readTimeHistory", &
-         "Something wrong when calling cg_goto_f")
+        nTimeStepsRestart = dummyInt
 
-    ! Find out how many convergence variables are stored.
+        ! Go to the place in the cgns file where the time history
+        ! should be stored.
 
-    call cg_narrays_f(nConv, ierr)
-    if(ierr /= all_ok)                  &
-         call terminate("readTimeHistory", &
-         "Something wrong when calling cg_narrays_f")
+        call cg_goto_f(cgnsInd, cgnsBase, ierr, &
+                       "BaseIterativeData_t", 1, "end")
+        if (ierr /= all_ok) &
+            call terminate("readTimeHistory", &
+                           "Something wrong when calling cg_goto_f")
 
-    ! Allocate the memory for convNames, tmpNames and ind.
+        ! Find out how many convergence variables are stored.
 
-    allocate(convNames(nConv), tmpNames(nConv), ind(nConv), &
-         stat=ierr)
-    if(ierr /= 0)                       &
-         call terminate("readTimeHistory", &
-         "Memory allocation failure for convNames, etc.")
+        call cg_narrays_f(nConv, ierr)
+        if (ierr /= all_ok) &
+            call terminate("readTimeHistory", &
+                           "Something wrong when calling cg_narrays_f")
 
-    ! Read the names of the convergence variables. Store them in
-    ! tmpNames as well. Furthermore check the dimension of the
-    ! data stored.
+        ! Allocate the memory for convNames, tmpNames and ind.
 
-    do i=1,nConv
-       call cg_array_info_f(i, convNames(i), dummyInt, nDim, &
-            nSize, ierr)
-       if(ierr /= all_ok)                  &
-            call terminate("readConvHistory", &
-            "Something wrong when calling cg_array_info_f")
+        allocate (convNames(nConv), tmpNames(nConv), ind(nConv), &
+                  stat=ierr)
+        if (ierr /= 0) &
+            call terminate("readTimeHistory", &
+                           "Memory allocation failure for convNames, etc.")
 
-       if(nDim /= 1) then
-          print "(a)", "#"
-          print "(a)", "#                 Warning"
-          print strings, "# Dimension of time history for ", trim(convNames(i)), " is not 1."
-          print "(a)", "# Information is ignored."
-          print "(a)", "#"
+        ! Read the names of the convergence variables. Store them in
+        ! tmpNames as well. Furthermore check the dimension of the
+        ! data stored.
 
-          ! Screw up the string such that it does not correspond to
-          ! a legal name. It is appended, because it is important that
-          ! all strings differ.
+        do i = 1, nConv
+            call cg_array_info_f(i, convNames(i), dummyInt, nDim, &
+                                 nSize, ierr)
+            if (ierr /= all_ok) &
+                call terminate("readConvHistory", &
+                               "Something wrong when calling cg_array_info_f")
 
-          convNames(i) = convNames(i)//"#$@&^!#$%!"
-       endif
+            if (nDim /= 1) then
+                print "(a)", "#"
+                print "(a)", "#                 Warning"
+                print strings, "# Dimension of time history for ", trim(convNames(i)), " is not 1."
+                print "(a)", "# Information is ignored."
+                print "(a)", "#"
 
-       if(nSize(1) /= nTimeStepsRestart) then
-          print "(a)", "#"
-          print "(a)", "#                 Warning"
-          print strings, "# Inconsistent time history for ", trim(convNames(i)),"."
-          print "(a)", "# Displayed information might be incorrect."
-          print "(a)", "#"
-       endif
+                ! Screw up the string such that it does not correspond to
+                ! a legal name. It is appended, because it is important that
+                ! all strings differ.
 
-       ! Copy the name in tmpNames for the sorting.
+                convNames(i) = convNames(i)//"#$@&^!#$%!"
+            end if
 
-       tmpNames(i) = convNames(i)
-    enddo
+            if (nSize(1) /= nTimeStepsRestart) then
+                print "(a)", "#"
+                print "(a)", "#                 Warning"
+                print strings, "# Inconsistent time history for ", trim(convNames(i)), "."
+                print "(a)", "# Displayed information might be incorrect."
+                print "(a)", "#"
+            end if
 
-    ! Sort convNames in increasing order.
+            ! Copy the name in tmpNames for the sorting.
 
-    nn = nConv
-    call qsortStrings(convNames, nn)
+            tmpNames(i) = convNames(i)
+        end do
 
-    ! Find the numbers for the just sorted convergence names.
+        ! Sort convNames in increasing order.
 
-    do i=1,nConv
-       ii      = bsearchStrings(tmpNames(i), convNames)
-       ind(ii) = i
-    enddo
+        nn = nConv
+        call qsortStrings(convNames, nn)
 
-    ! Find out whether the old time values are present.
-    ! If not the time history stored will be ignored.
+        ! Find the numbers for the just sorted convergence names.
 
-    ii = bsearchStrings(cgnsTimeValue, convNames)
-    if(ii == 0) then
-       print "(a)", "#"
-       print "(a)", "#                 Warning"
-       print "(a)", "# No time values found in the time history &
-            &in the restart file."
-       print "(a)", "# The rest of the time history is ignored."
-       print "(a)", "# Starting at timestep 1 on the finest level."
-       print "(a)", "#"
+        do i = 1, nConv
+            ii = bsearchStrings(tmpNames(i), convNames)
+            ind(ii) = i
+        end do
 
-       ! Set nTimeStepsRestart and timeUnsteadyRestart to 0.
+        ! Find out whether the old time values are present.
+        ! If not the time history stored will be ignored.
 
-       nTimeStepsRestart   = 0
-       timeUnsteadyRestart = zero
-    endif
+        ii = bsearchStrings(cgnsTimeValue, convNames)
+        if (ii == 0) then
+            print "(a)", "#"
+            print "(a)", "#                 Warning"
+            print "(a)", "# No time values found in the time history &
+                 &in the restart file."
+            print "(a)", "# The rest of the time history is ignored."
+            print "(a)", "# Starting at timestep 1 on the finest level."
+            print "(a)", "#"
 
-    ! Determine the total number of time levels and allocate the
-    ! memory for the time history arrays.
+            ! Set nTimeStepsRestart and timeUnsteadyRestart to 0.
 
-    j = nTimeStepsRestart + nTimeStepsFine
-    call allocTimeArrays(j)
+            nTimeStepsRestart = 0
+            timeUnsteadyRestart = zero
+        end if
 
-    ! If the time values are not present, jump to the place where
-    ! the memory of the variables used in this routine is released.
+        ! Determine the total number of time levels and allocate the
+        ! memory for the time history arrays.
 
-    if(ii == 0) goto 99
+        j = nTimeStepsRestart + nTimeStepsFine
+        call allocTimeArrays(j)
 
-    ! Read the time values.
+        ! If the time values are not present, jump to the place where
+        ! the memory of the variables used in this routine is released.
 
-    i = ind(ii)
-    call cg_array_read_as_f(i, realTypeCGNS, timeArray, ierr)
-    if(ierr /= all_ok)                  &
-         call terminate("readTimeHistory", &
-         "Something wrong when calling &
-         &cg_array_read_as_f")
-
-    ! Set the value of timeUnsteadyRestart to the last value in
-    ! timeArray.
+        if (ii == 0) goto 99
 
-    timeUnsteadyRestart = timeArray(nTimeStepsRestart)
+        ! Read the time values.
 
-    ! Initialize allConvInfo to .true. and perform the loop over
-    ! the number of monitoring variables.
+        i = ind(ii)
+        call cg_array_read_as_f(i, realTypeCGNS, timeArray, ierr)
+        if (ierr /= all_ok) &
+             call terminate("readTimeHistory", &
+             "Something wrong when calling &
+             &cg_array_read_as_f")
 
-    allConvInfo = .true.
+        ! Set the value of timeUnsteadyRestart to the last value in
+        ! timeArray.
 
-    do j=1,nMon
+        timeUnsteadyRestart = timeArray(nTimeStepsRestart)
 
-       ! Search for the monitoring name in the sorted
-       ! convergence names present in the restart file.
+        ! Initialize allConvInfo to .true. and perform the loop over
+        ! the number of monitoring variables.
 
-       ii = bsearchStrings(monNames(j), convNames)
+        allConvInfo = .true.
 
-       ! Check if the name was found.
+        do j = 1, nMon
 
-       if(ii == 0) then
+            ! Search for the monitoring name in the sorted
+            ! convergence names present in the restart file.
 
-          ! Name not present in the restart file. Set allConvInfo
-          ! to .false. and the corresponding entries in timeDataArray
-          ! to zero
+            ii = bsearchStrings(monNames(j), convNames)
 
-          allConvInfo = .false.
-          do i=1,nTimeStepsRestart
-             timeDataArray(i,j) = zero
-          enddo
+            ! Check if the name was found.
 
-       else
+            if (ii == 0) then
 
-          ! Name is present in the restart file. Read the corresponding
-          ! time history.
+                ! Name not present in the restart file. Set allConvInfo
+                ! to .false. and the corresponding entries in timeDataArray
+                ! to zero
 
-          i = ind(ii)
-          call cg_array_read_as_f(i, realTypeCGNS, &
-               timeDataArray(1,j), ierr)
-          if(ierr /= all_ok)                  &
-               call terminate("readTimeHistory", &
-               "Something wrong when calling &
-               &cg_array_read_as_f")
-       endif
+                allConvInfo = .false.
+                do i = 1, nTimeStepsRestart
+                    timeDataArray(i, j) = zero
+                end do
 
-    enddo
+            else
 
-    ! Print a warning in case not all the time history could
-    ! be retrieved from the restart file.
+                ! Name is present in the restart file. Read the corresponding
+                ! time history.
 
-    if(.not. allConvInfo) then
-
-       print "(a)", "#"
-       print "(a)", "#                 Warning"
-       print "(a)", "# Not all the time history could be &
-            &retrieved from the restart file."
-       print "(a)", "# Missing information is initialized to zero."
-       print "(a)", "#"
+                i = ind(ii)
+                call cg_array_read_as_f(i, realTypeCGNS, &
+                                        timeDataArray(1, j), ierr)
+                if (ierr /= all_ok) &
+                     call terminate("readTimeHistory", &
+                     "Something wrong when calling &
+                     &cg_array_read_as_f")
+            end if
 
-    endif
+        end do
 
-99  continue
+        ! Print a warning in case not all the time history could
+        ! be retrieved from the restart file.
 
-    ! Release the memory of the variables allocated in this routine.
+        if (.not. allConvInfo) then
 
-    deallocate(convNames, tmpNames, ind, stat=ierr)
-    if(ierr /= 0)                       &
-         call terminate("readTimeHistory", &
-         "Deallocation error for convNames, etc.")
+            print "(a)", "#"
+            print "(a)", "#                 Warning"
+            print "(a)", "# Not all the time history could be &
+                 &retrieved from the restart file."
+            print "(a)", "# Missing information is initialized to zero."
+            print "(a)", "#"
 
-  end subroutine readTimeHistory
+        end if
 
-  subroutine scaleFactors(fileIDs)
-    !
-    !       scaleFactors determines the scale factors for the density,
-    !       pressure and velocity from either the reference state in the
-    !       given cgns file or they are simply set to 1.0; the latter
-    !       occurs if the input parameter checkRestartSol is .false.
-    !       If no reference state is present checkRestartSol is .true.
-    !       An error message will be printed and the program terminates.
-    !
-    use constants
-    use cgnsNames
-    use su_cgns
-    use communication, only : myID, adflow_comm_world
-    use flowVarRefState, only : pRef, muRef, rhoRef
-    use inputIO, only : checkRestartSol
-    use sorting, only : bsearchStrings, qsortStrings
-    use utils, only : setCGNSRealType, terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer, dimension(nSolsRead), intent(in) :: fileIDs
+99      continue
 
-    !      Local variables.
-    !
-    integer :: ierr, realTypeCGNS, typeCGNS
-    integer :: i, nDim, nRef
-    integer :: nsize
-    integer(kind=cgsize_t) :: nsize2(1)
-    integer(kind=intType) :: ii, nn
+        ! Release the memory of the variables allocated in this routine.
 
-    integer(kind=intType), dimension(:), allocatable :: ind
+        deallocate (convNames, tmpNames, ind, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("readTimeHistory", &
+                           "Deallocation error for convNames, etc.")
 
-    real(kind=cgnsRealType) :: tmpScale
+    end subroutine readTimeHistory
 
-    character(len=maxCGNSNameLen), allocatable, dimension(:) :: &
-         refNames, tmpNames
+    subroutine scaleFactors(fileIDs)
+        !
+        !       scaleFactors determines the scale factors for the density,
+        !       pressure and velocity from either the reference state in the
+        !       given cgns file or they are simply set to 1.0; the latter
+        !       occurs if the input parameter checkRestartSol is .false.
+        !       If no reference state is present checkRestartSol is .true.
+        !       An error message will be printed and the program terminates.
+        !
+        use constants
+        use cgnsNames
+        use su_cgns
+        use communication, only: myID, adflow_comm_world
+        use flowVarRefState, only: pRef, muRef, rhoRef
+        use inputIO, only: checkRestartSol
+        use sorting, only: bsearchStrings, qsortStrings
+        use utils, only: setCGNSRealType, terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer, dimension(nSolsRead), intent(in) :: fileIDs
 
-    logical :: muScalePresent
+        !      Local variables.
+        !
+        integer :: ierr, realTypeCGNS, typeCGNS
+        integer :: i, nDim, nRef
+        integer :: nsize
+        integer(kind=cgsize_t) :: nsize2(1)
+        integer(kind=intType) :: ii, nn
 
-    ! Store the file ID and the base a bit easier. Note that the
-    ! reference state only needs to be present in the first file.
+        integer(kind=intType), dimension(:), allocatable :: ind
 
-    cgnsInd  = fileIDs(1)
-    cgnsBase = 1
+        real(kind=cgnsRealType) :: tmpScale
 
-    ! Set the cgns real type.
+        character(len=maxCGNSNameLen), allocatable, dimension(:) :: &
+            refNames, tmpNames
 
-    realTypeCGNS = setCGNSRealType()
+        logical :: muScalePresent
 
-    ! Initialize the scale factors to 1.0, i.e. assume that the
-    ! correct non-dimensional solution is stored in the restart file.
+        ! Store the file ID and the base a bit easier. Note that the
+        ! reference state only needs to be present in the first file.
 
-    rhoScale = one
-    velScale = one
-    pScale   = one
-    muScale  = one
+        cgnsInd = fileIDs(1)
+        cgnsBase = 1
 
-    ! Go to the place in the cgns file where the reference state
-    ! should be stored.
+        ! Set the cgns real type.
 
-    call cg_goto_f(cgnsInd, cgnsBase, ierr, "end")
-    if(ierr /= all_ok)                &
-         call terminate("scaleFactors", &
-         "Something wrong when calling cg_goto_f")
-
-    ! Try going to the reference state node. If we get an error code,
-    ! it doesn't exist.
+        realTypeCGNS = setCGNSRealType()
 
-    call cg_goto_f(cgnsInd, cgnsBase, ierr, &
-         "ReferenceState_t", 1, "end")
-    if(ierr /= all_ok) then
+        ! Initialize the scale factors to 1.0, i.e. assume that the
+        ! correct non-dimensional solution is stored in the restart file.
 
-       ! Reference state does not exist. Check if the restart solution
-       ! must be checked. If not, return; otherwise print an error
-       ! message and terminate the execution. This error message is
-       ! only printed by processor 0 to avoid a messy output.
+        rhoScale = one
+        velScale = one
+        pScale = one
+        muScale = one
 
-       if(.not. checkRestartSol) return
+        ! Go to the place in the cgns file where the reference state
+        ! should be stored.
 
-       if(myId == 0)                    &
+        call cg_goto_f(cgnsInd, cgnsBase, ierr, "end")
+        if (ierr /= all_ok) &
             call terminate("scaleFactors", &
-            "Reference state not presented in restart &
-            &file. Scaling factors cannot be determined.")
-
-       ! The other processors will wait until they are killed.
+                           "Something wrong when calling cg_goto_f")
 
-       call mpi_barrier(ADflow_comm_world, ierr)
+        ! Try going to the reference state node. If we get an error code,
+        ! it doesn't exist.
 
-    endif
+        call cg_goto_f(cgnsInd, cgnsBase, ierr, &
+                       "ReferenceState_t", 1, "end")
+        if (ierr /= all_ok) then
 
-    ! Go to the reference state node.
+            ! Reference state does not exist. Check if the restart solution
+            ! must be checked. If not, return; otherwise print an error
+            ! message and terminate the execution. This error message is
+            ! only printed by processor 0 to avoid a messy output.
 
-    call cg_goto_f(cgnsInd, cgnsBase, ierr, &
-         "ReferenceState_t", 1, "end")
-    if(ierr /= all_ok)                &
-         call terminate("scaleFactors", &
-         "Something wrong when calling cg_goto_f")
+            if (.not. checkRestartSol) return
 
-    ! Found out how many reference variables are stored.
+            if (myId == 0) &
+                 call terminate("scaleFactors", &
+                 "Reference state not presented in restart &
+                 &file. Scaling factors cannot be determined.")
 
-    call cg_narrays_f(nRef, ierr)
-    if(ierr /= all_ok)               &
-         call terminate("scaleFactors", &
-         "Something wrong when calling cg_narrays_f")
+            ! The other processors will wait until they are killed.
 
-    ! Allocate the memory for refNames, tmpNames and ind.
+            call mpi_barrier(ADflow_comm_world, ierr)
 
-    allocate(refNames(nRef), tmpNames(nRef), ind(nRef), &
-         stat=ierr)
-    if(ierr /= 0)                     &
-         call terminate("scaleFactors", &
-         "Memory allocation failure for refNames, etc.")
+        end if
 
-    ! Read the names of the reference variables. Store them in
-    ! tmpNames as well.
+        ! Go to the reference state node.
 
-    do i=1,nRef
-       call cg_array_info_f(i, refNames(i), typeCGNS, nDim, &
-            nsize2, ierr)
-       if(ierr /= all_ok)                &
+        call cg_goto_f(cgnsInd, cgnsBase, ierr, &
+                       "ReferenceState_t", 1, "end")
+        if (ierr /= all_ok) &
             call terminate("scaleFactors", &
-            "Something wrong when calling cg_array_info_f")
-
-       ! Check the dimension and the size of the array.
-       ! Both should be 1. If not, screw up the name such that it
-       ! will never be found in the search later on.
-
-       if(nDim /= 1 .or. nsize2(1) /= 1) &
-            refNames(i) = refNames(i)//"#$@&^!#$%!"
-
-       ! And copy it in tmpNames.
+                           "Something wrong when calling cg_goto_f")
 
-       tmpNames(i) = refNames(i)
-    enddo
+        ! Found out how many reference variables are stored.
 
-    ! Sort refNames in increasing order.
-
-    nn = nRef
-    call qsortStrings(refNames, nn)
-
-    ! Find the numbers for the just sorted reference names.
+        call cg_narrays_f(nRef, ierr)
+        if (ierr /= all_ok) &
+            call terminate("scaleFactors", &
+                           "Something wrong when calling cg_narrays_f")
 
-    do i=1,nRef
-       ii      = bsearchStrings(tmpNames(i), refNames)
-       ind(ii) = i
-    enddo
+        ! Allocate the memory for refNames, tmpNames and ind.
 
-    ! Determine the scale factors if these must be determined.
+        allocate (refNames(nRef), tmpNames(nRef), ind(nRef), &
+                  stat=ierr)
+        if (ierr /= 0) &
+            call terminate("scaleFactors", &
+                           "Memory allocation failure for refNames, etc.")
 
-    if( checkRestartSol ) then
+        ! Read the names of the reference variables. Store them in
+        ! tmpNames as well.
 
-       ! Read the reference density from the restart file.
+        do i = 1, nRef
+            call cg_array_info_f(i, refNames(i), typeCGNS, nDim, &
+                                 nsize2, ierr)
+            if (ierr /= all_ok) &
+                call terminate("scaleFactors", &
+                               "Something wrong when calling cg_array_info_f")
 
-       ii = bsearchStrings(cgnsDensity, refNames)
-       if(ii == 0) then
-          if(myId == 0)                    &
-               call terminate("scaleFactors", &
-               "No reference density found in restart file")
+            ! Check the dimension and the size of the array.
+            ! Both should be 1. If not, screw up the name such that it
+            ! will never be found in the search later on.
 
-          ! The other processors will wait until they are killed.
+            if (nDim /= 1 .or. nsize2(1) /= 1) &
+                refNames(i) = refNames(i)//"#$@&^!#$%!"
 
-          call mpi_barrier(ADflow_comm_world, ierr)
-       endif
+            ! And copy it in tmpNames.
 
-       i = ind(ii)
-       call cg_array_read_as_f(i, realTypeCGNS, tmpScale, ierr)
-       if(ierr /= all_ok)               &
-            call terminate("scaleFactors", &
-            "Something wrong when calling &
-            &cg_array_read_as_f")
-       rhoScale = tmpScale
+            tmpNames(i) = refNames(i)
+        end do
 
-       ! Read the reference pressure from the restart file.
+        ! Sort refNames in increasing order.
 
-       ii = bsearchStrings(cgnsPressure, refNames)
-       if(ii == 0) then
-          if(myId == 0)                    &
-               call terminate("scaleFactors", &
-               "No reference pressure found in &
-               &restart file")
+        nn = nRef
+        call qsortStrings(refNames, nn)
 
-          ! The other processors will wait until they are killed.
+        ! Find the numbers for the just sorted reference names.
 
-          call mpi_barrier(ADflow_comm_world, ierr)
-       endif
+        do i = 1, nRef
+            ii = bsearchStrings(tmpNames(i), refNames)
+            ind(ii) = i
+        end do
 
-       i = ind(ii)
-       call cg_array_read_as_f(i, realTypeCGNS, tmpScale, ierr)
-       if(ierr /= all_ok)                &
-            call terminate("scaleFactors", &
-            "Something wrong when calling &
-            &cg_array_read_as_f")
-       pScale = tmpScale
+        ! Determine the scale factors if these must be determined.
 
-       ! Read the reference velocity from the restart file.
+        if (checkRestartSol) then
 
-       ii = bsearchStrings(cgnsVelocity, refNames)
-       if(ii == 0) then
-          if(myId == 0)                    &
-               call terminate("scaleFactors", &
-               "No reference velocity found in &
-               &restart file")
+            ! Read the reference density from the restart file.
 
-          ! The other processors will wait until they are killed.
+            ii = bsearchStrings(cgnsDensity, refNames)
+            if (ii == 0) then
+                if (myId == 0) &
+                    call terminate("scaleFactors", &
+                                   "No reference density found in restart file")
 
-          call mpi_barrier(ADflow_comm_world, ierr)
-       endif
+                ! The other processors will wait until they are killed.
 
-       i = ind(ii)
-       call cg_array_read_as_f(i, realTypeCGNS, tmpScale, ierr)
-       if(ierr /= all_ok)                &
-            call terminate("scaleFactors", &
-            "Something wrong when calling &
-            &cg_array_read_as_f")
-       velScale = tmpScale
+                call mpi_barrier(ADflow_comm_world, ierr)
+            end if
 
-       ! Set muScalePresent to .true. to indicate that it is present
-       ! and read or construct the reference molecular viscosity.
+            i = ind(ii)
+            call cg_array_read_as_f(i, realTypeCGNS, tmpScale, ierr)
+            if (ierr /= all_ok) &
+                 call terminate("scaleFactors", &
+                 "Something wrong when calling &
+                 &cg_array_read_as_f")
+            rhoScale = tmpScale
 
-       muScalePresent = .true.
+            ! Read the reference pressure from the restart file.
 
-       ii = bsearchStrings(cgnsViscMol, refNames)
-       if(ii > 0) then
+            ii = bsearchStrings(cgnsPressure, refNames)
+            if (ii == 0) then
+                if (myId == 0) &
+                     call terminate("scaleFactors", &
+                     "No reference pressure found in &
+                     &restart file")
 
-          ! Scale is present; read the value.
+                ! The other processors will wait until they are killed.
 
-          i = ind(ii)
-          call cg_array_read_as_f(i, realTypeCGNS, tmpScale, ierr)
-          if(ierr /= all_ok)                &
-               call terminate("scaleFactors", &
-               "Something wrong when calling &
-               &cg_array_read_as_f")
-          muScale = tmpScale
+                call mpi_barrier(ADflow_comm_world, ierr)
+            end if
 
-       else
+            i = ind(ii)
+            call cg_array_read_as_f(i, realTypeCGNS, tmpScale, ierr)
+            if (ierr /= all_ok) &
+                 call terminate("scaleFactors", &
+                 "Something wrong when calling &
+                 &cg_array_read_as_f")
+            pScale = tmpScale
 
-          ! Try to read the kinematic viscosity.
+            ! Read the reference velocity from the restart file.
 
-          ii = bsearchStrings(cgnsViscKin, refNames)
-          if(ii > 0) then
+            ii = bsearchStrings(cgnsVelocity, refNames)
+            if (ii == 0) then
+                if (myId == 0) &
+                     call terminate("scaleFactors", &
+                     "No reference velocity found in &
+                     &restart file")
 
-             ! Scale is present; read the value and multiply it by the
-             ! density.
+                ! The other processors will wait until they are killed.
 
-             i = ind(ii)
-             call cg_array_read_as_f(i, realTypeCGNS, tmpScale, ierr)
-             if(ierr /= all_ok)                &
-                  call terminate("scaleFactors", &
-                  "Something wrong when calling &
-                  &cg_array_read_as_f")
+                call mpi_barrier(ADflow_comm_world, ierr)
+            end if
 
-             muScale = tmpScale*rhoScale
+            i = ind(ii)
+            call cg_array_read_as_f(i, realTypeCGNS, tmpScale, ierr)
+            if (ierr /= all_ok) &
+                 call terminate("scaleFactors", &
+                 "Something wrong when calling &
+                 &cg_array_read_as_f")
+            velScale = tmpScale
 
-          else
+            ! Set muScalePresent to .true. to indicate that it is present
+            ! and read or construct the reference molecular viscosity.
 
-             ! Final possibility. Try to read the length scale.
+            muScalePresent = .true.
 
-             ii = bsearchStrings(cgnsLength, refNames)
-             if(ii > 0) then
+            ii = bsearchStrings(cgnsViscMol, refNames)
+            if (ii > 0) then
 
-                ! Scale is present; read the value and create the
-                ! reference viscosity.
+                ! Scale is present; read the value.
 
                 i = ind(ii)
                 call cg_array_read_as_f(i, realTypeCGNS, tmpScale, ierr)
-                if(ierr /= all_ok)               &
+                if (ierr /= all_ok) &
                      call terminate("scaleFactors", &
                      "Something wrong when calling &
                      &cg_array_read_as_f")
+                muScale = tmpScale
 
-                muScale = tmpScale*sqrt(pScale*rhoScale)
-
-             else
-
-                ! Set the logical muScalePresent to .false.
+            else
 
-                muScalePresent = .false.
+                ! Try to read the kinematic viscosity.
 
-             endif
-          endif
-       endif
+                ii = bsearchStrings(cgnsViscKin, refNames)
+                if (ii > 0) then
 
-       ! Create the correct scaling factors for density, pressure,
-       ! velocity and possibly viscosity.
+                    ! Scale is present; read the value and multiply it by the
+                    ! density.
 
-       rhoScale = rhoScale/rhoRef
-       pScale   = pScale/pRef
-       velScale = velScale/sqrt(pRef/rhoRef)
+                    i = ind(ii)
+                    call cg_array_read_as_f(i, realTypeCGNS, tmpScale, ierr)
+                    if (ierr /= all_ok) &
+                         call terminate("scaleFactors", &
+                         "Something wrong when calling &
+                         &cg_array_read_as_f")
 
-       if( muScalePresent ) muScale = muScale/muRef
+                    muScale = tmpScale * rhoScale
 
-    endif
+                else
 
-    ! Release the memory of refNames, tmpNames and ind.
+                    ! Final possibility. Try to read the length scale.
 
-    deallocate(refNames, tmpNames, ind, stat=ierr)
-    if(ierr /= 0)                    &
-         call terminate("scaleFactors", &
-         "Deallocation error for convNames, etc.")
+                    ii = bsearchStrings(cgnsLength, refNames)
+                    if (ii > 0) then
 
-  end subroutine scaleFactors
+                        ! Scale is present; read the value and create the
+                        ! reference viscosity.
 
+                        i = ind(ii)
+                        call cg_array_read_as_f(i, realTypeCGNS, tmpScale, ierr)
+                        if (ierr /= all_ok) &
+                             call terminate("scaleFactors", &
+                             "Something wrong when calling &
+                             &cg_array_read_as_f")
 
+                        muScale = tmpScale * sqrt(pScale * rhoScale)
 
-  subroutine readRestartVariable(cgnsVarName)
+                    else
 
-    !      readRestartVariable reads the given variable name from the
-    !      cgns restart file.
-    !
-    use constants
-    use blockPointers, only : il, jl, kl
-    use utils, only : terminate, setCGNSRealType
-    use su_cgns
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    character(len=*), intent(in) :: cgnsVarName
-    !
-    !      Local variables.
-    !
-    integer :: ierr, realTypeCGNS
+                        ! Set the logical muScalePresent to .false.
 
-    integer(kind=intType) :: i, j, k
+                        muScalePresent = .false.
 
-    ! Set the cgns real type.
+                    end if
+                end if
+            end if
 
-    realTypeCGNS = setCGNSRealType()
+            ! Create the correct scaling factors for density, pressure,
+            ! velocity and possibly viscosity.
 
-    ! Check where the solution variables are stored.
+            rhoScale = rhoScale / rhoRef
+            pScale = pScale / pRef
+            velScale = velScale / sqrt(pRef / rhoRef)
 
-    locationTest: if(location == CellCenter) then
+            if (muScalePresent) muScale = muScale / muRef
 
-       ! Cell centered values. Read the values directly in the buffer.
+        end if
 
-       call cg_field_read_f(cgnsInd, cgnsBase, cgnsZone, cgnsSol, &
-            cgnsVarName, realTypeCGNS, rangeMin,  &
-            rangeMax, buffer, ierr)
-       if(ierr /= all_ok)                        &
-            call terminate("readRestartVariable", &
-            "Something wrong when calling cg_field_read_f")
-    else locationTest
+        ! Release the memory of refNames, tmpNames and ind.
 
-       ! Vertex centered values. First read the solution in the
-       ! array bufferVertex.
-
-       call cg_field_read_f(cgnsInd, cgnsBase, cgnsZone, cgnsSol, &
-            cgnsVarName, realTypeCGNS, rangeMin, &
-            rangeMax, bufferVertex, ierr)
-       if(ierr /= all_ok)                        &
-            call terminate("readRestartVariable", &
-            "Something wrong when calling cg_field_read_f")
-
-       ! Create the cell centered values by averaging the vertex values.
-
-       do k=2,kl
-          do j=2,jl
-             do i=2,il
-                buffer(i,j,k) = eighth*(bufferVertex(i-1,j-1,k-1) &
-                     +         bufferVertex(i,  j-1,k-1) &
-                     +         bufferVertex(i-1,j,  k-1) &
-                     +         bufferVertex(i,  j,  k-1) &
-                     +         bufferVertex(i-1,j-1,k)   &
-                     +         bufferVertex(i,  j-1,k)   &
-                     +         bufferVertex(i-1,j,  k)   &
-                     +         bufferVertex(i,  j,  k))
-             enddo
-          enddo
-       enddo
-
-    endif locationTest
-  end subroutine readRestartVariable
+        deallocate (refNames, tmpNames, ind, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("scaleFactors", &
+                           "Deallocation error for convNames, etc.")
+
+    end subroutine scaleFactors
+
+    subroutine readRestartVariable(cgnsVarName)
+
+        !      readRestartVariable reads the given variable name from the
+        !      cgns restart file.
+        !
+        use constants
+        use blockPointers, only: il, jl, kl
+        use utils, only: terminate, setCGNSRealType
+        use su_cgns
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        character(len=*), intent(in) :: cgnsVarName
+        !
+        !      Local variables.
+        !
+        integer :: ierr, realTypeCGNS
+
+        integer(kind=intType) :: i, j, k
+
+        ! Set the cgns real type.
+
+        realTypeCGNS = setCGNSRealType()
+
+        ! Check where the solution variables are stored.
+
+        locationTest: if (location == CellCenter) then
+
+            ! Cell centered values. Read the values directly in the buffer.
+
+            call cg_field_read_f(cgnsInd, cgnsBase, cgnsZone, cgnsSol, &
+                                 cgnsVarName, realTypeCGNS, rangeMin, &
+                                 rangeMax, buffer, ierr)
+            if (ierr /= all_ok) &
+                call terminate("readRestartVariable", &
+                               "Something wrong when calling cg_field_read_f")
+            else locationTest
+
+            ! Vertex centered values. First read the solution in the
+            ! array bufferVertex.
+
+            call cg_field_read_f(cgnsInd, cgnsBase, cgnsZone, cgnsSol, &
+                                 cgnsVarName, realTypeCGNS, rangeMin, &
+                                 rangeMax, bufferVertex, ierr)
+            if (ierr /= all_ok) &
+                call terminate("readRestartVariable", &
+                               "Something wrong when calling cg_field_read_f")
+
+            ! Create the cell centered values by averaging the vertex values.
+
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+                        buffer(i, j, k) = eighth * (bufferVertex(i - 1, j - 1, k - 1) &
+                                                    + bufferVertex(i, j - 1, k - 1) &
+                                                    + bufferVertex(i - 1, j, k - 1) &
+                                                    + bufferVertex(i, j, k - 1) &
+                                                    + bufferVertex(i - 1, j - 1, k) &
+                                                    + bufferVertex(i, j - 1, k) &
+                                                    + bufferVertex(i - 1, j, k) &
+                                                    + bufferVertex(i, j, k))
+                    end do
+                end do
+            end do
+
+        end if locationTest
+    end subroutine readRestartVariable
 
 end module variableReading
diff --git a/src/modules/ADjointPETSc.F90 b/src/modules/ADjointPETSc.F90
index b7b80e20f..5e9195dc0 100644
--- a/src/modules/ADjointPETSc.F90
+++ b/src/modules/ADjointPETSc.F90
@@ -1,30 +1,29 @@
 module ADjointPETSc
 
-  !      This module contains the objects used by PETSc for the
-  !      solution of the discrete adjoint equations.
-  !
-  use constants
+    !      This module contains the objects used by PETSc for the
+    !      solution of the discrete adjoint equations.
+    !
+    use constants
 #include <petsc/finclude/petsc.h>
-  use petsc
-  implicit none
+    use petsc
+    implicit none
 
-  Mat     dRdWT, dRdWPreT
+    Mat dRdWT, dRdWPreT
 
-  ! These are empty vectors
-  Vec     w_like1, w_like2, psi_like1, psi_like2, psi_like3, x_like
-  ! This logical is used to indicate whether the vectors have been created
-  logical :: adjointPETScPreProcVarsAllocated
-  PetscErrorCode PETScIerr
-  PetscFortranAddr   matfreectx(1)
+    ! These are empty vectors
+    Vec w_like1, w_like2, psi_like1, psi_like2, psi_like3, x_like
+    ! This logical is used to indicate whether the vectors have been created
+    logical :: adjointPETScPreProcVarsAllocated
+    PetscErrorCode PETScIerr
+    PetscFortranAddr matfreectx(1)
 
-  !adjointKSP   Linear solver (Krylov subspace method) context
-  KSP     adjointKSP
+    !adjointKSP   Linear solver (Krylov subspace method) context
+    KSP adjointKSP
 
-
-  ! Initial, start and final adjoint residuals
-  real(kind=alwaysRealType) :: adjResInit
-  real(kind=alwaysRealType) :: adjResStart
-  real(kind=alwaysRealType) :: adjResFinal
-  logical :: adjointPETScVarsAllocated
+    ! Initial, start and final adjoint residuals
+    real(kind=alwaysRealType) :: adjResInit
+    real(kind=alwaysRealType) :: adjResStart
+    real(kind=alwaysRealType) :: adjResFinal
+    logical :: adjointPETScVarsAllocated
 
 end module ADjointPETSc
diff --git a/src/modules/ADjointVars.F90 b/src/modules/ADjointVars.F90
index bd9a98d1d..115dd53c7 100644
--- a/src/modules/ADjointVars.F90
+++ b/src/modules/ADjointVars.F90
@@ -1,42 +1,42 @@
 
-      module ADjointVars
+module ADjointVars
 
-      use constants, only : intType
-      implicit none
+    use constants, only: intType
+    implicit none
 
-      ! Indices of the extra deisgn design variables.
-      integer(kind=intType), parameter :: iAlpha=1
-      integer(kind=intType), parameter :: iBeta=2
-      integer(kind=intType), parameter :: iMach=3
-      integer(kind=intType), parameter :: iMachGrid=4
-      integer(kind=intType), parameter :: iRotX=5
-      integer(kind=intType), parameter :: iRotY=6
-      integer(kind=intType), parameter :: iRotZ=7
-      integer(kind=intType), parameter :: iRotCenX=8
-      integer(kind=intType), parameter :: iRotCenY=9
-      integer(kind=intType), parameter :: iRotCenZ=10
-      integer(kind=intType), parameter :: iPointRefX=11
-      integer(kind=intType), parameter :: iPointRefY=12
-      integer(kind=intType), parameter :: iPointRefZ=13
-      integer(kind=intType), parameter :: iPressure=14
-      integer(kind=intType), parameter :: iTemperature=15
-      integer(kind=intType), parameter :: iDensity=16
-      integer(kind=intType), parameter :: iaxisx1=17
-      integer(kind=intType), parameter :: iaxisx2=18
-      integer(kind=intType), parameter :: iaxisy1=19
-      integer(kind=intType), parameter :: iaxisy2=20
-      integer(kind=intType), parameter :: iaxisz1=21
-      integer(kind=intType), parameter :: iaxisz2=22
-      integer(kind=intType), parameter :: nDesignExtra=22
+    ! Indices of the extra deisgn design variables.
+    integer(kind=intType), parameter :: iAlpha = 1
+    integer(kind=intType), parameter :: iBeta = 2
+    integer(kind=intType), parameter :: iMach = 3
+    integer(kind=intType), parameter :: iMachGrid = 4
+    integer(kind=intType), parameter :: iRotX = 5
+    integer(kind=intType), parameter :: iRotY = 6
+    integer(kind=intType), parameter :: iRotZ = 7
+    integer(kind=intType), parameter :: iRotCenX = 8
+    integer(kind=intType), parameter :: iRotCenY = 9
+    integer(kind=intType), parameter :: iRotCenZ = 10
+    integer(kind=intType), parameter :: iPointRefX = 11
+    integer(kind=intType), parameter :: iPointRefY = 12
+    integer(kind=intType), parameter :: iPointRefZ = 13
+    integer(kind=intType), parameter :: iPressure = 14
+    integer(kind=intType), parameter :: iTemperature = 15
+    integer(kind=intType), parameter :: iDensity = 16
+    integer(kind=intType), parameter :: iaxisx1 = 17
+    integer(kind=intType), parameter :: iaxisx2 = 18
+    integer(kind=intType), parameter :: iaxisy1 = 19
+    integer(kind=intType), parameter :: iaxisy2 = 20
+    integer(kind=intType), parameter :: iaxisz1 = 21
+    integer(kind=intType), parameter :: iaxisz2 = 22
+    integer(kind=intType), parameter :: nDesignExtra = 22
 
-      ! nNodesGlobal  Total number of nodes on each level
-      ! nNodesLocal   Number of nodes owned by the processor on each level
-      ! nOffsetLocal  Global node number offset per processor on each level
+    ! nNodesGlobal  Total number of nodes on each level
+    ! nNodesLocal   Number of nodes owned by the processor on each level
+    ! nOffsetLocal  Global node number offset per processor on each level
 
-      ! Note: We're going to assume no more than 20 multigrid
-      ! levels...this really should NEVER be exceed...
-      integer(kind=intType), parameter :: maxLevels = 20
-      integer(kind=intType), dimension(maxLevels) :: nNodesGlobal, nNodesLocal, nNodeOffsetLocal
-      integer(kind=intType), dimension(maxLevels) :: nCellsGlobal, nCellsLocal, nCellOffsetLocal
-      logical :: derivVarsAllocated = .False.
-    end module ADjointVars
+    ! Note: We're going to assume no more than 20 multigrid
+    ! levels...this really should NEVER be exceed...
+    integer(kind=intType), parameter :: maxLevels = 20
+    integer(kind=intType), dimension(maxLevels) :: nNodesGlobal, nNodesLocal, nNodeOffsetLocal
+    integer(kind=intType), dimension(maxLevels) :: nCellsGlobal, nCellsLocal, nCellOffsetLocal
+    logical :: derivVarsAllocated = .False.
+end module ADjointVars
diff --git a/src/modules/BCDataMod.F90 b/src/modules/BCDataMod.F90
index 97ded24ed..502ed20b5 100644
--- a/src/modules/BCDataMod.F90
+++ b/src/modules/BCDataMod.F90
@@ -1,66 +1,66 @@
 module BCDataMod
-  !
-  !       This local module contains the variables and subroutine to
-  !       handle the prescribed boundary data.
-  !
-  use cgnsGrid
-  implicit none
-  save
+    !
+    !       This local module contains the variables and subroutine to
+    !       handle the prescribed boundary data.
+    !
+    use cgnsGrid
+    implicit none
+    save
 
-  ! nbcVarMax: Parameter, which defines the maximum number of
-  !              prescribed variables for a boundary.
+    ! nbcVarMax: Parameter, which defines the maximum number of
+    !              prescribed variables for a boundary.
 
-  integer, parameter :: nbcVarMax = 21
+    integer, parameter :: nbcVarMax = 21
 
-  ! mass(nbcVarMax):   Unit of mass for the prescribed data.
-  ! length(nbcVarMax): Unit of length for the prescribed data.
-  ! time(nbcVarMax):   Unit of time for the prescribed data.
-  ! temp(nbcVarMax):   Unit of temperature for the prescribed data.
-  ! angle(nbcVarMax):  Unit of angle for the prescribed data.
+    ! mass(nbcVarMax):   Unit of mass for the prescribed data.
+    ! length(nbcVarMax): Unit of length for the prescribed data.
+    ! time(nbcVarMax):   Unit of time for the prescribed data.
+    ! temp(nbcVarMax):   Unit of temperature for the prescribed data.
+    ! angle(nbcVarMax):  Unit of angle for the prescribed data.
 
-  integer, dimension(nbcVarMax) :: mass, length, time, temp, angle
+    integer, dimension(nbcVarMax) :: mass, length, time, temp, angle
 
-  ! nDataSet: Number of data sets present for the active face.
-  ! cgnsBoco: The corresponding boundary face in the cgns block.
-  ! nbcVar:   Theoretically possible number of variables
-  !           prescribed for this face.
+    ! nDataSet: Number of data sets present for the active face.
+    ! cgnsBoco: The corresponding boundary face in the cgns block.
+    ! nbcVar:   Theoretically possible number of variables
+    !           prescribed for this face.
 
-  integer(kind=intType) :: nDataSet, cgnsBoco, nbcVar
+    integer(kind=intType) :: nDataSet, cgnsBoco, nbcVar
 
-  ! xf(:,:,:): pointer to the coordinates of the block face to
-  !            which the active subface belongs.
+    ! xf(:,:,:): pointer to the coordinates of the block face to
+    !            which the active subface belongs.
 
-  real(kind=realType), dimension(:,:,:), pointer :: xf
+    real(kind=realType), dimension(:, :, :), pointer :: xf
 
-  ! axis(3):    Axial unit vector in the local cylindrical
-  !             coordinate system.
-  ! radVec1(3): First radial unit vector in the local cylindrical
-  !             coordinate system.
-  ! radVec2(3): Second radial unit vector.
+    ! axis(3):    Axial unit vector in the local cylindrical
+    !             coordinate system.
+    ! radVec1(3): First radial unit vector in the local cylindrical
+    !             coordinate system.
+    ! radVec2(3): Second radial unit vector.
 
-  real(kind=realType), dimension(3) :: axis, radVec1, radVec2
+    real(kind=realType), dimension(3) :: axis, radVec1, radVec2
 
-  ! bcVarNames(nbcVarMax): The cgns names of the possible
-  !                        prescribed variables.
+    ! bcVarNames(nbcVarMax): The cgns names of the possible
+    !                        prescribed variables.
 
-  character(len=maxCGNSNameLen), dimension(nbcVarMax) :: bcVarNames
+    character(len=maxCGNSNameLen), dimension(nbcVarMax) :: bcVarNames
 
-  ! axAssumed:               Whether or not the x-axis is assumed
-  !                          to be the axial direction.
-  ! massflowPrescribed:      Whether or not subsonic inflow boundaries
-  !                          are present with prescribed massflow.
-  ! bcVarPresent(nbcVarMax): Whether or not the possible
-  !                          variables are actually prescribed.
+    ! axAssumed:               Whether or not the x-axis is assumed
+    !                          to be the axial direction.
+    ! massflowPrescribed:      Whether or not subsonic inflow boundaries
+    !                          are present with prescribed massflow.
+    ! bcVarPresent(nbcVarMax): Whether or not the possible
+    !                          variables are actually prescribed.
 
-  logical :: axAssumed, massflowPrescribed
-  logical, dimension(nbcVarMax) :: bcVarPresent
+    logical :: axAssumed, massflowPrescribed
+    logical, dimension(nbcVarMax) :: bcVarPresent
 
-  ! dataSet: Pointer for the data sets of the corresponding cgns
-  !          boundary face.
+    ! dataSet: Pointer for the data sets of the corresponding cgns
+    !          boundary face.
 
-  type(cgnsBcDatasetType), pointer, dimension(:) :: dataSet
+    type(cgnsBcDatasetType), pointer, dimension(:) :: dataSet
 #ifndef USE_TAPENADE
-  type(cgnsBcDatasetType), pointer, dimension(:) :: dataSetd
-  real(kind=realType), dimension(:,:,:), allocatable :: bcVarArrayd
+    type(cgnsBcDatasetType), pointer, dimension(:) :: dataSetd
+    real(kind=realType), dimension(:, :, :), allocatable :: bcVarArrayd
 #endif
 end module BCDataMod
diff --git a/src/modules/BCPointers.F90 b/src/modules/BCPointers.F90
index cb1b73a6b..39b8c6245 100644
--- a/src/modules/BCPointers.F90
+++ b/src/modules/BCPointers.F90
@@ -3,46 +3,46 @@ module BCPointers
 
 ! Thiss module contains data structures used to apply BCs.
 
-  use constants, only : intType, realType
-  implicit none
-  save
-
-  real(kind=realType), dimension(:,:,:), pointer :: ww0, ww1, ww2, ww3
-  real(kind=realType), dimension(:,:),   pointer :: pp0, pp1, pp2, pp3
-  real(kind=realType), dimension(:,:),   pointer :: rlv0, rlv1, rlv2, rlv3
-  real(kind=realType), dimension(:,:),   pointer :: rev0, rev1, rev2, rev3
-  real(kind=realType), dimension(:,:),   pointer :: gamma0, gamma1, gamma2, gamma3
-  real(kind=realType), dimension(:,:,:), pointer :: ssi, ssj, ssk
-  real(kind=realType), dimension(:,:,:), pointer :: ss, xx
-  real(kind=realType), dimension(:,:),   pointer :: dd2wall, sFace
-  integer(kind=intType), dimension(:,:), pointer :: gcp
-
-  integer(kind=intType) :: iStart, iEnd, iSize
-  integer(kind=intType) :: jStart, jEnd, jSize
+    use constants, only: intType, realType
+    implicit none
+    save
+
+    real(kind=realType), dimension(:, :, :), pointer :: ww0, ww1, ww2, ww3
+    real(kind=realType), dimension(:, :), pointer :: pp0, pp1, pp2, pp3
+    real(kind=realType), dimension(:, :), pointer :: rlv0, rlv1, rlv2, rlv3
+    real(kind=realType), dimension(:, :), pointer :: rev0, rev1, rev2, rev3
+    real(kind=realType), dimension(:, :), pointer :: gamma0, gamma1, gamma2, gamma3
+    real(kind=realType), dimension(:, :, :), pointer :: ssi, ssj, ssk
+    real(kind=realType), dimension(:, :, :), pointer :: ss, xx
+    real(kind=realType), dimension(:, :), pointer :: dd2wall, sFace
+    integer(kind=intType), dimension(:, :), pointer :: gcp
+
+    integer(kind=intType) :: iStart, iEnd, iSize
+    integer(kind=intType) :: jStart, jEnd, jSize
 
 #ifndef USE_TAPENADE
-  real(kind=realType), dimension(:,:,:), pointer :: ww0d, ww1d, ww2d, ww3d
-  real(kind=realType), dimension(:,:),   pointer :: pp0d, pp1d, pp2d, pp3d
-  real(kind=realType), dimension(:,:),   pointer :: rlv0d, rlv1d, rlv2d, rlv3d
-  real(kind=realType), dimension(:,:),   pointer :: rev0d, rev1d, rev2d, rev3d
-  real(kind=realType), dimension(:,:),   pointer :: gamma0d, gamma1d, gamma2d, gamma3d
-  real(kind=realType), dimension(:,:,:), pointer :: ssid, ssjd, sskd, xxd
-  real(kind=realType), dimension(:,:,:), pointer :: ssd
-  real(kind=realType), dimension(:,:),   pointer :: dd2walld, sFaced
+    real(kind=realType), dimension(:, :, :), pointer :: ww0d, ww1d, ww2d, ww3d
+    real(kind=realType), dimension(:, :), pointer :: pp0d, pp1d, pp2d, pp3d
+    real(kind=realType), dimension(:, :), pointer :: rlv0d, rlv1d, rlv2d, rlv3d
+    real(kind=realType), dimension(:, :), pointer :: rev0d, rev1d, rev2d, rev3d
+    real(kind=realType), dimension(:, :), pointer :: gamma0d, gamma1d, gamma2d, gamma3d
+    real(kind=realType), dimension(:, :, :), pointer :: ssid, ssjd, sskd, xxd
+    real(kind=realType), dimension(:, :, :), pointer :: ssd
+    real(kind=realType), dimension(:, :), pointer :: dd2walld, sFaced
 #endif
 
 end module BCPointers
 
 #ifndef USE_TAPENADE
 module BCPointers_d
-  use BCPointers
+    use BCPointers
 end module BCPointers_d
 
 module BCPointers_b
-  use BCPointers
+    use BCPointers
 end module BCPointers_b
 
 module BCPointers_fast_b
-  use BCPointers
+    use BCPointers
 end module BCPointers_fast_b
 #endif
diff --git a/src/modules/CpCurveFits.f90 b/src/modules/CpCurveFits.f90
index c09578e81..6ac42407b 100644
--- a/src/modules/CpCurveFits.f90
+++ b/src/modules/CpCurveFits.f90
@@ -1,60 +1,60 @@
-       module CpCurveFits
+module CpCurveFits
 !
 !       This module contains the curve fit data for Cp, or better
 !       Cp/R, as a function of the temperature. The temperature is
 !       assumed to be in Kelvin.
 !
-       use constants, only: intTYpe, realType
-       implicit none
-       save
+    use constants, only: intTYpe, realType
+    implicit none
+    save
 !
 !       The definition of the derived data type CpTempFitType, which
 !       stores the curve fit data for a certain temperature range.
 !
-       type CpTempFitType
+    type CpTempFitType
 
-         ! nterm:     Number of terms in the polynomial expansion.
-         ! exponents: The powers of the polynomial fit.
-         ! constants: The constants in front of each term.
+        ! nterm:     Number of terms in the polynomial expansion.
+        ! exponents: The powers of the polynomial fit.
+        ! constants: The constants in front of each term.
 
-         integer(kind=intType) :: nTerm
-         integer(kind=intType), dimension(:), pointer :: exponents
-         real(kind=realType),   dimension(:), pointer :: constants
+        integer(kind=intType) :: nTerm
+        integer(kind=intType), dimension(:), pointer :: exponents
+        real(kind=realType), dimension(:), pointer :: constants
 
-         ! Additional constant to compute the internal energy.
+        ! Additional constant to compute the internal energy.
 
-         real(kind=realType) :: eint0
+        real(kind=realType) :: eint0
 
-         ! Values of the integrand of Cp/(R*T) at the lower (_1) and
-         ! upper (_2) curve fit boundary. Needed to compute the total
-         ! pressure.
+        ! Values of the integrand of Cp/(R*T) at the lower (_1) and
+        ! upper (_2) curve fit boundary. Needed to compute the total
+        ! pressure.
 
-         real(kind=realType) :: intCpOvRT_1, intCpOvRT_2
+        real(kind=realType) :: intCpOvRT_1, intCpOvRT_2
 
-       end type CpTempFitType
+    end type CpTempFitType
 
-       ! CpNParts:             Number of temperature ranges.
-       ! CpTRange(0:CpNParts): The temperature ranges.
-       ! CpEint(0:CpNParts):   Internal energies at the curve fit
-       !                       boundaries.
-       ! CpHint(0:CpNParts):   Internal enthalpies at the curve fit
-       !                       boundaries.
-       ! CpTempFit(CpNParts):  The actual curve fit data.
+    ! CpNParts:             Number of temperature ranges.
+    ! CpTRange(0:CpNParts): The temperature ranges.
+    ! CpEint(0:CpNParts):   Internal energies at the curve fit
+    !                       boundaries.
+    ! CpHint(0:CpNParts):   Internal enthalpies at the curve fit
+    !                       boundaries.
+    ! CpTempFit(CpNParts):  The actual curve fit data.
 
-       integer(kind=intType) :: CpNParts
+    integer(kind=intType) :: CpNParts
 
-       real(kind=realType), dimension(:), allocatable :: CpTRange
-       real(kind=realType), dimension(:), allocatable :: CpEint
-       real(kind=realType), dimension(:), allocatable :: CpHint
-       type(cpTempFitType), dimension(:), allocatable :: CpTempFit
+    real(kind=realType), dimension(:), allocatable :: CpTRange
+    real(kind=realType), dimension(:), allocatable :: CpEint
+    real(kind=realType), dimension(:), allocatable :: CpHint
+    type(cpTempFitType), dimension(:), allocatable :: CpTempFit
 
-       ! cv0:   The cv value at the the temperature CpTRange(0). If the
-       !        temperature is lower than the lowest curve fit boundary
-       !        this value is needed to extrapolate the energy (assuming
-       !        constant cv).
-       ! cvn:   Idem, but than at CpTRange(CpNParts). Used when the
-       !        temperature is higher than the highest curve fit boundary.
+    ! cv0:   The cv value at the the temperature CpTRange(0). If the
+    !        temperature is lower than the lowest curve fit boundary
+    !        this value is needed to extrapolate the energy (assuming
+    !        constant cv).
+    ! cvn:   Idem, but than at CpTRange(CpNParts). Used when the
+    !        temperature is higher than the highest curve fit boundary.
 
-       real(kind=realType) :: cv0, cvn
+    real(kind=realType) :: cv0, cvn
 
-       end module CpCurveFits
+end module CpCurveFits
diff --git a/src/modules/IOModule.f90 b/src/modules/IOModule.f90
index 3bd6d11bc..201ba14df 100644
--- a/src/modules/IOModule.f90
+++ b/src/modules/IOModule.f90
@@ -1,51 +1,51 @@
-       module IOModule
+module IOModule
 !
 !       Constants and variables used in the IO routines.
 !
-       use constants, only : intType, realType, mpi_offset_kind
-       implicit none
-       save
+    use constants, only: intType, realType, mpi_offset_kind
+    implicit none
+    save
 !
 !       Definition of the parameters.
 !
-       ! The maximum amount of data a processor can read in one call
-       ! to su_file_read. This value is 2Gbyte.
+    ! The maximum amount of data a processor can read in one call
+    ! to su_file_read. This value is 2Gbyte.
 
-       integer(kind=mpi_offset_kind), parameter :: &
-                                  maxSizeIO = 2147483648_mpi_offset_kind
+    integer(kind=mpi_offset_kind), parameter :: &
+        maxSizeIO = 2147483648_mpi_offset_kind
 
-       ! Definition of the possibilities for storing data.
+    ! Definition of the possibilities for storing data.
 
-       integer(kind=intType), parameter :: nodeData          = 1
-       integer(kind=intType), parameter :: cellDataNoHalo    = 2
-       integer(kind=intType), parameter :: cellDataPlusHalo  = 3
+    integer(kind=intType), parameter :: nodeData = 1
+    integer(kind=intType), parameter :: cellDataNoHalo = 2
+    integer(kind=intType), parameter :: cellDataPlusHalo = 3
 
 !
 !       Definition of the derived datatype IOType, which is used to
 !       to make the IO as general as needed.
 !
-       type IOType
+    type IOType
 
-         ! pointerOffset: offset due to the usage of a pointer to a
-         !                subarray. The Fortran standard is such that
-         !                the starting indices of the pointer array
-         !                is 1, no matter what the original starting
-         !                index is. This can lead to a shift in the
-         !                indices.
-         ! w:             The variable(s) to be read/written.
+        ! pointerOffset: offset due to the usage of a pointer to a
+        !                subarray. The Fortran standard is such that
+        !                the starting indices of the pointer array
+        !                is 1, no matter what the original starting
+        !                index is. This can lead to a shift in the
+        !                indices.
+        ! w:             The variable(s) to be read/written.
 
-         integer(kind=intType) :: pointerOffset
+        integer(kind=intType) :: pointerOffset
 
-         real(kind=realType), dimension(:,:,:,:), pointer :: w
+        real(kind=realType), dimension(:, :, :, :), pointer :: w
 
-       end type IOType
+    end type IOType
 
 !
 !       Definition of the variables used for both CGNS
 !
-       ! IOVar(nDom,nIOFiles): Array of the derived datatype IOType to
-       !                       facilitate a general IO implementation.
+    ! IOVar(nDom,nIOFiles): Array of the derived datatype IOType to
+    !                       facilitate a general IO implementation.
 
-       type(IOType), dimension(:,:), allocatable :: IOVar
+    type(IOType), dimension(:, :), allocatable :: IOVar
 
-       end module IOModule
+end module IOModule
diff --git a/src/modules/actuatorRegionData.F90 b/src/modules/actuatorRegionData.F90
index a9943b632..bbc82e987 100644
--- a/src/modules/actuatorRegionData.F90
+++ b/src/modules/actuatorRegionData.F90
@@ -1,44 +1,44 @@
 module actuatorRegionData
-  use constants
+    use constants
 
-  type actuatorRegionType
+    type actuatorRegionType
 
-     character(len=maxStringLen) :: famName
-     integer(kind=intType) :: famID
-     ! The block indexes of the cells included in this region
-     integer(kind=intType), dimension(:, :), pointer :: cellIDs
+        character(len=maxStringLen) :: famName
+        integer(kind=intType) :: famID
+        ! The block indexes of the cells included in this region
+        integer(kind=intType), dimension(:, :), pointer :: cellIDs
 
-     ! The total number of cells included this proc has
-     integer(kind=intType) :: nCellIDs
+        ! The total number of cells included this proc has
+        integer(kind=intType) :: nCellIDs
 
-     ! the force vector to be applied on this region
-     ! this is equal to torque * axisVec
-     real(kind=realType) :: force(3)
+        ! the force vector to be applied on this region
+        ! this is equal to torque * axisVec
+        real(kind=realType) :: force(3)
 
-     ! magnitude of the total torque to be applied on this region
-     real(kind=realType) :: torque
-     ! vector that determines the direction of the applied torque
-     real(kind=realType), dimension(3) :: axisVec
+        ! magnitude of the total torque to be applied on this region
+        real(kind=realType) :: torque
+        ! vector that determines the direction of the applied torque
+        real(kind=realType), dimension(3) :: axisVec
 
-     ! total heat flux to be added on this regoin
-     real(kind=realType) :: heat
+        ! total heat flux to be added on this regoin
+        real(kind=realType) :: heat
 
-     ! Volume is the total integrated volume of all cells (on all
-     ! procs) included in this region
-     real(kind=realType) :: volume
+        ! Volume is the total integrated volume of all cells (on all
+        ! procs) included in this region
+        real(kind=realType) :: volume
 
-     integer(kind=intType), dimension(:), allocatable :: blkPtr
+        integer(kind=intType), dimension(:), allocatable :: blkPtr
 
-     ! Set the defaults for solution relaxation
-     real(kind=realType) :: relaxStart = -one
-     real(kind=realType) :: relaxEnd = -one
-  end type actuatorRegionType
+        ! Set the defaults for solution relaxation
+        real(kind=realType) :: relaxStart = -one
+        real(kind=realType) :: relaxEnd = -one
+    end type actuatorRegionType
 
-  integer(kind=intType), parameter :: nActuatorRegionsMax=10
-  type(actuatorRegionType), dimension(nActuatorRegionsMax), target :: actuatorRegions
-  integer(kind=intTYpe) :: nActuatorRegions=0
+    integer(kind=intType), parameter :: nActuatorRegionsMax = 10
+    type(actuatorRegionType), dimension(nActuatorRegionsMax), target :: actuatorRegions
+    integer(kind=intTYpe) :: nActuatorRegions = 0
 
 #ifndef USE_TAPENADE
-  type(actuatorRegionType), dimension(nActuatorRegionsMax), target :: actuatorRegionsd
+    type(actuatorRegionType), dimension(nActuatorRegionsMax), target :: actuatorRegionsd
 #endif
 end module actuatorRegionData
diff --git a/src/modules/adtData.F90 b/src/modules/adtData.F90
index 2f480f3ce..1c978aa10 100644
--- a/src/modules/adtData.F90
+++ b/src/modules/adtData.F90
@@ -1,4 +1,4 @@
-      module adtData
+module adtData
 !
 !      Module, which defines the derived data types and the arrays to
 !      store multiple ADT's. An array is chosen to store multiple
@@ -7,23 +7,23 @@ module adtData
 !      is some additional work due to reallocation. However this is
 !      negligible due to the usage of pointers.
 !
-      use constants, only : intType, realType, adtElementType, alwaysRealType
-      implicit none
-      save
+    use constants, only: intType, realType, adtElementType, alwaysRealType
+    implicit none
+    save
 !
 !      Define the functions needed for the sorting of the derived
 !      data types to be private, i.e. they can only be accessed
 !      within this module.
 !
-      public
-      private :: adtBBoxTargetTypeLessEqual
-      private :: adtBBoxTargetTypeLess
-      private :: adtTypeAssign
+    public
+    private :: adtBBoxTargetTypeLessEqual
+    private :: adtBBoxTargetTypeLess
+    private :: adtTypeAssign
 !
 !      Definition of the derived data type store a leaf of an ADT.
 !      The ADT itself is an array of these leaves.
 !
-      type adtLeafType
+    type adtLeafType
 
         ! children(2): Children of the parent. If negative it means that
         !              it is a terminal leaf and the absolute values
@@ -34,41 +34,41 @@ module adtData
         ! xMax(6):     The maximum coordinates of the leaf.
 
         integer(kind=intType), dimension(2) :: children
-        real(kind=realType),   dimension(6) :: xMin, xMax
+        real(kind=realType), dimension(6) :: xMin, xMax
 
-      end type adtLeafType
+    end type adtLeafType
 !
 !      The definition of adtBBoxTargetType, which stores the data of
 !      a possible bounding box which minimizes the distances to the
 !      given coordinate.
 !
-      type adtBBoxTargetType
+    type adtBBoxTargetType
 
         ! ID:       The id of the bounding box in the list.
         ! posDist2: the possible minimum distance squared to the active
         !           coordinate.
 
         integer(kind=intType) :: ID
-        real(kind=realType)   :: posDist2
+        real(kind=realType) :: posDist2
 
-      end type adtBBoxTargetType
+    end type adtBBoxTargetType
 
-      ! Interfaces for the extension of the operators <= and <.
-      ! These are needed for the sorting of BBoxTargetType. Note
-      ! that the = operator does not need to be defined, because
-      ! BBoxTargetType only contains primitive types.
+    ! Interfaces for the extension of the operators <= and <.
+    ! These are needed for the sorting of BBoxTargetType. Note
+    ! that the = operator does not need to be defined, because
+    ! BBoxTargetType only contains primitive types.
 
-      interface operator(<=)
+    interface operator(<=)
         module procedure adtBBoxTargetTypeLessEqual
-      end interface
+    end interface
 
-      interface operator(<)
+    interface operator(<)
         module procedure adtBBoxTargetTypeLess
-      end interface
+    end interface
 !
 !      Definition of the derived data type to store an ADT.
 !
-      type adtType
+    type adtType
 
         ! comm:   The communicator of this ADT.
         ! nProcs: The number of processors which participate in this
@@ -83,9 +83,9 @@ module adtData
         ! isActive: Whether or not the ADT is active. If not, this
         !           entry could be used during a reallocation.
 
-        integer           :: adtType
+        integer :: adtType
         character(len=64) :: adtID
-        logical           :: isActive
+        logical :: isActive
 
         ! nNodes:  Number of local nodes in the given grid.
         ! nTria:   Number of local triangles in the given grid.
@@ -101,7 +101,7 @@ module adtData
         ! coor(3,nNodes): Nodal coordinates of the local grid.
         !                 To save memory this pointer is not
         !                 allocated, but set to the data given.
-        real(kind=realType), dimension(:,:), pointer :: coor
+        real(kind=realType), dimension(:, :), pointer :: coor
 
         ! triaConn(3,nTria):     Local connectivity of the triangles.
         !                        To save memory this pointer is not
@@ -112,12 +112,12 @@ module adtData
         ! prismsConn(6,nPrisms): Idem for the prisms.
         ! hexaConn(8,nHexa):     Idem for the hexahedra.
 
-        integer(kind=intType), dimension(:,:), pointer :: triaConn
-        integer(kind=intType), dimension(:,:), pointer :: quadsConn
-        integer(kind=intType), dimension(:,:), pointer :: tetraConn
-        integer(kind=intType), dimension(:,:), pointer :: pyraConn
-        integer(kind=intType), dimension(:,:), pointer :: prismsConn
-        integer(kind=intType), dimension(:,:), pointer :: hexaConn
+        integer(kind=intType), dimension(:, :), pointer :: triaConn
+        integer(kind=intType), dimension(:, :), pointer :: quadsConn
+        integer(kind=intType), dimension(:, :), pointer :: tetraConn
+        integer(kind=intType), dimension(:, :), pointer :: pyraConn
+        integer(kind=intType), dimension(:, :), pointer :: prismsConn
+        integer(kind=intType), dimension(:, :), pointer :: hexaConn
 
         ! nRootLeaves:        Number of non-empty root leaves.
         !                     This number is of course less than or
@@ -130,7 +130,7 @@ module adtData
 
         integer :: nRootLeaves, myEntryInRootProcs
         integer, dimension(:), pointer :: rootLeavesProcs
-        real(kind=realType), dimension(:,:,:), pointer :: rootBBoxes
+        real(kind=realType), dimension(:, :, :), pointer :: rootBBoxes
 
         ! nBBoxes:              Number of bounding boxes stored in
         !                       the ADT.
@@ -144,8 +144,8 @@ module adtData
         integer(kind=intType) :: nBBoxes
 
         integer(kind=adtElementType), dimension(:), pointer :: elementType
-        integer(kind=intType),     dimension(:), pointer :: elementID
-        real(kind=realType),     dimension(:,:), pointer :: xBBox
+        integer(kind=intType), dimension(:), pointer :: elementID
+        real(kind=realType), dimension(:, :), pointer :: xBBox
 
         ! nLeaves:         Number of present in the ADT. Due to the
         !                  variable splitting the tree is optimally
@@ -155,61 +155,60 @@ module adtData
         integer(kind=intType) :: nLeaves
         type(adtLeafType), dimension(:), pointer :: ADTree
 
-      end type adtType
+    end type adtType
 
-      ! Interface for the extension of the operator =.
+    ! Interface for the extension of the operator =.
 
-      interface assignment(=)
+    interface assignment(=)
         module procedure adtTypeAssign
-      end interface
+    end interface
 !
 !                Variables stored in this module.
 !
-      ! ADTs(:): The array to store the different ADT's.
+    ! ADTs(:): The array to store the different ADT's.
 
-      type(adtType), dimension(:), allocatable, target :: ADTs
+    type(adtType), dimension(:), allocatable, target :: ADTs
 
-      ! nProcRecv:      Number of processors from which I receive
-      !                 coordinates that must be searched in my ADT.
-      ! nCoorMax:       Maximum number of coordinates that can be
-      !                 searched during an interpolation round.
-      ! nRounds:        Number of rounds in the outer loop of the search
-      !                 algorithm.
-      ! nLocalInterpol: Number of local coordinates that must be
-      !                 searched in the locally stored tree.
+    ! nProcRecv:      Number of processors from which I receive
+    !                 coordinates that must be searched in my ADT.
+    ! nCoorMax:       Maximum number of coordinates that can be
+    !                 searched during an interpolation round.
+    ! nRounds:        Number of rounds in the outer loop of the search
+    !                 algorithm.
+    ! nLocalInterpol: Number of local coordinates that must be
+    !                 searched in the locally stored tree.
 
-      integer :: nProcRecv
+    integer :: nProcRecv
 
-      integer(kind=intType) :: nCoorMax
-      integer(kind=intType) :: nRounds
-      integer(kind=intType) :: nLocalInterpol
+    integer(kind=intType) :: nCoorMax
+    integer(kind=intType) :: nRounds
+    integer(kind=intType) :: nLocalInterpol
 
+    ! procRecv:         Processor ID's from which I will receive
+    !                   coordinates.
+    ! nCoorProcRecv:    Number of coordinates I must receive from the
+    !                   processors which send coordinates to me.
+    ! nCoorPerRootLeaf: Number of coordinates, which may be searched
+    !                   in each of the local ADT's. The array is in
+    !                   cumulative storage format.
+    ! mCoorPerRootLeaf: Idem, but its contents changes during the
+    !                   iterative algorithm.
+    ! coorPerRootLeaf:  The ID's of the corresponding coordinates.
 
-      ! procRecv:         Processor ID's from which I will receive
-      !                   coordinates.
-      ! nCoorProcRecv:    Number of coordinates I must receive from the
-      !                   processors which send coordinates to me.
-      ! nCoorPerRootLeaf: Number of coordinates, which may be searched
-      !                   in each of the local ADT's. The array is in
-      !                   cumulative storage format.
-      ! mCoorPerRootLeaf: Idem, but its contents changes during the
-      !                   iterative algorithm.
-      ! coorPerRootLeaf:  The ID's of the corresponding coordinates.
+    integer, dimension(:), allocatable :: procRecv
 
-      integer, dimension(:), allocatable :: procRecv
+    integer(kind=intType), dimension(:), allocatable :: nCoorProcRecv
+    integer(kind=intType), dimension(:), allocatable :: nCoorPerRootLeaf
+    integer(kind=intType), dimension(:), allocatable :: mCoorPerRootLeaf
+    integer(kind=intType), dimension(:), allocatable :: coorPerRootLeaf
 
-      integer(kind=intType), dimension(:), allocatable :: nCoorProcRecv
-      integer(kind=intType), dimension(:), allocatable :: nCoorPerRootLeaf
-      integer(kind=intType), dimension(:), allocatable :: mCoorPerRootLeaf
-      integer(kind=intType), dimension(:), allocatable :: coorPerRootLeaf
+    !=================================================================
 
-      !=================================================================
+contains
 
-      contains
+    !===============================================================
 
-        !===============================================================
-
-        logical function adtBBoxTargetTypeLessEqual(g1,g2)
+    logical function adtBBoxTargetTypeLessEqual(g1, g2)
 !
 !        This function returns .true. if g1 <= g2. The comparison is
 !        firstly based on the possible minimum distance such that the
@@ -228,33 +227,33 @@ logical function adtBBoxTargetTypeLessEqual(g1,g2)
 
         ! Compare the possible minimum distances.
 
-        if(g1%posDist2 < g2%posDist2) then
-          adtBBoxTargetTypeLessEqual = .true.
-          return
-        else if(g1%posDist2 > g2%posDist2) then
-          adtBBoxTargetTypeLessEqual = .false.
-          return
-        endif
+        if (g1%posDist2 < g2%posDist2) then
+            adtBBoxTargetTypeLessEqual = .true.
+            return
+        else if (g1%posDist2 > g2%posDist2) then
+            adtBBoxTargetTypeLessEqual = .false.
+            return
+        end if
 
         ! Compare the bounding box ID's.
 
-        if(g1%ID < g2%ID) then
-          adtBBoxTargetTypeLessEqual = .true.
-          return
-        else if(g1%ID > g2%ID) then
-          adtBBoxTargetTypeLessEqual = .false.
-          return
-        endif
+        if (g1%ID < g2%ID) then
+            adtBBoxTargetTypeLessEqual = .true.
+            return
+        else if (g1%ID > g2%ID) then
+            adtBBoxTargetTypeLessEqual = .false.
+            return
+        end if
 
         ! g1 and g2 are identical. Return .true.
 
         adtBBoxTargetTypeLessEqual = .true.
 
-        end function adtBBoxTargetTypeLessEqual
+    end function adtBBoxTargetTypeLessEqual
 
-        !===============================================================
+    !===============================================================
 
-        logical function adtBBoxTargetTypeLess(g1,g2)
+    logical function adtBBoxTargetTypeLess(g1, g2)
 !
 !        This function returns .true. if g1 < g2. The comparison is
 !        firstly based on the possible minimum distance such that the
@@ -273,33 +272,33 @@ logical function adtBBoxTargetTypeLess(g1,g2)
 
         ! Compare the possible minimum distances.
 
-        if(g1%posDist2 < g2%posDist2) then
-          adtBBoxTargetTypeLess = .true.
-          return
-        else if(g1%posDist2 > g2%posDist2) then
-          adtBBoxTargetTypeLess = .false.
-          return
-        endif
+        if (g1%posDist2 < g2%posDist2) then
+            adtBBoxTargetTypeLess = .true.
+            return
+        else if (g1%posDist2 > g2%posDist2) then
+            adtBBoxTargetTypeLess = .false.
+            return
+        end if
 
         ! Compare the bounding box ID's.
 
-        if(g1%ID < g2%ID) then
-          adtBBoxTargetTypeLess = .true.
-          return
-        else if(g1%ID > g2%ID) then
-          adtBBoxTargetTypeLess = .false.
-          return
-        endif
+        if (g1%ID < g2%ID) then
+            adtBBoxTargetTypeLess = .true.
+            return
+        else if (g1%ID > g2%ID) then
+            adtBBoxTargetTypeLess = .false.
+            return
+        end if
 
         ! g1 and g2 are identical. Return .false.
 
         adtBBoxTargetTypeLess = .false.
 
-        end function adtBBoxTargetTypeLess
+    end function adtBBoxTargetTypeLess
 
-        !===============================================================
+    !===============================================================
 
-        subroutine adtTypeAssign(g1, g2)
+    subroutine adtTypeAssign(g1, g2)
 !
 !        This subroutine defines the generic assignment operator for
 !        the derived datatype adtType. The contents of g1 is copied
@@ -316,45 +315,45 @@ subroutine adtTypeAssign(g1, g2)
 !
 !       Subroutine arguments.
 !
-        type(adtType), intent(in)  :: g2
+        type(adtType), intent(in) :: g2
         type(adtType), intent(out) :: g1
 
-        g1%comm     = g2%comm
-        g1%nProcs   = g2%nProcs
-        g1%myID     = g2%myID
-        g1%adtType  = g2%adtType
-        g1%adtID    = g2%adtID
+        g1%comm = g2%comm
+        g1%nProcs = g2%nProcs
+        g1%myID = g2%myID
+        g1%adtType = g2%adtType
+        g1%adtID = g2%adtID
         g1%isActive = g2%isActive
 
-        g1%nNodes  = g2%nNodes
-        g1%nTria   = g2%nTria
-        g1%nQuads  = g2%nQuads
-        g1%nTetra  = g2%nTetra
-        g1%nPyra   = g2%nPyra
+        g1%nNodes = g2%nNodes
+        g1%nTria = g2%nTria
+        g1%nQuads = g2%nQuads
+        g1%nTetra = g2%nTetra
+        g1%nPyra = g2%nPyra
         g1%nPrisms = g2%nPrisms
-        g1%nHexa   = g2%nHexa
+        g1%nHexa = g2%nHexa
 
-        g1%coor       => g2%coor
-        g1%triaConn   => g2%triaConn
-        g1%quadsConn  => g2%quadsConn
-        g1%tetraConn  => g2%tetraConn
-        g1%pyraConn   => g2%pyraConn
+        g1%coor => g2%coor
+        g1%triaConn => g2%triaConn
+        g1%quadsConn => g2%quadsConn
+        g1%tetraConn => g2%tetraConn
+        g1%pyraConn => g2%pyraConn
         g1%prismsConn => g2%prismsConn
-        g1%hexaConn   => g2%hexaConn
+        g1%hexaConn => g2%hexaConn
 
-        g1%nRootLeaves        = g2%nRootLeaves
+        g1%nRootLeaves = g2%nRootLeaves
         g1%myEntryInRootProcs = g2%myEntryInRootProcs
-        g1%rootLeavesProcs   => g2%rootLeavesProcs
-        g1%rootBBoxes        => g2%rootBBoxes
+        g1%rootLeavesProcs => g2%rootLeavesProcs
+        g1%rootBBoxes => g2%rootBBoxes
 
-        g1%nBBoxes     =  g2%nBBoxes
+        g1%nBBoxes = g2%nBBoxes
         g1%elementType => g2%elementType
-        g1%elementID   => g2%elementID
-        g1%xBBox       => g2%xBBox
+        g1%elementID => g2%elementID
+        g1%xBBox => g2%xBBox
 
         g1%nLeaves = g2%nLeaves
         g1%ADTree => g2%ADTree
 
-        end subroutine adtTypeAssign
+    end subroutine adtTypeAssign
 
-      end module adtData
+end module adtData
diff --git a/src/modules/bleedFlows.f90 b/src/modules/bleedFlows.f90
index c96558790..bc40648ab 100644
--- a/src/modules/bleedFlows.f90
+++ b/src/modules/bleedFlows.f90
@@ -1,42 +1,42 @@
-       module bleedFlows
+module bleedFlows
 !
 !       Module which contains the derived data types as well as the
 !       corresponding arrays to store the information needed to model
 !       the bleed flows. Both inflow bleeds and outflow bleeds are
 !       possible.
 !
-       use constants, only : intType, realType
-       implicit none
-       save
+    use constants, only: intType, realType
+    implicit none
+    save
 !
 !       The definition of the derived data type for the flow bleeds.
 !
-       type bleedflowType
+    type bleedflowType
 
-         ! familyID:    Corresponding family ID.
-         ! massFlux:    Prescribed mass flux,
-         ! curMassFlux: Current mass flux. When converged this should
-         !              be equal to massFlux.
+        ! familyID:    Corresponding family ID.
+        ! massFlux:    Prescribed mass flux,
+        ! curMassFlux: Current mass flux. When converged this should
+        !              be equal to massFlux.
 
-         integer(kind=intType) :: familyID
-         real(kind=realType)   :: massFlux, curMassFlux
+        integer(kind=intType) :: familyID
+        real(kind=realType) :: massFlux, curMassFlux
 
-       end type bleedflowType
+    end type bleedflowType
 !
 !       Variables stored in this module.
 !
-       ! nInflowBleeds:    Number of inflow bleeds present.
-       ! nOutflowBleeds:   Number of outflow bleeds present.
-       ! inflowBleeds(:):  Array with the information for the inflow
-       !                   bleeds.
-       ! outflowBleeds(:): Array with the information for the outflow
-       !                   bleeds.
+    ! nInflowBleeds:    Number of inflow bleeds present.
+    ! nOutflowBleeds:   Number of outflow bleeds present.
+    ! inflowBleeds(:):  Array with the information for the inflow
+    !                   bleeds.
+    ! outflowBleeds(:): Array with the information for the outflow
+    !                   bleeds.
 
-       integer(kind=intType) :: nInflowBleeds
-       integer(kind=intType) :: nOutflowBleeds
-       integer(kind=intType) :: nOutflowSubsonic, nInflowSubsonic ! eran-massf
+    integer(kind=intType) :: nInflowBleeds
+    integer(kind=intType) :: nOutflowBleeds
+    integer(kind=intType) :: nOutflowSubsonic, nInflowSubsonic ! eran-massf
 
-       type(bleedflowType), dimension(:), allocatable :: inflowBleeds
-       type(bleedflowType), dimension(:), allocatable :: outflowBleeds
+    type(bleedflowType), dimension(:), allocatable :: inflowBleeds
+    type(bleedflowType), dimension(:), allocatable :: outflowBleeds
 
-       end module bleedFlows
+end module bleedFlows
diff --git a/src/modules/cgnsGrid.F90 b/src/modules/cgnsGrid.F90
index b8238f742..156c017eb 100644
--- a/src/modules/cgnsGrid.F90
+++ b/src/modules/cgnsGrid.F90
@@ -10,59 +10,59 @@ module cgnsGrid
 !       module also contains the name of the base and the physical
 !       dimensions of the problem.
 !
-       use constants, only : intType, realType, maxCGNSNameLen, one
+    use constants, only: intType, realType, maxCGNSNameLen, one
 #ifndef USE_TAPENADE
-       use su_cgns
+    use su_cgns
 #endif
-       implicit none
-       save
+    implicit none
+    save
 !
 !       The definition of the derived datatype to store the actual
 !       data of the boundary conditions.
 !
-       type cgnsBCDataArray
+    type cgnsBCDataArray
 
-         ! The units in which the data is specified.
+        ! The units in which the data is specified.
 
-         integer :: mass, len, time, temp, angle
+        integer :: mass, len, time, temp, angle
 
-         ! The name of the array.
+        ! The name of the array.
 
-         character(len=maxCGNSNameLen) :: arrayName
+        character(len=maxCGNSNameLen) :: arrayName
 
-         ! Number of dimensions for which the data is specified.
+        ! Number of dimensions for which the data is specified.
 
-         integer :: nDimensions
+        integer :: nDimensions
 
-         ! Number of data points of every dimensions. upper limit is
-         ! three, although for BC data the maximum is usually 2.
+        ! Number of data points of every dimensions. upper limit is
+        ! three, although for BC data the maximum is usually 2.
 
-         integer(kind=cgsize_t), dimension(3) :: dataDim
+        integer(kind=cgsize_t), dimension(3) :: dataDim
 
-         ! The actual data. Assumed is that only floating point data
-         ! is prescribed and not integer or character data. Note that
-         ! dataArr is a 1D array even if the data is multi-dimensional.
+        ! The actual data. Assumed is that only floating point data
+        ! is prescribed and not integer or character data. Note that
+        ! dataArr is a 1D array even if the data is multi-dimensional.
 
-         real(kind=realType), pointer, dimension(:) :: dataArr
+        real(kind=realType), pointer, dimension(:) :: dataArr
 
-       end type cgnsBCDataArray
+    end type cgnsBCDataArray
 
 #ifndef USE_TAPENADE
-       TYPE CGNSBCDATASETTYPE_D
-          TYPE(CGNSBCDATAARRAY_D), DIMENSION(:), POINTER :: dirichletarrays
-       END TYPE CGNSBCDATASETTYPE_D
+    TYPE CGNSBCDATASETTYPE_D
+        TYPE(CGNSBCDATAARRAY_D), DIMENSION(:), POINTER :: dirichletarrays
+    END TYPE CGNSBCDATASETTYPE_D
 
-       TYPE CGNSBCDATAARRAY_D
-          REAL(kind=realtype), DIMENSION(:), POINTER :: dataarr
-       END TYPE CGNSBCDATAARRAY_D
+    TYPE CGNSBCDATAARRAY_D
+        REAL(kind=realtype), DIMENSION(:), POINTER :: dataarr
+    END TYPE CGNSBCDATAARRAY_D
 
-       TYPE CGNSBCDATASETTYPE_B
-          TYPE(CGNSBCDATAARRAY_B), DIMENSION(:), POINTER :: dirichletarrays
-       END TYPE CGNSBCDATASETTYPE_B
+    TYPE CGNSBCDATASETTYPE_B
+        TYPE(CGNSBCDATAARRAY_B), DIMENSION(:), POINTER :: dirichletarrays
+    END TYPE CGNSBCDATASETTYPE_B
 
-       TYPE CGNSBCDATAARRAY_B
-          REAL(kind=realtype), DIMENSION(:), POINTER :: dataarr
-       END TYPE CGNSBCDATAARRAY_B
+    TYPE CGNSBCDATAARRAY_B
+        REAL(kind=realtype), DIMENSION(:), POINTER :: dataarr
+    END TYPE CGNSBCDATAARRAY_B
 
 #endif
 
@@ -70,480 +70,480 @@ module cgnsGrid
 !       The definition of the derived datatype to store the prescribed
 !       boundary data for a boundary subface.
 !
-       type cgnsBCDatasetType
+    type cgnsBCDatasetType
 
-         ! Name of the dataset.
+        ! Name of the dataset.
 
-         character(len=maxCGNSNameLen) :: datasetName
+        character(len=maxCGNSNameLen) :: datasetName
 
-         ! Boundary condition type.
+        ! Boundary condition type.
 
-         integer :: BCType
+        integer :: BCType
 
-         ! The number of Dirichlet arrays in the data set.
+        ! The number of Dirichlet arrays in the data set.
 
-         integer(kind=intType) :: nDirichletArrays
+        integer(kind=intType) :: nDirichletArrays
 
-         ! The number of Neumann arrays in the data set.
+        ! The number of Neumann arrays in the data set.
 
-         integer(kind=intType) :: nNeumannArrays
+        integer(kind=intType) :: nNeumannArrays
 
-         ! The Dirichlet arrays.
+        ! The Dirichlet arrays.
 
-         type(cgnsBCDataArray), pointer, dimension(:) :: dirichletArrays
+        type(cgnsBCDataArray), pointer, dimension(:) :: dirichletArrays
 
-         ! The Neumann arrays.
+        ! The Neumann arrays.
 
-         type(cgnsBCDataArray), pointer, dimension(:) :: neumannArrays
+        type(cgnsBCDataArray), pointer, dimension(:) :: neumannArrays
 
-       end type cgnsBCDatasetType
+    end type cgnsBCDatasetType
 !
 !       The definition of the derived data type to store cgns 1 to 1
 !       block to block, i.e. continuous grid lines across block
 !       boundaries, connectivities.
 !
-       type cgns1to1ConnType
+    type cgns1to1ConnType
 
-         ! Name of the interface.
+        ! Name of the interface.
 
-         character(len=maxCGNSNameLen) :: connectName
+        character(len=maxCGNSNameLen) :: connectName
 
-         ! Name of the zone/block interfacing with the current zone/block.
+        ! Name of the zone/block interfacing with the current zone/block.
 
-         character(len=maxCGNSNameLen) :: donorName
+        character(len=maxCGNSNameLen) :: donorName
 
-         ! Zone/block ID of the zone/block interfacing with the current
-         ! zone/block.
+        ! Zone/block ID of the zone/block interfacing with the current
+        ! zone/block.
 
-         integer(kind=intType) :: donorBlock
+        integer(kind=intType) :: donorBlock
 
-         ! Range of points of this subface.
+        ! Range of points of this subface.
 
-         integer(kind=intType) :: iBeg, jBeg, kBeg
-         integer(kind=intType) :: iEnd, jEnd, kEnd
+        integer(kind=intType) :: iBeg, jBeg, kBeg
+        integer(kind=intType) :: iEnd, jEnd, kEnd
 
-         ! Range of points for the donor block.
+        ! Range of points for the donor block.
 
-         integer(kind=intType) :: diBeg, djBeg, dkBeg
-         integer(kind=intType) :: diEnd, djEnd, dkEnd
+        integer(kind=intType) :: diBeg, djBeg, dkBeg
+        integer(kind=intType) :: diEnd, djEnd, dkEnd
 
-         ! Short hand notation defining the relative orientation of the
-         ! two zones.
+        ! Short hand notation defining the relative orientation of the
+        ! two zones.
 
-         integer(kind=intType) :: l1, l2, l3
+        integer(kind=intType) :: l1, l2, l3
 
-         ! Whether or not the subface is a periodic boundary.
+        ! Whether or not the subface is a periodic boundary.
 
-         logical :: periodic
+        logical :: periodic
 
-         ! The center of rotation for a periodic boundary.
+        ! The center of rotation for a periodic boundary.
 
-         real(kind=realType), dimension(3) :: rotationCenter
+        real(kind=realType), dimension(3) :: rotationCenter
 
-         ! The rotation angles for a periodic boundary.
+        ! The rotation angles for a periodic boundary.
 
-         real(kind=realType), dimension(3) :: rotationAngles
+        real(kind=realType), dimension(3) :: rotationAngles
 
-         ! The translation vector for a periodic boundary.
+        ! The translation vector for a periodic boundary.
 
-         real(kind=realType), dimension(3) :: translation
+        real(kind=realType), dimension(3) :: translation
 
-       end type cgns1to1ConnType
+    end type cgns1to1ConnType
 !
 !       The definition of the derived data type to store cgns
 !       non-matching abutting block to block connectivities.
 !
-       type cgnsNonMatchAbuttingConnType
+    type cgnsNonMatchAbuttingConnType
 
-         ! Number of donor blocks. It is possible that the subface
-         ! abuts multiple donor blocks.
+        ! Number of donor blocks. It is possible that the subface
+        ! abuts multiple donor blocks.
 
-         integer(kind=intType) :: nDonorBlocks
+        integer(kind=intType) :: nDonorBlocks
 
-         ! Names of the interfaces. Dimension [nDonorBlocks].
+        ! Names of the interfaces. Dimension [nDonorBlocks].
 
-         character(len=maxCGNSNameLen), pointer, dimension(:) :: &
-                                                            connectNames
+        character(len=maxCGNSNameLen), pointer, dimension(:) :: &
+            connectNames
 
-         ! Names of the zone/block interfacing with the current
-         ! zone/block. Dimension [nDonorBlocks].
+        ! Names of the zone/block interfacing with the current
+        ! zone/block. Dimension [nDonorBlocks].
 
-         character(len=maxCGNSNameLen), pointer, dimension(:) :: &
-                                                           donorNames
+        character(len=maxCGNSNameLen), pointer, dimension(:) :: &
+            donorNames
 
-         ! Zone/block IDs of the zones/blocks interfacing with the
-         ! current zone/block. Dimension [nDonorBlocks].
+        ! Zone/block IDs of the zones/blocks interfacing with the
+        ! current zone/block. Dimension [nDonorBlocks].
 
-         integer(kind=intType), pointer, dimension(:) :: donorBlocks
+        integer(kind=intType), pointer, dimension(:) :: donorBlocks
 
-         ! Range of points of this subface.
+        ! Range of points of this subface.
 
-         integer(kind=intType) :: iBeg, jBeg, kBeg
-         integer(kind=intType) :: iEnd, jEnd, kEnd
+        integer(kind=intType) :: iBeg, jBeg, kBeg
+        integer(kind=intType) :: iEnd, jEnd, kEnd
 
-         ! Block face IDs of the donor blocks, which abut this subface.
-         ! Dimension [nDonorBlocks].
+        ! Block face IDs of the donor blocks, which abut this subface.
+        ! Dimension [nDonorBlocks].
 
-         integer(kind=intType), pointer, dimension(:) :: donorFaceIDs
+        integer(kind=intType), pointer, dimension(:) :: donorFaceIDs
 
-         ! Whether or not the subface is a periodic boundary.
+        ! Whether or not the subface is a periodic boundary.
 
-         logical :: periodic
+        logical :: periodic
 
-         ! The center of rotation for a periodic boundary.
+        ! The center of rotation for a periodic boundary.
 
-         real(kind=realType), dimension(3) :: rotationCenter
+        real(kind=realType), dimension(3) :: rotationCenter
 
-         ! The rotation angles for a periodic boundary.
+        ! The rotation angles for a periodic boundary.
 
-         real(kind=realType), dimension(3) :: rotationAngles
+        real(kind=realType), dimension(3) :: rotationAngles
 
-         ! The translation vector for a periodic boundary.
+        ! The translation vector for a periodic boundary.
 
-         real(kind=realType), dimension(3) :: translation
+        real(kind=realType), dimension(3) :: translation
 
-       end type cgnsNonMatchAbuttingConnType
+    end type cgnsNonMatchAbuttingConnType
 !
 !       The definition of the derived data type to store cgns block
 !       boundary conditions.
 !
-       type cgnsBocoType
+    type cgnsBocoType
 
-         ! Name of the boundary condition.
+        ! Name of the boundary condition.
 
-         character(len=maxCGNSNameLen) :: bocoName
+        character(len=maxCGNSNameLen) :: bocoName
 
-         ! CGNS and internal boundary condition type.
+        ! CGNS and internal boundary condition type.
 
-         integer :: BCTypeCGNS
-         integer(kind=intType) :: BCType
+        integer :: BCTypeCGNS
+        integer(kind=intType) :: BCType
 
-         ! Name of the CGNS user defined data node if the CGNS
-         ! boundary condition is UserDefined.
+        ! Name of the CGNS user defined data node if the CGNS
+        ! boundary condition is UserDefined.
 
-         character(len=maxCGNSNameLen) :: userDefinedName
+        character(len=maxCGNSNameLen) :: userDefinedName
 
-         ! The way the boundary condition faces are specified; either
-         ! a point range or an individual set of points.
+        ! The way the boundary condition faces are specified; either
+        ! a point range or an individual set of points.
 
-         integer :: ptSetType
+        integer :: ptSetType
 
-         ! Number of points in the boundary condition set defining this
-         ! boundary region. For a point range this is 2.
+        ! Number of points in the boundary condition set defining this
+        ! boundary region. For a point range this is 2.
 
-         integer(kind=intType) :: nPnts
+        integer(kind=intType) :: nPnts
 
-         ! Index vector indicating the computational coordinate
-         ! direction of the boundary condition patch normal.
+        ! Index vector indicating the computational coordinate
+        ! direction of the boundary condition patch normal.
 
-         integer :: normalIndex(3)
+        integer :: normalIndex(3)
 
-         ! A flag indicating whether or not boundary normals are defined.
-         ! normalListFlag == 0: normals are not defined.
-         ! normalListFlag == 1: normals are defined.
+        ! A flag indicating whether or not boundary normals are defined.
+        ! normalListFlag == 0: normals are not defined.
+        ! normalListFlag == 1: normals are defined.
 
-         integer(kind=cgsize_t) :: normalListFlag
+        integer(kind=cgsize_t) :: normalListFlag
 
-         ! Data type used for the definition of the normals. Admissible
-         ! types are realSingle and realDouble.
+        ! Data type used for the definition of the normals. Admissible
+        ! types are realSingle and realDouble.
 
-         integer :: normalDataType
+        integer :: normalDataType
 
-         ! Corresponding family number. If the face does not belong to
-         ! a family this value is 0.
+        ! Corresponding family number. If the face does not belong to
+        ! a family this value is 0.
 
-         integer(kind=intType) :: familyID
+        integer(kind=intType) :: familyID
 
-         ! The number of the sliding mesh interface of which this
-         ! boco is part. 0 means that this family is not part of a
-         ! sliding mesh interface. This value can be positive and
-         ! negative in order to distinguish between the two sides of the
-         ! interface. The absolute value is the actual ID of the
-         ! interface.
+        ! The number of the sliding mesh interface of which this
+        ! boco is part. 0 means that this family is not part of a
+        ! sliding mesh interface. This value can be positive and
+        ! negative in order to distinguish between the two sides of the
+        ! interface. The absolute value is the actual ID of the
+        ! interface.
 
-         integer(kind=intType) :: slidingID
+        integer(kind=intType) :: slidingID
 
-         ! Number of boundary condition datasets for the current
-         ! boundary condition.
+        ! Number of boundary condition datasets for the current
+        ! boundary condition.
 
-         integer(kind=intType) :: nDataSet
+        integer(kind=intType) :: nDataSet
 
-         ! The actual boundary condition data sets.
+        ! The actual boundary condition data sets.
 
-         type(cgnsBCDatasetType), pointer, dimension(:) :: dataSet
+        type(cgnsBCDatasetType), pointer, dimension(:) :: dataSet
 
-         ! Whether or not I actually allocated the memory for data_set.
-         ! It is possible that data_set points to corresponding entry
-         ! of a family.
+        ! Whether or not I actually allocated the memory for data_set.
+        ! It is possible that data_set points to corresponding entry
+        ! of a family.
 
-         logical :: dataSetAllocated
+        logical :: dataSetAllocated
 
-         ! The rotation center and rotation rate of the boundary face.
-         ! It is possible that this differs from the rotation rate of
-         ! the corresponding block, e.g. for a casing in a
-         ! turbomachinery problem.
+        ! The rotation center and rotation rate of the boundary face.
+        ! It is possible that this differs from the rotation rate of
+        ! the corresponding block, e.g. for a casing in a
+        ! turbomachinery problem.
 
-         real(kind=realType), dimension(3) :: rotCenter, rotRate
+        real(kind=realType), dimension(3) :: rotCenter, rotRate
 
-         ! Range of points of this subface.
+        ! Range of points of this subface.
 
-         integer(kind=intType) :: iBeg, jBeg, kBeg
-         integer(kind=intType) :: iEnd, jEnd, kEnd
+        integer(kind=intType) :: iBeg, jBeg, kBeg
+        integer(kind=intType) :: iEnd, jEnd, kEnd
 
-         ! Whether or not this subface is an actual face. Some mesh
-         ! generators (such as ICEM CFD hexa) include edges and points
-         ! as boundary conditions. These should not be considered by
-         ! the flow solver. in those cases, actual_face is .false.
+        ! Whether or not this subface is an actual face. Some mesh
+        ! generators (such as ICEM CFD hexa) include edges and points
+        ! as boundary conditions. These should not be considered by
+        ! the flow solver. in those cases, actual_face is .false.
 
-         logical :: actualFace
+        logical :: actualFace
 
-         character(len=maxCGNSNameLen)  :: wallBCName
+        character(len=maxCGNSNameLen) :: wallBCName
 
-       end type cgnsBocoType
+    end type cgnsBocoType
 !
 !       The definition of the derived data type to store the data of a
 !       cgns block.
 !
-       type cgnsBlockInfoType
+    type cgnsBlockInfoType
 !
 !         Information read from the cgns file.
 !
-         ! The type of the zone. Should be structured. Note that this
-         ! is an integer and not integer(kind=intType).
+        ! The type of the zone. Should be structured. Note that this
+        ! is an integer and not integer(kind=intType).
 
-         integer :: zoneType
+        integer :: zoneType
 
-         ! Zone name for this block.
+        ! Zone name for this block.
 
-         character(len=maxCGNSNameLen) :: zoneName
+        character(len=maxCGNSNameLen) :: zoneName
 
-         ! The number or subblocks and the processor ID's on which they
-         ! are stored. Due to the possibility of splitting the block
-         ! during runtime, multiple processors could store a part of
-         ! the block.
+        ! The number or subblocks and the processor ID's on which they
+        ! are stored. Due to the possibility of splitting the block
+        ! during runtime, multiple processors could store a part of
+        ! the block.
 
-         integer                        :: nSubBlocks
-         integer, dimension(:), pointer :: procStored
+        integer :: nSubBlocks
+        integer, dimension(:), pointer :: procStored
 
-         ! The local block ID's of the subblocks.
+        ! The local block ID's of the subblocks.
 
-         integer, dimension(:), pointer :: localBlockID
+        integer, dimension(:), pointer :: localBlockID
 
-         ! The corresponding nodal ranges of the subblocks.
+        ! The corresponding nodal ranges of the subblocks.
 
-         integer, dimension(:), pointer :: iBegOr, jBegOr, kBegOr
-         integer, dimension(:), pointer :: iEndOr, jEndOr, kEndOr
+        integer, dimension(:), pointer :: iBegOr, jBegOr, kBegOr
+        integer, dimension(:), pointer :: iEndOr, jEndOr, kEndOr
 
-         ! The units in which the grid is specified.
+        ! The units in which the grid is specified.
 
-         integer :: mass, len, time, temp, angle
+        integer :: mass, len, time, temp, angle
 
-         ! Whether or not grid units are specified.
+        ! Whether or not grid units are specified.
 
-         logical :: gridUnitsSpecified
+        logical :: gridUnitsSpecified
 
-         ! The conversion factor to meters for this block.
+        ! The conversion factor to meters for this block.
 
-         real(kind=realType) :: LRef
+        real(kind=realType) :: LRef
 
-         ! Corresponding family number. If the block does not belong to
-         ! a family this value is 0.
+        ! Corresponding family number. If the block does not belong to
+        ! a family this value is 0.
 
-         integer(kind=intType) :: familyID
+        integer(kind=intType) :: familyID
 
-         ! Nodal block dimensions.
+        ! Nodal block dimensions.
 
-         integer(kind=intType) :: il, jl, kl
+        integer(kind=intType) :: il, jl, kl
 
-         ! Cell block dimensions.
+        ! Cell block dimensions.
 
-         integer(kind=intType) :: nx, ny, nz
+        integer(kind=intType) :: nx, ny, nz
 
-         ! Total number of 1 to 1 block to block connectivities, i.e.
-         ! continous grid lines, for this block. Also the number of
-         ! 1 to 1 connectivities stored in general connectivity nodes
-         ! is incorporated in n1to1.
+        ! Total number of 1 to 1 block to block connectivities, i.e.
+        ! continous grid lines, for this block. Also the number of
+        ! 1 to 1 connectivities stored in general connectivity nodes
+        ! is incorporated in n1to1.
 
-         integer(kind=intType) :: n1to1
+        integer(kind=intType) :: n1to1
 
-         ! Number of 1 to 1 block to block connectivities stored in
-         ! general connectivities.
+        ! Number of 1 to 1 block to block connectivities stored in
+        ! general connectivities.
 
-         integer(kind=intType) :: n1to1General
+        integer(kind=intType) :: n1to1General
 
-         ! Array of 1 to 1 block to block connectivities.
+        ! Array of 1 to 1 block to block connectivities.
 
-         type(cgns1to1ConnType), pointer, dimension(:) :: conn1to1
+        type(cgns1to1ConnType), pointer, dimension(:) :: conn1to1
 
-         ! Number of non-matching abutting block to block
-         ! connectivities.
+        ! Number of non-matching abutting block to block
+        ! connectivities.
 
-         integer(kind=intType) :: nNonMatchAbutting
+        integer(kind=intType) :: nNonMatchAbutting
 
-         ! Array of non-matching abutting block to block connectivities.
+        ! Array of non-matching abutting block to block connectivities.
 
-         type(cgnsNonMatchAbuttingConnType), pointer, dimension(:) :: &
-                                                   connNonMatchAbutting
+        type(cgnsNonMatchAbuttingConnType), pointer, dimension(:) :: &
+            connNonMatchAbutting
 
-         ! Number of boundary conditions for this block.
+        ! Number of boundary conditions for this block.
 
-         integer(kind=intType) :: nBocos
+        integer(kind=intType) :: nBocos
 
-         ! Array of boundary conditions.
+        ! Array of boundary conditions.
 
-         type(cgnsBocoType), pointer, dimension(:) :: bocoInfo
+        type(cgnsBocoType), pointer, dimension(:) :: bocoInfo
 
-         ! Whether or not a rotating frame is specified.
+        ! Whether or not a rotating frame is specified.
 
-         logical :: rotatingFrameSpecified
+        logical :: rotatingFrameSpecified
 
-         ! The corresponding rotation center and rotation rate.
+        ! The corresponding rotation center and rotation rate.
 
-         real(kind=realType), dimension(3) :: rotCenter, rotRate
+        real(kind=realType), dimension(3) :: rotCenter, rotRate
 
-         ! Whether or not a the BCs in this zone have families.
+        ! Whether or not a the BCs in this zone have families.
 
-         logical :: BCFamilies
+        logical :: BCFamilies
 
-         ! Cluster indentifier of multiblock "cluser" in overset mesh
-         integer(kind=intType) :: cluster
+        ! Cluster indentifier of multiblock "cluser" in overset mesh
+        integer(kind=intType) :: cluster
 
-         ! Overset Priority scaling. Not currently read from CGNS
-         ! file, but could be in the future. Property of CGNS block so
-         ! it's stored here. Set from python options
-         real(kind=realType) :: priority=one
+        ! Overset Priority scaling. Not currently read from CGNS
+        ! file, but could be in the future. Property of CGNS block so
+        ! it's stored here. Set from python options
+        real(kind=realType) :: priority = one
 
-         ! ViscousDir is whether or no there is a viscous direction in I/J/K
-         logical, dimension(3) :: viscousDir = [.False., .False., .False.]
+        ! ViscousDir is whether or no there is a viscous direction in I/J/K
+        logical, dimension(3) :: viscousDir = [.False., .False., .False.]
 
-       end type cgnsBlockInfoType
+    end type cgnsBlockInfoType
 !
 !       The definition of the derived data type to store the data of a
 !       cgns family.
 !
-       type cgnsFamilyType
+    type cgnsFamilyType
 
-         ! Name of the family.
+        ! Name of the family.
 
-         character(len=maxCGNSNameLen) :: familyName
+        character(len=maxCGNSNameLen) :: familyName
 
-         ! Type of the boundary condition and family BC name.
+        ! Type of the boundary condition and family BC name.
 
-         integer :: BCTypeCGNS
-         integer(kind=intType) :: BCType
+        integer :: BCTypeCGNS
+        integer(kind=intType) :: BCType
 
-         character(len=maxCGNSNameLen) :: bcName
+        character(len=maxCGNSNameLen) :: bcName
 
-         ! Name of the CGNS user defined data node if the CGNS
-         ! boundary condition is UserDefined.
+        ! Name of the CGNS user defined data node if the CGNS
+        ! boundary condition is UserDefined.
 
-         character(len=maxCGNSNameLen) :: userDefinedName
+        character(len=maxCGNSNameLen) :: userDefinedName
 
-         ! The number of the sliding mesh interface of which this
-         ! family is part. 0 means that this family is not part of a
-         ! sliding mesh interface. This value can be positive and
-         ! negative in order to distinguish between the two sides of the
-         ! interface. The absolute value is the actual ID of the
-         ! interface.
+        ! The number of the sliding mesh interface of which this
+        ! family is part. 0 means that this family is not part of a
+        ! sliding mesh interface. This value can be positive and
+        ! negative in order to distinguish between the two sides of the
+        ! interface. The absolute value is the actual ID of the
+        ! interface.
 
-         integer(kind=intType) :: slidingID
+        integer(kind=intType) :: slidingID
 
-         ! The number of the bleed flow region of which this family is
-         ! part. 0 means that this family does not belong to a bleed
-         ! flow region. There is no need to distinguish between an
-         ! inflow and an outflow bleed, because they have different
-         ! boundary conditions.
+        ! The number of the bleed flow region of which this family is
+        ! part. 0 means that this family does not belong to a bleed
+        ! flow region. There is no need to distinguish between an
+        ! inflow and an outflow bleed, because they have different
+        ! boundary conditions.
 
-         integer(kind=intType) :: bleedRegionID
+        integer(kind=intType) :: bleedRegionID
 
-         ! Whether or not the mass flow must be monitored for this family.
+        ! Whether or not the mass flow must be monitored for this family.
 
-         logical :: monitorMassFlow
+        logical :: monitorMassFlow
 
-         ! Number of boundary condition datasets for this family.
+        ! Number of boundary condition datasets for this family.
 
-         integer(kind=intType) :: nDataSet
+        integer(kind=intType) :: nDataSet
 
-         ! The actual boundary condition data sets.
+        ! The actual boundary condition data sets.
 
-         type(cgnsBCDatasetType), pointer, dimension(:) :: dataSet
+        type(cgnsBCDatasetType), pointer, dimension(:) :: dataSet
 
-         ! Whether or not a rotating frame is specified.
+        ! Whether or not a rotating frame is specified.
 
-         logical :: rotatingFrameSpecified
+        logical :: rotatingFrameSpecified
 
-         ! The corresponding rotation center and rotation rate.
+        ! The corresponding rotation center and rotation rate.
 
-         real(kind=realType), dimension(3) :: rotCenter, rotRate
+        real(kind=realType), dimension(3) :: rotCenter, rotRate
 
-       end type cgnsFamilyType
+    end type cgnsFamilyType
 !
 !       Definition of the variables stored in this module.
 !
-       ! Dimensions of the cell and of the physical dimensions.
-       ! Both should be 3 for this code. Note that these are integers
-       ! and not integers(kind=intType).
+    ! Dimensions of the cell and of the physical dimensions.
+    ! Both should be 3 for this code. Note that these are integers
+    ! and not integers(kind=intType).
 
-       integer :: cgnsCellDim, cgnsPhysDim
+    integer :: cgnsCellDim, cgnsPhysDim
 
-       ! Number of blocks (zones) in the cgns grid.
+    ! Number of blocks (zones) in the cgns grid.
 
-       integer(kind=intType) :: cgnsNDom
+    integer(kind=intType) :: cgnsNDom
 
-       ! Array of cgns blocks.
+    ! Array of cgns blocks.
 
-       type(cgnsBlockInfoType), allocatable, dimension(:) :: cgnsDoms
-       type(cgnsBlockInfoType), allocatable, dimension(:) :: cgnsDomsd
+    type(cgnsBlockInfoType), allocatable, dimension(:) :: cgnsDoms
+    type(cgnsBlockInfoType), allocatable, dimension(:) :: cgnsDomsd
 
-       ! Number of families in the cgns grid.
+    ! Number of families in the cgns grid.
 
-       integer(kind=intType) :: cgnsNFamilies
+    integer(kind=intType) :: cgnsNFamilies
 
-       ! Array of families.
+    ! Array of families.
 
-       type(cgnsFamilyType), allocatable, dimension(:) :: cgnsFamilies
+    type(cgnsFamilyType), allocatable, dimension(:) :: cgnsFamilies
 
-       ! Number of sliding mesh interfaces in the grid.
+    ! Number of sliding mesh interfaces in the grid.
 
-       integer(kind=intType) :: cgnsNSliding
+    integer(kind=intType) :: cgnsNSliding
 
-       ! The corresponding family ID's of the sliding interfaces.
+    ! The corresponding family ID's of the sliding interfaces.
 
-       integer(kind=intType), allocatable, dimension(:,:) :: famIDsSliding
+    integer(kind=intType), allocatable, dimension(:, :) :: famIDsSliding
 
-       ! Number of domain interfaces, i.e. interfaces with other CFD
-       ! codes, in the grid.
+    ! Number of domain interfaces, i.e. interfaces with other CFD
+    ! codes, in the grid.
 
-       integer(kind=intType) :: cgnsNDomainInterfaces
+    integer(kind=intType) :: cgnsNDomainInterfaces
 
-       ! The family and BC ID's of the domain interfaces.
+    ! The family and BC ID's of the domain interfaces.
 
-       integer(kind=intType), allocatable, dimension(:) :: &
-                                            famIDsDomainInterfaces
-       integer(kind=intType), allocatable, dimension(:,:) :: &
-                                            bcIDsDomainInterfaces
+    integer(kind=intType), allocatable, dimension(:) :: &
+        famIDsDomainInterfaces
+    integer(kind=intType), allocatable, dimension(:, :) :: &
+        bcIDsDomainInterfaces
 
-       ! Name of the cgns base.
+    ! Name of the cgns base.
 
-       character(len=maxCGNSNameLen) :: cgnsBaseName
+    character(len=maxCGNSNameLen) :: cgnsBaseName
 
-       ! massFlowFamilyInv(:,:):  Array to store the local contributions
-       !                          from the central part of the flux to
-       !                          the mass flow of a family and the
-       !                          sliding mesh interfaces. Dimension is
-       !                          (0:nn,nTimeIntervalsSpectral, where
-       !                          nn is the number of families for which
-       !                          the mass flow must be monitored plus
-       !                          2*cgnsNSliding (if the mass flow through
-       !                          the sliding interfaces must be monitored).
-       !                          The reason for 2*cgnsNSliding is that each
-       !                          side of a sliding interface is monitored.
-       !                          The first index starts at 0 to store
-       !                          all the faces that are not on a
-       !                          sliding interface.
-       ! massFlowFamilyDiss(:,:): Idem for the dissipative part.
+    ! massFlowFamilyInv(:,:):  Array to store the local contributions
+    !                          from the central part of the flux to
+    !                          the mass flow of a family and the
+    !                          sliding mesh interfaces. Dimension is
+    !                          (0:nn,nTimeIntervalsSpectral, where
+    !                          nn is the number of families for which
+    !                          the mass flow must be monitored plus
+    !                          2*cgnsNSliding (if the mass flow through
+    !                          the sliding interfaces must be monitored).
+    !                          The reason for 2*cgnsNSliding is that each
+    !                          side of a sliding interface is monitored.
+    !                          The first index starts at 0 to store
+    !                          all the faces that are not on a
+    !                          sliding interface.
+    ! massFlowFamilyDiss(:,:): Idem for the dissipative part.
 
-       real(kind=realType), allocatable, dimension(:,:) :: massFlowFamilyInv
-       real(kind=realType), allocatable, dimension(:,:) :: massFlowFamilyDiss
+    real(kind=realType), allocatable, dimension(:, :) :: massFlowFamilyInv
+    real(kind=realType), allocatable, dimension(:, :) :: massFlowFamilyDiss
 
 end module cgnsGrid
diff --git a/src/modules/commonFormats.F90 b/src/modules/commonFormats.F90
index 0a6bac1c9..ab6fd8bc0 100644
--- a/src/modules/commonFormats.F90
+++ b/src/modules/commonFormats.F90
@@ -8,31 +8,31 @@ module commonFormats
 !       subroutine variable: formats that are repeated within a subroutine
 !       string: one-off cases
 !
-       use constants, only : maxStringLen
-       implicit none
-       save
+    use constants, only: maxStringLen
+    implicit none
+    save
 
-       ! The * in these patterns means that it is applied as often as there are input arguments matching this pattern.
-       ! For example, (*(A, 1X)) means that this formatting pattern is applied N times if you have N strings that
-       ! you want to be separated by a space.
-       ! Similarly, (*(A, ES12.5)) expects N (string, float) pairs as an input.
+    ! The * in these patterns means that it is applied as often as there are input arguments matching this pattern.
+    ! For example, (*(A, 1X)) means that this formatting pattern is applied N times if you have N strings that
+    ! you want to be separated by a space.
+    ! Similarly, (*(A, ES12.5)) expects N (string, float) pairs as an input.
 
-       ! Strings
-       character(len=maxStringLen) :: strings = '(*(A))'
+    ! Strings
+    character(len=maxStringLen) :: strings = '(*(A))'
 
-       ! Strings followed by one space
-       character(len=maxStringLen) :: stringSpace = '(*(A, 1X))'
+    ! Strings followed by one space
+    character(len=maxStringLen) :: stringSpace = '(*(A, 1X))'
 
-       ! Strings followed by a number in scientific notation with 5 decimal places
-       character(len=maxStringLen) :: stringSci5 = '(*(A, ES12.5))'
+    ! Strings followed by a number in scientific notation with 5 decimal places
+    character(len=maxStringLen) :: stringSci5 = '(*(A, ES12.5))'
 
-       ! Strings followed by a one-digit integer
-       character(len=maxStringLen) :: stringInt1 = '(*(A, I1))'
+    ! Strings followed by a one-digit integer
+    character(len=maxStringLen) :: stringInt1 = '(*(A, I1))'
 
-       ! Numbers in scientific notation with 12 decimal places
-       character(len=maxStringLen) :: sci12 = '(*(ES20.12))'
+    ! Numbers in scientific notation with 12 decimal places
+    character(len=maxStringLen) :: sci12 = '(*(ES20.12))'
 
-       ! Integers written with 5 characters
-       character(len=maxStringLen) :: int5 = '(*(I5))'
+    ! Integers written with 5 characters
+    character(len=maxStringLen) :: int5 = '(*(I5))'
 
 end module commonFormats
diff --git a/src/modules/communication.F90 b/src/modules/communication.F90
index ea13728d6..ebb12afa8 100644
--- a/src/modules/communication.F90
+++ b/src/modules/communication.F90
@@ -1,4 +1,4 @@
-       module communication
+module communication
 !
 !       Contains the variable definition of the processor number,
 !       myID and the number of processors, nProc, which belong to the
@@ -8,9 +8,9 @@ module communication
 !       Furthermore this module contains the communication pattern for
 !       all the multigrid levels.
 !
-       use constants, only : intType, realType
-       implicit none
-       save
+    use constants, only: intType, realType
+    implicit none
+    save
 !
 !       The definition of the derived data type commListType, which
 !       stores the i,j and k indices as well as the block id of the
@@ -20,213 +20,213 @@ module communication
 !       side to keep message sizes to a minimum.
 !
 #ifndef USE_TAPENADE
-       type sendCommListType
-
-         ! block(..):     Local block id to which the cell/node belongs.
-         !                The dimension is equal to the number of entities
-         !                to be communicated with the particular processor.
-         ! indices(..,3): I, j and k indices of the data to be communicated.
-         !                For the first dimension, see block.
-         ! interp(..,..): Interpolants for indices that represent a cell
-         !                stencil (allocated only when needed, e.g. send
-         !                list for overset communication).
-         !                For the first dimension, see block.
-
-         integer(kind=intType), pointer, dimension(:)   :: block
-         integer(kind=intType), pointer, dimension(:,:) :: indices
-         real(kind=realType),   pointer, dimension(:,:) :: interp, interpd
-         real(kind=realType),   pointer, dimension(:,:) :: xCen
-       end type sendCommListType
-
-       type recvCommListType
-
-         ! block(..):     Local block id to which the cell/node belongs.
-         !                The dimension is equal to the number of entities
-         !                to be communicated with the particular processor.
-         ! indices(..,3): I, j and k indices of the data to be communicated.
-         !                For the first dimension, see block.
-
-         integer(kind=intType), pointer, dimension(:)   :: block
-         integer(kind=intType), pointer, dimension(:,:) :: indices
-
-       end type recvCommListType
+    type sendCommListType
+
+        ! block(..):     Local block id to which the cell/node belongs.
+        !                The dimension is equal to the number of entities
+        !                to be communicated with the particular processor.
+        ! indices(..,3): I, j and k indices of the data to be communicated.
+        !                For the first dimension, see block.
+        ! interp(..,..): Interpolants for indices that represent a cell
+        !                stencil (allocated only when needed, e.g. send
+        !                list for overset communication).
+        !                For the first dimension, see block.
+
+        integer(kind=intType), pointer, dimension(:) :: block
+        integer(kind=intType), pointer, dimension(:, :) :: indices
+        real(kind=realType), pointer, dimension(:, :) :: interp, interpd
+        real(kind=realType), pointer, dimension(:, :) :: xCen
+    end type sendCommListType
+
+    type recvCommListType
+
+        ! block(..):     Local block id to which the cell/node belongs.
+        !                The dimension is equal to the number of entities
+        !                to be communicated with the particular processor.
+        ! indices(..,3): I, j and k indices of the data to be communicated.
+        !                For the first dimension, see block.
+
+        integer(kind=intType), pointer, dimension(:) :: block
+        integer(kind=intType), pointer, dimension(:, :) :: indices
+
+    end type recvCommListType
 !
 !       The definition of the derived data type periodicDataType,
 !       which stores the rotation matrix, the rotation center and the
 !       translation vector of the periodic transformation, as well as
 !       the halos to which this transformation must be applied.
 !
-       type periodicDataType
+    type periodicDataType
 
-         ! rotMatrix(3,3): Rotation matrix.
-         ! rotCenter(3):   Coordinates of center of rotation.
-         ! translation(3):  Translation vector.
+        ! rotMatrix(3,3): Rotation matrix.
+        ! rotCenter(3):   Coordinates of center of rotation.
+        ! translation(3):  Translation vector.
 
-         real(kind=realType), dimension(3,3) :: rotMatrix
-         real(kind=realType), dimension(3)   :: rotCenter, translation
+        real(kind=realType), dimension(3, 3) :: rotMatrix
+        real(kind=realType), dimension(3) :: rotCenter, translation
 
-         ! nHalos:            # of halos to which this periodic
-         !                    transformation must be applied.
-         ! block(nHalos):     Local block id to which the halos belong.
-         ! indices(nHalos,3): I, j and k indices of the halos in
-         !                    this block.
+        ! nHalos:            # of halos to which this periodic
+        !                    transformation must be applied.
+        ! block(nHalos):     Local block id to which the halos belong.
+        ! indices(nHalos,3): I, j and k indices of the halos in
+        !                    this block.
 
-         integer(kind=intType) :: nHalos
+        integer(kind=intType) :: nHalos
 
-         integer(kind=intType), pointer, dimension(:)   :: block
-         integer(kind=intType), pointer, dimension(:,:) :: indices
+        integer(kind=intType), pointer, dimension(:) :: block
+        integer(kind=intType), pointer, dimension(:, :) :: indices
 
-       end type periodicDataType
+    end type periodicDataType
 !
 !       The definition of the derived data type commType, which
 !       stores the communication pattern for a certain halo type for a
 !       certain grid level.
 !
-       type commType
+    type commType
 
-         ! nProcSend:           # of procs, to whom this proc will send
-         ! nProcRecv:           # of procs, from whom this proc will
-         !                      receive.
-         ! sendProc(nProcSend): Send processor numbers.
-         ! recvProc(nProcRecv): Receive processor numbers.
+        ! nProcSend:           # of procs, to whom this proc will send
+        ! nProcRecv:           # of procs, from whom this proc will
+        !                      receive.
+        ! sendProc(nProcSend): Send processor numbers.
+        ! recvProc(nProcRecv): Receive processor numbers.
 
-         integer(kind=intType)  :: nProcSend, nProcRecv
-         integer(kind=intType), pointer, dimension(:) :: sendProc, recvProc
+        integer(kind=intType) :: nProcSend, nProcRecv
+        integer(kind=intType), pointer, dimension(:) :: sendProc, recvProc
 
-         ! nsend(nProcSend): # of entities to send to other processors.
-         ! nrecv(nProcRecv): # of entities to receive from other processors.
+        ! nsend(nProcSend): # of entities to send to other processors.
+        ! nrecv(nProcRecv): # of entities to receive from other processors.
 
-         integer(kind=intType), pointer, dimension(:) :: nsend, nrecv
+        integer(kind=intType), pointer, dimension(:) :: nsend, nrecv
 
-         ! nsendCum(0:NprocSend): cumulative version of nsend.
-         ! nrecvCum(0:NprocRecv): cumulative version of nrecv.
+        ! nsendCum(0:NprocSend): cumulative version of nsend.
+        ! nrecvCum(0:NprocRecv): cumulative version of nrecv.
 
-         integer(kind=intType), pointer, dimension(:) :: nsendCum
-         integer(kind=intType), pointer, dimension(:) :: nrecvCum
+        integer(kind=intType), pointer, dimension(:) :: nsendCum
+        integer(kind=intType), pointer, dimension(:) :: nrecvCum
 
-         ! indexSendProc(0:Nproc-1): index of the processors in sendProc.
-         !                           If nothing is to be sent to a
-         !                           processor this value is 0.
-         ! indexRecvProc(0:Nproc-1): index of the processors in recvProc.
-         !                           If nothing is to be received from a
-         !                           processor this value is 0.
+        ! indexSendProc(0:Nproc-1): index of the processors in sendProc.
+        !                           If nothing is to be sent to a
+        !                           processor this value is 0.
+        ! indexRecvProc(0:Nproc-1): index of the processors in recvProc.
+        !                           If nothing is to be received from a
+        !                           processor this value is 0.
 
-         integer(kind=intType), pointer, dimension(:) :: indexSendProc
-         integer(kind=intType), pointer, dimension(:) :: indexRecvProc
+        integer(kind=intType), pointer, dimension(:) :: indexSendProc
+        integer(kind=intType), pointer, dimension(:) :: indexRecvProc
 
-         ! sendList(nProcSend): Indices and block ids to send to these
-         !                      processors.
-         ! recvList(nProcRecv): Indices and block ids to receive from
-         !                      these processors.
+        ! sendList(nProcSend): Indices and block ids to send to these
+        !                      processors.
+        ! recvList(nProcRecv): Indices and block ids to receive from
+        !                      these processors.
 
-         type(sendCommListType), pointer, dimension(:) :: sendList
-         type(recvCommListType), pointer, dimension(:) :: recvList
+        type(sendCommListType), pointer, dimension(:) :: sendList
+        type(recvCommListType), pointer, dimension(:) :: recvList
 
-         ! nPeriodic:               # of periodic data arrays.
-         ! periodicData(nPeriodic): Periodic data and entities to which
-         !                          the transformation must be applied.
+        ! nPeriodic:               # of periodic data arrays.
+        ! periodicData(nPeriodic): Periodic data and entities to which
+        !                          the transformation must be applied.
 
-         integer(kind=intType) :: nPeriodic
-         type(periodicDataType), pointer, dimension(:) :: periodicData
+        integer(kind=intType) :: nPeriodic
+        type(periodicDataType), pointer, dimension(:) :: periodicData
 
-       end type commType
+    end type commType
 !
 !       The definition of the derived data type internalCommType,
 !       which stores the memory to memory copy on this processor for a
 !       certain halo type.
 !
-       type internalCommType
+    type internalCommType
 
-         ! ncopy: # of entities to copy internally.
+        ! ncopy: # of entities to copy internally.
 
-         ! donorBlock(ncopy):     Local block id of the donor cell.
-         ! donorIndices(ncopy,3): The indices of the donor cell.
-         ! donorInterp(ncopy,3):  Interpolants of the donor stencil.
-         !                        Only allocated when needed, e.g.
-         !                        overset communication).
+        ! donorBlock(ncopy):     Local block id of the donor cell.
+        ! donorIndices(ncopy,3): The indices of the donor cell.
+        ! donorInterp(ncopy,3):  Interpolants of the donor stencil.
+        !                        Only allocated when needed, e.g.
+        !                        overset communication).
 
-         ! haloBlock(ncopy):     Local block id of the halo cell.
-         ! haloIndices(ncopy,3): The indices of the halo cell.
+        ! haloBlock(ncopy):     Local block id of the halo cell.
+        ! haloIndices(ncopy,3): The indices of the halo cell.
 
-         integer(kind=intType) :: ncopy
+        integer(kind=intType) :: ncopy
 
-         integer(kind=intType), pointer, dimension(:)   :: donorBlock
-         integer(kind=intType), pointer, dimension(:,:) :: donorIndices
-         real(kind=realType),   pointer, dimension(:,:) :: donorInterp, donorInterpd
-         real(kind=realType),   pointer, dimension(:,:) :: xCen
+        integer(kind=intType), pointer, dimension(:) :: donorBlock
+        integer(kind=intType), pointer, dimension(:, :) :: donorIndices
+        real(kind=realType), pointer, dimension(:, :) :: donorInterp, donorInterpd
+        real(kind=realType), pointer, dimension(:, :) :: xCen
 
-         integer(kind=intType), pointer, dimension(:)   :: haloBlock
-         integer(kind=intType), pointer, dimension(:,:) :: haloIndices
+        integer(kind=intType), pointer, dimension(:) :: haloBlock
+        integer(kind=intType), pointer, dimension(:, :) :: haloIndices
 
-         ! nPeriodic:               # of periodic data arrays.
-         ! periodicData(nPeriodic): Periodic data and entities to which
-         !                          the transformation must be applied.
+        ! nPeriodic:               # of periodic data arrays.
+        ! periodicData(nPeriodic): Periodic data and entities to which
+        !                          the transformation must be applied.
 
-         integer(kind=intType) :: nPeriodic
-         type(periodicDataType), pointer, dimension(:) :: periodicData
+        integer(kind=intType) :: nPeriodic
+        type(periodicDataType), pointer, dimension(:) :: periodicData
 
-       end type internalCommType
+    end type internalCommType
 !
 !       Variables stored in this module.
 !
-       ! ADflow_comm_world: The communicator of this processor group.
-       ! myID:            My processor number in ADflow_comm_world.
-       ! nProc:           The number of processors in ADflow_comm_world.
-       integer :: ADflow_comm_world, ADflow_comm_self
-
-       integer :: myID, nProc
-
-       ! commPatternCell_1st(nLevel): The communication pattern for 1st
-       !                              level cell halo's on the multiple
-       !                              grids.
-       ! commPatternCell_2nd(nLevel): The communication pattern for 2nd
-       !                              level cell halo's (including the
-       !                              1st level) on the multiple grids.
-       ! commPatternNode_1st(nLevel): The communication pattern for 1st
-       !                              level node halo's on the multiple
-       !                              grids.
-
-       type(commType), allocatable, dimension(:)   :: commPatternCell_1st
-       type(commType), allocatable, dimension(:)   :: commPatternCell_2nd
-       type(commType), allocatable, dimension(:)   :: commPatternNode_1st
-       type(commType), allocatable, target, dimension(:, :) :: commPatternOverset
-
-       ! internalCell_1st(nLevel): Memory to memory copies for 1st level
-       !                           cell halo's on the multiple grids.
-       ! internalCell_2nd(nLevel): Memory to memory copies for 2nd level
-       !                           cell halo's on the multiple grids.
-       ! internalNode_1st(nLevel): Memory to memory copies for 1st level
-       !                           node halo's on the multiple grids.
-
-       type(internalCommType), allocatable, dimension(:)   :: internalCell_1st
-       type(internalCommType), allocatable, dimension(:)   :: internalCell_2nd
-       type(internalCommType), allocatable, dimension(:)   :: internalNode_1st
-       type(internalCommType), allocatable, target, dimension(:, :) :: internalOverset
-
-       ! sendBufferSize_1to1: Size of the send buffer needed to perform
-       !                      all 1 to 1 communication.
-       ! recvBufferSize_1to1: Idem for the receive buffer.
-       ! sendBufferSizeOver:  Size of the send buffer needed to perform
-       !                      all overset communication.
-       ! recvBufferSizeOver:  Idem for the receive buffer.
-       ! sendBufferSize:      Size of the send buffer to perform all
-       !                      possible communication.
-       ! recvBufferSize:      Idem for the receive buffer.
-
-       integer(kind=intType) :: sendBufferSize_1to1, sendBufferSize
-       integer(kind=intType) :: recvBufferSize_1to1, recvBufferSize
-       integer(kind=intType) :: sendBufferSizeOver, recvBufferSizeOver
-
-       ! sendBuffer:   Buffer used to store the info to be send during
-       !               a nonblocking communication.
-       ! recvBuffer:   Buffer used to store the info to be received
-       !               during a nonblocking communication.
-       ! sendRequests: Array of requests for the nonblocking sends.
-       ! recvRequests: Array of requests for the nonblocking receives.
-
-       real(kind=realType), allocatable, dimension(:) :: sendBuffer
-       real(kind=realType), allocatable, dimension(:) :: recvBuffer
-
-       integer, allocatable, dimension(:) :: sendRequests, recvRequests
+    ! ADflow_comm_world: The communicator of this processor group.
+    ! myID:            My processor number in ADflow_comm_world.
+    ! nProc:           The number of processors in ADflow_comm_world.
+    integer :: ADflow_comm_world, ADflow_comm_self
+
+    integer :: myID, nProc
+
+    ! commPatternCell_1st(nLevel): The communication pattern for 1st
+    !                              level cell halo's on the multiple
+    !                              grids.
+    ! commPatternCell_2nd(nLevel): The communication pattern for 2nd
+    !                              level cell halo's (including the
+    !                              1st level) on the multiple grids.
+    ! commPatternNode_1st(nLevel): The communication pattern for 1st
+    !                              level node halo's on the multiple
+    !                              grids.
+
+    type(commType), allocatable, dimension(:) :: commPatternCell_1st
+    type(commType), allocatable, dimension(:) :: commPatternCell_2nd
+    type(commType), allocatable, dimension(:) :: commPatternNode_1st
+    type(commType), allocatable, target, dimension(:, :) :: commPatternOverset
+
+    ! internalCell_1st(nLevel): Memory to memory copies for 1st level
+    !                           cell halo's on the multiple grids.
+    ! internalCell_2nd(nLevel): Memory to memory copies for 2nd level
+    !                           cell halo's on the multiple grids.
+    ! internalNode_1st(nLevel): Memory to memory copies for 1st level
+    !                           node halo's on the multiple grids.
+
+    type(internalCommType), allocatable, dimension(:) :: internalCell_1st
+    type(internalCommType), allocatable, dimension(:) :: internalCell_2nd
+    type(internalCommType), allocatable, dimension(:) :: internalNode_1st
+    type(internalCommType), allocatable, target, dimension(:, :) :: internalOverset
+
+    ! sendBufferSize_1to1: Size of the send buffer needed to perform
+    !                      all 1 to 1 communication.
+    ! recvBufferSize_1to1: Idem for the receive buffer.
+    ! sendBufferSizeOver:  Size of the send buffer needed to perform
+    !                      all overset communication.
+    ! recvBufferSizeOver:  Idem for the receive buffer.
+    ! sendBufferSize:      Size of the send buffer to perform all
+    !                      possible communication.
+    ! recvBufferSize:      Idem for the receive buffer.
+
+    integer(kind=intType) :: sendBufferSize_1to1, sendBufferSize
+    integer(kind=intType) :: recvBufferSize_1to1, recvBufferSize
+    integer(kind=intType) :: sendBufferSizeOver, recvBufferSizeOver
+
+    ! sendBuffer:   Buffer used to store the info to be send during
+    !               a nonblocking communication.
+    ! recvBuffer:   Buffer used to store the info to be received
+    !               during a nonblocking communication.
+    ! sendRequests: Array of requests for the nonblocking sends.
+    ! recvRequests: Array of requests for the nonblocking receives.
+
+    real(kind=realType), allocatable, dimension(:) :: sendBuffer
+    real(kind=realType), allocatable, dimension(:) :: recvBuffer
+
+    integer, allocatable, dimension(:) :: sendRequests, recvRequests
 #endif
-       end module communication
+end module communication
diff --git a/src/modules/coolingModelLevel0.f90 b/src/modules/coolingModelLevel0.f90
index 5a2aad64b..3f99a6c28 100644
--- a/src/modules/coolingModelLevel0.f90
+++ b/src/modules/coolingModelLevel0.f90
@@ -1,4 +1,4 @@
-       module coolingModelLevel0
+module coolingModelLevel0
 !
 !       Module which contains the derived data type as well as the
 !       corresponding array to store the information for the level 0
@@ -7,67 +7,67 @@ module coolingModelLevel0
 !       not be given to third parties. This implementation assumes
 !       the x-direction is the axial direction.
 !
-       use constants, only : intType, realType
-       implicit none
-       save
+    use constants, only: intType, realType
+    implicit none
+    save
 !
 !       The definition of the derived data type level0CoolingType.
 !
-       type level0CoolingType
+    type level0CoolingType
 
-         ! nSubfaces: Local number of subfaces which are part of this
-         !            cooling plane.
-         ! mDotRatio: Relative amount of cooling mass flow added.
-         ! dpLog:     Log of the total pressure loss coefficient.
-         ! dTLog:     Log of the total temperature loss coefficient.
-         ! area:      Reference area. Based on the entire wheel such that
-         !            different periodic angles are treated correctly.
+        ! nSubfaces: Local number of subfaces which are part of this
+        !            cooling plane.
+        ! mDotRatio: Relative amount of cooling mass flow added.
+        ! dpLog:     Log of the total pressure loss coefficient.
+        ! dTLog:     Log of the total temperature loss coefficient.
+        ! area:      Reference area. Based on the entire wheel such that
+        !            different periodic angles are treated correctly.
 
-         ! blockID(:):  The local block ID of the subfaces.
-         ! indexDir(:): The index direction of this subface, iMin, Imax,
-         !              jMin, jMax, kMin or kMin. These names are a bit
-         !              abused here, because it can also be an internal
-         !              plane.
-         ! indSol(:):   The cell index of the cell centered variables
-         !              whose residuals must be modified.
-         ! indNorm(:):  The face index for the normals.
-         ! indX1(:):    The face index of the 1st adjacent plane to the
-         !              cell center; normally identical to indNorm.
-         ! indX2(:):    The face index of the 2nd adjacent plane.
-         ! jcBeg(:):    Start index of the cell in j-direction of the
-         !              subface.
-         ! jcEnd(:):    Idem, but then for the end index.
-         ! icBeg(:):    Idem, but then starting index in i-direction.
-         ! icEnd(:):    Idem, but then for the end index.
+        ! blockID(:):  The local block ID of the subfaces.
+        ! indexDir(:): The index direction of this subface, iMin, Imax,
+        !              jMin, jMax, kMin or kMin. These names are a bit
+        !              abused here, because it can also be an internal
+        !              plane.
+        ! indSol(:):   The cell index of the cell centered variables
+        !              whose residuals must be modified.
+        ! indNorm(:):  The face index for the normals.
+        ! indX1(:):    The face index of the 1st adjacent plane to the
+        !              cell center; normally identical to indNorm.
+        ! indX2(:):    The face index of the 2nd adjacent plane.
+        ! jcBeg(:):    Start index of the cell in j-direction of the
+        !              subface.
+        ! jcEnd(:):    Idem, but then for the end index.
+        ! icBeg(:):    Idem, but then starting index in i-direction.
+        ! icEnd(:):    Idem, but then for the end index.
 
-         integer(kind=intType) :: nSubfaces
+        integer(kind=intType) :: nSubfaces
 
-         integer(kind=intType), dimension(:), pointer :: blockID
-         integer(kind=intType), dimension(:), pointer :: indexDir
-         integer(kind=intType), dimension(:), pointer :: indSol
-         integer(kind=intType), dimension(:), pointer :: indNorm
-         integer(kind=intType), dimension(:), pointer :: indX1
-         integer(kind=intType), dimension(:), pointer :: indX2
-         integer(kind=intType), dimension(:), pointer :: jcBeg, jcEnd
-         integer(kind=intType), dimension(:), pointer :: icBeg, icEnd
+        integer(kind=intType), dimension(:), pointer :: blockID
+        integer(kind=intType), dimension(:), pointer :: indexDir
+        integer(kind=intType), dimension(:), pointer :: indSol
+        integer(kind=intType), dimension(:), pointer :: indNorm
+        integer(kind=intType), dimension(:), pointer :: indX1
+        integer(kind=intType), dimension(:), pointer :: indX2
+        integer(kind=intType), dimension(:), pointer :: jcBeg, jcEnd
+        integer(kind=intType), dimension(:), pointer :: icBeg, icEnd
 
-         real(kind=realType) :: mDotRatio, dpLog, dTLog
-         real(kind=realType) :: area
+        real(kind=realType) :: mDotRatio, dpLog, dTLog
+        real(kind=realType) :: area
 
-       end type level0CoolingType
+    end type level0CoolingType
 !
 !       Variables stored in this module.
 !
-       ! nPlanesLevel0CoolingModel:  Number of injection planes used for
-       !                             the level 0 cooling model.
-       ! level0Cooling(:,nMGLevels): The array of the derived datatype
-       !                             to store the info to implement the
-       !                             cooling model. The first dimension
-       !                             is nPlanesLevel0CoolingModel.
+    ! nPlanesLevel0CoolingModel:  Number of injection planes used for
+    !                             the level 0 cooling model.
+    ! level0Cooling(:,nMGLevels): The array of the derived datatype
+    !                             to store the info to implement the
+    !                             cooling model. The first dimension
+    !                             is nPlanesLevel0CoolingModel.
 
-       integer(kind=intType) :: nPlanesLevel0CoolingModel
+    integer(kind=intType) :: nPlanesLevel0CoolingModel
 
-       type(level0CoolingType), dimension(:,:), allocatable :: &
-                                                           level0Cooling
+    type(level0CoolingType), dimension(:, :), allocatable :: &
+        level0Cooling
 
-       end module coolingModelLevel0
+end module coolingModelLevel0
diff --git a/src/modules/diffSizes.f90 b/src/modules/diffSizes.f90
index d849739b5..b765c08de 100644
--- a/src/modules/diffSizes.f90
+++ b/src/modules/diffSizes.f90
@@ -1,5 +1,5 @@
-  module diffSizes
-    use constants, only : intType, realType
+module diffSizes
+    use constants, only: intType, realType
     implicit none
     save
 
@@ -110,4 +110,4 @@ module diffSizes
     integer(kind=intType) :: ISIZE1OFDRFDTL, ISIZE2OFDRFDTL, ISIZE3OFDRFDTL
     integer(kind=intType) :: ISIZE1OFDRFDRFVISCSUBFACE_UTAU, ISIZE2OFDRFDRFVISCSUBFACE_UTAU
 
-  end module diffSizes
+end module diffSizes
diff --git a/src/modules/flowVarRefState.F90 b/src/modules/flowVarRefState.F90
index be08bef9f..8a5adc58b 100644
--- a/src/modules/flowVarRefState.F90
+++ b/src/modules/flowVarRefState.F90
@@ -1,96 +1,95 @@
-       module flowVarRefState
+module flowVarRefState
 !
 !       Module that contains information about the reference state as
 !       well as the nondimensional free stream state.
 !
-       use constants, only : intType, realType
-       implicit none
-       save
+    use constants, only: intType, realType
+    implicit none
+    save
 
-       ! nw:       Total number of independent variables including the
-       !           turbulent variables.
-       ! nwf:      Number of flow variables. For perfect gas computations
-       !           this is 5.
-       ! nwt:      Number of turbulent variables, nwt = nw - nwf
-       ! nt1, nt2: Initial and final indices for turbulence variables
+    ! nw:       Total number of independent variables including the
+    !           turbulent variables.
+    ! nwf:      Number of flow variables. For perfect gas computations
+    !           this is 5.
+    ! nwt:      Number of turbulent variables, nwt = nw - nwf
+    ! nt1, nt2: Initial and final indices for turbulence variables
 
-       integer(kind=intType) :: nw, nwf, nwt, nt1, nt2
+    integer(kind=intType) :: nw, nwf, nwt, nt1, nt2
 
-       ! pRef:     Reference pressure (in Pa) used to nondimensionalize
-       !           the flow equations.
-       ! rhoRef:   Reference density (in kg/m^3) used to
-       !           nondimensionalize the flow equations.
-       ! TRef:     Reference temperature (in K) used to nondimensionalize
-       !           the flow equations.
-       ! muRef:    Scale for the viscosity,
-       !           muRef = rhoRef*sqrt(pRef/rhoRef); there is also a
-       !           reference length in the nondimensionalization of mu,
-       !           but this is 1.0, because all the coordinates are
-       !           converted to meters.
-       ! timeRef:  time scale; needed for a correct
-       !           nondimensionalization of unsteady problems.
-       !           timeRef = sqrt(rhoRef/pRef); for the reference
-       !           length, see the comments for muRef.
-       ! uRef:     velocity scale;
-       !           uRef = sqrt(pRef/rhoRef);
-       ! hRef:     enthalpy scale;
-       !           hRef = pRef/rhoRef;
+    ! pRef:     Reference pressure (in Pa) used to nondimensionalize
+    !           the flow equations.
+    ! rhoRef:   Reference density (in kg/m^3) used to
+    !           nondimensionalize the flow equations.
+    ! TRef:     Reference temperature (in K) used to nondimensionalize
+    !           the flow equations.
+    ! muRef:    Scale for the viscosity,
+    !           muRef = rhoRef*sqrt(pRef/rhoRef); there is also a
+    !           reference length in the nondimensionalization of mu,
+    !           but this is 1.0, because all the coordinates are
+    !           converted to meters.
+    ! timeRef:  time scale; needed for a correct
+    !           nondimensionalization of unsteady problems.
+    !           timeRef = sqrt(rhoRef/pRef); for the reference
+    !           length, see the comments for muRef.
+    ! uRef:     velocity scale;
+    !           uRef = sqrt(pRef/rhoRef);
+    ! hRef:     enthalpy scale;
+    !           hRef = pRef/rhoRef;
 
-       real(kind=realType) :: pRef, rhoRef, TRef
-       real(kind=realType) :: muRef, timeRef, uRef, href
+    real(kind=realType) :: pRef, rhoRef, TRef
+    real(kind=realType) :: muRef, timeRef, uRef, href
 
+    ! LRef:          Conversion factor of the length unit of the
+    !                grid to meter. e.g. if the grid is in mm.,
+    !                LRef = 1.e-3.
+    ! LRefSpecified: Whether or not a conversion factor is specified
+    !                in the input file.
 
-       ! LRef:          Conversion factor of the length unit of the
-       !                grid to meter. e.g. if the grid is in mm.,
-       !                LRef = 1.e-3.
-       ! LRefSpecified: Whether or not a conversion factor is specified
-       !                in the input file.
+    real(kind=realType) :: LRef
+    logical :: LRefSpecified
 
-       real(kind=realType) :: LRef
-       logical ::             LRefSpecified
+    ! pInfDim:   Free stream pressure in Pa.
+    ! rhoInfDim: Free stream density in kg/m^3.
+    ! muDim:     Free stream molecular viscosity in kg/(m s)
 
-       ! pInfDim:   Free stream pressure in Pa.
-       ! rhoInfDim: Free stream density in kg/m^3.
-       ! muDim:     Free stream molecular viscosity in kg/(m s)
+    real(kind=realType) :: pInfDim, rhoInfDim, muDim, TinfDim, muInfDim
+    !AD derivative values
 
-       real(kind=realType) :: pInfDim, rhoInfDim, muDim, TinfDim, muInfDim
-       !AD derivative values
+    ! wInf(nw): Nondimensional free stream state vector.
+    !           Variables stored are rho, u, v, w and rhoE.
+    ! pInf:     Nondimensional free stream pressure.
+    ! pInfCorr: Nondimensional free stream pressure, corrected for
+    !           a possible presence of 2/3 rhok.
+    ! rhoInf:   Nondimensional free stream density.
+    ! uInf:     Nondimensional free stream velocity
+    ! muInf:    Nondimensional free stream viscosity.
+    ! RGas:     Nondimensional gas constant.
+    ! gammaInf: Free stream specific heat ratio.
 
-       ! wInf(nw): Nondimensional free stream state vector.
-       !           Variables stored are rho, u, v, w and rhoE.
-       ! pInf:     Nondimensional free stream pressure.
-       ! pInfCorr: Nondimensional free stream pressure, corrected for
-       !           a possible presence of 2/3 rhok.
-       ! rhoInf:   Nondimensional free stream density.
-       ! uInf:     Nondimensional free stream velocity
-       ! muInf:    Nondimensional free stream viscosity.
-       ! RGas:     Nondimensional gas constant.
-       ! gammaInf: Free stream specific heat ratio.
-
-       real(kind=realType) :: rhoInf, uInf, pInf, pInfCorr
-       real(kind=realType) :: RGas, muInf, gammaInf
+    real(kind=realType) :: rhoInf, uInf, pInf, pInfCorr
+    real(kind=realType) :: RGas, muInf, gammaInf
 #ifdef USE_TAPENADE
-       real(kind=realType), dimension(10) :: wInf
+    real(kind=realType), dimension(10) :: wInf
 #else
-       real(kind=realType), dimension(:), allocatable :: wInf
+    real(kind=realType), dimension(:), allocatable :: wInf
 #endif
 
 #ifndef USE_TAPENADE
-       REAL(kind=realtype) :: prefd, rhorefd, trefd, uRefd, Hrefd
-       REAL(kind=realtype) :: murefd, timerefd
-       REAL(kind=realtype) :: pinfdimd, rhoinfdimd, tinfdimd
-       real(kind=realtype) :: mudimd, muinfdimd
-       REAL(kind=realtype) :: pinfdimb, rhoinfdimb
-       REAL(kind=realtype) :: rhoinfd, uinfd, pinfd, pinfcorrd
-       REAL(kind=realtype) :: rgasd, muinfd, gammainfd
-       real(kind=realType), dimension(:), allocatable :: wInfd, wInfb
+    REAL(kind=realtype) :: prefd, rhorefd, trefd, uRefd, Hrefd
+    REAL(kind=realtype) :: murefd, timerefd
+    REAL(kind=realtype) :: pinfdimd, rhoinfdimd, tinfdimd
+    real(kind=realtype) :: mudimd, muinfdimd
+    REAL(kind=realtype) :: pinfdimb, rhoinfdimb
+    REAL(kind=realtype) :: rhoinfd, uinfd, pinfd, pinfcorrd
+    REAL(kind=realtype) :: rgasd, muinfd, gammainfd
+    real(kind=realType), dimension(:), allocatable :: wInfd, wInfb
 #endif
-       ! viscous:   whether or not this is a viscous computation.
-       ! kPresent:  whether or not a turbulent kinetic energy is present
-       !            in the turbulence model.
-       ! eddyModel: whether or not the turbulence model is an eddy
-       !            viscosity model.
+    ! viscous:   whether or not this is a viscous computation.
+    ! kPresent:  whether or not a turbulent kinetic energy is present
+    !            in the turbulence model.
+    ! eddyModel: whether or not the turbulence model is an eddy
+    !            viscosity model.
 
-       logical :: kPresent, eddyModel, viscous
+    logical :: kPresent, eddyModel, viscous
 
-       end module flowVarRefState
+end module flowVarRefState
diff --git a/src/modules/iteration.f90 b/src/modules/iteration.f90
index 917951d9b..defd3309a 100644
--- a/src/modules/iteration.f90
+++ b/src/modules/iteration.f90
@@ -1,130 +1,129 @@
-       module iteration
+module iteration
 !
 !       This module contains the iteration parameters mainly used in
 !       solver.
 !
-       use constants, only: intType, realType, alwaysRealType, maxIterTypelen
-       implicit none
-
-       ! groundLevel:  Current ground level of the computation. Needed
-       !               to determine what kind of action must be
-       !               undertaken. E.G. On the coarse grids no solution
-       !               will be written.
-       ! currentLevel: MG level at which the compution currently resides.
-       ! rkStage:      Current runge kutta stage. Needed to determine
-       !               whether or not the artificial dissipation terms
-       !               must be computed.
-
-       integer(kind=intType) :: groundLevel, currentLevel
-       integer(kind=intType) :: rkStage, Subit
-
-       ! nStepsCycling: Number of steps in the current cycling strategy
-       ! cycling:       The corresponding array defining the multigrid
-       !                cycling strategy.
-
-       integer(kind=intType) :: nStepsCycling
-       integer(kind=intType), dimension(:), allocatable :: cycling
-
-       ! iterTot: Total number of iterations on the current grid;
-       !          a restart is not included in this count.
-
-       integer(kind=intType) :: iterTot
-
-       ! rFil : coefficient to control the fraction of the dissipation
-       !        residual of the previous runge-kutta stage.
-
-       real(kind=realType) :: rFil,rfilb
-
-       ! t0Solver: Reference time for the solver.
-
-       real(kind=realType) :: t0Solver
-
-       ! converged:             Whether or not the solution has been
-       !                        converged.
-       ! exchangePressureEarly: Whether or not the pressure must be
-       !                        exchanged early, i.e. before the
-       !                        boundary conditions are applied.
-       !                        This must be done for a correct treatment
-       !                        of normal momentum boundary condition,
-       !                        but it requires an extra call to the
-       !                        halo routines.
-
-       logical :: converged
-       logical :: exchangePressureEarly
-
-       ! standAloneMode:   Whether or not an executable in stand alone
-       !                   mode is built.
-       ! changing_Grid:    Whether or not the grid changes in time.
-       !                   In stand alone mode this only happens when
-       !                   moving parts are present. In a
-       !                   multi-disciplinary environment more options
-       !                   are possible, i.e. deforming meshes.
-       ! deforming_Grid:   Whether or not the grid deforms; this can
-       !                   only happen for a multi-disciplinary,
-       !                   usually aero-elastic problem.
-       ! changingOverset:  Whether or not the overset connectivity needs
-       !                   to be updated at each time step, due to
-       !                   moving or deforming grids.
-
-       logical :: standAloneMode, changing_Grid, deforming_Grid
-       logical :: changingOverset
-
-       ! nOldSolAvail:     Number of available old solutions for
-       !                   the time integration.
-       ! nOldLevels:       Number of old levels needed in the time
-       !                   integration scheme.
-       ! coefTime(0:nOld): The coefficients in the time integrator
-       !                   for unsteady applications.
-
-       integer(kind=intType) :: nOldSolAvail, nOldLevels
-       real(kind=realType), dimension(:), allocatable :: coefTime
-
-       ! iterType: The type of iteration performed. Will be one of RK,
-       ! DADI, ANK or NK ( or None on the 0th evaluation)
-       character(len=maxIterTypelen) :: iterType
-
-       ! approxTotalIts : A rough approximation of the total number of
-       ! function evaluations. An RK or DADI multi grid iteration
-       ! counts as 1.  ANK steps count as 1 + number of KSP
-       ! iterations. NK steps count the total number of function
-       ! evalautions either for mat-vecs or during a line search. It
-       ! is this value that is checked again nCycles for doing too
-       ! much work.
-       integer(kind=intType) :: approxTotalIts
-
-       ! Variables for monitoring the current CFL and step depending
-       ! on the type of iteration
-       real(kind=realType) :: CFLMonitor=0.0
-       real(kind=alwaysRealType) :: stepMonitor=1.0
-       real(kind=alwaysRealType) :: linResMonitor=-1.0
-
-       ! Added by HDN
-       ! nALEMeshes:                Number of ALE levels for intermediate mesh
-       !                            between two steps
-       ! nALEsteps:                 Number of ALE steps at one time step
-       ! coefTimeALE(nALEsteps):    The weighting coefficients to average the fluxes
-       ! coefMeshALE(nALEmeshes,2): The coefficients to interpolate the mesh
-       integer(kind=intType)                            :: nALEMeshes, nALEsteps
-       real(kind=realType), dimension(:),   allocatable :: coefTimeALE
-       real(kind=realType), dimension(:,:), allocatable :: coefMeshALE
-
-       ! timeSpectralGridsNotWritten: Whether or not grid files have
-       !                              already been written in time
-       !                              spectral mode. In this way
-       !                              it is avoided that files are
-       !                              written multiple times.
-       ! oldSolWritten(nOldLevels-1): Logicals to indicate whether
-       !                              or not old solution levels
-       !                              have been written in
-       !                              unsteady mode.
-
-       logical :: timeSpectralGridsNotWritten
-
-       logical, dimension(:), allocatable :: oldSolWritten
-
-
-       ! Variables for monitoring the residuals
-       real(kind=realType) :: totalR0, totalRStart, totalRFinal, totalR
-       real(kind=realType) :: rhoRes0, rhoResStart, rhoResFinal, rhoRes
-       real(kind=realType) :: ordersConverged=16.0_realType
-       end module iteration
+    use constants, only: intType, realType, alwaysRealType, maxIterTypelen
+    implicit none
+
+    ! groundLevel:  Current ground level of the computation. Needed
+    !               to determine what kind of action must be
+    !               undertaken. E.G. On the coarse grids no solution
+    !               will be written.
+    ! currentLevel: MG level at which the compution currently resides.
+    ! rkStage:      Current runge kutta stage. Needed to determine
+    !               whether or not the artificial dissipation terms
+    !               must be computed.
+
+    integer(kind=intType) :: groundLevel, currentLevel
+    integer(kind=intType) :: rkStage, Subit
+
+    ! nStepsCycling: Number of steps in the current cycling strategy
+    ! cycling:       The corresponding array defining the multigrid
+    !                cycling strategy.
+
+    integer(kind=intType) :: nStepsCycling
+    integer(kind=intType), dimension(:), allocatable :: cycling
+
+    ! iterTot: Total number of iterations on the current grid;
+    !          a restart is not included in this count.
+
+    integer(kind=intType) :: iterTot
+
+    ! rFil : coefficient to control the fraction of the dissipation
+    !        residual of the previous runge-kutta stage.
+
+    real(kind=realType) :: rFil, rfilb
+
+    ! t0Solver: Reference time for the solver.
+
+    real(kind=realType) :: t0Solver
+
+    ! converged:             Whether or not the solution has been
+    !                        converged.
+    ! exchangePressureEarly: Whether or not the pressure must be
+    !                        exchanged early, i.e. before the
+    !                        boundary conditions are applied.
+    !                        This must be done for a correct treatment
+    !                        of normal momentum boundary condition,
+    !                        but it requires an extra call to the
+    !                        halo routines.
+
+    logical :: converged
+    logical :: exchangePressureEarly
+
+    ! standAloneMode:   Whether or not an executable in stand alone
+    !                   mode is built.
+    ! changing_Grid:    Whether or not the grid changes in time.
+    !                   In stand alone mode this only happens when
+    !                   moving parts are present. In a
+    !                   multi-disciplinary environment more options
+    !                   are possible, i.e. deforming meshes.
+    ! deforming_Grid:   Whether or not the grid deforms; this can
+    !                   only happen for a multi-disciplinary,
+    !                   usually aero-elastic problem.
+    ! changingOverset:  Whether or not the overset connectivity needs
+    !                   to be updated at each time step, due to
+    !                   moving or deforming grids.
+
+    logical :: standAloneMode, changing_Grid, deforming_Grid
+    logical :: changingOverset
+
+    ! nOldSolAvail:     Number of available old solutions for
+    !                   the time integration.
+    ! nOldLevels:       Number of old levels needed in the time
+    !                   integration scheme.
+    ! coefTime(0:nOld): The coefficients in the time integrator
+    !                   for unsteady applications.
+
+    integer(kind=intType) :: nOldSolAvail, nOldLevels
+    real(kind=realType), dimension(:), allocatable :: coefTime
+
+    ! iterType: The type of iteration performed. Will be one of RK,
+    ! DADI, ANK or NK ( or None on the 0th evaluation)
+    character(len=maxIterTypelen) :: iterType
+
+    ! approxTotalIts : A rough approximation of the total number of
+    ! function evaluations. An RK or DADI multi grid iteration
+    ! counts as 1.  ANK steps count as 1 + number of KSP
+    ! iterations. NK steps count the total number of function
+    ! evalautions either for mat-vecs or during a line search. It
+    ! is this value that is checked again nCycles for doing too
+    ! much work.
+    integer(kind=intType) :: approxTotalIts
+
+    ! Variables for monitoring the current CFL and step depending
+    ! on the type of iteration
+    real(kind=realType) :: CFLMonitor = 0.0
+    real(kind=alwaysRealType) :: stepMonitor = 1.0
+    real(kind=alwaysRealType) :: linResMonitor = -1.0
+
+    ! Added by HDN
+    ! nALEMeshes:                Number of ALE levels for intermediate mesh
+    !                            between two steps
+    ! nALEsteps:                 Number of ALE steps at one time step
+    ! coefTimeALE(nALEsteps):    The weighting coefficients to average the fluxes
+    ! coefMeshALE(nALEmeshes,2): The coefficients to interpolate the mesh
+    integer(kind=intType) :: nALEMeshes, nALEsteps
+    real(kind=realType), dimension(:), allocatable :: coefTimeALE
+    real(kind=realType), dimension(:, :), allocatable :: coefMeshALE
+
+    ! timeSpectralGridsNotWritten: Whether or not grid files have
+    !                              already been written in time
+    !                              spectral mode. In this way
+    !                              it is avoided that files are
+    !                              written multiple times.
+    ! oldSolWritten(nOldLevels-1): Logicals to indicate whether
+    !                              or not old solution levels
+    !                              have been written in
+    !                              unsteady mode.
+
+    logical :: timeSpectralGridsNotWritten
+
+    logical, dimension(:), allocatable :: oldSolWritten
+
+    ! Variables for monitoring the residuals
+    real(kind=realType) :: totalR0, totalRStart, totalRFinal, totalR
+    real(kind=realType) :: rhoRes0, rhoResStart, rhoResFinal, rhoRes
+    real(kind=realType) :: ordersConverged = 16.0_realType
+end module iteration
diff --git a/src/modules/kd_tree.f90 b/src/modules/kd_tree.f90
index ae36ffe52..1972c5147 100644
--- a/src/modules/kd_tree.f90
+++ b/src/modules/kd_tree.f90
@@ -6,695 +6,687 @@
 !
 
 module kdtree2_priority_queue_module
-  use precision
-  implicit none
-  !
-  ! maintain a priority queue (PQ) of data, pairs of 'priority/payload',
-  ! implemented with a binary heap.  This is the type, and the 'dis' field
-  ! is the priority.
-  !
-  type kdtree2_result
-     ! a pair of distances, indexes
-     real(kind=realType)    :: dis!=0.0
-     integer(kind=intType) :: idx!=-1   Initializers cause some bugs in compilers.
-  end type kdtree2_result
-  !
-  ! A heap-based priority queue lets one efficiently implement the following
-  ! operations, each in log(N) time, as opposed to linear time.
-  !
-  ! 1)  add a datum (push a datum onto the queue, increasing its length)
-  ! 2)  return the priority value of the maximum priority element
-  ! 3)  pop-off (and delete) the element with the maximum priority, decreasing
-  !     the size of the queue.
-  ! 4)  replace the datum with the maximum priority with a supplied datum
-  !     (of either higher or lower priority), maintaining the size of the
-  !     queue.
-  !
-  !
-  ! In the k-d tree case, the 'priority' is the square distance of a point in
-  ! the data set to a reference point.   The goal is to keep the smallest M
-  ! distances to a reference point.  The tree algorithm searches terminal
-  ! nodes to decide whether to add points under consideration.
-  !
-  ! A priority queue is useful here because it lets one quickly return the
-  ! largest distance currently existing in the list.  If a new candidate
-  ! distance is smaller than this, then the new candidate ought to replace
-  ! the old candidate.  In priority queue terms, this means removing the
-  ! highest priority element, and inserting the new one.
-  !
-  ! Algorithms based on Cormen, Leiserson, Rivest, _Introduction
-  ! to Algorithms_, 1990, with further optimization by the author.
-  !
-  ! Originally informed by a C implementation by Sriranga Veeraraghavan.
-  !
-  ! This module is not written in the most clear way, but is implemented such
-  ! for speed, as it its operations will be called many times during searches
-  ! of large numbers of neighbors.
-  !
-  type pq
-     !
-     ! The priority queue consists of elements
-     ! priority(1:heap_size), with associated payload(:).
-     !
-     ! There are heap_size active elements.
-     ! Assumes the allocation is always sufficient.  Will NOT increase it
-     ! to match.
-     integer(kind=intType) :: heap_size = 0
-     type(kdtree2_result), pointer :: elems(:)
-  end type pq
-
-  public :: kdtree2_result
-
-  public :: pq
-  public :: pq_create
-  public :: pq_delete, pq_insert
-  public :: pq_extract_max, pq_max, pq_replace_max, pq_maxpri
-  private
-
-contains
-
-
-  function pq_create(results_in) result(res)
-    implicit none
-    !
-    ! Create a priority queue from ALREADY allocated
-    ! array pointers for storage.  NOTE! It will NOT
-    ! add any alements to the heap, i.e. any existing
-    ! data in the input arrays will NOT be used and may
-    ! be overwritten.
-    !
-    ! usage:
-    !    real(kind=realType), pointer :: x(:)
-    !    integer(kind=intType), pointer :: k(:)
-    !    allocate(x(1000),k(1000))
-    !    pq => pq_create(x,k)
-    !
-    type(kdtree2_result), target:: results_in(:)
-    type(pq) :: res
-    !
-    !
-    integer(kind=intType) :: nalloc
-
-    nalloc = size(results_in,1)
-    if (nalloc .lt. 1) then
-       write (*,*) 'PQ_CREATE: error, input arrays must be allocated.'
-    end if
-    res%elems => results_in
-    res%heap_size = 0
-    return
-  end function pq_create
-
-  !
-  ! operations for getting parents and left + right children
-  ! of elements in a binary heap.
-  !
-
-  !
-  ! These are written inline for speed.
-  !
-  !  integer(kind=intType) function parent(i)
-  !    integer(kind=intType), intent(in) :: i
-  !    parent = (i/2)
-  !    return
-  !  end function parent
-
-  !  integer(kind=intType) function left(i)
-  !    integer(kind=intType), intent(in) ::i
-  !    left = (2*i)
-  !    return
-  !  end function left
-
-  !  integer(kind=intType) function right(i)
-  !    integer(kind=intType), intent(in) :: i
-  !    right = (2*i)+1
-  !    return
-  !  end function right
-
-  !  logical function compare_priority(p1,p2)
-  !    real(kind=realType), intent(in) :: p1, p2
-  !
-  !    compare_priority = (p1 .gt. p2)
-  !    return
-  !  end function compare_priority
-
-  subroutine heapify(a,i_in)
+    use precision
     implicit none
     !
-    ! take a heap rooted at 'i' and force it to be in the
-    ! heap canonical form.   This is performance critical
-    ! and has been tweaked a little to reflect this.
-    !
-    type(pq),pointer   :: a
-    integer(kind=intType), intent(in) :: i_in
-    !
-    integer(kind=intType) :: i, l, r, largest
-
-    real(kind=realType)    :: pri_i, pri_l, pri_r, pri_largest
-
-
-    type(kdtree2_result) :: temp
-
-    i = i_in
-
-    bigloop:  do
-       l = 2*i ! left(i)
-       r = l+1 ! right(i)
-       !
-       ! set 'largest' to the index of either i, l, r
-       ! depending on whose priority is largest.
-       !
-       ! note that l or r can be larger than the heap size
-       ! in which case they do not count.
-
-
-       ! does left child have higher priority?
-       if (l .gt. a%heap_size) then
-          ! we know that i is the largest as both l and r are invalid.
-          exit
-       else
-          pri_i = a%elems(i)%dis
-          pri_l = a%elems(l)%dis
-          if (pri_l .gt. pri_i) then
-             largest = l
-             pri_largest = pri_l
-          else
-             largest = i
-             pri_largest = pri_i
-          endif
-
-          !
-          ! between i and l we have a winner
-          ! now choose between that and r.
-          !
-          if (r .le. a%heap_size) then
-             pri_r = a%elems(r)%dis
-             if (pri_r .gt. pri_largest) then
-                largest = r
-             endif
-          endif
-       endif
-
-       if (largest .ne. i) then
-          ! swap data in nodes largest and i, then heapify
-
-          temp = a%elems(i)
-          a%elems(i) = a%elems(largest)
-          a%elems(largest) = temp
-          !
-          ! Canonical heapify() algorithm has tail-ecursive call:
-          !
-          !        call heapify(a,largest)
-          ! we will simulate with cycle
-          !
-          i = largest
-          cycle bigloop ! continue the loop
-       else
-          return   ! break from the loop
-       end if
-    enddo bigloop
-    return
-  end subroutine heapify
-
-  subroutine pq_max(a,e)
-    implicit none
-    !
-    ! return the priority and its payload of the maximum priority element
-    ! on the queue, which should be the first one, if it is
-    ! in heapified form.
-    !
-    type(pq),pointer :: a
-    type(kdtree2_result),intent(out)  :: e
-
-    if (a%heap_size .gt. 0) then
-       e = a%elems(1)
-    else
-       write (*,*) 'PQ_MAX: ERROR, heap_size < 1'
-       stop
-    endif
-    return
-  end subroutine pq_max
-
-  real(kind=realType) function pq_maxpri(a)
-    implicit none
-    type(pq), pointer :: a
-
-    if (a%heap_size .gt. 0) then
-       pq_maxpri = a%elems(1)%dis
-    else
-       write (*,*) 'PQ_MAX_PRI: ERROR, heapsize < 1'
-       stop
-    endif
-    return
-  end function pq_maxpri
-
-  subroutine pq_extract_max(a,e)
-    implicit none
-    !
-    ! return the priority and payload of maximum priority
-    ! element, and remove it from the queue.
-    ! (equivalent to 'pop()' on a stack)
-    !
-    type(pq),pointer :: a
-    type(kdtree2_result), intent(out) :: e
-
-    if (a%heap_size .ge. 1) then
-       !
-       ! return max as first element
-       !
-       e = a%elems(1)
-
-       !
-       ! move last element to first
-       !
-       a%elems(1) = a%elems(a%heap_size)
-       a%heap_size = a%heap_size-1
-       call heapify(a,1)
-       return
-    else
-       write (*,*) 'PQ_EXTRACT_MAX: error, attempted to pop non-positive PQ'
-       stop
-    end if
-
-  end subroutine pq_extract_max
-
-
-  real(kind=realType) function pq_insert(a,dis,idx)
-    implicit none
-    !
-    ! Insert a new element and return the new maximum priority,
-    ! which may or may not be the same as the old maximum priority.
-    !
-    type(pq),pointer  :: a
-    real(kind=realType), intent(in) :: dis
-    integer(kind=intType), intent(in) :: idx
-    !    type(kdtree2_result), intent(in) :: e
-    !
-    integer(kind=intType) :: i, isparent
-    real(kind=realType)    :: parentdis
-    !
+    ! maintain a priority queue (PQ) of data, pairs of 'priority/payload',
+    ! implemented with a binary heap.  This is the type, and the 'dis' field
+    ! is the priority.
+    !
+    type kdtree2_result
+        ! a pair of distances, indexes
+        real(kind=realType) :: dis!=0.0
+        integer(kind=intType) :: idx!=-1   Initializers cause some bugs in compilers.
+    end type kdtree2_result
+    !
+    ! A heap-based priority queue lets one efficiently implement the following
+    ! operations, each in log(N) time, as opposed to linear time.
+    !
+    ! 1)  add a datum (push a datum onto the queue, increasing its length)
+    ! 2)  return the priority value of the maximum priority element
+    ! 3)  pop-off (and delete) the element with the maximum priority, decreasing
+    !     the size of the queue.
+    ! 4)  replace the datum with the maximum priority with a supplied datum
+    !     (of either higher or lower priority), maintaining the size of the
+    !     queue.
+    !
+    !
+    ! In the k-d tree case, the 'priority' is the square distance of a point in
+    ! the data set to a reference point.   The goal is to keep the smallest M
+    ! distances to a reference point.  The tree algorithm searches terminal
+    ! nodes to decide whether to add points under consideration.
+    !
+    ! A priority queue is useful here because it lets one quickly return the
+    ! largest distance currently existing in the list.  If a new candidate
+    ! distance is smaller than this, then the new candidate ought to replace
+    ! the old candidate.  In priority queue terms, this means removing the
+    ! highest priority element, and inserting the new one.
+    !
+    ! Algorithms based on Cormen, Leiserson, Rivest, _Introduction
+    ! to Algorithms_, 1990, with further optimization by the author.
+    !
+    ! Originally informed by a C implementation by Sriranga Veeraraghavan.
+    !
+    ! This module is not written in the most clear way, but is implemented such
+    ! for speed, as it its operations will be called many times during searches
+    ! of large numbers of neighbors.
+    !
+    type pq
+        !
+        ! The priority queue consists of elements
+        ! priority(1:heap_size), with associated payload(:).
+        !
+        ! There are heap_size active elements.
+        ! Assumes the allocation is always sufficient.  Will NOT increase it
+        ! to match.
+        integer(kind=intType) :: heap_size = 0
+        type(kdtree2_result), pointer :: elems(:)
+    end type pq
+
+    public :: kdtree2_result
+
+    public :: pq
+    public :: pq_create
+    public :: pq_delete, pq_insert
+    public :: pq_extract_max, pq_max, pq_replace_max, pq_maxpri
+    private
 
-    !    if (a%heap_size .ge. a%max_elems) then
-    !       write (*,*) 'PQ_INSERT: error, attempt made to insert element on full PQ'
-    !       stop
-    !    else
-    a%heap_size = a%heap_size + 1
-    i = a%heap_size
-
-    do while (i .gt. 1)
-       isparent = int(i/2)
-       parentdis = a%elems(isparent)%dis
-       if (dis .gt. parentdis) then
-          ! move what was in i's parent into i.
-          a%elems(i)%dis = parentdis
-          a%elems(i)%idx = a%elems(isparent)%idx
-          i = isparent
-       else
-          exit
-       endif
-    end do
-
-    ! insert the element at the determined position
-    a%elems(i)%dis = dis
-    a%elems(i)%idx = idx
-
-    pq_insert = a%elems(1)%dis
-    return
-    !    end if
-
-  end function pq_insert
-
-  subroutine pq_adjust_heap(a,i)
-    implicit none
-    type(pq),pointer  :: a
-    integer(kind=intType), intent(in) :: i
-    !
-    ! nominally arguments (a,i), but specialize for a=1
-    !
-    ! This routine assumes that the trees with roots 2 and 3 are already heaps, i.e.
-    ! the children of '1' are heaps.  When the procedure is completed, the
-    ! tree rooted at 1 is a heap.
-    real(kind=realType) :: prichild
-    integer(kind=intType) :: parent, child, N
-
-    type(kdtree2_result) :: e
-
-    e = a%elems(i)
-
-    parent = i
-    child = 2*i
-    N = a%heap_size
-
-    do while (child .le. N)
-       if (child .lt. N) then
-          if (a%elems(child)%dis .lt. a%elems(child+1)%dis) then
-             child = child+1
-          endif
-       endif
-       prichild = a%elems(child)%dis
-       if (e%dis .ge. prichild) then
-          exit
-       else
-          ! move child into parent.
-          a%elems(parent) = a%elems(child)
-          parent = child
-          child = 2*parent
-       end if
-    end do
-    a%elems(parent) = e
-    return
-  end subroutine pq_adjust_heap
-
-
-  real(kind=realType) function pq_replace_max(a,dis,idx)
-    implicit none
-    !
-    ! Replace the extant maximum priority element
-    ! in the PQ with (dis,idx).  Return
-    ! the new maximum priority, which may be larger
-    ! or smaller than the old one.
-    !
-    type(pq),pointer         :: a
-    real(kind=realType), intent(in) :: dis
-    integer(kind=intType), intent(in) :: idx
-    !    type(kdtree2_result), intent(in) :: e
-    ! not tested as well!
-
-    integer(kind=intType) :: parent, child, N
-    real(kind=realType)    :: prichild, prichildp1
-
-    type(kdtree2_result) :: etmp
-
-    if (.true.) then
-       N=a%heap_size
-       if (N .ge. 1) then
-          parent =1
-          child=2
-
-          loop: do while (child .le. N)
-             prichild = a%elems(child)%dis
-
-             !
-             ! posibly child+1 has higher priority, and if
-             ! so, get it, and increment child.
-             !
-
-             if (child .lt. N) then
-                prichildp1 = a%elems(child+1)%dis
-                if (prichild .lt. prichildp1) then
-                   child = child+1
-                   prichild = prichildp1
-                endif
-             endif
-
-             if (dis .ge. prichild) then
-                exit loop
-                ! we have a proper place for our new element,
-                ! bigger than either children's priority.
-             else
+contains
+
+    function pq_create(results_in) result(res)
+        implicit none
+        !
+        ! Create a priority queue from ALREADY allocated
+        ! array pointers for storage.  NOTE! It will NOT
+        ! add any alements to the heap, i.e. any existing
+        ! data in the input arrays will NOT be used and may
+        ! be overwritten.
+        !
+        ! usage:
+        !    real(kind=realType), pointer :: x(:)
+        !    integer(kind=intType), pointer :: k(:)
+        !    allocate(x(1000),k(1000))
+        !    pq => pq_create(x,k)
+        !
+        type(kdtree2_result), target :: results_in(:)
+        type(pq) :: res
+        !
+        !
+        integer(kind=intType) :: nalloc
+
+        nalloc = size(results_in, 1)
+        if (nalloc .lt. 1) then
+            write (*, *) 'PQ_CREATE: error, input arrays must be allocated.'
+        end if
+        res%elems => results_in
+        res%heap_size = 0
+        return
+    end function pq_create
+
+    !
+    ! operations for getting parents and left + right children
+    ! of elements in a binary heap.
+    !
+
+    !
+    ! These are written inline for speed.
+    !
+    !  integer(kind=intType) function parent(i)
+    !    integer(kind=intType), intent(in) :: i
+    !    parent = (i/2)
+    !    return
+    !  end function parent
+
+    !  integer(kind=intType) function left(i)
+    !    integer(kind=intType), intent(in) ::i
+    !    left = (2*i)
+    !    return
+    !  end function left
+
+    !  integer(kind=intType) function right(i)
+    !    integer(kind=intType), intent(in) :: i
+    !    right = (2*i)+1
+    !    return
+    !  end function right
+
+    !  logical function compare_priority(p1,p2)
+    !    real(kind=realType), intent(in) :: p1, p2
+    !
+    !    compare_priority = (p1 .gt. p2)
+    !    return
+    !  end function compare_priority
+
+    subroutine heapify(a, i_in)
+        implicit none
+        !
+        ! take a heap rooted at 'i' and force it to be in the
+        ! heap canonical form.   This is performance critical
+        ! and has been tweaked a little to reflect this.
+        !
+        type(pq), pointer :: a
+        integer(kind=intType), intent(in) :: i_in
+        !
+        integer(kind=intType) :: i, l, r, largest
+
+        real(kind=realType) :: pri_i, pri_l, pri_r, pri_largest
+
+        type(kdtree2_result) :: temp
+
+        i = i_in
+
+        bigloop: do
+            l = 2 * i ! left(i)
+            r = l + 1 ! right(i)
+            !
+            ! set 'largest' to the index of either i, l, r
+            ! depending on whose priority is largest.
+            !
+            ! note that l or r can be larger than the heap size
+            ! in which case they do not count.
+
+            ! does left child have higher priority?
+            if (l .gt. a%heap_size) then
+                ! we know that i is the largest as both l and r are invalid.
+                exit
+            else
+                pri_i = a%elems(i)%dis
+                pri_l = a%elems(l)%dis
+                if (pri_l .gt. pri_i) then
+                    largest = l
+                    pri_largest = pri_l
+                else
+                    largest = i
+                    pri_largest = pri_i
+                end if
+
+                !
+                ! between i and l we have a winner
+                ! now choose between that and r.
+                !
+                if (r .le. a%heap_size) then
+                    pri_r = a%elems(r)%dis
+                    if (pri_r .gt. pri_largest) then
+                        largest = r
+                    end if
+                end if
+            end if
+
+            if (largest .ne. i) then
+                ! swap data in nodes largest and i, then heapify
+
+                temp = a%elems(i)
+                a%elems(i) = a%elems(largest)
+                a%elems(largest) = temp
+                !
+                ! Canonical heapify() algorithm has tail-ecursive call:
+                !
+                !        call heapify(a,largest)
+                ! we will simulate with cycle
+                !
+                i = largest
+                cycle bigloop ! continue the loop
+            else
+                return   ! break from the loop
+            end if
+        end do bigloop
+        return
+    end subroutine heapify
+
+    subroutine pq_max(a, e)
+        implicit none
+        !
+        ! return the priority and its payload of the maximum priority element
+        ! on the queue, which should be the first one, if it is
+        ! in heapified form.
+        !
+        type(pq), pointer :: a
+        type(kdtree2_result), intent(out) :: e
+
+        if (a%heap_size .gt. 0) then
+            e = a%elems(1)
+        else
+            write (*, *) 'PQ_MAX: ERROR, heap_size < 1'
+            stop
+        end if
+        return
+    end subroutine pq_max
+
+    real(kind=realType) function pq_maxpri(a)
+        implicit none
+        type(pq), pointer :: a
+
+        if (a%heap_size .gt. 0) then
+            pq_maxpri = a%elems(1)%dis
+        else
+            write (*, *) 'PQ_MAX_PRI: ERROR, heapsize < 1'
+            stop
+        end if
+        return
+    end function pq_maxpri
+
+    subroutine pq_extract_max(a, e)
+        implicit none
+        !
+        ! return the priority and payload of maximum priority
+        ! element, and remove it from the queue.
+        ! (equivalent to 'pop()' on a stack)
+        !
+        type(pq), pointer :: a
+        type(kdtree2_result), intent(out) :: e
+
+        if (a%heap_size .ge. 1) then
+            !
+            ! return max as first element
+            !
+            e = a%elems(1)
+
+            !
+            ! move last element to first
+            !
+            a%elems(1) = a%elems(a%heap_size)
+            a%heap_size = a%heap_size - 1
+            call heapify(a, 1)
+            return
+        else
+            write (*, *) 'PQ_EXTRACT_MAX: error, attempted to pop non-positive PQ'
+            stop
+        end if
+
+    end subroutine pq_extract_max
+
+    real(kind=realType) function pq_insert(a, dis, idx)
+        implicit none
+        !
+        ! Insert a new element and return the new maximum priority,
+        ! which may or may not be the same as the old maximum priority.
+        !
+        type(pq), pointer :: a
+        real(kind=realType), intent(in) :: dis
+        integer(kind=intType), intent(in) :: idx
+        !    type(kdtree2_result), intent(in) :: e
+        !
+        integer(kind=intType) :: i, isparent
+        real(kind=realType) :: parentdis
+        !
+
+        !    if (a%heap_size .ge. a%max_elems) then
+        !       write (*,*) 'PQ_INSERT: error, attempt made to insert element on full PQ'
+        !       stop
+        !    else
+        a%heap_size = a%heap_size + 1
+        i = a%heap_size
+
+        do while (i .gt. 1)
+            isparent = int(i / 2)
+            parentdis = a%elems(isparent)%dis
+            if (dis .gt. parentdis) then
+                ! move what was in i's parent into i.
+                a%elems(i)%dis = parentdis
+                a%elems(i)%idx = a%elems(isparent)%idx
+                i = isparent
+            else
+                exit
+            end if
+        end do
+
+        ! insert the element at the determined position
+        a%elems(i)%dis = dis
+        a%elems(i)%idx = idx
+
+        pq_insert = a%elems(1)%dis
+        return
+        !    end if
+
+    end function pq_insert
+
+    subroutine pq_adjust_heap(a, i)
+        implicit none
+        type(pq), pointer :: a
+        integer(kind=intType), intent(in) :: i
+        !
+        ! nominally arguments (a,i), but specialize for a=1
+        !
+        ! This routine assumes that the trees with roots 2 and 3 are already heaps, i.e.
+        ! the children of '1' are heaps.  When the procedure is completed, the
+        ! tree rooted at 1 is a heap.
+        real(kind=realType) :: prichild
+        integer(kind=intType) :: parent, child, N
+
+        type(kdtree2_result) :: e
+
+        e = a%elems(i)
+
+        parent = i
+        child = 2 * i
+        N = a%heap_size
+
+        do while (child .le. N)
+            if (child .lt. N) then
+                if (a%elems(child)%dis .lt. a%elems(child + 1)%dis) then
+                    child = child + 1
+                end if
+            end if
+            prichild = a%elems(child)%dis
+            if (e%dis .ge. prichild) then
+                exit
+            else
                 ! move child into parent.
                 a%elems(parent) = a%elems(child)
                 parent = child
-                child = 2*parent
-             end if
-          end do loop
-          a%elems(parent)%dis = dis
-          a%elems(parent)%idx = idx
-          pq_replace_max = a%elems(1)%dis
-       else
-          a%elems(1)%dis = dis
-          a%elems(1)%idx = idx
-          pq_replace_max = dis
-       endif
-    else
-       !
-       ! slower version using elementary pop and push operations.
-       !
-       call pq_extract_max(a,etmp)
-       etmp%dis = dis
-       etmp%idx = idx
-       pq_replace_max = pq_insert(a,dis,idx)
-    endif
-    return
-  end function pq_replace_max
-
-  subroutine pq_delete(a,i)
-    implicit none
-    !
-    ! delete item with index 'i'
-    !
-    type(pq),pointer :: a
-    integer(kind=intType)           :: i
-
-    if ((i .lt. 1) .or. (i .gt. a%heap_size)) then
-       write (*,*) 'PQ_DELETE: error, attempt to remove out of bounds element.'
-       stop
-    endif
-
-    ! swap the item to be deleted with the last element
-    ! and shorten heap by one.
-    a%elems(i) = a%elems(a%heap_size)
-    a%heap_size = a%heap_size - 1
-
-    call heapify(a,i)
-
-  end subroutine pq_delete
+                child = 2 * parent
+            end if
+        end do
+        a%elems(parent) = e
+        return
+    end subroutine pq_adjust_heap
+
+    real(kind=realType) function pq_replace_max(a, dis, idx)
+        implicit none
+        !
+        ! Replace the extant maximum priority element
+        ! in the PQ with (dis,idx).  Return
+        ! the new maximum priority, which may be larger
+        ! or smaller than the old one.
+        !
+        type(pq), pointer :: a
+        real(kind=realType), intent(in) :: dis
+        integer(kind=intType), intent(in) :: idx
+        !    type(kdtree2_result), intent(in) :: e
+        ! not tested as well!
+
+        integer(kind=intType) :: parent, child, N
+        real(kind=realType) :: prichild, prichildp1
+
+        type(kdtree2_result) :: etmp
+
+        if (.true.) then
+            N = a%heap_size
+            if (N .ge. 1) then
+                parent = 1
+                child = 2
+
+                loop: do while (child .le. N)
+                    prichild = a%elems(child)%dis
+
+                    !
+                    ! posibly child+1 has higher priority, and if
+                    ! so, get it, and increment child.
+                    !
+
+                    if (child .lt. N) then
+                        prichildp1 = a%elems(child + 1)%dis
+                        if (prichild .lt. prichildp1) then
+                            child = child + 1
+                            prichild = prichildp1
+                        end if
+                    end if
+
+                    if (dis .ge. prichild) then
+                        exit loop
+                        ! we have a proper place for our new element,
+                        ! bigger than either children's priority.
+                    else
+                        ! move child into parent.
+                        a%elems(parent) = a%elems(child)
+                        parent = child
+                        child = 2 * parent
+                    end if
+                end do loop
+                a%elems(parent)%dis = dis
+                a%elems(parent)%idx = idx
+                pq_replace_max = a%elems(1)%dis
+            else
+                a%elems(1)%dis = dis
+                a%elems(1)%idx = idx
+                pq_replace_max = dis
+            end if
+        else
+            !
+            ! slower version using elementary pop and push operations.
+            !
+            call pq_extract_max(a, etmp)
+            etmp%dis = dis
+            etmp%idx = idx
+            pq_replace_max = pq_insert(a, dis, idx)
+        end if
+        return
+    end function pq_replace_max
+
+    subroutine pq_delete(a, i)
+        implicit none
+        !
+        ! delete item with index 'i'
+        !
+        type(pq), pointer :: a
+        integer(kind=intType) :: i
+
+        if ((i .lt. 1) .or. (i .gt. a%heap_size)) then
+            write (*, *) 'PQ_DELETE: error, attempt to remove out of bounds element.'
+            stop
+        end if
+
+        ! swap the item to be deleted with the last element
+        ! and shorten heap by one.
+        a%elems(i) = a%elems(a%heap_size)
+        a%heap_size = a%heap_size - 1
+
+        call heapify(a, i)
+
+    end subroutine pq_delete
 
 end module kdtree2_priority_queue_module
 
 module kdtree2_module
-  use kdtree2_priority_queue_module
-  use precision
-  ! K-D tree routines in Fortran 90 by Matt Kennel.
-  ! Original program was written in Sather by Steve Omohundro and
-  ! Matt Kennel.  Only the Euclidean metric is supported.
-  !
-  !
-  ! This module is identical to 'kdtree', except that the order
-  ! of subscripts is reversed in the data file.
-  ! In otherwords for an embedding of N D-dimensional vectors, the
-  ! data file is here, in natural Fortran order  data(1:D, 1:N)
-  ! because Fortran lays out columns first,
-  !
-  ! whereas conventionally (C-style) it is data(1:N,1:D)
-  ! as in the original kdtree module.
-  !
-  !-------------DATA TYPE, CREATION, DELETION---------------------
-  public :: kdtree2, kdtree2_result, tree_node, kdtree2_create, kdtree2destroy
-  !---------------------------------------------------------------
-  !-------------------SEARCH ROUTINES-----------------------------
-  public :: kdtree2_n_nearest,kdtree2_n_nearest_around_point
-  ! Return fixed number of nearest neighbors around arbitrary vector,
-  ! or extant point in dataset, with decorrelation window.
-  !
-  public :: kdtree2_r_nearest, kdtree2_r_nearest_around_point
-  ! Return points within a fixed ball of arb vector/extant point
-  !
-  public :: kdtree2_sort_results
-  ! Sort, in order of increasing distance, rseults from above.
-  !
-  public :: kdtree2_r_count, kdtree2_r_count_around_point
-  ! Count points within a fixed ball of arb vector/extant point
-  !
-  public :: kdtree2_n_nearest_brute_force, kdtree2_r_nearest_brute_force
-  ! brute force of kdtree2_[n|r]_nearest
-  !----------------------------------------------------------------
-
-
-  integer(kind=intType), parameter :: bucket_size = 12
-  ! The maximum number of points to keep in a terminal node.
-
-  type interval
-     real(kind=realType) :: lower,upper
-  end type interval
-
-  type :: tree_node
-     ! an internal tree node
-     private
-     integer(kind=intType) :: cut_dim
-     ! the dimension to cut
-     real(kind=realType) :: cut_val
-     ! where to cut the dimension
-     real(kind=realType) :: cut_val_left, cut_val_right
-     ! improved cutoffs knowing the spread in child boxes.
-     integer(kind=intType) :: l, u
-     type (tree_node), pointer :: left, right
-     type(interval), pointer :: box(:) => null()
-     ! child pointers
-     ! Points included in this node are indexes[k] with k \in [l,u]
-
-
-  end type tree_node
-
-  type :: kdtree2
-     ! Global information about the tree, one per tree
-     integer(kind=intType) :: dimen=0, n=0
-     ! dimensionality and total # of points
-     real(kind=realType), pointer :: the_data(:,:) => null()
-     ! pointer to the actual data array
-     !
-     !  IMPORTANT NOTE:  IT IS DIMENSIONED   the_data(1:d,1:N)
-     !  which may be opposite of what may be conventional.
-     !  This is, because in Fortran, the memory layout is such that
-     !  the first dimension is in sequential order.  Hence, with
-     !  (1:d,1:N), all components of the vector will be in consecutive
-     !  memory locations.  The search time is dominated by the
-     !  evaluation of distances in the terminal nodes.  Putting all
-     !  vector components in consecutive memory location improves
-     !  memory cache locality, and hence search speed, and may enable
-     !  vectorization on some processors and compilers.
-
-     integer(kind=intType), pointer :: ind(:) => null()
-     ! permuted index into the data, so that indexes[l..u] of some
-     ! bucket represent the indexes of the actual points in that
-     ! bucket.
-     logical       :: sort = .false.
-     ! do we always sort output results?
-     logical       :: rearrange = .false.
-     real(kind=realType), pointer :: rearranged_data(:,:) => null()
-     ! if (rearrange .eqv. .true.) then rearranged_data has been
-     ! created so that rearranged_data(:,i) = the_data(:,ind(i)),
-     ! permitting search to use more cache-friendly rearranged_data, at
-     ! some initial computation and storage cost.
-     type (tree_node), pointer :: root => null()
-     ! root pointer of the tree
-  end type kdtree2
-
-
-  type :: tree_search_record
-     !
-     ! One of these is created for each search.
-     !
-     private
-     !
-     ! Many fields are copied from the tree structure, in order to
-     ! speed up the search.
-     !
-     integer(kind=intType)           :: dimen
-     integer(kind=intType)           :: nn, nfound
-     real(kind=realType)      :: ballsize
-     integer(kind=intType)           :: centeridx=999, correltime=9999
-     ! exclude points within 'correltime' of 'centeridx', iff centeridx >= 0
-     integer(kind=intType)           :: nalloc  ! how much allocated for results(:)?
-     logical           :: rearrange  ! are the data rearranged or original?
-     ! did the # of points found overflow the storage provided?
-     logical           :: overflow
-     real(kind=realType), pointer :: qv(:)  ! query vector
-     type(kdtree2_result), pointer :: results(:) ! results
-     type(pq) :: pq
-     real(kind=realType), pointer :: data(:,:)  ! temp pointer to data
-     integer(kind=intType), pointer      :: ind(:)     ! temp pointer to indexes
-  end type tree_search_record
-
-  private
-  ! everything else is private.
-
-  type(tree_search_record), save, target :: sr   ! A GLOBAL VARIABLE for search
+    use kdtree2_priority_queue_module
+    use precision
+    ! K-D tree routines in Fortran 90 by Matt Kennel.
+    ! Original program was written in Sather by Steve Omohundro and
+    ! Matt Kennel.  Only the Euclidean metric is supported.
+    !
+    !
+    ! This module is identical to 'kdtree', except that the order
+    ! of subscripts is reversed in the data file.
+    ! In otherwords for an embedding of N D-dimensional vectors, the
+    ! data file is here, in natural Fortran order  data(1:D, 1:N)
+    ! because Fortran lays out columns first,
+    !
+    ! whereas conventionally (C-style) it is data(1:N,1:D)
+    ! as in the original kdtree module.
+    !
+    !-------------DATA TYPE, CREATION, DELETION---------------------
+    public :: kdtree2, kdtree2_result, tree_node, kdtree2_create, kdtree2destroy
+    !---------------------------------------------------------------
+    !-------------------SEARCH ROUTINES-----------------------------
+    public :: kdtree2_n_nearest, kdtree2_n_nearest_around_point
+    ! Return fixed number of nearest neighbors around arbitrary vector,
+    ! or extant point in dataset, with decorrelation window.
+    !
+    public :: kdtree2_r_nearest, kdtree2_r_nearest_around_point
+    ! Return points within a fixed ball of arb vector/extant point
+    !
+    public :: kdtree2_sort_results
+    ! Sort, in order of increasing distance, rseults from above.
+    !
+    public :: kdtree2_r_count, kdtree2_r_count_around_point
+    ! Count points within a fixed ball of arb vector/extant point
+    !
+    public :: kdtree2_n_nearest_brute_force, kdtree2_r_nearest_brute_force
+    ! brute force of kdtree2_[n|r]_nearest
+    !----------------------------------------------------------------
+
+    integer(kind=intType), parameter :: bucket_size = 12
+    ! The maximum number of points to keep in a terminal node.
+
+    type interval
+        real(kind=realType) :: lower, upper
+    end type interval
+
+    type :: tree_node
+        ! an internal tree node
+        private
+        integer(kind=intType) :: cut_dim
+        ! the dimension to cut
+        real(kind=realType) :: cut_val
+        ! where to cut the dimension
+        real(kind=realType) :: cut_val_left, cut_val_right
+        ! improved cutoffs knowing the spread in child boxes.
+        integer(kind=intType) :: l, u
+        type(tree_node), pointer :: left, right
+        type(interval), pointer :: box(:) => null()
+        ! child pointers
+        ! Points included in this node are indexes[k] with k \in [l,u]
+
+    end type tree_node
+
+    type :: kdtree2
+        ! Global information about the tree, one per tree
+        integer(kind=intType) :: dimen = 0, n = 0
+        ! dimensionality and total # of points
+        real(kind=realType), pointer :: the_data(:, :) => null()
+        ! pointer to the actual data array
+        !
+        !  IMPORTANT NOTE:  IT IS DIMENSIONED   the_data(1:d,1:N)
+        !  which may be opposite of what may be conventional.
+        !  This is, because in Fortran, the memory layout is such that
+        !  the first dimension is in sequential order.  Hence, with
+        !  (1:d,1:N), all components of the vector will be in consecutive
+        !  memory locations.  The search time is dominated by the
+        !  evaluation of distances in the terminal nodes.  Putting all
+        !  vector components in consecutive memory location improves
+        !  memory cache locality, and hence search speed, and may enable
+        !  vectorization on some processors and compilers.
+
+        integer(kind=intType), pointer :: ind(:) => null()
+        ! permuted index into the data, so that indexes[l..u] of some
+        ! bucket represent the indexes of the actual points in that
+        ! bucket.
+        logical :: sort = .false.
+        ! do we always sort output results?
+        logical :: rearrange = .false.
+        real(kind=realType), pointer :: rearranged_data(:, :) => null()
+        ! if (rearrange .eqv. .true.) then rearranged_data has been
+        ! created so that rearranged_data(:,i) = the_data(:,ind(i)),
+        ! permitting search to use more cache-friendly rearranged_data, at
+        ! some initial computation and storage cost.
+        type(tree_node), pointer :: root => null()
+        ! root pointer of the tree
+    end type kdtree2
+
+    type :: tree_search_record
+        !
+        ! One of these is created for each search.
+        !
+        private
+        !
+        ! Many fields are copied from the tree structure, in order to
+        ! speed up the search.
+        !
+        integer(kind=intType) :: dimen
+        integer(kind=intType) :: nn, nfound
+        real(kind=realType) :: ballsize
+        integer(kind=intType) :: centeridx = 999, correltime = 9999
+        ! exclude points within 'correltime' of 'centeridx', iff centeridx >= 0
+        integer(kind=intType) :: nalloc  ! how much allocated for results(:)?
+        logical :: rearrange  ! are the data rearranged or original?
+        ! did the # of points found overflow the storage provided?
+        logical :: overflow
+        real(kind=realType), pointer :: qv(:)  ! query vector
+        type(kdtree2_result), pointer :: results(:) ! results
+        type(pq) :: pq
+        real(kind=realType), pointer :: data(:, :)  ! temp pointer to data
+        integer(kind=intType), pointer :: ind(:)     ! temp pointer to indexes
+    end type tree_search_record
+
+    private
+    ! everything else is private.
+
+    type(tree_search_record), save, target :: sr   ! A GLOBAL VARIABLE for search
 
 contains
 
-  function kdtree2_create(input_data,dim,sort,rearrange) result (mr)
-    implicit none
-    !
-    ! create the actual tree structure, given an input array of data.
-    !
-    ! Note, input data is input_data(1:d,1:N), NOT the other way around.
-    ! THIS IS THE REVERSE OF THE PREVIOUS VERSION OF THIS MODULE.
-    ! The reason for it is cache friendliness, improving performance.
-    !
-    ! Optional arguments:  If 'dim' is specified, then the tree
-    !                      will only search the first 'dim' components
-    !                      of input_data, otherwise, dim is inferred
-    !                      from SIZE(input_data,1).
-    !
-    !                      if sort .eqv. .true. then output results
-    !                      will be sorted by increasing distance.
-    !                      default=.false., as it is faster to not sort.
-    !
-    !                      if rearrange .eqv. .true. then an internal
-    !                      copy of the data, rearranged by terminal node,
-    !                      will be made for cache friendliness.
-    !                      default=.true., as it speeds searches, but
-    !                      building takes longer, and extra memory is used.
-    !
-    ! .. Function Return Cut_value ..
-    type (kdtree2), pointer :: mr
-    integer(kind=intType), intent(in), optional      :: dim
-    logical, intent(in), optional      :: sort
-    logical, intent(in), optional      :: rearrange
-    ! ..
-    ! .. Array Arguments ..
-    real(kind=realType), target :: input_data(:,:)
-    !
-    integer(kind=intType) :: i
-    ! ..
-    allocate (mr)
-    mr%the_data => input_data
-    ! pointer assignment
-
-    if (present(dim)) then
-       mr%dimen = dim
-    else
-       mr%dimen = size(input_data,1)
-    end if
-    mr%n = size(input_data,2)
-
-    call build_tree(mr)
-
-    if (present(sort)) then
-       mr%sort = sort
-    else
-       mr%sort = .false.
-    endif
-
-    if (present(rearrange)) then
-       mr%rearrange = rearrange
-    else
-       mr%rearrange = .true.
-    endif
-
-    if (mr%rearrange) then
-       allocate(mr%rearranged_data(mr%dimen,mr%n))
-       do i=1,mr%n
-          mr%rearranged_data(:,i) = mr%the_data(:, &
-               mr%ind(i))
-       enddo
-    else
-       nullify(mr%rearranged_data)
-    endif
-
-  end function kdtree2_create
-
-  subroutine build_tree(tp)
-    implicit none
-    type (kdtree2), pointer :: tp
-    ! ..
-    integer(kind=intType) :: j
-    type(tree_node), pointer :: dummy => null()
-    ! ..
-    allocate (tp%ind(tp%n))
-    forall (j=1:tp%n)
-       tp%ind(j) = j
-    end forall
-    tp%root => build_tree_for_range(tp,1,tp%n, dummy)
-  end subroutine build_tree
-
-  recursive function build_tree_for_range(tp,l,u,parent) result (res)
-    use constants
-    implicit none
-    ! .. Function Return Cut_value ..
-    type (tree_node), pointer :: res
-    ! ..
-    ! .. Structure Arguments ..
-    type (kdtree2), pointer :: tp
-    type (tree_node),pointer           :: parent
-    ! ..
-    ! .. Scalar Arguments ..
-    integer(kind=intType), intent (In) :: l, u
-    ! ..
-    ! .. Local Scalars ..
-    integer(kind=intType) :: i, c, m, dimen, idim
-    logical :: recompute
-    real(kind=realType)    :: average, left, right, coorspread(tp%dimen), tmp
+    function kdtree2_create(input_data, dim, sort, rearrange) result(mr)
+        implicit none
+        !
+        ! create the actual tree structure, given an input array of data.
+        !
+        ! Note, input data is input_data(1:d,1:N), NOT the other way around.
+        ! THIS IS THE REVERSE OF THE PREVIOUS VERSION OF THIS MODULE.
+        ! The reason for it is cache friendliness, improving performance.
+        !
+        ! Optional arguments:  If 'dim' is specified, then the tree
+        !                      will only search the first 'dim' components
+        !                      of input_data, otherwise, dim is inferred
+        !                      from SIZE(input_data,1).
+        !
+        !                      if sort .eqv. .true. then output results
+        !                      will be sorted by increasing distance.
+        !                      default=.false., as it is faster to not sort.
+        !
+        !                      if rearrange .eqv. .true. then an internal
+        !                      copy of the data, rearranged by terminal node,
+        !                      will be made for cache friendliness.
+        !                      default=.true., as it speeds searches, but
+        !                      building takes longer, and extra memory is used.
+        !
+        ! .. Function Return Cut_value ..
+        type(kdtree2), pointer :: mr
+        integer(kind=intType), intent(in), optional :: dim
+        logical, intent(in), optional :: sort
+        logical, intent(in), optional :: rearrange
+        ! ..
+        ! .. Array Arguments ..
+        real(kind=realType), target :: input_data(:, :)
+        !
+        integer(kind=intType) :: i
+        ! ..
+        allocate (mr)
+        mr%the_data => input_data
+        ! pointer assignment
+
+        if (present(dim)) then
+            mr%dimen = dim
+        else
+            mr%dimen = size(input_data, 1)
+        end if
+        mr%n = size(input_data, 2)
+
+        call build_tree(mr)
+
+        if (present(sort)) then
+            mr%sort = sort
+        else
+            mr%sort = .false.
+        end if
+
+        if (present(rearrange)) then
+            mr%rearrange = rearrange
+        else
+            mr%rearrange = .true.
+        end if
+
+        if (mr%rearrange) then
+            allocate (mr%rearranged_data(mr%dimen, mr%n))
+            do i = 1, mr%n
+                mr%rearranged_data(:, i) = mr%the_data(:, &
+                                                       mr%ind(i))
+            end do
+        else
+            nullify (mr%rearranged_data)
+        end if
+
+    end function kdtree2_create
+
+    subroutine build_tree(tp)
+        implicit none
+        type(kdtree2), pointer :: tp
+        ! ..
+        integer(kind=intType) :: j
+        type(tree_node), pointer :: dummy => null()
+        ! ..
+        allocate (tp%ind(tp%n))
+        forall (j=1:tp%n)
+            tp%ind(j) = j
+        end forall
+        tp%root => build_tree_for_range(tp, 1, tp%n, dummy)
+    end subroutine build_tree
+
+    recursive function build_tree_for_range(tp, l, u, parent) result(res)
+        use constants
+        implicit none
+        ! .. Function Return Cut_value ..
+        type(tree_node), pointer :: res
+        ! ..
+        ! .. Structure Arguments ..
+        type(kdtree2), pointer :: tp
+        type(tree_node), pointer :: parent
+        ! ..
+        ! .. Scalar Arguments ..
+        integer(kind=intType), intent(In) :: l, u
+        ! ..
+        ! .. Local Scalars ..
+        integer(kind=intType) :: i, c, m, dimen, idim
+        logical :: recompute
+        real(kind=realType) :: average, left, right, coorspread(tp%dimen), tmp
 
 !!$      If (.False.) Then
 !!$         If ((l .Lt. 1) .Or. (l .Gt. tp%n)) Then
@@ -708,1215 +700,1206 @@ recursive function build_tree_for_range(tp,l,u,parent) result (res)
 !!$         End If
 !!$      Endif
 !!$
-    ! first compute min and max
-    dimen = tp%dimen
-    allocate (res)
-    allocate(res%box(dimen))
-
-    ! First, compute an APPROXIMATE bounding box of all points associated with this node.
-    if ( u < l ) then
-       ! no points in this box
-       nullify(res)
-       return
-    end if
-
-    if ((u-l)<=bucket_size) then
-       !
-       ! always compute true bounding box for terminal nodes.
-       !
-       do i=1,dimen
-          call spread_in_coordinate(tp,i,l,u,res%box(i))
-       end do
-       res%cut_dim = 0
-       res%cut_val = 0.0
-       res%l = l
-       res%u = u
-       res%left =>null()
-       res%right => null()
-    else
-       !
-       ! modify approximate bounding box.  This will be an
-       ! overestimate of the true bounding box, as we are only recomputing
-       ! the bounding box for the dimension that the parent split on.
-       !
-       ! Going to a true bounding box computation would significantly
-       ! increase the time necessary to build the tree, and usually
-       ! has only a very small difference.  This box is not used
-       ! for searching but only for deciding which coordinate to split on.
-       !
-       do i=1,dimen
-          recompute=.true.
-          if (associated(parent)) then
-             if (i .ne. parent%cut_dim) then
-                recompute=.false.
-             end if
-          endif
-          if (recompute) then
-             call spread_in_coordinate(tp,i,l,u,res%box(i))
-          else
-             res%box(i) = parent%box(i)
-          endif
-       end do
-
-       !
-       ! c is the identity of which coordinate has the greatest spread.
-       !
-
-       coorspread = res%box(1:dimen)%upper-res%box(1:dimen)%lower
-
-       tmp = -one
-       do i=1, dimen
-          if (coorSpread(i) > tmp) then
-             tmp = coorSpread(i)
-             c = i
-          end if
-       end do
-
-       if (.True.) then
-          ! select exact median to have fully balanced tree.
-          m = (l+u)/2
-          call select_on_coordinate(tp%the_data,tp%ind,c,m,l,u)
-       else
-          !
-          ! select point halfway between min and max, as per A. Moore,
-          ! who says this helps in some degenerate cases, or
-          ! actual arithmetic average.
-          !
-          if (.true.) then
-             ! actually compute average
-             average = sum(tp%the_data(c,tp%ind(l:u))) / real(u-l+1,kind=realType)
-          else
-             average = (res%box(c)%upper + res%box(c)%lower)/2.0
-          endif
-
-          res%cut_val = average
-          m = select_on_coordinate_value(tp%the_data,tp%ind,c,average,l,u)
-       endif
-
-       ! moves indexes around
-       res%cut_dim = c
-       res%l = l
-       res%u = u
-       !         res%cut_val = tp%the_data(c,tp%ind(m))
-
-       res%left => build_tree_for_range(tp,l,m,res)
-       res%right => build_tree_for_range(tp,m+1,u,res)
-
-       if (associated(res%right) .eqv. .false.) then
-          res%box = res%left%box
-          res%cut_val_left = res%left%box(c)%upper
-          res%cut_val = res%cut_val_left
-       elseif (associated(res%left) .eqv. .false.) then
-          res%box = res%right%box
-          res%cut_val_right = res%right%box(c)%lower
-          res%cut_val = res%cut_val_right
-       else
-          res%cut_val_right = res%right%box(c)%lower
-          res%cut_val_left = res%left%box(c)%upper
-          res%cut_val = (res%cut_val_left + res%cut_val_right)/2
-
-
-          ! now remake the true bounding box for self.
-          ! Since we are taking unions (in effect) of a tree structure,
-          ! this is much faster than doing an exhaustive
-          ! search over all points
-          do idim=1, dimen
-             left = res%left%box(idim)%upper
-             right = res%right%box(idim)%upper
-             res%box(idim)%upper = max(left,right)
-
-             left = res%left%box(idim)%lower
-             right = res%right%box(idim)%lower
-
-             res%box(idim)%lower = min(left, right)
-          end do
-          ! res%box%upper = max(res%left%box%upper,res%right%box%upper)
-          ! res%box%lower = min(res%left%box%lower,res%right%box%lower)
-       endif
-    end if
-  end function build_tree_for_range
-
-  integer(kind=intType) function select_on_coordinate_value(v,ind,c,alpha,li,ui) &
-       result(res)
-    implicit none
-    ! Move elts of ind around between l and u, so that all points
-    ! <= than alpha (in c cooordinate) are first, and then
-    ! all points > alpha are second.
-
-    !
-    ! Algorithm (matt kennel).
-    !
-    ! Consider the list as having three parts: on the left,
-    ! the points known to be <= alpha.  On the right, the points
-    ! known to be > alpha, and in the middle, the currently unknown
-    ! points.   The algorithm is to scan the unknown points, starting
-    ! from the left, and swapping them so that they are added to
-    ! the left stack or the right stack, as appropriate.
-    !
-    ! The algorithm finishes when the unknown stack is empty.
-    !
-    ! .. Scalar Arguments ..
-    integer(kind=intType), intent (In) :: c, li, ui
-    real(kind=realType), intent(in) :: alpha
-    ! ..
-    real(kind=realType) :: v(1:,1:)
-    integer(kind=intType) :: ind(1:)
-    integer(kind=intType) :: tmp
-    ! ..
-    integer(kind=intType) :: lb, rb
-    !
-    ! The points known to be <= alpha are in
-    ! [l,lb-1]
-    !
-    ! The points known to be > alpha are in
-    ! [rb+1,u].
-    !
-    ! Therefore we add new points into lb or
-    ! rb as appropriate.  When lb=rb
-    ! we are done.  We return the location of the last point <= alpha.
-    !
-    !
-    lb = li; rb = ui
-
-    do while (lb < rb)
-       if ( v(c,ind(lb)) <= alpha ) then
-          ! it is good where it is.
-          lb = lb+1
-       else
-          ! swap it with rb.
-          tmp = ind(lb); ind(lb) = ind(rb); ind(rb) = tmp
-          rb = rb-1
-       endif
-    end do
-
-    ! now lb .eq. ub
-    if (v(c,ind(lb)) <= alpha) then
-       res = lb
-    else
-       res = lb-1
-    endif
-
-  end function select_on_coordinate_value
-
-  subroutine select_on_coordinate(v,ind,c,k,li,ui)
-    implicit none
-    ! Move elts of ind around between l and u, so that the kth
-    ! element
-    ! is >= those below, <= those above, in the coordinate c.
-    ! .. Scalar Arguments ..
-    integer(kind=intType), intent (In) :: c, k, li, ui
-    ! ..
-    integer(kind=intType) :: i, l, m, s, t, u
-    ! ..
-    real(kind=realType) :: v(:,:)
-    integer(kind=intType) :: ind(:)
-    ! ..
-    l = li
-    u = ui
-    do while (l<u)
-       t = ind(l)
-       m = l
-       do i = l + 1, u
-          if (v(c,ind(i))<v(c,t)) then
-             m = m + 1
-             s = ind(m)
-             ind(m) = ind(i)
-             ind(i) = s
-          end if
-       end do
-       s = ind(l)
-       ind(l) = ind(m)
-       ind(m) = s
-       if (m<=k) l = m + 1
-       if (m>=k) u = m - 1
-    end do
-  end subroutine select_on_coordinate
-
-  subroutine spread_in_coordinate(tp,c,l,u,interv)
-
-    implicit none
-
-    ! the spread in coordinate 'c', between l and u.
-    !
-    ! Return lower bound in 'smin', and upper in 'smax',
-    ! ..
-    ! .. Structure Arguments ..
-    type (kdtree2), pointer :: tp
-    type(interval), intent(out) :: interv
-    ! ..
-    ! .. Scalar Arguments ..
-    integer(kind=intType), intent (In) :: c, l, u
-    ! ..
-    ! .. Local Scalars ..
-    real(kind=realType) :: last, lmax, lmin, t, smin,smax
-    integer(kind=intType) :: i, ulocal
-    ! ..
-    ! .. Local Arrays ..
-    real(kind=realType), pointer :: v(:,:)
-    integer(kind=intType), pointer :: ind(:)
-
-    v => tp%the_data(1:,1:)
-    ind => tp%ind(1:)
-    smin = v(c,ind(l))
-    smax = smin
-
-    ulocal = u
-
-    do i = l + 2, ulocal, 2
-       lmin = v(c,ind(i-1))
-       lmax = v(c,ind(i))
-       if (lmin>lmax) then
-          t = lmin
-          lmin = lmax
-          lmax = t
-       end if
-       if (smin>lmin) smin = lmin
-       if (smax<lmax) smax = lmax
-    end do
-    if (i==ulocal+1) then
-       last = v(c,ind(ulocal))
-       if (smin>last) smin = last
-       if (smax<last) smax = last
-    end if
-
-    interv%lower = smin
-    interv%upper = smax
-
-  end subroutine spread_in_coordinate
-
-
-  subroutine kdtree2destroy(tp)
-    implicit none
-    ! Deallocates all memory for the tree, except input data matrix
-    ! .. Structure Arguments ..
-    type (kdtree2), pointer :: tp
-    ! ..
-    call destroy_node(tp%root)
-
-    deallocate (tp%ind)
-    nullify (tp%ind)
-
-    if (tp%rearrange) then
-       deallocate(tp%rearranged_data)
-       nullify(tp%rearranged_data)
-    endif
-
-    deallocate(tp)
-    return
-
-  contains
-    recursive subroutine destroy_node(np)
-      implicit none
-      ! .. Structure Arguments ..
-      type (tree_node), pointer :: np
-      ! ..
-      ! .. Intrinsic Functions ..
-      intrinsic ASSOCIATED
-      ! ..
-      if (associated(np%left)) then
-         call destroy_node(np%left)
-         nullify (np%left)
-      end if
-      if (associated(np%right)) then
-         call destroy_node(np%right)
-         nullify (np%right)
-      end if
-      if (associated(np%box)) deallocate(np%box)
-      deallocate(np)
-      return
-
-    end subroutine destroy_node
-
-  end subroutine kdtree2destroy
-
-  subroutine kdtree2_n_nearest(tp,qv,nn,results)
-    implicit none
-    ! Find the 'nn' vectors in the tree nearest to 'qv' in euclidean norm
-    ! returning their indexes and distances in 'indexes' and 'distances'
-    ! arrays already allocated passed to this subroutine.
-    type (kdtree2), pointer      :: tp
-    real(kind=realType), target, intent (In)    :: qv(:)
-    integer(kind=intType), intent (In)         :: nn
-    type(kdtree2_result), target :: results(:)
-
-
-    sr%ballsize = huge(1.0)
-    sr%qv => qv
-    sr%nn = nn
-    sr%nfound = 0
-    sr%centeridx = -1
-    sr%correltime = 0
-    sr%overflow = .false.
-
-    sr%results => results
-
-    sr%nalloc = nn   ! will be checked
-
-    sr%ind => tp%ind
-    sr%rearrange = tp%rearrange
-    if (tp%rearrange) then
-       sr%Data => tp%rearranged_data
-    else
-       sr%Data => tp%the_data
-    endif
-    sr%dimen = tp%dimen
-
-    call validate_query_storage(nn)
-    sr%pq = pq_create(results)
-
-    call search(tp%root)
-
-    if (tp%sort) then
-       call kdtree2_sort_results(nn, results)
-    endif
-    !    deallocate(sr%pqp)
-    return
-  end subroutine kdtree2_n_nearest
-
-  subroutine kdtree2_n_nearest_around_point(tp,idxin,correltime,nn,results)
-    implicit none
-    ! Find the 'nn' vectors in the tree nearest to point 'idxin',
-    ! with correlation window 'correltime', returing results in
-    ! results(:), which must be pre-allocated upon entry.
-    type (kdtree2), pointer        :: tp
-    integer(kind=intType), intent (In)           :: idxin, correltime, nn
-    type(kdtree2_result), target   :: results(:)
-
-    allocate (sr%qv(tp%dimen))
-    sr%qv = tp%the_data(:,idxin) ! copy the vector
-    sr%ballsize = huge(1.0)       ! the largest real(kind=realType) number
-    sr%centeridx = idxin
-    sr%correltime = correltime
-
-    sr%nn = nn
-    sr%nfound = 0
-
-    sr%dimen = tp%dimen
-    sr%nalloc = nn
-
-    sr%results => results
-
-    sr%ind => tp%ind
-    sr%rearrange = tp%rearrange
-
-    if (sr%rearrange) then
-       sr%Data => tp%rearranged_data
-    else
-       sr%Data => tp%the_data
-    endif
-
-    call validate_query_storage(nn)
-    sr%pq = pq_create(results)
-
-    call search(tp%root)
-
-    if (tp%sort) then
-       call kdtree2_sort_results(nn, results)
-    endif
-    deallocate (sr%qv)
-    return
-  end subroutine kdtree2_n_nearest_around_point
-
-  subroutine kdtree2_r_nearest(tp,qv,r2,nfound,nalloc,results)
-    implicit none
-    ! find the nearest neighbors to point 'idxin', within SQUARED
-    ! Euclidean distance 'r2'.   Upon ENTRY, nalloc must be the
-    ! size of memory allocated for results(1:nalloc).  Upon
-    ! EXIT, nfound is the number actually found within the ball.
-    !
-    !  Note that if nfound .gt. nalloc then more neighbors were found
-    !  than there were storage to store.  The resulting list is NOT
-    !  the smallest ball inside norm r^2
-    !
-    ! Results are NOT sorted unless tree was created with sort option.
-    type (kdtree2), pointer      :: tp
-    real(kind=realType), target, intent (In)    :: qv(:)
-    real(kind=realType), intent(in)             :: r2
-    integer(kind=intType), intent(out)         :: nfound
-    integer(kind=intType), intent (In)         :: nalloc
-    type(kdtree2_result), target :: results(:)
-
-    !
-    sr%qv => qv
-    sr%ballsize = r2
-    sr%nn = 0      ! flag for fixed ball search
-    sr%nfound = 0
-    sr%centeridx = -1
-    sr%correltime = 0
-
-    sr%results => results
-
-    call validate_query_storage(nalloc)
-    sr%nalloc = nalloc
-    sr%overflow = .false.
-    sr%ind => tp%ind
-    sr%rearrange= tp%rearrange
-
-    if (tp%rearrange) then
-       sr%Data => tp%rearranged_data
-    else
-       sr%Data => tp%the_data
-    endif
-    sr%dimen = tp%dimen
-
-    !
-    !sr%dsl = Huge(sr%dsl)    ! set to huge positive values
-    !sr%il = -1               ! set to invalid indexes
-    !
-
-    call search(tp%root)
-    nfound = sr%nfound
-    if (sr%overflow) then
-       ! Sorting will cause an error if we have overflowed.
-       return
-    end if
-
-    if (tp%sort) then
-       call kdtree2_sort_results(nfound, results)
-    endif
-
-  end subroutine kdtree2_r_nearest
-
-  subroutine kdtree2_r_nearest_around_point(tp,idxin,correltime,r2,&
-       nfound,nalloc,results)
-    implicit none
-    !
-    ! Like kdtree2_r_nearest, but around a point 'idxin' already existing
-    ! in the data set.
-    !
-    ! Results are NOT sorted unless tree was created with sort option.
-    !
-    type (kdtree2), pointer      :: tp
-    integer(kind=intType), intent (In)         :: idxin, correltime, nalloc
-    real(kind=realType), intent(in)             :: r2
-    integer(kind=intType), intent(out)         :: nfound
-    type(kdtree2_result), target :: results(:)
-    ! ..
-    ! .. Intrinsic Functions ..
-    intrinsic HUGE
-    ! ..
-    allocate (sr%qv(tp%dimen))
-    sr%qv = tp%the_data(:,idxin) ! copy the vector
-    sr%ballsize = r2
-    sr%nn = 0    ! flag for fixed r search
-    sr%nfound = 0
-    sr%centeridx = idxin
-    sr%correltime = correltime
-
-    sr%results => results
-
-    sr%nalloc = nalloc
-    sr%overflow = .false.
-
-    call validate_query_storage(nalloc)
-
-    !    sr%dsl = HUGE(sr%dsl)    ! set to huge positive values
-    !    sr%il = -1               ! set to invalid indexes
-
-    sr%ind => tp%ind
-    sr%rearrange = tp%rearrange
-
-    if (tp%rearrange) then
-       sr%Data => tp%rearranged_data
-    else
-       sr%Data => tp%the_data
-    endif
-    sr%rearrange = tp%rearrange
-    sr%dimen = tp%dimen
-
-    !
-    !sr%dsl = Huge(sr%dsl)    ! set to huge positive values
-    !sr%il = -1               ! set to invalid indexes
-    !
-
-    call search(tp%root)
-    nfound = sr%nfound
-    if (tp%sort) then
-       call kdtree2_sort_results(nfound,results)
-    endif
-
-    if (sr%overflow) then
-       write (*,*) 'KDTREE_TRANS: warning! return from kdtree2_r_nearest found more neighbors'
-       write (*,*) 'KDTREE_TRANS: than storage was provided for.  Answer is NOT smallest ball'
-       write (*,*) 'KDTREE_TRANS: with that number of neighbors!  I.e. it is wrong.'
-    endif
-
-    deallocate (sr%qv)
-    return
-  end subroutine kdtree2_r_nearest_around_point
-
-  function kdtree2_r_count(tp,qv,r2) result(nfound)
-    implicit none
-    ! Count the number of neighbors within square distance 'r2'.
-    type (kdtree2), pointer   :: tp
-    real(kind=realType), target, intent (In) :: qv(:)
-    real(kind=realType), intent(in)          :: r2
-    integer(kind=intType)                   :: nfound
-    ! ..
-    ! .. Intrinsic Functions ..
-    intrinsic HUGE
-    ! ..
-    sr%qv => qv
-    sr%ballsize = r2
-
-    sr%nn = 0       ! flag for fixed r search
-    sr%nfound = 0
-    sr%centeridx = -1
-    sr%correltime = 0
-
-    nullify(sr%results) ! for some reason, FTN 95 chokes on '=> null()'
-
-    sr%nalloc = 0            ! we do not allocate any storage but that's OK
-    ! for counting.
-    sr%ind => tp%ind
-    sr%rearrange = tp%rearrange
-    if (tp%rearrange) then
-       sr%Data => tp%rearranged_data
-    else
-       sr%Data => tp%the_data
-    endif
-    sr%dimen = tp%dimen
-
-    !
-    !sr%dsl = Huge(sr%dsl)    ! set to huge positive values
-    !sr%il = -1               ! set to invalid indexes
-    !
-    sr%overflow = .false.
-
-    call search(tp%root)
-
-    nfound = sr%nfound
-
-    return
-  end function kdtree2_r_count
-
-  function kdtree2_r_count_around_point(tp,idxin,correltime,r2) &
-       result(nfound)
-    implicit none
-    ! Count the number of neighbors within square distance 'r2' around
-    ! point 'idxin' with decorrelation time 'correltime'.
-    !
-    type (kdtree2), pointer :: tp
-    integer(kind=intType), intent (In)    :: correltime, idxin
-    real(kind=realType), intent(in)        :: r2
-    integer(kind=intType)                 :: nfound
-    ! ..
-    ! ..
-    ! .. Intrinsic Functions ..
-    intrinsic HUGE
-    ! ..
-    allocate (sr%qv(tp%dimen))
-    sr%qv = tp%the_data(:,idxin)
-    sr%ballsize = r2
-
-    sr%nn = 0       ! flag for fixed r search
-    sr%nfound = 0
-    sr%centeridx = idxin
-    sr%correltime = correltime
-    nullify(sr%results)
-
-    sr%nalloc = 0            ! we do not allocate any storage but that's OK
-    ! for counting.
-
-    sr%ind => tp%ind
-    sr%rearrange = tp%rearrange
-
-    if (sr%rearrange) then
-       sr%Data => tp%rearranged_data
-    else
-       sr%Data => tp%the_data
-    endif
-    sr%dimen = tp%dimen
-
-    !
-    !sr%dsl = Huge(sr%dsl)    ! set to huge positive values
-    !sr%il = -1               ! set to invalid indexes
-    !
-    sr%overflow = .false.
-
-    call search(tp%root)
-
-    nfound = sr%nfound
-
-    return
-  end function kdtree2_r_count_around_point
-
-
-  subroutine validate_query_storage(n)
-    implicit none
-    !
-    ! make sure we have enough storage for n
-    !
-    integer(kind=intType), intent(in) :: n
-
-    if (size(sr%results,1) .lt. n) then
-       write (*,*) 'KDTREE_TRANS:  you did not provide enough storage for results(1:n)'
-       stop
-       return
-    endif
-
-    return
-  end subroutine validate_query_storage
-
-  function square_distance(d, iv,qv) result (res)
-    implicit none
-    ! distance between iv[1:n] and qv[1:n]
-    ! .. Function Return Value ..
-    ! re-implemented to improve vectorization.
-    real(kind=realType) :: res
-    ! ..
-    ! ..
-    ! .. Scalar Arguments ..
-    integer(kind=intType) :: d
-    ! ..
-    ! .. Array Arguments ..
-    real(kind=realType) :: iv(:),qv(:)
-    ! ..
-    ! ..
-    res = sum( (iv(1:d)-qv(1:d))**2 )
-  end function square_distance
-
-  recursive subroutine search(node)
-    implicit none
-    !
-    ! This is the innermost core routine of the kd-tree search.  Along
-    ! with "process_terminal_node", it is the performance bottleneck.
-    !
-    ! This version uses a logically complete secondary search of
-    ! "box in bounds", whether the sear
-    !
-    type (Tree_node), pointer          :: node
-    ! ..
-    type(tree_node),pointer            :: ncloser, nfarther
-    !
-    integer(kind=intType)                            :: cut_dim, i
-    ! ..
-    real(kind=realType)                               :: qval, dis
-    real(kind=realType)                               :: ballsize
-    real(kind=realType), pointer           :: qv(:)
-    type(interval), pointer :: box(:)
-
-    if ((associated(node%left) .and. associated(node%right)) .eqv. .false.) then
-       ! we are on a terminal node
-       if (sr%nn .eq. 0) then
-          call process_terminal_node_fixedball(node)
-       else
-          call process_terminal_node(node)
-       endif
-    else
-       ! we are not on a terminal node
-       qv => sr%qv(1:)
-       cut_dim = node%cut_dim
-       qval = qv(cut_dim)
-
-       if (qval < node%cut_val) then
-          ncloser => node%left
-          nfarther => node%right
-          dis = (node%cut_val_right - qval)**2
-          !          extra = node%cut_val - qval
-       else
-          ncloser => node%right
-          nfarther => node%left
-          dis = (node%cut_val_left - qval)**2
-          !          extra = qval- node%cut_val_left
-       endif
-
-       if (associated(ncloser)) call search(ncloser)
-
-       ! we may need to search the second node.
-       if (associated(nfarther)) then
-          ballsize = sr%ballsize
-          !          dis=extra**2
-          if (dis <= ballsize) then
-             !
-             ! we do this separately as going on the first cut dimen is often
-             ! a good idea.
-             ! note that if extra**2 < sr%ballsize, then the next
-             ! check will also be false.
-             !
-             box => node%box(1:)
-             do i=1,sr%dimen
-                if (i .ne. cut_dim) then
-                   dis = dis + dis2_from_bnd(qv(i),box(i)%lower,box(i)%upper)
-                   if (dis > ballsize) then
-                      return
-                   endif
-                endif
-             end do
-
-             !
-             ! if we are still here then we need to search mroe.
-             !
-             call search(nfarther)
-          endif
-       endif
-    end if
-  end subroutine search
-
-
-  real(kind=realType) function dis2_from_bnd(x,amin,amax) result (res)
-    implicit none
-    real(kind=realType), intent(in) :: x, amin,amax
-
-    if (x > amax) then
-       res = (x-amax)**2;
-       return
-    else
-       if (x < amin) then
-          res = (amin-x)**2;
-          return
-       else
-          res = 0.0
-          return
-       endif
-    endif
-    return
-  end function dis2_from_bnd
-
-  logical function box_in_search_range(node, sr) result(res)
-    implicit none
-    !
-    ! Return the distance from 'qv' to the CLOSEST corner of node's
-    ! bounding box
-    ! for all coordinates outside the box.   Coordinates inside the box
-    ! contribute nothing to the distance.
-    !
-    type (tree_node), pointer :: node
-    type (tree_search_record), pointer :: sr
-
-    integer(kind=intType) :: dimen, i
-    real(kind=realType)    :: dis, ballsize
-    real(kind=realType)    :: l, u
-
-    dimen = sr%dimen
-    ballsize = sr%ballsize
-    dis = 0.0
-    res = .true.
-    do i=1,dimen
-       l = node%box(i)%lower
-       u = node%box(i)%upper
-       dis = dis + (dis2_from_bnd(sr%qv(i),l,u))
-       if (dis > ballsize) then
-          res = .false.
-          return
-       endif
-    end do
-    res = .true.
-    return
-  end function box_in_search_range
-
-
-  subroutine process_terminal_node(node)
-    implicit none
-    !
-    ! Look for actual near neighbors in 'node', and update
-    ! the search results on the sr data structure.
-    !
-    type (tree_node), pointer          :: node
-    !
-    real(kind=realType), pointer          :: qv(:)
-    integer(kind=intType), pointer       :: ind(:)
-    real(kind=realType), pointer          :: data(:,:)
-    !
-    integer(kind=intType)                :: dimen, i, indexofi, k, centeridx, correltime
-    real(kind=realType)                   :: ballsize, sd, newpri
-    logical                :: rearrange
-    type(pq), pointer      :: pqp
-    !
-    ! copy values from sr to local variables
-    !
-    !
-    ! Notice, making local pointers with an EXPLICIT lower bound
-    ! seems to generate faster code.
-    ! why?  I don't know.
-    qv => sr%qv(1:)
-    pqp => sr%pq
-    dimen = sr%dimen
-    ballsize = sr%ballsize
-    rearrange = sr%rearrange
-    ind => sr%ind(1:)
-    data => sr%Data(1:,1:)
-    centeridx = sr%centeridx
-    correltime = sr%correltime
-
-    !    doing_correl = (centeridx >= 0)  ! Do we have a decorrelation window?
-    !    include_point = .true.    ! by default include all points
-    ! search through terminal bucket.
-
-    mainloop: do i = node%l, node%u
-       if (rearrange) then
-          sd = 0.0
-          do k = 1,dimen
-             sd = sd + (data(k,i) - qv(k))**2
-             if (sd>ballsize) cycle mainloop
-          end do
-          indexofi = ind(i)  ! only read it if we have not broken out
-       else
-          indexofi = ind(i)
-          sd = 0.0
-          do k = 1,dimen
-             sd = sd + (data(k,indexofi) - qv(k))**2
-             if (sd>ballsize) cycle mainloop
-          end do
-       endif
-
-       if (centeridx > 0) then ! doing correlation interval?
-          if (abs(indexofi-centeridx) < correltime) cycle mainloop
-       endif
-
-
-       !
-       ! two choices for any point.  The list so far is either undersized,
-       ! or it is not.
-       !
-       ! If it is undersized, then add the point and its distance
-       ! unconditionally.  If the point added fills up the working
-       ! list then set the sr%ballsize, maximum distance bound (largest distance on
-       ! list) to be that distance, instead of the initialized +infinity.
-       !
-       ! If the running list is full size, then compute the
-       ! distance but break out immediately if it is larger
-       ! than sr%ballsize, "best squared distance" (of the largest element),
-       ! as it cannot be a good neighbor.
-       !
-       ! Once computed, compare to best_square distance.
-       ! if it is smaller, then delete the previous largest
-       ! element and add the new one.
-
-       if (sr%nfound .lt. sr%nn) then
-          !
-          ! add this point unconditionally to fill list.
-          !
-          sr%nfound = sr%nfound +1
-          newpri = pq_insert(pqp,sd,indexofi)
-          if (sr%nfound .eq. sr%nn) ballsize = newpri
-          ! we have just filled the working list.
-          ! put the best square distance to the maximum value
-          ! on the list, which is extractable from the PQ.
-
-       else
-          !
-          ! now, if we get here,
-          ! we know that the current node has a squared
-          ! distance smaller than the largest one on the list, and
-          ! belongs on the list.
-          ! Hence we replace that with the current one.
-          !
-          ballsize = pq_replace_max(pqp,sd,indexofi)
-       endif
-    end do mainloop
-    !
-    ! Reset sr variables which may have changed during loop
-    !
-    sr%ballsize = ballsize
-
-  end subroutine process_terminal_node
-
-  subroutine process_terminal_node_fixedball(node)
-    implicit none
-    !
-    ! Look for actual near neighbors in 'node', and update
-    ! the search results on the sr data structure, i.e.
-    ! save all within a fixed ball.
-    !
-    type (tree_node), pointer          :: node
-    !
-    real(kind=realType), pointer          :: qv(:)
-    integer(kind=intType), pointer       :: ind(:)
-    real(kind=realType), pointer          :: data(:,:)
-    !
-    integer(kind=intType)                :: nfound
-    integer(kind=intType)                :: dimen, i, indexofi, k
-    integer(kind=intType)                :: centeridx, correltime, nn
-    real(kind=realType)                   :: ballsize, sd
-    logical                :: rearrange
-
-    !
-    ! copy values from sr to local variables
-    !
-    qv => sr%qv(1:)
-    dimen = sr%dimen
-    ballsize = sr%ballsize
-    rearrange = sr%rearrange
-    ind => sr%ind(1:)
-    data => sr%Data(1:,1:)
-    centeridx = sr%centeridx
-    correltime = sr%correltime
-    nn = sr%nn ! number to search for
-    nfound = sr%nfound
-
-    ! search through terminal bucket.
-    mainloop: do i = node%l, node%u
-
-       !
-       ! two choices for any point.  The list so far is either undersized,
-       ! or it is not.
-       !
-       ! If it is undersized, then add the point and its distance
-       ! unconditionally.  If the point added fills up the working
-       ! list then set the sr%ballsize, maximum distance bound (largest distance on
-       ! list) to be that distance, instead of the initialized +infinity.
-       !
-       ! If the running list is full size, then compute the
-       ! distance but break out immediately if it is larger
-       ! than sr%ballsize, "best squared distance" (of the largest element),
-       ! as it cannot be a good neighbor.
-       !
-       ! Once computed, compare to best_square distance.
-       ! if it is smaller, then delete the previous largest
-       ! element and add the new one.
-
-       ! which index to the point do we use?
-
-       if (rearrange) then
-          sd = 0.0
-          do k = 1,dimen
-             sd = sd + (data(k,i) - qv(k))**2
-             if (sd>ballsize) cycle mainloop
-          end do
-          indexofi = ind(i)  ! only read it if we have not broken out
-       else
-          indexofi = ind(i)
-          sd = 0.0
-          do k = 1,dimen
-             sd = sd + (data(k,indexofi) - qv(k))**2
-             if (sd>ballsize) cycle mainloop
-          end do
-       endif
-
-       if (centeridx > 0) then ! doing correlation interval?
-          if (abs(indexofi-centeridx)<correltime) cycle mainloop
-       endif
-
-       nfound = nfound+1
-       if (nfound .gt. sr%nalloc) then
-          ! oh nuts, we have to add another one to the tree but
-          ! there isn't enough room.
-          sr%overflow = .true.
-       else
-          sr%results(nfound)%dis = sd
-          sr%results(nfound)%idx = indexofi
-       endif
-    end do mainloop
-    !
-    ! Reset sr variables which may have changed during loop
-    !
-    sr%nfound = nfound
-  end subroutine process_terminal_node_fixedball
-
-  subroutine kdtree2_n_nearest_brute_force(tp,qv,nn,results)
-    implicit none
-    ! find the 'n' nearest neighbors to 'qv' by exhaustive search.
-    ! only use this subroutine for testing, as it is SLOW!  The
-    ! whole point of a k-d tree is to avoid doing what this subroutine
-    ! does.
-    type (kdtree2), pointer :: tp
-    real(kind=realType), intent (In)       :: qv(:)
-    integer(kind=intType), intent (In)    :: nn
-    type(kdtree2_result)    :: results(:)
-
-    integer(kind=intType) :: i, j, k
-    real(kind=realType), allocatable :: all_distances(:)
-    ! ..
-    allocate (all_distances(tp%n))
-    do i = 1, tp%n
-       all_distances(i) = square_distance(tp%dimen,qv,tp%the_data(:,i))
-    end do
-    ! now find 'n' smallest distances
-    do i = 1, nn
-       results(i)%dis =  huge(1.0)
-       results(i)%idx = -1
-    end do
-    do i = 1, tp%n
-       if (all_distances(i)<results(nn)%dis) then
-          ! insert it somewhere on the list
-          do j = 1, nn
-             if (all_distances(i)<results(j)%dis) exit
-          end do
-          ! now we know 'j'
-          do k = nn - 1, j, -1
-             results(k+1) = results(k)
-          end do
-          results(j)%dis = all_distances(i)
-          results(j)%idx = i
-       end if
-    end do
-    deallocate (all_distances)
-  end subroutine kdtree2_n_nearest_brute_force
-
-
-  subroutine kdtree2_r_nearest_brute_force(tp,qv,r2,nfound,results)
-    implicit none
-    ! find the nearest neighbors to 'qv' with distance**2 <= r2 by exhaustive search.
-    ! only use this subroutine for testing, as it is SLOW!  The
-    ! whole point of a k-d tree is to avoid doing what this subroutine
-    ! does.
-    type (kdtree2), pointer :: tp
-    real(kind=realType), intent (In)       :: qv(:)
-    real(kind=realType), intent (In)       :: r2
-    integer(kind=intType), intent(out)    :: nfound
-    type(kdtree2_result)    :: results(:)
-
-    integer(kind=intType) :: i, nalloc
-    real(kind=realType), allocatable :: all_distances(:)
-    ! ..
-    allocate (all_distances(tp%n))
-    do i = 1, tp%n
-       all_distances(i) = square_distance(tp%dimen,qv,tp%the_data(:,i))
-    end do
-
-    nfound = 0
-    nalloc = size(results,1)
-
-    do i = 1, tp%n
-       if (all_distances(i)< r2) then
-          ! insert it somewhere on the list
-          if (nfound .lt. nalloc) then
-             nfound = nfound+1
-             results(nfound)%dis = all_distances(i)
-             results(nfound)%idx = i
-          endif
-       end if
-    enddo
-    deallocate (all_distances)
-
-    call kdtree2_sort_results(nfound,results)
-
-
-  end subroutine kdtree2_r_nearest_brute_force
-
-  subroutine kdtree2_sort_results(nfound,results)
-    implicit none
-    !  Use after search to sort results(1:nfound) in order of increasing
-    !  distance.
-    integer(kind=intType), intent(in)          :: nfound
-    type(kdtree2_result), target :: results(:)
-    !
-    !
-
-    !THIS IS BUGGY WITH INTEL FORTRAN
-    !    If (nfound .Gt. 1) Call heapsort(results(1:nfound)%dis,results(1:nfound)%ind,nfound)
-    !
-    if (nfound .gt. 1) call heapsort_struct(results,nfound)
-
-    return
-  end subroutine kdtree2_sort_results
-
-  subroutine heapsort(a,ind,n)
-    implicit none
-    !
-    ! Sort a(1:n) in ascending order, permuting ind(1:n) similarly.
-    !
-    ! If ind(k) = k upon input, then it will give a sort index upon output.
-    !
-    integer(kind=intType),intent(in)          :: n
-    real(kind=realType), intent(inout)         :: a(:)
-    integer(kind=intType), intent(inout)      :: ind(:)
-
-    !
-    !
-    real(kind=realType)       :: value   ! temporary for a value from a()
-    integer(kind=intType)     :: ivalue  ! temporary for a value from ind()
-
-    integer(kind=intType)     :: i,j
-    integer(kind=intType)     :: ileft,iright
-
-    ileft=n/2+1
-    iright=n
-
-    !    do i=1,n
-    !       ind(i)=i
-    ! Generate initial idum array
-    !    end do
-
-    if(n.eq.1) return
-
-    do
-       if(ileft > 1)then
-          ileft=ileft-1
-          value=a(ileft); ivalue=ind(ileft)
-       else
-          value=a(iright); ivalue=ind(iright)
-          a(iright)=a(1); ind(iright)=ind(1)
-          iright=iright-1
-          if (iright == 1) then
-             a(1)=value;ind(1)=ivalue
-             return
-          endif
-       endif
-       i=ileft
-       j=2*ileft
-       do while (j <= iright)
-          if(j < iright) then
-             if(a(j) < a(j+1)) j=j+1
-          endif
-          if(value < a(j)) then
-             a(i)=a(j); ind(i)=ind(j)
-             i=j
-             j=j+j
-          else
-             j=iright+1
-          endif
-       end do
-       a(i)=value; ind(i)=ivalue
-    end do
-  end subroutine heapsort
-
-  subroutine heapsort_struct(a,n)
-    implicit none
-    !
-    ! Sort a(1:n) in ascending order
-    !
-    !
-    integer(kind=intType),intent(in)                 :: n
-    type(kdtree2_result),intent(inout) :: a(:)
-
-    !
-    !
-    type(kdtree2_result) :: value ! temporary value
-
-    integer(kind=intType)     :: i,j
-    integer(kind=intType)     :: ileft,iright
-
-    ileft=n/2+1
-    iright=n
-
-    !    do i=1,n
-    !       ind(i)=i
-    ! Generate initial idum array
-    !    end do
-
-    if(n.eq.1) return
-
-    do
-       if(ileft > 1)then
-          ileft=ileft-1
-          value=a(ileft)
-       else
-          value=a(iright)
-          a(iright)=a(1)
-          iright=iright-1
-          if (iright == 1) then
-             a(1) = value
-             return
-          endif
-       endif
-       i=ileft
-       j=2*ileft
-       do while (j <= iright)
-          if(j < iright) then
-             if(a(j)%dis < a(j+1)%dis) j=j+1
-          endif
-          if(value%dis < a(j)%dis) then
-             a(i)=a(j);
-             i=j
-             j=j+j
-          else
-             j=iright+1
-          endif
-       end do
-       a(i)=value
-    end do
-  end subroutine heapsort_struct
+        ! first compute min and max
+        dimen = tp%dimen
+        allocate (res)
+        allocate (res%box(dimen))
+
+        ! First, compute an APPROXIMATE bounding box of all points associated with this node.
+        if (u < l) then
+            ! no points in this box
+            nullify (res)
+            return
+        end if
+
+        if ((u - l) <= bucket_size) then
+            !
+            ! always compute true bounding box for terminal nodes.
+            !
+            do i = 1, dimen
+                call spread_in_coordinate(tp, i, l, u, res%box(i))
+            end do
+            res%cut_dim = 0
+            res%cut_val = 0.0
+            res%l = l
+            res%u = u
+            res%left => null()
+            res%right => null()
+        else
+            !
+            ! modify approximate bounding box.  This will be an
+            ! overestimate of the true bounding box, as we are only recomputing
+            ! the bounding box for the dimension that the parent split on.
+            !
+            ! Going to a true bounding box computation would significantly
+            ! increase the time necessary to build the tree, and usually
+            ! has only a very small difference.  This box is not used
+            ! for searching but only for deciding which coordinate to split on.
+            !
+            do i = 1, dimen
+                recompute = .true.
+                if (associated(parent)) then
+                    if (i .ne. parent%cut_dim) then
+                        recompute = .false.
+                    end if
+                end if
+                if (recompute) then
+                    call spread_in_coordinate(tp, i, l, u, res%box(i))
+                else
+                    res%box(i) = parent%box(i)
+                end if
+            end do
+
+            !
+            ! c is the identity of which coordinate has the greatest spread.
+            !
+
+            coorspread = res%box(1:dimen)%upper - res%box(1:dimen)%lower
+
+            tmp = -one
+            do i = 1, dimen
+                if (coorSpread(i) > tmp) then
+                    tmp = coorSpread(i)
+                    c = i
+                end if
+            end do
+
+            if (.True.) then
+                ! select exact median to have fully balanced tree.
+                m = (l + u) / 2
+                call select_on_coordinate(tp%the_data, tp%ind, c, m, l, u)
+            else
+                !
+                ! select point halfway between min and max, as per A. Moore,
+                ! who says this helps in some degenerate cases, or
+                ! actual arithmetic average.
+                !
+                if (.true.) then
+                    ! actually compute average
+                    average = sum(tp%the_data(c, tp%ind(l:u))) / real(u - l + 1, kind=realType)
+                else
+                    average = (res%box(c)%upper + res%box(c)%lower) / 2.0
+                end if
+
+                res%cut_val = average
+                m = select_on_coordinate_value(tp%the_data, tp%ind, c, average, l, u)
+            end if
+
+            ! moves indexes around
+            res%cut_dim = c
+            res%l = l
+            res%u = u
+            !         res%cut_val = tp%the_data(c,tp%ind(m))
+
+            res%left => build_tree_for_range(tp, l, m, res)
+            res%right => build_tree_for_range(tp, m + 1, u, res)
+
+            if (associated(res%right) .eqv. .false.) then
+                res%box = res%left%box
+                res%cut_val_left = res%left%box(c)%upper
+                res%cut_val = res%cut_val_left
+            elseif (associated(res%left) .eqv. .false.) then
+                res%box = res%right%box
+                res%cut_val_right = res%right%box(c)%lower
+                res%cut_val = res%cut_val_right
+            else
+                res%cut_val_right = res%right%box(c)%lower
+                res%cut_val_left = res%left%box(c)%upper
+                res%cut_val = (res%cut_val_left + res%cut_val_right) / 2
+
+                ! now remake the true bounding box for self.
+                ! Since we are taking unions (in effect) of a tree structure,
+                ! this is much faster than doing an exhaustive
+                ! search over all points
+                do idim = 1, dimen
+                    left = res%left%box(idim)%upper
+                    right = res%right%box(idim)%upper
+                    res%box(idim)%upper = max(left, right)
+
+                    left = res%left%box(idim)%lower
+                    right = res%right%box(idim)%lower
+
+                    res%box(idim)%lower = min(left, right)
+                end do
+                ! res%box%upper = max(res%left%box%upper,res%right%box%upper)
+                ! res%box%lower = min(res%left%box%lower,res%right%box%lower)
+            end if
+        end if
+    end function build_tree_for_range
+
+    integer(kind=intType) function select_on_coordinate_value(v, ind, c, alpha, li, ui) &
+        result(res)
+        implicit none
+        ! Move elts of ind around between l and u, so that all points
+        ! <= than alpha (in c cooordinate) are first, and then
+        ! all points > alpha are second.
+
+        !
+        ! Algorithm (matt kennel).
+        !
+        ! Consider the list as having three parts: on the left,
+        ! the points known to be <= alpha.  On the right, the points
+        ! known to be > alpha, and in the middle, the currently unknown
+        ! points.   The algorithm is to scan the unknown points, starting
+        ! from the left, and swapping them so that they are added to
+        ! the left stack or the right stack, as appropriate.
+        !
+        ! The algorithm finishes when the unknown stack is empty.
+        !
+        ! .. Scalar Arguments ..
+        integer(kind=intType), intent(In) :: c, li, ui
+        real(kind=realType), intent(in) :: alpha
+        ! ..
+        real(kind=realType) :: v(1:, 1:)
+        integer(kind=intType) :: ind(1:)
+        integer(kind=intType) :: tmp
+        ! ..
+        integer(kind=intType) :: lb, rb
+        !
+        ! The points known to be <= alpha are in
+        ! [l,lb-1]
+        !
+        ! The points known to be > alpha are in
+        ! [rb+1,u].
+        !
+        ! Therefore we add new points into lb or
+        ! rb as appropriate.  When lb=rb
+        ! we are done.  We return the location of the last point <= alpha.
+        !
+        !
+        lb = li; rb = ui
+
+        do while (lb < rb)
+            if (v(c, ind(lb)) <= alpha) then
+                ! it is good where it is.
+                lb = lb + 1
+            else
+                ! swap it with rb.
+                tmp = ind(lb); ind(lb) = ind(rb); ind(rb) = tmp
+                rb = rb - 1
+            end if
+        end do
+
+        ! now lb .eq. ub
+        if (v(c, ind(lb)) <= alpha) then
+            res = lb
+        else
+            res = lb - 1
+        end if
+
+    end function select_on_coordinate_value
+
+    subroutine select_on_coordinate(v, ind, c, k, li, ui)
+        implicit none
+        ! Move elts of ind around between l and u, so that the kth
+        ! element
+        ! is >= those below, <= those above, in the coordinate c.
+        ! .. Scalar Arguments ..
+        integer(kind=intType), intent(In) :: c, k, li, ui
+        ! ..
+        integer(kind=intType) :: i, l, m, s, t, u
+        ! ..
+        real(kind=realType) :: v(:, :)
+        integer(kind=intType) :: ind(:)
+        ! ..
+        l = li
+        u = ui
+        do while (l < u)
+            t = ind(l)
+            m = l
+            do i = l + 1, u
+                if (v(c, ind(i)) < v(c, t)) then
+                    m = m + 1
+                    s = ind(m)
+                    ind(m) = ind(i)
+                    ind(i) = s
+                end if
+            end do
+            s = ind(l)
+            ind(l) = ind(m)
+            ind(m) = s
+            if (m <= k) l = m + 1
+            if (m >= k) u = m - 1
+        end do
+    end subroutine select_on_coordinate
+
+    subroutine spread_in_coordinate(tp, c, l, u, interv)
+
+        implicit none
+
+        ! the spread in coordinate 'c', between l and u.
+        !
+        ! Return lower bound in 'smin', and upper in 'smax',
+        ! ..
+        ! .. Structure Arguments ..
+        type(kdtree2), pointer :: tp
+        type(interval), intent(out) :: interv
+        ! ..
+        ! .. Scalar Arguments ..
+        integer(kind=intType), intent(In) :: c, l, u
+        ! ..
+        ! .. Local Scalars ..
+        real(kind=realType) :: last, lmax, lmin, t, smin, smax
+        integer(kind=intType) :: i, ulocal
+        ! ..
+        ! .. Local Arrays ..
+        real(kind=realType), pointer :: v(:, :)
+        integer(kind=intType), pointer :: ind(:)
+
+        v => tp%the_data(1:, 1:)
+        ind => tp%ind(1:)
+        smin = v(c, ind(l))
+        smax = smin
+
+        ulocal = u
+
+        do i = l + 2, ulocal, 2
+            lmin = v(c, ind(i - 1))
+            lmax = v(c, ind(i))
+            if (lmin > lmax) then
+                t = lmin
+                lmin = lmax
+                lmax = t
+            end if
+            if (smin > lmin) smin = lmin
+            if (smax < lmax) smax = lmax
+        end do
+        if (i == ulocal + 1) then
+            last = v(c, ind(ulocal))
+            if (smin > last) smin = last
+            if (smax < last) smax = last
+        end if
+
+        interv%lower = smin
+        interv%upper = smax
+
+    end subroutine spread_in_coordinate
+
+    subroutine kdtree2destroy(tp)
+        implicit none
+        ! Deallocates all memory for the tree, except input data matrix
+        ! .. Structure Arguments ..
+        type(kdtree2), pointer :: tp
+        ! ..
+        call destroy_node(tp%root)
+
+        deallocate (tp%ind)
+        nullify (tp%ind)
+
+        if (tp%rearrange) then
+            deallocate (tp%rearranged_data)
+            nullify (tp%rearranged_data)
+        end if
+
+        deallocate (tp)
+        return
+
+    contains
+        recursive subroutine destroy_node(np)
+            implicit none
+            ! .. Structure Arguments ..
+            type(tree_node), pointer :: np
+            ! ..
+            ! .. Intrinsic Functions ..
+            intrinsic ASSOCIATED
+            ! ..
+            if (associated(np%left)) then
+                call destroy_node(np%left)
+                nullify (np%left)
+            end if
+            if (associated(np%right)) then
+                call destroy_node(np%right)
+                nullify (np%right)
+            end if
+            if (associated(np%box)) deallocate (np%box)
+            deallocate (np)
+            return
+
+        end subroutine destroy_node
+
+    end subroutine kdtree2destroy
+
+    subroutine kdtree2_n_nearest(tp, qv, nn, results)
+        implicit none
+        ! Find the 'nn' vectors in the tree nearest to 'qv' in euclidean norm
+        ! returning their indexes and distances in 'indexes' and 'distances'
+        ! arrays already allocated passed to this subroutine.
+        type(kdtree2), pointer :: tp
+        real(kind=realType), target, intent(In) :: qv(:)
+        integer(kind=intType), intent(In) :: nn
+        type(kdtree2_result), target :: results(:)
+
+        sr%ballsize = huge(1.0)
+        sr%qv => qv
+        sr%nn = nn
+        sr%nfound = 0
+        sr%centeridx = -1
+        sr%correltime = 0
+        sr%overflow = .false.
+
+        sr%results => results
+
+        sr%nalloc = nn   ! will be checked
+
+        sr%ind => tp%ind
+        sr%rearrange = tp%rearrange
+        if (tp%rearrange) then
+            sr%Data => tp%rearranged_data
+        else
+            sr%Data => tp%the_data
+        end if
+        sr%dimen = tp%dimen
+
+        call validate_query_storage(nn)
+        sr%pq = pq_create(results)
+
+        call search(tp%root)
+
+        if (tp%sort) then
+            call kdtree2_sort_results(nn, results)
+        end if
+        !    deallocate(sr%pqp)
+        return
+    end subroutine kdtree2_n_nearest
+
+    subroutine kdtree2_n_nearest_around_point(tp, idxin, correltime, nn, results)
+        implicit none
+        ! Find the 'nn' vectors in the tree nearest to point 'idxin',
+        ! with correlation window 'correltime', returing results in
+        ! results(:), which must be pre-allocated upon entry.
+        type(kdtree2), pointer :: tp
+        integer(kind=intType), intent(In) :: idxin, correltime, nn
+        type(kdtree2_result), target :: results(:)
+
+        allocate (sr%qv(tp%dimen))
+        sr%qv = tp%the_data(:, idxin) ! copy the vector
+        sr%ballsize = huge(1.0)       ! the largest real(kind=realType) number
+        sr%centeridx = idxin
+        sr%correltime = correltime
+
+        sr%nn = nn
+        sr%nfound = 0
+
+        sr%dimen = tp%dimen
+        sr%nalloc = nn
+
+        sr%results => results
+
+        sr%ind => tp%ind
+        sr%rearrange = tp%rearrange
+
+        if (sr%rearrange) then
+            sr%Data => tp%rearranged_data
+        else
+            sr%Data => tp%the_data
+        end if
+
+        call validate_query_storage(nn)
+        sr%pq = pq_create(results)
+
+        call search(tp%root)
+
+        if (tp%sort) then
+            call kdtree2_sort_results(nn, results)
+        end if
+        deallocate (sr%qv)
+        return
+    end subroutine kdtree2_n_nearest_around_point
+
+    subroutine kdtree2_r_nearest(tp, qv, r2, nfound, nalloc, results)
+        implicit none
+        ! find the nearest neighbors to point 'idxin', within SQUARED
+        ! Euclidean distance 'r2'.   Upon ENTRY, nalloc must be the
+        ! size of memory allocated for results(1:nalloc).  Upon
+        ! EXIT, nfound is the number actually found within the ball.
+        !
+        !  Note that if nfound .gt. nalloc then more neighbors were found
+        !  than there were storage to store.  The resulting list is NOT
+        !  the smallest ball inside norm r^2
+        !
+        ! Results are NOT sorted unless tree was created with sort option.
+        type(kdtree2), pointer :: tp
+        real(kind=realType), target, intent(In) :: qv(:)
+        real(kind=realType), intent(in) :: r2
+        integer(kind=intType), intent(out) :: nfound
+        integer(kind=intType), intent(In) :: nalloc
+        type(kdtree2_result), target :: results(:)
+
+        !
+        sr%qv => qv
+        sr%ballsize = r2
+        sr%nn = 0      ! flag for fixed ball search
+        sr%nfound = 0
+        sr%centeridx = -1
+        sr%correltime = 0
+
+        sr%results => results
+
+        call validate_query_storage(nalloc)
+        sr%nalloc = nalloc
+        sr%overflow = .false.
+        sr%ind => tp%ind
+        sr%rearrange = tp%rearrange
+
+        if (tp%rearrange) then
+            sr%Data => tp%rearranged_data
+        else
+            sr%Data => tp%the_data
+        end if
+        sr%dimen = tp%dimen
+
+        !
+        !sr%dsl = Huge(sr%dsl)    ! set to huge positive values
+        !sr%il = -1               ! set to invalid indexes
+        !
+
+        call search(tp%root)
+        nfound = sr%nfound
+        if (sr%overflow) then
+            ! Sorting will cause an error if we have overflowed.
+            return
+        end if
+
+        if (tp%sort) then
+            call kdtree2_sort_results(nfound, results)
+        end if
+
+    end subroutine kdtree2_r_nearest
+
+    subroutine kdtree2_r_nearest_around_point(tp, idxin, correltime, r2, &
+                                              nfound, nalloc, results)
+        implicit none
+        !
+        ! Like kdtree2_r_nearest, but around a point 'idxin' already existing
+        ! in the data set.
+        !
+        ! Results are NOT sorted unless tree was created with sort option.
+        !
+        type(kdtree2), pointer :: tp
+        integer(kind=intType), intent(In) :: idxin, correltime, nalloc
+        real(kind=realType), intent(in) :: r2
+        integer(kind=intType), intent(out) :: nfound
+        type(kdtree2_result), target :: results(:)
+        ! ..
+        ! .. Intrinsic Functions ..
+        intrinsic HUGE
+        ! ..
+        allocate (sr%qv(tp%dimen))
+        sr%qv = tp%the_data(:, idxin) ! copy the vector
+        sr%ballsize = r2
+        sr%nn = 0    ! flag for fixed r search
+        sr%nfound = 0
+        sr%centeridx = idxin
+        sr%correltime = correltime
+
+        sr%results => results
+
+        sr%nalloc = nalloc
+        sr%overflow = .false.
+
+        call validate_query_storage(nalloc)
+
+        !    sr%dsl = HUGE(sr%dsl)    ! set to huge positive values
+        !    sr%il = -1               ! set to invalid indexes
+
+        sr%ind => tp%ind
+        sr%rearrange = tp%rearrange
+
+        if (tp%rearrange) then
+            sr%Data => tp%rearranged_data
+        else
+            sr%Data => tp%the_data
+        end if
+        sr%rearrange = tp%rearrange
+        sr%dimen = tp%dimen
+
+        !
+        !sr%dsl = Huge(sr%dsl)    ! set to huge positive values
+        !sr%il = -1               ! set to invalid indexes
+        !
+
+        call search(tp%root)
+        nfound = sr%nfound
+        if (tp%sort) then
+            call kdtree2_sort_results(nfound, results)
+        end if
+
+        if (sr%overflow) then
+            write (*, *) 'KDTREE_TRANS: warning! return from kdtree2_r_nearest found more neighbors'
+            write (*, *) 'KDTREE_TRANS: than storage was provided for.  Answer is NOT smallest ball'
+            write (*, *) 'KDTREE_TRANS: with that number of neighbors!  I.e. it is wrong.'
+        end if
+
+        deallocate (sr%qv)
+        return
+    end subroutine kdtree2_r_nearest_around_point
+
+    function kdtree2_r_count(tp, qv, r2) result(nfound)
+        implicit none
+        ! Count the number of neighbors within square distance 'r2'.
+        type(kdtree2), pointer :: tp
+        real(kind=realType), target, intent(In) :: qv(:)
+        real(kind=realType), intent(in) :: r2
+        integer(kind=intType) :: nfound
+        ! ..
+        ! .. Intrinsic Functions ..
+        intrinsic HUGE
+        ! ..
+        sr%qv => qv
+        sr%ballsize = r2
+
+        sr%nn = 0       ! flag for fixed r search
+        sr%nfound = 0
+        sr%centeridx = -1
+        sr%correltime = 0
+
+        nullify (sr%results) ! for some reason, FTN 95 chokes on '=> null()'
+
+        sr%nalloc = 0            ! we do not allocate any storage but that's OK
+        ! for counting.
+        sr%ind => tp%ind
+        sr%rearrange = tp%rearrange
+        if (tp%rearrange) then
+            sr%Data => tp%rearranged_data
+        else
+            sr%Data => tp%the_data
+        end if
+        sr%dimen = tp%dimen
+
+        !
+        !sr%dsl = Huge(sr%dsl)    ! set to huge positive values
+        !sr%il = -1               ! set to invalid indexes
+        !
+        sr%overflow = .false.
+
+        call search(tp%root)
+
+        nfound = sr%nfound
+
+        return
+    end function kdtree2_r_count
+
+    function kdtree2_r_count_around_point(tp, idxin, correltime, r2) &
+        result(nfound)
+        implicit none
+        ! Count the number of neighbors within square distance 'r2' around
+        ! point 'idxin' with decorrelation time 'correltime'.
+        !
+        type(kdtree2), pointer :: tp
+        integer(kind=intType), intent(In) :: correltime, idxin
+        real(kind=realType), intent(in) :: r2
+        integer(kind=intType) :: nfound
+        ! ..
+        ! ..
+        ! .. Intrinsic Functions ..
+        intrinsic HUGE
+        ! ..
+        allocate (sr%qv(tp%dimen))
+        sr%qv = tp%the_data(:, idxin)
+        sr%ballsize = r2
+
+        sr%nn = 0       ! flag for fixed r search
+        sr%nfound = 0
+        sr%centeridx = idxin
+        sr%correltime = correltime
+        nullify (sr%results)
+
+        sr%nalloc = 0            ! we do not allocate any storage but that's OK
+        ! for counting.
+
+        sr%ind => tp%ind
+        sr%rearrange = tp%rearrange
+
+        if (sr%rearrange) then
+            sr%Data => tp%rearranged_data
+        else
+            sr%Data => tp%the_data
+        end if
+        sr%dimen = tp%dimen
+
+        !
+        !sr%dsl = Huge(sr%dsl)    ! set to huge positive values
+        !sr%il = -1               ! set to invalid indexes
+        !
+        sr%overflow = .false.
+
+        call search(tp%root)
+
+        nfound = sr%nfound
+
+        return
+    end function kdtree2_r_count_around_point
+
+    subroutine validate_query_storage(n)
+        implicit none
+        !
+        ! make sure we have enough storage for n
+        !
+        integer(kind=intType), intent(in) :: n
+
+        if (size(sr%results, 1) .lt. n) then
+            write (*, *) 'KDTREE_TRANS:  you did not provide enough storage for results(1:n)'
+            stop
+            return
+        end if
+
+        return
+    end subroutine validate_query_storage
+
+    function square_distance(d, iv, qv) result(res)
+        implicit none
+        ! distance between iv[1:n] and qv[1:n]
+        ! .. Function Return Value ..
+        ! re-implemented to improve vectorization.
+        real(kind=realType) :: res
+        ! ..
+        ! ..
+        ! .. Scalar Arguments ..
+        integer(kind=intType) :: d
+        ! ..
+        ! .. Array Arguments ..
+        real(kind=realType) :: iv(:), qv(:)
+        ! ..
+        ! ..
+        res = sum((iv(1:d) - qv(1:d))**2)
+    end function square_distance
+
+    recursive subroutine search(node)
+        implicit none
+        !
+        ! This is the innermost core routine of the kd-tree search.  Along
+        ! with "process_terminal_node", it is the performance bottleneck.
+        !
+        ! This version uses a logically complete secondary search of
+        ! "box in bounds", whether the sear
+        !
+        type(Tree_node), pointer :: node
+        ! ..
+        type(tree_node), pointer :: ncloser, nfarther
+        !
+        integer(kind=intType) :: cut_dim, i
+        ! ..
+        real(kind=realType) :: qval, dis
+        real(kind=realType) :: ballsize
+        real(kind=realType), pointer :: qv(:)
+        type(interval), pointer :: box(:)
+
+        if ((associated(node%left) .and. associated(node%right)) .eqv. .false.) then
+            ! we are on a terminal node
+            if (sr%nn .eq. 0) then
+                call process_terminal_node_fixedball(node)
+            else
+                call process_terminal_node(node)
+            end if
+        else
+            ! we are not on a terminal node
+            qv => sr%qv(1:)
+            cut_dim = node%cut_dim
+            qval = qv(cut_dim)
+
+            if (qval < node%cut_val) then
+                ncloser => node%left
+                nfarther => node%right
+                dis = (node%cut_val_right - qval)**2
+                !          extra = node%cut_val - qval
+            else
+                ncloser => node%right
+                nfarther => node%left
+                dis = (node%cut_val_left - qval)**2
+                !          extra = qval- node%cut_val_left
+            end if
+
+            if (associated(ncloser)) call search(ncloser)
+
+            ! we may need to search the second node.
+            if (associated(nfarther)) then
+                ballsize = sr%ballsize
+                !          dis=extra**2
+                if (dis <= ballsize) then
+                    !
+                    ! we do this separately as going on the first cut dimen is often
+                    ! a good idea.
+                    ! note that if extra**2 < sr%ballsize, then the next
+                    ! check will also be false.
+                    !
+                    box => node%box(1:)
+                    do i = 1, sr%dimen
+                        if (i .ne. cut_dim) then
+                            dis = dis + dis2_from_bnd(qv(i), box(i)%lower, box(i)%upper)
+                            if (dis > ballsize) then
+                                return
+                            end if
+                        end if
+                    end do
+
+                    !
+                    ! if we are still here then we need to search mroe.
+                    !
+                    call search(nfarther)
+                end if
+            end if
+        end if
+    end subroutine search
+
+    real(kind=realType) function dis2_from_bnd(x, amin, amax) result(res)
+        implicit none
+        real(kind=realType), intent(in) :: x, amin, amax
+
+        if (x > amax) then
+            res = (x - amax)**2; 
+            return
+        else
+            if (x < amin) then
+                res = (amin - x)**2; 
+                return
+            else
+                res = 0.0
+                return
+            end if
+        end if
+        return
+    end function dis2_from_bnd
+
+    logical function box_in_search_range(node, sr) result(res)
+        implicit none
+        !
+        ! Return the distance from 'qv' to the CLOSEST corner of node's
+        ! bounding box
+        ! for all coordinates outside the box.   Coordinates inside the box
+        ! contribute nothing to the distance.
+        !
+        type(tree_node), pointer :: node
+        type(tree_search_record), pointer :: sr
+
+        integer(kind=intType) :: dimen, i
+        real(kind=realType) :: dis, ballsize
+        real(kind=realType) :: l, u
+
+        dimen = sr%dimen
+        ballsize = sr%ballsize
+        dis = 0.0
+        res = .true.
+        do i = 1, dimen
+            l = node%box(i)%lower
+            u = node%box(i)%upper
+            dis = dis + (dis2_from_bnd(sr%qv(i), l, u))
+            if (dis > ballsize) then
+                res = .false.
+                return
+            end if
+        end do
+        res = .true.
+        return
+    end function box_in_search_range
+
+    subroutine process_terminal_node(node)
+        implicit none
+        !
+        ! Look for actual near neighbors in 'node', and update
+        ! the search results on the sr data structure.
+        !
+        type(tree_node), pointer :: node
+        !
+        real(kind=realType), pointer :: qv(:)
+        integer(kind=intType), pointer :: ind(:)
+        real(kind=realType), pointer :: data(:, :)
+        !
+        integer(kind=intType) :: dimen, i, indexofi, k, centeridx, correltime
+        real(kind=realType) :: ballsize, sd, newpri
+        logical :: rearrange
+        type(pq), pointer :: pqp
+        !
+        ! copy values from sr to local variables
+        !
+        !
+        ! Notice, making local pointers with an EXPLICIT lower bound
+        ! seems to generate faster code.
+        ! why?  I don't know.
+        qv => sr%qv(1:)
+        pqp => sr%pq
+        dimen = sr%dimen
+        ballsize = sr%ballsize
+        rearrange = sr%rearrange
+        ind => sr%ind(1:)
+        data => sr%Data(1:, 1:)
+        centeridx = sr%centeridx
+        correltime = sr%correltime
+
+        !    doing_correl = (centeridx >= 0)  ! Do we have a decorrelation window?
+        !    include_point = .true.    ! by default include all points
+        ! search through terminal bucket.
+
+        mainloop: do i = node%l, node%u
+            if (rearrange) then
+                sd = 0.0
+                do k = 1, dimen
+                    sd = sd + (data(k, i) - qv(k))**2
+                    if (sd > ballsize) cycle mainloop
+                end do
+                indexofi = ind(i)  ! only read it if we have not broken out
+            else
+                indexofi = ind(i)
+                sd = 0.0
+                do k = 1, dimen
+                    sd = sd + (data(k, indexofi) - qv(k))**2
+                    if (sd > ballsize) cycle mainloop
+                end do
+            end if
+
+            if (centeridx > 0) then ! doing correlation interval?
+                if (abs(indexofi - centeridx) < correltime) cycle mainloop
+            end if
+
+            !
+            ! two choices for any point.  The list so far is either undersized,
+            ! or it is not.
+            !
+            ! If it is undersized, then add the point and its distance
+            ! unconditionally.  If the point added fills up the working
+            ! list then set the sr%ballsize, maximum distance bound (largest distance on
+            ! list) to be that distance, instead of the initialized +infinity.
+            !
+            ! If the running list is full size, then compute the
+            ! distance but break out immediately if it is larger
+            ! than sr%ballsize, "best squared distance" (of the largest element),
+            ! as it cannot be a good neighbor.
+            !
+            ! Once computed, compare to best_square distance.
+            ! if it is smaller, then delete the previous largest
+            ! element and add the new one.
+
+            if (sr%nfound .lt. sr%nn) then
+                !
+                ! add this point unconditionally to fill list.
+                !
+                sr%nfound = sr%nfound + 1
+                newpri = pq_insert(pqp, sd, indexofi)
+                if (sr%nfound .eq. sr%nn) ballsize = newpri
+                ! we have just filled the working list.
+                ! put the best square distance to the maximum value
+                ! on the list, which is extractable from the PQ.
+
+            else
+                !
+                ! now, if we get here,
+                ! we know that the current node has a squared
+                ! distance smaller than the largest one on the list, and
+                ! belongs on the list.
+                ! Hence we replace that with the current one.
+                !
+                ballsize = pq_replace_max(pqp, sd, indexofi)
+            end if
+        end do mainloop
+        !
+        ! Reset sr variables which may have changed during loop
+        !
+        sr%ballsize = ballsize
+
+    end subroutine process_terminal_node
+
+    subroutine process_terminal_node_fixedball(node)
+        implicit none
+        !
+        ! Look for actual near neighbors in 'node', and update
+        ! the search results on the sr data structure, i.e.
+        ! save all within a fixed ball.
+        !
+        type(tree_node), pointer :: node
+        !
+        real(kind=realType), pointer :: qv(:)
+        integer(kind=intType), pointer :: ind(:)
+        real(kind=realType), pointer :: data(:, :)
+        !
+        integer(kind=intType) :: nfound
+        integer(kind=intType) :: dimen, i, indexofi, k
+        integer(kind=intType) :: centeridx, correltime, nn
+        real(kind=realType) :: ballsize, sd
+        logical :: rearrange
+
+        !
+        ! copy values from sr to local variables
+        !
+        qv => sr%qv(1:)
+        dimen = sr%dimen
+        ballsize = sr%ballsize
+        rearrange = sr%rearrange
+        ind => sr%ind(1:)
+        data => sr%Data(1:, 1:)
+        centeridx = sr%centeridx
+        correltime = sr%correltime
+        nn = sr%nn ! number to search for
+        nfound = sr%nfound
+
+        ! search through terminal bucket.
+        mainloop: do i = node%l, node%u
+
+            !
+            ! two choices for any point.  The list so far is either undersized,
+            ! or it is not.
+            !
+            ! If it is undersized, then add the point and its distance
+            ! unconditionally.  If the point added fills up the working
+            ! list then set the sr%ballsize, maximum distance bound (largest distance on
+            ! list) to be that distance, instead of the initialized +infinity.
+            !
+            ! If the running list is full size, then compute the
+            ! distance but break out immediately if it is larger
+            ! than sr%ballsize, "best squared distance" (of the largest element),
+            ! as it cannot be a good neighbor.
+            !
+            ! Once computed, compare to best_square distance.
+            ! if it is smaller, then delete the previous largest
+            ! element and add the new one.
+
+            ! which index to the point do we use?
+
+            if (rearrange) then
+                sd = 0.0
+                do k = 1, dimen
+                    sd = sd + (data(k, i) - qv(k))**2
+                    if (sd > ballsize) cycle mainloop
+                end do
+                indexofi = ind(i)  ! only read it if we have not broken out
+            else
+                indexofi = ind(i)
+                sd = 0.0
+                do k = 1, dimen
+                    sd = sd + (data(k, indexofi) - qv(k))**2
+                    if (sd > ballsize) cycle mainloop
+                end do
+            end if
+
+            if (centeridx > 0) then ! doing correlation interval?
+                if (abs(indexofi - centeridx) < correltime) cycle mainloop
+            end if
+
+            nfound = nfound + 1
+            if (nfound .gt. sr%nalloc) then
+                ! oh nuts, we have to add another one to the tree but
+                ! there isn't enough room.
+                sr%overflow = .true.
+            else
+                sr%results(nfound)%dis = sd
+                sr%results(nfound)%idx = indexofi
+            end if
+        end do mainloop
+        !
+        ! Reset sr variables which may have changed during loop
+        !
+        sr%nfound = nfound
+    end subroutine process_terminal_node_fixedball
+
+    subroutine kdtree2_n_nearest_brute_force(tp, qv, nn, results)
+        implicit none
+        ! find the 'n' nearest neighbors to 'qv' by exhaustive search.
+        ! only use this subroutine for testing, as it is SLOW!  The
+        ! whole point of a k-d tree is to avoid doing what this subroutine
+        ! does.
+        type(kdtree2), pointer :: tp
+        real(kind=realType), intent(In) :: qv(:)
+        integer(kind=intType), intent(In) :: nn
+        type(kdtree2_result) :: results(:)
+
+        integer(kind=intType) :: i, j, k
+        real(kind=realType), allocatable :: all_distances(:)
+        ! ..
+        allocate (all_distances(tp%n))
+        do i = 1, tp%n
+            all_distances(i) = square_distance(tp%dimen, qv, tp%the_data(:, i))
+        end do
+        ! now find 'n' smallest distances
+        do i = 1, nn
+            results(i)%dis = huge(1.0)
+            results(i)%idx = -1
+        end do
+        do i = 1, tp%n
+            if (all_distances(i) < results(nn)%dis) then
+                ! insert it somewhere on the list
+                do j = 1, nn
+                    if (all_distances(i) < results(j)%dis) exit
+                end do
+                ! now we know 'j'
+                do k = nn - 1, j, -1
+                    results(k + 1) = results(k)
+                end do
+                results(j)%dis = all_distances(i)
+                results(j)%idx = i
+            end if
+        end do
+        deallocate (all_distances)
+    end subroutine kdtree2_n_nearest_brute_force
+
+    subroutine kdtree2_r_nearest_brute_force(tp, qv, r2, nfound, results)
+        implicit none
+        ! find the nearest neighbors to 'qv' with distance**2 <= r2 by exhaustive search.
+        ! only use this subroutine for testing, as it is SLOW!  The
+        ! whole point of a k-d tree is to avoid doing what this subroutine
+        ! does.
+        type(kdtree2), pointer :: tp
+        real(kind=realType), intent(In) :: qv(:)
+        real(kind=realType), intent(In) :: r2
+        integer(kind=intType), intent(out) :: nfound
+        type(kdtree2_result) :: results(:)
+
+        integer(kind=intType) :: i, nalloc
+        real(kind=realType), allocatable :: all_distances(:)
+        ! ..
+        allocate (all_distances(tp%n))
+        do i = 1, tp%n
+            all_distances(i) = square_distance(tp%dimen, qv, tp%the_data(:, i))
+        end do
+
+        nfound = 0
+        nalloc = size(results, 1)
+
+        do i = 1, tp%n
+            if (all_distances(i) < r2) then
+                ! insert it somewhere on the list
+                if (nfound .lt. nalloc) then
+                    nfound = nfound + 1
+                    results(nfound)%dis = all_distances(i)
+                    results(nfound)%idx = i
+                end if
+            end if
+        end do
+        deallocate (all_distances)
+
+        call kdtree2_sort_results(nfound, results)
+
+    end subroutine kdtree2_r_nearest_brute_force
+
+    subroutine kdtree2_sort_results(nfound, results)
+        implicit none
+        !  Use after search to sort results(1:nfound) in order of increasing
+        !  distance.
+        integer(kind=intType), intent(in) :: nfound
+        type(kdtree2_result), target :: results(:)
+        !
+        !
+
+        !THIS IS BUGGY WITH INTEL FORTRAN
+        !    If (nfound .Gt. 1) Call heapsort(results(1:nfound)%dis,results(1:nfound)%ind,nfound)
+        !
+        if (nfound .gt. 1) call heapsort_struct(results, nfound)
+
+        return
+    end subroutine kdtree2_sort_results
+
+    subroutine heapsort(a, ind, n)
+        implicit none
+        !
+        ! Sort a(1:n) in ascending order, permuting ind(1:n) similarly.
+        !
+        ! If ind(k) = k upon input, then it will give a sort index upon output.
+        !
+        integer(kind=intType), intent(in) :: n
+        real(kind=realType), intent(inout) :: a(:)
+        integer(kind=intType), intent(inout) :: ind(:)
+
+        !
+        !
+        real(kind=realType) :: value   ! temporary for a value from a()
+        integer(kind=intType) :: ivalue  ! temporary for a value from ind()
+
+        integer(kind=intType) :: i, j
+        integer(kind=intType) :: ileft, iright
+
+        ileft = n / 2 + 1
+        iright = n
+
+        !    do i=1,n
+        !       ind(i)=i
+        ! Generate initial idum array
+        !    end do
+
+        if (n .eq. 1) return
+
+        do
+            if (ileft > 1) then
+                ileft = ileft - 1
+                value = a(ileft); ivalue = ind(ileft)
+            else
+                value = a(iright); ivalue = ind(iright)
+                a(iright) = a(1); ind(iright) = ind(1)
+                iright = iright - 1
+                if (iright == 1) then
+                    a(1) = value; ind(1) = ivalue
+                    return
+                end if
+            end if
+            i = ileft
+            j = 2 * ileft
+            do while (j <= iright)
+                if (j < iright) then
+                    if (a(j) < a(j + 1)) j = j + 1
+                end if
+                if (value < a(j)) then
+                    a(i) = a(j); ind(i) = ind(j)
+                    i = j
+                    j = j + j
+                else
+                    j = iright + 1
+                end if
+            end do
+            a(i) = value; ind(i) = ivalue
+        end do
+    end subroutine heapsort
+
+    subroutine heapsort_struct(a, n)
+        implicit none
+        !
+        ! Sort a(1:n) in ascending order
+        !
+        !
+        integer(kind=intType), intent(in) :: n
+        type(kdtree2_result), intent(inout) :: a(:)
+
+        !
+        !
+        type(kdtree2_result) :: value ! temporary value
+
+        integer(kind=intType) :: i, j
+        integer(kind=intType) :: ileft, iright
+
+        ileft = n / 2 + 1
+        iright = n
+
+        !    do i=1,n
+        !       ind(i)=i
+        ! Generate initial idum array
+        !    end do
+
+        if (n .eq. 1) return
+
+        do
+            if (ileft > 1) then
+                ileft = ileft - 1
+                value = a(ileft)
+            else
+                value = a(iright)
+                a(iright) = a(1)
+                iright = iright - 1
+                if (iright == 1) then
+                    a(1) = value
+                    return
+                end if
+            end if
+            i = ileft
+            j = 2 * ileft
+            do while (j <= iright)
+                if (j < iright) then
+                    if (a(j)%dis < a(j + 1)%dis) j = j + 1
+                end if
+                if (value%dis < a(j)%dis) then
+                    a(i) = a(j); 
+                    i = j
+                    j = j + j
+                else
+                    j = iright + 1
+                end if
+            end do
+            a(i) = value
+        end do
+    end subroutine heapsort_struct
 
 end module kdtree2_module
diff --git a/src/modules/killSignals.f90 b/src/modules/killSignals.f90
index 52d42a319..f397c398e 100644
--- a/src/modules/killSignals.f90
+++ b/src/modules/killSignals.f90
@@ -1,4 +1,4 @@
-       module killSignals
+module killSignals
 !
 !       This module contains the variables used to handle the
 !       kill signals from the user. The user can send two signals that
@@ -12,30 +12,30 @@ module killSignals
 !       The handling can be switched off at compile time using the
 !       compiler flag -DUSE_NO_SIGNALS.
 !
-        use constants, only : intType
-       implicit none
-       save
+    use constants, only: intType
+    implicit none
+    save
 
-       ! Definition of some constants for the signalling. As in the
-       ! reduce functions a maximum is determined, it is important that
-       ! noSignal is less than signalWrite and signalWriteQuit.
+    ! Definition of some constants for the signalling. As in the
+    ! reduce functions a maximum is determined, it is important that
+    ! noSignal is less than signalWrite and signalWriteQuit.
 
-       integer(kind=intType), parameter :: noSignal        = 0
-       integer(kind=intType), parameter :: signalWrite     = 1
-       integer(kind=intType), parameter :: signalWriteQuit = 2
+    integer(kind=intType), parameter :: noSignal = 0
+    integer(kind=intType), parameter :: signalWrite = 1
+    integer(kind=intType), parameter :: signalWriteQuit = 2
 
-       ! localSignal:  Signal stored on this processor.
-       ! globalSignal: Maximum of the local signals.
+    ! localSignal:  Signal stored on this processor.
+    ! globalSignal: Maximum of the local signals.
 
-       integer(kind=intType) :: localSignal, globalSignal
+    integer(kind=intType) :: localSignal, globalSignal
 
-       ! fromPython: was this instance of the solver called from python
-       logical :: fromPython
+    ! fromPython: was this instance of the solver called from python
+    logical :: fromPython
 
-       ! routineFailed: was terminate called from a routine?
-       logical :: routineFailed
-       logical :: fatalFail
+    ! routineFailed: was terminate called from a routine?
+    logical :: routineFailed
+    logical :: fatalFail
 
-       logical :: adjointFailed
+    logical :: adjointFailed
 
-       end module killSignals
+end module killSignals
diff --git a/src/modules/monitor.f90 b/src/modules/monitor.f90
index d8d9cb6dd..c57f48661 100644
--- a/src/modules/monitor.f90
+++ b/src/modules/monitor.f90
@@ -1,4 +1,4 @@
-       module monitor
+module monitor
 !
 !       This module contains the variables to be monitored during the
 !       convergence as well as the arrays to store the convergence.
@@ -6,113 +6,113 @@ module monitor
 !       The default variables to be monitored depend on the governing
 !       equations to be solved.
 !
-       use constants, only : intType, realType, cgnsRealType, &
-            maxCGNSNameLen
-       implicit none
-       save
+    use constants, only: intType, realType, cgnsRealType, &
+                         maxCGNSNameLen
+    implicit none
+    save
 !
 !      Parameters for the format to write the convergence to stdout.
 !
-       integer, parameter :: fieldWidth   = 12
-       integer, parameter :: fieldWidthLarge   = 24
-       integer, parameter :: decimalWidth =  5
+    integer, parameter :: fieldWidth = 12
+    integer, parameter :: fieldWidthLarge = 24
+    integer, parameter :: decimalWidth = 5
 !
 !       Variables to compute the convergence info.
 !
-       ! nMonSum: Number of monitoring variables for which the sum over
-       !          all processors must be taken. Note that this is an
-       !          integer, because of MPI.
-       ! nMonMax: Number of monitoring variables for which the maximum
-       !          over all processors must be taken. Note that this is
-       !          an integer, because of MPI.
-       ! nMon:    The sum of nmonSum and nmonMax
-
-       integer :: nMonSum, nMonMax, nMon
-
-       ! monLoc(nMon):  Array for the local summation/maximum of the
-       !                monitoring variables.
-       ! monGlob(nMon): Idem, but for the global summation/maximum.
-       ! monRef(nMon):  Idem, but for the reference values needed
-       !                for an unsteady computation.
-       ! MassFluxL, G   Massflux out of bcOutflowSubsonic  (eran-msf)
-
-       real(kind=realType), dimension(:), allocatable :: monLoc
-       real(kind=realType), dimension(:), allocatable :: monGlob
-       real(kind=realType), dimension(:), allocatable :: monRef
-       real(kind=realType) :: MassFluxL, MassFluxG  ! eran-msf
-
-       ! monNames(nMon): The names of the variables to be monitored.
-
-       character(len=maxCGNSNameLen), dimension(:), allocatable :: &
-                                                            monNames
-
-       ! monMachOrHMax: Whether or not the maximum value of the Mach
-       !                number and total enthalpy difference must be
-       !                monitored and thus computed.
-       ! showCPU:       Whether or not the CPU time must be shown
-       !                in the output to screen.
-
-       logical :: monMachOrHMax
-       logical :: showCPU
-
-       ! monMassSliding:  Whether or not to monitor the mass flow of
-       !                  the sliding interfaces.
-       ! monMassFamilies: Whether or not the mass flow of at least
-       !                  one family must be monitored.
-
-       logical :: monMassSliding
-       logical :: monMassFamilies
+    ! nMonSum: Number of monitoring variables for which the sum over
+    !          all processors must be taken. Note that this is an
+    !          integer, because of MPI.
+    ! nMonMax: Number of monitoring variables for which the maximum
+    !          over all processors must be taken. Note that this is
+    !          an integer, because of MPI.
+    ! nMon:    The sum of nmonSum and nmonMax
+
+    integer :: nMonSum, nMonMax, nMon
+
+    ! monLoc(nMon):  Array for the local summation/maximum of the
+    !                monitoring variables.
+    ! monGlob(nMon): Idem, but for the global summation/maximum.
+    ! monRef(nMon):  Idem, but for the reference values needed
+    !                for an unsteady computation.
+    ! MassFluxL, G   Massflux out of bcOutflowSubsonic  (eran-msf)
+
+    real(kind=realType), dimension(:), allocatable :: monLoc
+    real(kind=realType), dimension(:), allocatable :: monGlob
+    real(kind=realType), dimension(:), allocatable :: monRef
+    real(kind=realType) :: MassFluxL, MassFluxG  ! eran-msf
+
+    ! monNames(nMon): The names of the variables to be monitored.
+
+    character(len=maxCGNSNameLen), dimension(:), allocatable :: &
+        monNames
+
+    ! monMachOrHMax: Whether or not the maximum value of the Mach
+    !                number and total enthalpy difference must be
+    !                monitored and thus computed.
+    ! showCPU:       Whether or not the CPU time must be shown
+    !                in the output to screen.
+
+    logical :: monMachOrHMax
+    logical :: showCPU
+
+    ! monMassSliding:  Whether or not to monitor the mass flow of
+    !                  the sliding interfaces.
+    ! monMassFamilies: Whether or not the mass flow of at least
+    !                  one family must be monitored.
+
+    logical :: monMassSliding
+    logical :: monMassFamilies
 !
 !       Variables to store the convergence info.
 !
-       ! convArray(0:nIterMax,nsps,nmon): 3D array to store the
-       !                                  convergence histories.
-
-       real(kind=cgnsRealType), dimension(:,:,:), allocatable :: &
-                                                               convArray
-
-       ! solverDataArray(0:Itermax, sps, 4): stores the cumulative number of iterations, 
-       !                       CFL, step size, and lin res at each major iter.
-       !                       It is used post processing convergence histories
-       real(kind=realType), dimension(:,:,:), allocatable:: solverDataArray
-       
-       ! solverTypeArray(0:Itermax, sps): Stores string designating the iteration type on 
-       !                      each major iteration. Each string is 8 char long
-       character(len=8), dimension(:,:), allocatable:: solverTypeArray 
-                                                               
+    ! convArray(0:nIterMax,nsps,nmon): 3D array to store the
+    !                                  convergence histories.
+
+    real(kind=cgnsRealType), dimension(:, :, :), allocatable :: &
+        convArray
+
+    ! solverDataArray(0:Itermax, sps, 4): stores the cumulative number of iterations,
+    !                       CFL, step size, and lin res at each major iter.
+    !                       It is used post processing convergence histories
+    real(kind=realType), dimension(:, :, :), allocatable :: solverDataArray
+
+    ! solverTypeArray(0:Itermax, sps): Stores string designating the iteration type on
+    !                      each major iteration. Each string is 8 char long
+    character(len=8), dimension(:, :), allocatable :: solverTypeArray
+
 !       Variables to store the time accurate history.
 !       Only allocated for a time accurate computation.
 !
-       ! nTimeStepsRestart:   Number of time steps taken in an earlier
-       !                      unsteady computation from which a restart
-       !                      is performed.
-       ! timeStepUnsteady:    The current unsteady time step number;
-       !                      restart is not taken into account.
-       ! timeUnsteady:        Amount of physical time of the current
-       !                      simulation; only relevant in unsteady mode.
-       ! timeUnsteadyRestart: Amount of physical time from a previous
-       !                      simulation from which a restart is
-       !                      performed.
-
-       integer(kind=intType) :: nTimeStepsRestart, timeStepUnsteady
-       real(kind=realType)   :: timeUnsteady, timeUnsteadyRestart
-
-       ! timeArray(nTimeMax):          Array to store the values of the
-       !                               time at every time step.
-       ! timeDataArray(nTimeMax,nMon): 2D array to store the variables
-       !                               to be monitored at every time
-       !                               step. No need to store a spectral
-       !                               index here.
-
-       real(kind=cgnsRealType), dimension(:), allocatable :: timeArray
-
-       real(kind=cgnsRealType), dimension(:,:), allocatable :: &
-                                                          timeDataArray
-
-       ! writeGrid:    Whether or not a grid file must be written.
-       ! writeVolume:  Idem for a volume solution file.
-       ! writeSurface: Idem for a surface solution file.
-
-       logical :: writeGrid, writeVolume, writeSurface, writeSolEachIter
-
-       end module monitor
+    ! nTimeStepsRestart:   Number of time steps taken in an earlier
+    !                      unsteady computation from which a restart
+    !                      is performed.
+    ! timeStepUnsteady:    The current unsteady time step number;
+    !                      restart is not taken into account.
+    ! timeUnsteady:        Amount of physical time of the current
+    !                      simulation; only relevant in unsteady mode.
+    ! timeUnsteadyRestart: Amount of physical time from a previous
+    !                      simulation from which a restart is
+    !                      performed.
+
+    integer(kind=intType) :: nTimeStepsRestart, timeStepUnsteady
+    real(kind=realType) :: timeUnsteady, timeUnsteadyRestart
+
+    ! timeArray(nTimeMax):          Array to store the values of the
+    !                               time at every time step.
+    ! timeDataArray(nTimeMax,nMon): 2D array to store the variables
+    !                               to be monitored at every time
+    !                               step. No need to store a spectral
+    !                               index here.
+
+    real(kind=cgnsRealType), dimension(:), allocatable :: timeArray
+
+    real(kind=cgnsRealType), dimension(:, :), allocatable :: &
+        timeDataArray
+
+    ! writeGrid:    Whether or not a grid file must be written.
+    ! writeVolume:  Idem for a volume solution file.
+    ! writeSurface: Idem for a surface solution file.
+
+    logical :: writeGrid, writeVolume, writeSurface, writeSolEachIter
+
+end module monitor
diff --git a/src/modules/myPushPopLib.F90 b/src/modules/myPushPopLib.F90
index aa5ff925d..9d6061f21 100644
--- a/src/modules/myPushPopLib.F90
+++ b/src/modules/myPushPopLib.F90
@@ -1,17 +1,17 @@
 module myPushPopLib
 
-  ! This modules contains a fixed-sized integer stack that can be used
-  ! to replace tapenade pushcontrol1b routines *AS LONG AS THEY ARE
-  ! CONTAINED ONLY WITHIN A LOOP BODY* ie. only a fixed number of
-  ! pushecontrol1b's are called before corresponding popcontrol1b. By
-  ! modifiing the output code to make the code in-line, this is
-  ! modification makes the reverse mode code faster.
+    ! This modules contains a fixed-sized integer stack that can be used
+    ! to replace tapenade pushcontrol1b routines *AS LONG AS THEY ARE
+    ! CONTAINED ONLY WITHIN A LOOP BODY* ie. only a fixed number of
+    ! pushecontrol1b's are called before corresponding popcontrol1b. By
+    ! modifiing the output code to make the code in-line, this is
+    ! modification makes the reverse mode code faster.
 
-  use precision
-  implicit none
-  save
+    use precision
+    implicit none
+    save
 
-  integer(kind=intType) :: myIntStack(32)
-  integer(kind=intType) :: myIntPtr = 0
+    integer(kind=intType) :: myIntStack(32)
+    integer(kind=intType) :: myIntPtr = 0
 
 end module myPushPopLib
diff --git a/src/modules/overset.F90 b/src/modules/overset.F90
index a2ff1de8b..2710e9109 100644
--- a/src/modules/overset.F90
+++ b/src/modules/overset.F90
@@ -1,572 +1,570 @@
 module oversetData
 
-  use constants
-  use adtData, only : adtType
-  use block, only : fringeType
-  use kdtree2_module, only : kdtree2
+    use constants
+    use adtData, only: adtType
+    use block, only: fringeType
+    use kdtree2_module, only: kdtree2
 #ifndef USE_TAPENADE
 #include <petsc/finclude/petsc.h>
-  use petsc
-  implicit none
+    use petsc
+    implicit none
 
 #endif
-  ! Helper dataType for communicated overset grid points. This data
-  ! structure mirrros the blockType structure in block.F90, but only
-  ! contains minimum amount of information required for computing
-  ! overset connectivities.
-
-  ! Store the coordinates from a block that will need to be searched.
+    ! Helper dataType for communicated overset grid points. This data
+    ! structure mirrros the blockType structure in block.F90, but only
+    ! contains minimum amount of information required for computing
+    ! overset connectivities.
 
-  ! A simple generic sparse matrix storage container for storing the
-  ! (sparse) overlap structure of an overset mesh
-  type CSRMatrix
-     integer(kind=intType) :: nRow, nCol, nnz, nnzLocal
-     integer(kind=intType), dimension(:), pointer :: colInd, rowPtr
-     real(kind=realType), dimension(:), pointer :: data
-     integer(Kind=intType), dimension(:), pointer :: assignedProc
+    ! Store the coordinates from a block that will need to be searched.
 
-     ! Flag if this matrix is allocated
-     logical :: allocated=.False.
+    ! A simple generic sparse matrix storage container for storing the
+    ! (sparse) overlap structure of an overset mesh
+    type CSRMatrix
+        integer(kind=intType) :: nRow, nCol, nnz, nnzLocal
+        integer(kind=intType), dimension(:), pointer :: colInd, rowPtr
+        real(kind=realType), dimension(:), pointer :: data
+        integer(Kind=intType), dimension(:), pointer :: assignedProc
 
-  end type CSRMatrix
+        ! Flag if this matrix is allocated
+        logical :: allocated = .False.
 
-  ! This derived type contains sufficient information to perfom ADT
-  ! donor searches.
-  type oversetBlock
+    end type CSRMatrix
 
-     ! Sizes for the block
-     integer(kind=intType) :: il, jl, kl
+    ! This derived type contains sufficient information to perfom ADT
+    ! donor searches.
+    type oversetBlock
 
-     ! Cluster this block belongs to
-     integer(kind=intType) :: cluster
+        ! Sizes for the block
+        integer(kind=intType) :: il, jl, kl
 
-     ! This is the cell volume of the donor
-     real(kind=realType), dimension(:, :), pointer :: qualDonor
+        ! Cluster this block belongs to
+        integer(kind=intType) :: cluster
 
-     ! Connectivity for the ADT
-     integer(kind=intType), dimension(:, :), pointer :: hexaConn
+        ! This is the cell volume of the donor
+        real(kind=realType), dimension(:, :), pointer :: qualDonor
 
-     ! Coordinates for the ADT
-     real(kind=realType), dimension(:, :), pointer :: xADT
+        ! Connectivity for the ADT
+        integer(kind=intType), dimension(:, :), pointer :: hexaConn
 
-     ! Copy of global cell
-     integer(kind=intType), dimension(:, :, :), pointer :: globalCell
+        ! Coordinates for the ADT
+        real(kind=realType), dimension(:, :), pointer :: xADT
 
-     ! Whether or not a cell is "near" a wall
-     integer(kind=intType), dimension(:, :, :), pointer :: nearWall
+        ! Copy of global cell
+        integer(kind=intType), dimension(:, :, :), pointer :: globalCell
 
-     ! Whether or not a cell is not possible to be a donor. Ie. a forceRecv
-     integer(kind=intType), dimension(:, :, :), pointer :: invalidDonor
+        ! Whether or not a cell is "near" a wall
+        integer(kind=intType), dimension(:, :, :), pointer :: nearWall
 
-     ! Minimum volume for this block
-     real(kind=realType) :: minVol
+        ! Whether or not a cell is not possible to be a donor. Ie. a forceRecv
+        integer(kind=intType), dimension(:, :, :), pointer :: invalidDonor
 
-     ! The ADT for this block
-     type(adtType) :: ADT
+        ! Minimum volume for this block
+        real(kind=realType) :: minVol
 
-     ! The processor for this block
-     integer(kind=intType) :: proc
+        ! The ADT for this block
+        type(adtType) :: ADT
 
-     ! And the local block index
-     integer(kind=intType) :: block
+        ! The processor for this block
+        integer(kind=intType) :: proc
 
-     ! Buffer space for sending/receiving the block
-     real(kind=realType), dimension(:), allocatable :: rBuffer
-     integer(kind=intType), dimension(:), allocatable :: iBuffer
+        ! And the local block index
+        integer(kind=intType) :: block
 
-     ! Flag if this block got allocated
-     logical :: allocated = .False.
+        ! Buffer space for sending/receiving the block
+        real(kind=realType), dimension(:), allocatable :: rBuffer
+        integer(kind=intType), dimension(:), allocatable :: iBuffer
 
-     ! Flag if the real/int Buffers are ready after receiving info
-     logical :: realBufferReady = .False.
-     logical :: intBufferReady = .False.
+        ! Flag if this block got allocated
+        logical :: allocated = .False.
 
-  end type oversetBlock
+        ! Flag if the real/int Buffers are ready after receiving info
+        logical :: realBufferReady = .False.
+        logical :: intBufferReady = .False.
 
-  type oversetFringe
+    end type oversetBlock
 
-     ! The processor where this set of fringes came from
-     integer(kind=intType) :: proc
+    type oversetFringe
 
-     ! The block number of these fringes on processor 'proc'
-     integer(kind=intType) :: block
+        ! The processor where this set of fringes came from
+        integer(kind=intType) :: proc
 
-     ! Sizes
-     integer(kind=intType) :: il, jl, kl, nx, ny, nz
+        ! The block number of these fringes on processor 'proc'
+        integer(kind=intType) :: block
 
-     ! Cluster this set of fringes belongs to
-     integer(kind=intType) :: cluster
+        ! Sizes
+        integer(kind=intType) :: il, jl, kl, nx, ny, nz
 
-     ! Buffer space for sending/receiving the fringes
-     real(kind=realType), dimension(:), allocatable :: rBuffer
-     integer(kind=intType), dimension(:), allocatable :: iBuffer
+        ! Cluster this set of fringes belongs to
+        integer(kind=intType) :: cluster
 
-     ! These are the coordinates of the points we are searching for
-     real(kind=realType), dimension(:, :), allocatable :: x
+        ! Buffer space for sending/receiving the fringes
+        real(kind=realType), dimension(:), allocatable :: rBuffer
+        integer(kind=intType), dimension(:), allocatable :: iBuffer
 
-     ! These are the coordinate of its wall surface pt if applicable
-     real(kind=realType), dimension(:, :), allocatable :: xSeed
+        ! These are the coordinates of the points we are searching for
+        real(kind=realType), dimension(:, :), allocatable :: x
 
-     ! These are the indices of the wall surfaces
-     integer(kind=intType), dimension(:), allocatable :: wallInd
+        ! These are the coordinate of its wall surface pt if applicable
+        real(kind=realType), dimension(:, :), allocatable :: xSeed
 
-     ! Flag specifying if this cell is next to a wall or not. 1 for
-     ! next to wall, 0 otherwise.
-     integer(kind=intType), dimension(:), allocatable  :: isWall
+        ! These are the indices of the wall surfaces
+        integer(kind=intType), dimension(:), allocatable :: wallInd
 
-     ! This is where we will store all the potential donors that have
-     ! been found for this set of fringes
-     integer(kind=intType) :: nDonor=0
-     integer(kind=intType), dimension(:,:), pointer :: fringeIntBuffer=>null()
-     real(kind=realType), dimension(:,:), pointer :: fringeRealBuffer=>null()
+        ! Flag specifying if this cell is next to a wall or not. 1 for
+        ! next to wall, 0 otherwise.
+        integer(kind=intType), dimension(:), allocatable :: isWall
 
-     ! Flag if this set of fringes got allocated
-     logical :: allocated = .False.
+        ! This is where we will store all the potential donors that have
+        ! been found for this set of fringes
+        integer(kind=intType) :: nDonor = 0
+        integer(kind=intType), dimension(:, :), pointer :: fringeIntBuffer => null()
+        real(kind=realType), dimension(:, :), pointer :: fringeRealBuffer => null()
 
-     ! Flag if the real/int Buffers are ready after receiving info
-     logical :: realBufferReady = .False.
-     logical :: intBufferReady = .False.
+        ! Flag if this set of fringes got allocated
+        logical :: allocated = .False.
 
-     ! The number of actual fringes that need to communicated.
-     integer(kind=intType) :: fringeReturnSize = 0
+        ! Flag if the real/int Buffers are ready after receiving info
+        logical :: realBufferReady = .False.
+        logical :: intBufferReady = .False.
 
-  end type oversetFringe
+        ! The number of actual fringes that need to communicated.
+        integer(kind=intType) :: fringeReturnSize = 0
 
-  type oversetWall
+    end type oversetFringe
 
-     ! Sizes
-     integer(kind=intType) :: il, jl, kl
-     integer(kind=intType) :: nNodes=0
-     integer(kind=intType) :: nCells=0
-     integer(kind=intType) :: maxCells=0
-     integer(kind=intType) :: cluster=0
+    type oversetWall
 
-     ! Buffer space for sending/receiving the fringes
-     real(kind=realType), dimension(:), allocatable :: rBuffer
-     integer(kind=intType), dimension(:), allocatable :: iBuffer
+        ! Sizes
+        integer(kind=intType) :: il, jl, kl
+        integer(kind=intType) :: nNodes = 0
+        integer(kind=intType) :: nCells = 0
+        integer(kind=intType) :: maxCells = 0
+        integer(kind=intType) :: cluster = 0
 
-     ! Surface nodes used to build the tree:
-     real(kind=realType), dimension(:, :), pointer :: x => null()
+        ! Buffer space for sending/receiving the fringes
+        real(kind=realType), dimension(:), allocatable :: rBuffer
+        integer(kind=intType), dimension(:), allocatable :: iBuffer
 
-     ! Only primal mesh cell centers for dual mesh. Only allocated as
-     ! needed.
-     real(kind=realType), dimension(:, :),  pointer :: xPrimalCen => null()
+        ! Surface nodes used to build the tree:
+        real(kind=realType), dimension(:, :), pointer :: x => null()
 
-     ! Surface nonal used for determining if point is "underneath" the
-     ! surface.
-     real(kind=realType), dimension(:, :), pointer :: norm => null()
+        ! Only primal mesh cell centers for dual mesh. Only allocated as
+        ! needed.
+        real(kind=realType), dimension(:, :), pointer :: xPrimalCen => null()
 
-     ! Local estimate of surface error
-     real(kind=realType), dimension(:), pointer :: delta => null()
+        ! Surface nonal used for determining if point is "underneath" the
+        ! surface.
+        real(kind=realType), dimension(:, :), pointer :: norm => null()
 
-     ! Connectivity for the surface
-     integer(kind=intType), dimension(:, :), pointer:: conn => null()
+        ! Local estimate of surface error
+        real(kind=realType), dimension(:), pointer :: delta => null()
 
-     ! ind: Global node index for nodes
-     integer(kind=intType), dimension(:), pointer :: ind => null()
+        ! Connectivity for the surface
+        integer(kind=intType), dimension(:, :), pointer :: conn => null()
 
-     ! indPrimal: Global node index. Only temporarly used to store
-     ! index for strictly primal cells on dual mesh.
-     integer(kind=intType), dimension(:), pointer :: indPrimal => null()
+        ! ind: Global node index for nodes
+        integer(kind=intType), dimension(:), pointer :: ind => null()
 
-     ! indCell: Global cell index for wall cells
-     integer(kind=intType), dimension(:), pointer :: indCell
+        ! indPrimal: Global node index. Only temporarly used to store
+        ! index for strictly primal cells on dual mesh.
+        integer(kind=intType), dimension(:), pointer :: indPrimal => null()
 
-     ! Blanking values for Nodes
-     integer(kind=intType), dimension(:), allocatable :: iBlank
-     integer(kind=intType), dimension(:), allocatable :: cellPtr
+        ! indCell: Global cell index for wall cells
+        integer(kind=intType), dimension(:), pointer :: indCell
 
-     ! Node to element array
-     integer(kind=intType), dimension(:, :), allocatable :: nte
+        ! Blanking values for Nodes
+        integer(kind=intType), dimension(:), allocatable :: iBlank
+        integer(kind=intType), dimension(:), allocatable :: cellPtr
 
-     ! The ADT for this block's wall(s)
-     type(adtType) :: ADT
+        ! Node to element array
+        integer(kind=intType), dimension(:, :), allocatable :: nte
 
-     ! This KDTree for this block's wall
-     type(kdtree2), pointer :: tree
+        ! The ADT for this block's wall(s)
+        type(adtType) :: ADT
 
-     ! Flag if the real/int Buffers are ready after receiving info
-     logical :: realBufferReady = .False.
-     logical :: intBufferReady = .False.
+        ! This KDTree for this block's wall
+        type(kdtree2), pointer :: tree
 
-     ! Flag if this wall got allocated
-     logical :: allocated = .False.
+        ! Flag if the real/int Buffers are ready after receiving info
+        logical :: realBufferReady = .False.
+        logical :: intBufferReady = .False.
 
-  end type oversetWall
+        ! Flag if this wall got allocated
+        logical :: allocated = .False.
 
-  type oversetString
-     ! This is a generic type defining a string list. It may be used
-     ! as both a "parent" or a "child".
+    end type oversetWall
 
-     ! Sizes
-     integer(kind=intType) :: nNodes, nElems
+    type oversetString
+        ! This is a generic type defining a string list. It may be used
+        ! as both a "parent" or a "child".
 
-     ! My String's index
-     integer(kind=intType) :: myID
+        ! Sizes
+        integer(kind=intType) :: nNodes, nElems
 
-     ! Whether or no this string is periodic
-     logical :: isPeriodic=.False.
+        ! My String's index
+        integer(kind=intType) :: myID
 
-     ! Whether or no this string is a pocket
-     logical :: isPocket=.False.
+        ! Whether or no this string is periodic
+        logical :: isPeriodic = .False.
 
-     ! --------------------------------------------------------------------
-     ! Node Data: The actual physical node locations, unit surface normal,
-     ! perpNormal and mesh size. x is from index 1:3, normal from 4:6,
-     ! perpNormal form 7:9 and h is index 10. This pointer gets allocated.
-     real(kind=realType), dimension(:, :), pointer :: nodeData => null()
+        ! Whether or no this string is a pocket
+        logical :: isPocket = .False.
 
-     ! Pointer for physical node location. Points to nodeData
-     real(kind=realType), dimension(:, :), pointer :: x => null()
+        ! --------------------------------------------------------------------
+        ! Node Data: The actual physical node locations, unit surface normal,
+        ! perpNormal and mesh size. x is from index 1:3, normal from 4:6,
+        ! perpNormal form 7:9 and h is index 10. This pointer gets allocated.
+        real(kind=realType), dimension(:, :), pointer :: nodeData => null()
 
-     ! Pointer for nodal unit normal. Points to nodeData
-     real(kind=realType), dimension(:, :), pointer :: norm => null()
+        ! Pointer for physical node location. Points to nodeData
+        real(kind=realType), dimension(:, :), pointer :: x => null()
 
-     ! Pointer for nodal unit perpendicual in-plane normal. Points to nodeData
-     real(kind=realType), dimension(:, :), pointer :: perpNorm => null()
+        ! Pointer for nodal unit normal. Points to nodeData
+        real(kind=realType), dimension(:, :), pointer :: norm => null()
 
-     ! Pointer for nodal element size. Points to nodeData
-     real(kind=realType), dimension(:), pointer :: h => null()
+        ! Pointer for nodal unit perpendicual in-plane normal. Points to nodeData
+        real(kind=realType), dimension(:, :), pointer :: perpNorm => null()
 
-     ! --------------------------------------------------------------------
-     ! Integer Node Data: This stores the global node index (into
-     ! xVec), the cluster ID of the node, and the family ID of the node
-     integer(kind=intType), dimension(:, :), pointer :: intNodeData => null()
+        ! Pointer for nodal element size. Points to nodeData
+        real(kind=realType), dimension(:), pointer :: h => null()
 
-     ! The orignal nodal index. Size nNodes. Pointer into intNodeData
-     integer(kind=intType), dimension(:), pointer :: ind => null()
+        ! --------------------------------------------------------------------
+        ! Integer Node Data: This stores the global node index (into
+        ! xVec), the cluster ID of the node, and the family ID of the node
+        integer(kind=intType), dimension(:, :), pointer :: intNodeData => null()
 
-     ! The cluster the node came from. Pointer into intNodeData
-     integer(kind=intType), dimension(:), pointer :: cluster => null()
+        ! The orignal nodal index. Size nNodes. Pointer into intNodeData
+        integer(kind=intType), dimension(:), pointer :: ind => null()
 
-     ! The family the node came from. Pointer into intNodeData
-     integer(kind=intType), dimension(:), pointer :: family => null()
-     ! --------------------------------------------------------------------
+        ! The cluster the node came from. Pointer into intNodeData
+        integer(kind=intType), dimension(:), pointer :: cluster => null()
 
-     ! The connectivity of the nodes forming 1D bar elements. Size (2, nElems)
-     integer(kind=intType), dimension(:, :), pointer :: conn => null()
+        ! The family the node came from. Pointer into intNodeData
+        integer(kind=intType), dimension(:), pointer :: family => null()
+        ! --------------------------------------------------------------------
 
-     ! The index of my node numbers on my parent
-     integer(kind=intType), dimension(:), pointer :: pNodes => null()
+        ! The connectivity of the nodes forming 1D bar elements. Size (2, nElems)
+        integer(kind=intType), dimension(:, :), pointer :: conn => null()
 
-     ! The index of my elements numbers on my parent
-     integer(kind=intType), dimension(:), pointer :: pElems => null()
+        ! The index of my node numbers on my parent
+        integer(kind=intType), dimension(:), pointer :: pNodes => null()
 
-     ! The string ID and index of my nodes on a split substing
-     integer(kind=intType), dimension(:, :), pointer :: cNodes => null()
+        ! The index of my elements numbers on my parent
+        integer(kind=intType), dimension(:), pointer :: pElems => null()
 
-     ! The cloest string ID of each node *AND* the node index on the
-     ! other string. Size (2, nNodes)
-     integer(kind=intType), dimension(:, :), pointer :: otherID => null()
+        ! The string ID and index of my nodes on a split substing
+        integer(kind=intType), dimension(:, :), pointer :: cNodes => null()
 
-     ! The inverse of the connectivity node to elem array. Size (5,
-     ! nNodes). First index is the number of elements, other 4 entries
-     ! are the up to 4 possible element neighbours.
-     integer(kind=intType), dimension(:, :), pointer :: nte => null()
+        ! The cloest string ID of each node *AND* the node index on the
+        ! other string. Size (2, nNodes)
+        integer(kind=intType), dimension(:, :), pointer :: otherID => null()
 
-     ! Two buffer used for storing element indices while creating
-     ! chains. Size (2, nElem)
-     integer(kind=intType), dimension(:, :), pointer :: subStr => null()
+        ! The inverse of the connectivity node to elem array. Size (5,
+        ! nNodes). First index is the number of elements, other 4 entries
+        ! are the up to 4 possible element neighbours.
+        integer(kind=intType), dimension(:, :), pointer :: nte => null()
 
-     ! The sizes of the two substrings
-     integer(kind=intType), dimension(2) :: NsubStr
+        ! Two buffer used for storing element indices while creating
+        ! chains. Size (2, nElem)
+        integer(kind=intType), dimension(:, :), pointer :: subStr => null()
 
-     ! A array to keep track of the number of elements
-     ! "consumed" during chain searches or during zipping.
-     integer(kind=intType), dimension(:), pointer :: elemUsed => null()
+        ! The sizes of the two substrings
+        integer(kind=intType), dimension(2) :: NsubStr
 
-     ! Array to keep track of nodes used to contruct string pairs for
-     ! crossZipping.
-     integer(kind=intType), dimension(:), pointer :: XzipNodeUsed => null()
+        ! A array to keep track of the number of elements
+        ! "consumed" during chain searches or during zipping.
+        integer(kind=intType), dimension(:), pointer :: elemUsed => null()
 
-     ! The KD tree for this string for performing fast seaches.
-     !type(tree_master_record), pointer :: tree
-     type(kdtree2), pointer :: tree => null()
+        ! Array to keep track of nodes used to contruct string pairs for
+        ! crossZipping.
+        integer(kind=intType), dimension(:), pointer :: XzipNodeUsed => null()
 
-     ! Pointer to the parent string
-     type(oversetString), pointer :: p => null()
+        ! The KD tree for this string for performing fast seaches.
+        !type(tree_master_record), pointer :: tree
+        type(kdtree2), pointer :: tree => null()
 
-     ! Pointer for next string for a linked list
-     type(oversetString), pointer :: next => null()
+        ! Pointer to the parent string
+        type(oversetString), pointer :: p => null()
 
-     ! List of all all directed edges.
-     type(oversetEdge), pointer, dimension(:) :: edges => null()
+        ! Pointer for next string for a linked list
+        type(oversetString), pointer :: next => null()
 
-     ! nEdges: The number of new edges added due to triangles.
-     integer(kind=intTYpe) :: nEdges
+        ! List of all all directed edges.
+        type(oversetEdge), pointer, dimension(:) :: edges => null()
 
-     ! List of all computed triangles
-     integer(kind=intType), dimension(:, :), pointer :: tris => null()
+        ! nEdges: The number of new edges added due to triangles.
+        integer(kind=intTYpe) :: nEdges
 
-     ! Number of trianges
-     integer(kind=intType) :: nTris
+        ! List of all computed triangles
+        integer(kind=intType), dimension(:, :), pointer :: tris => null()
 
-     ! surfCellID(1:nTris)
-     ! Global cellID of the primal cell containing the triangle centroid
-     integer(kind=intType), dimension(:), pointer :: surfCellID
+        ! Number of trianges
+        integer(kind=intType) :: nTris
 
+        ! surfCellID(1:nTris)
+        ! Global cellID of the primal cell containing the triangle centroid
+        integer(kind=intType), dimension(:), pointer :: surfCellID
 
-  end type oversetString
+    end type oversetString
 
-  type oversetEdge
-     ! Simple data structure representing a directed edge from n1->n2
-     integer(kind=intType) :: n1, n2
-  end type oversetEdge
+    type oversetEdge
+        ! Simple data structure representing a directed edge from n1->n2
+        integer(kind=intType) :: n1, n2
+    end type oversetEdge
 
-  interface operator(<=)
-     module procedure lessEqualEdgeType
-  end interface operator(<=)
+    interface operator(<=)
+        module procedure lessEqualEdgeType
+    end interface operator(<=)
 
-  interface operator(<)
-     module procedure lessEdgeType
-  end interface operator(<)
+    interface operator(<)
+        module procedure lessEdgeType
+    end interface operator(<)
 
-  type pocketEdge
-     ! Simple data structure representing a directed edge from n1->n2
-     ! Similar to oversetEdge, but introduced to do different type
-     ! of sort on edges
-     integer(kind=intType) :: n1, n2
-  end type pocketEdge
+    type pocketEdge
+        ! Simple data structure representing a directed edge from n1->n2
+        ! Similar to oversetEdge, but introduced to do different type
+        ! of sort on edges
+        integer(kind=intType) :: n1, n2
+    end type pocketEdge
 
-  interface operator(<=)
-     module procedure lessEqualPocketEdgeN2
-  end interface operator(<=)
+    interface operator(<=)
+        module procedure lessEqualPocketEdgeN2
+    end interface operator(<=)
 
-  interface operator(<)
-     module procedure lessPocketEdgeN2
-  end interface operator(<)
+    interface operator(<)
+        module procedure lessPocketEdgeN2
+    end interface operator(<)
 
-  interface operator(==)
-     module procedure EqualPocketEdgeN2
-  end interface operator(==)
+    interface operator(==)
+        module procedure EqualPocketEdgeN2
+    end interface operator(==)
 
-  type zipperMesh
+    type zipperMesh
 
-     ! Data required for zipper mesh surface integer for a particular BCGroup
-     integer(kind=intType), dimension(:, :), allocatable :: conn
-     integer(kind=intType), dimension(:), allocatable :: fam, indices
-     logical :: allocated=.False.
+        ! Data required for zipper mesh surface integer for a particular BCGroup
+        integer(kind=intType), dimension(:, :), allocatable :: conn
+        integer(kind=intType), dimension(:), allocatable :: fam, indices
+        logical :: allocated = .False.
 #ifndef USE_TAPENADE
-     VecScatter :: scatter
-     Vec :: localVal
+        VecScatter :: scatter
+        Vec :: localVal
 #endif
-  end type zipperMesh
-
-  ! This is the flattened list of the fringes next to the wall that we
-  !  have actually found donors for.
-  ! tmpFringePtr is only used if we need to realloc.
-  type(fringeType), dimension(:), pointer :: localWallFringes, wallFringes, tmpFringePtr
-  integer(kind=intType) :: nLocalWallFringe, nWallFringe
-
-  ! These are the master overlap matrices
-  type(CSRMatrix), dimension(:, :), allocatable, target :: overlapMatrix
-
-  ! Some additional helper stuff
-  integer(kind=intType), dimension(:), allocatable :: nDomProc, cumDomProc
-  integer(kind=intType) :: nDomTotal
-  integer(kind=intType) :: nClusters
-  integer(kind=intType), dimension(:), allocatable :: clusters
-  real(kind=realType), dimension(:), allocatable :: clusterAreas
-  real(kind=realType), dimension(:), allocatable :: clusterMarchDist
-  type(oversetWall), dimension(:), allocatable, target :: clusterWalls
-
-  ! Flag specifying if overset is present in mesh
-  logical :: oversetPresent
-
-  ! Zipper meshes
-  type(zipperMesh), dimension(nFamExchange), target :: zipperMeshes
-
-  ! Static arrays for doing timings
-  real(kind=realType), dimension(iTotal) :: tStart
-  real(kind=realType), dimension(iTotal) :: oversetTimes
-
-  contains
-  ! ==============================
-  ! Operator overloading functions
-  ! ==============================
-  logical function lessEqualEdgeType(e1, e2)
-    !
-    !   lessEqualEdgeType returns .True. if e1<=e2 and .False. otherwise.
-    !   Compared on the directed edge node indices n1 and n2.
-    !   First compare wrt averaged node indices data. If same averaged
-    !   node data, then compare wrt increasing or decreasing node indices.
-
-    implicit none
-
-    ! Input
-    type(oversetEdge), intent(in) :: e1, e2
-
-    ! Local variables
-    integer(kind=intType) :: nsum1, nsum2, ndiff1, ndiff2
-
-    ! Compare the averaged (or just sum of) node indices values.
-    ! Positive sign for increasing order of node indices.
-    nsum1 = e1%n1 + e1%n2
-    nsum2 = e2%n1 + e2%n2
-    ndiff1 = e1%n2 - e1%n1
-    ndiff2 = e2%n2 - e2%n1
-
-    ! Compare based on averaged node indices values
-    if (abs(nsum1) < abs(nsum2)) then
-       lessEqualEdgeType = .True.
-       return
-    else if (abs(nsum1) > abs(nsum2)) then
-       lessEqualEdgeType = .False.
-       return
-    end if
-
-    if (abs(nsum1) /= abs(nsum2)) &
-       STOP ' *** Error in lessEqualEdgeType ***'
-
-    ! Compare based on edge nodes difference
-    if (abs(ndiff1) < abs(ndiff2)) then
-       lessEqualEdgeType = .True.
-       return
-    else if (abs(ndiff1) > abs(ndiff2)) then
-       lessEqualEdgeType = .False.
-       return
-    end if
-
-    ! here abs(ndiff1) == abs(ndiff2) and
-    ! abs(nsum1)== abs(nsum2), so same edge
-    if (ndiff1 < ndiff2) then
-       lessEqualEdgeType = .True.
-       return
-    else if (ndiff1 > ndiff2) then
-       lessEqualEdgeType = .False.
-       return
-    end if
-
-    ! here ndiff1 == ndiff2, hence .True.
-
-    lessEqualEdgeType = .True.
-
-  end function lessEqualEdgeType
-
-  ! ---------------------------------
-  logical function lessEdgeType(e1, e2)
-    !
-    !   lessEdgeType returns .True. if e1<e2 and .False. otherwise.
-    !   Compared on the directed edge node indices n1 and n2.
-    !   First compare wrt averaged node indices data. If same averaged
-    !   node data, then compare wrt increasing or decreasing node indices.
-
-    implicit none
-
-    ! Input
-    type(oversetEdge), intent(in) :: e1, e2
-
-    ! Local variables
-    integer(kind=intType) :: nsum1, nsum2, ndiff1, ndiff2
-
-    ! Compare the averaged (or just sum of) node indices values.
-    ! Positive sign for increasing order of node indices.
-    nsum1 = e1%n1 + e1%n2
-    nsum2 = e2%n1 + e2%n2
-    ndiff1 = e1%n2 - e1%n1
-    ndiff2 = e2%n2 - e2%n1
-
-    ! Compare based on averaged node indices values
-    if (abs(nsum1) < abs(nsum2)) then
-       lessEdgeType = .True.
-       return
-    else if (abs(nsum1) > abs(nsum2)) then
-       lessEdgeType = .False.
-       return
-    end if
-
-    if (abs(nsum1) /= abs(nsum2)) &
-       STOP ' *** Error in lessEdgeType ***'
-
-    ! Compare based on edge nodes difference
-    if (abs(ndiff1) < abs(ndiff2)) then
-       lessEdgeType = .True.
-       return
-    else if (abs(ndiff1) > abs(ndiff2)) then
-       lessEdgeType = .False.
-       return
-    end if
-
-    ! here abs(ndiff1) == abs(ndiff2) and
-    ! abs(nsum1)== abs(nsum2), so same edge
-    if (ndiff1 < ndiff2) then
-       lessEdgeType = .True.
-       return
-    else if (ndiff1 > ndiff2) then
-       lessEdgeType = .False.
-       return
-    end if
-
-    ! here ndiff1 == ndiff2, hence .False.
-
-    lessEdgeType = .False.
-
-  end function lessEdgeType
-
-
-  logical function lessEqualPocketEdgeN2(e1, e2)
-    !
-    !   lessEqualPocketEdgeN2 returns .True. if e1%n2<=e2%n2 and .False.
-    !   otherwise. Compared on the directed edge node indices n1 and n2.
-
-    implicit none
-
-    ! Input
-    type(pocketEdge), intent(in) :: e1, e2
-
-    if (e1%n2 < e2%n2) then
-       lessEqualPocketEdgeN2 = .True.
-       return
-    else if (e1%n2 > e2%n2) then
-       lessEqualPocketEdgen2 = .False.
-       return
-    end if
-
-    ! Here e1%n2==e2%n2, so edges are equal, hence .True.
-    lessEqualPocketEdgeN2 = .True.
-
-  end function lessEqualPocketEdgeN2
-
-  logical function lessPocketEdgeN2(e1, e2)
-    !
-    !   lessPocketEdgeN2 returns .True. if e1%N2<e2%N2 and .False.
-    !   otherwise. Compared on the directed edge node indices n1 and n2.
-
-    implicit none
-
-    ! Input
-    type(pocketEdge), intent(in) :: e1, e2
-
-    if (e1%N2 < e2%N2) then
-       lessPocketEdgeN2 = .True.
-       return
-    else if (e1%N2 > e2%N2) then
-       lessPocketEdgeN2 = .False.
-       return
-    end if
-
-    ! Here e1%N2==e2%n2, so edges are equal, hence .False.
-    lessPocketEdgeN2 = .False.
-
-  end function lessPocketEdgeN2
-
-  logical function EqualPocketEdgeN2(e1, e2)
-    !
-    !   EqualPocketEdgeN2 returns .True. if e1%N2==e2%N2 and .False.
-    !   otherwise. Compared on the directed edge node indices n1 and n2.
-
-    implicit none
-
-    ! Input
-    type(pocketEdge), intent(in) :: e1, e2
-
-    if (e1%N2 == e2%N2) then
-       EqualPocketEdgeN2 = .True.
-       return
-    else
-       EqualPocketEdgeN2 = .False.
-       return
-    end if
-
-  end function EqualPocketEdgeN2
+    end type zipperMesh
+
+    ! This is the flattened list of the fringes next to the wall that we
+    !  have actually found donors for.
+    ! tmpFringePtr is only used if we need to realloc.
+    type(fringeType), dimension(:), pointer :: localWallFringes, wallFringes, tmpFringePtr
+    integer(kind=intType) :: nLocalWallFringe, nWallFringe
+
+    ! These are the master overlap matrices
+    type(CSRMatrix), dimension(:, :), allocatable, target :: overlapMatrix
+
+    ! Some additional helper stuff
+    integer(kind=intType), dimension(:), allocatable :: nDomProc, cumDomProc
+    integer(kind=intType) :: nDomTotal
+    integer(kind=intType) :: nClusters
+    integer(kind=intType), dimension(:), allocatable :: clusters
+    real(kind=realType), dimension(:), allocatable :: clusterAreas
+    real(kind=realType), dimension(:), allocatable :: clusterMarchDist
+    type(oversetWall), dimension(:), allocatable, target :: clusterWalls
+
+    ! Flag specifying if overset is present in mesh
+    logical :: oversetPresent
+
+    ! Zipper meshes
+    type(zipperMesh), dimension(nFamExchange), target :: zipperMeshes
+
+    ! Static arrays for doing timings
+    real(kind=realType), dimension(iTotal) :: tStart
+    real(kind=realType), dimension(iTotal) :: oversetTimes
+
+contains
+    ! ==============================
+    ! Operator overloading functions
+    ! ==============================
+    logical function lessEqualEdgeType(e1, e2)
+        !
+        !   lessEqualEdgeType returns .True. if e1<=e2 and .False. otherwise.
+        !   Compared on the directed edge node indices n1 and n2.
+        !   First compare wrt averaged node indices data. If same averaged
+        !   node data, then compare wrt increasing or decreasing node indices.
+
+        implicit none
+
+        ! Input
+        type(oversetEdge), intent(in) :: e1, e2
+
+        ! Local variables
+        integer(kind=intType) :: nsum1, nsum2, ndiff1, ndiff2
+
+        ! Compare the averaged (or just sum of) node indices values.
+        ! Positive sign for increasing order of node indices.
+        nsum1 = e1%n1 + e1%n2
+        nsum2 = e2%n1 + e2%n2
+        ndiff1 = e1%n2 - e1%n1
+        ndiff2 = e2%n2 - e2%n1
+
+        ! Compare based on averaged node indices values
+        if (abs(nsum1) < abs(nsum2)) then
+            lessEqualEdgeType = .True.
+            return
+        else if (abs(nsum1) > abs(nsum2)) then
+            lessEqualEdgeType = .False.
+            return
+        end if
+
+        if (abs(nsum1) /= abs(nsum2)) &
+            STOP ' *** Error in lessEqualEdgeType ***'
+
+        ! Compare based on edge nodes difference
+        if (abs(ndiff1) < abs(ndiff2)) then
+            lessEqualEdgeType = .True.
+            return
+        else if (abs(ndiff1) > abs(ndiff2)) then
+            lessEqualEdgeType = .False.
+            return
+        end if
+
+        ! here abs(ndiff1) == abs(ndiff2) and
+        ! abs(nsum1)== abs(nsum2), so same edge
+        if (ndiff1 < ndiff2) then
+            lessEqualEdgeType = .True.
+            return
+        else if (ndiff1 > ndiff2) then
+            lessEqualEdgeType = .False.
+            return
+        end if
+
+        ! here ndiff1 == ndiff2, hence .True.
+
+        lessEqualEdgeType = .True.
+
+    end function lessEqualEdgeType
+
+    ! ---------------------------------
+    logical function lessEdgeType(e1, e2)
+        !
+        !   lessEdgeType returns .True. if e1<e2 and .False. otherwise.
+        !   Compared on the directed edge node indices n1 and n2.
+        !   First compare wrt averaged node indices data. If same averaged
+        !   node data, then compare wrt increasing or decreasing node indices.
+
+        implicit none
+
+        ! Input
+        type(oversetEdge), intent(in) :: e1, e2
+
+        ! Local variables
+        integer(kind=intType) :: nsum1, nsum2, ndiff1, ndiff2
+
+        ! Compare the averaged (or just sum of) node indices values.
+        ! Positive sign for increasing order of node indices.
+        nsum1 = e1%n1 + e1%n2
+        nsum2 = e2%n1 + e2%n2
+        ndiff1 = e1%n2 - e1%n1
+        ndiff2 = e2%n2 - e2%n1
+
+        ! Compare based on averaged node indices values
+        if (abs(nsum1) < abs(nsum2)) then
+            lessEdgeType = .True.
+            return
+        else if (abs(nsum1) > abs(nsum2)) then
+            lessEdgeType = .False.
+            return
+        end if
+
+        if (abs(nsum1) /= abs(nsum2)) &
+            STOP ' *** Error in lessEdgeType ***'
+
+        ! Compare based on edge nodes difference
+        if (abs(ndiff1) < abs(ndiff2)) then
+            lessEdgeType = .True.
+            return
+        else if (abs(ndiff1) > abs(ndiff2)) then
+            lessEdgeType = .False.
+            return
+        end if
+
+        ! here abs(ndiff1) == abs(ndiff2) and
+        ! abs(nsum1)== abs(nsum2), so same edge
+        if (ndiff1 < ndiff2) then
+            lessEdgeType = .True.
+            return
+        else if (ndiff1 > ndiff2) then
+            lessEdgeType = .False.
+            return
+        end if
+
+        ! here ndiff1 == ndiff2, hence .False.
+
+        lessEdgeType = .False.
+
+    end function lessEdgeType
+
+    logical function lessEqualPocketEdgeN2(e1, e2)
+        !
+        !   lessEqualPocketEdgeN2 returns .True. if e1%n2<=e2%n2 and .False.
+        !   otherwise. Compared on the directed edge node indices n1 and n2.
+
+        implicit none
+
+        ! Input
+        type(pocketEdge), intent(in) :: e1, e2
+
+        if (e1%n2 < e2%n2) then
+            lessEqualPocketEdgeN2 = .True.
+            return
+        else if (e1%n2 > e2%n2) then
+            lessEqualPocketEdgen2 = .False.
+            return
+        end if
+
+        ! Here e1%n2==e2%n2, so edges are equal, hence .True.
+        lessEqualPocketEdgeN2 = .True.
+
+    end function lessEqualPocketEdgeN2
+
+    logical function lessPocketEdgeN2(e1, e2)
+        !
+        !   lessPocketEdgeN2 returns .True. if e1%N2<e2%N2 and .False.
+        !   otherwise. Compared on the directed edge node indices n1 and n2.
+
+        implicit none
+
+        ! Input
+        type(pocketEdge), intent(in) :: e1, e2
+
+        if (e1%N2 < e2%N2) then
+            lessPocketEdgeN2 = .True.
+            return
+        else if (e1%N2 > e2%N2) then
+            lessPocketEdgeN2 = .False.
+            return
+        end if
+
+        ! Here e1%N2==e2%n2, so edges are equal, hence .False.
+        lessPocketEdgeN2 = .False.
+
+    end function lessPocketEdgeN2
+
+    logical function EqualPocketEdgeN2(e1, e2)
+        !
+        !   EqualPocketEdgeN2 returns .True. if e1%N2==e2%N2 and .False.
+        !   otherwise. Compared on the directed edge node indices n1 and n2.
+
+        implicit none
+
+        ! Input
+        type(pocketEdge), intent(in) :: e1, e2
+
+        if (e1%N2 == e2%N2) then
+            EqualPocketEdgeN2 = .True.
+            return
+        else
+            EqualPocketEdgeN2 = .False.
+            return
+        end if
+
+    end function EqualPocketEdgeN2
 end module oversetData
diff --git a/src/modules/precision.F90 b/src/modules/precision.F90
index bb392fc37..1e79fc9fa 100644
--- a/src/modules/precision.F90
+++ b/src/modules/precision.F90
@@ -1,128 +1,128 @@
 module precision
-  !
-  !       Definition of the kinds used for the integer and real types.
-  !       Due to MPI, it is a bit messy to use the compiler options -r8
-  !       and -r4 and therefore the kind construction is used here,
-  !       where the precision is set using compiler flags of -d type.
-  !       This is the only file of the code that should be changed when
-  !       a user wants single precision instead of double precision. All
-  !       other routines use the definitions in this file whenever
-  !       possible. If other definitions are used, there is a good
-  !       reason to do so, e.g. when calling the cgns or MPI functions.
-  !       The actual types used are determined by compiler flags like
-  !       -DUSE_LONG_INT and -DUSE_SINGLE_PRECISION. If these are
-  !       omitted the default integer and double precision are used.
-  !
-  !
-  use mpi
-  implicit none
-
-  save
-
-  !
-  !       Definition of the integer type used in the entire code. There
-  !       might be a more elegant solution to do this, but be sure that
-  !       compatability with MPI must be guaranteed. Note that dummyInt
-  !       is a private variable, only used for the definition of the
-  !       integer type. Note furthermore that the parameters defining
-  !       the MPI types are integers. This is because of the definition
-  !       in MPI.
-  !
+    !
+    !       Definition of the kinds used for the integer and real types.
+    !       Due to MPI, it is a bit messy to use the compiler options -r8
+    !       and -r4 and therefore the kind construction is used here,
+    !       where the precision is set using compiler flags of -d type.
+    !       This is the only file of the code that should be changed when
+    !       a user wants single precision instead of double precision. All
+    !       other routines use the definitions in this file whenever
+    !       possible. If other definitions are used, there is a good
+    !       reason to do so, e.g. when calling the cgns or MPI functions.
+    !       The actual types used are determined by compiler flags like
+    !       -DUSE_LONG_INT and -DUSE_SINGLE_PRECISION. If these are
+    !       omitted the default integer and double precision are used.
+    !
+    !
+    use mpi
+    implicit none
+
+    save
+
+    !
+    !       Definition of the integer type used in the entire code. There
+    !       might be a more elegant solution to do this, but be sure that
+    !       compatability with MPI must be guaranteed. Note that dummyInt
+    !       is a private variable, only used for the definition of the
+    !       integer type. Note furthermore that the parameters defining
+    !       the MPI types are integers. This is because of the definition
+    !       in MPI.
+    !
 
 #ifdef USE_LONG_INT
 
-  ! Long, i.e. 8 byte, integers are used as default integers
+    ! Long, i.e. 8 byte, integers are used as default integers
 
-  integer(kind=8), private :: dummyInt
-  integer, parameter       :: adflow_integer  = mpi_integer8
-  integer, parameter       :: sizeOfInteger = 8
+    integer(kind=8), private :: dummyInt
+    integer, parameter :: adflow_integer = mpi_integer8
+    integer, parameter :: sizeOfInteger = 8
 #else
 
-  ! Standard 4 byte integer types are used as default integers.
+    ! Standard 4 byte integer types are used as default integers.
 
-  integer(kind=4), private :: dummyInt
-  integer, parameter       :: adflow_integer  = mpi_integer4
-  integer, parameter       :: sizeOfInteger = 4
+    integer(kind=4), private :: dummyInt
+    integer, parameter :: adflow_integer = mpi_integer4
+    integer, parameter :: sizeOfInteger = 4
 #endif
 
-  !
-  !       Definition of the float type used in the entire code. The
-  !       remarks mentioned before the integer type definition also
-  !       apply here.
-  !
+    !
+    !       Definition of the float type used in the entire code. The
+    !       remarks mentioned before the integer type definition also
+    !       apply here.
+    !
 
 #ifdef USE_SINGLE_PRECISION
 
-  ! Single precision reals are used as default real types.
+    ! Single precision reals are used as default real types.
 
-  real(kind=4), private :: dummyReal
-  integer, parameter    :: adflow_real  = mpi_real4
-  integer, parameter    :: sizeOfReal = 4
-  real(kind=4), private :: dummyCGNSReal
+    real(kind=4), private :: dummyReal
+    integer, parameter :: adflow_real = mpi_real4
+    integer, parameter :: sizeOfReal = 4
+    real(kind=4), private :: dummyCGNSReal
 
 #elif USE_QUADRUPLE_PRECISION
 
-  ! Quadrupole precision reals are used as default real types.
-  ! This may not be supported on all platforms.
-  ! As cgns does not support quadrupole precision, double
-  ! precision is used instead.
+    ! Quadrupole precision reals are used as default real types.
+    ! This may not be supported on all platforms.
+    ! As cgns does not support quadrupole precision, double
+    ! precision is used instead.
 
-  real(kind=16), private :: dummyReal
-  integer, parameter     :: adflow_real  = mpi_real16
-  integer, parameter     :: sizeOfReal = 16
-  real(kind=8), private :: dummyCGNSReal
+    real(kind=16), private :: dummyReal
+    integer, parameter :: adflow_real = mpi_real16
+    integer, parameter :: sizeOfReal = 16
+    real(kind=8), private :: dummyCGNSReal
 
 #else
 
-  ! Double precision reals are used as default real types.
+    ! Double precision reals are used as default real types.
 
-  real(kind=8), private :: dummyReal
-  integer, parameter    :: adflow_real   = mpi_real8
-  integer, parameter    :: sizeOfReal = 8
-  real(kind=8), private :: dummyCGNSReal
+    real(kind=8), private :: dummyReal
+    integer, parameter :: adflow_real = mpi_real8
+    integer, parameter :: sizeOfReal = 8
+    real(kind=8), private :: dummyCGNSReal
 
 #endif
 
-  ! Dummy single and double types
-  real(kind=4) :: dummySingle
-  real(kind=8) :: dummyDouble
-
-  !
-  !       Definition of the porosity type. As this is only a flag to
-  !       indicate whether or not fluxes must be computed, an integer1
-  !       is perfectly okay.
-  !
-  integer(kind=1), private :: dummyPor
-
-  !      Definition of the integer type for the element types. As only
-  !      a limited number element types are present, a 1 byte integer
-  !      is enough.
-  !
-  integer(kind=1), private :: adtDummyElementInt
-
-  !       Definition of the cgns periodic type.
-  !
-  real(kind=4), private :: dummyCGNSPer
-  !
-  !       Definition of the kind parameters for the integer and real
-  !       types.
-  !
-  integer, parameter :: intType      = kind(dummyInt)
-  integer, parameter :: porType      = kind(dummyPor)
-  integer, parameter :: realType     = kind(dummyReal)
-  integer, parameter :: adtElementType = kind(adtDummyElementInt)
-  integer, parameter :: cgnsRealType = kind(dummyCGNSReal)
-  integer, parameter :: cgnsPerType  = kind(dummyCGNSPer)
-  integer, parameter :: alwaysRealType = kind(dummyReal)
-  integer, parameter :: singleType   = kind(dummySingle)
-  integer, parameter :: doubleType   = kind(dummyDouble)
-  !
-  !       Set the parameter debug, depending on the compiler option.
-  !
+    ! Dummy single and double types
+    real(kind=4) :: dummySingle
+    real(kind=8) :: dummyDouble
+
+    !
+    !       Definition of the porosity type. As this is only a flag to
+    !       indicate whether or not fluxes must be computed, an integer1
+    !       is perfectly okay.
+    !
+    integer(kind=1), private :: dummyPor
+
+    !      Definition of the integer type for the element types. As only
+    !      a limited number element types are present, a 1 byte integer
+    !      is enough.
+    !
+    integer(kind=1), private :: adtDummyElementInt
+
+    !       Definition of the cgns periodic type.
+    !
+    real(kind=4), private :: dummyCGNSPer
+    !
+    !       Definition of the kind parameters for the integer and real
+    !       types.
+    !
+    integer, parameter :: intType = kind(dummyInt)
+    integer, parameter :: porType = kind(dummyPor)
+    integer, parameter :: realType = kind(dummyReal)
+    integer, parameter :: adtElementType = kind(adtDummyElementInt)
+    integer, parameter :: cgnsRealType = kind(dummyCGNSReal)
+    integer, parameter :: cgnsPerType = kind(dummyCGNSPer)
+    integer, parameter :: alwaysRealType = kind(dummyReal)
+    integer, parameter :: singleType = kind(dummySingle)
+    integer, parameter :: doubleType = kind(dummyDouble)
+    !
+    !       Set the parameter debug, depending on the compiler option.
+    !
 #ifdef DEBUG_MODE
-  logical, parameter :: debug = .true.
+    logical, parameter :: debug = .true.
 #else
-  logical, parameter :: debug = .false.
+    logical, parameter :: debug = .false.
 #endif
 
 end module precision
diff --git a/src/modules/precision_tapenade.f90 b/src/modules/precision_tapenade.f90
index 241137b1b..33ccc0e16 100644
--- a/src/modules/precision_tapenade.f90
+++ b/src/modules/precision_tapenade.f90
@@ -1,12 +1,11 @@
 
-       module precision
+module precision
 
-       implicit none
-       save
+    implicit none
+    save
 
+    integer, parameter :: intType = 4
+    integer, parameter :: porType = 1
+    integer, parameter :: realType = 8
 
-       integer, parameter :: intType      = 4
-       integer, parameter :: porType      = 1
-       integer, parameter :: realType     = 8
-
-       end module precision
+end module precision
diff --git a/src/modules/section.f90 b/src/modules/section.f90
index 7535cd9e0..c00435f06 100644
--- a/src/modules/section.f90
+++ b/src/modules/section.f90
@@ -1,48 +1,48 @@
-       module section
+module section
 !
 !       This module contains the definition of the derived data type
 !       sectionType, which stores the information of a section of the
 !       grid. It also contains the array to store the info of all the
 !       sections.
 !
-       use constants, only : intType, realType
-       implicit none
-       save
+    use constants, only: intType, realType
+    implicit none
+    save
 !
 !       The definition of the derived data type sectionType.
 !
-       type sectionType
+    type sectionType
 
-         ! periodic:       Whether or not the section is periodic.
-         ! rotating:       Whether or not the section is rotating.
-         ! nSlices:        Number of periodic slices to obtain
-         !                 a full rotation.
-         ! timePeriod:     The physical time of one period.
-         ! rotCenter(3):   Cartesian coordinates of the rotation center.
-         ! rotMatrix(3,3): Rotation matrix of the periodic
-         !                 transformation.
-         ! translation(3): Translation vector of the periodic
-         !                 transformation.
-         ! rotAxis(3):     The rotation axis.
-         ! rotRate(3):     The rotation rate of the section.
+        ! periodic:       Whether or not the section is periodic.
+        ! rotating:       Whether or not the section is rotating.
+        ! nSlices:        Number of periodic slices to obtain
+        !                 a full rotation.
+        ! timePeriod:     The physical time of one period.
+        ! rotCenter(3):   Cartesian coordinates of the rotation center.
+        ! rotMatrix(3,3): Rotation matrix of the periodic
+        !                 transformation.
+        ! translation(3): Translation vector of the periodic
+        !                 transformation.
+        ! rotAxis(3):     The rotation axis.
+        ! rotRate(3):     The rotation rate of the section.
 
-         logical :: periodic, rotating
+        logical :: periodic, rotating
 
-         integer(kind=intType) :: nSlices
+        integer(kind=intType) :: nSlices
 
-         real(kind=realType) :: timePeriod
+        real(kind=realType) :: timePeriod
 
-         real(kind=realType), dimension(3)   :: rotCenter, translation
-         real(kind=realType), dimension(3)   :: rotAxis, rotRate
-         real(kind=realType), dimension(3,3) :: rotMatrix
+        real(kind=realType), dimension(3) :: rotCenter, translation
+        real(kind=realType), dimension(3) :: rotAxis, rotRate
+        real(kind=realType), dimension(3, 3) :: rotMatrix
 
-       end type sectionType
+    end type sectionType
 
-       ! nSections:           Number of different sections in the grid.
-       ! sections(nSections): The info of the corresponding sections.
+    ! nSections:           Number of different sections in the grid.
+    ! sections(nSections): The info of the corresponding sections.
 
-       integer(kind=intType) :: nSections
+    integer(kind=intType) :: nSections
 
-       type(sectionType), dimension(:), allocatable :: sections
+    type(sectionType), dimension(:), allocatable :: sections
 
-       end module section
+end module section
diff --git a/src/modules/stencils.f90 b/src/modules/stencils.f90
index 2a1bc9ddc..504547255 100644
--- a/src/modules/stencils.f90
+++ b/src/modules/stencils.f90
@@ -1,107 +1,107 @@
 module stencils
 
-  !       stencils defines indices for several types of stencils. These
-  !       are useful for setting blocks in dRdw and dRdwPre depending
-  !       on the type of equations  being solved
+    !       stencils defines indices for several types of stencils. These
+    !       are useful for setting blocks in dRdw and dRdwPre depending
+    !       on the type of equations  being solved
 
-  use constants, only : intType, realType
-  implicit none
+    use constants, only: intType, realType
+    implicit none
 
-  ! Euler stencils
-  integer(kind=intType), parameter :: N_euler_pc   = 7
-  integer(kind=intType), parameter :: N_euler_drdw = 13
+    ! Euler stencils
+    integer(kind=intType), parameter :: N_euler_pc = 7
+    integer(kind=intType), parameter :: N_euler_drdw = 13
 
-  integer(kind=intType), dimension(7 ,3), target :: euler_pc_stencil
-  integer(kind=intType), dimension(13,3), target :: euler_drdw_stencil
+    integer(kind=intType), dimension(7, 3), target :: euler_pc_stencil
+    integer(kind=intType), dimension(13, 3), target :: euler_drdw_stencil
 
-  ! Viscous stencils
-  integer(kind=intType), parameter :: N_visc_pc   = 27
-  integer(kind=intType), parameter :: N_visc_drdw = 33
+    ! Viscous stencils
+    integer(kind=intType), parameter :: N_visc_pc = 27
+    integer(kind=intType), parameter :: N_visc_drdw = 33
 
-  integer(kind=intType), dimension(27,3), target :: visc_pc_stencil
-  integer(kind=intType), dimension(33,3), target :: visc_drdw_stencil
+    integer(kind=intType), dimension(27, 3), target :: visc_pc_stencil
+    integer(kind=intType), dimension(33, 3), target :: visc_drdw_stencil
 
 end module stencils
 
 subroutine initialize_stencils
 
-  use stencils
-
-  implicit none
-  integer(kind=intType) :: i,j,k,ii
-
-  ! -------- Euler PC Stencil ---------
-  euler_pc_stencil(1,:) = (/ 0, 0, 0 /)
-  euler_pc_stencil(2,:) = (/-1, 0, 0 /)
-  euler_pc_stencil(3,:) = (/ 1, 0, 0 /)
-  euler_pc_stencil(4,:) = (/ 0,-1, 0 /)
-  euler_pc_stencil(5,:) = (/ 0, 1, 0 /)
-  euler_pc_stencil(6,:) = (/ 0, 0,-1 /)
-  euler_pc_stencil(7,:) = (/ 0, 0, 1 /)
-
- ! ---------- Euler drdw Stencil ---------
-  euler_drdw_stencil(1 ,:) = (/ 0, 0, 0 /)
-  euler_drdw_stencil(2 ,:) = (/-2, 0, 0 /)
-  euler_drdw_stencil(3 ,:) = (/-1, 0, 0 /)
-  euler_drdw_stencil(4 ,:) = (/ 1, 0, 0 /)
-  euler_drdw_stencil(5 ,:) = (/ 2, 0, 0 /)
-  euler_drdw_stencil(6 ,:) = (/ 0,-2, 0 /)
-  euler_drdw_stencil(7 ,:) = (/ 0,-1, 0 /)
-  euler_drdw_stencil(8 ,:) = (/ 0, 1, 0 /)
-  euler_drdw_stencil(9 ,:) = (/ 0, 2, 0 /)
-  euler_drdw_stencil(10,:) = (/ 0, 0,-2 /)
-  euler_drdw_stencil(11,:) = (/ 0, 0,-1 /)
-  euler_drdw_stencil(12,:) = (/ 0, 0, 1 /)
-  euler_drdw_stencil(13,:) = (/ 0, 0, 2 /)
-
-  ! ---------- Visc PC Stencil -------
-
-  ! Set the first 7 to the euler pc stencil
-  visc_pc_stencil(1:7,:) = euler_pc_stencil
-
-  ! And now for the remaining 20
-  visc_pc_stencil(8, :) =   (/-1, -1, -1/)
-  visc_pc_stencil(9, :) =   (/ 0, -1, -1/)
-  visc_pc_stencil(10, :) =  (/ 1, -1, -1/)
-  visc_pc_stencil(11, :) =  (/-1,  0, -1/)
-  visc_pc_stencil(12, :) =  (/ 1,  0, -1/)
-  visc_pc_stencil(13, :) =  (/-1,  1, -1/)
-  visc_pc_stencil(14, :) =  (/ 0,  1, -1/)
-  visc_pc_stencil(15, :) =  (/ 1,  1, -1/)
-
-  visc_pc_stencil(16, :) =  (/-1, -1,  0/)
-  visc_pc_stencil(17, :) =  (/ 1, -1,  0/)
-  visc_pc_stencil(18, :) =  (/-1,  1,  0/)
-  visc_pc_stencil(19, :) =  (/ 1,  1,  0/)
-
-  visc_pc_stencil(20, :) =  (/-1, -1,  1/)
-  visc_pc_stencil(21, :) =  (/ 0, -1,  1/)
-  visc_pc_stencil(22, :) =  (/ 1, -1,  1/)
-  visc_pc_stencil(23, :) =  (/-1,  0,  1/)
-  visc_pc_stencil(24, :) =  (/ 1,  0,  1/)
-  visc_pc_stencil(25, :) =  (/-1,  1,  1/)
-  visc_pc_stencil(26, :) =  (/ 0,  1,  1/)
-  visc_pc_stencil(27, :) =  (/ 1,  1,  1/)
-
-  ! ------------ Visc dRdw Stencil -------------
-  ! Dense 3x3x3 cube
-  ii = 1
-  do k=-1,1
-     do j=-1,1
-        do i=-1,1
-           visc_drdw_stencil(ii, :) =  (/i, j, k/)
-           ii = ii + 1
+    use stencils
+
+    implicit none
+    integer(kind=intType) :: i, j, k, ii
+
+    ! -------- Euler PC Stencil ---------
+    euler_pc_stencil(1, :) = (/0, 0, 0/)
+    euler_pc_stencil(2, :) = (/-1, 0, 0/)
+    euler_pc_stencil(3, :) = (/1, 0, 0/)
+    euler_pc_stencil(4, :) = (/0, -1, 0/)
+    euler_pc_stencil(5, :) = (/0, 1, 0/)
+    euler_pc_stencil(6, :) = (/0, 0, -1/)
+    euler_pc_stencil(7, :) = (/0, 0, 1/)
+
+    ! ---------- Euler drdw Stencil ---------
+    euler_drdw_stencil(1, :) = (/0, 0, 0/)
+    euler_drdw_stencil(2, :) = (/-2, 0, 0/)
+    euler_drdw_stencil(3, :) = (/-1, 0, 0/)
+    euler_drdw_stencil(4, :) = (/1, 0, 0/)
+    euler_drdw_stencil(5, :) = (/2, 0, 0/)
+    euler_drdw_stencil(6, :) = (/0, -2, 0/)
+    euler_drdw_stencil(7, :) = (/0, -1, 0/)
+    euler_drdw_stencil(8, :) = (/0, 1, 0/)
+    euler_drdw_stencil(9, :) = (/0, 2, 0/)
+    euler_drdw_stencil(10, :) = (/0, 0, -2/)
+    euler_drdw_stencil(11, :) = (/0, 0, -1/)
+    euler_drdw_stencil(12, :) = (/0, 0, 1/)
+    euler_drdw_stencil(13, :) = (/0, 0, 2/)
+
+    ! ---------- Visc PC Stencil -------
+
+    ! Set the first 7 to the euler pc stencil
+    visc_pc_stencil(1:7, :) = euler_pc_stencil
+
+    ! And now for the remaining 20
+    visc_pc_stencil(8, :) = (/-1, -1, -1/)
+    visc_pc_stencil(9, :) = (/0, -1, -1/)
+    visc_pc_stencil(10, :) = (/1, -1, -1/)
+    visc_pc_stencil(11, :) = (/-1, 0, -1/)
+    visc_pc_stencil(12, :) = (/1, 0, -1/)
+    visc_pc_stencil(13, :) = (/-1, 1, -1/)
+    visc_pc_stencil(14, :) = (/0, 1, -1/)
+    visc_pc_stencil(15, :) = (/1, 1, -1/)
+
+    visc_pc_stencil(16, :) = (/-1, -1, 0/)
+    visc_pc_stencil(17, :) = (/1, -1, 0/)
+    visc_pc_stencil(18, :) = (/-1, 1, 0/)
+    visc_pc_stencil(19, :) = (/1, 1, 0/)
+
+    visc_pc_stencil(20, :) = (/-1, -1, 1/)
+    visc_pc_stencil(21, :) = (/0, -1, 1/)
+    visc_pc_stencil(22, :) = (/1, -1, 1/)
+    visc_pc_stencil(23, :) = (/-1, 0, 1/)
+    visc_pc_stencil(24, :) = (/1, 0, 1/)
+    visc_pc_stencil(25, :) = (/-1, 1, 1/)
+    visc_pc_stencil(26, :) = (/0, 1, 1/)
+    visc_pc_stencil(27, :) = (/1, 1, 1/)
+
+    ! ------------ Visc dRdw Stencil -------------
+    ! Dense 3x3x3 cube
+    ii = 1
+    do k = -1, 1
+        do j = -1, 1
+            do i = -1, 1
+                visc_drdw_stencil(ii, :) = (/i, j, k/)
+                ii = ii + 1
+            end do
         end do
-     end do
-  end do
-
-  ! Plus the 6 double halos
-  visc_drdw_stencil(28, :) = (/-2, 0, 0 /)
-  visc_drdw_stencil(29, :) = (/ 2, 0, 0 /)
-  visc_drdw_stencil(30, :) = (/ 0,-2, 0 /)
-  visc_drdw_stencil(31, :) = (/ 0, 2, 0 /)
-  visc_drdw_stencil(32, :) = (/ 0, 0,-2 /)
-  visc_drdw_stencil(33, :) = (/ 0, 0, 2 /)
+    end do
+
+    ! Plus the 6 double halos
+    visc_drdw_stencil(28, :) = (/-2, 0, 0/)
+    visc_drdw_stencil(29, :) = (/2, 0, 0/)
+    visc_drdw_stencil(30, :) = (/0, -2, 0/)
+    visc_drdw_stencil(31, :) = (/0, 2, 0/)
+    visc_drdw_stencil(32, :) = (/0, 0, -2/)
+    visc_drdw_stencil(33, :) = (/0, 0, 2/)
 
 end subroutine initialize_stencils
 
diff --git a/src/modules/su_cgns.F90 b/src/modules/su_cgns.F90
index a18d0ada5..d4ec8a3e7 100644
--- a/src/modules/su_cgns.F90
+++ b/src/modules/su_cgns.F90
@@ -1,4 +1,4 @@
-       module su_cgns
+module su_cgns
 !
 !       Module that contains the definition of the cgns parameters.
 !       Depending on the compiler flags either the cgns module is
@@ -14,38 +14,37 @@ module su_cgns
 !      ******************************************************************
 !
 
-       implicit none
-       save
+    implicit none
+    save
 
-       integer, parameter :: Null = 0
-       integer, parameter :: UserDefined = 1
+    integer, parameter :: Null = 0
+    integer, parameter :: UserDefined = 1
 
-       integer, parameter :: Kilogram  = 2
-       integer, parameter :: Gram      = 3
-       integer, parameter :: Slug      = 4
-       integer, parameter :: PoundMass = 5
+    integer, parameter :: Kilogram = 2
+    integer, parameter :: Gram = 3
+    integer, parameter :: Slug = 4
+    integer, parameter :: PoundMass = 5
 
-       integer, parameter :: Meter      = 2
-       integer, parameter :: Centimeter = 3
-       integer, parameter :: Millimeter = 4
-       integer, parameter :: Foot       = 5
-       integer, parameter :: Inch       = 6
-       integer, parameter :: Second = 2
+    integer, parameter :: Meter = 2
+    integer, parameter :: Centimeter = 3
+    integer, parameter :: Millimeter = 4
+    integer, parameter :: Foot = 5
+    integer, parameter :: Inch = 6
+    integer, parameter :: Second = 2
 
-       integer, parameter :: Kelvin     = 2
-       integer, parameter :: Celcius    = 3
-       integer, parameter :: Celsius    = 3
-       integer, parameter :: Rankine    = 4
-       integer, parameter :: Fahrenheit = 5
+    integer, parameter :: Kelvin = 2
+    integer, parameter :: Celcius = 3
+    integer, parameter :: Celsius = 3
+    integer, parameter :: Rankine = 4
+    integer, parameter :: Fahrenheit = 5
 
-       integer, parameter :: Degree = 2
-       integer, parameter :: Radian = 3
+    integer, parameter :: Degree = 2
+    integer, parameter :: Radian = 3
 #else
 
-       use cgns
-       implicit none
+    use cgns
+    implicit none
 
 #endif
 
-
-       end module su_cgns
+end module su_cgns
diff --git a/src/modules/surfaceFamilies.F90 b/src/modules/surfaceFamilies.F90
index a1f51115a..4554dc865 100644
--- a/src/modules/surfaceFamilies.F90
+++ b/src/modules/surfaceFamilies.F90
@@ -1,106 +1,104 @@
 module surfaceFamilies
 
-  use constants
+    use constants
 #ifndef USE_TAPENADE
 #include <petsc/finclude/petsc.h>
-  use petsc
-  implicit none
+    use petsc
+    implicit none
 
+    type familyExchange
+        ! Vectors for global traction calc
 
-  type familyExchange
-     ! Vectors for global traction calc
+        ! Parallel vector of un-uniqufied values concatenated for all
+        ! included surfaces
+        Vec nodeValLocal
 
-     ! Parallel vector of un-uniqufied values concatenated for all
-     ! included surfaces
-     Vec nodeValLocal
+        ! Parallel vector of uniqueifed values.
+        Vec nodeValGlobal
 
-     ! Parallel vector of uniqueifed values.
-     Vec nodeValGlobal
+        ! Sum global. Same size as nodeValGlobal
+        Vec sumGlobal
 
-     ! Sum global. Same size as nodeValGlobal
-     Vec sumGlobal
+        ! Scatter from nodeValLocal to NodeValGlobal
+        VecScatter scatter
 
-     ! Scatter from nodeValLocal to NodeValGlobal
-     VecScatter scatter
+        ! Flag for allocated petsc variables
+        logical :: allocated = .False.
+        integer(kind=intType), dimension(:), allocatable :: famList
 
-     ! Flag for allocated petsc variables
-     logical :: allocated =.False.
-     integer(kind=intType), dimension(:) , allocatable :: famList
+        integer(Kind=intType) :: sps
+        integer(kind=intType) :: nNodes
+    end type familyExchange
 
-     integer(Kind=intType) :: sps
-     integer(kind=intType) :: nNodes
-  end type familyExchange
+    type BCGroupType
+        integer(kind=intType), pointer, dimension(:) :: famList
+    end type BCGroupType
 
-  type BCGroupType
-     integer(kind=intType), pointer, dimension(:) :: famList
-  end type BCGroupType
+    ! Generic PETSc scatters
+    IS IS1, IS2
 
-  ! Generic PETSc scatters
-  IS IS1, IS2
+    ! The list of exchanges based on boundary condition type
+    type(familyExchange), dimension(:, :), allocatable, target :: BCFamExchange
 
-  ! The list of exchanges based on boundary condition type
-  type(familyExchange), dimension(:, :), allocatable, target :: BCFamExchange
+    ! List of familis grouped by BC. See constants.F90 for the indices
+    ! to use for this array.
+    type(BCGroupType), dimension(nFamExchange) :: BCFamGroups
 
-  ! List of familis grouped by BC. See constants.F90 for the indices
-  ! to use for this array.
-  type(BCGroupType), dimension(nFamExchange) :: BCFamGroups
+    ! The full list of the family names
+    character(len=maxCGNSNameLen), dimension(:), allocatable :: famNames
 
-  ! The full list of the family names
-  character(len=maxCGNSNameLen), dimension(:), allocatable :: famNames
-
-  ! List of all families. This is just 1,2,3,4...nFam. It is just used
-  ! in fortran when a specific family is not required.
-  integer(kind=intType), dimension(:), allocatable :: fullFamList
+    ! List of all families. This is just 1,2,3,4...nFam. It is just used
+    ! in fortran when a specific family is not required.
+    integer(kind=intType), dimension(:), allocatable :: fullFamList
 #endif
 
-  ! Special BC array's that are sometime required for reducitons.
-  real(kind=realType), dimension(:, :), allocatable, target :: zeroCellVal
-  real(kind=realType), dimension(:, :), allocatable, target :: oneCellVal
-  real(kind=realType), dimension(:, :), allocatable, target :: zeroNodeVal
-
+    ! Special BC array's that are sometime required for reducitons.
+    real(kind=realType), dimension(:, :), allocatable, target :: zeroCellVal
+    real(kind=realType), dimension(:, :), allocatable, target :: oneCellVal
+    real(kind=realType), dimension(:, :), allocatable, target :: zeroNodeVal
 
 #ifndef USE_TAPENADE
-  contains
+contains
 
     subroutine getnfam(nfam)
 
-      implicit none
-      integer(kind=inttype), intent(out) :: nfam
-      if (allocated(famnames)) then
-         nfam = size(famnames)
-      else
-         nfam = 0
-      end if
+        implicit none
+        integer(kind=inttype), intent(out) :: nfam
+        if (allocated(famnames)) then
+            nfam = size(famnames)
+        else
+            nfam = 0
+        end if
 
     end subroutine getnfam
 
     subroutine getfam(i, fam)
-      implicit none
-      character(len=maxCGNSNameLen), intent(out) :: fam
-      integer(kind=intType), intent(in) :: i
+        implicit none
+        character(len=maxCGNSNameLen), intent(out) :: fam
+        integer(kind=intType), intent(in) :: i
 
-      if (allocated(famnames)) then
-         fam = famnames(i)
-      end if
+        if (allocated(famnames)) then
+            fam = famnames(i)
+        end if
 
     end subroutine getfam
 
     subroutine destroyFamilyExchange(exch)
 
-      use constants
-      type(familyExchange) :: exch
-      integer(kind=intType) :: ierr
-      if (exch%allocated) then
+        use constants
+        type(familyExchange) :: exch
+        integer(kind=intType) :: ierr
+        if (exch%allocated) then
 
-           call vecDestroy(exch%nodeValLocal, ierr)
-           call vecDestroy(exch%nodeValGlobal, ierr)
-           call vecDestroy(exch%sumGlobal, ierr)
-           call vecScatterDestroy(exch%scatter, ierr)
+            call vecDestroy(exch%nodeValLocal, ierr)
+            call vecDestroy(exch%nodeValGlobal, ierr)
+            call vecDestroy(exch%sumGlobal, ierr)
+            call vecScatterDestroy(exch%scatter, ierr)
 
         end if
 
         exch%allocated = .False.
 
-      end subroutine destroyFamilyExchange
+    end subroutine destroyFamilyExchange
 #endif
 end module surfaceFamilies
diff --git a/src/modules/userSurfaceIntegrationData.F90 b/src/modules/userSurfaceIntegrationData.F90
index 45f935333..7f544109a 100644
--- a/src/modules/userSurfaceIntegrationData.F90
+++ b/src/modules/userSurfaceIntegrationData.F90
@@ -1,43 +1,43 @@
 module userSurfaceIntegrationData
-  use constants
+    use constants
 
-  type userSurfCommType
-     ! Data required on each proc:
+    type userSurfCommType
+        ! Data required on each proc:
 
-     ! nDonor: The number of donor points the proc will provide
-     ! frac (3, nDonor) : The uvw coordinates of the interpolation point
-     ! donorInfo(4, nDonor) : Donor information. 1 is the local block ID and 2-4 is the
-     !    starting i,j,k indices for the interpolation.
-     ! procSizes(0:nProc-1) : The number of donors on each proc
-     ! procDisps(0:nProc) : Cumulative form of procSizes
+        ! nDonor: The number of donor points the proc will provide
+        ! frac (3, nDonor) : The uvw coordinates of the interpolation point
+        ! donorInfo(4, nDonor) : Donor information. 1 is the local block ID and 2-4 is the
+        !    starting i,j,k indices for the interpolation.
+        ! procSizes(0:nProc-1) : The number of donors on each proc
+        ! procDisps(0:nProc) : Cumulative form of procSizes
 
-     ! inv(nConn) : Array allocated only on root processor used to
-     ! reorder the nodes or elements back to the original order.
+        ! inv(nConn) : Array allocated only on root processor used to
+        ! reorder the nodes or elements back to the original order.
 
-     integer(kind=intType) :: nDonor
-     real(kind=realType), dimension(:,:), allocatable :: frac
-     integer(kind=intType), dimension(:, :), allocatable :: donorInfo
-     integer(kind=intTYpe), dimension(:), allocatable :: procSizes, procDisps
-     integer(kind=intTYpe), dimension(:), allocatable :: inv
-     logical, dimension(:), allocatable :: valid
+        integer(kind=intType) :: nDonor
+        real(kind=realType), dimension(:, :), allocatable :: frac
+        integer(kind=intType), dimension(:, :), allocatable :: donorInfo
+        integer(kind=intTYpe), dimension(:), allocatable :: procSizes, procDisps
+        integer(kind=intTYpe), dimension(:), allocatable :: inv
+        logical, dimension(:), allocatable :: valid
 
-  end type userSurfCommType
+    end type userSurfCommType
 
-  type userIntSurf
+    type userIntSurf
 
-     character(len=maxStringLen) :: famName
-     integer(Kind=intType) :: famID
-     logical :: isInflow
-     real(kind=realType), dimension(:, :), allocatable :: pts
-     integer(kind=intType), dimension(:, :), allocatable :: conn
+        character(len=maxStringLen) :: famName
+        integer(Kind=intType) :: famID
+        logical :: isInflow
+        real(kind=realType), dimension(:, :), allocatable :: pts
+        integer(kind=intType), dimension(:, :), allocatable :: conn
 
-     ! Two separate commes: One for the nodes (based on the primal
-     ! mesh) and one for the variables (based on the dual mesh)
-     type(userSurfCommType) :: nodeComm, flowComm
+        ! Two separate commes: One for the nodes (based on the primal
+        ! mesh) and one for the variables (based on the dual mesh)
+        type(userSurfCommType) :: nodeComm, flowComm
 
-  end type userIntSurf
+    end type userIntSurf
 
-  integer(kind=intType), parameter :: nUserIntSurfsMax=360
-  type(userIntSurf), dimension(nUserIntSurfsMax), target :: userIntSurfs
-  integer(kind=intTYpe) :: nUserIntSurfs=0
+    integer(kind=intType), parameter :: nUserIntSurfsMax = 360
+    type(userIntSurf), dimension(nUserIntSurfsMax), target :: userIntSurfs
+    integer(kind=intTYpe) :: nUserIntSurfs = 0
 end module userSurfaceIntegrationData
diff --git a/src/output/tecplotIO.F90 b/src/output/tecplotIO.F90
index c645eee0d..081df6df6 100644
--- a/src/output/tecplotIO.F90
+++ b/src/output/tecplotIO.F90
@@ -1,2308 +1,2305 @@
 module tecplotIO
 
-  use constants, only : realType, intType, maxStringLen, maxCGNSNameLen
-  implicit none
-  save
-
-  character(len=maxStringLen) :: sci6 = "(ES14.6)"
-
-  type slice
-
-     ! nNodes : Number of nodes for this slice
-     ! ind(2, nNodes) : Indices of the nodes in the global node list on either
-     !                  side of node
-     ! w(2, nNodes) : Weights used to multiply the two global nodes defined in
-     !                ind to get compute nodal values (positions, forces etc)
-     ! pL, vL, pD, vD, pM, vM : Pressure and viscous components of lift, drag, and moment
-     ! CLp, CLv, CDp, CDv, CMp, CMv : Coefficients of pressure and viscous lift, drag, and moment
-     ! chord: chord of section
-     ! pt, normal: The point and the normal that defines the slicing plane
-     ! dir_vec: a direction vector that we use to filter sliced line elements.
-     ! use_dir: flag to determine if we use the dir vec or not. if we are doing
-     !          a regular slice, e.g. wing section, we dont want to use the dir
-     !          and want to use the full slice. if we are doing a cylindrical slice
-     !          we then want to pick which direction since we would get 2 slices
-     !          on something like a nacelle in this case.
-
-     character(len=maxStringLen) :: sliceName
-     integer(kind=intType) :: sps
-     integer(kind=intType), dimension(:,:), allocatable :: ind, conn
-     real(kind=realType), dimension(:, :), allocatable :: w, vars
-     integer(kind=intType) :: nNodes
-     real(kind=realType) :: pL, vL, pD, vD, pM, vM, CLp, CLv, CDp, CDv, CMp, CMv
-     real(kind=realType) :: chord, twist, thickness
-     real(kind=realType), dimension(3) :: le, te
-     real(kind=realType), dimension(3) :: pt, normal, dir_vec
-     logical :: use_dir
-     integer(kind=intType), allocatable, dimension(:) :: famList
-     ! here we declare that 'slice'-types also now have fx,fy and fz:
-     real(kind=realType) :: fx, fy, fz
-  end type slice
-
-  type liftDist
-     ! nSegments: Number of nodes to use for distribution
-     ! normal: Slice direction (normal of the plane)
-     ! normal_ind: Index of direction..1 for x, 2 for y, 3 for z
-     ! distName: Name of lift distribution
-     ! slices: The list of slices this distribution will use
-     ! delta: The current delta spacing for the distribution
-     ! slicePoints: The list of points where the slices are taken
-     character(len=maxStringLen) :: distName
-     integer(kind=intType) :: nSegments, normal_ind
-     integer(kind=intType), dimension(:), allocatable :: famList
-     real(kind=realType) :: normal(3)
-     real(kind=realType) :: delta
-     real(kind=realType), dimension(:,:), allocatable :: slicePts
-  end type liftDist
-
-  logical :: liftDistInitialized = .False.
-  integer(kind=intType) :: msCon1(16, 5), msCon2(4, 2)
-
-   ! Data for the user supplied slices:
-  integer(kind=intType), parameter :: nSliceMax=1000
-  integer(kind=intType) :: nParaSlices=0
-  integer(kind=intType) :: nAbsSlices=0
-  type(slice), dimension(:, :), allocatable :: paraSlices, absSlices
-
-  ! Data for the user supplied lift distributions
-  integer(kind=intType), parameter :: nLiftDistMax=100
-  integer(kind=intType) :: nLiftDists=0
-  type(liftDist), dimension(nLiftDistMax), target :: liftDists
-
-  ! Tecplot Variable names of the data in the lift distribution data file:
-  character(len=maxCGNSNameLen), dimension(:), allocatable :: liftDistName
-  integer(kind=intType), parameter :: nLiftDistVar=26
-
-contains
-  subroutine addParaSlice(sliceName, pt, normal, dir_vec, use_dir, famList, n)
-    !
-    !       This subroutine is intended to be called from python.
-    !       This routine will add a parametric slice to the list of user
-    !       supplied slices.
-    use constants
-    use communication
-    use surfaceFamilies
-    use surfaceUtils
-    use inputTimeSpectral
+    use constants, only: realType, intType, maxStringLen, maxCGNSNameLen
     implicit none
+    save
+
+    character(len=maxStringLen) :: sci6 = "(ES14.6)"
+
+    type slice
+
+        ! nNodes : Number of nodes for this slice
+        ! ind(2, nNodes) : Indices of the nodes in the global node list on either
+        !                  side of node
+        ! w(2, nNodes) : Weights used to multiply the two global nodes defined in
+        !                ind to get compute nodal values (positions, forces etc)
+        ! pL, vL, pD, vD, pM, vM : Pressure and viscous components of lift, drag, and moment
+        ! CLp, CLv, CDp, CDv, CMp, CMv : Coefficients of pressure and viscous lift, drag, and moment
+        ! chord: chord of section
+        ! pt, normal: The point and the normal that defines the slicing plane
+        ! dir_vec: a direction vector that we use to filter sliced line elements.
+        ! use_dir: flag to determine if we use the dir vec or not. if we are doing
+        !          a regular slice, e.g. wing section, we dont want to use the dir
+        !          and want to use the full slice. if we are doing a cylindrical slice
+        !          we then want to pick which direction since we would get 2 slices
+        !          on something like a nacelle in this case.
+
+        character(len=maxStringLen) :: sliceName
+        integer(kind=intType) :: sps
+        integer(kind=intType), dimension(:, :), allocatable :: ind, conn
+        real(kind=realType), dimension(:, :), allocatable :: w, vars
+        integer(kind=intType) :: nNodes
+        real(kind=realType) :: pL, vL, pD, vD, pM, vM, CLp, CLv, CDp, CDv, CMp, CMv
+        real(kind=realType) :: chord, twist, thickness
+        real(kind=realType), dimension(3) :: le, te
+        real(kind=realType), dimension(3) :: pt, normal, dir_vec
+        logical :: use_dir
+        integer(kind=intType), allocatable, dimension(:) :: famList
+        ! here we declare that 'slice'-types also now have fx,fy and fz:
+        real(kind=realType) :: fx, fy, fz
+    end type slice
+
+    type liftDist
+        ! nSegments: Number of nodes to use for distribution
+        ! normal: Slice direction (normal of the plane)
+        ! normal_ind: Index of direction..1 for x, 2 for y, 3 for z
+        ! distName: Name of lift distribution
+        ! slices: The list of slices this distribution will use
+        ! delta: The current delta spacing for the distribution
+        ! slicePoints: The list of points where the slices are taken
+        character(len=maxStringLen) :: distName
+        integer(kind=intType) :: nSegments, normal_ind
+        integer(kind=intType), dimension(:), allocatable :: famList
+        real(kind=realType) :: normal(3)
+        real(kind=realType) :: delta
+        real(kind=realType), dimension(:, :), allocatable :: slicePts
+    end type liftDist
+
+    logical :: liftDistInitialized = .False.
+    integer(kind=intType) :: msCon1(16, 5), msCon2(4, 2)
+
+    ! Data for the user supplied slices:
+    integer(kind=intType), parameter :: nSliceMax = 1000
+    integer(kind=intType) :: nParaSlices = 0
+    integer(kind=intType) :: nAbsSlices = 0
+    type(slice), dimension(:, :), allocatable :: paraSlices, absSlices
+
+    ! Data for the user supplied lift distributions
+    integer(kind=intType), parameter :: nLiftDistMax = 100
+    integer(kind=intType) :: nLiftDists = 0
+    type(liftDist), dimension(nLiftDistMax), target :: liftDists
+
+    ! Tecplot Variable names of the data in the lift distribution data file:
+    character(len=maxCGNSNameLen), dimension(:), allocatable :: liftDistName
+    integer(kind=intType), parameter :: nLiftDistVar = 26
 
-    ! Input parameters
-    character(len=*), intent(in) :: sliceName
-    real(kind=realType), dimension(3), intent(in) :: pt, normal, dir_vec
-    logical, intent(in) :: use_dir
-    integer(kind=intType), intent(in) :: n, famList(n)
-
-    ! Working
-    integer(kind=intType) :: sps, sizeNode, sizeCell
-    integer(kind=intType), dimension(:), pointer :: wallList
-    real(kind=realType), dimension(:, :), allocatable :: pts
-    integer(kind=intType), dimension(:, :), allocatable :: conn
-    integer(kind=intType), dimension(:), allocatable :: elemFam, cgnsBlockID
-
-    if (.not. allocated(paraSlices)) then
-       allocate(paraSlices(nSliceMax, nTimeIntervalsSpectral))
-    end if
-
-    ! We have to add a slice for each spectral instance.
-    do sps=1, nTimeIntervalsSpectral
-       nParaSlices = nParaSlices + 1
-
-       if (nParaSlices > nSliceMax) then
-          print *,'Error: Exceeded the maximum number of slices. Increase nSliceMax'
-          stop
-       end if
-
-       ! Slices are created on walls and walls only. Retrieve the
-       ! points, connectivity and familyID of all the walls.
-       wallList =>  BCFamGroups(iBCGroupWalls)%famList
-       call getSurfaceSize(sizeNode, sizeCell, wallList, size(wallList), .True.)
-       allocate(pts(3, sizeNode), conn(4, sizeCell), elemFam(sizeCell), cgnsBlockID(sizeCell))
-       call getSurfaceConnectivity(conn, cgnsBlockID, sizeCell, wallList, size(wallList), .True.)
-       call getSurfacePoints(pts, sizeNode, sps, wallList, size(wallList), .True.)
-       call getSurfaceFamily(elemFam, sizeCell, wallList, size(wallList), .True.)
-
-       ! Create actual slice
-       call createSlice(pts, conn, elemFam, paraSlices(nParaSlices, sps), pt, normal, dir_vec, &
-            use_dir, sliceName, famList)
-
-       ! Clean up memory.
-       deallocate(pts, conn, elemFam)
-    end do
-
-  end subroutine addParaSlice
-
-  subroutine addAbsSlice(sliceName, pt, normal, dir_vec, use_dir, famList, n)
-    !
-    !       This subroutine is intended to be called from python.
-    !       This routine will add an absolute slice to the list of user
-    !       supplied slices.
-    use constants
-    use communication
-    use surfaceFamilies
-    use surfaceUtils
-    use inputTimeSpectral
-    implicit none
+contains
+    subroutine addParaSlice(sliceName, pt, normal, dir_vec, use_dir, famList, n)
+        !
+        !       This subroutine is intended to be called from python.
+        !       This routine will add a parametric slice to the list of user
+        !       supplied slices.
+        use constants
+        use communication
+        use surfaceFamilies
+        use surfaceUtils
+        use inputTimeSpectral
+        implicit none
 
-    ! Input parameters
-    character(len=*), intent(in) :: sliceName
-    real(kind=realType), dimension(3), intent(in) :: pt, normal, dir_vec
-    logical, intent(in) :: use_dir
-    integer(kind=intType), intent(in) :: n, famList(n)
-
-    ! Working
-    integer(kind=intType) :: sps, sizeNode, sizeCell
-    integer(kind=intType), dimension(:), pointer :: wallList
-    real(kind=realType), dimension(:, :), allocatable :: pts
-    integer(kind=intType), dimension(:, :), allocatable :: conn
-    integer(kind=intType), dimension(:), allocatable :: elemFam, cgnsBlockID
-
-    if (.not. allocated(absSlices)) then
-       allocate(absSlices(nSliceMax, nTimeIntervalsSpectral))
-    end if
-
-    do sps=1, nTimeIntervalsSpectral
-       nAbsSlices = nAbsSlices + 1
-
-       if (nAbsSlices > nSliceMax) then
-          print *,'Error: Exceeded the maximum number of slices. Increase nSliceMax'
-          stop
-       end if
-
-       wallList =>  BCFamGroups(iBCGroupWalls)%famList
-       call getSurfaceSize(sizeNode, sizeCell, wallList, size(wallList), .True.)
-       allocate(pts(3, sizeNode), conn(4, sizeCell), elemFam(sizeCell), cgnsBlockID(sizeCell))
-       call getSurfaceConnectivity(conn, cgnsBlockID, sizeCell, wallList, size(wallList), .True.)
-       call getSurfacePoints(pts, sizeNode, sps, wallList, size(wallList), .True.)
-       call getSurfaceFamily(elemFam, sizeCell, wallList, size(wallList), .True.)
-       call createSlice(pts, conn, elemFam, absSlices(nAbsSlices, sps), pt, normal, dir_vec, &
-            use_dir, sliceName, famList)
-
-       ! Clean up memory.
-       deallocate(pts, conn, elemFam)
-    end do
-
-  end subroutine addAbsSlice
-
-  subroutine addLiftDistribution(nSegments, normal, normal_ind, distName, famList, n)
-    !
-    !       This subroutine is intended to be called from python.
-    !       This routine will add the description of a lift distribution
-
-    use constants
-    use communication
-    use surfaceFamilies
-    implicit none
+        ! Input parameters
+        character(len=*), intent(in) :: sliceName
+        real(kind=realType), dimension(3), intent(in) :: pt, normal, dir_vec
+        logical, intent(in) :: use_dir
+        integer(kind=intType), intent(in) :: n, famList(n)
+
+        ! Working
+        integer(kind=intType) :: sps, sizeNode, sizeCell
+        integer(kind=intType), dimension(:), pointer :: wallList
+        real(kind=realType), dimension(:, :), allocatable :: pts
+        integer(kind=intType), dimension(:, :), allocatable :: conn
+        integer(kind=intType), dimension(:), allocatable :: elemFam, cgnsBlockID
+
+        if (.not. allocated(paraSlices)) then
+            allocate (paraSlices(nSliceMax, nTimeIntervalsSpectral))
+        end if
+
+        ! We have to add a slice for each spectral instance.
+        do sps = 1, nTimeIntervalsSpectral
+            nParaSlices = nParaSlices + 1
+
+            if (nParaSlices > nSliceMax) then
+                print *, 'Error: Exceeded the maximum number of slices. Increase nSliceMax'
+                stop
+            end if
+
+            ! Slices are created on walls and walls only. Retrieve the
+            ! points, connectivity and familyID of all the walls.
+            wallList => BCFamGroups(iBCGroupWalls)%famList
+            call getSurfaceSize(sizeNode, sizeCell, wallList, size(wallList), .True.)
+            allocate (pts(3, sizeNode), conn(4, sizeCell), elemFam(sizeCell), cgnsBlockID(sizeCell))
+            call getSurfaceConnectivity(conn, cgnsBlockID, sizeCell, wallList, size(wallList), .True.)
+            call getSurfacePoints(pts, sizeNode, sps, wallList, size(wallList), .True.)
+            call getSurfaceFamily(elemFam, sizeCell, wallList, size(wallList), .True.)
+
+            ! Create actual slice
+            call createSlice(pts, conn, elemFam, paraSlices(nParaSlices, sps), pt, normal, dir_vec, &
+                             use_dir, sliceName, famList)
+
+            ! Clean up memory.
+            deallocate (pts, conn, elemFam)
+        end do
 
-    ! Input parameters
-    character(len=*), intent(in) :: distName
-    integer(kind=intType), intent(in) :: nSegments
-    real(kind=realType), dimension(3) :: normal
-    integer(kind=intType), intent(in) :: normal_ind
-    integer(kind=intType), intent(in) :: n, famList(n)
-
-    nLiftDists = nLiftDists + 1
-    if (nLiftDists > nLiftDistMax) then
-       print *,'Error: Exceeded the maximum number of lift distributions. &
-            &Increase nLiftDistMax'
-       stop
-    end if
-
-    liftDists(nLIftDists)%nSegments = nSegments
-    liftDists(nLiftDists)%normal = normal
-    liftDists(nLIftDists)%normal_ind = normal_ind
-    liftDists(nLiftDists)%distName = distName
-
-    allocate(liftDists(nLiftDists)%famList(n))
-    liftDists(nLiftDists)%famList(:) = famList
-
-  end subroutine addLiftDistribution
-
-  subroutine writeTecplot(sliceFile, writeSlices, liftFile, writeLift, &
-       surfFile, writeSurf, famList, nFamList)
-    !
-    !       This is the master routine for writing tecplot data from adflow.
-    !       This routine will write the slice, lift and surface files
-    !       depending on the flags writeSlics, writeLift and writeSurface.
-    !       The reason for the combined routine is that we can safely only
-    !       perform the nodal averaging once which is required for all
-    !       three output files.
-    use constants
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use surfaceFamilies, only : BCFamGroups, BCFamExchange
-    use surfaceUtils, only : getSurfaceSize
-    use oversetData, only : zipperMeshes
-    use outputMod, only : numberOfSurfSolVariables
-    implicit none
+    end subroutine addParaSlice
+
+    subroutine addAbsSlice(sliceName, pt, normal, dir_vec, use_dir, famList, n)
+        !
+        !       This subroutine is intended to be called from python.
+        !       This routine will add an absolute slice to the list of user
+        !       supplied slices.
+        use constants
+        use communication
+        use surfaceFamilies
+        use surfaceUtils
+        use inputTimeSpectral
+        implicit none
 
-    ! Input Params
-    character(len=*), intent(in) :: sliceFile, liftFile, surfFile
-    logical, intent(in) :: writeSlices, writeLift, writeSurf
-    integer(kind=intType), intent(in) :: nFamList
-    integer(kind=intType), intent(in), dimension(nFamList) :: famList
-    real(kind=realType), dimension(:, :, :), allocatable :: nodalValues
-    ! Working
-    integer(kind=intType) :: sps, nSolVar, sizeNOde, sizeCell
-    integer(kind=intType), dimension(:), pointer :: wallLIST
-
-    ! Determine the number of surface variables we have
-    call numberOfSurfSolVariables(nSolVar)
-    wallList =>  BCFamGroups(iBCGroupWalls)%famList
-    call getSurfaceSize(sizeNode, sizeCell, wallList, size(wallList), .True.)
-
-    ! Allocate and compute the wall-based surface data for hte slices
-    ! and lift distributions.
-    allocate(nodalValues(max(sizeNode,1), nSolVar+6+3, nTimeIntervalsSpectral))
-
-    do sps=1, nTimeIntervalsSpectral
-       call computeSurfaceOutputNodalData(BCFamExchange(iBCGroupWalls, sps), &
-            zipperMeshes(iBCGroupWalls), .True., nodalValues(:, :, sps))
-    end do
-
-    if (writeSlices) then
-       call writeSlicesFile(sliceFile, nodalValues)
-    end if
-
-    if (writeLift) then
-       call writeLiftDistributionFile(liftFile, nodalValues)
-    end if
-
-    deallocate(nodalValues)
-
-    if (writeSurf) then
-       call writeTecplotSurfaceFile(surfFile, famList)
-    end if
-
-  end subroutine writeTecplot
-
-  subroutine writeSlicesFile(fileName, nodalValues)
-    !
-    ! This subroutine is intended to be called from python.  This
-    ! routine will write the user defined slics to an to the (ascii)
-    ! tecplot file fileName. ASCII files are used for simplicity since
-    ! very little information is actually written.
-    use constants
-    use communication
-    use outputMod
-    use inputTimeSpectral
-    use inputPhysics
-    use inputIteration
-    use inputIO
-    use surfaceFamilies
-    use surfaceUtils
-    use utils, only : EChk
-    implicit none
+        ! Input parameters
+        character(len=*), intent(in) :: sliceName
+        real(kind=realType), dimension(3), intent(in) :: pt, normal, dir_vec
+        logical, intent(in) :: use_dir
+        integer(kind=intType), intent(in) :: n, famList(n)
+
+        ! Working
+        integer(kind=intType) :: sps, sizeNode, sizeCell
+        integer(kind=intType), dimension(:), pointer :: wallList
+        real(kind=realType), dimension(:, :), allocatable :: pts
+        integer(kind=intType), dimension(:, :), allocatable :: conn
+        integer(kind=intType), dimension(:), allocatable :: elemFam, cgnsBlockID
+
+        if (.not. allocated(absSlices)) then
+            allocate (absSlices(nSliceMax, nTimeIntervalsSpectral))
+        end if
+
+        do sps = 1, nTimeIntervalsSpectral
+            nAbsSlices = nAbsSlices + 1
+
+            if (nAbsSlices > nSliceMax) then
+                print *, 'Error: Exceeded the maximum number of slices. Increase nSliceMax'
+                stop
+            end if
+
+            wallList => BCFamGroups(iBCGroupWalls)%famList
+            call getSurfaceSize(sizeNode, sizeCell, wallList, size(wallList), .True.)
+            allocate (pts(3, sizeNode), conn(4, sizeCell), elemFam(sizeCell), cgnsBlockID(sizeCell))
+            call getSurfaceConnectivity(conn, cgnsBlockID, sizeCell, wallList, size(wallList), .True.)
+            call getSurfacePoints(pts, sizeNode, sps, wallList, size(wallList), .True.)
+            call getSurfaceFamily(elemFam, sizeCell, wallList, size(wallList), .True.)
+            call createSlice(pts, conn, elemFam, absSlices(nAbsSlices, sps), pt, normal, dir_vec, &
+                             use_dir, sliceName, famList)
+
+            ! Clean up memory.
+            deallocate (pts, conn, elemFam)
+        end do
 
-    ! Input Params
-    character(len=*), intent(in) :: fileName
-    real(kind=realType), intent(inout), dimension(:, :, :) :: nodalValues
-
-    ! Working parameters
-    integer(kind=intType) :: file, i, sps, nSolVar, ierr
-    character(len=maxStringLen) :: fname
-    character(len=7) :: intString
-    character(len=maxCGNSNameLen), dimension(:), allocatable :: solNames
-    integer(kind=intType), allocatable, dimension(:) :: famList
-    type(slice) :: globalSlice
-    integer(kind=intType) :: sizeNode, sizeCell
-    integer(kind=intType), dimension(:), pointer :: wallList
-    real(kind=realType), dimension(:, :), allocatable :: pts
-    integer(kind=intType), dimension(:, :), allocatable :: conn
-    integer(kind=intType), dimension(:), allocatable :: elemFam, cgnsBlockID
-
-    ! Only write if we actually have lift distributions
-    testwriteSlices: if(nParaSlices + nAbsSlices > 0) then
-
-       if(myID == 0 .and. printIterations) then
-          print "(a)", "#"
-          print "(a)", "# Writing slices file(s) ..."
-       endif
-
-       do sps=1,nTimeIntervalsSpectral
-
-          ! If it is time spectral we need to agument the filename
-          if (equationMode == timeSpectral) then
-             write(intString,"(i7)") sps
-             intString = adjustl(intString)
-             fname = trim(fileName)//"Spectral"//trim(intString)
-          else
-             fname = fileName
-          end if
-
-          file = 11
-          ! Open file on root proc:
-          if (myid == 0) then
-             open(unit=file, file=trim(fname))
-
-             ! Write Header Information
-             write (file,*) "Title = ""ADflow Slice Data"""
-             write (file,"(a)", advance="no") "Variables = "
-             write(file,"(a)",advance="no") " ""CoordinateX"" "
-             write(file,"(a)",advance="no") " ""CoordinateY"" "
-             write(file,"(a)",advance="no") " ""CoordinateZ"" "
-             write(file,"(a)",advance="no") " ""XoC"" "
-             write(file,"(a)",advance="no") " ""YoC"" "
-             write(file,"(a)",advance="no") " ""ZoC"" "
-
-             ! Number of additional variables on slices
-             call numberOfSurfSolVariables(nSolVar)
-             allocate(solNames(nSolVar))
-             call surfSolNames(solNames)
-
-             ! Write the rest of the variables
-             do i=1,nSolVar
-                write(file,"(a,a,a)",advance="no") """",trim(solNames(i)),""" "
-             end do
-
-             write(file,"(1x)")
-             deallocate(solNames)
-          end if
-          call mpi_bcast(nSolVar, 1, adflow_integer, 0, adflow_comm_world, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-
-          ! Slices are created on walls and walls only. Retrieve the
-          ! points, connectivity and familyID of all the walls.
-          wallList =>  BCFamGroups(iBCGroupWalls)%famList
-          call getSurfaceSize(sizeNode, sizeCell, wallList, size(wallList), .True.)
-          allocate(pts(3, sizeNode), conn(4, sizeCell), elemFam(sizeCell), cgnsBlockID(sizeCell))
-          call getSurfaceConnectivity(conn, cgnsBlockID, sizeCell, wallList, size(wallList), .True.)
-          call getSurfacePoints(pts, sizeNode, sps, wallList, size(wallList), .True.)
-          call getSurfaceFamily(elemFam, sizeCell, wallList, size(wallList), .True.)
-
-          ! Integration is performed in parallel
-          do i=1, nParaSlices
-             call integrateSlice(paraSlices(i, sps), globalSlice, &
-                  nodalValues(:, :, sps), nSolVar, .True.)
-             if (myid == 0) then
-                call writeSlice(globalSlice, file, nSolVar)
-             end if
-             call destroySlice(globalSlice)
-          end do
-
-          do i=1, nAbsSlices
-             ! 'Destroy' the slice...just dealloc the allocated data.
-             ! before we do, save the family list
-             allocate(famList(size(absSlices(i, sps)%famList)))
-             famList = absSlices(i, sps)%famList
-             call destroySlice(absSlices(i, sps))
-
-             ! Make new one in the same location
-             call createSlice(pts, conn, elemFam, absSlices(i, sps), &
-                  absSlices(i, sps)%pt, absSlices(i, sps)%normal, absSlices(i, sps)%dir_vec, &
-                  absSlices(i, sps)%use_dir, absSlices(i, sps)%sliceName, famList)
-
-             call integrateSlice(absSlices(i, sps), globalSlice, &
-                  nodalValues(:, :, sps), nSolVar, .True.)
-             if (myid == 0) then
-                call writeSlice(globalSlice, file, nSolVar)
-             end if
-             call destroySlice(globalSlice)
-             deallocate(famList)
-          end do
-
-          !Close file on root proc
-          if (myid == 0) then
-             close(file)
-          end if
-       end do
-
-       if(myID == 0 .and. printIterations) then
-          print "(a)", "# Slices file(s) written"
-          print "(a)", "#"
-       endif
-    end if testwriteSlices
-  end subroutine writeSlicesFile
-
-  subroutine writeLiftDistributionFile(fileName, nodalValues)
-    !
-    !
-    !       This subroutine is intended to be called from python.
-    !       This routine will write the added lift distributions
-    !       to the (ascii) tecplot file fileName. ASCII files are
-    !       used for siplicity since very little informatin is actually
-    !       written.
-    use constants
-    use communication
-    use outputMod
-    use inputPhysics
-    use inputTimeSpectral
-    use inputIteration
-    use surfaceFamilies
-    implicit none
+    end subroutine addAbsSlice
 
-    ! Input Params
-    character(len=*), intent(in) :: fileName
-    real(kind=realType), dimension(:, :, :), allocatable :: nodalValues
-
-    ! Working parameters
-    integer(kind=intType) :: file, sps
-    character(len=maxStringLen) :: fname
-    character(len=7) :: intString
-
-    ! Only write if we actually have lift distributions
-    testwriteLiftDists: if(nLiftDists > 0) then
-
-       if(myID == 0 .and. printIterations) then
-          print "(a)", "#"
-          print "(a)", "# Writing lift distribution file(s) ..."
-       endif
-
-       do sps=1,nTimeIntervalsSpectral
-
-          ! If it is time spectral we need to agument the filename
-          if (equationMode == timeSpectral) then
-             write(intString,"(i7)") sps
-             intString = adjustl(intString)
-             fname = trim(fileName)//"Spectral"//trim(intString)
-          else
-             fname = fileName
-          end if
-
-          file = 11
-          ! Open file on root proc:
-          if (myid == 0) then
-             open(unit=file, file=trim(fname))
-          end if
-
-          call writeLiftDistributions(sps, file, nodalValues(:, :, sps))
-
-          ! Close file on root proc
-          if (myid == 0) then
-             close(file)
-          end if
-       end do
-
-       if(myID == 0 .and. printIterations) then
-          print "(a)", "# Lift distribution file(s) written"
-          print "(a)", "#"
-       endif
-
-    end if testwriteLiftDists
-  end subroutine writeLiftDistributionFile
-
-  subroutine writeLiftDistributions(sps, fileID, nodalValues)
-    !
-    !       This subroutine writes the liftdistribution for the specified
-    !       spectral instance. It is assumed that the required file handles
-    !       are already open and can be written to
-    use constants
-    use communication
-    use outputMod
-    use su_cgns
-    use cgnsNames
-    use surfaceFamilies, only : BCFamGroups, BCFamExchange
-    use surfaceUtils
-    use utils, only : EChk
-    use sorting, only : famInList
-    implicit none
+    subroutine addLiftDistribution(nSegments, normal, normal_ind, distName, famList, n)
+        !
+        !       This subroutine is intended to be called from python.
+        !       This routine will add the description of a lift distribution
 
-    ! Input parameters
-    integer(kind=intType), intent(in) :: sps, fileID
-    real(kind=realType), dimension(:, :), intent(in) :: nodalValues
-
-    real(kind=realType), dimension(3) :: xmin, xmax, xmin_local, xmax_local
-    real(kind=realType), parameter :: tol=1e-8
-    type(liftDist), pointer :: d
-    integer(kind=intType) :: i, j, ii, jj, iDist, ierr
-    real(kind=realType), dimension(:,:), allocatable :: values
-    character(len=maxCGNSNameLen), dimension(:), allocatable :: liftDistNames
-    real(kind=realType) :: dmin, dmax, sumL, sumD, sumM, span, delta, xCur(3)
-    type(slice) :: localSlice, globalSlice
-    integer(kind=intType) :: sizeNode, sizeCell
-    integer(kind=intType), dimension(:), pointer :: wallList
-    real(kind=realType), dimension(:, :), allocatable :: pts
-    integer(kind=intType), dimension(:, :), allocatable :: conn
-    integer(kind=intType), dimension(:), allocatable :: elemFam, cgnsBlockID
-
-    ! Slices are created on walls and walls only. Retrieve the
-    ! points, connectivity and familyID of all the walls.
-    wallList =>  BCFamGroups(iBCGroupWalls)%famList
-    call getSurfaceSize(sizeNode, sizeCell, wallList, size(wallList), .True.)
-    allocate(pts(3, sizeNode), conn(4, sizeCell), elemFam(sizeCell), cgnsBlockID(sizeCell))
-    call getSurfaceConnectivity(conn, cgnsBlockID, sizeCell, wallList, size(wallList), .True.)
-    call getSurfacePoints(pts, sizeNode, sps, wallList, size(wallList), .True.)
-    call getSurfaceFamily(elemFam, sizeCell, wallList, size(wallList), .True.)
-
-    do iDist=1,nLiftDists
-
-       d => liftDists(iDist)
-       xmin_local = huge(real(zero))
-       xmax_local = -huge(real(zero))
-
-       ! Get the bounding box for the entire geometry we have been slicing.
-       elemLoop: do i=1, size(conn, 2)
-          if (famInList(elemFam(i), d%FamList)) then
-
-             ! Extract each of the 4 nodes on this quad:
-             do jj=1,4
-                xCur = pts(:, conn(jj, i))
-                ! Check the max/min on each index
-                do ii=1,3
-                   xmin_local(ii) = min(xmin_local(ii) , xCur(ii))
-                   xmax_local(ii) = max(xmax_local(ii) , xCur(ii))
-                end do
-             end do
-          end if
-       end do elemLoop
-
-       ! Globalize all min/max values.
-       call mpi_allreduce(xmin_local, xmin, 3, adflow_real, MPI_MIN, &
-            adflow_comm_world, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       call mpi_allreduce(xmax_local, xmax, 3, adflow_real, MPI_MAX, &
-            adflow_comm_world, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       d%delta = (xMax(d%normal_ind) - xMin(d%normal_ind))/dble((d%nSegments - 1))
-       allocate(d%slicePts(3, d%nSegments))
-
-       ! Zero out all segments
-       d%slicePts = zero
-
-       ! These are the variable names for the lift distribution:
-       allocate(liftDistNames(nLiftDistVar))
-       ! Set the names here
-       liftDistNames(1) = "eta"
-       liftDistNames(2) = cgnsCoorX
-       liftDistNames(3) = cgnsCoorY
-       liftDistNames(4) = cgnsCoorZ
-       liftDistNames(5) = "Lift"
-       liftDistNames(6) = "Drag"
-       liftDistNames(7) = "Moment"
-       liftDistNames(8) = "Normalized Lift"
-       liftDistNames(9) = "Normalized Drag"
-       liftDistNames(10) = "Normalized Moment"
-       liftDistNames(11) = "CL"
-       liftDistNames(12) = "CD"
-       liftDistNames(13) = "CM"
-       liftDistNames(14) = "CLp"
-       liftDistNames(15) = "CDp"
-       liftDistNames(16) = "CMp"
-       liftDistNames(17) = "CLv"
-       liftDistNames(18) = "CDv"
-       liftDistNames(19) = "CMv"
-       liftDistNames(20) = "Elliptical"
-       liftDistNames(21) = "thickness"
-       liftDistNames(22) = "twist"
-       liftDistNames(23) = "chord"
-       liftDistNames(24) = "fx"
-       liftDistNames(25) = "fy"
-       liftDistNames(26) = "fz"
-
-       ! Only write header info for first distribution only
-       if (myid == 0) then
-          if (iDist == 1) then
-             write (fileID,*) "Title= ""ADflow Lift Distribution Data"""
-             write (fileID,"(a)", advance="no") "Variables = "
-             do i=1,nLIftDistVar ! here we just write var-names in the header
-                write(fileID,"(a,a,a)",advance="no") """",trim(liftDistNames(i)),""" "
-             end do
-             write(fileID,"(1x)")
-          end if
-
-          write (fileID,"(a,a,a)") "Zone T= """,trim(d%distName),""""
-          write (fileID,*) "I= ",d%nSegments
-          write (fileID,*) "DATAPACKING=BLOCK"
-       end if
-
-       allocate(values(d%nSegments, nLiftDistVar))
-       values = zero
-
-       do i=1,d%nSegments
-          if (i==1) then
-             d%slicePts(d%normal_ind, i) = xMin(d%normal_ind) + tol
-          else if (i == d%nSegments) then
-             d%slicePts(d%normal_ind, i) = xMin(d%normal_ind) + (i-1)*d%delta - tol
-          else
-             d%slicePts(d%normal_ind, i) = xMin(d%normal_ind) + (i-1)*d%delta
-          end if
-       end do
-
-       ! Scaled Eta values
-       dmin = minVal(d%slicePts(d%normal_ind, :))
-       dmax = maxval(d%slicePts(d%normal_ind, :))
-
-       ! next line we save ETA as the first column. This corresponds to
-       ! when we open a lift file, e.g. fc_000_lift.dat where we find
-       ! ETA as the first entry... (also visible through tec360)
-       values(:, 1) = (d%slicePts(d%normal_ind, :) - dmin)/(dmax-dmin)
-       ! Coordinate Varaibles
-       if (d%normal_ind == 1) then! X slices
-          values(:, 2) = d%slicePts(1, :)
-       else if (d%normal_ind == 2) then ! Y slices
-          values(:, 3) = d%slicePts(2, :)
-       else if (d%normal_ind == 3) then ! Z slices
-          values(:, 4) = d%slicePts(3, :)
-       end if
-
-       !      as mentioned above nSegments are number of nodes in the
-       !      distribution, e.g. maybe you have asked for 150 points out along
-       !      your wing where you want the lift. Ok, we now for each of these
-       !      150 points have to create a slice and integrate our metrics around
-       !      said slice, save the integrated metrics (e.g. lift, drag) for the
-       !      current point. We then move on to the next point, make a new slice
-       !      get the new point's metrics and save them ... we do this 150 times
-       do i=1, d%nSegments
-          ! Make new one in the same location
-          ! we just pass the normal again as the direction vector because its not used
-          call createSlice(pts, conn, elemFam, localSlice, d%slicePts(:, i), &
-               d%normal, d%normal, .False., "does_not_matter", d%famList)
-          call integrateSlice(localSlice, globalSlice, nodalValues, 0, .False.)
-
-          ! Total lift, drag, and moment
-          values(i, 5)  = globalSlice%pL + globalSlice%vL
-          values(i, 6)  = globalSlice%pD + globalSlice%vD
-          values(i, 7)  = globalSlice%pM + globalSlice%vM
-
-          ! Total CL, CD, and CM
-          values(i, 11)  = globalSlice%CLp + globalSlice%CLv
-          values(i, 12) = globalSlice%CDp + globalSlice%CDv
-          values(i, 13) = globalSlice%CMp + globalSlice%CMv
-
-          ! Pressure lift, drag, and moment coefficients
-          values(i, 14) = globalSlice%CLp
-          values(i, 15) = globalSlice%CDp
-          values(i, 16) = globalSlice%CMp
-
-          ! Viscous lift, drag, and moment coefficients
-          values(i, 17) = globalSlice%CLv
-          values(i, 18) = globalSlice%CDv
-          values(i, 19) = globalSlice%CMv
-
-          ! t/c, twist, chord
-          values(i, 21) = globalSlice%thickness
-          values(i, 22) = globalSlice%twist
-          values(i, 23) = globalSlice%chord
-
-          ! here we now save our new, added values that we have desired to use
-          values(i, 24) = globalSlice%fx
-          values(i, 25) = globalSlice%fy
-          values(i, 26) = globalSlice%fz
-
-          call destroySlice(localSlice)
-          call destroySlice(globalSlice)
-
-       end do
-
-       ! Sum up the lift, drag, and moment values from the slices:
-       sumL = zero
-       sumD = zero
-       sumM = zero
-       do i=1,d%nSegments-1
-          sumL = sumL + half*(values(i, 5) + values(i+1, 5))
-          sumD = sumD + half*(values(i, 6) + values(i+1, 6))
-          sumM = sumM + half*(values(i, 7) + values(i+1, 7))
-       end do
-
-       ! Now compute the normalized lift, drag and elliptical since
-       ! we know the sum. Note delta is non-dimensional!
-       delta = values(2, 1) - values(1, 1)
-
-       ! This is the "nondimensional" span...it basically takes into account if you have
-       ! a wing not at the sym plane
-       span = maxval(values(:, 1)) - minval(values(:, 1))
-       dmin = minval(values(:, 1))
-       sumL = sumL * delta
-       sumD = sumD * delta
-       sumM = sumM * delta
-
-       do i=1,d%nSegments
-
-          ! Normalized Lift, Drag, and Moment
-          values(i, 8) = values(i, 5) / sumL
-          values(i, 9) = values(i, 6) / sumD
-          values(i, 10) = values(i, 7) / abs(sumM)
-
-          ! elliptical value
-          values(i, 20) = four/pi/span*sqrt(one-(values(i, 1)-dmin)**2/span**2)
-       end do
-
-       ! Write all variables in block format
-       if (myid == 0) then
-          do j=1,nLiftDistVar
-             do i=1,d%nSegments
-                write(fileID, sci6) values(i, j)
-             end do
-          end do
-       end if
-
-       ! Deallocate slice list and point list
-       deallocate(d%slicePts)
-
-       ! Destroy temp variables
-       deallocate(liftDistNames, values)
-    end do
-
-  end subroutine writeLiftDistributions
-
- subroutine writeTecplotSurfaceFile(fileName, famList)
-    use constants
-    use communication, only : myid, adflow_comm_world, nProc
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use inputPhysics, only : equationMode
-    use inputIteration, only : printIterations
-    use inputIO, only :precisionsurfgrid, precisionsurfsol
-    use outputMod, only : surfSolNames, numberOfSurfSolVariables
-    use surfaceFamilies, onlY : BCFamExchange, famNames, familyExchange
-    use utils, only : EChk, setPointers, setBCPointers
-    use BCPointers, only : xx
-    use sorting, only : famInList
-    use extraOutput, only : surfWriteBlank
-    use oversetData, only : zipperMesh, zipperMeshes
-    use surfaceUtils
-#include <petsc/finclude/petsc.h>
-    use petsc
-    implicit none
+        use constants
+        use communication
+        use surfaceFamilies
+        implicit none
 
-    ! Input Params
-    character(len=*), intent(in) :: fileName
-    integer(kind=intType), intent(in), dimension(:) :: famList
-
-    ! Working parameters
-    integer(kind=intType) :: i, j, nn, mm, fileID, iVar, ii, ierr, iSize
-    integer(Kind=intType) :: nSolVar, iBeg, iEnd, jBeg, jEnd, sps, sizeNode, sizeCell
-    integer(kind=intType) :: iBCGroup, iFam, iProc, nCells, nNodes, nCellsToWrite, iZone, lastZoneSharing
-    character(len=maxStringLen) :: fname
-    character(len=7) :: intString
-    integer(kind=intType), dimension(:), allocatable :: nodeSizes, nodeDisps
-    integer(kind=intType), dimension(:), allocatable :: cellSizes, cellDisps
-    character(len=maxCGNSNameLen), dimension(:), allocatable :: solNames
-    real(kind=realType), dimension(:, :), allocatable :: nodalValues
-    integer(kind=intType), dimension(:, :), allocatable :: conn, localConn
-    real(kind=realType), dimension(:, :), allocatable :: vars
-    integer(kind=intType), dimension(:), allocatable :: mask, elemFam, localElemFam, cgnsBlockID
-    logical :: blankSave, BCGroupNeeded, dataWritten
-    type(zipperMesh), pointer :: zipper
-    type(familyExchange), pointer :: exch
-    if(myID == 0 .and. printIterations) then
-       print "(a)", "#"
-       print "(a)", "# Writing tecplot surface file(s) ..."
-    endif
-
-    ! Number of surface variables. Note that we *explictly*
-    ! remove the potential for writing the surface blanks as
-    ! these are not necessary for the tecplot IO as we write the
-    ! zipper mesh directly. We must save and restore the
-    ! variable in case the CGNS otuput still wants to write it.
-    blankSave = surfWriteBlank
-    surfWriteBlank = .False.
-    call numberOfSurfSolVariables(nSolVar)
-    allocate(solNames(nSolVar))
-    call surfSolNames(solNames)
-
-    spectralLoop: do sps=1,nTimeIntervalsSpectral
-
-       ! If it is time spectral we need to agument the filename
-       if (equationMode == timeSpectral) then
-          write(intString,"(i7)") sps
-          intString = adjustl(intString)
-          fname = trim(fileName)//"Spectral"//trim(intString)
-       else
-          fname = fileName
-       end if
-
-       fileID = 11
-       ! Open file on root proc:
-
-       if (myid == 0) then
-          open(unit=fileID, file=trim(fname), form='UNFORMATTED', access='stream', status='replace')
-
-          ! Tecplot magic number
-          write(fileID) "#!TDV112"
-
-          ! Integer value of 1 (4 bytes)
-          call writeInteger(1)
-
-          ! Integer for FileType: 0 = Full, 1= Grid, 2 = Solution
-          call writeInteger(0)
-
-          ! Write the title of the file
-          call writeString("ADflow Surface Solution Data")
-
-          ! Write the number of variable names
-          call writeInteger(3 + nSolVar)
-
-          ! Write the variable names
-          call writeString("CoordinateX")
-          call writeString("CoordinateY")
-          call writeString("CoordinateZ")
-
-          ! Write the rest of the variables
-          do i=1, nSolVar
-             call writeString(trim(solNames(i)))
-          end do
-
-          deallocate(solNames)
-
-       end if
-
-       ! First pass through to generate and write header information
-       masterBCLoop1: do iBCGroup=1, nFamExchange
-
-          ! Pointers for easier reading
-          exch => BCFamExchange(iBCGroup, sps)
-          zipper => zipperMeshes(iBCGroup)
-
-          ! First thing we do is figure out if we actually need to do
-          ! anything with this BCgroup at all. If none the requested
-          ! families are in this BCExcahnge we don't have to do
-          ! anything.
-
-          BCGroupNeeded = .False.
-          do i=1,size(famList)
-             if (famInLIst(famList(i), exch%famList)) then
-                BCGroupNeeded = .True.
-             end if
-          end do
-
-          ! Keep going if we don't need this.
-          if (.not. BCGroupNeeded) then
-             cycle
-          end if
-
-          ! Get the sizes of this BCGroup
-          call getSurfaceSize(sizeNode, sizeCell, exch%famList, size(exch%famList), .True.)
-          call mpi_reduce(sizeNode, nNodes, 1, adflow_integer, MPI_SUM, 0, adflow_comm_world, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          ! Now gather up the cell family info.
-          allocate(cellDisps(0:nProc), cellSizes(nProc))
-
-          call mpi_gather(sizeCell, 1, adflow_integer, &
-               cellSizes, 1, adflow_integer, 0, adflow_comm_world, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          allocate(localElemFam(sizeCell))
-          call getSurfaceFamily(localElemFam, sizeCell, exch%famList, size(exch%famList), .True.)
-
-          if (myid == 0) then
-             cellDisps(0) = 0
-             do iProc=1, nProc
-                cellDisps(iProc) = cellDisps(iProc-1) + cellSizes(iProc)
-             end do
-             nCells = sum(cellSizes)
-             allocate(elemFam(nCells))
-          end if
-
-          call mpi_gatherv(localElemFam, &
-               size(localElemFam), adflow_integer, elemFam, &
-               cellSizes, cellDisps, adflow_integer, 0, adflow_comm_world, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          ! Deallocate temp memory
-          deallocate(localElemFam, cellSizes, cellDisps)
-
-          rootProc: if (myid == 0 .and. nCells > 0) then
-             do iFam=1, size(exch%famList)
-
-                ! Check if we have to write this one:
-                famInclude: if (famInList(exch%famList(iFam), famList)) then
-                   nCellsToWrite = 0
-                   do i=1, nCells
-                      ! Check if this elem is to be included
-                      if (elemFam(i) == exch%famList(iFam)) then
-                         nCellsToWrite = nCellsToWrite + 1
-                      end if
-                   end do
-
-                   if (nCellsToWrite > 0) then
-                      call writeFloat(zoneMarker) ! Zone Marker
-                      call writeString(trim(famNames(exch%famList(iFam)))) ! Zone Name
-                      call writeInteger(-1) ! Parent Zone (-1 for None)
-                      call writeInteger(-1) ! Strand ID (-2 for tecplot assignment)
-                      call writeDouble(zero) ! Solution Time
-                      call writeInteger(-1)! Zone Color (Not used anymore) (-1)
-                      call writeInteger(3) ! Zone Type (3 for FEQuadrilateral)
-                      call writeInteger(0)! Data Packing (0 for block)
-                      call writeInteger(0)! Specify Var Location (0=don't specify, all at nodes)
-                      call writeInteger(0) ! Are raw 1-to-1 face neighbours supplied (0 for false)
-                      call writeInteger(nNodes) ! Number of nodes in FE Zone
-                      call writeInteger(nCellsToWrite) ! Number of elements in FE Zone
-                      call writeInteger(0) ! ICellDim, jCellDim, kCellDim (for future use, set to 0)
-                      call writeInteger(0)
-                      call writeInteger(0)
-                      call writeInteger(0) ! Aux data specified (0 for no)
-                   end if
-                end if famInclude
-             end do
-          end if rootProc
-          if (myid == 0) then
-             deallocate(elemFam)
-          end if
-       end do masterBCLoop1
-
-       if (myid == 0) then
-          call writeFloat(dataSectionMarker) ! Eohmarker to mark difference between header and data section
-       end if
-
-       ! Now do everything again but for real.
-       masterBCLoop: do iBCGroup=1,nFamExchange
-
-          ! Pointers for easier reading
-          exch => BCFamExchange(iBCGroup, sps)
-          zipper => zipperMeshes(iBCGroup)
-
-          ! First thing we do is figure out if we actually need to do
-          ! anything with this BCgroup at all. If none the requested
-          ! families are in this BCExcahnge we don't have to do
-          ! anything.
-
-          BCGroupNeeded = .False.
-          do i=1,size(famList)
-             if (famInList(famList(i), exch%famList)) then
-                BCGroupNeeded = .True.
-             end if
-          end do
-
-          ! Keep going if we don't need this.
-          if (.not. BCGroupNeeded) then
-             cycle
-          end if
-
-          ! Get the sizes of this BCGroup
-          call getSurfaceSize(sizeNode, sizeCell, exch%famList, size(exch%famList), .True.)
-          allocate(nodalValues(max(sizeNode,1), nSolVar+3+6))
-          ! Compute the nodal data
-
-          call computeSurfaceOutputNodalData(BCFamExchange(iBCGroup, sps), &
-               zipperMeshes(iBCGroup), .False. , nodalValues(:, :))
-
-          ! Gather up the number of nodes to be set to the root proc:
-          allocate(nodeSizes(nProc), nodeDisps(0:nProc))
-          nodeSizes = 0
-          nodeDisps = 0
-
-          call mpi_allgather(sizeNode, 1, adflow_integer, nodeSizes, 1, adflow_integer, &
-               adflow_comm_world, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-          nodeDisps(0) = 0
-          do iProc=1, nProc
-             nodeDisps(iProc) = nodeDisps(iProc-1) + nodeSizes(iProc)
-          end do
-
-          iSize = 3 + 6 + nSolVar
-          if (myid == 0) then
-             nNodes = sum(nodeSizes)
-          else
-             nNodes = 1
-          end if
-
-          ! Only root proc actually has any space allocated
-          allocate(vars(nNodes, iSIze))
-
-          ! Gather values to the root proc.
-          do i=1, iSize
-             call mpi_gatherv(nodalValues(:, i), sizeNode, &
-                  adflow_real, vars(:, i), nodeSizes, nodeDisps, adflow_real, 0, adflow_comm_world, ierr)
-             call EChk(ierr,__FILE__,__LINE__)
-          end do
-          deallocate(nodalValues)
-
-          ! Now gather up the connectivity
-          allocate(cellDisps(0:nProc), cellSizes(nProc))
-
-          call mpi_gather(sizeCell, 1, adflow_integer, &
-               cellSizes, 1, adflow_integer, 0, adflow_comm_world, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-
-          if (allocated(cgnsBlockID)) then
-             deallocate(cgnsBlockID)
-          end if
-
-          allocate(localConn(4, sizeCell), localElemFam(sizeCell), cgnsBlockID(sizeCell))
-          call getSurfaceConnectivity(localConn, cgnsBlockID, sizeCell, exch%famList, size(exch%famList), .True.)
-          call getSurfaceFamily(localElemFam, sizeCell, exch%famList, size(exch%famList), .True.)
-
-          if (myid == 0) then
-             cellDisps(0) = 0
-             do iProc=1, nProc
-                cellDisps(iProc) = cellDisps(iProc-1) + cellSizes(iProc)
-             end do
-             nCells = sum(cellSizes)
-             allocate(conn(4, nCells))
-             allocate(elemFam(nCells))
-          end if
-
-          ! We offset the conn array by nodeDisps(iProc) which
-          ! automagically adjusts the connectivity to account for the
-          ! number of nodes from different processors
-
-          call mpi_gatherv(localConn+nodeDisps(myid), &
-               4*size(localConn, 2), adflow_integer, conn, &
-               cellSizes*4, cellDisps*4, adflow_integer, 0, adflow_comm_world, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          call mpi_gatherv(localElemFam, &
-               size(localElemFam), adflow_integer, elemFam, &
-               cellSizes, cellDisps, adflow_integer, 0, adflow_comm_world, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          ! Local values are finished
-          deallocate(localConn, localElemFam)
-          iZone = 0
-          rootProc2: if (myid == 0 .and. nCells > 0) then
-
-             ! Need zero based for binary output.
-             conn = conn - 1
-
-             allocate(mask(nCells))
-             dataWritten=.False.
-             do iFam=1, size(exch%famList)
-
-                ! Check if we have to write this one:
-                famInclude2: if (famInList(exch%famList(iFam), famList)) then
-
-                   ! Create a temporary mask
-                   mask = 0
-                   nCellsToWrite = 0
-                   do i=1, nCells
-                      ! Check if this elem is to be included
-                      if (elemFam(i) == exch%famList(iFam)) then
-                         mask(i) = 1
-                         nCellsToWrite = nCellsToWrite + 1
-                      end if
-                   end do
-
-                   actualWrite2 : if (nCellsToWrite > 0) then
-
-                      ! Write Zone Data
-                      call writeFloat(zoneMarker)
-
-                      ! Data Type for each variable (1 for float, 2 for double)
-                      do i=1, 3
-                         if (precisionSurfGrid == precisionSingle) then
-                            call writeInteger(1)
-                         else if (precisionSurfGrid == precisionDouble) then
-                            call writeInteger(2)
-                         end if
-                      end do
-
-                      do i=1, nSolVar
-                         if (precisionSurfSol == precisionSingle) then
-                            call writeInteger(1)
-                         else if (precisionSurfSol == precisionDouble) then
-                            call writeInteger(2)
-                         end if
-                      end do
-
-                      call writeInteger(0) ! Has passive variables (0 for no)
-
-                      if (.not. dataWritten) then
-
-                         ! Save the current 0-based zone so we know which to share with.
-                         lastZoneSharing = iZone
-
-                         call writeInteger(0) ! Has variable sharing (0 for no)
-                         call writeInteger(-1)  ! Zone based zone number to share connectivity (-1 for no sharing)
-
-                         ! Min/Max Value for coordinates
-                         do j=1,3
-                            call writeDouble(minval(vars(:, j)))
-                            call writeDouble(maxval(vars(:, j)))
-                         end do
-
-                         ! Min/Max Value for solution variables
-                         do j=1,nSolVar
-                            call writeDouble(minval(vars(:, j+9)))
-                            call writeDouble(maxval(vars(:, j+9)))
-                         end do
-
-                         ! Dump the coordinates
-                         do j=1,3
-                            if (precisionSurfGrid == precisionSingle) then
-                               call writeFloats(vars(1:nNodes, j))
-                            else if (precisionSurfSol == precisionDouble) then
-                               call writeDoubles(vars(1:nNodes, j))
-                            end if
-                         end do
-
-                         ! Dump the solution variables
-                         do j=1,nSolVar
-                            if (precisionSurfSol == precisionSingle) then
-                               call writeFloats(vars(1:nNodes, j+9))
-                            else if (precisionSurfSol == precisionDouble) then
-                               call writeDoubles(vars(1:nNodes, j+9))
-                            end if
-                         end do
-
-                      else
-                         ! This will be sharing data from another zone.
-                         call writeInteger(1) ! Has variable sharing (0 for no)
-
-                         ! Write out the zone number for sharing the data.
-                         do j=1,3+nSolVar
-                            call writeInteger(lastZoneSharing)
-                         end do
-
-                         call writeInteger(-1)  ! Zone based zone number to share connectivity (-1 for no sharing
-                      end if
-
-                      ! Dump the connectivity
-                      j = 0
-                      do i=1, nCells
-                         ! Check if this elem is to be included
-                         if (mask(i) == 1) then
-                            call writeIntegers(conn(:, i))
-                            j = j + 1
-                         end if
-                      end do
-
-                      iZone = iZone + 1
-                   end if actualWrite2
-                end if famInclude2
-             end do
-             deallocate(mask)
-          end if rootProc2
-          if (myid == 0) then
-             deallocate(conn, elemFam)
-          end if
-          deallocate(cellSizes, cellDisps, nodeSizes, nodeDisps, vars)
-       end do masterBCLoop
-
-       if (myid == 0) then
-          close(fileID)
-       end if
-
-    end do spectralLoop
-
-    ! Restore the modified option
-    surfWriteBlank = blankSave
-    if(myID == 0 .and. printIterations) then
-       print "(a)", "# Tecplot surface file(s) written"
-       print "(a)", "#"
-    endif
+        ! Input parameters
+        character(len=*), intent(in) :: distName
+        integer(kind=intType), intent(in) :: nSegments
+        real(kind=realType), dimension(3) :: normal
+        integer(kind=intType), intent(in) :: normal_ind
+        integer(kind=intType), intent(in) :: n, famList(n)
+
+        nLiftDists = nLiftDists + 1
+        if (nLiftDists > nLiftDistMax) then
+            print *, 'Error: Exceeded the maximum number of lift distributions. &
+                 &Increase nLiftDistMax'
+            stop
+        end if
+
+        liftDists(nLIftDists)%nSegments = nSegments
+        liftDists(nLiftDists)%normal = normal
+        liftDists(nLIftDists)%normal_ind = normal_ind
+        liftDists(nLiftDists)%distName = distName
+
+        allocate (liftDists(nLiftDists)%famList(n))
+        liftDists(nLiftDists)%famList(:) = famList
+
+    end subroutine addLiftDistribution
+
+    subroutine writeTecplot(sliceFile, writeSlices, liftFile, writeLift, &
+                            surfFile, writeSurf, famList, nFamList)
+        !
+        !       This is the master routine for writing tecplot data from adflow.
+        !       This routine will write the slice, lift and surface files
+        !       depending on the flags writeSlics, writeLift and writeSurface.
+        !       The reason for the combined routine is that we can safely only
+        !       perform the nodal averaging once which is required for all
+        !       three output files.
+        use constants
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use surfaceFamilies, only: BCFamGroups, BCFamExchange
+        use surfaceUtils, only: getSurfaceSize
+        use oversetData, only: zipperMeshes
+        use outputMod, only: numberOfSurfSolVariables
+        implicit none
 
-    contains
+        ! Input Params
+        character(len=*), intent(in) :: sliceFile, liftFile, surfFile
+        logical, intent(in) :: writeSlices, writeLift, writeSurf
+        integer(kind=intType), intent(in) :: nFamList
+        integer(kind=intType), intent(in), dimension(nFamList) :: famList
+        real(kind=realType), dimension(:, :, :), allocatable :: nodalValues
+        ! Working
+        integer(kind=intType) :: sps, nSolVar, sizeNOde, sizeCell
+        integer(kind=intType), dimension(:), pointer :: wallLIST
+
+        ! Determine the number of surface variables we have
+        call numberOfSurfSolVariables(nSolVar)
+        wallList => BCFamGroups(iBCGroupWalls)%famList
+        call getSurfaceSize(sizeNode, sizeCell, wallList, size(wallList), .True.)
+
+        ! Allocate and compute the wall-based surface data for hte slices
+        ! and lift distributions.
+        allocate (nodalValues(max(sizeNode, 1), nSolVar + 6 + 3, nTimeIntervalsSpectral))
+
+        do sps = 1, nTimeIntervalsSpectral
+            call computeSurfaceOutputNodalData(BCFamExchange(iBCGroupWalls, sps), &
+                                               zipperMeshes(iBCGroupWalls), .True., nodalValues(:, :, sps))
+        end do
 
-      subroutine writeFloat(adflowRealVal)
-        use iso_fortran_env, only : real32
-        implicit none
-        real(kind=realType) :: adflowRealVal
-        real(kind=real32) :: float
-        float = adflowRealval
-        write(fileID) float
-      end subroutine writeFloat
-
-      subroutine writeDouble(adflowRealVal)
-        use iso_fortran_env, only : real64
+        if (writeSlices) then
+            call writeSlicesFile(sliceFile, nodalValues)
+        end if
+
+        if (writeLift) then
+            call writeLiftDistributionFile(liftFile, nodalValues)
+        end if
+
+        deallocate (nodalValues)
+
+        if (writeSurf) then
+            call writeTecplotSurfaceFile(surfFile, famList)
+        end if
+
+    end subroutine writeTecplot
+
+    subroutine writeSlicesFile(fileName, nodalValues)
+        !
+        ! This subroutine is intended to be called from python.  This
+        ! routine will write the user defined slics to an to the (ascii)
+        ! tecplot file fileName. ASCII files are used for simplicity since
+        ! very little information is actually written.
+        use constants
+        use communication
+        use outputMod
+        use inputTimeSpectral
+        use inputPhysics
+        use inputIteration
+        use inputIO
+        use surfaceFamilies
+        use surfaceUtils
+        use utils, only: EChk
         implicit none
-        real(kind=realType) :: adflowRealVal
-        real(kind=real64) :: dble
-        dble = adFlowRealVal
-        write(fileID) dble
-      end subroutine writeDouble
-
-      subroutine writeFloats(adflowRealVals)
-        use iso_fortran_env, only : real32
-        implicit none
-        real(kind=realType) :: adflowRealVals(:)
-        real(kind=real32) :: floats(size(adflowRealVals))
-        integer :: i
-        floats = adflowRealvals
-        write(fileID) floats
 
-      end subroutine writeFloats
+        ! Input Params
+        character(len=*), intent(in) :: fileName
+        real(kind=realType), intent(inout), dimension(:, :, :) :: nodalValues
+
+        ! Working parameters
+        integer(kind=intType) :: file, i, sps, nSolVar, ierr
+        character(len=maxStringLen) :: fname
+        character(len=7) :: intString
+        character(len=maxCGNSNameLen), dimension(:), allocatable :: solNames
+        integer(kind=intType), allocatable, dimension(:) :: famList
+        type(slice) :: globalSlice
+        integer(kind=intType) :: sizeNode, sizeCell
+        integer(kind=intType), dimension(:), pointer :: wallList
+        real(kind=realType), dimension(:, :), allocatable :: pts
+        integer(kind=intType), dimension(:, :), allocatable :: conn
+        integer(kind=intType), dimension(:), allocatable :: elemFam, cgnsBlockID
+
+        ! Only write if we actually have lift distributions
+        testwriteSlices: if (nParaSlices + nAbsSlices > 0) then
+
+            if (myID == 0 .and. printIterations) then
+                print "(a)", "#"
+                print "(a)", "# Writing slices file(s) ..."
+            end if
+
+            do sps = 1, nTimeIntervalsSpectral
+
+                ! If it is time spectral we need to agument the filename
+                if (equationMode == timeSpectral) then
+                    write (intString, "(i7)") sps
+                    intString = adjustl(intString)
+                    fname = trim(fileName)//"Spectral"//trim(intString)
+                else
+                    fname = fileName
+                end if
 
-      subroutine writeDoubles(adflowRealVals)
-        use iso_fortran_env, only : real64
-        implicit none
-        real(kind=realType) :: adflowRealVals(:)
-        real(kind=real64) :: dbles(size(adflowrealvals))
-        integer :: i
-        dbles = adflowrealvals
-        write(fileID) dbles
+                file = 11
+                ! Open file on root proc:
+                if (myid == 0) then
+                    open (unit=file, file=trim(fname))
+
+                    ! Write Header Information
+                    write (file, *) "Title = ""ADflow Slice Data"""
+                    write (file, "(a)", advance="no") "Variables = "
+                    write (file, "(a)", advance="no") " ""CoordinateX"" "
+                    write (file, "(a)", advance="no") " ""CoordinateY"" "
+                    write (file, "(a)", advance="no") " ""CoordinateZ"" "
+                    write (file, "(a)", advance="no") " ""XoC"" "
+                    write (file, "(a)", advance="no") " ""YoC"" "
+                    write (file, "(a)", advance="no") " ""ZoC"" "
+
+                    ! Number of additional variables on slices
+                    call numberOfSurfSolVariables(nSolVar)
+                    allocate (solNames(nSolVar))
+                    call surfSolNames(solNames)
+
+                    ! Write the rest of the variables
+                    do i = 1, nSolVar
+                        write (file, "(a,a,a)", advance="no") """", trim(solNames(i)), """ "
+                    end do
+
+                    write (file, "(1x)")
+                    deallocate (solNames)
+                end if
+                call mpi_bcast(nSolVar, 1, adflow_integer, 0, adflow_comm_world, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                ! Slices are created on walls and walls only. Retrieve the
+                ! points, connectivity and familyID of all the walls.
+                wallList => BCFamGroups(iBCGroupWalls)%famList
+                call getSurfaceSize(sizeNode, sizeCell, wallList, size(wallList), .True.)
+                allocate (pts(3, sizeNode), conn(4, sizeCell), elemFam(sizeCell), cgnsBlockID(sizeCell))
+                call getSurfaceConnectivity(conn, cgnsBlockID, sizeCell, wallList, size(wallList), .True.)
+                call getSurfacePoints(pts, sizeNode, sps, wallList, size(wallList), .True.)
+                call getSurfaceFamily(elemFam, sizeCell, wallList, size(wallList), .True.)
+
+                ! Integration is performed in parallel
+                do i = 1, nParaSlices
+                    call integrateSlice(paraSlices(i, sps), globalSlice, &
+                                        nodalValues(:, :, sps), nSolVar, .True.)
+                    if (myid == 0) then
+                        call writeSlice(globalSlice, file, nSolVar)
+                    end if
+                    call destroySlice(globalSlice)
+                end do
 
-      end subroutine writeDoubles
+                do i = 1, nAbsSlices
+                    ! 'Destroy' the slice...just dealloc the allocated data.
+                    ! before we do, save the family list
+                    allocate (famList(size(absSlices(i, sps)%famList)))
+                    famList = absSlices(i, sps)%famList
+                    call destroySlice(absSlices(i, sps))
+
+                    ! Make new one in the same location
+                    call createSlice(pts, conn, elemFam, absSlices(i, sps), &
+                                     absSlices(i, sps)%pt, absSlices(i, sps)%normal, absSlices(i, sps)%dir_vec, &
+                                     absSlices(i, sps)%use_dir, absSlices(i, sps)%sliceName, famList)
+
+                    call integrateSlice(absSlices(i, sps), globalSlice, &
+                                        nodalValues(:, :, sps), nSolVar, .True.)
+                    if (myid == 0) then
+                        call writeSlice(globalSlice, file, nSolVar)
+                    end if
+                    call destroySlice(globalSlice)
+                    deallocate (famList)
+                end do
 
-      subroutine writeInteger(adflowIntegerVal)
-        use iso_fortran_env, only : int32
+                !Close file on root proc
+                if (myid == 0) then
+                    close (file)
+                end if
+            end do
+
+            if (myID == 0 .and. printIterations) then
+                print "(a)", "# Slices file(s) written"
+                print "(a)", "#"
+            end if
+        end if testwriteSlices
+    end subroutine writeSlicesFile
+
+    subroutine writeLiftDistributionFile(fileName, nodalValues)
+        !
+        !
+        !       This subroutine is intended to be called from python.
+        !       This routine will write the added lift distributions
+        !       to the (ascii) tecplot file fileName. ASCII files are
+        !       used for siplicity since very little informatin is actually
+        !       written.
+        use constants
+        use communication
+        use outputMod
+        use inputPhysics
+        use inputTimeSpectral
+        use inputIteration
+        use surfaceFamilies
         implicit none
-        integer(kind=intType) :: adflowIntegerVal
-        integer(kind=int32) :: int
 
-        int = adflowIntegerVal
-        write(fileID) int
-      end subroutine writeInteger
+        ! Input Params
+        character(len=*), intent(in) :: fileName
+        real(kind=realType), dimension(:, :, :), allocatable :: nodalValues
 
-      subroutine writeIntegers(adflowIntegerVals)
-        use iso_fortran_env, only : int32
-        implicit none
-        integer(kind=intType) :: adflowIntegerVals(:), i
-        integer(kind=int32) :: ints(size(adflowintegervals))
-        ints = adflowintegervals
-        write(fileID) ints
+        ! Working parameters
+        integer(kind=intType) :: file, sps
+        character(len=maxStringLen) :: fname
+        character(len=7) :: intString
+
+        ! Only write if we actually have lift distributions
+        testwriteLiftDists: if (nLiftDists > 0) then
+
+            if (myID == 0 .and. printIterations) then
+                print "(a)", "#"
+                print "(a)", "# Writing lift distribution file(s) ..."
+            end if
+
+            do sps = 1, nTimeIntervalsSpectral
+
+                ! If it is time spectral we need to agument the filename
+                if (equationMode == timeSpectral) then
+                    write (intString, "(i7)") sps
+                    intString = adjustl(intString)
+                    fname = trim(fileName)//"Spectral"//trim(intString)
+                else
+                    fname = fileName
+                end if
 
-      end subroutine writeIntegers
+                file = 11
+                ! Open file on root proc:
+                if (myid == 0) then
+                    open (unit=file, file=trim(fname))
+                end if
 
-      subroutine writeString(str)
+                call writeLiftDistributions(sps, file, nodalValues(:, :, sps))
 
+                ! Close file on root proc
+                if (myid == 0) then
+                    close (file)
+                end if
+            end do
+
+            if (myID == 0 .and. printIterations) then
+                print "(a)", "# Lift distribution file(s) written"
+                print "(a)", "#"
+            end if
+
+        end if testwriteLiftDists
+    end subroutine writeLiftDistributionFile
+
+    subroutine writeLiftDistributions(sps, fileID, nodalValues)
+        !
+        !       This subroutine writes the liftdistribution for the specified
+        !       spectral instance. It is assumed that the required file handles
+        !       are already open and can be written to
+        use constants
+        use communication
+        use outputMod
+        use su_cgns
+        use cgnsNames
+        use surfaceFamilies, only: BCFamGroups, BCFamExchange
+        use surfaceUtils
+        use utils, only: EChk
+        use sorting, only: famInList
         implicit none
 
-        character(len=*)::  str
-        integer(kind=intType) :: i
+        ! Input parameters
+        integer(kind=intType), intent(in) :: sps, fileID
+        real(kind=realType), dimension(:, :), intent(in) :: nodalValues
+
+        real(kind=realType), dimension(3) :: xmin, xmax, xmin_local, xmax_local
+        real(kind=realType), parameter :: tol = 1e-8
+        type(liftDist), pointer :: d
+        integer(kind=intType) :: i, j, ii, jj, iDist, ierr
+        real(kind=realType), dimension(:, :), allocatable :: values
+        character(len=maxCGNSNameLen), dimension(:), allocatable :: liftDistNames
+        real(kind=realType) :: dmin, dmax, sumL, sumD, sumM, span, delta, xCur(3)
+        type(slice) :: localSlice, globalSlice
+        integer(kind=intType) :: sizeNode, sizeCell
+        integer(kind=intType), dimension(:), pointer :: wallList
+        real(kind=realType), dimension(:, :), allocatable :: pts
+        integer(kind=intType), dimension(:, :), allocatable :: conn
+        integer(kind=intType), dimension(:), allocatable :: elemFam, cgnsBlockID
+
+        ! Slices are created on walls and walls only. Retrieve the
+        ! points, connectivity and familyID of all the walls.
+        wallList => BCFamGroups(iBCGroupWalls)%famList
+        call getSurfaceSize(sizeNode, sizeCell, wallList, size(wallList), .True.)
+        allocate (pts(3, sizeNode), conn(4, sizeCell), elemFam(sizeCell), cgnsBlockID(sizeCell))
+        call getSurfaceConnectivity(conn, cgnsBlockID, sizeCell, wallList, size(wallList), .True.)
+        call getSurfacePoints(pts, sizeNode, sps, wallList, size(wallList), .True.)
+        call getSurfaceFamily(elemFam, sizeCell, wallList, size(wallList), .True.)
+
+        do iDist = 1, nLiftDists
+
+            d => liftDists(iDist)
+            xmin_local = huge(real(zero))
+            xmax_local = -huge(real(zero))
+
+            ! Get the bounding box for the entire geometry we have been slicing.
+            elemLoop: do i = 1, size(conn, 2)
+                if (famInList(elemFam(i), d%FamList)) then
+
+                    ! Extract each of the 4 nodes on this quad:
+                    do jj = 1, 4
+                        xCur = pts(:, conn(jj, i))
+                        ! Check the max/min on each index
+                        do ii = 1, 3
+                            xmin_local(ii) = min(xmin_local(ii), xCur(ii))
+                            xmax_local(ii) = max(xmax_local(ii), xCur(ii))
+                        end do
+                    end do
+                end if
+            end do elemLoop
+
+            ! Globalize all min/max values.
+            call mpi_allreduce(xmin_local, xmin, 3, adflow_real, MPI_MIN, &
+                               adflow_comm_world, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            call mpi_allreduce(xmax_local, xmax, 3, adflow_real, MPI_MAX, &
+                               adflow_comm_world, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            d%delta = (xMax(d%normal_ind) - xMin(d%normal_ind)) / dble((d%nSegments - 1))
+            allocate (d%slicePts(3, d%nSegments))
+
+            ! Zero out all segments
+            d%slicePts = zero
+
+            ! These are the variable names for the lift distribution:
+            allocate (liftDistNames(nLiftDistVar))
+            ! Set the names here
+            liftDistNames(1) = "eta"
+            liftDistNames(2) = cgnsCoorX
+            liftDistNames(3) = cgnsCoorY
+            liftDistNames(4) = cgnsCoorZ
+            liftDistNames(5) = "Lift"
+            liftDistNames(6) = "Drag"
+            liftDistNames(7) = "Moment"
+            liftDistNames(8) = "Normalized Lift"
+            liftDistNames(9) = "Normalized Drag"
+            liftDistNames(10) = "Normalized Moment"
+            liftDistNames(11) = "CL"
+            liftDistNames(12) = "CD"
+            liftDistNames(13) = "CM"
+            liftDistNames(14) = "CLp"
+            liftDistNames(15) = "CDp"
+            liftDistNames(16) = "CMp"
+            liftDistNames(17) = "CLv"
+            liftDistNames(18) = "CDv"
+            liftDistNames(19) = "CMv"
+            liftDistNames(20) = "Elliptical"
+            liftDistNames(21) = "thickness"
+            liftDistNames(22) = "twist"
+            liftDistNames(23) = "chord"
+            liftDistNames(24) = "fx"
+            liftDistNames(25) = "fy"
+            liftDistNames(26) = "fz"
+
+            ! Only write header info for first distribution only
+            if (myid == 0) then
+                if (iDist == 1) then
+                    write (fileID, *) "Title= ""ADflow Lift Distribution Data"""
+                    write (fileID, "(a)", advance="no") "Variables = "
+                    do i = 1, nLIftDistVar ! here we just write var-names in the header
+                        write (fileID, "(a,a,a)", advance="no") """", trim(liftDistNames(i)), """ "
+                    end do
+                    write (fileID, "(1x)")
+                end if
 
-        do i=1,len(str)
-           write(fileID) iachar(str(i:i))
+                write (fileID, "(a,a,a)") "Zone T= """, trim(d%distName), """"
+                write (fileID, *) "I= ", d%nSegments
+                write (fileID, *) "DATAPACKING=BLOCK"
+            end if
+
+            allocate (values(d%nSegments, nLiftDistVar))
+            values = zero
+
+            do i = 1, d%nSegments
+                if (i == 1) then
+                    d%slicePts(d%normal_ind, i) = xMin(d%normal_ind) + tol
+                else if (i == d%nSegments) then
+                    d%slicePts(d%normal_ind, i) = xMin(d%normal_ind) + (i - 1) * d%delta - tol
+                else
+                    d%slicePts(d%normal_ind, i) = xMin(d%normal_ind) + (i - 1) * d%delta
+                end if
+            end do
+
+            ! Scaled Eta values
+            dmin = minVal(d%slicePts(d%normal_ind, :))
+            dmax = maxval(d%slicePts(d%normal_ind, :))
+
+            ! next line we save ETA as the first column. This corresponds to
+            ! when we open a lift file, e.g. fc_000_lift.dat where we find
+            ! ETA as the first entry... (also visible through tec360)
+            values(:, 1) = (d%slicePts(d%normal_ind, :) - dmin) / (dmax - dmin)
+            ! Coordinate Varaibles
+            if (d%normal_ind == 1) then! X slices
+                values(:, 2) = d%slicePts(1, :)
+            else if (d%normal_ind == 2) then ! Y slices
+                values(:, 3) = d%slicePts(2, :)
+            else if (d%normal_ind == 3) then ! Z slices
+                values(:, 4) = d%slicePts(3, :)
+            end if
+
+            !      as mentioned above nSegments are number of nodes in the
+            !      distribution, e.g. maybe you have asked for 150 points out along
+            !      your wing where you want the lift. Ok, we now for each of these
+            !      150 points have to create a slice and integrate our metrics around
+            !      said slice, save the integrated metrics (e.g. lift, drag) for the
+            !      current point. We then move on to the next point, make a new slice
+            !      get the new point's metrics and save them ... we do this 150 times
+            do i = 1, d%nSegments
+                ! Make new one in the same location
+                ! we just pass the normal again as the direction vector because its not used
+                call createSlice(pts, conn, elemFam, localSlice, d%slicePts(:, i), &
+                                 d%normal, d%normal, .False., "does_not_matter", d%famList)
+                call integrateSlice(localSlice, globalSlice, nodalValues, 0, .False.)
+
+                ! Total lift, drag, and moment
+                values(i, 5) = globalSlice%pL + globalSlice%vL
+                values(i, 6) = globalSlice%pD + globalSlice%vD
+                values(i, 7) = globalSlice%pM + globalSlice%vM
+
+                ! Total CL, CD, and CM
+                values(i, 11) = globalSlice%CLp + globalSlice%CLv
+                values(i, 12) = globalSlice%CDp + globalSlice%CDv
+                values(i, 13) = globalSlice%CMp + globalSlice%CMv
+
+                ! Pressure lift, drag, and moment coefficients
+                values(i, 14) = globalSlice%CLp
+                values(i, 15) = globalSlice%CDp
+                values(i, 16) = globalSlice%CMp
+
+                ! Viscous lift, drag, and moment coefficients
+                values(i, 17) = globalSlice%CLv
+                values(i, 18) = globalSlice%CDv
+                values(i, 19) = globalSlice%CMv
+
+                ! t/c, twist, chord
+                values(i, 21) = globalSlice%thickness
+                values(i, 22) = globalSlice%twist
+                values(i, 23) = globalSlice%chord
+
+                ! here we now save our new, added values that we have desired to use
+                values(i, 24) = globalSlice%fx
+                values(i, 25) = globalSlice%fy
+                values(i, 26) = globalSlice%fz
+
+                call destroySlice(localSlice)
+                call destroySlice(globalSlice)
+
+            end do
+
+            ! Sum up the lift, drag, and moment values from the slices:
+            sumL = zero
+            sumD = zero
+            sumM = zero
+            do i = 1, d%nSegments - 1
+                sumL = sumL + half * (values(i, 5) + values(i + 1, 5))
+                sumD = sumD + half * (values(i, 6) + values(i + 1, 6))
+                sumM = sumM + half * (values(i, 7) + values(i + 1, 7))
+            end do
+
+            ! Now compute the normalized lift, drag and elliptical since
+            ! we know the sum. Note delta is non-dimensional!
+            delta = values(2, 1) - values(1, 1)
+
+            ! This is the "nondimensional" span...it basically takes into account if you have
+            ! a wing not at the sym plane
+            span = maxval(values(:, 1)) - minval(values(:, 1))
+            dmin = minval(values(:, 1))
+            sumL = sumL * delta
+            sumD = sumD * delta
+            sumM = sumM * delta
+
+            do i = 1, d%nSegments
+
+                ! Normalized Lift, Drag, and Moment
+                values(i, 8) = values(i, 5) / sumL
+                values(i, 9) = values(i, 6) / sumD
+                values(i, 10) = values(i, 7) / abs(sumM)
+
+                ! elliptical value
+                values(i, 20) = four / pi / span * sqrt(one - (values(i, 1) - dmin)**2 / span**2)
+            end do
+
+            ! Write all variables in block format
+            if (myid == 0) then
+                do j = 1, nLiftDistVar
+                    do i = 1, d%nSegments
+                        write (fileID, sci6) values(i, j)
+                    end do
+                end do
+            end if
+
+            ! Deallocate slice list and point list
+            deallocate (d%slicePts)
+
+            ! Destroy temp variables
+            deallocate (liftDistNames, values)
         end do
-        write(fileID) 0
 
-      end subroutine writeString
+    end subroutine writeLiftDistributions
+
+    subroutine writeTecplotSurfaceFile(fileName, famList)
+        use constants
+        use communication, only: myid, adflow_comm_world, nProc
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use inputPhysics, only: equationMode
+        use inputIteration, only: printIterations
+        use inputIO, only: precisionsurfgrid, precisionsurfsol
+        use outputMod, only: surfSolNames, numberOfSurfSolVariables
+        use surfaceFamilies, onlY: BCFamExchange, famNames, familyExchange
+        use utils, only: EChk, setPointers, setBCPointers
+        use BCPointers, only: xx
+        use sorting, only: famInList
+        use extraOutput, only: surfWriteBlank
+        use oversetData, only: zipperMesh, zipperMeshes
+        use surfaceUtils
+#include <petsc/finclude/petsc.h>
+        use petsc
+        implicit none
+
+        ! Input Params
+        character(len=*), intent(in) :: fileName
+        integer(kind=intType), intent(in), dimension(:) :: famList
+
+        ! Working parameters
+        integer(kind=intType) :: i, j, nn, mm, fileID, iVar, ii, ierr, iSize
+        integer(Kind=intType) :: nSolVar, iBeg, iEnd, jBeg, jEnd, sps, sizeNode, sizeCell
+        integer(kind=intType) :: iBCGroup, iFam, iProc, nCells, nNodes, nCellsToWrite, iZone, lastZoneSharing
+        character(len=maxStringLen) :: fname
+        character(len=7) :: intString
+        integer(kind=intType), dimension(:), allocatable :: nodeSizes, nodeDisps
+        integer(kind=intType), dimension(:), allocatable :: cellSizes, cellDisps
+        character(len=maxCGNSNameLen), dimension(:), allocatable :: solNames
+        real(kind=realType), dimension(:, :), allocatable :: nodalValues
+        integer(kind=intType), dimension(:, :), allocatable :: conn, localConn
+        real(kind=realType), dimension(:, :), allocatable :: vars
+        integer(kind=intType), dimension(:), allocatable :: mask, elemFam, localElemFam, cgnsBlockID
+        logical :: blankSave, BCGroupNeeded, dataWritten
+        type(zipperMesh), pointer :: zipper
+        type(familyExchange), pointer :: exch
+        if (myID == 0 .and. printIterations) then
+            print "(a)", "#"
+            print "(a)", "# Writing tecplot surface file(s) ..."
+        end if
+
+        ! Number of surface variables. Note that we *explictly*
+        ! remove the potential for writing the surface blanks as
+        ! these are not necessary for the tecplot IO as we write the
+        ! zipper mesh directly. We must save and restore the
+        ! variable in case the CGNS otuput still wants to write it.
+        blankSave = surfWriteBlank
+        surfWriteBlank = .False.
+        call numberOfSurfSolVariables(nSolVar)
+        allocate (solNames(nSolVar))
+        call surfSolNames(solNames)
+
+        spectralLoop: do sps = 1, nTimeIntervalsSpectral
+
+            ! If it is time spectral we need to agument the filename
+            if (equationMode == timeSpectral) then
+                write (intString, "(i7)") sps
+                intString = adjustl(intString)
+                fname = trim(fileName)//"Spectral"//trim(intString)
+            else
+                fname = fileName
+            end if
+
+            fileID = 11
+            ! Open file on root proc:
+
+            if (myid == 0) then
+                open (unit=fileID, file=trim(fname), form='UNFORMATTED', access='stream', status='replace')
+
+                ! Tecplot magic number
+                write (fileID) "#!TDV112"
+
+                ! Integer value of 1 (4 bytes)
+                call writeInteger(1)
+
+                ! Integer for FileType: 0 = Full, 1= Grid, 2 = Solution
+                call writeInteger(0)
+
+                ! Write the title of the file
+                call writeString("ADflow Surface Solution Data")
+
+                ! Write the number of variable names
+                call writeInteger(3 + nSolVar)
+
+                ! Write the variable names
+                call writeString("CoordinateX")
+                call writeString("CoordinateY")
+                call writeString("CoordinateZ")
+
+                ! Write the rest of the variables
+                do i = 1, nSolVar
+                    call writeString(trim(solNames(i)))
+                end do
 
-    end subroutine writeTecplotSurfaceFile
+                deallocate (solNames)
 
-  subroutine initializeLiftDistributionData
-    use constants
-    use communication
-    use blockPointers
-    use inputPhysics
-    use outputMod
-    use inputTimeSpectral
-    use surfacefamilies
-    implicit none
+            end if
 
-    ! Working Variables
-    integer(kind=intType) :: nPts, nCells, sps
-
-    if (liftDistInitialized) then
-       return
-    else
-
-       ! Data for the marching squares method: Which edges are cut by
-       ! the contour. We haven't dealt with the case of an ambiguous
-       ! contour which is case 6 and 11. Since most of the slices we are
-       ! doing are planes this won't matter.
-       msCon1( 1 , : ) = (/ 0 , 0 , 0 , 0 , 0 /)
-       msCon1( 2 , : ) = (/ 1 , 4 , 0 , 0 , 0 /)
-       msCon1( 3 , : ) = (/ 1 , 2 , 0 , 0 , 0 /)
-       msCon1( 4 , : ) = (/ 4 , 2 , 0 , 0 , 0 /)
-       msCon1( 5 , : ) = (/ 2 , 3 , 0 , 0 , 0 /)
-       msCon1( 6 , : ) = (/ 2 , 3 , 0 , 0 , 0 /) ! Should be 2, 3, 1, 4
-       msCon1( 7 , : ) = (/ 1 , 3 , 0 , 0 , 0 /)
-       msCon1( 8 , : ) = (/ 4 , 3 , 0 , 0 , 0 /)
-       msCon1( 9 , : ) = (/ 4 , 3 , 0 , 0 , 0 /)
-       msCon1( 10, : ) = (/ 1 , 3 , 0 , 0 , 0 /)
-       msCon1( 11 , : ) = (/ 2 , 3 , 0 , 0 , 0 /) ! Should be 2, 3, 1, 4
-       msCon1( 12 , : ) = (/ 2 , 3 , 0 , 0 , 0 /)
-       msCon1( 13 , : ) = (/ 4 , 2 , 0 , 0 , 0 /)
-       msCon1( 14 , : ) = (/ 1 , 2 , 0 , 0 , 0 /)
-       msCon1( 15 , : ) = (/ 1 , 4 , 0 , 0 , 0 /)
-       msCon1( 16 , : ) = (/ 0 , 0 , 0 , 0 , 0 /)
-
-       msCon2(1, :) = (/1, 2/)
-       msCon2(2, :) = (/2, 3/)
-       msCon2(3, :) = (/3, 4/)
-       msCon2(4, :) = (/4, 1/)
-       liftDistInitialized = .True.
-    end if
-
-  end subroutine initializeLiftDistributionData
-
-  subroutine computeSurfaceOutputNodalData(exch, zipper, includeTractions, nodalValues)
-    !
-    !       This purpose of this subroutine is to compute all nodal values
-    !
-    use constants
-    use communication
-    use inputPhysics
-    use blockPointers
-    use surfaceFamilies, only : BCFamGroups, familyExchange
-    use outputMod, only : storeSurfSolInBuffer, numberOfSurfSolVariables, &
-         surfSolNames
-    use surfaceUtils
-    use utils, only : setPointers, EChk
-    use sorting, only : famInList
-    use oversetData, only : zipperMesh
-#include <petsc/finclude/petsc.h>
-    use petsc
-    implicit none
-    ! Input Param
-    type(familyExchange) :: exch
-    logical :: includeTractions
-    real(kind=realType), dimension(:, :), intent(inout) :: nodalValues
-    type(zipperMesh) :: zipper
-    ! Working params
-    integer(kind=intType) :: i, j, ii, jj, kk, nn, mm, iSol, ierr, nPts, nCells
-    integer(kind=intType) :: nFields, nSolVar, iBeg, iEnd, jBeg, jEnd, ind(4), ni, nj
-    integer(kind=intType) :: sizeNode, sizeCell, iDim
-    integer(kind=intType), dimension(3,2) :: cellRangeCGNS
-    character(len=maxCGNSNameLen), dimension(:), allocatable :: solNames
-    real(kind=realType), dimension(:), allocatable :: buffer
-    real(kind=realType), dimension(:), pointer :: weightPtr, localPtr
-    real(kind=realType), dimension(:, :), allocatable :: tmp
-    logical :: viscousSubFace
-
-    nodalValues = zero
-
-    call numberOfSurfSolVariables(nSolVar)
-    allocate(solNames(nSolVar))
-    call surfSolNames(solNames)
-
-    ! The tractions have a sepcial routine so call that first before we
-    ! mess with the family information.
-
-    if (includeTractions) then
-
-       ! And put the traction where it needs to go if necessary.  Note that
-       ! this computation will only work corectly when the wallExchange
-       ! which include all wallgroups is used.
-
-       call computeNodalTractions(exch%sps)
-       ii = 0
-       do nn=1, nDom
-          call setPointers(nn, 1_intType, exch%sps)
-          do mm=1, nBocos
-             iBeg = BCdata(mm)%inBeg; iEnd=BCData(mm)%inEnd
-             jBeg = BCdata(mm)%jnBeg; jEnd=BCData(mm)%jnEnd
-
-             if (famInList(BCdata(mm)%famID, exch%famList)) then
-                do j=jBeg, jEnd
-                   do i=iBeg, iEnd
-                      ii = ii + 1
-                      nodalValues(ii, 4) = BCData(mm)%Tp(i, j, 1)
-                      nodalValues(ii, 5) = BCData(mm)%Tp(i, j, 2)
-                      nodalValues(ii, 6) = BCData(mm)%Tp(i, j, 3)
-
-                      nodalValues(ii, 7) = BCData(mm)%Tv(i, j, 1)
-                      nodalValues(ii, 8) = BCData(mm)%Tv(i, j, 2)
-                      nodalValues(ii, 9) = BCData(mm)%Tv(i, j, 3)
-                   end do
+            ! First pass through to generate and write header information
+            masterBCLoop1: do iBCGroup = 1, nFamExchange
+
+                ! Pointers for easier reading
+                exch => BCFamExchange(iBCGroup, sps)
+                zipper => zipperMeshes(iBCGroup)
+
+                ! First thing we do is figure out if we actually need to do
+                ! anything with this BCgroup at all. If none the requested
+                ! families are in this BCExcahnge we don't have to do
+                ! anything.
+
+                BCGroupNeeded = .False.
+                do i = 1, size(famList)
+                    if (famInLIst(famList(i), exch%famList)) then
+                        BCGroupNeeded = .True.
+                    end if
                 end do
-             end if
-          end do
-       end do
 
-       ! Not quite dont yet with the nodal tractions; we need to send
-       ! the nodal tractions that the duplices the *zipper* mesh needs
-       ! to the root proc. We have a special scatter for this.
+                ! Keep going if we don't need this.
+                if (.not. BCGroupNeeded) then
+                    cycle
+                end if
 
-       if (zipper%allocated) then
+                ! Get the sizes of this BCGroup
+                call getSurfaceSize(sizeNode, sizeCell, exch%famList, size(exch%famList), .True.)
+                call mpi_reduce(sizeNode, nNodes, 1, adflow_integer, MPI_SUM, 0, adflow_comm_world, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-          ! Loop over the 6 tractions
-          do iDim=1,6
+                ! Now gather up the cell family info.
+                allocate (cellDisps(0:nProc), cellSizes(nProc))
 
-             ! Copy the values into localPtr
-             call VecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
-             call EChk(ierr,__FILE__,__LINE__)
+                call mpi_gather(sizeCell, 1, adflow_integer, &
+                                cellSizes, 1, adflow_integer, 0, adflow_comm_world, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-             do i=1,exch%nNodes
-                localPtr(i) = nodalValues(i, iDim+3)
-             end do
+                allocate (localElemFam(sizeCell))
+                call getSurfaceFamily(localElemFam, sizeCell, exch%famList, size(exch%famList), .True.)
 
-             call VecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
-             call EChk(ierr,__FILE__,__LINE__)
+                if (myid == 0) then
+                    cellDisps(0) = 0
+                    do iProc = 1, nProc
+                        cellDisps(iProc) = cellDisps(iProc - 1) + cellSizes(iProc)
+                    end do
+                    nCells = sum(cellSizes)
+                    allocate (elemFam(nCells))
+                end if
 
-             ! Now use the *zipper* scatter
-             call VecScatterBegin(zipper%scatter, exch%nodeValLocal,&
-                  zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
-             call EChk(ierr,__FILE__,__LINE__)
+                call mpi_gatherv(localElemFam, &
+                                 size(localElemFam), adflow_integer, elemFam, &
+                                 cellSizes, cellDisps, adflow_integer, 0, adflow_comm_world, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                ! Deallocate temp memory
+                deallocate (localElemFam, cellSizes, cellDisps)
+
+                rootProc: if (myid == 0 .and. nCells > 0) then
+                    do iFam = 1, size(exch%famList)
+
+                        ! Check if we have to write this one:
+                        famInclude: if (famInList(exch%famList(iFam), famList)) then
+                            nCellsToWrite = 0
+                            do i = 1, nCells
+                                ! Check if this elem is to be included
+                                if (elemFam(i) == exch%famList(iFam)) then
+                                    nCellsToWrite = nCellsToWrite + 1
+                                end if
+                            end do
+
+                            if (nCellsToWrite > 0) then
+                                call writeFloat(zoneMarker) ! Zone Marker
+                                call writeString(trim(famNames(exch%famList(iFam)))) ! Zone Name
+                                call writeInteger(-1) ! Parent Zone (-1 for None)
+                                call writeInteger(-1) ! Strand ID (-2 for tecplot assignment)
+                                call writeDouble(zero) ! Solution Time
+                                call writeInteger(-1)! Zone Color (Not used anymore) (-1)
+                                call writeInteger(3) ! Zone Type (3 for FEQuadrilateral)
+                                call writeInteger(0)! Data Packing (0 for block)
+                                call writeInteger(0)! Specify Var Location (0=don't specify, all at nodes)
+                                call writeInteger(0) ! Are raw 1-to-1 face neighbours supplied (0 for false)
+                                call writeInteger(nNodes) ! Number of nodes in FE Zone
+                                call writeInteger(nCellsToWrite) ! Number of elements in FE Zone
+                                call writeInteger(0) ! ICellDim, jCellDim, kCellDim (for future use, set to 0)
+                                call writeInteger(0)
+                                call writeInteger(0)
+                                call writeInteger(0) ! Aux data specified (0 for no)
+                            end if
+                        end if famInclude
+                    end do
+                end if rootProc
+                if (myid == 0) then
+                    deallocate (elemFam)
+                end if
+            end do masterBCLoop1
 
-             call VecScatterEnd(zipper%scatter, exch%nodeValLocal,&
-                  zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
-             call EChk(ierr,__FILE__,__LINE__)
+            if (myid == 0) then
+                call writeFloat(dataSectionMarker) ! Eohmarker to mark difference between header and data section
+            end if
 
-             ! Copy the zipper values out on the root proc
-             if (myid == 0) then
-                call VecGetArrayF90(zipper%localVal, localPtr, ierr)
-                call EChk(ierr,__FILE__,__LINE__)
+            ! Now do everything again but for real.
+            masterBCLoop: do iBCGroup = 1, nFamExchange
 
-                do i=1,size(localPtr)
-                   nodalValues(exch%nNodes+i, iDim+3) = localPtr(i)
+                ! Pointers for easier reading
+                exch => BCFamExchange(iBCGroup, sps)
+                zipper => zipperMeshes(iBCGroup)
+
+                ! First thing we do is figure out if we actually need to do
+                ! anything with this BCgroup at all. If none the requested
+                ! families are in this BCExcahnge we don't have to do
+                ! anything.
+
+                BCGroupNeeded = .False.
+                do i = 1, size(famList)
+                    if (famInList(famList(i), exch%famList)) then
+                        BCGroupNeeded = .True.
+                    end if
                 end do
 
-                call VecRestoreArrayF90(zipper%localVal, localPtr, ierr)
-                call EChk(ierr,__FILE__,__LINE__)
-             end if
-          end do
-       end if
-    end if
-
-    ! Get the current set of surface points for the family we just set.
-    call getSurfaceSize(sizeNode, sizeCell, exch%famList, size(exch%famlist), .True.)
-    allocate(tmp(3, sizeNode))
-    call getSurfacePoints(tmp, sizeNode, exch%sps, exch%famList, size(exch%famList), .True.)
-
-    do i=1, sizeNode
-       nodalValues(i, 1:3) = tmp(1:3, i)
-    end do
-    deallocate(tmp)
-    ! For the remainder of the variables, use arithematic averaging.
-    call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    localPtr = zero
-    ! ii is the index into the pointer array
-    ii = 0
-    do nn=1, nDom
-       call setPointers(nn, 1_intType, exch%sps)
-       do mm=1, nBocos
-          iBeg = BCdata(mm)%inBeg; iEnd=BCData(mm)%inEnd
-          jBeg = BCdata(mm)%jnBeg; jEnd=BCData(mm)%jnEnd
-          ni = iEnd - iBeg + 1
-          nj = jEnd - jBeg + 1
-          if (famInList(BCdata(mm)%famID, exch%FamList)) then
-             do j=0,nj-2
-                do i=0,ni-2
-
-                   ! Scatter 1 to each node.
-                   ind(1) = ii + (j  )*ni + i + 1
-                   ind(2) = ii + (j  )*ni + i + 2
-                   ind(3) = ii + (j+1)*ni + i + 2
-                   ind(4) = ii + (j+1)*ni + i + 1
-                   do jj=1,4
-                      localPtr(ind(jj)) = localPtr(ind(jj)) + one
-                   end do
+                ! Keep going if we don't need this.
+                if (.not. BCGroupNeeded) then
+                    cycle
+                end if
+
+                ! Get the sizes of this BCGroup
+                call getSurfaceSize(sizeNode, sizeCell, exch%famList, size(exch%famList), .True.)
+                allocate (nodalValues(max(sizeNode, 1), nSolVar + 3 + 6))
+                ! Compute the nodal data
+
+                call computeSurfaceOutputNodalData(BCFamExchange(iBCGroup, sps), &
+                                                   zipperMeshes(iBCGroup), .False., nodalValues(:, :))
+
+                ! Gather up the number of nodes to be set to the root proc:
+                allocate (nodeSizes(nProc), nodeDisps(0:nProc))
+                nodeSizes = 0
+                nodeDisps = 0
+
+                call mpi_allgather(sizeNode, 1, adflow_integer, nodeSizes, 1, adflow_integer, &
+                                   adflow_comm_world, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+                nodeDisps(0) = 0
+                do iProc = 1, nProc
+                    nodeDisps(iProc) = nodeDisps(iProc - 1) + nodeSizes(iProc)
                 end do
-             end do
-             ii = ii + ni*nj
-          end if
-       end do
-    end do
 
-    call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+                iSize = 3 + 6 + nSolVar
+                if (myid == 0) then
+                    nNodes = sum(nodeSizes)
+                else
+                    nNodes = 1
+                end if
 
-    ! Globalize the area
-    call vecSet(exch%sumGlobal, zero, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+                ! Only root proc actually has any space allocated
+                allocate (vars(nNodes, iSIze))
 
-    call VecScatterBegin(exch%scatter, exch%nodeValLocal, &
-         exch%sumGlobal, ADD_VALUES, SCATTER_FORWARD, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+                ! Gather values to the root proc.
+                do i = 1, iSize
+                    call mpi_gatherv(nodalValues(:, i), sizeNode, &
+                                 adflow_real, vars(:, i), nodeSizes, nodeDisps, adflow_real, 0, adflow_comm_world, ierr)
+                    call EChk(ierr, __FILE__, __LINE__)
+                end do
+                deallocate (nodalValues)
 
-    call VecScatterEnd(exch%scatter, exch%nodeValLocal, &
-         exch%sumGlobal, ADD_VALUES, SCATTER_FORWARD, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+                ! Now gather up the connectivity
+                allocate (cellDisps(0:nProc), cellSizes(nProc))
 
-    ! Now compute the inverse of the weighting so that we can multiply
-    ! instead of dividing.
+                call mpi_gather(sizeCell, 1, adflow_integer, &
+                                cellSizes, 1, adflow_integer, 0, adflow_comm_world, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-    call vecGetArrayF90(exch%sumGlobal, localPtr, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+                if (allocated(cgnsBlockID)) then
+                    deallocate (cgnsBlockID)
+                end if
 
-    localPtr = one/localPtr
+                allocate (localConn(4, sizeCell), localElemFam(sizeCell), cgnsBlockID(sizeCell))
+                call getSurfaceConnectivity(localConn, cgnsBlockID, sizeCell, exch%famList, size(exch%famList), .True.)
+                call getSurfaceFamily(localElemFam, sizeCell, exch%famList, size(exch%famList), .True.)
+
+                if (myid == 0) then
+                    cellDisps(0) = 0
+                    do iProc = 1, nProc
+                        cellDisps(iProc) = cellDisps(iProc - 1) + cellSizes(iProc)
+                    end do
+                    nCells = sum(cellSizes)
+                    allocate (conn(4, nCells))
+                    allocate (elemFam(nCells))
+                end if
 
-    call vecRestoreArrayF90(exch%sumGlobal, localPtr, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+                ! We offset the conn array by nodeDisps(iProc) which
+                ! automagically adjusts the connectivity to account for the
+                ! number of nodes from different processors
+
+                call mpi_gatherv(localConn + nodeDisps(myid), &
+                                 4 * size(localConn, 2), adflow_integer, conn, &
+                                 cellSizes * 4, cellDisps * 4, adflow_integer, 0, adflow_comm_world, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                call mpi_gatherv(localElemFam, &
+                                 size(localElemFam), adflow_integer, elemFam, &
+                                 cellSizes, cellDisps, adflow_integer, 0, adflow_comm_world, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                ! Local values are finished
+                deallocate (localConn, localElemFam)
+                iZone = 0
+                rootProc2: if (myid == 0 .and. nCells > 0) then
+
+                    ! Need zero based for binary output.
+                    conn = conn - 1
+
+                    allocate (mask(nCells))
+                    dataWritten = .False.
+                    do iFam = 1, size(exch%famList)
+
+                        ! Check if we have to write this one:
+                        famInclude2: if (famInList(exch%famList(iFam), famList)) then
+
+                            ! Create a temporary mask
+                            mask = 0
+                            nCellsToWrite = 0
+                            do i = 1, nCells
+                                ! Check if this elem is to be included
+                                if (elemFam(i) == exch%famList(iFam)) then
+                                    mask(i) = 1
+                                    nCellsToWrite = nCellsToWrite + 1
+                                end if
+                            end do
+
+                            actualWrite2: if (nCellsToWrite > 0) then
+
+                                ! Write Zone Data
+                                call writeFloat(zoneMarker)
+
+                                ! Data Type for each variable (1 for float, 2 for double)
+                                do i = 1, 3
+                                    if (precisionSurfGrid == precisionSingle) then
+                                        call writeInteger(1)
+                                    else if (precisionSurfGrid == precisionDouble) then
+                                        call writeInteger(2)
+                                    end if
+                                end do
+
+                                do i = 1, nSolVar
+                                    if (precisionSurfSol == precisionSingle) then
+                                        call writeInteger(1)
+                                    else if (precisionSurfSol == precisionDouble) then
+                                        call writeInteger(2)
+                                    end if
+                                end do
+
+                                call writeInteger(0) ! Has passive variables (0 for no)
+
+                                if (.not. dataWritten) then
+
+                                    ! Save the current 0-based zone so we know which to share with.
+                                    lastZoneSharing = iZone
+
+                                    call writeInteger(0) ! Has variable sharing (0 for no)
+                                    call writeInteger(-1)  ! Zone based zone number to share connectivity (-1 for no sharing)
+
+                                    ! Min/Max Value for coordinates
+                                    do j = 1, 3
+                                        call writeDouble(minval(vars(:, j)))
+                                        call writeDouble(maxval(vars(:, j)))
+                                    end do
+
+                                    ! Min/Max Value for solution variables
+                                    do j = 1, nSolVar
+                                        call writeDouble(minval(vars(:, j + 9)))
+                                        call writeDouble(maxval(vars(:, j + 9)))
+                                    end do
+
+                                    ! Dump the coordinates
+                                    do j = 1, 3
+                                        if (precisionSurfGrid == precisionSingle) then
+                                            call writeFloats(vars(1:nNodes, j))
+                                        else if (precisionSurfSol == precisionDouble) then
+                                            call writeDoubles(vars(1:nNodes, j))
+                                        end if
+                                    end do
+
+                                    ! Dump the solution variables
+                                    do j = 1, nSolVar
+                                        if (precisionSurfSol == precisionSingle) then
+                                            call writeFloats(vars(1:nNodes, j + 9))
+                                        else if (precisionSurfSol == precisionDouble) then
+                                            call writeDoubles(vars(1:nNodes, j + 9))
+                                        end if
+                                    end do
+
+                                else
+                                    ! This will be sharing data from another zone.
+                                    call writeInteger(1) ! Has variable sharing (0 for no)
+
+                                    ! Write out the zone number for sharing the data.
+                                    do j = 1, 3 + nSolVar
+                                        call writeInteger(lastZoneSharing)
+                                    end do
+
+                                    call writeInteger(-1)  ! Zone based zone number to share connectivity (-1 for no sharing
+                                end if
+
+                                ! Dump the connectivity
+                                j = 0
+                                do i = 1, nCells
+                                    ! Check if this elem is to be included
+                                    if (mask(i) == 1) then
+                                        call writeIntegers(conn(:, i))
+                                        j = j + 1
+                                    end if
+                                end do
+
+                                iZone = iZone + 1
+                            end if actualWrite2
+                        end if famInclude2
+                    end do
+                    deallocate (mask)
+                end if rootProc2
+                if (myid == 0) then
+                    deallocate (conn, elemFam)
+                end if
+                deallocate (cellSizes, cellDisps, nodeSizes, nodeDisps, vars)
+            end do masterBCLoop
 
-    varLoop: do iSol=1, nSolVar
+            if (myid == 0) then
+                close (fileID)
+            end if
 
-       ! Extract the poitner to the local array
-       call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
+        end do spectralLoop
 
-       ! ii is the continous running element pointer
-       ii = 0
-       localPtr = zero
-       ! Do each variable separately.
-       domainLoop: do nn=1, nDom
-          call setPointers(nn, 1, exch%sps)
+        ! Restore the modified option
+        surfWriteBlank = blankSave
+        if (myID == 0 .and. printIterations) then
+            print "(a)", "# Tecplot surface file(s) written"
+            print "(a)", "#"
+        end if
 
-          bocoLoop: do mm=1, nBocos
-             if (famInList(BCdata(mm)%famID, exch%famList)) then
+    contains
 
-                ! storeSurfSolInBuffer needs to know if the subface is
-                ! viscous or not.
-                viscousSubFace = .False.
-                if (BCType(mm) == NSWallAdiabatic .or. &
-                     BCType(mm) == NSWallIsoThermal) then
-                   viscousSubFace = .true.
-                end if
+        subroutine writeFloat(adflowRealVal)
+            use iso_fortran_env, only: real32
+            implicit none
+            real(kind=realType) :: adflowRealVal
+            real(kind=real32) :: float
+            float = adflowRealval
+            write (fileID) float
+        end subroutine writeFloat
+
+        subroutine writeDouble(adflowRealVal)
+            use iso_fortran_env, only: real64
+            implicit none
+            real(kind=realType) :: adflowRealVal
+            real(kind=real64) :: dble
+            dble = adFlowRealVal
+            write (fileID) dble
+        end subroutine writeDouble
+
+        subroutine writeFloats(adflowRealVals)
+            use iso_fortran_env, only: real32
+            implicit none
+            real(kind=realType) :: adflowRealVals(:)
+            real(kind=real32) :: floats(size(adflowRealVals))
+            integer :: i
+            floats = adflowRealvals
+            write (fileID) floats
+
+        end subroutine writeFloats
+
+        subroutine writeDoubles(adflowRealVals)
+            use iso_fortran_env, only: real64
+            implicit none
+            real(kind=realType) :: adflowRealVals(:)
+            real(kind=real64) :: dbles(size(adflowrealvals))
+            integer :: i
+            dbles = adflowrealvals
+            write (fileID) dbles
+
+        end subroutine writeDoubles
+
+        subroutine writeInteger(adflowIntegerVal)
+            use iso_fortran_env, only: int32
+            implicit none
+            integer(kind=intType) :: adflowIntegerVal
+            integer(kind=int32) :: int
+
+            int = adflowIntegerVal
+            write (fileID) int
+        end subroutine writeInteger
+
+        subroutine writeIntegers(adflowIntegerVals)
+            use iso_fortran_env, only: int32
+            implicit none
+            integer(kind=intType) :: adflowIntegerVals(:), i
+            integer(kind=int32) :: ints(size(adflowintegervals))
+            ints = adflowintegervals
+            write (fileID) ints
+
+        end subroutine writeIntegers
+
+        subroutine writeString(str)
+
+            implicit none
+
+            character(len=*) :: str
+            integer(kind=intType) :: i
+
+            do i = 1, len(str)
+                write (fileID) iachar(str(i:i))
+            end do
+            write (fileID) 0
+
+        end subroutine writeString
+
+    end subroutine writeTecplotSurfaceFile
 
-                ! Determine the cell range *in the original cgns
-                ! ordering*. The reason for this is the storeSurfSolInBufer
-                ! is normally used for writing CGNS and thus it is that
-                ! ording that is important. However, that isn't too hard to
-                ! deal with.
-
-                jBeg = BCData(mm)%jnBeg + 1
-                jEnd = BCData(mm)%jnEnd
-                iBeg = BCData(mm)%inBeg + 1
-                iEnd = BCData(mm)%inEnd
-
-                ! Dummy value of 1 for the face values not set.
-                cellRangeCGNS = 1
-                select case(BCFaceID(mm))
-                case (iMin, iMax)
-                   ! I range meaningless
-                   cellRangeCGNS(2, 1) = iBeg + jBegOr - 1
-                   cellRangeCGNS(2, 2) = iEnd + jBegOr - 1
-
-                   cellRangeCGNS(3, 1) = jBeg + kBegOr - 1
-                   cellRangeCGNS(3, 2) = jEnd + kBegOr - 1
-
-                case (jMin, jMax)
-                   ! J range meaningless
-                   cellRangeCGNS(1, 1) = iBeg + iBegOr - 1
-                   cellRangeCGNS(1, 2) = iEnd + iBegOr - 1
-
-                   cellRangeCGNS(3, 1) = jBeg + kBegOr - 1
-                   cellRangeCGNS(3, 2) = jEnd + kBegOr - 1
-
-                case (kMin, kMax)
-                   ! J range meaningless
-                   cellRangeCGNS(1, 1) = iBeg + iBegOr - 1
-                   cellRangeCGNS(1, 2) = iEnd + iBegOr - 1
-
-                   cellRangeCGNS(2, 1) = jBeg + jBegOr - 1
-                   cellRangeCGNS(2, 2) = jEnd + jBegOr - 1
-                end select
-
-                ! Allocate enough space for the 1D buffer
-                allocate(buffer((iEnd-iBeg+1)*(jEnd-jBeg+1)))
-                kk = 0
-                call storeSurfsolInBuffer(exch%sps, buffer, kk, nn, BCfaceID(mm), &
-                     cellRangeCGNS, solNames(iSol), viscousSubFace, .False.)
-
-                ! Now since the storeSurfSol just put things in a flat
-                ! array and are face based, here we take the 1D face data
-                ! and scatter to the nodes.
-
-                iBeg = BCdata(mm)%inBeg; iEnd=BCData(mm)%inEnd
-                jBeg = BCdata(mm)%jnBeg; jEnd=BCData(mm)%jnEnd
+    subroutine initializeLiftDistributionData
+        use constants
+        use communication
+        use blockPointers
+        use inputPhysics
+        use outputMod
+        use inputTimeSpectral
+        use surfacefamilies
+        implicit none
+
+        ! Working Variables
+        integer(kind=intType) :: nPts, nCells, sps
+
+        if (liftDistInitialized) then
+            return
+        else
+
+            ! Data for the marching squares method: Which edges are cut by
+            ! the contour. We haven't dealt with the case of an ambiguous
+            ! contour which is case 6 and 11. Since most of the slices we are
+            ! doing are planes this won't matter.
+            msCon1(1, :) = (/0, 0, 0, 0, 0/)
+            msCon1(2, :) = (/1, 4, 0, 0, 0/)
+            msCon1(3, :) = (/1, 2, 0, 0, 0/)
+            msCon1(4, :) = (/4, 2, 0, 0, 0/)
+            msCon1(5, :) = (/2, 3, 0, 0, 0/)
+            msCon1(6, :) = (/2, 3, 0, 0, 0/) ! Should be 2, 3, 1, 4
+            msCon1(7, :) = (/1, 3, 0, 0, 0/)
+            msCon1(8, :) = (/4, 3, 0, 0, 0/)
+            msCon1(9, :) = (/4, 3, 0, 0, 0/)
+            msCon1(10, :) = (/1, 3, 0, 0, 0/)
+            msCon1(11, :) = (/2, 3, 0, 0, 0/) ! Should be 2, 3, 1, 4
+            msCon1(12, :) = (/2, 3, 0, 0, 0/)
+            msCon1(13, :) = (/4, 2, 0, 0, 0/)
+            msCon1(14, :) = (/1, 2, 0, 0, 0/)
+            msCon1(15, :) = (/1, 4, 0, 0, 0/)
+            msCon1(16, :) = (/0, 0, 0, 0, 0/)
+
+            msCon2(1, :) = (/1, 2/)
+            msCon2(2, :) = (/2, 3/)
+            msCon2(3, :) = (/3, 4/)
+            msCon2(4, :) = (/4, 1/)
+            liftDistInitialized = .True.
+        end if
+
+    end subroutine initializeLiftDistributionData
+
+    subroutine computeSurfaceOutputNodalData(exch, zipper, includeTractions, nodalValues)
+        !
+        !       This purpose of this subroutine is to compute all nodal values
+        !
+        use constants
+        use communication
+        use inputPhysics
+        use blockPointers
+        use surfaceFamilies, only: BCFamGroups, familyExchange
+        use outputMod, only: storeSurfSolInBuffer, numberOfSurfSolVariables, &
+                             surfSolNames
+        use surfaceUtils
+        use utils, only: setPointers, EChk
+        use sorting, only: famInList
+        use oversetData, only: zipperMesh
+#include <petsc/finclude/petsc.h>
+        use petsc
+        implicit none
+        ! Input Param
+        type(familyExchange) :: exch
+        logical :: includeTractions
+        real(kind=realType), dimension(:, :), intent(inout) :: nodalValues
+        type(zipperMesh) :: zipper
+        ! Working params
+        integer(kind=intType) :: i, j, ii, jj, kk, nn, mm, iSol, ierr, nPts, nCells
+        integer(kind=intType) :: nFields, nSolVar, iBeg, iEnd, jBeg, jEnd, ind(4), ni, nj
+        integer(kind=intType) :: sizeNode, sizeCell, iDim
+        integer(kind=intType), dimension(3, 2) :: cellRangeCGNS
+        character(len=maxCGNSNameLen), dimension(:), allocatable :: solNames
+        real(kind=realType), dimension(:), allocatable :: buffer
+        real(kind=realType), dimension(:), pointer :: weightPtr, localPtr
+        real(kind=realType), dimension(:, :), allocatable :: tmp
+        logical :: viscousSubFace
+
+        nodalValues = zero
+
+        call numberOfSurfSolVariables(nSolVar)
+        allocate (solNames(nSolVar))
+        call surfSolNames(solNames)
+
+        ! The tractions have a sepcial routine so call that first before we
+        ! mess with the family information.
+
+        if (includeTractions) then
+
+            ! And put the traction where it needs to go if necessary.  Note that
+            ! this computation will only work corectly when the wallExchange
+            ! which include all wallgroups is used.
+
+            call computeNodalTractions(exch%sps)
+            ii = 0
+            do nn = 1, nDom
+                call setPointers(nn, 1_intType, exch%sps)
+                do mm = 1, nBocos
+                    iBeg = BCdata(mm)%inBeg; iEnd = BCData(mm)%inEnd
+                    jBeg = BCdata(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+
+                    if (famInList(BCdata(mm)%famID, exch%famList)) then
+                        do j = jBeg, jEnd
+                            do i = iBeg, iEnd
+                                ii = ii + 1
+                                nodalValues(ii, 4) = BCData(mm)%Tp(i, j, 1)
+                                nodalValues(ii, 5) = BCData(mm)%Tp(i, j, 2)
+                                nodalValues(ii, 6) = BCData(mm)%Tp(i, j, 3)
+
+                                nodalValues(ii, 7) = BCData(mm)%Tv(i, j, 1)
+                                nodalValues(ii, 8) = BCData(mm)%Tv(i, j, 2)
+                                nodalValues(ii, 9) = BCData(mm)%Tv(i, j, 3)
+                            end do
+                        end do
+                    end if
+                end do
+            end do
+
+            ! Not quite dont yet with the nodal tractions; we need to send
+            ! the nodal tractions that the duplices the *zipper* mesh needs
+            ! to the root proc. We have a special scatter for this.
+
+            if (zipper%allocated) then
+
+                ! Loop over the 6 tractions
+                do iDim = 1, 6
+
+                    ! Copy the values into localPtr
+                    call VecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
+                    call EChk(ierr, __FILE__, __LINE__)
+
+                    do i = 1, exch%nNodes
+                        localPtr(i) = nodalValues(i, iDim + 3)
+                    end do
+
+                    call VecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
+                    call EChk(ierr, __FILE__, __LINE__)
+
+                    ! Now use the *zipper* scatter
+                    call VecScatterBegin(zipper%scatter, exch%nodeValLocal, &
+                                         zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
+                    call EChk(ierr, __FILE__, __LINE__)
+
+                    call VecScatterEnd(zipper%scatter, exch%nodeValLocal, &
+                                       zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
+                    call EChk(ierr, __FILE__, __LINE__)
+
+                    ! Copy the zipper values out on the root proc
+                    if (myid == 0) then
+                        call VecGetArrayF90(zipper%localVal, localPtr, ierr)
+                        call EChk(ierr, __FILE__, __LINE__)
+
+                        do i = 1, size(localPtr)
+                            nodalValues(exch%nNodes + i, iDim + 3) = localPtr(i)
+                        end do
+
+                        call VecRestoreArrayF90(zipper%localVal, localPtr, ierr)
+                        call EChk(ierr, __FILE__, __LINE__)
+                    end if
+                end do
+            end if
+        end if
+
+        ! Get the current set of surface points for the family we just set.
+        call getSurfaceSize(sizeNode, sizeCell, exch%famList, size(exch%famlist), .True.)
+        allocate (tmp(3, sizeNode))
+        call getSurfacePoints(tmp, sizeNode, exch%sps, exch%famList, size(exch%famList), .True.)
+
+        do i = 1, sizeNode
+            nodalValues(i, 1:3) = tmp(1:3, i)
+        end do
+        deallocate (tmp)
+        ! For the remainder of the variables, use arithematic averaging.
+        call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        localPtr = zero
+        ! ii is the index into the pointer array
+        ii = 0
+        do nn = 1, nDom
+            call setPointers(nn, 1_intType, exch%sps)
+            do mm = 1, nBocos
+                iBeg = BCdata(mm)%inBeg; iEnd = BCData(mm)%inEnd
+                jBeg = BCdata(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
                 ni = iEnd - iBeg + 1
                 nj = jEnd - jBeg + 1
-                jj = 0
-                do j=0,nj-2
-                   do i=0,ni-2
-                      jj = jj + 1
-                      ! Scatter value to each node
-                      ind(1) = ii + (j  )*ni + i + 1
-                      ind(2) = ii + (j  )*ni + i + 2
-                      ind(3) = ii + (j+1)*ni + i + 2
-                      ind(4) = ii + (j+1)*ni + i + 1
-                      do kk=1,4
-                         localPtr(ind(kk)) = localPtr(ind(kk)) + buffer(jj)
-                      end do
-                   end do
-                end do
+                if (famInList(BCdata(mm)%famID, exch%FamList)) then
+                    do j = 0, nj - 2
+                        do i = 0, ni - 2
+
+                            ! Scatter 1 to each node.
+                            ind(1) = ii + (j) * ni + i + 1
+                            ind(2) = ii + (j) * ni + i + 2
+                            ind(3) = ii + (j + 1) * ni + i + 2
+                            ind(4) = ii + (j + 1) * ni + i + 1
+                            do jj = 1, 4
+                                localPtr(ind(jj)) = localPtr(ind(jj)) + one
+                            end do
+                        end do
+                    end do
+                    ii = ii + ni * nj
+                end if
+            end do
+        end do
 
-                ii = ii + ni*nj
-                deallocate(buffer)
-             end if
-          end do bocoLoop
-       end do domainLoop
-
-       ! Return our pointer
-       call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ! Globalize the function value
-       call vecSet(exch%nodeValGlobal, zero, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       call VecScatterBegin(exch%scatter, exch%nodeValLocal, &
-            exch%nodeValGlobal, ADD_VALUES, SCATTER_FORWARD, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       call VecScatterEnd(exch%scatter, exch%nodeValLocal, &
-            exch%nodeValGlobal, ADD_VALUES, SCATTER_FORWARD, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ! Now divide by the weighting sum. We can do this with a
-       ! vecpointwisemult since we already divided by the weight.
-       call vecPointwiseMult(exch%nodeValGlobal, exch%nodeValGlobal, &
-            exch%sumGlobal, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-
-       ! Push back to the local values
-       call VecScatterBegin(exch%scatter, exch%nodeValGlobal, &
-            exch%nodeValLocal, INSERT_VALUES, SCATTER_REVERSE, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       call VecScatterEnd(exch%scatter, exch%nodeValGlobal, &
-            exch%nodeValLocal, INSERT_VALUES, SCATTER_REVERSE, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ! Copy the values into nodalValues
-       call VecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       do i=1,size(localPtr)
-          nodalValues(i, iSol+9) = localPtr(i)
-       end do
-
-       call VecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       if (zipper%allocated) then
-
-          ! Now use the *zipper* scatter
-          call VecScatterBegin(zipper%scatter, exch%nodeValLocal,&
-               zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          call VecScatterEnd(zipper%scatter, exch%nodeValLocal,&
-               zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          ! Copy the zipper values out on the root proc
-          if (myid == 0) then
-             call VecGetArrayF90(zipper%localVal, localPtr, ierr)
-             call EChk(ierr,__FILE__,__LINE__)
-
-             do i=1,size(localPtr)
-                nodalValues(exch%nNodes+i, 9+iSol) = localPtr(i)
-             end do
-
-             call VecRestoreArrayF90(zipper%localVal, localPtr, ierr)
-             call EChk(ierr,__FILE__,__LINE__)
-          end if
-       end if
-    end do varLoop
-    deallocate(solNames)
-
-  end subroutine computeSurfaceOutputNodalData
-
-  subroutine createSlice(pts, conn, elemFam, slc, pt, normal, dir_vec, use_dir, sliceName, famList)
-    !
-    !       This subroutine creates a slice on a plane defined by pt and
-    !       and dir. It only uses the families specified in the famList.
-    !       sps define which specral instance to use.
-    !
-    use constants
-    use utils, only : reallocatereal2, reallocateinteger2, pointReduce
-    use sorting, only : famInList
-    implicit none
+        call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Globalize the area
+        call vecSet(exch%sumGlobal, zero, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecScatterBegin(exch%scatter, exch%nodeValLocal, &
+                             exch%sumGlobal, ADD_VALUES, SCATTER_FORWARD, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecScatterEnd(exch%scatter, exch%nodeValLocal, &
+                           exch%sumGlobal, ADD_VALUES, SCATTER_FORWARD, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Now compute the inverse of the weighting so that we can multiply
+        ! instead of dividing.
+
+        call vecGetArrayF90(exch%sumGlobal, localPtr, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        localPtr = one / localPtr
+
+        call vecRestoreArrayF90(exch%sumGlobal, localPtr, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        varLoop: do iSol = 1, nSolVar
+
+            ! Extract the poitner to the local array
+            call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! ii is the continous running element pointer
+            ii = 0
+            localPtr = zero
+            ! Do each variable separately.
+            domainLoop: do nn = 1, nDom
+                call setPointers(nn, 1, exch%sps)
+
+                bocoLoop: do mm = 1, nBocos
+                    if (famInList(BCdata(mm)%famID, exch%famList)) then
+
+                        ! storeSurfSolInBuffer needs to know if the subface is
+                        ! viscous or not.
+                        viscousSubFace = .False.
+                        if (BCType(mm) == NSWallAdiabatic .or. &
+                            BCType(mm) == NSWallIsoThermal) then
+                            viscousSubFace = .true.
+                        end if
+
+                        ! Determine the cell range *in the original cgns
+                        ! ordering*. The reason for this is the storeSurfSolInBufer
+                        ! is normally used for writing CGNS and thus it is that
+                        ! ording that is important. However, that isn't too hard to
+                        ! deal with.
+
+                        jBeg = BCData(mm)%jnBeg + 1
+                        jEnd = BCData(mm)%jnEnd
+                        iBeg = BCData(mm)%inBeg + 1
+                        iEnd = BCData(mm)%inEnd
+
+                        ! Dummy value of 1 for the face values not set.
+                        cellRangeCGNS = 1
+                        select case (BCFaceID(mm))
+                        case (iMin, iMax)
+                            ! I range meaningless
+                            cellRangeCGNS(2, 1) = iBeg + jBegOr - 1
+                            cellRangeCGNS(2, 2) = iEnd + jBegOr - 1
+
+                            cellRangeCGNS(3, 1) = jBeg + kBegOr - 1
+                            cellRangeCGNS(3, 2) = jEnd + kBegOr - 1
+
+                        case (jMin, jMax)
+                            ! J range meaningless
+                            cellRangeCGNS(1, 1) = iBeg + iBegOr - 1
+                            cellRangeCGNS(1, 2) = iEnd + iBegOr - 1
+
+                            cellRangeCGNS(3, 1) = jBeg + kBegOr - 1
+                            cellRangeCGNS(3, 2) = jEnd + kBegOr - 1
+
+                        case (kMin, kMax)
+                            ! J range meaningless
+                            cellRangeCGNS(1, 1) = iBeg + iBegOr - 1
+                            cellRangeCGNS(1, 2) = iEnd + iBegOr - 1
+
+                            cellRangeCGNS(2, 1) = jBeg + jBegOr - 1
+                            cellRangeCGNS(2, 2) = jEnd + jBegOr - 1
+                        end select
+
+                        ! Allocate enough space for the 1D buffer
+                        allocate (buffer((iEnd - iBeg + 1) * (jEnd - jBeg + 1)))
+                        kk = 0
+                        call storeSurfsolInBuffer(exch%sps, buffer, kk, nn, BCfaceID(mm), &
+                                                  cellRangeCGNS, solNames(iSol), viscousSubFace, .False.)
+
+                        ! Now since the storeSurfSol just put things in a flat
+                        ! array and are face based, here we take the 1D face data
+                        ! and scatter to the nodes.
+
+                        iBeg = BCdata(mm)%inBeg; iEnd = BCData(mm)%inEnd
+                        jBeg = BCdata(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+                        ni = iEnd - iBeg + 1
+                        nj = jEnd - jBeg + 1
+                        jj = 0
+                        do j = 0, nj - 2
+                            do i = 0, ni - 2
+                                jj = jj + 1
+                                ! Scatter value to each node
+                                ind(1) = ii + (j) * ni + i + 1
+                                ind(2) = ii + (j) * ni + i + 2
+                                ind(3) = ii + (j + 1) * ni + i + 2
+                                ind(4) = ii + (j + 1) * ni + i + 1
+                                do kk = 1, 4
+                                    localPtr(ind(kk)) = localPtr(ind(kk)) + buffer(jj)
+                                end do
+                            end do
+                        end do
+
+                        ii = ii + ni * nj
+                        deallocate (buffer)
+                    end if
+                end do bocoLoop
+            end do domainLoop
+
+            ! Return our pointer
+            call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Globalize the function value
+            call vecSet(exch%nodeValGlobal, zero, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            call VecScatterBegin(exch%scatter, exch%nodeValLocal, &
+                                 exch%nodeValGlobal, ADD_VALUES, SCATTER_FORWARD, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            call VecScatterEnd(exch%scatter, exch%nodeValLocal, &
+                               exch%nodeValGlobal, ADD_VALUES, SCATTER_FORWARD, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Now divide by the weighting sum. We can do this with a
+            ! vecpointwisemult since we already divided by the weight.
+            call vecPointwiseMult(exch%nodeValGlobal, exch%nodeValGlobal, &
+                                  exch%sumGlobal, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Push back to the local values
+            call VecScatterBegin(exch%scatter, exch%nodeValGlobal, &
+                                 exch%nodeValLocal, INSERT_VALUES, SCATTER_REVERSE, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            call VecScatterEnd(exch%scatter, exch%nodeValGlobal, &
+                               exch%nodeValLocal, INSERT_VALUES, SCATTER_REVERSE, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Copy the values into nodalValues
+            call VecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            do i = 1, size(localPtr)
+                nodalValues(i, iSol + 9) = localPtr(i)
+            end do
+
+            call VecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            if (zipper%allocated) then
+
+                ! Now use the *zipper* scatter
+                call VecScatterBegin(zipper%scatter, exch%nodeValLocal, &
+                                     zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                call VecScatterEnd(zipper%scatter, exch%nodeValLocal, &
+                                   zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                ! Copy the zipper values out on the root proc
+                if (myid == 0) then
+                    call VecGetArrayF90(zipper%localVal, localPtr, ierr)
+                    call EChk(ierr, __FILE__, __LINE__)
+
+                    do i = 1, size(localPtr)
+                        nodalValues(exch%nNodes + i, 9 + iSol) = localPtr(i)
+                    end do
+
+                    call VecRestoreArrayF90(zipper%localVal, localPtr, ierr)
+                    call EChk(ierr, __FILE__, __LINE__)
+                end if
+            end if
+        end do varLoop
+        deallocate (solNames)
+
+    end subroutine computeSurfaceOutputNodalData
+
+    subroutine createSlice(pts, conn, elemFam, slc, pt, normal, dir_vec, use_dir, sliceName, famList)
+        !
+        !       This subroutine creates a slice on a plane defined by pt and
+        !       and dir. It only uses the families specified in the famList.
+        !       sps define which specral instance to use.
+        !
+        use constants
+        use utils, only: reallocatereal2, reallocateinteger2, pointReduce
+        use sorting, only: famInList
+        implicit none
+
+        ! Input param
+        real(kind=realType), dimension(:, :), intent(in) :: pts
+        integer(kind=intType), dimension(:, :), intent(in) :: conn
+        integer(kind=intType), dimension(:), intent(in) :: elemFam
+        type(slice), intent(inout) :: slc
+        real(kind=realType), dimension(3), intent(in) :: pt, dir_vec, normal
+        logical, intent(in) :: use_dir
+        character(len=*), intent(in) :: sliceName
+        integer(kind=intType), dimension(:), intent(in) :: famList
+
+        ! Working param
+        integer(kind=intType) :: i, nMax, nUnique, oldInd, newInd
+        integer(kind=intType) :: patchIndices(4), indexSquare, jj, kk, icon, iCoor, num1, num2
+        real(kind=realType) :: f(4), d, ovrdnom, tol, len_vec
+        logical :: logic1, foundFam
+        real(kind=realType), dimension(:, :), pointer :: tmpWeight, dummy, tmpNodes
+        integer(kind=intType), dimension(:, :), pointer :: tmpInd
+        integer(kind=intType), dimension(:), allocatable :: link
+        real(kind=realType), dimension(:), allocatable :: fc
+        real(kind=realType), dimension(3) :: elemc, vec
+
+        ! Allocate the family list this slice is to use:
+        slc%sliceName = sliceName
+        ! Set the info for the slice:
+        slc%pt = pt
+        slc%normal = normal
+        slc%dir_vec = dir_vec
+        slc%use_dir = use_dir
+        slc%nNodes = 0
+        allocate (slc%famList(size(famList)))
+        slc%famList = famList
+        ! First step is to compute the 'function' value that will be used
+        ! for the contour.
+
+        ! Equation of plane: ax + by + cz + d = 0
+        d = -pt(1) * normal(1) - pt(2) * normal(2) - pt(3) * normal(3)
+        ovrdnom = one / sqrt(normal(1)**2 + normal(2)**2 + normal(3)**2)
+
+        ! Compute the distance function on all possible surfaces on this
+        ! processor.
+        allocate (fc(size(pts, 2)))
+        do i = 1, size(pts, 2)
+            ! Now compute the signed distance
+            fc(i) = (normal(1) * pts(1, i) + normal(2) * pts(2, i) + normal(3) * pts(3, i) + d) * ovrdnom
+        end do
+
+        ! Estimate size of slice by the sqrt of the number of nodes in the
+        ! mesh. Exact size doesn't matter as we realloc if necessary.
+        nMax = int(sqrt(dble(size(pts, 2))))
+        allocate (tmpWeight(2, nMax), tmpInd(2, nMax), tmpNOdes(3, nMax))
+
+        iCoor = 0
+        oldInd = 1
+
+        ! Loop over all elements
+        elemLoop: do i = 1, size(conn, 2)
+
+            famInclude: if (famInList(elemFam(i), famList)) then
 
-    ! Input param
-    real(kind=realType), dimension(:, :), intent(in) :: pts
-    integer(kind=intType), dimension(:,:), intent(in) :: conn
-    integer(kind=intType), dimension(:), intent(in) :: elemFam
-    type(slice), intent(inout) :: slc
-    real(kind=realType), dimension(3), intent(in) :: pt, dir_vec, normal
-    logical, intent(in) :: use_dir
-    character(len=*), intent(in) :: sliceName
-    integer(kind=intType), dimension(:), intent(in) :: famList
-
-    ! Working param
-    integer(kind=intType) :: i, nMax, nUnique, oldInd, newInd
-    integer(kind=intType) :: patchIndices(4), indexSquare, jj, kk, icon, iCoor, num1, num2
-    real(kind=realType) :: f(4), d, ovrdnom, tol, len_vec
-    logical :: logic1, foundFam
-    real(kind=realType), dimension(:, :), pointer :: tmpWeight, dummy, tmpNodes
-    integer(kind=intType), dimension(:, :), pointer :: tmpInd
-    integer(kind=intType), dimension(:), allocatable :: link
-    real(kind=realType), dimension(:), allocatable :: fc
-    real(kind=realType), dimension(3) :: elemc, vec
-
-    ! Allocate the family list this slice is to use:
-    slc%sliceName = sliceName
-    ! Set the info for the slice:
-    slc%pt = pt
-    slc%normal = normal
-    slc%dir_vec = dir_vec
-    slc%use_dir = use_dir
-    slc%nNodes = 0
-    allocate(slc%famList(size(famList)))
-    slc%famList = famList
-    ! First step is to compute the 'function' value that will be used
-    ! for the contour.
-
-    ! Equation of plane: ax + by + cz + d = 0
-    d = -pt(1)*normal(1) - pt(2)*normal(2) - pt(3)*normal(3)
-    ovrdnom = one/sqrt(normal(1)**2 + normal(2)**2 + normal(3)**2)
-
-    ! Compute the distance function on all possible surfaces on this
-    ! processor.
-    allocate(fc(size(pts,2)))
-    do i=1, size(pts, 2)
-       ! Now compute the signed distance
-       fc(i) = (normal(1)*pts(1, i) + normal(2)*pts(2, i) + normal(3)*pts(3, i) + d)*ovrdnom
-    end do
-
-    ! Estimate size of slice by the sqrt of the number of nodes in the
-    ! mesh. Exact size doesn't matter as we realloc if necessary.
-    nMax = int(sqrt(dble(size(pts, 2))))
-    allocate(tmpWeight(2, nMax), tmpInd(2, nMax), tmpNOdes(3, nMax))
-
-    iCoor = 0
-    oldInd = 1
-
-    ! Loop over all elements
-    elemLoop: do i=1, size(conn, 2)
-
-       famInclude: if (famInList(elemFam(i), famList)) then
-
-          ! zero out the centroid
-          elemc = zero
-
-          ! Extract the indices and function values at each corner
-          do jj=1,4
-             patchIndices(jj) = conn(jj, i)
-             f(jj) = fc(patchIndices(jj))
-             elemc = elemc + 0.25_realType * pts(:, patchIndices(jj))
-          end do
-
-          ! check if we are using the direction to pick sliced elements
-          if (use_dir) then
-             ! check if the centroid of this element is in the correct direction
-             vec = elemc - pt
-             ! normalize vector
-             len_vec = sqrt(vec(1) * vec(1) + vec(2) * vec(2) + vec(3) * vec(3))
-             vec = vec / len_vec
-             if ((vec(1)*dir_vec(1) + vec(2)*dir_vec(2) + vec(3)*dir_vec(3)) .lt. zero) then
-               ! we reject this element; just set all signed distances to 1.0
-               do jj=1,4
-                 f(jj) = one
-               end do
-             end if
-          end if
-
-          ! Based on the values at each corner, determine which
-          ! type contour we have
-          indexSquare = 1
-
-          if (f(1) .lt. zero) indexsquare = indexsquare + 1
-          if (f(2) .lt. zero) indexsquare = indexsquare + 2
-          if (f(3) .lt. zero) indexsquare = indexsquare + 4
-          if (f(4) .lt. zero) indexsquare = indexsquare + 8
-
-          logic1 = .true.
-
-          kk = 1
-          do while (logic1)
-             ! This is the edge
-             icon = mscon1(indexSquare, kk)
-
-             if (icon == 0) then
-                logic1=.false.
-             else
-
-                ! num1, num2 are node indices
-                num1 = mscon2(icon,1)
-                num2 = mscon2(icon,2)
-
-                iCoor = iCoor + 1
-                if (iCoor > nMax) then
-                   ! Need to reallocate the arrays. Make it double the size
-                   call reallocateReal2(tmpWeight, 2, 2*nMax, 2, nMax, .true.)
-                   call reallocateReal2(tmpNodes, 3, 2*nMax, 3, nMax, .true.)
-                   call reallocateInteger2(tmpInd, 2, 2*nMax, 2, nMax, .true.)
-                   nMax = nMax * 2
+                ! zero out the centroid
+                elemc = zero
+
+                ! Extract the indices and function values at each corner
+                do jj = 1, 4
+                    patchIndices(jj) = conn(jj, i)
+                    f(jj) = fc(patchIndices(jj))
+                    elemc = elemc + 0.25_realType * pts(:, patchIndices(jj))
+                end do
+
+                ! check if we are using the direction to pick sliced elements
+                if (use_dir) then
+                    ! check if the centroid of this element is in the correct direction
+                    vec = elemc - pt
+                    ! normalize vector
+                    len_vec = sqrt(vec(1) * vec(1) + vec(2) * vec(2) + vec(3) * vec(3))
+                    vec = vec / len_vec
+                    if ((vec(1) * dir_vec(1) + vec(2) * dir_vec(2) + vec(3) * dir_vec(3)) .lt. zero) then
+                        ! we reject this element; just set all signed distances to 1.0
+                        do jj = 1, 4
+                            f(jj) = one
+                        end do
+                    end if
                 end if
 
-                ! Weight factors
-                tmpWeight(2, iCoor) = (zero - f(num1))/(f(num2) - f(num1))
-                tmpWeight(1, iCoor) = one - tmpWeight(2, icoor)
-
-                ! Store the weight factors
-                tmpInd(:, iCoor) = (/patchIndices(num1), patchIndices(num2)/)
-
-                ! Store the physical nodes so we know how to reduce
-                tmpNodes(:, iCoor) = &
-                     tmpWeight(1, iCoor)*pts(:, tmpInd(1, iCoor)) + &
-                     tmpWeight(2, iCoor)*pts(:, tmpInd(2, iCoor))
-                kk = kk + 1
-             end if
-          end do
-       end if famInclude
-    end do  ElemLoop
-
-    ! To save disk space, we can compact out the doubly defined nodes that
-    ! were created during the slicing process. Then we can allocate the
-    ! final weight array and index array to be the exact correct
-    ! length
-
-    allocate(dummy(3, iCoor), link(iCoor))
-    tol=1e-12
-    call pointReduce(tmpNodes, iCoor, tol, dummy, link, nUnique)
-    allocate(slc%w(2, nUnique), slc%ind(2, nUnique), slc%conn(2, iCoor/2))
-    slc%nNodes = nUnique
-
-    ! Modify the data accordingly
-    do i=1, iCoor
-       slc%w(:, link(i)) = tmpWeight(:, i)
-       slc%ind(:, link(i)) = tmpInd(:, i)
-    end do
-
-    ! The connectivity is actually link reshaped since the original conn
-    ! would have been (1,2), (3,4), (5,6) etc.
-    do i=1, iCoor/2
-       slc%conn(1, i) = link(2*i-1)
-       slc%conn(2, i) = link(2*i)
-    end do
-
-    deallocate(tmpNodes, tmpWeight, tmpInd, dummy, link, fc)
-  end subroutine createSlice
-
-  subroutine destroySlice(slc)
-    !
-    !       This subroutine destroys a slice created by the createSlice
-    !       routine
-    !
-    use constants
-    implicit none
+                ! Based on the values at each corner, determine which
+                ! type contour we have
+                indexSquare = 1
+
+                if (f(1) .lt. zero) indexsquare = indexsquare + 1
+                if (f(2) .lt. zero) indexsquare = indexsquare + 2
+                if (f(3) .lt. zero) indexsquare = indexsquare + 4
+                if (f(4) .lt. zero) indexsquare = indexsquare + 8
+
+                logic1 = .true.
+
+                kk = 1
+                do while (logic1)
+                    ! This is the edge
+                    icon = mscon1(indexSquare, kk)
+
+                    if (icon == 0) then
+                        logic1 = .false.
+                    else
+
+                        ! num1, num2 are node indices
+                        num1 = mscon2(icon, 1)
+                        num2 = mscon2(icon, 2)
+
+                        iCoor = iCoor + 1
+                        if (iCoor > nMax) then
+                            ! Need to reallocate the arrays. Make it double the size
+                            call reallocateReal2(tmpWeight, 2, 2 * nMax, 2, nMax, .true.)
+                            call reallocateReal2(tmpNodes, 3, 2 * nMax, 3, nMax, .true.)
+                            call reallocateInteger2(tmpInd, 2, 2 * nMax, 2, nMax, .true.)
+                            nMax = nMax * 2
+                        end if
+
+                        ! Weight factors
+                        tmpWeight(2, iCoor) = (zero - f(num1)) / (f(num2) - f(num1))
+                        tmpWeight(1, iCoor) = one - tmpWeight(2, icoor)
+
+                        ! Store the weight factors
+                        tmpInd(:, iCoor) = (/patchIndices(num1), patchIndices(num2)/)
+
+                        ! Store the physical nodes so we know how to reduce
+                        tmpNodes(:, iCoor) = &
+                            tmpWeight(1, iCoor) * pts(:, tmpInd(1, iCoor)) + &
+                            tmpWeight(2, iCoor) * pts(:, tmpInd(2, iCoor))
+                        kk = kk + 1
+                    end if
+                end do
+            end if famInclude
+        end do ElemLoop
+
+        ! To save disk space, we can compact out the doubly defined nodes that
+        ! were created during the slicing process. Then we can allocate the
+        ! final weight array and index array to be the exact correct
+        ! length
+
+        allocate (dummy(3, iCoor), link(iCoor))
+        tol = 1e-12
+        call pointReduce(tmpNodes, iCoor, tol, dummy, link, nUnique)
+        allocate (slc%w(2, nUnique), slc%ind(2, nUnique), slc%conn(2, iCoor / 2))
+        slc%nNodes = nUnique
+
+        ! Modify the data accordingly
+        do i = 1, iCoor
+            slc%w(:, link(i)) = tmpWeight(:, i)
+            slc%ind(:, link(i)) = tmpInd(:, i)
+        end do
 
-    ! Input param
-    type(slice), intent(inout) :: slc
-
-    ! Deallocate weights and indices if they are already allocated
-    if (allocated(slc%w)) then
-       deallocate(slc%w)
-    end if
-
-    if (allocated(slc%ind)) then
-       deallocate(slc%ind)
-    end if
-
-    if (allocated(slc%conn)) then
-       deallocate(slc%conn)
-    end if
-
-    if (allocated(slc%famList)) then
-       deallocate(slc%famList)
-    end if
-
-    if (allocated(slc%vars)) then
-       deallocate(slc%vars)
-    end if
-
-  end subroutine destroySlice
-
-  subroutine integrateSlice(lSlc, gSlc, nodalValues, nFields, doConnectivity)
-    !
-    !       This subroutine integrates the forces on slice slc and computes
-    !       the integrated quantities for lift, drag, cl and cd with
-    !       contributions from both the pressure and visoucs forces.
-    !       It optionally interpolates solution variables as well.
-    !
-    use constants
-    use inputPhysics
-    use flowVarRefState
-    use communication
-    use utils, only : EChk
-    implicit none
+        ! The connectivity is actually link reshaped since the original conn
+        ! would have been (1,2), (3,4), (5,6) etc.
+        do i = 1, iCoor / 2
+            slc%conn(1, i) = link(2 * i - 1)
+            slc%conn(2, i) = link(2 * i)
+        end do
 
-    ! Input Variables
-    type(slice) :: lSlc, gSlc
-    integer(kind=intType), intent(in) :: nFields
-    logical, intent(in) :: doConnectivity
-    real(kind=realType), dimension(:, :), intent(in) :: nodalValues
-    ! Working variables
-    integer(kind=intType) :: i, j, i1, i2
-    real(kind=realType), dimension(3) :: x1, x2, pT1, pT2, vT1, vT2, pF, vF, pF_elem, vF_elem
-    real(kind=realType) :: len, dmax, dmin, dist, fact, M(3,3), tmp(6)
-    real(kind=realType) :: r(3), r_new(3), hyp, te(3), le(3), theta, w1, w2
-    integer(kind=intType) :: bestPair(2), normal_ind, iProc, ierr, iSize
-    real(kind=realtype), dimension(:,:), allocatable :: tempCoords
-    real(kind=realtype), dimension(:,:), allocatable :: localVals
-    integer(kind=intType), dimension(:), allocatable :: sliceNodeSizes, sliceCellSizes
-    integer(kind=intType), dimension(:), allocatable :: nodeDisps, cellDisps
-    real(kind=realType), dimension(3) :: refPoint, pM, vM
-    real(kind=realType) :: xc, yc, zc
-    ! Need another tmp to reduce the fx, fy and fz values
-    real(kind=realType) :: tmp_(3)
-
-    ! Copy the info related to slice
-    gSlc%sliceName = trim(lSlc%sliceName)
-    gSlc%pt = lSlc%pt
-    gSlc%normal = lSlc%normal
-
-    ! Back out what is the main index of the slice, x, y or z based on
-    ! the direction. Not the best approach, but that's ok
-    ! TODO this can be improved since we are now doing arbitrary slice directions
-    normal_ind = maxloc(abs(gSlc%normal),1)
-
-    pF = zero
-    vF = zero
-    pM = zero  ! pressure moment
-    vM = zero  ! viscous moment
-    iSize = 3 + 6 + nFields
-    allocate(localVals(iSize,lSlc%nNodes))
-
-   ! Interpolate the required values
-    do i=1, lSlc%nNodes
-       i1 = lSlc%ind(1, i)
-       i2 = lSlc%ind(2, i)
-       w1 = lSlc%w(1, i)
-       w2 = lSlc%w(2, i)
-       localVals(1:iSize, i) = w1*nodalValues(i1, 1:iSize) + w2*nodalValues(i2, 1:iSize)
-    end do
-
-    ! first communicate the coordinates and connectivities to get the chord and the LE/TE
-    ! we need this information to get the quarter-chord location,
-    ! which is needed for the moment computations
-
-    ! Gather up the number of nodes to be set to the root proc:
-    allocate(sliceNodeSizes(nProc), nodeDisps(0:nProc))
-    sliceNodeSizes = 0
-    nodeDisps = 0
-    call mpi_allgather(lSlc%nNodes,1, adflow_integer, sliceNodeSizes, 1, adflow_integer, &
-         adflow_comm_world, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-    nodeDisps(0) = 0
-    do iProc=1, nProc
-       nodeDisps(iProc) = nodeDisps(iProc-1) + sliceNodeSizes(iProc)*iSize
-    end do
-
-    if (myid == 0) then
-       gSlc%nNodes = sum(sliceNodeSizes)
-       allocate(gSlc%vars(iSize, gSlc%nNodes))
-    end if
-
-    call mpi_gatherv(localVals, iSize*lSlc%nNodes, adflow_real, gSlc%vars, sliceNodeSizes*iSize, &
-         nodeDisps, adflow_real, 0, adflow_comm_world, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! We may also need to gather the connectivity if the slice will have
-    ! to be written to a file.
-    if (doConnectivity) then
-       i = size(lslc%conn, 2)
-       allocate(cellDisps(0:nProc), sliceCellSizes(nProc))
-
-       call mpi_gather(i, 1, adflow_integer, sliceCellSizes, 1, adflow_integer, &
-            0, adflow_comm_world, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       if (myid == 0) then
-          cellDisps(0) = 0
-          do iProc=1, nProc
-             cellDisps(iProc) = cellDisps(iProc-1) + sliceCellSizes(iProc)*2
-          end do
-          allocate(gSlc%conn(2, sum(sliceCellSizes)))
-       end if
-
-       ! We offset the conn array by nodeDisps(iProc) which
-       ! automagically adjust the connectivity to account for the
-       ! number of nodes from different processors
-
-       call mpi_gatherv(lSlc%conn+nodeDisps(myid)/iSize, 2*size(lSlc%conn, 2), adflow_integer, gSlc%conn, &
-            sliceCellSizes*2, cellDisps, adflow_integer, 0, adflow_comm_world, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ! Not quite finished yet since we will have gathered nodes from
-       ! multiple procs we have to adjust the connectivity
-
-       deallocate(sliceCellSizes, cellDisps)
-    end if
-
-    deallocate(sliceNodeSizes, nodeDisps)
-    ! done communicating the geometry info
-
-    ! process the geometry on the root processor to figure out the LE, TE, and the chord
-    if (myid == 0) then
-       ! TODO the comments below can be used for a _slightly_ more accurate LE/TE detection.
-       ! this does not really show any difference other than the twist computation,
-       ! but depending on the application, getting the twist distribution from ADflow data might be critical.
-       ! so we leave the comments below for a better implementation.
-       ! For now, we just work with the 2 max-distance nodes
-
-       ! begin template:
-         ! loop over the elements and find the TE bends
-         ! if we have 2 TE bends, then take the mid-TE point as the TE
-         ! find the largest distance node
-         ! if we dont have 2 bends, just get the max distance between any 2 nodes, thats our chord line
-       ! end template.
-
-       ! Compute the chord as the max length between any two nodes...this
-       ! is n^2, so should be changed in the future
-       dmax = zero
-       bestPair = (/1, 1/)
-       do i=1,size(gSlc%vars, 2)
-          ! extract node:
-          x1 = gSlc%vars(1:3, i)
-
-          do j=i+1,size(gSlc%vars, 2)
-             ! extract node:
-             x2 = gSlc%vars(1:3, j)
-
-             dist = sqrt((x1(1)-x2(1))**2 +(x1(2)-x2(2))**2 + (x1(3)-x2(3))**2)
-
-             if (dist > dmax) then
-                dmax = dist
-                bestPair = (/i, j/)
-             end if
-          end  do
-       end do
-
-       ! Set chord, protected from zero
-       gSlc%chord = max(dmax, 1e-12)
-
-       ! figure out which node is the LE and the TE
-       if (gSlc%vars(1, bestPair(1)) < gSlc%vars(1, bestPair(2))) then
-         ! first entry in the "bestPair" is the LE node
-         x1 = gSlc%vars(1:3, bestPair(1))
-         x2 = gSlc%vars(1:3, bestPair(2))
-       else
-         ! second entry in the "bestPair" is the LE node
-         x1 = gSlc%vars(1:3, bestPair(2))
-         x2 = gSlc%vars(1:3, bestPair(1))
-       end if
-
-       ! using the LE and TE coordinates, compute the quarter chord point
-       ! we will use this as reference for the moment computation
-       refPoint(1) = 0.75_realType * x1(1) + 0.25_realType * x2(1)
-       refPoint(2) = 0.75_realType * x1(2) + 0.25_realType * x2(2)
-       refPoint(3) = 0.75_realType * x1(3) + 0.25_realType * x2(3)
-    end if
-
-    ! communicate the reference point coordinates across processors.
-    call mpi_bcast(refPoint, 3, adflow_real, 0, adflow_comm_world, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! now we are done with the global geometric operations. go back to integrating the slices locally on each proc
-
-    ! loop over elements
-    do i=1, size(lSlc%conn, 2)
-       ! Compute all the local variables we need.
-
-       ! extract nodes:
-       i1 = lslc%conn(1, i)
-       i2 = lslc%conn(2, i)
-
-       x1 = localVals(1:3, i1)
-       x2 = localVals(1:3, i2)
-
-       ! extract pressure tractions
-       pT1 = localVals(4:6, i1)
-       pT2 = localVals(4:6, i2)
-
-       ! extract viscous tractions
-       vT1 = localVals(7:9, i1)
-       vT2 = localVals(7:9, i2)
-
-       ! Length of this segment
-       len = sqrt((x1(1)-x2(1))**2 + (x1(2)-x2(2))**2 + (x1(3)-x2(3))**2)
-
-       ! compute the pressure and viscous forces on this element
-       pF_elem = half*(pT1 + pT2)*len
-       vF_elem = half*(vT1 + vT2)*len
-
-       ! Integrate the pressure and viscous forces separately
-       pF = pF + pF_elem
-       vF = vF + vF_elem
-
-       ! compute moment about the global reference locations
-       xc = half * (x1(1) + x2(1)) - refPoint(1)
-       yc = half * (x1(2) + x2(2)) - refPoint(2)
-       zc = half * (x1(3) + x2(3)) - refPoint(3)
-
-       ! pressure components
-       pM(1) = pM(1) + yc*pF_elem(3) - zc*pF_elem(2)
-       pM(2) = pM(2) + zc*pF_elem(1) - xc*pF_elem(3)
-       pM(3) = pM(3) + xc*pF_elem(2) - yc*pF_elem(1)
-
-       ! viscous components
-       vM(1) = vM(1) + yc*vF_elem(3) - zc*vF_elem(2)
-       vM(2) = vM(2) + zc*vF_elem(1) - xc*vF_elem(3)
-       vM(3) = vM(3) + xc*vF_elem(2) - yc*vF_elem(1)
-
-    end do
-
-    ! That is as far as we can go in parallel. We now have to gather up
-    ! pL, pD, pM, vL, vD, vM as well as the nodes to the root proc.
-
-    ! Set the local values we can in the slice
-    lSlc%pL = liftDirection(1)*pF(1) + liftDirection(2)*pF(2) + liftDirection(3)*pF(3)
-    lSlc%pD = dragDirection(1)*pF(1) + dragDirection(2)*pF(2) + dragDirection(3)*pF(3)
-    lSlc%vL = liftDirection(1)*vF(1) + liftDirection(2)*vF(2) + liftDirection(3)*vF(3)
-    lSlc%vD = dragDirection(1)*vF(1) + dragDirection(2)*vF(2) + dragDirection(3)*vF(3)
-
-    ! the moments are a bit different than lift and drag. we keep the 3 components of the moment
-    ! in pM in this routine but then the slc%pM variable only has the component of the moment
-    ! we are interested in. we use the direction index to get this value out and set it in the slice
-    lSlc%pM = pM(normal_ind)
-    lSlc%vM = vM(normal_ind)
-
-    ! save the x,y,z-forces into the appropriate real-container
-    ! from their type(slice) definition (see the top of this file)
-    lSlc%fx = pF(1) + vF(1)
-    lSlc%fy = pF(2) + vF(2)
-    lSlc%fz = pF(3) + vF(3)
-
-    ! Reduce the lift/drag values
-    call mpi_reduce((/lSlc%pL, lSlc%pD, lSlc%pM, lSlc%vL, lSlc%vD, lSlc%vM/), tmp, 6, adflow_real, MPI_SUM, &
-         0, adflow_comm_world, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! Reduce the fx, fy and fz using THE OTHER tmp, i.e. tmp_
-    call mpi_reduce((/lSlc%fx, lSlc%fy, lSlc%fz/), tmp_, 3, adflow_real, MPI_SUM, &
-         0, adflow_comm_world, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    if (myid == 0) then
-       gSlc%pL = tmp(1)
-       gSlc%pD = tmp(2)
-       gSlc%pM = tmp(3)
-       gSlc%vL = tmp(4)
-       gSlc%vD = tmp(5)
-       gSlc%vM = tmp(6)
-
-       ! we now also must remember to store the MPIreduced fx, fy and fz in the
-       ! global slice type:
-       gSlc%fx = tmp_(1)
-       gSlc%fy = tmp_(2)
-       gSlc%fz = tmp_(3)
-
-       ! Compute factor to get coefficient
-       fact = two/(gammaInf*pInf*MachCoef*MachCoef*pRef)
-
-       ! Take dmax as chord and compute coefficients
-       gSlc%CLp = gSlc%pL / gSlc%chord * fact
-       gSlc%CDp = gSlc%pD / gSlc%chord * fact
-       gSlc%CLv = gSlc%vL / gSlc%chord * fact
-       gSlc%CDv = gSlc%vD / gSlc%chord * fact
-
-       ! Moment factor has an extra lengthRef
-       fact = fact/(lengthRef*LRef)
-
-       ! moments
-       gSlc%CMp = gSlc%pM / gSlc%chord * fact
-       gSlc%CMv = gSlc%vM / gSlc%chord * fact
-
-       ! Default values
-       gSlc%twist = zero
-       gSlc%thickness = zero
-
-       if (gSlc%nNodes == 0) then
-          return
-       end if
-
-       ! Lastly we need the twist and the twist and the thickness
-       i1 = bestPair(1)
-       i2 = bestPair(2)
-       x1 = gSlc%vars(1:3, i1)
-       x2 = gSlc%vars(1:3, i2)
-
-       if (x1(1) > x2(1)) then
-          te = x1
-          le = x2
-       else
-          te = x2
-          le = x1
-       end if
-
-       ! Save the leading and trailing edges so we can do scaled output
-       ! later
-       gSlc%le = le
-       gSlc%te = te
-
-       ! Finally we need to get the thickness. For this, compute temporary
-       ! section nodes and rotate them by the twist values we just computed
-       ! and take the max and min
-
-       ! Length of hyptoneuse is the same
-       hyp = sqrt((x1(1)-x2(1))**2 + (x1(2)-x2(2))**2 + (x1(3)-x2(3))**2)
-
-       if (normal_ind == 1) then
-          ! Xslice...we don't how what to do here..could be y or z. Don't
-          ! do anything.
-          gSlc%twist = zero
-       else if (normal_ind == 2) then
-          ! Yslice
-          theta = asin((le(3)-te(3))/hyp)
-          gSlc%twist = theta*180.0/pi
-       else
-          ! Zslice
-          theta = asin((le(2)-te(2))/hyp)
-          gSlc%twist = theta*180.0/pi
-       end if
-
-       if (normal_ind == 1) then
-          M(1,1) = one; M(1,2) = zero; M(1, 3) = zero;
-          M(2,1) = zero; M(2,2) = one; M(2, 3) = zero;
-          M(3,1) = zero; M(3,2) = zero; M(3,3) = one;
-       else if(normal_ind == 2) then
-          ! Y-rotation matrix
-          M(1,1) = cos(-theta); M(1,2) = zero; M(1, 3) = sin(-theta);
-          M(2,1) = zero; M(2,2) = one; M(2, 3) = zero;
-          M(3,1) = -sin(-theta); M(3,2) = zero; M(3,3) = cos(-theta);
-       else
-          ! Z rotation Matrix
-          M(1,1) = cos(theta); M(1,2) = -sin(theta); M(1,3) = zero;
-          M(2,1) = sin(theta); M(2,2) = cos(theta); M(2,3) = zero;
-          M(3,1) = zero; M(3,2) = zero; M(3,3) = one;
-       end if
-
-       allocate(tempCoords(3, size(gSlc%vars, 2)))
-       do i=1, size(gSlc%vars, 2)
-          ! extract node:
-          r = gSlc%vars(1:3, i) - te
-          r_new = matmul(M, r)
-          tempCoords(:, i) = r_new + te
-       end do
-
-       ! Now get the max and the min and divide by the chord for t/c
-       if (normal_ind == 1) then
-          gSlc%thickness = 0 ! Again, don't know what to do here
-       else if(normal_ind == 2) then
-          dmax = maxval(tempCoords(3, :))
-          dmin = minval(tempCoords(3, :))
-          gSlc%thickness = (dmax-dmin)/hyp
-       else if(normal_ind == 3) then
-          dmax = maxval(tempCoords(2, :))
-          dmin = minval(tempCoords(2, :))
-          gSlc%thickness = (dmax-dmin)/hyp
-       end if
-       deallocate(tempCoords)
-    end if
-  end subroutine integrateSlice
-
-  subroutine writeSlice(slc, fileID, nFields)
-    ! Write the data in slice 'slc' to openfile ID fileID
-    use constants
-    use inputIO
-    use commonFormats, only : int5
-    implicit none
+        deallocate (tmpNodes, tmpWeight, tmpInd, dummy, link, fc)
+    end subroutine createSlice
+
+    subroutine destroySlice(slc)
+        !
+        !       This subroutine destroys a slice created by the createSlice
+        !       routine
+        !
+        use constants
+        implicit none
+
+        ! Input param
+        type(slice), intent(inout) :: slc
+
+        ! Deallocate weights and indices if they are already allocated
+        if (allocated(slc%w)) then
+            deallocate (slc%w)
+        end if
+
+        if (allocated(slc%ind)) then
+            deallocate (slc%ind)
+        end if
+
+        if (allocated(slc%conn)) then
+            deallocate (slc%conn)
+        end if
+
+        if (allocated(slc%famList)) then
+            deallocate (slc%famList)
+        end if
+
+        if (allocated(slc%vars)) then
+            deallocate (slc%vars)
+        end if
+
+    end subroutine destroySlice
+
+    subroutine integrateSlice(lSlc, gSlc, nodalValues, nFields, doConnectivity)
+        !
+        !       This subroutine integrates the forces on slice slc and computes
+        !       the integrated quantities for lift, drag, cl and cd with
+        !       contributions from both the pressure and visoucs forces.
+        !       It optionally interpolates solution variables as well.
+        !
+        use constants
+        use inputPhysics
+        use flowVarRefState
+        use communication
+        use utils, only: EChk
+        implicit none
+
+        ! Input Variables
+        type(slice) :: lSlc, gSlc
+        integer(kind=intType), intent(in) :: nFields
+        logical, intent(in) :: doConnectivity
+        real(kind=realType), dimension(:, :), intent(in) :: nodalValues
+        ! Working variables
+        integer(kind=intType) :: i, j, i1, i2
+        real(kind=realType), dimension(3) :: x1, x2, pT1, pT2, vT1, vT2, pF, vF, pF_elem, vF_elem
+        real(kind=realType) :: len, dmax, dmin, dist, fact, M(3, 3), tmp(6)
+        real(kind=realType) :: r(3), r_new(3), hyp, te(3), le(3), theta, w1, w2
+        integer(kind=intType) :: bestPair(2), normal_ind, iProc, ierr, iSize
+        real(kind=realtype), dimension(:, :), allocatable :: tempCoords
+        real(kind=realtype), dimension(:, :), allocatable :: localVals
+        integer(kind=intType), dimension(:), allocatable :: sliceNodeSizes, sliceCellSizes
+        integer(kind=intType), dimension(:), allocatable :: nodeDisps, cellDisps
+        real(kind=realType), dimension(3) :: refPoint, pM, vM
+        real(kind=realType) :: xc, yc, zc
+        ! Need another tmp to reduce the fx, fy and fz values
+        real(kind=realType) :: tmp_(3)
+
+        ! Copy the info related to slice
+        gSlc%sliceName = trim(lSlc%sliceName)
+        gSlc%pt = lSlc%pt
+        gSlc%normal = lSlc%normal
+
+        ! Back out what is the main index of the slice, x, y or z based on
+        ! the direction. Not the best approach, but that's ok
+        ! TODO this can be improved since we are now doing arbitrary slice directions
+        normal_ind = maxloc(abs(gSlc%normal), 1)
+
+        pF = zero
+        vF = zero
+        pM = zero  ! pressure moment
+        vM = zero  ! viscous moment
+        iSize = 3 + 6 + nFields
+        allocate (localVals(iSize, lSlc%nNodes))
+
+        ! Interpolate the required values
+        do i = 1, lSlc%nNodes
+            i1 = lSlc%ind(1, i)
+            i2 = lSlc%ind(2, i)
+            w1 = lSlc%w(1, i)
+            w2 = lSlc%w(2, i)
+            localVals(1:iSize, i) = w1 * nodalValues(i1, 1:iSize) + w2 * nodalValues(i2, 1:iSize)
+        end do
+
+        ! first communicate the coordinates and connectivities to get the chord and the LE/TE
+        ! we need this information to get the quarter-chord location,
+        ! which is needed for the moment computations
+
+        ! Gather up the number of nodes to be set to the root proc:
+        allocate (sliceNodeSizes(nProc), nodeDisps(0:nProc))
+        sliceNodeSizes = 0
+        nodeDisps = 0
+        call mpi_allgather(lSlc%nNodes, 1, adflow_integer, sliceNodeSizes, 1, adflow_integer, &
+                           adflow_comm_world, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+        nodeDisps(0) = 0
+        do iProc = 1, nProc
+            nodeDisps(iProc) = nodeDisps(iProc - 1) + sliceNodeSizes(iProc) * iSize
+        end do
+
+        if (myid == 0) then
+            gSlc%nNodes = sum(sliceNodeSizes)
+            allocate (gSlc%vars(iSize, gSlc%nNodes))
+        end if
+
+        call mpi_gatherv(localVals, iSize * lSlc%nNodes, adflow_real, gSlc%vars, sliceNodeSizes * iSize, &
+                         nodeDisps, adflow_real, 0, adflow_comm_world, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! We may also need to gather the connectivity if the slice will have
+        ! to be written to a file.
+        if (doConnectivity) then
+            i = size(lslc%conn, 2)
+            allocate (cellDisps(0:nProc), sliceCellSizes(nProc))
+
+            call mpi_gather(i, 1, adflow_integer, sliceCellSizes, 1, adflow_integer, &
+                            0, adflow_comm_world, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            if (myid == 0) then
+                cellDisps(0) = 0
+                do iProc = 1, nProc
+                    cellDisps(iProc) = cellDisps(iProc - 1) + sliceCellSizes(iProc) * 2
+                end do
+                allocate (gSlc%conn(2, sum(sliceCellSizes)))
+            end if
+
+            ! We offset the conn array by nodeDisps(iProc) which
+            ! automagically adjust the connectivity to account for the
+            ! number of nodes from different processors
+
+            call mpi_gatherv(lSlc%conn + nodeDisps(myid) / iSize, 2 * size(lSlc%conn, 2), adflow_integer, gSlc%conn, &
+                             sliceCellSizes * 2, cellDisps, adflow_integer, 0, adflow_comm_world, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Not quite finished yet since we will have gathered nodes from
+            ! multiple procs we have to adjust the connectivity
+
+            deallocate (sliceCellSizes, cellDisps)
+        end if
+
+        deallocate (sliceNodeSizes, nodeDisps)
+        ! done communicating the geometry info
+
+        ! process the geometry on the root processor to figure out the LE, TE, and the chord
+        if (myid == 0) then
+            ! TODO the comments below can be used for a _slightly_ more accurate LE/TE detection.
+            ! this does not really show any difference other than the twist computation,
+            ! but depending on the application, getting the twist distribution from ADflow data might be critical.
+            ! so we leave the comments below for a better implementation.
+            ! For now, we just work with the 2 max-distance nodes
+
+            ! begin template:
+            ! loop over the elements and find the TE bends
+            ! if we have 2 TE bends, then take the mid-TE point as the TE
+            ! find the largest distance node
+            ! if we dont have 2 bends, just get the max distance between any 2 nodes, thats our chord line
+            ! end template.
+
+            ! Compute the chord as the max length between any two nodes...this
+            ! is n^2, so should be changed in the future
+            dmax = zero
+            bestPair = (/1, 1/)
+            do i = 1, size(gSlc%vars, 2)
+                ! extract node:
+                x1 = gSlc%vars(1:3, i)
+
+                do j = i + 1, size(gSlc%vars, 2)
+                    ! extract node:
+                    x2 = gSlc%vars(1:3, j)
+
+                    dist = sqrt((x1(1) - x2(1))**2 + (x1(2) - x2(2))**2 + (x1(3) - x2(3))**2)
+
+                    if (dist > dmax) then
+                        dmax = dist
+                        bestPair = (/i, j/)
+                    end if
+                end do
+            end do
+
+            ! Set chord, protected from zero
+            gSlc%chord = max(dmax, 1e-12)
+
+            ! figure out which node is the LE and the TE
+            if (gSlc%vars(1, bestPair(1)) < gSlc%vars(1, bestPair(2))) then
+                ! first entry in the "bestPair" is the LE node
+                x1 = gSlc%vars(1:3, bestPair(1))
+                x2 = gSlc%vars(1:3, bestPair(2))
+            else
+                ! second entry in the "bestPair" is the LE node
+                x1 = gSlc%vars(1:3, bestPair(2))
+                x2 = gSlc%vars(1:3, bestPair(1))
+            end if
+
+            ! using the LE and TE coordinates, compute the quarter chord point
+            ! we will use this as reference for the moment computation
+            refPoint(1) = 0.75_realType * x1(1) + 0.25_realType * x2(1)
+            refPoint(2) = 0.75_realType * x1(2) + 0.25_realType * x2(2)
+            refPoint(3) = 0.75_realType * x1(3) + 0.25_realType * x2(3)
+        end if
+
+        ! communicate the reference point coordinates across processors.
+        call mpi_bcast(refPoint, 3, adflow_real, 0, adflow_comm_world, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! now we are done with the global geometric operations. go back to integrating the slices locally on each proc
+
+        ! loop over elements
+        do i = 1, size(lSlc%conn, 2)
+            ! Compute all the local variables we need.
+
+            ! extract nodes:
+            i1 = lslc%conn(1, i)
+            i2 = lslc%conn(2, i)
+
+            x1 = localVals(1:3, i1)
+            x2 = localVals(1:3, i2)
+
+            ! extract pressure tractions
+            pT1 = localVals(4:6, i1)
+            pT2 = localVals(4:6, i2)
+
+            ! extract viscous tractions
+            vT1 = localVals(7:9, i1)
+            vT2 = localVals(7:9, i2)
+
+            ! Length of this segment
+            len = sqrt((x1(1) - x2(1))**2 + (x1(2) - x2(2))**2 + (x1(3) - x2(3))**2)
+
+            ! compute the pressure and viscous forces on this element
+            pF_elem = half * (pT1 + pT2) * len
+            vF_elem = half * (vT1 + vT2) * len
+
+            ! Integrate the pressure and viscous forces separately
+            pF = pF + pF_elem
+            vF = vF + vF_elem
+
+            ! compute moment about the global reference locations
+            xc = half * (x1(1) + x2(1)) - refPoint(1)
+            yc = half * (x1(2) + x2(2)) - refPoint(2)
+            zc = half * (x1(3) + x2(3)) - refPoint(3)
+
+            ! pressure components
+            pM(1) = pM(1) + yc * pF_elem(3) - zc * pF_elem(2)
+            pM(2) = pM(2) + zc * pF_elem(1) - xc * pF_elem(3)
+            pM(3) = pM(3) + xc * pF_elem(2) - yc * pF_elem(1)
+
+            ! viscous components
+            vM(1) = vM(1) + yc * vF_elem(3) - zc * vF_elem(2)
+            vM(2) = vM(2) + zc * vF_elem(1) - xc * vF_elem(3)
+            vM(3) = vM(3) + xc * vF_elem(2) - yc * vF_elem(1)
+
+        end do
+
+        ! That is as far as we can go in parallel. We now have to gather up
+        ! pL, pD, pM, vL, vD, vM as well as the nodes to the root proc.
+
+        ! Set the local values we can in the slice
+        lSlc%pL = liftDirection(1) * pF(1) + liftDirection(2) * pF(2) + liftDirection(3) * pF(3)
+        lSlc%pD = dragDirection(1) * pF(1) + dragDirection(2) * pF(2) + dragDirection(3) * pF(3)
+        lSlc%vL = liftDirection(1) * vF(1) + liftDirection(2) * vF(2) + liftDirection(3) * vF(3)
+        lSlc%vD = dragDirection(1) * vF(1) + dragDirection(2) * vF(2) + dragDirection(3) * vF(3)
+
+        ! the moments are a bit different than lift and drag. we keep the 3 components of the moment
+        ! in pM in this routine but then the slc%pM variable only has the component of the moment
+        ! we are interested in. we use the direction index to get this value out and set it in the slice
+        lSlc%pM = pM(normal_ind)
+        lSlc%vM = vM(normal_ind)
+
+        ! save the x,y,z-forces into the appropriate real-container
+        ! from their type(slice) definition (see the top of this file)
+        lSlc%fx = pF(1) + vF(1)
+        lSlc%fy = pF(2) + vF(2)
+        lSlc%fz = pF(3) + vF(3)
+
+        ! Reduce the lift/drag values
+        call mpi_reduce((/lSlc%pL, lSlc%pD, lSlc%pM, lSlc%vL, lSlc%vD, lSlc%vM/), tmp, 6, adflow_real, MPI_SUM, &
+                        0, adflow_comm_world, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Reduce the fx, fy and fz using THE OTHER tmp, i.e. tmp_
+        call mpi_reduce((/lSlc%fx, lSlc%fy, lSlc%fz/), tmp_, 3, adflow_real, MPI_SUM, &
+                        0, adflow_comm_world, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        if (myid == 0) then
+            gSlc%pL = tmp(1)
+            gSlc%pD = tmp(2)
+            gSlc%pM = tmp(3)
+            gSlc%vL = tmp(4)
+            gSlc%vD = tmp(5)
+            gSlc%vM = tmp(6)
+
+            ! we now also must remember to store the MPIreduced fx, fy and fz in the
+            ! global slice type:
+            gSlc%fx = tmp_(1)
+            gSlc%fy = tmp_(2)
+            gSlc%fz = tmp_(3)
+
+            ! Compute factor to get coefficient
+            fact = two / (gammaInf * pInf * MachCoef * MachCoef * pRef)
+
+            ! Take dmax as chord and compute coefficients
+            gSlc%CLp = gSlc%pL / gSlc%chord * fact
+            gSlc%CDp = gSlc%pD / gSlc%chord * fact
+            gSlc%CLv = gSlc%vL / gSlc%chord * fact
+            gSlc%CDv = gSlc%vD / gSlc%chord * fact
+
+            ! Moment factor has an extra lengthRef
+            fact = fact / (lengthRef * LRef)
+
+            ! moments
+            gSlc%CMp = gSlc%pM / gSlc%chord * fact
+            gSlc%CMv = gSlc%vM / gSlc%chord * fact
+
+            ! Default values
+            gSlc%twist = zero
+            gSlc%thickness = zero
+
+            if (gSlc%nNodes == 0) then
+                return
+            end if
+
+            ! Lastly we need the twist and the twist and the thickness
+            i1 = bestPair(1)
+            i2 = bestPair(2)
+            x1 = gSlc%vars(1:3, i1)
+            x2 = gSlc%vars(1:3, i2)
+
+            if (x1(1) > x2(1)) then
+                te = x1
+                le = x2
+            else
+                te = x2
+                le = x1
+            end if
+
+            ! Save the leading and trailing edges so we can do scaled output
+            ! later
+            gSlc%le = le
+            gSlc%te = te
+
+            ! Finally we need to get the thickness. For this, compute temporary
+            ! section nodes and rotate them by the twist values we just computed
+            ! and take the max and min
+
+            ! Length of hyptoneuse is the same
+            hyp = sqrt((x1(1) - x2(1))**2 + (x1(2) - x2(2))**2 + (x1(3) - x2(3))**2)
+
+            if (normal_ind == 1) then
+                ! Xslice...we don't how what to do here..could be y or z. Don't
+                ! do anything.
+                gSlc%twist = zero
+            else if (normal_ind == 2) then
+                ! Yslice
+                theta = asin((le(3) - te(3)) / hyp)
+                gSlc%twist = theta * 180.0 / pi
+            else
+                ! Zslice
+                theta = asin((le(2) - te(2)) / hyp)
+                gSlc%twist = theta * 180.0 / pi
+            end if
+
+            if (normal_ind == 1) then
+                M(1, 1) = one; M(1, 2) = zero; M(1, 3) = zero; 
+                M(2, 1) = zero; M(2, 2) = one; M(2, 3) = zero; 
+                M(3, 1) = zero; M(3, 2) = zero; M(3, 3) = one; 
+            else if (normal_ind == 2) then
+                ! Y-rotation matrix
+                M(1, 1) = cos(-theta); M(1, 2) = zero; M(1, 3) = sin(-theta); 
+                M(2, 1) = zero; M(2, 2) = one; M(2, 3) = zero; 
+                M(3, 1) = -sin(-theta); M(3, 2) = zero; M(3, 3) = cos(-theta); 
+            else
+                ! Z rotation Matrix
+                M(1, 1) = cos(theta); M(1, 2) = -sin(theta); M(1, 3) = zero; 
+                M(2, 1) = sin(theta); M(2, 2) = cos(theta); M(2, 3) = zero; 
+                M(3, 1) = zero; M(3, 2) = zero; M(3, 3) = one; 
+            end if
+
+            allocate (tempCoords(3, size(gSlc%vars, 2)))
+            do i = 1, size(gSlc%vars, 2)
+                ! extract node:
+                r = gSlc%vars(1:3, i) - te
+                r_new = matmul(M, r)
+                tempCoords(:, i) = r_new + te
+            end do
+
+            ! Now get the max and the min and divide by the chord for t/c
+            if (normal_ind == 1) then
+                gSlc%thickness = 0 ! Again, don't know what to do here
+            else if (normal_ind == 2) then
+                dmax = maxval(tempCoords(3, :))
+                dmin = minval(tempCoords(3, :))
+                gSlc%thickness = (dmax - dmin) / hyp
+            else if (normal_ind == 3) then
+                dmax = maxval(tempCoords(2, :))
+                dmin = minval(tempCoords(2, :))
+                gSlc%thickness = (dmax - dmin) / hyp
+            end if
+            deallocate (tempCoords)
+        end if
+    end subroutine integrateSlice
+
+    subroutine writeSlice(slc, fileID, nFields)
+        ! Write the data in slice 'slc' to openfile ID fileID
+        use constants
+        use inputIO
+        use commonFormats, only: int5
+        implicit none
+
+        ! Input Parameters
+        type(slice), intent(in) :: slc
+        integer(kind=intType), intent(in) :: fileID, nFields
+
+        ! Working Variables
+        integer(kind=intType) :: i, j
+        real(kind=realType) :: tmp, tx, ty, tz
+
+        write (fileID, "(a,a,a)") "Zone T= """, trim(slc%sliceName), """"
+
+        ! IF we have nodes actually write:
+        if (slc%nNodes > 0) then
+            write (fileID, *) "Nodes = ", slc%nNodes, " Elements= ", size(slc%conn, 2), " ZONETYPE=FELINESEG"
+            write (fileID, *) "DATAPACKING=POINT"
+
+            do i = 1, slc%nNodes
+                ! Write the coordinates
+                do j = 1, 3
+                    write (fileID, sci6, advance='no') slc%vars(j, i)
+                end do
+
+                ! Write the scaled coordiantes with the LE at (0,0,0)
+                do j = 1, 3
+                    tmp = slc%vars(j, i)
+                    write (fileID, sci6, advance='no') (tmp - slc%le(j)) / slc%chord
+                end do
+
+                ! Write field data. Starts at 9 (after 3 coordindates and the 6 tractions)
+                do j = 1, nFields
+                    write (fileID, sci6, advance='no') slc%vars(9 + j, i)
+                end do
+
+                write (fileID, "(1x)")
+            end do
+
+            do i = 1, size(slc%conn, 2)
+                write (fileID, int5) slc%conn(1, i), slc%conn(2, i)
+            end do
+        else ! Write dummy data so the number of zones are the same
+
+            write (fileID, *) "Nodes = ", 2, " Elements= ", 1, " ZONETYPE=FELINESEG"
+            write (fileID, *) "DATAPACKING=POINT"
+            do i = 1, 2
+                do j = 1, 6
+                    write (fileID, sci6, advance='no') zero
+                end do
+
+                do j = 1, nFields
+                    write (fileID, sci6, advance='no') zero
+                end do
 
-    ! Input Parameters
-    type(slice), intent(in) :: slc
-    integer(kind=intType), intent(in) :: fileID, nFields
-
-    ! Working Variables
-    integer(kind=intType) :: i, j
-    real(kind=realType) :: tmp, tx, ty, tz
-
-    write (fileID,"(a,a,a)") "Zone T= """,trim(slc%sliceName),""""
-
-    ! IF we have nodes actually write:
-    if (slc%nNodes > 0) then
-       write (fileID,*) "Nodes = ", slc%nNodes, " Elements= ",  size(slc%conn, 2), " ZONETYPE=FELINESEG"
-       write (fileID,*) "DATAPACKING=POINT"
-
-       do i=1,slc%nNodes
-          ! Write the coordinates
-          do j=1,3
-             write(fileID, sci6, advance='no') slc%vars(j, i)
-          end do
-
-          ! Write the scaled coordiantes with the LE at (0,0,0)
-          do j=1,3
-             tmp = slc%vars(j, i)
-             write(fileID, sci6, advance='no') (tmp - slc%le(j))/slc%chord
-          end do
-
-          ! Write field data. Starts at 9 (after 3 coordindates and the 6 tractions)
-          do j=1,nFields
-             write(fileID, sci6, advance='no') slc%vars(9+j, i)
-          end do
-
-          write(fileID,"(1x)")
-       end do
-
-       do i=1, size(slc%conn, 2)
-          write(fileID, int5) slc%conn(1, i), slc%conn(2, i)
-       end do
-    else ! Write dummy data so the number of zones are the same
-
-       write (fileID,*) "Nodes = ", 2, " Elements= ", 1, " ZONETYPE=FELINESEG"
-       write (fileID,*) "DATAPACKING=POINT"
-       do i=1,2
-          do j=1,6
-             write(fileID, sci6, advance='no') zero
-          end do
-
-          do j=1,nFields
-             write(fileID, sci6, advance='no') zero
-          end do
-
-          write(fileID,"(1x)")
-       end do
-       write(fileID, int5) 1, 2
-    end if
-  end subroutine writeSlice
+                write (fileID, "(1x)")
+            end do
+            write (fileID, int5) 1, 2
+        end if
+    end subroutine writeSlice
 
 end module tecplotIO
diff --git a/src/output/writeCGNSGrid.F90 b/src/output/writeCGNSGrid.F90
index 51d796340..83576377d 100644
--- a/src/output/writeCGNSGrid.F90
+++ b/src/output/writeCGNSGrid.F90
@@ -1,1512 +1,1510 @@
 module writeCGNSGrid
 
 contains
-  subroutine writeCGNSGridFile
-    !
-    !       writeCGNSGridFile and its subroutines write the CGNS grid
-    !       file(s). Typically this is needed when the coordinates have
-    !       changed due to moving parts, deformation or both.
-    !
-    use cgnsGrid
-    use communication
-    use IOModule
-    use monitor
-    use su_cgns
-    use outputMod
-    use communication
-    use inputIteration
-    use utils, only : terminate
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer, dimension(cgnsNDom) :: cgnsZone
-
-    integer(kind=intType) :: nn
-
-    ! Determine the number and names of the grid files.
-    ! Also set the pointers for IOVar needed for the general
-    ! treatment of the IO.
-
-    call gridFileNamesWrite
-
-    ! Return immediately if no grids have to be written.
-
-    if(nGridsToWrite == 0) return
-
-    ! Allocate the memory for the fileIDs and the bases.
-
-    allocate(fileIDs(nGridsToWrite), cgnsBases(nGridsToWrite), &
-         stat=ierr)
-    if(ierr /= 0)                         &
-         call terminate("writeCGNSGridFile", &
-         "Memory allocation failure for fileIDs and &
-         &cgnsBases")
-
-    ! Write a message that the grid file(s) are being written.
-    ! Of course only processor 0 does this.
-
-    if(myID == 0 .and. printIterations) then
-       print "(a)", "#"
-       print "(a,a)", "# Writing grid file(s): ",trim(gridFileNames(1))
-    endif
-
-    ! All grid information is stored on all processors, with the
-    ! exception of data which can vary in time or use a large
-    ! amount of memory, including the coordinates and
-    ! connectivities. Processor 0 writes this information first as
-    ! a frame for each file.
-
-    if(myID == 0) then
-       do nn=1,nGridsToWrite
-          call writeCGNSGridFrame(cgnsZone, nn)
-       enddo
-    endif
-
-    ! Loop over the number of cgns blocks and write the coordinates
-    ! one at a time to conserve memory.
-
-    do nn=1,cgnsNDom
-       call writeCoorCGNSZone(nn, cgnsZone(nn))
-    enddo
-
-    ! Check if the solution must be written in a different file.
-    ! If so the files must be closed the memory for the fileIDs
-    ! and bases must be released.
-
-    testGridOnly: if(useLinksInCGNS .or. (.not. writeVolume)) then
-
-       ! Processor 0 closes the files.
-
-       if(myID == 0) then
-          do nn=1,nGridsToWrite
-             call cg_close_f(fileIDs(nn), ierr)
-             if(ierr /= CG_OK)                    &
-                  call terminate("writeCGNSGridFile", &
-                  "Something wrong when calling cg_close_f")
-          enddo
-       endif
-
-
-       ! Release the memory of fileIDs and cgnsBases.
-
-       deallocate(fileIDs, cgnsBases, stat=ierr)
-       if(ierr /= 0)                         &
+    subroutine writeCGNSGridFile
+        !
+        !       writeCGNSGridFile and its subroutines write the CGNS grid
+        !       file(s). Typically this is needed when the coordinates have
+        !       changed due to moving parts, deformation or both.
+        !
+        use cgnsGrid
+        use communication
+        use IOModule
+        use monitor
+        use su_cgns
+        use outputMod
+        use communication
+        use inputIteration
+        use utils, only: terminate
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer, dimension(cgnsNDom) :: cgnsZone
+
+        integer(kind=intType) :: nn
+
+        ! Determine the number and names of the grid files.
+        ! Also set the pointers for IOVar needed for the general
+        ! treatment of the IO.
+
+        call gridFileNamesWrite
+
+        ! Return immediately if no grids have to be written.
+
+        if (nGridsToWrite == 0) return
+
+        ! Allocate the memory for the fileIDs and the bases.
+
+        allocate (fileIDs(nGridsToWrite), cgnsBases(nGridsToWrite), &
+                  stat=ierr)
+        if (ierr /= 0) &
+             call terminate("writeCGNSGridFile", &
+             "Memory allocation failure for fileIDs and &
+             &cgnsBases")
+
+        ! Write a message that the grid file(s) are being written.
+        ! Of course only processor 0 does this.
+
+        if (myID == 0 .and. printIterations) then
+            print "(a)", "#"
+            print "(a,a)", "# Writing grid file(s): ", trim(gridFileNames(1))
+        end if
+
+        ! All grid information is stored on all processors, with the
+        ! exception of data which can vary in time or use a large
+        ! amount of memory, including the coordinates and
+        ! connectivities. Processor 0 writes this information first as
+        ! a frame for each file.
+
+        if (myID == 0) then
+            do nn = 1, nGridsToWrite
+                call writeCGNSGridFrame(cgnsZone, nn)
+            end do
+        end if
+
+        ! Loop over the number of cgns blocks and write the coordinates
+        ! one at a time to conserve memory.
+
+        do nn = 1, cgnsNDom
+            call writeCoorCGNSZone(nn, cgnsZone(nn))
+        end do
+
+        ! Check if the solution must be written in a different file.
+        ! If so the files must be closed the memory for the fileIDs
+        ! and bases must be released.
+
+        testGridOnly: if (useLinksInCGNS .or. (.not. writeVolume)) then
+
+            ! Processor 0 closes the files.
+
+            if (myID == 0) then
+                do nn = 1, nGridsToWrite
+                    call cg_close_f(fileIDs(nn), ierr)
+                    if (ierr /= CG_OK) &
+                        call terminate("writeCGNSGridFile", &
+                                       "Something wrong when calling cg_close_f")
+                end do
+            end if
+
+            ! Release the memory of fileIDs and cgnsBases.
+
+            deallocate (fileIDs, cgnsBases, stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("writeCGNSGridFile", &
+                 "Deallocation failure for fileIDs and &
+                 &cgnsBases")
+
+        end if testGridOnly
+
+        ! Releases the memory of IOVar.
+
+        deallocate (IOVar, stat=ierr)
+        if (ierr /= 0) &
             call terminate("writeCGNSGridFile", &
-            "Deallocation failure for fileIDs and &
-            &cgnsBases")
-
-    endif testGridOnly
-
-    ! Releases the memory of IOVar.
-
-    deallocate(IOVar, stat=ierr)
-    if(ierr /= 0)                         &
-         call terminate("writeCGNSGridFile", &
-         "Deallocation failure for IOVar")
-
-    ! Wait until all processors (especially processor 0) reach
-    ! this point.
-
-    call mpi_barrier(ADflow_comm_world, ierr)
-
-    ! Write a message that the grid file has been written.
-    ! Of course only processor 0 does this.
-
-    if(myID == 0 .and. printIterations) then
-       print "(a)", "# Grid file(s) written"
-       print "(a)", "#"
-    endif
-  end subroutine writeCGNSGridFile
+                           "Deallocation failure for IOVar")
+
+        ! Wait until all processors (especially processor 0) reach
+        ! this point.
+
+        call mpi_barrier(ADflow_comm_world, ierr)
+
+        ! Write a message that the grid file has been written.
+        ! Of course only processor 0 does this.
+
+        if (myID == 0 .and. printIterations) then
+            print "(a)", "# Grid file(s) written"
+            print "(a)", "#"
+        end if
+    end subroutine writeCGNSGridFile
 
-  subroutine gridFileNamesWrite
-    !
-    !       gridFileNamesWrite determines the names and number of grid
-    !       files to be written. Furthermore, it sets the pointers for
-    !       IOVar to make a general treatment of the writing possible.
-    !
-    use block
-    use inputIO
-    use inputPhysics
-    use inputTimeSpectral
-    use IOModule
-    use iteration
-    use monitor
-    use outputMod
-    use utils, only : terminate
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
+    subroutine gridFileNamesWrite
+        !
+        !       gridFileNamesWrite determines the names and number of grid
+        !       files to be written. Furthermore, it sets the pointers for
+        !       IOVar to make a general treatment of the writing possible.
+        !
+        use block
+        use inputIO
+        use inputPhysics
+        use inputTimeSpectral
+        use IOModule
+        use iteration
+        use monitor
+        use outputMod
+        use utils, only: terminate
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
 
-    integer(kind=intType) :: nn, mm, kk, nAvail
+        integer(kind=intType) :: nn, mm, kk, nAvail
 
-    character(len=7) :: intString
+        character(len=7) :: intString
 
-    !       Determine the names and number of grid files to be written.
-    !
-    ! Determine the situation we are having here.
+        !       Determine the names and number of grid files to be written.
+        !
+        ! Determine the situation we are having here.
 
-    select case (equationMode)
+        select case (equationMode)
 
-    case (steady)
+        case (steady)
 
-       ! Steady state computation. Allocate the memory for the
-       ! grid file names. Even if no file needs to be written the
-       ! memory is still allocated because the name is always set.
+            ! Steady state computation. Allocate the memory for the
+            ! grid file names. Even if no file needs to be written the
+            ! memory is still allocated because the name is always set.
 
-       allocate(gridFileNames(1), stat=ierr)
-       if(ierr /= 0)                          &
-            call terminate("gridFileNamesWrite", &
-            "Memory allocation failure for grid &
-            &file names")
+            allocate (gridFileNames(1), stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("gridFileNamesWrite", &
+                 "Memory allocation failure for grid &
+                 &file names")
 
-       ! Set the number of grid files to be written to either 0 or 1
-       ! and set the name accordingly. The name is always set,
-       ! because it may be needed for a link in the solution file.
+            ! Set the number of grid files to be written to either 0 or 1
+            ! and set the name accordingly. The name is always set,
+            ! because it may be needed for a link in the solution file.
 
-       if( writeGrid ) then
-          nGridsToWrite    = 1
-          gridFileNames(1) = newGridFile
-       else
-          nGridsToWrite    = 0
-          gridFileNames(1) = gridFile
-       endif
+            if (writeGrid) then
+                nGridsToWrite = 1
+                gridFileNames(1) = newGridFile
+            else
+                nGridsToWrite = 0
+                gridFileNames(1) = gridFile
+            end if
 
-       !===============================================================
+            !===============================================================
 
-    case (unsteady)
+        case (unsteady)
 
-       ! Unsteady computation. For a consistent restart for a
-       ! deforming mesh computation nOldLevels grids must be written.
-       ! First determine the number of available solutions.
+            ! Unsteady computation. For a consistent restart for a
+            ! deforming mesh computation nOldLevels grids must be written.
+            ! First determine the number of available solutions.
 
-       nAvail = timeStepUnsteady + nTimeStepsRestart + 1
-       nAvail = min(nAvail,nOldLevels)
+            nAvail = timeStepUnsteady + nTimeStepsRestart + 1
+            nAvail = min(nAvail, nOldLevels)
 
-       ! Allocate the memory for the file names. Note that this is
-       ! an upper boundary. It is possible that less files need
-       ! to be written.
+            ! Allocate the memory for the file names. Note that this is
+            ! an upper boundary. It is possible that less files need
+            ! to be written.
 
-       allocate(gridFileNames(nAvail), stat=ierr)
-       if(ierr /= 0)                          &
-            call terminate("gridFileNamesWrite", &
-            "Memory allocation failure for &
-            &gridFileNames")
+            allocate (gridFileNames(nAvail), stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("gridFileNamesWrite", &
+                 "Memory allocation failure for &
+                 &gridFileNames")
 
-       ! Set the names of the files.
+            ! Set the names of the files.
 
-       do nn=1,nAvail
-          write(intString,"(i7)") timeStepUnsteady + &
-               nTimeStepsRestart + 1 - nn
-          intString = adjustl(intString)
+            do nn = 1, nAvail
+                write (intString, "(i7)") timeStepUnsteady + &
+                    nTimeStepsRestart + 1 - nn
+                intString = adjustl(intString)
 
-          gridFileNames(nn) = trim(newGridFile)//"&
-               &Timestep"//trim(intString)
-       enddo
+                gridFileNames(nn) = trim(newGridFile)//"&
+                     &Timestep"//trim(intString)
+            end do
 
-       ! Determine the number of grid files to be written.
-       ! This depends on quite a few things.
+            ! Determine the number of grid files to be written.
+            ! This depends on quite a few things.
 
-       if( writeGrid ) then
+            if (writeGrid) then
 
-          ! Initialize nGridsToWrite to 1. This may change
-          ! when the mesh is deforming.
+                ! Initialize nGridsToWrite to 1. This may change
+                ! when the mesh is deforming.
 
-          nGridsToWrite = 1
+                nGridsToWrite = 1
 
-          if( deforming_Grid ) then
+                if (deforming_Grid) then
 
-             ! Grids deform during the computation. Check if the
-             ! older grids must be written.
+                    ! Grids deform during the computation. Check if the
+                    ! older grids must be written.
 
-             do nn=1,(nAvail-1)
-                if(.not. oldSolWritten(nn) ) then
-                   nGridsToWrite = nGridsToWrite + 1
-                   gridFileNames(nGridsToWrite) = gridFileNames(nn+1)
-                endif
-             enddo
+                    do nn = 1, (nAvail - 1)
+                        if (.not. oldSolWritten(nn)) then
+                            nGridsToWrite = nGridsToWrite + 1
+                            gridFileNames(nGridsToWrite) = gridFileNames(nn + 1)
+                        end if
+                    end do
 
-          endif
+                end if
 
-       else
+            else
 
-          ! No grids need to be written. Correct the grid file name
-          ! to the original grid file.
+                ! No grids need to be written. Correct the grid file name
+                ! to the original grid file.
 
-          nGridsToWrite = 0
-          gridFileNames(1) = gridFile
+                nGridsToWrite = 0
+                gridFileNames(1) = gridFile
 
-       endif
+            end if
 
-       !===============================================================
+            !===============================================================
 
-    case (timeSpectral)
+        case (timeSpectral)
 
-       ! Time spectral computation. Allocate the file names.
-       ! Again this is an upper bound.
+            ! Time spectral computation. Allocate the file names.
+            ! Again this is an upper bound.
 
-       allocate(gridFileNames(nTimeIntervalsSpectral), stat=ierr)
-       if(ierr /= 0)                          &
-            call terminate("gridFileNamesWrite", &
-            "Memory allocation failure for &
-            &gridFileNames")
+            allocate (gridFileNames(nTimeIntervalsSpectral), stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("gridFileNamesWrite", &
+                 "Memory allocation failure for &
+                 &gridFileNames")
 
-       ! Set the names of the files.
+            ! Set the names of the files.
 
-       do nn=1,nTimeIntervalsSpectral
-          write(intString,"(i7)") nn
-          intString = adjustl(intString)
+            do nn = 1, nTimeIntervalsSpectral
+                write (intString, "(i7)") nn
+                intString = adjustl(intString)
 
-          gridFileNames(nn) = trim(newGridFile)//"&
-               &Spectral"//trim(intString)
-       enddo
+                gridFileNames(nn) = trim(newGridFile)//"&
+                     &Spectral"//trim(intString)
+            end do
 
-       ! Set the number of grid files to be written.
-       ! This depends on quite a few things.
+            ! Set the number of grid files to be written.
+            ! This depends on quite a few things.
 
-       ! GKK The logic below is seriously flawed must never had
-       ! been tested. If nGridsToWrite is set to 0 which is what
-       ! it should be set at if the time spectral grid files are
-       ! already written, then in writeCGNSGridFile, fileIDs are
-       ! and CGNSbases are NOT allocated at ALL. Then when you try
-       ! to write the volume grid, and index into fileIDs and
-       ! CGNSbases, you're screwed. nGridsToWrite MUST ALWAYS be
-       ! ntimeIntervalsSpectral regardless.
+            ! GKK The logic below is seriously flawed must never had
+            ! been tested. If nGridsToWrite is set to 0 which is what
+            ! it should be set at if the time spectral grid files are
+            ! already written, then in writeCGNSGridFile, fileIDs are
+            ! and CGNSbases are NOT allocated at ALL. Then when you try
+            ! to write the volume grid, and index into fileIDs and
+            ! CGNSbases, you're screwed. nGridsToWrite MUST ALWAYS be
+            ! ntimeIntervalsSpectral regardless.
 
+            if (writeGrid) then
 
-       if( writeGrid ) then
+                ! Need some additional checks.
 
-          ! Need some additional checks.
+                if (deforming_Grid) then
 
-          if( deforming_Grid ) then
+                    ! Grids deform during the computation and thus
+                    ! they must be written.
 
-             ! Grids deform during the computation and thus
-             ! they must be written.
+                    nGridsToWrite = nTimeIntervalsSpectral
 
-             nGridsToWrite = nTimeIntervalsSpectral
+                else if (timeSpectralGridsNotWritten) then
 
-          else if( timeSpectralGridsNotWritten ) then
+                    ! Grids do not deform, but the time spectral grids have
+                    ! not been written earlier. So write the grids and set
+                    ! timeSpectralGridsNotWritten to .false.
 
-             ! Grids do not deform, but the time spectral grids have
-             ! not been written earlier. So write the grids and set
-             ! timeSpectralGridsNotWritten to .false.
+                    nGridsToWrite = nTimeIntervalsSpectral
+                    timeSpectralGridsNotWritten = .false.
 
-             nGridsToWrite = nTimeIntervalsSpectral
-             timeSpectralGridsNotWritten = .false.
+                else
 
-          else
+                    ! Although indicated that the grids must be written,
+                    ! this is not necessary, because they have already been
+                    ! written earlier and they have not changed.
 
-             ! Although indicated that the grids must be written,
-             ! this is not necessary, because they have already been
-             ! written earlier and they have not changed.
+                    nGridsToWrite = nTimeIntervalsSpectral! 0
 
-             nGridsToWrite = nTimeIntervalsSpectral! 0
+                end if
 
-          endif
+            else
 
-       else
+                ! It is not needed to write the grid files.
 
-          ! It is not needed to write the grid files.
+                nGridsToWrite = 0
 
-          nGridsToWrite = 0
+            end if
 
-       endif
+        end select
+        !
+        !       Determine whether or not to use links in CGNS.
+        !
 
-    end select
-    !
-    !       Determine whether or not to use links in CGNS.
-    !
+        if (writeGrid) then
 
-    if( writeGrid ) then
+            ! Grid file(s) will be written. Compare the (base) names of the
+            ! grid and solution files and set useLinksInCGNS accordingly.
+            if (newGridFile == solFile) then
+                useLinksInCGNS = .false.
+            else
+                useLinksInCGNS = .true.
+            end if
 
-       ! Grid file(s) will be written. Compare the (base) names of the
-       ! grid and solution files and set useLinksInCGNS accordingly.
-       if(newGridFile == solFile) then
-          useLinksInCGNS = .false.
-       else
-          useLinksInCGNS = .true.
-       endif
+        else
 
-    else
+            ! Grid file(s) will not be written. Compare the (base) names of
+            ! the original grid and solution files and set useLinksInCGNS
+            ! accordingly.
 
-       ! Grid file(s) will not be written. Compare the (base) names of
-       ! the original grid and solution files and set useLinksInCGNS
-       ! accordingly.
+            if (gridFile == solFile) then
+                useLinksInCGNS = .false.
+            else
+                useLinksInCGNS = .true.
+            end if
 
-       if(gridFile == solFile) then
-          useLinksInCGNS = .false.
-       else
-          useLinksInCGNS = .true.
-       endif
+        end if
 
-    endif
+        !       Set the pointers for IOVar if grid files need to be written.
+        !
+        testGridsToWrite: if (nGridsToWrite > 0) then
 
-    !       Set the pointers for IOVar if grid files need to be written.
-    !
-    testGridsToWrite: if(nGridsToWrite > 0) then
+            ! Allocate the memory for IOVar.
 
-       ! Allocate the memory for IOVar.
+            allocate (IOVar(nDom, nGridsToWrite), stat=ierr)
+            if (ierr /= 0) &
+                call terminate("gridFileNamesWrite", &
+                               "Memory allocation failure for IOVar")
 
-       allocate(IOVar(nDom,nGridsToWrite), stat=ierr)
-       if(ierr /= 0)                          &
-            call terminate("gridFileNamesWrite", &
-            "Memory allocation failure for IOVar")
+            ! Set the pointer w of IOVar to the correct coordinates.
 
-       ! Set the pointer w of IOVar to the correct coordinates.
+            select case (equationMode)
 
-       select case(equationMode)
+            case (steady, timeSpectral)
 
-       case (steady, timeSpectral)
+                ! Steady state or time spectral computation. Simply set the
+                ! pointers to the current coordinates.
 
-          ! Steady state or time spectral computation. Simply set the
-          ! pointers to the current coordinates.
+                do nn = 1, nDom
+                    do mm = 1, nGridsToWrite
+                        IOVar(nn, mm)%pointerOffset = 0
+                        IOVar(nn, mm)%w => flowDoms(nn, 1, mm)%x(1:, 1:, 1:, :)
+                    end do
+                end do
 
-          do nn=1,nDom
-             do mm=1,nGridsToWrite
-                IOVar(nn,mm)%pointerOffset = 0
-                IOVar(nn,mm)%w => flowDoms(nn,1,mm)%x(1:,1:,1:,:)
-             enddo
-          enddo
+                !=============================================================
 
-          !=============================================================
+            case (unsteady)
 
-       case (unsteady)
+                ! Unsteady computation. First coordinates to be written
+                ! are the current coordinates.
 
-          ! Unsteady computation. First coordinates to be written
-          ! are the current coordinates.
+                do nn = 1, nDom
+                    IOVar(nn, 1)%pointerOffset = 0
+                    IOVar(nn, 1)%w => flowDoms(nn, 1, 1)%x(1:, 1:, 1:, :)
+                end do
 
-          do nn=1,nDom
-             IOVar(nn,1)%pointerOffset = 0
-             IOVar(nn,1)%w => flowDoms(nn,1,1)%x(1:,1:,1:,:)
-          enddo
+                ! It is possible (for a case with deforming meshes) that
+                ! older coordinates need to be written as well.
 
-          ! It is possible (for a case with deforming meshes) that
-          ! older coordinates need to be written as well.
+                if (deforming_Grid) then
+                    kk = 1
+                    do mm = 1, (nAvail - 1)
+                        if (.not. oldSolWritten(mm)) then
+                            kk = kk + 1
+                            do nn = 1, nDom
+                                IOVar(nn, kk)%pointerOffset = 0
+                                IOVar(nn, kk)%w => flowDoms(nn, 1, 1)%xOld(mm, 1:, 1:, 1:, :)
+                            end do
+                        end if
+                    end do
+                end if
 
-          if( deforming_Grid ) then
-             kk = 1
-             do mm=1,(nAvail-1)
-                if(.not. oldSolWritten(mm) ) then
-                   kk = kk + 1
-                   do nn=1,nDom
-                      IOVar(nn,kk)%pointerOffset = 0
-                      IOVar(nn,kk)%w => flowDoms(nn,1,1)%xOld(mm,1:,1:,1:,:)
-                   enddo
-                endif
-             enddo
-          endif
+            end select
 
-       end select
+        end if testGridsToWrite
 
-    endif testGridsToWrite
+    end subroutine gridFileNamesWrite
 
-  end subroutine gridFileNamesWrite
+    subroutine writeCGNSGridFrame(cgnsZone, ind)
+        !
+        !       writeCGNSGridFrame writes the framework for the grid file
+        !       gridNames(ind) using the information stored in the module
+        !       cgnsGrid. Basically all information but the coordinates is
+        !       written by this routine.
+        !
+        use constants
+        use cgnsGrid
+        use su_cgns
+        use outputMod
+        use utils, only: terminate, setCGNSRealType
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: ind
+        integer, dimension(*), intent(out) :: cgnsZone
+
+        !
+        !      Local variables.
+        !
+        integer :: ierr, ii, jj, cgnsInd, cgnsBase
+        integer(kind=cgsize_t), dimension(9) :: sizes
+        integer(kind=cgsize_t), dimension(3, 2) :: zoneRange, donorRange
+        integer, dimension(3) :: transform
+        integer(kind=cgsize_t), dimension(:, :), allocatable :: donorData
+
+        integer(kind=intType) :: nn, mm, ll, i, j, k
+        integer(kind=intType) :: s1, s2, s3
+
+        real(kind=realType), dimension(3) :: rotCenter, rotRate, translation
+
+        real(kind=realType) :: LRefInv
+
+        character(len=maxStringLen) :: errorMessage
 
-  subroutine writeCGNSGridFrame(cgnsZone, ind)
-    !
-    !       writeCGNSGridFrame writes the framework for the grid file
-    !       gridNames(ind) using the information stored in the module
-    !       cgnsGrid. Basically all information but the coordinates is
-    !       written by this routine.
-    !
-    use constants
-    use cgnsGrid
-    use su_cgns
-    use outputMod
-    use utils, only : terminate, setCGNSRealType
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in)  :: ind
-    integer, dimension(*), intent(out) :: cgnsZone
-
-    !
-    !      Local variables.
-    !
-    integer :: ierr, ii, jj, cgnsInd, cgnsBase
-    integer(kind=cgsize_t), dimension(9)   :: sizes
-    integer(kind=cgsize_t), dimension(3,2) :: zoneRange, donorRange
-    integer, dimension(3)   :: transform
-    integer(kind=cgsize_t), dimension(:,:), allocatable :: donorData
-
-    integer(kind=intType) :: nn, mm, ll, i, j, k
-    integer(kind=intType) :: s1, s2, s3
-
-    real(kind=realType), dimension(3) :: rotCenter, rotRate, translation
-
-    real(kind=realType) :: LRefInv
-
-    character(len=maxStringLen)   :: errorMessage
-
-    type(cgnsBcDatasetType), pointer, dimension(:) :: dataSet
-
-    ! Open the CGNS file for writing and check if it went okay.
-    ! Store the file index afterwards.
-
-    call cg_open_f(gridFileNames(ind), mode_write, cgnsInd, ierr)
-    if(ierr /= CG_OK) then
-       write(errorMessage,*) "File ", trim(gridfileNames(ind)), &
-            " could not be opened by cgns &
-            &for writing"
-       call terminate("writeCGNSGridFrame", errorMessage)
-    endif
-
-    fileIDs(ind) = cgnsInd
-
-    ! Create the base. Copy the cell and physical dimensions and
-    ! store the base ID for this index.
-
-    call cg_base_write_f(cgnsInd, cgnsBaseName, cgnsCelldim, &
-         cgnsPhysdim, cgnsBase, ierr)
-    if(ierr /= CG_OK)                     &
-         call terminate("writeCGNSGridFrame", &
-         "Something wrong when calling cg_base_write_f")
-
-    cgnsBases(ind) = cgnsBase
-    !
-    !       Write the family info.
-    !
-    ! Loop over the number of families.
-
-    familyLoop: do nn=1,cgnsNfamilies
-
-       ! Create the family node.
-
-       call cg_family_write_f(cgnsInd, cgnsBase, &
-            cgnsFamilies(nn)%familyName, ii, ierr)
-       if(ierr /= CG_OK)                     &
-            call terminate("writeCGNSGridFrame", &
-            "Something wrong when calling &
-            &cg_family_write_f")
-
-       ! Write the family BC, if this is present.
-
-       if(cgnsFamilies(nn)%BCTypeCGNS /= Null) then
-
-          call cg_fambc_write_f(cgnsInd, cgnsBase, ii,   &
-               cgnsFamilies(nn)%bcName, &
-               cgnsFamilies(nn)%BCTypeCGNS, jj, ierr)
-          if(ierr /= CG_OK)                     &
-               call terminate("writeCGNSGridFrame", &
-               "Something wrong when calling &
-               &cg_fambc_write_f")
-
-          ! If the boundary condition is UserDefined add the
-          ! description what type of user defined BC it is.
-
-          if(cgnsFamilies(nn)%BCTypeCGNS == UserDefined) then
-
-             ! Ultimately you would like to create the
-             ! UserDefinedData_t as a subnode of the family boundary
-             ! condition node. However, at the moment CGNS does not
-             ! allow this and therefore it is put one level higher.
-             ! As only 1 boundary condition per family is allowed,
-             ! this is not really problem.
-
-             call cg_goto_f(cgnsInd, cgnsBase, ierr, &
-                  "Family_t", ii, "end")
-             if(ierr /= CG_OK)                     &
-                  call terminate("writeCGNSGridFrame", &
-                  "Something wrong when calling cg_goto_f")
-
-             call cg_user_data_write_f(cgnsFamilies(nn)%userDefinedName, &
-                  ierr)
-             if(ierr /= CG_OK)                     &
-                  call terminate("writeCGNSGridFrame", &
-                  "Something wrong when calling &
-                  &cg_user_data_write_f")
-          endif
-
-       endif
-    enddo familyLoop
-    !
-    !       Write all the zone info, except the coordinates.
-    !
-    ! Loop over the number of zones in the original grid.
-
-    zoneLoop: do nn=1,cgnsNDom
-
-       ! Store the inverse of the scaling factor to meters.
-
-       LRefInv = one/cgnsDoms(nn)%LRef
-
-       ! Store the dimensions of the zone in sizes and create the zone.
-
-       sizes(1) = cgnsDoms(nn)%il
-       sizes(2) = cgnsDoms(nn)%jl
-       sizes(3) = cgnsDoms(nn)%kl
-       sizes(4) = cgnsDoms(nn)%nx
-       sizes(5) = cgnsDoms(nn)%ny
-       sizes(6) = cgnsDoms(nn)%nz
-       sizes(7) = 0
-       sizes(8) = 0
-       sizes(9) = 0
-
-       call cg_zone_write_f(cgnsInd, cgnsBase,                   &
-            cgnsDoms(nn)%zoneName, sizes,        &
-            cgnsDoms(nn)%zoneType, cgnsZone(nn), &
-            ierr)
-       if(ierr /= CG_OK)                     &
+        type(cgnsBcDatasetType), pointer, dimension(:) :: dataSet
+
+        ! Open the CGNS file for writing and check if it went okay.
+        ! Store the file index afterwards.
+
+        call cg_open_f(gridFileNames(ind), mode_write, cgnsInd, ierr)
+        if (ierr /= CG_OK) then
+            write (errorMessage, *) "File ", trim(gridfileNames(ind)), &
+                 " could not be opened by cgns &
+                 &for writing"
+            call terminate("writeCGNSGridFrame", errorMessage)
+        end if
+
+        fileIDs(ind) = cgnsInd
+
+        ! Create the base. Copy the cell and physical dimensions and
+        ! store the base ID for this index.
+
+        call cg_base_write_f(cgnsInd, cgnsBaseName, cgnsCelldim, &
+                             cgnsPhysdim, cgnsBase, ierr)
+        if (ierr /= CG_OK) &
             call terminate("writeCGNSGridFrame", &
-            "Something wrong when calling &
-            &cg_zone_write_f")
+                           "Something wrong when calling cg_base_write_f")
 
-       ! Go to the current zone. Needed when family and/or rotating
-       ! frame info must be written.
+        cgnsBases(ind) = cgnsBase
+        !
+        !       Write the family info.
+        !
+        ! Loop over the number of families.
 
-       call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", &
-            cgnsZone(nn), "end")
-       if(ierr /= CG_OK) &
-            call terminate("writeCGNSGridFrame", &
-            "Something wrong when calling cg_goto_f")
+        familyLoop: do nn = 1, cgnsNfamilies
 
-       ! Check if the zone belongs to a family. If so, write the
-       ! family name.
+            ! Create the family node.
 
-       mm = cgnsDoms(nn)%familyID
-       if(mm > 0) then
+            call cg_family_write_f(cgnsInd, cgnsBase, &
+                                   cgnsFamilies(nn)%familyName, ii, ierr)
+            if (ierr /= CG_OK) &
+                 call terminate("writeCGNSGridFrame", &
+                 "Something wrong when calling &
+                 &cg_family_write_f")
 
-          call cg_famname_write_f(cgnsFamilies(mm)%familyName, ierr)
-          if(ierr /= CG_OK) &
-               call terminate("writeCGNSGridFrame", &
-               "Something wrong when calling &
-               &cg_famname_write_f")
-       endif
+            ! Write the family BC, if this is present.
 
-       ! Write the rotating frame info, if the zone is rotating.
+            if (cgnsFamilies(nn)%BCTypeCGNS /= Null) then
 
-       if( cgnsDoms(nn)%rotatingFrameSpecified ) then
+                call cg_fambc_write_f(cgnsInd, cgnsBase, ii, &
+                                      cgnsFamilies(nn)%bcName, &
+                                      cgnsFamilies(nn)%BCTypeCGNS, jj, ierr)
+                if (ierr /= CG_OK) &
+                     call terminate("writeCGNSGridFrame", &
+                     "Something wrong when calling &
+                     &cg_fambc_write_f")
+
+                ! If the boundary condition is UserDefined add the
+                ! description what type of user defined BC it is.
+
+                if (cgnsFamilies(nn)%BCTypeCGNS == UserDefined) then
+
+                    ! Ultimately you would like to create the
+                    ! UserDefinedData_t as a subnode of the family boundary
+                    ! condition node. However, at the moment CGNS does not
+                    ! allow this and therefore it is put one level higher.
+                    ! As only 1 boundary condition per family is allowed,
+                    ! this is not really problem.
+
+                    call cg_goto_f(cgnsInd, cgnsBase, ierr, &
+                                   "Family_t", ii, "end")
+                    if (ierr /= CG_OK) &
+                        call terminate("writeCGNSGridFrame", &
+                                       "Something wrong when calling cg_goto_f")
+
+                    call cg_user_data_write_f(cgnsFamilies(nn)%userDefinedName, &
+                                              ierr)
+                    if (ierr /= CG_OK) &
+                         call terminate("writeCGNSGridFrame", &
+                         "Something wrong when calling &
+                         &cg_user_data_write_f")
+                end if
+
+            end if
+        end do familyLoop
+        !
+        !       Write all the zone info, except the coordinates.
+        !
+        ! Loop over the number of zones in the original grid.
+
+        zoneLoop: do nn = 1, cgnsNDom
+
+            ! Store the inverse of the scaling factor to meters.
+
+            LRefInv = one / cgnsDoms(nn)%LRef
+
+            ! Store the dimensions of the zone in sizes and create the zone.
+
+            sizes(1) = cgnsDoms(nn)%il
+            sizes(2) = cgnsDoms(nn)%jl
+            sizes(3) = cgnsDoms(nn)%kl
+            sizes(4) = cgnsDoms(nn)%nx
+            sizes(5) = cgnsDoms(nn)%ny
+            sizes(6) = cgnsDoms(nn)%nz
+            sizes(7) = 0
+            sizes(8) = 0
+            sizes(9) = 0
+
+            call cg_zone_write_f(cgnsInd, cgnsBase, &
+                                 cgnsDoms(nn)%zoneName, sizes, &
+                                 cgnsDoms(nn)%zoneType, cgnsZone(nn), &
+                                 ierr)
+            if (ierr /= CG_OK) &
+                 call terminate("writeCGNSGridFrame", &
+                 "Something wrong when calling &
+                 &cg_zone_write_f")
+
+            ! Go to the current zone. Needed when family and/or rotating
+            ! frame info must be written.
+
+            call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", &
+                           cgnsZone(nn), "end")
+            if (ierr /= CG_OK) &
+                call terminate("writeCGNSGridFrame", &
+                               "Something wrong when calling cg_goto_f")
+
+            ! Check if the zone belongs to a family. If so, write the
+            ! family name.
+
+            mm = cgnsDoms(nn)%familyID
+            if (mm > 0) then
+
+                call cg_famname_write_f(cgnsFamilies(mm)%familyName, ierr)
+                if (ierr /= CG_OK) &
+                     call terminate("writeCGNSGridFrame", &
+                     "Something wrong when calling &
+                     &cg_famname_write_f")
+            end if
 
-          ! Convert the rotation rate to degrees per second and store
-          ! it in a single precision array.
+            ! Write the rotating frame info, if the zone is rotating.
 
-          rotRate(1) = cgnsDoms(nn)%rotRate(1)*180.0_realType/pi
-          rotRate(2) = cgnsDoms(nn)%rotRate(2)*180.0_realType/pi
-          rotRate(3) = cgnsDoms(nn)%rotRate(3)*180.0_realType/pi
+            if (cgnsDoms(nn)%rotatingFrameSpecified) then
 
-          ! Convert the rotation center to the original units
-          ! and also in single precision.
+                ! Convert the rotation rate to degrees per second and store
+                ! it in a single precision array.
 
-          rotCenter(1) = LRefInv*cgnsDoms(nn)%rotCenter(1)
-          rotCenter(2) = LRefInv*cgnsDoms(nn)%rotCenter(2)
-          rotCenter(3) = LRefInv*cgnsDoms(nn)%rotCenter(3)
+                rotRate(1) = cgnsDoms(nn)%rotRate(1) * 180.0_realType / pi
+                rotRate(2) = cgnsDoms(nn)%rotRate(2) * 180.0_realType / pi
+                rotRate(3) = cgnsDoms(nn)%rotRate(3) * 180.0_realType / pi
 
-          ! Write the rotation rate and rotation center.
+                ! Convert the rotation center to the original units
+                ! and also in single precision.
 
-          call cg_rotating_write_f(real(rotRate,4), real(rotCenter,4), ierr)
-          if(ierr /= CG_OK) &
-               call terminate("writeCGNSGridFrame", &
-               "Something wrong when calling &
-               &cg_rotating_write_f")
-
-          ! Write the units of the rotation rate of the
-          ! rotating frame.
-
-          call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t",        &
-               cgnsZone(nn), "RotatingCoordinates_t", 1, &
-               "DataArray_t", 2, "end")
-          if(ierr /= CG_OK)                     &
-               call terminate("writeCGNSGridFrame", &
-               "Something wrong when calling cg_goto_f")
-
-          call cg_dataclass_write_f(Dimensional, ierr)
-          if(ierr /= CG_OK)                     &
-               call terminate("writeCGNSGridFrame", &
-               "Something wrong when calling &
-               &cg_dataclass_write_f")
-
-          call cg_units_write_f(Null, Null, Second, Null, &
-               Degree, ierr)
-          if(ierr /= CG_OK)                     &
-               call terminate("writeCGNSGridFrame", &
-               "Something wrong when calling &
-               &cg_units_write_f")
-       endif
-
-       ! Loop over all 1 to 1 connectivities of the block and
-       ! write the data.
-
-       loop1to1: do mm=1,cgnsDoms(nn)%n1to1
-
-          ! Store the range of the subface in zoneRange and
-          ! the range of the donor in donorRange.
-
-          zoneRange(1,1) = cgnsDoms(nn)%conn1to1(mm)%iBeg
-          zoneRange(2,1) = cgnsDoms(nn)%conn1to1(mm)%jBeg
-          zoneRange(3,1) = cgnsDoms(nn)%conn1to1(mm)%kBeg
-
-          zoneRange(1,2) = cgnsDoms(nn)%conn1to1(mm)%iEnd
-          zoneRange(2,2) = cgnsDoms(nn)%conn1to1(mm)%jEnd
-          zoneRange(3,2) = cgnsDoms(nn)%conn1to1(mm)%kEnd
-
-          donorRange(1,1) = cgnsDoms(nn)%conn1to1(mm)%diBeg
-          donorRange(2,1) = cgnsDoms(nn)%conn1to1(mm)%djBeg
-          donorRange(3,1) = cgnsDoms(nn)%conn1to1(mm)%dkBeg
-
-          donorRange(1,2) = cgnsDoms(nn)%conn1to1(mm)%diEnd
-          donorRange(2,2) = cgnsDoms(nn)%conn1to1(mm)%djEnd
-          donorRange(3,2) = cgnsDoms(nn)%conn1to1(mm)%dkEnd
-
-          ! Check whether the subface is periodic or not.
-
-          periodicTest: if( cgnsDoms(nn)%conn1to1(mm)%periodic ) then
-
-             ! Subface is periodic. Due to the current limitations in
-             ! cgns it is not possible to write this info as a 1 to 1
-             ! subface and the general connectivity must be used.
-
-             ! First allocate the memory for donorData.
-
-             ll = (abs(donorRange(3,2) - donorRange(3,1)) + 1) &
-                  * (abs(donorRange(2,2) - donorRange(2,1)) + 1) &
-                  * (abs(donorRange(1,2) - donorRange(1,1)) + 1)
-
-             allocate(donorData(3,ll), stat=ierr)
-             if(ierr /= 0) &
-                  call terminate("writeCGNSGridFrame", &
-                  "Memory allocation failure for &
-                  &donorData")
-
-             ! Determine the step for the three directions of
-             ! donorData and fill the array.
-
-             s1 = 1; s2 = 1; s3 = 1
-             if(donorRange(1,2) < donorRange(1,1)) s1 = -1
-             if(donorRange(2,2) < donorRange(2,1)) s2 = -1
-             if(donorRange(3,2) < donorRange(3,1)) s3 = -1
-
-             ll = 0
-             do k=donorRange(3,1),donorRange(3,2),s3
-                do j=donorRange(2,1),donorRange(2,2),s2
-                   do i=donorRange(1,1),donorRange(1,2),s1
-                      ll = ll+1
-
-                      donorData(1,ll) = i
-                      donorData(2,ll) = j
-                      donorData(3,ll) = k
-                   enddo
-                enddo
-             enddo
-
-             ! Write the general connectivity.
-
-             ii = ll
-             call cg_conn_write_f(cgnsInd, cgnsBase, cgnsZone(nn),       &
-                  cgnsDoms(nn)%conn1to1(mm)%connectName, &
-                  Vertex, Abutting1to1, PointRange, int(2, cgsize_t),   &
-                  zoneRange,                             &
-                  cgnsDoms(nn)%conn1to1(mm)%donorName,   &
-                  Structured, PointListDonor, Integer,   &
-                  int(ii, cgsize_t), donorData, jj, ierr)
-             if(ierr /= CG_OK)                     &
-                  call terminate("writeCGNSGridFrame", &
-                  "Something wrong when calling &
-                  &cg_conn_write_f")
-
-             ! Deallocate the memory of donorData again.
-
-             deallocate(donorData, stat=ierr)
-             if(ierr /= 0)                          &
-                  call terminate("writeCGNSGridFrame", &
-                  "Deallocation failure for donorData")
-
-             ! Write the periodic info. First transform the rotation
-             ! center and translation vector to the original coordinates,
-             ! the angles to degrees and store everything in single
-             ! precision arrays.
-
-             rotCenter  = cgnsDoms(nn)%conn1to1(mm)%rotationCenter &
-                  * LRefInv
-             translation = cgnsDoms(nn)%conn1to1(mm)%translation &
-                  * LRefInv
-             rotRate    = cgnsDoms(nn)%conn1to1(mm)%rotationAngles &
-                  * 180.0_realType/pi
-
-             call cg_conn_periodic_write_f(cgnsInd, cgnsBase,           &
-                  cgnsZone(nn), jj, real(rotCenter,4), &
-                  real(rotRate,4), real(translation,4), ierr)
-             if(ierr /= CG_OK)                     &
-                  call terminate("writeCGNSGridFrame", &
-                  "Something wrong when calling &
-                  &cg_conn_periodic_write_f")
-
-             ! Write the units of the periodic rotation.
-
-             call cg_goto_f(cgnsInd, cgnsBase, ierr,         &
-                  "Zone_t", cgnsZone(nn),          &
-                  "ZoneGridConnectivity_t", 1,     &
-                  "GridConnectivity_t", jj,        &
-                  "GridConnectivityProperty_t", 1, &
-                  "Periodic_t", 1, "DataArray_t", 2, "end")
-             if(ierr /= CG_OK)                     &
-                  call terminate("writeCGNSGridFrame", &
-                  "Something wrong when calling cg_goto_f")
-
-             call cg_dataclass_write_f(Dimensional, ierr)
-             if(ierr /= CG_OK)                     &
-                  call terminate("writeCGNSGridFrame", &
-                  "Something wrong when calling &
-                  &cg_dataclass_write_f")
-
-             call cg_units_write_f(Null, Null, Null, Null, &
-                  Degree, ierr)
-             if(ierr /= CG_OK)                     &
-                  call terminate("writeCGNSGridFrame", &
-                  "Something wrong when calling &
-                  &cg_units_write_f")
-
-          else periodicTest
-
-             ! Normal 1 to 1 subface. Set the elements for the
-             ! abbreviation of the transformation matrix.
-
-             transform(1) = cgnsDoms(nn)%conn1to1(mm)%l1
-             transform(2) = cgnsDoms(nn)%conn1to1(mm)%l2
-             transform(3) = cgnsDoms(nn)%conn1to1(mm)%l3
-
-             ! Write the connectivity.
-
-             call cg_1to1_write_f(cgnsInd, cgnsBase, cgnsZone(nn),       &
-                  cgnsDoms(nn)%conn1to1(mm)%connectName, &
-                  cgnsDoms(nn)%conn1to1(mm)%donorName,   &
-                  zoneRange, donorRange, transform,      &
-                  ii, ierr)
-             if(ierr /= CG_OK)                     &
-                  call terminate("writeCGNSGridFrame", &
-                  "Something wrong when calling &
-                  &cg_1to1_write_f")
-
-          endif periodicTest
-
-       enddo loop1to1
-
-       ! Loop over the boundary subfaces and write the data.
-
-       loopBocos: do mm=1,cgnsDoms(nn)%nBocos
-
-          ! Check if this is an actual face. If not, continue with
-          ! the next face.
-
-          if(.not. cgnsDoms(nn)%bocoInfo(mm)%actualFace) cycle
-
-          ! Store the range of the subface in zoneRange.
-
-          zoneRange(1,1) = cgnsDoms(nn)%bocoInfo(mm)%iBeg
-          zoneRange(2,1) = cgnsDoms(nn)%bocoInfo(mm)%jBeg
-          zoneRange(3,1) = cgnsDoms(nn)%bocoInfo(mm)%kBeg
-
-          zoneRange(1,2) = cgnsDoms(nn)%bocoInfo(mm)%iEnd
-          zoneRange(2,2) = cgnsDoms(nn)%bocoInfo(mm)%jEnd
-          zoneRange(3,2) = cgnsDoms(nn)%bocoInfo(mm)%kEnd
-
-          ! Write the boundary condition. As the preprocessing
-          ! overwrites the BCType for a family specified BC, the
-          ! boundary condition is constructed first and stored in jj.
-
-          jj = cgnsDoms(nn)%bocoInfo(mm)%BCTypeCGNS
-          ll = cgnsDoms(nn)%bocoInfo(mm)%familyID
-          if(ll > 0) jj = FamilySpecified
-
-          call cg_boco_write_f(cgnsInd, cgnsBase, cgnsZone(nn),    &
-               cgnsDoms(nn)%bocoInfo(mm)%bocoName, &
-               jj, PointRange, int(2, cgsize_t), zoneRange, ii, ierr)
-          if(ierr /= CG_OK)                     &
-               call terminate("writeCGNSGridFrame", &
-               "Something wrong when calling &
-               &cg_boco_write_f")
-
-          ! Write the family name for the surface for all boundary
-          ! conditions.
-          call cg_goto_f(cgnsInd, cgnsBase, ierr, &
-               "Zone_t", cgnsZone(nn),  &
-               "ZoneBC_t", 1, "BC_t", ii, "end")
-          if(ierr /= CG_OK)                     &
-               call terminate("writeCGNSGridFrame", &
-               "Something wrong when calling cg_goto_f")
-
-          call cg_famname_write_f(cgnsDoms(nn)%bocoINFO(mm)%wallBCName, ierr)
+                rotCenter(1) = LRefInv * cgnsDoms(nn)%rotCenter(1)
+                rotCenter(2) = LRefInv * cgnsDoms(nn)%rotCenter(2)
+                rotCenter(3) = LRefInv * cgnsDoms(nn)%rotCenter(3)
 
-          ! Write the family name if the boundary condition is
-          ! specified per family.
+                ! Write the rotation rate and rotation center.
 
-          if(ll > 0) then
+                call cg_rotating_write_f(real(rotRate, 4), real(rotCenter, 4), ierr)
+                if (ierr /= CG_OK) &
+                     call terminate("writeCGNSGridFrame", &
+                     "Something wrong when calling &
+                     &cg_rotating_write_f")
 
-             ! Go to the current boundary condition and write
-             ! the appropriate family name.
+                ! Write the units of the rotation rate of the
+                ! rotating frame.
 
-             call cg_goto_f(cgnsInd, cgnsBase, ierr, &
-                  "Zone_t", cgnsZone(nn),  &
-                  "ZoneBC_t", 1, "BC_t", ii, "end")
-             if(ierr /= CG_OK)                     &
-                  call terminate("writeCGNSGridFrame", &
-                  "Something wrong when calling cg_goto_f")
+                call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", &
+                               cgnsZone(nn), "RotatingCoordinates_t", 1, &
+                               "DataArray_t", 2, "end")
+                if (ierr /= CG_OK) &
+                    call terminate("writeCGNSGridFrame", &
+                                   "Something wrong when calling cg_goto_f")
+
+                call cg_dataclass_write_f(Dimensional, ierr)
+                if (ierr /= CG_OK) &
+                     call terminate("writeCGNSGridFrame", &
+                     "Something wrong when calling &
+                     &cg_dataclass_write_f")
 
-             call cg_famname_write_f(cgnsFamilies(ll)%familyName, ierr)
-             if(ierr /= CG_OK)                     &
-                  call terminate("writeCGNSGridFrame", &
-                  "Something wrong when calling &
-                  &cg_famname_write_f")
-          endif
+                call cg_units_write_f(Null, Null, Second, Null, &
+                                      Degree, ierr)
+                if (ierr /= CG_OK) &
+                     call terminate("writeCGNSGridFrame", &
+                     "Something wrong when calling &
+                     &cg_units_write_f")
+            end if
+
+            ! Loop over all 1 to 1 connectivities of the block and
+            ! write the data.
+
+            loop1to1: do mm = 1, cgnsDoms(nn)%n1to1
+
+                ! Store the range of the subface in zoneRange and
+                ! the range of the donor in donorRange.
+
+                zoneRange(1, 1) = cgnsDoms(nn)%conn1to1(mm)%iBeg
+                zoneRange(2, 1) = cgnsDoms(nn)%conn1to1(mm)%jBeg
+                zoneRange(3, 1) = cgnsDoms(nn)%conn1to1(mm)%kBeg
+
+                zoneRange(1, 2) = cgnsDoms(nn)%conn1to1(mm)%iEnd
+                zoneRange(2, 2) = cgnsDoms(nn)%conn1to1(mm)%jEnd
+                zoneRange(3, 2) = cgnsDoms(nn)%conn1to1(mm)%kEnd
+
+                donorRange(1, 1) = cgnsDoms(nn)%conn1to1(mm)%diBeg
+                donorRange(2, 1) = cgnsDoms(nn)%conn1to1(mm)%djBeg
+                donorRange(3, 1) = cgnsDoms(nn)%conn1to1(mm)%dkBeg
+
+                donorRange(1, 2) = cgnsDoms(nn)%conn1to1(mm)%diEnd
+                donorRange(2, 2) = cgnsDoms(nn)%conn1to1(mm)%djEnd
+                donorRange(3, 2) = cgnsDoms(nn)%conn1to1(mm)%dkEnd
+
+                ! Check whether the subface is periodic or not.
+
+                periodicTest: if (cgnsDoms(nn)%conn1to1(mm)%periodic) then
+
+                    ! Subface is periodic. Due to the current limitations in
+                    ! cgns it is not possible to write this info as a 1 to 1
+                    ! subface and the general connectivity must be used.
+
+                    ! First allocate the memory for donorData.
+
+                    ll = (abs(donorRange(3, 2) - donorRange(3, 1)) + 1) &
+                         * (abs(donorRange(2, 2) - donorRange(2, 1)) + 1) &
+                         * (abs(donorRange(1, 2) - donorRange(1, 1)) + 1)
+
+                    allocate (donorData(3, ll), stat=ierr)
+                    if (ierr /= 0) &
+                         call terminate("writeCGNSGridFrame", &
+                         "Memory allocation failure for &
+                         &donorData")
+
+                    ! Determine the step for the three directions of
+                    ! donorData and fill the array.
+
+                    s1 = 1; s2 = 1; s3 = 1
+                    if (donorRange(1, 2) < donorRange(1, 1)) s1 = -1
+                    if (donorRange(2, 2) < donorRange(2, 1)) s2 = -1
+                    if (donorRange(3, 2) < donorRange(3, 1)) s3 = -1
+
+                    ll = 0
+                    do k = donorRange(3, 1), donorRange(3, 2), s3
+                        do j = donorRange(2, 1), donorRange(2, 2), s2
+                            do i = donorRange(1, 1), donorRange(1, 2), s1
+                                ll = ll + 1
+
+                                donorData(1, ll) = i
+                                donorData(2, ll) = j
+                                donorData(3, ll) = k
+                            end do
+                        end do
+                    end do
+
+                    ! Write the general connectivity.
+
+                    ii = ll
+                    call cg_conn_write_f(cgnsInd, cgnsBase, cgnsZone(nn), &
+                                         cgnsDoms(nn)%conn1to1(mm)%connectName, &
+                                         Vertex, Abutting1to1, PointRange, int(2, cgsize_t), &
+                                         zoneRange, &
+                                         cgnsDoms(nn)%conn1to1(mm)%donorName, &
+                                         Structured, PointListDonor, Integer, &
+                                         int(ii, cgsize_t), donorData, jj, ierr)
+                    if (ierr /= CG_OK) &
+                         call terminate("writeCGNSGridFrame", &
+                         "Something wrong when calling &
+                         &cg_conn_write_f")
+
+                    ! Deallocate the memory of donorData again.
+
+                    deallocate (donorData, stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("writeCGNSGridFrame", &
+                                       "Deallocation failure for donorData")
+
+                    ! Write the periodic info. First transform the rotation
+                    ! center and translation vector to the original coordinates,
+                    ! the angles to degrees and store everything in single
+                    ! precision arrays.
+
+                    rotCenter = cgnsDoms(nn)%conn1to1(mm)%rotationCenter &
+                                * LRefInv
+                    translation = cgnsDoms(nn)%conn1to1(mm)%translation &
+                                  * LRefInv
+                    rotRate = cgnsDoms(nn)%conn1to1(mm)%rotationAngles &
+                              * 180.0_realType / pi
+
+                    call cg_conn_periodic_write_f(cgnsInd, cgnsBase, &
+                                                  cgnsZone(nn), jj, real(rotCenter, 4), &
+                                                  real(rotRate, 4), real(translation, 4), ierr)
+                    if (ierr /= CG_OK) &
+                         call terminate("writeCGNSGridFrame", &
+                         "Something wrong when calling &
+                         &cg_conn_periodic_write_f")
+
+                    ! Write the units of the periodic rotation.
+
+                    call cg_goto_f(cgnsInd, cgnsBase, ierr, &
+                                   "Zone_t", cgnsZone(nn), &
+                                   "ZoneGridConnectivity_t", 1, &
+                                   "GridConnectivity_t", jj, &
+                                   "GridConnectivityProperty_t", 1, &
+                                   "Periodic_t", 1, "DataArray_t", 2, "end")
+                    if (ierr /= CG_OK) &
+                        call terminate("writeCGNSGridFrame", &
+                                       "Something wrong when calling cg_goto_f")
+
+                    call cg_dataclass_write_f(Dimensional, ierr)
+                    if (ierr /= CG_OK) &
+                         call terminate("writeCGNSGridFrame", &
+                         "Something wrong when calling &
+                         &cg_dataclass_write_f")
+
+                    call cg_units_write_f(Null, Null, Null, Null, &
+                                          Degree, ierr)
+                    if (ierr /= CG_OK) &
+                         call terminate("writeCGNSGridFrame", &
+                         "Something wrong when calling &
+                         &cg_units_write_f")
+
+                    else periodicTest
+
+                    ! Normal 1 to 1 subface. Set the elements for the
+                    ! abbreviation of the transformation matrix.
+
+                    transform(1) = cgnsDoms(nn)%conn1to1(mm)%l1
+                    transform(2) = cgnsDoms(nn)%conn1to1(mm)%l2
+                    transform(3) = cgnsDoms(nn)%conn1to1(mm)%l3
+
+                    ! Write the connectivity.
+
+                    call cg_1to1_write_f(cgnsInd, cgnsBase, cgnsZone(nn), &
+                                         cgnsDoms(nn)%conn1to1(mm)%connectName, &
+                                         cgnsDoms(nn)%conn1to1(mm)%donorName, &
+                                         zoneRange, donorRange, transform, &
+                                         ii, ierr)
+                    if (ierr /= CG_OK) &
+                         call terminate("writeCGNSGridFrame", &
+                         "Something wrong when calling &
+                         &cg_1to1_write_f")
+
+                end if periodicTest
+
+            end do loop1to1
+
+            ! Loop over the boundary subfaces and write the data.
+
+            loopBocos: do mm = 1, cgnsDoms(nn)%nBocos
+
+                ! Check if this is an actual face. If not, continue with
+                ! the next face.
+
+                if (.not. cgnsDoms(nn)%bocoInfo(mm)%actualFace) cycle
+
+                ! Store the range of the subface in zoneRange.
+
+                zoneRange(1, 1) = cgnsDoms(nn)%bocoInfo(mm)%iBeg
+                zoneRange(2, 1) = cgnsDoms(nn)%bocoInfo(mm)%jBeg
+                zoneRange(3, 1) = cgnsDoms(nn)%bocoInfo(mm)%kBeg
+
+                zoneRange(1, 2) = cgnsDoms(nn)%bocoInfo(mm)%iEnd
+                zoneRange(2, 2) = cgnsDoms(nn)%bocoInfo(mm)%jEnd
+                zoneRange(3, 2) = cgnsDoms(nn)%bocoInfo(mm)%kEnd
+
+                ! Write the boundary condition. As the preprocessing
+                ! overwrites the BCType for a family specified BC, the
+                ! boundary condition is constructed first and stored in jj.
+
+                jj = cgnsDoms(nn)%bocoInfo(mm)%BCTypeCGNS
+                ll = cgnsDoms(nn)%bocoInfo(mm)%familyID
+                if (ll > 0) jj = FamilySpecified
+
+                call cg_boco_write_f(cgnsInd, cgnsBase, cgnsZone(nn), &
+                                     cgnsDoms(nn)%bocoInfo(mm)%bocoName, &
+                                     jj, PointRange, int(2, cgsize_t), zoneRange, ii, ierr)
+                if (ierr /= CG_OK) &
+                     call terminate("writeCGNSGridFrame", &
+                     "Something wrong when calling &
+                     &cg_boco_write_f")
 
-          ! If the boundary condition is UserDefined, write the
-          ! description of what type of user defined BC.
+                ! Write the family name for the surface for all boundary
+                ! conditions.
+                call cg_goto_f(cgnsInd, cgnsBase, ierr, &
+                               "Zone_t", cgnsZone(nn), &
+                               "ZoneBC_t", 1, "BC_t", ii, "end")
+                if (ierr /= CG_OK) &
+                    call terminate("writeCGNSGridFrame", &
+                                   "Something wrong when calling cg_goto_f")
 
-          if(jj == UserDefined) then
+                call cg_famname_write_f(cgnsDoms(nn)%bocoINFO(mm)%wallBCName, ierr)
 
-             ! Go to the current boundary condition and write
-             ! the appropriate data.
+                ! Write the family name if the boundary condition is
+                ! specified per family.
+
+                if (ll > 0) then
 
-             call cg_goto_f(cgnsInd, cgnsBase, ierr, &
-                  "Zone_t", cgnsZone(nn),  &
-                  "ZoneBC_t", 1, "BC_t", ii, "end")
-             if(ierr /= CG_OK)                     &
-                  call terminate("writeCGNSGridFrame", &
-                  "Something wrong when calling cg_goto_f")
+                    ! Go to the current boundary condition and write
+                    ! the appropriate family name.
+
+                    call cg_goto_f(cgnsInd, cgnsBase, ierr, &
+                                   "Zone_t", cgnsZone(nn), &
+                                   "ZoneBC_t", 1, "BC_t", ii, "end")
+                    if (ierr /= CG_OK) &
+                        call terminate("writeCGNSGridFrame", &
+                                       "Something wrong when calling cg_goto_f")
+
+                    call cg_famname_write_f(cgnsFamilies(ll)%familyName, ierr)
+                    if (ierr /= CG_OK) &
+                         call terminate("writeCGNSGridFrame", &
+                         "Something wrong when calling &
+                         &cg_famname_write_f")
+                end if
 
-             call cg_user_data_write_f(    &
-                  cgnsDoms(nn)%bocoInfo(mm)%userDefinedName, ierr)
-             if(ierr /= CG_OK)                     &
-                  call terminate("writeCGNSGridFrame", &
-                  "Something wrong when calling &
-                  &cg_user_data_write_f")
-          endif
+                ! If the boundary condition is UserDefined, write the
+                ! description of what type of user defined BC.
 
-          ! If this boundary condition has allocated memory for data
-          ! sets, write them.
+                if (jj == UserDefined) then
 
-          if( cgnsDoms(nn)%bocoInfo(mm)%dataSetAllocated ) then
+                    ! Go to the current boundary condition and write
+                    ! the appropriate data.
 
-             ! Set the pointer for the data sets to make the code
-             ! more readable.
+                    call cg_goto_f(cgnsInd, cgnsBase, ierr, &
+                                   "Zone_t", cgnsZone(nn), &
+                                   "ZoneBC_t", 1, "BC_t", ii, "end")
+                    if (ierr /= CG_OK) &
+                        call terminate("writeCGNSGridFrame", &
+                                       "Something wrong when calling cg_goto_f")
 
-             dataSet => cgnsDoms(nn)%bocoInfo(mm)%dataSet
+                    call cg_user_data_write_f( &
+                        cgnsDoms(nn)%bocoInfo(mm)%userDefinedName, ierr)
+                    if (ierr /= CG_OK) &
+                         call terminate("writeCGNSGridFrame", &
+                         "Something wrong when calling &
+                         &cg_user_data_write_f")
+                end if
 
-             ! Loop over the number of data sets for this boundary face.
+                ! If this boundary condition has allocated memory for data
+                ! sets, write them.
 
-             do ll=1,cgnsDoms(nn)%bocoInfo(mm)%nDataSet
+                if (cgnsDoms(nn)%bocoInfo(mm)%dataSetAllocated) then
 
-                ! Create the bc dataset node.
+                    ! Set the pointer for the data sets to make the code
+                    ! more readable.
 
-                call cg_dataset_write_f(cgnsInd, cgnsBase,       &
-                     cgnsZone(nn), ii,        &
-                     dataSet(ll)%datasetName, &
-                     dataSet(ll)%BCType, jj, ierr)
-                if(ierr /= CG_OK)                     &
-                     call terminate("writeCGNSGridFrame", &
-                     "Something wrong when calling &
-                     &cg_dataset_write_f")
+                    dataSet => cgnsDoms(nn)%bocoInfo(mm)%dataSet
 
-                ! Write the Dirichlet and Neumann boundary condition
-                ! data sets if present.
+                    ! Loop over the number of data sets for this boundary face.
 
-                call writeBcdataArrays(dataSet(ll)%ndirichletArrays, &
-                     dataSet(ll)%dirichletArrays,  &
-                     Dirichlet)
+                    do ll = 1, cgnsDoms(nn)%bocoInfo(mm)%nDataSet
 
-                call writeBcdataArrays(dataSet(ll)%nneumannArrays, &
-                     dataSet(ll)%neumannArrays,  &
-                     Neumann)
-             enddo
-          endif
+                        ! Create the bc dataset node.
 
-       enddo loopBocos
-    enddo zoneLoop
+                        call cg_dataset_write_f(cgnsInd, cgnsBase, &
+                                                cgnsZone(nn), ii, &
+                                                dataSet(ll)%datasetName, &
+                                                dataSet(ll)%BCType, jj, ierr)
+                        if (ierr /= CG_OK) &
+                             call terminate("writeCGNSGridFrame", &
+                             "Something wrong when calling &
+                             &cg_dataset_write_f")
 
-    !=================================================================
+                        ! Write the Dirichlet and Neumann boundary condition
+                        ! data sets if present.
 
-  contains
+                        call writeBcdataArrays(dataSet(ll)%ndirichletArrays, &
+                                               dataSet(ll)%dirichletArrays, &
+                                               Dirichlet)
 
-    !===============================================================
+                        call writeBcdataArrays(dataSet(ll)%nneumannArrays, &
+                                               dataSet(ll)%neumannArrays, &
+                                               Neumann)
+                    end do
+                end if
 
-    subroutine writeBcdataArrays(narr, arr, DirNeu)
-      !
-      !         writeBcdataArrays writes the given bc data set arrays,
-      !         either of the dirichlet or neumann type, to the correct
-      !         position in the CGNS file.
-      !
-      implicit none
-      !
-      !        Subroutine arguments.
-      !
-      integer, intent(in) :: DirNeu
+            end do loopBocos
+        end do zoneLoop
 
-      integer(kind=intType), intent(in) :: narr
-      type(cgnsBcdataArray), pointer, dimension(:) :: arr
-      !
-      !        Local variables.
-      !
-      integer :: ierr
-      integer :: realTypeCGNS
-
-      integer(kind=intType) :: i, j, kk
-
-      real(kind=cgnsRealType), dimension(:), allocatable :: tmp
-
-      ! Return immediately if narr == 0, i.e. if there is nothing
-      ! to write.
-
-      if(narr == 0) return
-
-      ! Set the cgns real type.
-
-      realTypeCGNS = setCGNSRealType()
-
-      ! Create the BCData node.
-
-      call cg_bcdata_write_f(cgnsInd, cgnsBase, cgnsZone(nn), &
-           ii, jj, DirNeu, ierr)
-      if(ierr /= CG_OK)                    &
-           call terminate("writeBcdataArrays", &
-           "Something wrong when calling &
-           &cg_bcdata_write_f")
-
-      ! Loop over the number of data arrays.
-
-      loopDataArrays: do kk=1,narr
-
-         ! Go to the main node of the data arrays.
-
-         call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t",       &
-              cgnsZone(nn), "ZoneBC_t", 1, "BC_t", ii, &
-              "BCDataSet_t", jj, "BCData_t", DirNeu, "end")
-         if(ierr /= CG_OK)                    &
-              call terminate("writeBcdataArrays", &
-              "Something wrong when calling cg_goto_f")
-
-         ! Determine the total size of the prescribed data,
-         ! allocate the memory for tmp and copy the data into it.
-
-         j = arr(kk)%dataDim(1)
-         do i=2,arr(kk)%nDimensions
-            j = j*arr(kk)%dataDim(i)
-         enddo
+        !=================================================================
 
-         allocate(tmp(j), stat=ierr)
-         if(ierr /= 0)                         &
-              call terminate("writeBcdataArrays", &
-              "Memory allocation failure for tmp")
-
-         tmp = arr(kk)%dataArr
+    contains
 
-         ! Write the data array and release the memory of tmp
-         ! afterwards.
+        !===============================================================
 
-         call cg_array_write_f(arr(kk)%arrayName,   realTypeCGNS,    &
-              arr(kk)%nDimensions, int(arr(kk)%dataDim, cgsize_t), &
-              tmp, ierr)
-         if(ierr /= CG_OK)                    &
-              call terminate("writeBcdataArrays", &
-              "Something wrong when calling &
-              &cg_array_write_f")
+        subroutine writeBcdataArrays(narr, arr, DirNeu)
+            !
+            !         writeBcdataArrays writes the given bc data set arrays,
+            !         either of the dirichlet or neumann type, to the correct
+            !         position in the CGNS file.
+            !
+            implicit none
+            !
+            !        Subroutine arguments.
+            !
+            integer, intent(in) :: DirNeu
 
-         deallocate(tmp, stat=ierr)
-         if(ierr /= 0)                         &
-              call terminate("writeBcdataArrays", &
-              "Deallocation failure for tmp")
+            integer(kind=intType), intent(in) :: narr
+            type(cgnsBcdataArray), pointer, dimension(:) :: arr
+            !
+            !        Local variables.
+            !
+            integer :: ierr
+            integer :: realTypeCGNS
 
-         ! Write the dimensional info for this array.
+            integer(kind=intType) :: i, j, kk
 
-         call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t",       &
-              cgnsZone(nn), "ZoneBC_t", 1, "BC_t", ii, &
-              "BCDataSet_t", jj, "BCData_t", DirNeu,   &
-              "DataArray_t", kk, "end")
-         if(ierr /= CG_OK)                    &
-              call terminate("writeBcdataArrays", &
-              "Something wrong when calling cg_goto_f")
+            real(kind=cgnsRealType), dimension(:), allocatable :: tmp
 
-         call cg_dataclass_write_f(Dimensional, ierr)
-         if(ierr /= CG_OK)                    &
-              call terminate("writeBcdataArrays", &
-              "Something wrong when calling &
-              &cg_dataclass_write_f")
+            ! Return immediately if narr == 0, i.e. if there is nothing
+            ! to write.
 
-         call cg_units_write_f(arr(kk)%mass,  arr(kk)%len,  &
-              arr(kk)%time,  arr(kk)%temp, &
-              arr(kk)%angle, ierr)
-         if(ierr /= CG_OK)                    &
-              call terminate("writeBcdataArrays", &
-              "Something wrong when calling &
-              &cg_units_write_f")
+            if (narr == 0) return
 
-      enddo loopDataArrays
+            ! Set the cgns real type.
 
-    end subroutine writeBcdataArrays
-  end subroutine writeCGNSGridFrame
+            realTypeCGNS = setCGNSRealType()
 
-  subroutine writeCoorCGNSZone(zone, cgnsZone)
-    !
-    !       writeCoorCGNSZone writes the coordinates of the given zone
-    !       to the cgns file(s).
-    !
-    use constants
-    use block
-    use cgnsGrid
-    use cgnsNames
-    use communication
-    use inputIO
-    use su_cgns
-    use outputMod
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer, intent(in) :: cgnsZone
+            ! Create the BCData node.
 
-    integer(kind=intType), intent(in) :: zone
-
-    !
-    !      Local variables.
-    !
-    integer :: ierr, tmp
-    integer :: bufSize, realTypeCGNS, cgnsBase, fileInd
+            call cg_bcdata_write_f(cgnsInd, cgnsBase, cgnsZone(nn), &
+                                   ii, jj, DirNeu, ierr)
+            if (ierr /= CG_OK) &
+                 call terminate("writeBcdataArrays", &
+                 "Something wrong when calling &
+                 &cg_bcdata_write_f")
 
-    integer, dimension(mpi_status_size) :: mpiStatus
-
-    integer, dimension(:), allocatable :: proc
-
-    integer(kind=intType) :: i, j, nn, mm, ll, ind
-    integer(kind=intType) :: nBlocks, nSubBlocks, offset
-    integer(kind=intType) :: sizeCGNSWriteType
-
-    integer(kind=intType), dimension(6)     :: ii
-    integer(kind=intType), dimension(nProc) :: nMessages
+            ! Loop over the number of data arrays.
 
-    integer(kind=intType), dimension(:,:,:), allocatable :: subRanges
-
-    real(kind=realType), dimension(:), allocatable :: buffer
+            loopDataArrays: do kk = 1, narr
 
-    real(kind=4), dimension(:) , allocatable :: coor4
-    real(kind=8), dimension(:) , allocatable :: coor8
-
-    character(len=maxCGNSNameLen), dimension(3) :: coorNames
+                ! Go to the main node of the data arrays.
 
-    ! Store the number of local blocks and the offset in
-    ! blocksCGNSblock for this zone a bit easier.
+                call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", &
+                               cgnsZone(nn), "ZoneBC_t", 1, "BC_t", ii, &
+                               "BCDataSet_t", jj, "BCData_t", DirNeu, "end")
+                if (ierr /= CG_OK) &
+                    call terminate("writeBcdataArrays", &
+                                   "Something wrong when calling cg_goto_f")
 
-    offset  = nBlocksCGNSblock(zone-1)
-    nBlocks = nBlocksCGNSblock(zone) - offset
-
-    ! Determine the amount of block parts each processor will send to
-    ! processor 0.
-
-    call mpi_gather(nBlocks, 1, adflow_integer, nMessages, 1, &
-         adflow_integer, 0, ADflow_comm_world, ierr)
-
-    ! At the moment the writing of the cgns file is sequential and done
-    ! by processor 0. This means that this processor gathers all info
-    ! from the other processors and writes it to file.
+                ! Determine the total size of the prescribed data,
+                ! allocate the memory for tmp and copy the data into it.
 
-    rootproc: if(myID == 0) then
+                j = arr(kk)%dataDim(1)
+                do i = 2, arr(kk)%nDimensions
+                    j = j * arr(kk)%dataDim(i)
+                end do
 
-       ! I am processor 0 and poor me has to do all the work.
+                allocate (tmp(j), stat=ierr)
+                if (ierr /= 0) &
+                    call terminate("writeBcdataArrays", &
+                                   "Memory allocation failure for tmp")
 
-       ! First determine the number of subblocks into the original cgns
-       ! block is split.
+                tmp = arr(kk)%dataArr
 
-       nSubBlocks = 0
-       do i=1,nProc
-          nSubBlocks = nSubBlocks + nMessages(i)
-       enddo
+                ! Write the data array and release the memory of tmp
+                ! afterwards.
 
-       ! Allocate the memory for the ranges and the processor
-       ! where the subblock is stored.
+                call cg_array_write_f(arr(kk)%arrayName, realTypeCGNS, &
+                                      arr(kk)%nDimensions, int(arr(kk)%dataDim, cgsize_t), &
+                                      tmp, ierr)
+                if (ierr /= CG_OK) &
+                     call terminate("writeBcdataArrays", &
+                     "Something wrong when calling &
+                     &cg_array_write_f")
+
+                deallocate (tmp, stat=ierr)
+                if (ierr /= 0) &
+                    call terminate("writeBcdataArrays", &
+                                   "Deallocation failure for tmp")
+
+                ! Write the dimensional info for this array.
+
+                call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", &
+                               cgnsZone(nn), "ZoneBC_t", 1, "BC_t", ii, &
+                               "BCDataSet_t", jj, "BCData_t", DirNeu, &
+                               "DataArray_t", kk, "end")
+                if (ierr /= CG_OK) &
+                    call terminate("writeBcdataArrays", &
+                                   "Something wrong when calling cg_goto_f")
+
+                call cg_dataclass_write_f(Dimensional, ierr)
+                if (ierr /= CG_OK) &
+                     call terminate("writeBcdataArrays", &
+                     "Something wrong when calling &
+                     &cg_dataclass_write_f")
 
-       allocate(subRanges(3,2,nSubBlocks), proc(nSubBlocks), stat=ierr)
-       if(ierr /= 0)                         &
-            call terminate("writeCoorCGNSZone", &
-            "Memory allocation failure for subRanges &
-            &and proc")
+                call cg_units_write_f(arr(kk)%mass, arr(kk)%len, &
+                                      arr(kk)%time, arr(kk)%temp, &
+                                      arr(kk)%angle, ierr)
+                if (ierr /= CG_OK) &
+                     call terminate("writeBcdataArrays", &
+                     "Something wrong when calling &
+                     &cg_units_write_f")
 
-       ! Determine the processor ID's where the subRanges are stored.
-       ! Note that 1 must be substracted, because the processor
-       ! numbering starts at 0.
+            end do loopDataArrays
 
-       nSubBlocks = 0
-       do i=1,nProc
-          do j=1,nMessages(i)
-             nSubBlocks = nSubBlocks + 1
-             proc(nSubBlocks) = i - 1
-          enddo
-       enddo
+        end subroutine writeBcdataArrays
+    end subroutine writeCGNSGridFrame
 
-       ! Determine the subRanges of the subblocks stored on this
-       ! processor.  Note that nBlocks can be 0.
+    subroutine writeCoorCGNSZone(zone, cgnsZone)
+        !
+        !       writeCoorCGNSZone writes the coordinates of the given zone
+        !       to the cgns file(s).
+        !
+        use constants
+        use block
+        use cgnsGrid
+        use cgnsNames
+        use communication
+        use inputIO
+        use su_cgns
+        use outputMod
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer, intent(in) :: cgnsZone
 
-       do i=1,nBlocks
+        integer(kind=intType), intent(in) :: zone
 
-          ! Store the local block ID a bit easier in j and copy the
-          ! range. This range is identical for spectral solutions and
-          ! thus taking the first is okay.
+        !
+        !      Local variables.
+        !
+        integer :: ierr, tmp
+        integer :: bufSize, realTypeCGNS, cgnsBase, fileInd
 
-          j = blocksCGNSblock(i+offset)
+        integer, dimension(mpi_status_size) :: mpiStatus
 
-          subRanges(1,1,i) = flowDoms(j,1,1)%iBegor
-          subRanges(1,2,i) = flowDoms(j,1,1)%iEndor
+        integer, dimension(:), allocatable :: proc
 
-          subRanges(2,1,i) = flowDoms(j,1,1)%jBegor
-          subRanges(2,2,i) = flowDoms(j,1,1)%jEndor
+        integer(kind=intType) :: i, j, nn, mm, ll, ind
+        integer(kind=intType) :: nBlocks, nSubBlocks, offset
+        integer(kind=intType) :: sizeCGNSWriteType
 
-          subRanges(3,1,i) = flowDoms(j,1,1)%kBegor
-          subRanges(3,2,i) = flowDoms(j,1,1)%kEndor
+        integer(kind=intType), dimension(6) :: ii
+        integer(kind=intType), dimension(nProc) :: nMessages
 
-          ! To avoid duplication of overlap regions, add 1 to the lower
-          ! boundaries if the lower boundary is larger than 1.
+        integer(kind=intType), dimension(:, :, :), allocatable :: subRanges
 
-          if(subRanges(1,1,i) > 1) subRanges(1,1,i) = subRanges(1,1,i) +1
-          if(subRanges(2,1,i) > 1) subRanges(2,1,i) = subRanges(2,1,i) +1
-          if(subRanges(3,1,i) > 1) subRanges(3,1,i) = subRanges(3,1,i) +1
+        real(kind=realType), dimension(:), allocatable :: buffer
 
-       enddo
+        real(kind=4), dimension(:), allocatable :: coor4
+        real(kind=8), dimension(:), allocatable :: coor8
 
-       ! The rest of the block ranges must be obtained by
-       ! communication.
+        character(len=maxCGNSNameLen), dimension(3) :: coorNames
 
-       do i=(nBlocks+1),nSubBlocks
+        ! Store the number of local blocks and the offset in
+        ! blocksCGNSblock for this zone a bit easier.
 
-          call mpi_recv(ii, 6, adflow_integer, proc(i), proc(i), &
-               ADflow_comm_world, mpiStatus, ierr)
+        offset = nBlocksCGNSblock(zone - 1)
+        nBlocks = nBlocksCGNSblock(zone) - offset
 
-          subRanges(1,1,i) = ii(1)
-          subRanges(1,2,i) = ii(2)
-          subRanges(2,1,i) = ii(3)
-          subRanges(2,2,i) = ii(4)
-          subRanges(3,1,i) = ii(5)
-          subRanges(3,2,i) = ii(6)
-       enddo
+        ! Determine the amount of block parts each processor will send to
+        ! processor 0.
 
-       ! Determine the size of the largest subblock and allocate
-       ! the memory for the corresponding buffer.
+        call mpi_gather(nBlocks, 1, adflow_integer, nMessages, 1, &
+                        adflow_integer, 0, ADflow_comm_world, ierr)
 
-       bufSize = 0
-       do i=1,nSubBlocks
-          ll = (subRanges(1,2,i) - subRanges(1,1,i) + 1) &
-               * (subRanges(2,2,i) - subRanges(2,1,i) + 1) &
-               * (subRanges(3,2,i) - subRanges(3,1,i) + 1)
-          bufSize = max(bufSize, ll)
-       enddo
+        ! At the moment the writing of the cgns file is sequential and done
+        ! by processor 0. This means that this processor gathers all info
+        ! from the other processors and writes it to file.
 
-       allocate(buffer(bufSize), stat=ierr)
-       if(ierr /= 0)                         &
-            call terminate("writeCoorCGNSZone", &
-            "Memory allocation failure for buffer")
-
-       ! Allocate the memory for the array used to write the three
-       ! coordinates and set the cgns names for them. Note that the
-       ! coor array is of type character and therefore the size in
-       ! bytes must be allocated.
-       ll = cgnsDoms(zone)%il * cgnsDoms(zone)%jl * cgnsDoms(zone)%kl
-
-       select case (precisionGrid)
-       case (precisionSingle)
-          allocate(coor4(ll), coor8(0), stat=ierr)
-
-       case (precisionDouble)
-          allocate(coor8(ll), coor4(0), stat=ierr)
-       end select
-       if(ierr /= 0)                         &
-            call terminate("writeCoorCGNSZone", &
-            "Memory allocation failure for coor")
-
-       coorNames(1) = cgnsCoorx
-       coorNames(2) = cgnsCoory
-       coorNames(3) = cgnsCoorz
-
-       ! Loop over the number of grid files to be written.
-
-       gridLoopRoot: do ind=1,nGridsToWrite
-
-          ! Store the file and base ID a bit easier.
-
-          fileInd  = fileIDs(ind)
-          cgnsBase = cgnsBases(ind)
-
-          ! Loop over the three coordinates.
-
-          coorLoopRoot: do nn=1,3
-
-             ! Loop over the number of subblocks stored
-             ! on this processor.
-
-             do mm=1,nBlocks
-
-                ! Fill buffer with the correct coordinate.
-
-                call storeCoorInBuffer(buffer, zone, ind, nn, &
-                     blocksCGNSblock(mm+offset), tmp)
-
-                ! And store it in coor, depending on the precision used.
-
-                select case (precisionGrid)
-                case (precisionSingle)
-                   call copyDataBufSinglePrecision(&
-                        coor4, buffer,  &
-                        1_intType,         &
-                        1_intType,         &
-                        1_intType,         &
-                        cgnsDoms(zone)%il, &
-                        cgnsDoms(zone)%jl, &
-                        cgnsDoms(zone)%kl, &
-                        subRanges(1,1,mm))
-                case (precisionDouble)
-                   call copyDataBufDoublePrecision(&
-                        coor8, buffer, &
-                        1_intType,         &
-                        1_intType,         &
-                        1_intType,         &
-                        cgnsDoms(zone)%il, &
-                        cgnsDoms(zone)%jl, &
-                        cgnsDoms(zone)%kl, &
-                        subRanges(1,1,mm))
-                end select
-
-             enddo
-
-             ! Loop over the number of subblocks stored on
-             ! other processors.
-
-             do mm=(nBlocks+1),nSubBlocks
-
-                ! Receive the range of subblock mm and copy it into coor.
-
-                call mpi_recv(buffer, bufSize, adflow_real, proc(mm), &
-                     proc(mm)+1, ADflow_comm_world, mpiStatus, ierr)
-
-                select case (precisionGrid)
-                case (precisionSingle)
-                   call copyDataBufSinglePrecision(coor4, buffer,      &
-                        1_intType,         &
-                        1_intType,         &
-                        1_intType,         &
-                        cgnsDoms(zone)%il, &
-                        cgnsDoms(zone)%jl, &
-                        cgnsDoms(zone)%kl, &
-                        subRanges(1,1,mm))
-                case (precisionDouble)
-                   call copyDataBufDoublePrecision(coor8, buffer,      &
-                        1_intType,         &
-                        1_intType,         &
-                        1_intType,         &
-                        cgnsDoms(zone)%il, &
-                        cgnsDoms(zone)%jl, &
-                        cgnsDoms(zone)%kl, &
-                        subRanges(1,1,mm))
-                end select
-
-             enddo
-
-             ! Write this coordinate to file; tmp is used to store
-             ! the actual number of the coordinate; usually this is
-             ! equal to nn.
-             select case (precisionGrid)
-             case (precisionSingle)
-                call cg_coord_write_f(fileInd, cgnsBase, cgnsZone, &
-                     realSingle, coorNames(nn), coor4, tmp, ierr)
-             case (precisionDouble)
-                call cg_coord_write_f(fileInd, cgnsBase, cgnsZone, &
-                     realDouble, coorNames(nn), coor8, tmp, ierr)
-             end select
-
-             if(ierr /= CG_OK)                    &
-                  call terminate("writeCoorCGNSZone", &
-                  "Something wrong when calling &
-                  &cg_coord_write_f")
-
-             ! Write the units, if possible.
-
-             if( cgnsDoms(zone)%gridUnitsSpecified ) then
-
-                ! Go to the correct place in the grid file.
-
-                call cg_goto_f(fileInd, cgnsBase, ierr, &
-                     "Zone_t", cgnsZone,      &
-                     "GridCoordinates_t", 1,  &
-                     "DataArray_t", tmp, "end")
-                if(ierr /= CG_OK)                    &
-                     call terminate("writeCoorCGNSZone", &
-                     "Something wrong when calling cg_goto_f")
-
-                ! Write the units.
-
-                call cg_units_write_f(cgnsDoms(zone)%mass, &
-                     cgnsDoms(zone)%len,  &
-                     cgnsDoms(zone)%time, &
-                     cgnsDoms(zone)%temp, &
-                     cgnsDoms(zone)%angle, ierr)
-                if(ierr /= CG_OK)                    &
-                     call terminate("writeCoorCGNSZone", &
-                     "Something wrong when calling &
-                     &cg_units_write_f")
-             endif
+        rootproc: if (myID == 0) then
 
-          enddo coorLoopRoot
+            ! I am processor 0 and poor me has to do all the work.
 
-       enddo gridLoopRoot
+            ! First determine the number of subblocks into the original cgns
+            ! block is split.
 
-       ! Deallocate the memory which is only allocated on the
-       ! root processor.
+            nSubBlocks = 0
+            do i = 1, nProc
+                nSubBlocks = nSubBlocks + nMessages(i)
+            end do
 
-       deallocate(subRanges, proc, coor4, coor8, stat=ierr)
-       if(ierr /= 0) call terminate("writeCoorCGNSZone", &
-            "Deallocation error on root proc")
+            ! Allocate the memory for the ranges and the processor
+            ! where the subblock is stored.
 
-    else rootproc
+            allocate (subRanges(3, 2, nSubBlocks), proc(nSubBlocks), stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("writeCoorCGNSZone", &
+                 "Memory allocation failure for subRanges &
+                 &and proc")
 
-       ! I am not the root processor and I may have to send data to
-       ! the root processor.
+            ! Determine the processor ID's where the subRanges are stored.
+            ! Note that 1 must be substracted, because the processor
+            ! numbering starts at 0.
 
-       ! Loop over the number of subblocks stored on this processor
-       ! to send the size to the root processor. Determine in the
-       ! same loop the size of the largest subblock.
+            nSubBlocks = 0
+            do i = 1, nProc
+                do j = 1, nMessages(i)
+                    nSubBlocks = nSubBlocks + 1
+                    proc(nSubBlocks) = i - 1
+                end do
+            end do
 
-       bufSize = 0
-       do i=1,nBlocks
+            ! Determine the subRanges of the subblocks stored on this
+            ! processor.  Note that nBlocks can be 0.
 
-          ! Store the local block ID a bit easier in j and copy the
-          ! range. This range is identical for spectral solutions and
-          ! thus taking the first is okay.
+            do i = 1, nBlocks
 
-          j = blocksCGNSblock(i+offset)
+                ! Store the local block ID a bit easier in j and copy the
+                ! range. This range is identical for spectral solutions and
+                ! thus taking the first is okay.
 
-          ii(1) = flowDoms(j,1,1)%iBegor
-          ii(2) = flowDoms(j,1,1)%iEndor
-          ii(3) = flowDoms(j,1,1)%jBegor
-          ii(4) = flowDoms(j,1,1)%jEndor
-          ii(5) = flowDoms(j,1,1)%kBegor
-          ii(6) = flowDoms(j,1,1)%kEndor
+                j = blocksCGNSblock(i + offset)
 
-          ! To avoid duplication of overlap regions, add 1 to the lower
-          ! boundaries if the lower boundary is larger than 1.
+                subRanges(1, 1, i) = flowDoms(j, 1, 1)%iBegor
+                subRanges(1, 2, i) = flowDoms(j, 1, 1)%iEndor
 
-          if(ii(1) > 1) ii(1) = ii(1) +1
-          if(ii(3) > 1) ii(3) = ii(3) +1
-          if(ii(5) > 1) ii(5) = ii(5) +1
+                subRanges(2, 1, i) = flowDoms(j, 1, 1)%jBegor
+                subRanges(2, 2, i) = flowDoms(j, 1, 1)%jEndor
 
-          ! Send the buffer ii to processor 0.
+                subRanges(3, 1, i) = flowDoms(j, 1, 1)%kBegor
+                subRanges(3, 2, i) = flowDoms(j, 1, 1)%kEndor
 
-          call mpi_send(ii, 6, adflow_integer, 0, myID, &
-               ADflow_comm_world, ierr)
+                ! To avoid duplication of overlap regions, add 1 to the lower
+                ! boundaries if the lower boundary is larger than 1.
 
-          ! Check the size of this subblock and update bufSize
-          ! if needed.
+                if (subRanges(1, 1, i) > 1) subRanges(1, 1, i) = subRanges(1, 1, i) + 1
+                if (subRanges(2, 1, i) > 1) subRanges(2, 1, i) = subRanges(2, 1, i) + 1
+                if (subRanges(3, 1, i) > 1) subRanges(3, 1, i) = subRanges(3, 1, i) + 1
 
-          ll = (ii(2) - ii(1) + 1) * (ii(4) - ii(3) + 1) &
-               * (ii(6) - ii(5) + 1)
-          bufSize = max(bufSize, ll)
-       enddo
+            end do
 
-       ! Allocate the memory for buffer.
+            ! The rest of the block ranges must be obtained by
+            ! communication.
 
-       allocate(buffer(bufSize), stat=ierr)
-       if(ierr /= 0)                         &
-            call terminate("writeCoorCGNSZone", &
-            "Memory allocation failure for buffer")
+            do i = (nBlocks + 1), nSubBlocks
+
+                call mpi_recv(ii, 6, adflow_integer, proc(i), proc(i), &
+                              ADflow_comm_world, mpiStatus, ierr)
+
+                subRanges(1, 1, i) = ii(1)
+                subRanges(1, 2, i) = ii(2)
+                subRanges(2, 1, i) = ii(3)
+                subRanges(2, 2, i) = ii(4)
+                subRanges(3, 1, i) = ii(5)
+                subRanges(3, 2, i) = ii(6)
+            end do
+
+            ! Determine the size of the largest subblock and allocate
+            ! the memory for the corresponding buffer.
+
+            bufSize = 0
+            do i = 1, nSubBlocks
+                ll = (subRanges(1, 2, i) - subRanges(1, 1, i) + 1) &
+                     * (subRanges(2, 2, i) - subRanges(2, 1, i) + 1) &
+                     * (subRanges(3, 2, i) - subRanges(3, 1, i) + 1)
+                bufSize = max(bufSize, ll)
+            end do
 
-       ! Loop over the number of grids to be written.
+            allocate (buffer(bufSize), stat=ierr)
+            if (ierr /= 0) &
+                call terminate("writeCoorCGNSZone", &
+                               "Memory allocation failure for buffer")
+
+            ! Allocate the memory for the array used to write the three
+            ! coordinates and set the cgns names for them. Note that the
+            ! coor array is of type character and therefore the size in
+            ! bytes must be allocated.
+            ll = cgnsDoms(zone)%il * cgnsDoms(zone)%jl * cgnsDoms(zone)%kl
+
+            select case (precisionGrid)
+            case (precisionSingle)
+                allocate (coor4(ll), coor8(0), stat=ierr)
+
+            case (precisionDouble)
+                allocate (coor8(ll), coor4(0), stat=ierr)
+            end select
+            if (ierr /= 0) &
+                call terminate("writeCoorCGNSZone", &
+                               "Memory allocation failure for coor")
+
+            coorNames(1) = cgnsCoorx
+            coorNames(2) = cgnsCoory
+            coorNames(3) = cgnsCoorz
+
+            ! Loop over the number of grid files to be written.
+
+            gridLoopRoot: do ind = 1, nGridsToWrite
+
+                ! Store the file and base ID a bit easier.
+
+                fileInd = fileIDs(ind)
+                cgnsBase = cgnsBases(ind)
+
+                ! Loop over the three coordinates.
+
+                coorLoopRoot: do nn = 1, 3
+
+                    ! Loop over the number of subblocks stored
+                    ! on this processor.
+
+                    do mm = 1, nBlocks
+
+                        ! Fill buffer with the correct coordinate.
+
+                        call storeCoorInBuffer(buffer, zone, ind, nn, &
+                                               blocksCGNSblock(mm + offset), tmp)
+
+                        ! And store it in coor, depending on the precision used.
+
+                        select case (precisionGrid)
+                        case (precisionSingle)
+                            call copyDataBufSinglePrecision( &
+                                coor4, buffer, &
+                                1_intType, &
+                                1_intType, &
+                                1_intType, &
+                                cgnsDoms(zone)%il, &
+                                cgnsDoms(zone)%jl, &
+                                cgnsDoms(zone)%kl, &
+                                subRanges(1, 1, mm))
+                        case (precisionDouble)
+                            call copyDataBufDoublePrecision( &
+                                coor8, buffer, &
+                                1_intType, &
+                                1_intType, &
+                                1_intType, &
+                                cgnsDoms(zone)%il, &
+                                cgnsDoms(zone)%jl, &
+                                cgnsDoms(zone)%kl, &
+                                subRanges(1, 1, mm))
+                        end select
+
+                    end do
+
+                    ! Loop over the number of subblocks stored on
+                    ! other processors.
+
+                    do mm = (nBlocks + 1), nSubBlocks
+
+                        ! Receive the range of subblock mm and copy it into coor.
+
+                        call mpi_recv(buffer, bufSize, adflow_real, proc(mm), &
+                                      proc(mm) + 1, ADflow_comm_world, mpiStatus, ierr)
+
+                        select case (precisionGrid)
+                        case (precisionSingle)
+                            call copyDataBufSinglePrecision(coor4, buffer, &
+                                                            1_intType, &
+                                                            1_intType, &
+                                                            1_intType, &
+                                                            cgnsDoms(zone)%il, &
+                                                            cgnsDoms(zone)%jl, &
+                                                            cgnsDoms(zone)%kl, &
+                                                            subRanges(1, 1, mm))
+                        case (precisionDouble)
+                            call copyDataBufDoublePrecision(coor8, buffer, &
+                                                            1_intType, &
+                                                            1_intType, &
+                                                            1_intType, &
+                                                            cgnsDoms(zone)%il, &
+                                                            cgnsDoms(zone)%jl, &
+                                                            cgnsDoms(zone)%kl, &
+                                                            subRanges(1, 1, mm))
+                        end select
+
+                    end do
+
+                    ! Write this coordinate to file; tmp is used to store
+                    ! the actual number of the coordinate; usually this is
+                    ! equal to nn.
+                    select case (precisionGrid)
+                    case (precisionSingle)
+                        call cg_coord_write_f(fileInd, cgnsBase, cgnsZone, &
+                                              realSingle, coorNames(nn), coor4, tmp, ierr)
+                    case (precisionDouble)
+                        call cg_coord_write_f(fileInd, cgnsBase, cgnsZone, &
+                                              realDouble, coorNames(nn), coor8, tmp, ierr)
+                    end select
 
-       gridLoopOthers: do ind=1,nGridsToWrite
+                    if (ierr /= CG_OK) &
+                         call terminate("writeCoorCGNSZone", &
+                         "Something wrong when calling &
+                         &cg_coord_write_f")
 
-          ! Loop over the three coordinates.
+                    ! Write the units, if possible.
 
-          coorLoopOthers: do nn=1,3
+                    if (cgnsDoms(zone)%gridUnitsSpecified) then
 
-             ! Loop over the number of subblocks stored
-             ! on this processor.
+                        ! Go to the correct place in the grid file.
 
-             do mm=1,nBlocks
+                        call cg_goto_f(fileInd, cgnsBase, ierr, &
+                                       "Zone_t", cgnsZone, &
+                                       "GridCoordinates_t", 1, &
+                                       "DataArray_t", tmp, "end")
+                        if (ierr /= CG_OK) &
+                            call terminate("writeCoorCGNSZone", &
+                                           "Something wrong when calling cg_goto_f")
+
+                        ! Write the units.
 
-                ! Fill buffer with the correct coordinate and send it to
-                ! processor 0.
+                        call cg_units_write_f(cgnsDoms(zone)%mass, &
+                                              cgnsDoms(zone)%len, &
+                                              cgnsDoms(zone)%time, &
+                                              cgnsDoms(zone)%temp, &
+                                              cgnsDoms(zone)%angle, ierr)
+                        if (ierr /= CG_OK) &
+                             call terminate("writeCoorCGNSZone", &
+                             "Something wrong when calling &
+                             &cg_units_write_f")
+                    end if
+
+                end do coorLoopRoot
+
+            end do gridLoopRoot
+
+            ! Deallocate the memory which is only allocated on the
+            ! root processor.
 
-                call storeCoorInBuffer(buffer, zone, ind, nn, &
-                     blocksCGNSblock(mm+offset), tmp)
+            deallocate (subRanges, proc, coor4, coor8, stat=ierr)
+            if (ierr /= 0) call terminate("writeCoorCGNSZone", &
+                                          "Deallocation error on root proc")
 
-                call mpi_send(buffer, tmp, adflow_real, 0, myID+1, &
-                     ADflow_comm_world, ierr)
-             enddo
+            else rootproc
 
-          enddo coorLoopOthers
-       enddo gridLoopOthers
+            ! I am not the root processor and I may have to send data to
+            ! the root processor.
 
-    endif rootproc
+            ! Loop over the number of subblocks stored on this processor
+            ! to send the size to the root processor. Determine in the
+            ! same loop the size of the largest subblock.
 
-    ! Release the memory of buffer.
+            bufSize = 0
+            do i = 1, nBlocks
 
-    deallocate(buffer, stat=ierr)
-    if(ierr /= 0)                         &
-         call terminate("writeCoorCGNSZone", &
-         "Deallocation failure for buffer")
-  end subroutine writeCoorCGNSZone
+                ! Store the local block ID a bit easier in j and copy the
+                ! range. This range is identical for spectral solutions and
+                ! thus taking the first is okay.
 
-  !      ==================================================================
+                j = blocksCGNSblock(i + offset)
 
-  subroutine storeCoorInBuffer(buffer, zone, ind, coorID, &
-       blockID, nn)
-    !
-    !       storeCoorInBuffer stores the given coordinate for the given
-    !       blockID. The total size of the buffer is returned in nn.
-    !
-    use constants
-    use block
-    use cgnsGrid
-    use IOModule
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer, intent(out)              :: nn
-    integer(kind=intType), intent(in) :: zone, ind
-    integer(kind=intType), intent(in) :: coorID, blockID
+                ii(1) = flowDoms(j, 1, 1)%iBegor
+                ii(2) = flowDoms(j, 1, 1)%iEndor
+                ii(3) = flowDoms(j, 1, 1)%jBegor
+                ii(4) = flowDoms(j, 1, 1)%jEndor
+                ii(5) = flowDoms(j, 1, 1)%kBegor
+                ii(6) = flowDoms(j, 1, 1)%kEndor
 
-    real(kind=realType), dimension(*), intent(out) :: buffer
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k
-    integer(kind=intType) :: iStart, iEnd, jStart, jEnd, kStart, kEnd
+                ! To avoid duplication of overlap regions, add 1 to the lower
+                ! boundaries if the lower boundary is larger than 1.
 
-    real(kind=realType) :: LRefInv
+                if (ii(1) > 1) ii(1) = ii(1) + 1
+                if (ii(3) > 1) ii(3) = ii(3) + 1
+                if (ii(5) > 1) ii(5) = ii(5) + 1
 
-    ! Compute the multiplication factor to obtain the original
-    ! coordinates. Note that LRef is corrected to 1.0 when the
-    ! coordinates should be written in meters. This happens when
-    ! the grid is read.
+                ! Send the buffer ii to processor 0.
 
-    LRefInv = one/cgnsDoms(zone)%LRef
+                call mpi_send(ii, 6, adflow_integer, 0, myID, &
+                              ADflow_comm_world, ierr)
 
-    ! Set the range to be stored to the entire block.
+                ! Check the size of this subblock and update bufSize
+                ! if needed.
 
-    iStart = 1
-    iEnd   = flowDoms(blockID,1,1)%il
-    jStart = 1
-    jEnd   = flowDoms(blockID,1,1)%jl
-    kStart = 1
-    kEnd   = flowDoms(blockID,1,1)%kl
+                ll = (ii(2) - ii(1) + 1) * (ii(4) - ii(3) + 1) &
+                     * (ii(6) - ii(5) + 1)
+                bufSize = max(bufSize, ll)
+            end do
 
-    ! To avoid duplication of overlap regions, add 1 to the lower
-    ! boundaries if the lower boundary in the original block is
-    ! larger than 1.
+            ! Allocate the memory for buffer.
 
-    if(flowDoms(blockID,1,1)%iBegor > 1) iStart = iStart + 1
-    if(flowDoms(blockID,1,1)%jBegor > 1) jStart = jStart + 1
-    if(flowDoms(blockID,1,1)%kBegor > 1) kStart = kStart + 1
+            allocate (buffer(bufSize), stat=ierr)
+            if (ierr /= 0) &
+                call terminate("writeCoorCGNSZone", &
+                               "Memory allocation failure for buffer")
 
-    ! Store the coordinate in the 1D buffer.
+            ! Loop over the number of grids to be written.
 
-    nn = 0
-    do k=kStart,kEnd
-       do j=jStart,jEnd
-          do i=iStart,iEnd
-             nn = nn + 1
-             buffer(nn) = LRefInv*IOVar(blockID,ind)%w(i,j,k,coorID)
-          enddo
-       enddo
-    enddo
+            gridLoopOthers: do ind = 1, nGridsToWrite
 
-  end subroutine storeCoorInBuffer
+                ! Loop over the three coordinates.
+
+                coorLoopOthers: do nn = 1, 3
+
+                    ! Loop over the number of subblocks stored
+                    ! on this processor.
+
+                    do mm = 1, nBlocks
+
+                        ! Fill buffer with the correct coordinate and send it to
+                        ! processor 0.
+
+                        call storeCoorInBuffer(buffer, zone, ind, nn, &
+                                               blocksCGNSblock(mm + offset), tmp)
+
+                        call mpi_send(buffer, tmp, adflow_real, 0, myID + 1, &
+                                      ADflow_comm_world, ierr)
+                    end do
+
+                end do coorLoopOthers
+            end do gridLoopOthers
+
+        end if rootproc
+
+        ! Release the memory of buffer.
+
+        deallocate (buffer, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("writeCoorCGNSZone", &
+                           "Deallocation failure for buffer")
+    end subroutine writeCoorCGNSZone
+
+    !      ==================================================================
+
+    subroutine storeCoorInBuffer(buffer, zone, ind, coorID, &
+                                 blockID, nn)
+        !
+        !       storeCoorInBuffer stores the given coordinate for the given
+        !       blockID. The total size of the buffer is returned in nn.
+        !
+        use constants
+        use block
+        use cgnsGrid
+        use IOModule
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer, intent(out) :: nn
+        integer(kind=intType), intent(in) :: zone, ind
+        integer(kind=intType), intent(in) :: coorID, blockID
+
+        real(kind=realType), dimension(*), intent(out) :: buffer
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k
+        integer(kind=intType) :: iStart, iEnd, jStart, jEnd, kStart, kEnd
+
+        real(kind=realType) :: LRefInv
+
+        ! Compute the multiplication factor to obtain the original
+        ! coordinates. Note that LRef is corrected to 1.0 when the
+        ! coordinates should be written in meters. This happens when
+        ! the grid is read.
+
+        LRefInv = one / cgnsDoms(zone)%LRef
+
+        ! Set the range to be stored to the entire block.
+
+        iStart = 1
+        iEnd = flowDoms(blockID, 1, 1)%il
+        jStart = 1
+        jEnd = flowDoms(blockID, 1, 1)%jl
+        kStart = 1
+        kEnd = flowDoms(blockID, 1, 1)%kl
+
+        ! To avoid duplication of overlap regions, add 1 to the lower
+        ! boundaries if the lower boundary in the original block is
+        ! larger than 1.
+
+        if (flowDoms(blockID, 1, 1)%iBegor > 1) iStart = iStart + 1
+        if (flowDoms(blockID, 1, 1)%jBegor > 1) jStart = jStart + 1
+        if (flowDoms(blockID, 1, 1)%kBegor > 1) kStart = kStart + 1
+
+        ! Store the coordinate in the 1D buffer.
+
+        nn = 0
+        do k = kStart, kEnd
+            do j = jStart, jEnd
+                do i = iStart, iEnd
+                    nn = nn + 1
+                    buffer(nn) = LRefInv * IOVar(blockID, ind)%w(i, j, k, coorID)
+                end do
+            end do
+        end do
+
+    end subroutine storeCoorInBuffer
 end module writeCGNSGrid
diff --git a/src/output/writeCGNSSurface.F90 b/src/output/writeCGNSSurface.F90
index 9de275ae7..c8cc8a979 100644
--- a/src/output/writeCGNSSurface.F90
+++ b/src/output/writeCGNSSurface.F90
@@ -2,2835 +2,2830 @@ module writeCGNSSurface
 
 contains
 
-  subroutine writeCGNSSurfaceSol(famList)
-    !
-    !       writeCGNSSurfaceSol and its subroutines write the surface
-    !       solution file(s). The unknowns are stored in the center of the
-    !       surface quadrilaterals.
-    !
-    use cgnsGrid
-    use communication
-    use su_cgns
-    use outputMod
-    use inputIteration
-    use block
-    use blockPointers
-    use cgnsNames
-    use extraOutput
-    use utils, only : terminate, setPointers
-    use surfaceFamilies, only : famNames
-    use sorting, only : qsortStrings
-    use commonFormats, only : strings
-    implicit none
-
-    ! Input Param
-    integer(kind=intType), dimension(:), intent(in) :: famList
-
-    !
-    !      Local parameter, the cell dimension.
-    !
-    integer, parameter :: celldim = 2
-    !
-    !      Local variables.
-    !
-    integer :: cgnsInd, ierr
-
-    integer(kind=intType) :: nn, mm, ll, i
-    integer(kind=intType) :: nSolVar, nZonesWritten
-    character(len=maxStringLen) :: errorMessage
-    integer(kind=intType) :: iSurf, nisoSurfVar
-    character(len=maxCGNSNameLen), dimension(:), allocatable :: &
-         solNames, isoSurfSolNames
-    character(len=maxCGNSNameLen) :: contourName
-    character(len=maxCGNSNAMELen), dimension(:), allocatable :: famListStr
-    ! Determine the number and names of the solution files.
-
-    call surfSolFileNamesWrite
-
-    ! Return immediately if no surface solution files must
-    ! be written.
-
-    if(nSurfSolToWrite == 0) return
-
-    ! Write a message that the solution file(s) are being written.
-    ! Of course only processor 0 does this.
-
-    if(myID == 0 .and. printIterations) then
-       print "(a)", "#"
-       print "(a,a)", "# Writing surface solution file(s): ", trim(surfSolFileNames(1))
-    endif
-
-    ! Allocate the memory for the fileIDs and the bases.
-
-    allocate(fileIDs(nSurfSolToWrite), cgnsBases(nSurfSolToWrite), &
-         stat=ierr)
-    if(ierr /= 0)                           &
-         call terminate("writeCGNSSurfaceSol", &
-         "Memory allocation failure for fileIDs &
-         &and cgnsBases")
+    subroutine writeCGNSSurfaceSol(famList)
+        !
+        !       writeCGNSSurfaceSol and its subroutines write the surface
+        !       solution file(s). The unknowns are stored in the center of the
+        !       surface quadrilaterals.
+        !
+        use cgnsGrid
+        use communication
+        use su_cgns
+        use outputMod
+        use inputIteration
+        use block
+        use blockPointers
+        use cgnsNames
+        use extraOutput
+        use utils, only: terminate, setPointers
+        use surfaceFamilies, only: famNames
+        use sorting, only: qsortStrings
+        use commonFormats, only: strings
+        implicit none
+
+        ! Input Param
+        integer(kind=intType), dimension(:), intent(in) :: famList
+
+        !
+        !      Local parameter, the cell dimension.
+        !
+        integer, parameter :: celldim = 2
+        !
+        !      Local variables.
+        !
+        integer :: cgnsInd, ierr
+
+        integer(kind=intType) :: nn, mm, ll, i
+        integer(kind=intType) :: nSolVar, nZonesWritten
+        character(len=maxStringLen) :: errorMessage
+        integer(kind=intType) :: iSurf, nisoSurfVar
+        character(len=maxCGNSNameLen), dimension(:), allocatable :: &
+            solNames, isoSurfSolNames
+        character(len=maxCGNSNameLen) :: contourName
+        character(len=maxCGNSNAMELen), dimension(:), allocatable :: famListStr
+        ! Determine the number and names of the solution files.
+
+        call surfSolFileNamesWrite
+
+        ! Return immediately if no surface solution files must
+        ! be written.
+
+        if (nSurfSolToWrite == 0) return
+
+        ! Write a message that the solution file(s) are being written.
+        ! Of course only processor 0 does this.
+
+        if (myID == 0 .and. printIterations) then
+            print "(a)", "#"
+            print "(a,a)", "# Writing surface solution file(s): ", trim(surfSolFileNames(1))
+        end if
+
+        ! Allocate the memory for the fileIDs and the bases.
+
+        allocate (fileIDs(nSurfSolToWrite), cgnsBases(nSurfSolToWrite), &
+                  stat=ierr)
+        if (ierr /= 0) &
+             call terminate("writeCGNSSurfaceSol", &
+             "Memory allocation failure for fileIDs &
+             &and cgnsBases")
 
-    ! Open the cgns file(s) and write the header. This is only done
-    ! by processor 0.
+        ! Open the cgns file(s) and write the header. This is only done
+        ! by processor 0.
 
-    testRootProc: if(myID == 0) then
+        testRootProc: if (myID == 0) then
 
-       ! Loop over the number of surface solution files to write.
+            ! Loop over the number of surface solution files to write.
 
-       solLoop: do nn=1,nSurfSolToWrite
+            solLoop: do nn = 1, nSurfSolToWrite
 
-          ! Open the cgns file for writing and check if it went okay.
-          ! Store the file index for later purposes.
-          call cg_open_f(surfSolFileNames(nn), mode_write, cgnsInd, &
-               ierr)
-          if(ierr /= CG_OK) then
-             write(errorMessage, strings) "File ", trim(surfSolFileNames(nn)), &
-               " could not be opened by cgns for writing"
-             call terminate("writeCGNSSurfaceSol", errorMessage)
-          endif
+                ! Open the cgns file for writing and check if it went okay.
+                ! Store the file index for later purposes.
+                call cg_open_f(surfSolFileNames(nn), mode_write, cgnsInd, &
+                               ierr)
+                if (ierr /= CG_OK) then
+                    write (errorMessage, strings) "File ", trim(surfSolFileNames(nn)), &
+                        " could not be opened by cgns for writing"
+                    call terminate("writeCGNSSurfaceSol", errorMessage)
+                end if
 
-          fileIDs(nn) = cgnsInd
+                fileIDs(nn) = cgnsInd
 
-          ! Create the base.
+                ! Create the base.
 
-          call cg_base_write_f(cgnsInd, "BaseSurfaceSol", celldim, &
-               cgnsPhysdim, cgnsBases(nn), ierr)
-          if(ierr /= CG_OK)                      &
-               call terminate("writeCGNSSurfaceSol", &
-               "Something wrong when calling &
-               &cg_base_write_f")
+                call cg_base_write_f(cgnsInd, "BaseSurfaceSol", celldim, &
+                                     cgnsPhysdim, cgnsBases(nn), ierr)
+                if (ierr /= CG_OK) &
+                     call terminate("writeCGNSSurfaceSol", &
+                     "Something wrong when calling &
+                     &cg_base_write_f")
+
+                ! Write the header in the cgns file.
+                call writeCGNSHeader(cgnsInd, cgnsBases(nn))
+
+            end do solLoop
 
-          ! Write the header in the cgns file.
-          call writeCGNSHeader(cgnsInd, cgnsBases(nn))
+        end if testRootProc
 
-       enddo solLoop
+        ! Determine the number of variables to be written to the surface
+        ! solution file as well as the cgns names.
 
-    endif testRootProc
+        call numberOfSurfSolVariables(nSolVar)
 
-    ! Determine the number of variables to be written to the surface
-    ! solution file as well as the cgns names.
+        allocate (solNames(nSolVar), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("writeCGNSSurfaceSol", &
+                           "Memory allocation failure for solNames")
 
-    call numberOfSurfSolVariables(nSolVar)
+        call surfSolNames(solNames)
 
-    allocate(solNames(nSolVar), stat=ierr)
-    if(ierr /= 0)                           &
-         call terminate("writeCGNSSurfaceSol", &
-         "Memory allocation failure for solNames")
+        ! Based on the famList generate the list of strings corresponding
+        ! to the families we want to write.
+        allocate (famListStr(size(famList)))
+        do i = 1, size(famList)
+            famListStr(i) = trim(famNames(famList(i)))
+        end do
 
-    call surfSolNames(solNames)
+        ! Sort them so we can binary search.
+        call qsortStrings(famListStr, size(famListStr))
 
-    ! Based on the famList generate the list of strings corresponding
-    ! to the families we want to write.
-    allocate(famListStr(size(famList)))
-    do i=1,size(famList)
-       famListStr(i) = trim(famNames(famList(i)))
-    end do
+        ! Loop over the number of cgns blocks and its boundary subfaces
+        ! and write the cell centered surface solution of the subface.
 
-    ! Sort them so we can binary search.
-    call qsortStrings(famListStr, size(famListStr))
+        nZonesWritten = 0
+        zoneLoop: do nn = 1, cgnsNDom
 
-    ! Loop over the number of cgns blocks and its boundary subfaces
-    ! and write the cell centered surface solution of the subface.
+            ! Determine the number of blocks on this processor that belong
+            ! to this cgns block.
 
-    nZonesWritten = 0
-    zoneLoop: do nn=1,cgnsNDom
+            mm = nblocksCGNSblock(nn) - nblocksCGNSblock(nn - 1)
 
-       ! Determine the number of blocks on this processor that belong
-       ! to this cgns block.
+            ! Loop over the number of boundary subfaces of the original
+            ! cgns block and write the cell centered surface solution to
+            ! the cgns surface file.
 
-       mm = nblocksCGNSblock(nn) - nblocksCGNSblock(nn-1)
+            do ll = 1, cgnsDoms(nn)%nBocos
 
-       ! Loop over the number of boundary subfaces of the original
-       ! cgns block and write the cell centered surface solution to
-       ! the cgns surface file.
+                ! Only write the solution to file if this is a true subface.
+                if (cgnsDoms(nn)%bocoInfo(ll)%actualFace) &
+                    call writeSurfsolCGNSZone(nn, mm, ll, nSolVar, solNames, &
+                                              nZonesWritten, .false., famListStr)
+            end do
 
-       do ll=1,cgnsDoms(nn)%nBocos
+            ! Loop over the number of internal block boundaries of the
+            ! original grid and write the periodic boundaries.
 
-          ! Only write the solution to file if this is a true subface.
-          if( cgnsDoms(nn)%bocoInfo(ll)%actualFace )                 &
-               call writeSurfsolCGNSZone(nn, mm, ll, nSolVar, solNames, &
-               nZonesWritten, .false., famListStr)
-       enddo
+            do ll = 1, cgnsDoms(nn)%n1to1
 
-       ! Loop over the number of internal block boundaries of the
-       ! original grid and write the periodic boundaries.
+                ! Only periodic boundaries are written; check for this.
 
-       do ll=1,cgnsDoms(nn)%n1to1
+                if (cgnsDoms(nn)%conn1to1(ll)%periodic) &
+                    call writeSurfsolCGNSZone(nn, mm, ll, nSolVar, solNames, &
+                                              nZonesWritten, .true., famListStr)
+            end do
+        end do zoneLoop
 
-          ! Only periodic boundaries are written; check for this.
+        ! Check if isosurface will be written. These will be written to
+        ! a new base
 
-          if( cgnsDoms(nn)%conn1to1(ll)%periodic )                   &
-               call writeSurfsolCGNSZone(nn, mm, ll, nSolVar, solNames, &
-               nZonesWritten, .true., famListStr)
-       enddo
-    enddo zoneLoop
+        testIsoSurafce: if (nIsoSurface > 0) then
 
+            allocate (cgnsIsoSurfBases(nSurfSolToWrite), stat=ierr)
+            testRootProc2: if (myID == 0) then
 
-    ! Check if isosurface will be written. These will be written to
-    ! a new base
+                ! Loop over the number of surface solution files
 
-    testIsoSurafce: if (nIsoSurface > 0)  then
+                solLoop2: do nn = 1, nSurfSolToWrite
 
-       allocate (cgnsIsoSurfBases(nSurfSolToWrite), stat=ierr)
-       testRootProc2: if (myID == 0) then
+                    ! Create the new base
 
-          ! Loop over the number of surface solution files
-
-          solLoop2: do nn=1,nSurfSolToWrite
-
-             ! Create the new base
-
-             cgnsInd = fileIDs(nn)
-             call cg_base_write_f(cgnsInd, "IsoSurfaces", celldim, &
-                  cgnsPhysDim, cgnsIsoSurfBases(nn), ierr)
-             if (ierr /= CG_OK) &
-                  call terminate("WriteCGNSSurfaceSol", &
-                  "Something wrong when calling cg_base_write_f for isoSurface")
-          end do solLoop2
-       end if testRootProc2
+                    cgnsInd = fileIDs(nn)
+                    call cg_base_write_f(cgnsInd, "IsoSurfaces", celldim, &
+                                         cgnsPhysDim, cgnsIsoSurfBases(nn), ierr)
+                    if (ierr /= CG_OK) &
+                        call terminate("WriteCGNSSurfaceSol", &
+                                       "Something wrong when calling cg_base_write_f for isoSurface")
+                end do solLoop2
+            end if testRootProc2
 
-       ! Determine the number of variables to be written to the
-       ! isosurface itself well as the cgns names.
-
-       call numberOfIsoSurfVariables(nIsoSurfVar)
-
-       if (nIsoSurfVar > 0) then
-          allocate(isoSurfSolNames(nIsoSurfVar), stat=ierr)
-          if(ierr /= 0)                           &
-               call terminate("writeCGNSSurfaceSol", &
-               "Memory allocation failure for isoNames")
-          call isoSurfNames(isoSurfSolNames)
-       end if
+            ! Determine the number of variables to be written to the
+            ! isosurface itself well as the cgns names.
 
-       solLoop3: do ll=1,nSurfSolToWrite ! Numer of spectral instances!
-          ! Allocate fn and fc for each domain:
-          do nn=1,nDom
-             call setPointers(nn, 1, ll)
-             allocate(flowDoms(nn, 1, ll)%fn(il, jl, kl))
-             allocate(flowDoms(nn, 1, ll)%fc(1:ie, 1:je, 1:ke))
-          end do
+            call numberOfIsoSurfVariables(nIsoSurfVar)
 
-          ! Finally loop over the required isoSurfaces
-          do iSurf=1,nIsoSurface
-             call computeIsoVariable(isoSurfaceNames(iSurf), ll, isoValues(iSurf))
+            if (nIsoSurfVar > 0) then
+                allocate (isoSurfSolNames(nIsoSurfVar), stat=ierr)
+                if (ierr /= 0) &
+                    call terminate("writeCGNSSurfaceSol", &
+                                   "Memory allocation failure for isoNames")
+                call isoSurfNames(isoSurfSolNames)
+            end if
 
-             write(contourName, "(3(A), F7.4)") "Contour ", trim(isoSurfaceNames(iSurf)), "=", isoValues(iSurf)
-             call writeIsoSurface(contourName, ll, nIsoSurfVar, isoSurfSolNames)
-          end do
+            solLoop3: do ll = 1, nSurfSolToWrite ! Numer of spectral instances!
+                ! Allocate fn and fc for each domain:
+                do nn = 1, nDom
+                    call setPointers(nn, 1, ll)
+                    allocate (flowDoms(nn, 1, ll)%fn(il, jl, kl))
+                    allocate (flowDoms(nn, 1, ll)%fc(1:ie, 1:je, 1:ke))
+                end do
 
-          ! deAllocate fn and fc for each domain:
-          do nn=1,nDom
-             deallocate(flowDoms(nn, 1, ll)%fn, flowDoms(nn, 1, ll)%fc)
-          end do
-       end do solLoop3
+                ! Finally loop over the required isoSurfaces
+                do iSurf = 1, nIsoSurface
+                    call computeIsoVariable(isoSurfaceNames(iSurf), ll, isoValues(iSurf))
 
-       ! Free memory for bases
-       deallocate(cgnsIsoSurfBases, stat=ierr)
-       if (nIsoSurfVar > 0) then
-          deallocate(isoSurfSolNames)
-       end if
-    end if testIsoSurafce
+                    write (contourName, "(3(A), F7.4)") "Contour ", trim(isoSurfaceNames(iSurf)), "=", isoValues(iSurf)
+                    call writeIsoSurface(contourName, ll, nIsoSurfVar, isoSurfSolNames)
+                end do
 
-    ! Close the cgns file(s). Only processor 0 does this.
+                ! deAllocate fn and fc for each domain:
+                do nn = 1, nDom
+                    deallocate (flowDoms(nn, 1, ll)%fn, flowDoms(nn, 1, ll)%fc)
+                end do
+            end do solLoop3
 
-    if(myID == 0) then
-       do nn=1,nSurfSolToWrite
-          call cg_close_f(fileIDs(nn), ierr)
-          if(ierr /= CG_OK)                      &
-               call terminate("writeCGNSSurfaceSol", &
-               "Something wrong when calling cg_close_f")
-       enddo
-    end if
+            ! Free memory for bases
+            deallocate (cgnsIsoSurfBases, stat=ierr)
+            if (nIsoSurfVar > 0) then
+                deallocate (isoSurfSolNames)
+            end if
+        end if testIsoSurafce
 
-    ! Deallocate the memory of solNames, fileIDs and cgnsBases.
+        ! Close the cgns file(s). Only processor 0 does this.
 
-    deallocate(solNames, fileIDs, cgnsBases, stat=ierr)
-    if(ierr /= 0)                           &
-         call terminate("writeCGNSSurfaceSol", &
-         "Deallocation error for solNames, fileIDs &
-         &and cgnsBases")
+        if (myID == 0) then
+            do nn = 1, nSurfSolToWrite
+                call cg_close_f(fileIDs(nn), ierr)
+                if (ierr /= CG_OK) &
+                    call terminate("writeCGNSSurfaceSol", &
+                                   "Something wrong when calling cg_close_f")
+            end do
+        end if
 
-    ! Wait until all processors (especially processor 0) reach
-    ! this point.
+        ! Deallocate the memory of solNames, fileIDs and cgnsBases.
 
-    call mpi_barrier(ADflow_comm_world, ierr)
+        deallocate (solNames, fileIDs, cgnsBases, stat=ierr)
+        if (ierr /= 0) &
+             call terminate("writeCGNSSurfaceSol", &
+             "Deallocation error for solNames, fileIDs &
+             &and cgnsBases")
 
-    ! Write a message that the solution file(s) have been written.
-    ! Of course only processor 0 does this.
-
-    if(myID == 0 .and. printIterations) then
-       print "(a)", "# Surface solution file(s) written"
-       print "(a)", "#"
-    endif
-  end subroutine writeCGNSSurfaceSol
-  subroutine surfSolFileNamesWrite
-    !
-    !       surfSolFileNamesWrite determines the names and number of
-    !       surface solution files to be written.
-    !
-    use inputIO
-    use inputPhysics
-    use inputTimeSpectral
-    use monitor
-    use outputMod
-    use utils, only : terminate
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: nn
-
-    character(len=7) :: intString
-
-    ! In contrast to the grids and volume solutions, possible states
-    ! in the past don't need to be written for the surface. Therefore
-    ! the memory allocation can be done independent of the test of
-    ! the equation mode we are solving for.
-
-    allocate(surfSolFileNames(nTimeIntervalsSpectral), stat=ierr)
-    if(ierr /= 0)                             &
-         call terminate("surfSolFileNamesWrite", &
-         "Memory allocation failure for surfSolFileNames")
-
-    ! Set the number of surface solution files to be written.
-
-    if( writeSurface ) then
-       nSurfSolToWrite = nTimeIntervalsSpectral
-    else
-       nSurfSolToWrite = 0
-    endif
-
-    ! Determine the name(s) of the solution file(s), depending on
-    ! the situation.
-
-    select case (equationMode)
-
-    case (steady)
-
-       ! Steady state computation. Possible previous files will
-       ! be overwritten.
-
-       surfSolFileNames(1) = surfaceSolFile
-
-       !===============================================================
-
-    case (unsteady)
-
-       ! Unsteady computation. A suffix is added depending on the
-       ! time step.
-
-       write(intString,"(i4.4)") timeStepUnsteady + nTimeStepsRestart
-       intString = adjustl(intString)
-
-       surfSolFileNames(1) = trim(surfaceSolFile)//"&
-            &Timestep"//trim(intString)
-
-       !===============================================================
-
-    case (timeSpectral)
-
-       ! Time spectral computation. A suffix is added depending on
-       ! the time instance.
-
-       do nn=1,nTimeIntervalsSpectral
-          write(intString,"(i7)") nn
-          intString = adjustl(intString)
-
-          surfSolFileNames(nn) = trim(surfaceSolFile)//"&
-               &Spectral"//trim(intString)
-       enddo
-
-    end select
-
-  end subroutine surfSolFileNamesWrite
-
-  subroutine writeSurfsolCGNSZone(zone, nBlocks, subface, nSolVar, &
-       solNames, nZonesWritten, periodic, famListStr)
-    !
-    !       writeSurfsolCGNSZone writes a surface solution of the given
-    !       zone (block) and boundary subface to the cgns surface file(s).
-    !       A distinction must be made between true boundaries and
-    !       periodic boundaries; the latter are a special kind of internal
-    !       block boundaries. This is indicated by the logical periodic.
-    !
-    use block
-    use cgnsGrid
-    use communication
-    use inputIO
-    use su_cgns
-    use outputMod
-    use utils, only : terminate, convertToLowerCase
-    use sorting, only : bsearchStrings
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in)    :: zone, nBlocks
-    integer(kind=intType), intent(in)    :: subface, nSolVar
-    integer(kind=intType), intent(inout) :: nZonesWritten
-    character(len=*), dimension(:), intent(in) :: famListStr
-    character(len=*), dimension(*), intent(in) :: solNames
-
-    logical, intent(in) :: periodic
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-    integer :: source, size
-    integer :: cgnsBase, cgnsZone, cgnsSol, cgnsInd
+        ! Wait until all processors (especially processor 0) reach
+        ! this point.
 
-    integer, dimension(mpi_status_size) :: mpiStatus
+        call mpi_barrier(ADflow_comm_world, ierr)
 
-    integer(kind=intType) :: i, offset
-    integer(kind=intType) :: mm, mBlocks, faceID, nSubfaces
-
-    ! extents of the subface in terms of the i,j,k indices of the cgns zone
-    integer(kind=intType) :: zone_iBeg, zone_jBeg, zone_kBeg, zone_iEnd, zone_jEnd, zone_kEnd
-    ! extents of the subface of i,j indices in terms of i,j,k, indices of cgns zone.
-    integer(kind=intType) :: subface_iBeg, subface_jBeg, subface_iEnd, subface_jEnd
-    integer(kind=intType) :: il, jl, ind
-
-    integer(kind=intType), dimension(nProc)       :: nMessages
-    integer(kind=intType), dimension(3,2,nBlocks) :: nodalRange
-    integer(kind=intType), dimension(3,2,nBlocks) :: cellRange
-
-    integer(kind=intType), dimension(:,:,:), allocatable :: rangeNode
-    integer(kind=intType), dimension(:,:,:), allocatable :: rangeCell
-
-    real(kind=realType), dimension(:), allocatable :: buffer
-
-    logical :: iOverlap, jOverlap, kOverlap
-    logical :: viscousSubface
-    logical, dimension(nBlocks) :: contributeToFace
-    character(len=maxCGNSNameLen) :: tmpStr
-
-    ! Store the offset in blocksCGNSblock for this zone in offset.
-
-    offset  = nBlocksCGNSblock(zone-1)
-
-    ! Determine the range of this subface and whether or not this
-    ! is a viscous subface. For a periodic boundary this info is
-    ! retrieved from the internal block boundary; for all others from
-    ! the physical boundary.
-
-    if( periodic ) then
-
-       ! Periodic boundary. Set viscousSubface to false.
-
-       viscousSubface = .false.
-
-       ! Store the nodal range of the cgns subface a bit easier.
-       ! Make sure that zone_iBeg, zone_jBeg and zone_kBeg contain the lowest values
-       ! and zone_iEnd, zone_jEnd and zone_kEnd the highest.
-
-       zone_iBeg = min(cgnsDoms(zone)%conn1to1(subface)%iBeg, &
-            cgnsDoms(zone)%conn1to1(subface)%iEnd)
-       zone_jBeg = min(cgnsDoms(zone)%conn1to1(subface)%jBeg, &
-            cgnsDoms(zone)%conn1to1(subface)%jEnd)
-       zone_kBeg = min(cgnsDoms(zone)%conn1to1(subface)%kBeg, &
-            cgnsDoms(zone)%conn1to1(subface)%kEnd)
-       zone_iEnd = max(cgnsDoms(zone)%conn1to1(subface)%iBeg, &
-            cgnsDoms(zone)%conn1to1(subface)%iEnd)
-       zone_jEnd = max(cgnsDoms(zone)%conn1to1(subface)%jBeg, &
-            cgnsDoms(zone)%conn1to1(subface)%jEnd)
-       zone_kEnd = max(cgnsDoms(zone)%conn1to1(subface)%kBeg, &
-            cgnsDoms(zone)%conn1to1(subface)%kEnd)
-
-    else
-
-       ! True physical boundary.
-       ! If this is an extrapolation boundary (usually singular line),
-       ! return. You don't want that info in the solution file.
-
-       if(cgnsDoms(zone)%bocoInfo(subface)%BCType == Extrap) return
-
-       ! Possibly do not write this surface if it is not in the famList
-       tmpStr = trim(CGNSDoms(zone)%bocoInfo(subface)%wallBCName)
-       call convertToLowerCase(tmpStr)
-       if (bsearchStrings(tmpStr, famListStr) == 0) then
-          return
-       end if
-
-       zone_iBeg = min(cgnsDoms(zone)%bocoInfo(subface)%iBeg, &
-            cgnsDoms(zone)%bocoInfo(subface)%iEnd)
-       zone_jBeg = min(cgnsDoms(zone)%bocoInfo(subface)%jBeg, &
-            cgnsDoms(zone)%bocoInfo(subface)%jEnd)
-       zone_kBeg = min(cgnsDoms(zone)%bocoInfo(subface)%kBeg, &
-            cgnsDoms(zone)%bocoInfo(subface)%kEnd)
-       zone_iEnd = max(cgnsDoms(zone)%bocoInfo(subface)%iBeg, &
-            cgnsDoms(zone)%bocoInfo(subface)%iEnd)
-       zone_jEnd = max(cgnsDoms(zone)%bocoInfo(subface)%jBeg, &
-            cgnsDoms(zone)%bocoInfo(subface)%jEnd)
-       zone_kEnd = max(cgnsDoms(zone)%bocoInfo(subface)%kBeg, &
-            cgnsDoms(zone)%bocoInfo(subface)%kEnd)
+        ! Write a message that the solution file(s) have been written.
+        ! Of course only processor 0 does this.
 
-       ! Determine whether or not this is a viscous subface.
+        if (myID == 0 .and. printIterations) then
+            print "(a)", "# Surface solution file(s) written"
+            print "(a)", "#"
+        end if
+    end subroutine writeCGNSSurfaceSol
+    subroutine surfSolFileNamesWrite
+        !
+        !       surfSolFileNamesWrite determines the names and number of
+        !       surface solution files to be written.
+        !
+        use inputIO
+        use inputPhysics
+        use inputTimeSpectral
+        use monitor
+        use outputMod
+        use utils, only: terminate
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: nn
+
+        character(len=7) :: intString
+
+        ! In contrast to the grids and volume solutions, possible states
+        ! in the past don't need to be written for the surface. Therefore
+        ! the memory allocation can be done independent of the test of
+        ! the equation mode we are solving for.
+
+        allocate (surfSolFileNames(nTimeIntervalsSpectral), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("surfSolFileNamesWrite", &
+                           "Memory allocation failure for surfSolFileNames")
+
+        ! Set the number of surface solution files to be written.
+
+        if (writeSurface) then
+            nSurfSolToWrite = nTimeIntervalsSpectral
+        else
+            nSurfSolToWrite = 0
+        end if
+
+        ! Determine the name(s) of the solution file(s), depending on
+        ! the situation.
+
+        select case (equationMode)
+
+        case (steady)
+
+            ! Steady state computation. Possible previous files will
+            ! be overwritten.
+
+            surfSolFileNames(1) = surfaceSolFile
+
+            !===============================================================
+
+        case (unsteady)
+
+            ! Unsteady computation. A suffix is added depending on the
+            ! time step.
+
+            write (intString, "(i4.4)") timeStepUnsteady + nTimeStepsRestart
+            intString = adjustl(intString)
+
+            surfSolFileNames(1) = trim(surfaceSolFile)//"&
+                 &Timestep"//trim(intString)
+
+            !===============================================================
+
+        case (timeSpectral)
+
+            ! Time spectral computation. A suffix is added depending on
+            ! the time instance.
+
+            do nn = 1, nTimeIntervalsSpectral
+                write (intString, "(i7)") nn
+                intString = adjustl(intString)
+
+                surfSolFileNames(nn) = trim(surfaceSolFile)//"&
+                     &Spectral"//trim(intString)
+            end do
+
+        end select
+
+    end subroutine surfSolFileNamesWrite
+
+    subroutine writeSurfsolCGNSZone(zone, nBlocks, subface, nSolVar, &
+                                    solNames, nZonesWritten, periodic, famListStr)
+        !
+        !       writeSurfsolCGNSZone writes a surface solution of the given
+        !       zone (block) and boundary subface to the cgns surface file(s).
+        !       A distinction must be made between true boundaries and
+        !       periodic boundaries; the latter are a special kind of internal
+        !       block boundaries. This is indicated by the logical periodic.
+        !
+        use block
+        use cgnsGrid
+        use communication
+        use inputIO
+        use su_cgns
+        use outputMod
+        use utils, only: terminate, convertToLowerCase
+        use sorting, only: bsearchStrings
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: zone, nBlocks
+        integer(kind=intType), intent(in) :: subface, nSolVar
+        integer(kind=intType), intent(inout) :: nZonesWritten
+        character(len=*), dimension(:), intent(in) :: famListStr
+        character(len=*), dimension(*), intent(in) :: solNames
 
-       viscousSubface = .false.
-       if(cgnsDoms(zone)%bocoInfo(subface)%BCType == &
-            NSWallAdiabatic          .or.     &
-            cgnsDoms(zone)%bocoInfo(subface)%BCType == &
-            NSWallIsothermal) viscousSubface = .true.
-
-    endif
+        logical, intent(in) :: periodic
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+        integer :: source, size
+        integer :: cgnsBase, cgnsZone, cgnsSol, cgnsInd
 
-    ! Update nZonesWritten.
+        integer, dimension(mpi_status_size) :: mpiStatus
 
-    nZonesWritten = nZonesWritten + 1
+        integer(kind=intType) :: i, offset
+        integer(kind=intType) :: mm, mBlocks, faceID, nSubfaces
 
-    ! Determine the face ID on which the given cgns subface is located.
+        ! extents of the subface in terms of the i,j,k indices of the cgns zone
+        integer(kind=intType) :: zone_iBeg, zone_jBeg, zone_kBeg, zone_iEnd, zone_jEnd, zone_kEnd
+        ! extents of the subface of i,j indices in terms of i,j,k, indices of cgns zone.
+        integer(kind=intType) :: subface_iBeg, subface_jBeg, subface_iEnd, subface_jEnd
+        integer(kind=intType) :: il, jl, ind
 
-    if(zone_iBeg == zone_iEnd) then
-       faceID = iMax
-       if(zone_iBeg == 1) faceID = iMin
-    else if(zone_jBeg == zone_jEnd) then
-       faceID = jMax
-       if(zone_jBeg == 1) faceID = jMin
-    else
-       faceID = kMax
-       if(zone_kBeg == 1) faceID = kMin
-    endif
+        integer(kind=intType), dimension(nProc) :: nMessages
+        integer(kind=intType), dimension(3, 2, nBlocks) :: nodalRange
+        integer(kind=intType), dimension(3, 2, nBlocks) :: cellRange
 
-    ! Determine the number of nodes in the two coordinate directions.
-    ! These are called il and jl.
+        integer(kind=intType), dimension(:, :, :), allocatable :: rangeNode
+        integer(kind=intType), dimension(:, :, :), allocatable :: rangeCell
 
-    select case (faceID)
-    case (iMin,iMax)
-       il = zone_jEnd - zone_jBeg + 1
-       jl = zone_kEnd - zone_kBeg + 1
+        real(kind=realType), dimension(:), allocatable :: buffer
 
-       subface_iBeg = zone_jBeg
-       subface_iEnd = zone_jEnd
+        logical :: iOverlap, jOverlap, kOverlap
+        logical :: viscousSubface
+        logical, dimension(nBlocks) :: contributeToFace
+        character(len=maxCGNSNameLen) :: tmpStr
 
-       subface_jBeg = zone_kBeg
-       subface_jEnd = zone_kEnd
+        ! Store the offset in blocksCGNSblock for this zone in offset.
+
+        offset = nBlocksCGNSblock(zone - 1)
+
+        ! Determine the range of this subface and whether or not this
+        ! is a viscous subface. For a periodic boundary this info is
+        ! retrieved from the internal block boundary; for all others from
+        ! the physical boundary.
+
+        if (periodic) then
+
+            ! Periodic boundary. Set viscousSubface to false.
+
+            viscousSubface = .false.
+
+            ! Store the nodal range of the cgns subface a bit easier.
+            ! Make sure that zone_iBeg, zone_jBeg and zone_kBeg contain the lowest values
+            ! and zone_iEnd, zone_jEnd and zone_kEnd the highest.
+
+            zone_iBeg = min(cgnsDoms(zone)%conn1to1(subface)%iBeg, &
+                            cgnsDoms(zone)%conn1to1(subface)%iEnd)
+            zone_jBeg = min(cgnsDoms(zone)%conn1to1(subface)%jBeg, &
+                            cgnsDoms(zone)%conn1to1(subface)%jEnd)
+            zone_kBeg = min(cgnsDoms(zone)%conn1to1(subface)%kBeg, &
+                            cgnsDoms(zone)%conn1to1(subface)%kEnd)
+            zone_iEnd = max(cgnsDoms(zone)%conn1to1(subface)%iBeg, &
+                            cgnsDoms(zone)%conn1to1(subface)%iEnd)
+            zone_jEnd = max(cgnsDoms(zone)%conn1to1(subface)%jBeg, &
+                            cgnsDoms(zone)%conn1to1(subface)%jEnd)
+            zone_kEnd = max(cgnsDoms(zone)%conn1to1(subface)%kBeg, &
+                            cgnsDoms(zone)%conn1to1(subface)%kEnd)
+
+        else
+
+            ! True physical boundary.
+            ! If this is an extrapolation boundary (usually singular line),
+            ! return. You don't want that info in the solution file.
+
+            if (cgnsDoms(zone)%bocoInfo(subface)%BCType == Extrap) return
+
+            ! Possibly do not write this surface if it is not in the famList
+            tmpStr = trim(CGNSDoms(zone)%bocoInfo(subface)%wallBCName)
+            call convertToLowerCase(tmpStr)
+            if (bsearchStrings(tmpStr, famListStr) == 0) then
+                return
+            end if
 
-    case (jMin,jMax)
-       il = zone_iEnd - zone_iBeg + 1
-       jl = zone_kEnd - zone_kBeg + 1
+            zone_iBeg = min(cgnsDoms(zone)%bocoInfo(subface)%iBeg, &
+                            cgnsDoms(zone)%bocoInfo(subface)%iEnd)
+            zone_jBeg = min(cgnsDoms(zone)%bocoInfo(subface)%jBeg, &
+                            cgnsDoms(zone)%bocoInfo(subface)%jEnd)
+            zone_kBeg = min(cgnsDoms(zone)%bocoInfo(subface)%kBeg, &
+                            cgnsDoms(zone)%bocoInfo(subface)%kEnd)
+            zone_iEnd = max(cgnsDoms(zone)%bocoInfo(subface)%iBeg, &
+                            cgnsDoms(zone)%bocoInfo(subface)%iEnd)
+            zone_jEnd = max(cgnsDoms(zone)%bocoInfo(subface)%jBeg, &
+                            cgnsDoms(zone)%bocoInfo(subface)%jEnd)
+            zone_kEnd = max(cgnsDoms(zone)%bocoInfo(subface)%kBeg, &
+                            cgnsDoms(zone)%bocoInfo(subface)%kEnd)
 
-       subface_iBeg = zone_iBeg
-       subface_iEnd = zone_iEnd
+            ! Determine whether or not this is a viscous subface.
 
-       subface_jBeg = zone_kBeg
-       subface_jEnd = zone_kEnd
+            viscousSubface = .false.
+            if (cgnsDoms(zone)%bocoInfo(subface)%BCType == &
+                NSWallAdiabatic .or. &
+                cgnsDoms(zone)%bocoInfo(subface)%BCType == &
+                NSWallIsothermal) viscousSubface = .true.
 
+        end if
 
-    case (kMin,kMax)
-       il = zone_iEnd - zone_iBeg + 1
-       jl = zone_jEnd - zone_jBeg + 1
+        ! Update nZonesWritten.
 
-       subface_iBeg = zone_iBeg
-       subface_iEnd = zone_iEnd
+        nZonesWritten = nZonesWritten + 1
 
-       subface_jBeg = zone_jBeg
-       subface_jEnd = zone_jEnd
+        ! Determine the face ID on which the given cgns subface is located.
 
-    end select
+        if (zone_iBeg == zone_iEnd) then
+            faceID = iMax
+            if (zone_iBeg == 1) faceID = iMin
+        else if (zone_jBeg == zone_jEnd) then
+            faceID = jMax
+            if (zone_jBeg == 1) faceID = jMin
+        else
+            faceID = kMax
+            if (zone_kBeg == 1) faceID = kMin
+        end if
 
-    ! Allocate the memory for buffer, which is used to communicate
-    ! the coordinates and solution. Assume that rind layers are
-    ! present, such that the solution uses most memory.
+        ! Determine the number of nodes in the two coordinate directions.
+        ! These are called il and jl.
 
-    size = (il+1)*(jl+1)
-    allocate(buffer(size), stat=ierr)
-    if(ierr /= 0)                            &
-         call terminate("writeSurfsolCGNSZone", &
-         "Memory allocation failure for buffer")
+        select case (faceID)
+        case (iMin, iMax)
+            il = zone_jEnd - zone_jBeg + 1
+            jl = zone_kEnd - zone_kBeg + 1
 
-    ! Determine the number of local blocks that actually share the
-    ! subface of the original cgns block. Note that nBlocks and
-    ! blocksCGNSblock contain information of the entire cgns block,
-    ! but not of the subface.
+            subface_iBeg = zone_jBeg
+            subface_iEnd = zone_jEnd
 
-    mBlocks = 0
-    do i=1,nBlocks
+            subface_jBeg = zone_kBeg
+            subface_jEnd = zone_kEnd
 
-       ! Store the current local block ID a bit easier.
+        case (jMin, jMax)
+            il = zone_iEnd - zone_iBeg + 1
+            jl = zone_kEnd - zone_kBeg + 1
 
-       mm = blocksCGNSblock(i+offset)
+            subface_iBeg = zone_iBeg
+            subface_iEnd = zone_iEnd
 
-       ! Determine whether or not the cgns subface is (partially)
-       ! part of the subblock mm. Initialize the overlaps to .false.
+            subface_jBeg = zone_kBeg
+            subface_jEnd = zone_kEnd
 
-       iOverlap = .false.
-       jOverlap = .false.
-       kOverlap = .false.
+        case (kMin, kMax)
+            il = zone_iEnd - zone_iBeg + 1
+            jl = zone_jEnd - zone_jBeg + 1
 
-       ! First check the face ID.
+            subface_iBeg = zone_iBeg
+            subface_iEnd = zone_iEnd
 
-       select case (faceID)
-       case (iMin)
-          if(flowDoms(mm,1,1)%iBegor == zone_iBeg) iOverlap = .true.
-       case (iMax)
-          if(flowDoms(mm,1,1)%iEndor == zone_iEnd) iOverlap = .true.
-       case (jMin)
-          if(flowDoms(mm,1,1)%jBegor == zone_jBeg) jOverlap = .true.
-       case (jMax)
-          if(flowDoms(mm,1,1)%jEndor == zone_jEnd) jOverlap = .true.
-       case (kMin)
-          if(flowDoms(mm,1,1)%kBegor == zone_kBeg) kOverlap = .true.
-       case (kMax)
-          if(flowDoms(mm,1,1)%kEndor == zone_kEnd) kOverlap = .true.
-       end select
+            subface_jBeg = zone_jBeg
+            subface_jEnd = zone_jEnd
 
-       ! Check the overlap for the other two directions.
+        end select
 
-       if(zone_iBeg < flowDoms(mm,1,1)%iEndor .and. &
-            zone_iEnd > flowDoms(mm,1,1)%iBegor) iOverlap = .true.
-       if(zone_jBeg < flowDoms(mm,1,1)%jEndor .and. &
-            zone_jEnd > flowDoms(mm,1,1)%jBegor) jOverlap = .true.
-       if(zone_kBeg < flowDoms(mm,1,1)%kEndor .and. &
-            zone_kEnd > flowDoms(mm,1,1)%kBegor) kOverlap = .true.
+        ! Allocate the memory for buffer, which is used to communicate
+        ! the coordinates and solution. Assume that rind layers are
+        ! present, such that the solution uses most memory.
 
-       ! If all three directions overlap, this subblock contributes
-       ! to the current cgns subface.
+        size = (il + 1) * (jl + 1)
+        allocate (buffer(size), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("writeSurfsolCGNSZone", &
+                           "Memory allocation failure for buffer")
+
+        ! Determine the number of local blocks that actually share the
+        ! subface of the original cgns block. Note that nBlocks and
+        ! blocksCGNSblock contain information of the entire cgns block,
+        ! but not of the subface.
+
+        mBlocks = 0
+        do i = 1, nBlocks
+
+            ! Store the current local block ID a bit easier.
+
+            mm = blocksCGNSblock(i + offset)
+
+            ! Determine whether or not the cgns subface is (partially)
+            ! part of the subblock mm. Initialize the overlaps to .false.
+
+            iOverlap = .false.
+            jOverlap = .false.
+            kOverlap = .false.
+
+            ! First check the face ID.
+
+            select case (faceID)
+            case (iMin)
+                if (flowDoms(mm, 1, 1)%iBegor == zone_iBeg) iOverlap = .true.
+            case (iMax)
+                if (flowDoms(mm, 1, 1)%iEndor == zone_iEnd) iOverlap = .true.
+            case (jMin)
+                if (flowDoms(mm, 1, 1)%jBegor == zone_jBeg) jOverlap = .true.
+            case (jMax)
+                if (flowDoms(mm, 1, 1)%jEndor == zone_jEnd) jOverlap = .true.
+            case (kMin)
+                if (flowDoms(mm, 1, 1)%kBegor == zone_kBeg) kOverlap = .true.
+            case (kMax)
+                if (flowDoms(mm, 1, 1)%kEndor == zone_kEnd) kOverlap = .true.
+            end select
 
-       if(iOverlap .and. jOverlap .and. kOverlap) then
-          contributeToFace(i) = .true.
-          mBlocks = mBlocks +1
+            ! Check the overlap for the other two directions.
 
-          ! Determine the nodal and cell subrange for this subface.
+            if (zone_iBeg < flowDoms(mm, 1, 1)%iEndor .and. &
+                zone_iEnd > flowDoms(mm, 1, 1)%iBegor) iOverlap = .true.
+            if (zone_jBeg < flowDoms(mm, 1, 1)%jEndor .and. &
+                zone_jEnd > flowDoms(mm, 1, 1)%jBegor) jOverlap = .true.
+            if (zone_kBeg < flowDoms(mm, 1, 1)%kEndor .and. &
+                zone_kEnd > flowDoms(mm, 1, 1)%kBegor) kOverlap = .true.
 
-          call determineSubranges
+            ! If all three directions overlap, this subblock contributes
+            ! to the current cgns subface.
 
-       else
-          contributeToFace(i) = .false.
-       endif
+            if (iOverlap .and. jOverlap .and. kOverlap) then
+                contributeToFace(i) = .true.
+                mBlocks = mBlocks + 1
 
-    enddo
+                ! Determine the nodal and cell subrange for this subface.
 
-    ! Determine the amount of surface parts each processor will send
-    ! to processor 0. The result needs only to be known on
-    ! processor 0.
+                call determineSubranges
 
-    call mpi_gather(mBlocks, 1, adflow_integer, nMessages, 1, &
-         adflow_integer, 0, ADflow_comm_world, ierr)
+            else
+                contributeToFace(i) = .false.
+            end if
 
-    ! At the moment the writing of the cgns file is sequential and done
-    ! by processor 0. This means that this processor gathers all info
-    ! from the other processors and writes it to file.
+        end do
 
-    rootproc: if(myID == 0) then
+        ! Determine the amount of surface parts each processor will send
+        ! to processor 0. The result needs only to be known on
+        ! processor 0.
 
-       ! I am processor 0 and poor me has to do all the work.
+        call mpi_gather(mBlocks, 1, adflow_integer, nMessages, 1, &
+                        adflow_integer, 0, ADflow_comm_world, ierr)
 
-       ! First determine the number of subfaces into the original cgns
-       ! subface is split.
+        ! At the moment the writing of the cgns file is sequential and done
+        ! by processor 0. This means that this processor gathers all info
+        ! from the other processors and writes it to file.
 
-       nSubfaces = 0
-       do i=1,nProc
-          nSubfaces = nSubfaces + nMessages(i)
-       enddo
+        rootproc: if (myID == 0) then
 
-       ! Allocate the memory for the nodal and cell ranges for each
-       ! of the contributing subfaces.
+            ! I am processor 0 and poor me has to do all the work.
 
-       allocate(rangeNode(3,2,nSubfaces), rangeCell(3,2,nSubfaces), &
-            stat=ierr)
-       if(ierr /= 0)                            &
-            call terminate("writeSurfsolCGNSZone", &
-            "Memory allocation failure for &
-            &rangeNode, etc")
+            ! First determine the number of subfaces into the original cgns
+            ! subface is split.
 
-       ! Store the nodal and cell subranges of all contributions in
-       ! rangeNode and rangeCell. Start with my own contributions.
-       ! Note that mBlocks could be 0.
+            nSubfaces = 0
+            do i = 1, nProc
+                nSubfaces = nSubfaces + nMessages(i)
+            end do
 
-       do i=1,mBlocks
+            ! Allocate the memory for the nodal and cell ranges for each
+            ! of the contributing subfaces.
 
-          ! Copy the local nodal and cell ranges.
+            allocate (rangeNode(3, 2, nSubfaces), rangeCell(3, 2, nSubfaces), &
+                      stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("writeSurfsolCGNSZone", &
+                 "Memory allocation failure for &
+                 &rangeNode, etc")
 
-          rangeNode(1,1,i) = nodalRange(1,1,i)
-          rangeNode(2,1,i) = nodalRange(2,1,i)
-          rangeNode(3,1,i) = nodalRange(3,1,i)
+            ! Store the nodal and cell subranges of all contributions in
+            ! rangeNode and rangeCell. Start with my own contributions.
+            ! Note that mBlocks could be 0.
 
-          rangeNode(1,2,i) = nodalRange(1,2,i)
-          rangeNode(2,2,i) = nodalRange(2,2,i)
-          rangeNode(3,2,i) = nodalRange(3,2,i)
+            do i = 1, mBlocks
 
-          rangeCell(1,1,i) = cellRange(1,1,i)
-          rangeCell(2,1,i) = cellRange(2,1,i)
-          rangeCell(3,1,i) = cellRange(3,1,i)
+                ! Copy the local nodal and cell ranges.
 
-          rangeCell(1,2,i) = cellRange(1,2,i)
-          rangeCell(2,2,i) = cellRange(2,2,i)
-          rangeCell(3,2,i) = cellRange(3,2,i)
+                rangeNode(1, 1, i) = nodalRange(1, 1, i)
+                rangeNode(2, 1, i) = nodalRange(2, 1, i)
+                rangeNode(3, 1, i) = nodalRange(3, 1, i)
 
-       enddo
+                rangeNode(1, 2, i) = nodalRange(1, 2, i)
+                rangeNode(2, 2, i) = nodalRange(2, 2, i)
+                rangeNode(3, 2, i) = nodalRange(3, 2, i)
 
-       ! The rest of the ranges must be obtained by communication.
+                rangeCell(1, 1, i) = cellRange(1, 1, i)
+                rangeCell(2, 1, i) = cellRange(2, 1, i)
+                rangeCell(3, 1, i) = cellRange(3, 1, i)
 
-       mm = mBlocks + 1
-       do i=2,nProc
+                rangeCell(1, 2, i) = cellRange(1, 2, i)
+                rangeCell(2, 2, i) = cellRange(2, 2, i)
+                rangeCell(3, 2, i) = cellRange(3, 2, i)
 
-          ! Check if something must be received from this processor.
+            end do
 
-          if(nMessages(i) > 0) then
+            ! The rest of the ranges must be obtained by communication.
 
-             ! Store the source and size of the messages and receive
-             ! the messages. Note that 1 must be substracted from i
-             ! to obtain the correct processor id.
+            mm = mBlocks + 1
+            do i = 2, nProc
 
-             source = i -1
-             size   = 6*nMessages(i)
+                ! Check if something must be received from this processor.
 
-             call mpi_recv(rangeNode(1,1,mm), size, adflow_integer,   &
-                  source, source, ADflow_comm_world, mpiStatus, &
-                  ierr)
-             call mpi_recv(rangeCell(1,1,mm), size, adflow_integer,     &
-                  source, source+1, ADflow_comm_world, mpiStatus, &
-                  ierr)
+                if (nMessages(i) > 0) then
 
-             ! Update mm.
+                    ! Store the source and size of the messages and receive
+                    ! the messages. Note that 1 must be substracted from i
+                    ! to obtain the correct processor id.
 
-             mm = mm + nMessages(i)
+                    source = i - 1
+                    size = 6 * nMessages(i)
 
-          endif
-       enddo
+                    call mpi_recv(rangeNode(1, 1, mm), size, adflow_integer, &
+                                  source, source, ADflow_comm_world, mpiStatus, &
+                                  ierr)
+                    call mpi_recv(rangeCell(1, 1, mm), size, adflow_integer, &
+                                  source, source + 1, ADflow_comm_world, mpiStatus, &
+                                  ierr)
 
-       ! Loop over the number of solutions to be written.
+                    ! Update mm.
 
-       solLoopRoot: do ind=1,nSurfSolToWrite
+                    mm = mm + nMessages(i)
 
-          ! Store the file and base ID a bit easier.
+                end if
+            end do
 
-          cgnsInd  = fileIDs(ind)
-          cgnsBase = cgnsBases(ind)
+            ! Loop over the number of solutions to be written.
 
-          ! Create the surface zone.
-          call createSurfaceZone
+            solLoopRoot: do ind = 1, nSurfSolToWrite
 
-          ! Write the nodal coordinates.
-          call writeSurfaceCoord
+                ! Store the file and base ID a bit easier.
 
-          ! Write the cell centered surface solution.
-          call writeSurfaceSol
+                cgnsInd = fileIDs(ind)
+                cgnsBase = cgnsBases(ind)
 
-       enddo solLoopRoot
+                ! Create the surface zone.
+                call createSurfaceZone
 
-       ! Release the memory of the variables only
-       ! processor 0 allocates.
+                ! Write the nodal coordinates.
+                call writeSurfaceCoord
 
-       deallocate(rangeNode, rangeCell, stat=ierr)
-       if(ierr /= 0)                               &
-            call terminate("writeSurfsolCGNSZone", &
-            "Deallocation error for rangeNode, etc")
+                ! Write the cell centered surface solution.
+                call writeSurfaceSol
 
-    else rootproc
+            end do solLoopRoot
 
-       ! Send the node and cell ranges to processor 0 if a block
-       ! contributes to the current cgns subface.
+            ! Release the memory of the variables only
+            ! processor 0 allocates.
 
-       if(mBlocks > 0) then
+            deallocate (rangeNode, rangeCell, stat=ierr)
+            if (ierr /= 0) &
+                call terminate("writeSurfsolCGNSZone", &
+                               "Deallocation error for rangeNode, etc")
 
-          ! Determine the size of the messages and send the nodal
-          ! and cell ranges to processor 0.
+            else rootproc
 
-          size = 6*mBlocks
-          call mpi_send(nodalRange, size, adflow_integer, 0, myID, &
-               ADflow_comm_world, ierr)
-          call mpi_send(cellRange, size, adflow_integer, 0, myID+1, &
-               ADflow_comm_world, ierr)
-       endif
+            ! Send the node and cell ranges to processor 0 if a block
+            ! contributes to the current cgns subface.
 
-       ! Loop over the number of solutions to be written.
+            if (mBlocks > 0) then
 
-       solLoopOthers: do ind=1,nSurfSolToWrite
+                ! Determine the size of the messages and send the nodal
+                ! and cell ranges to processor 0.
 
-          ! Write the nodal coordinates.
-          call writeSurfaceCoord
+                size = 6 * mBlocks
+                call mpi_send(nodalRange, size, adflow_integer, 0, myID, &
+                              ADflow_comm_world, ierr)
+                call mpi_send(cellRange, size, adflow_integer, 0, myID + 1, &
+                              ADflow_comm_world, ierr)
+            end if
 
-          ! Write the cell centered surface solution.
-          call writeSurfaceSol
+            ! Loop over the number of solutions to be written.
 
-       enddo solLoopOthers
+            solLoopOthers: do ind = 1, nSurfSolToWrite
 
-    endif rootproc
+                ! Write the nodal coordinates.
+                call writeSurfaceCoord
 
-    ! Release the memory of buffer.
+                ! Write the cell centered surface solution.
+                call writeSurfaceSol
 
-    deallocate(buffer, stat=ierr)
-    if(ierr /= 0)                            &
-         call terminate("writeSurfsolCGNSZone", &
-         "Deallocation error for buffer")
+            end do solLoopOthers
 
-  contains
+        end if rootproc
 
-    !        ================================================================
+        ! Release the memory of buffer.
 
-    subroutine determineSubranges
-      !
-      !         determineSubranges determines the nodal and cell subrange
-      !         for the given local block ID mm in the current cgns subface.
-      !
-      use inputIO
-      implicit none
-      !
-      !        Local variable
-      !
-      integer(kind=intType) :: ii
+        deallocate (buffer, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("writeSurfsolCGNSZone", &
+                           "Deallocation error for buffer")
 
-      ! Store mBlocks, the current number of local blocks that
-      ! participate to the cgns subface, a bit easier in ii.
+    contains
 
-      ii = mBlocks
+        !        ================================================================
 
-      ! Determine the nodal range of the current subface. Note that
-      ! in case multiple blocks contribute to the cgns subface, the
-      ! nodes on the interface are stored on both partitions for the
-      ! moment. This is corrected later.
+        subroutine determineSubranges
+            !
+            !         determineSubranges determines the nodal and cell subrange
+            !         for the given local block ID mm in the current cgns subface.
+            !
+            use inputIO
+            implicit none
+            !
+            !        Local variable
+            !
+            integer(kind=intType) :: ii
 
-      nodalRange(1,1,ii) = max(zone_iBeg,flowDoms(mm,1,1)%iBegor)
-      nodalRange(2,1,ii) = max(zone_jBeg,flowDoms(mm,1,1)%jBegor)
-      nodalRange(3,1,ii) = max(zone_kBeg,flowDoms(mm,1,1)%kBegor)
+            ! Store mBlocks, the current number of local blocks that
+            ! participate to the cgns subface, a bit easier in ii.
 
-      nodalRange(1,2,ii) = min(zone_iEnd,flowDoms(mm,1,1)%iEndor)
-      nodalRange(2,2,ii) = min(zone_jEnd,flowDoms(mm,1,1)%jEndor)
-      nodalRange(3,2,ii) = min(zone_kEnd,flowDoms(mm,1,1)%kEndor)
+            ii = mBlocks
 
-      ! The cell range. Step 1, the interior.
+            ! Determine the nodal range of the current subface. Note that
+            ! in case multiple blocks contribute to the cgns subface, the
+            ! nodes on the interface are stored on both partitions for the
+            ! moment. This is corrected later.
 
-      cellRange(1,1,ii) = nodalRange(1,1,ii) +1
-      cellRange(2,1,ii) = nodalRange(2,1,ii) +1
-      cellRange(3,1,ii) = nodalRange(3,1,ii) +1
+            nodalRange(1, 1, ii) = max(zone_iBeg, flowDoms(mm, 1, 1)%iBegor)
+            nodalRange(2, 1, ii) = max(zone_jBeg, flowDoms(mm, 1, 1)%jBegor)
+            nodalRange(3, 1, ii) = max(zone_kBeg, flowDoms(mm, 1, 1)%kBegor)
 
-      cellRange(1,2,ii) = nodalRange(1,2,ii)
-      cellRange(2,2,ii) = nodalRange(2,2,ii)
-      cellRange(3,2,ii) = nodalRange(3,2,ii)
+            nodalRange(1, 2, ii) = min(zone_iEnd, flowDoms(mm, 1, 1)%iEndor)
+            nodalRange(2, 2, ii) = min(zone_jEnd, flowDoms(mm, 1, 1)%jEndor)
+            nodalRange(3, 2, ii) = min(zone_kEnd, flowDoms(mm, 1, 1)%kEndor)
 
-      ! Step 2. Correct for possible rind layers.
+            ! The cell range. Step 1, the interior.
 
-      if( storeRindLayer ) then
+            cellRange(1, 1, ii) = nodalRange(1, 1, ii) + 1
+            cellRange(2, 1, ii) = nodalRange(2, 1, ii) + 1
+            cellRange(3, 1, ii) = nodalRange(3, 1, ii) + 1
 
-         if(nodalRange(1,1,ii) == zone_iBeg) cellRange(1,1,ii) = zone_iBeg
-         if(nodalRange(2,1,ii) == zone_jBeg) cellRange(2,1,ii) = zone_jBeg
-         if(nodalRange(3,1,ii) == zone_kBeg) cellRange(3,1,ii) = zone_kBeg
+            cellRange(1, 2, ii) = nodalRange(1, 2, ii)
+            cellRange(2, 2, ii) = nodalRange(2, 2, ii)
+            cellRange(3, 2, ii) = nodalRange(3, 2, ii)
 
-         if(nodalRange(1,2,ii) == zone_iEnd) cellRange(1,2,ii) = zone_iEnd +1
-         if(nodalRange(2,2,ii) == zone_jEnd) cellRange(2,2,ii) = zone_jEnd +1
-         if(nodalRange(3,2,ii) == zone_kEnd) cellRange(3,2,ii) = zone_kEnd +1
+            ! Step 2. Correct for possible rind layers.
 
-      endif
+            if (storeRindLayer) then
 
-      ! Step 3. Correct for the face ID.
+                if (nodalRange(1, 1, ii) == zone_iBeg) cellRange(1, 1, ii) = zone_iBeg
+                if (nodalRange(2, 1, ii) == zone_jBeg) cellRange(2, 1, ii) = zone_jBeg
+                if (nodalRange(3, 1, ii) == zone_kBeg) cellRange(3, 1, ii) = zone_kBeg
 
-      select case (faceID)
-      case (iMin)
-         cellRange(1,1,ii) = 2
-         cellRange(1,2,ii) = 2
-      case (iMax)
-         cellRange(1,1,ii) = zone_iEnd
-         cellRange(1,2,ii) = zone_iEnd
-      case (jMin)
-         cellRange(2,1,ii) = 2
-         cellRange(2,2,ii) = 2
-      case (jMax)
-         cellRange(2,1,ii) = zone_jEnd
-         cellRange(2,2,ii) = zone_jEnd
-      case (kMin)
-         cellRange(3,1,ii) = 2
-         cellRange(3,2,ii) = 2
-      case (kMax)
-         cellRange(3,1,ii) = zone_kEnd
-         cellRange(3,2,ii) = zone_kEnd
-      end select
-
-      ! Correct the nodal range for possible overlap.
-
-      if(nodalRange(1,1,ii) > zone_iBeg) &
-           nodalRange(1,1,ii) = nodalRange(1,1,ii) +1
-      if(nodalRange(2,1,ii) > zone_jBeg) &
-           nodalRange(2,1,ii) = nodalRange(2,1,ii) +1
-      if(nodalRange(3,1,ii) > zone_kBeg) &
-           nodalRange(3,1,ii) = nodalRange(3,1,ii) +1
-
-    end subroutine determineSubranges
-
-    !        ================================================================
-
-    subroutine createSurfaceZone
-      !
-      !         createSurfaceZone creates a surface node in the given
-      !         cgns surface solution file. This routine should only be
-      !         called by processor 0.
-      !
-      use inputIO
-      implicit none
-      !
-      !        Local variables.
-      !
-      integer(kind=cgsize_t), dimension(6) :: sizes
-
-      integer(kind=intType) :: nn
-
-      character(len=maxCGNSNameLen) :: zonename
-      character(len=7)              :: integerString
-
-      ! Determine the sizes of the subface.
-
-      sizes(1) = il
-      sizes(2) = jl
-      sizes(3) = il -1
-      sizes(4) = jl -1
-      sizes(5) = 0
-      sizes(6) = 0
-
-      ! For all zones a number is added to make to zone name unique.
-      ! Create that string here.
-
-      write(integerString,"(i6)") nZonesWritten
-      integerString = adjustl(integerString)
-
-      ! Create the zone name. A distinction must be made between
-      ! periodic and physical boundaries.
-
-      if( periodic ) then
-
-         zonename = "PeriodicBCZone"//trim(integerString)
-
-      else
-
-         ! True physcical boundary. A distinction is made between zones
-         ! that do and don't belong to a family. The basename of the
-         ! former boundaries is the family name, such that the entire
-         ! family can be easily selected in postprocessing software.
-
-         nn = cgnsDoms(zone)%bocoInfo(subface)%familyID
-         if(nn > 0) then
-
-            ! Zone belongs to a family. Add the zone number to the
-            ! family name for the zone name.
-
-            zonename = trim(cgnsFamilies(nn)%familyName)//   &
-                 trim(integerString)
-
-         else
-
-            ! Zone does not belong to a family. The first part of the
-            ! zone name depends on the boundary condition of the cgns
-            ! subface.
-
-            select case (cgnsDoms(zone)%bocoInfo(subface)%BCType)
-            case (Symm)
-               zonename = "Symmetry"
-            case (SymmPolar)
-               zonename = "SymmetryPolar"
-            case (NSWallAdiabatic)
-               zonename = "NSWallAdiabatic"
-            case (NSWallIsothermal)
-               zonename = "NSWallIsothermal"
-            case (EulerWall)
-               zonename = "EulerWall"
-            case (FarField)
-               zonename = "FarField"
-            case (SupersonicInflow)
-               zonename = "InflowSupersonic"
-            case (SubsonicInflow)
-               zonename = "InflowSubsonic"
-            case (SupersonicOutflow)
-               zonename = "OutflowSupersonic"
-            case (SubsonicOutflow)
-               zonename = "OutflowSubsonic"
-            case (MassBleedInflow)
-               zonename = "MassBleedInflow"
-            case (MassBleedOutflow)
-               zonename = "MassBleedOutflow"
-            case (mDot)
-               zonename = "MDot"
-            case (bcThrust)
-               zonename = "Thrust"
-            case (SlidingInterface)
-               zonename = "Sliding"
-            case (OversetOuterBound)
-               zonename = "Overlap"
-            case (DomainInterfaceAll)
-               zonename = "DomainAll"
-            case (DomainInterfaceRhoUVW)
-               zonename = "DomainRhoUVW"
-            case (DomainInterfaceP)
-               zonename = "DomainP"
-            case (DomainInterfaceRho)
-               zonename = "DomainRho"
-            case (DomainInterfaceTotal)
-               zonename = "DomainTotal"
-            case default
-               call terminate("createSurfaceZone", &
-                    "Unknown boundary condition")
+                if (nodalRange(1, 2, ii) == zone_iEnd) cellRange(1, 2, ii) = zone_iEnd + 1
+                if (nodalRange(2, 2, ii) == zone_jEnd) cellRange(2, 2, ii) = zone_jEnd + 1
+                if (nodalRange(3, 2, ii) == zone_kEnd) cellRange(3, 2, ii) = zone_kEnd + 1
+
+            end if
+
+            ! Step 3. Correct for the face ID.
+
+            select case (faceID)
+            case (iMin)
+                cellRange(1, 1, ii) = 2
+                cellRange(1, 2, ii) = 2
+            case (iMax)
+                cellRange(1, 1, ii) = zone_iEnd
+                cellRange(1, 2, ii) = zone_iEnd
+            case (jMin)
+                cellRange(2, 1, ii) = 2
+                cellRange(2, 2, ii) = 2
+            case (jMax)
+                cellRange(2, 1, ii) = zone_jEnd
+                cellRange(2, 2, ii) = zone_jEnd
+            case (kMin)
+                cellRange(3, 1, ii) = 2
+                cellRange(3, 2, ii) = 2
+            case (kMax)
+                cellRange(3, 1, ii) = zone_kEnd
+                cellRange(3, 2, ii) = zone_kEnd
             end select
 
-            ! Add the number to zone name to make it unique.
+            ! Correct the nodal range for possible overlap.
+
+            if (nodalRange(1, 1, ii) > zone_iBeg) &
+                nodalRange(1, 1, ii) = nodalRange(1, 1, ii) + 1
+            if (nodalRange(2, 1, ii) > zone_jBeg) &
+                nodalRange(2, 1, ii) = nodalRange(2, 1, ii) + 1
+            if (nodalRange(3, 1, ii) > zone_kBeg) &
+                nodalRange(3, 1, ii) = nodalRange(3, 1, ii) + 1
+
+        end subroutine determineSubranges
+
+        !        ================================================================
+
+        subroutine createSurfaceZone
+            !
+            !         createSurfaceZone creates a surface node in the given
+            !         cgns surface solution file. This routine should only be
+            !         called by processor 0.
+            !
+            use inputIO
+            implicit none
+            !
+            !        Local variables.
+            !
+            integer(kind=cgsize_t), dimension(6) :: sizes
+
+            integer(kind=intType) :: nn
+
+            character(len=maxCGNSNameLen) :: zonename
+            character(len=7) :: integerString
+
+            ! Determine the sizes of the subface.
+
+            sizes(1) = il
+            sizes(2) = jl
+            sizes(3) = il - 1
+            sizes(4) = jl - 1
+            sizes(5) = 0
+            sizes(6) = 0
+
+            ! For all zones a number is added to make to zone name unique.
+            ! Create that string here.
+
+            write (integerString, "(i6)") nZonesWritten
+            integerString = adjustl(integerString)
+
+            ! Create the zone name. A distinction must be made between
+            ! periodic and physical boundaries.
+
+            if (periodic) then
+
+                zonename = "PeriodicBCZone"//trim(integerString)
+
+            else
+
+                ! True physcical boundary. A distinction is made between zones
+                ! that do and don't belong to a family. The basename of the
+                ! former boundaries is the family name, such that the entire
+                ! family can be easily selected in postprocessing software.
+
+                nn = cgnsDoms(zone)%bocoInfo(subface)%familyID
+                if (nn > 0) then
+
+                    ! Zone belongs to a family. Add the zone number to the
+                    ! family name for the zone name.
+
+                    zonename = trim(cgnsFamilies(nn)%familyName)// &
+                               trim(integerString)
+
+                else
+
+                    ! Zone does not belong to a family. The first part of the
+                    ! zone name depends on the boundary condition of the cgns
+                    ! subface.
+
+                    select case (cgnsDoms(zone)%bocoInfo(subface)%BCType)
+                    case (Symm)
+                        zonename = "Symmetry"
+                    case (SymmPolar)
+                        zonename = "SymmetryPolar"
+                    case (NSWallAdiabatic)
+                        zonename = "NSWallAdiabatic"
+                    case (NSWallIsothermal)
+                        zonename = "NSWallIsothermal"
+                    case (EulerWall)
+                        zonename = "EulerWall"
+                    case (FarField)
+                        zonename = "FarField"
+                    case (SupersonicInflow)
+                        zonename = "InflowSupersonic"
+                    case (SubsonicInflow)
+                        zonename = "InflowSubsonic"
+                    case (SupersonicOutflow)
+                        zonename = "OutflowSupersonic"
+                    case (SubsonicOutflow)
+                        zonename = "OutflowSubsonic"
+                    case (MassBleedInflow)
+                        zonename = "MassBleedInflow"
+                    case (MassBleedOutflow)
+                        zonename = "MassBleedOutflow"
+                    case (mDot)
+                        zonename = "MDot"
+                    case (bcThrust)
+                        zonename = "Thrust"
+                    case (SlidingInterface)
+                        zonename = "Sliding"
+                    case (OversetOuterBound)
+                        zonename = "Overlap"
+                    case (DomainInterfaceAll)
+                        zonename = "DomainAll"
+                    case (DomainInterfaceRhoUVW)
+                        zonename = "DomainRhoUVW"
+                    case (DomainInterfaceP)
+                        zonename = "DomainP"
+                    case (DomainInterfaceRho)
+                        zonename = "DomainRho"
+                    case (DomainInterfaceTotal)
+                        zonename = "DomainTotal"
+                    case default
+                        call terminate("createSurfaceZone", &
+                                       "Unknown boundary condition")
+                    end select
+
+                    ! Add the number to zone name to make it unique.
+
+                    zonename = trim(zoneName)//"BCZone"//trim(integerString)
+
+                end if
 
-            zonename = trim(zoneName)//"BCZone"//trim(integerString)
+            end if
 
-         endif
+            ! Create the 2D structured zone.
+
+            call cg_zone_write_f(cgnsInd, cgnsBase, zonename, sizes, &
+                                 Structured, cgnsZone, ierr)
+            if (ierr /= CG_OK) &
+                call terminate("createSurfaceZone", &
+                               "Something wrong when calling cg_zone_write_f")
+
+            ! Create the flow solution node.
+
+            call cg_sol_write_f(cgnsInd, cgnsBase, cgnsZone, &
+                                "Flow solution", CellCenter, cgnsSol, ierr)
+            if (ierr /= CG_OK) &
+                call terminate("createSurfaceZone", &
+                               "Something wrong when calling cg_sol_write_f")
+
+            ! Create the rind layers. If rind layers must be stored put
+            ! 1 layer on every side of the subface; otherwise put 0 layers.
+            ! Use sizes as a buffer to store the rind data. The rind data
+            ! must be created under the just created solution node.
+
+            call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", &
+                           cgnsZone, "FlowSolution_t", cgnsSol, "end")
+            if (ierr /= CG_OK) &
+                call terminate("createSurfaceZone", &
+                               "Something wrong when calling cg_goto_f")
+
+            if (storeRindLayer) then
+                sizes(1) = 1; sizes(2) = 1
+                sizes(3) = 1; sizes(4) = 1
+            else
+                sizes(1) = 0; sizes(2) = 0
+                sizes(3) = 0; sizes(4) = 0
+            end if
 
-      endif
-
-      ! Create the 2D structured zone.
+            call cg_rind_write_f(int(sizes, intType), ierr)
+            if (ierr /= CG_OK) &
+                call terminate("createSurfaceZone", &
+                               "Something wrong when calling cg_rind_write_f")
+
+        end subroutine createSurfaceZone
+
+        !        ================================================================
+
+        subroutine writeSurfaceCoord
+            !
+            !         WriteSurfaceCoord write the vertex values of the
+            !         coordinates to the given zone of the cgns surface solution
+            !         file.
+            !
+            use cgnsNames
+            use inputIO
+            implicit none
+            !
+            !        Local variables.
+            !
+            integer :: realTypeCGNS
+
+            integer(kind=intType) :: i, j, k, kk, ll, mm, ii, jj
+            integer(kind=intType) :: lk, lj, li
+            integer(kind=intType) :: sizeCGNSWriteType
+
+            real(kind=realType) :: LRefInv
+
+            real(kind=4), dimension(:), allocatable :: writeBuffer4
+            real(kind=8), dimension(:), allocatable :: writeBuffer8
+
+            ! Set the cgns real type depending on the input option.
+
+            ! Compute the multiplication factor to obtain the original
+            ! coordinates. Note that LRef is corrected to 1.0 when the
+            ! coordinates should be written in meters. This happens when
+            ! the grid is read.
+
+            LRefInv = one / cgnsDoms(zone)%LRef
+
+            ! Processor 0 does the writing and must therefore allocate the
+            ! writeBuffer.
+
+            if (myID == 0) then
+                mm = (zone_kEnd - zone_kBeg + 1) * (zone_jEnd - zone_jBeg + 1) * (zone_iEnd - zone_iBeg + 1)
+                select case (precisionSurfGrid)
+                case (precisionSingle)
+                    allocate (writeBuffer4(mm), writeBuffer8(0), stat=ierr)
+                case (precisionDouble)
+                    allocate (writeBuffer4(0), writeBuffer8(mm), stat=ierr)
+                end select
+                if (ierr /= 0) &
+                    call terminate("writeSurfaceCoord", &
+                                   "Memory allocation failure for writeBuffer")
+            end if
 
-      call cg_zone_write_f(cgnsInd, cgnsBase, zonename, sizes, &
-           Structured, cgnsZone, ierr)
-      if(ierr /= CG_OK)                    &
-           call terminate("createSurfaceZone", &
-           "Something wrong when calling cg_zone_write_f")
-
-      ! Create the flow solution node.
+            ! Loop over the three coordinates.
 
-      call cg_sol_write_f(cgnsInd, cgnsBase, cgnsZone, &
-           "Flow solution", CellCenter, cgnsSol, ierr)
-      if(ierr /= CG_OK)                    &
-           call terminate("createSurfaceZone", &
-           "Something wrong when calling cg_sol_write_f")
+            coorLoop: do mm = 1, 3
 
-      ! Create the rind layers. If rind layers must be stored put
-      ! 1 layer on every side of the subface; otherwise put 0 layers.
-      ! Use sizes as a buffer to store the rind data. The rind data
-      ! must be created under the just created solution node.
+                ! Loop over the number of blocks stored on this processor
+                ! which may contribute to the subface. Note that
+                ! blocksCGNSblock contain all the subblocks part of cgns
+                ! block, which is not the same. BlocksCGNSblock cannot be
+                ! changed, because it is needed for other subfaces.
 
-      call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", &
-           cgnsZone, "FlowSolution_t", cgnsSol, "end")
-      if(ierr /= CG_OK)                    &
-           call terminate("createSurfaceZone", &
-           "Something wrong when calling cg_goto_f")
-
-      if( storeRindLayer ) then
-         sizes(1) = 1; sizes(2) = 1
-         sizes(3) = 1; sizes(4) = 1
-      else
-         sizes(1) = 0; sizes(2) = 0
-         sizes(3) = 0; sizes(4) = 0
-      endif
-
-      call cg_rind_write_f(int(sizes, intType), ierr)
-      if(ierr /= CG_OK)                    &
-           call terminate("createSurfaceZone", &
-           "Something wrong when calling cg_rind_write_f")
-
-    end subroutine createSurfaceZone
-
-    !        ================================================================
-
-    subroutine writeSurfaceCoord
-      !
-      !         WriteSurfaceCoord write the vertex values of the
-      !         coordinates to the given zone of the cgns surface solution
-      !         file.
-      !
-      use cgnsNames
-      use inputIO
-      implicit none
-      !
-      !        Local variables.
-      !
-      integer :: realTypeCGNS
+                kk = 0
+                jj = 0
+                do ll = 1, nBlocks
 
-      integer(kind=intType) :: i, j, k, kk, ll, mm, ii, jj
-      integer(kind=intType) :: lk, lj, li
-      integer(kind=intType) :: sizeCGNSWriteType
+                    ! Test if the current local block contributes to the
+                    ! cgns subface to be written.
 
-      real(kind=realType) :: LRefInv
+                    if (contributeToFace(ll)) then
 
-      real(kind=4), dimension(:), allocatable :: writeBuffer4
-      real(kind=8), dimension(:), allocatable :: writeBuffer8
+                        ! Update the counter kk and store the local block id in ii.
 
-      ! Set the cgns real type depending on the input option.
+                        kk = kk + 1
+                        ii = blocksCGNSblock(ll + offset)
 
-      ! Compute the multiplication factor to obtain the original
-      ! coordinates. Note that LRef is corrected to 1.0 when the
-      ! coordinates should be written in meters. This happens when
-      ! the grid is read.
+                        ! Store the coordinate for this contribution in buffer.
+                        ! As nodalRange contains the nodal ranges in the
+                        ! original cgns block, the starting value of the current
+                        ! subblock must be substracted. These are the actual
+                        ! indices in the local block and are stored in
+                        ! lk, lj and li.
 
-      LRefInv = one/cgnsDoms(zone)%LRef
+                        do k = nodalRange(3, 1, kk), nodalRange(3, 2, kk)
+                            lk = k - flowDoms(ii, 1, ind)%kBegor + 1
+                            do j = nodalRange(2, 1, kk), nodalRange(2, 2, kk)
+                                lj = j - flowDoms(ii, 1, ind)%jBegor + 1
+                                do i = nodalRange(1, 1, kk), nodalRange(1, 2, kk)
+                                    li = i - flowDoms(ii, 1, ind)%iBegor + 1
 
-      ! Processor 0 does the writing and must therefore allocate the
-      ! writeBuffer.
+                                    ! Update the counter jj and store the coordinate
+                                    ! in buffer.
 
-      if(myID == 0) then
-         mm = (zone_kEnd-zone_kBeg+1) * (zone_jEnd-zone_jBeg+1) * (zone_iEnd-zone_iBeg+1)
-         select case (precisionSurfGrid)
-         case (precisionSingle)
-            allocate(writeBuffer4(mm), writeBuffer8(0), stat=ierr)
-         case (precisionDouble)
-            allocate(writeBuffer4(0), writeBuffer8(mm), stat=ierr)
-         end select
-         if(ierr /= 0)                         &
-              call terminate("writeSurfaceCoord", &
-              "Memory allocation failure for writeBuffer")
-      endif
-
-      ! Loop over the three coordinates.
-
-      coorLoop: do mm=1,3
-
-         ! Loop over the number of blocks stored on this processor
-         ! which may contribute to the subface. Note that
-         ! blocksCGNSblock contain all the subblocks part of cgns
-         ! block, which is not the same. BlocksCGNSblock cannot be
-         ! changed, because it is needed for other subfaces.
-
-         kk = 0
-         jj = 0
-         do ll=1,nBlocks
-
-            ! Test if the current local block contributes to the
-            ! cgns subface to be written.
-
-            if( contributeToFace(ll) ) then
-
-               ! Update the counter kk and store the local block id in ii.
-
-               kk = kk+1
-               ii = blocksCGNSblock(ll+offset)
-
-               ! Store the coordinate for this contribution in buffer.
-               ! As nodalRange contains the nodal ranges in the
-               ! original cgns block, the starting value of the current
-               ! subblock must be substracted. These are the actual
-               ! indices in the local block and are stored in
-               ! lk, lj and li.
-
-               do k=nodalRange(3,1,kk),nodalRange(3,2,kk)
-                  lk = k - flowDoms(ii,1,ind)%kBegor + 1
-                  do j=nodalRange(2,1,kk),nodalRange(2,2,kk)
-                     lj = j - flowDoms(ii,1,ind)%jBegor + 1
-                     do i=nodalRange(1,1,kk),nodalRange(1,2,kk)
-                        li = i - flowDoms(ii,1,ind)%iBegor + 1
-
-                        ! Update the counter jj and store the coordinate
-                        ! in buffer.
-
-                        jj = jj+1
-                        buffer(jj) = LRefInv &
-                             * flowDoms(ii,1,ind)%x(li,lj,lk,mm)
-                     enddo
-                  enddo
-               enddo
-
-            endif
-         enddo
-
-         ! Make a distinction between processor 0 and the other
-         ! processors. Processor 0 does all the writing.
-
-         rootproc: if(myID == 0) then
-
-            ! I am processor 0 and must do the writing.
-
-            ! Loop over the other processors and receive possible
-            ! messages.
-
-            do ll=2,nProc
-
-               ! Test if processor ll contributes to this subface.
-
-               if(nMessages(ll) > 0) then
-
-                  ! Receive the message. Note that size is an upper limit.
-                  ! Furthermore 1 must be substracted from ll to obtain
-                  ! the correct processor ID; the processor ID's start
-                  ! at 0.
-
-                  size   = il*jl - jj
-                  source = ll -1
-                  call mpi_recv(buffer(jj+1), size, adflow_real, source, &
-                       source, ADflow_comm_world, mpiStatus, ierr)
-
-                  ! Determine the true size of the message and update
-                  ! the counter jj accordingly.
-
-                  call mpi_get_count(mpiStatus, adflow_real, size, ierr)
-                  jj = jj + size
-               endif
-            enddo
-
-            ! Copy the coordinate from buffer into the correct place
-            ! in writeBuffer. The routine called depends on the
-            ! desired precision.
-
-            ii = 1
-            do kk=1,nSubfaces
-               select case (precisionSurfGrid)
-               case (precisionSingle)
-                  call copyDataBufSinglePrecision(writeBuffer4,      &
-                       buffer(ii),       &
-                       zone_iBeg, zone_jBeg, zone_kBeg, &
-                       zone_iEnd, zone_jEnd, zone_kEnd, &
-                       rangeNode(1,1,kk))
-               case (precisionDouble)
-                  call copyDataBufDoublePrecision(writeBuffer8,      &
-                       buffer(ii),       &
-                       zone_iBeg, zone_jBeg, zone_kBeg, &
-                       zone_iEnd, zone_jEnd, zone_kEnd, &
-                       rangeNode(1,1,kk))
-               end select
-
-               ! Update the counter ii for the next subface.
-
-               ii = ii + (rangeNode(1,2,kk) - rangeNode(1,1,kk) + 1) &
-                    *      (rangeNode(2,2,kk) - rangeNode(2,1,kk) + 1) &
-                    *      (rangeNode(3,2,kk) - rangeNode(3,1,kk) + 1)
-            enddo
-
-            ! Write the coordinates, depending on the situation.
-            ! In source the actual number is stored; normally this
-            ! is equal to mm.
-            select case (precisionSurfGrid)
-            case (precisionSingle)
-               select case (mm)
-               case (1_intType)
-                  call cg_coord_write_f(cgnsInd, cgnsBase, cgnsZone, &
-                       realSingle, cgnsCoorX, writeBuffer4, source, ierr)
-               case (2_intType)
-                  call cg_coord_write_f(cgnsInd, cgnsBase, cgnsZone, &
-                       realSingle, cgnsCoorY, writeBuffer4, source, ierr)
-               case (3_intType)
-                  call cg_coord_write_f(cgnsInd, cgnsBase, cgnsZone, &
-                       realSingle, cgnsCoorZ, writeBuffer4, source, ierr)
-               end select
-
-            case (precisionDouble)
-               select case(mm)
-               case (1_intType)
-                  call cg_coord_write_f(cgnsInd, cgnsBase, cgnsZone, &
-                       realDouble, cgnsCoorX, writeBuffer8, source, ierr)
-               case (2_intType)
-                  call cg_coord_write_f(cgnsInd, cgnsBase, cgnsZone, &
-                       realDouble, cgnsCoorY, writeBuffer8, source, ierr)
-               case (3_intType)
-                  call cg_coord_write_f(cgnsInd, cgnsBase, cgnsZone, &
-                       realDouble, cgnsCoorZ, writeBuffer8, source, ierr)
-               end select
-            end select
+                                    jj = jj + 1
+                                    buffer(jj) = LRefInv &
+                                                 * flowDoms(ii, 1, ind)%x(li, lj, lk, mm)
+                                end do
+                            end do
+                        end do
+
+                    end if
+                end do
+
+                ! Make a distinction between processor 0 and the other
+                ! processors. Processor 0 does all the writing.
+
+                rootproc: if (myID == 0) then
+
+                    ! I am processor 0 and must do the writing.
+
+                    ! Loop over the other processors and receive possible
+                    ! messages.
+
+                    do ll = 2, nProc
+
+                        ! Test if processor ll contributes to this subface.
+
+                        if (nMessages(ll) > 0) then
+
+                            ! Receive the message. Note that size is an upper limit.
+                            ! Furthermore 1 must be substracted from ll to obtain
+                            ! the correct processor ID; the processor ID's start
+                            ! at 0.
+
+                            size = il * jl - jj
+                            source = ll - 1
+                            call mpi_recv(buffer(jj + 1), size, adflow_real, source, &
+                                          source, ADflow_comm_world, mpiStatus, ierr)
+
+                            ! Determine the true size of the message and update
+                            ! the counter jj accordingly.
+
+                            call mpi_get_count(mpiStatus, adflow_real, size, ierr)
+                            jj = jj + size
+                        end if
+                    end do
+
+                    ! Copy the coordinate from buffer into the correct place
+                    ! in writeBuffer. The routine called depends on the
+                    ! desired precision.
+
+                    ii = 1
+                    do kk = 1, nSubfaces
+                        select case (precisionSurfGrid)
+                        case (precisionSingle)
+                            call copyDataBufSinglePrecision(writeBuffer4, &
+                                                            buffer(ii), &
+                                                            zone_iBeg, zone_jBeg, zone_kBeg, &
+                                                            zone_iEnd, zone_jEnd, zone_kEnd, &
+                                                            rangeNode(1, 1, kk))
+                        case (precisionDouble)
+                            call copyDataBufDoublePrecision(writeBuffer8, &
+                                                            buffer(ii), &
+                                                            zone_iBeg, zone_jBeg, zone_kBeg, &
+                                                            zone_iEnd, zone_jEnd, zone_kEnd, &
+                                                            rangeNode(1, 1, kk))
+                        end select
+
+                        ! Update the counter ii for the next subface.
+
+                        ii = ii + (rangeNode(1, 2, kk) - rangeNode(1, 1, kk) + 1) &
+                             * (rangeNode(2, 2, kk) - rangeNode(2, 1, kk) + 1) &
+                             * (rangeNode(3, 2, kk) - rangeNode(3, 1, kk) + 1)
+                    end do
+
+                    ! Write the coordinates, depending on the situation.
+                    ! In source the actual number is stored; normally this
+                    ! is equal to mm.
+                    select case (precisionSurfGrid)
+                    case (precisionSingle)
+                        select case (mm)
+                        case (1_intType)
+                            call cg_coord_write_f(cgnsInd, cgnsBase, cgnsZone, &
+                                                  realSingle, cgnsCoorX, writeBuffer4, source, ierr)
+                        case (2_intType)
+                            call cg_coord_write_f(cgnsInd, cgnsBase, cgnsZone, &
+                                                  realSingle, cgnsCoorY, writeBuffer4, source, ierr)
+                        case (3_intType)
+                            call cg_coord_write_f(cgnsInd, cgnsBase, cgnsZone, &
+                                                  realSingle, cgnsCoorZ, writeBuffer4, source, ierr)
+                        end select
+
+                    case (precisionDouble)
+                        select case (mm)
+                        case (1_intType)
+                            call cg_coord_write_f(cgnsInd, cgnsBase, cgnsZone, &
+                                                  realDouble, cgnsCoorX, writeBuffer8, source, ierr)
+                        case (2_intType)
+                            call cg_coord_write_f(cgnsInd, cgnsBase, cgnsZone, &
+                                                  realDouble, cgnsCoorY, writeBuffer8, source, ierr)
+                        case (3_intType)
+                            call cg_coord_write_f(cgnsInd, cgnsBase, cgnsZone, &
+                                                  realDouble, cgnsCoorZ, writeBuffer8, source, ierr)
+                        end select
+                    end select
+
+                    if (ierr /= CG_OK) &
+                         call terminate("writeSurfaceCoord", &
+                         "Something wrong when calling &
+                         &cg_coord_write_f")
+
+                    ! Write the units, if possible.
+
+                    if (cgnsDoms(zone)%gridUnitsSpecified) then
+
+                        ! Go to the correct place in the surface solution file.
+
+                        call cg_goto_f(cgnsInd, cgnsBase, ierr, &
+                                       "Zone_t", cgnsZone, &
+                                       "GridCoordinates_t", 1, &
+                                       "DataArray_t", source, "end")
+                        if (ierr /= CG_OK) &
+                            call terminate("writeSurfaceCoord", &
+                                           "Something wrong when calling cg_goto_f")
+
+                        ! Write the units.
+
+                        call cg_units_write_f(cgnsDoms(zone)%mass, &
+                                              cgnsDoms(zone)%len, &
+                                              cgnsDoms(zone)%time, &
+                                              cgnsDoms(zone)%temp, &
+                                              cgnsDoms(zone)%angle, ierr)
+                        if (ierr /= CG_OK) &
+                             call terminate("writeSurfaceCoord", &
+                             "Something wrong when calling &
+                             &cg_units_write_f")
+                    end if
+
+                    else rootproc
+
+                    ! Not the root processor.
+                    ! Data must be sent to processor 0 if local blocks
+                    ! contribute to the current cgns subface.
+
+                    if (jj > 0) then
+                        size = jj
+                        call mpi_send(buffer, size, adflow_real, 0, myID, &
+                                      ADflow_comm_world, ierr)
+                    end if
+
+                end if rootproc
+
+            end do coorLoop
+
+            ! Processor 0 must deallocate the writeBuffer.
+
+            if (myID == 0) then
+                deallocate (writeBuffer4, writebuffer8, stat=ierr)
+                if (ierr /= 0) &
+                    call terminate("writeSurfaceCoord", &
+                                   "Deallocation error for writeBuffer")
+            end if
 
-            if(ierr /= CG_OK)                    &
-                 call terminate("writeSurfaceCoord", &
-                 "Something wrong when calling &
-                 &cg_coord_write_f")
+        end subroutine writeSurfaceCoord
 
-            ! Write the units, if possible.
+        !        ================================================================
 
-            if( cgnsDoms(zone)%gridUnitsSpecified ) then
+        subroutine writeSurfaceSol
+            !
+            !         writeSurfaceSol writes the cell centered surface solution
+            !         to the cgns surface file.
+            !
+            implicit none
+            !
+            !        Local variables.
+            !
+            integer :: realTypeCGNS
 
-               ! Go to the correct place in the surface solution file.
+            integer(kind=intType) :: ii, jj, kk, ll, mm
+            integer(kind=intType) :: iiBeg, jjBeg, kkBeg
+            integer(kind=intType) :: iiEnd, jjEnd, kkEnd
 
-               call cg_goto_f(cgnsInd, cgnsBase, ierr, &
-                    "Zone_t", cgnsZone,      &
-                    "GridCoordinates_t", 1,  &
-                    "DataArray_t", source, "end")
-               if(ierr /= CG_OK)                      &
-                    call terminate("writeSurfaceCoord", &
-                    "Something wrong when calling cg_goto_f")
+            real(kind=4), dimension(:), allocatable :: writeBuffer4
+            real(kind=8), dimension(:), allocatable :: writeBuffer8
 
-               ! Write the units.
+            ! Processor 0 does the writing and must therefore allocate the
+            ! writeBuffer.
 
-               call cg_units_write_f(cgnsDoms(zone)%mass, &
-                    cgnsDoms(zone)%len,  &
-                    cgnsDoms(zone)%time, &
-                    cgnsDoms(zone)%temp, &
-                    cgnsDoms(zone)%angle, ierr)
-               if(ierr /= CG_OK)                    &
-                    call terminate("writeSurfaceCoord", &
-                    "Something wrong when calling &
-                    &cg_units_write_f")
-            endif
-
-         else rootproc
+            if (myID == 0) then
 
-            ! Not the root processor.
-            ! Data must be sent to processor 0 if local blocks
-            ! contribute to the current cgns subface.
-
-            if( jj > 0 ) then
-               size = jj
-               call mpi_send(buffer, size, adflow_real, 0, myID, &
-                    ADflow_comm_world, ierr)
-            endif
-
-         endif rootproc
-
-      enddo coorLoop
-
-      ! Processor 0 must deallocate the writeBuffer.
-
-      if(myID == 0) then
-         deallocate(writeBuffer4, writebuffer8, stat=ierr)
-         if(ierr /= 0)                         &
-              call terminate("writeSurfaceCoord", &
-              "Deallocation error for writeBuffer")
-      endif
+                iiBeg = rangeCell(1, 1, 1); iiEnd = rangeCell(1, 2, 1)
+                jjBeg = rangeCell(2, 1, 1); jjEnd = rangeCell(2, 2, 1)
+                kkBeg = rangeCell(3, 1, 1); kkEnd = rangeCell(3, 2, 1)
+                do ll = 2, nSubfaces
+                    iiBeg = min(iiBeg, rangeCell(1, 1, ll))
+                    jjBeg = min(jjBeg, rangeCell(2, 1, ll))
+                    kkBeg = min(kkBeg, rangeCell(3, 1, ll))
 
-    end subroutine writeSurfaceCoord
+                    iiEnd = max(iiEnd, rangeCell(1, 2, ll))
+                    jjEnd = max(jjEnd, rangeCell(2, 2, ll))
+                    kkEnd = max(kkEnd, rangeCell(3, 2, ll))
+                end do
 
-    !        ================================================================
+                mm = (kkEnd - kkBeg + 1) * (jjEnd - jjBeg + 1) * (iiEnd - iiBeg + 1)
+                select case (precisionSurfSol)
+                case (precisionSingle)
+                    allocate (writeBuffer4(mm), writeBuffer8(0), stat=ierr)
+                case (precisionDouble)
+                    allocate (writeBuffer4(0), writeBuffer8(mm), stat=ierr)
+                end select
+                if (ierr /= 0) &
+                    call terminate("writeSurfaceSol", &
+                                   "Memory allocation failure for writeBuffer")
+            end if
 
-    subroutine writeSurfaceSol
-      !
-      !         writeSurfaceSol writes the cell centered surface solution
-      !         to the cgns surface file.
-      !
-      implicit none
-      !
-      !        Local variables.
-      !
-      integer :: realTypeCGNS
+            ! Loop over the number of solution variables.
 
-      integer(kind=intType) :: ii, jj, kk, ll, mm
-      integer(kind=intType) :: iiBeg, jjBeg, kkBeg
-      integer(kind=intType) :: iiEnd, jjEnd, kkEnd
+            solLoop: do mm = 1, nSolVar
 
-      real(kind=4), dimension(:), allocatable :: writeBuffer4
-      real(kind=8), dimension(:), allocatable :: writeBuffer8
+                ! Loop over the number of blocks stored on this processor
+                ! which may contribute to the subface. Note that
+                ! blocksCGNSblock contain all the subblocks part of cgns
+                ! block, which is not the same. BlocksCGNSblock cannot be
+                ! changed, because it is needed for other subfaces.
 
-      ! Processor 0 does the writing and must therefore allocate the
-      ! writeBuffer.
+                kk = 0
+                jj = 0
+                do ll = 1, nBlocks
 
-      if(myID == 0) then
+                    ! Test if the current local block contributes to the
+                    ! cgns subface to be written.
 
-         iiBeg = rangeCell(1,1,1); iiEnd = rangeCell(1,2,1)
-         jjBeg = rangeCell(2,1,1); jjEnd = rangeCell(2,2,1)
-         kkBeg = rangeCell(3,1,1); kkEnd = rangeCell(3,2,1)
-         do ll=2,nSubfaces
-            iiBeg = min(iiBeg,rangeCell(1,1,ll))
-            jjBeg = min(jjBeg,rangeCell(2,1,ll))
-            kkBeg = min(kkBeg,rangeCell(3,1,ll))
-
-            iiEnd = max(iiEnd,rangeCell(1,2,ll))
-            jjEnd = max(jjEnd,rangeCell(2,2,ll))
-            kkEnd = max(kkEnd,rangeCell(3,2,ll))
-         enddo
-
-         mm = (kkEnd-kkBeg+1) * (jjEnd-jjBeg+1) * (iiEnd-iiBeg+1)
-         select case (precisionSurfSol)
-         case (precisionSingle)
-            allocate(writeBuffer4(mm), writeBuffer8(0), stat=ierr)
-         case (precisionDouble)
-            allocate(writeBuffer4(0), writeBuffer8(mm), stat=ierr)
-         end select
-         if(ierr /= 0)                       &
-              call terminate("writeSurfaceSol", &
-              "Memory allocation failure for writeBuffer")
-      endif
-
-      ! Loop over the number of solution variables.
-
-      solLoop: do mm=1,nSolVar
-
-         ! Loop over the number of blocks stored on this processor
-         ! which may contribute to the subface. Note that
-         ! blocksCGNSblock contain all the subblocks part of cgns
-         ! block, which is not the same. BlocksCGNSblock cannot be
-         ! changed, because it is needed for other subfaces.
-
-         kk = 0
-         jj = 0
-         do ll=1,nBlocks
-
-            ! Test if the current local block contributes to the
-            ! cgns subface to be written.
-
-            if( contributeToFace(ll) ) then
-
-               ! Update the counter kk and store the local block id in ii.
-
-               kk = kk+1
-               ii = blocksCGNSblock(ll+offset)
-
-               ! Store the surface solution for this contribution in
-               ! buffer. Note that the counter jj is updated in the
-               ! routine storeSurfsolInBuffer.
-               call storeSurfsolInBuffer(ind, buffer, jj, ii,       &
-                    faceID, cellRange(1,1,kk), &
-                    solNames(mm),              &
-                    viscousSubface, storeRindLayer,&
-                    subface_iBeg, subface_iEnd, subface_jBeg, subface_jEnd)
-            endif
-         enddo
-
-         ! Make a distinction between processor 0 and the other
-         ! processors. Processor 0 does all the writing.
-
-         rootproc: if(myID == 0) then
-
-            ! I am processor 0 and must do the writing.
-
-            ! Loop over the other processors and receive possible
-            ! messages.
-
-            do ll=2,nProc
-
-               ! Test if processor ll contributes to this subface.
-
-               if(nMessages(ll) > 0) then
-
-                  ! Receive the message. Note that size is an upper limit.
-                  ! Furthermore 1 must be substracted from ll to obtain
-                  ! the correct processor id; the processor id's start
-                  ! at 0.
-
-                  size   = (il+1)*(jl+1) - jj
-                  source = ll -1
-                  call mpi_recv(buffer(jj+1), size, adflow_real, source, &
-                       source, ADflow_comm_world, mpiStatus, ierr)
-
-                  ! Determine the true size of the message and update
-                  ! the counter jj accordingly.
-
-                  call mpi_get_count(mpiStatus, adflow_real, size, ierr)
-                  jj = jj + size
-               endif
-            enddo
-
-            ! Copy the variable from buffer into the correct place
-            ! in writeBuffer. The routine called depends on the
-            ! desired precision.
-
-            ii = 1
-            do kk=1,nSubfaces
-               select case (precisionSurfSol)
-               case (precisionSingle)
-                  call copyDataBufSinglePrecision(writeBuffer4,        &
-                       buffer(ii),          &
-                       iiBeg, jjBeg, kkBeg, &
-                       iiEnd, jjEnd, kkEnd, &
-                       rangeCell(1,1,kk))
-               case (precisionDouble)
-                  call copyDataBufDoublePrecision(writeBuffer8,        &
-                       buffer(ii),          &
-                       iiBeg, jjBeg, kkBeg, &
-                       iiEnd, jjEnd, kkEnd, &
-                       rangeCell(1,1,kk))
-               end select
-
-               ! Update the counter ii for the next subface.
-
-               ii = ii + (rangeCell(1,2,kk) - rangeCell(1,1,kk) + 1) &
-                    *      (rangeCell(2,2,kk) - rangeCell(2,1,kk) + 1) &
-                    *      (rangeCell(3,2,kk) - rangeCell(3,1,kk) + 1)
-            enddo
-
-            ! Write the solution variable to file. Source is just used
-            ! as a dummy variable and does not have a meaning.
-            select case(precisionSurfSol)
-            case (precisionSingle)
-               call cg_field_write_f(cgnsInd, cgnsBase, cgnsZone, &
-                    cgnsSol, realSingle, solNames(mm), writeBuffer4, &
-                    source, ierr)
-            case (precisionDouble)
-               call cg_field_write_f(cgnsInd, cgnsBase, cgnsZone, &
-                    cgnsSol, realDouble, solNames(mm), writeBuffer8, &
-                    source, ierr)
-            end select
-            if(ierr /= 0)           then
-               call terminate("writeSolCGNSZone", &
-                    "Something wrong when &
-                    &calling cg_field_write_f")
+                    if (contributeToFace(ll)) then
+
+                        ! Update the counter kk and store the local block id in ii.
+
+                        kk = kk + 1
+                        ii = blocksCGNSblock(ll + offset)
+
+                        ! Store the surface solution for this contribution in
+                        ! buffer. Note that the counter jj is updated in the
+                        ! routine storeSurfsolInBuffer.
+                        call storeSurfsolInBuffer(ind, buffer, jj, ii, &
+                                                  faceID, cellRange(1, 1, kk), &
+                                                  solNames(mm), &
+                                                  viscousSubface, storeRindLayer, &
+                                                  subface_iBeg, subface_iEnd, subface_jBeg, subface_jEnd)
+                    end if
+                end do
+
+                ! Make a distinction between processor 0 and the other
+                ! processors. Processor 0 does all the writing.
+
+                rootproc: if (myID == 0) then
+
+                    ! I am processor 0 and must do the writing.
+
+                    ! Loop over the other processors and receive possible
+                    ! messages.
+
+                    do ll = 2, nProc
+
+                        ! Test if processor ll contributes to this subface.
+
+                        if (nMessages(ll) > 0) then
+
+                            ! Receive the message. Note that size is an upper limit.
+                            ! Furthermore 1 must be substracted from ll to obtain
+                            ! the correct processor id; the processor id's start
+                            ! at 0.
+
+                            size = (il + 1) * (jl + 1) - jj
+                            source = ll - 1
+                            call mpi_recv(buffer(jj + 1), size, adflow_real, source, &
+                                          source, ADflow_comm_world, mpiStatus, ierr)
+
+                            ! Determine the true size of the message and update
+                            ! the counter jj accordingly.
+
+                            call mpi_get_count(mpiStatus, adflow_real, size, ierr)
+                            jj = jj + size
+                        end if
+                    end do
+
+                    ! Copy the variable from buffer into the correct place
+                    ! in writeBuffer. The routine called depends on the
+                    ! desired precision.
+
+                    ii = 1
+                    do kk = 1, nSubfaces
+                        select case (precisionSurfSol)
+                        case (precisionSingle)
+                            call copyDataBufSinglePrecision(writeBuffer4, &
+                                                            buffer(ii), &
+                                                            iiBeg, jjBeg, kkBeg, &
+                                                            iiEnd, jjEnd, kkEnd, &
+                                                            rangeCell(1, 1, kk))
+                        case (precisionDouble)
+                            call copyDataBufDoublePrecision(writeBuffer8, &
+                                                            buffer(ii), &
+                                                            iiBeg, jjBeg, kkBeg, &
+                                                            iiEnd, jjEnd, kkEnd, &
+                                                            rangeCell(1, 1, kk))
+                        end select
+
+                        ! Update the counter ii for the next subface.
+
+                        ii = ii + (rangeCell(1, 2, kk) - rangeCell(1, 1, kk) + 1) &
+                             * (rangeCell(2, 2, kk) - rangeCell(2, 1, kk) + 1) &
+                             * (rangeCell(3, 2, kk) - rangeCell(3, 1, kk) + 1)
+                    end do
+
+                    ! Write the solution variable to file. Source is just used
+                    ! as a dummy variable and does not have a meaning.
+                    select case (precisionSurfSol)
+                    case (precisionSingle)
+                        call cg_field_write_f(cgnsInd, cgnsBase, cgnsZone, &
+                                              cgnsSol, realSingle, solNames(mm), writeBuffer4, &
+                                              source, ierr)
+                    case (precisionDouble)
+                        call cg_field_write_f(cgnsInd, cgnsBase, cgnsZone, &
+                                              cgnsSol, realDouble, solNames(mm), writeBuffer8, &
+                                              source, ierr)
+                    end select
+                    if (ierr /= 0) then
+                        call terminate("writeSolCGNSZone", &
+                             "Something wrong when &
+                             &calling cg_field_write_f")
+                    end if
+
+                    else rootproc
+
+                    ! Not the root processor.
+                    ! Data must be sent to processor 0 if local blocks
+                    ! contribute to the current cgns subface.
+
+                    if (jj > 0) then
+                        size = jj
+                        call mpi_send(buffer, size, adflow_real, 0, myID, &
+                                      ADflow_comm_world, ierr)
+                    end if
+
+                end if rootproc
+
+            end do solLoop
+
+            ! Processor 0 must deallocate the writeBuffer.
+
+            if (myID == 0) then
+                deallocate (writeBuffer4, writeBuffer8, stat=ierr)
+                if (ierr /= 0) &
+                    call terminate("writeSurfaceSol", &
+                                   "Deallocation error for writeBuffer")
+            end if
+
+        end subroutine writeSurfaceSol
+    end subroutine writeSurfsolCGNSZone
+
+    subroutine writeIsoSurface(isoName, sps, nIsoSurfVar, isoSurfSolNames)
+
+        ! Implements a marching cubes algrorithm which can be used to
+        ! extract iso surfaces or slcies from a solution and store them in a
+        ! CGNS surface file.
+
+        use communication
+        use blockPointers
+        use flowVarRefState
+        use inputPhysics
+        use su_cgns
+        use inputIO
+        use outputMod
+        use cgnsNames
+        use utils, only: reallocateReal2, setPointers, reallocateinteger2, &
+                         terminate, EChk, pointReduce
+        implicit none
+
+        ! Input param
+        character(len=*), intent(in) :: isoName
+        integer(kind=intType) :: sps
+        integer(kind=intType) :: nIsoSurfVar
+        character(len=*), dimension(*), intent(in) :: isoSurfSolNames
+        ! Working param
+        integer(kind=intType) :: i, j, k, nn, kk, nMax, icon, iCoor, indexCube, num1, num2
+        real(kind=realType), dimension(:, :, :), pointer :: fn
+        real(kind=realType), dimension(:, :), pointer :: coords, uniqueCoords, weights
+        real(kind=realType), dimension(:), allocatable :: buffer
+
+        integer(kind=intType), dimension(:, :), pointer :: indices
+        integer(kind=intType), dimension(:, :), allocatable :: connBuffer, allConn
+        integer(kind=intType), dimension(:), allocatable :: link, nPtsProc, nConnProc
+
+        integer(kind=intType) :: ccwOrdering(3, 8), n1(3), n2(3)
+        integer(kind=intType) :: matCon1(256, 16), matCon2(12, 2), nUnique, ivar
+        integer(kind=intType) :: ierr, iProc, tag, cumNodes, cumConn, iCorner, curBlock, idim
+        real(kind=realType) :: f(8)
+
+        logical :: logic1
+        integer, dimension(mpi_status_size) :: mpiStatus
+        integer(kind=intType) :: cgnsInd, cgnsBase, cgnsZOne, coordID, secID, solID, fieldID
+        real(kind=realType) :: tol = 1e-8 ! Node tol for isosurf pointReduce
+
+        ! Fill up the connecivity matrices
+        call getMatCons(matCon1, matCon2, ccwOrdering)
+
+        ! Generate a guess for the size of the iso surface -- sum of the
+        ! number of nodes on the faces
+        nMax = 0
+        do nn = 1, nDom
+            call setPointers(nn, 1, sps)
+            nMax = nMax + 2 * il * jl + 2 * il * kl + 2 * jl * kl
+        end do
+
+        ! Allocate size nMax
+        allocate (weights(2, nMax), indices(7, nMax))
+
+        iCoor = 0
+        do nn = 1, nDom
+            call setPointers(nn, 1, sps)
+            fn => flowDoms(nn, 1, sps)%fn
+            ! Now loop over the Cells:
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+
+                        ! Extract value at the corners
+                        do iCorner = 1, 8
+                            f(iCorner) = fn(i + ccwOrdering(1, iCorner), &
+                                            j + ccwOrdering(2, iCorner), &
+                                            k + ccwOrdering(3, iCorner))
+                        end do
+
+                        ! Based on the values at each corner, determine which
+                        ! type surface we have
+                        indexcube = 1
+                        if (f(1) .lt. zero) indexcube = indexcube + 1
+                        if (f(2) .lt. zero) indexcube = indexcube + 2
+                        if (f(3) .lt. zero) indexcube = indexcube + 4
+                        if (f(4) .lt. zero) indexcube = indexcube + 8
+                        if (f(5) .lt. zero) indexcube = indexcube + 16
+                        if (f(6) .lt. zero) indexcube = indexcube + 32
+                        if (f(7) .lt. zero) indexcube = indexcube + 64
+                        if (f(8) .lt. zero) indexcube = indexcube + 128
+
+                        logic1 = .true.
+
+                        kk = 1
+                        do while (logic1)
+                            icon = matcon1(indexcube, kk)
+
+                            if (icon == 0) then
+                                logic1 = .false.
+                            else
+
+                                iCoor = iCoor + 1
+                                if (iCoor > nMax) then
+                                    ! Need to realloc the coord array. Make it double the size
+                                    call reallocateReal2(weights, 2, 2 * nMax, 2, nMax, .true.)
+                                    call reallocateInteger2(indices, 7, 2 * nMax, 7, nMax, .true.)
+                                    nMax = nMax * 2
+                                end if
+
+                                num1 = matcon2(icon, 1)
+                                num2 = matcon2(icon, 2)
+
+                                ! Weight factors
+                                weights(2, iCoor) = (zero - f(num1)) / (f(num2) - f(num1))
+                                weights(1, iCoor) = one - weights(2, icoor)
+
+                                ! Indices of nodes
+                                n1 = (/i, j, k/) + ccwOrdering(:, num1)
+                                n2 = (/i, j, k/) + ccwOrdering(:, num2)
+                                indices(:, iCoor) = (/nn, n1(1), n1(2), n1(3), n2(1), n2(2), n2(3)/)
+
+                                kk = kk + 1
+                            end if
+                        end do
+
+                    end do ! I loop
+                end do ! J loop
+            end do ! K loop
+        end do ! Domain loop
+
+        ! We have not actually stored the coordintes; only the positions and
+        ! the weights. To compute the coordinates we pass back through and assemble
+        allocate (Coords(3, iCoor))
+
+        ! Set pointer to first block
+        call setPointers(1, 1, sps)
+        curBlock = 1
+        do i = 1, iCoor
+
+            ! If we've switched blocks, reset points. This stil only calls
+            ! setPointer nDom times since there are at most that many
+            ! switches
+            if (indices(1, i) /= curBlock) then
+                call setPointers(indices(1, i), 1, sps)
+                curBlock = indices(1, i)
             end if
 
-         else rootproc
-
-            ! Not the root processor.
-            ! Data must be sent to processor 0 if local blocks
-            ! contribute to the current cgns subface.
-
-            if( jj > 0 ) then
-               size = jj
-               call mpi_send(buffer, size, adflow_real, 0, myID, &
-                    ADflow_comm_world, ierr)
-            endif
-
-         endif rootproc
-
-      enddo solLoop
-
-      ! Processor 0 must deallocate the writeBuffer.
-
-      if(myID == 0) then
-         deallocate(writeBuffer4, writeBuffer8, stat=ierr)
-         if(ierr /= 0)                         &
-              call terminate("writeSurfaceSol", &
-              "Deallocation error for writeBuffer")
-      endif
-
-    end subroutine writeSurfaceSol
-  end subroutine writeSurfsolCGNSZone
-
-  subroutine writeIsoSurface(isoName , sps, nIsoSurfVar, isoSurfSolNames)
-
-    ! Implements a marching cubes algrorithm which can be used to
-    ! extract iso surfaces or slcies from a solution and store them in a
-    ! CGNS surface file.
-
-    use communication
-    use blockPointers
-    use flowVarRefState
-    use inputPhysics
-    use su_cgns
-    use inputIO
-    use outputMod
-    use cgnsNames
-    use utils, only : reallocateReal2, setPointers, reallocateinteger2, &
-         terminate, EChk, pointReduce
-    implicit none
-
-    ! Input param
-    character(len=*), intent(in)                   :: isoName
-    integer(kind=intType) :: sps
-    integer(kind=intType) :: nIsoSurfVar
-    character(len=*), dimension(*), intent(in) :: isoSurfSolNames
-    ! Working param
-    integer(kind=intType) :: i, j, k, nn, kk, nMax, icon, iCoor, indexCube, num1, num2
-    real(kind=realType), dimension(:, :, :), pointer :: fn
-    real(kind=realType), dimension(:, :), pointer :: coords, uniqueCoords, weights
-    real(kind=realType), dimension(:), allocatable :: buffer
-
-    integer(kind=intType), dimension(:, :), pointer :: indices
-    integer(kind=intType), dimension(:, :), allocatable :: connBuffer, allConn
-    integer(kind=intType), dimension(:), allocatable :: link, nPtsProc, nConnProc
-
-    integer(kind=intType) :: ccwOrdering(3, 8), n1(3), n2(3)
-    integer(kind=intType) :: matCon1(256, 16), matCon2(12,2), nUnique, ivar
-    integer(kind=intType) :: ierr, iProc, tag, cumNodes, cumConn, iCorner, curBlock, idim
-    real(kind=realType) :: f(8)
-
-    logical :: logic1
-    integer, dimension(mpi_status_size) :: mpiStatus
-    integer(kind=intType) :: cgnsInd, cgnsBase, cgnsZOne, coordID, secID, solID, fieldID
-    real(kind=realType) :: tol=1e-8 ! Node tol for isosurf pointReduce
-
-    ! Fill up the connecivity matrices
-    call getMatCons(matCon1, matCon2, ccwOrdering)
-
-    ! Generate a guess for the size of the iso surface -- sum of the
-    ! number of nodes on the faces
-    nMax = 0
-    do nn=1, nDom
-       call setPointers(nn, 1, sps)
-       nMax = nMax + 2*il*jl + 2*il*kl + 2*jl*kl
-    end do
-
-    ! Allocate size nMax
-    allocate(weights(2, nMax), indices(7, nMax))
-
-    iCoor = 0
-    do nn=1, nDom
-       call setPointers(nn, 1, sps)
-       fn => flowDoms(nn, 1, sps)%fn
-       ! Now loop over the Cells:
-       do k=2, kl
-          do j=2, jl
-             do i=2, il
-
-                ! Extract value at the corners
-                do iCorner=1,8
-                   f(iCorner) = fn(i+ccwOrdering(1, iCorner), &
-                        j + ccwOrdering(2, iCorner), &
-                        k + ccwOrdering(3, iCorner))
+            ! Computing coordinates is easy; we just juse the weights and the
+            ! indices on x
+            do idim = 1, 3
+                coords(idim, i) = &
+                    weights(1, i) * &
+                    X(indices(2, i), indices(3, i), indices(4, i), idim) + &
+                    weights(2, i) * &
+                    X(indices(5, i), indices(6, i), indices(7, i), idim)
+            end do
+        end do
+
+        ! Now we know the maximum number of coordinates so we can allocate
+        ! the unique set and the link array
+        allocate (uniqueCoords(3, icoor))
+        allocate (link(icoor))
+
+        ! Compute the reduced set of coordinates. The sole purpose of this
+        ! is to reduce the filesize. This will typicaly reduce the number of
+        ! coordinates by about a factor of 4.
+
+        call pointReduce(coords, iCoor, tol, uniqueCoords, link, nUnique)
+
+        ! Now that we have produced the desired isosurface on each
+        ! processor. Communicate the number of number of coordinates and the
+        ! number of triangles each proc is going to send to the root:
+
+        allocate (nPtsProc(nProc), nConnProc(nProc))
+        nPtsProc(:) = 0_intType
+        nConnProc(:) = 0_intType
+
+        call MPI_Allgather(nUnique, 1, mpi_integer4, nPtsProc, 1, mpi_integer4, &
+                           adflow_comm_world, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call MPI_Allgather(iCoor / 3, 1, mpi_integer4, nConnProc, 1, mpi_integer4, &
+                           adflow_comm_world, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        if (sum(nPtsProc) > 0) then
+
+            if (myid == 0) then ! Root proc does the writing
+
+                ! Write a new zone:
+                cgnsInd = fileIDs(sps)
+                cgnsBase = cgnsIsoSurfBases(sps)
+
+                ! Write the unstructured zone
+                call cg_zone_write_f(cgnsInd, cgnsBase, isoName, int((/sum(nPtsProc), sum(nConnProc), 0/), cgsize_t), &
+                                     Unstructured, cgnsZone, ierr)
+                if (ierr .eq. CG_ERROR) call cg_error_exit_f
+
+                if (ierr /= CG_OK) &
+                    call terminate("writeIsoSurface", &
+                                   "Something wrong when calling cg_zone_write_f")
+            end if
+
+        else
+            if (myid == 0) then
+                ! We don't actually have an isosurface. We will create a zone
+                ! that the same structure, but contains only a single triangle
+                ! with all the coordinates at zero. This way the zone still
+                ! exists and yields a uniform structure which can make
+                ! processing easier
+
+                ! Write a new zone:
+                cgnsInd = fileIDs(sps)
+                cgnsBase = cgnsIsoSurfBases(sps)
+
+                call writeEmptyZone
+
+            end if
+            ! Don't forget to deallocate the stuff allocated so far:
+            deallocate (nPtsProc, nConnProc, link, uniqueCoords, coords, weights, indices)
+            return
+        end if
+
+        ! We need to keep track of the cumulative number of nodes since each
+        ! proc has done its own ordering
+        cumNodes = 0
+
+        ! Communicate and write the coordinates
+        do iproc = 0, nProc - 1
+
+            dataOnProc: if (myid == iproc) then
+                allocate (buffer(3 * nPtsProc(iProc + 1)))
+
+                ! We will swap the order of the coordinates to packed format
+                ! since this is what we need for CGNS
+
+                do i = 1, nPtsProc(iProc + 1)
+                    buffer(i) = uniqueCoords(1, i)
+                    buffer(1 * nPtsProc(iProc + 1) + i) = uniqueCoords(2, i)
+                    buffer(2 * nPtsProc(iProc + 1) + i) = uniqueCoords(3, i)
+                end do
+
+            end if dataOnProc
+
+            if (iproc .ne. 0) then
+                tag = 13
+                if (myid == 0) then
+                    ! allocate space for the recv
+                    allocate (buffer(3 * nPtsProc(iProc + 1)))
+
+                    call mpi_recv(buffer, nPtsProc(iProc + 1) * 3, adflow_real, iProc, tag, &
+                                  adflow_comm_world, mpiStatus, ierr)
+                    call EChk(ierr, __FILE__, __LINE__)
+                end if
+
+                if (myid == iProc) then
+                    call mpi_send(buffer, nPtsProc(iProc + 1) * 3, adflow_real, 0, tag, &
+                                  adflow_comm_world, ierr)
+                    call EChk(ierr, __FILE__, __LINE__)
+                end if
+            end if
+
+            if (myid == 0) then
+                ! Now do partial writes on the root proc with points we've
+                ! received from iProc
+                if (nPtsProc(iProc + 1) > 0) then
+
+                    call cg_coord_partial_write_f(cgnsInd, cgnsBase, cgnsZone, realDouble, &
+                                                  'CoordinateX', cumNodes + 1, cumNodes + nPtsProc(iProc + 1), &
+                                                  buffer(1:nPtsProc(iProc + 1)), coordID, ierr)
+
+                    call cg_coord_partial_write_f(cgnsInd, cgnsBase, cgnsZone, realDouble, &
+                                                  'CoordinateY', cumNodes + 1, cumNodes + nPtsProc(iProc + 1), &
+                                                 buffer(nPtsProc(iProc + 1) + 1:2 * nPtsProc(iProc + 1)), coordID, ierr)
+
+                    call cg_coord_partial_write_f(cgnsInd, cgnsBase, cgnsZone, realDouble, &
+                                                  'CoordinateZ', cumNodes + 1, cumNodes + nPtsProc(iProc + 1), &
+                                             buffer(2 * nPtsProc(iProc + 1) + 1:3 * nPtsProc(iProc + 1)), coordID, ierr)
+
+                    if (ierr /= CG_OK) &
+                        call terminate("writeIsoSurface", &
+                                       "Something wrong when calling cg_coord_write_f")
+
+                    ! Increment by the number of nodes on this proc
+                    cumNodes = cumNodes + nPtsProc(iProc + 1)
+                end if
+            end if
+
+            ! Buffer was only allocated on root and current iProc
+            if (myid == iProc .or. myid == 0) then
+                deallocate (buffer)
+            end if
+        end do
+
+        ! We need to keep track of the cumulative number of nodes since each
+        ! proc has done its own ordering
+        cumNodes = 0
+        cumConn = 0
+
+        ! The partial write functionality is different between versions 2.5
+        ! and 3.1, so we will just gather all the connectivities and do a
+        ! final write at the end
+        if (myid == 0) then
+            allocate (allConn(3, sum(nConnProc)))
+        end if
+
+        ! Communicate and write the connectivity
+        do iProc = 0, nProc - 1
+            connOnProc: if (myid == iProc) then
+                allocate (connBuffer(3, nConnProc(iProc + 1)))
+                do i = 1, nConnProc(iProc + 1)
+                    connBuffer(1, i) = link(3 * i - 2)
+                    connBuffer(2, i) = link(3 * i - 1)
+                    connBuffer(3, i) = link(3 * i)
+                end do
+            end if connOnProc
+
+            ! Communication is only necessary if we are not dealing with root
+            ! proc:
+            if (iproc .ne. 0) then
+                tag = 13
+                if (myid == 0) then
+                    ! allocate space for the recv
+                    allocate (connBuffer(3, nConnProc(iProc + 1)))
+                    call mpi_recv(connBuffer, nConnProc(iProc + 1) * 3, adflow_integer, iProc, tag, &
+                                  adflow_comm_world, mpiStatus, ierr)
+                    call EChk(ierr, __FILE__, __LINE__)
+                end if
+
+                if (myid == iProc) then
+                    call mpi_send(connBuffer, nConnProc(iProc + 1) * 3, adflow_integer, 0, tag, &
+                                  adflow_comm_world, ierr)
+                    call EChk(ierr, __FILE__, __LINE__)
+                end if
+            end if
+
+            if (myid == 0) then
+
+                ! Copy into the allCon array and increment the received
+                ! local connectivity by cummNodes
+
+                do i = 1, nConnProc(iProc + 1)
+                    allConn(:, cumConn + i) = connBuffer(:, i) + cumNodes
+                end do
+
+                cumNodes = cumNodes + nPtsProc(iProc + 1)
+                cumConn = cumConn + nConnProc(iProc + 1)
+            end if
+
+            ! Buffer was only allocated on roto and iProc
+            if (myid == iProc .or. myid == 0) then
+                deallocate (connBuffer)
+            end if
+        end do
+
+        ! Finally do the (full) connectivity write
+        if (myid == 0) then
+            ! Now write on root proc:
+
+            ! Write just the connectively we have in buffer
+            call cg_section_write_f(cgnsInd, cgnsBase, cgnsZone, "ELEM", TRI_3, &
+                                    1, sum(nConnProc), 0, allConn, secID, ierr)
+            if (ierr /= CG_OK) &
+                call terminate("writeIsoSurface", &
+                               "Something wrong when calling cg_section_partial_write_f")
+
+            ! Also free allConn
+            deallocate (allConn)
+        end if
+
+        ! Finally we have to write solution data for the iso surface
+        ! iself. The main reason is that the same code is used for "slices"
+        ! as well and in that case, you want to have other data interpolated
+        ! on the "isoSurafce" (slice)
+
+        ! Write the solution node:
+        if (myid == 0) then
+            call cg_sol_write_f(cgnsInd, cgnsBase, cgnsZone, "isoSurfSolution", Vertex, solID, ierr)
+            if (ierr /= CG_OK) &
+                call terminate("writeIsoSurface", &
+                               "Something wrong when calling cg_sol_write_f")
+        end if
+
+        ! Make the buffer large enough
+        allocate (buffer(maxval(nPtsProc)))
+
+        ! Loop over variables to write:
+        do iVar = 1, nIsoSurfVar
+
+            ! We will reuse the same code as was used for computing the value
+            ! onwhich we did the interpolation. However, set 'zero' for the
+            ! isovalue such that we get the true value back
+
+            call computeIsoVariable(isoSurfSolNames(iVar), sps, zero)
+
+            ! Set points to first block:
+            call setPointers(1, 1, sps)
+            curBlock = 1
+            fn => flowDoms(1, 1, sps)%fn
+
+            do i = 1, iCoor
+
+                ! If we've switched blocks, reset points. This stil only calls
+                ! setPointer nDom times since there are at most that many
+                ! switches
+                if (indices(1, i) /= curBlock) then
+                    call setPointers(indices(1, i), 1, sps)
+                    curBlock = indices(1, i)
+                    fn => flowDoms(curBlock, 1, sps)%fn
+                end if
+
+                ! Computing interpolated value is easy using weights:
+                ! indices on x
+                buffer(link(i)) = weights(1, i) * fn(indices(2, i), indices(3, i), indices(4, i)) + &
+                                  weights(2, i) * fn(indices(5, i), indices(6, i), indices(7, i))
+            end do
+
+            cumNodes = 0
+            ! Communicate and write the solutions
+            do iproc = 0, nProc - 1
+
+                if (iproc .ne. 0) then
+                    tag = 13
+                    if (myid == 0) then
+                        call mpi_recv(buffer, nPtsProc(iProc + 1), adflow_real, iProc, tag, &
+                                      adflow_comm_world, mpiStatus, ierr)
+                        call EChk(ierr, __FILE__, __LINE__)
+                    end if
+
+                    if (myid == iProc) then
+                        call mpi_send(buffer, nPtsProc(iProc + 1), adflow_real, 0, tag, &
+                                      adflow_comm_world, ierr)
+                        call EChk(ierr, __FILE__, __LINE__)
+                    end if
+                end if
+
+                if (myid == 0) then
+                    ! Now do partial writes on the root proc with points we've
+                    ! received from iProc
+                    if (nPtsProc(iProc + 1) > 0) Then
+                        call cg_field_partial_write_f(cgnsInd, cgnsBase, cgnsZone, solID, realDouble, &
+                                                  isoSurfSolNames(iVar), cumNodes + 1, cumNodes + nPtsProc(iProc + 1), &
+                                                      buffer, fieldID, ierr)
+
+                        if (ierr /= CG_OK) &
+                            call terminate("writeIsoSurface", &
+                                           "Something wrong when calling cg_field_partial_write_f")
+
+                        ! Increment by the number of nodes on this proc
+                        cumNodes = cumNodes + nPtsProc(iProc + 1)
+                    end if
+                end if
+            end do
+        end do
+        ! Everyone deallocs buffer
+        deallocate (buffer)
+
+        ! Clear up temporary allocatable data.
+        deallocate (nPtsProc, nConnProc)
+        deallocate (coords, uniqueCoords, link)
+        deallocate (weights, indices)
+
+    contains
+
+        subroutine writeEmptyZone
+
+            call cg_zone_write_f(cgnsInd, cgnsBase, isoName, int((/3, 1, 0/), cgsize_t), &
+                                 Unstructured, cgnsZone, ierr)
+            if (ierr .eq. CG_ERROR) call cg_error_exit_f
+
+            call cg_coord_write_f(cgnsInd, cgnsBase, cgnsZone, realDouble, &
+                                  'CoordinateX', (/zero, zero, zero/), coordID, ierr)
+            if (ierr .eq. CG_ERROR) call cg_error_exit_f
+
+            call cg_coord_write_f(cgnsInd, cgnsBase, cgnsZone, realDouble, &
+                                  'CoordinateY', (/zero, zero, zero/), coordID, ierr)
+            if (ierr .eq. CG_ERROR) call cg_error_exit_f
+
+            call cg_coord_write_f(cgnsInd, cgnsBase, cgnsZone, realDouble, &
+                                  'CoordinateZ', (/zero, zero, zero/), coordID, ierr)
+            if (ierr .eq. CG_ERROR) call cg_error_exit_f
+
+            call cg_section_write_f(cgnsInd, cgnsBase, cgnsZone, "ELEM", TRI_3, &
+                                    1, 1, 0, (/1, 2, 3/), secID, ierr)
+            if (ierr .eq. CG_ERROR) call cg_error_exit_f
+
+            call cg_sol_write_f(cgnsInd, cgnsBase, cgnsZone, "isoSurfSolution", Vertex, solID, ierr)
+            if (ierr .eq. CG_ERROR) call cg_error_exit_f
+
+            do iVar = 1, nIsoSurfVar
+                call cg_field_write_f(cgnsInd, cgnsBase, cgnsZone, solID, realDouble, isoSurfSolNames(iVar), &
+                                      (/zero, zero, zero/), fieldID, ierr)
+                if (ierr .eq. CG_ERROR) call cg_error_exit_f
+            end do
+        end subroutine writeEmptyZone
+
+    end subroutine writeIsoSurface
+
+    subroutine getMatCons(matcon1, matcon2, ccwOrdering)
+
+        use precision
+        implicit none
+        integer(kind=intType) :: matCon1(256, 16), matCon2(12, 2), ccwOrdering(3, 8)
+
+        matcon2 = RESHAPE((/1, 2, 3, 1, 5, 6, 7, 5, 1, 2, 3, 4, 2, 3, 4, 4, 6, 7, 8, 8, 5, 6, 7, 8/), (/12, 2/))
+
+        ccwOrdering(:, 1) = (/-1, -1, -1/)
+        ccwOrdering(:, 2) = (/0, -1, -1/)
+        ccwOrdering(:, 3) = (/0, 0, -1/)
+        ccwOrdering(:, 4) = (/-1, 0, -1/)
+        ccwOrdering(:, 5) = (/-1, -1, 0/)
+        ccwOrdering(:, 6) = (/0, -1, 0/)
+        ccwOrdering(:, 7) = (/0, 0, 0/)
+        ccwOrdering(:, 8) = (/-1, 0, 0/)
+
+        matCon1(1, :) = (/0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(2, :) = (/1, 9, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(3, :) = (/1, 2, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(4, :) = (/2, 9, 4, 10, 9, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(5, :) = (/2, 3, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(6, :) = (/1, 9, 4, 2, 3, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(7, :) = (/10, 3, 11, 1, 3, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(8, :) = (/3, 9, 4, 3, 11, 9, 11, 10, 9, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(9, :) = (/4, 12, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(10, :) = (/1, 12, 3, 9, 12, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(11, :) = (/2, 10, 1, 3, 4, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(12, :) = (/2, 12, 3, 2, 10, 12, 10, 9, 12, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(13, :) = (/4, 11, 2, 12, 11, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(14, :) = (/1, 11, 2, 1, 9, 11, 9, 12, 11, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(15, :) = (/4, 10, 1, 4, 12, 10, 12, 11, 10, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(16, :) = (/10, 9, 11, 11, 9, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(17, :) = (/5, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(18, :) = (/5, 4, 1, 8, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(19, :) = (/1, 2, 10, 9, 5, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(20, :) = (/5, 2, 10, 5, 8, 2, 8, 4, 2, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(21, :) = (/2, 3, 11, 9, 5, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(22, :) = (/4, 5, 8, 4, 1, 5, 2, 3, 11, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(23, :) = (/10, 3, 11, 10, 1, 3, 9, 5, 8, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(24, :) = (/3, 11, 10, 3, 10, 8, 3, 8, 4, 8, 10, 5, 0, 0, 0, 0/)
+        matCon1(25, :) = (/9, 5, 8, 4, 12, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(26, :) = (/12, 5, 8, 12, 3, 5, 3, 1, 5, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(27, :) = (/10, 1, 2, 9, 5, 8, 3, 4, 12, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(28, :) = (/5, 8, 12, 10, 5, 12, 10, 12, 3, 10, 3, 2, 0, 0, 0, 0/)
+        matCon1(29, :) = (/4, 11, 2, 4, 12, 11, 8, 9, 5, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(30, :) = (/2, 12, 11, 2, 5, 12, 2, 1, 5, 8, 12, 5, 0, 0, 0, 0/)
+        matCon1(31, :) = (/5, 8, 9, 10, 1, 12, 10, 12, 11, 12, 1, 4, 0, 0, 0, 0/)
+        matCon1(32, :) = (/5, 8, 12, 5, 12, 10, 10, 12, 11, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(33, :) = (/10, 6, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(34, :) = (/10, 6, 5, 1, 9, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(35, :) = (/1, 6, 5, 2, 6, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(36, :) = (/9, 6, 5, 9, 4, 6, 4, 2, 6, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(37, :) = (/2, 3, 11, 10, 6, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(38, :) = (/4, 1, 9, 2, 3, 11, 5, 10, 6, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(39, :) = (/6, 3, 11, 6, 5, 3, 5, 1, 3, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(40, :) = (/3, 11, 6, 4, 3, 6, 4, 6, 5, 4, 5, 9, 0, 0, 0, 0/)
+        matCon1(41, :) = (/10, 6, 5, 3, 4, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(42, :) = (/1, 12, 3, 1, 9, 12, 5, 10, 6, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(43, :) = (/1, 6, 5, 1, 2, 6, 3, 4, 12, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(44, :) = (/3, 2, 6, 3, 6, 9, 3, 9, 12, 5, 9, 6, 0, 0, 0, 0/)
+        matCon1(45, :) = (/11, 4, 12, 11, 2, 4, 10, 6, 5, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(46, :) = (/5, 10, 6, 1, 9, 2, 9, 11, 2, 9, 12, 11, 0, 0, 0, 0/)
+        matCon1(47, :) = (/6, 5, 1, 6, 1, 12, 6, 12, 11, 12, 1, 4, 0, 0, 0, 0/)
+        matCon1(48, :) = (/6, 5, 9, 6, 9, 11, 11, 9, 12, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(49, :) = (/10, 8, 9, 6, 8, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(50, :) = (/10, 4, 1, 10, 6, 4, 6, 8, 4, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(51, :) = (/1, 8, 9, 1, 2, 8, 2, 6, 8, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(52, :) = (/2, 6, 4, 4, 6, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(53, :) = (/10, 8, 9, 10, 6, 8, 11, 2, 3, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(54, :) = (/11, 2, 3, 10, 6, 1, 6, 4, 1, 6, 8, 4, 0, 0, 0, 0/)
+        matCon1(55, :) = (/9, 1, 3, 9, 3, 6, 9, 6, 8, 11, 6, 3, 0, 0, 0, 0/)
+        matCon1(56, :) = (/3, 11, 6, 3, 6, 4, 4, 6, 8, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(57, :) = (/8, 10, 6, 8, 9, 10, 4, 12, 3, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(58, :) = (/10, 6, 8, 10, 8, 3, 10, 3, 1, 3, 8, 12, 0, 0, 0, 0/)
+        matCon1(59, :) = (/3, 4, 12, 1, 2, 9, 2, 8, 9, 2, 6, 8, 0, 0, 0, 0/)
+        matCon1(60, :) = (/12, 3, 2, 12, 2, 8, 8, 2, 6, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(61, :) = (/10, 6, 9, 9, 6, 8, 11, 2, 4, 11, 4, 12, 0, 0, 0, 0/)
+        matCon1(62, :) = (/6, 8, 1, 6, 1, 10, 8, 12, 1, 2, 1, 11, 12, 11, 1, 0/)
+        matCon1(63, :) = (/12, 11, 1, 12, 1, 4, 11, 6, 1, 9, 1, 8, 6, 8, 1, 0/)
+        matCon1(64, :) = (/12, 11, 6, 8, 12, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(65, :) = (/11, 7, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(66, :) = (/1, 9, 4, 6, 11, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(67, :) = (/10, 1, 2, 6, 11, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(68, :) = (/2, 9, 4, 2, 10, 9, 6, 11, 7, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(69, :) = (/2, 7, 6, 3, 7, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(70, :) = (/2, 7, 6, 2, 3, 7, 4, 1, 9, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(71, :) = (/10, 7, 6, 10, 1, 7, 1, 3, 7, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(72, :) = (/6, 10, 9, 6, 9, 3, 6, 3, 7, 4, 3, 9, 0, 0, 0, 0/)
+        matCon1(73, :) = (/3, 4, 12, 11, 7, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(74, :) = (/12, 1, 9, 12, 3, 1, 11, 7, 6, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(75, :) = (/1, 2, 10, 3, 4, 12, 6, 11, 7, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(76, :) = (/6, 11, 7, 2, 10, 3, 10, 12, 3, 10, 9, 12, 0, 0, 0, 0/)
+        matCon1(77, :) = (/7, 4, 12, 7, 6, 4, 6, 2, 4, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(78, :) = (/1, 9, 12, 1, 12, 6, 1, 6, 2, 6, 12, 7, 0, 0, 0, 0/)
+        matCon1(79, :) = (/4, 12, 7, 1, 4, 7, 1, 7, 6, 1, 6, 10, 0, 0, 0, 0/)
+        matCon1(80, :) = (/7, 6, 10, 7, 10, 12, 12, 10, 9, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(81, :) = (/6, 11, 7, 5, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(82, :) = (/5, 4, 1, 5, 8, 4, 7, 6, 11, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(83, :) = (/2, 10, 1, 6, 11, 7, 9, 5, 8, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(84, :) = (/11, 7, 6, 2, 10, 8, 2, 8, 4, 8, 10, 5, 0, 0, 0, 0/)
+        matCon1(85, :) = (/7, 2, 3, 7, 6, 2, 5, 8, 9, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(86, :) = (/2, 3, 6, 6, 3, 7, 4, 1, 5, 4, 5, 8, 0, 0, 0, 0/)
+        matCon1(87, :) = (/9, 5, 8, 10, 1, 6, 1, 7, 6, 1, 3, 7, 0, 0, 0, 0/)
+        matCon1(88, :) = (/8, 4, 10, 8, 10, 5, 4, 3, 10, 6, 10, 7, 3, 7, 10, 0/)
+        matCon1(89, :) = (/4, 12, 3, 8, 9, 5, 11, 7, 6, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(90, :) = (/6, 11, 7, 5, 8, 3, 5, 3, 1, 3, 8, 12, 0, 0, 0, 0/)
+        matCon1(91, :) = (/1, 2, 10, 5, 8, 9, 3, 4, 12, 6, 11, 7, 0, 0, 0, 0/)
+        matCon1(92, :) = (/10, 3, 2, 10, 12, 3, 10, 5, 12, 8, 12, 5, 6, 11, 7, 0/)
+        matCon1(93, :) = (/9, 5, 8, 4, 12, 6, 4, 6, 2, 6, 12, 7, 0, 0, 0, 0/)
+        matCon1(94, :) = (/6, 2, 12, 6, 12, 7, 2, 1, 12, 8, 12, 5, 1, 5, 12, 0/)
+        matCon1(95, :) = (/1, 6, 10, 1, 7, 6, 1, 4, 7, 12, 7, 4, 9, 5, 8, 0/)
+        matCon1(96, :) = (/7, 6, 10, 7, 10, 12, 5, 8, 10, 8, 12, 10, 0, 0, 0, 0/)
+        matCon1(97, :) = (/11, 5, 10, 7, 5, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(98, :) = (/5, 11, 7, 5, 10, 11, 1, 9, 4, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(99, :) = (/11, 1, 2, 11, 7, 1, 7, 5, 1, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(100, :) = (/9, 4, 2, 9, 2, 7, 9, 7, 5, 7, 2, 11, 0, 0, 0, 0/)
+        matCon1(101, :) = (/2, 5, 10, 2, 3, 5, 3, 7, 5, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(102, :) = (/4, 1, 9, 2, 3, 10, 3, 5, 10, 3, 7, 5, 0, 0, 0, 0/)
+        matCon1(103, :) = (/1, 3, 5, 5, 3, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(104, :) = (/9, 4, 3, 9, 3, 5, 5, 3, 7, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(105, :) = (/11, 5, 10, 11, 7, 5, 12, 3, 4, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(106, :) = (/1, 9, 3, 3, 9, 12, 5, 10, 11, 5, 11, 7, 0, 0, 0, 0/)
+        matCon1(107, :) = (/4, 12, 3, 1, 2, 7, 1, 7, 5, 7, 2, 11, 0, 0, 0, 0/)
+        matCon1(108, :) = (/7, 5, 2, 7, 2, 11, 5, 9, 2, 3, 2, 12, 9, 12, 2, 0/)
+        matCon1(109, :) = (/10, 7, 5, 10, 4, 7, 10, 2, 4, 12, 7, 4, 0, 0, 0, 0/)
+        matCon1(110, :) = (/9, 12, 2, 9, 2, 1, 12, 7, 2, 10, 2, 5, 7, 5, 2, 0/)
+        matCon1(111, :) = (/4, 12, 7, 4, 7, 1, 1, 7, 5, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(112, :) = (/7, 5, 9, 12, 7, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(113, :) = (/8, 11, 7, 8, 9, 11, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(114, :) = (/1, 8, 4, 1, 11, 8, 1, 10, 11, 7, 8, 11, 0, 0, 0, 0/)
+        matCon1(115, :) = (/11, 7, 8, 2, 11, 8, 2, 8, 9, 2, 9, 1, 0, 0, 0, 0/)
+        matCon1(116, :) = (/11, 7, 8, 11, 8, 2, 2, 8, 4, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(117, :) = (/2, 3, 7, 2, 7, 9, 2, 9, 10, 9, 7, 8, 0, 0, 0, 0/)
+        matCon1(118, :) = (/3, 7, 10, 3, 10, 2, 7, 8, 10, 1, 10, 4, 8, 4, 10, 0/)
+        matCon1(119, :) = (/8, 9, 1, 8, 1, 7, 7, 1, 3, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(120, :) = (/8, 4, 3, 7, 8, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(121, :) = (/3, 4, 12, 11, 7, 9, 11, 9, 10, 9, 7, 8, 0, 0, 0, 0/)
+        matCon1(122, :) = (/3, 1, 8, 3, 8, 12, 1, 10, 8, 7, 8, 11, 10, 11, 8, 0/)
+        matCon1(123, :) = (/2, 9, 1, 2, 8, 9, 2, 11, 8, 7, 8, 11, 3, 4, 12, 0/)
+        matCon1(124, :) = (/12, 3, 2, 12, 2, 8, 11, 7, 2, 7, 8, 2, 0, 0, 0, 0/)
+        matCon1(125, :) = (/9, 10, 7, 9, 7, 8, 10, 2, 7, 12, 7, 4, 2, 4, 7, 0/)
+        matCon1(126, :) = (/1, 10, 2, 12, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(127, :) = (/8, 9, 1, 8, 1, 7, 4, 12, 1, 12, 7, 1, 0, 0, 0, 0/)
+        matCon1(128, :) = (/8, 12, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(129, :) = (/8, 7, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(130, :) = (/4, 1, 9, 12, 8, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(131, :) = (/1, 2, 10, 12, 8, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(132, :) = (/9, 2, 10, 9, 4, 2, 12, 8, 7, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(133, :) = (/11, 2, 3, 7, 12, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(134, :) = (/2, 3, 11, 4, 1, 9, 7, 12, 8, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(135, :) = (/3, 10, 1, 3, 11, 10, 7, 12, 8, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(136, :) = (/7, 12, 8, 3, 11, 4, 11, 9, 4, 11, 10, 9, 0, 0, 0, 0/)
+        matCon1(137, :) = (/8, 3, 4, 7, 3, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(138, :) = (/8, 1, 9, 8, 7, 1, 7, 3, 1, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(139, :) = (/3, 8, 7, 3, 4, 8, 1, 2, 10, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(140, :) = (/2, 7, 3, 2, 9, 7, 2, 10, 9, 9, 8, 7, 0, 0, 0, 0/)
+        matCon1(141, :) = (/11, 8, 7, 11, 2, 8, 2, 4, 8, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(142, :) = (/11, 8, 7, 2, 8, 11, 2, 9, 8, 2, 1, 9, 0, 0, 0, 0/)
+        matCon1(143, :) = (/1, 4, 8, 1, 8, 11, 1, 11, 10, 7, 11, 8, 0, 0, 0, 0/)
+        matCon1(144, :) = (/8, 7, 11, 8, 11, 9, 9, 11, 10, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(145, :) = (/7, 9, 5, 12, 9, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(146, :) = (/4, 7, 12, 4, 1, 7, 1, 5, 7, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(147, :) = (/9, 7, 12, 9, 5, 7, 10, 1, 2, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(148, :) = (/10, 5, 7, 10, 7, 4, 10, 4, 2, 12, 4, 7, 0, 0, 0, 0/)
+        matCon1(149, :) = (/7, 9, 5, 7, 12, 9, 3, 11, 2, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(150, :) = (/2, 3, 11, 4, 1, 12, 1, 7, 12, 1, 5, 7, 0, 0, 0, 0/)
+        matCon1(151, :) = (/5, 12, 9, 5, 7, 12, 1, 3, 10, 3, 11, 10, 0, 0, 0, 0/)
+        matCon1(152, :) = (/11, 10, 4, 11, 4, 3, 10, 5, 4, 12, 4, 7, 5, 7, 4, 0/)
+        matCon1(153, :) = (/9, 3, 4, 9, 5, 3, 5, 7, 3, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(154, :) = (/1, 5, 3, 5, 7, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(155, :) = (/2, 10, 1, 3, 4, 5, 3, 5, 7, 5, 4, 9, 0, 0, 0, 0/)
+        matCon1(156, :) = (/2, 10, 5, 2, 5, 3, 3, 5, 7, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(157, :) = (/9, 2, 4, 9, 7, 2, 9, 5, 7, 7, 11, 2, 0, 0, 0, 0/)
+        matCon1(158, :) = (/11, 2, 1, 11, 1, 7, 7, 1, 5, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(159, :) = (/5, 7, 4, 5, 4, 9, 7, 11, 4, 1, 4, 10, 11, 10, 4, 0/)
+        matCon1(160, :) = (/11, 10, 5, 7, 11, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(161, :) = (/5, 10, 6, 8, 7, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(162, :) = (/1, 9, 4, 5, 10, 6, 12, 8, 7, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(163, :) = (/6, 1, 2, 6, 5, 1, 8, 7, 12, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(164, :) = (/12, 8, 7, 9, 4, 5, 4, 6, 5, 4, 2, 6, 0, 0, 0, 0/)
+        matCon1(165, :) = (/10, 6, 5, 11, 2, 3, 8, 7, 12, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(166, :) = (/7, 12, 8, 2, 3, 11, 1, 9, 4, 5, 10, 6, 0, 0, 0, 0/)
+        matCon1(167, :) = (/8, 7, 12, 6, 5, 11, 5, 3, 11, 5, 1, 3, 0, 0, 0, 0/)
+        matCon1(168, :) = (/4, 5, 9, 4, 6, 5, 4, 3, 6, 11, 6, 3, 12, 8, 7, 0/)
+        matCon1(169, :) = (/8, 3, 4, 8, 7, 3, 6, 5, 10, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(170, :) = (/10, 6, 5, 1, 9, 7, 1, 7, 3, 7, 9, 8, 0, 0, 0, 0/)
+        matCon1(171, :) = (/4, 7, 3, 4, 8, 7, 2, 6, 1, 6, 5, 1, 0, 0, 0, 0/)
+        matCon1(172, :) = (/7, 3, 9, 7, 9, 8, 3, 2, 9, 5, 9, 6, 2, 6, 9, 0/)
+        matCon1(173, :) = (/10, 6, 5, 11, 2, 7, 2, 8, 7, 2, 4, 8, 0, 0, 0, 0/)
+        matCon1(174, :) = (/2, 7, 11, 2, 8, 7, 2, 1, 8, 9, 8, 1, 10, 6, 5, 0/)
+        matCon1(175, :) = (/5, 1, 11, 5, 11, 6, 1, 4, 11, 7, 11, 8, 4, 8, 11, 0/)
+        matCon1(176, :) = (/8, 7, 11, 8, 11, 9, 6, 5, 11, 5, 9, 11, 0, 0, 0, 0/)
+        matCon1(177, :) = (/7, 10, 6, 7, 12, 10, 12, 9, 10, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(178, :) = (/4, 7, 12, 1, 7, 4, 1, 6, 7, 1, 10, 6, 0, 0, 0, 0/)
+        matCon1(179, :) = (/1, 12, 9, 1, 6, 12, 1, 2, 6, 6, 7, 12, 0, 0, 0, 0/)
+        matCon1(180, :) = (/7, 12, 4, 7, 4, 6, 6, 4, 2, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(181, :) = (/2, 3, 11, 10, 6, 12, 10, 12, 9, 12, 6, 7, 0, 0, 0, 0/)
+        matCon1(182, :) = (/1, 12, 4, 1, 7, 12, 1, 10, 7, 6, 7, 10, 2, 3, 11, 0/)
+        matCon1(183, :) = (/12, 9, 6, 12, 6, 7, 9, 1, 6, 11, 6, 3, 1, 3, 6, 0/)
+        matCon1(184, :) = (/7, 12, 4, 7, 4, 6, 3, 11, 4, 11, 6, 4, 0, 0, 0, 0/)
+        matCon1(185, :) = (/6, 9, 10, 6, 3, 9, 6, 7, 3, 4, 9, 3, 0, 0, 0, 0/)
+        matCon1(186, :) = (/10, 6, 7, 10, 7, 1, 1, 7, 3, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(187, :) = (/2, 6, 9, 2, 9, 1, 6, 7, 9, 4, 9, 3, 7, 3, 9, 0/)
+        matCon1(188, :) = (/2, 6, 7, 3, 2, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(189, :) = (/2, 4, 7, 2, 7, 11, 4, 9, 7, 6, 7, 10, 9, 10, 7, 0/)
+        matCon1(190, :) = (/11, 2, 1, 11, 1, 7, 10, 6, 1, 6, 7, 1, 0, 0, 0, 0/)
+        matCon1(191, :) = (/1, 4, 9, 6, 7, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(192, :) = (/11, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(193, :) = (/12, 6, 11, 8, 6, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(194, :) = (/12, 6, 11, 12, 8, 6, 9, 4, 1, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(195, :) = (/6, 12, 8, 6, 11, 12, 2, 10, 1, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(196, :) = (/11, 8, 6, 11, 12, 8, 10, 9, 2, 9, 4, 2, 0, 0, 0, 0/)
+        matCon1(197, :) = (/12, 2, 3, 12, 8, 2, 8, 6, 2, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(198, :) = (/1, 9, 4, 2, 3, 8, 2, 8, 6, 8, 3, 12, 0, 0, 0, 0/)
+        matCon1(199, :) = (/10, 8, 6, 10, 3, 8, 10, 1, 3, 3, 12, 8, 0, 0, 0, 0/)
+        matCon1(200, :) = (/8, 6, 3, 8, 3, 12, 6, 10, 3, 4, 3, 9, 10, 9, 3, 0/)
+        matCon1(201, :) = (/3, 6, 11, 3, 4, 6, 4, 8, 6, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(202, :) = (/9, 3, 1, 9, 6, 3, 9, 8, 6, 11, 3, 6, 0, 0, 0, 0/)
+        matCon1(203, :) = (/10, 1, 2, 6, 11, 4, 6, 4, 8, 4, 11, 3, 0, 0, 0, 0/)
+        matCon1(204, :) = (/10, 9, 3, 10, 3, 2, 9, 8, 3, 11, 3, 6, 8, 6, 3, 0/)
+        matCon1(205, :) = (/2, 4, 6, 4, 8, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(206, :) = (/1, 9, 8, 1, 8, 2, 2, 8, 6, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(207, :) = (/10, 1, 4, 10, 4, 6, 6, 4, 8, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(208, :) = (/10, 9, 8, 6, 10, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(209, :) = (/6, 9, 5, 6, 11, 9, 11, 12, 9, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(210, :) = (/6, 1, 5, 6, 12, 1, 6, 11, 12, 12, 4, 1, 0, 0, 0, 0/)
+        matCon1(211, :) = (/1, 2, 10, 9, 5, 11, 9, 11, 12, 11, 5, 6, 0, 0, 0, 0/)
+        matCon1(212, :) = (/11, 12, 5, 11, 5, 6, 12, 4, 5, 10, 5, 2, 4, 2, 5, 0/)
+        matCon1(213, :) = (/3, 6, 2, 3, 9, 6, 3, 12, 9, 5, 6, 9, 0, 0, 0, 0/)
+        matCon1(214, :) = (/1, 5, 12, 1, 12, 4, 5, 6, 12, 3, 12, 2, 6, 2, 12, 0/)
+        matCon1(215, :) = (/1, 3, 6, 1, 6, 10, 3, 12, 6, 5, 6, 9, 12, 9, 6, 0/)
+        matCon1(216, :) = (/10, 5, 6, 3, 12, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(217, :) = (/3, 6, 11, 4, 6, 3, 4, 5, 6, 4, 9, 5, 0, 0, 0, 0/)
+        matCon1(218, :) = (/6, 11, 3, 6, 3, 5, 5, 3, 1, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(219, :) = (/4, 11, 3, 4, 6, 11, 4, 9, 6, 5, 6, 9, 1, 2, 10, 0/)
+        matCon1(220, :) = (/6, 11, 3, 6, 3, 5, 2, 10, 3, 10, 5, 3, 0, 0, 0, 0/)
+        matCon1(221, :) = (/9, 5, 6, 9, 6, 4, 4, 6, 2, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(222, :) = (/1, 5, 6, 2, 1, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(223, :) = (/9, 5, 6, 9, 6, 4, 10, 1, 6, 1, 4, 6, 0, 0, 0, 0/)
+        matCon1(224, :) = (/10, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(225, :) = (/5, 12, 8, 5, 10, 12, 10, 11, 12, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(226, :) = (/1, 9, 4, 5, 10, 8, 10, 12, 8, 10, 11, 12, 0, 0, 0, 0/)
+        matCon1(227, :) = (/2, 11, 12, 2, 12, 5, 2, 5, 1, 8, 5, 12, 0, 0, 0, 0/)
+        matCon1(228, :) = (/4, 2, 5, 4, 5, 9, 2, 11, 5, 8, 5, 12, 11, 12, 5, 0/)
+        matCon1(229, :) = (/5, 12, 8, 10, 12, 5, 10, 3, 12, 10, 2, 3, 0, 0, 0, 0/)
+        matCon1(230, :) = (/10, 8, 5, 10, 12, 8, 10, 2, 12, 3, 12, 2, 1, 9, 4, 0/)
+        matCon1(231, :) = (/12, 8, 5, 12, 5, 3, 3, 5, 1, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(232, :) = (/12, 8, 5, 12, 5, 3, 9, 4, 5, 4, 3, 5, 0, 0, 0, 0/)
+        matCon1(233, :) = (/3, 10, 11, 3, 8, 10, 3, 4, 8, 8, 5, 10, 0, 0, 0, 0/)
+        matCon1(234, :) = (/10, 11, 8, 10, 8, 5, 11, 3, 8, 9, 8, 1, 3, 1, 8, 0/)
+        matCon1(235, :) = (/4, 8, 11, 4, 11, 3, 8, 5, 11, 2, 11, 1, 5, 1, 11, 0/)
+        matCon1(236, :) = (/2, 11, 3, 9, 8, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(237, :) = (/5, 10, 2, 5, 2, 8, 8, 2, 4, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(238, :) = (/5, 10, 2, 5, 2, 8, 1, 9, 2, 9, 8, 2, 0, 0, 0, 0/)
+        matCon1(239, :) = (/5, 1, 4, 8, 5, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(240, :) = (/5, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(241, :) = (/10, 11, 9, 11, 12, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(242, :) = (/4, 1, 10, 4, 10, 12, 12, 10, 11, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(243, :) = (/1, 2, 11, 1, 11, 9, 9, 11, 12, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(244, :) = (/4, 2, 11, 12, 4, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(245, :) = (/2, 3, 12, 2, 12, 10, 10, 12, 9, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(246, :) = (/4, 1, 10, 4, 10, 12, 2, 3, 10, 3, 12, 10, 0, 0, 0, 0/)
+        matCon1(247, :) = (/1, 3, 12, 9, 1, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(248, :) = (/4, 3, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(249, :) = (/3, 4, 9, 3, 9, 11, 11, 9, 10, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(250, :) = (/10, 11, 3, 1, 10, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(251, :) = (/3, 4, 9, 3, 9, 11, 1, 2, 9, 2, 11, 9, 0, 0, 0, 0/)
+        matCon1(252, :) = (/2, 11, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(253, :) = (/2, 4, 9, 10, 2, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(254, :) = (/1, 10, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(255, :) = (/1, 4, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+        matCon1(256, :) = (/0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0/)
+
+    end subroutine getMatCons
+
+    subroutine computeIsoVariable(solName, sps, isoVal)
+        !
+        !       computeIsoVar computes NODE centered values for the given
+        !       solName variable. It is essentially equilivent to
+        !       sotreSolInBuffer. It is assumed blockPointers are already
+        !       set to the correct block.
+        !
+        use constants
+        use blockPointers
+        use cgnsGrid
+        use cgnsNames
+        use flowVarRefState
+        use inputPhysics
+        use IOModule
+        use utils, only: setPointers, terminate
+        use flowUtils, only: computePTot
+        implicit none
+        !
+        !      Subroutine arguments.
+        character(len=*), intent(in) :: solName
+        integer(kind=intType), intent(in) :: sps
+        real(kind=realType), intent(in) :: isoVal
+        !
+        !      Local parameters
+        !
+        real(kind=realType), parameter :: plim = 0.001_realType
+        real(kind=realType), parameter :: rholim = 0.001_realType
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, ii, jj, kk, nn
+
+        real(kind=realType) :: uuy, uuz, vvx, vvz, wwx, wwy, tmp
+        real(kind=realType) :: vortx, vorty, vortz, a2, ptotInf, ptot, uova(3), gradP(3), a
+        real(kind=realType), dimension(:, :, :), pointer :: fc, fn
+
+        do nn = 1, nDom
+            call setPointers(nn, 1, sps)
+            fc => flowDoms(nn, 1, sps)%fc
+            fn => flowDoms(nn, 1, sps)%fn
+
+            select case (solName)
+
+            case (cgnsDensity)
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            fc(i, j, k) = w(i, j, k, irho)
+                        end do
+                    end do
+                end do
+
+            case (cgnsMomx)
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            fc(i, j, k) = w(i, j, k, irho) * w(i, j, k, ivx)
+                        end do
+                    end do
+                end do
+
+            case (cgnsMomy)
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            fc(i, j, k) = w(i, j, k, irho) * w(i, j, k, ivy)
+                        end do
+                    end do
                 end do
 
-                ! Based on the values at each corner, determine which
-                ! type surface we have
-                indexcube = 1
-                if (f(1) .lt. zero) indexcube = indexcube + 1
-                if (f(2) .lt. zero) indexcube = indexcube + 2
-                if (f(3) .lt. zero) indexcube = indexcube + 4
-                if (f(4) .lt. zero) indexcube = indexcube + 8
-                if (f(5) .lt. zero) indexcube = indexcube + 16
-                if (f(6) .lt. zero) indexcube = indexcube + 32
-                if (f(7) .lt. zero) indexcube = indexcube + 64
-                if (f(8) .lt. zero) indexcube = indexcube + 128
-
-                logic1 = .true.
-
-                kk = 1
-                do while (logic1)
-                   icon = matcon1(indexcube, kk)
-
-                   if (icon == 0) then
-                      logic1=.false.
-                   else
-
-                      iCoor = iCoor + 1
-                      if (iCoor > nMax) then
-                         ! Need to realloc the coord array. Make it double the size
-                         call reallocateReal2(weights, 2, 2*nMax, 2, nMax, .true.)
-                         call reallocateInteger2(indices, 7, 2*nMax, 7, nMax, .true.)
-                         nMax = nMax * 2
-                      end if
-
-                      num1 = matcon2(icon,1)
-                      num2 = matcon2(icon,2)
-
-                      ! Weight factors
-                      weights(2, iCoor) = (zero - f(num1))/(f(num2) - f(num1))
-                      weights(1, iCoor) = one - weights(2, icoor)
-
-                      ! Indices of nodes
-                      n1 = (/i, j, k/) + ccwOrdering(: ,num1)
-                      n2 = (/i, j, k/) + ccwOrdering(:, num2)
-                      indices(:, iCoor) = (/nn, n1(1), n1(2), n1(3), n2(1), n2(2), n2(3)/)
-
-                      kk = kk + 1
-                   end if
+            case (cgnsMomz)
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            fc(i, j, k) = w(i, j, k, irho) * w(i, j, k, ivz)
+                        end do
+                    end do
                 end do
 
-             end do ! I loop
-          end do ! J loop
-       end do ! K loop
-    end do ! Domain loop
-
-    ! We have not actually stored the coordintes; only the positions and
-    ! the weights. To compute the coordinates we pass back through and assemble
-    allocate(Coords(3, iCoor))
-
-    ! Set pointer to first block
-    call setPointers(1, 1, sps)
-    curBlock = 1
-    do i=1,iCoor
-
-       ! If we've switched blocks, reset points. This stil only calls
-       ! setPointer nDom times since there are at most that many
-       ! switches
-       if (indices(1, i) /= curBlock) then
-          call setPointers(indices(1, i), 1, sps)
-          curBlock = indices(1, i)
-       end if
-
-       ! Computing coordinates is easy; we just juse the weights and the
-       ! indices on x
-       do idim=1,3
-          coords(idim, i) = &
-               weights(1, i) * &
-               X(indices(2,i), indices(3,i), indices(4,i), idim) + &
-               weights(2, i) * &
-               X(indices(5,i), indices(6,i), indices(7,i), idim)
-       end do
-    end do
-
-    ! Now we know the maximum number of coordinates so we can allocate
-    ! the unique set and the link array
-    allocate(uniqueCoords(3, icoor))
-    allocate(link(icoor))
-
-    ! Compute the reduced set of coordinates. The sole purpose of this
-    ! is to reduce the filesize. This will typicaly reduce the number of
-    ! coordinates by about a factor of 4.
-
-    call pointReduce(coords, iCoor, tol, uniqueCoords, link, nUnique)
-
-    ! Now that we have produced the desired isosurface on each
-    ! processor. Communicate the number of number of coordinates and the
-    ! number of triangles each proc is going to send to the root:
-
-    allocate(nPtsProc(nProc), nConnProc(nProc))
-    nPtsProc(:) = 0_intType
-    nConnProc(:) = 0_intType
-
-    call  MPI_Allgather(nUnique, 1, mpi_integer4, nPtsProc, 1, mpi_integer4, &
-         adflow_comm_world, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    call  MPI_Allgather(iCoor/3, 1, mpi_integer4, nConnProc, 1, mpi_integer4, &
-         adflow_comm_world, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-
-    if (sum(nPtsProc) > 0) then
-
-       if (myid == 0) then ! Root proc does the writing
-
-          ! Write a new zone:
-          cgnsInd = fileIDs(sps)
-          cgnsBase = cgnsIsoSurfBases(sps)
-
-          ! Write the unstructured zone
-          call cg_zone_write_f(cgnsInd, cgnsBase, isoName, int((/sum(nPtsProc), sum(nConnProc), 0/), cgsize_t), &
-               Unstructured, cgnsZone, ierr)
-          if (ierr .eq. CG_ERROR) call cg_error_exit_f
-
-          if(ierr /= CG_OK)                    &
-               call terminate("writeIsoSurface", &
-               "Something wrong when calling cg_zone_write_f")
-       end if
-
-    else
-       if (myid == 0) then
-          ! We don't actually have an isosurface. We will create a zone
-          ! that the same structure, but contains only a single triangle
-          ! with all the coordinates at zero. This way the zone still
-          ! exists and yields a uniform structure which can make
-          ! processing easier
-
-          ! Write a new zone:
-          cgnsInd = fileIDs(sps)
-          cgnsBase = cgnsIsoSurfBases(sps)
-
-          call writeEmptyZone
-
-       end if
-       ! Don't forget to deallocate the stuff allocated so far:
-       deallocate(nPtsProc, nConnProc, link, uniqueCoords, coords, weights, indices)
-       return
-    end if
-
-    ! We need to keep track of the cumulative number of nodes since each
-    ! proc has done its own ordering
-    cumNodes = 0
-
-    ! Communicate and write the coordinates
-    do iproc=0, nProc-1
-
-       dataOnProc: if (myid == iproc) then
-          allocate(buffer(3*nPtsProc(iProc+1)))
-
-          ! We will swap the order of the coordinates to packed format
-          ! since this is what we need for CGNS
-
-          do i=1,nPtsProc(iProc+1)
-             buffer(i)                      = uniqueCoords(1, i)
-             buffer(1*nPtsProc(iProc+1)+ i) = uniqueCoords(2, i)
-             buffer(2*nPtsProc(iProc+1)+ i) = uniqueCoords(3, i)
-          end do
-
-       end if dataOnProc
-
-       if (iproc .ne. 0) then
-          tag = 13
-          if (myid == 0) then
-             ! allocate space for the recv
-             allocate(buffer(3*nPtsProc(iProc+1)))
-
-             call mpi_recv(buffer, nPtsProc(iProc+1)*3, adflow_real, iProc, tag, &
-                  adflow_comm_world, mpiStatus, ierr)
-             call EChk(ierr, __FILE__, __LINE__)
-          end if
-
-          if (myid == iProc) then
-             call mpi_send(buffer, nPtsProc(iProc+1)*3, adflow_real, 0, tag, &
-                  adflow_comm_world, ierr)
-             call EChk(ierr, __FILE__, __LINE__)
-          end if
-       end if
-
-       if (myid == 0) then
-          ! Now do partial writes on the root proc with points we've
-          ! received from iProc
-          if (nPtsProc(iProc+1) > 0) then
-
-             call cg_coord_partial_write_f(cgnsInd, cgnsBase, cgnsZone, realDouble, &
-                  'CoordinateX', cumNodes+1, cumNodes+nPtsProc(iProc+1), &
-                  buffer(1:nPtsProc(iProc+1)), coordID, ierr)
-
-             call cg_coord_partial_write_f(cgnsInd, cgnsBase, cgnsZone, realDouble, &
-                  'CoordinateY', cumNodes+1, cumNodes+nPtsProc(iProc+1), &
-                  buffer(nPtsProc(iProc+1)+1:2*nPtsProc(iProc+1)), coordID, ierr)
-
-             call cg_coord_partial_write_f(cgnsInd, cgnsBase, cgnsZone, realDouble, &
-                  'CoordinateZ', cumNodes+1, cumNodes+nPtsProc(iProc+1), &
-                  buffer(2*nPtsProc(iProc+1)+1:3*nPtsProc(iProc+1)), coordID, ierr)
-
-             if(ierr /= CG_OK)                    &
-                  call terminate("writeIsoSurface", &
-                  "Something wrong when calling cg_coord_write_f")
-
-             ! Increment by the number of nodes on this proc
-             cumNodes = cumNodes + nPtsProc(iProc+1)
-          end if
-       end if
-
-       ! Buffer was only allocated on root and current iProc
-       if (myid == iProc .or. myid == 0) then
-          deallocate(buffer)
-       end if
-    end do
-
-    ! We need to keep track of the cumulative number of nodes since each
-    ! proc has done its own ordering
-    cumNodes = 0
-    cumConn  = 0
-
-    ! The partial write functionality is different between versions 2.5
-    ! and 3.1, so we will just gather all the connectivities and do a
-    ! final write at the end
-    if (myid == 0) then
-       allocate(allConn(3, sum(nConnProc)))
-    endif
-
-    ! Communicate and write the connectivity
-    do iProc=0, nProc-1
-       connOnProc: if (myid == iProc) then
-          allocate(connBuffer(3,nConnProc(iProc+1)))
-          do i=1,nConnProc(iProc+1)
-             connBuffer(1, i) = link(3*i-2)
-             connBuffer(2, i) = link(3*i-1)
-             connBuffer(3, i) = link(3*i  )
-          end do
-       end if connOnProc
-
-       ! Communication is only necessary if we are not dealing with root
-       ! proc:
-       if (iproc .ne. 0) then
-          tag = 13
-          if (myid == 0) then
-             ! allocate space for the recv
-             allocate(connBuffer(3,nConnProc(iProc+1)))
-             call mpi_recv(connBuffer, nConnProc(iProc+1)*3, adflow_integer, iProc, tag, &
-                  adflow_comm_world, mpiStatus, ierr)
-             call EChk(ierr, __FILE__, __LINE__)
-          end if
-
-          if (myid == iProc) then
-             call mpi_send(connBuffer, nConnProc(iProc+1)*3, adflow_integer, 0, tag, &
-                  adflow_comm_world, ierr)
-             call EChk(ierr, __FILE__, __LINE__)
-          end if
-       endif
-
-       if (myid == 0) then
-
-          ! Copy into the allCon array and increment the received
-          ! local connectivity by cummNodes
-
-          do i=1,nConnProc(iProc+1)
-             allConn(:, cumConn + i) = connBuffer(:, i) + cumNodes
-          end do
-
-          cumNodes = cumNodes + nPtsProc(iProc+1)
-          cumConn  = cumConn  + nConnProc(iProc+1)
-       end if
-
-       ! Buffer was only allocated on roto and iProc
-       if (myid == iProc .or. myid == 0) then
-          deallocate(connBuffer)
-       end if
-    end do
-
-    ! Finally do the (full) connectivity write
-    if (myid == 0) then
-       ! Now write on root proc:
-
-       ! Write just the connectively we have in buffer
-       call cg_section_write_f(cgnsInd, cgnsBase, cgnsZone, "ELEM", TRI_3, &
-            1, sum(nConnProc), 0, allConn, secID, ierr)
-       if(ierr /= CG_OK)                    &
-            call terminate("writeIsoSurface", &
-            "Something wrong when calling cg_section_partial_write_f")
-
-       ! Also free allConn
-       deallocate(allConn)
-    end if
-
-    ! Finally we have to write solution data for the iso surface
-    ! iself. The main reason is that the same code is used for "slices"
-    ! as well and in that case, you want to have other data interpolated
-    ! on the "isoSurafce" (slice)
-
-    ! Write the solution node:
-    if (myid == 0) then
-       call cg_sol_write_f(cgnsInd, cgnsBase, cgnsZone, "isoSurfSolution", Vertex, solID, ierr)
-       if(ierr /= CG_OK)                    &
-            call terminate("writeIsoSurface", &
-            "Something wrong when calling cg_sol_write_f")
-    end if
-
-    ! Make the buffer large enough
-    allocate(buffer(maxval(nPtsProc)))
-
-    ! Loop over variables to write:
-    do iVar=1,nIsoSurfVar
-
-       ! We will reuse the same code as was used for computing the value
-       ! onwhich we did the interpolation. However, set 'zero' for the
-       ! isovalue such that we get the true value back
-
-       call computeIsoVariable(isoSurfSolNames(iVar), sps, zero)
-
-       ! Set points to first block:
-       call setPointers(1, 1, sps)
-       curBlock = 1
-       fn => flowDoms(1, 1, sps)%fn
-
-       do i=1,iCoor
-
-          ! If we've switched blocks, reset points. This stil only calls
-          ! setPointer nDom times since there are at most that many
-          ! switches
-          if (indices(1, i) /= curBlock) then
-             call setPointers(indices(1, i), 1, sps)
-             curBlock = indices(1, i)
-             fn => flowDoms(curBlock, 1, sps)%fn
-          end if
-
-          ! Computing interpolated value is easy using weights:
-          ! indices on x
-          buffer(link(i)) = weights(1, i) * fn(indices(2,i), indices(3,i), indices(4,i)) + &
-               weights(2, i) * fn(indices(5,i), indices(6,i), indices(7,i))
-       end do
-
-       cumNodes = 0
-       ! Communicate and write the solutions
-       do iproc=0, nProc-1
-
-          if (iproc .ne. 0) then
-             tag = 13
-             if (myid == 0) then
-                call mpi_recv(buffer, nPtsProc(iProc+1), adflow_real, iProc, tag, &
-                     adflow_comm_world, mpiStatus, ierr)
-                call EChk(ierr, __FILE__, __LINE__)
-             end if
-
-             if (myid == iProc) then
-                call mpi_send(buffer, nPtsProc(iProc+1), adflow_real, 0, tag, &
-                     adflow_comm_world, ierr)
-                call EChk(ierr, __FILE__, __LINE__)
-             end if
-          end if
-
-          if (myid == 0) then
-             ! Now do partial writes on the root proc with points we've
-             ! received from iProc
-             if (nPtsProc(iProc+1) > 0) Then
-                call cg_field_partial_write_f(cgnsInd, cgnsBase, cgnsZone, solID, realDouble, &
-                     isoSurfSolNames(iVar), cumNodes+1, cumNodes + nPtsProc(iProc+1), &
-                     buffer, fieldID, ierr)
-
-                if(ierr /= CG_OK)                    &
-                     call terminate("writeIsoSurface", &
-                     "Something wrong when calling cg_field_partial_write_f")
-
-                ! Increment by the number of nodes on this proc
-                cumNodes = cumNodes + nPtsProc(iProc+1)
-             end if
-          end if
-       end do
-    end do
-    ! Everyone deallocs buffer
-    deallocate(buffer)
-
-    ! Clear up temporary allocatable data.
-    deallocate(nPtsProc, nConnProc)
-    deallocate(coords, uniqueCoords, link)
-    deallocate(weights, indices)
-
-  contains
-
-    subroutine writeEmptyZone
-
-      call cg_zone_write_f(cgnsInd, cgnsBase, isoName, int((/3, 1, 0/), cgsize_t), &
-           Unstructured, cgnsZone, ierr)
-      if (ierr .eq. CG_ERROR) call cg_error_exit_f
-
-      call cg_coord_write_f(cgnsInd, cgnsBase, cgnsZone, realDouble, &
-           'CoordinateX', (/zero, zero, zero/), coordID, ierr)
-      if (ierr .eq. CG_ERROR) call cg_error_exit_f
-
-      call cg_coord_write_f(cgnsInd, cgnsBase, cgnsZone, realDouble, &
-           'CoordinateY', (/zero, zero, zero /), coordID, ierr)
-      if (ierr .eq. CG_ERROR) call cg_error_exit_f
-
-      call cg_coord_write_f(cgnsInd, cgnsBase, cgnsZone, realDouble, &
-           'CoordinateZ', (/zero, zero, zero/), coordID, ierr)
-      if (ierr .eq. CG_ERROR) call cg_error_exit_f
-
-      call cg_section_write_f(cgnsInd, cgnsBase, cgnsZone, "ELEM", TRI_3, &
-           1, 1, 0, (/1, 2, 3/), secID, ierr)
-      if (ierr .eq. CG_ERROR) call cg_error_exit_f
-
-      call cg_sol_write_f(cgnsInd, cgnsBase, cgnsZone, "isoSurfSolution", Vertex, solID, ierr)
-      if (ierr .eq. CG_ERROR) call cg_error_exit_f
-
-      do iVar = 1, nIsoSurfVar
-         call cg_field_write_f(cgnsInd, cgnsBase, cgnsZone, solID, realDouble, isoSurfSolNames(iVar), &
-              (/zero, zero, zero/), fieldID, ierr)
-         if (ierr .eq. CG_ERROR) call cg_error_exit_f
-      end do
-    end subroutine writeEmptyZone
-
-  end subroutine writeIsoSurface
-
-
-  subroutine getMatCons(matcon1, matcon2, ccwOrdering)
-
-    use precision
-    implicit none
-    integer(kind=intType) :: matCon1(256, 16), matCon2(12, 2), ccwOrdering(3, 8)
-
-    matcon2=RESHAPE((/1,2,3,1,5,6,7,5,1,2,3,4,2,3,4,4,6,7,8,8,5,6,7,8/),(/12,2/))
-
-    ccwOrdering(:, 1) = (/-1, -1, -1/)
-    ccwOrdering(:, 2) = (/ 0, -1, -1/)
-    ccwOrdering(:, 3) = (/ 0,  0, -1/)
-    ccwOrdering(:, 4) = (/-1,  0, -1/)
-    ccwOrdering(:, 5) = (/-1, -1,  0/)
-    ccwOrdering(:, 6) = (/ 0, -1,  0/)
-    ccwOrdering(:, 7) = (/ 0,  0,  0/)
-    ccwOrdering(:, 8) = (/-1,  0,  0/)
-
-    matCon1(1,:)=(/0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(2,:)=(/1,9,4,0,0,0,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(3,:)=(/1,2,10,0,0,0,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(4,:)=(/2,9,4,10,9,2,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(5,:)=(/2,3,11,0,0,0,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(6,:)=(/1,9,4,2,3,11,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(7,:)=(/10,3,11,1,3,10,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(8,:)=(/3,9,4,3,11,9,11,10,9,0,0,0,0,0,0,0/)
-    matCon1(9,:)=(/4,12,3,0,0,0,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(10,:)=(/1,12,3,9,12,1,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(11,:)=(/2,10,1,3,4,12,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(12,:)=(/2,12,3,2,10,12,10,9,12,0,0,0,0,0,0,0/)
-    matCon1(13,:)=(/4,11,2,12,11,4,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(14,:)=(/1,11,2,1,9,11,9,12,11,0,0,0,0,0,0,0/)
-    matCon1(15,:)=(/4,10,1,4,12,10,12,11,10,0,0,0,0,0,0,0/)
-    matCon1(16,:)=(/10,9,11,11,9,12,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(17,:)=(/5,8,9,0,0,0,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(18,:)=(/5,4,1,8,4,5,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(19,:)=(/1,2,10,9,5,8,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(20,:)=(/5,2,10,5,8,2,8,4,2,0,0,0,0,0,0,0/)
-    matCon1(21,:)=(/2,3,11,9,5,8,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(22,:)=(/4,5,8,4,1,5,2,3,11,0,0,0,0,0,0,0/)
-    matCon1(23,:)=(/10,3,11,10,1,3,9,5,8,0,0,0,0,0,0,0/)
-    matCon1(24,:)=(/3,11,10,3,10,8,3,8,4,8,10,5,0,0,0,0/)
-    matCon1(25,:)=(/9,5,8,4,12,3,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(26,:)=(/12,5,8,12,3,5,3,1,5,0,0,0,0,0,0,0/)
-    matCon1(27,:)=(/10,1,2,9,5,8,3,4,12,0,0,0,0,0,0,0/)
-    matCon1(28,:)=(/5,8,12,10,5,12,10,12,3,10,3,2,0,0,0,0/)
-    matCon1(29,:)=(/4,11,2,4,12,11,8,9,5,0,0,0,0,0,0,0/)
-    matCon1(30,:)=(/2,12,11,2,5,12,2,1,5,8,12,5,0,0,0,0/)
-    matCon1(31,:)=(/5,8,9,10,1,12,10,12,11,12,1,4,0,0,0,0/)
-    matCon1(32,:)=(/5,8,12,5,12,10,10,12,11,0,0,0,0,0,0,0/)
-    matCon1(33,:)=(/10,6,5,0,0,0,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(34,:)=(/10,6,5,1,9,4,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(35,:)=(/1,6,5,2,6,1,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(36,:)=(/9,6,5,9,4,6,4,2,6,0,0,0,0,0,0,0/)
-    matCon1(37,:)=(/2,3,11,10,6,5,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(38,:)=(/4,1,9,2,3,11,5,10,6,0,0,0,0,0,0,0/)
-    matCon1(39,:)=(/6,3,11,6,5,3,5,1,3,0,0,0,0,0,0,0/)
-    matCon1(40,:)=(/3,11,6,4,3,6,4,6,5,4,5,9,0,0,0,0/)
-    matCon1(41,:)=(/10,6,5,3,4,12,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(42,:)=(/1,12,3,1,9,12,5,10,6,0,0,0,0,0,0,0/)
-    matCon1(43,:)=(/1,6,5,1,2,6,3,4,12,0,0,0,0,0,0,0/)
-    matCon1(44,:)=(/3,2,6,3,6,9,3,9,12,5,9,6,0,0,0,0/)
-    matCon1(45,:)=(/11,4,12,11,2,4,10,6,5,0,0,0,0,0,0,0/)
-    matCon1(46,:)=(/5,10,6,1,9,2,9,11,2,9,12,11,0,0,0,0/)
-    matCon1(47,:)=(/6,5,1,6,1,12,6,12,11,12,1,4,0,0,0,0/)
-    matCon1(48,:)=(/6,5,9,6,9,11,11,9,12,0,0,0,0,0,0,0/)
-    matCon1(49,:)=(/10,8,9,6,8,10,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(50,:)=(/10,4,1,10,6,4,6,8,4,0,0,0,0,0,0,0/)
-    matCon1(51,:)=(/1,8,9,1,2,8,2,6,8,0,0,0,0,0,0,0/)
-    matCon1(52,:)=(/2,6,4,4,6,8,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(53,:)=(/10,8,9,10,6,8,11,2,3,0,0,0,0,0,0,0/)
-    matCon1(54,:)=(/11,2,3,10,6,1,6,4,1,6,8,4,0,0,0,0/)
-    matCon1(55,:)=(/9,1,3,9,3,6,9,6,8,11,6,3,0,0,0,0/)
-    matCon1(56,:)=(/3,11,6,3,6,4,4,6,8,0,0,0,0,0,0,0/)
-    matCon1(57,:)=(/8,10,6,8,9,10,4,12,3,0,0,0,0,0,0,0/)
-    matCon1(58,:)=(/10,6,8,10,8,3,10,3,1,3,8,12,0,0,0,0/)
-    matCon1(59,:)=(/3,4,12,1,2,9,2,8,9,2,6,8,0,0,0,0/)
-    matCon1(60,:)=(/12,3,2,12,2,8,8,2,6,0,0,0,0,0,0,0/)
-    matCon1(61,:)=(/10,6,9,9,6,8,11,2,4,11,4,12,0,0,0,0/)
-    matCon1(62,:)=(/6,8,1,6,1,10,8,12,1,2,1,11,12,11,1,0/)
-    matCon1(63,:)=(/12,11,1,12,1,4,11,6,1,9,1,8,6,8,1,0/)
-    matCon1(64,:)=(/12,11,6,8,12,6,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(65,:)=(/11,7,6,0,0,0,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(66,:)=(/1,9,4,6,11,7,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(67,:)=(/10,1,2,6,11,7,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(68,:)=(/2,9,4,2,10,9,6,11,7,0,0,0,0,0,0,0/)
-    matCon1(69,:)=(/2,7,6,3,7,2,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(70,:)=(/2,7,6,2,3,7,4,1,9,0,0,0,0,0,0,0/)
-    matCon1(71,:)=(/10,7,6,10,1,7,1,3,7,0,0,0,0,0,0,0/)
-    matCon1(72,:)=(/6,10,9,6,9,3,6,3,7,4,3,9,0,0,0,0/)
-    matCon1(73,:)=(/3,4,12,11,7,6,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(74,:)=(/12,1,9,12,3,1,11,7,6,0,0,0,0,0,0,0/)
-    matCon1(75,:)=(/1,2,10,3,4,12,6,11,7,0,0,0,0,0,0,0/)
-    matCon1(76,:)=(/6,11,7,2,10,3,10,12,3,10,9,12,0,0,0,0/)
-    matCon1(77,:)=(/7,4,12,7,6,4,6,2,4,0,0,0,0,0,0,0/)
-    matCon1(78,:)=(/1,9,12,1,12,6,1,6,2,6,12,7,0,0,0,0/)
-    matCon1(79,:)=(/4,12,7,1,4,7,1,7,6,1,6,10,0,0,0,0/)
-    matCon1(80,:)=(/7,6,10,7,10,12,12,10,9,0,0,0,0,0,0,0/)
-    matCon1(81,:)=(/6,11,7,5,8,9,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(82,:)=(/5,4,1,5,8,4,7,6,11,0,0,0,0,0,0,0/)
-    matCon1(83,:)=(/2,10,1,6,11,7,9,5,8,0,0,0,0,0,0,0/)
-    matCon1(84,:)=(/11,7,6,2,10,8,2,8,4,8,10,5,0,0,0,0/)
-    matCon1(85,:)=(/7,2,3,7,6,2,5,8,9,0,0,0,0,0,0,0/)
-    matCon1(86,:)=(/2,3,6,6,3,7,4,1,5,4,5,8,0,0,0,0/)
-    matCon1(87,:)=(/9,5,8,10,1,6,1,7,6,1,3,7,0,0,0,0/)
-    matCon1(88,:)=(/8,4,10,8,10,5,4,3,10,6,10,7,3,7,10,0/)
-    matCon1(89,:)=(/4,12,3,8,9,5,11,7,6,0,0,0,0,0,0,0/)
-    matCon1(90,:)=(/6,11,7,5,8,3,5,3,1,3,8,12,0,0,0,0/)
-    matCon1(91,:)=(/1,2,10,5,8,9,3,4,12,6,11,7,0,0,0,0/)
-    matCon1(92,:)=(/10,3,2,10,12,3,10,5,12,8,12,5,6,11,7,0/)
-    matCon1(93,:)=(/9,5,8,4,12,6,4,6,2,6,12,7,0,0,0,0/)
-    matCon1(94,:)=(/6,2,12,6,12,7,2,1,12,8,12,5,1,5,12,0/)
-    matCon1(95,:)=(/1,6,10,1,7,6,1,4,7,12,7,4,9,5,8,0/)
-    matCon1(96,:)=(/7,6,10,7,10,12,5,8,10,8,12,10,0,0,0,0/)
-    matCon1(97,:)=(/11,5,10,7,5,11,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(98,:)=(/5,11,7,5,10,11,1,9,4,0,0,0,0,0,0,0/)
-    matCon1(99,:)=(/11,1,2,11,7,1,7,5,1,0,0,0,0,0,0,0/)
-    matCon1(100,:)=(/9,4,2,9,2,7,9,7,5,7,2,11,0,0,0,0/)
-    matCon1(101,:)=(/2,5,10,2,3,5,3,7,5,0,0,0,0,0,0,0/)
-    matCon1(102,:)=(/4,1,9,2,3,10,3,5,10,3,7,5,0,0,0,0/)
-    matCon1(103,:)=(/1,3,5,5,3,7,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(104,:)=(/9,4,3,9,3,5,5,3,7,0,0,0,0,0,0,0/)
-    matCon1(105,:)=(/11,5,10,11,7,5,12,3,4,0,0,0,0,0,0,0/)
-    matCon1(106,:)=(/1,9,3,3,9,12,5,10,11,5,11,7,0,0,0,0/)
-    matCon1(107,:)=(/4,12,3,1,2,7,1,7,5,7,2,11,0,0,0,0/)
-    matCon1(108,:)=(/7,5,2,7,2,11,5,9,2,3,2,12,9,12,2,0/)
-    matCon1(109,:)=(/10,7,5,10,4,7,10,2,4,12,7,4,0,0,0,0/)
-    matCon1(110,:)=(/9,12,2,9,2,1,12,7,2,10,2,5,7,5,2,0/)
-    matCon1(111,:)=(/4,12,7,4,7,1,1,7,5,0,0,0,0,0,0,0/)
-    matCon1(112,:)=(/7,5,9,12,7,9,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(113,:)=(/8,11,7,8,9,11,9,10,11,0,0,0,0,0,0,0/)
-    matCon1(114,:)=(/1,8,4,1,11,8,1,10,11,7,8,11,0,0,0,0/)
-    matCon1(115,:)=(/11,7,8,2,11,8,2,8,9,2,9,1,0,0,0,0/)
-    matCon1(116,:)=(/11,7,8,11,8,2,2,8,4,0,0,0,0,0,0,0/)
-    matCon1(117,:)=(/2,3,7,2,7,9,2,9,10,9,7,8,0,0,0,0/)
-    matCon1(118,:)=(/3,7,10,3,10,2,7,8,10,1,10,4,8,4,10,0/)
-    matCon1(119,:)=(/8,9,1,8,1,7,7,1,3,0,0,0,0,0,0,0/)
-    matCon1(120,:)=(/8,4,3,7,8,3,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(121,:)=(/3,4,12,11,7,9,11,9,10,9,7,8,0,0,0,0/)
-    matCon1(122,:)=(/3,1,8,3,8,12,1,10,8,7,8,11,10,11,8,0/)
-    matCon1(123,:)=(/2,9,1,2,8,9,2,11,8,7,8,11,3,4,12,0/)
-    matCon1(124,:)=(/12,3,2,12,2,8,11,7,2,7,8,2,0,0,0,0/)
-    matCon1(125,:)=(/9,10,7,9,7,8,10,2,7,12,7,4,2,4,7,0/)
-    matCon1(126,:)=(/1,10,2,12,7,8,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(127,:)=(/8,9,1,8,1,7,4,12,1,12,7,1,0,0,0,0/)
-    matCon1(128,:)=(/8,12,7,0,0,0,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(129,:)=(/8,7,12,0,0,0,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(130,:)=(/4,1,9,12,8,7,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(131,:)=(/1,2,10,12,8,7,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(132,:)=(/9,2,10,9,4,2,12,8,7,0,0,0,0,0,0,0/)
-    matCon1(133,:)=(/11,2,3,7,12,8,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(134,:)=(/2,3,11,4,1,9,7,12,8,0,0,0,0,0,0,0/)
-    matCon1(135,:)=(/3,10,1,3,11,10,7,12,8,0,0,0,0,0,0,0/)
-    matCon1(136,:)=(/7,12,8,3,11,4,11,9,4,11,10,9,0,0,0,0/)
-    matCon1(137,:)=(/8,3,4,7,3,8,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(138,:)=(/8,1,9,8,7,1,7,3,1,0,0,0,0,0,0,0/)
-    matCon1(139,:)=(/3,8,7,3,4,8,1,2,10,0,0,0,0,0,0,0/)
-    matCon1(140,:)=(/2,7,3,2,9,7,2,10,9,9,8,7,0,0,0,0/)
-    matCon1(141,:)=(/11,8,7,11,2,8,2,4,8,0,0,0,0,0,0,0/)
-    matCon1(142,:)=(/11,8,7,2,8,11,2,9,8,2,1,9,0,0,0,0/)
-    matCon1(143,:)=(/1,4,8,1,8,11,1,11,10,7,11,8,0,0,0,0/)
-    matCon1(144,:)=(/8,7,11,8,11,9,9,11,10,0,0,0,0,0,0,0/)
-    matCon1(145,:)=(/7,9,5,12,9,7,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(146,:)=(/4,7,12,4,1,7,1,5,7,0,0,0,0,0,0,0/)
-    matCon1(147,:)=(/9,7,12,9,5,7,10,1,2,0,0,0,0,0,0,0/)
-    matCon1(148,:)=(/10,5,7,10,7,4,10,4,2,12,4,7,0,0,0,0/)
-    matCon1(149,:)=(/7,9,5,7,12,9,3,11,2,0,0,0,0,0,0,0/)
-    matCon1(150,:)=(/2,3,11,4,1,12,1,7,12,1,5,7,0,0,0,0/)
-    matCon1(151,:)=(/5,12,9,5,7,12,1,3,10,3,11,10,0,0,0,0/)
-    matCon1(152,:)=(/11,10,4,11,4,3,10,5,4,12,4,7,5,7,4,0/)
-    matCon1(153,:)=(/9,3,4,9,5,3,5,7,3,0,0,0,0,0,0,0/)
-    matCon1(154,:)=(/1,5,3,5,7,3,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(155,:)=(/2,10,1,3,4,5,3,5,7,5,4,9,0,0,0,0/)
-    matCon1(156,:)=(/2,10,5,2,5,3,3,5,7,0,0,0,0,0,0,0/)
-    matCon1(157,:)=(/9,2,4,9,7,2,9,5,7,7,11,2,0,0,0,0/)
-    matCon1(158,:)=(/11,2,1,11,1,7,7,1,5,0,0,0,0,0,0,0/)
-    matCon1(159,:)=(/5,7,4,5,4,9,7,11,4,1,4,10,11,10,4,0/)
-    matCon1(160,:)=(/11,10,5,7,11,5,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(161,:)=(/5,10,6,8,7,12,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(162,:)=(/1,9,4,5,10,6,12,8,7,0,0,0,0,0,0,0/)
-    matCon1(163,:)=(/6,1,2,6,5,1,8,7,12,0,0,0,0,0,0,0/)
-    matCon1(164,:)=(/12,8,7,9,4,5,4,6,5,4,2,6,0,0,0,0/)
-    matCon1(165,:)=(/10,6,5,11,2,3,8,7,12,0,0,0,0,0,0,0/)
-    matCon1(166,:)=(/7,12,8,2,3,11,1,9,4,5,10,6,0,0,0,0/)
-    matCon1(167,:)=(/8,7,12,6,5,11,5,3,11,5,1,3,0,0,0,0/)
-    matCon1(168,:)=(/4,5,9,4,6,5,4,3,6,11,6,3,12,8,7,0/)
-    matCon1(169,:)=(/8,3,4,8,7,3,6,5,10,0,0,0,0,0,0,0/)
-    matCon1(170,:)=(/10,6,5,1,9,7,1,7,3,7,9,8,0,0,0,0/)
-    matCon1(171,:)=(/4,7,3,4,8,7,2,6,1,6,5,1,0,0,0,0/)
-    matCon1(172,:)=(/7,3,9,7,9,8,3,2,9,5,9,6,2,6,9,0/)
-    matCon1(173,:)=(/10,6,5,11,2,7,2,8,7,2,4,8,0,0,0,0/)
-    matCon1(174,:)=(/2,7,11,2,8,7,2,1,8,9,8,1,10,6,5,0/)
-    matCon1(175,:)=(/5,1,11,5,11,6,1,4,11,7,11,8,4,8,11,0/)
-    matCon1(176,:)=(/8,7,11,8,11,9,6,5,11,5,9,11,0,0,0,0/)
-    matCon1(177,:)=(/7,10,6,7,12,10,12,9,10,0,0,0,0,0,0,0/)
-    matCon1(178,:)=(/4,7,12,1,7,4,1,6,7,1,10,6,0,0,0,0/)
-    matCon1(179,:)=(/1,12,9,1,6,12,1,2,6,6,7,12,0,0,0,0/)
-    matCon1(180,:)=(/7,12,4,7,4,6,6,4,2,0,0,0,0,0,0,0/)
-    matCon1(181,:)=(/2,3,11,10,6,12,10,12,9,12,6,7,0,0,0,0/)
-    matCon1(182,:)=(/1,12,4,1,7,12,1,10,7,6,7,10,2,3,11,0/)
-    matCon1(183,:)=(/12,9,6,12,6,7,9,1,6,11,6,3,1,3,6,0/)
-    matCon1(184,:)=(/7,12,4,7,4,6,3,11,4,11,6,4,0,0,0,0/)
-    matCon1(185,:)=(/6,9,10,6,3,9,6,7,3,4,9,3,0,0,0,0/)
-    matCon1(186,:)=(/10,6,7,10,7,1,1,7,3,0,0,0,0,0,0,0/)
-    matCon1(187,:)=(/2,6,9,2,9,1,6,7,9,4,9,3,7,3,9,0/)
-    matCon1(188,:)=(/2,6,7,3,2,7,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(189,:)=(/2,4,7,2,7,11,4,9,7,6,7,10,9,10,7,0/)
-    matCon1(190,:)=(/11,2,1,11,1,7,10,6,1,6,7,1,0,0,0,0/)
-    matCon1(191,:)=(/1,4,9,6,7,11,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(192,:)=(/11,6,7,0,0,0,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(193,:)=(/12,6,11,8,6,12,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(194,:)=(/12,6,11,12,8,6,9,4,1,0,0,0,0,0,0,0/)
-    matCon1(195,:)=(/6,12,8,6,11,12,2,10,1,0,0,0,0,0,0,0/)
-    matCon1(196,:)=(/11,8,6,11,12,8,10,9,2,9,4,2,0,0,0,0/)
-    matCon1(197,:)=(/12,2,3,12,8,2,8,6,2,0,0,0,0,0,0,0/)
-    matCon1(198,:)=(/1,9,4,2,3,8,2,8,6,8,3,12,0,0,0,0/)
-    matCon1(199,:)=(/10,8,6,10,3,8,10,1,3,3,12,8,0,0,0,0/)
-    matCon1(200,:)=(/8,6,3,8,3,12,6,10,3,4,3,9,10,9,3,0/)
-    matCon1(201,:)=(/3,6,11,3,4,6,4,8,6,0,0,0,0,0,0,0/)
-    matCon1(202,:)=(/9,3,1,9,6,3,9,8,6,11,3,6,0,0,0,0/)
-    matCon1(203,:)=(/10,1,2,6,11,4,6,4,8,4,11,3,0,0,0,0/)
-    matCon1(204,:)=(/10,9,3,10,3,2,9,8,3,11,3,6,8,6,3,0/)
-    matCon1(205,:)=(/2,4,6,4,8,6,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(206,:)=(/1,9,8,1,8,2,2,8,6,0,0,0,0,0,0,0/)
-    matCon1(207,:)=(/10,1,4,10,4,6,6,4,8,0,0,0,0,0,0,0/)
-    matCon1(208,:)=(/10,9,8,6,10,8,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(209,:)=(/6,9,5,6,11,9,11,12,9,0,0,0,0,0,0,0/)
-    matCon1(210,:)=(/6,1,5,6,12,1,6,11,12,12,4,1,0,0,0,0/)
-    matCon1(211,:)=(/1,2,10,9,5,11,9,11,12,11,5,6,0,0,0,0/)
-    matCon1(212,:)=(/11,12,5,11,5,6,12,4,5,10,5,2,4,2,5,0/)
-    matCon1(213,:)=(/3,6,2,3,9,6,3,12,9,5,6,9,0,0,0,0/)
-    matCon1(214,:)=(/1,5,12,1,12,4,5,6,12,3,12,2,6,2,12,0/)
-    matCon1(215,:)=(/1,3,6,1,6,10,3,12,6,5,6,9,12,9,6,0/)
-    matCon1(216,:)=(/10,5,6,3,12,4,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(217,:)=(/3,6,11,4,6,3,4,5,6,4,9,5,0,0,0,0/)
-    matCon1(218,:)=(/6,11,3,6,3,5,5,3,1,0,0,0,0,0,0,0/)
-    matCon1(219,:)=(/4,11,3,4,6,11,4,9,6,5,6,9,1,2,10,0/)
-    matCon1(220,:)=(/6,11,3,6,3,5,2,10,3,10,5,3,0,0,0,0/)
-    matCon1(221,:)=(/9,5,6,9,6,4,4,6,2,0,0,0,0,0,0,0/)
-    matCon1(222,:)=(/1,5,6,2,1,6,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(223,:)=(/9,5,6,9,6,4,10,1,6,1,4,6,0,0,0,0/)
-    matCon1(224,:)=(/10,5,6,0,0,0,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(225,:)=(/5,12,8,5,10,12,10,11,12,0,0,0,0,0,0,0/)
-    matCon1(226,:)=(/1,9,4,5,10,8,10,12,8,10,11,12,0,0,0,0/)
-    matCon1(227,:)=(/2,11,12,2,12,5,2,5,1,8,5,12,0,0,0,0/)
-    matCon1(228,:)=(/4,2,5,4,5,9,2,11,5,8,5,12,11,12,5,0/)
-    matCon1(229,:)=(/5,12,8,10,12,5,10,3,12,10,2,3,0,0,0,0/)
-    matCon1(230,:)=(/10,8,5,10,12,8,10,2,12,3,12,2,1,9,4,0/)
-    matCon1(231,:)=(/12,8,5,12,5,3,3,5,1,0,0,0,0,0,0,0/)
-    matCon1(232,:)=(/12,8,5,12,5,3,9,4,5,4,3,5,0,0,0,0/)
-    matCon1(233,:)=(/3,10,11,3,8,10,3,4,8,8,5,10,0,0,0,0/)
-    matCon1(234,:)=(/10,11,8,10,8,5,11,3,8,9,8,1,3,1,8,0/)
-    matCon1(235,:)=(/4,8,11,4,11,3,8,5,11,2,11,1,5,1,11,0/)
-    matCon1(236,:)=(/2,11,3,9,8,5,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(237,:)=(/5,10,2,5,2,8,8,2,4,0,0,0,0,0,0,0/)
-    matCon1(238,:)=(/5,10,2,5,2,8,1,9,2,9,8,2,0,0,0,0/)
-    matCon1(239,:)=(/5,1,4,8,5,4,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(240,:)=(/5,9,8,0,0,0,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(241,:)=(/10,11,9,11,12,9,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(242,:)=(/4,1,10,4,10,12,12,10,11,0,0,0,0,0,0,0/)
-    matCon1(243,:)=(/1,2,11,1,11,9,9,11,12,0,0,0,0,0,0,0/)
-    matCon1(244,:)=(/4,2,11,12,4,11,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(245,:)=(/2,3,12,2,12,10,10,12,9,0,0,0,0,0,0,0/)
-    matCon1(246,:)=(/4,1,10,4,10,12,2,3,10,3,12,10,0,0,0,0/)
-    matCon1(247,:)=(/1,3,12,9,1,12,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(248,:)=(/4,3,12,0,0,0,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(249,:)=(/3,4,9,3,9,11,11,9,10,0,0,0,0,0,0,0/)
-    matCon1(250,:)=(/10,11,3,1,10,3,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(251,:)=(/3,4,9,3,9,11,1,2,9,2,11,9,0,0,0,0/)
-    matCon1(252,:)=(/2,11,3,0,0,0,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(253,:)=(/2,4,9,10,2,9,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(254,:)=(/1,10,2,0,0,0,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(255,:)=(/1,4,9,0,0,0,0,0,0,0,0,0,0,0,0,0/)
-    matCon1(256,:)=(/0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0/)
-
-  end subroutine getMatCons
-
-  subroutine computeIsoVariable(solName, sps, isoVal)
-    !
-    !       computeIsoVar computes NODE centered values for the given
-    !       solName variable. It is essentially equilivent to
-    !       sotreSolInBuffer. It is assumed blockPointers are already
-    !       set to the correct block.
-    !
-    use constants
-    use blockPointers
-    use cgnsGrid
-    use cgnsNames
-    use flowVarRefState
-    use inputPhysics
-    use IOModule
-    use utils, only : setPointers, terminate
-    use flowUtils, only : computePTot
-    implicit none
-    !
-    !      Subroutine arguments.
-    character(len=*), intent(in)                   :: solName
-    integer(kind=intType), intent(in) :: sps
-    real(kind=realType), intent(in) :: isoVal
-    !
-    !      Local parameters
-    !
-    real(kind=realType), parameter :: plim   = 0.001_realType
-    real(kind=realType), parameter :: rholim = 0.001_realType
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, ii, jj, kk, nn
-
-    real(kind=realType) :: uuy, uuz, vvx, vvz, wwx, wwy, tmp
-    real(kind=realType) :: vortx, vorty, vortz, a2, ptotInf, ptot, uova(3), gradP(3), a
-    real(kind=realType), dimension(:, :, :), pointer :: fc, fn
-
-    do nn=1,nDom
-       call setPointers(nn, 1, sps)
-       fc => flowDoms(nn, 1, sps)%fc
-       fn => flowDoms(nn, 1, sps)%fn
-
-       select case(solName)
-
-       case (cgnsDensity)
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   fc(i,j,k) = w(i,j,k,irho)
-                enddo
-             enddo
-          enddo
-
-       case (cgnsMomx)
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   fc(i,j,k) = w(i,j,k,irho)*w(i,j,k,ivx)
-                enddo
-             enddo
-          enddo
-
-       case (cgnsMomy)
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   fc(i,j,k) = w(i,j,k,irho)*w(i,j,k,ivy)
-                enddo
-             enddo
-          enddo
-
-       case (cgnsMomz)
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   fc(i,j,k) = w(i,j,k,irho)*w(i,j,k,ivz)
-                enddo
-             enddo
-          enddo
-
-       case (cgnsEnergy)
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   fc(i,j,k) = w(i,j,k,irhoE)
-                enddo
-             enddo
-          enddo
-
-       case (cgnsTurbSaNu,cgnsTurbK)
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   fc(i,j,k) = w(i,j,k,itu1)
-                enddo
-             enddo
-          enddo
-
-       case (cgnsTurbOmega,cgnsTurbTau,cgnsTurbEpsilon)
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   fc(i,j,k) = w(i,j,k,itu2)
-                enddo
-             enddo
-          enddo
-
-       case (cgnsTurbV2)
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   fc(i,j,k) = w(i,j,k,itu3)
-                enddo
-             enddo
-          enddo
-
-       case (cgnsTurbF)
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   fc(i,j,k) = w(i,j,k,itu4)
-                enddo
-             enddo
-          enddo
-
-       case (cgnsVelx)
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   fc(i,j,k) = w(i,j,k,ivx)
-                enddo
-             enddo
-          enddo
-
-       case (cgnsVely)
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   fc(i,j,k) = w(i,j,k,ivy)
-                enddo
-             enddo
-          enddo
-
-       case (cgnsVelz)
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   fc(i,j,k) = w(i,j,k,ivz)
-                enddo
-             enddo
-          enddo
-
-       case (cgnsRelVelx)
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   fc(i,j,k) = w(i,j,k,ivx)-s(i,j,k,1)
-                enddo
-             enddo
-          enddo
-
-       case (cgnsRelVely)
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   fc(i,j,k) = w(i,j,k,ivy)-s(i,j,k,2)
-                enddo
-             enddo
-          enddo
-
-       case (cgnsRelVelz)
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   fc(i,j,k) = w(i,j,k,ivz)-s(i,j,k,3)
-                enddo
-             enddo
-          enddo
-
-       case (cgnsPressure)
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   fc(i,j,k) = p(i,j,k)
-                enddo
-             enddo
-          enddo
-
-       case (cgnsTemp)
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   fc(i,j,k) = p(i,j,k)/(RGas*w(i,j,k,irho))
-                enddo
-             enddo
-          enddo
-
-       case (cgnsCp)
-          tmp = two/(gammaInf*pInf*MachCoef*MachCoef)
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   fc(i,j,k) = tmp*(p(i,j,k) - pInf)
-                enddo
-             enddo
-          enddo
-
-       case (cgnsMach)
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   a2  = gamma(i,j,k)*max(p(i,j,k),plim) &
-                        / max(w(i,j,k,irho),rholim)
-                   tmp = (w(i,j,k,ivx)**2 + w(i,j,k,ivy)**2 &
-                        +  w(i,j,k,ivz)**2)/a2
-                   fc(i,j,k) = sqrt(max(zero,tmp))
-                enddo
-             enddo
-          enddo
-
-       case (cgnsRelMach)
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   a2  = gamma(i,j,k)*max(p(i,j,k),plim) &
-                        / max(w(i,j,k,irho),rholim)
-                   tmp = ((w(i,j,k,ivx)-s(i,j,k,1))**2 +&
-                        (w(i,j,k,ivy)-s(i,j,k,2))**2 &
-                        +(w(i,j,k,ivz)-s(i,j,k,3))**2)/a2
-                   fc(i,j,k) = sqrt(max(zero,tmp))
-                enddo
-             enddo
-          enddo
-
-
-       case (cgnsMachTurb)
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   tmp = w(i,j,k,irho)*w(i,j,k,itu1) &
-                        / (gamma(i,j,k)*max(p(i,j,k),plim))
-                   fc(i,j,k) = sqrt(max(zero,tmp))
-                enddo
-             enddo
-          enddo
-
-       case (cgnsEddy)
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   fc(i,j,k) = rev(i,j,k)
-                enddo
-             enddo
-          enddo
-
-       case (cgnsEddyRatio)
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   fc(i,j,k) = rev(i,j,k)/rlv(i,j,k)
-                enddo
-             enddo
-          enddo
-
-       case (cgNSWallDist)
-          do k=1,ke
-             kk = max(2_intType,k); kk = min(kl,kk)
-             do j=1,je
-                jj = max(2_intType,j); jj = min(jl,jj)
-                do i=1,ie
-                   ii = max(2_intType,i); ii = min(il,ii)
-                   fc(i,j,k) = d2Wall(ii,jj,kk)
-                enddo
-             enddo
-          enddo
-
-       case (cgnsVortMagn)
-
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   tmp = half/vol(i,j,k)
-                   uuy = si(i,  j,k,2)*w(i+1,j,k,ivx) &
-                        - si(i-1,j,k,2)*w(i-1,j,k,ivx) &
-                        + sj(i,j,  k,2)*w(i,j+1,k,ivx) &
-                        - sj(i,j-1,k,2)*w(i,j-1,k,ivx) &
-                        + sk(i,j,k,  2)*w(i,j,k+1,ivx) &
-                        - sk(i,j,k-1,2)*w(i,j,k-1,ivx)
-
-                   uuz = si(i,  j,k,3)*w(i+1,j,k,ivx) &
-                        - si(i-1,j,k,3)*w(i-1,j,k,ivx) &
-                        + sj(i,j,  k,3)*w(i,j+1,k,ivx) &
-                        - sj(i,j-1,k,3)*w(i,j-1,k,ivx) &
-                        + sk(i,j,k,  3)*w(i,j,k+1,ivx) &
-                        - sk(i,j,k-1,3)*w(i,j,k-1,ivx)
-
-                   vvx = si(i,  j,k,1)*w(i+1,j,k,ivy) &
-                        - si(i-1,j,k,1)*w(i-1,j,k,ivy) &
-                        + sj(i,j,  k,1)*w(i,j+1,k,ivy) &
-                        - sj(i,j-1,k,1)*w(i,j-1,k,ivy) &
-                        + sk(i,j,k,  1)*w(i,j,k+1,ivy) &
-                        - sk(i,j,k-1,1)*w(i,j,k-1,ivy)
-
-                   vvz = si(i,  j,k,3)*w(i+1,j,k,ivy) &
-                        - si(i-1,j,k,3)*w(i-1,j,k,ivy) &
-                        + sj(i,j,  k,3)*w(i,j+1,k,ivy) &
-                        - sj(i,j-1,k,3)*w(i,j-1,k,ivy) &
-                        + sk(i,j,k,  3)*w(i,j,k+1,ivy) &
-                        - sk(i,j,k-1,3)*w(i,j,k-1,ivy)
-
-                   wwx = si(i,  j,k,1)*w(i+1,j,k,ivz) &
-                        - si(i-1,j,k,1)*w(i-1,j,k,ivz) &
-                        + sj(i,j,  k,1)*w(i,j+1,k,ivz) &
-                        - sj(i,j-1,k,1)*w(i,j-1,k,ivz) &
-                        + sk(i,j,k,  1)*w(i,j,k+1,ivz) &
-                        - sk(i,j,k-1,1)*w(i,j,k-1,ivz)
-
-                   wwy = si(i,  j,k,2)*w(i+1,j,k,ivz) &
-                        - si(i-1,j,k,2)*w(i-1,j,k,ivz) &
-                        + sj(i,j,  k,2)*w(i,j+1,k,ivz) &
-                        - sj(i,j-1,k,2)*w(i,j-1,k,ivz) &
-                        + sk(i,j,k,  2)*w(i,j,k+1,ivz) &
-                        - sk(i,j,k-1,2)*w(i,j,k-1,ivz)
-
-                   vortx = wwy - vvz; vorty = uuz - wwx; vortz = vvx - uuy
-
-                   fc(i,j,k) = tmp*sqrt(vortx**2 + vorty**2 + vortz**2)
-                enddo
-             enddo
-          enddo
-
-       case (cgnsVortx)
-
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   tmp = half/vol(i,j,k)
-                   vvz = si(i,  j,k,3)*w(i+1,j,k,ivy) &
-                        - si(i-1,j,k,3)*w(i-1,j,k,ivy) &
-                        + sj(i,j,  k,3)*w(i,j+1,k,ivy) &
-                        - sj(i,j-1,k,3)*w(i,j-1,k,ivy) &
-                        + sk(i,j,k,  3)*w(i,j,k+1,ivy) &
-                        - sk(i,j,k-1,3)*w(i,j,k-1,ivy)
-
-                   wwy = si(i,  j,k,2)*w(i+1,j,k,ivz) &
-                        - si(i-1,j,k,2)*w(i-1,j,k,ivz) &
-                        + sj(i,j,  k,2)*w(i,j+1,k,ivz) &
-                        - sj(i,j-1,k,2)*w(i,j-1,k,ivz) &
-                        + sk(i,j,k,  2)*w(i,j,k+1,ivz) &
-                        - sk(i,j,k-1,2)*w(i,j,k-1,ivz)
-
-                   fc(i,j,k) = tmp*(wwy - vvz)
-                enddo
-             enddo
-          enddo
-
-       case (cgnsVorty)
-
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   tmp = half/vol(i,j,k)
-                   uuz = si(i,  j,k,3)*w(i+1,j,k,ivx) &
-                        - si(i-1,j,k,3)*w(i-1,j,k,ivx) &
-                        + sj(i,j,  k,3)*w(i,j+1,k,ivx) &
-                        - sj(i,j-1,k,3)*w(i,j-1,k,ivx) &
-                        + sk(i,j,k,  3)*w(i,j,k+1,ivx) &
-                        - sk(i,j,k-1,3)*w(i,j,k-1,ivx)
-
-                   wwx = si(i,  j,k,1)*w(i+1,j,k,ivz) &
-                        - si(i-1,j,k,1)*w(i-1,j,k,ivz) &
-                        + sj(i,j,  k,1)*w(i,j+1,k,ivz) &
-                        - sj(i,j-1,k,1)*w(i,j-1,k,ivz) &
-                        + sk(i,j,k,  1)*w(i,j,k+1,ivz) &
-                        - sk(i,j,k-1,1)*w(i,j,k-1,ivz)
-
-                   fc(i,j,k) = tmp*(uuz - wwx)
-                enddo
-             enddo
-          enddo
-
-       case (cgnsVortz)
-
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   tmp = half/vol(i,j,k)
-                   uuy = si(i,  j,k,2)*w(i+1,j,k,ivx) &
-                        - si(i-1,j,k,2)*w(i-1,j,k,ivx) &
-                        + sj(i,j,  k,2)*w(i,j+1,k,ivx) &
-                        - sj(i,j-1,k,2)*w(i,j-1,k,ivx) &
-                        + sk(i,j,k,  2)*w(i,j,k+1,ivx) &
-                        - sk(i,j,k-1,2)*w(i,j,k-1,ivx)
-
-                   vvx = si(i,  j,k,1)*w(i+1,j,k,ivy) &
-                        - si(i-1,j,k,1)*w(i-1,j,k,ivy) &
-                        + sj(i,j,  k,1)*w(i,j+1,k,ivy) &
-                        - sj(i,j-1,k,1)*w(i,j-1,k,ivy) &
-                        + sk(i,j,k,  1)*w(i,j,k+1,ivy) &
-                        - sk(i,j,k-1,1)*w(i,j,k-1,ivy)
-
-                   fc(i,j,k) = tmp*(vvx - uuy)
-                enddo
-             enddo
-          enddo
-
-       case (cgnsPtotloss)
-
-          ! Compute the free stream total pressure.
-
-          call computePtot(rhoInf, uInf, zero, zero, &
-               pInf, ptotInf)
-          ptotInf = one/ptotInf
-
-          ! Loop over the cell centers and compute the
-          ! total pressure loss.
-
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   call computePtot(w(i,j,k,irho), w(i,j,k,ivx), &
-                        w(i,j,k,ivy),  w(i,j,k,ivz), &
-                        p(i,j,k),      ptot)
-
-                   fc(i,j,k) = one - ptot*ptotInf
-                enddo
-             enddo
-          enddo
-
-       case (cgnsResRho)
-
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   fc(i,j,k) = dw(i,j,k,irho)/vol(i,j,k)
-                enddo
-             enddo
-          enddo
-
-       case (cgnsResMomx)
-
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   fc(i,j,k) = dw(i,j,k,imx)/vol(i,j,k)
-                enddo
-             enddo
-          enddo
-
-       case (cgnsResMomy)
-
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   fc(i,j,k) = dw(i,j,k,imy)/vol(i,j,k)
-                enddo
-             enddo
-          enddo
-
-       case (cgnsResMomz)
-
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   fc(i,j,k) = dw(i,j,k,imz)/vol(i,j,k)
-                enddo
-             enddo
-          enddo
-
-       case (cgnsResRhoE)
-
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   fc(i,j,k) = dw(i,j,k,irhoE)/vol(i,j,k)
-                enddo
-             enddo
-          enddo
-
-       case (cgnsResNu,cgnsResK)
-
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   fc(i,j,k) = dw(i,j,k,itu1)/vol(i,j,k)
-                enddo
-             enddo
-          enddo
-
-       case (cgnsResOmega,cgnsResTau,cgnsResEpsilon)
-
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   fc(i,j,k) = dw(i,j,k,itu2)/vol(i,j,k)
-                enddo
-             enddo
-          enddo
-
-       case (cgnsResV2)
-
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   fc(i,j,k) = dw(i,j,k,itu3)/vol(i,j,k)
-                enddo
-             enddo
-          enddo
-
-       case (cgnsResF)
-
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   fc(i,j,k) = dw(i,j,k,itu4)/vol(i,j,k)
-                enddo
-             enddo
-          enddo
-
-       case (cgnsShock)
-
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-
-                   ! Here we compute U/a <dot> grad P / ||grad P||
-                   ! Whre U is the velocity vector, a is the speed of
-                   ! sound and P is the pressure.
-
-                   ! U / a
-                   a  = sqrt(gamma(i,j,k)*max(p(i,j,k),plim) &
-                        / max(w(i,j,k,irho),rholim))
-
-                   if (addGridVelocities) then
-                      UovA = (/w(i,j,k,ivx)-s(i,j,k,1), &
-                           w(i,j,k,ivy)-s(i,j,k,2), &
-                           w(i,j,k,ivz)-s(i,j,k,3)/)/a
-                   else
-                      UovA = (/w(i,j,k,ivx),w(i,j,k,ivy), w(i,j,k,ivz)/)/a
-                   end if
-                   ! grad P / ||grad P||
-
-                   gradP(1) = si(i,  j,k,1)*P(i+1,j,k) &
-                        - si(i-1,j,k,1)*P(i-1,j,k) &
-                        + sj(i,j,  k,1)*P(i,j+1,k) &
-                        - sj(i,j-1,k,1)*P(i,j-1,k) &
-                        + sk(i,j,k,  1)*P(i,j,k+1) &
-                        - sk(i,j,k-1,1)*P(i,j,k-1)
-
-                   gradP(2) = si(i,  j,k,2)*P(i+1,j,k) &
-                        - si(i-1,j,k,2)*P(i-1,j,k) &
-                        + sj(i,j,  k,2)*P(i,j+1,k) &
-                        - sj(i,j-1,k,2)*P(i,j-1,k) &
-                        + sk(i,j,k,  2)*P(i,j,k+1) &
-                        - sk(i,j,k-1,2)*P(i,j,k-1)
-
-                   gradP(3) = si(i,  j,k,3)*P(i+1,j,k) &
-                        - si(i-1,j,k,3)*P(i-1,j,k) &
-                        + sj(i,j,  k,3)*P(i,j+1,k) &
-                        - sj(i,j-1,k,3)*P(i,j-1,k) &
-                        + sk(i,j,k,  3)*P(i,j,k+1) &
-                        - sk(i,j,k-1,3)*P(i,j,k-1)
-
-                   ! Protect against divide by zero
-                   gradP = gradP / sqrt(gradP(1)**2 + gradP(2)**2 + gradP(3)**2 + 1e-12)
-
-                   ! Dot product
-                   fc(i,j,k) = UovA(1)*gradP(1) + UovA(2)*gradP(2) + UovA(3)*gradP(3)
+            case (cgnsEnergy)
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            fc(i, j, k) = w(i, j, k, irhoE)
+                        end do
+                    end do
+                end do
+
+            case (cgnsTurbSaNu, cgnsTurbK)
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            fc(i, j, k) = w(i, j, k, itu1)
+                        end do
+                    end do
+                end do
+
+            case (cgnsTurbOmega, cgnsTurbTau, cgnsTurbEpsilon)
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            fc(i, j, k) = w(i, j, k, itu2)
+                        end do
+                    end do
+                end do
+
+            case (cgnsTurbV2)
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            fc(i, j, k) = w(i, j, k, itu3)
+                        end do
+                    end do
+                end do
+
+            case (cgnsTurbF)
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            fc(i, j, k) = w(i, j, k, itu4)
+                        end do
+                    end do
+                end do
+
+            case (cgnsVelx)
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            fc(i, j, k) = w(i, j, k, ivx)
+                        end do
+                    end do
+                end do
+
+            case (cgnsVely)
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            fc(i, j, k) = w(i, j, k, ivy)
+                        end do
+                    end do
+                end do
+
+            case (cgnsVelz)
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            fc(i, j, k) = w(i, j, k, ivz)
+                        end do
+                    end do
+                end do
+
+            case (cgnsRelVelx)
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            fc(i, j, k) = w(i, j, k, ivx) - s(i, j, k, 1)
+                        end do
+                    end do
+                end do
+
+            case (cgnsRelVely)
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            fc(i, j, k) = w(i, j, k, ivy) - s(i, j, k, 2)
+                        end do
+                    end do
+                end do
+
+            case (cgnsRelVelz)
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            fc(i, j, k) = w(i, j, k, ivz) - s(i, j, k, 3)
+                        end do
+                    end do
+                end do
+
+            case (cgnsPressure)
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            fc(i, j, k) = p(i, j, k)
+                        end do
+                    end do
+                end do
+
+            case (cgnsTemp)
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            fc(i, j, k) = p(i, j, k) / (RGas * w(i, j, k, irho))
+                        end do
+                    end do
+                end do
+
+            case (cgnsCp)
+                tmp = two / (gammaInf * pInf * MachCoef * MachCoef)
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            fc(i, j, k) = tmp * (p(i, j, k) - pInf)
+                        end do
+                    end do
+                end do
+
+            case (cgnsMach)
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            a2 = gamma(i, j, k) * max(p(i, j, k), plim) &
+                                 / max(w(i, j, k, irho), rholim)
+                            tmp = (w(i, j, k, ivx)**2 + w(i, j, k, ivy)**2 &
+                                   + w(i, j, k, ivz)**2) / a2
+                            fc(i, j, k) = sqrt(max(zero, tmp))
+                        end do
+                    end do
+                end do
+
+            case (cgnsRelMach)
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            a2 = gamma(i, j, k) * max(p(i, j, k), plim) &
+                                 / max(w(i, j, k, irho), rholim)
+                            tmp = ((w(i, j, k, ivx) - s(i, j, k, 1))**2 + &
+                                   (w(i, j, k, ivy) - s(i, j, k, 2))**2 &
+                                   + (w(i, j, k, ivz) - s(i, j, k, 3))**2) / a2
+                            fc(i, j, k) = sqrt(max(zero, tmp))
+                        end do
+                    end do
+                end do
+
+            case (cgnsMachTurb)
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            tmp = w(i, j, k, irho) * w(i, j, k, itu1) &
+                                  / (gamma(i, j, k) * max(p(i, j, k), plim))
+                            fc(i, j, k) = sqrt(max(zero, tmp))
+                        end do
+                    end do
+                end do
+
+            case (cgnsEddy)
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            fc(i, j, k) = rev(i, j, k)
+                        end do
+                    end do
+                end do
+
+            case (cgnsEddyRatio)
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            fc(i, j, k) = rev(i, j, k) / rlv(i, j, k)
+                        end do
+                    end do
+                end do
+
+            case (cgNSWallDist)
+                do k = 1, ke
+                    kk = max(2_intType, k); kk = min(kl, kk)
+                    do j = 1, je
+                        jj = max(2_intType, j); jj = min(jl, jj)
+                        do i = 1, ie
+                            ii = max(2_intType, i); ii = min(il, ii)
+                            fc(i, j, k) = d2Wall(ii, jj, kk)
+                        end do
+                    end do
+                end do
+
+            case (cgnsVortMagn)
+
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            tmp = half / vol(i, j, k)
+                            uuy = si(i, j, k, 2) * w(i + 1, j, k, ivx) &
+                                  - si(i - 1, j, k, 2) * w(i - 1, j, k, ivx) &
+                                  + sj(i, j, k, 2) * w(i, j + 1, k, ivx) &
+                                  - sj(i, j - 1, k, 2) * w(i, j - 1, k, ivx) &
+                                  + sk(i, j, k, 2) * w(i, j, k + 1, ivx) &
+                                  - sk(i, j, k - 1, 2) * w(i, j, k - 1, ivx)
+
+                            uuz = si(i, j, k, 3) * w(i + 1, j, k, ivx) &
+                                  - si(i - 1, j, k, 3) * w(i - 1, j, k, ivx) &
+                                  + sj(i, j, k, 3) * w(i, j + 1, k, ivx) &
+                                  - sj(i, j - 1, k, 3) * w(i, j - 1, k, ivx) &
+                                  + sk(i, j, k, 3) * w(i, j, k + 1, ivx) &
+                                  - sk(i, j, k - 1, 3) * w(i, j, k - 1, ivx)
+
+                            vvx = si(i, j, k, 1) * w(i + 1, j, k, ivy) &
+                                  - si(i - 1, j, k, 1) * w(i - 1, j, k, ivy) &
+                                  + sj(i, j, k, 1) * w(i, j + 1, k, ivy) &
+                                  - sj(i, j - 1, k, 1) * w(i, j - 1, k, ivy) &
+                                  + sk(i, j, k, 1) * w(i, j, k + 1, ivy) &
+                                  - sk(i, j, k - 1, 1) * w(i, j, k - 1, ivy)
+
+                            vvz = si(i, j, k, 3) * w(i + 1, j, k, ivy) &
+                                  - si(i - 1, j, k, 3) * w(i - 1, j, k, ivy) &
+                                  + sj(i, j, k, 3) * w(i, j + 1, k, ivy) &
+                                  - sj(i, j - 1, k, 3) * w(i, j - 1, k, ivy) &
+                                  + sk(i, j, k, 3) * w(i, j, k + 1, ivy) &
+                                  - sk(i, j, k - 1, 3) * w(i, j, k - 1, ivy)
+
+                            wwx = si(i, j, k, 1) * w(i + 1, j, k, ivz) &
+                                  - si(i - 1, j, k, 1) * w(i - 1, j, k, ivz) &
+                                  + sj(i, j, k, 1) * w(i, j + 1, k, ivz) &
+                                  - sj(i, j - 1, k, 1) * w(i, j - 1, k, ivz) &
+                                  + sk(i, j, k, 1) * w(i, j, k + 1, ivz) &
+                                  - sk(i, j, k - 1, 1) * w(i, j, k - 1, ivz)
+
+                            wwy = si(i, j, k, 2) * w(i + 1, j, k, ivz) &
+                                  - si(i - 1, j, k, 2) * w(i - 1, j, k, ivz) &
+                                  + sj(i, j, k, 2) * w(i, j + 1, k, ivz) &
+                                  - sj(i, j - 1, k, 2) * w(i, j - 1, k, ivz) &
+                                  + sk(i, j, k, 2) * w(i, j, k + 1, ivz) &
+                                  - sk(i, j, k - 1, 2) * w(i, j, k - 1, ivz)
+
+                            vortx = wwy - vvz; vorty = uuz - wwx; vortz = vvx - uuy
+
+                            fc(i, j, k) = tmp * sqrt(vortx**2 + vorty**2 + vortz**2)
+                        end do
+                    end do
+                end do
+
+            case (cgnsVortx)
+
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            tmp = half / vol(i, j, k)
+                            vvz = si(i, j, k, 3) * w(i + 1, j, k, ivy) &
+                                  - si(i - 1, j, k, 3) * w(i - 1, j, k, ivy) &
+                                  + sj(i, j, k, 3) * w(i, j + 1, k, ivy) &
+                                  - sj(i, j - 1, k, 3) * w(i, j - 1, k, ivy) &
+                                  + sk(i, j, k, 3) * w(i, j, k + 1, ivy) &
+                                  - sk(i, j, k - 1, 3) * w(i, j, k - 1, ivy)
+
+                            wwy = si(i, j, k, 2) * w(i + 1, j, k, ivz) &
+                                  - si(i - 1, j, k, 2) * w(i - 1, j, k, ivz) &
+                                  + sj(i, j, k, 2) * w(i, j + 1, k, ivz) &
+                                  - sj(i, j - 1, k, 2) * w(i, j - 1, k, ivz) &
+                                  + sk(i, j, k, 2) * w(i, j, k + 1, ivz) &
+                                  - sk(i, j, k - 1, 2) * w(i, j, k - 1, ivz)
+
+                            fc(i, j, k) = tmp * (wwy - vvz)
+                        end do
+                    end do
+                end do
+
+            case (cgnsVorty)
+
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            tmp = half / vol(i, j, k)
+                            uuz = si(i, j, k, 3) * w(i + 1, j, k, ivx) &
+                                  - si(i - 1, j, k, 3) * w(i - 1, j, k, ivx) &
+                                  + sj(i, j, k, 3) * w(i, j + 1, k, ivx) &
+                                  - sj(i, j - 1, k, 3) * w(i, j - 1, k, ivx) &
+                                  + sk(i, j, k, 3) * w(i, j, k + 1, ivx) &
+                                  - sk(i, j, k - 1, 3) * w(i, j, k - 1, ivx)
+
+                            wwx = si(i, j, k, 1) * w(i + 1, j, k, ivz) &
+                                  - si(i - 1, j, k, 1) * w(i - 1, j, k, ivz) &
+                                  + sj(i, j, k, 1) * w(i, j + 1, k, ivz) &
+                                  - sj(i, j - 1, k, 1) * w(i, j - 1, k, ivz) &
+                                  + sk(i, j, k, 1) * w(i, j, k + 1, ivz) &
+                                  - sk(i, j, k - 1, 1) * w(i, j, k - 1, ivz)
+
+                            fc(i, j, k) = tmp * (uuz - wwx)
+                        end do
+                    end do
+                end do
+
+            case (cgnsVortz)
+
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            tmp = half / vol(i, j, k)
+                            uuy = si(i, j, k, 2) * w(i + 1, j, k, ivx) &
+                                  - si(i - 1, j, k, 2) * w(i - 1, j, k, ivx) &
+                                  + sj(i, j, k, 2) * w(i, j + 1, k, ivx) &
+                                  - sj(i, j - 1, k, 2) * w(i, j - 1, k, ivx) &
+                                  + sk(i, j, k, 2) * w(i, j, k + 1, ivx) &
+                                  - sk(i, j, k - 1, 2) * w(i, j, k - 1, ivx)
+
+                            vvx = si(i, j, k, 1) * w(i + 1, j, k, ivy) &
+                                  - si(i - 1, j, k, 1) * w(i - 1, j, k, ivy) &
+                                  + sj(i, j, k, 1) * w(i, j + 1, k, ivy) &
+                                  - sj(i, j - 1, k, 1) * w(i, j - 1, k, ivy) &
+                                  + sk(i, j, k, 1) * w(i, j, k + 1, ivy) &
+                                  - sk(i, j, k - 1, 1) * w(i, j, k - 1, ivy)
+
+                            fc(i, j, k) = tmp * (vvx - uuy)
+                        end do
+                    end do
+                end do
+
+            case (cgnsPtotloss)
+
+                ! Compute the free stream total pressure.
+
+                call computePtot(rhoInf, uInf, zero, zero, &
+                                 pInf, ptotInf)
+                ptotInf = one / ptotInf
+
+                ! Loop over the cell centers and compute the
+                ! total pressure loss.
+
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            call computePtot(w(i, j, k, irho), w(i, j, k, ivx), &
+                                             w(i, j, k, ivy), w(i, j, k, ivz), &
+                                             p(i, j, k), ptot)
+
+                            fc(i, j, k) = one - ptot * ptotInf
+                        end do
+                    end do
+                end do
+
+            case (cgnsResRho)
+
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            fc(i, j, k) = dw(i, j, k, irho) / vol(i, j, k)
+                        end do
+                    end do
+                end do
+
+            case (cgnsResMomx)
+
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            fc(i, j, k) = dw(i, j, k, imx) / vol(i, j, k)
+                        end do
+                    end do
+                end do
+
+            case (cgnsResMomy)
+
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            fc(i, j, k) = dw(i, j, k, imy) / vol(i, j, k)
+                        end do
+                    end do
+                end do
+
+            case (cgnsResMomz)
+
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            fc(i, j, k) = dw(i, j, k, imz) / vol(i, j, k)
+                        end do
+                    end do
+                end do
+
+            case (cgnsResRhoE)
+
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            fc(i, j, k) = dw(i, j, k, irhoE) / vol(i, j, k)
+                        end do
+                    end do
+                end do
+
+            case (cgnsResNu, cgnsResK)
+
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            fc(i, j, k) = dw(i, j, k, itu1) / vol(i, j, k)
+                        end do
+                    end do
+                end do
+
+            case (cgnsResOmega, cgnsResTau, cgnsResEpsilon)
+
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            fc(i, j, k) = dw(i, j, k, itu2) / vol(i, j, k)
+                        end do
+                    end do
+                end do
+
+            case (cgnsResV2)
+
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            fc(i, j, k) = dw(i, j, k, itu3) / vol(i, j, k)
+                        end do
+                    end do
+                end do
+
+            case (cgnsResF)
+
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            fc(i, j, k) = dw(i, j, k, itu4) / vol(i, j, k)
+                        end do
+                    end do
+                end do
+
+            case (cgnsShock)
+
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+
+                            ! Here we compute U/a <dot> grad P / ||grad P||
+                            ! Whre U is the velocity vector, a is the speed of
+                            ! sound and P is the pressure.
+
+                            ! U / a
+                            a = sqrt(gamma(i, j, k) * max(p(i, j, k), plim) &
+                                     / max(w(i, j, k, irho), rholim))
+
+                            if (addGridVelocities) then
+                                UovA = (/w(i, j, k, ivx) - s(i, j, k, 1), &
+                                         w(i, j, k, ivy) - s(i, j, k, 2), &
+                                         w(i, j, k, ivz) - s(i, j, k, 3)/) / a
+                            else
+                                UovA = (/w(i, j, k, ivx), w(i, j, k, ivy), w(i, j, k, ivz)/) / a
+                            end if
+                            ! grad P / ||grad P||
+
+                            gradP(1) = si(i, j, k, 1) * P(i + 1, j, k) &
+                                       - si(i - 1, j, k, 1) * P(i - 1, j, k) &
+                                       + sj(i, j, k, 1) * P(i, j + 1, k) &
+                                       - sj(i, j - 1, k, 1) * P(i, j - 1, k) &
+                                       + sk(i, j, k, 1) * P(i, j, k + 1) &
+                                       - sk(i, j, k - 1, 1) * P(i, j, k - 1)
+
+                            gradP(2) = si(i, j, k, 2) * P(i + 1, j, k) &
+                                       - si(i - 1, j, k, 2) * P(i - 1, j, k) &
+                                       + sj(i, j, k, 2) * P(i, j + 1, k) &
+                                       - sj(i, j - 1, k, 2) * P(i, j - 1, k) &
+                                       + sk(i, j, k, 2) * P(i, j, k + 1) &
+                                       - sk(i, j, k - 1, 2) * P(i, j, k - 1)
+
+                            gradP(3) = si(i, j, k, 3) * P(i + 1, j, k) &
+                                       - si(i - 1, j, k, 3) * P(i - 1, j, k) &
+                                       + sj(i, j, k, 3) * P(i, j + 1, k) &
+                                       - sj(i, j - 1, k, 3) * P(i, j - 1, k) &
+                                       + sk(i, j, k, 3) * P(i, j, k + 1) &
+                                       - sk(i, j, k - 1, 3) * P(i, j, k - 1)
+
+                            ! Protect against divide by zero
+                            gradP = gradP / sqrt(gradP(1)**2 + gradP(2)**2 + gradP(3)**2 + 1e-12)
+
+                            ! Dot product
+                            fc(i, j, k) = UovA(1) * gradP(1) + UovA(2) * gradP(2) + UovA(3) * gradP(3)
+                        end do
+                    end do
+                end do
+
+            case (cgnsBlank)
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            fc(i, j, k) = real(min(iblank(i, j, k), 1_intType), realType)
+                        end do
+                    end do
+                end do
+
+            case default
+                call terminate("computeIsoVariable", &
+                               "This should not happen")
+
+            end select
+
+            ! We now create nodal values from the cell centered
+            ! values. This was the reason for going from 1 to ie
+            ! etc.
+
+            do k = 1, kl
+                do j = 1, jl
+                    do i = 1, il
+                        fn(i, j, k) = eighth * ( &
+                                      fc(i, j, k) + &
+                                      fc(i + 1, j, k) + &
+                                      fc(i, j + 1, k) + &
+                                      fc(i + 1, j + 1, k) + &
+                                      fc(i, j, k + 1) + &
+                                      fc(i + 1, j, k + 1) + &
+                                      fc(i, j + 1, k + 1) + &
+                                      fc(i + 1, j + 1, k + 1)) - isoVal
+                    end do
                 end do
-             end do
-          end do
-
-       case (cgnsBlank)
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   fc(i,j,k) = real(min(iblank(i,j,k),1_intType),realType)
-                enddo
-             enddo
-          enddo
-
-       case default
-          call terminate("computeIsoVariable", &
-               "This should not happen")
-
-       end select
-
-       ! We now create nodal values from the cell centered
-       ! values. This was the reason for going from 1 to ie
-       ! etc.
-
-       do k=1,kl
-          do j=1,jl
-             do i=1,il
-                fn(i,j,k) = eighth*( &
-                     fc(i  , j  , k  ) + &
-                     fc(i+1, j  , k  ) + &
-                     fc(i  , j+1, k  ) + &
-                     fc(i+1, j+1, k  ) + &
-                     fc(i  , j  , k+1) + &
-                     fc(i+1, j  , k+1) + &
-                     fc(i  , j+1, k+1) + &
-                     fc(i+1, j+1, k+1)) - isoVal
-             end do
-          end do
-       end do
-    end do
-  end subroutine computeIsoVariable
+            end do
+        end do
+    end subroutine computeIsoVariable
 end module writeCGNSSurface
diff --git a/src/output/writeCGNSVolume.F90 b/src/output/writeCGNSVolume.F90
index 10a56aeb6..767e8dba8 100644
--- a/src/output/writeCGNSVolume.F90
+++ b/src/output/writeCGNSVolume.F90
@@ -2,1478 +2,1477 @@ module writeCGNSVolume
 
 contains
 
-  subroutine writeCGNSVolumeSol
-    !
-    !       writeCGNSVolumeSol and its subroutines write the cell
-    !       centered CGNS solution file(s).
-    !
-    use block
-    use cgnsGrid
-    use communication
-    use inputPhysics
-    use IOModule
-    use su_cgns
-    use outputMod
-    use inputIteration
-    use utils, only : terminate
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: nn
-    integer(kind=intType) :: nVolSolvar, nVolDiscrVar
-
-    character(len=maxCGNSNameLen), &
-         dimension(:), allocatable :: solNames
-
-    ! Determine the number and names of the solution files.
-    ! Also set the pointers for IOVar needed for the general
-    ! treatment of the IO.
-
-    call volSolFileNamesWrite
-
-    ! Return immediately if no volume solution files must be written.
-
-    if(nVolSolToWrite == 0) return
-
-    ! Write a message that the solution file(s) are being written.
-    ! Of course only processor 0 does this.
-
-    if(myID == 0 .and. printIterations) then
-       print "(a)", "#"
-       print "(a,a)", "# Writing volume solution file(s): ",trim(volSolFileNames(1))
-    endif
-
-    ! Open the CGNS file(s), the convergence info and if needed the
-    ! time accurate data for an unsteady computation. This is only
-    ! done by processor 0.
-
-    if(myID == 0) then
-       call openCGNSVolumeSol
-       call writeCGNSConvInfo
-
-       if(equationMode == unsteady) call writeCGNSTimeHistory
-    endif
-
-    ! Determine the number of variables to be written to the volume
-    ! solution file(s) as well as the CGNS names.
-
-    call numberOfVolSolVariables(nVolSolvar, nVolDiscrVar)
-    allocate(solNames(nVolSolvar+nVolDiscrVar), stat=ierr)
-    if(ierr /= 0)                          &
-         call terminate("writeCGNSVolumeSol", &
-         "Memory allocation failure for solNames")
-    call volSolNames(solNames)
-
-    ! Loop over the number of CGNS blocks and write the cell centered
-    ! solution(s) of this block.
-
-    do nn=1,cgnsNDom
-       call writeSolCGNSZone(nn, nVolSolvar, nVolDiscrVar, solNames)
-    enddo
-
-    ! Close the cgns file(s). Only processor 0 does this.
-
-    if(myID == 0) then
-       do nn=1,nVolSolToWrite
-          call cg_close_f(fileIDs(nn), ierr)
-          if(ierr /= CG_OK)                     &
-               call terminate("writeCGNSVolumeSol", &
-               "Something wrong when calling cg_close_f")
-       enddo
-
-    end if
-
-    ! Deallocate the memory of fileIDs and cgnsBases.  These are
-    ! allocated ALL PROCESSORS not just processor 0.
-    ! Fixed Bug: GKK
-
-    if (allocated(fileIDs)) then
-       deallocate(fileIDs, stat=ierr)
-    end if
-    if (allocated(cgnsBases)) then
-       deallocate(cgnsBases, stat=ierr)
-    end if
-
-    if(ierr /= 0)                          &
-         call terminate("writeCGNSVolumeSol", &
-         "Deallocation error for fileIDs &
-         &and cgnsBases.")
-
-    ! Deallocate the memory of solNames.
-
-    deallocate(solNames, stat=ierr)
-    if(ierr /= 0)                          &
-         call terminate("writeCGNSVolumeSol", &
-         "Deallocation error for solNames.")
-
-    ! Deallocate the memory of IOVar. Note that the first entry
-    ! is used as a temporary buffer.
-
-    do nn=1,nDom
-       deallocate(IOVar(nn,1)%w, stat=ierr)
-       if(ierr /= 0)                          &
+    subroutine writeCGNSVolumeSol
+        !
+        !       writeCGNSVolumeSol and its subroutines write the cell
+        !       centered CGNS solution file(s).
+        !
+        use block
+        use cgnsGrid
+        use communication
+        use inputPhysics
+        use IOModule
+        use su_cgns
+        use outputMod
+        use inputIteration
+        use utils, only: terminate
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: nn
+        integer(kind=intType) :: nVolSolvar, nVolDiscrVar
+
+        character(len=maxCGNSNameLen), &
+            dimension(:), allocatable :: solNames
+
+        ! Determine the number and names of the solution files.
+        ! Also set the pointers for IOVar needed for the general
+        ! treatment of the IO.
+
+        call volSolFileNamesWrite
+
+        ! Return immediately if no volume solution files must be written.
+
+        if (nVolSolToWrite == 0) return
+
+        ! Write a message that the solution file(s) are being written.
+        ! Of course only processor 0 does this.
+
+        if (myID == 0 .and. printIterations) then
+            print "(a)", "#"
+            print "(a,a)", "# Writing volume solution file(s): ", trim(volSolFileNames(1))
+        end if
+
+        ! Open the CGNS file(s), the convergence info and if needed the
+        ! time accurate data for an unsteady computation. This is only
+        ! done by processor 0.
+
+        if (myID == 0) then
+            call openCGNSVolumeSol
+            call writeCGNSConvInfo
+
+            if (equationMode == unsteady) call writeCGNSTimeHistory
+        end if
+
+        ! Determine the number of variables to be written to the volume
+        ! solution file(s) as well as the CGNS names.
+
+        call numberOfVolSolVariables(nVolSolvar, nVolDiscrVar)
+        allocate (solNames(nVolSolvar + nVolDiscrVar), stat=ierr)
+        if (ierr /= 0) &
             call terminate("writeCGNSVolumeSol", &
-            "Deallocation error for IOVar%w")
-    enddo
-
-    deallocate(IOVar, stat=ierr)
-    if(ierr /= 0)                          &
-         call terminate("writeCGNSVolumeSol", &
-         "Deallocation error for IOVar")
-
-    ! Wait until all processors (especially processor 0) reach
-    ! this point.
-
-    call mpi_barrier(ADflow_comm_world, ierr)
-
-    ! Write a message that the solution file has been written.
-    ! Of course only processor 0 does this.
-
-    if(myID == 0 .and. printIterations) then
-       print "(a)", "# Volume solution file(s) written"
-       print "(a)", "#"
-    endif
-  end subroutine writeCGNSVolumeSol
+                           "Memory allocation failure for solNames")
+        call volSolNames(solNames)
 
-  subroutine volSolFileNamesWrite
-    !
-    !       volSolFileNamesWrite determines the names and number of volume
-    !       solution files to be written. Furthermore it sets the pointers
-    !       and/or allocates the memory for IOVar to make a general
-    !       treatment of the writing possible.
-    !
-    use block
-    use inputIO
-    use inputPhysics
-    use inputTimeSpectral
-    use IOModule
-    use iteration
-    use monitor
-    use outputMod
-    use utils, only : terminate
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
+        ! Loop over the number of CGNS blocks and write the cell centered
+        ! solution(s) of this block.
 
-    integer(kind=intType) :: nn, mm, kk, nAvail
-    integer(kind=intType) :: iEnd, jEnd, kEnd
+        do nn = 1, cgnsNDom
+            call writeSolCGNSZone(nn, nVolSolvar, nVolDiscrVar, solNames)
+        end do
 
-    character(len=7) :: intString
+        ! Close the cgns file(s). Only processor 0 does this.
 
-    !       Determine the names and number of volume solution files to be
-    !       written.
-    !
-    ! Determine the situation we are having here.
+        if (myID == 0) then
+            do nn = 1, nVolSolToWrite
+                call cg_close_f(fileIDs(nn), ierr)
+                if (ierr /= CG_OK) &
+                    call terminate("writeCGNSVolumeSol", &
+                                   "Something wrong when calling cg_close_f")
+            end do
 
-    select case (equationMode)
+        end if
 
-    case (steady)
+        ! Deallocate the memory of fileIDs and cgnsBases.  These are
+        ! allocated ALL PROCESSORS not just processor 0.
+        ! Fixed Bug: GKK
 
-       ! Steady state computation. Allocate the memory for the
-       ! volume solution file name.
+        if (allocated(fileIDs)) then
+            deallocate (fileIDs, stat=ierr)
+        end if
+        if (allocated(cgnsBases)) then
+            deallocate (cgnsBases, stat=ierr)
+        end if
 
-       allocate(volSolFileNames(1), stat=ierr)
-       if(ierr /= 0)                            &
-            call terminate("volSolFileNamesWrite", &
-            "Memory allocation failure for &
-            &volSolFileNames")
+        if (ierr /= 0) &
+             call terminate("writeCGNSVolumeSol", &
+             "Deallocation error for fileIDs &
+             &and cgnsBases.")
 
-       ! Volume solution file. Always set the name.
-       ! Set nVolSolToWrite to 1 if a solution file must be written;
-       ! otherwise set it to 0.
+        ! Deallocate the memory of solNames.
 
-       volSolFileNames(1) = solFile
-       if( writeVolume ) then
-          nVolSolToWrite = 1
-       else
-          nVolSolToWrite = 0
-       endif
+        deallocate (solNames, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("writeCGNSVolumeSol", &
+                           "Deallocation error for solNames.")
 
-       !===============================================================
+        ! Deallocate the memory of IOVar. Note that the first entry
+        ! is used as a temporary buffer.
 
-    case (unsteady)
+        do nn = 1, nDom
+            deallocate (IOVar(nn, 1)%w, stat=ierr)
+            if (ierr /= 0) &
+                call terminate("writeCGNSVolumeSol", &
+                               "Deallocation error for IOVar%w")
+        end do
 
-       ! Unsteady computation. For a consistent restart nOldLevels
-       ! solutions must be written. However, it is possible that not
-       ! that many solutions are available or that some of these
-       ! solutions have already been written in an earlier time step.
+        deallocate (IOVar, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("writeCGNSVolumeSol", &
+                           "Deallocation error for IOVar")
+
+        ! Wait until all processors (especially processor 0) reach
+        ! this point.
+
+        call mpi_barrier(ADflow_comm_world, ierr)
+
+        ! Write a message that the solution file has been written.
+        ! Of course only processor 0 does this.
+
+        if (myID == 0 .and. printIterations) then
+            print "(a)", "# Volume solution file(s) written"
+            print "(a)", "#"
+        end if
+    end subroutine writeCGNSVolumeSol
 
-       ! First determine the number of available solutions.
+    subroutine volSolFileNamesWrite
+        !
+        !       volSolFileNamesWrite determines the names and number of volume
+        !       solution files to be written. Furthermore it sets the pointers
+        !       and/or allocates the memory for IOVar to make a general
+        !       treatment of the writing possible.
+        !
+        use block
+        use inputIO
+        use inputPhysics
+        use inputTimeSpectral
+        use IOModule
+        use iteration
+        use monitor
+        use outputMod
+        use utils, only: terminate
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
 
-       nAvail = timeStepUnsteady + nTimeStepsRestart + 1
-       nAvail = min(nAvail,nOldLevels)
+        integer(kind=intType) :: nn, mm, kk, nAvail
+        integer(kind=intType) :: iEnd, jEnd, kEnd
 
-       ! Allocate the memory for the file names. Note that this is
-       ! an upper boundary. It is possible that less files need
-       ! to be written.
+        character(len=7) :: intString
 
-       allocate(volSolFileNames(nAvail), stat=ierr)
-       if(ierr /= 0)                            &
-            call terminate("volSolFileNamesWrite", &
-            "Memory allocation failure for &
-            &volSolFileNames")
+        !       Determine the names and number of volume solution files to be
+        !       written.
+        !
+        ! Determine the situation we are having here.
 
-       ! Set the names of the files.
+        select case (equationMode)
 
-       do nn=1,nAvail
-          write(intString,"(i7)") timeStepUnsteady + &
-               nTimeStepsRestart + 1 - nn
-          intString = adjustl(intString)
+        case (steady)
 
-          volSolFileNames(nn)  = trim(solfile)//"&
-               &Timestep"//trim(intString)
-       enddo
+            ! Steady state computation. Allocate the memory for the
+            ! volume solution file name.
 
-       ! Determine the number of volume solution files to write.
-
-       if( writeVolume ) then
-
-          ! Initialize nVolSolToWrite to 1.
-
-          nVolSolToWrite = 1
+            allocate (volSolFileNames(1), stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("volSolFileNamesWrite", &
+                 "Memory allocation failure for &
+                 &volSolFileNames")
 
-          ! Loop over the older levels and check if some of
-          ! them must be written as well.
+            ! Volume solution file. Always set the name.
+            ! Set nVolSolToWrite to 1 if a solution file must be written;
+            ! otherwise set it to 0.
 
-          do nn=1,(nAvail-1)
-             if(.not. oldSolWritten(nn) ) then
-                nVolSolToWrite = nVolSolToWrite + 1
-                volSolFileNames(nVolSolToWrite) = volSolFileNames(nn+1)
-             endif
-          enddo
+            volSolFileNames(1) = solFile
+            if (writeVolume) then
+                nVolSolToWrite = 1
+            else
+                nVolSolToWrite = 0
+            end if
 
-       else
+            !===============================================================
 
-          ! No volume solution files need to be written.
+        case (unsteady)
 
-          nVolSolToWrite = 0
+            ! Unsteady computation. For a consistent restart nOldLevels
+            ! solutions must be written. However, it is possible that not
+            ! that many solutions are available or that some of these
+            ! solutions have already been written in an earlier time step.
 
-       endif
+            ! First determine the number of available solutions.
 
-       !===============================================================
+            nAvail = timeStepUnsteady + nTimeStepsRestart + 1
+            nAvail = min(nAvail, nOldLevels)
 
-    case (timeSpectral)
+            ! Allocate the memory for the file names. Note that this is
+            ! an upper boundary. It is possible that less files need
+            ! to be written.
 
-       ! Time spectral computation. Allocate the file names.
+            allocate (volSolFileNames(nAvail), stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("volSolFileNamesWrite", &
+                 "Memory allocation failure for &
+                 &volSolFileNames")
 
-       allocate(volSolFileNames(nTimeIntervalsSpectral), stat=ierr)
-       if(ierr /= 0)                            &
-            call terminate("volSolFileNamesWrite", &
-            "Memory allocation failure for &
-            &volSolFileNames")
+            ! Set the names of the files.
 
-       ! Set the names of the files.
+            do nn = 1, nAvail
+                write (intString, "(i7)") timeStepUnsteady + &
+                    nTimeStepsRestart + 1 - nn
+                intString = adjustl(intString)
 
-       do nn=1,nTimeIntervalsSpectral
-          write(intString,"(i7)") nn
-          intString = adjustl(intString)
+                volSolFileNames(nn) = trim(solfile)//"&
+                     &Timestep"//trim(intString)
+            end do
 
-          volSolFileNames(nn)  = trim(solfile)//"&
-               &Spectral"//trim(intString)
-       enddo
+            ! Determine the number of volume solution files to write.
+
+            if (writeVolume) then
+
+                ! Initialize nVolSolToWrite to 1.
+
+                nVolSolToWrite = 1
 
-       ! Set the number of volume solution files to write.
-       ! Either they are written or they are not written.
+                ! Loop over the older levels and check if some of
+                ! them must be written as well.
 
-       if( writeVolume ) then
-          nVolSolToWrite = nTimeIntervalsSpectral
-       else
-          nVolSolToWrite = 0
-       endif
+                do nn = 1, (nAvail - 1)
+                    if (.not. oldSolWritten(nn)) then
+                        nVolSolToWrite = nVolSolToWrite + 1
+                        volSolFileNames(nVolSolToWrite) = volSolFileNames(nn + 1)
+                    end if
+                end do
 
-    end select
-    !
-    !       Set the pointers for IOVar if volume solution files need to be
-    !       written.
-    !
-    testSolsToWrite: if(nVolSolToWrite > 0) then
+            else
 
-       ! Allocate the memory for IOVar.
+                ! No volume solution files need to be written.
 
-       allocate(IOVar(nDom,nVolSolToWrite), stat=ierr)
-       if(ierr /= 0)                            &
-            call terminate("volSolFileNamesWrite", &
-            "Memory allocation failure for IOVar")
-
-       ! As the writing normally involves other variables than just
-       ! the primitive ones, memory for the member variable w must be
-       ! allocated to make the general IO treatment possible. This is
-       ! a bit of an overhead, but that's a small price to pay for the
-       ! general treatment.
-
-       if( storeRindLayer ) then
-          do nn=1,nDom
-             IOVar(nn,1)%pointerOffset = 0
-
-             iEnd = flowDoms(nn,1,1)%ie
-             jEnd = flowDoms(nn,1,1)%je
-             kEnd = flowDoms(nn,1,1)%ke
-
-             allocate(IOVar(nn,1)%w(iEnd,jEnd,kEnd,1), stat=ierr)
-             if(ierr /= 0)                            &
-                  call terminate("volSolFileNamesWrite", &
-                  "Memory allocation failure for IOVar%w")
-          enddo
-       else
-          do nn=1,nDom
-             IOVar(nn,1)%pointerOffset = 0
-
-             iEnd = flowDoms(nn,1,1)%il
-             jEnd = flowDoms(nn,1,1)%jl
-             kEnd = flowDoms(nn,1,1)%kl
+                nVolSolToWrite = 0
 
-             allocate(IOVar(nn,1)%w(2:iEnd,2:jEnd,2:kEnd,1), stat=ierr)
-             if(ierr /= 0)                            &
-                  call terminate("volSolFileNamesWrite", &
-                  "Memory allocation failure for IOVar%w")
-          enddo
-       endif
-
-       ! Set the pointers for the other solutions depending on the
-       ! situation.
-
-       select case(equationMode)
-
-       case (steady, timeSpectral)
-
-          ! Actually only time spectral mode, but steady is added to
-          ! avoid a compiler warning. Set the pointers for the higher
-          ! spectral solution to the first solution.
-
-          do mm=2,nVolSolToWrite
-             do nn=1,nDom
-                IOVar(nn,mm)%pointerOffset = 0
-                IOVar(nn,mm)%w => IOVar(nn,1)%w
-             enddo
-          enddo
-
-          !=============================================================
-
-       case (unsteady)
-
-          ! It is possible that for an unsteady computation previous
-          ! solutions need to be written. However only the variables
-          ! wOld need to be written, so the pointer can be set to the
-          ! correct entries. As the starting indices of wOld are 2,
-          ! a pointer shift takes place here. I know this is a pain
-          ! in the butt, but that's what we have to live with.
-
-          kk = 1
-          do mm=1,(nAvail-1)
-             if(.not. oldSolWritten(mm) ) then
-                kk = kk + 1
-                do nn=1,nDom
-                   IOVar(nn,kk)%pointerOffset = -1
-                   IOVar(nn,kk)%w => flowDoms(nn,1,1)%wOld(mm,2:,2:,2:,:)
-                enddo
-             endif
-          enddo
-
-       end select
-
-    endif testSolsToWrite
-
-  end subroutine volSolFileNamesWrite
-
-  subroutine openCGNSVolumeSol
-    !
-    !       openCGNSVolumeSol opens the cgns solution file(s) if needed.
-    !       If opened the files are opened either for writing or for
-    !       modification. When the grid file(s) have been written, these
-    !       files are still open and nothing needs to be done.
-    !       Only processor 0 performs this task.
-    !
-    use cgnsGrid
-    use monitor
-    use su_cgns
-    use outputMod
-    use utils, only : terminate
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: nn
-
-    character(len=maxStringLen) :: errorMessage
-
-    ! Check if grid files have been written and if the solution must
-    ! be stored in the same file. If so only the CGNS header must
-    ! be written. Do this and return immediately.
-
-    if(writeGrid .and. (.not. useLinksInCGNS)) then
-       do nn=1,nVolSolToWrite
-          call writeCGNSHeader(fileIDs(nn), cgnsBases(nn))
-       enddo
-
-       return
-    endif
-
-    ! Solution files must be created. Allocate the memory for
-    ! the fileIDs and the bases.
-
-    allocate(fileIDs(nVolSolToWrite), cgnsBases(nVolSolToWrite), &
-         stat=ierr)
-    if(ierr /= 0)                         &
-         call terminate("openCGNSVolumeSol", &
-         "Memory allocation failure for fileIDs and &
-         &cgnsBases")
-
-    ! Determine the situation we are having here.
-
-    testForLinks: if( useLinksInCGNS ) then
-
-       ! Links are used to the coordinates of the zones. This means
-       ! that the files must be opened in write mode.
-
-       do nn=1,nVolSolToWrite
-          call cg_open_f(volSolFileNames(nn), mode_write, &
-               fileIDs(nn), ierr)
-          if(ierr /= CG_OK) then
-             write(errorMessage,*) "File ", trim(volSolFileNames(nn)), &
-                  " could not be opened by CGNS&
-                  & for writing"
-             call terminate("openCGNSVolumeSol", errorMessage)
-          endif
-
-          ! Create the base.
-
-          call cg_base_write_f(fileIDs(nn), cgnsBaseName, cgnsCelldim, &
-               cgnsPhysdim, cgnsBases(nn), ierr)
-          if(ierr /= CG_OK)                    &
-               call terminate("openCGNSVolumeSol", &
-               "Something wrong when calling &
-               &cg_base_write_f")
-       enddo
+            end if
 
-    else testForLinks
+            !===============================================================
 
-       ! Solutions must be written in the same file(s) as the grid.
-       ! As the grid file(s) are not written during the current call
-       ! to writeSol, these are old files and must therefore be opened
-       ! in modify mode.
-
-       do nn=1,nVolSolToWrite
-          call cg_open_f(volSolFileNames(nn), mode_modify, &
-               fileIDs(nn), ierr)
-          if(ierr /= CG_OK) then
-             write(errorMessage,*) "File ", trim(volSolFileNames(nn)), &
-                  " could not be opened by CGNS&
-                  & for writing"
-             call terminate("openCGNSVolumeSol", errorMessage)
-          endif
+        case (timeSpectral)
 
-          ! Simply set the base IDs to 1.
+            ! Time spectral computation. Allocate the file names.
 
-          cgnsBases(nn) = 1
-       enddo
+            allocate (volSolFileNames(nTimeIntervalsSpectral), stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("volSolFileNamesWrite", &
+                 "Memory allocation failure for &
+                 &volSolFileNames")
 
-    endif testForLinks
+            ! Set the names of the files.
 
-    ! Write the CGNS header.
+            do nn = 1, nTimeIntervalsSpectral
+                write (intString, "(i7)") nn
+                intString = adjustl(intString)
 
-    do nn=1,nVolSolToWrite
-       call writeCGNSHeader(fileIDs(nn), cgnsBases(nn))
-    enddo
-  end subroutine openCGNSVolumeSol
+                volSolFileNames(nn) = trim(solfile)//"&
+                     &Spectral"//trim(intString)
+            end do
 
-  subroutine writeCGNSConvInfo
-    !
-    !       writeCGNSConvInfo writes the convergence info to the
-    !       cgns file(s).
-    !
-    use inputIO
-    use inputPhysics
-    use monitor, only : nMon, monNames, convArray
-    use iteration, only : iterTot
-    use su_cgns
-    use outputMod
-    use utils, only : terminate
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: nConv, base, cgnsInd, conv
-    integer :: i, nn, mm, ierr, realTypeCGNS
+            ! Set the number of volume solution files to write.
+            ! Either they are written or they are not written.
 
-    real(kind=8), dimension(:), allocatable :: buf8
+            if (writeVolume) then
+                nVolSolToWrite = nTimeIntervalsSpectral
+            else
+                nVolSolToWrite = 0
+            end if
 
-    ! Return immediately if the convergence history (of the inner
-    ! iterations) does not need to be stored. This logical can
-    ! only be .false. for an unsteady computation.
+        end select
+        !
+        !       Set the pointers for IOVar if volume solution files need to be
+        !       written.
+        !
+        testSolsToWrite: if (nVolSolToWrite > 0) then
 
-    if(.not. storeConvInnerIter) return
+            ! Allocate the memory for IOVar.
 
-    ! Store the number of iterations to be written in nn.
-    ! This is iterTot + 1, because the array starts at 0.
+            allocate (IOVar(nDom, nVolSolToWrite), stat=ierr)
+            if (ierr /= 0) &
+                call terminate("volSolFileNamesWrite", &
+                               "Memory allocation failure for IOVar")
+
+            ! As the writing normally involves other variables than just
+            ! the primitive ones, memory for the member variable w must be
+            ! allocated to make the general IO treatment possible. This is
+            ! a bit of an overhead, but that's a small price to pay for the
+            ! general treatment.
+
+            if (storeRindLayer) then
+                do nn = 1, nDom
+                    IOVar(nn, 1)%pointerOffset = 0
+
+                    iEnd = flowDoms(nn, 1, 1)%ie
+                    jEnd = flowDoms(nn, 1, 1)%je
+                    kEnd = flowDoms(nn, 1, 1)%ke
+
+                    allocate (IOVar(nn, 1)%w(iEnd, jEnd, kEnd, 1), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("volSolFileNamesWrite", &
+                                       "Memory allocation failure for IOVar%w")
+                end do
+            else
+                do nn = 1, nDom
+                    IOVar(nn, 1)%pointerOffset = 0
+
+                    iEnd = flowDoms(nn, 1, 1)%il
+                    jEnd = flowDoms(nn, 1, 1)%jl
+                    kEnd = flowDoms(nn, 1, 1)%kl
 
-    nn = iterTot + 1
+                    allocate (IOVar(nn, 1)%w(2:iEnd, 2:jEnd, 2:kEnd, 1), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("volSolFileNamesWrite", &
+                                       "Memory allocation failure for IOVar%w")
+                end do
+            end if
+
+            ! Set the pointers for the other solutions depending on the
+            ! situation.
+
+            select case (equationMode)
+
+            case (steady, timeSpectral)
+
+                ! Actually only time spectral mode, but steady is added to
+                ! avoid a compiler warning. Set the pointers for the higher
+                ! spectral solution to the first solution.
+
+                do mm = 2, nVolSolToWrite
+                    do nn = 1, nDom
+                        IOVar(nn, mm)%pointerOffset = 0
+                        IOVar(nn, mm)%w => IOVar(nn, 1)%w
+                    end do
+                end do
+
+                !=============================================================
+
+            case (unsteady)
+
+                ! It is possible that for an unsteady computation previous
+                ! solutions need to be written. However only the variables
+                ! wOld need to be written, so the pointer can be set to the
+                ! correct entries. As the starting indices of wOld are 2,
+                ! a pointer shift takes place here. I know this is a pain
+                ! in the butt, but that's what we have to live with.
+
+                kk = 1
+                do mm = 1, (nAvail - 1)
+                    if (.not. oldSolWritten(mm)) then
+                        kk = kk + 1
+                        do nn = 1, nDom
+                            IOVar(nn, kk)%pointerOffset = -1
+                            IOVar(nn, kk)%w => flowDoms(nn, 1, 1)%wOld(mm, 2:, 2:, 2:, :)
+                        end do
+                    end if
+                end do
+
+            end select
+
+        end if testSolsToWrite
+
+    end subroutine volSolFileNamesWrite
+
+    subroutine openCGNSVolumeSol
+        !
+        !       openCGNSVolumeSol opens the cgns solution file(s) if needed.
+        !       If opened the files are opened either for writing or for
+        !       modification. When the grid file(s) have been written, these
+        !       files are still open and nothing needs to be done.
+        !       Only processor 0 performs this task.
+        !
+        use cgnsGrid
+        use monitor
+        use su_cgns
+        use outputMod
+        use utils, only: terminate
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: nn
+
+        character(len=maxStringLen) :: errorMessage
+
+        ! Check if grid files have been written and if the solution must
+        ! be stored in the same file. If so only the CGNS header must
+        ! be written. Do this and return immediately.
+
+        if (writeGrid .and. (.not. useLinksInCGNS)) then
+            do nn = 1, nVolSolToWrite
+                call writeCGNSHeader(fileIDs(nn), cgnsBases(nn))
+            end do
+
+            return
+        end if
+
+        ! Solution files must be created. Allocate the memory for
+        ! the fileIDs and the bases.
+
+        allocate (fileIDs(nVolSolToWrite), cgnsBases(nVolSolToWrite), &
+                  stat=ierr)
+        if (ierr /= 0) &
+             call terminate("openCGNSVolumeSol", &
+             "Memory allocation failure for fileIDs and &
+             &cgnsBases")
+
+        ! Determine the situation we are having here.
+
+        testForLinks: if (useLinksInCGNS) then
+
+            ! Links are used to the coordinates of the zones. This means
+            ! that the files must be opened in write mode.
+
+            do nn = 1, nVolSolToWrite
+                call cg_open_f(volSolFileNames(nn), mode_write, &
+                               fileIDs(nn), ierr)
+                if (ierr /= CG_OK) then
+                    write (errorMessage, *) "File ", trim(volSolFileNames(nn)), &
+                         " could not be opened by CGNS&
+                         & for writing"
+                    call terminate("openCGNSVolumeSol", errorMessage)
+                end if
+
+                ! Create the base.
+
+                call cg_base_write_f(fileIDs(nn), cgnsBaseName, cgnsCelldim, &
+                                     cgnsPhysdim, cgnsBases(nn), ierr)
+                if (ierr /= CG_OK) &
+                     call terminate("openCGNSVolumeSol", &
+                     "Something wrong when calling &
+                     &cg_base_write_f")
+            end do
 
-    ! Depending on the input option, set the CGNS type and allocate
-    ! the memory for either buf4 or buf8.
+            else testForLinks
 
-    ! Always write the cgnsConvergence history to be double since
-    ! that's what Tecplot needs
+            ! Solutions must be written in the same file(s) as the grid.
+            ! As the grid file(s) are not written during the current call
+            ! to writeSol, these are old files and must therefore be opened
+            ! in modify mode.
 
-    realTypeCGNS = RealDouble
-    allocate(buf8(0:iterTot), stat=ierr)
+            do nn = 1, nVolSolToWrite
+                call cg_open_f(volSolFileNames(nn), mode_modify, &
+                               fileIDs(nn), ierr)
+                if (ierr /= CG_OK) then
+                    write (errorMessage, *) "File ", trim(volSolFileNames(nn)), &
+                         " could not be opened by CGNS&
+                         & for writing"
+                    call terminate("openCGNSVolumeSol", errorMessage)
+                end if
 
+                ! Simply set the base IDs to 1.
 
-    if(ierr /= 0)                         &
-         call terminate("writeCGNSConvInfo", &
-         "Memory allocation failure for either buf4 &
-         &or buf8")
+                cgnsBases(nn) = 1
+            end do
 
-    ! Determine the number of convergence histories to be written.
-    ! This depends on the equation mode.
+        end if testForLinks
 
-    select case (equationMode)
+        ! Write the CGNS header.
 
-    case (steady,unsteady)
-       nConv = 1
+        do nn = 1, nVolSolToWrite
+            call writeCGNSHeader(fileIDs(nn), cgnsBases(nn))
+        end do
+    end subroutine openCGNSVolumeSol
 
-    case (timeSpectral)
-       nConv = nVolSolToWrite    ! == number of spectral solutions.
+    subroutine writeCGNSConvInfo
+        !
+        !       writeCGNSConvInfo writes the convergence info to the
+        !       cgns file(s).
+        !
+        use inputIO
+        use inputPhysics
+        use monitor, only: nMon, monNames, convArray
+        use iteration, only: iterTot
+        use su_cgns
+        use outputMod
+        use utils, only: terminate
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: nConv, base, cgnsInd, conv
+        integer :: i, nn, mm, ierr, realTypeCGNS
 
-    end select
+        real(kind=8), dimension(:), allocatable :: buf8
 
-    ! Loop over the number of convergence histories to be written.
+        ! Return immediately if the convergence history (of the inner
+        ! iterations) does not need to be stored. This logical can
+        ! only be .false. for an unsteady computation.
 
-    convLoop: do conv=1,nConv
+        if (.not. storeConvInnerIter) return
 
-       ! Abbreviate the corresponding file and base a bit easier.
+        ! Store the number of iterations to be written in nn.
+        ! This is iterTot + 1, because the array starts at 0.
 
-       cgnsInd = fileIDs(conv)
-       base    = cgnsBases(conv)
+        nn = iterTot + 1
 
-       ! Go to the correct position in the CGNS file.
+        ! Depending on the input option, set the CGNS type and allocate
+        ! the memory for either buf4 or buf8.
 
-       call cg_goto_f(cgnsInd, base, ierr, "end")
-       if(ierr /= CG_OK)                    &
-            call terminate("writeCGNSConvInfo", &
-            "Something wrong when calling cg_goto_f")
+        ! Always write the cgnsConvergence history to be double since
+        ! that's what Tecplot needs
 
-       ! Create the convergence history node. Add a small description.
+        realTypeCGNS = RealDouble
+        allocate (buf8(0:iterTot), stat=ierr)
 
-       call cg_convergence_write_f(nn,"L2 norms are computed by taking &
-            &the square root of the quotient &
-            &the sum of the square of the &
-            &residuals and the total number &
-            &of cells in the grid.", ierr)
-       if(ierr /= CG_OK)                    &
-            call terminate("writeCGNSConvInfo", &
-            "Something wrong when calling &
-            &cg_convergence_write_f")
+        if (ierr /= 0) &
+             call terminate("writeCGNSConvInfo", &
+             "Memory allocation failure for either buf4 &
+             &or buf8")
 
-       ! The convergence history must be written under the node just
-       ! created. Go there.
+        ! Determine the number of convergence histories to be written.
+        ! This depends on the equation mode.
 
-       call cg_goto_f(cgnsInd, base, ierr, &
-            "ConvergenceHistory_t", 1, "end")
-       if(ierr /= CG_OK)                    &
-            call terminate("writeCGNSConvInfo", &
-            "Something wrong when calling cg_goto_f")
+        select case (equationMode)
 
-       ! Loop over the number of monitoring variables.
+        case (steady, unsteady)
+            nConv = 1
 
-       monLoop: do i=1,nMon
+        case (timeSpectral)
+            nConv = nVolSolToWrite    ! == number of spectral solutions.
 
-          ! Copy the convergence info to either buf4 or buf8 and write
-          ! it to file.
-          do mm=0,iterTot
-             buf8(mm) = convArray(mm,conv,i)
-          enddo
+        end select
 
-          call cg_array_write_f(monNames(i), realTypeCGNS, 1, int(nn, cgsize_t), &
-               buf8, ierr)
-          if(ierr /= CG_OK)                    &
-               call terminate("writeCGNSConvInfo", &
-               "Something wrong when calling &
-               &cg_array_write_f")
-       enddo monLoop
-    enddo convLoop
+        ! Loop over the number of convergence histories to be written.
 
-    ! Release the memory of buf8.
+        convLoop: do conv = 1, nConv
 
-    deallocate(buf8, stat=ierr)
+            ! Abbreviate the corresponding file and base a bit easier.
 
-    if(ierr /= 0)                         &
-         call terminate("writeCGNSConvInfo", &
-         "Deallocation failure for either buf4 or buf8")
-  end subroutine writeCGNSConvInfo
+            cgnsInd = fileIDs(conv)
+            base = cgnsBases(conv)
 
-  subroutine writeCGNSTimeHistory
-    !
-    !       WriteCGNSTimeHistory writes for unsteady computations
-    !       the time history of the monitoring variables to the
-    !       cgns file.
-    !
-    use cgnsNames
-    use inputIO
-    use monitor
-    use su_cgns
-    use outputMod
-    use utils, only : terminate
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: i, nn, mm, cgnsInd, base, ierr, realTypeCGNS
+            ! Go to the correct position in the CGNS file.
 
-    real(kind=4), dimension(:), allocatable :: buf4
-    real(kind=8), dimension(:), allocatable :: buf8
+            call cg_goto_f(cgnsInd, base, ierr, "end")
+            if (ierr /= CG_OK) &
+                call terminate("writeCGNSConvInfo", &
+                               "Something wrong when calling cg_goto_f")
 
-    character(len=maxCGNSNameLen) :: cgnsName
+            ! Create the convergence history node. Add a small description.
 
-    ! Store the file index and base a bit easier. Note that this info
-    ! is only written to the 1st file, because this is an unsteady
-    ! computation.
+            call cg_convergence_write_f(nn, "L2 norms are computed by taking &
+                 &the square root of the quotient &
+                 &the sum of the square of the &
+                 &residuals and the total number &
+                 &of cells in the grid.", ierr)
+            if (ierr /= CG_OK) &
+                 call terminate("writeCGNSConvInfo", &
+                 "Something wrong when calling &
+                 &cg_convergence_write_f")
 
-    cgnsInd = fileIDs(1)
-    base    = cgnsBases(1)
+            ! The convergence history must be written under the node just
+            ! created. Go there.
 
-    ! Store the total number of time steps in nn.
+            call cg_goto_f(cgnsInd, base, ierr, &
+                           "ConvergenceHistory_t", 1, "end")
+            if (ierr /= CG_OK) &
+                call terminate("writeCGNSConvInfo", &
+                               "Something wrong when calling cg_goto_f")
 
-    nn = timeStepUnsteady + nTimeStepsRestart
+            ! Loop over the number of monitoring variables.
 
-    ! Depending on the input option, set the CGNS type and allocate
-    ! the memory for either buf4 or buf8.
+            monLoop: do i = 1, nMon
 
-    ! Set the cgns real type depending on the input option.
+                ! Copy the convergence info to either buf4 or buf8 and write
+                ! it to file.
+                do mm = 0, iterTot
+                    buf8(mm) = convArray(mm, conv, i)
+                end do
 
-    select case (precisionSol)
-    case (precisionSingle)
-       realTypeCGNS = RealSingle
-       allocate(buf4(nn), stat=ierr)
+                call cg_array_write_f(monNames(i), realTypeCGNS, 1, int(nn, cgsize_t), &
+                                      buf8, ierr)
+                if (ierr /= CG_OK) &
+                     call terminate("writeCGNSConvInfo", &
+                     "Something wrong when calling &
+                     &cg_array_write_f")
+            end do monLoop
+        end do convLoop
 
-       !===============================================================
+        ! Release the memory of buf8.
 
-    case (precisionDouble)
-       realTypeCGNS = RealDouble
-       allocate(buf8(nn), stat=ierr)
-    end select
+        deallocate (buf8, stat=ierr)
 
-    if(ierr /= 0)                            &
-         call terminate("writeCGNSTimeHistory", &
-         "Memory allocation failure for either buf4 &
-         &or buf8")
+        if (ierr /= 0) &
+            call terminate("writeCGNSConvInfo", &
+                           "Deallocation failure for either buf4 or buf8")
+    end subroutine writeCGNSConvInfo
 
-    ! Go to the correct position in the cgns file.
+    subroutine writeCGNSTimeHistory
+        !
+        !       WriteCGNSTimeHistory writes for unsteady computations
+        !       the time history of the monitoring variables to the
+        !       cgns file.
+        !
+        use cgnsNames
+        use inputIO
+        use monitor
+        use su_cgns
+        use outputMod
+        use utils, only: terminate
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: i, nn, mm, cgnsInd, base, ierr, realTypeCGNS
 
-    call cg_goto_f(cgnsInd, base, ierr, "end")
-    if(ierr /= CG_OK)                       &
-         call terminate("writeCGNSTimeHistory", &
-         "Something wrong when calling cg_goto_f")
+        real(kind=4), dimension(:), allocatable :: buf4
+        real(kind=8), dimension(:), allocatable :: buf8
 
-    ! Create the name of the base iterative data node.
+        character(len=maxCGNSNameLen) :: cgnsName
 
-    cgnsName = "TimeHistory"
+        ! Store the file index and base a bit easier. Note that this info
+        ! is only written to the 1st file, because this is an unsteady
+        ! computation.
 
-    ! Create the base iterative node and check if everything
-    ! went okay.
+        cgnsInd = fileIDs(1)
+        base = cgnsBases(1)
 
-    call cg_biter_write_f(cgnsInd, base, cgnsName, nn, ierr)
-    if(ierr /= CG_OK)                       &
-         call terminate("writeCGNSTimeHistory", &
-         "Something wrong when calling cg_biter_write_f")
+        ! Store the total number of time steps in nn.
 
-    ! The time history must be written under the node just created.
-    ! Go there.
+        nn = timeStepUnsteady + nTimeStepsRestart
 
-    call cg_goto_f(cgnsInd, base, ierr, &
-         "BaseIterativeData_t" , 1, "end")
-    if(ierr /= CG_OK)                          &
-         call terminate("writeCGNSTimeHistory", &
-         "Something wrong when calling cg_goto_f")
+        ! Depending on the input option, set the CGNS type and allocate
+        ! the memory for either buf4 or buf8.
 
-    ! Write the time values.
+        ! Set the cgns real type depending on the input option.
 
-    cgnsName = cgnsTimeValue
-    call cg_array_write_f(cgnsName, realTypeCGNS, 1, int(nn, cgsize_t), &
-         timeArray, ierr)
-    if(ierr /= CG_OK)                       &
-         call terminate("writeCGNSTimeHistory", &
-         "Something wrong when calling cg_array_write_f")
+        select case (precisionSol)
+        case (precisionSingle)
+            realTypeCGNS = RealSingle
+            allocate (buf4(nn), stat=ierr)
 
-    ! Loop over the number of monitoring variables and write
-    ! their time history.
+            !===============================================================
 
-    monLoop: do i=1,nMon
+        case (precisionDouble)
+            realTypeCGNS = RealDouble
+            allocate (buf8(nn), stat=ierr)
+        end select
 
-       ! Copy the time history to either buf4 or buf8 and write it
-       ! to file.
+        if (ierr /= 0) &
+             call terminate("writeCGNSTimeHistory", &
+             "Memory allocation failure for either buf4 &
+             &or buf8")
 
-       select case (precisionSol)
-       case (precisionSingle)
-          do mm=1,nn
-             buf4(mm) = timeDataArray(mm,i)
-          enddo
+        ! Go to the correct position in the cgns file.
 
-          call cg_array_write_f(monNames(i), realTypeCGNS, 1, int(nn, cgsize_t), &
-               buf4, ierr)
+        call cg_goto_f(cgnsInd, base, ierr, "end")
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSTimeHistory", &
+                           "Something wrong when calling cg_goto_f")
 
-          !=============================================================
+        ! Create the name of the base iterative data node.
 
-       case (precisionDouble)
-          do mm=1,nn
-             buf8(mm) = timeDataArray(mm,i)
-          enddo
+        cgnsName = "TimeHistory"
 
-          call cg_array_write_f(monNames(i), realTypeCGNS, 1, int(nn, cgsize_t), &
-               buf8, ierr)
-       end select
+        ! Create the base iterative node and check if everything
+        ! went okay.
 
-       if(ierr /= CG_OK)                       &
+        call cg_biter_write_f(cgnsInd, base, cgnsName, nn, ierr)
+        if (ierr /= CG_OK) &
             call terminate("writeCGNSTimeHistory", &
-            "Something wrong when calling &
-            &cg_array_write_f")
-    enddo monLoop
-
-    ! Release the memory of buf4 or buf8.
-
-    select case (precisionSol)
-    case (precisionSingle)
-       deallocate(buf4, stat=ierr)
-
-    case (precisionDouble)
-       deallocate(buf8, stat=ierr)
-    end select
-
-    if(ierr /= 0)                            &
-         call terminate("writeCGNSTimeHistory", &
-         "Deallocation failure for either buf4 or buf8")
-  end subroutine writeCGNSTimeHistory
-
-  subroutine writeSolCGNSZone(zone, nSolVar, nDiscrVar, solNames)
-    !
-    !       writeSolCGNSZone writes a volume solution of the given zone
-    !       to the cgns file(s). In case the solution must be written to a
-    !       separate file, useLinksInCGNS == .true., a link to the zone of
-    !       the grid file is created.
-    !
-    use blockPointers
-    use cgnsGrid
-    use cgnsNames
-    use communication
-    use flowVarRefState
-    use inputIO
-    use inputPhysics
-    use iteration
-    use su_cgns
-    use outputMod
-    use utils, only : terminate, setPointers
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: zone
-    integer(kind=intType), intent(in) :: nSolVar, nDiscrVar
-
-    character(len=*), dimension(*), intent(in) :: solNames
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-    integer :: source, bufSize, size, nnVar
-    integer :: cgnsInd, cgnsBase, cgnsZone, cgnsSol, realTypeCGNS
-
-    integer, dimension(mpi_status_size) :: mpiStatus
-    integer(kind=cgsize_t), dimension(9)               :: sizes
-
-    integer(kind=intType) :: i, j, nn, mm, ll, ind, nVarWritten
-    integer(kind=intType) :: nBlocks, nSubblocks, offset
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
-    integer(kind=intType) :: iBegCGNS, jBegCGNS, kBegCGNS
-    integer(kind=intType) :: iEndCGNS, jEndCGNS, kEndCGNS
-
-    integer(kind=intType), dimension(nProc) :: nMessages
-
-    integer(kind=intType), dimension(:), allocatable :: proc
-
-    integer(kind=intType), dimension(:,:,:), allocatable :: subRanges
-
-    real(kind=realType), dimension(:), allocatable :: buffer
-
-    real(kind=4), dimension(:), allocatable :: sol4
-    real(kind=8), dimension(:), allocatable :: sol8
-
-    logical :: rindLayerThisSol, unsteadyHigherSol, writeLink
-
-    character(len=maxStringLen) :: linkName, solName
-
-    ! Set the cgns real type depending on the input option.
-
-    ! Store the number of local blocks and the offset in
-    ! blocksCGNSblock for this zone a bit easier.
-
-    offset  = nBlocksCGNSblock(zone-1)
-    nBlocks = nBlocksCGNSblock(zone) - offset
-
-    ! Determine the amount of block parts each processor will send to
-    ! processor 0.
-
-    call mpi_gather(nBlocks, 1, adflow_integer, nMessages, 1, &
-         adflow_integer, 0, ADflow_comm_world, ierr)
-
-    ! At the moment the writing of the cgns file is sequential and done
-    ! by processor 0. This means that this processor gathers all info
-    ! from the other processors and writes it to file.
-
-    rootproc: if(myID == 0) then
-       !
-       !         I am processor 0 and poor me has to do all the work.
-       !
-       ! Allocate the memory for the array used to write the solution
-       ! to file. The size depends whether or not rind layers are to
-       ! be written and the precision of the floating point type.
-
-       if( storeRindLayer ) then
-          ll = (cgnsDoms(zone)%kl+1) * (cgnsDoms(zone)%jl+1) &
-               * (cgnsDoms(zone)%il+1)
-       else
-          ll = (cgnsDoms(zone)%kl-1) * (cgnsDoms(zone)%jl-1) &
-               * (cgnsDoms(zone)%il-1)
-       endif
-
-       select case(precisionSol)
-       case (precisionSingle)
-          allocate(sol4(ll), sol8(0), stat=ierr)
-       case (precisionDouble)
-          allocate(sol4(0), sol8(ll), stat=ierr)
-       end select
-
-       if(ierr /= 0)                        &
-            call terminate("writeSolCGNSZone", &
-            "Memory allocation failure for sol")
+                           "Something wrong when calling cg_biter_write_f")
 
-       ! First determine the number of subblocks into the original cgns
-       ! block is split.
+        ! The time history must be written under the node just created.
+        ! Go there.
 
-       nSubblocks = 0
-       do i=1,nProc
-          nSubblocks = nSubblocks + nMessages(i)
-       enddo
+        call cg_goto_f(cgnsInd, base, ierr, &
+                       "BaseIterativeData_t", 1, "end")
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSTimeHistory", &
+                           "Something wrong when calling cg_goto_f")
 
-       ! Allocate the memory for the ranges and the processor
-       ! where the subblock is stored.
+        ! Write the time values.
 
-       allocate(subRanges(3,2,nSubblocks), proc(nSubblocks), stat=ierr)
-       if(ierr /= 0)                        &
-            call terminate("writeSolCGNSZone", &
-            "Memory allocation failure for subRanges &
-            &and proc")
+        cgnsName = cgnsTimeValue
+        call cg_array_write_f(cgnsName, realTypeCGNS, 1, int(nn, cgsize_t), &
+                              timeArray, ierr)
+        if (ierr /= CG_OK) &
+            call terminate("writeCGNSTimeHistory", &
+                           "Something wrong when calling cg_array_write_f")
 
-       ! Determine the processor ID's where the subRanges are stored.
-       ! Note that 1 must be substracted, because the processor numbering
-       ! starts at 0.
+        ! Loop over the number of monitoring variables and write
+        ! their time history.
 
-       nSubblocks = 0
-       do i=1,nProc
-          do j=1,nMessages(i)
-             nSubblocks = nSubblocks + 1
-             proc(nSubblocks) = i - 1
-          enddo
-       enddo
+        monLoop: do i = 1, nMon
 
-       ! Determine the subranges for the 1st solution.
+            ! Copy the time history to either buf4 or buf8 and write it
+            ! to file.
 
-       rindLayerThisSol = storeRindLayer
-       call getSubRangesSol
+            select case (precisionSol)
+            case (precisionSingle)
+                do mm = 1, nn
+                    buf4(mm) = timeDataArray(mm, i)
+                end do
 
-       ! Allocate the memory for buffer.
+                call cg_array_write_f(monNames(i), realTypeCGNS, 1, int(nn, cgsize_t), &
+                                      buf4, ierr)
 
-       allocate(buffer(bufSize), stat=ierr)
-       if(ierr /= 0)                        &
-            call terminate("writeSolCGNSZone", &
-            "Memory allocation failure for buffer")
+                !=============================================================
 
-       ! Loop over the number of solutions.
+            case (precisionDouble)
+                do mm = 1, nn
+                    buf8(mm) = timeDataArray(mm, i)
+                end do
 
-       solLoopRoot: do ind=1,nVolSolToWrite
+                call cg_array_write_f(monNames(i), realTypeCGNS, 1, int(nn, cgsize_t), &
+                                      buf8, ierr)
+            end select
 
-          ! Determine whether or not we are dealing with an unsteady
-          ! higher solution here.
+            if (ierr /= CG_OK) &
+                 call terminate("writeCGNSTimeHistory", &
+                 "Something wrong when calling &
+                 &cg_array_write_f")
+        end do monLoop
 
-          unsteadyHigherSol = .false.
-          if(ind > 1 .and. equationMode == unsteady) &
-               unsteadyHigherSol = .true.
+        ! Release the memory of buf4 or buf8.
 
-          ! For unsteady mode on rigid meshes it is possible that
-          ! only the solution must be written; the coordinates are
-          ! not written to a file. In case links are used for that
-          ! case the link should not be created. The logical
-          ! writeLink takes care of that.
+        select case (precisionSol)
+        case (precisionSingle)
+            deallocate (buf4, stat=ierr)
 
-          writeLink = useLinksInCGNS
-          if(unsteadyHigherSol .and. (.not. deforming_Grid)) &
-               writeLink = .false.
+        case (precisionDouble)
+            deallocate (buf8, stat=ierr)
+        end select
+
+        if (ierr /= 0) &
+            call terminate("writeCGNSTimeHistory", &
+                           "Deallocation failure for either buf4 or buf8")
+    end subroutine writeCGNSTimeHistory
+
+    subroutine writeSolCGNSZone(zone, nSolVar, nDiscrVar, solNames)
+        !
+        !       writeSolCGNSZone writes a volume solution of the given zone
+        !       to the cgns file(s). In case the solution must be written to a
+        !       separate file, useLinksInCGNS == .true., a link to the zone of
+        !       the grid file is created.
+        !
+        use blockPointers
+        use cgnsGrid
+        use cgnsNames
+        use communication
+        use flowVarRefState
+        use inputIO
+        use inputPhysics
+        use iteration
+        use su_cgns
+        use outputMod
+        use utils, only: terminate, setPointers
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: zone
+        integer(kind=intType), intent(in) :: nSolVar, nDiscrVar
+
+        character(len=*), dimension(*), intent(in) :: solNames
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+        integer :: source, bufSize, size, nnVar
+        integer :: cgnsInd, cgnsBase, cgnsZone, cgnsSol, realTypeCGNS
+
+        integer, dimension(mpi_status_size) :: mpiStatus
+        integer(kind=cgsize_t), dimension(9) :: sizes
+
+        integer(kind=intType) :: i, j, nn, mm, ll, ind, nVarWritten
+        integer(kind=intType) :: nBlocks, nSubblocks, offset
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
+        integer(kind=intType) :: iBegCGNS, jBegCGNS, kBegCGNS
+        integer(kind=intType) :: iEndCGNS, jEndCGNS, kEndCGNS
+
+        integer(kind=intType), dimension(nProc) :: nMessages
+
+        integer(kind=intType), dimension(:), allocatable :: proc
+
+        integer(kind=intType), dimension(:, :, :), allocatable :: subRanges
+
+        real(kind=realType), dimension(:), allocatable :: buffer
+
+        real(kind=4), dimension(:), allocatable :: sol4
+        real(kind=8), dimension(:), allocatable :: sol8
+
+        logical :: rindLayerThisSol, unsteadyHigherSol, writeLink
+
+        character(len=maxStringLen) :: linkName, solName
+
+        ! Set the cgns real type depending on the input option.
+
+        ! Store the number of local blocks and the offset in
+        ! blocksCGNSblock for this zone a bit easier.
+
+        offset = nBlocksCGNSblock(zone - 1)
+        nBlocks = nBlocksCGNSblock(zone) - offset
+
+        ! Determine the amount of block parts each processor will send to
+        ! processor 0.
+
+        call mpi_gather(nBlocks, 1, adflow_integer, nMessages, 1, &
+                        adflow_integer, 0, ADflow_comm_world, ierr)
+
+        ! At the moment the writing of the cgns file is sequential and done
+        ! by processor 0. This means that this processor gathers all info
+        ! from the other processors and writes it to file.
+
+        rootproc: if (myID == 0) then
+            !
+            !         I am processor 0 and poor me has to do all the work.
+            !
+            ! Allocate the memory for the array used to write the solution
+            ! to file. The size depends whether or not rind layers are to
+            ! be written and the precision of the floating point type.
+
+            if (storeRindLayer) then
+                ll = (cgnsDoms(zone)%kl + 1) * (cgnsDoms(zone)%jl + 1) &
+                     * (cgnsDoms(zone)%il + 1)
+            else
+                ll = (cgnsDoms(zone)%kl - 1) * (cgnsDoms(zone)%jl - 1) &
+                     * (cgnsDoms(zone)%il - 1)
+            end if
+
+            select case (precisionSol)
+            case (precisionSingle)
+                allocate (sol4(ll), sol8(0), stat=ierr)
+            case (precisionDouble)
+                allocate (sol4(0), sol8(ll), stat=ierr)
+            end select
+
+            if (ierr /= 0) &
+                call terminate("writeSolCGNSZone", &
+                               "Memory allocation failure for sol")
 
-          ! Store the file and base ID a bit easier and set
-          ! rindLayerThisSol. A rind layer is not written for the
-          ! higher solution in unsteady mode.
+            ! First determine the number of subblocks into the original cgns
+            ! block is split.
 
-          cgnsInd          = fileIDs(ind)
-          cgnsBase         = cgnsBases(ind)
-          rindLayerThisSol = storeRindLayer
-          if( unsteadyHigherSol ) rindLayerThisSol = .false.
+            nSubblocks = 0
+            do i = 1, nProc
+                nSubblocks = nSubblocks + nMessages(i)
+            end do
 
-          ! Check if the subranges must be recomputed. Some dirty stuff
-          ! must be done, because Fortran 90/95 does not allow a
-          ! comparison between logicals.
+            ! Allocate the memory for the ranges and the processor
+            ! where the subblock is stored.
 
-          nn = 0; if( storeRindLayer )   nn = 1
-          mm = 0; if( rindLayerThisSol ) mm = 1
-          if(nn /= mm) call getSubRangesSol
+            allocate (subRanges(3, 2, nSubblocks), proc(nSubblocks), stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("writeSolCGNSZone", &
+                 "Memory allocation failure for subRanges &
+                 &and proc")
 
-          ! Check whether a zone must be created, which is
-          ! true if links are used. Create the zone, if needed.
+            ! Determine the processor ID's where the subRanges are stored.
+            ! Note that 1 must be substracted, because the processor numbering
+            ! starts at 0.
 
-          createZoneTest: if( useLinksInCGNS ) then
+            nSubblocks = 0
+            do i = 1, nProc
+                do j = 1, nMessages(i)
+                    nSubblocks = nSubblocks + 1
+                    proc(nSubblocks) = i - 1
+                end do
+            end do
 
-             ! A zone must be created. Use the same name as
-             ! in the original grid file.
+            ! Determine the subranges for the 1st solution.
 
-             sizes(1) = cgnsDoms(zone)%il
-             sizes(2) = cgnsDoms(zone)%jl
-             sizes(3) = cgnsDoms(zone)%kl
-             sizes(4) = cgnsDoms(zone)%nx
-             sizes(5) = cgnsDoms(zone)%ny
-             sizes(6) = cgnsDoms(zone)%nz
-             sizes(7) = 0
-             sizes(8) = 0
-             sizes(9) = 0
+            rindLayerThisSol = storeRindLayer
+            call getSubRangesSol
 
-             call cg_zone_write_f(cgnsInd, cgnsBase,              &
-                  cgnsDoms(zone)%zonename, sizes, &
-                  Structured, cgnsZone, ierr)
-             if(ierr /= CG_OK)                 then
+            ! Allocate the memory for buffer.
 
+            allocate (buffer(bufSize), stat=ierr)
+            if (ierr /= 0) &
                 call terminate("writeSolCGNSZone", &
-                     "Something wrong when calling &
-                     &cg_zone_write_f")
-             end if
-             ! Check if a link must actually be created.
+                               "Memory allocation failure for buffer")
 
-             writeLinkTest: if( writeLink ) then
+            ! Loop over the number of solutions.
 
-                ! Create the link of the coordinates to the zone in the
-                ! original grid.  First move to the correct location.
+            solLoopRoot: do ind = 1, nVolSolToWrite
 
-                call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", &
-                     cgnsZone, "end")
-                if(ierr /= CG_OK)                   &
-                     call terminate("writeSolCGNSZone", &
-                     "Something wrong when calling cg_goto_f")
+                ! Determine whether or not we are dealing with an unsteady
+                ! higher solution here.
 
-                ! Determine the link name and write the link to the grid
-                ! coordinates.
+                unsteadyHigherSol = .false.
+                if (ind > 1 .and. equationMode == unsteady) &
+                    unsteadyHigherSol = .true.
 
-                linkName = cgnsBasename//"&
-                     &/"//cgnsDoms(zone)%zonename//"&
-                     &/"//"GridCoordinates"
+                ! For unsteady mode on rigid meshes it is possible that
+                ! only the solution must be written; the coordinates are
+                ! not written to a file. In case links are used for that
+                ! case the link should not be created. The logical
+                ! writeLink takes care of that.
 
-                call cg_link_write_f("GridCoordinates", &
-                     gridFileNames(ind), linkName, ierr)
-                if(ierr /= CG_OK)                   &
-                     call terminate("writeSolCGNSZone", &
-                     "Something wrong when calling &
-                     &cg_link_write_f")
-             endif writeLinkTest
+                writeLink = useLinksInCGNS
+                if (unsteadyHigherSol .and. (.not. deforming_Grid)) &
+                    writeLink = .false.
 
-          else createZoneTest
+                ! Store the file and base ID a bit easier and set
+                ! rindLayerThisSol. A rind layer is not written for the
+                ! higher solution in unsteady mode.
 
-             ! The zone already exists. Simply set cgnsZone to zone.
+                cgnsInd = fileIDs(ind)
+                cgnsBase = cgnsBases(ind)
+                rindLayerThisSol = storeRindLayer
+                if (unsteadyHigherSol) rindLayerThisSol = .false.
 
-             cgnsZone = zone
+                ! Check if the subranges must be recomputed. Some dirty stuff
+                ! must be done, because Fortran 90/95 does not allow a
+                ! comparison between logicals.
 
-          endif createZoneTest
+                nn = 0; if (storeRindLayer) nn = 1
+                mm = 0; if (rindLayerThisSol) mm = 1
+                if (nn /= mm) call getSubRangesSol
 
-          ! Create the flow solution node.
+                ! Check whether a zone must be created, which is
+                ! true if links are used. Create the zone, if needed.
 
-          call cg_sol_write_f(cgnsInd, cgnsBase, cgnsZone,          &
-               "Flow solution", CellCenter, cgnsSol, &
-               ierr)
-          if(ierr /= CG_OK)                   &
-               call terminate("writeSolCGNSZone", &
-               "Something wrong when calling &
-               &cg_sol_write_f")
+                createZoneTest: if (useLinksInCGNS) then
 
-          ! Create the rind layers. If rind layers must be stored put
-          ! 1 layer on every face of the block; otherwise put 0 layers.
-          ! Use sizes as a buffer to store the rind data. The rind data
-          ! must be created under the just created solution node.
+                    ! A zone must be created. Use the same name as
+                    ! in the original grid file.
 
-          call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", &
-               cgnsZone, "FlowSolution_t", cgnsSol, "end")
-          if(ierr /= CG_OK)                   &
-               call terminate("writeSolCGNSZone", &
-               "Something wrong when calling cg_goto_f")
+                    sizes(1) = cgnsDoms(zone)%il
+                    sizes(2) = cgnsDoms(zone)%jl
+                    sizes(3) = cgnsDoms(zone)%kl
+                    sizes(4) = cgnsDoms(zone)%nx
+                    sizes(5) = cgnsDoms(zone)%ny
+                    sizes(6) = cgnsDoms(zone)%nz
+                    sizes(7) = 0
+                    sizes(8) = 0
+                    sizes(9) = 0
 
-          if( rindLayerThisSol ) then
-             sizes(1) = 1; sizes(2) = 1; sizes(3) = 1
-             sizes(4) = 1; sizes(5) = 1; sizes(6) = 1
-          else
-             sizes(1) = 0; sizes(2) = 0; sizes(3) = 0
-             sizes(4) = 0; sizes(5) = 0; sizes(6) = 0
-          endif
+                    call cg_zone_write_f(cgnsInd, cgnsBase, &
+                                         cgnsDoms(zone)%zonename, sizes, &
+                                         Structured, cgnsZone, ierr)
+                    if (ierr /= CG_OK) then
 
-          call cg_rind_write_f(int(sizes, intType), ierr)
-          if(ierr /= CG_OK)                   &
-               call terminate("writeSolCGNSZone", &
-               "Something wrong when calling &
-               &cg_rind_write_f")
+                        call terminate("writeSolCGNSZone", &
+                             "Something wrong when calling &
+                             &cg_zone_write_f")
+                    end if
+                    ! Check if a link must actually be created.
 
-          ! Determine the index range of the solution of the zone
-          ! to be written.
+                    writeLinkTest: if (writeLink) then
 
-          iBegCGNS = 2 - sizes(1)
-          jBegCGNS = 2 - sizes(3)
-          kBegCGNS = 2 - sizes(5)
+                        ! Create the link of the coordinates to the zone in the
+                        ! original grid.  First move to the correct location.
 
-          iEndCGNS = cgnsDoms(zone)%il + sizes(2)
-          jEndCGNS = cgnsDoms(zone)%jl + sizes(4)
-          kEndCGNS = cgnsDoms(zone)%kl + sizes(6)
+                        call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", &
+                                       cgnsZone, "end")
+                        if (ierr /= CG_OK) &
+                            call terminate("writeSolCGNSZone", &
+                                           "Something wrong when calling cg_goto_f")
 
-          ! Determine the number of variables to be written.
-          ! For the unsteady higher solutions only the variables
-          ! needed for a restart are written.
+                        ! Determine the link name and write the link to the grid
+                        ! coordinates.
 
-          nVarWritten = nSolVar+nDiscrVar
-          if( unsteadyHigherSol ) nVarWritten = nw
+                        linkName = cgnsBasename//"&
+                             &/"//cgnsDoms(zone)%zonename//"&
+                             &/"//"GridCoordinates"
 
-          ! Loop over the number of variables to be written.
+                        call cg_link_write_f("GridCoordinates", &
+                                             gridFileNames(ind), linkName, ierr)
+                        if (ierr /= CG_OK) &
+                             call terminate("writeSolCGNSZone", &
+                             "Something wrong when calling &
+                             &cg_link_write_f")
+                    end if writeLinkTest
 
-          varWriteLoop: do nn=1,nVarWritten
+                    else createZoneTest
 
-             ! Copy solNames(nn) in solName for later purposes.
-             ! Correct this value if the conservative variables
-             ! for a consistent unsteady restart must be written.
-             ! No need to correct the turbulent variables, because
-             ! these names are okay.
+                    ! The zone already exists. Simply set cgnsZone to zone.
 
-             solName = solNames(nn)
+                    cgnsZone = zone
 
-             if( unsteadyHigherSol ) then
-                nnVar = nn
+                end if createZoneTest
 
-                select case(nnVar)
-                case (irho)
-                   solName = cgnsDensity
+                ! Create the flow solution node.
 
-                case (imx)
-                   solName = cgnsMomx
+                call cg_sol_write_f(cgnsInd, cgnsBase, cgnsZone, &
+                                    "Flow solution", CellCenter, cgnsSol, &
+                                    ierr)
+                if (ierr /= CG_OK) &
+                     call terminate("writeSolCGNSZone", &
+                     "Something wrong when calling &
+                     &cg_sol_write_f")
 
-                case (imy)
-                   solName = cgnsMomy
+                ! Create the rind layers. If rind layers must be stored put
+                ! 1 layer on every face of the block; otherwise put 0 layers.
+                ! Use sizes as a buffer to store the rind data. The rind data
+                ! must be created under the just created solution node.
 
-                case (imz)
-                   solName = cgnsMomz
+                call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", &
+                               cgnsZone, "FlowSolution_t", cgnsSol, "end")
+                if (ierr /= CG_OK) &
+                    call terminate("writeSolCGNSZone", &
+                                   "Something wrong when calling cg_goto_f")
+
+                if (rindLayerThisSol) then
+                    sizes(1) = 1; sizes(2) = 1; sizes(3) = 1
+                    sizes(4) = 1; sizes(5) = 1; sizes(6) = 1
+                else
+                    sizes(1) = 0; sizes(2) = 0; sizes(3) = 0
+                    sizes(4) = 0; sizes(5) = 0; sizes(6) = 0
+                end if
 
-                case (irhoE)
-                   solName = cgnsEnergy
-                end select
-             endif
+                call cg_rind_write_f(int(sizes, intType), ierr)
+                if (ierr /= CG_OK) &
+                     call terminate("writeSolCGNSZone", &
+                     "Something wrong when calling &
+                     &cg_rind_write_f")
 
-             ! Loop over the number of subblocks stored on
-             ! this processor.
+                ! Determine the index range of the solution of the zone
+                ! to be written.
 
-             do mm=1,nBlocks
+                iBegCGNS = 2 - sizes(1)
+                jBegCGNS = 2 - sizes(3)
+                kBegCGNS = 2 - sizes(5)
 
-                ! Set the pointers to the local domain.
+                iEndCGNS = cgnsDoms(zone)%il + sizes(2)
+                jEndCGNS = cgnsDoms(zone)%jl + sizes(4)
+                kEndCGNS = cgnsDoms(zone)%kl + sizes(6)
 
-                if( unsteadyHigherSol ) then
-                   call setPointers(blocksCGNSblock(mm+offset), &
-                        1_intType, 1_intType)
-                else
-                   call setPointers(blocksCGNSblock(mm+offset), &
-                        1_intType, ind)
-                endif
+                ! Determine the number of variables to be written.
+                ! For the unsteady higher solutions only the variables
+                ! needed for a restart are written.
 
-                ! Determine the cell range I have to write. This depends
-                ! whether or not halo's must be written.
+                nVarWritten = nSolVar + nDiscrVar
+                if (unsteadyHigherSol) nVarWritten = nw
 
-                iBeg = 2; iEnd = il
-                jBeg = 2; jEnd = jl
-                kBeg = 2; kEnd = kl
+                ! Loop over the number of variables to be written.
 
-                if(storeRindLayer .and. (.not. unsteadyHigherSol)) then
-                   if(iBegor == 1) iBeg = 1
-                   if(jBegor == 1) jBeg = 1
-                   if(kBegor == 1) kBeg = 1
+                varWriteLoop: do nn = 1, nVarWritten
 
-                   if(iEndor == cgnsDoms(zone)%il) iEnd = ie
-                   if(jEndor == cgnsDoms(zone)%jl) jEnd = je
-                   if(kEndor == cgnsDoms(zone)%kl) kEnd = ke
-                endif
+                    ! Copy solNames(nn) in solName for later purposes.
+                    ! Correct this value if the conservative variables
+                    ! for a consistent unsteady restart must be written.
+                    ! No need to correct the turbulent variables, because
+                    ! these names are okay.
 
-                ! Fill the buffer with the correct solution variable.
-                ! The routine called depends on the situation.
+                    solName = solNames(nn)
 
-                if( unsteadyHigherSol ) then
+                    if (unsteadyHigherSol) then
+                        nnVar = nn
 
-                   ! Unsteady higher solution. Write the old solution.
+                        select case (nnVar)
+                        case (irho)
+                            solName = cgnsDensity
 
-                   call storeOldSolInBuffer(buffer, ind, nn, iBeg, iEnd, &
-                        jBeg, jEnd, kBeg, kEnd)
-                else
+                        case (imx)
+                            solName = cgnsMomx
 
-                   ! Standard solution must be written.
-
-                   call storeSolInBuffer(buffer, .true., solName, &
-                        iBeg, iEnd, jBeg, jEnd,  &
-                        kBeg, kEnd)
-                endif
-
-                ! And store it in sol. The routine called depends on
-                ! the desired precision.
-
-                select case (precisionSol)
-                case (precisionSingle)
-                   call copyDataBufSinglePrecision(sol4, buffer,        &
-                        iBegCGNS, jBegCGNS, &
-                        kBegCGNS, iEndCGNS, &
-                        jEndCGNS, kEndCGNS, &
-                        subRanges(1,1,mm))
-                case (precisionDouble)
-                   call copyDataBufDoublePrecision(sol8, buffer,        &
-                        iBegCGNS, jBegCGNS, &
-                        kBegCGNS, iEndCGNS, &
-                        jEndCGNS, kEndCGNS, &
-                        subRanges(1,1,mm))
-                end select
-
-             enddo
-
-             ! Loop over the number of subblocks stored on
-             ! other processors.
-
-             do mm=(nBlocks+1),nSubblocks
-
-                ! Receive the range of subblock mm.
-
-                source = proc(mm)
-                call mpi_recv(buffer, bufSize, adflow_real, source, &
-                     source+1, ADflow_comm_world, mpiStatus, ierr)
-
-                ! And store it in sol.
-
-                select case (precisionSol)
-                case (precisionSingle)
-                   call copyDataBufSinglePrecision(sol4, buffer,        &
-                        iBegCGNS, jBegCGNS, &
-                        kBegCGNS, iEndCGNS, &
-                        jEndCGNS, kEndCGNS, &
-                        subRanges(1,1,mm))
-                case (precisionDouble)
-                   call copyDataBufDoublePrecision(sol8, buffer,        &
-                        iBegCGNS, jBegCGNS, &
-                        kBegCGNS, iEndCGNS, &
-                        jEndCGNS, kEndCGNS, &
-                        subRanges(1,1,mm))
-                end select
-
-             enddo
-
-             ! Write the solution variable to file. Source is just used
-             ! as a dummy variable and does not have a meaning.
-             select case(precisionSol)
-             case (precisionSingle)
-                call cg_field_write_f(cgnsInd, cgnsBase, cgnsZone, &
-                     cgnsSol, realSingle, solName, sol4, source, ierr)
-             case (precisionDouble)
-                call cg_field_write_f(cgnsInd, cgnsBase, cgnsZone, &
-                     cgnsSol, realDouble, solName, sol8, source, ierr)
-             end select
-             if(ierr /= CG_OK)    &
-                  call terminate("writeSolCGNSZone", &
-                  "Something wrong when calling &
-                  &cg_field_write_f")
-          enddo varWriteLoop
-
-       enddo solLoopRoot
-
-       ! Release some memory only allocated on the root processor.
-       deallocate(sol4, sol8, subRanges, proc, stat=ierr)
-       if(ierr /= 0) call terminate("writeSolCGNSZone", &
-            "Deallocation error on root proc")
-
-    else rootproc
-       !
-       !         I am not the root processor and may have to send some data
-       !         to the root processor.
-       !
-       ! Determine the subranges for the 1st solution.
-
-       rindLayerThisSol = storeRindLayer
-       call getSubRangesSol
-
-       ! Allocate the memory for buffer.
-
-       allocate(buffer(bufSize), stat=ierr)
-       if(ierr /= 0)                        &
-            call terminate("writeSolCGNSZone", &
-            "Memory allocation failure for buffer")
+                        case (imy)
+                            solName = cgnsMomy
 
-       ! Loop over the number of solutions.
+                        case (imz)
+                            solName = cgnsMomz
 
-       solLoopOthers: do ind=1,nVolSolToWrite
+                        case (irhoE)
+                            solName = cgnsEnergy
+                        end select
+                    end if
 
-          ! Determine whether or not we are dealing with an unsteady
-          ! higher solution here.
+                    ! Loop over the number of subblocks stored on
+                    ! this processor.
 
-          unsteadyHigherSol = .false.
-          if(ind > 1 .and. equationMode == unsteady) &
-               unsteadyHigherSol = .true.
+                    do mm = 1, nBlocks
 
-          ! Set rindLayerThisSol. A rind layer is not written for
-          ! the higher solutions in unsteady mode.
+                        ! Set the pointers to the local domain.
 
-          rindLayerThisSol = storeRindLayer
-          if( unsteadyHigherSol ) rindLayerThisSol = .false.
+                        if (unsteadyHigherSol) then
+                            call setPointers(blocksCGNSblock(mm + offset), &
+                                             1_intType, 1_intType)
+                        else
+                            call setPointers(blocksCGNSblock(mm + offset), &
+                                             1_intType, ind)
+                        end if
+
+                        ! Determine the cell range I have to write. This depends
+                        ! whether or not halo's must be written.
 
-          ! Check if the subranges must be recomputed. Some dirty stuff
-          ! must be done, because Fortran 90/95 does not allow a
-          ! comparison between logicals.
+                        iBeg = 2; iEnd = il
+                        jBeg = 2; jEnd = jl
+                        kBeg = 2; kEnd = kl
+
+                        if (storeRindLayer .and. (.not. unsteadyHigherSol)) then
+                            if (iBegor == 1) iBeg = 1
+                            if (jBegor == 1) jBeg = 1
+                            if (kBegor == 1) kBeg = 1
+
+                            if (iEndor == cgnsDoms(zone)%il) iEnd = ie
+                            if (jEndor == cgnsDoms(zone)%jl) jEnd = je
+                            if (kEndor == cgnsDoms(zone)%kl) kEnd = ke
+                        end if
+
+                        ! Fill the buffer with the correct solution variable.
+                        ! The routine called depends on the situation.
+
+                        if (unsteadyHigherSol) then
+
+                            ! Unsteady higher solution. Write the old solution.
+
+                            call storeOldSolInBuffer(buffer, ind, nn, iBeg, iEnd, &
+                                                     jBeg, jEnd, kBeg, kEnd)
+                        else
+
+                            ! Standard solution must be written.
+
+                            call storeSolInBuffer(buffer, .true., solName, &
+                                                  iBeg, iEnd, jBeg, jEnd, &
+                                                  kBeg, kEnd)
+                        end if
+
+                        ! And store it in sol. The routine called depends on
+                        ! the desired precision.
+
+                        select case (precisionSol)
+                        case (precisionSingle)
+                            call copyDataBufSinglePrecision(sol4, buffer, &
+                                                            iBegCGNS, jBegCGNS, &
+                                                            kBegCGNS, iEndCGNS, &
+                                                            jEndCGNS, kEndCGNS, &
+                                                            subRanges(1, 1, mm))
+                        case (precisionDouble)
+                            call copyDataBufDoublePrecision(sol8, buffer, &
+                                                            iBegCGNS, jBegCGNS, &
+                                                            kBegCGNS, iEndCGNS, &
+                                                            jEndCGNS, kEndCGNS, &
+                                                            subRanges(1, 1, mm))
+                        end select
+
+                    end do
+
+                    ! Loop over the number of subblocks stored on
+                    ! other processors.
+
+                    do mm = (nBlocks + 1), nSubblocks
+
+                        ! Receive the range of subblock mm.
+
+                        source = proc(mm)
+                        call mpi_recv(buffer, bufSize, adflow_real, source, &
+                                      source + 1, ADflow_comm_world, mpiStatus, ierr)
+
+                        ! And store it in sol.
+
+                        select case (precisionSol)
+                        case (precisionSingle)
+                            call copyDataBufSinglePrecision(sol4, buffer, &
+                                                            iBegCGNS, jBegCGNS, &
+                                                            kBegCGNS, iEndCGNS, &
+                                                            jEndCGNS, kEndCGNS, &
+                                                            subRanges(1, 1, mm))
+                        case (precisionDouble)
+                            call copyDataBufDoublePrecision(sol8, buffer, &
+                                                            iBegCGNS, jBegCGNS, &
+                                                            kBegCGNS, iEndCGNS, &
+                                                            jEndCGNS, kEndCGNS, &
+                                                            subRanges(1, 1, mm))
+                        end select
+
+                    end do
+
+                    ! Write the solution variable to file. Source is just used
+                    ! as a dummy variable and does not have a meaning.
+                    select case (precisionSol)
+                    case (precisionSingle)
+                        call cg_field_write_f(cgnsInd, cgnsBase, cgnsZone, &
+                                              cgnsSol, realSingle, solName, sol4, source, ierr)
+                    case (precisionDouble)
+                        call cg_field_write_f(cgnsInd, cgnsBase, cgnsZone, &
+                                              cgnsSol, realDouble, solName, sol8, source, ierr)
+                    end select
+                    if (ierr /= CG_OK) &
+                         call terminate("writeSolCGNSZone", &
+                         "Something wrong when calling &
+                         &cg_field_write_f")
+                end do varWriteLoop
+
+            end do solLoopRoot
+
+            ! Release some memory only allocated on the root processor.
+            deallocate (sol4, sol8, subRanges, proc, stat=ierr)
+            if (ierr /= 0) call terminate("writeSolCGNSZone", &
+                                          "Deallocation error on root proc")
+
+            else rootproc
+            !
+            !         I am not the root processor and may have to send some data
+            !         to the root processor.
+            !
+            ! Determine the subranges for the 1st solution.
+
+            rindLayerThisSol = storeRindLayer
+            call getSubRangesSol
+
+            ! Allocate the memory for buffer.
+
+            allocate (buffer(bufSize), stat=ierr)
+            if (ierr /= 0) &
+                call terminate("writeSolCGNSZone", &
+                               "Memory allocation failure for buffer")
 
-          nn = 0; if( storeRindLayer )   nn = 1
-          mm = 0; if( rindLayerThisSol ) mm = 1
-          if(nn /= mm) call getSubRangesSol
+            ! Loop over the number of solutions.
 
-          ! Determine the number of variables to be written.
-          ! For the unsteady higher solutions only the variables
-          ! needed for a restart are written.
+            solLoopOthers: do ind = 1, nVolSolToWrite
 
-          nVarWritten = nSolVar+nDiscrVar
-          if( unsteadyHigherSol ) nVarWritten = nw
+                ! Determine whether or not we are dealing with an unsteady
+                ! higher solution here.
 
-          ! Loop over the number of variables to be written.
+                unsteadyHigherSol = .false.
+                if (ind > 1 .and. equationMode == unsteady) &
+                    unsteadyHigherSol = .true.
 
-          do nn=1,nVarWritten
+                ! Set rindLayerThisSol. A rind layer is not written for
+                ! the higher solutions in unsteady mode.
 
-             ! Loop over the number of subblocks stored on
-             ! this processor.
+                rindLayerThisSol = storeRindLayer
+                if (unsteadyHigherSol) rindLayerThisSol = .false.
 
-             do mm=1,nBlocks
+                ! Check if the subranges must be recomputed. Some dirty stuff
+                ! must be done, because Fortran 90/95 does not allow a
+                ! comparison between logicals.
 
-                ! Set the pointers to the local domain.
+                nn = 0; if (storeRindLayer) nn = 1
+                mm = 0; if (rindLayerThisSol) mm = 1
+                if (nn /= mm) call getSubRangesSol
 
-                if( unsteadyHigherSol ) then
-                   call setPointers(blocksCGNSblock(mm+offset), &
-                        1_intType, 1_intType)
-                else
-                   call setPointers(blocksCGNSblock(mm+offset), &
-                        1_intType, ind)
-                endif
+                ! Determine the number of variables to be written.
+                ! For the unsteady higher solutions only the variables
+                ! needed for a restart are written.
 
-                ! Determine the cell range I have to write. This depends
-                ! whether or not halo's must be written.
+                nVarWritten = nSolVar + nDiscrVar
+                if (unsteadyHigherSol) nVarWritten = nw
 
-                iBeg = 2; iEnd = il
-                jBeg = 2; jEnd = jl
-                kBeg = 2; kEnd = kl
+                ! Loop over the number of variables to be written.
 
-                if(storeRindLayer .and. (.not. unsteadyHigherSol)) then
-                   if(iBegor == 1) iBeg = 1
-                   if(jBegor == 1) jBeg = 1
-                   if(kBegor == 1) kBeg = 1
+                do nn = 1, nVarWritten
 
-                   if(iEndor == cgnsDoms(zone)%il) iEnd = ie
-                   if(jEndor == cgnsDoms(zone)%jl) jEnd = je
-                   if(kEndor == cgnsDoms(zone)%kl) kEnd = ke
-                endif
+                    ! Loop over the number of subblocks stored on
+                    ! this processor.
 
-                ! Fill the buffer with the correct solution variable.
-                ! The routine called depends on the situation.
+                    do mm = 1, nBlocks
 
-                if( unsteadyHigherSol ) then
+                        ! Set the pointers to the local domain.
 
-                   ! Unsteady higher solution. Write the old solution.
+                        if (unsteadyHigherSol) then
+                            call setPointers(blocksCGNSblock(mm + offset), &
+                                             1_intType, 1_intType)
+                        else
+                            call setPointers(blocksCGNSblock(mm + offset), &
+                                             1_intType, ind)
+                        end if
 
-                   call storeOldSolInBuffer(buffer, ind, nn, iBeg, iEnd, &
-                        jBeg, jEnd, kBeg, kEnd)
-                else
+                        ! Determine the cell range I have to write. This depends
+                        ! whether or not halo's must be written.
+
+                        iBeg = 2; iEnd = il
+                        jBeg = 2; jEnd = jl
+                        kBeg = 2; kEnd = kl
+
+                        if (storeRindLayer .and. (.not. unsteadyHigherSol)) then
+                            if (iBegor == 1) iBeg = 1
+                            if (jBegor == 1) jBeg = 1
+                            if (kBegor == 1) kBeg = 1
+
+                            if (iEndor == cgnsDoms(zone)%il) iEnd = ie
+                            if (jEndor == cgnsDoms(zone)%jl) jEnd = je
+                            if (kEndor == cgnsDoms(zone)%kl) kEnd = ke
+                        end if
 
-                   ! Standard solution must be written.
+                        ! Fill the buffer with the correct solution variable.
+                        ! The routine called depends on the situation.
 
-                   call storeSolInBuffer(buffer, .true., solNames(nn), &
-                        iBeg, iEnd, jBeg, jEnd,       &
-                        kBeg, kEnd)
-                endif
+                        if (unsteadyHigherSol) then
 
-                ! And send it to processor 0.
+                            ! Unsteady higher solution. Write the old solution.
 
-                ll   = (iEnd-iBeg+1)*(jEnd-jBeg+1)*(kEnd-kBeg+1)
-                size = ll
+                            call storeOldSolInBuffer(buffer, ind, nn, iBeg, iEnd, &
+                                                     jBeg, jEnd, kBeg, kEnd)
+                        else
 
-                call mpi_send(buffer, size, adflow_real, 0, myID+1, &
-                     ADflow_comm_world, ierr)
-             enddo
-          enddo
+                            ! Standard solution must be written.
 
-       enddo solLoopOthers
-    endif rootproc
+                            call storeSolInBuffer(buffer, .true., solNames(nn), &
+                                                  iBeg, iEnd, jBeg, jEnd, &
+                                                  kBeg, kEnd)
+                        end if
 
-    ! Release some memory.
+                        ! And send it to processor 0.
 
-    deallocate(buffer, stat=ierr)
-    if(ierr /= 0)                        &
-         call terminate("writeSolCGNSZone", &
-         "Deallocation error for buffer")
+                        ll = (iEnd - iBeg + 1) * (jEnd - jBeg + 1) * (kEnd - kBeg + 1)
+                        size = ll
+
+                        call mpi_send(buffer, size, adflow_real, 0, myID + 1, &
+                                      ADflow_comm_world, ierr)
+                    end do
+                end do
+
+            end do solLoopOthers
+        end if rootproc
+
+        ! Release some memory.
+
+        deallocate (buffer, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("writeSolCGNSZone", &
+                           "Deallocation error for buffer")
 
-    !=================================================================
+        !=================================================================
 
-  contains
+    contains
 
-    !===============================================================
+        !===============================================================
 
-    subroutine getSubRangesSol
-      !
-      !         getSubRangesSol determines the subranges of the
-      !         computational blocks that contribute to the CGNS block which
-      !         is currently written. Also the size of the largest subblock
-      !         is determined.
-      !
-      implicit none
-      !
-      !        Local variables.
-      !
-      integer :: source
+        subroutine getSubRangesSol
+            !
+            !         getSubRangesSol determines the subranges of the
+            !         computational blocks that contribute to the CGNS block which
+            !         is currently written. Also the size of the largest subblock
+            !         is determined.
+            !
+            implicit none
+            !
+            !        Local variables.
+            !
+            integer :: source
 
-      integer(kind=intType) :: i, j, ll
-      integer(kind=intType), dimension(6) :: ii
+            integer(kind=intType) :: i, j, ll
+            integer(kind=intType), dimension(6) :: ii
 
-      ! Initialize bufSize.
+            ! Initialize bufSize.
 
-      bufSize = 0
+            bufSize = 0
 
-      ! Test if I'm the root processor or not.
+            ! Test if I'm the root processor or not.
 
-      testRoot: if(myID == 0) then
+            testRoot: if (myID == 0) then
 
-         ! I'm the root processor. Determine the subRanges of the
-         ! subblocks stored on locally. Note that nBlocks can be 0.
+                ! I'm the root processor. Determine the subRanges of the
+                ! subblocks stored on locally. Note that nBlocks can be 0.
 
-         do i=1,nBlocks
+                do i = 1, nBlocks
 
-            ! Store the local block ID a bit easier in j.
+                    ! Store the local block ID a bit easier in j.
 
-            j = blocksCGNSblock(i+offset)
+                    j = blocksCGNSblock(i + offset)
 
-            ! Determine the range; this is the same for all spectral
-            ! solutions, so the first one can be used.
+                    ! Determine the range; this is the same for all spectral
+                    ! solutions, so the first one can be used.
 
-            subRanges(1,1,i) = flowDoms(j,1,1)%iBegor + 1
-            subRanges(1,2,i) = flowDoms(j,1,1)%iEndor
+                    subRanges(1, 1, i) = flowDoms(j, 1, 1)%iBegor + 1
+                    subRanges(1, 2, i) = flowDoms(j, 1, 1)%iEndor
 
-            subRanges(2,1,i) = flowDoms(j,1,1)%jBegor + 1
-            subRanges(2,2,i) = flowDoms(j,1,1)%jEndor
+                    subRanges(2, 1, i) = flowDoms(j, 1, 1)%jBegor + 1
+                    subRanges(2, 2, i) = flowDoms(j, 1, 1)%jEndor
 
-            subRanges(3,1,i) = flowDoms(j,1,1)%kBegor + 1
-            subRanges(3,2,i) = flowDoms(j,1,1)%kEndor
+                    subRanges(3, 1, i) = flowDoms(j, 1, 1)%kBegor + 1
+                    subRanges(3, 2, i) = flowDoms(j, 1, 1)%kEndor
 
-            ! Correct in case rind layers must be stored.
+                    ! Correct in case rind layers must be stored.
 
-            if( rindLayerThisSol ) then
+                    if (rindLayerThisSol) then
 
-               if(subRanges(1,1,i) == 2) subRanges(1,1,i) = 1
-               if(subRanges(2,1,i) == 2) subRanges(2,1,i) = 1
-               if(subRanges(3,1,i) == 2) subRanges(3,1,i) = 1
+                        if (subRanges(1, 1, i) == 2) subRanges(1, 1, i) = 1
+                        if (subRanges(2, 1, i) == 2) subRanges(2, 1, i) = 1
+                        if (subRanges(3, 1, i) == 2) subRanges(3, 1, i) = 1
 
-               if(subRanges(1,2,i) == cgnsDoms(zone)%il) &
-                    subRanges(1,2,i) =  cgnsDoms(zone)%il + 1
-               if(subRanges(2,2,i) == cgnsDoms(zone)%jl) &
-                    subRanges(2,2,i) =  cgnsDoms(zone)%jl + 1
-               if(subRanges(3,2,i) == cgnsDoms(zone)%kl) &
-                    subRanges(3,2,i) =  cgnsDoms(zone)%kl + 1
+                        if (subRanges(1, 2, i) == cgnsDoms(zone)%il) &
+                            subRanges(1, 2, i) = cgnsDoms(zone)%il + 1
+                        if (subRanges(2, 2, i) == cgnsDoms(zone)%jl) &
+                            subRanges(2, 2, i) = cgnsDoms(zone)%jl + 1
+                        if (subRanges(3, 2, i) == cgnsDoms(zone)%kl) &
+                            subRanges(3, 2, i) = cgnsDoms(zone)%kl + 1
 
-            endif
+                    end if
 
-         enddo
+                end do
 
-         ! The rest of the block ranges must be obtained by
-         ! communication.
+                ! The rest of the block ranges must be obtained by
+                ! communication.
 
-         do i=(nBlocks+1),nSubblocks
+                do i = (nBlocks + 1), nSubblocks
 
-            ! Receive the range of subblock i.
+                    ! Receive the range of subblock i.
 
-            source = proc(i)
-            call mpi_recv(ii, 6, adflow_integer, source, source, &
-                 ADflow_comm_world, mpiStatus, ierr)
+                    source = proc(i)
+                    call mpi_recv(ii, 6, adflow_integer, source, source, &
+                                  ADflow_comm_world, mpiStatus, ierr)
 
-            subRanges(1,1,i) = ii(1)
-            subRanges(1,2,i) = ii(2)
-            subRanges(2,1,i) = ii(3)
-            subRanges(2,2,i) = ii(4)
-            subRanges(3,1,i) = ii(5)
-            subRanges(3,2,i) = ii(6)
-         enddo
+                    subRanges(1, 1, i) = ii(1)
+                    subRanges(1, 2, i) = ii(2)
+                    subRanges(2, 1, i) = ii(3)
+                    subRanges(2, 2, i) = ii(4)
+                    subRanges(3, 1, i) = ii(5)
+                    subRanges(3, 2, i) = ii(6)
+                end do
 
-         ! Determine the size of the largest subblock.
+                ! Determine the size of the largest subblock.
 
-         do i=1,nSubBlocks
-            ll = (subRanges(1,2,i) - subRanges(1,1,i) + 1) &
-                 * (subRanges(2,2,i) - subRanges(2,1,i) + 1) &
-                 * (subRanges(3,2,i) - subRanges(3,1,i) + 1)
-            bufSize = max(bufSize, ll)
-         enddo
+                do i = 1, nSubBlocks
+                    ll = (subRanges(1, 2, i) - subRanges(1, 1, i) + 1) &
+                         * (subRanges(2, 2, i) - subRanges(2, 1, i) + 1) &
+                         * (subRanges(3, 2, i) - subRanges(3, 1, i) + 1)
+                    bufSize = max(bufSize, ll)
+                end do
 
-      else testRoot
+                else testRoot
 
-         ! Loop over the number of subblocks stored on this processor.
+                ! Loop over the number of subblocks stored on this processor.
 
-         do i=1,nBlocks
+                do i = 1, nBlocks
 
-            ! Store the local block id a bit easier in j.
+                    ! Store the local block id a bit easier in j.
 
-            j = blocksCGNSblock(i+offset)
+                    j = blocksCGNSblock(i + offset)
 
-            ! Copy the range of this subblock into the buffer ii.
-            ! This is the same for all spectral solutions, so the
-            ! first one can be used.
+                    ! Copy the range of this subblock into the buffer ii.
+                    ! This is the same for all spectral solutions, so the
+                    ! first one can be used.
 
-            ii(1) = flowDoms(j,1,1)%iBegor + 1
-            ii(2) = flowDoms(j,1,1)%iEndor
-            ii(3) = flowDoms(j,1,1)%jBegor + 1
-            ii(4) = flowDoms(j,1,1)%jEndor
-            ii(5) = flowDoms(j,1,1)%kBegor + 1
-            ii(6) = flowDoms(j,1,1)%kEndor
+                    ii(1) = flowDoms(j, 1, 1)%iBegor + 1
+                    ii(2) = flowDoms(j, 1, 1)%iEndor
+                    ii(3) = flowDoms(j, 1, 1)%jBegor + 1
+                    ii(4) = flowDoms(j, 1, 1)%jEndor
+                    ii(5) = flowDoms(j, 1, 1)%kBegor + 1
+                    ii(6) = flowDoms(j, 1, 1)%kEndor
 
-            ! Correct in case rind layers must be stored.
+                    ! Correct in case rind layers must be stored.
 
-            if( rindLayerThisSol ) then
+                    if (rindLayerThisSol) then
 
-               if(ii(1) == 2) ii(1) = 1
-               if(ii(2) == cgnsDoms(zone)%il) ii(2) = ii(2) + 1
-               if(ii(3) == 2) ii(3) = 1
-               if(ii(4) == cgnsDoms(zone)%jl) ii(4) = ii(4) + 1
-               if(ii(5) == 2) ii(5) = 1
-               if(ii(6) == cgnsDoms(zone)%kl) ii(6) = ii(6) + 1
+                        if (ii(1) == 2) ii(1) = 1
+                        if (ii(2) == cgnsDoms(zone)%il) ii(2) = ii(2) + 1
+                        if (ii(3) == 2) ii(3) = 1
+                        if (ii(4) == cgnsDoms(zone)%jl) ii(4) = ii(4) + 1
+                        if (ii(5) == 2) ii(5) = 1
+                        if (ii(6) == cgnsDoms(zone)%kl) ii(6) = ii(6) + 1
 
-            endif
+                    end if
 
-            ! Send the buffer to processor 0.
+                    ! Send the buffer to processor 0.
 
-            call mpi_send(ii, 6, adflow_integer, 0, myID, &
-                 ADflow_comm_world, ierr)
+                    call mpi_send(ii, 6, adflow_integer, 0, myID, &
+                                  ADflow_comm_world, ierr)
 
-            ! Check the size of this subblock and update bufSize
-            ! if needed.
+                    ! Check the size of this subblock and update bufSize
+                    ! if needed.
 
-            ll = (ii(2) - ii(1) + 1) * (ii(4) - ii(3) + 1) &
-                 * (ii(6) - ii(5) + 1)
-            bufSize = max(bufSize, ll)
+                    ll = (ii(2) - ii(1) + 1) * (ii(4) - ii(3) + 1) &
+                         * (ii(6) - ii(5) + 1)
+                    bufSize = max(bufSize, ll)
 
-         enddo
+                end do
 
-      endif testRoot
+            end if testRoot
 
-    end subroutine getSubRangesSol
-  end subroutine writeSolCGNSZone
+        end subroutine getSubRangesSol
+    end subroutine writeSolCGNSZone
 end module writeCGNSVolume
diff --git a/src/output/writeSol.F90 b/src/output/writeSol.F90
index 53badedca..51d57e1aa 100644
--- a/src/output/writeSol.F90
+++ b/src/output/writeSol.F90
@@ -1,82 +1,80 @@
 
 subroutine writeSol(famList, nFamList)
-  !
-  !       writeSol controls the writing of a new grid file, a volume
-  !       solution file and a surface solution file.
-  !
-  use constants
-  use extraOutput
-  use communication, only : adflow_comm_world
-  use monitor, only : writeVolume
-  use inputIO, only : storeRindLayer
-  use killSignals, only : localSignal, globalSignal
-  use flowVarRefState, only : nw
-  use outputMod, only : setHelpVariablesWriting, &
-       releaseHelpVariablesWriting, gridFileNames, volSolFileNames, &
-       surfSolFileNames
-  use writeCGNSGrid, only : writeCGNSGridFile
-  use writeCGNSVolume, only : writeCGNSVolumeSol
-  use writeCGNSSurface, only : writeCGNSSurfaceSol
-  use utils, only : terminate, deallocateTempMemory, allocateTempMemory
-  use haloExchange, only : resHalo1
-  implicit none
-
-
-  integer(kind=intType), intent(in) :: nFamList
-  integer(kind=intType), dimension(nFamList), intent(in) :: famList
-  !
-  !      Local variables.
-  !
-  integer :: ierr
-
-  integer(kind=intType) :: i, nn
-
-  ! If residuals must be written to the volume solution and if halo
-  ! values must be stored, exchange the data here, because in the
-  ! next call the communication buffers are deleted.
-
-  if(writeVolume     .and. storeRindLayer .and.  &
-       (volWriteResRho  .or. volWriteResMom .or.   &
-       volWriteResRhoE .or. volWriteResTurb))     &
-       call resHalo1(1_intType, 1_intType, nw)
-
-  ! Temporary deallocate some memory, such that writeSolution
-  ! is not a memory killer.
-
-  call deallocateTempMemory(.true.)
-
-  ! Write the files. The routines called depend on the IO
-  ! format used.
-
-  call setHelpVariablesWriting
-  call writeCGNSGridFile
-  call writeCGNSVolumeSol
-  call writeCGNSSurfaceSol(famList)
-  call releaseHelpVariablesWriting
-
-  ! Release the memory of the file names.
-
-  deallocate(gridFileNames, volSolFileNames, &
-       surfSolFileNames, stat=ierr)
-  if(ierr /= 0)                &
-       call terminate("writeSol", &
-       "Deallocation failure for the file names.")
-
-  ! Allocate the memory again that was deallocated in the beginning
-  ! of this routine.
-
-  call allocateTempMemory(.true.)
+    !
+    !       writeSol controls the writing of a new grid file, a volume
+    !       solution file and a surface solution file.
+    !
+    use constants
+    use extraOutput
+    use communication, only: adflow_comm_world
+    use monitor, only: writeVolume
+    use inputIO, only: storeRindLayer
+    use killSignals, only: localSignal, globalSignal
+    use flowVarRefState, only: nw
+    use outputMod, only: setHelpVariablesWriting, &
+                         releaseHelpVariablesWriting, gridFileNames, volSolFileNames, &
+                         surfSolFileNames
+    use writeCGNSGrid, only: writeCGNSGridFile
+    use writeCGNSVolume, only: writeCGNSVolumeSol
+    use writeCGNSSurface, only: writeCGNSSurfaceSol
+    use utils, only: terminate, deallocateTempMemory, allocateTempMemory
+    use haloExchange, only: resHalo1
+    implicit none
+
+    integer(kind=intType), intent(in) :: nFamList
+    integer(kind=intType), dimension(nFamList), intent(in) :: famList
+    !
+    !      Local variables.
+    !
+    integer :: ierr
+
+    integer(kind=intType) :: i, nn
+
+    ! If residuals must be written to the volume solution and if halo
+    ! values must be stored, exchange the data here, because in the
+    ! next call the communication buffers are deleted.
+
+    if (writeVolume .and. storeRindLayer .and. &
+        (volWriteResRho .or. volWriteResMom .or. &
+         volWriteResRhoE .or. volWriteResTurb)) &
+        call resHalo1(1_intType, 1_intType, nw)
+
+    ! Temporary deallocate some memory, such that writeSolution
+    ! is not a memory killer.
+
+    call deallocateTempMemory(.true.)
+
+    ! Write the files. The routines called depend on the IO
+    ! format used.
+
+    call setHelpVariablesWriting
+    call writeCGNSGridFile
+    call writeCGNSVolumeSol
+    call writeCGNSSurfaceSol(famList)
+    call releaseHelpVariablesWriting
+
+    ! Release the memory of the file names.
+
+    deallocate (gridFileNames, volSolFileNames, &
+                surfSolFileNames, stat=ierr)
+    if (ierr /= 0) &
+        call terminate("writeSol", &
+                       "Deallocation failure for the file names.")
+
+    ! Allocate the memory again that was deallocated in the beginning
+    ! of this routine.
+
+    call allocateTempMemory(.true.)
 
 #ifndef USE_NO_SIGNALS
 
-  ! It is possible that a kill signal was sent during the writing.
-  ! Therefore determine the global signal as the maximum of the
-  ! local ones.
+    ! It is possible that a kill signal was sent during the writing.
+    ! Therefore determine the global signal as the maximum of the
+    ! local ones.
 
-  call mpi_allreduce(localSignal, globalSignal, 1, adflow_integer, &
-       mpi_max, ADflow_comm_world, ierr)
+    call mpi_allreduce(localSignal, globalSignal, 1, adflow_integer, &
+                       mpi_max, ADflow_comm_world, ierr)
 #endif
 
 end subroutine writeSol
 
-
diff --git a/src/overset/buildClusterWalls.F90 b/src/overset/buildClusterWalls.F90
index c249aa4e4..4365cfebc 100644
--- a/src/overset/buildClusterWalls.F90
+++ b/src/overset/buildClusterWalls.F90
@@ -1,571 +1,571 @@
 subroutine buildClusterWalls(level, sps, useDual, walls, famList, nFamList)
 
-  ! This routine will will build a global reduced surface mesh and ADT
-  ! for each cluster. It can build using either the primal mesh or the
-  ! dual mesh depending on the  useDual option.
-
-  use adtBuild, only : buildSerialQuad
-  use blockPointers
-  use communication
-  use inputphysics
-  use inputTimeSpectral
-  use oversetData
-  use inputOverset
-  use utils, only : setPointers, EChk, pointReduce
-  use warping, only : getCGNSMeshIndices
-  use sorting, only : famInList
-  implicit none
-
-  ! Input Variables
-  integer(kind=intType), intent(in) :: level, sps, nFamList
-  logical, intent(in) :: useDual
-  type(oversetWall), intent(inout), dimension(nClusters), target :: walls
-  integer(Kind=intType), intent(in) :: famList(nFamList)
-
-  ! Local Variables
-  integer(kind=intType) :: i, j, k, l, ii, jj, kk, nn, mm, iNode, iCell, c
-  integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, ni, nj, nUnique, cellID, cellID2
-  integer(kind=intType) :: ierr, iDim, lj
-
-  ! Data for local surface
-  integer(kind=intType) :: nNodes, nCells
-  integer(kind=intType) :: nNodesLocal, nCellsLocal
-  integer(kind=intType), dimension(:, :), allocatable :: connLocal
-  integer(kind=intType), dimension(:), allocatable :: clusterNodeLocal
-  integer(kind=intType), dimension(:), allocatable :: clusterCellLocal
-  real(kind=realType), dimension(:, :), allocatable :: nodesLocal
-  real(kind=realType), dimension(:,:,:), pointer :: xx, xx1, xx2, xx3, xx4=>null()
-  integer(kind=intType), dimension(:,:,:), pointer :: globalCGNSNode=>null()
-  integer(kind=intType), dimension(:,:), pointer :: ind, indCGNS=>null()
-  integer(kind=intType), dimension(:,:), pointer :: indCell=>null()
-  logical :: regularOrdering
-
-  ! Data for global surface
-  integer(kind=intTYpe) :: nNodesGlobal, nCellsGlobal
-  integer(kind=intType), dimension(:, :), allocatable, target :: connGlobal
-  real(kind=realType), dimension(:, :), allocatable, target :: nodesGlobal
-  integer(kind=intType), dimension(:), allocatable, target :: nodeIndicesGlobal
-  integer(kind=intType), dimension(:), allocatable, target :: nodeIndicesCGNSGlobal
-  integer(kind=intType), dimension(:), allocatable, target :: cellIndicesGlobal
-
-  integer(kind=intType), dimension(:), allocatable :: nodesPerCluster, cellsPerCluster, cnc, ccc
-  integer(kind=intType), dimension(:), allocatable :: clusterNodeGlobal
-  integer(kind=intType), dimension(:), allocatable :: clusterCellGlobal
-  integer(kind=intType), dimension(:), allocatable :: localNodeNums
-  integer(kind=intType), dimension(:), allocatable :: nodeIndicesLocal
-  integer(kind=intType), dimension(:), allocatable :: nodeIndicesCGNSLocal
-  integer(kind=intType), dimension(:), allocatable :: cellIndicesLocal
-  integer(kind=intType), dimension(:), allocatable :: cgnsIndices, curCGNSNode
-
-  integer(kind=intType), dimension(:),    allocatable :: nCellProc, cumCellProc
-  integer(kind=intType), dimension(:),    allocatable :: nNodeProc, cumNodeProc
-  real(kind=realType),   dimension(:, :), allocatable :: uniqueNodes
-  integer(kind=intType), dimension(:),    allocatable :: link
-  real(kind=realType), parameter :: tol=1e-12
-
-  ! Pointers for easier readibility
-  integer(kind=intType), dimension(:, :), pointer :: conn
-  integer(kind=intType), dimension(:), pointer :: tmpInd
-
-  ! The first thing we do is gather all the surface nodes to
-  ! each processor such that every processor can make it's own copy of
-  ! the complete surface mesh to use to search. Note that this
-  ! procedure *DOES NOT SCALE IN MEMORY*...ie eventually the surface
-  ! mesh will become too large to store on a single processor,
-  ! although this will probably not happen until the sizes get up in
-  ! the hundreds of millions of cells.
-
-  nNodesLocal = 0
-  nCellsLocal = 0
-
-  ! Before we start generate a local node indices for the globalCGNS index
-  ii = 0
-  do nn=1, nDom
-     call setPointers(nn, level, sps)
-     allocate(flowDoms(nn, level, sps)%globalCGNSNode(1:il, 1:jl, 1:kl))
-     flowDoms(nn, level, sps)%globalCGNSNode = 0
-     ii = ii + il*jl*kl
-  end do
-
-  if (level == 1) then
-     allocate(cgnsIndices(3*ii))
-     call getCGNSMeshIndices(size(cgnsIndices), cgnsIndices)
-     ii = 0
-     do nn=1, nDom
+    ! This routine will will build a global reduced surface mesh and ADT
+    ! for each cluster. It can build using either the primal mesh or the
+    ! dual mesh depending on the  useDual option.
+
+    use adtBuild, only: buildSerialQuad
+    use blockPointers
+    use communication
+    use inputphysics
+    use inputTimeSpectral
+    use oversetData
+    use inputOverset
+    use utils, only: setPointers, EChk, pointReduce
+    use warping, only: getCGNSMeshIndices
+    use sorting, only: famInList
+    implicit none
+
+    ! Input Variables
+    integer(kind=intType), intent(in) :: level, sps, nFamList
+    logical, intent(in) :: useDual
+    type(oversetWall), intent(inout), dimension(nClusters), target :: walls
+    integer(Kind=intType), intent(in) :: famList(nFamList)
+
+    ! Local Variables
+    integer(kind=intType) :: i, j, k, l, ii, jj, kk, nn, mm, iNode, iCell, c
+    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, ni, nj, nUnique, cellID, cellID2
+    integer(kind=intType) :: ierr, iDim, lj
+
+    ! Data for local surface
+    integer(kind=intType) :: nNodes, nCells
+    integer(kind=intType) :: nNodesLocal, nCellsLocal
+    integer(kind=intType), dimension(:, :), allocatable :: connLocal
+    integer(kind=intType), dimension(:), allocatable :: clusterNodeLocal
+    integer(kind=intType), dimension(:), allocatable :: clusterCellLocal
+    real(kind=realType), dimension(:, :), allocatable :: nodesLocal
+    real(kind=realType), dimension(:, :, :), pointer :: xx, xx1, xx2, xx3, xx4 => null()
+    integer(kind=intType), dimension(:, :, :), pointer :: globalCGNSNode => null()
+    integer(kind=intType), dimension(:, :), pointer :: ind, indCGNS => null()
+    integer(kind=intType), dimension(:, :), pointer :: indCell => null()
+    logical :: regularOrdering
+
+    ! Data for global surface
+    integer(kind=intTYpe) :: nNodesGlobal, nCellsGlobal
+    integer(kind=intType), dimension(:, :), allocatable, target :: connGlobal
+    real(kind=realType), dimension(:, :), allocatable, target :: nodesGlobal
+    integer(kind=intType), dimension(:), allocatable, target :: nodeIndicesGlobal
+    integer(kind=intType), dimension(:), allocatable, target :: nodeIndicesCGNSGlobal
+    integer(kind=intType), dimension(:), allocatable, target :: cellIndicesGlobal
+
+    integer(kind=intType), dimension(:), allocatable :: nodesPerCluster, cellsPerCluster, cnc, ccc
+    integer(kind=intType), dimension(:), allocatable :: clusterNodeGlobal
+    integer(kind=intType), dimension(:), allocatable :: clusterCellGlobal
+    integer(kind=intType), dimension(:), allocatable :: localNodeNums
+    integer(kind=intType), dimension(:), allocatable :: nodeIndicesLocal
+    integer(kind=intType), dimension(:), allocatable :: nodeIndicesCGNSLocal
+    integer(kind=intType), dimension(:), allocatable :: cellIndicesLocal
+    integer(kind=intType), dimension(:), allocatable :: cgnsIndices, curCGNSNode
+
+    integer(kind=intType), dimension(:), allocatable :: nCellProc, cumCellProc
+    integer(kind=intType), dimension(:), allocatable :: nNodeProc, cumNodeProc
+    real(kind=realType), dimension(:, :), allocatable :: uniqueNodes
+    integer(kind=intType), dimension(:), allocatable :: link
+    real(kind=realType), parameter :: tol = 1e-12
+
+    ! Pointers for easier readibility
+    integer(kind=intType), dimension(:, :), pointer :: conn
+    integer(kind=intType), dimension(:), pointer :: tmpInd
+
+    ! The first thing we do is gather all the surface nodes to
+    ! each processor such that every processor can make it's own copy of
+    ! the complete surface mesh to use to search. Note that this
+    ! procedure *DOES NOT SCALE IN MEMORY*...ie eventually the surface
+    ! mesh will become too large to store on a single processor,
+    ! although this will probably not happen until the sizes get up in
+    ! the hundreds of millions of cells.
+
+    nNodesLocal = 0
+    nCellsLocal = 0
+
+    ! Before we start generate a local node indices for the globalCGNS index
+    ii = 0
+    do nn = 1, nDom
         call setPointers(nn, level, sps)
-        do k=1, kl
-           do j=1, jl
-              do i=1, il
-                 ii = ii + 3
-                 ! The reason for the +3 in the counter and the /3 is
-                 ! that the CGNSMesh indicies include all DOF, so it
-                 ! is the total number of mesh nodes *3. Here We only
-                 ! care about nodes themselves so it is sufficient to
-                 ! use basic ordering.
-                 flowDoms(nn, level, sps)%globalCGNSNode(i,j,k) = cgnsIndices(ii)/3
-              end do
-           end do
+        allocate (flowDoms(nn, level, sps)%globalCGNSNode(1:il, 1:jl, 1:kl))
+        flowDoms(nn, level, sps)%globalCGNSNode = 0
+        ii = ii + il * jl * kl
+    end do
+
+    if (level == 1) then
+        allocate (cgnsIndices(3 * ii))
+        call getCGNSMeshIndices(size(cgnsIndices), cgnsIndices)
+        ii = 0
+        do nn = 1, nDom
+            call setPointers(nn, level, sps)
+            do k = 1, kl
+                do j = 1, jl
+                    do i = 1, il
+                        ii = ii + 3
+                        ! The reason for the +3 in the counter and the /3 is
+                        ! that the CGNSMesh indicies include all DOF, so it
+                        ! is the total number of mesh nodes *3. Here We only
+                        ! care about nodes themselves so it is sufficient to
+                        ! use basic ordering.
+                        flowDoms(nn, level, sps)%globalCGNSNode(i, j, k) = cgnsIndices(ii) / 3
+                    end do
+                end do
+            end do
         end do
-     end do
-     deallocate(cgnsIndices)
-  end if
-
-  do nn=1,nDom
-     call setPointers(nn, level, sps)
-
-     do mm=1, nBocos
-        if (famInList(BCdata(mm)%famID, famlist)) then
-           iBeg = bcData(mm)%inBeg
-           iEnd = bcData(mm)%inEnd
-           jBeg = bcData(mm)%jnBeg
-           jEnd = bcData(mm)%jnEnd
-           if (useDual) then
-              nNodesLocal = nNodesLocal + &
-                   (iEnd - iBeg + 2)*(jEnd - jBeg + 2)
-              nCellsLocal = nCellsLocal + &
-                   (iEnd - iBeg + 1)*(jEnd - jBeg + 1)
-           else
-              nNodesLocal = nNodesLocal + &
-                   (iEnd - iBeg + 1)*(jEnd - jBeg + 1)
-              nCellsLocal = nCellsLocal + &
-                   (iEnd - iBeg)*(jEnd - jBeg)
-           end if
-        end if
-     end do
-  end do
-
-  ! Now communicate these sizes with everyone
-  allocate(nCellProc(nProc), cumCellProc(0:nProc), &
-           nNodeProc(nProc), cumNodeProc(0:nProc))
-
-  call mpi_allgather(nCellsLocal, 1, adflow_integer, nCellProc, 1, adflow_integer, &
-       adflow_comm_world, ierr)
-  call EChk(ierr, __FILE__, __LINE__)
-
-  call mpi_allgather(nNodesLocal, 1, adflow_integer, nNodeProc, 1, adflow_integer, &
-       adflow_comm_world, ierr)
-  call EChk(ierr, __FILE__, __LINE__)
-
-  ! Now make cumulative versions of these
-  cumCellProc(0) = 0
-  cumNodeProc(0) = 0
-  do i=1,nProc
-     cumCellProc(i) = cumCellProc(i-1) + nCellProc(i)
-     cumNodeProc(i) = cumNodeProc(i-1) + nNodeProc(i)
-  end do
-
-  ! And save the total number of nodes and cells for reference
-  nCellsGlobal = cumCellProc(nProc)
-  nNodesGlobal = cumNodeProc(nProc)
-
-  ! Allocate the space for the local nodes and element connectivity
-  allocate(nodesLocal(3, nNodesLocal), connLocal(4, nCellsLocal), &
-       clusterCellLocal(nCellsLocal), clusterNodeLocal(NNodesLocal), &
-       nodeIndicesLocal(nNodesLocal), nodeIndicesCGNSLocal(nNodesLocal), &
-       cellIndicesLocal(nCellsLocal))
-
-  iCell = 0
-  iNode = 0
-  ! Second loop over the local walls
-  do nn=1, nDom
-     call setPointers(nn, level, sps)
-     c = clusters(cumDomProc(myid) + nn)
-     globalCGNSNode => flowDoms(nn, level, sps)%globalCGNSNode
-     do mm=1,nBocos
-        if (famInList(BCdata(mm)%famID, famlist)) then
-
-           jBeg = BCData(mm)%jnBeg-1 ; jEnd = BCData(mm)%jnEnd
-           iBeg = BCData(mm)%inBeg-1 ; iEnd = BCData(mm)%inEnd
-
-           if (useDual) then
-              ! For the dual we have to allocate the pointer, xx.
-              select case (BCFaceID(mm))
-              case (iMin)
-                 xx1 => x(1,0:jl,0:kl,:)
-                 xx2 => x(1,1:je,0:kl,:)
-                 xx3 => x(1,0:jl,1:ke,:)
-                 xx4 => x(1,1:je,1:ke,:)
-                 ind => globalCell(2, 1:je, 1:ke)
-              case (iMax)
-                 xx1 => x(il,0:jl,0:kl,:)
-                 xx2 => x(il,1:je,0:kl,:)
-                 xx3 => x(il,0:jl,1:ke,:)
-                 xx4 => x(il,1:je,1:ke,:)
-                 ind => globalCell(il, 1:je, 1:ke)
-
-              case (jMin)
-                 xx1  => x(0:il,1,0:kl,:)
-                 xx2  => x(1:ie,1,0:kl,:)
-                 xx3  => x(0:il,1,1:ke,:)
-                 xx4  => x(1:ie,1,1:ke,:)
-                 ind  => globalCell(1:ie, 2, 1:ke)
-
-              case (jMax)
-                 xx1 => x(0:il,jl,0:kl,:)
-                 xx2 => x(1:ie,jl,0:kl,:)
-                 xx3 => x(0:il,jl,1:ke,:)
-                 xx4 => x(1:ie,jl,1:ke,:)
-                 ind => globalCell(1:ie, jl, 1:ke)
-
-              case (kMin)
-                 xx1 => x(0:il,0:jl,1,:)
-                 xx2 => x(1:ie,0:jl,1,:)
-                 xx3 => x(0:il,1:je,1,:)
-                 xx4 => x(1:ie,1:je,1,:)
-                 ind => globalCell(1:ie, 1:je, 2)
-
-              case (kMax)
-                 xx1 => x(0:il,0:jl,kl,:)
-                 xx2 => x(1:ie,0:jl,kl,:)
-                 xx3 => x(0:il,1:je,kl,:)
-                 xx4 => x(1:ie,1:je,kl,:)
-                 ind => globalCell(1:ie, 1:je, kl)
-              end select
-
-           else
-              select case (BCFaceID(mm))
-              case (iMin)
-                 xx   => x(1,:,:,:)
-                 ind  => globalNode(1, :, :)
-                 indCGNS => globalCGNSNode(1, :, :)
-                 ! Pointer to owned global cell indices
-                 indCell => globalCell(2, :, :)
-
-              case (iMax)
-                 xx   => x(il,:,:,:)
-                 ind  => globalNode(il, :, :)
-                 indCGNS => globalCGNSNode(il, :, :)
-
-                 ! Pointer to owned global cell indices
-                 indCell => globalCell(il, :, :)
-
-              case (jMin)
-                 xx   => x(:,1,:,:)
-                 ind  => globalNode(:, 1, :)
-                 indCGNS => globalCGNSNode(:, 1, :)
-                 ! Pointer to owned global cell indices
-                 indCell => globalCell(:, 2, :)
-
-              case (jMax)
-                 xx   => x(:,jl,:,:)
-                 ind  => globalNode(:, jl, :)
-                 indCGNS => globalCGNSNode(:, jl, :)
-                 ! Pointer to owned global cell indices
-                 indCell => globalCell(:, jl, :)
-
-              case (kMin)
-                 xx   => x(:,:,1,:)
-                 ind  => globalNode(:, :, 1)
-                 indCGNS => globalCGNSNode(:, :, 1)
-                 ! Pointer to owned global cell indices
-                 indCell => globalCell(:, :, 2)
-
-              case (kMax)
-                 xx   => x(:,:,kl,:)
-                 ind  => globalNode(:, :, kl)
-                 indCGNS => globalCGNSNode(:, :, kl)
-
-                 ! Pointer to owned global cell indices
-                 indCell => globalCell(:, :, kl)
-
-              end select
-
-              ! Just set hte 4 other pointers to xx so we can use the
-              ! same quarter-summation code below:
-              xx1 => xx
-              xx2 => xx
-              xx3 => xx
-              xx4 => xx
-
-           end if
-
-           ! We want to ensure that all the normals of the faces are
-           ! consistent. To ensure this, we enforce that all normals
-           ! are "into" the domain. Therefore we must treat difference
-           ! faces of a block differently. For example for an iLow
-           ! face, when looping over j-k in the regular way, results
-           ! in in a domain inward pointing normal for iLow but
-           ! outward pointing normal for iHigh. The same is true for
-           ! kMin and kMax. However, it is reverse for the J-faces:
-           ! This is becuase the way the pointers are extracted i then
-           ! k is the reverse of what "should" be for consistency. The
-           ! other two, the pointers are cyclic consistent: i,j->k,
-           ! j,k (wrap) ->i, but for the j-direction is is i,k->j when
-           ! to be consistent with the others it should be
-           ! k,i->j. Hope that made sense.
-
-           select case(BCFaceID(mm))
-           case(iMin, jMax, kMin)
-              regularOrdering = .True.
-           case default
-              regularOrdering = .False.
-           end select
-
-           ! Now this can be reversed *again* if we have a block that
-           ! is left handed.
-           if (.not. rightHanded) then
-              regularOrdering = .not. (regularOrdering)
-           end if
-
-           if (useDual) then
-              ! Start and end bounds for NODES
-              jBeg = BCData(mm)%jnBeg-1; jEnd = BCData(mm)%jnEnd
-              iBeg = BCData(mm)%inBeg-1; iEnd = BCData(mm)%inEnd
-           else
-              ! Start and end bounds for NODES
-              jBeg = BCData(mm)%jnBeg ; jEnd = BCData(mm)%jnEnd
-              iBeg = BCData(mm)%inBeg ; iEnd = BCData(mm)%inEnd
-           end if
-
-           ! ni, nj are the number of NODES
-           ni = iEnd - iBeg + 1
-           nj = jEnd - jBeg + 1
-
-           ! Loop over the faces....this is the node sizes - 1
-           if (regularOrdering) then
-              do j=1,nj-1
-                 do i=1,ni-1
-                    iCell = iCell + 1
-                    connLocal(1, iCell) = cumNodeProc(myid) + iNode + (j-1)*ni + i
-                    connLocal(2, iCell) = cumNodeProc(myid) + iNode + (j-1)*ni + i + 1
-                    connLocal(3, iCell) = cumNodeProc(myid) + iNode + (j)*ni + i + 1
-                    connLocal(4, iCell) = cumNodeProc(myid) + iNode + (j)*ni + i
-                    ! Set the cluster
-                    clusterCellLocal(iCell) = c
-
-                    ! Save the global cell index
-                    if (useDual) then
-                       cellIndicesLocal(iCell) = 0
-                    else
-                       ! Valid only when using primary nodes
-                       cellIndicesLocal(iCell) = indCell(iBeg+i+1, jBeg+j+1)
-                    end if
-                 end do
-              end do
-           else
-              ! Do the reverse ordering
-              do j=1,nj-1
-                 do i=1,ni-1
-                    iCell = iCell + 1
-                    connLocal(1, iCell) = cumNodeProc(myid) + iNode + (j-1)*ni + i
-                    connLocal(2, iCell) = cumNodeProc(myid) + iNode + (j  )*ni + i
-                    connLocal(3, iCell) = cumNodeProc(myid) + iNode + (j)  *ni + i + 1
-                    connLocal(4, iCell) = cumNodeProc(myid) + iNode + (j-1)*ni + i + 1
-
-                    ! Set the cluster
-                    clusterCellLocal(iCell) = c
-
-                    ! Save the global cell index
-                    if (useDual) then
-                       cellIndicesLocal(iCell) = 0
-                    else
-                       ! Valid only when using primary nodes
-                       cellIndicesLocal(iCell) = indCell(iBeg+i+1, jBeg+j+1)
-                    end if
-                 end do
-              end do
-           end if
-
-           ! Loop over the nodes
-           do j=jBeg, jEnd
-              do i=iBeg, iEnd
-                 iNode = iNode + 1
-                 ! The plus one is for the pointer offset
-                 nodesLocal(:, iNode) = fourth*(&
-                      xx1(i+1, j+1, :) + xx2(i+1, j+1, :) + &
-                      xx3(i+1, j+1, :) + xx4(i+1, j+1, :))
-
-                 clusterNodeLocal(iNode) = c
-                 nodeIndicesLocal(iNode) = ind(i+1, j+1) ! +1 for pointer offset
-                 if (.not. useDual) then
-                    nodeIndicesCGNSLocal(iNode) = indCGNS(i, j) ! No pointer offset
-                 else
-                    nodeIndicesCGNSLocal(iNode) = 0
-                 end if
-              end do
-           end do
-        end if
-     end do
-  end do
-
-  ! Allocate space for the global reduced surface
-  allocate(nodesGlobal(3, nNodesGlobal), connGlobal(4, nCellsGlobal), &
-       clusterCellGlobal(nCellsGlobal), clusterNodeGlobal(nNodesGlobal), &
-       nodeIndicesGlobal(nNodesGlobal), nodeIndicesCGNSGlobal(nNodesGlobal), &
-       cellIndicesGlobal(nCellsGlobal))
-
-  ! Communicate the nodes, connectivity and cluster information to everyone
-  call mpi_allgatherv(nodesLocal, 3*nNodesLocal, adflow_real, &
-       nodesGlobal, nNodeProc*3, cumNodeProc*3, adflow_real, &
-       adflow_comm_world, ierr)
-  call EChk(ierr, __FILE__, __LINE__)
-
-  call mpi_allgatherv(clusterNodeLocal, nNodesLocal, adflow_integer, &
-       clusterNodeGlobal, nNodeProc, cumNodeProc, adflow_integer, &
-       adflow_comm_world, ierr)
-  call EChk(ierr, __FILE__, __LINE__)
-
-  call mpi_allgatherv(nodeIndicesLocal, nNodesLocal, adflow_integer, &
-       nodeIndicesGlobal, nNodeProc, cumNodeProc, adflow_integer, &
-       adflow_comm_world, ierr)
-  call EChk(ierr, __FILE__, __LINE__)
-
-  call mpi_allgatherv(nodeIndicesCGNSLocal, nNodesLocal, adflow_integer, &
-       nodeIndicesCGNSGlobal, nNodeProc, cumNodeProc, adflow_integer, &
-       adflow_comm_world, ierr)
-  call EChk(ierr, __FILE__, __LINE__)
-
-  call mpi_allgatherv(connLocal, 4*nCellsLocal, adflow_integer, &
-       connGlobal, nCellProc*4, cumCellProc*4, adflow_integer, &
-       adflow_comm_world, ierr)
-  call EChk(ierr, __FILE__, __LINE__)
-
-  call mpi_allgatherv(clusterCellLocal, nCellsLocal, adflow_integer, &
-       clusterCellGlobal, nCellProc, cumCellProc, adflow_integer, &
-       adflow_comm_world, ierr)
-  call EChk(ierr, __FILE__, __LINE__)
-
-  call mpi_allgatherv(cellIndicesLocal, nCellsLocal, adflow_integer, &
-       cellIndicesGlobal, nCellProc, cumCellProc, adflow_integer, &
-       adflow_comm_world, ierr)
-  call EChk(ierr, __FILE__, __LINE__)
-
-  ! Free the local data we do not need anymore
-  deallocate(nodesLocal, connLocal, clusterCellLocal, clusterNodeLocal, &
-       nCellProc, cumCellProc, nNodeProc, cumNodeProc, nodeIndicesLocal, &
-       nodeIndicesCGNSLocal, cellIndicesLocal)
-
-  ! We will now build separate trees for each cluster.
-  allocate(nodesPerCluster(nClusters), cellsPerCluster(nClusters), &
-       cnc(nClusters), ccc(nClusters))
-  nodesPerCluster = 0
-  cellsPerCluster = 0
-
-  ! Count up the total number of elements and nodes on each cluster
-  do i=1, nCellsGlobal
-     cellsPerCluster(clusterCellGlobal(i)) = cellsPerCluster(clusterCellGlobal(i)) + 1
-  end do
-
-  do i=1, nNodesGlobal
-     nodesPerCluster(clusterNodeGlobal(i)) = nodesPerCluster(clusterNodeGlobal(i)) + 1
-  end do
-
-  ! Create the list of the walls. We are reusing the overset wall derived type here.
-  allocate(localNodeNums(nNodesGlobal))
-
-  ! Allocate the memory for each of the cluster nodes
-  do i=1, nClusters
-     nNodes = nodesPerCluster(i)
-     nCells = cellsPerCluster(i)
-     walls(i)%nCells = nCells
-     walls(i)%nNodes = nNodes
-
-     allocate(walls(i)%x(3, nNodes), walls(i)%conn(4, nCells), &
-          walls(i)%ind(nNodes))
-     allocate(walls(i)%indCell(nCells))
-  end do
-
-  ! We now loop through the master list of nodes and elements and
-  !  "push" them back to where they should go. We also keep track of
-  !  the local node numbers so that the cluster surcells can update
-  !  their own conn.
-  localNodeNums = 0
-  cnc = 0
-  do i=1, nNodesGlobal
-     c = clusterNodeGlobal(i) ! Cluter this node belongs to
-     cnc(c) = cnc(c) + 1 ! "cluster node count:" the 'nth' node for this  cluster
-
-     walls(c)%x(:, cnc(c))= nodesGlobal(:, i)
-     walls(c)%ind(cnc(c)) = nodeIndicesGlobal(i)
-     localNodeNums(i) = cnc(c)
-  end do
-
-  ccc = 0
-  do i=1, nCellsGlobal
-     c = clusterCellGlobal(i)
-     ccc(c) = ccc(c) + 1 ! "Cluster cell count" the 'nth' cell for this cluster
-     walls(c)%conn(:, ccc(c)) = connGlobal(:, i)
-
-     walls(c)%indCell(ccc(c)) = cellIndicesGlobal(i)
-  end do
-
-  do i=1, nClusters
-
-     nCells = walls(i)%nCells
-     nNodes = walls(i)%nNodes
-
-     ! Fistly we need to update the conn to use our local node ordering.
-     do j=1, nCells
-        do k=1, 4
-           walls(i)%conn(k, j) = localNodeNums(walls(i)%conn(k, j))
+        deallocate (cgnsIndices)
+    end if
+
+    do nn = 1, nDom
+        call setPointers(nn, level, sps)
+
+        do mm = 1, nBocos
+            if (famInList(BCdata(mm)%famID, famlist)) then
+                iBeg = bcData(mm)%inBeg
+                iEnd = bcData(mm)%inEnd
+                jBeg = bcData(mm)%jnBeg
+                jEnd = bcData(mm)%jnEnd
+                if (useDual) then
+                    nNodesLocal = nNodesLocal + &
+                                  (iEnd - iBeg + 2) * (jEnd - jBeg + 2)
+                    nCellsLocal = nCellsLocal + &
+                                  (iEnd - iBeg + 1) * (jEnd - jBeg + 1)
+                else
+                    nNodesLocal = nNodesLocal + &
+                                  (iEnd - iBeg + 1) * (jEnd - jBeg + 1)
+                    nCellsLocal = nCellsLocal + &
+                                  (iEnd - iBeg) * (jEnd - jBeg)
+                end if
+            end if
+        end do
+    end do
+
+    ! Now communicate these sizes with everyone
+    allocate (nCellProc(nProc), cumCellProc(0:nProc), &
+              nNodeProc(nProc), cumNodeProc(0:nProc))
+
+    call mpi_allgather(nCellsLocal, 1, adflow_integer, nCellProc, 1, adflow_integer, &
+                       adflow_comm_world, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
+
+    call mpi_allgather(nNodesLocal, 1, adflow_integer, nNodeProc, 1, adflow_integer, &
+                       adflow_comm_world, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
+
+    ! Now make cumulative versions of these
+    cumCellProc(0) = 0
+    cumNodeProc(0) = 0
+    do i = 1, nProc
+        cumCellProc(i) = cumCellProc(i - 1) + nCellProc(i)
+        cumNodeProc(i) = cumNodeProc(i - 1) + nNodeProc(i)
+    end do
+
+    ! And save the total number of nodes and cells for reference
+    nCellsGlobal = cumCellProc(nProc)
+    nNodesGlobal = cumNodeProc(nProc)
+
+    ! Allocate the space for the local nodes and element connectivity
+    allocate (nodesLocal(3, nNodesLocal), connLocal(4, nCellsLocal), &
+              clusterCellLocal(nCellsLocal), clusterNodeLocal(NNodesLocal), &
+              nodeIndicesLocal(nNodesLocal), nodeIndicesCGNSLocal(nNodesLocal), &
+              cellIndicesLocal(nCellsLocal))
+
+    iCell = 0
+    iNode = 0
+    ! Second loop over the local walls
+    do nn = 1, nDom
+        call setPointers(nn, level, sps)
+        c = clusters(cumDomProc(myid) + nn)
+        globalCGNSNode => flowDoms(nn, level, sps)%globalCGNSNode
+        do mm = 1, nBocos
+            if (famInList(BCdata(mm)%famID, famlist)) then
+
+                jBeg = BCData(mm)%jnBeg - 1; jEnd = BCData(mm)%jnEnd
+                iBeg = BCData(mm)%inBeg - 1; iEnd = BCData(mm)%inEnd
+
+                if (useDual) then
+                    ! For the dual we have to allocate the pointer, xx.
+                    select case (BCFaceID(mm))
+                    case (iMin)
+                        xx1 => x(1, 0:jl, 0:kl, :)
+                        xx2 => x(1, 1:je, 0:kl, :)
+                        xx3 => x(1, 0:jl, 1:ke, :)
+                        xx4 => x(1, 1:je, 1:ke, :)
+                        ind => globalCell(2, 1:je, 1:ke)
+                    case (iMax)
+                        xx1 => x(il, 0:jl, 0:kl, :)
+                        xx2 => x(il, 1:je, 0:kl, :)
+                        xx3 => x(il, 0:jl, 1:ke, :)
+                        xx4 => x(il, 1:je, 1:ke, :)
+                        ind => globalCell(il, 1:je, 1:ke)
+
+                    case (jMin)
+                        xx1 => x(0:il, 1, 0:kl, :)
+                        xx2 => x(1:ie, 1, 0:kl, :)
+                        xx3 => x(0:il, 1, 1:ke, :)
+                        xx4 => x(1:ie, 1, 1:ke, :)
+                        ind => globalCell(1:ie, 2, 1:ke)
+
+                    case (jMax)
+                        xx1 => x(0:il, jl, 0:kl, :)
+                        xx2 => x(1:ie, jl, 0:kl, :)
+                        xx3 => x(0:il, jl, 1:ke, :)
+                        xx4 => x(1:ie, jl, 1:ke, :)
+                        ind => globalCell(1:ie, jl, 1:ke)
+
+                    case (kMin)
+                        xx1 => x(0:il, 0:jl, 1, :)
+                        xx2 => x(1:ie, 0:jl, 1, :)
+                        xx3 => x(0:il, 1:je, 1, :)
+                        xx4 => x(1:ie, 1:je, 1, :)
+                        ind => globalCell(1:ie, 1:je, 2)
+
+                    case (kMax)
+                        xx1 => x(0:il, 0:jl, kl, :)
+                        xx2 => x(1:ie, 0:jl, kl, :)
+                        xx3 => x(0:il, 1:je, kl, :)
+                        xx4 => x(1:ie, 1:je, kl, :)
+                        ind => globalCell(1:ie, 1:je, kl)
+                    end select
+
+                else
+                    select case (BCFaceID(mm))
+                    case (iMin)
+                        xx => x(1, :, :, :)
+                        ind => globalNode(1, :, :)
+                        indCGNS => globalCGNSNode(1, :, :)
+                        ! Pointer to owned global cell indices
+                        indCell => globalCell(2, :, :)
+
+                    case (iMax)
+                        xx => x(il, :, :, :)
+                        ind => globalNode(il, :, :)
+                        indCGNS => globalCGNSNode(il, :, :)
+
+                        ! Pointer to owned global cell indices
+                        indCell => globalCell(il, :, :)
+
+                    case (jMin)
+                        xx => x(:, 1, :, :)
+                        ind => globalNode(:, 1, :)
+                        indCGNS => globalCGNSNode(:, 1, :)
+                        ! Pointer to owned global cell indices
+                        indCell => globalCell(:, 2, :)
+
+                    case (jMax)
+                        xx => x(:, jl, :, :)
+                        ind => globalNode(:, jl, :)
+                        indCGNS => globalCGNSNode(:, jl, :)
+                        ! Pointer to owned global cell indices
+                        indCell => globalCell(:, jl, :)
+
+                    case (kMin)
+                        xx => x(:, :, 1, :)
+                        ind => globalNode(:, :, 1)
+                        indCGNS => globalCGNSNode(:, :, 1)
+                        ! Pointer to owned global cell indices
+                        indCell => globalCell(:, :, 2)
+
+                    case (kMax)
+                        xx => x(:, :, kl, :)
+                        ind => globalNode(:, :, kl)
+                        indCGNS => globalCGNSNode(:, :, kl)
+
+                        ! Pointer to owned global cell indices
+                        indCell => globalCell(:, :, kl)
+
+                    end select
+
+                    ! Just set hte 4 other pointers to xx so we can use the
+                    ! same quarter-summation code below:
+                    xx1 => xx
+                    xx2 => xx
+                    xx3 => xx
+                    xx4 => xx
+
+                end if
+
+                ! We want to ensure that all the normals of the faces are
+                ! consistent. To ensure this, we enforce that all normals
+                ! are "into" the domain. Therefore we must treat difference
+                ! faces of a block differently. For example for an iLow
+                ! face, when looping over j-k in the regular way, results
+                ! in in a domain inward pointing normal for iLow but
+                ! outward pointing normal for iHigh. The same is true for
+                ! kMin and kMax. However, it is reverse for the J-faces:
+                ! This is becuase the way the pointers are extracted i then
+                ! k is the reverse of what "should" be for consistency. The
+                ! other two, the pointers are cyclic consistent: i,j->k,
+                ! j,k (wrap) ->i, but for the j-direction is is i,k->j when
+                ! to be consistent with the others it should be
+                ! k,i->j. Hope that made sense.
+
+                select case (BCFaceID(mm))
+                case (iMin, jMax, kMin)
+                    regularOrdering = .True.
+                case default
+                    regularOrdering = .False.
+                end select
+
+                ! Now this can be reversed *again* if we have a block that
+                ! is left handed.
+                if (.not. rightHanded) then
+                    regularOrdering = .not. (regularOrdering)
+                end if
+
+                if (useDual) then
+                    ! Start and end bounds for NODES
+                    jBeg = BCData(mm)%jnBeg - 1; jEnd = BCData(mm)%jnEnd
+                    iBeg = BCData(mm)%inBeg - 1; iEnd = BCData(mm)%inEnd
+                else
+                    ! Start and end bounds for NODES
+                    jBeg = BCData(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+                    iBeg = BCData(mm)%inBeg; iEnd = BCData(mm)%inEnd
+                end if
+
+                ! ni, nj are the number of NODES
+                ni = iEnd - iBeg + 1
+                nj = jEnd - jBeg + 1
+
+                ! Loop over the faces....this is the node sizes - 1
+                if (regularOrdering) then
+                    do j = 1, nj - 1
+                        do i = 1, ni - 1
+                            iCell = iCell + 1
+                            connLocal(1, iCell) = cumNodeProc(myid) + iNode + (j - 1) * ni + i
+                            connLocal(2, iCell) = cumNodeProc(myid) + iNode + (j - 1) * ni + i + 1
+                            connLocal(3, iCell) = cumNodeProc(myid) + iNode + (j) * ni + i + 1
+                            connLocal(4, iCell) = cumNodeProc(myid) + iNode + (j) * ni + i
+                            ! Set the cluster
+                            clusterCellLocal(iCell) = c
+
+                            ! Save the global cell index
+                            if (useDual) then
+                                cellIndicesLocal(iCell) = 0
+                            else
+                                ! Valid only when using primary nodes
+                                cellIndicesLocal(iCell) = indCell(iBeg + i + 1, jBeg + j + 1)
+                            end if
+                        end do
+                    end do
+                else
+                    ! Do the reverse ordering
+                    do j = 1, nj - 1
+                        do i = 1, ni - 1
+                            iCell = iCell + 1
+                            connLocal(1, iCell) = cumNodeProc(myid) + iNode + (j - 1) * ni + i
+                            connLocal(2, iCell) = cumNodeProc(myid) + iNode + (j) * ni + i
+                            connLocal(3, iCell) = cumNodeProc(myid) + iNode + (j) * ni + i + 1
+                            connLocal(4, iCell) = cumNodeProc(myid) + iNode + (j - 1) * ni + i + 1
+
+                            ! Set the cluster
+                            clusterCellLocal(iCell) = c
+
+                            ! Save the global cell index
+                            if (useDual) then
+                                cellIndicesLocal(iCell) = 0
+                            else
+                                ! Valid only when using primary nodes
+                                cellIndicesLocal(iCell) = indCell(iBeg + i + 1, jBeg + j + 1)
+                            end if
+                        end do
+                    end do
+                end if
+
+                ! Loop over the nodes
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        iNode = iNode + 1
+                        ! The plus one is for the pointer offset
+                        nodesLocal(:, iNode) = fourth * ( &
+                                               xx1(i + 1, j + 1, :) + xx2(i + 1, j + 1, :) + &
+                                               xx3(i + 1, j + 1, :) + xx4(i + 1, j + 1, :))
+
+                        clusterNodeLocal(iNode) = c
+                        nodeIndicesLocal(iNode) = ind(i + 1, j + 1) ! +1 for pointer offset
+                        if (.not. useDual) then
+                            nodeIndicesCGNSLocal(iNode) = indCGNS(i, j) ! No pointer offset
+                        else
+                            nodeIndicesCGNSLocal(iNode) = 0
+                        end if
+                    end do
+                end do
+            end if
+        end do
+    end do
+
+    ! Allocate space for the global reduced surface
+    allocate (nodesGlobal(3, nNodesGlobal), connGlobal(4, nCellsGlobal), &
+              clusterCellGlobal(nCellsGlobal), clusterNodeGlobal(nNodesGlobal), &
+              nodeIndicesGlobal(nNodesGlobal), nodeIndicesCGNSGlobal(nNodesGlobal), &
+              cellIndicesGlobal(nCellsGlobal))
+
+    ! Communicate the nodes, connectivity and cluster information to everyone
+    call mpi_allgatherv(nodesLocal, 3 * nNodesLocal, adflow_real, &
+                        nodesGlobal, nNodeProc * 3, cumNodeProc * 3, adflow_real, &
+                        adflow_comm_world, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
+
+    call mpi_allgatherv(clusterNodeLocal, nNodesLocal, adflow_integer, &
+                        clusterNodeGlobal, nNodeProc, cumNodeProc, adflow_integer, &
+                        adflow_comm_world, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
+
+    call mpi_allgatherv(nodeIndicesLocal, nNodesLocal, adflow_integer, &
+                        nodeIndicesGlobal, nNodeProc, cumNodeProc, adflow_integer, &
+                        adflow_comm_world, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
+
+    call mpi_allgatherv(nodeIndicesCGNSLocal, nNodesLocal, adflow_integer, &
+                        nodeIndicesCGNSGlobal, nNodeProc, cumNodeProc, adflow_integer, &
+                        adflow_comm_world, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
+
+    call mpi_allgatherv(connLocal, 4 * nCellsLocal, adflow_integer, &
+                        connGlobal, nCellProc * 4, cumCellProc * 4, adflow_integer, &
+                        adflow_comm_world, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
+
+    call mpi_allgatherv(clusterCellLocal, nCellsLocal, adflow_integer, &
+                        clusterCellGlobal, nCellProc, cumCellProc, adflow_integer, &
+                        adflow_comm_world, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
+
+    call mpi_allgatherv(cellIndicesLocal, nCellsLocal, adflow_integer, &
+                        cellIndicesGlobal, nCellProc, cumCellProc, adflow_integer, &
+                        adflow_comm_world, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
+
+    ! Free the local data we do not need anymore
+    deallocate (nodesLocal, connLocal, clusterCellLocal, clusterNodeLocal, &
+                nCellProc, cumCellProc, nNodeProc, cumNodeProc, nodeIndicesLocal, &
+                nodeIndicesCGNSLocal, cellIndicesLocal)
+
+    ! We will now build separate trees for each cluster.
+    allocate (nodesPerCluster(nClusters), cellsPerCluster(nClusters), &
+              cnc(nClusters), ccc(nClusters))
+    nodesPerCluster = 0
+    cellsPerCluster = 0
+
+    ! Count up the total number of elements and nodes on each cluster
+    do i = 1, nCellsGlobal
+        cellsPerCluster(clusterCellGlobal(i)) = cellsPerCluster(clusterCellGlobal(i)) + 1
+    end do
+
+    do i = 1, nNodesGlobal
+        nodesPerCluster(clusterNodeGlobal(i)) = nodesPerCluster(clusterNodeGlobal(i)) + 1
+    end do
+
+    ! Create the list of the walls. We are reusing the overset wall derived type here.
+    allocate (localNodeNums(nNodesGlobal))
+
+    ! Allocate the memory for each of the cluster nodes
+    do i = 1, nClusters
+        nNodes = nodesPerCluster(i)
+        nCells = cellsPerCluster(i)
+        walls(i)%nCells = nCells
+        walls(i)%nNodes = nNodes
+
+        allocate (walls(i)%x(3, nNodes), walls(i)%conn(4, nCells), &
+                  walls(i)%ind(nNodes))
+        allocate (walls(i)%indCell(nCells))
+    end do
+
+    ! We now loop through the master list of nodes and elements and
+    !  "push" them back to where they should go. We also keep track of
+    !  the local node numbers so that the cluster surcells can update
+    !  their own conn.
+    localNodeNums = 0
+    cnc = 0
+    do i = 1, nNodesGlobal
+        c = clusterNodeGlobal(i) ! Cluter this node belongs to
+        cnc(c) = cnc(c) + 1 ! "cluster node count:" the 'nth' node for this  cluster
+
+        walls(c)%x(:, cnc(c)) = nodesGlobal(:, i)
+        walls(c)%ind(cnc(c)) = nodeIndicesGlobal(i)
+        localNodeNums(i) = cnc(c)
+    end do
+
+    ccc = 0
+    do i = 1, nCellsGlobal
+        c = clusterCellGlobal(i)
+        ccc(c) = ccc(c) + 1 ! "Cluster cell count" the 'nth' cell for this cluster
+        walls(c)%conn(:, ccc(c)) = connGlobal(:, i)
+
+        walls(c)%indCell(ccc(c)) = cellIndicesGlobal(i)
+    end do
+
+    do i = 1, nClusters
+
+        nCells = walls(i)%nCells
+        nNodes = walls(i)%nNodes
+
+        ! Fistly we need to update the conn to use our local node ordering.
+        do j = 1, nCells
+            do k = 1, 4
+                walls(i)%conn(k, j) = localNodeNums(walls(i)%conn(k, j))
+            end do
         end do
-     end do
-
-     ! Allocate temporary space for doing the point reduction.
-     allocate(uniqueNodes(3, nNodes), link(nNodes))
-
-     call pointReduce(walls(i)%x, nNodes, tol, uniqueNodes, link, nUnique)
-
-     ! Update the global indices. Use the returned link
-     tmpInd => walls(i)%ind
-     allocate(walls(i)%ind(nUnique))
-     allocate(curCGNSNode(nUnique))
-     walls(i)%ind = -1
-     curCGNSNode = -1
-     do j=1, walls(i)%nNodes
-        ! Insted of blinding setting the index, we use the the
-        ! nodeIndicesCGNSGlobal to only set the the globalNode with
-        ! the smallest CGNS index. This guarantees that the same node
-        ! ID is always selected independent of the block
-        ! partitioning/splitting. Note that this will work even for
-        ! the coarse levels when nodeIndicesCGNSGLobal are all 0's. In
-        ! that case the first time wall(i)%ind(link(j)) is touched,
-        ! that index is taken.
-        lj = link(j)
-        if (walls(i)%ind(lj) == -1) then
-           ! Not set yet
-           walls(i)%ind(lj) = tmpInd(j)
-           curCGNSNode(lj) = nodeIndicesCGNSGlobal(j)
-        else
-           if (nodeIndicesCGNSGlobal(j) < curCGNSNode(lj)) then
-              ! OR then potential global CGNS node index is LOWER
-              ! than the one I already have
-              walls(i)%ind(lj) = tmpInd(j)
-              curCGNSNode(lj) = nodeIndicesCGNSGlobal(j)
-           end if
-        end if
-     end do
-     deallocate(tmpInd, curCGNSNode)
-
-     ! Reset the number of nodes to be number of unique nodes
-     nNodes = nUnique
-     walls(i)%nNodes = nNodes
-
-     ! Update the nodes with the unique ones.
-     do j=1, nUnique
-        walls(i)%x(:, j) = uniqueNodes(:, j)
-     end do
-
-     ! Update conn using the link:
-     do j=1, nCells
-        do k=1, 4
-           walls(i)%conn(k, j) = link(walls(i)%conn(k, j))
+
+        ! Allocate temporary space for doing the point reduction.
+        allocate (uniqueNodes(3, nNodes), link(nNodes))
+
+        call pointReduce(walls(i)%x, nNodes, tol, uniqueNodes, link, nUnique)
+
+        ! Update the global indices. Use the returned link
+        tmpInd => walls(i)%ind
+        allocate (walls(i)%ind(nUnique))
+        allocate (curCGNSNode(nUnique))
+        walls(i)%ind = -1
+        curCGNSNode = -1
+        do j = 1, walls(i)%nNodes
+            ! Insted of blinding setting the index, we use the the
+            ! nodeIndicesCGNSGlobal to only set the the globalNode with
+            ! the smallest CGNS index. This guarantees that the same node
+            ! ID is always selected independent of the block
+            ! partitioning/splitting. Note that this will work even for
+            ! the coarse levels when nodeIndicesCGNSGLobal are all 0's. In
+            ! that case the first time wall(i)%ind(link(j)) is touched,
+            ! that index is taken.
+            lj = link(j)
+            if (walls(i)%ind(lj) == -1) then
+                ! Not set yet
+                walls(i)%ind(lj) = tmpInd(j)
+                curCGNSNode(lj) = nodeIndicesCGNSGlobal(j)
+            else
+                if (nodeIndicesCGNSGlobal(j) < curCGNSNode(lj)) then
+                    ! OR then potential global CGNS node index is LOWER
+                    ! than the one I already have
+                    walls(i)%ind(lj) = tmpInd(j)
+                    curCGNSNode(lj) = nodeIndicesCGNSGlobal(j)
+                end if
+            end if
+        end do
+        deallocate (tmpInd, curCGNSNode)
+
+        ! Reset the number of nodes to be number of unique nodes
+        nNodes = nUnique
+        walls(i)%nNodes = nNodes
+
+        ! Update the nodes with the unique ones.
+        do j = 1, nUnique
+            walls(i)%x(:, j) = uniqueNodes(:, j)
+        end do
+
+        ! Update conn using the link:
+        do j = 1, nCells
+            do k = 1, 4
+                walls(i)%conn(k, j) = link(walls(i)%conn(k, j))
+            end do
         end do
-     end do
 
-     ! Unique nodes and link are no longer needed
-     deallocate(link, uniqueNodes)
+        ! Unique nodes and link are no longer needed
+        deallocate (link, uniqueNodes)
 
-     call buildSerialQuad(nCells, nNodes, walls(i)%x, walls(i)%conn, walls(i)%ADT)
-  end do
+        call buildSerialQuad(nCells, nNodes, walls(i)%x, walls(i)%conn, walls(i)%ADT)
+    end do
 
-  ! Clean up memeory
-  deallocate(nodesGlobal, connGlobal, clusterCellGlobal, &
-       clusterNodeGlobal, localNodeNums, nodeIndicesGlobal, &
-       nodeIndicesCGNSGlobal)
+    ! Clean up memeory
+    deallocate (nodesGlobal, connGlobal, clusterCellGlobal, &
+                clusterNodeGlobal, localNodeNums, nodeIndicesGlobal, &
+                nodeIndicesCGNSGlobal)
 
-  do nn=1, nDom
-     deallocate(flowDoms(nn, level, sps)%globalCGNSNode)
-  end do
+    do nn = 1, nDom
+        deallocate (flowDoms(nn, level, sps)%globalCGNSNode)
+    end do
 
 end subroutine buildClusterWalls
diff --git a/src/overset/cartMesh.F90 b/src/overset/cartMesh.F90
index 021a1617a..ec41eac10 100644
--- a/src/overset/cartMesh.F90
+++ b/src/overset/cartMesh.F90
@@ -1,1031 +1,1025 @@
 module cartMesh
 
-  use oversetData
-  use communication
-  use utils
-  use haloExchange
-  use oversetPackingRoutines
-  use su_cgns
-  implicit none
-
- contains
-
-  subroutine createCartMesh(level, sps)
-
-    use constants
-    use blockPointers
-    use surfaceFamilies, only : BCFamGroups
+    use oversetData
+    use communication
+    use utils
+    use haloExchange
+    use oversetPackingRoutines
     use su_cgns
-#include <petsc/finclude/petsc.h>
-    use petsc
     implicit none
 
-    ! Input Params
-    integer(kind=intType), intent(in) :: level, sps
-
-    ! Working params
-    integer(kind=intType) :: i, j, k, l, ii, jj, kk, iBeg, iEnd, jBeg
-    integer(kind=intType) :: jEnd, kBeg, kEnd, nn, mm, iDim, symOnFace(6)
-    integer(kind=intType) :: count, countLocal , symOnFaceLocal(6),procDims(3)
-    integer(kind=intType) :: cellDims(3), nNodes, nCells, ierr, globalInd
-    integer(kind=intType) :: nSubI, nSubJ, nSeed, iSeed, nChanged, loopIter
-    integer(kind=intType) :: iSize, jSize, kSize, nChangedLocal, stackPointer
-    integer(kind=intType), dimension(:), allocatable :: indices
-    integer(kind=intType), dimension(:,:), allocatable :: lSizes
-    integer(kind=intType), dimension(:, :), allocatable :: stack, floodSeeds
-
-    real(kind=realType), dimension(:), pointer :: cartPointer
-    real(kind=realType), dimension(:, :, :), pointer :: xx
-    real(kind=realType), dimension(:, :, :), pointer :: arrVals, changed
-    real(kind=realType), dimension(:), allocatable :: values
-    real(kind=realType), dimension(3) :: xMinLocal, xMaxLocal, xMin, xMax, sss
-    real(kind=realType), dimension(3) ::  pt1, pt2, pt3, pt4, newPt, v1, v2
-    real(kind=realType) :: areaLocal, area, areaAvg, err1, err2, h
-    real(kind=realType) :: coorAvg, scaleSize, length, u, v
-
-    DM cartArray
-    AO cartOrdering
-    Vec cartVecGlobal, cartVecLocal, blankVec, blankVecLocal, changedVecLocal, changedVecGlobal
-    IS IS1, IS2
-    VecScatter blankScatter, blankScatterLocal
-
-
-    xMinLocal = large
-    xMaxLocal = -large
-    areaLocal = zero
-    countLocal = 0
-    ! First we have to determine the bounding box of our surfaces.
-
-    do nn=1,nDom
-       call setPointers(nn, level, sps)
-       do mm=1,nBocos
-          if(  BCType(mm) == NSWallAdiabatic .or. &
-               BCType(mm) == NSWallIsothermal .or. &
-               BCType(mm) == EulerWall) then
-
-             jBeg = BCData(mm)%jnBeg ; jEnd = BCData(mm)%jnEnd
-             iBeg = BCData(mm)%inBeg ; iEnd = BCData(mm)%inEnd
-
-             select case (BCFaceID(mm))
-             case (iMin)
-                xx   => x(1,:,:,:)
-             case (iMax)
-                xx   => x(il,:,:,:)
-             case (jMin)
-                xx   => x(:,1,:,:)
-             case (jMax)
-                xx   => x(:,jl,:,:)
-             case (kMin)
-                xx   => x(:,:,1,:)
-             case (kMax)
-                xx   => x(:,:,kl,:)
-             end select
-
-             do j=jBeg+1, jEnd+1
-                do i=iBeg+1,iEnd+1
-                   do iDim=1,3
-                      xMinLocal(iDim) = min(xMinLocal(iDim), xx(i, j, iDim))
-                      xMaxLocal(iDim) = max(xMaxLocal(iDim), xx(i, j, iDim))
-                   end do
-                end do
-             end do
-
-             ! Determine the total area of the patch:
-             do j=jBeg+1, jEnd
-                do i=iBeg+1, iEnd
-                   v1 = xx(i, j, :) - xx(i-1, j-1, :)
-                   v2 = xx(i-1, j, :) - xx(i, j-1, :)
-                   ! Cross Product
-                   sss(1) = (v1(2)*v2(3) - v1(3)*v2(2))
-                   sss(2) = (v1(3)*v2(1) - v1(1)*v2(3))
-                   sss(3) = (v1(1)*v2(2) - v1(2)*v2(1))
-
-                   areaLocal = areaLocal + half*(sss(1)**2 + sss(2)**2 + sss(3)**2)
-                   countLocal = countLocal + 1
-                end do
-             end do
-          end if
-       end do
-    end do
-
-    call MPI_Allreduce(xMinLocal, xMin, 3, adflow_real, MPI_MIN, adflow_comm_world, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    call MPI_Allreduce(xMaxLocal, xMax, 3, adflow_real, MPI_MAX, adflow_comm_world, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    call MPI_Allreduce(areaLocal, area, 1, adflow_real, MPI_SUM, adflow_comm_world, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    call MPI_Allreduce(countLocal, count, 1, adflow_integer, MPI_SUM, adflow_comm_world, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Compute the average area:
-    areaAvg = area/count
-
-    ! Compute the master size 'h':
-    h = sqrt(areaAvg)
-    h = h * 6
-    ! Before we setup the cartesian mesh, we will expand the bounds
-    ! slightly to ensure that there isn't a wall cell right at the
-    ! boundary. This is actually vastly tricker than it sounds becuase
-    ! we have to correctly account for symmetry planes. That is we
-    ! CANNOT
-    ! outside-in flood would just "go around the back" of the symmetry
-    ! plane and flood out the inside. Not good. So what we do is now
-    ! that we know the exact bounds of our surface, we loop over the
-    ! symmetry planes and flag values in an array of lenght 6
-    ! corresponding to (iLow, iHigh, jLow, jHigh, kLow, kHigh). We set
-    ! the value to 1 if the symmetry plane I'm looking at corresponds
-    ! to that value. Then we all reduce so that everyone knows which
-    ! of the 6 faces cannot be extended.
-
-    scaleSize = mynorm2(xMax-xMin)
-
-    symOnFaceLocal = 0
-    do nn=1,nDom
-       call setPointers(nn, level, sps)
-       do mm=1,nBocos
-          if(  BCType(mm) ==symm) then
-
-             jBeg = BCData(mm)%jnBeg ; jEnd = BCData(mm)%jnEnd
-             iBeg = BCData(mm)%inBeg ; iEnd = BCData(mm)%inEnd
-
-             select case (BCFaceID(mm))
-             case (iMin)
-                xx   => x(1,:,:,:)
-             case (iMax)
-                xx   => x(il,:,:,:)
-             case (jMin)
-                xx   => x(:,1,:,:)
-             case (jMax)
-                xx   => x(:,jl,:,:)
-             case (kMin)
-                xx   => x(:,:,1,:)
-             case (kMax)
-                xx   => x(:,:,kl,:)
-             end select
-
-             ! First we have to determine the principle coordinate
-             ! direction of the face. Do this with symNorm which is
-             ! already computed.
-
-             ! Location, ie coordiante direction of dominate direction
-             iDim = maxloc(abs(bcData(mm)%symNorm), 1)
-
-             ! Now determine the average value in "iDim" dimension
-             coorAvg = sum(xx(iBeg+1:iEnd+1, jBeg+1:jEnd+1, iDim))/((iEnd-iBeg+1)*(jEnd-jBeg+1))
-
-             ! Check if it is sufficently close to the bounding box:
-             err1 = abs(coorAvg - xMin(iDim))/scaleSize
-             err2 = abs(coorAvg - xMax(iDim))/scaleSize
-
-             if (err1 < 1e-8) then
-                symOnFaceLocal(2*iDim-1) = 1
-             end if
-
-             if (err2< 1e-8) then
-                symOnFaceLocal(2*iDim  ) = 1
-             end if
-          end if
-       end do
-    end do
-
-    ! Now we all reduce with max. Values that are not zero have
-    ! symmetry planes on them.
-
-    call MPI_Allreduce(symOnFaceLocal, symOnFace, 6, adflow_integer, MPI_MAX, adflow_comm_world, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Now we will expand xmin/xmax by 2h (just to be sure) but only
-    ! on the planes without symmetry conditions
-
-    do iDim=1,3
-       if (symOnFace(2*iDim-1) == 0) then
-          xMin(iDim) = xMin(iDim) - 2*h
-       end if
-
-       if (symOnFace(2*iDim  ) == 0) then
-          xMax(iDim) = xMax(iDim) + 2*h
-       end if
-    end do
-
-    ! Ok. Now we have the final size of our cartesian block. The
-    ! next step is to determine how it is to be partitioned.
-    call tripleFactor(nProc, procDims)
-
-    ! And the dimensions in each direction. Remember the
-    ! distributed array is cell-centered based.
-    cellDims = floor((xMax-xMin)/h) + 1
-
-    ! Now reorder the procDims so that the largest dimension
-    ! corresponds to largest number of procs. Since we know procDims
-    ! are sorted, we can do the 6 cases:
-
-    if (cellDims(1) >= cellDims(2) .and. cellDims(2) >= cellDims(3)) then
-       procDims = (/procDims(3), procDims(2), procDims(1)/)
-    else if (cellDims(1) >= cellDims(3) .and. cellDims(3) >= cellDims(2)) then
-       procDims = (/procDims(3), procDims(1), procDims(2)/)
-
-    else if (cellDims(2) >= cellDims(1) .and. cellDims(1) >= cellDims(3)) then
-       procDims = (/procDims(2), procDims(3), procDims(1)/)
-    else if (cellDims(2) >= cellDims(3) .and. cellDims(3) >= cellDims(1)) then
-       procDims = (/procDims(1), procDims(3), procDims(2)/)
-
-    else if (cellDims(3) >= cellDims(1) .and. cellDims(1) >= cellDims(2)) then
-       procDims = (/procDims(2), procDims(1), procDims(3)/)
-    else if (cellDims(3) >= cellDims(2) .and. cellDims(2) >= cellDims(1)) then
-       procDims = (/procDims(1), procDims(2), procDims(3)/)
-    end if
-
-    ! Since we can't pass in null array objects to petsc (this is
-    ! still broken) we have to do up all the arguments ourself. In
-    ! particular, the lx, ly and lz one which are quite annonying.
-
-    allocate(lSizes(maxval(procDims), 3))
-    do iDim=1, 3
-       do j=1, procDims(iDim)
-
-          ii = cellDims(iDim)/procDims(iDim)
-          if (mod(cellDims(iDim), procDims(iDim)) > j-1) then
-             ii = ii + 1
-          end if
-          lSizes(j, iDim) = ii
-       end do
-    end do
-    if (myid == 0) then
-       print *,'xmin:', xmin
-       print *,'xmax:', xmax
-       print *,'dims:', cellDims
-       print *, 'H:', h
-       print *, 'procDims:', procDims
-       print *,'I sizes:', lSizes(1:procDims(1), 1)
-       print *,'J sizes:', lSizes(1:procDims(2), 2)
-       print *,'K sizes:', lSizes(1:procDims(3), 3)
-    end if
-
-    call DMDAcreate3d(adflow_comm_world, DM_BOUNDARY_GHOSTED, DM_BOUNDARY_GHOSTED, &
-         DM_BOUNDARY_GHOSTED, DMDA_STENCIL_STAR, cellDims(1), cellDims(2), &
-         cellDims(3),  procDims(1), procDims(2), procDims(3), 1, 1, &
-         lSizes(1:procDims(1), 1), lSizes(1:procDims(2), 2), lSizes(1:procDims(3), 3), &
-         cartArray, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-    deallocate(lSizes)
-
-    ! Now loop back over the surfaces. For each surface, determine
-    ! it's global index of the point that will be flagged as a cut
-    ! cell. Due to the stupid PETSC ordering crap, we have to store
-    ! all the indices as we go.
-
-    i = 0
-    do nn=1, nDom
-       call setPointers(nn, level, sps)
-       call getWallSize(BCFamGroups(iBCGroupWalls)%famList, nNodes, nCells, .False.)
-       i = i + nNodes
-    end do
-    i = i * 10
-    allocate(indices(i))
-    count = 0
-    do nn=1,nDom
-       call setPointers(nn, level, sps)
-       do mm=1,nBocos
-          if(  BCType(mm) == NSWallAdiabatic .or. &
-               BCType(mm) == NSWallIsothermal .or. &
-               BCType(mm) == EulerWall) then
-
-             jBeg = BCData(mm)%jnBeg ; jEnd = BCData(mm)%jnEnd
-             iBeg = BCData(mm)%inBeg ; iEnd = BCData(mm)%inEnd
-
-             select case (BCFaceID(mm))
-             case (iMin)
-                xx   => x(1,:,:,:)
-             case (iMax)
-                xx   => x(il,:,:,:)
-             case (jMin)
-                xx   => x(:,1,:,:)
-             case (jMax)
-                xx   => x(:,jl,:,:)
-             case (kMin)
-                xx   => x(:,:,1,:)
-             case (kMax)
-                xx   => x(:,:,kl,:)
-             end select
-
-
-             ! Loop over Raw Nodes
-             do j=jBeg, jEnd
-                do i=iBeg, iEnd
-
-                   ! Note that ii, jj, kk are zero based since we are
-                   ! working with PETSc indices.
-                   ii = int((xx(i+1, j+1, 1) - xMin(1))/h)
-                   jj = int((xx(i+1, j+1, 2) - xMin(2))/h)
-                   kk = int((xx(i+1, j+1, 3) - xMin(3))/h)
-
-                   globalInd = cellDims(1)*cellDims(2)*kk + cellDims(1)*jj + ii
-                   count = count + 1
-                   indices(count) = globalInd
-                end do
-             end do
-
-             ! Loop over iEdges
-             do j=jBeg, jEnd
-                do i=iBeg+1, iEnd
-
-                   ! Check the Length of this edge
-                   pt1 = xx(i  , j+1, :)
-                   pt2 = xx(i+1, j+1, :)
-
-                   length = mynorm2(pt1-pt2)
-                   nSubI = int(length/h)
-
-                   do k=1, nSubI
-
-                      newPt = pt1 + dble(k)/(nSubI+1)*(pt2-pt1)
-                      ii = int((newPt(1) - xMin(1))/h)
-                      jj = int((newPt(2) - xMin(2))/h)
-                      kk = int((newPt(3) - xMin(3))/h)
-
-                      globalInd = cellDims(1)*cellDims(2)*kk + cellDims(1)*jj + ii
-                      count = count + 1
-                      indices(count) = globalInd
-                   end do
-                end do
-             end do
-
-             ! Loop over jEdges
-             do j=jBeg+1, jEnd
-                do i=iBeg, iEnd
-
-                   ! Check the Length of this edge
-                   pt1 = xx(i+1, j  , :)
-                   pt2 = xx(i+1, j+1, :)
-
-                   length = mynorm2(pt1-pt2)
-                   nSubJ = int(length/h)
-                   do k=1, nSubJ
-
-                      newPt = pt1 + dble(k)/(nSubJ+1)*(pt2-pt1)
-                      ii = int((newPt(1) - xMin(1))/h)
-                      jj = int((newPt(2) - xMin(2))/h)
-                      kk = int((newPt(3) - xMin(3))/h)
-
-                      globalInd = cellDims(1)*cellDims(2)*kk + cellDims(1)*jj + ii
-                      count = count + 1
-                      indices(count) = globalInd
-                   end do
-                end do
-             end do
-
-             ! Loop over faces
-             do j=jBeg+1, jEnd
-                do i=iBeg+1, iEnd
-
-                   ! Extract the 4 pts. CCW ordering
-                   pt1 = xx(i  , j  , :)
-                   pt2 = xx(i+1, j  , :)
-
-                   pt3 = xx(i+1, j+1, :)
-                   pt4 = xx(i  , j+1, :)
-
-                   ! Sub pts in I
-                   length = mynorm2(pt2-pt1)
-                   nSubI = int(length/h)
-
-                   length = mynorm2(pt3-pt4)
-                   nSubI = max(nSubI, int(length/h))
-
-                   ! Sub Pts in J
-                   length = mynorm2(pt4-pt1)
-                   nSubJ = int(length/h)
-
-                   length = mynorm2(pt3-pt2)
-                   nSubJ = max(nSubJ, int(length/h))
-
-                   do l=1, nSubJ
-                      do k=1, nSubI
-                         u = dble(k)/(nSubI+1)
-                         v = dble(l)/(nSubJ+1)
-
-                         newPt = (one-u)*(one-v)*pt1 + u*(one-v)*pt2 + &
-                              u*v*pt3 + (one-u)*v*pt4
-
-                         ii = int((newPt(1) - xMin(1))/h)
-                         jj = int((newPt(2) - xMin(2))/h)
-                         kk = int((newPt(3) - xMin(3))/h)
-
-                         globalInd = cellDims(1)*cellDims(2)*kk + cellDims(1)*jj + ii
-                         count = count + 1
-                         indices(count) = globalInd
-                      end do
-                   end do
-                end do
-             end do
-          end if
-       end do
-    end do
-
-    call DMCreateGlobalVector(cartArray, cartVecGlobal, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Initialize all values to 1 ("Compute")
-    call vecSet(cartVecGlobal, one, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    call DMDAGetAO(cartArray, cartOrdering, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Convert the indices from application ordering to petsc
-    call AOApplicationToPetsc(cartOrdering, size(indices), indices, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Now we set all the values, with a simple single vecSet call
-    allocate(values(count))
-    values = -three ! -3 is flood seed. Use the same notation here.
-
-    call vecSetValues(cartVecGlobal, count, indices, values, INSERT_VALUES, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-    deallocate(values, indices)
-
-    ! Don't forget to assemble
-    call vecAssemblyBegin(cartVecGlobal, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    call vecAssemblyEnd(cartVecGlobal, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! These are "get" vecs. We have to restore them later.
-    call DMGetLocalVector(cartArray, cartVecLocal, ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
-
-    call DMGetLocalVector(cartArray, changedVecLocal, ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
-
-    call VecSet(changedVecLocal, zero, ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
-
-    call DMGetGlobalVector(cartArray, changedVecGlobal, ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
-
-    ! Now the next step is to perform the flooding from the outside in.
-    call DMGlobalToLocalBegin(cartArray, cartVecGlobal, INSERT_VALUES, cartVecLocal, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    call DMGlobalToLocalEnd(cartArray, cartVecGlobal, INSERT_VALUES, cartVecLocal, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Determine the bounds of the arrays we will be getting back.
-    call DMDAVecGetArrayF90(cartArray, cartVecLocal, arrVals, ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
-
-    iBeg = lbound(arrVals, 1)
-    jBeg = lbound(arrVals, 2)
-    kBeg = lbound(arrVals, 3)
-
-    iEnd = ubound(arrVals, 1)
-    jEnd = ubound(arrVals, 2)
-    kEnd = ubound(arrVals, 3)
-
-    iSize = iEnd - iBeg + 1
-    jSize = jEnd - jBeg + 1
-    kSize = kEnd - kBeg + 1
+contains
 
-    call DMDAVecRestoreArrayF90(cartArray, cartVecLocal, arrVals, ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
+    subroutine createCartMesh(level, sps)
 
-    loopIter = 1
-    allocate(stack(3, 6*iSize*jSize*kSize + 1))
-    allocate(floodSeeds(3, 2*iSize*jSize + 2*iSize*kSize + 2*jSize*kSize))
-
-    parallelSyncLoop: do
-
-       call DMDAVecGetArrayF90(cartArray, cartVecLocal, arrVals, ierr)
-       call ECHK(ierr, __FILE__, __LINE__)
-
-       call DMDAVecGetArrayF90(cartArray, changedVecLocal, changed, ierr)
-       call ECHK(ierr, __FILE__, __LINE__)
-
-       ! Keep track of the total number of fringes we've modified
-       nChangedLocal = 0
-
-       ! Allocate space for our queue (stack). It needs to be 6*nx*ny*nz + 1:
-       ! 6 for each of the 6 coordinate directions plus our extra
-       ! seed. It should never come close to this unless the entire
-       ! block will be blanked.
-
-       nSeed = 0
-       if (loopIter == 1) then
-
-          if (myid == 0) then
-
-             ! Set the single seed on the bottom corner of the root proc
-
-             call addSeed(0,0,0)
-
-          end if
-
-       else
-
-          ! On the second and subsequent passes, check each 1st
-          ! non-corner halos in the 6 faces to see if we received
-          ! "changed" info from neighbour proc. This will allow us to
-          ! continue the flooding on this processor/block. Note that
-          ! even in a single processor case, the halo exchange in
-          ! necessary to communicate between two local blocks
-
-          ! iMin/iMax
-          do k=kBeg+1, kEnd-1
-             do j=jBeg+1, jEnd-1
-                if (int(changed(iBeg  , j, k)) == 1) then
-                   call addSeed(iBeg+1, j, k)
-                end if
-                if (int(changed(iEnd  , j, k)) == 1) then
-                   call addSeed(iEnd-1, j, k)
-                end if
-             end do
-          end do
-
-          ! jMin/jMax
-          do k=kBeg+1, kEnd-1
-             do i=iBeg+1, iEnd-1
-                if (int(changed(i, jBeg,   k)) == 1) then
-                   call addSeed(i, jBeg+1, k)
+        use constants
+        use blockPointers
+        use surfaceFamilies, only: BCFamGroups
+        use su_cgns
+#include <petsc/finclude/petsc.h>
+        use petsc
+        implicit none
+
+        ! Input Params
+        integer(kind=intType), intent(in) :: level, sps
+
+        ! Working params
+        integer(kind=intType) :: i, j, k, l, ii, jj, kk, iBeg, iEnd, jBeg
+        integer(kind=intType) :: jEnd, kBeg, kEnd, nn, mm, iDim, symOnFace(6)
+        integer(kind=intType) :: count, countLocal, symOnFaceLocal(6), procDims(3)
+        integer(kind=intType) :: cellDims(3), nNodes, nCells, ierr, globalInd
+        integer(kind=intType) :: nSubI, nSubJ, nSeed, iSeed, nChanged, loopIter
+        integer(kind=intType) :: iSize, jSize, kSize, nChangedLocal, stackPointer
+        integer(kind=intType), dimension(:), allocatable :: indices
+        integer(kind=intType), dimension(:, :), allocatable :: lSizes
+        integer(kind=intType), dimension(:, :), allocatable :: stack, floodSeeds
+
+        real(kind=realType), dimension(:), pointer :: cartPointer
+        real(kind=realType), dimension(:, :, :), pointer :: xx
+        real(kind=realType), dimension(:, :, :), pointer :: arrVals, changed
+        real(kind=realType), dimension(:), allocatable :: values
+        real(kind=realType), dimension(3) :: xMinLocal, xMaxLocal, xMin, xMax, sss
+        real(kind=realType), dimension(3) :: pt1, pt2, pt3, pt4, newPt, v1, v2
+        real(kind=realType) :: areaLocal, area, areaAvg, err1, err2, h
+        real(kind=realType) :: coorAvg, scaleSize, length, u, v
+
+        DM cartArray
+        AO cartOrdering
+        Vec cartVecGlobal, cartVecLocal, blankVec, blankVecLocal, changedVecLocal, changedVecGlobal
+        IS IS1, IS2
+        VecScatter blankScatter, blankScatterLocal
+
+        xMinLocal = large
+        xMaxLocal = -large
+        areaLocal = zero
+        countLocal = 0
+        ! First we have to determine the bounding box of our surfaces.
+
+        do nn = 1, nDom
+            call setPointers(nn, level, sps)
+            do mm = 1, nBocos
+                if (BCType(mm) == NSWallAdiabatic .or. &
+                    BCType(mm) == NSWallIsothermal .or. &
+                    BCType(mm) == EulerWall) then
+
+                    jBeg = BCData(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+                    iBeg = BCData(mm)%inBeg; iEnd = BCData(mm)%inEnd
+
+                    select case (BCFaceID(mm))
+                    case (iMin)
+                        xx => x(1, :, :, :)
+                    case (iMax)
+                        xx => x(il, :, :, :)
+                    case (jMin)
+                        xx => x(:, 1, :, :)
+                    case (jMax)
+                        xx => x(:, jl, :, :)
+                    case (kMin)
+                        xx => x(:, :, 1, :)
+                    case (kMax)
+                        xx => x(:, :, kl, :)
+                    end select
+
+                    do j = jBeg + 1, jEnd + 1
+                        do i = iBeg + 1, iEnd + 1
+                            do iDim = 1, 3
+                                xMinLocal(iDim) = min(xMinLocal(iDim), xx(i, j, iDim))
+                                xMaxLocal(iDim) = max(xMaxLocal(iDim), xx(i, j, iDim))
+                            end do
+                        end do
+                    end do
+
+                    ! Determine the total area of the patch:
+                    do j = jBeg + 1, jEnd
+                        do i = iBeg + 1, iEnd
+                            v1 = xx(i, j, :) - xx(i - 1, j - 1, :)
+                            v2 = xx(i - 1, j, :) - xx(i, j - 1, :)
+                            ! Cross Product
+                            sss(1) = (v1(2) * v2(3) - v1(3) * v2(2))
+                            sss(2) = (v1(3) * v2(1) - v1(1) * v2(3))
+                            sss(3) = (v1(1) * v2(2) - v1(2) * v2(1))
+
+                            areaLocal = areaLocal + half * (sss(1)**2 + sss(2)**2 + sss(3)**2)
+                            countLocal = countLocal + 1
+                        end do
+                    end do
                 end if
-                if (int(changed(i, jEnd,   k)) == 1) then
-                   call addSeed(i, jEnd-1, k)
+            end do
+        end do
+
+        call MPI_Allreduce(xMinLocal, xMin, 3, adflow_real, MPI_MIN, adflow_comm_world, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call MPI_Allreduce(xMaxLocal, xMax, 3, adflow_real, MPI_MAX, adflow_comm_world, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call MPI_Allreduce(areaLocal, area, 1, adflow_real, MPI_SUM, adflow_comm_world, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call MPI_Allreduce(countLocal, count, 1, adflow_integer, MPI_SUM, adflow_comm_world, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Compute the average area:
+        areaAvg = area / count
+
+        ! Compute the master size 'h':
+        h = sqrt(areaAvg)
+        h = h * 6
+        ! Before we setup the cartesian mesh, we will expand the bounds
+        ! slightly to ensure that there isn't a wall cell right at the
+        ! boundary. This is actually vastly tricker than it sounds becuase
+        ! we have to correctly account for symmetry planes. That is we
+        ! CANNOT
+        ! outside-in flood would just "go around the back" of the symmetry
+        ! plane and flood out the inside. Not good. So what we do is now
+        ! that we know the exact bounds of our surface, we loop over the
+        ! symmetry planes and flag values in an array of lenght 6
+        ! corresponding to (iLow, iHigh, jLow, jHigh, kLow, kHigh). We set
+        ! the value to 1 if the symmetry plane I'm looking at corresponds
+        ! to that value. Then we all reduce so that everyone knows which
+        ! of the 6 faces cannot be extended.
+
+        scaleSize = mynorm2(xMax - xMin)
+
+        symOnFaceLocal = 0
+        do nn = 1, nDom
+            call setPointers(nn, level, sps)
+            do mm = 1, nBocos
+                if (BCType(mm) == symm) then
+
+                    jBeg = BCData(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+                    iBeg = BCData(mm)%inBeg; iEnd = BCData(mm)%inEnd
+
+                    select case (BCFaceID(mm))
+                    case (iMin)
+                        xx => x(1, :, :, :)
+                    case (iMax)
+                        xx => x(il, :, :, :)
+                    case (jMin)
+                        xx => x(:, 1, :, :)
+                    case (jMax)
+                        xx => x(:, jl, :, :)
+                    case (kMin)
+                        xx => x(:, :, 1, :)
+                    case (kMax)
+                        xx => x(:, :, kl, :)
+                    end select
+
+                    ! First we have to determine the principle coordinate
+                    ! direction of the face. Do this with symNorm which is
+                    ! already computed.
+
+                    ! Location, ie coordiante direction of dominate direction
+                    iDim = maxloc(abs(bcData(mm)%symNorm), 1)
+
+                    ! Now determine the average value in "iDim" dimension
+                 coorAvg = sum(xx(iBeg + 1:iEnd + 1, jBeg + 1:jEnd + 1, iDim)) / ((iEnd - iBeg + 1) * (jEnd - jBeg + 1))
+
+                    ! Check if it is sufficently close to the bounding box:
+                    err1 = abs(coorAvg - xMin(iDim)) / scaleSize
+                    err2 = abs(coorAvg - xMax(iDim)) / scaleSize
+
+                    if (err1 < 1e-8) then
+                        symOnFaceLocal(2 * iDim - 1) = 1
+                    end if
+
+                    if (err2 < 1e-8) then
+                        symOnFaceLocal(2 * iDim) = 1
+                    end if
                 end if
-             end do
-          end do
-
-          ! kMin:
-          do j=jBeg+1, jEnd-1
-             do i=iBeg+1, iEnd-1
-                if (int(changed(i, j, kBeg )) == 1) then
-                   call addSeed(i, j, kBeg+1)
+            end do
+        end do
+
+        ! Now we all reduce with max. Values that are not zero have
+        ! symmetry planes on them.
+
+        call MPI_Allreduce(symOnFaceLocal, symOnFace, 6, adflow_integer, MPI_MAX, adflow_comm_world, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Now we will expand xmin/xmax by 2h (just to be sure) but only
+        ! on the planes without symmetry conditions
+
+        do iDim = 1, 3
+            if (symOnFace(2 * iDim - 1) == 0) then
+                xMin(iDim) = xMin(iDim) - 2 * h
+            end if
+
+            if (symOnFace(2 * iDim) == 0) then
+                xMax(iDim) = xMax(iDim) + 2 * h
+            end if
+        end do
+
+        ! Ok. Now we have the final size of our cartesian block. The
+        ! next step is to determine how it is to be partitioned.
+        call tripleFactor(nProc, procDims)
+
+        ! And the dimensions in each direction. Remember the
+        ! distributed array is cell-centered based.
+        cellDims = floor((xMax - xMin) / h) + 1
+
+        ! Now reorder the procDims so that the largest dimension
+        ! corresponds to largest number of procs. Since we know procDims
+        ! are sorted, we can do the 6 cases:
+
+        if (cellDims(1) >= cellDims(2) .and. cellDims(2) >= cellDims(3)) then
+            procDims = (/procDims(3), procDims(2), procDims(1)/)
+        else if (cellDims(1) >= cellDims(3) .and. cellDims(3) >= cellDims(2)) then
+            procDims = (/procDims(3), procDims(1), procDims(2)/)
+
+        else if (cellDims(2) >= cellDims(1) .and. cellDims(1) >= cellDims(3)) then
+            procDims = (/procDims(2), procDims(3), procDims(1)/)
+        else if (cellDims(2) >= cellDims(3) .and. cellDims(3) >= cellDims(1)) then
+            procDims = (/procDims(1), procDims(3), procDims(2)/)
+
+        else if (cellDims(3) >= cellDims(1) .and. cellDims(1) >= cellDims(2)) then
+            procDims = (/procDims(2), procDims(1), procDims(3)/)
+        else if (cellDims(3) >= cellDims(2) .and. cellDims(2) >= cellDims(1)) then
+            procDims = (/procDims(1), procDims(2), procDims(3)/)
+        end if
+
+        ! Since we can't pass in null array objects to petsc (this is
+        ! still broken) we have to do up all the arguments ourself. In
+        ! particular, the lx, ly and lz one which are quite annonying.
+
+        allocate (lSizes(maxval(procDims), 3))
+        do iDim = 1, 3
+            do j = 1, procDims(iDim)
+
+                ii = cellDims(iDim) / procDims(iDim)
+                if (mod(cellDims(iDim), procDims(iDim)) > j - 1) then
+                    ii = ii + 1
                 end if
-                if (int(changed(i, j, kEnd )) == 1) then
-                   call addSeed(i, j, kEnd-1)
+                lSizes(j, iDim) = ii
+            end do
+        end do
+        if (myid == 0) then
+            print *, 'xmin:', xmin
+            print *, 'xmax:', xmax
+            print *, 'dims:', cellDims
+            print *, 'H:', h
+            print *, 'procDims:', procDims
+            print *, 'I sizes:', lSizes(1:procDims(1), 1)
+            print *, 'J sizes:', lSizes(1:procDims(2), 2)
+            print *, 'K sizes:', lSizes(1:procDims(3), 3)
+        end if
+
+        call DMDAcreate3d(adflow_comm_world, DM_BOUNDARY_GHOSTED, DM_BOUNDARY_GHOSTED, &
+                          DM_BOUNDARY_GHOSTED, DMDA_STENCIL_STAR, cellDims(1), cellDims(2), &
+                          cellDims(3), procDims(1), procDims(2), procDims(3), 1, 1, &
+                          lSizes(1:procDims(1), 1), lSizes(1:procDims(2), 2), lSizes(1:procDims(3), 3), &
+                          cartArray, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+        deallocate (lSizes)
+
+        ! Now loop back over the surfaces. For each surface, determine
+        ! it's global index of the point that will be flagged as a cut
+        ! cell. Due to the stupid PETSC ordering crap, we have to store
+        ! all the indices as we go.
+
+        i = 0
+        do nn = 1, nDom
+            call setPointers(nn, level, sps)
+            call getWallSize(BCFamGroups(iBCGroupWalls)%famList, nNodes, nCells, .False.)
+            i = i + nNodes
+        end do
+        i = i * 10
+        allocate (indices(i))
+        count = 0
+        do nn = 1, nDom
+            call setPointers(nn, level, sps)
+            do mm = 1, nBocos
+                if (BCType(mm) == NSWallAdiabatic .or. &
+                    BCType(mm) == NSWallIsothermal .or. &
+                    BCType(mm) == EulerWall) then
+
+                    jBeg = BCData(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+                    iBeg = BCData(mm)%inBeg; iEnd = BCData(mm)%inEnd
+
+                    select case (BCFaceID(mm))
+                    case (iMin)
+                        xx => x(1, :, :, :)
+                    case (iMax)
+                        xx => x(il, :, :, :)
+                    case (jMin)
+                        xx => x(:, 1, :, :)
+                    case (jMax)
+                        xx => x(:, jl, :, :)
+                    case (kMin)
+                        xx => x(:, :, 1, :)
+                    case (kMax)
+                        xx => x(:, :, kl, :)
+                    end select
+
+                    ! Loop over Raw Nodes
+                    do j = jBeg, jEnd
+                        do i = iBeg, iEnd
+
+                            ! Note that ii, jj, kk are zero based since we are
+                            ! working with PETSc indices.
+                            ii = int((xx(i + 1, j + 1, 1) - xMin(1)) / h)
+                            jj = int((xx(i + 1, j + 1, 2) - xMin(2)) / h)
+                            kk = int((xx(i + 1, j + 1, 3) - xMin(3)) / h)
+
+                            globalInd = cellDims(1) * cellDims(2) * kk + cellDims(1) * jj + ii
+                            count = count + 1
+                            indices(count) = globalInd
+                        end do
+                    end do
+
+                    ! Loop over iEdges
+                    do j = jBeg, jEnd
+                        do i = iBeg + 1, iEnd
+
+                            ! Check the Length of this edge
+                            pt1 = xx(i, j + 1, :)
+                            pt2 = xx(i + 1, j + 1, :)
+
+                            length = mynorm2(pt1 - pt2)
+                            nSubI = int(length / h)
+
+                            do k = 1, nSubI
+
+                                newPt = pt1 + dble(k) / (nSubI + 1) * (pt2 - pt1)
+                                ii = int((newPt(1) - xMin(1)) / h)
+                                jj = int((newPt(2) - xMin(2)) / h)
+                                kk = int((newPt(3) - xMin(3)) / h)
+
+                                globalInd = cellDims(1) * cellDims(2) * kk + cellDims(1) * jj + ii
+                                count = count + 1
+                                indices(count) = globalInd
+                            end do
+                        end do
+                    end do
+
+                    ! Loop over jEdges
+                    do j = jBeg + 1, jEnd
+                        do i = iBeg, iEnd
+
+                            ! Check the Length of this edge
+                            pt1 = xx(i + 1, j, :)
+                            pt2 = xx(i + 1, j + 1, :)
+
+                            length = mynorm2(pt1 - pt2)
+                            nSubJ = int(length / h)
+                            do k = 1, nSubJ
+
+                                newPt = pt1 + dble(k) / (nSubJ + 1) * (pt2 - pt1)
+                                ii = int((newPt(1) - xMin(1)) / h)
+                                jj = int((newPt(2) - xMin(2)) / h)
+                                kk = int((newPt(3) - xMin(3)) / h)
+
+                                globalInd = cellDims(1) * cellDims(2) * kk + cellDims(1) * jj + ii
+                                count = count + 1
+                                indices(count) = globalInd
+                            end do
+                        end do
+                    end do
+
+                    ! Loop over faces
+                    do j = jBeg + 1, jEnd
+                        do i = iBeg + 1, iEnd
+
+                            ! Extract the 4 pts. CCW ordering
+                            pt1 = xx(i, j, :)
+                            pt2 = xx(i + 1, j, :)
+
+                            pt3 = xx(i + 1, j + 1, :)
+                            pt4 = xx(i, j + 1, :)
+
+                            ! Sub pts in I
+                            length = mynorm2(pt2 - pt1)
+                            nSubI = int(length / h)
+
+                            length = mynorm2(pt3 - pt4)
+                            nSubI = max(nSubI, int(length / h))
+
+                            ! Sub Pts in J
+                            length = mynorm2(pt4 - pt1)
+                            nSubJ = int(length / h)
+
+                            length = mynorm2(pt3 - pt2)
+                            nSubJ = max(nSubJ, int(length / h))
+
+                            do l = 1, nSubJ
+                                do k = 1, nSubI
+                                    u = dble(k) / (nSubI + 1)
+                                    v = dble(l) / (nSubJ + 1)
+
+                                    newPt = (one - u) * (one - v) * pt1 + u * (one - v) * pt2 + &
+                                            u * v * pt3 + (one - u) * v * pt4
+
+                                    ii = int((newPt(1) - xMin(1)) / h)
+                                    jj = int((newPt(2) - xMin(2)) / h)
+                                    kk = int((newPt(3) - xMin(3)) / h)
+
+                                    globalInd = cellDims(1) * cellDims(2) * kk + cellDims(1) * jj + ii
+                                    count = count + 1
+                                    indices(count) = globalInd
+                                end do
+                            end do
+                        end do
+                    end do
                 end if
-             end do
-          end do
-       end if
-
-       ! Loop over our seeds we currently have
-       do iSeed = 1, nSeed
-
-          ! Put the particular seed in the first slot of the stack
-          stack(:, 1) = floodSeeds(:, iSeed)
-
-          ! Reset the stack pointer length back to just the one seed
-          ! we have
-          stackPointer = 1
+            end do
+        end do
 
-          ! Start the flooding (stacked based, not recursive)
-          do while (stackPointer > 0 )
+        call DMCreateGlobalVector(cartArray, cartVecGlobal, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-             ! 'Pop' the current point off the stack
-             i = stack(1, stackPointer)
-             j = stack(2, stackPointer)
-             k = stack(3, stackPointer)
-             stackPointer = stackPointer - 1
+        ! Initialize all values to 1 ("Compute")
+        call vecSet(cartVecGlobal, one, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-             if (int(arrVals(i, j, k)) == 1) then
+        call DMDAGetAO(cartArray, cartOrdering, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-                ! Flag the cell (using changed) as being changed
-                changed(i, j, k) = one
+        ! Convert the indices from application ordering to petsc
+        call AOApplicationToPetsc(cartOrdering, size(indices), indices, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-                ! Keep track of the total number we've changed.  For
-                ! reporting purposes...only count the ones that are
-                ! on actual compute cells:
-                if (onBlock(i, j, k)) then
-                   nChangedLocal = nChangedLocal + 1
-                end if
-
-                ! Set the value as flooded. Use 0 as per usual.
-                arrVals(i, j, k) = zero
-
-                ! Now add the six nearest neighbours to the stack
-                ! provided they are in the owned cell range:
+        ! Now we set all the values, with a simple single vecSet call
+        allocate (values(count))
+        values = -three ! -3 is flood seed. Use the same notation here.
 
-                if (i-1 >= iBeg) then
-                   stackPointer = stackPointer + 1
-                   stack(:, stackPointer) = (/i-1, j  , k  /)
-                end if
-
-                if (i+1 <= iEnd) then
-                   stackPointer = stackPointer + 1
-                   stack(:, stackPointer) = (/i+1, j  , k  /)
-                end if
+        call vecSetValues(cartVecGlobal, count, indices, values, INSERT_VALUES, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+        deallocate (values, indices)
 
-                if (j-1 >= jBeg) then
-                   stackPointer = stackPointer + 1
-                   stack(:, stackPointer) = (/i  , j-1, k  /)
-                end if
+        ! Don't forget to assemble
+        call vecAssemblyBegin(cartVecGlobal, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-                if (j+1 <= jEnd) then
-                   stackPointer = stackPointer + 1
-                   stack(:, stackPointer) = (/i  , j+1, k  /)
-                end if
+        call vecAssemblyEnd(cartVecGlobal, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-                if (k-1 >= kBeg) then
-                   stackPointer = stackPointer + 1
-                   stack(:, stackPointer) = (/i  , j  , k-1/)
-                end if
+        ! These are "get" vecs. We have to restore them later.
+        call DMGetLocalVector(cartArray, cartVecLocal, ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
 
-                if (k+1 <= kEnd) then
-                   stackPointer = stackPointer + 1
-                   stack(:, stackPointer) = (/i  , j , k+1 /)
-                end if
-             end if
-          end do
-       end do
+        call DMGetLocalVector(cartArray, changedVecLocal, ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
 
-       call DMDAVecRestoreArrayF90(cartArray, cartVecLocal, arrVals, ierr)
-       call ECHK(ierr, __FILE__, __LINE__)
+        call VecSet(changedVecLocal, zero, ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
 
-       call DMDAVecRestoreArrayF90(cartArray, changedVecLocal, changed, ierr)
-       call ECHK(ierr, __FILE__, __LINE__)
+        call DMGetGlobalVector(cartArray, changedVecGlobal, ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
 
-       ! Exchange "changed"
-       call DMLocalToGlobalBegin(cartArray, changedVecLocal, INSERT_VALUES, changedVecGlobal, ierr)
-       call ECHK(ierr, __FILE__, __LINE__)
+        ! Now the next step is to perform the flooding from the outside in.
+        call DMGlobalToLocalBegin(cartArray, cartVecGlobal, INSERT_VALUES, cartVecLocal, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-       call DMLocalToGlobalEnd(cartArray, changedVecLocal, INSERT_VALUES, changedVecGlobal, ierr)
-       call ECHK(ierr, __FILE__, __LINE__)
+        call DMGlobalToLocalEnd(cartArray, cartVecGlobal, INSERT_VALUES, cartVecLocal, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-       call DMGlobalToLocalBegin(cartArray, changedVecGlobal, INSERT_VALUES, changedVecLocal, ierr)
-       call ECHK(ierr, __FILE__, __LINE__)
+        ! Determine the bounds of the arrays we will be getting back.
+        call DMDAVecGetArrayF90(cartArray, cartVecLocal, arrVals, ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
 
-       call DMGlobalToLocalEnd(cartArray, changedVecGlobal, INSERT_VALUES, changedVecLocal, ierr)
-       call ECHK(ierr, __FILE__, __LINE__)
+        iBeg = lbound(arrVals, 1)
+        jBeg = lbound(arrVals, 2)
+        kBeg = lbound(arrVals, 3)
 
-       ! Determine if cells got changd. If so do another loop.
-       call mpi_allreduce(nChangedLocal, nChanged, 1, adflow_integer, MPI_SUM, &
-            adflow_comm_world, ierr)
-       call ECHK(ierr, __FILE__, __LINE__)
-       if (myid == 0) then
-          print *, 'Cart Flood Iteration:', loopIter, 'Blanked ', nChanged, 'Interior Cells.'
-       end if
+        iEnd = ubound(arrVals, 1)
+        jEnd = ubound(arrVals, 2)
+        kEnd = ubound(arrVals, 3)
 
-       if (nChanged == 0) then
-          exit parallelSyncLoop
-       end if
+        iSize = iEnd - iBeg + 1
+        jSize = jEnd - jBeg + 1
+        kSize = kEnd - kBeg + 1
 
-       loopIter = loopIter + 1
-    end do parallelSyncLoop
+        call DMDAVecRestoreArrayF90(cartArray, cartVecLocal, arrVals, ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
 
-    deallocate(stack, floodSeeds)
+        loopIter = 1
+        allocate (stack(3, 6 * iSize * jSize * kSize + 1))
+        allocate (floodSeeds(3, 2 * iSize * jSize + 2 * iSize * kSize + 2 * jSize * kSize))
 
-    ! Now that we have flooded everything, any cells left over must
-    ! be *inside. Do one last pass through and flip those.
+        parallelSyncLoop: do
 
-    call DMDAVecGetArrayF90(cartArray, cartVecLocal, arrVals, ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
+            call DMDAVecGetArrayF90(cartArray, cartVecLocal, arrVals, ierr)
+            call ECHK(ierr, __FILE__, __LINE__)
 
-    do k=kBeg+1, kEnd-1
-       do j=jBeg+1, jEnd-1
-          do i=iBeg+1, iEnd-1
-             if (int(arrVals(i, j, k)) == 1) then
-                arrVals(i, j, k) = -three
-             end if
-          end do
-       end do
-    end do
+            call DMDAVecGetArrayF90(cartArray, changedVecLocal, changed, ierr)
+            call ECHK(ierr, __FILE__, __LINE__)
 
-    call DMDAVecRestoreArrayF90(cartArray, cartVecLocal, arrVals, ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
+            ! Keep track of the total number of fringes we've modified
+            nChangedLocal = 0
 
-    ! Restore the vectors obtained with "get"
-    call DMRestoreLocalVector(cartArray, cartVecLocal, ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
+            ! Allocate space for our queue (stack). It needs to be 6*nx*ny*nz + 1:
+            ! 6 for each of the 6 coordinate directions plus our extra
+            ! seed. It should never come close to this unless the entire
+            ! block will be blanked.
 
-    call DMRestoreLocalVector(cartArray, changedVecLocal, ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
+            nSeed = 0
+            if (loopIter == 1) then
 
-    call DMRestoreGlobalVector(cartArray, changedVecGlobal, ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
+                if (myid == 0) then
 
-    ! Now update the final global variables in cartArray.
-    call DMLocalToGlobalBegin(cartArray, cartVecLocal, INSERT_VALUES, cartVecGlobal, ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
+                    ! Set the single seed on the bottom corner of the root proc
 
-    call DMLocalToGlobalEnd(cartArray, cartVecLocal, INSERT_VALUES, cartVecGlobal, ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
+                    call addSeed(0, 0, 0)
 
-    ! Now that we are done with the flooding and any local
-    ! communication, we can create a global vector that is in the
-    ! ordering that we actually want.
+                end if
 
-    i = cellDims(1)*cellDims(2)*cellDims(3)
-    call VecCreateMPI(adflow_comm_world, PETSC_DECIDE, i, blankVec, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+            else
+
+                ! On the second and subsequent passes, check each 1st
+                ! non-corner halos in the 6 faces to see if we received
+                ! "changed" info from neighbour proc. This will allow us to
+                ! continue the flooding on this processor/block. Note that
+                ! even in a single processor case, the halo exchange in
+                ! necessary to communicate between two local blocks
+
+                ! iMin/iMax
+                do k = kBeg + 1, kEnd - 1
+                    do j = jBeg + 1, jEnd - 1
+                        if (int(changed(iBeg, j, k)) == 1) then
+                            call addSeed(iBeg + 1, j, k)
+                        end if
+                        if (int(changed(iEnd, j, k)) == 1) then
+                            call addSeed(iEnd - 1, j, k)
+                        end if
+                    end do
+                end do
 
+                ! jMin/jMax
+                do k = kBeg + 1, kEnd - 1
+                    do i = iBeg + 1, iEnd - 1
+                        if (int(changed(i, jBeg, k)) == 1) then
+                            call addSeed(i, jBeg + 1, k)
+                        end if
+                        if (int(changed(i, jEnd, k)) == 1) then
+                            call addSeed(i, jEnd - 1, k)
+                        end if
+                    end do
+                end do
 
-    ! Create the index for the real global Vector
-    call VecGetOwnershipRange(blankVec, i, j, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-    call ISCreateStride(adflow_comm_world, j-i, i, 1, IS2, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+                ! kMin:
+                do j = jBeg + 1, jEnd - 1
+                    do i = iBeg + 1, iEnd - 1
+                        if (int(changed(i, j, kBeg)) == 1) then
+                            call addSeed(i, j, kBeg + 1)
+                        end if
+                        if (int(changed(i, j, kEnd)) == 1) then
+                            call addSeed(i, j, kEnd - 1)
+                        end if
+                    end do
+                end do
+            end if
+
+            ! Loop over our seeds we currently have
+            do iSeed = 1, nSeed
+
+                ! Put the particular seed in the first slot of the stack
+                stack(:, 1) = floodSeeds(:, iSeed)
+
+                ! Reset the stack pointer length back to just the one seed
+                ! we have
+                stackPointer = 1
+
+                ! Start the flooding (stacked based, not recursive)
+                do while (stackPointer > 0)
+
+                    ! 'Pop' the current point off the stack
+                    i = stack(1, stackPointer)
+                    j = stack(2, stackPointer)
+                    k = stack(3, stackPointer)
+                    stackPointer = stackPointer - 1
+
+                    if (int(arrVals(i, j, k)) == 1) then
+
+                        ! Flag the cell (using changed) as being changed
+                        changed(i, j, k) = one
+
+                        ! Keep track of the total number we've changed.  For
+                        ! reporting purposes...only count the ones that are
+                        ! on actual compute cells:
+                        if (onBlock(i, j, k)) then
+                            nChangedLocal = nChangedLocal + 1
+                        end if
+
+                        ! Set the value as flooded. Use 0 as per usual.
+                        arrVals(i, j, k) = zero
+
+                        ! Now add the six nearest neighbours to the stack
+                        ! provided they are in the owned cell range:
+
+                        if (i - 1 >= iBeg) then
+                            stackPointer = stackPointer + 1
+                            stack(:, stackPointer) = (/i - 1, j, k/)
+                        end if
+
+                        if (i + 1 <= iEnd) then
+                            stackPointer = stackPointer + 1
+                            stack(:, stackPointer) = (/i + 1, j, k/)
+                        end if
+
+                        if (j - 1 >= jBeg) then
+                            stackPointer = stackPointer + 1
+                            stack(:, stackPointer) = (/i, j - 1, k/)
+                        end if
+
+                        if (j + 1 <= jEnd) then
+                            stackPointer = stackPointer + 1
+                            stack(:, stackPointer) = (/i, j + 1, k/)
+                        end if
+
+                        if (k - 1 >= kBeg) then
+                            stackPointer = stackPointer + 1
+                            stack(:, stackPointer) = (/i, j, k - 1/)
+                        end if
+
+                        if (k + 1 <= kEnd) then
+                            stackPointer = stackPointer + 1
+                            stack(:, stackPointer) = (/i, j, k + 1/)
+                        end if
+                    end if
+                end do
+            end do
 
-    call ISDuplicate(IS2, IS1, ierr)
-    call AOApplicationToPetscIS(cartOrdering, IS1, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+            call DMDAVecRestoreArrayF90(cartArray, cartVecLocal, arrVals, ierr)
+            call ECHK(ierr, __FILE__, __LINE__)
 
-    call vecScatterCreate(cartVecGlobal, IS1, blankVec, IS2, blankScatter, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+            call DMDAVecRestoreArrayF90(cartArray, changedVecLocal, changed, ierr)
+            call ECHK(ierr, __FILE__, __LINE__)
 
-    call VecScatterBegin(blankScatter, cartVecGlobal, blankVec, &
-         INSERT_VALUES, SCATTER_FORWARD, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+            ! Exchange "changed"
+            call DMLocalToGlobalBegin(cartArray, changedVecLocal, INSERT_VALUES, changedVecGlobal, ierr)
+            call ECHK(ierr, __FILE__, __LINE__)
 
-    call VecScatterEnd(blankScatter, cartVecGlobal, blankVec, &
-         INSERT_VALUES, SCATTER_FORWARD, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+            call DMLocalToGlobalEnd(cartArray, changedVecLocal, INSERT_VALUES, changedVecGlobal, ierr)
+            call ECHK(ierr, __FILE__, __LINE__)
 
-    ! We are now done with everything petsc related except for
-    ! blankVec. That's all we  need now.
-    call VecScatterDestroy(blankScatter, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+            call DMGlobalToLocalBegin(cartArray, changedVecGlobal, INSERT_VALUES, changedVecLocal, ierr)
+            call ECHK(ierr, __FILE__, __LINE__)
 
-    call ISDestroy(IS1, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+            call DMGlobalToLocalEnd(cartArray, changedVecGlobal, INSERT_VALUES, changedVecLocal, ierr)
+            call ECHK(ierr, __FILE__, __LINE__)
 
-    call ISDestroy(IS2, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+            ! Determine if cells got changd. If so do another loop.
+            call mpi_allreduce(nChangedLocal, nChanged, 1, adflow_integer, MPI_SUM, &
+                               adflow_comm_world, ierr)
+            call ECHK(ierr, __FILE__, __LINE__)
+            if (myid == 0) then
+                print *, 'Cart Flood Iteration:', loopIter, 'Blanked ', nChanged, 'Interior Cells.'
+            end if
 
-    call VecDestroy(cartVecGlobal, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+            if (nChanged == 0) then
+                exit parallelSyncLoop
+            end if
 
-    call DMDestroy(cartArray, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+            loopIter = loopIter + 1
+        end do parallelSyncLoop
 
-    ! Write our mesh
-    call writeCartMesh(blankVec, cellDims, xMin, h)
+        deallocate (stack, floodSeeds)
 
+        ! Now that we have flooded everything, any cells left over must
+        ! be *inside. Do one last pass through and flip those.
 
-    ! Now what we have to do is to distribute parts of the cart mesh
-    ! to the processors that need it. For now, just do a create to all.
+        call DMDAVecGetArrayF90(cartArray, cartVecLocal, arrVals, ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
 
-    call vecScatterCreateToAll(blankVec, blankScatterLocal, blankVecLocal, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        do k = kBeg + 1, kEnd - 1
+            do j = jBeg + 1, jEnd - 1
+                do i = iBeg + 1, iEnd - 1
+                    if (int(arrVals(i, j, k)) == 1) then
+                        arrVals(i, j, k) = -three
+                    end if
+                end do
+            end do
+        end do
 
-    call VecScatterBegin(blankScatterLocal, blankVec, blankVecLOCAL, INSERT_VALUES, SCATTER_FORWARD, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        call DMDAVecRestoreArrayF90(cartArray, cartVecLocal, arrVals, ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
 
-    call VecScatterEnd(blankScatterLocal, blankVec, blankVecLOCAL, INSERT_VALUES, SCATTER_FORWARD, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        ! Restore the vectors obtained with "get"
+        call DMRestoreLocalVector(cartArray, cartVecLocal, ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
 
-    call VecScatterDestroy(blankScatterLocal, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        call DMRestoreLocalVector(cartArray, changedVecLocal, ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
 
-    call VecGetArrayF90(blanKVecLocal, cartPointer, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        call DMRestoreGlobalVector(cartArray, changedVecGlobal, ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
+
+        ! Now update the final global variables in cartArray.
+        call DMLocalToGlobalBegin(cartArray, cartVecLocal, INSERT_VALUES, cartVecGlobal, ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
+
+        call DMLocalToGlobalEnd(cartArray, cartVecLocal, INSERT_VALUES, cartVecGlobal, ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
 
-    do nn=1,nDom
-       call setPointers(nn, level, sps)
-       do k=2, kl
-          do j=2, jl
-             do i=2, il
-                ii = ii + 1
-                do iDim=1, 3
-                   pt1(iDim) = eighth*(&
-                        x(i-1, j-1, k-1, iDim) + &
-                        x(i  , j-1, k-1, iDim) + &
-                        x(i-1, j  , k-1, iDim) + &
-                        x(i  , j  , k-1, iDim) + &
-                        x(i-1, j-1, k  , iDim) + &
-                        x(i  , j-1, k  , iDim) + &
-                        x(i-1, j  , k  , iDim) + &
-                        x(i  , j  , k  , iDim))
+        ! Now that we are done with the flooding and any local
+        ! communication, we can create a global vector that is in the
+        ! ordering that we actually want.
+
+        i = cellDims(1) * cellDims(2) * cellDims(3)
+        call VecCreateMPI(adflow_comm_world, PETSC_DECIDE, i, blankVec, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Create the index for the real global Vector
+        call VecGetOwnershipRange(blankVec, i, j, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+        call ISCreateStride(adflow_comm_world, j - i, i, 1, IS2, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call ISDuplicate(IS2, IS1, ierr)
+        call AOApplicationToPetscIS(cartOrdering, IS1, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call vecScatterCreate(cartVecGlobal, IS1, blankVec, IS2, blankScatter, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecScatterBegin(blankScatter, cartVecGlobal, blankVec, &
+                             INSERT_VALUES, SCATTER_FORWARD, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecScatterEnd(blankScatter, cartVecGlobal, blankVec, &
+                           INSERT_VALUES, SCATTER_FORWARD, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! We are now done with everything petsc related except for
+        ! blankVec. That's all we  need now.
+        call VecScatterDestroy(blankScatter, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call ISDestroy(IS1, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call ISDestroy(IS2, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecDestroy(cartVecGlobal, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call DMDestroy(cartArray, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Write our mesh
+        call writeCartMesh(blankVec, cellDims, xMin, h)
+
+        ! Now what we have to do is to distribute parts of the cart mesh
+        ! to the processors that need it. For now, just do a create to all.
+
+        call vecScatterCreateToAll(blankVec, blankScatterLocal, blankVecLocal, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecScatterBegin(blankScatterLocal, blankVec, blankVecLOCAL, INSERT_VALUES, SCATTER_FORWARD, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecScatterEnd(blankScatterLocal, blankVec, blankVecLOCAL, INSERT_VALUES, SCATTER_FORWARD, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecScatterDestroy(blankScatterLocal, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecGetArrayF90(blanKVecLocal, cartPointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        do nn = 1, nDom
+            call setPointers(nn, level, sps)
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+                        ii = ii + 1
+                        do iDim = 1, 3
+                            pt1(iDim) = eighth * ( &
+                                        x(i - 1, j - 1, k - 1, iDim) + &
+                                        x(i, j - 1, k - 1, iDim) + &
+                                        x(i - 1, j, k - 1, iDim) + &
+                                        x(i, j, k - 1, iDim) + &
+                                        x(i - 1, j - 1, k, iDim) + &
+                                        x(i, j - 1, k, iDim) + &
+                                        x(i - 1, j, k, iDim) + &
+                                        x(i, j, k, iDim))
+                        end do
+
+                        ! Now Simply check if the cell this point is in has a value of -3
+                        ii = int((pt1(1) - xMin(1)) / h)
+                        jj = int((pt1(2) - xMin(2)) / h)
+                        kk = int((pt1(3) - xMin(3)) / h)
+
+                        ! Clip the bounds to the actual ranges:
+                        ii = min(max(0, ii), cellDims(1) - 1)
+                        jj = min(max(0, jj), cellDims(2) - 1)
+                        kk = min(max(0, kk), cellDims(3) - 1)
+
+                        ! GlobalInd is 0 based
+                        globalInd = cellDims(1) * cellDims(2) * kk + cellDims(1) * jj + ii
+
+                        ! Only blank if we are more than sqrt(3)*h from our own wall:
+                        if (xSeed(i, j, k, 1) < large) then
+                            ! We have a wall: See how far we are away form it
+                            length = mynorm2(pt1 - xSeed(i, j, k, :))
+                            if (length > 1.73205080 * h) then
+                                if (int(cartPointer(globalInd + 1)) == -3) then
+                                    iblank(i, j, k) = -3
+                                end if
+                            end if
+                        else
+                            ! No wall...no wall check.
+                            if (int(cartPointer(globalInd + 1)) == -3) then
+                                iblank(i, j, k) = -3
+                            end if
+                        end if
+                    end do
                 end do
+            end do
+        end do
 
-                ! Now Simply check if the cell this point is in has a value of -3
-                ii = int((pt1(1) - xMin(1))/h)
-                jj = int((pt1(2) - xMin(2))/h)
-                kk = int((pt1(3) - xMin(3))/h)
-
-                ! Clip the bounds to the actual ranges:
-                ii = min(max(0, ii), cellDims(1)-1)
-                jj = min(max(0, jj), cellDims(2)-1)
-                kk = min(max(0, kk), cellDims(3)-1)
-
-                ! GlobalInd is 0 based
-                globalInd = cellDims(1)*cellDims(2)*kk + cellDims(1)*jj + ii
-
-                ! Only blank if we are more than sqrt(3)*h from our own wall:
-                if (xSeed(i, j, k, 1) < large) then
-                   ! We have a wall: See how far we are away form it
-                   length = mynorm2(pt1 - xSeed(i, j, k, :))
-                   if (length > 1.73205080*h) then
-                      if (int(cartPointer(globalInd+1) ) == -3) then
-                         iblank(i, j, k) = -3
-                      end if
-                   end if
-                else
-                   ! No wall...no wall check.
-                   if (int(cartPointer(globalInd+1) ) == -3) then
-                      iblank(i, j, k) = -3
-                   end if
-                end if
-             end do
-          end do
-       end do
-    end do
-
-    call VecRestoreArrayF90(blankVecLocal, cartPointer, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    ! Clean up the remaining PETScmemory
-    call VecDestroy(blankVec, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        call VecRestoreArrayF90(blankVecLocal, cartPointer, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    call VecDestroy(blankVecLocal, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        ! Clean up the remaining PETScmemory
+        call VecDestroy(blankVec, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    ! Update the iblank info.
-    domainLoop:do nn=1, nDom
-       flowDoms(nn, level, sps)%intCommVars(1)%var => &
-            flowDoms(nn, level, sps)%iblank(:, :, :)
-    end do domainLoop
+        call VecDestroy(blankVecLocal, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    ! Run the generic integer exchange
-    call wHalo1to1IntGeneric(1, level, sps, commPatternCell_2nd, internalCell_2nd)
+        ! Update the iblank info.
+        domainLoop: do nn = 1, nDom
+            flowDoms(nn, level, sps)%intCommVars(1)%var => &
+                flowDoms(nn, level, sps)%iblank(:, :, :)
+        end do domainLoop
 
+        ! Run the generic integer exchange
+        call wHalo1to1IntGeneric(1, level, sps, commPatternCell_2nd, internalCell_2nd)
 
-  contains
-    ! Simple routine to make code easier to read above
-    subroutine addSeed(i, j, k)
-      use constants
-      implicit none
-      integer(kind=intType), intent(in) :: i, j, k
-      nSeed = nSeed + 1
-      floodSeeds(:, nSeed) = (/i, j, k/)
-    end subroutine addSeed
+    contains
+        ! Simple routine to make code easier to read above
+        subroutine addSeed(i, j, k)
+            use constants
+            implicit none
+            integer(kind=intType), intent(in) :: i, j, k
+            nSeed = nSeed + 1
+            floodSeeds(:, nSeed) = (/i, j, k/)
+        end subroutine addSeed
 
-    function onBlock(i, j, k)
+        function onBlock(i, j, k)
 
-      use constants
-      implicit none
+            use constants
+            implicit none
 
-      integer(kind=intType), intent(in) :: i, j, k
-      logical :: onBlock
+            integer(kind=intType), intent(in) :: i, j, k
+            logical :: onBlock
 
-      if (i >= 0 .and. i <= iEnd-1 .and. j >= 0 .and. j<= jEnd-1 .and. k >= 0 .and. k <= kEnd-1) then
-         onBlock = .True.
-      else
-         onBlock = .False.
-      end if
-    end function onBlock
-  end subroutine createCartMesh
+            if (i >= 0 .and. i <= iEnd - 1 .and. j >= 0 .and. j <= jEnd - 1 .and. k >= 0 .and. k <= kEnd - 1) then
+                onBlock = .True.
+            else
+                onBlock = .False.
+            end if
+        end function onBlock
+    end subroutine createCartMesh
 
-  subroutine writeCartMesh(blankVec, cellDims, xMin, h)
+    subroutine writeCartMesh(blankVec, cellDims, xMin, h)
 
 #include <petsc/finclude/petsc.h>
-   use petsc
-   implicit none
-
-    ! Input
-    integer(kind=intType), intent(in), dimension(3) :: cellDims
-    real(kind=realType), intent(in), dimension(3) :: xMin
-    real(kind=realType), intent(in) :: h
-    Vec blankVec
-    ! Working
-    integer(kind=intType) :: ierr
-
-    ! CGNS
-    character(len=32) :: coorNames(3)
-    integer(kind=intType) :: base, zoneID, coordID, cg, zone,  iField, iSol, i, j, k, iDim
-    real(kind=realType), dimension(:, :, :, :), allocatable :: xTmp
-    real(kind=realType), dimension(:), pointer :: cartPointer
-
-    Vec blankVecLocal
-    IS IS1, IS2
-    VecScatter blankScatterLocal
-
-    coorNames(1) = "CoordinateX"
-    coorNames(2) = "CoordinateY"
-    coorNames(3) = "CoordinateZ"
-
-    call VecScatterCreateToZero(blankVec, blankScatterLocal, blankVecLocal, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    call VecScatterBegin(blankScatterLocal, blankVec, blankVecLocal, &
-         INSERT_VALUES, SCATTER_FORWARD, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    call VecScatterEnd(blankScatterLocal, blankVec, blankVecLocal, &
-         INSERT_VALUES, SCATTER_FORWARD, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    if (myid == 0) then
-       ! Open the CGNS File
-       call cg_open_f("cartblock.cgns", mode_write, cg, ierr)
-       base = 1
-       call cg_base_write_f(cg, "Base#1", 3, 3, base, ierr)
-
-       call cg_zone_write_f(cg, base, "cartblock", int((/cellDims(1)+1, cellDims(2)+1, cellDims(3)+1, &
-            cellDims(1), cellDims(2), cellDims(3), 0, 0, 0/), cgsize_t), Structured, zoneID, ierr)
-
-       allocate(xtmp(cellDims(1)+1, cellDims(2)+1, cellDims(3)+1, 3))
-
-       do k=1, cellDims(3)+1
-          do j=1, cellDims(2)+1
-             do i=1, cellDims(1)+1
-                xTmp(i, j, k, 1) = xMin(1) + (i-1)*h
-             end do
-          end do
-       end do
-
-       do k=1, cellDims(3)+1
-          do j=1, cellDims(2)+1
-             do i=1, cellDims(1)+1
-                xTmp(i, j, k, 2) = xMin(2) + (j-1)*h
-             end do
-          end do
-       end do
-
-       do k=1, cellDims(3)+1
-          do j=1, cellDims(2)+1
-             do i=1, cellDims(1)+1
-                xTmp(i, j, k, 3) = xMin(3) + (k-1)*h
-             end do
-          end do
-       end do
-
-       do idim=1, 3
-          call cg_coord_write_f(cg, base, zoneID, realDouble, coorNames(idim), &
-               xtmp(:, :, :, idim), coordID, ierr)
-       end do
-       deallocate(xtmp)
-
-       call cg_sol_write_f(cg, base, zoneID, "flowSolution", CellCenter, iSol, ierr)
-       call VecGetArrayF90(blanKVecLocal, cartPointer, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       call cg_field_write_f(cg, base, zoneID, iSol, realDouble, "iBlank", &
-            cartPointer, iField, ierr)
-
-       call VecRestoreArrayF90(blankVecLocal, cartPointer, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-       call cg_close_f(cg, ierr)
-    end if
-
-    call VecDestroy(blankVecLocal, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-    call VecScatterDestroy(blankScatterLocal, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
-
-  end subroutine writeCartMesh
-
-  subroutine tripleFactor(N, s)
-    use constants
-    use sorting, only : qsortIntegers
-    implicit none
-    ! Input/Output
-    integer(kind=intType), intent(in) :: N
-    integer(kind=intType), intent(out) :: s(3)
-
-    ! Working
-    integer(kind=intType) :: a, b, a1, b1, a2, b2, s1(3), s2(3)
-
-    ! Determine a set of triple factors for integer N
-
-    call largeFactor(N, a, b)
-    call largeFactor(b, a1, a2)
-    call largeFactor(a, b1, b2)
-
-    ! Our options are a, a1 and a2 OR b, b1, and b2
-    s1 = (/a, a1, a2/)
-    s2 = (/b, b1, b2/)
-
-    ! Sort them
-    call qsortIntegers(s1, 3)
-    call qsortIntegers(s2, 3)
-
-    ! And take the set that has the largest, smallest value.
-    if (s1(1) > s2(1)) then
-       s = s1
-    else
-       s = s2
-    end if
-  end subroutine tripleFactor
-
-  subroutine largeFactor(N, f1, f2)
+        use petsc
+        implicit none
+
+        ! Input
+        integer(kind=intType), intent(in), dimension(3) :: cellDims
+        real(kind=realType), intent(in), dimension(3) :: xMin
+        real(kind=realType), intent(in) :: h
+        Vec blankVec
+        ! Working
+        integer(kind=intType) :: ierr
+
+        ! CGNS
+        character(len=32) :: coorNames(3)
+        integer(kind=intType) :: base, zoneID, coordID, cg, zone, iField, iSol, i, j, k, iDim
+        real(kind=realType), dimension(:, :, :, :), allocatable :: xTmp
+        real(kind=realType), dimension(:), pointer :: cartPointer
+
+        Vec blankVecLocal
+        IS IS1, IS2
+        VecScatter blankScatterLocal
+
+        coorNames(1) = "CoordinateX"
+        coorNames(2) = "CoordinateY"
+        coorNames(3) = "CoordinateZ"
+
+        call VecScatterCreateToZero(blankVec, blankScatterLocal, blankVecLocal, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecScatterBegin(blankScatterLocal, blankVec, blankVecLocal, &
+                             INSERT_VALUES, SCATTER_FORWARD, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecScatterEnd(blankScatterLocal, blankVec, blankVecLocal, &
+                           INSERT_VALUES, SCATTER_FORWARD, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        if (myid == 0) then
+            ! Open the CGNS File
+            call cg_open_f("cartblock.cgns", mode_write, cg, ierr)
+            base = 1
+            call cg_base_write_f(cg, "Base#1", 3, 3, base, ierr)
+
+            call cg_zone_write_f(cg, base, "cartblock", int((/cellDims(1) + 1, cellDims(2) + 1, cellDims(3) + 1, &
+                                  cellDims(1), cellDims(2), cellDims(3), 0, 0, 0/), cgsize_t), Structured, zoneID, ierr)
+
+            allocate (xtmp(cellDims(1) + 1, cellDims(2) + 1, cellDims(3) + 1, 3))
+
+            do k = 1, cellDims(3) + 1
+                do j = 1, cellDims(2) + 1
+                    do i = 1, cellDims(1) + 1
+                        xTmp(i, j, k, 1) = xMin(1) + (i - 1) * h
+                    end do
+                end do
+            end do
 
-    implicit none
-    integer(kind=intType) :: N, f1, f2, i, j, s
-    ! Return the two factors that are closest to the sqrt(N)
-
-    s = int(sqrt(dble(N)))
-    do j=s, 1, -1
-       if (mod(N, j) == 0) then
-          f1 = N/j
-          f2 = j
-          exit
-       end if
-    end do
-  end subroutine largeFactor
+            do k = 1, cellDims(3) + 1
+                do j = 1, cellDims(2) + 1
+                    do i = 1, cellDims(1) + 1
+                        xTmp(i, j, k, 2) = xMin(2) + (j - 1) * h
+                    end do
+                end do
+            end do
 
+            do k = 1, cellDims(3) + 1
+                do j = 1, cellDims(2) + 1
+                    do i = 1, cellDims(1) + 1
+                        xTmp(i, j, k, 3) = xMin(3) + (k - 1) * h
+                    end do
+                end do
+            end do
+
+            do idim = 1, 3
+                call cg_coord_write_f(cg, base, zoneID, realDouble, coorNames(idim), &
+                                      xtmp(:, :, :, idim), coordID, ierr)
+            end do
+            deallocate (xtmp)
+
+            call cg_sol_write_f(cg, base, zoneID, "flowSolution", CellCenter, iSol, ierr)
+            call VecGetArrayF90(blanKVecLocal, cartPointer, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            call cg_field_write_f(cg, base, zoneID, iSol, realDouble, "iBlank", &
+                                  cartPointer, iField, ierr)
+
+            call VecRestoreArrayF90(blankVecLocal, cartPointer, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+            call cg_close_f(cg, ierr)
+        end if
+
+        call VecDestroy(blankVecLocal, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecScatterDestroy(blankScatterLocal, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+    end subroutine writeCartMesh
+
+    subroutine tripleFactor(N, s)
+        use constants
+        use sorting, only: qsortIntegers
+        implicit none
+        ! Input/Output
+        integer(kind=intType), intent(in) :: N
+        integer(kind=intType), intent(out) :: s(3)
+
+        ! Working
+        integer(kind=intType) :: a, b, a1, b1, a2, b2, s1(3), s2(3)
+
+        ! Determine a set of triple factors for integer N
+
+        call largeFactor(N, a, b)
+        call largeFactor(b, a1, a2)
+        call largeFactor(a, b1, b2)
+
+        ! Our options are a, a1 and a2 OR b, b1, and b2
+        s1 = (/a, a1, a2/)
+        s2 = (/b, b1, b2/)
+
+        ! Sort them
+        call qsortIntegers(s1, 3)
+        call qsortIntegers(s2, 3)
+
+        ! And take the set that has the largest, smallest value.
+        if (s1(1) > s2(1)) then
+            s = s1
+        else
+            s = s2
+        end if
+    end subroutine tripleFactor
+
+    subroutine largeFactor(N, f1, f2)
+
+        implicit none
+        integer(kind=intType) :: N, f1, f2, i, j, s
+        ! Return the two factors that are closest to the sqrt(N)
+
+        s = int(sqrt(dble(N)))
+        do j = s, 1, -1
+            if (mod(N, j) == 0) then
+                f1 = N / j
+                f2 = j
+                exit
+            end if
+        end do
+    end subroutine largeFactor
 
 end module cartMesh
diff --git a/src/overset/computeCellWallPoint.F90 b/src/overset/computeCellWallPoint.F90
index 6ad1ef4a8..b04a378c8 100644
--- a/src/overset/computeCellWallPoint.F90
+++ b/src/overset/computeCellWallPoint.F90
@@ -1,125 +1,125 @@
 subroutine computeCellWallPoint(level, sps)
 
-  !  This routine is vastly more complex that it really should
-  !  be. Essentially what we want to do is to determine the "wall
-  !  point" for every cell. Essentially for every wall surface, we wan
-  !  to record the coordintes of the wall surface cell center along
-  !  cell centers eminating from the surface. The reason why this gets
-  !  complex is that block can get cut in the off-wall direction which
-  !  breaks the propagation. If this propatation isn't continued, the
-  !  overset hole cut will be dependent on the block distribution and
-  !  hense the numbe rof processors.
-  use constants
-  use blockPointers
-  use communication
-  use kdtree2_module
-  use oversetData
-  use utils, only : setPointers
-  use haloExchange, only : whalo1to1realgeneric
-  implicit none
-
-  ! Input Params
-  integer(kind=intType), intent(in) :: level, sps
-
-  ! Working paramters
-  integer(kind=intType) :: i, j, k, nn, cluster, ii, ind
-  type(oversetWall), pointer :: wall
-  logical, dimension(:), allocatable :: treeBuilt
-  type(kdtree2_result), dimension(1) :: results
-  real(kind=realType) :: xp(3)
-
-  ! We already have clusterWalls. Build the KD tree from the nodes
-  ! only for the clusters we have.
-  allocate(treeBuilt(nClusters))
-  treeBuilt = .False.
-
-  do nn=1, nDom
-     call setPointers(nn, level, sps)
-     cluster = clusters(cumDomProc(myid) + nn)
-     wall => clusterWalls(cluster)
-
-     ! If tree for this cluster is not built
-     if (treeBuilt(cluster) .eqv. .False. .and. wall%nNodes > 0) then
-
-        ! Only build tree for real surface nodes. Copy these and the
-        !  indices out. This is an overestimate of the size.
-        allocate(wall%xPrimalCen(3, 1:wall%nNodes), wall%indPrimal(1:wall%nNodes))
-
-        j = 0
-        do i=1, wall%nNodes
-           if (wall%ind(i) >= 0) then
-              j = j + 1
-              wall%xPrimalCen(:, j) = wall%x(:, i)
-              wall%indPrimal(j) = wall%ind(i)
-           end if
-        end do
-
-        wall%tree => kdtree2_create(wall%xPrimalCen(:, 1:j), sort=.False.)
-     end if
-
-     if (.not. associated(flowDoms(nn, level, sps)%xSeed)) then
-        allocate(flowDoms(nn, level, sps)%XSeed(0:ib, 0:jb, 0:kb, 3))
-        allocate(flowDoms(nn, level, sps)%wallInd(2:il, 2:jl, 2:kl))
-        ! Manaully set the pointer for xSeed so we don't call
-        ! setPointers again
-        xSeed => flowDoms(nn, level, sps)%xSeed
-        wallInd => flowDoms(nn, level, sps)%wallInd
-     end if
-
-     ! Initialize to large to indicate that nothing has been changed.
-     xSeed = large
-     wallInd = -1
-
-     if (wall%nNodes > 0) then
-        do k=2, kl
-           do j=2, jl
-              do i=2, il
-                 xp = eighth*(&
-                      x(i-1, j-1, k-1, :) + &
-                      x(i  , j-1, k-1, :) + &
-                      x(i-1, j  , k-1, :) + &
-                      x(i  , j  , k-1, :) + &
-                      x(i-1, j-1, k  , :) + &
-                      x(i  , j-1, k  , :) + &
-                      x(i-1, j  , k  , :) + &
-                      x(i  , j  , k  , :))
-
-                 call kdtree2_n_nearest(wall%tree, xp, 1, results)
-
-                 ! Need to store the value in xseed and wall ind
-                 xseed(i, j, k, :) = wall%xPrimalCen(:, results(1)%idx)
-                 wallInd(i, j, k) = wall%indPrimal(results(1)%idx)
-
-              end do
-           end do
-        end do
-     end if
-  end do
-
-  ! Loop back over the blocks destroying the kd_trees as necessary
-  do nn=1, nDom
-     call setPointers(nn, level, sps)
-     cluster = clusters(cumDomProc(myid) + nn)
-     wall => clusterWalls(cluster)
-
-     if (treeBuilt(cluster)) then
-        call kdtree2destroy(wall%tree)
-        deallocate(wall%xPrimalCen, wall%indPrimal)
-     end if
-  end do
-
-  ! Exchange the xSeeds. Need these for building oBlock.
-  do nn=1, nDom
-     flowDoms(nn, level, sps)%realCommVars(1)%var => &
-          flowDoms(nn, level, sps)%xSeed(:, :, :, 1)
-     flowDoms(nn, level, sps)%realCommVars(2)%var => &
-          flowDoms(nn, level, sps)%xSeed(:, :, :, 2)
-     flowDoms(nn, level, sps)%realCommVars(3)%var => &
-          flowDoms(nn, level, sps)%xSeed(:, :, :, 3)
-  end do
-
-  ! Run the generic halo exchange.
-  call wHalo1to1RealGeneric(3, level, sps, commPatternCell_2nd, internalCell_2nd)
+    !  This routine is vastly more complex that it really should
+    !  be. Essentially what we want to do is to determine the "wall
+    !  point" for every cell. Essentially for every wall surface, we wan
+    !  to record the coordintes of the wall surface cell center along
+    !  cell centers eminating from the surface. The reason why this gets
+    !  complex is that block can get cut in the off-wall direction which
+    !  breaks the propagation. If this propatation isn't continued, the
+    !  overset hole cut will be dependent on the block distribution and
+    !  hense the numbe rof processors.
+    use constants
+    use blockPointers
+    use communication
+    use kdtree2_module
+    use oversetData
+    use utils, only: setPointers
+    use haloExchange, only: whalo1to1realgeneric
+    implicit none
+
+    ! Input Params
+    integer(kind=intType), intent(in) :: level, sps
+
+    ! Working paramters
+    integer(kind=intType) :: i, j, k, nn, cluster, ii, ind
+    type(oversetWall), pointer :: wall
+    logical, dimension(:), allocatable :: treeBuilt
+    type(kdtree2_result), dimension(1) :: results
+    real(kind=realType) :: xp(3)
+
+    ! We already have clusterWalls. Build the KD tree from the nodes
+    ! only for the clusters we have.
+    allocate (treeBuilt(nClusters))
+    treeBuilt = .False.
+
+    do nn = 1, nDom
+        call setPointers(nn, level, sps)
+        cluster = clusters(cumDomProc(myid) + nn)
+        wall => clusterWalls(cluster)
+
+        ! If tree for this cluster is not built
+        if (treeBuilt(cluster) .eqv. .False. .and. wall%nNodes > 0) then
+
+            ! Only build tree for real surface nodes. Copy these and the
+            !  indices out. This is an overestimate of the size.
+            allocate (wall%xPrimalCen(3, 1:wall%nNodes), wall%indPrimal(1:wall%nNodes))
+
+            j = 0
+            do i = 1, wall%nNodes
+                if (wall%ind(i) >= 0) then
+                    j = j + 1
+                    wall%xPrimalCen(:, j) = wall%x(:, i)
+                    wall%indPrimal(j) = wall%ind(i)
+                end if
+            end do
+
+            wall%tree => kdtree2_create(wall%xPrimalCen(:, 1:j), sort=.False.)
+        end if
+
+        if (.not. associated(flowDoms(nn, level, sps)%xSeed)) then
+            allocate (flowDoms(nn, level, sps)%XSeed(0:ib, 0:jb, 0:kb, 3))
+            allocate (flowDoms(nn, level, sps)%wallInd(2:il, 2:jl, 2:kl))
+            ! Manaully set the pointer for xSeed so we don't call
+            ! setPointers again
+            xSeed => flowDoms(nn, level, sps)%xSeed
+            wallInd => flowDoms(nn, level, sps)%wallInd
+        end if
+
+        ! Initialize to large to indicate that nothing has been changed.
+        xSeed = large
+        wallInd = -1
+
+        if (wall%nNodes > 0) then
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+                        xp = eighth * ( &
+                             x(i - 1, j - 1, k - 1, :) + &
+                             x(i, j - 1, k - 1, :) + &
+                             x(i - 1, j, k - 1, :) + &
+                             x(i, j, k - 1, :) + &
+                             x(i - 1, j - 1, k, :) + &
+                             x(i, j - 1, k, :) + &
+                             x(i - 1, j, k, :) + &
+                             x(i, j, k, :))
+
+                        call kdtree2_n_nearest(wall%tree, xp, 1, results)
+
+                        ! Need to store the value in xseed and wall ind
+                        xseed(i, j, k, :) = wall%xPrimalCen(:, results(1)%idx)
+                        wallInd(i, j, k) = wall%indPrimal(results(1)%idx)
+
+                    end do
+                end do
+            end do
+        end if
+    end do
+
+    ! Loop back over the blocks destroying the kd_trees as necessary
+    do nn = 1, nDom
+        call setPointers(nn, level, sps)
+        cluster = clusters(cumDomProc(myid) + nn)
+        wall => clusterWalls(cluster)
+
+        if (treeBuilt(cluster)) then
+            call kdtree2destroy(wall%tree)
+            deallocate (wall%xPrimalCen, wall%indPrimal)
+        end if
+    end do
+
+    ! Exchange the xSeeds. Need these for building oBlock.
+    do nn = 1, nDom
+        flowDoms(nn, level, sps)%realCommVars(1)%var => &
+            flowDoms(nn, level, sps)%xSeed(:, :, :, 1)
+        flowDoms(nn, level, sps)%realCommVars(2)%var => &
+            flowDoms(nn, level, sps)%xSeed(:, :, :, 2)
+        flowDoms(nn, level, sps)%realCommVars(3)%var => &
+            flowDoms(nn, level, sps)%xSeed(:, :, :, 3)
+    end do
+
+    ! Run the generic halo exchange.
+    call wHalo1to1RealGeneric(3, level, sps, commPatternCell_2nd, internalCell_2nd)
 
 end subroutine computeCellWallPoint
 
diff --git a/src/overset/computeHolesInsideBody.F90 b/src/overset/computeHolesInsideBody.F90
index ac03cbd82..463c662d7 100644
--- a/src/overset/computeHolesInsideBody.F90
+++ b/src/overset/computeHolesInsideBody.F90
@@ -1,805 +1,796 @@
 subroutine computeHolesInsideBody(level, sps)
 
-  ! This routine will flag the iBlank values in oBlocks with 0 if the
-  ! the cell center falls inside the body. This is a (semi) parallel
-  ! implementation: The global surface mesh is communicated to all
-  ! processors who then search accordingly. This scalable in terms of
-  ! computation but not strictly memory.
-  use adtLocalSearch, only : minDistanceTreeSearchSinglePoint
-  use adtBuild, only : buildSerialQuad, destroySerialHex
-  use blockPointers
-  use wallDistanceData
-  use communication
-  use inputphysics
-  use inputTimeSpectral
-  use oversetData
-  use inputOverset
-  use adjointVars, only : totalVolumeNodes => nNodesLocal, totalVolumeCells => nCellsLocal
-  implicit none
-
-  ! Input Variables
-  integer(kind=intType), intent(in) :: level, sps
-
-  ! Local Variables
-  integer(kind=intType) :: i, j, k, l, ii, jj, kk, nn, mm, iNode, iCell, c
-  integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, ni, nj, nUnique, cellID, cellID2
-  integer(kind=intType) :: ierr, iDim
-
-  ! Data for local surface
-  integer(kind=intType) :: nNodes, nCells
-  integer(kind=intType) :: nNodesLocal, nCellsLocal
-  integer(kind=intType), dimension(:, :), allocatable :: connLocal
-  integer(kind=intType), dimension(:), allocatable :: clusterNodeLocal
-  integer(kind=intType), dimension(:), allocatable :: clusterCellLocal
-  real(kind=realType), dimension(:, :), allocatable :: nodesLocal
-  real(kind=realType), dimension(:,:,:), pointer :: xx
-  integer(kind=intType), dimension(:,:), pointer :: ind
-  real(kind=realType) :: myDist, otherDist, timeA
-  logical :: regularOrdering, nearAWall
-
-  ! Overset Walls for storing the surface ADT's
-  type(oversetWall), dimension(:), allocatable, target :: walls
-  type(oversetWall), target :: fullWall
-
-  ! Data for global surface
-  integer(kind=intTYpe) :: nNodesGlobal, nCellsGlobal
-  integer(kind=intType), dimension(:, :), allocatable, target :: connGlobal
-  real(kind=realType), dimension(:, :), allocatable, target :: nodesGlobal
-  integer(kind=intType), dimension(:), allocatable, target :: nodeIndicesGlobal
-
-  integer(kind=intType), dimension(:), allocatable :: nodesPerCluster, cellsPerCluster, cnc, ccc
-  integer(kind=intType), dimension(:), allocatable :: clusterNodeGlobal
-  integer(kind=intType), dimension(:), allocatable :: clusterCellGlobal
-  integer(kind=intType), dimension(:), allocatable :: localNodeNums
-  integer(kind=intType), dimension(:), allocatable :: nodeIndicesLocal
-
-  integer(kind=intType), dimension(:),    allocatable :: nCellProc, cumCellProc
-  integer(kind=intType), dimension(:),    allocatable :: nNodeProc, cumNodeProc
-  real(kind=realType),   dimension(:, :), allocatable :: uniqueNodes
-  integer(kind=intType), dimension(:),    allocatable :: link, indicesToGet
-
-  ! Pointers for easier readibility
-  integer(kind=intType), dimension(:, :), pointer :: conn
-  real(kind=realType), dimension(:, :), pointer :: nodes, norm
-  integer(kind=intType), dimension(:), pointer :: tmpInd
-
-  ! Data for the ADT
-  integer(kind=intType) :: intInfo(3), intInfo2(3)
-  real(kind=realType) :: coor(4), uvw(5), uvw2(5)
-  real(kind=realType), dimension(3, 2) :: dummy
-  real(kind=realType), parameter :: tol=1e-12
-  integer(kind=intType), dimension(:), pointer :: frontLeaves, frontLeavesNew, BBint
-  type(adtBBoxTargetType), dimension(:), pointer :: BB
-
-  ! Misc
-  real(kind=realType) :: dp, shp(4)
-  real(kind=realType), dimension(3) ::xp, normal, v1
-
-  ! The first thing we do is gather all the surface nodes to
-  ! each processor such that every processor can make it's own copy of
-  ! the complete surface mesh to use to search. Note that this
-  ! procedure *DOES NOT SCALE IN MEMORY*...ie eventually the surface
-  ! mesh will become too large to store on a single processor,
-  ! although this will probably not happen until the sizes get up in
-  ! the hundreds of millions of cells.
-  timea = mpi_wtime()
-
-  nNodesLocal = 0
-  nCellsLocal = 0
-
-  do nn=1,nDom
-     call setPointers(nn, level, sps)
-     do mm=1, nBocos
-        if(BCType(mm) == NSWallAdiabatic .or. &
-           BCType(mm) == NSWallIsothermal .or. &
-           BCType(mm) == EulerWall) then
-           iBeg = bcData(mm)%inBeg
-           iEnd = bcData(mm)%inEnd
-           jBeg = bcData(mm)%jnBeg
-           jEnd = bcData(mm)%jnEnd
-
-           nNodesLocal = nNodesLocal + &
-                (iEnd - iBeg + 1)*(jEnd - jBeg + 1)
-           nCellsLocal = nCellsLocal + &
-                (iEnd - iBeg)*(jEnd - jBeg)
-        end if
-     end do
-  end do
-
-  ! Now communicate these sizes with everyone
-  allocate(nCellProc(nProc), cumCellProc(0:nProc), &
-           nNodeProc(nProc), cumNodeProc(0:nProc))
-
-  call mpi_allgather(nCellsLocal, 1, adflow_integer, nCellProc, 1, adflow_integer, &
-       adflow_comm_world, ierr)
-  call EChk(ierr, __FILE__, __LINE__)
-
-  call mpi_allgather(nNodesLocal, 1, adflow_integer, nNodeProc, 1, adflow_integer, &
-       adflow_comm_world, ierr)
-  call EChk(ierr, __FILE__, __LINE__)
-
-  ! Now make cumulative versions of these
-  cumCellProc(0) = 0
-  cumNodeProc(0) = 0
-  do i=1,nProc
-     cumCellProc(i) = cumCellProc(i-1) + nCellProc(i)
-     cumNodeProc(i) = cumNodeProc(i-1) + nNodeProc(i)
-  end do
-
-  ! And save the total number of nodes and cells for reference
-  nCellsGlobal = cumCellProc(nProc)
-  nNodesGlobal = cumNodeProc(nProc)
-
-  ! Allocate the space for the local nodes and element connectivity
-  allocate(nodesLocal(3, nNodesLocal), connLocal(4, nCellsLocal), &
-       clusterCellLocal(nCellsLocal), clusterNodeLocal(NNodesLocal), &
-       nodeIndicesLocal(nNodesLocal))
-
-  iCell = 0
-  iNode = 0
-  ! Second loop over the local walls
-  do nn=1,nDom
-     call setPointers(nn, level, sps)
-     c = clusters(cumDomProc(myid) + nn)
-
-     do mm=1,nBocos
-        if(  BCType(mm) == NSWallAdiabatic .or. &
-             BCType(mm) == NSWallIsothermal .or. &
-             BCType(mm) == EulerWall) then
-
-           select case (BCFaceID(mm))
-           case (iMin)
-              xx   => x(1,:,:,:)
-              ind  => globalNode(1, :, :)
-
-           case (iMax)
-              xx   => x(il,:,:,:)
-              ind  => globalNode(il, :, :)
-
-           case (jMin)
-              xx   => x(:,1,:,:)
-              ind  => globalNode(:, 1, :)
-
-           case (jMax)
-              xx   => x(:,jl,:,:)
-              ind  => globalNode(:, jl, :)
-
-           case (kMin)
-              xx   => x(:,:,1,:)
-              ind  => globalNode(:, :, 1)
-
-           case (kMax)
-              xx   => x(:,:,kl,:)
-              ind  => globalNode(:, :, kl)
-
-           end select
-
-           ! We want to ensure that all the normals of the faces are
-           ! consistent. To ensure this, we enforce that all normals
-           ! are "into" the domain. Therefore we must treat difference
-           ! faces of a block differently. For example for an iLow
-           ! face, when looping over j-k in the regular way, results
-           ! in in a domain inward pointing normal for iLow but
-           ! outward pointing normal for iHigh. The same is true for
-           ! kMin and kMax. However, it is reverse for the J-faces:
-           ! This is becuase the way the pointers are extracted i then
-           ! k is the reverse of what "should" be for consistency. The
-           ! other two, the pointers are cyclic consistent: i,j->k,
-           ! j,k (wrap) ->i, but for the j-direction is is i,k->j when
-           ! to be consistent with the others it should be
-           ! k,i->j. Hope that made sense.
-
-           select case(BCFaceID(mm))
-           case(iMin, jMax, kMin)
-              regularOrdering = .True.
-           case default
-              regularOrdering = .False.
-           end select
-
-           ! Now this can be reversed *again* if we have a block that
-           ! is left handed.
-           if (.not. rightHanded) then
-              regularOrdering = .not. (regularOrdering)
-           end if
-
-           ! Start and end bounds for NODES
-           jBeg = BCData(mm)%jnBeg ; jEnd = BCData(mm)%jnEnd
-           iBeg = BCData(mm)%inBeg ; iEnd = BCData(mm)%inEnd
-
-           ! ni, nj are the number of NODES
-           ni = iEnd - iBeg + 1
-           nj = jEnd - jBeg + 1
-
-           ! Loop over the faces....this is the node sizes - 1
-           if (regularOrdering) then
-              do j=1,nj-1
-                 do i=1,ni-1
-                    iCell = iCell + 1
-                    connLocal(1, iCell) = cumNodeProc(myid) + iNode + (j-1)*ni + i
-                    connLocal(2, iCell) = cumNodeProc(myid) + iNode + (j-1)*ni + i + 1
-                    connLocal(3, iCell) = cumNodeProc(myid) + iNode + (j)*ni + i + 1
-                    connLocal(4, iCell) = cumNodeProc(myid) + iNode + (j)*ni + i
-                    ! Set the cluster
-                    clusterCellLocal(iCell) = c
-                 end do
-              end do
-           else
-              ! Do the reverse ordering
-              do j=1,nj-1
-                 do i=1,ni-1
-                    iCell = iCell + 1
-                    connLocal(1, iCell) = cumNodeProc(myid) + iNode + (j-1)*ni + i
-                    connLocal(2, iCell) = cumNodeProc(myid) + iNode + (j  )*ni + i
-                    connLocal(3, iCell) = cumNodeProc(myid) + iNode + (j)  *ni + i + 1
-                    connLocal(4, iCell) = cumNodeProc(myid) + iNode + (j-1)*ni + i + 1
-
-                    ! Set the cluster
-                    clusterCellLocal(iCell) = c
-                 end do
-              end do
-           end if
-
-           ! Loop over the nodes
-           do j=jBeg,jEnd
-              do i=iBeg,iEnd
-                 iNode = iNode + 1
-                 ! The plus one is for the pointer offset
-                 nodesLocal(:, iNode) = xx(i+1, j+1, :)
-                 clusterNodeLocal(iNode) = c
-                 nodeIndicesLocal(iNode) = ind(i+1, j+1)
-              end do
-           end do
-        end if
-     end do
-  end do
-
-
-  ! Allocate space for the global reduced surface
-  allocate(nodesGlobal(3, nNodesGlobal), connGlobal(4, nCellsGlobal), &
-       clusterCellGlobal(nCellsGlobal), clusterNodeGlobal(nNodesGlobal), &
-       nodeIndicesGlobal(nNodesGlobal))
-
-  ! Communicate the nodes, connectivity and cluster information to everyone
-  call mpi_allgatherv(nodesLocal, 3*nNodesLocal, adflow_real, &
-       nodesGlobal, nNodeProc*3, cumNodeProc*3, adflow_real, &
-       adflow_comm_world, ierr)
-  call EChk(ierr, __FILE__, __LINE__)
-
-  call mpi_allgatherv(clusterNodeLocal, nNodesLocal, adflow_integer, &
-       clusterNodeGlobal, nNodeProc, cumNodeProc, adflow_integer, &
-       adflow_comm_world, ierr)
-  call EChk(ierr, __FILE__, __LINE__)
-
-  call mpi_allgatherv(nodeIndicesLocal, nNodesLocal, adflow_integer, &
-       nodeIndicesGlobal, nNodeProc, cumNodeProc, adflow_integer, &
-       adflow_comm_world, ierr)
-  call EChk(ierr, __FILE__, __LINE__)
-
-  call mpi_allgatherv(connLocal, 4*nCellsLocal, adflow_integer, &
-       connGlobal, nCellProc*4, cumCellProc*4, adflow_integer, &
-       adflow_comm_world, ierr)
-  call EChk(ierr, __FILE__, __LINE__)
-
-  call mpi_allgatherv(clusterCellLocal, nCellsLocal, adflow_integer, &
-       clusterCellGlobal, nCellProc, cumCellProc, adflow_integer, &
-       adflow_comm_world, ierr)
-  call EChk(ierr, __FILE__, __LINE__)
-
-  ! Free the local data we do not need anymore
-  deallocate(nodesLocal, connLocal, clusterCellLocal, clusterNodeLocal, &
-       nCellProc, cumCellProc, nNodeProc, cumNodeProc, nodeIndicesLocal)
-
-  ! We will now build separate trees for each cluster.
-  allocate(nodesPerCluster(nClusters), cellsPerCluster(nClusters), &
-       cnc(nClusters), ccc(nClusters))
-  nodesPerCluster = 0
-  cellsPerCluster = 0
-
-  ! Count up the total number of elements and nodes on each cluster
-  do i=1, nCellsGlobal
-     cellsPerCluster(clusterCellGlobal(i)) = cellsPerCluster(clusterCellGlobal(i)) + 1
-  end do
-
-  do i=1, nNodesGlobal
-     nodesPerCluster(clusterNodeGlobal(i)) = nodesPerCluster(clusterNodeGlobal(i)) + 1
-  end do
-
-  ! Create the list of the walls. We are reusing the overset wall derived type here.
-  allocate(walls(nClusters))
-  allocate(localNodeNums(nNodesGlobal))
-
-  ! Allocate the memory for each of the cluster nodes
-  do i=1, nClusters
-     nNodes = nodesPerCluster(i)
-     nCells = cellsPerCluster(i)
-     walls(i)%nCells = nCells
-     walls(i)%nNodes = nNodes
-
-     allocate(walls(i)%x(3, nNodes), walls(i)%conn(4, nCells), &
-          walls(i)%ind(nNodes))
-  end do
-
-  ! We now loop through the master list of nodes and elements and
-  !  "push" them back to where they should go. We also keep track of
-  !  the local node numbers so that the cluster surcells can update
-  !  their own conn.
-  localNodeNums = 0
-  cnc = 0
-  do i=1, nNodesGlobal
-     c = clusterNodeGlobal(i) ! Cluter this node belongs to
-     cnc(c) = cnc(c) + 1 ! "cluster node count:" the 'nth' node for this  cluster
-
-     walls(c)%x(:, cnc(c))= nodesGlobal(:, i)
-     walls(c)%ind(cnc(c)) = nodeIndicesGlobal(i)
-     localNodeNums(i) = cnc(c)
-
-  end do
-
-
-  ccc = 0
-  do i=1, nCellsGlobal
-     c = clusterCellGlobal(i)
-     ccc(c) = ccc(c) + 1 ! "Cluster cell count" the 'nth' cell for this cluster
-     walls(c)%conn(:, ccc(c)) = connGlobal(:, i)
-  end do
-
-
-  do i=1, nClusters
-
-     nCells = walls(i)%nCells
-     nNodes = walls(i)%nNodes
-
-     ! Fistly we need to update the conn to use our local node ordering.
-     do j=1, nCells
-        do k=1, 4
-           walls(i)%conn(k, j) = localNodeNums(walls(i)%conn(k, j))
+    ! This routine will flag the iBlank values in oBlocks with 0 if the
+    ! the cell center falls inside the body. This is a (semi) parallel
+    ! implementation: The global surface mesh is communicated to all
+    ! processors who then search accordingly. This scalable in terms of
+    ! computation but not strictly memory.
+    use adtLocalSearch, only: minDistanceTreeSearchSinglePoint
+    use adtBuild, only: buildSerialQuad, destroySerialHex
+    use blockPointers
+    use wallDistanceData
+    use communication
+    use inputphysics
+    use inputTimeSpectral
+    use oversetData
+    use inputOverset
+    use adjointVars, only: totalVolumeNodes => nNodesLocal, totalVolumeCells => nCellsLocal
+    implicit none
+
+    ! Input Variables
+    integer(kind=intType), intent(in) :: level, sps
+
+    ! Local Variables
+    integer(kind=intType) :: i, j, k, l, ii, jj, kk, nn, mm, iNode, iCell, c
+    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, ni, nj, nUnique, cellID, cellID2
+    integer(kind=intType) :: ierr, iDim
+
+    ! Data for local surface
+    integer(kind=intType) :: nNodes, nCells
+    integer(kind=intType) :: nNodesLocal, nCellsLocal
+    integer(kind=intType), dimension(:, :), allocatable :: connLocal
+    integer(kind=intType), dimension(:), allocatable :: clusterNodeLocal
+    integer(kind=intType), dimension(:), allocatable :: clusterCellLocal
+    real(kind=realType), dimension(:, :), allocatable :: nodesLocal
+    real(kind=realType), dimension(:, :, :), pointer :: xx
+    integer(kind=intType), dimension(:, :), pointer :: ind
+    real(kind=realType) :: myDist, otherDist, timeA
+    logical :: regularOrdering, nearAWall
+
+    ! Overset Walls for storing the surface ADT's
+    type(oversetWall), dimension(:), allocatable, target :: walls
+    type(oversetWall), target :: fullWall
+
+    ! Data for global surface
+    integer(kind=intTYpe) :: nNodesGlobal, nCellsGlobal
+    integer(kind=intType), dimension(:, :), allocatable, target :: connGlobal
+    real(kind=realType), dimension(:, :), allocatable, target :: nodesGlobal
+    integer(kind=intType), dimension(:), allocatable, target :: nodeIndicesGlobal
+
+    integer(kind=intType), dimension(:), allocatable :: nodesPerCluster, cellsPerCluster, cnc, ccc
+    integer(kind=intType), dimension(:), allocatable :: clusterNodeGlobal
+    integer(kind=intType), dimension(:), allocatable :: clusterCellGlobal
+    integer(kind=intType), dimension(:), allocatable :: localNodeNums
+    integer(kind=intType), dimension(:), allocatable :: nodeIndicesLocal
+
+    integer(kind=intType), dimension(:), allocatable :: nCellProc, cumCellProc
+    integer(kind=intType), dimension(:), allocatable :: nNodeProc, cumNodeProc
+    real(kind=realType), dimension(:, :), allocatable :: uniqueNodes
+    integer(kind=intType), dimension(:), allocatable :: link, indicesToGet
+
+    ! Pointers for easier readibility
+    integer(kind=intType), dimension(:, :), pointer :: conn
+    real(kind=realType), dimension(:, :), pointer :: nodes, norm
+    integer(kind=intType), dimension(:), pointer :: tmpInd
+
+    ! Data for the ADT
+    integer(kind=intType) :: intInfo(3), intInfo2(3)
+    real(kind=realType) :: coor(4), uvw(5), uvw2(5)
+    real(kind=realType), dimension(3, 2) :: dummy
+    real(kind=realType), parameter :: tol = 1e-12
+    integer(kind=intType), dimension(:), pointer :: frontLeaves, frontLeavesNew, BBint
+    type(adtBBoxTargetType), dimension(:), pointer :: BB
+
+    ! Misc
+    real(kind=realType) :: dp, shp(4)
+    real(kind=realType), dimension(3) :: xp, normal, v1
+
+    ! The first thing we do is gather all the surface nodes to
+    ! each processor such that every processor can make it's own copy of
+    ! the complete surface mesh to use to search. Note that this
+    ! procedure *DOES NOT SCALE IN MEMORY*...ie eventually the surface
+    ! mesh will become too large to store on a single processor,
+    ! although this will probably not happen until the sizes get up in
+    ! the hundreds of millions of cells.
+    timea = mpi_wtime()
+
+    nNodesLocal = 0
+    nCellsLocal = 0
+
+    do nn = 1, nDom
+        call setPointers(nn, level, sps)
+        do mm = 1, nBocos
+            if (BCType(mm) == NSWallAdiabatic .or. &
+                BCType(mm) == NSWallIsothermal .or. &
+                BCType(mm) == EulerWall) then
+                iBeg = bcData(mm)%inBeg
+                iEnd = bcData(mm)%inEnd
+                jBeg = bcData(mm)%jnBeg
+                jEnd = bcData(mm)%jnEnd
+
+                nNodesLocal = nNodesLocal + &
+                              (iEnd - iBeg + 1) * (jEnd - jBeg + 1)
+                nCellsLocal = nCellsLocal + &
+                              (iEnd - iBeg) * (jEnd - jBeg)
+            end if
         end do
-     end do
+    end do
 
-     ! Allocate temporary space for doing the point reduction.
-     allocate(uniqueNodes(3, nNodes), link(nNodes))
+    ! Now communicate these sizes with everyone
+    allocate (nCellProc(nProc), cumCellProc(0:nProc), &
+              nNodeProc(nProc), cumNodeProc(0:nProc))
 
-     call pointReduce(walls(i)%x, nNodes, tol, uniqueNodes, link, nUnique)
+    call mpi_allgather(nCellsLocal, 1, adflow_integer, nCellProc, 1, adflow_integer, &
+                       adflow_comm_world, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
+    call mpi_allgather(nNodesLocal, 1, adflow_integer, nNodeProc, 1, adflow_integer, &
+                       adflow_comm_world, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
+    ! Now make cumulative versions of these
+    cumCellProc(0) = 0
+    cumNodeProc(0) = 0
+    do i = 1, nProc
+        cumCellProc(i) = cumCellProc(i - 1) + nCellProc(i)
+        cumNodeProc(i) = cumNodeProc(i - 1) + nNodeProc(i)
+    end do
 
+    ! And save the total number of nodes and cells for reference
+    nCellsGlobal = cumCellProc(nProc)
+    nNodesGlobal = cumNodeProc(nProc)
+
+    ! Allocate the space for the local nodes and element connectivity
+    allocate (nodesLocal(3, nNodesLocal), connLocal(4, nCellsLocal), &
+              clusterCellLocal(nCellsLocal), clusterNodeLocal(NNodesLocal), &
+              nodeIndicesLocal(nNodesLocal))
+
+    iCell = 0
+    iNode = 0
+    ! Second loop over the local walls
+    do nn = 1, nDom
+        call setPointers(nn, level, sps)
+        c = clusters(cumDomProc(myid) + nn)
+
+        do mm = 1, nBocos
+            if (BCType(mm) == NSWallAdiabatic .or. &
+                BCType(mm) == NSWallIsothermal .or. &
+                BCType(mm) == EulerWall) then
+
+                select case (BCFaceID(mm))
+                case (iMin)
+                    xx => x(1, :, :, :)
+                    ind => globalNode(1, :, :)
+
+                case (iMax)
+                    xx => x(il, :, :, :)
+                    ind => globalNode(il, :, :)
+
+                case (jMin)
+                    xx => x(:, 1, :, :)
+                    ind => globalNode(:, 1, :)
+
+                case (jMax)
+                    xx => x(:, jl, :, :)
+                    ind => globalNode(:, jl, :)
+
+                case (kMin)
+                    xx => x(:, :, 1, :)
+                    ind => globalNode(:, :, 1)
+
+                case (kMax)
+                    xx => x(:, :, kl, :)
+                    ind => globalNode(:, :, kl)
+
+                end select
+
+                ! We want to ensure that all the normals of the faces are
+                ! consistent. To ensure this, we enforce that all normals
+                ! are "into" the domain. Therefore we must treat difference
+                ! faces of a block differently. For example for an iLow
+                ! face, when looping over j-k in the regular way, results
+                ! in in a domain inward pointing normal for iLow but
+                ! outward pointing normal for iHigh. The same is true for
+                ! kMin and kMax. However, it is reverse for the J-faces:
+                ! This is becuase the way the pointers are extracted i then
+                ! k is the reverse of what "should" be for consistency. The
+                ! other two, the pointers are cyclic consistent: i,j->k,
+                ! j,k (wrap) ->i, but for the j-direction is is i,k->j when
+                ! to be consistent with the others it should be
+                ! k,i->j. Hope that made sense.
+
+                select case (BCFaceID(mm))
+                case (iMin, jMax, kMin)
+                    regularOrdering = .True.
+                case default
+                    regularOrdering = .False.
+                end select
+
+                ! Now this can be reversed *again* if we have a block that
+                ! is left handed.
+                if (.not. rightHanded) then
+                    regularOrdering = .not. (regularOrdering)
+                end if
+
+                ! Start and end bounds for NODES
+                jBeg = BCData(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+                iBeg = BCData(mm)%inBeg; iEnd = BCData(mm)%inEnd
+
+                ! ni, nj are the number of NODES
+                ni = iEnd - iBeg + 1
+                nj = jEnd - jBeg + 1
+
+                ! Loop over the faces....this is the node sizes - 1
+                if (regularOrdering) then
+                    do j = 1, nj - 1
+                        do i = 1, ni - 1
+                            iCell = iCell + 1
+                            connLocal(1, iCell) = cumNodeProc(myid) + iNode + (j - 1) * ni + i
+                            connLocal(2, iCell) = cumNodeProc(myid) + iNode + (j - 1) * ni + i + 1
+                            connLocal(3, iCell) = cumNodeProc(myid) + iNode + (j) * ni + i + 1
+                            connLocal(4, iCell) = cumNodeProc(myid) + iNode + (j) * ni + i
+                            ! Set the cluster
+                            clusterCellLocal(iCell) = c
+                        end do
+                    end do
+                else
+                    ! Do the reverse ordering
+                    do j = 1, nj - 1
+                        do i = 1, ni - 1
+                            iCell = iCell + 1
+                            connLocal(1, iCell) = cumNodeProc(myid) + iNode + (j - 1) * ni + i
+                            connLocal(2, iCell) = cumNodeProc(myid) + iNode + (j) * ni + i
+                            connLocal(3, iCell) = cumNodeProc(myid) + iNode + (j) * ni + i + 1
+                            connLocal(4, iCell) = cumNodeProc(myid) + iNode + (j - 1) * ni + i + 1
+
+                            ! Set the cluster
+                            clusterCellLocal(iCell) = c
+                        end do
+                    end do
+                end if
+
+                ! Loop over the nodes
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        iNode = iNode + 1
+                        ! The plus one is for the pointer offset
+                        nodesLocal(:, iNode) = xx(i + 1, j + 1, :)
+                        clusterNodeLocal(iNode) = c
+                        nodeIndicesLocal(iNode) = ind(i + 1, j + 1)
+                    end do
+                end do
+            end if
+        end do
+    end do
 
-     ! Update the global indices. Use the returned link
-     tmpInd => walls(i)%ind
-     allocate(walls(i)%ind(nUnique))
-     do j=1, walls(i)%nNodes
-        walls(i)%ind(link(j)) = tmpInd(j)
-     end do
-     deallocate(tmpInd)
+    ! Allocate space for the global reduced surface
+    allocate (nodesGlobal(3, nNodesGlobal), connGlobal(4, nCellsGlobal), &
+              clusterCellGlobal(nCellsGlobal), clusterNodeGlobal(nNodesGlobal), &
+              nodeIndicesGlobal(nNodesGlobal))
+
+    ! Communicate the nodes, connectivity and cluster information to everyone
+    call mpi_allgatherv(nodesLocal, 3 * nNodesLocal, adflow_real, &
+                        nodesGlobal, nNodeProc * 3, cumNodeProc * 3, adflow_real, &
+                        adflow_comm_world, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
+
+    call mpi_allgatherv(clusterNodeLocal, nNodesLocal, adflow_integer, &
+                        clusterNodeGlobal, nNodeProc, cumNodeProc, adflow_integer, &
+                        adflow_comm_world, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
+
+    call mpi_allgatherv(nodeIndicesLocal, nNodesLocal, adflow_integer, &
+                        nodeIndicesGlobal, nNodeProc, cumNodeProc, adflow_integer, &
+                        adflow_comm_world, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
+
+    call mpi_allgatherv(connLocal, 4 * nCellsLocal, adflow_integer, &
+                        connGlobal, nCellProc * 4, cumCellProc * 4, adflow_integer, &
+                        adflow_comm_world, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
+
+    call mpi_allgatherv(clusterCellLocal, nCellsLocal, adflow_integer, &
+                        clusterCellGlobal, nCellProc, cumCellProc, adflow_integer, &
+                        adflow_comm_world, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
+
+    ! Free the local data we do not need anymore
+    deallocate (nodesLocal, connLocal, clusterCellLocal, clusterNodeLocal, &
+                nCellProc, cumCellProc, nNodeProc, cumNodeProc, nodeIndicesLocal)
+
+    ! We will now build separate trees for each cluster.
+    allocate (nodesPerCluster(nClusters), cellsPerCluster(nClusters), &
+              cnc(nClusters), ccc(nClusters))
+    nodesPerCluster = 0
+    cellsPerCluster = 0
+
+    ! Count up the total number of elements and nodes on each cluster
+    do i = 1, nCellsGlobal
+        cellsPerCluster(clusterCellGlobal(i)) = cellsPerCluster(clusterCellGlobal(i)) + 1
+    end do
 
-     ! Reset the number of nodes to be number of unique nodes
-     nNodes = nUnique
-     walls(i)%nNodes = nNodes
+    do i = 1, nNodesGlobal
+        nodesPerCluster(clusterNodeGlobal(i)) = nodesPerCluster(clusterNodeGlobal(i)) + 1
+    end do
 
-     ! Update the nodes with the unique ones.
-     do j=1, nUnique
-        walls(i)%x(:, j) = uniqueNodes(:, j)
-     end do
+    ! Create the list of the walls. We are reusing the overset wall derived type here.
+    allocate (walls(nClusters))
+    allocate (localNodeNums(nNodesGlobal))
 
-     ! Update conn using the link:
-     do j=1, nCells
-        do k=1, 4
-           walls(i)%conn(k, j) = link(walls(i)%conn(k, j))
-        end do
-     end do
+    ! Allocate the memory for each of the cluster nodes
+    do i = 1, nClusters
+        nNodes = nodesPerCluster(i)
+        nCells = cellsPerCluster(i)
+        walls(i)%nCells = nCells
+        walls(i)%nNodes = nNodes
 
-     ! Unique nodes and link are no longer needed
-     deallocate(link, uniqueNodes)
+        allocate (walls(i)%x(3, nNodes), walls(i)%conn(4, nCells), &
+                  walls(i)%ind(nNodes))
+    end do
 
-     call buildSerialQuad(nCells, nNodes, walls(i)%x, walls(i)%conn, walls(i)%ADT)
-     call buildUniqueNormal(walls(i))
-  end do
+    ! We now loop through the master list of nodes and elements and
+    !  "push" them back to where they should go. We also keep track of
+    !  the local node numbers so that the cluster surcells can update
+    !  their own conn.
+    localNodeNums = 0
+    cnc = 0
+    do i = 1, nNodesGlobal
+        c = clusterNodeGlobal(i) ! Cluter this node belongs to
+        cnc(c) = cnc(c) + 1 ! "cluster node count:" the 'nth' node for this  cluster
 
-  if (oversetPresent) then
-     ! Finally build up a "full wall" that is made up of all the cluster
-     ! walls. Note that we can reuse the space previously allocated for
-     ! the global data, namely, nodes and conn. These will be slightly
-     ! larger than necessary becuase of the point reduce.
+        walls(c)%x(:, cnc(c)) = nodesGlobal(:, i)
+        walls(c)%ind(cnc(c)) = nodeIndicesGlobal(i)
+        localNodeNums(i) = cnc(c)
 
-     fullWall%x => nodesGlobal
-     fullWall%conn => connGlobal
-     fullWall%ind => nodeIndicesGlobal
-     allocate(fullWall%norm(3, nNodesGlobal))
+    end do
 
-     nNodes = 0
-     nCells = 0
-     ii = 0
-     do i=1, nClusters
+    ccc = 0
+    do i = 1, nCellsGlobal
+        c = clusterCellGlobal(i)
+        ccc(c) = ccc(c) + 1 ! "Cluster cell count" the 'nth' cell for this cluster
+        walls(c)%conn(:, ccc(c)) = connGlobal(:, i)
+    end do
 
-        ! Add in the nodes/elements from this cluster
+    do i = 1, nClusters
 
-        do j=1, walls(i)%nNodes
-           nNodes = nNodes + 1
-           fullWall%x(:, nNodes) = walls(i)%x(:, j)
-           fullWall%ind(nNodes) = walls(i)%ind(j)
-        end do
+        nCells = walls(i)%nCells
+        nNodes = walls(i)%nNodes
 
-        do j=1, walls(i)%nCells
-           nCells = nCells + 1
-           fullWall%conn(:, nCells) = walls(i)%conn(:, j) + ii
+        ! Fistly we need to update the conn to use our local node ordering.
+        do j = 1, nCells
+            do k = 1, 4
+                walls(i)%conn(k, j) = localNodeNums(walls(i)%conn(k, j))
+            end do
         end do
 
-        ! Increment the node offset
-        ii = ii + walls(i)%nNodes
-     end do
-
-     ! Finish the setup of the full wall.
-     fullWall%nCells = nCells
-     fullWall%nNodes = nNodes
-     call buildSerialQuad(nCells, nNodes, fullWall%x, fullWall%conn, fullWall%ADT)
-     call buildUniqueNormal(fullWall)
-  end if
-
-  ! Allocate the (pointer) memory that may be resized as necessary for
-  ! the singlePoint search routine.
-  allocate(stack(100), BB(20), BBint(20), frontLeaves(25), frontLeavesNew(25))
-
-
-  ! We need to store the 4 global node indices defining the quad that
-  ! each point has the closest point wrt. We also ned to store the uv
-  ! values. This allows us to recompute the exact surface point, after
-  ! the rquired nodes are fetched from (a possibly) remote proc.
-
-  do nn=1,nDom
-     call setPointers(nn, level, sps)
-
-     ! Check if elemID and uv are allocated yet.
-     if (.not. associated(flowDoms(nn,level,sps)%surfNodeIndices)) then
-        allocate(flowDoms(nn,level,sps)%surfNodeIndices(4, 2:il, 2:jl, 2:kl))
-        allocate(flowDoms(nn,level,sps)%uv(2, 2:il, 2:jl, 2:kl))
-     end if
-
-     ! Set the cluster for this block
-     c = clusters(cumDomProc(myid) + nn)
-     conn => fullWall%conn
-     nodes => fullWall%x
-     norm => fullWall%norm
-
-     do k=2, kl
-        do j=2, jl
-           do i=2, il
-
-              ! Compute the coordinates of the cell center
-              coor(1) = eighth*(x(i-1,j-1,k-1,1) + x(i,j-1,k-1,1)  &
-                   +         x(i-1,j,  k-1,1) + x(i,j,  k-1,1)  &
-                   +         x(i-1,j-1,k,  1) + x(i,j-1,k,  1)  &
-                   +         x(i-1,j,  k,  1) + x(i,j,  k,  1))
-
-              coor(2) = eighth*(x(i-1,j-1,k-1,2) + x(i,j-1,k-1,2)  &
-                   +         x(i-1,j,  k-1,2) + x(i,j,  k-1,2)  &
-                   +         x(i-1,j-1,k,  2) + x(i,j-1,k,  2)  &
-                   +         x(i-1,j,  k,  2) + x(i,j,  k,  2))
-
-              coor(3) = eighth*(x(i-1,j-1,k-1,3) + x(i,j-1,k-1,3)  &
-                   +         x(i-1,j,  k-1,3) + x(i,j,  k-1,3)  &
-                   +         x(i-1,j-1,k,  3) + x(i,j-1,k,  3)  &
-                   +         x(i-1,j,  k,  3) + x(i,j,  k,  3))
-
-              if (.not. oversetPresent) then
-                 ! No overset present. Simply search our own wall,
-                 ! walls(c), up to the wall cutoff.
-                 coor(4) = wallDistCutoff**2
-                 intInfo(3) = 0 ! Must be initialized since the search
-                                ! may not find closer point.
-                 call minDistancetreeSearchSinglePoint(walls(c)%ADT, coor, intInfo, &
-                      uvw, dummy, 0, BB, frontLeaves, frontLeavesNew)
-
-                 cellID = intInfo(3)
-                 if (cellID > 0) then
-                    do kk=1,4
-                       flowDoms(nn, level, sps)%surfNodeIndices(kk, i, j, k) = &
-                            walls(c)%ind(walls(c)%conn(kk, cellID))
-                    end do
-                    flowDoms(nn, level, sps)%uv(:, i, j, k) = uvw(1:2)
-                 else
-                    ! Just set dummy values. These will never be used.
-                    flowDoms(nn, level, sps)%surfNodeIndices(:, i, j, k) = 0
-                    flowDoms(nn, level, sps)%uv(:, i, j, k) = 0
-                 end if
-
-                 ! We are done with this point.
-                 cycle
-              end if
+        ! Allocate temporary space for doing the point reduction.
+        allocate (uniqueNodes(3, nNodes), link(nNodes))
 
-              ! This is now the (possibly) overlapping surface mesh
-              ! case. It is somewhat more complex since we use the
-              ! same searches to flag cells that are inside the body.
+        call pointReduce(walls(i)%x, nNodes, tol, uniqueNodes, link, nUnique)
 
-              coor(4) = wallDistCutoff**2
-              intInfo(3) = 0
-              call minDistancetreeSearchSinglePoint(fullWall%ADT, coor, &
-                   intInfo, uvw, dummy, 0, BB, frontLeaves, frontLeavesNew)
-              cellID = intInfo(3)
+        ! Update the global indices. Use the returned link
+        tmpInd => walls(i)%ind
+        allocate (walls(i)%ind(nUnique))
+        do j = 1, walls(i)%nNodes
+            walls(i)%ind(link(j)) = tmpInd(j)
+        end do
+        deallocate (tmpInd)
 
-              if (cellID > 0) then
+        ! Reset the number of nodes to be number of unique nodes
+        nNodes = nUnique
+        walls(i)%nNodes = nNodes
 
+        ! Update the nodes with the unique ones.
+        do j = 1, nUnique
+            walls(i)%x(:, j) = uniqueNodes(:, j)
+        end do
 
-                 if (uvw(4) > nearWallDist**2 .or. walls(c)%nCells == 0) then
+        ! Update conn using the link:
+        do j = 1, nCells
+            do k = 1, 4
+                walls(i)%conn(k, j) = link(walls(i)%conn(k, j))
+            end do
+        end do
 
-                    call checkInside()
+        ! Unique nodes and link are no longer needed
+        deallocate (link, uniqueNodes)
 
-                    ! We found a point within the wallDist cutoff OR the
-                    ! cell is from a cluster with no walls, ie a
-                    ! background cell. Accept it's wall distance, since
-                    ! it guaranteed to be correct.
+        call buildSerialQuad(nCells, nNodes, walls(i)%x, walls(i)%conn, walls(i)%ADT)
+        call buildUniqueNormal(walls(i))
+    end do
 
-                    do kk=1,4
-                       flowDoms(nn, level, sps)%surfNodeIndices(kk, i, j, k) = &
-                            fullWall%ind(fullWall%conn(kk, cellID))
-                    end do
-                    flowDoms(nn, level, sps)%uv(:, i, j, k) = uvw(1:2)
+    if (oversetPresent) then
+        ! Finally build up a "full wall" that is made up of all the cluster
+        ! walls. Note that we can reuse the space previously allocated for
+        ! the global data, namely, nodes and conn. These will be slightly
+        ! larger than necessary becuase of the point reduce.
+
+        fullWall%x => nodesGlobal
+        fullWall%conn => connGlobal
+        fullWall%ind => nodeIndicesGlobal
+        allocate (fullWall%norm(3, nNodesGlobal))
+
+        nNodes = 0
+        nCells = 0
+        ii = 0
+        do i = 1, nClusters
+
+            ! Add in the nodes/elements from this cluster
+
+            do j = 1, walls(i)%nNodes
+                nNodes = nNodes + 1
+                fullWall%x(:, nNodes) = walls(i)%x(:, j)
+                fullWall%ind(nNodes) = walls(i)%ind(j)
+            end do
+
+            do j = 1, walls(i)%nCells
+                nCells = nCells + 1
+                fullWall%conn(:, nCells) = walls(i)%conn(:, j) + ii
+            end do
+
+            ! Increment the node offset
+            ii = ii + walls(i)%nNodes
+        end do
 
-                 else
+        ! Finish the setup of the full wall.
+        fullWall%nCells = nCells
+        fullWall%nNodes = nNodes
+        call buildSerialQuad(nCells, nNodes, fullWall%x, fullWall%conn, fullWall%ADT)
+        call buildUniqueNormal(fullWall)
+    end if
 
-                    ! This point is *closer* than the nearWallDist AND
-                    ! it has a wall. Search for our own wall.
+    ! Allocate the (pointer) memory that may be resized as necessary for
+    ! the singlePoint search routine.
+    allocate (stack(100), BB(20), BBint(20), frontLeaves(25), frontLeavesNew(25))
 
-                    coor(4) = large
-                    call minDistancetreeSearchSinglePoint(walls(c)%ADT, coor, &
-                         intInfo2, uvw2, dummy, 0, BB, frontLeaves, frontLeavesNew)
-                    cellID2 = intInfo2(3)
+    ! We need to store the 4 global node indices defining the quad that
+    ! each point has the closest point wrt. We also ned to store the uv
+    ! values. This allows us to recompute the exact surface point, after
+    ! the rquired nodes are fetched from (a possibly) remote proc.
 
-                    if (uvw2(4) < nearWallDist**2) then
-                       ! Both are close to the wall. Accept the one from our own wall.
-                       do kk=1,4
-                          flowDoms(nn, level, sps)%surfNodeIndices(kk, i, j, k) = &
-                               walls(c)%ind(walls(c)%conn(kk, cellID2))
-                       end do
-                       flowDoms(nn, level, sps)%uv(:, i, j, k) = uvw2(1:2)
-                    else
-                       ! We have already found a closer point from the
-                       ! full wall.  This means we need to check if it
-                       ! is inside.
+    do nn = 1, nDom
+        call setPointers(nn, level, sps)
 
-                       call checkInside()
+        ! Check if elemID and uv are allocated yet.
+        if (.not. associated(flowDoms(nn, level, sps)%surfNodeIndices)) then
+            allocate (flowDoms(nn, level, sps)%surfNodeIndices(4, 2:il, 2:jl, 2:kl))
+            allocate (flowDoms(nn, level, sps)%uv(2, 2:il, 2:jl, 2:kl))
+        end if
 
-                       ! And save the wall-dist info we already had
-                       ! computed from the full wall search
+        ! Set the cluster for this block
+        c = clusters(cumDomProc(myid) + nn)
+        conn => fullWall%conn
+        nodes => fullWall%x
+        norm => fullWall%norm
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Compute the coordinates of the cell center
+                    coor(1) = eighth * (x(i - 1, j - 1, k - 1, 1) + x(i, j - 1, k - 1, 1) &
+                                        + x(i - 1, j, k - 1, 1) + x(i, j, k - 1, 1) &
+                                        + x(i - 1, j - 1, k, 1) + x(i, j - 1, k, 1) &
+                                        + x(i - 1, j, k, 1) + x(i, j, k, 1))
+
+                    coor(2) = eighth * (x(i - 1, j - 1, k - 1, 2) + x(i, j - 1, k - 1, 2) &
+                                        + x(i - 1, j, k - 1, 2) + x(i, j, k - 1, 2) &
+                                        + x(i - 1, j - 1, k, 2) + x(i, j - 1, k, 2) &
+                                        + x(i - 1, j, k, 2) + x(i, j, k, 2))
+
+                    coor(3) = eighth * (x(i - 1, j - 1, k - 1, 3) + x(i, j - 1, k - 1, 3) &
+                                        + x(i - 1, j, k - 1, 3) + x(i, j, k - 1, 3) &
+                                        + x(i - 1, j - 1, k, 3) + x(i, j - 1, k, 3) &
+                                        + x(i - 1, j, k, 3) + x(i, j, k, 3))
+
+                    if (.not. oversetPresent) then
+                        ! No overset present. Simply search our own wall,
+                        ! walls(c), up to the wall cutoff.
+                        coor(4) = wallDistCutoff**2
+                        intInfo(3) = 0 ! Must be initialized since the search
+                        ! may not find closer point.
+                        call minDistancetreeSearchSinglePoint(walls(c)%ADT, coor, intInfo, &
+                                                              uvw, dummy, 0, BB, frontLeaves, frontLeavesNew)
+
+                        cellID = intInfo(3)
+                        if (cellID > 0) then
+                            do kk = 1, 4
+                                flowDoms(nn, level, sps)%surfNodeIndices(kk, i, j, k) = &
+                                    walls(c)%ind(walls(c)%conn(kk, cellID))
+                            end do
+                            flowDoms(nn, level, sps)%uv(:, i, j, k) = uvw(1:2)
+                        else
+                            ! Just set dummy values. These will never be used.
+                            flowDoms(nn, level, sps)%surfNodeIndices(:, i, j, k) = 0
+                            flowDoms(nn, level, sps)%uv(:, i, j, k) = 0
+                        end if
+
+                        ! We are done with this point.
+                        cycle
+                    end if
 
-                       do kk=1,4
-                          flowDoms(nn, level, sps)%surfNodeIndices(kk, i, j, k) = &
-                               fullWall%ind(fullWall%conn(kk, cellID))
-                       end do
-                       flowDoms(nn, level, sps)%uv(:, i, j, k) = uvw(1:2)
+                    ! This is now the (possibly) overlapping surface mesh
+                    ! case. It is somewhat more complex since we use the
+                    ! same searches to flag cells that are inside the body.
 
-                    end if
-                 end if
-              else
-
-                 ! What happend here is a cell is outside the
-                 ! wallDistCutoff. We don't care about wall distance
-                 ! info here so just set dummy info.
-
-                 flowDoms(nn, level, sps)%surfNodeIndices(:, i, j, k) = 0
-                 flowDoms(nn, level, sps)%uv(:, i, j, k) = 0
-
-                 ! HOWEVER, It is possible that this cell is actually
-                 ! inside the body. To quickly check, run the ray cast
-                 ! algo.
-
-                 call intersectionTreeSearchSinglePoint(fullWall%ADT, coor(1:3), &
-                      intInfo(1), BBint, frontLeaves, frontLeavesNew)
-
-                 ! If we never found *any* intersections, cannot
-                 ! possibly be inside, and there is nothing else to do.
-                 if (intInfo(1) == 0) then
-                    cycle
-                 else
-                    ! We found a ray cast intersection. Looks like it
-                    ! might actually be inside the surface after
-                    ! all. Re-run the full distance search with no
-                    ! dist cutoff.
-                    coor(4) = large
+                    coor(4) = wallDistCutoff**2
+                    intInfo(3) = 0
                     call minDistancetreeSearchSinglePoint(fullWall%ADT, coor, &
-                         intInfo, uvw, dummy, 0, BB, frontLeaves, frontLeavesNew)
+                                                          intInfo, uvw, dummy, 0, BB, frontLeaves, frontLeavesNew)
                     cellID = intInfo(3)
 
-                    ! Determine if it is inside:
-                    call checkInside()
+                    if (cellID > 0) then
+
+                        if (uvw(4) > nearWallDist**2 .or. walls(c)%nCells == 0) then
+
+                            call checkInside()
 
-                 end if
-              end if
-           end do
+                            ! We found a point within the wallDist cutoff OR the
+                            ! cell is from a cluster with no walls, ie a
+                            ! background cell. Accept it's wall distance, since
+                            ! it guaranteed to be correct.
+
+                            do kk = 1, 4
+                                flowDoms(nn, level, sps)%surfNodeIndices(kk, i, j, k) = &
+                                    fullWall%ind(fullWall%conn(kk, cellID))
+                            end do
+                            flowDoms(nn, level, sps)%uv(:, i, j, k) = uvw(1:2)
+
+                        else
+
+                            ! This point is *closer* than the nearWallDist AND
+                            ! it has a wall. Search for our own wall.
+
+                            coor(4) = large
+                            call minDistancetreeSearchSinglePoint(walls(c)%ADT, coor, &
+                                                              intInfo2, uvw2, dummy, 0, BB, frontLeaves, frontLeavesNew)
+                            cellID2 = intInfo2(3)
+
+                            if (uvw2(4) < nearWallDist**2) then
+                                ! Both are close to the wall. Accept the one from our own wall.
+                                do kk = 1, 4
+                                    flowDoms(nn, level, sps)%surfNodeIndices(kk, i, j, k) = &
+                                        walls(c)%ind(walls(c)%conn(kk, cellID2))
+                                end do
+                                flowDoms(nn, level, sps)%uv(:, i, j, k) = uvw2(1:2)
+                            else
+                                ! We have already found a closer point from the
+                                ! full wall.  This means we need to check if it
+                                ! is inside.
+
+                                call checkInside()
+
+                                ! And save the wall-dist info we already had
+                                ! computed from the full wall search
+
+                                do kk = 1, 4
+                                    flowDoms(nn, level, sps)%surfNodeIndices(kk, i, j, k) = &
+                                        fullWall%ind(fullWall%conn(kk, cellID))
+                                end do
+                                flowDoms(nn, level, sps)%uv(:, i, j, k) = uvw(1:2)
+
+                            end if
+                        end if
+                    else
+
+                        ! What happend here is a cell is outside the
+                        ! wallDistCutoff. We don't care about wall distance
+                        ! info here so just set dummy info.
+
+                        flowDoms(nn, level, sps)%surfNodeIndices(:, i, j, k) = 0
+                        flowDoms(nn, level, sps)%uv(:, i, j, k) = 0
+
+                        ! HOWEVER, It is possible that this cell is actually
+                        ! inside the body. To quickly check, run the ray cast
+                        ! algo.
+
+                        call intersectionTreeSearchSinglePoint(fullWall%ADT, coor(1:3), &
+                                                               intInfo(1), BBint, frontLeaves, frontLeavesNew)
+
+                        ! If we never found *any* intersections, cannot
+                        ! possibly be inside, and there is nothing else to do.
+                        if (intInfo(1) == 0) then
+                            cycle
+                        else
+                            ! We found a ray cast intersection. Looks like it
+                            ! might actually be inside the surface after
+                            ! all. Re-run the full distance search with no
+                            ! dist cutoff.
+                            coor(4) = large
+                            call minDistancetreeSearchSinglePoint(fullWall%ADT, coor, &
+                                                                intInfo, uvw, dummy, 0, BB, frontLeaves, frontLeavesNew)
+                            cellID = intInfo(3)
+
+                            ! Determine if it is inside:
+                            call checkInside()
+
+                        end if
+                    end if
+                end do
+            end do
         end do
-     end do
-  end do
-
-  ! Now determine all the node indices this processor needs to get.
-  mm = 0
-  allocate(indicesToGet(totalVolumeCells(level)*4), link(totalVolumeCells(level)*4))
-  do nn=1, nDom
-     call setPointers(nn, level, sps)
-     do k=2, kl
-        do j=2, jl
-           do i=2, il
-              do kk=1,4
-                 mm = mm + 1
-                 indicesToGet(mm) = flowDoms(nn, level, sps)%surfNodeIndices(kk, i, j, k)
-              end do
-           end do
+    end do
+
+    ! Now determine all the node indices this processor needs to get.
+    mm = 0
+    allocate (indicesToGet(totalVolumeCells(level) * 4), link(totalVolumeCells(level) * 4))
+    do nn = 1, nDom
+        call setPointers(nn, level, sps)
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    do kk = 1, 4
+                        mm = mm + 1
+                        indicesToGet(mm) = flowDoms(nn, level, sps)%surfNodeIndices(kk, i, j, k)
+                    end do
+                end do
+            end do
         end do
-     end do
-  end do
-
-  ! This unique-ifies the indices.
-  call unique(indicesToGet, 4*totalVolumeCells(level), nUnique, link)
-
-  ! we need to update the stored indices to use the ordering of the nodes we will receive.
-  mm = 0
-  do nn=1, nDom
-     call setPointers(nn, level, sps)
-     do k=2, kl
-        do j=2, jl
-           do i=2, il
-              do kk=1,4
-                 mm = mm + 1
-                 flowDoms(nn, level, sps)%surfNodeIndices(kk, i, j, k) = link(mm)
-              end do
-           end do
+    end do
+
+    ! This unique-ifies the indices.
+    call unique(indicesToGet, 4 * totalVolumeCells(level), nUnique, link)
+
+    ! we need to update the stored indices to use the ordering of the nodes we will receive.
+    mm = 0
+    do nn = 1, nDom
+        call setPointers(nn, level, sps)
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    do kk = 1, 4
+                        mm = mm + 1
+                        flowDoms(nn, level, sps)%surfNodeIndices(kk, i, j, k) = link(mm)
+                    end do
+                end do
+            end do
         end do
-     end do
-  end do
-  deallocate(link)
-
-  ! Now create the index set for the nodes we need to get. We have to
-  ! expand "indices to get" to include the DOF. Use link for this
-  ! temporary array operation.
-
-  allocate(link(nUnique*3))
-  do i=1, nUnique
-     link((i-1)*3+1) = indicesToGet(i)*3
-     link((i-1)*3+2) = indicesToGet(i)*3+1
-     link((i-1)*3+3) = indicesToGet(i)*3+2
-  end do
-
-  call ISCreateGeneral(adflow_comm_world, nUnique*3, link, PETSC_COPY_VALUES, IS1, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-  deallocate(link)
-
-  ! Create the volume vector the nodes will be scatter from. Note that
-  ! this vector contains all the spectal instances. It is therefore
-  ! only allocated on the first call with sps=1
-  if (sps == 1) then
-     call VecCreateMPI(ADFLOW_COMM_WORLD, 3*totalVolumeNodes(level)*nTimeIntervalsSpectral, &
-          PETSC_DETERMINE, xVolumeVec(level), ierr)
-     call EChk(ierr,__FILE__,__LINE__)
-  end if
-
-  ! This is the vector we will scatter the nodes into.
-  call VecCreateMPI(ADFLOW_COMM_WORLD, 3*nUnique, PETSC_DETERMINE, &
-       xSurfVec(level, sps), ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  call VecGetOwnershipRange(xSurfVec(level, sps), i, j, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  call ISCreateStride(ADFLOW_COMM_WORLD, j-i, i, 1, IS2, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  ! Create the actual final scatter context.
-  call  VecScatterCreate(xVolumeVec(level), IS1, xSurfVec(level, sps), IS2, &
-       wallScatter(level, sps), ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  call ISDestroy(IS1, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  call ISDestroy(IS2, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  ! Deallocate all the remaining temporary data
-  deallocate(stack, BB, frontLeaves, frontLeavesNew, BBint)
-
-  do i=1, nClusters
-     deallocate(walls(i)%x, walls(i)%norm, walls(i)%conn)
-     call destroySerialQuad(walls(i)%ADT)
-  end do
-  deallocate(walls)
-
-  deallocate(nodesGlobal, connGlobal, clusterCellGlobal, &
-       clusterNodeGlobal, localNodeNums)
-
-  if (oversetPresent) then
-     call destroySerialQuad(fullWall%ADT)
-     deallocate(fullWall%norm)
-  end if
-
-  ! Finally communicate the updated iBlanks
-  domainLoop:do nn=1, nDom
-     flowDoms(nn, level, sps)%intCommVars(1)%var => &
-          flowDoms(nn, level, sps)%iblank(:, :, :)
-  end do domainLoop
-
-  ! Run the generic integer exchange
-  call wHalo1to1IntGeneric(1, level, sps, commPatternCell_2nd, internalCell_2nd)
+    end do
+    deallocate (link)
 
-contains
+    ! Now create the index set for the nodes we need to get. We have to
+    ! expand "indices to get" to include the DOF. Use link for this
+    ! temporary array operation.
 
-  subroutine checkInside()
+    allocate (link(nUnique * 3))
+    do i = 1, nUnique
+        link((i - 1) * 3 + 1) = indicesToGet(i) * 3
+        link((i - 1) * 3 + 2) = indicesToGet(i) * 3 + 1
+        link((i - 1) * 3 + 3) = indicesToGet(i) * 3 + 2
+    end do
 
-    implicit none
+    call ISCreateGeneral(adflow_comm_world, nUnique * 3, link, PETSC_COPY_VALUES, IS1, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
+    deallocate (link)
+
+    ! Create the volume vector the nodes will be scatter from. Note that
+    ! this vector contains all the spectal instances. It is therefore
+    ! only allocated on the first call with sps=1
+    if (sps == 1) then
+        call VecCreateMPI(ADFLOW_COMM_WORLD, 3 * totalVolumeNodes(level) * nTimeIntervalsSpectral, &
+                          PETSC_DETERMINE, xVolumeVec(level), ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+    end if
 
-    ! bi-linear shape functions (CCW ordering)
-    shp(1) = (one-uvw(1))*(one-uvw(2))
-    shp(2) = (    uvw(1))*(one-uvw(2))
-    shp(3) = (    uvw(1))*(    uvw(2))
-    shp(4) = (one-uvw(1))*(    uvw(2))
-
-    xp = zero
-    normal = zero
-    do jj=1, 4
-       xp = xp + shp(jj)*nodes(:, conn(jj, cellID))
-       normal = normal + shp(jj)*norm(:, conn(jj, cellID))
-    end do
+    ! This is the vector we will scatter the nodes into.
+    call VecCreateMPI(ADFLOW_COMM_WORLD, 3 * nUnique, PETSC_DETERMINE, &
+                      xSurfVec(level, sps), ierr)
+    call EChk(ierr, __FILE__, __LINE__)
+
+    call VecGetOwnershipRange(xSurfVec(level, sps), i, j, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
+
+    call ISCreateStride(ADFLOW_COMM_WORLD, j - i, i, 1, IS2, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-    ! Compute the dot product of normal with cell center
-    ! (stored in coor) with the point on the surface.
-    v1 = coor(1:3) - xp
-    dp = normal(1)*v1(1) + normal(2)*v1(2) + normal(3)*v1(3)
+    ! Create the actual final scatter context.
+    call VecScatterCreate(xVolumeVec(level), IS1, xSurfVec(level, sps), IS2, &
+                          wallScatter(level, sps), ierr)
+    call EChk(ierr, __FILE__, __LINE__)
+
+    call ISDestroy(IS1, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
+
+    call ISDestroy(IS2, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
+
+    ! Deallocate all the remaining temporary data
+    deallocate (stack, BB, frontLeaves, frontLeavesNew, BBint)
+
+    do i = 1, nClusters
+        deallocate (walls(i)%x, walls(i)%norm, walls(i)%conn)
+        call destroySerialQuad(walls(i)%ADT)
+    end do
+    deallocate (walls)
 
-    if (dp < zero) then
-       ! We're inside so blank this cell. Set it to -3 as
-       ! being a flood seed.
+    deallocate (nodesGlobal, connGlobal, clusterCellGlobal, &
+                clusterNodeGlobal, localNodeNums)
 
-       iBlank(i, j, k) = -3
+    if (oversetPresent) then
+        call destroySerialQuad(fullWall%ADT)
+        deallocate (fullWall%norm)
     end if
-  end subroutine checkInside
 
-end subroutine computeHolesInsideBody
+    ! Finally communicate the updated iBlanks
+    domainLoop: do nn = 1, nDom
+        flowDoms(nn, level, sps)%intCommVars(1)%var => &
+            flowDoms(nn, level, sps)%iblank(:, :, :)
+    end do domainLoop
+
+    ! Run the generic integer exchange
+    call wHalo1to1IntGeneric(1, level, sps, commPatternCell_2nd, internalCell_2nd)
+
+contains
+
+    subroutine checkInside()
+
+        implicit none
 
+        ! bi-linear shape functions (CCW ordering)
+        shp(1) = (one - uvw(1)) * (one - uvw(2))
+        shp(2) = (uvw(1)) * (one - uvw(2))
+        shp(3) = (uvw(1)) * (uvw(2))
+        shp(4) = (one - uvw(1)) * (uvw(2))
+
+        xp = zero
+        normal = zero
+        do jj = 1, 4
+            xp = xp + shp(jj) * nodes(:, conn(jj, cellID))
+            normal = normal + shp(jj) * norm(:, conn(jj, cellID))
+        end do
+
+        ! Compute the dot product of normal with cell center
+        ! (stored in coor) with the point on the surface.
+        v1 = coor(1:3) - xp
+        dp = normal(1) * v1(1) + normal(2) * v1(2) + normal(3) * v1(3)
+
+        if (dp < zero) then
+            ! We're inside so blank this cell. Set it to -3 as
+            ! being a flood seed.
+
+            iBlank(i, j, k) = -3
+        end if
+    end subroutine checkInside
+
+end subroutine computeHolesInsideBody
 
 subroutine buildUniqueNormal(wall)
 
-  use oversetData
-  implicit none
-
-  ! Input/Output parameters
-  type(oversetWall), intent(inout) :: wall
-
-  ! Working
-  integer(kind=intType), dimension(:),    allocatable :: link, normCount
-  real(kind=realType),   dimension(:, :), pointer :: nodes, norm
-  integer(kind=intType), dimension(:, :), pointer :: conn
-  real(kind=realType), dimension(3) :: sss,  v1, v2
-  integer(kind=intTYpe) :: i, j
-  ! Compute the (averaged) uniqe nodal vectors:
-  allocate(wall%norm(3, wall%nNodes), normCount(wall%nNodes))
-  nodes => wall%x
-  conn => wall%conn
-  norm => wall%norm
-
-  norm = zero
-  normCount = 0
-
-  do i=1, wall%nCells
-
-     ! Compute cross product normal and normize
-     v1 = nodes(:, conn(3, i)) -  nodes(:, conn(1, i))
-     v2 =  nodes(:, conn(4, i)) -  nodes(:, conn(2, i))
-
-     sss(1) = (v1(2)*v2(3) - v1(3)*v2(2))
-     sss(2) = (v1(3)*v2(1) - v1(1)*v2(3))
-     sss(3) = (v1(1)*v2(2) - v1(2)*v2(1))
-     sss = sss / sqrt(sss(1)**2 + sss(2)**2 + sss(3)**2)
-
-     ! Add to each of the four nodes and increment the number added
-     do j=1, 4
-        norm(:, conn(j, i)) = norm(:, conn(j, i)) + sss
-        normCount(conn(j, i)) = normCount(conn(j, i)) + 1
-     end do
-  end do
-
-  ! Now just divide by the norm count
-  do i=1, wall%nNodes
-     norm(:, i) = norm(:, i) / normCount(i)
-  end do
-
-  ! Node count is no longer needed
-  deallocate(normCount)
+    use oversetData
+    implicit none
+
+    ! Input/Output parameters
+    type(oversetWall), intent(inout) :: wall
+
+    ! Working
+    integer(kind=intType), dimension(:), allocatable :: link, normCount
+    real(kind=realType), dimension(:, :), pointer :: nodes, norm
+    integer(kind=intType), dimension(:, :), pointer :: conn
+    real(kind=realType), dimension(3) :: sss, v1, v2
+    integer(kind=intTYpe) :: i, j
+    ! Compute the (averaged) uniqe nodal vectors:
+    allocate (wall%norm(3, wall%nNodes), normCount(wall%nNodes))
+    nodes => wall%x
+    conn => wall%conn
+    norm => wall%norm
+
+    norm = zero
+    normCount = 0
+
+    do i = 1, wall%nCells
+
+        ! Compute cross product normal and normize
+        v1 = nodes(:, conn(3, i)) - nodes(:, conn(1, i))
+        v2 = nodes(:, conn(4, i)) - nodes(:, conn(2, i))
+
+        sss(1) = (v1(2) * v2(3) - v1(3) * v2(2))
+        sss(2) = (v1(3) * v2(1) - v1(1) * v2(3))
+        sss(3) = (v1(1) * v2(2) - v1(2) * v2(1))
+        sss = sss / sqrt(sss(1)**2 + sss(2)**2 + sss(3)**2)
+
+        ! Add to each of the four nodes and increment the number added
+        do j = 1, 4
+            norm(:, conn(j, i)) = norm(:, conn(j, i)) + sss
+            normCount(conn(j, i)) = normCount(conn(j, i)) + 1
+        end do
+    end do
+
+    ! Now just divide by the norm count
+    do i = 1, wall%nNodes
+        norm(:, i) = norm(:, i) / normCount(i)
+    end do
+
+    ! Node count is no longer needed
+    deallocate (normCount)
 
 end subroutine buildUniqueNormal
diff --git a/src/overset/determineDonors.F90 b/src/overset/determineDonors.F90
index 49522866a..955e80da9 100644
--- a/src/overset/determineDonors.F90
+++ b/src/overset/determineDonors.F90
@@ -8,183 +8,183 @@
 
 subroutine determineDonors(level, sps, fringeList, nFringe, useWall)
 
-  use constants
-  use block, only : fringeType, flowDoms
-  use communication, only : adflow_comm_world, myid, nProc, recvRequests, sendRequests
-  use utils, only : Echk, setPointers
-  use oversetData, only : clusters, nDomTotal, nClusters
-  use oversetUtilities, only : qsortFringeType, setIsDonor, setIsWallDonor, &
-       computeFringeProcArray, unwindIndex
-  implicit none
-
-  ! Input Params
-  integer(kind=intType), intent(in) :: level, sps, nFringe
-  type(fringeType), intent(inout), dimension(nFringe) :: fringeList
-  logical, intent(in) :: useWall
-
-  ! Working
-  integer(kind=intType), dimension(:), allocatable :: fringeProc, cumFringeProc
-  integer(kind=intType), dimension(:), allocatable :: tmpInt
-  integer(kind=intType), dimension(:), allocatable :: recvSizes
-  integer(kind=intType), dimension(:), allocatable :: intSendBuf, intRecvBuf
-  integer(kind=intType) :: i, j, k, ii, jj, kk, iii, jjj,  kkk, nn, index
-  integer(kind=intType) :: il, jl, kl, dIndex
-  integer(kind=intType) :: iStart, iEnd, iProc, iSize, nFringeProc
-  integer(kind=intType) :: sendCount, recvCount, ierr, totalRecvSize
-  integer mpiStatus(MPI_STATUS_SIZE)
-
-  ! First sort the fringes such that they are grouped by destination
-  ! procesor.
-  call qsortFringeType(fringeList, nFringe, sortByDonor)
-
-  !-----------------------------------------------------------------
-  ! Step 15: Now can scan through the fringeList which is guaranteed
-  ! to be sorted by processor. We scan though the (sorted) list
-  ! sequentally, detecting where the processor splits are. Then we
-  ! fire that off to the processor that needs it, again using the
-  ! dynamic sparse data exchange. For the section that is
-  ! on-processor, we can do that donor flagging overlapped with the
-  ! communication.
-  ! -----------------------------------------------------------------
-
-  allocate(fringeProc(nProc), cumFringeProc(1:nProc+1))
-  call computeFringeProcArray(fringeList, nFringe, &
-       fringeProc, cumFringeProc, nFringeProc)
-
-  ! nFringeProc is the total number of donor processors from
-  ! fringeList fringeProc(1:nProcFringe) are the donor processors
-  ! for the fringes cumFringeProc(1:nFringeProc) are the cumulative
-  ! offset from in the localFringe Array. Note that we have
-  ! over-estimated their size as nProc.
-
-  allocate(tmpInt(0:nProc-1), recvSizes(0:nProc-1))
-  tmpInt = 0
-  do j=1, nFringeProc
-     iProc = fringeProc(j)
-     if (iProc /= myid) then
-        tmpInt(iProc) = (cumFringeProc(j+1) - cumFringeProc(j))*2
-     end if
-  end do
-
-  ! Sum how much data we must receive from each processor.
-  call mpi_alltoall(tmpInt, 1, adflow_integer, recvSizes, 1, adflow_integer, &
-       adflow_comm_world, ierr)
-  call ECHK(ierr, __FILE__, __LINE__)
-
-  ! We will need to send 2 integers to the donor processor:
-  ! donorBlock, dIndex
-
-  ! Allocate space for the sending and receiving buffers
-  totalRecvSize = sum(recvSizes)
-  allocate(intSendBuf(2*nFringe), intRecvBuf(totalRecvSize))
-
-     ! Pack the full buffer with donorBlock, dI, dJ, and dK
-  do j=1, nFringe
-     intSendBuf(2*j-1) = fringeList(j)%donorBlock
-     intSendbuf(2*j  ) = fringeList(j)%dIndex
-  end do
-
-  ! Send the donors back to their own processors.
-  sendCount = 0
-  do j=1, nFringeProc
-
-     iProc = fringeProc(j)
-     iStart = (cumFringeProc(j)-1)*2 + 1
-     iSize = (cumFringeProc(j+1) - cumFringeProc(j))*2
-
-     if (iProc /= myid) then
-        sendCount = sendCount + 1
-        call mpi_isend(intSendBuf(iStart), iSize, adflow_integer, iProc, myid, &
-             adflow_comm_world, sendRequests(sendCount), ierr)
-        call ECHK(ierr, __FILE__, __LINE__)
-     end if
-  end do
-
-  ! Non-blocking receives
-  recvCount = 0
-  ii = 1
-  do iProc=0, nProc-1
-
-     if (recvSizes(iProc) > 0) then
-        recvCount = recvCount + 1
-        call mpi_irecv(intRecvBuf(ii), recvSizes(iProc), adflow_integer, &
-             iProc, iProc, adflow_comm_world, recvRequests(recvCount), ierr)
-        call ECHK(ierr, __FILE__, __LINE__)
-
-        ii = ii + recvSizes(iProc)
-     end if
-  end do
-
-  ! Local Work to do while we wait for data to send/recv
-  do j=1, nFringeProc
-
-     iProc = fringeProc(j)
-     iStart = cumFringeProc(j)
-     iEnd = cumFringeProc(j+1)-1
-
-     if (iProc == myid) then
-        do i=iStart, iEnd
-           nn = fringeList(i)%donorBlock
-           il = flowDoms(nn, level, sps)%il
-           jl = flowDoms(nn, level, sps)%jl
-           kl = flowDoms(nn, level, sps)%kl
-           dIndex = fringeList(i)%dIndex
-           call unwindIndex(dIndex, il, jl, kl, iii, jjj, kkk)
-
-           ! For the wall donors, we just flag the 1 cell that was
-           ! identified in the fringe search based on the octant.
-           if (useWall) then
-              call setIsWallDonor(flowDoms(nn, level, sps)%status(iii, jjj, kkk), .True. )
-           else
-              do kk=0, 1
-                 do jj=0, 1
-                    do ii=0, 1
-                       call setIsDonor(&
-                            flowDoms(nn, level, sps)%status(iii+ii, jjj+jj, kkk+kk), .True. )
+    use constants
+    use block, only: fringeType, flowDoms
+    use communication, only: adflow_comm_world, myid, nProc, recvRequests, sendRequests
+    use utils, only: Echk, setPointers
+    use oversetData, only: clusters, nDomTotal, nClusters
+    use oversetUtilities, only: qsortFringeType, setIsDonor, setIsWallDonor, &
+                                computeFringeProcArray, unwindIndex
+    implicit none
+
+    ! Input Params
+    integer(kind=intType), intent(in) :: level, sps, nFringe
+    type(fringeType), intent(inout), dimension(nFringe) :: fringeList
+    logical, intent(in) :: useWall
+
+    ! Working
+    integer(kind=intType), dimension(:), allocatable :: fringeProc, cumFringeProc
+    integer(kind=intType), dimension(:), allocatable :: tmpInt
+    integer(kind=intType), dimension(:), allocatable :: recvSizes
+    integer(kind=intType), dimension(:), allocatable :: intSendBuf, intRecvBuf
+    integer(kind=intType) :: i, j, k, ii, jj, kk, iii, jjj, kkk, nn, index
+    integer(kind=intType) :: il, jl, kl, dIndex
+    integer(kind=intType) :: iStart, iEnd, iProc, iSize, nFringeProc
+    integer(kind=intType) :: sendCount, recvCount, ierr, totalRecvSize
+    integer mpiStatus(MPI_STATUS_SIZE)
+
+    ! First sort the fringes such that they are grouped by destination
+    ! procesor.
+    call qsortFringeType(fringeList, nFringe, sortByDonor)
+
+    !-----------------------------------------------------------------
+    ! Step 15: Now can scan through the fringeList which is guaranteed
+    ! to be sorted by processor. We scan though the (sorted) list
+    ! sequentally, detecting where the processor splits are. Then we
+    ! fire that off to the processor that needs it, again using the
+    ! dynamic sparse data exchange. For the section that is
+    ! on-processor, we can do that donor flagging overlapped with the
+    ! communication.
+    ! -----------------------------------------------------------------
+
+    allocate (fringeProc(nProc), cumFringeProc(1:nProc + 1))
+    call computeFringeProcArray(fringeList, nFringe, &
+                                fringeProc, cumFringeProc, nFringeProc)
+
+    ! nFringeProc is the total number of donor processors from
+    ! fringeList fringeProc(1:nProcFringe) are the donor processors
+    ! for the fringes cumFringeProc(1:nFringeProc) are the cumulative
+    ! offset from in the localFringe Array. Note that we have
+    ! over-estimated their size as nProc.
+
+    allocate (tmpInt(0:nProc - 1), recvSizes(0:nProc - 1))
+    tmpInt = 0
+    do j = 1, nFringeProc
+        iProc = fringeProc(j)
+        if (iProc /= myid) then
+            tmpInt(iProc) = (cumFringeProc(j + 1) - cumFringeProc(j)) * 2
+        end if
+    end do
+
+    ! Sum how much data we must receive from each processor.
+    call mpi_alltoall(tmpInt, 1, adflow_integer, recvSizes, 1, adflow_integer, &
+                      adflow_comm_world, ierr)
+    call ECHK(ierr, __FILE__, __LINE__)
+
+    ! We will need to send 2 integers to the donor processor:
+    ! donorBlock, dIndex
+
+    ! Allocate space for the sending and receiving buffers
+    totalRecvSize = sum(recvSizes)
+    allocate (intSendBuf(2 * nFringe), intRecvBuf(totalRecvSize))
+
+    ! Pack the full buffer with donorBlock, dI, dJ, and dK
+    do j = 1, nFringe
+        intSendBuf(2 * j - 1) = fringeList(j)%donorBlock
+        intSendbuf(2 * j) = fringeList(j)%dIndex
+    end do
+
+    ! Send the donors back to their own processors.
+    sendCount = 0
+    do j = 1, nFringeProc
+
+        iProc = fringeProc(j)
+        iStart = (cumFringeProc(j) - 1) * 2 + 1
+        iSize = (cumFringeProc(j + 1) - cumFringeProc(j)) * 2
+
+        if (iProc /= myid) then
+            sendCount = sendCount + 1
+            call mpi_isend(intSendBuf(iStart), iSize, adflow_integer, iProc, myid, &
+                           adflow_comm_world, sendRequests(sendCount), ierr)
+            call ECHK(ierr, __FILE__, __LINE__)
+        end if
+    end do
+
+    ! Non-blocking receives
+    recvCount = 0
+    ii = 1
+    do iProc = 0, nProc - 1
+
+        if (recvSizes(iProc) > 0) then
+            recvCount = recvCount + 1
+            call mpi_irecv(intRecvBuf(ii), recvSizes(iProc), adflow_integer, &
+                           iProc, iProc, adflow_comm_world, recvRequests(recvCount), ierr)
+            call ECHK(ierr, __FILE__, __LINE__)
+
+            ii = ii + recvSizes(iProc)
+        end if
+    end do
+
+    ! Local Work to do while we wait for data to send/recv
+    do j = 1, nFringeProc
+
+        iProc = fringeProc(j)
+        iStart = cumFringeProc(j)
+        iEnd = cumFringeProc(j + 1) - 1
+
+        if (iProc == myid) then
+            do i = iStart, iEnd
+                nn = fringeList(i)%donorBlock
+                il = flowDoms(nn, level, sps)%il
+                jl = flowDoms(nn, level, sps)%jl
+                kl = flowDoms(nn, level, sps)%kl
+                dIndex = fringeList(i)%dIndex
+                call unwindIndex(dIndex, il, jl, kl, iii, jjj, kkk)
+
+                ! For the wall donors, we just flag the 1 cell that was
+                ! identified in the fringe search based on the octant.
+                if (useWall) then
+                    call setIsWallDonor(flowDoms(nn, level, sps)%status(iii, jjj, kkk), .True.)
+                else
+                    do kk = 0, 1
+                        do jj = 0, 1
+                            do ii = 0, 1
+                                call setIsDonor( &
+                                    flowDoms(nn, level, sps)%status(iii + ii, jjj + jj, kkk + kk), .True.)
+                            end do
+                        end do
                     end do
-                 end do
-              end do
-           end if
-        end do
-     end if
-  end do
-
-  ! Complete all the sends/receives. We could do overlapping here
-  ! like the frist comm for the fringes/blocks.
-  do i=1, recvCount
-     call mpi_waitany(recvCount, recvRequests, index, mpiStatus, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
-  enddo
-
-  do i=1, sendCount
-     call mpi_waitany(sendCount, sendRequests, index, mpiStatus, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
-  enddo
-
-  ! Loop over the full receive buffer that should now be full.
-  do j=1, totalRecvSize/2
-
-     nn = intRecvBuf(2*j-1)
-     il = flowDoms(nn, level, sps)%il
-     jl = flowDoms(nn, level, sps)%jl
-     kl = flowDoms(nn, level, sps)%kl
-
-     dIndex = intRecvBuf(2*j)
-     call unwindIndex(dIndex, il, jl, kl, iii, jjj, kkk)
-
-     if (useWall) Then
-        call setIsWallDonor(flowDoms(nn, level, sps)%status(iii, jjj, kkk), .True. )
-     else
-        do kk=0, 1
-           do jj=0, 1
-              do ii=0, 1
-                 call setIsDonor(flowDoms(nn, level, sps)%status(iii+ii, jjj+jj, kkk+kk), .True. )
-              end do
-           end do
-        end do
-     end if
-  end do
-  ! Finished with the buffers and allocatable arrays
-  deallocate(intSendBuf, intRecvBuf, fringeProc, cumFringeProc, tmpInt, recvSizes)
+                end if
+            end do
+        end if
+    end do
+
+    ! Complete all the sends/receives. We could do overlapping here
+    ! like the frist comm for the fringes/blocks.
+    do i = 1, recvCount
+        call mpi_waitany(recvCount, recvRequests, index, mpiStatus, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+    end do
+
+    do i = 1, sendCount
+        call mpi_waitany(sendCount, sendRequests, index, mpiStatus, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+    end do
+
+    ! Loop over the full receive buffer that should now be full.
+    do j = 1, totalRecvSize / 2
+
+        nn = intRecvBuf(2 * j - 1)
+        il = flowDoms(nn, level, sps)%il
+        jl = flowDoms(nn, level, sps)%jl
+        kl = flowDoms(nn, level, sps)%kl
+
+        dIndex = intRecvBuf(2 * j)
+        call unwindIndex(dIndex, il, jl, kl, iii, jjj, kkk)
+
+        if (useWall) Then
+            call setIsWallDonor(flowDoms(nn, level, sps)%status(iii, jjj, kkk), .True.)
+        else
+            do kk = 0, 1
+                do jj = 0, 1
+                    do ii = 0, 1
+                        call setIsDonor(flowDoms(nn, level, sps)%status(iii + ii, jjj + jj, kkk + kk), .True.)
+                    end do
+                end do
+            end do
+        end if
+    end do
+    ! Finished with the buffers and allocatable arrays
+    deallocate (intSendBuf, intRecvBuf, fringeProc, cumFringeProc, tmpInt, recvSizes)
 
 end subroutine determineDonors
diff --git a/src/overset/finalOversetCommStructures.F90 b/src/overset/finalOversetCommStructures.F90
index 9b6b03af1..f6391c18b 100644
--- a/src/overset/finalOversetCommStructures.F90
+++ b/src/overset/finalOversetCommStructures.F90
@@ -1,347 +1,346 @@
 subroutine finalOversetCommStructures(level, sps)
 
-
-  ! We need to fill in the following information in the comm patterns:
-  ! sendProc, nProcSend, nSend, and sendList for each proc
-  ! recvProc, nProcRecv, nRecv, and recvList for each proc
-  use constants
-  use block, only : flowDoms
-  use blockPointers, only : nDom, fringes, ib, jb, kb, status, fringePtr
-  use oversetData, only: fringeType
-  use communication, only : adflow_comm_world, myid, nProc, sendRequests, recvRequests, &
-       commPatternOverset, internalOverset
-  use utils, only : setPointers, terminate, EChk
-  use oversetUtilities, only : fracToWeights, qsortFringeType, getCumulativeForm, computeFringeProcArray, &
-       isReceiver, unwindIndex
-  implicit none
-
-  ! Input Parameters
-  integer(kind=intType), intent(in) :: level, sps
-
-  ! Working Parameters
-  integer(kind=intType) :: i, j, k, ii, jj, kk, nn, tag, ierr, nLocalFringe
-  integer(kind=intType) :: myI, myJ, myK, dI, dJ, dK, il, jl, kl, myBlock, dBlock
-  integer(kind=intType) :: n, nFringeProc, sendCount, recvCount, iProc
-  integer(kind=intType) :: iSize, iStart, iEnd, iProcRecv, iSendProc, iRecvProc
-  integer(kind=intType) :: nProcSend, nProcRecv, nCopy, totalRecvSize, index
-  type(fringeType), dimension(:), allocatable :: localFringes
-  integer(kind=intType), dimension(:), allocatable :: tmpInt
-  integer(kind=intType), dimension(:), allocatable :: recvSizes
-  integer(kind=intType), dimension(:), allocatable :: nProcSendLocal
-  integer(kind=intType), dimension(:), allocatable :: nProcSendLocaltmp
-  integer(kind=intType), dimension(:), allocatable :: fringeProc, cumFringeProc
-  integer(kind=intType), dimension(:), allocatable :: intSendBuf, intRecvBuf
-  real(kind=realType), dimension(:), allocatable :: realSendBuf, realRecvBuf
-  integer mpiStatus(MPI_STATUS_SIZE)
-
-  ! We need to fill in the following information in the comm patterns:
-  ! sendProc, nProcSend, nSend, and sendList for each proc
-  ! recvProc, nProcRecv, nRecv, and recvList for each proc
-
-  ! Count all actual fringes (including second level halos)
-  nLocalFringe = 0
-  do nn=1, nDom
-     call setPointers(nn, level, sps)
-     do k=0, kb
-        do j=0, jb
-           do i=0, ib
-              if (isReceiver(status(i, j, k))) then
-                 nLocalFringe = nLocalFringe + 1
-              end if
-           end do
-        end do
-     end do
-  end do
-
-  allocate(localFringes(nLocalFringe))
-  ! Now add in all of our local fringes
-  nLocalFringe = 0
-  do nn=1, nDom
-     call setPointers(nn, level, sps)
-     do k=0, kb
-        do j=0, jb
-           do i=0, ib
-              if (isReceiver(status(i, j, k))) then
-                 nLocalFringe = nLocalFringe + 1
-                 ii = fringePtr(1, i, j, k)
-                 localFringes(nLocalFringe) = fringes(ii)
-              end if
-           end do
-        end do
-     end do
-  end do
-
-  ! Sort the actual fringes
-  call qsortFringeType(localFringes, nLocalFringe, sortByDonor)
-
-  ! On the first pass with our local data we can determine the
-  ! internal nCopy as well as the nProcRecv. We also increment
-  ! nProcSend, and then run an allreduce to determine the number of
-  ! processors I need to send stuff to. This will then allow us to
-  ! allocate the sendData as well.
-  allocate(fringeProc(nProc), cumFringeProc(1:nProc+1))
-
-  call computeFringeProcArray(localFringes, nLocalFringe, &
-       fringeProc, cumFringeProc, nFringeProc)
-
-  allocate(tmpInt(0:nProc-1), recvSizes(0:nProc-1))
-  tmpInt = 0
-  do j=1, nFringeProc
-     iProc = fringeProc(j)
-     if (iProc /= myid) then
-        tmpInt(iProc) = (cumFringeProc(j+1) - cumFringeProc(j))
-     end if
-  end do
-
-  ! Sum how much data we must receive from each processor.
-  call mpi_alltoall(tmpInt, 1, adflow_integer, recvSizes, 1, adflow_integer, &
-       adflow_comm_world, ierr)
-  call ECHK(ierr, __FILE__, __LINE__)
-  deallocate(tmpInt)
-
-  ! Allocate space for the sending and receiving buffers
-  totalRecvSize = sum(recvSizes)
-  allocate(intSendBuf(2*nLocalFringe), intRecvBuf(totalRecvSize*2), &
-       realSendBuf(3*nLocalFringe), realRecvBuf(totalRecvSize*3))
-
-  ! Pack the real and integer buffers with donorBlock, dIndex and
-  ! donorFrac. We are putting everything in here, including our
-  ! own. That's ok.
-  do j=1, nLocalFringe
-     intSendBuf(2*j-1) = localFringes(j)%donorBlock
-     intSendbuf(2*j  ) = localFringes(j)%dindex
-     realSendBuf(3*j-2:3*j) = localFringes(j)%donorFrac
-  end do
-
-  nCopy = 0
-  nProcRecv = 0
-  allocate(nProcSendLocal(0:nProc-1))
-  nProcSendLocal = 0
-
-  do i=1, nFringeProc ! The numer of processors i'm dealing with
-     if (fringeProc(i) == myid) then
-        nCopy = cumFringeProc(i+1) - cumFringeProc(i)
-     else
-        nProcRecv = nProcRecv + 1  ! I will receive something from this proc
-        nProcSendLocal(fringeProc(i)) = 1 ! That proc will send it to me
-     end if
-  end do
-
-  ! This will sum up the nProcSendLocal array and then send out the
-  ! number of sends I have to do.
-  ! call mpi_reduce_scatter_block(nProcSendLocal, nProcSend, 1, &
-  !      adflow_integer, MPI_SUM, adflow_comm_world, ierr)
-
- !
-  ! The following is done for MPI 2.0 compatibility.
-  ! ------------------------------------------------
-  ! Break mpi_reduce_scatter_block to two steps: mpi_reduce and mpi_scatter
-  allocate(nProcSendLocaltmp(0:nProc-1))
-  nProcSendLocaltmp = 0
-
-  ! Step 1: Reduce at root proc
-  call mpi_reduce(nProcSendLocal, nProcSendLocaltmp, nProc, &
-           adflow_integer, MPI_SUM, 0, adflow_comm_world, ierr)
-
-  ! Step 2: Scatter from root proc
-  ! sendbuf = nProcSendLocaltmp, sendcount = 1,
-  ! recvbuf = nProcSend, recvcount = 1, source = 0
-  call mpi_scatter(nProcSendLocaltmp, 1, adflow_integer, nProcSend, 1, &
-           adflow_integer, 0, adflow_comm_world, ierr)
-
-  deallocate(nProcSendLocaltmp, nProcSendLocal)
-
-  ! We can allocate all necessary space for the send and receive information
-  commPatternOverset(level, sps)%nProcRecv = nProcRecv
-  allocate(commPatternOverset(level, sps)%recvProc(nProcRecv))
-  allocate(commPatternOverset(level, sps)%nRecv(nProcRecv))
-  allocate(commPatternOverset(level, sps)%recvList(nProcRecv))
-
-  commPatternOverset(level, sps)%nProcSend = nProcSend
-  allocate(commPatternOverset(level, sps)%sendProc(nProcSend))
-  allocate(commPatternOverset(level, sps)%nSend(nProcSend))
-  allocate(commPatternOverset(level, sps)%sendList(nProcSend))
-
-  ! As well as the copy information
-  internalOverset(level, sps)%nCopy = nCopy
-  allocate(internalOverset(level, sps)%donorBlock(nCopy))
-  allocate(internalOverset(level, sps)%donorIndices(nCopy, 3))
-  allocate(internalOverset(level, sps)%donorInterp(nCopy, 8))
-  allocate(internalOverset(level, sps)%donorInterpd(nCopy, 8))
-  allocate(internalOverset(level, sps)%XCen(nCopy, 3))
-  allocate(internalOverset(level, sps)%haloBlock(nCopy))
-  allocate(internalOverset(level, sps)%haloIndices(nCopy, 3))
-
-  ! Send the donors back to their own processors.
-  sendCount = 0
-
-  do j=1, nFringeProc
-
-     iProc = fringeProc(j)
-     iStart = cumFringeProc(j)-1
-     iSize = cumFringeProc(j+1) - cumFringeProc(j)
-
-     if (iProc /= myid) then
-        sendCount = sendCount + 1
-        call mpi_isend(intSendBuf(iStart*2+1), 2*iSize, adflow_integer, iProc, myid, &
-             adflow_comm_world, sendRequests(sendCount), ierr)
-        call ECHK(ierr, __FILE__, __LINE__)
-
-        sendCount = sendCount + 1
-        call mpi_isend(realSendBuf(iStart*3+1), 3*iSize, adflow_real, iProc, myid, &
-             adflow_comm_world, sendRequests(sendCount), ierr)
-        call ECHK(ierr, __FILE__, __LINE__)
-     end if
-  end do
-
-  ! Non-blocking receives
-  recvCount = 0
-  ii = 1
-  jj = 1
-  do iProc=0, nProc-1
-
-     if (recvSizes(iProc) > 0) then
-        recvCount = recvCount + 1
-        call mpi_irecv(intRecvBuf(ii), 2*recvSizes(iProc), adflow_integer, &
-             iProc, iProc, adflow_comm_world, recvRequests(recvCount), ierr)
-        call ECHK(ierr, __FILE__, __LINE__)
-
-        ii = ii + recvSizes(iProc)*2
-
-        recvCount = recvCount + 1
-        call mpi_irecv(realRecvBuf(jj), 3*recvSizes(iProc), adflow_real, &
-             iProc, iProc, adflow_comm_world, recvRequests(recvCount), ierr)
-        call ECHK(ierr, __FILE__, __LINE__)
-        jj = jj + recvSizes(iProc)*3
-     end if
-  end do
-
-  ! Do a little local work while we wait for the data to send/recv
-  iRecvProc = 0
-  do i=1, nFringeProc
-     iSize = cumFringeProc(i+1) - cumFringeProc(i)
-     iStart = cumFringeProc(i)
-     iEnd   = cumFringeProc(i+1)-1
-
-     if (fringeProc(i) == myid) then
-        ii =0
-        do j=iStart,iEnd
-           ii = ii + 1
-           ! This is the donor information
-           dBlock = localFringes(j)%donorBlock
-           internalOverset(level, sps)%donorBlock(ii) = dBlock
-           il = flowDoms(dBlock, level, sps)%il
-           jl = flowDoms(dBlock, level, sps)%jl
-           kl = flowDoms(dBlock, level, sps)%kl
-           call unwindIndex(localFringes(j)%dIndex, il, jl, kl, dI, dJ, dK)
-
-           internalOverset(level, sps)%donorIndices(ii, :) = (/dI, dJ, dk/)
-           call fracToWeights(localFringes(j)%donorFrac, internalOverset(level, sps)%donorInterp(ii, :))
-           internalOverset(level, sps)%donorInterpd(ii, :) = zero
-           internalOverset(level, sps)%xCen(ii, :) = zero
-
-           ! And the receiver (halo) information
-           myBlock = localFringes(j)%myBlock
-           internalOverset(level, sps)%haloBlock(ii) = myBlock
-
-           il = flowDoms(myBlock, level, sps)%il
-           jl = flowDoms(myBlock, level, sps)%jl
-           kl = flowDoms(myBlock, level, sps)%kl
-           call unwindIndex(localFringes(j)%myIndex, il, jl, kl, myI, myJ, myK)
-           internalOverset(level, sps)%haloIndices(ii, :) = (/myI, myJ, myK/)
-        end do
-     else
-
-        ! Set the receiver info. The info is already sent and in flight
-        iRecvProc = iRecvProc + 1
-        commPatternOverset(level, sps)%recvProc(iRecvProc) = fringeProc(i)
-        commPatternOverset(level, sps)%nRecv(iRecvProc) = iSize
-        allocate(commPatternOverset(level, sps)%recvList(iRecvProc)%block(iSize))
-        allocate(commPatternOverset(level, sps)%recvList(iRecvProc)%indices(iSize, 3))
-
-        ! Fill up the local information from myBlock and my{I,j,k}
-        ii = 0
-        do j=iStart, iEnd
-           ii = ii + 1
-           myBlock = localFringes(j)%myBlock
-           commPatternOverset(level, sps)%recvList(iRecvProc)%block(ii) = myBlock
-
-           il = flowDoms(myBlock, level, sps)%il
-           jl = flowDoms(myBlock, level, sps)%jl
-           kl = flowDoms(myBlock, level, sps)%kl
-           call unwindIndex(localFringes(j)%myIndex, il, jl, kl, myI, myJ, myK)
-           commPatternOverset(level, sps)%recvList(iRecvProc)%indices(ii,:) = (/myI, myJ, myK/)
+    ! We need to fill in the following information in the comm patterns:
+    ! sendProc, nProcSend, nSend, and sendList for each proc
+    ! recvProc, nProcRecv, nRecv, and recvList for each proc
+    use constants
+    use block, only: flowDoms
+    use blockPointers, only: nDom, fringes, ib, jb, kb, status, fringePtr
+    use oversetData, only: fringeType
+    use communication, only: adflow_comm_world, myid, nProc, sendRequests, recvRequests, &
+                             commPatternOverset, internalOverset
+    use utils, only: setPointers, terminate, EChk
+    use oversetUtilities, only: fracToWeights, qsortFringeType, getCumulativeForm, computeFringeProcArray, &
+                                isReceiver, unwindIndex
+    implicit none
+
+    ! Input Parameters
+    integer(kind=intType), intent(in) :: level, sps
+
+    ! Working Parameters
+    integer(kind=intType) :: i, j, k, ii, jj, kk, nn, tag, ierr, nLocalFringe
+    integer(kind=intType) :: myI, myJ, myK, dI, dJ, dK, il, jl, kl, myBlock, dBlock
+    integer(kind=intType) :: n, nFringeProc, sendCount, recvCount, iProc
+    integer(kind=intType) :: iSize, iStart, iEnd, iProcRecv, iSendProc, iRecvProc
+    integer(kind=intType) :: nProcSend, nProcRecv, nCopy, totalRecvSize, index
+    type(fringeType), dimension(:), allocatable :: localFringes
+    integer(kind=intType), dimension(:), allocatable :: tmpInt
+    integer(kind=intType), dimension(:), allocatable :: recvSizes
+    integer(kind=intType), dimension(:), allocatable :: nProcSendLocal
+    integer(kind=intType), dimension(:), allocatable :: nProcSendLocaltmp
+    integer(kind=intType), dimension(:), allocatable :: fringeProc, cumFringeProc
+    integer(kind=intType), dimension(:), allocatable :: intSendBuf, intRecvBuf
+    real(kind=realType), dimension(:), allocatable :: realSendBuf, realRecvBuf
+    integer mpiStatus(MPI_STATUS_SIZE)
+
+    ! We need to fill in the following information in the comm patterns:
+    ! sendProc, nProcSend, nSend, and sendList for each proc
+    ! recvProc, nProcRecv, nRecv, and recvList for each proc
+
+    ! Count all actual fringes (including second level halos)
+    nLocalFringe = 0
+    do nn = 1, nDom
+        call setPointers(nn, level, sps)
+        do k = 0, kb
+            do j = 0, jb
+                do i = 0, ib
+                    if (isReceiver(status(i, j, k))) then
+                        nLocalFringe = nLocalFringe + 1
+                    end if
+                end do
+            end do
         end do
-     end if
-  end do
-
-  ! Complete all the sends/receives. We could do overlapping here
-  ! like the frist comm for the fringes/blocks.
-
-  do i=1, recvCount
-     call mpi_waitany(recvCount, recvRequests, index, mpiStatus, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
-  enddo
-
-  do i=1, sendCount
-     call mpi_waitany(sendCount, sendRequests, index, mpiStatus, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
-  enddo
-
-  ! All of our data has now arrived we can now finish completing the send information.
-  ii = 0
-  jj = 0
-  iSendProc = 0 ! running counter of the ith processor
-  do iProc=0, nProc-1
-     if (recvSizes(iProc)> 0) then
-        ! We should have received something from this processor
-        iSendProc = iSendProc + 1
-        commPatternOverset(level, sps)%sendProc(iSendProc) = iProc
-        n = recvSizes(iProc)
-        commPatternOverset(level, sps)%nSend(iSendProc) = n
-        allocate(&
-             commPatternOverset(level, sps)%sendList(iSendProc)%block(n), &
-             commPatternOverset(level, sps)%sendList(iSendProc)%indices(n, 3), &
-             commPatternOverset(level, sps)%sendList(iSendProc)%interp(n, 8), &
-             commPatternOverset(level, sps)%sendList(iSendProc)%interpd(n, 8), &
-             commPatternOverset(level, sps)%sendList(iSendProc)%xCen(n, 3))
-
-        ! Now set the data
-        do i=1, n
-           myBlock = intRecvBuf(ii+1)
-           commPatternOverset(level, sps)%sendList(iSendProc)%block(i) = myBlock
-
-           il = flowDoms(myBlock, level, sps)%il
-           jl = flowDoms(myBlock, level, sps)%jl
-           kl = flowDoms(myBlock, level, sps)%kl
-
-           index = intRecvBuf(ii+2)
-           call unWindIndex(index, il, jl, kl, dI, dJ, dK)
-           commPatternOverset(level, sps)%sendList(iSendProc)%indices(i, :) = &
-                (/dI, dJ, dK/)
-           ii = ii + 2
-
-           call fracToWeights(realRecvBuf(jj+1:jj+3), &
-                commPatternOverset(level, sps)%sendList(iSendProc)%interp(i, :))
-           commPatternOverset(level, sps)%sendList(iSendProc)%interpd(i, :) = zero
-           commPatternOverset(level, sps)%sendList(iSendProc)%xCen(i, :) = zero
-           jj = jj + 3
+    end do
+
+    allocate (localFringes(nLocalFringe))
+    ! Now add in all of our local fringes
+    nLocalFringe = 0
+    do nn = 1, nDom
+        call setPointers(nn, level, sps)
+        do k = 0, kb
+            do j = 0, jb
+                do i = 0, ib
+                    if (isReceiver(status(i, j, k))) then
+                        nLocalFringe = nLocalFringe + 1
+                        ii = fringePtr(1, i, j, k)
+                        localFringes(nLocalFringe) = fringes(ii)
+                    end if
+                end do
+            end do
         end do
-     end if
-  end do
-
-  ! One last thing to do is to create the cumulative forms of nSend
-  ! and nRecv (nSendCum and nRecvCum)
-  allocate(commPatternOverset(level, sps)%nSendCum(0:nProcSend), &
-       commPatternOverset(level, sps)%nRecvCum(0:nProcRecv))
-
-  call getCumulativeForm(commPatternOverset(level, sps)%nSend, commPatternOverset(level, sps)%nProcSend, &
-       commPatternOverset(level, sps)%nSendCum)
-
-  call getCumulativeForm(commPatternOverset(level, sps)%nRecv, commPatternOverset(level, sps)%nProcRecv, &
-       commPatternOverset(level, sps)%nRecvCum)
-
-  deallocate(localFringes, fringeProc, cumFringeProc, &
-       intRecvBuf, intSendBuf, realRecvBuf, realSendBuf)
+    end do
+
+    ! Sort the actual fringes
+    call qsortFringeType(localFringes, nLocalFringe, sortByDonor)
+
+    ! On the first pass with our local data we can determine the
+    ! internal nCopy as well as the nProcRecv. We also increment
+    ! nProcSend, and then run an allreduce to determine the number of
+    ! processors I need to send stuff to. This will then allow us to
+    ! allocate the sendData as well.
+    allocate (fringeProc(nProc), cumFringeProc(1:nProc + 1))
+
+    call computeFringeProcArray(localFringes, nLocalFringe, &
+                                fringeProc, cumFringeProc, nFringeProc)
+
+    allocate (tmpInt(0:nProc - 1), recvSizes(0:nProc - 1))
+    tmpInt = 0
+    do j = 1, nFringeProc
+        iProc = fringeProc(j)
+        if (iProc /= myid) then
+            tmpInt(iProc) = (cumFringeProc(j + 1) - cumFringeProc(j))
+        end if
+    end do
+
+    ! Sum how much data we must receive from each processor.
+    call mpi_alltoall(tmpInt, 1, adflow_integer, recvSizes, 1, adflow_integer, &
+                      adflow_comm_world, ierr)
+    call ECHK(ierr, __FILE__, __LINE__)
+    deallocate (tmpInt)
+
+    ! Allocate space for the sending and receiving buffers
+    totalRecvSize = sum(recvSizes)
+    allocate (intSendBuf(2 * nLocalFringe), intRecvBuf(totalRecvSize * 2), &
+              realSendBuf(3 * nLocalFringe), realRecvBuf(totalRecvSize * 3))
+
+    ! Pack the real and integer buffers with donorBlock, dIndex and
+    ! donorFrac. We are putting everything in here, including our
+    ! own. That's ok.
+    do j = 1, nLocalFringe
+        intSendBuf(2 * j - 1) = localFringes(j)%donorBlock
+        intSendbuf(2 * j) = localFringes(j)%dindex
+        realSendBuf(3 * j - 2:3 * j) = localFringes(j)%donorFrac
+    end do
+
+    nCopy = 0
+    nProcRecv = 0
+    allocate (nProcSendLocal(0:nProc - 1))
+    nProcSendLocal = 0
+
+    do i = 1, nFringeProc ! The numer of processors i'm dealing with
+        if (fringeProc(i) == myid) then
+            nCopy = cumFringeProc(i + 1) - cumFringeProc(i)
+        else
+            nProcRecv = nProcRecv + 1  ! I will receive something from this proc
+            nProcSendLocal(fringeProc(i)) = 1 ! That proc will send it to me
+        end if
+    end do
+
+    ! This will sum up the nProcSendLocal array and then send out the
+    ! number of sends I have to do.
+    ! call mpi_reduce_scatter_block(nProcSendLocal, nProcSend, 1, &
+    !      adflow_integer, MPI_SUM, adflow_comm_world, ierr)
+
+    !
+    ! The following is done for MPI 2.0 compatibility.
+    ! ------------------------------------------------
+    ! Break mpi_reduce_scatter_block to two steps: mpi_reduce and mpi_scatter
+    allocate (nProcSendLocaltmp(0:nProc - 1))
+    nProcSendLocaltmp = 0
+
+    ! Step 1: Reduce at root proc
+    call mpi_reduce(nProcSendLocal, nProcSendLocaltmp, nProc, &
+                    adflow_integer, MPI_SUM, 0, adflow_comm_world, ierr)
+
+    ! Step 2: Scatter from root proc
+    ! sendbuf = nProcSendLocaltmp, sendcount = 1,
+    ! recvbuf = nProcSend, recvcount = 1, source = 0
+    call mpi_scatter(nProcSendLocaltmp, 1, adflow_integer, nProcSend, 1, &
+                     adflow_integer, 0, adflow_comm_world, ierr)
+
+    deallocate (nProcSendLocaltmp, nProcSendLocal)
+
+    ! We can allocate all necessary space for the send and receive information
+    commPatternOverset(level, sps)%nProcRecv = nProcRecv
+    allocate (commPatternOverset(level, sps)%recvProc(nProcRecv))
+    allocate (commPatternOverset(level, sps)%nRecv(nProcRecv))
+    allocate (commPatternOverset(level, sps)%recvList(nProcRecv))
+
+    commPatternOverset(level, sps)%nProcSend = nProcSend
+    allocate (commPatternOverset(level, sps)%sendProc(nProcSend))
+    allocate (commPatternOverset(level, sps)%nSend(nProcSend))
+    allocate (commPatternOverset(level, sps)%sendList(nProcSend))
+
+    ! As well as the copy information
+    internalOverset(level, sps)%nCopy = nCopy
+    allocate (internalOverset(level, sps)%donorBlock(nCopy))
+    allocate (internalOverset(level, sps)%donorIndices(nCopy, 3))
+    allocate (internalOverset(level, sps)%donorInterp(nCopy, 8))
+    allocate (internalOverset(level, sps)%donorInterpd(nCopy, 8))
+    allocate (internalOverset(level, sps)%XCen(nCopy, 3))
+    allocate (internalOverset(level, sps)%haloBlock(nCopy))
+    allocate (internalOverset(level, sps)%haloIndices(nCopy, 3))
+
+    ! Send the donors back to their own processors.
+    sendCount = 0
+
+    do j = 1, nFringeProc
+
+        iProc = fringeProc(j)
+        iStart = cumFringeProc(j) - 1
+        iSize = cumFringeProc(j + 1) - cumFringeProc(j)
+
+        if (iProc /= myid) then
+            sendCount = sendCount + 1
+            call mpi_isend(intSendBuf(iStart * 2 + 1), 2 * iSize, adflow_integer, iProc, myid, &
+                           adflow_comm_world, sendRequests(sendCount), ierr)
+            call ECHK(ierr, __FILE__, __LINE__)
+
+            sendCount = sendCount + 1
+            call mpi_isend(realSendBuf(iStart * 3 + 1), 3 * iSize, adflow_real, iProc, myid, &
+                           adflow_comm_world, sendRequests(sendCount), ierr)
+            call ECHK(ierr, __FILE__, __LINE__)
+        end if
+    end do
+
+    ! Non-blocking receives
+    recvCount = 0
+    ii = 1
+    jj = 1
+    do iProc = 0, nProc - 1
+
+        if (recvSizes(iProc) > 0) then
+            recvCount = recvCount + 1
+            call mpi_irecv(intRecvBuf(ii), 2 * recvSizes(iProc), adflow_integer, &
+                           iProc, iProc, adflow_comm_world, recvRequests(recvCount), ierr)
+            call ECHK(ierr, __FILE__, __LINE__)
+
+            ii = ii + recvSizes(iProc) * 2
+
+            recvCount = recvCount + 1
+            call mpi_irecv(realRecvBuf(jj), 3 * recvSizes(iProc), adflow_real, &
+                           iProc, iProc, adflow_comm_world, recvRequests(recvCount), ierr)
+            call ECHK(ierr, __FILE__, __LINE__)
+            jj = jj + recvSizes(iProc) * 3
+        end if
+    end do
+
+    ! Do a little local work while we wait for the data to send/recv
+    iRecvProc = 0
+    do i = 1, nFringeProc
+        iSize = cumFringeProc(i + 1) - cumFringeProc(i)
+        iStart = cumFringeProc(i)
+        iEnd = cumFringeProc(i + 1) - 1
+
+        if (fringeProc(i) == myid) then
+            ii = 0
+            do j = iStart, iEnd
+                ii = ii + 1
+                ! This is the donor information
+                dBlock = localFringes(j)%donorBlock
+                internalOverset(level, sps)%donorBlock(ii) = dBlock
+                il = flowDoms(dBlock, level, sps)%il
+                jl = flowDoms(dBlock, level, sps)%jl
+                kl = flowDoms(dBlock, level, sps)%kl
+                call unwindIndex(localFringes(j)%dIndex, il, jl, kl, dI, dJ, dK)
+
+                internalOverset(level, sps)%donorIndices(ii, :) = (/dI, dJ, dk/)
+                call fracToWeights(localFringes(j)%donorFrac, internalOverset(level, sps)%donorInterp(ii, :))
+                internalOverset(level, sps)%donorInterpd(ii, :) = zero
+                internalOverset(level, sps)%xCen(ii, :) = zero
+
+                ! And the receiver (halo) information
+                myBlock = localFringes(j)%myBlock
+                internalOverset(level, sps)%haloBlock(ii) = myBlock
+
+                il = flowDoms(myBlock, level, sps)%il
+                jl = flowDoms(myBlock, level, sps)%jl
+                kl = flowDoms(myBlock, level, sps)%kl
+                call unwindIndex(localFringes(j)%myIndex, il, jl, kl, myI, myJ, myK)
+                internalOverset(level, sps)%haloIndices(ii, :) = (/myI, myJ, myK/)
+            end do
+        else
+
+            ! Set the receiver info. The info is already sent and in flight
+            iRecvProc = iRecvProc + 1
+            commPatternOverset(level, sps)%recvProc(iRecvProc) = fringeProc(i)
+            commPatternOverset(level, sps)%nRecv(iRecvProc) = iSize
+            allocate (commPatternOverset(level, sps)%recvList(iRecvProc)%block(iSize))
+            allocate (commPatternOverset(level, sps)%recvList(iRecvProc)%indices(iSize, 3))
+
+            ! Fill up the local information from myBlock and my{I,j,k}
+            ii = 0
+            do j = iStart, iEnd
+                ii = ii + 1
+                myBlock = localFringes(j)%myBlock
+                commPatternOverset(level, sps)%recvList(iRecvProc)%block(ii) = myBlock
+
+                il = flowDoms(myBlock, level, sps)%il
+                jl = flowDoms(myBlock, level, sps)%jl
+                kl = flowDoms(myBlock, level, sps)%kl
+                call unwindIndex(localFringes(j)%myIndex, il, jl, kl, myI, myJ, myK)
+                commPatternOverset(level, sps)%recvList(iRecvProc)%indices(ii, :) = (/myI, myJ, myK/)
+            end do
+        end if
+    end do
+
+    ! Complete all the sends/receives. We could do overlapping here
+    ! like the frist comm for the fringes/blocks.
+
+    do i = 1, recvCount
+        call mpi_waitany(recvCount, recvRequests, index, mpiStatus, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+    end do
+
+    do i = 1, sendCount
+        call mpi_waitany(sendCount, sendRequests, index, mpiStatus, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+    end do
+
+    ! All of our data has now arrived we can now finish completing the send information.
+    ii = 0
+    jj = 0
+    iSendProc = 0 ! running counter of the ith processor
+    do iProc = 0, nProc - 1
+        if (recvSizes(iProc) > 0) then
+            ! We should have received something from this processor
+            iSendProc = iSendProc + 1
+            commPatternOverset(level, sps)%sendProc(iSendProc) = iProc
+            n = recvSizes(iProc)
+            commPatternOverset(level, sps)%nSend(iSendProc) = n
+            allocate ( &
+                commPatternOverset(level, sps)%sendList(iSendProc)%block(n), &
+                commPatternOverset(level, sps)%sendList(iSendProc)%indices(n, 3), &
+                commPatternOverset(level, sps)%sendList(iSendProc)%interp(n, 8), &
+                commPatternOverset(level, sps)%sendList(iSendProc)%interpd(n, 8), &
+                commPatternOverset(level, sps)%sendList(iSendProc)%xCen(n, 3))
+
+            ! Now set the data
+            do i = 1, n
+                myBlock = intRecvBuf(ii + 1)
+                commPatternOverset(level, sps)%sendList(iSendProc)%block(i) = myBlock
+
+                il = flowDoms(myBlock, level, sps)%il
+                jl = flowDoms(myBlock, level, sps)%jl
+                kl = flowDoms(myBlock, level, sps)%kl
+
+                index = intRecvBuf(ii + 2)
+                call unWindIndex(index, il, jl, kl, dI, dJ, dK)
+                commPatternOverset(level, sps)%sendList(iSendProc)%indices(i, :) = &
+                    (/dI, dJ, dK/)
+                ii = ii + 2
+
+                call fracToWeights(realRecvBuf(jj + 1:jj + 3), &
+                                   commPatternOverset(level, sps)%sendList(iSendProc)%interp(i, :))
+                commPatternOverset(level, sps)%sendList(iSendProc)%interpd(i, :) = zero
+                commPatternOverset(level, sps)%sendList(iSendProc)%xCen(i, :) = zero
+                jj = jj + 3
+            end do
+        end if
+    end do
+
+    ! One last thing to do is to create the cumulative forms of nSend
+    ! and nRecv (nSendCum and nRecvCum)
+    allocate (commPatternOverset(level, sps)%nSendCum(0:nProcSend), &
+              commPatternOverset(level, sps)%nRecvCum(0:nProcRecv))
+
+    call getCumulativeForm(commPatternOverset(level, sps)%nSend, commPatternOverset(level, sps)%nProcSend, &
+                           commPatternOverset(level, sps)%nSendCum)
+
+    call getCumulativeForm(commPatternOverset(level, sps)%nRecv, commPatternOverset(level, sps)%nProcRecv, &
+                           commPatternOverset(level, sps)%nRecvCum)
+
+    deallocate (localFringes, fringeProc, cumFringeProc, &
+                intRecvBuf, intSendBuf, realRecvBuf, realSendBuf)
 
 end subroutine finalOversetCommStructures
diff --git a/src/overset/flagNearWall.F90 b/src/overset/flagNearWall.F90
index 19498f7a5..6390cc6bf 100644
--- a/src/overset/flagNearWall.F90
+++ b/src/overset/flagNearWall.F90
@@ -1,269 +1,268 @@
 subroutine flagNearWallCells(level, sps)
 
-  ! This routine is vastly more complex that it really should
-  !  be. Essentially what we want to do is flag cells that are
-  !  approximately within a distance "nearWallDistance" of a
-  !  wall. Nominally, this just means looking at planes parallel to a
-  !  wall boundary and determining the distance to the point on the
-  !  surface. Very quick (no searching). However, if a block is split
-  !  parallel to a wall BC which is not desirable, but will happen
-  !  occasionally for block partitioning purposes, we can have "near
-  !  wall" cells that are on a split block and have no idea they are
-  !  near a wall. So what we have to do is similar to the flooding
-  !  algorithm: Starting at the wall, you proceed outwards. If you get
-  !  to the other side of the block and it is still near a wall, you
-  !  communicate and then continue on the other side. For the fringe
-  !  search, (which is built from the dual mesh), we want to know of
-  !  the *cells* of the dual mesh are near a wall. That means we
-  !  actually want check the distnaces of the dual of the
-  !  dual. Basically that means we're back to the primal.
-
-  use constants
-  use oversetData
-  use inputOverset
-  use blockPointers
-  use adtAPI
-  use cgnsGrid
-  use communication
-  use utils
-  implicit none
-
-  ! Input Params
-  integer(kind=intType), intent(in) :: level, sps
-
-  ! Working paramters
-  integer(kind=intType) :: i, j, k, nn, mm, ierr, nFlaggedLocal
-  integer(kind=intType) :: iStart, iEnd, jStart, jEnd, kStart, kEnd
-  integer(kind=intType) :: loopIter, nAtBoundaryLocal, nAtBoundary
-  logical :: tostop
-  type(Xplane), dimension(:), allocatable :: planes
-  real(Kind=realType), dimension(:, :, :, :), pointer :: xx, xSeed
-
-  real(kind=realType) :: dist
-  ! Since we need to have the 'x' and 'nearWall' arrays allocated at
-  ! once, these need to go into block. Allocate the two x arrays the
-  ! near wall arrays
-  nAtBoundaryLocal = 0
-  nFlaggedLocal = 0
-  do nn=1, nDom
-     call setPointers(nn, level, sps)
-     if (.not. associated(flowDoms(nn, level, sps)%nearWall)) then
-        allocate(flowDoms(nn,level,sps)%nearWall(1:il, 1:jl, 1:kl))
-     end if
-
-     allocate(flowDoms(nn, level, sps)%XSeed(0:ie, 0:je, 0:ke, 3))
-
-     ! Manaully set the three pointers
-     xSeed => flowDoms(nn, level, sps)%xSeed
-     nearWall => flowDoms(nn, level, sps)%nearWall
-     nearWall = 0
-     ! Use large to denote no seed presnet
-     xSeed = large
-
-     ! Flag all nodes that are within nearWallDist as being nearWall
-     do mm=1, nBocos
-        if (isWallType(BCType(mm))) then
-
-           call setBoundaryPointers(mm, BCFaceID(mm), .False.)
-           ! Loop over the generalized plane
-           do j=jStart, jEnd
-              do i=iStart, iEnd
-
-                 ! Loop over the 'k' ie offwall direction
-                 do k=kStart, kEnd
-
-                    dist = norm2(planes(k)%x(i, j, :) - planes(1)%x(i, j, :))
-                    if (dist < nearWallDist) then
-                       planes(k)%nearWall(i, j) = 1
-                       nFlaggedLocal = nFlaggedLocal + 1
-
-                       ! If we made it all the way to the other side
-                       ! of the block, set the wall point into the
-                       ! xSeed array. Note that we need the *SECOND
-                       ! LAST NODE*. This is the value that is
-                       ! actually transfered as it is the halo node
-                       ! for the other block.
-
-                       if (k == kEnd-1) then
-                          planes(k)%xSeed(i, j, :) = planes(1)%x(i, j, :)
-                          nAtBoundaryLocal = nAtBoundaryLocal + 1
-                       end if
-                    end if
-                 end do
-              end do
-           end do
-           deallocate(planes)
-        end if
-     end do ! BocoLoop
-  end do
-
-  ! Determine if any cells made it to the other side of a face. If so, we have to keep going:
-  call mpi_allreduce(nAtBoundaryLocal, nAtBoundary, 1, adflow_integer, MPI_SUM, &
-       adflow_comm_world, ierr)
-  call ECHK(ierr, __FILE__, __LINE__)
-
-  ! Iterative loop
-  loopIter = 1
-  parallelSyncLoop: do while (nAtBoundary > 0)
-     if (myid == 0) then
-        print *, 'Flag Near Wall Iteration:', loopIter, 'nAtBoundary', nAtBoundary
-     end if
-
-     ! Reset the counter
-     nAtBoundaryLocal = 0
-
-     ! Exchange the xSeeds after the initial flooding. Set the
-     ! pointers for the comm:
-     do nn=1, nDom
-        xSeed => flowDoms(nn, level, sps)%xSeed
-        flowDoms(nn, level, sps)%realCommVars(1)%var => xSeed(:, :, :, 1)
-        flowDoms(nn, level, sps)%realCommVars(2)%var => xSeed(:, :, :, 2)
-        flowDoms(nn, level, sps)%realCommVars(3)%var => xSeed(:, :, :, 3)
-     end do
-
-     ! Run the generic integer exchange
-     call wHalo1to1RealGeneric(3, level, sps, commPatternNode_1st, internalNode_1st)
+    ! This routine is vastly more complex that it really should
+    !  be. Essentially what we want to do is flag cells that are
+    !  approximately within a distance "nearWallDistance" of a
+    !  wall. Nominally, this just means looking at planes parallel to a
+    !  wall boundary and determining the distance to the point on the
+    !  surface. Very quick (no searching). However, if a block is split
+    !  parallel to a wall BC which is not desirable, but will happen
+    !  occasionally for block partitioning purposes, we can have "near
+    !  wall" cells that are on a split block and have no idea they are
+    !  near a wall. So what we have to do is similar to the flooding
+    !  algorithm: Starting at the wall, you proceed outwards. If you get
+    !  to the other side of the block and it is still near a wall, you
+    !  communicate and then continue on the other side. For the fringe
+    !  search, (which is built from the dual mesh), we want to know of
+    !  the *cells* of the dual mesh are near a wall. That means we
+    !  actually want check the distnaces of the dual of the
+    !  dual. Basically that means we're back to the primal.
+
+    use constants
+    use oversetData
+    use inputOverset
+    use blockPointers
+    use adtAPI
+    use cgnsGrid
+    use communication
+    use utils
+    implicit none
 
-     do nn=1, nDom
+    ! Input Params
+    integer(kind=intType), intent(in) :: level, sps
+
+    ! Working paramters
+    integer(kind=intType) :: i, j, k, nn, mm, ierr, nFlaggedLocal
+    integer(kind=intType) :: iStart, iEnd, jStart, jEnd, kStart, kEnd
+    integer(kind=intType) :: loopIter, nAtBoundaryLocal, nAtBoundary
+    logical :: tostop
+    type(Xplane), dimension(:), allocatable :: planes
+    real(Kind=realType), dimension(:, :, :, :), pointer :: xx, xSeed
+
+    real(kind=realType) :: dist
+    ! Since we need to have the 'x' and 'nearWall' arrays allocated at
+    ! once, these need to go into block. Allocate the two x arrays the
+    ! near wall arrays
+    nAtBoundaryLocal = 0
+    nFlaggedLocal = 0
+    do nn = 1, nDom
         call setPointers(nn, level, sps)
+        if (.not. associated(flowDoms(nn, level, sps)%nearWall)) then
+            allocate (flowDoms(nn, level, sps)%nearWall(1:il, 1:jl, 1:kl))
+        end if
 
-        ! Manaully set the additional pointers
+        allocate (flowDoms(nn, level, sps)%XSeed(0:ie, 0:je, 0:ke, 3))
+
+        ! Manaully set the three pointers
         xSeed => flowDoms(nn, level, sps)%xSeed
         nearWall => flowDoms(nn, level, sps)%nearWall
+        nearWall = 0
+        ! Use large to denote no seed presnet
+        xSeed = large
+
+        ! Flag all nodes that are within nearWallDist as being nearWall
+        do mm = 1, nBocos
+            if (isWallType(BCType(mm))) then
+
+                call setBoundaryPointers(mm, BCFaceID(mm), .False.)
+                ! Loop over the generalized plane
+                do j = jStart, jEnd
+                    do i = iStart, iEnd
+
+                        ! Loop over the 'k' ie offwall direction
+                        do k = kStart, kEnd
+
+                            dist = norm2(planes(k)%x(i, j, :) - planes(1)%x(i, j, :))
+                            if (dist < nearWallDist) then
+                                planes(k)%nearWall(i, j) = 1
+                                nFlaggedLocal = nFlaggedLocal + 1
+
+                                ! If we made it all the way to the other side
+                                ! of the block, set the wall point into the
+                                ! xSeed array. Note that we need the *SECOND
+                                ! LAST NODE*. This is the value that is
+                                ! actually transfered as it is the halo node
+                                ! for the other block.
+
+                                if (k == kEnd - 1) then
+                                    planes(k)%xSeed(i, j, :) = planes(1)%x(i, j, :)
+                                    nAtBoundaryLocal = nAtBoundaryLocal + 1
+                                end if
+                            end if
+                        end do
+                    end do
+                end do
+                deallocate (planes)
+            end if
+        end do ! BocoLoop
+    end do
+
+    ! Determine if any cells made it to the other side of a face. If so, we have to keep going:
+    call mpi_allreduce(nAtBoundaryLocal, nAtBoundary, 1, adflow_integer, MPI_SUM, &
+                       adflow_comm_world, ierr)
+    call ECHK(ierr, __FILE__, __LINE__)
+
+    ! Iterative loop
+    loopIter = 1
+    parallelSyncLoop: do while (nAtBoundary > 0)
+        if (myid == 0) then
+            print *, 'Flag Near Wall Iteration:', loopIter, 'nAtBoundary', nAtBoundary
+        end if
 
-        ! Loop over all halos. We won't be selective here, since the
-        ! seeds cound show up anywhere! :-)
+        ! Reset the counter
+        nAtBoundaryLocal = 0
 
-        ! Generic loop over all boundaries
-        do mm=1, 6
-           call setBoundaryPointers(mm, mm, .True.)
+        ! Exchange the xSeeds after the initial flooding. Set the
+        ! pointers for the comm:
+        do nn = 1, nDom
+            xSeed => flowDoms(nn, level, sps)%xSeed
+            flowDoms(nn, level, sps)%realCommVars(1)%var => xSeed(:, :, :, 1)
+            flowDoms(nn, level, sps)%realCommVars(2)%var => xSeed(:, :, :, 2)
+            flowDoms(nn, level, sps)%realCommVars(3)%var => xSeed(:, :, :, 3)
+        end do
 
-           ! Loop over the size of the generalized plane
-           do j=jStart, jEnd
-              do i=iStart, iEnd
+        ! Run the generic integer exchange
+        call wHalo1to1RealGeneric(3, level, sps, commPatternNode_1st, internalNode_1st)
 
-                 ! Determine if we need to do the generalized 'k'
-                 ! direction at all. We only need to do if a valid
-                 ! seed has shown up in the xSeed:
-                 if (planes(0)%xSeed(i, j, 1) < large) then
+        do nn = 1, nDom
+            call setPointers(nn, level, sps)
 
-                    ! Loop over the 'k' ie offwall direction
-                    do k=kStart, kEnd
+            ! Manaully set the additional pointers
+            xSeed => flowDoms(nn, level, sps)%xSeed
+            nearWall => flowDoms(nn, level, sps)%nearWall
 
-                       dist = norm2(planes(k)%x(i, j, :) - planes(0)%xSeed(i, j, :))
-                       if (dist < nearWallDist .and. planes(k)%nearWall(i, j) == 0) then
-                          planes(k)%nearWall(i, j) = 1
+            ! Loop over all halos. We won't be selective here, since the
+            ! seeds cound show up anywhere! :-)
 
-                          ! If we made it all the way to the other side
-                          ! of the block, copy over the seed for the next exchange.
-                          if (k == kEnd-1) then
-                             planes(k)%xSeed(i, j, :) = planes(0)%xSeed(i, j, :)
-                             nAtBoundaryLocal = nAtBoundaryLocal + 1
-                          end if
-                       end if
+            ! Generic loop over all boundaries
+            do mm = 1, 6
+                call setBoundaryPointers(mm, mm, .True.)
+
+                ! Loop over the size of the generalized plane
+                do j = jStart, jEnd
+                    do i = iStart, iEnd
+
+                        ! Determine if we need to do the generalized 'k'
+                        ! direction at all. We only need to do if a valid
+                        ! seed has shown up in the xSeed:
+                        if (planes(0)%xSeed(i, j, 1) < large) then
+
+                            ! Loop over the 'k' ie offwall direction
+                            do k = kStart, kEnd
+
+                                dist = norm2(planes(k)%x(i, j, :) - planes(0)%xSeed(i, j, :))
+                                if (dist < nearWallDist .and. planes(k)%nearWall(i, j) == 0) then
+                                    planes(k)%nearWall(i, j) = 1
+
+                                    ! If we made it all the way to the other side
+                                    ! of the block, copy over the seed for the next exchange.
+                                    if (k == kEnd - 1) then
+                                        planes(k)%xSeed(i, j, :) = planes(0)%xSeed(i, j, :)
+                                        nAtBoundaryLocal = nAtBoundaryLocal + 1
+                                    end if
+                                end if
+                            end do
+                        end if
                     end do
-                 end if
-              end do
-           end do
-           deallocate(planes)
+                end do
+                deallocate (planes)
+            end do
         end do
-     end do
 
-     ! Determine if any cells made it to the other side of a face. If so, we have to keep going:
-     call mpi_allreduce(nAtBoundaryLocal, nAtBoundary, 1, adflow_integer, MPI_SUM, &
-          adflow_comm_world, ierr)
-     call ECHK(ierr, __FILE__, __LINE__)
+        ! Determine if any cells made it to the other side of a face. If so, we have to keep going:
+        call mpi_allreduce(nAtBoundaryLocal, nAtBoundary, 1, adflow_integer, MPI_SUM, &
+                           adflow_comm_world, ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
 
-     loopIter = loopIter + 1
+        loopIter = loopIter + 1
 
-  end do parallelSyncLoop
+    end do parallelSyncLoop
 
-  ! Deallocate X and XSeed since they are no longer needed. We
-  ! have to hold onto nearWall a little longer since we need to use it
-  ! in initializeOBlock. It will deallocate it.
-  do nn=1, nDom
-    deallocate(flowDoms(nn, level, sps)%XSeed)
-  end do
+    ! Deallocate X and XSeed since they are no longer needed. We
+    ! have to hold onto nearWall a little longer since we need to use it
+    ! in initializeOBlock. It will deallocate it.
+    do nn = 1, nDom
+        deallocate (flowDoms(nn, level, sps)%XSeed)
+    end do
 
 contains
-  subroutine setBoundaryPointers(mm, faceID, fullFaces)
-    implicit none
+    subroutine setBoundaryPointers(mm, faceID, fullFaces)
+        implicit none
+
+        integer(kind=intType), intent(in) :: mm, faceID
+        logical, intent(in) :: fullFaces
+
+        if (.not. fullFaces) then
+            iStart = BCData(mm)%inBeg; iEnd = BCData(mm)%inEnd
+            jStart = BCData(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+        else
+            iStart = 1; jStart = 1
+            select case (mm)
+            case (iMin, iMax)
+                iEnd = jl; jEnd = kl
+            case (jMin, jMax)
+                iEnd = il; jEnd = kl
+            case (kMin, kMax)
+                iEnd = il; jEnd = jl
+            end select
+        end if
 
-    integer(kind=intType), intent(in) :: mm, faceID
-    logical, intent(in) :: fullFaces
-
-    if (.not. fullFaces) then
-       iStart=BCData(mm)%inBeg; iEnd=BCData(mm)%inEnd
-       jStart=BCData(mm)%jnBeg; jEnd=BCData(mm)%jnEnd
-    else
-       iStart=1; jStart=1
-       select case (mm)
-       case (iMin, iMax)
-          iEnd=jl; jEnd=kl
-       case (jMin, jMax)
-          iEnd=il; jEnd=kl
-       case(kMin, kMax)
-          iEnd=il; jEnd=jl
-       end select
-    end if
-
-    select case (faceID)
-    case (iMin)
-       kStart=1; kend=il
-       allocate(planes(0:il))
-       planes(0)%xSeed => xSeed(0, 1:jl, 1:kl, :)
-       do i=1, il
-          planes(i)%x => x(i, 1:jl, 1:kl, :)
-          planes(i)%xSeed => xSeed(i, 1:jl, 1:kl, :)
-          planes(i)%nearWall => nearWall(i, :, :)
-       end do
-    case (iMax)
-       kStart=1; kend=il
-       allocate(planes(0:il))
-       planes(0)%xSeed => xSeed(ie, 1:jl, 1:kl, :)
-       do i=1, il
-          planes(i)%x => x( il-i+1, 1:jl, 1:kl, :)
-          planes(i)%xSeed => xSeed(il-i+1, 1:jl, 1:kl, :)
-          planes(i)%nearWall => nearWall(il-i+1, :, :)
-       end do
-    case (jMin)
-       kStart=1; kend=jl
-       allocate(planes(0:jl))
-       planes(0)%xSeed => xSeed(1:il, 0, 1:kl, :)
-       do j=1, jl
-          planes(j)%x => x(1:il, j, 1:kl, :)
-          planes(j)%xSeed => xSeed(1:il, j, 1:kl, :)
-          planes(j)%nearWall => nearWall(:, j, :)
-       end do
-    case (jMax)
-       kStart=1; kend=jl
-       allocate(planes(0:jl))
-       planes(0)%xSeed => xSeed(1:il, je, 1:kl, :)
-       do j=1, jl
-          planes(j)%x => x(1:il, jl-j+1, 1:kl, :)
-          planes(j)%xSeed => xSeed(1:il, jl-j+1, 1:kl, :)
-          planes(j)%nearWall => nearWall(:, jl-j+1, :)
-       end do
-    case (kMin)
-       kStart=1; kend=kl
-       allocate(planes(0:kl))
-       planes(0)%xSeed => xSeed(1:il, 1:jl, 0, :)
-       do k=1, kl
-          planes(k)%x => x(1:il, 1:jl, k, :)
-          planes(k)%xSeed => xSeed(1:il, 1:jl, k, :)
-          planes(k)%nearWall => nearWall(:, :, k)
-       end do
-    case (kMax)
-       kStart=1; kend=kl
-       allocate(planes(0:kl))
-       planes(0)%xSeed => xSeed(1:il, 1:jl, ke, :)
-       do k=1, kl
-          planes(k)%x => x(1:il, 1:jl, kl-k+1, :)
-          planes(k)%xSeed => xSeed(1:il, 1:jl, kl-k+1, :)
-          planes(k)%nearWall => nearWall(:, :, kl-k+1)
-       end do
-    end select
-  end subroutine setBoundaryPointers
+        select case (faceID)
+        case (iMin)
+            kStart = 1; kend = il
+            allocate (planes(0:il))
+            planes(0)%xSeed => xSeed(0, 1:jl, 1:kl, :)
+            do i = 1, il
+                planes(i)%x => x(i, 1:jl, 1:kl, :)
+                planes(i)%xSeed => xSeed(i, 1:jl, 1:kl, :)
+                planes(i)%nearWall => nearWall(i, :, :)
+            end do
+        case (iMax)
+            kStart = 1; kend = il
+            allocate (planes(0:il))
+            planes(0)%xSeed => xSeed(ie, 1:jl, 1:kl, :)
+            do i = 1, il
+                planes(i)%x => x(il - i + 1, 1:jl, 1:kl, :)
+                planes(i)%xSeed => xSeed(il - i + 1, 1:jl, 1:kl, :)
+                planes(i)%nearWall => nearWall(il - i + 1, :, :)
+            end do
+        case (jMin)
+            kStart = 1; kend = jl
+            allocate (planes(0:jl))
+            planes(0)%xSeed => xSeed(1:il, 0, 1:kl, :)
+            do j = 1, jl
+                planes(j)%x => x(1:il, j, 1:kl, :)
+                planes(j)%xSeed => xSeed(1:il, j, 1:kl, :)
+                planes(j)%nearWall => nearWall(:, j, :)
+            end do
+        case (jMax)
+            kStart = 1; kend = jl
+            allocate (planes(0:jl))
+            planes(0)%xSeed => xSeed(1:il, je, 1:kl, :)
+            do j = 1, jl
+                planes(j)%x => x(1:il, jl - j + 1, 1:kl, :)
+                planes(j)%xSeed => xSeed(1:il, jl - j + 1, 1:kl, :)
+                planes(j)%nearWall => nearWall(:, jl - j + 1, :)
+            end do
+        case (kMin)
+            kStart = 1; kend = kl
+            allocate (planes(0:kl))
+            planes(0)%xSeed => xSeed(1:il, 1:jl, 0, :)
+            do k = 1, kl
+                planes(k)%x => x(1:il, 1:jl, k, :)
+                planes(k)%xSeed => xSeed(1:il, 1:jl, k, :)
+                planes(k)%nearWall => nearWall(:, :, k)
+            end do
+        case (kMax)
+            kStart = 1; kend = kl
+            allocate (planes(0:kl))
+            planes(0)%xSeed => xSeed(1:il, 1:jl, ke, :)
+            do k = 1, kl
+                planes(k)%x => x(1:il, 1:jl, kl - k + 1, :)
+                planes(k)%xSeed => xSeed(1:il, 1:jl, kl - k + 1, :)
+                planes(k)%nearWall => nearWall(:, :, kl - k + 1)
+            end do
+        end select
+    end subroutine setBoundaryPointers
 end subroutine flagNearWallCells
 
-
diff --git a/src/overset/floodInteriorCells.F90 b/src/overset/floodInteriorCells.F90
index 743a458bc..faa896e80 100644
--- a/src/overset/floodInteriorCells.F90
+++ b/src/overset/floodInteriorCells.F90
@@ -1,267 +1,267 @@
 subroutine floodInteriorCells(level, sps)
-  use constants
-  use communication, only : commPatternCell_1st, internalCell_1st, adflow_comm_world, myid
-  use blockPointers, only : il, jl, kl, nx, ny, nz, ie, je, ke, ib, jb, kb,  &
-       nDom, flowDoms, iBlank, status
-  use utils, only : setPointers, EChk
-  use haloExchange, only : whalo1to1intGeneric
-  use oversetUtilities, only :  isCompute, isWallDonor, isFloodSeed, isHole, setIsFlooded, &
-       setIsHole, setIsCompute, setIsFloodSeed, setIsHole, isReceiver, setIsReceiver, &
-       setIsDonor
-  use inputOverset, only : nFloodIter
-
-  implicit none
-
-  ! Input/Output
-  integer(kind=intType), intent(in) :: level, sps
-
-  ! Working
-  integer(kind=intType) :: nn, i, j, k, nSeed, iSeed, ierr
-  integer(kind=intType), dimension(:, :), allocatable :: stack, floodSeeds
-  integer(kind=intType) :: nChanged, nChangedLocal, stackPointer, loopIter
-  logical :: tmpSave
-  integer(kind=intType), dimension(:, :, :), pointer :: changed
-
-  ! Allocate pointer space for the integer flag communication
-  do nn=1, nDom
-     call setPointers(nn, level, sps)
-     ! Note that it has to start at 1 since this is normally a pointer
-     ! so the exchange routine expects the ordering to start at 1.
-     allocate(flowDoms(nn, level, sps)%intCommVars(1)%var(1:ib+1, 1:jb+1, 1:kb+1))
-     flowDoms(nn, level, sps)%intCommVars(1)%var = 0
-  end do
-
-  ! Keep track of the total number of loops
-  loopIter = 1
-
-  parallelSyncLoop: do
-
-     ! Keep track of the total number of fringes we've modified
-     nChangedLocal = 0
-
-     do nn=1,nDom
+    use constants
+    use communication, only: commPatternCell_1st, internalCell_1st, adflow_comm_world, myid
+    use blockPointers, only: il, jl, kl, nx, ny, nz, ie, je, ke, ib, jb, kb, &
+                             nDom, flowDoms, iBlank, status
+    use utils, only: setPointers, EChk
+    use haloExchange, only: whalo1to1intGeneric
+    use oversetUtilities, only: isCompute, isWallDonor, isFloodSeed, isHole, setIsFlooded, &
+                                setIsHole, setIsCompute, setIsFloodSeed, setIsHole, isReceiver, setIsReceiver, &
+                                setIsDonor
+    use inputOverset, only: nFloodIter
+
+    implicit none
+
+    ! Input/Output
+    integer(kind=intType), intent(in) :: level, sps
+
+    ! Working
+    integer(kind=intType) :: nn, i, j, k, nSeed, iSeed, ierr
+    integer(kind=intType), dimension(:, :), allocatable :: stack, floodSeeds
+    integer(kind=intType) :: nChanged, nChangedLocal, stackPointer, loopIter
+    logical :: tmpSave
+    integer(kind=intType), dimension(:, :, :), pointer :: changed
+
+    ! Allocate pointer space for the integer flag communication
+    do nn = 1, nDom
         call setPointers(nn, level, sps)
-        changed => flowDoms(nn, level, sps)%intCommVars(1)%var
-
-        ! Allocate space for our queue (stack). It needs to be 6*nx*ny*nz + 1:
-        ! 6 for each of the 6 coordinate directions plus our extra
-        ! seed. It should never come close to this unless the entire
-        ! block will be blanked.
-
-        allocate(stack(3, nx*ny*nz*6 + 1))
-
-        ! Also allocate space for our flood seeds. Make it big enough to
-        ! include the first level halos.
-        allocate(floodSeeds(3, 6*ie*je*je))
-
-        ! These are the seeds we have directly. We will only use these on the first iteration:
-        nSeed = 0
-
-        if (loopIter == 1) then
-
-           ! Make the -3 and -2 cells, those inside the body,
-           ! "compute" cells. This allows the flooding algorithm to
-           ! flood them as if they were comptue cells on subsequent
-           ! iterations the same as the first iteration.
-           do k=2, kl
-              do j=2, jl
-                 do i=2, il
-                    if (iblank(i, j, k) == -3 .or. iblank(i, j, k) == -2) then
-                       call setIsCompute(status(i, j, k), .True.)
-                       call setIsReceiver(status(i, j, k), .False.)
+        ! Note that it has to start at 1 since this is normally a pointer
+        ! so the exchange routine expects the ordering to start at 1.
+        allocate (flowDoms(nn, level, sps)%intCommVars(1)%var(1:ib + 1, 1:jb + 1, 1:kb + 1))
+        flowDoms(nn, level, sps)%intCommVars(1)%var = 0
+    end do
+
+    ! Keep track of the total number of loops
+    loopIter = 1
+
+    parallelSyncLoop: do
+
+        ! Keep track of the total number of fringes we've modified
+        nChangedLocal = 0
+
+        do nn = 1, nDom
+            call setPointers(nn, level, sps)
+            changed => flowDoms(nn, level, sps)%intCommVars(1)%var
+
+            ! Allocate space for our queue (stack). It needs to be 6*nx*ny*nz + 1:
+            ! 6 for each of the 6 coordinate directions plus our extra
+            ! seed. It should never come close to this unless the entire
+            ! block will be blanked.
+
+            allocate (stack(3, nx * ny * nz * 6 + 1))
+
+            ! Also allocate space for our flood seeds. Make it big enough to
+            ! include the first level halos.
+            allocate (floodSeeds(3, 6 * ie * je * je))
+
+            ! These are the seeds we have directly. We will only use these on the first iteration:
+            nSeed = 0
+
+            if (loopIter == 1) then
+
+                ! Make the -3 and -2 cells, those inside the body,
+                ! "compute" cells. This allows the flooding algorithm to
+                ! flood them as if they were comptue cells on subsequent
+                ! iterations the same as the first iteration.
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            if (iblank(i, j, k) == -3 .or. iblank(i, j, k) == -2) then
+                                call setIsCompute(status(i, j, k), .True.)
+                                call setIsReceiver(status(i, j, k), .False.)
+                            end if
+                        end do
+                    end do
+                end do
+
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            if (isWallDonor(status(i, j, k))) then
+                                call addSeed(i, j, k)
+                            end if
+                        end do
+                    end do
+                end do
+            else
+                ! On the second and subsequent passes, check each 1st
+                ! non-corner halos in the 6 faces to see if we received
+                ! "changed" info from neighbour proc. This will allow us to
+                ! continue the flooding on this processor/block. Note that
+                ! even in a single processor case, the halo exchange in
+                ! necessary to communicate between two local blocks
+
+                ! iMin/iMax
+                do k = 2, kl
+                    do j = 2, jl
+                        if (changed(1 + 1, j + 1, k + 1) == 1) then
+                            call addSeed(2, j, k)
+                        end if
+                        if (changed(ie + 1, j + 1, k + 1) == 1) then
+                            call addSeed(il, j, k)
+                        end if
+                    end do
+                end do
+
+                ! jMin/jMax
+                do k = 2, kl
+                    do i = 2, il
+                        if (changed(i + 1, 1 + 1, k + 1) == 1) then
+                            call addSeed(i, 2, k)
+                        end if
+                        if (changed(i + 1, je + 1, k + 1) == 1) then
+                            call addSeed(i, jl, k)
+                        end if
+                    end do
+                end do
+
+                ! kMin:
+                do j = 2, jl
+                    do i = 2, il
+                        if (changed(i + 1, j + 1, 1 + 1) == 1) then
+                            call addSeed(i, j, 2)
+                        end if
+                        if (changed(i + 1, j + 1, ke + 1) == 1) then
+                            call addSeed(i, j, kl)
+                        end if
+                    end do
+                end do
+            end if
+
+            ! Loop over our seeds we currently have
+            do iSeed = 1, nSeed
+
+                ! Put the particular seed in the first slot of the stack
+                stack(:, 1) = floodSeeds(:, iSeed)
+
+                ! Reset the stack pointer length back to just the one seed
+                ! we have
+                stackPointer = 1
+
+                ! flag the seed points --- only on first pass
+                if (loopIter == 1) then
+                    i = stack(1, stackPointer)
+                    j = stack(2, stackPointer)
+                    k = stack(3, stackPointer)
+                    call setIsFloodSeed(status(i, j, k), .True.)
+                    call setIsDonor(status(i, j, k), .False.)
+                    call setIsReceiver(status(i, j, k), .False.)
+                end if
+
+                ! Start the flooding (stacked based, not recursive)
+                do while (stackPointer > 0)
+
+                    ! 'Pop' the current point off the stack
+                    i = stack(1, stackPointer)
+                    j = stack(2, stackPointer)
+                    k = stack(3, stackPointer)
+                    stackPointer = stackPointer - 1
+
+                if (isCompute(status(i, j, k)) .and. .not. isReceiver(status(i, j, k)) .and. iblank(i, j, k) /= -4) then
+                        ! Flag the cell (using changed) as being changed
+                        changed(i + 1, j + 1, k + 1) = 1
+
+                        ! Keep track of the total number we've changed.  For
+                        ! reporting purposes...only count the ones that are
+                        ! on actual compute cells:
+                        if (onBlock(i, j, k)) then
+                            nChangedLocal = nChangedLocal + 1
+                        end if
+
+                        ! pure compute cell, convert to hole
+                        call setIsHole(status(i, j, k), .True.)
+                        call setIsFlooded(status(i, j, k), .True.)
+                        call setIsCompute(status(i, j, k), .False.)
+
+                        ! Now add the six nearest neighbours to the stack
+                        ! provided they are in the owned cell range:
+
+                        if (i - 1 >= 2) then
+                            stackPointer = stackPointer + 1
+                            stack(:, stackPointer) = (/i - 1, j, k/)
+                        end if
+
+                        if (i + 1 <= il) then
+                            stackPointer = stackPointer + 1
+                            stack(:, stackPointer) = (/i + 1, j, k/)
+                        end if
+
+                        if (j - 1 >= 2) then
+                            stackPointer = stackPointer + 1
+                            stack(:, stackPointer) = (/i, j - 1, k/)
+                        end if
+
+                        if (j + 1 <= jl) then
+                            stackPointer = stackPointer + 1
+                            stack(:, stackPointer) = (/i, j + 1, k/)
+                        end if
+
+                        if (k - 1 >= 2) then
+                            stackPointer = stackPointer + 1
+                            stack(:, stackPointer) = (/i, j, k - 1/)
+                        end if
+
+                        if (k + 1 <= kl) then
+                            stackPointer = stackPointer + 1
+                            stack(:, stackPointer) = (/i, j, k + 1/)
+                        end if
                     end if
-                 end do
-              end do
-           end do
-
-           do k=2, kl
-              do j=2, jl
-                 do i=2, il
-                    if (isWallDonor(status(i, j, k))) then
-                       call addSeed(i,j ,k)
-                    end if
-                 end do
-              end do
-           end do
-        else
-           ! On the second and subsequent passes, check each 1st
-           ! non-corner halos in the 6 faces to see if we received
-           ! "changed" info from neighbour proc. This will allow us to
-           ! continue the flooding on this processor/block. Note that
-           ! even in a single processor case, the halo exchange in
-           ! necessary to communicate between two local blocks
-
-           ! iMin/iMax
-           do k=2, kl
-              do j=2, jl
-                 if (changed(1+1, j+1, k+1) == 1) then
-                    call addSeed(2, j, k)
-                 end if
-                 if (changed(ie+1, j+1, k+1) == 1) then
-                    call addSeed(il, j, k)
-                 end if
-              end do
-           end do
-
-           ! jMin/jMax
-           do k=2, kl
-              do i=2, il
-                 if (changed(i+1, 1+1, k+1) == 1) then
-                    call addSeed(i, 2, k)
-                 end if
-                 if (changed(i+1, je+1, k+1) == 1) then
-                    call addSeed(i, jl, k)
-                 end if
-              end do
-           end do
-
-           ! kMin:
-           do j=2, jl
-              do i=2, il
-                 if (changed(i+1, j+1, 1+1) == 1) then
-                    call addSeed(i, j, 2)
-                 end if
-                 if (changed(i+1, j+1, ke+1) == 1) then
-                    call addSeed(i, j, kl)
-                 end if
-              end do
-           end do
-        end if
 
-        ! Loop over our seeds we currently have
-        do iSeed = 1, nSeed
-
-           ! Put the particular seed in the first slot of the stack
-           stack(:, 1) = floodSeeds(:, iSeed)
-
-           ! Reset the stack pointer length back to just the one seed
-           ! we have
-           stackPointer = 1
-
-           ! flag the seed points --- only on first pass
-           if (loopIter == 1) then
-              i = stack(1, stackPointer)
-              j = stack(2, stackPointer)
-              k = stack(3, stackPointer)
-              call setIsFloodSeed(status(i, j, k), .True.)
-              call setIsDonor(status(i, j, k), .False.)
-              call setIsReceiver(status(i,j,k), .False.)
-           end if
-
-           ! Start the flooding (stacked based, not recursive)
-           do while (stackPointer > 0 )
-
-              ! 'Pop' the current point off the stack
-              i = stack(1, stackPointer)
-              j = stack(2, stackPointer)
-              k = stack(3, stackPointer)
-              stackPointer = stackPointer - 1
-
-              if (isCompute(status(i, j, k)) .and. .not. isReceiver(status(i, j, k)) .and. iblank(i,j,k)/=-4) then
-                 ! Flag the cell (using changed) as being changed
-                 changed(i+1, j+1, k+1) = 1
-
-                 ! Keep track of the total number we've changed.  For
-                 ! reporting purposes...only count the ones that are
-                 ! on actual compute cells:
-                 if (onBlock(i, j, k)) then
-                    nChangedLocal = nChangedLocal + 1
-                 end if
-
-                 ! pure compute cell, convert to hole
-                 call setIsHole(status(i, j, k), .True.)
-                 call setIsFlooded(status(i, j, k), .True.)
-                 call setIsCompute(status(i, j, k), .False.)
-
-                 ! Now add the six nearest neighbours to the stack
-                 ! provided they are in the owned cell range:
-
-                 if (i-1 >= 2) then
-                    stackPointer = stackPointer + 1
-                    stack(:, stackPointer) = (/i-1, j  , k  /)
-                 end if
-
-                 if (i+1 <= il) then
-                    stackPointer = stackPointer + 1
-                    stack(:, stackPointer) = (/i+1, j  , k  /)
-                 end if
-
-                 if (j-1 >= 2) then
-                    stackPointer = stackPointer + 1
-                    stack(:, stackPointer) = (/i  , j-1, k  /)
-                 end if
-
-                 if (j+1 <= jl) then
-                    stackPointer = stackPointer + 1
-                    stack(:, stackPointer) = (/i  , j+1, k  /)
-                 end if
-
-                 if (k-1 >= 2) then
-                    stackPointer = stackPointer + 1
-                    stack(:, stackPointer) = (/i  , j  , k-1/)
-                 end if
-
-                 if (k+1 <= kl) then
-                    stackPointer = stackPointer + 1
-                    stack(:, stackPointer) = (/i  , j , k+1 /)
-                 end if
-              end if
-
-           end do
-        end do
+                end do
+            end do
 
-        deallocate(stack, floodSeeds)
-     end do
+            deallocate (stack, floodSeeds)
+        end do
 
-    ! Exchange "changed"
-     call wHalo1to1IntGeneric(1, level, sps, commPatternCell_1st, internalCell_1st)
+        ! Exchange "changed"
+        call wHalo1to1IntGeneric(1, level, sps, commPatternCell_1st, internalCell_1st)
 
-     ! Determine if cells got changd. If so do another loop.
-     call mpi_allreduce(nChangedLocal, nChanged, 1, adflow_integer, MPI_SUM, &
-          adflow_comm_world, ierr)
-     call ECHK(ierr, __FILE__, __LINE__)
+        ! Determine if cells got changd. If so do another loop.
+        call mpi_allreduce(nChangedLocal, nChanged, 1, adflow_integer, MPI_SUM, &
+                           adflow_comm_world, ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
 
-     if (myid == 0) then
-        print *, 'Flood Iteration:', loopIter, 'Blanked ', nChanged, 'Interior Cells.'
-     end if
+        if (myid == 0) then
+            print *, 'Flood Iteration:', loopIter, 'Blanked ', nChanged, 'Interior Cells.'
+        end if
 
-     if ((nChanged == 0) .or. (loopIter .eq. nFloodIter)) then
-        exit parallelSyncLoop
-     end if
+        if ((nChanged == 0) .or. (loopIter .eq. nFloodIter)) then
+            exit parallelSyncLoop
+        end if
 
-     loopIter = loopIter + 1
+        loopIter = loopIter + 1
 
-  end do parallelSyncLoop
+    end do parallelSyncLoop
 
-  ! deallocate the temporary int space
-  do nn=1, nDom
-     call setPointers(nn, level, sps)
-     deallocate(flowDoms(nn, level, sps)%intCommVars(1)%var)
-  end do
+    ! deallocate the temporary int space
+    do nn = 1, nDom
+        call setPointers(nn, level, sps)
+        deallocate (flowDoms(nn, level, sps)%intCommVars(1)%var)
+    end do
 
 contains
-  ! Simple routine to make code easier to read above
-  subroutine addSeed(i, j, k)
+    ! Simple routine to make code easier to read above
+    subroutine addSeed(i, j, k)
 
-    implicit none
-    integer(kind=intType), intent(in) :: i, j, k
-    nSeed = nSeed + 1
-    floodSeeds(:, nSeed) = (/i, j, k/)
-  end subroutine addSeed
+        implicit none
+        integer(kind=intType), intent(in) :: i, j, k
+        nSeed = nSeed + 1
+        floodSeeds(:, nSeed) = (/i, j, k/)
+    end subroutine addSeed
 
-  function onBlock(i, j, k)
+    function onBlock(i, j, k)
 
-    implicit none
+        implicit none
 
-    integer(kind=intType), intent(in) :: i, j, k
-    logical :: onBlock
+        integer(kind=intType), intent(in) :: i, j, k
+        logical :: onBlock
 
-    if (i >= 2 .and. i <= il .and. j >= 2 .and. j<= jl .and. k >= 2 .and. k <= kl) then
-       onBlock = .True.
-    else
-       onBlock = .False.
-    end if
-  end function onBlock
+        if (i >= 2 .and. i <= il .and. j >= 2 .and. j <= jl .and. k >= 2 .and. k <= kl) then
+            onBlock = .True.
+        else
+            onBlock = .False.
+        end if
+    end function onBlock
 
 end subroutine floodInteriorCells
 
diff --git a/src/overset/fringeSearch.F90 b/src/overset/fringeSearch.F90
index 445105a5a..7495ff92c 100644
--- a/src/overset/fringeSearch.F90
+++ b/src/overset/fringeSearch.F90
@@ -1,173 +1,173 @@
 subroutine fringeSearch(oBlock, oFringe)
 
-  use constants
-  use block, only : fringeType
-  use oversetData, only : oversetBlock, oversetFringe, localWallFringes, &
-       tmpFringePtr, nLocalWallFringe
-  use inputOverset, onlY : overlapFactor, oversetProjTol
-  use adtLocalSearch, only :  mindistancetreesearchsinglepoint, &
-       containmenttreesearchsinglepoint
-  use adtData, only : adtBBoxTargetType
-  use adtUtils, only : stack
-  use utils, only : mynorm2
-  use oversetUtilities, only : fracToWeights2, addToFringeList, addToFringeBuffer, &
-       tic, toc, windIndex
-  implicit none
-
-  type(oversetBlock), intent(inout) :: oBlock
-  type(oversetFringe), intent(inout) :: oFringe
-
-  integer(kind=intType) :: idom, jdom
-
-  ! Working Varaibles
-  integer(kind=intType) :: nInterpol, elemID, nalloc, intInfo(3), intInfo2(3)
-  integer(kind=intType) :: i, ii, jj, kk, j, nn, myI, myJ, myK
-  integer(kind=intTYpe) :: iii, jjj, kkk, n, myind,  nx, ny, nz, myindex
-  logical :: invalid, failed
-  real(kind=realType) :: uu, vv, ww, err1, err2
-  real(kind=realType) :: uvw(5), uvw2(5), xx(4), pt(3), xcheck(3)
-  real(kind=realType), dimension(:, :), allocatable :: offset
-  real(kind=realType) :: oneMinusU, oneMinusV, oneMinusW, weight(8)
-  ! Variables we have to pass the ADT search routine
-  integer(kind=intType), dimension(:), pointer :: BB
-  type(adtBBoxTargetType), dimension(:), pointer :: BB2
-  integer(kind=intType), dimension(:), pointer :: frontLeaves
-  integer(kind=intType), dimension(:), pointer :: frontLeavesNew
-  type(fringeType) :: fringe
-  nInterpol = 1 ! we get the ADT to compute the interpolated volume for us.
-
-  ! Allocate the (pointer) memory that may be resized as necessary for
-  ! the singlePoint search routine.
-  allocate(BB(20), BB2(20), frontLeaves(25), frontLeavesNew(25), stack(100))
-
-  ! Number of fringes we have:
-  n = size(oFringe%x, 2)
-
-  ! Offset vector:
-  allocate(offset(3, n))
-  offset = zero
-  call tic(iSurfaceCorrection)
-  call surfaceCorrection(oBlock, oFringe, offset, n)
-  call toc(iSurfaceCorrection)
-
-  call tic(iDonorSearch)
-  ! Search the cells one at a time:
-  do i=1, n
-
-     ! Compute the potentailly offset point to search for.
-     xx(1:3) = oFringe%x(:, i) + offset(:, i)
-
-     call containmentTreeSearchSinglePoint(oBlock%ADT, xx, intInfo, uvw, &
-          oBlock%qualDonor, nInterpol, BB, frontLeaves, frontLeavesNew, failed)
-
-     if (intInfo(1) >= 0) then
-        call fracToWeights2(uvw(1:3), weight)
-        xcheck = zero
-        do j=1,8
-           xcheck = xcheck + weight(j)*oBlock%xADT(:, oBlock%hexaConn(j, intInfo(3)))
-        end do
-
-        if (mynorm2(xcheck - xx(1:3)) > oversetProjTol) then
-           failed = .True.
+    use constants
+    use block, only: fringeType
+    use oversetData, only: oversetBlock, oversetFringe, localWallFringes, &
+                           tmpFringePtr, nLocalWallFringe
+    use inputOverset, onlY: overlapFactor, oversetProjTol
+    use adtLocalSearch, only: mindistancetreesearchsinglepoint, &
+                              containmenttreesearchsinglepoint
+    use adtData, only: adtBBoxTargetType
+    use adtUtils, only: stack
+    use utils, only: mynorm2
+    use oversetUtilities, only: fracToWeights2, addToFringeList, addToFringeBuffer, &
+                                tic, toc, windIndex
+    implicit none
+
+    type(oversetBlock), intent(inout) :: oBlock
+    type(oversetFringe), intent(inout) :: oFringe
+
+    integer(kind=intType) :: idom, jdom
+
+    ! Working Varaibles
+    integer(kind=intType) :: nInterpol, elemID, nalloc, intInfo(3), intInfo2(3)
+    integer(kind=intType) :: i, ii, jj, kk, j, nn, myI, myJ, myK
+    integer(kind=intTYpe) :: iii, jjj, kkk, n, myind, nx, ny, nz, myindex
+    logical :: invalid, failed
+    real(kind=realType) :: uu, vv, ww, err1, err2
+    real(kind=realType) :: uvw(5), uvw2(5), xx(4), pt(3), xcheck(3)
+    real(kind=realType), dimension(:, :), allocatable :: offset
+    real(kind=realType) :: oneMinusU, oneMinusV, oneMinusW, weight(8)
+    ! Variables we have to pass the ADT search routine
+    integer(kind=intType), dimension(:), pointer :: BB
+    type(adtBBoxTargetType), dimension(:), pointer :: BB2
+    integer(kind=intType), dimension(:), pointer :: frontLeaves
+    integer(kind=intType), dimension(:), pointer :: frontLeavesNew
+    type(fringeType) :: fringe
+    nInterpol = 1 ! we get the ADT to compute the interpolated volume for us.
+
+    ! Allocate the (pointer) memory that may be resized as necessary for
+    ! the singlePoint search routine.
+    allocate (BB(20), BB2(20), frontLeaves(25), frontLeavesNew(25), stack(100))
+
+    ! Number of fringes we have:
+    n = size(oFringe%x, 2)
+
+    ! Offset vector:
+    allocate (offset(3, n))
+    offset = zero
+    call tic(iSurfaceCorrection)
+    call surfaceCorrection(oBlock, oFringe, offset, n)
+    call toc(iSurfaceCorrection)
+
+    call tic(iDonorSearch)
+    ! Search the cells one at a time:
+    do i = 1, n
+
+        ! Compute the potentailly offset point to search for.
+        xx(1:3) = oFringe%x(:, i) + offset(:, i)
+
+        call containmentTreeSearchSinglePoint(oBlock%ADT, xx, intInfo, uvw, &
+                                              oBlock%qualDonor, nInterpol, BB, frontLeaves, frontLeavesNew, failed)
+
+        if (intInfo(1) >= 0) then
+            call fracToWeights2(uvw(1:3), weight)
+            xcheck = zero
+            do j = 1, 8
+                xcheck = xcheck + weight(j) * oBlock%xADT(:, oBlock%hexaConn(j, intInfo(3)))
+            end do
+
+            if (mynorm2(xcheck - xx(1:3)) > oversetProjTol) then
+                failed = .True.
+            end if
         end if
-     end if
-
-     if (intInfo(1) >= 0 .and. failed) then
-        ! we "found" a point but it is garbage. Do the failsafe search
-        xx(4) = large
-        call minDistanceTreeSearchSinglePoint(oBlock%ADT, xx, intInfo, uvw, &
-             oBlock%qualDonor, nInterpol, BB2, frontLeaves, frontLeavesNew)
-
-        ! Check this one:
-        call fracToWeights2(uvw(1:3), weight)
-        xcheck = zero
-        do j=1,8
-           xcheck = xcheck + weight(j)*oBlock%xADT(:, oBlock%hexaConn(j, intInfo(3)))
-        end do
-
-        ! Since this is the last line of defence, relax the tolerance a bit
-        if (mynorm2(xcheck - xx(1:3)) > 100*oversetProjTol) then
-           ! This fringe has not found a donor
-           intInfo(1) = -1
-        else
-           ! This one has now passed.
-
-           ! Important! uvw(4) is the distance squared for this search
-           ! not
-           uvw(4) = uvw(5)
-        end if
-
-     end if
-
-     elemFound: if (intInfo(1) >= 0) then
-
-        ! Recompute the i,j,k indices on the donor
-        elemID = intInfo(3) - 1 ! Make it zero based for the modding.
-        ii = mod(elemID, oBlock%il) + 1
-        jj = mod(elemID/oBlock%il, oBlock%jl) + 1
-        kk = elemID/(oBlock%il*oBlock%jl) + 1
-
-        ! Now record the information onto the fringe
-        fringe%donorProc = oBlock%proc
-        fringe%donorBlock= oBlock%block
-        fringe%dIndex    = windIndex(ii, jj, kk, oBlock%il, oBlock%jl, oBlock%kl)
-        fringe%donorFrac = uvw(1:3)
-        fringe%quality   = uvw(4)
-
-        ! Also save the information about where it came from,
-        ! we need this to combine everything together at the end.
-        fringe%myBlock = oFringe%block
-
-        myI = mod((i-1), oFringe%nx) + 2
-        myJ = mod((i-1)/oFringe%nx, oFringe%ny) + 2
-        myK = (i-1)/(oFringe%nx*oFringe%ny) + 2
-        fringe%myIndex = windIndex(myI, myJ, myK, oFringe%il, oFringe%jl, oFringe%kl)
-
-        ! Store the donor in the big flat list if it isn't invalid
-        invalid = .False.
-        do kkk=0,1
-           do jjj=0,1
-              do iii=0,1
-                 if (oBlock%invalidDonor(ii+iii, jj+jjj, kk+kkk)  .ne. 0) then
-                    invalid = .True.
-                 end if
-              end do
-           end do
-        end do
-
-        if (.not. invalid) then
-           call addToFringeBuffer(oFringe%fringeIntBuffer, ofringe%fringeRealBuffer, &
-                oFringe%nDonor, fringe)
-        end if
-
-        ! Save the fringe to the wallList. Note that we have to do
-        ! this *after* the actual fringeList becuase we may modify the
-        ! dI, dJ, dK here.
-
-        if ((oFringe%isWall(i) > 0) .and. .not. (oBlock%nearWall(ii, jj, kk) == 1)) then
 
-           ! Here we have to recompute the i,j,k indices since we may
-           ! need to modify them based on the frac.
+        if (intInfo(1) >= 0 .and. failed) then
+            ! we "found" a point but it is garbage. Do the failsafe search
+            xx(4) = large
+            call minDistanceTreeSearchSinglePoint(oBlock%ADT, xx, intInfo, uvw, &
+                                                  oBlock%qualDonor, nInterpol, BB2, frontLeaves, frontLeavesNew)
+
+            ! Check this one:
+            call fracToWeights2(uvw(1:3), weight)
+            xcheck = zero
+            do j = 1, 8
+                xcheck = xcheck + weight(j) * oBlock%xADT(:, oBlock%hexaConn(j, intInfo(3)))
+            end do
+
+            ! Since this is the last line of defence, relax the tolerance a bit
+            if (mynorm2(xcheck - xx(1:3)) > 100 * oversetProjTol) then
+                ! This fringe has not found a donor
+                intInfo(1) = -1
+            else
+                ! This one has now passed.
+
+                ! Important! uvw(4) is the distance squared for this search
+                ! not
+                uvw(4) = uvw(5)
+            end if
 
-           if (uvw(1) >= half) then
-              ii = ii + 1
-           end if
-
-           if (uvw(2) >= half) then
-              jj =jj + 1
-           end if
-
-           if (uvw(3) >= half) then
-              kk =kk + 1
-           end if
-
-           ! Recompute the full index for the wall.
-           fringe%dIndex = windIndex(ii, jj, kk, oBlock%il, oBlock%jl, oBlock%kl)
-
-           call addToFringeList(localWallFringes, nLocalWallFringe, fringe)
         end if
-     end if elemFound
-  end do
-  deallocate(offset, BB, BB2, frontLeaves, frontLeavesNew, stack)
-  call toc(iDonorSearch)
+
+        elemFound: if (intInfo(1) >= 0) then
+
+            ! Recompute the i,j,k indices on the donor
+            elemID = intInfo(3) - 1 ! Make it zero based for the modding.
+            ii = mod(elemID, oBlock%il) + 1
+            jj = mod(elemID / oBlock%il, oBlock%jl) + 1
+            kk = elemID / (oBlock%il * oBlock%jl) + 1
+
+            ! Now record the information onto the fringe
+            fringe%donorProc = oBlock%proc
+            fringe%donorBlock = oBlock%block
+            fringe%dIndex = windIndex(ii, jj, kk, oBlock%il, oBlock%jl, oBlock%kl)
+            fringe%donorFrac = uvw(1:3)
+            fringe%quality = uvw(4)
+
+            ! Also save the information about where it came from,
+            ! we need this to combine everything together at the end.
+            fringe%myBlock = oFringe%block
+
+            myI = mod((i - 1), oFringe%nx) + 2
+            myJ = mod((i - 1) / oFringe%nx, oFringe%ny) + 2
+            myK = (i - 1) / (oFringe%nx * oFringe%ny) + 2
+            fringe%myIndex = windIndex(myI, myJ, myK, oFringe%il, oFringe%jl, oFringe%kl)
+
+            ! Store the donor in the big flat list if it isn't invalid
+            invalid = .False.
+            do kkk = 0, 1
+                do jjj = 0, 1
+                    do iii = 0, 1
+                        if (oBlock%invalidDonor(ii + iii, jj + jjj, kk + kkk) .ne. 0) then
+                            invalid = .True.
+                        end if
+                    end do
+                end do
+            end do
+
+            if (.not. invalid) then
+                call addToFringeBuffer(oFringe%fringeIntBuffer, ofringe%fringeRealBuffer, &
+                                       oFringe%nDonor, fringe)
+            end if
+
+            ! Save the fringe to the wallList. Note that we have to do
+            ! this *after* the actual fringeList becuase we may modify the
+            ! dI, dJ, dK here.
+
+            if ((oFringe%isWall(i) > 0) .and. .not. (oBlock%nearWall(ii, jj, kk) == 1)) then
+
+                ! Here we have to recompute the i,j,k indices since we may
+                ! need to modify them based on the frac.
+
+                if (uvw(1) >= half) then
+                    ii = ii + 1
+                end if
+
+                if (uvw(2) >= half) then
+                    jj = jj + 1
+                end if
+
+                if (uvw(3) >= half) then
+                    kk = kk + 1
+                end if
+
+                ! Recompute the full index for the wall.
+                fringe%dIndex = windIndex(ii, jj, kk, oBlock%il, oBlock%jl, oBlock%kl)
+
+                call addToFringeList(localWallFringes, nLocalWallFringe, fringe)
+            end if
+        end if elemFound
+    end do
+    deallocate (offset, BB, BB2, frontLeaves, frontLeavesNew, stack)
+    call toc(iDonorSearch)
 end subroutine fringeSearch
diff --git a/src/overset/makeBoundaryStrings.F90 b/src/overset/makeBoundaryStrings.F90
index 9bd8bf494..a978fc86a 100644
--- a/src/overset/makeBoundaryStrings.F90
+++ b/src/overset/makeBoundaryStrings.F90
@@ -1,614 +1,606 @@
 module gapBoundaries
 contains
-  subroutine makeGapBoundaryStrings(zipperFamList, level, sps, master)
-
-    use constants
-    use adtBuild, only : buildSerialQuad
-    use blockPointers, only : x, globalCell, globalNOde, BCData, nBocos, &
-         il, jl, kl, nDom, rightHanded, BCFaceID, BCType
-    use communication, only : adflow_comm_world, myid, nProc
-    use oversetData, only : oversetString, oversetWall, nClusters, clusters, cumDomProc
-    use stringOps
-    use utils, only : setPointers, EChk, myNorm2, cross_prod
-    use oversetPackingRoutines, only : getWallSize
-    use sorting, only : famInList
-    implicit none
-
-    ! Input Params
-    integer(kind=intType), intent(in), dimension(:) :: zipperFamLIst
-    integer(kind=intType), intent(in) :: level, sps
-
-    ! Working
-    integer(kind=intType) :: i, j, k, nn, mm, ii, jj, kk, c, e,  idx
-    integer(kind=intType) :: i1, i2, j1, j2, iBeg, iEnd, jBeg, jEnd
-    integer(kind=intType) :: i3, i4, j3, j4
-    integer(kind=intType) :: iStart, iSize, ierr, iProc, firstElem, curElem
-    integer(kind=intType) :: below, above, left, right, nNodes, nElems
-    integer(kind=intType) :: patchNodeCounter, nZipped, gc
-    integer(kind=intType), dimension(:), allocatable :: nElemsProc, nNodesProc
-    integer(kind=intType), dimension(:, :), pointer :: gcp
-    real(kind=realType), dimension(:, :, :), pointer :: xx
-    real(kind=realType), dimension(3) :: s1, s2, s3, s4, v1, v2, v3, v4, x0
-    real(kind=realType) ::  fact, timeA, minNorm
-
-    real(kind=realType), dimension(:, :, :), allocatable :: patchNormals
-    real(kind=realType), dimension(:, :), allocatable :: patchH
-    integer(kind=intType), dimension(:), allocatable :: epc, surfaceSeeds, inverse
-    logical,  dimension(:), allocatable :: badString
-    type(oversetString), dimension(:), allocatable, target :: localStrings
-    type(oversetString), dimension(:), allocatable, target :: globalStrings
-    type(oversetString) :: master, pocketMaster
-    type(oversetString), pointer :: stringsLL, str
-    type(oversetString), dimension(:), allocatable, target :: strings
-    integer(kind=intType) :: nFullStrings, nUnique, famID
-    logical :: regularOrdering
-    integer mpiStatus(MPI_STATUS_SIZE)
-
-    ! Wall search related
-    integer(kind=intType) :: ncells
-    type(oversetWall), dimension(:), allocatable, target :: walls
-    type(oversetWall),  target :: fullWall
-    character(80) :: fileName
-
-    ! Set small number to avoid division by zero when computing normal vectors
-    minNorm = 1.0e-14
-
-    ! Loop over the wall faces counting up the edges that stradle a
-    ! compute cell and a blanked (or interpolated) cell.
-
-    allocate(epc(nClusters)) ! epc = elementsPerCluster
-    epc = 0
-
-    ! Get a (very) large overestimate of the total number of edges in a
-    ! cluster: as twice the  number of nodes.
-    domainLoop: do nn=1, nDom
-       call setPointers(nn, level, sps)
-       call getWallSize(zipperFamList, nNodes, nElems, .False.)
-       c = clusters(cumDomProc(myid) + nn)
-       epc(c) = epc(c) + 2*nNodes
-    end do domainLoop
-
-
-    ! Allocate the space we need in the local strings.
-    allocate(localStrings(nClusters))
-    do c=1, nClusters
-       call nullifyString(localStrings(c))
-    end do
-
-    do c=1, nClusters
-       allocate(&
-            localStrings(c)%conn(2, epc(c)), localStrings(c)%nodeData(10, 2*epc(c)), &
-            localStrings(c)%intNodeData(3, 2*epc(c)))
-       localStrings(c)%nodeData = zero
-       localStrings(c)%nNodes = 0
-       localStrings(c)%nElems = 0
-
-       ! Assign string pointers immediately after allocation
-       call setStringPointers(localStrings(c))
-
-    end do
-    deallocate(epc)
-    ! And now loop back through the walls and add in the
-    ! elems/nodes/normals/indices for each edge.
-
-    ! Reset the elems per cluster. We will count up the actual number
-    ! now.
-
-    patchNodeCounter = 0
-    domainLoop2: do nn=1, nDom
-       call setPointers(nn, level, sps)
-       ! The current cluster is 'c'
-       c = clusters(cumDomProc(myid) + nn)
-
-       bocoLoop: do mm=1, nBocos
-          famID = BCData(mm)%famID
-          if (famInList(famID, zipperFamList)) then
-
-             select case (BCFaceID(mm))
-             case (iMin)
-                xx => x(1, :, :, :)
-                gcp => globalCell(2, :, :)
-                fact = one
-                regularOrdering = .True.
-             case (iMax)
-                xx => x(il, :, :, :)
-                gcp => globalCell(il, :, :)
-                fact = -one
-                regularOrdering = .False.
-             case (jMin)
-                xx => x(:, 1, :, :)
-                gcp => globalCell(:, 2, :)
-                fact = -one
-                regularOrdering = .False.
-             case (jMax)
-                xx => x(:, jl, :, :)
-                gcp => globalCell(:, jl, :)
-                fact = one
-                regularOrdering = .True.
-             case (kMin)
-                xx => x(:, :, 1, :)
-                gcp => globalCell(:, :, 2)
-                fact = one
-                regularOrdering = .True.
-             case (kMax)
-                xx => x(:, :, kl, :)
-                gcp => globalCell(:, :, kl)
-                fact = -one
-                regularOrdering = .False.
-             end select
-
-             ! Need to reverse once more for a left-handed block
-             if (.not. rightHanded) then
-                fact = -fact
-                regularOrdering = .not. (regularOrdering)
-             end if
-
-             ! Before we go through and find the actual elems,
-             ! precompute the patch numbering and node-based averaged
-             ! unit normals
-             jBeg = BCdata(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
-             iBeg = BCData(mm)%inBeg; iEnd = BCData(mm)%inEnd
-
-             allocate(patchNormals(3, iBeg:iEnd, jBeg:jEnd), &
-                  patchH(iBeg:iEnd, jBeg:jEnd))
-
-             do j=jBeg, jEnd
-                do i=iBeg, iEnd
-                   patchNodeCounter = patchNodeCounter + 1
-                   x0 = xx(i+1, j+1, :)
-
-                   ! Normalized normals for each surrounding face.
-                   v1 = xx(i+2, j+1, :) - x0
-                   v2 = xx(i+1, j+2, :) - x0
-                   v3 = xx(i  , j+1, :) - x0
-                   v4 = xx(i+1, j  , :) - x0
-
-                   call cross_prod(v1, v2, s1)
-                   call cross_prod(v2, v3, s2)
-                   call cross_prod(v3, v4, s3)
-                   call cross_prod(v4, v1, s4)
-
-                   ! When we have an 0-grid node, two of the v vectors will be the same.
-                   ! Therefore, one of the s vectors will be zero. So we define minNorm
-                   ! to avoid a division by zero. This will not affect the averaged normal
-                   ! vector, since we will normalize it anyway in the end.
-
-                   s1 = s1/max(minNorm,mynorm2(s1))
-                   s2 = s2/max(minNorm,mynorm2(s2))
-                   s3 = s3/max(minNorm,mynorm2(s3))
-                   s4 = s4/max(minNorm,mynorm2(s4))
-
-                   ! Average and do final normalization including
-                   ! correcting for inward normals.
-                   s1 = fourth*(s1 + s2 + s3 + s4)
-                   patchNormals(:, i, j) = s1/mynorm2(s1)*fact
-
-                   ! Get the maximum edge length for this node. Use the
-                   ! 4 diagonal nodes:
-                   v1 = xx(i+2, j+2, :) - x0
-                   v2 = xx(i  , j+2, :) - x0
-                   v3 = xx(i  , j  , :) - x0
-                   v4 = xx(i+2, j  , :) - x0
-
-                   patchH(i, j) = max(mynorm2(v1), mynorm2(v2), mynorm2(v3), mynorm2(v4))
-
-                end do
-             end do
-
-             ! ------------------
-             ! Check the i-edges
-             ! ------------------
-             do j=jBeg, jEnd       ! <------- Node loop
-                do i=iBeg+1, iEnd  ! <------- Face Loop
-                   if (gcp(i+1, j+1) >= 0 .and. gcp(i+1, j+2) >= 0) then
-                      below = max(BCData(mm)%iBlank(i, j), 0)
-                      above = max(BCData(mm)%iBlank(i, j+1), 0)
-
-                      if ((below == 0 .and. above == 1) .or. (below == 1 .and. above == 0)) then
-                         localStrings(c)%nNodes = localStrings(c)%nNodes + 2
-                         localStrings(c)%nElems = localStrings(c)%nElems + 1
-                         e = localStrings(c)%nElems
-
-                         ! Make sure the real cell is on the LEFT
-                         ! of the directed edge
-
-                         if (below == 0) then
-                            i1 = i-1; j1 = j
-                            i2 = i  ; j2 = j
-
-                            i3 = i1; j3 = j + 1
-                            i4 = i2; j4 = j + 1
-                         else
-                            i1 = i  ; j1 = j
-                            i2 = i-1; j2 = j
-
-                            i3 = i1; j3 = j - 1
-                            i4 = i2; j4 = j - 1
-                         end if
-
-                         ! Don't forget pointer offset for xx
-                         if (regularOrdering) then
-                            localStrings(c)%nodeData(1:3, 2*e-1) = xx(i1+1, j1+1, :)
-                            localStrings(c)%nodeData(1:3, 2*e  ) = xx(i2+1, j2+1, :)
-
-
-                            ! Global index of node on reduced global surface.
-                            localStrings(c)%intNodeData(1, 2*e-1) = BCData(mm)%surfIndex(i1, j1)
-                            localStrings(c)%intNodeData(1, 2*e  ) = BCData(mm)%surfIndex(i2, j2)
-                         else
-                            localStrings(c)%nodeData(1:3, 2*e  ) = xx(i1+1, j1+1, :)
-                            localStrings(c)%nodeData(1:3, 2*e-1) = xx(i2+1, j2+1, :)
-
-                            localStrings(c)%intNodeData(1, 2*e  )= BCData(mm)%surfIndex(i1, j1)
-                            localStrings(c)%intNodeData(1, 2*e-1) = BCData(mm)%surfIndex(i2, j2)
-
-                         end if
-                         v1 = xx(i1+1, j1+1, :) - xx(i3+1, j3+1, :)
-                         v1 = v1 / mynorm2(v1)
-
-                         v2 = xx(i2+1, j2+1, :) - xx(i4+1, j4+1, :)
-                         v2 = v2 / mynorm2(v2)
-
-                         ! Perpendicular vector
-                         localStrings(c)%nodeData(7:9, 2*e-1) = v1
-                         localStrings(c)%nodeData(7:9, 2*e  ) = v2
-
-                         ! Averaged node normal
-                         localStrings(c)%nodeData(4:6, 2*e-1) = patchNormals(:, i1, j1)
-                         localStrings(c)%nodeData(4:6, 2*e  ) = patchNormals(:, i2, j2)
-
-                         ! Surface deviation estimation
-                         localStrings(c)%nodeData(10, 2*e-1) = patchH(i1, j1)
-                         localStrings(c)%nodeData(10, 2*e  ) = patchH(i2, j2)
-
-
-                         ! Cluster of the node
-                         localStrings(c)%intNodeData(2, 2*e-1) = c
-                         localStrings(c)%intNodeData(2, 2*e  ) = c
-
-                         ! Family ID of node. Not implemented yet.
-                         localStrings(c)%intNodeData(3, 2*e-1) = famID
-                         localStrings(c)%intNodeData(3, 2*e  ) = famID
-
-                         ! Connectivity
-                         localStrings(c)%conn(:, e) = (/2*e-1, 2*e/)
-                      end if
-                   end if
+    subroutine makeGapBoundaryStrings(zipperFamList, level, sps, master)
+
+        use constants
+        use adtBuild, only: buildSerialQuad
+        use blockPointers, only: x, globalCell, globalNOde, BCData, nBocos, &
+                                 il, jl, kl, nDom, rightHanded, BCFaceID, BCType
+        use communication, only: adflow_comm_world, myid, nProc
+        use oversetData, only: oversetString, oversetWall, nClusters, clusters, cumDomProc
+        use stringOps
+        use utils, only: setPointers, EChk, myNorm2, cross_prod
+        use oversetPackingRoutines, only: getWallSize
+        use sorting, only: famInList
+        implicit none
+
+        ! Input Params
+        integer(kind=intType), intent(in), dimension(:) :: zipperFamLIst
+        integer(kind=intType), intent(in) :: level, sps
+
+        ! Working
+        integer(kind=intType) :: i, j, k, nn, mm, ii, jj, kk, c, e, idx
+        integer(kind=intType) :: i1, i2, j1, j2, iBeg, iEnd, jBeg, jEnd
+        integer(kind=intType) :: i3, i4, j3, j4
+        integer(kind=intType) :: iStart, iSize, ierr, iProc, firstElem, curElem
+        integer(kind=intType) :: below, above, left, right, nNodes, nElems
+        integer(kind=intType) :: patchNodeCounter, nZipped, gc
+        integer(kind=intType), dimension(:), allocatable :: nElemsProc, nNodesProc
+        integer(kind=intType), dimension(:, :), pointer :: gcp
+        real(kind=realType), dimension(:, :, :), pointer :: xx
+        real(kind=realType), dimension(3) :: s1, s2, s3, s4, v1, v2, v3, v4, x0
+        real(kind=realType) :: fact, timeA, minNorm
+
+        real(kind=realType), dimension(:, :, :), allocatable :: patchNormals
+        real(kind=realType), dimension(:, :), allocatable :: patchH
+        integer(kind=intType), dimension(:), allocatable :: epc, surfaceSeeds, inverse
+        logical, dimension(:), allocatable :: badString
+        type(oversetString), dimension(:), allocatable, target :: localStrings
+        type(oversetString), dimension(:), allocatable, target :: globalStrings
+        type(oversetString) :: master, pocketMaster
+        type(oversetString), pointer :: stringsLL, str
+        type(oversetString), dimension(:), allocatable, target :: strings
+        integer(kind=intType) :: nFullStrings, nUnique, famID
+        logical :: regularOrdering
+        integer mpiStatus(MPI_STATUS_SIZE)
+
+        ! Wall search related
+        integer(kind=intType) :: ncells
+        type(oversetWall), dimension(:), allocatable, target :: walls
+        type(oversetWall), target :: fullWall
+        character(80) :: fileName
+
+        ! Set small number to avoid division by zero when computing normal vectors
+        minNorm = 1.0e-14
+
+        ! Loop over the wall faces counting up the edges that stradle a
+        ! compute cell and a blanked (or interpolated) cell.
+
+        allocate (epc(nClusters)) ! epc = elementsPerCluster
+        epc = 0
+
+        ! Get a (very) large overestimate of the total number of edges in a
+        ! cluster: as twice the  number of nodes.
+        domainLoop: do nn = 1, nDom
+            call setPointers(nn, level, sps)
+            call getWallSize(zipperFamList, nNodes, nElems, .False.)
+            c = clusters(cumDomProc(myid) + nn)
+            epc(c) = epc(c) + 2 * nNodes
+        end do domainLoop
+
+        ! Allocate the space we need in the local strings.
+        allocate (localStrings(nClusters))
+        do c = 1, nClusters
+            call nullifyString(localStrings(c))
+        end do
+
+        do c = 1, nClusters
+            allocate ( &
+                localStrings(c)%conn(2, epc(c)), localStrings(c)%nodeData(10, 2 * epc(c)), &
+                localStrings(c)%intNodeData(3, 2 * epc(c)))
+            localStrings(c)%nodeData = zero
+            localStrings(c)%nNodes = 0
+            localStrings(c)%nElems = 0
+
+            ! Assign string pointers immediately after allocation
+            call setStringPointers(localStrings(c))
+
+        end do
+        deallocate (epc)
+        ! And now loop back through the walls and add in the
+        ! elems/nodes/normals/indices for each edge.
+
+        ! Reset the elems per cluster. We will count up the actual number
+        ! now.
+
+        patchNodeCounter = 0
+        domainLoop2: do nn = 1, nDom
+            call setPointers(nn, level, sps)
+            ! The current cluster is 'c'
+            c = clusters(cumDomProc(myid) + nn)
+
+            bocoLoop: do mm = 1, nBocos
+                famID = BCData(mm)%famID
+                if (famInList(famID, zipperFamList)) then
+
+                    select case (BCFaceID(mm))
+                    case (iMin)
+                        xx => x(1, :, :, :)
+                        gcp => globalCell(2, :, :)
+                        fact = one
+                        regularOrdering = .True.
+                    case (iMax)
+                        xx => x(il, :, :, :)
+                        gcp => globalCell(il, :, :)
+                        fact = -one
+                        regularOrdering = .False.
+                    case (jMin)
+                        xx => x(:, 1, :, :)
+                        gcp => globalCell(:, 2, :)
+                        fact = -one
+                        regularOrdering = .False.
+                    case (jMax)
+                        xx => x(:, jl, :, :)
+                        gcp => globalCell(:, jl, :)
+                        fact = one
+                        regularOrdering = .True.
+                    case (kMin)
+                        xx => x(:, :, 1, :)
+                        gcp => globalCell(:, :, 2)
+                        fact = one
+                        regularOrdering = .True.
+                    case (kMax)
+                        xx => x(:, :, kl, :)
+                        gcp => globalCell(:, :, kl)
+                        fact = -one
+                        regularOrdering = .False.
+                    end select
+
+                    ! Need to reverse once more for a left-handed block
+                    if (.not. rightHanded) then
+                        fact = -fact
+                        regularOrdering = .not. (regularOrdering)
+                    end if
+
+                    ! Before we go through and find the actual elems,
+                    ! precompute the patch numbering and node-based averaged
+                    ! unit normals
+                    jBeg = BCdata(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+                    iBeg = BCData(mm)%inBeg; iEnd = BCData(mm)%inEnd
+
+                    allocate (patchNormals(3, iBeg:iEnd, jBeg:jEnd), &
+                              patchH(iBeg:iEnd, jBeg:jEnd))
+
+                    do j = jBeg, jEnd
+                        do i = iBeg, iEnd
+                            patchNodeCounter = patchNodeCounter + 1
+                            x0 = xx(i + 1, j + 1, :)
+
+                            ! Normalized normals for each surrounding face.
+                            v1 = xx(i + 2, j + 1, :) - x0
+                            v2 = xx(i + 1, j + 2, :) - x0
+                            v3 = xx(i, j + 1, :) - x0
+                            v4 = xx(i + 1, j, :) - x0
+
+                            call cross_prod(v1, v2, s1)
+                            call cross_prod(v2, v3, s2)
+                            call cross_prod(v3, v4, s3)
+                            call cross_prod(v4, v1, s4)
+
+                            ! When we have an 0-grid node, two of the v vectors will be the same.
+                            ! Therefore, one of the s vectors will be zero. So we define minNorm
+                            ! to avoid a division by zero. This will not affect the averaged normal
+                            ! vector, since we will normalize it anyway in the end.
+
+                            s1 = s1 / max(minNorm, mynorm2(s1))
+                            s2 = s2 / max(minNorm, mynorm2(s2))
+                            s3 = s3 / max(minNorm, mynorm2(s3))
+                            s4 = s4 / max(minNorm, mynorm2(s4))
+
+                            ! Average and do final normalization including
+                            ! correcting for inward normals.
+                            s1 = fourth * (s1 + s2 + s3 + s4)
+                            patchNormals(:, i, j) = s1 / mynorm2(s1) * fact
+
+                            ! Get the maximum edge length for this node. Use the
+                            ! 4 diagonal nodes:
+                            v1 = xx(i + 2, j + 2, :) - x0
+                            v2 = xx(i, j + 2, :) - x0
+                            v3 = xx(i, j, :) - x0
+                            v4 = xx(i + 2, j, :) - x0
+
+                            patchH(i, j) = max(mynorm2(v1), mynorm2(v2), mynorm2(v3), mynorm2(v4))
+
+                        end do
+                    end do
+
+                    ! ------------------
+                    ! Check the i-edges
+                    ! ------------------
+                    do j = jBeg, jEnd       ! <------- Node loop
+                        do i = iBeg + 1, iEnd  ! <------- Face Loop
+                            if (gcp(i + 1, j + 1) >= 0 .and. gcp(i + 1, j + 2) >= 0) then
+                                below = max(BCData(mm)%iBlank(i, j), 0)
+                                above = max(BCData(mm)%iBlank(i, j + 1), 0)
+
+                                if ((below == 0 .and. above == 1) .or. (below == 1 .and. above == 0)) then
+                                    localStrings(c)%nNodes = localStrings(c)%nNodes + 2
+                                    localStrings(c)%nElems = localStrings(c)%nElems + 1
+                                    e = localStrings(c)%nElems
+
+                                    ! Make sure the real cell is on the LEFT
+                                    ! of the directed edge
+
+                                    if (below == 0) then
+                                        i1 = i - 1; j1 = j
+                                        i2 = i; j2 = j
+
+                                        i3 = i1; j3 = j + 1
+                                        i4 = i2; j4 = j + 1
+                                    else
+                                        i1 = i; j1 = j
+                                        i2 = i - 1; j2 = j
+
+                                        i3 = i1; j3 = j - 1
+                                        i4 = i2; j4 = j - 1
+                                    end if
+
+                                    ! Don't forget pointer offset for xx
+                                    if (regularOrdering) then
+                                        localStrings(c)%nodeData(1:3, 2 * e - 1) = xx(i1 + 1, j1 + 1, :)
+                                        localStrings(c)%nodeData(1:3, 2 * e) = xx(i2 + 1, j2 + 1, :)
+
+                                        ! Global index of node on reduced global surface.
+                                        localStrings(c)%intNodeData(1, 2 * e - 1) = BCData(mm)%surfIndex(i1, j1)
+                                        localStrings(c)%intNodeData(1, 2 * e) = BCData(mm)%surfIndex(i2, j2)
+                                    else
+                                        localStrings(c)%nodeData(1:3, 2 * e) = xx(i1 + 1, j1 + 1, :)
+                                        localStrings(c)%nodeData(1:3, 2 * e - 1) = xx(i2 + 1, j2 + 1, :)
+
+                                        localStrings(c)%intNodeData(1, 2 * e) = BCData(mm)%surfIndex(i1, j1)
+                                        localStrings(c)%intNodeData(1, 2 * e - 1) = BCData(mm)%surfIndex(i2, j2)
+
+                                    end if
+                                    v1 = xx(i1 + 1, j1 + 1, :) - xx(i3 + 1, j3 + 1, :)
+                                    v1 = v1 / mynorm2(v1)
+
+                                    v2 = xx(i2 + 1, j2 + 1, :) - xx(i4 + 1, j4 + 1, :)
+                                    v2 = v2 / mynorm2(v2)
+
+                                    ! Perpendicular vector
+                                    localStrings(c)%nodeData(7:9, 2 * e - 1) = v1
+                                    localStrings(c)%nodeData(7:9, 2 * e) = v2
+
+                                    ! Averaged node normal
+                                    localStrings(c)%nodeData(4:6, 2 * e - 1) = patchNormals(:, i1, j1)
+                                    localStrings(c)%nodeData(4:6, 2 * e) = patchNormals(:, i2, j2)
+
+                                    ! Surface deviation estimation
+                                    localStrings(c)%nodeData(10, 2 * e - 1) = patchH(i1, j1)
+                                    localStrings(c)%nodeData(10, 2 * e) = patchH(i2, j2)
+
+                                    ! Cluster of the node
+                                    localStrings(c)%intNodeData(2, 2 * e - 1) = c
+                                    localStrings(c)%intNodeData(2, 2 * e) = c
+
+                                    ! Family ID of node. Not implemented yet.
+                                    localStrings(c)%intNodeData(3, 2 * e - 1) = famID
+                                    localStrings(c)%intNodeData(3, 2 * e) = famID
+
+                                    ! Connectivity
+                                    localStrings(c)%conn(:, e) = (/2 * e - 1, 2 * e/)
+                                end if
+                            end if
+                        end do
+                    end do
+
+                    ! -----------------
+                    ! Check the j-edges
+                    ! -----------------
+                    do j = jBeg + 1, jEnd   ! <------- Face loop
+                        do i = iBeg, iEnd  ! <------- Node Loop
+                            if (gcp(i + 1, j + 1) >= 0 .and. gcp(i + 2, j + 1) >= 0) then
+                                left = max(BCData(mm)%iBlank(i, j), 0)
+                                right = max(BCData(mm)%iBlank(i + 1, j), 0)
+
+                                if ((left == 0 .and. right == 1) .or. (left == 1 .and. right == 0)) then
+                                    localStrings(c)%nNodes = localStrings(c)%nNodes + 2
+                                    localStrings(c)%nElems = localStrings(c)%nElems + 1
+
+                                    e = localStrings(c)%nElems
+
+                                    ! Again, make sure the real cell is on the LEFT
+                                    ! of the directed edge
+                                    if (left == 0) then
+                                        i1 = i; j1 = j
+                                        i2 = i; j2 = j - 1
+
+                                        i3 = i1 + 1; j3 = j1
+                                        i4 = i2 + 1; j4 = j2
+
+                                    else
+                                        i1 = i; j1 = j - 1
+                                        i2 = i; j2 = j
+
+                                        i3 = i1 - 1; j3 = j1
+                                        i4 = i2 - 1; j4 = j2
+                                    end if
+
+                                    ! Don't forget pointer offset xx
+                                    if (regularOrdering) then
+                                        localStrings(c)%nodeData(1:3, 2 * e - 1) = xx(i1 + 1, j1 + 1, :)
+                                        localStrings(c)%nodeData(1:3, 2 * e) = xx(i2 + 1, j2 + 1, :)
+
+                                        ! Index of global node
+                                        localStrings(c)%intNodeData(1, 2 * e - 1) = BCData(mm)%surfIndex(i1, j1)
+                                        localStrings(c)%intNodeData(1, 2 * e) = BCData(mm)%surfIndex(i2, j2)
+
+                                    else
+                                        localStrings(c)%nodeData(1:3, 2 * e) = xx(i1 + 1, j1 + 1, :)
+                                        localStrings(c)%nodeData(1:3, 2 * e - 1) = xx(i2 + 1, j2 + 1, :)
+
+                                        ! Index of global node
+                                        localStrings(c)%intNodeData(1, 2 * e) = BCData(mm)%surfIndex(i1, j1)
+                                        localStrings(c)%intNodeData(1, 2 * e - 1) = BCData(mm)%surfIndex(i2, j2)
+
+                                    end if
+
+                                    v1 = xx(i1 + 1, j1 + 1, :) - xx(i3 + 1, j3 + 1, :)
+                                    v1 = v1 / mynorm2(v1)
+
+                                    v2 = xx(i2 + 1, j2 + 1, :) - xx(i4 + 1, j4 + 1, :)
+                                    v2 = v2 / mynorm2(v2)
+
+                                    ! Perpendicular vector
+                                    localStrings(c)%nodeData(7:9, 2 * e - 1) = v1
+                                    localStrings(c)%nodeData(7:9, 2 * e) = v2
+
+                                    ! Averaged node normal
+                                    localStrings(c)%nodeData(4:6, 2 * e - 1) = patchNormals(:, i1, j1)
+                                    localStrings(c)%nodeData(4:6, 2 * e) = patchNormals(:, i2, j2)
+
+                                    ! Surface deviation estimation
+                                    localStrings(c)%nodeData(10, 2 * e - 1) = patchH(i1, j1)
+                                    localStrings(c)%nodeData(10, 2 * e) = patchH(i2, j2)
+
+                                    ! Cluster of the node
+                                    localStrings(c)%intNodeData(2, 2 * e - 1) = c
+                                    localStrings(c)%intNodeData(2, 2 * e) = c
+
+                                    ! Family ID of node. Not implemented yet.
+                                    localStrings(c)%intNodeData(3, 2 * e - 1) = famID
+                                    localStrings(c)%intNodeData(3, 2 * e) = famID
+
+                                    ! Connectivity
+                                    localStrings(c)%conn(:, e) = (/2 * e - 1, 2 * e/)
+                                end if
+                            end if
+                        end do
+                    end do
+                    deallocate (patchNormals, patchH)
+                end if
+            end do bocoLoop
+        end do domainLoop2
+
+        ! Before we send the gap strings to the root proc, reduce them so
+        ! the root proc has a little less work to do.
+        do c = 1, nClusters
+            call reduceGapString(localStrings(c))
+        end do
+
+        ! Allocate the global list of strings on the root proc
+        if (myid == 0) then
+            allocate (globalStrings(nClusters))
+            do c = 1, nClusters
+                call nullifyString(globalStrings(c))
+            end do
+        end if
+
+        ! Next for each each cluster, gather to the root the gap boundary strings
+
+        allocate (nNodesProc(0:nProc), nElemsProc(0:nProc))
+
+        do c = 1, nClusters
+            ! Now let the root processor know how many nodes/elements my
+            ! processor will be sending:
+            nElemsProc(0) = 0
+            nNodesProc(0) = 0
+
+            call MPI_Gather(localStrings(c)%nElems, 1, adflow_integer, nElemsProc(1:nProc), 1, adflow_integer, 0, &
+                            adflow_comm_world, ierr)
+            call ECHK(ierr, __FILE__, __LINE__)
+
+            call MPI_Gather(localStrings(c)%nNodes, 1, adflow_integer, nNodesProc(1:nProc), 1, adflow_integer, 0, &
+                            adflow_comm_world, ierr)
+            call ECHK(ierr, __FILE__, __LINE__)
+
+            if (myid == 0) then
+
+                ! Before we can receive stuff, we need to determine the node
+                ! off-sets such that the conn from the strings on each processor
+                ! don't overlap.
+
+                do i = 2, nProc
+                    ! The 0 and 1st entry of the nEdgeProc and nNodeProc arrays are already correct:
+                    nNodesProc(i) = nNodesProc(i) + nNodesProc(i - 1)
+                    nElemsProc(i) = nElemsProc(i) + nElemsProc(i - 1)
                 end do
-             end do
-
-             ! -----------------
-             ! Check the j-edges
-             ! -----------------
-             do j=jBeg+1, jEnd   ! <------- Face loop
-                do i=iBeg, iEnd  ! <------- Node Loop
-                   if (gcp(i+1, j+1) >= 0 .and. gcp(i+2, j+1) >= 0)then
-                      left = max(BCData(mm)%iBlank(i, j), 0)
-                      right = max(BCData(mm)%iBlank(i+1,  j), 0)
-
-                      if ((left == 0 .and. right == 1) .or. (left == 1 .and. right == 0)) then
-                         localStrings(c)%nNodes = localStrings(c)%nNodes + 2
-                         localStrings(c)%nElems = localStrings(c)%nElems + 1
-
-                         e = localStrings(c)%nElems
-
-                         ! Again, make sure the real cell is on the LEFT
-                         ! of the directed edge
-                         if (left == 0) then
-                            i1 = i  ; j1 = j
-                            i2 = i  ; j2 = j-1
-
-                            i3 = i1+1; j3 = j1
-                            i4 = i2+1; j4 = j2
-
-                         else
-                            i1 = i  ; j1 = j-1
-                            i2 = i  ; j2 = j
-
-                            i3 = i1-1; j3 = j1
-                            i4 = i2-1; j4 = j2
-                         end if
-
-                         ! Don't forget pointer offset xx
-                         if (regularOrdering)  then
-                            localStrings(c)%nodeData(1:3, 2*e-1) = xx(i1+1, j1+1, :)
-                            localStrings(c)%nodeData(1:3, 2*e  ) = xx(i2+1, j2+1, :)
-
-                            ! Index of global node
-                            localStrings(c)%intNodeData(1, 2*e-1) = BCData(mm)%surfIndex(i1, j1)
-                            localStrings(c)%intNodeData(1, 2*e  ) = BCData(mm)%surfIndex(i2, j2)
-
-                         else
-                            localStrings(c)%nodeData(1:3, 2*e  ) = xx(i1+1, j1+1, :)
-                            localStrings(c)%nodeData(1:3, 2*e-1) = xx(i2+1, j2+1, :)
 
-                            ! Index of global node
-                            localStrings(c)%intNodeData(1, 2*e  ) = BCData(mm)%surfIndex(i1, j1)
-                            localStrings(c)%intNodeData(1, 2*e-1) = BCData(mm)%surfIndex(i2, j2)
+                allocate (globalStrings(c)%nodeData(10, nNodesProc(nProc)), &
+                          globalStrings(c)%intNodeData(3, nNodesProc(nProc)), &
+                          globalStrings(c)%conn(2, nElemsProc(nProc)))
 
-                         end if
+                ! Always set the pointers immediately after allocation
+                call setStringPointers(globalStrings(c))
 
-                         v1 = xx(i1+1, j1+1, :) - xx(i3+1, j3+1, :)
-                         v1 = v1 / mynorm2(v1)
-
-                         v2 = xx(i2+1, j2+1, :) - xx(i4+1, j4+1, :)
-                         v2 = v2 / mynorm2(v2)
-
-                         ! Perpendicular vector
-                         localStrings(c)%nodeData(7:9, 2*e-1) = v1
-                         localStrings(c)%nodeData(7:9, 2*e  ) = v2
-
-                         ! Averaged node normal
-                         localStrings(c)%nodeData(4:6, 2*e-1) = patchNormals(:, i1, j1)
-                         localStrings(c)%nodeData(4:6, 2*e  ) = patchNormals(:, i2, j2)
-
-                         ! Surface deviation estimation
-                         localStrings(c)%nodeData(10, 2*e-1) = patchH(i1, j1)
-                         localStrings(c)%nodeData(10, 2*e  ) = patchH(i2, j2)
-
-
-
-                         ! Cluster of the node
-                         localStrings(c)%intNodeData(2, 2*e-1) = c
-                         localStrings(c)%intNodeData(2, 2*e  ) = c
+                ! Put proc 0's own nodes/normals/indices in the global list if we have any
+                do i = 1, localStrings(c)%nNodes
+                    globalStrings(c)%nodeData(:, i) = localStrings(c)%nodeData(:, i)
+                    globalStrings(c)%intNodeData(:, i) = localStrings(c)%intNodeData(:, i)
+                end do
 
-                         ! Family ID of node. Not implemented yet.
-                         localStrings(c)%intNodeData(3, 2*e-1) = famID
-                         localStrings(c)%intNodeData(3, 2*e  ) = famID
+                ! Put proc 0's own elements in the global list if we have any
+                do i = 1, localStrings(c)%nElems
+                    globalStrings(c)%conn(:, i) = localStrings(c)%conn(:, i)
+                end do
 
-                         ! Connectivity
-                         localStrings(c)%conn(:, e) = (/2*e-1, 2*e/)
-                      end if
-                   end if
+                ! Set my total sizes
+                globalStrings(c)%nNodes = nNodesProc(nProc)
+                globalStrings(c)%nElems = nElemsProc(nProc)
+
+                ! Now receive from each of the other procs.
+                do iProc = 1, nProc - 1
+                    ! Check if this proc actually has anything to send:
+                    if ((nElemsProc(iProc + 1) - nElemsProc(iProc)) > 0) then
+                        iStart = nNodesProc(iProc) + 1
+                        iEnd = nNodesProc(iProc + 1)
+                        iSize = iEnd - iStart + 1
+
+                        ! ----------- Node sized arrays -------------
+                       call MPI_Recv(globalStrings(c)%nodeData(:, iStart:iEnd), iSize * 10, adflow_real, iProc, iProc, &
+                                      adflow_comm_world, mpiStatus, ierr)
+                        call ECHK(ierr, __FILE__, __LINE__)
+
+                  call MPI_Recv(globalStrings(c)%intNodeData(:, iStart:iEnd), iSize * 3, adflow_integer, iProc, iProc, &
+                                      adflow_comm_world, mpiStatus, ierr)
+                        call ECHK(ierr, __FILE__, __LINE__)
+
+                        ! ----------- Element sized arrays -------------
+                        iStart = nElemsProc(iProc) + 1
+                        iEnd = nElemsProc(iProc + 1)
+                        iSize = iEnd - iStart + 1
+                        call MPI_Recv(globalStrings(c)%conn(:, iStart:iEnd), iSize * 2, adflow_integer, iProc, iProc, &
+                                      adflow_comm_world, mpiStatus, ierr)
+                        call ECHK(ierr, __FILE__, __LINE__)
+
+                        ! Increment the conn we just received by the node offset:
+                        do i = iStart, iEnd
+                            globalStrings(c)%conn(:, i) = globalStrings(c)%conn(:, i) + nNodesProc(iProc)
+                        end do
+                    end if
                 end do
-             end do
-             deallocate(patchNormals, patchH)
-          end if
-       end do bocoLoop
-    end do domainLoop2
-
-    ! Before we send the gap strings to the root proc, reduce them so
-    ! the root proc has a little less work to do.
-    do c=1, nClusters
-       call reduceGapString(localStrings(c))
-    end do
-
-
-    ! Allocate the global list of strings on the root proc
-    if (myid == 0) then
-       allocate(globalStrings(nClusters))
-       do c=1, nClusters
-          call nullifyString(globalStrings(c))
-       end do
-    end if
-
-    ! Next for each each cluster, gather to the root the gap boundary strings
-
-    allocate(nNodesProc(0:nProc), nElemsProc(0:nProc))
-
-    do c=1, nClusters
-       ! Now let the root processor know how many nodes/elements my
-       ! processor will be sending:
-       nElemsProc(0) = 0
-       nNodesProc(0) = 0
-
-       call MPI_Gather(localStrings(c)%nElems, 1, adflow_integer, nElemsProc(1:nProc), 1, adflow_integer, 0, &
-            adflow_comm_world, ierr)
-       call ECHK(ierr, __FILE__, __LINE__)
-
-       call MPI_Gather(localStrings(c)%nNodes, 1, adflow_integer, nNodesProc(1:nProc), 1, adflow_integer, 0, &
-            adflow_comm_world, ierr)
-       call ECHK(ierr, __FILE__, __LINE__)
-
-       if (myid == 0) then
-
-          ! Before we can receive stuff, we need to determine the node
-          ! off-sets such that the conn from the strings on each processor
-          ! don't overlap.
-
-          do i=2, nProc
-             ! The 0 and 1st entry of the nEdgeProc and nNodeProc arrays are already correct:
-             nNodesProc(i) = nNodesProc(i) + nNodesProc(i-1)
-             nElemsProc(i) = nElemsProc(i) + nElemsProc(i-1)
-          end do
-
-          allocate(globalStrings(c)%nodeData(10, nNodesProc(nProc)), &
-               globalStrings(c)%intNodeData(3, nNodesProc(nProc)), &
-               globalStrings(c)%conn(2, nElemsProc(nProc)))
-
-          ! Always set the pointers immediately after allocation
-          call setStringPointers(globalStrings(c))
-
-          ! Put proc 0's own nodes/normals/indices in the global list if we have any
-          do i=1, localStrings(c)%nNodes
-             globalStrings(c)%nodeData(:, i) = localStrings(c)%nodeData(:, i)
-             globalStrings(c)%intNodeData(:, i) = localStrings(c)%intNodeData(:, i)
-          end do
-
-          ! Put proc 0's own elements in the global list if we have any
-          do i=1, localStrings(c)%nElems
-             globalStrings(c)%conn(:, i) = localStrings(c)%conn(:, i)
-          end do
-
-          ! Set my total sizes
-          globalStrings(c)%nNodes = nNodesProc(nProc)
-          globalStrings(c)%nElems = nElemsProc(nProc)
-
-          ! Now receive from each of the other procs.
-          do iProc=1, nProc-1
-             ! Check if this proc actually has anything to send:
-             if ((nElemsProc(iProc+1) - nElemsProc(iProc)) > 0) then
-                iStart = nNodesProc(iProc) + 1
-                iEnd =   nNodesProc(iProc+1)
-                iSize = iEnd - iStart + 1
-
-                ! ----------- Node sized arrays -------------
-                call MPI_Recv(globalStrings(c)%nodeData(:, iStart:iEnd), iSize*10, adflow_real, iProc, iProc, &
-                     adflow_comm_world, mpiStatus, ierr)
-                call ECHK(ierr, __FILE__, __LINE__)
-
-                call MPI_Recv(globalStrings(c)%intNodeData(:, iStart:iEnd), iSize*3, adflow_integer, iProc, iProc, &
-                     adflow_comm_world, mpiStatus, ierr)
-                call ECHK(ierr, __FILE__, __LINE__)
-
-                ! ----------- Element sized arrays -------------
-                iStart = nElemsProc(iProc) + 1
-                iEnd =   nElemsProc(iProc+1)
-                iSize = iEnd - iStart + 1
-                call MPI_Recv(globalStrings(c)%conn(:, iStart:iEnd), iSize*2, adflow_integer, iProc, iProc, &
-                     adflow_comm_world, mpiStatus, ierr)
-                call ECHK(ierr, __FILE__, __LINE__)
-
-                ! Increment the conn we just received by the node offset:
-                do i=iStart, iEnd
-                   globalStrings(c)%conn(:, i) = globalStrings(c)%conn(:, i) + nNodesProc(iProc)
+            else
+                ! Not root proc so send my stuff if we have anything:
+                if (localStrings(c)%nElems > 0) then
+
+                    ! ----------- Node sized arrays -------------
+                    call MPI_Send(localStrings(c)%nodeData, 10 * localStrings(c)%nNodes, adflow_real, 0, myid, &
+                                  adflow_comm_world, ierr)
+                    call ECHK(ierr, __FILE__, __LINE__)
+
+                    call MPI_Send(localStrings(c)%intNodeData, 3 * localStrings(c)%nNodes, adflow_integer, 0, myid, &
+                                  adflow_comm_world, ierr)
+                    call ECHK(ierr, __FILE__, __LINE__)
+
+                    ! ----------- Element sized arrays -------------
+                    call MPI_Send(localStrings(c)%conn, 2 * localStrings(c)%nElems, adflow_integer, 0, myid, &
+                                  adflow_comm_world, ierr)
+                    call ECHK(ierr, __FILE__, __LINE__)
+
+                end if
+            end if
+        end do
+
+        ! Everyone is now done with the local strings
+        do c = 1, nClusters
+            call deallocateString(localStrings(c))
+        end do
+        deallocate (localStrings)
+
+        ! Before we perform serial code implementations, surface wall info
+        ! need to be communicated to root proc (zero) too. This will be used
+        ! later to identify zipper triangle containment search for identifying
+        ! quad surface cell info for force integration. Search will be
+        ! performed on dual surface cells.
+
+        ! ---------- Begin wall data accumulation -------------
+        allocate (walls(nClusters))
+        ! Build primal quad walls ADT
+        call buildClusterWalls(level, sps, .False., walls, zipperFamLIst, size(zipperFamList))
+
+        ! Finally build up a "full wall" that is made up of all the cluster
+        ! walls.
+
+        nNodes = 0
+        nCells = 0
+        do i = 1, nClusters
+            nNodes = nNodes + walls(i)%nNodes
+            nCells = nCells + walls(i)%nCells
+        end do
+
+        allocate (fullWall%x(3, nNodes))
+        allocate (fullWall%conn(4, nCells))
+        allocate (fullWall%ind(nNodes))
+        allocate (fullWall%indCell(nCells))
+
+        nNodes = 0
+        nCells = 0
+        ii = 0
+        do i = 1, nClusters
+
+            ! Add in the nodes/elements from this cluster
+
+            do j = 1, walls(i)%nNodes
+                nNodes = nNodes + 1
+                fullWall%x(:, nNodes) = walls(i)%x(:, j)
+                fullWall%ind(nNodes) = walls(i)%ind(j)
+            end do
+
+            do j = 1, walls(i)%nCells
+                nCells = nCells + 1
+                fullWall%conn(:, nCells) = walls(i)%conn(:, j) + ii
+                fullWall%indCell(nCells) = walls(i)%indCell(j)
+            end do
+
+            ! Increment the node offset
+            ii = ii + walls(i)%nNodes
+        end do
+
+        ! Finish the setup of the full wall.
+        fullWall%nCells = nCells
+        fullWall%nNodes = nNodes
+        call buildSerialQuad(nCells, nNodes, fullWall%x, fullWall%conn, fullWall%ADT)
+
+        ! Now all the procs have fullWall info.  Note: this is overkill,
+        ! since only proc 0 needs them for zipper triangle containment
+        ! search.
+
+        ! =================================================================
+        !                   Serial code from here on out
+        ! =================================================================
+
+        if (myid == 0) then
+            timea = mpi_wtime()
+
+            ! First thing we do is reduce each of the global cluster gap
+            ! strings
+            do c = 1, nClusters
+                call reduceGapString(globalStrings(c))
+            end do
+
+            ! Combine all global strings together into a masterString. First
+            ! count up the sizes
+            nElems = 0
+            nNodes = 0
+            do c = 1, nClusters
+                nElems = nElems + globalStrings(c)%nElems
+                nNodes = nNodes + globalStrings(C)%nNodes
+            end do
+
+            call nullifyString(master)
+            master%nNodes = nNodes
+            master%nElems = nElems
+            allocate (master%nodeData(10, nNodes), master%conn(2, nElems), &
+                      master%intNodeData(3, nNodes))
+
+            ! Set the string pointers to the individual arrays
+            call setStringPointers(master)
+
+            nNodes = 0 ! This is our running counter for offseting nodes
+            ii = 0
+            jj = 0
+
+            do c = 1, nClusters
+                do i = 1, globalStrings(c)%nNodes
+                    ii = ii + 1
+                    master%nodeData(:, ii) = globalStrings(c)%nodeData(:, i)
+                    master%intNodeData(:, ii) = globalStrings(c)%intNodeData(:, i)
+                end do
+
+                do i = 1, globalStrings(c)%nElems
+                    jj = jj + 1
+                    master%conn(:, jj) = globalStrings(c)%conn(:, i) + nNodes
                 end do
-             end if
-          end do
-       else
-          ! Not root proc so send my stuff if we have anything:
-          if (localStrings(c)%nElems > 0) then
-
-             ! ----------- Node sized arrays -------------
-             call MPI_Send(localStrings(c)%nodeData, 10*localStrings(c)%nNodes, adflow_real, 0, myid, &
-                  adflow_comm_world, ierr)
-             call ECHK(ierr, __FILE__, __LINE__)
-
-             call MPI_Send(localStrings(c)%intNodeData, 3*localStrings(c)%nNodes, adflow_integer, 0, myid, &
-                  adflow_comm_world, ierr)
-             call ECHK(ierr, __FILE__, __LINE__)
-
-             ! ----------- Element sized arrays -------------
-             call MPI_Send(localStrings(c)%conn, 2*localStrings(c)%nElems, adflow_integer, 0, myid, &
-                  adflow_comm_world, ierr)
-             call ECHK(ierr, __FILE__, __LINE__)
-
-          end if
-       end if
-    end do
-
-    ! Everyone is now done with the local strings
-    do c=1, nClusters
-       call deallocateString(localStrings(c))
-    end do
-    deallocate(localStrings)
-
-    ! Before we perform serial code implementations, surface wall info
-    ! need to be communicated to root proc (zero) too. This will be used
-    ! later to identify zipper triangle containment search for identifying
-    ! quad surface cell info for force integration. Search will be
-    ! performed on dual surface cells.
-
-    ! ---------- Begin wall data accumulation -------------
-    allocate(walls(nClusters))
-    ! Build primal quad walls ADT
-    call buildClusterWalls(level, sps, .False., walls, zipperFamLIst, size(zipperFamList))
-
-
-    ! Finally build up a "full wall" that is made up of all the cluster
-    ! walls.
-
-    nNodes = 0
-    nCells = 0
-    do i=1, nClusters
-       nNodes = nNodes+ walls(i)%nNodes
-       nCells = nCells + walls(i)%nCells
-    end do
-
-    allocate(fullWall%x(3, nNodes))
-    allocate(fullWall%conn(4, nCells))
-    allocate(fullWall%ind(nNodes))
-    allocate(fullWall%indCell(nCells))
-
-    nNodes = 0
-    nCells = 0
-    ii = 0
-    do i=1, nClusters
-
-       ! Add in the nodes/elements from this cluster
-
-       do j=1, walls(i)%nNodes
-          nNodes = nNodes + 1
-          fullWall%x(:, nNodes) = walls(i)%x(:, j)
-          fullWall%ind(nNodes) = walls(i)%ind(j)
-       end do
-
-       do j=1, walls(i)%nCells
-          nCells = nCells + 1
-          fullWall%conn(:, nCells) = walls(i)%conn(:, j) + ii
-          fullWall%indCell(nCells) = walls(i)%indCell(j)
-       end do
-
-       ! Increment the node offset
-       ii = ii + walls(i)%nNodes
-    end do
-
-    ! Finish the setup of the full wall.
-    fullWall%nCells = nCells
-    fullWall%nNodes = nNodes
-    call buildSerialQuad(nCells, nNodes, fullWall%x, fullWall%conn, fullWall%ADT)
-
-    ! Now all the procs have fullWall info.  Note: this is overkill,
-    ! since only proc 0 needs them for zipper triangle containment
-    ! search.
-
-    ! =================================================================
-    !                   Serial code from here on out
-    ! =================================================================
-
-
-    if (myid == 0) then
-       timea = mpi_wtime()
-
-       ! First thing we do is reduce each of the global cluster gap
-       ! strings
-       do c=1, nClusters
-          call reduceGapString(globalStrings(c))
-       end do
-
-       ! Combine all global strings together into a masterString. First
-       ! count up the sizes
-       nElems = 0
-       nNodes = 0
-       do c=1, nClusters
-          nElems = nElems + globalStrings(c)%nElems
-          nNodes = nNodes + globalStrings(C)%nNodes
-       end do
-
-       call nullifyString(master)
-       master%nNodes = nNodes
-       master%nElems = nElems
-       allocate(master%nodeData(10, nNodes), master%conn(2, nElems), &
-            master%intNodeData(3, nNodes))
-
-       ! Set the string pointers to the individual arrays
-       call setStringPointers(master)
-
-       nNodes = 0 ! This is our running counter for offseting nodes
-       ii = 0
-       jj = 0
-
-       do c=1, nClusters
-          do i=1, globalStrings(c)%nNodes
-             ii = ii + 1
-             master%nodeData(:, ii) = globalStrings(c)%nodeData(:, i)
-             master%intNodeData(:, ii) = globalStrings(c)%intNodeData(:, i)
-          end do
-
-          do i=1, globalStrings(c)%nElems
-             jj = jj + 1
-             master%conn(:, jj) = globalStrings(c)%conn(:, i) + nNodes
-          end do
-          nNodes =ii
-       end do
-
-       ! Now the root is done with the global strings so deallocate that
-       ! too.
-       do c=1, nClusters
-          call deallocateString(globalStrings(c))
-       end do
-       deallocate(globalStrings)
-    end if
-
-  end subroutine makeGapBoundaryStrings
+                nNodes = ii
+            end do
+
+            ! Now the root is done with the global strings so deallocate that
+            ! too.
+            do c = 1, nClusters
+                call deallocateString(globalStrings(c))
+            end do
+            deallocate (globalStrings)
+        end if
+
+    end subroutine makeGapBoundaryStrings
 end module gapBoundaries
diff --git a/src/overset/oversetAPI.F90 b/src/overset/oversetAPI.F90
index 1e8fd2d92..824bd3c4c 100644
--- a/src/overset/oversetAPI.F90
+++ b/src/overset/oversetAPI.F90
@@ -1,2456 +1,2448 @@
 module oversetAPI
-  !       computeOversetInterpolation is the top level routine that
-  !       implements the implicit hole cutting method for determing
-  !       overset grid connectivitiies. It operates on a given multigrid
-  !       level and spectral instance
+    !       computeOversetInterpolation is the top level routine that
+    !       implements the implicit hole cutting method for determing
+    !       overset grid connectivitiies. It operates on a given multigrid
+    !       level and spectral instance
 contains
-  subroutine oversetComm(level, firstTime, coarseLevel, closedFamList)
-
-    use constants
-    use communication, only : adflow_comm_world, sendRequests, &
-         recvRequests, sendBuffer, recvBuffer, commPatternCell_2nd, &
-         internalCell_2nd, sendBufferSize, recvBufferSize, myid, &
-         nProc
-    use blockPointers, only : flowDoms, nDom, fringeType, fringes, &
-         il, jl, kl, ie, je, ke, x, nx, ny, nz, iBlank, globalCell, ib, jb, kb, nDonors, &
-         vol, fringePtr, forcedRecv, status, nbkglobal, si, sj, sk
-    use oversetData, only : CSRMatrix, oversetBlock, oversetFringe, &
-         oversetWall, nClusters, cumDomProc, localWallFringes, nDomTotal, &
-         nLocalWallFringe, clusterWalls, oversetPresent, nDomProc, &
-         overlapMatrix, tmpFringePtr, oversetTimes
-    use cgnsGrid, only : cgnsDoms
-    use stencils, only : N_visc_drdw, visc_drdw_stencil
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use adtBuild, only : destroySerialQuad
-    use inputOverset, onlY : useoversetLoadBalance, overlapFactor, nRefine, backgroundVolScale, &
-         useOversetWallScaling
-    use utils, only : EChk, setPointers, setBufferSizes, terminate, returnFail, mynorm2
-    use surfaceFamilies, only : BCFamGroups
-    use kdtree2_module, onlY : kdtree2_create, kdtree2destroy
-    use oversetInitialization, only : initializeOBlock, initializeOFringes, initializeStatus, &
-         reInitializeStatus
-    use oversetCommUtilities , only : recvOBlock, recvOFringe, getCommPattern, getOSurfCommPattern, &
-         emptyOversetComm, exchangeStatusTranspose, exchangeStatus, oversetLoadBalance, &
-         exchangeFringes, sendOFringe, sendOBlock, setupFringeGlobalInd, &
-         getFringeReturnSizes
-    use oversetUtilities, only : isCompute, checkOverset, irregularCellCorrection, &
-         fringeReduction, transposeOverlap, setIBlankArray, deallocateOFringes, deallocateoBlocks, &
-         deallocateOSurfs, deallocateCSRMatrix, setIsCompute, getWorkArray, flagForcedRecv, &
-         qsortFringeType, isReceiver, setIsReceiver, addToFringeList, printOverlapMatrix, &
-         tic, toc, unwindIndex, windIndex, isFloodSeed, isFlooded, wallsOnBlock
-    use oversetPackingRoutines, only : packOFringe, packOBlock, unpackOFringe, unpackOBlock, &
-         getOFringeBufferSizes, getOBlockBufferSizes, getOSurfBufferSizes
-    implicit none
-
-    ! Input Parameters
-    integer(kind=intType), intent(in) :: level
-    logical :: firstTime, coarseLevel
-    integer(kind=intType) :: sps
-    integer(kind=intType), intent(in) :: closedFamList(:)
-
-    ! Local Variables
-    integer(kind=intType) :: i, ii, j, jj, k, kk, i_stencil, curI, curJ, curK, mInt
-    integer(kind=intType) :: m, iSize, iStart, iEnd, index, rSize, iFringe
-    integer(kind=intType) :: iDom, jDom, iDim, nReal, iCol
-    integer(kind=intType) :: nn, mm, n, ierr, iProc, iRefine
-    integer(kind=intType) :: iWork, nWork, nLocalFringe, totalOrphans
-    integer(kind=intType) :: myBlock, myINdex, dIndex, donorBlock, donorProc, absDBlock
-    real(kind=realType) :: startTime, endTime, curQuality, aspect(3), fact
-    logical :: localChanged, globalChanged, wallsPresent
-
-    type(CSRMatrix), pointer :: overlap
-    type(CSRMatrix) :: overlapTranspose
-
-    integer(kind=intType), dimension(:), allocatable :: cumFringeRecv, fringeRecvSizes
-    integer(kind=intType), dimension(:, :), allocatable :: work, tmpInt2D
-    integer(kind=intType), dimension(:), pointer ::  wallFamList
-    real(kind=realType), dimension(:), allocatable :: tmpReal
-    real(kind=realType), dimension(:, :), allocatable :: xMin, xMax
-
-    logical, dimension(:), allocatable :: oBlockReady, oFringeReady
-
-    type(oversetBlock), dimension(:), allocatable :: oBlocks
-    type(oversetFringe), dimension(:), allocatable :: oFringes
-    type(oversetWall), pointer :: wall
-    type(fringeType), dimension(:), allocatable :: localFringes
-    type(fringeType) :: fringe
-
-    ! MPI/Communication related
-    integer mpiStatus(MPI_STATUS_SIZE)
-    integer(kind=intType) :: MAGIC, source, tag, sendCount, recvCount
-    integer(kind=intType) :: nOFringeSend, nOFringeRecv
-    integer(kind=intType) :: nOBlockSend, nOBlockRecv
-    integer(kind=intType) :: nOSurfSend, nOSurfRecv
-    logical :: flag, invalid
-
-    integer(kind=intType), dimension(:, :), allocatable :: oBlockSendList, oBlockRecvList
-    integer(kind=intType), dimension(:, :), allocatable :: oFringeSendList, oFringeRecvList
-    integer(kind=intType), dimension(:, :), allocatable :: bufSizes, recvInfo
-    integer(kind=intType), dimension(:), allocatable :: intRecvBuf
-    real(kind=realType), dimension(:), allocatable :: realRecvBuf
-
-    ! -----------------------------------------------------------------
-    ! Step 1: Initializaion: Make sure the stencils are initialized.
-    ! -----------------------------------------------------------------
-
-    call initialize_stencils()
-
-    ! -----------------------------------------------------------------
-    ! Step 2: Communicate the block size info to everyone. Also generate
-    ! cumDomProc which is the cumulative form. This will make our lives
-    ! easier a little later on with indexing. (Routine below)
-    ! -----------------------------------------------------------------
-
-    ! If there is not overset meshes present, just make an empty comm
-    ! structure and call it a day.
-    if (.not. oversetPresent) then
-       do sps=1,nTimeIntervalsSpectral
-          call emptyOversetComm(level, sps)
-
-          do nn=1, nDom
-             flowDoms(nn,level,sps)%nOrphans = 0
-             if (.not. associated(flowDoms(nn,level,sps)%iblank)) then
-                i = flowDoms(nn,level,sps)%ib
-                j = flowDoms(nn,level,sps)%jb
-                k = flowDoms(nn,level,sps)%kb
-
-                allocate(flowDoms(nn,level,sps)%iblank(0:i,0:j,0:k))
-                flowDoms(nn, level, sps)%iblank = 1
-             end if
-             do mm=1, flowDoms(nn, level, sps)%nBocos
-                flowDoms(nn, level, sps)%BCData(mm)%iblank = 1
-             end do
-          end do
-       end do
-       return
-    end if
-
-    ! Determine the magic number which is actually the same as
-    ! nDomTotal. nDomTotal is guaranteed to be greater than or equal to
-    ! nProc. This will be used to space out tags on communications to
-    ! make sure they do not overlap.
-    MAGIC = nDomTotal
-
-    ! Zero out all the timers
-    oversetTimes = zero
-
-    call MPI_barrier(adflow_comm_world, ierr)
-    call tic(iTotal)
-    ! Master SPS loop
-    spectralLoop: do sps=1,nTimeIntervalsSpectral
-
-       ! Set a pointer to make the code easier to read
-       overlap => overlapMatrix(level, sps)
-
-       ! -----------------------------------------------------------------
-       ! Step 4: Compute the 3D axis oriented bounding boxes for each block
-       ! and communicate then to everyone. Also determine communicate the
-       ! minimum volume for each block to everyone.  (Routine below)
-       ! -----------------------------------------------------------------
-
-       call tic(iBoundingBox)
-       allocate(xMin(3, nDomTotal), xMax(3, nDomTotal))
-       call computeDomainBoundingBoxes
-       call toc(iBoundingBox)
-
-       ! -----------------------------------------------------------------
-       ! Step 8: Build a global sparse matrix representation of the overlap
-       ! matrix.  Every processor will have the same sparse matrix
-       ! representation when it is finished. (Routine below)
-       ! -----------------------------------------------------------------
-       call tic(iBuildOverlap)
-       if (firstTime) then
-          call deallocateCSRMatrix(overlap)
-          call buildGlobalSparseOverlap(overlap)
-       end if
-
-       ! -----------------------------------------------------------------
-       ! Step 8: This is going to put the number of searches (coordinates)
-       ! in for the costs.  Eventually we need to use the previous time,
-       ! but that is going to be tricky since the sparsity structure of the
-       ! overlap matrix could change....:-(
-       ! -----------------------------------------------------------------
-
-       ! Loop over by blocks and the owned cells on my blocks. This will
-       ! determine which of my coordinates need to be searched on a
-       ! given block.
-
-       ! Also, since this is the last of the collective blocking
-       ! communication for a while do some of the other collective comm
-       ! we need to do as well. Specifically we want to tell all the
-       ! processors the size of the int Buffer, real Buffer and the
-       ! number of fringes we can expect to receive.
-
-       allocate(bufSizes(nDomTotal, 6), tmpInt2D(nDomTotal, 6), &
-            tmpReal(size(overlap%data)))
-
-       wallFamList => BCFamGroups(iBCGroupWalls)%famList
-       ! Initialization
-       tmpReal = zero
-       tmpInt2D = 0
-       do nn=1, nDom
-          call setPointers(nn, level, sps)
-          iDom = cumDomProc(myid) + nn
-          ! ------------------------------
-          ! Old Code for setting the data
-          ! if (firstTime) then
-          !    do jj=overlap%rowPtr(iDom), overlap%rowPtr(iDom+1)-1
-          !       tmpReal(jj) = real(nx*ny*nz)
-          !    end do
-          ! end if
-          ! ------------------------------
-
-          ! Sizes
-          call getOBlockBufferSizes (il, jl, kl, tmpInt2D(iDom, 1), tmpInt2D(iDom, 2))
-          call getOFringeBufferSizes(il, jl, kl, tmpInt2D(iDom, 3), tmpInt2D(iDom, 4))
-          call getOSurfBufferSizes  (wallFamList, il, jl, kl, tmpInt2D(iDom, 5), tmpInt2D(iDom, 6), .True.)
-
-       end do
-
-       ! Set the tmpReal variable to to be the number of fringes we
-       ! need to search for. This is inefficient becuase we loop over
-       ! the all the matrix rows:
-       if (firstTime) then
-          do i=1, overlap%nrow
-             do jj=overlap%rowPtr(i), overlap%rowPtr(i+1)-1
-                iCol = overlap%colInd(jj)
-                if (iCol > cumDomProc(myid) .and. iCol <= cumDomProc(myid+1)) then
-                   nn = iCol - cumDomProc(myid)
-                   tmpReal(jj) = &
-                        flowDoms(nn, level, sps)%nx * &
-                        flowDoms(nn, level, sps)%ny * &
-                        flowDoms(nn, level, sps)%nz
+    subroutine oversetComm(level, firstTime, coarseLevel, closedFamList)
+
+        use constants
+        use communication, only: adflow_comm_world, sendRequests, &
+                                 recvRequests, sendBuffer, recvBuffer, commPatternCell_2nd, &
+                                 internalCell_2nd, sendBufferSize, recvBufferSize, myid, &
+                                 nProc
+        use blockPointers, only: flowDoms, nDom, fringeType, fringes, &
+                                 il, jl, kl, ie, je, ke, x, nx, ny, nz, iBlank, globalCell, ib, jb, kb, nDonors, &
+                                 vol, fringePtr, forcedRecv, status, nbkglobal, si, sj, sk
+        use oversetData, only: CSRMatrix, oversetBlock, oversetFringe, &
+                               oversetWall, nClusters, cumDomProc, localWallFringes, nDomTotal, &
+                               nLocalWallFringe, clusterWalls, oversetPresent, nDomProc, &
+                               overlapMatrix, tmpFringePtr, oversetTimes
+        use cgnsGrid, only: cgnsDoms
+        use stencils, only: N_visc_drdw, visc_drdw_stencil
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use adtBuild, only: destroySerialQuad
+        use inputOverset, onlY: useoversetLoadBalance, overlapFactor, nRefine, backgroundVolScale, &
+                                useOversetWallScaling
+        use utils, only: EChk, setPointers, setBufferSizes, terminate, returnFail, mynorm2
+        use surfaceFamilies, only: BCFamGroups
+        use kdtree2_module, onlY: kdtree2_create, kdtree2destroy
+        use oversetInitialization, only: initializeOBlock, initializeOFringes, initializeStatus, &
+                                         reInitializeStatus
+        use oversetCommUtilities, only: recvOBlock, recvOFringe, getCommPattern, getOSurfCommPattern, &
+                                        emptyOversetComm, exchangeStatusTranspose, exchangeStatus, oversetLoadBalance, &
+                                        exchangeFringes, sendOFringe, sendOBlock, setupFringeGlobalInd, &
+                                        getFringeReturnSizes
+        use oversetUtilities, only: isCompute, checkOverset, irregularCellCorrection, &
+                             fringeReduction, transposeOverlap, setIBlankArray, deallocateOFringes, deallocateoBlocks, &
+                                    deallocateOSurfs, deallocateCSRMatrix, setIsCompute, getWorkArray, flagForcedRecv, &
+                                    qsortFringeType, isReceiver, setIsReceiver, addToFringeList, printOverlapMatrix, &
+                                    tic, toc, unwindIndex, windIndex, isFloodSeed, isFlooded, wallsOnBlock
+        use oversetPackingRoutines, only: packOFringe, packOBlock, unpackOFringe, unpackOBlock, &
+                                          getOFringeBufferSizes, getOBlockBufferSizes, getOSurfBufferSizes
+        implicit none
+
+        ! Input Parameters
+        integer(kind=intType), intent(in) :: level
+        logical :: firstTime, coarseLevel
+        integer(kind=intType) :: sps
+        integer(kind=intType), intent(in) :: closedFamList(:)
+
+        ! Local Variables
+        integer(kind=intType) :: i, ii, j, jj, k, kk, i_stencil, curI, curJ, curK, mInt
+        integer(kind=intType) :: m, iSize, iStart, iEnd, index, rSize, iFringe
+        integer(kind=intType) :: iDom, jDom, iDim, nReal, iCol
+        integer(kind=intType) :: nn, mm, n, ierr, iProc, iRefine
+        integer(kind=intType) :: iWork, nWork, nLocalFringe, totalOrphans
+        integer(kind=intType) :: myBlock, myINdex, dIndex, donorBlock, donorProc, absDBlock
+        real(kind=realType) :: startTime, endTime, curQuality, aspect(3), fact
+        logical :: localChanged, globalChanged, wallsPresent
+
+        type(CSRMatrix), pointer :: overlap
+        type(CSRMatrix) :: overlapTranspose
+
+        integer(kind=intType), dimension(:), allocatable :: cumFringeRecv, fringeRecvSizes
+        integer(kind=intType), dimension(:, :), allocatable :: work, tmpInt2D
+        integer(kind=intType), dimension(:), pointer :: wallFamList
+        real(kind=realType), dimension(:), allocatable :: tmpReal
+        real(kind=realType), dimension(:, :), allocatable :: xMin, xMax
+
+        logical, dimension(:), allocatable :: oBlockReady, oFringeReady
+
+        type(oversetBlock), dimension(:), allocatable :: oBlocks
+        type(oversetFringe), dimension(:), allocatable :: oFringes
+        type(oversetWall), pointer :: wall
+        type(fringeType), dimension(:), allocatable :: localFringes
+        type(fringeType) :: fringe
+
+        ! MPI/Communication related
+        integer mpiStatus(MPI_STATUS_SIZE)
+        integer(kind=intType) :: MAGIC, source, tag, sendCount, recvCount
+        integer(kind=intType) :: nOFringeSend, nOFringeRecv
+        integer(kind=intType) :: nOBlockSend, nOBlockRecv
+        integer(kind=intType) :: nOSurfSend, nOSurfRecv
+        logical :: flag, invalid
+
+        integer(kind=intType), dimension(:, :), allocatable :: oBlockSendList, oBlockRecvList
+        integer(kind=intType), dimension(:, :), allocatable :: oFringeSendList, oFringeRecvList
+        integer(kind=intType), dimension(:, :), allocatable :: bufSizes, recvInfo
+        integer(kind=intType), dimension(:), allocatable :: intRecvBuf
+        real(kind=realType), dimension(:), allocatable :: realRecvBuf
+
+        ! -----------------------------------------------------------------
+        ! Step 1: Initializaion: Make sure the stencils are initialized.
+        ! -----------------------------------------------------------------
+
+        call initialize_stencils()
+
+        ! -----------------------------------------------------------------
+        ! Step 2: Communicate the block size info to everyone. Also generate
+        ! cumDomProc which is the cumulative form. This will make our lives
+        ! easier a little later on with indexing. (Routine below)
+        ! -----------------------------------------------------------------
+
+        ! If there is not overset meshes present, just make an empty comm
+        ! structure and call it a day.
+        if (.not. oversetPresent) then
+            do sps = 1, nTimeIntervalsSpectral
+                call emptyOversetComm(level, sps)
+
+                do nn = 1, nDom
+                    flowDoms(nn, level, sps)%nOrphans = 0
+                    if (.not. associated(flowDoms(nn, level, sps)%iblank)) then
+                        i = flowDoms(nn, level, sps)%ib
+                        j = flowDoms(nn, level, sps)%jb
+                        k = flowDoms(nn, level, sps)%kb
+
+                        allocate (flowDoms(nn, level, sps)%iblank(0:i, 0:j, 0:k))
+                        flowDoms(nn, level, sps)%iblank = 1
+                    end if
+                    do mm = 1, flowDoms(nn, level, sps)%nBocos
+                        flowDoms(nn, level, sps)%BCData(mm)%iblank = 1
+                    end do
+                end do
+            end do
+            return
+        end if
+
+        ! Determine the magic number which is actually the same as
+        ! nDomTotal. nDomTotal is guaranteed to be greater than or equal to
+        ! nProc. This will be used to space out tags on communications to
+        ! make sure they do not overlap.
+        MAGIC = nDomTotal
+
+        ! Zero out all the timers
+        oversetTimes = zero
+
+        call MPI_barrier(adflow_comm_world, ierr)
+        call tic(iTotal)
+        ! Master SPS loop
+        spectralLoop: do sps = 1, nTimeIntervalsSpectral
+
+            ! Set a pointer to make the code easier to read
+            overlap => overlapMatrix(level, sps)
+
+            ! -----------------------------------------------------------------
+            ! Step 4: Compute the 3D axis oriented bounding boxes for each block
+            ! and communicate then to everyone. Also determine communicate the
+            ! minimum volume for each block to everyone.  (Routine below)
+            ! -----------------------------------------------------------------
+
+            call tic(iBoundingBox)
+            allocate (xMin(3, nDomTotal), xMax(3, nDomTotal))
+            call computeDomainBoundingBoxes
+            call toc(iBoundingBox)
+
+            ! -----------------------------------------------------------------
+            ! Step 8: Build a global sparse matrix representation of the overlap
+            ! matrix.  Every processor will have the same sparse matrix
+            ! representation when it is finished. (Routine below)
+            ! -----------------------------------------------------------------
+            call tic(iBuildOverlap)
+            if (firstTime) then
+                call deallocateCSRMatrix(overlap)
+                call buildGlobalSparseOverlap(overlap)
+            end if
+
+            ! -----------------------------------------------------------------
+            ! Step 8: This is going to put the number of searches (coordinates)
+            ! in for the costs.  Eventually we need to use the previous time,
+            ! but that is going to be tricky since the sparsity structure of the
+            ! overlap matrix could change....:-(
+            ! -----------------------------------------------------------------
+
+            ! Loop over by blocks and the owned cells on my blocks. This will
+            ! determine which of my coordinates need to be searched on a
+            ! given block.
+
+            ! Also, since this is the last of the collective blocking
+            ! communication for a while do some of the other collective comm
+            ! we need to do as well. Specifically we want to tell all the
+            ! processors the size of the int Buffer, real Buffer and the
+            ! number of fringes we can expect to receive.
+
+            allocate (bufSizes(nDomTotal, 6), tmpInt2D(nDomTotal, 6), &
+                      tmpReal(size(overlap%data)))
+
+            wallFamList => BCFamGroups(iBCGroupWalls)%famList
+            ! Initialization
+            tmpReal = zero
+            tmpInt2D = 0
+            do nn = 1, nDom
+                call setPointers(nn, level, sps)
+                iDom = cumDomProc(myid) + nn
+                ! ------------------------------
+                ! Old Code for setting the data
+                ! if (firstTime) then
+                !    do jj=overlap%rowPtr(iDom), overlap%rowPtr(iDom+1)-1
+                !       tmpReal(jj) = real(nx*ny*nz)
+                !    end do
+                ! end if
+                ! ------------------------------
+
+                ! Sizes
+                call getOBlockBufferSizes(il, jl, kl, tmpInt2D(iDom, 1), tmpInt2D(iDom, 2))
+                call getOFringeBufferSizes(il, jl, kl, tmpInt2D(iDom, 3), tmpInt2D(iDom, 4))
+                call getOSurfBufferSizes(wallFamList, il, jl, kl, tmpInt2D(iDom, 5), tmpInt2D(iDom, 6), .True.)
+
+            end do
+
+            ! Set the tmpReal variable to to be the number of fringes we
+            ! need to search for. This is inefficient becuase we loop over
+            ! the all the matrix rows:
+            if (firstTime) then
+                do i = 1, overlap%nrow
+                    do jj = overlap%rowPtr(i), overlap%rowPtr(i + 1) - 1
+                        iCol = overlap%colInd(jj)
+                        if (iCol > cumDomProc(myid) .and. iCol <= cumDomProc(myid + 1)) then
+                            nn = iCol - cumDomProc(myid)
+                            tmpReal(jj) = &
+                                flowDoms(nn, level, sps)%nx * &
+                                flowDoms(nn, level, sps)%ny * &
+                                flowDoms(nn, level, sps)%nz
+                        end if
+                    end do
+                end do
+            end if
+
+            if (.not. firstTime) then
+                tmpReal = overlap%data
+            end if
+
+            ! Determine the total search costs for each proc and all the bufferSizes
+            call mpi_allreduce(tmpReal, overlap%data, overlap%nnz, adflow_real, MPI_SUM, &
+                               adflow_comm_world, ierr)
+            call ECHK(ierr, __FILE__, __LINE__)
+
+            call mpi_allreduce(tmpInt2D, bufSizes, 6 * nDomTotal, adflow_integer, MPI_SUM, &
+                               adflow_comm_world, ierr)
+            call ECHK(ierr, __FILE__, __LINE__)
+
+            ! Done with the tmp arrays. This should be the last of the
+            ! blocking collectives for a while.
+            deallocate (tmpReal, tmpInt2D)
+
+            ! -----------------------------------------------------------------
+            ! Step 8: We are now ready to partiaion and loadbalance the work
+            ! based on the costs stored in the overlap matrix. These costs
+            ! may be the search estimates from initializeOverlapCosts OR they
+            ! may be actual timings from a previous assembly. Also create a
+            ! transpose of the matrix which is useful to use for the fringe
+            ! sending operation (it is transpose of the block sending)
+            ! -----------------------------------------------------------------
+            useOversetLoadBalance = .False.
+            if (useOversetLoadBalance) then
+                call oversetLoadBalance(overlap)
+            end if
+            call transposeOverlap(overlap, overlapTranspose)
+
+            ! -----------------------------------------------------------------
+            !  Step 8: Section out just the intersections we have to
+            !  do. Essentially this is just the entries in the matrix that we
+            !  have been assigned according to assignedProc. This will keep
+            !  track of the work we have to do and if it is yet completed or
+            !  not. We want this small so we can constantlly loop over it quickly.
+            !  -----------------------------------------------------------------
+
+            call getWorkArray(overlap, work)
+            nWork = size(work, 2)
+
+            ! Call the generic routines to determine the send/receive pattern
+            ! for oBlock comm and the fringe comm. These are transpose of
+            ! each other. Just overestimate the sizes of the lists.
+
+            ! For sending, the worse case is sending all my blocks/fringes/walls to
+            ! everyone but myself:
+            ii = nDom * (nProc - 1)
+            allocate (oBlockSendList(2, ii), oFringeSendList(2, ii))
+
+            ! For receiving, the worse receive is all the blocks/fringes/wall I
+            ! don't already have:
+            ii = nDomTotal - nDom
+            allocate (oBlockRecvList(2, ii), oFringeRecvList(2, ii))
+
+            call getCommPattern(overlap, oblockSendList, nOblockSend, &
+                                oBlockRecvList, nOblockRecv)
+
+            call getCommPattern(overlapTranspose, oFringeSendList, nOFringeSend, &
+                                oFringeRecvList, nOFringeRecv)
+
+            ! Done with the transposed matrix
+            call deallocateCSRMatrix(overlapTranspose)
+
+            ! Zero out the overlap data since we will be doing new timings in
+            ! doMyWork()
+            overlap%data = zero
+
+            ! Allocate the exact space for our send and recv requests. Note
+            ! that for the oBlocks, two values are set, real and integer.
+            nn = max(2 * nProc, &
+                     2 * nOBlockSend + 2 * nOFringeSend, &
+                     2 * nOBlockRecv + 2 * nOfringeRecv)
+            if (allocated(sendRequests)) then
+                deallocate (sendRequests, recvRequests)
+            end if
+
+            allocate (sendRequests(nn), recvRequests(nn))
+            allocate (recvInfo(2, nn))
+
+            ! On the first pass we need to get an estimate of what is
+            ! inside the body and what isn't. This method isn't
+            ! perfect; some cells that are actually inside the true
+            ! surface won't be flagged, but that's ok.
+            work(4, :) = 0
+
+            ! -----------------------------------------------------------------
+            ! Step 8: Allocation of temporary data structures: oBlocks and fringeList
+            !
+            ! oBlocks: These contain the AD trees. We allocate the array to
+            ! size of the total number of domains (nDomTotal). Firstly, we
+            ! just add the range of domains we own (cumDomProc(myid)+1 :
+            ! cumDomProc(myid)+nDom). If we need to receive a domain from
+            ! another processor, we just put it in the it's global ordering
+            ! slot.
+            !
+            ! fringeList: Similar logic for the fringe list. Allocated to
+            ! size nDomTotal, add the fringes we own, and the allocate
+            ! additional space for any that we need to receive from other
+            ! processors. Note that we have to be really careful with the
+            ! fringes: We 'isend' our owned fringes and may modify them
+            ! locally as well. The MPI standard says that you cannot modify
+            ! the send buffer until the isend completes. This is why we use
+            ! the overMPISearchCoord type that sends only 'x', 'origQuality'
+            ! and 'isWall'. These are guaranteed not to be changed by the
+            ! local process while the send is on-going so we should be
+            ! ok. Using the same send buffer multiple times should
+            ! technically be ok according to:
+            ! http://stackoverflow.com/questions/17074884/ok-to-call-mpi-isend-multiple-times-on-one-buffer
+            ! the MPI 2.1 standard allows this to be ok.
+            !
+            ! Note that oBlocks and fringeList are allocated to size
+            ! nDomTotal...which is not technically scalable, but there since
+            ! there are only a few scattered variables and no large arrays it
+            ! should be ok.
+            ! -----------------------------------------------------------------
+
+            allocate (oBlocks(nDomTotal), oFringes(nDomTotal))
+
+            ! Thse variables keep track of if the block/fringes are
+            ! ready. Initialized to false and only flipped when we are sure
+            ! they are ready to be used.
+
+            allocate (oBlockReady(nDomTotal), oFringeReady(nDomTotal))
+            oBlockReady = .False.
+            oFringeReady = .False.
+
+            ! Allocate space for the localWallFringes. localWallFringes keeps
+            ! track of donors for cells that are next to a wall. These must
+            ! be recorded independently of the actual donors since we don't
+            ! actually care what the interpolation stencil is, rather just
+            ! who the donor is such that we can use that information for the
+            ! flooding process. We arbitrarily set a size here and it will be
+            ! automatically expanded as necessary in the fringeSearch
+            ! routine.
+            allocate (localWallFringes(1000))
+            nLocalWallFringe = 0
+
+            call toc(iBuildOverlap)
+
+            call tic(iBuildClusterWalls)
+            allocate (clusterWalls(nClusters))
+            call buildClusterWalls(level, sps, .True., clusterWalls, wallFamList, size(wallFamList))
+            call toc(iBuildClusterWalls)
+
+            ! Determine the cells that are near wall. We have a special
+            ! routine for this.
+            call tic(iComputeCellWallPoint)
+            call computeCellWallPoint(level, sps, clusterWalls)
+
+            ! We need a couple of extra things that buildCluster wall
+            ! doesn't do:
+            do ii = 1, nClusters
+                wall => clusterWalls(ii)
+                if (wall%nNodes > 0) then
+                    wall%tree => kdtree2_create(wall%x(:, 1:wall%nNodes))
                 end if
-             end do
-          end do
-       end if
-
-       if (.not. firstTime) then
-          tmpReal = overlap%data
-       end if
-
-       ! Determine the total search costs for each proc and all the bufferSizes
-       call mpi_allreduce(tmpReal, overlap%data, overlap%nnz, adflow_real, MPI_SUM, &
-            adflow_comm_world, ierr)
-       call ECHK(ierr, __FILE__, __LINE__)
-
-       call mpi_allreduce(tmpInt2D, bufSizes, 6*nDomTotal, adflow_integer, MPI_SUM, &
-            adflow_comm_world, ierr)
-       call ECHK(ierr, __FILE__, __LINE__)
-
-       ! Done with the tmp arrays. This should be the last of the
-       ! blocking collectives for a while.
-       deallocate(tmpReal, tmpInt2D)
-
-       ! -----------------------------------------------------------------
-       ! Step 8: We are now ready to partiaion and loadbalance the work
-       ! based on the costs stored in the overlap matrix. These costs
-       ! may be the search estimates from initializeOverlapCosts OR they
-       ! may be actual timings from a previous assembly. Also create a
-       ! transpose of the matrix which is useful to use for the fringe
-       ! sending operation (it is transpose of the block sending)
-       ! -----------------------------------------------------------------
-       useOversetLoadBalance = .False.
-       if (useOversetLoadBalance) then
-          call oversetLoadBalance(overlap)
-       end if
-       call transposeOverlap(overlap, overlapTranspose)
-
-       ! -----------------------------------------------------------------
-       !  Step 8: Section out just the intersections we have to
-       !  do. Essentially this is just the entries in the matrix that we
-       !  have been assigned according to assignedProc. This will keep
-       !  track of the work we have to do and if it is yet completed or
-       !  not. We want this small so we can constantlly loop over it quickly.
-       !  -----------------------------------------------------------------
-
-       call getWorkArray(overlap, work)
-       nWork = size(work, 2)
-
-       ! Call the generic routines to determine the send/receive pattern
-       ! for oBlock comm and the fringe comm. These are transpose of
-       ! each other. Just overestimate the sizes of the lists.
-
-       ! For sending, the worse case is sending all my blocks/fringes/walls to
-       ! everyone but myself:
-       ii = nDom*(nProc-1)
-       allocate(oBlockSendList(2, ii), oFringeSendList(2, ii))
-
-       ! For receiving, the worse receive is all the blocks/fringes/wall I
-       ! don't already have:
-       ii = nDomTotal - nDom
-       allocate(oBlockRecvList(2, ii), oFringeRecvList(2, ii))
-
-       call getCommPattern(overlap, oblockSendList, nOblockSend, &
-            oBlockRecvList, nOblockRecv)
-
-       call getCommPattern(overlapTranspose, oFringeSendList, nOFringeSend,  &
-            oFringeRecvList, nOFringeRecv)
-
-       ! Done with the transposed matrix
-       call deallocateCSRMatrix(overlapTranspose)
-
-       ! Zero out the overlap data since we will be doing new timings in
-       ! doMyWork()
-       overlap%data = zero
-
-       ! Allocate the exact space for our send and recv requests. Note
-       ! that for the oBlocks, two values are set, real and integer.
-       nn = max(2*nProc, &
-            2*nOBlockSend + 2*nOFringeSend, &
-            2*nOBlockRecv + 2*nOfringeRecv)
-       if (allocated(sendRequests)) then
-          deallocate(sendRequests, recvRequests)
-       end if
-
-       allocate(sendRequests(nn), recvRequests(nn))
-       allocate(recvInfo(2, nn))
-
-       ! On the first pass we need to get an estimate of what is
-       ! inside the body and what isn't. This method isn't
-       ! perfect; some cells that are actually inside the true
-       ! surface won't be flagged, but that's ok.
-       work(4, :) = 0
-
-       ! -----------------------------------------------------------------
-       ! Step 8: Allocation of temporary data structures: oBlocks and fringeList
-       !
-       ! oBlocks: These contain the AD trees. We allocate the array to
-       ! size of the total number of domains (nDomTotal). Firstly, we
-       ! just add the range of domains we own (cumDomProc(myid)+1 :
-       ! cumDomProc(myid)+nDom). If we need to receive a domain from
-       ! another processor, we just put it in the it's global ordering
-       ! slot.
-       !
-       ! fringeList: Similar logic for the fringe list. Allocated to
-       ! size nDomTotal, add the fringes we own, and the allocate
-       ! additional space for any that we need to receive from other
-       ! processors. Note that we have to be really careful with the
-       ! fringes: We 'isend' our owned fringes and may modify them
-       ! locally as well. The MPI standard says that you cannot modify
-       ! the send buffer until the isend completes. This is why we use
-       ! the overMPISearchCoord type that sends only 'x', 'origQuality'
-       ! and 'isWall'. These are guaranteed not to be changed by the
-       ! local process while the send is on-going so we should be
-       ! ok. Using the same send buffer multiple times should
-       ! technically be ok according to:
-       ! http://stackoverflow.com/questions/17074884/ok-to-call-mpi-isend-multiple-times-on-one-buffer
-       ! the MPI 2.1 standard allows this to be ok.
-       !
-       ! Note that oBlocks and fringeList are allocated to size
-       ! nDomTotal...which is not technically scalable, but there since
-       ! there are only a few scattered variables and no large arrays it
-       ! should be ok.
-       ! -----------------------------------------------------------------
-
-       allocate(oBlocks(nDomTotal), oFringes(nDomTotal))
-
-       ! Thse variables keep track of if the block/fringes are
-       ! ready. Initialized to false and only flipped when we are sure
-       ! they are ready to be used.
-
-       allocate(oBlockReady(nDomTotal), oFringeReady(nDomTotal))
-       oBlockReady = .False.
-       oFringeReady = .False.
-
-       ! Allocate space for the localWallFringes. localWallFringes keeps
-       ! track of donors for cells that are next to a wall. These must
-       ! be recorded independently of the actual donors since we don't
-       ! actually care what the interpolation stencil is, rather just
-       ! who the donor is such that we can use that information for the
-       ! flooding process. We arbitrarily set a size here and it will be
-       ! automatically expanded as necessary in the fringeSearch
-       ! routine.
-       allocate(localWallFringes(1000))
-       nLocalWallFringe = 0
-
-       call toc(iBuildOverlap)
-
-       call tic(iBuildClusterWalls)
-       allocate(clusterWalls(nClusters))
-       call buildClusterWalls(level, sps, .True., clusterWalls, wallFamList, size(wallFamList))
-       call toc(iBuildClusterWalls)
-
-       ! Determine the cells that are near wall. We have a special
-       ! routine for this.
-       call tic(iComputeCellWallPoint)
-       call computeCellWallPoint(level, sps, clusterWalls)
-
-       ! We need a couple of extra things that buildCluster wall
-       ! doesn't do:
-       do ii=1, nClusters
-          wall => clusterWalls(ii)
-          if (wall%nNodes > 0) then
-             wall%tree => kdtree2_create(wall%x(:, 1:wall%nNodes))
-          end if
-
-          ! Build the inverse of the connectivity, the nodeToElem array.
-          allocate(wall%nte(4, wall%nNodes))
-          wall%nte = 0
-          do i=1, wall%nCells
-             do j=1, 4
-                n = wall%conn(j, i)
-                inner:do k=1, 4
-                   if (wall%nte(k, n) == 0) then
-                      wall%nte(k, n) = i
-                      exit inner
-                   end if
-                end do inner
-             end do
-          end do
-       end do
-       call toc(iComputeCellWallPoint)
-
-       ! Flag all the cells that we know are forced receivers.
-       call flagForcedRecv()
-
-       ! Initialize the overset-specific data structures for
-       ! performing searches.
-       do nn=1, nDom
-          call setPointers(nn, level, sps)
-          iDom = cumDomProc(myid) + nn
-
-          call tic(iBuildADT)
-          call initializeOBlock(oBlocks(iDom), nn, level, sps)
-          oBlockReady(iDom) = .True.
-          call toc(iBuildADT)
-
-          call tic(iBuildSearchPoints)
-          call initializeOFringes(oFringes(iDom), nn, closedFamList)
-          oFringeReady(iDom) = .True.
-          call toc(iBuildSearchPoints)
-       end do
-
-       ! Post all the oBlock/oFringe iSends
-       sendCount = 0
-       do jj=1, nOblockSend
-          iProc = oBlockSendList(1, jj)
-          iDom = oBlockSendList(2, jj)
-          call packOBlock(oBlocks(iDom))
-          call sendOBlock(oBlocks(iDom), iDom, iProc, 0, sendCount)
-       end do
-
-       do jj=1, nOFringeSend
-          iProc = oFringeSendList(1, jj)
-          iDom = oFringeSendList(2, jj)
-          call packOFringe(oFringes(iDom))
-          call sendOFringe(oFringes(iDom), iDom, iProc, MAGIC, sendCount)
-       end do
-
-       ! Post all the oBlock/oFringe receives. Before posting the actual
-       ! receive, allocate the receiving buffer.
-       recvCount = 0
-       do jj=1, nOBlockRecv
-          iProc = oBlockRecvList(1, jj)
-          iDom = oBlockRecvList(2, jj)
-          call recvOBlock(oBlocks(iDom), iDom, iProc, 0, &
-               bufSizes(iDom, 1), bufSizes(iDom, 2), recvCount, recvInfo)
-       end do
-
-       do jj=1, nOFringeRecv
-          iProc = oFringeRecvList(1, jj)
-          iDom = oFringeRecvList(2, jj)
-          call recvOFringe(oFringes(iDom), iDom, iProc, MAGIC, &
-               bufSizes(iDom, 3), bufSizes(iDom, 4), recvCount, recvInfo)
-       end do
-
-       ! Before we start waiting for the receives to finish, we can see
-       ! if we can do any searches with the blocks/fringes we already
-       ! have. Call the internal routine for this.
-       call doMyWork(flag)
-
-       ! Complete all the recives
-       do i=1, recvCount
-
-          ! Complete any one of the recv requests
-          call mpi_waitany(recvCount, recvRequests, index, mpiStatus, ierr)
-          call ECHK(ierr, __FILE__, __LINE__)
-
-          ! Global domain index of the recv that finished
-          iDom = recvInfo(1, index)
-
-          ! Check which type of receive just finished and flag them as
-          ! being complete.
-          if     (recvInfo(2, index) == 1) then
-             oBlocks(iDom)%realBufferReady = .True.
-          else if (recvInfo(2, index) == 2) then
-             oBlocks(iDom)%intBufferReady = .True.
-          else if (recvInfo(2, index) == 3) then
-             oFringes(iDom)%realBufferReady = .True.
-          else if (recvInfo(2, index) == 4) then
-             oFringes(iDOm)%intBufferReady = .True.
-          end if
-
-          ! If both int and real buffers are received, we can unpack the
-          ! oblock and flag it as ready.
-          if (oBlocks(iDom)%realBufferReady .and. oBlocks(iDom)%intBufferReady .and. &
-               .not.oBlocks(iDom)%allocated) then
-             call unpackOBlock(oBlocks(iDom))
-             oBlockReady(iDom) = .True.
-          end if
-
-          ! If both int and real buffers are received, we can unpack the
-          ! oFringe and flag it as ready.
-          if (oFringes(iDom)%realBufferReady .and. oFringes(iDom)%intBufferReady .and. &
-               .not.oFringes(iDom)%allocated) then
-             call unpackOFringe(oFringes(iDom))
-             oFringeReady(iDom) = .True.
-          end if
-
-          ! Now see if we can do any more of the work, ie the searches.
-          call doMyWork(flag)
-
-          ! Sanity check. flag better be true when i=recvCount
-          if (i==recvCount .and. .not. flag) then
-             call terminate("computeInterpolationParallel", "Inconsistent Comm pattern detected.")
-          end if
-       end do
-
-       ! Last thing to do wait for all the sends to finish
-       do i=1,sendCount
-          call mpi_waitany(sendCount, sendRequests, index, mpiStatus, ierr)
-          call ECHK(ierr, __FILE__, __LINE__)
-       end do
-
-       ! We are now completely finished with oBlocks so
-       ! delete before we allocate space for all the fringes
-       call deallocateOBlocks(oBlocks, size(oBlocks))
-       deallocate(oBlocks)
-       deallocate(oBlockReady, oFringeReady)
-
-       ! Destroy the walls we had made for the surface-overlap
-       ! searching.
-       do i=1, nClusters
-          wall => clusterWalls(i)
-          call destroySerialQuad(wall%ADT)
-          if (wall%nNodes > 0) then
-             call kdtree2destroy(wall%tree)
-          end if
-          deallocate(wall%x, wall%conn, wall%ind, wall%nte)
-       end do
-       deallocate(clusterWalls)
-
-       ! -----------------------------------------------------------------
-       ! Step 9: Well, all the searches are done, so now we can now send
-       ! the fringes back to where they came from.
-       ! -----------------------------------------------------------------
-
-       ! We are done with most of the data of the oFringe. Nuke that
-       ! right now to save memory. The fringeIntBuffer and
-       ! fringeRealBuffer have to stick around a little longer.
-       do iDim=1, nDomTotal
-          if (allocated(oFringes(iDom)%x)) &
-               deallocate(oFringes(iDom)%x)
-
-          if (allocated(oFringes(iDom)%isWall)) &
-               deallocate(oFringes(iDom)%isWall)
-
-          if (allocated(oFringes(iDom)%xSeed)) &
-               deallocate(oFringes(iDom)%xSeed)
-
-          if (allocated(oFringes(iDom)%wallind)) &
-               deallocate(oFringes(iDom)%wallInd)
-       end do
-
-       call tic(iFringeProcessing)
-       nReal = 4
-       mInt = 5
-       do iDom=1, nDomTotal
-          if (oFringes(iDom)%allocated) then
-             ! Fringe is allocated so check it
-             oFringes(iDom)%fringeReturnSize = oFringes(iDom)%nDonor
-
-             ! Check if this domain is one I own. If so, we just copy
-             ! the the fringes into the block-based list:
-
-             nn = iDom - cumDomProc(myid)
-             if (nn >= 1 .and. nn <= nDom) then
+
+                ! Build the inverse of the connectivity, the nodeToElem array.
+                allocate (wall%nte(4, wall%nNodes))
+                wall%nte = 0
+                do i = 1, wall%nCells
+                    do j = 1, 4
+                        n = wall%conn(j, i)
+                        inner: do k = 1, 4
+                            if (wall%nte(k, n) == 0) then
+                                wall%nte(k, n) = i
+                                exit inner
+                            end if
+                        end do inner
+                    end do
+                end do
+            end do
+            call toc(iComputeCellWallPoint)
+
+            ! Flag all the cells that we know are forced receivers.
+            call flagForcedRecv()
+
+            ! Initialize the overset-specific data structures for
+            ! performing searches.
+            do nn = 1, nDom
                 call setPointers(nn, level, sps)
-                if (associated(flowDoms(nn, level, sps)%fringes)) then
-                   deallocate(flowDoms(nn, level, sps)%fringes)
-                   deallocate(flowDoms(nn, level, sps)%fringePtr)
-                   deallocate(flowDoms(nn, level, sps)%nDonors)
+                iDom = cumDomProc(myid) + nn
+
+                call tic(iBuildADT)
+                call initializeOBlock(oBlocks(iDom), nn, level, sps)
+                oBlockReady(iDom) = .True.
+                call toc(iBuildADT)
+
+                call tic(iBuildSearchPoints)
+                call initializeOFringes(oFringes(iDom), nn, closedFamList)
+                oFringeReady(iDom) = .True.
+                call toc(iBuildSearchPoints)
+            end do
+
+            ! Post all the oBlock/oFringe iSends
+            sendCount = 0
+            do jj = 1, nOblockSend
+                iProc = oBlockSendList(1, jj)
+                iDom = oBlockSendList(2, jj)
+                call packOBlock(oBlocks(iDom))
+                call sendOBlock(oBlocks(iDom), iDom, iProc, 0, sendCount)
+            end do
+
+            do jj = 1, nOFringeSend
+                iProc = oFringeSendList(1, jj)
+                iDom = oFringeSendList(2, jj)
+                call packOFringe(oFringes(iDom))
+                call sendOFringe(oFringes(iDom), iDom, iProc, MAGIC, sendCount)
+            end do
+
+            ! Post all the oBlock/oFringe receives. Before posting the actual
+            ! receive, allocate the receiving buffer.
+            recvCount = 0
+            do jj = 1, nOBlockRecv
+                iProc = oBlockRecvList(1, jj)
+                iDom = oBlockRecvList(2, jj)
+                call recvOBlock(oBlocks(iDom), iDom, iProc, 0, &
+                                bufSizes(iDom, 1), bufSizes(iDom, 2), recvCount, recvInfo)
+            end do
+
+            do jj = 1, nOFringeRecv
+                iProc = oFringeRecvList(1, jj)
+                iDom = oFringeRecvList(2, jj)
+                call recvOFringe(oFringes(iDom), iDom, iProc, MAGIC, &
+                                 bufSizes(iDom, 3), bufSizes(iDom, 4), recvCount, recvInfo)
+            end do
+
+            ! Before we start waiting for the receives to finish, we can see
+            ! if we can do any searches with the blocks/fringes we already
+            ! have. Call the internal routine for this.
+            call doMyWork(flag)
+
+            ! Complete all the recives
+            do i = 1, recvCount
+
+                ! Complete any one of the recv requests
+                call mpi_waitany(recvCount, recvRequests, index, mpiStatus, ierr)
+                call ECHK(ierr, __FILE__, __LINE__)
+
+                ! Global domain index of the recv that finished
+                iDom = recvInfo(1, index)
+
+                ! Check which type of receive just finished and flag them as
+                ! being complete.
+                if (recvInfo(2, index) == 1) then
+                    oBlocks(iDom)%realBufferReady = .True.
+                else if (recvInfo(2, index) == 2) then
+                    oBlocks(iDom)%intBufferReady = .True.
+                else if (recvInfo(2, index) == 3) then
+                    oFringes(iDom)%realBufferReady = .True.
+                else if (recvInfo(2, index) == 4) then
+                    oFringes(iDOm)%intBufferReady = .True.
+                end if
+
+                ! If both int and real buffers are received, we can unpack the
+                ! oblock and flag it as ready.
+                if (oBlocks(iDom)%realBufferReady .and. oBlocks(iDom)%intBufferReady .and. &
+                    .not. oBlocks(iDom)%allocated) then
+                    call unpackOBlock(oBlocks(iDom))
+                    oBlockReady(iDom) = .True.
+                end if
+
+                ! If both int and real buffers are received, we can unpack the
+                ! oFringe and flag it as ready.
+                if (oFringes(iDom)%realBufferReady .and. oFringes(iDom)%intBufferReady .and. &
+                    .not. oFringes(iDom)%allocated) then
+                    call unpackOFringe(oFringes(iDom))
+                    oFringeReady(iDom) = .True.
+                end if
+
+                ! Now see if we can do any more of the work, ie the searches.
+                call doMyWork(flag)
+
+                ! Sanity check. flag better be true when i=recvCount
+                if (i == recvCount .and. .not. flag) then
+                    call terminate("computeInterpolationParallel", "Inconsistent Comm pattern detected.")
+                end if
+            end do
+
+            ! Last thing to do wait for all the sends to finish
+            do i = 1, sendCount
+                call mpi_waitany(sendCount, sendRequests, index, mpiStatus, ierr)
+                call ECHK(ierr, __FILE__, __LINE__)
+            end do
+
+            ! We are now completely finished with oBlocks so
+            ! delete before we allocate space for all the fringes
+            call deallocateOBlocks(oBlocks, size(oBlocks))
+            deallocate (oBlocks)
+            deallocate (oBlockReady, oFringeReady)
+
+            ! Destroy the walls we had made for the surface-overlap
+            ! searching.
+            do i = 1, nClusters
+                wall => clusterWalls(i)
+                call destroySerialQuad(wall%ADT)
+                if (wall%nNodes > 0) then
+                    call kdtree2destroy(wall%tree)
+                end if
+                deallocate (wall%x, wall%conn, wall%ind, wall%nte)
+            end do
+            deallocate (clusterWalls)
+
+            ! -----------------------------------------------------------------
+            ! Step 9: Well, all the searches are done, so now we can now send
+            ! the fringes back to where they came from.
+            ! -----------------------------------------------------------------
+
+            ! We are done with most of the data of the oFringe. Nuke that
+            ! right now to save memory. The fringeIntBuffer and
+            ! fringeRealBuffer have to stick around a little longer.
+            do iDim = 1, nDomTotal
+                if (allocated(oFringes(iDom)%x)) &
+                    deallocate (oFringes(iDom)%x)
+
+                if (allocated(oFringes(iDom)%isWall)) &
+                    deallocate (oFringes(iDom)%isWall)
+
+                if (allocated(oFringes(iDom)%xSeed)) &
+                    deallocate (oFringes(iDom)%xSeed)
+
+                if (allocated(oFringes(iDom)%wallind)) &
+                    deallocate (oFringes(iDom)%wallInd)
+            end do
+
+            call tic(iFringeProcessing)
+            nReal = 4
+            mInt = 5
+            do iDom = 1, nDomTotal
+                if (oFringes(iDom)%allocated) then
+                    ! Fringe is allocated so check it
+                    oFringes(iDom)%fringeReturnSize = oFringes(iDom)%nDonor
+
+                    ! Check if this domain is one I own. If so, we just copy
+                    ! the the fringes into the block-based list:
+
+                    nn = iDom - cumDomProc(myid)
+                    if (nn >= 1 .and. nn <= nDom) then
+                        call setPointers(nn, level, sps)
+                        if (associated(flowDoms(nn, level, sps)%fringes)) then
+                            deallocate (flowDoms(nn, level, sps)%fringes)
+                            deallocate (flowDoms(nn, level, sps)%fringePtr)
+                            deallocate (flowDoms(nn, level, sps)%nDonors)
+                        end if
+                        if (associated(flowDoms(nn, level, sps)%gInd)) then
+                            deallocate (flowDoms(nn, level, sps)%gInd)
+                        end if
+
+                        ! Estimate about 1 donors for every cell.
+                        mm = nx * ny * nz
+                        allocate (flowDoms(nn, level, sps)%fringes(mm), &
+                                  flowDoms(nn, level, sps)%fringePtr(3, 0:ib, 0:jb, 0:kb), &
+                                  flowDoms(nn, level, sps)%nDonors)
+                        flowDoms(nn, level, sps)%fringePtr = 0
+                        flowDoms(nn, level, sps)%nDonors = 0
+
+                        ! Reset the pointers due to the allocation
+                        call setPointers(nn, level, sps)
+
+                        ! Copy over the data from oFringes and then nuke the oFringe
+                        do ii = 1, oFringes(iDom)%nDonor
+                            ! Reconsitute the fringe itself.
+                            fringe%donorProc = oFringes(iDom)%fringeIntBuffer(1, ii)
+                            fringe%donorBlock = oFringes(iDom)%fringeIntBuffer(2, ii)
+                            fringe%dIndex = oFringes(iDom)%fringeIntBuffer(3, ii)
+                            fringe%myBlock = oFringes(iDom)%fringeIntBuffer(4, ii)
+                            fringe%myIndex = oFringes(iDom)%fringeIntBuffer(5, ii)
+                            fringe%donorFrac = oFringes(iDom)%fringeRealBuffer(1:3, ii)
+                            fringe%quality = oFringes(iDom)%fringeRealBuffer(4, ii)
+
+                            call addToFringeList(flowDoms(nn, level, sps)%fringes, nDonors, &
+                                                 fringe)
+                        end do
+                    end if
+                end if
+            end do
+            call toc(iFringeProcessing)
+
+            ! This will compute the amount to data we expect to receive
+            ! back for each OFringe we set out.
+            call getFringeReturnSizes(oFringeSendList, oFringeRecvList, &
+                                      nOFringeSend, nOFringeRecv, oFringes, fringeRecvSizes, cumFringeRecv)
+
+            ! Now alocate the integer and real space. Note we are receiving nReal real
+            ! values and mInt int values:
+            ii = cumFringeRecv(nOfringeSend + 1) - 1
+            allocate (intRecvBuf(ii * mInt), realRecvBuf(ii * nReal))
+
+            ! We are now ready to actually send and receive our fringes
+            sendCount = 0
+            do jj = 1, nOFringeRecv
+
+                iProc = oFringeRecvList(1, jj)
+                iDom = oFringeRecvList(2, jj)
+                iSize = oFringes(iDom)%fringeReturnSize
+                if (iSize > 0) then
+                    tag = iDom + MAGIC
+                    sendCount = sendCount + 1
+                    call mpi_isend(oFringes(iDom)%fringeRealBuffer, iSize * nReal, adflow_real, &
+                                   iproc, tag, adflow_comm_world, sendRequests(sendCount), ierr)
+                    call ECHK(ierr, __FILE__, __LINE__)
+
+                    tag = iDom + 2 * MAGIC
+                    sendCount = sendCount + 1
+                    call mpi_isend(oFringes(iDom)%fringeIntBuffer, iSize * mInt, adflow_integer, &
+                                   iproc, tag, adflow_comm_world, sendRequests(sendCount), ierr)
+                    call ECHK(ierr, __FILE__, __LINE__)
                 end if
-                if (associated(flowDoms(nn, level, sps)%gInd)) then
-                   deallocate(flowDoms(nn, level, sps)%gInd)
+            end do
+
+            ! Non-blocking receives
+            recvCount = 0
+            do jj = 1, nOfringeSend
+
+                iProc = oFringeSendList(1, jj)
+                iDom = oFringeSendList(2, jj)
+                iSize = cumFringeRecv(jj + 1) - cumFringeRecv(jj)
+                if (iSize > 0) then
+
+                    iStart = (cumFringeRecv(jj) - 1) * nReal + 1
+                    tag = iDom + MAGIC
+                    recvCount = recvCount + 1
+                    call mpi_irecv(realRecvBuf(iStart), iSize * nReal, adflow_real, &
+                                   iProc, tag, adflow_comm_world, recvRequests(recvCount), ierr)
+                    call ECHK(ierr, __FILE__, __LINE__)
+                    recvInfo(:, recvCount) = (/iDom, 1/) ! 1 for real recv
+
+                    iStart = (cumFringeRecv(jj) - 1) * mInt + 1
+                    tag = iDom + 2 * MAGIC
+                    recvCount = recvCount + 1
+                    call mpi_irecv(intRecvBuf(iStart), iSize * mInt, adflow_integer, &
+                                   iProc, tag, adflow_comm_world, recvRequests(recvCount), ierr)
+                    call ECHK(ierr, __FILE__, __LINE__)
+                    recvInfo(:, recvCount) = (/iDom, 2/) ! 2 for int recv
+                end if
+            end do
+
+            ! Now wait for the sends and receives to finish
+            do i = 1, sendCount
+                call mpi_waitany(sendCount, sendRequests, index, mpiStatus, ierr)
+                call ECHK(ierr, __FILE__, __LINE__)
+            end do
+
+            ! With the send finished, we can ditch the oFringe real buffer
+            ! that contains the frac and quality. That is no longer needed.
+            do iDom = 1, nDom
+                if (associated(oFringes(iDom)%fringeRealBuffer)) then
+                    deallocate (oFringes(iDom)%fringeRealBuffer)
                 end if
+            end do
+
+            do i = 1, recvCount
+                call mpi_waitany(recvCount, recvRequests, index, mpiStatus, ierr)
+                call ECHK(ierr, __FILE__, __LINE__)
+            end do
 
-                ! Estimate about 1 donors for every cell.
-                mm = nx*ny*nz
-                allocate(flowDoms(nn, level, sps)%fringes(mm), &
-                     flowDoms(nn, level, sps)%fringePtr(3, 0:ib, 0:jb, 0:kb), &
-                     flowDoms(nn, level, sps)%nDonors)
-                flowDoms(nn, level, sps)%fringePtr = 0
-                flowDoms(nn, level, sps)%nDonors = 0
+            call tic(iFringeProcessing)
 
-                ! Reset the pointers due to the allocation
+            ! Process the data we just received.
+            do kk = 1, nOfringeSend
+
+                ! Local block index of the fringes
+                iDom = oFringeSendList(2, kk)
+                nn = iDom - cumDomProc(myid)
+
+                ! Set the block pointers for the local block we are dealing
+                ! with:
                 call setPointers(nn, level, sps)
 
-                ! Copy over the data from oFringes and then nuke the oFringe
-                do ii=1, oFringes(iDom)%nDonor
-                   ! Reconsitute the fringe itself.
-                   fringe%donorProc = oFringes(iDom)%fringeIntBuffer(1, ii)
-                   fringe%donorBlock= oFringes(iDom)%fringeIntBuffer(2, ii)
-                   fringe%dIndex    = oFringes(iDom)%fringeIntBuffer(3, ii)
-                   fringe%myBlock   = oFringes(iDom)%fringeIntBuffer(4, ii)
-                   fringe%myIndex   = oFringes(iDom)%fringeIntBuffer(5, ii)
-                   fringe%donorFrac = oFringes(iDom)%fringeRealBuffer(1:3, ii)
-                   fringe%quality   = oFringes(iDom)%fringeRealBuffer(4, ii)
-
-                   call addToFringeList(flowDoms(nn, level, sps)%fringes, nDonors, &
-                        fringe)
+                ! This is the range of fringes that are now ready.
+                do jj = cumFringeRecv(kk), cumFringeRecv(kk + 1) - 1
+
+                    ! Recreate the fringe type
+                    iStart = mInt * (jj - 1)
+                    fringe%donorProc = intRecvBuf(iStart + 1)
+                    fringe%donorBlock = intRecvBuf(iStart + 2)
+                    fringe%dIndex = intRecvBuf(iStart + 3)
+                    fringe%myBlock = intRecvBuf(iStart + 4)
+                    fringe%myIndex = intRecvBuf(iStart + 5)
+                    iStart = nReal * (jj - 1)
+                    fringe%donorFrac = realRecvBuf(iStart + 1:iStart + 3)
+                    fringe%quality = realRecvBuf(iStart + 4)
+
+                    ! Add directly to the list.
+                    call addToFringeList(flowDoms(nn, level, sps)%fringes, nDonors, fringe)
                 end do
-             end if
-          end if
-       end do
-       call toc(iFringeProcessing)
-
-       ! This will compute the amount to data we expect to receive
-       ! back for each OFringe we set out.
-       call getFringeReturnSizes(oFringeSendList, oFringeRecvList, &
-            nOFringeSend, nOFringeRecv, oFringes, fringeRecvSizes, cumFringeRecv)
-
-       ! Now alocate the integer and real space. Note we are receiving nReal real
-       ! values and mInt int values:
-       ii = cumFringeRecv(nOfringeSend+1)-1
-       allocate(intRecvBuf(ii*mInt), realRecvBuf(ii*nReal))
-
-       ! We are now ready to actually send and receive our fringes
-       sendCount = 0
-       do jj=1, nOFringeRecv
-
-          iProc = oFringeRecvList(1, jj)
-          iDom = oFringeRecvList(2, jj)
-          iSize = oFringes(iDom)%fringeReturnSize
-          if (iSize > 0) then
-             tag = iDom + MAGIC
-             sendCount = sendCount + 1
-             call mpi_isend(oFringes(iDom)%fringeRealBuffer, iSize*nReal, adflow_real, &
-                  iproc, tag, adflow_comm_world, sendRequests(sendCount), ierr)
-             call ECHK(ierr, __FILE__, __LINE__)
-
-             tag = iDom + 2*MAGIC
-             sendCount = sendCount + 1
-             call mpi_isend(oFringes(iDom)%fringeIntBuffer, iSize*mInt, adflow_integer, &
-                  iproc, tag, adflow_comm_world, sendRequests(sendCount), ierr)
-             call ECHK(ierr, __FILE__, __LINE__)
-          end if
-       end do
-
-       ! Non-blocking receives
-       recvCount = 0
-       do jj=1, nOfringeSend
-
-          iProc = oFringeSendList(1, jj)
-          iDom = oFringeSendList(2, jj)
-          iSize = cumFringeRecv(jj+1) - cumFringeRecv(jj)
-          if (iSize > 0) then
-
-             iStart = (cumFringeRecv(jj  )-1)*nReal + 1
-             tag = iDom + MAGIC
-             recvCount = recvCount + 1
-             call mpi_irecv(realRecvBuf(iStart), iSize*nReal, adflow_real, &
-                  iProc, tag, adflow_comm_world, recvRequests(recvCount), ierr)
-             call ECHK(ierr, __FILE__, __LINE__)
-             recvInfo(:, recvCount) = (/iDom, 1/) ! 1 for real recv
-
-             iStart = (cumFringeRecv(jj  )-1)*mInt + 1
-             tag = iDom + 2*MAGIC
-             recvCount = recvCount + 1
-             call mpi_irecv(intRecvBuf(iStart), iSize*mInt, adflow_integer, &
-                  iProc, tag, adflow_comm_world, recvRequests(recvCount), ierr)
-             call ECHK(ierr, __FILE__, __LINE__)
-             recvInfo(:, recvCount) = (/iDom, 2/) ! 2 for int recv
-          end if
-       end do
-
-       ! Now wait for the sends and receives to finish
-       do i=1,sendCount
-          call mpi_waitany(sendCount, sendRequests, index, mpiStatus, ierr)
-          call ECHK(ierr, __FILE__, __LINE__)
-       end do
-
-       ! With the send finished, we can ditch the oFringe real buffer
-       ! that contains the frac and quality. That is no longer needed.
-       do iDom=1, nDom
-          if (associated(oFringes(iDom)%fringeRealBuffer)) then
-             deallocate(oFringes(iDom)%fringeRealBuffer)
-          end if
-       end do
-
-       do i=1,recvCount
-          call mpi_waitany(recvCount, recvRequests, index, mpiStatus, ierr)
-          call ECHK(ierr, __FILE__, __LINE__)
-       end do
-
-       call tic(iFringeProcessing)
-
-       ! Process the data we just received.
-       do kk=1, nOfringeSend
-
-          ! Local block index of the fringes
-          iDom = oFringeSendList(2, kk)
-          nn = iDom - cumDomProc(myid)
-
-          ! Set the block pointers for the local block we are dealing
-          ! with:
-          call setPointers(nn, level, sps)
-
-          ! This is the range of fringes that are now ready.
-          do jj=cumFringeRecv(kk), cumFringeRecv(kk+1)-1
-
-             ! Recreate the fringe type
-             iStart = mInt*(jj-1)
-             fringe%donorProc  = intRecvBuf(iStart + 1)
-             fringe%donorBlock = intRecvBuf(iStart + 2)
-             fringe%dIndex     = intRecvBuf(iStart + 3)
-             fringe%myBlock    = intRecvBuf(iStart + 4)
-             fringe%myIndex    = intRecvBuf(iStart + 5)
-             iStart = nReal*(jj-1)
-             fringe%donorFrac = realRecvBuf(iStart+1:iStart+3)
-             fringe%quality   = realRecvBuf(iStart + 4)
-
-             ! Add directly to the list.
-             call addToFringeList(flowDoms(nn, level, sps)%fringes, nDonors, fringe)
-          end do
-       end do
-
-       ! Ditch the temporary receiving memory for reals. We still the
-       ! integer receiving buffer for inside the loop.
-       deallocate(realRecvBuf)
-
-       ! Before we start the loop, for each block, we record the total
-       ! number of donors, that is the current value of nDonors. This
-       ! is necessary because on sebsequent iterations, the
-       ! exchangeFringes will have added the extra ones that are
-       ! necessary for the local halo cells. So we save the current
-       ! value of nDonors so that when we start the loop we reset the
-       ! value back this. We also sort the fringes as this only needs
-       ! to be completed once.
-
-       do nn=1, nDom
-          call setPointers(nn, level, sps)
-
-          ! First we need to sort the fringes by RECEIVER. That will
-          ! put all the cells in order.
-          call qsortFringeType(fringes, nDonors, sortByReceiver)
-
-          flowDoms(nn, level, sps)%nDonorsOnOwnedCells = flowDoms(nn, level, sps)%nDonors
-
-          ! In this loop we assign the start and end indices for the
-          ! fringePtr. This also is invarient of the subsequent
-          ! interations. Note that the '1' index is not set, that is
-          ! we're not saying which of hte potential frings it could
-          ! use, only the range of the potential fringes.
-
-          curI = 0
-          curJ = 0
-          curK = 0
-
-          do ii=1, nDonors
-
-             ! fringePtr has the following 3 integers:
-
-             ! 1: The actual fringe this cell is using. 0 if it
-             ! doesn't have a fringe
-
-             ! 2: The start index into fringes of all of this cell's
-             ! fringes. 0 if cell has no fringes.
-
-             ! 3: The end index into fringes of all of this cell's
-             ! fringes. 0 if cell has no fringes.
-
-              call unwindIndex(fringes(ii)%myIndex, il, jl, kl, i, j, k)
-             ! Set the start and end to the current index
-             if (curI /= i .or. curJ /= j .or. curK /= k) then
-
-                fringePtr(2, i, j, k) = ii
-                fringePtr(3, i, j, k) = ii
-                curI = i
-                curJ = j
-                curK = k
-             else
-                ! We have the same i,j,k. Set the end index to ii
-                fringePtr(3, i, j, k) = ii
-             end if
-          end do
-       end do
-
-       ! Allocate space for iblankLast which is used to keep track of
-       ! the previous iblank values to determine when we can sto pcd the
-       ! loop. It is initialized to 1 such that at least 2 iteations
-       ! will always be done.
-       do nn=1, nDom
-          call setPointers(nn, level, sps)
-          ! Note that we only allocate the owned cells since it
-          ! sufficient to check just the owned cells for if any one of
-          ! them has changed.
-          allocate(flowDoms(nn, level, sps)%iBlankLast(2:il, 2:jl, 2:kl))
-          flowDoms(nn, level, sps)%iBlankLast = 1
-       end do
-
-       call toc(iFringeProcessing)
-
-       ! Wall donors are now set. So we can do them once before the
-       ! loop starts and then ditch the memory. Status must be
-       ! initialized first.
-       call initializeStatus(level, sps)
-       call exchangeStatus(level, sps, commPatternCell_2nd, internalCell_2nd)
-       call determineDonors(level, sps, localWallFringes, nLocalWallFringe, .True.)
-
-       call toc(iDetermineDonors)
-       call exchangeStatusTranspose(level, sps, commPatternCell_2nd, internalCell_2nd)
-       call exchangeStatus(level, sps, commPatternCell_2nd, internalCell_2nd)
-       deallocate(localWallFringes)
-
-       ! ---------------------------
-       ! Start the refinement loop:
-       ! ---------------------------
-       refineLoop: do iRefine=1, nRefine
-
-          ! re-Initialize the status array.
-          call reInitializeStatus(level, sps)
-
-          ! Exchange the status so that the halos get the right
-          call exchangeStatus(level, sps, commPatternCell_2nd, internalCell_2nd)
-
-          call tic(iCheckDonors)
-          nLocalFringe = 0
-          do nn=1, nDom
-             call setPointers(nn, level, sps)
-
-             ! Reset the nDonors to the total number that were found for
-             ! *just* the owned cells.
-
-             nDonors = flowDoms(nn, level, sps)%nDonorsOnOwnedCells
-
-             ! Now we *actually* perform the implicit hole cut. We
-             ! compute our own quality, and then loop over the
-             ! perspective donors and see if we can get one
-             ! better. The purpose of this code is to set the "1"
-             ! index in fringePtr.
-             call wallsOnBlock(wallsPresent)
-             do k=2, kl
-                do j=2, jl
-                   do i=2, il
-                      ! Reset the pointer to the donor cell.
-                      fringePtr(1, i, j, k) = 0
-
-                      if (iblank(i, j, k) <= -2) then
-                         ! This is a flooded cell, don't try to get a donor
-                         cycle
-                      end if
-
-                      ! This is my original quality, accounting for
-                      ! the overlap factor.
-                      if (wallsPresent) then
-                         aspect = one
-                         if (useOversetWallScaling) then
-                            if (CGNSDoms(nbkGlobal)%viscousDir(1)) &
-                                 aspect(1) = (half*(mynorm2(si(i-1, j, k, :)) + mynorm2(si(i, j, k, :)))) / vol (i, j, k)
-                            if (CGNSDoms(nbkGlobal)%viscousDir(2)) &
-                                 aspect(2) = (half*(mynorm2(sj(i, j-1, k, :)) + mynorm2(sj(i, j, k, :)))) / vol (i, j, k)
-                            if (CGNSDoms(nbkGlobal)%viscousDir(3)) &
-                                 aspect(3) = (half*(mynorm2(sk(i, j, k-1, :)) + mynorm2(sk(i, j, k, :)))) / vol (i, j, k)
-                         end if
-                         fact = min(aspect(1)*aspect(2)*aspect(3), 100.0_realType)
-
-                         curQuality = (vol(i, j, k)**third/fact) * overlapFactor
-                      else
-                         curQuality = (backGroundVolScale*vol(i, j, k))**third * overlapFactor
-                      end if
-                      ! Account for explict priority
-                      curQuality = curQuality * cgnsDoms(nbkGlobal)%priority
-
-                      ! For forced receivers, they get a large quality
-                      ! such that ANY donor is selected.
-                      if (forcedRecv(i,j,k) > 0) then
-                         curQuality = large
-                      end if
-
-                      ! Loop over the potential donors
-                      iStart = fringePtr(2, i, j, k)
-                      iEnd   = fringePtr(3, i, j, k)
-
-                      ! iStart of 0 means no donor:
-                      if (iStart > 0) then
-                         do ii=iStart, iEnd
-                            if (fringes(ii)%quality < curQuality) then
-
-                               ! Update the pointer to the fringe we want
-                               fringePtr(1, i, j, k) = ii
-
-                               ! Update the current quality to the one
-                               ! we just found, in case we have
-                               ! multiple ones to choose from.
-                               curQuality = fringes(ii)%quality
-
-                               ! Flag this cell as a receiver and remove
-                               ! it's compute status.
-                               call setIsReceiver(status(i, j, k), .True.)
-                               call setIsCompute(status(i, j, k), .False.)
-                            end if
-                         end do
-                      end if
-                   end do
+            end do
+
+            ! Ditch the temporary receiving memory for reals. We still the
+            ! integer receiving buffer for inside the loop.
+            deallocate (realRecvBuf)
+
+            ! Before we start the loop, for each block, we record the total
+            ! number of donors, that is the current value of nDonors. This
+            ! is necessary because on sebsequent iterations, the
+            ! exchangeFringes will have added the extra ones that are
+            ! necessary for the local halo cells. So we save the current
+            ! value of nDonors so that when we start the loop we reset the
+            ! value back this. We also sort the fringes as this only needs
+            ! to be completed once.
+
+            do nn = 1, nDom
+                call setPointers(nn, level, sps)
+
+                ! First we need to sort the fringes by RECEIVER. That will
+                ! put all the cells in order.
+                call qsortFringeType(fringes, nDonors, sortByReceiver)
+
+                flowDoms(nn, level, sps)%nDonorsOnOwnedCells = flowDoms(nn, level, sps)%nDonors
+
+                ! In this loop we assign the start and end indices for the
+                ! fringePtr. This also is invarient of the subsequent
+                ! interations. Note that the '1' index is not set, that is
+                ! we're not saying which of hte potential frings it could
+                ! use, only the range of the potential fringes.
+
+                curI = 0
+                curJ = 0
+                curK = 0
+
+                do ii = 1, nDonors
+
+                    ! fringePtr has the following 3 integers:
+
+                    ! 1: The actual fringe this cell is using. 0 if it
+                    ! doesn't have a fringe
+
+                    ! 2: The start index into fringes of all of this cell's
+                    ! fringes. 0 if cell has no fringes.
+
+                    ! 3: The end index into fringes of all of this cell's
+                    ! fringes. 0 if cell has no fringes.
+
+                    call unwindIndex(fringes(ii)%myIndex, il, jl, kl, i, j, k)
+                    ! Set the start and end to the current index
+                    if (curI /= i .or. curJ /= j .or. curK /= k) then
+
+                        fringePtr(2, i, j, k) = ii
+                        fringePtr(3, i, j, k) = ii
+                        curI = i
+                        curJ = j
+                        curK = k
+                    else
+                        ! We have the same i,j,k. Set the end index to ii
+                        fringePtr(3, i, j, k) = ii
+                    end if
                 end do
-             end do
-
-             ! Now count up the number of actual fringes we had.
-             do k=2, kl
-                do j=2, jl
-                   do i=2, il
-                      if (isReceiver(status(i, j, k))) then
-                         nLocalFringe = nLocalFringe + 1
-                      end if
-                   end do
+            end do
+
+            ! Allocate space for iblankLast which is used to keep track of
+            ! the previous iblank values to determine when we can sto pcd the
+            ! loop. It is initialized to 1 such that at least 2 iteations
+            ! will always be done.
+            do nn = 1, nDom
+                call setPointers(nn, level, sps)
+                ! Note that we only allocate the owned cells since it
+                ! sufficient to check just the owned cells for if any one of
+                ! them has changed.
+                allocate (flowDoms(nn, level, sps)%iBlankLast(2:il, 2:jl, 2:kl))
+                flowDoms(nn, level, sps)%iBlankLast = 1
+            end do
+
+            call toc(iFringeProcessing)
+
+            ! Wall donors are now set. So we can do them once before the
+            ! loop starts and then ditch the memory. Status must be
+            ! initialized first.
+            call initializeStatus(level, sps)
+            call exchangeStatus(level, sps, commPatternCell_2nd, internalCell_2nd)
+            call determineDonors(level, sps, localWallFringes, nLocalWallFringe, .True.)
+
+            call toc(iDetermineDonors)
+            call exchangeStatusTranspose(level, sps, commPatternCell_2nd, internalCell_2nd)
+            call exchangeStatus(level, sps, commPatternCell_2nd, internalCell_2nd)
+            deallocate (localWallFringes)
+
+            ! ---------------------------
+            ! Start the refinement loop:
+            ! ---------------------------
+            refineLoop: do iRefine = 1, nRefine
+
+                ! re-Initialize the status array.
+                call reInitializeStatus(level, sps)
+
+                ! Exchange the status so that the halos get the right
+                call exchangeStatus(level, sps, commPatternCell_2nd, internalCell_2nd)
+
+                call tic(iCheckDonors)
+                nLocalFringe = 0
+                do nn = 1, nDom
+                    call setPointers(nn, level, sps)
+
+                    ! Reset the nDonors to the total number that were found for
+                    ! *just* the owned cells.
+
+                    nDonors = flowDoms(nn, level, sps)%nDonorsOnOwnedCells
+
+                    ! Now we *actually* perform the implicit hole cut. We
+                    ! compute our own quality, and then loop over the
+                    ! perspective donors and see if we can get one
+                    ! better. The purpose of this code is to set the "1"
+                    ! index in fringePtr.
+                    call wallsOnBlock(wallsPresent)
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 2, il
+                                ! Reset the pointer to the donor cell.
+                                fringePtr(1, i, j, k) = 0
+
+                                if (iblank(i, j, k) <= -2) then
+                                    ! This is a flooded cell, don't try to get a donor
+                                    cycle
+                                end if
+
+                                ! This is my original quality, accounting for
+                                ! the overlap factor.
+                                if (wallsPresent) then
+                                    aspect = one
+                                    if (useOversetWallScaling) then
+                                        if (CGNSDoms(nbkGlobal)%viscousDir(1)) &
+                             aspect(1) = (half * (mynorm2(si(i - 1, j, k, :)) + mynorm2(si(i, j, k, :)))) / vol(i, j, k)
+                                        if (CGNSDoms(nbkGlobal)%viscousDir(2)) &
+                             aspect(2) = (half * (mynorm2(sj(i, j - 1, k, :)) + mynorm2(sj(i, j, k, :)))) / vol(i, j, k)
+                                        if (CGNSDoms(nbkGlobal)%viscousDir(3)) &
+                             aspect(3) = (half * (mynorm2(sk(i, j, k - 1, :)) + mynorm2(sk(i, j, k, :)))) / vol(i, j, k)
+                                    end if
+                                    fact = min(aspect(1) * aspect(2) * aspect(3), 100.0_realType)
+
+                                    curQuality = (vol(i, j, k)**third / fact) * overlapFactor
+                                else
+                                    curQuality = (backGroundVolScale * vol(i, j, k))**third * overlapFactor
+                                end if
+                                ! Account for explict priority
+                                curQuality = curQuality * cgnsDoms(nbkGlobal)%priority
+
+                                ! For forced receivers, they get a large quality
+                                ! such that ANY donor is selected.
+                                if (forcedRecv(i, j, k) > 0) then
+                                    curQuality = large
+                                end if
+
+                                ! Loop over the potential donors
+                                iStart = fringePtr(2, i, j, k)
+                                iEnd = fringePtr(3, i, j, k)
+
+                                ! iStart of 0 means no donor:
+                                if (iStart > 0) then
+                                    do ii = iStart, iEnd
+                                        if (fringes(ii)%quality < curQuality) then
+
+                                            ! Update the pointer to the fringe we want
+                                            fringePtr(1, i, j, k) = ii
+
+                                            ! Update the current quality to the one
+                                            ! we just found, in case we have
+                                            ! multiple ones to choose from.
+                                            curQuality = fringes(ii)%quality
+
+                                            ! Flag this cell as a receiver and remove
+                                            ! it's compute status.
+                                            call setIsReceiver(status(i, j, k), .True.)
+                                            call setIsCompute(status(i, j, k), .False.)
+                                        end if
+                                    end do
+                                end if
+                            end do
+                        end do
+                    end do
+
+                    ! Now count up the number of actual fringes we had.
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 2, il
+                                if (isReceiver(status(i, j, k))) then
+                                    nLocalFringe = nLocalFringe + 1
+                                end if
+                            end do
+                        end do
+                    end do
                 end do
-             end do
-          end do
-          call toc(iCheckDonors)
-
-          ! -------------------------------
-          !            Flooding
-          ! -------------------------------
-          call tic(iFlooding)
-          call floodInteriorCells(level, sps)
-          call toc(iFlooding)
-
-          ! ---------------------------------
-          ! Determine which cells are donors
-          ! ---------------------------------
-          allocate(localFringes(nLocalFringe))
-          ! Load up the fringes
-          nLocalFringe = 0
-          do nn=1, nDom
-             call setPointers(nn, level, sps)
-             do k=2, kl
-                do j=2, jl
-                   do i=2, il
-                      if (isReceiver(status(i, j, k))) then
-                         nLocalFringe = nLocalFringe + 1
-                         ii = fringePtr(1, i, j, k)
-                         localFringes(nLocalFringe) = fringes(ii)
-                      end if
-                   end do
+                call toc(iCheckDonors)
+
+                ! -------------------------------
+                !            Flooding
+                ! -------------------------------
+                call tic(iFlooding)
+                call floodInteriorCells(level, sps)
+                call toc(iFlooding)
+
+                ! ---------------------------------
+                ! Determine which cells are donors
+                ! ---------------------------------
+                allocate (localFringes(nLocalFringe))
+                ! Load up the fringes
+                nLocalFringe = 0
+                do nn = 1, nDom
+                    call setPointers(nn, level, sps)
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 2, il
+                                if (isReceiver(status(i, j, k))) then
+                                    nLocalFringe = nLocalFringe + 1
+                                    ii = fringePtr(1, i, j, k)
+                                    localFringes(nLocalFringe) = fringes(ii)
+                                end if
+                            end do
+                        end do
+                    end do
                 end do
-             end do
-          end do
-          call tic(iDetermineDonors)
-          call determineDonors(level, sps, localFringes, nLocalFringe, .False.)
-          call toc(iDetermineDonors)
-          call exchangeStatusTranspose(level, sps, commPatternCell_2nd, internalCell_2nd)
-          call exchangeStatus(level, sps, commPatternCell_2nd, internalCell_2nd)
-          deallocate(localFringes)
-
-          ! Determine which cells need to be forced receivers. This
-          ! may have changed due to flooding.
-          call flagForcedRecv()
-
-          ! Flag cells that are no longer valid donors. This is
-          ! slightly tricky: What we do here is
-
-          ! 1. Firstly we go through the oFringes array and mark all
-          ! the fringes that are now invalid.
-
-          do iDom=1, nDomTotal
-             if (oFringes(iDom)%allocated) then
-                do iFringe=1, oFringes(iDom)%nDonor
-                   donorBlock = oFringes(iDom)%fringeIntBuffer(2, iFringe)
-                   dIndex = oFringes(iDom)%fringeIntBuffer(3, iFringe)
-                   nn = abs(donorBlock)
-
-                   if (nn < 1) then
-                      print *, 'donor:', myid, donorBlock, cumDomProc(myID), nn
-                      stop
-                   end if
-                   il = flowDoms(nn, level, sps)%il
-                   jl = flowDoms(nn, level, sps)%jl
-                   kl = flowDoms(nn, level, sps)%kl
-
-                   call unwindIndex(dIndex, il, jl, kl, i, j, k)
-
-                   ! Check if this is invalid:
-                   invalid = .False.
-                   do kk=k,k+1
-                      do jj=j,j+1
-                         do ii=i,i+1
-
-                            if (isFlooded(flowDoms(nn, level, sps)%status(ii,jj,kk)) .or. &
-                                 isFloodSeed(flowDoms(nn, level, sps)%status(ii,jj,kk)) .or. &
-                                 flowDoms(nn, level, sps)%iblank(ii,jj,kk) == -4 .or. &
-                                 flowDoms(nn, level, sps)%forcedRecv(ii, jj, kk) > 0) then
-                               invalid = .True.
+                call tic(iDetermineDonors)
+                call determineDonors(level, sps, localFringes, nLocalFringe, .False.)
+                call toc(iDetermineDonors)
+                call exchangeStatusTranspose(level, sps, commPatternCell_2nd, internalCell_2nd)
+                call exchangeStatus(level, sps, commPatternCell_2nd, internalCell_2nd)
+                deallocate (localFringes)
+
+                ! Determine which cells need to be forced receivers. This
+                ! may have changed due to flooding.
+                call flagForcedRecv()
+
+                ! Flag cells that are no longer valid donors. This is
+                ! slightly tricky: What we do here is
+
+                ! 1. Firstly we go through the oFringes array and mark all
+                ! the fringes that are now invalid.
+
+                do iDom = 1, nDomTotal
+                    if (oFringes(iDom)%allocated) then
+                        do iFringe = 1, oFringes(iDom)%nDonor
+                            donorBlock = oFringes(iDom)%fringeIntBuffer(2, iFringe)
+                            dIndex = oFringes(iDom)%fringeIntBuffer(3, iFringe)
+                            nn = abs(donorBlock)
+
+                            if (nn < 1) then
+                                print *, 'donor:', myid, donorBlock, cumDomProc(myID), nn
+                                stop
+                            end if
+                            il = flowDoms(nn, level, sps)%il
+                            jl = flowDoms(nn, level, sps)%jl
+                            kl = flowDoms(nn, level, sps)%kl
+
+                            call unwindIndex(dIndex, il, jl, kl, i, j, k)
+
+                            ! Check if this is invalid:
+                            invalid = .False.
+                            do kk = k, k + 1
+                                do jj = j, j + 1
+                                    do ii = i, i + 1
+
+                                        if (isFlooded(flowDoms(nn, level, sps)%status(ii, jj, kk)) .or. &
+                                            isFloodSeed(flowDoms(nn, level, sps)%status(ii, jj, kk)) .or. &
+                                            flowDoms(nn, level, sps)%iblank(ii, jj, kk) == -4 .or. &
+                                            flowDoms(nn, level, sps)%forcedRecv(ii, jj, kk) > 0) then
+                                            invalid = .True.
+                                        end if
+                                    end do
+                                end do
+                            end do
+
+                            if (invalid) then
+                                ! Change the donorBlock to be negative if it is
+                                ! not already so.  The negative block index
+                                ! indicates that this donor is no longer valid.
+                                if (donorBlock > 0) then
+                                    oFringes(iDom)%fringeIntBuffer(2, iFringe) = -donorBlock
+                                end if
                             end if
-                         end do
-                      end do
-                   end do
-
-                   if (invalid) then
-                      ! Change the donorBlock to be negative if it is
-                      ! not already so.  The negative block index
-                      ! indicates that this donor is no longer valid.
-                      if (donorBlock > 0) then
-                         oFringes(iDom)%fringeIntBuffer(2, iFringe) = -donorBlock
-                      end if
-                   end if
+                        end do
+                    end if
                 end do
-             end if
-          end do
-
-          ! 2. We send the fringes back to where they came from like
-          ! during the search. The only difference is we don't add
-          ! them to the list again, but rather modify what is already there.
-
-          sendCount = 0
-          do jj=1, nOFringeRecv
-
-             iProc = oFringeRecvList(1, jj)
-             iDom = oFringeRecvList(2, jj)
-             iSize = oFringes(iDom)%fringeReturnSize
-             if (iSize > 0) then
-                tag = iDom + 2*MAGIC
-                sendCount = sendCount + 1
-                call mpi_isend(oFringes(iDom)%fringeIntBuffer, iSize*mInt, adflow_integer, &
-                     iproc, tag, adflow_comm_world, sendRequests(sendCount), ierr)
-                call ECHK(ierr, __FILE__, __LINE__)
-             end if
-          end do
-
-          ! Non-blocking receives
-          recvCount = 0
-          do jj=1, nOfringeSend
-
-             iProc = oFringeSendList(1, jj)
-             iDom = oFringeSendList(2, jj)
-             iSize = cumFringeRecv(jj+1) - cumFringeRecv(jj)
-             if (iSize > 0) then
-
-                iStart = (cumFringeRecv(jj  )-1)*mInt + 1
-                tag = iDom + 2*MAGIC
-                recvCount = recvCount + 1
-                call mpi_irecv(intRecvBuf(iStart), iSize*mInt, adflow_integer, &
-                     iProc, tag, adflow_comm_world, recvRequests(recvCount), ierr)
-                call ECHK(ierr, __FILE__, __LINE__)
-                recvInfo(:, recvCount) = (/iDom, 2/) ! 2 for int recv
-             end if
-          end do
 
-          ! Process any data we can locally while we wait.
-          do iDom=1, nDomTotal
-             if (oFringes(iDom)%allocated) then
+                ! 2. We send the fringes back to where they came from like
+                ! during the search. The only difference is we don't add
+                ! them to the list again, but rather modify what is already there.
+
+                sendCount = 0
+                do jj = 1, nOFringeRecv
+
+                    iProc = oFringeRecvList(1, jj)
+                    iDom = oFringeRecvList(2, jj)
+                    iSize = oFringes(iDom)%fringeReturnSize
+                    if (iSize > 0) then
+                        tag = iDom + 2 * MAGIC
+                        sendCount = sendCount + 1
+                        call mpi_isend(oFringes(iDom)%fringeIntBuffer, iSize * mInt, adflow_integer, &
+                                       iproc, tag, adflow_comm_world, sendRequests(sendCount), ierr)
+                        call ECHK(ierr, __FILE__, __LINE__)
+                    end if
+                end do
 
-                nn = iDom - cumDomProc(myid)
-                if (nn >= 1 .and. nn <= nDom) then
-                   call setPointers(nn, level, sps)
+                ! Non-blocking receives
+                recvCount = 0
+                do jj = 1, nOfringeSend
+
+                    iProc = oFringeSendList(1, jj)
+                    iDom = oFringeSendList(2, jj)
+                    iSize = cumFringeRecv(jj + 1) - cumFringeRecv(jj)
+                    if (iSize > 0) then
+
+                        iStart = (cumFringeRecv(jj) - 1) * mInt + 1
+                        tag = iDom + 2 * MAGIC
+                        recvCount = recvCount + 1
+                        call mpi_irecv(intRecvBuf(iStart), iSize * mInt, adflow_integer, &
+                                       iProc, tag, adflow_comm_world, recvRequests(recvCount), ierr)
+                        call ECHK(ierr, __FILE__, __LINE__)
+                        recvInfo(:, recvCount) = (/iDom, 2/) ! 2 for int recv
+                    end if
+                end do
+
+                ! Process any data we can locally while we wait.
+                do iDom = 1, nDomTotal
+                    if (oFringes(iDom)%allocated) then
+
+                        nn = iDom - cumDomProc(myid)
+                        if (nn >= 1 .and. nn <= nDom) then
+                            call setPointers(nn, level, sps)
+
+                            do jj = 1, oFringes(iDom)%nDonor
+
+                                donorProc = oFringes(iDom)%fringeIntBuffer(1, jj)
+                                donorBlock = oFringes(iDom)%fringeIntBuffer(2, jj)
+                                dIndex = oFringes(iDom)%fringeIntBuffer(3, jj)
+                                myBlock = oFringes(iDom)%fringeIntBuffer(4, jj)
+                                myIndex = oFringes(iDom)%fringeIntBuffer(5, jj)
+
+                                if (donorBlock < 0) then
+                                    ! This fringe is no longer valid! Modify this
+                                    ! fringe in our list.
+                                    call unwindIndex(myIndex, il, jl, kl, i, j, k)
+
+                                    ! Now loop over the donors this cells has to match
+                                    ! up the one that we are dealing with here.
+
+                                    ! Loop over the potential donors.
+                                    iStart = fringePtr(2, i, j, k)
+                                    iEnd = fringePtr(3, i, j, k)
+
+                                    do ii = iStart, iEnd
+                                        if (fringes(ii)%donorBlock == abs(donorBlock) .and. &
+                                            fringes(ii)%dIndex == dIndex) then
+
+                                            ! This is the fringe. Flag as bad.
+                                            fringes(ii)%quality = 2 * Large
+                                        end if
+                                    end do
+                                end if
+                            end do
+                        end if
+                    end if
+                end do
+
+                ! Now wait for the sends and receives to finish
+                do i = 1, sendCount
+                    call mpi_waitany(sendCount, sendRequests, index, mpiStatus, ierr)
+                    call ECHK(ierr, __FILE__, __LINE__)
+                end do
+
+                do i = 1, recvCount
+                    call mpi_waitany(recvCount, recvRequests, index, mpiStatus, ierr)
+                    call ECHK(ierr, __FILE__, __LINE__)
+                end do
+
+                ! Process the data we just received.
+                do kk = 1, nOfringeSend
+
+                    ! Local block index of the fringes
+                    iDom = oFringeSendList(2, kk)
+                    nn = iDom - cumDomProc(myid)
 
-                   do jj=1, oFringes(iDom)%nDonor
+                    ! Set the block pointers for the local block we are dealing
+                    ! with:
+                    call setPointers(nn, level, sps)
 
-                      donorProc  = oFringes(iDom)%fringeIntBuffer(1, jj)
-                      donorBlock = oFringes(iDom)%fringeIntBuffer(2, jj)
-                      dIndex     = oFringes(iDom)%fringeIntBuffer(3, jj)
-                      myBlock    = oFringes(iDom)%fringeIntBuffer(4, jj)
-                      myIndex    = oFringes(iDom)%fringeIntBuffer(5, jj)
+                    ! This is the range of fringes that are now ready.
+                    do jj = cumFringeRecv(kk), cumFringeRecv(kk + 1) - 1
 
-                      if (donorBlock < 0) then
-                         ! This fringe is no longer valid! Modify this
-                         ! fringe in our list.
-                         call unwindIndex(myIndex, il, jl, kl, i, j, k)
+                        iStart = mInt * (jj - 1)
+                        donorProc = intRecvBuf(iStart + 1)
+                        donorBlock = intRecvBuf(iStart + 2)
+                        dIndex = intRecvBuf(iStart + 3)
+                        myBlock = intRecvBuf(iStart + 4)
+                        myIndex = intRecvBuf(iStart + 5)
 
-                         ! Now loop over the donors this cells has to match
-                         ! up the one that we are dealing with here.
+                        if (donorBlock < 0) then
+                            ! This fringe is no longer valid! Modify this
+                            ! fringe in our list.
+                            call unwindIndex(myIndex, il, jl, kl, i, j, k)
 
-                         ! Loop over the potential donors.
-                         iStart = fringePtr(2, i, j, k)
-                         iEnd   = fringePtr(3, i, j, k)
+                            ! Now loop over the donors this cells has to match
+                            ! up the one that we are dealing with here.
 
-                         do ii=iStart, iEnd
-                            if (fringes(ii)%donorBlock == abs(donorBlock) .and. &
-                                 fringes(ii)%dIndex == dIndex) then
+                            ! Loop over the potential donors.
+                            iStart = fringePtr(2, i, j, k)
+                            iEnd = fringePtr(3, i, j, k)
 
-                               ! This is the fringe. Flag as bad.
-                               fringes(ii)%quality = 2*Large
+                            if (iStart == 0) then
+                                print *, 'Something bad happened. The block does not think'&
+                                     &'It has any donors but there is an incoming one.'
+                                stop
                             end if
-                         end do
-                      end if
-                   end do
+
+                            do ii = iStart, iEnd
+                                if (fringes(ii)%donorProc == donorProc .and. &
+                                    fringes(ii)%donorBlock == abs(donorBlock) .and. &
+                                    fringes(ii)%dIndex == dIndex) then
+
+                                    ! This is the fringe. Flag as bad.
+                                    fringes(ii)%quality = 2 * Large
+                                end if
+                            end do
+                        end if
+                    end do
+                end do
+
+                ! -------------------------------------------------------------
+
+                ! Correct irregular cells
+                call irregularCellCorrection(level, sps)
+
+                ! Set UPdate the iblank array.
+                call setIblankArray(level, sps)
+
+                ! -----------------------------------------------------------------
+                ! Step 16: The algorithm is now complete. Run the checkOverset
+                ! algorithm to verify that we actually have a valid interpolation
+                ! -----------------------------------------------------------------
+                call checkOverset(level, sps, totalOrphans, .false.)
+
+                ! Determine if we can exit the loop. To do this we need to
+                ! check if *any* of the iblank values has changed on *any*
+                ! processor since the last iteration.
+                localChanged = .False.
+                do nn = 1, nDom
+                    call setPointers(nn, level, sps)
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 2, il
+                                if (flowDoms(nn, level, sps)%iblankLast(i, j, k) /= iblank(i, j, k)) then
+                                    localChanged = .True.
+                                end if
+                                ! Now save the value
+                                flowDoms(nn, level, sps)%iblankLast(i, j, k) = iblank(i, j, k)
+                            end do
+                        end do
+                    end do
+                end do
+                globalChanged = .False.
+                call mpi_allreduce(localChanged, globalChanged, 1, mpi_logical, MPI_LOR, &
+                                   adflow_comm_world, ierr)
+                call ECHK(ierr, __FILE__, __LINE__)
+
+                if (.not. globalChanged) then
+                    ! We're done!
+                    exit refineLoop
                 end if
-             end if
-          end do
-
-          ! Now wait for the sends and receives to finish
-          do i=1,sendCount
-             call mpi_waitany(sendCount, sendRequests, index, mpiStatus, ierr)
-             call ECHK(ierr, __FILE__, __LINE__)
-          end do
-
-          do i=1,recvCount
-             call mpi_waitany(recvCount, recvRequests, index, mpiStatus, ierr)
-             call ECHK(ierr, __FILE__, __LINE__)
-          end do
-
-          ! Process the data we just received.
-          do kk=1, nOfringeSend
-
-             ! Local block index of the fringes
-             iDom = oFringeSendList(2, kk)
-             nn = iDom - cumDomProc(myid)
-
-             ! Set the block pointers for the local block we are dealing
-             ! with:
-             call setPointers(nn, level, sps)
-
-             ! This is the range of fringes that are now ready.
-             do jj=cumFringeRecv(kk), cumFringeRecv(kk+1)-1
-
-                iStart = mInt*(jj-1)
-                donorProc  = intRecvBuf(iStart + 1)
-                donorBlock = intRecvBuf(iStart + 2)
-                dIndex     = intRecvBuf(iStart + 3)
-                myBlock    = intRecvBuf(iStart + 4)
-                myIndex    = intRecvBuf(iStart + 5)
-
-                if (donorBlock < 0) then
-                   ! This fringe is no longer valid! Modify this
-                   ! fringe in our list.
-                   call unwindIndex(myIndex, il, jl, kl, i, j, k)
-
-                   ! Now loop over the donors this cells has to match
-                   ! up the one that we are dealing with here.
-
-                   ! Loop over the potential donors.
-                   iStart = fringePtr(2, i, j, k)
-                   iEnd   = fringePtr(3, i, j, k)
-
-                   if (iStart == 0) then
-                      print *,'Something bad happened. The block does not think'&
-                           &'It has any donors but there is an incoming one.'
-                      stop
-                   end if
-
-                   do ii=iStart, iEnd
-                      if (fringes(ii)%donorProc == donorProc .and. &
-                           fringes(ii)%donorBlock == abs(donorBlock) .and. &
-                           fringes(ii)%dIndex == dIndex) then
-
-                         ! This is the fringe. Flag as bad.
-                         fringes(ii)%quality = 2*Large
-                      end if
-                   end do
+
+            end do refineLoop
+
+            if (globalChanged) then
+                if (myID == 0) then
+                    print "(2(A), I4, A)", "Warning: The overset connectivity loop exited ", &
+                        "before the connectivity was complete after running: ", nRefine, " iterations."
+                    print "(a)", "         Increase the number of iterations by setting nRefine."
                 end if
-             end do
-          end do
+            end if
+            ! Final operations after the interpolations have
+            ! stabilized. Status must be exchanged due to the last
+            ! irregular cell correction.
+            call tic(iFringeReduction)
+            call exchangeStatus(level, sps, commPatternCell_2nd, internalCell_2nd)
+            call fringeReduction(level, sps)
+            call toc(iFringeReduction)
+
+            call exchangeStatus(level, sps, commPatternCell_2nd, internalCell_2nd)
+            call exchangeFringes(level, sps, commPatternCell_2nd, internalCell_2nd)
+
+            call tic(iFinalCommStructures)
+            call finalOversetCommStructures(level, sps)
+            call setIblankArray(level, sps)
+            call checkOverset(level, sps, totalOrphans, .True.)
+
+            do nn = 1, nDom
+                call setPointers(nn, level, sps)
+                allocate (flowDoms(nn, level, sps)%gInd(8, 0:ib, 0:jb, 0:kb))
+                flowDoms(nn, level, sps)%gInd = -1
+            end do
 
-          ! -------------------------------------------------------------
+            ! Setup the buffer sizes
+            call setBufferSizes(level, sps, .false., .True.)
 
+            ! Set up the gInd using the final overset comm structure.
+            call setupFringeGlobalInd(level, sps)
 
+            ! Deallocate some data we no longer need
+            deallocate (Xmin, Xmax, work)
+            do nn = 1, nDom
+                deallocate (flowDoms(nn, level, sps)%iBlankLast)
+            end do
+            call toc(iFinalCommStructures)
 
+            ! Done with the oFringes
+            call deallocateOFringes(oFringes, size(oFringes))
 
+            ! Ditch the temporary receiving memory and the oFringe array itself.
+            deallocate (oFringes, fringeRecvSizes, cumFringeRecv, intRecvBuf)
 
+        end do spectralLoop
 
-          ! Correct irregular cells
-          call irregularCellCorrection(level, sps)
+        ! Add fail flag if we get orphans
+        if (totalOrphans > 0) then
+            if (myID == 0) then
+                print *, 'Orphans present in the grid. Setting fail flags to True'
+            end if
+            call returnFail("oversetComm", "Orphans present in grid.")
+            call mpi_barrier(ADflow_comm_world, ierr)
+        end if
 
-          ! Set UPdate the iblank array.
-          call setIblankArray(level, sps)
+        ! Free the buffer and make a new one that includes necessary sizes
+        ! for the overset comm
+        deallocate (sendBuffer, recvBuffer)
+        allocate (sendBuffer(sendBufferSize), recvBuffer(recvBufferSize))
 
-          ! -----------------------------------------------------------------
-          ! Step 16: The algorithm is now complete. Run the checkOverset
-          ! algorithm to verify that we actually have a valid interpolation
-          ! -----------------------------------------------------------------
-          call checkOverset(level, sps, totalOrphans, .false.)
+        call MPI_barrier(adflow_comm_world, ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
+        call toc(iTotal)
 
-          ! Determine if we can exit the loop. To do this we need to
-          ! check if *any* of the iblank values has changed on *any*
-          ! processor since the last iteration.
-          localChanged = .False.
-          do nn=1, nDom
-             call setPointers(nn, level, sps)
-             do k=2, kl
-                do j=2, jl
-                   do i=2, il
-                      if (flowDoms(nn, level, sps)%iblankLast(i, j, k) /= iblank(i, j, k)) then
-                         localChanged = .True.
-                      end if
-                      ! Now save the value
-                      flowDoms(nn, level, sps)%iblankLast(i, j, k) = iblank(i, j, k)
-                   end do
+    contains
+
+        subroutine computeDomainBoundingBoxes
+
+            implicit none
+
+            ! Working Variables
+            real(kind=realType), dimension(3, nDom) :: xMinLocal, xMaxLocal
+
+            xMinLocal = huge(1.0d0)
+            xMaxLocal = -huge(1.0d0)
+            do nn = 1, nDom
+                call setPointers(nn, level, sps)
+                do k = 1, kl
+                    do j = 1, jl
+                        do i = 1, il
+                            do iDim = 1, 3
+                                xMinLocal(iDim, nn) = &
+                                    min(xMinLocal(iDim, nn), x(i, j, k, iDim))
+                                xMaxLocal(iDim, nn) = &
+                                    max(xMaxLocal(iDim, nn), x(i, j, k, iDim))
+                            end do
+                        end do
+                    end do
                 end do
-             end do
-          end do
-          globalChanged = .False.
-          call mpi_allreduce(localChanged, globalChanged, 1, mpi_logical, MPI_LOR, &
-               adflow_comm_world, ierr)
-          call ECHK(ierr, __FILE__, __LINE__)
-
-          if (.not. globalChanged) then
-             ! We're done!
-             exit refineLoop
-          end if
-
-       end do refineLoop
-
-       if (globalChanged) then
-          if(myID == 0) then
-             print "(2(A), I4, A)", "Warning: The overset connectivity loop exited ", &
-               "before the connectivity was complete after running: ", nRefine, " iterations."
-             print "(a)", "         Increase the number of iterations by setting nRefine."
-          end if
-       end if
-       ! Final operations after the interpolations have
-       ! stabilized. Status must be exchanged due to the last
-       ! irregular cell correction.
-       call tic(iFringeReduction)
-       call exchangeStatus(level, sps, commPatternCell_2nd, internalCell_2nd)
-       call fringeReduction(level, sps)
-       call toc(iFringeReduction)
-
-       call exchangeStatus(level, sps, commPatternCell_2nd, internalCell_2nd)
-       call exchangeFringes(level, sps, commPatternCell_2nd, internalCell_2nd)
-
-       call tic(iFinalCommStructures)
-       call finalOversetCommStructures(level, sps)
-       call setIblankArray(level, sps)
-       call checkOverset(level, sps, totalOrphans, .True.)
-
-       do nn=1, nDom
-          call setPointers(nn, level, sps)
-          allocate(flowDoms(nn, level, sps)%gInd(8, 0:ib, 0:jb, 0:kb))
-          flowDoms(nn, level, sps)%gInd = -1
-       end do
-
-       ! Setup the buffer sizes
-       call setBufferSizes(level, sps, .false., .True.)
-
-       ! Set up the gInd using the final overset comm structure.
-       call setupFringeGlobalInd(level, sps)
-
-       ! Deallocate some data we no longer need
-       deallocate(Xmin, Xmax, work)
-       do nn=1, nDom
-          deallocate(flowDoms(nn, level, sps)%iBlankLast)
-       end do
-       call toc(iFinalCommStructures)
-
-       ! Done with the oFringes
-       call deallocateOFringes(oFringes, size(oFringes))
-
-       ! Ditch the temporary receiving memory and the oFringe array itself.
-       deallocate(oFringes, fringeRecvSizes, cumFringeRecv, intRecvBuf)
-
-    end do spectralLoop
-
-    ! Add fail flag if we get orphans
-    if (totalOrphans > 0) then
-       if (myID == 0) then
-          print *,'Orphans present in the grid. Setting fail flags to True'
-       end if
-       call returnFail("oversetComm", "Orphans present in grid.")
-       call mpi_barrier(ADflow_comm_world, ierr)
-    end if
-
-    ! Free the buffer and make a new one that includes necessary sizes
-    ! for the overset comm
-    deallocate(sendBuffer, recvBuffer)
-    allocate(sendBuffer(sendBufferSize), recvBuffer(recvBufferSize))
-
-    call MPI_barrier(adflow_comm_world, ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
-    call toc(iTotal)
-
-  contains
-
-    subroutine computeDomainBoundingBoxes
-
-      implicit none
-
-      ! Working Variables
-      real(kind=realType), dimension(3, nDom) :: xMinLocal, xMaxLocal
-
-      xMinLocal = huge(1.0d0)
-      xMaxLocal = -huge(1.0d0)
-      do nn=1,nDom
-         call setPointers(nn, level, sps)
-         do k=1, kl
-            do j=1, jl
-               do i=1, il
-                  do iDim=1,3
-                     xMinLocal(iDim, nn) = &
-                          min(xMinLocal(iDim, nn), x(i, j, k, iDim))
-                     xMaxLocal(iDim, nn) = &
-                          max(xMaxLocal(iDim, nn), x(i, j, k, iDim))
-                  end do
-               end do
             end do
-         end do
-      end do
-
-      ! Now we can allgather the xMin and xMax  from each
-      ! processor to everyone
-      call mpi_allgatherV(xMinLocal, nDom*3, adflow_real, xMin, 3*nDomProc, &
-           3*cumDomProc, adflow_real, adflow_comm_world, ierr)
-      call ECHK(ierr, __FILE__, __LINE__)
-
-      call mpi_allgatherV(xMaxLocal, nDom*3, adflow_real, xMax, 3*nDomProc, &
-           3*cumDomProc, adflow_real, adflow_comm_world, ierr)
-      call ECHK(ierr, __FILE__, __LINE__)
-
-    end subroutine computeDomainBoundingBoxes
-
-    subroutine buildGLobalSparseOverlap(overlap)
-
-      use oversetData, only : clusters
-      implicit none
-
-      ! Input/Output
-      type(CSRMatrix), intent(inout) :: overlap
-
-      ! Working
-      integer(kind=intType), dimension(:), allocatable :: colIndLocal, rowPtrLocal
-      logical , dimension(:, :), allocatable :: localOverlap
-      integer(kind=intType) :: nnzLocal
-      integer(kind=intType), dimension(:), allocatable :: nnzProc, cumNNzProc
-
-      ! Allocate the space for my processor's compoent of iOverlap. The
-      ! number of rows is the number of blocks I own (in my own numbering)
-      ! and the coluns are the total number of blocks. It is initialized
-      ! to True, and remains that way unless we can conclusively prove the
-      ! blocks don't overlap.
-      allocate(localOverlap(nDom, nDomTotal))
-      localOverlap = .True.
-
-      ! Assume all rows are dense, and decrement when we eliminate an
-      ! entry
-      nnzLocal = nDom*nDomTotal
-
-      ! Bounding box check: We will check our *own* blocks against the
-      ! bounding boxes for for each domain.  This will fill up the
-      ! localOverlapArray and determine nnzLocal -- the number of
-      ! non-zero local entries in the global sparse matrix.
-
-      do nn=1, nDom
-         iDom = cumDomProc(myid) + nn
-
-         ! Now Loop over *all* of the other blocks
-         do jDom=1, nDomTotal
-
-            ! We can eliminate some of pairs using the cluser
-            ! analysis. Note that the cluster analysis will take care
-            ! of making sure that a block doesn't interset itself. ie,
-            ! the diagonal of the full matrix must be .False.
-
-            if (clusters(iDom) == clusters(jDom)) then
-               localOverlap(nn, jDom) = .False.
-               nnzLocal = nnzLocal - 1
-            end if
 
-            ! Only do the spatial check if we haven't elminated the
-            ! connection through the cluster check
-            if (localOverlap(nn, jDom)) then
-
-               ! Now do the box overlap check
-               if ( &
-                    xMin(1, iDom) >= xMax(1, jDom) .or. &
-                    xMax(1, iDom) <= xMin(1, jDom) .or. &
-                    xMin(2, iDom) >= xMax(2, jDom) .or. &
-                    xMax(2, iDom) <= xMin(2, jDom) .or. &
-                    xMin(3, iDom) >= xMax(3, jDom) .or. &
-                    xMax(3, iDom) <= xMin(3, jDom)) then
-
-                  ! These bounding boxes do not intersect.
-                  localOverlap(nn, jDom) = .False.
-                  nnzLocal = nnzLocal -1
-               end if
-            end if
-         end do
-      end do
-
-      ! Now, create a sparse matrix representation of the local part
-
-      allocate(colIndLocal(nnzLocal), rowPtrLocal(nDom + 1))
-      rowPtrLocal(1) = 1
-      i = 0
-      do nn=1, nDom
-         do jDom=1,nDomTotal
-            if (localOverlap(nn, jDom)) then
-               i = i + 1
-               colIndLocal(i) = jDom
-            end if
-         end do
-         rowPtrLocal(nn+1)  = i + 1
-      end do
-
-      ! Now we want to assemble the global (sparse!) connectivity
-      ! matrix for all processors. This is going to require a little
-      ! communication of sizes first followed by the actual data.
-
-      ! Determine distribution of non-zero locations
-      allocate(nnzProc(nProc), cumNNZProc(0:nProc))
-      call mpi_allgather(nnzLocal, 1, adflow_integer, nnzProc, 1, adflow_integer, &
-           adflow_comm_world, ierr)
-
-      overlap%nnz = sum(nnzProc)
-      overlap%nRow = nDomTotal
-      overlap%nCol = nDomTotal
-      overlap%nnzLocal = nnzLocal
-      ! We can now know how big the global data will be
-      allocate(&
-           overlap%data(overlap%nnz), &
-           overlap%colInd(overlap%nnz), &
-           overlap%rowPtr(overlap%nRow + 1), &
-           overlap%assignedProc(overlap%nnz))
-
-      overlap%allocated = .True.
-      cumNNZProc(0) = 0
-      do iProc=1,nProc
-         cumNNZProc(iProc) = cumNNZProc(iProc-1) + nnzProc(iProc)
-      end do
-
-      ! Correct for rowPtrLocal for the cumulative number of nnz up to
-      ! this proc:
-      rowPtrLocal = rowPtrLocal + cumNNZProc(myid)
-
-      ! Gather the column indicies
-      call mpi_allgatherV(colIndLocal, nnzLocal, adflow_integer, overlap%colInd, &
-           nnzProc, cumNNZProc, adflow_integer, adflow_comm_world, ierr)
-
-      ! Now we gather the rowPtr to everone
-      overlap%rowPtr(1) = 1
-      call mpi_allgatherV(rowPtrLocal(2:nDom), nDom, adflow_integer, &
-           overlap%rowPtr(2:nDomTotal+1), &
-           nDomProc, cumDomProc, adflow_integer, adflow_comm_world, ierr)
-
-      ! Initialize the assignedProc to the owned rows of a processor
-      do iProc=0,nProc-1
-         overlap%assignedProc(cumNNZProc(iProc)+1:cumNNZProc(iProc+1)) = iProc
-      end do
-
-      deallocate(colIndLocal, rowPtrLocal, localOverlap, cumNNzProc)
-
-    end subroutine buildGlobalSparseOverlap
-
-    subroutine doMyWork(flag)
-
-      ! This internal subroutine which may be called repeadly, performs
-      ! as many of the searches my processor is responsible for. It
-      ! returns true when all the work has been completed.
-
-      implicit none
-      logical, intent(out) :: flag
-      integer(kind=intType) :: iDom, jDom, jj
-
-      flag = .True.
-      do iWork=1, nWork
-
-         iDom = work(1, iWork)
-         jDom = work(2, iWork)
-         jj   = work(3, iWork) ! Index from the overlap matrix
-
-         ! Check if I have the oBlock and fringes i need to do this
-         ! intersection and I haven't already done it.
-         if (oBlockReady(iDom) .and. oFringeReady(jDom) .and. &
-              work(4, iWork) == 0) then
-
-            startTime = mpi_wtime()
-
-            call fringeSearch(oBlocks(iDom), oFringes(jDom))
-            endTime = mpi_wtime()
-            overlap%data(jj) = endTime - startTime
-
-            ! Flag this work as being done:
-            work(4, iWork) = 1
-         end if
-
-         ! If any one is not done, flag is flipped to False.
-         if (work(4, iWork) /= 1) then
-            flag = .False.
-         end if
-      end do
-
-    end subroutine doMyWork
-
-  end subroutine oversetComm
-
-  subroutine writePartitionedMesh(fileName)
-
-    ! This is a debugging routine for writing out meshes *as they are
-    ! partioned*. This can be useful for debugging overset issues. Only
-    ! the grid coordinates are written...these will have to be post
-    ! processed to get connectivity information if the grid is to be
-    ! used as input again.
-
-    use constants
-    use communication, only : adflow_comm_world, myID, nProc
-    use blockPointers, only : il, jl, kl, nx, ny, nz, x, nDom
-    use utils, only : EChk, setPointers
-    use su_cgns
-    implicit none
-
-    character(len=*), intent(in) :: fileName
-    integer(kind=intType) :: nDomTotal, iProc, nn, i, j, k, iDim, iDom, ierr, ii
-    integer(kind=intType) :: iii,jjj,kkk
-    integer(kind=intType) :: bufSize, maxSize, ibufSize, imaxSize
-    integer(kind=intType), dimension(3, nDom) :: localDim
-    integer(kind=intType), dimension(:), allocatable :: nDomProc, cumDomProc
-    integer(kind=intType), dimension(:, :), allocatable :: dims
-    real(kind=realType), dimension(:), allocatable :: buffer
-    real(kind=realType), dimension(:, :, :, :), allocatable :: xtmp
-    integer(kind=intType) :: ier, zoneCOunter, base, zoneID, coordID, cg, zone
-    integer(kind=cgsize_t) :: sizes(9)
-    integer(kind=intType) :: ifield, iSol
-    character(len=40) :: tmpStr, zoneName
-    character(len=32) :: coorNames(3)
-    integer mpiStatus(MPI_STATUS_SIZE)
-    character(len=maxStringLen) :: zoneProcFormat = "(A, I5.5, A, I3.3)"
-
-    coorNames(1) = "CoordinateX"
-    coorNames(2) = "CoordinateY"
-    coorNames(3) = "CoordinateZ"
-
-    call MPI_BARRIER(adflow_comm_world, ierr)
-
-    ! Gather the dimensions of all blocks to everyone
-    call mpi_allreduce(nDom, nDomTotal, 1, adflow_integer, MPI_SUM, &
-         adflow_comm_world, ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
-
-    ! Store the sizes of the local blocks
-    do nn=1,nDom
-       call setPointers(nn, 1, 1)
-
-       ! Store the 'l' sizes for transferring
-       localDim(1, nn) = il
-       localDim(2, nn) = jl
-       localDim(3, nn) = kl
-    end do
-
-    ! Allocate the space we need for the numbers and cumulative form
-    allocate(nDomProc(0:nProc-1), cumDomProc(0:nProc), dims(3, nDomTotal))
-
-    ! Receive the number of domains from each proc using an allgather.
-    call mpi_allgather(nDom, 1, adflow_integer, nDomProc, 1, adflow_integer, &
-         adflow_comm_world, ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
-
-    ! Compute the cumulative format:
-    cumDomProc(0) = 0
-    do iProc=1, nProc
-       cumDomProc(iProc) = cumDomProc(iProc-1) + nDomProc(iProc-1)
-    end do
-
-    ! We will also allgather all of the block sizes which will make
-    ! things a little easier since everyone will know the proper sizes
-    ! for the sends
-    call mpi_allgatherV(localDim, nDom*3, adflow_integer, dims, 3*nDomProc, &
-         3*cumDomProc, adflow_integer, adflow_comm_world, ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
-
-    maxSize = 0
-    do i=1,nDomTotal
-       maxSize = max(maxSize, dims(1, i)*dims(2,i)*dims(3,i)*3)
-    end do
-
-    allocate(buffer(maxSize))
-
-    if (myid == 0) then
-
-       ! Open the CGNS File
-       call cg_open_f(fileName, mode_write, cg, ier)
-       base = 1
-       call cg_base_write_f(cg, "Base#1", 3, 3, base, ier)
-
-       zoneCounter = 0
-       ! Write my own blocks first
-       do nn=1,nDom
-          call setPointers(nn, 1, 1)
-
-          sizes(1) = il
-          sizes(2) = jl
-          sizes(3) = kl
-          sizes(4) = nx
-          sizes(5) = ny
-          sizes(6) = nz
-          sizes(7) = 0
-          sizes(8) = 0
-          sizes(9) = 0
-
-          zoneCounter = zoneCounter + 1
-          write(zonename, zoneProcFormat) 'domain.', zoneCounter, 'proc.', myid
-
-          call cg_zone_write_f(cg, base, zonename, sizes, Structured, zoneID, ier)
-
-          allocate(xtmp(sizes(1), sizes(2), sizes(3), 3))
-
-          do k=1, kl
-             do j=1, jl
-                do i=1, il
-                   xtmp(i,j,k,1:3) = x(i,j,k,1:3)
+            ! Now we can allgather the xMin and xMax  from each
+            ! processor to everyone
+            call mpi_allgatherV(xMinLocal, nDom * 3, adflow_real, xMin, 3 * nDomProc, &
+                                3 * cumDomProc, adflow_real, adflow_comm_world, ierr)
+            call ECHK(ierr, __FILE__, __LINE__)
+
+            call mpi_allgatherV(xMaxLocal, nDom * 3, adflow_real, xMax, 3 * nDomProc, &
+                                3 * cumDomProc, adflow_real, adflow_comm_world, ierr)
+            call ECHK(ierr, __FILE__, __LINE__)
+
+        end subroutine computeDomainBoundingBoxes
+
+        subroutine buildGLobalSparseOverlap(overlap)
+
+            use oversetData, only: clusters
+            implicit none
+
+            ! Input/Output
+            type(CSRMatrix), intent(inout) :: overlap
+
+            ! Working
+            integer(kind=intType), dimension(:), allocatable :: colIndLocal, rowPtrLocal
+            logical, dimension(:, :), allocatable :: localOverlap
+            integer(kind=intType) :: nnzLocal
+            integer(kind=intType), dimension(:), allocatable :: nnzProc, cumNNzProc
+
+            ! Allocate the space for my processor's compoent of iOverlap. The
+            ! number of rows is the number of blocks I own (in my own numbering)
+            ! and the coluns are the total number of blocks. It is initialized
+            ! to True, and remains that way unless we can conclusively prove the
+            ! blocks don't overlap.
+            allocate (localOverlap(nDom, nDomTotal))
+            localOverlap = .True.
+
+            ! Assume all rows are dense, and decrement when we eliminate an
+            ! entry
+            nnzLocal = nDom * nDomTotal
+
+            ! Bounding box check: We will check our *own* blocks against the
+            ! bounding boxes for for each domain.  This will fill up the
+            ! localOverlapArray and determine nnzLocal -- the number of
+            ! non-zero local entries in the global sparse matrix.
+
+            do nn = 1, nDom
+                iDom = cumDomProc(myid) + nn
+
+                ! Now Loop over *all* of the other blocks
+                do jDom = 1, nDomTotal
+
+                    ! We can eliminate some of pairs using the cluser
+                    ! analysis. Note that the cluster analysis will take care
+                    ! of making sure that a block doesn't interset itself. ie,
+                    ! the diagonal of the full matrix must be .False.
+
+                    if (clusters(iDom) == clusters(jDom)) then
+                        localOverlap(nn, jDom) = .False.
+                        nnzLocal = nnzLocal - 1
+                    end if
+
+                    ! Only do the spatial check if we haven't elminated the
+                    ! connection through the cluster check
+                    if (localOverlap(nn, jDom)) then
+
+                        ! Now do the box overlap check
+                        if ( &
+                            xMin(1, iDom) >= xMax(1, jDom) .or. &
+                            xMax(1, iDom) <= xMin(1, jDom) .or. &
+                            xMin(2, iDom) >= xMax(2, jDom) .or. &
+                            xMax(2, iDom) <= xMin(2, jDom) .or. &
+                            xMin(3, iDom) >= xMax(3, jDom) .or. &
+                            xMax(3, iDom) <= xMin(3, jDom)) then
+
+                            ! These bounding boxes do not intersect.
+                            localOverlap(nn, jDom) = .False.
+                            nnzLocal = nnzLocal - 1
+                        end if
+                    end if
+                end do
+            end do
+
+            ! Now, create a sparse matrix representation of the local part
+
+            allocate (colIndLocal(nnzLocal), rowPtrLocal(nDom + 1))
+            rowPtrLocal(1) = 1
+            i = 0
+            do nn = 1, nDom
+                do jDom = 1, nDomTotal
+                    if (localOverlap(nn, jDom)) then
+                        i = i + 1
+                        colIndLocal(i) = jDom
+                    end if
+                end do
+                rowPtrLocal(nn + 1) = i + 1
+            end do
+
+            ! Now we want to assemble the global (sparse!) connectivity
+            ! matrix for all processors. This is going to require a little
+            ! communication of sizes first followed by the actual data.
+
+            ! Determine distribution of non-zero locations
+            allocate (nnzProc(nProc), cumNNZProc(0:nProc))
+            call mpi_allgather(nnzLocal, 1, adflow_integer, nnzProc, 1, adflow_integer, &
+                               adflow_comm_world, ierr)
+
+            overlap%nnz = sum(nnzProc)
+            overlap%nRow = nDomTotal
+            overlap%nCol = nDomTotal
+            overlap%nnzLocal = nnzLocal
+            ! We can now know how big the global data will be
+            allocate ( &
+                overlap%data(overlap%nnz), &
+                overlap%colInd(overlap%nnz), &
+                overlap%rowPtr(overlap%nRow + 1), &
+                overlap%assignedProc(overlap%nnz))
+
+            overlap%allocated = .True.
+            cumNNZProc(0) = 0
+            do iProc = 1, nProc
+                cumNNZProc(iProc) = cumNNZProc(iProc - 1) + nnzProc(iProc)
+            end do
+
+            ! Correct for rowPtrLocal for the cumulative number of nnz up to
+            ! this proc:
+            rowPtrLocal = rowPtrLocal + cumNNZProc(myid)
+
+            ! Gather the column indicies
+            call mpi_allgatherV(colIndLocal, nnzLocal, adflow_integer, overlap%colInd, &
+                                nnzProc, cumNNZProc, adflow_integer, adflow_comm_world, ierr)
+
+            ! Now we gather the rowPtr to everone
+            overlap%rowPtr(1) = 1
+            call mpi_allgatherV(rowPtrLocal(2:nDom), nDom, adflow_integer, &
+                                overlap%rowPtr(2:nDomTotal + 1), &
+                                nDomProc, cumDomProc, adflow_integer, adflow_comm_world, ierr)
+
+            ! Initialize the assignedProc to the owned rows of a processor
+            do iProc = 0, nProc - 1
+                overlap%assignedProc(cumNNZProc(iProc) + 1:cumNNZProc(iProc + 1)) = iProc
+            end do
+
+            deallocate (colIndLocal, rowPtrLocal, localOverlap, cumNNzProc)
+
+        end subroutine buildGlobalSparseOverlap
+
+        subroutine doMyWork(flag)
+
+            ! This internal subroutine which may be called repeadly, performs
+            ! as many of the searches my processor is responsible for. It
+            ! returns true when all the work has been completed.
+
+            implicit none
+            logical, intent(out) :: flag
+            integer(kind=intType) :: iDom, jDom, jj
+
+            flag = .True.
+            do iWork = 1, nWork
+
+                iDom = work(1, iWork)
+                jDom = work(2, iWork)
+                jj = work(3, iWork) ! Index from the overlap matrix
+
+                ! Check if I have the oBlock and fringes i need to do this
+                ! intersection and I haven't already done it.
+                if (oBlockReady(iDom) .and. oFringeReady(jDom) .and. &
+                    work(4, iWork) == 0) then
+
+                    startTime = mpi_wtime()
+
+                    call fringeSearch(oBlocks(iDom), oFringes(jDom))
+                    endTime = mpi_wtime()
+                    overlap%data(jj) = endTime - startTime
+
+                    ! Flag this work as being done:
+                    work(4, iWork) = 1
+                end if
+
+                ! If any one is not done, flag is flipped to False.
+                if (work(4, iWork) /= 1) then
+                    flag = .False.
+                end if
+            end do
+
+        end subroutine doMyWork
+
+    end subroutine oversetComm
+
+    subroutine writePartitionedMesh(fileName)
+
+        ! This is a debugging routine for writing out meshes *as they are
+        ! partioned*. This can be useful for debugging overset issues. Only
+        ! the grid coordinates are written...these will have to be post
+        ! processed to get connectivity information if the grid is to be
+        ! used as input again.
+
+        use constants
+        use communication, only: adflow_comm_world, myID, nProc
+        use blockPointers, only: il, jl, kl, nx, ny, nz, x, nDom
+        use utils, only: EChk, setPointers
+        use su_cgns
+        implicit none
+
+        character(len=*), intent(in) :: fileName
+        integer(kind=intType) :: nDomTotal, iProc, nn, i, j, k, iDim, iDom, ierr, ii
+        integer(kind=intType) :: iii, jjj, kkk
+        integer(kind=intType) :: bufSize, maxSize, ibufSize, imaxSize
+        integer(kind=intType), dimension(3, nDom) :: localDim
+        integer(kind=intType), dimension(:), allocatable :: nDomProc, cumDomProc
+        integer(kind=intType), dimension(:, :), allocatable :: dims
+        real(kind=realType), dimension(:), allocatable :: buffer
+        real(kind=realType), dimension(:, :, :, :), allocatable :: xtmp
+        integer(kind=intType) :: ier, zoneCOunter, base, zoneID, coordID, cg, zone
+        integer(kind=cgsize_t) :: sizes(9)
+        integer(kind=intType) :: ifield, iSol
+        character(len=40) :: tmpStr, zoneName
+        character(len=32) :: coorNames(3)
+        integer mpiStatus(MPI_STATUS_SIZE)
+        character(len=maxStringLen) :: zoneProcFormat = "(A, I5.5, A, I3.3)"
+
+        coorNames(1) = "CoordinateX"
+        coorNames(2) = "CoordinateY"
+        coorNames(3) = "CoordinateZ"
+
+        call MPI_BARRIER(adflow_comm_world, ierr)
+
+        ! Gather the dimensions of all blocks to everyone
+        call mpi_allreduce(nDom, nDomTotal, 1, adflow_integer, MPI_SUM, &
+                           adflow_comm_world, ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
+
+        ! Store the sizes of the local blocks
+        do nn = 1, nDom
+            call setPointers(nn, 1, 1)
+
+            ! Store the 'l' sizes for transferring
+            localDim(1, nn) = il
+            localDim(2, nn) = jl
+            localDim(3, nn) = kl
+        end do
+
+        ! Allocate the space we need for the numbers and cumulative form
+        allocate (nDomProc(0:nProc - 1), cumDomProc(0:nProc), dims(3, nDomTotal))
+
+        ! Receive the number of domains from each proc using an allgather.
+        call mpi_allgather(nDom, 1, adflow_integer, nDomProc, 1, adflow_integer, &
+                           adflow_comm_world, ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
+
+        ! Compute the cumulative format:
+        cumDomProc(0) = 0
+        do iProc = 1, nProc
+            cumDomProc(iProc) = cumDomProc(iProc - 1) + nDomProc(iProc - 1)
+        end do
+
+        ! We will also allgather all of the block sizes which will make
+        ! things a little easier since everyone will know the proper sizes
+        ! for the sends
+        call mpi_allgatherV(localDim, nDom * 3, adflow_integer, dims, 3 * nDomProc, &
+                            3 * cumDomProc, adflow_integer, adflow_comm_world, ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
+
+        maxSize = 0
+        do i = 1, nDomTotal
+            maxSize = max(maxSize, dims(1, i) * dims(2, i) * dims(3, i) * 3)
+        end do
+
+        allocate (buffer(maxSize))
+
+        if (myid == 0) then
+
+            ! Open the CGNS File
+            call cg_open_f(fileName, mode_write, cg, ier)
+            base = 1
+            call cg_base_write_f(cg, "Base#1", 3, 3, base, ier)
+
+            zoneCounter = 0
+            ! Write my own blocks first
+            do nn = 1, nDom
+                call setPointers(nn, 1, 1)
+
+                sizes(1) = il
+                sizes(2) = jl
+                sizes(3) = kl
+                sizes(4) = nx
+                sizes(5) = ny
+                sizes(6) = nz
+                sizes(7) = 0
+                sizes(8) = 0
+                sizes(9) = 0
+
+                zoneCounter = zoneCounter + 1
+                write (zonename, zoneProcFormat) 'domain.', zoneCounter, 'proc.', myid
+
+                call cg_zone_write_f(cg, base, zonename, sizes, Structured, zoneID, ier)
+
+                allocate (xtmp(sizes(1), sizes(2), sizes(3), 3))
+
+                do k = 1, kl
+                    do j = 1, jl
+                        do i = 1, il
+                            xtmp(i, j, k, 1:3) = x(i, j, k, 1:3)
+                        end do
+                    end do
                 end do
-             end do
-          end do
-
-          do idim=1, 3
-             call cg_coord_write_f(cg, base, zoneID, realDouble, coorNames(idim), &
-                  xtmp(:, :, :, idim), coordID, ier)
-          end do
-          deallocate(xtmp)
-       end do
-
-       ! Now loop over the remaining blocks...receiving each and writing:
-
-       do iProc=1, nProc-1
-          do nn=1, nDomProc(iProc)
-             iDom = cumDomProc(iProc) + nn
-             bufSize = dims(1, iDom)*dims(2, iDom)*dims(3,iDom)*3
-
-             call MPI_Recv(buffer, bufSize, adflow_real, iProc, iProc, &
-                  adflow_comm_world, mpiStatus, ierr)
-
-             zoneCounter = zoneCounter + 1
-             write(zonename, zoneProcFormat) 'domain.', zoneCounter, 'proc.', iProc
-             sizes(1) = dims(1, iDom)
-             sizes(2) = dims(2, iDom)
-             sizes(3) = dims(3, iDom)
-             sizes(4) = dims(1, iDom)-1
-             sizes(5) = dims(2, iDom)-1
-             sizes(6) = dims(3, iDom)-1
-             sizes(7) = 0
-             sizes(8) = 0
-             sizes(9) = 0
-             call cg_zone_write_f(cg, base, zonename, sizes, Structured, zoneID, ier)
-             ii = 0
-             allocate(xtmp(sizes(1), sizes(2), sizes(3), 3))
-             do k=1, sizes(3)
-                do j=1, sizes(2)
-                   do i=1, sizes(1)
-                      xtmp(i,j,k,1) = buffer(ii+1)
-                      xtmp(i,j,k,2) = buffer(ii+2)
-                      xtmp(i,j,k,3) = buffer(ii+3)
-                      ii = ii + 3
-                   end do
+
+                do idim = 1, 3
+                    call cg_coord_write_f(cg, base, zoneID, realDouble, coorNames(idim), &
+                                          xtmp(:, :, :, idim), coordID, ier)
                 end do
-             end do
-
-             do idim=1, 3
-                call cg_coord_write_f(cg, base, zoneID, realDouble, coorNames(idim), &
-                     xtmp(:, :, :, idim), coordID, ier)
-             end do
-             deallocate(xtmp)
-          end do
-       end do
-    else
-       ! Pack and send my stuff:
-       do nn=1, nDom
-          call setPointers(nn, 1, 1)
-          ii = 0
-          do k=1, kl
-             do j=1, jl
-                do i=1, il
-                   do iDim=1,3
-                      buffer(ii+idim) =  x(i,j,k,iDim)
-                   end do
-                   ii = ii + 3
+                deallocate (xtmp)
+            end do
+
+            ! Now loop over the remaining blocks...receiving each and writing:
+
+            do iProc = 1, nProc - 1
+                do nn = 1, nDomProc(iProc)
+                    iDom = cumDomProc(iProc) + nn
+                    bufSize = dims(1, iDom) * dims(2, iDom) * dims(3, iDom) * 3
+
+                    call MPI_Recv(buffer, bufSize, adflow_real, iProc, iProc, &
+                                  adflow_comm_world, mpiStatus, ierr)
+
+                    zoneCounter = zoneCounter + 1
+                    write (zonename, zoneProcFormat) 'domain.', zoneCounter, 'proc.', iProc
+                    sizes(1) = dims(1, iDom)
+                    sizes(2) = dims(2, iDom)
+                    sizes(3) = dims(3, iDom)
+                    sizes(4) = dims(1, iDom) - 1
+                    sizes(5) = dims(2, iDom) - 1
+                    sizes(6) = dims(3, iDom) - 1
+                    sizes(7) = 0
+                    sizes(8) = 0
+                    sizes(9) = 0
+                    call cg_zone_write_f(cg, base, zonename, sizes, Structured, zoneID, ier)
+                    ii = 0
+                    allocate (xtmp(sizes(1), sizes(2), sizes(3), 3))
+                    do k = 1, sizes(3)
+                        do j = 1, sizes(2)
+                            do i = 1, sizes(1)
+                                xtmp(i, j, k, 1) = buffer(ii + 1)
+                                xtmp(i, j, k, 2) = buffer(ii + 2)
+                                xtmp(i, j, k, 3) = buffer(ii + 3)
+                                ii = ii + 3
+                            end do
+                        end do
+                    end do
+
+                    do idim = 1, 3
+                        call cg_coord_write_f(cg, base, zoneID, realDouble, coorNames(idim), &
+                                              xtmp(:, :, :, idim), coordID, ier)
+                    end do
+                    deallocate (xtmp)
                 end do
-             end do
-          end do
-
-          call mpi_send(buffer, ii, adflow_real, 0, myid, &
-               adflow_comm_world, ierr)
-       end do
-    end if
-
-    deallocate(buffer, nDomProc, cumDomProc, dims)
-
-  end subroutine writePartitionedMesh
-
-  ! Debugging routine for writing the dual grids along with the volume
-  ! in CGNS format to help debug.
-
-  subroutine writeDualMesh(fileName)
-
-
-    ! This is a debugging routine for writing out meshes *as they are
-    ! partioned*. This can be useful for debugging overset issues.
-    use constants
-    use communication, only : adflow_comm_world, myid, nProc
-    use blockPointers, only : ie, je, ke, il, jl, kl, x, globalCell, vol, &
-         nDom, iblank
-    use utils, only : setPointers, EChk
-    use su_cgns
-    implicit none
-
-    character(len=*), intent(in) :: fileName
-    integer(kind=intType) :: nDomTotal, iProc, nn, i, j, k, iDim, iDom, ierr, ii
-    integer(kind=intType) :: iii,jjj,kkk
-    integer(kind=intType) :: bufSize, maxSize, ibufSize, imaxSize
-    integer(kind=intType), dimension(3, nDom) :: localDim
-    integer(kind=intType), dimension(:), allocatable :: nDomProc, cumDomProc
-    integer(kind=intType), dimension(:, :), allocatable :: dims
-    real(kind=realType), dimension(:), allocatable :: buffer
-    real(kind=realType), dimension(:, :, :, :), allocatable :: xtmp
-    integer(kind=intType) :: ier, zoneCOunter, base, zoneID, coordID, cg, zone
-    integer(kind=cgsize_t) :: sizes(9)
-    integer(kind=intType) :: ifield, iSol
-    character(len=40) :: tmpStr, zoneName
-    character(len=32) :: coorNames(3)
-    integer mpiStatus(MPI_STATUS_SIZE)
-    character(len=maxStringLen) :: zoneFormat = "(A, I5.5)"
-
-    coorNames(1) = "CoordinateX"
-    coorNames(2) = "CoordinateY"
-    coorNames(3) = "CoordinateZ"
-
-    ! Gather the dimensions of all blocks to everyone
-    call mpi_allreduce(nDom, nDomTotal, 1, adflow_integer, MPI_SUM, &
-         adflow_comm_world, ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
-
-    ! Store the sizes of the local blocks
-    do nn=1,nDom
-       call setPointers(nn, 1, 1)
-       localDim(1, nn) = ie
-       localDim(2, nn) = je
-       localDim(3, nn) = ke
-    end do
-
-    ! Allocate the space we need for the numbers and cumulative form
-    allocate(nDomProc(0:nProc-1), cumDomProc(0:nProc), dims(3, nDomTotal))
-
-    ! Receive the number of domains from each proc using an allgather.
-    call mpi_allgather(nDom, 1, adflow_integer, nDomProc, 1, adflow_integer, &
-         adflow_comm_world, ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
-
-    ! Compute the cumulative format:
-    cumDomProc(0) = 0
-    do iProc=1, nProc
-       cumDomProc(iProc) = cumDomProc(iProc-1) + nDomProc(iProc-1)
-    end do
-
-    ! We will also allgather all of the block sizes which will make
-    ! things a little easier since everyone will know the proper sizes
-    ! for the sends
-    call mpi_allgatherV(localDim, nDom*3, adflow_integer, dims, 3*nDomProc, &
-         3*cumDomProc, adflow_integer, adflow_comm_world, ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
-
-    maxSize = 0
-    do i=1,nDomTotal
-       maxSize = max(maxSize, dims(1, i)*dims(2,i)*dims(3,i)*5)
-    end do
-
-    allocate(buffer(maxSize))
-
-    if (myid == 0) then
-       call cg_open_f(fileName, mode_write, cg, ier)
-       base = 1
-       call cg_base_write_f(cg, "Base#1", 3, 3, base, ier)
-
-       zoneCounter = 0
-       ! Write my own blocks first
-       do nn=1,nDom
-          call setPointers(nn, 1, 1)
-
-          sizes(1) = ie
-          sizes(2) = je
-          sizes(3) = ke
-          sizes(4) = il
-          sizes(5) = jl
-          sizes(6) = kl
-          sizes(7) = 0
-          sizes(8) = 0
-          sizes(9) = 0
-
-          zoneCounter = zoneCounter + 1
-          write(zonename, zoneFormat) 'domain.', zoneCounter
-
-          call cg_zone_write_f(cg, base, zonename, sizes, Structured, zoneID, ier)
-
-          allocate(xtmp(sizes(1), sizes(2), sizes(3), 5))
-
-          do k=1, ke
-             do j=1, je
-                do i=1, ie
-                   xtmp(i,j,k,1:3) = eighth*(&
-                        x(i-1, j-1, k-1, :) + &
-                        x(i  , j-1, k-1, :) + &
-                        x(i-1, j  , k-1, :) + &
-                        x(i  , j  , k-1, :) + &
-                        x(i-1, j-1, k  , :) + &
-                        x(i  , j-1, k  , :) + &
-                        x(i-1, j  , k  , :) + &
-                        x(i  , j  , k  , :))
-                   xtmp(i,j,k,4) = vol(i,j,k)
-                   if (globalCell(i,j,k) >=0)  then
-                      xtmp(i,j,k,5) = dble(iblank(i,j,k))
-                   else
-                      xtmp(i,j,k,5) = zero
-                   end if
+            end do
+        else
+            ! Pack and send my stuff:
+            do nn = 1, nDom
+                call setPointers(nn, 1, 1)
+                ii = 0
+                do k = 1, kl
+                    do j = 1, jl
+                        do i = 1, il
+                            do iDim = 1, 3
+                                buffer(ii + idim) = x(i, j, k, iDim)
+                            end do
+                            ii = ii + 3
+                        end do
+                    end do
                 end do
-             end do
-          end do
-
-          do idim=1, 3
-             call cg_coord_write_f(cg, base, zoneID, realDouble, coorNames(idim), &
-                  xtmp(:, :, :, idim), coordID, ier)
-          end do
-
-          call cg_sol_write_f(cg, base, zoneID, "flowSolution", Vertex, iSol, ier)
-
-          call cg_field_write_f(cg, base, zoneID, iSol, realDouble, "volume", &
-               xtmp(:, :, :, 4), iField, ier)
-
-          call cg_field_write_f(cg, base, zoneID, iSol, realDouble, "iBlank", &
-               xtmp(:, :, :, 5), iField, ier)
-
-          deallocate(xtmp)
-       end do
-
-       ! Now loop over the remaining blocks...receiving each and writing:
-
-       do iProc=1, nProc-1
-          do nn=1, nDomProc(iProc)
-             iDom = cumDomProc(iProc) + nn
-             bufSize = dims(1, iDom)*dims(2, iDom)*dims(3,iDom)*5
-
-             call MPI_Recv(buffer, bufSize, adflow_real, iProc, iProc, &
-                  adflow_comm_world, mpiStatus, ierr)
-
-             zoneCounter = zoneCounter + 1
-             write(zonename, zoneFormat) 'domain.', zoneCounter
-             sizes(1) = dims(1, iDom)
-             sizes(2) = dims(2, iDom)
-             sizes(3) = dims(3, iDom)
-             sizes(4) = dims(1, iDom)-1
-             sizes(5) = dims(2, iDom)-1
-             sizes(6) = dims(3, iDom)-1
-             sizes(7) = 0
-             sizes(8) = 0
-             sizes(9) = 0
-             call cg_zone_write_f(cg, base, zonename, sizes, Structured, zoneID, ier)
-             ii = 0
-             allocate(xtmp(sizes(1), sizes(2), sizes(3), 5))
-             do k=1, sizes(3)
-                do j=1, sizes(2)
-                   do i=1, sizes(1)
-                      xtmp(i,j,k,1) = buffer(ii+1)
-                      xtmp(i,j,k,2) = buffer(ii+2)
-                      xtmp(i,j,k,3) = buffer(ii+3)
-                      xtmp(i,j,k,4) = buffer(ii+4)
-                      xtmp(i,j,k,5) = buffer(ii+5)
-                      ii = ii + 5
-                   end do
+
+                call mpi_send(buffer, ii, adflow_real, 0, myid, &
+                              adflow_comm_world, ierr)
+            end do
+        end if
+
+        deallocate (buffer, nDomProc, cumDomProc, dims)
+
+    end subroutine writePartitionedMesh
+
+    ! Debugging routine for writing the dual grids along with the volume
+    ! in CGNS format to help debug.
+
+    subroutine writeDualMesh(fileName)
+
+        ! This is a debugging routine for writing out meshes *as they are
+        ! partioned*. This can be useful for debugging overset issues.
+        use constants
+        use communication, only: adflow_comm_world, myid, nProc
+        use blockPointers, only: ie, je, ke, il, jl, kl, x, globalCell, vol, &
+                                 nDom, iblank
+        use utils, only: setPointers, EChk
+        use su_cgns
+        implicit none
+
+        character(len=*), intent(in) :: fileName
+        integer(kind=intType) :: nDomTotal, iProc, nn, i, j, k, iDim, iDom, ierr, ii
+        integer(kind=intType) :: iii, jjj, kkk
+        integer(kind=intType) :: bufSize, maxSize, ibufSize, imaxSize
+        integer(kind=intType), dimension(3, nDom) :: localDim
+        integer(kind=intType), dimension(:), allocatable :: nDomProc, cumDomProc
+        integer(kind=intType), dimension(:, :), allocatable :: dims
+        real(kind=realType), dimension(:), allocatable :: buffer
+        real(kind=realType), dimension(:, :, :, :), allocatable :: xtmp
+        integer(kind=intType) :: ier, zoneCOunter, base, zoneID, coordID, cg, zone
+        integer(kind=cgsize_t) :: sizes(9)
+        integer(kind=intType) :: ifield, iSol
+        character(len=40) :: tmpStr, zoneName
+        character(len=32) :: coorNames(3)
+        integer mpiStatus(MPI_STATUS_SIZE)
+        character(len=maxStringLen) :: zoneFormat = "(A, I5.5)"
+
+        coorNames(1) = "CoordinateX"
+        coorNames(2) = "CoordinateY"
+        coorNames(3) = "CoordinateZ"
+
+        ! Gather the dimensions of all blocks to everyone
+        call mpi_allreduce(nDom, nDomTotal, 1, adflow_integer, MPI_SUM, &
+                           adflow_comm_world, ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
+
+        ! Store the sizes of the local blocks
+        do nn = 1, nDom
+            call setPointers(nn, 1, 1)
+            localDim(1, nn) = ie
+            localDim(2, nn) = je
+            localDim(3, nn) = ke
+        end do
+
+        ! Allocate the space we need for the numbers and cumulative form
+        allocate (nDomProc(0:nProc - 1), cumDomProc(0:nProc), dims(3, nDomTotal))
+
+        ! Receive the number of domains from each proc using an allgather.
+        call mpi_allgather(nDom, 1, adflow_integer, nDomProc, 1, adflow_integer, &
+                           adflow_comm_world, ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
+
+        ! Compute the cumulative format:
+        cumDomProc(0) = 0
+        do iProc = 1, nProc
+            cumDomProc(iProc) = cumDomProc(iProc - 1) + nDomProc(iProc - 1)
+        end do
+
+        ! We will also allgather all of the block sizes which will make
+        ! things a little easier since everyone will know the proper sizes
+        ! for the sends
+        call mpi_allgatherV(localDim, nDom * 3, adflow_integer, dims, 3 * nDomProc, &
+                            3 * cumDomProc, adflow_integer, adflow_comm_world, ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
+
+        maxSize = 0
+        do i = 1, nDomTotal
+            maxSize = max(maxSize, dims(1, i) * dims(2, i) * dims(3, i) * 5)
+        end do
+
+        allocate (buffer(maxSize))
+
+        if (myid == 0) then
+            call cg_open_f(fileName, mode_write, cg, ier)
+            base = 1
+            call cg_base_write_f(cg, "Base#1", 3, 3, base, ier)
+
+            zoneCounter = 0
+            ! Write my own blocks first
+            do nn = 1, nDom
+                call setPointers(nn, 1, 1)
+
+                sizes(1) = ie
+                sizes(2) = je
+                sizes(3) = ke
+                sizes(4) = il
+                sizes(5) = jl
+                sizes(6) = kl
+                sizes(7) = 0
+                sizes(8) = 0
+                sizes(9) = 0
+
+                zoneCounter = zoneCounter + 1
+                write (zonename, zoneFormat) 'domain.', zoneCounter
+
+                call cg_zone_write_f(cg, base, zonename, sizes, Structured, zoneID, ier)
+
+                allocate (xtmp(sizes(1), sizes(2), sizes(3), 5))
+
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            xtmp(i, j, k, 1:3) = eighth * ( &
+                                                 x(i - 1, j - 1, k - 1, :) + &
+                                                 x(i, j - 1, k - 1, :) + &
+                                                 x(i - 1, j, k - 1, :) + &
+                                                 x(i, j, k - 1, :) + &
+                                                 x(i - 1, j - 1, k, :) + &
+                                                 x(i, j - 1, k, :) + &
+                                                 x(i - 1, j, k, :) + &
+                                                 x(i, j, k, :))
+                            xtmp(i, j, k, 4) = vol(i, j, k)
+                            if (globalCell(i, j, k) >= 0) then
+                                xtmp(i, j, k, 5) = dble(iblank(i, j, k))
+                            else
+                                xtmp(i, j, k, 5) = zero
+                            end if
+                        end do
+                    end do
                 end do
-             end do
-
-             do idim=1, 3
-                call cg_coord_write_f(cg, base, zoneID, realDouble, coorNames(idim), &
-                     xtmp(:, :, :, idim), coordID, ier)
-             end do
-
-             call cg_sol_write_f(cg, base, zoneID, "flowSolution", Vertex, iSol, ier)
-             call cg_field_write_f(cg, base, zoneID, iSol, realDouble, "volume", &
-                  xtmp(:, :, :, 4), iField, ier)
-             call cg_field_write_f(cg, base, zoneID, iSol, realDouble, "iBlank", &
-                  xtmp(:, :, :, 5), iField, ier)
-
-             deallocate(xtmp)
-          end do
-       end do
-
-    else
-
-       ! Pack and send my stuff:
-       do nn=1, nDom
-          call setPointers(nn, 1, 1)
-          ii = 0
-          do k=1, ke
-             do j=1, je
-                do i=1, ie
-                   do iDim=1,3
-                      buffer(ii+idim) =  eighth*(&
-                           x(i-1, j-1, k-1, idim) + &
-                           x(i  , j-1, k-1, idim) + &
-                           x(i-1, j  , k-1, idim) + &
-                           x(i  , j  , k-1, idim) + &
-                           x(i-1, j-1, k  , idim) + &
-                           x(i  , j-1, k  , idim) + &
-                           x(i-1, j  , k  , idim) + &
-                           x(i  , j  , k  , idim))
-                   end do
-                   buffer(ii+4) = vol(i,j,k)
-                   if (globalCell(i,j,k) >0) then
-                      buffer(ii+5) = dble(iblank(i,j,k))
-                   else
-                      buffer(ii+5) = zero
-                   end if
-
-                   ii = ii + 5
+
+                do idim = 1, 3
+                    call cg_coord_write_f(cg, base, zoneID, realDouble, coorNames(idim), &
+                                          xtmp(:, :, :, idim), coordID, ier)
                 end do
-             end do
-          end do
 
-          call mpi_send(buffer, ii, adflow_real, 0, myid, &
-               adflow_comm_world, ierr)
-       end do
-    end if
+                call cg_sol_write_f(cg, base, zoneID, "flowSolution", Vertex, iSol, ier)
+
+                call cg_field_write_f(cg, base, zoneID, iSol, realDouble, "volume", &
+                                      xtmp(:, :, :, 4), iField, ier)
+
+                call cg_field_write_f(cg, base, zoneID, iSol, realDouble, "iBlank", &
+                                      xtmp(:, :, :, 5), iField, ier)
+
+                deallocate (xtmp)
+            end do
+
+            ! Now loop over the remaining blocks...receiving each and writing:
+
+            do iProc = 1, nProc - 1
+                do nn = 1, nDomProc(iProc)
+                    iDom = cumDomProc(iProc) + nn
+                    bufSize = dims(1, iDom) * dims(2, iDom) * dims(3, iDom) * 5
+
+                    call MPI_Recv(buffer, bufSize, adflow_real, iProc, iProc, &
+                                  adflow_comm_world, mpiStatus, ierr)
+
+                    zoneCounter = zoneCounter + 1
+                    write (zonename, zoneFormat) 'domain.', zoneCounter
+                    sizes(1) = dims(1, iDom)
+                    sizes(2) = dims(2, iDom)
+                    sizes(3) = dims(3, iDom)
+                    sizes(4) = dims(1, iDom) - 1
+                    sizes(5) = dims(2, iDom) - 1
+                    sizes(6) = dims(3, iDom) - 1
+                    sizes(7) = 0
+                    sizes(8) = 0
+                    sizes(9) = 0
+                    call cg_zone_write_f(cg, base, zonename, sizes, Structured, zoneID, ier)
+                    ii = 0
+                    allocate (xtmp(sizes(1), sizes(2), sizes(3), 5))
+                    do k = 1, sizes(3)
+                        do j = 1, sizes(2)
+                            do i = 1, sizes(1)
+                                xtmp(i, j, k, 1) = buffer(ii + 1)
+                                xtmp(i, j, k, 2) = buffer(ii + 2)
+                                xtmp(i, j, k, 3) = buffer(ii + 3)
+                                xtmp(i, j, k, 4) = buffer(ii + 4)
+                                xtmp(i, j, k, 5) = buffer(ii + 5)
+                                ii = ii + 5
+                            end do
+                        end do
+                    end do
+
+                    do idim = 1, 3
+                        call cg_coord_write_f(cg, base, zoneID, realDouble, coorNames(idim), &
+                                              xtmp(:, :, :, idim), coordID, ier)
+                    end do
+
+                    call cg_sol_write_f(cg, base, zoneID, "flowSolution", Vertex, iSol, ier)
+                    call cg_field_write_f(cg, base, zoneID, iSol, realDouble, "volume", &
+                                          xtmp(:, :, :, 4), iField, ier)
+                    call cg_field_write_f(cg, base, zoneID, iSol, realDouble, "iBlank", &
+                                          xtmp(:, :, :, 5), iField, ier)
+
+                    deallocate (xtmp)
+                end do
+            end do
 
-    deallocate(buffer, nDomProc, cumDomProc, dims)
+        else
+
+            ! Pack and send my stuff:
+            do nn = 1, nDom
+                call setPointers(nn, 1, 1)
+                ii = 0
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            do iDim = 1, 3
+                                buffer(ii + idim) = eighth * ( &
+                                                    x(i - 1, j - 1, k - 1, idim) + &
+                                                    x(i, j - 1, k - 1, idim) + &
+                                                    x(i - 1, j, k - 1, idim) + &
+                                                    x(i, j, k - 1, idim) + &
+                                                    x(i - 1, j - 1, k, idim) + &
+                                                    x(i, j - 1, k, idim) + &
+                                                    x(i - 1, j, k, idim) + &
+                                                    x(i, j, k, idim))
+                            end do
+                            buffer(ii + 4) = vol(i, j, k)
+                            if (globalCell(i, j, k) > 0) then
+                                buffer(ii + 5) = dble(iblank(i, j, k))
+                            else
+                                buffer(ii + 5) = zero
+                            end if
 
+                            ii = ii + 5
+                        end do
+                    end do
+                end do
 
-  end subroutine writeDualMesh
+                call mpi_send(buffer, ii, adflow_real, 0, myid, &
+                              adflow_comm_world, ierr)
+            end do
+        end if
 
+        deallocate (buffer, nDomProc, cumDomProc, dims)
 
-  !
-  !       determineClusters determines which blocks are connected with
-  !       1to1 cgns connections. Essentially what we are doing is
-  !       identifying the consitutive multiblock meshes that make up
-  !       an overset mesh. There should be precisely 1 cluster for a
-  !       face mached mesh and 2 or more for a overset mesh
+    end subroutine writeDualMesh
 
-  subroutine determineClusters()
+    !
+    !       determineClusters determines which blocks are connected with
+    !       1to1 cgns connections. Essentially what we are doing is
+    !       identifying the consitutive multiblock meshes that make up
+    !       an overset mesh. There should be precisely 1 cluster for a
+    !       face mached mesh and 2 or more for a overset mesh
 
-    use constants
-    use blockPointers, only : nDom, flowDoms
-    use cgnsGrid, only : CGNSDoms, cgnsNDom
-    use communication, only : adflow_comm_world, myID
-    use oversetData, only :clusters, nDomTotal, nClusters, cumDomProc
-    implicit none
+    subroutine determineClusters()
 
-    ! Working variables
-    integer(kind=intType) :: numBlocks, blockID, cgnsBlk, ierr, clusterID
-    integer(kind=intType) :: i, nn
-    integer(kind=intType), dimension(nDomTotal) :: clustersLocal
-    logical :: blocksAvailable
+        use constants
+        use blockPointers, only: nDom, flowDoms
+        use cgnsGrid, only: CGNSDoms, cgnsNDom
+        use communication, only: adflow_comm_world, myID
+        use oversetData, only: clusters, nDomTotal, nClusters, cumDomProc
+        implicit none
 
-    ! Initialize the cluster of each of the CGNSDoms to 0
-    do i=1, cgnsNDom
-       cgnsDoms(i)%cluster = 0
-    end do
+        ! Working variables
+        integer(kind=intType) :: numBlocks, blockID, cgnsBlk, ierr, clusterID
+        integer(kind=intType) :: i, nn
+        integer(kind=intType), dimension(nDomTotal) :: clustersLocal
+        logical :: blocksAvailable
 
-    ! Allocate clusters (defined in overset)
-    allocate(clusters(nDomTotal))
+        ! Initialize the cluster of each of the CGNSDoms to 0
+        do i = 1, cgnsNDom
+            cgnsDoms(i)%cluster = 0
+        end do
 
-    ! Initialize cluster counter
-    clusterID = 0
+        ! Allocate clusters (defined in overset)
+        allocate (clusters(nDomTotal))
 
-    ! Initialize counter of classified blocks
-    blockID = 0
+        ! Initialize cluster counter
+        clusterID = 0
 
-    ! Initialize variable to state that we have unclassified blocks
-    blocksAvailable = .True.
+        ! Initialize counter of classified blocks
+        blockID = 0
 
-    ! Loop until all blocks are checked
-    do while (blocksAvailable)
+        ! Initialize variable to state that we have unclassified blocks
+        blocksAvailable = .True.
 
-       ! Find position of the available block
-       blocksAvailable = .false.
+        ! Loop until all blocks are checked
+        do while (blocksAvailable)
 
-       do while ((.not. blocksAvailable) .and. (blockID .lt. cgnsnDom))
-          blockID = blockID + 1 ! Increment counter
-          if (cgnsDoms(blockID)%cluster == 0) then
-             blocksAvailable = .true.
-          end if
-       end do
+            ! Find position of the available block
+            blocksAvailable = .false.
 
-       ! If we have blocks available, we start the search
-       if (blocksAvailable) then
-          clusterID = clusterID + 1 ! Increment the running cluser counter
-          cgnsDoms(blockID)%cluster = clusterID
-          call clusterSearch(blockID)
-       end if
+            do while ((.not. blocksAvailable) .and. (blockID .lt. cgnsnDom))
+                blockID = blockID + 1 ! Increment counter
+                if (cgnsDoms(blockID)%cluster == 0) then
+                    blocksAvailable = .true.
+                end if
+            end do
 
-    end do
+            ! If we have blocks available, we start the search
+            if (blocksAvailable) then
+                clusterID = clusterID + 1 ! Increment the running cluser counter
+                cgnsDoms(blockID)%cluster = clusterID
+                call clusterSearch(blockID)
+            end if
 
-    ! Set the clusters to 0 so we can just all reduce
-    clustersLocal = 0
+        end do
 
-    ! Set the cluster ID for all my blocks:
-    do nn=1,nDom
-       cgnsBlk = flowDoms(nn, 1, 1)%cgnsBlockID
-       clustersLocal(cumDomProc(myid) + nn) = cgnsDoms(cgnsBlk)%cluster
-    end do
-    call MPI_Allreduce(clustersLocal, clusters, nDomTotal, adflow_integer, MPI_SUM, &
-         adflow_comm_world, ierr)
+        ! Set the clusters to 0 so we can just all reduce
+        clustersLocal = 0
 
-    ! Finally, set the total number of clusters
-    nClusters = clusterID
+        ! Set the cluster ID for all my blocks:
+        do nn = 1, nDom
+            cgnsBlk = flowDoms(nn, 1, 1)%cgnsBlockID
+            clustersLocal(cumDomProc(myid) + nn) = cgnsDoms(cgnsBlk)%cluster
+        end do
+        call MPI_Allreduce(clustersLocal, clusters, nDomTotal, adflow_integer, MPI_SUM, &
+                           adflow_comm_world, ierr)
 
-  contains
+        ! Finally, set the total number of clusters
+        nClusters = clusterID
 
-    recursive subroutine clusterSearch(blockID)
+    contains
 
-      ! This is the recursive part of cluster search
-      implicit none
+        recursive subroutine clusterSearch(blockID)
 
-      ! Subroutine inputs
-      integer(kind=intType), intent(in) :: blockID
+            ! This is the recursive part of cluster search
+            implicit none
 
-      ! Working variables
-      integer(kind=intTYpe) :: clusterID, connID, connBlock
+            ! Subroutine inputs
+            integer(kind=intType), intent(in) :: blockID
 
-      ! Get the cluster ID from the reference block
-      clusterID = cgnsDoms(blockID)%cluster
+            ! Working variables
+            integer(kind=intTYpe) :: clusterID, connID, connBlock
 
-      ! Loop over all connections of this block
-      do connID = 1, cgnsDoms(blockID)%n1to1
+            ! Get the cluster ID from the reference block
+            clusterID = cgnsDoms(blockID)%cluster
 
-         connBlock = cgnsDoms(blockID)%conn1to1(connID)%donorBlock
+            ! Loop over all connections of this block
+            do connID = 1, cgnsDoms(blockID)%n1to1
 
-         ! Check if connected block is already classified
-         if (cgnsDoms(connBlock)%cluster == 0) then
-            cgnsDoms(connBlock)%cluster = clusterID ! Assign block to the same cluster
-            call clusterSearch(connBlock) ! Start search on the new block
-
-         else if (cgnsDoms(connBlock)%cluster .ne. clusterID) then ! Check symmetry
-            print *,'Non-symmetric connection between CGNS blocks:', blockID, ' and', connBlock
-            stop
-         end if
-      end do
+                connBlock = cgnsDoms(blockID)%conn1to1(connID)%donorBlock
 
-    end subroutine clusterSearch
-  end subroutine determineClusters
-
-  subroutine determineViscousDirs()
-
-    ! Set the viscousDir flags in the CGNS grid based on the CGNS grid
-    ! boundary conditions
-    use constants
-    use cgnsGrid, only : CGNSDoms, cgnsNDom
-    use communication, only : myid
-    implicit none
+                ! Check if connected block is already classified
+                if (cgnsDoms(connBlock)%cluster == 0) then
+                    cgnsDoms(connBlock)%cluster = clusterID ! Assign block to the same cluster
+                    call clusterSearch(connBlock) ! Start search on the new block
 
-    ! Working variables
-    integer(kind=intType) :: i, j, bc
+                else if (cgnsDoms(connBlock)%cluster .ne. clusterID) then ! Check symmetry
+                    print *, 'Non-symmetric connection between CGNS blocks:', blockID, ' and', connBlock
+                    stop
+                end if
+            end do
 
-    do i=1, cgnsNDom
-       do j=1, cgnsDoms(i)%nBocos
-          bc = cgnsDoms(i)%bocoInfo(j)%BCType
+        end subroutine clusterSearch
+    end subroutine determineClusters
 
-          if (bc == NSWallAdiabatic .or. bc == NSWallIsoThermal .or. bc == EulerWall) then
+    subroutine determineViscousDirs()
 
-             if (cgnsDoms(i)%bocoInfo(j)%iBeg == cgnsDoms(i)%bocoInfo(j)%iEnd) &
-                  cgnsDoms(i)%viscousDir(1) = .True.
+        ! Set the viscousDir flags in the CGNS grid based on the CGNS grid
+        ! boundary conditions
+        use constants
+        use cgnsGrid, only: CGNSDoms, cgnsNDom
+        use communication, only: myid
+        implicit none
 
-             if (cgnsDoms(i)%bocoInfo(j)%jBeg == cgnsDoms(i)%bocoInfo(j)%jEnd) &
-                  cgnsDoms(i)%viscousDir(2) = .True.
+        ! Working variables
+        integer(kind=intType) :: i, j, bc
 
-             if (cgnsDoms(i)%bocoInfo(j)%kBeg == cgnsDoms(i)%bocoInfo(j)%kEnd) &
-                  cgnsDoms(i)%viscousDir(3) = .True.
-          end if
-       end do
-    end do
-  end subroutine determineViscousDirs
-
-  subroutine setExplicitHoleCut(flag)
-
-    ! This is meant to be the gateway for doing any explict hole
-    ! cutting. Right now we have a call-back approach
-
-    use constants
-    use adjointVars, only : nCellsGlobal
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use blockPointers, only : nDom, il, jl, kl, x, iBlank, flowDoms
-    use utils, only : setPointers
-    use communication, only : commPatternCell_2nd, internalCell_2nd
-    use haloExchange, only : whalo1to1IntGeneric
-    use adjointvars, only : nCellsLocal
-    implicit none
+        do i = 1, cgnsNDom
+            do j = 1, cgnsDoms(i)%nBocos
+                bc = cgnsDoms(i)%bocoInfo(j)%BCType
 
-    ! Input/output
-    integer(kind=intType), dimension(:), intent(in) :: flag
+                if (bc == NSWallAdiabatic .or. bc == NSWallIsoThermal .or. bc == EulerWall) then
 
-    ! Working
-    integer(kind=intType) ::i, j, k, ii, nn, sps, level
+                    if (cgnsDoms(i)%bocoInfo(j)%iBeg == cgnsDoms(i)%bocoInfo(j)%iEnd) &
+                        cgnsDoms(i)%viscousDir(1) = .True.
 
-    level = 1
-    ! Set iblank to -4 if the flag is true:
-    ii = 0
-    do nn=1, nDom
-       do sps=1, nTimeIntervalsSpectral
-          call setPointers(nn, 1, sps)
+                    if (cgnsDoms(i)%bocoInfo(j)%jBeg == cgnsDoms(i)%bocoInfo(j)%jEnd) &
+                        cgnsDoms(i)%viscousDir(2) = .True.
 
-          do k=2, kl
-             do j=2, jl
-                do i=2, il
-                   ii = ii + 1
-                   if (flag(ii) /= 0) then
-                      iblank(i,j,k) = -4
-                   end if
+                    if (cgnsDoms(i)%bocoInfo(j)%kBeg == cgnsDoms(i)%bocoInfo(j)%kEnd) &
+                        cgnsDoms(i)%viscousDir(3) = .True.
+                end if
+            end do
+        end do
+    end subroutine determineViscousDirs
+
+    subroutine setExplicitHoleCut(flag)
+
+        ! This is meant to be the gateway for doing any explict hole
+        ! cutting. Right now we have a call-back approach
+
+        use constants
+        use adjointVars, only: nCellsGlobal
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use blockPointers, only: nDom, il, jl, kl, x, iBlank, flowDoms
+        use utils, only: setPointers
+        use communication, only: commPatternCell_2nd, internalCell_2nd
+        use haloExchange, only: whalo1to1IntGeneric
+        use adjointvars, only: nCellsLocal
+        implicit none
+
+        ! Input/output
+        integer(kind=intType), dimension(:), intent(in) :: flag
+
+        ! Working
+        integer(kind=intType) :: i, j, k, ii, nn, sps, level
+
+        level = 1
+        ! Set iblank to -4 if the flag is true:
+        ii = 0
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, 1, sps)
+
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            ii = ii + 1
+                            if (flag(ii) /= 0) then
+                                iblank(i, j, k) = -4
+                            end if
+                        end do
+                    end do
                 end do
-             end do
-          end do
-       end do
-    end do
-
-    do sps=1, nTimeIntervalsSpectral
-       ! Exchange iblanks
-       domainLoop:do nn=1, nDom
-          flowDoms(nn, level, sps)%intCommVars(1)%var => &
-               flowDoms(nn, level, sps)%iblank(:, :, :)
-       end do domainLoop
-
-       ! Run the generic integer exchange
-       call wHalo1to1IntGeneric(1, level, sps, commPatternCell_2nd, internalCell_2nd)
-    end do
-
-  end subroutine setExplicitHoleCut
-
-  subroutine flagCellsInSurface(pts, npts, conn, nconn, flag, ncell, blockids, nblocks, k_min)
-
-    use constants
-    use communication, only : myID
-    use adtBuild, only : buildSerialQuad, destroySerialQuad
-    use adtLocalSearch, only : minDistanceTreeSearchSinglePoint
-    use ADTUtils, only : stack
-    use ADTData
-    use blockPointers, only : x, il, jl, kl, nDom, iBlank, vol, nbkGlobal, kBegOr
-    use adjointVars, only : nCellsLocal
-    use utils, only : setPointers, EChk
-    use sorting, only : famInList
-    implicit none
-
-    ! Input/Output
-    integer(kind=intType), intent(in) :: npts, nconn, ncell, nblocks
-    real(kind=realType), intent(in), dimension(3,npts) :: pts
-    integer(kind=intType), intent(in), dimension(4,nconn) :: conn
-    integer(kind=intType), intent(inout), dimension(ncell) :: flag
-    integer(kind=intType), intent(in), dimension(nblocks) :: blockids
-    integer(kind=intType), intent(in) :: k_min
-
-    ! Working variables
-    integer(kind=intType) :: i, j, k, l, nn, iDim, cellID, intInfo(3), sps, level, iii, ierr
-    integer(kind=intType) :: iblock, cell_counter, k_cgns
-    real(kind=realType) :: dStar, frac, volLocal
-    real(kind=realType), dimension(3) :: minX, maxX, v1, v2, v3, axisVec
-    real(kind=realType), dimension(3) :: diag1, diag2, diag3, diag4
-    real(kind=realType) :: dd1, dd2, dd3, dd4, diag_max
-    type(adtType) :: ADT
-    real(kind=realType), dimension(:, :), allocatable :: norm
-    integer(kind=intType), dimension(:), allocatable :: normCount
-    integer(kind=intType), dimension(:, :), pointer :: tmp
-
-    ! ADT Type required data
-    integer(kind=intType), dimension(:), pointer :: frontLeaves, frontLeavesNew
-    type(adtBBoxTargetType), dimension(:), pointer :: BB
-    real(kind=realType) :: coor(4), uvw(5)
-    real(kind=realType) :: dummy(3, 2)
-
-    ! we use the same approach as the actuator zone addition; we project the cell centers to the
-    ! surface provided. This way, we can fairly quickly determine the coordinates
-    ! that are inside the closed volumes. The surface mesh is duplicated on all procs, whereas
-    ! the nodes projected are distributed based on the grid paritioning.
-
-    ! Since this is effectively a wall-distance calc it gets super
-    ! costly for the points far away. Luckly, we can do a fairly
-    ! simple shortcut: Just compute the bounding box of the region and
-    ! use that as the "already found" distance in the closest point
-    ! search. This will eliminate all the points further away
-    ! immediately and this should be sufficiently fast.
-
-    ! So...compute that bounding box:
-    do iDim=1,3
-       minX(iDim) = minval(pts(iDim, :))
-       maxX(iDim) = maxval(pts(iDim, :))
-    end do
-
-    ! Get the max distance. This should be quite conservative.
-    dStar = (maxX(1)-minx(1))**2 + (maxX(2)-minX(2))**2  + (maxX(3)-minX(3))**2
-
-    ! Now build the tree.
-    call buildSerialQuad(size(conn, 2), size(pts, 2), pts, conn, ADT)
-
-    ! Compute the (averaged) unique nodal vectors:
-    allocate(norm(3, size(pts, 2)), normCount(size(pts, 2)))
-
-    norm = zero
-    normCount = 0
-
-    do i=1, size(conn, 2)
-
-       ! Compute cross product normal and normalize
-       v1 = pts(:, conn(3, i)) -  pts(:, conn(1, i))
-       v2 = pts(:, conn(4, i)) -  pts(:, conn(2, i))
-
-       v3(1) = (v1(2)*v2(3) - v1(3)*v2(2))
-       v3(2) = (v1(3)*v2(1) - v1(1)*v2(3))
-       v3(3) = (v1(1)*v2(2) - v1(2)*v2(1))
-       v3 = v3 / sqrt(v3(1)**2 + v3(2)**2 + v3(3)**2)
-
-       ! Add to each of the four pts and increment the number added
-       do j=1, 4
-          norm(:, conn(j, i)) = norm(:, conn(j, i)) + v3
-          normCount(conn(j, i)) = normCount(conn(j, i)) + 1
-       end do
-    end do
-
-    ! Now just divide by the norm count
-    do i=1, size(pts, 2)
-       norm(:, i) = norm(:, i) / normCount(i)
-    end do
-
-    ! Norm count is no longer needed
-    deallocate(normCount)
-
-    ! Allocate the extra data the tree search requires.
-    allocate(stack(100), BB(20), frontLeaves(25), frontLeavesNew(25))
-
-    ! Now search for all the coordinate. Note that We have explictly
-    ! set sps to 1 becuase it is only implemented for single grid.
-    sps = 1
-    level = 1
-    cell_counter = 1
-
-    do nn=1, nDom
-       call setPointers(nn, level, sps)
-
-       ! only check this cell if it is within one of the block IDs we are asked to look at
-       ! here, we reuse the famInList function from sorting.F90. The reason is just having
-       ! if (any(blockids == nbkGlobal)) does not work with complexify; complexify changes the
-       ! == (or .eq.) to .ceq., which does not work with integers. The famInList is originally
-       ! written for surface integrations, but the same idea applies here.
-       if (famInList(nbkGlobal, blockids))then
-         do k=2, kl
-            do j=2, jl
-               do i=2, il
-
-                  ! get the k index of this cell in the cgns grid
-                  k_cgns = k + kBegOr - 2
-
-                  ! check if we are above the kmin range needed. if no kmin is provided, then the value
-                  ! defaults to -1 from python, so all points satisfy the check
-                  if (k_cgns .gt. k_min) then
-
-                     ! calculate the 4 diagonals of the cell
-                     diag1 = x(i-1,j-1,k-1,:) - x(i,j,k,:)
-                     diag2 = x(i,j-1,k-1,:) - x(i-1,j,k,:)
-                     diag3 = x(i,j,k-1,:) - x(i-1,j-1,k,:)
-                     diag4 = x(i-1,j,k-1,:) - x(i,j-1,k,:)
-
-                     dd1 = diag1(1)*diag1(1) + diag1(2)*diag1(2) + diag1(3)*diag1(3)
-                     dd2 = diag2(1)*diag2(1) + diag2(2)*diag2(2) + diag2(3)*diag2(3)
-                     dd3 = diag3(1)*diag3(1) + diag3(2)*diag3(2) + diag3(3)*diag3(3)
-                     dd4 = diag4(1)*diag4(1) + diag4(2)*diag4(2) + diag4(3)*diag4(3)
-
-                     ! get the max
-                     diag_max = max(dd1, dd2, dd3, dd4)
-                     ! if the projection is greater than this for any node, we stop testing the current point.
-                     ! we can work with squared distances for the sake of efficiency. the projection routine
-                     ! will also return the squared distance for the same reason.
-
-                     ! actually test each node
-                     do l=1, 8
-                        select case (l)
-                           case (1)
-                              coor(1:3) = x(i-1,j-1,k-1,:)
-                           case (2)
-                              coor(1:3) = x(i,  j-1,k-1,:)
-                           case (3)
-                              coor(1:3) = x(i,  j,  k-1,:)
-                           case (4)
-                              coor(1:3) = x(i-1,j,  k-1,:)
-                           case (5)
-                              coor(1:3) = x(i-1,j-1,k,  :)
-                           case (6)
-                              coor(1:3) = x(i,  j-1,k,  :)
-                           case (7)
-                              coor(1:3) = x(i,  j,  k,  :)
-                           case (8)
-                              coor(1:3) = x(i-1,j,  k,  :)
-                        end select
-
-                        ! reset the "closest point already found" variable.
-                        coor(4) = dStar
-                        intInfo(3) = 0
-                        call minDistancetreeSearchSinglePoint(ADT, coor, intInfo, &
-                              uvw, dummy, 0, BB, frontLeaves, frontLeavesNew)
-                        cellID = intInfo(3)
-                        if (cellID > 0) then
-                           ! Now check if this was successful or not:
-                           if (checkInside()) then
-                              ! Whoohoo! We are inside the region. Flag this cell
-                              flag(cell_counter) = 1
-                              exit
-                           else
-                              ! we are outside. now check if the projection distance is larger than
-                              ! the max diagonal. if so, we can quit early here.
-                              if (uvw(4) .gt. diag_max) then
-                                 ! projection is larger than our biggest diagonal.
-                                 ! other nodes wont be in the surface, so we can exit the cell early here
-                                 exit
-                              end if
-                           end if
-                        end if
-                     end do
-                  end if
+            end do
+        end do
+
+        do sps = 1, nTimeIntervalsSpectral
+            ! Exchange iblanks
+            domainLoop: do nn = 1, nDom
+                flowDoms(nn, level, sps)%intCommVars(1)%var => &
+                    flowDoms(nn, level, sps)%iblank(:, :, :)
+            end do domainLoop
+
+            ! Run the generic integer exchange
+            call wHalo1to1IntGeneric(1, level, sps, commPatternCell_2nd, internalCell_2nd)
+        end do
+
+    end subroutine setExplicitHoleCut
+
+    subroutine flagCellsInSurface(pts, npts, conn, nconn, flag, ncell, blockids, nblocks, k_min)
+
+        use constants
+        use communication, only: myID
+        use adtBuild, only: buildSerialQuad, destroySerialQuad
+        use adtLocalSearch, only: minDistanceTreeSearchSinglePoint
+        use ADTUtils, only: stack
+        use ADTData
+        use blockPointers, only: x, il, jl, kl, nDom, iBlank, vol, nbkGlobal, kBegOr
+        use adjointVars, only: nCellsLocal
+        use utils, only: setPointers, EChk
+        use sorting, only: famInList
+        implicit none
+
+        ! Input/Output
+        integer(kind=intType), intent(in) :: npts, nconn, ncell, nblocks
+        real(kind=realType), intent(in), dimension(3, npts) :: pts
+        integer(kind=intType), intent(in), dimension(4, nconn) :: conn
+        integer(kind=intType), intent(inout), dimension(ncell) :: flag
+        integer(kind=intType), intent(in), dimension(nblocks) :: blockids
+        integer(kind=intType), intent(in) :: k_min
+
+        ! Working variables
+        integer(kind=intType) :: i, j, k, l, nn, iDim, cellID, intInfo(3), sps, level, iii, ierr
+        integer(kind=intType) :: iblock, cell_counter, k_cgns
+        real(kind=realType) :: dStar, frac, volLocal
+        real(kind=realType), dimension(3) :: minX, maxX, v1, v2, v3, axisVec
+        real(kind=realType), dimension(3) :: diag1, diag2, diag3, diag4
+        real(kind=realType) :: dd1, dd2, dd3, dd4, diag_max
+        type(adtType) :: ADT
+        real(kind=realType), dimension(:, :), allocatable :: norm
+        integer(kind=intType), dimension(:), allocatable :: normCount
+        integer(kind=intType), dimension(:, :), pointer :: tmp
+
+        ! ADT Type required data
+        integer(kind=intType), dimension(:), pointer :: frontLeaves, frontLeavesNew
+        type(adtBBoxTargetType), dimension(:), pointer :: BB
+        real(kind=realType) :: coor(4), uvw(5)
+        real(kind=realType) :: dummy(3, 2)
+
+        ! we use the same approach as the actuator zone addition; we project the cell centers to the
+        ! surface provided. This way, we can fairly quickly determine the coordinates
+        ! that are inside the closed volumes. The surface mesh is duplicated on all procs, whereas
+        ! the nodes projected are distributed based on the grid paritioning.
+
+        ! Since this is effectively a wall-distance calc it gets super
+        ! costly for the points far away. Luckly, we can do a fairly
+        ! simple shortcut: Just compute the bounding box of the region and
+        ! use that as the "already found" distance in the closest point
+        ! search. This will eliminate all the points further away
+        ! immediately and this should be sufficiently fast.
+
+        ! So...compute that bounding box:
+        do iDim = 1, 3
+            minX(iDim) = minval(pts(iDim, :))
+            maxX(iDim) = maxval(pts(iDim, :))
+        end do
+
+        ! Get the max distance. This should be quite conservative.
+        dStar = (maxX(1) - minx(1))**2 + (maxX(2) - minX(2))**2 + (maxX(3) - minX(3))**2
+
+        ! Now build the tree.
+        call buildSerialQuad(size(conn, 2), size(pts, 2), pts, conn, ADT)
+
+        ! Compute the (averaged) unique nodal vectors:
+        allocate (norm(3, size(pts, 2)), normCount(size(pts, 2)))
+
+        norm = zero
+        normCount = 0
+
+        do i = 1, size(conn, 2)
+
+            ! Compute cross product normal and normalize
+            v1 = pts(:, conn(3, i)) - pts(:, conn(1, i))
+            v2 = pts(:, conn(4, i)) - pts(:, conn(2, i))
+
+            v3(1) = (v1(2) * v2(3) - v1(3) * v2(2))
+            v3(2) = (v1(3) * v2(1) - v1(1) * v2(3))
+            v3(3) = (v1(1) * v2(2) - v1(2) * v2(1))
+            v3 = v3 / sqrt(v3(1)**2 + v3(2)**2 + v3(3)**2)
+
+            ! Add to each of the four pts and increment the number added
+            do j = 1, 4
+                norm(:, conn(j, i)) = norm(:, conn(j, i)) + v3
+                normCount(conn(j, i)) = normCount(conn(j, i)) + 1
+            end do
+        end do
+
+        ! Now just divide by the norm count
+        do i = 1, size(pts, 2)
+            norm(:, i) = norm(:, i) / normCount(i)
+        end do
+
+        ! Norm count is no longer needed
+        deallocate (normCount)
+
+        ! Allocate the extra data the tree search requires.
+        allocate (stack(100), BB(20), frontLeaves(25), frontLeavesNew(25))
+
+        ! Now search for all the coordinate. Note that We have explictly
+        ! set sps to 1 becuase it is only implemented for single grid.
+        sps = 1
+        level = 1
+        cell_counter = 1
+
+        do nn = 1, nDom
+            call setPointers(nn, level, sps)
+
+            ! only check this cell if it is within one of the block IDs we are asked to look at
+            ! here, we reuse the famInList function from sorting.F90. The reason is just having
+            ! if (any(blockids == nbkGlobal)) does not work with complexify; complexify changes the
+            ! == (or .eq.) to .ceq., which does not work with integers. The famInList is originally
+            ! written for surface integrations, but the same idea applies here.
+            if (famInList(nbkGlobal, blockids)) then
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+
+                            ! get the k index of this cell in the cgns grid
+                            k_cgns = k + kBegOr - 2
+
+                            ! check if we are above the kmin range needed. if no kmin is provided, then the value
+                            ! defaults to -1 from python, so all points satisfy the check
+                            if (k_cgns .gt. k_min) then
+
+                                ! calculate the 4 diagonals of the cell
+                                diag1 = x(i - 1, j - 1, k - 1, :) - x(i, j, k, :)
+                                diag2 = x(i, j - 1, k - 1, :) - x(i - 1, j, k, :)
+                                diag3 = x(i, j, k - 1, :) - x(i - 1, j - 1, k, :)
+                                diag4 = x(i - 1, j, k - 1, :) - x(i, j - 1, k, :)
+
+                                dd1 = diag1(1) * diag1(1) + diag1(2) * diag1(2) + diag1(3) * diag1(3)
+                                dd2 = diag2(1) * diag2(1) + diag2(2) * diag2(2) + diag2(3) * diag2(3)
+                                dd3 = diag3(1) * diag3(1) + diag3(2) * diag3(2) + diag3(3) * diag3(3)
+                                dd4 = diag4(1) * diag4(1) + diag4(2) * diag4(2) + diag4(3) * diag4(3)
+
+                                ! get the max
+                                diag_max = max(dd1, dd2, dd3, dd4)
+                                ! if the projection is greater than this for any node, we stop testing the current point.
+                                ! we can work with squared distances for the sake of efficiency. the projection routine
+                                ! will also return the squared distance for the same reason.
+
+                                ! actually test each node
+                                do l = 1, 8
+                                    select case (l)
+                                    case (1)
+                                        coor(1:3) = x(i - 1, j - 1, k - 1, :)
+                                    case (2)
+                                        coor(1:3) = x(i, j - 1, k - 1, :)
+                                    case (3)
+                                        coor(1:3) = x(i, j, k - 1, :)
+                                    case (4)
+                                        coor(1:3) = x(i - 1, j, k - 1, :)
+                                    case (5)
+                                        coor(1:3) = x(i - 1, j - 1, k, :)
+                                    case (6)
+                                        coor(1:3) = x(i, j - 1, k, :)
+                                    case (7)
+                                        coor(1:3) = x(i, j, k, :)
+                                    case (8)
+                                        coor(1:3) = x(i - 1, j, k, :)
+                                    end select
+
+                                    ! reset the "closest point already found" variable.
+                                    coor(4) = dStar
+                                    intInfo(3) = 0
+                                    call minDistancetreeSearchSinglePoint(ADT, coor, intInfo, &
+                                                                         uvw, dummy, 0, BB, frontLeaves, frontLeavesNew)
+                                    cellID = intInfo(3)
+                                    if (cellID > 0) then
+                                        ! Now check if this was successful or not:
+                                        if (checkInside()) then
+                                            ! Whoohoo! We are inside the region. Flag this cell
+                                            flag(cell_counter) = 1
+                                            exit
+                                        else
+                                            ! we are outside. now check if the projection distance is larger than
+                                            ! the max diagonal. if so, we can quit early here.
+                                            if (uvw(4) .gt. diag_max) then
+                                                ! projection is larger than our biggest diagonal.
+                                                ! other nodes wont be in the surface, so we can exit the cell early here
+                                                exit
+                                            end if
+                                        end if
+                                    end if
+                                end do
+                            end if
+
+                            cell_counter = cell_counter + 1
+                        end do
+                    end do
+                end do
+            else
+                ! we dont want to consider block, but we need to increment the counter
+                cell_counter = cell_counter + (kl - 1) * (jl - 1) * (il - 1)
+            end if
+        end do
+
+        ! Final memory cleanup
+        deallocate (stack, norm, frontLeaves, frontLeavesNew, BB)
+        call destroySerialQuad(ADT)
+
+    contains
+
+        function checkInside()
+
+            implicit none
+            logical :: checkInside
+            integer(kind=intType) :: jj
+            real(kind=realType) :: shp(4), xp(3), normal(3), v1(3), dp
 
-                  cell_counter = cell_counter + 1
-               end do
+            ! bi-linear shape functions (CCW ordering)
+            shp(1) = (one - uvw(1)) * (one - uvw(2))
+            shp(2) = (uvw(1)) * (one - uvw(2))
+            shp(3) = (uvw(1)) * (uvw(2))
+            shp(4) = (one - uvw(1)) * (uvw(2))
+
+            xp = zero
+            normal = zero
+            do jj = 1, 4
+                xp = xp + shp(jj) * pts(:, conn(jj, cellID))
+                normal = normal + shp(jj) * norm(:, conn(jj, cellID))
             end do
-         end do
-       else
-          ! we dont want to consider block, but we need to increment the counter
-          cell_counter = cell_counter + (kl - 1) * (jl - 1) * (il - 1)
-       end if
-    end do
-
-    ! Final memory cleanup
-    deallocate(stack, norm, frontLeaves, frontLeavesNew, BB)
-    call destroySerialQuad(ADT)
-
-  contains
-
-    function checkInside()
-
-      implicit none
-      logical :: checkInside
-      integer(kind=intType) :: jj
-      real(kind=realType) :: shp(4), xp(3), normal(3), v1(3), dp
-
-      ! bi-linear shape functions (CCW ordering)
-      shp(1) = (one-uvw(1))*(one-uvw(2))
-      shp(2) = (    uvw(1))*(one-uvw(2))
-      shp(3) = (    uvw(1))*(    uvw(2))
-      shp(4) = (one-uvw(1))*(    uvw(2))
-
-      xp = zero
-      normal = zero
-      do jj=1, 4
-         xp = xp + shp(jj)*pts(:, conn(jj, cellID))
-         normal = normal + shp(jj)*norm(:, conn(jj, cellID))
-      end do
-
-      ! Compute the dot product of normal with cell center
-      ! (stored in coor) with the point on the surface.
-      v1 = coor(1:3) - xp
-      dp = normal(1)*v1(1) + normal(2)*v1(2) + normal(3)*v1(3)
-
-      if (dp < zero) then
-         checkInside = .True.
-      else
-         checkInside = .False.
-      end if
-    end function checkInside
-
-  end subroutine flagCellsInSurface
-
-  subroutine updateOverset(flag, n, closedFamList, nFam)
-
-    ! This is the main gateway routine for updating the overset
-    ! connectivity during a solution. It is wrapped and intended to be
-    ! called from Python. What this routine does depends on the value
-    ! of oversetUpdateMode:
-
-    ! updateFrozen: Nothing happens. The initial
-    ! connectivity computed during initialization is kept.
-
-    ! updateFast: Update just the weight, but leave the donors
-    ! unchanged. This is only applicable when the entire mesh is
-    ! warped at the same time like with USMesh in IDWarp.
-
-    ! updateFull: Complete from scratch update. Run the full
-    ! oversetComm routine.
-
-    use constants
-    use inputOverset, only : oversetUpdateMode
-    use block, only : flowDOms
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use oversetCommUtilities, onlY : updateOversetConnectivity
-    use oversetData, only : oversetPresent
-    implicit none
-
-    ! Input/Output
-    integer(kind=intType), intent(in) :: n, nFam
-    integer(kind=intType), dimension(n) :: flag
-    integer(kind=intType), dimension(nFam) :: closedFamList
-
-    ! Working
-    integer(kind=intType) :: nLevels, level, sps, nn, nDoms, mm
-
-    ! If no overset, don't do anything.
-    if (.not. oversetPresent) then
-       return
-    end if
-
-    select case(oversetUpdateMode)
-    case(updateFrozen)
-       return
-
-    case (updateFast)
-       ! Run our fast version.
-       nLevels = ubound(flowDoms,2)
-       do level=1,nLevels
-          do sps=1, nTimeIntervalsSpectral
-             call updateOversetConnectivity(level, sps)
-          end do
-       end do
-
-    case (updateFull)
-       ! Do the full update like we would have done during
-       ! initialization.
-       nLevels = ubound(flowDoms,2)
-
-       ! Reinitialize domain iblanks
-       nDoms = ubound(flowDoms,1)
-       do nn=1, nDoms
-          do level=1,nLevels
-             do sps=1, nTimeIntervalsSpectral
-                flowDoms(nn, level, sps)%iBlank = 1
-                flowDoms(nn, level, sps)%forcedRecv = 0
-                flowDoms(nn, level, sps)%status = 0
-                do mm=1, flowDoms(nn, level, sps)%nBocos
-                   flowDoms(nn, level, sps)%BCData(mm)%iblank = 1
+
+            ! Compute the dot product of normal with cell center
+            ! (stored in coor) with the point on the surface.
+            v1 = coor(1:3) - xp
+            dp = normal(1) * v1(1) + normal(2) * v1(2) + normal(3) * v1(3)
+
+            if (dp < zero) then
+                checkInside = .True.
+            else
+                checkInside = .False.
+            end if
+        end function checkInside
+
+    end subroutine flagCellsInSurface
+
+    subroutine updateOverset(flag, n, closedFamList, nFam)
+
+        ! This is the main gateway routine for updating the overset
+        ! connectivity during a solution. It is wrapped and intended to be
+        ! called from Python. What this routine does depends on the value
+        ! of oversetUpdateMode:
+
+        ! updateFrozen: Nothing happens. The initial
+        ! connectivity computed during initialization is kept.
+
+        ! updateFast: Update just the weight, but leave the donors
+        ! unchanged. This is only applicable when the entire mesh is
+        ! warped at the same time like with USMesh in IDWarp.
+
+        ! updateFull: Complete from scratch update. Run the full
+        ! oversetComm routine.
+
+        use constants
+        use inputOverset, only: oversetUpdateMode
+        use block, only: flowDOms
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use oversetCommUtilities, onlY: updateOversetConnectivity
+        use oversetData, only: oversetPresent
+        implicit none
+
+        ! Input/Output
+        integer(kind=intType), intent(in) :: n, nFam
+        integer(kind=intType), dimension(n) :: flag
+        integer(kind=intType), dimension(nFam) :: closedFamList
+
+        ! Working
+        integer(kind=intType) :: nLevels, level, sps, nn, nDoms, mm
+
+        ! If no overset, don't do anything.
+        if (.not. oversetPresent) then
+            return
+        end if
+
+        select case (oversetUpdateMode)
+        case (updateFrozen)
+            return
+
+        case (updateFast)
+            ! Run our fast version.
+            nLevels = ubound(flowDoms, 2)
+            do level = 1, nLevels
+                do sps = 1, nTimeIntervalsSpectral
+                    call updateOversetConnectivity(level, sps)
+                end do
+            end do
+
+        case (updateFull)
+            ! Do the full update like we would have done during
+            ! initialization.
+            nLevels = ubound(flowDoms, 2)
+
+            ! Reinitialize domain iblanks
+            nDoms = ubound(flowDoms, 1)
+            do nn = 1, nDoms
+                do level = 1, nLevels
+                    do sps = 1, nTimeIntervalsSpectral
+                        flowDoms(nn, level, sps)%iBlank = 1
+                        flowDoms(nn, level, sps)%forcedRecv = 0
+                        flowDoms(nn, level, sps)%status = 0
+                        do mm = 1, flowDoms(nn, level, sps)%nBocos
+                            flowDoms(nn, level, sps)%BCData(mm)%iblank = 1
+                        end do
+                    end do
                 end do
-             end do
-          end do
-       end do
-
-       do level=1,nLevels
-          if (level == 1) then
-             call setExplicitHoleCut(flag)
-             call oversetComm(level, .False., .false., closedFamList)
-          else
-             call oversetComm(level, .False., .True., closedFamList)
-          end if
-       end do
-
-    end select
-
-  end subroutine updateOverset
-
-  subroutine setBlockPriority(blkName, value, setValue)
-
-    ! Set the CGNSblock with blkName to have a priority of "value"
-
-    use constants
-    use cgnsGrid, only : cgnsDoms, cgnsNDom
-    use communication
-    implicit none
-
-    ! Input Parameters
-    character(len=*), intent(in) :: blkName
-    real(kind=realType), intent(in) :: value
-    logical, intent(out) :: setValue
-
-    ! Working
-    integer(kind=intType) :: iDom
-
-    ! We should do a binary search for the block names, but for now
-    ! just loop over the number of CGNS Doms.
-    if (cgnsNDom > 0) then
-       setValue = .False.
-       do iDom=1, CGNSnDom
-          if (trim(cgnsDoms(iDom)%zoneName) == trim(blkName)) then
-             setValue = .True.
-             cgnsDoms(iDom)%priority = value
-             exit
-          end if
-       end do
-    else
-       ! If there are no domains yet, just say we did it so we don't
-       ! raise an error in python.
-       setValue = .True.
-    end if
-  end subroutine setBlockPriority
+            end do
+
+            do level = 1, nLevels
+                if (level == 1) then
+                    call setExplicitHoleCut(flag)
+                    call oversetComm(level, .False., .false., closedFamList)
+                else
+                    call oversetComm(level, .False., .True., closedFamList)
+                end if
+            end do
+
+        end select
+
+    end subroutine updateOverset
+
+    subroutine setBlockPriority(blkName, value, setValue)
+
+        ! Set the CGNSblock with blkName to have a priority of "value"
+
+        use constants
+        use cgnsGrid, only: cgnsDoms, cgnsNDom
+        use communication
+        implicit none
+
+        ! Input Parameters
+        character(len=*), intent(in) :: blkName
+        real(kind=realType), intent(in) :: value
+        logical, intent(out) :: setValue
+
+        ! Working
+        integer(kind=intType) :: iDom
+
+        ! We should do a binary search for the block names, but for now
+        ! just loop over the number of CGNS Doms.
+        if (cgnsNDom > 0) then
+            setValue = .False.
+            do iDom = 1, CGNSnDom
+                if (trim(cgnsDoms(iDom)%zoneName) == trim(blkName)) then
+                    setValue = .True.
+                    cgnsDoms(iDom)%priority = value
+                    exit
+                end if
+            end do
+        else
+            ! If there are no domains yet, just say we did it so we don't
+            ! raise an error in python.
+            setValue = .True.
+        end if
+    end subroutine setBlockPriority
 
 end module oversetAPI
diff --git a/src/overset/oversetCommUtilites.F90 b/src/overset/oversetCommUtilites.F90
index 8c6dc4dc4..35a0dd221 100644
--- a/src/overset/oversetCommUtilites.F90
+++ b/src/overset/oversetCommUtilites.F90
@@ -2,2468 +2,2457 @@ module oversetCommUtilities
 
 contains
 
-  subroutine getCommPattern(oMat,  sendList,  nSend, recvList, nRecv)
-
-    use constants
-    use oversetData, only : cumDomProc, nDomProc, nDomTotal, CSRMatrix
-    use blockPointers, only : nDom
-    use communication , only : nProc, myid
-    use sorting, only : unique
-    implicit none
-
-    ! Input/output
-    type(CSRMatrix), intent(in) :: oMat
-    integer(kind=intType), intent(out) :: sendList(:, :) , recvList(:,:)
-    integer(kind=intType), intent(out) :: nSend, nRecv
-
-    ! Working:
-    integer(kind=intType) :: nn, iDom, nnRow, i, jj, ii, nUniqueProc, iProc
-    integer(kind=intType), dimension(:), allocatable :: procsForThisRow, inverse, blkProc
-    logical :: added
-
-    ! Generic routine to determine what I need to send/recv based on the
-    ! data provided in the overlap matrix. The '2' in the send and
-    ! receive lists will record the processor and the global 'idom'
-    ! index, which is suffient to use for the subsequent communication
-    ! structure.
-
-    nSend = 0
-    nRecv = 0
-
-    ! These variables are used to compact the sending of
-    ! blocks/fringes. The logic is as follows: A a different
-    ! processor may need a block/fringe for more than 1 search. This
-    ! is manifested by having two or more entries is the rows (or
-    ! columns) I own. It would be inefficient to send the same data
-    ! to the same processor more than once, so we "uniquify" the
-    ! processors before we send. There is also another salient
-    ! reason: If we were to send the same data twice, and the other
-    ! processor started using the data, we could get a race condition
-    ! as it was modified the received fringes (during a search) while
-    ! the same fringes were being overwritten by the receive operation.
-
-    allocate(procsForThisRow(nDomTotal), inverse(nDomTotal), blkProc(nDomTotal))
-
-    ii = 0
-    do iProc=0, nProc-1
-       do i=1, nDomProc(iProc)
-          ii = ii + 1
-          blkProc(ii) = iProc
-       end do
-    end do
-
-    ! Loop over the owned rows of the normal matrix
-    do nn=1, nDom
-       iDom = cumDomProc(myid) + nn
-       nnRow = oMat%rowPtr(iDom+1) - oMat%rowPtr(iDom)
-       procsForThisRow(1:nnRow) = oMat%assignedProc(oMat%rowPtr(iDom) : oMat%rowPtr(iDom+1)-1)
-       call unique(procsForThisRow, nnRow, nUniqueProc, inverse)
-
-       do jj = 1, nUniqueProc
-          if (procsForThisRow(jj) /= myid) then
-             ! This intersection requires a row quantity from me
-             nSend = nSend + 1
-             sendList(1, nSend) = procsForThisRow(jj)
-             sendList(2, nSend) = iDom
-          end if
-       end do
-    end do
-
-    ! Now we loop back through the whole matrix looking at what I have
-    ! to do. If there is a row I don't own, I will have to receive it:
-
-    do iDom=1, oMat%nRow
-       added = .False.
-       rowLoop: do jj=oMat%rowPtr(iDom), oMat%rowPtr(iDom+1)-1
-
-          ! I have to do this intersection
-          if (oMat%assignedProc(jj) == myID) then
-
-             ! But I don't know the row entry
-             if (.not. (iDom > cumDomProc(myid) .and. iDom <= cumDomProc(myid+1))) then
-
-                ! Need to back out what proc the iDom correponds to:
-                nRecv = nRecv + 1
-                recvList(1, nRecv) = blkProc(iDom)
-                recvList(2, nRecv) = iDom
-                added = .True.
-             end if
-          end if
-
-          ! Just move on to the next row since we only need to receive it once.
-          if (added) then
-             exit rowLoop
-          end if
-       end do rowLoop
-    end do
-
-    deallocate(procsForThisRow, inverse, blkProc)
-  end subroutine getCommPattern
-
-  subroutine getOSurfCommPattern(oMat, oMatT, sendList, nSend, &
-       recvList, nRecv, rBufSize)
-
-    ! This subroutine get the the comm pattern to send the oWall types.
-    use constants
-    use blockPointers, only : nDom
-    use oversetData, only : nDomTotal, CSRMatrix, cumDomProc, nDomProc
-    use communication, only : myid, nProc
-    use sorting, only : unique
-    implicit none
-
-    ! Input/output
-    type(CSRMatrix), intent(in) :: oMat, oMatT
-    integer(kind=intType), intent(out) :: sendList(:, :), recvList(:, :)
-    integer(kind=intType), intent(out) :: nSend, nRecv
-    integer(kind=intType), intent(in) :: rBufSize(nDomTotal)
-
-    ! Working:
-    integer(kind=intType) :: nn, iDom, jDom, nnRow, nnRowT, i, jj, ii, nUniqueProc, iProc
-    integer(kind=intType), dimension(:), allocatable :: blkProc, toRecv
-    integer(kind=intType), dimension(:), allocatable :: procsForThisRow, inverse
-
-
-    nSend = 0
-    nRecv = 0
-
-    allocate(procsForThisRow(2*nDomTotal), inverse(2*nDomTotal), blkProc(nDomTotal))
-
-    ii = 0
-    do iProc=0, nProc-1
-       do i=1, nDomProc(iProc)
-          ii = ii + 1
-          blkProc(ii) = iProc
-       end do
-    end do
-
-    ! Loop over the owned rows of the regular matrix and the rows
-    ! transposed matrix (ie columns of the regular matrix)
-    do nn=1, nDom
-
-       iDom = cumDomProc(myid) + nn
-
-       ! Only deal with this block if the rbuffer size for the oWall is
-       ! greater than zero. If it is zero, it is empty we don't need to
-       ! deal with it.
-       if (rBufSize(iDom) > 0) then
-
-          nnRow = oMat%rowPtr(iDom+1) - oMat%rowPtr(iDom)
-          procsForThisRow(1:nnRow) = oMat%assignedProc(oMat%rowPtr(iDom) : oMat%rowPtr(iDom+1)-1)
-
-          nnRowT = oMatT%rowPtr(iDom+1) - oMatT%rowPtr(iDom)
-          procsForThisRow(nnRow+1:nnRow+nnRowT) = oMatT%assignedProc(oMatT%rowPtr(iDom) : oMatT%rowPtr(iDom+1)-1)
-
-          call unique(procsForThisRow, nnRow+nnRowT, nUniqueProc, inverse)
-
-          do jj = 1, nUniqueProc
-             if (procsForThisRow(jj) /= myid) then
-                ! This intersection requires a row quantity from me
-                nSend = nSend + 1
-                sendList(1, nSend) = procsForThisRow(jj)
-                sendList(2, nSend) = iDom
-             end if
-          end do
-       end if
-    end do
-
-    ! Now we loop back through the whole matrix looking at what I have
-    ! to do. If there is a row or column I don't own, I will have to receive it:
-    allocate(toRecv(nDomTotal))
-    toRecv = 0
-    do iDom=1, nDomTotal
-       do jj=oMat%rowPtr(iDom), oMat%rowPtr(iDom+1)-1
-          jDom = oMat%colInd(jj)
-          if (oMat%assignedProc(jj) == myID) then
-             ! I don't have the row entry:
-             if (.not. (iDom > cumDomProc(myid) .and. iDom <= cumDomProc(myid+1))) then
-                toRecv(iDom) = 1
-             end if
-             ! Don't have the column entry:
-             if (.not. (jDom > cumDomProc(myid) .and. jDom <= cumDomProc(myid+1))) then
-                toRecv(jDom) = 1
-             end if
-          end if
-       end do
-    end do
-
-    ! Now loop back through and set my recvList. Only add if the
-    ! rBufferSize is larger than zero.
-    do iDom=1, nDomTotal
-       if (toRecv(iDom) == 1 .and. rBufSize(iDom) > 0) then
-          nRecv = nRecv + 1
-          recvList(1, nRecv) = blkProc(iDom)
-          recvList(2, nRecv) = iDom
-       end if
-    end do
-
-    deallocate(procsForThisRow, inverse, blkProc, toRecv)
-  end subroutine getOSurfCommPattern
-
-  subroutine sendOBlock(oBlock, iDom, iProc, tagOffset, sendCount)
-
-    use constants
-    use communication, only : adflow_comm_world, sendRequests
-    use oversetData, only : oversetBlock
-    use utils, only : EChk
-    implicit none
-
-    ! Input/Output
-    type(oversetBlock), intent(inout) :: oBlock
-    integer(kind=intType), intent(in) :: iProc, iDom, tagOffset
-    integer(kind=intType), intent(inout) :: sendCount
-
-    ! Working
-    integer(kind=intType) :: tag, ierr
-
-    tag = tagOffset + iDom
-    sendCount = sendCount + 1
-    call mpi_isend(oBlock%rBuffer, size(oBlock%rbuffer), adflow_real, &
-         iProc, tag, ADflow_comm_world, sendRequests(sendCount), ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
-
-    sendCount = sendCount + 1
-    call mpi_isend(oBlock%iBuffer, size(oBlock%iBuffer), adflow_integer, &
-         iProc, tag, ADflow_comm_world, sendRequests(sendCount), ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
-
-  end subroutine sendOBlock
-
-  subroutine sendOFringe(oFringe, iDom, iProc, tagOffset, sendCount)
-
-    use constants
-    use communication, only : adflow_comm_world, sendRequests
-    use oversetData, only : oversetFringe
-    use utils, only : EChk
-    implicit none
-
-    ! Input/Output
-    type(oversetFringe), intent(inout) :: oFringe
-    integer(kind=intType), intent(in) :: iProc, iDom, tagOffset
-    integer(kind=intType), intent(inout) :: sendCount
-
-    ! Working
-    integer(kind=intType) :: tag, ierr
-
-    tag = iDom + tagOffset
-    sendCount = sendCount + 1
-    call mpi_isend(oFringe%rBuffer, size(oFringe%rbuffer), adflow_real, &
-         iProc, tag, ADflow_comm_world, sendRequests(sendCount), ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
-
-    sendCount = sendCount + 1
-    call mpi_isend(oFringe%iBuffer, size(oFringe%iBuffer), adflow_integer, &
-         iProc, tag, ADflow_comm_world, sendRequests(sendCount), ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
-
-  end subroutine sendOFringe
-
-  subroutine sendOSurf(oWall, iDom, iProc, tagOffset, sendCount)
-
-    use constants
-    use communication, only : sendRequests, adflow_comm_world
-    use oversetData, only : oversetWall
-    use utils, only : EChk
-    implicit none
-
-    ! Input/Output
-    type(oversetWall), intent(inout) :: oWall
-    integer(kind=intType), intent(in) :: iProc, iDom, tagOffset
-    integer(kind=intType), intent(inout) :: sendCount
-
-    ! Working
-    integer(kind=intType) :: tag, ierr
-
-    tag = iDom + tagOffset
-    sendCount = sendCount + 1
-    call mpi_isend(oWall%rBuffer, size(oWall%rbuffer), adflow_real, &
-         iProc, tag, ADflow_comm_world, sendRequests(sendCount), ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
-
-    sendCount = sendCount + 1
-    call mpi_isend(oWall%iBuffer, size(oWall%iBuffer), adflow_integer, &
-         iProc, tag, ADflow_comm_world, sendRequests(sendCount), ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
-
-  end subroutine sendOSurf
-
-  subroutine recvOBlock(oBlock, iDom, iProc, tagOffset, iSize, rSize, &
-       recvCount, recvInfo)
-
-    use constants
-    use communication, only : adflow_comm_world, recvRequests
-    use oversetData, only : oversetBlock
-    use utils, only : EChk
-    implicit none
-
-    ! Input/Output
-    type(oversetBlock), intent(inout) :: oBlock
-    integer(kind=intType), intent(in) :: iDom, iProc, tagOffset, rSize, iSize
-    integer(kind=intType), intent(inout) :: recvCount
-    integer(kind=intType), intent(inout) :: recvInfo(2, recvCount+2)
-
-    ! Working
-    integer(kind=intType) :: tag, ierr
-
-    tag = tagOffset + iDom
-    allocate(oBLock%rBuffer(rSize), oBlock%iBuffer(iSize))
-
-    recvCount = recvCount + 1
-    call mpi_irecv(oBlock%rBuffer, rSize, adflow_real, &
-         iProc, tag, ADflow_comm_world, recvRequests(recvCount), ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
-    recvInfo(:, recvCount) = (/iDom, 1/)
-
-    recvCount = recvCount + 1
-    call mpi_irecv(oBlock%iBuffer, iSize, adflow_integer, &
-         iProc, tag, ADflow_comm_world, recvRequests(recvCount), ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
-    recvInfo(:, recvCount) = (/iDom, 2/)
-
-  end subroutine recvOBlock
-
-  subroutine recvOFringe(oFringe, iDom, iProc, tagOffset, iSize, rSize, &
-       recvCount, recvInfo)
-
-    use constants
-    use communication, only : adflow_comm_world, recvRequests
-    use oversetData, only : oversetFringe
-    use utils, only : EChk
-    implicit none
-
-    ! Input/Output
-    type(oversetFringe), intent(inout) :: oFringe
-    integer(kind=intType), intent(in) :: iDom, iProc, tagOffset, rSize, iSize
-    integer(kind=intType), intent(inout) :: recvCount
-    integer(kind=intType), intent(inout) :: recvInfo(2, recvCount+2)
-
-    ! Working
-    integer(kind=intType) :: tag, ierr
-
-    tag = tagOffset + iDom
-    allocate(oFringe%rBuffer(rSize), oFringe%iBuffer(iSize))
-
-    recvCount = recvCount + 1
-    call mpi_irecv(oFringe%rBuffer, rSize, adflow_real, &
-         iProc, tag, ADflow_comm_world, recvRequests(recvCount), ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
-    recvInfo(:, recvCount) = (/iDom, 3/)
-
-    recvCount = recvCount + 1
-    call mpi_irecv(oFringe%iBuffer, iSize, adflow_integer, &
-         iProc, tag, ADflow_comm_world, recvRequests(recvCount), ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
-    recvInfo(:, recvCount) = (/iDom, 4/)
-
-  end subroutine recvOFringe
-
-  subroutine recvOSurf(oWall, iDom, iProc, tagOffset, iSize, rSize, &
-       recvCount, recvInfo)
-
-    use constants
-    use communication, only : adflow_comm_world, recvRequests
-    use oversetData, only : oversetWall
-    use utils, only : EChk
-    implicit none
-
-    ! Input/Output
-    type(oversetWall), intent(inout) :: oWall
-    integer(kind=intType), intent(in) :: iDom, iProc, tagOffset, rSize, iSize
-    integer(kind=intType), intent(inout) :: recvCount
-    integer(kind=intType), intent(inout) :: recvInfo(2, recvCount+2)
-
-    ! Working
-    integer(kind=intType) :: tag, ierr
-
-    tag = tagOffset + iDom
-    allocate(oWall%rBuffer(rSize), oWall%iBuffer(iSize))
-
-    recvCount = recvCount + 1
-    call mpi_irecv(oWall%rBuffer, rSize, adflow_real, &
-         iProc, tag, ADflow_comm_world, recvRequests(recvCount), ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
-    recvInfo(:, recvCount) = (/iDom, 5/)
-
-    recvCount = recvCount + 1
-
-    call mpi_irecv(oWall%iBuffer, iSize, adflow_integer, &
-         iProc, tag, ADflow_comm_world, recvRequests(recvCount), ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
-    recvInfo(:, recvCount) = (/iDom, 6/)
-
-  end subroutine recvOSurf
-
-  subroutine getFringeReturnSizes(oFringeSendList, oFringeRecvList, &
-       nOFringeSend, nOfringeRecv, oFringes, &
-       fringeRecvSizes, cumFringeRecv)
-
-    ! For this data exchange we use the exact *reverse* of fringe
-    ! communication pattern. This communiation simply determines the
-    ! number of fringes that must be returned to the owning process.
-
-    use constants
-    use communication , only : sendRequests, recvRequests, adflow_comm_world
-    use utils, only : EChk
-    use oversetData, onlY : oversetFringe
-    implicit none
-
-    ! Input/output
-    type(oversetFringe), dimension(:) :: oFringes
-    integer(kind=intType), dimension(:, :) :: oFringeSendList, oFringeRecvList
-    integer(kind=intType), dimension(:), allocatable :: cumFringeRecv, fringeRecvSizes
-    integer(kind=intType) :: nOFringeSend, nOfringeRecv
-    ! Working
-    integer(kind=intType) :: sendCount, recvCount
-    integer(kind=intType) :: iDom, iProc, jj, ierr, index, i
-    integer mpiStatus(MPI_STATUS_SIZE)
-
-    ! Post all the fringe iSends
-    sendCount = 0
-    do jj=1, nOFringeRecv
-
-       iProc = oFringeRecvList(1, jj)
-       iDom = oFringeRecvList(2, jj)
-       sendCount = sendCount + 1
-       call mpi_isend(oFringes(iDom)%fringeReturnSize, 1, adflow_integer, &
-            iproc, iDom, adflow_comm_world, sendRequests(sendCount), ierr)
-       call ECHK(ierr, __FILE__, __LINE__)
-    end do
-
-    allocate(fringeRecvSizes(nOfringeSend))
-
-    ! Non-blocking receives
-    recvCount = 0
-    do jj=1, nOFringeSend
-
-       iProc = oFringeSendList(1, jj)
-       iDom = oFringeSendList(2, jj)
-       recvCount = recvCount + 1
-
-       call mpi_irecv(fringeRecvSizes(jj), 1, adflow_integer, &
-            iProc, iDom, adflow_comm_world, recvRequests(recvCount), ierr)
-       call ECHK(ierr, __FILE__, __LINE__)
-    end do
-
-    ! Last thing to do wait for all the sends and receives to finish
-    do i=1,sendCount
-       call mpi_waitany(sendCount, sendRequests, index, mpiStatus, ierr)
-       call ECHK(ierr, __FILE__, __LINE__)
-    end do
-
-    do i=1,recvCount
-       call mpi_waitany(recvCount, recvRequests, index, mpiStatus, ierr)
-       call ECHK(ierr, __FILE__, __LINE__)
-    end do
-
-    ! Compute the cumulative form of the fringeRecvSizes
-
-    allocate(cumFringeRecv(1:nOFringeSend+1))
-    cumFringeRecv(1) = 1
-    do jj=1, nOFringeSend ! These are the fringes we *sent*
-       ! originally, now are going to receive them
-       ! back
-       cumFringeRecv(jj+1) = cumFringeRecv(jj) + fringeRecvSizes(jj)
-    end do
-
-  end subroutine getFringeReturnSizes
-
-
-
-  !
-  !       oversetLoadBalance determine the deistributation of donor and
-  !       receiver blocks that will result in approximate even load
-  !       balancing. The sparse matrix structrue of the overla is
-  !       provided. This computation runs on all processors.
-
-  subroutine oversetLoadBalance(overlap)
-
-    use constants
-    use communication, only : nProc
-    use oversetData, only : CSRMatrix
-    implicit none
-
-    ! Input/Output
-    type(CSRMatrix), intent(inout) :: overlap
-
-    ! Working paramters
-    integer(kind=intType) :: curRow, jj, jj1, iProc, iRow
-    real(kind=realType) :: evenCost, potentialSum, targetCost
-    real(Kind=realType) :: totalSearch, totalBuild
-
-    real(kind=realType), dimension(0:nProc-1) :: procCosts
-    real(kind=realType), dimension(0:nProc) :: cumProcCosts
-    real(kind=realType), dimension(overlap%nRow) :: buildCost
-    real(kind=realType), parameter :: tol=0.1_realType
-    !  real(kind=realType), parameter :: K=10_realType
-    logical, dimension(overlap%nnz) :: blockTaken
-    logical :: increment
-
-    ! Pointers to make code a litte easier to read
-    integer(kind=intType), pointer, dimension(:) :: rowPtr, assignedProc
-    real(kind=realType), pointer, dimension(:) :: data
-
-    ! Set the couple  of pointers
-    rowPtr => overlap%rowPtr
-    assignedProc => overlap%assignedProc
-    data => overlap%data
-
-    ! Determine the total search cost:
-    totalSearch = sum(overlap%data)
-
-    ! Target amount of work for each processor
-    evenCost = totalSearch / nProc
-
-    ! Initialize the taken processor to False
-    blockTaken = .False.
-
-    ! Initialzie assignedProc to -1 since there could be entries we can
-    ! ignore.
-    assignedProc(:) = -1
-    procCosts = zero
-    cumProcCosts(0) = zero
-
-    ! Initialize the starting point
-    jj = 1
-    iProc = 0
-
-    ! Find the first row with non-zeros
-    curRow = 1
-    do while(rowPtr(curRow+1)-rowPtr(curRow) == 0)
-       curRow = curRow + 1
-    end do
-
-    masterLoop: do while (curRow <= overlap%nRow .and. iProc <= nProc)
+    subroutine getCommPattern(oMat, sendList, nSend, recvList, nRecv)
+
+        use constants
+        use oversetData, only: cumDomProc, nDomProc, nDomTotal, CSRMatrix
+        use blockPointers, only: nDom
+        use communication, only: nProc, myid
+        use sorting, only: unique
+        implicit none
+
+        ! Input/output
+        type(CSRMatrix), intent(in) :: oMat
+        integer(kind=intType), intent(out) :: sendList(:, :), recvList(:, :)
+        integer(kind=intType), intent(out) :: nSend, nRecv
+
+        ! Working:
+        integer(kind=intType) :: nn, iDom, nnRow, i, jj, ii, nUniqueProc, iProc
+        integer(kind=intType), dimension(:), allocatable :: procsForThisRow, inverse, blkProc
+        logical :: added
+
+        ! Generic routine to determine what I need to send/recv based on the
+        ! data provided in the overlap matrix. The '2' in the send and
+        ! receive lists will record the processor and the global 'idom'
+        ! index, which is suffient to use for the subsequent communication
+        ! structure.
+
+        nSend = 0
+        nRecv = 0
+
+        ! These variables are used to compact the sending of
+        ! blocks/fringes. The logic is as follows: A a different
+        ! processor may need a block/fringe for more than 1 search. This
+        ! is manifested by having two or more entries is the rows (or
+        ! columns) I own. It would be inefficient to send the same data
+        ! to the same processor more than once, so we "uniquify" the
+        ! processors before we send. There is also another salient
+        ! reason: If we were to send the same data twice, and the other
+        ! processor started using the data, we could get a race condition
+        ! as it was modified the received fringes (during a search) while
+        ! the same fringes were being overwritten by the receive operation.
+
+        allocate (procsForThisRow(nDomTotal), inverse(nDomTotal), blkProc(nDomTotal))
+
+        ii = 0
+        do iProc = 0, nProc - 1
+            do i = 1, nDomProc(iProc)
+                ii = ii + 1
+                blkProc(ii) = iProc
+            end do
+        end do
+
+        ! Loop over the owned rows of the normal matrix
+        do nn = 1, nDom
+            iDom = cumDomProc(myid) + nn
+            nnRow = oMat%rowPtr(iDom + 1) - oMat%rowPtr(iDom)
+            procsForThisRow(1:nnRow) = oMat%assignedProc(oMat%rowPtr(iDom):oMat%rowPtr(iDom + 1) - 1)
+            call unique(procsForThisRow, nnRow, nUniqueProc, inverse)
+
+            do jj = 1, nUniqueProc
+                if (procsForThisRow(jj) /= myid) then
+                    ! This intersection requires a row quantity from me
+                    nSend = nSend + 1
+                    sendList(1, nSend) = procsForThisRow(jj)
+                    sendList(2, nSend) = iDom
+                end if
+            end do
+        end do
 
-       ! Normally we increment
-       increment = .True.
-
-       ! This is our current target cost.
-       targetCost = evenCost*(iProc + 1)
-
-       ! It is still possible that data(jj) is zero. That's ok...we'll
-       ! explictly ignore them.
-       if (data(jj) /= zero .and. .not. (blockTaken(jj))) then
-
-          if (procCosts(iProc) == 0 .or. iProc == nProc-1) then
-             ! Must be added
-             procCosts(iProc) = procCosts(iProc) + data(jj)
-             blockTaken(jj) = .True.
-             assignedProc(jj) = iProc
+        ! Now we loop back through the whole matrix looking at what I have
+        ! to do. If there is a row I don't own, I will have to receive it:
 
-          else
-
-             ! There is already something in there. See what the
-             !  potential sum will be:
-             potentialSum = cumProcCosts(iProc) + procCosts(iProc) + data(jj)
-
-             if (potentialSum < targetCost - tol*evenCost) then
-                !  We are not close to our limit yet so just add it normally
-                procCosts(iProc) = procCosts(iProc) + data(jj)
-                blockTaken(jj) = .True.
-                assignedProc(jj) = iProc
+        do iDom = 1, oMat%nRow
+            added = .False.
+            rowLoop: do jj = oMat%rowPtr(iDom), oMat%rowPtr(iDom + 1) - 1
 
-             else if (potentialSum >= targetCost - tol*evenCost  .and. &
-                  potentialSum <= targetCost + tol*evenCost) then
+                ! I have to do this intersection
+                if (oMat%assignedProc(jj) == myID) then
 
-                ! This one looks perfect. Call it a day...add it and
-                !  move on to the next proc
-
-                procCosts(iProc) = procCosts(iProc) + data(jj)
-                blockTaken(jj) = .True.
-                assignedProc(jj) = iProc
-
-                ! Processor can be incremented
-                cumProcCosts(iProc+1) = cumProcCosts(iProc) + procCosts(iProc)
-                iProc = iProc + 1
-             else
-                ! This means potentialSum > targetCost + tol*evenCost
-
-                ! This is somewhat bad news...this may be *horrendly*
-                ! load balanced. The algorithm dictates we *MUST*
-                ! finish this proc no matter what before we go back to
-                ! the outer loop. Essentially we know jj is bad,
-                ! instead scan over the rest of the row and see if we
-                ! can add something else that is decent.
-                increment = .False.
-
-                restOfRow: do jj1=jj+1, rowPtr(curRow+1)-1
-
-                   potentialSum = cumProcCosts(iProc) + procCosts(iProc) + data(jj1)
-
-                   if (data(jj1) /= zero .and. .not. (blockTaken(jj1))) then
-
-                      if (potentialSum < targetCost - tol*evenCost) then
-                         !Huh...that one fit in without going
-                         ! over....add it and kep going in the loop
-
-                         procCosts(iProc) = procCosts(iProc) + data(jj1)
-                         blockTaken(jj1) = .True.
-                         assignedProc(jj1) = iProc
-
-                      else if (potentialSum >= targetCost - tol*evenCost  .and. &
-                           potentialSum <= targetCost + tol*evenCost) then
-
-                         ! This one fit in perfectly.
-                         procCosts(iProc) = procCosts(iProc) + data(jj1)
-                         blockTaken(jj1) = .True.
-                         assignedProc(jj1) = iProc
-
-                         ! No need to keep going
-                         exit  restOfRow
-
-                      end if
-                   end if
-                end do restOfRow
-
-                ! Well, the loop finished, we may or may not have
-                ! added something. If so great...if not, oh well. We
-                ! just keep going to the next proc. That's the greedy
-                ! algorithm for you.
-
-                ! Processor can be incremented
-                cumProcCosts(iProc+1) = cumProcCosts(iProc) + procCosts(iProc)
-                iProc = iProc + 1
-             end if
-          end if
-       end if
-
-       ! Move 1 in jj, until we reach the end and wrap around.
-       if (increment) then
-          jj = jj + 1
+                    ! But I don't know the row entry
+                    if (.not. (iDom > cumDomProc(myid) .and. iDom <= cumDomProc(myid + 1))) then
 
-          ! Switch to the next row:
-          if (jj == rowPtr(curRow+1)) then
+                        ! Need to back out what proc the iDom correponds to:
+                        nRecv = nRecv + 1
+                        recvList(1, nRecv) = blkProc(iDom)
+                        recvList(2, nRecv) = iDom
+                        added = .True.
+                    end if
+                end if
 
-             ! This is really tricky...we know we're at the end of the
-             ! row, but we have to SKIP OVER THE EMPTY rows, or else the
-             ! algorithm will crap out. Keep incrementing the curRow
-             ! until we get a row with something in it. Make sure we
-             ! don't go out the end, so check again nRow
-
-             findNextNonZeroRow: do while(jj == rowPtr(curRow+1))
-                curRow = curRow + 1
-                if (curRow > overlap%nRow) then
-                   exit findNextNonZeroRow
+                ! Just move on to the next row since we only need to receive it once.
+                if (added) then
+                    exit rowLoop
+                end if
+            end do rowLoop
+        end do
+
+        deallocate (procsForThisRow, inverse, blkProc)
+    end subroutine getCommPattern
+
+    subroutine getOSurfCommPattern(oMat, oMatT, sendList, nSend, &
+                                   recvList, nRecv, rBufSize)
+
+        ! This subroutine get the the comm pattern to send the oWall types.
+        use constants
+        use blockPointers, only: nDom
+        use oversetData, only: nDomTotal, CSRMatrix, cumDomProc, nDomProc
+        use communication, only: myid, nProc
+        use sorting, only: unique
+        implicit none
+
+        ! Input/output
+        type(CSRMatrix), intent(in) :: oMat, oMatT
+        integer(kind=intType), intent(out) :: sendList(:, :), recvList(:, :)
+        integer(kind=intType), intent(out) :: nSend, nRecv
+        integer(kind=intType), intent(in) :: rBufSize(nDomTotal)
+
+        ! Working:
+        integer(kind=intType) :: nn, iDom, jDom, nnRow, nnRowT, i, jj, ii, nUniqueProc, iProc
+        integer(kind=intType), dimension(:), allocatable :: blkProc, toRecv
+        integer(kind=intType), dimension(:), allocatable :: procsForThisRow, inverse
+
+        nSend = 0
+        nRecv = 0
+
+        allocate (procsForThisRow(2 * nDomTotal), inverse(2 * nDomTotal), blkProc(nDomTotal))
+
+        ii = 0
+        do iProc = 0, nProc - 1
+            do i = 1, nDomProc(iProc)
+                ii = ii + 1
+                blkProc(ii) = iProc
+            end do
+        end do
+
+        ! Loop over the owned rows of the regular matrix and the rows
+        ! transposed matrix (ie columns of the regular matrix)
+        do nn = 1, nDom
+
+            iDom = cumDomProc(myid) + nn
+
+            ! Only deal with this block if the rbuffer size for the oWall is
+            ! greater than zero. If it is zero, it is empty we don't need to
+            ! deal with it.
+            if (rBufSize(iDom) > 0) then
+
+                nnRow = oMat%rowPtr(iDom + 1) - oMat%rowPtr(iDom)
+                procsForThisRow(1:nnRow) = oMat%assignedProc(oMat%rowPtr(iDom):oMat%rowPtr(iDom + 1) - 1)
+
+                nnRowT = oMatT%rowPtr(iDom + 1) - oMatT%rowPtr(iDom)
+           procsForThisRow(nnRow + 1:nnRow + nnRowT) = oMatT%assignedProc(oMatT%rowPtr(iDom):oMatT%rowPtr(iDom + 1) - 1)
+
+                call unique(procsForThisRow, nnRow + nnRowT, nUniqueProc, inverse)
+
+                do jj = 1, nUniqueProc
+                    if (procsForThisRow(jj) /= myid) then
+                        ! This intersection requires a row quantity from me
+                        nSend = nSend + 1
+                        sendList(1, nSend) = procsForThisRow(jj)
+                        sendList(2, nSend) = iDom
+                    end if
+                end do
+            end if
+        end do
+
+        ! Now we loop back through the whole matrix looking at what I have
+        ! to do. If there is a row or column I don't own, I will have to receive it:
+        allocate (toRecv(nDomTotal))
+        toRecv = 0
+        do iDom = 1, nDomTotal
+            do jj = oMat%rowPtr(iDom), oMat%rowPtr(iDom + 1) - 1
+                jDom = oMat%colInd(jj)
+                if (oMat%assignedProc(jj) == myID) then
+                    ! I don't have the row entry:
+                    if (.not. (iDom > cumDomProc(myid) .and. iDom <= cumDomProc(myid + 1))) then
+                        toRecv(iDom) = 1
+                    end if
+                    ! Don't have the column entry:
+                    if (.not. (jDom > cumDomProc(myid) .and. jDom <= cumDomProc(myid + 1))) then
+                        toRecv(jDom) = 1
+                    end if
                 end if
-             end do findNextNonZeroRow
-          end if
-       end if
-    end do masterLoop
+            end do
+        end do
 
-  end subroutine oversetLoadBalance
+        ! Now loop back through and set my recvList. Only add if the
+        ! rBufferSize is larger than zero.
+        do iDom = 1, nDomTotal
+            if (toRecv(iDom) == 1 .and. rBufSize(iDom) > 0) then
+                nRecv = nRecv + 1
+                recvList(1, nRecv) = blkProc(iDom)
+                recvList(2, nRecv) = iDom
+            end if
+        end do
 
-  subroutine exchangeFringes(level, sps, commPattern, internal)
-    !
-    !       ExchangeFringes exchanges the donorInformation of the fringes:
-    !       donorProc, donorBlock, dIndex and donorFrac. It does this
-    !       the 1:1 halos for the given level and spectral instance. Since
-    !       we have real values and integer values we will do all the ints
-    !       first and then the reals.
-    !
-    use constants
-    use block, only : fringeType
-    use blockPointers, only : flowDoms
-    use communication, only : commType, internalCommType, recvBuffer, sendBuffer, myid, &
-         adflow_comm_world, sendRequests, recvRequests
-    use oversetUtilities, only : addToFringeList, windIndex
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level, sps
+        deallocate (procsForThisRow, inverse, blkProc, toRecv)
+    end subroutine getOSurfCommPattern
+
+    subroutine sendOBlock(oBlock, iDom, iProc, tagOffset, sendCount)
+
+        use constants
+        use communication, only: adflow_comm_world, sendRequests
+        use oversetData, only: oversetBlock
+        use utils, only: EChk
+        implicit none
 
-    type(commType),          dimension(*), intent(in) :: commPattern
-    type(internalCommType), dimension(*), intent(in) :: internal
-    !
-    !      Local variables.
-    !
-    integer :: size, procId, ierr, index
-    integer, dimension(mpi_status_size) :: mpiStatus
+        ! Input/Output
+        type(oversetBlock), intent(inout) :: oBlock
+        integer(kind=intType), intent(in) :: iProc, iDom, tagOffset
+        integer(kind=intType), intent(inout) :: sendCount
+
+        ! Working
+        integer(kind=intType) :: tag, ierr
+
+        tag = tagOffset + iDom
+        sendCount = sendCount + 1
+        call mpi_isend(oBlock%rBuffer, size(oBlock%rbuffer), adflow_real, &
+                       iProc, tag, ADflow_comm_world, sendRequests(sendCount), ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
+
+        sendCount = sendCount + 1
+        call mpi_isend(oBlock%iBuffer, size(oBlock%iBuffer), adflow_integer, &
+                       iProc, tag, ADflow_comm_world, sendRequests(sendCount), ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
+
+    end subroutine sendOBlock
+
+    subroutine sendOFringe(oFringe, iDom, iProc, tagOffset, sendCount)
+
+        use constants
+        use communication, only: adflow_comm_world, sendRequests
+        use oversetData, only: oversetFringe
+        use utils, only: EChk
+        implicit none
+
+        ! Input/Output
+        type(oversetFringe), intent(inout) :: oFringe
+        integer(kind=intType), intent(in) :: iProc, iDom, tagOffset
+        integer(kind=intType), intent(inout) :: sendCount
+
+        ! Working
+        integer(kind=intType) :: tag, ierr
+
+        tag = iDom + tagOffset
+        sendCount = sendCount + 1
+        call mpi_isend(oFringe%rBuffer, size(oFringe%rbuffer), adflow_real, &
+                       iProc, tag, ADflow_comm_world, sendRequests(sendCount), ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
+
+        sendCount = sendCount + 1
+        call mpi_isend(oFringe%iBuffer, size(oFringe%iBuffer), adflow_integer, &
+                       iProc, tag, ADflow_comm_world, sendRequests(sendCount), ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
+
+    end subroutine sendOFringe
+
+    subroutine sendOSurf(oWall, iDom, iProc, tagOffset, sendCount)
+
+        use constants
+        use communication, only: sendRequests, adflow_comm_world
+        use oversetData, only: oversetWall
+        use utils, only: EChk
+        implicit none
+
+        ! Input/Output
+        type(oversetWall), intent(inout) :: oWall
+        integer(kind=intType), intent(in) :: iProc, iDom, tagOffset
+        integer(kind=intType), intent(inout) :: sendCount
+
+        ! Working
+        integer(kind=intType) :: tag, ierr
+
+        tag = iDom + tagOffset
+        sendCount = sendCount + 1
+        call mpi_isend(oWall%rBuffer, size(oWall%rbuffer), adflow_real, &
+                       iProc, tag, ADflow_comm_world, sendRequests(sendCount), ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
+
+        sendCount = sendCount + 1
+        call mpi_isend(oWall%iBuffer, size(oWall%iBuffer), adflow_integer, &
+                       iProc, tag, ADflow_comm_world, sendRequests(sendCount), ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
+
+    end subroutine sendOSurf
+
+    subroutine recvOBlock(oBlock, iDom, iProc, tagOffset, iSize, rSize, &
+                          recvCount, recvInfo)
+
+        use constants
+        use communication, only: adflow_comm_world, recvRequests
+        use oversetData, only: oversetBlock
+        use utils, only: EChk
+        implicit none
+
+        ! Input/Output
+        type(oversetBlock), intent(inout) :: oBlock
+        integer(kind=intType), intent(in) :: iDom, iProc, tagOffset, rSize, iSize
+        integer(kind=intType), intent(inout) :: recvCount
+        integer(kind=intType), intent(inout) :: recvInfo(2, recvCount + 2)
+
+        ! Working
+        integer(kind=intType) :: tag, ierr
+
+        tag = tagOffset + iDom
+        allocate (oBLock%rBuffer(rSize), oBlock%iBuffer(iSize))
+
+        recvCount = recvCount + 1
+        call mpi_irecv(oBlock%rBuffer, rSize, adflow_real, &
+                       iProc, tag, ADflow_comm_world, recvRequests(recvCount), ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
+        recvInfo(:, recvCount) = (/iDom, 1/)
+
+        recvCount = recvCount + 1
+        call mpi_irecv(oBlock%iBuffer, iSize, adflow_integer, &
+                       iProc, tag, ADflow_comm_world, recvRequests(recvCount), ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
+        recvInfo(:, recvCount) = (/iDom, 2/)
+
+    end subroutine recvOBlock
+
+    subroutine recvOFringe(oFringe, iDom, iProc, tagOffset, iSize, rSize, &
+                           recvCount, recvInfo)
+
+        use constants
+        use communication, only: adflow_comm_world, recvRequests
+        use oversetData, only: oversetFringe
+        use utils, only: EChk
+        implicit none
+
+        ! Input/Output
+        type(oversetFringe), intent(inout) :: oFringe
+        integer(kind=intType), intent(in) :: iDom, iProc, tagOffset, rSize, iSize
+        integer(kind=intType), intent(inout) :: recvCount
+        integer(kind=intType), intent(inout) :: recvInfo(2, recvCount + 2)
+
+        ! Working
+        integer(kind=intType) :: tag, ierr
+
+        tag = tagOffset + iDom
+        allocate (oFringe%rBuffer(rSize), oFringe%iBuffer(iSize))
+
+        recvCount = recvCount + 1
+        call mpi_irecv(oFringe%rBuffer, rSize, adflow_real, &
+                       iProc, tag, ADflow_comm_world, recvRequests(recvCount), ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
+        recvInfo(:, recvCount) = (/iDom, 3/)
+
+        recvCount = recvCount + 1
+        call mpi_irecv(oFringe%iBuffer, iSize, adflow_integer, &
+                       iProc, tag, ADflow_comm_world, recvRequests(recvCount), ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
+        recvInfo(:, recvCount) = (/iDom, 4/)
+
+    end subroutine recvOFringe
+
+    subroutine recvOSurf(oWall, iDom, iProc, tagOffset, iSize, rSize, &
+                         recvCount, recvInfo)
+
+        use constants
+        use communication, only: adflow_comm_world, recvRequests
+        use oversetData, only: oversetWall
+        use utils, only: EChk
+        implicit none
+
+        ! Input/Output
+        type(oversetWall), intent(inout) :: oWall
+        integer(kind=intType), intent(in) :: iDom, iProc, tagOffset, rSize, iSize
+        integer(kind=intType), intent(inout) :: recvCount
+        integer(kind=intType), intent(inout) :: recvInfo(2, recvCount + 2)
+
+        ! Working
+        integer(kind=intType) :: tag, ierr
+
+        tag = tagOffset + iDom
+        allocate (oWall%rBuffer(rSize), oWall%iBuffer(iSize))
+
+        recvCount = recvCount + 1
+        call mpi_irecv(oWall%rBuffer, rSize, adflow_real, &
+                       iProc, tag, ADflow_comm_world, recvRequests(recvCount), ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
+        recvInfo(:, recvCount) = (/iDom, 5/)
+
+        recvCount = recvCount + 1
+
+        call mpi_irecv(oWall%iBuffer, iSize, adflow_integer, &
+                       iProc, tag, ADflow_comm_world, recvRequests(recvCount), ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
+        recvInfo(:, recvCount) = (/iDom, 6/)
+
+    end subroutine recvOSurf
+
+    subroutine getFringeReturnSizes(oFringeSendList, oFringeRecvList, &
+                                    nOFringeSend, nOfringeRecv, oFringes, &
+                                    fringeRecvSizes, cumFringeRecv)
+
+        ! For this data exchange we use the exact *reverse* of fringe
+        ! communication pattern. This communiation simply determines the
+        ! number of fringes that must be returned to the owning process.
+
+        use constants
+        use communication, only: sendRequests, recvRequests, adflow_comm_world
+        use utils, only: EChk
+        use oversetData, onlY: oversetFringe
+        implicit none
+
+        ! Input/output
+        type(oversetFringe), dimension(:) :: oFringes
+        integer(kind=intType), dimension(:, :) :: oFringeSendList, oFringeRecvList
+        integer(kind=intType), dimension(:), allocatable :: cumFringeRecv, fringeRecvSizes
+        integer(kind=intType) :: nOFringeSend, nOfringeRecv
+        ! Working
+        integer(kind=intType) :: sendCount, recvCount
+        integer(kind=intType) :: iDom, iProc, jj, ierr, index, i
+        integer mpiStatus(MPI_STATUS_SIZE)
+
+        ! Post all the fringe iSends
+        sendCount = 0
+        do jj = 1, nOFringeRecv
+
+            iProc = oFringeRecvList(1, jj)
+            iDom = oFringeRecvList(2, jj)
+            sendCount = sendCount + 1
+            call mpi_isend(oFringes(iDom)%fringeReturnSize, 1, adflow_integer, &
+                           iproc, iDom, adflow_comm_world, sendRequests(sendCount), ierr)
+            call ECHK(ierr, __FILE__, __LINE__)
+        end do
+
+        allocate (fringeRecvSizes(nOfringeSend))
+
+        ! Non-blocking receives
+        recvCount = 0
+        do jj = 1, nOFringeSend
+
+            iProc = oFringeSendList(1, jj)
+            iDom = oFringeSendList(2, jj)
+            recvCount = recvCount + 1
+
+            call mpi_irecv(fringeRecvSizes(jj), 1, adflow_integer, &
+                           iProc, iDom, adflow_comm_world, recvRequests(recvCount), ierr)
+            call ECHK(ierr, __FILE__, __LINE__)
+        end do
+
+        ! Last thing to do wait for all the sends and receives to finish
+        do i = 1, sendCount
+            call mpi_waitany(sendCount, sendRequests, index, mpiStatus, ierr)
+            call ECHK(ierr, __FILE__, __LINE__)
+        end do
+
+        do i = 1, recvCount
+            call mpi_waitany(recvCount, recvRequests, index, mpiStatus, ierr)
+            call ECHK(ierr, __FILE__, __LINE__)
+        end do
+
+        ! Compute the cumulative form of the fringeRecvSizes
+
+        allocate (cumFringeRecv(1:nOFringeSend + 1))
+        cumFringeRecv(1) = 1
+        do jj = 1, nOFringeSend ! These are the fringes we *sent*
+            ! originally, now are going to receive them
+            ! back
+            cumFringeRecv(jj + 1) = cumFringeRecv(jj) + fringeRecvSizes(jj)
+        end do
+
+    end subroutine getFringeReturnSizes
 
-    integer(kind=intType) :: i, j, ii, jj, nVar, iFringe, jFringe
-    integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2
-    integer(kind=intType) :: il, jl, kl, myIndex
-    type(fringeType) :: fringe
-    integer(kind=intType), dimension(:), allocatable :: sendBufInt
-    integer(kind=intType), dimension(:), allocatable :: recvBufInt
+    !
+    !       oversetLoadBalance determine the deistributation of donor and
+    !       receiver blocks that will result in approximate even load
+    !       balancing. The sparse matrix structrue of the overla is
+    !       provided. This computation runs on all processors.
+
+    subroutine oversetLoadBalance(overlap)
+
+        use constants
+        use communication, only: nProc
+        use oversetData, only: CSRMatrix
+        implicit none
+
+        ! Input/Output
+        type(CSRMatrix), intent(inout) :: overlap
+
+        ! Working paramters
+        integer(kind=intType) :: curRow, jj, jj1, iProc, iRow
+        real(kind=realType) :: evenCost, potentialSum, targetCost
+        real(Kind=realType) :: totalSearch, totalBuild
+
+        real(kind=realType), dimension(0:nProc - 1) :: procCosts
+        real(kind=realType), dimension(0:nProc) :: cumProcCosts
+        real(kind=realType), dimension(overlap%nRow) :: buildCost
+        real(kind=realType), parameter :: tol = 0.1_realType
+        !  real(kind=realType), parameter :: K=10_realType
+        logical, dimension(overlap%nnz) :: blockTaken
+        logical :: increment
+
+        ! Pointers to make code a litte easier to read
+        integer(kind=intType), pointer, dimension(:) :: rowPtr, assignedProc
+        real(kind=realType), pointer, dimension(:) :: data
+
+        ! Set the couple  of pointers
+        rowPtr => overlap%rowPtr
+        assignedProc => overlap%assignedProc
+        data => overlap%data
+
+        ! Determine the total search cost:
+        totalSearch = sum(overlap%data)
+
+        ! Target amount of work for each processor
+        evenCost = totalSearch / nProc
 
-    ! Allocate the memory for the sending and receiving buffers.
-    nVar = 3
-    ii = commPattern(level)%nProcSend
-    ii = commPattern(level)%nsendCum(ii)
-    jj = commPattern(level)%nProcRecv
-    jj = commPattern(level)%nrecvCum(jj)
+        ! Initialize the taken processor to False
+        blockTaken = .False.
 
-    allocate(sendBufInt(ii*nVar), recvBufInt(jj*nVar), stat=ierr)
+        ! Initialzie assignedProc to -1 since there could be entries we can
+        ! ignore.
+        assignedProc(:) = -1
+        procCosts = zero
+        cumProcCosts(0) = zero
+
+        ! Initialize the starting point
+        jj = 1
+        iProc = 0
+
+        ! Find the first row with non-zeros
+        curRow = 1
+        do while (rowPtr(curRow + 1) - rowPtr(curRow) == 0)
+            curRow = curRow + 1
+        end do
+
+        masterLoop: do while (curRow <= overlap%nRow .and. iProc <= nProc)
+
+            ! Normally we increment
+            increment = .True.
+
+            ! This is our current target cost.
+            targetCost = evenCost * (iProc + 1)
+
+            ! It is still possible that data(jj) is zero. That's ok...we'll
+            ! explictly ignore them.
+            if (data(jj) /= zero .and. .not. (blockTaken(jj))) then
+
+                if (procCosts(iProc) == 0 .or. iProc == nProc - 1) then
+                    ! Must be added
+                    procCosts(iProc) = procCosts(iProc) + data(jj)
+                    blockTaken(jj) = .True.
+                    assignedProc(jj) = iProc
+
+                else
+
+                    ! There is already something in there. See what the
+                    !  potential sum will be:
+                    potentialSum = cumProcCosts(iProc) + procCosts(iProc) + data(jj)
+
+                    if (potentialSum < targetCost - tol * evenCost) then
+                        !  We are not close to our limit yet so just add it normally
+                        procCosts(iProc) = procCosts(iProc) + data(jj)
+                        blockTaken(jj) = .True.
+                        assignedProc(jj) = iProc
+
+                    else if (potentialSum >= targetCost - tol * evenCost .and. &
+                             potentialSum <= targetCost + tol * evenCost) then
+
+                        ! This one looks perfect. Call it a day...add it and
+                        !  move on to the next proc
+
+                        procCosts(iProc) = procCosts(iProc) + data(jj)
+                        blockTaken(jj) = .True.
+                        assignedProc(jj) = iProc
+
+                        ! Processor can be incremented
+                        cumProcCosts(iProc + 1) = cumProcCosts(iProc) + procCosts(iProc)
+                        iProc = iProc + 1
+                    else
+                        ! This means potentialSum > targetCost + tol*evenCost
+
+                        ! This is somewhat bad news...this may be *horrendly*
+                        ! load balanced. The algorithm dictates we *MUST*
+                        ! finish this proc no matter what before we go back to
+                        ! the outer loop. Essentially we know jj is bad,
+                        ! instead scan over the rest of the row and see if we
+                        ! can add something else that is decent.
+                        increment = .False.
+
+                        restOfRow: do jj1 = jj + 1, rowPtr(curRow + 1) - 1
+
+                            potentialSum = cumProcCosts(iProc) + procCosts(iProc) + data(jj1)
 
-    ! Send the variables. The data is first copied into
-    ! the send buffer after which the buffer is sent asap.
+                            if (data(jj1) /= zero .and. .not. (blockTaken(jj1))) then
+
+                                if (potentialSum < targetCost - tol * evenCost) then
+                                    !Huh...that one fit in without going
+                                    ! over....add it and kep going in the loop
 
-    ii = 1
-    intSends: do i=1,commPattern(level)%nProcSend
+                                    procCosts(iProc) = procCosts(iProc) + data(jj1)
+                                    blockTaken(jj1) = .True.
+                                    assignedProc(jj1) = iProc
 
-       ! Store the processor id and the size of the message
-       ! a bit easier.
+                                else if (potentialSum >= targetCost - tol * evenCost .and. &
+                                         potentialSum <= targetCost + tol * evenCost) then
 
-       procID = commPattern(level)%sendProc(i)
-       size   = nVar*commPattern(level)%nsend(i)
+                                    ! This one fit in perfectly.
+                                    procCosts(iProc) = procCosts(iProc) + data(jj1)
+                                    blockTaken(jj1) = .True.
+                                    assignedProc(jj1) = iProc
 
-       ! Copy the data in the correct part of the send buffer.
+                                    ! No need to keep going
+                                    exit restOfRow
 
-       jj = ii
-       do j=1,commPattern(level)%nsend(i)
+                                end if
+                            end if
+                        end do restOfRow
 
-          ! Store the block id and the indices of the donor
-          ! a bit easier.
+                        ! Well, the loop finished, we may or may not have
+                        ! added something. If so great...if not, oh well. We
+                        ! just keep going to the next proc. That's the greedy
+                        ! algorithm for you.
 
-          d1 = commPattern(level)%sendList(i)%block(j)
-          i1 = commPattern(level)%sendList(i)%indices(j,1)
-          j1 = commPattern(level)%sendList(i)%indices(j,2)
-          k1 = commPattern(level)%sendList(i)%indices(j,3)
+                        ! Processor can be incremented
+                        cumProcCosts(iProc + 1) = cumProcCosts(iProc) + procCosts(iProc)
+                        iProc = iProc + 1
+                    end if
+                end if
+            end if
+
+            ! Move 1 in jj, until we reach the end and wrap around.
+            if (increment) then
+                jj = jj + 1
+
+                ! Switch to the next row:
+                if (jj == rowPtr(curRow + 1)) then
+
+                    ! This is really tricky...we know we're at the end of the
+                    ! row, but we have to SKIP OVER THE EMPTY rows, or else the
+                    ! algorithm will crap out. Keep incrementing the curRow
+                    ! until we get a row with something in it. Make sure we
+                    ! don't go out the end, so check again nRow
+
+                    findNextNonZeroRow: do while (jj == rowPtr(curRow + 1))
+                        curRow = curRow + 1
+                        if (curRow > overlap%nRow) then
+                            exit findNextNonZeroRow
+                        end if
+                    end do findNextNonZeroRow
+                end if
+            end if
+        end do masterLoop
+
+    end subroutine oversetLoadBalance
+
+    subroutine exchangeFringes(level, sps, commPattern, internal)
+        !
+        !       ExchangeFringes exchanges the donorInformation of the fringes:
+        !       donorProc, donorBlock, dIndex and donorFrac. It does this
+        !       the 1:1 halos for the given level and spectral instance. Since
+        !       we have real values and integer values we will do all the ints
+        !       first and then the reals.
+        !
+        use constants
+        use block, only: fringeType
+        use blockPointers, only: flowDoms
+        use communication, only: commType, internalCommType, recvBuffer, sendBuffer, myid, &
+                                 adflow_comm_world, sendRequests, recvRequests
+        use oversetUtilities, only: addToFringeList, windIndex
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level, sps
+
+        type(commType), dimension(*), intent(in) :: commPattern
+        type(internalCommType), dimension(*), intent(in) :: internal
+        !
+        !      Local variables.
+        !
+        integer :: size, procId, ierr, index
+        integer, dimension(mpi_status_size) :: mpiStatus
+
+        integer(kind=intType) :: i, j, ii, jj, nVar, iFringe, jFringe
+        integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2
+        integer(kind=intType) :: il, jl, kl, myIndex
+        type(fringeType) :: fringe
+        integer(kind=intType), dimension(:), allocatable :: sendBufInt
+        integer(kind=intType), dimension(:), allocatable :: recvBufInt
+
+        ! Allocate the memory for the sending and receiving buffers.
+        nVar = 3
+        ii = commPattern(level)%nProcSend
+        ii = commPattern(level)%nsendCum(ii)
+        jj = commPattern(level)%nProcRecv
+        jj = commPattern(level)%nrecvCum(jj)
+
+        allocate (sendBufInt(ii * nVar), recvBufInt(jj * nVar), stat=ierr)
+
+        ! Send the variables. The data is first copied into
+        ! the send buffer after which the buffer is sent asap.
+
+        ii = 1
+        intSends: do i = 1, commPattern(level)%nProcSend
+
+            ! Store the processor id and the size of the message
+            ! a bit easier.
+
+            procID = commPattern(level)%sendProc(i)
+            size = nVar * commPattern(level)%nsend(i)
+
+            ! Copy the data in the correct part of the send buffer.
+
+            jj = ii
+            do j = 1, commPattern(level)%nsend(i)
+
+                ! Store the block id and the indices of the donor
+                ! a bit easier.
+
+                d1 = commPattern(level)%sendList(i)%block(j)
+                i1 = commPattern(level)%sendList(i)%indices(j, 1)
+                j1 = commPattern(level)%sendList(i)%indices(j, 2)
+                k1 = commPattern(level)%sendList(i)%indices(j, 3)
+
+                ! Copy integer values to buffer
+                iFringe = flowDoms(d1, level, sps)%fringePtr(1, i1, j1, k1)
+                if (iFringe > 0) then
+                    sendBufInt(jj) = flowDoms(d1, level, sps)%fringes(iFringe)%donorProc
+                    sendBufInt(jj + 1) = flowDoms(d1, level, sps)%fringes(iFringe)%donorBlock
+                    sendBufInt(jj + 2) = flowDoms(d1, level, sps)%fringes(iFringe)%dIndex
+                else
+                    sendBufInt(jj) = -1
+                    sendBufInt(jj + 1) = 0
+                    sendBufInt(jj + 2) = 0
+                end if
 
-          ! Copy integer values to buffer
-          iFringe = flowDoms(d1, level, sps)%fringePtr(1, i1, j1, k1)
-          if (iFringe > 0) then
-             sendBufInt(jj  ) = flowDoms(d1,level,sps)%fringes(iFringe)%donorProc
-             sendBufInt(jj+1) = flowDoms(d1,level,sps)%fringes(iFringe)%donorBlock
-             sendBufInt(jj+2) = flowDoms(d1,level,sps)%fringes(iFringe)%dIndex
-          else
-             sendBufInt(jj  ) = -1
-             sendBufInt(jj+1) = 0
-             sendBufInt(jj+2) = 0
-          end if
+                jj = jj + nVar
 
-          jj = jj + nVar
+            end do
 
-       enddo
+            ! Send the data.
 
-       ! Send the data.
+            call mpi_isend(sendBufInt(ii), size, adflow_integer, procId, &
+                           procId, ADflow_comm_world, sendRequests(i), &
+                           ierr)
 
-       call mpi_isend(sendBufInt(ii), size, adflow_integer, procId, &
-            procId, ADflow_comm_world, sendRequests(i),   &
-            ierr)
+            ! Set ii to jj for the next processor.
 
-       ! Set ii to jj for the next processor.
+            ii = jj
 
-       ii = jj
+        end do intSends
 
-    enddo intSends
+        ! Post the nonblocking receives.
 
-    ! Post the nonblocking receives.
+        ii = 1
+        intReceives: do i = 1, commPattern(level)%nProcRecv
 
-    ii = 1
-    intReceives: do i=1,commPattern(level)%nProcRecv
+            ! Store the processor id and the size of the message
+            ! a bit easier.
 
-       ! Store the processor id and the size of the message
-       ! a bit easier.
+            procID = commPattern(level)%recvProc(i)
+            size = nVar * commPattern(level)%nrecv(i)
 
-       procID = commPattern(level)%recvProc(i)
-       size    = nVar*commPattern(level)%nrecv(i)
+            ! Post the receive.
 
-       ! Post the receive.
+            call mpi_irecv(recvBufInt(ii), size, adflow_integer, procId, &
+                           myId, ADflow_comm_world, recvRequests(i), ierr)
 
-       call mpi_irecv(recvBufInt(ii), size, adflow_integer, procId, &
-            myId, ADflow_comm_world, recvRequests(i), ierr)
+            ! And update ii.
 
-       ! And update ii.
+            ii = ii + size
 
-       ii = ii + size
+        end do intReceives
 
-    enddo intReceives
+        ! Copy the local data.
 
-    ! Copy the local data.
+        intLocalCopy: do i = 1, internal(level)%ncopy
 
-    intLocalCopy: do i=1,internal(level)%ncopy
+            ! Store the block and the indices of the donor a bit easier.
 
-       ! Store the block and the indices of the donor a bit easier.
+            d1 = internal(level)%donorBlock(i)
+            i1 = internal(level)%donorIndices(i, 1)
+            j1 = internal(level)%donorIndices(i, 2)
+            k1 = internal(level)%donorIndices(i, 3)
 
-       d1 = internal(level)%donorBlock(i)
-       i1 = internal(level)%donorIndices(i,1)
-       j1 = internal(level)%donorIndices(i,2)
-       k1 = internal(level)%donorIndices(i,3)
+            ! Idem for the halo's.
 
-       ! Idem for the halo's.
+            d2 = internal(level)%haloBlock(i)
+            i2 = internal(level)%haloIndices(i, 1)
+            j2 = internal(level)%haloIndices(i, 2)
+            k2 = internal(level)%haloIndices(i, 3)
 
-       d2 = internal(level)%haloBlock(i)
-       i2 = internal(level)%haloIndices(i,1)
-       j2 = internal(level)%haloIndices(i,2)
-       k2 = internal(level)%haloIndices(i,3)
+            iFringe = flowDoms(d1, level, sps)%fringePtr(1, i1, j1, k1)
+            if (iFringe > 0) then
+                ! The sender has an actual fringe. Nowe check if the
+                ! receiver has somewhere already to put it:
 
+                jFringe = flowDoms(d2, level, sps)%fringePtr(1, i2, j2, k2)
 
-       iFringe = flowDoms(d1, level, sps)%fringePtr(1, i1, j1, k1)
-       if (iFringe > 0) then
-          ! The sender has an actual fringe. Nowe check if the
-          ! receiver has somewhere already to put it:
+                ! Setup the new fringe:
+                fringe%myBlock = d2
 
-          jFringe = flowDoms(d2, level, sps)%fringePtr(1, i2, j2, k2)
+                il = flowDoms(d2, level, sps)%il
+                jl = flowDoms(d2, level, sps)%jl
+                kl = flowDoms(d2, level, sps)%kl
+                fringe%myIndex = windIndex(i2, j2, k2, il, jl, kl)
 
-          ! Setup the new fringe:
-          fringe%myBlock = d2
+                fringe%donorProc = flowDoms(d1, level, sps)%fringes(iFringe)%donorProc
+                fringe%donorBlock = flowDoms(d1, level, sps)%fringes(iFringe)%donorBlock
+                fringe%dIndex = flowDoms(d1, level, sps)%fringes(iFringe)%dIndex
+                fringe%donorFrac = flowDoms(d1, level, sps)%fringes(iFringe)%donorFrac
 
-          il = flowDoms(d2, level, sps)%il
-          jl = flowDoms(d2, level, sps)%jl
-          kl = flowDoms(d2, level, sps)%kl
-          fringe%myIndex = windIndex(i2, j2, k2, il, jl, kl)
+                if (jFringe > 0) then
+                    ! Just copy the fringe into the slot. No need to update
+                    ! the pointer since it is already correct.
+                    flowDoms(d2, level, sps)%fringes(jFringe) = fringe
+                else
 
-          fringe%donorProc = flowDoms(d1, level, sps)%fringes(iFringe)%donorProc
-          fringe%donorBlock= flowDoms(d1, level, sps)%fringes(iFringe)%donorBlock
-          fringe%dIndex    = flowDoms(d1, level, sps)%fringes(iFringe)%dIndex
-          fringe%donorFrac = flowDoms(d1, level, sps)%fringes(iFringe)%donorFrac
+                    ! There is no slot available yet. Tack the fringe onto
+                    ! the end of the d2 fringe list and set the pointers
+                    ! accordingly.
 
-          if (jFringe > 0) then
-             ! Just copy the fringe into the slot. No need to update
-             ! the pointer since it is already correct.
-             flowDoms(d2, level, sps)%fringes(jFringe) = fringe
-          else
+                    call addToFringeList(flowDoms(d2, level, sps)%fringes, &
+                                         flowDoms(d2, level, sps)%nDonors, fringe)
 
-             ! There is no slot available yet. Tack the fringe onto
-             ! the end of the d2 fringe list and set the pointers
-             ! accordingly.
+                    ! Note that all three values (pointer, start and end) are
+                    ! all the same.
+                    flowDoms(d2, level, sps)%fringePtr(:, i2, j2, k2) = &
+                        flowDoms(d2, level, sps)%nDonors
+                end if
+            else
+                ! The donor isn't a receiver so make sure the halo isn't
+                ! either. Just set the fringePtr to 0
 
-             call addToFringeList(flowDoms(d2, level, sps)%fringes, &
-                  flowDoms(d2, level, sps)%nDonors,  fringe)
+                flowDoms(d2, level, sps)%fringePtr(1, i2, j2, k2) = 0
+            end if
 
-             ! Note that all three values (pointer, start and end) are
-             ! all the same.
-             flowDoms(d2, level, sps)%fringePtr(:, i2, j2, k2) = &
-                  flowDoms(d2, level, sps)%nDonors
-          end if
-       else
-          ! The donor isn't a receiver so make sure the halo isn't
-          ! either. Just set the fringePtr to 0
+        end do intLocalCopy
 
-          flowDoms(d2, level, sps)%fringePtr(1, i2, j2, k2) = 0
-       end if
+        ! Complete the nonblocking receives in an arbitrary sequence and
+        ! copy the variables from the buffer into the halo's.
 
-    enddo intLocalCopy
+        size = commPattern(level)%nProcRecv
+        intCompleteRecvs: do i = 1, commPattern(level)%nProcRecv
 
-    ! Complete the nonblocking receives in an arbitrary sequence and
-    ! copy the variables from the buffer into the halo's.
+            ! Complete any of the requests.
 
-    size = commPattern(level)%nProcRecv
-    intCompleteRecvs: do i=1,commPattern(level)%nProcRecv
+            call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
 
-       ! Complete any of the requests.
+            ! Copy the data just arrived in the halo's.
 
-       call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
+            ii = index
+            jj = nVar * commPattern(level)%nrecvCum(ii - 1)
+            do j = 1, commPattern(level)%nrecv(ii)
 
-       ! Copy the data just arrived in the halo's.
+                ! Store the block and the indices of the halo a bit easier.
 
-       ii = index
-       jj = nVar*commPattern(level)%nrecvCum(ii-1)
-       do j=1,commPattern(level)%nrecv(ii)
+                d2 = commPattern(level)%recvList(ii)%block(j)
+                i2 = commPattern(level)%recvList(ii)%indices(j, 1)
+                j2 = commPattern(level)%recvList(ii)%indices(j, 2)
+                k2 = commPattern(level)%recvList(ii)%indices(j, 3)
 
-          ! Store the block and the indices of the halo a bit easier.
+                fringe%myBlock = d2
+                ! Recompute my Index:
+                il = flowDoms(d2, level, sps)%il
+                jl = flowDoms(d2, level, sps)%jl
+                kl = flowDoms(d2, level, sps)%kl
+                fringe%myIndex = windIndex(i2, j2, k2, il, jl, kl)
 
-          d2 = commPattern(level)%recvList(ii)%block(j)
-          i2 = commPattern(level)%recvList(ii)%indices(j,1)
-          j2 = commPattern(level)%recvList(ii)%indices(j,2)
-          k2 = commPattern(level)%recvList(ii)%indices(j,3)
+                fringe%donorProc = recvBufInt(jj + 1)
+                fringe%donorBlock = recvBufInt(jj + 2)
+                fringe%dIndex = recvBufInt(jj + 3)
 
-          fringe%myBlock = d2
-          ! Recompute my Index:
-          il = flowDoms(d2, level, sps)%il
-          jl = flowDoms(d2, level, sps)%jl
-          kl = flowDoms(d2, level, sps)%kl
-          fringe%myIndex = windIndex(i2, j2, k2, il, jl, kl)
+                iFringe = flowDoms(d2, level, sps)%fringePtr(1, i2, j2, k2)
+                if (iFringe > 0) then
+                    ! We have somehwere to to put the data already:
+                    flowDoms(d2, level, sps)%fringes(iFringe) = fringe
+                else
+                    ! We don't somehwhere to put the fringe to add to the list:
+                    call addToFringeList(flowDoms(d2, level, sps)%fringes, &
+                                         flowDoms(d2, level, sps)%nDonors, fringe)
 
-          fringe%donorProc  = recvBufInt(jj+1)
-          fringe%donorBlock = recvBufInt(jj+2)
-          fringe%dIndex     = recvBufInt(jj+3)
+                    ! Note that all three values (pointer, start and end) are
+                    ! all the same.
+                    flowDoms(d2, level, sps)%fringePtr(:, i2, j2, k2) = &
+                        flowDoms(d2, level, sps)%nDonors
+                end if
+                jj = jj + nVar
+            end do
 
-          iFringe = flowDoms(d2, level, sps)%fringePtr(1, i2, j2, k2)
-          if (iFringe > 0) then
-             ! We have somehwere to to put the data already:
-             flowDoms(d2, level, sps)%fringes(iFringe) = fringe
-          else
-             ! We don't somehwhere to put the fringe to add to the list:
-             call addToFringeList(flowDoms(d2, level, sps)%fringes, &
-                  flowDoms(d2, level, sps)%nDonors, fringe)
+        end do intCompleteRecvs
 
-             ! Note that all three values (pointer, start and end) are
-             ! all the same.
-             flowDoms(d2, level, sps)%fringePtr(:, i2, j2, k2) = &
-                  flowDoms(d2, level, sps)%nDonors
-          end if
-          jj = jj + nVar
-       enddo
+        ! Complete the nonblocking sends.
 
-    enddo intCompleteRecvs
+        size = commPattern(level)%nProcSend
+        do i = 1, commPattern(level)%nProcSend
+            call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
+        end do
 
-    ! Complete the nonblocking sends.
+        ! Done with the integer memory.
 
-    size = commPattern(level)%nProcSend
-    do i=1,commPattern(level)%nProcSend
-       call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
-    enddo
+        deallocate (sendBufInt, recvBufInt)
 
-    ! Done with the integer memory.
+        ! Now do the real exchange. We can use the regular real buffers here
+        ! since they are large enough
 
-    deallocate(sendBufInt, recvBufInt)
+        ! ================================================================================
 
-    ! Now do the real exchange. We can use the regular real buffers here
-    ! since they are large enough
+        ! Allocate the memory for the sending and receiving buffers.
+        nVar = 3
 
+        ! Send the variables. The data is first copied into
+        ! the send buffer after which the buffer is sent asap.
 
-    ! ================================================================================
+        ii = 1
+        sends: do i = 1, commPattern(level)%nProcSend
 
+            ! Store the processor id and the size of the message
+            ! a bit easier.
 
-    ! Allocate the memory for the sending and receiving buffers.
-    nVar = 3
+            procID = commPattern(level)%sendProc(i)
+            size = nVar * commPattern(level)%nsend(i)
 
-    ! Send the variables. The data is first copied into
-    ! the send buffer after which the buffer is sent asap.
+            ! Copy the data in the correct part of the send buffer.
 
-    ii = 1
-    sends: do i=1,commPattern(level)%nProcSend
+            jj = ii
+            do j = 1, commPattern(level)%nsend(i)
 
-       ! Store the processor id and the size of the message
-       ! a bit easier.
+                ! Store the block id and the indices of the donor
+                ! a bit easier.
 
-       procID = commPattern(level)%sendProc(i)
-       size   = nVar*commPattern(level)%nsend(i)
+                d1 = commPattern(level)%sendList(i)%block(j)
+                i1 = commPattern(level)%sendList(i)%indices(j, 1)
+                j1 = commPattern(level)%sendList(i)%indices(j, 2)
+                k1 = commPattern(level)%sendList(i)%indices(j, 3)
 
-       ! Copy the data in the correct part of the send buffer.
+                ! Copy real values to buffer
+                iFringe = flowDoms(d1, level, sps)%fringePtr(1, i1, j1, k1)
+                if (iFringe > 0) then
+                    sendBuffer(jj:jj + 2) = flowDoms(d1, level, sps)%fringes(iFringe)%donorFrac
+                else
+                    sendBuffer(jj:jj + 2) = zero
+                end if
+                jj = jj + nVar
 
-       jj = ii
-       do j=1,commPattern(level)%nsend(i)
+            end do
 
-          ! Store the block id and the indices of the donor
-          ! a bit easier.
+            ! Send the data.
 
-          d1 = commPattern(level)%sendList(i)%block(j)
-          i1 = commPattern(level)%sendList(i)%indices(j,1)
-          j1 = commPattern(level)%sendList(i)%indices(j,2)
-          k1 = commPattern(level)%sendList(i)%indices(j,3)
+            call mpi_isend(sendBuffer(ii), size, adflow_real, procId, &
+                           procId, ADflow_comm_world, sendRequests(i), &
+                           ierr)
 
-          ! Copy real values to buffer
-          iFringe = flowDoms(d1, level, sps)%fringePtr(1, i1, j1, k1)
-          if (iFringe > 0) then
-             sendBuffer(jj:jj+2) = flowDoms(d1,level,sps)%fringes(iFringe)%donorFrac
-          else
-             sendBuffer(jj:jj+2) = zero
-          end if
-          jj = jj + nVar
+            ! Set ii to jj for the next processor.
 
-       enddo
+            ii = jj
 
-       ! Send the data.
+        end do sends
 
-       call mpi_isend(sendBuffer(ii), size, adflow_real, procId, &
-            procId, ADflow_comm_world, sendRequests(i),   &
-            ierr)
+        ! Post the nonblocking receives.
 
-       ! Set ii to jj for the next processor.
+        ii = 1
+        receives: do i = 1, commPattern(level)%nProcRecv
 
-       ii = jj
+            ! Store the processor id and the size of the message
+            ! a bit easier.
 
-    enddo sends
+            procID = commPattern(level)%recvProc(i)
+            size = nVar * commPattern(level)%nrecv(i)
 
-    ! Post the nonblocking receives.
+            ! Post the receive.
 
-    ii = 1
-    receives: do i=1,commPattern(level)%nProcRecv
+            call mpi_irecv(recvBuffer(ii), size, adflow_real, procId, &
+                           myId, ADflow_comm_world, recvRequests(i), ierr)
 
-       ! Store the processor id and the size of the message
-       ! a bit easier.
+            ! And update ii.
 
-       procID = commPattern(level)%recvProc(i)
-       size    = nVar*commPattern(level)%nrecv(i)
+            ii = ii + size
 
-       ! Post the receive.
+        end do receives
 
-       call mpi_irecv(recvBuffer(ii), size, adflow_real, procId, &
-            myId, ADflow_comm_world, recvRequests(i), ierr)
+        ! ***********************************************************
+        ! No local copy since we copied the fringes directly and the
+        ! donorFrac info is already there.
+        ! ***********************************************************
 
-       ! And update ii.
+        ! Complete the nonblocking receives in an arbitrary sequence and
+        ! copy the variables from the buffer into the halo's.
 
-       ii = ii + size
+        size = commPattern(level)%nProcRecv
+        completeRecvs: do i = 1, commPattern(level)%nProcRecv
 
-    enddo receives
+            ! Complete any of the requests.
 
-    ! ***********************************************************
-    ! No local copy since we copied the fringes directly and the
-    ! donorFrac info is already there.
-    ! ***********************************************************
+            call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
 
-    ! Complete the nonblocking receives in an arbitrary sequence and
-    ! copy the variables from the buffer into the halo's.
+            ! Copy the data just arrived in the halo's.
 
-    size = commPattern(level)%nProcRecv
-    completeRecvs: do i=1,commPattern(level)%nProcRecv
+            ii = index
+            jj = nVar * commPattern(level)%nrecvCum(ii - 1)
+            do j = 1, commPattern(level)%nrecv(ii)
 
-       ! Complete any of the requests.
+                ! Store the block and the indices of the halo a bit easier.
 
-       call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
+                d2 = commPattern(level)%recvList(ii)%block(j)
+                i2 = commPattern(level)%recvList(ii)%indices(j, 1)
+                j2 = commPattern(level)%recvList(ii)%indices(j, 2)
+                k2 = commPattern(level)%recvList(ii)%indices(j, 3)
 
-       ! Copy the data just arrived in the halo's.
+                ! Now, there should already be a spot available for the
+                ! donorFrac since it was created if necessary in the integer exchange.
+                iFringe = flowDoms(d2, level, sps)%fringePtr(1, i2, j2, k2)
+                flowDoms(d2, level, sps)%fringes(iFringe)%donorFrac = recvBuffer(jj + 1:jj + 3)
 
-       ii = index
-       jj = nVar*commPattern(level)%nrecvCum(ii-1)
-       do j=1,commPattern(level)%nrecv(ii)
+                jj = jj + nVar
+            end do
 
-          ! Store the block and the indices of the halo a bit easier.
+        end do completeRecvs
 
-          d2 = commPattern(level)%recvList(ii)%block(j)
-          i2 = commPattern(level)%recvList(ii)%indices(j,1)
-          j2 = commPattern(level)%recvList(ii)%indices(j,2)
-          k2 = commPattern(level)%recvList(ii)%indices(j,3)
+        ! Complete the nonblocking sends.
 
-          ! Now, there should already be a spot available for the
-          ! donorFrac since it was created if necessary in the integer exchange.
-          iFringe = flowDoms(d2, level, sps)%fringePtr(1, i2, j2, k2)
-          flowDoms(d2, level, sps)%fringes(iFringe)%donorFrac = recvBuffer(jj+1:jj+3)
+        size = commPattern(level)%nProcSend
+        do i = 1, commPattern(level)%nProcSend
+            call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
+        end do
 
-          jj = jj + nVar
-       enddo
+    end subroutine exchangeFringes
 
-    enddo completeRecvs
+    subroutine exchangeStatus(level, sps, commPattern, internal)
+        !
+        !       ExchangeIsCompute exchanges the isCompute flag for the 1 to 1
+        !       connectivity for the given level and sps instance.
+        !
+        use constants
+        use blockPointers, only: nDom, ib, jb, kb, flowDoms
+        use communication, only: commType, internalCommType
+        use utils, only: setPointers
+        use haloExchange, only: whalo1to1intgeneric
 
-    ! Complete the nonblocking sends.
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level, sps
 
-    size = commPattern(level)%nProcSend
-    do i=1,commPattern(level)%nProcSend
-       call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
-    enddo
+        type(commType), dimension(*), intent(in) :: commPattern
+        type(internalCommType), dimension(*), intent(in) :: internal
+        integer(kind=intType) :: nn
 
-  end subroutine exchangeFringes
+        domainLoop: do nn = 1, nDom
+            flowDoms(nn, level, sps)%intCommVars(1)%var => flowDoms(nn, level, sps)%status(:, :, :)
+        end do domainLoop
 
-  subroutine exchangeStatus(level, sps, commPattern, internal)
-    !
-    !       ExchangeIsCompute exchanges the isCompute flag for the 1 to 1
-    !       connectivity for the given level and sps instance.
-    !
-    use constants
-    use blockPointers, only : nDom, ib, jb, kb, flowDoms
-    use communication, only : commType, internalCommType
-    use utils, only : setPointers
-    use haloExchange, only : whalo1to1intgeneric
+        ! Run the generic integer exchange
+        call wHalo1to1IntGeneric(1, level, sps, commPattern, internal)
 
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level, sps
+    end subroutine exchangeStatus
 
-    type(commType),          dimension(*), intent(in) :: commPattern
-    type(internalCommType), dimension(*), intent(in) :: internal
-    integer(kind=intType) :: nn
+    subroutine exchangeStatusTranspose(level, sps, commPattern, internal)
 
-    domainLoop:do nn=1, nDom
-       flowDoms(nn, level, sps)%intCommVars(1)%var => flowDoms(nn, level, sps)%status(:, :, :)
-    end do domainLoop
+        ! exchangeStatusTranspose performs the *TRANSPOSE* of the normal
+        ! halo exchange. That means it takes information *in the halo cells*
+        ! and accumulate it into the *owned cells*. In this particular case,
+        ! we are transmitting the isDonor and isWallDonor information from
+        ! the halos to the owned cells. The "accumulate" operation will be
+        ! an MPI_LOR.  Note that this actually hast he same comm structure
+        ! as 'whalo1to1_b'.
 
-    ! Run the generic integer exchange
-    call wHalo1to1IntGeneric(1, level, sps, commPattern, internal)
+        use constants
+        use blockPointers, only: flowDoms
+        use communication, only: commType, internalCommType, recvBuffer, sendBuffer, myid, &
+                                 adflow_comm_world, sendRequests, recvRequests
+        use oversetUtilities, only: getStatus, setIsDonor, setIsWallDonor
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level, sps
+        type(commType), dimension(*), intent(in) :: commPattern
+        type(internalCommType), dimension(*), intent(in) :: internal
+        !
+        !      Local variables.
+        !
+        integer :: size, procID, ierr, index
+        integer, dimension(mpi_status_size) :: mpiStatus
 
-  end subroutine exchangeStatus
+        integer(kind=intType) :: mm
+        integer(kind=intType) :: i, j, k, ii, jj
+        integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2
+        integer(kind=intType), dimension(:), allocatable :: sendBuf, recvBuf
+        logical :: CisDonor, CisHole, CisCompute, CisFloodSeed, CisFlooded, CisWallDonor, CisReceiver
+        logical :: DisDonor, DisHole, DisCompute, DisFloodSeed, DisFlooded, DisWallDonor, DisReceiver
+        integer(kind=intType) :: cellStatus, donorStatus
 
-  subroutine exchangeStatusTranspose(level, sps, commPattern, internal)
+        ii = commPattern(level)%nProcSend
+        ii = commPattern(level)%nsendCum(ii)
+        jj = commPattern(level)%nProcRecv
+        jj = commPattern(level)%nrecvCum(jj)
 
-    ! exchangeStatusTranspose performs the *TRANSPOSE* of the normal
-    ! halo exchange. That means it takes information *in the halo cells*
-    ! and accumulate it into the *owned cells*. In this particular case,
-    ! we are transmitting the isDonor and isWallDonor information from
-    ! the halos to the owned cells. The "accumulate" operation will be
-    ! an MPI_LOR.  Note that this actually hast he same comm structure
-    ! as 'whalo1to1_b'.
+        ! We are exchanging 1 piece of information
+        allocate (sendBuf(ii), recvBuf(jj), stat=ierr)
 
-    use constants
-    use blockPointers, only : flowDoms
-    use communication, only : commType, internalCommType, recvBuffer, sendBuffer, myid, &
-         adflow_comm_world, sendRequests, recvRequests
-    use oversetUtilities, only : getStatus, setIsDonor, setIsWallDonor
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level, sps
-    type(commType), dimension(*), intent(in)         :: commPattern
-    type(internalCommType), dimension(*), intent(in) :: internal
-    !
-    !      Local variables.
-    !
-    integer :: size, procID, ierr, index
-    integer, dimension(mpi_status_size) :: mpiStatus
+        ! Gather up the seeds into the *recv* buffer. Note we loop
+        ! over nProcRECV here! After the buffer is assembled it is
+        ! sent off.
 
-    integer(kind=intType) :: mm
-    integer(kind=intType) :: i, j, k, ii, jj
-    integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2
-    integer(kind=intType), dimension(:), allocatable :: sendBuf, recvBuf
-    logical :: CisDonor, CisHole, CisCompute, CisFloodSeed, CisFlooded, CisWallDonor, CisReceiver
-    logical :: DisDonor, DisHole, DisCompute, DisFloodSeed, DisFlooded, DisWallDonor, DisReceiver
-    integer(kind=intType) :: cellStatus, donorStatus
+        jj = 1
+        ii = 1
+        recvs: do i = 1, commPattern(level)%nProcRecv
 
+            ! Store the processor id and the size of the message
+            ! a bit easier.
 
+            procID = commPattern(level)%recvProc(i)
+            size = commPattern(level)%nrecv(i)
 
-    ii = commPattern(level)%nProcSend
-    ii = commPattern(level)%nsendCum(ii)
-    jj = commPattern(level)%nProcRecv
-    jj = commPattern(level)%nrecvCum(jj)
+            ! Copy the data into the buffer
 
-    ! We are exchanging 1 piece of information
-    allocate(sendBuf(ii), recvBuf(jj), stat=ierr)
+            do j = 1, commPattern(level)%nrecv(i)
 
-    ! Gather up the seeds into the *recv* buffer. Note we loop
-    ! over nProcRECV here! After the buffer is assembled it is
-    ! sent off.
+                ! Store the block and the indices of the halo a bit easier.
 
-    jj = 1
-    ii = 1
-    recvs: do i=1,commPattern(level)%nProcRecv
+                d2 = commPattern(level)%recvList(i)%block(j)
+                i2 = commPattern(level)%recvList(i)%indices(j, 1)
+                j2 = commPattern(level)%recvList(i)%indices(j, 2)
+                k2 = commPattern(level)%recvList(i)%indices(j, 3)
 
-       ! Store the processor id and the size of the message
-       ! a bit easier.
+                recvBuf(jj) = flowDoms(d2, level, sps)%status(i2, j2, k2)
+                jj = jj + 1
 
-       procID = commPattern(level)%recvProc(i)
-       size    = commPattern(level)%nrecv(i)
+            end do
 
-       ! Copy the data into the buffer
+            ! Send the data.
+            call mpi_isend(recvBuf(ii), size, adflow_integer, procID, &
+                           procID, ADflow_comm_world, sendRequests(i), &
+                           ierr)
 
-       do j=1,commPattern(level)%nrecv(i)
+            ! Set ii to jj for the next processor.
 
-          ! Store the block and the indices of the halo a bit easier.
+            ii = jj
 
-          d2 = commPattern(level)%recvList(i)%block(j)
-          i2 = commPattern(level)%recvList(i)%indices(j,1)
-          j2 = commPattern(level)%recvList(i)%indices(j,2)
-          k2 = commPattern(level)%recvList(i)%indices(j,3)
+        end do recvs
 
-          recvBuf(jj) = flowDoms(d2, level, sps)%status(i2, j2, k2)
-          jj = jj + 1
+        ! Post the nonblocking receives.
 
-       enddo
+        ii = 1
+        sends: do i = 1, commPattern(level)%nProcSend
 
-       ! Send the data.
-       call mpi_isend(recvBuf(ii), size, adflow_integer, procID,  &
-            procID, ADflow_comm_world, sendRequests(i), &
-            ierr)
+            ! Store the processor id and the size of the message
+            ! a bit easier.
 
-       ! Set ii to jj for the next processor.
+            procID = commPattern(level)%sendProc(i)
+            size = commPattern(level)%nsend(i)
 
-       ii = jj
+            ! Post the receive.
 
-    enddo recvs
+            call mpi_irecv(sendBuf(ii), size, adflow_integer, procID, &
+                           myID, ADflow_comm_world, recvRequests(i), ierr)
 
-    ! Post the nonblocking receives.
+            ! And update ii.
 
-    ii = 1
-    sends: do i=1,commPattern(level)%nProcSend
+            ii = ii + size
 
-       ! Store the processor id and the size of the message
-       ! a bit easier.
+        end do sends
 
-       procID = commPattern(level)%sendProc(i)
-       size    = commPattern(level)%nsend(i)
+        ! Copy the local data.
 
-       ! Post the receive.
+        localCopy: do i = 1, internal(level)%ncopy
 
-       call mpi_irecv(sendBuf(ii), size, adflow_integer, procID, &
-            myID, ADflow_comm_world, recvRequests(i), ierr)
+            ! Store the block and the indices of the donor a bit easier.
 
-       ! And update ii.
+            d1 = internal(level)%donorBlock(i)
+            i1 = internal(level)%donorIndices(i, 1)
+            j1 = internal(level)%donorIndices(i, 2)
+            k1 = internal(level)%donorIndices(i, 3)
 
-       ii = ii + size
+            ! Idem for the halo's.
 
-    enddo sends
+            d2 = internal(level)%haloBlock(i)
+            i2 = internal(level)%haloIndices(i, 1)
+            j2 = internal(level)%haloIndices(i, 2)
+            k2 = internal(level)%haloIndices(i, 3)
 
-    ! Copy the local data.
+            ! OR operation. Note we modify the '1' values ie. the 'donors'
+            ! which are now receivers because of the transpose operation.
+            cellStatus = flowDoms(d1, level, sps)%status(i1, j1, k1)
+            call getStatus(cellStatus, CisDonor, CisHole, CisCompute, &
+                           CisFloodSeed, CisFlooded, CisWallDonor, CisReceiver)
 
-    localCopy: do i=1,internal(level)%ncopy
+            donorStatus = flowDoms(d2, level, sps)%status(i2, j2, k2)
+            call getStatus(donorStatus, DisDonor, DisHole, DisCompute, &
+                           DisFloodSeed, DisFlooded, DisWallDonor, DisReceiver)
 
-       ! Store the block and the indices of the donor a bit easier.
+            call setIsDonor(flowDoms(d1, level, sps)%status(i1, j1, k1), &
+                            CIsDonor .or. DisDonor)
 
-       d1 = internal(level)%donorBlock(i)
-       i1 = internal(level)%donorIndices(i,1)
-       j1 = internal(level)%donorIndices(i,2)
-       k1 = internal(level)%donorIndices(i,3)
+            call setIsWallDonor(flowDoms(d1, level, sps)%status(i1, j1, k1), &
+                                CIsWallDonor .or. DisWallDonor)
 
-       ! Idem for the halo's.
+        end do localCopy
 
-       d2 = internal(level)%haloBlock(i)
-       i2 = internal(level)%haloIndices(i,1)
-       j2 = internal(level)%haloIndices(i,2)
-       k2 = internal(level)%haloIndices(i,3)
+        ! Complete the nonblocking receives in an arbitrary sequence and
+        ! copy the variables from the buffer into the halo's.
 
-       ! OR operation. Note we modify the '1' values ie. the 'donors'
-       ! which are now receivers because of the transpose operation.
-       cellStatus = flowDoms(d1, level, sps)%status(i1, j1, k1)
-       call getStatus(cellStatus, CisDonor, CisHole, CisCompute, &
-            CisFloodSeed, CisFlooded, CisWallDonor, CisReceiver)
+        size = commPattern(level)%nProcSend
+        completeSends: do i = 1, commPattern(level)%nProcSend
 
-       donorStatus = flowDoms(d2, level, sps)%status(i2, j2, k2)
-       call getStatus(donorStatus, DisDonor, DisHole, DisCompute, &
-            DisFloodSeed, DisFlooded, DisWallDonor, DisReceiver)
+            ! Complete any of the requests.
 
-       call setIsDonor(flowDoms(d1, level, sps)%status(i1, j1, k1), &
-            CIsDonor .or. DisDonor)
+            call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
 
-       call setIsWallDonor(flowDoms(d1, level, sps)%status(i1, j1, k1), &
-            CIsWallDonor .or. DisWallDonor)
+            ! Copy the data just arrived in the halo's.
 
-    enddo localCopy
+            ii = index
 
-    ! Complete the nonblocking receives in an arbitrary sequence and
-    ! copy the variables from the buffer into the halo's.
+            jj = commPattern(level)%nsendCum(ii - 1)
 
-    size = commPattern(level)%nProcSend
-    completeSends: do i=1,commPattern(level)%nProcSend
+            do j = 1, commPattern(level)%nsend(ii)
 
-       ! Complete any of the requests.
+                ! Store the block and the indices of the halo a bit easier.
 
-       call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
+                d1 = commPattern(level)%sendList(ii)%block(j)
+                i1 = commPattern(level)%sendList(ii)%indices(j, 1)
+                j1 = commPattern(level)%sendList(ii)%indices(j, 2)
+                k1 = commPattern(level)%sendList(ii)%indices(j, 3)
 
-       ! Copy the data just arrived in the halo's.
+                cellStatus = flowDoms(d1, level, sps)%status(i1, j1, k1)
+                call getStatus(cellStatus, CisDonor, CisHole, CisCompute, &
+                               CisFloodSeed, CisFlooded, CisWallDonor, CisReceiver)
+                jj = jj + 1
+                donorStatus = sendBuf(jj)
+                call getStatus(donorStatus, DisDonor, DisHole, DisCompute, &
+                               DisFloodSeed, DisFlooded, DisWallDonor, DisReceiver)
 
-       ii = index
+                call setIsDonor(flowDoms(d1, level, sps)%status(i1, j1, k1), &
+                                CIsDonor .or. DisDonor)
 
-       jj = commPattern(level)%nsendCum(ii-1)
+                call setIsWallDonor(flowDoms(d1, level, sps)%status(i1, j1, k1), &
+                                    CIsWallDonor .or. DisWallDonor)
+            end do
+        end do completeSends
 
-       do j=1,commPattern(level)%nsend(ii)
+        ! Complete the nonblocking sends.
 
-          ! Store the block and the indices of the halo a bit easier.
+        size = commPattern(level)%nProcRecv
+        do i = 1, commPattern(level)%nProcRecv
+            call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
+        end do
 
-          d1 = commPattern(level)%sendList(ii)%block(j)
-          i1 = commPattern(level)%sendList(ii)%indices(j,1)
-          j1 = commPattern(level)%sendList(ii)%indices(j,2)
-          k1 = commPattern(level)%sendList(ii)%indices(j,3)
+        deallocate (recvBuf, sendBuf)
 
+    end subroutine exchangeStatusTranspose
 
-          cellStatus = flowDoms(d1, level, sps)%status(i1, j1, k1)
-          call getStatus(cellStatus, CisDonor, CisHole, CisCompute, &
-               CisFloodSeed, CisFlooded, CisWallDonor, CisReceiver)
-          jj = jj + 1
-          donorStatus = sendBuf(jj)
-          call getStatus(donorStatus, DisDonor, DisHole, DisCompute, &
-               DisFloodSeed, DisFlooded, DisWallDonor, DisReceiver)
+    subroutine setupFringeGlobalInd(level, sps)
 
-          call setIsDonor(flowDoms(d1, level, sps)%status(i1, j1, k1), &
-               CIsDonor .or. DisDonor)
+        use constants
+        use blockPointers
+        use communication
+        use utils, only: EChk
+        implicit none
 
-          call setIsWallDonor(flowDoms(d1, level, sps)%status(i1, j1, k1), &
-               CIsWallDonor .or. DisWallDonor)
-       enddo
-    enddo completeSends
+        ! This subroutine is used to record the global index of each of
+        ! the donors for overset fringes. It has the same comm structure
+        ! as  wOverset and flagInvalidDonors.
 
-    ! Complete the nonblocking sends.
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level, sps
 
-    size = commPattern(level)%nProcRecv
-    do i=1,commPattern(level)%nProcRecv
-       call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
-    enddo
+        !
+        !      Local variables.
+        !
+        integer :: size, procId, ierr, index
+        integer, dimension(mpi_status_size) :: mpiStatus
 
-    deallocate(recvBuf, sendBuf)
+        integer(kind=intType) :: nVar
+        integer(kind=intType) :: i, j, k, ii, jj, iii, jjj, kkk, iFringe
+        integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2, ind
+        integer(kind=intType), dimension(:), allocatable :: sendBufInt
+        integer(kind=intType), dimension(:), allocatable :: recvBufInt
+        logical :: invalid
+        type(commType), pointer :: commPattern
+        type(internalCommType), pointer :: internal
 
-  end subroutine exchangeStatusTranspose
+        commPattern => commPatternOverset(level, sps)
+        internal => internalOverset(level, sps)
 
-  subroutine setupFringeGlobalInd(level, sps)
+        ii = commPattern%nProcSend
+        ii = commPattern%nsendCum(ii)
+        jj = commPattern%nProcRecv
+        jj = commPattern%nrecvCum(jj)
+        nVar = 8
+        allocate (sendBufInt(ii * nVar), recvBufInt(jj * nVar), stat=ierr)
 
-    use constants
-    use blockPointers
-    use communication
-    use utils, only : EChk
-    implicit none
+        ! Send the variables. The data is first copied into
+        ! the send buffer after which the buffer is sent asap.
 
-    ! This subroutine is used to record the global index of each of
-    ! the donors for overset fringes. It has the same comm structure
-    ! as  wOverset and flagInvalidDonors.
+        ii = 1
+        sends: do i = 1, commPattern%nProcSend
 
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level, sps
+            ! Store the processor id and the size of the message
+            ! a bit easier.
 
-    !
-    !      Local variables.
-    !
-    integer :: size, procId, ierr, index
-    integer, dimension(mpi_status_size) :: mpiStatus
-
-    integer(kind=intType) :: nVar
-    integer(kind=intType) :: i, j, k, ii, jj, iii, jjj, kkk, iFringe
-    integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2, ind
-    integer(kind=intType), dimension(:), allocatable :: sendBufInt
-    integer(kind=intType), dimension(:), allocatable :: recvBufInt
-    logical :: invalid
-    type(commType), pointer :: commPattern
-    type(internalCommType), pointer :: internal
-
-    commPattern => commPatternOverset(level, sps)
-    internal => internalOverset(level, sps)
-
-    ii = commPattern%nProcSend
-    ii = commPattern%nsendCum(ii)
-    jj = commPattern%nProcRecv
-    jj = commPattern%nrecvCum(jj)
-    nVar = 8
-    allocate(sendBufInt(ii*nVar), recvBufInt(jj*nVar), stat=ierr)
-
-    ! Send the variables. The data is first copied into
-    ! the send buffer after which the buffer is sent asap.
-
-    ii = 1
-    sends: do i=1,commPattern%nProcSend
-
-       ! Store the processor id and the size of the message
-       ! a bit easier.
-
-       procID = commPattern%sendProc(i)
-       size    = nVar*commPattern%nsend(i)
-
-       ! Copy the data in the correct part of the send buffer.
-
-       jj = ii
-       do j=1,commPattern%nsend(i)
-
-          ! Store the block id and the indices of the donor
-          ! a bit easier.
-
-          d1 = commPattern%sendList(i)%block(j)
-          i1 = commPattern%sendList(i)%indices(j,1)
-          j1 = commPattern%sendList(i)%indices(j,2)
-          k1 = commPattern%sendList(i)%indices(j,3)
-
-          ! Loop over the 8 donors:
-          do kkk=k1, k1+1
-             do jjj=j1, j1+1
-                do iii=i1, i1+1
-                   sendBufInt(jj) = flowDoms(d1, level, sps)%globalCell(iii,jjj,kkk)
-                   jj =jj + 1
-                end do
-             end do
-          end do
-       enddo
+            procID = commPattern%sendProc(i)
+            size = nVar * commPattern%nsend(i)
 
-       ! Send the data.
+            ! Copy the data in the correct part of the send buffer.
 
-       call mpi_isend(sendBufInt(ii), size, adflow_integer, procId,  &
-            procId, ADflow_comm_world, sendRequests(i), &
-            ierr)
-       call EChk(ierr,__FILE__,__LINE__)
+            jj = ii
+            do j = 1, commPattern%nsend(i)
 
-       ! Set ii to jj for the next processor.
+                ! Store the block id and the indices of the donor
+                ! a bit easier.
 
-       ii = jj
+                d1 = commPattern%sendList(i)%block(j)
+                i1 = commPattern%sendList(i)%indices(j, 1)
+                j1 = commPattern%sendList(i)%indices(j, 2)
+                k1 = commPattern%sendList(i)%indices(j, 3)
+
+                ! Loop over the 8 donors:
+                do kkk = k1, k1 + 1
+                    do jjj = j1, j1 + 1
+                        do iii = i1, i1 + 1
+                            sendBufInt(jj) = flowDoms(d1, level, sps)%globalCell(iii, jjj, kkk)
+                            jj = jj + 1
+                        end do
+                    end do
+                end do
+            end do
 
-    enddo sends
+            ! Send the data.
 
-    ! Post the nonblocking receives.
+            call mpi_isend(sendBufInt(ii), size, adflow_integer, procId, &
+                           procId, ADflow_comm_world, sendRequests(i), &
+                           ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-    ii = 1
-    receives: do i=1,commPattern%nProcRecv
+            ! Set ii to jj for the next processor.
 
-       ! Store the processor id and the size of the message
-       ! a bit easier.
+            ii = jj
 
-       procID = commPattern%recvProc(i)
-       size    = nVar*commPattern%nrecv(i)
+        end do sends
 
-       ! Post the receive.
+        ! Post the nonblocking receives.
 
-       call mpi_irecv(recvBufInt(ii), size, adflow_integer, procId, &
-            myId, ADflow_comm_world, recvRequests(i), ierr)
-       call EChk(ierr,__FILE__,__LINE__)
+        ii = 1
+        receives: do i = 1, commPattern%nProcRecv
 
-       ! And update ii.
+            ! Store the processor id and the size of the message
+            ! a bit easier.
 
-       ii = ii + size
+            procID = commPattern%recvProc(i)
+            size = nVar * commPattern%nrecv(i)
 
-    enddo receives
+            ! Post the receive.
 
-    ! Do the local interpolation.
-    localInterp: do i=1,internal%ncopy
+            call mpi_irecv(recvBufInt(ii), size, adflow_integer, procId, &
+                           myId, ADflow_comm_world, recvRequests(i), ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       ! Store the block and the indices of the donor a bit easier.
+            ! And update ii.
 
-       d1 = internal%donorBlock(i)
-       i1 = internal%donorIndices(i, 1)
-       j1 = internal%donorIndices(i, 2)
-       k1 = internal%donorIndices(i, 3)
+            ii = ii + size
 
-       ! Idem for the halo's.
+        end do receives
 
-       d2 = internal%haloBlock(i)
-       i2 = internal%haloIndices(i, 1)
-       j2 = internal%haloIndices(i, 2)
-       k2 = internal%haloIndices(i, 3)
+        ! Do the local interpolation.
+        localInterp: do i = 1, internal%ncopy
 
-       ! Loop over the 8 donors:
-       ind = 0
-       do kkk=k1, k1+1
-          do jjj=j1, j1+1
-             do iii=i1, i1+1
-                ind = ind + 1
-                flowDoms(d2, level, sps)%gInd(ind, i2, j2, k2) = &
-                     flowDoms(d1, level, sps)%globalCell(iii,jjj,kkk)
-             end do
-          end do
-       end do
-    enddo localInterp
+            ! Store the block and the indices of the donor a bit easier.
 
-    ! Complete the nonblocking receives in an arbitrary sequence and
-    ! copy the variables from the buffer into the halo's.
+            d1 = internal%donorBlock(i)
+            i1 = internal%donorIndices(i, 1)
+            j1 = internal%donorIndices(i, 2)
+            k1 = internal%donorIndices(i, 3)
 
-    size = commPattern%nProcRecv
-    completeRecvs: do i=1,commPattern%nProcRecv
+            ! Idem for the halo's.
 
-       ! Complete any of the requests.
+            d2 = internal%haloBlock(i)
+            i2 = internal%haloIndices(i, 1)
+            j2 = internal%haloIndices(i, 2)
+            k2 = internal%haloIndices(i, 3)
 
-       call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
+            ! Loop over the 8 donors:
+            ind = 0
+            do kkk = k1, k1 + 1
+                do jjj = j1, j1 + 1
+                    do iii = i1, i1 + 1
+                        ind = ind + 1
+                        flowDoms(d2, level, sps)%gInd(ind, i2, j2, k2) = &
+                            flowDoms(d1, level, sps)%globalCell(iii, jjj, kkk)
+                    end do
+                end do
+            end do
+        end do localInterp
 
-       ! Copy the data just arrived in the halo's.
+        ! Complete the nonblocking receives in an arbitrary sequence and
+        ! copy the variables from the buffer into the halo's.
 
-       ii = index
-       jj = nVar*commPattern%nrecvCum(ii-1)
-       do j=1,commPattern%nrecv(ii)
+        size = commPattern%nProcRecv
+        completeRecvs: do i = 1, commPattern%nProcRecv
 
-          ! Store the block and the indices of the halo a bit easier.
+            ! Complete any of the requests.
 
-          d2 = commPattern%recvList(ii)%block(j)
-          i2 = commPattern%recvList(ii)%indices(j,1)
-          j2 = commPattern%recvList(ii)%indices(j,2)
-          k2 = commPattern%recvList(ii)%indices(j,3)
+            call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-          ! Just set the 8 values
-          do ind=1,8
-             flowDoms(d2, level, sps)%gInd(ind, i2, j2, k2) = &
-                  recvBufInt(jj+ind)
-          end do
+            ! Copy the data just arrived in the halo's.
 
-          jj = jj + 8
-       enddo
-    end do completeRecvs
+            ii = index
+            jj = nVar * commPattern%nrecvCum(ii - 1)
+            do j = 1, commPattern%nrecv(ii)
 
-    ! Complete the nonblocking sends.
+                ! Store the block and the indices of the halo a bit easier.
 
-    size = commPattern%nProcSend
-    do i=1,commPattern%nProcSend
-       call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-    enddo
-    deallocate(sendBufInt, recvBufInt)
+                d2 = commPattern%recvList(ii)%block(j)
+                i2 = commPattern%recvList(ii)%indices(j, 1)
+                j2 = commPattern%recvList(ii)%indices(j, 2)
+                k2 = commPattern%recvList(ii)%indices(j, 3)
 
-  end subroutine setupFringeGlobalInd
+                ! Just set the 8 values
+                do ind = 1, 8
+                    flowDoms(d2, level, sps)%gInd(ind, i2, j2, k2) = &
+                        recvBufInt(jj + ind)
+                end do
 
-  subroutine exchangeSurfaceDelta(zipperFamList, level, sps, commPattern, internal)
-    !
-    !       ExchangeSurfaceDelta exchanges surface delta to fill up halo
-    !       surface cells from adjacent blocks.
-    !
-    use constants
-    use blockPointers, onlY : nDom, flowDoms, nBocos, BCType, BCFaceID, BCData, &
-         ib, il, jb, jl, kb, kl
-    use communication, only : commType, internalCOmmType, commPatternCell_1st, &
-         internalCell_1st
-    use utils, only : setPointers
-    use haloExchange, only : whalo1to1RealGeneric
-    use sorting, only : famInList
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in), dimension(:) :: zipperFamList
-    integer(kind=intType), intent(in) :: level, sps
-
-    type(commType),          dimension(*), intent(in) :: commPattern
-    type(internalCommType), dimension(*), intent(in) :: internal
-
-    ! Local
-    integer(kind=intType) :: i, j, k, ii, nn, mm
-    real(kind=realType), dimension(:), allocatable :: pSave
-    real(kind=realType), dimension(:, :), pointer :: deltaPtr
-
-    ! Just cheat by exchangint pressure. saving Pressure, dumping deltaPtr into the pressure,
-    ! exchanging that and then restoring the pressure
-
-    do nn=1, nDom
-       call setPointers(nn, level, sps)
-
-       ! Allocate pointer space for the integer flag communication
-       allocate(flowDoms(nn, level, sps)%realCommVars(1)%var(1:ib+1, 1:jb+1, 1:kb+1))
-
-       ! Push the surface iblank back to the generic volume variable rVar1
-       bocoLoop: do mm=1, nBocos
-          famInclude: if (famInList(BCData(mm)%famID, zipperFamList)) then
-
-             select case (BCFaceID(mm))
-             case (iMin)
-                deltaPtr => flowDoms(nn, level, sps)%realCommVars(1)%var(2+1, :, :)
-             case (iMax)
-                deltaPtr => flowDoms(nn, level, sps)%realCommVars(1)%var(il+1, :, :)
-             case (jMin)
-                deltaPtr => flowDoms(nn, level, sps)%realCommVars(1)%var(:, 2+1,  :)
-             case (jMax)
-                deltaPtr => flowDoms(nn, level, sps)%realCommVars(1)%var(:, jl+1, :)
-             case (kMin)
-                deltaPtr => flowDoms(nn, level, sps)%realCommVars(1)%var(:, :, 2+1 )
-             case (kMax)
-                deltaPtr => flowDoms(nn, level, sps)%realCommVars(1)%var(:, :, kl+1)
-             end select
-
-             ! NO HALOS!
-             do j=BCData(mm)%jnBeg+1, BCData(mm)%jnEnd
-                do i=BCData(mm)%inBeg+1, BCData(mm)%inEnd
-
-                   ! Remember to account for the pointer offset since
-                   ! the iblank starts at zero
-                   deltaPtr(i+1, j+1) = BCData(mm)%delta(i, j)
+                jj = jj + 8
+            end do
+        end do completeRecvs
+
+        ! Complete the nonblocking sends.
+
+        size = commPattern%nProcSend
+        do i = 1, commPattern%nProcSend
+            call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+        end do
+        deallocate (sendBufInt, recvBufInt)
+
+    end subroutine setupFringeGlobalInd
+
+    subroutine exchangeSurfaceDelta(zipperFamList, level, sps, commPattern, internal)
+        !
+        !       ExchangeSurfaceDelta exchanges surface delta to fill up halo
+        !       surface cells from adjacent blocks.
+        !
+        use constants
+        use blockPointers, onlY: nDom, flowDoms, nBocos, BCType, BCFaceID, BCData, &
+                                 ib, il, jb, jl, kb, kl
+        use communication, only: commType, internalCOmmType, commPatternCell_1st, &
+                                 internalCell_1st
+        use utils, only: setPointers
+        use haloExchange, only: whalo1to1RealGeneric
+        use sorting, only: famInList
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in), dimension(:) :: zipperFamList
+        integer(kind=intType), intent(in) :: level, sps
+
+        type(commType), dimension(*), intent(in) :: commPattern
+        type(internalCommType), dimension(*), intent(in) :: internal
+
+        ! Local
+        integer(kind=intType) :: i, j, k, ii, nn, mm
+        real(kind=realType), dimension(:), allocatable :: pSave
+        real(kind=realType), dimension(:, :), pointer :: deltaPtr
+
+        ! Just cheat by exchangint pressure. saving Pressure, dumping deltaPtr into the pressure,
+        ! exchanging that and then restoring the pressure
+
+        do nn = 1, nDom
+            call setPointers(nn, level, sps)
+
+            ! Allocate pointer space for the integer flag communication
+            allocate (flowDoms(nn, level, sps)%realCommVars(1)%var(1:ib + 1, 1:jb + 1, 1:kb + 1))
+
+            ! Push the surface iblank back to the generic volume variable rVar1
+            bocoLoop: do mm = 1, nBocos
+                famInclude: if (famInList(BCData(mm)%famID, zipperFamList)) then
+
+                    select case (BCFaceID(mm))
+                    case (iMin)
+                        deltaPtr => flowDoms(nn, level, sps)%realCommVars(1)%var(2 + 1, :, :)
+                    case (iMax)
+                        deltaPtr => flowDoms(nn, level, sps)%realCommVars(1)%var(il + 1, :, :)
+                    case (jMin)
+                        deltaPtr => flowDoms(nn, level, sps)%realCommVars(1)%var(:, 2 + 1, :)
+                    case (jMax)
+                        deltaPtr => flowDoms(nn, level, sps)%realCommVars(1)%var(:, jl + 1, :)
+                    case (kMin)
+                        deltaPtr => flowDoms(nn, level, sps)%realCommVars(1)%var(:, :, 2 + 1)
+                    case (kMax)
+                        deltaPtr => flowDoms(nn, level, sps)%realCommVars(1)%var(:, :, kl + 1)
+                    end select
+
+                    ! NO HALOS!
+                    do j = BCData(mm)%jnBeg + 1, BCData(mm)%jnEnd
+                        do i = BCData(mm)%inBeg + 1, BCData(mm)%inEnd
+
+                            ! Remember to account for the pointer offset since
+                            ! the iblank starts at zero
+                            deltaPtr(i + 1, j + 1) = BCData(mm)%delta(i, j)
+                        end do
+                    end do
+                end if famInclude
+            end do bocoLoop
+        end do
+
+        ! Exchange the variane
+        call whalo1to1RealGeneric(1, level, sps, commPatternCell_1st, internalCell_1st)
+
+        ! Copy back out
+        ii = 0
+        do nn = 1, nDom
+            call setPointers(nn, level, sps)
+
+            ! Extract the surface iblank from the volume.
+            bocoLoop2: do mm = 1, nBocos
+                famInclude2: if (famInList(BCData(mm)%famID, zipperFamList)) then
+
+                    select case (BCFaceID(mm))
+                    case (iMin)
+                        deltaPtr => flowDoms(nn, level, sps)%realCommVars(1)%var(2 + 1, :, :)
+                    case (iMax)
+                        deltaPtr => flowDoms(nn, level, sps)%realCommVars(1)%var(il + 1, :, :)
+                    case (jMin)
+                        deltaPtr => flowDoms(nn, level, sps)%realCommVars(1)%var(:, 2 + 1, :)
+                    case (jMax)
+                        deltaPtr => flowDoms(nn, level, sps)%realCommVars(1)%var(:, jl + 1, :)
+                    case (kMin)
+                        deltaPtr => flowDoms(nn, level, sps)%realCommVars(1)%var(:, :, 2 + 1)
+                    case (kMax)
+                        deltaPtr => flowDoms(nn, level, sps)%realCommVars(1)%var(:, :, jl + 1)
+                    end select
+
+                    ! INCLUDE THE HALOS!
+                    do j = BCData(mm)%jcBeg, BCData(mm)%jcEnd
+                        do i = BCData(mm)%icBeg, BCData(mm)%icEnd
+
+                            ! Remember to account for the pointer offset since
+                            ! the iblank starts at zero
+                            BCData(mm)%delta(i, j) = deltaPtr(i + 1, j + 1)
+                        end do
+                    end do
+                end if famInclude2
+            end do bocoLoop2
+
+            ! Now deallocate this pointer
+            deallocate (flowDoms(nn, level, sps)%realCommVars(1)%var)
+        end do
+    end subroutine exchangeSurfaceDelta
+
+    subroutine exchangeSurfaceIblanks(zipperFamList, level, sps, commPattern, internal)
+        !
+        !       ExchangeIblank exchanges the 1 to 1 internal halo's for the
+        !       given level and sps instance.
+        !
+        use constants
+        use blockPointers, only: nDom, ib, jb, kb, iBlank, BCData, &
+                                 il, jl, kl, BCFaceID, nBocos, flowDoms, BCType
+        use communication, only: commType, internalCommType
+        use utils, only: setPointers
+        use haloExchange, only: whalo1to1intgeneric
+        use sorting, only: famInList
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level, sps
+        integer(kind=intType), intent(in), dimension(:) :: zipperFamList
+
+        type(commType), dimension(*), intent(in) :: commPattern
+        type(internalCommType), dimension(*), intent(in) :: internal
+
+        ! Local
+        integer(kind=intType) :: i, j, k, ii, nn, mm
+        integer(kind=intType), dimension(:), allocatable :: iBlankSave
+        integer(kind=intType), dimension(:, :), pointer :: ibp
+
+        ! Just cheat by saving iBlank iblank array, resusing itand
+        ii = 0
+        do nn = 1, nDom
+            call setPointers(nn, level, sps)
+            ii = ii + (ib + 1) * (jb + 1) * (kb + 1)
+        end do
+
+        allocate (iBlankSave(ii))
+        ii = 0
+        do nn = 1, nDom
+            call setPointers(nn, level, sps)
+            do k = 0, kb
+                do j = 0, jb
+                    do i = 0, ib
+                        ii = ii + 1
+                        iBlankSave(ii) = iblank(i, j, k)
+                        ! The following algorithm uses the volume iblank array to communicate surface blocking
+                        ! across block boundaries. However, for h-topology meshes, for example at a sharp
+                        ! trailing edge this breaks, because the surface on the top an bottom of the shape are
+                        ! not topologically adjacent in the block structure. If we leave the existing volume
+                        ! blanking in place the correct values are communicated, if we zero it as done originally
+                        ! the connection is broken. Therefore the following line is commented out.
+                        !iblank(i,j,k) = 0 !commented out to fix issue with h-topology blocks on the zipper
+
+                    end do
+                end do
+            end do
+
+            ! Push the surface iblank back to the volume:
+            bocoLoop: do mm = 1, nBocos
+                famInclude: if (famInList(BCData(mm)%famID, zipperFamList)) then
+
+                    select case (BCFaceID(mm))
+                    case (iMin)
+                        ibp => iblank(2, :, :)
+                    case (iMax)
+                        ibp => iblank(il, :, :)
+                    case (jMin)
+                        ibp => iblank(:, 2, :)
+                    case (jMax)
+                        ibp => iblank(:, jl, :)
+                    case (kMin)
+                        ibp => iblank(:, :, 2)
+                    case (kMax)
+                        ibp => iblank(:, :, kl)
+                    end select
+
+                    ! NO HALOS!
+                    do j = BCData(mm)%jnBeg + 1, BCData(mm)%jnEnd
+                        do i = BCData(mm)%inBeg + 1, BCData(mm)%inEnd
+
+                            ! Remember to account for the pointer offset since
+                            ! the iblank starts at zero
+                            ibp(i + 1, j + 1) = BCData(mm)%iBlank(i, j)
+                        end do
+                    end do
+                end if famInclude
+            end do bocoLoop
+        end do
+
+        ! Exchange iblanks
+        domainLoop: do nn = 1, nDom
+            flowDoms(nn, level, sps)%intCommVars(1)%var => &
+                flowDoms(nn, level, sps)%iblank(:, :, :)
+        end do domainLoop
+
+        ! Run the generic integer exchange
+        call wHalo1to1IntGeneric(1, level, sps, commPattern, internal)
+
+        ii = 0
+        do nn = 1, nDom
+            call setPointers(nn, level, sps)
+
+            ! Extract the surface iblank from the volume.
+            bocoLoop2: do mm = 1, nBocos
+                famInclude2: if (famInList(BCData(mm)%famID, zipperFamList)) then
+
+                    select case (BCFaceID(mm))
+                    case (iMin)
+                        ibp => iblank(2, :, :)
+                    case (iMax)
+                        ibp => iblank(il, :, :)
+                    case (jMin)
+                        ibp => iblank(:, 2, :)
+                    case (jMax)
+                        ibp => iblank(:, jl, :)
+                    case (kMin)
+                        ibp => iblank(:, :, 2)
+                    case (kMax)
+                        ibp => iblank(:, :, kl)
+                    end select
+
+                    ! INCLUDE THE HALOS!
+                    do j = BCData(mm)%jnBeg, BCData(mm)%jnEnd + 1
+                        do i = BCData(mm)%inBeg, BCData(mm)%inEnd + 1
+                            ! Remember to account for the pointer offset since
+                            ! the iblank starts at zero
+                            BCData(mm)%iBlank(i, j) = ibp(i + 1, j + 1)
+                        end do
+                    end do
+                end if famInclude2
+            end do bocoLoop2
+
+            ! Restore the saved array
+            do k = 0, kb
+                do j = 0, jb
+                    do i = 0, ib
+                        ii = ii + 1
+                        iBlank(i, j, k) = iBlankSave(ii)
+                    end do
                 end do
-             end do
-          end if famInclude
-       end do bocoLoop
-    end do
-
-    ! Exchange the variane
-    call whalo1to1RealGeneric(1, level, sps, commPatternCell_1st, internalCell_1st)
-
-    ! Copy back out
-    ii = 0
-    do nn=1, nDom
-       call setPointers(nn, level, sps)
-
-       ! Extract the surface iblank from the volume.
-       bocoLoop2: do mm=1, nBocos
-          famInclude2: if (famInList(BCData(mm)%famID, zipperFamList)) then
-
-             select case (BCFaceID(mm))
-             case (iMin)
-                deltaPtr => flowDoms(nn, level, sps)%realCommVars(1)%var(2+1, :, :)
-             case (iMax)
-                deltaPtr => flowDoms(nn, level, sps)%realCommVars(1)%var(il+1, :, :)
-             case (jMin)
-                deltaPtr => flowDoms(nn, level, sps)%realCommVars(1)%var(:, 2+1,  :)
-             case (jMax)
-                deltaPtr => flowDoms(nn, level, sps)%realCommVars(1)%var(:, jl+1, :)
-             case (kMin)
-                deltaPtr => flowDoms(nn, level, sps)%realCommVars(1)%var(:, :, 2+1 )
-             case (kMax)
-                deltaPtr => flowDoms(nn, level, sps)%realCommVars(1)%var(:, :, jl+1)
-             end select
-
-             ! INCLUDE THE HALOS!
-             do j=BCData(mm)%jcBeg, BCData(mm)%jcEnd
-                do i=BCData(mm)%icBeg, BCData(mm)%icEnd
-
-                   ! Remember to account for the pointer offset since
-                   ! the iblank starts at zero
-                   BCData(mm)%delta(i,j) = deltaPtr(i+1, j+1)
+            end do
+        end do
+        deallocate (iblankSave)
+    end subroutine exchangeSurfaceIblanks
+
+    subroutine emptyOversetComm(level, sps)
+
+        !  Short cut function to make empty overset comm structure for
+        !  problems that do not use overset meshes.
+
+        use constants
+        use communication, only: commPatternOverset, internalOverset
+        implicit none
+
+        ! Function
+        integer(kind=intType), intent(in) :: level, sps
+
+        ! Working
+        integer(Kind=intType) :: nn, mm, ierr
+
+        commPatternOverset(level, sps)%nProcRecv = 0
+        allocate (commPatternOverset(level, sps)%recvProc(0))
+        allocate (commPatternOverset(level, sps)%nRecv(0))
+        allocate (commPatternOverset(level, sps)%recvList(0))
+
+        commPatternOverset(level, sps)%nProcSend = 0
+        allocate (commPatternOverset(level, sps)%sendProc(0))
+        allocate (commPatternOverset(level, sps)%nSend(0))
+        allocate (commPatternOverset(level, sps)%sendList(0))
+
+        internalOverset(level, sps)%nCopy = 0
+        allocate (internalOverset(level, sps)%donorBlock(0))
+        allocate (internalOverset(level, sps)%donorIndices(0, 3))
+        allocate (internalOverset(level, sps)%donorInterp(0, 8))
+        allocate (internalOverset(level, sps)%haloBlock(0))
+        allocate (internalOverset(level, sps)%haloIndices(0, 3))
+
+    end subroutine emptyOversetComm
+
+    subroutine updateOversetConnectivity(level, sps)
+
+        ! This subroutine updates the overset connectivity for a perturbed
+        ! mesh. It does *not* completely redo the connectivity. Rather, a
+        ! newton search on the existing donors are performed using the
+        ! updated coordinates. This type of update is only applicable if the
+        ! entire volume mesh is warped as one like with USMesh in IDWarp. This
+        ! actually ends up being a fairly small correction most of the time,
+        ! however, desipite looks to the contrary is actually quite fast to
+        ! run.
+
+        use constants
+        use communication
+        use blockPointers, only: nDom, il, jl, kl, xSeed, flowDoms, x, ib, jb, kb, &
+                                 ie, je, ke, fringes, scratch
+        use haloExchange, only: whalo1to1RealGeneric
+        use oversetUtilities, only: newtonUpdate, fracToWeights
+        use utils, only: setPointers
+
+        implicit none
+
+        ! Input
+        integer(kind=intType), intent(in) :: level, sps
+        type(commType), pointer :: commPattern
+        type(internalCommType), pointer :: internal
+
+        ! Working
+        integer(kind=intType) :: nn, ii, jj, ierr, i, j, k, d1, i1, j1, k1, d2, i2, j2, k2
+        integer(kind=intType) :: size, procID, index, iii, jjj
+        integer, dimension(mpi_status_size) :: mpiStatus
+        real(kind=realType) :: frac(3), frac0(3), xCen(3)
+        integer(kind=intType), dimension(8), parameter :: indices = (/1, 2, 4, 3, 5, 6, 8, 7/)
+
+        ! Set a tolerance for checking whether fractions are between 0 and 1
+        real(kind=realType) :: fracTol = 1e-4
+
+        ! Pointers to the overset comms to make it easier to read
+        commPattern => commPatternOverset(level, sps)
+        internal => internalOverset(level, sps)
+
+        ! Step 1: Since we need to update donors for all cells including the
+        ! donors for double halos, we must know the new cell center
+        ! locations for the all of these receivers. Unfortunately, the
+        ! double halos don't have coordinates, so we must first perform a
+        ! (forward) block-to-block halo exchange to populate the xSeed
+        ! values for all cells, including double halos.
+
+        do nn = 1, nDom
+            call setPointers(nn, level, sps)
+
+            if (.not. associated(flowDoms(nn, level, sps)%xSeed)) then
+                allocate (flowDoms(nn, level, sps)%XSeed(0:ib, 0:jb, 0:kb, 3))
+            end if
+            xSeed => flowDoms(nn, level, sps)%xSeed
+            xSeed = zero
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+                        xSeed(i, j, k, :) = eighth * ( &
+                                            x(i - 1, j - 1, k - 1, :) + &
+                                            x(i, j - 1, k - 1, :) + &
+                                            x(i - 1, j, k - 1, :) + &
+                                            x(i, j, k - 1, :) + &
+                                            x(i - 1, j - 1, k, :) + &
+                                            x(i, j - 1, k, :) + &
+                                            x(i - 1, j, k, :) + &
+                                            x(i, j, k, :)) !+ fringes(i,j,k)%offset
+                    end do
                 end do
-             end do
-          end if famInclude2
-       end do bocoLoop2
+            end do
+        end do
 
-       ! Now deallocate this pointer
-       deallocate(flowDoms(nn, level, sps)%realCommVars(1)%var)
-    end do
-  end subroutine exchangeSurfaceDelta
+        ! Exchange the xSeeds.
+        do nn = 1, nDom
+            flowDoms(nn, level, sps)%realCommVars(1)%var => flowDoms(nn, level, sps)%xSeed(:, :, :, 1)
+            flowDoms(nn, level, sps)%realCommVars(2)%var => flowDoms(nn, level, sps)%xSeed(:, :, :, 2)
+            flowDoms(nn, level, sps)%realCommVars(3)%var => flowDoms(nn, level, sps)%xSeed(:, :, :, 3)
+        end do
 
-  subroutine exchangeSurfaceIblanks(zipperFamList, level, sps, commPattern, internal)
-    !
-    !       ExchangeIblank exchanges the 1 to 1 internal halo's for the
-    !       given level and sps instance.
-    !
-    use constants
-    use blockPointers, only : nDom, ib, jb, kb, iBlank, BCData, &
-         il, jl, kl, BCFaceID, nBocos, flowDoms, BCType
-    use communication, only : commType, internalCommType
-    use utils, only : setPointers
-    use haloExchange, only : whalo1to1intgeneric
-    use sorting, only : famInList
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level, sps
-    integer(kind=intType), intent(in), dimension(:) :: zipperFamList
-
-    type(commType),          dimension(*), intent(in) :: commPattern
-    type(internalCommType), dimension(*), intent(in) :: internal
-
-    ! Local
-    integer(kind=intType) :: i, j, k, ii, nn, mm
-    integer(kind=intType), dimension(:), allocatable :: iBlankSave
-    integer(kind=intType), dimension(:, :), pointer :: ibp
-
-    ! Just cheat by saving iBlank iblank array, resusing itand
-    ii = 0
-    do nn=1, nDom
-       call setPointers(nn, level, sps)
-       ii = ii + (ib+1)*(jb+1)*(kb+1)
-    end do
-
-    allocate(iBlankSave(ii))
-    ii = 0
-    do nn=1, nDom
-       call setPointers(nn, level, sps)
-       do k=0,kb
-          do j=0,jb
-             do i=0,ib
-                ii =ii + 1
-                iBlankSave(ii) = iblank(i,j,k)
-                ! The following algorithm uses the volume iblank array to communicate surface blocking
-                ! across block boundaries. However, for h-topology meshes, for example at a sharp
-                ! trailing edge this breaks, because the surface on the top an bottom of the shape are
-                ! not topologically adjacent in the block structure. If we leave the existing volume
-                ! blanking in place the correct values are communicated, if we zero it as done originally
-                ! the connection is broken. Therefore the following line is commented out.
-                !iblank(i,j,k) = 0 !commented out to fix issue with h-topology blocks on the zipper
-                
-             end do
-          end do
-       end do
-
-       ! Push the surface iblank back to the volume:
-       bocoLoop: do mm=1, nBocos
-          famInclude: if (famInList(BCData(mm)%famID, zipperFamList)) then
-
-             select case (BCFaceID(mm))
-             case (iMin)
-                ibp => iblank(2, :, :)
-             case (iMax)
-                ibp => iblank(il, :, :)
-             case (jMin)
-                ibp => iblank(:, 2, :)
-             case (jMax)
-                ibp => iblank(:, jl, :)
-             case (kMin)
-                ibp => iblank(:, :, 2)
-             case (kMax)
-                ibp => iblank(:, :, kl)
-             end select
-
-             ! NO HALOS!
-             do j=BCData(mm)%jnBeg+1, BCData(mm)%jnEnd
-                do i=BCData(mm)%inBeg+1, BCData(mm)%inEnd
-
-                   ! Remember to account for the pointer offset since
-                   ! the iblank starts at zero
-                   ibp(i+1, j+1) = BCData(mm)%iBlank(i,j)
-                end do
-             end do
-          end if famInclude
-       end do bocoLoop
-    end do
-
-    ! Exchange iblanks
-    domainLoop:do nn=1, nDom
-       flowDoms(nn, level, sps)%intCommVars(1)%var => &
-            flowDoms(nn, level, sps)%iblank(:, :, :)
-    end do domainLoop
-
-    ! Run the generic integer exchange
-    call wHalo1to1IntGeneric(1, level, sps, commPattern, internal)
-
-    ii = 0
-    do nn=1, nDom
-       call setPointers(nn, level, sps)
-
-       ! Extract the surface iblank from the volume.
-       bocoLoop2: do mm=1, nBocos
-          famInclude2: if (famInList(BCData(mm)%famID, zipperFamList)) then
-
-             select case (BCFaceID(mm))
-             case (iMin)
-                ibp => iblank(2, :, :)
-             case (iMax)
-                ibp => iblank(il, :, :)
-             case (jMin)
-                ibp => iblank(:, 2, :)
-             case (jMax)
-                ibp => iblank(:, jl, :)
-             case (kMin)
-                ibp => iblank(:, :, 2)
-             case (kMax)
-                ibp => iblank(:, :, kl)
-             end select
-
-             ! INCLUDE THE HALOS!
-             do j=BCData(mm)%jnBeg, BCData(mm)%jnEnd+1
-                do i=BCData(mm)%inBeg, BCData(mm)%inEnd+1
-                   ! Remember to account for the pointer offset since
-                   ! the iblank starts at zero
-                   BCData(mm)%iBlank(i,j) = ibp(i+1, j+1)
-                end do
-             end do
-          end if famInclude2
-       end do bocoLoop2
-
-       ! Restore the saved array
-       do k=0,kb
-          do j=0,jb
-             do i=0,ib
-                ii =ii + 1
-                iBlank(i,j,k) = iBlankSave(ii)
-             end do
-          end do
-       end do
-    end do
-    deallocate(iblankSave)
-  end subroutine exchangeSurfaceIblanks
-
-  subroutine emptyOversetComm(level, sps)
-
-    !  Short cut function to make empty overset comm structure for
-    !  problems that do not use overset meshes.
-
-    use constants
-    use communication, only : commPatternOverset, internalOverset
-    implicit none
-
-    ! Function
-    integer(kind=intType), intent(in) :: level, sps
-
-    ! Working
-    integer(Kind=intType) :: nn, mm, ierr
-
-    commPatternOverset(level, sps)%nProcRecv = 0
-    allocate(commPatternOverset(level, sps)%recvProc(0))
-    allocate(commPatternOverset(level, sps)%nRecv(0))
-    allocate(commPatternOverset(level, sps)%recvList(0))
-
-    commPatternOverset(level, sps)%nProcSend = 0
-    allocate(commPatternOverset(level, sps)%sendProc(0))
-    allocate(commPatternOverset(level, sps)%nSend(0))
-    allocate(commPatternOverset(level, sps)%sendList(0))
-
-    internalOverset(level, sps)%nCopy = 0
-    allocate(internalOverset(level, sps)%donorBlock(0))
-    allocate(internalOverset(level, sps)%donorIndices(0, 3))
-    allocate(internalOverset(level, sps)%donorInterp(0, 8))
-    allocate(internalOverset(level, sps)%haloBlock(0))
-    allocate(internalOverset(level, sps)%haloIndices(0, 3))
-
-  end subroutine emptyOversetComm
-
-  subroutine updateOversetConnectivity(level, sps)
-
-    ! This subroutine updates the overset connectivity for a perturbed
-    ! mesh. It does *not* completely redo the connectivity. Rather, a
-    ! newton search on the existing donors are performed using the
-    ! updated coordinates. This type of update is only applicable if the
-    ! entire volume mesh is warped as one like with USMesh in IDWarp. This
-    ! actually ends up being a fairly small correction most of the time,
-    ! however, desipite looks to the contrary is actually quite fast to
-    ! run.
-
-    use constants
-    use communication
-    use blockPointers, only : nDom, il, jl, kl, xSeed, flowDoms, x, ib, jb, kb, &
-         ie, je, ke, fringes,scratch
-    use haloExchange, only : whalo1to1RealGeneric
-    use oversetUtilities, only : newtonUpdate, fracToWeights
-    use utils, only : setPointers
-
-    implicit none
-
-    ! Input
-    integer(kind=intType), intent(in) :: level, sps
-    type(commType), pointer :: commPattern
-    type(internalCommType), pointer :: internal
-
-    ! Working
-    integer(kind=intType) :: nn, ii,jj, ierr,  i, j, k, d1, i1, j1, k1, d2, i2, j2, k2
-    integer(kind=intType) :: size, procID, index, iii,jjj
-    integer, dimension(mpi_status_size) :: mpiStatus
-    real(kind=realType) :: frac(3), frac0(3), xCen(3)
-    integer(kind=intType), dimension(8), parameter :: indices=(/1,2,4,3,5,6,8,7/)
-
-    ! Set a tolerance for checking whether fractions are between 0 and 1
-    real(kind=realType) :: fracTol=1e-4
-
-    ! Pointers to the overset comms to make it easier to read
-    commPattern => commPatternOverset(level, sps)
-    internal => internalOverset(level, sps)
-
-    ! Step 1: Since we need to update donors for all cells including the
-    ! donors for double halos, we must know the new cell center
-    ! locations for the all of these receivers. Unfortunately, the
-    ! double halos don't have coordinates, so we must first perform a
-    ! (forward) block-to-block halo exchange to populate the xSeed
-    ! values for all cells, including double halos.
-
-    do nn=1, nDom
-       call setPointers(nn, level, sps)
-
-       if (.not. associated(flowDoms(nn, level, sps)%xSeed)) then
-          allocate(flowDoms(nn, level, sps)%XSeed(0:ib, 0:jb, 0:kb, 3))
-       end if
-       xSeed => flowDoms(nn, level, sps)%xSeed
-       xSeed = zero
-       do k=2,kl
-          do j=2, jl
-             do i=2, il
-                xSeed(i, j, k, :) = eighth*(&
-                     x(i-1, j-1, k-1, :) + &
-                     x(i  , j-1, k-1, :) + &
-                     x(i-1, j  , k-1, :) + &
-                     x(i  , j  , k-1, :) + &
-                     x(i-1, j-1, k  , :) + &
-                     x(i  , j-1, k  , :) + &
-                     x(i-1, j  , k  , :) + &
-                     x(i  , j  , k  , :)) !+ fringes(i,j,k)%offset
-             end do
-          end do
-       end do
-    end do
-
-    ! Exchange the xSeeds.
-    do nn=1, nDom
-       flowDoms(nn, level, sps)%realCommVars(1)%var => flowDoms(nn, level, sps)%xSeed(:, :, :, 1)
-       flowDoms(nn, level, sps)%realCommVars(2)%var => flowDoms(nn, level, sps)%xSeed(:, :, :, 2)
-       flowDoms(nn, level, sps)%realCommVars(3)%var => flowDoms(nn, level, sps)%xSeed(:, :, :, 3)
-    end do
-
-    ! Run the (foward) generic halo exchange.
-    call wHalo1to1RealGeneric(3, level, sps, commPatternCell_2nd, internalCell_2nd)
-
-    ! Step 2: Next we need to communicate the xSeeds to their donor
-    ! procs. This means running the overset exchange in REVERSE (ie from
-    ! receiver to donor).  Most of this code will look like
-    ! wOverset_b. We will runt he newtonUpdate code (below) on the fly
-    ! as we receive the data, which should hide some of the comm time.
-
-    ! Gather up the seeds into the *recv* buffer. Note we loop over
-    ! nProcRECV here! After the buffer is assembled it is send off.
-
-    jj = 1
-    ii = 1
-    recvs: do i=1,commPattern%nProcRecv
+        ! Run the (foward) generic halo exchange.
+        call wHalo1to1RealGeneric(3, level, sps, commPatternCell_2nd, internalCell_2nd)
 
-       ! Store the processor id and the size of the message
-       ! a bit easier.
+        ! Step 2: Next we need to communicate the xSeeds to their donor
+        ! procs. This means running the overset exchange in REVERSE (ie from
+        ! receiver to donor).  Most of this code will look like
+        ! wOverset_b. We will runt he newtonUpdate code (below) on the fly
+        ! as we receive the data, which should hide some of the comm time.
 
-       procID = commPattern%recvProc(i)
-       size    = 3*commPattern%nrecv(i)
+        ! Gather up the seeds into the *recv* buffer. Note we loop over
+        ! nProcRECV here! After the buffer is assembled it is send off.
 
-       ! Copy the data into the buffer
+        jj = 1
+        ii = 1
+        recvs: do i = 1, commPattern%nProcRecv
 
-       do j=1,commPattern%nrecv(i)
+            ! Store the processor id and the size of the message
+            ! a bit easier.
 
-          ! Store the block and the indices to make code a bit easier to read
+            procID = commPattern%recvProc(i)
+            size = 3 * commPattern%nrecv(i)
 
-          d2 = commPattern%recvList(i)%block(j)
-          i2 = commPattern%recvList(i)%indices(j,1)
-          j2 = commPattern%recvList(i)%indices(j,2)
-          k2 = commPattern%recvList(i)%indices(j,3)
+            ! Copy the data into the buffer
 
-          ! Copy the xSeed
-          recvBuffer(jj)   = flowDoms(d2,level,sps)%xSeed(i2,j2,k2,1)
-          recvBuffer(jj+1) = flowDoms(d2,level,sps)%xSeed(i2,j2,k2,2)
-          recvBuffer(jj+2) = flowDoms(d2,level,sps)%xSeed(i2,j2,k2,3)
-          jj = jj + 3
-       end do
+            do j = 1, commPattern%nrecv(i)
 
-       ! Send the data.
-       call mpi_isend(recvBuffer(ii), size, adflow_real, procID,  &
-            procID, ADflow_comm_world, recvRequests(i), &
-            ierr)
+                ! Store the block and the indices to make code a bit easier to read
 
-       ! Set ii to jj for the next processor.
+                d2 = commPattern%recvList(i)%block(j)
+                i2 = commPattern%recvList(i)%indices(j, 1)
+                j2 = commPattern%recvList(i)%indices(j, 2)
+                k2 = commPattern%recvList(i)%indices(j, 3)
 
-       ii = jj
+                ! Copy the xSeed
+                recvBuffer(jj) = flowDoms(d2, level, sps)%xSeed(i2, j2, k2, 1)
+                recvBuffer(jj + 1) = flowDoms(d2, level, sps)%xSeed(i2, j2, k2, 2)
+                recvBuffer(jj + 2) = flowDoms(d2, level, sps)%xSeed(i2, j2, k2, 3)
+                jj = jj + 3
+            end do
 
-    end do recvs
+            ! Send the data.
+            call mpi_isend(recvBuffer(ii), size, adflow_real, procID, &
+                           procID, ADflow_comm_world, recvRequests(i), &
+                           ierr)
 
-    ! Post the nonblocking receives.
+            ! Set ii to jj for the next processor.
 
-    ii = 1
-    sends: do i=1,commPattern%nProcSend
+            ii = jj
 
-       ! Store the processor id and the size of the message
-       ! a bit easier.
+        end do recvs
 
-       procID = commPattern%sendProc(i)
-       size    = 3*commPattern%nsend(i)
+        ! Post the nonblocking receives.
 
-       ! Post the receive.
+        ii = 1
+        sends: do i = 1, commPattern%nProcSend
 
-       call mpi_irecv(sendBuffer(ii), size, adflow_real, procId, &
-            myId, ADflow_comm_world, sendRequests(i), ierr)
+            ! Store the processor id and the size of the message
+            ! a bit easier.
 
-       ! And update ii.
+            procID = commPattern%sendProc(i)
+            size = 3 * commPattern%nsend(i)
 
-       ii = ii + size
+            ! Post the receive.
 
-    enddo sends
+            call mpi_irecv(sendBuffer(ii), size, adflow_real, procId, &
+                           myId, ADflow_comm_world, sendRequests(i), ierr)
 
-    ! Do the local interpolation.
+            ! And update ii.
 
-    localInterp: do i=1,internal%ncopy
+            ii = ii + size
 
-       ! Store the block and the indices of the donor a bit easier.
+        end do sends
 
-       d1 = internal%donorBlock(i)
-       i1 = internal%donorIndices(i, 1)
-       j1 = internal%donorIndices(i, 2)
-       k1 = internal%donorIndices(i, 3)
+        ! Do the local interpolation.
 
-       ! Idem for the halo's.
+        localInterp: do i = 1, internal%ncopy
 
-       d2 = internal%haloBlock(i)
-       i2 = internal%haloIndices(i, 1)
-       j2 = internal%haloIndices(i, 2)
-       k2 = internal%haloIndices(i, 3)
+            ! Store the block and the indices of the donor a bit easier.
 
-       ! xCen is the '2'. This was the receiver, but since this is
-       ! reverse, it's now the "input"
-       xCen = flowDoms(d2, level, sps)%xSeed(i2, j2, k2, :)
+            d1 = internal%donorBlock(i)
+            i1 = internal%donorIndices(i, 1)
+            j1 = internal%donorIndices(i, 2)
+            k1 = internal%donorIndices(i, 3)
 
-       ! Store in the comm structure. We need it for the derivatives.
-       internal%xCen(i, :) = xCen
+            ! Idem for the halo's.
 
-       ! Do newton update
-       frac0 = (/half, half, half/)
-       call newtonUpdate(xCen, &
-            flowDoms(d1, level, sps)%x(i1-1:i1+1, j1-1:j1+1, k1-1:k1+1, :), frac0, frac)
+            d2 = internal%haloBlock(i)
+            i2 = internal%haloIndices(i, 1)
+            j2 = internal%haloIndices(i, 2)
+            k2 = internal%haloIndices(i, 3)
 
-       ! Check if the fractions are between 0 and 1
-       if (MAXVAL(frac) > one + fracTol .or. MINVAL(frac) < zero - fracTol) then
-          print *, "Invalid overset connectivity update. Use 'frozen' or 'full' oversetUpdateMode instead."
-          error stop
-       end if
+            ! xCen is the '2'. This was the receiver, but since this is
+            ! reverse, it's now the "input"
+            xCen = flowDoms(d2, level, sps)%xSeed(i2, j2, k2, :)
 
-       ! Set the new weights
-       call fracToWeights(frac, internal%donorInterp(i, :))
-    enddo localInterp
+            ! Store in the comm structure. We need it for the derivatives.
+            internal%xCen(i, :) = xCen
 
-    ! Complete the nonblocking receives in an arbitrary sequence and
-    ! copy the variables from the buffer into the halo's.
+            ! Do newton update
+            frac0 = (/half, half, half/)
+            call newtonUpdate(xCen, &
+                              flowDoms(d1, level, sps)%x(i1 - 1:i1 + 1, j1 - 1:j1 + 1, k1 - 1:k1 + 1, :), frac0, frac)
 
-    size = commPattern%nProcSend
-    completeSends: do i=1,commPattern%nProcSend
+            ! Check if the fractions are between 0 and 1
+            if (MAXVAL(frac) > one + fracTol .or. MINVAL(frac) < zero - fracTol) then
+                print *, "Invalid overset connectivity update. Use 'frozen' or 'full' oversetUpdateMode instead."
+                error stop
+            end if
 
-       ! Complete any of the requests.
+            ! Set the new weights
+            call fracToWeights(frac, internal%donorInterp(i, :))
+        end do localInterp
 
-       call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
+        ! Complete the nonblocking receives in an arbitrary sequence and
+        ! copy the variables from the buffer into the halo's.
 
+        size = commPattern%nProcSend
+        completeSends: do i = 1, commPattern%nProcSend
 
-       ii = index
+            ! Complete any of the requests.
 
-       jj = 3*commPattern%nsendCum(ii-1)
-       do j=1,commPattern%nsend(ii)
+            call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
 
-          ! Store the block and the indices of the halo a bit easier.
+            ii = index
 
-          d2 = commPattern%sendList(ii)%block(j)
-          i2 = commPattern%sendList(ii)%indices(j,1)
-          j2 = commPattern%sendList(ii)%indices(j,2)
-          k2 = commPattern%sendList(ii)%indices(j,3)
+            jj = 3 * commPattern%nsendCum(ii - 1)
+            do j = 1, commPattern%nsend(ii)
 
-          xCen = sendBuffer(jj+1:jj+3)
-          jj = jj + 3
+                ! Store the block and the indices of the halo a bit easier.
 
-          ! Store in the comm structure. We need it for derivatives.
-          commPattern%sendList(ii)%xCen(j, :) = xCen
+                d2 = commPattern%sendList(ii)%block(j)
+                i2 = commPattern%sendList(ii)%indices(j, 1)
+                j2 = commPattern%sendList(ii)%indices(j, 2)
+                k2 = commPattern%sendList(ii)%indices(j, 3)
 
-          ! Compute new fraction
-          frac0 = (/half, half, half/)
-          call newtonUpdate(xCen, &
-               flowDoms(d2, level, sps)%x(i2-1:i2+1, j2-1:j2+1, k2-1:k2+1, :), frac0, frac)
+                xCen = sendBuffer(jj + 1:jj + 3)
+                jj = jj + 3
 
-          ! Check if the fractions are between zero and one
-          if (MAXVAL(frac) > one + fracTol .or. MINVAL(frac) < zero - fracTol) then
-             print *, "Invalid overset connectivity update. Use 'frozen' or 'full' oversetUpdateMode instead."
-             error stop
-          end if
+                ! Store in the comm structure. We need it for derivatives.
+                commPattern%sendList(ii)%xCen(j, :) = xCen
 
-          ! Set the new weights
-          call fracToWeights(frac, commPattern%sendList(ii)%interp(j, :))
-       enddo
+                ! Compute new fraction
+                frac0 = (/half, half, half/)
+                call newtonUpdate(xCen, &
+                                flowDoms(d2, level, sps)%x(i2 - 1:i2 + 1, j2 - 1:j2 + 1, k2 - 1:k2 + 1, :), frac0, frac)
+
+                ! Check if the fractions are between zero and one
+                if (MAXVAL(frac) > one + fracTol .or. MINVAL(frac) < zero - fracTol) then
+                    print *, "Invalid overset connectivity update. Use 'frozen' or 'full' oversetUpdateMode instead."
+                    error stop
+                end if
 
-    enddo completeSends
+                ! Set the new weights
+                call fracToWeights(frac, commPattern%sendList(ii)%interp(j, :))
+            end do
 
-    ! Complete the nonblocking sends.
+        end do completeSends
 
-    size = commPattern%nProcRecv
-    do i=1,commPattern%nProcRecv
-       call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
-    enddo
+        ! Complete the nonblocking sends.
 
-  end subroutine updateOversetConnectivity
+        size = commPattern%nProcRecv
+        do i = 1, commPattern%nProcRecv
+            call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
+        end do
+
+    end subroutine updateOversetConnectivity
 #ifndef USE_COMPLEX
-  subroutine updateOversetConnectivity_d(level, sps)
-
-    ! Forward mode linearization of updateOversetConnectivity
-
-    use constants
-    use communication
-    use blockPointers, only : nDom, il, jl, kl, xSeed, flowDoms, x, ib, jb, kb, &
-         ie, je, ke, fringes, scratch, flowDomsd, xd
-    use haloExchange, only : whalo1to1RealGeneric
-    use oversetUtilities_d, only : newtonUpdate_d, fracToWeights_d
-    use utils, only : setPointers_d
-
-    implicit none
-
-    ! Input
-    integer(kind=intType), intent(in) :: level, sps
-    type(commType), pointer :: commPattern
-    type(internalCommType), pointer :: internal
-
-    ! Working
-    integer(kind=intType) :: nn, ii,jj, ierr,  i, j, k, d1, i1, j1, k1, d2, i2, j2, k2
-    integer(kind=intType) :: size, procID, index, iii,jjj
-    integer, dimension(mpi_status_size) :: mpiStatus
-    real(kind=realType) :: frac(3), fracd(3), frac0(3), xCen(3), xCend(3), weight(8)
-    integer(kind=intType), dimension(8), parameter :: indices=(/1,2,4,3,5,6,8,7/)
-
-    ! Pointers to the overset comms to make it easier to read
-    commPattern => commPatternOverset(level, sps)
-    internal => internalOverset(level, sps)
-
-    ! Step 1: Since we need to update donors for all cells including the
-    ! donors for double halos, we must know the new cell center
-    ! locations for the all of these receivers. Unfortunately, the
-    ! double halos don't have coordinates, so we must first perform a
-    ! (forward) block-to-block halo exchange to populate the xSeed
-    ! values for all cells, including double halos.
-
-    do nn=1, nDom
-       call setPointers_d(nn, level, sps)
-
-       if (.not. associated(flowDoms(nn, level, sps)%xSeed)) then
-          allocate(flowDoms(nn, level, sps)%XSeed(0:ib, 0:jb, 0:kb, 3))
-       end if
-       xSeed => flowDoms(nn, level, sps)%xSeed
-       xSeed = zero
-       do k=2,kl
-          do j=2, jl
-             do i=2, il
-                xSeed(i, j, k, :) = eighth*(&
-                     x(i-1, j-1, k-1, :) + &
-                     x(i  , j-1, k-1, :) + &
-                     x(i-1, j  , k-1, :) + &
-                     x(i  , j  , k-1, :) + &
-                     x(i-1, j-1, k  , :) + &
-                     x(i  , j-1, k  , :) + &
-                     x(i-1, j  , k  , :) + &
-                     x(i  , j  , k  , :)) !+ fringes(i,j,k)%offset
-
-                ! Offset is not active so the xSeed_d just has the x
-                ! part. Just dump the values into scratch so we don't
-                ! acllocate any additional memory.
-                scratch(i, j, k, 1:3) = eighth*(&
-                     xd(i-1, j-1, k-1, :) + &
-                     xd(i  , j-1, k-1, :) + &
-                     xd(i-1, j  , k-1, :) + &
-                     xd(i  , j  , k-1, :) + &
-                     xd(i-1, j-1, k  , :) + &
-                     xd(i  , j-1, k  , :) + &
-                     xd(i-1, j  , k  , :) + &
-                     xd(i  , j  , k  , :))
-             end do
-          end do
-       end do
-    end do
-
-    ! Exchange the xSeeds.
-    do nn=1, nDom
-       flowDoms(nn, level, sps)%realCommVars(1)%var => flowDoms(nn, level, sps)%xSeed(:, :, :, 1)
-       flowDoms(nn, level, sps)%realCommVars(2)%var => flowDoms(nn, level, sps)%xSeed(:, :, :, 2)
-       flowDoms(nn, level, sps)%realCommVars(3)%var => flowDoms(nn, level, sps)%xSeed(:, :, :, 3)
-       flowDoms(nn, level, sps)%realCommVars(4)%var => flowDoms(nn, level, sps)%scratch(:, :, :, 1)
-       flowDoms(nn, level, sps)%realCommVars(5)%var => flowDoms(nn, level, sps)%scratch(:, :, :, 2)
-       flowDoms(nn, level, sps)%realCommVars(6)%var => flowDoms(nn, level, sps)%scratch(:, :, :, 3)
-    end do
-
-    ! Run the (foward) generic halo exchange.
-    call wHalo1to1RealGeneric(6, level, sps, commPatternCell_2nd, internalCell_2nd)
-
-    ! Step 2: Next we need to communicate the xSeeds to their donor
-    ! procs. This means running the overset exchange in REVERSE (ie from
-    ! receiver to donor).  Most of this code will look like
-    ! wOverset_b. We will runt he newtonUpdate code (below) on the fly
-    ! as we receive the data, which should hide some of the comm time.
-
-    ! Gather up the seeds into the *recv* buffer. Note we loop over
-    ! nProcRECV here! After the buffer is assembled it is send off.
-
-    jj = 1
-    ii = 1
-    recvs: do i=1,commPattern%nProcRecv
-
-       ! Store the processor id and the size of the message
-       ! a bit easier.
+    subroutine updateOversetConnectivity_d(level, sps)
+
+        ! Forward mode linearization of updateOversetConnectivity
+
+        use constants
+        use communication
+        use blockPointers, only: nDom, il, jl, kl, xSeed, flowDoms, x, ib, jb, kb, &
+                                 ie, je, ke, fringes, scratch, flowDomsd, xd
+        use haloExchange, only: whalo1to1RealGeneric
+        use oversetUtilities_d, only: newtonUpdate_d, fracToWeights_d
+        use utils, only: setPointers_d
+
+        implicit none
+
+        ! Input
+        integer(kind=intType), intent(in) :: level, sps
+        type(commType), pointer :: commPattern
+        type(internalCommType), pointer :: internal
+
+        ! Working
+        integer(kind=intType) :: nn, ii, jj, ierr, i, j, k, d1, i1, j1, k1, d2, i2, j2, k2
+        integer(kind=intType) :: size, procID, index, iii, jjj
+        integer, dimension(mpi_status_size) :: mpiStatus
+        real(kind=realType) :: frac(3), fracd(3), frac0(3), xCen(3), xCend(3), weight(8)
+        integer(kind=intType), dimension(8), parameter :: indices = (/1, 2, 4, 3, 5, 6, 8, 7/)
+
+        ! Pointers to the overset comms to make it easier to read
+        commPattern => commPatternOverset(level, sps)
+        internal => internalOverset(level, sps)
+
+        ! Step 1: Since we need to update donors for all cells including the
+        ! donors for double halos, we must know the new cell center
+        ! locations for the all of these receivers. Unfortunately, the
+        ! double halos don't have coordinates, so we must first perform a
+        ! (forward) block-to-block halo exchange to populate the xSeed
+        ! values for all cells, including double halos.
+
+        do nn = 1, nDom
+            call setPointers_d(nn, level, sps)
+
+            if (.not. associated(flowDoms(nn, level, sps)%xSeed)) then
+                allocate (flowDoms(nn, level, sps)%XSeed(0:ib, 0:jb, 0:kb, 3))
+            end if
+            xSeed => flowDoms(nn, level, sps)%xSeed
+            xSeed = zero
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+                        xSeed(i, j, k, :) = eighth * ( &
+                                            x(i - 1, j - 1, k - 1, :) + &
+                                            x(i, j - 1, k - 1, :) + &
+                                            x(i - 1, j, k - 1, :) + &
+                                            x(i, j, k - 1, :) + &
+                                            x(i - 1, j - 1, k, :) + &
+                                            x(i, j - 1, k, :) + &
+                                            x(i - 1, j, k, :) + &
+                                            x(i, j, k, :)) !+ fringes(i,j,k)%offset
+
+                        ! Offset is not active so the xSeed_d just has the x
+                        ! part. Just dump the values into scratch so we don't
+                        ! acllocate any additional memory.
+                        scratch(i, j, k, 1:3) = eighth * ( &
+                                                xd(i - 1, j - 1, k - 1, :) + &
+                                                xd(i, j - 1, k - 1, :) + &
+                                                xd(i - 1, j, k - 1, :) + &
+                                                xd(i, j, k - 1, :) + &
+                                                xd(i - 1, j - 1, k, :) + &
+                                                xd(i, j - 1, k, :) + &
+                                                xd(i - 1, j, k, :) + &
+                                                xd(i, j, k, :))
+                    end do
+                end do
+            end do
+        end do
+
+        ! Exchange the xSeeds.
+        do nn = 1, nDom
+            flowDoms(nn, level, sps)%realCommVars(1)%var => flowDoms(nn, level, sps)%xSeed(:, :, :, 1)
+            flowDoms(nn, level, sps)%realCommVars(2)%var => flowDoms(nn, level, sps)%xSeed(:, :, :, 2)
+            flowDoms(nn, level, sps)%realCommVars(3)%var => flowDoms(nn, level, sps)%xSeed(:, :, :, 3)
+            flowDoms(nn, level, sps)%realCommVars(4)%var => flowDoms(nn, level, sps)%scratch(:, :, :, 1)
+            flowDoms(nn, level, sps)%realCommVars(5)%var => flowDoms(nn, level, sps)%scratch(:, :, :, 2)
+            flowDoms(nn, level, sps)%realCommVars(6)%var => flowDoms(nn, level, sps)%scratch(:, :, :, 3)
+        end do
+
+        ! Run the (foward) generic halo exchange.
+        call wHalo1to1RealGeneric(6, level, sps, commPatternCell_2nd, internalCell_2nd)
+
+        ! Step 2: Next we need to communicate the xSeeds to their donor
+        ! procs. This means running the overset exchange in REVERSE (ie from
+        ! receiver to donor).  Most of this code will look like
+        ! wOverset_b. We will runt he newtonUpdate code (below) on the fly
+        ! as we receive the data, which should hide some of the comm time.
+
+        ! Gather up the seeds into the *recv* buffer. Note we loop over
+        ! nProcRECV here! After the buffer is assembled it is send off.
+
+        jj = 1
+        ii = 1
+        recvs: do i = 1, commPattern%nProcRecv
 
-       procID = commPattern%recvProc(i)
-       size    = 6*commPattern%nrecv(i)
+            ! Store the processor id and the size of the message
+            ! a bit easier.
 
-       ! Copy the data into the buffer
+            procID = commPattern%recvProc(i)
+            size = 6 * commPattern%nrecv(i)
 
-       do j=1,commPattern%nrecv(i)
+            ! Copy the data into the buffer
 
-          ! Store the block and the indices to make code a bit easier to read
+            do j = 1, commPattern%nrecv(i)
 
-          d2 = commPattern%recvList(i)%block(j)
-          i2 = commPattern%recvList(i)%indices(j,1)
-          j2 = commPattern%recvList(i)%indices(j,2)
-          k2 = commPattern%recvList(i)%indices(j,3)
+                ! Store the block and the indices to make code a bit easier to read
 
-          ! Copy the xSeed and it's derivative
-          recvBuffer(jj)   = flowDoms(d2,level,sps)%xSeed(i2,j2,k2,1)
-          recvBuffer(jj+1) = flowDoms(d2,level,sps)%xSeed(i2,j2,k2,2)
-          recvBuffer(jj+2) = flowDoms(d2,level,sps)%xSeed(i2,j2,k2,3)
-          recvBuffer(jj+3) = flowDoms(d2,level,sps)%scratch(i2,j2,k2,1)
-          recvBuffer(jj+4) = flowDoms(d2,level,sps)%scratch(i2,j2,k2,2)
-          recvBuffer(jj+5) = flowDoms(d2,level,sps)%scratch(i2,j2,k2,3)
+                d2 = commPattern%recvList(i)%block(j)
+                i2 = commPattern%recvList(i)%indices(j, 1)
+                j2 = commPattern%recvList(i)%indices(j, 2)
+                k2 = commPattern%recvList(i)%indices(j, 3)
 
-          jj = jj + 6
-       end do
+                ! Copy the xSeed and it's derivative
+                recvBuffer(jj) = flowDoms(d2, level, sps)%xSeed(i2, j2, k2, 1)
+                recvBuffer(jj + 1) = flowDoms(d2, level, sps)%xSeed(i2, j2, k2, 2)
+                recvBuffer(jj + 2) = flowDoms(d2, level, sps)%xSeed(i2, j2, k2, 3)
+                recvBuffer(jj + 3) = flowDoms(d2, level, sps)%scratch(i2, j2, k2, 1)
+                recvBuffer(jj + 4) = flowDoms(d2, level, sps)%scratch(i2, j2, k2, 2)
+                recvBuffer(jj + 5) = flowDoms(d2, level, sps)%scratch(i2, j2, k2, 3)
 
-       ! Send the data.
-       call mpi_isend(recvBuffer(ii), size, adflow_real, procID,  &
-            procID, ADflow_comm_world, recvRequests(i), &
-            ierr)
+                jj = jj + 6
+            end do
 
-       ! Set ii to jj for the next processor.
+            ! Send the data.
+            call mpi_isend(recvBuffer(ii), size, adflow_real, procID, &
+                           procID, ADflow_comm_world, recvRequests(i), &
+                           ierr)
 
-       ii = jj
+            ! Set ii to jj for the next processor.
 
-    end do recvs
+            ii = jj
 
-    ! Post the nonblocking receives.
+        end do recvs
 
-    ii = 1
-    sends: do i=1,commPattern%nProcSend
+        ! Post the nonblocking receives.
 
-       ! Store the processor id and the size of the message
-       ! a bit easier.
+        ii = 1
+        sends: do i = 1, commPattern%nProcSend
 
-       procID = commPattern%sendProc(i)
-       size    = 6*commPattern%nsend(i)
+            ! Store the processor id and the size of the message
+            ! a bit easier.
 
-       ! Post the receive.
+            procID = commPattern%sendProc(i)
+            size = 6 * commPattern%nsend(i)
 
-       call mpi_irecv(sendBuffer(ii), size, adflow_real, procId, &
-            myId, ADflow_comm_world, sendRequests(i), ierr)
+            ! Post the receive.
 
-       ! And update ii.
+            call mpi_irecv(sendBuffer(ii), size, adflow_real, procId, &
+                           myId, ADflow_comm_world, sendRequests(i), ierr)
 
-       ii = ii + size
+            ! And update ii.
 
-    enddo sends
+            ii = ii + size
 
-    ! Do the local interpolation.
+        end do sends
 
-    localInterp: do i=1,internal%ncopy
+        ! Do the local interpolation.
 
-       ! Store the block and the indices of the donor a bit easier.
+        localInterp: do i = 1, internal%ncopy
 
-       d1 = internal%donorBlock(i)
-       i1 = internal%donorIndices(i, 1)
-       j1 = internal%donorIndices(i, 2)
-       k1 = internal%donorIndices(i, 3)
+            ! Store the block and the indices of the donor a bit easier.
 
-       ! Idem for the halo's.
+            d1 = internal%donorBlock(i)
+            i1 = internal%donorIndices(i, 1)
+            j1 = internal%donorIndices(i, 2)
+            k1 = internal%donorIndices(i, 3)
 
-       d2 = internal%haloBlock(i)
-       i2 = internal%haloIndices(i, 1)
-       j2 = internal%haloIndices(i, 2)
-       k2 = internal%haloIndices(i, 3)
+            ! Idem for the halo's.
 
-       xCen = flowDoms(d2, level, sps)%xSeed(i2, j2, k2, :)
-       xCend = flowDoms(d2, level, sps)%scratch(i2, j2, k2, 1:3)
-       frac0 = (/half, half, half/)
-       call newtonUpdate_d(xCen, xCend, &
-            flowDoms(d1, level, sps)%x(i1-1:i1+1, j1-1:j1+1, k1-1:k1+1, :), &
-            flowDomsd(d1, level, sps)%x(i1-1:i1+1, j1-1:j1+1, k1-1:k1+1, :), &
-            frac0, frac, fracd)
+            d2 = internal%haloBlock(i)
+            i2 = internal%haloIndices(i, 1)
+            j2 = internal%haloIndices(i, 2)
+            k2 = internal%haloIndices(i, 3)
 
-       ! Set the new weights
-       call fracToWeights_d(frac, fracd, weight, internal%donorInterpd(i, :))
+            xCen = flowDoms(d2, level, sps)%xSeed(i2, j2, k2, :)
+            xCend = flowDoms(d2, level, sps)%scratch(i2, j2, k2, 1:3)
+            frac0 = (/half, half, half/)
+            call newtonUpdate_d(xCen, xCend, &
+                                flowDoms(d1, level, sps)%x(i1 - 1:i1 + 1, j1 - 1:j1 + 1, k1 - 1:k1 + 1, :), &
+                                flowDomsd(d1, level, sps)%x(i1 - 1:i1 + 1, j1 - 1:j1 + 1, k1 - 1:k1 + 1, :), &
+                                frac0, frac, fracd)
 
-    enddo localInterp
+            ! Set the new weights
+            call fracToWeights_d(frac, fracd, weight, internal%donorInterpd(i, :))
 
-    ! Complete the nonblocking receives in an arbitrary sequence and
-    ! copy the variables from the buffer into the halo's.
+        end do localInterp
 
-    size = commPattern%nProcSend
-    completeSends: do i=1,commPattern%nProcSend
+        ! Complete the nonblocking receives in an arbitrary sequence and
+        ! copy the variables from the buffer into the halo's.
 
-       ! Complete any of the requests.
+        size = commPattern%nProcSend
+        completeSends: do i = 1, commPattern%nProcSend
 
-       call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
+            ! Complete any of the requests.
 
+            call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
 
-       ii = index
+            ii = index
 
-       jj = 6*commPattern%nsendCum(ii-1)
-       do j=1,commPattern%nsend(ii)
+            jj = 6 * commPattern%nsendCum(ii - 1)
+            do j = 1, commPattern%nsend(ii)
 
-          ! Store the block and the indices of the halo a bit easier.
+                ! Store the block and the indices of the halo a bit easier.
 
-          d2 = commPattern%sendList(ii)%block(j)
-          i2 = commPattern%sendList(ii)%indices(j,1)
-          j2 = commPattern%sendList(ii)%indices(j,2)
-          k2 = commPattern%sendList(ii)%indices(j,3)
+                d2 = commPattern%sendList(ii)%block(j)
+                i2 = commPattern%sendList(ii)%indices(j, 1)
+                j2 = commPattern%sendList(ii)%indices(j, 2)
+                k2 = commPattern%sendList(ii)%indices(j, 3)
 
-          xCen = sendBuffer(jj+1:jj+3)
-          xCend = sendBuffer(jj+4:jj+6)
-          jj = jj + 6
+                xCen = sendBuffer(jj + 1:jj + 3)
+                xCend = sendBuffer(jj + 4:jj + 6)
+                jj = jj + 6
 
-          ! Compute new fraction
-          frac0 = (/half, half, half/)
-          call newtonUpdate_d(xCen, xCend, &
-               flowDoms(d2, level, sps)%x(i2-1:i2+1, j2-1:j2+1, k2-1:k2+1, :), &
-               flowDomsd(d2, level, sps)%x(i2-1:i2+1, j2-1:j2+1, k2-1:k2+1, :), &
-               frac0, frac, fracd)
+                ! Compute new fraction
+                frac0 = (/half, half, half/)
+                call newtonUpdate_d(xCen, xCend, &
+                                    flowDoms(d2, level, sps)%x(i2 - 1:i2 + 1, j2 - 1:j2 + 1, k2 - 1:k2 + 1, :), &
+                                    flowDomsd(d2, level, sps)%x(i2 - 1:i2 + 1, j2 - 1:j2 + 1, k2 - 1:k2 + 1, :), &
+                                    frac0, frac, fracd)
 
-          ! Set the new weights
-          call fracToWeights_d(frac, fracd, weight, &
-               commPattern%sendList(ii)%interpd(j, :))
-       enddo
+                ! Set the new weights
+                call fracToWeights_d(frac, fracd, weight, &
+                                     commPattern%sendList(ii)%interpd(j, :))
+            end do
 
-    enddo completeSends
+        end do completeSends
 
-    ! Complete the nonblocking sends.
+        ! Complete the nonblocking sends.
 
-    size = commPattern%nProcRecv
-    do i=1,commPattern%nProcRecv
-       call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
-    enddo
+        size = commPattern%nProcRecv
+        do i = 1, commPattern%nProcRecv
+            call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
+        end do
 
-  end subroutine updateOversetConnectivity_d
+    end subroutine updateOversetConnectivity_d
 
-  subroutine updateOversetConnectivity_b(level, sps)
+    subroutine updateOversetConnectivity_b(level, sps)
 
-    ! Reverse  mode linearization of updateOversetConnectivity
+        ! Reverse  mode linearization of updateOversetConnectivity
 
-    use constants
-    use communication
-    use blockPointers, only : nDom, il, jl, kl, xSeed, flowDoms, x, ib, jb, kb, &
-         ie, je, ke, fringes, scratch, flowDomsd, xd
-    use haloExchange, only : whalo1to1RealGeneric_b
-    use oversetUtilities_b, only : newtonUpdate_b, fracToWeights_b, newtonUpdate, fracToWeights
-    use utils, only : setPointers_b
+        use constants
+        use communication
+        use blockPointers, only: nDom, il, jl, kl, xSeed, flowDoms, x, ib, jb, kb, &
+                                 ie, je, ke, fringes, scratch, flowDomsd, xd
+        use haloExchange, only: whalo1to1RealGeneric_b
+        use oversetUtilities_b, only: newtonUpdate_b, fracToWeights_b, newtonUpdate, fracToWeights
+        use utils, only: setPointers_b
 
-    implicit none
+        implicit none
 
-    ! Input
-    integer(kind=intType), intent(in) :: level, sps
-    type(commType), pointer :: commPattern
-    type(internalCommType), pointer :: internal
+        ! Input
+        integer(kind=intType), intent(in) :: level, sps
+        type(commType), pointer :: commPattern
+        type(internalCommType), pointer :: internal
 
-    ! Working
-    integer(kind=intType) :: nn, ii,jj, kk, ierr,  i, j, k, d1, i1, j1, k1, d2, i2, j2, k2
-    integer(kind=intType) :: size, procID, index, iii,jjj
-    integer, dimension(mpi_status_size) :: mpiStatus
-    real(kind=realType) :: frac(3), fracd(3), frac0(3), xCen(3), xCend(3), weight(8), add(3)
-    integer(kind=intType), dimension(8), parameter :: indices=(/1,2,4,3,5,6,8,7/)
+        ! Working
+        integer(kind=intType) :: nn, ii, jj, kk, ierr, i, j, k, d1, i1, j1, k1, d2, i2, j2, k2
+        integer(kind=intType) :: size, procID, index, iii, jjj
+        integer, dimension(mpi_status_size) :: mpiStatus
+        real(kind=realType) :: frac(3), fracd(3), frac0(3), xCen(3), xCend(3), weight(8), add(3)
+        integer(kind=intType), dimension(8), parameter :: indices = (/1, 2, 4, 3, 5, 6, 8, 7/)
 
-    ! Pointers to the overset comms to make it easier to read
-    commPattern => commPatternOverset(level, sps)
-    internal => internalOverset(level, sps)
+        ! Pointers to the overset comms to make it easier to read
+        commPattern => commPatternOverset(level, sps)
+        internal => internalOverset(level, sps)
 
-    ! Zero out xSeedd (in scratch)
-    do nn=1, nDom
-       flowDoms(nn, 1, sps)%scratch(:, :, :, 1:3) = zero
-    end do
+        ! Zero out xSeedd (in scratch)
+        do nn = 1, nDom
+            flowDoms(nn, 1, sps)%scratch(:, :, :, 1:3) = zero
+        end do
 
-    ! In reverse the fist thing we must do is the compute the
-    ! sensitivites of xCen and send it to the receiving
-    ! processor. This comm pattern is the same as wOverset forward.
+        ! In reverse the fist thing we must do is the compute the
+        ! sensitivites of xCen and send it to the receiving
+        ! processor. This comm pattern is the same as wOverset forward.
 
-    ii = 1
-    sends: do i=1,commPattern%nProcSend
+        ii = 1
+        sends: do i = 1, commPattern%nProcSend
 
-       ! Store the processor id and the size of the message
-       ! a bit easier.
+            ! Store the processor id and the size of the message
+            ! a bit easier.
 
-       procID = commPattern%sendProc(i)
-       size    = 3*commPattern%nsend(i)
+            procID = commPattern%sendProc(i)
+            size = 3 * commPattern%nsend(i)
 
-       ! Copy the data in the correct part of the send buffer.
+            ! Copy the data in the correct part of the send buffer.
 
-       jj = ii
-       do j=1,commPattern%nsend(i)
+            jj = ii
+            do j = 1, commPattern%nsend(i)
 
-          ! Store the block id and the indices of the donor
-          ! a bit easier.
-          d1 = commPattern%sendList(i)%block(j)
-          i1 = commPattern%sendList(i)%indices(j,1)
-          j1 = commPattern%sendList(i)%indices(j,2)
-          k1 = commPattern%sendList(i)%indices(j,3)
+                ! Store the block id and the indices of the donor
+                ! a bit easier.
+                d1 = commPattern%sendList(i)%block(j)
+                i1 = commPattern%sendList(i)%indices(j, 1)
+                j1 = commPattern%sendList(i)%indices(j, 2)
+                k1 = commPattern%sendList(i)%indices(j, 3)
 
-          ! -------- Recompute forward pass -------------
-          xCen = commPattern%sendList(i)%xCen(j, :)
+                ! -------- Recompute forward pass -------------
+                xCen = commPattern%sendList(i)%xCen(j, :)
 
-          ! Do newton update
-          frac0 = (/half, half, half/)
-          call newtonUpdate(xCen, &
-               flowDoms(d1, level, sps)%x(i1-1:i1+1, j1-1:j1+1, k1-1:k1+1, :), frac0, frac)
+                ! Do newton update
+                frac0 = (/half, half, half/)
+                call newtonUpdate(xCen, &
+                                flowDoms(d1, level, sps)%x(i1 - 1:i1 + 1, j1 - 1:j1 + 1, k1 - 1:k1 + 1, :), frac0, frac)
 
-          ! Set the new weights
-          call fracToWeights(frac, weight)
+                ! Set the new weights
+                call fracToWeights(frac, weight)
 
-          ! ------------- Reverse pass -----------
+                ! ------------- Reverse pass -----------
 
-          ! Transfer the weights back to the frac
-          call fracToWeights_b(frac, fracd, weight, commPattern%sendList(i)%interpd(j, :))
+                ! Transfer the weights back to the frac
+                call fracToWeights_b(frac, fracd, weight, commPattern%sendList(i)%interpd(j, :))
 
-          ! Run the reverse newton update.  Note that we are
-          ! accumulating into the local xd here in the newton_b call.
-          frac0 = (/half, half, half/)
-          call newtonUpdate_b(xCen, xCend, &
-               flowDoms(d1, level, sps)%x(i1-1:i1+1, j1-1:j1+1, k1-1:k1+1, :), &
-               flowDomsd(d1, level, sps)%x(i1-1:i1+1, j1-1:j1+1, k1-1:k1+1, :), &
-               frac0, frac, fracd)
+                ! Run the reverse newton update.  Note that we are
+                ! accumulating into the local xd here in the newton_b call.
+                frac0 = (/half, half, half/)
+                call newtonUpdate_b(xCen, xCend, &
+                                    flowDoms(d1, level, sps)%x(i1 - 1:i1 + 1, j1 - 1:j1 + 1, k1 - 1:k1 + 1, :), &
+                                    flowDomsd(d1, level, sps)%x(i1 - 1:i1 + 1, j1 - 1:j1 + 1, k1 - 1:k1 + 1, :), &
+                                    frac0, frac, fracd)
 
-          ! -------------------------------------
+                ! -------------------------------------
 
-          ! We want to send xCend to the receiver
-          sendBuffer(jj:jj+2) = xCend
-          jj = jj + 3
+                ! We want to send xCend to the receiver
+                sendBuffer(jj:jj + 2) = xCend
+                jj = jj + 3
 
-       enddo
-       ! Send the data.
+            end do
+            ! Send the data.
 
-       call mpi_isend(sendBuffer(ii), size, adflow_real, procId,  &
-            procId, ADflow_comm_world, sendRequests(i), &
-            ierr)
+            call mpi_isend(sendBuffer(ii), size, adflow_real, procId, &
+                           procId, ADflow_comm_world, sendRequests(i), &
+                           ierr)
 
-       ! Set ii to jj for the next processor.
+            ! Set ii to jj for the next processor.
 
-       ii = jj
+            ii = jj
 
-    enddo sends
+        end do sends
 
-    ! Post the nonblocking receives.
+        ! Post the nonblocking receives.
 
-    ii = 1
-    receives: do i=1,commPattern%nProcRecv
+        ii = 1
+        receives: do i = 1, commPattern%nProcRecv
 
-       ! Store the processor id and the size of the message
-       ! a bit easier.
+            ! Store the processor id and the size of the message
+            ! a bit easier.
 
-       procID = commPattern%recvProc(i)
-       size    = 3*commPattern%nrecv(i)
+            procID = commPattern%recvProc(i)
+            size = 3 * commPattern%nrecv(i)
 
-       ! Post the receive.
+            ! Post the receive.
 
-       call mpi_irecv(recvBuffer(ii), size, adflow_real, procId, &
-            myId, ADflow_comm_world, recvRequests(i), ierr)
+            call mpi_irecv(recvBuffer(ii), size, adflow_real, procId, &
+                           myId, ADflow_comm_world, recvRequests(i), ierr)
 
-       ! And update ii.
+            ! And update ii.
 
-       ii = ii + size
+            ii = ii + size
 
-    enddo receives
+        end do receives
 
-    ! Do the local stuff while we're waiting
+        ! Do the local stuff while we're waiting
 
-    localInterp: do i=1,internal%ncopy
+        localInterp: do i = 1, internal%ncopy
 
-       ! Store the block and the indices of the donor a bit easier.
-       d1 = internal%donorBlock(i)
-       i1 = internal%donorIndices(i, 1)
-       j1 = internal%donorIndices(i, 2)
-       k1 = internal%donorIndices(i, 3)
+            ! Store the block and the indices of the donor a bit easier.
+            d1 = internal%donorBlock(i)
+            i1 = internal%donorIndices(i, 1)
+            j1 = internal%donorIndices(i, 2)
+            k1 = internal%donorIndices(i, 3)
 
-       d2 = internal%haloBlock(i)
-       i2 = internal%haloIndices(i, 1)
-       j2 = internal%haloIndices(i, 2)
-       k2 = internal%haloIndices(i, 3)
+            d2 = internal%haloBlock(i)
+            i2 = internal%haloIndices(i, 1)
+            j2 = internal%haloIndices(i, 2)
+            k2 = internal%haloIndices(i, 3)
 
-       ! -------- Recompute forward pass -------------
-       xCen = internal%XCen(i, :)
+            ! -------- Recompute forward pass -------------
+            xCen = internal%XCen(i, :)
 
-       ! Do newton update
-       frac0 = (/half, half, half/)
-       call newtonUpdate(xCen, &
-            flowDoms(d1, level, sps)%x(i1-1:i1+1, j1-1:j1+1, k1-1:k1+1, :), frac0, frac)
+            ! Do newton update
+            frac0 = (/half, half, half/)
+            call newtonUpdate(xCen, &
+                              flowDoms(d1, level, sps)%x(i1 - 1:i1 + 1, j1 - 1:j1 + 1, k1 - 1:k1 + 1, :), frac0, frac)
 
-       ! Set the new weights
-       call fracToWeights(frac, weight)
+            ! Set the new weights
+            call fracToWeights(frac, weight)
 
-      ! ------------- Reverse pass -----------
+            ! ------------- Reverse pass -----------
 
-       ! Transfer the weights back to the frac
-       call fracToWeights_b(frac, fracd, weight, internal%Donorinterpd(i, :))
+            ! Transfer the weights back to the frac
+            call fracToWeights_b(frac, fracd, weight, internal%Donorinterpd(i, :))
 
-       ! Run the reverse newton update.  Note that we are
-       ! accumulating into the local xd here in the newton_b call.
-       frac0 = (/half, half, half/)
-       call newtonUpdate_b(xCen, xCend, &
-            flowDoms(d1, level, sps)%x(i1-1:i1+1, j1-1:j1+1, k1-1:k1+1, :), &
-            flowDomsd(d1, level, sps)%x(i1-1:i1+1, j1-1:j1+1, k1-1:k1+1, :), &
-            frac0, frac, fracd)
+            ! Run the reverse newton update.  Note that we are
+            ! accumulating into the local xd here in the newton_b call.
+            frac0 = (/half, half, half/)
+            call newtonUpdate_b(xCen, xCend, &
+                                flowDoms(d1, level, sps)%x(i1 - 1:i1 + 1, j1 - 1:j1 + 1, k1 - 1:k1 + 1, :), &
+                                flowDomsd(d1, level, sps)%x(i1 - 1:i1 + 1, j1 - 1:j1 + 1, k1 - 1:k1 + 1, :), &
+                                frac0, frac, fracd)
 
-       ! Accumulate in to the xSeedd (which we're using scratch for)
-       flowDoms(d2, level, sps)%scratch(i2, j2, k2, 1:3) =  &
-            flowDoms(d2, level, sps)%scratch(i2, j2, k2, 1:3) + xCend
+            ! Accumulate in to the xSeedd (which we're using scratch for)
+            flowDoms(d2, level, sps)%scratch(i2, j2, k2, 1:3) = &
+                flowDoms(d2, level, sps)%scratch(i2, j2, k2, 1:3) + xCend
 
-    end do localInterp
+        end do localInterp
 
-    ! Complete the nonblocking receives in an arbitrary sequence and
-    ! copy the variables from the buffer into the halo's.
+        ! Complete the nonblocking receives in an arbitrary sequence and
+        ! copy the variables from the buffer into the halo's.
 
-    size = commPattern%nProcRecv
-    completeRecvs: do i=1,commPattern%nProcRecv
+        size = commPattern%nProcRecv
+        completeRecvs: do i = 1, commPattern%nProcRecv
 
-       ! Complete any of the requests.
+            ! Complete any of the requests.
 
-       call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
+            call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
 
-       ! Copy the data just arrived in the halo's.
+            ! Copy the data just arrived in the halo's.
 
-       ii = index
-       jj = 3*commPattern%nrecvCum(ii-1)
-       do j=1,commPattern%nrecv(ii)
+            ii = index
+            jj = 3 * commPattern%nrecvCum(ii - 1)
+            do j = 1, commPattern%nrecv(ii)
 
-          ! Store the block and the indices of the halo a bit easier.
+                ! Store the block and the indices of the halo a bit easier.
 
-          d2 = commPattern%recvList(ii)%block(j)
-          i2 = commPattern%recvList(ii)%indices(j,1)
-          j2 = commPattern%recvList(ii)%indices(j,2)
-          k2 = commPattern%recvList(ii)%indices(j,3)
+                d2 = commPattern%recvList(ii)%block(j)
+                i2 = commPattern%recvList(ii)%indices(j, 1)
+                j2 = commPattern%recvList(ii)%indices(j, 2)
+                k2 = commPattern%recvList(ii)%indices(j, 3)
 
-          flowDoms(d2, level, sps)%scratch(i2, j2, k2, 1:3) = &
-               flowDoms(d2, level, sps)%scratch(i2, j2, k2, 1:3) + recvBuffer(jj+1:jj+3)
+                flowDoms(d2, level, sps)%scratch(i2, j2, k2, 1:3) = &
+                    flowDoms(d2, level, sps)%scratch(i2, j2, k2, 1:3) + recvBuffer(jj + 1:jj + 3)
 
-          jj =jj + 3
-       enddo
-    end do completeRecvs
+                jj = jj + 3
+            end do
+        end do completeRecvs
 
-    ! Complete the nonblocking sends.
+        ! Complete the nonblocking sends.
 
-    size = commPattern%nProcSend
-    do i=1,commPattern%nProcSend
-       call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
-    enddo
+        size = commPattern%nProcSend
+        do i = 1, commPattern%nProcSend
+            call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
+        end do
 
-    ! Now we have accumulated back as far as xSeedd (stored in
-    ! scratch). We can now do the whalo1to1_b
+        ! Now we have accumulated back as far as xSeedd (stored in
+        ! scratch). We can now do the whalo1to1_b
 
-    ! Exchange the xSeeds in reverse.
-    do nn=1, nDom
-       flowDoms(nn, level, sps)%realCommVars(1)%var => flowDoms(nn, level, sps)%scratch(:, :, :, 1)
-       flowDoms(nn, level, sps)%realCommVars(2)%var => flowDoms(nn, level, sps)%scratch(:, :, :, 2)
-       flowDoms(nn, level, sps)%realCommVars(3)%var => flowDoms(nn, level, sps)%scratch(:, :, :, 3)
-    end do
+        ! Exchange the xSeeds in reverse.
+        do nn = 1, nDom
+            flowDoms(nn, level, sps)%realCommVars(1)%var => flowDoms(nn, level, sps)%scratch(:, :, :, 1)
+            flowDoms(nn, level, sps)%realCommVars(2)%var => flowDoms(nn, level, sps)%scratch(:, :, :, 2)
+            flowDoms(nn, level, sps)%realCommVars(3)%var => flowDoms(nn, level, sps)%scratch(:, :, :, 3)
+        end do
 
-    call wHalo1to1RealGeneric_b(3, level, sps, commPatternCell_2nd, internalCell_2nd)
+        call wHalo1to1RealGeneric_b(3, level, sps, commPatternCell_2nd, internalCell_2nd)
 
-    ! Finaly we can push back to the local x
-    do nn=1, nDom
-       call setPointers_b(nn, level, sps)
+        ! Finaly we can push back to the local x
+        do nn = 1, nDom
+            call setPointers_b(nn, level, sps)
 
-       do k=2,kl
-          do j=2, jl
-             do i=2, il
-                ! Add is accumulate seed for xSeed (stored in scratch )
-                add = eighth * scratch(i, j, k, 1:3)
-                do kk=k-1,k
-                   do jj=j-1,j
-                      do ii=i-1,i
-                         xd(ii, jj, kk, :) = xd(ii, jj, kk, :) + add
-                      end do
-                   end do
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+                        ! Add is accumulate seed for xSeed (stored in scratch )
+                        add = eighth * scratch(i, j, k, 1:3)
+                        do kk = k - 1, k
+                            do jj = j - 1, j
+                                do ii = i - 1, i
+                                    xd(ii, jj, kk, :) = xd(ii, jj, kk, :) + add
+                                end do
+                            end do
+                        end do
+                    end do
                 end do
-             end do
-          end do
-       end do
-    end do
+            end do
+        end do
 
-  end subroutine updateOversetConnectivity_b
+    end subroutine updateOversetConnectivity_b
 #endif
 end module oversetCommUtilities
diff --git a/src/overset/oversetInitialization.F90 b/src/overset/oversetInitialization.F90
index 798917d51..c83c39dc1 100644
--- a/src/overset/oversetInitialization.F90
+++ b/src/overset/oversetInitialization.F90
@@ -2,611 +2,610 @@ module oversetInitialization
 
 contains
 
-  subroutine initializeStatus(level, sps)
-
-    ! This subroutine initializes the status variable for use in the
-    ! overset process
-
-    use constants
-    use blockPointers, only : nDom, il, jl, kl, ib, jb, kb, status
-    use oversetUtilities, only : setIsCompute, setIsWallDonor, setIsDonor, setIsReceiver
-    use utils, only : setPointers
-    implicit none
-
-    ! Input params
-    integer(kind=intType), intent(in) :: level, sps
-
-    ! Working parameters
-    integer(kind=intType) :: nn, i, j, k
-
-    do nn=1, nDom
-       call setPointers(nn, level, sps)
-
-       ! Now loop over the owned cells and set the isCompute flag to
-       ! true.
-
-       do k=2, kl
-          do j=2, jl
-             do i=2, il
-                status(i,j,k) = 0
-                call setIsCompute(status(i, j, k), .True. )
-
-                ! Additional initialization for full overset update mode
-                call setIsWallDonor(status(i, j, k), .False.)
-                call setIsDonor(status(i, j, k), .False.)
-                call setIsReceiver(status(i, j, k), .False.)
-
-             end do
-          end do
-       end do
-    end do
-
-  end subroutine initializeStatus
-
-  subroutine reInitializeStatus(level, sps)
-
-    ! This subroutine reinitializes the status variable. However, if
-    ! cell is a wallDonor, that information is kept.
-
-    use constants
-    use blockPointers, only : nDom, il, jl, kl, ib, jb, kb, status
-    use oversetUtilities, only : setIsCompute, isWallDonor, setIsWallDonor
-    use utils, only : setPointers
-    implicit none
-
-    ! Input params
-    integer(kind=intType), intent(in) :: level, sps
-
-    ! Working parameters
-    integer(kind=intType) :: nn, i, j, k
-    logical :: wDonor
-    do nn=1, nDom
-       call setPointers(nn, level, sps)
-
-       ! Now loop over the owned cells and set the isCompute flag to
-       ! true.
-
-       do k=2, kl
-          do j=2, jl
-             do i=2, il
-                wDonor = isWallDonor(status(i, j, k))
-                status(i, j, k) = 0
-                call setIsCompute(status(i, j, k), .True. )
-                call setIsWallDonor(status(i, j, k), wDonor)
-             end do
-          end do
-       end do
-    end do
-  end subroutine reInitializeStatus
-
-
-  subroutine initializeOBlock(oBlock, nn, level, sps)
-
-    ! This routine allocates the data for the supplied oBlock using the
-    !  data currently in blockPointers
-    use constants
-    use oversetData, only : oversetBlock, clusters, cumDomProc
-    use inputOverset, only : backgroundVolScale, nearWallDist, useOversetWallScaling
-    use blockPointers, only : x, globalCell, il, jl, kl, ib, jb, kb, &
-         ie, je, ke, vol, iBlank, xSeed, forcedRecv, nbkglobal, si, sj, sk
-    use cgnsGrid, only : cgnsDoms
-    use adtBuild, only : buildSerialHex
-    use communication, only : myID
-    use stencils, only : visc_drdw_stencil, n_visc_drdw
-    use utils, only : mynorm2
-    use oversetUtilities, only :  wallsOnBlock
-    implicit none
-
-    ! Input Params
-    type(oversetBlock), intent(inout) :: oBlock
-    integer(kind=intType) :: nn, level, sps, kk
-
-    ! Working paramters
-    integer(kind=intType) :: i, j, k, mm, nADT, nHexa, planeOffset
-    integer(kind=intType) :: iStart, iEnd, jStart, jEnd, kStart, kEnd
-    real(kind=realType) :: factor, frac,  dist, xp(3),aspect(3), fact
-    integer(kind=intType) :: i_stencil, ii, jj, iii
-    logical :: wallsPresent
-    logical, allocatable, dimension(:, :, :)  :: nearWallTmp
-
-    ! Set all the sizes for this block.
-    oBlock%il = il
-    oBlock%jl = jl
-    oBlock%kl = kl
-
-    oBlock%proc = myID
-    oBlock%block = nn
-    oBlock%cluster = clusters(cumDomProc(myid) + nn)
-    call wallsOnBlock(wallsPresent)
-
-    ! Do the reset of the allocs
-    allocate( &
-         oBlock%qualDonor(1, ie*je*ke), &
-         oBlock%globalCell(0:ib, 0:jb, 0:kb), &
-         oBlock%invalidDonor(1:ie, 1:je, 1:ke))
-
-    ! Invalid Donor array is simply if the cell is a forced receiver
-    ! or not.
-    do k=1,ke
-       do j=1,je
-          do i=1,ie
-             oBlock%invalidDonor(i,j,k) = forcedRecv(i,j,k)
-          end do
-       end do
-    end do
-
-    ! Compute the qualDonor depending on if we have a wall block or not.
-    mm = 0
-    do k=1,ke
-       do j=1,je
-          do i=1,ie
-             mm = mm + 1
-             if (wallsPresent) then
-
-                ! Modify based on aspect ratio of the cell in the
-                ! k-direcion. High aspect ratioin BL
-                aspect = one
-                if (useOversetWallScaling) then
-                   if (CGNSDoms(nbkGlobal)%viscousDir(1)) &
-                        aspect(1) = (half*(mynorm2(si(i-1, j, k, :)) + mynorm2(si(i, j, k, :)))) / vol (i, j, k)
-                   if (CGNSDoms(nbkGlobal)%viscousDir(2)) &
-                        aspect(2) = (half*(mynorm2(sj(i, j-1, k, :)) + mynorm2(sj(i, j, k, :)))) / vol (i, j, k)
-                   if (CGNSDoms(nbkGlobal)%viscousDir(3)) &
-                        aspect(3) = (half*(mynorm2(sk(i, j, k-1, :)) + mynorm2(sk(i, j, k, :)))) / vol (i, j, k)
-                end if
-                fact = min(aspect(1)*aspect(2)*aspect(3), 100.0_realType)
-
-                oBlock%qualDonor(1, mm) = (vol(i, j, k)**third) / fact
-             else
-                oBlock%qualDonor(1, mm) = (backGroundVolScale*vol(i, j, k))**third
-             end if
-
-             ! Account for explicit scaling of quality
-             oblock%qualDonor(1, mm) =  oblock%qualDonor(1, mm)*cgnsDoms(nbkglobal)%priority
-          end do
-       end do
-    end do
-
-    !Copy over global cell
-    oBlock%globalCell = globalCell
-
-    ! Now setup the data for the ADT
-    nHexa = il * jl * kl
-    nADT = ie * je * ke
-
-    allocate(oBlock%xADT(3, nADT), oBlock%hexaConn(8, nHexa))
-    ! Fill up the xADT using cell centers (dual mesh)
-    mm = 0
-
-    ! Allocate the nearWall
-    allocate(oBlock%nearWall(1:il, 1:jl, 1:kl))
-    oBlock%nearWall = 0
-
-    allocate(nearWallTmp(1:ie, 1:je, 1:ke))
-    nearWallTmp = .False.
-
-    do k=1, ke
-       do j=1, je
-          do i=1, ie
-             mm = mm + 1
-             xp = eighth*(&
-                  x(i-1, j-1, k-1, :) + &
-                  x(i  , j-1, k-1, :) + &
-                  x(i-1, j  , k-1, :) + &
-                  x(i  , j  , k-1, :) + &
-                  x(i-1, j-1, k  , :) + &
-                  x(i  , j-1, k  , :) + &
-                  x(i-1, j  , k  , :) + &
-                  x(i  , j  , k  , :))
-             oBlock%xADT(:, mm) = xp
-
-             ! Determine if this point is near wall. Note that the
-             ! boundary halos sill have xSeed as "large" so these won't
-             ! be flagged as nearWall. We will account for this below.
-             dist = mynorm2(xp - xSeed(i, j, k, :))
-             if (dist < nearWallDist) then
-                nearWallTmp(i, j, k) = .True.
-             end if
-          end do
-       end do
-    end do
-
-    ! Now finally set the nearwall for the dual mesh cells. It is
-    ! considered a near wall if all "nodes" of the dual mesh cell are
-    ! also near wall. Have to be carful not to count boundary halos
-    ! since they do not have nearWallTmp Values.
-
-    do k=1, kl
-       do j=1, jl
-          do i=1, il
-             if (&
-                  (nearWallTmp(i  , j  , k  ) .or. globalCell(i  , j,   k  ) < 0) .and. &
-                  (nearWallTmp(i+1, j  , k  ) .or. globalCell(i+1, j,   k  ) < 0) .and. &
-                  (nearWallTmp(i  , j+1, k  ) .or. globalCell(i  , j+1, k  ) < 0) .and. &
-                  (nearWallTmp(i+1, j+1, k  ) .or. globalCell(i+1, j+1, k  ) < 0) .and. &
-                  (nearWallTmp(i  , j  , k+1) .or. globalCell(i  , j,   k+1) < 0) .and. &
-                  (nearWallTmp(i+1, j  , k+1) .or. globalCell(i+1, j,   k+1) < 0) .and. &
-                  (nearWallTmp(i  , j+1, k+1) .or. globalCell(i  , j+1, k+1) < 0) .and. &
-                  (nearWallTmp(i+1, j+1, k+1) .or. globalCell(i+1, j+1, k+1) < 0)) then
-                oBlock%nearWall(i, j, k) = 1
-             end if
-          end do
-       end do
-    end do
-
-    deallocate(nearWallTmp)
-    mm = 0
-    ! These are the 'elements' of the dual mesh.
-    planeOffset = ie * je
-    do k=2, ke
-       do j=2, je
-          do i=2, ie
-             mm = mm + 1
-             oBlock%hexaConn(1, mm) = (k-2)*planeOffset + (j-2)*ie + (i-2) + 1
-             oBlock%hexaConn(2, mm) = oBlock%hexaConn(1, mm) + 1
-             oBlock%hexaConn(3, mm) = oBlock%hexaConn(2, mm) + ie
-             oBlock%hexaConn(4, mm) = oBlock%hexaConn(3, mm) - 1
-
-             oBlock%hexaConn(5, mm) = oBlock%hexaConn(1, mm) + planeOffset
-             oBlock%hexaConn(6, mm) = oBlock%hexaConn(2, mm) + planeOffset
-             oBlock%hexaConn(7, mm) = oBlock%hexaConn(3, mm) + planeOffset
-             oBlock%hexaConn(8, mm) = oBlock%hexaConn(4, mm) + planeOffset
-          end do
-       end do
-    end do
-
-    ! Call the custom build routine -- Serial only, only Hexa volumes,
-    ! we supply our own ADT Type
-
-    call buildSerialHex(nHexa, nADT, oBlock%xADT, oBlock%hexaConn, oBlock%ADT)
-
-    ! Flag this block as being allocated
-    oBlock%allocated = .True.
-
-  end subroutine initializeOBlock
-
-  subroutine initializeOFringes(oFringe, nn, famList)
-
-    ! This subroutine initializes the fringe information for the given
-    ! block, level and spectral instance. It is assumed that
-    ! blockPointers are already set.
-    use constants
-    use communication, only : myID
-    use blockPointers
-    use oversetData, only : oversetFringe, clusters, cumDomProc
-    use stencils, only : visc_drdw_stencil, N_visc_drdw
-    use inputOverset, only :  backgroundVolScale
-    use sorting, only : famInList
-    use oversetUtilities, only :  wallsOnBlock, windIndex, unwindindex
-    implicit none
-
-    ! Input Params
-    type(oversetFringe), intent(inout) :: oFringe
-    integer(kind=intType), intent(in) :: nn
-    integer(kind=intType), intent(in), dimension(:) :: famList
-    ! Working Params
-    integer(kind=intTYpe) :: i, j, k, mm, iDim, ii, jj, kk, iii, jjj, myI, myJ, myK
-    integer(kind=intTYpe) :: iStart, iEnd, jStart, jEnd, kStart, kEnd
-    logical :: wallsPresent
-    integer(kind=intType) :: i_stencil
-    real(kind=realType) :: dist, frac, xp(3)
-    ! Check if we have walls:
-    call wallsOnBLock(wallsPresent)
-
-    ! Set the sizes for the oFringe and allocate the required space.
-    oFringe%il = il
-    oFringe%jl = jl
-    oFringe%kl = kl
-    oFringe%nx = nx
-    oFringe%ny = ny
-    oFringe%nz = nz
-    oFringe%block = nn
-    oFringe%cluster = clusters(cumDomProc(myid) + nn)
-    oFringe%proc = myid
-
-    mm = nx*ny*nz
-    allocate(oFringe%x(3, mm))
-    allocate(oFringe%xSeed(3, mm))
-    allocate(oFringe%wallInd(mm))
-    allocate(oFringe%isWall(mm))
-    oFringe%isWall = 0
-    oFringe%xSeed = large
-    oFringe%wallInd = 0
-
-    ! Assume each cell will get just one donor. It's just a guess, it
-    ! will be expanded if necessary so the exact value doesn't matter.
-    allocate(oFringe%fringeIntBuffer(5, mm), ofringe%fringeRealBuffer(4, mm))
-
-    oFringe%nDonor = 0
-    ! Now loop over the actual compute cells, setting the cell center
-    ! value 'x', the volume and flag these cells as compute
-    ii = 0
-    do k=2, kl
-       do j=2, jl
-          do i=2, il
-             ii = ii + 1
-             do iDim=1, 3
-                oFringe%x(iDim, ii) = eighth*(&
-                     x(i-1, j-1, k-1, iDim) + &
-                     x(i  , j-1, k-1, iDim) + &
-                     x(i-1, j  , k-1, iDim) + &
-                     x(i  , j  , k-1, iDim) + &
-                     x(i-1, j-1, k  , iDim) + &
-                     x(i  , j-1, k  , iDim) + &
-                     x(i-1, j  , k  , iDim) + &
-                     x(i  , j  , k  , iDim))
-             end do
-             oFringe%xSeed(:, ii) = xSeed(i, j, k, :)
-             oFringe%wallInd(ii) = wallInd(i, j, k)
-             oFringe%fringeIntBuffer(4, ii) = nn
-             oFringe%fringeIntBuffer(5, ii) = windIndex(i, j, k, il, jl, kl)
-
-          end do
-       end do
-    end do
-
-    ! We also need to flag a single layer of cells next a wall
-    ! boundary condition as being "isWall". This information is
-    ! necessary to be able to determine the "wall donors" which are
-    ! the flood seeds.
-
-    do mm=1, nBocos
-       select case(BCFaceID(mm))
-       case (iMin)
-          iStart=2; iEnd=2;
-          jStart=BCData(mm)%inBeg+1; jEnd=BCData(mm)%inEnd
-          kStart=BCData(mm)%jnBeg+1; kEnd=BCData(mm)%jnEnd
-       case (iMax)
-          iStart=il; iEnd=il;
-          jStart=BCData(mm)%inBeg+1; jEnd=BCData(mm)%inEnd
-          kStart=BCData(mm)%jnBeg+1; kEnd=BCData(mm)%jnEnd
-       case (jMin)
-          iStart=BCData(mm)%inBeg+1; iEnd=BCData(mm)%inEnd
-          jStart=2; jEnd=2;
-          kStart=BCData(mm)%jnBeg+1; kEnd=BCData(mm)%jnEnd
-       case (jMax)
-          iStart=BCData(mm)%inBeg+1; iEnd=BCData(mm)%inEnd
-          jStart=jl; jEnd=jl;
-          kStart=BCData(mm)%jnBeg+1; kEnd=BCData(mm)%jnEnd
-       case (kMin)
-          iStart=BCData(mm)%inBeg+1; iEnd=BCData(mm)%inEnd
-          jStart=BCData(mm)%jnBeg+1; jEnd=BCData(mm)%jnEnd
-          kStart=2; kEnd=2;
-       case (kMax)
-          iStart=BCData(mm)%inBeg+1; iEnd=BCData(mm)%inEnd
-          jStart=BCData(mm)%jnBeg+1; jEnd=BCData(mm)%jnEnd
-          kStart=kl; kEnd=kl;
-       end select
-
-       famInclude: if (famInList(BCdata(mm)%famID, famList)) then
-          do k=kStart, kEnd
-             do j=jStart, jEnd
-                do i=iStart, iEnd
-                   ! Recompute the index
-                   ii = (k-2)*nx*ny + (j-2)*nx + (i-2) + 1
-                   oFringe%isWall(ii) = bcFaceID(mm)
+    subroutine initializeStatus(level, sps)
+
+        ! This subroutine initializes the status variable for use in the
+        ! overset process
+
+        use constants
+        use blockPointers, only: nDom, il, jl, kl, ib, jb, kb, status
+        use oversetUtilities, only: setIsCompute, setIsWallDonor, setIsDonor, setIsReceiver
+        use utils, only: setPointers
+        implicit none
+
+        ! Input params
+        integer(kind=intType), intent(in) :: level, sps
+
+        ! Working parameters
+        integer(kind=intType) :: nn, i, j, k
+
+        do nn = 1, nDom
+            call setPointers(nn, level, sps)
+
+            ! Now loop over the owned cells and set the isCompute flag to
+            ! true.
+
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+                        status(i, j, k) = 0
+                        call setIsCompute(status(i, j, k), .True.)
+
+                        ! Additional initialization for full overset update mode
+                        call setIsWallDonor(status(i, j, k), .False.)
+                        call setIsDonor(status(i, j, k), .False.)
+                        call setIsReceiver(status(i, j, k), .False.)
+
+                    end do
+                end do
+            end do
+        end do
+
+    end subroutine initializeStatus
+
+    subroutine reInitializeStatus(level, sps)
+
+        ! This subroutine reinitializes the status variable. However, if
+        ! cell is a wallDonor, that information is kept.
+
+        use constants
+        use blockPointers, only: nDom, il, jl, kl, ib, jb, kb, status
+        use oversetUtilities, only: setIsCompute, isWallDonor, setIsWallDonor
+        use utils, only: setPointers
+        implicit none
+
+        ! Input params
+        integer(kind=intType), intent(in) :: level, sps
+
+        ! Working parameters
+        integer(kind=intType) :: nn, i, j, k
+        logical :: wDonor
+        do nn = 1, nDom
+            call setPointers(nn, level, sps)
+
+            ! Now loop over the owned cells and set the isCompute flag to
+            ! true.
+
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+                        wDonor = isWallDonor(status(i, j, k))
+                        status(i, j, k) = 0
+                        call setIsCompute(status(i, j, k), .True.)
+                        call setIsWallDonor(status(i, j, k), wDonor)
+                    end do
+                end do
+            end do
+        end do
+    end subroutine reInitializeStatus
+
+    subroutine initializeOBlock(oBlock, nn, level, sps)
+
+        ! This routine allocates the data for the supplied oBlock using the
+        !  data currently in blockPointers
+        use constants
+        use oversetData, only: oversetBlock, clusters, cumDomProc
+        use inputOverset, only: backgroundVolScale, nearWallDist, useOversetWallScaling
+        use blockPointers, only: x, globalCell, il, jl, kl, ib, jb, kb, &
+                                 ie, je, ke, vol, iBlank, xSeed, forcedRecv, nbkglobal, si, sj, sk
+        use cgnsGrid, only: cgnsDoms
+        use adtBuild, only: buildSerialHex
+        use communication, only: myID
+        use stencils, only: visc_drdw_stencil, n_visc_drdw
+        use utils, only: mynorm2
+        use oversetUtilities, only: wallsOnBlock
+        implicit none
+
+        ! Input Params
+        type(oversetBlock), intent(inout) :: oBlock
+        integer(kind=intType) :: nn, level, sps, kk
+
+        ! Working paramters
+        integer(kind=intType) :: i, j, k, mm, nADT, nHexa, planeOffset
+        integer(kind=intType) :: iStart, iEnd, jStart, jEnd, kStart, kEnd
+        real(kind=realType) :: factor, frac, dist, xp(3), aspect(3), fact
+        integer(kind=intType) :: i_stencil, ii, jj, iii
+        logical :: wallsPresent
+        logical, allocatable, dimension(:, :, :) :: nearWallTmp
+
+        ! Set all the sizes for this block.
+        oBlock%il = il
+        oBlock%jl = jl
+        oBlock%kl = kl
+
+        oBlock%proc = myID
+        oBlock%block = nn
+        oBlock%cluster = clusters(cumDomProc(myid) + nn)
+        call wallsOnBlock(wallsPresent)
+
+        ! Do the reset of the allocs
+        allocate ( &
+            oBlock%qualDonor(1, ie * je * ke), &
+            oBlock%globalCell(0:ib, 0:jb, 0:kb), &
+            oBlock%invalidDonor(1:ie, 1:je, 1:ke))
+
+        ! Invalid Donor array is simply if the cell is a forced receiver
+        ! or not.
+        do k = 1, ke
+            do j = 1, je
+                do i = 1, ie
+                    oBlock%invalidDonor(i, j, k) = forcedRecv(i, j, k)
+                end do
+            end do
+        end do
+
+        ! Compute the qualDonor depending on if we have a wall block or not.
+        mm = 0
+        do k = 1, ke
+            do j = 1, je
+                do i = 1, ie
+                    mm = mm + 1
+                    if (wallsPresent) then
+
+                        ! Modify based on aspect ratio of the cell in the
+                        ! k-direcion. High aspect ratioin BL
+                        aspect = one
+                        if (useOversetWallScaling) then
+                            if (CGNSDoms(nbkGlobal)%viscousDir(1)) &
+                             aspect(1) = (half * (mynorm2(si(i - 1, j, k, :)) + mynorm2(si(i, j, k, :)))) / vol(i, j, k)
+                            if (CGNSDoms(nbkGlobal)%viscousDir(2)) &
+                             aspect(2) = (half * (mynorm2(sj(i, j - 1, k, :)) + mynorm2(sj(i, j, k, :)))) / vol(i, j, k)
+                            if (CGNSDoms(nbkGlobal)%viscousDir(3)) &
+                             aspect(3) = (half * (mynorm2(sk(i, j, k - 1, :)) + mynorm2(sk(i, j, k, :)))) / vol(i, j, k)
+                        end if
+                        fact = min(aspect(1) * aspect(2) * aspect(3), 100.0_realType)
+
+                        oBlock%qualDonor(1, mm) = (vol(i, j, k)**third) / fact
+                    else
+                        oBlock%qualDonor(1, mm) = (backGroundVolScale * vol(i, j, k))**third
+                    end if
+
+                    ! Account for explicit scaling of quality
+                    oblock%qualDonor(1, mm) = oblock%qualDonor(1, mm) * cgnsDoms(nbkglobal)%priority
                 end do
-             end do
-          end do
-       end if famInclude
-    end do ! BocoLoop
-
-    ! Flag this set of fringes as being allocated
-    oFringe%allocated = .True.
-  end subroutine initializeOFringes
-
-  subroutine initializeOSurf(famList, oSurf, dualMesh, cluster)
-
-    ! This routine builds the ADT tree for any wall surfaces for the
-    ! block currently being pointed to by block Pointers.
-    use constants
-    use oversetData, only : oversetWall
-    use blockPointers, only : nBocos, BCData, BCFaceID, il, jl, kl, &
-         BCFaceID, x, BCType, rightHanded, nbklocal
-    use adtBuild, only : buildSerialQuad
-    use kdtree2_module, onlY : kdtree2_create
-    use oversetPackingRoutines, only : getWallSize
-    use sorting, only : famInList
-    implicit none
-
-    ! Input Params
-    integer(kind=intType), intent(in), dimension(:) :: famList
-    type(oversetWall), intent(inout) :: oSurf
-    logical, intent(in) :: dualMesh
-    integer(kind=intType), intent(in) :: cluster
-
-    ! Working paramters
-    integer(kind=intType) :: i, j, k, n, ii, jj, jjj, mm, ni, nj, nodeCount
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, nNodes, maxCells, nCells, iNode
-    logical :: regularOrdering
-
-    ! Set all the sizes for this block.
-    oSurf%il = il
-    oSurf%jl = jl
-    oSurf%kl = kl
-
-    call getWallSize(famList, nNodes, maxCells, dualMesh)
-
-    oSurf%nNodes = nNodes
-    oSurf%maxCells = maxCells
-    oSurf%cluster = cluster
-    ! Allocate space for the x array and connectivity array. cellPtr is
-    ! larger than necessary.
-    allocate(oSurf%x(3, nNodes), oSurf%conn(4, maxCells), &
-         oSurf%cellPtr(maxCells), oSurf%iBlank(maxCells), &
-         oSurf%delta(nNodes), oSurf%nte(4, nNodes))
-
-    ii = 0 ! Cumulative node counter
-    jj = 0 ! Cumulative cell counter (with iblanks)
-    jjj = 0 !Cumulative cell counter (without iblanks)
-    nodeCount = 0
-
-    do mm=1, nBocos
-       famInclude: if (famInList(BCData(mm)%famID, famList)) then
-
-          select case(BCFaceID(mm))
-          case(iMin, jMax, kMin)
-             regularOrdering = .True.
-          case default
-             regularOrdering = .False.
-          end select
-
-          ! Now this can be reversed *again* if we have a block that
-          ! is left handed.
-          if (.not. rightHanded) then
-             regularOrdering = .not. (regularOrdering)
-          end if
-
-          ! THIS IS SUPER IMPORTANT: It is absolutely critical that the
-          ! wall be built *FROM THE DUAL MESH!!* IT WILL NOT WORK IF YOU
-          ! USE THE PRIMAL MESH! The -1 for the node ranges below gives
-          ! the extra '1' node for the mesh formed from the dual cells.
-
-          dualCheck: if (dualMesh) then
-             jBeg = BCData(mm)%jnBeg-1 ; jEnd = BCData(mm)%jnEnd
-             iBeg = BCData(mm)%inBeg-1 ; iEnd = BCData(mm)%inEnd
-             ! Now fill up the point array
-             do j=jBeg, jEnd
-                do i=iBeg, iEnd
-                   ii = ii +1
-                   select case(BCFaceID(mm))
-                   case(imin)
-                      oSurf%x(:,ii) = fourth*(x(1, i, j, :) + x(1, i+1, j, :) + &
-                           x(1, i, j+1, :) + x(1, i+1, j+1, :))
-                   case(imax)
-                      oSurf%x(:,ii) = fourth*(x(il, i, j, :) + x(il, i+1, j, :) + &
-                           x(il, i, j+1, :) + x(il, i+1, j+1, :))
-                   case(jmin)
-                      oSurf%x(:,ii) = fourth*(x(i, 1, j, :) + x(i+1, 1, j, :) + &
-                           x(i, 1, j+1, :) + x(i+1, 1, j+1, :))
-                   case(jmax)
-                      oSurf%x(:,ii) = fourth*(x(i, jl, j, :) + x(i+1, jl, j, :) + &
-                           x(i, jl, j+1, :) + x(i+1, jl, j+1, :))
-                   case(kmin)
-                      oSurf%x(:,ii) = fourth*(x(i, j, 1, :) + x(i+1, j, 1, :) + &
-                           x(i, j+1, 1, :) + x(i+1, j+1, 1, :))
-                   case(kmax)
-                      oSurf%x(:,ii) = fourth*(x(i, j, kl, :) + x(i+1, j, kl, :) + &
-                           x(i, j+1, kl, :) + x(i+1, j+1, kl, :))
-                   end select
+            end do
+        end do
+
+        !Copy over global cell
+        oBlock%globalCell = globalCell
+
+        ! Now setup the data for the ADT
+        nHexa = il * jl * kl
+        nADT = ie * je * ke
+
+        allocate (oBlock%xADT(3, nADT), oBlock%hexaConn(8, nHexa))
+        ! Fill up the xADT using cell centers (dual mesh)
+        mm = 0
+
+        ! Allocate the nearWall
+        allocate (oBlock%nearWall(1:il, 1:jl, 1:kl))
+        oBlock%nearWall = 0
+
+        allocate (nearWallTmp(1:ie, 1:je, 1:ke))
+        nearWallTmp = .False.
+
+        do k = 1, ke
+            do j = 1, je
+                do i = 1, ie
+                    mm = mm + 1
+                    xp = eighth * ( &
+                         x(i - 1, j - 1, k - 1, :) + &
+                         x(i, j - 1, k - 1, :) + &
+                         x(i - 1, j, k - 1, :) + &
+                         x(i, j, k - 1, :) + &
+                         x(i - 1, j - 1, k, :) + &
+                         x(i, j - 1, k, :) + &
+                         x(i - 1, j, k, :) + &
+                         x(i, j, k, :))
+                    oBlock%xADT(:, mm) = xp
+
+                    ! Determine if this point is near wall. Note that the
+                    ! boundary halos sill have xSeed as "large" so these won't
+                    ! be flagged as nearWall. We will account for this below.
+                    dist = mynorm2(xp - xSeed(i, j, k, :))
+                    if (dist < nearWallDist) then
+                        nearWallTmp(i, j, k) = .True.
+                    end if
                 end do
-             end do
-
-             ! Fill up the conn array. Note that don't take the
-             ! surface`normal direction (in or out) or the cell
-             ! handed-ness into account...it is not necessary since we
-             ! are just getting distance to the wall, which is
-             ! independent of the orientation.
-
-             ni = iEnd - iBeg + 1
-             nj = jEnd - jBeg + 1
-             do j=0, nj-2
-                do i=0, ni-2
-                   jj = jj + 1
-                   if (regularOrdering) then
-                      oSurf%conn(1, jj) = nodeCount + (j  )*ni + i + 1 ! n1
-                      oSurf%conn(2, jj) = nodeCount + (j  )*ni + i + 2 ! n2
-                      oSurf%conn(3, jj) = nodeCount + (j+1)*ni + i + 2 ! n3
-                      oSurf%conn(4, jj) = nodeCount + (j+1)*ni + i + 1 ! n4
-                   else
-                      oSurf%conn(1, jj) = nodeCount + (j  )*ni + i + 1 ! n1
-                      oSurf%conn(2, jj) = nodeCount + (j+1)*ni + i + 1 ! n4
-                      oSurf%conn(3, jj) = nodeCount + (j+1)*ni + i + 2 ! n3
-                      oSurf%conn(4, jj) = nodeCount + (j  )*ni + i + 2 ! n2
-                   end if
+            end do
+        end do
+
+        ! Now finally set the nearwall for the dual mesh cells. It is
+        ! considered a near wall if all "nodes" of the dual mesh cell are
+        ! also near wall. Have to be carful not to count boundary halos
+        ! since they do not have nearWallTmp Values.
+
+        do k = 1, kl
+            do j = 1, jl
+                do i = 1, il
+                    if ( &
+                        (nearWallTmp(i, j, k) .or. globalCell(i, j, k) < 0) .and. &
+                        (nearWallTmp(i + 1, j, k) .or. globalCell(i + 1, j, k) < 0) .and. &
+                        (nearWallTmp(i, j + 1, k) .or. globalCell(i, j + 1, k) < 0) .and. &
+                        (nearWallTmp(i + 1, j + 1, k) .or. globalCell(i + 1, j + 1, k) < 0) .and. &
+                        (nearWallTmp(i, j, k + 1) .or. globalCell(i, j, k + 1) < 0) .and. &
+                        (nearWallTmp(i + 1, j, k + 1) .or. globalCell(i + 1, j, k + 1) < 0) .and. &
+                        (nearWallTmp(i, j + 1, k + 1) .or. globalCell(i, j + 1, k + 1) < 0) .and. &
+                        (nearWallTmp(i + 1, j + 1, k + 1) .or. globalCell(i + 1, j + 1, k + 1) < 0)) then
+                        oBlock%nearWall(i, j, k) = 1
+                    end if
                 end do
-             end do
-             nodeCount = nodeCount + ni*nj
-
-             ! We don't care about iBlank, cellPtr or delta for the dual
-             ! mesh
-             oSurf%iBlank = 1
-             oSurf%cellPtr = 0
-             oSurf%delta = zero
-          else ! Using the primal mesh
-             jBeg = BCData(mm)%jnBeg ; jEnd = BCData(mm)%jnEnd
-             iBeg = BCData(mm)%inBeg ; iEnd = BCData(mm)%inEnd
-
-             ! Now fill up the point array. Owned node loop.
-             do j=jBeg, jEnd
-                do i=iBeg, iEnd
-                   ii = ii +1
-                   select case(BCFaceID(mm))
-                   case(imin)
-                      oSurf%x(:,ii) = x(1, i, j, :)
-                   case(imax)
-                      oSurf%x(:,ii) = x(il, i, j, :)
-                   case(jmin)
-                      oSurf%x(:,ii) = x(i, 1, j, :)
-                   case(jmax)
-                      oSurf%x(:,ii) = x(i, jl, j, :)
-                   case(kmin)
-                      oSurf%x(:,ii) = x(i, j, 1, :)
-                   case(kmax)
-                      oSurf%x(:,ii) = x(i, j, kl, :)
-                   end select
-                   oSurf%delta(ii) = BCData(mm)%deltaNode(i, j)
+            end do
+        end do
+
+        deallocate (nearWallTmp)
+        mm = 0
+        ! These are the 'elements' of the dual mesh.
+        planeOffset = ie * je
+        do k = 2, ke
+            do j = 2, je
+                do i = 2, ie
+                    mm = mm + 1
+                    oBlock%hexaConn(1, mm) = (k - 2) * planeOffset + (j - 2) * ie + (i - 2) + 1
+                    oBlock%hexaConn(2, mm) = oBlock%hexaConn(1, mm) + 1
+                    oBlock%hexaConn(3, mm) = oBlock%hexaConn(2, mm) + ie
+                    oBlock%hexaConn(4, mm) = oBlock%hexaConn(3, mm) - 1
+
+                    oBlock%hexaConn(5, mm) = oBlock%hexaConn(1, mm) + planeOffset
+                    oBlock%hexaConn(6, mm) = oBlock%hexaConn(2, mm) + planeOffset
+                    oBlock%hexaConn(7, mm) = oBlock%hexaConn(3, mm) + planeOffset
+                    oBlock%hexaConn(8, mm) = oBlock%hexaConn(4, mm) + planeOffset
                 end do
-             end do
-
-             ! Fill up the conn array being careful to *only* adding
-             ! cells that are not already blanked.
-             ni = iEnd - iBeg + 1
-             nj = jEnd - jBeg + 1
-             do j=0, nj-2
-                do i=0, ni-2
-                   jjj = jjj + 1
-                   oSurf%iBlank(jjj) = BCData(mm)%iblank(iBeg+i+1,jBeg+j+1)
-                   if (oSurf%iBlank(jjj) == 1) then
-                      jj = jj + 1
-                      if (regularOrdering) then
-                         oSurf%conn(1, jj) = nodeCount + (j  )*ni + i + 1 ! n1
-                         oSurf%conn(2, jj) = nodeCount + (j  )*ni + i + 2 ! n2
-                         oSurf%conn(3, jj) = nodeCount + (j+1)*ni + i + 2 ! n3
-                         oSurf%conn(4, jj) = nodeCount + (j+1)*ni + i + 1 ! n4
-                      else
-                         oSurf%conn(1, jj) = nodeCount + (j  )*ni + i + 1 ! n1
-                         oSurf%conn(2, jj) = nodeCount + (j+1)*ni + i + 1 ! n4
-                         oSurf%conn(3, jj) = nodeCount + (j+1)*ni + i + 2 ! n3
-                         oSurf%conn(4, jj) = nodeCount + (j  )*ni + i + 2 ! n2
-                      end if
-                      oSurf%cellPtr(jj) = jjj
-                   end if
+            end do
+        end do
+
+        ! Call the custom build routine -- Serial only, only Hexa volumes,
+        ! we supply our own ADT Type
+
+        call buildSerialHex(nHexa, nADT, oBlock%xADT, oBlock%hexaConn, oBlock%ADT)
+
+        ! Flag this block as being allocated
+        oBlock%allocated = .True.
+
+    end subroutine initializeOBlock
+
+    subroutine initializeOFringes(oFringe, nn, famList)
+
+        ! This subroutine initializes the fringe information for the given
+        ! block, level and spectral instance. It is assumed that
+        ! blockPointers are already set.
+        use constants
+        use communication, only: myID
+        use blockPointers
+        use oversetData, only: oversetFringe, clusters, cumDomProc
+        use stencils, only: visc_drdw_stencil, N_visc_drdw
+        use inputOverset, only: backgroundVolScale
+        use sorting, only: famInList
+        use oversetUtilities, only: wallsOnBlock, windIndex, unwindindex
+        implicit none
+
+        ! Input Params
+        type(oversetFringe), intent(inout) :: oFringe
+        integer(kind=intType), intent(in) :: nn
+        integer(kind=intType), intent(in), dimension(:) :: famList
+        ! Working Params
+        integer(kind=intTYpe) :: i, j, k, mm, iDim, ii, jj, kk, iii, jjj, myI, myJ, myK
+        integer(kind=intTYpe) :: iStart, iEnd, jStart, jEnd, kStart, kEnd
+        logical :: wallsPresent
+        integer(kind=intType) :: i_stencil
+        real(kind=realType) :: dist, frac, xp(3)
+        ! Check if we have walls:
+        call wallsOnBLock(wallsPresent)
+
+        ! Set the sizes for the oFringe and allocate the required space.
+        oFringe%il = il
+        oFringe%jl = jl
+        oFringe%kl = kl
+        oFringe%nx = nx
+        oFringe%ny = ny
+        oFringe%nz = nz
+        oFringe%block = nn
+        oFringe%cluster = clusters(cumDomProc(myid) + nn)
+        oFringe%proc = myid
+
+        mm = nx * ny * nz
+        allocate (oFringe%x(3, mm))
+        allocate (oFringe%xSeed(3, mm))
+        allocate (oFringe%wallInd(mm))
+        allocate (oFringe%isWall(mm))
+        oFringe%isWall = 0
+        oFringe%xSeed = large
+        oFringe%wallInd = 0
+
+        ! Assume each cell will get just one donor. It's just a guess, it
+        ! will be expanded if necessary so the exact value doesn't matter.
+        allocate (oFringe%fringeIntBuffer(5, mm), ofringe%fringeRealBuffer(4, mm))
+
+        oFringe%nDonor = 0
+        ! Now loop over the actual compute cells, setting the cell center
+        ! value 'x', the volume and flag these cells as compute
+        ii = 0
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    ii = ii + 1
+                    do iDim = 1, 3
+                        oFringe%x(iDim, ii) = eighth * ( &
+                                              x(i - 1, j - 1, k - 1, iDim) + &
+                                              x(i, j - 1, k - 1, iDim) + &
+                                              x(i - 1, j, k - 1, iDim) + &
+                                              x(i, j, k - 1, iDim) + &
+                                              x(i - 1, j - 1, k, iDim) + &
+                                              x(i, j - 1, k, iDim) + &
+                                              x(i - 1, j, k, iDim) + &
+                                              x(i, j, k, iDim))
+                    end do
+                    oFringe%xSeed(:, ii) = xSeed(i, j, k, :)
+                    oFringe%wallInd(ii) = wallInd(i, j, k)
+                    oFringe%fringeIntBuffer(4, ii) = nn
+                    oFringe%fringeIntBuffer(5, ii) = windIndex(i, j, k, il, jl, kl)
+
+                end do
+            end do
+        end do
+
+        ! We also need to flag a single layer of cells next a wall
+        ! boundary condition as being "isWall". This information is
+        ! necessary to be able to determine the "wall donors" which are
+        ! the flood seeds.
+
+        do mm = 1, nBocos
+            select case (BCFaceID(mm))
+            case (iMin)
+                iStart = 2; iEnd = 2; 
+                jStart = BCData(mm)%inBeg + 1; jEnd = BCData(mm)%inEnd
+                kStart = BCData(mm)%jnBeg + 1; kEnd = BCData(mm)%jnEnd
+            case (iMax)
+                iStart = il; iEnd = il; 
+                jStart = BCData(mm)%inBeg + 1; jEnd = BCData(mm)%inEnd
+                kStart = BCData(mm)%jnBeg + 1; kEnd = BCData(mm)%jnEnd
+            case (jMin)
+                iStart = BCData(mm)%inBeg + 1; iEnd = BCData(mm)%inEnd
+                jStart = 2; jEnd = 2; 
+                kStart = BCData(mm)%jnBeg + 1; kEnd = BCData(mm)%jnEnd
+            case (jMax)
+                iStart = BCData(mm)%inBeg + 1; iEnd = BCData(mm)%inEnd
+                jStart = jl; jEnd = jl; 
+                kStart = BCData(mm)%jnBeg + 1; kEnd = BCData(mm)%jnEnd
+            case (kMin)
+                iStart = BCData(mm)%inBeg + 1; iEnd = BCData(mm)%inEnd
+                jStart = BCData(mm)%jnBeg + 1; jEnd = BCData(mm)%jnEnd
+                kStart = 2; kEnd = 2; 
+            case (kMax)
+                iStart = BCData(mm)%inBeg + 1; iEnd = BCData(mm)%inEnd
+                jStart = BCData(mm)%jnBeg + 1; jEnd = BCData(mm)%jnEnd
+                kStart = kl; kEnd = kl; 
+            end select
+
+            famInclude: if (famInList(BCdata(mm)%famID, famList)) then
+                do k = kStart, kEnd
+                    do j = jStart, jEnd
+                        do i = iStart, iEnd
+                            ! Recompute the index
+                            ii = (k - 2) * nx * ny + (j - 2) * nx + (i - 2) + 1
+                            oFringe%isWall(ii) = bcFaceID(mm)
+                        end do
+                    end do
                 end do
-             end do
-             nodeCount = nodeCount + ni*nj
-          end if dualCheck
-       end if famInclude
-    end do
-
-    ! Set the actual number of cells
-    oSurf%nCells = jj
-
-    ! Build the tree itself.
-    call buildSerialQuad(oSurf%nCells, nNodes, oSurf%x, oSurf%conn, oSurf%ADT)
-
-    ! Build the KDTree
-    if (oSurf%nNodes > 0) then
-       oSurf%tree => kdtree2_create(oSurf%x)
-    end if
-
-    ! Build the inverse of the connectivity, the nodeToElem array.
-    oSurf%nte = 0
-    do i=1, oSurf%nCells
-       do j=1, 4
-          n = oSurf%conn(j, i)
-          inner:do k=1, 4
-             if (oSurf%nte(k, n) == 0) then
-                oSurf%nte(k, n) = i
-                exit inner
-             end if
-          end do inner
-       end do
-    end do
-
-    ! Flag this wall as being allocated
-    oSurf%allocated = .True.
-
-  end subroutine initializeOSurf
+            end if famInclude
+        end do ! BocoLoop
+
+        ! Flag this set of fringes as being allocated
+        oFringe%allocated = .True.
+    end subroutine initializeOFringes
+
+    subroutine initializeOSurf(famList, oSurf, dualMesh, cluster)
+
+        ! This routine builds the ADT tree for any wall surfaces for the
+        ! block currently being pointed to by block Pointers.
+        use constants
+        use oversetData, only: oversetWall
+        use blockPointers, only: nBocos, BCData, BCFaceID, il, jl, kl, &
+                                 BCFaceID, x, BCType, rightHanded, nbklocal
+        use adtBuild, only: buildSerialQuad
+        use kdtree2_module, onlY: kdtree2_create
+        use oversetPackingRoutines, only: getWallSize
+        use sorting, only: famInList
+        implicit none
+
+        ! Input Params
+        integer(kind=intType), intent(in), dimension(:) :: famList
+        type(oversetWall), intent(inout) :: oSurf
+        logical, intent(in) :: dualMesh
+        integer(kind=intType), intent(in) :: cluster
+
+        ! Working paramters
+        integer(kind=intType) :: i, j, k, n, ii, jj, jjj, mm, ni, nj, nodeCount
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, nNodes, maxCells, nCells, iNode
+        logical :: regularOrdering
+
+        ! Set all the sizes for this block.
+        oSurf%il = il
+        oSurf%jl = jl
+        oSurf%kl = kl
+
+        call getWallSize(famList, nNodes, maxCells, dualMesh)
+
+        oSurf%nNodes = nNodes
+        oSurf%maxCells = maxCells
+        oSurf%cluster = cluster
+        ! Allocate space for the x array and connectivity array. cellPtr is
+        ! larger than necessary.
+        allocate (oSurf%x(3, nNodes), oSurf%conn(4, maxCells), &
+                  oSurf%cellPtr(maxCells), oSurf%iBlank(maxCells), &
+                  oSurf%delta(nNodes), oSurf%nte(4, nNodes))
+
+        ii = 0 ! Cumulative node counter
+        jj = 0 ! Cumulative cell counter (with iblanks)
+        jjj = 0 !Cumulative cell counter (without iblanks)
+        nodeCount = 0
+
+        do mm = 1, nBocos
+            famInclude: if (famInList(BCData(mm)%famID, famList)) then
+
+                select case (BCFaceID(mm))
+                case (iMin, jMax, kMin)
+                    regularOrdering = .True.
+                case default
+                    regularOrdering = .False.
+                end select
+
+                ! Now this can be reversed *again* if we have a block that
+                ! is left handed.
+                if (.not. rightHanded) then
+                    regularOrdering = .not. (regularOrdering)
+                end if
+
+                ! THIS IS SUPER IMPORTANT: It is absolutely critical that the
+                ! wall be built *FROM THE DUAL MESH!!* IT WILL NOT WORK IF YOU
+                ! USE THE PRIMAL MESH! The -1 for the node ranges below gives
+                ! the extra '1' node for the mesh formed from the dual cells.
+
+                dualCheck: if (dualMesh) then
+                    jBeg = BCData(mm)%jnBeg - 1; jEnd = BCData(mm)%jnEnd
+                    iBeg = BCData(mm)%inBeg - 1; iEnd = BCData(mm)%inEnd
+                    ! Now fill up the point array
+                    do j = jBeg, jEnd
+                        do i = iBeg, iEnd
+                            ii = ii + 1
+                            select case (BCFaceID(mm))
+                            case (imin)
+                                oSurf%x(:, ii) = fourth * (x(1, i, j, :) + x(1, i + 1, j, :) + &
+                                                           x(1, i, j + 1, :) + x(1, i + 1, j + 1, :))
+                            case (imax)
+                                oSurf%x(:, ii) = fourth * (x(il, i, j, :) + x(il, i + 1, j, :) + &
+                                                           x(il, i, j + 1, :) + x(il, i + 1, j + 1, :))
+                            case (jmin)
+                                oSurf%x(:, ii) = fourth * (x(i, 1, j, :) + x(i + 1, 1, j, :) + &
+                                                           x(i, 1, j + 1, :) + x(i + 1, 1, j + 1, :))
+                            case (jmax)
+                                oSurf%x(:, ii) = fourth * (x(i, jl, j, :) + x(i + 1, jl, j, :) + &
+                                                           x(i, jl, j + 1, :) + x(i + 1, jl, j + 1, :))
+                            case (kmin)
+                                oSurf%x(:, ii) = fourth * (x(i, j, 1, :) + x(i + 1, j, 1, :) + &
+                                                           x(i, j + 1, 1, :) + x(i + 1, j + 1, 1, :))
+                            case (kmax)
+                                oSurf%x(:, ii) = fourth * (x(i, j, kl, :) + x(i + 1, j, kl, :) + &
+                                                           x(i, j + 1, kl, :) + x(i + 1, j + 1, kl, :))
+                            end select
+                        end do
+                    end do
+
+                    ! Fill up the conn array. Note that don't take the
+                    ! surface`normal direction (in or out) or the cell
+                    ! handed-ness into account...it is not necessary since we
+                    ! are just getting distance to the wall, which is
+                    ! independent of the orientation.
+
+                    ni = iEnd - iBeg + 1
+                    nj = jEnd - jBeg + 1
+                    do j = 0, nj - 2
+                        do i = 0, ni - 2
+                            jj = jj + 1
+                            if (regularOrdering) then
+                                oSurf%conn(1, jj) = nodeCount + (j) * ni + i + 1 ! n1
+                                oSurf%conn(2, jj) = nodeCount + (j) * ni + i + 2 ! n2
+                                oSurf%conn(3, jj) = nodeCount + (j + 1) * ni + i + 2 ! n3
+                                oSurf%conn(4, jj) = nodeCount + (j + 1) * ni + i + 1 ! n4
+                            else
+                                oSurf%conn(1, jj) = nodeCount + (j) * ni + i + 1 ! n1
+                                oSurf%conn(2, jj) = nodeCount + (j + 1) * ni + i + 1 ! n4
+                                oSurf%conn(3, jj) = nodeCount + (j + 1) * ni + i + 2 ! n3
+                                oSurf%conn(4, jj) = nodeCount + (j) * ni + i + 2 ! n2
+                            end if
+                        end do
+                    end do
+                    nodeCount = nodeCount + ni * nj
+
+                    ! We don't care about iBlank, cellPtr or delta for the dual
+                    ! mesh
+                    oSurf%iBlank = 1
+                    oSurf%cellPtr = 0
+                    oSurf%delta = zero
+                else ! Using the primal mesh
+                    jBeg = BCData(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+                    iBeg = BCData(mm)%inBeg; iEnd = BCData(mm)%inEnd
+
+                    ! Now fill up the point array. Owned node loop.
+                    do j = jBeg, jEnd
+                        do i = iBeg, iEnd
+                            ii = ii + 1
+                            select case (BCFaceID(mm))
+                            case (imin)
+                                oSurf%x(:, ii) = x(1, i, j, :)
+                            case (imax)
+                                oSurf%x(:, ii) = x(il, i, j, :)
+                            case (jmin)
+                                oSurf%x(:, ii) = x(i, 1, j, :)
+                            case (jmax)
+                                oSurf%x(:, ii) = x(i, jl, j, :)
+                            case (kmin)
+                                oSurf%x(:, ii) = x(i, j, 1, :)
+                            case (kmax)
+                                oSurf%x(:, ii) = x(i, j, kl, :)
+                            end select
+                            oSurf%delta(ii) = BCData(mm)%deltaNode(i, j)
+                        end do
+                    end do
+
+                    ! Fill up the conn array being careful to *only* adding
+                    ! cells that are not already blanked.
+                    ni = iEnd - iBeg + 1
+                    nj = jEnd - jBeg + 1
+                    do j = 0, nj - 2
+                        do i = 0, ni - 2
+                            jjj = jjj + 1
+                            oSurf%iBlank(jjj) = BCData(mm)%iblank(iBeg + i + 1, jBeg + j + 1)
+                            if (oSurf%iBlank(jjj) == 1) then
+                                jj = jj + 1
+                                if (regularOrdering) then
+                                    oSurf%conn(1, jj) = nodeCount + (j) * ni + i + 1 ! n1
+                                    oSurf%conn(2, jj) = nodeCount + (j) * ni + i + 2 ! n2
+                                    oSurf%conn(3, jj) = nodeCount + (j + 1) * ni + i + 2 ! n3
+                                    oSurf%conn(4, jj) = nodeCount + (j + 1) * ni + i + 1 ! n4
+                                else
+                                    oSurf%conn(1, jj) = nodeCount + (j) * ni + i + 1 ! n1
+                                    oSurf%conn(2, jj) = nodeCount + (j + 1) * ni + i + 1 ! n4
+                                    oSurf%conn(3, jj) = nodeCount + (j + 1) * ni + i + 2 ! n3
+                                    oSurf%conn(4, jj) = nodeCount + (j) * ni + i + 2 ! n2
+                                end if
+                                oSurf%cellPtr(jj) = jjj
+                            end if
+                        end do
+                    end do
+                    nodeCount = nodeCount + ni * nj
+                end if dualCheck
+            end if famInclude
+        end do
+
+        ! Set the actual number of cells
+        oSurf%nCells = jj
+
+        ! Build the tree itself.
+        call buildSerialQuad(oSurf%nCells, nNodes, oSurf%x, oSurf%conn, oSurf%ADT)
+
+        ! Build the KDTree
+        if (oSurf%nNodes > 0) then
+            oSurf%tree => kdtree2_create(oSurf%x)
+        end if
+
+        ! Build the inverse of the connectivity, the nodeToElem array.
+        oSurf%nte = 0
+        do i = 1, oSurf%nCells
+            do j = 1, 4
+                n = oSurf%conn(j, i)
+                inner: do k = 1, 4
+                    if (oSurf%nte(k, n) == 0) then
+                        oSurf%nte(k, n) = i
+                        exit inner
+                    end if
+                end do inner
+            end do
+        end do
+
+        ! Flag this wall as being allocated
+        oSurf%allocated = .True.
+
+    end subroutine initializeOSurf
 
 end module oversetInitialization
diff --git a/src/overset/oversetPackingRoutines.F90 b/src/overset/oversetPackingRoutines.F90
index b8fe01fd7..1e2924bf9 100644
--- a/src/overset/oversetPackingRoutines.F90
+++ b/src/overset/oversetPackingRoutines.F90
@@ -1,836 +1,835 @@
 module oversetPackingRoutines
 contains
 
-  subroutine getOBlockBufferSizes(il, jl, kl, iSize, rSize)
+    subroutine getOBlockBufferSizes(il, jl, kl, iSize, rSize)
 
-    ! Subroutine to get the required buffer sizes. This only uses the
-    ! block dimensions and technically doesn't have anything to do with
-    ! oBlock. This allows us to figure out the sizes and perform a
-    ! global communication before actually building the ADTrees, which
-    ! may not be that well load-balanced.
+        ! Subroutine to get the required buffer sizes. This only uses the
+        ! block dimensions and technically doesn't have anything to do with
+        ! oBlock. This allows us to figure out the sizes and perform a
+        ! global communication before actually building the ADTrees, which
+        ! may not be that well load-balanced.
 
-    use constants
-    implicit none
+        use constants
+        implicit none
 
-    ! Input/OUtput
-    integer(kind=intType) :: il, jl ,kl
-    integer(kind=intType) :: rSize, iSize
+        ! Input/OUtput
+        integer(kind=intType) :: il, jl, kl
+        integer(kind=intType) :: rSize, iSize
 
-    ! Working paramters
-    integer(kind=intType) :: ie, je, ke, nBBox, nLeaves
+        ! Working paramters
+        integer(kind=intType) :: ie, je, ke, nBBox, nLeaves
 
-    ! Initializeation
-    iSize = 0
-    rSize = 0
+        ! Initializeation
+        iSize = 0
+        rSize = 0
 
-    ! Create ie sizes as well
-    ie = il + 1; je = jl + 1; ke = kl+1
+        ! Create ie sizes as well
+        ie = il + 1; je = jl + 1; ke = kl + 1
 
-    ! Count up the integers we want to send:
+        ! Count up the integers we want to send:
 
-    iSize = iSize + 6 ! Blocks sizes + proc + nn + cluster
+        iSize = iSize + 6 ! Blocks sizes + proc + nn + cluster
 
-    iSize = iSize + 8*il*jl*kl ! hexa conn
+        iSize = iSize + 8 * il * jl * kl ! hexa conn
 
-    iSize = iSize + (ie+2)*(je+2)*(ke+2) ! global cell
+        iSize = iSize + (ie + 2) * (je + 2) * (ke + 2) ! global cell
 
-    iSize = iSize + il*jl*kl ! nearWall
+        iSize = iSize + il * jl * kl ! nearWall
 
-    iSize = iSize + ie*je*ke ! invalidDonor
+        iSize = iSize + ie * je * ke ! invalidDonor
 
-    ! Number of boxes in the ADT is the same as the number of elements
-    ! (like hexa conn (without the *8 obviously)
-    nBBox = il * jl * kl
-    nLeaves = nBBox-1 ! See ADT/adtBuild.f90
+        ! Number of boxes in the ADT is the same as the number of elements
+        ! (like hexa conn (without the *8 obviously)
+        nBBox = il * jl * kl
+        nLeaves = nBBox - 1 ! See ADT/adtBuild.f90
 
-    iSize = iSize + nLeaves*2 ! Two ints for the children in each leaf
+        iSize = iSize + nLeaves * 2 ! Two ints for the children in each leaf
 
-    ! Count up the reals we ned to send:
-    rSize = rSize + ie*je*ke ! qualDonor
+        ! Count up the reals we ned to send:
+        rSize = rSize + ie * je * ke ! qualDonor
 
-    rSize = rSize + 3*ie*je*ke ! xADT
+        rSize = rSize + 3 * ie * je * ke ! xADT
 
-    rSize = rSize + nBBox*6 ! Cell bounding boxes
+        rSize = rSize + nBBox * 6 ! Cell bounding boxes
 
-    rSize = rSize + nLeaves*12 ! Bounding boxes for leaves
+        rSize = rSize + nLeaves * 12 ! Bounding boxes for leaves
 
-    rSize = rSize + 1 ! Min block volume
-  end subroutine getOBlockBufferSizes
+        rSize = rSize + 1 ! Min block volume
+    end subroutine getOBlockBufferSizes
 
-  subroutine packOBlock(oBlock)
+    subroutine packOBlock(oBlock)
 
-    use constants
-    use oversetData, only : oversetBlock
-    implicit none
+        use constants
+        use oversetData, only: oversetBlock
+        implicit none
 
-    ! Pack up everything we need for this block into its own buffer
-    ! inlucding the data required for the ADTree
+        ! Pack up everything we need for this block into its own buffer
+        ! inlucding the data required for the ADTree
 
-    ! Input/Output Parameters
-    type(oversetBlock), intent(inout) :: oBlock
+        ! Input/Output Parameters
+        type(oversetBlock), intent(inout) :: oBlock
 
-    ! Working paramters
-    integer(kind=intType) :: rSize, iSize, i, j, k, nHexa, nADT
-    integer(kind=intType) :: ie, je, ke, il, jl ,kl
+        ! Working paramters
+        integer(kind=intType) :: rSize, iSize, i, j, k, nHexa, nADT
+        integer(kind=intType) :: ie, je, ke, il, jl, kl
 
-    ! If the buffer is already allocated, the block is packed and there
-    ! is nothing to do
-    if (allocated(oBlock%rBuffer)) then
-       return
-    end if
+        ! If the buffer is already allocated, the block is packed and there
+        ! is nothing to do
+        if (allocated(oBlock%rBuffer)) then
+            return
+        end if
 
-    call getOBlockBufferSizes(oBlock%il, oBlock%jl, oBlock%kl, iSize, rSize)
+        call getOBlockBufferSizes(oBlock%il, oBlock%jl, oBlock%kl, iSize, rSize)
 
-    ! Allocate the buffers
-    allocate(oBlock%rBuffer(rSize), oBlock%iBuffer(iSize))
+        ! Allocate the buffers
+        allocate (oBlock%rBuffer(rSize), oBlock%iBuffer(iSize))
 
-    ! Reset the integer counter and add all the integers on this pass
-    iSize = 0
+        ! Reset the integer counter and add all the integers on this pass
+        iSize = 0
 
-    oBlock%iBuffer(1) = oBlock%il
-    oBlock%iBuffer(2) = oBlock%jl
-    oBlock%iBuffer(3) = oBlock%kl
-    oBlock%iBuffer(4) = oBlock%proc
-    oBlock%iBuffer(5) = oBlock%block
-    oBlock%iBuffer(6) = oBlock%cluster
+        oBlock%iBuffer(1) = oBlock%il
+        oBlock%iBuffer(2) = oBlock%jl
+        oBlock%iBuffer(3) = oBlock%kl
+        oBlock%iBuffer(4) = oBlock%proc
+        oBlock%iBuffer(5) = oBlock%block
+        oBlock%iBuffer(6) = oBlock%cluster
 
-    iSize = iSize + 6
+        iSize = iSize + 6
 
-    il = oBlock%il
-    jl = oBlock%jl
-    kl = oBlock%kl
+        il = oBlock%il
+        jl = oBlock%jl
+        kl = oBlock%kl
 
-    ie = il + 1
-    je = jl + 1
-    ke = kl + 1
+        ie = il + 1
+        je = jl + 1
+        ke = kl + 1
 
-    nHexa = oBlock%il * oBlock%jl * oBlock%kl
-    nADT = ie*je*ke
+        nHexa = oBlock%il * oBlock%jl * oBlock%kl
+        nADT = ie * je * ke
 
-    do j=1, nHexa
-       do i=1, 8
-          iSize = iSize + 1
-          oBlock%iBuffer(iSize) = oBlock%hexaConn(i, j)
-       end do
-    end do
+        do j = 1, nHexa
+            do i = 1, 8
+                iSize = iSize + 1
+                oBlock%iBuffer(iSize) = oBlock%hexaConn(i, j)
+            end do
+        end do
 
-    do k=0, ke+1
-       do j=0, je+1
-          do i=0, ie+1
-             iSize = iSize + 1
-             oBlock%iBuffer(iSize) = oBlock%globalCell(i, j, k)
-          end do
-       end do
-    end do
+        do k = 0, ke + 1
+            do j = 0, je + 1
+                do i = 0, ie + 1
+                    iSize = iSize + 1
+                    oBlock%iBuffer(iSize) = oBlock%globalCell(i, j, k)
+                end do
+            end do
+        end do
 
-    do k=1, kl
-       do j=1, jl
-          do i=1, il
-             iSize = iSize + 1
-             oBlock%iBuffer(iSize) = oBlock%nearWall(i, j, k)
-          end do
-       end do
-    end do
-
-    do k=1, ke
-       do j=1, je
-          do i=1, ie
-             iSize = iSize + 1
-             oBlock%iBuffer(iSize) = oBlock%invalidDonor(i, j, k)
-          end do
-       end do
-    end do
-
-    do i=1, oBlock%ADT%nLeaves
-       iSize = iSize + 1
-       oBlock%iBuffer(iSize) = oBlock%ADT%ADTree(i)%children(1)
-       iSize = iSize + 1
-       oBlock%iBuffer(iSize) = oBlock%ADT%ADTree(i)%children(2)
-    end do
-
-    ! Reset the real counter and add all the real values on this pass.
-    rSize = 0
-
-    do i=1, ie*je*ke
-       rSize = rSize + 1
-       oBlock%rBuffer(rSize) = oBlock%qualDonor(1, i)
-    end do
-
-    do j=1, ie*je*ke
-       do i=1, 3
-          rSize = rSize + 1
-          oBlock%rBuffer(rSize) = oBlock%xADT(i, j)
-       end do
-    end do
-
-    do i=1, oBlock%ADT%nBboxes
-       oBlock%rBuffer(rSize+1:rSize+6) = oBlock%ADT%xBBox(:, i)
-       rSize = rSize + 6
-    end do
-
-    do i=1, oBlock%ADT%nLeaves
-       oBlock%rBuffer(rSize+1:rSize+6) = oBlock%ADT%ADTree(i)%xMin(:)
-       rSize = rSize + 6
-
-       oBlock%rBuffer(rSize+1:rSize+6) = oBlock%ADT%ADTree(i)%xMax(:)
-       rSize = rSize + 6
-    end do
-
-    rSize = rSize + 1
-    oBlock%rBuffer(rSize) = oBlock%minVol
-
-  end subroutine packOBlock
-
-  subroutine unpackOBlock(oBlock)
-
-    use constants
-    use oversetData, only : oversetBlock
-    implicit none
-
-    ! unPack everything we need for this block from its own buffer
-    ! and reconstitute the data required for the ADTree. It is assumed
-    ! the buffers are already allocated and the data is available. This
-    ! does the exact *OPPOSITE* operation as the packBlock() routine
-
-    ! Input/Output Parameters
-    type(oversetBlock), intent(inout) :: oBlock
-
-    ! Working paramters
-    integer(kind=intType) :: rSize, iSize, i, j, k, nHexa, nADT
-    integer(kind=intType) :: ie, je, ke, il, jl, kl
-
-    ! Reset the integer counter and add all the integers on this pass
-    iSize = 0
-
-    oBlock%il = oBlock%iBuffer(1)
-    oBlock%jl = oBlock%iBuffer(2)
-    oBlock%kl = oBlock%iBuffer(3)
-    oBlock%proc = oBlock%iBuffer(4)
-    oBlock%block = oBlock%iBuffer(5)
-    oBlock%cluster = oBlock%iBuffer(6)
-    iSize = iSize + 6
-
-    il = oBlock%il
-    jl = oBlock%jl
-    kl = oBlock%kl
-    ie = il + 1
-    je = jl + 1
-    ke = kl + 1
-
-    nHexa = oBlock%il * oBlock%jl * oBlock%kl
-    nADT = ie*je*ke
-
-    ! Allocate the remainder of the arrays in oBlock.
-    allocate(oBlock%hexaConn(8, nHexa))
-    allocate(oBlock%globalCell(0:ie+1, 0:je+1, 0:ke+1))
-    allocate(oBlock%nearWall(1:il, 1:jl, 1:kl))
-    allocate(oBlock%invalidDonor(1:ie, 1:je, 1:ke))
-    allocate(oBlock%qualDonor(1, ie * je * ke))
-    allocate(oBlock%xADT(3, nADT))
-
-    ! -------------------------------------------------------------------
-    ! Once we know the sizes, allocate all the arrays in the
-    ! ADTree. Since we are not going to call the *actual* build routine
-    ! for the ADT, we need to set all the information ourselves. This
-    ! essentially does the same thing as buildSerialHex.
-    oBlock%ADT%adtType = adtVolumeADT
-    oBlock%ADT%nNodes = nADT
-    oBlock%ADT%nTetra = 0
-    oBlock%ADT%nPyra = 0
-    oBlock%ADT%nPrisms = 0
-    oBlock%ADT%nTria = 0
-    oBlock%ADT%nQuads = 0
-    oBlock%ADT%coor => oBlock%xADT
-    oBlock%ADT%hexaConn => oBlock%hexaConn
-    nullify(oBlock%ADT%tetraConn, oBlock%ADT%pyraConn, oBlock%ADT%prismsConn)
-    oBlock%ADT%nBBoxes = nHexa
-    allocate(oBlock%ADT%xBBOX(6, nHexa))
-    allocate(oBlock%ADT%elementType(nHexa))
-    allocate(oBlock%ADT%elementID(nHexa))
-    oBlock%ADT%comm = MPI_COMM_SELF
-    oBlock%ADT%nProcs = 1
-    oBlock%ADT%myID = 0
-
-    ! All hexas
-    oBlock%ADT%elementType = adtHexahedron
-
-    do i=1,nHexa
-       oBlock%ADT%elementID(i) = i
-    end do
-
-    oBlock%ADT%nLeaves = oBlock%ADT%nBBoxes - 1
-    if(oBlock%ADT%nBBoxes <= 1) oBlock%ADT%nLeaves = oBlock%ADT%nLeaves + 1
-    allocate(oBlock%ADT%ADTree(oBlock%ADT%nLeaves))
-
-    ! -------------------------------------------------------------------
-
-    ! Now continue copying out the integer values
-    do i=1, nHexa
-       do j=1, 8
-          iSize = iSize + 1
-          oBlock%hexaConn(j, i) = oBlock%iBuffer(iSize)
-       end do
-    end do
-
-    do k=0, ke+1
-       do j=0, je+1
-          do i=0, ie+1
-             iSize = iSize + 1
-             oBlock%globalCell(i, j, k) = oBlock%iBuffer(iSize)
-          end do
-       end do
-    end do
-
-    do k=1, kl
-       do j=1, jl
-          do i=1, il
-             iSize = iSize + 1
-             oBlock%nearWall(i, j, k) = oBlock%iBuffer(iSize)
-          end do
-       end do
-    end do
-
-    do k=1, ke
-       do j=1, je
-          do i=1, ie
-             iSize = iSize + 1
-             oBlock%invalidDonor(i, j, k) = oBlock%iBuffer(iSize)
-          end do
-       end do
-    end do
-
-    do i=1, oBlock%ADT%nLeaves
-       iSize = iSize + 1
-       oBlock%ADT%ADTree(i)%children(1) = oBlock%iBuffer(iSize)
-       iSize = iSize + 1
-       oBlock%ADT%ADTree(i)%children(2) = oBlock%iBuffer(iSize)
-    end do
-
-    ! Now copy out the real values
-    rSize = 0
-
-    do i=1, ie*je*ke
-       rSize = rSize + 1
-       oBlock%qualDonor(1, i) =  oBlock%rBuffer(rSize)
-    end do
-
-    do j=1, ie*je*ke
-       do i=1, 3
-          rSize = rSize + 1
-          oBlock%xADT(i, j) = oBlock%rBuffer(rSize)
-       end do
-    end do
-
-    do i=1, oBlock%ADT%nBboxes
-       oBlock%ADT%xBBox(:, i) = oBlock%rBuffer(rSize+1:rSize+6)
-       rSize = rSize + 6
-    end do
-
-    do i=1, oBlock%ADT%nLeaves
-       oBlock%ADT%ADTree(i)%xMin(:) = oBlock%rBuffer(rSize+1:rSize+6)
-       rSize = rSize + 6
-
-       oBlock%ADT%ADTree(i)%xMax(:) = oBlock%rBuffer(rSize+1:rSize+6)
-       rSize = rSize + 6
-    end do
-
-    rSize = rSize + 1
-    oBlock%minVol = oBlock%rBuffer(rSize)
-
-    ! Flag this oBlock as being allocated:
-    oBlock%allocated = .True.
-    deallocate(oBlock%iBuffer, oBlock%rBuffer)
-
-  end subroutine unpackOBlock
-
-  subroutine getOFringeBufferSizes(il, jl, kl, iSize, rSize)
-
-    ! Subroutine to get the required buffer sizes. This one is pretty
-    ! easy, but we use a routine to make it look the same as for the
-    ! oBlock.
+        do k = 1, kl
+            do j = 1, jl
+                do i = 1, il
+                    iSize = iSize + 1
+                    oBlock%iBuffer(iSize) = oBlock%nearWall(i, j, k)
+                end do
+            end do
+        end do
+
+        do k = 1, ke
+            do j = 1, je
+                do i = 1, ie
+                    iSize = iSize + 1
+                    oBlock%iBuffer(iSize) = oBlock%invalidDonor(i, j, k)
+                end do
+            end do
+        end do
+
+        do i = 1, oBlock%ADT%nLeaves
+            iSize = iSize + 1
+            oBlock%iBuffer(iSize) = oBlock%ADT%ADTree(i)%children(1)
+            iSize = iSize + 1
+            oBlock%iBuffer(iSize) = oBlock%ADT%ADTree(i)%children(2)
+        end do
+
+        ! Reset the real counter and add all the real values on this pass.
+        rSize = 0
+
+        do i = 1, ie * je * ke
+            rSize = rSize + 1
+            oBlock%rBuffer(rSize) = oBlock%qualDonor(1, i)
+        end do
+
+        do j = 1, ie * je * ke
+            do i = 1, 3
+                rSize = rSize + 1
+                oBlock%rBuffer(rSize) = oBlock%xADT(i, j)
+            end do
+        end do
+
+        do i = 1, oBlock%ADT%nBboxes
+            oBlock%rBuffer(rSize + 1:rSize + 6) = oBlock%ADT%xBBox(:, i)
+            rSize = rSize + 6
+        end do
+
+        do i = 1, oBlock%ADT%nLeaves
+            oBlock%rBuffer(rSize + 1:rSize + 6) = oBlock%ADT%ADTree(i)%xMin(:)
+            rSize = rSize + 6
+
+            oBlock%rBuffer(rSize + 1:rSize + 6) = oBlock%ADT%ADTree(i)%xMax(:)
+            rSize = rSize + 6
+        end do
+
+        rSize = rSize + 1
+        oBlock%rBuffer(rSize) = oBlock%minVol
+
+    end subroutine packOBlock
+
+    subroutine unpackOBlock(oBlock)
+
+        use constants
+        use oversetData, only: oversetBlock
+        implicit none
+
+        ! unPack everything we need for this block from its own buffer
+        ! and reconstitute the data required for the ADTree. It is assumed
+        ! the buffers are already allocated and the data is available. This
+        ! does the exact *OPPOSITE* operation as the packBlock() routine
+
+        ! Input/Output Parameters
+        type(oversetBlock), intent(inout) :: oBlock
+
+        ! Working paramters
+        integer(kind=intType) :: rSize, iSize, i, j, k, nHexa, nADT
+        integer(kind=intType) :: ie, je, ke, il, jl, kl
+
+        ! Reset the integer counter and add all the integers on this pass
+        iSize = 0
+
+        oBlock%il = oBlock%iBuffer(1)
+        oBlock%jl = oBlock%iBuffer(2)
+        oBlock%kl = oBlock%iBuffer(3)
+        oBlock%proc = oBlock%iBuffer(4)
+        oBlock%block = oBlock%iBuffer(5)
+        oBlock%cluster = oBlock%iBuffer(6)
+        iSize = iSize + 6
+
+        il = oBlock%il
+        jl = oBlock%jl
+        kl = oBlock%kl
+        ie = il + 1
+        je = jl + 1
+        ke = kl + 1
+
+        nHexa = oBlock%il * oBlock%jl * oBlock%kl
+        nADT = ie * je * ke
+
+        ! Allocate the remainder of the arrays in oBlock.
+        allocate (oBlock%hexaConn(8, nHexa))
+        allocate (oBlock%globalCell(0:ie + 1, 0:je + 1, 0:ke + 1))
+        allocate (oBlock%nearWall(1:il, 1:jl, 1:kl))
+        allocate (oBlock%invalidDonor(1:ie, 1:je, 1:ke))
+        allocate (oBlock%qualDonor(1, ie * je * ke))
+        allocate (oBlock%xADT(3, nADT))
+
+        ! -------------------------------------------------------------------
+        ! Once we know the sizes, allocate all the arrays in the
+        ! ADTree. Since we are not going to call the *actual* build routine
+        ! for the ADT, we need to set all the information ourselves. This
+        ! essentially does the same thing as buildSerialHex.
+        oBlock%ADT%adtType = adtVolumeADT
+        oBlock%ADT%nNodes = nADT
+        oBlock%ADT%nTetra = 0
+        oBlock%ADT%nPyra = 0
+        oBlock%ADT%nPrisms = 0
+        oBlock%ADT%nTria = 0
+        oBlock%ADT%nQuads = 0
+        oBlock%ADT%coor => oBlock%xADT
+        oBlock%ADT%hexaConn => oBlock%hexaConn
+        nullify (oBlock%ADT%tetraConn, oBlock%ADT%pyraConn, oBlock%ADT%prismsConn)
+        oBlock%ADT%nBBoxes = nHexa
+        allocate (oBlock%ADT%xBBOX(6, nHexa))
+        allocate (oBlock%ADT%elementType(nHexa))
+        allocate (oBlock%ADT%elementID(nHexa))
+        oBlock%ADT%comm = MPI_COMM_SELF
+        oBlock%ADT%nProcs = 1
+        oBlock%ADT%myID = 0
+
+        ! All hexas
+        oBlock%ADT%elementType = adtHexahedron
+
+        do i = 1, nHexa
+            oBlock%ADT%elementID(i) = i
+        end do
+
+        oBlock%ADT%nLeaves = oBlock%ADT%nBBoxes - 1
+        if (oBlock%ADT%nBBoxes <= 1) oBlock%ADT%nLeaves = oBlock%ADT%nLeaves + 1
+        allocate (oBlock%ADT%ADTree(oBlock%ADT%nLeaves))
+
+        ! -------------------------------------------------------------------
+
+        ! Now continue copying out the integer values
+        do i = 1, nHexa
+            do j = 1, 8
+                iSize = iSize + 1
+                oBlock%hexaConn(j, i) = oBlock%iBuffer(iSize)
+            end do
+        end do
+
+        do k = 0, ke + 1
+            do j = 0, je + 1
+                do i = 0, ie + 1
+                    iSize = iSize + 1
+                    oBlock%globalCell(i, j, k) = oBlock%iBuffer(iSize)
+                end do
+            end do
+        end do
+
+        do k = 1, kl
+            do j = 1, jl
+                do i = 1, il
+                    iSize = iSize + 1
+                    oBlock%nearWall(i, j, k) = oBlock%iBuffer(iSize)
+                end do
+            end do
+        end do
+
+        do k = 1, ke
+            do j = 1, je
+                do i = 1, ie
+                    iSize = iSize + 1
+                    oBlock%invalidDonor(i, j, k) = oBlock%iBuffer(iSize)
+                end do
+            end do
+        end do
+
+        do i = 1, oBlock%ADT%nLeaves
+            iSize = iSize + 1
+            oBlock%ADT%ADTree(i)%children(1) = oBlock%iBuffer(iSize)
+            iSize = iSize + 1
+            oBlock%ADT%ADTree(i)%children(2) = oBlock%iBuffer(iSize)
+        end do
+
+        ! Now copy out the real values
+        rSize = 0
+
+        do i = 1, ie * je * ke
+            rSize = rSize + 1
+            oBlock%qualDonor(1, i) = oBlock%rBuffer(rSize)
+        end do
+
+        do j = 1, ie * je * ke
+            do i = 1, 3
+                rSize = rSize + 1
+                oBlock%xADT(i, j) = oBlock%rBuffer(rSize)
+            end do
+        end do
+
+        do i = 1, oBlock%ADT%nBboxes
+            oBlock%ADT%xBBox(:, i) = oBlock%rBuffer(rSize + 1:rSize + 6)
+            rSize = rSize + 6
+        end do
+
+        do i = 1, oBlock%ADT%nLeaves
+            oBlock%ADT%ADTree(i)%xMin(:) = oBlock%rBuffer(rSize + 1:rSize + 6)
+            rSize = rSize + 6
+
+            oBlock%ADT%ADTree(i)%xMax(:) = oBlock%rBuffer(rSize + 1:rSize + 6)
+            rSize = rSize + 6
+        end do
+
+        rSize = rSize + 1
+        oBlock%minVol = oBlock%rBuffer(rSize)
+
+        ! Flag this oBlock as being allocated:
+        oBlock%allocated = .True.
+        deallocate (oBlock%iBuffer, oBlock%rBuffer)
+
+    end subroutine unpackOBlock
+
+    subroutine getOFringeBufferSizes(il, jl, kl, iSize, rSize)
+
+        ! Subroutine to get the required buffer sizes. This one is pretty
+        ! easy, but we use a routine to make it look the same as for the
+        ! oBlock.
 
-    use constants
-    implicit none
-
-    ! Input/OUtput
-    integer(kind=intType), intent(in) :: il, jl ,kl
-    integer(kind=intType), intent(out) :: rSize, iSize
-
-    ! Working
-    integer(kind=intType) :: mm
-
-    ! All arrays have the same size
-    mm = (il-1)*(jl-1)*(kl-1) ! nx*ny*nz
-
-    ! Initializeation
-    iSize = mm * 3 + 5 ! We need wallInd, isWall, myIndex plus 5 for the sizes
-    rSize = mm * 6 ! Need to send x and xSeed (3 each)
-
-  end subroutine getOFringeBufferSizes
-
-  subroutine packOFringe(oFringe)
-
-    use constants
-    use oversetData, only : oversetFringe
-
-    implicit none
-
-    ! Pack up the search coordines in this oFringe into its own buffer
-    ! so we are ready to send it.
+        use constants
+        implicit none
+
+        ! Input/OUtput
+        integer(kind=intType), intent(in) :: il, jl, kl
+        integer(kind=intType), intent(out) :: rSize, iSize
+
+        ! Working
+        integer(kind=intType) :: mm
+
+        ! All arrays have the same size
+        mm = (il - 1) * (jl - 1) * (kl - 1) ! nx*ny*nz
+
+        ! Initializeation
+        iSize = mm * 3 + 5 ! We need wallInd, isWall, myIndex plus 5 for the sizes
+        rSize = mm * 6 ! Need to send x and xSeed (3 each)
+
+    end subroutine getOFringeBufferSizes
+
+    subroutine packOFringe(oFringe)
+
+        use constants
+        use oversetData, only: oversetFringe
+
+        implicit none
+
+        ! Pack up the search coordines in this oFringe into its own buffer
+        ! so we are ready to send it.
 
-    ! Input/Output Parameters
-    type(oversetFringe), intent(inout) :: oFringe
+        ! Input/Output Parameters
+        type(oversetFringe), intent(inout) :: oFringe
 
-    ! Working paramters
-    integer(kind=intType) :: rSize, iSize, mm, i, ii
+        ! Working paramters
+        integer(kind=intType) :: rSize, iSize, mm, i, ii
 
-    ! If the buffer is already allocated, the block is packed and there
-    ! is nothing to do
-    if (allocated(oFringe%rBuffer)) then
-       return
-    end if
+        ! If the buffer is already allocated, the block is packed and there
+        ! is nothing to do
+        if (allocated(oFringe%rBuffer)) then
+            return
+        end if
 
-    call getOFringeBufferSizes(oFringe%il, oFringe%jl, oFringe%kl, &
-         iSize, rSize)
+        call getOFringeBufferSizes(oFringe%il, oFringe%jl, oFringe%kl, &
+                                   iSize, rSize)
 
-    ! Allocate the buffers
-    allocate(oFringe%rBuffer(rSize), oFringe%iBuffer(iSize))
+        ! Allocate the buffers
+        allocate (oFringe%rBuffer(rSize), oFringe%iBuffer(iSize))
 
-    mm = (oFringe%nx)*(oFringe%ny)*(oFringe%nz)
+        mm = (oFringe%nx) * (oFringe%ny) * (oFringe%nz)
 
-    oFringe%iBuffer(1) = oFringe%il
-    oFringe%iBuffer(2) = oFringe%jl
-    oFringe%iBuffer(3) = oFringe%kl
-    oFringe%iBuffer(4) = oFringe%cluster
-    oFringe%iBuffer(5) = oFringe%block
-    ii = 5
+        oFringe%iBuffer(1) = oFringe%il
+        oFringe%iBuffer(2) = oFringe%jl
+        oFringe%iBuffer(3) = oFringe%kl
+        oFringe%iBuffer(4) = oFringe%cluster
+        oFringe%iBuffer(5) = oFringe%block
+        ii = 5
 
-    ! Copy the integers. Just wallInd and isWall
-    do i=1, mm
+        ! Copy the integers. Just wallInd and isWall
+        do i = 1, mm
 
-       ii = ii +1
-       oFringe%iBuffer(ii) = oFringe%wallInd(i)
+            ii = ii + 1
+            oFringe%iBuffer(ii) = oFringe%wallInd(i)
 
-       ii = ii +1
-       oFringe%iBuffer(ii) = oFringe%isWall(i)
+            ii = ii + 1
+            oFringe%iBuffer(ii) = oFringe%isWall(i)
 
-       ii = ii +1
-       oFringe%iBuffer(ii) = oFringe%fringeIntBuffer(5, i) ! myIndex
+            ii = ii + 1
+            oFringe%iBuffer(ii) = oFringe%fringeIntBuffer(5, i) ! myIndex
 
-    end do
+        end do
 
-    ! Copy the reals. Reset the buffer here.
-    ii = 0
-    do i=1, mm
-       oFringe%rBuffer(ii+1) = oFringe%x(1, i)
-       oFringe%rBuffer(ii+2) = oFringe%x(2, i)
-       oFringe%rBuffer(ii+3) = oFringe%x(3, i)
-       oFringe%rBuffer(ii+4) = oFringe%xSeed(1, i)
-       oFringe%rBuffer(ii+5) = oFringe%xSeed(2, i)
-       oFringe%rBuffer(ii+6) = oFringe%xSeed(3, i)
-       ii = ii + 6
-    end do
+        ! Copy the reals. Reset the buffer here.
+        ii = 0
+        do i = 1, mm
+            oFringe%rBuffer(ii + 1) = oFringe%x(1, i)
+            oFringe%rBuffer(ii + 2) = oFringe%x(2, i)
+            oFringe%rBuffer(ii + 3) = oFringe%x(3, i)
+            oFringe%rBuffer(ii + 4) = oFringe%xSeed(1, i)
+            oFringe%rBuffer(ii + 5) = oFringe%xSeed(2, i)
+            oFringe%rBuffer(ii + 6) = oFringe%xSeed(3, i)
+            ii = ii + 6
+        end do
 
-  end subroutine packOFringe
+    end subroutine packOFringe
 
-  subroutine unpackOFringe(oFringe)
+    subroutine unpackOFringe(oFringe)
 
-    use constants
-    use oversetData, only : oversetFringe
-    implicit none
+        use constants
+        use oversetData, only: oversetFringe
+        implicit none
 
-    ! Pack up the search coordines in this oFringe into its own buffer
-    ! so we are ready to send it.
+        ! Pack up the search coordines in this oFringe into its own buffer
+        ! so we are ready to send it.
 
-    ! Input/Output Parameters
-    type(oversetFringe), intent(inout) :: oFringe
+        ! Input/Output Parameters
+        type(oversetFringe), intent(inout) :: oFringe
 
-    ! Working paramters
-    integer(kind=intType) :: rSize, iSize, idom, i, ii, mm
+        ! Working paramters
+        integer(kind=intType) :: rSize, iSize, idom, i, ii, mm
 
-    ! Set the sizes of this oFringe
-    oFringe%il = oFringe%iBuffer(1)
-    oFringe%jl = oFringe%iBuffer(2)
-    oFringe%kl = oFringe%iBuffer(3)
-    oFringe%nx = oFringe%il-1
-    oFringe%ny = oFringe%jl-1
-    oFringe%nz = oFringe%kl-1
-    oFringe%cluster = oFringe%iBuffer(4)
-    oFringe%block = oFringe%ibuffer(5)
+        ! Set the sizes of this oFringe
+        oFringe%il = oFringe%iBuffer(1)
+        oFringe%jl = oFringe%iBuffer(2)
+        oFringe%kl = oFringe%iBuffer(3)
+        oFringe%nx = oFringe%il - 1
+        oFringe%ny = oFringe%jl - 1
+        oFringe%nz = oFringe%kl - 1
+        oFringe%cluster = oFringe%iBuffer(4)
+        oFringe%block = oFringe%ibuffer(5)
 
-    mm = (oFringe%nx)*(oFringe%ny)*(oFringe%nz)
+        mm = (oFringe%nx) * (oFringe%ny) * (oFringe%nz)
 
-    allocate(&
-         oFringe%x(3, mm), &
-         oFringe%xSeed(3, mm), &
-         oFringe%wallInd(mm), &
-         oFringe%isWall(mm))
+        allocate ( &
+            oFringe%x(3, mm), &
+            oFringe%xSeed(3, mm), &
+            oFringe%wallInd(mm), &
+            oFringe%isWall(mm))
 
-    ! Assume each cell will get just one donor. It's just a guess, it
-    ! will be expanded if necessary so the exact value doesn't matter.
-    allocate(oFringe%fringeIntBuffer(5, mm), oFringe%fringeRealBuffer(4, mm))
-    oFringe%nDonor = 0
+        ! Assume each cell will get just one donor. It's just a guess, it
+        ! will be expanded if necessary so the exact value doesn't matter.
+        allocate (oFringe%fringeIntBuffer(5, mm), oFringe%fringeRealBuffer(4, mm))
+        oFringe%nDonor = 0
 
-    ii = 5 ! Already copied out the sizes
+        ii = 5 ! Already copied out the sizes
 
-    ! Copy out integers
-    do i=1, mm
-       ii = ii + 1
-       oFringe%wallInd(i) = oFringe%iBuffer(ii)
+        ! Copy out integers
+        do i = 1, mm
+            ii = ii + 1
+            oFringe%wallInd(i) = oFringe%iBuffer(ii)
 
-       ii = ii + 1
-       oFringe%isWall(i) = oFringe%iBuffer(ii)
+            ii = ii + 1
+            oFringe%isWall(i) = oFringe%iBuffer(ii)
 
-       ii = ii + 1
-       oFringe%fringeIntBuffer(5, i) = oFringe%iBuffer(ii)
+            ii = ii + 1
+            oFringe%fringeIntBuffer(5, i) = oFringe%iBuffer(ii)
 
-       oFringe%fringeIntBuffer(4, i) = oFringe%block
-    end do
+            oFringe%fringeIntBuffer(4, i) = oFringe%block
+        end do
 
-    ! Copy the reals. Reset the counter ii counter here.
-    ii = 0
-    do i=1, mm
-       oFringe%x(1, i) = oFringe%rBuffer(ii+1)
-       oFringe%x(2, i) = oFringe%rBuffer(ii+2)
-       oFringe%x(3, i) = oFringe%rBuffer(ii+3)
+        ! Copy the reals. Reset the counter ii counter here.
+        ii = 0
+        do i = 1, mm
+            oFringe%x(1, i) = oFringe%rBuffer(ii + 1)
+            oFringe%x(2, i) = oFringe%rBuffer(ii + 2)
+            oFringe%x(3, i) = oFringe%rBuffer(ii + 3)
 
-       oFringe%xSeed(1, i) = oFringe%rBuffer(ii+4)
-       oFringe%xSeed(2, i) = oFringe%rBuffer(ii+5)
-       oFringe%xSeed(3, i) = oFringe%rBuffer(ii+6)
+            oFringe%xSeed(1, i) = oFringe%rBuffer(ii + 4)
+            oFringe%xSeed(2, i) = oFringe%rBuffer(ii + 5)
+            oFringe%xSeed(3, i) = oFringe%rBuffer(ii + 6)
 
-       ii = ii + 6
-    end do
+            ii = ii + 6
+        end do
 
-    ! Flag this oFringe as being allocated:
-    oFringe%allocated = .True.
-    deallocate(oFringe%rBuffer, oFringe%iBuffer)
+        ! Flag this oFringe as being allocated:
+        oFringe%allocated = .True.
+        deallocate (oFringe%rBuffer, oFringe%iBuffer)
 
-  end subroutine unpackOFringe
+    end subroutine unpackOFringe
 
-  subroutine getWallSize(famList, nNodes, nCells, dualMesh)
-    ! Simple helper routine to return the number of wall nodes and cells
-    ! for the block pointed to by blockPointers.
+    subroutine getWallSize(famList, nNodes, nCells, dualMesh)
+        ! Simple helper routine to return the number of wall nodes and cells
+        ! for the block pointed to by blockPointers.
 
-    use constants
-    use blockPointers, only :BCType, nBocos, BCData
-    use sorting, only : famInList
-    implicit none
+        use constants
+        use blockPointers, only: BCType, nBocos, BCData
+        use sorting, only: famInList
+        implicit none
 
-    ! Input
-    integer(kind=intType), intent(in), dimension(:) :: famList
-    logical :: dualMesh
+        ! Input
+        integer(kind=intType), intent(in), dimension(:) :: famList
+        logical :: dualMesh
 
-    ! Output
-    integer(kind=intType), intent(out) :: nNodes, nCells
+        ! Output
+        integer(kind=intType), intent(out) :: nNodes, nCells
 
-    ! Working:
-    integer(kind=intType) :: mm, iBeg, iEnd, jBeg, jEnd
+        ! Working:
+        integer(kind=intType) :: mm, iBeg, iEnd, jBeg, jEnd
 
-    ! Figure out the size the wall is going to be.
-    nNodes = 0
-    nCells = 0
-    do mm=1, nBocos
-       famInclude: if (famInList(BCData(mm)%famID, famList)) then
-          if (dualMesh) then
-             jBeg = BCData(mm)%jnBeg-1 ; jEnd = BCData(mm)%jnEnd
-             iBeg = BCData(mm)%inBeg-1 ; iEnd = BCData(mm)%inEnd
-          else
-             jBeg = BCData(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
-             iBeg = BCData(mm)%inBeg; iEnd = BCData(mm)%inEnd
-          end if
+        ! Figure out the size the wall is going to be.
+        nNodes = 0
+        nCells = 0
+        do mm = 1, nBocos
+            famInclude: if (famInList(BCData(mm)%famID, famList)) then
+                if (dualMesh) then
+                    jBeg = BCData(mm)%jnBeg - 1; jEnd = BCData(mm)%jnEnd
+                    iBeg = BCData(mm)%inBeg - 1; iEnd = BCData(mm)%inEnd
+                else
+                    jBeg = BCData(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+                    iBeg = BCData(mm)%inBeg; iEnd = BCData(mm)%inEnd
+                end if
 
-          nNodes = nNodes + (iEnd - iBeg + 1)*(jEnd - jBeg + 1)
-          nCells = nCells + (iEnd - iBeg )*(jEnd - jBeg)
-       end if famInclude
-    end do
+                nNodes = nNodes + (iEnd - iBeg + 1) * (jEnd - jBeg + 1)
+                nCells = nCells + (iEnd - iBeg) * (jEnd - jBeg)
+            end if famInclude
+        end do
 
-  end subroutine getWallSize
+    end subroutine getWallSize
 
-  subroutine getOSurfBufferSizes(famList, il, jl, kl, iSize, rSize, dualMesh)
+    subroutine getOSurfBufferSizes(famList, il, jl, kl, iSize, rSize, dualMesh)
 
-    ! Subroutine to get the required buffer sizes. This one is pretty
-    ! easy, but we use a routine to make it look the same as for hte
-    ! oBlock. Note that these bufer sizes are over-estimates. They are
-    ! the maximum possible amount of data to send.
+        ! Subroutine to get the required buffer sizes. This one is pretty
+        ! easy, but we use a routine to make it look the same as for hte
+        ! oBlock. Note that these bufer sizes are over-estimates. They are
+        ! the maximum possible amount of data to send.
 
-    use constants
-    implicit none
+        use constants
+        implicit none
 
-    ! Input/OUtput
-    integer(kind=intType), intent(in), dimension(:) :: famList
-    integer(kind=intType), intent(in) :: il, jl ,kl
-    logical, intent(in) :: dualMesh
-    integer(kind=intType), intent(out) :: rSize, iSize
+        ! Input/OUtput
+        integer(kind=intType), intent(in), dimension(:) :: famList
+        integer(kind=intType), intent(in) :: il, jl, kl
+        logical, intent(in) :: dualMesh
+        integer(kind=intType), intent(out) :: rSize, iSize
 
-    ! Working
-    integer(kind=intType) :: mm, nNodes, nCells, nBBox, nLeaves
+        ! Working
+        integer(kind=intType) :: mm, nNodes, nCells, nBBox, nLeaves
 
-    ! Initalization
-    iSize = 3 ! For the block sizes
-    iSize = iSize + 4 ! For the maxCells/nCells/nNodes variables
-    rSize = 0
+        ! Initalization
+        iSize = 3 ! For the block sizes
+        iSize = iSize + 4 ! For the maxCells/nCells/nNodes variables
+        rSize = 0
 
-    call getWallSize(famList, nNodes, nCells, dualMesh)
+        call getWallSize(famList, nNodes, nCells, dualMesh)
 
-    ! Note that nCells here is the maximum number size. This will result
-    ! in a slight overestimate of the buffer size. This is ok.
+        ! Note that nCells here is the maximum number size. This will result
+        ! in a slight overestimate of the buffer size. This is ok.
 
-    if (nNodes > 0) then
-       ! Count up the integers we want to send:
+        if (nNodes > 0) then
+            ! Count up the integers we want to send:
 
-       iSize = iSize + nCells*4 ! This is for the connectivity
+            iSize = iSize + nCells * 4 ! This is for the connectivity
 
-       iSize = iSize + nCells   ! This is for the iblank array
+            iSize = iSize + nCells   ! This is for the iblank array
 
-       iSize = iSize + nCells   ! This is for the cellPtr array
+            iSize = iSize + nCells   ! This is for the cellPtr array
 
+            ! Number of boxes in the ADT is the same as the number of elements
+            nBBox = nCells
+            nLeaves = nBBox - 1 ! See ADT/adtBuild.f90
+
+            iSize = iSize + nLeaves * 2 ! Two ints for the children in each leaf
 
-       ! Number of boxes in the ADT is the same as the number of elements
-       nBBox = nCells
-       nLeaves = nBBox-1 ! See ADT/adtBuild.f90
+            ! Count up the reals we ned to send:
+            rSize = rSize + 3 * nNodes ! surface coordinates
 
-       iSize = iSize + nLeaves*2 ! Two ints for the children in each leaf
+            rSize = rSize + nNodes ! surface delta
 
-       ! Count up the reals we ned to send:
-       rSize = rSize + 3*nNodes ! surface coordinates
+            rSize = rSize + nBBox * 6 ! Cell bounding boxes
 
-       rSize = rSize + nNodes ! surface delta
+            rSize = rSize + nLeaves * 12 ! Bounding boxes for leaves
+        end if
+    end subroutine getOSurfBufferSizes
 
-       rSize = rSize + nBBox*6 ! Cell bounding boxes
+    subroutine packOSurf(famList, oSurf, dualMesh)
 
-       rSize = rSize + nLeaves*12 ! Bounding boxes for leaves
-    end if
-  end subroutine getOSurfBufferSizes
+        use constants
+        use oversetData, only: oversetWall
 
-  subroutine packOSurf(famList, oSurf, dualMesh)
+        implicit none
+
+        ! Pack up the search coordines in this oSurf into its own buffer
+        ! so we are ready to send it.
 
-    use constants
-    use oversetData, only : oversetWall
-
-    implicit none
-
-    ! Pack up the search coordines in this oSurf into its own buffer
-    ! so we are ready to send it.
-
-    ! Input/Output Parameters
-    integer(kind=intType), intent(in), dimension(:) :: famList
-    type(oversetWall), intent(inout) :: oSurf
-    logical, intent(in) :: dualMesh
-    ! Working paramters
-    integer(kind=intType) :: rSize, iSize, mm, i, j, nNodes, nCells
-
-    call getOSurfBufferSizes(famList, oSurf%il, oSurf%kl, oSurf%kl, isize, rSize, dualMesh)
-
-    ! Allocate the buffers
-    allocate(oSurf%rBuffer(rSize), oSurf%iBuffer(iSize))
-
-    oSurf%iBuffer(1) = oSurf%il
-    oSurf%iBuffer(2) = oSurf%jl
-    oSurf%iBuffer(3) = oSurf%kl
-    oSurf%iBuffer(4) = oSurf%nNodes
-    oSurf%iBuffer(5) = oSurf%nCells
-    oSurf%iBuffer(6) = oSurf%maxCells
-    oSurf%iBuffer(7) = oSurf%cluster
-
-    if (oSurf%nNodes > 0) then
-       iSize = 7
-       do j=1, oSurf%nCells
-          do i=1, 4
-             iSize = iSize + 1
-             oSurf%iBuffer(iSize) = oSurf%conn(i, j)
-          end do
-       end do
-
-       do i=1, oSurf%maxCells
-          iSize = iSize + 1
-          oSurf%iBuffer(iSize) = oSurf%iBlank(i)
-       end do
-
-       do i=1, oSurf%nCells
-          iSize = iSize + 1
-          oSurf%iBuffer(iSize) = oSurf%cellPtr(i)
-       end do
-
-       do i=1, oSurf%ADT%nLeaves
-          iSize = iSize + 1
-          oSurf%iBuffer(iSize) = oSurf%ADT%ADTree(i)%children(1)
-          iSize = iSize + 1
-          oSurf%iBuffer(iSize) = oSurf%ADT%ADTree(i)%children(2)
-       end do
-
-       ! Done with the integer values, do the real ones
-       rSize = 0
-
-       do i=1, oSurf%nNodes
-          do j=1, 3
-             rSize = rSize + 1
-             oSurf%rBuffer(rSize) = oSurf%x(j, i)
-          end do
-       end do
-
-       do i=1, oSurf%nNodes
-          rSize = rSize + 1
-          oSurf%rBuffer(rSize) = oSurf%delta(i)
-       end do
-
-       do i=1, oSurf%ADT%nBboxes
-          oSurf%rBuffer(rSize+1:rSize+6) = oSurf%ADT%xBBox(:, i)
-          rSize = rSize + 6
-       end do
-
-       do i=1, oSurf%ADT%nLeaves
-          oSurf%rBuffer(rSize+1:rSize+6) = oSurf%ADT%ADTree(i)%xMin(:)
-          rSize = rSize + 6
-
-          oSurf%rBuffer(rSize+1:rSize+6) = oSurf%ADT%ADTree(i)%xMax(:)
-          rSize = rSize + 6
-       end do
-    end if
-  end subroutine packOSurf
-
-  subroutine unpackOSurf(oSurf)
-
-    use constants
-    use oversetData, only : oversetWall
-    use kdtree2_module, only : kdtree2_create
-    implicit none
-
-    ! Input/Output Parameters
-    type(oversetWall), intent(inout) :: oSurf
-
-    ! Working paramters
-    integer(kind=intType) :: rSize, iSize, idom, i,  j, k, n, iNode
-
-    ! Set the sizes of this oSurf
-    oSurf%il = oSurf%iBuffer(1)
-    oSurf%jl = oSurf%iBuffer(2)
-    oSurf%kl = oSurf%iBuffer(3)
-
-    oSurf%nNodes = oSurf%iBuffer(4)
-    oSurf%nCells = oSurf%iBuffer(5)
-    oSurf%maxCells = oSurf%iBuffer(6)
-    oSurf%cluster = oSurf%iBuffer(7)
-
-    iSize = 7
-    rSize = 0
-
-    ! Allocate the arrays now that we know the sizes
-    allocate(oSurf%x(3, oSurf%nNodes))
-    allocate(oSurf%delta(oSurf%nNodes))
-    allocate(oSurf%conn(4, oSurf%nCells))
-    allocate(oSurf%iBlank(oSurf%maxCells))
-    allocate(oSurf%cellPtr(oSurf%nCells))
-    allocate(oSurf%nte(4, oSurf%nNodes))
-    ! Once we know the sizes, allocate all the arrays in the
-    ! ADTree. Since we are not going to call the *actual* build routine
-    ! for the ADT, we need to set all the information ourselves. This
-    ! essentially does the same thing as buildSerialHex.
-    oSurf%ADT%adtType = adtSurfaceADT
-    oSurf%ADT%nNodes = oSurf%nNodes
-    oSurf%ADT%nTetra = 0
-    oSurf%ADT%nPyra = 0
-    oSurf%ADT%nPrisms = 0
-    oSurf%ADT%nTria = 0
-    oSurf%ADT%nQuads = oSurf%nCells
-    oSurf%ADT%coor => oSurf%x
-    oSurf%ADT%quadsConn => oSurf%conn
-    nullify(oSurf%ADT%triaConn)
-    oSurf%ADT%nBBoxes = oSurf%nCells
-    allocate(oSurf%ADT%xBBOX(6, oSurf%nCells))
-    allocate(oSurf%ADT%elementType(oSurf%nCells))
-    allocate(oSurf%ADT%elementID(oSurf%nCells))
-    oSurf%ADT%comm = MPI_COMM_SELF
-    oSurf%ADT%nProcs = 1
-    oSurf%ADT%myID = 0
-
-    ! All hexas
-    oSurf%ADT%elementType = adtQuadrilateral
-
-    do i=1, oSurf%nCells
-       oSurf%ADT%elementID(i) = i
-    end do
-
-    oSurf%ADT%nLeaves = oSurf%ADT%nBBoxes - 1
-    if(oSurf%ADT%nBBoxes <= 1) oSurf%ADT%nLeaves = oSurf%ADT%nLeaves + 1
-
-    allocate(oSurf%ADT%ADTree(oSurf%ADT%nLeaves))
-
-    ! Now continue copying out the values if necessary:
-    if (oSurf%nNodes > 0) then
-       do j=1, oSurf%nCells
-          do i=1, 4
-             iSize = iSize + 1
-             oSurf%conn(i, j) = oSurf%iBuffer(iSize)
-          end do
-       end do
-
-       do i=1, oSurf%maxCells
-          iSize = iSize + 1
-          oSurf%iBlank(i) = oSurf%iBuffer(iSize)
-       end do
-
-       do i=1, oSurf%nCells
-          iSize = iSize + 1
-          oSurf%cellPtr(i) = oSurf%iBuffer(iSize)
-       end do
-
-       do i=1, oSurf%ADT%nLeaves
-          iSize = iSize + 1
-          oSurf%ADT%ADTree(i)%children(1) = oSurf%iBuffer(iSize)
-          iSize = iSize + 1
-          oSurf%ADT%ADTree(i)%children(2) = oSurf%iBuffer(iSize)
-       end do
-
-       ! Done with the integer values, do the real ones
-       rSize = 0
-
-       do i=1, oSurf%nNodes
-          do j=1, 3
-             rSize = rSize + 1
-             oSurf%x(j, i) = oSurf%rBuffer(rSize)
-          end do
-       end do
-
-       do i=1, oSurf%nNodes
-          rSize = rSize + 1
-          oSurf%delta(i) = oSurf%rBuffer(rSize)
-       end do
-
-       do i=1, oSurf%ADT%nBboxes
-          oSurf%ADT%xBBox(:, i) = oSurf%rBuffer(rSize+1:rSize+6)
-          rSize = rSize + 6
-       end do
-
-       do i=1, oSurf%ADT%nLeaves
-          oSurf%ADT%ADTree(i)%xMin(:) = oSurf%rBuffer(rSize+1:rSize+6)
-          rSize = rSize + 6
-
-          oSurf%ADT%ADTree(i)%xMax(:) = oSurf%rBuffer(rSize+1:rSize+6)
-          rSize = rSize + 6
-       end do
-    end if
-
-    ! Build the KDTree
-    if (oSurf%nNodes > 0) then
-       oSurf%tree => kdtree2_create(oSurf%x)
-    end if
-
-    ! Build the inverse of the connectivity, the nodeToElem array.
-    oSurf%nte = 0
-    do i=1, oSurf%nCells
-       do j=1, 4
-          n = oSurf%conn(j, i)
-          inner:do k=1,4
-             if (oSurf%nte(k, n) == 0) then
-                oSurf%nte(k, n) = i
-                exit inner
-             end if
-          end do inner
-       end do
-    end do
-
-    ! Flag this oSurf as being allocated:
-    oSurf%allocated = .True.
-    deallocate(oSurf%rBuffer, oSurf%iBuffer)
-  end subroutine unpackOSurf
+        ! Input/Output Parameters
+        integer(kind=intType), intent(in), dimension(:) :: famList
+        type(oversetWall), intent(inout) :: oSurf
+        logical, intent(in) :: dualMesh
+        ! Working paramters
+        integer(kind=intType) :: rSize, iSize, mm, i, j, nNodes, nCells
+
+        call getOSurfBufferSizes(famList, oSurf%il, oSurf%kl, oSurf%kl, isize, rSize, dualMesh)
+
+        ! Allocate the buffers
+        allocate (oSurf%rBuffer(rSize), oSurf%iBuffer(iSize))
+
+        oSurf%iBuffer(1) = oSurf%il
+        oSurf%iBuffer(2) = oSurf%jl
+        oSurf%iBuffer(3) = oSurf%kl
+        oSurf%iBuffer(4) = oSurf%nNodes
+        oSurf%iBuffer(5) = oSurf%nCells
+        oSurf%iBuffer(6) = oSurf%maxCells
+        oSurf%iBuffer(7) = oSurf%cluster
+
+        if (oSurf%nNodes > 0) then
+            iSize = 7
+            do j = 1, oSurf%nCells
+                do i = 1, 4
+                    iSize = iSize + 1
+                    oSurf%iBuffer(iSize) = oSurf%conn(i, j)
+                end do
+            end do
+
+            do i = 1, oSurf%maxCells
+                iSize = iSize + 1
+                oSurf%iBuffer(iSize) = oSurf%iBlank(i)
+            end do
+
+            do i = 1, oSurf%nCells
+                iSize = iSize + 1
+                oSurf%iBuffer(iSize) = oSurf%cellPtr(i)
+            end do
+
+            do i = 1, oSurf%ADT%nLeaves
+                iSize = iSize + 1
+                oSurf%iBuffer(iSize) = oSurf%ADT%ADTree(i)%children(1)
+                iSize = iSize + 1
+                oSurf%iBuffer(iSize) = oSurf%ADT%ADTree(i)%children(2)
+            end do
+
+            ! Done with the integer values, do the real ones
+            rSize = 0
+
+            do i = 1, oSurf%nNodes
+                do j = 1, 3
+                    rSize = rSize + 1
+                    oSurf%rBuffer(rSize) = oSurf%x(j, i)
+                end do
+            end do
+
+            do i = 1, oSurf%nNodes
+                rSize = rSize + 1
+                oSurf%rBuffer(rSize) = oSurf%delta(i)
+            end do
+
+            do i = 1, oSurf%ADT%nBboxes
+                oSurf%rBuffer(rSize + 1:rSize + 6) = oSurf%ADT%xBBox(:, i)
+                rSize = rSize + 6
+            end do
+
+            do i = 1, oSurf%ADT%nLeaves
+                oSurf%rBuffer(rSize + 1:rSize + 6) = oSurf%ADT%ADTree(i)%xMin(:)
+                rSize = rSize + 6
+
+                oSurf%rBuffer(rSize + 1:rSize + 6) = oSurf%ADT%ADTree(i)%xMax(:)
+                rSize = rSize + 6
+            end do
+        end if
+    end subroutine packOSurf
+
+    subroutine unpackOSurf(oSurf)
+
+        use constants
+        use oversetData, only: oversetWall
+        use kdtree2_module, only: kdtree2_create
+        implicit none
+
+        ! Input/Output Parameters
+        type(oversetWall), intent(inout) :: oSurf
+
+        ! Working paramters
+        integer(kind=intType) :: rSize, iSize, idom, i, j, k, n, iNode
+
+        ! Set the sizes of this oSurf
+        oSurf%il = oSurf%iBuffer(1)
+        oSurf%jl = oSurf%iBuffer(2)
+        oSurf%kl = oSurf%iBuffer(3)
+
+        oSurf%nNodes = oSurf%iBuffer(4)
+        oSurf%nCells = oSurf%iBuffer(5)
+        oSurf%maxCells = oSurf%iBuffer(6)
+        oSurf%cluster = oSurf%iBuffer(7)
+
+        iSize = 7
+        rSize = 0
+
+        ! Allocate the arrays now that we know the sizes
+        allocate (oSurf%x(3, oSurf%nNodes))
+        allocate (oSurf%delta(oSurf%nNodes))
+        allocate (oSurf%conn(4, oSurf%nCells))
+        allocate (oSurf%iBlank(oSurf%maxCells))
+        allocate (oSurf%cellPtr(oSurf%nCells))
+        allocate (oSurf%nte(4, oSurf%nNodes))
+        ! Once we know the sizes, allocate all the arrays in the
+        ! ADTree. Since we are not going to call the *actual* build routine
+        ! for the ADT, we need to set all the information ourselves. This
+        ! essentially does the same thing as buildSerialHex.
+        oSurf%ADT%adtType = adtSurfaceADT
+        oSurf%ADT%nNodes = oSurf%nNodes
+        oSurf%ADT%nTetra = 0
+        oSurf%ADT%nPyra = 0
+        oSurf%ADT%nPrisms = 0
+        oSurf%ADT%nTria = 0
+        oSurf%ADT%nQuads = oSurf%nCells
+        oSurf%ADT%coor => oSurf%x
+        oSurf%ADT%quadsConn => oSurf%conn
+        nullify (oSurf%ADT%triaConn)
+        oSurf%ADT%nBBoxes = oSurf%nCells
+        allocate (oSurf%ADT%xBBOX(6, oSurf%nCells))
+        allocate (oSurf%ADT%elementType(oSurf%nCells))
+        allocate (oSurf%ADT%elementID(oSurf%nCells))
+        oSurf%ADT%comm = MPI_COMM_SELF
+        oSurf%ADT%nProcs = 1
+        oSurf%ADT%myID = 0
+
+        ! All hexas
+        oSurf%ADT%elementType = adtQuadrilateral
+
+        do i = 1, oSurf%nCells
+            oSurf%ADT%elementID(i) = i
+        end do
+
+        oSurf%ADT%nLeaves = oSurf%ADT%nBBoxes - 1
+        if (oSurf%ADT%nBBoxes <= 1) oSurf%ADT%nLeaves = oSurf%ADT%nLeaves + 1
+
+        allocate (oSurf%ADT%ADTree(oSurf%ADT%nLeaves))
+
+        ! Now continue copying out the values if necessary:
+        if (oSurf%nNodes > 0) then
+            do j = 1, oSurf%nCells
+                do i = 1, 4
+                    iSize = iSize + 1
+                    oSurf%conn(i, j) = oSurf%iBuffer(iSize)
+                end do
+            end do
+
+            do i = 1, oSurf%maxCells
+                iSize = iSize + 1
+                oSurf%iBlank(i) = oSurf%iBuffer(iSize)
+            end do
+
+            do i = 1, oSurf%nCells
+                iSize = iSize + 1
+                oSurf%cellPtr(i) = oSurf%iBuffer(iSize)
+            end do
+
+            do i = 1, oSurf%ADT%nLeaves
+                iSize = iSize + 1
+                oSurf%ADT%ADTree(i)%children(1) = oSurf%iBuffer(iSize)
+                iSize = iSize + 1
+                oSurf%ADT%ADTree(i)%children(2) = oSurf%iBuffer(iSize)
+            end do
+
+            ! Done with the integer values, do the real ones
+            rSize = 0
+
+            do i = 1, oSurf%nNodes
+                do j = 1, 3
+                    rSize = rSize + 1
+                    oSurf%x(j, i) = oSurf%rBuffer(rSize)
+                end do
+            end do
+
+            do i = 1, oSurf%nNodes
+                rSize = rSize + 1
+                oSurf%delta(i) = oSurf%rBuffer(rSize)
+            end do
+
+            do i = 1, oSurf%ADT%nBboxes
+                oSurf%ADT%xBBox(:, i) = oSurf%rBuffer(rSize + 1:rSize + 6)
+                rSize = rSize + 6
+            end do
+
+            do i = 1, oSurf%ADT%nLeaves
+                oSurf%ADT%ADTree(i)%xMin(:) = oSurf%rBuffer(rSize + 1:rSize + 6)
+                rSize = rSize + 6
+
+                oSurf%ADT%ADTree(i)%xMax(:) = oSurf%rBuffer(rSize + 1:rSize + 6)
+                rSize = rSize + 6
+            end do
+        end if
+
+        ! Build the KDTree
+        if (oSurf%nNodes > 0) then
+            oSurf%tree => kdtree2_create(oSurf%x)
+        end if
+
+        ! Build the inverse of the connectivity, the nodeToElem array.
+        oSurf%nte = 0
+        do i = 1, oSurf%nCells
+            do j = 1, 4
+                n = oSurf%conn(j, i)
+                inner: do k = 1, 4
+                    if (oSurf%nte(k, n) == 0) then
+                        oSurf%nte(k, n) = i
+                        exit inner
+                    end if
+                end do inner
+            end do
+        end do
+
+        ! Flag this oSurf as being allocated:
+        oSurf%allocated = .True.
+        deallocate (oSurf%rBuffer, oSurf%iBuffer)
+    end subroutine unpackOSurf
 end module oversetPackingRoutines
diff --git a/src/overset/oversetUtilities.F90 b/src/overset/oversetUtilities.F90
index a1f97a808..6e503abbd 100644
--- a/src/overset/oversetUtilities.F90
+++ b/src/overset/oversetUtilities.F90
@@ -2,2714 +2,2697 @@ module oversetUtilities
 contains
 #ifndef USE_TAPENADE
 
-  subroutine tic(index)
-    use constants
-    use oversetData, only : tStart
-    implicit none
-    integer(kind=intType), intent(in) :: index
-    tStart(index) = mpi_wtime()
-
-  end subroutine tic
-
-  subroutine toc(index)
-    use constants
-    use oversetData, only : tStart, oversetTimes
-    implicit none
-    integer(kind=intType), intent(in) :: index
-    oversetTimes(index) = oversetTimes(index) + mpi_wtime()- tStart(index)
-  end subroutine toc
-
-  subroutine unwindIndex(index, il, jl, kl, i, j, k)
-    ! Unwind a 1-based index based on the double halo size of 0:ib,
-    ! 0:jb, 0:kb
-    use constants
-    implicit none
-    integer(kind=intType), intent(in) ::index, il, jl, kl
-    integer(kind=intType), intent(out) :: i, j, k
-    integer(kind=intType) :: ID, isize, jsize, ksize
-    ID = index - 1
-    iSize = il+3
-    jSize = jl+3
-    kSize = kl+3
-    i = mod(ID, iSize)
-    j = mod(ID/iSize, jSize)
-    k = ID/(iSize*jSize)
-
-  end subroutine unwindIndex
-
-  function windIndex(i, j, k, il, jl, kl)
-
-    use constants
-    implicit none
-    integer(kind=intType),intent(in) :: i, j, k, il, jl, kl
-    integer(kind=intType) :: iSize, jSize, kSize, windIndex
-
-    iSize = il+3
-    jSize = jl+3
-    kSize = kl+3
-
-    windIndex = k*iSize*jSize + j*iSize + i +1
-
-  end function windIndex
-
-  subroutine printOverlapMatrix(overlap)
-
-    ! This is a debugging routine to print out the overlap matrix.
-    use constants
-    use communication, only : myid
-    use oversetData, only : CSRMatrix
-    implicit none
-
-    ! Input/output
-    type(CSRMatrix), intent(in) :: overlap
-
-    ! Working
-    integer(kind=intType) :: i, jj
-
-    if (myid == 0) then
-       ! Now dump out who owns what:
-       do i=1, overlap%nrow
-          write(*, "(a,I4, a)", advance='no') 'Row:', i, "   "
-          do jj=overlap%rowPtr(i), overlap%rowPtr(i+1)-1
-             write(*, "(a,I2, a, es10.5)", advance='no') "(", overlap%colInd(jj), ")", overlap%data(jj)
-          end do
-          write(*, *) " "
-       end do
-
-       print *, '--------------------------------------'
-       ! Now dump out who owns what:
-       do i=1, overlap%nRow
-          write(*, "(a,I4, a)", advance='no') 'Row:', i, "   "
-          do jj=overlap%rowPtr(i), overlap%rowPtr(i+1)-1
-             write(*, "(a,I2, a, I8)", advance='no') "(", overlap%colInd(jj), ")", int(overlap%assignedProc(jj))
-          end do
-          write(*, *) " "
-       end do
-    end if
-  end subroutine printOverlapMatrix
-
-  subroutine getCumulativeForm(sizeArray, n, cumArray)
-
-    use constants
-    implicit none
-
-    ! Input/Output
-    integer(kind=intType), intent(in) :: n
-    integer(kind=intType), dimension(n), intent(in) :: sizeArray
-    integer(kind=intType), dimension(0:n), intent(out) :: cumArray
-
-    ! Working
-    integer(kind=intType) :: i
-
-    cumArray(0) = 0
-    do i=1, n
-       cumArray(i) = cumArray(i-1) + sizeArray(i)
-    end do
-
-  end subroutine getCumulativeForm
-
-  subroutine transposeOverlap(A, B)
-
-    ! Create the matrix Create the matrix transpose.
-    ! Inspired by: https://people.sc.fsu.edu/~jburkardt/f_src/sparsekit/sparsekit.f90
-    use constants
-    use oversetData, only : CSRMatrix
-    implicit none
-
-    ! Input/Output
-    type(CSRMatrix), intent(in) :: A
-    type(CSRMatrix), intent(inout) :: B
-
-    ! Working
-    integer(kind=intType) :: col, colp1, i, k, next
-
-    ! A CSR matrix is the same as a CSC matrix of the transpose. So
-    ! essentially the algorithm is convert A as a CSC matrix to B
-    ! (a CSR matrix)
-
-    ! Allocate space for everything in B
-    B%nnz = A%nnz
-    B%nRow = A%nCol
-    B%nCol = A%nRow
-    allocate(B%data(B%nnz), B%colInd(B%nnz), &
-         B%assignedProc(B%nnz), B%rowPtr(B%nRow + 1))
-    B%allocated = .True.
-    !  Compute lengths of rows of B (ie the columns of A)
-
-    B%rowPtr = 0
-
-    do i = 1, A%nRow
-       do k = A%rowPTr(i), A%rowPtr(i+1)-1
-          colp1 = A%colInd(k) +1
-          B%rowPtr(colp1) = B%rowPtr(colp1) + 1
-
-       end do
-    end do
-    !
-    !  Compute pointers from lengths.
-    !
-    B%rowPtr(1) = 1
-    do i = 1, A%nRow
-       B%rowPtr(i+1) = B%rowPtr(i) + B%rowPtr(i+1)
-    end do
-    !
-    !  Do the actual copying.
-    !
-    do i = 1, A%nRow
-       do k = A%rowPtr(i), A%rowPtr(i+1)-1
-          col = A%colInd(k)
-          next = B%rowPtr(col)
-
-          B%data(next) = A%data(k)
-          B%assignedProc(next) = A%assignedProc(k)
-
-          B%colInd(next) = i
-          B%rowPtr(col) = next + 1
-       end do
-    end do
-
-    !  Reshift B%rowPtr
-    do i = A%nRow, 1, -1
-       B%rowPtr(i+1) = B%rowPtr(i)
-    end do
-    B%rowPtr(1) = 1
-
-  end subroutine transposeOverlap
-
-  subroutine deallocateCSRMatrix(mat1)
-
-    use constants
-    use oversetData, only : CSRMatrix
-    implicit none
-
-    type(CSRMatrix), intent(inout) :: mat1
-
-    if (mat1%allocated) then
-       deallocate(&
-            mat1%data, &
-            mat1%colInd, &
-            mat1%rowPtr, &
-            mat1%assignedProc)
-    end if
-
-  end subroutine deallocateCSRMatrix
-
-  subroutine computeFringeProcArray(fringes, n, fringeProc, cumFringeProc, nFringeProc)
-    ! Compute the breakpoints "cumFringeProc" for a list of sorted n
-    ! fringes "fringes". nFringeProc is the total number of unique
-    ! processors. fringeProc is the processor number for each section.
-    use constants
-    use block, only : fringeType
-    use oversetData, only : CSRMatrix
-    use communication, only : nProc
-
-    implicit none
-
-    ! Input/Output
-    integer(kind=intType), intent(in) :: n
-    type(fringeType), intent(in), dimension(n) :: fringes
-    integer(kind=intType), intent(out) :: nFringeProc
-    integer(kind=intType), intent(out) :: fringeProc(nProc), cumFringeProc(1:nProc+1)
-
-    ! Working
-    integer(kind=intType) :: i, currentProc
+    subroutine tic(index)
+        use constants
+        use oversetData, only: tStart
+        implicit none
+        integer(kind=intType), intent(in) :: index
+        tStart(index) = mpi_wtime()
+
+    end subroutine tic
+
+    subroutine toc(index)
+        use constants
+        use oversetData, only: tStart, oversetTimes
+        implicit none
+        integer(kind=intType), intent(in) :: index
+        oversetTimes(index) = oversetTimes(index) + mpi_wtime() - tStart(index)
+    end subroutine toc
+
+    subroutine unwindIndex(index, il, jl, kl, i, j, k)
+        ! Unwind a 1-based index based on the double halo size of 0:ib,
+        ! 0:jb, 0:kb
+        use constants
+        implicit none
+        integer(kind=intType), intent(in) :: index, il, jl, kl
+        integer(kind=intType), intent(out) :: i, j, k
+        integer(kind=intType) :: ID, isize, jsize, ksize
+        ID = index - 1
+        iSize = il + 3
+        jSize = jl + 3
+        kSize = kl + 3
+        i = mod(ID, iSize)
+        j = mod(ID / iSize, jSize)
+        k = ID / (iSize * jSize)
+
+    end subroutine unwindIndex
+
+    function windIndex(i, j, k, il, jl, kl)
+
+        use constants
+        implicit none
+        integer(kind=intType), intent(in) :: i, j, k, il, jl, kl
+        integer(kind=intType) :: iSize, jSize, kSize, windIndex
+
+        iSize = il + 3
+        jSize = jl + 3
+        kSize = kl + 3
+
+        windIndex = k * iSize * jSize + j * iSize + i + 1
+
+    end function windIndex
+
+    subroutine printOverlapMatrix(overlap)
+
+        ! This is a debugging routine to print out the overlap matrix.
+        use constants
+        use communication, only: myid
+        use oversetData, only: CSRMatrix
+        implicit none
+
+        ! Input/output
+        type(CSRMatrix), intent(in) :: overlap
+
+        ! Working
+        integer(kind=intType) :: i, jj
+
+        if (myid == 0) then
+            ! Now dump out who owns what:
+            do i = 1, overlap%nrow
+                write (*, "(a,I4, a)", advance='no') 'Row:', i, "   "
+                do jj = overlap%rowPtr(i), overlap%rowPtr(i + 1) - 1
+                    write (*, "(a,I2, a, es10.5)", advance='no') "(", overlap%colInd(jj), ")", overlap%data(jj)
+                end do
+                write (*, *) " "
+            end do
 
-    fringeProc = -1
-    nFringeProc = 0
-    cumFringeProc(1) = 1
-    currentProc = -1
-
-    do i=1, n
-       if (currentProc /= fringes(i)%donorProc) then
-          nFringeProc = nFringeProc + 1
-          cumFringeProc(nFringeProc) = i
-          fringeProc(nFringeProc) = fringes(i)%donorProc
-          currentProc = fringes(i)%donorProc
-       end if
-    end do
-
-    ! Finally, the nFringeProc+1 entry is always n+1
-    cumFringeProc(nFringeProc+1) = n + 1
-
-  end subroutine computeFringeProcArray
-
-
-  subroutine deallocateOBlocks(oBlocks, n)
-
-    ! This subroutine deallocates all data stores in a list of oBlocks
-    use constants
-    use adtBuild, only : destroySerialHex
-    use oversetData, only : oversetBlock
-    implicit none
-
-    ! Input Params
-    integer(kind=intType) :: n
-    type(oversetBlock), dimension(n), intent(inout) :: oBLocks
-
-    ! Working Parameters
-    integer(kind=intType) :: i
-
-    do i=1, n
-
-       ! oBlock:
-       if (oblocks(i)%allocated) then
-          deallocate(&
-               oBlocks(i)%hexaConn, &
-               oBlocks(i)%globalCell, &
-               oBLocks(i)%nearWall, &
-               oBLocks(i)%invalidDonor, &
-               oBlocks(i)%qualDonor, &
-               oBlocks(i)%xADT)
-          if (allocated(oblocks(i)%rbuffer)) then
-             deallocate(oBlocks(i)%rBuffer, &
-                  oBlocks(i)%iBuffer)
-          end if
-          call destroySerialHex(oBlocks(i)%ADT)
-       end if
-    end do
-  end subroutine deallocateOBlocks
-
-  subroutine deallocateOFringes(oFringes, n)
-
-    ! This subroutine deallocates all data stores in a list of oFringes
-    use constants
-    use oversetData, only : oversetFringe
-    implicit none
-
-    ! Input Params
-    integer(kind=intType) :: n
-    type(oversetFringe), dimension(n), intent(inout) :: oFringes
-
-    ! Working Parameters
-    integer(kind=intType) :: i
-
-    do i=1, n
-       if (allocated(oFringes(i)%x)) &
-            deallocate(oFringes(i)%x)
-
-       if (allocated(oFringes(i)%isWall)) &
-            deallocate(oFringes(i)%isWall)
-
-       if (allocated(oFringes(i)%xSeed)) &
-            deallocate(oFringes(i)%xSeed)
-
-       if (allocated(oFringes(i)%wallInd)) &
-            deallocate(oFringes(i)%wallInd)
-
-       if (allocated(oFringes(i)%rbuffer)) &
-            deallocate(oFringes(i)%rBuffer)
-
-       if (allocated(oFringes(i)%ibuffer)) &
-            deallocate(oFringes(i)%iBuffer)
-
-       if (associated(oFringes(i)%fringeIntBuffer)) &
-          deallocate(oFringes(i)%fringeIntBuffer)
-
-       if (associated(oFringes(i)%fringeRealBuffer)) &
-          deallocate(oFringes(i)%fringeRealBuffer)
-
-       oFringes(i)%allocated = .False.
-    end do
-  end subroutine deallocateOFringes
-
-  subroutine deallocateOSurfs(oSurfs, n)
-
-    ! This subroutine deallocates all data stores in a list of oSurfs
-
-    use constants
-    use adtBuild, only : destroySerialQuad
-    use oversetData, only : oversetWall
-    use kdtree2_module, only : kdtree2destroy
-    implicit none
-
-    ! Input Params
-    integer(kind=intType) :: n
-    type(oversetWall), dimension(n), intent(inout) :: oSurfs
-
-    ! Working Parameters
-    integer(kind=intType) :: i
-
-    do i=1, n
-       if (oSurfs(i)%allocated) then
-          deallocate(&
-               oSurfs(i)%x, &
-               oSurfs(i)%conn, &
-               oSurfs(i)%iblank, &
-               oSurfs(i)%cellPtr)
-          call destroySerialQuad(oSurfs(i)%ADT)
-          if (oSurfs(i)%nNodes > 0) then
-             call kdtree2destroy(oSurfs(i)%tree)
-          end if
-       end if
-       oSurfs(i)%allocated = .False.
-    end do
-  end subroutine deallocateOSurfs
-
-  subroutine wallsOnBlock(wallsPresent)
-
-    use constants
-    use blockPointers, only : nBkGlobal
-    use cgnsGrid, only : cgnsDoms
-    implicit none
-
-    logical, intent(out) :: wallsPresent
-    integer(kind=intType) :: mm
-    wallsPresent = .False.
-    ! Check THE ORIGINAL CGNS blocks for BCs, because the block may have
-    ! been split.
-    do mm=1, cgnsDoms(nbkGlobal)%nBocos
-       if (cgnsDoms(nbkGlobal)%bocoInfo(mm)%BCType == NSWallAdiabatic .or. &
-            cgnsDoms(nbkGlobal)%bocoInfo(mm)%BCType == NSWallIsothermal .or. &
-            cgnsDoms(nbkGlobal)%bocoInfo(mm)%BCType == EulerWall) then
-          wallsPresent = .True.
-       end if
-    end do
-  end subroutine wallsOnBlock
-
-  subroutine flagForcedRecv
-
-    use constants
-    use blockPointers, only : nx, ny, nz, ie, je, ke, BCData, BCFaceID, nBocos, BCType, &
-         forcedRecv, flowDoms, nDom, il, jl, kl, iBlank, status
-    use utils, only : setPointers
-    use communication
-    use haloExchange, only : whalo1to1IntGeneric, whalo1to1IntGeneric_b
-    use stencils
-    implicit none
-
-    ! This is generic routine for filling up a 3D array of 1st level halos
-    ! cells (1:ie, 1:je, 1:ke) indicating cells that are forced
-    ! receivers. BlockPointers must have already been set.
-
-    integer(kind=intType) :: nn, i, j, k, mm, iStart, iEnd, jStart, jEnd, kStart, kEnd
-    integer(kind=intType) :: ii, jj, kk, i_stencil
-    logical :: floodOrBlank, floodOrBlank2
-    do nn=1,nDom
-       call setPointers(nn, 1, 1)
-       forcedRecv = 0
-       do mm=1,nBocos
-          ! Just record the ranges necessarvy and we'll add in a generic
-          ! loop. Why is it the first three? Well, the first level of halos
-          ! off of an overset outer bound is completely
-          ! meaningless. Essentially we ignore those. So the outer two
-          ! layers of cells are indices 2 and 3. Therefore the first 3 on
-          ! either side need to be flagged as invalid.
-
-          select case (BCFaceID(mm))
-          case (iMin)
-             iStart=1; iEnd=3;
-             jStart=BCData(mm)%inBeg+1; jEnd=BCData(mm)%inEnd
-             kStart=BCData(mm)%jnBeg+1; kEnd=BCData(mm)%jnEnd
-          case (iMax)
-             iStart=nx; iEnd=ie;
-             jStart=BCData(mm)%inBeg+1; jEnd=BCData(mm)%inEnd
-             kStart=BCData(mm)%jnBeg+1; kEnd=BCData(mm)%jnEnd
-          case (jMin)
-             iStart=BCData(mm)%inBeg+1; iEnd=BCData(mm)%inEnd
-             jStart=1; jEnd=3;
-             kStart=BCData(mm)%jnBeg+1; kEnd=BCData(mm)%jnEnd
-          case (jMax)
-             iStart=BCData(mm)%inBeg+1; iEnd=BCData(mm)%inEnd
-             jStart=ny; jEnd=je;
-             kStart=BCData(mm)%jnBeg+1; kEnd=BCData(mm)%jnEnd
-          case (kMin)
-             iStart=BCData(mm)%inBeg+1; iEnd=BCData(mm)%inEnd
-             jStart=BCData(mm)%jnBeg+1; jEnd=BCData(mm)%jnEnd
-             kStart=1; kEnd=3;
-          case (kMax)
-             iStart=BCData(mm)%inBeg+1; iEnd=BCData(mm)%inEnd
-             jStart=BCData(mm)%jnBeg+1; jEnd=BCData(mm)%jnEnd
-             kStart=nz; kEnd=ke;
-          end select
-
-          if (BCType(mm) == OversetOuterBound) then
-             do k=kStart, kEnd
-                do j=jStart, jEnd
-                   do i=iStart, iEnd
-                      forcedRecv(i, j, k) = 1
-                   end do
+            print *, '--------------------------------------'
+            ! Now dump out who owns what:
+            do i = 1, overlap%nRow
+                write (*, "(a,I4, a)", advance='no') 'Row:', i, "   "
+                do jj = overlap%rowPtr(i), overlap%rowPtr(i + 1) - 1
+                    write (*, "(a,I2, a, I8)", advance='no') "(", overlap%colInd(jj), ")", int(overlap%assignedProc(jj))
                 end do
-             end do
-          end if
-       end do
-
-       ! Add to the invalid donor list if it got flooded with iblank of -2 or -3:
-       do k=2, kl
-          do j=2, jl
-             do i=2, il
-                ! Flooded or explictly blanked cell
-                floodOrBlank = isFlooded(status(i,j,k)) .or. &
-                     isFloodSeed(status(i,j,k)) .or.&
-                     iblank(i,j,k) == -4
-                if (floodOrBlank) then
-                   stencilLoop: do i_stencil=1, N_visc_drdw
-                      ii = visc_drdw_stencil(i_stencil, 1) + i
-                      jj = visc_drdw_stencil(i_stencil, 2) + j
-                      kk = visc_drdw_stencil(i_stencil, 3) + k
-                      ! Flag as a forced reciver if it *isn't* flooded
-                      ! or explictly blanked
-
-                      floodOrBlank2 = isFlooded(status(ii,jj,kk)) .or. &
-                           isFloodSeed(status(ii,jj,kk)) .or.&
-                           iblank(ii,jj,kk) == -4
-
-                      if (.not. floodOrBlank2) then
-                         forcedRecv(ii, jj, kk) = 1
-                      end if
-                   end do stencilLoop
-                end if
-             end do
-          end do
-       end do
-    end do
-
-    ! Update the info across block boundaries
-    domainLoop:do nn=1, nDom
-       flowDoms(nn, 1, 1)%intCommVars(1)%var => &
-            flowDoms(nn, 1, 1)%forcedRecv(:, :, :)
-    end do domainLoop
-
-    ! Run the reverse halo exchange first. This is necessary if there
-    ! is 1 cell wide block next to a overset outer bound like the following:
-    !
-    !         Blk1         Blk2
-    !   ----+-----+------++-----+
-    !       |     |      ||     |    <= this face has overset outer bound BC
-    !   ----+-----+------++-----+
-    !       |     |      ||     |
-    !   ----+-----+------++-----+
-    !       |     |      ||     |
-    !   ----+-----+------++-----+
-    !                    ^block boundary
-    !
-    ! So what happens, is blk2 (1 cell wide) sets the two layers of
-    ! cells off of the BC as forced receivers. However, the second of
-    ! those layers is a halo cell. Blk1 then never gets this
-    ! information. as it clearly should. So what we have to do is a
-    ! reverse halo exchange that takes halo information and combines
-    ! it with real cell information. Essentially we will accumulate
-    ! forcedRecv from the halo to the real cell. For this we run the
-    ! generic reverse halo exchange.
-
-    call wHalo1to1IntGeneric_b(1, 1, 1, commPatternCell_2nd, internalCell_2nd)
-
-    ! Finally we now need to run the forward halo exchange to make
-    ! sure any halos on other procs are set correctly that may be part of a stencil
-    call wHalo1to1IntGeneric(1, 1, 1, commPatternCell_2nd, internalCell_2nd)
-
-  end subroutine flagForcedRecv
-
-  ! Utility function for unpacking/accessing the status variable
-
-  function isDonor(i)
-    use constants
-    implicit none
-    logical :: isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver
-    integer(kind=intType), intent(in) :: i
-    call getStatus(i, isDonor, isDonor, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver)
-  end function isDonor
-
-  function isHole(i)
-    use constants
-    implicit none
-    logical :: isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver
-    integer(kind=intType), intent(in) :: i
-    call getStatus(i, isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver)
-  end function isHole
-
-  function isCompute(i)
-    use constants
-    implicit none
-    logical :: isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver
-    integer(kind=intType), intent(in) :: i
-    call getStatus(i, isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver)
-  end function isCompute
-
-  function isFloodSeed(i)
-    use constants
-    implicit none
-    logical :: isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver
-    integer(kind=intType), intent(in) :: i
-    call getStatus(i, isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver)
-  end function isFloodSeed
-
-  function isFlooded(i)
-    use constants
-    implicit none
-    logical :: isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver
-    integer(kind=intType), intent(in) :: i
-    call getStatus(i, isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver)
-  end function isFlooded
-
-  function isWallDonor(i)
-    use constants
-    implicit none
-    logical :: isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver
-    integer(kind=intType), intent(in) :: i
-    call getStatus(i, isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver)
-  end function isWallDonor
-
-  function isReceiver(i)
-    use constants
-    implicit none
-    logical :: isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver
-    integer(kind=intType), intent(in) :: i
-    call getStatus(i, isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver)
-  end function isReceiver
-
-  subroutine setIsDonor(i, flag)
-    use constants
-    implicit none
-    integer(kind=intType), intent(inout) :: i
-    logical :: isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver, flag
-    call getStatus(i, isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver)
-    call setStatus(i, flag   , isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isREceiver)
-  end subroutine setIsDonor
-
-  subroutine setIsHole(i, flag)
-    use constants
-    implicit none
-    integer(kind=intType), intent(inout) :: i
-    logical :: isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver, flag
-    call getStatus(i, isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver)
-    call setStatus(i, isDonor, flag  , isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver)
-  end subroutine setIsHole
-
-  subroutine setIsCompute(i, flag)
-    use constants
-    implicit none
-    integer(kind=intType), intent(inout) :: i
-    logical :: isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver, flag
-    call getStatus(i, isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver)
-    call setStatus(i, isDonor, isHole, flag     , isFloodSeed, isFlooded, isWallDonor, isReceiver)
-  end subroutine setIsCompute
-
-  subroutine setIsFloodSeed(i, flag)
-    use constants
-    implicit none
-    integer(kind=intType), intent(inout) :: i
-    logical :: isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver, flag
-    call getStatus(i, isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver)
-    call setStatus(i, isDonor, isHole, isCompute, flag       , isFlooded, isWallDonor, isReceiver)
-  end subroutine setIsFloodSeed
-
-  subroutine setIsFlooded(i, flag)
-    use constants
-    implicit none
-    integer(kind=intType), intent(inout) :: i
-    logical :: isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver, flag
-    call getStatus(i, isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver)
-    call setStatus(i, isDonor, isHole, isCompute, isFloodSeed, flag     , isWallDonor, isReceiver)
-  end subroutine setIsFlooded
-
-  subroutine setIsWallDonor(i, flag)
-    use constants
-    implicit none
-    integer(kind=intType), intent(inout) :: i
-    logical :: isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver, flag
-    call getStatus(i, isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver)
-    call setStatus(i, isDonor, isHole, isCompute, isFloodSeed, isFlooded, flag,        isReceiver)
-  end subroutine setIsWallDonor
-
-  subroutine setIsReceiver(i, flag)
-    use constants
-    implicit none
-    integer(kind=intType), intent(inout) :: i
-    logical :: isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver, flag
-    call getStatus(i, isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver)
-    call setStatus(i, isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, flag)
-  end subroutine setIsReceiver
-
-  subroutine setStatus(i, isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver)
-
-    use constants
-    implicit none
-    integer(kind=intType), intent(out) :: i
-    logical :: isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver
-    i = 0
-
-    if (isDonor  )   i = i + 1
-    if (isHole   )   i = i + 2
-    if (isCompute)   i = i + 4
-    if (isFloodSeed) i = i + 8
-    if (isFlooded  ) i = i + 16
-    if (isWallDonor) i = i + 32
-    if (isReceiver)  i = i + 64
-  end subroutine setStatus
-
-  subroutine getStatus(i, isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver)
-
-    use constants
-    implicit none
-    logical :: isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver
-    integer(kind=intType) :: i, j
-    j = i
-
-    isDonor = .False.
-    isHole = .False.
-    isCompute = .False.
-    isFloodSeed = .False.
-    isFlooded = .False.
-    isWallDonor = .False.
-    isReceiver = .False.
-
-    if (j/64 > 0) then
-       isReceiver = .True.
-       j = j - 64
-    end if
-
-    if (j/32 > 0) then
-       isWallDonor = .True.
-       j = j - 32
-    end if
-
-    if (j/16 > 0) then
-       isFlooded = .True.
-       j = j - 16
-    end if
-
-    if (j/8 > 0) then
-       isFloodSeed = .True.
-       j = j - 8
-    end if
-
-    if (j/4 > 0) then
-       isCompute = .True.
-       j = j - 4
-    end if
-
-    if (j/2 > 0) then
-       isHole = .True.
-       j = j - 2
-    end if
-
-    if (j/1 > 0) then
-       isDonor = .True.
-       j = j - 1
-    end if
-  end subroutine getStatus
-
-  !
-  subroutine binSearchNodes(arr, searchNode, nn, searchInd)
-
-    !  binSearchNodes does binary search for a node 'searchNode'
-    !  in arr(1:nn) and returns index 'searchInd' where
-    !  'searchNode' lies in arr. searchInd = -1 if not found.
-
-    use constants
-    implicit none
-
-    ! Input parameters
-    integer(kind=intType), intent(in) :: nn, searchNode
-    integer(kind=intType), intent(out) :: searchInd
-    integer(kind=intType), intent(in) :: arr(nn)
-
-    ! Local variables
-    integer(kind=intType) :: first, last, middle
-
-    first = 1
-    last = nn
-
-    middle = (first+last)/2
-
-    do while (first <= last)
-       if (arr(middle) < searchNode) then
-          first = middle + 1
-       else if (arr(middle) == searchNode) then
-          searchInd = middle
-          exit
-       else
-          last = middle -1
-       end if
-
-       middle = (first+last)/2
-    end do !while
-
-    if (first > last) then
-       searchInd = -1
-       print*, ' binSearchNode fails for searchNode ',searchNode
-       STOP
-    end if
-  end subroutine binSearchNodes
-
-  subroutine binSearchPocketEdgeType(arr, search, nn, searchInd)
-
-    !  binSearchPocketEdgeType does binary searche for
-    !  pocketEdgeType 'search' edge and returns index 'searchInd'
-    !  where 'search' lies in arr.
-
-    use constants
-    use oversetData ! cannot use only becuase of <= operator
-    implicit none
-
-    ! Input parameters
-    integer(kind=intType), intent(in) :: nn
-    integer(kind=intType), intent(out) :: searchInd
-
-    type(pocketEdge), intent(in) :: search
-    type(pocketEdge), dimension(*), intent(in) :: arr
-
-    ! Local variables
-    integer(kind=intType) :: first, last, middle
-
-    first = 1
-    last = nn
-
-    middle = (first+last)/2
-
-    do while (first <= last)
-       if (arr(middle) < search) then
-          first = middle + 1
-       else if (arr(middle) == search) then
-          searchInd = middle
-          exit
-       else
-          last = middle -1
-       end if
-
-       middle = (first+last)/2
-    end do !while
-
-    if (first > last) then
-       print*, ' binSearchPocketEdgeType fails for Edge with nodes ',&
-            search%n1, search%n2
-       STOP
-    end if
-  end subroutine binSearchPocketEdgeType
-
-  !
-  subroutine qsortEdgeType(arr, nn)
-    !
-    !       qsortEdgeType sorts the given number of oversetString master
-    !       Edges in increasing order based on the <= operator for this
-    !       derived data type.
-    !       (Generously copied from qsortFringeType.F90)
-    !
-    use constants
-    use oversetData ! cannot use only becuase of <= operator
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nn
+                write (*, *) " "
+            end do
+        end if
+    end subroutine printOverlapMatrix
 
-    type(oversetEdge), dimension(*), intent(inout) :: arr
-    !
-    !      Local variables.
-    !
-    integer(kind=intType), parameter :: m = 7
+    subroutine getCumulativeForm(sizeArray, n, cumArray)
 
-    integer(kind=intType) :: nStack
-    integer(kind=intType) :: i, j, k, r, l, jStack, ii
+        use constants
+        implicit none
 
-    integer :: ierr
+        ! Input/Output
+        integer(kind=intType), intent(in) :: n
+        integer(kind=intType), dimension(n), intent(in) :: sizeArray
+        integer(kind=intType), dimension(0:n), intent(out) :: cumArray
 
-    type(oversetEdge) :: a, tmp
+        ! Working
+        integer(kind=intType) :: i
 
-    integer(kind=intType), allocatable, dimension(:) :: stack
-    integer(kind=intType), allocatable, dimension(:) :: tmpStack
+        cumArray(0) = 0
+        do i = 1, n
+            cumArray(i) = cumArray(i - 1) + sizeArray(i)
+        end do
 
-    ! Allocate the memory for stack.
+    end subroutine getCumulativeForm
 
-    nStack = 100
-    allocate(stack(nStack), stat=ierr)
-    if(ierr /= 0)                         &
-         call terminate("qsortEdgeType", &
-         "Memory allocation failure for stack")
+    subroutine transposeOverlap(A, B)
 
-    ! Initialize the variables that control the sorting.
+        ! Create the matrix Create the matrix transpose.
+        ! Inspired by: https://people.sc.fsu.edu/~jburkardt/f_src/sparsekit/sparsekit.f90
+        use constants
+        use oversetData, only: CSRMatrix
+        implicit none
 
-    jStack = 0
-    l      = 1
-    r      = nn
+        ! Input/Output
+        type(CSRMatrix), intent(in) :: A
+        type(CSRMatrix), intent(inout) :: B
 
-    ! Start of the algorithm
+        ! Working
+        integer(kind=intType) :: col, colp1, i, k, next
 
-    do
+        ! A CSR matrix is the same as a CSC matrix of the transpose. So
+        ! essentially the algorithm is convert A as a CSC matrix to B
+        ! (a CSR matrix)
 
-       ! Check for the size of the subarray.
+        ! Allocate space for everything in B
+        B%nnz = A%nnz
+        B%nRow = A%nCol
+        B%nCol = A%nRow
+        allocate (B%data(B%nnz), B%colInd(B%nnz), &
+                  B%assignedProc(B%nnz), B%rowPtr(B%nRow + 1))
+        B%allocated = .True.
+        !  Compute lengths of rows of B (ie the columns of A)
 
-       if((r-l) < m) then
+        B%rowPtr = 0
 
-          ! Perform insertion sort
+        do i = 1, A%nRow
+            do k = A%rowPTr(i), A%rowPtr(i + 1) - 1
+                colp1 = A%colInd(k) + 1
+                B%rowPtr(colp1) = B%rowPtr(colp1) + 1
 
-          do j=l+1,r
-             a = arr(j)
-             do i=(j-1),l,-1
-                if(arr(i) <= a) exit
-                arr(i+1) = arr(i)
-             enddo
-             arr(i+1) = a
-          enddo
+            end do
+        end do
+        !
+        !  Compute pointers from lengths.
+        !
+        B%rowPtr(1) = 1
+        do i = 1, A%nRow
+            B%rowPtr(i + 1) = B%rowPtr(i) + B%rowPtr(i + 1)
+        end do
+        !
+        !  Do the actual copying.
+        !
+        do i = 1, A%nRow
+            do k = A%rowPtr(i), A%rowPtr(i + 1) - 1
+                col = A%colInd(k)
+                next = B%rowPtr(col)
+
+                B%data(next) = A%data(k)
+                B%assignedProc(next) = A%assignedProc(k)
+
+                B%colInd(next) = i
+                B%rowPtr(col) = next + 1
+            end do
+        end do
+
+        !  Reshift B%rowPtr
+        do i = A%nRow, 1, -1
+            B%rowPtr(i + 1) = B%rowPtr(i)
+        end do
+        B%rowPtr(1) = 1
+
+    end subroutine transposeOverlap
+
+    subroutine deallocateCSRMatrix(mat1)
+
+        use constants
+        use oversetData, only: CSRMatrix
+        implicit none
+
+        type(CSRMatrix), intent(inout) :: mat1
+
+        if (mat1%allocated) then
+            deallocate ( &
+                mat1%data, &
+                mat1%colInd, &
+                mat1%rowPtr, &
+                mat1%assignedProc)
+        end if
+
+    end subroutine deallocateCSRMatrix
+
+    subroutine computeFringeProcArray(fringes, n, fringeProc, cumFringeProc, nFringeProc)
+        ! Compute the breakpoints "cumFringeProc" for a list of sorted n
+        ! fringes "fringes". nFringeProc is the total number of unique
+        ! processors. fringeProc is the processor number for each section.
+        use constants
+        use block, only: fringeType
+        use oversetData, only: CSRMatrix
+        use communication, only: nProc
+
+        implicit none
+
+        ! Input/Output
+        integer(kind=intType), intent(in) :: n
+        type(fringeType), intent(in), dimension(n) :: fringes
+        integer(kind=intType), intent(out) :: nFringeProc
+        integer(kind=intType), intent(out) :: fringeProc(nProc), cumFringeProc(1:nProc + 1)
+
+        ! Working
+        integer(kind=intType) :: i, currentProc
+
+        fringeProc = -1
+        nFringeProc = 0
+        cumFringeProc(1) = 1
+        currentProc = -1
+
+        do i = 1, n
+            if (currentProc /= fringes(i)%donorProc) then
+                nFringeProc = nFringeProc + 1
+                cumFringeProc(nFringeProc) = i
+                fringeProc(nFringeProc) = fringes(i)%donorProc
+                currentProc = fringes(i)%donorProc
+            end if
+        end do
+
+        ! Finally, the nFringeProc+1 entry is always n+1
+        cumFringeProc(nFringeProc + 1) = n + 1
+
+    end subroutine computeFringeProcArray
+
+    subroutine deallocateOBlocks(oBlocks, n)
+
+        ! This subroutine deallocates all data stores in a list of oBlocks
+        use constants
+        use adtBuild, only: destroySerialHex
+        use oversetData, only: oversetBlock
+        implicit none
+
+        ! Input Params
+        integer(kind=intType) :: n
+        type(oversetBlock), dimension(n), intent(inout) :: oBLocks
+
+        ! Working Parameters
+        integer(kind=intType) :: i
+
+        do i = 1, n
+
+            ! oBlock:
+            if (oblocks(i)%allocated) then
+                deallocate ( &
+                    oBlocks(i)%hexaConn, &
+                    oBlocks(i)%globalCell, &
+                    oBLocks(i)%nearWall, &
+                    oBLocks(i)%invalidDonor, &
+                    oBlocks(i)%qualDonor, &
+                    oBlocks(i)%xADT)
+                if (allocated(oblocks(i)%rbuffer)) then
+                    deallocate (oBlocks(i)%rBuffer, &
+                                oBlocks(i)%iBuffer)
+                end if
+                call destroySerialHex(oBlocks(i)%ADT)
+            end if
+        end do
+    end subroutine deallocateOBlocks
+
+    subroutine deallocateOFringes(oFringes, n)
 
-          ! In case there are no more elements on the stack, exit from
-          ! the outermost do-loop. Algorithm has finished.
+        ! This subroutine deallocates all data stores in a list of oFringes
+        use constants
+        use oversetData, only: oversetFringe
+        implicit none
 
-          if(jStack == 0) exit
+        ! Input Params
+        integer(kind=intType) :: n
+        type(oversetFringe), dimension(n), intent(inout) :: oFringes
 
-          ! Pop stack and begin a new round of partitioning.
+        ! Working Parameters
+        integer(kind=intType) :: i
 
-          r = stack(jStack)
-          l = stack(jStack-1)
-          jStack = jStack - 2
+        do i = 1, n
+            if (allocated(oFringes(i)%x)) &
+                deallocate (oFringes(i)%x)
 
-       else
+            if (allocated(oFringes(i)%isWall)) &
+                deallocate (oFringes(i)%isWall)
 
-          ! Subarray is larger than the threshold for a linear sort.
-          ! Choose median of left, center and right elements as
-          ! partitioning element a.
-          ! Also rearrange so that (l) <= (l+1) <= (r).
+            if (allocated(oFringes(i)%xSeed)) &
+                deallocate (oFringes(i)%xSeed)
 
-          k = (l+r)/2
-          tmp      = arr(k)             ! Swap the elements
-          arr(k)   = arr(l+1)           ! k and l+1.
-          arr(l+1) = tmp
+            if (allocated(oFringes(i)%wallInd)) &
+                deallocate (oFringes(i)%wallInd)
 
-          if(arr(r) < arr(l)) then
-             tmp    = arr(l)             ! Swap the elements
-             arr(l) = arr(r)             ! r and l.
-             arr(r) = tmp
-          endif
+            if (allocated(oFringes(i)%rbuffer)) &
+                deallocate (oFringes(i)%rBuffer)
 
-          if(arr(r) < arr(l+1)) then
-             tmp      = arr(l+1)         ! Swap the elements
-             arr(l+1) = arr(r)           ! r and l+1.
-             arr(r)   = tmp
-          endif
+            if (allocated(oFringes(i)%ibuffer)) &
+                deallocate (oFringes(i)%iBuffer)
 
-          if(arr(l+1) < arr(l)) then
-             tmp      = arr(l+1)         ! Swap the elements
-             arr(l+1) = arr(l)           ! l and l+1.
-             arr(l)   = tmp
-          endif
+            if (associated(oFringes(i)%fringeIntBuffer)) &
+                deallocate (oFringes(i)%fringeIntBuffer)
 
-          ! Initialize the pointers for partitioning.
+            if (associated(oFringes(i)%fringeRealBuffer)) &
+                deallocate (oFringes(i)%fringeRealBuffer)
 
-          i = l+1
-          j = r
-          a = arr(l+1)
+            oFringes(i)%allocated = .False.
+        end do
+    end subroutine deallocateOFringes
 
-          ! The innermost loop
+    subroutine deallocateOSurfs(oSurfs, n)
 
-          do
+        ! This subroutine deallocates all data stores in a list of oSurfs
 
-             ! Scan up to find element >= a.
-             do
-                i = i+1
-                if(a <= arr(i)) exit
-             enddo
+        use constants
+        use adtBuild, only: destroySerialQuad
+        use oversetData, only: oversetWall
+        use kdtree2_module, only: kdtree2destroy
+        implicit none
 
-             ! Scan down to find element <= a.
-             do
-                j = j-1
-                if(arr(j) <= a) exit
-             enddo
+        ! Input Params
+        integer(kind=intType) :: n
+        type(oversetWall), dimension(n), intent(inout) :: oSurfs
 
-             ! Exit the loop in case the pointers i and j crossed.
+        ! Working Parameters
+        integer(kind=intType) :: i
+
+        do i = 1, n
+            if (oSurfs(i)%allocated) then
+                deallocate ( &
+                    oSurfs(i)%x, &
+                    oSurfs(i)%conn, &
+                    oSurfs(i)%iblank, &
+                    oSurfs(i)%cellPtr)
+                call destroySerialQuad(oSurfs(i)%ADT)
+                if (oSurfs(i)%nNodes > 0) then
+                    call kdtree2destroy(oSurfs(i)%tree)
+                end if
+            end if
+            oSurfs(i)%allocated = .False.
+        end do
+    end subroutine deallocateOSurfs
+
+    subroutine wallsOnBlock(wallsPresent)
+
+        use constants
+        use blockPointers, only: nBkGlobal
+        use cgnsGrid, only: cgnsDoms
+        implicit none
+
+        logical, intent(out) :: wallsPresent
+        integer(kind=intType) :: mm
+        wallsPresent = .False.
+        ! Check THE ORIGINAL CGNS blocks for BCs, because the block may have
+        ! been split.
+        do mm = 1, cgnsDoms(nbkGlobal)%nBocos
+            if (cgnsDoms(nbkGlobal)%bocoInfo(mm)%BCType == NSWallAdiabatic .or. &
+                cgnsDoms(nbkGlobal)%bocoInfo(mm)%BCType == NSWallIsothermal .or. &
+                cgnsDoms(nbkGlobal)%bocoInfo(mm)%BCType == EulerWall) then
+                wallsPresent = .True.
+            end if
+        end do
+    end subroutine wallsOnBlock
+
+    subroutine flagForcedRecv
+
+        use constants
+        use blockPointers, only: nx, ny, nz, ie, je, ke, BCData, BCFaceID, nBocos, BCType, &
+                                 forcedRecv, flowDoms, nDom, il, jl, kl, iBlank, status
+        use utils, only: setPointers
+        use communication
+        use haloExchange, only: whalo1to1IntGeneric, whalo1to1IntGeneric_b
+        use stencils
+        implicit none
+
+        ! This is generic routine for filling up a 3D array of 1st level halos
+        ! cells (1:ie, 1:je, 1:ke) indicating cells that are forced
+        ! receivers. BlockPointers must have already been set.
+
+        integer(kind=intType) :: nn, i, j, k, mm, iStart, iEnd, jStart, jEnd, kStart, kEnd
+        integer(kind=intType) :: ii, jj, kk, i_stencil
+        logical :: floodOrBlank, floodOrBlank2
+        do nn = 1, nDom
+            call setPointers(nn, 1, 1)
+            forcedRecv = 0
+            do mm = 1, nBocos
+                ! Just record the ranges necessarvy and we'll add in a generic
+                ! loop. Why is it the first three? Well, the first level of halos
+                ! off of an overset outer bound is completely
+                ! meaningless. Essentially we ignore those. So the outer two
+                ! layers of cells are indices 2 and 3. Therefore the first 3 on
+                ! either side need to be flagged as invalid.
+
+                select case (BCFaceID(mm))
+                case (iMin)
+                    iStart = 1; iEnd = 3; 
+                    jStart = BCData(mm)%inBeg + 1; jEnd = BCData(mm)%inEnd
+                    kStart = BCData(mm)%jnBeg + 1; kEnd = BCData(mm)%jnEnd
+                case (iMax)
+                    iStart = nx; iEnd = ie; 
+                    jStart = BCData(mm)%inBeg + 1; jEnd = BCData(mm)%inEnd
+                    kStart = BCData(mm)%jnBeg + 1; kEnd = BCData(mm)%jnEnd
+                case (jMin)
+                    iStart = BCData(mm)%inBeg + 1; iEnd = BCData(mm)%inEnd
+                    jStart = 1; jEnd = 3; 
+                    kStart = BCData(mm)%jnBeg + 1; kEnd = BCData(mm)%jnEnd
+                case (jMax)
+                    iStart = BCData(mm)%inBeg + 1; iEnd = BCData(mm)%inEnd
+                    jStart = ny; jEnd = je; 
+                    kStart = BCData(mm)%jnBeg + 1; kEnd = BCData(mm)%jnEnd
+                case (kMin)
+                    iStart = BCData(mm)%inBeg + 1; iEnd = BCData(mm)%inEnd
+                    jStart = BCData(mm)%jnBeg + 1; jEnd = BCData(mm)%jnEnd
+                    kStart = 1; kEnd = 3; 
+                case (kMax)
+                    iStart = BCData(mm)%inBeg + 1; iEnd = BCData(mm)%inEnd
+                    jStart = BCData(mm)%jnBeg + 1; jEnd = BCData(mm)%jnEnd
+                    kStart = nz; kEnd = ke; 
+                end select
+
+                if (BCType(mm) == OversetOuterBound) then
+                    do k = kStart, kEnd
+                        do j = jStart, jEnd
+                            do i = iStart, iEnd
+                                forcedRecv(i, j, k) = 1
+                            end do
+                        end do
+                    end do
+                end if
+            end do
 
-             if(j < i) exit
+            ! Add to the invalid donor list if it got flooded with iblank of -2 or -3:
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+                        ! Flooded or explictly blanked cell
+                        floodOrBlank = isFlooded(status(i, j, k)) .or. &
+                                       isFloodSeed(status(i, j, k)) .or. &
+                                       iblank(i, j, k) == -4
+                        if (floodOrBlank) then
+                            stencilLoop: do i_stencil = 1, N_visc_drdw
+                                ii = visc_drdw_stencil(i_stencil, 1) + i
+                                jj = visc_drdw_stencil(i_stencil, 2) + j
+                                kk = visc_drdw_stencil(i_stencil, 3) + k
+                                ! Flag as a forced reciver if it *isn't* flooded
+                                ! or explictly blanked
+
+                                floodOrBlank2 = isFlooded(status(ii, jj, kk)) .or. &
+                                                isFloodSeed(status(ii, jj, kk)) .or. &
+                                                iblank(ii, jj, kk) == -4
+
+                                if (.not. floodOrBlank2) then
+                                    forcedRecv(ii, jj, kk) = 1
+                                end if
+                            end do stencilLoop
+                        end if
+                    end do
+                end do
+            end do
+        end do
+
+        ! Update the info across block boundaries
+        domainLoop: do nn = 1, nDom
+            flowDoms(nn, 1, 1)%intCommVars(1)%var => &
+                flowDoms(nn, 1, 1)%forcedRecv(:, :, :)
+        end do domainLoop
+
+        ! Run the reverse halo exchange first. This is necessary if there
+        ! is 1 cell wide block next to a overset outer bound like the following:
+        !
+        !         Blk1         Blk2
+        !   ----+-----+------++-----+
+        !       |     |      ||     |    <= this face has overset outer bound BC
+        !   ----+-----+------++-----+
+        !       |     |      ||     |
+        !   ----+-----+------++-----+
+        !       |     |      ||     |
+        !   ----+-----+------++-----+
+        !                    ^block boundary
+        !
+        ! So what happens, is blk2 (1 cell wide) sets the two layers of
+        ! cells off of the BC as forced receivers. However, the second of
+        ! those layers is a halo cell. Blk1 then never gets this
+        ! information. as it clearly should. So what we have to do is a
+        ! reverse halo exchange that takes halo information and combines
+        ! it with real cell information. Essentially we will accumulate
+        ! forcedRecv from the halo to the real cell. For this we run the
+        ! generic reverse halo exchange.
+
+        call wHalo1to1IntGeneric_b(1, 1, 1, commPatternCell_2nd, internalCell_2nd)
+
+        ! Finally we now need to run the forward halo exchange to make
+        ! sure any halos on other procs are set correctly that may be part of a stencil
+        call wHalo1to1IntGeneric(1, 1, 1, commPatternCell_2nd, internalCell_2nd)
+
+    end subroutine flagForcedRecv
+
+    ! Utility function for unpacking/accessing the status variable
+
+    function isDonor(i)
+        use constants
+        implicit none
+        logical :: isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver
+        integer(kind=intType), intent(in) :: i
+        call getStatus(i, isDonor, isDonor, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver)
+    end function isDonor
+
+    function isHole(i)
+        use constants
+        implicit none
+        logical :: isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver
+        integer(kind=intType), intent(in) :: i
+        call getStatus(i, isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver)
+    end function isHole
+
+    function isCompute(i)
+        use constants
+        implicit none
+        logical :: isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver
+        integer(kind=intType), intent(in) :: i
+        call getStatus(i, isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver)
+    end function isCompute
+
+    function isFloodSeed(i)
+        use constants
+        implicit none
+        logical :: isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver
+        integer(kind=intType), intent(in) :: i
+        call getStatus(i, isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver)
+    end function isFloodSeed
+
+    function isFlooded(i)
+        use constants
+        implicit none
+        logical :: isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver
+        integer(kind=intType), intent(in) :: i
+        call getStatus(i, isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver)
+    end function isFlooded
+
+    function isWallDonor(i)
+        use constants
+        implicit none
+        logical :: isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver
+        integer(kind=intType), intent(in) :: i
+        call getStatus(i, isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver)
+    end function isWallDonor
+
+    function isReceiver(i)
+        use constants
+        implicit none
+        logical :: isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver
+        integer(kind=intType), intent(in) :: i
+        call getStatus(i, isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver)
+    end function isReceiver
+
+    subroutine setIsDonor(i, flag)
+        use constants
+        implicit none
+        integer(kind=intType), intent(inout) :: i
+        logical :: isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver, flag
+        call getStatus(i, isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver)
+        call setStatus(i, flag, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isREceiver)
+    end subroutine setIsDonor
+
+    subroutine setIsHole(i, flag)
+        use constants
+        implicit none
+        integer(kind=intType), intent(inout) :: i
+        logical :: isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver, flag
+        call getStatus(i, isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver)
+        call setStatus(i, isDonor, flag, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver)
+    end subroutine setIsHole
+
+    subroutine setIsCompute(i, flag)
+        use constants
+        implicit none
+        integer(kind=intType), intent(inout) :: i
+        logical :: isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver, flag
+        call getStatus(i, isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver)
+        call setStatus(i, isDonor, isHole, flag, isFloodSeed, isFlooded, isWallDonor, isReceiver)
+    end subroutine setIsCompute
+
+    subroutine setIsFloodSeed(i, flag)
+        use constants
+        implicit none
+        integer(kind=intType), intent(inout) :: i
+        logical :: isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver, flag
+        call getStatus(i, isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver)
+        call setStatus(i, isDonor, isHole, isCompute, flag, isFlooded, isWallDonor, isReceiver)
+    end subroutine setIsFloodSeed
+
+    subroutine setIsFlooded(i, flag)
+        use constants
+        implicit none
+        integer(kind=intType), intent(inout) :: i
+        logical :: isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver, flag
+        call getStatus(i, isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver)
+        call setStatus(i, isDonor, isHole, isCompute, isFloodSeed, flag, isWallDonor, isReceiver)
+    end subroutine setIsFlooded
+
+    subroutine setIsWallDonor(i, flag)
+        use constants
+        implicit none
+        integer(kind=intType), intent(inout) :: i
+        logical :: isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver, flag
+        call getStatus(i, isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver)
+        call setStatus(i, isDonor, isHole, isCompute, isFloodSeed, isFlooded, flag, isReceiver)
+    end subroutine setIsWallDonor
+
+    subroutine setIsReceiver(i, flag)
+        use constants
+        implicit none
+        integer(kind=intType), intent(inout) :: i
+        logical :: isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver, flag
+        call getStatus(i, isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver)
+        call setStatus(i, isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, flag)
+    end subroutine setIsReceiver
+
+    subroutine setStatus(i, isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver)
+
+        use constants
+        implicit none
+        integer(kind=intType), intent(out) :: i
+        logical :: isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver
+        i = 0
+
+        if (isDonor) i = i + 1
+        if (isHole) i = i + 2
+        if (isCompute) i = i + 4
+        if (isFloodSeed) i = i + 8
+        if (isFlooded) i = i + 16
+        if (isWallDonor) i = i + 32
+        if (isReceiver) i = i + 64
+    end subroutine setStatus
+
+    subroutine getStatus(i, isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver)
+
+        use constants
+        implicit none
+        logical :: isDonor, isHole, isCompute, isFloodSeed, isFlooded, isWallDonor, isReceiver
+        integer(kind=intType) :: i, j
+        j = i
+
+        isDonor = .False.
+        isHole = .False.
+        isCompute = .False.
+        isFloodSeed = .False.
+        isFlooded = .False.
+        isWallDonor = .False.
+        isReceiver = .False.
+
+        if (j / 64 > 0) then
+            isReceiver = .True.
+            j = j - 64
+        end if
+
+        if (j / 32 > 0) then
+            isWallDonor = .True.
+            j = j - 32
+        end if
+
+        if (j / 16 > 0) then
+            isFlooded = .True.
+            j = j - 16
+        end if
+
+        if (j / 8 > 0) then
+            isFloodSeed = .True.
+            j = j - 8
+        end if
+
+        if (j / 4 > 0) then
+            isCompute = .True.
+            j = j - 4
+        end if
+
+        if (j / 2 > 0) then
+            isHole = .True.
+            j = j - 2
+        end if
+
+        if (j / 1 > 0) then
+            isDonor = .True.
+            j = j - 1
+        end if
+    end subroutine getStatus
 
-             ! Swap the element i and j.
+    !
+    subroutine binSearchNodes(arr, searchNode, nn, searchInd)
 
-             tmp    = arr(i)
-             arr(i) = arr(j)
-             arr(j) = tmp
-          enddo
+        !  binSearchNodes does binary search for a node 'searchNode'
+        !  in arr(1:nn) and returns index 'searchInd' where
+        !  'searchNode' lies in arr. searchInd = -1 if not found.
 
-          ! Swap the entries j and l+1. Remember that a equals
-          ! arr(l+1).
+        use constants
+        implicit none
 
-          arr(l+1) = arr(j)
-          arr(j)   = a
+        ! Input parameters
+        integer(kind=intType), intent(in) :: nn, searchNode
+        integer(kind=intType), intent(out) :: searchInd
+        integer(kind=intType), intent(in) :: arr(nn)
 
-          ! Push pointers to larger subarray on stack,
-          ! process smaller subarray immediately.
+        ! Local variables
+        integer(kind=intType) :: first, last, middle
 
-          jStack = jStack + 2
-          if(jStack > nStack) then
+        first = 1
+        last = nn
 
-             ! Storage of the stack is too small. Reallocate.
+        middle = (first + last) / 2
 
-             allocate(tmpStack(nStack), stat=ierr)
-             if(ierr /= 0)                         &
-                  call terminate("qsortEdgeType", &
-                  "Memory allocation error for tmpStack")
-             tmpStack = stack
+        do while (first <= last)
+            if (arr(middle) < searchNode) then
+                first = middle + 1
+            else if (arr(middle) == searchNode) then
+                searchInd = middle
+                exit
+            else
+                last = middle - 1
+            end if
 
-             ! Free the memory of stack, store the old value of nStack
-             ! in tmp and increase nStack.
+            middle = (first + last) / 2
+        end do !while
 
-             deallocate(stack, stat=ierr)
-             if(ierr /= 0)                         &
-                  call terminate("qsortEdgeType", &
-                  "Deallocation error for stack")
-             ii = nStack
-             nStack = nStack + 100
+        if (first > last) then
+            searchInd = -1
+            print *, ' binSearchNode fails for searchNode ', searchNode
+            STOP
+        end if
+    end subroutine binSearchNodes
 
-             ! Allocate the memory for stack and copy the old values
-             ! from tmpStack.
+    subroutine binSearchPocketEdgeType(arr, search, nn, searchInd)
 
-             allocate(stack(nStack), stat=ierr)
-             if(ierr /= 0)                         &
-                  call terminate("qsortEdgeType", &
-                  "Memory reallocation error for stack")
-             stack(1:ii) = tmpStack(1:ii)
+        !  binSearchPocketEdgeType does binary searche for
+        !  pocketEdgeType 'search' edge and returns index 'searchInd'
+        !  where 'search' lies in arr.
 
-             ! And finally release the memory of tmpStack.
-
-             deallocate(tmpStack, stat=ierr)
-             if(ierr /= 0)                         &
-                  call terminate("qsortEdgeType", &
-                  "Deallocation error for tmpStack")
-          endif
+        use constants
+        use oversetData ! cannot use only becuase of <= operator
+        implicit none
 
-          if((r-i+1) >= (j-l)) then
-             stack(jStack)   = r
-             r               = j-1
-             stack(jStack-1) = j
-          else
-             stack(jStack)   = j-1
-             stack(jStack-1) = l
-             l               = j
-          endif
+        ! Input parameters
+        integer(kind=intType), intent(in) :: nn
+        integer(kind=intType), intent(out) :: searchInd
 
-       endif
-    enddo
+        type(pocketEdge), intent(in) :: search
+        type(pocketEdge), dimension(*), intent(in) :: arr
 
-    ! Release the memory of stack.
+        ! Local variables
+        integer(kind=intType) :: first, last, middle
 
-    deallocate(stack, stat=ierr)
-    if(ierr /= 0)                         &
-         call terminate("qsortEdgeType", &
-         "Deallocation error for stack")
+        first = 1
+        last = nn
 
-    ! Check in debug mode whether the array is really sorted.
+        middle = (first + last) / 2
 
-    if( debug ) then
-       do i=1,(nn-1)
-          if(arr(i+1) < arr(i))                 &
-               call terminate("qsortEdgeType", &
-               "Array is not sorted correctly")
-       enddo
-    endif
+        do while (first <= last)
+            if (arr(middle) < search) then
+                first = middle + 1
+            else if (arr(middle) == search) then
+                searchInd = middle
+                exit
+            else
+                last = middle - 1
+            end if
 
-  end subroutine qsortEdgeType
+            middle = (first + last) / 2
+        end do !while
 
-  subroutine qsortFringeType(arr, nn, sortType)
-    !
-    !       qsortFringeListTy sorts the given number of fringes
-    !       increasing order based on the <= operator for this derived
-    !       data type.
-    !
-    use constants
-    use block ! Cannot use-only becuase of <= operator
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nn, sortType
+        if (first > last) then
+            print *, ' binSearchPocketEdgeType fails for Edge with nodes ', &
+                search%n1, search%n2
+            STOP
+        end if
+    end subroutine binSearchPocketEdgeType
 
-    type(fringeType), dimension(*), intent(inout) :: arr
-    !
-    !      Local variables.
     !
-    integer(kind=intType), parameter :: m = 7
+    subroutine qsortEdgeType(arr, nn)
+        !
+        !       qsortEdgeType sorts the given number of oversetString master
+        !       Edges in increasing order based on the <= operator for this
+        !       derived data type.
+        !       (Generously copied from qsortFringeType.F90)
+        !
+        use constants
+        use oversetData ! cannot use only becuase of <= operator
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nn
 
-    integer(kind=intType) :: nStack
-    integer(kind=intType) :: i, j, k, r, l, jStack, ii
+        type(oversetEdge), dimension(*), intent(inout) :: arr
+        !
+        !      Local variables.
+        !
+        integer(kind=intType), parameter :: m = 7
 
-    integer :: ierr
+        integer(kind=intType) :: nStack
+        integer(kind=intType) :: i, j, k, r, l, jStack, ii
 
-    type(fringeType) :: a, tmp
+        integer :: ierr
 
-    integer(kind=intType), allocatable, dimension(:) :: stack
-    integer(kind=intType), allocatable, dimension(:) :: tmpStack
+        type(oversetEdge) :: a, tmp
 
-    if (sortType == sortByDonor) then
-       fringeSortType = sortByDonor
-    else if (sortType == sortByReceiver) then
-       fringeSortType = sortByReceiver
-    else
-       call terminate("qsortfringeType", &
-            "Uuknown sort type")
-    end if
+        integer(kind=intType), allocatable, dimension(:) :: stack
+        integer(kind=intType), allocatable, dimension(:) :: tmpStack
 
-    ! Allocate the memory for stack.
+        ! Allocate the memory for stack.
 
-    nStack = 100
-    allocate(stack(nStack), stat=ierr)
-    if(ierr /= 0)                         &
-         call terminate("qsortfringeType", &
-         "Memory allocation failure for stack")
+        nStack = 100
+        allocate (stack(nStack), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("qsortEdgeType", &
+                           "Memory allocation failure for stack")
 
-    ! Initialize the variables that control the sorting.
+        ! Initialize the variables that control the sorting.
 
-    jStack = 0
-    l      = 1
-    r      = nn
+        jStack = 0
+        l = 1
+        r = nn
 
-    ! Start of the algorithm
+        ! Start of the algorithm
 
-    do
+        do
 
-       ! Check for the size of the subarray.
+            ! Check for the size of the subarray.
 
-       if((r-l) < m) then
+            if ((r - l) < m) then
 
-          ! Perform insertion sort
+                ! Perform insertion sort
 
-          do j=l+1,r
-             a = arr(j)
-             do i=(j-1),l,-1
-                if(arr(i) <= a) exit
-                arr(i+1) = arr(i)
-             enddo
-             arr(i+1) = a
-          enddo
+                do j = l + 1, r
+                    a = arr(j)
+                    do i = (j - 1), l, -1
+                        if (arr(i) <= a) exit
+                        arr(i + 1) = arr(i)
+                    end do
+                    arr(i + 1) = a
+                end do
 
-          ! In case there are no more elements on the stack, exit from
-          ! the outermost do-loop. Algorithm has finished.
+                ! In case there are no more elements on the stack, exit from
+                ! the outermost do-loop. Algorithm has finished.
 
-          if(jStack == 0) exit
+                if (jStack == 0) exit
 
-          ! Pop stack and begin a new round of partitioning.
+                ! Pop stack and begin a new round of partitioning.
 
-          r = stack(jStack)
-          l = stack(jStack-1)
-          jStack = jStack - 2
+                r = stack(jStack)
+                l = stack(jStack - 1)
+                jStack = jStack - 2
 
-       else
+            else
 
-          ! Subarray is larger than the threshold for a linear sort.
-          ! Choose median of left, center and right elements as
-          ! partitioning element a.
-          ! Also rearrange so that (l) <= (l+1) <= (r).
+                ! Subarray is larger than the threshold for a linear sort.
+                ! Choose median of left, center and right elements as
+                ! partitioning element a.
+                ! Also rearrange so that (l) <= (l+1) <= (r).
 
-          k = (l+r)/2
-          tmp      = arr(k)             ! Swap the elements
-          arr(k)   = arr(l+1)           ! k and l+1.
-          arr(l+1) = tmp
+                k = (l + r) / 2
+                tmp = arr(k)             ! Swap the elements
+                arr(k) = arr(l + 1)           ! k and l+1.
+                arr(l + 1) = tmp
 
-          if(arr(r) < arr(l)) then
-             tmp    = arr(l)             ! Swap the elements
-             arr(l) = arr(r)             ! r and l.
-             arr(r) = tmp
-          endif
+                if (arr(r) < arr(l)) then
+                    tmp = arr(l)             ! Swap the elements
+                    arr(l) = arr(r)             ! r and l.
+                    arr(r) = tmp
+                end if
 
-          if(arr(r) < arr(l+1)) then
-             tmp      = arr(l+1)         ! Swap the elements
-             arr(l+1) = arr(r)           ! r and l+1.
-             arr(r)   = tmp
-          endif
+                if (arr(r) < arr(l + 1)) then
+                    tmp = arr(l + 1)         ! Swap the elements
+                    arr(l + 1) = arr(r)           ! r and l+1.
+                    arr(r) = tmp
+                end if
 
-          if(arr(l+1) < arr(l)) then
-             tmp      = arr(l+1)         ! Swap the elements
-             arr(l+1) = arr(l)           ! l and l+1.
-             arr(l)   = tmp
-          endif
+                if (arr(l + 1) < arr(l)) then
+                    tmp = arr(l + 1)         ! Swap the elements
+                    arr(l + 1) = arr(l)           ! l and l+1.
+                    arr(l) = tmp
+                end if
 
-          ! Initialize the pointers for partitioning.
+                ! Initialize the pointers for partitioning.
 
-          i = l+1
-          j = r
-          a = arr(l+1)
+                i = l + 1
+                j = r
+                a = arr(l + 1)
 
-          ! The innermost loop
+                ! The innermost loop
 
-          do
+                do
 
-             ! Scan up to find element >= a.
-             do
-                i = i+1
-                if(a <= arr(i)) exit
-             enddo
+                    ! Scan up to find element >= a.
+                    do
+                        i = i + 1
+                        if (a <= arr(i)) exit
+                    end do
 
-             ! Scan down to find element <= a.
-             do
-                j = j-1
-                if(arr(j) <= a) exit
-             enddo
+                    ! Scan down to find element <= a.
+                    do
+                        j = j - 1
+                        if (arr(j) <= a) exit
+                    end do
 
-             ! Exit the loop in case the pointers i and j crossed.
+                    ! Exit the loop in case the pointers i and j crossed.
 
-             if(j < i) exit
+                    if (j < i) exit
 
-             ! Swap the element i and j.
+                    ! Swap the element i and j.
 
-             tmp    = arr(i)
-             arr(i) = arr(j)
-             arr(j) = tmp
-          enddo
+                    tmp = arr(i)
+                    arr(i) = arr(j)
+                    arr(j) = tmp
+                end do
 
-          ! Swap the entries j and l+1. Remember that a equals
-          ! arr(l+1).
+                ! Swap the entries j and l+1. Remember that a equals
+                ! arr(l+1).
 
-          arr(l+1) = arr(j)
-          arr(j)   = a
+                arr(l + 1) = arr(j)
+                arr(j) = a
 
-          ! Push pointers to larger subarray on stack,
-          ! process smaller subarray immediately.
+                ! Push pointers to larger subarray on stack,
+                ! process smaller subarray immediately.
 
-          jStack = jStack + 2
-          if(jStack > nStack) then
+                jStack = jStack + 2
+                if (jStack > nStack) then
 
-             ! Storage of the stack is too small. Reallocate.
+                    ! Storage of the stack is too small. Reallocate.
 
-             allocate(tmpStack(nStack), stat=ierr)
-             if(ierr /= 0)                         &
-                  call terminate("qsortfringeType", &
-                  "Memory allocation error for tmpStack")
-             tmpStack = stack
+                    allocate (tmpStack(nStack), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortEdgeType", &
+                                       "Memory allocation error for tmpStack")
+                    tmpStack = stack
 
-             ! Free the memory of stack, store the old value of nStack
-             ! in tmp and increase nStack.
+                    ! Free the memory of stack, store the old value of nStack
+                    ! in tmp and increase nStack.
 
-             deallocate(stack, stat=ierr)
-             if(ierr /= 0)                         &
-                  call terminate("qsortfringeType", &
-                  "Deallocation error for stack")
-             ii = nStack
-             nStack = nStack + 100
+                    deallocate (stack, stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortEdgeType", &
+                                       "Deallocation error for stack")
+                    ii = nStack
+                    nStack = nStack + 100
 
-             ! Allocate the memory for stack and copy the old values
-             ! from tmpStack.
+                    ! Allocate the memory for stack and copy the old values
+                    ! from tmpStack.
 
-             allocate(stack(nStack), stat=ierr)
-             if(ierr /= 0)                         &
-                  call terminate("qsortfringeType", &
-                  "Memory reallocation error for stack")
-             stack(1:ii) = tmpStack(1:ii)
+                    allocate (stack(nStack), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortEdgeType", &
+                                       "Memory reallocation error for stack")
+                    stack(1:ii) = tmpStack(1:ii)
 
-             ! And finally release the memory of tmpStack.
+                    ! And finally release the memory of tmpStack.
 
-             deallocate(tmpStack, stat=ierr)
-             if(ierr /= 0)                         &
-                  call terminate("qsortfringeType", &
-                  "Deallocation error for tmpStack")
-          endif
+                    deallocate (tmpStack, stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortEdgeType", &
+                                       "Deallocation error for tmpStack")
+                end if
 
-          if((r-i+1) >= (j-l)) then
-             stack(jStack)   = r
-             r               = j-1
-             stack(jStack-1) = j
-          else
-             stack(jStack)   = j-1
-             stack(jStack-1) = l
-             l               = j
-          endif
+                if ((r - i + 1) >= (j - l)) then
+                    stack(jStack) = r
+                    r = j - 1
+                    stack(jStack - 1) = j
+                else
+                    stack(jStack) = j - 1
+                    stack(jStack - 1) = l
+                    l = j
+                end if
 
-       endif
-    enddo
+            end if
+        end do
 
-    ! Release the memory of stack.
+        ! Release the memory of stack.
 
-    deallocate(stack, stat=ierr)
-    if(ierr /= 0)                         &
-         call terminate("qsortfringeType", &
-         "Deallocation error for stack")
+        deallocate (stack, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("qsortEdgeType", &
+                           "Deallocation error for stack")
 
-    ! Check in debug mode whether the array is really sorted.
+        ! Check in debug mode whether the array is really sorted.
 
-    if( debug ) then
-       do i=1,(nn-1)
-          if(arr(i+1) < arr(i))                 &
-               call terminate("qsortfringeType", &
-               "Array is not sorted correctly")
-       enddo
-    endif
+        if (debug) then
+            do i = 1, (nn - 1)
+                if (arr(i + 1) < arr(i)) &
+                    call terminate("qsortEdgeType", &
+                                   "Array is not sorted correctly")
+            end do
+        end if
 
-  end subroutine qsortFringeType
+    end subroutine qsortEdgeType
 
-  subroutine addToFringeList(fringeList, n, fringe)
+    subroutine qsortFringeType(arr, nn, sortType)
+        !
+        !       qsortFringeListTy sorts the given number of fringes
+        !       increasing order based on the <= operator for this derived
+        !       data type.
+        !
+        use constants
+        use block ! Cannot use-only becuase of <= operator
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nn, sortType
 
-    ! Generic subroutine to add to a fringe "List". This isn't
-    ! actually a list but rather an allocatable (pointer) array that
-    ! is periodically resized as necessary. n is the current size
-    ! which is automatically incremented by this routine. This
-    ! operation occurs multiple times throughout the overset code.
+        type(fringeType), dimension(*), intent(inout) :: arr
+        !
+        !      Local variables.
+        !
+        integer(kind=intType), parameter :: m = 7
 
-    use constants
-    use block, only : fringeType
+        integer(kind=intType) :: nStack
+        integer(kind=intType) :: i, j, k, r, l, jStack, ii
 
-    implicit none
+        integer :: ierr
 
-    ! Input Params
-    type(fringeType), dimension(:), pointer :: fringeList
-    type(fringeType) :: fringe
-    integer(kind=intType), intent(inout) :: n
+        type(fringeType) :: a, tmp
 
-    ! Working Paramters
-    integer(kind=intType) :: fSize
-    type(fringeType), dimension(:), pointer :: tmpFringePtr
-    fSize = size(fringeList)
+        integer(kind=intType), allocatable, dimension(:) :: stack
+        integer(kind=intType), allocatable, dimension(:) :: tmpStack
 
-    ! Increment n for next item
-    n = n + 1
+        if (sortType == sortByDonor) then
+            fringeSortType = sortByDonor
+        else if (sortType == sortByReceiver) then
+            fringeSortType = sortByReceiver
+        else
+            call terminate("qsortfringeType", &
+                           "Uuknown sort type")
+        end if
 
-    if (n > fSize) then
+        ! Allocate the memory for stack.
 
-       ! Pointer to existing data:
-       tmpFringePtr => fringeList
+        nStack = 100
+        allocate (stack(nStack), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("qsortfringeType", &
+                           "Memory allocation failure for stack")
 
-       ! Allocate new space
-       allocate(fringeList(int(1.5*fSize)))
+        ! Initialize the variables that control the sorting.
 
-       ! Copy exsitng values
-       fringeList(1:fSize) = tmpFringePtr(1:fSize)
+        jStack = 0
+        l = 1
+        r = nn
 
-       ! Free original memory
-       deallocate(tmpFringePtr)
+        ! Start of the algorithm
 
-    end if
+        do
 
-    fringeList(n) = fringe
+            ! Check for the size of the subarray.
 
-  end subroutine addToFringeList
+            if ((r - l) < m) then
 
+                ! Perform insertion sort
 
-  subroutine addToFringeBuffer(intBuffer, realBuffer, n, fringe)
+                do j = l + 1, r
+                    a = arr(j)
+                    do i = (j - 1), l, -1
+                        if (arr(i) <= a) exit
+                        arr(i + 1) = arr(i)
+                    end do
+                    arr(i + 1) = a
+                end do
 
-    ! Generic subroutine to add to a fringe "List". It isn't actually
-    ! a list of fringe types but rather a real array and an int array.
+                ! In case there are no more elements on the stack, exit from
+                ! the outermost do-loop. Algorithm has finished.
 
-    use constants
-    use block, only : fringeType
+                if (jStack == 0) exit
 
-    implicit none
+                ! Pop stack and begin a new round of partitioning.
 
-    ! Input Params
-    integer(kind=intType), dimension(:,:), pointer :: intBuffer
-    real(kind=realType), dimension(:,:), pointer :: realBuffer
-    type(fringeType) :: fringe
-    integer(kind=intType), intent(inout) :: n
+                r = stack(jStack)
+                l = stack(jStack - 1)
+                jStack = jStack - 2
 
-    ! Working Paramters
-    integer(kind=intType) :: fSize
-    integer(kind=intType), dimension(:,:), pointer :: tmpInt
-    real(kind=realType), dimension(:,:), pointer :: tmpReal
-    fSize = size(intBuffer, 2)
+            else
 
-    ! Increment n for next item
-    n = n + 1
+                ! Subarray is larger than the threshold for a linear sort.
+                ! Choose median of left, center and right elements as
+                ! partitioning element a.
+                ! Also rearrange so that (l) <= (l+1) <= (r).
 
-    if (n > fSize) then
+                k = (l + r) / 2
+                tmp = arr(k)             ! Swap the elements
+                arr(k) = arr(l + 1)           ! k and l+1.
+                arr(l + 1) = tmp
 
-       ! Pointers to existing data:
-       tmpInt => intBuffer
-       tmpReal => realBuffer
+                if (arr(r) < arr(l)) then
+                    tmp = arr(l)             ! Swap the elements
+                    arr(l) = arr(r)             ! r and l.
+                    arr(r) = tmp
+                end if
 
-       ! Allocate new space
-       allocate(intBuffer(5, int(1.5*fSize)))
-       allocate(realBuffer(4, int(1.5*fSize)))
+                if (arr(r) < arr(l + 1)) then
+                    tmp = arr(l + 1)         ! Swap the elements
+                    arr(l + 1) = arr(r)           ! r and l+1.
+                    arr(r) = tmp
+                end if
 
-       ! Copy exsitng values
-       intBuffer(:, 1:fSize) = tmpInt(:, 1:fSize)
-       realBuffer(:, 1:fSize) = tmpReal(:, 1:fSize)
+                if (arr(l + 1) < arr(l)) then
+                    tmp = arr(l + 1)         ! Swap the elements
+                    arr(l + 1) = arr(l)           ! l and l+1.
+                    arr(l) = tmp
+                end if
 
-       ! Free original memory
-       deallocate(tmpInt, tmpReal)
+                ! Initialize the pointers for partitioning.
 
-    end if
+                i = l + 1
+                j = r
+                a = arr(l + 1)
 
-    ! Now we can safely add the information:
-    intBuffer(1, n) = fringe%donorProc
-    intBuffer(2, n) = fringe%donorBlock
-    intBuffer(3, n) = fringe%dIndex
-    intBuffer(4, n) = fringe%myBlock
-    intBuffer(5, n) = fringe%myIndex
+                ! The innermost loop
 
-    realBuffer(1:3, n) = fringe%donorFrac
-    realBuffer(4, n) = fringe%quality
+                do
 
-  end subroutine addToFringeBuffer
+                    ! Scan up to find element >= a.
+                    do
+                        i = i + 1
+                        if (a <= arr(i)) exit
+                    end do
 
+                    ! Scan down to find element <= a.
+                    do
+                        j = j - 1
+                        if (arr(j) <= a) exit
+                    end do
 
+                    ! Exit the loop in case the pointers i and j crossed.
 
+                    if (j < i) exit
 
+                    ! Swap the element i and j.
 
-  subroutine qsortPocketEdgeType(arr, nn)
-    !
-    !       qsortPocketEdgeType sorts the given number of oversetString
-    !       master Edges in increasing order based on the <= operator for
-    !       this derived data type.
-    !       (Generously copied from qsortFringeType.F90)
-    !
-    use constants
-    use oversetData ! Cannot use-only becuase of <= operator
-    use utils, onlY : terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nn
+                    tmp = arr(i)
+                    arr(i) = arr(j)
+                    arr(j) = tmp
+                end do
 
-    type(pocketEdge), dimension(*), intent(inout) :: arr
-    !
-    !      Local variables.
-    !
-    integer(kind=intType), parameter :: m = 7
+                ! Swap the entries j and l+1. Remember that a equals
+                ! arr(l+1).
+
+                arr(l + 1) = arr(j)
+                arr(j) = a
+
+                ! Push pointers to larger subarray on stack,
+                ! process smaller subarray immediately.
+
+                jStack = jStack + 2
+                if (jStack > nStack) then
+
+                    ! Storage of the stack is too small. Reallocate.
 
-    integer(kind=intType) :: nStack
-    integer(kind=intType) :: i, j, k, r, l, jStack, ii
+                    allocate (tmpStack(nStack), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortfringeType", &
+                                       "Memory allocation error for tmpStack")
+                    tmpStack = stack
 
-    integer :: ierr
+                    ! Free the memory of stack, store the old value of nStack
+                    ! in tmp and increase nStack.
 
-    type(pocketEdge) :: a, tmp
+                    deallocate (stack, stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortfringeType", &
+                                       "Deallocation error for stack")
+                    ii = nStack
+                    nStack = nStack + 100
 
-    integer(kind=intType), allocatable, dimension(:) :: stack
-    integer(kind=intType), allocatable, dimension(:) :: tmpStack
+                    ! Allocate the memory for stack and copy the old values
+                    ! from tmpStack.
 
-    ! Allocate the memory for stack.
+                    allocate (stack(nStack), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortfringeType", &
+                                       "Memory reallocation error for stack")
+                    stack(1:ii) = tmpStack(1:ii)
 
-    nStack = 100
-    allocate(stack(nStack), stat=ierr)
-    if(ierr /= 0)                         &
-         call terminate("qsortEdgeType", &
-         "Memory allocation failure for stack")
+                    ! And finally release the memory of tmpStack.
 
-    ! Initialize the variables that control the sorting.
+                    deallocate (tmpStack, stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortfringeType", &
+                                       "Deallocation error for tmpStack")
+                end if
+
+                if ((r - i + 1) >= (j - l)) then
+                    stack(jStack) = r
+                    r = j - 1
+                    stack(jStack - 1) = j
+                else
+                    stack(jStack) = j - 1
+                    stack(jStack - 1) = l
+                    l = j
+                end if
 
-    jStack = 0
-    l      = 1
-    r      = nn
+            end if
+        end do
 
-    ! Start of the algorithm
+        ! Release the memory of stack.
 
-    do
+        deallocate (stack, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("qsortfringeType", &
+                           "Deallocation error for stack")
 
-       ! Check for the size of the subarray.
+        ! Check in debug mode whether the array is really sorted.
 
-       if((r-l) < m) then
+        if (debug) then
+            do i = 1, (nn - 1)
+                if (arr(i + 1) < arr(i)) &
+                    call terminate("qsortfringeType", &
+                                   "Array is not sorted correctly")
+            end do
+        end if
 
-          ! Perform insertion sort
+    end subroutine qsortFringeType
 
-          do j=l+1,r
-             a = arr(j)
-             do i=(j-1),l,-1
-                if(arr(i) <= a) exit
-                arr(i+1) = arr(i)
-             enddo
-             arr(i+1) = a
-          enddo
+    subroutine addToFringeList(fringeList, n, fringe)
 
-          ! In case there are no more elements on the stack, exit from
-          ! the outermost do-loop. Algorithm has finished.
+        ! Generic subroutine to add to a fringe "List". This isn't
+        ! actually a list but rather an allocatable (pointer) array that
+        ! is periodically resized as necessary. n is the current size
+        ! which is automatically incremented by this routine. This
+        ! operation occurs multiple times throughout the overset code.
 
-          if(jStack == 0) exit
+        use constants
+        use block, only: fringeType
 
-          ! Pop stack and begin a new round of partitioning.
+        implicit none
 
-          r = stack(jStack)
-          l = stack(jStack-1)
-          jStack = jStack - 2
+        ! Input Params
+        type(fringeType), dimension(:), pointer :: fringeList
+        type(fringeType) :: fringe
+        integer(kind=intType), intent(inout) :: n
 
-       else
+        ! Working Paramters
+        integer(kind=intType) :: fSize
+        type(fringeType), dimension(:), pointer :: tmpFringePtr
+        fSize = size(fringeList)
 
-          ! Subarray is larger than the threshold for a linear sort.
-          ! Choose median of left, center and right elements as
-          ! partitioning element a.
-          ! Also rearrange so that (l) <= (l+1) <= (r).
+        ! Increment n for next item
+        n = n + 1
 
-          k = (l+r)/2
-          tmp      = arr(k)             ! Swap the elements
-          arr(k)   = arr(l+1)           ! k and l+1.
-          arr(l+1) = tmp
+        if (n > fSize) then
 
-          if(arr(r) < arr(l)) then
-             tmp    = arr(l)             ! Swap the elements
-             arr(l) = arr(r)             ! r and l.
-             arr(r) = tmp
-          endif
+            ! Pointer to existing data:
+            tmpFringePtr => fringeList
 
-          if(arr(r) < arr(l+1)) then
-             tmp      = arr(l+1)         ! Swap the elements
-             arr(l+1) = arr(r)           ! r and l+1.
-             arr(r)   = tmp
-          endif
+            ! Allocate new space
+            allocate (fringeList(int(1.5 * fSize)))
 
-          if(arr(l+1) < arr(l)) then
-             tmp      = arr(l+1)         ! Swap the elements
-             arr(l+1) = arr(l)           ! l and l+1.
-             arr(l)   = tmp
-          endif
+            ! Copy exsitng values
+            fringeList(1:fSize) = tmpFringePtr(1:fSize)
 
-          ! Initialize the pointers for partitioning.
+            ! Free original memory
+            deallocate (tmpFringePtr)
 
-          i = l+1
-          j = r
-          a = arr(l+1)
+        end if
 
-          ! The innermost loop
+        fringeList(n) = fringe
 
-          do
+    end subroutine addToFringeList
 
-             ! Scan up to find element >= a.
-             do
-                i = i+1
-                if(a <= arr(i)) exit
-             enddo
+    subroutine addToFringeBuffer(intBuffer, realBuffer, n, fringe)
 
-             ! Scan down to find element <= a.
-             do
-                j = j-1
-                if(arr(j) <= a) exit
-             enddo
+        ! Generic subroutine to add to a fringe "List". It isn't actually
+        ! a list of fringe types but rather a real array and an int array.
 
-             ! Exit the loop in case the pointers i and j crossed.
+        use constants
+        use block, only: fringeType
 
-             if(j < i) exit
+        implicit none
 
-             ! Swap the element i and j.
+        ! Input Params
+        integer(kind=intType), dimension(:, :), pointer :: intBuffer
+        real(kind=realType), dimension(:, :), pointer :: realBuffer
+        type(fringeType) :: fringe
+        integer(kind=intType), intent(inout) :: n
 
-             tmp    = arr(i)
-             arr(i) = arr(j)
-             arr(j) = tmp
-          enddo
+        ! Working Paramters
+        integer(kind=intType) :: fSize
+        integer(kind=intType), dimension(:, :), pointer :: tmpInt
+        real(kind=realType), dimension(:, :), pointer :: tmpReal
+        fSize = size(intBuffer, 2)
 
-          ! Swap the entries j and l+1. Remember that a equals
-          ! arr(l+1).
+        ! Increment n for next item
+        n = n + 1
 
-          arr(l+1) = arr(j)
-          arr(j)   = a
+        if (n > fSize) then
 
-          ! Push pointers to larger subarray on stack,
-          ! process smaller subarray immediately.
+            ! Pointers to existing data:
+            tmpInt => intBuffer
+            tmpReal => realBuffer
 
-          jStack = jStack + 2
-          if(jStack > nStack) then
+            ! Allocate new space
+            allocate (intBuffer(5, int(1.5 * fSize)))
+            allocate (realBuffer(4, int(1.5 * fSize)))
 
-             ! Storage of the stack is too small. Reallocate.
+            ! Copy exsitng values
+            intBuffer(:, 1:fSize) = tmpInt(:, 1:fSize)
+            realBuffer(:, 1:fSize) = tmpReal(:, 1:fSize)
 
-             allocate(tmpStack(nStack), stat=ierr)
-             if(ierr /= 0)                         &
-                  call terminate("qsortEdgeType", &
-                  "Memory allocation error for tmpStack")
-             tmpStack = stack
+            ! Free original memory
+            deallocate (tmpInt, tmpReal)
 
-             ! Free the memory of stack, store the old value of nStack
-             ! in tmp and increase nStack.
+        end if
 
-             deallocate(stack, stat=ierr)
-             if(ierr /= 0)                         &
-                  call terminate("qsortEdgeType", &
-                  "Deallocation error for stack")
-             ii = nStack
-             nStack = nStack + 100
+        ! Now we can safely add the information:
+        intBuffer(1, n) = fringe%donorProc
+        intBuffer(2, n) = fringe%donorBlock
+        intBuffer(3, n) = fringe%dIndex
+        intBuffer(4, n) = fringe%myBlock
+        intBuffer(5, n) = fringe%myIndex
 
-             ! Allocate the memory for stack and copy the old values
-             ! from tmpStack.
+        realBuffer(1:3, n) = fringe%donorFrac
+        realBuffer(4, n) = fringe%quality
 
-             allocate(stack(nStack), stat=ierr)
-             if(ierr /= 0)                         &
-                  call terminate("qsortEdgeType", &
-                  "Memory reallocation error for stack")
-             stack(1:ii) = tmpStack(1:ii)
+    end subroutine addToFringeBuffer
 
-             ! And finally release the memory of tmpStack.
+    subroutine qsortPocketEdgeType(arr, nn)
+        !
+        !       qsortPocketEdgeType sorts the given number of oversetString
+        !       master Edges in increasing order based on the <= operator for
+        !       this derived data type.
+        !       (Generously copied from qsortFringeType.F90)
+        !
+        use constants
+        use oversetData ! Cannot use-only becuase of <= operator
+        use utils, onlY: terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nn
 
-             deallocate(tmpStack, stat=ierr)
-             if(ierr /= 0)                         &
-                  call terminate("qsortEdgeType", &
-                  "Deallocation error for tmpStack")
-          endif
+        type(pocketEdge), dimension(*), intent(inout) :: arr
+        !
+        !      Local variables.
+        !
+        integer(kind=intType), parameter :: m = 7
 
-          if((r-i+1) >= (j-l)) then
-             stack(jStack)   = r
-             r               = j-1
-             stack(jStack-1) = j
-          else
-             stack(jStack)   = j-1
-             stack(jStack-1) = l
-             l               = j
-          endif
+        integer(kind=intType) :: nStack
+        integer(kind=intType) :: i, j, k, r, l, jStack, ii
 
-       endif
-    enddo
+        integer :: ierr
 
-    ! Release the memory of stack.
+        type(pocketEdge) :: a, tmp
 
-    deallocate(stack, stat=ierr)
-    if(ierr /= 0)                         &
-         call terminate("qsortEdgeType", &
-         "Deallocation error for stack")
+        integer(kind=intType), allocatable, dimension(:) :: stack
+        integer(kind=intType), allocatable, dimension(:) :: tmpStack
 
-    ! Check in debug mode whether the array is really sorted.
+        ! Allocate the memory for stack.
 
-    if( debug ) then
-       do i=1,(nn-1)
-          if(arr(i+1) < arr(i))                 &
-               call terminate("qsortEdgeType", &
-               "Array is not sorted correctly")
-       enddo
-    endif
+        nStack = 100
+        allocate (stack(nStack), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("qsortEdgeType", &
+                           "Memory allocation failure for stack")
 
-  end subroutine qsortPocketEdgeType
+        ! Initialize the variables that control the sorting.
 
+        jStack = 0
+        l = 1
+        r = nn
 
-  subroutine checkOverset (level, sps, totalOrphans, printBadCells)
+        ! Start of the algorithm
 
-    !
-    !       CheckOverset checks the integrity of the overset connectivity
-    !       and holes. For every comptue cell (iblank = 1) it checks that
-    !       every cell in its stencil are not blanked. If even 1 cell is
-    !      * found with an incomplete stencil it is a fatal error.
-    use constants
-    use blockPointers, only : il, jl, kl, iblank, flowDoms, nDom, orphans, &
-         iBegOR, jBegOr, kBegOr, nbkGlobal, nOrphans
-    use stencils, only : visc_drdw_stencil, N_visc_drdw
-    use communication, only : myid, adflow_comm_world
-    use utils, only : setPointers, EChk
-    use inputOverset, only : oversetDebugPrint
-    implicit none
-
-    ! Input/Output
-    integer(kind=intType), intent(in) :: level, sps
-    integer(kind=intType), intent(out) :: totalOrphans
-    logical, intent(in) :: printBadCells
-
-    ! Working
-    integer(kind=intType) :: i, j, k, nn, ii, jj, kk, n, ierr
-    integer(kind=intType) :: magic, localOrphans, i_stencil
-    logical :: badCell
-
-    ! This pass just lets the user know where the bad cells are.
-    do nn=1, nDom
-       call setPointers(nn, level, sps)
-
-       ! On the first pass count up the total number of orphans for this block
-       n = 0
-       do k=2, kl
-          do j=2, jl
-             do i=2, il
-                if (iblank(i,j,k) == 1) then
-                   badCell = .False.
-
-                   stencilLoop: do i_stencil=1, N_visc_drdw
-                      ii = visc_drdw_stencil(i_stencil, 1) + i
-                      jj = visc_drdw_stencil(i_stencil, 2) + j
-                      kk = visc_drdw_stencil(i_stencil, 3) + k
-
-                      if (.not. (iBlank(ii, jj, kk) == 1 .or. iblank(ii,jj,kk) == -1)) then
-                         badCell = .True.
-                      end if
-                   end do stencilLoop
-                   if (badCell .and. printBadCells) then
-                      if (oversetDebugPrint) &
-                        print *,'Error in connectivity at :',nbkglobal, i+iBegOr, j+jBegOr, k+kBegOr
-                      ! we can modify iBlankLast because this is the last checkOverset call.
-                      ! we set iBlankLast to -5 to mark orphan cells, this value will then
-                      ! be moved to iBlank after we are done with other loops.
-                      flowDoms(nn, level, sps)%iBlankLast(i,j,k) = -5
-                   end if
-                end if
-             end do
-          end do
-       end do
-    end do
-
-
-    magic = 33
-    localOrphans = 0
-    do nn=1, nDom
-       call setPointers(nn, level, sps)
-
-       ! On the first pass count up the total number of orphans for this block
-       n = 0
-       do k=2, kl
-          do j=2, jl
-             do i=2, il
-                if (iblank(i,j,k) == 0 .or. iblank(i,j,k)==-2 .or. &
-                     iblank(i,j,k)==-3 .or. iblank(i,j,k) == -4) then
-
-                   stencilLoop2: do i_stencil=1, N_visc_drdw
-                      ii = visc_drdw_stencil(i_stencil, 1) + i
-                      jj = visc_drdw_stencil(i_stencil, 2) + j
-                      kk = visc_drdw_stencil(i_stencil, 3) + k
-
-                      if (ii >= 2 .and. jj >= 2 .and. kk>=2 .and. &
-                           ii <= il .and. jj <= jl .and. kk <= kl) then
-
-                         if (iBlank(ii, jj, kk) == 1) then
-                            ! This cell is an orphan:
-                            n = n + 1
-                         end if
-                      end if
-                   end do stencilLoop2
-                end if
-             end do
-          end do
-       end do
-
-       localOrphans = localOrphans + n
-
-       ! Remove any existing orphans
-       if (associated(flowDoms(nn, level, sps)%orphans)) then
-          deallocate(flowDoms(nn, level, sps)%orphans)
-       end if
-       allocate(flowDoms(nn, level, sps)%orphans(3, n))
-
-       ! Save the total number of orphans on this block
-       flowDoms(nn, level, sps)%nOrphans = n
-
-       ! Manual set information from blockPointers that would be set
-       ! with setPointers()
-       orphans => flowDoms(nn, level, sps)%orphans
-       nOrphans = n
-
-       ! On the first pass count up the total number of orphans for this block
-       n = 0
-       do k=2, kl
-          do j=2, jl
-             do i=2, il
-                if (iblank(i,j,k) == 0 .or. iblank(i,j,k)==-2 .or. &
-                     iblank(i,j,k)==-3 .or. iblank(i,j,k)==-4) then
-
-                   stencilLoop3: do i_stencil=1, N_visc_drdw
-                      ii = visc_drdw_stencil(i_stencil, 1) + i
-                      jj = visc_drdw_stencil(i_stencil, 2) + j
-                      kk = visc_drdw_stencil(i_stencil, 3) + k
-
-                      if (ii >= 2 .and. jj >= 2 .and. kk>=2 .and. &
-                           ii <= il .and. jj <= jl .and. kk <= kl) then
-
-                         if (iBlank(ii, jj, kk) == 1) then
-                            ! This cell is an orphan:
-                            n = n + 1
-                            orphans(:, n) = (/ii, jj, kk/)
-                         end if
-                      end if
-                   end do stencilLoop3
+        do
+
+            ! Check for the size of the subarray.
+
+            if ((r - l) < m) then
+
+                ! Perform insertion sort
+
+                do j = l + 1, r
+                    a = arr(j)
+                    do i = (j - 1), l, -1
+                        if (arr(i) <= a) exit
+                        arr(i + 1) = arr(i)
+                    end do
+                    arr(i + 1) = a
+                end do
+
+                ! In case there are no more elements on the stack, exit from
+                ! the outermost do-loop. Algorithm has finished.
+
+                if (jStack == 0) exit
+
+                ! Pop stack and begin a new round of partitioning.
+
+                r = stack(jStack)
+                l = stack(jStack - 1)
+                jStack = jStack - 2
+
+            else
+
+                ! Subarray is larger than the threshold for a linear sort.
+                ! Choose median of left, center and right elements as
+                ! partitioning element a.
+                ! Also rearrange so that (l) <= (l+1) <= (r).
+
+                k = (l + r) / 2
+                tmp = arr(k)             ! Swap the elements
+                arr(k) = arr(l + 1)           ! k and l+1.
+                arr(l + 1) = tmp
+
+                if (arr(r) < arr(l)) then
+                    tmp = arr(l)             ! Swap the elements
+                    arr(l) = arr(r)             ! r and l.
+                    arr(r) = tmp
                 end if
-             end do
-          end do
-       end do
-    end do
-
-    ! Determine the total number of overset orphans
-    call mpi_allreduce(localOrphans, totalOrphans, 1, adflow_integer, MPI_SUM, &
-         adflow_comm_world, ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
-
-    if (myid == 0) then
-       print *, 'Total number of orphans:', totalOrphans
-    end if
-
-    ! if this is the last checkOverset call with printBadCells = .True., then
-    ! we can move the orphan information to iBlank because we have done all
-    ! the checking required.
-    if (printBadCells .and. (totalOrphans .gt. 0)) then
-      ! loop over the cells and write the orphan information to iBlank
-      do nn=1, nDom
-         call setPointers(nn, level, sps)
-         do k=2, kl
-            do j=2, jl
-               do i=2, il
-                  if (flowDoms(nn, level, sps)%iblankLast(i,j,k) == -5) iBlank(i,j,k) = -5
-               end do
-            end do
-         end do
-      end do
-    end if
 
-  end subroutine checkOverset
+                if (arr(r) < arr(l + 1)) then
+                    tmp = arr(l + 1)         ! Swap the elements
+                    arr(l + 1) = arr(r)           ! r and l+1.
+                    arr(r) = tmp
+                end if
 
-  !
-  !       Finds quadratic uvw weights [-1:1] for interpolant point
-  !       xSearch by solving for its co-ord in donor element defined by
-  !       xElem using newton-raphson iteration.
-  !       Fractions uvwQuadratic come with initial guess from ADT search
-  !       used for linear interpolation.
+                if (arr(l + 1) < arr(l)) then
+                    tmp = arr(l + 1)         ! Swap the elements
+                    arr(l + 1) = arr(l)           ! l and l+1.
+                    arr(l) = tmp
+                end if
 
-  subroutine computeQuadraticWeights(xSearch,xElem,uvwQuadratic)
+                ! Initialize the pointers for partitioning.
 
-    use constants
+                i = l + 1
+                j = r
+                a = arr(l + 1)
 
-    implicit none
+                ! The innermost loop
 
-    ! Input variables
-    real(kind=realType), intent(in)    :: xSearch(3), xElem(3, -1:1, -1:1, -1:1)
-    real(kind=realType), intent(inout) :: uvwQuadratic(3)
+                do
 
-    ! Working variables
-    ! -----------------------------------------------------------------
-    ! newton related
-    integer(kind=intType) :: n, niter
-    real(kind=realType)   :: resid,residtol
-    real(kind=realType)   :: B(3,4)
+                    ! Scan up to find element >= a.
+                    do
+                        i = i + 1
+                        if (a <= arr(i)) exit
+                    end do
 
-    ! others
-    integer(kind=intType) :: j, l, iii, jjj, kkk
-    real(kind=realType)   :: ff(27), shp(3,3), psi(3)
-    real(kind=realType)   :: dff(27,3),dshp(3,3,3) !-> differentials wrt 3 psi dirs
-    logical               :: ok_flag
-    ! -----------------------------------------------------------------
+                    ! Scan down to find element <= a.
+                    do
+                        j = j - 1
+                        if (arr(j) <= a) exit
+                    end do
 
-    ! Initialize newton parameters
-    niter = 0
-    resid = 1.0e10
-    residtol = 1.0e-15
+                    ! Exit the loop in case the pointers i and j crossed.
 
+                    if (j < i) exit
 
-    ! Begin newton iterations to solve for uvw weights wrt quadratic element
+                    ! Swap the element i and j.
 
+                    tmp = arr(i)
+                    arr(i) = arr(j)
+                    arr(j) = tmp
+                end do
 
-    newton_loop: do while (niter < 10 .and. resid > residtol)
+                ! Swap the entries j and l+1. Remember that a equals
+                ! arr(l+1).
 
-       !step 1: find weights
-       !--------------------
+                arr(l + 1) = arr(j)
+                arr(j) = a
 
-       ! Initialize weights
-       psi(1:3) = uvwQuadratic(1:3)
+                ! Push pointers to larger subarray on stack,
+                ! process smaller subarray immediately.
 
-       ! Precopute the FE shape functions for each j-direction
-       do j=1,3
-          shp(1, j) = half*psi(j)*(psi(j) - one)
-          shp(2, j) = -(psi(j)**2-1)
-          shp(3, j) = half*psi(j)*(psi(j) + one)
-       end do
+                jStack = jStack + 2
+                if (jStack > nStack) then
 
-       ! These are the 27 quadratic weights
-       ff(1 )   = shp(1, 1)*shp(1, 2)*shp(1, 3)
-       ff(2 )   = shp(2, 1)*shp(1, 2)*shp(1, 3)
-       ff(3 )   = shp(3, 1)*shp(1, 2)*shp(1, 3)
+                    ! Storage of the stack is too small. Reallocate.
 
-       ff(4 )   = shp(1, 1)*shp(2, 2)*shp(1, 3)
-       ff(5 )   = shp(2, 1)*shp(2, 2)*shp(1, 3)
-       ff(6 )   = shp(3, 1)*shp(2, 2)*shp(1, 3)
+                    allocate (tmpStack(nStack), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortEdgeType", &
+                                       "Memory allocation error for tmpStack")
+                    tmpStack = stack
 
-       ff(7 )   = shp(1, 1)*shp(3, 2)*shp(1, 3)
-       ff(8 )   = shp(2, 1)*shp(3, 2)*shp(1, 3)
-       ff(9 )   = shp(3, 1)*shp(3, 2)*shp(1, 3)
+                    ! Free the memory of stack, store the old value of nStack
+                    ! in tmp and increase nStack.
 
-       ff(10)   = shp(1, 1)*shp(1, 2)*shp(2, 3)
-       ff(11)   = shp(2, 1)*shp(1, 2)*shp(2, 3)
-       ff(12)   = shp(3, 1)*shp(1, 2)*shp(2, 3)
+                    deallocate (stack, stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortEdgeType", &
+                                       "Deallocation error for stack")
+                    ii = nStack
+                    nStack = nStack + 100
 
-       ff(13)   = shp(1, 1)*shp(2, 2)*shp(2, 3)
-       ff(14)   = shp(2, 1)*shp(2, 2)*shp(2, 3)
-       ff(15)   = shp(3, 1)*shp(2, 2)*shp(2, 3)
+                    ! Allocate the memory for stack and copy the old values
+                    ! from tmpStack.
 
-       ff(16)   = shp(1, 1)*shp(3, 2)*shp(2, 3)
-       ff(17)   = shp(2, 1)*shp(3, 2)*shp(2, 3)
-       ff(18)   = shp(3, 1)*shp(3, 2)*shp(2, 3)
+                    allocate (stack(nStack), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortEdgeType", &
+                                       "Memory reallocation error for stack")
+                    stack(1:ii) = tmpStack(1:ii)
 
-       ff(19)   = shp(1, 1)*shp(1, 2)*shp(3, 3)
-       ff(20)   = shp(2, 1)*shp(1, 2)*shp(3, 3)
-       ff(21)   = shp(3, 1)*shp(1, 2)*shp(3, 3)
+                    ! And finally release the memory of tmpStack.
 
-       ff(22)   = shp(1, 1)*shp(2, 2)*shp(3, 3)
-       ff(23)   = shp(2, 1)*shp(2, 2)*shp(3, 3)
-       ff(24)   = shp(3, 1)*shp(2, 2)*shp(3, 3)
+                    deallocate (tmpStack, stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortEdgeType", &
+                                       "Deallocation error for tmpStack")
+                end if
 
-       ff(25)   = shp(1, 1)*shp(3, 2)*shp(3, 3)
-       ff(26)   = shp(2, 1)*shp(3, 2)*shp(3, 3)
-       ff(27)   = shp(3, 1)*shp(3, 2)*shp(3, 3)
+                if ((r - i + 1) >= (j - l)) then
+                    stack(jStack) = r
+                    r = j - 1
+                    stack(jStack - 1) = j
+                else
+                    stack(jStack) = j - 1
+                    stack(jStack - 1) = l
+                    l = j
+                end if
 
-       !step 2: find differentials of weights
-       !-------------------------------------
+            end if
+        end do
 
-       ! Linearize the FE shape functions wrt each psi(j) direction
-       ! Note: only derivatives wrt psi(j) for any j are non-zero, rest are zero
+        ! Release the memory of stack.
 
-       dshp(:, :, :) = 0.d0
-       do j=1,3
-          dshp(1, j, j) = half*(psi(j) - one) + half*psi(j)
-          dshp(2, j, j) = -2.d0*psi(j)
-          dshp(3, j, j) = half*(psi(j) + one) + half*psi(j)
-       end do
+        deallocate (stack, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("qsortEdgeType", &
+                           "Deallocation error for stack")
 
-       ! Linearize 27 quadratic weights wrt each psi(j) dir, build from dshp
+        ! Check in debug mode whether the array is really sorted.
 
-       loop_psi_j: do j=1,3
-          dff(1, j) =  dshp(1, 1, j)* shp(1, 2   )* shp(1, 3   ) &
-               +  shp(1, 1   )*dshp(1, 2, j)* shp(1, 3   ) &
-               +  shp(1, 1   )* shp(1, 2   )*dshp(1, 3, j)
+        if (debug) then
+            do i = 1, (nn - 1)
+                if (arr(i + 1) < arr(i)) &
+                    call terminate("qsortEdgeType", &
+                                   "Array is not sorted correctly")
+            end do
+        end if
+
+    end subroutine qsortPocketEdgeType
+
+    subroutine checkOverset(level, sps, totalOrphans, printBadCells)
+
+        !
+        !       CheckOverset checks the integrity of the overset connectivity
+        !       and holes. For every comptue cell (iblank = 1) it checks that
+        !       every cell in its stencil are not blanked. If even 1 cell is
+        !      * found with an incomplete stencil it is a fatal error.
+        use constants
+        use blockPointers, only: il, jl, kl, iblank, flowDoms, nDom, orphans, &
+                                 iBegOR, jBegOr, kBegOr, nbkGlobal, nOrphans
+        use stencils, only: visc_drdw_stencil, N_visc_drdw
+        use communication, only: myid, adflow_comm_world
+        use utils, only: setPointers, EChk
+        use inputOverset, only: oversetDebugPrint
+        implicit none
+
+        ! Input/Output
+        integer(kind=intType), intent(in) :: level, sps
+        integer(kind=intType), intent(out) :: totalOrphans
+        logical, intent(in) :: printBadCells
+
+        ! Working
+        integer(kind=intType) :: i, j, k, nn, ii, jj, kk, n, ierr
+        integer(kind=intType) :: magic, localOrphans, i_stencil
+        logical :: badCell
+
+        ! This pass just lets the user know where the bad cells are.
+        do nn = 1, nDom
+            call setPointers(nn, level, sps)
+
+            ! On the first pass count up the total number of orphans for this block
+            n = 0
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+                        if (iblank(i, j, k) == 1) then
+                            badCell = .False.
+
+                            stencilLoop: do i_stencil = 1, N_visc_drdw
+                                ii = visc_drdw_stencil(i_stencil, 1) + i
+                                jj = visc_drdw_stencil(i_stencil, 2) + j
+                                kk = visc_drdw_stencil(i_stencil, 3) + k
+
+                                if (.not. (iBlank(ii, jj, kk) == 1 .or. iblank(ii, jj, kk) == -1)) then
+                                    badCell = .True.
+                                end if
+                            end do stencilLoop
+                            if (badCell .and. printBadCells) then
+                                if (oversetDebugPrint) &
+                                    print *, 'Error in connectivity at :', nbkglobal, i + iBegOr, j + jBegOr, k + kBegOr
+                                ! we can modify iBlankLast because this is the last checkOverset call.
+                                ! we set iBlankLast to -5 to mark orphan cells, this value will then
+                                ! be moved to iBlank after we are done with other loops.
+                                flowDoms(nn, level, sps)%iBlankLast(i, j, k) = -5
+                            end if
+                        end if
+                    end do
+                end do
+            end do
+        end do
+
+        magic = 33
+        localOrphans = 0
+        do nn = 1, nDom
+            call setPointers(nn, level, sps)
+
+            ! On the first pass count up the total number of orphans for this block
+            n = 0
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+                        if (iblank(i, j, k) == 0 .or. iblank(i, j, k) == -2 .or. &
+                            iblank(i, j, k) == -3 .or. iblank(i, j, k) == -4) then
+
+                            stencilLoop2: do i_stencil = 1, N_visc_drdw
+                                ii = visc_drdw_stencil(i_stencil, 1) + i
+                                jj = visc_drdw_stencil(i_stencil, 2) + j
+                                kk = visc_drdw_stencil(i_stencil, 3) + k
+
+                                if (ii >= 2 .and. jj >= 2 .and. kk >= 2 .and. &
+                                    ii <= il .and. jj <= jl .and. kk <= kl) then
+
+                                    if (iBlank(ii, jj, kk) == 1) then
+                                        ! This cell is an orphan:
+                                        n = n + 1
+                                    end if
+                                end if
+                            end do stencilLoop2
+                        end if
+                    end do
+                end do
+            end do
 
-          dff(2, j) =  dshp(2, 1, j)* shp(1, 2   )* shp(1, 3   ) &
-               +  shp(2, 1   )*dshp(1, 2, j)* shp(1, 3   ) &
-               +  shp(2, 1   )* shp(1, 2   )*dshp(1, 3, j)
+            localOrphans = localOrphans + n
+
+            ! Remove any existing orphans
+            if (associated(flowDoms(nn, level, sps)%orphans)) then
+                deallocate (flowDoms(nn, level, sps)%orphans)
+            end if
+            allocate (flowDoms(nn, level, sps)%orphans(3, n))
+
+            ! Save the total number of orphans on this block
+            flowDoms(nn, level, sps)%nOrphans = n
+
+            ! Manual set information from blockPointers that would be set
+            ! with setPointers()
+            orphans => flowDoms(nn, level, sps)%orphans
+            nOrphans = n
+
+            ! On the first pass count up the total number of orphans for this block
+            n = 0
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+                        if (iblank(i, j, k) == 0 .or. iblank(i, j, k) == -2 .or. &
+                            iblank(i, j, k) == -3 .or. iblank(i, j, k) == -4) then
+
+                            stencilLoop3: do i_stencil = 1, N_visc_drdw
+                                ii = visc_drdw_stencil(i_stencil, 1) + i
+                                jj = visc_drdw_stencil(i_stencil, 2) + j
+                                kk = visc_drdw_stencil(i_stencil, 3) + k
+
+                                if (ii >= 2 .and. jj >= 2 .and. kk >= 2 .and. &
+                                    ii <= il .and. jj <= jl .and. kk <= kl) then
+
+                                    if (iBlank(ii, jj, kk) == 1) then
+                                        ! This cell is an orphan:
+                                        n = n + 1
+                                        orphans(:, n) = (/ii, jj, kk/)
+                                    end if
+                                end if
+                            end do stencilLoop3
+                        end if
+                    end do
+                end do
+            end do
+        end do
+
+        ! Determine the total number of overset orphans
+        call mpi_allreduce(localOrphans, totalOrphans, 1, adflow_integer, MPI_SUM, &
+                           adflow_comm_world, ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
+
+        if (myid == 0) then
+            print *, 'Total number of orphans:', totalOrphans
+        end if
+
+        ! if this is the last checkOverset call with printBadCells = .True., then
+        ! we can move the orphan information to iBlank because we have done all
+        ! the checking required.
+        if (printBadCells .and. (totalOrphans .gt. 0)) then
+            ! loop over the cells and write the orphan information to iBlank
+            do nn = 1, nDom
+                call setPointers(nn, level, sps)
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            if (flowDoms(nn, level, sps)%iblankLast(i, j, k) == -5) iBlank(i, j, k) = -5
+                        end do
+                    end do
+                end do
+            end do
+        end if
 
-          dff(3, j) =  dshp(3, 1, j)* shp(1, 2   )* shp(1, 3   ) &
-               +  shp(3, 1   )*dshp(1, 2, j)* shp(1, 3   ) &
-               +  shp(3, 1   )* shp(1, 2   )*dshp(1, 3, j)
+    end subroutine checkOverset
 
-          dff(4, j) =  dshp(1, 1, j)* shp(2, 2   )* shp(1, 3   ) &
-               +  shp(1, 1   )*dshp(2, 2, j)* shp(1, 3   ) &
-               +  shp(1, 1   )* shp(2, 2   )*dshp(1, 3, j)
+    !
+    !       Finds quadratic uvw weights [-1:1] for interpolant point
+    !       xSearch by solving for its co-ord in donor element defined by
+    !       xElem using newton-raphson iteration.
+    !       Fractions uvwQuadratic come with initial guess from ADT search
+    !       used for linear interpolation.
 
-          dff(5, j) =  dshp(2, 1, j)* shp(2, 2   )* shp(1, 3   ) &
-               +  shp(2, 1   )*dshp(2, 2, j)* shp(1, 3   ) &
-               +  shp(2, 1   )* shp(2, 2   )*dshp(1, 3, j)
+    subroutine computeQuadraticWeights(xSearch, xElem, uvwQuadratic)
 
-          dff(6, j) =  dshp(3, 1, j)* shp(2, 2   )* shp(1, 3   ) &
-               +  shp(3, 1   )*dshp(2, 2, j)* shp(1, 3   ) &
-               +  shp(3, 1   )* shp(2, 2   )*dshp(1, 3, j)
+        use constants
 
-          dff(7, j) =  dshp(1, 1, j)* shp(3, 2   )* shp(1, 3   ) &
-               +  shp(1, 1   )*dshp(3, 2, j)* shp(1, 3   ) &
-               +  shp(1, 1   )* shp(3, 2   )*dshp(1, 3, j)
+        implicit none
 
-          dff(8, j) =  dshp(2, 1, j)* shp(3, 2   )* shp(1, 3   ) &
-               +  shp(2, 1   )*dshp(3, 2, j)* shp(1, 3   ) &
-               +  shp(2, 1   )* shp(3, 2   )*dshp(1, 3, j)
+        ! Input variables
+        real(kind=realType), intent(in) :: xSearch(3), xElem(3, -1:1, -1:1, -1:1)
+        real(kind=realType), intent(inout) :: uvwQuadratic(3)
 
-          dff(9, j) =  dshp(3, 1, j)* shp(3, 2   )* shp(1, 3   ) &
-               +  shp(3, 1   )*dshp(3, 2, j)* shp(1, 3   ) &
-               +  shp(3, 1   )* shp(3, 2   )*dshp(1, 3, j)
+        ! Working variables
+        ! -----------------------------------------------------------------
+        ! newton related
+        integer(kind=intType) :: n, niter
+        real(kind=realType) :: resid, residtol
+        real(kind=realType) :: B(3, 4)
 
-          dff(10, j) =  dshp(1, 1, j)* shp(1, 2   )* shp(2, 3   ) &
-               +  shp(1, 1   )*dshp(1, 2, j)* shp(2, 3   ) &
-               +  shp(1, 1   )* shp(1, 2   )*dshp(2, 3, j)
+        ! others
+        integer(kind=intType) :: j, l, iii, jjj, kkk
+        real(kind=realType) :: ff(27), shp(3, 3), psi(3)
+        real(kind=realType) :: dff(27, 3), dshp(3, 3, 3) !-> differentials wrt 3 psi dirs
+        logical :: ok_flag
+        ! -----------------------------------------------------------------
 
-          dff(11, j) =  dshp(2, 1, j)* shp(1, 2   )* shp(2, 3   ) &
-               +  shp(2, 1   )*dshp(1, 2, j)* shp(2, 3   ) &
-               +  shp(2, 1   )* shp(1, 2   )*dshp(2, 3, j)
+        ! Initialize newton parameters
+        niter = 0
+        resid = 1.0e10
+        residtol = 1.0e-15
 
-          dff(12, j) =  dshp(3, 1, j)* shp(1, 2   )* shp(2, 3   ) &
-               +  shp(3, 1   )*dshp(1, 2, j)* shp(2, 3   ) &
-               +  shp(3, 1   )* shp(1, 2   )*dshp(2, 3, j)
+        ! Begin newton iterations to solve for uvw weights wrt quadratic element
 
-          dff(13, j) =  dshp(1, 1, j)* shp(2, 2   )* shp(2, 3   ) &
-               +  shp(1, 1   )*dshp(2, 2, j)* shp(2, 3   ) &
-               +  shp(1, 1   )* shp(2, 2   )*dshp(2, 3, j)
+        newton_loop: do while (niter < 10 .and. resid > residtol)
 
-          dff(14, j) =  dshp(2, 1, j)* shp(2, 2   )* shp(2, 3   ) &
-               +  shp(2, 1   )*dshp(2, 2, j)* shp(2, 3   ) &
-               +  shp(2, 1   )* shp(2, 2   )*dshp(2, 3, j)
+            !step 1: find weights
+            !--------------------
 
-          dff(15, j) =  dshp(3, 1, j)* shp(2, 2   )* shp(2, 3   ) &
-               +  shp(3, 1   )*dshp(2, 2, j)* shp(2, 3   ) &
-               +  shp(3, 1   )* shp(2, 2   )*dshp(2, 3, j)
+            ! Initialize weights
+            psi(1:3) = uvwQuadratic(1:3)
 
-          dff(16, j) =  dshp(1, 1, j)* shp(3, 2   )* shp(2, 3   ) &
-               +  shp(1, 1   )*dshp(3, 2, j)* shp(2, 3   ) &
-               +  shp(1, 1   )* shp(3, 2   )*dshp(2, 3, j)
+            ! Precopute the FE shape functions for each j-direction
+            do j = 1, 3
+                shp(1, j) = half * psi(j) * (psi(j) - one)
+                shp(2, j) = -(psi(j)**2 - 1)
+                shp(3, j) = half * psi(j) * (psi(j) + one)
+            end do
 
-          dff(17, j) =  dshp(2, 1, j)* shp(3, 2   )* shp(2, 3   ) &
-               +  shp(2, 1   )*dshp(3, 2, j)* shp(2, 3   ) &
-               +  shp(2, 1   )* shp(3, 2   )*dshp(2, 3, j)
+            ! These are the 27 quadratic weights
+            ff(1) = shp(1, 1) * shp(1, 2) * shp(1, 3)
+            ff(2) = shp(2, 1) * shp(1, 2) * shp(1, 3)
+            ff(3) = shp(3, 1) * shp(1, 2) * shp(1, 3)
 
-          dff(18, j) =  dshp(3, 1, j)* shp(3, 2   )* shp(2, 3   ) &
-               +  shp(3, 1   )*dshp(3, 2, j)* shp(2, 3   ) &
-               +  shp(3, 1   )* shp(3, 2   )*dshp(2, 3, j)
+            ff(4) = shp(1, 1) * shp(2, 2) * shp(1, 3)
+            ff(5) = shp(2, 1) * shp(2, 2) * shp(1, 3)
+            ff(6) = shp(3, 1) * shp(2, 2) * shp(1, 3)
 
-          dff(19, j) =  dshp(1, 1, j)* shp(1, 2   )* shp(3, 3   ) &
-               +  shp(1, 1   )*dshp(1, 2, j)* shp(3, 3   ) &
-               +  shp(1, 1   )* shp(1, 2   )*dshp(3, 3, j)
+            ff(7) = shp(1, 1) * shp(3, 2) * shp(1, 3)
+            ff(8) = shp(2, 1) * shp(3, 2) * shp(1, 3)
+            ff(9) = shp(3, 1) * shp(3, 2) * shp(1, 3)
 
-          dff(20, j) =  dshp(2, 1, j)* shp(1, 2   )* shp(3, 3   ) &
-               +  shp(2, 1   )*dshp(1, 2, j)* shp(3, 3   ) &
-               +  shp(2, 1   )* shp(1, 2   )*dshp(3, 3, j)
+            ff(10) = shp(1, 1) * shp(1, 2) * shp(2, 3)
+            ff(11) = shp(2, 1) * shp(1, 2) * shp(2, 3)
+            ff(12) = shp(3, 1) * shp(1, 2) * shp(2, 3)
 
-          dff(21, j) =  dshp(3, 1, j)* shp(1, 2   )* shp(3, 3   ) &
-               +  shp(3, 1   )*dshp(1, 2, j)* shp(3, 3   ) &
-               +  shp(3, 1   )* shp(1, 2   )*dshp(3, 3, j)
+            ff(13) = shp(1, 1) * shp(2, 2) * shp(2, 3)
+            ff(14) = shp(2, 1) * shp(2, 2) * shp(2, 3)
+            ff(15) = shp(3, 1) * shp(2, 2) * shp(2, 3)
 
-          dff(22, j) =  dshp(1, 1, j)* shp(2, 2   )* shp(3, 3   ) &
-               +  shp(1, 1   )*dshp(2, 2, j)* shp(3, 3   ) &
-               +  shp(1, 1   )* shp(2, 2   )*dshp(3, 3, j)
+            ff(16) = shp(1, 1) * shp(3, 2) * shp(2, 3)
+            ff(17) = shp(2, 1) * shp(3, 2) * shp(2, 3)
+            ff(18) = shp(3, 1) * shp(3, 2) * shp(2, 3)
 
-          dff(23, j) =  dshp(2, 1, j)* shp(2, 2   )* shp(3, 3   ) &
-               +  shp(2, 1   )*dshp(2, 2, j)* shp(3, 3   ) &
-               +  shp(2, 1   )* shp(2, 2   )*dshp(3, 3, j)
+            ff(19) = shp(1, 1) * shp(1, 2) * shp(3, 3)
+            ff(20) = shp(2, 1) * shp(1, 2) * shp(3, 3)
+            ff(21) = shp(3, 1) * shp(1, 2) * shp(3, 3)
 
-          dff(24, j) =  dshp(3, 1, j)* shp(2, 2   )* shp(3, 3   ) &
-               +  shp(3, 1   )*dshp(2, 2, j)* shp(3, 3   ) &
-               +  shp(3, 1   )* shp(2, 2   )*dshp(3, 3, j)
+            ff(22) = shp(1, 1) * shp(2, 2) * shp(3, 3)
+            ff(23) = shp(2, 1) * shp(2, 2) * shp(3, 3)
+            ff(24) = shp(3, 1) * shp(2, 2) * shp(3, 3)
 
-          dff(25, j) =  dshp(1, 1, j)* shp(3, 2   )* shp(3, 3   ) &
-               +  shp(1, 1   )*dshp(3, 2, j)* shp(3, 3   ) &
-               +  shp(1, 1   )* shp(3, 2   )*dshp(3, 3, j)
+            ff(25) = shp(1, 1) * shp(3, 2) * shp(3, 3)
+            ff(26) = shp(2, 1) * shp(3, 2) * shp(3, 3)
+            ff(27) = shp(3, 1) * shp(3, 2) * shp(3, 3)
 
-          dff(26, j) =  dshp(2, 1, j)* shp(3, 2   )* shp(3, 3   ) &
-               +  shp(2, 1   )*dshp(3, 2, j)* shp(3, 3   ) &
-               +  shp(2, 1   )* shp(3, 2   )*dshp(3, 3, j)
+            !step 2: find differentials of weights
+            !-------------------------------------
 
-          dff(27, j) =  dshp(3, 1, j)* shp(3, 2   )* shp(3, 3   ) &
-               +  shp(3, 1   )*dshp(3, 2, j)* shp(3, 3   ) &
-               +  shp(3, 1   )* shp(3, 2   )*dshp(3, 3, j)
+            ! Linearize the FE shape functions wrt each psi(j) direction
+            ! Note: only derivatives wrt psi(j) for any j are non-zero, rest are zero
 
-       end do loop_psi_j
+            dshp(:, :, :) = 0.d0
+            do j = 1, 3
+                dshp(1, j, j) = half * (psi(j) - one) + half * psi(j)
+                dshp(2, j, j) = -2.d0 * psi(j)
+                dshp(3, j, j) = half * (psi(j) + one) + half * psi(j)
+            end do
 
+            ! Linearize 27 quadratic weights wrt each psi(j) dir, build from dshp
 
-       ! Step 3: construct Jacobian d(R(psi(:))/d(psi(:)) and residue R(psi(:))
-       ! ----------------------------------------------------------------------
+            loop_psi_j: do j = 1, 3
+                dff(1, j) = dshp(1, 1, j) * shp(1, 2) * shp(1, 3) &
+                            + shp(1, 1) * dshp(1, 2, j) * shp(1, 3) &
+                            + shp(1, 1) * shp(1, 2) * dshp(1, 3, j)
 
-       ! loop over x,y,z dirs, stored row-wise
-       loop_xyz: do n=1,3
+                dff(2, j) = dshp(2, 1, j) * shp(1, 2) * shp(1, 3) &
+                            + shp(2, 1) * dshp(1, 2, j) * shp(1, 3) &
+                            + shp(2, 1) * shp(1, 2) * dshp(1, 3, j)
 
-          ! construct LHS -d(R(psi(:))/d(psi(:))
+                dff(3, j) = dshp(3, 1, j) * shp(1, 2) * shp(1, 3) &
+                            + shp(3, 1) * dshp(1, 2, j) * shp(1, 3) &
+                            + shp(3, 1) * shp(1, 2) * dshp(1, 3, j)
 
-          ! loop over each psi dir j
-          do j=1,3
+                dff(4, j) = dshp(1, 1, j) * shp(2, 2) * shp(1, 3) &
+                            + shp(1, 1) * dshp(2, 2, j) * shp(1, 3) &
+                            + shp(1, 1) * shp(2, 2) * dshp(1, 3, j)
 
-             ! loop over nodes
-             l = 0
-             b(n, j) = 0.d0
+                dff(5, j) = dshp(2, 1, j) * shp(2, 2) * shp(1, 3) &
+                            + shp(2, 1) * dshp(2, 2, j) * shp(1, 3) &
+                            + shp(2, 1) * shp(2, 2) * dshp(1, 3, j)
 
-             do kkk=-1,1
-                do jjj=-1,1
-                   do iii=-1,1
-                      l = l + 1
-                      b(n,j) = b(n,j) + dff(l,j) * xElem(n, iii, jjj, kkk)
-                   end do
-                end do
-             end do
-          end do !j
-
-          ! construct RHS (Xp - sum_i(ff_i * X_i), i =1,27) for nth row (x,y or z)
-          l = 0
-          b(n, 4) = xSearch(n)
-
-          ! loop over nodes
-          do kkk=-1,1
-             do jjj=-1,1
-                do iii=-1,1
-                   l = l + 1
-                   b(n,4) = b(n,4) - ff(l) * xElem(n, iii, jjj, kkk)
-                end do
-             end do
-          end do
+                dff(6, j) = dshp(3, 1, j) * shp(2, 2) * shp(1, 3) &
+                            + shp(3, 1) * dshp(2, 2, j) * shp(1, 3) &
+                            + shp(3, 1) * shp(2, 2) * dshp(1, 3, j)
 
-       end do loop_xyz
+                dff(7, j) = dshp(1, 1, j) * shp(3, 2) * shp(1, 3) &
+                            + shp(1, 1) * dshp(3, 2, j) * shp(1, 3) &
+                            + shp(1, 1) * shp(3, 2) * dshp(1, 3, j)
 
-       ! Get d(uvwQuadratic) weights
-       ! invert 3x3 matrix returns solution in b(:,4)
-       call matrixinv3by3(b, ok_flag)
-       if (.not. ok_flag) stop 'Can not invert B in computeQuadraticWeights'
+                dff(8, j) = dshp(2, 1, j) * shp(3, 2) * shp(1, 3) &
+                            + shp(2, 1) * dshp(3, 2, j) * shp(1, 3) &
+                            + shp(2, 1) * shp(3, 2) * dshp(1, 3, j)
 
-       ! update uvwQuadratic weights
-       do n=1,3
-          uvwQuadratic(n) = uvwQuadratic(n) + b(n,4)
+                dff(9, j) = dshp(3, 1, j) * shp(3, 2) * shp(1, 3) &
+                            + shp(3, 1) * dshp(3, 2, j) * shp(1, 3) &
+                            + shp(3, 1) * shp(3, 2) * dshp(1, 3, j)
 
-          ! sanity check: if weights lie outside [-1:1] reset to 0.5
-          if( (uvwQuadratic(n)-1)*(uvwQuadratic(n)+1) > 0.d0) uvwQuadratic(n) = half
-       end do
+                dff(10, j) = dshp(1, 1, j) * shp(1, 2) * shp(2, 3) &
+                             + shp(1, 1) * dshp(1, 2, j) * shp(2, 3) &
+                             + shp(1, 1) * shp(1, 2) * dshp(2, 3, j)
 
-       ! L2-norm of residue
-       resid = 0.d0
-       do n=1,3
-          resid = resid + b(n,4)**2
-       end do
+                dff(11, j) = dshp(2, 1, j) * shp(1, 2) * shp(2, 3) &
+                             + shp(2, 1) * dshp(1, 2, j) * shp(2, 3) &
+                             + shp(2, 1) * shp(1, 2) * dshp(2, 3, j)
 
-       niter = niter +1
+                dff(12, j) = dshp(3, 1, j) * shp(1, 2) * shp(2, 3) &
+                             + shp(3, 1) * dshp(1, 2, j) * shp(2, 3) &
+                             + shp(3, 1) * shp(1, 2) * dshp(2, 3, j)
 
-    end do newton_loop
-  end subroutine computeQuadraticWeights
-  !
-  !      Plain invert of A to solve Ax=B
-  !      a(3,3) --- matrix to be inverted
-  !      a(:,4) --- contains the right hand side vector, also stores
-  !                 the final solution
+                dff(13, j) = dshp(1, 1, j) * shp(2, 2) * shp(2, 3) &
+                             + shp(1, 1) * dshp(2, 2, j) * shp(2, 3) &
+                             + shp(1, 1) * shp(2, 2) * dshp(2, 3, j)
 
-  subroutine matrixinv3by3(a, ok_flag)
-
-    use constants
-    implicit none
+                dff(14, j) = dshp(2, 1, j) * shp(2, 2) * shp(2, 3) &
+                             + shp(2, 1) * dshp(2, 2, j) * shp(2, 3) &
+                             + shp(2, 1) * shp(2, 2) * dshp(2, 3, j)
 
-    ! Input variables
-    real(kind=realType), intent(inout) :: a(3, 4)
-    logical,             intent(out)   :: ok_flag
+                dff(15, j) = dshp(3, 1, j) * shp(2, 2) * shp(2, 3) &
+                             + shp(3, 1) * dshp(2, 2, j) * shp(2, 3) &
+                             + shp(3, 1) * shp(2, 2) * dshp(2, 3, j)
 
-    ! Working variables
-    integer(kind=intType) :: n
-    real(kind=realType) :: det, cofactor(3,3), ainv(3, 3), rin(3,1), rout(3,1), myeps
+                dff(16, j) = dshp(1, 1, j) * shp(3, 2) * shp(2, 3) &
+                             + shp(1, 1) * dshp(3, 2, j) * shp(2, 3) &
+                             + shp(1, 1) * shp(3, 2) * dshp(2, 3, j)
 
-    myeps = 1.0e-10
-    ainv = 0.d0
+                dff(17, j) = dshp(2, 1, j) * shp(3, 2) * shp(2, 3) &
+                             + shp(2, 1) * dshp(3, 2, j) * shp(2, 3) &
+                             + shp(2, 1) * shp(3, 2) * dshp(2, 3, j)
 
-    det =   a(1,1)*a(2,2)*a(3,3)  &
-         - a(1,1)*a(2,3)*a(3,2)  &
-         - a(1,2)*a(2,1)*a(3,3)  &
-         + a(1,2)*a(2,3)*a(3,1)  &
-         + a(1,3)*a(2,1)*a(3,2)  &
-         - a(1,3)*a(2,2)*a(3,1)
+                dff(18, j) = dshp(3, 1, j) * shp(3, 2) * shp(2, 3) &
+                             + shp(3, 1) * dshp(3, 2, j) * shp(2, 3) &
+                             + shp(3, 1) * shp(3, 2) * dshp(2, 3, j)
 
+                dff(19, j) = dshp(1, 1, j) * shp(1, 2) * shp(3, 3) &
+                             + shp(1, 1) * dshp(1, 2, j) * shp(3, 3) &
+                             + shp(1, 1) * shp(1, 2) * dshp(3, 3, j)
 
-    if (abs(det) <= myeps) then
-       ainv = 0.0d0
-       ok_flag = .false.
-       return
-    end if
+                dff(20, j) = dshp(2, 1, j) * shp(1, 2) * shp(3, 3) &
+                             + shp(2, 1) * dshp(1, 2, j) * shp(3, 3) &
+                             + shp(2, 1) * shp(1, 2) * dshp(3, 3, j)
 
-    cofactor(1,1) = +(a(2,2)*a(3,3)-a(2,3)*a(3,2))
-    cofactor(1,2) = -(a(2,1)*a(3,3)-a(2,3)*a(3,1))
-    cofactor(1,3) = +(a(2,1)*a(3,2)-a(2,2)*a(3,1))
-    cofactor(2,1) = -(a(1,2)*a(3,3)-a(1,3)*a(3,2))
-    cofactor(2,2) = +(a(1,1)*a(3,3)-a(1,3)*a(3,1))
-    cofactor(2,3) = -(a(1,1)*a(3,2)-a(1,2)*a(3,1))
-    cofactor(3,1) = +(a(1,2)*a(2,3)-a(1,3)*a(2,2))
-    cofactor(3,2) = -(a(1,1)*a(2,3)-a(1,3)*a(2,1))
-    cofactor(3,3) = +(a(1,1)*a(2,2)-a(1,2)*a(2,1))
+                dff(21, j) = dshp(3, 1, j) * shp(1, 2) * shp(3, 3) &
+                             + shp(3, 1) * dshp(1, 2, j) * shp(3, 3) &
+                             + shp(3, 1) * shp(1, 2) * dshp(3, 3, j)
 
-    ainv = transpose(cofactor) / det
+                dff(22, j) = dshp(1, 1, j) * shp(2, 2) * shp(3, 3) &
+                             + shp(1, 1) * dshp(2, 2, j) * shp(3, 3) &
+                             + shp(1, 1) * shp(2, 2) * dshp(3, 3, j)
 
-    ok_flag = .true.
+                dff(23, j) = dshp(2, 1, j) * shp(2, 2) * shp(3, 3) &
+                             + shp(2, 1) * dshp(2, 2, j) * shp(3, 3) &
+                             + shp(2, 1) * shp(2, 2) * dshp(3, 3, j)
 
-    do n=1, 3
-       rin(n,1) = a(n, 4)
-    end do
+                dff(24, j) = dshp(3, 1, j) * shp(2, 2) * shp(3, 3) &
+                             + shp(3, 1) * dshp(2, 2, j) * shp(3, 3) &
+                             + shp(3, 1) * shp(2, 2) * dshp(3, 3, j)
 
-    !save solution on a(:,4)
-    rout = matmul(ainv,rin)
+                dff(25, j) = dshp(1, 1, j) * shp(3, 2) * shp(3, 3) &
+                             + shp(1, 1) * dshp(3, 2, j) * shp(3, 3) &
+                             + shp(1, 1) * shp(3, 2) * dshp(3, 3, j)
+
+                dff(26, j) = dshp(2, 1, j) * shp(3, 2) * shp(3, 3) &
+                             + shp(2, 1) * dshp(3, 2, j) * shp(3, 3) &
+                             + shp(2, 1) * shp(3, 2) * dshp(3, 3, j)
+
+                dff(27, j) = dshp(3, 1, j) * shp(3, 2) * shp(3, 3) &
+                             + shp(3, 1) * dshp(3, 2, j) * shp(3, 3) &
+                             + shp(3, 1) * shp(3, 2) * dshp(3, 3, j)
+
+            end do loop_psi_j
+
+            ! Step 3: construct Jacobian d(R(psi(:))/d(psi(:)) and residue R(psi(:))
+            ! ----------------------------------------------------------------------
+
+            ! loop over x,y,z dirs, stored row-wise
+            loop_xyz: do n = 1, 3
+
+                ! construct LHS -d(R(psi(:))/d(psi(:))
+
+                ! loop over each psi dir j
+                do j = 1, 3
+
+                    ! loop over nodes
+                    l = 0
+                    b(n, j) = 0.d0
+
+                    do kkk = -1, 1
+                        do jjj = -1, 1
+                            do iii = -1, 1
+                                l = l + 1
+                                b(n, j) = b(n, j) + dff(l, j) * xElem(n, iii, jjj, kkk)
+                            end do
+                        end do
+                    end do
+                end do !j
+
+                ! construct RHS (Xp - sum_i(ff_i * X_i), i =1,27) for nth row (x,y or z)
+                l = 0
+                b(n, 4) = xSearch(n)
+
+                ! loop over nodes
+                do kkk = -1, 1
+                    do jjj = -1, 1
+                        do iii = -1, 1
+                            l = l + 1
+                            b(n, 4) = b(n, 4) - ff(l) * xElem(n, iii, jjj, kkk)
+                        end do
+                    end do
+                end do
 
-    do n=1, 3
-       a(n, 4) = rout(n,1)
-    end do
+            end do loop_xyz
 
-  end subroutine matrixinv3by3
+            ! Get d(uvwQuadratic) weights
+            ! invert 3x3 matrix returns solution in b(:,4)
+            call matrixinv3by3(b, ok_flag)
+            if (.not. ok_flag) stop 'Can not invert B in computeQuadraticWeights'
 
-  subroutine fringeReduction(level, sps)
+            ! update uvwQuadratic weights
+            do n = 1, 3
+                uvwQuadratic(n) = uvwQuadratic(n) + b(n, 4)
 
-    use constants
-    use blockPointers, only : nDom, il, jl, kl, status, fringePtr
-    use stencils, only : visc_drdw_stencil, n_visc_drdw
-    use utils, only : setPointers
-    implicit none
+                ! sanity check: if weights lie outside [-1:1] reset to 0.5
+                if ((uvwQuadratic(n) - 1) * (uvwQuadratic(n) + 1) > 0.d0) uvwQuadratic(n) = half
+            end do
 
-    ! Input/Output
-    integer(kind=intType), intent(in) :: level, sps
+            ! L2-norm of residue
+            resid = 0.d0
+            do n = 1, 3
+                resid = resid + b(n, 4)**2
+            end do
 
-    ! Working
-    integer(kind=intType) :: i, j, k, nn, ii, jj, kk, i_stencil
-    logical :: computeCellFound
-
-    do nn=1, nDom
-       call setPointers(nn, level, sps)
+            niter = niter + 1
 
-       do k=2, kl
-          do j=2, jl
-             do i=2, il
+        end do newton_loop
+    end subroutine computeQuadraticWeights
+    !
+    !      Plain invert of A to solve Ax=B
+    !      a(3,3) --- matrix to be inverted
+    !      a(:,4) --- contains the right hand side vector, also stores
+    !                 the final solution
+
+    subroutine matrixinv3by3(a, ok_flag)
 
-                ! Check if this cell is a fringe:
-                if (isReceiver(status(i, j, k))) then
+        use constants
+        implicit none
+
+        ! Input variables
+        real(kind=realType), intent(inout) :: a(3, 4)
+        logical, intent(out) :: ok_flag
+
+        ! Working variables
+        integer(kind=intType) :: n
+        real(kind=realType) :: det, cofactor(3, 3), ainv(3, 3), rin(3, 1), rout(3, 1), myeps
+
+        myeps = 1.0e-10
+        ainv = 0.d0
+
+        det = a(1, 1) * a(2, 2) * a(3, 3) &
+              - a(1, 1) * a(2, 3) * a(3, 2) &
+              - a(1, 2) * a(2, 1) * a(3, 3) &
+              + a(1, 2) * a(2, 3) * a(3, 1) &
+              + a(1, 3) * a(2, 1) * a(3, 2) &
+              - a(1, 3) * a(2, 2) * a(3, 1)
+
+        if (abs(det) <= myeps) then
+            ainv = 0.0d0
+            ok_flag = .false.
+            return
+        end if
+
+        cofactor(1, 1) = +(a(2, 2) * a(3, 3) - a(2, 3) * a(3, 2))
+        cofactor(1, 2) = -(a(2, 1) * a(3, 3) - a(2, 3) * a(3, 1))
+        cofactor(1, 3) = +(a(2, 1) * a(3, 2) - a(2, 2) * a(3, 1))
+        cofactor(2, 1) = -(a(1, 2) * a(3, 3) - a(1, 3) * a(3, 2))
+        cofactor(2, 2) = +(a(1, 1) * a(3, 3) - a(1, 3) * a(3, 1))
+        cofactor(2, 3) = -(a(1, 1) * a(3, 2) - a(1, 2) * a(3, 1))
+        cofactor(3, 1) = +(a(1, 2) * a(2, 3) - a(1, 3) * a(2, 2))
+        cofactor(3, 2) = -(a(1, 1) * a(2, 3) - a(1, 3) * a(2, 1))
+        cofactor(3, 3) = +(a(1, 1) * a(2, 2) - a(1, 2) * a(2, 1))
+
+        ainv = transpose(cofactor) / det
+
+        ok_flag = .true.
+
+        do n = 1, 3
+            rin(n, 1) = a(n, 4)
+        end do
+
+        !save solution on a(:,4)
+        rout = matmul(ainv, rin)
+
+        do n = 1, 3
+            a(n, 4) = rout(n, 1)
+        end do
+
+    end subroutine matrixinv3by3
+
+    subroutine fringeReduction(level, sps)
+
+        use constants
+        use blockPointers, only: nDom, il, jl, kl, status, fringePtr
+        use stencils, only: visc_drdw_stencil, n_visc_drdw
+        use utils, only: setPointers
+        implicit none
+
+        ! Input/Output
+        integer(kind=intType), intent(in) :: level, sps
+
+        ! Working
+        integer(kind=intType) :: i, j, k, nn, ii, jj, kk, i_stencil
+        logical :: computeCellFound
+
+        do nn = 1, nDom
+            call setPointers(nn, level, sps)
+
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+
+                        ! Check if this cell is a fringe:
+                        if (isReceiver(status(i, j, k))) then
+
+                            computeCellFound = .False.
+
+                            stencilLoop2: do i_stencil = 1, N_visc_drdw
+                                ii = visc_drdw_stencil(i_stencil, 1) + i
+                                jj = visc_drdw_stencil(i_stencil, 2) + j
+                                kk = visc_drdw_stencil(i_stencil, 3) + k
+
+                                if (isCompute(status(ii, jj, kk))) then
+                                    ! This is a compute cell
+                                    computeCellFound = .True.
+                                end if
+                            end do stencilLoop2
+
+                            if (.not. computeCellFound) then
+                                ! This cell is a hole no compute cell
+                                ! surrounding a fringe, we can hard iblank it.
+                                call setIsHole(status(i, j, k), .True.)
+                                call setIsCompute(status(i, j, k), .False.)
+                                call setIsReceiver(status(i, j, k), .False.)
+                                fringePtr(1, i, j, k) = 0
+
+                            end if
+                        end if
+                    end do
+                end do
+            end do
+        end do
 
-                   computeCellFound = .False.
+    end subroutine fringeReduction
 
-                   stencilLoop2: do i_stencil=1, N_visc_drdw
-                      ii = visc_drdw_stencil(i_stencil, 1) + i
-                      jj = visc_drdw_stencil(i_stencil, 2) + j
-                      kk = visc_drdw_stencil(i_stencil, 3) + k
+    subroutine irregularCellCorrection(level, sps)
 
-                      if (isCompute(status(ii, jj, kk))) then
-                         ! This is a compute cell
-                         computeCellFound = .True.
-                      end if
-                   end do stencilLoop2
+        use constants
+        use blockPointers, only: nDom, il, jl, kl, status, ie, je, ke, forcedRecv
+        use utils, only: setPointers
+        implicit none
 
-                   if (.not. computeCellFound) then
-                      ! This cell is a hole no compute cell
-                      ! surrounding a fringe, we can hard iblank it.
-                      call setIsHole(status(i, j, k), .True.)
-                      call setIsCompute(status(i, j, k), .False.)
-                      call setIsReceiver(status(i, j, k), .False.)
-                      fringePtr(1, i, j, k) = 0
+        ! Input/Output
+        integer(kind=intType), intent(in) :: level, sps
 
-                   end if
-                end if
-             end do
-          end do
-       end do
-    end do
+        ! Working
+        integer(kind=intType) :: i, j, k, nn
 
-  end subroutine fringeReduction
+        do nn = 1, nDom
+            call setPointers(nn, level, sps)
 
-  subroutine irregularCellCorrection(level, sps)
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+                        if (isDonor(status(i, j, k)) .and. &
+                            isReceiver(status(i, j, k))) then
 
-    use constants
-    use blockPointers, only : nDom, il, jl, kl, status, ie, je, ke, forcedRecv
-    use utils, only : setPointers
-    implicit none
+                            ! Clear the fringe
+                            call setIsDonor(status(i, j, k), .False.)
+                            call setIsReceiver(status(i, j, k), .False.)
+                            call setIsCompute(status(i, j, k), .True.)
+                        end if
+                    end do
+                end do
+            end do
+        end do
 
-    ! Input/Output
-    integer(kind=intType), intent(in) :: level, sps
+    end subroutine irregularCellCorrection
 
-    ! Working
-    integer(kind=intType) :: i, j, k, nn
+    function checkOversetPresent()
 
-    do nn=1, nDom
-       call setPointers(nn, level, sps)
+        ! This routine determines if there are any overset boundaries
+        ! present in the mesh.
 
-       do k=2, kl
-          do j=2, jl
-             do i=2, il
-                if (isDonor(status(i, j, k)) .and. &
-                     isReceiver(status(i, j, k))) then
+        use constants
+        use blockPointers, only: nDom, nBocos, BCType
+        use communication, only: adflow_comm_world
+        use utils, only: setPointers, EChk
+        implicit none
 
-                   ! Clear the fringe
-                   call setIsDonor(status(i, j, k), .False.)
-                   call setIsReceiver(status(i, j, k), .False.)
-                   call setIsCompute(status(i, j, k), .True.)
-                end if
-             end do
-          end do
-       end do
-    end do
+        ! Function
+        logical :: checkOversetPresent, local
 
-  end subroutine irregularCellCorrection
+        ! Working
+        integer(Kind=intType) :: nn, mm, ierr
 
+        local = .False.
+        do nn = 1, nDom
+            call setPointers(nn, 1_intType, 1_intType)
 
-  function checkOversetPresent()
+            do mm = 1, nBocos
+                if (BCType(mm) == OversetOuterBound) then
+                    local = .True.
+                end if
+            end do
+        end do
+
+        call mpi_allreduce(local, checkOversetPresent, 1, MPI_LOGICAL, MPI_LOR, ADflow_comm_world, ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
+
+    end function checkOversetPresent
+
+    subroutine setIblankArray(level, sps)
+
+        use constants
+        use block
+        use blockPointers, only: nDom, il, jl, kl, status, iblank, flowDoms, ie, je, ke, forcedRecv
+        use communication, only: myid, commPatternCell_2nd, internalCell_2nd, &
+                                 adflow_comm_world
+        use utils, only: setPointers, EChk
+        use haloExchange, only: whalo1to1IntGeneric
+        implicit none
+
+        ! Input/Output
+        integer(kind=intType), intent(in) :: level, sps
+
+        ! Working
+        integer(kind=intType) :: i, j, k, nn
+        integer(kind=intType) :: nCompute, nFringe, nBlank, nFloodSeed, nFlooded, nExplicitBlanked
+        integer(kind=intType) :: counts(6), ierr
+        type(fringeType) :: fringe
+        nCompute = 0
+        nFringe = 0
+        nBlank = 0
+        nFloodSeed = 0
+        nFlooded = 0
+        nExplicitBlanked = 0
+
+        do nn = 1, nDom
+            call setPointers(nn, level, sps)
+
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+
+                        if (iblank(i, j, k) == -4) then
+                            nExplicitBlanked = nExplicitBlanked + 1
+
+                        else if (isReceiver(status(i, j, k))) then
+                            iblank(i, j, k) = -1
+                            nFringe = nFringe + 1
+
+                        else if (isFloodSeed(status(i, j, k))) then
+                            iBlank(i, j, k) = -3
+                            nFloodSeed = nFloodSeed + 1
+
+                        else if (isFlooded(status(i, j, k))) then
+                            iBlank(i, j, k) = -2
+                            nFlooded = nFlooded + 1
+
+                        else if (isHole(status(i, j, k))) then
+                            iBlank(i, j, k) = 0
+                            nBlank = nBlank + 1
+
+                        else
+                            ! We need to explictly make sure forced receivers
+                            ! *NEVER EVER EVER EVER* get set as compute cells.
+                            if (forcedRecv(i, j, k) .ne. 0) then
+                                ! This is REALLY Bad. This cell must be a forced
+                                ! receiver but never found a donor.
+                                iblank(i, j, k) = 0
+                                nBlank = nBlank + 1
+                            else
+                                ! Compute cell
+                                nCompute = nCompute + 1
+                                iblank(i, j, k) = 1
+                            end if
+                        end if
+                    end do
+                end do
+            end do
+        end do
+
+        ! Update the iblank info.
+        domainLoop: do nn = 1, nDom
+            flowDoms(nn, level, sps)%intCommVars(1)%var => &
+                flowDoms(nn, level, sps)%iblank(:, :, :)
+        end do domainLoop
+
+        ! Run the generic integer exchange
+        call wHalo1to1IntGeneric(1, level, sps, commPatternCell_2nd, internalCell_2nd)
+
+        call mpi_reduce((/nCompute, nFringe, nBlank, nFlooded, nFloodSeed, nExplicitBlanked/), &
+                        counts, 6, adflow_integer, MPI_SUM, 0, adflow_comm_world, ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
+
+        if (myid == 0) then
+            print *, '+--------------------------------+'
+            print *, '| Compute Cells           :', counts(1)
+            print *, '| Fringe Cells            :', counts(2)
+            print *, '| Blanked Cells           :', counts(3)
+            print *, '| Explicitly Blanked Cells:', counts(6)
+            print *, '| Flooded   Cells         :', counts(4)
+            print *, '| FloodSeed Cells         :', counts(5)
+            print *, '+--------------------------------+'
+        end if
+    end subroutine setIblankArray
+
+    subroutine dumpIblank(level, sps)
+
+        use constants
+        use blockPointers, only: il, jl, kl, x, nDom, iBlank
+        use communication, only: myID
+        use utils, only: setPointers
+        implicit none
+
+        ! Input/Output
+        integer(kind=intType), intent(in) :: level, sps
+
+        ! Working
+        integer(kind=intType) :: i, j, k, nn
+        real(kind=realType) :: xp(3)
+        character(80) :: fileName
+
+        write (fileName, "(a,I2.2,a)") "proc_", myid, ".dat"
+        open (unit=19, file=trim(fileName), form='formatted')
+
+        do nn = 1, nDom
+            call setPointers(nn, level, sps)
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+
+                        ! Compute the cell center:
+                        xp = eighth * ( &
+                             x(i - 1, j - 1, k - 1, :) + &
+                             x(i, j - 1, k - 1, :) + &
+                             x(i - 1, j, k - 1, :) + &
+                             x(i, j, k - 1, :) + &
+                             x(i - 1, j - 1, k, :) + &
+                             x(i, j - 1, k, :) + &
+                             x(i - 1, j, k, :) + &
+                             x(i, j, k, :))
+
+                        write (19, "(ES18.10, ES18.10, ES18.10, I3)") xp(1), xp(2), xp(3), iblank(i, j, k)
+                    end do
+                end do
+            end do
+        end do
+
+        close (19)
+
+    end subroutine dumpIblank
+
+    subroutine getWorkArray(overlap, work)
+
+        use constants
+        use communication, only: myid
+        use oversetData, only: CSRMatrix, nDomTotal
+        implicit none
+
+        ! Input/Output
+        type(CSRMatrix), intent(in) :: overlap
+        integer(kind=intType), dimension(:, :), allocatable :: work
+
+        ! Local variables
+        integer(kind=intType) :: nWork, jj, iDom, jDom
+
+        nWork = 0
+        do jj = 1, overlap%nnz
+            if (overlap%assignedProc(jj) == myid) then
+                nWork = nWork + 1
+            end if
+        end do
+        allocate (work(4, nWork))
+
+        nWork = 0
+        do iDom = 1, nDomTotal
+            do jj = overlap%rowPtr(iDom), overlap%rowPtr(iDom + 1) - 1
+                jDom = overlap%colInd(jj)
+                if (overlap%assignedProc(jj) == myID) then
+                    nWork = nWork + 1
+                    work(1, nWork) = iDom
+                    work(2, nWork) = jDom
+                    work(3, nWork) = jj
+                    work(4, nWork) = 0
+                end if
+            end do
+        end do
+    end subroutine getWorkArray
+
+    subroutine writeOversetWall(oWall, fName)
+
+        ! debug routine to dumb an owall to a file
+        use constants
+        use oversetData
+        implicit none
+        type(oversetWall) :: oWall
+        character(len=*), intent(in) :: fName
+
+        integer(kind=intType) :: i, iDim
+        open (unit=19, file=trim(fName), form='formatted')
+        write (19, *) 'Variables = "CoordinateX" "CoordianteY" "CoordinateZ"'
+        write (19, *) "Zone"
+        write (19, *) "Nodes = ", oWall%nNodes, " Elements= ", oWalL%nCells, " ZONETYPE=FEQUADRILATERAL"
+        write (19, *) "DATAPACKING=BLOCK"
+
+        do iDim = 1, 3
+            do i = 1, oWall%nNodes
+                write (19, *) oWall%x(iDim, i)
+            end do
+        end do
+
+        do i = 1, oWall%nCells
+            write (19, *) oWall%conn(1, i), oWall%conn(2, i), oWall%conn(3, i), oWall%conn(4, i)
+        end do
+        close (19)
+    end subroutine writeOversetWall
+
+    subroutine getOversetIblank(blkList, n)
+
+        ! This routine gathers a list of the iblank status of every
+        ! compute cell in the original CGNS ordering. It then returns the
+        ! full list on the root processor. Therefore, this routine is not
+        ! memory or computation scalable. It is only meant to be used a
+        ! debugging tool to ensure that the overset connectivity as
+        ! computed with given parallel decomposition is the same as a
+        ! different parallel decomposition.
+
+        use constants
+        use blockPointers, only: nDom, il, jl, kl, iBegOr, jBegOr, kBegOr, &
+                                 nBkGlobal, iBlank
+        use cgnsGrid, only: cgnsDoms, cgnsNDom
+        use communication, only: adflow_comm_world, myid
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use utils, only: setPointers, EChk, terminate
+#include <petsc/finclude/petsc.h>
+        use petsc
+        implicit none
+
+        ! Input/Output
+        integer(kind=intType), intent(in) :: n
+        integer(kind=intType), dimension(n), intent(out) :: blkList
+
+        ! Working
+
+        ! Working parameters
+        integer(kind=intType) :: i, j, k, ierr, l, nx_cg, ny_cg, nz_cg
+        integer(kind=intType) :: ii, indx, indy, indz, nn, cgnsInd
+        integer(kind=intType), allocatable, dimension(:) :: cellOffset
+        real(kind=realType), dimension(:), pointer :: localPtr
+        real(kind=realType) :: vals(5)
+        Vec CGNSVec
+
+        ! This routine cannot be used in timespectral mode
+        if (nTimeIntervalsSpectral > 1) then
+            call terminate('getOversetIBlank', 'This routine can only be used '&
+                 &'with 1 spectral instance')
+        end if
+
+        allocate (cellOffset(cgnsNDom + 1))
+        cellOffset(1) = 0
+        do nn = 1, cgnsNDom
+            cellOffset(nn + 1) = cellOffset(nn) + &
+                                 cgnsDoms(nn)%nx * cgnsDoms(nn)%ny * cgnsDoms(nn)%nz
+        end do
+
+        ! Create the CGNSVector
+        if (myid == 0) then
+            call VecCreateMPI(adflow_comm_world, cellOffset(cgnsNDom + 1) * 5, &
+                              PETSC_DETERMINE, cgnsVec, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+        else
+            call VecCreateMPI(adflow_comm_world, 0, PETSC_DETERMINE, cgnsVec, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+        end if
+
+        call VecSetBlockSize(cgnsVec, 5, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+        ii = 0
+        do nn = 1, nDom
+            call setPointers(nn, 1, 1)
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+
+                        nx_cg = cgnsDoms(nbkGlobal)%nx
+                        ny_cg = cgnsDoms(nbkGlobal)%ny
+                        nz_cg = cgnsDoms(nbkGlobal)%nz
+
+                        indx = iBegOr + i - 2
+                        indy = jBegOr + j - 2
+                        indz = kBegOr + k - 2
+
+                        ! cgnsInd is zero-based
+                        cgnsInd = cellOffset(nbkGlobal) + &
+                                  (indz - 1) * ny_cg * nx_cg + &
+                                  (indy - 1) * nx_cg + &
+                                  (indx - 1)
+                        vals(1) = real(iblank(i, j, k))
+                        if (vals(1) <= -2) then
+                            vals(1) = 0
+                        end if
+                        vals(2) = real(nbkGlobal)
+                        vals(3) = real(indx)
+                        vals(4) = real(indy)
+                        vals(5) = real(indz)
+                        call VecSetValuesBlocked(cgnsVec, 1, (/cgnsInd/), vals, INSERT_VALUES, ierr)
+                        call EChk(ierr, __FILE__, __LINE__)
+                    end do
+                end do
+            end do
+        end do
 
-    ! This routine determines if there are any overset boundaries
-    ! present in the mesh.
+        call VecAssemblyBegin(cgnsVec, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    use constants
-    use blockPointers, only : nDom, nBocos, BCType
-    use communication, only : adflow_comm_world
-    use utils, only : setPointers, EChk
-    implicit none
+        call VecAssemblyEnd(cgnsVec, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    ! Function
-    logical :: checkOversetPresent, local
+        ! Get the local vector pointer. Only the root proc actually has
+        ! values.
+        call vecGetArrayF90(cgnsVec, localPtr, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    ! Working
-    integer(Kind=intType) :: nn, mm, ierr
+        ! Convert back to integer.
+        do i = 1, size(localPtr)
+            blkList(i) = int(localPtr(i))
+        end do
 
-    local = .False.
-    do nn=1, nDom
-       call setPointers(nn, 1_intType, 1_intType)
+        call vecRestoreArrayF90(cgnsVec, localPtr, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-       do mm=1, nBocos
-          if (BCType(mm) == OversetOuterBound) then
-             local = .True.
-          end if
-       end do
-    end do
+    end subroutine getOversetIblank
 
-    call mpi_allreduce(local, checkOversetPresent, 1, MPI_LOGICAL, MPI_LOR, ADflow_comm_world, ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
+#endif
 
-  end function checkOversetPresent
+    ! --------------------------------------------------
+    !           Tapenade Routine BELOW this point
+    ! --------------------------------------------------
+
+    subroutine fracToWeights(frac, weights)
+        use constants
+        implicit none
+        real(kind=realType), intent(in), dimension(3) :: frac
+        real(kind=realType), intent(out), dimension(8) :: weights
+
+        weights(1) = (one - frac(1)) * (one - frac(2)) * (one - frac(3))
+        weights(2) = (frac(1)) * (one - frac(2)) * (one - frac(3))
+        weights(3) = (one - frac(1)) * (frac(2)) * (one - frac(3))
+        weights(4) = (frac(1)) * (frac(2)) * (one - frac(3))
+        weights(5) = (one - frac(1)) * (one - frac(2)) * (frac(3))
+        weights(6) = (frac(1)) * (one - frac(2)) * (frac(3))
+        weights(7) = (one - frac(1)) * (frac(2)) * (frac(3))
+        weights(8) = (frac(1)) * (frac(2)) * (frac(3))
+    end subroutine fracToWeights
+
+    subroutine fracToWeights2(frac, weights)
+        use constants
+        implicit none
+        real(kind=realType), intent(in), dimension(3) :: frac
+        real(kind=realType), intent(out), dimension(8) :: weights
+
+        weights(1) = (one - frac(1)) * (one - frac(2)) * (one - frac(3))
+        weights(2) = (frac(1)) * (one - frac(2)) * (one - frac(3))
+        weights(3) = (frac(1)) * (frac(2)) * (one - frac(3))
+        weights(4) = (one - frac(1)) * (frac(2)) * (one - frac(3))
+
+        weights(5) = (one - frac(1)) * (one - frac(2)) * (frac(3))
+        weights(6) = (frac(1)) * (one - frac(2)) * (frac(3))
+        weights(7) = (frac(1)) * (frac(2)) * (frac(3))
+        weights(8) = (one - frac(1)) * (frac(2)) * (frac(3))
+
+    end subroutine fracToWeights2
+
+    subroutine newtonUpdate(xCen, blk, frac0, frac)
+
+        ! This routine performs the newton update to recompute the new
+        ! "frac" (u,v,w) for the point xCen. The actual search is performed
+        ! on the the dual cell formed by the cell centers of the 3x3x3 block
+        ! of primal nodes. This routine is AD'd with tapenade in both
+        ! forward and reverse.
+
+        use constants
+        implicit none
+
+        ! Input
+        real(kind=realType), dimension(3), intent(in) :: xCen
+        real(kind=realType), dimension(3, 3, 3, 3), intent(in) :: blk
+        real(kind=realType), dimension(3), intent(in) :: frac0
+        ! Output
+        real(kind=realType), dimension(3), intent(out) :: frac
+
+        ! Working
+        real(kind=realType), dimension(3, 1:8) :: xn
+        real(kind=realType) :: u, v, w, uv, uw, vw, wvu, du, dv, dw
+        real(kind=realType) :: a11, a12, a13, a21, a22, a23, a31, a32, a33, val
+        real(kind=realType) :: f(3), x(3)
+        integer(kind=intType), dimension(8), parameter :: indices = [1, 2, 4, 3, 5, 6, 8, 7]
+        integer(kind=intType) :: i, j, k, ii, ll
+        real(kind=realType), parameter :: adtEps = 1.e-25_realType
+        real(kind=realType), parameter :: thresConv = 1.e-10_realType
+
+        ! Compute the cell center locations for the 8 nodes describing the
+        ! dual cell. Note that this must be counter-clockwise ordering.
+
+        ii = 0
+        do k = 1, 2
+            do j = 1, 2
+                do i = 1, 2
+                    ii = ii + 1
+                    xn(:, indices(ii)) = eighth * ( &
+                                         blk(i, j, k, :) + &
+                                         blk(i + 1, j, k, :) + &
+                                         blk(i, j + 1, k, :) + &
+                                         blk(i + 1, j + 1, k, :) + &
+                                         blk(i, j, k + 1, :) + &
+                                         blk(i + 1, j, k + 1, :) + &
+                                         blk(i, j + 1, k + 1, :) + &
+                                         blk(i + 1, j + 1, k + 1, :))
+                end do
+            end do
+        end do
 
-  subroutine setIblankArray(level, sps)
+        ! Compute the coordinates relative to node 1.
 
-    use constants
-    use block
-    use blockPointers, only : nDom, il, jl, kl, status, iblank, flowDoms, ie, je, ke, forcedRecv
-    use communication, only : myid, commPatternCell_2nd, internalCell_2nd,&
-         adflow_comm_world
-    use utils, only : setPointers, EChk
-    use haloExchange, only : whalo1to1IntGeneric
-    implicit none
+        do i = 2, 8
+            xn(:, i) = xn(:, i) - xn(:, 1)
+        end do
 
-    ! Input/Output
-    integer(kind=intType), intent(in) :: level, sps
+        ! Compute the location of our seach point relative to the first node.
+        x = xCen - xn(:, 1)
 
-    ! Working
-    integer(kind=intType) :: i, j, k, nn
-    integer(kind=intType) :: nCompute, nFringe, nBlank, nFloodSeed, nFlooded, nExplicitBlanked
-    integer(kind=intType) :: counts(6), ierr
-    type(fringeType) :: fringe
-    nCompute = 0
-    nFringe = 0
-    nBlank = 0
-    nFloodSeed = 0
-    nFlooded = 0
-    nExplicitBlanked = 0
+        ! Modify the coordinates of node 3, 6, 8 and 7 such that
+        ! they correspond to the weights of the u*v, u*w, v*w and
+        ! u*v*w term in the transformation respectively.
 
-    do nn=1, nDom
-       call setPointers(nn, level, sps)
+        xn(1, 7) = xn(1, 7) + xn(1, 2) + xn(1, 4) + xn(1, 5) &
+                   - xn(1, 3) - xn(1, 6) - xn(1, 8)
+        xn(2, 7) = xn(2, 7) + xn(2, 2) + xn(2, 4) + xn(2, 5) &
+                   - xn(2, 3) - xn(2, 6) - xn(2, 8)
+        xn(3, 7) = xn(3, 7) + xn(3, 2) + xn(3, 4) + xn(3, 5) &
+                   - xn(3, 3) - xn(3, 6) - xn(3, 8)
 
-       do k=2, kl
-          do j=2, jl
-             do i=2, il
+        xn(1, 3) = xn(1, 3) - xn(1, 2) - xn(1, 4)
+        xn(2, 3) = xn(2, 3) - xn(2, 2) - xn(2, 4)
+        xn(3, 3) = xn(3, 3) - xn(3, 2) - xn(3, 4)
 
-                if (iblank(i,j,k) == -4) then
-                   nExplicitBlanked = nExplicitBlanked + 1
+        xn(1, 6) = xn(1, 6) - xn(1, 2) - xn(1, 5)
+        xn(2, 6) = xn(2, 6) - xn(2, 2) - xn(2, 5)
+        xn(3, 6) = xn(3, 6) - xn(3, 2) - xn(3, 5)
 
-                else if (isReceiver(status(i, j, k))) then
-                   iblank(i, j, k) = -1
-                   nFringe = nFringe + 1
+        xn(1, 8) = xn(1, 8) - xn(1, 4) - xn(1, 5)
+        xn(2, 8) = xn(2, 8) - xn(2, 4) - xn(2, 5)
+        xn(3, 8) = xn(3, 8) - xn(3, 4) - xn(3, 5)
 
-                else if (isFloodSeed(status(i, j, k))) then
-                   iBlank(i, j, k) = -3
-                   nFloodSeed = nFloodSeed + 1
+        ! Set the starting values of u, v and w based on our previous values
 
-                else if (isFlooded(status(i, j, k))) then
-                   iBlank(i, j, k) = -2
-                   nFlooded = nFlooded + 1
+        u = frac0(1); v = frac0(2); w = frac0(3); 
+        ! The Newton algorithm to determine the parametric
+        ! weights u, v and w for the given coordinate.
 
-                else if (isHole(status(i,j, k))) then
-                   iBlank(i, j, k) = 0
-                   nBlank = nBlank + 1
+        NewtonHexa: do ll = 1, 15
 
-                else
-                   ! We need to explictly make sure forced receivers
-                   ! *NEVER EVER EVER EVER* get set as compute cells.
-                   if (forcedRecv(i,j,k) .ne. 0) then
-                      ! This is REALLY Bad. This cell must be a forced
-                      ! receiver but never found a donor.
-                      iblank(i,j,k) = 0
-                      nBlank = nBlank + 1
-                   else
-                      ! Compute cell
-                      nCompute = nCompute + 1
-                      iblank(i,j,k) = 1
-                   end if
-                end if
-             end do
-          end do
-       end do
-    end do
-
-    ! Update the iblank info.
-    domainLoop:do nn=1, nDom
-       flowDoms(nn, level, sps)%intCommVars(1)%var => &
-            flowDoms(nn, level, sps)%iblank(:, :, :)
-    end do domainLoop
-
-    ! Run the generic integer exchange
-    call wHalo1to1IntGeneric(1, level, sps, commPatternCell_2nd, internalCell_2nd)
-
-    call mpi_reduce((/nCompute, nFringe, nBlank, nFlooded, nFloodSeed, nExplicitBlanked/), &
-         counts, 6, adflow_integer, MPI_SUM, 0, adflow_comm_world, ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
-
-    if (myid == 0) then
-       print *, '+--------------------------------+'
-       print *, '| Compute Cells           :', counts(1)
-       print *, '| Fringe Cells            :', counts(2)
-       print *, '| Blanked Cells           :', counts(3)
-       print *, '| Explicitly Blanked Cells:', counts(6)
-       print *, '| Flooded   Cells         :', counts(4)
-       print *, '| FloodSeed Cells         :', counts(5)
-       print *, '+--------------------------------+'
-    end if
-  end subroutine setIblankArray
-
-  subroutine dumpIblank(level, sps)
-
-    use constants
-    use blockPointers, only: il, jl, kl, x, nDom, iBlank
-    use communication, only : myID
-    use utils, only : setPointers
-    implicit none
-
-    ! Input/Output
-    integer(kind=intType), intent(in) :: level, sps
-
-    ! Working
-    integer(kind=intType) :: i, j, k, nn
-    real(kind=realType) :: xp(3)
-    character(80) :: fileName
-
-    write (fileName,"(a,I2.2,a)") "proc_", myid, ".dat"
-    open(unit=19,file=trim(fileName),form='formatted')
-
-    do nn=1, nDom
-       call setPointers(nn, level, sps)
-       do k=2, kl
-          do j=2, jl
-             do i=2, il
-
-                ! Compute the cell center:
-                xp = eighth*(&
-                     x(i-1, j-1, k-1, :) + &
-                     x(i  , j-1, k-1, :) + &
-                     x(i-1, j  , k-1, :) + &
-                     x(i  , j  , k-1, :) + &
-                     x(i-1, j-1, k  , :) + &
-                     x(i  , j-1, k  , :) + &
-                     x(i-1, j  , k  , :) + &
-                     x(i  , j  , k  , :))
-
-                write(19, "(ES18.10, ES18.10, ES18.10, I3)") xp(1), xp(2), xp(3), iblank(i, j, k)
-             end do
-          end do
-       end do
-    end do
-
-    close(19)
-
-  end subroutine dumpIblank
-
-  subroutine getWorkArray(overlap, work)
-
-    use constants
-    use communication, only : myid
-    use oversetData, only : CSRMatrix, nDomTotal
-    implicit none
-
-    ! Input/Output
-    type(CSRMatrix), intent(in) :: overlap
-    integer(kind=intType), dimension(:,:), allocatable :: work
-
-    ! Local variables
-    integer(kind=intType) :: nWork, jj, iDom, jDom
-
-    nWork = 0
-    do jj=1,overlap%nnz
-       if (overlap%assignedProc(jj) == myid) then
-          nWork = nWork + 1
-       end if
-    end do
-    allocate(work(4, nWork))
-
-    nWork = 0
-    do iDom=1, nDomTotal
-       do jj=overlap%rowPtr(iDom), overlap%rowPtr(iDom+1)-1
-          jDom = overlap%colInd(jj)
-          if (overlap%assignedProc(jj) == myID) then
-             nWork = nWork + 1
-             work(1, nWork) = iDom
-             work(2, nWork) = jDom
-             work(3, nWork) = jj
-             work(4, nWork) = 0
-          end if
-       end do
-    end do
-  end subroutine getWorkArray
-
-  subroutine writeOversetWall(oWall, fName)
-
-    ! debug routine to dumb an owall to a file
-    use constants
-    use oversetData
-    implicit none
-    type(oversetWall) :: oWall
-    character(len=*), intent(in) :: fName
-
-    integer(kind=intType) :: i, iDim
-    open(unit=19,file=trim(fName), form='formatted')
-    write (19,*) 'Variables = "CoordinateX" "CoordianteY" "CoordinateZ"'
-    write (19, *) "Zone"
-    write (19,*) "Nodes = ", oWall%nNodes, " Elements= ", oWalL%nCells, " ZONETYPE=FEQUADRILATERAL"
-    write (19,*) "DATAPACKING=BLOCK"
-
-    do iDim=1,3
-       do i=1, oWall%nNodes
-          write(19, *) oWall%x(iDim, i)
-       end do
-    end do
-
-    do i=1, oWall%nCells
-       write(19, *) oWall%conn(1, i), oWall%conn(2, i), oWall%conn(3, i), oWall%conn(4, i)
-    end do
-    close(19)
-  end subroutine writeOversetWall
-
-  subroutine getOversetIblank(blkList, n)
-
-    ! This routine gathers a list of the iblank status of every
-    ! compute cell in the original CGNS ordering. It then returns the
-    ! full list on the root processor. Therefore, this routine is not
-    ! memory or computation scalable. It is only meant to be used a
-    ! debugging tool to ensure that the overset connectivity as
-    ! computed with given parallel decomposition is the same as a
-    ! different parallel decomposition.
-
-    use constants
-    use blockPointers, only : nDom, il, jl, kl, iBegOr, jBegOr, kBegOr, &
-         nBkGlobal, iBlank
-    use cgnsGrid, only : cgnsDoms, cgnsNDom
-    use communication, only : adflow_comm_world, myid
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use utils, only : setPointers, EChk, terminate
-#include <petsc/finclude/petsc.h>
-    use petsc
-    implicit none
-
-    ! Input/Output
-    integer(kind=intType), intent(in) :: n
-    integer(kind=intType), dimension(n), intent(out) :: blkList
-
-    ! Working
-
-    ! Working parameters
-    integer(kind=intType) :: i, j, k, ierr, l, nx_cg, ny_cg, nz_cg
-    integer(kind=intType) :: ii, indx, indy, indz, nn, cgnsInd
-    integer(kind=intType), allocatable, dimension(:) :: cellOffset
-    real(kind=realType), dimension(:), pointer :: localPtr
-    real(kind=realType) :: vals(5)
-    Vec CGNSVec
-
-    ! This routine cannot be used in timespectral mode
-    if (nTimeIntervalsSpectral > 1) then
-       call terminate('getOversetIBlank', 'This routine can only be used '&
-            &'with 1 spectral instance')
-    end if
-
-    allocate(cellOffset(cgnsNDom+1))
-    cellOffset(1) = 0
-    do nn=1,cgnsNDom
-       cellOffset(nn+1) = cellOffset(nn) + &
-            cgnsDoms(nn)%nx*cgnsDoms(nn)%ny*cgnsDoms(nn)%nz
-    end do
-
-    ! Create the CGNSVector
-    if (myid == 0) then
-       call VecCreateMPI(adflow_comm_world, cellOffset(cgnsNDom+1)*5, &
-            PETSC_DETERMINE, cgnsVec, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-    else
-       call VecCreateMPI(adflow_comm_world, 0, PETSC_DETERMINE, cgnsVec, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-    end if
-
-    call VecSetBlockSize(cgnsVec, 5, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-    ii = 0
-    do nn=1, nDom
-       call setPointers(nn, 1, 1)
-       do k=2, kl
-          do j=2, jl
-             do i=2, il
-
-                nx_cg = cgnsDoms(nbkGlobal)%nx
-                ny_cg = cgnsDoms(nbkGlobal)%ny
-                nz_cg = cgnsDoms(nbkGlobal)%nz
-
-                indx = iBegOr + i - 2
-                indy = jBegOr + j - 2
-                indz = kBegOr + k - 2
-
-                ! cgnsInd is zero-based
-                cgnsInd = cellOffset(nbkGlobal)  + &
-                     (indz-1)*ny_cg*nx_cg + &
-                     (indy-1)*nx_cg       + &
-                     (indx-1)
-                vals(1) = real(iblank(i,j,k))
-                if (vals(1) <= -2) then
-                   vals(1) = 0
-                end if
-                vals(2) = real(nbkGlobal)
-                vals(3) = real(indx)
-                vals(4) = real(indy)
-                vals(5) = real(indz)
-                call VecSetValuesBlocked(cgnsVec, 1, (/cgnsInd/), vals, INSERT_VALUES, ierr)
-                call EChk(ierr, __FILE__, __LINE__)
-             end do
-          end do
-       end do
-    end do
+            ! Compute the RHS.
 
-    call VecAssemblyBegin(cgnsVec, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+            uv = u * v; uw = u * w; vw = v * w; wvu = u * v * w
 
-    call VecAssemblyEnd(cgnsVec, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+            f(1) = xn(1, 2) * u + xn(1, 4) * v + xn(1, 5) * w &
+                   + xn(1, 3) * uv + xn(1, 6) * uw + xn(1, 8) * vw &
+                   + xn(1, 7) * wvu - x(1)
+            f(2) = xn(2, 2) * u + xn(2, 4) * v + xn(2, 5) * w &
+                   + xn(2, 3) * uv + xn(2, 6) * uw + xn(2, 8) * vw &
+                   + xn(2, 7) * wvu - x(2)
+            f(3) = xn(3, 2) * u + xn(3, 4) * v + xn(3, 5) * w &
+                   + xn(3, 3) * uv + xn(3, 6) * uw + xn(3, 8) * vw &
+                   + xn(3, 7) * wvu - x(3)
 
-    ! Get the local vector pointer. Only the root proc actually has
-    ! values.
-    call vecGetArrayF90(cgnsVec, localPtr, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+            ! Compute the Jacobian.
 
-    ! Convert back to integer.
-    do i=1,size(localPtr)
-       blkList(i) = int(localPtr(i))
-    end do
+            a11 = xn(1, 2) + xn(1, 3) * v + xn(1, 6) * w + xn(1, 7) * vw
+            a12 = xn(1, 4) + xn(1, 3) * u + xn(1, 8) * w + xn(1, 7) * uw
+            a13 = xn(1, 5) + xn(1, 6) * u + xn(1, 8) * v + xn(1, 7) * uv
 
-    call vecRestoreArrayF90(cgnsVec, localPtr, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+            a21 = xn(2, 2) + xn(2, 3) * v + xn(2, 6) * w + xn(2, 7) * vw
+            a22 = xn(2, 4) + xn(2, 3) * u + xn(2, 8) * w + xn(2, 7) * uw
+            a23 = xn(2, 5) + xn(2, 6) * u + xn(2, 8) * v + xn(2, 7) * uv
 
+            a31 = xn(3, 2) + xn(3, 3) * v + xn(3, 6) * w + xn(3, 7) * vw
+            a32 = xn(3, 4) + xn(3, 3) * u + xn(3, 8) * w + xn(3, 7) * uw
+            a33 = xn(3, 5) + xn(3, 6) * u + xn(3, 8) * v + xn(3, 7) * uv
 
-  end subroutine getOversetIblank
+            ! Compute the determinant. Make sure that it is not zero
+            ! and invert the value. The cut off is needed to be able
+            ! to handle exceptional cases for degenerate elements.
 
+            val = a11 * (a22 * a33 - a32 * a23) + a21 * (a13 * a32 - a12 * a33) &
+                  + a31 * (a12 * a23 - a13 * a22)
+            val = sign(one, val) / max(abs(val), adtEps)
 
-#endif
+            ! Compute the new values of u, v and w.
 
-  ! --------------------------------------------------
-  !           Tapenade Routine BELOW this point
-  ! --------------------------------------------------
-
-  subroutine fracToWeights(frac, weights)
-    use constants
-    implicit none
-    real(kind=realType), intent(in), dimension(3) :: frac
-    real(kind=realType), intent(out), dimension(8) :: weights
-
-    weights(1) = (one-frac(1))*(one-frac(2))*(one-frac(3))
-    weights(2) = (    frac(1))*(one-frac(2))*(one-frac(3))
-    weights(3) = (one-frac(1))*(    frac(2))*(one-frac(3))
-    weights(4) = (    frac(1))*(    frac(2))*(one-frac(3))
-    weights(5) = (one-frac(1))*(one-frac(2))*(    frac(3))
-    weights(6) = (    frac(1))*(one-frac(2))*(    frac(3))
-    weights(7) = (one-frac(1))*(    frac(2))*(    frac(3))
-    weights(8) = (    frac(1))*(    frac(2))*(    frac(3))
-  end subroutine fracToWeights
-
-
-  subroutine fracToWeights2(frac, weights)
-    use constants
-    implicit none
-    real(kind=realType), intent(in), dimension(3) :: frac
-    real(kind=realType), intent(out), dimension(8) :: weights
-
-    weights(1) = (one-frac(1))*(one-frac(2))*(one-frac(3))
-    weights(2) = (    frac(1))*(one-frac(2))*(one-frac(3))
-    weights(3) = (    frac(1))*(    frac(2))*(one-frac(3))
-    weights(4) = (one-frac(1))*(    frac(2))*(one-frac(3))
-
-    weights(5) = (one-frac(1))*(one-frac(2))*(    frac(3))
-    weights(6) = (    frac(1))*(one-frac(2))*(    frac(3))
-    weights(7) = (    frac(1))*(    frac(2))*(    frac(3))
-    weights(8) = (one-frac(1))*(    frac(2))*(    frac(3))
-
-  end subroutine fracToWeights2
-
-  subroutine newtonUpdate(xCen, blk, frac0, frac)
-
-    ! This routine performs the newton update to recompute the new
-    ! "frac" (u,v,w) for the point xCen. The actual search is performed
-    ! on the the dual cell formed by the cell centers of the 3x3x3 block
-    ! of primal nodes. This routine is AD'd with tapenade in both
-    ! forward and reverse.
-
-    use constants
-    implicit none
-
-    ! Input
-    real(kind=realType), dimension(3), intent(in) :: xCen
-    real(kind=realType), dimension(3, 3, 3, 3), intent(in) :: blk
-    real(kind=realType), dimension(3), intent(in) :: frac0
-    ! Output
-    real(kind=realType), dimension(3), intent(out) :: frac
-
-    ! Working
-    real(kind=realType), dimension(3, 1:8) :: xn
-    real(kind=realType) :: u,v,w, uv, uw, vw, wvu, du, dv, dw
-    real(kind=realType) :: a11, a12, a13, a21, a22, a23, a31, a32, a33, val
-    real(kind=realType) :: f(3), x(3)
-    integer(kind=intType), dimension(8), parameter :: indices=[1,2,4,3,5,6,8,7]
-    integer(kind=intType) :: i,j,k,ii, ll
-    real(kind=realType),   parameter :: adtEps    = 1.e-25_realType
-    real(kind=realType),   parameter :: thresConv = 1.e-10_realType
-
-    ! Compute the cell center locations for the 8 nodes describing the
-    ! dual cell. Note that this must be counter-clockwise ordering.
-
-    ii = 0
-    do k=1,2
-       do j=1,2
-          do i=1,2
-             ii = ii + 1
-             xn(:, indices(ii)) = eighth * (&
-                  blk(i  , j  , k  , :) + &
-                  blk(i+1, j  , k  , :) + &
-                  blk(i  , j+1, k  , :) + &
-                  blk(i+1, j+1, k  , :) + &
-                  blk(i  , j  , k+1, :) + &
-                  blk(i+1, j  , k+1, :) + &
-                  blk(i  , j+1, k+1, :) + &
-                  blk(i+1, j+1, k+1, :))
-          end do
-       end do
-    end do
-
-
-    ! Compute the coordinates relative to node 1.
-
-    do i=2,8
-       xn(:, i) = xn(:, i) - xn(:, 1)
-    enddo
-
-    ! Compute the location of our seach point relative to the first node.
-    x = xCen - xn(:, 1)
-
-    ! Modify the coordinates of node 3, 6, 8 and 7 such that
-    ! they correspond to the weights of the u*v, u*w, v*w and
-    ! u*v*w term in the transformation respectively.
-
-    xn(1,7) = xn(1,7) + xn(1,2) + xn(1,4) + xn(1,5) &
-         -           xn(1,3) - xn(1,6) - xn(1,8)
-    xn(2,7) = xn(2,7) + xn(2,2) + xn(2,4) + xn(2,5) &
-         -           xn(2,3) - xn(2,6) - xn(2,8)
-    xn(3,7) = xn(3,7) + xn(3,2) + xn(3,4) + xn(3,5) &
-         -           xn(3,3) - xn(3,6) - xn(3,8)
-
-    xn(1,3) = xn(1,3) - xn(1,2) - xn(1,4)
-    xn(2,3) = xn(2,3) - xn(2,2) - xn(2,4)
-    xn(3,3) = xn(3,3) - xn(3,2) - xn(3,4)
-
-    xn(1,6) = xn(1,6) - xn(1,2) - xn(1,5)
-    xn(2,6) = xn(2,6) - xn(2,2) - xn(2,5)
-    xn(3,6) = xn(3,6) - xn(3,2) - xn(3,5)
-
-    xn(1,8) = xn(1,8) - xn(1,4) - xn(1,5)
-    xn(2,8) = xn(2,8) - xn(2,4) - xn(2,5)
-    xn(3,8) = xn(3,8) - xn(3,4) - xn(3,5)
-
-    ! Set the starting values of u, v and w based on our previous values
-
-    u = frac0(1); v = frac0(2); w=frac0(3);
-    ! The Newton algorithm to determine the parametric
-    ! weights u, v and w for the given coordinate.
-
-    NewtonHexa: do ll=1,15
-
-       ! Compute the RHS.
-
-       uv = u*v; uw = u*w; vw = v*w; wvu = u*v*w
-
-       f(1) = xn(1,2)*u   + xn(1,4)*v  + xn(1,5)*w  &
-            + xn(1,3)*uv  + xn(1,6)*uw + xn(1,8)*vw &
-            + xn(1,7)*wvu - x(1)
-       f(2) = xn(2,2)*u   + xn(2,4)*v  + xn(2,5)*w  &
-            + xn(2,3)*uv  + xn(2,6)*uw + xn(2,8)*vw &
-            + xn(2,7)*wvu - x(2)
-       f(3) = xn(3,2)*u   + xn(3,4)*v  + xn(3,5)*w  &
-            + xn(3,3)*uv  + xn(3,6)*uw + xn(3,8)*vw &
-            + xn(3,7)*wvu - x(3)
-
-       ! Compute the Jacobian.
-
-       a11 = xn(1,2) + xn(1,3)*v + xn(1,6)*w + xn(1,7)*vw
-       a12 = xn(1,4) + xn(1,3)*u + xn(1,8)*w + xn(1,7)*uw
-       a13 = xn(1,5) + xn(1,6)*u + xn(1,8)*v + xn(1,7)*uv
-
-       a21 = xn(2,2) + xn(2,3)*v + xn(2,6)*w + xn(2,7)*vw
-       a22 = xn(2,4) + xn(2,3)*u + xn(2,8)*w + xn(2,7)*uw
-       a23 = xn(2,5) + xn(2,6)*u + xn(2,8)*v + xn(2,7)*uv
-
-       a31 = xn(3,2) + xn(3,3)*v + xn(3,6)*w + xn(3,7)*vw
-       a32 = xn(3,4) + xn(3,3)*u + xn(3,8)*w + xn(3,7)*uw
-       a33 = xn(3,5) + xn(3,6)*u + xn(3,8)*v + xn(3,7)*uv
-
-       ! Compute the determinant. Make sure that it is not zero
-       ! and invert the value. The cut off is needed to be able
-       ! to handle exceptional cases for degenerate elements.
-
-       val = a11*(a22*a33 - a32*a23) + a21*(a13*a32 - a12*a33) &
-            + a31*(a12*a23 - a13*a22)
-       val = sign(one,val)/max(abs(val), adtEps)
-
-       ! Compute the new values of u, v and w.
-
-       du = val*((a22*a33 - a23*a32)*f(1) &
-            +      (a13*a32 - a12*a33)*f(2) &
-            +      (a12*a23 - a13*a22)*f(3))
-       dv = val*((a23*a31 - a21*a33)*f(1) &
-            +      (a11*a33 - a13*a31)*f(2) &
-            +      (a13*a21 - a11*a23)*f(3))
-       dw = val*((a21*a32 - a22*a31)*f(1) &
-            +      (a12*a31 - a11*a32)*f(2) &
-            +      (a11*a22 - a12*a21)*f(3))
+            du = val * ((a22 * a33 - a23 * a32) * f(1) &
+                        + (a13 * a32 - a12 * a33) * f(2) &
+                        + (a12 * a23 - a13 * a22) * f(3))
+            dv = val * ((a23 * a31 - a21 * a33) * f(1) &
+                        + (a11 * a33 - a13 * a31) * f(2) &
+                        + (a13 * a21 - a11 * a23) * f(3))
+            dw = val * ((a21 * a32 - a22 * a31) * f(1) &
+                        + (a12 * a31 - a11 * a32) * f(2) &
+                        + (a11 * a22 - a12 * a21) * f(3))
 
-       u = u - du; v = v - dv; w = w - dw
+            u = u - du; v = v - dv; w = w - dw
 
-       ! Exit the loop if the update of the parametric
-       ! weights is below the threshold
+            ! Exit the loop if the update of the parametric
+            ! weights is below the threshold
 
-       val = sqrt(du*du + dv*dv + dw*dw)
-       if(val <= thresConv) then
-          exit NewtonHexa
-       end if
+            val = sqrt(du * du + dv * dv + dw * dw)
+            if (val <= thresConv) then
+                exit NewtonHexa
+            end if
 
-    enddo NewtonHexa
+        end do NewtonHexa
 
-    ! We would *like* that all solutions fall inside the hexa, but we
-    ! can't be picky here since we are not changing the donors. So
-    ! whatever the u,v,w is we have to accept. Even if it is greater than
-    ! 1 or less than zero, it shouldn't be by much.
+        ! We would *like* that all solutions fall inside the hexa, but we
+        ! can't be picky here since we are not changing the donors. So
+        ! whatever the u,v,w is we have to accept. Even if it is greater than
+        ! 1 or less than zero, it shouldn't be by much.
 
-    frac(1) = u
-    frac(2) = v
-    frac(3) = w
+        frac(1) = u
+        frac(2) = v
+        frac(3) = w
 
-  end subroutine newtonUpdate
+    end subroutine newtonUpdate
 
 end module oversetUtilities
diff --git a/src/overset/stringOps.F90 b/src/overset/stringOps.F90
index 19ba3cf10..e6ed97c40 100644
--- a/src/overset/stringOps.F90
+++ b/src/overset/stringOps.F90
@@ -1,3380 +1,3369 @@
 module stringOps
 
-  ! Import oversetString becuase every routine uses this.
-  use oversetData, only : oversetString
-  contains
+    ! Import oversetString becuase every routine uses this.
+    use oversetData, only: oversetString
+contains
 
-  subroutine nullifyString(string)
+    subroutine nullifyString(string)
 
-    use constants
-    implicit none
+        use constants
+        implicit none
 
-    type(oversetString) :: string
+        type(oversetString) :: string
 
-    nullify(string%nodeData, &
-         string%x, &
-         string%norm, &
-         string%perpNorm, &
-         string%h, &
-         string%intNodeData, &
-         string%ind, &
-         string%cluster, &
-         string%family, &
-         string%conn, &
-         string%pNodes, &
-         string%pElems, &
-         string%cNodes, &
-         string%otherID, &
-         string%nte, &
-         string%subStr, &
-         string%elemUsed, &
-         string%XzipNodeUsed, &
-         string%tris, &
-         string%surfCellID)
+        nullify (string%nodeData, &
+                 string%x, &
+                 string%norm, &
+                 string%perpNorm, &
+                 string%h, &
+                 string%intNodeData, &
+                 string%ind, &
+                 string%cluster, &
+                 string%family, &
+                 string%conn, &
+                 string%pNodes, &
+                 string%pElems, &
+                 string%cNodes, &
+                 string%otherID, &
+                 string%nte, &
+                 string%subStr, &
+                 string%elemUsed, &
+                 string%XzipNodeUsed, &
+                 string%tris, &
+                 string%surfCellID)
 
-  end subroutine nullifyString
+    end subroutine nullifyString
 
-  subroutine deallocateString(string)
+    subroutine deallocateString(string)
 
-    use constants
-    implicit none
+        use constants
+        implicit none
 
-    type(oversetString) :: string
-    integer(kind=intType) :: i
+        type(oversetString) :: string
+        integer(kind=intType) :: i
 
-    if (associated(string%nodeData)) &
-         deallocate(string%nodeData)
+        if (associated(string%nodeData)) &
+            deallocate (string%nodeData)
 
-    if (associated(string%intNodeData)) &
-         deallocate(string%intNodeData)
+        if (associated(string%intNodeData)) &
+            deallocate (string%intNodeData)
 
-    if (associated(string%conn)) &
-         deallocate(string%conn)
+        if (associated(string%conn)) &
+            deallocate (string%conn)
 
-    if (associated(string%pNodes)) &
-         deallocate(string%pNodes)
+        if (associated(string%pNodes)) &
+            deallocate (string%pNodes)
 
-    if (associated(string%pElems)) &
-         deallocate(string%pElems)
+        if (associated(string%pElems)) &
+            deallocate (string%pElems)
 
-    if (associated(string%cNodes)) &
-         deallocate(string%cNodes)
+        if (associated(string%cNodes)) &
+            deallocate (string%cNodes)
 
-    if (associated(string%otherID)) &
-         deallocate(string%otherID)
+        if (associated(string%otherID)) &
+            deallocate (string%otherID)
 
-    if (associated(string%nte)) &
-         deallocate(string%nte)
+        if (associated(string%nte)) &
+            deallocate (string%nte)
 
-    if (associated(string%subStr)) &
-         deallocate(string%subStr)
-
-    if (associated(string%elemUsed)) &
-         deallocate(string%elemUsed)
-
-    if (associated(string%xZipNodeUsed)) &
-         deallocate(string%xZipNodeUsed)
-
-    if (associated(string%tris)) &
-         deallocate(string%tris)
-
-    if (associated(string%surfCellID)) &
-         deallocate(string%surfCellID)
-
-    call nullifyString(string)
-
-  end subroutine deallocateString
-
-  subroutine setStringPointers(string)
-
-    use constants
-    implicit none
-    type(oversetString) :: string
-    string%x => string%nodeData(1:3, :)
-    string%norm => string%nodeData(4:6, :)
-    string%perpNorm => string%nodeData(7:9, :)
-    string%h => string%nodeData(10, :)
-
-    string%ind => string%intNodeData(1, :)
-    string%cluster => string%intNodeData(2, :)
-    string%family => string%intNodeData(3, :)
-
-  end subroutine setStringPointers
-
-  subroutine createOrderedStrings(master, strings, nString)
-
-    use constants
-    implicit none
-
-    ! Input/Output
-    type(oversetString) :: master
-    type(oversetString), dimension(:), allocatable :: strings
-
-    ! Working
-    integer(kind=intType) :: nElems, nNodes, curElem, nString, iStart,i, firstElem
-    type(oversetString), pointer  :: stringsLL, str
-
-    ! The next step is to create ordered strings based on the
-    ! connectivity. This is a purely logical operation. We don't know
-    ! how many actual strings we will need so we will use a linked
-    ! list as we go.
-    call createNodeToElem(master)
-
-    ! Allocate some additional arrays we need for doing the chain
-    ! searches.
-    nElems = master%nElems
-    nNodes = master%nNodes
-    allocate(master%elemUsed(nElems), master%subStr(2, nElems), &
-         master%cNodes(2, nNodes))
-
-    master%cNodes = 0
-    master%elemUsed = 0
-    curElem = 1
-    nString = 0
-    do while (curElem < master%nElems)
-
-       ! Arbitrarily get the first node for my element:
-       iStart = master%conn(1, curElem)
-       nElems = master%nte(1, iStart)
-
-       ! ----------------------
-       ! First side of chain:
-       ! ----------------------
-       firstElem = master%nte(2, iStart)
-       master%subStr(1, 1) = firstElem
-       call doChain(master, iStart, 1)
-
-       ! ----------------------
-       ! Second side of chain:
-       ! ----------------------
-       if (nElems > 1) then
-          firstElem = master%nte(3, iStart)
-
-          ! Make sure the second one wasn't wrapped around on a
-          ! periodic chain
-          if (master%elemUsed(firstElem) == 0) then
-
-             master%subStr(2, 1) = firstElem
-             call doChain(master, iStart, 2)
-             call combineChainBuffers(master)
-          end if
-       end if
-
-       ! We now have a boundary string stored in master%subString(1,
-       ! :nSubStr(1)). These are actually the element numbers of the
-       ! master that form a continuous chain.
-
-       ! Create or add a new string to our linked list
-       ! "stringsLL".
-       if (nString == 0) then
-          allocate(stringsLL)
-          nString = 1
-          stringsLL%next => stringsLL
-          str => stringsLL
-       else
-          allocate(str%next)
-          str%next%next => stringsLL
-          str => str%next
-          nString = nString + 1
-       end if
-
-       ! Create a substring from master based on the elements we
-       ! have in the buffer
-       call createSubStringFromElems(master, str, nString)
-
-       ! Scan through until we find the next unused element:
-       do while((master%elemUsed(curElem) == 1) .and. (curElem < master%nElems))
-          curElem = curElem + 1
-       end do
-    end do
-
-    ! Put the strings into an regular array which will be easier to
-    ! manipulate.
-    allocate(strings(nString))
-    str => stringsLL
-    i = 0
-    do while (i < nString)
-       i = i + 1
-       strings(i) = str ! This is derived type assigment.
-       call nullifyString(str)
-       str => str%next
-    end do
-
-  end subroutine createOrderedStrings
-
-  subroutine performSelfZip(master, strings, nStrings, debugZipper)
-
-    use constants
-    use kdtree2_module
-    use inputOverset, only : selfZipCutoff
-    implicit none
-
-    ! Input/Output
-    type(oversetString) :: master
-    integer(kind=intType) :: nStrings
-    type(oversetString), dimension(nStrings), target ::  strings
-    logical, intent(in) :: debugZipper
-
-    ! Workging
-    type(oversetString), pointer :: str
-    real(kind=realType) ::  cutOff
-    integer(kind=intType) :: i, j, nZipped
-
-    ! Now determine if there are any "holes" or periodic strings
-    ! without anything inside of it. Ie closed loops. If there isn't
-    ! we can self zip. Otherwise, we falg it so that it isn't touched
-    ! and automatically pocket zipped at th end.
-
-    do i=1, nStrings
-       str => strings(i)
-       str%isPocket = .True.
-       do j=1, str%nNodes
-          if (str%otherID(1, j) /= -1) then
-             str%isPocket = .False.
-          end if
-       end do
-
-       if (.not. str%isPocket) then
-          zipperLoop: do j=1, 5
-             if (j== 1) then
-                cutOff = selfZipCutoff
-             else
-                cutOff = 90_realType
-             end if
-             call selfZip(strings(i), cutOff, nZipped)
-             if (nZipped == 0) then
-                exit zipperLoop
-             end if
-          end do zipperLoop
-       end if
-    end do
-
-    ! Now we need to redo the string matching becuase the self-zip
-    ! shortened the strings
-    call stringMatch(strings, nStrings, debugZipper)
-
-  end subroutine performSelfZip
-
-  subroutine reduceGapString(string)
-
-    ! Generic routine for removing duplicate nodes on the given
-    ! string. The string is returned with the nodes and connectivities
-    ! adjusted accordingly.
-
-    use constants
-    use utils, only : pointReduce, myNOrm2
-    implicit none
-
-    ! Input/Ouput
-    type(oversetString), intent(inout) :: string
-
-    ! Working:
-    real(kind=realType) :: minEdge
-    integer(kind=intType) :: nUnqiue, i, n1, n2, nUnique,idx
-    integer(kind=intType), dimension(:), allocatable :: link
-    real(kind=realType), dimension(:, :), allocatable :: uniqueNodes
-    real(kind=realType), dimension(:, :), pointer :: nodeDataPtr
-    integer(kind=intType) , dimension(:, :), pointer :: intNodeDataPtr
-    integer(kind=intType), dimension(:), allocatable :: normCounter
-    real(kind=realType), dimension(:, :), allocatable :: uniqueNorms
-
-    ! We will do a sort of adaptive tolernace here: Get the minium edge
-    ! length and base the tolerance on that:
-
-    minEdge = huge(1.0d0)
-
-    do i=1, string%nElems
-       n1 = string%conn(1, i)
-       n2 = string%conn(2, i)
-       minEdge = min(minEdge, mynorm2(string%x(:, n1) - string%x(:, n2)))
-    end do
-
-    allocate(link(string%nNodes), uniqueNodes(3, string%nNodes))
-
-    call pointReduce(string%x, string%nNodes, minEdge/1000.0, uniqueNodes, link, nUnique)
-
-    ! Now average the normals for any duplicate nodes. This is to handle any discrepancies
-    ! for h-type mesh topologies where the surface block is fully represented by the volume connectivity
-    allocate(normCounter(nUnique), uniqueNorms(3, nUnique))
-    normCounter(:)=zero
-    uniqueNorms(:,:) = zero
-    ! sum the norms for the unique node and count how many duplicates there are for a given node
-    do i = 1,string%nNodes
-       idx = link(i)
-       uniqueNorms(:,idx) =  uniqueNorms(:,idx)+ string%nodeData(4:6,i)
-       normCounter(idx) = normCounter(idx)+1
-    end do
-
-    ! Now divide to get the average and assign back to original data storage
-    do i = 1,string%nNodes
-       idx = link(i)
-       string%nodeData(4:6,i) = uniqueNorms(:,idx)/normCounter(idx)
-    end do
-    deallocate(normCounter, uniqueNorms)
-    ! Averageing is complete
-
-    ! Update the connectivity to use the new set of nodes
-    do i=1, string%nElems
-       string%conn(1, i) = link(string%conn(1, i))
-       string%conn(2, i) = link(string%conn(2, i))
-    end do
-
-    ! Reallocate the node based data to the correct size. Set pointers
-    ! to original data first.
-    nodeDataPtr => string%nodeData
-    intNodeDataPtr => string%intNodeData
-    allocate(string%nodeData(10, nUnique), string%intNodeData(3, nUnique))
-
-    ! Reset the pointers
-    call setStringPointers(string)
-
-    do i=1, string%nNodes
-       string%nodeData(:, link(i)) = nodeDataPtr(:, i)
-       string%intNodeData(:, link(i)) = intNodeDataPtr(:, i)
-    end do
-    string%nNodes = nUnique
-
-    ! deallocate the pointer data which is actually the original data
-    deallocate(nodeDataPtr, intNodeDataPtr, link, uniqueNodes)
-
-  end subroutine reduceGapString
-
-  recursive subroutine createNodeToElem(string)
-
-    ! Produce the inverse of the connectivity...the nodeToElem
-    ! array. Each node should point to 1 element (at a
-    ! boundary) or two elements for a normal part of a chain.
-
-    use constants
-    use utils, only : terminate
-    implicit none
-
-    ! Input/Output
-    type(oversetString) :: string
-
-    ! Working
-    integer(kind=intType) :: i, j, ii, jj, n(2), m(2), curElem, nDup
-    integer(kind=intType), dimension(string%nElems) :: duplicated
-    integer(kind=intType), dimension(:, :), pointer :: tmpConn
-    logical :: duplicateElement
-
-    allocate(string%nte(3, string%nNodes))
-    string%nte = 0
-    duplicated = 0
-
-    do i=1, string%nElems
-       ! Node numbers we're working with:
-       n = string%conn(:, i)
-
-       ! For each node check which elements (if any) are already
-       ! connected. We need to check them again the node numbers n1 and n2
-
-       duplicateElement = .False.
-       do jj=1, 2
-
-          do j=1, string%nte(1, n(jj)) ! Loop over the element numbers already here:
-             curElem = string%nte(j+1, n(jj))
-
-             m = string%conn(:, curElem)
-
-             if (m(1) == n(1) .and. m(2) == n(2)) then
-                duplicateElement = .True.
-             else if(m(1) == n(2) .and. m(2) == n(1)) then
-                ! Element exists, but it is the wrong order...don't
-                ! know what to do with this, probably an error or
-                ! maybe a corner case I haven't thought of.
-                call terminate("makeBoundaryString", "Inconsistent duplicate edge.")
-             end if
-          end do
-       end do
-
-       if (.not. duplicateElement) then
-          do jj=1, 2
-             string%nte(1, n(jj)) = string%nte(1, n(jj)) + 1
-             ii = string%nte(1, n(jj))
-             string%nte(ii+1, n(jj)) = i
-          end do
-       else
-          ! Well, we've figured out that this element is actually a
-          ! duplicate so we'll make a note of that
-          duplicated(i) = 1
-       end if
-    end do
-
-    ! If we have duplicated elements, modify the conn to adjust for this.
-    nDup = sum(duplicated)
-    if (nDup > 0) then
-       tmpConn => string%conn
-
-       allocate(string%conn(2, string%nElems - nDup))
-
-       j = 0
-       do i=1, string%nElems
-          if (duplicated(i) == 0) then
-             j = j + 1
-             string%conn(:, j) = tmpConn(:, i)
-
-          end if
-       end do
-
-       ! Set the new number of elements
-       string%nElems = string%nElems - nDup
-
-       ! Don't forget to deallocate the tmpConn pointer which is
-       ! actually the original conn data.
-       deallocate(tmpConn)
-
-       ! Destroy nte and call myself again to get the final correct nte
-       ! without the duplicates.
-       deallocate(string%nte)
-       call createNodeToElem(string)
-    end if
-  end subroutine createNodeToElem
-
-  subroutine doChain(master, iStart, iSub)
-
-    use constants
-    implicit none
-    ! Input/OUtput
-    type(oversetString) :: master
-    integer(kind=intType), intent(in) :: iStart, iSub
-
-    ! Working
-    integer(Kind=intType) :: i, j, jj, c, n1, n2, curNode, nextNode
-    integer(Kind=intType) ::  elem1, elem2, curElem, nextElem
-    integer(kind=intType) :: N
-
-    ! The number of elements in this substring
-    N = 1
-
-    curNode = iStart
-
-    chainLoop: do
-
-       ! Get the currnet element
-       curElem = master%subStr(iSub, N)
-
-       ! Flag the element as used:
-       master%elemUsed(curElem) = 1
-
-       ! Get the two nodes for the current element:
-       n1 = master%conn(1, curElem)
-       n2 = master%conn(2, curElem)
-
-       if (n1 == curNode) then
-          nextNode = n2
-       else
-          nextNode = n1
-       end if
-
-       ! Exit condition 1: Next node was our starting node:
-       if (nextNode == iStart) then
-          exit chainLoop
-       end if
-
-       ! Exit condition 2: The next node has only 1 element, (the one
-       ! we're currently on) so that means the the chain is finished
-       c = master%nte(1, nextNode)
-
-       if (c == 1) then
-          exit chainLoop
-
-       else if (c == 2) then
-          ! With c=2 this easy, just extract the two elements
-          elem1 = master%nte(2, nextNode)
-          elem2 = master%nte(3, nextNode)
-
-          if (elem1 == curElem) then
-             nextElem = elem2
-          else
-             nextElem = elem1
-          end if
-       end if
-
-       ! Now add the "nextElem" to our chain:
-       N = N + 1
-       master%subStr(iSub, N) = nextElem
-
-       ! Flag this elemet as being used
-       master%elemUsed(nextElem) = 1
-
-       ! Finally set the nextNode back to the current node for the next
-       ! iteration
-       curNode = nextNode
-    end do chainLoop
-    master%nSubStr(iSub) = N
-  end subroutine doChain
-
-  subroutine createSubStringFromElems(p, s, id)
-
-    use constants
-    implicit none
-
-    ! Input/output
-    type(oversetString), target, intent(in) :: p
-    type(oversetString), intent(out) :: s
-    integer(kind=intType), intent(in) :: id
-
-    ! Working
-    integer(kind=intType) :: i, j, n1, n2, k
-    integer(kind=intType), dimension(:), allocatable :: nodeUsed
-
-    ! First thing we always have to do with a new string is to nullify
-    ! all the poitners
-    call nullifyString(s)
-
-    ! Firstly we can set the number of elements, since we know precisely
-    ! what this is:
-
-    s%nElems = p%nSubStr(1)
-    s%myID = id
-
-    ! Next determine the number of nodes. This is done by flagging the
-    ! nodes in the parent that are used by 's'
-    allocate(nodeUsed(p%nNodes))
-    nodeUsed = 0
-    k = 0
-    do i=1, s%nElems
-       n1 = p%conn(1, p%subStr(1, i))
-       n2 = p%conn(2, p%subStr(1, i))
-       if (nodeUsed(n1) == 0) then
-          k = k + 1
-          nodeUsed(n1) = k
-       end if
+        if (associated(string%subStr)) &
+            deallocate (string%subStr)
 
-       if (nodeUsed(n2) == 0) then
-          k = k + 1
-          nodeUsed(n2) = k
-       end if
-    end do
-
-    ! We can now set the number of nodes the substring has
-    s%nNodes = k
-
-    ! The number of nodes will equal the number of elements iff the
-    ! string is period. Otherwise we will have 1 more node than element.
-
-    if (s%nNodes ==  s%nElems) then
-       s%isPeriodic = .True.
-    end if
-
-    ! Allocate and set the node and element parent information
-    allocate(s%pElems(s%nElems), s%pNodes(s%nNodes))
-
-    do i=1, s%nElems
-       s%pElems(i) = p%subStr(1, i)
-    end do
-
-    ! Now create the pNodes ("link") array such that pNodes(i) points to
-    ! the node index in the parent
-    j = 0
-    do i=1, p%nNodes
-       if (nodeUsed(i) /= 0) then
-          s%pNodes(nodeUsed(i)) = i
-       end if
-    end do
-
-    ! Set the parent's cNode to point to my nodes
-    do i=1, s%nNodes
-       p%cNodes(:, s%pNodes(i)) = (/s%myID, i/)
-    end do
-
-    ! Now that we know the mapping between by local nodes-based
-    ! quantities and the parent, we can allocate and set all the
-    ! node-based quantities.
-
-    allocate(s%nodeData(10, s%nNodes), s%intNodeData(3, s%nNodes))
-
-    ! Set the string pointers
-    call setStringPointers(s)
-
-    do i=1, s%nNodes
-       s%nodeData(:, i) = p%nodeData(:, s%pNodes(i))
-       s%intNodeData(:, i) = p%intNodeData(:, s%pNodes(i))
-    end do
-
-    ! We can now create the local conn too, *USING THE LOCAL NODE NUMBERS*
-    allocate(s%conn(2, s%nElems))
-
-    do i=1, s%nElems
-       s%conn(1, i) = nodeUsed(p%conn(1, s%pElems(i)))
-       s%conn(2, i) = nodeUsed(p%conn(2, s%pElems(i)))
-    end do
-
-    ! Set the pointer to my parent.
-    s%p => p
-
-    deallocate(nodeUsed)
-
-    ! Last thing we can do is create the nodeToElem for the substring.
-    call  createNodeToElem(s)
-  end subroutine createSubStringFromElems
+        if (associated(string%elemUsed)) &
+            deallocate (string%elemUsed)
 
-  subroutine combineChainBuffers(s)
+        if (associated(string%xZipNodeUsed)) &
+            deallocate (string%xZipNodeUsed)
 
-    use constants
-    implicit none
-    type(oversetString), intent(inout) :: s
-    integer(kind=intType) :: N1, N2
+        if (associated(string%tris)) &
+            deallocate (string%tris)
 
-    N1 = s%nSubStr(1)
-    N2 = s%nSubStr(2)
+        if (associated(string%surfCellID)) &
+            deallocate (string%surfCellID)
 
-    ! First reverse the direction of string 2 of the nodes we found
-    s%subStr(2, 1:N2) = s%subStr(2, N2:1:-1)
+        call nullifyString(string)
 
-    ! Now String 1 can be tacked on the end of string2
-    s%subStr(2, N2+1:N2+N1) = s%subStr(1, 1:N1)
+    end subroutine deallocateString
 
-    ! And finally copied back to string1
-    s%subStr(1, 1:N1+N2) = s%subStr(2, 1:N1+N2)
-    s%nSubStr(1) = N1 + n2
-
-  end subroutine combineChainBuffers
+    subroutine setStringPointers(string)
 
-  subroutine selfZip(s, cutOff, nZipped)
+        use constants
+        implicit none
+        type(oversetString) :: string
+        string%x => string%nodeData(1:3, :)
+        string%norm => string%nodeData(4:6, :)
+        string%perpNorm => string%nodeData(7:9, :)
+        string%h => string%nodeData(10, :)
 
-    use constants
-    use kdtree2_module
-    use utils, only : myNorm2, cross_prod
-    implicit none
+        string%ind => string%intNodeData(1, :)
+        string%cluster => string%intNodeData(2, :)
+        string%family => string%intNodeData(3, :)
 
-    ! Input/Output
-    type(oversetString), intent(inout), target :: s
-    integer(Kind=intType), intent(out) :: nZipped
-    real(kind=realType), intent(in) :: cutOff
+    end subroutine setStringPointers
 
-    ! Working
-    integer(kind=intType) :: i, j, k,  N, ii, im1, ip1
-    logical :: lastNodeZippered,  added
-    real(kind=realType), dimension(3) :: v1, v2, norm
-    real(kind=realType) :: cosCutoff, cosTheta, r2, v1nrm, v2nrm
-    integer(Kind=intType), dimension(:), allocatable :: nodeMap
-    type(kdtree2_result), dimension(:), allocatable  :: results
+    subroutine createOrderedStrings(master, strings, nString)
 
-    ! Perform self zipping on the supplied string. The string at this
-    ! point should be either peroidic or since sinded --- no multiple
-    ! loops should be left. Therefore, we can count on the nodes being
-    ! in order.
+        use constants
+        implicit none
 
-    allocate(results(25))
-    allocate(nodeMap(s%nNodes))
-    nodeMap = 1
+        ! Input/Output
+        type(oversetString) :: master
+        type(oversetString), dimension(:), allocatable :: strings
+
+        ! Working
+        integer(kind=intType) :: nElems, nNodes, curElem, nString, iStart, i, firstElem
+        type(oversetString), pointer :: stringsLL, str
+
+        ! The next step is to create ordered strings based on the
+        ! connectivity. This is a purely logical operation. We don't know
+        ! how many actual strings we will need so we will use a linked
+        ! list as we go.
+        call createNodeToElem(master)
 
-    cosCutoff = cos(cutOff*pi/180)
-    nzipped = 0
+        ! Allocate some additional arrays we need for doing the chain
+        ! searches.
+        nElems = master%nElems
+        nNodes = master%nNodes
+        allocate (master%elemUsed(nElems), master%subStr(2, nElems), &
+                  master%cNodes(2, nNodes))
 
-    ! Peroidic string starts at node 1, and uses node 'N' as the previous
-    ! node. Single chains start at node 2 and only go to the N-1 node.
-    if (s%isPeriodic) then
-       im1 = s%nNodes
-       ii = 1
-       ip1 = 2
-       N = s%nNodes
-    else
-       im1 = 1
-       ii = 2
-       ip1 = 3
-       N = s%nNodes - 1
-    end if
+        master%cNodes = 0
+        master%elemUsed = 0
+        curElem = 1
+        nString = 0
+        do while (curElem < master%nElems)
 
-    do while (ii <= N)
+            ! Arbitrarily get the first node for my element:
+            iStart = master%conn(1, curElem)
+            nElems = master%nte(1, iStart)
 
-       ! Peroidic string at end...loop around
-       if (s%isPeriodic .and. ii == N) then
-          ip1 = 1
-       end if
+            ! ----------------------
+            ! First side of chain:
+            ! ----------------------
+            firstElem = master%nte(2, iStart)
+            master%subStr(1, 1) = firstElem
+            call doChain(master, iStart, 1)
 
-       lastNodeZippered = .False.
+            ! ----------------------
+            ! Second side of chain:
+            ! ----------------------
+            if (nElems > 1) then
+                firstElem = master%nte(3, iStart)
 
-       ! Determine the anlge between the vectors
-       v1 = s%x(:, ip1) - s%x(:, ii)
-       v2 = s%x(:, im1) - s%x(:, ii)
-       v1nrm = mynorm2(v1)
-       v2nrm = mynorm2(v2)
-       call cross_prod(v2, v1, norm)
-       norm = norm / mynorm2(norm)
+                ! Make sure the second one wasn't wrapped around on a
+                ! periodic chain
+                if (master%elemUsed(firstElem) == 0) then
 
-       if (dot_product(norm, s%norm(:, ii)) > zero) then
+                    master%subStr(2, 1) = firstElem
+                    call doChain(master, iStart, 2)
+                    call combineChainBuffers(master)
+                end if
+            end if
+
+            ! We now have a boundary string stored in master%subString(1,
+            ! :nSubStr(1)). These are actually the element numbers of the
+            ! master that form a continuous chain.
+
+            ! Create or add a new string to our linked list
+            ! "stringsLL".
+            if (nString == 0) then
+                allocate (stringsLL)
+                nString = 1
+                stringsLL%next => stringsLL
+                str => stringsLL
+            else
+                allocate (str%next)
+                str%next%next => stringsLL
+                str => str%next
+                nString = nString + 1
+            end if
 
+            ! Create a substring from master based on the elements we
+            ! have in the buffer
+            call createSubStringFromElems(master, str, nString)
+
+            ! Scan through until we find the next unused element:
+            do while ((master%elemUsed(curElem) == 1) .and. (curElem < master%nElems))
+                curElem = curElem + 1
+            end do
+        end do
+
+        ! Put the strings into an regular array which will be easier to
+        ! manipulate.
+        allocate (strings(nString))
+        str => stringsLL
+        i = 0
+        do while (i < nString)
+            i = i + 1
+            strings(i) = str ! This is derived type assigment.
+            call nullifyString(str)
+            str => str%next
+        end do
+
+    end subroutine createOrderedStrings
+
+    subroutine performSelfZip(master, strings, nStrings, debugZipper)
+
+        use constants
+        use kdtree2_module
+        use inputOverset, only: selfZipCutoff
+        implicit none
+
+        ! Input/Output
+        type(oversetString) :: master
+        integer(kind=intType) :: nStrings
+        type(oversetString), dimension(nStrings), target :: strings
+        logical, intent(in) :: debugZipper
+
+        ! Workging
+        type(oversetString), pointer :: str
+        real(kind=realType) :: cutOff
+        integer(kind=intType) :: i, j, nZipped
+
+        ! Now determine if there are any "holes" or periodic strings
+        ! without anything inside of it. Ie closed loops. If there isn't
+        ! we can self zip. Otherwise, we falg it so that it isn't touched
+        ! and automatically pocket zipped at th end.
+
+        do i = 1, nStrings
+            str => strings(i)
+            str%isPocket = .True.
+            do j = 1, str%nNodes
+                if (str%otherID(1, j) /= -1) then
+                    str%isPocket = .False.
+                end if
+            end do
+
+            if (.not. str%isPocket) then
+                zipperLoop: do j = 1, 5
+                    if (j == 1) then
+                        cutOff = selfZipCutoff
+                    else
+                        cutOff = 90_realType
+                    end if
+                    call selfZip(strings(i), cutOff, nZipped)
+                    if (nZipped == 0) then
+                        exit zipperLoop
+                    end if
+                end do zipperLoop
+            end if
+        end do
+
+        ! Now we need to redo the string matching becuase the self-zip
+        ! shortened the strings
+        call stringMatch(strings, nStrings, debugZipper)
+
+    end subroutine performSelfZip
+
+    subroutine reduceGapString(string)
+
+        ! Generic routine for removing duplicate nodes on the given
+        ! string. The string is returned with the nodes and connectivities
+        ! adjusted accordingly.
+
+        use constants
+        use utils, only: pointReduce, myNOrm2
+        implicit none
+
+        ! Input/Ouput
+        type(oversetString), intent(inout) :: string
+
+        ! Working:
+        real(kind=realType) :: minEdge
+        integer(kind=intType) :: nUnqiue, i, n1, n2, nUnique, idx
+        integer(kind=intType), dimension(:), allocatable :: link
+        real(kind=realType), dimension(:, :), allocatable :: uniqueNodes
+        real(kind=realType), dimension(:, :), pointer :: nodeDataPtr
+        integer(kind=intType), dimension(:, :), pointer :: intNodeDataPtr
+        integer(kind=intType), dimension(:), allocatable :: normCounter
+        real(kind=realType), dimension(:, :), allocatable :: uniqueNorms
 
-          ! the dot product of the im1 and ip1 nodes have to be close
-          if (dot_product(s%norm(:, ip1), s%norm(:, im1)) > 0.80) then
+        ! We will do a sort of adaptive tolernace here: Get the minium edge
+        ! length and base the tolerance on that:
+
+        minEdge = huge(1.0d0)
+
+        do i = 1, string%nElems
+            n1 = string%conn(1, i)
+            n2 = string%conn(2, i)
+            minEdge = min(minEdge, mynorm2(string%x(:, n1) - string%x(:, n2)))
+        end do
+
+        allocate (link(string%nNodes), uniqueNodes(3, string%nNodes))
+
+        call pointReduce(string%x, string%nNodes, minEdge / 1000.0, uniqueNodes, link, nUnique)
+
+        ! Now average the normals for any duplicate nodes. This is to handle any discrepancies
+        ! for h-type mesh topologies where the surface block is fully represented by the volume connectivity
+        allocate (normCounter(nUnique), uniqueNorms(3, nUnique))
+        normCounter(:) = zero
+        uniqueNorms(:, :) = zero
+        ! sum the norms for the unique node and count how many duplicates there are for a given node
+        do i = 1, string%nNodes
+            idx = link(i)
+            uniqueNorms(:, idx) = uniqueNorms(:, idx) + string%nodeData(4:6, i)
+            normCounter(idx) = normCounter(idx) + 1
+        end do
+
+        ! Now divide to get the average and assign back to original data storage
+        do i = 1, string%nNodes
+            idx = link(i)
+            string%nodeData(4:6, i) = uniqueNorms(:, idx) / normCounter(idx)
+        end do
+        deallocate (normCounter, uniqueNorms)
+        ! Averageing is complete
+
+        ! Update the connectivity to use the new set of nodes
+        do i = 1, string%nElems
+            string%conn(1, i) = link(string%conn(1, i))
+            string%conn(2, i) = link(string%conn(2, i))
+        end do
+
+        ! Reallocate the node based data to the correct size. Set pointers
+        ! to original data first.
+        nodeDataPtr => string%nodeData
+        intNodeDataPtr => string%intNodeData
+        allocate (string%nodeData(10, nUnique), string%intNodeData(3, nUnique))
+
+        ! Reset the pointers
+        call setStringPointers(string)
+
+        do i = 1, string%nNodes
+            string%nodeData(:, link(i)) = nodeDataPtr(:, i)
+            string%intNodeData(:, link(i)) = intNodeDataPtr(:, i)
+        end do
+        string%nNodes = nUnique
+
+        ! deallocate the pointer data which is actually the original data
+        deallocate (nodeDataPtr, intNodeDataPtr, link, uniqueNodes)
+
+    end subroutine reduceGapString
+
+    recursive subroutine createNodeToElem(string)
+
+        ! Produce the inverse of the connectivity...the nodeToElem
+        ! array. Each node should point to 1 element (at a
+        ! boundary) or two elements for a normal part of a chain.
+
+        use constants
+        use utils, only: terminate
+        implicit none
+
+        ! Input/Output
+        type(oversetString) :: string
+
+        ! Working
+        integer(kind=intType) :: i, j, ii, jj, n(2), m(2), curElem, nDup
+        integer(kind=intType), dimension(string%nElems) :: duplicated
+        integer(kind=intType), dimension(:, :), pointer :: tmpConn
+        logical :: duplicateElement
+
+        allocate (string%nte(3, string%nNodes))
+        string%nte = 0
+        duplicated = 0
+
+        do i = 1, string%nElems
+            ! Node numbers we're working with:
+            n = string%conn(:, i)
+
+            ! For each node check which elements (if any) are already
+            ! connected. We need to check them again the node numbers n1 and n2
+
+            duplicateElement = .False.
+            do jj = 1, 2
+
+                do j = 1, string%nte(1, n(jj)) ! Loop over the element numbers already here:
+                    curElem = string%nte(j + 1, n(jj))
+
+                    m = string%conn(:, curElem)
+
+                    if (m(1) == n(1) .and. m(2) == n(2)) then
+                        duplicateElement = .True.
+                    else if (m(1) == n(2) .and. m(2) == n(1)) then
+                        ! Element exists, but it is the wrong order...don't
+                        ! know what to do with this, probably an error or
+                        ! maybe a corner case I haven't thought of.
+                        call terminate("makeBoundaryString", "Inconsistent duplicate edge.")
+                    end if
+                end do
+            end do
 
-             costheta = dot_product(v1, v2)  / (v1nrm * v2nrm)
+            if (.not. duplicateElement) then
+                do jj = 1, 2
+                    string%nte(1, n(jj)) = string%nte(1, n(jj)) + 1
+                    ii = string%nte(1, n(jj))
+                    string%nte(ii + 1, n(jj)) = i
+                end do
+            else
+                ! Well, we've figured out that this element is actually a
+                ! duplicate so we'll make a note of that
+                duplicated(i) = 1
+            end if
+        end do
 
-             if (costheta > cosCutoff) then
+        ! If we have duplicated elements, modify the conn to adjust for this.
+        nDup = sum(duplicated)
+        if (nDup > 0) then
+            tmpConn => string%conn
 
-                call addPotentialTriangle(s, im1, ii, ip1, nodeMap, &
-                     results, added)
+            allocate (string%conn(2, string%nElems - nDup))
 
+            j = 0
+            do i = 1, string%nElems
+                if (duplicated(i) == 0) then
+                    j = j + 1
+                    string%conn(:, j) = tmpConn(:, i)
 
-                if (added) then
-                   nZipped = nZipped + 1
-                   lastNodeZippered = .True.
                 end if
-             end if
-          end if
-       end if
-
-       if (lastNodeZippered) then
-          ! Skip the next node...we'll get it on the next pass
-          ii = ii + 2
-          im1 = ii -1
-          ip1 = ii + 1
-       else
-          ! Just shuffle along
-          ii = ii + 1
-          ip1 = ii + 1
-          im1 = ii -1
-       end if
-    end do
-
-    ! Now we will modify our string to remove the elements and nodes
-    ! that got knocked off due to self zipping. This way the calling
-    ! process still sees the same string, it just gets a little
-    ! shorter.
-
-    call shortenString(s, nodeMap)
-    deallocate(results, nodeMap)
-
-  end subroutine selfZip
-
-  subroutine crossZip(str1, N1, N2, str2, N3, N4, debugZipper, failed)
-
-    use constants
-    use utils, only : myNorm2, cross_prod
-
-    implicit none
-
-    type(oversetString), intent(inout) :: str1, str2
-    integer(kind=intType) :: N1, N2, N3, N4
-    logical :: debugZipper, failed
-    ! Working
-    type(oversetString), pointer :: p
-    integer(kind=intType) :: stepsA, stepsB, nStepsA, nStepsB
-    integer(kind=intType) :: nTriToAdd, ii, i, j, k, A, B, Ap, Bp
-    integer(kind=intType) :: aPrev, bPrev
-    real(kind=realType), dimension(3) :: ptA, ptB, ptAp, ptBp
-    !real(kind=realType), dimension(3) :: ptAPrev, ptBPRev
-    real(kind=realType), dimension(3) :: Aoff, Boff, ApOff, BpOff
-    real(kind=realType), dimension(3) :: normA, normB, normAp, normBp
-    real(kind=realType), dimension(3) :: perpA, perpB, perpAp, perpBp
-    !real(kind=realType), dimension(3) :: normAPrev, normBPrev
-    !real(kind=realType), dimension(3) :: perpAPrev, perpBPrev
-    real(kind=realType), dimension(3) :: triNorm1, quadNorm1
-    real(kind=realType), dimension(3) :: triNorm2, quadNorm2
-    logical :: aValid, bValid, advanceA, aPreferred, area1, area2
-    logical :: advanceB
-    logical :: changeA, changeB
-    logical :: aValidPrev, bValidPrev, advanceAPrev, advanceBPrev
-    real(kind=realType) ::  sum1, sum2, h, dpa, dpb
-    !am real(kind=realType), parameter :: cutOff = 0.95*3
-    real(kind=realType), parameter :: cutOff = 0.85*3
-    ! First determine the the total number of triangles we will add
-    ! total. It is equal to the total number of triangles on each
-    ! string. This will form the index on the do loop.
-    failed = .False.
-    ! Str1 goes forward
-    if (N2 > N1) then
-       nStepsA = N2 - N1
-    else if (N2 < N1) then
-       nStepsA = N2 + str1%nNodes - N1
-    else ! N1 == N2
-       nStepsA = str1%nElems
-    end if
-
-    ! Str2 goes backwards
-    if (N3 < N4) then
-       nStepsB = N3 + str2%nNodes - N4
-    else if (N3 > N4) then
-       nStepsB = N3 - N4
-    else ! N3 == N4
-       nStepsB =  str2%nElems
-    end if
-
-    ! Initialize these out of bounds incase something goes very wrong.
-    APrev = -1
-    BPrev = -1
-
-    ! The number of steps we've performed in each edge
-    stepsA = 0
-    stepsB = 0
-
-    ! Initialize the front:
-    A = N1
-    B = N3
-    ptA = str1%x(:, A)
-    ptB = str2%x(:, B)
-
-    normA = str1%norm(:, A)
-    normB = str2%norm(:, B)
-
-    perpA = str1%perpNorm(:, A)
-    perpB = str2%perpNorm(:, B)
-
-    Ap = nextNode(str1, A, .True.)
-    Bp = nextNode(str2, B, .False.)
-    ptAp = str1%x(:, Ap)
-    ptBp = str2%x(:, Bp)
-    normAp = str1%norm(:, Ap)
-    normBp = str2%norm(:, Bp)
-    perpAp = str1%perpNorm(:, Ap)
-    perpBp = str2%perpNorm(:, Bp)
-
-
-
-    ! Cross zip nodes N1 to N2 on str1 to nodes N3 to N4 on str2
-    ii = 0
-    do while (ii < nStepsA + nStepsB)
-
-       aValid = .True.
-       bValid = .True.
-       ! ---------------------------------------------------------------
-       ! Check 1: Point-in-Triangle test: This test considers the
-       ! triangle ABA+ and determines if any of the neighbouring points
-       ! on either of the two strings is contained inside the
-       ! triangle. If the test is positive, A+ must be rejected. The
-       ! same test is repeated for B+.
-       ! ---------------------------------------------------------------
-
-       if (triOverlap(ptA, ptB, ptAp, str1, A, Ap) .or. &
-            triOverlap(ptA, ptB, ptAp, str2, B, B)) then
-          aValid = .False.
-       end if
-
-       if (triOverlap(ptA, ptB, ptBp, str1, A, A) .or. &
-            triOverlap(ptA, ptB, ptBp, str2, B, Bp)) then
-          bValid = .False.
-       end if
-
-       ! ---------------------------------------------------------------
-       ! Check 2: Convex quadrilaterl test: This test considers the
-       ! quadrilateral ABB+A+ and determines if it is convex. For
-       ! connection to point A+ to be valid, the vector areas of
-       ! triangles ABA+ and BB+A+ should have the same size. For
-       ! connection to B+ to be valid, the vector areas of trianges ABB+
-       ! and AB+A+ should be the same sign.  NOTE THAT THIS TEST DOES NOT
-       ! ACTUALLY WORK. IT IS 100% INCORRECT!!! THERE ARE CASES WHERE
-       ! THE SIGN OF BOTH AREAS ARE OPPOSITE! IT CANNOT BE SAFELY USED.
-       ! ---------------------------------------------------------------
-
-       ! area1 = positiveTriArea(ptA, ptB, ptAp, normB)
-       ! area2 = positiveTriArea(ptB, ptBp, ptAp, normB)
-
-       ! if (area1 .neqv. area2) then
-       !    aValid = .False.
-       ! end if
-
-       ! area1 = positiveTriArea(ptA, ptB, ptBp, normA)
-       ! area2 = positiveTriArea(ptAp, ptBp, ptA, normA)
-
-       ! if (area1 .neqv. area2) then
-       !    bValid = .False.
-       ! end if
-
-       ! Instead, check if the triangle we're going to add has a
-       ! positive or negative vector area
-
-       area1 = positiveTriArea(ptA, ptB, ptAp, normA)
-       if (area1 .eqv. .False.) then
-          aValid = .False.
-       end if
-
-       area2 = positiveTriArea(ptA, ptB, ptBp, normB)
-       if (area2 .eqv. .False.) then
-          bValid = .False.
-       end if
-
-       ! ---------------------------------------------------------------
-       ! Check 3: Prism volume test: Using the surface normals,
-       ! "extrude" a prisim in the direction of each surface normal and
-       ! find it's volume. It is is not positive, reject the
-       ! triangle. Since we don't have the node off wall, we will have
-       ! to make do with the normal vectors and average cell size. We
-       ! average the cell size and divide by 1000 to give an approximate
-       ! offwall distance. Then we use the norm veectors to offset in
-       ! that distance to produce the "off" points.
-       ! ---------------------------------------------------------------
-
-       ! h = quarter*(str1%h(A) + str1%h(Ap) + str2%h(B) + str2%h(Bp)) / 1000
-       ! AOff = ptA + normA * h
-       ! BOff = ptB + Bnorm * h
-       ! ApOff = ptAp + normAp * h
-       ! BpOff = ptBp + normBp * h
-
-       ! if (prismVol(A, B, Ap, Aoff, Boff, ApOff) < zero) then
-       !    aValid = .False.
-       ! end if
-
-       ! if (prismVol(A, B, Bp, Aoff, Boff, BpOff) < zero) then
-       !    bValid = .False.
-       ! end if
-
-       ! ---------------------------------------------------------------
-       ! Check 4: Interpolation stencil test: This one isn't implemented
-       ! ---------------------------------------------------------------
-
-       ! ---------------------------------------------------------------
-       ! Check 5: Surface normal compatibility test. The surface normal
-       ! from the triangle should be pointing (mostly) in the same
-       ! direction as the normal of the quad that this triangle shares
-       ! and edge with. THIS ALSO DOES NOT WORK! What we have to do
-       ! instead, is check the normal tri normal against the node
-       ! normals it would be using. This is simplier and is vastly
-       ! superior.
-       ! ---------------------------------------------------------------
-
-       call cross_prod(ptB-ptA, ptAp-ptA, triNorm1)
-       triNorm1 = triNorm1 / mynorm2(triNorm1)
-
-       ! Compute the sum of the dot product of the nodal norms with the triNorm
-       sum1 = dot_product(triNorm1, normA) + dot_product(triNorm1, normB) + &
-            dot_product(triNorm1, normAp)
-
-       call cross_prod(ptB-ptA, ptBp-ptA, triNorm2)
-       triNorm2 = triNorm2 / mynorm2(triNorm2)
-
-       sum2 = dot_product(triNorm2, normA) + dot_product(triNorm2, normB) + &
-            dot_product(triNorm2, normBp)
-
-       ! Only use this to help pick one if both are still valid:
-       if (aValid .and. bValid .and. dot_product(triNorm1, triNorm2) < 0.8) then
-
-          ! Only use this to help pick one if both are still valid:
-
-          if (sum1 < cutoff .and. sum2 > cutoff) then
-             aValid = .False.
-
-          else if(sum2 < cutoff .and. sum1 > cutoff) then
-             bValid = .False.
-
-          else if (sum1 < cutoff .and. sum2 < cutoff) then
-             ! Both bad. Take the least bad one
-             if (sum1 > sum2) then
-                bValid = .False.
-             else
+            end do
+
+            ! Set the new number of elements
+            string%nElems = string%nElems - nDup
+
+            ! Don't forget to deallocate the tmpConn pointer which is
+            ! actually the original conn data.
+            deallocate (tmpConn)
+
+            ! Destroy nte and call myself again to get the final correct nte
+            ! without the duplicates.
+            deallocate (string%nte)
+            call createNodeToElem(string)
+        end if
+    end subroutine createNodeToElem
+
+    subroutine doChain(master, iStart, iSub)
+
+        use constants
+        implicit none
+        ! Input/OUtput
+        type(oversetString) :: master
+        integer(kind=intType), intent(in) :: iStart, iSub
+
+        ! Working
+        integer(Kind=intType) :: i, j, jj, c, n1, n2, curNode, nextNode
+        integer(Kind=intType) :: elem1, elem2, curElem, nextElem
+        integer(kind=intType) :: N
+
+        ! The number of elements in this substring
+        N = 1
+
+        curNode = iStart
+
+        chainLoop: do
+
+            ! Get the currnet element
+            curElem = master%subStr(iSub, N)
+
+            ! Flag the element as used:
+            master%elemUsed(curElem) = 1
+
+            ! Get the two nodes for the current element:
+            n1 = master%conn(1, curElem)
+            n2 = master%conn(2, curElem)
+
+            if (n1 == curNode) then
+                nextNode = n2
+            else
+                nextNode = n1
+            end if
+
+            ! Exit condition 1: Next node was our starting node:
+            if (nextNode == iStart) then
+                exit chainLoop
+            end if
+
+            ! Exit condition 2: The next node has only 1 element, (the one
+            ! we're currently on) so that means the the chain is finished
+            c = master%nte(1, nextNode)
+
+            if (c == 1) then
+                exit chainLoop
+
+            else if (c == 2) then
+                ! With c=2 this easy, just extract the two elements
+                elem1 = master%nte(2, nextNode)
+                elem2 = master%nte(3, nextNode)
+
+                if (elem1 == curElem) then
+                    nextElem = elem2
+                else
+                    nextElem = elem1
+                end if
+            end if
+
+            ! Now add the "nextElem" to our chain:
+            N = N + 1
+            master%subStr(iSub, N) = nextElem
+
+            ! Flag this elemet as being used
+            master%elemUsed(nextElem) = 1
+
+            ! Finally set the nextNode back to the current node for the next
+            ! iteration
+            curNode = nextNode
+        end do chainLoop
+        master%nSubStr(iSub) = N
+    end subroutine doChain
+
+    subroutine createSubStringFromElems(p, s, id)
+
+        use constants
+        implicit none
+
+        ! Input/output
+        type(oversetString), target, intent(in) :: p
+        type(oversetString), intent(out) :: s
+        integer(kind=intType), intent(in) :: id
+
+        ! Working
+        integer(kind=intType) :: i, j, n1, n2, k
+        integer(kind=intType), dimension(:), allocatable :: nodeUsed
+
+        ! First thing we always have to do with a new string is to nullify
+        ! all the poitners
+        call nullifyString(s)
+
+        ! Firstly we can set the number of elements, since we know precisely
+        ! what this is:
+
+        s%nElems = p%nSubStr(1)
+        s%myID = id
+
+        ! Next determine the number of nodes. This is done by flagging the
+        ! nodes in the parent that are used by 's'
+        allocate (nodeUsed(p%nNodes))
+        nodeUsed = 0
+        k = 0
+        do i = 1, s%nElems
+            n1 = p%conn(1, p%subStr(1, i))
+            n2 = p%conn(2, p%subStr(1, i))
+            if (nodeUsed(n1) == 0) then
+                k = k + 1
+                nodeUsed(n1) = k
+            end if
+
+            if (nodeUsed(n2) == 0) then
+                k = k + 1
+                nodeUsed(n2) = k
+            end if
+        end do
+
+        ! We can now set the number of nodes the substring has
+        s%nNodes = k
+
+        ! The number of nodes will equal the number of elements iff the
+        ! string is period. Otherwise we will have 1 more node than element.
+
+        if (s%nNodes == s%nElems) then
+            s%isPeriodic = .True.
+        end if
+
+        ! Allocate and set the node and element parent information
+        allocate (s%pElems(s%nElems), s%pNodes(s%nNodes))
+
+        do i = 1, s%nElems
+            s%pElems(i) = p%subStr(1, i)
+        end do
+
+        ! Now create the pNodes ("link") array such that pNodes(i) points to
+        ! the node index in the parent
+        j = 0
+        do i = 1, p%nNodes
+            if (nodeUsed(i) /= 0) then
+                s%pNodes(nodeUsed(i)) = i
+            end if
+        end do
+
+        ! Set the parent's cNode to point to my nodes
+        do i = 1, s%nNodes
+            p%cNodes(:, s%pNodes(i)) = (/s%myID, i/)
+        end do
+
+        ! Now that we know the mapping between by local nodes-based
+        ! quantities and the parent, we can allocate and set all the
+        ! node-based quantities.
+
+        allocate (s%nodeData(10, s%nNodes), s%intNodeData(3, s%nNodes))
+
+        ! Set the string pointers
+        call setStringPointers(s)
+
+        do i = 1, s%nNodes
+            s%nodeData(:, i) = p%nodeData(:, s%pNodes(i))
+            s%intNodeData(:, i) = p%intNodeData(:, s%pNodes(i))
+        end do
+
+        ! We can now create the local conn too, *USING THE LOCAL NODE NUMBERS*
+        allocate (s%conn(2, s%nElems))
+
+        do i = 1, s%nElems
+            s%conn(1, i) = nodeUsed(p%conn(1, s%pElems(i)))
+            s%conn(2, i) = nodeUsed(p%conn(2, s%pElems(i)))
+        end do
+
+        ! Set the pointer to my parent.
+        s%p => p
+
+        deallocate (nodeUsed)
+
+        ! Last thing we can do is create the nodeToElem for the substring.
+        call createNodeToElem(s)
+    end subroutine createSubStringFromElems
+
+    subroutine combineChainBuffers(s)
+
+        use constants
+        implicit none
+        type(oversetString), intent(inout) :: s
+        integer(kind=intType) :: N1, N2
+
+        N1 = s%nSubStr(1)
+        N2 = s%nSubStr(2)
+
+        ! First reverse the direction of string 2 of the nodes we found
+        s%subStr(2, 1:N2) = s%subStr(2, N2:1:-1)
+
+        ! Now String 1 can be tacked on the end of string2
+        s%subStr(2, N2 + 1:N2 + N1) = s%subStr(1, 1:N1)
+
+        ! And finally copied back to string1
+        s%subStr(1, 1:N1 + N2) = s%subStr(2, 1:N1 + N2)
+        s%nSubStr(1) = N1 + n2
+
+    end subroutine combineChainBuffers
+
+    subroutine selfZip(s, cutOff, nZipped)
+
+        use constants
+        use kdtree2_module
+        use utils, only: myNorm2, cross_prod
+        implicit none
+
+        ! Input/Output
+        type(oversetString), intent(inout), target :: s
+        integer(Kind=intType), intent(out) :: nZipped
+        real(kind=realType), intent(in) :: cutOff
+
+        ! Working
+        integer(kind=intType) :: i, j, k, N, ii, im1, ip1
+        logical :: lastNodeZippered, added
+        real(kind=realType), dimension(3) :: v1, v2, norm
+        real(kind=realType) :: cosCutoff, cosTheta, r2, v1nrm, v2nrm
+        integer(Kind=intType), dimension(:), allocatable :: nodeMap
+        type(kdtree2_result), dimension(:), allocatable :: results
+
+        ! Perform self zipping on the supplied string. The string at this
+        ! point should be either peroidic or since sinded --- no multiple
+        ! loops should be left. Therefore, we can count on the nodes being
+        ! in order.
+
+        allocate (results(25))
+        allocate (nodeMap(s%nNodes))
+        nodeMap = 1
+
+        cosCutoff = cos(cutOff * pi / 180)
+        nzipped = 0
+
+        ! Peroidic string starts at node 1, and uses node 'N' as the previous
+        ! node. Single chains start at node 2 and only go to the N-1 node.
+        if (s%isPeriodic) then
+            im1 = s%nNodes
+            ii = 1
+            ip1 = 2
+            N = s%nNodes
+        else
+            im1 = 1
+            ii = 2
+            ip1 = 3
+            N = s%nNodes - 1
+        end if
+
+        do while (ii <= N)
+
+            ! Peroidic string at end...loop around
+            if (s%isPeriodic .and. ii == N) then
+                ip1 = 1
+            end if
+
+            lastNodeZippered = .False.
+
+            ! Determine the anlge between the vectors
+            v1 = s%x(:, ip1) - s%x(:, ii)
+            v2 = s%x(:, im1) - s%x(:, ii)
+            v1nrm = mynorm2(v1)
+            v2nrm = mynorm2(v2)
+            call cross_prod(v2, v1, norm)
+            norm = norm / mynorm2(norm)
+
+            if (dot_product(norm, s%norm(:, ii)) > zero) then
+
+                ! the dot product of the im1 and ip1 nodes have to be close
+                if (dot_product(s%norm(:, ip1), s%norm(:, im1)) > 0.80) then
+
+                    costheta = dot_product(v1, v2) / (v1nrm * v2nrm)
+
+                    if (costheta > cosCutoff) then
+
+                        call addPotentialTriangle(s, im1, ii, ip1, nodeMap, &
+                                                  results, added)
+
+                        if (added) then
+                            nZipped = nZipped + 1
+                            lastNodeZippered = .True.
+                        end if
+                    end if
+                end if
+            end if
+
+            if (lastNodeZippered) then
+                ! Skip the next node...we'll get it on the next pass
+                ii = ii + 2
+                im1 = ii - 1
+                ip1 = ii + 1
+            else
+                ! Just shuffle along
+                ii = ii + 1
+                ip1 = ii + 1
+                im1 = ii - 1
+            end if
+        end do
+
+        ! Now we will modify our string to remove the elements and nodes
+        ! that got knocked off due to self zipping. This way the calling
+        ! process still sees the same string, it just gets a little
+        ! shorter.
+
+        call shortenString(s, nodeMap)
+        deallocate (results, nodeMap)
+
+    end subroutine selfZip
+
+    subroutine crossZip(str1, N1, N2, str2, N3, N4, debugZipper, failed)
+
+        use constants
+        use utils, only: myNorm2, cross_prod
+
+        implicit none
+
+        type(oversetString), intent(inout) :: str1, str2
+        integer(kind=intType) :: N1, N2, N3, N4
+        logical :: debugZipper, failed
+        ! Working
+        type(oversetString), pointer :: p
+        integer(kind=intType) :: stepsA, stepsB, nStepsA, nStepsB
+        integer(kind=intType) :: nTriToAdd, ii, i, j, k, A, B, Ap, Bp
+        integer(kind=intType) :: aPrev, bPrev
+        real(kind=realType), dimension(3) :: ptA, ptB, ptAp, ptBp
+        !real(kind=realType), dimension(3) :: ptAPrev, ptBPRev
+        real(kind=realType), dimension(3) :: Aoff, Boff, ApOff, BpOff
+        real(kind=realType), dimension(3) :: normA, normB, normAp, normBp
+        real(kind=realType), dimension(3) :: perpA, perpB, perpAp, perpBp
+        !real(kind=realType), dimension(3) :: normAPrev, normBPrev
+        !real(kind=realType), dimension(3) :: perpAPrev, perpBPrev
+        real(kind=realType), dimension(3) :: triNorm1, quadNorm1
+        real(kind=realType), dimension(3) :: triNorm2, quadNorm2
+        logical :: aValid, bValid, advanceA, aPreferred, area1, area2
+        logical :: advanceB
+        logical :: changeA, changeB
+        logical :: aValidPrev, bValidPrev, advanceAPrev, advanceBPrev
+        real(kind=realType) :: sum1, sum2, h, dpa, dpb
+        !am real(kind=realType), parameter :: cutOff = 0.95*3
+        real(kind=realType), parameter :: cutOff = 0.85 * 3
+        ! First determine the the total number of triangles we will add
+        ! total. It is equal to the total number of triangles on each
+        ! string. This will form the index on the do loop.
+        failed = .False.
+        ! Str1 goes forward
+        if (N2 > N1) then
+            nStepsA = N2 - N1
+        else if (N2 < N1) then
+            nStepsA = N2 + str1%nNodes - N1
+        else ! N1 == N2
+            nStepsA = str1%nElems
+        end if
+
+        ! Str2 goes backwards
+        if (N3 < N4) then
+            nStepsB = N3 + str2%nNodes - N4
+        else if (N3 > N4) then
+            nStepsB = N3 - N4
+        else ! N3 == N4
+            nStepsB = str2%nElems
+        end if
+
+        ! Initialize these out of bounds incase something goes very wrong.
+        APrev = -1
+        BPrev = -1
+
+        ! The number of steps we've performed in each edge
+        stepsA = 0
+        stepsB = 0
+
+        ! Initialize the front:
+        A = N1
+        B = N3
+        ptA = str1%x(:, A)
+        ptB = str2%x(:, B)
+
+        normA = str1%norm(:, A)
+        normB = str2%norm(:, B)
+
+        perpA = str1%perpNorm(:, A)
+        perpB = str2%perpNorm(:, B)
+
+        Ap = nextNode(str1, A, .True.)
+        Bp = nextNode(str2, B, .False.)
+        ptAp = str1%x(:, Ap)
+        ptBp = str2%x(:, Bp)
+        normAp = str1%norm(:, Ap)
+        normBp = str2%norm(:, Bp)
+        perpAp = str1%perpNorm(:, Ap)
+        perpBp = str2%perpNorm(:, Bp)
+
+        ! Cross zip nodes N1 to N2 on str1 to nodes N3 to N4 on str2
+        ii = 0
+        do while (ii < nStepsA + nStepsB)
+
+            aValid = .True.
+            bValid = .True.
+            ! ---------------------------------------------------------------
+            ! Check 1: Point-in-Triangle test: This test considers the
+            ! triangle ABA+ and determines if any of the neighbouring points
+            ! on either of the two strings is contained inside the
+            ! triangle. If the test is positive, A+ must be rejected. The
+            ! same test is repeated for B+.
+            ! ---------------------------------------------------------------
+
+            if (triOverlap(ptA, ptB, ptAp, str1, A, Ap) .or. &
+                triOverlap(ptA, ptB, ptAp, str2, B, B)) then
                 aValid = .False.
-             end if
-          end if
-       end if
+            end if
 
-       ! ---------------------------------------------------------------
-       ! Check 6: Front angle test: Try to keep the front as close as
-       ! possible to the gap edges.
-       ! ---------------------------------------------------------------
+            if (triOverlap(ptA, ptB, ptBp, str1, A, A) .or. &
+                triOverlap(ptA, ptB, ptBp, str2, B, Bp)) then
+                bValid = .False.
+            end if
 
-       ! Triangle ABA+. Original implemnetation
-       sum1 = abs(vecAngle(ptA-ptAp, ptB-ptAp)) + abs(vecAngle(ptBp-ptB, ptAp-ptB))
-       sum2 = abs(vecAngle(ptA-ptBp, ptB-ptBp)) + abs(vecAngle(ptBp-ptA, ptAp-ptA))
+            ! ---------------------------------------------------------------
+            ! Check 2: Convex quadrilaterl test: This test considers the
+            ! quadrilateral ABB+A+ and determines if it is convex. For
+            ! connection to point A+ to be valid, the vector areas of
+            ! triangles ABA+ and BB+A+ should have the same size. For
+            ! connection to B+ to be valid, the vector areas of trianges ABB+
+            ! and AB+A+ should be the same sign.  NOTE THAT THIS TEST DOES NOT
+            ! ACTUALLY WORK. IT IS 100% INCORRECT!!! THERE ARE CASES WHERE
+            ! THE SIGN OF BOTH AREAS ARE OPPOSITE! IT CANNOT BE SAFELY USED.
+            ! ---------------------------------------------------------------
 
-       if (sum1 > sum2) then
-          aPreferred = .True.
-       else
-          aPreferred = .False.
-       end if
+            ! area1 = positiveTriArea(ptA, ptB, ptAp, normB)
+            ! area2 = positiveTriArea(ptB, ptBp, ptAp, normB)
 
-       ! ---------------------------------------------------------------
-       ! Check 7: End of string test
-       ! ---------------------------------------------------------------
+            ! if (area1 .neqv. area2) then
+            !    aValid = .False.
+            ! end if
 
-       if (A == Ap) then
-          aValid = .False.
-          bValid = .True.
-       end if
+            ! area1 = positiveTriArea(ptA, ptB, ptBp, normA)
+            ! area2 = positiveTriArea(ptAp, ptBp, ptA, normA)
 
-       if (B == Bp) then
-          bValid = .False.
-          aValid = .True.
-       end if
+            ! if (area1 .neqv. area2) then
+            !    bValid = .False.
+            ! end if
 
-       ! ---------------------------------------------------------------
-       ! Decide on the triangle we want to take.
-       ! ---------------------------------------------------------------
+            ! Instead, check if the triangle we're going to add has a
+            ! positive or negative vector area
 
-       if (aValid .and. .not. bValid) then
+            area1 = positiveTriArea(ptA, ptB, ptAp, normA)
+            if (area1 .eqv. .False.) then
+                aValid = .False.
+            end if
 
-          ! We have no choice but to take A+
-          call addTri(A, str1, B, str2, Ap, str1)
-          advanceA = .True.
-          advanceB = .False.
-          ! Flag the nodes as used
-          str1%xZipNOdeUsed(A) = 1
-          str1%xZipNOdeUsed(Ap) = 1
-          str2%xZipNOdeUsed(B) = 1
+            area2 = positiveTriArea(ptA, ptB, ptBp, normB)
+            if (area2 .eqv. .False.) then
+                bValid = .False.
+            end if
 
-       else if (bValid .and. .not. aValid) then
+            ! ---------------------------------------------------------------
+            ! Check 3: Prism volume test: Using the surface normals,
+            ! "extrude" a prisim in the direction of each surface normal and
+            ! find it's volume. It is is not positive, reject the
+            ! triangle. Since we don't have the node off wall, we will have
+            ! to make do with the normal vectors and average cell size. We
+            ! average the cell size and divide by 1000 to give an approximate
+            ! offwall distance. Then we use the norm veectors to offset in
+            ! that distance to produce the "off" points.
+            ! ---------------------------------------------------------------
+
+            ! h = quarter*(str1%h(A) + str1%h(Ap) + str2%h(B) + str2%h(Bp)) / 1000
+            ! AOff = ptA + normA * h
+            ! BOff = ptB + Bnorm * h
+            ! ApOff = ptAp + normAp * h
+            ! BpOff = ptBp + normBp * h
+
+            ! if (prismVol(A, B, Ap, Aoff, Boff, ApOff) < zero) then
+            !    aValid = .False.
+            ! end if
+
+            ! if (prismVol(A, B, Bp, Aoff, Boff, BpOff) < zero) then
+            !    bValid = .False.
+            ! end if
+
+            ! ---------------------------------------------------------------
+            ! Check 4: Interpolation stencil test: This one isn't implemented
+            ! ---------------------------------------------------------------
+
+            ! ---------------------------------------------------------------
+            ! Check 5: Surface normal compatibility test. The surface normal
+            ! from the triangle should be pointing (mostly) in the same
+            ! direction as the normal of the quad that this triangle shares
+            ! and edge with. THIS ALSO DOES NOT WORK! What we have to do
+            ! instead, is check the normal tri normal against the node
+            ! normals it would be using. This is simplier and is vastly
+            ! superior.
+            ! ---------------------------------------------------------------
+
+            call cross_prod(ptB - ptA, ptAp - ptA, triNorm1)
+            triNorm1 = triNorm1 / mynorm2(triNorm1)
+
+            ! Compute the sum of the dot product of the nodal norms with the triNorm
+            sum1 = dot_product(triNorm1, normA) + dot_product(triNorm1, normB) + &
+                   dot_product(triNorm1, normAp)
+
+            call cross_prod(ptB - ptA, ptBp - ptA, triNorm2)
+            triNorm2 = triNorm2 / mynorm2(triNorm2)
+
+            sum2 = dot_product(triNorm2, normA) + dot_product(triNorm2, normB) + &
+                   dot_product(triNorm2, normBp)
+
+            ! Only use this to help pick one if both are still valid:
+            if (aValid .and. bValid .and. dot_product(triNorm1, triNorm2) < 0.8) then
+
+                ! Only use this to help pick one if both are still valid:
+
+                if (sum1 < cutoff .and. sum2 > cutoff) then
+                    aValid = .False.
+
+                else if (sum2 < cutoff .and. sum1 > cutoff) then
+                    bValid = .False.
+
+                else if (sum1 < cutoff .and. sum2 < cutoff) then
+                    ! Both bad. Take the least bad one
+                    if (sum1 > sum2) then
+                        bValid = .False.
+                    else
+                        aValid = .False.
+                    end if
+                end if
+            end if
 
-          ! We have no choice but to take B+
+            ! ---------------------------------------------------------------
+            ! Check 6: Front angle test: Try to keep the front as close as
+            ! possible to the gap edges.
+            ! ---------------------------------------------------------------
 
-          call addTri(A, str1, B, str2, Bp, str2)
-          str1%xZipNOdeUsed(A) = 1
-          str2%xZipNOdeUsed(B) = 1
-          str2%xZipNOdeUsed(Bp) = 1
+            ! Triangle ABA+. Original implemnetation
+            sum1 = abs(vecAngle(ptA - ptAp, ptB - ptAp)) + abs(vecAngle(ptBp - ptB, ptAp - ptB))
+            sum2 = abs(vecAngle(ptA - ptBp, ptB - ptBp)) + abs(vecAngle(ptBp - ptA, ptAp - ptA))
 
-          advanceA = .False.
-          advanceB = .True.
+            if (sum1 > sum2) then
+                aPreferred = .True.
+            else
+                aPreferred = .False.
+            end if
 
-       else if (aValid .and. bValid) then
+            ! ---------------------------------------------------------------
+            ! Check 7: End of string test
+            ! ---------------------------------------------------------------
 
-          ! We could take either. Use the preferred triangle.
-          if (aPreferred) then
+            if (A == Ap) then
+                aValid = .False.
+                bValid = .True.
+            end if
 
-             call addTri(A, str1, B, str2, Ap, str1)
+            if (B == Bp) then
+                bValid = .False.
+                aValid = .True.
+            end if
 
-             str1%xZipNOdeUsed(A) = 1
-             str1%xZipNOdeUsed(Ap) = 1
-             str2%xZipNOdeUsed(B) = 1
+            ! ---------------------------------------------------------------
+            ! Decide on the triangle we want to take.
+            ! ---------------------------------------------------------------
 
-             advanceA = .True.
-             advanceB = .False.
+            if (aValid .and. .not. bValid) then
 
-          else
+                ! We have no choice but to take A+
+                call addTri(A, str1, B, str2, Ap, str1)
+                advanceA = .True.
+                advanceB = .False.
+                ! Flag the nodes as used
+                str1%xZipNOdeUsed(A) = 1
+                str1%xZipNOdeUsed(Ap) = 1
+                str2%xZipNOdeUsed(B) = 1
 
-             call addTri(A, str1, B, str2, Bp, str2)
-             str1%xZipNOdeUsed(A) = 1
-             str2%xZipNOdeUsed(B) = 1
-             str2%xZipNOdeUsed(Bp) = 1
+            else if (bValid .and. .not. aValid) then
 
-             advanceA = .False.
-             advanceB = .True.
+                ! We have no choice but to take B+
 
-          end if
+                call addTri(A, str1, B, str2, Bp, str2)
+                str1%xZipNOdeUsed(A) = 1
+                str2%xZipNOdeUsed(B) = 1
+                str2%xZipNOdeUsed(Bp) = 1
 
-       else
+                advanceA = .False.
+                advanceB = .True.
 
-          ! Things are not looking good...but
+            else if (aValid .and. bValid) then
 
-          if (avalidPRev .and. bvalidPrev) then
-             ! We might be able to save it! The last triangle we added
-             ! was a choice..both were valid, but we picked one
-             ! because it was preferred. Now we know the one we did
-             ! pick screwed us for the next triangle...go back and
-             ! pick the other one instead!
+                ! We could take either. Use the preferred triangle.
+                if (aPreferred) then
 
-             ! First 'delete' the triangle by decrementing the tri
-             ! counter and edge counters
-             p => str1%p
-             p%nTris = p%nTris - 1
-             p%nEdges = p%nEdges - 3
+                    call addTri(A, str1, B, str2, Ap, str1)
 
-             ! Now we determine which one was actually added and add
-             ! the other one instead
+                    str1%xZipNOdeUsed(A) = 1
+                    str1%xZipNOdeUsed(Ap) = 1
+                    str2%xZipNOdeUsed(B) = 1
 
-             if (advanceAPrev) then
-                ! We need to add the old B triangle instead, which
-                ! means the A triangle we had added was bad
-                aValidPrev = .False.
-                stepsB = stepsB + 1
-                stepsA = stepsA - 1
+                    advanceA = .True.
+                    advanceB = .False.
 
-                call addTri(APrev, str1, B, str2, Bp, str2)
+                else
 
-                ! Reset the 'A' data by shuffling backwards: The 'A'
-                ! data is copied to 'Ap' and the 'A' data is restored from Aprev
+                    call addTri(A, str1, B, str2, Bp, str2)
+                    str1%xZipNOdeUsed(A) = 1
+                    str2%xZipNOdeUsed(B) = 1
+                    str2%xZipNOdeUsed(Bp) = 1
 
-                Ap = A
-                ptAp= ptA
-                normAp = normA
-                perpAp = perpA
+                    advanceA = .False.
+                    advanceB = .True.
 
-                A = Aprev
-                ptA = str1%x(:, A)
-                normA = str1%norm(:, A)
-                perpA = str1%perpNorm(:, A)
+                end if
 
-                ! Increment the 'B' data since we actually used B
+            else
 
-                B = Bp
-                ptB = ptBp
-                normB = normBp
-                perpB = perpBp
+                ! Things are not looking good...but
+
+                if (avalidPRev .and. bvalidPrev) then
+                    ! We might be able to save it! The last triangle we added
+                    ! was a choice..both were valid, but we picked one
+                    ! because it was preferred. Now we know the one we did
+                    ! pick screwed us for the next triangle...go back and
+                    ! pick the other one instead!
+
+                    ! First 'delete' the triangle by decrementing the tri
+                    ! counter and edge counters
+                    p => str1%p
+                    p%nTris = p%nTris - 1
+                    p%nEdges = p%nEdges - 3
+
+                    ! Now we determine which one was actually added and add
+                    ! the other one instead
+
+                    if (advanceAPrev) then
+                        ! We need to add the old B triangle instead, which
+                        ! means the A triangle we had added was bad
+                        aValidPrev = .False.
+                        stepsB = stepsB + 1
+                        stepsA = stepsA - 1
+
+                        call addTri(APrev, str1, B, str2, Bp, str2)
+
+                        ! Reset the 'A' data by shuffling backwards: The 'A'
+                        ! data is copied to 'Ap' and the 'A' data is restored from Aprev
+
+                        Ap = A
+                        ptAp = ptA
+                        normAp = normA
+                        perpAp = perpA
+
+                        A = Aprev
+                        ptA = str1%x(:, A)
+                        normA = str1%norm(:, A)
+                        perpA = str1%perpNorm(:, A)
+
+                        ! Increment the 'B' data since we actually used B
+
+                        B = Bp
+                        ptB = ptBp
+                        normB = normBp
+                        perpB = perpBp
+
+                        ! And get the new data for Bp
+                        Bp = nextNode(str2, B, .False.)
+                        ptBp = str2%x(:, Bp)
+                        normBp = str2%norm(:, Bp)
+                        perpBp = str2%perpNorm(:, Bp)
+
+                        ! We *actually* advanced B so..
+                        advanceBPrev = .True.
+                        advanceAPrev = .False.
+                    else
+
+                        ! We need to add the old A triangle, which means the B
+                        ! triangle we had added was bad
+                        bValidPrev = .False.
+                        stepsB = stepsB - 1
+                        stepsA = stepsA + 1
+                        call addTri(A, str1, Bprev, str2, Ap, str1)
+
+                        ! Reset the 'B' data by shuffling backwards: The 'B'
+                        ! data is copied to 'Bp' and the 'B' data is restored from Bprev
+
+                        Bp = B
+                        ptBp = ptB
+                        normBp = normB
+                        perpBp = perpB
+
+                        B = Bprev
+                        ptB = str2%x(:, B)
+                        normB = str2%norm(:, B)
+                        perpB = str2%perpNorm(:, B)
+
+                        ! Increment the 'A' data since we actually used A
+
+                        A = Ap
+                        ptA = ptAp
+                        normA = normAp
+                        perpA = perpAp
+
+                        ! And get the new data for Ap
+                        Ap = nextNode(str2, A, .True.)
+                        ptAp = str1%x(:, Ap)
+                        normAp = str1%norm(:, Ap)
+                        perpAp = str1%perpNorm(:, Ap)
+                        ! We *actually* advanced A so..
+                        advanceAPrev = .True.
+                        advanceBPrev = .False.
+
+                    end if
+
+                    ! We *don't* increment ii since this is in essence still
+                    ! the "last" iteration. We just cycle and try the current
+                    ! one again.
+                    if (debugZipper) then
+                        print *, 'Saved cross zip from bad front.'
+                    end if
+                    cycle
 
-                ! And get the new data for Bp
-                Bp = nextNode(str2, B, .False.)
-                ptBp = str2%x(:, Bp)
-                normBp = str2%norm(:, Bp)
-                perpBp = str2%perpNorm(:, Bp)
+                end if
 
-                ! We *actually* advanced B so..
-                advanceBPrev = .True.
-                advanceAPrev = .False.
-             else
+                advanceA = .False.
+                advanceB = .False.
 
-                ! We need to add the old A triangle, which means the B
-                ! triangle we had added was bad
-                bValidPrev = .False.
-                stepsB = stepsB - 1
-                stepsA = stepsA + 1
-                call addTri(A, str1, Bprev, str2, Ap, str1)
+                ! Ewww. neither triangle is valid. Do not add the triangle
+                ! just return and let the cross zip restart. This should
+                ! skip over the bad area and the pocket zip can do the bad
+                ! region.
+                failed = .True.
+                return
 
-                ! Reset the 'B' data by shuffling backwards: The 'B'
-                ! data is copied to 'Bp' and the 'B' data is restored from Bprev
+            end if
 
-                Bp = B
-                ptBp= ptB
-                normBp = normB
-                perpBp = perpB
+            ! Now we have to shuffle along the string.
+            if (advanceA .and. .not. advanceB) then
 
-                B = Bprev
-                ptB = str2%x(:, B)
-                normB = str2%norm(:, B)
-                perpB = str2%perpNorm(:, B)
+                stepsA = stepsA + 1
 
-                ! Increment the 'A' data since we actually used A
+                ! Save a copy of the previous A info
+                APrev = A
 
+                ! Copy the Ap to A
                 A = Ap
                 ptA = ptAp
                 normA = normAp
                 perpA = perpAp
 
                 ! And get the new data for Ap
-                Ap = nextNode(str2, A, .True.)
+                Ap = nextNode(str1, A, .True.)
                 ptAp = str1%x(:, Ap)
                 normAp = str1%norm(:, Ap)
                 perpAp = str1%perpNorm(:, Ap)
-                ! We *actually* advanced A so..
-                advanceAPrev = .True.
-                advanceBPrev = .False.
-
-             end if
-
-             ! We *don't* increment ii since this is in essence still
-             ! the "last" iteration. We just cycle and try the current
-             ! one again.
-             if (debugZipper) then
-                print *,'Saved cross zip from bad front.'
-             end if
-             cycle
-
-          end  if
-
-          advanceA = .False.
-          advanceB = .False.
-
-          ! Ewww. neither triangle is valid. Do not add the triangle
-          ! just return and let the cross zip restart. This should
-          ! skip over the bad area and the pocket zip can do the bad
-          ! region.
-          failed = .True.
-          return
-
-       end if
-
-       ! Now we have to shuffle along the string.
-       if (advanceA .and. .not.advanceB) then
-
-          stepsA = stepsA + 1
-
-          ! Save a copy of the previous A info
-          APrev = A
-
-          ! Copy the Ap to A
-          A = Ap
-          ptA = ptAp
-          normA = normAp
-          perpA = perpAp
-
-          ! And get the new data for Ap
-          Ap = nextNode(str1, A, .True.)
-          ptAp = str1%x(:, Ap)
-          normAp = str1%norm(:, Ap)
-          perpAp = str1%perpNorm(:, Ap)
-
-       else if (advanceB .and. .not.advanceA) then
-
-          stepsB = stepsB + 1
-
-          ! Save a copy of the previous B info incase we need it
-          BPrev = B
-
-          ! Copy the Bp to B
-          B = Bp
-          ptB = ptBp
-          normB = normBp
-          perpB = perpBp
-
-          ! And get the new data for Bp
-          Bp = nextNode(str2, B, .False.)
-          ptBp = str2%x(:, Bp)
-          normBp = str2%norm(:, Bp)
-          perpBp = str2%perpNorm(:, Bp)
-       end if
-
-       ! Save the prevoius valid triangles and what was advanced
-       aValidPrev = aValid
-       bValidPrev = bValid
-       advanceAPrev = advanceA
-       advanceBPrev = advanceB
-
-       ! Finally increment the number of triangles we've used so far.
-       ii = ii + 1
-    end do
-
-  contains
-
-    function nextNode(str, i, pos)
-
-      implicit none
-      type(oversetString), intent(iN) :: str
-      integer(kind=intType), intent(in) :: i
-      logical, intent(in) :: pos
-      integer(kind=intType) :: nextNode
-
-      if (pos) then
-         if (stepsA == nStepsA) then
-            nextNode = i
-         else
-            nextNode = i + 1
-            if (nextNode > str%nNodes) then
-               if (str%isPeriodic) then
-                  ! Loop back around
-                  nextNode = 1
-               else
-                  ! Leave it at the same node
-                  nextNode = i
-               end if
-            end if
-         end if
-      else
-         if (stepsB == nStepsB) then
-            nextNode = i
-         else
-            nextNode = i - 1
-            if (nextNode < 1) then
-               if (str%isPeriodic) then
-                  ! Loop back around
-                  nextNode = str%nNodes
-               else
-                  ! Leave it at the same node
-                  nextNode = i
-               end if
-            end if
-         end if
-      end if
-    end function nextNode
-
-    function vecAngle(vec1, vec2)
-      implicit none
 
-      ! Input/Output
-      real(kind=realType), dimension(3), intent(in) :: vec1, vec2
-      real(kind=realType) :: vecAngle
+            else if (advanceB .and. .not. advanceA) then
 
-      ! Working
-      real(kind=realType), dimension(3) :: vecA, vecB
+                stepsB = stepsB + 1
 
-      vecA = vec1 / mynorm2(vec1)
-      vecB = vec2 / mynorm2(vec2)
+                ! Save a copy of the previous B info incase we need it
+                BPrev = B
 
-      vecAngle = acos(dot_product(vecA, vecB))
+                ! Copy the Bp to B
+                B = Bp
+                ptB = ptBp
+                normB = normBp
+                perpB = perpBp
 
-    end function vecAngle
-
-    function elemBetweenNodes(str, a, b)
-      implicit none
+                ! And get the new data for Bp
+                Bp = nextNode(str2, B, .False.)
+                ptBp = str2%x(:, Bp)
+                normBp = str2%norm(:, Bp)
+                perpBp = str2%perpNorm(:, Bp)
+            end if
 
-      ! Input/Output
-      type(oversetString), intent(in) :: str
-      integer(kind=intType), intent(in) :: a,b
-      integer(kind=intType) :: elemBetweenNodes
+            ! Save the prevoius valid triangles and what was advanced
+            aValidPrev = aValid
+            bValidPrev = bValid
+            advanceAPrev = advanceA
+            advanceBPrev = advanceB
+
+            ! Finally increment the number of triangles we've used so far.
+            ii = ii + 1
+        end do
+
+    contains
+
+        function nextNode(str, i, pos)
+
+            implicit none
+            type(oversetString), intent(iN) :: str
+            integer(kind=intType), intent(in) :: i
+            logical, intent(in) :: pos
+            integer(kind=intType) :: nextNode
+
+            if (pos) then
+                if (stepsA == nStepsA) then
+                    nextNode = i
+                else
+                    nextNode = i + 1
+                    if (nextNode > str%nNodes) then
+                        if (str%isPeriodic) then
+                            ! Loop back around
+                            nextNode = 1
+                        else
+                            ! Leave it at the same node
+                            nextNode = i
+                        end if
+                    end if
+                end if
+            else
+                if (stepsB == nStepsB) then
+                    nextNode = i
+                else
+                    nextNode = i - 1
+                    if (nextNode < 1) then
+                        if (str%isPeriodic) then
+                            ! Loop back around
+                            nextNode = str%nNodes
+                        else
+                            ! Leave it at the same node
+                            nextNode = i
+                        end if
+                    end if
+                end if
+            end if
+        end function nextNode
 
-      ! Working
-      integer(kind=intType) :: e1, e2, e3, e4
+        function vecAngle(vec1, vec2)
+            implicit none
 
-      if (str%nte(1, a) == 1) then
-         e1 = str%nte(2, a)
-         e2 = e1
-      else
-         e1 = str%nte(2, a)
-         e2 = str%nte(3, a)
-      end if
-
-      if (str%nte(1, b) == 1) then
-         e3 = str%nte(2, b)
-         e4 = e3
-      else
-         e3 = str%nte(2, b)
-         e4 = str%nte(3, b)
-      end if
-
-      ! Two of the edges are the same. And this is the one that must
-      ! be between the two nodes.
-
-      if (e1 == e3 .or. e1 == e4) then
-         elemBetweenNodes = e1
-      else
-         elemBetweenNodes = e2
-      end if
-
-    end function elemBetweenNodes
-
-    function triArea(pt1, pt2, pt3)
-
-      use constants
-      use utils, only : myNorm2, cross_prod
-      implicit none
-
-      ! Input/Output
-      real(kind=realType), intent(in), dimension(3) :: pt1, pt2, pt3
-      real(kind=realType) :: triArea
-
-      ! Working
-      real(kind=realType), dimension(3) :: norm
-
-      call cross_prod(pt2-pt1, pt3-pt1, norm)
-      triArea = half * mynorm2(norm)
-
-    end function triArea
-
-  end subroutine crossZip
-
-  subroutine addTri(A, sA, B, sB, C, sC)
-
-    ! Form a triangle from index 'A' on string 'sA' , index 'B' on
-    ! string 'sB' and index 'C' on string 'sC'
-
-    use constants
-    implicit none
-
-    ! Input/Output
-    integer(kind=intType), intent(in) :: A, B, C
-    type(oversetString), intent(in) :: sA, sB, sC
-
-    ! Working
-    type(oversetString), pointer :: p
-    integer(kind=intType) :: mn1, mn2, mn3
-    p => sA%p
-
-    p%nTris = p%nTris+ 1
-
-    ! mn = master node
-    mn1 = sA%pNodes(A)
-    mn2 = sB%pNodes(B)
-    mn3 = sC%pNodes(C)
-
-    p%tris(:, p%nTris) = (/mn1, mn2, mn3/)
-
-    ! Add these three edges to master list of edges
-
-    ! Edge 1:
-    p%nEdges = p%nEdges + 1
-    p%edges(p%nEdges)%n1 = mn1
-    p%edges(p%nEdges)%n2 = mn2
-
-    ! Edge 2:
-    p%nEdges = p%nEdges + 1
-    p%edges(p%nEdges)%n1 = mn2
-    p%edges(p%nEdges)%n2 = mn3
-
-    ! Edge 3:
-    p%nEdges = p%nEdges + 1
-    p%edges(p%nEdges)%n1 = mn3
-    p%edges(p%nEdges)%n2 = mn1
-
-  end subroutine addTri
-
-  subroutine makeCrossZip(p, strings, nStrings, debugZipper)
-
-    use constants
-    implicit none
-
-    ! Input/output
-    integer(kind=intType), intent(in) :: nStrings
-    type(oversetString), intent(inout), target :: p, strings(nStrings)
-    type(oversetString), pointer :: s, s1, s2
-    logical, intent(in) :: debugZipper
-    ! Working
-    integer(kind=intType) :: i, iStart, iEnd, jStart, jEnd, iStart_j, iEnd_j
-    integer(kind=intType) :: curOtherID, iString, ii, nextI, curIStart, nIElems_j
-    integer(kind=intType) :: nIElemsBeg, nJElemsBeg, nElem1, nElem2
-    integer(kind=intType) ::iStart_orig, iEnd_orig, jStart_orig, jEnd_orig
-    logical :: fullLoop1, fullLoop2, dummy, failed
-    ! The purpose of this routine is to determine the ranges on two
-    !  paired strings that are continuously paired and suitable for
-    !  performing cross zipping.
-
-    ! Allocate arrays to keep track of nodes that have already been
-    !  used in cross zipping.
-    do i=1, nstrings
-       s => strings(i)
-       allocate(s%XzipNodeUsed(s%nNodes))
-       s%xZipNodeUsed = 0
-    end do
-
-    strLoop: do iString=1,nStrings
-
-       ! Skip strings that were pocekts
-       if (strings(iString)%isPocket) then
-          cycle
-       end if
-
-       ! S1 is the curent '1' string we are working with
-       s1 => strings(iString)
-
-       ! Find the lowest node number that isn't used:
-       curIStart = startNode(s1)
-       do while(curIStart > 0)
-
-          if (debugZipper) then
-             print *,'------------------------------------------------'
-             print *,'Starting string ', s1%myid, 'at index ', curIstart
-             print *,'------------------------------------------------'
-          end if
-
-          iStart = curIStart
-          ! Other ID is the string attached at the current pt.
-          curOtherID = s1%otherID(1, iStart)
-
-          if (curOtherID == -1) then
-             print *,'*************************************************************************'
-             print *,'Error during makeCrossZip: Point ', iStart, 'does not have a matching point'
-             print *,'Position: ', s1%x(:, iStart)
-             print *,'*************************************************************************'
-             stop
-          end if
-
-          ! S2 is the current '2' string we are working with
-          s2 => strings(curOtherID)
-          jStart = s1%otherID(2, iStart)
-
-          ! ---------------- s1 increments -------------
-          ! The goal is to increment s1 as far as we can go in the
-          ! NEGATIVE direction.
-          call traceMatch(s1, iStart, .False., curOtherID, iEnd, fullLoop1)
-
-          if (.not. fullLoop1) then
-             ! Now set iStart to iEnd. Basically we start right at the
-             ! negative end the chain and traverse in the POSITIVE
-             ! direction.
-             iStart = iEnd
-             call traceMatch(s1, iStart, .True., curOtherID, iEnd, dummy)
-          end if
-
-          ! Now, iStart -> iEnd (in the positive order) is the maximum
-          ! possible extent that s1 could be connected to s1
-          ! over. However, s2 may have something to say about that. We
-          ! do the same operation for s2. Note that the orders are reversed.
-
-          ! ---------------- s2 increments -------------
-          call traceMatch(s2, jStart, .True., s1%myID, jEnd, fullLoop2)
-
-          ! If the first jnode isnt' actually matched to me, like I am
-          ! to him. Therefore skip me, and go to the next one.
-          if (jStart == jEnd .and. .not. fullLoop2 .and. &
-               s2%otherID(1, jStart) /= s1%myID) then
-             s1%xZipNodeUsed(curIStart) = 1
-             curIStart = startNode(s1)
-             cycle
-          end if
-
-          if (.not. fullLoop2) then
-             jStart = jEnd
-             call traceMatch(s2, jStart, .False., s1%myID, jEnd, dummy)
-          end if
-
-          if ((iStart == iEnd .and. .not. fullLoop1)  .or.&
-               (jStart == jEnd .and. .not. fullLoop2)) then
-             ! Can't go anywhere. Flag this node and the next.
-
-             s1%xZipNodeUsed(curIStart) = 1
-             curIStart = startNode(s1)
-             cycle
-          end if
-
-          if (debugZipper) then
-             print *,'Initial Range s1:', istart, iend, fullLoop1
-             print *,'Initial Range s2:', jstart, jend, fullLoop2
-          end if
-
-          ! Save the original start/endn locations
-          iStart_orig = iStart
-          iEnd_orig = iEnd
-          jStart_orig = jStart
-          jEnd_orig = jEnd
-
-          if ((istart == iend .and. fullLoop1) .and. &
-               (jstart == jend .and. fullLoop2)) then
-             ! s1 fully attached to s2
-
-             call closestSymmetricNode(s1, s2, istart, jstart)
-             iEnd = iStart
-             jEnd = jStart
-
-          else if((iStart == iEnd .and. fullLoop1) .and. .not. fullLoop2) then
-
-             ! Project jStart and jEnd onto s1
-             iStart = s2%otherID(2, jStart)
-             iEnd   = s2%otherID(2, jEnd)
-
-          else if((jStart == jEnd .and. fullLoop2) .and. .not. fullLoop1) then
-
-             ! Project iStart and iEnd onto s2
-             jStart = s1%otherID(2, iStart)
-             jEnd   = s1%otherID(2, iEnd)
-
-          else
-
-             ! part of s1 is attached to part of s2
-
-             nIElemsBeg = elemsForRange(s1, iStart, iEnd, .True.)
-             nJElemsBeg = elemsForRange(s2, jStart, jEnd, .False.)
-
-             ! This the "projection" of the 'j' string on the 'i'
-             ! string. Basically this is the range the 'j' string
-             ! wants to "use up" on the i string.
-             iStart_j = s2%otherID(2, jStart)
-             iEnd_j   = s2%otherID(2, jEnd)
-
-             ! These could match up with iStart and iEnd or they could
-             ! not. That's what we need to determine here.
-
-             ! Need to check if iStart_j is "larger" than istart. We
-             ! just increment using nextNode to take care of periodic
-             ! boundaries.
-             if (iStart_j /= iStart) then
-                ! The starting points are different. Increment iStart
-                ! until we find
-                i = iStart
-                do ii=1, nIElemsBeg
-                   i = nextNode(s1, i)
-                   if (i == iStart_j) then
-                      iStart = iStart_j
-                      exit
-                   end if
-                end do
-             end if
-
-             if (iEnd_j /= iEnd) then
-                ! The starting points are different. Decrement jEnd
-                ! until we find
-                i = iEnd
-                do ii=1, nIElemsBeg
-                   i = prevNode(s1, i)
-                   if (i == iEnd_j) then
-                      iEnd = iEnd_j
-                      exit
-                   end if
-                end do
-             end if
-
-             ! Now with the updated range. Project the iRange back to the
-             ! the final J range.
-             jStart = s1%otherID(2, iStart)
-             jEnd   = s1%otherID(2, iEnd)
-
-          end if
-
-          if (debugZipper) then
-             print *,'Zipping string: ', s1%myid, ' with ', s2%myid
-             print *,'s1 range:', istart, iend
-             print *,'s2 range:', jstart, jend
-          end if
-
-          ! Before we do the zip, make sure the ranges have not
-          ! degenerated to 0 elements
-          nElem1 = 1 ! Doesn't matter, just not zero
-          nElem2 = 1 ! Doesn't matter, just not zero
-          if (.not. fullLoop1) then
-             nElem1 = elemsForRange(s1, iStart, iEnd, .True.)
-             if (iStart == iEnd) then
-                nElem1 = 0
-             end if
-
-          end if
-          if (.not. fullLoop2) then
-             nElem2 = elemsForRange(s2, jStart, jEnd, .False.)
-             if (jStart == jEnd) then
-                nElem2 = 0
-             end if
-          end if
-
-          if  (nElem1 > 0 .and. nElem2 > 0) then
-             ! Do actual cross zip if we still have elements left on both strings
-             call crossZip(s1, iStart, iEnd, s2, jStart, jEnd, debugZipper, failed)
-
-             ! If we succefully cross zippered what we were suppoed to
-             ! flag all the nodes from the original region as done.
-             if (.not. failed) then
-                call flagNodesUsed(s1, iStart_orig, iEnd_orig, .True.)
-                call flagNodesUsed(s2, jStart_orig, jEnd_orig, .False.)
-             else
-                ! UhOh. We got stopped part way through. Flag just the
-                ! nodes at the beginning that we didn't use. Leave
-                ! those for the pocket. The nodes up to where theh
-                ! cross zip stopped were flagged internally in cross zip.
-                call flagNodesUsed(s1, iStart_orig, iStart, .True.)
-                call flagNodesUsed(s2, jStart_orig, jStart, .False.)
-             end if
-          else
-
-             ! Flag the full range of elements are consumed even
-             ! though we didn't do the cross zip. Leave it for the
-             ! pocket zipping.
-             call flagNodesUsed(s1, iStart_orig, iEnd_orig, .True.)
-             call flagNodesUsed(s2, jStart_orig, jEnd_orig, .False.)
-          end if
-
-          ! Find the next starting index:
-          curIStart = startNode(s1)
-
-       end do
-    end do strLoop
-
-  contains
-
-    function startNode(s)
-      ! Determine the lowest index of a non-used xzip node for
-      ! string 's'.
-      implicit none
-      type(oversetString) :: s
-      integer(kind=intType) :: startNode, i
-
-      ! This will be the return value if all nodes are used:
-      startNode = 0
-      nodeLoop: do i=1, s%nNodes
-         if (s%xZipNodeUsed(i) == 0) then
-            startNode = i
-            exit nodeLoop
-         end if
-      end do nodeLoop
-    end function startNode
-
-    function nextNode(s, i)
-
-      implicit none
-      type(oversetString), intent(iN) :: s
-      integer(kind=intType), intent(in) :: i
-      integer(kind=intType) :: nextNode
-
-      ! Normally just increment:
-      nextNode = i + 1
-
-      if (i == s%nNodes) then
-         if (s%isPeriodic) then
-            nextNode = 1
-         else
-            ! Can't go any further
-            nextNode = i
-         end if
-      end if
-
-      ! If the next node is used. The next node is set the current
-      ! one.
-      if (s%xZipNodeUsed(nextNode) == 1) then
-         nextNode = i
-      end if
-    end function nextNode
-
-    function simpleNextNode(s, i)
-
-      implicit none
-      type(oversetString), intent(iN) :: s
-      integer(kind=intType), intent(in) :: i
-      integer(kind=intType) :: simpleNextNode
-
-      ! Normally just increment:
-      simpleNextNode = i + 1
-
-      if (i == s%nNodes) then
-         if (s%isPeriodic) then
-            simpleNextNode = 1
-         else
-            ! Can't go any further
-            simpleNextNode = i
-         end if
-      end if
-    end function simpleNextNode
-
-    function prevNode(s, i)
-
-      implicit none
-      type(oversetString), intent(iN) :: s
-      integer(kind=intType), intent(in) :: i
-      integer(kind=intTYpe) :: prevNode
-      ! Normally just increment:
-      prevNode = i - 1
-
-      if (i == 1) then
-         if (s%isPeriodic) then
-            prevNode = s%nNodes
-         else
-            ! Can't go any further
-            prevNode = i
-         end if
-      end if
-
-      ! If the next node is used. The next node is set the current
-      ! one.
-      if (s%xZipNodeUsed(prevNode) == 1) then
-         prevNode = i
-      end if
-    end function prevNode
-
-    subroutine traceMatch(s, iStart, pos, checkID, iEnd, fullLoop)
-
-      implicit none
-
-      ! Given a starting position 'iStart' on string 's', traverse in
-      ! the 'POSitive' or '.not. POSitive' direction checking that the
-      ! otherID still matches "checkID". Return the ending position
-      ! 'iEnd'.
-
-      ! Input/Output
-      type(oversetString) :: s
-      integer(kind=intType), intent(in) :: iStart, checkID
-      logical, intent(in) :: pos
-      integer(kind=intType), intent(out) :: iEnd
-      logical, intent(out) :: fullLoop
-
-      ! Working
-      integer(kind=intType) :: i, nextI
-
-      i = iStart
-      fullLoop = .False.
-
-      traverseLoop: do
-         if (pos) then
-            nextI = nextNode(s, i)
-         else
-            nextI = prevNode(s, i)
-         end if
-         if (nextI == i .or. s%otherID(1, nextI) /= checkID) then
-            ! We can't go any further than we already are
-            iEnd = i
-            exit traverseLoop
-         end if
-
-         ! Continue to the next one.
-         i = nextI
-
-         if (i == iStart) then
-            fullLoop = .True.
-            iEnd = i
-            exit traverseLoop
-         end if
-      end do traverseLoop
-    end subroutine traceMatch
-
-    subroutine flagNodesUsed(s, N1, N2, pos)
-
-      implicit none
-
-      ! Input/Output
-      type(oversetString) :: s
-      integer(kind=intType), intent(in) :: N1, N2
-      logical, intent(in) :: pos
-
-      ! Working
-      integer(kind=intType) :: nSteps, i, nextI
-
-      if (pos) then
-         if (N2 > N1) then
-            nSteps = N2 - N1
-         else if (N2 < N1) then
-            nSteps = N2 + s%nNodes - N1
-         else ! N1 == N2
-            nSteps = s%nElems
-         end if
-      else
-         if (N1 < N2) then
-            nSteps = N1 + s%nNodes - N2
-         else if (N1 > N2) then
-            nSteps = N1 - N2
-         else ! N3 == N4
-            nSteps =  s%nElems
-         end if
-      end if
-
-      s%xZipNodeUsed(N1) = 1
-      i = N1
-      do ii=1, nSteps
-         if (pos) then
-            nextI = nextNode(s, i)
-         else
-            nextI = prevNode(s, i)
-         end if
-
-         s%xZipNodeUsed(nextI) = 1
-         i = nextI
-      end do
-    end subroutine flagNodesUsed
-
-    function elemsForRange(s, N1, N2, pos)
-      ! Determine the number of elements between N1 and N2 for for the
-      ! "POSitive" or "not POSIitive" (negative) direction.
-
-      implicit none
-      type(oversetString) :: s
-      integer(kind=intType), intent(in) :: N1, N2
-      logical :: pos
-      integer(kind=intType) :: elemsForRange
-
-      if (.not. s%isPeriodic) then
-         if (pos) then
-            elemsForRange = N2 - N1
-         else
-            elemsForRange = N1 - N2
-         end if
-      else ! Periodic
-         if (pos) then
-            if (N2 == N1) then
-               elemsForRange = s%nElems
-            else if (N2 > N1) then
-               elemsForRange = N2 - N1
+            ! Input/Output
+            real(kind=realType), dimension(3), intent(in) :: vec1, vec2
+            real(kind=realType) :: vecAngle
+
+            ! Working
+            real(kind=realType), dimension(3) :: vecA, vecB
+
+            vecA = vec1 / mynorm2(vec1)
+            vecB = vec2 / mynorm2(vec2)
+
+            vecAngle = acos(dot_product(vecA, vecB))
+
+        end function vecAngle
+
+        function elemBetweenNodes(str, a, b)
+            implicit none
+
+            ! Input/Output
+            type(oversetString), intent(in) :: str
+            integer(kind=intType), intent(in) :: a, b
+            integer(kind=intType) :: elemBetweenNodes
+
+            ! Working
+            integer(kind=intType) :: e1, e2, e3, e4
+
+            if (str%nte(1, a) == 1) then
+                e1 = str%nte(2, a)
+                e2 = e1
+            else
+                e1 = str%nte(2, a)
+                e2 = str%nte(3, a)
+            end if
+
+            if (str%nte(1, b) == 1) then
+                e3 = str%nte(2, b)
+                e4 = e3
             else
-               elemsForRange = N2 + s%nNodes - N1
+                e3 = str%nte(2, b)
+                e4 = str%nte(3, b)
             end if
-         else
-            if (N1 == N2) then
-               elemsForRange = s%nElems
-            else if (N1 > N2) then
-               elemsForRange = N1 - N2
+
+            ! Two of the edges are the same. And this is the one that must
+            ! be between the two nodes.
+
+            if (e1 == e3 .or. e1 == e4) then
+                elemBetweenNodes = e1
             else
-               elemsForRange = N1 + s%nNodes - N2
-            end if
-         end if
-      end if
-    end function elemsForRange
-
-  end subroutine makeCrossZip
-
-  subroutine makePocketZip(p, strings, nStrings, pocketMaster, debugZipper)
-    use constants
-    use oversetData, only : oversetString, oversetEdge
-    use oversetUtilities, only : qsortEdgeType
-    use kdtree2_module
-    implicit none
-
-    ! Input/output
-    integer(kind=intType), intent(in) :: nStrings
-    type(oversetString), intent(in) :: p, strings(nStrings)
-    type(oversetString) :: pocketMaster
-    logical, intent(in) :: debugZipper
-
-    ! Local variables
-    integer(kind=intType) :: i, j, nsum1, nsum2, ndiff1, ndiff2, ipedge, icur
-    integer(kind=intType) :: n1, n2, npolyEdges
-    integer(kind=intType) :: nNodes1, nNodes2, cn1, cn2, str1, str2
-    type(oversetEdge), allocatable, dimension(:) :: polyEdges
-    type(oversetEdge) :: e1, e2
-    type(oversetString), pointer  :: stringsLL, str
-    integer(kind=intType) :: npocketEdges, nFullStrings, nNodes
-    integer(kind=intType) :: ip, curElem, nElems, iStart, firstElem
-    type(oversetString), allocatable, dimension(:), target :: pocketStringsArr
-
-    ! ---------------------------------------------------------------
-    ! PocketZip 1:
-    ! First sort the edges.
-    ! ---------------------------------------------------------------
-    call qsortEdgeType(p%Edges, p%nEdges)
-
-    ! Now gather up the left-over edges for pocket zipping.
-
-    ! Over estimate of remaining pocket edges to zip
-    allocate(polyEdges(p%nEdges))
-
-    ! Eliminate the edges going through the ordered edges.
-    ! The sorted opposite edges are canceled in pairs.
-    npolyEdges = 0
-    i = 1
-    do while (i <= p%nEdges)
-
-       if (i == p%nEdges) then
-          ! This must be the last free edge:
-          e1 = p%Edges(i)
-          npolyEdges = npolyEdges + 1
-          polyEdges(npolyEdges) = e1
-          i = i + 1
-          cycle
-       end if
-
-       ! Two edges in sequence
-       e1 = p%Edges(i)
-       e2 = p%Edges(i+1)
-
-       ! First determine if e1 is at the end of two single ended
-       ! chains. In this case the edge *will* not be paired and that's
-       ! correct.
-
-       str1    = p%cNodes(1, e1%n1) ! node1's child fullStrings ID
-       cn1     = p%cNodes(2, e1%n1) ! node1's child fullStrings node index
-       nNodes1 = strings(str1)%nNodes ! node1's child fullStrings nNodes size
-
-       str2    = p%cNodes(1, e1%n2) ! node2's child fullStrings ID
-       cn2     = p%cNodes(2, e1%n2) ! node2's child fullStrings node index
-       nNodes2 = strings(str2)%nNodes ! node2's child fullStrings nNodes size
-
-       if (str1 /= str2 ) then
-          if (.not.strings(str1)%isperiodic .and. &
-               .not.strings(str2)%isperiodic .and. &
-               (cn1==1 .or. cn1==nNodes1) .and. (cn2==1 .or. cn2==nNodes2)) then
-             ! Increment just 1 in 1 to skip over edge e1.
-             i = i + 1
-             cycle
-          end if
-       end if
-
-       ! The sum and difference:
-       nsum1 = e1%n1 + e1%n2
-       nsum2 = e2%n1 + e2%n2
-
-       ndiff1 = e1%n2 - e1%n1
-       ndiff2 = e2%n2 - e2%n1
-
-       if (nsum1 == nsum2 .and. ndiff1 + ndiff2 == 0) then
-          ! These edges cancel. Great.
-          i = i + 2
-          cycle
-       else
-          ! Add just the first edge
-          npolyEdges = npolyEdges + 1
-          polyEdges(npolyEdges) = e1
-          i = i + 1
-       end if
-    end do
-
-    ! Define pocketMaster string
-    call nullifyString(pocketMaster)
-    pocketMaster%myID = 88
-    pocketMaster%nElems = nPolyEdges
-    pocketMaster%nNodes = nPolyEdges*2
-    pocketMaster%nEdges = 0
-    allocate(pocketMaster%nodeData(10, 2*nPolyEdges), &
-         pocketMaster%intNodeData(3, 2*nPolyEdges), &
-         pocketMaster%conn(2, nPolyEdges))
-
-    ! Dump the data into the pocketMaster
-    do i=1, nPolyEdges
-       pocketMaster%nodeData(:, 2*i-1) = p%nodeData(:, polyEdges(i)%n1)
-       pocketMaster%intNodeData(:, 2*i-1) = p%intNodeData(:, polyEdges(i)%n1)
-
-       pocketMaster%nodeData(:, 2*i) = p%nodeData(:, polyEdges(i)%n2)
-       pocketMaster%intNodeData(:, 2*i) = p%intNodeData(:, polyEdges(i)%n2)
-       pocketMaster%conn(:, i) = (/2*i, 2*i-1/)
-    end do
-
-    call setStringPointers(pocketMaster)
-    call reduceGapString(pocketMaster)
-    call createNodeToElem(pocketMaster)
-
-    ! The next step is to create ordered strings based on the
-    ! connectivity. This is a purely logical operation. We don't know
-    ! how many actual strings we will need so we will use a linked
-    ! list as we go.
-
-    ! Allocate some additional arrays we need for doing the chain
-    ! searches.
-    nElems = pocketMaster%nElems
-    nNodes = pocketMaster%nNodes
-    allocate(pocketMaster%elemUsed(nElems), pocketMaster%subStr(2, nElems), &
-         pocketMaster%cNodes(2, nNodes))
-
-    pocketMaster%cNodes = 0
-    pocketMaster%elemUsed = 0
-    curElem = 1
-    nFullStrings = 0
-
-    do while (curElem < pocketMaster%nElems)
-
-       ! Arbitrarily get the first node for my element:
-       iStart = pocketMaster%conn(1, curElem)
-       nElems = pocketMaster%nte(1, iStart)
-
-       firstElem = pocketMaster%nte(2, iStart)
-       pocketMaster%subStr(1, 1) = firstElem
-       call doChain(pocketMaster, iStart, 1)
-
-       ! We now have a boundary string stored in master%subString(1,
-       ! :nSubStr(1)). These are actually the element numbers of the
-       ! master that form a continuous chain.
-
-       ! Create or add a new string to our linked list
-       ! "stringsLL".
-       if (nFullStrings == 0) then
-          allocate(stringsLL)
-          nFullStrings = 1
-          stringsLL%next => stringsLL
-          str => stringsLL
-       else
-          allocate(str%next)
-          str%next%next => stringsLL
-          str => str%next
-          nFullStrings = nFullStrings + 1
-       end if
-
-       ! Create a substring from master based on the elements we
-       ! have in the buffer
-       call createSubStringFromElems(pocketMaster, str, nFullStrings)
-
-       ! Scan through until we find the next unused element:
-       do while((pocketMaster%elemUsed(curElem) == 1) .and. &
-            (curElem < pocketMaster%nElems))
-          curElem = curElem + 1
-       end do
-    end do
-
-    ! Temporary strings array for plotting and pocketZipping
-    allocate(pocketStringsArr(nFullStrings))
-    str => stringsLL
-    i = 0
-    do while(i < nFullStrings)
-       i = i + 1
-       pocketStringsArr(i) = str ! Derived type assignment
-       call nullifyString(str)
-       str => str%next
-    end do
-
-    ! Allocate space for pocket triangles.
-    ! (n-sided polygon -> n-2 triangles)
-    allocate(pocketMaster%tris(3, 10*pocketMaster%nElems))
-    allocate(pocketMaster%edges(4*pocketMaster%nElems))
-    pocketMaster%nTris = 0
-
-    ! Build the pocketMaster tree
-    pocketMaster%tree => kdtree2_create(pocketMaster%x, sort=.True.)
-
-    if (debugZipper) then
-       open(unit=101, file="strings_pocket.dat", form='formatted')
-       write(101,*) 'TITLE = "PocketStrings Data" '
-
-       write(101,*) 'Variables = "X" "Y" "Z" "Nx" "Ny" "Nz" "Vx" "Vy" "Vz" "ind" &
-            &"gapID" "gapIndex" "otherID" "otherIndex" "ratio"'
-       do i=1, nFullStrings
-          ! Temporarily allocate otherID
-          allocate(pocketStringsArr(i)%otherID(2, pocketStringsArr(i)%nNodes))
-          pocketStringsArr(i)%otherID = -1
-
-          call writeOversetString(pocketStringsArr(i), pocketStringsArr, &
-               nFullStrings, 101)
-       end do
-       close(101)
-    end if
-
-    ! Loop over pocketStrings and begin pocketZip starting
-    ! from smallest convex ear.
-    do i=1,nFullStrings
-       if (debugZipper) then
-          print *,'Pocket Zipping String ', i, ' of ', nFullStrings
-       end if
-       pocketZiploop: do while (pocketStringsArr(i)%nNodes > 2)
-          ! Each pass zips one triangle. Keep zipping
-          ! until last triangle is zipped in the pocket polygon.
-          call pocketZip(pocketStringsArr(i))
-       end do pocketZiploop
-    end do
-
-    ! Destroy the strings array
-    do i=1, nFullStrings
-       call deallocateString(pocketStringsArr(i))
-    end do
-    deallocate(pocketStringsArr, polyEdges)
-
-  end subroutine makePocketZip
-
-  subroutine pocketZip(s)
-
-    use constants
-    use kdtree2_module
-    use utils, only : mynorm2, cross_prod
-
-    implicit none
-
-    ! Input parameters
-    type(oversetString), intent(inout), target :: s
-
-    ! Local variables
-    integer(kind=intType) :: i, j, k, ii, im1, ip1, N
-    integer(kind=intType) :: nNodes, nElems, iimin
-    real(kind=realType), dimension(3) :: v1, v2, norm, c
-    real(kind=realType) :: cosCutoff, cosTheta, r2, v1nrm, v2nrm, costhetaMax
-    real(kind=realType) :: dp, dpMax
-
-    integer(Kind=intType), dimension(:), allocatable :: nodeMap, badNode
-    type(kdtree2_result), dimension(:), allocatable  :: results
-    logical :: added, iiMinSet
-    real(kind=realType), parameter:: fact=0.95_realType
-    N = s%nNodes
-    allocate(results(25), nodeMap(N), badNode(N))
-    nodeMap = 1
-    badNode = 0 ! Will become 1 if bad
-    outerZiploop: do
-
-       ! No choice for the last triangle:
-       if (N==3) then
-          ii = 1
-          im1 = prevNode(ii)
-          ip1 = nextNode(ii)
-          ! We don't call addPotentialTriangle because we don't have a
-          ! choice anymore. Just call the raw addTri command
-          call addTri(ip1, s, ii, s, im1, s)
-          ! and flag the node as gone
-          nodeMap(ii) = 0
-          exit outerZipLoop
-       end if
-
-       iiMinSet = .False.
-
-       ! First find the largest dot product:
-       dpMax = -one
-       nodeloop1: do ii=1, N
-
-          if (badNode(ii) == 1) then
-             cycle nodeLoop1
-          end if
-
-          ip1 = nextNode(ii)
-          im1 = prevNode(ii)
-
-          ! Determine the angle between the vectors
-          v1 = s%x(:, im1) - s%x(:, ii)
-          v2 = s%x(:, ip1) - s%x(:, ii)
-          v1nrm = mynorm2(v1)
-          v2nrm = mynorm2(v2)
-          call cross_prod(v2, v1, norm)
-          norm = norm / mynorm2(norm)
-          dpMax = max(dpmax, dot_product(norm, s%norm(:, ii)))
-       end do nodeloop1
-
-       ! Next find the largest cosTheta that is winthin a factor
-       !  of dpMax
-       costhetaMax = -Large
-       nodeloop2: do ii=1, N
-
-          if (badNode(ii) == 1) then
-             cycle nodeLoop2
-          end if
-
-          ip1 = nextNode(ii)
-          im1 = prevNode(ii)
-
-          ! Determine the angle between the vectors
-          v1 = s%x(:, im1) - s%x(:, ii)
-          v2 = s%x(:, ip1) - s%x(:, ii)
-          v1nrm = mynorm2(v1)
-          v2nrm = mynorm2(v2)
-          call cross_prod(v2, v1, norm)
-          norm = norm / mynorm2(norm)
-          dp = dot_product(norm, s%norm(:, ii))
-          if (dp > dpMax*fact) then ! We take this
-             costheta = dot_product(v1, v2)  / (v1nrm * v2nrm)
-             if (cosTheta > cosThetaMax) then
-                costhetaMax = costheta
-                iiMinSet = .True.
-                iimin = ii
-             end if
-          end if
-       end do nodeloop2
-
-       if (iiMinSet) then
-          ! Zip about node "iimin" if it was set:
-          ii = iimin
-          ip1 = nextNode(ii)
-          im1 = prevNode(ii)
-          call addPotentialTriangle(s, ip1, ii, im1, nodeMap, results, added)
-          if (added) then
-             ! This triangle was good!
-             exit outerZipLoop
-          else
-             ! Bad node. Need to cycle through rest of pocket nodes.
-             ! Remember this bad node in next cycle.
-             badNode(ii) = 1
-             cycle outerZiploop
-          end if
-       else
-          ! What does this mean? We didn't find any node to zip. Are they all bad?
-          print *,'Problem with pocket zipper. Somehow we were not able to find "&
-               &"node to add a triangle on. This should not happen. Contact the "&
-               &"Developers.'
-          stop
-       end if
-    end do outerZiploop
-
-    ! Modify the pocketStrings to remove the two elements and the node
-    ! that got eliminated due to pocketZipping.
-    call shortenString(s, nodeMap)
-    deallocate(nodeMap, badNode, results)
-
-  contains
-    function nextNode(ii)
-      implicit none
-      integer(kind=intType) :: ii, nextNode
-      nextNode = ii + 1
-      if (ii == N) then
-         nextNode = 1
-      end if
-    end function nextNode
-
-    function prevNode(ii)
-      implicit none
-      integer(kind=intType) :: ii, prevNode
-      prevNode = ii - 1
-      if (ii == 1) then
-         prevNode = N
-      end if
-    end function prevNode
-  end subroutine pocketZip
-
-  subroutine computeTriSurfArea(master, area)
-
-    ! Computes area sum of all triangles belonging to object master
-    use constants
-    use utils, only : mynorm2, cross_prod
-    implicit none
-
-    ! Input parameters
-    type(oversetString), intent(in) :: master
-    real(kind=realType), intent(out) :: area
-
-    ! Local variables
-    integer(kind=intType) :: i, n1, n2, n3
-    real(kind=realType), dimension(3) :: v1, v2, norm
-
-    area = 0.0
-    do i=1, master%nTris
-       n1 = master%tris(1, i)
-       n2 = master%tris(2, i)
-       n3 = master%tris(3, i)
-
-       v1 = master%x(:, n2) - master%x(:, n1)
-       v2 = master%x(:, n3) - master%x(:, n1)
-       call cross_prod(v1, v2, norm)
-       area = area + half*mynorm2(norm)
-    end do
-
-  end subroutine computeTriSurfArea
-
-  function triOverlap(pt1, pt2, pt3, str, i1, i2)
-
-    use constants
-    use utils, only : mynorm2, cross_prod
-    implicit none
-
-    ! Input/Output
-    real(kind=realType), dimension(3), intent(in) :: pt1, pt2, pt3
-    integer(kind=intType), intent(in) :: i1, i2
-    type(oversetString), intent(in) :: str
-
-    ! Working
-    logical :: triOverlap, inTri
-    integer(kind=intType) :: i
-    real(kind=realType) :: triNorm(3)
-
-    ! Note: This is a dumb loop. We need to do a spatial serch here to
-    ! only check the nodes around the current point.
-
-    call cross_prod(pt2-pt1, pt3-pt1, triNorm)
-    triNorm = triNorm / mynorm2(triNorm)
-
-    triOverlap = .False.
-    do i=1, str%nNodes
-       if (i /= i1 .and. i/= i2) then
-          if (dot_product(str%norm(:, i), triNorm) > 0.8) then
-             call pointInTriangle(pt1, pt2, pt3, str%x(:, i), inTri)
-             if (inTri) then
-                triOverlap = .true.
-                exit
-             end if
-          end if
-       end if
-    end do
-  end function triOverlap
-
-  subroutine shortenString(s, nodeMap)
-
-    ! This is an auxilary routine that take a string 's', and a node
-    ! map of len s%nNodes, with 1 or 0. A 1 means that the node will
-    ! be in the shortened string, 0 means that the node should be
-    ! deleted.
-    use constants
-    implicit none
-
-    ! Input/Output
-    type(oversetString) :: s
-    integer(kind=intType), dimension(:), intent(inout) :: nodeMap
-
-    ! Working
-    integer(kind=intType) :: nNodes, nElems, nRemoved, i, j
-    real(kind=realType), dimension(:, :), pointer :: nodeDataTmp
-    integer(kind=intType), dimension(:, :), pointer :: connTmp, intNodeDataTmp
-    integer(kind=intType), dimension(:), pointer :: pNodesTmp
-
-
-    ! Now we will modify our string to remove the elements and nodes
-    ! that got knocked off due to self zipping. This way the calling
-    ! process still sees the same string, it just gets a little
-    ! shorter.
-
-    ! Save pointers to existing data
-    nNodes = s%nNodes
-    nElems = s%nElems
-    nodeDataTmp => s%nodeData
-    intNodeDataTmp => s%intNodeData
-    connTmp => s%conn
-    pNodesTmp => s%pNodes
-
-    ! Convert the nodeMap which currently contains a one if the node
-    ! still exists and 0 if it doesn't. This will convert it to the new
-    ! node numbers. Ie nodeMap(i) gives the new node index of the
-    ! shorted chain. If nodeMap(i) = 0, it is no longer part of the
-    ! chain.
-    j = 0
-    nRemoved = 0
-    do i=1, s%nNodes
-       if (nodeMap(i) == 1) then
-          j = j + 1
-          nodeMap(i) = j
-       else
-          nRemoved = nRemoved + 1
-       end if
-    end do
-
-    !  Update the cNodes in the parent so they point to the updated node
-    ! numbers. Note that the nodes that have been eliminated, have cNode
-    ! = 0, which will identify that it no longer has a child node.
-    do i=1, s%nNodes
-       s%p%cNodes(:, s%pNodes(i)) = (/s%myID, nodeMap(i)/)
-    end do
-
-    ! Update the number of nodes/elems in our shorted chain. Every
-    ! zipper reduces the number of nodes and number of elems by 1
-    s%nNodes = s%nNodes - nRemoved
-    s%nElems = s%nElems - nRemoved
-
-    allocate(s%nodeData(10, s%nNodes), s%intNodeData(3, s%nNodes), &
-         s%pNodes(s%nNodes), s%conn(2, s%nElems))
-
-    ! Set the pointers for the new string
-    call setStringPointers(s)
-
-    do i=1, nNodes
-       if (nodeMap(i) /= 0) then
-          s%nodeData(:, nodeMap(i)) = nodeDataTmp(:, i)
-          s%intNodeData(:, nodeMap(i)) = intNodeDataTmp(:, i)
-          s%pNodes(nodeMap(i)) = pNodesTmp(i)
-       end if
-    end do
-
-    ! Since we know the string was in order, we can simply redo the connectivity
-    do i=1, s%nElems
-       s%conn(:, i) = (/i, i+1/)
-    end do
-
-    if (s%isPeriodic) then
-       s%conn(2, s%nElems) = 1
-    end if
-
-    ! Dellocate the existing memory
-    deallocate(nodeDataTmp, intNodeDataTmp, connTmp, pNodesTmp)
-
-    ! Recreate the node to elem
-    if (s%nNodes >=3 ) then
-       call createNodeToElem(s)
-    end if
-
-  end subroutine shortenString
-
-  subroutine addPotentialTriangle(s, im1, ii, ip1, nodeMap, results, added)
-
-    ! Common routine (for pocketZip and selfZip) to potentially add a
-    ! triangle resulting from a single string.
-    use constants
-    use kdtree2_priority_queue_module
-    use kdtree2_module
-    implicit none
-
-    ! Input/Output
-    type(oversetString) :: s
-    integer(kind=intType), intent(in) :: im1, ii, ip1
-    integer(kind=intType), intent(inout), dimension(:) :: nodeMap
-    type(kdtree2_result), dimension(:), allocatable :: results
-    logical, intent(out) :: added
-    ! Working:
-    real(kind=realType) :: r2
-    real(kind=realType), dimension(3) :: v1, v2, norm, c
-    integer(kind=intType) ::  nFound, nalloc, idx, k, j, i
-    logical :: overlapFound, inTri
-
-    ! We may have a valid triangle. We need to make sure we
-    ! don't overlap anyone else.
-    !
-    ! xim1 +
-    !      | \
-    !      |   \
-    !      |     c
-    !      |       \
-    !      |         \
-    !      +----------+
-    !      xi         xip1
-    ! We do a ball search based at 'c' which is just the
-    ! (average of xip1 and xim1) using a radius defined as the
-    ! maximum of (the distance between 'c' and 'xi', half
-    ! length of xip1 to xim1)
-    !
-    added = .False.
-
-    c = half*(s%x(:, ip1) + s%x(:, im1))
-    r2 = (c(1) - s%x(1, ii))**2 +  (c(2) - s%x(2, ii))**2 +  (c(3) - s%x(3, ii))**2
-
-    r2 = max(r2, (s%x(1, ip1) - s%x(1, im1))**2 + (s%x(2, ip1) - s%x(2, im1))**2 + &
-         (s%x(3, ip1) - s%x(3, im1))**2)
-
-    nFound = 0
-    outerLoop: do
-       nalloc = size(results)
-       call kdtree2_r_nearest(s%p%tree, c, r2, nfound, nalloc, results)
-       if (nFound < nAlloc) then
-          exit outerLoop
-       end if
-
-       ! Allocate more space and keep going
-       deallocate(results)
-       nAlloc = nAlloc * 2
-       allocate(results(nAlloc))
-    end do outerLoop
-
-
-    ! We can now be sure that we have all the points inside our
-    ! ball. Next we proceed to systematically check them.
-    overlapFound = .False.
-    nodeFoundLoop: do k=1, nFound
-       ! Note that we do check nodes from our own string,
-       ! except for the the three nodes we're dealing
-       ! with. Remember that we are working in our parent's
-       ! ording here.
-       idx = results(k)%idx
-
-       notPartofTriangle: if (idx /= s%pNodes(im1) .and. &
-            idx /= s%pNodes(ii) .and. idx /= s%pNodes(ip1)) then
-
-          ! Only check if the node normal of the point we're
-          ! checking is in the same direction as the triangle.
-          if (dot_product(s%norm(:, ii), s%p%norm(:, idx)) > zero) then
-
-             ! Finally do the actual trianlge test
-             call pointInTriangle(s%x(:, ip1), s%x(:, ii), s%x(:, im1), &
-                  s%p%x(:, idx), inTri)
-             if (inTri) then
-                ! As soon as 1 is in the triangle, we know the
-                ! triangle is no good.
-                overlapFound = .True.
-                exit nodeFoundLoop
-             end if
-          end if
-       end if notPartofTriangle
-    end do nodeFoundLoop
-
-    if (.not. overlapFound) then
-
-       ! This triangle is good!
-       added = .True.
-
-       ! Call the generic addTri Routine. Here all the ndoes from the
-       ! triangle come from the same string.
-       call addTri(ip1, s, ii, s, im1, s)
-
-       ! Flag this node as gone
-       nodeMap(ii) = 0
-
-    end if
-  end subroutine addPotentialTriangle
-
-  subroutine closestSymmetricNode(s1, s2, i, j)
-    use constants
-    use utils, only : mynorm2
-
-    implicit none
-
-    ! Input/Output
-    type(oversetString) :: s1, s2
-    integer(kind=intType), intent(out) :: i, j
-    real(kind=realType) :: minDist, dist
-    integer(kind=intType) :: ii
-
-    ! Working:
-    minDist = large
-
-    do ii=1, s1%nNodes
-       ! "The other index of the matching node on the other string is
-       ! me" ie. "I point to you and you point to me"
-
-       if (s2%otherID(2, s1%otherID(2, ii)) == ii) then
-
-          dist = mynorm2(s1%x(:, ii) - s2%x(:, s1%otherID(2, ii)))
-
-          if (dist < minDist) then
-             minDist = dist
-             i = ii
-             j = s1%otherID(2, ii)
-          end if
-       end if
-    end do
-
-  end subroutine closestSymmetricNode
-
-  subroutine stringMatch(strings, nStrings, debugZipper)
-    use constants
-    use kdtree2_priority_queue_module
-    use kdtree2_module
-    use utils, only : mynorm2
-
-    implicit none
-
-    ! Input/output
-    integer(kind=intType), intent(in) :: nStrings
-    type(oversetString), dimension(nstrings), target :: strings
-    logical, intent(in) :: debugZipper
-
-    ! Working
-    integer(kind=intType) :: i, j, k, idx, oid(4)
-    integer(kind=intType) ::  nAlloc, nUnique, nSearch
-    type(kdtree2_result), allocatable, dimension(:) :: results
-    type(oversetString), pointer ::  str, master
-    logical :: checkLeft, checkRight, concave
-    logical :: checkLeft2, checkRight2, concave2
-    logical :: leftOK, rightOK, isEndNode
-    real(kind=realType), dimension(3) :: xj, xjp1, xjm1, normj
-    real(kind=realType), dimension(3) :: xk, xkp1, xkm1, normk
-    real(kind=realType), dimension(3) :: myPt, otherPt, eNorm
-    real(kind=realType) ::  fact, dStar, curDist, minDist, edgeLength
-    integer(kind=intTYpe) :: otherID, otherIndex, closestOtherIndex, closestOtherString
-    integer(kind=intType) :: id, index
-    real(kind=realType) :: timeA,  pt(3),   v(3), cosTheta,  cutOff, dist, maxH, ratio
-
-    if (nStrings == 0) then
-       return
-    end if
-
-    ! Now make we determine the nearest point on another substring
-    ! for each point.
-    nAlloc = 50
-    allocate(results(nAlloc))
-    master => strings(1)%p
-
-    ! Loop over the fullStrings
-    do i=1, nStrings
-       str => strings(i) ! Easier readability
-
-       ! No need to do anything with the pocket string.
-       if (str%isPocket) then
-          cycle
-       end if
-
-       ! Allocate space for otherID as it is not done yet
-       if (associated(str%otherID)) then
-          deallocate(str%otherID)
-       end if
-
-       allocate(str%otherID(2, str%nNodes))
-       str%otherID = -1
-
-       ! Loop over my nodes and search for it in master tree
-       nodeLoop:do j=1, str%nNodes
-
-          ! Set the initial maximum number of neighbours
-          ! This can be at most the total number of nodes
-          nSearch = min(nAlloc, master%nNodes)
-
-          ! We have to be careful since single-sided chains have only
-          ! 1 neighbour at each end.
-
-          call getNodeInfo(str, j, checkLeft, checkRight, concave, &
-               xj, xjm1, xjp1,  normj)
-          isEndNode = .False.
-          if (.not. (checkLeft .eqv. checkRight)) then
-             ! Since we don't need to check one side, this means we're
-             ! at the end of the chain. This is important since this
-             ! node *MUST* be attached to another node on another
-             ! chain at the end
-             isEndNode = .True.
-          end if
-
-          outerLoop: do
-             minDist = large
-             closestOtherIndex = -1
-             call kdtree2_n_nearest(master%tree, xj, nSearch, results)
-
-             ! Only check edges connected to nodes within the
-             ! distance the maximum element size of my self or the
-             ! closest node. We put in a fudge factor of 1.5.
-
-             innerLoop: do k=1, nSearch
-
-                ! Since we know the results are sorted, if the
-                ! distance(k) > than our current minDist, we can stop
-                ! since there is no possible way that any of the
-                ! remaining points can be closer given that the modified
-                ! D* is always larger than the original D
-
-                ! Extract current information to make things a little
-                ! easier to read
-                curDist = sqrt(results(k)%dis)
-                idx = results(k)%idx
-                pt = master%x(:, idx)
-
-                ! ---------------------------------------------
-                ! Exit Condition: We can stop the loop if the current
-                ! uncorrected distance is larger than our current
-                ! minimum. This guarantees the minimum corrected
-                ! distance is found.
-                ! ---------------------------------------------
-
-                if (curDist > minDist) then
-                   exit outerLoop
+                elemBetweenNodes = e2
+            end if
+
+        end function elemBetweenNodes
+
+        function triArea(pt1, pt2, pt3)
+
+            use constants
+            use utils, only: myNorm2, cross_prod
+            implicit none
+
+            ! Input/Output
+            real(kind=realType), intent(in), dimension(3) :: pt1, pt2, pt3
+            real(kind=realType) :: triArea
+
+            ! Working
+            real(kind=realType), dimension(3) :: norm
+
+            call cross_prod(pt2 - pt1, pt3 - pt1, norm)
+            triArea = half * mynorm2(norm)
+
+        end function triArea
+
+    end subroutine crossZip
+
+    subroutine addTri(A, sA, B, sB, C, sC)
+
+        ! Form a triangle from index 'A' on string 'sA' , index 'B' on
+        ! string 'sB' and index 'C' on string 'sC'
+
+        use constants
+        implicit none
+
+        ! Input/Output
+        integer(kind=intType), intent(in) :: A, B, C
+        type(oversetString), intent(in) :: sA, sB, sC
+
+        ! Working
+        type(oversetString), pointer :: p
+        integer(kind=intType) :: mn1, mn2, mn3
+        p => sA%p
+
+        p%nTris = p%nTris + 1
+
+        ! mn = master node
+        mn1 = sA%pNodes(A)
+        mn2 = sB%pNodes(B)
+        mn3 = sC%pNodes(C)
+
+        p%tris(:, p%nTris) = (/mn1, mn2, mn3/)
+
+        ! Add these three edges to master list of edges
+
+        ! Edge 1:
+        p%nEdges = p%nEdges + 1
+        p%edges(p%nEdges)%n1 = mn1
+        p%edges(p%nEdges)%n2 = mn2
+
+        ! Edge 2:
+        p%nEdges = p%nEdges + 1
+        p%edges(p%nEdges)%n1 = mn2
+        p%edges(p%nEdges)%n2 = mn3
+
+        ! Edge 3:
+        p%nEdges = p%nEdges + 1
+        p%edges(p%nEdges)%n1 = mn3
+        p%edges(p%nEdges)%n2 = mn1
+
+    end subroutine addTri
+
+    subroutine makeCrossZip(p, strings, nStrings, debugZipper)
+
+        use constants
+        implicit none
+
+        ! Input/output
+        integer(kind=intType), intent(in) :: nStrings
+        type(oversetString), intent(inout), target :: p, strings(nStrings)
+        type(oversetString), pointer :: s, s1, s2
+        logical, intent(in) :: debugZipper
+        ! Working
+        integer(kind=intType) :: i, iStart, iEnd, jStart, jEnd, iStart_j, iEnd_j
+        integer(kind=intType) :: curOtherID, iString, ii, nextI, curIStart, nIElems_j
+        integer(kind=intType) :: nIElemsBeg, nJElemsBeg, nElem1, nElem2
+        integer(kind=intType) :: iStart_orig, iEnd_orig, jStart_orig, jEnd_orig
+        logical :: fullLoop1, fullLoop2, dummy, failed
+        ! The purpose of this routine is to determine the ranges on two
+        !  paired strings that are continuously paired and suitable for
+        !  performing cross zipping.
+
+        ! Allocate arrays to keep track of nodes that have already been
+        !  used in cross zipping.
+        do i = 1, nstrings
+            s => strings(i)
+            allocate (s%XzipNodeUsed(s%nNodes))
+            s%xZipNodeUsed = 0
+        end do
+
+        strLoop: do iString = 1, nStrings
+
+            ! Skip strings that were pocekts
+            if (strings(iString)%isPocket) then
+                cycle
+            end if
+
+            ! S1 is the curent '1' string we are working with
+            s1 => strings(iString)
+
+            ! Find the lowest node number that isn't used:
+            curIStart = startNode(s1)
+            do while (curIStart > 0)
+
+                if (debugZipper) then
+                    print *, '------------------------------------------------'
+                    print *, 'Starting string ', s1%myid, 'at index ', curIstart
+                    print *, '------------------------------------------------'
                 end if
 
-                ! ---------------------------------------------
-                ! Check 1: If the node we found isn't on our
-                ! substring. we don't need to do anything
-                ! ---------------------------------------------
+                iStart = curIStart
+                ! Other ID is the string attached at the current pt.
+                curOtherID = s1%otherID(1, iStart)
 
-                if (master%cNodes(1, idx) == str%myID) then
-                   cycle innerLoop
+                if (curOtherID == -1) then
+                    print *, '*************************************************************************'
+                    print *, 'Error during makeCrossZip: Point ', iStart, 'does not have a matching point'
+                    print *, 'Position: ', s1%x(:, iStart)
+                    print *, '*************************************************************************'
+                    stop
                 end if
 
-                ! ---------------------------------------------
+                ! S2 is the current '2' string we are working with
+                s2 => strings(curOtherID)
+                jStart = s1%otherID(2, iStart)
+
+                ! ---------------- s1 increments -------------
+                ! The goal is to increment s1 as far as we can go in the
+                ! NEGATIVE direction.
+                call traceMatch(s1, iStart, .False., curOtherID, iEnd, fullLoop1)
+
+                if (.not. fullLoop1) then
+                    ! Now set iStart to iEnd. Basically we start right at the
+                    ! negative end the chain and traverse in the POSITIVE
+                    ! direction.
+                    iStart = iEnd
+                    call traceMatch(s1, iStart, .True., curOtherID, iEnd, dummy)
+                end if
 
-                ! Check 1b: If the node we found has been removed due
-                ! to self zipping, we can just keep going
-                ! --------------------------------------------
-                if (master%cNodes(2, idx) == 0) then
-                   cycle innerLoop
+                ! Now, iStart -> iEnd (in the positive order) is the maximum
+                ! possible extent that s1 could be connected to s1
+                ! over. However, s2 may have something to say about that. We
+                ! do the same operation for s2. Note that the orders are reversed.
+
+                ! ---------------- s2 increments -------------
+                call traceMatch(s2, jStart, .True., s1%myID, jEnd, fullLoop2)
+
+                ! If the first jnode isnt' actually matched to me, like I am
+                ! to him. Therefore skip me, and go to the next one.
+                if (jStart == jEnd .and. .not. fullLoop2 .and. &
+                    s2%otherID(1, jStart) /= s1%myID) then
+                    s1%xZipNodeUsed(curIStart) = 1
+                    curIStart = startNode(s1)
+                    cycle
                 end if
 
-                ! The first time we make it here, idx will be the
-                ! index of the closest node on another string that
-                ! isn't me.
-                if (closestOtherIndex == -1) then
-                   closestOtherString = master%cNodes(1, idx)
-                   closestOtherIndex = master%cNodes(2, idx)
+                if (.not. fullLoop2) then
+                    jStart = jEnd
+                    call traceMatch(s2, jStart, .False., s1%myID, jEnd, dummy)
                 end if
 
-                ! ---------------------------------------------
-                ! Check 2: Check if the node we found violates the
-                ! the "in front" test. For a concave corner TWO
-                ! triangle areas formed by the point and the two
-                ! edges must be positive. For a convex corner only
-                ! one of the triangle areas needs to be positive.
-                ! ---------------------------------------------
-                if (.not. nodeInFrontOfEdges(pt, concave, checkLeft, checkRight, &
-                     xj, xjm1, xjp1, normj)) then
-                   cycle innerLoop
+                if ((iStart == iEnd .and. .not. fullLoop1) .or. &
+                    (jStart == jEnd .and. .not. fullLoop2)) then
+                    ! Can't go anywhere. Flag this node and the next.
+
+                    s1%xZipNodeUsed(curIStart) = 1
+                    curIStart = startNode(s1)
+                    cycle
                 end if
 
-                ! ---------------------------------------------
-                ! Check 3: This is the *reverse* of check 2: Is the
-                ! node we're searching for visible from the potential
-                ! closest other node.
-                ! ---------------------------------------------
-                otherID = master%cNodes(1, idx)
-                otherIndex = master%cNodes(2, idx)
+                if (debugZipper) then
+                    print *, 'Initial Range s1:', istart, iend, fullLoop1
+                    print *, 'Initial Range s2:', jstart, jend, fullLoop2
+                end if
 
-                call getNodeInfo(strings(otherID), otherIndex, checkLeft2, &
-                     checkRight2, concave2, xk, xkm1, xkp1,  normk)
+                ! Save the original start/endn locations
+                iStart_orig = iStart
+                iEnd_orig = iEnd
+                jStart_orig = jStart
+                jEnd_orig = jEnd
+
+                if ((istart == iend .and. fullLoop1) .and. &
+                    (jstart == jend .and. fullLoop2)) then
+                    ! s1 fully attached to s2
+
+                    call closestSymmetricNode(s1, s2, istart, jstart)
+                    iEnd = iStart
+                    jEnd = jStart
+
+                else if ((iStart == iEnd .and. fullLoop1) .and. .not. fullLoop2) then
+
+                    ! Project jStart and jEnd onto s1
+                    iStart = s2%otherID(2, jStart)
+                    iEnd = s2%otherID(2, jEnd)
+
+                else if ((jStart == jEnd .and. fullLoop2) .and. .not. fullLoop1) then
+
+                    ! Project iStart and iEnd onto s2
+                    jStart = s1%otherID(2, iStart)
+                    jEnd = s1%otherID(2, iEnd)
+
+                else
+
+                    ! part of s1 is attached to part of s2
+
+                    nIElemsBeg = elemsForRange(s1, iStart, iEnd, .True.)
+                    nJElemsBeg = elemsForRange(s2, jStart, jEnd, .False.)
+
+                    ! This the "projection" of the 'j' string on the 'i'
+                    ! string. Basically this is the range the 'j' string
+                    ! wants to "use up" on the i string.
+                    iStart_j = s2%otherID(2, jStart)
+                    iEnd_j = s2%otherID(2, jEnd)
+
+                    ! These could match up with iStart and iEnd or they could
+                    ! not. That's what we need to determine here.
+
+                    ! Need to check if iStart_j is "larger" than istart. We
+                    ! just increment using nextNode to take care of periodic
+                    ! boundaries.
+                    if (iStart_j /= iStart) then
+                        ! The starting points are different. Increment iStart
+                        ! until we find
+                        i = iStart
+                        do ii = 1, nIElemsBeg
+                            i = nextNode(s1, i)
+                            if (i == iStart_j) then
+                                iStart = iStart_j
+                                exit
+                            end if
+                        end do
+                    end if
+
+                    if (iEnd_j /= iEnd) then
+                        ! The starting points are different. Decrement jEnd
+                        ! until we find
+                        i = iEnd
+                        do ii = 1, nIElemsBeg
+                            i = prevNode(s1, i)
+                            if (i == iEnd_j) then
+                                iEnd = iEnd_j
+                                exit
+                            end if
+                        end do
+                    end if
+
+                    ! Now with the updated range. Project the iRange back to the
+                    ! the final J range.
+                    jStart = s1%otherID(2, iStart)
+                    jEnd = s1%otherID(2, iEnd)
 
-                if (.not. nodeInFrontOfEdges(xj, concave2, checkLeft2, &
-                     checkRight2, xk, xkm1, xkp1, normk)) then
-                   cycle innerLoop
                 end if
 
-                ! ---------------------------------------------
-                ! Check 4a: Check if the potential node intersects
-                ! itself.
-                ! ---------------------------------------------
-                if (overlappedEdges(str, j, pt)) then
-                   cycle
+                if (debugZipper) then
+                    print *, 'Zipping string: ', s1%myid, ' with ', s2%myid
+                    print *, 's1 range:', istart, iend
+                    print *, 's2 range:', jstart, jend
                 end if
 
-                ! ---------------------------------------------
-                ! Check 4b: OR if the other node would have to
-                ! intersect *ITSELF* to get back to me. This is used
-                ! to catch closest points crossing over thin strips.
-                ! ---------------------------------------------
+                ! Before we do the zip, make sure the ranges have not
+                ! degenerated to 0 elements
+                nElem1 = 1 ! Doesn't matter, just not zero
+                nElem2 = 1 ! Doesn't matter, just not zero
+                if (.not. fullLoop1) then
+                    nElem1 = elemsForRange(s1, iStart, iEnd, .True.)
+                    if (iStart == iEnd) then
+                        nElem1 = 0
+                    end if
 
-                if (overlappedEdges(strings(otherID), otherIndex, xj)) then
-                   cycle
+                end if
+                if (.not. fullLoop2) then
+                    nElem2 = elemsForRange(s2, jStart, jEnd, .False.)
+                    if (jStart == jEnd) then
+                        nElem2 = 0
+                    end if
                 end if
 
-                ! ---------------------------------------------
-                ! Check 4c: Make sure it doesn't inersect the closest
-                ! string if that happens to be different from the
-                ! cloest one.  string. This should only check very
-                ! rare cases the other checks miss.
-                ! ---------------------------------------------
-
-                if (otherID /= closestOtherString) then
-                   if (overlappedEdges2(&
-                        strings(closestOtherString), xj, normj, pt)) then
-                      cycle
-                   end if
+                if (nElem1 > 0 .and. nElem2 > 0) then
+                    ! Do actual cross zip if we still have elements left on both strings
+                    call crossZip(s1, iStart, iEnd, s2, jStart, jEnd, debugZipper, failed)
+
+                    ! If we succefully cross zippered what we were suppoed to
+                    ! flag all the nodes from the original region as done.
+                    if (.not. failed) then
+                        call flagNodesUsed(s1, iStart_orig, iEnd_orig, .True.)
+                        call flagNodesUsed(s2, jStart_orig, jEnd_orig, .False.)
+                    else
+                        ! UhOh. We got stopped part way through. Flag just the
+                        ! nodes at the beginning that we didn't use. Leave
+                        ! those for the pocket. The nodes up to where theh
+                        ! cross zip stopped were flagged internally in cross zip.
+                        call flagNodesUsed(s1, iStart_orig, iStart, .True.)
+                        call flagNodesUsed(s2, jStart_orig, jStart, .False.)
+                    end if
+                else
+
+                    ! Flag the full range of elements are consumed even
+                    ! though we didn't do the cross zip. Leave it for the
+                    ! pocket zipping.
+                    call flagNodesUsed(s1, iStart_orig, iEnd_orig, .True.)
+                    call flagNodesUsed(s2, jStart_orig, jEnd_orig, .False.)
                 end if
 
-                ! ---------------------------------------------
-                ! Check 4d: If this is an end node, we need to check
-                ! if the potential canditate is also a end node
-                ! ---------------------------------------------
-                if (isEndNode) then
-                   if (checkRight2 .eqv. checkLeft2) then
-                      cycle
-                   end if
+                ! Find the next starting index:
+                curIStart = startNode(s1)
+
+            end do
+        end do strLoop
+
+    contains
+
+        function startNode(s)
+            ! Determine the lowest index of a non-used xzip node for
+            ! string 's'.
+            implicit none
+            type(oversetString) :: s
+            integer(kind=intType) :: startNode, i
+
+            ! This will be the return value if all nodes are used:
+            startNode = 0
+            nodeLoop: do i = 1, s%nNodes
+                if (s%xZipNodeUsed(i) == 0) then
+                    startNode = i
+                    exit nodeLoop
                 end if
+            end do nodeLoop
+        end function startNode
+
+        function nextNode(s, i)
 
-                ! ---------------------------------------------
-                ! Check 5: Now that the point has passed the previous
-                ! checks, we can compute the agumented distance
-                ! function and see if it better than the exisitng min
-                ! distance.
-                ! ---------------------------------------------
+            implicit none
+            type(oversetString), intent(iN) :: s
+            integer(kind=intType), intent(in) :: i
+            integer(kind=intType) :: nextNode
 
-                ! Now calculate our new distance
-                v =  pt - xj
-                v = v/mynorm2(v)
+            ! Normally just increment:
+            nextNode = i + 1
 
-                ! Recompute the distance function
-                cosTheta = abs(dot_product(normj, v))
+            if (i == s%nNodes) then
+                if (s%isPeriodic) then
+                    nextNode = 1
+                else
+                    ! Can't go any further
+                    nextNode = i
+                end if
+            end if
 
-                ! Update distFunction
-                dStar = curDist / (max(1-cosTheta, 1e-6))
+            ! If the next node is used. The next node is set the current
+            ! one.
+            if (s%xZipNodeUsed(nextNode) == 1) then
+                nextNode = i
+            end if
+        end function nextNode
 
-                if (dStar < minDist) then
-                   ! Save the string ID and the index.
-                   minDist = dStar
-                   str%otherID(:, j) = master%cNodes(:, idx)
+        function simpleNextNode(s, i)
+
+            implicit none
+            type(oversetString), intent(iN) :: s
+            integer(kind=intType), intent(in) :: i
+            integer(kind=intType) :: simpleNextNode
+
+            ! Normally just increment:
+            simpleNextNode = i + 1
+
+            if (i == s%nNodes) then
+                if (s%isPeriodic) then
+                    simpleNextNode = 1
+                else
+                    ! Can't go any further
+                    simpleNextNode = i
+                end if
+            end if
+        end function simpleNextNode
+
+        function prevNode(s, i)
+
+            implicit none
+            type(oversetString), intent(iN) :: s
+            integer(kind=intType), intent(in) :: i
+            integer(kind=intTYpe) :: prevNode
+            ! Normally just increment:
+            prevNode = i - 1
+
+            if (i == 1) then
+                if (s%isPeriodic) then
+                    prevNode = s%nNodes
+                else
+                    ! Can't go any further
+                    prevNode = i
                 end if
-             end do innerLoop
+            end if
+
+            ! If the next node is used. The next node is set the current
+            ! one.
+            if (s%xZipNodeUsed(prevNode) == 1) then
+                prevNode = i
+            end if
+        end function prevNode
+
+        subroutine traceMatch(s, iStart, pos, checkID, iEnd, fullLoop)
+
+            implicit none
+
+            ! Given a starting position 'iStart' on string 's', traverse in
+            ! the 'POSitive' or '.not. POSitive' direction checking that the
+            ! otherID still matches "checkID". Return the ending position
+            ! 'iEnd'.
+
+            ! Input/Output
+            type(oversetString) :: s
+            integer(kind=intType), intent(in) :: iStart, checkID
+            logical, intent(in) :: pos
+            integer(kind=intType), intent(out) :: iEnd
+            logical, intent(out) :: fullLoop
+
+            ! Working
+            integer(kind=intType) :: i, nextI
+
+            i = iStart
+            fullLoop = .False.
+
+            traverseLoop: do
+                if (pos) then
+                    nextI = nextNode(s, i)
+                else
+                    nextI = prevNode(s, i)
+                end if
+                if (nextI == i .or. s%otherID(1, nextI) /= checkID) then
+                    ! We can't go any further than we already are
+                    iEnd = i
+                    exit traverseLoop
+                end if
+
+                ! Continue to the next one.
+                i = nextI
+
+                if (i == iStart) then
+                    fullLoop = .True.
+                    iEnd = i
+                    exit traverseLoop
+                end if
+            end do traverseLoop
+        end subroutine traceMatch
+
+        subroutine flagNodesUsed(s, N1, N2, pos)
+
+            implicit none
 
-             ! If we have already searched the max, we have to quit the loop
-             if (nSearch == master%Nnodes) then
+            ! Input/Output
+            type(oversetString) :: s
+            integer(kind=intType), intent(in) :: N1, N2
+            logical, intent(in) :: pos
+
+            ! Working
+            integer(kind=intType) :: nSteps, i, nextI
+
+            if (pos) then
+                if (N2 > N1) then
+                    nSteps = N2 - N1
+                else if (N2 < N1) then
+                    nSteps = N2 + s%nNodes - N1
+                else ! N1 == N2
+                    nSteps = s%nElems
+                end if
+            else
+                if (N1 < N2) then
+                    nSteps = N1 + s%nNodes - N2
+                else if (N1 > N2) then
+                    nSteps = N1 - N2
+                else ! N3 == N4
+                    nSteps = s%nElems
+                end if
+            end if
+
+            s%xZipNodeUsed(N1) = 1
+            i = N1
+            do ii = 1, nSteps
+                if (pos) then
+                    nextI = nextNode(s, i)
+                else
+                    nextI = prevNode(s, i)
+                end if
+
+                s%xZipNodeUsed(nextI) = 1
+                i = nextI
+            end do
+        end subroutine flagNodesUsed
+
+        function elemsForRange(s, N1, N2, pos)
+            ! Determine the number of elements between N1 and N2 for for the
+            ! "POSitive" or "not POSIitive" (negative) direction.
+
+            implicit none
+            type(oversetString) :: s
+            integer(kind=intType), intent(in) :: N1, N2
+            logical :: pos
+            integer(kind=intType) :: elemsForRange
+
+            if (.not. s%isPeriodic) then
+                if (pos) then
+                    elemsForRange = N2 - N1
+                else
+                    elemsForRange = N1 - N2
+                end if
+            else ! Periodic
+                if (pos) then
+                    if (N2 == N1) then
+                        elemsForRange = s%nElems
+                    else if (N2 > N1) then
+                        elemsForRange = N2 - N1
+                    else
+                        elemsForRange = N2 + s%nNodes - N1
+                    end if
+                else
+                    if (N1 == N2) then
+                        elemsForRange = s%nElems
+                    else if (N1 > N2) then
+                        elemsForRange = N1 - N2
+                    else
+                        elemsForRange = N1 + s%nNodes - N2
+                    end if
+                end if
+            end if
+        end function elemsForRange
+
+    end subroutine makeCrossZip
+
+    subroutine makePocketZip(p, strings, nStrings, pocketMaster, debugZipper)
+        use constants
+        use oversetData, only: oversetString, oversetEdge
+        use oversetUtilities, only: qsortEdgeType
+        use kdtree2_module
+        implicit none
+
+        ! Input/output
+        integer(kind=intType), intent(in) :: nStrings
+        type(oversetString), intent(in) :: p, strings(nStrings)
+        type(oversetString) :: pocketMaster
+        logical, intent(in) :: debugZipper
+
+        ! Local variables
+        integer(kind=intType) :: i, j, nsum1, nsum2, ndiff1, ndiff2, ipedge, icur
+        integer(kind=intType) :: n1, n2, npolyEdges
+        integer(kind=intType) :: nNodes1, nNodes2, cn1, cn2, str1, str2
+        type(oversetEdge), allocatable, dimension(:) :: polyEdges
+        type(oversetEdge) :: e1, e2
+        type(oversetString), pointer :: stringsLL, str
+        integer(kind=intType) :: npocketEdges, nFullStrings, nNodes
+        integer(kind=intType) :: ip, curElem, nElems, iStart, firstElem
+        type(oversetString), allocatable, dimension(:), target :: pocketStringsArr
+
+        ! ---------------------------------------------------------------
+        ! PocketZip 1:
+        ! First sort the edges.
+        ! ---------------------------------------------------------------
+        call qsortEdgeType(p%Edges, p%nEdges)
+
+        ! Now gather up the left-over edges for pocket zipping.
+
+        ! Over estimate of remaining pocket edges to zip
+        allocate (polyEdges(p%nEdges))
+
+        ! Eliminate the edges going through the ordered edges.
+        ! The sorted opposite edges are canceled in pairs.
+        npolyEdges = 0
+        i = 1
+        do while (i <= p%nEdges)
+
+            if (i == p%nEdges) then
+                ! This must be the last free edge:
+                e1 = p%Edges(i)
+                npolyEdges = npolyEdges + 1
+                polyEdges(npolyEdges) = e1
+                i = i + 1
+                cycle
+            end if
+
+            ! Two edges in sequence
+            e1 = p%Edges(i)
+            e2 = p%Edges(i + 1)
+
+            ! First determine if e1 is at the end of two single ended
+            ! chains. In this case the edge *will* not be paired and that's
+            ! correct.
+
+            str1 = p%cNodes(1, e1%n1) ! node1's child fullStrings ID
+            cn1 = p%cNodes(2, e1%n1) ! node1's child fullStrings node index
+            nNodes1 = strings(str1)%nNodes ! node1's child fullStrings nNodes size
+
+            str2 = p%cNodes(1, e1%n2) ! node2's child fullStrings ID
+            cn2 = p%cNodes(2, e1%n2) ! node2's child fullStrings node index
+            nNodes2 = strings(str2)%nNodes ! node2's child fullStrings nNodes size
+
+            if (str1 /= str2) then
+                if (.not. strings(str1)%isperiodic .and. &
+                    .not. strings(str2)%isperiodic .and. &
+                    (cn1 == 1 .or. cn1 == nNodes1) .and. (cn2 == 1 .or. cn2 == nNodes2)) then
+                    ! Increment just 1 in 1 to skip over edge e1.
+                    i = i + 1
+                    cycle
+                end if
+            end if
+
+            ! The sum and difference:
+            nsum1 = e1%n1 + e1%n2
+            nsum2 = e2%n1 + e2%n2
+
+            ndiff1 = e1%n2 - e1%n1
+            ndiff2 = e2%n2 - e2%n1
+
+            if (nsum1 == nsum2 .and. ndiff1 + ndiff2 == 0) then
+                ! These edges cancel. Great.
+                i = i + 2
+                cycle
+            else
+                ! Add just the first edge
+                npolyEdges = npolyEdges + 1
+                polyEdges(npolyEdges) = e1
+                i = i + 1
+            end if
+        end do
+
+        ! Define pocketMaster string
+        call nullifyString(pocketMaster)
+        pocketMaster%myID = 88
+        pocketMaster%nElems = nPolyEdges
+        pocketMaster%nNodes = nPolyEdges * 2
+        pocketMaster%nEdges = 0
+        allocate (pocketMaster%nodeData(10, 2 * nPolyEdges), &
+                  pocketMaster%intNodeData(3, 2 * nPolyEdges), &
+                  pocketMaster%conn(2, nPolyEdges))
+
+        ! Dump the data into the pocketMaster
+        do i = 1, nPolyEdges
+            pocketMaster%nodeData(:, 2 * i - 1) = p%nodeData(:, polyEdges(i)%n1)
+            pocketMaster%intNodeData(:, 2 * i - 1) = p%intNodeData(:, polyEdges(i)%n1)
+
+            pocketMaster%nodeData(:, 2 * i) = p%nodeData(:, polyEdges(i)%n2)
+            pocketMaster%intNodeData(:, 2 * i) = p%intNodeData(:, polyEdges(i)%n2)
+            pocketMaster%conn(:, i) = (/2 * i, 2 * i - 1/)
+        end do
+
+        call setStringPointers(pocketMaster)
+        call reduceGapString(pocketMaster)
+        call createNodeToElem(pocketMaster)
+
+        ! The next step is to create ordered strings based on the
+        ! connectivity. This is a purely logical operation. We don't know
+        ! how many actual strings we will need so we will use a linked
+        ! list as we go.
+
+        ! Allocate some additional arrays we need for doing the chain
+        ! searches.
+        nElems = pocketMaster%nElems
+        nNodes = pocketMaster%nNodes
+        allocate (pocketMaster%elemUsed(nElems), pocketMaster%subStr(2, nElems), &
+                  pocketMaster%cNodes(2, nNodes))
+
+        pocketMaster%cNodes = 0
+        pocketMaster%elemUsed = 0
+        curElem = 1
+        nFullStrings = 0
+
+        do while (curElem < pocketMaster%nElems)
+
+            ! Arbitrarily get the first node for my element:
+            iStart = pocketMaster%conn(1, curElem)
+            nElems = pocketMaster%nte(1, iStart)
+
+            firstElem = pocketMaster%nte(2, iStart)
+            pocketMaster%subStr(1, 1) = firstElem
+            call doChain(pocketMaster, iStart, 1)
+
+            ! We now have a boundary string stored in master%subString(1,
+            ! :nSubStr(1)). These are actually the element numbers of the
+            ! master that form a continuous chain.
+
+            ! Create or add a new string to our linked list
+            ! "stringsLL".
+            if (nFullStrings == 0) then
+                allocate (stringsLL)
+                nFullStrings = 1
+                stringsLL%next => stringsLL
+                str => stringsLL
+            else
+                allocate (str%next)
+                str%next%next => stringsLL
+                str => str%next
+                nFullStrings = nFullStrings + 1
+            end if
+
+            ! Create a substring from master based on the elements we
+            ! have in the buffer
+            call createSubStringFromElems(pocketMaster, str, nFullStrings)
+
+            ! Scan through until we find the next unused element:
+            do while ((pocketMaster%elemUsed(curElem) == 1) .and. &
+                      (curElem < pocketMaster%nElems))
+                curElem = curElem + 1
+            end do
+        end do
+
+        ! Temporary strings array for plotting and pocketZipping
+        allocate (pocketStringsArr(nFullStrings))
+        str => stringsLL
+        i = 0
+        do while (i < nFullStrings)
+            i = i + 1
+            pocketStringsArr(i) = str ! Derived type assignment
+            call nullifyString(str)
+            str => str%next
+        end do
+
+        ! Allocate space for pocket triangles.
+        ! (n-sided polygon -> n-2 triangles)
+        allocate (pocketMaster%tris(3, 10 * pocketMaster%nElems))
+        allocate (pocketMaster%edges(4 * pocketMaster%nElems))
+        pocketMaster%nTris = 0
+
+        ! Build the pocketMaster tree
+        pocketMaster%tree => kdtree2_create(pocketMaster%x, sort=.True.)
+
+        if (debugZipper) then
+            open (unit=101, file="strings_pocket.dat", form='formatted')
+            write (101, *) 'TITLE = "PocketStrings Data" '
+
+            write (101, *) 'Variables = "X" "Y" "Z" "Nx" "Ny" "Nz" "Vx" "Vy" "Vz" "ind" &
+                 &"gapID" "gapIndex" "otherID" "otherIndex" "ratio"'
+            do i = 1, nFullStrings
+                ! Temporarily allocate otherID
+                allocate (pocketStringsArr(i)%otherID(2, pocketStringsArr(i)%nNodes))
+                pocketStringsArr(i)%otherID = -1
+
+                call writeOversetString(pocketStringsArr(i), pocketStringsArr, &
+                                        nFullStrings, 101)
+            end do
+            close (101)
+        end if
+
+        ! Loop over pocketStrings and begin pocketZip starting
+        ! from smallest convex ear.
+        do i = 1, nFullStrings
+            if (debugZipper) then
+                print *, 'Pocket Zipping String ', i, ' of ', nFullStrings
+            end if
+            pocketZiploop: do while (pocketStringsArr(i)%nNodes > 2)
+                ! Each pass zips one triangle. Keep zipping
+                ! until last triangle is zipped in the pocket polygon.
+                call pocketZip(pocketStringsArr(i))
+            end do pocketZiploop
+        end do
+
+        ! Destroy the strings array
+        do i = 1, nFullStrings
+            call deallocateString(pocketStringsArr(i))
+        end do
+        deallocate (pocketStringsArr, polyEdges)
+
+    end subroutine makePocketZip
+
+    subroutine pocketZip(s)
+
+        use constants
+        use kdtree2_module
+        use utils, only: mynorm2, cross_prod
+
+        implicit none
+
+        ! Input parameters
+        type(oversetString), intent(inout), target :: s
+
+        ! Local variables
+        integer(kind=intType) :: i, j, k, ii, im1, ip1, N
+        integer(kind=intType) :: nNodes, nElems, iimin
+        real(kind=realType), dimension(3) :: v1, v2, norm, c
+        real(kind=realType) :: cosCutoff, cosTheta, r2, v1nrm, v2nrm, costhetaMax
+        real(kind=realType) :: dp, dpMax
+
+        integer(Kind=intType), dimension(:), allocatable :: nodeMap, badNode
+        type(kdtree2_result), dimension(:), allocatable :: results
+        logical :: added, iiMinSet
+        real(kind=realType), parameter :: fact = 0.95_realType
+        N = s%nNodes
+        allocate (results(25), nodeMap(N), badNode(N))
+        nodeMap = 1
+        badNode = 0 ! Will become 1 if bad
+        outerZiploop: do
+
+            ! No choice for the last triangle:
+            if (N == 3) then
+                ii = 1
+                im1 = prevNode(ii)
+                ip1 = nextNode(ii)
+                ! We don't call addPotentialTriangle because we don't have a
+                ! choice anymore. Just call the raw addTri command
+                call addTri(ip1, s, ii, s, im1, s)
+                ! and flag the node as gone
+                nodeMap(ii) = 0
+                exit outerZipLoop
+            end if
+
+            iiMinSet = .False.
+
+            ! First find the largest dot product:
+            dpMax = -one
+            nodeloop1: do ii = 1, N
+
+                if (badNode(ii) == 1) then
+                    cycle nodeLoop1
+                end if
+
+                ip1 = nextNode(ii)
+                im1 = prevNode(ii)
+
+                ! Determine the angle between the vectors
+                v1 = s%x(:, im1) - s%x(:, ii)
+                v2 = s%x(:, ip1) - s%x(:, ii)
+                v1nrm = mynorm2(v1)
+                v2nrm = mynorm2(v2)
+                call cross_prod(v2, v1, norm)
+                norm = norm / mynorm2(norm)
+                dpMax = max(dpmax, dot_product(norm, s%norm(:, ii)))
+            end do nodeloop1
+
+            ! Next find the largest cosTheta that is winthin a factor
+            !  of dpMax
+            costhetaMax = -Large
+            nodeloop2: do ii = 1, N
+
+                if (badNode(ii) == 1) then
+                    cycle nodeLoop2
+                end if
+
+                ip1 = nextNode(ii)
+                im1 = prevNode(ii)
+
+                ! Determine the angle between the vectors
+                v1 = s%x(:, im1) - s%x(:, ii)
+                v2 = s%x(:, ip1) - s%x(:, ii)
+                v1nrm = mynorm2(v1)
+                v2nrm = mynorm2(v2)
+                call cross_prod(v2, v1, norm)
+                norm = norm / mynorm2(norm)
+                dp = dot_product(norm, s%norm(:, ii))
+                if (dp > dpMax * fact) then ! We take this
+                    costheta = dot_product(v1, v2) / (v1nrm * v2nrm)
+                    if (cosTheta > cosThetaMax) then
+                        costhetaMax = costheta
+                        iiMinSet = .True.
+                        iimin = ii
+                    end if
+                end if
+            end do nodeloop2
+
+            if (iiMinSet) then
+                ! Zip about node "iimin" if it was set:
+                ii = iimin
+                ip1 = nextNode(ii)
+                im1 = prevNode(ii)
+                call addPotentialTriangle(s, ip1, ii, im1, nodeMap, results, added)
+                if (added) then
+                    ! This triangle was good!
+                    exit outerZipLoop
+                else
+                    ! Bad node. Need to cycle through rest of pocket nodes.
+                    ! Remember this bad node in next cycle.
+                    badNode(ii) = 1
+                    cycle outerZiploop
+                end if
+            else
+                ! What does this mean? We didn't find any node to zip. Are they all bad?
+                print *, 'Problem with pocket zipper. Somehow we were not able to find "&
+                     &"node to add a triangle on. This should not happen. Contact the "&
+                     &"Developers.'
+                stop
+            end if
+        end do outerZiploop
+
+        ! Modify the pocketStrings to remove the two elements and the node
+        ! that got eliminated due to pocketZipping.
+        call shortenString(s, nodeMap)
+        deallocate (nodeMap, badNode, results)
+
+    contains
+        function nextNode(ii)
+            implicit none
+            integer(kind=intType) :: ii, nextNode
+            nextNode = ii + 1
+            if (ii == N) then
+                nextNode = 1
+            end if
+        end function nextNode
+
+        function prevNode(ii)
+            implicit none
+            integer(kind=intType) :: ii, prevNode
+            prevNode = ii - 1
+            if (ii == 1) then
+                prevNode = N
+            end if
+        end function prevNode
+    end subroutine pocketZip
+
+    subroutine computeTriSurfArea(master, area)
+
+        ! Computes area sum of all triangles belonging to object master
+        use constants
+        use utils, only: mynorm2, cross_prod
+        implicit none
+
+        ! Input parameters
+        type(oversetString), intent(in) :: master
+        real(kind=realType), intent(out) :: area
+
+        ! Local variables
+        integer(kind=intType) :: i, n1, n2, n3
+        real(kind=realType), dimension(3) :: v1, v2, norm
+
+        area = 0.0
+        do i = 1, master%nTris
+            n1 = master%tris(1, i)
+            n2 = master%tris(2, i)
+            n3 = master%tris(3, i)
+
+            v1 = master%x(:, n2) - master%x(:, n1)
+            v2 = master%x(:, n3) - master%x(:, n1)
+            call cross_prod(v1, v2, norm)
+            area = area + half * mynorm2(norm)
+        end do
+
+    end subroutine computeTriSurfArea
+
+    function triOverlap(pt1, pt2, pt3, str, i1, i2)
+
+        use constants
+        use utils, only: mynorm2, cross_prod
+        implicit none
+
+        ! Input/Output
+        real(kind=realType), dimension(3), intent(in) :: pt1, pt2, pt3
+        integer(kind=intType), intent(in) :: i1, i2
+        type(oversetString), intent(in) :: str
+
+        ! Working
+        logical :: triOverlap, inTri
+        integer(kind=intType) :: i
+        real(kind=realType) :: triNorm(3)
+
+        ! Note: This is a dumb loop. We need to do a spatial serch here to
+        ! only check the nodes around the current point.
+
+        call cross_prod(pt2 - pt1, pt3 - pt1, triNorm)
+        triNorm = triNorm / mynorm2(triNorm)
+
+        triOverlap = .False.
+        do i = 1, str%nNodes
+            if (i /= i1 .and. i /= i2) then
+                if (dot_product(str%norm(:, i), triNorm) > 0.8) then
+                    call pointInTriangle(pt1, pt2, pt3, str%x(:, i), inTri)
+                    if (inTri) then
+                        triOverlap = .true.
+                        exit
+                    end if
+                end if
+            end if
+        end do
+    end function triOverlap
+
+    subroutine shortenString(s, nodeMap)
+
+        ! This is an auxilary routine that take a string 's', and a node
+        ! map of len s%nNodes, with 1 or 0. A 1 means that the node will
+        ! be in the shortened string, 0 means that the node should be
+        ! deleted.
+        use constants
+        implicit none
+
+        ! Input/Output
+        type(oversetString) :: s
+        integer(kind=intType), dimension(:), intent(inout) :: nodeMap
+
+        ! Working
+        integer(kind=intType) :: nNodes, nElems, nRemoved, i, j
+        real(kind=realType), dimension(:, :), pointer :: nodeDataTmp
+        integer(kind=intType), dimension(:, :), pointer :: connTmp, intNodeDataTmp
+        integer(kind=intType), dimension(:), pointer :: pNodesTmp
+
+        ! Now we will modify our string to remove the elements and nodes
+        ! that got knocked off due to self zipping. This way the calling
+        ! process still sees the same string, it just gets a little
+        ! shorter.
+
+        ! Save pointers to existing data
+        nNodes = s%nNodes
+        nElems = s%nElems
+        nodeDataTmp => s%nodeData
+        intNodeDataTmp => s%intNodeData
+        connTmp => s%conn
+        pNodesTmp => s%pNodes
+
+        ! Convert the nodeMap which currently contains a one if the node
+        ! still exists and 0 if it doesn't. This will convert it to the new
+        ! node numbers. Ie nodeMap(i) gives the new node index of the
+        ! shorted chain. If nodeMap(i) = 0, it is no longer part of the
+        ! chain.
+        j = 0
+        nRemoved = 0
+        do i = 1, s%nNodes
+            if (nodeMap(i) == 1) then
+                j = j + 1
+                nodeMap(i) = j
+            else
+                nRemoved = nRemoved + 1
+            end if
+        end do
+
+        !  Update the cNodes in the parent so they point to the updated node
+        ! numbers. Note that the nodes that have been eliminated, have cNode
+        ! = 0, which will identify that it no longer has a child node.
+        do i = 1, s%nNodes
+            s%p%cNodes(:, s%pNodes(i)) = (/s%myID, nodeMap(i)/)
+        end do
+
+        ! Update the number of nodes/elems in our shorted chain. Every
+        ! zipper reduces the number of nodes and number of elems by 1
+        s%nNodes = s%nNodes - nRemoved
+        s%nElems = s%nElems - nRemoved
+
+        allocate (s%nodeData(10, s%nNodes), s%intNodeData(3, s%nNodes), &
+                  s%pNodes(s%nNodes), s%conn(2, s%nElems))
+
+        ! Set the pointers for the new string
+        call setStringPointers(s)
+
+        do i = 1, nNodes
+            if (nodeMap(i) /= 0) then
+                s%nodeData(:, nodeMap(i)) = nodeDataTmp(:, i)
+                s%intNodeData(:, nodeMap(i)) = intNodeDataTmp(:, i)
+                s%pNodes(nodeMap(i)) = pNodesTmp(i)
+            end if
+        end do
+
+        ! Since we know the string was in order, we can simply redo the connectivity
+        do i = 1, s%nElems
+            s%conn(:, i) = (/i, i + 1/)
+        end do
+
+        if (s%isPeriodic) then
+            s%conn(2, s%nElems) = 1
+        end if
+
+        ! Dellocate the existing memory
+        deallocate (nodeDataTmp, intNodeDataTmp, connTmp, pNodesTmp)
+
+        ! Recreate the node to elem
+        if (s%nNodes >= 3) then
+            call createNodeToElem(s)
+        end if
+
+    end subroutine shortenString
+
+    subroutine addPotentialTriangle(s, im1, ii, ip1, nodeMap, results, added)
+
+        ! Common routine (for pocketZip and selfZip) to potentially add a
+        ! triangle resulting from a single string.
+        use constants
+        use kdtree2_priority_queue_module
+        use kdtree2_module
+        implicit none
+
+        ! Input/Output
+        type(oversetString) :: s
+        integer(kind=intType), intent(in) :: im1, ii, ip1
+        integer(kind=intType), intent(inout), dimension(:) :: nodeMap
+        type(kdtree2_result), dimension(:), allocatable :: results
+        logical, intent(out) :: added
+        ! Working:
+        real(kind=realType) :: r2
+        real(kind=realType), dimension(3) :: v1, v2, norm, c
+        integer(kind=intType) :: nFound, nalloc, idx, k, j, i
+        logical :: overlapFound, inTri
+
+        ! We may have a valid triangle. We need to make sure we
+        ! don't overlap anyone else.
+        !
+        ! xim1 +
+        !      | \
+        !      |   \
+        !      |     c
+        !      |       \
+        !      |         \
+        !      +----------+
+        !      xi         xip1
+        ! We do a ball search based at 'c' which is just the
+        ! (average of xip1 and xim1) using a radius defined as the
+        ! maximum of (the distance between 'c' and 'xi', half
+        ! length of xip1 to xim1)
+        !
+        added = .False.
+
+        c = half * (s%x(:, ip1) + s%x(:, im1))
+        r2 = (c(1) - s%x(1, ii))**2 + (c(2) - s%x(2, ii))**2 + (c(3) - s%x(3, ii))**2
+
+        r2 = max(r2, (s%x(1, ip1) - s%x(1, im1))**2 + (s%x(2, ip1) - s%x(2, im1))**2 + &
+                 (s%x(3, ip1) - s%x(3, im1))**2)
+
+        nFound = 0
+        outerLoop: do
+            nalloc = size(results)
+            call kdtree2_r_nearest(s%p%tree, c, r2, nfound, nalloc, results)
+            if (nFound < nAlloc) then
                 exit outerLoop
-             end if
-
-             ! We are not 100% sure that we found the minium
-             ! yet. Make nAlloc twice as big and start over.
-             nSearch = nSearch * 2
-             nSearch = min(nSearch, master%nNodes)
-             if (nSearch > nAlloc) then
-                deallocate(results)
-                nAlloc = nAlloc*2
-                allocate(results(nAlloc))
-             end if
-          end do outerLoop
-       end do nodeLoop
-
-       ! Do a sanity check to fix some extraordinary cases. If a node
-       ! hasn't found a neighbouring string but each of the two nodes
-       ! either side have, and they found the *same* string, just
-       ! accept that.
-
-       do j=3, str%nNodes-2
-          if (str%otherID(1, j) == -1) then
-             ! Bad node:
-             oid(1) = str%otherID(1, j-2)
-             oid(2) = str%otherID(1, j-1)
-             oid(3) = str%otherID(1, j+1)
-             oid(4) = str%otherID(1, j+2)
-
-             if (oid(1) /= -1 .and. &
-                  oid(1) == oid(2) .and. &
-                  oid(1) == oid(3) .and. &
-                  oid(1) == oid(4)) then
+            end if
 
-                if (debugZipper) then
-                   print *,'****************************************************************'
-                   print *,'Warning: Fixing a bad association on string ', i, 'at index', j
-                   print *,'****************************************************************'
+            ! Allocate more space and keep going
+            deallocate (results)
+            nAlloc = nAlloc * 2
+            allocate (results(nAlloc))
+        end do outerLoop
+
+        ! We can now be sure that we have all the points inside our
+        ! ball. Next we proceed to systematically check them.
+        overlapFound = .False.
+        nodeFoundLoop: do k = 1, nFound
+            ! Note that we do check nodes from our own string,
+            ! except for the the three nodes we're dealing
+            ! with. Remember that we are working in our parent's
+            ! ording here.
+            idx = results(k)%idx
+
+            notPartofTriangle: if (idx /= s%pNodes(im1) .and. &
+                                   idx /= s%pNodes(ii) .and. idx /= s%pNodes(ip1)) then
+
+                ! Only check if the node normal of the point we're
+                ! checking is in the same direction as the triangle.
+                if (dot_product(s%norm(:, ii), s%p%norm(:, idx)) > zero) then
+
+                    ! Finally do the actual trianlge test
+                    call pointInTriangle(s%x(:, ip1), s%x(:, ii), s%x(:, im1), &
+                                         s%p%x(:, idx), inTri)
+                    if (inTri) then
+                        ! As soon as 1 is in the triangle, we know the
+                        ! triangle is no good.
+                        overlapFound = .True.
+                        exit nodeFoundLoop
+                    end if
+                end if
+            end if notPartofTriangle
+        end do nodeFoundLoop
+
+        if (.not. overlapFound) then
+
+            ! This triangle is good!
+            added = .True.
+
+            ! Call the generic addTri Routine. Here all the ndoes from the
+            ! triangle come from the same string.
+            call addTri(ip1, s, ii, s, im1, s)
+
+            ! Flag this node as gone
+            nodeMap(ii) = 0
+
+        end if
+    end subroutine addPotentialTriangle
+
+    subroutine closestSymmetricNode(s1, s2, i, j)
+        use constants
+        use utils, only: mynorm2
+
+        implicit none
+
+        ! Input/Output
+        type(oversetString) :: s1, s2
+        integer(kind=intType), intent(out) :: i, j
+        real(kind=realType) :: minDist, dist
+        integer(kind=intType) :: ii
+
+        ! Working:
+        minDist = large
+
+        do ii = 1, s1%nNodes
+            ! "The other index of the matching node on the other string is
+            ! me" ie. "I point to you and you point to me"
+
+            if (s2%otherID(2, s1%otherID(2, ii)) == ii) then
+
+                dist = mynorm2(s1%x(:, ii) - s2%x(:, s1%otherID(2, ii)))
+
+                if (dist < minDist) then
+                    minDist = dist
+                    i = ii
+                    j = s1%otherID(2, ii)
                 end if
+            end if
+        end do
+
+    end subroutine closestSymmetricNode
+
+    subroutine stringMatch(strings, nStrings, debugZipper)
+        use constants
+        use kdtree2_priority_queue_module
+        use kdtree2_module
+        use utils, only: mynorm2
+
+        implicit none
+
+        ! Input/output
+        integer(kind=intType), intent(in) :: nStrings
+        type(oversetString), dimension(nstrings), target :: strings
+        logical, intent(in) :: debugZipper
+
+        ! Working
+        integer(kind=intType) :: i, j, k, idx, oid(4)
+        integer(kind=intType) :: nAlloc, nUnique, nSearch
+        type(kdtree2_result), allocatable, dimension(:) :: results
+        type(oversetString), pointer :: str, master
+        logical :: checkLeft, checkRight, concave
+        logical :: checkLeft2, checkRight2, concave2
+        logical :: leftOK, rightOK, isEndNode
+        real(kind=realType), dimension(3) :: xj, xjp1, xjm1, normj
+        real(kind=realType), dimension(3) :: xk, xkp1, xkm1, normk
+        real(kind=realType), dimension(3) :: myPt, otherPt, eNorm
+        real(kind=realType) :: fact, dStar, curDist, minDist, edgeLength
+        integer(kind=intTYpe) :: otherID, otherIndex, closestOtherIndex, closestOtherString
+        integer(kind=intType) :: id, index
+        real(kind=realType) :: timeA, pt(3), v(3), cosTheta, cutOff, dist, maxH, ratio
+
+        if (nStrings == 0) then
+            return
+        end if
+
+        ! Now make we determine the nearest point on another substring
+        ! for each point.
+        nAlloc = 50
+        allocate (results(nAlloc))
+        master => strings(1)%p
+
+        ! Loop over the fullStrings
+        do i = 1, nStrings
+            str => strings(i) ! Easier readability
+
+            ! No need to do anything with the pocket string.
+            if (str%isPocket) then
+                cycle
+            end if
+
+            ! Allocate space for otherID as it is not done yet
+            if (associated(str%otherID)) then
+                deallocate (str%otherID)
+            end if
+
+            allocate (str%otherID(2, str%nNodes))
+            str%otherID = -1
+
+            ! Loop over my nodes and search for it in master tree
+            nodeLoop: do j = 1, str%nNodes
 
-                ! We have a '-1' surrounded by the same gap string
-
-                ! Set the stringID
-                str%otherID(1, j) = oid(1)
-
-                ! Estimate what the other index should be. Since this
-                ! is in the middle of the string, the exact index
-                ! shouldn't matter.
-                str%otherID(2, j) = str%otherID(2, j-1)
-             end if
-          end if
-       end do
-
-    end do
-  end subroutine stringMatch
-
-  subroutine writeOversetString(str, strings, n, fileID)
-
-    use constants
-    use utils, only : mynorm2
-    use commonFormats, only : sci12, int5
-    implicit none
-
-    integer(kind=intType), intent(in) :: fileID, n
-    type(oversetString), intent(in) :: str
-    type(oversetString), intent(in), dimension(n) :: strings
-    integer(kind=intType) :: i, j, id, index
-    real(kind=realType), dimension(3) :: myPt, otherPT, vec
-    real(kind=realType) :: maxH, dist, ratio
-
-    character(80) :: zoneName
-
-
-    write (zoneName,"(a,I5.5)") "Zone T=gap_", str%myID
-    write (fileID, *) trim(zoneName)
-
-    write (fileID,*) "Nodes = ", str%nNodes, " Elements= ", str%nElems, " ZONETYPE=FELINESEG"
-    write(fileID, *) "DATAPACKING=BLOCK"
-
-    ! Nodes
-    do j=1,3
-       do i=1, str%nNodes
-          write(fileID, sci12) str%x(j, i)
-       end do
-    end do
-
-    ! Node normal
-    do j=1,3
-       do i=1, str%nNodes
-          write(fileID, sci12) str%norm(j, i)
-       end do
-    end do
-
-    ! Vector between closest points
-    do j=1,3
-       do i=1, str%nNodes
-          myPt = str%x(:, i)
-          id = str%otherID(1, i)
-          if (id /= -1) then
-             index = str%otherID(2, i)
-             otherPt = strings(id)%x(:, index)
-             vec = otherPt - myPt
-          else
-             vec = zero
-          end if
-
-          write(fileID, sci12) vec(j)
-       end do
-    end do
-
-    ! global node ID
-    do i=1, str%nNodes
-       write(fileID, sci12) real(str%ind(i))
-    end do
-
-    ! gapID
-    do i=1, str%nNodes
-       write(fileID, sci12) real(str%myID)
-    end do
-
-    ! gap Index
-    do i=1, str%nNodes
-       write(fileID, sci12) real(i)
-    end do
-
-    if (associated(str%otherID)) then
-       ! otherID
-       do i=1, str%nNodes
-          write(fileID, sci12) real(str%otherID(1, i))
-       end do
-
-       ! other Index
-       do i=1, str%nNodes
-          write(fileID, sci12) real(str%otherID(2, i))
-       end do
-    else
-       do i=1, 2*str%nNodes
-          write(fileID, sci12) zero
-       end do
-    end if
-
-
-    do i=1, str%nNodes
-       myPt = str%x(:, i)
-       id = str%otherID(1, i)
-       if (id /= -1) then
-          index = str%otherID(2, i)
-          otherPt = strings(id)%x(:, index)
-          dist = mynorm2(myPt - otherPt)
-          maxH = max(str%h(i), strings(id)%h(index))
-          ratio = dist/maxH
-       else
-          ratio = zero
-       end if
-
-       write(fileID, sci12) ratio
-    end do
-
-    do i=1, str%nElems
-       write(fileID, int5) str%conn(1, i), str%conn(2, i)
-    end do
-
-  end subroutine writeOversetString
-
-  subroutine writeOversetMaster(str,fileID)
-
-    use constants
-    use utils, only : mynorm2
-    use commonFormats, only :  sci12, int5
-    implicit none
-
-    type(oversetString), intent(in) :: str
-    integer(kind=intType), intent(in) :: fileID
-    integer(kind=intType) :: i, j, id, index
-    real(kind=realType), dimension(3) :: myPt, otherPT, vec
-    real(kind=realType) :: maxH, dist, ratio
-
-    character(80) :: zoneName
-
-
-    write (zoneName,"(a,I5.5)") "Zone T=gap_", str%myID
-    write (fileID, *) trim(zoneName)
-
-    write (fileID,*) "Nodes = ", str%nNodes, " Elements= ", str%nElems, " ZONETYPE=FELINESEG"
-    write(fileID, *) "DATAPACKING=BLOCK"
-
-    ! Nodes
-    do j=1,3
-       do i=1, str%nNodes
-          write(fileID, sci12) str%x(j, i)
-       end do
-    end do
-
-    do i=1, str%nElems
-       write(fileID, int5) str%conn(1, i), str%conn(2, i)
-    end do
-
-  end subroutine writeOversetMaster
-
-
-  subroutine writeOversetTriangles(string, fileName, startTri, endTri)
-
-    use constants
-    use commonFormats, only : sci12
-    implicit none
-
-    type(oversetString), intent(inout) :: string
-    integer(kind=intType), intent(in) :: startTri, endTri
-    character(*) :: fileName
-    integer(kind=intType) :: i, j
-    character(80) :: zoneName
-
-    open(unit=101, file=trim(fileName), form='formatted')
-    write(101,*) 'TITLE = "Triangles"'
-    write(101,*) 'Variables = "X", "Y", "Z"'
-
-    write (zoneName,"(a,I5.5)") "Zone T=triangles_", string%myID
-    write (101, *) trim(zoneName)
-
-    write (101,*) "Nodes = ", string%nNodes, " Elements= ", (endTri-startTri+1), " ZONETYPE=FETRIANGLE"
-    write (101,*) "DATAPACKING=POINT"
-
-    ! Write all the coordinates
-    do i=1, string%nNodes
-       do j=1, 3
-          write(101, sci12, advance='no') string%x(j, i)
-       end do
-       write(101,"(1x)")
-    end do
-
-    do i=startTri, endTri
-       write(101, "(*(I7))") string%tris(1, i), string%tris(2, i), string%tris(3, i)
-    end do
-    close(101)
-  end subroutine writeOversetTriangles
-
-  subroutine writeZipperDebug(str)
-
-    ! Save the state of an unsplit string such that it can be debugged
-    ! later without running overset interpolation.
-
-    use constants
-    implicit none
-
-    type(oversetString) :: str
-    integer(kind=intType) :: i, j
-
-    open(unit=101, file="debug.zipper", form='formatted')
-    write(101, *) str%nNodes
-    write(101, *) str%nElems
-    do i=1, str%nNodes
-       do j=1, 10
-          write (101,*) str%nodeData(j, i)
-       end do
-    end do
-
-    do i=1, str%nElems
-       do j=1, 2
-          write (101,*) str%conn(j, i)
-       end do
-    end do
-
-    do i=1, str%nNodes
-       do j=1, 3
-          write (101,*) str%intNodeData(j, i)
-       end do
-    end do
-    close(101)
-  end subroutine writeZipperDebug
-
-  subroutine loadZipperDebug(fileName, str)
-
-    ! Save the state of an unsplit string such that it can be debugged
-    ! later without running overset interpolation.
-
-    use constants
-    implicit none
-
-    character(*), intent(in) :: fileName
-    type(oversetString) :: str
-    integer(kind=intType) :: i, j
-
-    open(unit=101, file=fileName, form='formatted')
-    read(101, *) str%nNodes
-    read(101, *) str%nElems
-    call nullifyString(str)
-
-    allocate(str%nodeData(10, str%nNodes))
-    allocate(str%conn(2, str%nElems))
-    allocate(str%intNodeData(3, str%nNodes))
-
-    do i=1, str%nNodes
-       do j=1, 10
-          read (101,*) str%nodeData(j, i)
-       end do
-    end do
-
-    do i=1, str%nElems
-       do j=1, 2
-          read (101,*) str%conn(j, i)
-       end do
-    end do
-
-    do i=1, str%nNodes
-       do j=1, 3
-          read (101,*) str%intNodeData(j, i)
-       end do
-    end do
-    close(101)
-
-    call setStringPointers(str)
-
-  end subroutine loadZipperDebug
-
-  subroutine pointInTriangle(x1, x2, x3, pt, inTri)
-
-    use constants
-    use utils, only : cross_prod
-    implicit none
-    real(kind=realType), dimension(3), intent(in) :: x1, x2, x3, pt
-    logical, intent(out) :: inTri
-
-    if (sameSide(pt,x1, x2,x3) .and. sameSide(pt,x2, x1,x3) .and. sameSide(pt,x3, x1,x2)) then
-       inTri = .True.
-    else
-       inTri = .false.
-    end if
-
-  contains
-    function sameSide(p1, p2, a, b)
-
-      implicit none
-      logical :: sameSide
-      real(kind=realType), dimension(3) ::p1, p2, a, b, cp1, cp2
-
-      sameSide = .False.
-      call cross_prod(b-a, p1-a, cp1)
-      call cross_prod(b-a, p2-a, cp2)
-      if (dot_product(cp1, cp2) >= zero) then
-         sameSide = .true.
-      end if
-    end function SameSide
-  end subroutine pointInTriangle
-
-  function positiveTriArea(p1, p2, p3, norm)
-
-    use constants
-    use utils, only : cross_prod
-    implicit none
-    real(kind=realType), intent(in), dimension(3) :: p1, p2, p3, norm
-    real(kind=realType), dimension(3) :: n
-    logical :: positiveTriArea
-
-    call cross_prod(p2-p1, p3-p1, n)
-    if (dot_product(n, norm) > zero) then
-       positiveTriArea = .True.
-    else
-       positiveTriArea = .False.
-    end if
-  end function positiveTriArea
-
-  subroutine getNodeInfo(str, j, checkLeft, checkRight, concave, xj, xjm1, xjp1, normj)
-
-    use constants
-    use utils, only : cross_prod
-    implicit none
-
-    type(oversetString) :: str
-    integer(kind=intType) :: j
-    logical ::checkLeft, checkRight, concave
-    real(kind=realType), dimension(3) :: xj, xjm1, xjp1, normj
-    real(kind=realType), dimension(3) :: v
-    checkLeft = .True.
-    checkRight = .True.
-    xj = str%x(:, j)
-    normj = str%norm(:, j)
-    concave = .False.
-    if (str%isPeriodic) then
-       if (j > 1 .and. j < str%nNodes) then
-          xjm1 = str%x(:, j-1)
-          xjp1 = str%x(:, j+1)
-       else if (j == 1) then
-          xjm1 = str%x(:, str%nNodes)
-          xjp1 = str%x(:, j+1)
-       else if (j == str%nNodes) then
-          xjm1 = str%x(:, j-1)
-          xjp1 = str%x(:, 1)
-       end if
-    else
-       ! Not periodic. Assume the ends are concave. This will
-       ! forces checking if both the left and right are ok,
-       ! which since the leftOK and rightOK's default to
-       ! .True., it just checks the one triangle which is what
-       ! we want.
-       if (j == 1) then
-          checkLeft = .False.
-          concave = .True.
-       end if
-
-       if (j == str%nNodes) then
-          checkRight = .False.
-          concave = .True.
-       end if
-
-       if (checkLeft)  &
-            xjm1 = str%x(:, j-1)
-       if (checkRight) &
-            xjp1 = str%x(:, j+1)
-    end if
-
-    if (checkLeft .and. checkRight) then
-
-       ! Determine if the point is convex or concave provided
-       !  we have both neighbours.
-       call cross_prod(xjm1 - xj, xjp1 - xj, v)
-
-       if (dot_product(v, normj) > zero) then
-          concave = .True.
-       end if
-    end if
-
-  end subroutine getNodeInfo
-
-  function nodeInFrontOfEdges(pt, concave, checkLeft, checkRight, xj, xjm1, xjp1, normj)
-
-    use constants
-    implicit none
-
-    real(kind=realType), dimension(3), intent(in) :: pt, xj, xjm1, xjp1, normj
-    logical, intent(in) :: concave, checkLeft, checkRight
-    logical ::  nodeInFrontOfEdges
-    logical :: leftOK, rightoK
-
-    nodeInFrontOfEdges = .True.
-    leftOK = .True.
-    rightOK = .True.
-    if (checkLeft .and. .not. positiveTriArea(xj, xjm1, pt, normj)) then
-       leftOK = .False.
-    end if
-
-    if (checkRight .and. .not. positiveTriArea(xjp1, xj, pt, normj)) then
-       rightOK = .False.
-    end if
-
-    if (concave) then
-       if (.not. (leftOK .and. rightOK)) then
-          nodeInFrontofEdges = .False.
-       end if
-    else
-       if (.not. (leftOK .or. rightOK)) then
-          nodeInFrontOfEdges = .False.
-       end if
-    end if
-  end function nodeInFrontOfEdges
-
-  function overlappedEdges(str, j, pt)
-
-    use constants
-    use utils, only : mynorm2, cross_prod
-
-    implicit none
-
-    ! Input/output
-    real(kind=realType), dimension(3), intent(in) :: pt
-    type(oversetString) , intent(in) :: str
-    integer(kind=intType), intent(in) :: j
-    logical :: overlappedEdges
-
-    ! Working
-    integer(kind=intType) :: i
-    real(kind=realType), dimension(3) :: v, p1, p2, u, normA,  normB, x0, norm
-    real(kind=realType) :: uNrm, x1, x2, x3, x4, y1, y2, y3, y4, idet, Px, Py
-    real(kind=realType) :: u1, u2, v1, v2, w1, w2
-    real(kind=realType) :: s1, s2, tmp, line(2), vec(2), tol
-    overlappedEdges = .False.
-    tol = 1e-6
-    ! We will conver this completely into a 2D problem by projecting
-    ! everything onto the plane defined by norm. x0 is at the origin of
-    ! the 2D system and the xaxis point from x0 to pt
-
-    x0 = str%x(:, j)
-    norm = str%norm(:, j)
-
-    u = pt - x0
-    uNrm = mynorm2(u)
-    u = u/uNrm
-
-    call cross_prod(norm, u, v)
-    v = v /mynorm2(v)
-
-    ! Now u,v,norm is an orthogonal coordinate system
-    x1 = zero
-    y1 = zero
-    x2 = uNrm
-    y2 = zero
-    overLappedEdges = .False.
-
-    ! Loop over the number of edges on my string
-    elemLoop: do i=1, str%nElems
-       ! Don't check the ones right next to me, since they will
-       ! "overlap" exactly at x0
-
-       if (str%conn(1, i) == j .or. str%conn(2, i) == j) then
-          cycle
-       end if
-
-       ! Project the two points into the plane
-       p1 = str%x(:, str%conn(1, i))
-       p2 = str%x(:, str%conn(2, i))
-
-       normA = str%norm(:, str%conn(1, i))
-       normB = str%norm(:, str%conn(2, i))
-
-       ! Make sure the edges are on the same plane, otherwise this is
-       ! meaningless
-       if (dot_product(normA, norm) < half .or. dot_product(normb, norm) < half) then
-          cycle
-       end if
-       ! Project the two points onto the plane
-       p1 = p1 - norm*dot_product(p1 - x0, norm)
-       p2 = p2 - norm*dot_product(p2 - x0, norm)
-
-       ! Now get the 2D coordinates
-       x3 = dot_product(p1-x0, u)
-       y3 = dot_product(p1-x0, v)
-       x4 = dot_product(p2-x0, u)
-       y4 = dot_product(p2-x0, v)
-
-       u1 = x2 - x1
-       y2 = y2 - y1
-
-       v1 = x4 - x3
-       v2 = y4 - y3
-
-       w1 = x1- x3
-       w2 = y1- y3
-
-       s1 = (v2*w1 - v1*w2)/(v1*u2 - v2*u1)
-       s2 = (u1*w2 - u2*w1)/(u1*v2 - u2*v1)
-
-       if (s1 > tol .and. s1 < one - tol .and. s2 > tol .and. s2 < one - tol) then
-          overlappedEdges = .True.
-          exit elemLoop
-       end if
-    end do elemLoop
-
-  end function overlappedEdges
-
-  function overlappedEdges2(str, pt1, norm, pt2)
-
-    use constants
-    use utils, only : mynorm2, cross_prod
-    implicit none
-
-    ! Input/output
-    real(kind=realType), dimension(3), intent(in) :: pt1, pt2, norm
-    type(oversetString) , intent(in) :: str
-    logical :: overlappedEdges2
-
-    ! Working
-    integer(kind=intType) :: i
-    real(kind=realType), dimension(3) :: v, p1, p2, u, normA,  normB, x0
-    real(kind=realType) :: uNrm, x1, x2, x3, x4, y1, y2, y3, y4, idet, Px, Py
-    real(kind=realType) :: u1, u2, v1, v2, w1, w2
-    real(kind=realType) :: s1, s2, tmp, line(2), vec(2), tol
-
-    tol = 1e-6
-    ! We will conver this completely into a 2D problem by projecting
-    ! everything onto the plane defined by norm. x0 is at the origin of
-    ! the 2D system and the xaxis point from x0 to pt
-
-    x0 = pt1
-
-    u = pt2 - x0
-    uNrm = mynorm2(u)
-    u = u/uNrm
-
-    call cross_prod(norm, u, v)
-    v = v /mynorm2(v)
-
-    ! Now u,v,norm is an orthogonal coordinate system
-    x1 = zero
-    y1 = zero
-    x2 = uNrm
-    y2 = zero
-    overLappedEdges2 = .False.
-
-    ! Loop over the number of edges on my string
-    elemLoop: do i=1, str%nElems
-
-       ! Project the two points into the plane
-       p1 = str%x(:, str%conn(1, i))
-       p2 = str%x(:, str%conn(2, i))
-
-       normA = str%norm(:, str%conn(1, i))
-       normB = str%norm(:, str%conn(2, i))
-
-       ! Make sure the edges are on the same plane, otherwise this is
-       ! meaningless
-       if (dot_product(normA, norm) < half .or. dot_product(normb, norm) < half) then
-          cycle
-       end if
-       ! Project the two points onto the plane
-       p1 = p1 - norm*dot_product(p1 - x0, norm)
-       p2 = p2 - norm*dot_product(p2 - x0, norm)
+                ! Set the initial maximum number of neighbours
+                ! This can be at most the total number of nodes
+                nSearch = min(nAlloc, master%nNodes)
 
-       ! Now get the 2D coordinates
-       x3 = dot_product(p1-x0, u)
-       y3 = dot_product(p1-x0, v)
-       x4 = dot_product(p2-x0, u)
-       y4 = dot_product(p2-x0, v)
-
-       u1 = x2 - x1
-       y2 = y2 - y1
-
-       v1 = x4 - x3
-       v2 = y4 - y3
+                ! We have to be careful since single-sided chains have only
+                ! 1 neighbour at each end.
+
+                call getNodeInfo(str, j, checkLeft, checkRight, concave, &
+                                 xj, xjm1, xjp1, normj)
+                isEndNode = .False.
+                if (.not. (checkLeft .eqv. checkRight)) then
+                    ! Since we don't need to check one side, this means we're
+                    ! at the end of the chain. This is important since this
+                    ! node *MUST* be attached to another node on another
+                    ! chain at the end
+                    isEndNode = .True.
+                end if
+
+                outerLoop: do
+                    minDist = large
+                    closestOtherIndex = -1
+                    call kdtree2_n_nearest(master%tree, xj, nSearch, results)
+
+                    ! Only check edges connected to nodes within the
+                    ! distance the maximum element size of my self or the
+                    ! closest node. We put in a fudge factor of 1.5.
+
+                    innerLoop: do k = 1, nSearch
+
+                        ! Since we know the results are sorted, if the
+                        ! distance(k) > than our current minDist, we can stop
+                        ! since there is no possible way that any of the
+                        ! remaining points can be closer given that the modified
+                        ! D* is always larger than the original D
+
+                        ! Extract current information to make things a little
+                        ! easier to read
+                        curDist = sqrt(results(k)%dis)
+                        idx = results(k)%idx
+                        pt = master%x(:, idx)
+
+                        ! ---------------------------------------------
+                        ! Exit Condition: We can stop the loop if the current
+                        ! uncorrected distance is larger than our current
+                        ! minimum. This guarantees the minimum corrected
+                        ! distance is found.
+                        ! ---------------------------------------------
+
+                        if (curDist > minDist) then
+                            exit outerLoop
+                        end if
+
+                        ! ---------------------------------------------
+                        ! Check 1: If the node we found isn't on our
+                        ! substring. we don't need to do anything
+                        ! ---------------------------------------------
+
+                        if (master%cNodes(1, idx) == str%myID) then
+                            cycle innerLoop
+                        end if
+
+                        ! ---------------------------------------------
+
+                        ! Check 1b: If the node we found has been removed due
+                        ! to self zipping, we can just keep going
+                        ! --------------------------------------------
+                        if (master%cNodes(2, idx) == 0) then
+                            cycle innerLoop
+                        end if
+
+                        ! The first time we make it here, idx will be the
+                        ! index of the closest node on another string that
+                        ! isn't me.
+                        if (closestOtherIndex == -1) then
+                            closestOtherString = master%cNodes(1, idx)
+                            closestOtherIndex = master%cNodes(2, idx)
+                        end if
+
+                        ! ---------------------------------------------
+                        ! Check 2: Check if the node we found violates the
+                        ! the "in front" test. For a concave corner TWO
+                        ! triangle areas formed by the point and the two
+                        ! edges must be positive. For a convex corner only
+                        ! one of the triangle areas needs to be positive.
+                        ! ---------------------------------------------
+                        if (.not. nodeInFrontOfEdges(pt, concave, checkLeft, checkRight, &
+                                                     xj, xjm1, xjp1, normj)) then
+                            cycle innerLoop
+                        end if
+
+                        ! ---------------------------------------------
+                        ! Check 3: This is the *reverse* of check 2: Is the
+                        ! node we're searching for visible from the potential
+                        ! closest other node.
+                        ! ---------------------------------------------
+                        otherID = master%cNodes(1, idx)
+                        otherIndex = master%cNodes(2, idx)
+
+                        call getNodeInfo(strings(otherID), otherIndex, checkLeft2, &
+                                         checkRight2, concave2, xk, xkm1, xkp1, normk)
+
+                        if (.not. nodeInFrontOfEdges(xj, concave2, checkLeft2, &
+                                                     checkRight2, xk, xkm1, xkp1, normk)) then
+                            cycle innerLoop
+                        end if
+
+                        ! ---------------------------------------------
+                        ! Check 4a: Check if the potential node intersects
+                        ! itself.
+                        ! ---------------------------------------------
+                        if (overlappedEdges(str, j, pt)) then
+                            cycle
+                        end if
+
+                        ! ---------------------------------------------
+                        ! Check 4b: OR if the other node would have to
+                        ! intersect *ITSELF* to get back to me. This is used
+                        ! to catch closest points crossing over thin strips.
+                        ! ---------------------------------------------
+
+                        if (overlappedEdges(strings(otherID), otherIndex, xj)) then
+                            cycle
+                        end if
+
+                        ! ---------------------------------------------
+                        ! Check 4c: Make sure it doesn't inersect the closest
+                        ! string if that happens to be different from the
+                        ! cloest one.  string. This should only check very
+                        ! rare cases the other checks miss.
+                        ! ---------------------------------------------
+
+                        if (otherID /= closestOtherString) then
+                            if (overlappedEdges2( &
+                                strings(closestOtherString), xj, normj, pt)) then
+                                cycle
+                            end if
+                        end if
+
+                        ! ---------------------------------------------
+                        ! Check 4d: If this is an end node, we need to check
+                        ! if the potential canditate is also a end node
+                        ! ---------------------------------------------
+                        if (isEndNode) then
+                            if (checkRight2 .eqv. checkLeft2) then
+                                cycle
+                            end if
+                        end if
+
+                        ! ---------------------------------------------
+                        ! Check 5: Now that the point has passed the previous
+                        ! checks, we can compute the agumented distance
+                        ! function and see if it better than the exisitng min
+                        ! distance.
+                        ! ---------------------------------------------
+
+                        ! Now calculate our new distance
+                        v = pt - xj
+                        v = v / mynorm2(v)
+
+                        ! Recompute the distance function
+                        cosTheta = abs(dot_product(normj, v))
+
+                        ! Update distFunction
+                        dStar = curDist / (max(1 - cosTheta, 1e-6))
+
+                        if (dStar < minDist) then
+                            ! Save the string ID and the index.
+                            minDist = dStar
+                            str%otherID(:, j) = master%cNodes(:, idx)
+                        end if
+                    end do innerLoop
+
+                    ! If we have already searched the max, we have to quit the loop
+                    if (nSearch == master%Nnodes) then
+                        exit outerLoop
+                    end if
+
+                    ! We are not 100% sure that we found the minium
+                    ! yet. Make nAlloc twice as big and start over.
+                    nSearch = nSearch * 2
+                    nSearch = min(nSearch, master%nNodes)
+                    if (nSearch > nAlloc) then
+                        deallocate (results)
+                        nAlloc = nAlloc * 2
+                        allocate (results(nAlloc))
+                    end if
+                end do outerLoop
+            end do nodeLoop
+
+            ! Do a sanity check to fix some extraordinary cases. If a node
+            ! hasn't found a neighbouring string but each of the two nodes
+            ! either side have, and they found the *same* string, just
+            ! accept that.
+
+            do j = 3, str%nNodes - 2
+                if (str%otherID(1, j) == -1) then
+                    ! Bad node:
+                    oid(1) = str%otherID(1, j - 2)
+                    oid(2) = str%otherID(1, j - 1)
+                    oid(3) = str%otherID(1, j + 1)
+                    oid(4) = str%otherID(1, j + 2)
+
+                    if (oid(1) /= -1 .and. &
+                        oid(1) == oid(2) .and. &
+                        oid(1) == oid(3) .and. &
+                        oid(1) == oid(4)) then
+
+                        if (debugZipper) then
+                            print *, '****************************************************************'
+                            print *, 'Warning: Fixing a bad association on string ', i, 'at index', j
+                            print *, '****************************************************************'
+                        end if
+
+                        ! We have a '-1' surrounded by the same gap string
+
+                        ! Set the stringID
+                        str%otherID(1, j) = oid(1)
+
+                        ! Estimate what the other index should be. Since this
+                        ! is in the middle of the string, the exact index
+                        ! shouldn't matter.
+                        str%otherID(2, j) = str%otherID(2, j - 1)
+                    end if
+                end if
+            end do
+
+        end do
+    end subroutine stringMatch
+
+    subroutine writeOversetString(str, strings, n, fileID)
+
+        use constants
+        use utils, only: mynorm2
+        use commonFormats, only: sci12, int5
+        implicit none
+
+        integer(kind=intType), intent(in) :: fileID, n
+        type(oversetString), intent(in) :: str
+        type(oversetString), intent(in), dimension(n) :: strings
+        integer(kind=intType) :: i, j, id, index
+        real(kind=realType), dimension(3) :: myPt, otherPT, vec
+        real(kind=realType) :: maxH, dist, ratio
+
+        character(80) :: zoneName
+
+        write (zoneName, "(a,I5.5)") "Zone T=gap_", str%myID
+        write (fileID, *) trim(zoneName)
+
+        write (fileID, *) "Nodes = ", str%nNodes, " Elements= ", str%nElems, " ZONETYPE=FELINESEG"
+        write (fileID, *) "DATAPACKING=BLOCK"
+
+        ! Nodes
+        do j = 1, 3
+            do i = 1, str%nNodes
+                write (fileID, sci12) str%x(j, i)
+            end do
+        end do
+
+        ! Node normal
+        do j = 1, 3
+            do i = 1, str%nNodes
+                write (fileID, sci12) str%norm(j, i)
+            end do
+        end do
+
+        ! Vector between closest points
+        do j = 1, 3
+            do i = 1, str%nNodes
+                myPt = str%x(:, i)
+                id = str%otherID(1, i)
+                if (id /= -1) then
+                    index = str%otherID(2, i)
+                    otherPt = strings(id)%x(:, index)
+                    vec = otherPt - myPt
+                else
+                    vec = zero
+                end if
+
+                write (fileID, sci12) vec(j)
+            end do
+        end do
+
+        ! global node ID
+        do i = 1, str%nNodes
+            write (fileID, sci12) real(str%ind(i))
+        end do
+
+        ! gapID
+        do i = 1, str%nNodes
+            write (fileID, sci12) real(str%myID)
+        end do
+
+        ! gap Index
+        do i = 1, str%nNodes
+            write (fileID, sci12) real(i)
+        end do
+
+        if (associated(str%otherID)) then
+            ! otherID
+            do i = 1, str%nNodes
+                write (fileID, sci12) real(str%otherID(1, i))
+            end do
+
+            ! other Index
+            do i = 1, str%nNodes
+                write (fileID, sci12) real(str%otherID(2, i))
+            end do
+        else
+            do i = 1, 2 * str%nNodes
+                write (fileID, sci12) zero
+            end do
+        end if
+
+        do i = 1, str%nNodes
+            myPt = str%x(:, i)
+            id = str%otherID(1, i)
+            if (id /= -1) then
+                index = str%otherID(2, i)
+                otherPt = strings(id)%x(:, index)
+                dist = mynorm2(myPt - otherPt)
+                maxH = max(str%h(i), strings(id)%h(index))
+                ratio = dist / maxH
+            else
+                ratio = zero
+            end if
+
+            write (fileID, sci12) ratio
+        end do
+
+        do i = 1, str%nElems
+            write (fileID, int5) str%conn(1, i), str%conn(2, i)
+        end do
+
+    end subroutine writeOversetString
+
+    subroutine writeOversetMaster(str, fileID)
+
+        use constants
+        use utils, only: mynorm2
+        use commonFormats, only: sci12, int5
+        implicit none
+
+        type(oversetString), intent(in) :: str
+        integer(kind=intType), intent(in) :: fileID
+        integer(kind=intType) :: i, j, id, index
+        real(kind=realType), dimension(3) :: myPt, otherPT, vec
+        real(kind=realType) :: maxH, dist, ratio
+
+        character(80) :: zoneName
+
+        write (zoneName, "(a,I5.5)") "Zone T=gap_", str%myID
+        write (fileID, *) trim(zoneName)
+
+        write (fileID, *) "Nodes = ", str%nNodes, " Elements= ", str%nElems, " ZONETYPE=FELINESEG"
+        write (fileID, *) "DATAPACKING=BLOCK"
+
+        ! Nodes
+        do j = 1, 3
+            do i = 1, str%nNodes
+                write (fileID, sci12) str%x(j, i)
+            end do
+        end do
+
+        do i = 1, str%nElems
+            write (fileID, int5) str%conn(1, i), str%conn(2, i)
+        end do
+
+    end subroutine writeOversetMaster
+
+    subroutine writeOversetTriangles(string, fileName, startTri, endTri)
+
+        use constants
+        use commonFormats, only: sci12
+        implicit none
+
+        type(oversetString), intent(inout) :: string
+        integer(kind=intType), intent(in) :: startTri, endTri
+        character(*) :: fileName
+        integer(kind=intType) :: i, j
+        character(80) :: zoneName
+
+        open (unit=101, file=trim(fileName), form='formatted')
+        write (101, *) 'TITLE = "Triangles"'
+        write (101, *) 'Variables = "X", "Y", "Z"'
+
+        write (zoneName, "(a,I5.5)") "Zone T=triangles_", string%myID
+        write (101, *) trim(zoneName)
+
+        write (101, *) "Nodes = ", string%nNodes, " Elements= ", (endTri - startTri + 1), " ZONETYPE=FETRIANGLE"
+        write (101, *) "DATAPACKING=POINT"
+
+        ! Write all the coordinates
+        do i = 1, string%nNodes
+            do j = 1, 3
+                write (101, sci12, advance='no') string%x(j, i)
+            end do
+            write (101, "(1x)")
+        end do
+
+        do i = startTri, endTri
+            write (101, "(*(I7))") string%tris(1, i), string%tris(2, i), string%tris(3, i)
+        end do
+        close (101)
+    end subroutine writeOversetTriangles
+
+    subroutine writeZipperDebug(str)
+
+        ! Save the state of an unsplit string such that it can be debugged
+        ! later without running overset interpolation.
+
+        use constants
+        implicit none
+
+        type(oversetString) :: str
+        integer(kind=intType) :: i, j
+
+        open (unit=101, file="debug.zipper", form='formatted')
+        write (101, *) str%nNodes
+        write (101, *) str%nElems
+        do i = 1, str%nNodes
+            do j = 1, 10
+                write (101, *) str%nodeData(j, i)
+            end do
+        end do
+
+        do i = 1, str%nElems
+            do j = 1, 2
+                write (101, *) str%conn(j, i)
+            end do
+        end do
+
+        do i = 1, str%nNodes
+            do j = 1, 3
+                write (101, *) str%intNodeData(j, i)
+            end do
+        end do
+        close (101)
+    end subroutine writeZipperDebug
+
+    subroutine loadZipperDebug(fileName, str)
+
+        ! Save the state of an unsplit string such that it can be debugged
+        ! later without running overset interpolation.
+
+        use constants
+        implicit none
+
+        character(*), intent(in) :: fileName
+        type(oversetString) :: str
+        integer(kind=intType) :: i, j
+
+        open (unit=101, file=fileName, form='formatted')
+        read (101, *) str%nNodes
+        read (101, *) str%nElems
+        call nullifyString(str)
+
+        allocate (str%nodeData(10, str%nNodes))
+        allocate (str%conn(2, str%nElems))
+        allocate (str%intNodeData(3, str%nNodes))
+
+        do i = 1, str%nNodes
+            do j = 1, 10
+                read (101, *) str%nodeData(j, i)
+            end do
+        end do
+
+        do i = 1, str%nElems
+            do j = 1, 2
+                read (101, *) str%conn(j, i)
+            end do
+        end do
+
+        do i = 1, str%nNodes
+            do j = 1, 3
+                read (101, *) str%intNodeData(j, i)
+            end do
+        end do
+        close (101)
+
+        call setStringPointers(str)
+
+    end subroutine loadZipperDebug
+
+    subroutine pointInTriangle(x1, x2, x3, pt, inTri)
+
+        use constants
+        use utils, only: cross_prod
+        implicit none
+        real(kind=realType), dimension(3), intent(in) :: x1, x2, x3, pt
+        logical, intent(out) :: inTri
+
+        if (sameSide(pt, x1, x2, x3) .and. sameSide(pt, x2, x1, x3) .and. sameSide(pt, x3, x1, x2)) then
+            inTri = .True.
+        else
+            inTri = .false.
+        end if
+
+    contains
+        function sameSide(p1, p2, a, b)
+
+            implicit none
+            logical :: sameSide
+            real(kind=realType), dimension(3) :: p1, p2, a, b, cp1, cp2
+
+            sameSide = .False.
+            call cross_prod(b - a, p1 - a, cp1)
+            call cross_prod(b - a, p2 - a, cp2)
+            if (dot_product(cp1, cp2) >= zero) then
+                sameSide = .true.
+            end if
+        end function SameSide
+    end subroutine pointInTriangle
+
+    function positiveTriArea(p1, p2, p3, norm)
+
+        use constants
+        use utils, only: cross_prod
+        implicit none
+        real(kind=realType), intent(in), dimension(3) :: p1, p2, p3, norm
+        real(kind=realType), dimension(3) :: n
+        logical :: positiveTriArea
+
+        call cross_prod(p2 - p1, p3 - p1, n)
+        if (dot_product(n, norm) > zero) then
+            positiveTriArea = .True.
+        else
+            positiveTriArea = .False.
+        end if
+    end function positiveTriArea
+
+    subroutine getNodeInfo(str, j, checkLeft, checkRight, concave, xj, xjm1, xjp1, normj)
+
+        use constants
+        use utils, only: cross_prod
+        implicit none
+
+        type(oversetString) :: str
+        integer(kind=intType) :: j
+        logical :: checkLeft, checkRight, concave
+        real(kind=realType), dimension(3) :: xj, xjm1, xjp1, normj
+        real(kind=realType), dimension(3) :: v
+        checkLeft = .True.
+        checkRight = .True.
+        xj = str%x(:, j)
+        normj = str%norm(:, j)
+        concave = .False.
+        if (str%isPeriodic) then
+            if (j > 1 .and. j < str%nNodes) then
+                xjm1 = str%x(:, j - 1)
+                xjp1 = str%x(:, j + 1)
+            else if (j == 1) then
+                xjm1 = str%x(:, str%nNodes)
+                xjp1 = str%x(:, j + 1)
+            else if (j == str%nNodes) then
+                xjm1 = str%x(:, j - 1)
+                xjp1 = str%x(:, 1)
+            end if
+        else
+            ! Not periodic. Assume the ends are concave. This will
+            ! forces checking if both the left and right are ok,
+            ! which since the leftOK and rightOK's default to
+            ! .True., it just checks the one triangle which is what
+            ! we want.
+            if (j == 1) then
+                checkLeft = .False.
+                concave = .True.
+            end if
+
+            if (j == str%nNodes) then
+                checkRight = .False.
+                concave = .True.
+            end if
 
-       w1 = x1- x3
-       w2 = y1- y3
+            if (checkLeft) &
+                xjm1 = str%x(:, j - 1)
+            if (checkRight) &
+                xjp1 = str%x(:, j + 1)
+        end if
 
-       s1 = (v2*w1 - v1*w2)/(v1*u2 - v2*u1)
-       s2 = (u1*w2 - u2*w1)/(u1*v2 - u2*v1)
+        if (checkLeft .and. checkRight) then
 
-       if (s1 > tol .and. s1 < one - tol .and. s2 > tol .and. s2 < one - tol) then
-          overlappedEdges2 = .True.
-          exit elemLoop
-       end if
-    end do elemLoop
+            ! Determine if the point is convex or concave provided
+            !  we have both neighbours.
+            call cross_prod(xjm1 - xj, xjp1 - xj, v)
 
-  end function overlappedEdges2
+            if (dot_product(v, normj) > zero) then
+                concave = .True.
+            end if
+        end if
+
+    end subroutine getNodeInfo
+
+    function nodeInFrontOfEdges(pt, concave, checkLeft, checkRight, xj, xjm1, xjp1, normj)
+
+        use constants
+        implicit none
+
+        real(kind=realType), dimension(3), intent(in) :: pt, xj, xjm1, xjp1, normj
+        logical, intent(in) :: concave, checkLeft, checkRight
+        logical :: nodeInFrontOfEdges
+        logical :: leftOK, rightoK
+
+        nodeInFrontOfEdges = .True.
+        leftOK = .True.
+        rightOK = .True.
+        if (checkLeft .and. .not. positiveTriArea(xj, xjm1, pt, normj)) then
+            leftOK = .False.
+        end if
+
+        if (checkRight .and. .not. positiveTriArea(xjp1, xj, pt, normj)) then
+            rightOK = .False.
+        end if
+
+        if (concave) then
+            if (.not. (leftOK .and. rightOK)) then
+                nodeInFrontofEdges = .False.
+            end if
+        else
+            if (.not. (leftOK .or. rightOK)) then
+                nodeInFrontOfEdges = .False.
+            end if
+        end if
+    end function nodeInFrontOfEdges
+
+    function overlappedEdges(str, j, pt)
+
+        use constants
+        use utils, only: mynorm2, cross_prod
+
+        implicit none
+
+        ! Input/output
+        real(kind=realType), dimension(3), intent(in) :: pt
+        type(oversetString), intent(in) :: str
+        integer(kind=intType), intent(in) :: j
+        logical :: overlappedEdges
+
+        ! Working
+        integer(kind=intType) :: i
+        real(kind=realType), dimension(3) :: v, p1, p2, u, normA, normB, x0, norm
+        real(kind=realType) :: uNrm, x1, x2, x3, x4, y1, y2, y3, y4, idet, Px, Py
+        real(kind=realType) :: u1, u2, v1, v2, w1, w2
+        real(kind=realType) :: s1, s2, tmp, line(2), vec(2), tol
+        overlappedEdges = .False.
+        tol = 1e-6
+        ! We will conver this completely into a 2D problem by projecting
+        ! everything onto the plane defined by norm. x0 is at the origin of
+        ! the 2D system and the xaxis point from x0 to pt
+
+        x0 = str%x(:, j)
+        norm = str%norm(:, j)
+
+        u = pt - x0
+        uNrm = mynorm2(u)
+        u = u / uNrm
+
+        call cross_prod(norm, u, v)
+        v = v / mynorm2(v)
+
+        ! Now u,v,norm is an orthogonal coordinate system
+        x1 = zero
+        y1 = zero
+        x2 = uNrm
+        y2 = zero
+        overLappedEdges = .False.
+
+        ! Loop over the number of edges on my string
+        elemLoop: do i = 1, str%nElems
+            ! Don't check the ones right next to me, since they will
+            ! "overlap" exactly at x0
+
+            if (str%conn(1, i) == j .or. str%conn(2, i) == j) then
+                cycle
+            end if
+
+            ! Project the two points into the plane
+            p1 = str%x(:, str%conn(1, i))
+            p2 = str%x(:, str%conn(2, i))
+
+            normA = str%norm(:, str%conn(1, i))
+            normB = str%norm(:, str%conn(2, i))
+
+            ! Make sure the edges are on the same plane, otherwise this is
+            ! meaningless
+            if (dot_product(normA, norm) < half .or. dot_product(normb, norm) < half) then
+                cycle
+            end if
+            ! Project the two points onto the plane
+            p1 = p1 - norm * dot_product(p1 - x0, norm)
+            p2 = p2 - norm * dot_product(p2 - x0, norm)
+
+            ! Now get the 2D coordinates
+            x3 = dot_product(p1 - x0, u)
+            y3 = dot_product(p1 - x0, v)
+            x4 = dot_product(p2 - x0, u)
+            y4 = dot_product(p2 - x0, v)
+
+            u1 = x2 - x1
+            y2 = y2 - y1
+
+            v1 = x4 - x3
+            v2 = y4 - y3
+
+            w1 = x1 - x3
+            w2 = y1 - y3
+
+            s1 = (v2 * w1 - v1 * w2) / (v1 * u2 - v2 * u1)
+            s2 = (u1 * w2 - u2 * w1) / (u1 * v2 - u2 * v1)
+
+            if (s1 > tol .and. s1 < one - tol .and. s2 > tol .and. s2 < one - tol) then
+                overlappedEdges = .True.
+                exit elemLoop
+            end if
+        end do elemLoop
+
+    end function overlappedEdges
+
+    function overlappedEdges2(str, pt1, norm, pt2)
+
+        use constants
+        use utils, only: mynorm2, cross_prod
+        implicit none
+
+        ! Input/output
+        real(kind=realType), dimension(3), intent(in) :: pt1, pt2, norm
+        type(oversetString), intent(in) :: str
+        logical :: overlappedEdges2
+
+        ! Working
+        integer(kind=intType) :: i
+        real(kind=realType), dimension(3) :: v, p1, p2, u, normA, normB, x0
+        real(kind=realType) :: uNrm, x1, x2, x3, x4, y1, y2, y3, y4, idet, Px, Py
+        real(kind=realType) :: u1, u2, v1, v2, w1, w2
+        real(kind=realType) :: s1, s2, tmp, line(2), vec(2), tol
+
+        tol = 1e-6
+        ! We will conver this completely into a 2D problem by projecting
+        ! everything onto the plane defined by norm. x0 is at the origin of
+        ! the 2D system and the xaxis point from x0 to pt
+
+        x0 = pt1
+
+        u = pt2 - x0
+        uNrm = mynorm2(u)
+        u = u / uNrm
+
+        call cross_prod(norm, u, v)
+        v = v / mynorm2(v)
+
+        ! Now u,v,norm is an orthogonal coordinate system
+        x1 = zero
+        y1 = zero
+        x2 = uNrm
+        y2 = zero
+        overLappedEdges2 = .False.
+
+        ! Loop over the number of edges on my string
+        elemLoop: do i = 1, str%nElems
+
+            ! Project the two points into the plane
+            p1 = str%x(:, str%conn(1, i))
+            p2 = str%x(:, str%conn(2, i))
+
+            normA = str%norm(:, str%conn(1, i))
+            normB = str%norm(:, str%conn(2, i))
+
+            ! Make sure the edges are on the same plane, otherwise this is
+            ! meaningless
+            if (dot_product(normA, norm) < half .or. dot_product(normb, norm) < half) then
+                cycle
+            end if
+            ! Project the two points onto the plane
+            p1 = p1 - norm * dot_product(p1 - x0, norm)
+            p2 = p2 - norm * dot_product(p2 - x0, norm)
+
+            ! Now get the 2D coordinates
+            x3 = dot_product(p1 - x0, u)
+            y3 = dot_product(p1 - x0, v)
+            x4 = dot_product(p2 - x0, u)
+            y4 = dot_product(p2 - x0, v)
+
+            u1 = x2 - x1
+            y2 = y2 - y1
+
+            v1 = x4 - x3
+            v2 = y4 - y3
+
+            w1 = x1 - x3
+            w2 = y1 - y3
+
+            s1 = (v2 * w1 - v1 * w2) / (v1 * u2 - v2 * u1)
+            s2 = (u1 * w2 - u2 * w1) / (u1 * v2 - u2 * v1)
+
+            if (s1 > tol .and. s1 < one - tol .and. s2 > tol .and. s2 < one - tol) then
+                overlappedEdges2 = .True.
+                exit elemLoop
+            end if
+        end do elemLoop
 
+    end function overlappedEdges2
 
 end module stringOps
diff --git a/src/overset/surfaceCorrection.F90 b/src/overset/surfaceCorrection.F90
index db83a72d8..25de8ed0a 100644
--- a/src/overset/surfaceCorrection.F90
+++ b/src/overset/surfaceCorrection.F90
@@ -1,174 +1,174 @@
 subroutine surfaceCorrection(oBlock, oFringe, offset, n)
 
-  use constants
-  use oversetData, only : oversetBlock, oversetFringe, oversetWall, clusterWalls
-  use adtData, only : adtBBoxTargetType
-  use adtLocalSearch, only : minDistanceTreeSearchSinglePoint
-  use kdtree2_module, onlY : kdtree2_result, kdtree2_n_nearest
-  use inputOverset, only : nearWallDist
-  use sorting, only : unique
-  use utils, only : myNorm2
-  use wallSearches, only : quadOverlap
-  implicit none
-
-  ! Input/Output
-  type(oversetBlock), intent(inout) :: oBlock
-  type(oversetFringe), intent(inout) :: oFringe
-  integer(kind=intType), intent(in) :: n
-  real(kind=realType), intent(out), dimension(3, n) :: offset
-
-  ! Working
-  integer(kind=intType) :: i, j, k, ii, jj, nInterpol
-  integer(kind=intType) :: cellID, idx, nUnique
-
-  integer(kind=intType), dimension(3) :: intInfoF, intInfoB
-  integer(kind=intType), dimension(4) :: nodesB, nodesF
-  real(kind=realType), dimension(3, 2) :: dummy
-  real(kind=realType), dimension(5) :: uvwF, uvwB
-  real(kind=realType), dimension(3) :: ptB, ptF, yy
-  real(kind=realType), dimension(4) :: weightsF, weightsB, xx
-  real(kind=realType) :: ratio, fact, distY, q1(3, 4), q2(3, 4), dB
-  type(kdtree2_result) :: results(1)
-  logical :: overlapped1, overlapped2, overlapped
-
-  integer(kind=intType), dimension(:), allocatable :: link, tmp
-  real(kind=realType), dimension(:, :), allocatable :: uniqueWallPts, masterOffset
-  ! Variables we have to pass the ADT search routine
-  integer(kind=intType), dimension(:), pointer :: frontLeaves
-  integer(kind=intType), dimension(:), pointer :: frontLeavesNew
-  type(adtBBoxTargetType), dimension(:), pointer :: BB
-  type(oversetWall), pointer :: bWall, fWall
-
-  ! Set pointers to walls (even if they are empty) for the cluster
-  ! containing the search block (bWall) and the cluster containing the
-  ! fringe pts (fWall)
-  bWall => clusterWalls(oBlock%cluster)
-  fWall => clusterWalls(oFringe%cluster)
-
-  ! Determine if we can make a quick exit:
-  if (bWall%nNodes == 0 .or. fWall%nNodes == 0) then
-     ! oBlock cluster or fringeCluster do not have walls. Cannot have
-     !  a surface-surface overlap!
-     return
-  end if
-
-  ! Allocate the (pointer) memory that may be resized as necessary for
-  ! the singlePoint search routine.
-  allocate(BB(10), frontLeaves(25), frontLeavesNew(25))
-
-  nInterpol = 0
-
-  ! Determine the surface points from our list of fringes. We have the
-  ! the list global indices, which essentially just serves to
-  ! determine a compact list of nodes to search for. We need to make a
-  ! tmp array since unique overwrites the array.
-  allocate(tmp(n), link(n))
-  tmp = oFringe%wallInd
-  call unique(tmp, n, nUnique, link)
-  allocate(uniqueWallPts(3, nUnique), masterOffset(3, nUnique))
-  masterOffset = zero
-
-  do i=1, n
-     uniqueWallPts(:, link(i)) = oFringe%xSeed(:, i)
-  end do
-
-  masterLoop: do ii=1, nUnique
-
-     ! The search point we are dealing with:
-     xx(1:3) = uniqueWallPts(:, ii)
-     xx(4) = large
-
-     ! Project the point onto the oBlock
-     call minDistanceTreeSearchSinglePoint(bWall%ADT, &
-          xx, intInfoB, uvwB, dummy, nInterpol, BB, frontLeaves, frontLeavesNew)
-     dB = sqrt(uvwB(4))
-
-     if ((uvwB(1) > zero .and. uvwB(1) < one .and. &
-          uvwB(2) > zero .and. uvwB(2) < one) .or. dB < nearWallDist) then
-
-        ! Extract the 4 nodes for this quad element
-        do k=1, 4
-           q1(:, k) = bWall%x(:, bWall%conn(k, intInfoB(3)))
-        end do
-
-        ! This is a little inefficient...what we want to do is
-        ! determine the 4 quads surrounding the point I'm looking
-        ! for. Use the KDTree to determine the index of the node in
-        ! question, then use the nToElem pointer to get the 4 quads
-        ! surrounding my node.
-        call kdtree2_n_nearest(fWall%tree, xx(1:3), 1, results)
-
-        idx = results(1)%idx ! Node index on fWall
-
-        overlapped1 = .False.
-        overlapped2 = .False.
-        overlapped = .False.
-
-        ! Now loop over (up to 4) of the quads surrounding this node:
-        quadLoop: do j=1, 4
-
-           cellID = fWall%nte(j, idx)
-           if (cellID > 0) then
-
-              do k=1, 4
-                 q2(:, k) = fWall%x(:, fWall%conn(k, cellID))
-              end do
-
-              ! Now see if the two quads overlap in the flat sense
-              call quadOverlap(q1, q2, overlapped1)
-              overlapped2 = .False.
-              if (dB < nearWallDist) then
-                 overlapped2 = .True.
-              end if
-
-              if (overlapped1 .and. overlapped2) then
-                 overlapped = .True.
-                 exit quadLoop
-              end if
-           end if
-        end do quadLoop
-
-        if (overlapped) then
-
-           nodesB = bWall%conn(:, intInfoB(3))
-           call getWeights(uvwB(1:2), weightsB)
-
-           ptB = zero
-           do j=1,4
-              ptB = ptB + weightsB(j)*bWall%x(:, nodesB(j))
-           end do
-
-           ! Now set the offset for the wall.
-           masterOffset(:, ii) =  ptB - xx(1:3)
+    use constants
+    use oversetData, only: oversetBlock, oversetFringe, oversetWall, clusterWalls
+    use adtData, only: adtBBoxTargetType
+    use adtLocalSearch, only: minDistanceTreeSearchSinglePoint
+    use kdtree2_module, onlY: kdtree2_result, kdtree2_n_nearest
+    use inputOverset, only: nearWallDist
+    use sorting, only: unique
+    use utils, only: myNorm2
+    use wallSearches, only: quadOverlap
+    implicit none
+
+    ! Input/Output
+    type(oversetBlock), intent(inout) :: oBlock
+    type(oversetFringe), intent(inout) :: oFringe
+    integer(kind=intType), intent(in) :: n
+    real(kind=realType), intent(out), dimension(3, n) :: offset
+
+    ! Working
+    integer(kind=intType) :: i, j, k, ii, jj, nInterpol
+    integer(kind=intType) :: cellID, idx, nUnique
+
+    integer(kind=intType), dimension(3) :: intInfoF, intInfoB
+    integer(kind=intType), dimension(4) :: nodesB, nodesF
+    real(kind=realType), dimension(3, 2) :: dummy
+    real(kind=realType), dimension(5) :: uvwF, uvwB
+    real(kind=realType), dimension(3) :: ptB, ptF, yy
+    real(kind=realType), dimension(4) :: weightsF, weightsB, xx
+    real(kind=realType) :: ratio, fact, distY, q1(3, 4), q2(3, 4), dB
+    type(kdtree2_result) :: results(1)
+    logical :: overlapped1, overlapped2, overlapped
+
+    integer(kind=intType), dimension(:), allocatable :: link, tmp
+    real(kind=realType), dimension(:, :), allocatable :: uniqueWallPts, masterOffset
+    ! Variables we have to pass the ADT search routine
+    integer(kind=intType), dimension(:), pointer :: frontLeaves
+    integer(kind=intType), dimension(:), pointer :: frontLeavesNew
+    type(adtBBoxTargetType), dimension(:), pointer :: BB
+    type(oversetWall), pointer :: bWall, fWall
+
+    ! Set pointers to walls (even if they are empty) for the cluster
+    ! containing the search block (bWall) and the cluster containing the
+    ! fringe pts (fWall)
+    bWall => clusterWalls(oBlock%cluster)
+    fWall => clusterWalls(oFringe%cluster)
+
+    ! Determine if we can make a quick exit:
+    if (bWall%nNodes == 0 .or. fWall%nNodes == 0) then
+        ! oBlock cluster or fringeCluster do not have walls. Cannot have
+        !  a surface-surface overlap!
+        return
+    end if
+
+    ! Allocate the (pointer) memory that may be resized as necessary for
+    ! the singlePoint search routine.
+    allocate (BB(10), frontLeaves(25), frontLeavesNew(25))
+
+    nInterpol = 0
+
+    ! Determine the surface points from our list of fringes. We have the
+    ! the list global indices, which essentially just serves to
+    ! determine a compact list of nodes to search for. We need to make a
+    ! tmp array since unique overwrites the array.
+    allocate (tmp(n), link(n))
+    tmp = oFringe%wallInd
+    call unique(tmp, n, nUnique, link)
+    allocate (uniqueWallPts(3, nUnique), masterOffset(3, nUnique))
+    masterOffset = zero
+
+    do i = 1, n
+        uniqueWallPts(:, link(i)) = oFringe%xSeed(:, i)
+    end do
+
+    masterLoop: do ii = 1, nUnique
+
+        ! The search point we are dealing with:
+        xx(1:3) = uniqueWallPts(:, ii)
+        xx(4) = large
+
+        ! Project the point onto the oBlock
+        call minDistanceTreeSearchSinglePoint(bWall%ADT, &
+                                              xx, intInfoB, uvwB, dummy, nInterpol, BB, frontLeaves, frontLeavesNew)
+        dB = sqrt(uvwB(4))
+
+        if ((uvwB(1) > zero .and. uvwB(1) < one .and. &
+             uvwB(2) > zero .and. uvwB(2) < one) .or. dB < nearWallDist) then
+
+            ! Extract the 4 nodes for this quad element
+            do k = 1, 4
+                q1(:, k) = bWall%x(:, bWall%conn(k, intInfoB(3)))
+            end do
+
+            ! This is a little inefficient...what we want to do is
+            ! determine the 4 quads surrounding the point I'm looking
+            ! for. Use the KDTree to determine the index of the node in
+            ! question, then use the nToElem pointer to get the 4 quads
+            ! surrounding my node.
+            call kdtree2_n_nearest(fWall%tree, xx(1:3), 1, results)
+
+            idx = results(1)%idx ! Node index on fWall
+
+            overlapped1 = .False.
+            overlapped2 = .False.
+            overlapped = .False.
+
+            ! Now loop over (up to 4) of the quads surrounding this node:
+            quadLoop: do j = 1, 4
+
+                cellID = fWall%nte(j, idx)
+                if (cellID > 0) then
+
+                    do k = 1, 4
+                        q2(:, k) = fWall%x(:, fWall%conn(k, cellID))
+                    end do
+
+                    ! Now see if the two quads overlap in the flat sense
+                    call quadOverlap(q1, q2, overlapped1)
+                    overlapped2 = .False.
+                    if (dB < nearWallDist) then
+                        overlapped2 = .True.
+                    end if
+
+                    if (overlapped1 .and. overlapped2) then
+                        overlapped = .True.
+                        exit quadLoop
+                    end if
+                end if
+            end do quadLoop
+
+            if (overlapped) then
+
+                nodesB = bWall%conn(:, intInfoB(3))
+                call getWeights(uvwB(1:2), weightsB)
+
+                ptB = zero
+                do j = 1, 4
+                    ptB = ptB + weightsB(j) * bWall%x(:, nodesB(j))
+                end do
+
+                ! Now set the offset for the wall.
+                masterOffset(:, ii) = ptB - xx(1:3)
+            end if
         end if
-     end if
-  end do masterLoop
+    end do masterLoop
 
-  ! Now that we've determined the number of surface offsets, we can
-  ! loop back throught he actual nodes and set the wall offset.
+    ! Now that we've determined the number of surface offsets, we can
+    ! loop back throught he actual nodes and set the wall offset.
 
-  do ii=1, n
-     ! Attenuate the offset over nearWallDist
-     distY = mynorm2(oFringe%x(:, ii) - uniqueWallPts(:, link(ii)))
-     ratio = distY / nearWallDist
-     fact = max(one - ratio**3, zero)
-     offset(:, ii) = offset(:, ii) + fact*masterOffset(:, link(ii))
-  end do
+    do ii = 1, n
+        ! Attenuate the offset over nearWallDist
+        distY = mynorm2(oFringe%x(:, ii) - uniqueWallPts(:, link(ii)))
+        ratio = distY / nearWallDist
+        fact = max(one - ratio**3, zero)
+        offset(:, ii) = offset(:, ii) + fact * masterOffset(:, link(ii))
+    end do
 
-  ! Make sure to clean up the pointer allocations
-  deallocate(BB, frontLeaves, frontLeavesNew)
-  deallocate(uniqueWallPts, masterOffset, tmp, link)
+    ! Make sure to clean up the pointer allocations
+    deallocate (BB, frontLeaves, frontLeavesNew)
+    deallocate (uniqueWallPts, masterOffset, tmp, link)
 
 contains
-  subroutine getWeights(uv, weights)
-    use constants
-    implicit none
-
-    real(kind=realType), intent(in) :: uv(2)
-    real(kind=realType), intent(out) :: weights(4)
-    weights(1) = (one - uv(1))*(one - uv(2))
-    weights(2) = (      uv(1))*(one - uv(2))
-    weights(3) = (      uv(1))*(      uv(2))
-    weights(4) = (one - uv(1))*(      uv(2))
-  end subroutine getWeights
+    subroutine getWeights(uv, weights)
+        use constants
+        implicit none
+
+        real(kind=realType), intent(in) :: uv(2)
+        real(kind=realType), intent(out) :: weights(4)
+        weights(1) = (one - uv(1)) * (one - uv(2))
+        weights(2) = (uv(1)) * (one - uv(2))
+        weights(3) = (uv(1)) * (uv(2))
+        weights(4) = (one - uv(1)) * (uv(2))
+    end subroutine getWeights
 end subroutine surfaceCorrection
diff --git a/src/overset/wallSearch.F90 b/src/overset/wallSearch.F90
index 89122e417..6c59d7143 100644
--- a/src/overset/wallSearch.F90
+++ b/src/overset/wallSearch.F90
@@ -1,425 +1,424 @@
 module wallSearches
 contains
-  subroutine wallSearch(aSurf, bSurf)
-
-    use constants
-    use oversetData, only : oversetWall, clusterAreas
-    use inputOverset, only : nearWallDist
-    use adtLocalSearch, only : minDistanceTreeSearchSinglePoint
-    use adtData, only : adtBBoxTargetType, adtLeafType
-    use adtUtils, only : stack
-    use utils, only : mynorm2
-    implicit none
-
-    ! Input/Output
-    type(oversetWall), intent(inout) :: aSurf, bSurf
-
-    ! Working Varaibles
-    integer(kind=intType) :: i, jj, k, iElem, maxLevels, nNeighbours, nOtherElem, iOther, otherElem
-    integer(kind=intType) :: nInterpol, elemID, intInfo(3), factor, jelem, otherElems(4)
-    real(kind=realType) :: uvw(5), xx(4), dist, q1(3, 4), q2(3, 4), delta, radius1, radius2
-
-    ! Variables we have to pass the ADT search routine
-    integer(kind=intType), dimension(:), pointer :: frontLeaves
-    integer(kind=intType), dimension(:), pointer :: frontLeavesNew
-    type(adtBBoxTargetType), dimension(:), pointer :: BB
-    real(kind=realType),   dimension(3,2) :: dummy
-    integer(kind=intType), dimension(:), allocatable :: tmpCellArr
-    integer(kind=intType), dimension(:), allocatable :: tmpNodeElem
-    integer(kind=intType), dimension(:), allocatable :: mask
-    type(adtLeafType), dimension(:), pointer :: ADTree
-
-    logical :: overlapped
-
-    if (aSurf%nCells == 0 .or. bSurf%nCells == 0) then
-       ! Either block doesn't have walls, so there is nothing do but just
-       ! return.
-       return
-    end if
-
-    if (clusterAreas(bSurf%cluster) < clusterAreas(aSurf%cluster)) then
-       ! B is smaller so we don't need to do anything
-       return
-    else if (clusterAreas(bSurf%cluster) == clusterAreas(aSurf%cluster)) then 
-       ! If the areas are *exactly* equal (which can happen when we have
-       ! replicated grids) cut by the cluster index.
-       
-       if (bSurf%cluster < aSurf%cluster) then 
-          return 
-       end if
-    end if
-    nInterpol = 0
-    ! Allocate the (pointer) memory that may be resized as necessary for
-    ! the singlePoint search routine.
-    allocate(BB(10), frontLeaves(25), frontLeavesNew(25), stack(100))
-
-    ! Basically what we are doing it looping all of our bSurf NODES. We
-    ! use a special "surface containment search". Essentially all we are
-    ! looking for is if a point it inside of of an actual element
-    ! BBox. If it isn't inside any BBox then we know it it can't
-    ! overlap. This is essentialy fast cull of the majority of panels so
-    ! we can later just focus on the ones that may actually overlap.
-    ! node as being blanked
-
-    ! Start with a max 10 layers (each with an unreduced 8 cells)
-    maxLevels = 1
-    allocate(tmpCellArr(3*3), mask(aSurf%nCells), tmpNodeElem(bSurf%nNodes))
-    tmpNodeElem(:) = 0
-    mask = 0
-    ADTree => aSurf%ADT%ADTree
-
-    do i=1, bSurf%nNodes
-
-       xx(1:3) = bSurf%x(:, i)
-       xx(4) = large
-
-       ! Just check if it is inside the root bounding box..ie the full
-       ! bounding box of the surface. This is would appear conservative,
-       ! but isn't good enough. We need to expand by nearWallDist since
-       ! it is possible a overlap occurs right at the edge of the
-       ! bounding box.
-       if(xx(1) >= ADTree(1)%xMin(1) - nearWallDist .and. &
-            xx(1) <= ADTree(1)%xMax(4)  + nearWallDist .and. &
-            xx(2) >= ADTree(1)%xMin(2) - nearWallDist .and. &
-            xx(2) <= ADTree(1)%xMax(5) + nearWallDist .and. &
-            xx(3) >= ADTree(1)%xMin(3) - nearWallDist .and. &
-            xx(3) <= ADTree(1)%xMax(6) + nearWallDist) then
-
-          ! Now find the closest element on the other mesh for this
-          ! node. This is the regular (expensive) closest point search
-
-          call minDistanceTreeSearchSinglePoint(aSurf%ADT, xx, intInfo, uvw, &
-               dummy, nInterpol, BB, frontLeaves, frontLeavesNew)
-          elemID = intInfo(3)
-          tmpNodeElem(i) = elemID
-       end if
-    end do
-
-    ! Loop over the cells now since this is eventually want we need to blank out:
-    cellLoop: do i=1,bSurf%nCells
-
-       ! Extract out the elems found on the other mesh for the 4 nodes
-       ! on my element. There could be none, 1 or up to 4 other elements.
-
-       nOtherElem = 0
-       do jj=1, 4
-          otherElem = tmpNodeElem(bSurf%conn(jj, i))
-          if (otherElem /= 0) then
-             nOtherElem = nOtherElem + 1
-             otherElems(nOtherElem) = otherElem
-          end if
-       end do
-
-       ! Get the coordinates of my quad
-       do jj=1,4
-          q1(:, jj) = bSurf%x(:, bSurf%conn(jj, i))
-       end do
-
-       do iOther=1, nOtherElem
-
-          elemID = otherElems(iOther)
-
-          ! Get coordinates of the other (found) quad
-          do jj=1,4
-             q2(:, jj) = aSurf%x(:, aSurf%conn(jj, elemID))
-          end do
-
-          ! Do a quick check of the cell itself. If it overlaps,
-          ! we're done and don't need to deal with neighbor cells at
-          ! all.
-          call quadOverlap(q1, q2, overlapped)
-          if (overlapped) then
-             bSurf%iBlank(bSurf%cellPtr(i)) = -2
-
-             ! No need to do anything else
-             cycle CellLoop
-          end if
-
-          ! Otherwise, we need to do more work.
-          radius1 = getCellRadius(q1)
-          radius2 = getCellRadius(q2)
-
-          ! We technically only should only need to add 1 here, but
-          ! to be safer, we'll have at least two layers to check.
-          factor = int(radius1/radius2) + 2
-
-          if (factor > maxLevels) then
-             deallocate(tmpCellArr)
-             maxLevels = factor
-             allocate(tmpCellArr((1+2*maxLevels)**2))
-          end if
-
-          ! This is where it gets interesing: We can determine the
-          ! number of recursive radiating layers we need to check
-          ! based on the relative size of the the two quads.
-
-          ! Now for the fun part: Recursion!
-          nNeighbours = 0
-          call getNeighbourCells(aSurf, mask, elemID, factor, tmpCellArr, nNeighbours)
-
-          ! Now just blindly check them until we run out of find an overlapped one:
-
-          do iElem = 1, nNeighbours
-             elemID = tmpCellArr(iElem)
-
-             ! Return the mask for this elem back to 0
-             mask(elemID) = 0
-
-             ! Get coordinates of the other quad
-             do jj=1,4
-                q2(:, jj) = aSurf%x(:, aSurf%conn(jj, elemID))
-             end do
-
-             ! Do the actual overlap calc for the found cell:
-             call quadOverlap(q1, q2, overlapped)
-
-             if (overlapped) then
-                bSurf%iBlank(bSurf%cellPtr(i)) = -2
-
-                ! No need to do anything else, but we we do need to
-                ! flip all the mask elements back for the next
-                ! iteration of cellLoop
-
-                do jElem=iElem+1, nNeighbours
-                   mask(tmpCellArr(jElem)) = 0
+    subroutine wallSearch(aSurf, bSurf)
+
+        use constants
+        use oversetData, only: oversetWall, clusterAreas
+        use inputOverset, only: nearWallDist
+        use adtLocalSearch, only: minDistanceTreeSearchSinglePoint
+        use adtData, only: adtBBoxTargetType, adtLeafType
+        use adtUtils, only: stack
+        use utils, only: mynorm2
+        implicit none
+
+        ! Input/Output
+        type(oversetWall), intent(inout) :: aSurf, bSurf
+
+        ! Working Varaibles
+        integer(kind=intType) :: i, jj, k, iElem, maxLevels, nNeighbours, nOtherElem, iOther, otherElem
+        integer(kind=intType) :: nInterpol, elemID, intInfo(3), factor, jelem, otherElems(4)
+        real(kind=realType) :: uvw(5), xx(4), dist, q1(3, 4), q2(3, 4), delta, radius1, radius2
+
+        ! Variables we have to pass the ADT search routine
+        integer(kind=intType), dimension(:), pointer :: frontLeaves
+        integer(kind=intType), dimension(:), pointer :: frontLeavesNew
+        type(adtBBoxTargetType), dimension(:), pointer :: BB
+        real(kind=realType), dimension(3, 2) :: dummy
+        integer(kind=intType), dimension(:), allocatable :: tmpCellArr
+        integer(kind=intType), dimension(:), allocatable :: tmpNodeElem
+        integer(kind=intType), dimension(:), allocatable :: mask
+        type(adtLeafType), dimension(:), pointer :: ADTree
+
+        logical :: overlapped
+
+        if (aSurf%nCells == 0 .or. bSurf%nCells == 0) then
+            ! Either block doesn't have walls, so there is nothing do but just
+            ! return.
+            return
+        end if
+
+        if (clusterAreas(bSurf%cluster) < clusterAreas(aSurf%cluster)) then
+            ! B is smaller so we don't need to do anything
+            return
+        else if (clusterAreas(bSurf%cluster) == clusterAreas(aSurf%cluster)) then
+            ! If the areas are *exactly* equal (which can happen when we have
+            ! replicated grids) cut by the cluster index.
+
+            if (bSurf%cluster < aSurf%cluster) then
+                return
+            end if
+        end if
+        nInterpol = 0
+        ! Allocate the (pointer) memory that may be resized as necessary for
+        ! the singlePoint search routine.
+        allocate (BB(10), frontLeaves(25), frontLeavesNew(25), stack(100))
+
+        ! Basically what we are doing it looping all of our bSurf NODES. We
+        ! use a special "surface containment search". Essentially all we are
+        ! looking for is if a point it inside of of an actual element
+        ! BBox. If it isn't inside any BBox then we know it it can't
+        ! overlap. This is essentialy fast cull of the majority of panels so
+        ! we can later just focus on the ones that may actually overlap.
+        ! node as being blanked
+
+        ! Start with a max 10 layers (each with an unreduced 8 cells)
+        maxLevels = 1
+        allocate (tmpCellArr(3 * 3), mask(aSurf%nCells), tmpNodeElem(bSurf%nNodes))
+        tmpNodeElem(:) = 0
+        mask = 0
+        ADTree => aSurf%ADT%ADTree
+
+        do i = 1, bSurf%nNodes
+
+            xx(1:3) = bSurf%x(:, i)
+            xx(4) = large
+
+            ! Just check if it is inside the root bounding box..ie the full
+            ! bounding box of the surface. This is would appear conservative,
+            ! but isn't good enough. We need to expand by nearWallDist since
+            ! it is possible a overlap occurs right at the edge of the
+            ! bounding box.
+            if (xx(1) >= ADTree(1)%xMin(1) - nearWallDist .and. &
+                xx(1) <= ADTree(1)%xMax(4) + nearWallDist .and. &
+                xx(2) >= ADTree(1)%xMin(2) - nearWallDist .and. &
+                xx(2) <= ADTree(1)%xMax(5) + nearWallDist .and. &
+                xx(3) >= ADTree(1)%xMin(3) - nearWallDist .and. &
+                xx(3) <= ADTree(1)%xMax(6) + nearWallDist) then
+
+                ! Now find the closest element on the other mesh for this
+                ! node. This is the regular (expensive) closest point search
+
+                call minDistanceTreeSearchSinglePoint(aSurf%ADT, xx, intInfo, uvw, &
+                                                      dummy, nInterpol, BB, frontLeaves, frontLeavesNew)
+                elemID = intInfo(3)
+                tmpNodeElem(i) = elemID
+            end if
+        end do
+
+        ! Loop over the cells now since this is eventually want we need to blank out:
+        cellLoop: do i = 1, bSurf%nCells
+
+            ! Extract out the elems found on the other mesh for the 4 nodes
+            ! on my element. There could be none, 1 or up to 4 other elements.
+
+            nOtherElem = 0
+            do jj = 1, 4
+                otherElem = tmpNodeElem(bSurf%conn(jj, i))
+                if (otherElem /= 0) then
+                    nOtherElem = nOtherElem + 1
+                    otherElems(nOtherElem) = otherElem
+                end if
+            end do
+
+            ! Get the coordinates of my quad
+            do jj = 1, 4
+                q1(:, jj) = bSurf%x(:, bSurf%conn(jj, i))
+            end do
+
+            do iOther = 1, nOtherElem
+
+                elemID = otherElems(iOther)
+
+                ! Get coordinates of the other (found) quad
+                do jj = 1, 4
+                    q2(:, jj) = aSurf%x(:, aSurf%conn(jj, elemID))
                 end do
 
-                cycle CellLoop
-
-             end if
-          end do
-       end do
-    end do cellLoop
-
-    deallocate(BB, frontLeaves, frontLeavesNew, stack, tmpCellArr, mask)
-
-  end subroutine wallSearch
-
-  recursive subroutine getNeighbourCells(aSurf, mask, baseElemID, layers, elemList, nElemFound)
-
-    ! This routine recursively assembles a list all neighbours within
-    ! "layers" of the the baseELemID. The elemList is sorted such that
-    ! there are no duplicates:
-
-    use constants
-    use oversetData, only : oversetWall
-    implicit none
-
-    ! Input/Output
-    type(oversetWall), intent(inout) :: aSurf
-    integer(kind=intType), intent(inout), dimension(:) :: mask, elemList
-    integer(kind=intType), intent(in) :: baseElemID, layers
-    integer(kind=intType), intent(inout) :: nElemFound
-
-    ! Working
-    integer(kind=intType) :: i, iNode, iCell, curElem
-
-    ! The recusive chain ends when layers == 0
-    if (layers == 0) then
-       return
-    end if
-
-    ! Loop over the nodes of the given quad:
-    do i=1, 4
-       iNode = aSurf%conn(i, baseElemID)
-
-       ! Loop over the (up to 4) cells surrounding this node use the
-       ! node->elem (nte) array
-       do iCell=1,4
-          curElem = aSurf%nte(iCell, iNode)
-          if (curElem /= 0) then
-             ! This is a real cell:
-
-             if (mask(curElem) /= baseElemID .and. mask(curElem) == 0) then
-                ! we know we don't need to add the baseElemID and if its
-                ! already in the mask we don't have to do anything either
-
-                nElemFound = nElemFound + 1
-                elemList(nElemFound) = curElem
-                mask(curElem) = 1
-                ! Now recursively call again, with the baseElement of curElem and 1 fewer levels
-                call getNeighbourCells(aSurf, mask, curElem, layers-1, elemList, nElemFound)
-             end if
-          end if
-       end do
-    end do
-  end subroutine getNeighbourCells
-
-  subroutine quadOverlap(q1, q2, overlapped)
-    ! Given two quad in *3D* determine if they overlap using the
-    ! separation axis theorem after projecting onto the plane defined by
-    ! the cell normal. Check both normals from each quad.
-
-    use constants
-    use utils, only : mynorm2, cross_prod
-
-    implicit none
-
-    ! input/output
-    real(kind=realType), dimension(3, 4), intent(in) :: q1, q2
-    logical , intent(out) :: overlapped
-
-    ! Working
-    integer(kind=intType) :: ii, jj
-    real(kind=realType), dimension(2, 4) :: qq1, qq2
-    real(kind=realType), dimension(3) :: axis1, axis2, n1, n2, normal, v1, v2, c1, c2
-    real(kind=realType) :: e1, e2
-    ! Check distance between cell centers
-    c1 = zero
-    c2 = zero
-    do ii = 1,4
-       c1 = c1 + fourth*q1(:,ii)
-       c2 = c2 + fourth*q2(:,ii)
-    end do
-
-    ! Get get max distance between center and node:
-    e1 = zero
-    e2 = zero
-    do ii=1,4
-       e1 = max(e1, mynorm2(c1 - q1(:, ii)))
-       e2 = max(e2, mynorm2(c2 - q2(:, ii)))
-    end do
-
-    ! Check if distance between cell center sid beyond the threshold
-    if (mynorm2(c1-c2) .ge. (e1 + e2)) then
-       overlapped = .False.
-       return
-    end if
-
-    ! The two quads *may* be overlapped. We have to do it hard way.
-
-    ! Normal of first quad
-    v1 = q1(:, 3) - q1(:, 1)
-    v2 = q1(:, 4) - q1(:, 2)
-    call cross_prod(v1, v2, n1)
-    n1 = n1 / mynorm2(n1)
-
-    ! Normal of second quad
-    v1 = q2(:, 3) - q2(:, 1)
-    v2 = q2(:, 4) - q2(:, 2)
-    call cross_prod(v1, v2, n2)
-    n2 = n2/mynorm2(n2)
-
-    ! f the normals are not in the same direction, must be a thin
-    ! surface.
-    if (dot_product(n1, n2) < zero) then
-       overlapped = .False.
-       return
-    end if
-
-    do ii=1, 2
-       if (ii == 1) then
-          normal = n1
-          axis1 = q1(:, 2) - q1(:, 1)
-       else
-          normal = n2
-          axis1 = q2(:, 2) - q2(:, 1)
-       end if
-
-       ! Project axis1 onto the plane and normalize
-       axis1 = axis1 - dot_product(axis1, normal)*normal
-       axis1 = axis1/mynorm2(axis1)
-
-       ! Axis 2 is now the normal cross axis1
-       call cross_prod(normal, axis1, axis2)
-       axis2 = axis2/mynorm2(axis2)
-
-       do jj=1, 4
-          qq1(1, jj) = dot_product(axis1, q1(:, jj))
-          qq1(2, jj) = dot_product(axis2, q1(:, jj))
-
-          qq2(1, jj) = dot_product(axis1, q2(:, jj))
-          qq2(2, jj) = dot_product(axis2, q2(:, jj))
-       end do
-       call quadOverlap2D(qq1, qq2, overlapped)
-
-       if (overlapped) then
-          return
-       end if
-    end do
-
-  end subroutine quadOverlap
-
-  subroutine quadOverlap2D(q1, q2, overlapped)
-    ! Given two quad in *2D* determine if they overlap using the
-    ! separation axis theorem
-
-    use constants
-    implicit none
-
-    ! input/output
-    real(kind=realType), dimension(2, 4), intent(in) :: q1, q2
-    logical , intent(out) :: overlapped
-
-    ! Working
-    real(kind=realType), dimension(4) :: tmp1, tmp2
-    integer(kind=intType) :: ii, jj, kk, jjp1
-    real(kind=realType), dimension(2) :: axis, p0
-    real(kind=realType) :: min1, max1, min2, max2
-    overlapped = .True.
-    tmp1 = zero
-    tmp2 = zero
-    quadLoop: do ii=1, 2 ! Loop over the two quads
-       edgeLoop: do jj=1, 4 ! Loop over the edges of each quad
-          jjp1 = mod(jj, 4)+1
-
-          if (ii == 1) then
-             axis = q1(:, jjp1) - q1(:, jj)
-             p0 = q1(:, jj)
-          else
-             axis = q2(:, jjp1) - q2(:, jj)
-             p0 = q2(:, jj)
-          end if
-
-          ! Take the axis normal
-          axis = (/axis(2), -axis(1)/)
-
-          ! Take the dot products
-          do kk=1,4
-             tmp1(kk) = dot_product(axis, q1(:, kk) - p0)
-             tmp2(kk) = dot_product(axis, q2(:, kk) - p0)
-          end do
-
-          min1 = minval(tmp1)
-          max1 = maxval(tmp1)
-
-          min2 = minval(tmp2)
-          max2 = maxval(tmp2)
-
-          if (max1 < min2 .or. max2 < min1) then
-             overlapped = .False.
-             ! We can just jump right out since we know they cannot
-             ! overlap.
-             exit quadLoop
-          end if
-       end do edgeLoop
-    end do quadLoop
-  end subroutine quadOverlap2D
-
-  function getCellRadius(q)
-
-    use constants
-    use utils, only : mynorm2
-    implicit none
-    ! Input
-    real(kind=realType), dimension(3, 4) :: q
-    real(kind=realType) :: getCellRadius
-
-    ! Working
-    real(kind=realType) :: c(3)
-    integer(kind=intType) :: ii
-
-    c = zero
-    do ii=1, 4
-       c = c + fourth*q(:, ii)
-    end do
-
-
-    getCellRadius = zero
-    do ii=1, 4
-       getCellRadius = max(getCellRadius, mynorm2(c - q(:, ii)))
-    end do
-
-  end function getCellRadius
+                ! Do a quick check of the cell itself. If it overlaps,
+                ! we're done and don't need to deal with neighbor cells at
+                ! all.
+                call quadOverlap(q1, q2, overlapped)
+                if (overlapped) then
+                    bSurf%iBlank(bSurf%cellPtr(i)) = -2
+
+                    ! No need to do anything else
+                    cycle CellLoop
+                end if
+
+                ! Otherwise, we need to do more work.
+                radius1 = getCellRadius(q1)
+                radius2 = getCellRadius(q2)
+
+                ! We technically only should only need to add 1 here, but
+                ! to be safer, we'll have at least two layers to check.
+                factor = int(radius1 / radius2) + 2
+
+                if (factor > maxLevels) then
+                    deallocate (tmpCellArr)
+                    maxLevels = factor
+                    allocate (tmpCellArr((1 + 2 * maxLevels)**2))
+                end if
+
+                ! This is where it gets interesing: We can determine the
+                ! number of recursive radiating layers we need to check
+                ! based on the relative size of the the two quads.
+
+                ! Now for the fun part: Recursion!
+                nNeighbours = 0
+                call getNeighbourCells(aSurf, mask, elemID, factor, tmpCellArr, nNeighbours)
+
+                ! Now just blindly check them until we run out of find an overlapped one:
+
+                do iElem = 1, nNeighbours
+                    elemID = tmpCellArr(iElem)
+
+                    ! Return the mask for this elem back to 0
+                    mask(elemID) = 0
+
+                    ! Get coordinates of the other quad
+                    do jj = 1, 4
+                        q2(:, jj) = aSurf%x(:, aSurf%conn(jj, elemID))
+                    end do
+
+                    ! Do the actual overlap calc for the found cell:
+                    call quadOverlap(q1, q2, overlapped)
+
+                    if (overlapped) then
+                        bSurf%iBlank(bSurf%cellPtr(i)) = -2
+
+                        ! No need to do anything else, but we we do need to
+                        ! flip all the mask elements back for the next
+                        ! iteration of cellLoop
+
+                        do jElem = iElem + 1, nNeighbours
+                            mask(tmpCellArr(jElem)) = 0
+                        end do
+
+                        cycle CellLoop
+
+                    end if
+                end do
+            end do
+        end do cellLoop
+
+        deallocate (BB, frontLeaves, frontLeavesNew, stack, tmpCellArr, mask)
+
+    end subroutine wallSearch
+
+    recursive subroutine getNeighbourCells(aSurf, mask, baseElemID, layers, elemList, nElemFound)
+
+        ! This routine recursively assembles a list all neighbours within
+        ! "layers" of the the baseELemID. The elemList is sorted such that
+        ! there are no duplicates:
+
+        use constants
+        use oversetData, only: oversetWall
+        implicit none
+
+        ! Input/Output
+        type(oversetWall), intent(inout) :: aSurf
+        integer(kind=intType), intent(inout), dimension(:) :: mask, elemList
+        integer(kind=intType), intent(in) :: baseElemID, layers
+        integer(kind=intType), intent(inout) :: nElemFound
+
+        ! Working
+        integer(kind=intType) :: i, iNode, iCell, curElem
+
+        ! The recusive chain ends when layers == 0
+        if (layers == 0) then
+            return
+        end if
+
+        ! Loop over the nodes of the given quad:
+        do i = 1, 4
+            iNode = aSurf%conn(i, baseElemID)
+
+            ! Loop over the (up to 4) cells surrounding this node use the
+            ! node->elem (nte) array
+            do iCell = 1, 4
+                curElem = aSurf%nte(iCell, iNode)
+                if (curElem /= 0) then
+                    ! This is a real cell:
+
+                    if (mask(curElem) /= baseElemID .and. mask(curElem) == 0) then
+                        ! we know we don't need to add the baseElemID and if its
+                        ! already in the mask we don't have to do anything either
+
+                        nElemFound = nElemFound + 1
+                        elemList(nElemFound) = curElem
+                        mask(curElem) = 1
+                        ! Now recursively call again, with the baseElement of curElem and 1 fewer levels
+                        call getNeighbourCells(aSurf, mask, curElem, layers - 1, elemList, nElemFound)
+                    end if
+                end if
+            end do
+        end do
+    end subroutine getNeighbourCells
+
+    subroutine quadOverlap(q1, q2, overlapped)
+        ! Given two quad in *3D* determine if they overlap using the
+        ! separation axis theorem after projecting onto the plane defined by
+        ! the cell normal. Check both normals from each quad.
+
+        use constants
+        use utils, only: mynorm2, cross_prod
+
+        implicit none
+
+        ! input/output
+        real(kind=realType), dimension(3, 4), intent(in) :: q1, q2
+        logical, intent(out) :: overlapped
+
+        ! Working
+        integer(kind=intType) :: ii, jj
+        real(kind=realType), dimension(2, 4) :: qq1, qq2
+        real(kind=realType), dimension(3) :: axis1, axis2, n1, n2, normal, v1, v2, c1, c2
+        real(kind=realType) :: e1, e2
+        ! Check distance between cell centers
+        c1 = zero
+        c2 = zero
+        do ii = 1, 4
+            c1 = c1 + fourth * q1(:, ii)
+            c2 = c2 + fourth * q2(:, ii)
+        end do
+
+        ! Get get max distance between center and node:
+        e1 = zero
+        e2 = zero
+        do ii = 1, 4
+            e1 = max(e1, mynorm2(c1 - q1(:, ii)))
+            e2 = max(e2, mynorm2(c2 - q2(:, ii)))
+        end do
+
+        ! Check if distance between cell center sid beyond the threshold
+        if (mynorm2(c1 - c2) .ge. (e1 + e2)) then
+            overlapped = .False.
+            return
+        end if
+
+        ! The two quads *may* be overlapped. We have to do it hard way.
+
+        ! Normal of first quad
+        v1 = q1(:, 3) - q1(:, 1)
+        v2 = q1(:, 4) - q1(:, 2)
+        call cross_prod(v1, v2, n1)
+        n1 = n1 / mynorm2(n1)
+
+        ! Normal of second quad
+        v1 = q2(:, 3) - q2(:, 1)
+        v2 = q2(:, 4) - q2(:, 2)
+        call cross_prod(v1, v2, n2)
+        n2 = n2 / mynorm2(n2)
+
+        ! f the normals are not in the same direction, must be a thin
+        ! surface.
+        if (dot_product(n1, n2) < zero) then
+            overlapped = .False.
+            return
+        end if
+
+        do ii = 1, 2
+            if (ii == 1) then
+                normal = n1
+                axis1 = q1(:, 2) - q1(:, 1)
+            else
+                normal = n2
+                axis1 = q2(:, 2) - q2(:, 1)
+            end if
+
+            ! Project axis1 onto the plane and normalize
+            axis1 = axis1 - dot_product(axis1, normal) * normal
+            axis1 = axis1 / mynorm2(axis1)
+
+            ! Axis 2 is now the normal cross axis1
+            call cross_prod(normal, axis1, axis2)
+            axis2 = axis2 / mynorm2(axis2)
+
+            do jj = 1, 4
+                qq1(1, jj) = dot_product(axis1, q1(:, jj))
+                qq1(2, jj) = dot_product(axis2, q1(:, jj))
+
+                qq2(1, jj) = dot_product(axis1, q2(:, jj))
+                qq2(2, jj) = dot_product(axis2, q2(:, jj))
+            end do
+            call quadOverlap2D(qq1, qq2, overlapped)
+
+            if (overlapped) then
+                return
+            end if
+        end do
+
+    end subroutine quadOverlap
+
+    subroutine quadOverlap2D(q1, q2, overlapped)
+        ! Given two quad in *2D* determine if they overlap using the
+        ! separation axis theorem
+
+        use constants
+        implicit none
+
+        ! input/output
+        real(kind=realType), dimension(2, 4), intent(in) :: q1, q2
+        logical, intent(out) :: overlapped
+
+        ! Working
+        real(kind=realType), dimension(4) :: tmp1, tmp2
+        integer(kind=intType) :: ii, jj, kk, jjp1
+        real(kind=realType), dimension(2) :: axis, p0
+        real(kind=realType) :: min1, max1, min2, max2
+        overlapped = .True.
+        tmp1 = zero
+        tmp2 = zero
+        quadLoop: do ii = 1, 2 ! Loop over the two quads
+            edgeLoop: do jj = 1, 4 ! Loop over the edges of each quad
+                jjp1 = mod(jj, 4) + 1
+
+                if (ii == 1) then
+                    axis = q1(:, jjp1) - q1(:, jj)
+                    p0 = q1(:, jj)
+                else
+                    axis = q2(:, jjp1) - q2(:, jj)
+                    p0 = q2(:, jj)
+                end if
+
+                ! Take the axis normal
+                axis = (/axis(2), -axis(1)/)
+
+                ! Take the dot products
+                do kk = 1, 4
+                    tmp1(kk) = dot_product(axis, q1(:, kk) - p0)
+                    tmp2(kk) = dot_product(axis, q2(:, kk) - p0)
+                end do
+
+                min1 = minval(tmp1)
+                max1 = maxval(tmp1)
+
+                min2 = minval(tmp2)
+                max2 = maxval(tmp2)
+
+                if (max1 < min2 .or. max2 < min1) then
+                    overlapped = .False.
+                    ! We can just jump right out since we know they cannot
+                    ! overlap.
+                    exit quadLoop
+                end if
+            end do edgeLoop
+        end do quadLoop
+    end subroutine quadOverlap2D
+
+    function getCellRadius(q)
+
+        use constants
+        use utils, only: mynorm2
+        implicit none
+        ! Input
+        real(kind=realType), dimension(3, 4) :: q
+        real(kind=realType) :: getCellRadius
+
+        ! Working
+        real(kind=realType) :: c(3)
+        integer(kind=intType) :: ii
+
+        c = zero
+        do ii = 1, 4
+            c = c + fourth * q(:, ii)
+        end do
+
+        getCellRadius = zero
+        do ii = 1, 4
+            getCellRadius = max(getCellRadius, mynorm2(c - q(:, ii)))
+        end do
+
+    end function getCellRadius
 
 end module wallSearches
diff --git a/src/overset/zipperMesh.F90 b/src/overset/zipperMesh.F90
index 9323ab203..a51d90fa3 100644
--- a/src/overset/zipperMesh.F90
+++ b/src/overset/zipperMesh.F90
@@ -1,1699 +1,1693 @@
 module zipperMesh
 contains
 
-  !
-  !      createZipperMesh zips multiple overlapping surface meshes.
-  !      First, it eliminates overlapping quads and then stiches the
-  !      non-overlapping surface mesh boundaries with triangular
-  !      surface grids.
-  !      In an overset framework the overlapping surface grids give
-  !      wrong estimate of airloads. Zipper mesh overcomes this by
-  !      creating a more accurate representation of the surface in the
-  !      mesh overlap regions.
-  !      ref:  "Enhancements to the Hybrid Mesh Approach to Surface Loads
-  !             Integration on Overset Structured Grids", William M. Chan
-  !      http://people.nas.nasa.gov/~wchan/publications/AIAA-2009-3990.pdf
-  !
-
-  subroutine createZipperMesh(zipperFamList, nZipFam)
-
-    use constants
-    use communication, only : myID, adflow_comm_world, nProc, recvRequests, &
-         sendRequests, commPatternCell_2nd, internalCell_2nd
-    use blockPointers, only : nDom, BCData, nBocos, BCType, il, jl, kl
-    use oversetData, only : oversetString, oversetWall, CSRMatrix, cumDomProc, nDomTotal, &
-         clusters, overlapMatrix, &
-         oversetPresent, zipperMesh, zipperMeshes
-    use wallDistanceData, only : xVolumeVec, IS1, IS2
-    use utils, only : setPointers, EChk
-    use adjointvars, only :nNodesLocal
-    use sorting, only : famInList
-    use oversetUtilities, only : getWorkArray, deallocateOSurfs, transposeOverlap
-    use oversetCommUtilities, only : exchangeSurfaceIBlanks, recvOSurf, sendOSurf, &
-         getOSurfCommPattern
-    use oversetPackingRoutines, only : packOSurf, unpackOSurf, getOSurfBufferSizes
-    use oversetInitialization, only : initializeOSurf
-    use inputOverset, only : debugZipper, useZipperMesh
-    use surfaceFamilies, only : BCFamExchange, famNames, BCFamGroups
-    use stringOps
-    use gapBoundaries
-    use wallSearches, only : wallSearch
+    !
+    !      createZipperMesh zips multiple overlapping surface meshes.
+    !      First, it eliminates overlapping quads and then stiches the
+    !      non-overlapping surface mesh boundaries with triangular
+    !      surface grids.
+    !      In an overset framework the overlapping surface grids give
+    !      wrong estimate of airloads. Zipper mesh overcomes this by
+    !      creating a more accurate representation of the surface in the
+    !      mesh overlap regions.
+    !      ref:  "Enhancements to the Hybrid Mesh Approach to Surface Loads
+    !             Integration on Overset Structured Grids", William M. Chan
+    !      http://people.nas.nasa.gov/~wchan/publications/AIAA-2009-3990.pdf
+    !
+
+    subroutine createZipperMesh(zipperFamList, nZipFam)
+
+        use constants
+        use communication, only: myID, adflow_comm_world, nProc, recvRequests, &
+                                 sendRequests, commPatternCell_2nd, internalCell_2nd
+        use blockPointers, only: nDom, BCData, nBocos, BCType, il, jl, kl
+        use oversetData, only: oversetString, oversetWall, CSRMatrix, cumDomProc, nDomTotal, &
+                               clusters, overlapMatrix, &
+                               oversetPresent, zipperMesh, zipperMeshes
+        use wallDistanceData, only: xVolumeVec, IS1, IS2
+        use utils, only: setPointers, EChk
+        use adjointvars, only: nNodesLocal
+        use sorting, only: famInList
+        use oversetUtilities, only: getWorkArray, deallocateOSurfs, transposeOverlap
+        use oversetCommUtilities, only: exchangeSurfaceIBlanks, recvOSurf, sendOSurf, &
+                                        getOSurfCommPattern
+        use oversetPackingRoutines, only: packOSurf, unpackOSurf, getOSurfBufferSizes
+        use oversetInitialization, only: initializeOSurf
+        use inputOverset, only: debugZipper, useZipperMesh
+        use surfaceFamilies, only: BCFamExchange, famNames, BCFamGroups
+        use stringOps
+        use gapBoundaries
+        use wallSearches, only: wallSearch
 
 #include <petsc/finclude/petsc.h>
-    use petsc
-    implicit none
-
-    ! Input Parameters
-    integer(kind=intType), intent(in) :: nZipFam
-    integer(kind=intType), intent(in), dimension(nZipFam) :: zipperFamList
-
-    ! Local Variables
-    integer(kind=intType) :: i, j, k, ii, jj, kk, iStart, iSize, sps, level, iStr
-    integer(kind=intType) :: iDom, jDom, nn, mm, n, ierr, iProc, iWork, nodeFam(3)
-    integer(kind=intType) :: nNodeTotal, nTriTotal, offset, iBCGroup, nFam
-    integer(kind=intType), dimension(:), allocatable :: famList
-
-    integer(kind=intType), dimension(:,:), allocatable :: tmpInt2D, work
-    logical, dimension(:), allocatable :: oSurfReady
-    type(oversetWall), dimension(:), allocatable :: oSurfs
-    integer(kind=intType), dimension(:), allocatable :: intRecvBuf
-    type(oversetString), target :: master, pocketMaster
-    type(oversetString), pointer :: str
-    integer(kind=intType), dimension(:), allocatable :: nodeIndices
-
-    integer(kind=intType) :: nFullStrings, nUnique
-    integer(kind=intType) :: nOSurfRecv, nOSurfSend
-    integer(kind=intType) , dimension(:,:), allocatable :: oSurfRecvList, oSurfSendList
-    ! MPI/Communication related
-    integer mpiStatus(MPI_STATUS_SIZE)
-    integer(kind=intType), dimension(:, :), allocatable :: bufSizes
-    integer(kind=intType), dimension(:, :), allocatable :: recvInfo
-    integer(kind=intType) :: sendCount, recvCount, index
-    type(CSRMatrix), pointer :: overlap
-    type(CSRMatrix) :: overlapTranspose
-    type(zipperMesh), pointer :: zipper
-    ! Wall search related
-    integer(kind=intType) :: ncells
-    type(oversetWall), dimension(:), allocatable, target :: walls
-    type(oversetWall),  target :: fullWall
-
-    if (.not. oversetPresent .or. (.not. useZipperMesh)) then
-       ! Not overset so we don't can't have a zipper.
-       return
-    end if
-
-    ! Zipper is only implemented for single grid and 1 spectral
-    ! instance (ie not time spectral).
-    level = 1
-    sps = 1
-
-    call initBCDataIblank(level, sps)
-
-    ! We build the zipper meshes *independently* for each BCType.
-    BCGroupLoop: do iBCGroup=1, nFamExchange
-
-       ! Set a pointer to the zipper we are working on to make code
-       ! easier to read
-       zipper => zipperMeshes(iBCGroup)
-
-       ! Deallocate if already exists
-       if (zipper%allocated) then
-          call VecScatterDestroy(zipper%scatter, ierr)
-          call ECHK(ierr, __FILE__, __LINE__)
-          call VecDestroy(zipper%localVal, ierr)
-          call ECHK(ierr, __FILE__, __LINE__)
-          zipper%allocated = .False.
-       end if
-
-       ! Note that the zipper%conn could be allocated with size of
-       ! zero, but the vecScatter and Vec are not petsc-allocated.
-       if (allocated(zipper%conn)) then
-          deallocate(zipper%conn, zipper%fam, zipper%indices)
-       end if
-
-       ! Before we can proceed with the zipper, we need to generate
-       ! intersection of the zipperFamList() with the families on this
-       ! BCGroup. This would be so much easier in Python...
-
-       if (allocated(famList)) then
-          deallocate(famList)
-       end if
-
-       nFam = 0
-       do i=1, size(BCFamGroups(iBCGroup)%famList)
-          do j=1, size(zipperFamlist)
-             if (BCFamGroups(iBCGroup)%famList(i) == zipperFamList(j)) then
-                nFam = nFam + 1
-             end if
-          end do
-       end do
-
-       ! If nFam is zero, no need ot do anything for this zipper. Just
-       ! allocated zero-sized arrays so we know the size is 0.
-       if (nFam == 0) then
-          allocate(zipper%conn(3, 0), zipper%fam(0), zipper%indices(0))
-
-          cycle ! to the next BCGroup
-       else
-          ! Do a second pass and fill up the famList
-          allocate(famList(nFam))
-          nFam = 0
-          do i=1, size(BCFamGroups(iBCGroup)%famList)
-             do j=1, size(zipperFamlist)
-                if (BCFamGroups(iBCGroup)%famList(i) == zipperFamList(j)) then
-                   nFam = nFam + 1
-                   famList(nFam) = zipperFamList(j)
-                end if
-             end do
-          end do
-       end if
-
-       if (debugZipper .and. myid == 0) then
-          write(*,"(a)",advance="no") '-> Creating zipper for families : '
-          do i=1, size(famList)
-             write(*,"(a,1x)",advance="no") trim(famNames(famList(i)))
-          end do
-          print "(1x)"
-       end if
-
-       overlap => overlapMatrix(level, sps)
-       call transposeOverlap(overlap, overlapTranspose)
-       ! -------------------------------------------------------------------
-       ! Step 1: Eliminate any gap overlaps between meshes
-       ! -------------------------------------------------------------------
-
-       ! Determine the average area of surfaces on each cluster. This
-       ! will be independent of any block splitting distribution.
-       call determineClusterAreas(famList)
-
-       ! Set the boundary condition blank values
-       call slitElimination(famList, level, sps)
-
-       ! Create the surface deltas
-       call surfaceDeviation(famList, level, sps)
-
-       ! ================ WE NORMALLY GOT THIS FROM OVERSETCOMM =============
-       ! Get the OSurf buffer sizes becuase we need that for getOSurfCommPattern
-       allocate(bufSizes(nDomTotal, 6), tmpInt2D(nDomTotal, 6))
-       tmpInt2D = 0
-       do nn=1, nDom
-          call setPointers(nn, level, sps)
-          iDom = cumDomProc(myid) + nn
-          call getOSurfBufferSizes(famList, il, jl, kl, tmpInt2D(iDom, 5), &
-               tmpInt2D(iDom, 6), .True.)
-       end do
-
-       call mpi_allreduce(tmpInt2D, bufSizes, 6*nDomTotal, adflow_integer, MPI_SUM, &
-            adflow_comm_world, ierr)
-       call ECHK(ierr, __FILE__, __LINE__)
-
-       ! Get the basic surface comm pattern.
-       ! For sending, the worse case is sending all my blocks/fringes/walls to
-       ! everyone but myself:
-       ii = nDom*(nProc-1)
-       allocate(oSurfSendList(2, ii))
-
-       ! For receiving, the worse receive is all the blocks/fringes/wall I
-       ! don't already have:
-       ii = nDomTotal - nDom
-       allocate(oSurfRecvList(2, ii))
-
-       call getOSurfCommPattern(overlap, overlapTranspose, &
-            oSurfSendList, nOSurfSend, oSurfRecvList, nOSurfRecv, bufSizes(:, 6))
-       deallocate(tmpInt2D, bufSizes)
-       ! ========================================================================
-
-       ! Alloc data for the OSurfs
-       nn = max(nProc, 2*nOSurfSend, 2*nOSurfRecv)
-       allocate(tmpInt2D(nDomTotal, 2), bufSizes(nDomTotal, 2), &
-            oSurfReady(nDomTotal), recvInfo(2, nn))
-
-       tmpInt2D = 0
-       ! Need to get the differt sizes for the OSurfs since they are now
-       ! based on the primal mesh as opposed to do the dual mesh as
-       ! previously done
-       do nn=1, nDom
-          call setPointers(nn, level, sps)
-          iDom = cumDomProc(myid) + nn
-          call getOSurfBufferSizes(famList, il, jl, kl, tmpInt2D(iDom, 1), &
-               tmpInt2D(iDom, 2), .False.)
-       end do
-
-       ! Make sure everyone has the sizes
-       call mpi_allreduce(tmpInt2D, bufSizes, 2*nDomTotal, adflow_integer, MPI_SUM, &
-            adflow_comm_world, ierr)
-       call ECHK(ierr, __FILE__, __LINE__)
-       deallocate(tmpInt2D)
-
-       ! allocate oSurfs for the primal mesh
-       allocate(oSurfs(nDomTotal))
-       do iDom=1, nDomtotal
-          if (bufSizes(iDom, 1) == 0) then
-             oSurfReady(iDom) = .True.
-          else
-             oSurfReady(iDom) = .False.
-          end if
-       end do
-
-       ! Initialize the primal walls
-       do nn=1, nDom
-          call setPointers(nn, level, sps)
-          iDom = cumDomProc(myid) + nn
-          call initializeOSurf(famList, oSurfs(iDom), .False., clusters(iDom))
-          call packOSurf(famList, oSurfs(iDom), .False.)
-          oSurfReady(iDom) = .True.
-       end do
-
-       ! Post all the oSurf iSends
-       sendCount = 0
-       do jj=1, nOSurfSend
-          iProc = oSurfSendList(1, jj)
-          iDom = oSurfSendList(2, jj)
-          call sendOSurf(oSurfs(iDom), iDom, iProc, 0, sendCount)
-       end do
-
-       recvCount = 0
-       do jj=1, nOSurfRecv
-          iProc = oSurfRecvList(1, jj)
-          iDom = oSurfRecvList(2, jj)
-          call recvOSurf(oSurfs(iDom), iDom, iProc, 0, &
-               bufSizes(iDom, 1), bufSizes(iDom, 2), recvCount, recvInfo)
-       end do
-
-       ! Complete all the recives and sends
-       do i=1, recvCount
-          call mpi_waitany(recvCount, recvRequests, index, mpiStatus, ierr)
-          call ECHK(ierr, __FILE__, __LINE__)
-       end do
-
-       do i=1,sendCount
-          call mpi_waitany(sendCount, sendRequests, index, mpiStatus, ierr)
-          call ECHK(ierr, __FILE__, __LINE__)
-       end do
-
-       ! Unpack any blocks we received if necessary:
-       do i=1, recvCount
-
-          ! Global domain index of the recv that finished
-          iDom = recvInfo(1, i)
-          if (.not. oSurfs(iDom)%allocated) then
-             call unpackOSurf(oSurfs(iDom))
-          end if
-       end do
-
-       ! Determine the size of the buffer we need locally for the
-       ! receives. We do this outside the main iteration loop since it
-       ! always the same size.
-       ii = 0
-       do jj=1, noSurfSend
-          ! These blocks are by definition local.
-          iDom = oSurfSendList(2, jj)
-          ii = ii + oSurfs(iDom)%maxCells
-       end do
-       allocate(intRecvBuf(max(1, ii)))
-
-       ! ------------------------ Performing Searches ----------------
-       call getWorkArray(overlap, work)
-
-       do iWork=1, size(work,2)
-          iDom = work(1, iWork)
-          jDom = work(2, iWork)
-          call wallSearch(oSurfs(iDom), oSurfs(jDom))
-       end do
-
-       ! ------------------------ Receiving iBlank back ---------------
-
-       sendCount = 0
-       do jj=1, noSurfRecv
-          iProc = oSurfRecvList(1, jj)
-          iDom = oSurfRecvList(2, jj)
-          sendCount = sendCount + 1
-          call mpi_isend(oSurfs(iDom)%iBlank, oSurfs(iDom)%maxCells, adflow_integer, &
-               iproc, iDom, adflow_comm_world, sendRequests(sendCount), ierr)
-          call ECHK(ierr, __FILE__, __LINE__)
-       end do
-
-       recvCount = 0
-       iStart = 1
-       do jj=1, noSurfSend
-          iProc = oSurfSendList(1, jj)
-          iDom = oSurfSendList(2, jj)
-          iSize = oSurfs(iDom)%maxCells
-          recvCount = recvCount + 1
-
-          call mpi_irecv(intRecvBuf(iStart), iSize, adflow_integer, &
-               iProc, iDom, adflow_comm_world, recvRequests(recvCount), ierr)
-          call ECHK(ierr, __FILE__, __LINE__)
-          iStart = iStart + iSize
-       end do
-
-       ! Now wait for the sends and receives to finish
-       do i=1, sendCount
-          call mpi_waitany(sendCount, sendRequests, index, mpiStatus, ierr)
-          call ECHK(ierr, __FILE__, __LINE__)
-       end do
-
-       do i=1, recvCount
-          call mpi_waitany(recvCount, recvRequests, index, mpiStatus, ierr)
-          call ECHK(ierr, __FILE__, __LINE__)
-       end do
-
-       ! Process the oSurfs we own locally
-       do nn=1, nDom
-          call setPointers(nn, level, sps)
-          iDom = cumDomProc(myid) + nn
-          ii = 0
-          do mm=1, nBocos
-             if (famInList(BCData(mm)%famID, famList)) then
-                do j=BCData(mm)%jnBeg+1, BCData(mm)%jnEnd
-                   do i=BCData(mm)%inBeg+1, BCData(mm)%inEnd
-                      ii = ii +1
-                      if (oSurfs(iDom)%iBlank(ii) == -2) then
-                         BCData(mm)%iblank(i,j) = -2
-                      end if
-                   end do
+        use petsc
+        implicit none
+
+        ! Input Parameters
+        integer(kind=intType), intent(in) :: nZipFam
+        integer(kind=intType), intent(in), dimension(nZipFam) :: zipperFamList
+
+        ! Local Variables
+        integer(kind=intType) :: i, j, k, ii, jj, kk, iStart, iSize, sps, level, iStr
+        integer(kind=intType) :: iDom, jDom, nn, mm, n, ierr, iProc, iWork, nodeFam(3)
+        integer(kind=intType) :: nNodeTotal, nTriTotal, offset, iBCGroup, nFam
+        integer(kind=intType), dimension(:), allocatable :: famList
+
+        integer(kind=intType), dimension(:, :), allocatable :: tmpInt2D, work
+        logical, dimension(:), allocatable :: oSurfReady
+        type(oversetWall), dimension(:), allocatable :: oSurfs
+        integer(kind=intType), dimension(:), allocatable :: intRecvBuf
+        type(oversetString), target :: master, pocketMaster
+        type(oversetString), pointer :: str
+        integer(kind=intType), dimension(:), allocatable :: nodeIndices
+
+        integer(kind=intType) :: nFullStrings, nUnique
+        integer(kind=intType) :: nOSurfRecv, nOSurfSend
+        integer(kind=intType), dimension(:, :), allocatable :: oSurfRecvList, oSurfSendList
+        ! MPI/Communication related
+        integer mpiStatus(MPI_STATUS_SIZE)
+        integer(kind=intType), dimension(:, :), allocatable :: bufSizes
+        integer(kind=intType), dimension(:, :), allocatable :: recvInfo
+        integer(kind=intType) :: sendCount, recvCount, index
+        type(CSRMatrix), pointer :: overlap
+        type(CSRMatrix) :: overlapTranspose
+        type(zipperMesh), pointer :: zipper
+        ! Wall search related
+        integer(kind=intType) :: ncells
+        type(oversetWall), dimension(:), allocatable, target :: walls
+        type(oversetWall), target :: fullWall
+
+        if (.not. oversetPresent .or. (.not. useZipperMesh)) then
+            ! Not overset so we don't can't have a zipper.
+            return
+        end if
+
+        ! Zipper is only implemented for single grid and 1 spectral
+        ! instance (ie not time spectral).
+        level = 1
+        sps = 1
+
+        call initBCDataIblank(level, sps)
+
+        ! We build the zipper meshes *independently* for each BCType.
+        BCGroupLoop: do iBCGroup = 1, nFamExchange
+
+            ! Set a pointer to the zipper we are working on to make code
+            ! easier to read
+            zipper => zipperMeshes(iBCGroup)
+
+            ! Deallocate if already exists
+            if (zipper%allocated) then
+                call VecScatterDestroy(zipper%scatter, ierr)
+                call ECHK(ierr, __FILE__, __LINE__)
+                call VecDestroy(zipper%localVal, ierr)
+                call ECHK(ierr, __FILE__, __LINE__)
+                zipper%allocated = .False.
+            end if
+
+            ! Note that the zipper%conn could be allocated with size of
+            ! zero, but the vecScatter and Vec are not petsc-allocated.
+            if (allocated(zipper%conn)) then
+                deallocate (zipper%conn, zipper%fam, zipper%indices)
+            end if
+
+            ! Before we can proceed with the zipper, we need to generate
+            ! intersection of the zipperFamList() with the families on this
+            ! BCGroup. This would be so much easier in Python...
+
+            if (allocated(famList)) then
+                deallocate (famList)
+            end if
+
+            nFam = 0
+            do i = 1, size(BCFamGroups(iBCGroup)%famList)
+                do j = 1, size(zipperFamlist)
+                    if (BCFamGroups(iBCGroup)%famList(i) == zipperFamList(j)) then
+                        nFam = nFam + 1
+                    end if
                 end do
-             end if
-          end do
-       end do
-
-       ! And update based on the data we received from other processors
-       ii = 0
-       do kk=1, noSurfSend
-
-          iDom = oSurfSendList(2, kk)
-          nn = iDom - cumDomProc(myid)
-
-          ! Set the block pointers for the local block we are dealing
-           !with:
-          call setPointers(nn, level, sps)
-          do mm=1, nBocos
-             if (famInList(BCData(mm)%famID, famList)) then
-                do j=BCData(mm)%jnBeg+1, BCData(mm)%jnEnd
-                   do i=BCData(mm)%inBeg+1, BCData(mm)%inEnd
-                      ii = ii + 1
-                      if (intRecvBuf(ii) == -2) then
-                         BCData(mm)%iBlank(i  , j) = -2
-                      end if
-                   enddo
+            end do
+
+            ! If nFam is zero, no need ot do anything for this zipper. Just
+            ! allocated zero-sized arrays so we know the size is 0.
+            if (nFam == 0) then
+                allocate (zipper%conn(3, 0), zipper%fam(0), zipper%indices(0))
+
+                cycle ! to the next BCGroup
+            else
+                ! Do a second pass and fill up the famList
+                allocate (famList(nFam))
+                nFam = 0
+                do i = 1, size(BCFamGroups(iBCGroup)%famList)
+                    do j = 1, size(zipperFamlist)
+                        if (BCFamGroups(iBCGroup)%famList(i) == zipperFamList(j)) then
+                            nFam = nFam + 1
+                            famList(nFam) = zipperFamList(j)
+                        end if
+                    end do
                 end do
-             end if
-          end do
-       end do
-       ! Release some unnecessary memory
-       deallocate(bufSizes, oSurfSendList, oSurfRecvList, oSurfReady, recvInfo, work)
-
-       ! Ditch our oSurfs
-       call deallocateOSurfs(OSurfs, nDomTotal)
-       deallocate(oSurfs, intRecvBuf)
-
-       ! Before we continue, we do a little more
-       ! processing. bowTieElimination tries to eliminate cells
-       call bowTieAndIsolationElimination(famList, level, sps)
-
-       ! -------------------------------------------------------------------
-       ! Step 2: Identify gap boundary strings and split the strings to
-       !         sub-strings.
-       ! -------------------------------------------------------------------
-
-       if (debugZipper) then
-          call writeWalls(famList)
-       end if
-
-       call makeGapBoundaryStrings(famList, level, sps, master)
-
-       rootProc: if (myid == 0) then
-          if (debugZipper) then
-             call writeZipperDebug(master)
-          end if
-
-          ! Run the common core zipper routines
-          call zipperCore(master, pocketMaster, debugZipper)
-
-          ! Before we create the final data structures for the zipper
-          ! mesh; we will combine the master and pocketMaster
-          ! strings, and unique-ify the indices to help keep amount of data
-          ! transfer to a minimum. We also determine at this point which
-          ! triangle belongs to which family.
-
-          call setStringPointers(master)
-          nTriTotal = master%ntris + pocketMaster%ntris
-          nNodeTotal = master%nNodes + pocketMaster%nNodes
-          allocate(zipper%conn(3, nTriTotal), zipper%fam(nTriTotal), zipper%indices(nNodeTotal))
-
-          ii = 0
-          jj = 0
-          outerLoop: do iStr=1,2
-             ! Select which of the two we are dealing with
-             if (iStr ==1) then
-                str => master
-                offset = 0
-             else
-                str => pocketMaster
-                offset = master%nNodes
-             end if
-
-             ! Loop over the number of triangles.
-             do i=1, str%nTris
-
-                ! Extract the family from the nodes
-                do j=1,3
-                   nodeFam(j) = str%family(str%tris(j, i))
+            end if
+
+            if (debugZipper .and. myid == 0) then
+                write (*, "(a)", advance="no") '-> Creating zipper for families : '
+                do i = 1, size(famList)
+                    write (*, "(a,1x)", advance="no") trim(famNames(famList(i)))
                 end do
+                print "(1x)"
+            end if
+
+            overlap => overlapMatrix(level, sps)
+            call transposeOverlap(overlap, overlapTranspose)
+            ! -------------------------------------------------------------------
+            ! Step 1: Eliminate any gap overlaps between meshes
+            ! -------------------------------------------------------------------
+
+            ! Determine the average area of surfaces on each cluster. This
+            ! will be independent of any block splitting distribution.
+            call determineClusterAreas(famList)
+
+            ! Set the boundary condition blank values
+            call slitElimination(famList, level, sps)
+
+            ! Create the surface deltas
+            call surfaceDeviation(famList, level, sps)
+
+            ! ================ WE NORMALLY GOT THIS FROM OVERSETCOMM =============
+            ! Get the OSurf buffer sizes becuase we need that for getOSurfCommPattern
+            allocate (bufSizes(nDomTotal, 6), tmpInt2D(nDomTotal, 6))
+            tmpInt2D = 0
+            do nn = 1, nDom
+                call setPointers(nn, level, sps)
+                iDom = cumDomProc(myid) + nn
+                call getOSurfBufferSizes(famList, il, jl, kl, tmpInt2D(iDom, 5), &
+                                         tmpInt2D(iDom, 6), .True.)
+            end do
+
+            call mpi_allreduce(tmpInt2D, bufSizes, 6 * nDomTotal, adflow_integer, MPI_SUM, &
+                               adflow_comm_world, ierr)
+            call ECHK(ierr, __FILE__, __LINE__)
+
+            ! Get the basic surface comm pattern.
+            ! For sending, the worse case is sending all my blocks/fringes/walls to
+            ! everyone but myself:
+            ii = nDom * (nProc - 1)
+            allocate (oSurfSendList(2, ii))
+
+            ! For receiving, the worse receive is all the blocks/fringes/wall I
+            ! don't already have:
+            ii = nDomTotal - nDom
+            allocate (oSurfRecvList(2, ii))
+
+            call getOSurfCommPattern(overlap, overlapTranspose, &
+                                     oSurfSendList, nOSurfSend, oSurfRecvList, nOSurfRecv, bufSizes(:, 6))
+            deallocate (tmpInt2D, bufSizes)
+            ! ========================================================================
+
+            ! Alloc data for the OSurfs
+            nn = max(nProc, 2 * nOSurfSend, 2 * nOSurfRecv)
+            allocate (tmpInt2D(nDomTotal, 2), bufSizes(nDomTotal, 2), &
+                      oSurfReady(nDomTotal), recvInfo(2, nn))
+
+            tmpInt2D = 0
+            ! Need to get the differt sizes for the OSurfs since they are now
+            ! based on the primal mesh as opposed to do the dual mesh as
+            ! previously done
+            do nn = 1, nDom
+                call setPointers(nn, level, sps)
+                iDom = cumDomProc(myid) + nn
+                call getOSurfBufferSizes(famList, il, jl, kl, tmpInt2D(iDom, 1), &
+                                         tmpInt2D(iDom, 2), .False.)
+            end do
+
+            ! Make sure everyone has the sizes
+            call mpi_allreduce(tmpInt2D, bufSizes, 2 * nDomTotal, adflow_integer, MPI_SUM, &
+                               adflow_comm_world, ierr)
+            call ECHK(ierr, __FILE__, __LINE__)
+            deallocate (tmpInt2D)
+
+            ! allocate oSurfs for the primal mesh
+            allocate (oSurfs(nDomTotal))
+            do iDom = 1, nDomtotal
+                if (bufSizes(iDom, 1) == 0) then
+                    oSurfReady(iDom) = .True.
+                else
+                    oSurfReady(iDom) = .False.
+                end if
+            end do
+
+            ! Initialize the primal walls
+            do nn = 1, nDom
+                call setPointers(nn, level, sps)
+                iDom = cumDomProc(myid) + nn
+                call initializeOSurf(famList, oSurfs(iDom), .False., clusters(iDom))
+                call packOSurf(famList, oSurfs(iDom), .False.)
+                oSurfReady(iDom) = .True.
+            end do
+
+            ! Post all the oSurf iSends
+            sendCount = 0
+            do jj = 1, nOSurfSend
+                iProc = oSurfSendList(1, jj)
+                iDom = oSurfSendList(2, jj)
+                call sendOSurf(oSurfs(iDom), iDom, iProc, 0, sendCount)
+            end do
+
+            recvCount = 0
+            do jj = 1, nOSurfRecv
+                iProc = oSurfRecvList(1, jj)
+                iDom = oSurfRecvList(2, jj)
+                call recvOSurf(oSurfs(iDom), iDom, iProc, 0, &
+                               bufSizes(iDom, 1), bufSizes(iDom, 2), recvCount, recvInfo)
+            end do
+
+            ! Complete all the recives and sends
+            do i = 1, recvCount
+                call mpi_waitany(recvCount, recvRequests, index, mpiStatus, ierr)
+                call ECHK(ierr, __FILE__, __LINE__)
+            end do
+
+            do i = 1, sendCount
+                call mpi_waitany(sendCount, sendRequests, index, mpiStatus, ierr)
+                call ECHK(ierr, __FILE__, __LINE__)
+            end do
+
+            ! Unpack any blocks we received if necessary:
+            do i = 1, recvCount
 
-                ! Increment the running counter for all triangles.
-                ii = ii + 1
-
-                ! Set the family
-                zipper%Fam(ii) = selectNodeFamily(nodeFam)
-
-                ! Set the connectivity.
-                zipper%conn(:, ii) = str%tris(:, i) + offset
-             end do
-
-             ! Loop over the nodal indices and add
-             do j=1, str%nNodes
-                ! And set the global indices that the zipper needs. Note
-                ! that we are doing zipperIndices as a scalar and that
-                ! the indices refer to the global nodes so are already in
-                ! 0-based ordering.
-                jj = jj + 1
-                zipper%indices(jj) = str%ind(j)
-             end do
-          end do outerLoop
-
-          call deallocateString(master)
-          call deallocateString(pocketMaster)
-       else
-          ! Other procs don't have any triangles *sniffle* :-(
-          allocate(zipper%conn(3, 0), zipper%fam(0), zipper%indices(0))
-       end if rootProc
-
-       call VecCreateMPI(ADFLOW_COMM_WORLD, size(zipper%indices), PETSC_DETERMINE, &
-            zipper%localVal, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ! Now create the general scatter that goes from the
-       ! globalNodalVector to the local vectors.
-       call ISCreateGeneral(adflow_comm_world, size(zipper%indices), &
-            zipper%indices-1, PETSC_COPY_VALUES, IS1, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
+                ! Global domain index of the recv that finished
+                iDom = recvInfo(1, i)
+                if (.not. oSurfs(iDom)%allocated) then
+                    call unpackOSurf(oSurfs(iDom))
+                end if
+            end do
+
+            ! Determine the size of the buffer we need locally for the
+            ! receives. We do this outside the main iteration loop since it
+            ! always the same size.
+            ii = 0
+            do jj = 1, noSurfSend
+                ! These blocks are by definition local.
+                iDom = oSurfSendList(2, jj)
+                ii = ii + oSurfs(iDom)%maxCells
+            end do
+            allocate (intRecvBuf(max(1, ii)))
+
+            ! ------------------------ Performing Searches ----------------
+            call getWorkArray(overlap, work)
+
+            do iWork = 1, size(work, 2)
+                iDom = work(1, iWork)
+                jDom = work(2, iWork)
+                call wallSearch(oSurfs(iDom), oSurfs(jDom))
+            end do
+
+            ! ------------------------ Receiving iBlank back ---------------
+
+            sendCount = 0
+            do jj = 1, noSurfRecv
+                iProc = oSurfRecvList(1, jj)
+                iDom = oSurfRecvList(2, jj)
+                sendCount = sendCount + 1
+                call mpi_isend(oSurfs(iDom)%iBlank, oSurfs(iDom)%maxCells, adflow_integer, &
+                               iproc, iDom, adflow_comm_world, sendRequests(sendCount), ierr)
+                call ECHK(ierr, __FILE__, __LINE__)
+            end do
+
+            recvCount = 0
+            iStart = 1
+            do jj = 1, noSurfSend
+                iProc = oSurfSendList(1, jj)
+                iDom = oSurfSendList(2, jj)
+                iSize = oSurfs(iDom)%maxCells
+                recvCount = recvCount + 1
+
+                call mpi_irecv(intRecvBuf(iStart), iSize, adflow_integer, &
+                               iProc, iDom, adflow_comm_world, recvRequests(recvCount), ierr)
+                call ECHK(ierr, __FILE__, __LINE__)
+                iStart = iStart + iSize
+            end do
+
+            ! Now wait for the sends and receives to finish
+            do i = 1, sendCount
+                call mpi_waitany(sendCount, sendRequests, index, mpiStatus, ierr)
+                call ECHK(ierr, __FILE__, __LINE__)
+            end do
 
+            do i = 1, recvCount
+                call mpi_waitany(recvCount, recvRequests, index, mpiStatus, ierr)
+                call ECHK(ierr, __FILE__, __LINE__)
+            end do
+
+            ! Process the oSurfs we own locally
+            do nn = 1, nDom
+                call setPointers(nn, level, sps)
+                iDom = cumDomProc(myid) + nn
+                ii = 0
+                do mm = 1, nBocos
+                    if (famInList(BCData(mm)%famID, famList)) then
+                        do j = BCData(mm)%jnBeg + 1, BCData(mm)%jnEnd
+                            do i = BCData(mm)%inBeg + 1, BCData(mm)%inEnd
+                                ii = ii + 1
+                                if (oSurfs(iDom)%iBlank(ii) == -2) then
+                                    BCData(mm)%iblank(i, j) = -2
+                                end if
+                            end do
+                        end do
+                    end if
+                end do
+            end do
+
+            ! And update based on the data we received from other processors
+            ii = 0
+            do kk = 1, noSurfSend
+
+                iDom = oSurfSendList(2, kk)
+                nn = iDom - cumDomProc(myid)
+
+                ! Set the block pointers for the local block we are dealing
+                !with:
+                call setPointers(nn, level, sps)
+                do mm = 1, nBocos
+                    if (famInList(BCData(mm)%famID, famList)) then
+                        do j = BCData(mm)%jnBeg + 1, BCData(mm)%jnEnd
+                            do i = BCData(mm)%inBeg + 1, BCData(mm)%inEnd
+                                ii = ii + 1
+                                if (intRecvBuf(ii) == -2) then
+                                    BCData(mm)%iBlank(i, j) = -2
+                                end if
+                            end do
+                        end do
+                    end if
+                end do
+            end do
+            ! Release some unnecessary memory
+            deallocate (bufSizes, oSurfSendList, oSurfRecvList, oSurfReady, recvInfo, work)
+
+            ! Ditch our oSurfs
+            call deallocateOSurfs(OSurfs, nDomTotal)
+            deallocate (oSurfs, intRecvBuf)
+
+            ! Before we continue, we do a little more
+            ! processing. bowTieElimination tries to eliminate cells
+            call bowTieAndIsolationElimination(famList, level, sps)
+
+            ! -------------------------------------------------------------------
+            ! Step 2: Identify gap boundary strings and split the strings to
+            !         sub-strings.
+            ! -------------------------------------------------------------------
+
+            if (debugZipper) then
+                call writeWalls(famList)
+            end if
+
+            call makeGapBoundaryStrings(famList, level, sps, master)
+
+            rootProc: if (myid == 0) then
+                if (debugZipper) then
+                    call writeZipperDebug(master)
+                end if
+
+                ! Run the common core zipper routines
+                call zipperCore(master, pocketMaster, debugZipper)
+
+                ! Before we create the final data structures for the zipper
+                ! mesh; we will combine the master and pocketMaster
+                ! strings, and unique-ify the indices to help keep amount of data
+                ! transfer to a minimum. We also determine at this point which
+                ! triangle belongs to which family.
+
+                call setStringPointers(master)
+                nTriTotal = master%ntris + pocketMaster%ntris
+                nNodeTotal = master%nNodes + pocketMaster%nNodes
+                allocate (zipper%conn(3, nTriTotal), zipper%fam(nTriTotal), zipper%indices(nNodeTotal))
+
+                ii = 0
+                jj = 0
+                outerLoop: do iStr = 1, 2
+                    ! Select which of the two we are dealing with
+                    if (iStr == 1) then
+                        str => master
+                        offset = 0
+                    else
+                        str => pocketMaster
+                        offset = master%nNodes
+                    end if
+
+                    ! Loop over the number of triangles.
+                    do i = 1, str%nTris
+
+                        ! Extract the family from the nodes
+                        do j = 1, 3
+                            nodeFam(j) = str%family(str%tris(j, i))
+                        end do
+
+                        ! Increment the running counter for all triangles.
+                        ii = ii + 1
+
+                        ! Set the family
+                        zipper%Fam(ii) = selectNodeFamily(nodeFam)
+
+                        ! Set the connectivity.
+                        zipper%conn(:, ii) = str%tris(:, i) + offset
+                    end do
+
+                    ! Loop over the nodal indices and add
+                    do j = 1, str%nNodes
+                        ! And set the global indices that the zipper needs. Note
+                        ! that we are doing zipperIndices as a scalar and that
+                        ! the indices refer to the global nodes so are already in
+                        ! 0-based ordering.
+                        jj = jj + 1
+                        zipper%indices(jj) = str%ind(j)
+                    end do
+                end do outerLoop
+
+                call deallocateString(master)
+                call deallocateString(pocketMaster)
+            else
+                ! Other procs don't have any triangles *sniffle* :-(
+                allocate (zipper%conn(3, 0), zipper%fam(0), zipper%indices(0))
+            end if rootProc
+
+            call VecCreateMPI(ADFLOW_COMM_WORLD, size(zipper%indices), PETSC_DETERMINE, &
+                              zipper%localVal, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Now create the general scatter that goes from the
+            ! globalNodalVector to the local vectors.
+            call ISCreateGeneral(adflow_comm_world, size(zipper%indices), &
+                                 zipper%indices - 1, PETSC_COPY_VALUES, IS1, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
 #if PETSC_VERSION_GE(3,8,0)
-       call VecScatterCreate(BCFamExchange(iBCGroup, sps)%nodeValLocal, IS1, &
-            zipper%localVal, PETSC_NULL_IS, zipper%scatter, ierr)
+            call VecScatterCreate(BCFamExchange(iBCGroup, sps)%nodeValLocal, IS1, &
+                                  zipper%localVal, PETSC_NULL_IS, zipper%scatter, ierr)
 #else
-       call VecScatterCreate(BCFamExchange(iBCGroup, sps)%nodeValLocal, IS1, &
-            zipper%localVal, PETSC_NULL_OBJECT, zipper%scatter, ierr)
+            call VecScatterCreate(BCFamExchange(iBCGroup, sps)%nodeValLocal, IS1, &
+                                  zipper%localVal, PETSC_NULL_OBJECT, zipper%scatter, ierr)
 #endif
-       call EChk(ierr,__FILE__,__LINE__)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call ISDestroy(IS1, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
+            call ISDestroy(IS1, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       ! Flag to keep track of allocated PETSc object.
-       zipper%allocated = .True.
+            ! Flag to keep track of allocated PETSc object.
+            zipper%allocated = .True.
 
-    end do BCGroupLoop
+        end do BCGroupLoop
 
-  contains
-    function selectNodeFamily(nodeFam)
-      implicit none
-      integer(kind=intType), dimension(3) :: nodeFam
-      integer(kind=intType) :: selectNodeFamily
+    contains
+        function selectNodeFamily(nodeFam)
+            implicit none
+            integer(kind=intType), dimension(3) :: nodeFam
+            integer(kind=intType) :: selectNodeFamily
 
-      ! We have a few cases to check:
+            ! We have a few cases to check:
 
-      if (nodeFam(1) == nodeFam(2) .and. nodeFam(1) == nodeFam(3)) then
-         ! Case 1: All of the nodes have the same family. This is what
-         ! *should* happen all the time:
+            if (nodeFam(1) == nodeFam(2) .and. nodeFam(1) == nodeFam(3)) then
+                ! Case 1: All of the nodes have the same family. This is what
+                ! *should* happen all the time:
 
-         selectNodeFamily = nodeFam(1)
+                selectNodeFamily = nodeFam(1)
 
-      else if (nodeFam(1) == nodeFam(2)) then
+            else if (nodeFam(1) == nodeFam(2)) then
 
-         ! Case 2a: First two nodes are the same. Take the family from 1.
+                ! Case 2a: First two nodes are the same. Take the family from 1.
 
-         selectNodeFamily = nodeFam(1)
+                selectNodeFamily = nodeFam(1)
 
-      else if (nodeFam(2) == nodeFam(3)) then
+            else if (nodeFam(2) == nodeFam(3)) then
 
-         ! Case 2b: Last two nodes are the same. Take the family from 2.
+                ! Case 2b: Last two nodes are the same. Take the family from 2.
 
-         selectNodeFamily = nodeFam(2)
+                selectNodeFamily = nodeFam(2)
 
-      else if (nodeFam(1) == nodeFam(3)) then
+            else if (nodeFam(1) == nodeFam(3)) then
 
-         ! Case 2b: First and last nodes are the same. Take the family from 1.
+                ! Case 2b: First and last nodes are the same. Take the family from 1.
 
-         selectNodeFamily = nodeFam(1)
+                selectNodeFamily = nodeFam(1)
 
-      else
+            else
 
-         ! All nodes are different. We arbitrarily take the first and
-         ! print a warning becuase this should not happen.
+                ! All nodes are different. We arbitrarily take the first and
+                ! print a warning becuase this should not happen.
 
-         selectNodeFamily = nodeFam(1)
-         print *,'Family for triangle could not be uniquely determined. Nodes are from 3 different families!'
-      end if
+                selectNodeFamily = nodeFam(1)
+                print *, 'Family for triangle could not be uniquely determined. Nodes are from 3 different families!'
+            end if
 
-    end function selectNodeFamily
-  end subroutine createZipperMesh
+        end function selectNodeFamily
+    end subroutine createZipperMesh
 
-  subroutine checkZipper(fileName)
+    subroutine checkZipper(fileName)
 
-    ! Special routine for checking zipper mesh loaded from debug file
-    use constants
-    use stringOps
-    implicit none
+        ! Special routine for checking zipper mesh loaded from debug file
+        use constants
+        use stringOps
+        implicit none
 
-    ! Input/Output
-    character(*), intent(in) :: fileName
+        ! Input/Output
+        character(*), intent(in) :: fileName
 
-    ! Working
-    type(oversetString) :: master, pocketMaster
+        ! Working
+        type(oversetString) :: master, pocketMaster
 
-    call loadZipperDebug(fileName, master)
-    call zipperCore(master, pocketMaster, .True.)
+        call loadZipperDebug(fileName, master)
+        call zipperCore(master, pocketMaster, .True.)
 
-    print *, 'Zipper successfully completed'
-  end subroutine checkZipper
+        print *, 'Zipper successfully completed'
+    end subroutine checkZipper
 
-  subroutine zipperCore(master, pocketMaster, debugZipper)
+    subroutine zipperCore(master, pocketMaster, debugZipper)
 
-    ! Common routine for creating the zipper from a given master
-    use constants
-    use stringOps
-    use kdtree2_module
-    implicit none
+        ! Common routine for creating the zipper from a given master
+        use constants
+        use stringOps
+        use kdtree2_module
+        implicit none
 
-    ! Input/Output
-    type(oversetString), intent(inout) :: master, pocketMaster
-    logical, intent(in):: debugZipper
+        ! Input/Output
+        type(oversetString), intent(inout) :: master, pocketMaster
+        logical, intent(in) :: debugZipper
+
+        ! Local
+        type(oversetString), dimension(:), allocatable, target :: strings
+        type(oversetString), pointer :: str
+        integer(kind=intType) :: nStrings, i, j, nTriSelf
+
+        if (debugZipper) then
+            open (unit=101, file="master_beforeStrings.dat", form='formatted')
+            write (101, *) 'TITLE = "Master Data" '
+            write (101, *) 'Variables = "X" "Y" "Z"'
+            call writeOversetMaster(master, 101)
+            close (101)
+        end if
+
+        call createOrderedStrings(master, strings, nStrings)
+
+        master%myID = 99
+
+        ! Allocate space for the maximum number of directed edges. This
+        ! is equal to the initial number of edges (nElems) plus 3 times
+        ! the number of triangles we will add, which is also nElems. Now,
+        ! we will probably not actualy have that many since we save a
+        ! triangle and 3 edges for every self zip that is
+        ! applied. Therefore we know this will always be enough
+        allocate (master%edges(4 * master%nElems))
+
+        master%nEdges = 0
+
+        do i = 1, nStrings
+            str => strings(i)
+            do j = 1, str%nElems
+                master%nEdges = master%nEdges + 1
+                master%edges(master%nEdges)%n1 = str%p%conn(1, str%pElems(j)) !<-- first node
+                master%edges(master%nEdges)%n2 = str%p%conn(2, str%pElems(j)) !<-- second node
+            end do
+        end do
 
-    ! Local
-    type(oversetString), dimension(:), allocatable, target :: strings
-    type(oversetString), pointer :: str
-    integer(kind=intType) :: nStrings, i, j, nTriSelf
+        ! Allocate space for the triangles. Again, this can be at most,
+        ! nElems, but the total number of elements will most likely be
+        ! smaller due to self zipping. If someone puts
+        allocate (master%tris(3, master%nElems))
+        master%nTris = 0
+
+        ! Build the master tree
+        master%tree => kdtree2_create(master%x, sort=.True.)
+
+        ! Perform the string association:
+        call stringMatch(strings, nStrings, debugZipper)
+
+        if (debugZipper) then
+            open (unit=101, file="strings_beforeSelfZip.dat", form='formatted')
+            write (101, *) 'TITLE = "Gap Strings Data" '
+            write (101, *) 'Variables = "X" "Y" "Z" "Nx" "Ny" "Nz" "Vx" "Vy" "Vz" "ind" &
+                 &"gapID" "gapIndex" "otherID" "otherIndex" "ratio"'
+            do i = 1, nStrings
+                call writeOversetString(strings(i), strings, nStrings, 101)
+            end do
+            close (101)
+        end if
+
+        call performSelfZip(master, strings, nStrings, debugZipper)
+
+        ! Write out any self-zipped triangles
+        nTriSelf = master%nTris
+
+        if (debugZipper) then
+            call writeOversetTriangles(master, "selfzipTriangulation.dat", 1, master%nTris)
+        end if
+
+        ! Write out the gaps AFTER the self zip
+        if (debugZipper) then
+            open (unit=101, file="strings_afterSelfZip.dat", form='formatted')
+            write (101, *) 'TITLE = "Gap Strings Data" '
+            write (101, *) 'Variables = "X" "Y" "Z" "Nx" "Ny" "Nz" "Vx" "Vy" "Vz" "ind" &
+                 &"gapID" "gapIndex" "otherID" "otherIndex" "ratio"'
+            do i = 1, nStrings
+                call writeOversetString(strings(i), strings, nStrings, 101)
+            end do
+            close (101)
+        end if
+
+        ! Now do the cross zip
+        call makeCrossZip(master, strings, nStrings, debugZipper)
+
+        ! And write out the triangle from the cross zip
+        if (debugZipper) then
+            call writeOversetTriangles(master, "crossZipTriangulation.dat", nTriSelf + 1, master%nTris)
+        end if
+
+        ! ---------------------------------------------------------------
+        ! Sort through zipped triangle edges and the edges which have not
+        ! been used twice (orphan edges) will be ultimately gathered to
+        ! form polygon pockets to be zipped.
+        if (debugZipper) then
+            print *, 'Doing pocket zip'
+        end if
+        call makePocketZip(master, strings, nStrings, pocketMaster, debugZipper)
+
+        if (debugZipper) then
+            call writeOversetTriangles(pocketMaster, "pocketTriangulation.dat", 1, pocketMaster%nTris)
+        end if
+
+        ! Clean up the reminder of the sting memory on the root proc
+        do i = 1, nStrings
+            call deallocateString(strings(i))
+        end do
+        deallocate (strings)
 
-    if (debugZipper) then
-       open(unit=101, file="master_beforeStrings.dat", form='formatted')
-       write(101,*) 'TITLE = "Master Data" '
-       write(101,*) 'Variables = "X" "Y" "Z"'
-       call writeOversetMaster(master, 101)
-       close(101)
-    end if
+    end subroutine zipperCore
 
-    call createOrderedStrings(master, strings, nStrings)
+    !
+    !       determineClusterArea determine the average cell surface area
+    !       for all blocks in a particular cluster. This is used for
+    !       determine blanking preference for overlapping surface cells.
+
+    subroutine determineClusterAreas(famList)
+
+        use constants
+        use blockPointers, only: nDom, BCData, nBocos, BCType
+        use communication, only: adflow_comm_world, myid
+        use oversetData, onlY: clusterAreas, nClusters, clusters, cumDomProc
+        use utils, only: setPointers, EChk, setBCPointers, cross_prod
+        use BCPointers, only: xx
+        use sorting, only: famInList
+        implicit none
 
+        ! Input Parameters
+        integer(kind=intType), intent(in), dimension(:) :: famList
+
+        ! Working
+        integer(kind=intType) :: i, j, mm, nn, clusterID, ierr, nPts, nCells
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
+        real(kind=realType), dimension(:), allocatable :: localAreas
+        integer(kind=intType), dimension(:), allocatable :: localCount, globalCount
+        real(kind=realType), dimension(:, :), allocatable :: pts
+        real(kind=realType) :: fact, v1(3), v2(3), sss(3), da
+
+        if (allocated(clusterAreas)) then
+            ! We only ever do this once!
+            deallocate (clusterAreas)
+        end if
+
+        allocate (clusterAreas(nClusters), localAreas(nClusters), &
+                  localCount(nClusters), globalCount(nClusters))
+
+        localAreas = zero
+        localCount = 0
+
+        domains: do nn = 1, nDom
+            call setPointers(nn, 1_intType, 1_intType)
+
+            clusterID = clusters(cumDomProc(myid) + nn)
+
+            ! Loop over the number of boundary subfaces of this block.
+            bocos: do mm = 1, nBocos
+                famInclude: if (famInList(BCData(mm)%famID, famList)) then
+                    ! Store the cell range of the subfaces a bit easier.
+                    ! As only owned faces must be considered the nodal range
+                    ! in BCData must be used to obtain this data.
+
+                    jBeg = BCData(mm)%jnBeg + 1; jEnd = BCData(mm)%jnEnd
+                    iBeg = BCData(mm)%inBeg + 1; iEnd = BCData(mm)%inEnd
+
+                    call setBCPointers(mm, .True.)
+
+                    ! Compute the dual area at each node. Just store in first dof
+                    do j = jBeg, jEnd ! This is a face loop
+                        do i = iBeg, iEnd ! This is a face loop
+
+                            v1(:) = xx(i + 1, j + 1, :) - xx(i, j, :)
+                            v2(:) = xx(i, j + 1, :) - xx(i + 1, j, :)
+
+                            ! Cross Product
+                            call cross_prod(v1, v2, sss)
+                            da = half * sqrt(sss(1)**2 + sss(2)**2 + sss(3)**2)
+                            localAreas(clusterID) = localAreas(clusterID) + da
+                            localCount(clusterID) = localCount(clusterID) + 1
+                        end do
+                    end do
+                end if famInclude
+            end do bocos
+        end do domains
+
+        ! All reduce sum for the localAreas to get clusterAreas and
+        ! localCount to get globalCount
+
+        call mpi_allreduce(localAreas, clusterAreas, nClusters, adflow_real, &
+                           MPI_SUM, adflow_comm_world, ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
+
+        call mpi_allreduce(localCount, globalCount, nClusters, adflow_integer, &
+                           MPI_SUM, adflow_comm_world, ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
+
+        ! Final get the average global area
+        do i = 1, nClusters
+            clusterAreas(i) = clusterAreas(i) / max(globalCount(i), 1)
+        end do
 
-    master%myID = 99
+        deallocate (localAreas, localCount, globalCount)
+    end subroutine determineClusterAreas
 
-    ! Allocate space for the maximum number of directed edges. This
-    ! is equal to the initial number of edges (nElems) plus 3 times
-    ! the number of triangles we will add, which is also nElems. Now,
-    ! we will probably not actualy have that many since we save a
-    ! triangle and 3 edges for every self zip that is
-    ! applied. Therefore we know this will always be enough
-    allocate(master%edges(4*master%nElems))
+    subroutine initBCDataiBlank(level, sps)
 
-    master%nEdges = 0
+        use constants
+        use blockPointers
+        use communication
+        use utils, only: setPointers
+        use oversetUtilities, only: flagForcedRecv
+        implicit none
 
-    do i=1, nStrings
-       str => strings(i)
-       do j=1, str%nElems
-          master%nEdges = master%nEdges + 1
-          master%edges(master%nEdges)%n1 = str%p%conn(1, str%pElems(j)) !<-- first node
-          master%edges(master%nEdges)%n2 = str%p%conn(2, str%pElems(j)) !<-- second node
-       end do
-    end do
+        ! Input Parameters
+        integer(kind=intType), intent(in) :: level, sps
 
-    ! Allocate space for the triangles. Again, this can be at most,
-    ! nElems, but the total number of elements will most likely be
-    ! smaller due to self zipping. If someone puts
-    allocate(master%tris(3, master%nElems))
-    master%nTris = 0
-
-    ! Build the master tree
-    master%tree => kdtree2_create(master%x, sort=.True.)
-
-    ! Perform the string association:
-    call stringMatch(strings, nStrings, debugZipper)
-
-    if (debugZipper) then
-       open(unit=101, file="strings_beforeSelfZip.dat", form='formatted')
-       write(101,*) 'TITLE = "Gap Strings Data" '
-       write(101,*) 'Variables = "X" "Y" "Z" "Nx" "Ny" "Nz" "Vx" "Vy" "Vz" "ind" &
-            &"gapID" "gapIndex" "otherID" "otherIndex" "ratio"'
-       do i=1, nStrings
-          call writeOversetString(strings(i), strings, nStrings, 101)
-       end do
-       close(101)
-    end if
-
-    call performSelfZip(master, strings, nStrings, debugZipper)
-
-   ! Write out any self-zipped triangles
-    nTriSelf = master%nTris
-
-    if (debugZipper) then
-       call writeOversetTriangles(master, "selfzipTriangulation.dat", 1, master%nTris)
-    end if
-
-    ! Write out the gaps AFTER the self zip
-    if (debugZipper) then
-       open(unit=101, file="strings_afterSelfZip.dat", form='formatted')
-       write(101,*) 'TITLE = "Gap Strings Data" '
-       write(101,*) 'Variables = "X" "Y" "Z" "Nx" "Ny" "Nz" "Vx" "Vy" "Vz" "ind" &
-            &"gapID" "gapIndex" "otherID" "otherIndex" "ratio"'
-       do i=1, nStrings
-          call writeOversetString(strings(i), strings, nStrings, 101)
-       end do
-       close(101)
-    end if
-
-    ! Now do the cross zip
-    call makeCrossZip(master, strings, nStrings, debugZipper)
-
-    ! And write out the triangle from the cross zip
-    if (debugZipper) then
-       call writeOversetTriangles(master, "crossZipTriangulation.dat", nTriSelf+1, master%nTris)
-    end if
-
-    ! ---------------------------------------------------------------
-    ! Sort through zipped triangle edges and the edges which have not
-    ! been used twice (orphan edges) will be ultimately gathered to
-    ! form polygon pockets to be zipped.
-    if (debugZipper) then
-       print *,'Doing pocket zip'
-    end if
-    call makePocketZip(master, strings, nStrings, pocketMaster, debugZipper)
-
-    if (debugZipper) then
-       call writeOversetTriangles(pocketMaster, "pocketTriangulation.dat", 1, pocketMaster%nTris)
-    end if
-
-    ! Clean up the reminder of the sting memory on the root proc
-    do i=1, nStrings
-       call deallocateString(strings(i))
-    end do
-    deallocate(strings)
-
-  end subroutine zipperCore
-
-  !
-  !       determineClusterArea determine the average cell surface area
-  !       for all blocks in a particular cluster. This is used for
-  !       determine blanking preference for overlapping surface cells.
-
-  subroutine determineClusterAreas(famList)
-
-    use constants
-    use blockPointers, only : nDom, BCData, nBocos, BCType
-    use communication, only : adflow_comm_world, myid
-    use oversetData, onlY : clusterAreas, nClusters, clusters, cumDomProc
-    use utils, only : setPointers, EChk, setBCPointers, cross_prod
-    use BCPointers, only : xx
-    use sorting, only : famInList
-    implicit none
-
-    ! Input Parameters
-    integer(kind=intType), intent(in), dimension(:) :: famList
-
-    ! Working
-    integer(kind=intType) :: i, j, mm, nn, clusterID, ierr, nPts, nCells
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
-    real(kind=realType), dimension(:), allocatable :: localAreas
-    integer(kind=intType), dimension(:), allocatable :: localCount, globalCount
-    real(kind=realType), dimension(:, :), allocatable :: pts
-    real(kind=realType) :: fact , v1(3), v2(3), sss(3), da
-
-    if (allocated(clusterAreas)) then
-       ! We only ever do this once!
-       deallocate(clusterAreas)
-    end if
-
-    allocate(clusterAreas(nClusters), localAreas(nClusters), &
-         localCount(nClusters), globalCount(nClusters))
-
-    localAreas = zero
-    localCount = 0
-
-    domains: do nn=1,nDom
-       call setPointers(nn, 1_intType, 1_intType)
-
-       clusterID = clusters(cumDomProc(myid) + nn)
-
-       ! Loop over the number of boundary subfaces of this block.
-       bocos: do mm=1,nBocos
-          famInclude: if (famInList(BCData(mm)%famID, famList)) then
-             ! Store the cell range of the subfaces a bit easier.
-             ! As only owned faces must be considered the nodal range
-             ! in BCData must be used to obtain this data.
-
-             jBeg = BCData(mm)%jnBeg + 1; jEnd = BCData(mm)%jnEnd
-             iBeg = BCData(mm)%inBeg + 1; iEnd = BCData(mm)%inEnd
-
-             call setBCPointers(mm, .True.)
-
-             ! Compute the dual area at each node. Just store in first dof
-             do j=jBeg, jEnd ! This is a face loop
-                do i=iBeg, iEnd ! This is a face loop
-
-                   v1(:) = xx(i+1, j+1, :) - xx(i,   j,  :)
-                   v2(:) = xx(i  , j+1, :) - xx(i+1, j,  :)
-
-                   ! Cross Product
-                   call cross_prod(v1, v2, sss)
-                   da = half*sqrt(sss(1)**2 + sss(2)**2 + sss(3)**2)
-                   localAreas(clusterID) = localAreas(clusterID) + da
-                   localCount(clusterID) = localCount(clusterID) + 1
-                end do
-             end do
-          end if famInclude
-       end do bocos
-    end do domains
-
-    ! All reduce sum for the localAreas to get clusterAreas and
-    ! localCount to get globalCount
-
-    call mpi_allreduce(localAreas, clusterAreas, nClusters, adflow_real, &
-         MPI_SUM, adflow_comm_world, ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
-
-    call mpi_allreduce(localCount, globalCount, nClusters, adflow_integer, &
-         MPI_SUM, adflow_comm_world, ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
-
-    ! Final get the average global area
-    do i=1, nClusters
-       clusterAreas(i) = clusterAreas(i)/max(globalCount(i), 1)
-    end do
-
-    deallocate(localAreas, localCount, globalCount)
-  end subroutine determineClusterAreas
-
-  subroutine initBCDataiBlank(level, sps)
-
-    use constants
-    use blockPointers
-    use communication
-    use utils, only : setPointers
-    use oversetUtilities, only : flagForcedRecv
-    implicit none
-
-    ! Input Parameters
-    integer(kind=intType), intent(in) :: level, sps
-
-    ! Local variables
-    integer(kind=intType) :: mm, nn, i, j, k, iBeg, iEnd, jBeg, jEnd
-    logical :: side(4)
-
-    integer(kind=intType), dimension(:, :), pointer :: ibp, gcp, frx
-    integer(kind=intType), dimension(:, :), allocatable :: toFlip
-
-    ! This routine initializes the surface cell iblank based on the
-    ! volume iblank.
-
-    domainLoop: do nn=1, nDom
-       call setPointers(nn, level, sps)
-
-       ! Setting the surface IBlank array is done for *all* bocos.
-       bocoLoop: do mm=1, nBocos
-          select case (BCFaceID(mm))
-          case (iMin)
-             ibp => iblank(2, :, :)
-          case (iMax)
-             ibp => iblank(il, :, :)
-          case (jMin)
-             ibp => iblank(:, 2, :)
-          case (jMax)
-             ibp => iblank(:, jl, :)
-          case (kMin)
-             ibp => iblank(:, :, 2)
-          case (kMax)
-             ibp => iblank(:, :, kl)
-          end select
-
-          ! -------------------------------------------------
-          ! Step 1: Set the (haloed) cell iBlanks directly from
-          ! the volume iBlanks
-          ! -------------------------------------------------
-          jBeg = BCData(mm)%jnBeg+1 ; jEnd = BCData(mm)%jnEnd
-          iBeg = BCData(mm)%inBeg+1 ; iEnd = BCData(mm)%inEnd
-
-          ! Just set the cell iblank directly from the cell iblank
-          ! above it. Remember the +1 in ibp is for the pointer
-          ! offset. These ranges *ALWAYS* give 1 level of halos
-          do j=jBeg-1, jEnd+1
-             do i=iBeg-1, iEnd+1
-                BCData(mm)%iBlank(i,j) = ibp(i+1, j+1)
-             end do
-          end do
-       end do bocoLoop
-    end do domainLoop
-  end subroutine initBCDataiBlank
-
-
-  subroutine slitElimination(famList, level, sps)
-
-    use constants
-    use blockPointers
-    use communication
-    use utils, only : setPointers
-    use oversetUtilities, only : flagForcedRecv
-    use sorting, only : famInList
-    implicit none
-
-    ! Input Parameters
-    integer(kind=intType), intent(in), dimension(:) :: famList
-    integer(kind=intType), intent(in) :: level, sps
-
-    ! Local variables
-    integer(kind=intType) :: mm, nn, i, j, k, iBeg, iEnd, jBeg, jEnd
-    logical :: side(4)
-
-    integer(kind=intType), dimension(:, :), pointer :: ibp, gcp, frx
-    integer(kind=intType), dimension(:, :), allocatable :: toFlip
-
-    ! This routine initializes the surface cell iblank based on the
-    ! volume iblank. It is not a straight copy since we a little
-    ! preprocessing
-
-    ! This is a little trickier than it seems. The reason is that we
-    ! will allow a limited number of interpolated cells to be used
-    ! directly in the integration provided the meet certain criteria.
-
-    ! Consider the following
-    !  +------+--------+-------+
-    !  |ib=1  |  ib=1  | ib= 1 |
-    !  |      |        |       |
-    !  |      |        |       |
-    !  +------+========+-------+
-    !  |ib=1  || ib=-1 || ib=1 |
-    !  |      ||       ||      |
-    !  |      ||       ||      |
-    !==+======+--------+=======+==
-    !  |ib=-1 |  ib=-1 | ib=-1 |
-    !  |      |        |       |
-    !  |      |        |       |
-    !  +------+--------+-------+
-    !
-    ! The boundary between real/interpolated cells is marked by double
-    ! lines. For zipper mesh purposes, it is generally going to be
-    ! better to treat the center cell, as a regular force integration
-    ! cell (ie surface iblank=1). The criteria for selection of these
-    ! cells is:
-
-    ! 1. The cell must not have been a forced receiver (ie at overset outer
-    ! bound)
-    ! 2. Any pair *opposite* sides of the cell must be compute cells.
-
-    ! This criterial allows one-cell wide 'slits' to be pre-eliminated.
-
-    call flagForcedRecv()
-    domainLoop: do nn=1, nDom
-       call setPointers(nn, level, sps)
-
-       ! Setting the surface IBlank array is done for *all* bocos.
-       bocoLoop: do mm=1, nBocos
-          select case (BCFaceID(mm))
-          case (iMin)
-             ibp => iblank(2, :, :)
-             gcp => globalCell(2, :, :)
-             frx => forcedRecv(2, :, :)
-          case (iMax)
-             ibp => iblank(il, :, :)
-             gcp => globalCell(il, :, :)
-             frx => forcedRecv(il, :, :)
-          case (jMin)
-             ibp => iblank(:, 2, :)
-             gcp => globalCell(:, 2, :)
-             frx => forcedRecv(:, 2, :)
-          case (jMax)
-             ibp => iblank(:, jl, :)
-             gcp => globalCell(:, jl, :)
-             frx => forcedRecv(:, jl, :)
-          case (kMin)
-             ibp => iblank(:, :, 2)
-             gcp => globalCell(:, :, 2)
-             frx => forcedRecv(:, :, 2)
-          case (kMax)
-             ibp => iblank(:, :, kl)
-             gcp => globalCell(:, :, kl)
-             frx => forcedRecv(:, :, kl)
-          end select
-
-          ! -------------------------------------------------
-          ! Step 1: Set the (haloed) cell iBlanks directly from
-          ! the volume iBlanks
-          ! -------------------------------------------------
-          jBeg = BCData(mm)%jnBeg+1 ; jEnd = BCData(mm)%jnEnd
-          iBeg = BCData(mm)%inBeg+1 ; iEnd = BCData(mm)%inEnd
-
-          ! Only do the slit elimination if we actually care about
-          ! this surface for the zipper
-          famInclude: if (famInList(BCData(mm)%famID, famList)) then
-
-             ! -------------------------------------------------
-             ! Step 2: Slit elimination
-             ! -------------------------------------------------
-
-             ! Now we loop back through the cells again. For
-             ! interpolated cells with iblank=-1 we see if it satifies
-             ! the criteria above. If so we flag it wil "toFlip" =
-             ! 1. Note that we can't set a particular iblank directly
-             ! since that could cause an "avalance" effect with the
-             ! later iterations using the updated iblank from a previous
-             ! iteration.
-             allocate(toFlip(iBeg:iEnd, jBeg:jEnd))
-             toFlip = 0
-             do j=jBeg, jEnd
-                do i=iBeg, iEnd
-
-                   ! We *might* add it if the interpolated cell is
-                   ! touching two real cell on opposite sides.
-
-                   if (BCData(mm)%iBlank(i, j) == -1 .and. validCell(i, j)) then
-
-                      ! Reset the side flag
-                      side = .False.
-
-                      if (validCell(i-1, j) .and. BCData(mm)%iBlank(i-1, j) == 1) then
-                         side(1) = .True.
-                      end if
-
-                      if (validCell(i+1, j) .and. BCData(mm)%iBlank(i+1, j) == 1) then
-                         side(2) = .True.
-                      end if
-
-                      if (validCell(i, j-1) .and. BCData(mm)%iBlank(i, j-1) ==1 ) then
-                         side(3) = .True.
-                      end if
-
-                      if (validCell(i, j+1) .and. BCData(mm)%iBlank(i, j+1) == 1) then
-                         side(4) = .True.
-                      end if
-
-                      if ((side(1) .and. side(2)) .or. (side(3) .and. side(4)))  then
-                         toFlip(i,j) = 1
-                      end if
-                   end if
-                end do
-             end do
+        ! Local variables
+        integer(kind=intType) :: mm, nn, i, j, k, iBeg, iEnd, jBeg, jEnd
+        logical :: side(4)
 
-             ! Now just set the cell surface iblank to 1 if we
-             ! determined above we need to flip  the cell
+        integer(kind=intType), dimension(:, :), pointer :: ibp, gcp, frx
+        integer(kind=intType), dimension(:, :), allocatable :: toFlip
 
-             do j=jBeg, jEnd
-                do i=iBeg, iEnd
-                   if (toFlip(i, j) == 1) then
-                      BCData(mm)%iBlank(i, j) = 1
-                   end if
-                end do
-             end do
-
-             deallocate(toFlip)
-          end if famInclude
-       end do bocoLoop
-    end do domainLoop
-
-  contains
-    function validCell(i, j)
-      implicit none
-      integer(kind=intType), intent(in) :: i, j
-      logical :: validCell
-
-      ! for our purposes here, a valid cell is one that:
-      ! 1. Is not a boundary halo. ie has globalCell >= 0
-      ! 2. It is not a force receiver.
-
-      validCell = .False.
-      if (gcp(i+1, j+1) >= 0 .and. frx(i+1, j+1) == 0) then
-         validCell = .True.
-      end if
-    end function validCell
-  end subroutine slitElimination
-
-
-  !
-  !      surfaceDeviation computes an approximation of the maximum
-  !      deviation a surface could be as compared to an underlying "exact"
-  !      surface. The purpose is to compute an adaptive "near wall distance"
-  !      value that can be used to determine if a point is "close" to a
-  !     * wall.
-  !
-
-  subroutine surfaceDeviation(famList, level, sps)
-
-    use constants
-    use blockPointers, only :BCdata, x, nBocos, nDom, BCType, il, jl, kl, BCFaceID
-    use utils, only : setPointers, myNorm2, setBCPointers
-    use sorting, only : famInList
-    use BCPointers, only : xx
-    implicit none
-
-    ! Input Parameters
-    integer(kind=intType), intent(in), dimension(:) :: famList
-    integer(kind=intType), intent(in) :: level, sps
-
-    ! Local Variables
-    integer(kind=intType) :: i, j, k, ii, jj, kk, nn, iBeg, iEnd, jBeg, jEnd, mm
-    real(kind=realType) ::  deviation
-
-    ! Loop over blocks
-    do nn=1, nDom
-       call setPointers(nn, level, sps)
-
-       bocoLoop: do mm=1, nBocos
-          famInclude: if (famInList(BCData(mm)%famID, famList)) then
-
-             call setBCPointers(mm, .True.)
-             jBeg = BCdata(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
-             iBeg = BCData(mm)%inBeg; iEnd = BCData(mm)%inEnd
-
-             ! The procedure goes in 2 passes. The first pass checks all
-             ! the i-direction edges, and the second all the j-direction
-             ! edges. For every edge, we estimate the max deviation
-             ! along that edge and then the surface will use the maximum
-             ! deviation from each of the 4 edges. Ie we scatter the
-             ! edge deviation to the two cells next to it. We only do
-             ! the real cells here. Boundary halos get -one set (below)
-             ! and then actual compute halos are set with an exchange.
-
-             bcData(mm)%delta = -one
-
-             ! ------------------
-             ! Check the i-edges
-             ! ------------------
-             do j=jBeg, jEnd       ! <------- Node loop
-                do i=iBeg+1, iEnd  ! <------- Face Loop
-
-                   ! We will creating a local cubic approximation of the
-                   ! local edge. This will use node i-2, i-1, i, and
-                   ! i+1. However, due to the pointer offset, these are
-                   ! all shifted by 1 to get: i-1, i, i+1, i+2
-
-                   deviation = checkDeviation(xx(i-1, j, :), xx(i, j, :), xx(i+1, j, :), &
-                        xx(i+2, j, :))
-
-                   ! Cell to the bottom:
-                   if (j-1 >= jBeg+1) then
-                      bcData(mm)%delta(i, j-1) = max(bcData(mm)%delta(i, j-1), deviation)
-                   end if
-
-                   ! Cell to the top:
-                   if (j+1 <= jEnd) then
-                      bcData(mm)%delta(i, j+1) = max(bcData(mm)%delta(i, j+1), deviation)
-                   end if
+        ! This routine initializes the surface cell iblank based on the
+        ! volume iblank.
+
+        domainLoop: do nn = 1, nDom
+            call setPointers(nn, level, sps)
+
+            ! Setting the surface IBlank array is done for *all* bocos.
+            bocoLoop: do mm = 1, nBocos
+                select case (BCFaceID(mm))
+                case (iMin)
+                    ibp => iblank(2, :, :)
+                case (iMax)
+                    ibp => iblank(il, :, :)
+                case (jMin)
+                    ibp => iblank(:, 2, :)
+                case (jMax)
+                    ibp => iblank(:, jl, :)
+                case (kMin)
+                    ibp => iblank(:, :, 2)
+                case (kMax)
+                    ibp => iblank(:, :, kl)
+                end select
+
+                ! -------------------------------------------------
+                ! Step 1: Set the (haloed) cell iBlanks directly from
+                ! the volume iBlanks
+                ! -------------------------------------------------
+                jBeg = BCData(mm)%jnBeg + 1; jEnd = BCData(mm)%jnEnd
+                iBeg = BCData(mm)%inBeg + 1; iEnd = BCData(mm)%inEnd
+
+                ! Just set the cell iblank directly from the cell iblank
+                ! above it. Remember the +1 in ibp is for the pointer
+                ! offset. These ranges *ALWAYS* give 1 level of halos
+                do j = jBeg - 1, jEnd + 1
+                    do i = iBeg - 1, iEnd + 1
+                        BCData(mm)%iBlank(i, j) = ibp(i + 1, j + 1)
+                    end do
                 end do
-             end do
+            end do bocoLoop
+        end do domainLoop
+    end subroutine initBCDataiBlank
+
+    subroutine slitElimination(famList, level, sps)
+
+        use constants
+        use blockPointers
+        use communication
+        use utils, only: setPointers
+        use oversetUtilities, only: flagForcedRecv
+        use sorting, only: famInList
+        implicit none
 
-             ! -----------------
-             ! Check the j-edges
-             ! -----------------
-             do j=jBeg+1, jEnd   ! <------- Face loop
-                do i=iBeg, iEnd  ! <------- Node Loop
+        ! Input Parameters
+        integer(kind=intType), intent(in), dimension(:) :: famList
+        integer(kind=intType), intent(in) :: level, sps
+
+        ! Local variables
+        integer(kind=intType) :: mm, nn, i, j, k, iBeg, iEnd, jBeg, jEnd
+        logical :: side(4)
+
+        integer(kind=intType), dimension(:, :), pointer :: ibp, gcp, frx
+        integer(kind=intType), dimension(:, :), allocatable :: toFlip
+
+        ! This routine initializes the surface cell iblank based on the
+        ! volume iblank. It is not a straight copy since we a little
+        ! preprocessing
+
+        ! This is a little trickier than it seems. The reason is that we
+        ! will allow a limited number of interpolated cells to be used
+        ! directly in the integration provided the meet certain criteria.
+
+        ! Consider the following
+        !  +------+--------+-------+
+        !  |ib=1  |  ib=1  | ib= 1 |
+        !  |      |        |       |
+        !  |      |        |       |
+        !  +------+========+-------+
+        !  |ib=1  || ib=-1 || ib=1 |
+        !  |      ||       ||      |
+        !  |      ||       ||      |
+        !==+======+--------+=======+==
+        !  |ib=-1 |  ib=-1 | ib=-1 |
+        !  |      |        |       |
+        !  |      |        |       |
+        !  +------+--------+-------+
+        !
+        ! The boundary between real/interpolated cells is marked by double
+        ! lines. For zipper mesh purposes, it is generally going to be
+        ! better to treat the center cell, as a regular force integration
+        ! cell (ie surface iblank=1). The criteria for selection of these
+        ! cells is:
+
+        ! 1. The cell must not have been a forced receiver (ie at overset outer
+        ! bound)
+        ! 2. Any pair *opposite* sides of the cell must be compute cells.
+
+        ! This criterial allows one-cell wide 'slits' to be pre-eliminated.
+
+        call flagForcedRecv()
+        domainLoop: do nn = 1, nDom
+            call setPointers(nn, level, sps)
+
+            ! Setting the surface IBlank array is done for *all* bocos.
+            bocoLoop: do mm = 1, nBocos
+                select case (BCFaceID(mm))
+                case (iMin)
+                    ibp => iblank(2, :, :)
+                    gcp => globalCell(2, :, :)
+                    frx => forcedRecv(2, :, :)
+                case (iMax)
+                    ibp => iblank(il, :, :)
+                    gcp => globalCell(il, :, :)
+                    frx => forcedRecv(il, :, :)
+                case (jMin)
+                    ibp => iblank(:, 2, :)
+                    gcp => globalCell(:, 2, :)
+                    frx => forcedRecv(:, 2, :)
+                case (jMax)
+                    ibp => iblank(:, jl, :)
+                    gcp => globalCell(:, jl, :)
+                    frx => forcedRecv(:, jl, :)
+                case (kMin)
+                    ibp => iblank(:, :, 2)
+                    gcp => globalCell(:, :, 2)
+                    frx => forcedRecv(:, :, 2)
+                case (kMax)
+                    ibp => iblank(:, :, kl)
+                    gcp => globalCell(:, :, kl)
+                    frx => forcedRecv(:, :, kl)
+                end select
 
-                   ! We will creating a local cubic approximation of the
-                   ! local edge. This will use node j-2, j-1, j, and
-                   ! j+1. However, due to the pointer offset, these are
-                   ! all shifted by 1 to get: j-1, j, j+1, j+2
+                ! -------------------------------------------------
+                ! Step 1: Set the (haloed) cell iBlanks directly from
+                ! the volume iBlanks
+                ! -------------------------------------------------
+                jBeg = BCData(mm)%jnBeg + 1; jEnd = BCData(mm)%jnEnd
+                iBeg = BCData(mm)%inBeg + 1; iEnd = BCData(mm)%inEnd
+
+                ! Only do the slit elimination if we actually care about
+                ! this surface for the zipper
+                famInclude: if (famInList(BCData(mm)%famID, famList)) then
+
+                    ! -------------------------------------------------
+                    ! Step 2: Slit elimination
+                    ! -------------------------------------------------
+
+                    ! Now we loop back through the cells again. For
+                    ! interpolated cells with iblank=-1 we see if it satifies
+                    ! the criteria above. If so we flag it wil "toFlip" =
+                    ! 1. Note that we can't set a particular iblank directly
+                    ! since that could cause an "avalance" effect with the
+                    ! later iterations using the updated iblank from a previous
+                    ! iteration.
+                    allocate (toFlip(iBeg:iEnd, jBeg:jEnd))
+                    toFlip = 0
+                    do j = jBeg, jEnd
+                        do i = iBeg, iEnd
+
+                            ! We *might* add it if the interpolated cell is
+                            ! touching two real cell on opposite sides.
+
+                            if (BCData(mm)%iBlank(i, j) == -1 .and. validCell(i, j)) then
+
+                                ! Reset the side flag
+                                side = .False.
+
+                                if (validCell(i - 1, j) .and. BCData(mm)%iBlank(i - 1, j) == 1) then
+                                    side(1) = .True.
+                                end if
+
+                                if (validCell(i + 1, j) .and. BCData(mm)%iBlank(i + 1, j) == 1) then
+                                    side(2) = .True.
+                                end if
+
+                                if (validCell(i, j - 1) .and. BCData(mm)%iBlank(i, j - 1) == 1) then
+                                    side(3) = .True.
+                                end if
+
+                                if (validCell(i, j + 1) .and. BCData(mm)%iBlank(i, j + 1) == 1) then
+                                    side(4) = .True.
+                                end if
+
+                                if ((side(1) .and. side(2)) .or. (side(3) .and. side(4))) then
+                                    toFlip(i, j) = 1
+                                end if
+                            end if
+                        end do
+                    end do
+
+                    ! Now just set the cell surface iblank to 1 if we
+                    ! determined above we need to flip  the cell
+
+                    do j = jBeg, jEnd
+                        do i = iBeg, iEnd
+                            if (toFlip(i, j) == 1) then
+                                BCData(mm)%iBlank(i, j) = 1
+                            end if
+                        end do
+                    end do
+
+                    deallocate (toFlip)
+                end if famInclude
+            end do bocoLoop
+        end do domainLoop
 
-                   deviation = checkDeviation(xx(i, j-1, :), xx(i, j, :), xx(i, j+1, :), &
-                        xx(i, j+2, :))
+    contains
+        function validCell(i, j)
+            implicit none
+            integer(kind=intType), intent(in) :: i, j
+            logical :: validCell
+
+            ! for our purposes here, a valid cell is one that:
+            ! 1. Is not a boundary halo. ie has globalCell >= 0
+            ! 2. It is not a force receiver.
+
+            validCell = .False.
+            if (gcp(i + 1, j + 1) >= 0 .and. frx(i + 1, j + 1) == 0) then
+                validCell = .True.
+            end if
+        end function validCell
+    end subroutine slitElimination
 
-                   ! Cell to the left:
-                   if (i-1 >= iBeg+1) then
-                      bcData(mm)%delta(i-1, j) = max(bcData(mm)%delta(i-1, j), deviation)
-                   end if
+    !
+    !      surfaceDeviation computes an approximation of the maximum
+    !      deviation a surface could be as compared to an underlying "exact"
+    !      surface. The purpose is to compute an adaptive "near wall distance"
+    !      value that can be used to determine if a point is "close" to a
+    !     * wall.
+    !
 
-                   ! Cell to the right:
-                   if (i+1 <= iEnd) then
-                      bcData(mm)%delta(i+1, j) = max(bcData(mm)%delta(i+1, j), deviation)
-                   end if
+    subroutine surfaceDeviation(famList, level, sps)
 
-                end do
-             end do
-          end if famInclude
-       end do bocoLoop
-    end do
-
-    ! Exchange so that halos get correct values set as well. THIS IS BROKEN FIX IT!
-    !call exchangeSurfaceDelta(level, sps, commPatternCell_1st, internalCell_1st)
-
-    ! Now make one pass back and compute a delta for the nodes. Of
-    ! course, this technically makes no sense: The nodes should
-    ! exactly
-    ! using this as a surrogate for what is near a surface, it make a
-    ! little sense. Essentially we go through the nodes, and take the
-    ! max deviation from the cells surrpounding it.
-
-    ! Loop over blocks
-    do nn=1, nDom
-       call setPointers(nn, level, sps)
-
-       bocoLoop2: do mm=1, nBocos
-          famInclude2: if (famInList(BCData(mm)%famID, famList)) then
-
-             jBeg = BCdata(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
-             iBeg = BCData(mm)%inBeg; iEnd = BCData(mm)%inEnd
-
-             do j=jBeg, jEnd
-                do i=iBeg, iEnd
-
-                   ! Since we are taking the max and the boundary halos
-                   ! have a value of -one it's ok to blindy just take
-                   ! the max from each of the 4 cells surrounding each node.
-
-                   BCData(mm)%deltaNode(i, j) = max(&
-                        BCData(mm)%delta(i  , j  ), &
-                        BCData(mm)%delta(i+1, j  ), &
-                        BCData(mm)%delta(i  , j+1), &
-                        BCData(mm)%delta(i+1, j+1))
-                end do
-             end do
-          end if famInclude2
-       end do bocoLoop2
-    end do
+        use constants
+        use blockPointers, only: BCdata, x, nBocos, nDom, BCType, il, jl, kl, BCFaceID
+        use utils, only: setPointers, myNorm2, setBCPointers
+        use sorting, only: famInList
+        use BCPointers, only: xx
+        implicit none
 
-  end subroutine surfaceDeviation
+        ! Input Parameters
+        integer(kind=intType), intent(in), dimension(:) :: famList
+        integer(kind=intType), intent(in) :: level, sps
+
+        ! Local Variables
+        integer(kind=intType) :: i, j, k, ii, jj, kk, nn, iBeg, iEnd, jBeg, jEnd, mm
+        real(kind=realType) :: deviation
+
+        ! Loop over blocks
+        do nn = 1, nDom
+            call setPointers(nn, level, sps)
+
+            bocoLoop: do mm = 1, nBocos
+                famInclude: if (famInList(BCData(mm)%famID, famList)) then
+
+                    call setBCPointers(mm, .True.)
+                    jBeg = BCdata(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+                    iBeg = BCData(mm)%inBeg; iEnd = BCData(mm)%inEnd
+
+                    ! The procedure goes in 2 passes. The first pass checks all
+                    ! the i-direction edges, and the second all the j-direction
+                    ! edges. For every edge, we estimate the max deviation
+                    ! along that edge and then the surface will use the maximum
+                    ! deviation from each of the 4 edges. Ie we scatter the
+                    ! edge deviation to the two cells next to it. We only do
+                    ! the real cells here. Boundary halos get -one set (below)
+                    ! and then actual compute halos are set with an exchange.
+
+                    bcData(mm)%delta = -one
+
+                    ! ------------------
+                    ! Check the i-edges
+                    ! ------------------
+                    do j = jBeg, jEnd       ! <------- Node loop
+                        do i = iBeg + 1, iEnd  ! <------- Face Loop
+
+                            ! We will creating a local cubic approximation of the
+                            ! local edge. This will use node i-2, i-1, i, and
+                            ! i+1. However, due to the pointer offset, these are
+                            ! all shifted by 1 to get: i-1, i, i+1, i+2
+
+                            deviation = checkDeviation(xx(i - 1, j, :), xx(i, j, :), xx(i + 1, j, :), &
+                                                       xx(i + 2, j, :))
+
+                            ! Cell to the bottom:
+                            if (j - 1 >= jBeg + 1) then
+                                bcData(mm)%delta(i, j - 1) = max(bcData(mm)%delta(i, j - 1), deviation)
+                            end if
+
+                            ! Cell to the top:
+                            if (j + 1 <= jEnd) then
+                                bcData(mm)%delta(i, j + 1) = max(bcData(mm)%delta(i, j + 1), deviation)
+                            end if
+                        end do
+                    end do
+
+                    ! -----------------
+                    ! Check the j-edges
+                    ! -----------------
+                    do j = jBeg + 1, jEnd   ! <------- Face loop
+                        do i = iBeg, iEnd  ! <------- Node Loop
+
+                            ! We will creating a local cubic approximation of the
+                            ! local edge. This will use node j-2, j-1, j, and
+                            ! j+1. However, due to the pointer offset, these are
+                            ! all shifted by 1 to get: j-1, j, j+1, j+2
+
+                            deviation = checkDeviation(xx(i, j - 1, :), xx(i, j, :), xx(i, j + 1, :), &
+                                                       xx(i, j + 2, :))
+
+                            ! Cell to the left:
+                            if (i - 1 >= iBeg + 1) then
+                                bcData(mm)%delta(i - 1, j) = max(bcData(mm)%delta(i - 1, j), deviation)
+                            end if
+
+                            ! Cell to the right:
+                            if (i + 1 <= iEnd) then
+                                bcData(mm)%delta(i + 1, j) = max(bcData(mm)%delta(i + 1, j), deviation)
+                            end if
+
+                        end do
+                    end do
+                end if famInclude
+            end do bocoLoop
+        end do
 
-  function checkDeviation(P0, P1, P2, P3)
+        ! Exchange so that halos get correct values set as well. THIS IS BROKEN FIX IT!
+        !call exchangeSurfaceDelta(level, sps, commPatternCell_1st, internalCell_1st)
+
+        ! Now make one pass back and compute a delta for the nodes. Of
+        ! course, this technically makes no sense: The nodes should
+        ! exactly
+        ! using this as a surrogate for what is near a surface, it make a
+        ! little sense. Essentially we go through the nodes, and take the
+        ! max deviation from the cells surrpounding it.
+
+        ! Loop over blocks
+        do nn = 1, nDom
+            call setPointers(nn, level, sps)
+
+            bocoLoop2: do mm = 1, nBocos
+                famInclude2: if (famInList(BCData(mm)%famID, famList)) then
+
+                    jBeg = BCdata(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+                    iBeg = BCData(mm)%inBeg; iEnd = BCData(mm)%inEnd
+
+                    do j = jBeg, jEnd
+                        do i = iBeg, iEnd
+
+                            ! Since we are taking the max and the boundary halos
+                            ! have a value of -one it's ok to blindy just take
+                            ! the max from each of the 4 cells surrounding each node.
+
+                            BCData(mm)%deltaNode(i, j) = max( &
+                                                         BCData(mm)%delta(i, j), &
+                                                         BCData(mm)%delta(i + 1, j), &
+                                                         BCData(mm)%delta(i, j + 1), &
+                                                         BCData(mm)%delta(i + 1, j + 1))
+                        end do
+                    end do
+                end if famInclude2
+            end do bocoLoop2
+        end do
 
-    ! Find the maximum deviation between a local cubic approximation
-    ! formed by nodes P0, P1, P2 and P3, with the linear approximation
-    ! formed by nodes P1 and P2.
+    end subroutine surfaceDeviation
 
-    ! See this article for the implementation.
-    ! https://en.wikipedia.org/wiki/Centripetal_Catmull-Rom_spline
-
-    use constants
-    use utils, only : myNorm2
-    implicit none
-
-    ! Input Parameters
-    real(kind=realType), intent(in), dimension(3) :: P0, P1, P2, P3
+    function checkDeviation(P0, P1, P2, P3)
 
-    ! Function value
-    real(kind=realType) ::  checkDeviation
-
-    ! Working Parameters
-    real(kind=realType) :: t0, t1, t2, t3
-    real(kind=realType), dimension(3) :: A1, A2, A3, B1, B2
-    real(kind=realType), parameter :: alpha=half
-    integer(kind=intType), parameter :: N=20
-    integer(kind=intType) :: i
-    real(kind=realType) :: t, P(3), Q(3), s
-
-    t0 = zero
-    t1 = t0 + mynorm2(P1-P0)**alpha
-    t2 = t1 + mynorm2(P2-P1)**alpha
-    t3 = t2 + mynorm2(P3-P2)**alpha
-
-    ! Normalize
-    t1 = t1/t3
-    t2 = t2/t3
-    t3 = one
-
-    ! Loop over the number of points to check. We need to go between t2
-    ! and t3. No need to check the first and last since the devaition
-    ! there is zero by construction.
-    checkDeviation = zero
-
-    do i=1, N
-       s = (i-one)/(N-one)
-       t = (one-s)*t1 + s*t2
-
-
-       ! Spline pt
-       A3 = (t3-t)/(t3-t2)*P2 + (t-t2)/(t3-t2)*P3
-       A2 = (t2-t)/(t2-t1)*P1 + (t-t1)/(t2-t1)*P2
-       A1 = (t1-t)/(t1-t0)*P0 + (t-t0)/(t1-t0)*P1
-
-       B2 = (t3-t)/(t3-t1)*A2 + (t-t1)/(t3-t1)*A3
-       B1 = (t2-t)/(t2-t0)*A1 + (t-t0)/(t2-t0)*A2
-
-       P = (t2-t)/(t2-t1)*B1 + (t-t1)/(t2-t1)*B2
-
-       ! Now project the cubic point onto the line to get point Q
-
-       Q = P1 + dot_product(P-P1, P2-P1)/dot_product(P2-P1, P2-P1) * (P2 - P1)
-
-       ! Just get the distance between the two points.
-       checkDeviation = max(checkDeviation, mynorm2(Q-P))
-
-    end do
-
-  end function checkDeviation
-
-  subroutine writeWalls(famList)
-
-
-    !use oversetData
-    use constants
-    use blockPointers
-    use utils, only : setPointers, setBCPointers
-    use BCPointers, only : xx
-    use sorting, only : famInList
-    use communication, only : myid, adflow_comm_world, nProc
-    use utils, only : EChk
-    use commonFormats, only : sci12
-    implicit none
-    integer(kind=intType), intent(in), dimension(:) :: famList
-    character(80) :: fileName, zoneName
-    integer(kind=intType) :: i, j, nn, iDom, iBeg, iEnd, jBeg, jEnd, mm, iDim
-    integer(kind=intType) :: nNode, nCell, tag, ierr, ii, iProc, nLocalBoco, tmp(5)
-    real(kind=realType), dimension(:), allocatable :: xBuffer
-    integer(kind=intType), dimension(:), allocatable :: iblankBuffer, bocosPerProc
-    integer(kind=intType), dimension(:,:,:), allocatable :: faceInfo
-    integer, dimension(mpi_status_size) :: mpiStatus
-
-    ! Write a gathered surface tecplot file.
-    if (myid == 0) then
-       write (fileName,"(a)") "zipper_wall.dat"
-
-       open(unit=101,file=trim(fileName),form='formatted')
-       write(101,*) 'TITLE = "zipper walls"'
-       write(101,*) 'Variables = "X", "Y", "Z", "CellIBlank"'
-    end if
-
-    ! Before we start, the root processor needs to know how many
-    ! receives we can expect from each processor.
-
-    nLocalBoco = 0
-    do nn=1,nDom
-       call setPointers(nn, 1, 1)
-       do mm=1, nBocos
-          famInclude: if (famInList(BCData(mm)%famID, famList)) then
-             nLocalBoco = nLocalBoco + 1
-          end if famInclude
-       end do
-    end do
-
-    if (myid == 0) then
-       allocate(bocosPerProc(0:nProc-1))
-    end if
-
-    call mpi_gather(nLocalBoco, 1, adflow_integer, bocosPerProc, 1, &
-         adflow_integer, 0, adflow_comm_world, ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
-    ! Now setup the info array on the root proc:
-    if (myid == 0) then
-       allocate(faceInfo(5, maxval(bocosPerProc), 0:nProc-1))
-    end if
-
-    if (myid /= 0) then
-       tag = 0
-       do nn=1, nDom
-          call setPointers(nn, 1, 1)
-          do mm=1, nBocos
-             jBeg = BCData(mm)%jnBeg ; jEnd = BCData(mm)%jnEnd
-             iBeg = BCData(mm)%inBeg ; iEnd = BCData(mm)%inEnd
-             famInclude2: if (famInList(BCData(mm)%famID, famList)) then
-                tag = tag + 1
-                tmp = (/iBeg, iEnd, jBeg, jEnd, nBkGlobal/)
-                call mpi_send(tmp, 5, &
-                     adflow_integer, 0, tag, adflow_comm_world, ierr)
-                call ECHK(ierr, __FILE__, __LINE__)
-             end if famInclude2
-          end do
-       end do
-    else
-       ! Receive the size info:
-       do iProc=1, nProc-1
-          do tag=1, bocosPerProc(iProc)
-             call mpi_recv(tmp, 5, adflow_integer, iProc, tag, &
-                  adflow_comm_world, mpiStatus, ierr)
-             call ECHK(ierr, __FILE__, __LINE__)
-             faceInfo(:, tag, iProc) = tmp
-          end do
-       end do
-    end if
-
-    ! Need a barrier between the two sets of the comms just in case.
-    call MPI_Barrier(adflow_comm_world, ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
-
-    tag = 0
-    do nn=1,nDom
-       call setPointers(nn, 1, 1)
-       do mm=1, nBocos
-          jBeg = BCData(mm)%jnBeg ; jEnd = BCData(mm)%jnEnd
-          iBeg = BCData(mm)%inBeg ; iEnd = BCData(mm)%inEnd
-          famInclude3: if (famInList(BCData(mm)%famID, famList)) then
-             call setBCPointers(mm, .True.)
-
-             nNode = (iEnd - iBeg + 1)*(jEnd - jBeg + 1)
-             nCell = (iEnd - iBeg + 1)*(jEnd - jBeg + 1)
-             allocate(iBlankBuffer(nCell), xBuffer(3*nNode))
-             ii = 0
-             do j=jBeg+1, jEnd
-                do i=iBeg+1, iEnd
-                   ii = ii + 1
-                   iBlankBuffer(ii) = BCData(mm)%iBlank(i, j)
-                end do
-             end do
-
-             ii = 0
-             do iDim=1,3
-                do j=jBeg, jEnd
-                   do i=iBeg, iEnd
-                      ii = ii + 1
-                      xBuffer(ii) = xx(i+1, j+1, iDim)
-                   end do
-                end do
-             end do
+        ! Find the maximum deviation between a local cubic approximation
+        ! formed by nodes P0, P1, P2 and P3, with the linear approximation
+        ! formed by nodes P1 and P2.
 
-             if (myid == 0) then
-                ! We can write it directly:
-                call writeZone(iBeg, iEnd, jBeg, jEnd, nBkGlobal, xBuffer, iBlankBuffer)
-             else
+        ! See this article for the implementation.
+        ! https://en.wikipedia.org/wiki/Centripetal_Catmull-Rom_spline
 
-                tag = tag + 1
-                call mpi_send(iBlankBuffer, nCell, adflow_integer, 0, tag, &
-                     adflow_comm_world, ierr)
-                call ECHK(ierr, __FILE__, __LINE__)
+        use constants
+        use utils, only: myNorm2
+        implicit none
 
-                tag = tag + 1
-                call mpi_send(xBuffer, nNode*3, adflow_real, 0, tag, &
-                     adflow_comm_world, ierr)
-                call ECHK(ierr, __FILE__, __LINE__)
+        ! Input Parameters
+        real(kind=realType), intent(in), dimension(3) :: P0, P1, P2, P3
 
-             end if
-             deallocate(iBlankBuffer, xBuffer)
-          end if famInclude3
-       end do
-    end do
+        ! Function value
+        real(kind=realType) :: checkDeviation
 
-    ! Complete the receives and writes on the root proc:
-    if (myid == 0) then
-       ! Receive the nodes and iblank info
-       do iProc=1, nProc-1
-          do tag=1, bocosPerProc(iProc)
+        ! Working Parameters
+        real(kind=realType) :: t0, t1, t2, t3
+        real(kind=realType), dimension(3) :: A1, A2, A3, B1, B2
+        real(kind=realType), parameter :: alpha = half
+        integer(kind=intType), parameter :: N = 20
+        integer(kind=intType) :: i
+        real(kind=realType) :: t, P(3), Q(3), s
 
-             iBeg = faceInfo(1, tag, iProc)
-             iEnd = faceInfo(2, tag, iProc)
+        t0 = zero
+        t1 = t0 + mynorm2(P1 - P0)**alpha
+        t2 = t1 + mynorm2(P2 - P1)**alpha
+        t3 = t2 + mynorm2(P3 - P2)**alpha
 
-             jBeg = faceInfo(3, tag, iProc)
-             jEnd = faceInfo(4, tag, iProc)
+        ! Normalize
+        t1 = t1 / t3
+        t2 = t2 / t3
+        t3 = one
 
-             nBkGlobal =faceInfo(5, tag, iProc)
+        ! Loop over the number of points to check. We need to go between t2
+        ! and t3. No need to check the first and last since the devaition
+        ! there is zero by construction.
+        checkDeviation = zero
 
-             nNode = (iEnd - iBeg + 1)*(jEnd - jBeg + 1)
-             nCell = (iEnd - iBeg + 1)*(jEnd - jBeg + 1)
+        do i = 1, N
+            s = (i - one) / (N - one)
+            t = (one - s) * t1 + s * t2
 
-             allocate(iBlankBuffer(nCell), xBuffer(3*nNode))
+            ! Spline pt
+            A3 = (t3 - t) / (t3 - t2) * P2 + (t - t2) / (t3 - t2) * P3
+            A2 = (t2 - t) / (t2 - t1) * P1 + (t - t1) / (t2 - t1) * P2
+            A1 = (t1 - t) / (t1 - t0) * P0 + (t - t0) / (t1 - t0) * P1
 
-             call mpi_recv(iBlankBuffer, nCell, adflow_integer, iProc, (2*tag-1), &
-                  adflow_comm_world, mpiStatus, ierr)
-             call ECHK(ierr, __FILE__, __LINE__)
+            B2 = (t3 - t) / (t3 - t1) * A2 + (t - t1) / (t3 - t1) * A3
+            B1 = (t2 - t) / (t2 - t0) * A1 + (t - t0) / (t2 - t0) * A2
 
-             call mpi_recv(xBuffer, nNode*3, adflow_real, iProc, 2*tag, &
-                  adflow_comm_world, mpiStatus, ierr)
-             call ECHK(ierr, __FILE__, __LINE__)
+            P = (t2 - t) / (t2 - t1) * B1 + (t - t1) / (t2 - t1) * B2
 
-             call writeZone(iBeg, iEnd, jBeg, jEnd, nBkGlobal, xBuffer, iBlankBuffer)
+            ! Now project the cubic point onto the line to get point Q
 
-             deallocate(iBlankBuffer, xBuffer)
+            Q = P1 + dot_product(P - P1, P2 - P1) / dot_product(P2 - P1, P2 - P1) * (P2 - P1)
 
-          end do
-       end do
-       deallocate(faceInfo, bocosPerProc)
-       close(101)
-    end if
+            ! Just get the distance between the two points.
+            checkDeviation = max(checkDeviation, mynorm2(Q - P))
 
-    contains
+        end do
+
+    end function checkDeviation
 
-      subroutine writeZone(iBeg, iEnd, jBeg, jEnd, nBkGlobal, xx, iblank)
+    subroutine writeWalls(famList)
 
+        !use oversetData
+        use constants
+        use blockPointers
+        use utils, only: setPointers, setBCPointers
+        use BCPointers, only: xx
+        use sorting, only: famInList
+        use communication, only: myid, adflow_comm_world, nProc
+        use utils, only: EChk
+        use commonFormats, only: sci12
         implicit none
-        ! Input
-        integer(kind=intType), intent(in) :: iBeg, iEnd, jBeg, jEnd, nBkGlobal
-        real(kind=realType), intent(in), dimension(:) :: xx
-        integer(kind=intType), intent(in), dimension(:) :: iblank
-
-        character(80) :: zoneName
-        integer(kind=intType) :: iDim
-        character(len=maxStringLen) :: zoneFormat
-
-        write(zoneName, "(a,I5.5)") "Zone_", nBkGlobal
-        zoneFormat = "(3(A), I5, A, I5)"
-        write(101, zoneFormat) 'ZONE T=', trim(zoneName), " I=", iEnd-iBeg+1, " J=", jEnd-jBeg+1
-        write (101,*) "DATAPACKING=BLOCK, VARLOCATION=([1,2,3]=NODAL, [4]=CELLCENTERED)"
-
-        ! The 3 is for the three coordinate directions
-        nNode = (iEnd - iBeg + 1)*(jEnd - jBeg + 1)
-        nCell = (iEnd - iBeg)*(jEnd - jBeg)
-
-        do i=1, 3*nNode
-           write(101, sci12) xx(i)
+        integer(kind=intType), intent(in), dimension(:) :: famList
+        character(80) :: fileName, zoneName
+        integer(kind=intType) :: i, j, nn, iDom, iBeg, iEnd, jBeg, jEnd, mm, iDim
+        integer(kind=intType) :: nNode, nCell, tag, ierr, ii, iProc, nLocalBoco, tmp(5)
+        real(kind=realType), dimension(:), allocatable :: xBuffer
+        integer(kind=intType), dimension(:), allocatable :: iblankBuffer, bocosPerProc
+        integer(kind=intType), dimension(:, :, :), allocatable :: faceInfo
+        integer, dimension(mpi_status_size) :: mpiStatus
+
+        ! Write a gathered surface tecplot file.
+        if (myid == 0) then
+            write (fileName, "(a)") "zipper_wall.dat"
+
+            open (unit=101, file=trim(fileName), form='formatted')
+            write (101, *) 'TITLE = "zipper walls"'
+            write (101, *) 'Variables = "X", "Y", "Z", "CellIBlank"'
+        end if
+
+        ! Before we start, the root processor needs to know how many
+        ! receives we can expect from each processor.
+
+        nLocalBoco = 0
+        do nn = 1, nDom
+            call setPointers(nn, 1, 1)
+            do mm = 1, nBocos
+                famInclude: if (famInList(BCData(mm)%famID, famList)) then
+                    nLocalBoco = nLocalBoco + 1
+                end if famInclude
+            end do
         end do
 
-        do i=1, nCell
-           write(101, *) iBlank(i)
+        if (myid == 0) then
+            allocate (bocosPerProc(0:nProc - 1))
+        end if
+
+        call mpi_gather(nLocalBoco, 1, adflow_integer, bocosPerProc, 1, &
+                        adflow_integer, 0, adflow_comm_world, ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
+        ! Now setup the info array on the root proc:
+        if (myid == 0) then
+            allocate (faceInfo(5, maxval(bocosPerProc), 0:nProc - 1))
+        end if
+
+        if (myid /= 0) then
+            tag = 0
+            do nn = 1, nDom
+                call setPointers(nn, 1, 1)
+                do mm = 1, nBocos
+                    jBeg = BCData(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+                    iBeg = BCData(mm)%inBeg; iEnd = BCData(mm)%inEnd
+                    famInclude2: if (famInList(BCData(mm)%famID, famList)) then
+                        tag = tag + 1
+                        tmp = (/iBeg, iEnd, jBeg, jEnd, nBkGlobal/)
+                        call mpi_send(tmp, 5, &
+                                      adflow_integer, 0, tag, adflow_comm_world, ierr)
+                        call ECHK(ierr, __FILE__, __LINE__)
+                    end if famInclude2
+                end do
+            end do
+        else
+            ! Receive the size info:
+            do iProc = 1, nProc - 1
+                do tag = 1, bocosPerProc(iProc)
+                    call mpi_recv(tmp, 5, adflow_integer, iProc, tag, &
+                                  adflow_comm_world, mpiStatus, ierr)
+                    call ECHK(ierr, __FILE__, __LINE__)
+                    faceInfo(:, tag, iProc) = tmp
+                end do
+            end do
+        end if
+
+        ! Need a barrier between the two sets of the comms just in case.
+        call MPI_Barrier(adflow_comm_world, ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
+
+        tag = 0
+        do nn = 1, nDom
+            call setPointers(nn, 1, 1)
+            do mm = 1, nBocos
+                jBeg = BCData(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+                iBeg = BCData(mm)%inBeg; iEnd = BCData(mm)%inEnd
+                famInclude3: if (famInList(BCData(mm)%famID, famList)) then
+                    call setBCPointers(mm, .True.)
+
+                    nNode = (iEnd - iBeg + 1) * (jEnd - jBeg + 1)
+                    nCell = (iEnd - iBeg + 1) * (jEnd - jBeg + 1)
+                    allocate (iBlankBuffer(nCell), xBuffer(3 * nNode))
+                    ii = 0
+                    do j = jBeg + 1, jEnd
+                        do i = iBeg + 1, iEnd
+                            ii = ii + 1
+                            iBlankBuffer(ii) = BCData(mm)%iBlank(i, j)
+                        end do
+                    end do
+
+                    ii = 0
+                    do iDim = 1, 3
+                        do j = jBeg, jEnd
+                            do i = iBeg, iEnd
+                                ii = ii + 1
+                                xBuffer(ii) = xx(i + 1, j + 1, iDim)
+                            end do
+                        end do
+                    end do
+
+                    if (myid == 0) then
+                        ! We can write it directly:
+                        call writeZone(iBeg, iEnd, jBeg, jEnd, nBkGlobal, xBuffer, iBlankBuffer)
+                    else
+
+                        tag = tag + 1
+                        call mpi_send(iBlankBuffer, nCell, adflow_integer, 0, tag, &
+                                      adflow_comm_world, ierr)
+                        call ECHK(ierr, __FILE__, __LINE__)
+
+                        tag = tag + 1
+                        call mpi_send(xBuffer, nNode * 3, adflow_real, 0, tag, &
+                                      adflow_comm_world, ierr)
+                        call ECHK(ierr, __FILE__, __LINE__)
+
+                    end if
+                    deallocate (iBlankBuffer, xBuffer)
+                end if famInclude3
+            end do
         end do
-      end subroutine writeZone
-    end subroutine writeWalls
 
-  subroutine bowTieAndIsolationElimination(famList, level, sps)
+        ! Complete the receives and writes on the root proc:
+        if (myid == 0) then
+            ! Receive the nodes and iblank info
+            do iProc = 1, nProc - 1
+                do tag = 1, bocosPerProc(iProc)
 
-    use constants
-    use blockPointers
-    use communication
-    use utils, only : setPointers
-    use oversetCommUtilities, only : exchangeSurfaceIBlanks
-    use sorting, only : famInList
-    implicit none
+                    iBeg = faceInfo(1, tag, iProc)
+                    iEnd = faceInfo(2, tag, iProc)
 
-    ! Input Parameters
-    integer(kind=intType), intent(in), dimension(:) :: famList
-    integer(kind=intType), intent(in) :: level, sps
+                    jBeg = faceInfo(3, tag, iProc)
+                    jEnd = faceInfo(4, tag, iProc)
 
-    ! Local variables
-    integer(kind=intType) :: mm, nn, i, j, k, e, iBeg, iEnd, jBeg, jEnd
-    logical :: side(4)
+                    nBkGlobal = faceInfo(5, tag, iProc)
 
-    integer(kind=intType), dimension(:, :), pointer :: ibp, gcp
-    integer(kind=intType), dimension(:, :), allocatable :: toFlip, nE, nC
+                    nNode = (iEnd - iBeg + 1) * (jEnd - jBeg + 1)
+                    nCell = (iEnd - iBeg + 1) * (jEnd - jBeg + 1)
 
-    ! This routine initializes the surface cell iblank based on the
-    ! volume iblank. It is not a straight copy since we a little
-    ! preprocessing to eliminate a few particularly nasty cases.
-    ! Three analysis are performed:
-    ! 1. Bow-tie elimination
-    ! 2. Single cell elmination
+                    allocate (iBlankBuffer(nCell), xBuffer(3 * nNode))
 
-    bowTieLoop: do E=0, 2
-       domainLoop1: do nn=1, nDom
-          call setPointers(nn, level, sps)
+                    call mpi_recv(iBlankBuffer, nCell, adflow_integer, iProc, (2 * tag - 1), &
+                                  adflow_comm_world, mpiStatus, ierr)
+                    call ECHK(ierr, __FILE__, __LINE__)
 
-          bocoLoop1: do mm=1, nBocos
-             famInclude1: if (famInList(BCData(mm)%famID, famList)) then
+                    call mpi_recv(xBuffer, nNode * 3, adflow_real, iProc, 2 * tag, &
+                                  adflow_comm_world, mpiStatus, ierr)
+                    call ECHK(ierr, __FILE__, __LINE__)
 
-                select case (BCFaceID(mm))
-                case (iMin)
-                   ibp => iblank(2, :, :)
-                   gcp => globalCell(2, :, :)
-                case (iMax)
-                   ibp => iblank(il, :, :)
-                   gcp => globalCell(il, :, :)
-                case (jMin)
-                   ibp => iblank(:, 2, :)
-                   gcp => globalCell(:, 2, :)
-                case (jMax)
-                   ibp => iblank(:, jl, :)
-                   gcp => globalCell(:, jl, :)
-                case (kMin)
-                   ibp => iblank(:, :, 2)
-                   gcp => globalCell(:, :, 2)
-                case (kMax)
-                   ibp => iblank(:, :, kl)
-                   gcp => globalCell(:, :, kl)
-                end select
+                    call writeZone(iBeg, iEnd, jBeg, jEnd, nBkGlobal, xBuffer, iBlankBuffer)
 
-                ! -------------------------------------------------
-                ! Step 2: Bow-tie elimination: Elimiate cells
-                ! that touch only at a corner.
-                ! -------------------------------------------------
+                    deallocate (iBlankBuffer, xBuffer)
 
-                ! Make bounds a little easier to read. Owned cells only
-                ! from now on.
-                jBeg = BCData(mm)%jnBeg+1 ; jEnd = BCData(mm)%jnEnd
-                iBeg = BCData(mm)%inBeg+1 ; iEnd = BCData(mm)%inEnd
-
-                ! Allocate two tmporary auxilary arrays 'eN'->
-                ! edgeNeighbours and 'cN'-> cornerNeighbous. For every
-                ! comupte cell determine the number of compute
-                ! neighbours connected along edges and at corners
-                allocate(nE(iBeg:iEnd, jBeg:jEnd), nC(iBeg:iEnd, jBeg:jEnd))!, &
-
-                call findBowTies()
-
-                do j=jBeg, jEnd
-                   do i=iBeg, iEnd
-                      if (BCData(mm)%iBlank(i, j) > 0 .and. nC(i,j) >=1 .and. nE(i,j) <= E) then
-                         BCData(mm)%iBlank(i, j) = 0
-                      end if
-                   end do
                 end do
+            end do
+            deallocate (faceInfo, bocosPerProc)
+            close (101)
+        end if
 
-                deallocate(nC, nE)
-             end if famInclude1
-          end do bocoLoop1
-       end do domainLoop1
-
-       ! Since we potentially changed iBlanks, we need to updated by
-       ! performing an exchange.
-       call exchangeSurfaceIBlanks(famList, level, sps, commPatternCell_2nd, internalCell_2nd)
-
-    end do bowTieLoop
-
-    domainLoop2: do nn=1, nDom
-       call setPointers(nn, level, sps)
-
-       bocoLoop2: do mm=1, nBocos
-          famInclude2: if (famInList(BCData(mm)%famID, famList)) then
-
-             select case (BCFaceID(mm))
-             case (iMin)
-                gcp => globalCell(2, :, :)
-             case (iMax)
-                gcp => globalCell(il, :, :)
-             case (jMin)
-                gcp => globalCell(:, 2, :)
-             case (jMax)
-                gcp => globalCell(:, jl, :)
-             case (kMin)
-                gcp => globalCell(:, :, 2)
-             case (kMax)
-                gcp => globalCell(:, :, kl)
-             end select
-
-             ! Make bounds a little easier to read. Owned cells only
-             ! from now on.
-             jBeg = BCData(mm)%jnBeg+1 ; jEnd = BCData(mm)%jnEnd
-             iBeg = BCData(mm)%inBeg+1 ; iEnd = BCData(mm)%inEnd
-
-             ! -------------------------------------------------
-             ! Step 3: Single-cell elimination: Elimiate cells
-             ! that do not touch any other cells.
-             ! -------------------------------------------------
-
-             allocate(nE(iBeg:iEnd, jBeg:jEnd), nC(iBeg:iEnd, jBeg:jEnd))
-
-             call setNeighbourCounts()
-
-             ! This is easy, if a compute cell is stil around with no
-             ! neighbours, kill it
-             do j=jBeg, jEnd
-                do i=iBeg, iEnd
-                   if (BCData(mm)%iBlank(i, j) == 1 .and. nE(i,j) == 0 .and. nC(i,j) == 0) then
-                      BCData(mm)%iBlank(i, j) = 0
-                   end if
-                end do
-             end do
+    contains
+
+        subroutine writeZone(iBeg, iEnd, jBeg, jEnd, nBkGlobal, xx, iblank)
 
-             deallocate(nE, nC)
+            implicit none
+            ! Input
+            integer(kind=intType), intent(in) :: iBeg, iEnd, jBeg, jEnd, nBkGlobal
+            real(kind=realType), intent(in), dimension(:) :: xx
+            integer(kind=intType), intent(in), dimension(:) :: iblank
 
-          end if famInclude2
-       end do bocoLoop2
-    end do domainLoop2
+            character(80) :: zoneName
+            integer(kind=intType) :: iDim
+            character(len=maxStringLen) :: zoneFormat
 
-    ! Again, since we potentially changed iBlanks, we need to updated by
-    ! performing an exchange.
-    call exchangeSurfaceIBlanks(famList, level, sps, commPatternCell_2nd, internalCell_2nd)
-  contains
-    subroutine findBowTies
+            write (zoneName, "(a,I5.5)") "Zone_", nBkGlobal
+            zoneFormat = "(3(A), I5, A, I5)"
+            write (101, zoneFormat) 'ZONE T=', trim(zoneName), " I=", iEnd - iBeg + 1, " J=", jEnd - jBeg + 1
+            write (101, *) "DATAPACKING=BLOCK, VARLOCATION=([1,2,3]=NODAL, [4]=CELLCENTERED)"
 
-      implicit none
-      ! For every compute determine the number of compute neighbours
-      ! connected along edges (nE) and the number of bow-ties (in nC)
+            ! The 3 is for the three coordinate directions
+            nNode = (iEnd - iBeg + 1) * (jEnd - jBeg + 1)
+            nCell = (iEnd - iBeg) * (jEnd - jBeg)
 
-      integer(kind=intType) :: i, j, e(4)
+            do i = 1, 3 * nNode
+                write (101, sci12) xx(i)
+            end do
 
-      nE = 0
-      nC = 0
+            do i = 1, nCell
+                write (101, *) iBlank(i)
+            end do
+        end subroutine writeZone
+    end subroutine writeWalls
 
-      do j=jBeg, jEnd
-         do i=iBeg, iEnd
-            if (BCData(mm)%iBlank(i, j) >= 0 ) then
-               e = 0
+    subroutine bowTieAndIsolationElimination(famList, level, sps)
 
-               !    |  e3  |
-               !  --c4-----c3-
-               !    |      |
-               ! e4 |   x  | e2
-               !    |      |
-               ! -- c1-----c2-
-               !    |  e1  |
+        use constants
+        use blockPointers
+        use communication
+        use utils, only: setPointers
+        use oversetCommUtilities, only: exchangeSurfaceIBlanks
+        use sorting, only: famInList
+        implicit none
 
-               ! Set the status of each of the 4 edges:
+        ! Input Parameters
+        integer(kind=intType), intent(in), dimension(:) :: famList
+        integer(kind=intType), intent(in) :: level, sps
+
+        ! Local variables
+        integer(kind=intType) :: mm, nn, i, j, k, e, iBeg, iEnd, jBeg, jEnd
+        logical :: side(4)
+
+        integer(kind=intType), dimension(:, :), pointer :: ibp, gcp
+        integer(kind=intType), dimension(:, :), allocatable :: toFlip, nE, nC
+
+        ! This routine initializes the surface cell iblank based on the
+        ! volume iblank. It is not a straight copy since we a little
+        ! preprocessing to eliminate a few particularly nasty cases.
+        ! Three analysis are performed:
+        ! 1. Bow-tie elimination
+        ! 2. Single cell elmination
+
+        bowTieLoop: do E = 0, 2
+            domainLoop1: do nn = 1, nDom
+                call setPointers(nn, level, sps)
+
+                bocoLoop1: do mm = 1, nBocos
+                    famInclude1: if (famInList(BCData(mm)%famID, famList)) then
+
+                        select case (BCFaceID(mm))
+                        case (iMin)
+                            ibp => iblank(2, :, :)
+                            gcp => globalCell(2, :, :)
+                        case (iMax)
+                            ibp => iblank(il, :, :)
+                            gcp => globalCell(il, :, :)
+                        case (jMin)
+                            ibp => iblank(:, 2, :)
+                            gcp => globalCell(:, 2, :)
+                        case (jMax)
+                            ibp => iblank(:, jl, :)
+                            gcp => globalCell(:, jl, :)
+                        case (kMin)
+                            ibp => iblank(:, :, 2)
+                            gcp => globalCell(:, :, 2)
+                        case (kMax)
+                            ibp => iblank(:, :, kl)
+                            gcp => globalCell(:, :, kl)
+                        end select
+
+                        ! -------------------------------------------------
+                        ! Step 2: Bow-tie elimination: Elimiate cells
+                        ! that touch only at a corner.
+                        ! -------------------------------------------------
+
+                        ! Make bounds a little easier to read. Owned cells only
+                        ! from now on.
+                        jBeg = BCData(mm)%jnBeg + 1; jEnd = BCData(mm)%jnEnd
+                        iBeg = BCData(mm)%inBeg + 1; iEnd = BCData(mm)%inEnd
+
+                        ! Allocate two tmporary auxilary arrays 'eN'->
+                        ! edgeNeighbours and 'cN'-> cornerNeighbous. For every
+                        ! comupte cell determine the number of compute
+                        ! neighbours connected along edges and at corners
+                        allocate (nE(iBeg:iEnd, jBeg:jEnd), nC(iBeg:iEnd, jBeg:jEnd))!, &
+
+                        call findBowTies()
+
+                        do j = jBeg, jEnd
+                            do i = iBeg, iEnd
+                                if (BCData(mm)%iBlank(i, j) > 0 .and. nC(i, j) >= 1 .and. nE(i, j) <= E) then
+                                    BCData(mm)%iBlank(i, j) = 0
+                                end if
+                            end do
+                        end do
+
+                        deallocate (nC, nE)
+                    end if famInclude1
+                end do bocoLoop1
+            end do domainLoop1
+
+            ! Since we potentially changed iBlanks, we need to updated by
+            ! performing an exchange.
+            call exchangeSurfaceIBlanks(famList, level, sps, commPatternCell_2nd, internalCell_2nd)
+
+        end do bowTieLoop
+
+        domainLoop2: do nn = 1, nDom
+            call setPointers(nn, level, sps)
+
+            bocoLoop2: do mm = 1, nBocos
+                famInclude2: if (famInList(BCData(mm)%famID, famList)) then
+
+                    select case (BCFaceID(mm))
+                    case (iMin)
+                        gcp => globalCell(2, :, :)
+                    case (iMax)
+                        gcp => globalCell(il, :, :)
+                    case (jMin)
+                        gcp => globalCell(:, 2, :)
+                    case (jMax)
+                        gcp => globalCell(:, jl, :)
+                    case (kMin)
+                        gcp => globalCell(:, :, 2)
+                    case (kMax)
+                        gcp => globalCell(:, :, kl)
+                    end select
+
+                    ! Make bounds a little easier to read. Owned cells only
+                    ! from now on.
+                    jBeg = BCData(mm)%jnBeg + 1; jEnd = BCData(mm)%jnEnd
+                    iBeg = BCData(mm)%inBeg + 1; iEnd = BCData(mm)%inEnd
+
+                    ! -------------------------------------------------
+                    ! Step 3: Single-cell elimination: Elimiate cells
+                    ! that do not touch any other cells.
+                    ! -------------------------------------------------
+
+                    allocate (nE(iBeg:iEnd, jBeg:jEnd), nC(iBeg:iEnd, jBeg:jEnd))
+
+                    call setNeighbourCounts()
+
+                    ! This is easy, if a compute cell is stil around with no
+                    ! neighbours, kill it
+                    do j = jBeg, jEnd
+                        do i = iBeg, iEnd
+                            if (BCData(mm)%iBlank(i, j) == 1 .and. nE(i, j) == 0 .and. nC(i, j) == 0) then
+                                BCData(mm)%iBlank(i, j) = 0
+                            end if
+                        end do
+                    end do
+
+                    deallocate (nE, nC)
+
+                end if famInclude2
+            end do bocoLoop2
+        end do domainLoop2
+
+        ! Again, since we potentially changed iBlanks, we need to updated by
+        ! performing an exchange.
+        call exchangeSurfaceIBlanks(famList, level, sps, commPatternCell_2nd, internalCell_2nd)
+    contains
+        subroutine findBowTies
 
-               if (BCData(mm)%iBlank(i, j-1) == 1) &
-                    e(1) = 1
+            implicit none
+            ! For every compute determine the number of compute neighbours
+            ! connected along edges (nE) and the number of bow-ties (in nC)
 
-               if (BCData(mm)%iBlank(i+1, j) == 1) &
-                    e(2) = 1
+            integer(kind=intType) :: i, j, e(4)
 
-               if (BCData(mm)%iBlank(i, j+1) == 1) &
-                    e(3) = 1
+            nE = 0
+            nC = 0
 
-               if (BCData(mm)%iBlank(i-1, j) == 1) &
-                    e(4) = 1
+            do j = jBeg, jEnd
+                do i = iBeg, iEnd
+                    if (BCData(mm)%iBlank(i, j) >= 0) then
+                        e = 0
 
-               ! Check the 4 corner neighbours for bow-tie status
-               if (BCData(mm)%iBlank(i-1, j-1) == 1 .and. e(4) == 0 .and. e(1) == 0) &
-                    nC(i, j) = nC(i, j) + 1
+                        !    |  e3  |
+                        !  --c4-----c3-
+                        !    |      |
+                        ! e4 |   x  | e2
+                        !    |      |
+                        ! -- c1-----c2-
+                        !    |  e1  |
 
-               if (BCData(mm)%iBlank(i+1, j-1) == 1 .and. e(1) == 0 .and. e(2) == 0) &
-                    nC(i, j) = nC(i, j) + 1
+                        ! Set the status of each of the 4 edges:
 
-               if (BCData(mm)%iBlank(i+1, j+1) == 1 .and. e(2) == 0 .and. e(3) == 0) &
-                    nC(i, j) = nC(i, j) + 1
+                        if (BCData(mm)%iBlank(i, j - 1) == 1) &
+                            e(1) = 1
 
-               if (BCData(mm)%iBlank(i-1, j+1) == 1 .and. e(3) == 0 .and. e(4) == 0) &
-                    nC(i, j) = nC(i, j) + 1
+                        if (BCData(mm)%iBlank(i + 1, j) == 1) &
+                            e(2) = 1
 
-               nE(i, j) = sum(e)
-            end if
-         end do
-      end do
-    end subroutine findBowTies
+                        if (BCData(mm)%iBlank(i, j + 1) == 1) &
+                            e(3) = 1
 
-    subroutine setNeighbourCounts
+                        if (BCData(mm)%iBlank(i - 1, j) == 1) &
+                            e(4) = 1
 
-      implicit none
-      ! For every comute determine the number of compute neighbours
-      ! connected along edges and at corners
+                        ! Check the 4 corner neighbours for bow-tie status
+                        if (BCData(mm)%iBlank(i - 1, j - 1) == 1 .and. e(4) == 0 .and. e(1) == 0) &
+                            nC(i, j) = nC(i, j) + 1
 
-      integer(kind=intType) :: i, j
+                        if (BCData(mm)%iBlank(i + 1, j - 1) == 1 .and. e(1) == 0 .and. e(2) == 0) &
+                            nC(i, j) = nC(i, j) + 1
 
-      nE = 0
-      nC = 0
+                        if (BCData(mm)%iBlank(i + 1, j + 1) == 1 .and. e(2) == 0 .and. e(3) == 0) &
+                            nC(i, j) = nC(i, j) + 1
 
-      do j=jBeg, jEnd
-         do i=iBeg, iEnd
-            if (BCData(mm)%iBlank(i, j) == 1) then
+                        if (BCData(mm)%iBlank(i - 1, j + 1) == 1 .and. e(3) == 0 .and. e(4) == 0) &
+                            nC(i, j) = nC(i, j) + 1
 
-               !    |  e3  |
-               !  --c4-----c3-
-               !    |      |
-               ! e4 |   x  | e2
-               !    |      |
-               ! -- c1-----c2-
-               !    |  e1  |
+                        nE(i, j) = sum(e)
+                    end if
+                end do
+            end do
+        end subroutine findBowTies
 
-               ! Set the status of each of the 4 edges:
+        subroutine setNeighbourCounts
 
-               if (BCData(mm)%iBlank(i, j-1) == 1) &
-                    nE(i, j) = nE(i, j) + 1
+            implicit none
+            ! For every comute determine the number of compute neighbours
+            ! connected along edges and at corners
 
-               if (BCData(mm)%iBlank(i+1, j) == 1) &
-                    nE(i, j) = nE(i, j) + 1
+            integer(kind=intType) :: i, j
 
-               if (BCData(mm)%iBlank(i, j+1) == 1) &
-                    nE(i, j) = nE(i, j) + 1
+            nE = 0
+            nC = 0
 
-               if (BCData(mm)%iBlank(i-1, j) == 1) &
-                    nE(i, j) = nE(i, j) + 1
+            do j = jBeg, jEnd
+                do i = iBeg, iEnd
+                    if (BCData(mm)%iBlank(i, j) == 1) then
 
-               ! Check the 4 corner neighbours for compute neighbour
-               if (BCData(mm)%iBlank(i-1, j-1) == 1) &
-                    nC(i, j) = nC(i, j) + 1
+                        !    |  e3  |
+                        !  --c4-----c3-
+                        !    |      |
+                        ! e4 |   x  | e2
+                        !    |      |
+                        ! -- c1-----c2-
+                        !    |  e1  |
 
-               if (BCData(mm)%iBlank(i+1, j-1) == 1) &
-                    nC(i, j) = nC(i, j) + 1
+                        ! Set the status of each of the 4 edges:
 
-               if (BCData(mm)%iBlank(i+1, j+1) == 1) &
-                    nC(i, j) = nC(i, j) + 1
+                        if (BCData(mm)%iBlank(i, j - 1) == 1) &
+                            nE(i, j) = nE(i, j) + 1
 
-               if (BCData(mm)%iBlank(i-1, j+1) == 1) &
-                    nC(i, j) = nC(i, j) + 1
-            end if
-         end do
-      end do
-    end subroutine setNeighbourCounts
-  end subroutine bowTieAndIsolationElimination
+                        if (BCData(mm)%iBlank(i + 1, j) == 1) &
+                            nE(i, j) = nE(i, j) + 1
+
+                        if (BCData(mm)%iBlank(i, j + 1) == 1) &
+                            nE(i, j) = nE(i, j) + 1
+
+                        if (BCData(mm)%iBlank(i - 1, j) == 1) &
+                            nE(i, j) = nE(i, j) + 1
+
+                        ! Check the 4 corner neighbours for compute neighbour
+                        if (BCData(mm)%iBlank(i - 1, j - 1) == 1) &
+                            nC(i, j) = nC(i, j) + 1
+
+                        if (BCData(mm)%iBlank(i + 1, j - 1) == 1) &
+                            nC(i, j) = nC(i, j) + 1
+
+                        if (BCData(mm)%iBlank(i + 1, j + 1) == 1) &
+                            nC(i, j) = nC(i, j) + 1
+
+                        if (BCData(mm)%iBlank(i - 1, j + 1) == 1) &
+                            nC(i, j) = nC(i, j) + 1
+                    end if
+                end do
+            end do
+        end subroutine setNeighbourCounts
+    end subroutine bowTieAndIsolationElimination
 end module zipperMesh
diff --git a/src/partitioning/gridChecking.F90 b/src/partitioning/gridChecking.F90
index 120b15613..c1b19bbe1 100644
--- a/src/partitioning/gridChecking.F90
+++ b/src/partitioning/gridChecking.F90
@@ -1,1326 +1,1325 @@
 module gridChecking
 contains
-  subroutine checkFaces
-    !
-    !       checkFaces determines whether or not a boundary condition or
-    !       a connectivity has been specified for all block faces. If this
-    !       is not the case, the corresponding blocks are printed and the
-    !       code terminates.
-    !
-    use constants
-    use blockPointers, only : nDom, nbkGlobal
-    use cgnsGrid, only : cgnsNDom, cgnsDoms
-    use communication, only : adflow_comm_world, myid, nProc
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use utils, only : setPointers, terminate
-    use partitionMod, only : sortBadEntities
-    use commonFormats, only : strings
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr, size
+    subroutine checkFaces
+        !
+        !       checkFaces determines whether or not a boundary condition or
+        !       a connectivity has been specified for all block faces. If this
+        !       is not the case, the corresponding blocks are printed and the
+        !       code terminates.
+        !
+        use constants
+        use blockPointers, only: nDom, nbkGlobal
+        use cgnsGrid, only: cgnsNDom, cgnsDoms
+        use communication, only: adflow_comm_world, myid, nProc
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use utils, only: setPointers, terminate
+        use partitionMod, only: sortBadEntities
+        use commonFormats, only: strings
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr, size
 
-    integer, dimension(nProc) :: recvcounts, displs
+        integer, dimension(nProc) :: recvcounts, displs
 
-    integer(kind=intType) :: mm, nn, sps, multiple
-    integer(kind=intType) :: nBad, faceID, nBadGlobal
+        integer(kind=intType) :: mm, nn, sps, multiple
+        integer(kind=intType) :: nBad, faceID, nBadGlobal
 
-    integer(kind=intType), dimension(nProc) :: counts
-    integer(kind=intType), &
-         dimension(4,nDom*nTimeIntervalsSpectral) :: bad
+        integer(kind=intType), dimension(nProc) :: counts
+        integer(kind=intType), &
+            dimension(4, nDom*nTimeIntervalsSpectral) :: bad
 
-    integer(kind=intType), dimension(:,:), allocatable :: badGlobal
+        integer(kind=intType), dimension(:, :), allocatable :: badGlobal
 
-    real(kind=realType) :: dummy(1)
+        real(kind=realType) :: dummy(1)
 
-    logical :: blockIsBad
+        logical :: blockIsBad
 
-    character(len=7) :: intString
+        character(len=7) :: intString
 
-    !       Determine the local bad blocks.
-    !
-    ! Loop over the number of spectral solutions to be checked and
-    ! the number of local blocks.
+        !       Determine the local bad blocks.
+        !
+        ! Loop over the number of spectral solutions to be checked and
+        ! the number of local blocks.
 
-    nBad = 0
-    do sps=1,nTimeIntervalsSpectral
-       do nn=1,nDom
+        nBad = 0
+        do sps = 1, nTimeIntervalsSpectral
+            do nn = 1, nDom
 
-          ! Check if the current block is okay.
+                ! Check if the current block is okay.
 
-          call setPointers(nn, 1_intType, sps)
-          call checkFacesBlock(blockIsBad, faceID, multiple)
+                call setPointers(nn, 1_intType, sps)
+                call checkFacesBlock(blockIsBad, faceID, multiple)
 
-          ! If the block is bad, update nBad and store the info in bad.
+                ! If the block is bad, update nBad and store the info in bad.
 
-          if( blockIsBad ) then
-             nBad        = nBad + 1
-             bad(1,nBad) = nbkGlobal
-             bad(2,nBad) = faceID
-             bad(3,nBad) = multiple
-             bad(4,nBad) = sps
-          endif
+                if (blockIsBad) then
+                    nBad = nBad + 1
+                    bad(1, nBad) = nbkGlobal
+                    bad(2, nBad) = faceID
+                    bad(3, nBad) = multiple
+                    bad(4, nBad) = sps
+                end if
 
-       enddo
-    enddo
-    !
-    !       Determine the global number of bad blocks and gather this
-    !       information.
-    !
-    ! Determine the number of bad blocks per processor.
+            end do
+        end do
+        !
+        !       Determine the global number of bad blocks and gather this
+        !       information.
+        !
+        ! Determine the number of bad blocks per processor.
 
-    call mpi_allgather(nBad, 1, adflow_integer, counts, 1, &
-         adflow_integer, ADflow_comm_world, ierr)
+        call mpi_allgather(nBad, 1, adflow_integer, counts, 1, &
+                           adflow_integer, ADflow_comm_world, ierr)
 
-    ! Determine the global number of bad blocks and the arrays
-    ! recvcounts and displs needed for the call to allgatherv.
+        ! Determine the global number of bad blocks and the arrays
+        ! recvcounts and displs needed for the call to allgatherv.
 
-    nBadGlobal    = counts(1)
-    recvcounts(1) = 4*counts(1)
-    displs(1)     = 0
+        nBadGlobal = counts(1)
+        recvcounts(1) = 4 * counts(1)
+        displs(1) = 0
 
-    do nn=2,nProc
-       nBadGlobal     = nBadGlobal + counts(nn)
-       recvcounts(nn) = 4*counts(nn)
-       displs(nn)     = displs(nn-1) + recvcounts(nn-1)
-    enddo
+        do nn = 2, nProc
+            nBadGlobal = nBadGlobal + counts(nn)
+            recvcounts(nn) = 4 * counts(nn)
+            displs(nn) = displs(nn - 1) + recvcounts(nn - 1)
+        end do
 
-    ! Allocate the memory to store all the bad blocks.
+        ! Allocate the memory to store all the bad blocks.
 
-    allocate(badGlobal(4,nBadGlobal), stat=ierr)
-    if(ierr /= 0)                  &
-         call terminate("checkFaces", &
-         "Memory allocation failure for badGlobal")
+        allocate (badGlobal(4, nBadGlobal), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("checkFaces", &
+                           "Memory allocation failure for badGlobal")
 
-    ! Gather the data.
+        ! Gather the data.
 
-    size = 4*nBad
-    call mpi_allgatherv(bad, size, adflow_integer, badGlobal, &
-         recvcounts, displs, adflow_integer,   &
-         ADflow_comm_world, ierr)
+        size = 4 * nBad
+        call mpi_allgatherv(bad, size, adflow_integer, badGlobal, &
+                            recvcounts, displs, adflow_integer, &
+                            ADflow_comm_world, ierr)
 
-    ! Sort the bad blocks and get rid of the multiple entries.
-    ! The last argument is .false. to indicate that only the
-    ! integers must be sorted; dummy is passed for consistency.
+        ! Sort the bad blocks and get rid of the multiple entries.
+        ! The last argument is .false. to indicate that only the
+        ! integers must be sorted; dummy is passed for consistency.
 
-    call sortBadEntities(nBadGlobal, badGlobal, dummy, .false.)
+        call sortBadEntities(nBadGlobal, badGlobal, dummy, .false.)
 
-    ! If bad blocks are present, print them to stdout and terminate.
+        ! If bad blocks are present, print them to stdout and terminate.
 
-    testBadPresent: if(nBadGlobal > 0) then
+        testBadPresent: if (nBadGlobal > 0) then
 
-       ! The data is only written by processor 0.
+            ! The data is only written by processor 0.
 
-       testRootProc: if(myID == 0) then
+            testRootProc: if (myID == 0) then
 
-          ! Write a header.
+                ! Write a header.
 
-          write(intString,"(i6)") nBadGlobal
-          intString = adjustl(intString)
+                write (intString, "(i6)") nBadGlobal
+                intString = adjustl(intString)
 
-          print "(a)", "#"
-          print strings, "# Found ", trim(intString), " blocks for which the boundary or connectivity information"
-          print "(a)", "# is not correct. Here is the list of bad blocks"
-          print "(a)", "#"
+                print "(a)", "#"
+                print strings, "# Found ", trim(intString), " blocks for which the boundary or connectivity information"
+                print "(a)", "# is not correct. Here is the list of bad blocks"
+                print "(a)", "#"
+
+                ! Write the bad blocks.
+
+                do mm = 1, nBadGlobal
+
+                    ! Abbreviate the contents of badGlobal a bit easier.
+
+                    nn = badGlobal(1, mm)
+                    faceID = badGlobal(2, mm)
+                    multiple = badGlobal(3, mm)
+                    sps = badGlobal(4, mm)
+
+                    ! If multiple spectral solutions are read, write the info
+                    ! about it. Otherwise just start the line with # sign.
+
+                    if (nTimeIntervalsSpectral > 1) then
+                        write (intString, "(i6)") nBadGlobal
+                        intString = adjustl(intString)
+                        write (*, strings, advance="no") "# Spectral grid ", trim(intString), ", "
+                    else
+                        write (*, "(a)", advance="no") "# "
+                    end if
+
+                    ! Write the error message, depending on the value of
+                    ! faceID and multiple.
+
+                    if (multiple == 0) then
+
+                        select case (faceID)
+                        case (iMin)
+                            print strings, "Zone ", trim(cgnsDoms(nn)%zoneName), ": iMin block face not fully described"
+                        case (iMax)
+                            print strings, "Zone ", trim(cgnsDoms(nn)%zoneName), ": iMax block face not fully described"
+                        case (jMin)
+                            print strings, "Zone ", trim(cgnsDoms(nn)%zoneName), ": jMin block face not fully described"
+                        case (jMax)
+                            print strings, "Zone ", trim(cgnsDoms(nn)%zoneName), ": jMax block face not fully described"
+                        case (kMin)
+                            print strings, "Zone ", trim(cgnsDoms(nn)%zoneName), ": kMin block face not fully described"
+                        case (kMax)
+                            print strings, "Zone ", trim(cgnsDoms(nn)%zoneName), ": kMax block face not fully described"
+                        case default
+                       print strings, "Zone ", trim(cgnsDoms(nn)%zoneName), ": Multiple block faces not fully described"
+                        end select
+
+                    else
+
+                        select case (faceID)
+                        case (iMin)
+                            print strings, "Zone ", trim(cgnsDoms(nn)%zoneName), ": iMin block face multiple described"
+                        case (iMax)
+                            print strings, "Zone ", trim(cgnsDoms(nn)%zoneName), ": iMax block face multiple described"
+                        case (jMin)
+                            print strings, "Zone ", trim(cgnsDoms(nn)%zoneName), ": jMin block face multiple described"
+                        case (jMax)
+                            print strings, "Zone ", trim(cgnsDoms(nn)%zoneName), ": jMax block face multiple described"
+                        case (kMin)
+                            print strings, "Zone ", trim(cgnsDoms(nn)%zoneName), ": kMin block face multiple described"
+                        case (kMax)
+                            print strings, "Zone ", trim(cgnsDoms(nn)%zoneName), ": kMax block face multiple described"
+                        case default
+                        print strings, "Zone ", trim(cgnsDoms(nn)%zoneName), ": Multiple block faces multiple described"
+                        end select
+
+                    end if
+
+                end do
+
+                ! Terminate.
+
+                call terminate("checkFaces", &
+                               "Wrong block boundary info found")
+
+            end if testRootProc
+
+            ! The other processors wait to get killed.
+
+            call mpi_barrier(ADflow_comm_world, ierr)
+
+        end if testBadPresent
+
+        ! Deallocate the memory of badGlobal.
+
+        deallocate (badGlobal, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("checkFaces", &
+                           "Deallocation failure for badGlobal")
+
+    end subroutine checkFaces
+
+    !      ==================================================================
+
+    subroutine checkFacesBlock(blockIsBad, faceID, multiple)
+        !
+        !       checkFacesBlock checks if for the currently active block all
+        !       the necessary boundary and connectivity info is specified.
+        !       If not, blockIsBad is set to .true. and the block face ID
+        !       which is bad, is returned as well.
+        !
+        use constants
+        use blockPointers
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(out) :: faceID, multiple
+        logical, intent(out) :: blockIsBad
+        !
+        !      Local variables.
+        !
+        integer, dimension(2:jl, 2:kl), target :: iMinFace, iMaxFace
+        integer, dimension(2:il, 2:kl), target :: jMinFace, jMaxFace
+        integer, dimension(2:il, 2:jl), target :: kMinFace, kMaxFace
+
+        integer, dimension(:, :), pointer :: face
+
+        integer(kind=intType) :: nn, i, j, k, nBad
+        integer(kind=intType) :: istart, iEnd, jStart, jEnd
+
+        logical :: iMinBad, iMaxBad, jMinBad, jMaxBad
+        logical :: kMinBad, kMaxBad
+
+        ! Initialize iMinFace, etc to 0. These arrays will contain the
+        ! number of specified connectivities and BC's for each face on
+        ! the block boundary.
+
+        iMinFace = 0; iMaxFace = 0
+        jMinFace = 0; jMaxFace = 0
+        kMinFace = 0; kMaxFace = 0
+
+        ! Loop over the number of subfaces of this block.
+
+        do nn = 1, nSubface
+
+            ! Determine the block face on which the subface is located and
+            ! set a couple of variables accordingly. Note that the nodal
+            ! ranges are used to determine the correct range of faces,
+            ! because the actual cell range can contain halo cells.
+
+            select case (BCFaceID(nn))
+            case (iMin)
+                face => iMinFace; istart = min(jnBeg(nn), jnEnd(nn)) + 1
+                iEnd = max(jnBeg(nn), jnEnd(nn))
+                jStart = min(knBeg(nn), knEnd(nn)) + 1
+                jEnd = max(knBeg(nn), knEnd(nn))
+            case (iMax)
+                face => iMaxFace; istart = min(jnBeg(nn), jnEnd(nn)) + 1
+                iEnd = max(jnBeg(nn), jnEnd(nn))
+                jStart = min(knBeg(nn), knEnd(nn)) + 1
+                jEnd = max(knBeg(nn), knEnd(nn))
+            case (jMin)
+                face => jMinFace; istart = min(inBeg(nn), inEnd(nn)) + 1
+                iEnd = max(inBeg(nn), inEnd(nn))
+                jStart = min(knBeg(nn), knEnd(nn)) + 1
+                jEnd = max(knBeg(nn), knEnd(nn))
+            case (jMax)
+                face => jMaxFace; istart = min(inBeg(nn), inEnd(nn)) + 1
+                iEnd = max(inBeg(nn), inEnd(nn))
+                jStart = min(knBeg(nn), knEnd(nn)) + 1
+                jEnd = max(knBeg(nn), knEnd(nn))
+            case (kMin)
+                face => kMinFace; istart = min(inBeg(nn), inEnd(nn)) + 1
+                iEnd = max(inBeg(nn), inEnd(nn))
+                jStart = min(jnBeg(nn), jnEnd(nn)) + 1
+                jEnd = max(jnBeg(nn), jnEnd(nn))
+            case (kMax)
+                face => kMaxFace; istart = min(inBeg(nn), inEnd(nn)) + 1
+                iEnd = max(inBeg(nn), inEnd(nn))
+                jStart = min(jnBeg(nn), jnEnd(nn)) + 1
+                jEnd = max(jnBeg(nn), jnEnd(nn))
+            end select
+
+            ! Loop over the faces and update their counter.
+
+            do j = jStart, jEnd
+                do i = istart, iEnd
+                    face(i, j) = face(i, j) + 1
+                end do
+            end do
+
+        end do
+
+        ! Determine the bad block faces.
+
+        multiple = 0
+        iMinBad = .false.
+        iMaxBad = .false.
+
+        ! iMin and iMax face.
+
+        do k = 2, kl
+            do j = 2, jl
+                if (iMinFace(j, k) /= 1) then
+                    multiple = iMinFace(j, k)
+                    faceID = iMin
+                    iMinBad = .true.
+                end if
+
+                if (iMaxFace(j, k) /= 1) then
+                    multiple = iMaxFace(j, k)
+                    faceID = iMax
+                    iMaxBad = .true.
+                end if
+            end do
+        end do
+
+        ! jMin and jMax face.
+
+        jMinBad = .false.
+        jMaxBad = .false.
+
+        do k = 2, kl
+            do i = 2, il
+                if (jMinFace(i, k) /= 1) then
+                    multiple = jMinFace(i, k)
+                    faceID = jMin
+                    jMinBad = .true.
+                end if
+
+                if (jMaxFace(i, k) /= 1) then
+                    multiple = jMaxFace(i, k)
+                    faceID = jMax
+                    jMaxBad = .true.
+                end if
+            end do
+        end do
+
+        ! kMin and kMax face.
+
+        kMinBad = .false.
+        kMaxBad = .false.
+
+        do j = 2, jl
+            do i = 2, il
+                if (kMinFace(i, j) /= 1) then
+                    multiple = kMinFace(i, j)
+                    faceID = kMin
+                    kMinBad = .true.
+                end if
+
+                if (kMaxFace(i, j) /= 1) then
+                    multiple = kMaxFace(i, j)
+                    faceID = kMax
+                    kMaxBad = .true.
+                end if
+            end do
+        end do
+
+        ! Determine the number of bad block faces.
+
+        nBad = 0
+        if (iMinBad) nBad = nBad + 1
+        if (iMaxBad) nBad = nBad + 1
+        if (jMinBad) nBad = nBad + 1
+        if (jMaxBad) nBad = nBad + 1
+        if (kMinBad) nBad = nBad + 1
+        if (kMaxBad) nBad = nBad + 1
+
+        ! Set blockIsBad if bad faces are present and correct the face
+        ! id to something weird if multiple bad faces are present.
+
+        blockIsBad = .false.
+        if (nBad > 0) blockIsBad = .true.
+        if (nBad > 1) faceID = huge(faceID)
+
+    end subroutine checkFacesBlock
+    subroutine check1to1Subfaces
+        !
+        !       check1to1Subfaces checks if the 1 to 1 internal subfaces,
+        !       including the periodic ones, match up to a certain tolerance.
+        !       If not, a warning will be printed. The computation is not
+        !       returnFaild, because sometimes gaps are introduced on purpose,
+        !       e.g. near a wing tip in an H-topology in spanwise direction.
+        !
+        use constants
+        use blockPointers, only: nDom, nBocos, inBeg, inEnd, jnBeg, &
+                                 jnEnd, knBeg, knEnd, n1to1, nbkGlobal, neighProc, dinEnd, &
+                                 dinBeg, djnBeg, djnEnd, dknBeg, dknEnd, x, cgnsSubFace, &
+                                 neighBlock, l1, l2, l3
+        use cgnsGrid, only: cgnsDoms, cgnsNDom
+        use communication, only: myID, adflow_comm_world, nProc, sendRequests, &
+                                 recvRequests
+        use inputPhysics
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use utils, only: delta, setPointers, terminate
+        use partitionMod, only: sortBadEntities
+        use commonFormats, only: strings
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr, procId, size
+
+        integer, dimension(mpi_status_size) :: mpiStatus
+        integer, dimension(nProc) :: sizeMessage
+        integer, dimension(nProc) :: recvcounts, displs
+
+        integer(kind=intType) :: i, j, k, ll1, ll2, ll3, ic, jc, kc
+        integer(kind=intType) :: sps, nn, mm, ll, ii, proc, nFCheck
+        integer(kind=intType) :: stepI, stepJ, stepK
+        integer(kind=intType) :: nMessagesSend, nMessagesReceive
+        integer(kind=intType) :: nBad, nBadGlobal
+
+        integer(kind=intType), dimension(3, 3) :: trMat
+
+        integer(kind=intType), dimension(0:nProc) :: nFSend, nCoor
+        integer(kind=intType), dimension(nProc) :: nFCount, nCCount
+
+        integer(kind=intType), dimension(:, :), allocatable :: intBuf
+        integer(kind=intType), dimension(:, :), allocatable :: intRecv
+        integer(kind=intType), dimension(:, :), allocatable :: badSubfaces
+        integer(kind=intType), dimension(:, :), allocatable :: badGlobal
+
+        real(kind=realType), dimension(:), allocatable :: badDist
+        real(kind=realType), dimension(:), allocatable :: badDistGlobal
+        real(kind=realType), dimension(:, :), allocatable :: realBuf
+        real(kind=realType), dimension(:, :), allocatable :: realRecv
+
+        character(len=7) :: intString
+        character(len=maxStringLen) :: devFormat, spectralDevFormat
+        !
+        !       Determine the local number of faces that must be sent to other
+        !       processors, including to myself. Also determine the number of
+        !       faces I have to check.
+        !
+        nFCheck = 0
+        nFSend = 0
+        nCoor = 0
+
+        ! Loop over the local blocks and its subfaces of the number of
+        ! spectral solutions to be checked.
+
+        do sps = 1, nTimeIntervalsSpectral
+            do nn = 1, nDom
+
+                call setPointers(nn, 1_intType, sps)
+
+                ! Loop over the 1 to 1 subfaces.
+
+                do mm = 1, n1to1
+
+                    ! Add the offset of nBocos to mm, such that the entries
+                    ! in the arrays corresponds to this 1 to 1 subface.
+
+                    ll = mm + nBocos
+
+                    ! Update nFSend and nCoor of the correct processor and
+                    ! update nFCheck.
+
+                    ii = (abs(inEnd(ll) - inBeg(ll)) + 1) &
+                         * (abs(jnEnd(ll) - jnBeg(ll)) + 1) &
+                         * (abs(knEnd(ll) - knBeg(ll)) + 1)
+                    ll = neighProc(ll) + 1
+
+                    nFSend(ll) = nFSend(ll) + 1
+                    nCoor(ll) = nCoor(ll) + ii
+
+                    nFCheck = nFCheck + 1
+
+                end do
+            end do
+        end do
+
+        ! Put nFSend and nCoor in cumulative storage format. Note
+        ! that nFSend and nCoor start at index 0. Store the starting
+        ! value in nFCount and nCCount respectively.
+
+        do nn = 1, nProc
+            nFCount(nn) = nFSend(nn - 1)
+            nCCount(nn) = nCoor(nn - 1)
+
+            nFSend(nn) = nFSend(nn) + nFSend(nn - 1)
+            nCoor(nn) = nCoor(nn) + nCoor(nn - 1)
+        end do
+        !
+        !       Determine the integer and real buffers to store the subface
+        !       information to be communicated.
+        !
+        ! Allocate the memory for the integer and real buffers to store
+        ! the information of the subfaces to be communicated.
 
-          ! Write the bad blocks.
+        nn = nFSend(nProc)
+        mm = nCoor(nProc)
 
-          do mm=1,nBadGlobal
+        allocate (intBuf(10, nn), realBuf(3, mm), stat=ierr)
+        if (ierr /= 0) &
+             call terminate("check1to1Subfaces", &
+             "Memory allocation failure for intBuf and &
+             &realBuf")
 
-             ! Abbreviate the contents of badGlobal a bit easier.
+        ! Repeat the loop over the number of local blocks and its subfaces
+        ! of the number of spectral solutions to be checked.
 
-             nn       = badGlobal(1,mm)
-             faceID   = badGlobal(2,mm)
-             multiple = badGlobal(3,mm)
-             sps      = badGlobal(4,mm)
+        do sps = 1, nTimeIntervalsSpectral
+            do nn = 1, nDom
 
-             ! If multiple spectral solutions are read, write the info
-             ! about it. Otherwise just start the line with # sign.
+                call setPointers(nn, 1_intType, sps)
 
-             if(nTimeIntervalsSpectral > 1) then
-                write(intString,"(i6)") nBadGlobal
-                intString = adjustl(intString)
-                write(*, strings, advance="no") "# Spectral grid ", trim(intString), ", "
-             else
-                write(*,"(a)",advance="no") "# "
-             endif
-
-             ! Write the error message, depending on the value of
-             ! faceID and multiple.
-
-
-             if( multiple == 0) then
-
-                select case (faceID)
-                case (iMin)
-                   print strings, "Zone ", trim(cgnsDoms(nn)%zoneName), ": iMin block face not fully described"
-                case (iMax)
-                   print strings, "Zone ", trim(cgnsDoms(nn)%zoneName), ": iMax block face not fully described"
-                case (jMin)
-                   print strings, "Zone ", trim(cgnsDoms(nn)%zoneName), ": jMin block face not fully described"
-                case (jMax)
-                   print strings, "Zone ", trim(cgnsDoms(nn)%zoneName), ": jMax block face not fully described"
-                case (kMin)
-                   print strings, "Zone ", trim(cgnsDoms(nn)%zoneName), ": kMin block face not fully described"
-                case (kMax)
-                   print strings, "Zone ", trim(cgnsDoms(nn)%zoneName), ": kMax block face not fully described"
-                case default
-                   print strings, "Zone ", trim(cgnsDoms(nn)%zoneName), ": Multiple block faces not fully described"
-                end select
-
-             else
-
-                select case (faceID)
-                case (iMin)
-                   print strings, "Zone ", trim(cgnsDoms(nn)%zoneName), ": iMin block face multiple described"
-                case (iMax)
-                   print strings, "Zone ", trim(cgnsDoms(nn)%zoneName), ": iMax block face multiple described"
-                case (jMin)
-                   print strings, "Zone ", trim(cgnsDoms(nn)%zoneName), ": jMin block face multiple described"
-                case (jMax)
-                   print strings, "Zone ", trim(cgnsDoms(nn)%zoneName), ": jMax block face multiple described"
-                case (kMin)
-                   print strings, "Zone ", trim(cgnsDoms(nn)%zoneName), ": kMin block face multiple described"
-                case (kMax)
-                   print strings, "Zone ", trim(cgnsDoms(nn)%zoneName), ": kMax block face multiple described"
-                case default
-                   print strings, "Zone ", trim(cgnsDoms(nn)%zoneName), ": Multiple block faces multiple described"
-                end select
-
-             endif
-
-          enddo
-
-          ! Terminate.
-
-          call terminate("checkFaces", &
-               "Wrong block boundary info found")
-
-       endif testRootProc
-
-       ! The other processors wait to get killed.
-
-       call mpi_barrier(ADflow_comm_world, ierr)
-
-    endif testBadPresent
-
-    ! Deallocate the memory of badGlobal.
-
-    deallocate(badGlobal, stat=ierr)
-    if(ierr /= 0)                   &
-         call terminate("checkFaces", &
-         "Deallocation failure for badGlobal")
-
-  end subroutine checkFaces
-
-  !      ==================================================================
-
-  subroutine checkFacesBlock(blockIsBad, faceID, multiple)
-    !
-    !       checkFacesBlock checks if for the currently active block all
-    !       the necessary boundary and connectivity info is specified.
-    !       If not, blockIsBad is set to .true. and the block face ID
-    !       which is bad, is returned as well.
-    !
-    use constants
-    use blockPointers
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(out) :: faceID, multiple
-    logical,               intent(out) :: blockIsBad
-    !
-    !      Local variables.
-    !
-    integer, dimension(2:jl,2:kl), target :: iMinFace, iMaxFace
-    integer, dimension(2:il,2:kl), target :: jMinFace, jMaxFace
-    integer, dimension(2:il,2:jl), target :: kMinFace, kMaxFace
-
-    integer, dimension(:,:), pointer :: face
-
-    integer(kind=intType) :: nn, i, j, k, nBad
-    integer(kind=intType) :: istart, iEnd, jStart, jEnd
-
-    logical :: iMinBad, iMaxBad, jMinBad, jMaxBad
-    logical :: kMinBad, kMaxBad
-
-    ! Initialize iMinFace, etc to 0. These arrays will contain the
-    ! number of specified connectivities and BC's for each face on
-    ! the block boundary.
-
-    iMinFace = 0; iMaxFace = 0
-    jMinFace = 0; jMaxFace = 0
-    kMinFace = 0; kMaxFace = 0
-
-    ! Loop over the number of subfaces of this block.
-
-    do nn=1,nSubface
-
-       ! Determine the block face on which the subface is located and
-       ! set a couple of variables accordingly. Note that the nodal
-       ! ranges are used to determine the correct range of faces,
-       ! because the actual cell range can contain halo cells.
-
-       select case(BCFaceID(nn))
-       case (iMin)
-          face => iMinFace; istart = min(jnBeg(nn),jnEnd(nn)) + 1
-          iEnd   = max(jnBeg(nn),jnEnd(nn))
-          jStart = min(knBeg(nn),knEnd(nn)) + 1
-          jEnd   = max(knBeg(nn),knEnd(nn))
-       case (iMax)
-          face => iMaxFace; istart = min(jnBeg(nn),jnEnd(nn)) + 1
-          iEnd   = max(jnBeg(nn),jnEnd(nn))
-          jStart = min(knBeg(nn),knEnd(nn)) + 1
-          jEnd   = max(knBeg(nn),knEnd(nn))
-       case (jMin)
-          face => jMinFace; istart = min(inBeg(nn),inEnd(nn)) + 1
-          iEnd   = max(inBeg(nn),inEnd(nn))
-          jStart = min(knBeg(nn),knEnd(nn)) + 1
-          jEnd   = max(knBeg(nn),knEnd(nn))
-       case (jMax)
-          face => jMaxFace; istart = min(inBeg(nn),inEnd(nn)) + 1
-          iEnd   = max(inBeg(nn),inEnd(nn))
-          jStart = min(knBeg(nn),knEnd(nn)) + 1
-          jEnd   = max(knBeg(nn),knEnd(nn))
-       case (kMin)
-          face => kMinFace; istart = min(inBeg(nn),inEnd(nn)) + 1
-          iEnd   = max(inBeg(nn),inEnd(nn))
-          jStart = min(jnBeg(nn),jnEnd(nn)) + 1
-          jEnd   = max(jnBeg(nn),jnEnd(nn))
-       case (kMax)
-          face => kMaxFace; istart = min(inBeg(nn),inEnd(nn)) + 1
-          iEnd   = max(inBeg(nn),inEnd(nn))
-          jStart = min(jnBeg(nn),jnEnd(nn)) + 1
-          jEnd   = max(jnBeg(nn),jnEnd(nn))
-       end select
-
-       ! Loop over the faces and update their counter.
-
-       do j=jStart,jEnd
-          do i=istart,iEnd
-             face(i,j) = face(i,j) + 1
-          enddo
-       enddo
-
-    enddo
-
-    ! Determine the bad block faces.
-
-    multiple = 0
-    iMinBad  = .false.
-    iMaxBad  = .false.
-
-    ! iMin and iMax face.
-
-    do k=2,kl
-       do j=2,jl
-          if(iMinFace(j,k) /= 1) then
-             multiple = iMinFace(j,k)
-             faceID   = iMin
-             iMinBad  = .true.
-          endif
-
-          if(iMaxFace(j,k) /= 1) then
-             multiple = iMaxFace(j,k)
-             faceID   = iMax
-             iMaxBad  = .true.
-          endif
-       enddo
-    enddo
-
-    ! jMin and jMax face.
-
-    jMinBad = .false.
-    jMaxBad = .false.
-
-    do k=2,kl
-       do i=2,il
-          if(jMinFace(i,k) /= 1) then
-             multiple = jMinFace(i,k)
-             faceID   = jMin
-             jMinBad  = .true.
-          endif
-
-          if(jMaxFace(i,k) /= 1) then
-             multiple = jMaxFace(i,k)
-             faceID   = jMax
-             jMaxBad  = .true.
-          endif
-       enddo
-    enddo
-
-    ! kMin and kMax face.
-
-    kMinBad = .false.
-    kMaxBad = .false.
-
-    do j=2,jl
-       do i=2,il
-          if(kMinFace(i,j) /= 1) then
-             multiple = kMinFace(i,j)
-             faceID   = kMin
-             kMinBad  = .true.
-          endif
-
-          if(kMaxFace(i,j) /= 1) then
-             multiple = kMaxFace(i,j)
-             faceID   = kMax
-             kMaxBad  = .true.
-          endif
-       enddo
-    enddo
-
-    ! Determine the number of bad block faces.
-
-    nBad = 0
-    if(iMinBad) nBad = nBad + 1
-    if(iMaxBad) nBad = nBad + 1
-    if(jMinBad) nBad = nBad + 1
-    if(jMaxBad) nBad = nBad + 1
-    if(kMinBad) nBad = nBad + 1
-    if(kMaxBad) nBad = nBad + 1
-
-    ! Set blockIsBad if bad faces are present and correct the face
-    ! id to something weird if multiple bad faces are present.
-
-    blockIsBad = .false.
-    if(nBad > 0) blockIsBad = .true.
-    if(nBad > 1) faceID = huge(faceID)
-
-  end subroutine checkFacesBlock
-  subroutine check1to1Subfaces
-    !
-    !       check1to1Subfaces checks if the 1 to 1 internal subfaces,
-    !       including the periodic ones, match up to a certain tolerance.
-    !       If not, a warning will be printed. The computation is not
-    !       returnFaild, because sometimes gaps are introduced on purpose,
-    !       e.g. near a wing tip in an H-topology in spanwise direction.
-    !
-    use constants
-    use blockPointers, only : nDom, nBocos, inBeg, inEnd, jnBeg, &
-         jnEnd, knBeg, knEnd, n1to1, nbkGlobal, neighProc, dinEnd, &
-         dinBeg, djnBeg, djnEnd, dknBeg, dknEnd, x, cgnsSubFace, &
-         neighBlock, l1, l2, l3
-    use cgnsGrid, only : cgnsDoms, cgnsNDom
-    use communication, only : myID, adflow_comm_world, nProc, sendRequests, &
-         recvRequests
-    use inputPhysics
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use utils, only : delta, setPointers, terminate
-    use partitionMod, only : sortBadEntities
-    use commonFormats, only : strings
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr, procId, size
-
-    integer, dimension(mpi_status_size) :: mpiStatus
-    integer, dimension(nProc)           :: sizeMessage
-    integer, dimension(nProc)           :: recvcounts, displs
-
-    integer(kind=intType) :: i, j, k, ll1, ll2, ll3, ic, jc, kc
-    integer(kind=intType) :: sps, nn, mm, ll, ii, proc, nFCheck
-    integer(kind=intType) :: stepI, stepJ, stepK
-    integer(kind=intType) :: nMessagesSend, nMessagesReceive
-    integer(kind=intType) :: nBad, nBadGlobal
-
-    integer(kind=intType), dimension(3,3) :: trMat
-
-    integer(kind=intType), dimension(0:nProc) :: nFSend, nCoor
-    integer(kind=intType), dimension(nProc)   :: nFCount, nCCount
-
-    integer(kind=intType), dimension(:,:), allocatable :: intBuf
-    integer(kind=intType), dimension(:,:), allocatable :: intRecv
-    integer(kind=intType), dimension(:,:), allocatable :: badSubfaces
-    integer(kind=intType), dimension(:,:), allocatable :: badGlobal
-
-    real(kind=realType), dimension(:),   allocatable :: badDist
-    real(kind=realType), dimension(:),   allocatable :: badDistGlobal
-    real(kind=realType), dimension(:,:), allocatable :: realBuf
-    real(kind=realType), dimension(:,:), allocatable :: realRecv
-
-    character(len=7) :: intString
-    character(len=maxStringLen) :: devFormat, spectralDevFormat
-    !
-    !       Determine the local number of faces that must be sent to other
-    !       processors, including to myself. Also determine the number of
-    !       faces I have to check.
-    !
-    nFCheck = 0
-    nFSend  = 0
-    nCoor   = 0
-
-    ! Loop over the local blocks and its subfaces of the number of
-    ! spectral solutions to be checked.
-
-    do sps=1,nTimeIntervalsSpectral
-       do nn=1,nDom
-
-          call setPointers(nn, 1_intType, sps)
-
-          ! Loop over the 1 to 1 subfaces.
-
-          do mm=1,n1to1
-
-             ! Add the offset of nBocos to mm, such that the entries
-             ! in the arrays corresponds to this 1 to 1 subface.
-
-             ll = mm + nBocos
-
-             ! Update nFSend and nCoor of the correct processor and
-             ! update nFCheck.
-
-             ii = (abs(inEnd(ll) - inBeg(ll)) + 1) &
-                  * (abs(jnEnd(ll) - jnBeg(ll)) + 1) &
-                  * (abs(knEnd(ll) - knBeg(ll)) + 1)
-             ll = neighProc(ll) + 1
-
-             nFSend(ll) = nFSend(ll) + 1
-             nCoor(ll)  = nCoor(ll)  + ii
-
-             nFCheck = nFCheck + 1
-
-          enddo
-       enddo
-    enddo
-
-    ! Put nFSend and nCoor in cumulative storage format. Note
-    ! that nFSend and nCoor start at index 0. Store the starting
-    ! value in nFCount and nCCount respectively.
-
-    do nn=1,nProc
-       nFCount(nn) = nFSend(nn-1)
-       nCCount(nn) = nCoor(nn-1)
-
-       nFSend(nn) = nFSend(nn) + nFSend(nn-1)
-       nCoor(nn)  = nCoor(nn)  + nCoor(nn-1)
-    enddo
-    !
-    !       Determine the integer and real buffers to store the subface
-    !       information to be communicated.
-    !
-    ! Allocate the memory for the integer and real buffers to store
-    ! the information of the subfaces to be communicated.
+                ! Loop over the 1 to 1 subfaces.
 
-    nn = nFSend(nProc)
-    mm = nCoor(nProc)
+                do mm = 1, n1to1
 
-    allocate(intBuf(10,nn), realBuf(3,mm), stat=ierr)
-    if(ierr /= 0) &
-         call terminate("check1to1Subfaces", &
-         "Memory allocation failure for intBuf and &
-         &realBuf")
+                    ! Add the offset of nBocos to mm, such that the entries
+                    ! in the arrays corresponds to this 1 to 1 subface.
 
-    ! Repeat the loop over the number of local blocks and its subfaces
-    ! of the number of spectral solutions to be checked.
+                    ll = mm + nBocos
 
-    do sps=1,nTimeIntervalsSpectral
-       do nn=1,nDom
+                    ! Store the donor range, the local block number, the
+                    ! spectral solution and global block ID in intBuf.
 
-          call setPointers(nn, 1_intType, sps)
+                    proc = neighProc(ll) + 1
+                    nFCount(proc) = nFCount(proc) + 1
+                    ii = nFCount(proc)
 
-          ! Loop over the 1 to 1 subfaces.
+                    intBuf(1, ii) = dinBeg(ll)
+                    intBuf(2, ii) = dinEnd(ll)
+                    intBuf(3, ii) = djnBeg(ll)
+                    intBuf(4, ii) = djnEnd(ll)
+                    intBuf(5, ii) = dknBeg(ll)
+                    intBuf(6, ii) = dknEnd(ll)
 
-          do mm=1,n1to1
+                    intBuf(7, ii) = neighBlock(ll)
+                    intBuf(8, ii) = sps
 
-             ! Add the offset of nBocos to mm, such that the entries
-             ! in the arrays corresponds to this 1 to 1 subface.
+                    intBuf(9, ii) = nbkGlobal
+                    intBuf(10, ii) = cgnsSubface(ll)
 
-             ll = mm + nBocos
+                    ! Determine whether the subface has positive or negative
+                    ! running indices. To be sure that the correct sequence is
+                    ! stored in realBuf, the loop must be performed over the
+                    ! donor range (i, j, and k indices could be swapped and
+                    ! therefore stored wrongly in the 1D buffer).
 
-             ! Store the donor range, the local block number, the
-             ! spectral solution and global block ID in intBuf.
+                    stepI = 1; if (dinEnd(ll) < dinBeg(ll)) stepI = -1
+                    stepJ = 1; if (djnEnd(ll) < djnBeg(ll)) stepJ = -1
+                    stepK = 1; if (dknEnd(ll) < dknBeg(ll)) stepK = -1
 
-             proc          = neighProc(ll) + 1
-             nFCount(proc) = nFCount(proc) + 1
-             ii            = nFCount(proc)
+                    ! Determine the transformation matrix between the donor
+                    ! and the current face. As the information stored in l1,
+                    ! l2 and l3 is for the transformation matrix to the donor
+                    ! face, the transpose must be taken.
 
-             intBuf( 1,ii) = dinBeg(ll)
-             intBuf( 2,ii) = dinEnd(ll)
-             intBuf( 3,ii) = djnBeg(ll)
-             intBuf( 4,ii) = djnEnd(ll)
-             intBuf( 5,ii) = dknBeg(ll)
-             intBuf( 6,ii) = dknEnd(ll)
+                    ll1 = l1(ll); ll2 = L2(ll); ll3 = l3(ll)
 
-             intBuf( 7,ii) = neighBlock(ll)
-             intBuf( 8,ii) = sps
+                    trMat(1, 1) = sign(1_intType, ll1) * delta(ll1, 1_intType)
+                    trMat(1, 2) = sign(1_intType, ll1) * delta(ll1, 2_intType)
+                    trMat(1, 3) = sign(1_intType, ll1) * delta(ll1, 3_intType)
 
-             intBuf( 9,ii) = nbkGlobal
-             intBuf(10,ii) = cgnsSubface(ll)
+                    trMat(2, 1) = sign(1_intType, ll2) * delta(ll2, 1_intType)
+                    trMat(2, 2) = sign(1_intType, ll2) * delta(ll2, 2_intType)
+                    trMat(2, 3) = sign(1_intType, ll2) * delta(ll2, 3_intType)
 
-             ! Determine whether the subface has positive or negative
-             ! running indices. To be sure that the correct sequence is
-             ! stored in realBuf, the loop must be performed over the
-             ! donor range (i, j, and k indices could be swapped and
-             ! therefore stored wrongly in the 1D buffer).
+                    trMat(3, 1) = sign(1_intType, ll3) * delta(ll3, 1_intType)
+                    trMat(3, 2) = sign(1_intType, ll3) * delta(ll3, 2_intType)
+                    trMat(3, 3) = sign(1_intType, ll3) * delta(ll3, 3_intType)
 
-             stepI = 1; if(dinEnd(ll) < dinBeg(ll)) stepI = -1
-             stepJ = 1; if(djnEnd(ll) < djnBeg(ll)) stepJ = -1
-             stepK = 1; if(dknEnd(ll) < dknBeg(ll)) stepK = -1
+                    ! Store the coordinates in realBuf by looping over the
+                    ! points of the subface.
 
-             ! Determine the transformation matrix between the donor
-             ! and the current face. As the information stored in l1,
-             ! l2 and l3 is for the transformation matrix to the donor
-             ! face, the transpose must be taken.
+                    ii = nCCount(proc)
 
-             ll1 = l1(ll); ll2 = L2(ll); ll3 = l3(ll)
+                    do k = dknBeg(ll), dknEnd(ll), stepK
+                        do j = djnBeg(ll), djnEnd(ll), stepJ
+                            do i = dinBeg(ll), dinEnd(ll), stepI
 
-             trMat(1,1) = sign(1_intType,ll1) * delta(ll1,1_intType)
-             trMat(1,2) = sign(1_intType,ll1) * delta(ll1,2_intType)
-             trMat(1,3) = sign(1_intType,ll1) * delta(ll1,3_intType)
+                                ! Determine the nodal indices in the current block.
 
-             trMat(2,1) = sign(1_intType,ll2) * delta(ll2,1_intType)
-             trMat(2,2) = sign(1_intType,ll2) * delta(ll2,2_intType)
-             trMat(2,3) = sign(1_intType,ll2) * delta(ll2,3_intType)
+                                ll1 = i - dinBeg(ll)
+                                ll2 = j - djnBeg(ll)
+                                ll3 = k - dknBeg(ll)
 
-             trMat(3,1) = sign(1_intType,ll3) * delta(ll3,1_intType)
-             trMat(3,2) = sign(1_intType,ll3) * delta(ll3,2_intType)
-             trMat(3,3) = sign(1_intType,ll3) * delta(ll3,3_intType)
+                                ic = inBeg(ll) &
+                                     + trMat(1, 1) * ll1 + trMat(1, 2) * ll2 + trMat(1, 3) * ll3
+                                jc = jnBeg(ll) &
+                                     + trMat(2, 1) * ll1 + trMat(2, 2) * ll2 + trMat(2, 3) * ll3
+                                kc = knBeg(ll) &
+                                     + trMat(3, 1) * ll1 + trMat(3, 2) * ll2 + trMat(3, 3) * ll3
 
-             ! Store the coordinates in realBuf by looping over the
-             ! points of the subface.
+                                ! Store the coordinates in the buffer.
 
-             ii = nCCount(proc)
+                                ii = ii + 1
+                                realBuf(1, ii) = x(ic, jc, kc, 1)
+                                realBuf(2, ii) = x(ic, jc, kc, 2)
+                                realBuf(3, ii) = x(ic, jc, kc, 3)
+                            end do
+                        end do
+                    end do
 
-             do k=dknBeg(ll), dknEnd(ll), stepK
-                do j=djnBeg(ll), djnEnd(ll), stepJ
-                   do i=dinBeg(ll), dinEnd(ll), stepI
+                    ! Set ii to the number of points stored for this subface.
+                    ! This number is needed when for the periodic correction.
 
-                      ! Determine the nodal indices in the current block.
+                    ii = ii - nCCount(proc)
 
-                      ll1 = i - dinBeg(ll)
-                      ll2 = j - djnBeg(ll)
-                      ll3 = k - dknBeg(ll)
+                    ! Check if a periodic correction must be applied and if so
+                    ! call the routine to do so for the coordinates of this
+                    ! subface. Note that internally created subfaces due to
+                    ! block splitting must be excluded from this test.
 
-                      ic = inBeg(ll) &
-                           + trMat(1,1)*ll1 + trMat(1,2)*ll2 + trMat(1,3)*ll3
-                      jc = jnBeg(ll) &
-                           + trMat(2,1)*ll1 + trMat(2,2)*ll2 + trMat(2,3)*ll3
-                      kc = knBeg(ll) &
-                           + trMat(3,1)*ll1 + trMat(3,2)*ll2 + trMat(3,3)*ll3
+                    k = cgnsSubface(ll)
+                    if (k > 0) then
+                        if (cgnsDoms(nbkGlobal)%conn1to1(k)%periodic) then
 
-                      ! Store the coordinates in the buffer.
+                            j = nCCount(proc) + 1
+                            call periodicTransformSubface(realBuf(1, j), ii, &
+                                                          cgnsDoms(nbkGlobal)%conn1to1(k)%rotationCenter, &
+                                                          cgnsDoms(nbkGlobal)%conn1to1(k)%rotationAngles, &
+                                                          cgnsDoms(nbkGlobal)%conn1to1(k)%translation)
+                        end if
+                    end if
 
-                      ii = ii + 1
-                      realBuf(1,ii) = x(ic,jc,kc,1)
-                      realBuf(2,ii) = x(ic,jc,kc,2)
-                      realBuf(3,ii) = x(ic,jc,kc,3)
-                   enddo
-                enddo
-             enddo
+                    ! Update the counter nCCount(proc).
 
-             ! Set ii to the number of points stored for this subface.
-             ! This number is needed when for the periodic correction.
+                    nCCount(proc) = nCCount(proc) + ii
 
-             ii = ii - nCCount(proc)
+                end do
+            end do
+        end do
+        !
+        !       Determine the number of messages I will send and receive.
+        !       The term message in this routine means a the complete subface
+        !       info, i.e. a combination of an integer and real message.
+        !
+        ! Fill the array nCCount with 0's and 1's. A 0 indicates that no
+        ! message is sent to the corresponding processor.
 
-             ! Check if a periodic correction must be applied and if so
-             ! call the routine to do so for the coordinates of this
-             ! subface. Note that internally created subfaces due to
-             ! block splitting must be excluded from this test.
+        do nn = 1, nProc
+            nCCount(nn) = 0
+            if (nFSend(nn) > nFSend(nn - 1)) nCCount(nn) = 1
+        end do
 
-             k = cgnsSubface(ll)
-             if(k > 0) then
-                if( cgnsDoms(nbkGlobal)%conn1to1(k)%periodic ) then
+        ! Make sure that no message is sent to myself. An offset of + 1
+        ! must be added, because the processor numbers start at 0 and
+        ! nCCount at 1.
 
-                   j = nCCount(proc) + 1
-                   call periodicTransformSubface(realBuf(1,j), ii,     &
-                        cgnsDoms(nbkGlobal)%conn1to1(k)%rotationCenter, &
-                        cgnsDoms(nbkGlobal)%conn1to1(k)%rotationAngles, &
-                        cgnsDoms(nbkGlobal)%conn1to1(k)%translation)
-                endif
-             endif
-
-             ! Update the counter nCCount(proc).
-
-             nCCount(proc) = nCCount(proc) + ii
-
-          enddo
-       enddo
-    enddo
-    !
-    !       Determine the number of messages I will send and receive.
-    !       The term message in this routine means a the complete subface
-    !       info, i.e. a combination of an integer and real message.
-    !
-    ! Fill the array nCCount with 0's and 1's. A 0 indicates that no
-    ! message is sent to the corresponding processor.
-
-    do nn=1,nProc
-       nCCount(nn) = 0
-       if(nFSend(nn) > nFSend(nn-1)) nCCount(nn) = 1
-    enddo
-
-    ! Make sure that no message is sent to myself. An offset of + 1
-    ! must be added, because the processor numbers start at 0 and
-    ! nCCount at 1.
-
-    nCCount(myID+1) = 0
-
-    ! Determine the number of message I have to receive.
-
-    sizeMessage = 1
-    call mpi_reduce_scatter(nCCount, nMessagesReceive,          &
-         sizeMessage, adflow_integer, mpi_sum, &
-         ADflow_comm_world, ierr)
-
-    ! Send the data I have to send. Do not send a message to myself.
-    ! That is handled separately. As nonblocking sends must be used
-    ! to avoid deadlock and two messages are sent to a processor,
-    ! the sendRequests (stored in the module communication) are used
-    ! for the integer messages and the recvRequests (same module)
-    ! are used for the real messages.
-
-    ii = 0
-    do nn=1,nProc
-
-       ! Check if something must be sent to this processor.
-
-       if(nCCount(nn) == 1) then
-
-          ! Send the integer buffer.
-
-          ii     = ii + 1
-          procID = nn - 1
-          size   = 10*(nFSend(nn) - nFSend(nn-1))
-          mm     = nFSend(nn-1) + 1
-
-          call mpi_isend(intBuf(1,mm), size, adflow_integer, procID,  &
-               procID, ADflow_comm_world, sendRequests(ii), &
-               ierr)
-
-          ! Send the real buffer.
-
-          size = 3*(nCoor(nn) - nCoor(nn-1))
-          mm   = nCoor(nn-1) + 1
-
-          call mpi_isend(realBuf(1,mm), size, adflow_real, procID, &
-               procID+1, ADflow_comm_world,              &
-               recvRequests(ii), ierr)
-
-       endif
-    enddo
-
-    ! Store the number of messages sent.
-
-    nMessagesSend = ii
-    !
-    !       Check the coordinates of the subfaces which should have been
-    !       sent to myself.
-    !
-    ! Initialize nBad to 0 and allocate the memory to store possible
-    ! bad subfaces.
-
-    nBad = 0
-    allocate(badSubfaces(4,nFCheck), badDist(nFCheck), stat=ierr)
-    if(ierr /= 0)                           &
-         call terminate("check1to1Subfaces", &
-         "Memory allocation failure for badSubfaces &
-         &and badDist")
-
-    ! Determine the number of local subfaces that must be checked.
-    ! If any present, check them.
-
-    nn = nFSend(myID+1) - nFSend(myID)
-    if(nn > 0) then
-       ii = nFSend(myID) + 1
-       mm = nCoor(myID)  + 1
-       call checkSubfaceCoor(intBuf(1,ii), realBuf(1,mm), nn, &
-            nBad, badSubfaces, badDist,      &
-            nTimeIntervalsSpectral)
-    endif
-    !
-    !       Check the coordinates of the subfaces which are received from
-    !       other processors.
-    !
-    ! Loop over the number of messages to be received.
-
-    do ii=1,nMessagesReceive
-
-       ! Wait until an integer message arrives and determine the
-       ! source and size of the message.
-
-       call mpi_probe(mpi_any_source, myID, ADflow_comm_world, &
-            mpiStatus, ierr)
+        nCCount(myID + 1) = 0
 
-       procID = mpiStatus(mpi_source)
-       call mpi_get_count(mpiStatus, adflow_integer, size, ierr)
+        ! Determine the number of message I have to receive.
 
-       ! Check in debug mode that the incoming message is of
-       ! correct size.
+        sizeMessage = 1
+        call mpi_reduce_scatter(nCCount, nMessagesReceive, &
+                                sizeMessage, adflow_integer, mpi_sum, &
+                                ADflow_comm_world, ierr)
 
-       if( debug ) then
-          if(size == mpi_undefined .or. mod(size,10) /= 0) &
-               call terminate("check1to1Subfaces",            &
-               "Unexpected size of integer message")
-       endif
+        ! Send the data I have to send. Do not send a message to myself.
+        ! That is handled separately. As nonblocking sends must be used
+        ! to avoid deadlock and two messages are sent to a processor,
+        ! the sendRequests (stored in the module communication) are used
+        ! for the integer messages and the recvRequests (same module)
+        ! are used for the real messages.
 
-       ! Determine the number of subfaces this message contains and
-       ! allocate the memory for the integer receive buffer.
-
-       nn = size/10
-       allocate(intRecv(10,nn), stat=ierr)
-       if(ierr /= 0)                         &
-            call terminate("check1to1Subfaces", &
-            "Memory allocation failure for intRecv")
+        ii = 0
+        do nn = 1, nProc
 
-       ! Receive the integer buffer. Blocking receives can be used,
-       ! because the message has already arrived.
+            ! Check if something must be sent to this processor.
 
-       call mpi_recv(intRecv, size, adflow_integer, procID, &
-            myID, ADflow_comm_world, mpiStatus, ierr)
+            if (nCCount(nn) == 1) then
 
-       ! Probe for the corresponding real buffer and determine its
-       ! size.
+                ! Send the integer buffer.
 
-       call mpi_probe(procID, myID+1, ADflow_comm_world, &
-            mpiStatus, ierr)
-       call mpi_get_count(mpiStatus, adflow_real, size, ierr)
+                ii = ii + 1
+                procID = nn - 1
+                size = 10 * (nFSend(nn) - nFSend(nn - 1))
+                mm = nFSend(nn - 1) + 1
+
+                call mpi_isend(intBuf(1, mm), size, adflow_integer, procID, &
+                               procID, ADflow_comm_world, sendRequests(ii), &
+                               ierr)
+
+                ! Send the real buffer.
+
+                size = 3 * (nCoor(nn) - nCoor(nn - 1))
+                mm = nCoor(nn - 1) + 1
+
+                call mpi_isend(realBuf(1, mm), size, adflow_real, procID, &
+                               procID + 1, ADflow_comm_world, &
+                               recvRequests(ii), ierr)
+
+            end if
+        end do
+
+        ! Store the number of messages sent.
 
-       ! Check in debug mode that the incoming message is of
-       ! correct size.
+        nMessagesSend = ii
+        !
+        !       Check the coordinates of the subfaces which should have been
+        !       sent to myself.
+        !
+        ! Initialize nBad to 0 and allocate the memory to store possible
+        ! bad subfaces.
 
-       if( debug ) then
-          if(size == mpi_undefined .or. mod(size,3) /= 0) &
-               call terminate("check1to1Subfaces",           &
-               "Unexpected size of real message")
-       endif
+        nBad = 0
+        allocate (badSubfaces(4, nFCheck), badDist(nFCheck), stat=ierr)
+        if (ierr /= 0) &
+             call terminate("check1to1Subfaces", &
+             "Memory allocation failure for badSubfaces &
+             &and badDist")
 
-       ! Determine the total number of coordinates in the message and
-       ! allocate the memory for the real receive buffer.
+        ! Determine the number of local subfaces that must be checked.
+        ! If any present, check them.
+
+        nn = nFSend(myID + 1) - nFSend(myID)
+        if (nn > 0) then
+            ii = nFSend(myID) + 1
+            mm = nCoor(myID) + 1
+            call checkSubfaceCoor(intBuf(1, ii), realBuf(1, mm), nn, &
+                                  nBad, badSubfaces, badDist, &
+                                  nTimeIntervalsSpectral)
+        end if
+        !
+        !       Check the coordinates of the subfaces which are received from
+        !       other processors.
+        !
+        ! Loop over the number of messages to be received.
 
-       mm = size/3
-       allocate(realRecv(3,mm), stat=ierr)
-       if(ierr /= 0)                         &
-            call terminate("check1to1Subfaces", &
-            "Memory allocation failure for realRecv")
+        do ii = 1, nMessagesReceive
+
+            ! Wait until an integer message arrives and determine the
+            ! source and size of the message.
+
+            call mpi_probe(mpi_any_source, myID, ADflow_comm_world, &
+                           mpiStatus, ierr)
+
+            procID = mpiStatus(mpi_source)
+            call mpi_get_count(mpiStatus, adflow_integer, size, ierr)
+
+            ! Check in debug mode that the incoming message is of
+            ! correct size.
+
+            if (debug) then
+                if (size == mpi_undefined .or. mod(size, 10) /= 0) &
+                    call terminate("check1to1Subfaces", &
+                                   "Unexpected size of integer message")
+            end if
+
+            ! Determine the number of subfaces this message contains and
+            ! allocate the memory for the integer receive buffer.
+
+            nn = size / 10
+            allocate (intRecv(10, nn), stat=ierr)
+            if (ierr /= 0) &
+                call terminate("check1to1Subfaces", &
+                               "Memory allocation failure for intRecv")
 
-       ! Receive the real buffer. Blocking receives can be used,
-       ! because the message has already arrived.
+            ! Receive the integer buffer. Blocking receives can be used,
+            ! because the message has already arrived.
 
-       call mpi_recv(realRecv, size, adflow_real, procID, &
-            myID+1, ADflow_comm_world, mpiStatus, ierr)
+            call mpi_recv(intRecv, size, adflow_integer, procID, &
+                          myID, ADflow_comm_world, mpiStatus, ierr)
 
-       ! Check the subfaces stored in these messages.
+            ! Probe for the corresponding real buffer and determine its
+            ! size.
 
-       call checkSubfaceCoor(intRecv, realRecv, nn, nBad, &
-            badSubfaces, badDist,        &
-            nTimeIntervalsSpectral)
+            call mpi_probe(procID, myID + 1, ADflow_comm_world, &
+                           mpiStatus, ierr)
+            call mpi_get_count(mpiStatus, adflow_real, size, ierr)
 
-       ! Release the memory of the integer and real receive buffer.
+            ! Check in debug mode that the incoming message is of
+            ! correct size.
 
-       deallocate(intRecv, realRecv, stat=ierr)
-       if(ierr /= 0)                         &
+            if (debug) then
+                if (size == mpi_undefined .or. mod(size, 3) /= 0) &
+                    call terminate("check1to1Subfaces", &
+                                   "Unexpected size of real message")
+            end if
+
+            ! Determine the total number of coordinates in the message and
+            ! allocate the memory for the real receive buffer.
+
+            mm = size / 3
+            allocate (realRecv(3, mm), stat=ierr)
+            if (ierr /= 0) &
+                call terminate("check1to1Subfaces", &
+                               "Memory allocation failure for realRecv")
+
+            ! Receive the real buffer. Blocking receives can be used,
+            ! because the message has already arrived.
+
+            call mpi_recv(realRecv, size, adflow_real, procID, &
+                          myID + 1, ADflow_comm_world, mpiStatus, ierr)
+
+            ! Check the subfaces stored in these messages.
+
+            call checkSubfaceCoor(intRecv, realRecv, nn, nBad, &
+                                  badSubfaces, badDist, &
+                                  nTimeIntervalsSpectral)
+
+            ! Release the memory of the integer and real receive buffer.
+
+            deallocate (intRecv, realRecv, stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("check1to1Subfaces", &
+                 "Deallocation failure for intRecv &
+                 &and realRecv")
+        end do
+
+        ! Complete the nonblocking sends.
+
+        size = nMessagesSend
+        do nn = 1, nMessagesSend
+            call mpi_waitany(size, sendRequests, procID, mpiStatus, ierr)
+            call mpi_waitany(size, recvRequests, procID, mpiStatus, ierr)
+        end do
+
+        ! Deallocate the memory for the integer and real buffers.
+
+        deallocate (intBuf, realBuf, stat=ierr)
+        if (ierr /= 0) &
             call terminate("check1to1Subfaces", &
-            "Deallocation failure for intRecv &
-            &and realRecv")
-    enddo
-
-    ! Complete the nonblocking sends.
-
-    size = nMessagesSend
-    do nn=1,nMessagesSend
-       call mpi_waitany(size, sendRequests, procID, mpiStatus, ierr)
-       call mpi_waitany(size, recvRequests, procID, mpiStatus, ierr)
-    enddo
-
-    ! Deallocate the memory for the integer and real buffers.
-
-    deallocate(intBuf, realBuf, stat=ierr)
-    if(ierr /= 0)                           &
-         call terminate("check1to1Subfaces", &
-         "Deallocation failure for intBuf and realBuf")
-    !
-    !       Determine the global number of bad subfaces and gather this
-    !       information.
-    !
-    ! Determine the global number of bad subfaces.
-
-    call mpi_allgather(nBad, 1, adflow_integer, nCCount, 1, &
-         adflow_integer, ADflow_comm_world, ierr)
-
-    ! Determine the global number of bad subfaces and the arrays
-    ! recvcounts and displs needed for the call to allgatherv
-
-    nBadGlobal    = nCCount(1)
-    recvcounts(1) = nCCount(1)
-    displs(1)     = 0
-
-    do nn=2,nProc
-       nBadGlobal     = nBadGlobal + nCCount(nn)
-       recvcounts(nn) = nCCount(nn)
-       displs(nn)     = displs(nn-1) + recvcounts(nn-1)
-    enddo
-
-    ! Allocate the memory to store the global bad surfaces.
-
-    allocate(badGlobal(4,nBadGlobal), badDistGlobal(nBadGlobal), &
-         stat=ierr)
-    if(ierr /= 0)                         &
-         call terminate("check1to1Subfaces", &
-         "Memory allocation failure for badGlobal &
-         &and badDistGlobal")
-
-    ! Gather the data. First the distance info.
-
-    size = nBad
-    call mpi_allgatherv(badDist, size, adflow_real, badDistGlobal, &
-         recvcounts, displs, adflow_real,           &
-         ADflow_comm_world, ierr)
-
-    ! And the integer info. Multiply recvcounts and displs
-    ! by 4, because 4 integers are received.
-
-    do nn=1,nProc
-       recvcounts(nn) = 4*recvcounts(nn)
-       displs(nn)     = 4*displs(nn)
-    enddo
-
-    size = 4*nBad
-    call mpi_allgatherv(badSubfaces, size, adflow_integer, badGlobal, &
-         recvcounts, displs, adflow_integer,           &
-         ADflow_comm_world, ierr)
-
-    ! Sort the bad subfaces and get rid of the multiple entries.
-
-    call sortBadEntities(nBadGlobal, badGlobal, badDistGlobal, .true.)
-
-    ! Check for the presence of any internally created subfaces.
-    ! This only occurs when something goes wrong in the block
-    ! splitting and therefore the program is terminated.
-
-    do nn=1,nBadGlobal
-       if(badGlobal(2,nn) == 0) then
-          if(myID == 0)                         &
-               call terminate("check1to1Subfaces", &
-               "Non-matching internally created &
-               &face found.")
-          call mpi_barrier(ADflow_comm_world, ierr)
-       endif
-    enddo
-
-    ! Print the bad subfaces, if present. Only processor 0 performs
-    ! this task.
-
-    devFormat = "(7(A), ES12.5)"
-    spectralDevFormat = "(9(A), ES12.5)"
-
-    if(myID == 0 .and. nBadGlobal > 0) then
-
-       write(intString,"(i6)") nBadGlobal
-       intString = adjustl(intString)
-
-       print "(a)",  "#"
-       print strings, "#                      Warning"
-       print strings, "# Found ", trim(intString)," one to one subfaces which do not coincide."
-       print strings, "# Computation continues, but be aware of it."
-       print strings, "# List of nonmatching one to one subfaces."
-       print "(a)", "#"
-
-       do nn=1,nBadGlobal
-          i = badGlobal(1,nn)
-          j = badGlobal(2,nn)
-
-          write(intString,"(i6)") badGlobal(3,nn)
-          intString = adjustl(intString)
-
-          ! Write a different error message if more than one grid has
-          ! been read.
-
-          if(nTimeIntervalsSpectral > 1) then
-
-             if(badGlobal(4,nn) == 1) then
-                print spectralDevFormat, "# Spectral grid ", trim(intString), ", zone ", trim(cgnsDoms(i)%zoneName), &
-                 ", periodic subface ", trim(cgnsDoms(i)%conn1to1(j)%connectName), &
-                 " does not match donor ", trim(cgnsDoms(i)%conn1to1(j)%donorName), &
-                 ". Maximum deviation: ", badDistGlobal(nn)
-             else
-               print spectralDevFormat, "# Spectral grid ", trim(intString), ", zone ", trim(cgnsDoms(i)%zoneName), &
-               ", subface ", trim(cgnsDoms(i)%conn1to1(j)%connectName), &
-               " does not match donor ", trim(cgnsDoms(i)%conn1to1(j)%donorName), &
-               ". Maximum deviation: ", badDistGlobal(nn)
-             endif
-
-          else
-
-             if(badGlobal(4,nn) == 1) then
-               print devFormat, "Zone ", trim(cgnsDoms(i)%zoneName), &
-               ", periodic subface ", trim(cgnsDoms(i)%conn1to1(j)%connectName), &
-               " does not match donor ", trim(cgnsDoms(i)%conn1to1(j)%donorName), &
-               ". Maximum deviation: ", badDistGlobal(nn)
-             else
-               print devFormat, "Zone ", trim(cgnsDoms(i)%zoneName), &
-               ", subface ", trim(cgnsDoms(i)%conn1to1(j)%connectName), &
-               " does not match donor ", trim(cgnsDoms(i)%conn1to1(j)%donorName), &
-               ". Maximum deviation: ", badDistGlobal(nn)
-             endif
-
-          endif
-       enddo
-
-       print "(a)", "#"
+                           "Deallocation failure for intBuf and realBuf")
+        !
+        !       Determine the global number of bad subfaces and gather this
+        !       information.
+        !
+        ! Determine the global number of bad subfaces.
+
+        call mpi_allgather(nBad, 1, adflow_integer, nCCount, 1, &
+                           adflow_integer, ADflow_comm_world, ierr)
+
+        ! Determine the global number of bad subfaces and the arrays
+        ! recvcounts and displs needed for the call to allgatherv
+
+        nBadGlobal = nCCount(1)
+        recvcounts(1) = nCCount(1)
+        displs(1) = 0
+
+        do nn = 2, nProc
+            nBadGlobal = nBadGlobal + nCCount(nn)
+            recvcounts(nn) = nCCount(nn)
+            displs(nn) = displs(nn - 1) + recvcounts(nn - 1)
+        end do
+
+        ! Allocate the memory to store the global bad surfaces.
+
+        allocate (badGlobal(4, nBadGlobal), badDistGlobal(nBadGlobal), &
+                  stat=ierr)
+        if (ierr /= 0) &
+             call terminate("check1to1Subfaces", &
+             "Memory allocation failure for badGlobal &
+             &and badDistGlobal")
+
+        ! Gather the data. First the distance info.
+
+        size = nBad
+        call mpi_allgatherv(badDist, size, adflow_real, badDistGlobal, &
+                            recvcounts, displs, adflow_real, &
+                            ADflow_comm_world, ierr)
+
+        ! And the integer info. Multiply recvcounts and displs
+        ! by 4, because 4 integers are received.
+
+        do nn = 1, nProc
+            recvcounts(nn) = 4 * recvcounts(nn)
+            displs(nn) = 4 * displs(nn)
+        end do
+
+        size = 4 * nBad
+        call mpi_allgatherv(badSubfaces, size, adflow_integer, badGlobal, &
+                            recvcounts, displs, adflow_integer, &
+                            ADflow_comm_world, ierr)
+
+        ! Sort the bad subfaces and get rid of the multiple entries.
+
+        call sortBadEntities(nBadGlobal, badGlobal, badDistGlobal, .true.)
+
+        ! Check for the presence of any internally created subfaces.
+        ! This only occurs when something goes wrong in the block
+        ! splitting and therefore the program is terminated.
+
+        do nn = 1, nBadGlobal
+            if (badGlobal(2, nn) == 0) then
+                if (myID == 0) &
+                     call terminate("check1to1Subfaces", &
+                     "Non-matching internally created &
+                     &face found.")
+                call mpi_barrier(ADflow_comm_world, ierr)
+            end if
+        end do
+
+        ! Print the bad subfaces, if present. Only processor 0 performs
+        ! this task.
+
+        devFormat = "(7(A), ES12.5)"
+        spectralDevFormat = "(9(A), ES12.5)"
+
+        if (myID == 0 .and. nBadGlobal > 0) then
+
+            write (intString, "(i6)") nBadGlobal
+            intString = adjustl(intString)
+
+            print "(a)", "#"
+            print strings, "#                      Warning"
+            print strings, "# Found ", trim(intString), " one to one subfaces which do not coincide."
+            print strings, "# Computation continues, but be aware of it."
+            print strings, "# List of nonmatching one to one subfaces."
+            print "(a)", "#"
+
+            do nn = 1, nBadGlobal
+                i = badGlobal(1, nn)
+                j = badGlobal(2, nn)
+
+                write (intString, "(i6)") badGlobal(3, nn)
+                intString = adjustl(intString)
 
-    endif
+                ! Write a different error message if more than one grid has
+                ! been read.
+
+                if (nTimeIntervalsSpectral > 1) then
+
+                    if (badGlobal(4, nn) == 1) then
+                  print spectralDevFormat, "# Spectral grid ", trim(intString), ", zone ", trim(cgnsDoms(i)%zoneName), &
+                            ", periodic subface ", trim(cgnsDoms(i)%conn1to1(j)%connectName), &
+                            " does not match donor ", trim(cgnsDoms(i)%conn1to1(j)%donorName), &
+                            ". Maximum deviation: ", badDistGlobal(nn)
+                    else
+                  print spectralDevFormat, "# Spectral grid ", trim(intString), ", zone ", trim(cgnsDoms(i)%zoneName), &
+                            ", subface ", trim(cgnsDoms(i)%conn1to1(j)%connectName), &
+                            " does not match donor ", trim(cgnsDoms(i)%conn1to1(j)%donorName), &
+                            ". Maximum deviation: ", badDistGlobal(nn)
+                    end if
+
+                else
+
+                    if (badGlobal(4, nn) == 1) then
+                        print devFormat, "Zone ", trim(cgnsDoms(i)%zoneName), &
+                            ", periodic subface ", trim(cgnsDoms(i)%conn1to1(j)%connectName), &
+                            " does not match donor ", trim(cgnsDoms(i)%conn1to1(j)%donorName), &
+                            ". Maximum deviation: ", badDistGlobal(nn)
+                    else
+                        print devFormat, "Zone ", trim(cgnsDoms(i)%zoneName), &
+                            ", subface ", trim(cgnsDoms(i)%conn1to1(j)%connectName), &
+                            " does not match donor ", trim(cgnsDoms(i)%conn1to1(j)%donorName), &
+                            ". Maximum deviation: ", badDistGlobal(nn)
+                    end if
+
+                end if
+            end do
 
-    ! Deallocate the memory of to store the bad subfaces.
+            print "(a)", "#"
 
-    deallocate(badSubfaces, badGlobal, badDist, badDistGlobal, &
-         stat=ierr)
-    if(ierr /= 0)                         &
-         call terminate("check1to1Subfaces", &
-         "Deallocation failure for badSubfaces, &
-         &badGlobal, badDist and badDistGlobal")
-
-    ! Synchronize the processors, because wild cards have been used.
-
-    call mpi_barrier(ADflow_comm_world, ierr)
-
-  end subroutine check1to1Subfaces
-
-  !      ==================================================================
-
-  subroutine periodicTransformSubface(coor, nn, rotCenter, &
-       rotAngles, translation)
-    !
-    !       periodicTransformSubface transforms the given set of
-    !       coordinates using the periodic transformation defined by
-    !       rotCenter, rotAngles and translation.
-    !
-    use constants
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nn
-
-    real(kind=realType), dimension(3), intent(in) :: rotCenter
-    real(kind=realType), dimension(3), intent(in) :: rotAngles
-    real(kind=realType), dimension(3), intent(in) :: translation
-
-    real(kind=realType), dimension(3,nn), intent(inout) :: coor
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i
-
-    real(kind=realType) :: cosTheta, cosPhi, cosPsi
-    real(kind=realType) :: sinTheta, sinPhi, sinPsi
-    real(kind=realType) :: dx, dy, dz
-
-    real(kind=realType), dimension(3)   :: trans
-    real(kind=realType), dimension(3,3) :: rotMatrix
-
-    ! Construct from the given rotation angles the rotation matrix
-    ! from the current coordinates to the donor coordinates.
-    ! Note that the sequence of rotation is first rotation around the
-    ! x-axis, followed by rotation around the y-axis and finally
-    ! rotation around the z-axis.
-
-    cosTheta = cos(rotAngles(1)); sinTheta = sin(rotAngles(1))
-    cosPhi   = cos(rotAngles(2)); sinPhi   = sin(rotAngles(2))
-    cosPsi   = cos(rotAngles(3)); sinPsi   = sin(rotAngles(3))
-
-    rotMatrix(1,1) =  cosPhi*cosPsi
-    rotMatrix(2,1) =  cosPhi*sinPsi
-    rotMatrix(3,1) = -sinPhi
-
-    rotMatrix(1,2) = sinTheta*sinPhi*cosPsi - cosTheta*sinPsi
-    rotMatrix(2,2) = sinTheta*sinPhi*sinPsi + cosTheta*cosPsi
-    rotMatrix(3,2) = sinTheta*cosPhi
-
-    rotMatrix(1,3) = cosTheta*sinPhi*cosPsi + sinTheta*sinPsi
-    rotMatrix(2,3) = cosTheta*sinPhi*sinPsi - sinTheta*cosPsi
-    rotMatrix(3,3) = cosTheta*cosPhi
-
-    ! Store the translation plus the rotation center in trans.
-
-    trans(1) = rotCenter(1) + translation(1)
-    trans(2) = rotCenter(2) + translation(2)
-    trans(3) = rotCenter(3) + translation(3)
-
-    ! Loop over the number of coordinates to be corrected.
-
-    do i=1,nn
-
-       ! Determine the relative position w.R.T. The rotation center.
-
-       dx = coor(1,i) - rotCenter(1)
-       dy = coor(2,i) - rotCenter(2)
-       dz = coor(3,i) - rotCenter(3)
-
-       ! Determine the coordinates after the transformation.
-
-       coor(1,i) = rotMatrix(1,1)*dx + rotMatrix(1,2)*dy &
-            + rotMatrix(1,3)*dz + trans(1)
-       coor(2,i) = rotMatrix(2,1)*dx + rotMatrix(2,2)*dy &
-            + rotMatrix(2,3)*dz + trans(2)
-       coor(3,i) = rotMatrix(3,1)*dx + rotMatrix(3,2)*dy &
-            + rotMatrix(3,3)*dz + trans(3)
+        end if
 
-    enddo
+        ! Deallocate the memory of to store the bad subfaces.
 
-  end subroutine periodicTransformSubface
+        deallocate (badSubfaces, badGlobal, badDist, badDistGlobal, &
+                    stat=ierr)
+        if (ierr /= 0) &
+             call terminate("check1to1Subfaces", &
+             "Deallocation failure for badSubfaces, &
+             &badGlobal, badDist and badDistGlobal")
+
+        ! Synchronize the processors, because wild cards have been used.
+
+        call mpi_barrier(ADflow_comm_world, ierr)
+
+    end subroutine check1to1Subfaces
+
+    !      ==================================================================
+
+    subroutine periodicTransformSubface(coor, nn, rotCenter, &
+                                        rotAngles, translation)
+        !
+        !       periodicTransformSubface transforms the given set of
+        !       coordinates using the periodic transformation defined by
+        !       rotCenter, rotAngles and translation.
+        !
+        use constants
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nn
+
+        real(kind=realType), dimension(3), intent(in) :: rotCenter
+        real(kind=realType), dimension(3), intent(in) :: rotAngles
+        real(kind=realType), dimension(3), intent(in) :: translation
+
+        real(kind=realType), dimension(3, nn), intent(inout) :: coor
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i
+
+        real(kind=realType) :: cosTheta, cosPhi, cosPsi
+        real(kind=realType) :: sinTheta, sinPhi, sinPsi
+        real(kind=realType) :: dx, dy, dz
+
+        real(kind=realType), dimension(3) :: trans
+        real(kind=realType), dimension(3, 3) :: rotMatrix
+
+        ! Construct from the given rotation angles the rotation matrix
+        ! from the current coordinates to the donor coordinates.
+        ! Note that the sequence of rotation is first rotation around the
+        ! x-axis, followed by rotation around the y-axis and finally
+        ! rotation around the z-axis.
+
+        cosTheta = cos(rotAngles(1)); sinTheta = sin(rotAngles(1))
+        cosPhi = cos(rotAngles(2)); sinPhi = sin(rotAngles(2))
+        cosPsi = cos(rotAngles(3)); sinPsi = sin(rotAngles(3))
+
+        rotMatrix(1, 1) = cosPhi * cosPsi
+        rotMatrix(2, 1) = cosPhi * sinPsi
+        rotMatrix(3, 1) = -sinPhi
+
+        rotMatrix(1, 2) = sinTheta * sinPhi * cosPsi - cosTheta * sinPsi
+        rotMatrix(2, 2) = sinTheta * sinPhi * sinPsi + cosTheta * cosPsi
+        rotMatrix(3, 2) = sinTheta * cosPhi
+
+        rotMatrix(1, 3) = cosTheta * sinPhi * cosPsi + sinTheta * sinPsi
+        rotMatrix(2, 3) = cosTheta * sinPhi * sinPsi - sinTheta * cosPsi
+        rotMatrix(3, 3) = cosTheta * cosPhi
+
+        ! Store the translation plus the rotation center in trans.
+
+        trans(1) = rotCenter(1) + translation(1)
+        trans(2) = rotCenter(2) + translation(2)
+        trans(3) = rotCenter(3) + translation(3)
+
+        ! Loop over the number of coordinates to be corrected.
+
+        do i = 1, nn
+
+            ! Determine the relative position w.R.T. The rotation center.
+
+            dx = coor(1, i) - rotCenter(1)
+            dy = coor(2, i) - rotCenter(2)
+            dz = coor(3, i) - rotCenter(3)
+
+            ! Determine the coordinates after the transformation.
+
+            coor(1, i) = rotMatrix(1, 1) * dx + rotMatrix(1, 2) * dy &
+                         + rotMatrix(1, 3) * dz + trans(1)
+            coor(2, i) = rotMatrix(2, 1) * dx + rotMatrix(2, 2) * dy &
+                         + rotMatrix(2, 3) * dz + trans(2)
+            coor(3, i) = rotMatrix(3, 1) * dx + rotMatrix(3, 2) * dy &
+                         + rotMatrix(3, 3) * dz + trans(3)
 
-  !      ==================================================================
-
-  subroutine checkSubfaceCoor(subfaceInfo, coor, nFace,   &
-       nBad, badSubfaces, badDist, &
-       nSpectral)
-    !
-    !       checkSubfaceCoor checks if the coordinates of the subfaces
-    !       defined in subfaceInfo and coor match the coordinates stored
-    !       in flowDoms.
-    !
-    use constants
-    use blockPointers
-    use cgnsGrid
-    use communication
-    use utils, only : setPointers, terminate
-    use commonFormats, only : strings
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in)    :: nFace, nSpectral
-    integer(kind=intType), intent(inout) :: nBad
-
-    integer(kind=intType), dimension(10,*), intent(in) :: subfaceInfo
-    integer(kind=intType), dimension( 4,*), intent(inout) :: badSubfaces
-
-    real(kind=realType), dimension(*),   intent(inout) :: badDist
-    real(kind=realType), dimension(3,*), intent(in)    :: coor
-    !
-    !      Local parameter.
-    !
-    real(kind=realType), parameter :: relTol = 0.05_realType
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, ii, jj, mm, nn
-    integer(kind=intType) :: stepI, stepJ, stepK
-    integer(kind=intType) :: im1, ip1, jm1, jp1, km1, kp1
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
-    integer(kind=intType) :: blockID, sps, globalDonID, subfaceID
+        end do
 
-    real(kind=realType) :: dist2, dist2I, dist2J, dist2K, tol
-    real(kind=realType) :: factI, factJ, factK, diffMax
+    end subroutine periodicTransformSubface
 
-    character(len=maxStringLen) :: errorMessage
-    character(len=7)            :: intString
+    !      ==================================================================
+
+    subroutine checkSubfaceCoor(subfaceInfo, coor, nFace, &
+                                nBad, badSubfaces, badDist, &
+                                nSpectral)
+        !
+        !       checkSubfaceCoor checks if the coordinates of the subfaces
+        !       defined in subfaceInfo and coor match the coordinates stored
+        !       in flowDoms.
+        !
+        use constants
+        use blockPointers
+        use cgnsGrid
+        use communication
+        use utils, only: setPointers, terminate
+        use commonFormats, only: strings
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nFace, nSpectral
+        integer(kind=intType), intent(inout) :: nBad
+
+        integer(kind=intType), dimension(10, *), intent(in) :: subfaceInfo
+        integer(kind=intType), dimension(4, *), intent(inout) :: badSubfaces
+
+        real(kind=realType), dimension(*), intent(inout) :: badDist
+        real(kind=realType), dimension(3, *), intent(in) :: coor
+        !
+        !      Local parameter.
+        !
+        real(kind=realType), parameter :: relTol = 0.05_realType
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, ii, jj, mm, nn
+        integer(kind=intType) :: stepI, stepJ, stepK
+        integer(kind=intType) :: im1, ip1, jm1, jp1, km1, kp1
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
+        integer(kind=intType) :: blockID, sps, globalDonID, subfaceID
 
-    logical :: badFace
+        real(kind=realType) :: dist2, dist2I, dist2J, dist2K, tol
+        real(kind=realType) :: factI, factJ, factK, diffMax
 
-    ! Initialize the counter ii for the coordinates and loop over the
-    ! number of subfaces to be checked.
+        character(len=maxStringLen) :: errorMessage
+        character(len=7) :: intString
 
-    ii = 0
-    subfaceLoop: do nn=1,nFace
+        logical :: badFace
 
-       ! Store the integer info a bit easier.
+        ! Initialize the counter ii for the coordinates and loop over the
+        ! number of subfaces to be checked.
 
-       iBeg = subfaceInfo(1,nn)
-       iEnd = subfaceInfo(2,nn)
-       jBeg = subfaceInfo(3,nn)
-       jEnd = subfaceInfo(4,nn)
-       kBeg = subfaceInfo(5,nn)
-       kEnd = subfaceInfo(6,nn)
+        ii = 0
+        subfaceLoop: do nn = 1, nFace
 
-       blockID     = subfaceInfo( 7,nn)
-       sps         = subfaceInfo( 8,nn)
-       globalDonID = subfaceInfo( 9,nn)
-       subfaceID   = subfaceInfo(10,nn)
+            ! Store the integer info a bit easier.
 
-       ! Set the pointers to the block to be searched.
+            iBeg = subfaceInfo(1, nn)
+            iEnd = subfaceInfo(2, nn)
+            jBeg = subfaceInfo(3, nn)
+            jEnd = subfaceInfo(4, nn)
+            kBeg = subfaceInfo(5, nn)
+            kEnd = subfaceInfo(6, nn)
 
-       call setPointers(blockID, 1_intType, sps)
+            blockID = subfaceInfo(7, nn)
+            sps = subfaceInfo(8, nn)
+            globalDonID = subfaceInfo(9, nn)
+            subfaceID = subfaceInfo(10, nn)
 
-       ! Find the matching 1 to 1 subface.
+            ! Set the pointers to the block to be searched.
 
-       do mm=1,n1to1
+            call setPointers(blockID, 1_intType, sps)
 
-          ! Add the offset of nBocos to nn such that it contains the
-          ! correct index of the arrays.
+            ! Find the matching 1 to 1 subface.
 
-          jj = mm + nBocos
+            do mm = 1, n1to1
 
-          ! If this is the subface exit the loop.
+                ! Add the offset of nBocos to nn such that it contains the
+                ! correct index of the arrays.
 
-          if(min(iBeg,iEnd) == min(inBeg(jj),inEnd(jj)) .and. &
-               max(iBeg,iEnd) == max(inBeg(jj),inEnd(jj)) .and. &
-               min(jBeg,jEnd) == min(jnBeg(jj),jnEnd(jj)) .and. &
-               max(jBeg,jEnd) == max(jnBeg(jj),jnEnd(jj)) .and. &
-               min(kBeg,kEnd) == min(knBeg(jj),knEnd(jj)) .and. &
-               max(kBeg,kEnd) == max(knBeg(jj),knEnd(jj))) exit
+                jj = mm + nBocos
 
-       enddo
+                ! If this is the subface exit the loop.
 
-       ! Check if a subface was found. If not terminate.
+                if (min(iBeg, iEnd) == min(inBeg(jj), inEnd(jj)) .and. &
+                    max(iBeg, iEnd) == max(inBeg(jj), inEnd(jj)) .and. &
+                    min(jBeg, jEnd) == min(jnBeg(jj), jnEnd(jj)) .and. &
+                    max(jBeg, jEnd) == max(jnBeg(jj), jnEnd(jj)) .and. &
+                    min(kBeg, kEnd) == min(knBeg(jj), knEnd(jj)) .and. &
+                    max(kBeg, kEnd) == max(knBeg(jj), knEnd(jj))) exit
 
-       if(mm > n1to1) then
+            end do
 
-          if(nSpectral > 1) then
-             write(intString,"(i6)") sps
-             intString = adjustl(intString)
-             write(errorMessage, strings) "Spectral grid ", trim(intString), &
-               ", Zone ", trim(cgnsDoms(globalDonID)%zoneName), &
-               "Connectivity ", trim(cgnsDoms(globalDonID)%conn1to1(subfaceID)%connectName), &
-               ": 1 to 1 subface not found. Something is seriously wrong with the zone connectivity."
+            ! Check if a subface was found. If not terminate.
 
-          else
-             write(errorMessage, strings) "Zone ", trim(cgnsDoms(globalDonID)%zoneName), &
-               "Connectivity", trim(cgnsDoms(globalDonID)%conn1to1(subfaceID)%connectName), &
-               ": 1 to 1 subface not found. Something is seriously wrong with the zone connectivity."
-          endif
+            if (mm > n1to1) then
 
-          call terminate("checkSubfaceCoor", errorMessage)
+                if (nSpectral > 1) then
+                    write (intString, "(i6)") sps
+                    intString = adjustl(intString)
+                    write (errorMessage, strings) "Spectral grid ", trim(intString), &
+                        ", Zone ", trim(cgnsDoms(globalDonID)%zoneName), &
+                        "Connectivity ", trim(cgnsDoms(globalDonID)%conn1to1(subfaceID)%connectName), &
+                        ": 1 to 1 subface not found. Something is seriously wrong with the zone connectivity."
 
-       endif
+                else
+                    write (errorMessage, strings) "Zone ", trim(cgnsDoms(globalDonID)%zoneName), &
+                        "Connectivity", trim(cgnsDoms(globalDonID)%conn1to1(subfaceID)%connectName), &
+                        ": 1 to 1 subface not found. Something is seriously wrong with the zone connectivity."
+                end if
 
-       ! Determine whether positive or negative running indices must
-       ! be used for the subface. This depends on the sequence stored
-       ! in the coordinate buffer and not of the sequence of the
-       ! 1 to 1 subface mm.
+                call terminate("checkSubfaceCoor", errorMessage)
 
-       stepI = 1; if(iEnd < iBeg) stepI = -1
-       stepJ = 1; if(jEnd < jBeg) stepJ = -1
-       stepK = 1; if(kEnd < kBeg) stepK = -1
+            end if
 
-       ! Initialize badFace to .false. to indicate that it is
-       ! a correct subface. Set the maximum difference to zero.
+            ! Determine whether positive or negative running indices must
+            ! be used for the subface. This depends on the sequence stored
+            ! in the coordinate buffer and not of the sequence of the
+            ! 1 to 1 subface mm.
 
-       badFace = .false.
-       diffMax = zero
+            stepI = 1; if (iEnd < iBeg) stepI = -1
+            stepJ = 1; if (jEnd < jBeg) stepJ = -1
+            stepK = 1; if (kEnd < kBeg) stepK = -1
 
-       ! Loop over the coordinates of the subface to see if they
-       ! match of to a certain tolerance.
+            ! Initialize badFace to .false. to indicate that it is
+            ! a correct subface. Set the maximum difference to zero.
 
-       do k=kBeg,kEnd,stepK
+            badFace = .false.
+            diffMax = zero
 
-          ! Determine the indices to the left and to the right.
-          ! Do not exceed the block boundary. Determine the
-          ! scaling factor of the tolerance accordingly.
+            ! Loop over the coordinates of the subface to see if they
+            ! match of to a certain tolerance.
 
-          km1 = max(1_intType,k-1_intType)
-          kp1 = min(kl,       k+1_intType)
+            do k = kBeg, kEnd, stepK
 
-          factK = one
-          if(km1 == k) factK = factK*four
-          if(kp1 == k) factK = factK*four
+                ! Determine the indices to the left and to the right.
+                ! Do not exceed the block boundary. Determine the
+                ! scaling factor of the tolerance accordingly.
 
-          ! Loop in j-direction.
+                km1 = max(1_intType, k - 1_intType)
+                kp1 = min(kl, k + 1_intType)
 
-          do j=jBeg,jEnd,stepJ
+                factK = one
+                if (km1 == k) factK = factK * four
+                if (kp1 == k) factK = factK * four
 
-             ! Determine the neighbors to the left and right and factJ.
+                ! Loop in j-direction.
 
-             jm1 = max(1_intType,j-1_intType)
-             jp1 = min(jl,       j+1_intType)
+                do j = jBeg, jEnd, stepJ
 
-             factJ = one
-             if(jm1 == j) factJ = factJ*four
-             if(jp1 == j) factJ = factJ*four
+                    ! Determine the neighbors to the left and right and factJ.
 
-             ! Loop in i-direction.
+                    jm1 = max(1_intType, j - 1_intType)
+                    jp1 = min(jl, j + 1_intType)
 
-             do i=iBeg,iEnd,stepI
+                    factJ = one
+                    if (jm1 == j) factJ = factJ * four
+                    if (jp1 == j) factJ = factJ * four
 
-                ! Determine the neighbors to the left and right and factI.
+                    ! Loop in i-direction.
 
-                im1 = max(1_intType,i-1_intType)
-                ip1 = min(il,       i+1_intType)
+                    do i = iBeg, iEnd, stepI
 
-                factI = one
-                if(im1 == i) factI = factI*four
-                if(ip1 == i) factI = factI*four
+                        ! Determine the neighbors to the left and right and factI.
 
-                ! Determine the distances squared in i, j and k direction.
-                ! The reason for squared is that some square roots are
-                ! avoided this way.
+                        im1 = max(1_intType, i - 1_intType)
+                        ip1 = min(il, i + 1_intType)
 
-                dist2I = (x(ip1,j,k,1) - x(im1,j,k,1))**2 &
-                     + (x(ip1,j,k,2) - x(im1,j,k,2))**2 &
-                     + (x(ip1,j,k,3) - x(im1,j,k,3))**2
+                        factI = one
+                        if (im1 == i) factI = factI * four
+                        if (ip1 == i) factI = factI * four
 
-                dist2J = (x(i,jp1,k,1) - x(i,jm1,k,1))**2 &
-                     + (x(i,jp1,k,2) - x(i,jm1,k,2))**2 &
-                     + (x(i,jp1,k,3) - x(i,jm1,k,3))**2
+                        ! Determine the distances squared in i, j and k direction.
+                        ! The reason for squared is that some square roots are
+                        ! avoided this way.
 
-                dist2K = (x(i,j,kp1,1) - x(i,j,km1,1))**2 &
-                     + (x(i,j,kp1,2) - x(i,j,km1,2))**2 &
-                     + (x(i,j,kp1,3) - x(i,j,km1,3))**2
+                        dist2I = (x(ip1, j, k, 1) - x(im1, j, k, 1))**2 &
+                                 + (x(ip1, j, k, 2) - x(im1, j, k, 2))**2 &
+                                 + (x(ip1, j, k, 3) - x(im1, j, k, 3))**2
 
-                ! Make sure that singular lines are excluded.
+                        dist2J = (x(i, jp1, k, 1) - x(i, jm1, k, 1))**2 &
+                                 + (x(i, jp1, k, 2) - x(i, jm1, k, 2))**2 &
+                                 + (x(i, jp1, k, 3) - x(i, jm1, k, 3))**2
 
-                if(dist2I < eps) dist2I = large
-                if(dist2J < eps) dist2J = large
-                if(dist2K < eps) dist2K = large
+                        dist2K = (x(i, j, kp1, 1) - x(i, j, km1, 1))**2 &
+                                 + (x(i, j, kp1, 2) - x(i, j, km1, 2))**2 &
+                                 + (x(i, j, kp1, 3) - x(i, j, km1, 3))**2
 
-                ! Determine the tolerance for identical points.
-                ! Note the multiplication with the square of the relative
-                ! tolerance, because the distances are squared as well.
+                        ! Make sure that singular lines are excluded.
 
-                tol = factK*dist2K
-                tol = min(tol, factJ*dist2J)
-                tol = min(tol, factI*dist2I)
+                        if (dist2I < eps) dist2I = large
+                        if (dist2J < eps) dist2J = large
+                        if (dist2K < eps) dist2K = large
 
-                tol = tol*relTol*relTol
+                        ! Determine the tolerance for identical points.
+                        ! Note the multiplication with the square of the relative
+                        ! tolerance, because the distances are squared as well.
 
-                ! Update the counter for the coordinate in the buffer
-                ! and determine the distance squared between the points
-                ! that should be identical.
+                        tol = factK * dist2K
+                        tol = min(tol, factJ * dist2J)
+                        tol = min(tol, factI * dist2I)
 
-                ii = ii + 1
-                dist2 = (x(i,j,k,1) - coor(1,ii))**2 &
-                     + (x(i,j,k,2) - coor(2,ii))**2 &
-                     + (x(i,j,k,3) - coor(3,ii))**2
+                        tol = tol * relTol * relTol
+
+                        ! Update the counter for the coordinate in the buffer
+                        ! and determine the distance squared between the points
+                        ! that should be identical.
+
+                        ii = ii + 1
+                        dist2 = (x(i, j, k, 1) - coor(1, ii))**2 &
+                                + (x(i, j, k, 2) - coor(2, ii))**2 &
+                                + (x(i, j, k, 3) - coor(3, ii))**2
 
-                ! Flag the subface to bad if the nodes do not coincide
-                ! within the given tolerance. Store the difference if
-                ! it is larger than the currently stored value.
+                        ! Flag the subface to bad if the nodes do not coincide
+                        ! within the given tolerance. Store the difference if
+                        ! it is larger than the currently stored value.
 
-                if(dist2 > tol) then
-                   badFace = .true.
-                   diffMax = max(diffMax, dist2)
-                endif
+                        if (dist2 > tol) then
+                            badFace = .true.
+                            diffMax = max(diffMax, dist2)
+                        end if
 
-             enddo
-          enddo
-       enddo
+                    end do
+                end do
+            end do
 
-       ! Store this subface if it is not matching. The following data
-       ! is stored: global block number of the donor, the subface ID
-       ! on this block, the spectral solution, whether or not this
-       ! is a periodic face and the maximum difference.
+            ! Store this subface if it is not matching. The following data
+            ! is stored: global block number of the donor, the subface ID
+            ! on this block, the spectral solution, whether or not this
+            ! is a periodic face and the maximum difference.
 
-       if( badFace ) then
-          nBad = nBad + 1
+            if (badFace) then
+                nBad = nBad + 1
 
-          badSubfaces(1,nBad) = globalDonID
-          badSubfaces(2,nBad) = subfaceID
-          badSubfaces(3,nBad) = sps
+                badSubfaces(1, nBad) = globalDonID
+                badSubfaces(2, nBad) = subfaceID
+                badSubfaces(3, nBad) = sps
 
-          badSubfaces(4,nBad) = 0
-          if(subfaceID > 0) then
-             if( cgnsDoms(globalDonID)%conn1to1(subfaceID)%periodic ) &
-                  badSubfaces(4,nBad) = 1
-          endif
+                badSubfaces(4, nBad) = 0
+                if (subfaceID > 0) then
+                    if (cgnsDoms(globalDonID)%conn1to1(subfaceID)%periodic) &
+                        badSubfaces(4, nBad) = 1
+                end if
 
-          badDist(nBad) = sqrt(diffMax)
+                badDist(nBad) = sqrt(diffMax)
 
-          !  if(subfaceID == 0 .and. myID == 1) then
-          !    write(*,*) "myID: ", myID, badDist(nBad)
-          !    write(*,*) blockID, mm, globalDonID, nbkGlobal
-          !    write(*,"(6I4)") iBegor, iEndor, jBegor, jEndor, kBegor, kEndor
+                !  if(subfaceID == 0 .and. myID == 1) then
+                !    write(*,*) "myID: ", myID, badDist(nBad)
+                !    write(*,*) blockID, mm, globalDonID, nbkGlobal
+                !    write(*,"(6I4)") iBegor, iEndor, jBegor, jEndor, kBegor, kEndor
 
-          !    jj = mm + nBocos
-          !    write(*,"(6I4)") inBeg(jj),inEnd(jj), jnBeg(jj),jnEnd(jj), knBeg(jj),knEnd(jj)
-          !  endif
-       endif
+                !    jj = mm + nBocos
+                !    write(*,"(6I4)") inBeg(jj),inEnd(jj), jnBeg(jj),jnEnd(jj), knBeg(jj),knEnd(jj)
+                !  endif
+            end if
 
-    end do subfaceLoop
+        end do subfaceLoop
 
-  end subroutine checkSubfaceCoor
+    end subroutine checkSubfaceCoor
 end module gridChecking
diff --git a/src/partitioning/loadBalance.F90 b/src/partitioning/loadBalance.F90
index 2c7405ac9..72e39cb48 100644
--- a/src/partitioning/loadBalance.F90
+++ b/src/partitioning/loadBalance.F90
@@ -2,3251 +2,3247 @@ module loadBalance
 
 contains
 
-  subroutine loadBalanceGrid
-    !
-    !       loadBalance determines the mapping of the blocks onto the
-    !       processors. If the user allows so blocks my be split to obtain
-    !       a better load balance.
-    !
-    use constants
-    use block, only : flowDoms, nDom
-    use cgnsGrid, only : cgnsDoms, cgnsNDom
-    use communication, only : adflow_comm_world, nProc, myID
-    use inputMotion, only : gridMotionSpecified
-    use inputParallel, only : partitionLikeNProc
-    use inputPhysics, only : equationMode
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use iteration, only : deforming_Grid
-    use partitionMod, only : subBlocksofCGNSType, blocks, part, nBlocks, &
-         sortRangesSplitInfo, qsortSubblocksOfCGNSType
-    use utils, only : terminate
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
+    subroutine loadBalanceGrid
+        !
+        !       loadBalance determines the mapping of the blocks onto the
+        !       processors. If the user allows so blocks my be split to obtain
+        !       a better load balance.
+        !
+        use constants
+        use block, only: flowDoms, nDom
+        use cgnsGrid, only: cgnsDoms, cgnsNDom
+        use communication, only: adflow_comm_world, nProc, myID
+        use inputMotion, only: gridMotionSpecified
+        use inputParallel, only: partitionLikeNProc
+        use inputPhysics, only: equationMode
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use iteration, only: deforming_Grid
+        use partitionMod, only: subBlocksofCGNSType, blocks, part, nBlocks, &
+                                sortRangesSplitInfo, qsortSubblocksOfCGNSType
+        use utils, only: terminate
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
 
-    integer(kind=intType) :: i, j, k, nn, mm, ii, jj, kk
-    integer(kind=intType) :: nViscBocos
+        integer(kind=intType) :: i, j, k, nn, mm, ii, jj, kk
+        integer(kind=intType) :: nViscBocos
 
-    integer(kind=intType), dimension(0:nProc-1) :: nBlockPerProc
+        integer(kind=intType), dimension(0:nProc - 1) :: nBlockPerProc
 
-    integer(kind=intType), dimension(:), allocatable :: oldSubfaceID
+        integer(kind=intType), dimension(:), allocatable :: oldSubfaceID
 
-    type(subblocksOfCGNSType), dimension(:), allocatable :: &
-         subblocksOfCGNS
+        type(subblocksOfCGNSType), dimension(:), allocatable :: &
+            subblocksOfCGNS
 
+        ! Determine the block distribution over the processors.
 
-    ! Determine the block distribution over the processors.
+        if (partitionLikeNProc > nProc) then
+            nProc = partitionLikenProc
+        end if
 
+        call blockDistribution
 
-    if (partitionLikeNProc > nProc) then
-       nProc = partitionLikenProc
-    end if
+        ! Restore the size of the comm
+        call mpi_Comm_Size(adflow_comm_world, nProc, ierr)
 
-    call blockDistribution
+        ! We ned to modify what comes back if we are using the
+        ! partitionLikenProc option:
+        if (partitionLikenProc > nProc) then
+            do i = 1, nBlocks
+                part(i) = mod(part(i), nProc)
+            end do
+        end if
 
-    ! Restore the size of the comm
-    call mpi_Comm_Size(adflow_comm_world, nProc, ierr)
+        !
+        !       Determine the local block info.
+        !
+        ! Initialize nBlockPerProc to 0.
 
-    ! We ned to modify what comes back if we are using the
-    ! partitionLikenProc option:
-    if (partitionLikenProc > nProc) then
-       do i=1, nBlocks
-          part(i) = mod(part(i), nProc)
-       end do
-    end if
+        nBlockPerProc = 0
 
-    !
-    !       Determine the local block info.
-    !
-    ! Initialize nBlockPerProc to 0.
+        ! Determine the number of blocks the current processor will store
+        ! and the local block number for every block.
 
-    nBlockPerProc = 0
+        nDom = 0
+        do i = 1, nBlocks
+            if (part(i) == myID) nDom = nDom + 1
 
-    ! Determine the number of blocks the current processor will store
-    ! and the local block number for every block.
+            nBlockPerProc(part(i)) = nBlockPerProc(part(i)) + 1
+            blocks(i)%blockID = nBlockPerProc(part(i))
+        end do
 
-    nDom = 0
-    do i=1,nBlocks
-       if(part(i) == myID) nDom = nDom +1
+        ! Allocate the memory for flowDoms and initialize its pointers
+        ! to null pointers.
+
+        call initFlowDoms
+
+        ! Repeat the loop, but now store the info of the blocks
+        ! in flowDoms. Store the number of time intervals for the spectral
+        ! method a bit easier in mm. Note that this number is 1 for the
+        ! steady and unsteady modes.
+
+        nn = 0
+        mm = nTimeIntervalsSpectral
+        domains: do i = 1, nBlocks
+            myBlock: if (part(i) == myID) then
+
+                ! Update the counter nn.
+
+                nn = nn + 1
+
+                ! Copy the dimensions of the block.
+
+                flowDoms(nn, 1, 1:mm)%nx = blocks(i)%nx
+                flowDoms(nn, 1, 1:mm)%ny = blocks(i)%ny
+                flowDoms(nn, 1, 1:mm)%nz = blocks(i)%nz
+
+                flowDoms(nn, 1, 1:mm)%il = blocks(i)%il
+                flowDoms(nn, 1, 1:mm)%jl = blocks(i)%jl
+                flowDoms(nn, 1, 1:mm)%kl = blocks(i)%kl
+
+                ! The number of single halo quantities.
+
+                flowDoms(nn, 1, 1:mm)%ie = blocks(i)%il + 1
+                flowDoms(nn, 1, 1:mm)%je = blocks(i)%jl + 1
+                flowDoms(nn, 1, 1:mm)%ke = blocks(i)%kl + 1
+
+                ! The number of double halo quantities.
+
+                flowDoms(nn, 1, 1:mm)%ib = blocks(i)%il + 2
+                flowDoms(nn, 1, 1:mm)%jb = blocks(i)%jl + 2
+                flowDoms(nn, 1, 1:mm)%kb = blocks(i)%kl + 2
 
-       nBlockPerProc(part(i)) = nBlockPerProc(part(i)) + 1
-       blocks(i)%blockID = nBlockPerProc(part(i))
-    enddo
+                ! Relation to the original cgns grid.
 
-    ! Allocate the memory for flowDoms and initialize its pointers
-    ! to null pointers.
+                flowDoms(nn, 1, 1:mm)%cgnsBlockID = blocks(i)%cgnsBlockID
 
-    call initFlowDoms
+                flowDoms(nn, 1, 1:mm)%iBegOr = blocks(i)%iBegOr
+                flowDoms(nn, 1, 1:mm)%jBegOr = blocks(i)%jBegOr
+                flowDoms(nn, 1, 1:mm)%kBegOr = blocks(i)%kBegOr
 
-    ! Repeat the loop, but now store the info of the blocks
-    ! in flowDoms. Store the number of time intervals for the spectral
-    ! method a bit easier in mm. Note that this number is 1 for the
-    ! steady and unsteady modes.
-
-    nn = 0
-    mm = nTimeIntervalsSpectral
-    domains: do i=1,nBlocks
-       myBlock: if(part(i) == myID) then
-
-          ! Update the counter nn.
-
-          nn = nn + 1
-
-          ! Copy the dimensions of the block.
-
-          flowDoms(nn,1,1:mm)%nx = blocks(i)%nx
-          flowDoms(nn,1,1:mm)%ny = blocks(i)%ny
-          flowDoms(nn,1,1:mm)%nz = blocks(i)%nz
-
-          flowDoms(nn,1,1:mm)%il = blocks(i)%il
-          flowDoms(nn,1,1:mm)%jl = blocks(i)%jl
-          flowDoms(nn,1,1:mm)%kl = blocks(i)%kl
-
-          ! The number of single halo quantities.
-
-          flowDoms(nn,1,1:mm)%ie = blocks(i)%il + 1
-          flowDoms(nn,1,1:mm)%je = blocks(i)%jl + 1
-          flowDoms(nn,1,1:mm)%ke = blocks(i)%kl + 1
+                flowDoms(nn, 1, 1:mm)%iEndOr = blocks(i)%iEndOr
+                flowDoms(nn, 1, 1:mm)%jEndOr = blocks(i)%jEndOr
+                flowDoms(nn, 1, 1:mm)%kEndOr = blocks(i)%kEndOr
 
-          ! The number of double halo quantities.
+                ! Determine whether or not the block is moving.
+                ! First initialize it to gridMotionSpecified. This is
+                ! .true. if a rigid body motion was specified for the
+                ! entire grid; otherwise it is .false.
 
-          flowDoms(nn,1,1:mm)%ib = blocks(i)%il + 2
-          flowDoms(nn,1,1:mm)%jb = blocks(i)%jl + 2
-          flowDoms(nn,1,1:mm)%kb = blocks(i)%kl + 2
+                flowDoms(nn, 1, 1:mm)%blockIsMoving = gridMotionSpecified
 
-          ! Relation to the original cgns grid.
+                ! Check whether the corresponding cgns block is moving.
+                ! Although it is possible that boundaries of a block rotate
+                ! differently than the block itself, this should not be
+                ! taken into account here; that's a matter of BC's.
+                ! Here only the internal block structure is looked at.
 
-          flowDoms(nn,1,1:mm)%cgnsBlockID = blocks(i)%cgnsBlockID
+                k = flowDoms(nn, 1, 1)%cgnsBlockID
+                if (cgnsDoms(k)%rotatingFrameSpecified) &
+                    flowDoms(nn, 1, 1:mm)%blockIsMoving = .true.
 
-          flowDoms(nn,1,1:mm)%iBegOr = blocks(i)%iBegOr
-          flowDoms(nn,1,1:mm)%jBegOr = blocks(i)%jBegOr
-          flowDoms(nn,1,1:mm)%kBegOr = blocks(i)%kBegOr
+                ! For an unsteady computation on a deforming mesh
+                ! blockIsMoving is always .true. Note that the time spectral
+                ! method is also an unsteady computation.
 
-          flowDoms(nn,1,1:mm)%iEndOr = blocks(i)%iEndOr
-          flowDoms(nn,1,1:mm)%jEndOr = blocks(i)%jEndOr
-          flowDoms(nn,1,1:mm)%kEndOr = blocks(i)%kEndOr
+                if (deforming_Grid .and. &
+                    (equationMode == unsteady .or. &
+                     equationMode == timeSpectral)) &
+                    flowDoms(nn, 1, 1:mm)%blockIsMoving = .true.
 
-          ! Determine whether or not the block is moving.
-          ! First initialize it to gridMotionSpecified. This is
-          ! .true. if a rigid body motion was specified for the
-          ! entire grid; otherwise it is .false.
+                ! Set addGridVelocities to blockIsMoving. This could be
+                ! overwritten later when the code is running in python mode.
 
-          flowDoms(nn,1,1:mm)%blockIsMoving = gridMotionSpecified
+                flowDoms(nn, 1, 1:mm)%addGridVelocities = &
+                    flowDoms(nn, 1, 1:mm)%blockIsMoving
 
-          ! Check whether the corresponding cgns block is moving.
-          ! Although it is possible that boundaries of a block rotate
-          ! differently than the block itself, this should not be
-          ! taken into account here; that's a matter of BC's.
-          ! Here only the internal block structure is looked at.
+                ! Set the number of subfaces and allocate the memory for the
+                ! subface info. Note that this memory is only allocated for
+                ! the first spectral time value; the other ones are identical.
 
-          k = flowDoms(nn,1,1)%cgnsBlockID
-          if( cgnsDoms(k)%rotatingFrameSpecified ) &
-               flowDoms(nn,1,1:mm)%blockIsMoving = .true.
+                flowDoms(nn, 1, 1:mm)%nBocos = blocks(i)%nBocos
+                flowDoms(nn, 1, 1:mm)%n1to1 = blocks(i)%n1to1
+                flowDoms(nn, 1, 1:mm)%nSubface = blocks(i)%nSubface
+                j = blocks(i)%nSubface
 
-          ! For an unsteady computation on a deforming mesh
-          ! blockIsMoving is always .true. Note that the time spectral
-          ! method is also an unsteady computation.
+                allocate (flowDoms(nn, 1, 1)%BCType(j), &
+                          flowDoms(nn, 1, 1)%BCFaceID(j), &
+                          flowDoms(nn, 1, 1)%cgnsSubface(j), &
+                          flowDoms(nn, 1, 1)%inBeg(j), &
+                          flowDoms(nn, 1, 1)%jnBeg(j), &
+                          flowDoms(nn, 1, 1)%knBeg(j), &
+                          flowDoms(nn, 1, 1)%inEnd(j), &
+                          flowDoms(nn, 1, 1)%jnEnd(j), &
+                          flowDoms(nn, 1, 1)%knEnd(j), &
+                          flowDoms(nn, 1, 1)%dinBeg(j), &
+                          flowDoms(nn, 1, 1)%djnBeg(j), &
+                          flowDoms(nn, 1, 1)%dknBeg(j), &
+                          flowDoms(nn, 1, 1)%dinEnd(j), &
+                          flowDoms(nn, 1, 1)%djnEnd(j), &
+                          flowDoms(nn, 1, 1)%dknEnd(j), &
+                          flowDoms(nn, 1, 1)%neighProc(j), &
+                          flowDoms(nn, 1, 1)%neighBlock(j), &
+                          flowDoms(nn, 1, 1)%l1(j), &
+                          flowDoms(nn, 1, 1)%l2(j), &
+                          flowDoms(nn, 1, 1)%l3(j), &
+                          flowDoms(nn, 1, 1)%groupNum(j), &
+                          stat=ierr)
 
-          if(deforming_Grid .and.            &
-               (equationMode == unsteady .or. &
-               equationMode == timeSpectral)) &
-               flowDoms(nn,1,1:mm)%blockIsMoving = .true.
+                if (ierr /= 0) &
+                    call terminate("loadBalance", &
+                                   "Memory allocation failure for subface info")
+
+                ! Determine the new numbering of the boundary subfaces, such
+                ! that the viscous subfaces are numbered first, followed by
+                ! the inViscid subfaces, etc.
+
+                allocate (oldSubfaceID(blocks(i)%nBocos), stat=ierr)
+                if (ierr /= 0) &
+                    call terminate("loadBalance", &
+                                   "Memory allocation failure for oldSubfaceID")
+
+                call sortSubfaces(oldSubfaceID, blocks(i))
+
+                ! Initialize the number of viscous boundary subfaces to 0.
+
+                nViscBocos = 0
+
+                ! Copy the info. Set the neighboring proc and block id to -1
+                ! and 0 repectively in this loop. This is okay for boundary
+                ! faces, but must be corrected for the internal block
+                ! boundaries.
+
+                do j = 1, blocks(i)%nSubface
+
+                    ! Store the old subface id in k. For boundary faces the
+                    ! sorting is taken into account; for 1 to 1 subfaces the
+                    ! number is identical to the subface id in block.
+
+                    k = j
+                    if (j <= blocks(i)%nBocos) k = oldSubfaceID(j)
+
+                    ! Copy the info.
+
+                    flowDoms(nn, 1, 1)%BCType(j) = blocks(i)%BCType(k)
+                    flowDoms(nn, 1, 1)%BCFaceID(j) = blocks(i)%BCFaceID(k)
+                    flowDoms(nn, 1, 1)%cgnsSubface(j) = blocks(i)%cgnsSubface(k)
+
+                    flowDoms(nn, 1, 1)%inBeg(j) = blocks(i)%inBeg(k)
+                    flowDoms(nn, 1, 1)%jnBeg(j) = blocks(i)%jnBeg(k)
+                    flowDoms(nn, 1, 1)%knBeg(j) = blocks(i)%knBeg(k)
+                    flowDoms(nn, 1, 1)%inEnd(j) = blocks(i)%inEnd(k)
+                    flowDoms(nn, 1, 1)%jnEnd(j) = blocks(i)%jnEnd(k)
+                    flowDoms(nn, 1, 1)%knEnd(j) = blocks(i)%knEnd(k)
+
+                    flowDoms(nn, 1, 1)%dinBeg(j) = blocks(i)%dinBeg(k)
+                    flowDoms(nn, 1, 1)%djnBeg(j) = blocks(i)%djnBeg(k)
+                    flowDoms(nn, 1, 1)%dknBeg(j) = blocks(i)%dknBeg(k)
+                    flowDoms(nn, 1, 1)%dinEnd(j) = blocks(i)%dinEnd(k)
+                    flowDoms(nn, 1, 1)%djnEnd(j) = blocks(i)%djnEnd(k)
+                    flowDoms(nn, 1, 1)%dknEnd(j) = blocks(i)%dknEnd(k)
+
+                    flowDoms(nn, 1, 1)%neighProc(j) = -1
+                    flowDoms(nn, 1, 1)%neighBlock(j) = 0
+
+                    flowDoms(nn, 1, 1)%l1(j) = blocks(i)%l1(k)
+                    flowDoms(nn, 1, 1)%l2(j) = blocks(i)%l2(k)
+                    flowDoms(nn, 1, 1)%l3(j) = blocks(i)%l3(k)
+
+                    flowDoms(nn, 1, 1)%groupNum(j) = blocks(i)%groupNum(k)
+
+                    ! Update the number of viscous boundaries if this
+                    ! is a viscous subface.
+
+                    if (flowDoms(nn, 1, 1)%BCType(j) == NSWallAdiabatic .or. &
+                        flowDoms(nn, 1, 1)%BCType(j) == NSWallIsothermal) &
+                        nViscBocos = nViscBocos + 1
+                end do
+
+                flowDoms(nn, 1, 1:mm)%nViscBocos = nViscBocos
+
+                ! Correct the neighboring block and proc ID for internal
+                ! block boundaries.
+
+                do k = 1, blocks(i)%n1to1
+                    j = blocks(i)%nBocos + k
+
+                    flowDoms(nn, 1, 1)%neighProc(j) = part(blocks(i)%neighBlock(j))
+                    flowDoms(nn, 1, 1)%neighBlock(j) = &
+                        blocks(blocks(i)%neighBlock(j))%blockID
+                end do
+
+                ! Release the memory of oldSubfaceID.
+
+                deallocate (oldSubfaceID, stat=ierr)
+                if (ierr /= 0) &
+                    call terminate("loadBalance", &
+                                   "Deallocation error for oldSubfaceID")
+
+            end if myBlock
+
+            ! Release the memory of the subface on this block.
+
+            deallocate (blocks(i)%bcType, blocks(i)%bcFaceid, &
+                        blocks(i)%cgnsSubface, blocks(i)%inBeg, &
+                        blocks(i)%jnBeg, blocks(i)%knBeg, &
+                        blocks(i)%inEnd, blocks(i)%jnEnd, &
+                        blocks(i)%knEnd, blocks(i)%dinBeg, &
+                        blocks(i)%djnBeg, blocks(i)%dknBeg, &
+                        blocks(i)%dinEnd, blocks(i)%djnEnd, &
+                        blocks(i)%dknEnd, blocks(i)%neighBlock, &
+                        blocks(i)%l1, blocks(i)%l2, &
+                        blocks(i)%l3, blocks(i)%groupNum, &
+                        stat=ierr)
+            if (ierr /= 0) &
+                call terminate("loadBalance", &
+                               "Deallocation error for boundary info")
+        end do domains
 
-          ! Set addGridVelocities to blockIsMoving. This could be
-          ! overwritten later when the code is running in python mode.
+        !       Determine the number of processors, the processor ID's on
+        !       which the original cgns blocks are stored, the local
+        !       block ID's and the nodal ranges of the subblocks. As blocks
+        !       can be split during run-time, multiple processors can store a
+        !       part of original block.
+        !
+        ! Allocate the memory for subblocksOfCGNS.
+
+        allocate (subblocksOfCGNS(nBlocks), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("loadBalance", &
+                           "Memory allocation failure for subblocksOfCGNS")
 
-          flowDoms(nn,1,1:mm)%addGridVelocities = &
-               flowDoms(nn,1,1:mm)%blockIsMoving
+        ! Copy the data into subblocksOfCGNS.
 
-          ! Set the number of subfaces and allocate the memory for the
-          ! subface info. Note that this memory is only allocated for
-          ! the first spectral time value; the other ones are identical.
+        do nn = 1, nBlocks
+            subblocksOfCGNS(nn)%cgnsBlockID = blocks(nn)%cgnsBlockID
+            subblocksOfCGNS(nn)%procID = part(nn)
+            subblocksOfCGNS(nn)%blockID = blocks(nn)%blockID
 
-          flowDoms(nn,1,1:mm)%nBocos   = blocks(i)%nBocos
-          flowDoms(nn,1,1:mm)%n1to1    = blocks(i)%n1to1
-          flowDoms(nn,1,1:mm)%nSubface = blocks(i)%nSubface
-          j                           = blocks(i)%nSubface
+            subblocksOfCGNS(nn)%iBegOr = blocks(nn)%iBegOr
+            subblocksOfCGNS(nn)%iEndOr = blocks(nn)%iEndOr
+            subblocksOfCGNS(nn)%jBegOr = blocks(nn)%jBegOr
+            subblocksOfCGNS(nn)%jEndOr = blocks(nn)%jEndOr
+            subblocksOfCGNS(nn)%kBegOr = blocks(nn)%kBegOr
+            subblocksOfCGNS(nn)%kEndOr = blocks(nn)%kEndOr
+        end do
 
-          allocate(flowDoms(nn,1,1)%BCType(j),      &
-               flowDoms(nn,1,1)%BCFaceID(j),    &
-               flowDoms(nn,1,1)%cgnsSubface(j), &
-               flowDoms(nn,1,1)%inBeg(j),       &
-               flowDoms(nn,1,1)%jnBeg(j),       &
-               flowDoms(nn,1,1)%knBeg(j),       &
-               flowDoms(nn,1,1)%inEnd(j),       &
-               flowDoms(nn,1,1)%jnEnd(j),       &
-               flowDoms(nn,1,1)%knEnd(j),       &
-               flowDoms(nn,1,1)%dinBeg(j),      &
-               flowDoms(nn,1,1)%djnBeg(j),      &
-               flowDoms(nn,1,1)%dknBeg(j),      &
-               flowDoms(nn,1,1)%dinEnd(j),      &
-               flowDoms(nn,1,1)%djnEnd(j),      &
-               flowDoms(nn,1,1)%dknEnd(j),      &
-               flowDoms(nn,1,1)%neighProc(j),   &
-               flowDoms(nn,1,1)%neighBlock(j),  &
-               flowDoms(nn,1,1)%l1(j),          &
-               flowDoms(nn,1,1)%l2(j),          &
-               flowDoms(nn,1,1)%l3(j),          &
-               flowDoms(nn,1,1)%groupNum(j),    &
-               stat=ierr)
+        ! Sort subblocksOfCGNS in increasing order.
 
+        call qsortSubblocksOfCGNSType(subblocksOfCGNS, nBlocks)
 
-          if(ierr /= 0)                   &
-               call terminate("loadBalance", &
-               "Memory allocation failure for subface info")
-
-          ! Determine the new numbering of the boundary subfaces, such
-          ! that the viscous subfaces are numbered first, followed by
-          ! the inViscid subfaces, etc.
-
-          allocate(oldSubfaceID(blocks(i)%nBocos), stat=ierr)
-          if(ierr /= 0)                   &
-               call terminate("loadBalance", &
-               "Memory allocation failure for oldSubfaceID")
-
-          call sortSubfaces(oldSubfaceID, blocks(i))
-
-          ! Initialize the number of viscous boundary subfaces to 0.
-
-          nViscBocos = 0
-
-          ! Copy the info. Set the neighboring proc and block id to -1
-          ! and 0 repectively in this loop. This is okay for boundary
-          ! faces, but must be corrected for the internal block
-          ! boundaries.
-
-          do j=1,blocks(i)%nSubface
-
-             ! Store the old subface id in k. For boundary faces the
-             ! sorting is taken into account; for 1 to 1 subfaces the
-             ! number is identical to the subface id in block.
-
-             k = j
-             if(j <= blocks(i)%nBocos) k = oldSubfaceID(j)
-
-             ! Copy the info.
-
-             flowDoms(nn,1,1)%BCType(j)      = blocks(i)%BCType(k)
-             flowDoms(nn,1,1)%BCFaceID(j)    = blocks(i)%BCFaceID(k)
-             flowDoms(nn,1,1)%cgnsSubface(j) = blocks(i)%cgnsSubface(k)
-
-             flowDoms(nn,1,1)%inBeg(j) = blocks(i)%inBeg(k)
-             flowDoms(nn,1,1)%jnBeg(j) = blocks(i)%jnBeg(k)
-             flowDoms(nn,1,1)%knBeg(j) = blocks(i)%knBeg(k)
-             flowDoms(nn,1,1)%inEnd(j) = blocks(i)%inEnd(k)
-             flowDoms(nn,1,1)%jnEnd(j) = blocks(i)%jnEnd(k)
-             flowDoms(nn,1,1)%knEnd(j) = blocks(i)%knEnd(k)
-
-             flowDoms(nn,1,1)%dinBeg(j) = blocks(i)%dinBeg(k)
-             flowDoms(nn,1,1)%djnBeg(j) = blocks(i)%djnBeg(k)
-             flowDoms(nn,1,1)%dknBeg(j) = blocks(i)%dknBeg(k)
-             flowDoms(nn,1,1)%dinEnd(j) = blocks(i)%dinEnd(k)
-             flowDoms(nn,1,1)%djnEnd(j) = blocks(i)%djnEnd(k)
-             flowDoms(nn,1,1)%dknEnd(j) = blocks(i)%dknEnd(k)
-
-             flowDoms(nn,1,1)%neighProc(j)  = -1
-             flowDoms(nn,1,1)%neighBlock(j) =  0
-
-             flowDoms(nn,1,1)%l1(j) = blocks(i)%l1(k)
-             flowDoms(nn,1,1)%l2(j) = blocks(i)%l2(k)
-             flowDoms(nn,1,1)%l3(j) = blocks(i)%l3(k)
-
-             flowDoms(nn,1,1)%groupNum(j) = blocks(i)%groupNum(k)
-
-             ! Update the number of viscous boundaries if this
-             ! is a viscous subface.
-
-             if(flowDoms(nn,1,1)%BCType(j) == NSWallAdiabatic .or. &
-                  flowDoms(nn,1,1)%BCType(j) == NSWallIsothermal)    &
-                  nViscBocos = nViscBocos + 1
-          enddo
-
-          flowDoms(nn,1,1:mm)%nViscBocos = nViscBocos
-
-          ! Correct the neighboring block and proc ID for internal
-          ! block boundaries.
-
-          do k=1,blocks(i)%n1to1
-             j = blocks(i)%nBocos + k
-
-             flowDoms(nn,1,1)%neighProc(j)  = part(blocks(i)%neighBlock(j))
-             flowDoms(nn,1,1)%neighBlock(j) = &
-                  blocks(blocks(i)%neighBlock(j))%blockID
-          enddo
-
-          ! Release the memory of oldSubfaceID.
-
-          deallocate(oldSubfaceID, stat=ierr)
-          if(ierr /= 0)                   &
-               call terminate("loadBalance", &
-               "Deallocation error for oldSubfaceID")
+        ! Loop over the number of cgns blocks and find out the number of
+        ! subblocks it contains.
 
-       endif myBlock
+        ii = 1
+        subBlockLoop: do nn = 1, cgnsNDom
 
-       ! Release the memory of the subface on this block.
+            ! Determine the ending index jj in subblocksOfCGNS for this
+            ! CGNS block. The starting index is ii.
 
-       deallocate(blocks(i)%bcType,      blocks(i)%bcFaceid,    &
-            blocks(i)%cgnsSubface, blocks(i)%inBeg,       &
-            blocks(i)%jnBeg,       blocks(i)%knBeg,       &
-            blocks(i)%inEnd,       blocks(i)%jnEnd,       &
-            blocks(i)%knEnd,       blocks(i)%dinBeg,      &
-            blocks(i)%djnBeg,      blocks(i)%dknBeg,      &
-            blocks(i)%dinEnd,      blocks(i)%djnEnd,      &
-            blocks(i)%dknEnd,      blocks(i)%neighBlock,  &
-            blocks(i)%l1,          blocks(i)%l2,          &
-            blocks(i)%l3,          blocks(i)%groupNum,    &
-            stat=ierr)
-       if(ierr /= 0)                   &
-            call terminate("loadBalance", &
-            "Deallocation error for boundary info")
-    enddo domains
-
-    !       Determine the number of processors, the processor ID's on
-    !       which the original cgns blocks are stored, the local
-    !       block ID's and the nodal ranges of the subblocks. As blocks
-    !       can be split during run-time, multiple processors can store a
-    !       part of original block.
-    !
-    ! Allocate the memory for subblocksOfCGNS.
-
-    allocate(subblocksOfCGNS(nBlocks), stat=ierr)
-    if(ierr /= 0)                   &
-         call terminate("loadBalance", &
-         "Memory allocation failure for subblocksOfCGNS")
-
-    ! Copy the data into subblocksOfCGNS.
-
-    do nn=1,nBlocks
-       subblocksOfCGNS(nn)%cgnsBlockID = blocks(nn)%cgnsBlockID
-       subblocksOfCGNS(nn)%procID      = part(nn)
-       subblocksOfCGNS(nn)%blockID     = blocks(nn)%blockID
-
-       subblocksOfCGNS(nn)%iBegOr = blocks(nn)%iBegOr
-       subblocksOfCGNS(nn)%iEndOr = blocks(nn)%iEndOr
-       subblocksOfCGNS(nn)%jBegOr = blocks(nn)%jBegOr
-       subblocksOfCGNS(nn)%jEndOr = blocks(nn)%jEndOr
-       subblocksOfCGNS(nn)%kBegOr = blocks(nn)%kBegOr
-       subblocksOfCGNS(nn)%kEndOr = blocks(nn)%kEndOr
-    enddo
-
-    ! Sort subblocksOfCGNS in increasing order.
-
-    call qsortSubblocksOfCGNSType(subblocksOfCGNS, nBlocks)
-
-    ! Loop over the number of cgns blocks and find out the number of
-    ! subblocks it contains.
-
-    ii = 1
-    subBlockLoop: do nn=1,cgnsNDom
-
-       ! Determine the ending index jj in subblocksOfCGNS for this
-       ! CGNS block. The starting index is ii.
-
-       if(nn == cgnsNDom) then
-          jj = nBlocks
-       else
-          jj = ii
-          do
-             if(subblocksOfCGNS(jj+1)%cgnsBlockID > nn) exit
-             jj = jj + 1
-          enddo
-       endif
-
-       ! Set nSubBlocks and allocate the memory for procStored,
-       ! localBlockID, iBegOr, iEndOr, etc.
-
-       cgnsDoms(nn)%nSubBlocks = jj - ii + 1
-       k = cgnsDoms(nn)%nSubBlocks
-       allocate(cgnsDoms(nn)%iBegOr(k),       cgnsDoms(nn)%iEndOr(k), &
-            cgnsDoms(nn)%jBegOr(k),       cgnsDoms(nn)%jEndOr(k), &
-            cgnsDoms(nn)%kBegOr(k),       cgnsDoms(nn)%kEndOr(k), &
-            cgnsDoms(nn)%procStored(k),                           &
-            cgnsDoms(nn)%localBlockID(k), stat=ierr)
-       if(ierr /= 0)                   &
-            call terminate("loadBalance", &
-            "Memory allocation failure for procStored, &
-            &localBlockID, iBegOr, iEndOr, etc.")
-
-       ! Copy the processor ID's, the local block ID's
-       ! and the subranges.
-
-       do i=1,cgnsDoms(nn)%nSubBlocks
-          j = i + ii - 1
-          cgnsDoms(nn)%procStored(i)   = subblocksOfCGNS(j)%procID
-          cgnsDoms(nn)%localBlockID(i) = subblocksOfCGNS(j)%blockID
-
-          cgnsDoms(nn)%iBegOr(i) = subblocksOfCGNS(j)%iBegOr
-          cgnsDoms(nn)%iEndOr(i) = subblocksOfCGNS(j)%iEndOr
-          cgnsDoms(nn)%jBegOr(i) = subblocksOfCGNS(j)%jBegOr
-          cgnsDoms(nn)%jEndOr(i) = subblocksOfCGNS(j)%jEndOr
-          cgnsDoms(nn)%kBegOr(i) = subblocksOfCGNS(j)%kBegOr
-          cgnsDoms(nn)%kEndOr(i) = subblocksOfCGNS(j)%kEndOr
-       enddo
-
-       ! Set ii for the next CGNS block.
-
-       ii = jj + 1
-
-    enddo subBlockLoop
-
-    ! Release the memory of blocks, part and subblocksOfCGNS.
-
-    deallocate(blocks, part, subblocksOfCGNS, stat=ierr)
-    if(ierr /= 0)                   &
-         call terminate("loadBalance", &
-         "Deallocation error for blocks, part and &
-         &subblocksOfCGNS")
-
-    !j = 20+myID
-    !do nn=1,ndom
-    !  write(j,"(8I4)") nn, flowDoms(nn,1,1)%cgnsBlockID, &
-    !                   flowDoms(nn,1,1)%iBegOr, flowDoms(nn,1,1)%iEndOr, &
-    !                   flowDoms(nn,1,1)%jBegOr, flowDoms(nn,1,1)%jEndOr, &
-    !                   flowDoms(nn,1,1)%kBegOr, flowDoms(nn,1,1)%kEndOr
-    !enddo
-
-  end subroutine loadBalanceGrid
-
-  subroutine blockDistribution
-    !
-    !       blockDistribution determines the distribution of the blocks
-    !       over the processors. If blocks must be split to obtain a good
-    !       load balance an iterative algorithm is used to determine the
-    !       best way to split them.
-    !
-    use constants
-    use cgnsGrid, only : cgnsDoms, cgnsNDom
-    use communication, only : myID, adflow_comm_world, nProc
-    use inputParallel, only : loadBalanceIter, loadImbalance, splitBlocks
-    use partitionMod, onlY : splitCGNSType, distributionBlockType, ubVec, &
-         blocks, part, nBlocks, sortRangesSplitInfo
-    use utils, only : terminate
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: nn, nx, ny, nz
-    integer(kind=intType) :: nCellsTot, nCellsEven, nCellsUpper
-    integer(kind=intType) :: nCellsPerProcMax
-    integer(kind=intType) :: iter, iterMax
-
-    logical :: cellsBalanced,   facesBalanced
-    logical :: emptyPartitions, commNeglected
-
-    type(splitCGNSType), dimension(cgnsNDom) :: splitInfo
+            if (nn == cgnsNDom) then
+                jj = nBlocks
+            else
+                jj = ii
+                do
+                    if (subblocksOfCGNS(jj + 1)%cgnsBlockID > nn) exit
+                    jj = jj + 1
+                end do
+            end if
+
+            ! Set nSubBlocks and allocate the memory for procStored,
+            ! localBlockID, iBegOr, iEndOr, etc.
+
+            cgnsDoms(nn)%nSubBlocks = jj - ii + 1
+            k = cgnsDoms(nn)%nSubBlocks
+            allocate (cgnsDoms(nn)%iBegOr(k), cgnsDoms(nn)%iEndOr(k), &
+                      cgnsDoms(nn)%jBegOr(k), cgnsDoms(nn)%jEndOr(k), &
+                      cgnsDoms(nn)%kBegOr(k), cgnsDoms(nn)%kEndOr(k), &
+                      cgnsDoms(nn)%procStored(k), &
+                      cgnsDoms(nn)%localBlockID(k), stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("loadBalance", &
+                 "Memory allocation failure for procStored, &
+                 &localBlockID, iBegOr, iEndOr, etc.")
+
+            ! Copy the processor ID's, the local block ID's
+            ! and the subranges.
+
+            do i = 1, cgnsDoms(nn)%nSubBlocks
+                j = i + ii - 1
+                cgnsDoms(nn)%procStored(i) = subblocksOfCGNS(j)%procID
+                cgnsDoms(nn)%localBlockID(i) = subblocksOfCGNS(j)%blockID
+
+                cgnsDoms(nn)%iBegOr(i) = subblocksOfCGNS(j)%iBegOr
+                cgnsDoms(nn)%iEndOr(i) = subblocksOfCGNS(j)%iEndOr
+                cgnsDoms(nn)%jBegOr(i) = subblocksOfCGNS(j)%jBegOr
+                cgnsDoms(nn)%jEndOr(i) = subblocksOfCGNS(j)%jEndOr
+                cgnsDoms(nn)%kBegOr(i) = subblocksOfCGNS(j)%kBegOr
+                cgnsDoms(nn)%kEndOr(i) = subblocksOfCGNS(j)%kEndOr
+            end do
+
+            ! Set ii for the next CGNS block.
+
+            ii = jj + 1
+
+        end do subBlockLoop
+
+        ! Release the memory of blocks, part and subblocksOfCGNS.
+
+        deallocate (blocks, part, subblocksOfCGNS, stat=ierr)
+        if (ierr /= 0) &
+             call terminate("loadBalance", &
+             "Deallocation error for blocks, part and &
+             &subblocksOfCGNS")
+
+        !j = 20+myID
+        !do nn=1,ndom
+        !  write(j,"(8I4)") nn, flowDoms(nn,1,1)%cgnsBlockID, &
+        !                   flowDoms(nn,1,1)%iBegOr, flowDoms(nn,1,1)%iEndOr, &
+        !                   flowDoms(nn,1,1)%jBegOr, flowDoms(nn,1,1)%jEndOr, &
+        !                   flowDoms(nn,1,1)%kBegOr, flowDoms(nn,1,1)%kEndOr
+        !enddo
+
+    end subroutine loadBalanceGrid
+
+    subroutine blockDistribution
+        !
+        !       blockDistribution determines the distribution of the blocks
+        !       over the processors. If blocks must be split to obtain a good
+        !       load balance an iterative algorithm is used to determine the
+        !       best way to split them.
+        !
+        use constants
+        use cgnsGrid, only: cgnsDoms, cgnsNDom
+        use communication, only: myID, adflow_comm_world, nProc
+        use inputParallel, only: loadBalanceIter, loadImbalance, splitBlocks
+        use partitionMod, onlY: splitCGNSType, distributionBlockType, ubVec, &
+                                blocks, part, nBlocks, sortRangesSplitInfo
+        use utils, only: terminate
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: nn, nx, ny, nz
+        integer(kind=intType) :: nCellsTot, nCellsEven, nCellsUpper
+        integer(kind=intType) :: nCellsPerProcMax
+        integer(kind=intType) :: iter, iterMax
+
+        logical :: cellsBalanced, facesBalanced
+        logical :: emptyPartitions, commNeglected
+
+        type(splitCGNSType), dimension(cgnsNDom) :: splitInfo
+
+        character(len=maxStringLen) :: loadFormat
+
+        ! If it is not allowed to split the blocks, check that the
+        ! number of blocks is equal or larger than the number of
+        ! processors. If not, print an error message and exit.
+
+        if (.not. splitBlocks .and. nProc > cgnsNDom) then
+
+            ! Processor 0 prints the error message, while the others wait
+            ! to get killed.
+
+            if (myID == 0) &
+                 call terminate("blockDistribution", &
+                 "Number of processors is larger than number &
+                 &of blocks, but it is not allowed to split &
+                 &blocks")
+            call mpi_barrier(ADflow_comm_world, ierr)
+        end if
+        !
+        !       Determine how the blocks must be split (if allowed) for
+        !       load balancing reasons.
+        !
+        ! Initialize splitInfo to the original cgns blocks and determine
+        ! the total number of cells.
+
+        nCellsTot = 0
+        nBlocks = cgnsNDom
 
-    character(len=maxStringLen) :: loadFormat
+        do nn = 1, cgnsNDom
+            splitInfo(nn)%nSubBlocks = 1
 
-    ! If it is not allowed to split the blocks, check that the
-    ! number of blocks is equal or larger than the number of
-    ! processors. If not, print an error message and exit.
+            allocate (splitInfo(nn)%ranges(1, 3, 2), stat=ierr)
+            if (ierr /= 0) &
+                call terminate("blockDistribution", &
+                               "Memory allocation failure for ranges")
 
-    if(.not. splitBlocks .and. nProc > cgnsNDom) then
+            splitInfo(nn)%ranges(1, 1, 1) = 1
+            splitInfo(nn)%ranges(1, 2, 1) = 1
+            splitInfo(nn)%ranges(1, 3, 1) = 1
 
-       ! Processor 0 prints the error message, while the others wait
-       ! to get killed.
+            splitInfo(nn)%ranges(1, 1, 2) = cgnsDoms(nn)%il
+            splitInfo(nn)%ranges(1, 2, 2) = cgnsDoms(nn)%jl
+            splitInfo(nn)%ranges(1, 3, 2) = cgnsDoms(nn)%kl
 
-       if(myID == 0)                         &
-            call terminate("blockDistribution", &
-            "Number of processors is larger than number &
-            &of blocks, but it is not allowed to split &
-            &blocks")
-       call mpi_barrier(ADflow_comm_world, ierr)
-    endif
-    !
-    !       Determine how the blocks must be split (if allowed) for
-    !       load balancing reasons.
-    !
-    ! Initialize splitInfo to the original cgns blocks and determine
-    ! the total number of cells.
+            nx = cgnsDoms(nn)%nx
+            ny = cgnsDoms(nn)%ny
+            nz = cgnsDoms(nn)%nz
 
-    nCellsTot = 0
-    nBlocks   = cgnsNDom
+            nCellsTot = nCellsTot + nx * ny * nz
+        end do
 
-    do nn=1,cgnsNDom
-       splitInfo(nn)%nSubBlocks = 1
+        ! Determine the desirable number of cells per processor and
+        ! determine the upper bound based on the imbalance tolerance.
+        ! Initialize nCellsPerProcMax, which is used to decide whether
+        ! or not a block should be split, to this upper limit.
+
+        nCellsEven = nCellsTot / nProc
+        nCellsUpper = nCellsEven + loadImbalance * nCellsEven
+        nCellsPerProcMax = nCellsUpper
 
-       allocate(splitInfo(nn)%ranges(1,3,2), stat=ierr)
-       if(ierr /= 0)                         &
-            call terminate("blockDistribution", &
-            "Memory allocation failure for ranges")
+        ! Start the loop to determine the block topology to be used
+        ! in the computation.
 
-       splitInfo(nn)%ranges(1,1,1) = 1
-       splitInfo(nn)%ranges(1,2,1) = 1
-       splitInfo(nn)%ranges(1,3,1) = 1
+        initSplitLoop: do iter = 1, 2
 
-       splitInfo(nn)%ranges(1,1,2) = cgnsDoms(nn)%il
-       splitInfo(nn)%ranges(1,2,2) = cgnsDoms(nn)%jl
-       splitInfo(nn)%ranges(1,3,2) = cgnsDoms(nn)%kl
+            ! Loop over the number of original cgns blocks and determine
+            ! whether or not they must be split further.
 
-       nx = cgnsDoms(nn)%nx
-       ny = cgnsDoms(nn)%ny
-       nz = cgnsDoms(nn)%nz
+            do nn = 1, cgnsNDom
+                if (.not. splittingIsOkay(nn)) &
+                    call splitBlockInitialization(nn)
+            end do
 
-       nCellsTot = nCellsTot + nx*ny*nz
-    enddo
+            ! Exit the loop if the number of processors is smaller than or
+            ! equal to the number of blocks.
 
-    ! Determine the desirable number of cells per processor and
-    ! determine the upper bound based on the imbalance tolerance.
-    ! Initialize nCellsPerProcMax, which is used to decide whether
-    ! or not a block should be split, to this upper limit.
-
-    nCellsEven       = nCellsTot/nProc
-    nCellsUpper      = nCellsEven + loadImbalance*nCellsEven
-    nCellsPerProcMax = nCellsUpper
+            if (nBlocks >= nProc) exit
 
-    ! Start the loop to determine the block topology to be used
-    ! in the computation.
+            ! The number of blocks is smaller than the number of processors.
+            ! Set the tolerance for splitting to the desired number of
+            ! cells on a processor.
 
-    initSplitLoop: do iter=1,2
+            nCellsPerProcMax = nCellsEven
 
-       ! Loop over the number of original cgns blocks and determine
-       ! whether or not they must be split further.
+        end do initSplitLoop
 
-       do nn=1,cgnsNDom
-          if(.not. splittingIsOkay(nn) ) &
-               call splitBlockInitialization(nn)
-       enddo
-
-       ! Exit the loop if the number of processors is smaller than or
-       ! equal to the number of blocks.
-
-       if(nBlocks >= nProc) exit
+        ! Set the number of iterations to determine the distribution over
+        ! the processors. If blocks cannot be split this is set to 1;
+        ! otherwise it is set to 2.
 
-       ! The number of blocks is smaller than the number of processors.
-       ! Set the tolerance for splitting to the desired number of
-       ! cells on a processor.
+        iterMax = 1
+        if (splitBlocks) iterMax = loadBalanceIter
 
-       nCellsPerProcMax  = nCellsEven
+        ! Loop to determine a good load balance.
 
-    enddo initSplitLoop
+        distributionLoop: do iter = 1, iterMax
 
-    ! Set the number of iterations to determine the distribution over
-    ! the processors. If blocks cannot be split this is set to 1;
-    ! otherwise it is set to 2.
+            ! Determine the computational blocks from the splitting info of
+            ! the original blocks.
 
-    iterMax = 1
-    if( splitBlocks ) iterMax = loadBalanceIter
+            call determineComputeBlocks(splitInfo)
+            ! Apply the graph partitioning to the computational blocks.
 
-    ! Loop to determine a good load balance.
+            call graphPartitioning(emptyPartitions, commNeglected)
 
-    distributionLoop: do iter=1,iterMax
+            ! Determine whether the load balance is okay. If empty
+            ! partitions are present the load balance is per definition
+            ! not okay and there is no need to call checkLoadBalance.
 
-       ! Determine the computational blocks from the splitting info of
-       ! the original blocks.
+            if (emptyPartitions) then
+                cellsBalanced = .false.
+                facesBalanced = .false.
+            else
+                call checkLoadBalance(cellsBalanced, facesBalanced)
+            end if
 
-       call determineComputeBlocks(splitInfo)
-       ! Apply the graph partitioning to the computational blocks.
+            ! Exit the loop if the cells or the faces are load balanced
+            ! or if the maximum number of iterations have been reached.
 
-       call graphPartitioning(emptyPartitions, commNeglected)
+            if (cellsBalanced .or. facesBalanced .or. &
+                iter == iterMax) exit
 
-       ! Determine whether the load balance is okay. If empty
-       ! partitions are present the load balance is per definition
-       ! not okay and there is no need to call checkLoadBalance.
+            ! exit
 
-       if( emptyPartitions ) then
-          cellsBalanced = .false.
-          facesBalanced = .false.
-       else
-          call checkLoadBalance(cellsBalanced, facesBalanced)
-       endif
+            ! Split some blocks on the processors with too many cells/faces.
+            call splitBlocksLoadBalance
 
-       ! Exit the loop if the cells or the faces are load balanced
-       ! or if the maximum number of iterations have been reached.
+        end do distributionLoop
 
-       if(cellsBalanced .or. facesBalanced .or. &
-            iter == iterMax) exit
+        ! Deallocate the memory for ranges.
 
-       ! exit
+        do nn = 1, cgnsNDom
+            deallocate (splitInfo(nn)%ranges, stat=ierr)
+            if (ierr /= 0) &
+                call terminate("blockDistribution", &
+                               "Deallocation failure for ranges")
+        end do
 
-       ! Split some blocks on the processors with too many cells/faces.
-       call splitBlocksLoadBalance
+        ! If empty partitions are present print an error message and
+        ! exit. Only processor 0 prints the message to avoid a mess.
 
-    enddo distributionLoop
+        if (emptyPartitions) then
+            if (myID == 0) &
+                call terminate("blockDistribution", &
+                               "Empty partitions present")
+            call mpi_barrier(ADflow_comm_world, ierr)
+        end if
 
-    ! Deallocate the memory for ranges.
+        ! Processor 0 prints a warning if the communication was
+        ! neglected to obtain a valid partitioning.
 
-    do nn=1,cgnsNDom
-       deallocate(splitInfo(nn)%ranges, stat=ierr)
-       if(ierr /= 0)                         &
-            call terminate("blockDistribution", &
-            "Deallocation failure for ranges")
-    enddo
+        if (commNeglected .and. myID == 0) then
+            print "(a)", "#"
+            print "(a)", "#                    Warning"
+            print "(a)", "# Communication costs neglected to obtain a valid partitioning."
+        end if
 
-    ! If empty partitions are present print an error message and
-    ! exit. Only processor 0 prints the message to avoid a mess.
+        ! If the load imbalance tolerance was not met, print a warning
+        ! message if I am processor 0.
 
-    if( emptyPartitions ) then
-       if(myID == 0)                         &
-            call terminate("blockDistribution", &
-            "Empty partitions present")
-       call mpi_barrier(ADflow_comm_world, ierr)
-    endif
+        loadFormat = "(*(A, F6.3))"
 
-    ! Processor 0 prints a warning if the communication was
-    ! neglected to obtain a valid partitioning.
+        if (myid == 0) then
+            if (.not. (cellsBalanced .and. facesBalanced)) then
+                print "(a)", "#"
+                print "(a)", "#                    Warning"
+                print loadFormat, "# Specified load imbalance tolerance", real(loadImbalance), " not achieved."
+                print loadFormat, "# Continuing with", real(ubvec(1)), &
+                    " load imbalance for the cells and", real(ubvec(2)), " for the faces"
+                print "(a)", "#"
+            else
+                print "(a)", "#"
+                print loadFormat, "# Specified load imbalance tolerance", real(loadImbalance), " achieved."
+                print loadFormat, "# Continuing with", real(ubvec(1)), &
+                    " load imbalance for the cells and", real(ubvec(2)), " for the faces"
+                print "(a)", "#"
+            end if
 
-    if(commNeglected .and. myID == 0) then
-       print "(a)", "#"
-       print "(a)", "#                    Warning"
-       print "(a)", "# Communication costs neglected to obtain a valid partitioning."
-    endif
+        end if
 
-    ! If the load imbalance tolerance was not met, print a warning
-    ! message if I am processor 0.
+        !=================================================================
 
-    loadFormat = "(*(A, F6.3))"
+    contains
 
-    if (myid == 0) then
-       if(.not.(cellsBalanced .and. facesBalanced)) then
-          print "(a)", "#"
-          print "(a)", "#                    Warning"
-          print loadFormat, "# Specified load imbalance tolerance", real(loadImbalance), " not achieved."
-          print loadFormat,  "# Continuing with", real(ubvec(1)), &
-            " load imbalance for the cells and", real(ubvec(2)), " for the faces"
-          print "(a)", "#"
-       else
-          print "(a)","#"
-          print loadFormat, "# Specified load imbalance tolerance", real(loadImbalance), " achieved."
-          print loadFormat, "# Continuing with", real(ubvec(1)), &
-            " load imbalance for the cells and", real(ubvec(2)), " for the faces"
-          print "(a)", "#"
-       end if
+        !===============================================================
 
-    endif
+        logical function splittingIsOkay(cgnsID)
+            !
+            !         splittingIsOkay determines whether or not the splitting of
+            !         the given cgns block is okay in the sense that all subblocks
+            !         are smaller than the allowed number of cells and faces.
+            !
+            implicit none
+            !
+            !        Function argument
+            !
+            integer(kind=intType), intent(in) :: cgnsID
+            !
+            !        Local variables.
+            !
+            integer(kind=intType) :: i, nx, ny, nz, nCells
 
-    !=================================================================
+            ! Initialize splittingIsOkay to .true.
 
-  contains
+            splittingIsOkay = .true.
 
-    !===============================================================
+            ! Loop over the subblocks.
 
-    logical function splittingIsOkay(cgnsID)
-      !
-      !         splittingIsOkay determines whether or not the splitting of
-      !         the given cgns block is okay in the sense that all subblocks
-      !         are smaller than the allowed number of cells and faces.
-      !
-      implicit none
-      !
-      !        Function argument
-      !
-      integer(kind=intType), intent(in) :: cgnsID
-      !
-      !        Local variables.
-      !
-      integer(kind=intType) :: i, nx, ny, nz, nCells
+            do i = 1, splitInfo(cgnsID)%nSubBlocks
 
-      ! Initialize splittingIsOkay to .true.
+                ! Determine the number of cells in the three directions.
 
-      splittingIsOkay = .true.
+                nx = splitInfo(cgnsID)%ranges(i, 1, 2) &
+                     - splitInfo(cgnsID)%ranges(i, 1, 1)
+                ny = splitInfo(cgnsID)%ranges(i, 2, 2) &
+                     - splitInfo(cgnsID)%ranges(i, 2, 1)
+                nz = splitInfo(cgnsID)%ranges(i, 3, 2) &
+                     - splitInfo(cgnsID)%ranges(i, 3, 1)
 
-      ! Loop over the subblocks.
+                ! Determine the number of cells for this subblock.
 
-      do i=1,splitInfo(cgnsID)%nSubBlocks
+                nCells = nx * ny * nz
 
-         ! Determine the number of cells in the three directions.
+                ! Check whether this number is smaller or equal to the
+                ! maximum allowed number. If not, set splittingIsOkay to
+                ! .false. and exit the loop.
 
-         nx = splitInfo(cgnsID)%ranges(i,1,2) &
-              - splitInfo(cgnsID)%ranges(i,1,1)
-         ny = splitInfo(cgnsID)%ranges(i,2,2) &
-              - splitInfo(cgnsID)%ranges(i,2,1)
-         nz = splitInfo(cgnsID)%ranges(i,3,2) &
-              - splitInfo(cgnsID)%ranges(i,3,1)
+                if (nCells > nCellsPerProcMax) then
+                    splittingIsOkay = .false.
+                    exit
+                end if
 
-         ! Determine the number of cells for this subblock.
+            end do
 
-         nCells = nx*ny*nz
+        end function splittingIsOkay
 
-         ! Check whether this number is smaller or equal to the
-         ! maximum allowed number. If not, set splittingIsOkay to
-         ! .false. and exit the loop.
-
-         if(nCells > nCellsPerProcMax) then
-            splittingIsOkay = .false.
-            exit
-         endif
-
-      enddo
-
-    end function splittingIsOkay
-
-    !===============================================================
+        !===============================================================
 
-    subroutine splitBlockInitialization(cgnsID)
-      !
-      !         splitBlockInitialization splits the given cgns block ID
-      !         into a number of subbocks during the initialization phase.
-      !
-      implicit none
-      !
-      !        Subroutine argument.
-      !
-      integer(kind=intType), intent(in) :: cgnsID
-      !
-      !        Local variables.
-      !
-      integer :: ierr
+        subroutine splitBlockInitialization(cgnsID)
+            !
+            !         splitBlockInitialization splits the given cgns block ID
+            !         into a number of subbocks during the initialization phase.
+            !
+            implicit none
+            !
+            !        Subroutine argument.
+            !
+            integer(kind=intType), intent(in) :: cgnsID
+            !
+            !        Local variables.
+            !
+            integer :: ierr
 
-      integer(kind=intType) :: nn, mm
-      integer(kind=intType) :: nx, ny, nz, nCells, nSub
+            integer(kind=intType) :: nn, mm
+            integer(kind=intType) :: nx, ny, nz, nCells, nSub
 
-      integer(kind=intType), dimension(:,:,:), allocatable :: tmpRange
+            integer(kind=intType), dimension(:, :, :), allocatable :: tmpRange
 
-      type(distributionBlockType) :: tmpBlock
+            type(distributionBlockType) :: tmpBlock
 
-      ! Check whether it is allowed to split blocks.
-
-      if(.not. splitBlocks)                        &
-           call terminate("splitBlockInitialization", &
-           "Block must be split for load balance, &
-           &but I am not allowed to do so")
-
-      ! Store the number of cells of this block a bit easier.
-
-      nx     = cgnsDoms(cgnsID)%nx
-      ny     = cgnsDoms(cgnsID)%ny
-      nz     = cgnsDoms(cgnsID)%nz
-      nCells = nx*ny*nz
+            ! Check whether it is allowed to split blocks.
 
-      ! Copy the information from the given cgns block into
-      ! tmpBlock, such that the routine splitBlock can be used.
+            if (.not. splitBlocks) &
+                 call terminate("splitBlockInitialization", &
+                 "Block must be split for load balance, &
+                 &but I am not allowed to do so")
 
-      ! First the scalar info.
+            ! Store the number of cells of this block a bit easier.
 
-      tmpBlock%nx = nx; tmpBlock%il = nx + 1
-      tmpBlock%ny = ny; tmpBlock%jl = ny + 1
-      tmpBlock%nz = nz; tmpBlock%kl = nz + 1
+            nx = cgnsDoms(cgnsID)%nx
+            ny = cgnsDoms(cgnsID)%ny
+            nz = cgnsDoms(cgnsID)%nz
+            nCells = nx * ny * nz
 
-      tmpBlock%nCell = nCells
-      tmpBlock%nface = (nx+1)*ny*nz + (ny+1)*nx*nz &
-           + (nz+1)*nx*ny
-
-      tmpBlock%cgnsBlockID = cgnsID
-
-      tmpBlock%iBegor = 1; tmpBlock%iEndor = tmpBlock%il
-      tmpBlock%jBegor = 1; tmpBlock%jEndor = tmpBlock%jl
-      tmpBlock%kBegor = 1; tmpBlock%kEndor = tmpBlock%kl
+            ! Copy the information from the given cgns block into
+            ! tmpBlock, such that the routine splitBlock can be used.
 
-      tmpBlock%nBocos   = cgnsDoms(cgnsID)%nBocos
-      tmpBlock%n1to1    = cgnsDoms(cgnsID)%n1to1
-      tmpBlock%nSubface = tmpBlock%nBocos + tmpBlock%n1to1
+            ! First the scalar info.
 
-      ! Allocate the memory for the nodal ranges of the subfaces
-      ! and nullify the other pointers.
+            tmpBlock%nx = nx; tmpBlock%il = nx + 1
+            tmpBlock%ny = ny; tmpBlock%jl = ny + 1
+            tmpBlock%nz = nz; tmpBlock%kl = nz + 1
 
-      nSub = tmpBlock%nSubface
+            tmpBlock%nCell = nCells
+            tmpBlock%nface = (nx + 1) * ny * nz + (ny + 1) * nx * nz &
+                             + (nz + 1) * nx * ny
+
+            tmpBlock%cgnsBlockID = cgnsID
 
-      allocate(tmpBlock%BCType(nSub), tmpBlock%BCFaceID(nSub), &
-           tmpBlock%inBeg(nSub),  tmpBlock%inEnd(nSub),    &
-           tmpBlock%jnBeg(nSub),  tmpBlock%jnEnd(nSub),    &
-           tmpBlock%knBeg(nSub),  tmpBlock%knEnd(nSub),    &
-           stat=ierr)
-      if(ierr /= 0) &
-           call terminate("splitBlockInitialization", &
-           "Deallocation failure for the subface &
-           &info in tmpBlock")
+            tmpBlock%iBegor = 1; tmpBlock%iEndor = tmpBlock%il
+            tmpBlock%jBegor = 1; tmpBlock%jEndor = tmpBlock%jl
+            tmpBlock%kBegor = 1; tmpBlock%kEndor = tmpBlock%kl
 
-      nullify(tmpBlock%cgnsSubface, tmpBlock%neighBlock, &
-           tmpBlock%dinBeg,      tmpBlock%dinEnd,     &
-           tmpBlock%djnBeg,      tmpBlock%djnEnd,     &
-           tmpBlock%dknBeg,      tmpBlock%dknEnd,     &
-           tmpBlock%l1,          tmpBlock%l2,         &
-           tmpBlock%l3,          tmpBlock%groupNum)
+            tmpBlock%nBocos = cgnsDoms(cgnsID)%nBocos
+            tmpBlock%n1to1 = cgnsDoms(cgnsID)%n1to1
+            tmpBlock%nSubface = tmpBlock%nBocos + tmpBlock%n1to1
 
-      ! Copy the range of the subfaces into tmpBlock and set the
-      ! corresponding boundary condition.
-      ! First the boundary faces.
+            ! Allocate the memory for the nodal ranges of the subfaces
+            ! and nullify the other pointers.
 
-      do nn=1,cgnsDoms(cgnsID)%nBocos
-         tmpBlock%inBeg(nn) = cgnsDoms(cgnsID)%bocoInfo(nn)%iBeg
-         tmpBlock%jnBeg(nn) = cgnsDoms(cgnsID)%bocoInfo(nn)%jBeg
-         tmpBlock%knBeg(nn) = cgnsDoms(cgnsID)%bocoInfo(nn)%kBeg
+            nSub = tmpBlock%nSubface
 
-         tmpBlock%inEnd(nn) = cgnsDoms(cgnsID)%bocoInfo(nn)%iEnd
-         tmpBlock%jnEnd(nn) = cgnsDoms(cgnsID)%bocoInfo(nn)%jEnd
-         tmpBlock%knEnd(nn) = cgnsDoms(cgnsID)%bocoInfo(nn)%kEnd
+            allocate (tmpBlock%BCType(nSub), tmpBlock%BCFaceID(nSub), &
+                      tmpBlock%inBeg(nSub), tmpBlock%inEnd(nSub), &
+                      tmpBlock%jnBeg(nSub), tmpBlock%jnEnd(nSub), &
+                      tmpBlock%knBeg(nSub), tmpBlock%knEnd(nSub), &
+                      stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("splitBlockInitialization", &
+                 "Deallocation failure for the subface &
+                 &info in tmpBlock")
 
-         tmpBlock%BCType(nn) = cgnsDoms(cgnsID)%bocoInfo(nn)%BCType
-      enddo
+            nullify (tmpBlock%cgnsSubface, tmpBlock%neighBlock, &
+                     tmpBlock%dinBeg, tmpBlock%dinEnd, &
+                     tmpBlock%djnBeg, tmpBlock%djnEnd, &
+                     tmpBlock%dknBeg, tmpBlock%dknEnd, &
+                     tmpBlock%l1, tmpBlock%l2, &
+                     tmpBlock%l3, tmpBlock%groupNum)
 
-      ! And the internal block faces; set the boundary condition to
-      ! B2BMatch.
+            ! Copy the range of the subfaces into tmpBlock and set the
+            ! corresponding boundary condition.
+            ! First the boundary faces.
 
-      do mm=1,cgnsDoms(cgnsID)%n1to1
-         nn = mm + cgnsDoms(cgnsID)%nBocos
+            do nn = 1, cgnsDoms(cgnsID)%nBocos
+                tmpBlock%inBeg(nn) = cgnsDoms(cgnsID)%bocoInfo(nn)%iBeg
+                tmpBlock%jnBeg(nn) = cgnsDoms(cgnsID)%bocoInfo(nn)%jBeg
+                tmpBlock%knBeg(nn) = cgnsDoms(cgnsID)%bocoInfo(nn)%kBeg
+
+                tmpBlock%inEnd(nn) = cgnsDoms(cgnsID)%bocoInfo(nn)%iEnd
+                tmpBlock%jnEnd(nn) = cgnsDoms(cgnsID)%bocoInfo(nn)%jEnd
+                tmpBlock%knEnd(nn) = cgnsDoms(cgnsID)%bocoInfo(nn)%kEnd
 
-         tmpBlock%inBeg(nn) = cgnsDoms(cgnsID)%conn1to1(mm)%iBeg
-         tmpBlock%jnBeg(nn) = cgnsDoms(cgnsID)%conn1to1(mm)%jBeg
-         tmpBlock%knBeg(nn) = cgnsDoms(cgnsID)%conn1to1(mm)%kBeg
+                tmpBlock%BCType(nn) = cgnsDoms(cgnsID)%bocoInfo(nn)%BCType
+            end do
+
+            ! And the internal block faces; set the boundary condition to
+            ! B2BMatch.
+
+            do mm = 1, cgnsDoms(cgnsID)%n1to1
+                nn = mm + cgnsDoms(cgnsID)%nBocos
+
+                tmpBlock%inBeg(nn) = cgnsDoms(cgnsID)%conn1to1(mm)%iBeg
+                tmpBlock%jnBeg(nn) = cgnsDoms(cgnsID)%conn1to1(mm)%jBeg
+                tmpBlock%knBeg(nn) = cgnsDoms(cgnsID)%conn1to1(mm)%kBeg
 
-         tmpBlock%inEnd(nn) = cgnsDoms(cgnsID)%conn1to1(mm)%iEnd
-         tmpBlock%jnEnd(nn) = cgnsDoms(cgnsID)%conn1to1(mm)%jEnd
-         tmpBlock%knEnd(nn) = cgnsDoms(cgnsID)%conn1to1(mm)%kEnd
+                tmpBlock%inEnd(nn) = cgnsDoms(cgnsID)%conn1to1(mm)%iEnd
+                tmpBlock%jnEnd(nn) = cgnsDoms(cgnsID)%conn1to1(mm)%jEnd
+                tmpBlock%knEnd(nn) = cgnsDoms(cgnsID)%conn1to1(mm)%kEnd
 
-         tmpBlock%BCType(nn) = B2BMatch
-      enddo
+                tmpBlock%BCType(nn) = B2BMatch
+            end do
 
-      ! Determine for the block face on which the subface is located.
-      ! Assume that the subface connectivity is correct. This will be
-      ! tested later on in determineComputeBlocks.
+            ! Determine for the block face on which the subface is located.
+            ! Assume that the subface connectivity is correct. This will be
+            ! tested later on in determineComputeBlocks.
 
-      do nn=1,tmpBlock%nSubface
-         if(tmpBlock%inBeg(nn) == tmpBlock%inEnd(nn)) then
-            tmpBlock%BCFaceID(nn) = iMax
-            if(tmpBlock%inBeg(nn) == 1) tmpBlock%BCFaceID(nn) = iMin
-         else if(tmpBlock%jnBeg(nn) == tmpBlock%jnEnd(nn)) then
-            tmpBlock%BCFaceID(nn) = jMax
-            if(tmpBlock%jnBeg(nn) == 1) tmpBlock%BCFaceID(nn) = jMin
-         else
-            tmpBlock%BCFaceID(nn) = kMax
-            if(tmpBlock%knBeg(nn) == 1) tmpBlock%BCFaceID(nn) = kMin
-         endif
-      enddo
+            do nn = 1, tmpBlock%nSubface
+                if (tmpBlock%inBeg(nn) == tmpBlock%inEnd(nn)) then
+                    tmpBlock%BCFaceID(nn) = iMax
+                    if (tmpBlock%inBeg(nn) == 1) tmpBlock%BCFaceID(nn) = iMin
+                else if (tmpBlock%jnBeg(nn) == tmpBlock%jnEnd(nn)) then
+                    tmpBlock%BCFaceID(nn) = jMax
+                    if (tmpBlock%jnBeg(nn) == 1) tmpBlock%BCFaceID(nn) = jMin
+                else
+                    tmpBlock%BCFaceID(nn) = kMax
+                    if (tmpBlock%knBeg(nn) == 1) tmpBlock%BCFaceID(nn) = kMin
+                end if
+            end do
 
-      ! Determine the number of subblocks into which this block is
-      ! to be split.
+            ! Determine the number of subblocks into which this block is
+            ! to be split.
 
-      nSub = nint(real(nCells,realType)/real(nCellsEven,realType))
-      nSub = max(nSub,2_intType)
-
-      ! Deallocate the memory of the splitting info and allocate
-      ! the memory of tmpRange.
-
-      deallocate(splitInfo(cgnsID)%ranges, stat=ierr)
-      if(ierr /= 0) &
-           call terminate("splitBlockInitialization", &
-           "Deallocation failure for ranges")
-
-      allocate(tmpRange(nSub,3,2), stat=ierr)
-      if(ierr /= 0) &
-           call terminate("splitBlockInitialization", &
-           "Memory allocation failure for tmpRange")
-
-      ! Split the block into the subblocks. It is possible that
-      ! the block is split into less subblocks than the desired
-      ! number. The actual number of subblocks is returned in nSub.
-
-      call splitBlock(tmpBlock, nSub, nCellsEven, tmpRange)
-
-      ! Allocate the memory for ranges.
-
-      allocate(splitInfo(cgnsID)%ranges(nSub,3,2), stat=ierr)
-      if(ierr /= 0) &
-           call terminate("splitBlockInitialization", &
-           "Memory allocation failure for ranges")
-
-      ! Determine the new number of computational blocks. This is
-      ! the old number plus the number or extra blocks created
-      ! by the splitting.
-
-      nBlocks = nBlocks + nSub - splitInfo(cgnsID)%nSubBlocks
-
-      ! Copy tmpRange into ranges of splitInfo.
-
-      splitInfo(cgnsID)%nSubBlocks = nSub
-
-      do nn=1,nSub
-         splitInfo(cgnsID)%ranges(nn,1,1) = tmpRange(nn,1,1)
-         splitInfo(cgnsID)%ranges(nn,2,1) = tmpRange(nn,2,1)
-         splitInfo(cgnsID)%ranges(nn,3,1) = tmpRange(nn,3,1)
-
-         splitInfo(cgnsID)%ranges(nn,1,2) = tmpRange(nn,1,2)
-         splitInfo(cgnsID)%ranges(nn,2,2) = tmpRange(nn,2,2)
-         splitInfo(cgnsID)%ranges(nn,3,2) = tmpRange(nn,3,2)
-      enddo
+            nSub = nint(real(nCells, realType) / real(nCellsEven, realType))
+            nSub = max(nSub, 2_intType)
 
-      ! Deallocate the memory allocated for tmpBlock and tmpRange.
+            ! Deallocate the memory of the splitting info and allocate
+            ! the memory of tmpRange.
 
-      deallocate(tmpBlock%BCType, tmpBlock%BCFaceID, &
-           tmpBlock%inBeg,  tmpBlock%inEnd,    &
-           tmpBlock%jnBeg,  tmpBlock%jnEnd,    &
-           tmpBlock%knBeg,  tmpBlock%knEnd,    &
-           tmpRange, stat=ierr)
-      if(ierr /= 0) &
-           call terminate("splitBlockInitialization", &
-           "Deallocation failure for tmpBlock &
-           &and tmpRange")
-
-      ! Sort the subranges of this block in increasing order.
-
-      call sortRangesSplitInfo(splitInfo(cgnsID))
-
-    end subroutine splitBlockInitialization
-
-    !===============================================================
-
-    subroutine splitBlocksLoadBalance
-      !
-      !         splitBlocksLoadBalance splits some (sub)blocks even
-      !         further to obtain a better load balance.
-      !
-      use sorting, only : bsearchIntegers, qsortIntegers
-      implicit none
-      !
-      !        Local variables.
-      !
-      integer :: ierr
-
-      integer(kind=intType) :: i, j, k, kk, jj
-      integer(kind=intType) :: nCellDiff, proc, nSub, cgnsID
-      integer(kind=intType) :: ncgnsSort
-
-      integer(kind=intType), dimension(0:cgnsNDom) :: nSubBlocks
-      integer(kind=intType), dimension(cgnsNDom)   :: cgnsSort
-
-      integer(kind=intType), dimension(nProc)   :: nCell, nCellOr
-      integer(kind=intType), dimension(nProc)   :: tmp, procNCell
-      integer(kind=intType), dimension(0:nProc) :: nBlockProc
-      integer(kind=intType), dimension(0:nProc) :: multNCell
-      integer(kind=intType), dimension(nBlocks) :: blockProc
+            deallocate (splitInfo(cgnsID)%ranges, stat=ierr)
+            if (ierr /= 0) &
+                call terminate("splitBlockInitialization", &
+                               "Deallocation failure for ranges")
 
-      integer(kind=intType), dimension(:,:,:), allocatable :: tmpRange
-      integer(kind=intType), dimension(:,:,:), pointer :: oldRanges
+            allocate (tmpRange(nSub, 3, 2), stat=ierr)
+            if (ierr /= 0) &
+                call terminate("splitBlockInitialization", &
+                               "Memory allocation failure for tmpRange")
 
-      logical, dimension(cgnsNDom) :: cgnsBlockFlagged
+            ! Split the block into the subblocks. It is possible that
+            ! the block is split into less subblocks than the desired
+            ! number. The actual number of subblocks is returned in nSub.
 
-      ! Determine the number of blocks per cgns block in cumulative
-      ! storage format. These values will serve as an offset to
-      ! determine the local subblock ID.
-      ! Initialize in the same loop cgnsBlockFlagged to .false.
+            call splitBlock(tmpBlock, nSub, nCellsEven, tmpRange)
 
-      nSubBlocks(0) = 0
-      do i=1,cgnsNDom
-         nSubBlocks(i) = nSubBlocks(i-1) + splitInfo(i)%nSubBlocks
-         cgnsBlockFlagged(i) = .false.
-      enddo
+            ! Allocate the memory for ranges.
 
-      ! Determine the number of cells and blocks per processor.
-      ! Note that part(i) contains the processor ID, which start at 0.
+            allocate (splitInfo(cgnsID)%ranges(nSub, 3, 2), stat=ierr)
+            if (ierr /= 0) &
+                call terminate("splitBlockInitialization", &
+                               "Memory allocation failure for ranges")
 
-      nCell      = 0
-      nBlockProc = 0
+            ! Determine the new number of computational blocks. This is
+            ! the old number plus the number or extra blocks created
+            ! by the splitting.
 
-      do i=1,nBlocks
-         j = part(i) + 1
-         nCell(j)      = nCell(j) + blocks(i)%nCell
-         nBlockProc(j) = nBlockProc(j) + 1
-      enddo
+            nBlocks = nBlocks + nSub - splitInfo(cgnsID)%nSubBlocks
 
-      ! Put nBlockProc in cumulative storage format and store the
-      ! starting entries in tmp, which will be used as a counter to
-      ! put blockProc in the correct place.
+            ! Copy tmpRange into ranges of splitInfo.
 
-      do i=1,nProc
-         tmp(i)        = nBlockProc(i-1)
-         nBlockProc(i) = nBlockProc(i) + tmp(i)
-      enddo
+            splitInfo(cgnsID)%nSubBlocks = nSub
 
-      ! Determine the block ID's for every processor. Again note
-      ! that part(i) starts at 0; tmp is used as a counter variable.
+            do nn = 1, nSub
+                splitInfo(cgnsID)%ranges(nn, 1, 1) = tmpRange(nn, 1, 1)
+                splitInfo(cgnsID)%ranges(nn, 2, 1) = tmpRange(nn, 2, 1)
+                splitInfo(cgnsID)%ranges(nn, 3, 1) = tmpRange(nn, 3, 1)
 
-      do i=1,nBlocks
-         j                 = part(i) + 1
-         tmp(j)            = tmp(j) + 1
-         blockProc(tmp(j)) = i
-      enddo
+                splitInfo(cgnsID)%ranges(nn, 1, 2) = tmpRange(nn, 1, 2)
+                splitInfo(cgnsID)%ranges(nn, 2, 2) = tmpRange(nn, 2, 2)
+                splitInfo(cgnsID)%ranges(nn, 3, 2) = tmpRange(nn, 3, 2)
+            end do
 
-      ! Sort nCell in increasing order. Store the original values.
+            ! Deallocate the memory allocated for tmpBlock and tmpRange.
 
-      nCellOr   = nCell
-      nCellDiff = nProc
-      call qsortIntegers(nCell, nCellDiff)
+            deallocate (tmpBlock%BCType, tmpBlock%BCFaceID, &
+                        tmpBlock%inBeg, tmpBlock%inEnd, &
+                        tmpBlock%jnBeg, tmpBlock%jnEnd, &
+                        tmpBlock%knBeg, tmpBlock%knEnd, &
+                        tmpRange, stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("splitBlockInitialization", &
+                 "Deallocation failure for tmpBlock &
+                 &and tmpRange")
 
-      ! Determine the different number of cells per proc and store
-      ! the multiplicity.
+            ! Sort the subranges of this block in increasing order.
 
-      multNCell(0) = 0
-      multNCell(1) = 1
-      nCellDiff    = 1
+            call sortRangesSplitInfo(splitInfo(cgnsID))
 
-      do i=2,nProc
-         if(nCell(i) == nCell(nCellDiff)) then
-            multNCell(nCellDiff) = multNCell(nCellDiff) + 1
-         else
-            nCellDiff = nCellDiff + 1
-            multNCell(nCellDiff) = multNCell(nCellDiff-1) + 1
-            nCell(nCellDiff)     = nCell(i)
-         endif
-      enddo
+        end subroutine splitBlockInitialization
 
-      ! Set tmp to multNCell; it will serve as a counter to determine
-      ! the corresponding processor ID's of the sorted cell numbers
+        !===============================================================
 
-      do i=1,nCellDiff
-         tmp(i) = multNCell(i-1)
-      enddo
+        subroutine splitBlocksLoadBalance
+            !
+            !         splitBlocksLoadBalance splits some (sub)blocks even
+            !         further to obtain a better load balance.
+            !
+            use sorting, only: bsearchIntegers, qsortIntegers
+            implicit none
+            !
+            !        Local variables.
+            !
+            integer :: ierr
 
-      ! Determine the processor ID's corresponding to the sorted
-      ! number of cells. Note that the processor ID's start at 0.
+            integer(kind=intType) :: i, j, k, kk, jj
+            integer(kind=intType) :: nCellDiff, proc, nSub, cgnsID
+            integer(kind=intType) :: ncgnsSort
 
-      do i=1,nProc
-         j = bsearchIntegers(nCellOr(i), nCell(1:nCellDiff))
-         tmp(j) = tmp(j) + 1
-         procNCell(tmp(j)) = i - 1
-      enddo
+            integer(kind=intType), dimension(0:cgnsNDom) :: nSubBlocks
+            integer(kind=intType), dimension(cgnsNDom) :: cgnsSort
 
-      ! Loop to subdivide blocks. As nCell is sorted in increasing
-      ! order, the largest number of cells are at the end of this
-      ! array. Therefore this loop starts at the back.
+            integer(kind=intType), dimension(nProc) :: nCell, nCellOr
+            integer(kind=intType), dimension(nProc) :: tmp, procNCell
+            integer(kind=intType), dimension(0:nProc) :: nBlockProc
+            integer(kind=intType), dimension(0:nProc) :: multNCell
+            integer(kind=intType), dimension(nBlocks) :: blockProc
 
-      i = nCellDiff
-      ncgnsSort = 0
-      divisionLoop: do
+            integer(kind=intType), dimension(:, :, :), allocatable :: tmpRange
+            integer(kind=intType), dimension(:, :, :), pointer :: oldRanges
 
-         ! Condition to exit the loop. The number of cells per
-         ! processor is allowed in the load balance.
+            logical, dimension(cgnsNDom) :: cgnsBlockFlagged
 
-         if(nCell(i) <= nCellsUpper) exit
+            ! Determine the number of blocks per cgns block in cumulative
+            ! storage format. These values will serve as an offset to
+            ! determine the local subblock ID.
+            ! Initialize in the same loop cgnsBlockFlagged to .false.
 
-         ! Loop over the processors with this number of cells.
+            nSubBlocks(0) = 0
+            do i = 1, cgnsNDom
+                nSubBlocks(i) = nSubBlocks(i - 1) + splitInfo(i)%nSubBlocks
+                cgnsBlockFlagged(i) = .false.
+            end do
 
-         processorLoop: do j=(multNCell(i-1)+1), multNCell(i)
+            ! Determine the number of cells and blocks per processor.
+            ! Note that part(i) contains the processor ID, which start at 0.
 
-            ! Store the processor ID a bit easier.
+            nCell = 0
+            nBlockProc = 0
 
-            proc = procNCell(j)
+            do i = 1, nBlocks
+                j = part(i) + 1
+                nCell(j) = nCell(j) + blocks(i)%nCell
+                nBlockProc(j) = nBlockProc(j) + 1
+            end do
 
-            ! Determine the largest block of this processor, which will
-            ! be stored in index jj. As only the largest is needed a
-            ! linear search algorithm is okay.
+            ! Put nBlockProc in cumulative storage format and store the
+            ! starting entries in tmp, which will be used as a counter to
+            ! put blockProc in the correct place.
 
-            k  = nBlockProc(proc) + 1
-            jj = blockProc(k)
+            do i = 1, nProc
+                tmp(i) = nBlockProc(i - 1)
+                nBlockProc(i) = nBlockProc(i) + tmp(i)
+            end do
 
-            do k=(nBlockProc(proc) + 2),nBlockProc(proc+1)
-               kk = blockProc(k)
-               if(blocks(kk)%nCell > blocks(jj)%nCell) jj = kk
-            enddo
+            ! Determine the block ID's for every processor. Again note
+            ! that part(i) starts at 0; tmp is used as a counter variable.
 
-            ! Determine the local subblock number of computational block
-            ! jj in the original cgns block. This will be stored in kk.
+            do i = 1, nBlocks
+                j = part(i) + 1
+                tmp(j) = tmp(j) + 1
+                blockProc(tmp(j)) = i
+            end do
+
+            ! Sort nCell in increasing order. Store the original values.
+
+            nCellOr = nCell
+            nCellDiff = nProc
+            call qsortIntegers(nCell, nCellDiff)
+
+            ! Determine the different number of cells per proc and store
+            ! the multiplicity.
+
+            multNCell(0) = 0
+            multNCell(1) = 1
+            nCellDiff = 1
+
+            do i = 2, nProc
+                if (nCell(i) == nCell(nCellDiff)) then
+                    multNCell(nCellDiff) = multNCell(nCellDiff) + 1
+                else
+                    nCellDiff = nCellDiff + 1
+                    multNCell(nCellDiff) = multNCell(nCellDiff - 1) + 1
+                    nCell(nCellDiff) = nCell(i)
+                end if
+            end do
 
-            cgnsID = blocks(jj)%cgnsBlockID
-            kk     = jj - nSubBlocks(cgnsID-1)
+            ! Set tmp to multNCell; it will serve as a counter to determine
+            ! the corresponding processor ID's of the sorted cell numbers
 
-            ! Determine the number of cells, which should be split from
-            ! block jj, such that the load balance of processor proc
-            ! would be optimal (in terms of cells).
+            do i = 1, nCellDiff
+                tmp(i) = multNCell(i - 1)
+            end do
 
-            nCellDiff = nCell(i) - nCellsEven
+            ! Determine the processor ID's corresponding to the sorted
+            ! number of cells. Note that the processor ID's start at 0.
 
-            ! A very theoretical case would be that nCellDiff is larger
-            ! than the number of cells of block jj. This could occur,
-            ! because the partitioning is a multi-constraint
-            ! partitioning; however, it is not likely. In this case
-            ! nothing is done and the next processor is treated.
+            do i = 1, nProc
+                j = bsearchIntegers(nCellOr(i), nCell(1:nCellDiff))
+                tmp(j) = tmp(j) + 1
+                procNCell(tmp(j)) = i - 1
+            end do
 
-            if(nCellDiff >= blocks(jj)%nCell) cycle
+            ! Loop to subdivide blocks. As nCell is sorted in increasing
+            ! order, the largest number of cells are at the end of this
+            ! array. Therefore this loop starts at the back.
 
-            ! Determine the situation we are having here. If nCellDiff
-            ! is less or equal to the number of cells to obtain a good
-            ! load balance for the least loaded processor the block will
-            ! be split into 2. Test this.
+            i = nCellDiff
+            ncgnsSort = 0
+            divisionLoop: do
 
-            if(nCellDiff <= (nCellsUpper - nCell(1))) then
+                ! Condition to exit the loop. The number of cells per
+                ! processor is allowed in the load balance.
 
-               nSub = 2
+                if (nCell(i) <= nCellsUpper) exit
 
-            else
+                ! Loop over the processors with this number of cells.
 
-               ! nCellDiff is larger than the number of cells to create
-               ! a good load balance for the least loaded processor.
-               ! This probably means that the block should be split in
-               ! more than 2 subblocks.
-               ! Store the number of cells which should be kept in block
-               ! jj for an optimal load balance in nCellDiff.
+                processorLoop: do j = (multNCell(i - 1) + 1), multNCell(i)
 
-               nCellDiff = blocks(jj)%nCell - nCellDiff
+                    ! Store the processor ID a bit easier.
 
-               ! Determine the number of subblocks.
+                    proc = procNCell(j)
 
-               nSub = nint(real(blocks(jj)%nCell,realType) &
-                    /      real(nCellDiff,realType))
-               nSub = max(nSub,2_intType)
+                    ! Determine the largest block of this processor, which will
+                    ! be stored in index jj. As only the largest is needed a
+                    ! linear search algorithm is okay.
 
-            endif
+                    k = nBlockProc(proc) + 1
+                    jj = blockProc(k)
 
-            ! Allocate the memory for tmpRange, which will store the
-            ! block splitting of block jj.
+                    do k = (nBlockProc(proc) + 2), nBlockProc(proc + 1)
+                        kk = blockProc(k)
+                        if (blocks(kk)%nCell > blocks(jj)%nCell) jj = kk
+                    end do
 
-            allocate(tmpRange(nSub,3,2), stat=ierr)
-            if(ierr /= 0)                              &
-                 call terminate("splitBlocksLoadBalance", &
-                 "Memory allocation failure for tmpRange")
+                    ! Determine the local subblock number of computational block
+                    ! jj in the original cgns block. This will be stored in kk.
 
-            ! Split computational block into the desired number of
-            ! subblocks, if possible. The actual number of splittings
-            ! is returned in nSub.
+                    cgnsID = blocks(jj)%cgnsBlockID
+                    kk = jj - nSubBlocks(cgnsID - 1)
 
-            call splitBlock(blocks(jj), nSub, nCellDiff, tmpRange)
+                    ! Determine the number of cells, which should be split from
+                    ! block jj, such that the load balance of processor proc
+                    ! would be optimal (in terms of cells).
 
-            ! Store the old ranges, store the old number of subblocks
-            ! in jj, determine the new number of subblocks and allocate
-            ! the memory for ranges.
+                    nCellDiff = nCell(i) - nCellsEven
 
-            jj         = splitInfo(cgnsID)%nSubBlocks
-            oldRanges => splitInfo(cgnsID)%ranges
+                    ! A very theoretical case would be that nCellDiff is larger
+                    ! than the number of cells of block jj. This could occur,
+                    ! because the partitioning is a multi-constraint
+                    ! partitioning; however, it is not likely. In this case
+                    ! nothing is done and the next processor is treated.
 
-            splitInfo(cgnsID)%nSubBlocks = jj + nSub - 1
+                    if (nCellDiff >= blocks(jj)%nCell) cycle
 
-            k = splitInfo(cgnsID)%nSubBlocks
-            allocate(splitInfo(cgnsID)%ranges(k,3,2), stat=ierr)
-            if(ierr /= 0)                              &
-                 call terminate("splitBlocksLoadBalance", &
-                 "Memory allocation failure for ranges")
+                    ! Determine the situation we are having here. If nCellDiff
+                    ! is less or equal to the number of cells to obtain a good
+                    ! load balance for the least loaded processor the block will
+                    ! be split into 2. Test this.
 
-            ! Determine the new number of computational blocks.
-            ! This is the old number plus the number or extra blocks
-            ! created by the splitting.
+                    if (nCellDiff <= (nCellsUpper - nCell(1))) then
 
-            nBlocks = nBlocks + splitInfo(cgnsID)%nSubBlocks - jj
+                        nSub = 2
 
-            ! Copy the original info back into ranges.
+                    else
 
-            do k=1,jj
-               splitInfo(cgnsID)%ranges(k,1,1) = oldRanges(k,1,1)
-               splitInfo(cgnsID)%ranges(k,2,1) = oldRanges(k,2,1)
-               splitInfo(cgnsID)%ranges(k,3,1) = oldRanges(k,3,1)
+                        ! nCellDiff is larger than the number of cells to create
+                        ! a good load balance for the least loaded processor.
+                        ! This probably means that the block should be split in
+                        ! more than 2 subblocks.
+                        ! Store the number of cells which should be kept in block
+                        ! jj for an optimal load balance in nCellDiff.
 
-               splitInfo(cgnsID)%ranges(k,1,2) = oldRanges(k,1,2)
-               splitInfo(cgnsID)%ranges(k,2,2) = oldRanges(k,2,2)
-               splitInfo(cgnsID)%ranges(k,3,2) = oldRanges(k,3,2)
-            enddo
+                        nCellDiff = blocks(jj)%nCell - nCellDiff
 
-            ! The splitting kk has disappeared. Replace it by the
-            ! first splitting in tmpRange.
+                        ! Determine the number of subblocks.
 
-            splitInfo(cgnsID)%ranges(kk,1,1) = tmpRange(1,1,1)
-            splitInfo(cgnsID)%ranges(kk,2,1) = tmpRange(1,2,1)
-            splitInfo(cgnsID)%ranges(kk,3,1) = tmpRange(1,3,1)
+                        nSub = nint(real(blocks(jj)%nCell, realType) &
+                                    / real(nCellDiff, realType))
+                        nSub = max(nSub, 2_intType)
 
-            splitInfo(cgnsID)%ranges(kk,1,2) = tmpRange(1,1,2)
-            splitInfo(cgnsID)%ranges(kk,2,2) = tmpRange(1,2,2)
-            splitInfo(cgnsID)%ranges(kk,3,2) = tmpRange(1,3,2)
+                    end if
 
-            ! Add the other splittings to the end of ranges.
+                    ! Allocate the memory for tmpRange, which will store the
+                    ! block splitting of block jj.
 
-            do k=2,nSub
-               jj = jj + 1
-
-               splitInfo(cgnsID)%ranges(jj,1,1) = tmpRange(k,1,1)
-               splitInfo(cgnsID)%ranges(jj,2,1) = tmpRange(k,2,1)
-               splitInfo(cgnsID)%ranges(jj,3,1) = tmpRange(k,3,1)
-
-               splitInfo(cgnsID)%ranges(jj,1,2) = tmpRange(k,1,2)
-               splitInfo(cgnsID)%ranges(jj,2,2) = tmpRange(k,2,2)
-               splitInfo(cgnsID)%ranges(jj,3,2) = tmpRange(k,3,2)
-            enddo
-
-            ! Deallocate the memory of oldRanges and tmpRange.
-
-            deallocate(oldRanges, tmpRange, stat=ierr)
-            if(ierr /= 0)                                 &
-                 call terminate("splitBlocksLoadBalance", &
-                 "Deallocation failure for oldRanges and &
-                 &tmpRange")
+                    allocate (tmpRange(nSub, 3, 2), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("splitBlocksLoadBalance", &
+                                       "Memory allocation failure for tmpRange")
 
-            ! Store the cgns ID, if not already stored. In this way
-            ! it is known for which cgns blocks the subranges must be
-            ! sorted.
-
-            if(.not. cgnsBlockFlagged(cgnsID)) then
-               cgnsBlockFlagged(cgnsID) = .true.
-               ncgnsSort = ncgnsSort + 1
-               cgnsSort(ncgnsSort) = cgnsID
-            endif
+                    ! Split computational block into the desired number of
+                    ! subblocks, if possible. The actual number of splittings
+                    ! is returned in nSub.
 
-         enddo processorLoop
+                    call splitBlock(blocks(jj), nSub, nCellDiff, tmpRange)
 
-         ! Decrease the counter i for the next number of cells.
+                    ! Store the old ranges, store the old number of subblocks
+                    ! in jj, determine the new number of subblocks and allocate
+                    ! the memory for ranges.
 
-         i = i - 1
+                    jj = splitInfo(cgnsID)%nSubBlocks
+                    oldRanges => splitInfo(cgnsID)%ranges
 
-      enddo divisionLoop
+                    splitInfo(cgnsID)%nSubBlocks = jj + nSub - 1
 
-      ! Sort the subranges of the CGNS blocks that were split in this
-      ! routine in increasing order.
+                    k = splitInfo(cgnsID)%nSubBlocks
+                    allocate (splitInfo(cgnsID)%ranges(k, 3, 2), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("splitBlocksLoadBalance", &
+                                       "Memory allocation failure for ranges")
 
-      do i=1,ncgnsSort
-         call sortRangesSplitInfo(splitInfo(cgnsSort(i)))
-      enddo
+                    ! Determine the new number of computational blocks.
+                    ! This is the old number plus the number or extra blocks
+                    ! created by the splitting.
 
-    end subroutine splitBlocksLoadBalance
+                    nBlocks = nBlocks + splitInfo(cgnsID)%nSubBlocks - jj
 
-  end subroutine blockDistribution
+                    ! Copy the original info back into ranges.
 
-  subroutine initFlowDoms
-    !
-    !       initFlowDoms allocates the memory for flowDoms and initializes
-    !       its pointers to null pointers, such that they do not have
-    !       random targets.
-    !
-    use constants
-    use block, only : flowDoms, nDom
-    use inputIteration, only : nMGLevels, mgStartLevel
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use utils, only : terminate, nullifyFlowDomPointers
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
+                    do k = 1, jj
+                        splitInfo(cgnsID)%ranges(k, 1, 1) = oldRanges(k, 1, 1)
+                        splitInfo(cgnsID)%ranges(k, 2, 1) = oldRanges(k, 2, 1)
+                        splitInfo(cgnsID)%ranges(k, 3, 1) = oldRanges(k, 3, 1)
 
-    integer(kind=intType) :: i, j, k, nn
+                        splitInfo(cgnsID)%ranges(k, 1, 2) = oldRanges(k, 1, 2)
+                        splitInfo(cgnsID)%ranges(k, 2, 2) = oldRanges(k, 2, 2)
+                        splitInfo(cgnsID)%ranges(k, 3, 2) = oldRanges(k, 3, 2)
+                    end do
 
-    ! Allocate the memory for flowDoms. Set nn to the maximum of the
-    ! number of mg levels needed in the cycle and mg start level.
-    ! This is namely the amount of grid levels the solver needs.
+                    ! The splitting kk has disappeared. Replace it by the
+                    ! first splitting in tmpRange.
 
-    nn = max(nMGLevels, mgStartlevel)
-    allocate(flowDoms(nDom, nn, nTimeIntervalsSpectral), stat=ierr)
-    if(ierr /= 0)                    &
-         call terminate("initFlowDoms", &
-         "Memory allocation failure for flowDoms")
+                    splitInfo(cgnsID)%ranges(kk, 1, 1) = tmpRange(1, 1, 1)
+                    splitInfo(cgnsID)%ranges(kk, 2, 1) = tmpRange(1, 2, 1)
+                    splitInfo(cgnsID)%ranges(kk, 3, 1) = tmpRange(1, 3, 1)
 
-    ! Loop over all the blocks and initialize its pointers to the
-    ! null-pointer.
+                    splitInfo(cgnsID)%ranges(kk, 1, 2) = tmpRange(1, 1, 2)
+                    splitInfo(cgnsID)%ranges(kk, 2, 2) = tmpRange(1, 2, 2)
+                    splitInfo(cgnsID)%ranges(kk, 3, 2) = tmpRange(1, 3, 2)
 
-    do k=1,nTimeIntervalsSpectral
-       do j=1,nn
-          do i=1,nDom
-             call nullifyFlowDomPointers(i,j,k)
-          enddo
-       enddo
-    enddo
+                    ! Add the other splittings to the end of ranges.
 
-  end subroutine initFlowDoms
-  subroutine sortSubfaces(oldSubfaceID, blockID)
-    !
-    !       sortSubfaces sorts the boundary subfaces of the given block
-    !       such that viscous subfaces are numbered first, followed by
-    !       inviscid, etc.
-    !
-    use constants
-    use partitionMod, only : distributionBlockType
-    use sorting, only : qsortIntegers, bsearchIntegers
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    integer(kind=intType), dimension(*), intent(out) :: oldSubfaceID
-    type(distributionBlockType), intent(in)          :: blockID
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, ii, nDiff
+                    do k = 2, nSub
+                        jj = jj + 1
 
-    integer(kind=intType), dimension(blockID%nBocos) :: bcPrior
-    integer(kind=intType), dimension(blockID%nBocos) :: bcPriorSort
+                        splitInfo(cgnsID)%ranges(jj, 1, 1) = tmpRange(k, 1, 1)
+                        splitInfo(cgnsID)%ranges(jj, 2, 1) = tmpRange(k, 2, 1)
+                        splitInfo(cgnsID)%ranges(jj, 3, 1) = tmpRange(k, 3, 1)
+
+                        splitInfo(cgnsID)%ranges(jj, 1, 2) = tmpRange(k, 1, 2)
+                        splitInfo(cgnsID)%ranges(jj, 2, 2) = tmpRange(k, 2, 2)
+                        splitInfo(cgnsID)%ranges(jj, 3, 2) = tmpRange(k, 3, 2)
+                    end do
+
+                    ! Deallocate the memory of oldRanges and tmpRange.
+
+                    deallocate (oldRanges, tmpRange, stat=ierr)
+                    if (ierr /= 0) &
+                         call terminate("splitBlocksLoadBalance", &
+                         "Deallocation failure for oldRanges and &
+                         &tmpRange")
 
-    integer(kind=intType), dimension(0:blockID%nBocos) :: mult
+                    ! Store the cgns ID, if not already stored. In this way
+                    ! it is known for which cgns blocks the subranges must be
+                    ! sorted.
+
+                    if (.not. cgnsBlockFlagged(cgnsID)) then
+                        cgnsBlockFlagged(cgnsID) = .true.
+                        ncgnsSort = ncgnsSort + 1
+                        cgnsSort(ncgnsSort) = cgnsID
+                    end if
 
-    ! Loop over the boundary subfaces and determine the priorities.
-    ! Store the priorities in both bcPrior and bcPriorSort.
+                end do processorLoop
 
-    do i=1,blockID%nBocos
+                ! Decrease the counter i for the next number of cells.
 
-       select case (blockID%BCType(i))
+                i = i - 1
 
-       case (NSWallAdiabatic)
-          bcPrior(i) = 1
+            end do divisionLoop
 
-       case (NSWallIsothermal)
-          bcPrior(i) = 2
+            ! Sort the subranges of the CGNS blocks that were split in this
+            ! routine in increasing order.
 
-       case (EulerWall)
-          bcPrior(i) = 3
+            do i = 1, ncgnsSort
+                call sortRangesSplitInfo(splitInfo(cgnsSort(i)))
+            end do
 
-       case (Symm)
-          bcPrior(i) = 4
+        end subroutine splitBlocksLoadBalance
 
-       case (SymmPolar)
-          bcPrior(i) = 5
+    end subroutine blockDistribution
 
-       case (FarField)
-          bcPrior(i) = 6
+    subroutine initFlowDoms
+        !
+        !       initFlowDoms allocates the memory for flowDoms and initializes
+        !       its pointers to null pointers, such that they do not have
+        !       random targets.
+        !
+        use constants
+        use block, only: flowDoms, nDom
+        use inputIteration, only: nMGLevels, mgStartLevel
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use utils, only: terminate, nullifyFlowDomPointers
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
 
-       case (SupersonicInflow)
-          bcPrior(i) = 7
+        integer(kind=intType) :: i, j, k, nn
 
-       case (SubsonicInflow)
-          bcPrior(i) = 8
+        ! Allocate the memory for flowDoms. Set nn to the maximum of the
+        ! number of mg levels needed in the cycle and mg start level.
+        ! This is namely the amount of grid levels the solver needs.
 
-       case (SupersonicOutflow)
-          bcPrior(i) = 9
+        nn = max(nMGLevels, mgStartlevel)
+        allocate (flowDoms(nDom, nn, nTimeIntervalsSpectral), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("initFlowDoms", &
+                           "Memory allocation failure for flowDoms")
 
-       case (SubsonicOutflow)
-          bcPrior(i) = 10
+        ! Loop over all the blocks and initialize its pointers to the
+        ! null-pointer.
 
-       case (MassBleedInflow)
-          bcPrior(i) = 11
+        do k = 1, nTimeIntervalsSpectral
+            do j = 1, nn
+                do i = 1, nDom
+                    call nullifyFlowDomPointers(i, j, k)
+                end do
+            end do
+        end do
 
-       case (MassBleedOutflow)
-          bcPrior(i) = 12
+    end subroutine initFlowDoms
+    subroutine sortSubfaces(oldSubfaceID, blockID)
+        !
+        !       sortSubfaces sorts the boundary subfaces of the given block
+        !       such that viscous subfaces are numbered first, followed by
+        !       inviscid, etc.
+        !
+        use constants
+        use partitionMod, only: distributionBlockType
+        use sorting, only: qsortIntegers, bsearchIntegers
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        integer(kind=intType), dimension(*), intent(out) :: oldSubfaceID
+        type(distributionBlockType), intent(in) :: blockID
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, ii, nDiff
 
-       case (mDot)
-          bcPrior(i) = 13
+        integer(kind=intType), dimension(blockID%nBocos) :: bcPrior
+        integer(kind=intType), dimension(blockID%nBocos) :: bcPriorSort
 
-       case (bcThrust)
-          bcPrior(i) = 14
+        integer(kind=intType), dimension(0:blockID%nBocos) :: mult
 
-       case (Extrap)
-          bcPrior(i) = 15
+        ! Loop over the boundary subfaces and determine the priorities.
+        ! Store the priorities in both bcPrior and bcPriorSort.
 
-       case (SlidingInterface)
-          bcPrior(i) = 19
+        do i = 1, blockID%nBocos
 
-       case (OversetOuterBound)
-          bcPrior(i) = 20
+            select case (blockID%BCType(i))
 
-       case (DomainInterfaceAll)
-          bcPrior(i) = 21
+            case (NSWallAdiabatic)
+                bcPrior(i) = 1
 
-       case (DomainInterfaceRhoUVW)
-          bcPrior(i) = 22
+            case (NSWallIsothermal)
+                bcPrior(i) = 2
 
-       case (DomainInterfaceP)
-          bcPrior(i) = 23
+            case (EulerWall)
+                bcPrior(i) = 3
 
-       case (DomainInterfaceRho)
-          bcPrior(i) = 24
+            case (Symm)
+                bcPrior(i) = 4
 
-       case (DomainInterfaceTotal)
-          bcPrior(i) = 25
+            case (SymmPolar)
+                bcPrior(i) = 5
 
-       end select
+            case (FarField)
+                bcPrior(i) = 6
 
-       bcPriorSort(i) = bcPrior(i)
+            case (SupersonicInflow)
+                bcPrior(i) = 7
 
-    enddo
+            case (SubsonicInflow)
+                bcPrior(i) = 8
 
-    ! Sort bcPriorSort in increasing order.
+            case (SupersonicOutflow)
+                bcPrior(i) = 9
 
-    call qsortIntegers(bcPriorSort, blockID%nBocos)
+            case (SubsonicOutflow)
+                bcPrior(i) = 10
 
-    ! Get rid of the multiple entries and store the multiplicity in
-    ! cumulative storage format. nDiff contains the number of
-    ! different boundary conditions for this block. The initialization
-    ! with the min function is necessary to be able to treat blocks
-    ! without a boundary condition correctly.
+            case (MassBleedInflow)
+                bcPrior(i) = 11
 
-    nDiff       = min(1_intType, blockID%nBocos)
-    mult(0)     = 0
-    mult(nDiff) = 1
+            case (MassBleedOutflow)
+                bcPrior(i) = 12
 
-    do i=2,blockID%nBocos
-       if(bcPriorSort(i) == bcPriorSort(nDiff)) then
-          mult(nDiff) = mult(nDiff) + 1
-       else
-          nDiff              = nDiff + 1
-          mult(nDiff)        = mult(nDiff-1) + 1
-          bcPriorSort(nDiff) = bcPriorSort(i)
-       endif
-    enddo
+            case (mDot)
+                bcPrior(i) = 13
 
-    ! Determine the old subface ID by searching in the sorted
-    ! priorities.
+            case (bcThrust)
+                bcPrior(i) = 14
 
-    do i=1,blockID%nBocos
+            case (Extrap)
+                bcPrior(i) = 15
 
-       ii = bsearchIntegers(bcPrior(i), bcPriorSort(1:nDiff))
+            case (SlidingInterface)
+                bcPrior(i) = 19
 
-       ! Update the lower boundary in the multiplicity for this
-       ! boundary condition and store the entry a bit easier.
+            case (OversetOuterBound)
+                bcPrior(i) = 20
 
-       mult(ii-1) = mult(ii-1) + 1
-       ii         = mult(ii-1)
+            case (DomainInterfaceAll)
+                bcPrior(i) = 21
 
-       ! Store the mapping of the new to old subface id.
+            case (DomainInterfaceRhoUVW)
+                bcPrior(i) = 22
 
-       oldSubfaceID(ii) = i
+            case (DomainInterfaceP)
+                bcPrior(i) = 23
 
-    enddo
+            case (DomainInterfaceRho)
+                bcPrior(i) = 24
 
-  end subroutine sortSubfaces
+            case (DomainInterfaceTotal)
+                bcPrior(i) = 25
 
-  subroutine determineComputeBlocks(splitInfo)
-    !
-    !       determineComputeBlocks determines the computational blocks
-    !       from the original grid and the given information how to split
-    !       these blocks.
-    !
-    use constants
-    use cgnsGrid, only :cgnsnDom, cgnsDoms
-    use partitionMod, only : blocks, nBlocks, splitCGNSType
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    type(splitCGNSType), dimension(cgnsNDom), intent(in) :: splitInfo
-    !
-    !      Local variables.
-    !
-    integer :: ierr
+            end select
 
-    integer(kind=intType) :: i, ii, jj, mm
-    integer(kind=intType) :: nx, ny, nz, nAlloc
+            bcPriorSort(i) = bcPrior(i)
 
-    integer(kind=intType), dimension(0:cgnsNDom) :: nSubPerCGNS
+        end do
 
-    ! Determine the number of subblocks per cgns block in
-    ! cumulative storage format.
+        ! Sort bcPriorSort in increasing order.
 
-    nSubPerCGNS(0) = 0
-    do i=1,cgnsNDom
-       nSubPerCGNS(i) = nSubPerCGNS(i-1) + splitInfo(i)%nSubblocks
-    enddo
+        call qsortIntegers(bcPriorSort, blockID%nBocos)
 
-    ! Check whether blocks are already allocated. If so, release
-    ! the memory.
-    if( allocated(blocks) ) then
+        ! Get rid of the multiple entries and store the multiplicity in
+        ! cumulative storage format. nDiff contains the number of
+        ! different boundary conditions for this block. The initialization
+        ! with the min function is necessary to be able to treat blocks
+        ! without a boundary condition correctly.
 
-       ! Loop over the number of old blocks and release the memory.
+        nDiff = min(1_intType, blockID%nBocos)
+        mult(0) = 0
+        mult(nDiff) = 1
 
-       mm = ubound(blocks,1)
-       do i = 1,mm
-          deallocate(blocks(i)%BCType,      blocks(i)%BCFaceID,   &
-               blocks(i)%cgnsSubface, blocks(i)%inBeg,      &
-               blocks(i)%jnBeg,       blocks(i)%knBeg,      &
-               blocks(i)%inEnd,       blocks(i)%jnEnd,      &
-               blocks(i)%knEnd,       blocks(i)%dinBeg,     &
-               blocks(i)%djnBeg,      blocks(i)%dknBeg,     &
-               blocks(i)%dinEnd,      blocks(i)%djnEnd,     &
-               blocks(i)%dknEnd,      blocks(i)%neighBlock, &
-               blocks(i)%l1,          blocks(i)%l2,         &
-               blocks(i)%l3,          blocks(i)%groupNum,   &
-               stat=ierr)
-          if(ierr /= 0) &
-               call terminate("determineComputeBlocks", &
-               "Deallocation error for subface info")
-       enddo
-
-       ! Release the memory of blocks itself.
-
-       deallocate(blocks, stat=ierr)
-       if(ierr /= 0) &
+        do i = 2, blockID%nBocos
+            if (bcPriorSort(i) == bcPriorSort(nDiff)) then
+                mult(nDiff) = mult(nDiff) + 1
+            else
+                nDiff = nDiff + 1
+                mult(nDiff) = mult(nDiff - 1) + 1
+                bcPriorSort(nDiff) = bcPriorSort(i)
+            end if
+        end do
+
+        ! Determine the old subface ID by searching in the sorted
+        ! priorities.
+
+        do i = 1, blockID%nBocos
+
+            ii = bsearchIntegers(bcPrior(i), bcPriorSort(1:nDiff))
+
+            ! Update the lower boundary in the multiplicity for this
+            ! boundary condition and store the entry a bit easier.
+
+            mult(ii - 1) = mult(ii - 1) + 1
+            ii = mult(ii - 1)
+
+            ! Store the mapping of the new to old subface id.
+
+            oldSubfaceID(ii) = i
+
+        end do
+
+    end subroutine sortSubfaces
+
+    subroutine determineComputeBlocks(splitInfo)
+        !
+        !       determineComputeBlocks determines the computational blocks
+        !       from the original grid and the given information how to split
+        !       these blocks.
+        !
+        use constants
+        use cgnsGrid, only: cgnsnDom, cgnsDoms
+        use partitionMod, only: blocks, nBlocks, splitCGNSType
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        type(splitCGNSType), dimension(cgnsNDom), intent(in) :: splitInfo
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: i, ii, jj, mm
+        integer(kind=intType) :: nx, ny, nz, nAlloc
+
+        integer(kind=intType), dimension(0:cgnsNDom) :: nSubPerCGNS
+
+        ! Determine the number of subblocks per cgns block in
+        ! cumulative storage format.
+
+        nSubPerCGNS(0) = 0
+        do i = 1, cgnsNDom
+            nSubPerCGNS(i) = nSubPerCGNS(i - 1) + splitInfo(i)%nSubblocks
+        end do
+
+        ! Check whether blocks are already allocated. If so, release
+        ! the memory.
+        if (allocated(blocks)) then
+
+            ! Loop over the number of old blocks and release the memory.
+
+            mm = ubound(blocks, 1)
+            do i = 1, mm
+                deallocate (blocks(i)%BCType, blocks(i)%BCFaceID, &
+                            blocks(i)%cgnsSubface, blocks(i)%inBeg, &
+                            blocks(i)%jnBeg, blocks(i)%knBeg, &
+                            blocks(i)%inEnd, blocks(i)%jnEnd, &
+                            blocks(i)%knEnd, blocks(i)%dinBeg, &
+                            blocks(i)%djnBeg, blocks(i)%dknBeg, &
+                            blocks(i)%dinEnd, blocks(i)%djnEnd, &
+                            blocks(i)%dknEnd, blocks(i)%neighBlock, &
+                            blocks(i)%l1, blocks(i)%l2, &
+                            blocks(i)%l3, blocks(i)%groupNum, &
+                            stat=ierr)
+                if (ierr /= 0) &
+                    call terminate("determineComputeBlocks", &
+                                   "Deallocation error for subface info")
+            end do
+
+            ! Release the memory of blocks itself.
+
+            deallocate (blocks, stat=ierr)
+            if (ierr /= 0) &
+                call terminate("determineComputeBlocks", &
+                               "Deallocation error for blocks")
+        end if
+
+        ! Allocate the memory for blocks.
+
+        allocate (blocks(nBlocks), stat=ierr)
+        if (ierr /= 0) &
             call terminate("determineComputeBlocks", &
-            "Deallocation error for blocks")
-    endif
-
-    ! Allocate the memory for blocks.
-
-    allocate(blocks(nBlocks), stat=ierr)
-    if(ierr /= 0) &
-         call terminate("determineComputeBlocks", &
-         "Memory allocation failure for blocks")
-
-    ! Set the counter ii for the global computational block number
-    ! and loop over the cgns blocks.
-
-    ii = 0
-    cgnsLoop: do i=1,cgnsNDom
-
-       ! Loop over the number of subblocks of this cgns block.
-
-       subblockLoop: do mm=1,splitInfo(i)%nSubblocks
-
-          ! Update the counter ii and store the number of cells in
-          ! the three directions in nx, ny and nz.
-
-          ii = ii + 1
-          nx = splitInfo(i)%ranges(mm,1,2) &
-               - splitInfo(i)%ranges(mm,1,1)
-          ny = splitInfo(i)%ranges(mm,2,2) &
-               - splitInfo(i)%ranges(mm,2,1)
-          nz = splitInfo(i)%ranges(mm,3,2) &
-               - splitInfo(i)%ranges(mm,3,1)
-
-          ! Initialize the scalar variables of blocks(ii).
-
-          blocks(ii)%nx = nx; blocks(ii)%il = nx + 1
-          blocks(ii)%ny = ny; blocks(ii)%jl = ny + 1
-          blocks(ii)%nz = nz; blocks(ii)%kl = nz + 1
-
-          blocks(ii)%ncell = nx*ny*nz
-          blocks(ii)%nface = (nx+1)*ny*nz + (ny+1)*nx*nz &
-               + (nz+1)*nx*ny
-
-          blocks(ii)%cgnsBlockID = i
-
-          blocks(ii)%iBegor = splitInfo(i)%ranges(mm,1,1)
-          blocks(ii)%jBegor = splitInfo(i)%ranges(mm,2,1)
-          blocks(ii)%kBegor = splitInfo(i)%ranges(mm,3,1)
-
-          blocks(ii)%iEndor = splitInfo(i)%ranges(mm,1,2)
-          blocks(ii)%jEndor = splitInfo(i)%ranges(mm,2,2)
-          blocks(ii)%kEndor = splitInfo(i)%ranges(mm,3,2)
-
-          blocks(ii)%nBocos   = 0
-          blocks(ii)%nSubface = 0
-          blocks(ii)%n1to1    = 0
-
-          ! Do an allocation for the subface info. NAlloc is such
-          ! that no reallocation is needed for the boundary info.
-
-          nAlloc = cgnsDoms(i)%nBocos + cgnsDoms(i)%n1to1
-
-          allocate(blocks(ii)%BCType(nAlloc),      &
-               blocks(ii)%BCFaceID(nAlloc),    &
-               blocks(ii)%cgnsSubface(nAlloc), &
-               blocks(ii)%inBeg(nAlloc),       &
-               blocks(ii)%jnBeg(nAlloc),       &
-               blocks(ii)%knBeg(nAlloc),       &
-               blocks(ii)%inEnd(nAlloc),       &
-               blocks(ii)%jnEnd(nAlloc),       &
-               blocks(ii)%knEnd(nAlloc),       &
-               blocks(ii)%dinBeg(nAlloc),      &
-               blocks(ii)%djnBeg(nAlloc),      &
-               blocks(ii)%dknBeg(nAlloc),      &
-               blocks(ii)%dinEnd(nAlloc),      &
-               blocks(ii)%djnEnd(nAlloc),      &
-               blocks(ii)%dknEnd(nAlloc),      &
-               blocks(ii)%neighBlock(nAlloc),  &
-               blocks(ii)%l1(nAlloc),          &
-               blocks(ii)%l2(nAlloc),          &
-               blocks(ii)%l3(nAlloc),          &
-               blocks(ii)%groupNum(nAlloc),    &
-               stat=ierr)
-          if(ierr /= 0) &
-               call terminate("determineComputeBlocks", &
-               "Memory allocation failure for &
-               &subface info")
-
-          ! Determine the boundary condition subfaces and the subfaces
-          ! of the subblock, which are located on the block boundaries
-          ! of the original cgns block.
-
-          jj = 0
-          call BCFacesSubblock(i, ii, jj)
-          blocks(ii)%nBocos = jj
-
-          call externalFacesSubblock(i, ii, jj, nSubPerCGNS, &
-               nAlloc, splitInfo)
-
-          ! Determine the subfaces of the subblock created by the
-          ! splitting of the original block.
-
-          call internalFacesSubblock(i, ii, jj, nSubPerCGNS, &
-               nAlloc, splitInfo(i))
-          blocks(ii)%nSubface = jj
-          blocks(ii)%n1to1    = jj - blocks(ii)%nBocos
-
-       enddo subblockLoop
-    enddo cgnsLoop
-
-  end subroutine determineComputeBlocks
-
-  !========================================================================
-
-  subroutine BCFacesSubblock(cgnsID, ii, jj)
-    !
-    !       BCFacesSubblock determines the boundary subfaces of compute
-    !       block ii, which is a subblock of the given cgns block.
-    !       Jj is the counter for the number of subfaces.
-    !
-    use cgnsGrid
-    use communication
-    use inputPhysics
-    use partitionMod
-    use utils, only : terminate
-    use commonFormats, only : strings
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in)    :: cgnsID, ii
-    integer(kind=intType), intent(inout) :: jj
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: j, mm
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
-
-    character(len=maxCGNSNameLen) :: zoneName, subName
-    character(len=2*maxStringLen) :: errorMessage
-
-    ! Loop over the physical boundaries of the original block.
-
-    bocoLoop: do j=1,cgnsDoms(cgnsID)%nBocos
-
-       ! Continue with the next boundary subface if this is a
-       ! degenerated subface.
-
-       if(.not. cgnsDoms(cgnsID)%bocoInfo(j)%actualFace) cycle
-
-       ! Store the subface range a bit easier. Make sure that the
-       ! indices run from low to high.
-
-       iBeg = min(cgnsDoms(cgnsID)%bocoInfo(j)%iBeg, &
-            cgnsDoms(cgnsID)%bocoInfo(j)%iEnd)
-       iEnd = max(cgnsDoms(cgnsID)%bocoInfo(j)%iBeg, &
-            cgnsDoms(cgnsID)%bocoInfo(j)%iEnd)
-
-       jBeg = min(cgnsDoms(cgnsID)%bocoInfo(j)%jBeg, &
-            cgnsDoms(cgnsID)%bocoInfo(j)%jEnd)
-       jEnd = max(cgnsDoms(cgnsID)%bocoInfo(j)%jBeg, &
-            cgnsDoms(cgnsID)%bocoInfo(j)%jEnd)
-
-       kBeg = min(cgnsDoms(cgnsID)%bocoInfo(j)%kBeg, &
-            cgnsDoms(cgnsID)%bocoInfo(j)%kEnd)
-       kEnd = max(cgnsDoms(cgnsID)%bocoInfo(j)%kBeg, &
-            cgnsDoms(cgnsID)%bocoInfo(j)%kEnd)
+                           "Memory allocation failure for blocks")
+
+        ! Set the counter ii for the global computational block number
+        ! and loop over the cgns blocks.
+
+        ii = 0
+        cgnsLoop: do i = 1, cgnsNDom
+
+            ! Loop over the number of subblocks of this cgns block.
+
+            subblockLoop: do mm = 1, splitInfo(i)%nSubblocks
+
+                ! Update the counter ii and store the number of cells in
+                ! the three directions in nx, ny and nz.
+
+                ii = ii + 1
+                nx = splitInfo(i)%ranges(mm, 1, 2) &
+                     - splitInfo(i)%ranges(mm, 1, 1)
+                ny = splitInfo(i)%ranges(mm, 2, 2) &
+                     - splitInfo(i)%ranges(mm, 2, 1)
+                nz = splitInfo(i)%ranges(mm, 3, 2) &
+                     - splitInfo(i)%ranges(mm, 3, 1)
+
+                ! Initialize the scalar variables of blocks(ii).
+
+                blocks(ii)%nx = nx; blocks(ii)%il = nx + 1
+                blocks(ii)%ny = ny; blocks(ii)%jl = ny + 1
+                blocks(ii)%nz = nz; blocks(ii)%kl = nz + 1
+
+                blocks(ii)%ncell = nx * ny * nz
+                blocks(ii)%nface = (nx + 1) * ny * nz + (ny + 1) * nx * nz &
+                                   + (nz + 1) * nx * ny
+
+                blocks(ii)%cgnsBlockID = i
+
+                blocks(ii)%iBegor = splitInfo(i)%ranges(mm, 1, 1)
+                blocks(ii)%jBegor = splitInfo(i)%ranges(mm, 2, 1)
+                blocks(ii)%kBegor = splitInfo(i)%ranges(mm, 3, 1)
+
+                blocks(ii)%iEndor = splitInfo(i)%ranges(mm, 1, 2)
+                blocks(ii)%jEndor = splitInfo(i)%ranges(mm, 2, 2)
+                blocks(ii)%kEndor = splitInfo(i)%ranges(mm, 3, 2)
+
+                blocks(ii)%nBocos = 0
+                blocks(ii)%nSubface = 0
+                blocks(ii)%n1to1 = 0
+
+                ! Do an allocation for the subface info. NAlloc is such
+                ! that no reallocation is needed for the boundary info.
+
+                nAlloc = cgnsDoms(i)%nBocos + cgnsDoms(i)%n1to1
+
+                allocate (blocks(ii)%BCType(nAlloc), &
+                          blocks(ii)%BCFaceID(nAlloc), &
+                          blocks(ii)%cgnsSubface(nAlloc), &
+                          blocks(ii)%inBeg(nAlloc), &
+                          blocks(ii)%jnBeg(nAlloc), &
+                          blocks(ii)%knBeg(nAlloc), &
+                          blocks(ii)%inEnd(nAlloc), &
+                          blocks(ii)%jnEnd(nAlloc), &
+                          blocks(ii)%knEnd(nAlloc), &
+                          blocks(ii)%dinBeg(nAlloc), &
+                          blocks(ii)%djnBeg(nAlloc), &
+                          blocks(ii)%dknBeg(nAlloc), &
+                          blocks(ii)%dinEnd(nAlloc), &
+                          blocks(ii)%djnEnd(nAlloc), &
+                          blocks(ii)%dknEnd(nAlloc), &
+                          blocks(ii)%neighBlock(nAlloc), &
+                          blocks(ii)%l1(nAlloc), &
+                          blocks(ii)%l2(nAlloc), &
+                          blocks(ii)%l3(nAlloc), &
+                          blocks(ii)%groupNum(nAlloc), &
+                          stat=ierr)
+                if (ierr /= 0) &
+                     call terminate("determineComputeBlocks", &
+                     "Memory allocation failure for &
+                     &subface info")
+
+                ! Determine the boundary condition subfaces and the subfaces
+                ! of the subblock, which are located on the block boundaries
+                ! of the original cgns block.
+
+                jj = 0
+                call BCFacesSubblock(i, ii, jj)
+                blocks(ii)%nBocos = jj
+
+                call externalFacesSubblock(i, ii, jj, nSubPerCGNS, &
+                                           nAlloc, splitInfo)
+
+                ! Determine the subfaces of the subblock created by the
+                ! splitting of the original block.
+
+                call internalFacesSubblock(i, ii, jj, nSubPerCGNS, &
+                                           nAlloc, splitInfo(i))
+                blocks(ii)%nSubface = jj
+                blocks(ii)%n1to1 = jj - blocks(ii)%nBocos
+
+            end do subblockLoop
+        end do cgnsLoop
+
+    end subroutine determineComputeBlocks
+
+    !========================================================================
+
+    subroutine BCFacesSubblock(cgnsID, ii, jj)
+        !
+        !       BCFacesSubblock determines the boundary subfaces of compute
+        !       block ii, which is a subblock of the given cgns block.
+        !       Jj is the counter for the number of subfaces.
+        !
+        use cgnsGrid
+        use communication
+        use inputPhysics
+        use partitionMod
+        use utils, only: terminate
+        use commonFormats, only: strings
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: cgnsID, ii
+        integer(kind=intType), intent(inout) :: jj
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: j, mm
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
+
+        character(len=maxCGNSNameLen) :: zoneName, subName
+        character(len=2*maxStringLen) :: errorMessage
+
+        ! Loop over the physical boundaries of the original block.
+
+        bocoLoop: do j = 1, cgnsDoms(cgnsID)%nBocos
+
+            ! Continue with the next boundary subface if this is a
+            ! degenerated subface.
+
+            if (.not. cgnsDoms(cgnsID)%bocoInfo(j)%actualFace) cycle
+
+            ! Store the subface range a bit easier. Make sure that the
+            ! indices run from low to high.
+
+            iBeg = min(cgnsDoms(cgnsID)%bocoInfo(j)%iBeg, &
+                       cgnsDoms(cgnsID)%bocoInfo(j)%iEnd)
+            iEnd = max(cgnsDoms(cgnsID)%bocoInfo(j)%iBeg, &
+                       cgnsDoms(cgnsID)%bocoInfo(j)%iEnd)
+
+            jBeg = min(cgnsDoms(cgnsID)%bocoInfo(j)%jBeg, &
+                       cgnsDoms(cgnsID)%bocoInfo(j)%jEnd)
+            jEnd = max(cgnsDoms(cgnsID)%bocoInfo(j)%jBeg, &
+                       cgnsDoms(cgnsID)%bocoInfo(j)%jEnd)
+
+            kBeg = min(cgnsDoms(cgnsID)%bocoInfo(j)%kBeg, &
+                       cgnsDoms(cgnsID)%bocoInfo(j)%kEnd)
+            kEnd = max(cgnsDoms(cgnsID)%bocoInfo(j)%kBeg, &
+                       cgnsDoms(cgnsID)%bocoInfo(j)%kEnd)
+
+            ! Check for a possible overlap between the current boundary
+            ! subface and subblock ii.
+
+            overlap: if (iBeg <= blocks(ii)%iEndor .and. &
+                         iEnd >= blocks(ii)%iBegor .and. &
+                         jBeg <= blocks(ii)%jEndor .and. &
+                         jEnd >= blocks(ii)%jBegor .and. &
+                         kBeg <= blocks(ii)%kEndor .and. &
+                         kEnd >= blocks(ii)%kBegor) then
+
+                ! Determine the overlap region between the current boundary
+                ! face and subblock ii.
+
+                iBeg = max(blocks(ii)%iBegor, iBeg)
+                iEnd = min(blocks(ii)%iEndor, iEnd)
 
-       ! Check for a possible overlap between the current boundary
-       ! subface and subblock ii.
+                jBeg = max(blocks(ii)%jBegor, jBeg)
+                jEnd = min(blocks(ii)%jEndor, jEnd)
+
+                kBeg = max(blocks(ii)%kBegor, kBeg)
+                kEnd = min(blocks(ii)%kEndor, kEnd)
 
-       overlap: if(iBeg <= blocks(ii)%iEndor .and. &
-            iEnd >= blocks(ii)%iBegor .and. &
-            jBeg <= blocks(ii)%jEndor .and. &
-            jEnd >= blocks(ii)%jBegor .and. &
-            kBeg <= blocks(ii)%kEndor .and. &
-            kEnd >= blocks(ii)%kBegor) then
+                ! Check the number of equal indices, which is stored in mm.
 
-          ! Determine the overlap region between the current boundary
-          ! face and subblock ii.
+                mm = 0
+                if (iBeg == iEnd) mm = mm + 1
+                if (jBeg == jEnd) mm = mm + 1
+                if (kBeg == kEnd) mm = mm + 1
 
-          iBeg = max(blocks(ii)%iBegor, iBeg)
-          iEnd = min(blocks(ii)%iEndor, iEnd)
+                ! If no constant index is found something is wrong with the
+                ! grid. Processor 0 prints an error message, while the
+                ! others wait until they are killed.
+
+                if (mm == 0) then
+                    if (myID == 0) then
+                        zoneName = cgnsDoms(cgnsID)%zoneName
+                        subName = cgnsDoms(cgnsID)%bocoInfo(j)%bocoName
+                        write (errorMessage, strings) "Zone ", trim(zoneName), ", boundary subface ", trim(subName), &
+                            ": No constant index found for subface"
+                        call terminate("BCFacesSubblock", errorMessage)
+                    end if
+
+                    call mpi_barrier(ADflow_comm_world, ierr)
+                end if
+
+                ! Continue with the next subface if there is more than
+                ! one constant index. This means that there is no overlap,
+                ! but just an adjacency.
+
+                if (mm > 1) cycle
+
+                ! Update the counter jj and determine the range of the
+                ! subface in the subblock.
 
-          jBeg = max(blocks(ii)%jBegor, jBeg)
-          jEnd = min(blocks(ii)%jEndor, jEnd)
+                jj = jj + 1
 
-          kBeg = max(blocks(ii)%kBegor, kBeg)
-          kEnd = min(blocks(ii)%kEndor, kEnd)
+                blocks(ii)%inBeg(jj) = iBeg - blocks(ii)%iBegor + 1
+                blocks(ii)%inEnd(jj) = iEnd - blocks(ii)%iBegor + 1
 
-          ! Check the number of equal indices, which is stored in mm.
+                blocks(ii)%jnBeg(jj) = jBeg - blocks(ii)%jBegor + 1
+                blocks(ii)%jnEnd(jj) = jEnd - blocks(ii)%jBegor + 1
 
-          mm = 0
-          if(iBeg == iEnd) mm = mm + 1
-          if(jBeg == jEnd) mm = mm + 1
-          if(kBeg == kEnd) mm = mm + 1
+                blocks(ii)%knBeg(jj) = kBeg - blocks(ii)%kBegor + 1
+                blocks(ii)%knEnd(jj) = kEnd - blocks(ii)%kBegor + 1
 
-          ! If no constant index is found something is wrong with the
-          ! grid. Processor 0 prints an error message, while the
-          ! others wait until they are killed.
+                ! Determine the block face id on which this subface
+                ! is located.
 
-          if(mm == 0) then
-             if(myID == 0) then
-                zoneName = cgnsDoms(cgnsID)%zoneName
-                subName  = cgnsDoms(cgnsID)%bocoInfo(j)%bocoName
-                write(errorMessage, strings) "Zone ", trim(zoneName), ", boundary subface ", trim(subName), &
-                  ": No constant index found for subface"
-                call terminate("BCFacesSubblock", errorMessage)
-             endif
+                if (iBeg == iEnd) then
 
-             call mpi_barrier(ADflow_comm_world, ierr)
-          endif
+                    blocks(ii)%BCFaceID(jj) = iMax
+                    if (iBeg == blocks(ii)%iBegor) blocks(ii)%BCFaceID(jj) = iMin
 
-          ! Continue with the next subface if there is more than
-          ! one constant index. This means that there is no overlap,
-          ! but just an adjacency.
+                else if (jBeg == jEnd) then
 
-          if(mm > 1) cycle
+                    blocks(ii)%BCFaceID(jj) = jMax
+                    if (jBeg == blocks(ii)%jBegor) blocks(ii)%BCFaceID(jj) = jMin
 
-          ! Update the counter jj and determine the range of the
-          ! subface in the subblock.
+                else
 
-          jj = jj + 1
+                    blocks(ii)%BCFaceID(jj) = kMax
+                    if (kBeg == blocks(ii)%kBegor) blocks(ii)%BCFaceID(jj) = kMin
 
-          blocks(ii)%inBeg(jj) = iBeg - blocks(ii)%iBegor + 1
-          blocks(ii)%inEnd(jj) = iEnd - blocks(ii)%iBegor + 1
+                end if
 
-          blocks(ii)%jnBeg(jj) = jBeg - blocks(ii)%jBegor + 1
-          blocks(ii)%jnEnd(jj) = jEnd - blocks(ii)%jBegor + 1
+                ! Set some variables to 0, which are not relevant
+                ! for boundary subfaces.
 
-          blocks(ii)%knBeg(jj) = kBeg - blocks(ii)%kBegor + 1
-          blocks(ii)%knEnd(jj) = kEnd - blocks(ii)%kBegor + 1
+                blocks(ii)%dinBeg(jj) = 0; blocks(ii)%dinEnd(jj) = 0
+                blocks(ii)%djnBeg(jj) = 0; blocks(ii)%djnEnd(jj) = 0
+                blocks(ii)%dknBeg(jj) = 0; blocks(ii)%dknEnd(jj) = 0
 
-          ! Determine the block face id on which this subface
-          ! is located.
+                blocks(ii)%neighBlock(jj) = 0
 
-          if(iBeg == iEnd) then
+                blocks(ii)%l1(jj) = 0
+                blocks(ii)%l2(jj) = 0
+                blocks(ii)%l3(jj) = 0
 
-             blocks(ii)%BCFaceID(jj) = iMax
-             if(iBeg == blocks(ii)%iBegor) blocks(ii)%BCFaceID(jj) = iMin
+                ! Set the boundary condition and store to which original cgns
+                ! subface this subface belongs.
 
-          else if(jBeg == jEnd) then
+                blocks(ii)%BCType(jj) = cgnsDoms(cgnsID)%bocoInfo(j)%BCType
 
-             blocks(ii)%BCFaceID(jj) = jMax
-             if(jBeg == blocks(ii)%jBegor) blocks(ii)%BCFaceID(jj) = jMin
+                blocks(ii)%cgnsSubface(jj) = j
 
-          else
+                ! Check whether this is a valid boundary condition for
+                ! the current simulation.
+
+                if (blocks(ii)%BCType(jj) == BCNotValid) then
+
+                    ! To avoid a messy output only processor 0 calls
+                    ! terminate. The other processors will wait until
+                    ! they are killed.
+
+                    if (myID == 0) then
+                        zoneName = cgnsDoms(cgnsID)%zoneName
+                        subName = cgnsDoms(cgnsID)%bocoInfo(j)%bocoName
+
+                        ! Check whether this is an internal or an external
+                        ! flow problem and create the error message
+                        ! accordingly.
+
+                        if (flowType == internalFlow) then
+                          write (errorMessage, strings) "Zone ", trim(zoneName), ", boundary subface ", trim(subName), &
+                                ": Not a valid boundary condition for internal flow"
+                        else
+                          write (errorMessage, strings) "Zone ", trim(zoneName), ", boundary subface ", trim(subName), &
+                                ": Not a valid boundary condition for external flow"
+                        end if
+
+                        call terminate("BCFacesSubblock", errorMessage)
+                    end if
+
+                    call mpi_barrier(ADflow_comm_world, ierr)
+                end if
+
+                ! Store the corresponding family a bit easier.
+
+                mm = cgnsDoms(cgnsID)%bocoInfo(j)%familyID
+
+                ! Check if this is either a sliding mesh interface or a
+                ! bleed flow region. If so the group nummer is set to the
+                ! sliding interface ID or bleed flow region ID respectivily.
+                ! Otherwise the group nummer is the family nummer, which
+                ! is 0 if the subface does not belong to a family.
+
+                select case (blocks(ii)%BCType(jj))
+                case (SlidingInterface)
+                    blocks(ii)%groupNum(jj) = &
+                        cgnsDoms(cgnsID)%bocoInfo(j)%slidingID
+
+                case (MassBleedInflow, MassBleedOutflow)
+                    blocks(ii)%groupNum(jj) = cgnsFamilies(mm)%bleedRegionID
+
+                case default
+                    blocks(ii)%groupNum(jj) = mm
+                end select
 
-             blocks(ii)%BCFaceID(jj) = kMax
-             if(kBeg == blocks(ii)%kBegor) blocks(ii)%BCFaceID(jj) = kMin
+            end if overlap
 
-          endif
+        end do bocoLoop
 
-          ! Set some variables to 0, which are not relevant
-          ! for boundary subfaces.
+    end subroutine BCFacesSubblock
 
-          blocks(ii)%dinBeg(jj) = 0; blocks(ii)%dinEnd(jj) = 0
-          blocks(ii)%djnBeg(jj) = 0; blocks(ii)%djnEnd(jj) = 0
-          blocks(ii)%dknBeg(jj) = 0; blocks(ii)%dknEnd(jj) = 0
+    !========================================================================
 
-          blocks(ii)%neighBlock(jj) = 0
+    subroutine externalFacesSubblock(cgnsID, ii, jj, nSubPerCGNS, &
+                                     nAlloc, splitInfo)
+        !
+        !       externalFacesSubblock determines the block boundaries of
+        !       the compute block ii which are located on the boundaries of
+        !       the given original cgns block. As it is possible that due to
+        !       a splitting of a neighboring block the number of block
+        !       boundaries is larger than the original number, it must be
+        !       checked whether enough memory has been allocated.
+        !       jj is the counter for the number of subfaces.
+        !
+        use cgnsGrid
+        use communication
+        use partitionMod
+        use utils, only: delta, terminate
+        use commonFormats, only: strings
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        integer(kind=intType), intent(in) :: cgnsID, ii
+        integer(kind=intType), intent(inout) :: jj, nAlloc
 
-          blocks(ii)%l1(jj) = 0
-          blocks(ii)%l2(jj) = 0
-          blocks(ii)%l3(jj) = 0
+        integer(kind=intType), dimension(0:cgnsNDom), intent(in) :: &
+            nSubPerCGNS
+        type(splitCGNSType), dimension(cgnsNDom), intent(in) :: splitInfo
+        !
+        !      Local variables.
+        !
+        integer :: ierr
 
-          ! Set the boundary condition and store to which original cgns
-          ! subface this subface belongs.
+        integer(kind=intType) :: j, k, kk, mm
+        integer(kind=intType) :: l1, L2, l3
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
+        integer(kind=intType) :: diBeg, diEnd, djBeg, djEnd
+        integer(kind=intType) :: dkBeg, dkEnd
 
-          blocks(ii)%BCType(jj) = cgnsDoms(cgnsID)%bocoInfo(j)%BCType
+        integer(kind=intType), dimension(3, 3) :: trMat
 
-          blocks(ii)%cgnsSubface(jj) = j
+        integer(kind=intType), dimension(:, :, :), pointer :: ranges
+
+        character(len=maxCGNSNameLen) :: zoneName, subName
+        character(len=2*maxStringLen) :: errorMessage
+
+        logical :: diSwap, djSwap, dkSwap
+        !
+        ! Loop over the 1 to 1 block boundaries of the original block.
+
+        n1to1Loop: do j = 1, cgnsDoms(cgnsID)%n1to1
+
+            ! Store the subface range a bit easier. Make sure that the
+            ! indices run from low to high.
+
+            iBeg = min(cgnsDoms(cgnsID)%conn1to1(j)%iBeg, &
+                       cgnsDoms(cgnsID)%conn1to1(j)%iEnd)
+            iEnd = max(cgnsDoms(cgnsID)%conn1to1(j)%iBeg, &
+                       cgnsDoms(cgnsID)%conn1to1(j)%iEnd)
+
+            jBeg = min(cgnsDoms(cgnsID)%conn1to1(j)%jBeg, &
+                       cgnsDoms(cgnsID)%conn1to1(j)%jEnd)
+            jEnd = max(cgnsDoms(cgnsID)%conn1to1(j)%jBeg, &
+                       cgnsDoms(cgnsID)%conn1to1(j)%jEnd)
 
-          ! Check whether this is a valid boundary condition for
-          ! the current simulation.
+            kBeg = min(cgnsDoms(cgnsID)%conn1to1(j)%kBeg, &
+                       cgnsDoms(cgnsID)%conn1to1(j)%kEnd)
+            kEnd = max(cgnsDoms(cgnsID)%conn1to1(j)%kBeg, &
+                       cgnsDoms(cgnsID)%conn1to1(j)%kEnd)
+
+            ! Check for a possible overlap between the current boundary
+            ! subface and subblock ii.
+
+            overlap: if (iBeg <= blocks(ii)%iEndor .and. &
+                         iEnd >= blocks(ii)%iBegor .and. &
+                         jBeg <= blocks(ii)%jEndor .and. &
+                         jEnd >= blocks(ii)%jBegor .and. &
+                         kBeg <= blocks(ii)%kEndor .and. &
+                         kEnd >= blocks(ii)%kBegor) then
+
+                ! Determine the overlap region between the current boundary
+                ! face and subblock ii.
+
+                iBeg = max(blocks(ii)%iBegor, iBeg)
+                iEnd = min(blocks(ii)%iEndor, iEnd)
+
+                jBeg = max(blocks(ii)%jBegor, jBeg)
+                jEnd = min(blocks(ii)%jEndor, jEnd)
+
+                kBeg = max(blocks(ii)%kBegor, kBeg)
+                kEnd = min(blocks(ii)%kEndor, kEnd)
+
+                ! Check the number of equal indices, which is stored in kk.
 
-          if(blocks(ii)%BCType(jj) == BCNotValid) then
+                kk = 0
+                if (iBeg == iEnd) kk = kk + 1
+                if (jBeg == jEnd) kk = kk + 1
+                if (kBeg == kEnd) kk = kk + 1
+
+                ! If no constant index is found something is wrong with the
+                ! grid. Processor 0 prints an error message, while the
+                ! others wait until they are killed.
+
+                if (kk == 0) then
+                    if (myID == 0) then
+                        zoneName = cgnsDoms(cgnsID)%zoneName
+                        subName = cgnsDoms(cgnsID)%bocoInfo(j)%bocoName
+                 write (errorMessage, strings) "Zone ", trim(zoneName), ", 1 to 1 block connectivity ", trim(subName), &
+                            ": No constant index found for subface"
+                        call terminate("externalFacesSubblock", errorMessage)
+                    end if
+
+                    call mpi_barrier(ADflow_comm_world, ierr)
+                end if
+
+                ! Continue with the next subface if there is more than
+                ! one constant index. This means that there is no overlap,
+                ! but just an adjacency.
+
+                if (kk > 1) cycle
+
+                ! Preserve negative running indices of the subface.
+
+                if (cgnsDoms(cgnsID)%conn1to1(j)%iEnd < &
+                    cgnsDoms(cgnsID)%conn1to1(j)%iBeg) then
+                    mm = iBeg
+                    iBeg = iEnd
+                    iEnd = mm
+                end if
+
+                if (cgnsDoms(cgnsID)%conn1to1(j)%jEnd < &
+                    cgnsDoms(cgnsID)%conn1to1(j)%jBeg) then
+                    mm = jBeg
+                    jBeg = jEnd
+                    jEnd = mm
+                end if
+
+                if (cgnsDoms(cgnsID)%conn1to1(j)%kEnd < &
+                    cgnsDoms(cgnsID)%conn1to1(j)%kBeg) then
+                    mm = kBeg
+                    kBeg = kEnd
+                    kEnd = mm
+                end if
+
+                ! Determine the transformation matrix between the
+                ! current subface and the donor subface.
+
+                l1 = cgnsDoms(cgnsID)%conn1to1(j)%l1
+                L2 = cgnsDoms(cgnsID)%conn1to1(j)%l2
+                l3 = cgnsDoms(cgnsID)%conn1to1(j)%l3
+
+                trMat(1, 1) = sign(1_intType, l1) * delta(l1, 1_intType)
+                trMat(2, 1) = sign(1_intType, l1) * delta(l1, 2_intType)
+                trMat(3, 1) = sign(1_intType, l1) * delta(l1, 3_intType)
+
+                trMat(1, 2) = sign(1_intType, l2) * delta(l2, 1_intType)
+                trMat(2, 2) = sign(1_intType, l2) * delta(l2, 2_intType)
+                trMat(3, 2) = sign(1_intType, l2) * delta(l2, 3_intType)
+
+                trMat(1, 3) = sign(1_intType, l3) * delta(l3, 1_intType)
+                trMat(2, 3) = sign(1_intType, l3) * delta(l3, 2_intType)
+                trMat(3, 3) = sign(1_intType, l3) * delta(l3, 3_intType)
+
+                ! Determine the corresponding donor range of the subface
+                ! iBeg, iEnd; jBeg, jEnd; kBeg, kEnd.
 
-             ! To avoid a messy output only processor 0 calls
-             ! terminate. The other processors will wait until
-             ! they are killed.
+                l1 = iBeg - cgnsDoms(cgnsID)%conn1to1(j)%iBeg
+                L2 = jBeg - cgnsDoms(cgnsID)%conn1to1(j)%jBeg
+                l3 = kBeg - cgnsDoms(cgnsID)%conn1to1(j)%kBeg
 
-             if(myID == 0) then
-                zoneName = cgnsDoms(cgnsID)%zoneName
-                subName  = cgnsDoms(cgnsID)%bocoInfo(j)%bocoName
+                diBeg = cgnsDoms(cgnsID)%conn1to1(j)%diBeg &
+                        + trMat(1, 1) * l1 + trMat(1, 2) * l2 + trMat(1, 3) * l3
+                djBeg = cgnsDoms(cgnsID)%conn1to1(j)%djBeg &
+                        + trMat(2, 1) * l1 + trMat(2, 2) * l2 + trMat(2, 3) * l3
+                dkBeg = cgnsDoms(cgnsID)%conn1to1(j)%dkBeg &
+                        + trMat(3, 1) * l1 + trMat(3, 2) * l2 + trMat(3, 3) * l3
 
-                ! Check whether this is an internal or an external
-                ! flow problem and create the error message
-                ! accordingly.
+                l1 = iEnd - cgnsDoms(cgnsID)%conn1to1(j)%iBeg
+                L2 = jEnd - cgnsDoms(cgnsID)%conn1to1(j)%jBeg
+                l3 = kEnd - cgnsDoms(cgnsID)%conn1to1(j)%kBeg
 
-                if(flowType == internalFlow) then
-                   write(errorMessage, strings) "Zone ", trim(zoneName), ", boundary subface ", trim(subName), &
-                     ": Not a valid boundary condition for internal flow"
-                else
-                  write(errorMessage, strings) "Zone ", trim(zoneName), ", boundary subface ", trim(subName), &
-                    ": Not a valid boundary condition for external flow"
-                endif
-
-                call terminate("BCFacesSubblock", errorMessage)
-             endif
-
-             call mpi_barrier(ADflow_comm_world, ierr)
-          endif
-
-          ! Store the corresponding family a bit easier.
-
-          mm = cgnsDoms(cgnsID)%bocoInfo(j)%familyID
-
-          ! Check if this is either a sliding mesh interface or a
-          ! bleed flow region. If so the group nummer is set to the
-          ! sliding interface ID or bleed flow region ID respectivily.
-          ! Otherwise the group nummer is the family nummer, which
-          ! is 0 if the subface does not belong to a family.
-
-          select case (blocks(ii)%BCType(jj))
-          case (SlidingInterface)
-             blocks(ii)%groupNum(jj) = &
-                  cgnsDoms(cgnsID)%bocoInfo(j)%slidingID
-
-          case (MassBleedInflow, MassBleedOutflow)
-             blocks(ii)%groupNum(jj) = cgnsFamilies(mm)%bleedRegionID
-
-          case default
-             blocks(ii)%groupNum(jj) = mm
-          end select
-
-       endif overlap
-
-    enddo bocoLoop
-
-  end subroutine BCFacesSubblock
-
-  !========================================================================
-
-  subroutine externalFacesSubblock(cgnsID, ii, jj, nSubPerCGNS, &
-       nAlloc, splitInfo)
-    !
-    !       externalFacesSubblock determines the block boundaries of
-    !       the compute block ii which are located on the boundaries of
-    !       the given original cgns block. As it is possible that due to
-    !       a splitting of a neighboring block the number of block
-    !       boundaries is larger than the original number, it must be
-    !       checked whether enough memory has been allocated.
-    !       jj is the counter for the number of subfaces.
-    !
-    use cgnsGrid
-    use communication
-    use partitionMod
-    use utils, only : delta, terminate
-    use commonFormats, only : strings
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    integer(kind=intType), intent(in)    :: cgnsID, ii
-    integer(kind=intType), intent(inout) :: jj, nAlloc
-
-    integer(kind=intType), dimension(0:cgnsNDom), intent(in) :: &
-         nSubPerCGNS
-    type(splitCGNSType), dimension(cgnsNDom), intent(in) :: splitInfo
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: j, k, kk, mm
-    integer(kind=intType) :: l1, L2, l3
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
-    integer(kind=intType) :: diBeg, diEnd, djBeg, djEnd
-    integer(kind=intType) :: dkBeg, dkEnd
-
-    integer(kind=intType), dimension(3,3) :: trMat
-
-    integer(kind=intType), dimension(:,:,:), pointer :: ranges
-
-    character(len=maxCGNSNameLen) :: zoneName, subName
-    character(len=2*maxStringLen) :: errorMessage
-
-    logical :: diSwap, djSwap, dkSwap
-    !
-    ! Loop over the 1 to 1 block boundaries of the original block.
-
-    n1to1Loop: do j=1,cgnsDoms(cgnsID)%n1to1
-
-       ! Store the subface range a bit easier. Make sure that the
-       ! indices run from low to high.
-
-       iBeg = min(cgnsDoms(cgnsID)%conn1to1(j)%iBeg, &
-            cgnsDoms(cgnsID)%conn1to1(j)%iEnd)
-       iEnd = max(cgnsDoms(cgnsID)%conn1to1(j)%iBeg, &
-            cgnsDoms(cgnsID)%conn1to1(j)%iEnd)
-
-       jBeg = min(cgnsDoms(cgnsID)%conn1to1(j)%jBeg, &
-            cgnsDoms(cgnsID)%conn1to1(j)%jEnd)
-       jEnd = max(cgnsDoms(cgnsID)%conn1to1(j)%jBeg, &
-            cgnsDoms(cgnsID)%conn1to1(j)%jEnd)
-
-       kBeg = min(cgnsDoms(cgnsID)%conn1to1(j)%kBeg, &
-            cgnsDoms(cgnsID)%conn1to1(j)%kEnd)
-       kEnd = max(cgnsDoms(cgnsID)%conn1to1(j)%kBeg, &
-            cgnsDoms(cgnsID)%conn1to1(j)%kEnd)
-
-       ! Check for a possible overlap between the current boundary
-       ! subface and subblock ii.
-
-       overlap: if(iBeg <= blocks(ii)%iEndor .and. &
-            iEnd >= blocks(ii)%iBegor .and. &
-            jBeg <= blocks(ii)%jEndor .and. &
-            jEnd >= blocks(ii)%jBegor .and. &
-            kBeg <= blocks(ii)%kEndor .and. &
-            kEnd >= blocks(ii)%kBegor) then
-
-          ! Determine the overlap region between the current boundary
-          ! face and subblock ii.
-
-          iBeg = max(blocks(ii)%iBegor, iBeg)
-          iEnd = min(blocks(ii)%iEndor, iEnd)
-
-          jBeg = max(blocks(ii)%jBegor, jBeg)
-          jEnd = min(blocks(ii)%jEndor, jEnd)
-
-          kBeg = max(blocks(ii)%kBegor, kBeg)
-          kEnd = min(blocks(ii)%kEndor, kEnd)
-
-          ! Check the number of equal indices, which is stored in kk.
-
-          kk = 0
-          if(iBeg == iEnd) kk = kk + 1
-          if(jBeg == jEnd) kk = kk + 1
-          if(kBeg == kEnd) kk = kk + 1
-
-          ! If no constant index is found something is wrong with the
-          ! grid. Processor 0 prints an error message, while the
-          ! others wait until they are killed.
-
-          if(kk == 0) then
-             if(myID == 0) then
-                zoneName = cgnsDoms(cgnsID)%zoneName
-                subName  = cgnsDoms(cgnsID)%bocoInfo(j)%bocoName
-                write(errorMessage, strings) "Zone ", trim(zoneName), ", 1 to 1 block connectivity ", trim(subName), &
-                  ": No constant index found for subface"
-                call terminate("externalFacesSubblock", errorMessage)
-             endif
-
-             call mpi_barrier(ADflow_comm_world, ierr)
-          endif
-
-          ! Continue with the next subface if there is more than
-          ! one constant index. This means that there is no overlap,
-          ! but just an adjacency.
-
-          if(kk > 1) cycle
-
-          ! Preserve negative running indices of the subface.
-
-          if(cgnsDoms(cgnsID)%conn1to1(j)%iEnd < &
-               cgnsDoms(cgnsID)%conn1to1(j)%iBeg) then
-             mm   = iBeg
-             iBeg = iEnd
-             iEnd = mm
-          endif
-
-          if(cgnsDoms(cgnsID)%conn1to1(j)%jEnd < &
-               cgnsDoms(cgnsID)%conn1to1(j)%jBeg) then
-             mm   = jBeg
-             jBeg = jEnd
-             jEnd = mm
-          endif
+                diEnd = cgnsDoms(cgnsID)%conn1to1(j)%diBeg &
+                        + trMat(1, 1) * l1 + trMat(1, 2) * l2 + trMat(1, 3) * l3
+                djEnd = cgnsDoms(cgnsID)%conn1to1(j)%djBeg &
+                        + trMat(2, 1) * l1 + trMat(2, 2) * l2 + trMat(2, 3) * l3
+                dkEnd = cgnsDoms(cgnsID)%conn1to1(j)%dkBeg &
+                        + trMat(3, 1) * l1 + trMat(3, 2) * l2 + trMat(3, 3) * l3
 
-          if(cgnsDoms(cgnsID)%conn1to1(j)%kEnd < &
-               cgnsDoms(cgnsID)%conn1to1(j)%kBeg) then
-             mm   = kBeg
-             kBeg = kEnd
-             kEnd = mm
-          endif
-
-          ! Determine the transformation matrix between the
-          ! current subface and the donor subface.
-
-          l1 = cgnsDoms(cgnsID)%conn1to1(j)%l1
-          L2 = cgnsDoms(cgnsID)%conn1to1(j)%l2
-          l3 = cgnsDoms(cgnsID)%conn1to1(j)%l3
-
-          trMat(1,1) = sign(1_intType,l1) * delta(l1,1_intType)
-          trMat(2,1) = sign(1_intType,l1) * delta(l1,2_intType)
-          trMat(3,1) = sign(1_intType,l1) * delta(l1,3_intType)
-
-          trMat(1,2) = sign(1_intType,l2) * delta(l2,1_intType)
-          trMat(2,2) = sign(1_intType,l2) * delta(l2,2_intType)
-          trMat(3,2) = sign(1_intType,l2) * delta(l2,3_intType)
-
-          trMat(1,3) = sign(1_intType,l3) * delta(l3,1_intType)
-          trMat(2,3) = sign(1_intType,l3) * delta(l3,2_intType)
-          trMat(3,3) = sign(1_intType,l3) * delta(l3,3_intType)
-
-          ! Determine the corresponding donor range of the subface
-          ! iBeg, iEnd; jBeg, jEnd; kBeg, kEnd.
-
-          l1 = iBeg - cgnsDoms(cgnsID)%conn1to1(j)%iBeg
-          L2 = jBeg - cgnsDoms(cgnsID)%conn1to1(j)%jBeg
-          l3 = kBeg - cgnsDoms(cgnsID)%conn1to1(j)%kBeg
+                ! Make sure that the donor indices are positive running
+                ! indices. If they must be swapped, the corresponding
+                ! logical is set to .true.
 
-          diBeg = cgnsDoms(cgnsID)%conn1to1(j)%diBeg &
-               + trMat(1,1)*l1 + trMat(1,2)*l2 + trMat(1,3)*l3
-          djBeg = cgnsDoms(cgnsID)%conn1to1(j)%djBeg &
-               + trMat(2,1)*l1 + trMat(2,2)*l2 + trMat(2,3)*l3
-          dkBeg = cgnsDoms(cgnsID)%conn1to1(j)%dkBeg &
-               + trMat(3,1)*l1 + trMat(3,2)*l2 + trMat(3,3)*l3
-
-          l1 = iEnd - cgnsDoms(cgnsID)%conn1to1(j)%iBeg
-          L2 = jEnd - cgnsDoms(cgnsID)%conn1to1(j)%jBeg
-          l3 = kEnd - cgnsDoms(cgnsID)%conn1to1(j)%kBeg
-
-          diEnd = cgnsDoms(cgnsID)%conn1to1(j)%diBeg &
-               + trMat(1,1)*l1 + trMat(1,2)*l2 + trMat(1,3)*l3
-          djEnd = cgnsDoms(cgnsID)%conn1to1(j)%djBeg &
-               + trMat(2,1)*l1 + trMat(2,2)*l2 + trMat(2,3)*l3
-          dkEnd = cgnsDoms(cgnsID)%conn1to1(j)%dkBeg &
-               + trMat(3,1)*l1 + trMat(3,2)*l2 + trMat(3,3)*l3
-
-          ! Make sure that the donor indices are positive running
-          ! indices. If they must be swapped, the corresponding
-          ! logical is set to .true.
-
-          diSwap = .false.
-          if(diBeg > diEnd) then
-             mm = diBeg; diBeg = diEnd; diEnd = mm; diSwap = .true.
-          endif
-
-          djSwap = .false.
-          if(djBeg > djEnd) then
-             mm = djBeg; djBeg = djEnd; djEnd = mm; djSwap = .true.
-          endif
-
-          dkSwap = .false.
-          if(dkBeg > dkEnd) then
-             mm = dkBeg; dkBeg = dkEnd; dkEnd = mm; dkSwap = .true.
-          endif
-
-          ! Store the index of the donor block a bit easier and loop
-          ! over its subblocks to find the donor range.
+                diSwap = .false.
+                if (diBeg > diEnd) then
+                    mm = diBeg; diBeg = diEnd; diEnd = mm; diSwap = .true.
+                end if
 
-          mm = cgnsDoms(cgnsID)%conn1to1(j)%donorBlock
-          ranges => splitInfo(mm)%ranges
-
-          donorLoop: do k=1,splitInfo(mm)%nSubblocks
+                djSwap = .false.
+                if (djBeg > djEnd) then
+                    mm = djBeg; djBeg = djEnd; djEnd = mm; djSwap = .true.
+                end if
 
-             ! Check whether this subblock and the given donor range
-             ! overlap.
+                dkSwap = .false.
+                if (dkBeg > dkEnd) then
+                    mm = dkBeg; dkBeg = dkEnd; dkEnd = mm; dkSwap = .true.
+                end if
 
-             donorOverlap: if(diBeg <= ranges(k,1,2) .and. &
-                  diEnd >= ranges(k,1,1) .and. &
-                  djBeg <= ranges(k,2,2) .and. &
-                  djEnd >= ranges(k,2,1) .and. &
-                  dkBeg <= ranges(k,3,2) .and. &
-                  dkEnd >= ranges(k,3,1)) then
-
-                ! Determine the range of the donor face, which is
-                ! stored in iBeg, iEnd, etc.
-
-                iBeg = max(diBeg,ranges(k,1,1))
-                iEnd = min(diEnd,ranges(k,1,2))
-
-                jBeg = max(djBeg,ranges(k,2,1))
-                jEnd = min(djEnd,ranges(k,2,2))
+                ! Store the index of the donor block a bit easier and loop
+                ! over its subblocks to find the donor range.
 
-                kBeg = max(dkBeg,ranges(k,3,1))
-                kEnd = min(dkEnd,ranges(k,3,2))
+                mm = cgnsDoms(cgnsID)%conn1to1(j)%donorBlock
+                ranges => splitInfo(mm)%ranges
 
-                ! Check whether the subfaces are truely overlapping
-                ! or just adjacent.
+                donorLoop: do k = 1, splitInfo(mm)%nSubblocks
 
-                kk = 0
-                if(iBeg == iEnd) kk = kk + 1
-                if(jBeg == jEnd) kk = kk + 1
-                if(kBeg == kEnd) kk = kk + 1
+                    ! Check whether this subblock and the given donor range
+                    ! overlap.
 
-                if(kk > 1) cycle
+                    donorOverlap: if (diBeg <= ranges(k, 1, 2) .and. &
+                                      diEnd >= ranges(k, 1, 1) .and. &
+                                      djBeg <= ranges(k, 2, 2) .and. &
+                                      djEnd >= ranges(k, 2, 1) .and. &
+                                      dkBeg <= ranges(k, 3, 2) .and. &
+                                      dkEnd >= ranges(k, 3, 1)) then
+
+                        ! Determine the range of the donor face, which is
+                        ! stored in iBeg, iEnd, etc.
 
-                ! Update the counter jj and check whether enough memory
-                ! has been allocated. If not, reallocate.
+                        iBeg = max(diBeg, ranges(k, 1, 1))
+                        iEnd = min(diEnd, ranges(k, 1, 2))
+
+                        jBeg = max(djBeg, ranges(k, 2, 1))
+                        jEnd = min(djEnd, ranges(k, 2, 2))
 
-                jj = jj + 1
-                if(jj > nAlloc) call reallocSubfaceMemory(ii,nAlloc)
+                        kBeg = max(dkBeg, ranges(k, 3, 1))
+                        kEnd = min(dkEnd, ranges(k, 3, 2))
 
-                ! Set some info for this subface, which can be
-                ! determined relatively easily.
+                        ! Check whether the subfaces are truely overlapping
+                        ! or just adjacent.
 
-                blocks(ii)%BCType(jj)      = B2BMatch
-                blocks(ii)%cgnsSubface(jj) = j
-                blocks(ii)%groupNum(jj)    = 0
+                        kk = 0
+                        if (iBeg == iEnd) kk = kk + 1
+                        if (jBeg == jEnd) kk = kk + 1
+                        if (kBeg == kEnd) kk = kk + 1
 
-                blocks(ii)%l1(jj) = cgnsDoms(cgnsID)%conn1to1(j)%l1
-                blocks(ii)%l2(jj) = cgnsDoms(cgnsID)%conn1to1(j)%l2
-                blocks(ii)%l3(jj) = cgnsDoms(cgnsID)%conn1to1(j)%l3
+                        if (kk > 1) cycle
 
-                ! Determine the neighboring block id.
+                        ! Update the counter jj and check whether enough memory
+                        ! has been allocated. If not, reallocate.
 
-                blocks(ii)%neighBlock(jj) = nSubPerCGNS(mm-1) + k
+                        jj = jj + 1
+                        if (jj > nAlloc) call reallocSubfaceMemory(ii, nAlloc)
 
-                ! Determine the range of the donor. First switch the
-                ! indices if the original indices were swapped.
+                        ! Set some info for this subface, which can be
+                        ! determined relatively easily.
 
-                if( diSwap ) then
-                   kk = iBeg; iBeg = iEnd; iEnd = kk
-                endif
+                        blocks(ii)%BCType(jj) = B2BMatch
+                        blocks(ii)%cgnsSubface(jj) = j
+                        blocks(ii)%groupNum(jj) = 0
 
-                if( djSwap ) then
-                   kk = jBeg; jBeg = jEnd; jEnd = kk
-                endif
+                        blocks(ii)%l1(jj) = cgnsDoms(cgnsID)%conn1to1(j)%l1
+                        blocks(ii)%l2(jj) = cgnsDoms(cgnsID)%conn1to1(j)%l2
+                        blocks(ii)%l3(jj) = cgnsDoms(cgnsID)%conn1to1(j)%l3
 
-                if( dkSwap ) then
-                   kk = kBeg; kBeg = kEnd; kEnd = kk
-                endif
+                        ! Determine the neighboring block id.
 
-                ! Determine the local range of the donor, i.e. the
-                ! offset in the original block must be substracted.
+                        blocks(ii)%neighBlock(jj) = nSubPerCGNS(mm - 1) + k
 
-                blocks(ii)%dinBeg(jj) = iBeg - ranges(k,1,1) + 1
-                blocks(ii)%djnBeg(jj) = jBeg - ranges(k,2,1) + 1
-                blocks(ii)%dknBeg(jj) = kBeg - ranges(k,3,1) + 1
+                        ! Determine the range of the donor. First switch the
+                        ! indices if the original indices were swapped.
 
-                blocks(ii)%dinEnd(jj) = iEnd - ranges(k,1,1) + 1
-                blocks(ii)%djnEnd(jj) = jEnd - ranges(k,2,1) + 1
-                blocks(ii)%dknEnd(jj) = kEnd - ranges(k,3,1) + 1
+                        if (diSwap) then
+                            kk = iBeg; iBeg = iEnd; iEnd = kk
+                        end if
 
-                ! Transform the donor range in the original donor block
-                ! back the a subface range in the original cgns block.
-                ! The inverse of the transformation matrix trMat is
-                ! the transpose.
+                        if (djSwap) then
+                            kk = jBeg; jBeg = jEnd; jEnd = kk
+                        end if
 
-                l1 = iBeg - cgnsDoms(cgnsID)%conn1to1(j)%diBeg
-                L2 = jBeg - cgnsDoms(cgnsID)%conn1to1(j)%djBeg
-                l3 = kBeg - cgnsDoms(cgnsID)%conn1to1(j)%dkBeg
+                        if (dkSwap) then
+                            kk = kBeg; kBeg = kEnd; kEnd = kk
+                        end if
 
-                iBeg = cgnsDoms(cgnsID)%conn1to1(j)%iBeg &
-                     + trMat(1,1)*l1 + trMat(2,1)*l2 + trMat(3,1)*l3
-                jBeg = cgnsDoms(cgnsID)%conn1to1(j)%jBeg &
-                     + trMat(1,2)*l1 + trMat(2,2)*l2 + trMat(3,2)*l3
-                kBeg = cgnsDoms(cgnsID)%conn1to1(j)%kBeg &
-                     + trMat(1,3)*l1 + trMat(2,3)*l2 + trMat(3,3)*l3
+                        ! Determine the local range of the donor, i.e. the
+                        ! offset in the original block must be substracted.
 
-                l1 = iEnd - cgnsDoms(cgnsID)%conn1to1(j)%diBeg
-                L2 = jEnd - cgnsDoms(cgnsID)%conn1to1(j)%djBeg
-                l3 = kEnd - cgnsDoms(cgnsID)%conn1to1(j)%dkBeg
+                        blocks(ii)%dinBeg(jj) = iBeg - ranges(k, 1, 1) + 1
+                        blocks(ii)%djnBeg(jj) = jBeg - ranges(k, 2, 1) + 1
+                        blocks(ii)%dknBeg(jj) = kBeg - ranges(k, 3, 1) + 1
 
-                iEnd = cgnsDoms(cgnsID)%conn1to1(j)%iBeg &
-                     + trMat(1,1)*l1 + trMat(2,1)*l2 + trMat(3,1)*l3
-                jEnd = cgnsDoms(cgnsID)%conn1to1(j)%jBeg &
-                     + trMat(1,2)*l1 + trMat(2,2)*l2 + trMat(3,2)*l3
-                kEnd = cgnsDoms(cgnsID)%conn1to1(j)%kBeg &
-                     + trMat(1,3)*l1 + trMat(2,3)*l2 + trMat(3,3)*l3
+                        blocks(ii)%dinEnd(jj) = iEnd - ranges(k, 1, 1) + 1
+                        blocks(ii)%djnEnd(jj) = jEnd - ranges(k, 2, 1) + 1
+                        blocks(ii)%dknEnd(jj) = kEnd - ranges(k, 3, 1) + 1
 
-                ! Store the subface range of the new block, i.e.
-                ! An offset must be subtracted.
+                        ! Transform the donor range in the original donor block
+                        ! back the a subface range in the original cgns block.
+                        ! The inverse of the transformation matrix trMat is
+                        ! the transpose.
 
-                blocks(ii)%inBeg(jj) = iBeg - blocks(ii)%iBegor + 1
-                blocks(ii)%jnBeg(jj) = jBeg - blocks(ii)%jBegor + 1
-                blocks(ii)%knBeg(jj) = kBeg - blocks(ii)%kBegor + 1
+                        l1 = iBeg - cgnsDoms(cgnsID)%conn1to1(j)%diBeg
+                        L2 = jBeg - cgnsDoms(cgnsID)%conn1to1(j)%djBeg
+                        l3 = kBeg - cgnsDoms(cgnsID)%conn1to1(j)%dkBeg
 
-                blocks(ii)%inEnd(jj) = iEnd - blocks(ii)%iBegor + 1
-                blocks(ii)%jnEnd(jj) = jEnd - blocks(ii)%jBegor + 1
-                blocks(ii)%knEnd(jj) = kEnd - blocks(ii)%kBegor + 1
+                        iBeg = cgnsDoms(cgnsID)%conn1to1(j)%iBeg &
+                               + trMat(1, 1) * l1 + trMat(2, 1) * l2 + trMat(3, 1) * l3
+                        jBeg = cgnsDoms(cgnsID)%conn1to1(j)%jBeg &
+                               + trMat(1, 2) * l1 + trMat(2, 2) * l2 + trMat(3, 2) * l3
+                        kBeg = cgnsDoms(cgnsID)%conn1to1(j)%kBeg &
+                               + trMat(1, 3) * l1 + trMat(2, 3) * l2 + trMat(3, 3) * l3
 
-                ! Determine the block face id on which this subface
-                ! is located.
+                        l1 = iEnd - cgnsDoms(cgnsID)%conn1to1(j)%diBeg
+                        L2 = jEnd - cgnsDoms(cgnsID)%conn1to1(j)%djBeg
+                        l3 = kEnd - cgnsDoms(cgnsID)%conn1to1(j)%dkBeg
 
-                if(iBeg == iEnd) then
+                        iEnd = cgnsDoms(cgnsID)%conn1to1(j)%iBeg &
+                               + trMat(1, 1) * l1 + trMat(2, 1) * l2 + trMat(3, 1) * l3
+                        jEnd = cgnsDoms(cgnsID)%conn1to1(j)%jBeg &
+                               + trMat(1, 2) * l1 + trMat(2, 2) * l2 + trMat(3, 2) * l3
+                        kEnd = cgnsDoms(cgnsID)%conn1to1(j)%kBeg &
+                               + trMat(1, 3) * l1 + trMat(2, 3) * l2 + trMat(3, 3) * l3
 
-                   blocks(ii)%BCFaceID(jj) = iMax
-                   if(iBeg == blocks(ii)%iBegor) &
-                        blocks(ii)%BCFaceID(jj) = iMin
+                        ! Store the subface range of the new block, i.e.
+                        ! An offset must be subtracted.
 
-                else if(jBeg == jEnd) then
+                        blocks(ii)%inBeg(jj) = iBeg - blocks(ii)%iBegor + 1
+                        blocks(ii)%jnBeg(jj) = jBeg - blocks(ii)%jBegor + 1
+                        blocks(ii)%knBeg(jj) = kBeg - blocks(ii)%kBegor + 1
 
-                   blocks(ii)%BCFaceID(jj) = jMax
-                   if(jBeg == blocks(ii)%jBegor) &
-                        blocks(ii)%BCFaceID(jj) = jMin
+                        blocks(ii)%inEnd(jj) = iEnd - blocks(ii)%iBegor + 1
+                        blocks(ii)%jnEnd(jj) = jEnd - blocks(ii)%jBegor + 1
+                        blocks(ii)%knEnd(jj) = kEnd - blocks(ii)%kBegor + 1
 
-                else
+                        ! Determine the block face id on which this subface
+                        ! is located.
+
+                        if (iBeg == iEnd) then
+
+                            blocks(ii)%BCFaceID(jj) = iMax
+                            if (iBeg == blocks(ii)%iBegor) &
+                                blocks(ii)%BCFaceID(jj) = iMin
+
+                        else if (jBeg == jEnd) then
 
-                   blocks(ii)%BCFaceID(jj) = kMax
-                   if(kBeg == blocks(ii)%kBegor) &
-                        blocks(ii)%BCFaceID(jj) = kMin
+                            blocks(ii)%BCFaceID(jj) = jMax
+                            if (jBeg == blocks(ii)%jBegor) &
+                                blocks(ii)%BCFaceID(jj) = jMin
 
-                endif
+                        else
 
-             endif donorOverlap
-          enddo donorLoop
-       endif overlap
-    enddo n1to1Loop
+                            blocks(ii)%BCFaceID(jj) = kMax
+                            if (kBeg == blocks(ii)%kBegor) &
+                                blocks(ii)%BCFaceID(jj) = kMin
 
-  end subroutine externalFacesSubblock
+                        end if
 
-  !========================================================================
+                    end if donorOverlap
+                end do donorLoop
+            end if overlap
+        end do n1to1Loop
 
-  subroutine internalFacesSubblock(cgnsID, ii, jj, nSubPerCGNS, &
-       nAlloc, splitInfo)
-    !
-    !       internalFacesSubblock determines the block boundaries of
-    !       the compute block ii which are created due to the splitting of
-    !       the original block into subblock. As the number of these
-    !       internal boundaries is not known, it must be checked whether
-    !       enough memory has been allocated. jj is the counter for the
-    !       number of subfaces.
-    !
-    use cgnsGrid
-    use communication
-    use partitionMod
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    integer(kind=intType), intent(in)    :: cgnsID, ii
-    integer(kind=intType), intent(inout) :: jj, nAlloc
+    end subroutine externalFacesSubblock
 
-    integer(kind=intType), dimension(0:cgnsNDom), intent(in) :: &
-         nSubPerCGNS
-    type(splitCGNSType), intent(in) :: splitInfo
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: indFace, jBeg, jEnd, kBeg, kEnd
-    integer(kind=intType) :: i, i2, j, k, faceID
+    !========================================================================
 
-    ! iMin face.
+    subroutine internalFacesSubblock(cgnsID, ii, jj, nSubPerCGNS, &
+                                     nAlloc, splitInfo)
+        !
+        !       internalFacesSubblock determines the block boundaries of
+        !       the compute block ii which are created due to the splitting of
+        !       the original block into subblock. As the number of these
+        !       internal boundaries is not known, it must be checked whether
+        !       enough memory has been allocated. jj is the counter for the
+        !       number of subfaces.
+        !
+        use cgnsGrid
+        use communication
+        use partitionMod
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        integer(kind=intType), intent(in) :: cgnsID, ii
+        integer(kind=intType), intent(inout) :: jj, nAlloc
 
-    if(blocks(ii)%iBegor > 1) then
+        integer(kind=intType), dimension(0:cgnsNDom), intent(in) :: &
+            nSubPerCGNS
+        type(splitCGNSType), intent(in) :: splitInfo
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: indFace, jBeg, jEnd, kBeg, kEnd
+        integer(kind=intType) :: i, i2, j, k, faceID
 
-       ! Imin face is created through splitting. Set some variables
-       ! for the general treatment.
+        ! iMin face.
 
-       indFace = blocks(ii)%iBegor
-       jBeg    = blocks(ii)%jBegor
-       jEnd    = blocks(ii)%jEndor
-       kBeg    = blocks(ii)%kBegor
-       kEnd    = blocks(ii)%kEndor
+        if (blocks(ii)%iBegor > 1) then
 
-       i = 1; j = 2; k = 3
-       i2 = 2; faceID = iMin
+            ! Imin face is created through splitting. Set some variables
+            ! for the general treatment.
 
-       ! Search for neighbors in the subblocks of the given
-       ! cgns blocks.
+            indFace = blocks(ii)%iBegor
+            jBeg = blocks(ii)%jBegor
+            jEnd = blocks(ii)%jEndor
+            kBeg = blocks(ii)%kBegor
+            kEnd = blocks(ii)%kEndor
 
-       call searchInternalNeighbors
+            i = 1; j = 2; k = 3
+            i2 = 2; faceID = iMin
 
-    endif
+            ! Search for neighbors in the subblocks of the given
+            ! cgns blocks.
 
-    ! iMax face.
+            call searchInternalNeighbors
 
-    if(blocks(ii)%iEndor < cgnsDoms(cgnsID)%il) then
+        end if
 
-       ! Imax face is created through splitting. Set some variables
-       ! for the general treatment.
+        ! iMax face.
 
-       indFace = blocks(ii)%iEndor
-       jBeg    = blocks(ii)%jBegor
-       jEnd    = blocks(ii)%jEndor
-       kBeg    = blocks(ii)%kBegor
-       kEnd    = blocks(ii)%kEndor
+        if (blocks(ii)%iEndor < cgnsDoms(cgnsID)%il) then
 
-       i = 1; j = 2; k = 3
-       i2 = 1; faceID = iMax
+            ! Imax face is created through splitting. Set some variables
+            ! for the general treatment.
 
-       ! Search for neighbors in the subblocks of the given
-       ! cgns blocks.
+            indFace = blocks(ii)%iEndor
+            jBeg = blocks(ii)%jBegor
+            jEnd = blocks(ii)%jEndor
+            kBeg = blocks(ii)%kBegor
+            kEnd = blocks(ii)%kEndor
 
-       call searchInternalNeighbors
+            i = 1; j = 2; k = 3
+            i2 = 1; faceID = iMax
 
-    endif
+            ! Search for neighbors in the subblocks of the given
+            ! cgns blocks.
 
-    ! jMin face.
+            call searchInternalNeighbors
 
-    if(blocks(ii)%jBegor > 1) then
+        end if
 
-       ! Jmin face is created through splitting. Set some variables
-       ! for the general treatment.
+        ! jMin face.
 
-       indFace = blocks(ii)%jBegor
-       jBeg    = blocks(ii)%iBegor
-       jEnd    = blocks(ii)%iEndor
-       kBeg    = blocks(ii)%kBegor
-       kEnd    = blocks(ii)%kEndor
+        if (blocks(ii)%jBegor > 1) then
 
-       i = 2; j = 1; k = 3
-       i2 = 2; faceID = jMin
+            ! Jmin face is created through splitting. Set some variables
+            ! for the general treatment.
 
-       ! Search for neighbors in the subblocks of the given
-       ! cgns blocks.
+            indFace = blocks(ii)%jBegor
+            jBeg = blocks(ii)%iBegor
+            jEnd = blocks(ii)%iEndor
+            kBeg = blocks(ii)%kBegor
+            kEnd = blocks(ii)%kEndor
 
-       call searchInternalNeighbors
+            i = 2; j = 1; k = 3
+            i2 = 2; faceID = jMin
 
-    endif
+            ! Search for neighbors in the subblocks of the given
+            ! cgns blocks.
 
-    ! jMax face.
+            call searchInternalNeighbors
 
-    if(blocks(ii)%jEndor < cgnsDoms(cgnsID)%jl) then
+        end if
 
-       ! Jmax face is created through splitting. Set some variables
-       ! for the general treatment.
+        ! jMax face.
 
-       indFace = blocks(ii)%jEndor
-       jBeg    = blocks(ii)%iBegor
-       jEnd    = blocks(ii)%iEndor
-       kBeg    = blocks(ii)%kBegor
-       kEnd    = blocks(ii)%kEndor
+        if (blocks(ii)%jEndor < cgnsDoms(cgnsID)%jl) then
 
-       i = 2; j = 1; k = 3
-       i2 = 1; faceID = jMax
+            ! Jmax face is created through splitting. Set some variables
+            ! for the general treatment.
 
-       ! Search for neighbors in the subblocks of the given
-       ! cgns blocks.
+            indFace = blocks(ii)%jEndor
+            jBeg = blocks(ii)%iBegor
+            jEnd = blocks(ii)%iEndor
+            kBeg = blocks(ii)%kBegor
+            kEnd = blocks(ii)%kEndor
 
-       call searchInternalNeighbors
+            i = 2; j = 1; k = 3
+            i2 = 1; faceID = jMax
 
-    endif
+            ! Search for neighbors in the subblocks of the given
+            ! cgns blocks.
 
-    ! kMin face.
+            call searchInternalNeighbors
 
-    if(blocks(ii)%kBegor > 1) then
+        end if
 
-       ! Kmin face is created through splitting. Set some variables
-       ! for the general treatment.
+        ! kMin face.
 
-       indFace = blocks(ii)%kBegor
-       jBeg    = blocks(ii)%iBegor
-       jEnd    = blocks(ii)%iEndor
-       kBeg    = blocks(ii)%jBegor
-       kEnd    = blocks(ii)%jEndor
+        if (blocks(ii)%kBegor > 1) then
 
-       i = 3; j = 1; k = 2
-       i2 = 2; faceID = kMin
+            ! Kmin face is created through splitting. Set some variables
+            ! for the general treatment.
 
-       ! Search for neighbors in the subblocks of the given
-       ! cgns blocks.
+            indFace = blocks(ii)%kBegor
+            jBeg = blocks(ii)%iBegor
+            jEnd = blocks(ii)%iEndor
+            kBeg = blocks(ii)%jBegor
+            kEnd = blocks(ii)%jEndor
 
-       call searchInternalNeighbors
+            i = 3; j = 1; k = 2
+            i2 = 2; faceID = kMin
 
-    endif
+            ! Search for neighbors in the subblocks of the given
+            ! cgns blocks.
 
-    ! kMax face.
+            call searchInternalNeighbors
 
-    if(blocks(ii)%kEndor < cgnsDoms(cgnsID)%kl) then
+        end if
 
-       ! Kmax face is created through splitting. Set some variables
-       ! for the general treatment.
+        ! kMax face.
 
-       indFace = blocks(ii)%kEndor
-       jBeg    = blocks(ii)%iBegor
-       jEnd    = blocks(ii)%iEndor
-       kBeg    = blocks(ii)%jBegor
-       kEnd    = blocks(ii)%jEndor
+        if (blocks(ii)%kEndor < cgnsDoms(cgnsID)%kl) then
 
-       i = 3; j = 1; k = 2
-       i2 = 1; faceID = kMax
+            ! Kmax face is created through splitting. Set some variables
+            ! for the general treatment.
 
-       ! Search for neighbors in the subblocks of the given
-       ! cgns blocks.
+            indFace = blocks(ii)%kEndor
+            jBeg = blocks(ii)%iBegor
+            jEnd = blocks(ii)%iEndor
+            kBeg = blocks(ii)%jBegor
+            kEnd = blocks(ii)%jEndor
 
-       call searchInternalNeighbors
+            i = 3; j = 1; k = 2
+            i2 = 1; faceID = kMax
 
-    endif
+            ! Search for neighbors in the subblocks of the given
+            ! cgns blocks.
 
-    !=================================================================
+            call searchInternalNeighbors
 
-  contains
+        end if
 
-    !===============================================================
+        !=================================================================
 
-    subroutine searchInternalNeighbors
-      !
-      !         searchInternalNeighbors determines block faces created by
-      !         by the splitting of the original block. The variables set in
-      !         internalFacesSubblock are used such that a general
-      !         treatment is possible.
-      !
-      implicit none
-      !
-      !        Local variables
-      !
-      integer(kind=intType) :: mm, jnBeg, jnEnd, knBeg, knEnd
+    contains
 
-      integer(kind=intType), dimension(3,2) :: subRange
+        !===============================================================
 
-      integer(kind=intType), dimension(:,:,:), pointer :: ranges
+        subroutine searchInternalNeighbors
+            !
+            !         searchInternalNeighbors determines block faces created by
+            !         by the splitting of the original block. The variables set in
+            !         internalFacesSubblock are used such that a general
+            !         treatment is possible.
+            !
+            implicit none
+            !
+            !        Local variables
+            !
+            integer(kind=intType) :: mm, jnBeg, jnEnd, knBeg, knEnd
 
-      ! Set the pointer for ranges to make the code more readable.
+            integer(kind=intType), dimension(3, 2) :: subRange
 
-      ranges => splitInfo%ranges
+            integer(kind=intType), dimension(:, :, :), pointer :: ranges
 
-      ! Loop over the number of blocks into the original block
-      ! is split.
+            ! Set the pointer for ranges to make the code more readable.
 
-      do mm=1,splitInfo%nSubblocks
+            ranges => splitInfo%ranges
 
-         ! Check whether the constant index of the face matches
-         ! the given index of subblock.
+            ! Loop over the number of blocks into the original block
+            ! is split.
 
-         if(ranges(mm,i,i2) == indFace) then
+            do mm = 1, splitInfo%nSubblocks
 
-            ! Check whether the faces overlap.
+                ! Check whether the constant index of the face matches
+                ! the given index of subblock.
 
-            if(jBeg <= ranges(mm,j,2) .and. &
-                 jEnd >= ranges(mm,j,1) .and. &
-                 kBeg <= ranges(mm,k,2) .and. &
-                 kEnd >= ranges(mm,k,1) ) then
+                if (ranges(mm, i, i2) == indFace) then
 
-               ! There is a possible overlap. Determine the nodal
-               ! range of the subface.
+                    ! Check whether the faces overlap.
 
-               jnBeg = max(jBeg,ranges(mm,j,1))
-               jnEnd = min(jEnd,ranges(mm,j,2))
+                    if (jBeg <= ranges(mm, j, 2) .and. &
+                        jEnd >= ranges(mm, j, 1) .and. &
+                        kBeg <= ranges(mm, k, 2) .and. &
+                        kEnd >= ranges(mm, k, 1)) then
 
-               knBeg = max(kBeg,ranges(mm,k,1))
-               knEnd = min(kEnd,ranges(mm,k,2))
+                        ! There is a possible overlap. Determine the nodal
+                        ! range of the subface.
 
-               ! Check whether this is a true subface.
+                        jnBeg = max(jBeg, ranges(mm, j, 1))
+                        jnEnd = min(jEnd, ranges(mm, j, 2))
 
-               if(jnEnd > jnBeg .and. knEnd > knBeg) then
+                        knBeg = max(kBeg, ranges(mm, k, 1))
+                        knEnd = min(kEnd, ranges(mm, k, 2))
 
-                  ! An overlap occurs. Update the counter jj and check
-                  ! whether enough memory has been allocated.
-                  ! If not, reallocate.
+                        ! Check whether this is a true subface.
 
-                  jj = jj + 1
-                  if(jj > nAlloc) call reallocSubfaceMemory(ii,nAlloc)
+                        if (jnEnd > jnBeg .and. knEnd > knBeg) then
 
-                  ! Set the information of the BCType, BCFaceID,
-                  ! cgnsSubface and the group number. As this face is
-                  ! created internally the latter two variables are
-                  ! set to 0.
+                            ! An overlap occurs. Update the counter jj and check
+                            ! whether enough memory has been allocated.
+                            ! If not, reallocate.
 
-                  blocks(ii)%BCType(jj)      = B2BMatch
-                  blocks(ii)%BCFaceID(jj)    = faceID
-                  blocks(ii)%cgnsSubface(jj) = 0
-                  blocks(ii)%groupNum(jj)    = 0
-
-                  ! Determine the subRange of the subface in the
-                  ! original block.
-
-                  subRange(i,1) = indFace; subRange(i,2) = indFace
-                  subRange(j,1) = jnBeg;   subRange(j,2) = jnEnd
-                  subRange(k,1) = knBeg;   subRange(k,2) = knEnd
-
-                  ! Determine the nodal range in the current subblock.
-
-                  blocks(ii)%inBeg(jj) = subRange(1,1) &
-                       - blocks(ii)%iBegor + 1
-                  blocks(ii)%inEnd(jj) = subRange(1,2) &
-                       - blocks(ii)%iBegor + 1
-
-                  blocks(ii)%jnBeg(jj) = subRange(2,1) &
-                       - blocks(ii)%jBegor + 1
-                  blocks(ii)%jnEnd(jj) = subRange(2,2) &
-                       - blocks(ii)%jBegor + 1
-
-                  blocks(ii)%knBeg(jj) = subRange(3,1) &
-                       - blocks(ii)%kBegor + 1
-                  blocks(ii)%knEnd(jj) = subRange(3,2) &
-                       - blocks(ii)%kBegor + 1
-
-                  ! Determine the nodal range in the donor block.
-
-                  blocks(ii)%dinBeg(jj) = subRange(1,1) &
-                       - ranges(mm,1,1) + 1
-                  blocks(ii)%dinEnd(jj) = subRange(1,2) &
-                       - ranges(mm,1,1) + 1
-
-                  blocks(ii)%djnBeg(jj) = subRange(2,1) &
-                       - ranges(mm,2,1) + 1
-                  blocks(ii)%djnEnd(jj) = subRange(2,2) &
-                       - ranges(mm,2,1) + 1
-
-                  blocks(ii)%dknBeg(jj) = subRange(3,1) &
-                       - ranges(mm,3,1) + 1
-                  blocks(ii)%dknEnd(jj) = subRange(3,2) &
-                       - ranges(mm,3,1) + 1
-
-                  ! Set the neighboring block to mm plus the offset
-                  ! for the current cgns block and set the transformation
-                  ! matrix. The latter is simply 1-2-3, because the
-                  ! orientation of the subblocks is identical to the
-                  ! original block.
-
-                  blocks(ii)%neighBlock(jj) = mm + nSubPerCGNS(cgnsID-1)
-
-                  blocks(ii)%l1(jj) = 1
-                  blocks(ii)%l2(jj) = 2
-                  blocks(ii)%l3(jj) = 3
-
-               endif
-            endif
-         endif
-      enddo
-
-    end subroutine searchInternalNeighbors
-
-  end subroutine internalFacesSubblock
-  subroutine graphPartitioning(emptyPartitions, commNeglected)
-    !
-    !       graphPartitioning partitions the corresponding graph of the
-    !       computational blocks such that both the number of cells and
-    !       number of faces is about equal on all processors.
-    !
-    use constants
-    use communication, only : myID, adflow_comm_world, nProc
-    use partitionMod, only : part, blocks, ubvec, nBlocks
-    use inputParallel, only : loadImbalance
-    use utils, only : terminate
-    use sorting, only : qsortIntegers, bsearchIntegers
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    logical, intent(out) :: emptyPartitions, commNeglected
-    !
-    !      Variables to store the graph in metis format.
-    !
-    ! nVertex        : Number of vertices in the graph, equals nBlocks.
-    ! nCon           : Number of contraints, 2.
-    ! xadj(0:nVertex): Number of edges per vertex, cumulative storage
-    !                  format.
-    ! adjncy(:)      : End vertex of the edge; the size of adjncy is
-    !                  xadj(nVertex).
-    ! vwgt(:,:)      : Vertex weights, size equals nCon,nVertex. The
-    !                  vertex weights are stored contiguously.
-    ! adjwgt(:)      : Edge weights, size equals xadj(nVertex). Note
-    !                  that the edge weights of edge i-j can be
-    !                  different from the weight of edge j-i.
-    ! wgtflag        : Whether or not to use weights on edges. Here
-    !                  wgtflag should always be 1 to indicate that
-    !                  edge weights are used.
-    ! numflag        : Flag to indicate the numbering convention,
-    !                  starting from 0 or 1. Here we start from 0.
-    ! nParts         : Number of parts to split the graph. This is
-    !                  nProc.
-    ! ubvec(2)       : Tolerance for the constraints. Stored in the
-    !                  module dpartitionMod.
-    ! options(5)     : Option array; normally the default is used
-    !                  indicated by options(1) = 0.
-    ! edgecut        : On return it contains the edge cut of the
-    !                  distributed graph.
-    ! part(nVertex)  : On return the processor ID for each block.
-    !                  It will be returned in fortran numbering,
-    !                  i.e. starting at 1.  Stored in the module
-    !                  distributionMod.
-
-    integer :: nVertex, nCon, wgtflag, numflag, nParts, edgecut
-    integer, dimension(5) :: options
-
-    integer(kind=intType), dimension(:),   allocatable :: xadj, adjncy
-    integer(kind=intType), dimension(:),   allocatable :: adjwgt
-    integer(kind=intType), dimension(:,:), allocatable :: vwgt
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: i, j
-    integer(kind=intType) :: nEdges, nEdgesMax, ii, jj, kk
-
-    integer(kind=intType), dimension(0:nProc-1) :: nBlockPerProc
-
-    integer(kind=intType), dimension(:), allocatable :: tmp
-
-    integer(kind=8) :: nCellsTotal    ! 8 byte integers to avoid
-    integer(kind=8) :: nFacesTotal    ! overflow.
-
-    real(kind=4) :: ubvec_temp(2) ! Explict float for ubvec
-    !
-    ! Check whether part is allocated from a previous call. If so,
-    ! release the memory.
-
-    if( allocated(part) ) then
-       deallocate(part, stat=ierr)
-       if(ierr /= 0)                       &
+                            jj = jj + 1
+                            if (jj > nAlloc) call reallocSubfaceMemory(ii, nAlloc)
+
+                            ! Set the information of the BCType, BCFaceID,
+                            ! cgnsSubface and the group number. As this face is
+                            ! created internally the latter two variables are
+                            ! set to 0.
+
+                            blocks(ii)%BCType(jj) = B2BMatch
+                            blocks(ii)%BCFaceID(jj) = faceID
+                            blocks(ii)%cgnsSubface(jj) = 0
+                            blocks(ii)%groupNum(jj) = 0
+
+                            ! Determine the subRange of the subface in the
+                            ! original block.
+
+                            subRange(i, 1) = indFace; subRange(i, 2) = indFace
+                            subRange(j, 1) = jnBeg; subRange(j, 2) = jnEnd
+                            subRange(k, 1) = knBeg; subRange(k, 2) = knEnd
+
+                            ! Determine the nodal range in the current subblock.
+
+                            blocks(ii)%inBeg(jj) = subRange(1, 1) &
+                                                   - blocks(ii)%iBegor + 1
+                            blocks(ii)%inEnd(jj) = subRange(1, 2) &
+                                                   - blocks(ii)%iBegor + 1
+
+                            blocks(ii)%jnBeg(jj) = subRange(2, 1) &
+                                                   - blocks(ii)%jBegor + 1
+                            blocks(ii)%jnEnd(jj) = subRange(2, 2) &
+                                                   - blocks(ii)%jBegor + 1
+
+                            blocks(ii)%knBeg(jj) = subRange(3, 1) &
+                                                   - blocks(ii)%kBegor + 1
+                            blocks(ii)%knEnd(jj) = subRange(3, 2) &
+                                                   - blocks(ii)%kBegor + 1
+
+                            ! Determine the nodal range in the donor block.
+
+                            blocks(ii)%dinBeg(jj) = subRange(1, 1) &
+                                                    - ranges(mm, 1, 1) + 1
+                            blocks(ii)%dinEnd(jj) = subRange(1, 2) &
+                                                    - ranges(mm, 1, 1) + 1
+
+                            blocks(ii)%djnBeg(jj) = subRange(2, 1) &
+                                                    - ranges(mm, 2, 1) + 1
+                            blocks(ii)%djnEnd(jj) = subRange(2, 2) &
+                                                    - ranges(mm, 2, 1) + 1
+
+                            blocks(ii)%dknBeg(jj) = subRange(3, 1) &
+                                                    - ranges(mm, 3, 1) + 1
+                            blocks(ii)%dknEnd(jj) = subRange(3, 2) &
+                                                    - ranges(mm, 3, 1) + 1
+
+                            ! Set the neighboring block to mm plus the offset
+                            ! for the current cgns block and set the transformation
+                            ! matrix. The latter is simply 1-2-3, because the
+                            ! orientation of the subblocks is identical to the
+                            ! original block.
+
+                            blocks(ii)%neighBlock(jj) = mm + nSubPerCGNS(cgnsID - 1)
+
+                            blocks(ii)%l1(jj) = 1
+                            blocks(ii)%l2(jj) = 2
+                            blocks(ii)%l3(jj) = 3
+
+                        end if
+                    end if
+                end if
+            end do
+
+        end subroutine searchInternalNeighbors
+
+    end subroutine internalFacesSubblock
+    subroutine graphPartitioning(emptyPartitions, commNeglected)
+        !
+        !       graphPartitioning partitions the corresponding graph of the
+        !       computational blocks such that both the number of cells and
+        !       number of faces is about equal on all processors.
+        !
+        use constants
+        use communication, only: myID, adflow_comm_world, nProc
+        use partitionMod, only: part, blocks, ubvec, nBlocks
+        use inputParallel, only: loadImbalance
+        use utils, only: terminate
+        use sorting, only: qsortIntegers, bsearchIntegers
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        logical, intent(out) :: emptyPartitions, commNeglected
+        !
+        !      Variables to store the graph in metis format.
+        !
+        ! nVertex        : Number of vertices in the graph, equals nBlocks.
+        ! nCon           : Number of contraints, 2.
+        ! xadj(0:nVertex): Number of edges per vertex, cumulative storage
+        !                  format.
+        ! adjncy(:)      : End vertex of the edge; the size of adjncy is
+        !                  xadj(nVertex).
+        ! vwgt(:,:)      : Vertex weights, size equals nCon,nVertex. The
+        !                  vertex weights are stored contiguously.
+        ! adjwgt(:)      : Edge weights, size equals xadj(nVertex). Note
+        !                  that the edge weights of edge i-j can be
+        !                  different from the weight of edge j-i.
+        ! wgtflag        : Whether or not to use weights on edges. Here
+        !                  wgtflag should always be 1 to indicate that
+        !                  edge weights are used.
+        ! numflag        : Flag to indicate the numbering convention,
+        !                  starting from 0 or 1. Here we start from 0.
+        ! nParts         : Number of parts to split the graph. This is
+        !                  nProc.
+        ! ubvec(2)       : Tolerance for the constraints. Stored in the
+        !                  module dpartitionMod.
+        ! options(5)     : Option array; normally the default is used
+        !                  indicated by options(1) = 0.
+        ! edgecut        : On return it contains the edge cut of the
+        !                  distributed graph.
+        ! part(nVertex)  : On return the processor ID for each block.
+        !                  It will be returned in fortran numbering,
+        !                  i.e. starting at 1.  Stored in the module
+        !                  distributionMod.
+
+        integer :: nVertex, nCon, wgtflag, numflag, nParts, edgecut
+        integer, dimension(5) :: options
+
+        integer(kind=intType), dimension(:), allocatable :: xadj, adjncy
+        integer(kind=intType), dimension(:), allocatable :: adjwgt
+        integer(kind=intType), dimension(:, :), allocatable :: vwgt
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: i, j
+        integer(kind=intType) :: nEdges, nEdgesMax, ii, jj, kk
+
+        integer(kind=intType), dimension(0:nProc - 1) :: nBlockPerProc
+
+        integer(kind=intType), dimension(:), allocatable :: tmp
+
+        integer(kind=8) :: nCellsTotal    ! 8 byte integers to avoid
+        integer(kind=8) :: nFacesTotal    ! overflow.
+
+        real(kind=4) :: ubvec_temp(2) ! Explict float for ubvec
+        !
+        ! Check whether part is allocated from a previous call. If so,
+        ! release the memory.
+
+        if (allocated(part)) then
+            deallocate (part, stat=ierr)
+            if (ierr /= 0) &
+                call terminate("graphPartitioning", &
+                               "Deallocation failure for part")
+        end if
+
+        ! Determine the number of edges in the graph and the maximum
+        ! number for a vertex in the graph.
+
+        nEdges = 0
+        nEdgesMax = 0
+        do i = 1, nBlocks
+            ii = blocks(i)%n1to1
+            nEdges = nedges + ii
+            nEdgesMax = max(nedgesMax, ii)
+        end do
+
+        ! Initialize some values for the graph.
+
+        nVertex = nBlocks
+        nCon = 2
+        wgtflag = 1
+        numflag = 0
+        nParts = nProc
+        ubvec(1) = one + loadImbalance
+        ubvec(2) = one + loadImbalance
+        options = 0
+
+        !  Allocate the memory to store/build the graph.
+
+        allocate (xadj(0:nVertex), vwgt(nCon, nVertex), adjncy(nEdges), &
+                  adjwgt(nEdges), part(nVertex), tmp(nEdgesMax), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("graphPartitioning", &
+                           "Memory allocation failure for graph variables")
+
+        ! Initialize xadj(0) to 0.
+        ! Furthermore initialize adjwgt to 0, as these values are
+        ! accumulated due to multiple subfaces between blocks.
+
+        xadj(0) = 0
+        adjwgt = 0
+
+        ! Loop over the number of blocks to build the graph.
+
+        graphVertex: do i = 1, nBlocks
+
+            ! Store the both vertex weights.
+
+            vwgt(1, i) = blocks(i)%nCell
+            vwgt(2, i) = blocks(i)%nFace
+
+            ! Sort the neighbors in increasing order and neglect the
+            ! communication to myself, i.e. do not allow an edge to myself.
+            ! The sorting is necessary, because a block might have several
+            ! subfaces with another block
+
+            nEdges = 0
+            do j = 1, blocks(i)%n1to1
+                ii = blocks(i)%nBocos + j
+                if (blocks(i)%neighBlock(ii) /= i) then
+                    nEdges = nEdges + 1
+                    tmp(nEdges) = blocks(i)%neighBlock(ii)
+                end if
+            end do
+
+            ! Sort tmp in increasing order and get rid of the possible
+            ! multiple entries.
+
+            call qsortIntegers(tmp, nEdges)
+
+            ii = min(nEdges, 1_intType)  ! Be aware of nEdges == 0
+            do j = 2, nEdges
+                if (tmp(j) /= tmp(ii)) then
+                    ii = ii + 1
+                    tmp(ii) = tmp(j)
+                end if
+            end do
+
+            ! Set nEdges to ii and update xadj(i).
+
+            nEdges = ii
+            xadj(i) = xadj(i - 1) + nEdges
+
+            ! Repeat the loop over the subfaces, but now store
+            ! the edge info.
+
+            Edges1to1: do j = 1, blocks(i)%n1to1
+
+                ii = blocks(i)%nBocos + j
+                if (blocks(i)%neighBlock(ii) /= i) then
+
+                    ! Search for the block ID and add the offset of xadj(i-1)
+                    ! to obtain the correct index to store the edge info.
+                    ! The -1 to the adjncy is present because C-numbering
+                    ! is used when calling Metis.
+
+                    jj = xadj(i - 1) &
+                         + bsearchIntegers(blocks(i)%neighBlock(ii), tmp(1:nEdges))
+                    adjncy(jj) = blocks(i)%neighBlock(ii) - 1
+
+                    ! The weight equals the number of 1st and 2nd level halo
+                    ! cells to be communicated between the blocks. The weights
+                    ! are accumulated, as multiple subfaces between blocks are
+                    ! possible.
+
+                    kk = 1
+                    adjwgt(jj) = adjwgt(jj) + 2 * &
+                                 (max(abs(blocks(i)%inEnd(ii) - blocks(i)%inBeg(ii)), kk) &
+                                  * max(abs(blocks(i)%jnEnd(ii) - blocks(i)%jnBeg(ii)), kk) &
+                                  * max(abs(blocks(i)%knEnd(ii) - blocks(i)%knBeg(ii)), kk))
+                end if
+
+            end do Edges1to1
+
+        end do graphVertex
+
+        ! Metis has problems when the total number of cells or faces
+        ! used in the weights exceeds 2Gb. Therefore the sum of these
+        ! values is determined and an appropriate weight factor is
+        ! determined. Note that the type of nCellsTotal and nFacesTotal
+        ! is integer*8.
+
+        nCellsTotal = 0
+        nFacesTotal = 0
+        do i = 1, nBlocks
+            nCellsTotal = nCellsTotal + vwgt(1, i)
+            nFacesTotal = nFacesTotal + vwgt(2, i)
+        end do
+
+        if (nCellsTotal > 2147483647 .or. nFacesTotal > 2147483647) then
+            nCellsTotal = nCellsTotal / 2147483647 + 1
+            nFacesTotal = nFacesTotal / 2147483647 + 1
+
+            do i = 1, nBlocks
+                vwgt(1, i) = vwgt(1, i) / nCellsTotal
+                vwgt(2, i) = vwgt(2, i) / nFacesTotal
+            end do
+        end if
+
+        ! Loop over the number of attempts to partition the graph.
+        ! In the first attempt the communication is taken into account.
+        ! If not successful, i.e. empty partitions present, the metis
+        ! routine is called once more, but now with zero adjwgt. This
+        ! means that the communication cost is neglected and metis
+        ! normally gives a valid partitioning.
+        ! Initialize commNeglected to .false. This will change if in
+        ! the loop below the first call to metis is not successful.
+
+        commNeglected = .false.
+        attemptLoop: do ii = 1, 2
+
+            ! Copy ubvec to a float since if you use -r8 flag it gets
+            ! converted to real
+            ubvec_temp(1) = real(ubvec(1))
+            ubvec_temp(2) = real(ubvec(2))
+
+            call metisInterface(nVertex, nCon, xadj, adjncy, vwgt, &
+                                adjwgt, wgtflag, numflag, nParts, &
+                                ubvec_temp, options, edgecut, part)
+            ! Determine the number of blocks per processor.
+
+            nBlockPerProc = 0
+            do i = 1, nBlocks
+                nBlockPerProc(part(i)) = nBlockPerProc(part(i)) + 1
+            end do
+
+            ! Check for empty partitions.
+
+            emptyPartitions = .false.
+            do i = 0, nProc - 1
+                if (nBlockPerProc(i) == 0) emptyPartitions = .true.
+            end do
+
+            ! Exit the loop if no empty partitions are present or if
+            ! this is the second time this loop is executed.
+
+            if (ii == 2 .or. (.not. emptyPartitions)) exit attemptLoop
+
+            ! The first call to metis resulted in empty partitions.
+            ! Ignore the communication, i.e. set the number of
+            ! neighbors to 0, and try again.
+
+            commNeglected = .true.
+            xadj = 0
+
+        end do attemptLoop
+
+        ! Deallocate the memory for the graph except part.
+
+        deallocate (xadj, vwgt, adjncy, adjwgt, tmp, stat=ierr)
+        if (ierr /= 0) &
             call terminate("graphPartitioning", &
-            "Deallocation failure for part")
-    endif
+                           "Deallocation failure for graph variables")
+
+    end subroutine graphPartitioning
+
+    subroutine checkLoadBalance(cellsBalanced, facesBalanced)
+        !
+        !       checkLoadBalance determines whether or not the load balance
+        !       for the cells and faces is met.
+        !
+        use constants
+        use communication, only: adflow_comm_world, myid, nProc
+        use partitionMod, only: blocks, ubvec, part, nBlocks
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        logical, intent(out) :: cellsBalanced, facesBalanced
+        !
+        !      Local variables
+        !
+        integer(kind=intType) :: i, j
+        integer(kind=intType) :: nCellMax, nCellTol
+        integer(kind=intType) :: nFaceMax, nFaceTol
+
+        integer(kind=intType), dimension(nProc) :: nCell, nFace
+
+        integer(kind=8) :: nCellsEven, nFacesEven   ! 8 byte integers to
+        ! avoid overflow.
+
+        ! Initialize nCell and nFace to 0. These variables will contain
+        ! the number of cells and faces per partition (== processor)
+        ! respectively.
+
+        nCell = 0
+        nFace = 0
+
+        ! Determine the number of cells and faces per partition.
+        ! Note that part(i) is the processor id, which starts at 0.
+
+        do i = 1, nblocks
+            j = part(i) + 1
+            nCell(j) = nCell(j) + blocks(i)%nCell
+            nFace(j) = nFace(j) + blocks(i)%nFace
+        end do
+
+        ! Determine the desirable number of cells and faces per processor.
+
+        nCellsEven = nCell(1)
+        nFacesEven = nFace(1)
+
+        do i = 2, nProc
+            nCellsEven = nCellsEven + nCell(i)
+            nFacesEven = nFacesEven + nFace(i)
+        end do
+
+        nCellsEven = nCellsEven / nProc
+        nFacesEven = nFacesEven / nProc
+
+        ! Determine the maximum value of nCell and nFace
+        ! and substract the optimal value.
+
+        nCellMax = abs(maxval(nCell) - nCellsEven)
+        nFaceMax = abs(maxval(nFace) - nFacesEven)
+
+        ! Determine the tolerance for the cells and faces.
+
+        nCellTol = (ubvec(1) - one) * nCellsEven
+        nFaceTol = (ubvec(2) - one) * nFacesEven
+
+        ! Check whether the load balance values for the cells and faces
+        ! are met.
 
-    ! Determine the number of edges in the graph and the maximum
-    ! number for a vertex in the graph.
+        cellsBalanced = .true.
+        facesBalanced = .true.
 
-    nEdges = 0
-    nEdgesMax = 0
-    do i=1,nBlocks
-       ii        = blocks(i)%n1to1
-       nEdges    = nedges + ii
-       nEdgesMax = max(nedgesMax, ii)
-    enddo
+        if (nCellMax > nCellTol) cellsBalanced = .false.
+        if (nFaceMax > nFaceTol) facesBalanced = .false.
 
-    ! Initialize some values for the graph.
+        ! Determine the load imbalances for the cells and faces
+        ! and store it in ubvec.
 
-    nVertex  = nBlocks
-    nCon     = 2
-    wgtflag  = 1
-    numflag  = 0
-    nParts   = nProc
-    ubvec(1) = one + loadImbalance
-    ubvec(2) = one + loadImbalance
-    options  = 0
-
-    !  Allocate the memory to store/build the graph.
+        ubvec(1) = real(nCellMax, realType) &
+                   / real(nCellsEven, realType)
+        ubvec(2) = real(nFaceMax, realType) &
+                   / real(nFacesEven, realType)
 
-    allocate(xadj(0:nVertex), vwgt(nCon,nVertex), adjncy(nEdges), &
-         adjwgt(nEdges), part(nVertex), tmp(nEdgesMax), stat=ierr)
-    if(ierr /= 0)                         &
-         call terminate("graphPartitioning", &
-         "Memory allocation failure for graph variables")
-
-    ! Initialize xadj(0) to 0.
-    ! Furthermore initialize adjwgt to 0, as these values are
-    ! accumulated due to multiple subfaces between blocks.
-
-    xadj(0) = 0
-    adjwgt  = 0
-
-    ! Loop over the number of blocks to build the graph.
-
-    graphVertex: do i=1,nBlocks
-
-       ! Store the both vertex weights.
-
-       vwgt(1,i) = blocks(i)%nCell
-       vwgt(2,i) = blocks(i)%nFace
-
-       ! Sort the neighbors in increasing order and neglect the
-       ! communication to myself, i.e. do not allow an edge to myself.
-       ! The sorting is necessary, because a block might have several
-       ! subfaces with another block
-
-       nEdges = 0
-       do j=1,blocks(i)%n1to1
-          ii = blocks(i)%nBocos + j
-          if(blocks(i)%neighBlock(ii) /= i) then
-             nEdges = nEdges +1
-             tmp(nEdges) = blocks(i)%neighBlock(ii)
-          endif
-       enddo
-
-       ! Sort tmp in increasing order and get rid of the possible
-       ! multiple entries.
-
-       call qsortIntegers(tmp, nEdges)
-
-       ii = min(nEdges,1_intType)  ! Be aware of nEdges == 0
-       do j=2,nEdges
-          if(tmp(j) /= tmp(ii)) then
-             ii = ii + 1
-             tmp(ii) = tmp(j)
-          endif
-       enddo
-
-       ! Set nEdges to ii and update xadj(i).
-
-       nEdges  = ii
-       xadj(i) = xadj(i-1) + nEdges
-
-       ! Repeat the loop over the subfaces, but now store
-       ! the edge info.
-
-       Edges1to1: do j=1,blocks(i)%n1to1
-
-          ii = blocks(i)%nBocos + j
-          if(blocks(i)%neighBlock(ii) /= i) then
-
-             ! Search for the block ID and add the offset of xadj(i-1)
-             ! to obtain the correct index to store the edge info.
-             ! The -1 to the adjncy is present because C-numbering
-             ! is used when calling Metis.
+    end subroutine checkLoadBalance
 
-             jj = xadj(i-1) &
-                  + bsearchIntegers(blocks(i)%neighBlock(ii), tmp(1:nEdges))
-             adjncy(jj) = blocks(i)%neighBlock(ii) - 1
-
-             ! The weight equals the number of 1st and 2nd level halo
-             ! cells to be communicated between the blocks. The weights
-             ! are accumulated, as multiple subfaces between blocks are
-             ! possible.
-
-             kk = 1
-             adjwgt(jj) = adjwgt(jj) + 2 * &
-                  ( max(abs(blocks(i)%inEnd(ii) - blocks(i)%inBeg(ii)), kk) &
-                  * max(abs(blocks(i)%jnEnd(ii) - blocks(i)%jnBeg(ii)), kk) &
-                  * max(abs(blocks(i)%knEnd(ii) - blocks(i)%knBeg(ii)), kk) )
-          endif
-
-       enddo Edges1to1
-
-    enddo graphVertex
-
-    ! Metis has problems when the total number of cells or faces
-    ! used in the weights exceeds 2Gb. Therefore the sum of these
-    ! values is determined and an appropriate weight factor is
-    ! determined. Note that the type of nCellsTotal and nFacesTotal
-    ! is integer*8.
-
-    nCellsTotal = 0
-    nFacesTotal = 0
-    do i=1,nBlocks
-       nCellsTotal = nCellsTotal + vwgt(1,i)
-       nFacesTotal = nFacesTotal + vwgt(2,i)
-    enddo
-
-    if(nCellsTotal > 2147483647 .or. nFacesTotal > 2147483647) then
-       nCellsTotal = nCellsTotal/2147483647 + 1
-       nFacesTotal = nFacesTotal/2147483647 + 1
-
-       do i=1,nBlocks
-          vwgt(1,i) = vwgt(1,i)/nCellsTotal
-          vwgt(2,i) = vwgt(2,i)/nFacesTotal
-       enddo
-    endif
-
-    ! Loop over the number of attempts to partition the graph.
-    ! In the first attempt the communication is taken into account.
-    ! If not successful, i.e. empty partitions present, the metis
-    ! routine is called once more, but now with zero adjwgt. This
-    ! means that the communication cost is neglected and metis
-    ! normally gives a valid partitioning.
-    ! Initialize commNeglected to .false. This will change if in
-    ! the loop below the first call to metis is not successful.
-
-    commNeglected = .false.
-    attemptLoop: do ii=1,2
-
-       ! Copy ubvec to a float since if you use -r8 flag it gets
-       ! converted to real
-       ubvec_temp(1) = real(ubvec(1))
-       ubvec_temp(2) = real(ubvec(2))
-
-       call metisInterface(nVertex, nCon, xadj, adjncy, vwgt, &
-            adjwgt, wgtflag, numflag, nParts,  &
-            ubvec_temp, options, edgecut, part)
-       ! Determine the number of blocks per processor.
-
-       nBlockPerProc = 0
-       do i=1,nBlocks
-          nBlockPerProc(part(i)) = nBlockPerProc(part(i)) + 1
-       enddo
-
-       ! Check for empty partitions.
+    subroutine splitBlock(compBlock, nSub, nCells, ranges)
+        !
+        !       splitBlock tries to split the given computational block into
+        !       the desired number of subblocks nSub. However it can happen
+        !       that nSub is a strange number and a different splitting is
+        !       performed. On return, nSub contains the actual number into the
+        !       block is split. This number is smaller or equal to nSub on
+        !       entry. As it is possible that the computational block itself
+        !       is a subblock of an original cgns block, on return ranges will
+        !       contain the nodal ranges of the subblocks in the original cgns
+        !       block. The splitting attempts to keep the needed multigrid
+        !       capabilities as much as possible.
+        !       A recursive bisection algorithm is used.
+        !
+        use constants
+        use inputIteration, only: smoother
+        use partitionMod, onlY: distributionBlockType
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        type(distributionBlockType), intent(in) :: compBlock
+        integer(kind=intType), intent(in) :: nCells
+        integer(kind=intType), intent(inout) :: nSub
 
-       emptyPartitions = .false.
-       do i=0,nProc-1
-          if(nBlockPerProc(i) == 0) emptyPartitions = .true.
-       enddo
+        integer(kind=intType), dimension(nSub, 3, 2), intent(out) :: ranges
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nLevels, level, nn, mm, nTarget
+        integer(kind=intType) :: nSplit, nSplitNew
 
-       ! Exit the loop if no empty partitions are present or if
-       ! this is the second time this loop is executed.
+        integer(kind=intType), dimension(nSub) :: nSubblocks
 
-       if(ii == 2 .or. (.not. emptyPartitions)) exit attemptLoop
+        logical, dimension(3) :: viscousDir
 
-       ! The first call to metis resulted in empty partitions.
-       ! Ignore the communication, i.e. set the number of
-       ! neighbors to 0, and try again.
+        ! Determine the viscous directions of the block.
 
-       commNeglected = .true.
-       xadj          = 0
-
-    enddo attemptLoop
-
-    ! Deallocate the memory for the graph except part.
-
-    deallocate(xadj, vwgt, adjncy, adjwgt, tmp, stat=ierr)
-    if(ierr /= 0)                         &
-         call terminate("graphPartitioning", &
-         "Deallocation failure for graph variables")
-
-  end subroutine graphPartitioning
-
-  subroutine checkLoadBalance(cellsBalanced, facesBalanced)
-    !
-    !       checkLoadBalance determines whether or not the load balance
-    !       for the cells and faces is met.
-    !
-    use constants
-    use communication, only : adflow_comm_world, myid, nProc
-    use partitionMod, only : blocks, ubvec, part, nBlocks
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    logical, intent(out) :: cellsBalanced, facesBalanced
-    !
-    !      Local variables
-    !
-    integer(kind=intType) :: i, j
-    integer(kind=intType) :: nCellMax, nCellTol
-    integer(kind=intType) :: nFaceMax, nFaceTol
-
-    integer(kind=intType), dimension(nProc) :: nCell, nFace
-
-    integer(kind=8) :: nCellsEven, nFacesEven   ! 8 byte integers to
-    ! avoid overflow.
-
-    ! Initialize nCell and nFace to 0. These variables will contain
-    ! the number of cells and faces per partition (== processor)
-    ! respectively.
-
-    nCell = 0
-    nFace = 0
-
-    ! Determine the number of cells and faces per partition.
-    ! Note that part(i) is the processor id, which starts at 0.
-
-    do i=1,nblocks
-       j = part(i) + 1
-       nCell(j) = nCell(j) + blocks(i)%nCell
-       nFace(j) = nFace(j) + blocks(i)%nFace
-    enddo
-
-    ! Determine the desirable number of cells and faces per processor.
-
-    nCellsEven = nCell(1)
-    nFacesEven = nFace(1)
-
-    do i=2,nProc
-       nCellsEven = nCellsEven + nCell(i)
-       nFacesEven = nFacesEven + nFace(i)
-    enddo
+        viscousDir = .false.
+        do nn = 1, compBlock%nBocos
 
-    nCellsEven = nCellsEven/nProc
-    nFacesEven = nFacesEven/nProc
+            ! Check for viscous boundary conditions and if found set the
+            ! corresponding direction to viscous.
 
-    ! Determine the maximum value of nCell and nFace
-    ! and substract the optimal value.
+            if (compBlock%BCType(nn) == NSWallAdiabatic .or. &
+                compBlock%BCType(nn) == NSWallIsothermal) then
 
-    nCellMax = abs(maxval(nCell) - nCellsEven)
-    nFaceMax = abs(maxval(nFace) - nFacesEven)
+                select case (compBlock%BCFaceID(nn))
+                case (iMin, iMax)
+                    viscousDir(1) = .true.
 
-    ! Determine the tolerance for the cells and faces.
+                case (jMin, jMax)
+                    viscousDir(2) = .true.
 
-    nCellTol = (ubvec(1) - one)*nCellsEven
-    nFaceTol = (ubvec(2) - one)*nFacesEven
+                case (kMin, kMax)
+                    viscousDir(3) = .true.
+                end select
+            end if
+        end do
 
-    ! Check whether the load balance values for the cells and faces
-    ! are met.
+        ! Set viscousDir to .false. if an explicit smoother is used.
 
-    cellsBalanced = .true.
-    facesBalanced = .true.
+        if (smoother == RungeKutta) viscousDir = .false.
 
-    if(nCellMax > nCellTol) cellsBalanced = .false.
-    if(nFaceMax > nFaceTol) facesBalanced = .false.
+        ! Determine the number of levels in the bisection.
 
-    ! Determine the load imbalances for the cells and faces
-    ! and store it in ubvec.
+        nLevels = log(real(nSub, realType)) / log(two)
+        if (2**nLevels < nSub) nLevels = nLevels + 1
 
-    ubvec(1) = real(nCellMax,realType) &
-         / real(nCellsEven,realType)
-    ubvec(2) = real(nFaceMax,realType) &
-         / real(nFacesEven,realType)
+        ! Initialize the range of the first splittable block to the
+        ! entire block.
 
-  end subroutine checkLoadBalance
+        ranges(1, 1, 1) = 1; ranges(1, 1, 2) = compBlock%il
+        ranges(1, 2, 1) = 1; ranges(1, 2, 2) = compBlock%jl
+        ranges(1, 3, 1) = 1; ranges(1, 3, 2) = compBlock%kl
 
-  subroutine splitBlock(compBlock, nSub, nCells, ranges)
-    !
-    !       splitBlock tries to split the given computational block into
-    !       the desired number of subblocks nSub. However it can happen
-    !       that nSub is a strange number and a different splitting is
-    !       performed. On return, nSub contains the actual number into the
-    !       block is split. This number is smaller or equal to nSub on
-    !       entry. As it is possible that the computational block itself
-    !       is a subblock of an original cgns block, on return ranges will
-    !       contain the nodal ranges of the subblocks in the original cgns
-    !       block. The splitting attempts to keep the needed multigrid
-    !       capabilities as much as possible.
-    !       A recursive bisection algorithm is used.
-    !
-    use constants
-    use inputIteration, only : smoother
-    use partitionMod, onlY : distributionBlockType
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    type(distributionBlockType), intent(in)    :: compBlock
-    integer(kind=intType),       intent(in)    :: nCells
-    integer(kind=intType),       intent(inout) :: nSub
+        ! Initialize the number of blocks to be split to 1 and the
+        ! number of blocks it should be split into to nSub.
 
-    integer(kind=intType), dimension(nSub,3,2), intent(out) :: ranges
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nLevels, level, nn, mm, nTarget
-    integer(kind=intType) :: nSplit, nSplitNew
+        nSplit = 1
+        nSubblocks(1) = nSub
 
-    integer(kind=intType), dimension(nSub) :: nSubblocks
+        ! Loop over the number of levels
 
-    logical, dimension(3) :: viscousDir
+        levelLoop: do level = 1, nLevels
 
-    ! Determine the viscous directions of the block.
+            ! Initialize nSplitNew to nSplit; it will be used to store the
+            ! position of the new subblocks. At the end of this loop this
+            ! value will be set to nSplit for the new level.
 
-    viscousDir = .false.
-    do nn=1,compBlock%nBocos
+            nSplitNew = nSplit
 
-       ! Check for viscous boundary conditions and if found set the
-       ! corresponding direction to viscous.
+            ! Loop over the number of blocks to be split.
 
-       if(compBlock%BCType(nn) == NSWallAdiabatic .or. &
-            compBlock%BCType(nn) == NSWallIsothermal) then
+            blockSplitLoop: do nn = 1, nSplit
 
-          select case (compBlock%BCFaceID(nn))
-          case (iMin,iMax)
-             viscousDir(1) = .true.
+                ! Determine the situation we are having it.
 
-          case (jMin,jMax)
-             viscousDir(2) = .true.
+                select case (nSubblocks(nn))
 
-          case (kMin,kMax)
-             viscousDir(3) = .true.
-          end select
-       endif
-    enddo
+                case (2_intType, 3_intType)
 
-    ! Set viscousDir to .false. if an explicit smoother is used.
+                    ! Subblock must be split in either two or three. Store
+                    ! the number into the new subblocks must be in nSubblocks.
+                    ! As the routine split2block puts the subblock with the
+                    ! target number of cells in position nSplitNew, this
+                    ! block should not be split any further and thus gets a
+                    ! value of 1. Position nn gets the old value -1, which
+                    ! will be either 1 or 2
 
-    if(smoother == RungeKutta) viscousDir = .false.
+                    nSplitNew = nSplitNew + 1
+                    nSubblocks(nSplitNew) = 1
+                    nSubblocks(nn) = nSubblocks(nn) - 1
 
-    ! Determine the number of levels in the bisection.
+                    ! Split the block into two, where a block with nCells
+                    ! should be split off.
 
-    nLevels = log(real(nSub,realType))/log(two)
-    if(2**nLevels < nSub) nLevels = nLevels + 1
+                    call split2block(nSub, nn, nSplitNew, nCells, ranges, &
+                                     viscousDir)
 
-    ! Initialize the range of the first splittable block to the
-    ! entire block.
+                    !===========================================================
 
-    ranges(1,1,1) = 1; ranges(1,1,2) = compBlock%il
-    ranges(1,2,1) = 1; ranges(1,2,2) = compBlock%jl
-    ranges(1,3,1) = 1; ranges(1,3,2) = compBlock%kl
+                case (4_intType:)
 
-    ! Initialize the number of blocks to be split to 1 and the
-    ! number of blocks it should be split into to nSub.
+                    ! Subblock must be split in 4 or more. First determine the
+                    ! number of subblocks into the two new subblocks must be
+                    ! split in the next round. Save the old value of
+                    ! nSubblocks(nn) in mm.
 
-    nSplit = 1
-    nSubblocks(1) = nSub
+                    mm = nSubblocks(nn)
 
-    ! Loop over the number of levels
+                    nSplitNew = nSplitNew + 1
+                    nSubblocks(nSplitNew) = nSubblocks(nn) / 2
+                    nSubblocks(nn) = nSubblocks(nn) &
+                                     - nSubblocks(nSplitNew)
 
-    levelLoop: do level=1,nLevels
+                    ! Determine the number of cells for the new subblock
+                    ! nSplitNew.
 
-       ! Initialize nSplitNew to nSplit; it will be used to store the
-       ! position of the new subblocks. At the end of this loop this
-       ! value will be set to nSplit for the new level.
+                    nTarget = (ranges(nn, 1, 2) - ranges(nn, 1, 1)) &
+                              * (ranges(nn, 2, 2) - ranges(nn, 2, 1)) &
+                              * (ranges(nn, 3, 2) - ranges(nn, 3, 1))
+                    nTarget = nTarget * (real(nSubblocks(nSplitNew), realType) &
+                                         / real(mm, realType))
 
-       nSplitNew = nSplit
+                    ! Split the block into two, where one block should get
+                    ! approximately nTarget cells.
 
-       ! Loop over the number of blocks to be split.
+                    call split2block(nSub, nn, nSplitNew, nTarget, ranges, &
+                                     viscousDir)
 
-       blockSplitLoop: do nn=1,nSplit
+                end select
 
-          ! Determine the situation we are having it.
+            end do blockSplitLoop
 
-          select case (nSubblocks(nn))
+            ! Set nSplit to nSplitNew for the next level of bisection.
 
-          case (2_intType,3_intType)
+            nSplit = nSplitNew
 
-             ! Subblock must be split in either two or three. Store
-             ! the number into the new subblocks must be in nSubblocks.
-             ! As the routine split2block puts the subblock with the
-             ! target number of cells in position nSplitNew, this
-             ! block should not be split any further and thus gets a
-             ! value of 1. Position nn gets the old value -1, which
-             ! will be either 1 or 2
+        end do levelLoop
 
-             nSplitNew             = nSplitNew + 1
-             nSubblocks(nSplitNew) = 1
-             nSubblocks(nn)        = nSubblocks(nn) - 1
+        ! Get rid of the possible zero cell ranges. In that case
+        ! nSub will change as well. Add the offset of the computational
+        ! block, as this might be a subblock itself.
 
-             ! Split the block into two, where a block with nCells
-             ! should be split off.
+        mm = nSub
+        nSub = 0
+        do nn = 1, mm
 
-             call split2block(nSub, nn, nSplitNew, nCells, ranges, &
-                  viscousDir)
+            ! Test whether this is a non-zero cell subblock.
 
-             !===========================================================
+            if (ranges(nn, 1, 2) > ranges(nn, 1, 1) .and. &
+                ranges(nn, 2, 2) > ranges(nn, 2, 1) .and. &
+                ranges(nn, 3, 2) > ranges(nn, 3, 1)) then
 
-         case (4_intType:)
+                ! This is a valid subblock. Update nSub and copy the range
+                ! including the offset of the computational block.
 
+                nSub = nSub + 1
 
-             ! Subblock must be split in 4 or more. First determine the
-             ! number of subblocks into the two new subblocks must be
-             ! split in the next round. Save the old value of
-             ! nSubblocks(nn) in mm.
+                ranges(nSub, 1, 1) = ranges(nn, 1, 1) + compBlock%iBegor - 1
+                ranges(nSub, 2, 1) = ranges(nn, 2, 1) + compBlock%jBegor - 1
+                ranges(nSub, 3, 1) = ranges(nn, 3, 1) + compBlock%kBegor - 1
 
-             mm = nSubblocks(nn)
+                ranges(nSub, 1, 2) = ranges(nn, 1, 2) + compBlock%iBegor - 1
+                ranges(nSub, 2, 2) = ranges(nn, 2, 2) + compBlock%jBegor - 1
+                ranges(nSub, 3, 2) = ranges(nn, 3, 2) + compBlock%kBegor - 1
 
-             nSplitNew             = nSplitNew + 1
-             nSubblocks(nSplitNew) = nSubblocks(nn)/2
-             nSubblocks(nn)        = nSubblocks(nn) &
-                  - nSubblocks(nSplitNew)
+            end if
+        end do
 
-             ! Determine the number of cells for the new subblock
-             ! nSplitNew.
+    end subroutine splitBlock
 
-             nTarget = (ranges(nn,1,2) - ranges(nn,1,1)) &
-                  * (ranges(nn,2,2) - ranges(nn,2,1)) &
-                  * (ranges(nn,3,2) - ranges(nn,3,1))
-             nTarget = nTarget*(real(nSubblocks(nSplitNew),realType) &
-                  /          real(mm,realType))
+    !========================================================================
 
-             ! Split the block into two, where one block should get
-             ! approximately nTarget cells.
+    subroutine split2block(nSub, n1, n2, nTarget, ranges, &
+                           viscousDir)
+        !
+        !       split2block splits the block stored in ranges(n1,:,:) into
+        !       two. The new blocks are stored in ranges(n1,:,:) and
+        !       ranges(n2,:,:), where the n2 will store the block with the
+        !      * number of cells closest to nTarget.
+        !
+        use inputIteration
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nSub, n1, n2, nTarget
 
-             call split2block(nSub, nn, nSplitNew, nTarget, ranges, &
-                  viscousDir)
+        integer(kind=intType), dimension(nSub, 3, 2), intent(inout) :: ranges
 
-          end select
+        logical, dimension(3), intent(in) :: viscousDir
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nMG, mm, ii, jj, i, iiOpt, iOpt
+        integer(kind=intType) :: nCellOpt
 
-       enddo blockSplitLoop
+        integer(kind=intType), dimension(3) :: nc, mc, c, nf, prefDir
 
-       ! Set nSplit to nSplitNew for the next level of bisection.
+        ! Determine the maximum of the number of mg levels needed in the
+        ! cycle and the mg start level. Store this value in nMG.
 
-       nSplit = nSplitNew
+        nMG = max(nMGLevels, mgStartlevel)
 
-    enddo levelLoop
+        ! Store the number of cells of the original subblock in nc.
 
-    ! Get rid of the possible zero cell ranges. In that case
-    ! nSub will change as well. Add the offset of the computational
-    ! block, as this might be a subblock itself.
+        nc(1) = ranges(n1, 1, 2) - ranges(n1, 1, 1)
+        nc(2) = ranges(n1, 2, 2) - ranges(n1, 2, 1)
+        nc(3) = ranges(n1, 3, 2) - ranges(n1, 3, 1)
 
-    mm   = nSub
-    nSub = 0
-    do nn=1,mm
+        ! Determine its multigrid capabilities in the three directions.
 
-       ! Test whether this is a non-zero cell subblock.
+        loopDir: do mm = 1, 3
 
-       if(ranges(nn,1,2) > ranges(nn,1,1) .and. &
-            ranges(nn,2,2) > ranges(nn,2,1) .and. &
-            ranges(nn,3,2) > ranges(nn,3,1)) then
+            ! Initialize nf(mm), number of mg levels, to 1 and ii to 2.
+            ! nf is used as a temporary buffer.
 
-          ! This is a valid subblock. Update nSub and copy the range
-          ! including the offset of the computational block.
+            nf(mm) = 1
+            ii = 2
 
-          nSub = nSub + 1
+            ! Loop to determine the number of mg levels.
 
-          ranges(nSub,1,1) = ranges(nn,1,1) + compBlock%iBegor - 1
-          ranges(nSub,2,1) = ranges(nn,2,1) + compBlock%jBegor - 1
-          ranges(nSub,3,1) = ranges(nn,3,1) + compBlock%kBegor - 1
+            do
+                ! Conditions to exit the loop. The first condition is the
+                ! truncation to the maximum number of mg levels needed by the
+                ! iterative algorithm of the solver. The second condition is
+                ! simply that the number of cells do not allow more standard
+                ! mg levels.
 
-          ranges(nSub,1,2) = ranges(nn,1,2) + compBlock%iBegor - 1
-          ranges(nSub,2,2) = ranges(nn,2,2) + compBlock%jBegor - 1
-          ranges(nSub,3,2) = ranges(nn,3,2) + compBlock%kBegor - 1
+                if (nf(mm) == nMG) exit
+                if (mod(nc(mm), ii) /= 0) exit
 
-       endif
-    enddo
+                ! Update nf(mm) and multiply ii by 2 to test for the
+                ! next level.
 
-  end subroutine splitBlock
+                nf(mm) = nf(mm) + 1
+                ii = 2 * ii
+            end do
 
-  !========================================================================
+            ! Determine the constants c(mm) and mc(mm), such that
+            ! nc(mm) = c(mm)*mc(mm). Mc(mm) is the number of cells in every
+            ! direction per supercell. A supercell is the smallest possible
+            ! block able to do multigridding, i.e. mc(mm) = 2**(nf(mm)-1)
+            ! if the block allows for nMG multigrid levels.
 
-  subroutine split2block(nSub, n1, n2, nTarget, ranges, &
-       viscousDir)
-    !
-    !       split2block splits the block stored in ranges(n1,:,:) into
-    !       two. The new blocks are stored in ranges(n1,:,:) and
-    !       ranges(n2,:,:), where the n2 will store the block with the
-    !      * number of cells closest to nTarget.
-    !
-    use inputIteration
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nSub, n1, n2, nTarget
+            c(mm) = 2 * nc(mm) / ii
+            mc(mm) = nc(mm) / c(mm)
 
-    integer(kind=intType), dimension(nSub,3,2), intent(inout) :: ranges
+        end do loopDir
 
-    logical, dimension(3), intent(in) :: viscousDir
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nMG, mm, ii, jj, i, iiOpt, iOpt
-    integer(kind=intType) :: nCellOpt
+        ! Test whether the multigrid requirements are not too restrictive
+        ! for this subblock. If they are, relax them, if possible.
 
-    integer(kind=intType), dimension(3) :: nc, mc, c, nf, prefDir
+        if (nMG > 1 .and. max(c(1), c(2), c(3)) == 1) then
+            c(1) = c(1) * 2; c(2) = c(2) * 2; c(3) = c(3) * 2
+            mc(1) = mc(1) / 2; mc(2) = mc(2) / 2; mc(3) = mc(3) / 2
+        end if
 
-    ! Determine the maximum of the number of mg levels needed in the
-    ! cycle and the mg start level. Store this value in nMG.
+        ! Determine the number of faces on a slice in the
+        ! three directions.
 
-    nMG = max(nMGLevels, mgStartlevel)
+        nf(1) = nc(2) * nc(3)
+        nf(2) = nc(1) * nc(3)
+        nf(3) = nc(1) * nc(2)
 
-    ! Store the number of cells of the original subblock in nc.
+        ! Determine the preferred split direction; this the direction
+        ! which results in the least amount of additional faces. This is
+        ! important, because the number of flux evaluations is
+        ! proportional to the number of faces.
 
-    nc(1) = ranges(n1,1,2) - ranges(n1,1,1)
-    nc(2) = ranges(n1,2,2) - ranges(n1,2,1)
-    nc(3) = ranges(n1,3,2) - ranges(n1,3,1)
-
-    ! Determine its multigrid capabilities in the three directions.
-
-    loopDir: do mm=1,3
-
-       ! Initialize nf(mm), number of mg levels, to 1 and ii to 2.
-       ! nf is used as a temporary buffer.
-
-       nf(mm) = 1
-       ii     = 2
-
-       ! Loop to determine the number of mg levels.
-
-       do
-          ! Conditions to exit the loop. The first condition is the
-          ! truncation to the maximum number of mg levels needed by the
-          ! iterative algorithm of the solver. The second condition is
-          ! simply that the number of cells do not allow more standard
-          ! mg levels.
-
-          if(nf(mm) == nMG) exit
-          if(mod(nc(mm),ii) /= 0) exit
-
-          ! Update nf(mm) and multiply ii by 2 to test for the
-          ! next level.
-
-          nf(mm) = nf(mm) +1
-          ii     = 2*ii
-       enddo
-
-       ! Determine the constants c(mm) and mc(mm), such that
-       ! nc(mm) = c(mm)*mc(mm). Mc(mm) is the number of cells in every
-       ! direction per supercell. A supercell is the smallest possible
-       ! block able to do multigridding, i.e. mc(mm) = 2**(nf(mm)-1)
-       ! if the block allows for nMG multigrid levels.
-
-       c(mm)  = 2*nc(mm)/ii
-       mc(mm) = nc(mm)/c(mm)
-
-    enddo loopDir
-
-    ! Test whether the multigrid requirements are not too restrictive
-    ! for this subblock. If they are, relax them, if possible.
-
-    if(nMG > 1 .and. max(c(1),c(2),c(3)) == 1) then
-       c(1) =  c(1)*2;  c(2) =  c(2)*2;  c(3) =  c(3)*2
-       mc(1) = mc(1)/2; mc(2) = mc(2)/2; mc(3) = mc(3)/2
-    endif
-
-    ! Determine the number of faces on a slice in the
-    ! three directions.
-
-    nf(1) = nc(2)*nc(3)
-    nf(2) = nc(1)*nc(3)
-    nf(3) = nc(1)*nc(2)
-
-    ! Determine the preferred split direction; this the direction
-    ! which results in the least amount of additional faces. This is
-    ! important, because the number of flux evaluations is
-    ! proportional to the number of faces.
-
-    if(nf(1) < nf(2)) then
-       if(nf(1) < nf(3)) then
-          prefDir(1) = 1
-          if(nf(2) < nf(3)) then
-             prefDir(2) = 2
-             prefDir(3) = 3
-          else
-             prefDir(2) = 3
-             prefDir(3) = 2
-          endif
-       else
-          prefDir(1) = 3
-          prefDir(2) = 1
-          prefDir(3) = 2
-       endif
-    else if(nf(2) < nf(3)) then
-       prefDir(1) = 2
-       if(nf(1) < nf(3)) then
-          prefDir(2) = 1
-          prefDir(3) = 3
-       else
-          prefDir(2) = 3
-          prefDir(3) = 1
-       endif
-    else
-       prefDir(1) = 3
-       prefDir(2) = 2
-       prefDir(3) = 1
-    endif
-
-    ! Take viscousDir into account to determine the preference
-    ! direction. For implicit methods it may not be a good idea to
-    ! split the block parallel to a viscous wall.
-
-    ! Check the 1st preference direction. If it is viscous swap
-    ! it with the best inviscid direction, if available.
-
-    if( viscousDir(prefDir(1)) ) then
-       if(.not. viscousDir(prefDir(2)) ) then
-          mm         = prefDir(1)
-          prefDir(1) = prefDir(2)
-          prefDir(2) = mm
-       else if(.not. viscousDir(prefDir(3)) ) then
-          mm         = prefDir(1)
-          prefDir(1) = prefDir(3)
-          prefDir(3) = mm
-       endif
-    endif
-
-    ! Check the second preference direction. If it is viscous
-    ! try to swap it whith the third direction.
-
-    if(viscousDir(prefDir(2)) .and. &
-         .not. viscousDir(prefDir(3)) ) then
-       mm         = prefDir(2)
-       prefDir(2) = prefDir(3)
-       prefDir(3) = mm
-    endif
-
-    ! Determine the splitting which is best. Initialize nCellOpt
-    ! to a ridiculously high number.
-
-    nCellOpt = 10*nc(1)*nc(2)*nc(3)
-
-    ! Loop over the three directions.
-
-    do mm=1,3
-
-       ! Store the current direction considered in ii and determine
-       ! the index i, which gives a number of cells as close as
-       ! possible to the desired number.
-
-       ii = prefDir(mm)
-       i  = nint(real(nTarget,realType) &
-            /      real(mc(ii)*nf(ii),realType),intType)
-       i  = max(i,1_intType)
-
-       ! Determine whether the corresponding number of cells is
-       ! closer to the target number than the currently stored value.
-       ! If so, store the settings for this splitting.
-
-       jj = i*mc(ii)*nf(ii)
-       if(abs(jj - nTarget) < abs(nCellOpt - nTarget)) then
-          nCellOpt = jj
-          iiOpt    = ii
-          iOpt     = i
-       endif
-
-    enddo
-
-    ! Copy the range of subblock n1 into n2 as initialization.
-
-    ranges(n2,1,1) = ranges(n1,1,1); ranges(n2,1,2) = ranges(n1,1,2)
-    ranges(n2,2,1) = ranges(n1,2,1); ranges(n2,2,2) = ranges(n1,2,2)
-    ranges(n2,3,1) = ranges(n1,3,1); ranges(n2,3,2) = ranges(n1,3,2)
-
-    ! Determine the nodal index where the splitting takes place.
-
-    jj = iOpt*mc(iiOpt) + ranges(n1,iiOpt,1)
-
-    ! Adapt the corresponding indices of the new subblocks n1 and
-    ! n2, such that it corresponds to the new situation; n2 should
-    ! contain the subblock, which contains a number of cells as
-    ! close as possible to nTarget.
-
-    ranges(n2,iiOpt,2) = jj
-    ranges(n1,iiOpt,1) = jj
-
-  end subroutine split2block
-
-  subroutine reallocSubfaceMemory(ii,nAlloc)
-    !
-    !       reallocSubfaceMemory reallocates the memory to store the
-    !       subface information for the given block ii. On entry nAlloc
-    !       contains the current number of allocated subfaces, on exit
-    !       this is updated to the new number.
-    !
-    use constants
-    use partitionmod, only : blocks
-    use utils, only : reallocateInteger
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in)    :: ii
-    integer(kind=intType), intent(inout) :: nAlloc
-    !
-    !      Local arguments.
-    !
-    integer(kind=intType) :: nOld, nNew
-    !
-    ! Store the old value of the allocated array and determine the
-    ! new one. Store this new value in nAlloc.
-
-    nOld   = nAlloc
-    nNew   = nOld + 6
-    nAlloc = nNew
-
-    ! Reallocate the memory.
-
-    call reallocateInteger(blocks(ii)%BCType,      nNew, nOld, .true.)
-    call reallocateInteger(blocks(ii)%BCFaceID,    nNew, nOld, .true.)
-    call reallocateInteger(blocks(ii)%cgnsSubface, nNew, nOld, .true.)
-
-    call reallocateInteger(blocks(ii)%inBeg, nNew, nOld, .true.)
-    call reallocateInteger(blocks(ii)%jnBeg, nNew, nOld, .true.)
-    call reallocateInteger(blocks(ii)%knBeg, nNew, nOld, .true.)
-
-    call reallocateInteger(blocks(ii)%inEnd, nNew, nOld, .true.)
-    call reallocateInteger(blocks(ii)%jnEnd, nNew, nOld, .true.)
-    call reallocateInteger(blocks(ii)%knEnd, nNew, nOld, .true.)
-
-    call reallocateInteger(blocks(ii)%dinBeg, nNew, nOld, .true.)
-    call reallocateInteger(blocks(ii)%djnBeg, nNew, nOld, .true.)
-    call reallocateInteger(blocks(ii)%dknBeg, nNew, nOld, .true.)
-
-    call reallocateInteger(blocks(ii)%dinEnd, nNew, nOld, .true.)
-    call reallocateInteger(blocks(ii)%djnEnd, nNew, nOld, .true.)
-    call reallocateInteger(blocks(ii)%dknEnd, nNew, nOld, .true.)
-
-    call reallocateInteger(blocks(ii)%neighBlock, nNew, nOld, .true.)
-    call reallocateInteger(blocks(ii)%groupNum,   nNew, nOld, .true.)
-
-    call reallocateInteger(blocks(ii)%l1, nNew, nOld, .true.)
-    call reallocateInteger(blocks(ii)%l2, nNew, nOld, .true.)
-    call reallocateInteger(blocks(ii)%l3, nNew, nOld, .true.)
-
-  end subroutine reallocSubfaceMemory
+        if (nf(1) < nf(2)) then
+            if (nf(1) < nf(3)) then
+                prefDir(1) = 1
+                if (nf(2) < nf(3)) then
+                    prefDir(2) = 2
+                    prefDir(3) = 3
+                else
+                    prefDir(2) = 3
+                    prefDir(3) = 2
+                end if
+            else
+                prefDir(1) = 3
+                prefDir(2) = 1
+                prefDir(3) = 2
+            end if
+        else if (nf(2) < nf(3)) then
+            prefDir(1) = 2
+            if (nf(1) < nf(3)) then
+                prefDir(2) = 1
+                prefDir(3) = 3
+            else
+                prefDir(2) = 3
+                prefDir(3) = 1
+            end if
+        else
+            prefDir(1) = 3
+            prefDir(2) = 2
+            prefDir(3) = 1
+        end if
+
+        ! Take viscousDir into account to determine the preference
+        ! direction. For implicit methods it may not be a good idea to
+        ! split the block parallel to a viscous wall.
+
+        ! Check the 1st preference direction. If it is viscous swap
+        ! it with the best inviscid direction, if available.
+
+        if (viscousDir(prefDir(1))) then
+            if (.not. viscousDir(prefDir(2))) then
+                mm = prefDir(1)
+                prefDir(1) = prefDir(2)
+                prefDir(2) = mm
+            else if (.not. viscousDir(prefDir(3))) then
+                mm = prefDir(1)
+                prefDir(1) = prefDir(3)
+                prefDir(3) = mm
+            end if
+        end if
+
+        ! Check the second preference direction. If it is viscous
+        ! try to swap it whith the third direction.
+
+        if (viscousDir(prefDir(2)) .and. &
+            .not. viscousDir(prefDir(3))) then
+            mm = prefDir(2)
+            prefDir(2) = prefDir(3)
+            prefDir(3) = mm
+        end if
+
+        ! Determine the splitting which is best. Initialize nCellOpt
+        ! to a ridiculously high number.
+
+        nCellOpt = 10 * nc(1) * nc(2) * nc(3)
+
+        ! Loop over the three directions.
+
+        do mm = 1, 3
+
+            ! Store the current direction considered in ii and determine
+            ! the index i, which gives a number of cells as close as
+            ! possible to the desired number.
+
+            ii = prefDir(mm)
+            i = nint(real(nTarget, realType) &
+                     / real(mc(ii) * nf(ii), realType), intType)
+            i = max(i, 1_intType)
+
+            ! Determine whether the corresponding number of cells is
+            ! closer to the target number than the currently stored value.
+            ! If so, store the settings for this splitting.
+
+            jj = i * mc(ii) * nf(ii)
+            if (abs(jj - nTarget) < abs(nCellOpt - nTarget)) then
+                nCellOpt = jj
+                iiOpt = ii
+                iOpt = i
+            end if
+
+        end do
+
+        ! Copy the range of subblock n1 into n2 as initialization.
+
+        ranges(n2, 1, 1) = ranges(n1, 1, 1); ranges(n2, 1, 2) = ranges(n1, 1, 2)
+        ranges(n2, 2, 1) = ranges(n1, 2, 1); ranges(n2, 2, 2) = ranges(n1, 2, 2)
+        ranges(n2, 3, 1) = ranges(n1, 3, 1); ranges(n2, 3, 2) = ranges(n1, 3, 2)
+
+        ! Determine the nodal index where the splitting takes place.
+
+        jj = iOpt * mc(iiOpt) + ranges(n1, iiOpt, 1)
+
+        ! Adapt the corresponding indices of the new subblocks n1 and
+        ! n2, such that it corresponds to the new situation; n2 should
+        ! contain the subblock, which contains a number of cells as
+        ! close as possible to nTarget.
+
+        ranges(n2, iiOpt, 2) = jj
+        ranges(n1, iiOpt, 1) = jj
+
+    end subroutine split2block
+
+    subroutine reallocSubfaceMemory(ii, nAlloc)
+        !
+        !       reallocSubfaceMemory reallocates the memory to store the
+        !       subface information for the given block ii. On entry nAlloc
+        !       contains the current number of allocated subfaces, on exit
+        !       this is updated to the new number.
+        !
+        use constants
+        use partitionmod, only: blocks
+        use utils, only: reallocateInteger
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: ii
+        integer(kind=intType), intent(inout) :: nAlloc
+        !
+        !      Local arguments.
+        !
+        integer(kind=intType) :: nOld, nNew
+        !
+        ! Store the old value of the allocated array and determine the
+        ! new one. Store this new value in nAlloc.
+
+        nOld = nAlloc
+        nNew = nOld + 6
+        nAlloc = nNew
+
+        ! Reallocate the memory.
+
+        call reallocateInteger(blocks(ii)%BCType, nNew, nOld, .true.)
+        call reallocateInteger(blocks(ii)%BCFaceID, nNew, nOld, .true.)
+        call reallocateInteger(blocks(ii)%cgnsSubface, nNew, nOld, .true.)
+
+        call reallocateInteger(blocks(ii)%inBeg, nNew, nOld, .true.)
+        call reallocateInteger(blocks(ii)%jnBeg, nNew, nOld, .true.)
+        call reallocateInteger(blocks(ii)%knBeg, nNew, nOld, .true.)
+
+        call reallocateInteger(blocks(ii)%inEnd, nNew, nOld, .true.)
+        call reallocateInteger(blocks(ii)%jnEnd, nNew, nOld, .true.)
+        call reallocateInteger(blocks(ii)%knEnd, nNew, nOld, .true.)
+
+        call reallocateInteger(blocks(ii)%dinBeg, nNew, nOld, .true.)
+        call reallocateInteger(blocks(ii)%djnBeg, nNew, nOld, .true.)
+        call reallocateInteger(blocks(ii)%dknBeg, nNew, nOld, .true.)
+
+        call reallocateInteger(blocks(ii)%dinEnd, nNew, nOld, .true.)
+        call reallocateInteger(blocks(ii)%djnEnd, nNew, nOld, .true.)
+        call reallocateInteger(blocks(ii)%dknEnd, nNew, nOld, .true.)
+
+        call reallocateInteger(blocks(ii)%neighBlock, nNew, nOld, .true.)
+        call reallocateInteger(blocks(ii)%groupNum, nNew, nOld, .true.)
+
+        call reallocateInteger(blocks(ii)%l1, nNew, nOld, .true.)
+        call reallocateInteger(blocks(ii)%l2, nNew, nOld, .true.)
+        call reallocateInteger(blocks(ii)%l3, nNew, nOld, .true.)
+
+    end subroutine reallocSubfaceMemory
 
 end module loadBalance
diff --git a/src/partitioning/partitionMod.F90 b/src/partitioning/partitionMod.F90
index 59052f51c..c289d426f 100644
--- a/src/partitioning/partitionMod.F90
+++ b/src/partitioning/partitionMod.F90
@@ -1,1805 +1,1802 @@
 module partitionMod
-  !
-  !       This local module contains definitions of derived datatypes
-  !       as well as variables used in the partitioning directory.
-  !
-  use constants
-  implicit none
-  save
-  !
-  !       The definition of the derived datatype distributionBlockType
-  !
-  type distributionBlockType
-     !
-     !         Block dimensions and local block ID.
-     !
-     !  nx, ny, nz - block integer dimensions for no halo cell based
-     !               quantities.
-     !  il, jl, kl - block integer dimensions for no halo node based
-     !               quantities.
-     !  blockID    - local block ID on the processor this block is
-     !               stored.
-
-     integer(kind=intType) :: nx, ny, nz, &
-          il, jl, kl
-     integer(kind=intType) :: blockID
-     !
-     !         Total number cells and faces inside the block. In the number
-     !         faces the work for nonmatching block boundaries is included,
-     !         such that the load balance is still guaranteed.
-     !
-     ! Ncell     : total number of cells in this block.
-     ! Nface     : total number of faces in this block.
-
-     integer(kind=intType) :: ncell, nface
-     !
-     !         Block boundary conditions.
-     !
-     !  nSubface             - Number of subfaces on this block.
-     !  n1to1                - Number of 1 to 1 block boundaries.
-     !  nBocos               - Number of physical boundary subfaces.
-     !  BCType(:)            - Boundary condition type for each
-     !                         subface. See the module BCTypes for
-     !                         the possibilities.
-     !  BCFaceID(:)          - Block face location of each subface.
-     !                         Possible values are: iMin, iMax, jMin,
-     !                         jMax, kMin, kMax.
-     !  cgnsSubface(:)       - The subface in the corresponding cgns
-     !                         block. As cgns distinguishes between
-     !                         boundary and internal boundaries, the
-     !                         BCType of the subface is needed to
-     !                         know which one to take. A zero
-     !                         indicates that this face was obtained
-     !                         by splitting a cgns block.
-     !  inBeg(:), inEnd(:)   - Lower and upper limits for the nodes
-     !  jnBeg(:), jnEnd(:)     in each of the index directions on a
-     !  knBeg(:), knEnd(:)     given subface. Note that one of these
-     !                         indices will not change since we will
-     !                         be moving on a face.
-     !  dinBeg(:), dinEnd(:) - Lower and upper limits for the nodes
-     !  djnBeg(:), djnEnd(:)   in the each of the index directions
-     !  dknBeg(:), dknEnd(:)   of the donor subface for this
-     !                         particular subface. Note that one of
-     !                         these indices will not change since we
-     !                         will be moving on a face.
-     !  neighBlock(:)        - Block number to which this subface
-     !                         connects. This value is set to zero if
-     !                         this subface is not connected to
-     !                         another block.
-     !  l1(:), l2(:),        - Short hand for the transformation
-     !  l3(:)                  matrix between this subface and the
-     !                         neighbor block. These value are set to
-     !                         zero if this subface is not connected
-     !                         to another block.
-     !  groupNum(:)          - Group number to which this subface
-     !                         belongs. If this subface does not
-     !                         belong to any group, the corresponding
-     !                         entry in this array is zeroed out.
-
-
-     integer(kind=intType) :: nSubface, n1to1, nBocos
-
-     integer(kind=intType), dimension(:), pointer :: BCType
-     integer(kind=intType), dimension(:), pointer :: BCFaceID
-     integer(kind=intType), dimension(:), pointer :: cgnsSubface
-
-     integer(kind=intType), dimension(:), pointer :: inBeg, inEnd
-     integer(kind=intType), dimension(:), pointer :: jnBeg, jnEnd
-     integer(kind=intType), dimension(:), pointer :: knBeg, knEnd
-
-     integer(kind=intType), dimension(:), pointer :: dinBeg, dinEnd
-     integer(kind=intType), dimension(:), pointer :: djnBeg, djnEnd
-     integer(kind=intType), dimension(:), pointer :: dknBeg, dknEnd
-
-     integer(kind=intType), dimension(:), pointer :: neighBlock
-     integer(kind=intType), dimension(:), pointer :: l1, l2, l3
-     integer(kind=intType), dimension(:), pointer :: groupNum
-
-     !
-     !         Relation to the original cgns grid.
-     !
-     ! cgnsBlockID    - block/zone number of the cgns grid to which
-     !                  this block is related.
-     ! iBegOr, iEndOr - range of points of this block in the
-     ! jBegOr, jEndOr   corresponding cgns block, i.e. for this block
-     ! kBegOr, kEndOr   iBegOr <= i <= iEndOr, jBegOr <= j <= jEndOr,
-     !                  kBegOr <= k <= kEndOr.
-     !                  It is of course possible that the entire
-     !                  block is stored.
-
-     integer(kind=intType) :: cgnsBlockID
-     integer(kind=intType) :: iBegOr, iEndOr, jBegOr, jEndOr
-     integer(kind=intType) :: kBegOr, kEndOr
-
-  end type distributionBlockType
-
-  ! nBlocks        : Number of blocks to be distributed.
-  ! blocks(nBlocks): The array with the block info.
-
-  integer(kind=intType) :: nBlocks
-  type(distributionBlockType), dimension(:), allocatable :: blocks
-
-  !
-  !       Type definition to store the way the original cgns blocks are
-  !       split for load balancing reasons.
-  !
-  type splitCGNSType
-
-     ! nSubBlocks:             # of subblocks into which the original
-     !                         block is split.
-     ! ranges(nsubblocks,3,2): The nodal range in the original
-     !                         block of these subblocks.
-
-     integer(kind=intType) :: nSubBlocks
-     integer(kind=intType), dimension(:,:,:), pointer :: ranges
-
-  end type splitCGNSType
-
-  !
-  !       Type definition needed to determine the processor ID's and
-  !       nodal ranges of the subblocks for every CGNS block.
-  !
-  type subblocksOfCGNSType
-
-     ! cgnsBlockID    - block/zone number of the cgns grid to which
-     !                  this subblock is related.
-     ! procID         - Processor ID on which this subblock is
-     !                  stored.
-     ! blockID        - local block ID on the processor this block is
-     !                  stored.
-     ! iBegOr, iEndOr - range of points of this block in the
-     ! jBegOr, jEndOr   corresponding cgns block, i.e. for this block
-     ! kBegOr, kEndOr   iBegOr <= i <= iEndOr, jBegOr <= j <= jEndOr,
-     !                  kBegOr <= k <= kEndOr.
-     !                  It is of course possible that the entire
-     !                  block is stored.
-
-     integer :: cgnsBlockID
-     integer :: procID
-     integer :: blockID
-     integer :: iBegOr, iEndOr, jBegOr, jEndOr, kBegOr, kEndOr
-
-  end type subblocksOfCGNSType
-
-  ! Interface for the extension of the operators <= and <.
-  ! These are needed for the sorting of subblocksOfCGNSType. Note
-  ! that the = operator does not need to be defined, because
-  ! subblocksOfCGNSType only contains primitive types.
-
-  interface operator(<=)
-     module procedure lessEqualSubblocksOfCGNSType
-  end interface operator(<=)
-
-  interface operator(<)
-     module procedure lessSubblocksOfCGNSType
-  end interface operator(<)
-  !
-  !       Type definition needed to determine the number of distinct
-  !       non-matching abutting subfaces in the CGNS file.
-  !
-  type subfaceNonMatchType
-
-     ! iBeg, iEnd - Nodal subface range om the CGNS block, i-direction
-     ! jBeg, jEnd - Idem in the j-direction
-     ! kBeg, kEnd - Idem in the k-direction
-     ! connID     - The cgns connectivity ID.
-
-     integer :: iBeg, jBeg, kBeg, iEnd, jEnd, kEnd
-     integer :: connID
-
-  end type subfaceNonMatchType
-
-  ! Interface for the extension of the operators <= and <.
-  ! These are needed for the sorting of subfaceNonMatchType. Note
-  ! that the = operator does not need to be defined, because
-  ! subfaceNonMatchType only contains primitive types.
-
-  interface operator(<=)
-     module procedure lessEqualSubfaceNonMatchType
-  end interface operator(<=)
-
-  interface operator(<)
-     module procedure lessSubfaceNonMatchType
-  end interface operator(<)
-
-  type sortSubRangeType
-
-     ! iMin: minimum i-index in the subrange.
-     ! jMin: minimum j-index in the subrange.
-     ! kMin: minimum k-index in the subrange.
-     ! iMax: maximum i-index in the subrange.
-     ! jMax: maximum j-index in the subrange.
-     ! kMax: maximum k-index in the subrange.
-
-     integer(kind=intType) :: iMin, jMin, kMin
-     integer(kind=intType) :: iMax, jMax, kMax
-
-  end type sortSubRangeType
-
-  ! Interfaces for the definitions of the operators <=, < and /=.
-  ! These are needed for the sorting of this derived data type.
-  ! Note that the = operator does not need to be defined, because
-  ! sortSubRangeType only contains primitive types.
-
-  interface operator(<=)
-     module procedure lessEqualSortSubRangeType
-  end interface operator(<=)
-
-  interface operator(<)
-     module procedure lessSortSubRangeType
-  end interface operator(<)
-
-  ! Definition of the derived data type.
-
-  type fourIntPlusRealType
-     integer(kind=intType) :: n1, n2, n3, n4
-     real(kind=realType)   :: dist
-  end type fourIntPlusRealType
-
-  ! Interfaces for the definitions of the operators <=, < and /=.
-  ! These are needed for the sorting of this derived data type.
-  ! Note that the = operator does not need to be defined, because
-  ! fourIntPlusRealType only contains primitive types.
-
-  interface operator(<=)
-     module procedure lessEqualFourIntPlusRealType
-  end interface operator(<=)
-
-  interface operator(<)
-     module procedure lessFourIntPlusRealType
-  end interface operator(<)
-
-  interface operator(/=)
-     module procedure notEqualFourIntPlusRealType
-  end interface operator(/=)
-
-  ! ==========================================================================
-  !
-  !       Variable to store the partition number (processor ID) of the
-  !       computational blocks.
-  !
-  ! ubvec(2):      Tolerance for the constraints.
-  ! part(nBlocks): The processor ID for each block, starting at 0.
-
-  real, dimension(2) :: ubvec
-
-  integer(kind=intType), dimension(:), allocatable :: part
-  !
-  !       Variables needed for the reading of the grid files.
-  !
-  ! nGridsRead:            Number of grids to read.
-  ! fileIDs(nGridsRead):   The file ID's.
-  ! gridFiles(nGridsRead): Names of the grid files to read.
-  ! interpolSpectral:      Whether or not to interpolate the
-  !                        coordinates for the time spectral mode.
-
-  integer(kind=intType) :: nGridsRead
-  logical ::               interpolSpectral
-
-  integer, dimension(:), allocatable :: fileIDs
-
-  character(len=maxStringLen), dimension(:), allocatable :: gridFiles
-  ! ==========================================================================
-
-
-contains
-  !
-  !         Functions to simulate the operators <= and < for the derived
-  !         datatypes subblocksOfCGNSType and subfaceNonMatchType.
-  !
-  logical function lessEqualSubblocksOfCGNSType(g1, g2)
-    !
-    !         This function returns .true. if g1 <= g2 and .false.
-    !         otherwise. The comparison is firstly based on the CGNS block
-    !         ID, then the processor ID and finally the local block ID.
-    !
-    implicit none
     !
-    !        Function arguments.
-    !
-    type(subblocksOfCGNSType), intent(in) :: g1, g2
-
-    ! Comparison of the CGNS block ID. If not equal, set
-    ! lessEqualSubblocksOfCGNSType appropriately and return.
-
-    if(g1%cgnsBlockID < g2%cgnsBlockID) then
-       lessEqualSubblocksOfCGNSType = .true.
-       return
-    else if(g1%cgnsBlockID > g2%cgnsBlockID) then
-       lessEqualSubblocksOfCGNSType = .false.
-       return
-    endif
-
-    ! Compare the processor ID's.
-
-    if(g1%procID < g2%procID) then
-       lessEqualSubblocksOfCGNSType = .true.
-       return
-    else if(g1%procID > g2%procID) then
-       lessEqualSubblocksOfCGNSType = .false.
-       return
-    endif
-
-    ! Compare the local block ID's.
-
-    if(g1%blockID < g2%blockID) then
-       lessEqualSubblocksOfCGNSType = .true.
-       return
-    else if(g1%blockID > g2%blockID) then
-       lessEqualSubblocksOfCGNSType = .false.
-       return
-    endif
-
-    ! It does not make sense to compare the subranges, because
-    ! these are identical if we came so far. Both entities are
-    ! identical. So set lessEqualSubblocksOfCGNSType to .true.
-
-    lessEqualSubblocksOfCGNSType = .true.
-
-  end function lessEqualSubblocksOfCGNSType
-
-  !===============================================================
-
-  logical function lessSubblocksOfCGNSType(g1, g2)
-    !
-    !         This function returns .true. if g1 < g2 and .false.
-    !         otherwise. The comparison is firstly based on the CGNS block
-    !         ID, then the processor ID and finally the local blockID.
+    !       This local module contains definitions of derived datatypes
+    !       as well as variables used in the partitioning directory.
     !
+    use constants
     implicit none
-    !
-    !        Function arguments.
-    !
-    type(subblocksOfCGNSType), intent(in) :: g1, g2
-
-    ! Comparison of the CGNS block ID. If not equal, set
-    ! lessSubblocksOfCGNSType appropriately and return.
-
-    if(g1%cgnsBlockID < g2%cgnsBlockID) then
-       lessSubblocksOfCGNSType = .true.
-       return
-    else if(g1%cgnsBlockID > g2%cgnsBlockID) then
-       lessSubblocksOfCGNSType = .false.
-       return
-    endif
-
-    ! Compare the processor ID's.
-
-    if(g1%procID < g2%procID) then
-       lessSubblocksOfCGNSType = .true.
-       return
-    else if(g1%procID > g2%procID) then
-       lessSubblocksOfCGNSType = .false.
-       return
-    endif
-
-    ! Compare the local block ID's.
-
-    if(g1%blockID < g2%blockID) then
-       lessSubblocksOfCGNSType = .true.
-       return
-    else if(g1%blockID > g2%blockID) then
-       lessSubblocksOfCGNSType = .false.
-       return
-    endif
+    save
+    !
+    !       The definition of the derived datatype distributionBlockType
+    !
+    type distributionBlockType
+        !
+        !         Block dimensions and local block ID.
+        !
+        !  nx, ny, nz - block integer dimensions for no halo cell based
+        !               quantities.
+        !  il, jl, kl - block integer dimensions for no halo node based
+        !               quantities.
+        !  blockID    - local block ID on the processor this block is
+        !               stored.
+
+        integer(kind=intType) :: nx, ny, nz, &
+                                 il, jl, kl
+        integer(kind=intType) :: blockID
+        !
+        !         Total number cells and faces inside the block. In the number
+        !         faces the work for nonmatching block boundaries is included,
+        !         such that the load balance is still guaranteed.
+        !
+        ! Ncell     : total number of cells in this block.
+        ! Nface     : total number of faces in this block.
+
+        integer(kind=intType) :: ncell, nface
+        !
+        !         Block boundary conditions.
+        !
+        !  nSubface             - Number of subfaces on this block.
+        !  n1to1                - Number of 1 to 1 block boundaries.
+        !  nBocos               - Number of physical boundary subfaces.
+        !  BCType(:)            - Boundary condition type for each
+        !                         subface. See the module BCTypes for
+        !                         the possibilities.
+        !  BCFaceID(:)          - Block face location of each subface.
+        !                         Possible values are: iMin, iMax, jMin,
+        !                         jMax, kMin, kMax.
+        !  cgnsSubface(:)       - The subface in the corresponding cgns
+        !                         block. As cgns distinguishes between
+        !                         boundary and internal boundaries, the
+        !                         BCType of the subface is needed to
+        !                         know which one to take. A zero
+        !                         indicates that this face was obtained
+        !                         by splitting a cgns block.
+        !  inBeg(:), inEnd(:)   - Lower and upper limits for the nodes
+        !  jnBeg(:), jnEnd(:)     in each of the index directions on a
+        !  knBeg(:), knEnd(:)     given subface. Note that one of these
+        !                         indices will not change since we will
+        !                         be moving on a face.
+        !  dinBeg(:), dinEnd(:) - Lower and upper limits for the nodes
+        !  djnBeg(:), djnEnd(:)   in the each of the index directions
+        !  dknBeg(:), dknEnd(:)   of the donor subface for this
+        !                         particular subface. Note that one of
+        !                         these indices will not change since we
+        !                         will be moving on a face.
+        !  neighBlock(:)        - Block number to which this subface
+        !                         connects. This value is set to zero if
+        !                         this subface is not connected to
+        !                         another block.
+        !  l1(:), l2(:),        - Short hand for the transformation
+        !  l3(:)                  matrix between this subface and the
+        !                         neighbor block. These value are set to
+        !                         zero if this subface is not connected
+        !                         to another block.
+        !  groupNum(:)          - Group number to which this subface
+        !                         belongs. If this subface does not
+        !                         belong to any group, the corresponding
+        !                         entry in this array is zeroed out.
+
+        integer(kind=intType) :: nSubface, n1to1, nBocos
+
+        integer(kind=intType), dimension(:), pointer :: BCType
+        integer(kind=intType), dimension(:), pointer :: BCFaceID
+        integer(kind=intType), dimension(:), pointer :: cgnsSubface
+
+        integer(kind=intType), dimension(:), pointer :: inBeg, inEnd
+        integer(kind=intType), dimension(:), pointer :: jnBeg, jnEnd
+        integer(kind=intType), dimension(:), pointer :: knBeg, knEnd
+
+        integer(kind=intType), dimension(:), pointer :: dinBeg, dinEnd
+        integer(kind=intType), dimension(:), pointer :: djnBeg, djnEnd
+        integer(kind=intType), dimension(:), pointer :: dknBeg, dknEnd
+
+        integer(kind=intType), dimension(:), pointer :: neighBlock
+        integer(kind=intType), dimension(:), pointer :: l1, l2, l3
+        integer(kind=intType), dimension(:), pointer :: groupNum
+
+        !
+        !         Relation to the original cgns grid.
+        !
+        ! cgnsBlockID    - block/zone number of the cgns grid to which
+        !                  this block is related.
+        ! iBegOr, iEndOr - range of points of this block in the
+        ! jBegOr, jEndOr   corresponding cgns block, i.e. for this block
+        ! kBegOr, kEndOr   iBegOr <= i <= iEndOr, jBegOr <= j <= jEndOr,
+        !                  kBegOr <= k <= kEndOr.
+        !                  It is of course possible that the entire
+        !                  block is stored.
+
+        integer(kind=intType) :: cgnsBlockID
+        integer(kind=intType) :: iBegOr, iEndOr, jBegOr, jEndOr
+        integer(kind=intType) :: kBegOr, kEndOr
+
+    end type distributionBlockType
+
+    ! nBlocks        : Number of blocks to be distributed.
+    ! blocks(nBlocks): The array with the block info.
+
+    integer(kind=intType) :: nBlocks
+    type(distributionBlockType), dimension(:), allocatable :: blocks
+
+    !
+    !       Type definition to store the way the original cgns blocks are
+    !       split for load balancing reasons.
+    !
+    type splitCGNSType
+
+        ! nSubBlocks:             # of subblocks into which the original
+        !                         block is split.
+        ! ranges(nsubblocks,3,2): The nodal range in the original
+        !                         block of these subblocks.
+
+        integer(kind=intType) :: nSubBlocks
+        integer(kind=intType), dimension(:, :, :), pointer :: ranges
+
+    end type splitCGNSType
+
+    !
+    !       Type definition needed to determine the processor ID's and
+    !       nodal ranges of the subblocks for every CGNS block.
+    !
+    type subblocksOfCGNSType
+
+        ! cgnsBlockID    - block/zone number of the cgns grid to which
+        !                  this subblock is related.
+        ! procID         - Processor ID on which this subblock is
+        !                  stored.
+        ! blockID        - local block ID on the processor this block is
+        !                  stored.
+        ! iBegOr, iEndOr - range of points of this block in the
+        ! jBegOr, jEndOr   corresponding cgns block, i.e. for this block
+        ! kBegOr, kEndOr   iBegOr <= i <= iEndOr, jBegOr <= j <= jEndOr,
+        !                  kBegOr <= k <= kEndOr.
+        !                  It is of course possible that the entire
+        !                  block is stored.
+
+        integer :: cgnsBlockID
+        integer :: procID
+        integer :: blockID
+        integer :: iBegOr, iEndOr, jBegOr, jEndOr, kBegOr, kEndOr
+
+    end type subblocksOfCGNSType
 
-    ! It does not make sense to compare the subranges, because
-    ! these are identical if we came so far. Both entities are
-    ! identical. So set lessSubblocksOfCGNSType to .false.
+    ! Interface for the extension of the operators <= and <.
+    ! These are needed for the sorting of subblocksOfCGNSType. Note
+    ! that the = operator does not need to be defined, because
+    ! subblocksOfCGNSType only contains primitive types.
 
-    lessSubblocksOfCGNSType = .false.
+    interface operator(<=)
+        module procedure lessEqualSubblocksOfCGNSType
+    end interface operator(<=)
 
-  end function lessSubblocksOfCGNSType
-
-  !===============================================================
-
-  logical function lessEqualSubfaceNonMatchType(g1, g2)
-    !
-    !         This function returns .true. if g1 <= g2 and .false.
-    !         otherwise. The comparison is firstly based on the i-range,
-    !         followed by the j-range and k-range. If these are all the
-    !         same the connectivity ID is compared.
-    !
-    implicit none
-    !
-    !        Function arguments.
-    !
-    type(subfaceNonMatchType), intent(in) :: g1, g2
-
-    ! Comparison of the iBeg value. If different set
-    ! lessEqualSubfaceNonMatchType appropriately and return.
-
-    if(g1%iBeg < g2%iBeg) then
-       lessEqualSubfaceNonMatchType = .true.
-       return
-    else if(g1%iBeg > g2%iBeg) then
-       lessEqualSubfaceNonMatchType= .false.
-       return
-    endif
-
-    ! The iEnd value.
-
-    if(g1%iEnd < g2%iEnd) then
-       lessEqualSubfaceNonMatchType = .true.
-       return
-    else if(g1%iEnd > g2%iEnd) then
-       lessEqualSubfaceNonMatchType= .false.
-       return
-    endif
-
-    ! The jBeg value.
-
-    if(g1%jBeg < g2%jBeg) then
-       lessEqualSubfaceNonMatchType = .true.
-       return
-    else if(g1%jBeg > g2%jBeg) then
-       lessEqualSubfaceNonMatchType= .false.
-       return
-    endif
-
-    ! The jEnd value.
-
-    if(g1%jEnd < g2%jEnd) then
-       lessEqualSubfaceNonMatchType = .true.
-       return
-    else if(g1%jEnd > g2%jEnd) then
-       lessEqualSubfaceNonMatchType= .false.
-       return
-    endif
-
-    ! The kBeg value.
-
-    if(g1%kBeg < g2%kBeg) then
-       lessEqualSubfaceNonMatchType = .true.
-       return
-    else if(g1%kBeg > g2%kBeg) then
-       lessEqualSubfaceNonMatchType= .false.
-       return
-    endif
-
-    ! The kEnd value.
-
-    if(g1%kEnd < g2%kEnd) then
-       lessEqualSubfaceNonMatchType = .true.
-       return
-    else if(g1%kEnd > g2%kEnd) then
-       lessEqualSubfaceNonMatchType= .false.
-       return
-    endif
-
-    ! The subrange is identical. Compare the connectivity ID.
-
-    if(g1%connID < g2%connID) then
-       lessEqualSubfaceNonMatchType = .true.
-       return
-    else if(g1%connID > g2%connID) then
-       lessEqualSubfaceNonMatchType= .false.
-       return
-    endif
-
-    ! g1 and g2 are identical. Return .true.
-
-    lessEqualSubfaceNonMatchType = .true.
-
-  end function lessEqualSubfaceNonMatchType
-
-  !===============================================================
-
-  logical function lessSubfaceNonMatchType(g1, g2)
-    !
-    !         This function returns .true. if g1 < g2 and .false.
-    !         otherwise. The comparison is firstly based on the i-range,
-    !         followed by the j-range and k-range. If these are all the
-    !         same the connectivity ID is compared.
-    !
-    implicit none
+    interface operator(<)
+        module procedure lessSubblocksOfCGNSType
+    end interface operator(<)
     !
-    !        Function arguments.
+    !       Type definition needed to determine the number of distinct
+    !       non-matching abutting subfaces in the CGNS file.
     !
-    type(subfaceNonMatchType), intent(in) :: g1, g2
-
-    ! Comparison of the iBeg value. If different set
-    ! lessEqualSubfaceNonMatchType appropriately and return.
-
-    if(g1%iBeg < g2%iBeg) then
-       lessSubfaceNonMatchType = .true.
-       return
-    else if(g1%iBeg > g2%iBeg) then
-       lessSubfaceNonMatchType= .false.
-       return
-    endif
-
-    ! The iEnd value.
-
-    if(g1%iEnd < g2%iEnd) then
-       lessSubfaceNonMatchType = .true.
-       return
-    else if(g1%iEnd > g2%iEnd) then
-       lessSubfaceNonMatchType= .false.
-       return
-    endif
-
-    ! The jBeg value.
-
-    if(g1%jBeg < g2%jBeg) then
-       lessSubfaceNonMatchType = .true.
-       return
-    else if(g1%jBeg > g2%jBeg) then
-       lessSubfaceNonMatchType= .false.
-       return
-    endif
-
-    ! The jEnd value.
-
-    if(g1%jEnd < g2%jEnd) then
-       lessSubfaceNonMatchType = .true.
-       return
-    else if(g1%jEnd > g2%jEnd) then
-       lessSubfaceNonMatchType= .false.
-       return
-    endif
-
-    ! The kBeg value.
-
-    if(g1%kBeg < g2%kBeg) then
-       lessSubfaceNonMatchType = .true.
-       return
-    else if(g1%kBeg > g2%kBeg) then
-       lessSubfaceNonMatchType= .false.
-       return
-    endif
-
-    ! The kEnd value.
-
-    if(g1%kEnd < g2%kEnd) then
-       lessSubfaceNonMatchType = .true.
-       return
-    else if(g1%kEnd > g2%kEnd) then
-       lessSubfaceNonMatchType= .false.
-       return
-    endif
-
-    ! The subrange is identical. Compare the connectivity ID.
-
-    if(g1%connID < g2%connID) then
-       lessSubfaceNonMatchType = .true.
-       return
-    else if(g1%connID > g2%connID) then
-       lessSubfaceNonMatchType= .false.
-       return
-    endif
-
-    ! g1 and g2 are identical. Return .false.
-
-    lessSubfaceNonMatchType = .false.
-
-  end function lessSubfaceNonMatchType
-
-  subroutine qsortSubblocksOfCGNSType(arr, nn)
-    !
-    !       qsortSubblocksOfCGNSType sorts the array of the derived
-    !       datatype subblocksOfCGNSType in increasing order based on the
-    !       <= operator for this derived data type.
-    !
-    use constants
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nn
-
-    type(subblocksOfCGNSType), dimension(*), intent(inout) :: arr
-    !
-    !      Local variables.
-    !
-    integer(kind=intType), parameter :: m = 7
-
-    integer(kind=intType) :: nStack
-    integer(kind=intType) :: i, j, k, r, l, jStack, ii
-
-    integer :: ierr
-
-    type(subblocksOfCGNSType) :: a, tmp
-
-    integer(kind=intType), allocatable, dimension(:) :: stack
-    integer(kind=intType), allocatable, dimension(:) :: tmpStack
-
-    ! Allocate the memory for stack.
-
-    nStack = 100
-    allocate(stack(nStack), stat=ierr)
-    if(ierr /= 0)                                &
-         call terminate("qsortSubblocksOfCGNSType", &
-         "Memory allocation failure for stack")
-
-    ! Initialize the variables that control the sorting.
-
-    jStack = 0
-    l      = 1
-    r      = nn
-
-    ! Start of the algorithm
-
-    do
-
-       ! Check for the size of the subarray.
-
-       if((r-l) < m) then
-
-          ! Perform insertion sort
-
-          do j=l+1,r
-             a = arr(j)
-             do i=(j-1),l,-1
-                if(arr(i) <= a) exit
-                arr(i+1) = arr(i)
-             enddo
-             arr(i+1) = a
-          enddo
-
-          ! In case there are no more elements on the stack, exit from
-          ! the outermost do-loop. Algorithm has finished.
-
-          if(jStack == 0) exit
-
-          ! Pop stack and begin a new round of partitioning.
-
-          r = stack(jStack)
-          l = stack(jStack-1)
-          jStack = jStack - 2
-
-       else
-
-          ! Subarray is larger than the threshold for a linear sort.
-          ! Choose median of left, center and right elements as
-          ! partitioning element a.
-          ! Also rearrange so that (l) <= (l+1) <= (r).
-
-          k = (l+r)/2
-          tmp      = arr(k)             ! Swap the elements
-          arr(k)   = arr(l+1)           ! k and l+1.
-          arr(l+1) = tmp
-
-          if(arr(r) < arr(l)) then
-             tmp    = arr(l)             ! Swap the elements
-             arr(l) = arr(r)             ! r and l.
-             arr(r) = tmp
-          endif
-
-          if(arr(r) < arr(l+1)) then
-             tmp      = arr(l+1)         ! Swap the elements
-             arr(l+1) = arr(r)           ! r and l+1.
-             arr(r)   = tmp
-          endif
-
-          if(arr(l+1) < arr(l)) then
-             tmp      = arr(l+1)         ! Swap the elements
-             arr(l+1) = arr(l)           ! l and l+1.
-             arr(l)   = tmp
-          endif
-
-          ! Initialize the pointers for partitioning.
-
-          i = l+1
-          j = r
-          a = arr(l+1)
-
-          ! The innermost loop
-
-          do
-
-             ! Scan up to find element >= a.
-             do
-                i = i+1
-                if(a <= arr(i)) exit
-             enddo
-
-             ! Scan down to find element <= a.
-             do
-                j = j-1
-                if(arr(j) <= a) exit
-             enddo
-
-             ! Exit the loop in case the pointers i and j crossed.
-
-             if(j < i) exit
-
-             ! Swap the element i and j.
+    type subfaceNonMatchType
 
-             tmp    = arr(i)
-             arr(i) = arr(j)
-             arr(j) = tmp
-          enddo
+        ! iBeg, iEnd - Nodal subface range om the CGNS block, i-direction
+        ! jBeg, jEnd - Idem in the j-direction
+        ! kBeg, kEnd - Idem in the k-direction
+        ! connID     - The cgns connectivity ID.
 
-          ! Swap the entries j and l+1. Remember that a equals
-          ! arr(l+1).
+        integer :: iBeg, jBeg, kBeg, iEnd, jEnd, kEnd
+        integer :: connID
 
-          arr(l+1) = arr(j)
-          arr(j)   = a
+    end type subfaceNonMatchType
 
-          ! Push pointers to larger subarray on stack,
-          ! process smaller subarray immediately.
+    ! Interface for the extension of the operators <= and <.
+    ! These are needed for the sorting of subfaceNonMatchType. Note
+    ! that the = operator does not need to be defined, because
+    ! subfaceNonMatchType only contains primitive types.
 
-          jStack = jStack + 2
-          if(jStack > nStack) then
+    interface operator(<=)
+        module procedure lessEqualSubfaceNonMatchType
+    end interface operator(<=)
 
-             ! Storage of the stack is too small. Reallocate.
+    interface operator(<)
+        module procedure lessSubfaceNonMatchType
+    end interface operator(<)
 
-             allocate(tmpStack(nStack), stat=ierr)
-             if(ierr /= 0)                                &
-                  call terminate("qsortSubblocksOfCGNSType", &
-                  "Memory allocation error for tmpStack")
-             tmpStack = stack
+    type sortSubRangeType
 
-             ! Free the memory of stack, store the old value of nStack
-             ! in tmp and increase nStack.
+        ! iMin: minimum i-index in the subrange.
+        ! jMin: minimum j-index in the subrange.
+        ! kMin: minimum k-index in the subrange.
+        ! iMax: maximum i-index in the subrange.
+        ! jMax: maximum j-index in the subrange.
+        ! kMax: maximum k-index in the subrange.
 
-             deallocate(stack, stat=ierr)
-             if(ierr /= 0)                                &
-                  call terminate("qsortSubblocksOfCGNSType", &
-                  "Deallocation error for stack")
-             ii = nStack
-             nStack = nStack + 100
+        integer(kind=intType) :: iMin, jMin, kMin
+        integer(kind=intType) :: iMax, jMax, kMax
 
-             ! Allocate the memory for stack and copy the old values
-             ! from tmpStack.
+    end type sortSubRangeType
 
-             allocate(stack(nStack), stat=ierr)
-             if(ierr /= 0)                                &
-                  call terminate("qsortSubblocksOfCGNSType", &
-                  "Memory reallocation error for stack")
-             stack(1:ii) = tmpStack(1:ii)
+    ! Interfaces for the definitions of the operators <=, < and /=.
+    ! These are needed for the sorting of this derived data type.
+    ! Note that the = operator does not need to be defined, because
+    ! sortSubRangeType only contains primitive types.
 
-             ! And finally release the memory of tmpStack.
-
-             deallocate(tmpStack, stat=ierr)
-             if(ierr /= 0)                                &
-                  call terminate("qsortSubblocksOfCGNSType", &
-                  "Deallocation error for tmpStack")
-          endif
+    interface operator(<=)
+        module procedure lessEqualSortSubRangeType
+    end interface operator(<=)
 
-          if((r-i+1) >= (j-l)) then
-             stack(jStack)   = r
-             r               = j-1
-             stack(jStack-1) = j
-          else
-             stack(jStack)   = j-1
-             stack(jStack-1) = l
-             l               = j
-          endif
+    interface operator(<)
+        module procedure lessSortSubRangeType
+    end interface operator(<)
 
-       endif
-    enddo
+    ! Definition of the derived data type.
 
-    ! Release the memory of stack.
+    type fourIntPlusRealType
+        integer(kind=intType) :: n1, n2, n3, n4
+        real(kind=realType) :: dist
+    end type fourIntPlusRealType
 
-    deallocate(stack, stat=ierr)
-    if(ierr /= 0)                                &
-         call terminate("qsortSubblocksOfCGNSType", &
-         "Deallocation error for stack")
+    ! Interfaces for the definitions of the operators <=, < and /=.
+    ! These are needed for the sorting of this derived data type.
+    ! Note that the = operator does not need to be defined, because
+    ! fourIntPlusRealType only contains primitive types.
 
-    ! Check in debug mode whether the array is really sorted.
+    interface operator(<=)
+        module procedure lessEqualFourIntPlusRealType
+    end interface operator(<=)
 
-    if( debug ) then
-       do i=1,(nn-1)
-          if(arr(i+1) < arr(i))                        &
-               call terminate("qsortSubblocksOfCGNSType", &
-               "Array is not sorted correctly")
-       enddo
-    endif
+    interface operator(<)
+        module procedure lessFourIntPlusRealType
+    end interface operator(<)
 
-  end subroutine qsortSubblocksOfCGNSType
+    interface operator(/=)
+        module procedure notEqualFourIntPlusRealType
+    end interface operator(/=)
 
-  subroutine qsortSubfaceNonMatchType(arr, nn)
+    ! ==========================================================================
     !
-    !       qsortSubfaceNonMatchType sorts the array of the derived
-    !       datatype subfaceNonMatchType in increasing order based on the
-    !       <= operator for this derived data type.
+    !       Variable to store the partition number (processor ID) of the
+    !       computational blocks.
     !
-    use constants
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nn
+    ! ubvec(2):      Tolerance for the constraints.
+    ! part(nBlocks): The processor ID for each block, starting at 0.
+
+    real, dimension(2) :: ubvec
 
-    type(subfaceNonMatchType), dimension(*), intent(inout) :: arr
+    integer(kind=intType), dimension(:), allocatable :: part
     !
-    !      Local variables.
+    !       Variables needed for the reading of the grid files.
     !
-    integer(kind=intType), parameter :: m = 7
-
-    integer(kind=intType) :: nStack
-    integer(kind=intType) :: i, j, k, r, l, jStack, ii
-
-    integer :: ierr
-
-    type(subfaceNonMatchType) :: a, tmp
-
-    integer(kind=intType), allocatable, dimension(:) :: stack
-    integer(kind=intType), allocatable, dimension(:) :: tmpStack
-
-    ! Allocate the memory for stack.
-
-    nStack = 100
-    allocate(stack(nStack), stat=ierr)
-    if(ierr /= 0)                                &
-         call terminate("qsortSubfaceNonMatchType", &
-         "Memory allocation failure for stack")
-
-    ! Initialize the variables that control the sorting.
-
-    jStack = 0
-    l      = 1
-    r      = nn
-
-    ! Start of the algorithm
-
-    do
-
-       ! Check for the size of the subarray.
-
-       if((r-l) < m) then
-
-          ! Perform insertion sort
-
-          do j=l+1,r
-             a = arr(j)
-             do i=(j-1),l,-1
-                if(arr(i) <= a) exit
-                arr(i+1) = arr(i)
-             enddo
-             arr(i+1) = a
-          enddo
-
-          ! In case there are no more elements on the stack, exit from
-          ! the outermost do-loop. Algorithm has finished.
-
-          if(jStack == 0) exit
+    ! nGridsRead:            Number of grids to read.
+    ! fileIDs(nGridsRead):   The file ID's.
+    ! gridFiles(nGridsRead): Names of the grid files to read.
+    ! interpolSpectral:      Whether or not to interpolate the
+    !                        coordinates for the time spectral mode.
 
-          ! Pop stack and begin a new round of partitioning.
+    integer(kind=intType) :: nGridsRead
+    logical :: interpolSpectral
 
-          r = stack(jStack)
-          l = stack(jStack-1)
-          jStack = jStack - 2
+    integer, dimension(:), allocatable :: fileIDs
 
-       else
+    character(len=maxStringLen), dimension(:), allocatable :: gridFiles
+    ! ==========================================================================
 
-          ! Subarray is larger than the threshold for a linear sort.
-          ! Choose median of left, center and right elements as
-          ! partitioning element a.
-          ! Also rearrange so that (l) <= (l+1) <= (r).
-
-          k = (l+r)/2
-          tmp      = arr(k)             ! Swap the elements
-          arr(k)   = arr(l+1)           ! k and l+1.
-          arr(l+1) = tmp
-
-          if(arr(r) < arr(l)) then
-             tmp    = arr(l)             ! Swap the elements
-             arr(l) = arr(r)             ! r and l.
-             arr(r) = tmp
-          endif
+contains
+    !
+    !         Functions to simulate the operators <= and < for the derived
+    !         datatypes subblocksOfCGNSType and subfaceNonMatchType.
+    !
+    logical function lessEqualSubblocksOfCGNSType(g1, g2)
+        !
+        !         This function returns .true. if g1 <= g2 and .false.
+        !         otherwise. The comparison is firstly based on the CGNS block
+        !         ID, then the processor ID and finally the local block ID.
+        !
+        implicit none
+        !
+        !        Function arguments.
+        !
+        type(subblocksOfCGNSType), intent(in) :: g1, g2
+
+        ! Comparison of the CGNS block ID. If not equal, set
+        ! lessEqualSubblocksOfCGNSType appropriately and return.
+
+        if (g1%cgnsBlockID < g2%cgnsBlockID) then
+            lessEqualSubblocksOfCGNSType = .true.
+            return
+        else if (g1%cgnsBlockID > g2%cgnsBlockID) then
+            lessEqualSubblocksOfCGNSType = .false.
+            return
+        end if
+
+        ! Compare the processor ID's.
+
+        if (g1%procID < g2%procID) then
+            lessEqualSubblocksOfCGNSType = .true.
+            return
+        else if (g1%procID > g2%procID) then
+            lessEqualSubblocksOfCGNSType = .false.
+            return
+        end if
+
+        ! Compare the local block ID's.
+
+        if (g1%blockID < g2%blockID) then
+            lessEqualSubblocksOfCGNSType = .true.
+            return
+        else if (g1%blockID > g2%blockID) then
+            lessEqualSubblocksOfCGNSType = .false.
+            return
+        end if
+
+        ! It does not make sense to compare the subranges, because
+        ! these are identical if we came so far. Both entities are
+        ! identical. So set lessEqualSubblocksOfCGNSType to .true.
+
+        lessEqualSubblocksOfCGNSType = .true.
+
+    end function lessEqualSubblocksOfCGNSType
+
+    !===============================================================
+
+    logical function lessSubblocksOfCGNSType(g1, g2)
+        !
+        !         This function returns .true. if g1 < g2 and .false.
+        !         otherwise. The comparison is firstly based on the CGNS block
+        !         ID, then the processor ID and finally the local blockID.
+        !
+        implicit none
+        !
+        !        Function arguments.
+        !
+        type(subblocksOfCGNSType), intent(in) :: g1, g2
+
+        ! Comparison of the CGNS block ID. If not equal, set
+        ! lessSubblocksOfCGNSType appropriately and return.
+
+        if (g1%cgnsBlockID < g2%cgnsBlockID) then
+            lessSubblocksOfCGNSType = .true.
+            return
+        else if (g1%cgnsBlockID > g2%cgnsBlockID) then
+            lessSubblocksOfCGNSType = .false.
+            return
+        end if
+
+        ! Compare the processor ID's.
+
+        if (g1%procID < g2%procID) then
+            lessSubblocksOfCGNSType = .true.
+            return
+        else if (g1%procID > g2%procID) then
+            lessSubblocksOfCGNSType = .false.
+            return
+        end if
+
+        ! Compare the local block ID's.
+
+        if (g1%blockID < g2%blockID) then
+            lessSubblocksOfCGNSType = .true.
+            return
+        else if (g1%blockID > g2%blockID) then
+            lessSubblocksOfCGNSType = .false.
+            return
+        end if
+
+        ! It does not make sense to compare the subranges, because
+        ! these are identical if we came so far. Both entities are
+        ! identical. So set lessSubblocksOfCGNSType to .false.
+
+        lessSubblocksOfCGNSType = .false.
+
+    end function lessSubblocksOfCGNSType
+
+    !===============================================================
+
+    logical function lessEqualSubfaceNonMatchType(g1, g2)
+        !
+        !         This function returns .true. if g1 <= g2 and .false.
+        !         otherwise. The comparison is firstly based on the i-range,
+        !         followed by the j-range and k-range. If these are all the
+        !         same the connectivity ID is compared.
+        !
+        implicit none
+        !
+        !        Function arguments.
+        !
+        type(subfaceNonMatchType), intent(in) :: g1, g2
+
+        ! Comparison of the iBeg value. If different set
+        ! lessEqualSubfaceNonMatchType appropriately and return.
+
+        if (g1%iBeg < g2%iBeg) then
+            lessEqualSubfaceNonMatchType = .true.
+            return
+        else if (g1%iBeg > g2%iBeg) then
+            lessEqualSubfaceNonMatchType = .false.
+            return
+        end if
+
+        ! The iEnd value.
+
+        if (g1%iEnd < g2%iEnd) then
+            lessEqualSubfaceNonMatchType = .true.
+            return
+        else if (g1%iEnd > g2%iEnd) then
+            lessEqualSubfaceNonMatchType = .false.
+            return
+        end if
+
+        ! The jBeg value.
+
+        if (g1%jBeg < g2%jBeg) then
+            lessEqualSubfaceNonMatchType = .true.
+            return
+        else if (g1%jBeg > g2%jBeg) then
+            lessEqualSubfaceNonMatchType = .false.
+            return
+        end if
+
+        ! The jEnd value.
+
+        if (g1%jEnd < g2%jEnd) then
+            lessEqualSubfaceNonMatchType = .true.
+            return
+        else if (g1%jEnd > g2%jEnd) then
+            lessEqualSubfaceNonMatchType = .false.
+            return
+        end if
+
+        ! The kBeg value.
+
+        if (g1%kBeg < g2%kBeg) then
+            lessEqualSubfaceNonMatchType = .true.
+            return
+        else if (g1%kBeg > g2%kBeg) then
+            lessEqualSubfaceNonMatchType = .false.
+            return
+        end if
+
+        ! The kEnd value.
+
+        if (g1%kEnd < g2%kEnd) then
+            lessEqualSubfaceNonMatchType = .true.
+            return
+        else if (g1%kEnd > g2%kEnd) then
+            lessEqualSubfaceNonMatchType = .false.
+            return
+        end if
+
+        ! The subrange is identical. Compare the connectivity ID.
+
+        if (g1%connID < g2%connID) then
+            lessEqualSubfaceNonMatchType = .true.
+            return
+        else if (g1%connID > g2%connID) then
+            lessEqualSubfaceNonMatchType = .false.
+            return
+        end if
+
+        ! g1 and g2 are identical. Return .true.
+
+        lessEqualSubfaceNonMatchType = .true.
+
+    end function lessEqualSubfaceNonMatchType
+
+    !===============================================================
+
+    logical function lessSubfaceNonMatchType(g1, g2)
+        !
+        !         This function returns .true. if g1 < g2 and .false.
+        !         otherwise. The comparison is firstly based on the i-range,
+        !         followed by the j-range and k-range. If these are all the
+        !         same the connectivity ID is compared.
+        !
+        implicit none
+        !
+        !        Function arguments.
+        !
+        type(subfaceNonMatchType), intent(in) :: g1, g2
+
+        ! Comparison of the iBeg value. If different set
+        ! lessEqualSubfaceNonMatchType appropriately and return.
+
+        if (g1%iBeg < g2%iBeg) then
+            lessSubfaceNonMatchType = .true.
+            return
+        else if (g1%iBeg > g2%iBeg) then
+            lessSubfaceNonMatchType = .false.
+            return
+        end if
+
+        ! The iEnd value.
+
+        if (g1%iEnd < g2%iEnd) then
+            lessSubfaceNonMatchType = .true.
+            return
+        else if (g1%iEnd > g2%iEnd) then
+            lessSubfaceNonMatchType = .false.
+            return
+        end if
+
+        ! The jBeg value.
+
+        if (g1%jBeg < g2%jBeg) then
+            lessSubfaceNonMatchType = .true.
+            return
+        else if (g1%jBeg > g2%jBeg) then
+            lessSubfaceNonMatchType = .false.
+            return
+        end if
+
+        ! The jEnd value.
+
+        if (g1%jEnd < g2%jEnd) then
+            lessSubfaceNonMatchType = .true.
+            return
+        else if (g1%jEnd > g2%jEnd) then
+            lessSubfaceNonMatchType = .false.
+            return
+        end if
+
+        ! The kBeg value.
+
+        if (g1%kBeg < g2%kBeg) then
+            lessSubfaceNonMatchType = .true.
+            return
+        else if (g1%kBeg > g2%kBeg) then
+            lessSubfaceNonMatchType = .false.
+            return
+        end if
+
+        ! The kEnd value.
+
+        if (g1%kEnd < g2%kEnd) then
+            lessSubfaceNonMatchType = .true.
+            return
+        else if (g1%kEnd > g2%kEnd) then
+            lessSubfaceNonMatchType = .false.
+            return
+        end if
+
+        ! The subrange is identical. Compare the connectivity ID.
+
+        if (g1%connID < g2%connID) then
+            lessSubfaceNonMatchType = .true.
+            return
+        else if (g1%connID > g2%connID) then
+            lessSubfaceNonMatchType = .false.
+            return
+        end if
+
+        ! g1 and g2 are identical. Return .false.
+
+        lessSubfaceNonMatchType = .false.
+
+    end function lessSubfaceNonMatchType
+
+    subroutine qsortSubblocksOfCGNSType(arr, nn)
+        !
+        !       qsortSubblocksOfCGNSType sorts the array of the derived
+        !       datatype subblocksOfCGNSType in increasing order based on the
+        !       <= operator for this derived data type.
+        !
+        use constants
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nn
+
+        type(subblocksOfCGNSType), dimension(*), intent(inout) :: arr
+        !
+        !      Local variables.
+        !
+        integer(kind=intType), parameter :: m = 7
+
+        integer(kind=intType) :: nStack
+        integer(kind=intType) :: i, j, k, r, l, jStack, ii
+
+        integer :: ierr
+
+        type(subblocksOfCGNSType) :: a, tmp
+
+        integer(kind=intType), allocatable, dimension(:) :: stack
+        integer(kind=intType), allocatable, dimension(:) :: tmpStack
 
-          if(arr(r) < arr(l+1)) then
-             tmp      = arr(l+1)         ! Swap the elements
-             arr(l+1) = arr(r)           ! r and l+1.
-             arr(r)   = tmp
-          endif
+        ! Allocate the memory for stack.
 
-          if(arr(l+1) < arr(l)) then
-             tmp      = arr(l+1)         ! Swap the elements
-             arr(l+1) = arr(l)           ! l and l+1.
-             arr(l)   = tmp
-          endif
+        nStack = 100
+        allocate (stack(nStack), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("qsortSubblocksOfCGNSType", &
+                           "Memory allocation failure for stack")
 
-          ! Initialize the pointers for partitioning.
+        ! Initialize the variables that control the sorting.
 
-          i = l+1
-          j = r
-          a = arr(l+1)
+        jStack = 0
+        l = 1
+        r = nn
 
-          ! The innermost loop
+        ! Start of the algorithm
 
-          do
+        do
 
-             ! Scan up to find element >= a.
-             do
-                i = i+1
-                if(a <= arr(i)) exit
-             enddo
+            ! Check for the size of the subarray.
 
-             ! Scan down to find element <= a.
-             do
-                j = j-1
-                if(arr(j) <= a) exit
-             enddo
+            if ((r - l) < m) then
 
-             ! Exit the loop in case the pointers i and j crossed.
+                ! Perform insertion sort
 
-             if(j < i) exit
+                do j = l + 1, r
+                    a = arr(j)
+                    do i = (j - 1), l, -1
+                        if (arr(i) <= a) exit
+                        arr(i + 1) = arr(i)
+                    end do
+                    arr(i + 1) = a
+                end do
 
-             ! Swap the element i and j.
+                ! In case there are no more elements on the stack, exit from
+                ! the outermost do-loop. Algorithm has finished.
 
-             tmp    = arr(i)
-             arr(i) = arr(j)
-             arr(j) = tmp
-          enddo
+                if (jStack == 0) exit
 
-          ! Swap the entries j and l+1. Remember that a equals
-          ! arr(l+1).
+                ! Pop stack and begin a new round of partitioning.
 
-          arr(l+1) = arr(j)
-          arr(j)   = a
+                r = stack(jStack)
+                l = stack(jStack - 1)
+                jStack = jStack - 2
 
-          ! Push pointers to larger subarray on stack,
-          ! process smaller subarray immediately.
+            else
 
-          jStack = jStack + 2
-          if(jStack > nStack) then
+                ! Subarray is larger than the threshold for a linear sort.
+                ! Choose median of left, center and right elements as
+                ! partitioning element a.
+                ! Also rearrange so that (l) <= (l+1) <= (r).
 
-             ! Storage of the stack is too small. Reallocate.
+                k = (l + r) / 2
+                tmp = arr(k)             ! Swap the elements
+                arr(k) = arr(l + 1)           ! k and l+1.
+                arr(l + 1) = tmp
 
-             allocate(tmpStack(nStack), stat=ierr)
-             if(ierr /= 0)                                &
-                  call terminate("qsortSubfaceNonMatchType", &
-                  "Memory allocation error for tmpStack")
-             tmpStack = stack
+                if (arr(r) < arr(l)) then
+                    tmp = arr(l)             ! Swap the elements
+                    arr(l) = arr(r)             ! r and l.
+                    arr(r) = tmp
+                end if
 
-             ! Free the memory of stack, store the old value of nStack
-             ! in tmp and increase nStack.
+                if (arr(r) < arr(l + 1)) then
+                    tmp = arr(l + 1)         ! Swap the elements
+                    arr(l + 1) = arr(r)           ! r and l+1.
+                    arr(r) = tmp
+                end if
 
-             deallocate(stack, stat=ierr)
-             if(ierr /= 0)                                &
-                  call terminate("qsortSubfaceNonMatchType", &
-                  "Deallocation error for stack")
-             ii = nStack
-             nStack = nStack + 100
+                if (arr(l + 1) < arr(l)) then
+                    tmp = arr(l + 1)         ! Swap the elements
+                    arr(l + 1) = arr(l)           ! l and l+1.
+                    arr(l) = tmp
+                end if
 
-             ! Allocate the memory for stack and copy the old values
-             ! from tmpStack.
+                ! Initialize the pointers for partitioning.
 
-             allocate(stack(nStack), stat=ierr)
-             if(ierr /= 0)                                &
-                  call terminate("qsortSubfaceNonMatchType", &
-                  "Memory reallocation error for stack")
-             stack(1:ii) = tmpStack(1:ii)
+                i = l + 1
+                j = r
+                a = arr(l + 1)
 
-             ! And finally release the memory of tmpStack.
-
-             deallocate(tmpStack, stat=ierr)
-             if(ierr /= 0)                                &
-                  call terminate("qsortSubfaceNonMatchType", &
-                  "Deallocation error for tmpStack")
-          endif
+                ! The innermost loop
 
-          if((r-i+1) >= (j-l)) then
-             stack(jStack)   = r
-             r               = j-1
-             stack(jStack-1) = j
-          else
-             stack(jStack)   = j-1
-             stack(jStack-1) = l
-             l               = j
-          endif
+                do
 
-       endif
-    enddo
+                    ! Scan up to find element >= a.
+                    do
+                        i = i + 1
+                        if (a <= arr(i)) exit
+                    end do
 
-    ! Release the memory of stack.
+                    ! Scan down to find element <= a.
+                    do
+                        j = j - 1
+                        if (arr(j) <= a) exit
+                    end do
 
-    deallocate(stack, stat=ierr)
-    if(ierr /= 0)                                &
-         call terminate("qsortSubfaceNonMatchType", &
-         "Deallocation error for stack")
+                    ! Exit the loop in case the pointers i and j crossed.
 
-    ! Check in debug mode whether the array is really sorted.
+                    if (j < i) exit
 
-    if( debug ) then
-       do i=1,(nn-1)
-          if(arr(i+1) < arr(i))                        &
-               call terminate("qsortSubfaceNonMatchType", &
-               "Array is not sorted correctly")
-       enddo
-    endif
+                    ! Swap the element i and j.
 
-  end subroutine qsortSubfaceNonMatchType
+                    tmp = arr(i)
+                    arr(i) = arr(j)
+                    arr(j) = tmp
+                end do
 
-  logical function lessEqualSortSubRangeType(g1, g2)
-    !
-    !         lessEqualSortSubRangeType defines the operator <= for the
-    !         derived datatype sortSubRangeType. The comparison is first
-    !         based on kMin, followed by jMin and finally iMin.
-    !         The comparison is therefore not based on the max values.
-    !
-    implicit none
-    !
-    !        Function arguments.
-    !
-    type(sortSubRangeType), intent(in) :: g1, g2
-    !
-    !         Begin executation.
-    !
-    ! Compare the kMin index and return .true. or .false. if they
-    ! differ.
+                ! Swap the entries j and l+1. Remember that a equals
+                ! arr(l+1).
 
-    if(g1%kMin < g2%kMin) then
-       lessEqualSortSubRangeType = .true.
-       return
-    else if(g1%kMin > g2%kMin) then
-       lessEqualSortSubRangeType = .false.
-       return
-    endif
+                arr(l + 1) = arr(j)
+                arr(j) = a
 
-    ! kMin indices are equal. Compare the jMin's.
+                ! Push pointers to larger subarray on stack,
+                ! process smaller subarray immediately.
 
-    if(g1%jMin < g2%jMin) then
-       lessEqualSortSubRangeType = .true.
-       return
-    else if(g1%jMin > g2%jMin) then
-       lessEqualSortSubRangeType = .false.
-       return
-    endif
+                jStack = jStack + 2
+                if (jStack > nStack) then
 
-    ! Also the jMin's are equal. Compare iMin's.
+                    ! Storage of the stack is too small. Reallocate.
+
+                    allocate (tmpStack(nStack), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortSubblocksOfCGNSType", &
+                                       "Memory allocation error for tmpStack")
+                    tmpStack = stack
+
+                    ! Free the memory of stack, store the old value of nStack
+                    ! in tmp and increase nStack.
 
-    if(g1%iMin < g2%iMin) then
-       lessEqualSortSubRangeType = .true.
-       return
-    else if(g1%iMin > g2%iMin) then
-       lessEqualSortSubRangeType = .false.
-       return
-    endif
+                    deallocate (stack, stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortSubblocksOfCGNSType", &
+                                       "Deallocation error for stack")
+                    ii = nStack
+                    nStack = nStack + 100
 
-    ! g1 equals g2. Return .true.
+                    ! Allocate the memory for stack and copy the old values
+                    ! from tmpStack.
 
-    lessEqualSortSubRangeType = .true.
-
-  end function lessEqualSortSubRangeType
-
-  !===============================================================
-
-  logical function lessSortSubRangeType(g1, g2)
-    !
-    !         lessSortSubRangeType defines the operator < for the derived
-    !         datatype sortSubRangeType. The comparison is first based on
-    !         kMin, followed by jMin and finally iMin.
-    !         The comparison is therefore not based on the max values.
-    !
-    implicit none
-    !
-    !        Function arguments.
-    !
-    type(sortSubRangeType), intent(in) :: g1, g2
-    !
-    !         Begin executation.
-    !
-    ! Compare the kMin index and return .true. or .false. if they
-    ! differ.
+                    allocate (stack(nStack), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortSubblocksOfCGNSType", &
+                                       "Memory reallocation error for stack")
+                    stack(1:ii) = tmpStack(1:ii)
 
-    if(g1%kMin < g2%kMin) then
-       lessSortSubRangeType = .true.
-       return
-    else if(g1%kMin > g2%kMin) then
-       lessSortSubRangeType = .false.
-       return
-    endif
+                    ! And finally release the memory of tmpStack.
+
+                    deallocate (tmpStack, stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortSubblocksOfCGNSType", &
+                                       "Deallocation error for tmpStack")
+                end if
 
-    ! kMin indices are equal. Compare the jMin's.
+                if ((r - i + 1) >= (j - l)) then
+                    stack(jStack) = r
+                    r = j - 1
+                    stack(jStack - 1) = j
+                else
+                    stack(jStack) = j - 1
+                    stack(jStack - 1) = l
+                    l = j
+                end if
 
-    if(g1%jMin < g2%jMin) then
-       lessSortSubRangeType = .true.
-       return
-    else if(g1%jMin > g2%jMin) then
-       lessSortSubRangeType = .false.
-       return
-    endif
+            end if
+        end do
 
-    ! Also the jMin's are equal. Compare iMin's.
+        ! Release the memory of stack.
 
-    if(g1%iMin < g2%iMin) then
-       lessSortSubRangeType = .true.
-       return
-    else if(g1%iMin > g2%iMin) then
-       lessSortSubRangeType = .false.
-       return
-    endif
+        deallocate (stack, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("qsortSubblocksOfCGNSType", &
+                           "Deallocation error for stack")
 
-    ! g1 equals g2. Return .false.
+        ! Check in debug mode whether the array is really sorted.
 
-    lessSortSubRangeType = .false.
+        if (debug) then
+            do i = 1, (nn - 1)
+                if (arr(i + 1) < arr(i)) &
+                    call terminate("qsortSubblocksOfCGNSType", &
+                                   "Array is not sorted correctly")
+            end do
+        end if
 
-  end function lessSortSubRangeType
+    end subroutine qsortSubblocksOfCGNSType
 
-  !      ==================================================================
+    subroutine qsortSubfaceNonMatchType(arr, nn)
+        !
+        !       qsortSubfaceNonMatchType sorts the array of the derived
+        !       datatype subfaceNonMatchType in increasing order based on the
+        !       <= operator for this derived data type.
+        !
+        use constants
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nn
 
-  subroutine sortRangesSplitInfo(splitInfo)
-    !
-    !       sortRangesSplitInfo sort the ranges of the given subblocks in
-    !       increasing order such that a unique ordering is obtained,
-    !       independent of the history of the splitting.
-    !
-    use constants
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    type(splitCGNSType), intent(inout) :: splitInfo
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, nSubBlocks
+        type(subfaceNonMatchType), dimension(*), intent(inout) :: arr
+        !
+        !      Local variables.
+        !
+        integer(kind=intType), parameter :: m = 7
 
-    type(sortSubRangeType), dimension(splitInfo%nSubBlocks) :: subRanges
+        integer(kind=intType) :: nStack
+        integer(kind=intType) :: i, j, k, r, l, jStack, ii
 
-    ! Copy the subface range from splitInfo into subRanges.
+        integer :: ierr
+
+        type(subfaceNonMatchType) :: a, tmp
 
-    nSubBlocks = splitInfo%nSubBlocks
+        integer(kind=intType), allocatable, dimension(:) :: stack
+        integer(kind=intType), allocatable, dimension(:) :: tmpStack
 
-    do i=1,nSubBlocks
-       subRanges(i)%iMin = splitInfo%ranges(i,1,1)
-       subRanges(i)%jMin = splitInfo%ranges(i,2,1)
-       subRanges(i)%kMin = splitInfo%ranges(i,3,1)
+        ! Allocate the memory for stack.
 
-       subRanges(i)%iMax = splitInfo%ranges(i,1,2)
-       subRanges(i)%jMax = splitInfo%ranges(i,2,2)
-       subRanges(i)%kMax = splitInfo%ranges(i,3,2)
-    enddo
+        nStack = 100
+        allocate (stack(nStack), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("qsortSubfaceNonMatchType", &
+                           "Memory allocation failure for stack")
 
-    ! Sort subRanges in increasing order.
+        ! Initialize the variables that control the sorting.
 
-    call qsortSortSubRangeType(subRanges, nSubBlocks)
+        jStack = 0
+        l = 1
+        r = nn
 
-    ! Copy the data back into splitInfo.
+        ! Start of the algorithm
 
-    do i=1,nSubBlocks
-       splitInfo%ranges(i,1,1) = subRanges(i)%iMin
-       splitInfo%ranges(i,2,1) = subRanges(i)%jMin
-       splitInfo%ranges(i,3,1) = subRanges(i)%kMin
+        do
 
-       splitInfo%ranges(i,1,2) = subRanges(i)%iMax
-       splitInfo%ranges(i,2,2) = subRanges(i)%jMax
-       splitInfo%ranges(i,3,2) = subRanges(i)%kMax
-    enddo
+            ! Check for the size of the subarray.
 
-  end subroutine sortRangesSplitInfo
+            if ((r - l) < m) then
 
-  !      ==================================================================
+                ! Perform insertion sort
 
-  subroutine qsortSortSubRangeType(arr, nn)
-    !
-    !       qsortSortSubRangeType sorts the given number of halo's in
-    !       increasing order based on the <= operator for this derived
-    !       data type.
-    !
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nn
+                do j = l + 1, r
+                    a = arr(j)
+                    do i = (j - 1), l, -1
+                        if (arr(i) <= a) exit
+                        arr(i + 1) = arr(i)
+                    end do
+                    arr(i + 1) = a
+                end do
 
-    type(sortSubRangeType), dimension(*), intent(inout) :: arr
-    !
-    !      Local variables.
-    !
-    integer(kind=intType), parameter :: m = 7
+                ! In case there are no more elements on the stack, exit from
+                ! the outermost do-loop. Algorithm has finished.
 
-    integer(kind=intType) :: nStack
-    integer(kind=intType) :: i, j, k, r, l, jStack, ii
+                if (jStack == 0) exit
 
-    integer :: ierr
+                ! Pop stack and begin a new round of partitioning.
 
-    type(sortSubRangeType) :: a, tmp
+                r = stack(jStack)
+                l = stack(jStack - 1)
+                jStack = jStack - 2
 
-    integer(kind=intType), allocatable, dimension(:) :: stack
-    integer(kind=intType), allocatable, dimension(:) :: tmpStack
+            else
 
-    ! Allocate the memory for stack.
+                ! Subarray is larger than the threshold for a linear sort.
+                ! Choose median of left, center and right elements as
+                ! partitioning element a.
+                ! Also rearrange so that (l) <= (l+1) <= (r).
 
-    nStack = 100
-    allocate(stack(nStack), stat=ierr)
-    if(ierr /= 0)                             &
-         call terminate("qsortSortSubRangeType", &
-         "Memory allocation failure for stack")
+                k = (l + r) / 2
+                tmp = arr(k)             ! Swap the elements
+                arr(k) = arr(l + 1)           ! k and l+1.
+                arr(l + 1) = tmp
 
-    ! Initialize the variables that control the sorting.
+                if (arr(r) < arr(l)) then
+                    tmp = arr(l)             ! Swap the elements
+                    arr(l) = arr(r)             ! r and l.
+                    arr(r) = tmp
+                end if
 
-    jStack = 0
-    l      = 1
-    r      = nn
+                if (arr(r) < arr(l + 1)) then
+                    tmp = arr(l + 1)         ! Swap the elements
+                    arr(l + 1) = arr(r)           ! r and l+1.
+                    arr(r) = tmp
+                end if
 
-    ! Start of the algorithm
+                if (arr(l + 1) < arr(l)) then
+                    tmp = arr(l + 1)         ! Swap the elements
+                    arr(l + 1) = arr(l)           ! l and l+1.
+                    arr(l) = tmp
+                end if
 
-    do
+                ! Initialize the pointers for partitioning.
 
-       ! Check for the size of the subarray.
+                i = l + 1
+                j = r
+                a = arr(l + 1)
 
-       if((r-l) < m) then
+                ! The innermost loop
 
-          ! Perform insertion sort
+                do
 
-          do j=l+1,r
-             a = arr(j)
-             do i=(j-1),l,-1
-                if(arr(i) <= a) exit
-                arr(i+1) = arr(i)
-             enddo
-             arr(i+1) = a
-          enddo
+                    ! Scan up to find element >= a.
+                    do
+                        i = i + 1
+                        if (a <= arr(i)) exit
+                    end do
 
-          ! In case there are no more elements on the stack, exit from
-          ! the outermost do-loop. Algorithm has finished.
+                    ! Scan down to find element <= a.
+                    do
+                        j = j - 1
+                        if (arr(j) <= a) exit
+                    end do
 
-          if(jStack == 0) exit
+                    ! Exit the loop in case the pointers i and j crossed.
 
-          ! Pop stack and begin a new round of partitioning.
+                    if (j < i) exit
 
-          r = stack(jStack)
-          l = stack(jStack-1)
-          jStack = jStack - 2
+                    ! Swap the element i and j.
+
+                    tmp = arr(i)
+                    arr(i) = arr(j)
+                    arr(j) = tmp
+                end do
+
+                ! Swap the entries j and l+1. Remember that a equals
+                ! arr(l+1).
+
+                arr(l + 1) = arr(j)
+                arr(j) = a
+
+                ! Push pointers to larger subarray on stack,
+                ! process smaller subarray immediately.
+
+                jStack = jStack + 2
+                if (jStack > nStack) then
+
+                    ! Storage of the stack is too small. Reallocate.
+
+                    allocate (tmpStack(nStack), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortSubfaceNonMatchType", &
+                                       "Memory allocation error for tmpStack")
+                    tmpStack = stack
+
+                    ! Free the memory of stack, store the old value of nStack
+                    ! in tmp and increase nStack.
+
+                    deallocate (stack, stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortSubfaceNonMatchType", &
+                                       "Deallocation error for stack")
+                    ii = nStack
+                    nStack = nStack + 100
+
+                    ! Allocate the memory for stack and copy the old values
+                    ! from tmpStack.
+
+                    allocate (stack(nStack), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortSubfaceNonMatchType", &
+                                       "Memory reallocation error for stack")
+                    stack(1:ii) = tmpStack(1:ii)
+
+                    ! And finally release the memory of tmpStack.
+
+                    deallocate (tmpStack, stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortSubfaceNonMatchType", &
+                                       "Deallocation error for tmpStack")
+                end if
+
+                if ((r - i + 1) >= (j - l)) then
+                    stack(jStack) = r
+                    r = j - 1
+                    stack(jStack - 1) = j
+                else
+                    stack(jStack) = j - 1
+                    stack(jStack - 1) = l
+                    l = j
+                end if
+
+            end if
+        end do
+
+        ! Release the memory of stack.
+
+        deallocate (stack, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("qsortSubfaceNonMatchType", &
+                           "Deallocation error for stack")
+
+        ! Check in debug mode whether the array is really sorted.
+
+        if (debug) then
+            do i = 1, (nn - 1)
+                if (arr(i + 1) < arr(i)) &
+                    call terminate("qsortSubfaceNonMatchType", &
+                                   "Array is not sorted correctly")
+            end do
+        end if
+
+    end subroutine qsortSubfaceNonMatchType
+
+    logical function lessEqualSortSubRangeType(g1, g2)
+        !
+        !         lessEqualSortSubRangeType defines the operator <= for the
+        !         derived datatype sortSubRangeType. The comparison is first
+        !         based on kMin, followed by jMin and finally iMin.
+        !         The comparison is therefore not based on the max values.
+        !
+        implicit none
+        !
+        !        Function arguments.
+        !
+        type(sortSubRangeType), intent(in) :: g1, g2
+        !
+        !         Begin executation.
+        !
+        ! Compare the kMin index and return .true. or .false. if they
+        ! differ.
+
+        if (g1%kMin < g2%kMin) then
+            lessEqualSortSubRangeType = .true.
+            return
+        else if (g1%kMin > g2%kMin) then
+            lessEqualSortSubRangeType = .false.
+            return
+        end if
+
+        ! kMin indices are equal. Compare the jMin's.
+
+        if (g1%jMin < g2%jMin) then
+            lessEqualSortSubRangeType = .true.
+            return
+        else if (g1%jMin > g2%jMin) then
+            lessEqualSortSubRangeType = .false.
+            return
+        end if
+
+        ! Also the jMin's are equal. Compare iMin's.
+
+        if (g1%iMin < g2%iMin) then
+            lessEqualSortSubRangeType = .true.
+            return
+        else if (g1%iMin > g2%iMin) then
+            lessEqualSortSubRangeType = .false.
+            return
+        end if
+
+        ! g1 equals g2. Return .true.
+
+        lessEqualSortSubRangeType = .true.
+
+    end function lessEqualSortSubRangeType
+
+    !===============================================================
+
+    logical function lessSortSubRangeType(g1, g2)
+        !
+        !         lessSortSubRangeType defines the operator < for the derived
+        !         datatype sortSubRangeType. The comparison is first based on
+        !         kMin, followed by jMin and finally iMin.
+        !         The comparison is therefore not based on the max values.
+        !
+        implicit none
+        !
+        !        Function arguments.
+        !
+        type(sortSubRangeType), intent(in) :: g1, g2
+        !
+        !         Begin executation.
+        !
+        ! Compare the kMin index and return .true. or .false. if they
+        ! differ.
+
+        if (g1%kMin < g2%kMin) then
+            lessSortSubRangeType = .true.
+            return
+        else if (g1%kMin > g2%kMin) then
+            lessSortSubRangeType = .false.
+            return
+        end if
+
+        ! kMin indices are equal. Compare the jMin's.
+
+        if (g1%jMin < g2%jMin) then
+            lessSortSubRangeType = .true.
+            return
+        else if (g1%jMin > g2%jMin) then
+            lessSortSubRangeType = .false.
+            return
+        end if
+
+        ! Also the jMin's are equal. Compare iMin's.
+
+        if (g1%iMin < g2%iMin) then
+            lessSortSubRangeType = .true.
+            return
+        else if (g1%iMin > g2%iMin) then
+            lessSortSubRangeType = .false.
+            return
+        end if
+
+        ! g1 equals g2. Return .false.
+
+        lessSortSubRangeType = .false.
+
+    end function lessSortSubRangeType
+
+    !      ==================================================================
+
+    subroutine sortRangesSplitInfo(splitInfo)
+        !
+        !       sortRangesSplitInfo sort the ranges of the given subblocks in
+        !       increasing order such that a unique ordering is obtained,
+        !       independent of the history of the splitting.
+        !
+        use constants
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        type(splitCGNSType), intent(inout) :: splitInfo
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, nSubBlocks
+
+        type(sortSubRangeType), dimension(splitInfo%nSubBlocks) :: subRanges
+
+        ! Copy the subface range from splitInfo into subRanges.
+
+        nSubBlocks = splitInfo%nSubBlocks
+
+        do i = 1, nSubBlocks
+            subRanges(i)%iMin = splitInfo%ranges(i, 1, 1)
+            subRanges(i)%jMin = splitInfo%ranges(i, 2, 1)
+            subRanges(i)%kMin = splitInfo%ranges(i, 3, 1)
 
-       else
+            subRanges(i)%iMax = splitInfo%ranges(i, 1, 2)
+            subRanges(i)%jMax = splitInfo%ranges(i, 2, 2)
+            subRanges(i)%kMax = splitInfo%ranges(i, 3, 2)
+        end do
 
-          ! Subarray is larger than the threshold for a linear sort.
-          ! Choose median of left, center and right elements as
-          ! partitioning element a.
-          ! Also rearrange so that (l) <= (l+1) <= (r).
+        ! Sort subRanges in increasing order.
 
-          k = (l+r)/2
-          tmp      = arr(k)             ! Swap the elements
-          arr(k)   = arr(l+1)           ! k and l+1.
-          arr(l+1) = tmp
+        call qsortSortSubRangeType(subRanges, nSubBlocks)
 
-          if(arr(r) < arr(l)) then
-             tmp    = arr(l)             ! Swap the elements
-             arr(l) = arr(r)             ! r and l.
-             arr(r) = tmp
-          endif
+        ! Copy the data back into splitInfo.
 
-          if(arr(r) < arr(l+1)) then
-             tmp      = arr(l+1)         ! Swap the elements
-             arr(l+1) = arr(r)           ! r and l+1.
-             arr(r)   = tmp
-          endif
+        do i = 1, nSubBlocks
+            splitInfo%ranges(i, 1, 1) = subRanges(i)%iMin
+            splitInfo%ranges(i, 2, 1) = subRanges(i)%jMin
+            splitInfo%ranges(i, 3, 1) = subRanges(i)%kMin
+
+            splitInfo%ranges(i, 1, 2) = subRanges(i)%iMax
+            splitInfo%ranges(i, 2, 2) = subRanges(i)%jMax
+            splitInfo%ranges(i, 3, 2) = subRanges(i)%kMax
+        end do
+
+    end subroutine sortRangesSplitInfo
+
+    !      ==================================================================
+
+    subroutine qsortSortSubRangeType(arr, nn)
+        !
+        !       qsortSortSubRangeType sorts the given number of halo's in
+        !       increasing order based on the <= operator for this derived
+        !       data type.
+        !
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nn
 
-          if(arr(l+1) < arr(l)) then
-             tmp      = arr(l+1)         ! Swap the elements
-             arr(l+1) = arr(l)           ! l and l+1.
-             arr(l)   = tmp
-          endif
+        type(sortSubRangeType), dimension(*), intent(inout) :: arr
+        !
+        !      Local variables.
+        !
+        integer(kind=intType), parameter :: m = 7
 
-          ! Initialize the pointers for partitioning.
+        integer(kind=intType) :: nStack
+        integer(kind=intType) :: i, j, k, r, l, jStack, ii
 
-          i = l+1
-          j = r
-          a = arr(l+1)
+        integer :: ierr
 
-          ! The innermost loop
+        type(sortSubRangeType) :: a, tmp
 
-          do
+        integer(kind=intType), allocatable, dimension(:) :: stack
+        integer(kind=intType), allocatable, dimension(:) :: tmpStack
 
-             ! Scan up to find element >= a.
-             do
-                i = i+1
-                if(a <= arr(i)) exit
-             enddo
+        ! Allocate the memory for stack.
 
-             ! Scan down to find element <= a.
-             do
-                j = j-1
-                if(arr(j) <= a) exit
-             enddo
+        nStack = 100
+        allocate (stack(nStack), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("qsortSortSubRangeType", &
+                           "Memory allocation failure for stack")
 
-             ! Exit the loop in case the pointers i and j crossed.
+        ! Initialize the variables that control the sorting.
 
-             if(j < i) exit
+        jStack = 0
+        l = 1
+        r = nn
 
-             ! Swap the element i and j.
+        ! Start of the algorithm
 
-             tmp    = arr(i)
-             arr(i) = arr(j)
-             arr(j) = tmp
-          enddo
+        do
 
-          ! Swap the entries j and l+1. Remember that a equals
-          ! arr(l+1).
+            ! Check for the size of the subarray.
 
-          arr(l+1) = arr(j)
-          arr(j)   = a
+            if ((r - l) < m) then
 
-          ! Push pointers to larger subarray on stack,
-          ! process smaller subarray immediately.
+                ! Perform insertion sort
 
-          jStack = jStack + 2
-          if(jStack > nStack) then
-
-             ! Storage of the stack is too small. Reallocate.
-
-             allocate(tmpStack(nStack), stat=ierr)
-             if(ierr /= 0)                             &
-                  call terminate("qsortSortSubRangeType", &
-                  "Memory allocation error for tmpStack")
-             tmpStack = stack
-
-             ! Free the memory of stack, store the old value of nStack
-             ! in tmp and increase nStack.
-
-             deallocate(stack, stat=ierr)
-             if(ierr /= 0)                             &
-                  call terminate("qsortSortSubRangeType", &
-                  "Deallocation error for stack")
-             ii = nStack
-             nStack = nStack + 100
-
-             ! Allocate the memory for stack and copy the old values
-             ! from tmpStack.
-
-             allocate(stack(nStack), stat=ierr)
-             if(ierr /= 0)                             &
-                  call terminate("qsortSortSubRangeType", &
-                  "Memory reallocation error for stack")
-             stack(1:ii) = tmpStack(1:ii)
-
-             ! And finally release the memory of tmpStack.
-
-             deallocate(tmpStack, stat=ierr)
-             if(ierr /= 0)                             &
-                  call terminate("qsortSortSubRangeType", &
-                  "Deallocation error for tmpStack")
-          endif
-
-          if((r-i+1) >= (j-l)) then
-             stack(jStack)   = r
-             r               = j-1
-             stack(jStack-1) = j
-          else
-             stack(jStack)   = j-1
-             stack(jStack-1) = l
-             l               = j
-          endif
-
-       endif
-    enddo
-
-    ! Release the memory of stack.
-
-    deallocate(stack, stat=ierr)
-    if(ierr /= 0)                             &
-         call terminate("qsortSortSubRangeType", &
-         "Deallocation error for stack")
+                do j = l + 1, r
+                    a = arr(j)
+                    do i = (j - 1), l, -1
+                        if (arr(i) <= a) exit
+                        arr(i + 1) = arr(i)
+                    end do
+                    arr(i + 1) = a
+                end do
 
-    ! Check in debug mode whether the array is really sorted.
+                ! In case there are no more elements on the stack, exit from
+                ! the outermost do-loop. Algorithm has finished.
 
-    if( debug ) then
-       do i=1,(nn-1)
-          if(arr(i+1) < arr(i))                     &
-               call terminate("qsortSortSubRangeType", &
-               "Array is not sorted correctly")
-       enddo
-    endif
-
-  end subroutine qsortSortSubRangeType
-
-  !
-  !         Functions to define the operators <, <= and /=.
-  !         Note that the comparison is only based on the integers.
-  !         The real contains additional info, the maximum deviation,
-  !         which is normally different even if the subfaces are
-  !         identical.
-  !
-  logical function lessEqualFourIntPlusRealType(g1, g2)
-    implicit none
-    type(fourIntPlusRealType), intent(in) :: g1, g2
+                if (jStack == 0) exit
 
-    ! Compare the first element.
+                ! Pop stack and begin a new round of partitioning.
 
-    if(g1%n1 < g2%n1) then
-       lessEqualFourIntPlusRealType = .true.
-       return
-    else if(g1%n1 > g2%n1) then
-       lessEqualFourIntPlusRealType = .false.
-       return
-    endif
+                r = stack(jStack)
+                l = stack(jStack - 1)
+                jStack = jStack - 2
 
-    ! Compare the second element.
+            else
 
-    if(g1%n2 < g2%n2) then
-       lessEqualFourIntPlusRealType = .true.
-       return
-    else if(g1%n2 > g2%n2) then
-       lessEqualFourIntPlusRealType = .false.
-       return
-    endif
+                ! Subarray is larger than the threshold for a linear sort.
+                ! Choose median of left, center and right elements as
+                ! partitioning element a.
+                ! Also rearrange so that (l) <= (l+1) <= (r).
 
-    ! Compare the third element.
+                k = (l + r) / 2
+                tmp = arr(k)             ! Swap the elements
+                arr(k) = arr(l + 1)           ! k and l+1.
+                arr(l + 1) = tmp
 
-    if(g1%n3 < g2%n3) then
-       lessEqualFourIntPlusRealType = .true.
-       return
-    else if(g1%n3 > g2%n3) then
-       lessEqualFourIntPlusRealType = .false.
-       return
-    endif
+                if (arr(r) < arr(l)) then
+                    tmp = arr(l)             ! Swap the elements
+                    arr(l) = arr(r)             ! r and l.
+                    arr(r) = tmp
+                end if
 
-    ! Compare the fourth element.
+                if (arr(r) < arr(l + 1)) then
+                    tmp = arr(l + 1)         ! Swap the elements
+                    arr(l + 1) = arr(r)           ! r and l+1.
+                    arr(r) = tmp
+                end if
 
-    if(g1%n4 < g2%n4) then
-       lessEqualFourIntPlusRealType = .true.
-       return
-    else if(g1%n4 > g2%n4) then
-       lessEqualFourIntPlusRealType = .false.
-       return
-    endif
+                if (arr(l + 1) < arr(l)) then
+                    tmp = arr(l + 1)         ! Swap the elements
+                    arr(l + 1) = arr(l)           ! l and l+1.
+                    arr(l) = tmp
+                end if
 
-    ! g1 equals g2. Return .true.
+                ! Initialize the pointers for partitioning.
 
-    lessEqualFourIntPlusRealType = .true.
+                i = l + 1
+                j = r
+                a = arr(l + 1)
 
-  end function lessEqualFourIntPlusRealType
+                ! The innermost loop
 
-  !===============================================================
+                do
 
-  logical function lessFourIntPlusRealType(g1, g2)
-    implicit none
-    type(fourIntPlusRealType), intent(in) :: g1, g2
+                    ! Scan up to find element >= a.
+                    do
+                        i = i + 1
+                        if (a <= arr(i)) exit
+                    end do
 
-    ! Compare the first element.
+                    ! Scan down to find element <= a.
+                    do
+                        j = j - 1
+                        if (arr(j) <= a) exit
+                    end do
 
-    if(g1%n1 < g2%n1) then
-       lessFourIntPlusRealType = .true.
-       return
-    else if(g1%n1 > g2%n1) then
-       lessFourIntPlusRealType = .false.
-       return
-    endif
+                    ! Exit the loop in case the pointers i and j crossed.
 
-    ! Compare the second element.
+                    if (j < i) exit
 
-    if(g1%n2 < g2%n2) then
-       lessFourIntPlusRealType = .true.
-       return
-    else if(g1%n2 > g2%n2) then
-       lessFourIntPlusRealType = .false.
-       return
-    endif
+                    ! Swap the element i and j.
 
-    ! Compare the third element.
+                    tmp = arr(i)
+                    arr(i) = arr(j)
+                    arr(j) = tmp
+                end do
 
-    if(g1%n3 < g2%n3) then
-       lessFourIntPlusRealType = .true.
-       return
-    else if(g1%n3 > g2%n3) then
-       lessFourIntPlusRealType = .false.
-       return
-    endif
+                ! Swap the entries j and l+1. Remember that a equals
+                ! arr(l+1).
 
-    ! Compare the fourth element.
+                arr(l + 1) = arr(j)
+                arr(j) = a
 
-    if(g1%n4 < g2%n4) then
-       lessFourIntPlusRealType = .true.
-       return
-    else if(g1%n4 > g2%n4) then
-       lessFourIntPlusRealType = .false.
-       return
-    endif
+                ! Push pointers to larger subarray on stack,
+                ! process smaller subarray immediately.
 
-    ! g1 equals g2. Return .false.
+                jStack = jStack + 2
+                if (jStack > nStack) then
 
-    lessFourIntPlusRealType = .false.
+                    ! Storage of the stack is too small. Reallocate.
 
-  end function lessFourIntPlusRealType
+                    allocate (tmpStack(nStack), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortSortSubRangeType", &
+                                       "Memory allocation error for tmpStack")
+                    tmpStack = stack
+
+                    ! Free the memory of stack, store the old value of nStack
+                    ! in tmp and increase nStack.
 
-  !===============================================================
+                    deallocate (stack, stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortSortSubRangeType", &
+                                       "Deallocation error for stack")
+                    ii = nStack
+                    nStack = nStack + 100
+
+                    ! Allocate the memory for stack and copy the old values
+                    ! from tmpStack.
+
+                    allocate (stack(nStack), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortSortSubRangeType", &
+                                       "Memory reallocation error for stack")
+                    stack(1:ii) = tmpStack(1:ii)
 
-  logical function notEqualFourIntPlusRealType(g1, g2)
-    implicit none
-    type(fourIntPlusRealType), intent(in) :: g1, g2
+                    ! And finally release the memory of tmpStack.
+
+                    deallocate (tmpStack, stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortSortSubRangeType", &
+                                       "Deallocation error for tmpStack")
+                end if
 
-    notEqualFourIntPlusRealType = .true.
-    if(g1%n1 == g2%n1 .and. g1%n2 == g2%n2 .and. &
-         g1%n3 == g2%n3 .and. g1%n4 == g2%n4) &
-         notEqualFourIntPlusRealType = .false.
+                if ((r - i + 1) >= (j - l)) then
+                    stack(jStack) = r
+                    r = j - 1
+                    stack(jStack - 1) = j
+                else
+                    stack(jStack) = j - 1
+                    stack(jStack - 1) = l
+                    l = j
+                end if
 
-  end function notEqualFourIntPlusRealType
+            end if
+        end do
 
+        ! Release the memory of stack.
 
-  !      ==================================================================
+        deallocate (stack, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("qsortSortSubRangeType", &
+                           "Deallocation error for stack")
 
-  subroutine sortBadEntities(nEntities, entities, dist, sortDist)
-    !
-    !       sortBadEntities sorts the given number of entities in
-    !       increasing order and gets rid of the multiple entries.
-    !
-    use constants
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(inout) :: nEntities
-    integer(kind=intType), dimension(4,*), intent(inout) :: entities
-    real(kind=realType),   dimension(*),   intent(inout) :: dist
+        ! Check in debug mode whether the array is really sorted.
 
-    logical, intent(in) :: sortDist
+        if (debug) then
+            do i = 1, (nn - 1)
+                if (arr(i + 1) < arr(i)) &
+                    call terminate("qsortSortSubRangeType", &
+                                   "Array is not sorted correctly")
+            end do
+        end if
+
+    end subroutine qsortSortSubRangeType
+
+    !
+    !         Functions to define the operators <, <= and /=.
+    !         Note that the comparison is only based on the integers.
+    !         The real contains additional info, the maximum deviation,
+    !         which is normally different even if the subfaces are
+    !         identical.
     !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn, mm
-
-    type(fourIntPlusRealType), dimension(nEntities) :: tmp
-
-    ! Return immediately if there are no entities to be sorted.
-
-    if(nEntities == 0) return
-
-    ! Copy the info into tmp. If the distances must be sorted as
-    ! well, copy the info. Otherwise simply put zero.
-
-    do nn=1,nEntities
-       tmp(nn)%n1   = entities(1,nn)
-       tmp(nn)%n2   = entities(2,nn)
-       tmp(nn)%n3   = entities(3,nn)
-       tmp(nn)%n4   = entities(4,nn)
-       if( sortDist ) then
-          tmp(nn)%dist = dist(nn)
-       else
-          tmp(nn)%dist = zero
-       endif
-    enddo
-
-    ! Sort tmp in increasing order.
-
-    call qsortFourIntPlusRealType(tmp, nEntities)
+    logical function lessEqualFourIntPlusRealType(g1, g2)
+        implicit none
+        type(fourIntPlusRealType), intent(in) :: g1, g2
 
-    ! Get rid of the multiple entries. Note that the exceptional
-    ! case of zero entities does not to be considered, because in
-    ! that case this part of the subroutine is not executed.
-    ! If multiple entries are present the distance is taken as
-    ! the maximum of the two.
+        ! Compare the first element.
 
-    mm = 1
-    do nn=2,nEntities
-       if(tmp(nn) /= tmp(mm)) then
-          mm = mm + 1
-          tmp(mm) = tmp(nn)
-       else
-          tmp(mm)%dist = max(tmp(mm)%dist, tmp(nn)%dist)
-       endif
-    enddo
+        if (g1%n1 < g2%n1) then
+            lessEqualFourIntPlusRealType = .true.
+            return
+        else if (g1%n1 > g2%n1) then
+            lessEqualFourIntPlusRealType = .false.
+            return
+        end if
 
-    ! Copy the data back info entities and dist. The latter
-    ! only if the distances should be sorted as well.
+        ! Compare the second element.
 
-    nEntities = mm
+        if (g1%n2 < g2%n2) then
+            lessEqualFourIntPlusRealType = .true.
+            return
+        else if (g1%n2 > g2%n2) then
+            lessEqualFourIntPlusRealType = .false.
+            return
+        end if
+
+        ! Compare the third element.
+
+        if (g1%n3 < g2%n3) then
+            lessEqualFourIntPlusRealType = .true.
+            return
+        else if (g1%n3 > g2%n3) then
+            lessEqualFourIntPlusRealType = .false.
+            return
+        end if
+
+        ! Compare the fourth element.
+
+        if (g1%n4 < g2%n4) then
+            lessEqualFourIntPlusRealType = .true.
+            return
+        else if (g1%n4 > g2%n4) then
+            lessEqualFourIntPlusRealType = .false.
+            return
+        end if
+
+        ! g1 equals g2. Return .true.
+
+        lessEqualFourIntPlusRealType = .true.
+
+    end function lessEqualFourIntPlusRealType
+
+    !===============================================================
+
+    logical function lessFourIntPlusRealType(g1, g2)
+        implicit none
+        type(fourIntPlusRealType), intent(in) :: g1, g2
+
+        ! Compare the first element.
+
+        if (g1%n1 < g2%n1) then
+            lessFourIntPlusRealType = .true.
+            return
+        else if (g1%n1 > g2%n1) then
+            lessFourIntPlusRealType = .false.
+            return
+        end if
+
+        ! Compare the second element.
+
+        if (g1%n2 < g2%n2) then
+            lessFourIntPlusRealType = .true.
+            return
+        else if (g1%n2 > g2%n2) then
+            lessFourIntPlusRealType = .false.
+            return
+        end if
+
+        ! Compare the third element.
+
+        if (g1%n3 < g2%n3) then
+            lessFourIntPlusRealType = .true.
+            return
+        else if (g1%n3 > g2%n3) then
+            lessFourIntPlusRealType = .false.
+            return
+        end if
+
+        ! Compare the fourth element.
+
+        if (g1%n4 < g2%n4) then
+            lessFourIntPlusRealType = .true.
+            return
+        else if (g1%n4 > g2%n4) then
+            lessFourIntPlusRealType = .false.
+            return
+        end if
+
+        ! g1 equals g2. Return .false.
+
+        lessFourIntPlusRealType = .false.
+
+    end function lessFourIntPlusRealType
+
+    !===============================================================
+
+    logical function notEqualFourIntPlusRealType(g1, g2)
+        implicit none
+        type(fourIntPlusRealType), intent(in) :: g1, g2
+
+        notEqualFourIntPlusRealType = .true.
+        if (g1%n1 == g2%n1 .and. g1%n2 == g2%n2 .and. &
+            g1%n3 == g2%n3 .and. g1%n4 == g2%n4) &
+            notEqualFourIntPlusRealType = .false.
+
+    end function notEqualFourIntPlusRealType
+
+    !      ==================================================================
+
+    subroutine sortBadEntities(nEntities, entities, dist, sortDist)
+        !
+        !       sortBadEntities sorts the given number of entities in
+        !       increasing order and gets rid of the multiple entries.
+        !
+        use constants
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(inout) :: nEntities
+        integer(kind=intType), dimension(4, *), intent(inout) :: entities
+        real(kind=realType), dimension(*), intent(inout) :: dist
+
+        logical, intent(in) :: sortDist
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn, mm
+
+        type(fourIntPlusRealType), dimension(nEntities) :: tmp
+
+        ! Return immediately if there are no entities to be sorted.
+
+        if (nEntities == 0) return
 
-    do nn=1,nEntities
-       entities(1,nn) = tmp(nn)%n1
-       entities(2,nn) = tmp(nn)%n2
-       entities(3,nn) = tmp(nn)%n3
-       entities(4,nn) = tmp(nn)%n4
-       if( sortDist ) dist(nn) = tmp(nn)%dist
-    enddo
+        ! Copy the info into tmp. If the distances must be sorted as
+        ! well, copy the info. Otherwise simply put zero.
 
-  end subroutine sortBadEntities
+        do nn = 1, nEntities
+            tmp(nn)%n1 = entities(1, nn)
+            tmp(nn)%n2 = entities(2, nn)
+            tmp(nn)%n3 = entities(3, nn)
+            tmp(nn)%n4 = entities(4, nn)
+            if (sortDist) then
+                tmp(nn)%dist = dist(nn)
+            else
+                tmp(nn)%dist = zero
+            end if
+        end do
+
+        ! Sort tmp in increasing order.
+
+        call qsortFourIntPlusRealType(tmp, nEntities)
+
+        ! Get rid of the multiple entries. Note that the exceptional
+        ! case of zero entities does not to be considered, because in
+        ! that case this part of the subroutine is not executed.
+        ! If multiple entries are present the distance is taken as
+        ! the maximum of the two.
+
+        mm = 1
+        do nn = 2, nEntities
+            if (tmp(nn) /= tmp(mm)) then
+                mm = mm + 1
+                tmp(mm) = tmp(nn)
+            else
+                tmp(mm)%dist = max(tmp(mm)%dist, tmp(nn)%dist)
+            end if
+        end do
 
-  !      ==================================================================
-
-  subroutine qsortFourIntPlusRealType(arr, nn)
-    !
-    !       qsortFourIntPlusRealType sorts the given number of halo's in
-    !       increasing order based on the <= operator for this derived
-    !       data type.
-    !
-    use constants
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nn
-
-    type(fourIntPlusRealType), dimension(*), intent(inout) :: arr
-    !
-    !      Local variables.
-    !
-    integer(kind=intType), parameter :: m = 7
+        ! Copy the data back info entities and dist. The latter
+        ! only if the distances should be sorted as well.
 
-    integer(kind=intType) :: nStack
-    integer(kind=intType) :: i, j, k, r, l, jStack, ii
+        nEntities = mm
 
-    integer :: ierr
+        do nn = 1, nEntities
+            entities(1, nn) = tmp(nn)%n1
+            entities(2, nn) = tmp(nn)%n2
+            entities(3, nn) = tmp(nn)%n3
+            entities(4, nn) = tmp(nn)%n4
+            if (sortDist) dist(nn) = tmp(nn)%dist
+        end do
+
+    end subroutine sortBadEntities
 
-    type(fourIntPlusRealType) :: a, tmp
+    !      ==================================================================
 
-    integer(kind=intType), allocatable, dimension(:) :: stack
-    integer(kind=intType), allocatable, dimension(:) :: tmpStack
+    subroutine qsortFourIntPlusRealType(arr, nn)
+        !
+        !       qsortFourIntPlusRealType sorts the given number of halo's in
+        !       increasing order based on the <= operator for this derived
+        !       data type.
+        !
+        use constants
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nn
+
+        type(fourIntPlusRealType), dimension(*), intent(inout) :: arr
+        !
+        !      Local variables.
+        !
+        integer(kind=intType), parameter :: m = 7
+
+        integer(kind=intType) :: nStack
+        integer(kind=intType) :: i, j, k, r, l, jStack, ii
+
+        integer :: ierr
+
+        type(fourIntPlusRealType) :: a, tmp
+
+        integer(kind=intType), allocatable, dimension(:) :: stack
+        integer(kind=intType), allocatable, dimension(:) :: tmpStack
 
-    ! Allocate the memory for stack.
+        ! Allocate the memory for stack.
 
-    nStack = 100
-    allocate(stack(nStack), stat=ierr)
-    if(ierr /= 0)                                &
-         call terminate("qsortFourIntPlusRealType", &
-         "Memory allocation failure for stack")
+        nStack = 100
+        allocate (stack(nStack), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("qsortFourIntPlusRealType", &
+                           "Memory allocation failure for stack")
 
-    ! Initialize the variables that control the sorting.
+        ! Initialize the variables that control the sorting.
 
-    jStack = 0
-    l      = 1
-    r      = nn
+        jStack = 0
+        l = 1
+        r = nn
 
-    ! Start of the algorithm
+        ! Start of the algorithm
 
-    do
+        do
 
-       ! Check for the size of the subarray.
+            ! Check for the size of the subarray.
 
-       if((r-l) < m) then
+            if ((r - l) < m) then
 
-          ! Perform insertion sort
+                ! Perform insertion sort
 
-          do j=l+1,r
-             a = arr(j)
-             do i=(j-1),l,-1
-                if(arr(i) <= a) exit
-                arr(i+1) = arr(i)
-             enddo
-             arr(i+1) = a
-          enddo
+                do j = l + 1, r
+                    a = arr(j)
+                    do i = (j - 1), l, -1
+                        if (arr(i) <= a) exit
+                        arr(i + 1) = arr(i)
+                    end do
+                    arr(i + 1) = a
+                end do
 
-          ! In case there are no more elements on the stack, exit from
-          ! the outermost do-loop. Algorithm has finished.
+                ! In case there are no more elements on the stack, exit from
+                ! the outermost do-loop. Algorithm has finished.
 
-          if(jStack == 0) exit
+                if (jStack == 0) exit
 
-          ! Pop stack and begin a new round of partitioning.
+                ! Pop stack and begin a new round of partitioning.
 
-          r = stack(jStack)
-          l = stack(jStack-1)
-          jStack = jStack - 2
+                r = stack(jStack)
+                l = stack(jStack - 1)
+                jStack = jStack - 2
 
-       else
+            else
 
-          ! Subarray is larger than the threshold for a linear sort.
-          ! Choose median of left, center and right elements as
-          ! partitioning element a.
-          ! Also rearrange so that (l) <= (l+1) <= (r).
+                ! Subarray is larger than the threshold for a linear sort.
+                ! Choose median of left, center and right elements as
+                ! partitioning element a.
+                ! Also rearrange so that (l) <= (l+1) <= (r).
 
-          k = (l+r)/2
-          tmp      = arr(k)             ! Swap the elements
-          arr(k)   = arr(l+1)           ! k and l+1.
-          arr(l+1) = tmp
+                k = (l + r) / 2
+                tmp = arr(k)             ! Swap the elements
+                arr(k) = arr(l + 1)           ! k and l+1.
+                arr(l + 1) = tmp
 
-          if(arr(r) < arr(l)) then
-             tmp    = arr(l)             ! Swap the elements
-             arr(l) = arr(r)             ! r and l.
-             arr(r) = tmp
-          endif
+                if (arr(r) < arr(l)) then
+                    tmp = arr(l)             ! Swap the elements
+                    arr(l) = arr(r)             ! r and l.
+                    arr(r) = tmp
+                end if
 
-          if(arr(r) < arr(l+1)) then
-             tmp      = arr(l+1)         ! Swap the elements
-             arr(l+1) = arr(r)           ! r and l+1.
-             arr(r)   = tmp
-          endif
+                if (arr(r) < arr(l + 1)) then
+                    tmp = arr(l + 1)         ! Swap the elements
+                    arr(l + 1) = arr(r)           ! r and l+1.
+                    arr(r) = tmp
+                end if
 
-          if(arr(l+1) < arr(l)) then
-             tmp      = arr(l+1)         ! Swap the elements
-             arr(l+1) = arr(l)           ! l and l+1.
-             arr(l)   = tmp
-          endif
+                if (arr(l + 1) < arr(l)) then
+                    tmp = arr(l + 1)         ! Swap the elements
+                    arr(l + 1) = arr(l)           ! l and l+1.
+                    arr(l) = tmp
+                end if
 
-          ! Initialize the pointers for partitioning.
+                ! Initialize the pointers for partitioning.
 
-          i = l+1
-          j = r
-          a = arr(l+1)
+                i = l + 1
+                j = r
+                a = arr(l + 1)
 
-          ! The innermost loop
+                ! The innermost loop
 
-          do
+                do
 
-             ! Scan up to find element >= a.
-             do
-                i = i+1
-                if(a <= arr(i)) exit
-             enddo
+                    ! Scan up to find element >= a.
+                    do
+                        i = i + 1
+                        if (a <= arr(i)) exit
+                    end do
 
-             ! Scan down to find element <= a.
-             do
-                j = j-1
-                if(arr(j) <= a) exit
-             enddo
+                    ! Scan down to find element <= a.
+                    do
+                        j = j - 1
+                        if (arr(j) <= a) exit
+                    end do
 
-             ! Exit the loop in case the pointers i and j crossed.
+                    ! Exit the loop in case the pointers i and j crossed.
 
-             if(j < i) exit
+                    if (j < i) exit
 
-             ! Swap the element i and j.
+                    ! Swap the element i and j.
 
-             tmp    = arr(i)
-             arr(i) = arr(j)
-             arr(j) = tmp
-          enddo
+                    tmp = arr(i)
+                    arr(i) = arr(j)
+                    arr(j) = tmp
+                end do
 
-          ! Swap the entries j and l+1. Remember that a equals
-          ! arr(l+1).
+                ! Swap the entries j and l+1. Remember that a equals
+                ! arr(l+1).
 
-          arr(l+1) = arr(j)
-          arr(j)   = a
+                arr(l + 1) = arr(j)
+                arr(j) = a
 
-          ! Push pointers to larger subarray on stack,
-          ! process smaller subarray immediately.
+                ! Push pointers to larger subarray on stack,
+                ! process smaller subarray immediately.
 
-          jStack = jStack + 2
-          if(jStack > nStack) then
+                jStack = jStack + 2
+                if (jStack > nStack) then
 
-             ! Storage of the stack is too small. Reallocate.
+                    ! Storage of the stack is too small. Reallocate.
 
-             allocate(tmpStack(nStack), stat=ierr)
-             if(ierr /= 0)                                &
-                  call terminate("qsortFourIntPlusRealType", &
-                  "Memory allocation error for tmpStack")
-             tmpStack = stack
+                    allocate (tmpStack(nStack), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortFourIntPlusRealType", &
+                                       "Memory allocation error for tmpStack")
+                    tmpStack = stack
 
-             ! Free the memory of stack, store the old value of nStack
-             ! in tmp and increase nStack.
+                    ! Free the memory of stack, store the old value of nStack
+                    ! in tmp and increase nStack.
 
-             deallocate(stack, stat=ierr)
-             if(ierr /= 0)                                &
-                  call terminate("qsortFourIntPlusRealType", &
-                  "Deallocation error for stack")
-             ii = nStack
-             nStack = nStack + 100
+                    deallocate (stack, stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortFourIntPlusRealType", &
+                                       "Deallocation error for stack")
+                    ii = nStack
+                    nStack = nStack + 100
 
-             ! Allocate the memory for stack and copy the old values
-             ! from tmpStack.
+                    ! Allocate the memory for stack and copy the old values
+                    ! from tmpStack.
 
-             allocate(stack(nStack), stat=ierr)
-             if(ierr /= 0)                                &
-                  call terminate("qsortFourIntPlusRealType", &
-                  "Memory reallocation error for stack")
-             stack(1:ii) = tmpStack(1:ii)
-
-             ! And finally release the memory of tmpStack.
-
-             deallocate(tmpStack, stat=ierr)
-             if(ierr /= 0)                                &
-                  call terminate("qsortFourIntPlusRealType", &
-                  "Deallocation error for tmpStack")
-          endif
-
-          if((r-i+1) >= (j-l)) then
-             stack(jStack)   = r
-             r               = j-1
-             stack(jStack-1) = j
-          else
-             stack(jStack)   = j-1
-             stack(jStack-1) = l
-             l               = j
-          endif
+                    allocate (stack(nStack), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortFourIntPlusRealType", &
+                                       "Memory reallocation error for stack")
+                    stack(1:ii) = tmpStack(1:ii)
+
+                    ! And finally release the memory of tmpStack.
+
+                    deallocate (tmpStack, stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortFourIntPlusRealType", &
+                                       "Deallocation error for tmpStack")
+                end if
+
+                if ((r - i + 1) >= (j - l)) then
+                    stack(jStack) = r
+                    r = j - 1
+                    stack(jStack - 1) = j
+                else
+                    stack(jStack) = j - 1
+                    stack(jStack - 1) = l
+                    l = j
+                end if
 
-       endif
-    enddo
+            end if
+        end do
 
-    ! Release the memory of stack.
+        ! Release the memory of stack.
 
-    deallocate(stack, stat=ierr)
-    if(ierr /= 0)                                &
-         call terminate("qsortFourIntPlusRealType", &
-         "Deallocation error for stack")
+        deallocate (stack, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("qsortFourIntPlusRealType", &
+                           "Deallocation error for stack")
 
-    ! Check in debug mode whether the array is really sorted.
+        ! Check in debug mode whether the array is really sorted.
 
-    if( debug ) then
-       do i=1,(nn-1)
-          if(arr(i+1) < arr(i))                        &
-               call terminate("qsortFourIntPlusRealType", &
-               "Array is not sorted correctly")
-       enddo
-    endif
+        if (debug) then
+            do i = 1, (nn - 1)
+                if (arr(i + 1) < arr(i)) &
+                    call terminate("qsortFourIntPlusRealType", &
+                                   "Array is not sorted correctly")
+            end do
+        end if
 
-  end subroutine qsortFourIntPlusRealType
+    end subroutine qsortFourIntPlusRealType
 
 end module partitionMod
diff --git a/src/partitioning/partitioning.F90 b/src/partitioning/partitioning.F90
index 25a8ef0d6..b7f798f35 100644
--- a/src/partitioning/partitioning.F90
+++ b/src/partitioning/partitioning.F90
@@ -2,983 +2,978 @@ module partitioning
 
 contains
 
-  subroutine partitionAndReadGrid(partitionOnly)
-    !
-    !       partitionAndReadGrid determines the partitioning of the
-    !       multiblock grid over the processors and reads the grid of the
-    !       blocks (or block parts) assigned to this processor. Other
-    !       preprocessing activities, such as the proper setup of the halo
-    !       communication structure, creation of coarse grids and wall
-    !       distance computation, are performed in the preprocessing
-    !       library.
-    !
-    use constants
-    use IOModule, only : IOVar
-    use partitionMod, only : fileIDs, gridFiles
-    use utils, only : terminate
-    use loadBalance, only : loadBalanceGrid
-    use gridChecking, only : checkFaces, check1to1Subfaces
-    use readCGNSGrid, only : readBlockSizes, readGrid
-    implicit none
-
-    logical, intent(in) :: partitionOnly
-    !
-    !      Local variables
-    !
-    integer :: ierr
-
-    ! Determine the number of grid files that must be read,
-    ! as well as the corresponding file names.
-
-    call determineGridFileNames
-
-    ! Read the number of blocks and the block sizes of the grid stored
-    ! in the cgns grid file. This info is stored on all processors
-
-    call readBlockSizes
-    call determineNeighborIDs
-
-    ! Determine the grid sections.
-    call determineSections
-
-    ! If we are just doing a partition test, return
-    if (partitionOnly) then
-       return
-    end if
-
-    ! Determine the number of blocks to be stored on this processor
-    ! and the relation to the original grid. Remember that blocks
-    ! can be split for load balancing reasons.
-
-    call loadBalanceGrid
-
-    ! Initialize the iblank array for the fine grid domains
-
-    call initFineGridIblank
-
-    ! Allocate the coordinates of the fine grid level and the
-    ! derived data type used to read them.
-
-    call allocCoorFineGrid
-
-    ! Read the grid of the blocks (block parts) to be stored
-    ! on this processor.
-
-    call readGrid
-
-    ! Determine for the time spectral mode the time of one period,
-    ! the rotation matrices for the velocity components and
-    ! create the fine grid coordinates of all time spectral locations.
-
-    call timePeriodSpectral
-    call timeRotMatricesSpectral
-    call fineGridSpectralCoor
-
-    ! Release the memory of fileIDs, gridFiles and IOVar.
-    ! They are not needed anymore.
-
-    deallocate(fileIDs, gridFiles, IOVar, stat=ierr)
-    if(ierr /= 0)                            &
-         call terminate("partitionAndReadGrid", &
-         "Deallocation failure for fileIDs, gridFiles &
-         &and IOVar")
-
-    ! Check if for all faces on the block boundaries either a
-    ! physical boundary condition or a connectivity has been
-    ! specified and check if the 1 to 1 subfaces match.
-
-    call checkFaces
-    call check1to1Subfaces
-
-  end subroutine partitionAndReadGrid
-
-
-  subroutine determineGridFileNames
-    !
-    !       determineGridFileNames determines the number and names of the
-    !       files that contain the grids. For steady computations only one
-    !       file must be present no matter if a restart is performed or
-    !       not. For unsteady the situation is a little more complicated.
-    !       If no restart is performed only one file must be present. If a
-    !       restart is performed in unsteady or time spectral mode and a
-    !       rigid body motion is prescribed again only one grid file is
-    !       required; however for a consistent restart with deforming
-    !       meshes the grids in the past must be read as well. If this is
-    !       not possible only a first order restart can be made in
-    !       unsteady mode and some kind of interpolation is used for the
-    !       time spectral method.
-    !
-    use constants
-    use communication, only: myID, adflow_comm_world
-    use inputIO, only : gridFile
-    use inputPhysics, only :equationMode
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use inputUnsteady, only : nOldGridRead
-    use iteration, only : deforming_grid, nOldLevels
-    use partitionMod, only : gridFiles, interpolSpectral, &
-         nGridsRead, fileIDs
-    use utils, only : terminate
-    implicit none
-    !
-    !      Local variables
-    !
-    integer :: ierr
+    subroutine partitionAndReadGrid(partitionOnly)
+        !
+        !       partitionAndReadGrid determines the partitioning of the
+        !       multiblock grid over the processors and reads the grid of the
+        !       blocks (or block parts) assigned to this processor. Other
+        !       preprocessing activities, such as the proper setup of the halo
+        !       communication structure, creation of coarse grids and wall
+        !       distance computation, are performed in the preprocessing
+        !       library.
+        !
+        use constants
+        use IOModule, only: IOVar
+        use partitionMod, only: fileIDs, gridFiles
+        use utils, only: terminate
+        use loadBalance, only: loadBalanceGrid
+        use gridChecking, only: checkFaces, check1to1Subfaces
+        use readCGNSGrid, only: readBlockSizes, readGrid
+        implicit none
+
+        logical, intent(in) :: partitionOnly
+        !
+        !      Local variables
+        !
+        integer :: ierr
+
+        ! Determine the number of grid files that must be read,
+        ! as well as the corresponding file names.
+
+        call determineGridFileNames
+
+        ! Read the number of blocks and the block sizes of the grid stored
+        ! in the cgns grid file. This info is stored on all processors
+
+        call readBlockSizes
+        call determineNeighborIDs
+
+        ! Determine the grid sections.
+        call determineSections
+
+        ! If we are just doing a partition test, return
+        if (partitionOnly) then
+            return
+        end if
+
+        ! Determine the number of blocks to be stored on this processor
+        ! and the relation to the original grid. Remember that blocks
+        ! can be split for load balancing reasons.
+
+        call loadBalanceGrid
+
+        ! Initialize the iblank array for the fine grid domains
+
+        call initFineGridIblank
+
+        ! Allocate the coordinates of the fine grid level and the
+        ! derived data type used to read them.
+
+        call allocCoorFineGrid
+
+        ! Read the grid of the blocks (block parts) to be stored
+        ! on this processor.
+
+        call readGrid
+
+        ! Determine for the time spectral mode the time of one period,
+        ! the rotation matrices for the velocity components and
+        ! create the fine grid coordinates of all time spectral locations.
+
+        call timePeriodSpectral
+        call timeRotMatricesSpectral
+        call fineGridSpectralCoor
+
+        ! Release the memory of fileIDs, gridFiles and IOVar.
+        ! They are not needed anymore.
+
+        deallocate (fileIDs, gridFiles, IOVar, stat=ierr)
+        if (ierr /= 0) &
+             call terminate("partitionAndReadGrid", &
+             "Deallocation failure for fileIDs, gridFiles &
+             &and IOVar")
+
+        ! Check if for all faces on the block boundaries either a
+        ! physical boundary condition or a connectivity has been
+        ! specified and check if the 1 to 1 subfaces match.
+
+        call checkFaces
+        call check1to1Subfaces
+
+    end subroutine partitionAndReadGrid
+
+    subroutine determineGridFileNames
+        !
+        !       determineGridFileNames determines the number and names of the
+        !       files that contain the grids. For steady computations only one
+        !       file must be present no matter if a restart is performed or
+        !       not. For unsteady the situation is a little more complicated.
+        !       If no restart is performed only one file must be present. If a
+        !       restart is performed in unsteady or time spectral mode and a
+        !       rigid body motion is prescribed again only one grid file is
+        !       required; however for a consistent restart with deforming
+        !       meshes the grids in the past must be read as well. If this is
+        !       not possible only a first order restart can be made in
+        !       unsteady mode and some kind of interpolation is used for the
+        !       time spectral method.
+        !
+        use constants
+        use communication, only: myID, adflow_comm_world
+        use inputIO, only: gridFile
+        use inputPhysics, only: equationMode
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use inputUnsteady, only: nOldGridRead
+        use iteration, only: deforming_grid, nOldLevels
+        use partitionMod, only: gridFiles, interpolSpectral, &
+                                nGridsRead, fileIDs
+        use utils, only: terminate
+        implicit none
+        !
+        !      Local variables
+        !
+        integer :: ierr
 
-    integer(kind=intType) :: ii, nn, restartID
+        integer(kind=intType) :: ii, nn, restartID
 
-    character(len=7)            :: integerString
-    character(len=maxStringLen) :: tmpName
+        character(len=7) :: integerString
+        character(len=maxStringLen) :: tmpName
 
-    ! Initialization of nOldGridRead and interpolSpectral.
+        ! Initialization of nOldGridRead and interpolSpectral.
 
-    nOldGridRead     = 1
-    interpolSpectral = .true.
+        nOldGridRead = 1
+        interpolSpectral = .true.
 
-    ! Determine the desired number of files from which grids at
-    ! certain time levels should be read.  This depends on the
-    ! equation mode we have to solve for. Also set the corresponding
-    ! file names.
+        ! Determine the desired number of files from which grids at
+        ! certain time levels should be read.  This depends on the
+        ! equation mode we have to solve for. Also set the corresponding
+        ! file names.
 
-    select case(equationMode)
+        select case (equationMode)
 
-    case (steady)
+        case (steady)
 
-       ! Steady computation. Only one grid needs to be read.
+            ! Steady computation. Only one grid needs to be read.
 
-       nGridsRead = 1
-       allocate(fileIDs(nGridsRead), &
-            gridFiles(nGridsRead), stat=ierr)
-       if(ierr /= 0)                              &
-            call terminate("determineGridFileNames", &
-            "Memory allocation failure for fileIDs &
-            &and gridFiles")
+            nGridsRead = 1
+            allocate (fileIDs(nGridsRead), &
+                      gridFiles(nGridsRead), stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("determineGridFileNames", &
+                 "Memory allocation failure for fileIDs &
+                 &and gridFiles")
 
-       gridFiles(1) = gridFile
+            gridFiles(1) = gridFile
 
-       !===============================================================
-
-    case (unsteady)
+            !===============================================================
+
+        case (unsteady)
+
+            ! Unsteady computation. A further check is required.
+            ! EJ: replaced boolean variable restart with .false. for now
+            ! Need to refactor this code as well with ALE restart
+
+            testMultipleUnsteady: if (deforming_Grid .and. .false.) then
+
+                ! A restart is made with deforming meshes. For a consistent
+                ! restart nOldLevels grids must be read. First determine
+                ! the prefix of the grid file and the time step number
+                ! from which a restart should be made.
+
+                ii = len_trim(gridFile)
+                do
+                    if (gridFile(ii:ii) < "0" .or. gridFile(ii:ii) > "9") exit
+                    ii = ii - 1
+                end do
+
+                ! If the last characters of the file name do not contain a
+                ! number, the grid file does not come from a previous
+                ! unsteady deforming mesh computation and therefore only
+                ! one grid will be read.
+
+                if (ii == len_trim(gridFile)) then
+
+                    nGridsRead = 1
+                    allocate (fileIDs(nGridsRead), &
+                              gridFiles(nGridsRead), stat=ierr)
+                    if (ierr /= 0) &
+                         call terminate("determineGridFileNames", &
+                         "Memory allocation failure for fileIDs &
+                         &and gridFiles")
+
+                    gridFiles(1) = gridFile
+
+                else
+
+                    ! Read the integer number from the last characters
+                    ! of the grid file.
+
+                    read (gridFile(ii + 1:), *) restartID
+
+                    ! Allocate the memory for the file names and set them.
+
+                    nGridsRead = nOldLevels
+                    allocate (fileIDs(nGridsRead), &
+                              gridFiles(nGridsRead), stat=ierr)
+                    if (ierr /= 0) &
+                         call terminate("determineGridFileNames", &
+                         "Memory allocation failure for fileIDs &
+                         &and gridFiles")
 
-       ! Unsteady computation. A further check is required.
-       ! EJ: replaced boolean variable restart with .false. for now
-       ! Need to refactor this code as well with ALE restart
+                    do nn = 1, nGridsRead
+                        write (integerString, "(i6)") restartID - nn + 1
+                        integerString = adjustl(integerString)
+                        gridFiles(nn) = gridFile(:ii)//trim(integerString)
+                    end do
 
-       testMultipleUnsteady: if(deforming_Grid .and. .false.) then
-
-          ! A restart is made with deforming meshes. For a consistent
-          ! restart nOldLevels grids must be read. First determine
-          ! the prefix of the grid file and the time step number
-          ! from which a restart should be made.
+                end if
 
-          ii = len_trim(gridFile)
-          do
-             if(gridFile(ii:ii) < "0" .or. gridFile(ii:ii) > "9") exit
-             ii = ii - 1
-          enddo
-
-          ! If the last characters of the file name do not contain a
-          ! number, the grid file does not come from a previous
-          ! unsteady deforming mesh computation and therefore only
-          ! one grid will be read.
-
-          if(ii == len_trim(gridFile)) then
-
-             nGridsRead = 1
-             allocate(fileIDs(nGridsRead), &
-                  gridFiles(nGridsRead), stat=ierr)
-             if(ierr /= 0)                              &
-                  call terminate("determineGridFileNames", &
-                  "Memory allocation failure for fileIDs &
-                  &and gridFiles")
-
-             gridFiles(1) = gridFile
-
-          else
-
-             ! Read the integer number from the last characters
-             ! of the grid file.
-
-             read(gridFile(ii+1:),*) restartID
-
-             ! Allocate the memory for the file names and set them.
+                else testMultipleUnsteady
 
-             nGridsRead = nOldLevels
-             allocate(fileIDs(nGridsRead), &
-                  gridFiles(nGridsRead), stat=ierr)
-             if(ierr /= 0)                              &
-                  call terminate("determineGridFileNames", &
-                  "Memory allocation failure for fileIDs &
-                  &and gridFiles")
+                ! The computation either starts from scratch or an unsteady
+                ! restart for a rigid grid (possibly moving) is made. In
+                ! all cases only one grid file is needed.
 
-             do nn=1,nGridsRead
-                write(integerString,"(i6)") restartID - nn + 1
-                integerString = adjustl(integerString)
-                gridFiles(nn) = gridFile(:ii)//trim(integerString)
-             enddo
+                nGridsRead = 1
+                allocate (fileIDs(nGridsRead), &
+                          gridFiles(nGridsRead), stat=ierr)
+                if (ierr /= 0) &
+                     call terminate("determineGridFileNames", &
+                     "Memory allocation failure for fileIDs &
+                     &and gridFiles")
 
-          endif
-
-       else testMultipleUnsteady
-
-          ! The computation either starts from scratch or an unsteady
-          ! restart for a rigid grid (possibly moving) is made. In
-          ! all cases only one grid file is needed.
-
-          nGridsRead = 1
-          allocate(fileIDs(nGridsRead), &
-               gridFiles(nGridsRead), stat=ierr)
-          if(ierr /= 0)                              &
-               call terminate("determineGridFileNames", &
-               "Memory allocation failure for fileIDs &
-               &and gridFiles")
-
-          gridFiles(1) = gridFile
-
-       endif testMultipleUnsteady
-
-       ! Check if the files can be opened.
-
-       do nn=1,nGridsRead
-          open(unit=21,file=gridFiles(nn),status="old",iostat=ierr)
-          if(ierr /= 0) exit
-          close(unit=21)
-       enddo
-
-       ! Possibly correct nGridsRead and set nOldGridRead.
-       ! If nOldGridRead == 0, i.e. not a valid grid is found,
-       ! print an error message and terminate.
-
-       nGridsRead   = nn - 1
-       nOldGridRead = nGridsRead
-
-       if(nOldGridRead == 0) then
-          if(myID == 0)                              &
-               call terminate("determineGridFileNames", &
-               "Grid file(s) could not be opened")
-          call mpi_barrier(ADflow_comm_world, ierr)
-       endif
-
-       !===============================================================
-
-    case (timeSpectral)
-
-       ! Time spectral computation. A further check is required.
-       ! EJ: replaced boolean variable restart with .false. for now
-       ! Need to refactor this code as well with ALE restart
-
-       testMultipleTS: if(deforming_Grid .and. .false.) then
-
-          ! A restart is made with deforming meshes. For a consistent
-          ! restart multiple grids must be read. First determine the
-          ! the prefix of the grid file from which a restart should
-          ! be made.
-
-          ii = len_trim(gridFile)
-          do
-             if(gridFile(ii:ii) < "0" .or. gridFile(ii:ii) > "9") exit
-             ii = ii - 1
-          enddo
-
-          ! If the last characters of the file name do not contain a
-          ! number, the grid file does not come from a previous
-          ! time spectral deforming mesh computation and therefore
-          ! only one grid will be read.
-
-          if(ii == len_trim(gridFile)) then
-
-             nGridsRead = 1
-             allocate(fileIDs(nGridsRead), &
-                  gridFiles(nGridsRead), stat=ierr)
-             if(ierr /= 0)                              &
-                  call terminate("determineGridFileNames", &
-                  "Memory allocation failure for fileIDs &
-                  &and gridFiles")
-
-             gridFiles(1) = gridFile
+                gridFiles(1) = gridFile
 
-          else
+            end if testMultipleUnsteady
 
-             ! Loop to find out how many time instances were used in
-             ! the previous computation from which a restart is made.
-
-             nn = 0
-             do
-                nn = nn + 1
-                write(integerString,"(i6)") nn
-                integerString = adjustl(integerString)
-                tmpName = gridFile(:ii)//trim(integerString)
+            ! Check if the files can be opened.
 
-                open(unit=21,file=tmpName,status="old",iostat=ierr)
-                if(ierr /= 0) exit
-                close(unit=21)
-             enddo
+            do nn = 1, nGridsRead
+                open (unit=21, file=gridFiles(nn), status="old", iostat=ierr)
+                if (ierr /= 0) exit
+                close (unit=21)
+            end do
 
-             nn = nn - 1
+            ! Possibly correct nGridsRead and set nOldGridRead.
+            ! If nOldGridRead == 0, i.e. not a valid grid is found,
+            ! print an error message and terminate.
 
-             ! Take care of the exceptional situation that nn == 0.
-             ! This happens when the restart file ends at with an
-             ! integer, but does not correspond to a time spectral
-             ! solution. Allocate the memory.
+            nGridsRead = nn - 1
+            nOldGridRead = nGridsRead
 
-             nGridsRead = max(nn, 1_intType)
-             allocate(fileIDs(nGridsRead), &
-                  gridFiles(nGridsRead), stat=ierr)
-             if(ierr /= 0)                              &
-                  call terminate("determineGridFileNames", &
-                  "Memory allocation failure for fileIDs &
-                  &and gridFiles")
+            if (nOldGridRead == 0) then
+                if (myID == 0) &
+                    call terminate("determineGridFileNames", &
+                                   "Grid file(s) could not be opened")
+                call mpi_barrier(ADflow_comm_world, ierr)
+            end if
+
+            !===============================================================
+
+        case (timeSpectral)
+
+            ! Time spectral computation. A further check is required.
+            ! EJ: replaced boolean variable restart with .false. for now
+            ! Need to refactor this code as well with ALE restart
+
+            testMultipleTS: if (deforming_Grid .and. .false.) then
+
+                ! A restart is made with deforming meshes. For a consistent
+                ! restart multiple grids must be read. First determine the
+                ! the prefix of the grid file from which a restart should
+                ! be made.
+
+                ii = len_trim(gridFile)
+                do
+                    if (gridFile(ii:ii) < "0" .or. gridFile(ii:ii) > "9") exit
+                    ii = ii - 1
+                end do
+
+                ! If the last characters of the file name do not contain a
+                ! number, the grid file does not come from a previous
+                ! time spectral deforming mesh computation and therefore
+                ! only one grid will be read.
+
+                if (ii == len_trim(gridFile)) then
+
+                    nGridsRead = 1
+                    allocate (fileIDs(nGridsRead), &
+                              gridFiles(nGridsRead), stat=ierr)
+                    if (ierr /= 0) &
+                         call terminate("determineGridFileNames", &
+                         "Memory allocation failure for fileIDs &
+                         &and gridFiles")
+
+                    gridFiles(1) = gridFile
+
+                else
+
+                    ! Loop to find out how many time instances were used in
+                    ! the previous computation from which a restart is made.
+
+                    nn = 0
+                    do
+                        nn = nn + 1
+                        write (integerString, "(i6)") nn
+                        integerString = adjustl(integerString)
+                        tmpName = gridFile(:ii)//trim(integerString)
+
+                        open (unit=21, file=tmpName, status="old", iostat=ierr)
+                        if (ierr /= 0) exit
+                        close (unit=21)
+                    end do
+
+                    nn = nn - 1
+
+                    ! Take care of the exceptional situation that nn == 0.
+                    ! This happens when the restart file ends at with an
+                    ! integer, but does not correspond to a time spectral
+                    ! solution. Allocate the memory.
+
+                    nGridsRead = max(nn, 1_intType)
+                    allocate (fileIDs(nGridsRead), &
+                              gridFiles(nGridsRead), stat=ierr)
+                    if (ierr /= 0) &
+                         call terminate("determineGridFileNames", &
+                         "Memory allocation failure for fileIDs &
+                         &and gridFiles")
+
+                    if (nn == 0) then
+                        gridFiles(1) = gridFile
+                    else
+                        do nn = 1, nGridsRead
+                            write (integerString, "(i6)") nn
+                            integerString = adjustl(integerString)
+                            gridFiles(nn) = gridFile(:ii)//trim(integerString)
+                        end do
+                    end if
+
+                    ! Check whether or not the coordinates must be interpolated,
+                    ! i.e. check if nGridsRead == nTimeIntervalsSpectral.
+
+                    if (nGridsRead == nTimeIntervalsSpectral) &
+                        interpolSpectral = .false.
+
+                end if
+
+                else testMultipleTS
+
+                ! The computation either starts from scratch or a
+                ! restart for a rigid grid (possibly moving) is made. In
+                ! all cases only one grid file is needed.
+
+                nGridsRead = 1
+                allocate (fileIDs(nGridsRead), &
+                          gridFiles(nGridsRead), stat=ierr)
+                if (ierr /= 0) &
+                     call terminate("determineGridFileNames", &
+                     "Memory allocation failure for fileIDs &
+                     &and gridFiles")
 
-             if(nn == 0) then
                 gridFiles(1) = gridFile
-             else
-                do nn=1,nGridsRead
-                   write(integerString,"(i6)") nn
-                   integerString = adjustl(integerString)
-                   gridFiles(nn) = gridFile(:ii)//trim(integerString)
-                enddo
-             endif
-
-             ! Check whether or not the coordinates must be interpolated,
-             ! i.e. check if nGridsRead == nTimeIntervalsSpectral.
 
-             if(nGridsRead == nTimeIntervalsSpectral) &
-                  interpolSpectral = .false.
+            end if testMultipleTS
 
-          endif
+        end select
 
-       else testMultipleTS
+    end subroutine determineGridFileNames
 
-          ! The computation either starts from scratch or a
-          ! restart for a rigid grid (possibly moving) is made. In
-          ! all cases only one grid file is needed.
+    subroutine determineNeighborIDs
+        !
+        !       determineNeighborIDs determines for every internal block
+        !       boundary the block ID of the neighbor. In the cgns file only
+        !       the zone name is stored, but the ID's are more useful
+        !       internally.
+        !       Although for this case a quadratic search algorithm is not too
+        !       bad (number of blocks are O(1000) maximum), I don't like the
+        !       idea of having a quadratic loop in the code. That's why a
+        !       O(n log(n)) algorithm is used here.
+        !
+        use constants
+        use cgnsGrid, only: cgnsDoms, cgnsNDOm
+        use utils, only: terminate
+        use sorting, only: qsortstrings, bsearchstrings
+        implicit none
+        !
+        !      Local variables
+        !
+        character(len=maxCGNSNameLen), dimension(cgnsNDom) :: zoneNames
 
-          nGridsRead = 1
-          allocate(fileIDs(nGridsRead), &
-               gridFiles(nGridsRead), stat=ierr)
-          if(ierr /= 0)                              &
-               call terminate("determineGridFileNames", &
-               "Memory allocation failure for fileIDs &
-               &and gridFiles")
+        integer(kind=intType), dimension(cgnsNDom) :: zoneNumbers
 
-          gridFiles(1) = gridFile
+        integer(kind=intType) :: i, j, k, ii
+        !
+        ! Copy the zone name from the derived data type into zoneNames.
 
-       endif testMultipleTS
+        do i = 1, cgnsNDom
+            zoneNames(i) = cgnsDoms(i)%zoneName
+        end do
 
-    end select
+        ! Sort zoneNames in increasing order.
 
-  end subroutine determineGridFileNames
+        call qsortStrings(zoneNames, cgnsNDom)
 
-  subroutine determineNeighborIDs
-    !
-    !       determineNeighborIDs determines for every internal block
-    !       boundary the block ID of the neighbor. In the cgns file only
-    !       the zone name is stored, but the ID's are more useful
-    !       internally.
-    !       Although for this case a quadratic search algorithm is not too
-    !       bad (number of blocks are O(1000) maximum), I don't like the
-    !       idea of having a quadratic loop in the code. That's why a
-    !       O(n log(n)) algorithm is used here.
-    !
-    use constants
-    use cgnsGrid, only : cgnsDoms, cgnsNDOm
-    use utils, only : terminate
-    use sorting, only: qsortstrings, bsearchstrings
-    implicit none
-    !
-    !      Local variables
-    !
-    character(len=maxCGNSNameLen), dimension(cgnsNDom) :: zoneNames
+        ! Initialize zoneNumbers to -1. This serves as a check during
+        ! the search.
 
-    integer(kind=intType), dimension(cgnsNDom) :: zoneNumbers
+        zoneNumbers = -1
 
-    integer(kind=intType) :: i, j, k, ii
-    !
-    ! Copy the zone name from the derived data type into zoneNames.
+        ! Find the original zone ids for the sorted zone names.
 
-    do i=1,cgnsNDom
-       zoneNames(i) = cgnsDoms(i)%zoneName
-    enddo
+        do i = 1, cgnsNDom
+            ii = bsearchStrings(cgnsDoms(i)%zoneName, zoneNames)
 
-    ! Sort zoneNames in increasing order.
+            ! Check if the zone number is not already taken. If this is the
+            ! case, this means that the grid file contains two identical
+            ! zone names.
 
-    call qsortStrings(zoneNames, cgnsNDom)
+            if (zoneNumbers(ii) /= -1) &
+                 call terminate("determineNeighborIDs", &
+                 "Error occurs only when two identical zone &
+                 &names are present")
 
-    ! Initialize zoneNumbers to -1. This serves as a check during
-    ! the search.
+            ! And set the zone number.
 
-    zoneNumbers = -1
+            zoneNumbers(ii) = i
+        end do
 
-    ! Find the original zone ids for the sorted zone names.
+        ! Loop over the blocks and its connectivities to find out the
+        ! neighbors.
 
-    do i=1,cgnsNDom
-       ii = bsearchStrings(cgnsDoms(i)%zoneName, zoneNames)
+        domains: do i = 1, cgnsNDom
 
-       ! Check if the zone number is not already taken. If this is the
-       ! case, this means that the grid file contains two identical
-       ! zone names.
+            ! The 1-to-1 connectivities.
 
-       if(zoneNumbers(ii) /= -1)                &
-            call terminate("determineNeighborIDs", &
-            "Error occurs only when two identical zone &
-            &names are present")
+            do j = 1, cgnsDoms(i)%n1to1
 
-       ! And set the zone number.
+                ! Determine the neighbor ID for this internal block boundary.
 
-       zoneNumbers(ii) = i
-    enddo
+                ii = bsearchStrings(cgnsDoms(i)%conn1to1(j)%donorName, zoneNames)
+                if (ii == 0) &
+                    call terminate("determineNeighborIDs", &
+                                   "donor name not found in sorted zone names")
 
-    ! Loop over the blocks and its connectivities to find out the
-    ! neighbors.
+                cgnsDoms(i)%conn1to1(j)%donorBlock = zoneNumbers(ii)
 
-    domains: do i=1,cgnsNDom
+            end do
 
-       ! The 1-to-1 connectivities.
+            ! The non-matching abutting connectivities.
 
-       do j=1,cgnsDoms(i)%n1to1
+            do j = 1, cgnsDoms(i)%nNonMatchAbutting
 
-          ! Determine the neighbor ID for this internal block boundary.
+                ! Determine the neighbor ID's for this subface.
 
-          ii = bsearchStrings(cgnsDoms(i)%conn1to1(j)%donorName, zoneNames)
-          if(ii == 0)                              &
-               call terminate("determineNeighborIDs", &
-               "donor name not found in sorted zone names")
+                do k = 1, cgnsDoms(i)%connNonMatchAbutting(j)%nDonorBlocks
 
-          cgnsDoms(i)%conn1to1(j)%donorBlock = zoneNumbers(ii)
+                    ii = bsearchStrings( &
+                         cgnsDoms(i)%connNonMatchAbutting(j)%donorNames(k), zoneNames)
+                    if (ii == 0) &
+                        call terminate("determineNeighborIDs", &
+                                       "donor name not found in sorted zone names")
 
-       enddo
+                    cgnsDoms(i)%connNonMatchAbutting(j)%donorBlocks(k) = &
+                        zoneNumbers(ii)
+                end do
+            end do
+
+        end do domains
+
+    end subroutine determineNeighborIDs
+
+    subroutine determineInterfaceIDs
+        !
+        !       DetermineInterfaceIDs determines more information for both the
+        !       sliding mesh and domain interfaces with other codes, which are
+        !       both specified as user defined boundary conditions in CGNS. In
+        !       particular the number of sliding mesh interfaces and their
+        !       pairings, and number of interfaces with other codes are
+        !       determined.
+        !       There are some parts in the coupler API routines where
+        !       the family-specified domain interfaces are implicitly assumed.
+        !       Therefore, it is recommended for the time being that all the
+        !       domain interfaces should be family-specified.
+        !
+        use constants
+        use cgnsGrid, only: cgnsDoms, cgnsNDom, cgnsFamilies, &
+                            famIDsDomainInterfaces, bcIDsDomainInterfaces, famIDsSliding, &
+                            cgnsNFamilies, cgnsNSliding, cgnsNDomainInterfaces
+        use communication, only: adflow_comm_world, myid
+        use iteration, only: standAloneMode
+        use utils, only: terminate
+        use sorting, only: qsortstrings, bsearchstrings
+        use commonFormats, only: stringSpace
 
-       ! The non-matching abutting connectivities.
-
-       do j=1,cgnsDoms(i)%nNonMatchAbutting
-
-          ! Determine the neighbor ID's for this subface.
-
-          do k=1,cgnsDoms(i)%connNonMatchAbutting(j)%nDonorBlocks
-
-             ii = bsearchStrings(&
-                  cgnsDoms(i)%connNonMatchAbutting(j)%donorNames(k), zoneNames)
-             if(ii == 0)                              &
-                  call terminate("determineNeighborIDs", &
-                  "donor name not found in sorted zone names")
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
 
-             cgnsDoms(i)%connNonMatchAbutting(j)%donorBlocks(k) = &
-                  zoneNumbers(ii)
-          enddo
-       enddo
-
-    enddo domains
-
-  end subroutine determineNeighborIDs
-
-
-  subroutine determineInterfaceIDs
-    !
-    !       DetermineInterfaceIDs determines more information for both the
-    !       sliding mesh and domain interfaces with other codes, which are
-    !       both specified as user defined boundary conditions in CGNS. In
-    !       particular the number of sliding mesh interfaces and their
-    !       pairings, and number of interfaces with other codes are
-    !       determined.
-    !       There are some parts in the coupler API routines where
-    !       the family-specified domain interfaces are implicitly assumed.
-    !       Therefore, it is recommended for the time being that all the
-    !       domain interfaces should be family-specified.
-    !
-    use constants
-    use cgnsGrid, only : cgnsDoms, cgnsNDom, cgnsFamilies, &
-         famIDsDomainInterfaces, bcIDsDomainInterfaces, famIDsSliding, &
-         cgnsNFamilies, cgnsNSliding, cgnsNDomainInterfaces
-    use communication, only : adflow_comm_world, myid
-    use iteration, only : standAloneMode
-    use utils, only : terminate
-    use sorting, only: qsortstrings, bsearchstrings
-    use commonFormats, only : stringSpace
+        integer(kind=intType) :: ii, jj, mm, nn
+        integer(kind=intType) :: nSlidingFam, nSlidingBC, nSliding
+        integer(kind=intType) :: nDomainFam, nDomainBC
 
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
+        integer(kind=intType), dimension(:), allocatable :: famSlidingID
+        integer(kind=intType), dimension(:, :), allocatable :: bcSlidingID
+        integer(kind=intType), dimension(:), allocatable :: orID
 
-    integer(kind=intType) :: ii, jj, mm, nn
-    integer(kind=intType) :: nSlidingFam, nSlidingBC, nSliding
-    integer(kind=intType) :: nDomainFam,  nDomainBC
+        character(len=maxStringLen) :: errorMessage
 
-    integer(kind=intType), dimension(:),   allocatable :: famSlidingID
-    integer(kind=intType), dimension(:,:), allocatable :: bcSlidingID
-    integer(kind=intType), dimension(:),   allocatable :: orID
+        character(len=maxCGNSNameLen), dimension(:), allocatable :: &
+            namesSliding
+        character(len=maxCGNSNameLen), dimension(:), allocatable :: &
+            namesSorted
+
+        logical :: validInterface
+        !
+        ! Count the total number of each type of user-defined BC that
+        ! needs to be treated here. Note that if a BC is specified by the
+        ! the family it is not counted, such that BCs making up a user-
+        ! defined family are only counted once.
 
-    character(len=maxStringLen) :: errorMessage
-
-    character(len=maxCGNSNameLen), dimension(:), allocatable :: &
-         namesSliding
-    character(len=maxCGNSNameLen), dimension(:), allocatable :: &
-         namesSorted
-
-    logical :: validInterface
-    !
-    ! Count the total number of each type of user-defined BC that
-    ! needs to be treated here. Note that if a BC is specified by the
-    ! the family it is not counted, such that BCs making up a user-
-    ! defined family are only counted once.
-
-    nSlidingFam = 0
-    nSlidingBC  = 0
-    nDomainFam  = 0
-    nDomainBC   = 0
-
-    do nn=1,cgnsNFamilies
-
-       select case (cgnsFamilies(nn)%BCType)
-       case (SlidingInterface)
-          nSlidingFam = nSlidingFam + 1
-       case (DomainInterfaceAll, DomainInterfaceRhoUVW, &
-            DomainInterfaceP,   DomainInterfaceRho,    &
-            DomainInterfaceTotal)
-          nDomainFam = nDomainFam + 1
-       end select
-
-    enddo
-
-    do nn=1,cgnsNDom
-       do mm=1,cgnsDoms(nn)%nBocos
-          if (cgnsDoms(nn)%bocoInfo(mm)%actualFace .and. &
-               cgnsDoms(nn)%bocoInfo(mm)%familyID == 0) then
-
-             select case (cgnsDoms(nn)%bocoInfo(mm)%BCType)
-             case (SlidingInterface)
-                nSlidingBC = nSlidingBC + 1
-             case (DomainInterfaceAll, DomainInterfaceRhoUVW, &
-                  DomainInterfaceP  , DomainInterfaceRho,    &
+        nSlidingFam = 0
+        nSlidingBC = 0
+        nDomainFam = 0
+        nDomainBC = 0
+
+        do nn = 1, cgnsNFamilies
+
+            select case (cgnsFamilies(nn)%BCType)
+            case (SlidingInterface)
+                nSlidingFam = nSlidingFam + 1
+            case (DomainInterfaceAll, DomainInterfaceRhoUVW, &
+                  DomainInterfaceP, DomainInterfaceRho, &
                   DomainInterfaceTotal)
-                nDomainBC = nDomainBC + 1
-             end select
-
-          end if
-       end do
-    end do
-
-    ! Domain interfaces are only allowed when the code is run in a
-    ! multi-disciplinary environment. Check this.
+                nDomainFam = nDomainFam + 1
+            end select
+
+        end do
+
+        do nn = 1, cgnsNDom
+            do mm = 1, cgnsDoms(nn)%nBocos
+                if (cgnsDoms(nn)%bocoInfo(mm)%actualFace .and. &
+                    cgnsDoms(nn)%bocoInfo(mm)%familyID == 0) then
+
+                    select case (cgnsDoms(nn)%bocoInfo(mm)%BCType)
+                    case (SlidingInterface)
+                        nSlidingBC = nSlidingBC + 1
+                    case (DomainInterfaceAll, DomainInterfaceRhoUVW, &
+                          DomainInterfaceP, DomainInterfaceRho, &
+                          DomainInterfaceTotal)
+                        nDomainBC = nDomainBC + 1
+                    end select
+
+                end if
+            end do
+        end do
+
+        ! Domain interfaces are only allowed when the code is run in a
+        ! multi-disciplinary environment. Check this.
+
+        cgnsNDomainInterfaces = nDomainFam + nDomainBC
+
+        if (standAloneMode .and. cgnsNDomainInterfaces > 0) then
+            if (myID == 0) &
+                 call terminate("determineInterfaceIDs", &
+                 "Domain interfaces are not allowed in &
+                 &stand alone mode.")
+            call mpi_barrier(ADflow_comm_world, ierr)
+        end if
+
+        ! The number of sliding mesh interfaces must be even, because each
+        ! sliding interface should have two sides. Check this.
+
+        nSliding = nSlidingFam + nSlidingBC
+        if (mod(nSliding, 2) == 1) then
+            if (myID == 0) &
+                call terminate("determineInterfaceIDs", &
+                               "Odd number of sliding mesh families found")
+            call mpi_barrier(ADflow_comm_world, ierr)
+        end if
+
+        ! Allocate memory to store the names and IDs for the interfaces.
+
+        allocate (famIDsDomainInterfaces(nDomainFam), &
+                  bcIDsDomainInterfaces(2, nDomainBC), &
+                  namesSliding(nSliding), namesSorted(nSliding), &
+                  orID(nSliding), famSlidingID(nSlidingFam), &
+                  bcSlidingID(2, nSlidingBC), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("determineInterfaceIDs", &
+                           "Memory allocation failure for names, IDs")
 
-    cgnsNDomainInterfaces = nDomainFam + nDomainBC
+        ! Loop back over the families again and store the names and
+        ! IDs this time around. Note the ID is just the index of the
+        ! corresponding family.
 
-    if(standAloneMode .and. cgnsNDomainInterfaces > 0) then
-       if(myID == 0) &
-            call terminate("determineInterfaceIDs", &
-            "Domain interfaces are not allowed in &
-            &stand alone mode.")
-       call mpi_barrier(ADflow_comm_world, ierr)
-    endif
+        ii = 0
+        jj = 0
 
-    ! The number of sliding mesh interfaces must be even, because each
-    ! sliding interface should have two sides. Check this.
+        do nn = 1, cgnsNFamilies
 
-    nSliding = nSlidingFam + nSlidingBC
-    if(mod(nSliding,2) == 1) then
-       if(myID == 0)                              &
-            call terminate("determineInterfaceIDs", &
-            "Odd number of sliding mesh families found")
-       call mpi_barrier(ADflow_comm_world, ierr)
-    endif
-
-    ! Allocate memory to store the names and IDs for the interfaces.
-
-    allocate(famIDsDomainInterfaces(nDomainFam),            &
-         bcIDsDomainInterfaces(2,nDomainBC),            &
-         namesSliding(nSliding), namesSorted(nSliding), &
-         orID(nSliding), famSlidingID(nSlidingFam),     &
-         bcSlidingID(2,nSlidingBC), stat=ierr)
-    if(ierr /= 0) &
-         call terminate("determineInterfaceIDs", &
-         "Memory allocation failure for names, IDs")
-
-    ! Loop back over the families again and store the names and
-    ! IDs this time around. Note the ID is just the index of the
-    ! corresponding family.
-
-    ii = 0
-    jj = 0
-
-    do nn=1,cgnsNFamilies
-
-       select case (cgnsFamilies(nn)%BCType)
-       case (SlidingInterface)
-          ii = ii + 1
-          namesSliding(ii) = cgnsFamilies(nn)%familyName
-          famSlidingID(ii) = nn
-       case (DomainInterfaceAll, DomainInterfaceRhoUVW, &
-            DomainInterfaceP,   DomainInterfaceRho,    &
-            DomainInterfaceTotal)
-          jj = jj + 1
-          famIDsDomainInterfaces(jj) = nn
-       end select
-
-    enddo
-
-    ! Loop back over the boundary conditions again and store the
-    ! names and IDs this time around. Note the ID has two parts:
-    ! the domain and the index of the BC info in that domain.
-
-    jj = 0
-
-    do nn=1,cgnsNDom
-       do mm=1,cgnsDoms(nn)%nBocos
-          if (cgnsDoms(nn)%bocoInfo(mm)%actualFace .and. &
-               cgnsDoms(nn)%bocoInfo(mm)%familyID == 0) then
-
-             select case (cgnsDoms(nn)%bocoInfo(mm)%BCType)
-             case (SlidingInterface)
+            select case (cgnsFamilies(nn)%BCType)
+            case (SlidingInterface)
                 ii = ii + 1
-                namesSliding(ii) = cgnsDoms(nn)%bocoInfo(mm)%bocoName
-                bcSlidingID(:,ii-nSlidingFam) = (/ nn, mm /)
-             case (DomainInterfaceAll, DomainInterfaceRhoUVW, &
-                  DomainInterfaceP,   DomainInterfaceRho,    &
+                namesSliding(ii) = cgnsFamilies(nn)%familyName
+                famSlidingID(ii) = nn
+            case (DomainInterfaceAll, DomainInterfaceRhoUVW, &
+                  DomainInterfaceP, DomainInterfaceRho, &
                   DomainInterfaceTotal)
                 jj = jj + 1
-                bcIDsDomainInterfaces(:,jj) = (/ nn, mm /)
-             end select
+                famIDsDomainInterfaces(jj) = nn
+            end select
 
-          end if
-       end do
-    end do
+        end do
 
-    ! Initialize orID to -1, which serves as a check later on, and
-    ! copy the names of the sliding mesh families in namesSorted.
+        ! Loop back over the boundary conditions again and store the
+        ! names and IDs this time around. Note the ID has two parts:
+        ! the domain and the index of the BC info in that domain.
 
-    do ii=1,nSliding
-       orID(ii)        = -1
-       namesSorted(ii) = namesSliding(ii)
-    enddo
+        jj = 0
 
-    ! Sort the names of the sliding mesh families in increasing
-    ! order and find the corresponding entry in namesSliding.
+        do nn = 1, cgnsNDom
+            do mm = 1, cgnsDoms(nn)%nBocos
+                if (cgnsDoms(nn)%bocoInfo(mm)%actualFace .and. &
+                    cgnsDoms(nn)%bocoInfo(mm)%familyID == 0) then
 
-    call qsortStrings(namesSorted, nSliding)
+                    select case (cgnsDoms(nn)%bocoInfo(mm)%BCType)
+                    case (SlidingInterface)
+                        ii = ii + 1
+                        namesSliding(ii) = cgnsDoms(nn)%bocoInfo(mm)%bocoName
+                        bcSlidingID(:, ii - nSlidingFam) = (/nn, mm/)
+                    case (DomainInterfaceAll, DomainInterfaceRhoUVW, &
+                          DomainInterfaceP, DomainInterfaceRho, &
+                          DomainInterfaceTotal)
+                        jj = jj + 1
+                        bcIDsDomainInterfaces(:, jj) = (/nn, mm/)
+                    end select
 
-    do ii=1,nSliding
+                end if
+            end do
+        end do
 
-       ! Search the sorted strings.
+        ! Initialize orID to -1, which serves as a check later on, and
+        ! copy the names of the sliding mesh families in namesSorted.
 
-       mm = bsearchStrings(namesSliding(ii), namesSorted)
+        do ii = 1, nSliding
+            orID(ii) = -1
+            namesSorted(ii) = namesSliding(ii)
+        end do
 
-       if(orID(mm) /= -1) then
+        ! Sort the names of the sliding mesh families in increasing
+        ! order and find the corresponding entry in namesSliding.
 
-          ! Family name occurs more than once. This is not allowed.
+        call qsortStrings(namesSorted, nSliding)
 
-          write(errorMessage, stringSpace) "Family name", trim(namesSliding(ii)), "occurs more than once."
-          if(myID == 0) &
-               call terminate("determineInterfaceIDs", errorMessage)
-          call mpi_barrier(ADflow_comm_world, ierr)
+        do ii = 1, nSliding
 
-       endif
+            ! Search the sorted strings.
 
-       ! Store the entry in orID.
+            mm = bsearchStrings(namesSliding(ii), namesSorted)
 
-       orID(mm) = ii
+            if (orID(mm) /= -1) then
 
-    enddo
+                ! Family name occurs more than once. This is not allowed.
 
-    ! Set the number of sliding mesh interfaces and allocate the
-    ! memory for famIDsSliding.
+                write (errorMessage, stringSpace) "Family name", trim(namesSliding(ii)), "occurs more than once."
+                if (myID == 0) &
+                    call terminate("determineInterfaceIDs", errorMessage)
+                call mpi_barrier(ADflow_comm_world, ierr)
 
-    cgnsNSliding = nSliding/2
-    allocate(famIDsSliding(cgnsNSliding,2), stat=ierr)
-    if(ierr /= 0) &
-         call terminate("determineInterfaceIDs", &
-         "Memory allocation failure for famIDsSliding")
+            end if
 
-    ! Check if the sorted family names indeed form a
-    ! sliding mesh interface.
+            ! Store the entry in orID.
 
-    do ii=1,cgnsNSliding
+            orID(mm) = ii
 
-       ! Store the entries in namesSorted, which should form a sliding
-       ! interface, a bit easier.
+        end do
 
-       nn = 2*ii
-       mm = nn - 1
+        ! Set the number of sliding mesh interfaces and allocate the
+        ! memory for famIDsSliding.
 
-       ! Store the original values in famIDsSliding.
+        cgnsNSliding = nSliding / 2
+        allocate (famIDsSliding(cgnsNSliding, 2), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("determineInterfaceIDs", &
+                           "Memory allocation failure for famIDsSliding")
 
-       jj = orID(nn)/2
+        ! Check if the sorted family names indeed form a
+        ! sliding mesh interface.
 
-       famIDsSliding(jj,1) = famSlidingID(orID(mm))
-       famIDsSliding(jj,2) = famSlidingID(orID(nn))
+        do ii = 1, cgnsNSliding
 
-       ! Check if the names form a valid sliding interface.
+            ! Store the entries in namesSorted, which should form a sliding
+            ! interface, a bit easier.
 
-       validInterface = .true.
-       if(len_trim(namesSorted(nn)) /= len_trim(namesSorted(mm))) &
-            validInterface = .false.
+            nn = 2 * ii
+            mm = nn - 1
 
-       jj = len_trim(namesSorted(nn)) - 2
-       if(jj <= 0) then
-          validInterface = .false.
-       else if(namesSorted(nn)(:jj) /= namesSorted(mm)(:jj)) then
-          validInterface = .false.
-       endif
+            ! Store the original values in famIDsSliding.
 
-       ! Print an error message and exit if the two families do not
-       ! form a valid sliding interface.
+            jj = orID(nn) / 2
 
-       if(.not. validInterface) then
-          write(errorMessage, stringSpace) "Family names", trim(namesSliding(nn)), "and", &
-            trim(namesSliding(mm)), "do not form a valid sliding mesh interface"
-          if(myID == 0) &
-               call terminate("determineInterfaceIDs", errorMessage)
-          call mpi_barrier(ADflow_comm_world, ierr)
-       endif
+            famIDsSliding(jj, 1) = famSlidingID(orID(mm))
+            famIDsSliding(jj, 2) = famSlidingID(orID(nn))
 
-       ! The two names form a valid sliding interface. Store the
-       ! sliding interface ID for the original family or BC. Note that
-       ! one gets a positive ID and the other one a negative ID.
-       ! In this way a distinction is made between the two sides.
+            ! Check if the names form a valid sliding interface.
 
-       if (orID(mm) > nSlidingFam) then
-          jj = bcSlidingID(2,orID(mm))
-          mm = bcSlidingID(1,orID(mm))
-          cgnsDoms(mm)%bocoInfo(jj)%slidingID = -ii
-       else
-          mm = famSlidingID(orID(mm))
-          cgnsFamilies(mm)%slidingID = -ii
-       end if
-
-       if (orID(nn) > nSlidingFam) then
-          jj = bcSlidingID(2,orID(nn))
-          nn = bcSlidingID(1,orID(nn))
-          cgnsDoms(nn)%bocoInfo(jj)%slidingID = ii
-       else
-          nn = famSlidingID(orID(nn))
-          cgnsFamilies(nn)%slidingID = ii
-       end if
-
-    enddo
-
-    ! Loop over all the boundary conditions again, this time
-    ! copying the sliding interface IDs for those BCs that
-    ! were part of a sliding family.
-
-    do nn=1,cgnsNDom
-       do mm=1,cgnsDoms(nn)%nBocos
-          if (cgnsDoms(nn)%bocoInfo(mm)%actualFace .and.   &
-               cgnsDoms(nn)%bocoInfo(mm)%familyID > 0 .and. &
-               cgnsDoms(nn)%bocoInfo(mm)%BCType == SlidingInterface) then
-
-             cgnsDoms(nn)%bocoInfo(mm)%slidingID = &
-                  cgnsFamilies(cgnsDoms(nn)%bocoInfo(mm)%familyID)%slidingID
-
-          end if
-       end do
-    end do
-
-    ! Deallocate the memory used to determine the sliding IDs.
-
-    deallocate(namesSliding, namesSorted, orID, &
-         famSlidingID, bcSlidingID, stat=ierr)
-    if(ierr /= 0) &
-         call terminate("determineInterfaceIDs", &
-         "Deallocation failure for names, IDs")
-
-  end subroutine determineInterfaceIDs
-
-  subroutine initFineGridIblank
-    !
-    !       InitFineGridIblank allocates the fine grid iblank array and
-    !       initializes the values for the holes, boundary, and halos. The
-    !       holes read into the cgns domains are distributed amongst its
-    !       sublocks in the form of iblanks. That is, we do not store a
-    !       list of indices for the holes of the flow domains as done in
-    !       the CGNS. The number of holes in each domain are also counted.
-    !
-    use constants
-    use block, only : nDom, flowDoms
-    use utils, only : terminate
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: i, j, k, l, m, n, cgnsId, sps
-
-    ! Loop over the local blocks.
-    spectralLoop: do sps=1,nTimeIntervalsSpectral
-       domains: do n = 1,nDom
-
-          ! Allocate memory for the iblank array of this block.
-
-          i = flowDoms(n,1,sps)%ib
-          j = flowDoms(n,1,sps)%jb
-          k = flowDoms(n,1,sps)%kb
-          allocate(flowDoms(n,1,sps)%iblank(0:i,0:j,0:k), &
-               flowDoms(n,1,sps)%forcedRecv(0:i,0:j,0:k), &
-               flowDoms(n,1,sps)%status(0:i,0:j,0:k), &
-               stat=ierr)
-          if(ierr /= 0)                             &
-               call terminate("initFineGridIblank", &
-               "Memory allocation failure for iblank")
-
-          ! Initialize iblank to 1 everywhere, and the number of holes
-          ! for this domain to 0.
-
-          flowDoms(n,1,sps)%iblank = 1
-          flowDoms(n,1,sps)%forcedRecv = 0
-          flowDoms(n,1,sps)%status = 0
-       end do domains
-    end do spectralLoop
-
-  end subroutine initFineGridIblank
-
-  subroutine timePeriodSpectral
-    !
-    !       timePeriodSpectral determines the time of one period for the
-    !       time spectral method. It is possible that sections have
-    !       different periodic times.
-    !
-    use constants
-    use communication, only : myID, adflow_comm_world
-    use inputMotion, only : degreeFourXRot, degreeFourYRot, degreeFourZrot, &
-         omegaFourAlpha, omegaFourBeta, omegaFourMach, omegaFourXRot, &
-         omegaFourYRot, omegaFourZRot, degreeFourMach, degreeFourAlpha, &
-         degreeFourBeta
-    use inputPhysics, only : equationMOde, flowType
-    use inputTimeSpectral, only : omegaFourier
-    use section, only : sections, nSections
-    use utils, only : terminate
-    implicit none
-    !
-    !      Local parameter.
-    !
-    real(kind=realType), parameter :: tol  = 1.e-5_realType
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-    integer(kind=intType) :: nn
-
-    real(kind=realType) :: tt, omega
-    real(kind=realType) :: timePeriod
-
-    logical :: timeDetermined
-
-    ! This routine is only used for the spectral solutions. Return
-    ! immediately if a different mode is solved.
-
-    if(equationMode /= timeSpectral) return
-
-    ! First check if a rotational frequency has been specified.
-    ! Only for external flows.
-
-    timeDetermined = .false.
-
-
-    externalTest: if(flowType == externalFlow) then
-
-       ! X-rotation.
-
-       if(degreeFourXRot > 0) then
-          timePeriod     = two*pi/omegaFourXRot
-          timeDetermined = .true.
-       endif
-
-       ! Y-rotation.
-
-       if(degreeFourYRot > 0) then
-          tt = two*pi/omegaFourYRot
-
-          ! Check if a time period was already determined. If so, try
-          ! to determine a common time. Otherwise just copy the data.
-
-          if( timeDetermined ) then
-             timePeriod = commonTimeSpectral(timePeriod, tt)
-          else
-             timePeriod     = tt
-             timeDetermined = .true.
-          endif
-       endif
-
-       ! Z-rotation.
-
-       if(degreeFourZRot > 0) then
-          tt = two*pi/omegaFourZRot
-
-          ! Check if a time period was already determined. If so, try
-          ! to determine a common time. Otherwise just copy the data.
-
-          if( timeDetermined ) then
-             timePeriod = commonTimeSpectral(timePeriod, tt)
-          else
-             timePeriod     = tt
-             timeDetermined = .true.
-          endif
-       endif
-
-
-       ! Alpha
-       !print *,'degreeFourAlpha',degreefouralpha,omegafouralpha,sincoeffouralpha
-       if(degreeFourAlpha > 0) then
-          tt = two*pi/omegaFourAlpha
-          !print *,'timePeriod',tt
-          ! Check if a time period was already determined. If so, try
-          ! to determine a common time. Otherwise just copy the data.
-
-          if( timeDetermined ) then
-             timePeriod = commonTimeSpectral(timePeriod, tt)
-          else
-             timePeriod     = tt
-             timeDetermined = .true.
-          endif
-       endif
-
-
-       ! Beta
-
-       if(degreeFourBeta > 0) then
-          tt = two*pi/omegaFourBeta
-
-          ! Check if a time period was already determined. If so, try
-          ! to determine a common time. Otherwise just copy the data.
-
-          if( timeDetermined ) then
-             timePeriod = commonTimeSpectral(timePeriod, tt)
-          else
-             timePeriod     = tt
-             timeDetermined = .true.
-          endif
-       endif
+            validInterface = .true.
+            if (len_trim(namesSorted(nn)) /= len_trim(namesSorted(mm))) &
+                validInterface = .false.
 
-       ! Mach
+            jj = len_trim(namesSorted(nn)) - 2
+            if (jj <= 0) then
+                validInterface = .false.
+            else if (namesSorted(nn) (:jj) /= namesSorted(mm) (:jj)) then
+                validInterface = .false.
+            end if
 
-       if(degreeFourMach > 0) then
-          tt = two*pi/omegaFourMach
+            ! Print an error message and exit if the two families do not
+            ! form a valid sliding interface.
 
-          ! Check if a time period was already determined. If so, try
-          ! to determine a common time. Otherwise just copy the data.
+            if (.not. validInterface) then
+                write (errorMessage, stringSpace) "Family names", trim(namesSliding(nn)), "and", &
+                    trim(namesSliding(mm)), "do not form a valid sliding mesh interface"
+                if (myID == 0) &
+                    call terminate("determineInterfaceIDs", errorMessage)
+                call mpi_barrier(ADflow_comm_world, ierr)
+            end if
+
+            ! The two names form a valid sliding interface. Store the
+            ! sliding interface ID for the original family or BC. Note that
+            ! one gets a positive ID and the other one a negative ID.
+            ! In this way a distinction is made between the two sides.
+
+            if (orID(mm) > nSlidingFam) then
+                jj = bcSlidingID(2, orID(mm))
+                mm = bcSlidingID(1, orID(mm))
+                cgnsDoms(mm)%bocoInfo(jj)%slidingID = -ii
+            else
+                mm = famSlidingID(orID(mm))
+                cgnsFamilies(mm)%slidingID = -ii
+            end if
+
+            if (orID(nn) > nSlidingFam) then
+                jj = bcSlidingID(2, orID(nn))
+                nn = bcSlidingID(1, orID(nn))
+                cgnsDoms(nn)%bocoInfo(jj)%slidingID = ii
+            else
+                nn = famSlidingID(orID(nn))
+                cgnsFamilies(nn)%slidingID = ii
+            end if
+
+        end do
+
+        ! Loop over all the boundary conditions again, this time
+        ! copying the sliding interface IDs for those BCs that
+        ! were part of a sliding family.
+
+        do nn = 1, cgnsNDom
+            do mm = 1, cgnsDoms(nn)%nBocos
+                if (cgnsDoms(nn)%bocoInfo(mm)%actualFace .and. &
+                    cgnsDoms(nn)%bocoInfo(mm)%familyID > 0 .and. &
+                    cgnsDoms(nn)%bocoInfo(mm)%BCType == SlidingInterface) then
+
+                    cgnsDoms(nn)%bocoInfo(mm)%slidingID = &
+                        cgnsFamilies(cgnsDoms(nn)%bocoInfo(mm)%familyID)%slidingID
+
+                end if
+            end do
+        end do
+
+        ! Deallocate the memory used to determine the sliding IDs.
+
+        deallocate (namesSliding, namesSorted, orID, &
+                    famSlidingID, bcSlidingID, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("determineInterfaceIDs", &
+                           "Deallocation failure for names, IDs")
+
+    end subroutine determineInterfaceIDs
+
+    subroutine initFineGridIblank
+        !
+        !       InitFineGridIblank allocates the fine grid iblank array and
+        !       initializes the values for the holes, boundary, and halos. The
+        !       holes read into the cgns domains are distributed amongst its
+        !       sublocks in the form of iblanks. That is, we do not store a
+        !       list of indices for the holes of the flow domains as done in
+        !       the CGNS. The number of holes in each domain are also counted.
+        !
+        use constants
+        use block, only: nDom, flowDoms
+        use utils, only: terminate
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: i, j, k, l, m, n, cgnsId, sps
+
+        ! Loop over the local blocks.
+        spectralLoop: do sps = 1, nTimeIntervalsSpectral
+            domains: do n = 1, nDom
+
+                ! Allocate memory for the iblank array of this block.
+
+                i = flowDoms(n, 1, sps)%ib
+                j = flowDoms(n, 1, sps)%jb
+                k = flowDoms(n, 1, sps)%kb
+                allocate (flowDoms(n, 1, sps)%iblank(0:i, 0:j, 0:k), &
+                          flowDoms(n, 1, sps)%forcedRecv(0:i, 0:j, 0:k), &
+                          flowDoms(n, 1, sps)%status(0:i, 0:j, 0:k), &
+                          stat=ierr)
+                if (ierr /= 0) &
+                    call terminate("initFineGridIblank", &
+                                   "Memory allocation failure for iblank")
+
+                ! Initialize iblank to 1 everywhere, and the number of holes
+                ! for this domain to 0.
+
+                flowDoms(n, 1, sps)%iblank = 1
+                flowDoms(n, 1, sps)%forcedRecv = 0
+                flowDoms(n, 1, sps)%status = 0
+            end do domains
+        end do spectralLoop
+
+    end subroutine initFineGridIblank
+
+    subroutine timePeriodSpectral
+        !
+        !       timePeriodSpectral determines the time of one period for the
+        !       time spectral method. It is possible that sections have
+        !       different periodic times.
+        !
+        use constants
+        use communication, only: myID, adflow_comm_world
+        use inputMotion, only: degreeFourXRot, degreeFourYRot, degreeFourZrot, &
+                               omegaFourAlpha, omegaFourBeta, omegaFourMach, omegaFourXRot, &
+                               omegaFourYRot, omegaFourZRot, degreeFourMach, degreeFourAlpha, &
+                               degreeFourBeta
+        use inputPhysics, only: equationMOde, flowType
+        use inputTimeSpectral, only: omegaFourier
+        use section, only: sections, nSections
+        use utils, only: terminate
+        implicit none
+        !
+        !      Local parameter.
+        !
+        real(kind=realType), parameter :: tol = 1.e-5_realType
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+        integer(kind=intType) :: nn
+
+        real(kind=realType) :: tt, omega
+        real(kind=realType) :: timePeriod
+
+        logical :: timeDetermined
+
+        ! This routine is only used for the spectral solutions. Return
+        ! immediately if a different mode is solved.
+
+        if (equationMode /= timeSpectral) return
+
+        ! First check if a rotational frequency has been specified.
+        ! Only for external flows.
+
+        timeDetermined = .false.
+
+        externalTest: if (flowType == externalFlow) then
+
+            ! X-rotation.
+
+            if (degreeFourXRot > 0) then
+                timePeriod = two * pi / omegaFourXRot
+                timeDetermined = .true.
+            end if
+
+            ! Y-rotation.
+
+            if (degreeFourYRot > 0) then
+                tt = two * pi / omegaFourYRot
+
+                ! Check if a time period was already determined. If so, try
+                ! to determine a common time. Otherwise just copy the data.
+
+                if (timeDetermined) then
+                    timePeriod = commonTimeSpectral(timePeriod, tt)
+                else
+                    timePeriod = tt
+                    timeDetermined = .true.
+                end if
+            end if
+
+            ! Z-rotation.
+
+            if (degreeFourZRot > 0) then
+                tt = two * pi / omegaFourZRot
+
+                ! Check if a time period was already determined. If so, try
+                ! to determine a common time. Otherwise just copy the data.
+
+                if (timeDetermined) then
+                    timePeriod = commonTimeSpectral(timePeriod, tt)
+                else
+                    timePeriod = tt
+                    timeDetermined = .true.
+                end if
+            end if
+
+            ! Alpha
+            !print *,'degreeFourAlpha',degreefouralpha,omegafouralpha,sincoeffouralpha
+            if (degreeFourAlpha > 0) then
+                tt = two * pi / omegaFourAlpha
+                !print *,'timePeriod',tt
+                ! Check if a time period was already determined. If so, try
+                ! to determine a common time. Otherwise just copy the data.
+
+                if (timeDetermined) then
+                    timePeriod = commonTimeSpectral(timePeriod, tt)
+                else
+                    timePeriod = tt
+                    timeDetermined = .true.
+                end if
+            end if
+
+            ! Beta
+
+            if (degreeFourBeta > 0) then
+                tt = two * pi / omegaFourBeta
+
+                ! Check if a time period was already determined. If so, try
+                ! to determine a common time. Otherwise just copy the data.
+
+                if (timeDetermined) then
+                    timePeriod = commonTimeSpectral(timePeriod, tt)
+                else
+                    timePeriod = tt
+                    timeDetermined = .true.
+                end if
+            end if
+
+            ! Mach
+
+            if (degreeFourMach > 0) then
+                tt = two * pi / omegaFourMach
 
-          if( timeDetermined ) then
-             timePeriod = commonTimeSpectral(timePeriod, tt)
-          else
-             timePeriod     = tt
-             timeDetermined = .true.
-          endif
-       endif
+                ! Check if a time period was already determined. If so, try
+                ! to determine a common time. Otherwise just copy the data.
 
-       ! aeroelastic case
-       if(omegaFourier > 0) then 
-         tt = two*pi/omegaFourier
+                if (timeDetermined) then
+                    timePeriod = commonTimeSpectral(timePeriod, tt)
+                else
+                    timePeriod = tt
+                    timeDetermined = .true.
+                end if
+            end if
 
-          ! Check if a time period was already determined. If so, try
-          ! to determine a common time. Otherwise just copy the data.
+            ! aeroelastic case
+            if (omegaFourier > 0) then
+                tt = two * pi / omegaFourier
 
-          if( timeDetermined ) then
-             timePeriod = commonTimeSpectral(timePeriod, tt)
-          else
-             timePeriod     = tt
-             timeDetermined = .true.
-          endif
-       end if
+                ! Check if a time period was already determined. If so, try
+                ! to determine a common time. Otherwise just copy the data.
+
+                if (timeDetermined) then
+                    timePeriod = commonTimeSpectral(timePeriod, tt)
+                else
+                    timePeriod = tt
+                    timeDetermined = .true.
+                end if
+            end if
 
 !!$         ! Altitude.
 !!$
@@ -996,1348 +991,1345 @@ subroutine timePeriodSpectral
 !!$           endif
 !!$         endif
 
-    endif externalTest
+        end if externalTest
 
-    ! If it was possible to determine the time, copy it to the
-    ! sections and return.
+        ! If it was possible to determine the time, copy it to the
+        ! sections and return.
 
+        if (timeDetermined) then
+            do nn = 1, nSections
+                sections(nn)%timePeriod = timePeriod / sections(nn)%nSlices
+                !print *,'sectionTimePeriod',sections(nn)%timePeriod,nn
+            end do
+            return
+        end if
 
-    if( timeDetermined ) then
-       do nn=1,nSections
-          sections(nn)%timePeriod = timePeriod/sections(nn)%nSlices
-          !print *,'sectionTimePeriod',sections(nn)%timePeriod,nn
-       enddo
-       return
-    endif
+        ! Try to determine the periodic time via the rotation rate of the
+        ! sections and its number of slices.
 
-    ! Try to determine the periodic time via the rotation rate of the
-    ! sections and its number of slices.
+        sectionLoop: do nn = 1, nSections
 
-    sectionLoop: do nn=1,nSections
+            ! Test if the section is rotating, because only for rotating
+            ! sections the periodic time can be determined.
 
-       ! Test if the section is rotating, because only for rotating
-       ! sections the periodic time can be determined.
+            testRotating: if (sections(nn)%rotating) then
+
+                ! Determine the magnitude of the rotation rate and the
+                ! corresponding periodic time period.
+
+                omega = sqrt(sections(nn)%rotRate(1)**2 &
+                             + sections(nn)%rotRate(2)**2 &
+                             + sections(nn)%rotRate(3)**2)
 
-       testRotating: if( sections(nn)%rotating ) then
+                tt = two * pi / omega
+
+                ! If a time period was already determined, check if this is
+                ! identical to tt. If not print an error message and exit.
+
+                if (timeDetermined) then
+
+                    tt = abs(tt - timePeriod) / timePeriod
+                    if (tt > tol) then
+                        if (myID == 0) &
+                             call terminate("timePeriodSpectral", &
+                             "Rotational frequencies of the rotating &
+                             &sections are not identical.")
+                        call mpi_barrier(ADflow_comm_world, ierr)
+                    end if
+
+                else
+
+                    ! Just copy the data.
+
+                    timePeriod = tt
+                    timeDetermined = .true.
+
+                end if
+
+            end if testRotating
+        end do sectionLoop
+
+        ! Divide the periodic time by the number of slices to get the
+        ! characteristic time for every section.
+
+        do nn = 1, nSections
+            sections(nn)%timePeriod = timePeriod / sections(nn)%nSlices
+        end do
+
+        ! Return if it was possible to determine the time.
+
+        if (timeDetermined) return
+
+        ! Periodic time could not be determined. Print an error
+        ! message and exit.
+
+        if (myID == 0) &
+             call terminate("timePeriodSpectral", &
+             "Not possible to determine the periodic time &
+             &for the time spectral method")
+        call mpi_barrier(ADflow_comm_world, ierr)
+
+    end subroutine timePeriodSpectral
+
+    function commonTimeSpectral(t1, t2)
+        !
+        !       The function commonTimeSpectral determines the smallest
+        !       possible common time between t1 and t2, such that
+        !       tcommon = n1*t1 = n2*t2 and n1, n2 integers.
+        !
+        use communication
+        use utils, only: terminate
+        implicit none
+        !
+        !      Function definition
+        !
+        real(kind=realType) :: commonTimeSpectral
+        !
+        !      Function arguments.
+        !
+        real(kind=realType), intent(in) :: t1, t2
+        !
+        !      Local parameters.
+        !
+        integer(kind=intType), parameter :: nMax = 100
+        real(kind=realType), parameter :: tol = 1.e-5_realType
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+        integer(kind=intType) :: n1, n2
+        real(kind=realType) :: tt1, tt2, tt, ratio
+
+        ! Store the largest time in tt1 and the smallest in tt2 and
+        ! compute the ratio tt1/tt2, which is >= 1
+
+        tt1 = max(t1, t2)
+        tt2 = min(t1, t2)
+        ratio = tt1 / tt2
+
+        ! Loop to find the smallest integer values of n1 and n2, such
+        ! that n1*tt1 = n2*tt2. Note that due to the previous definition
+        ! n2 >= n1.
+
+        do n1 = 1, nMax
+            tt = n1 * ratio
+            n2 = nint(tt)
+            tt = abs(tt - n2)
+            if (tt <= tol) exit
+        end do
+
+        ! Check if a common time was found
+
+        if (n1 > nMax) then
+            if (myID == 0) &
+                 call terminate("commonTimeSpectral", &
+                 "No common time periodic time found. &
+                 &Problem may not be periodic")
+            call mpi_barrier(ADflow_comm_world, ierr)
+        end if
+
+        ! Set the common time.
+
+        commonTimeSpectral = n1 * tt1
+
+    end function commonTimeSpectral
+    subroutine timeRotMatricesSpectral
+        !
+        !       timeRotMatricesSpectral determines the rotation matrices
+        !       used in the time derivatives for the velocity components in
+        !       the time spectral method. These matrices are the identity
+        !       matrices for non-rotating sections and something different for
+        !       rotating sections. Therefore the rotation matrices are stored
+        !       for every section.
+        !
+        use constants
+        use inputPhysics, only: equationMode
+        use inputTimeSpectral, only: rotMatrixSpectral
+        use section, only: sections, nSections
+        use utils, only: terminate
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: nn
+        real(kind=realType) :: tmp, theta, cosTheta, sinTheta
+
+        real(kind=realType), dimension(3) :: xt, yt, zt
+
+        ! This routine is only used for the spectral solutions. Return
+        ! immediately if a different mode is solved.
+
+        if (equationMode /= timeSpectral) return
+
+        ! Allocate the memory for rotMatrixSpectral, which will store
+        ! the rotation matrices for all the sections.
+
+        if (allocated(rotMatrixSpectral)) deallocate (rotMatrixSpectral)
+        allocate (rotMatrixSpectral(nSections, 3, 3), stat=ierr)
+
+        if (ierr /= 0) &
+             call terminate("timeRotMatricesSpectral", &
+             "Memory allocation failure for &
+             &rotMatrixSpectral")
+
+        ! Loop over the number of sections.
+
+        sectionLoop: do nn = 1, nSections
+
+            ! Test if the rotation matrix is the unity matrix. This is the
+            ! case if this section is not rotating or if the number of
+            ! slices is only 1, i.e. the true physical model is computed.
+
+            testUnity: if (.not. sections(nn)%rotating .or. &
+                           sections(nn)%nSlices == 1) then
+
+                ! Set the rotation matrix to the unity matrix.
+
+                rotMatrixSpectral(nn, 1, 1) = one
+                rotMatrixSpectral(nn, 1, 2) = zero
+                rotMatrixSpectral(nn, 1, 3) = zero
+
+                rotMatrixSpectral(nn, 2, 1) = zero
+                rotMatrixSpectral(nn, 2, 2) = one
+                rotMatrixSpectral(nn, 2, 3) = zero
+
+                rotMatrixSpectral(nn, 3, 1) = zero
+                rotMatrixSpectral(nn, 3, 2) = zero
+                rotMatrixSpectral(nn, 3, 3) = one
+
+                else testUnity
+
+                ! Section is rotating and only a part of the physical problem
+                ! is modelled. Consequently a rotation matrix is present for
+                ! the velocity components.
+
+                ! First transform to a frame where the xt-axis points in the
+                ! direction of the rotation vector.
+
+                xt(1) = sections(nn)%rotAxis(1)
+                xt(2) = sections(nn)%rotAxis(2)
+                xt(3) = sections(nn)%rotAxis(3)
+
+                ! Construct the yt axis. It does not matter exactly as long
+                ! as it is normal to xt.
+
+                if (abs(xt(2)) < 0.707107_realType) then
+                    yt(1) = zero
+                    yt(2) = one
+                    yt(3) = zero
+                else
+                    yt(1) = zero
+                    yt(2) = zero
+                    yt(3) = one
+                end if
+
+                ! Make sure that yt is normal to xt.
+
+                tmp = xt(1) * yt(1) + xt(2) * yt(2) + xt(3) * yt(3)
+                yt(1) = yt(1) - tmp * xt(1)
+                yt(2) = yt(2) - tmp * xt(2)
+                yt(3) = yt(3) - tmp * xt(3)
+
+                ! And create a unit vector.
+
+                tmp = one / sqrt(yt(1)**2 + yt(2)**2 + yt(3)**2)
+                yt(1) = tmp * yt(1)
+                yt(2) = tmp * yt(2)
+                yt(3) = tmp * yt(3)
+
+                ! Create the vector zt by taking the cross product xt*yt.
+
+                zt(1) = xt(2) * yt(3) - xt(3) * yt(2)
+                zt(2) = xt(3) * yt(1) - xt(1) * yt(3)
+                zt(3) = xt(1) * yt(2) - xt(2) * yt(1)
+
+                ! Compute the periodic angle theta and its sine and cosine.
+
+                theta = two * pi / real(sections(nn)%nSlices, realType)
+                cosTheta = cos(theta)
+                sinTheta = sin(theta)
+
+                ! The rotation matrix in the xt,yt,zt frame is given by
+                !
+                ! R = | 1      0           0      |
+                !     | 0  cos(theta) -sin(theta) |
+                !     | 0  sin(theta)  cos(theta) |
+                !
+                ! The rotation matrix in the standard cartesian frame is then
+                ! given by t * r * t^t, where the colums of the transformation
+                ! matrix t are the unit vectors xt,yt,zt. One can easily check
+                ! this by checking rotation around the y- and z-axis. The
+                ! result of this is the expression below.
+
+                rotMatrixSpectral(nn, 1, 1) = xt(1) * xt(1) &
+                                              + cosTheta * (yt(1) * yt(1) + zt(1) * zt(1))
+                rotMatrixSpectral(nn, 1, 2) = xt(1) * xt(2) &
+                                              + cosTheta * (yt(1) * yt(2) + zt(1) * zt(2)) &
+                                              - sinTheta * (yt(1) * zt(2) - yt(2) * zt(1))
+                rotMatrixSpectral(nn, 1, 3) = xt(1) * xt(3) &
+                                              + cosTheta * (yt(1) * yt(3) + zt(1) * zt(3)) &
+                                              - sinTheta * (yt(1) * zt(3) - yt(3) * zt(1))
+
+                rotMatrixSpectral(nn, 2, 1) = xt(1) * xt(2) &
+                                              + cosTheta * (yt(1) * yt(2) + zt(1) * zt(2)) &
+                                              + sinTheta * (yt(1) * zt(2) - yt(2) * zt(1))
+                rotMatrixSpectral(nn, 2, 2) = xt(2) * xt(2) &
+                                              + cosTheta * (yt(2) * yt(2) + zt(2) * zt(2))
+                rotMatrixSpectral(nn, 2, 3) = xt(2) * xt(3) &
+                                              + cosTheta * (yt(2) * yt(3) + zt(2) * zt(3)) &
+                                              - sinTheta * (yt(2) * zt(3) - yt(3) * zt(2))
+
+                rotMatrixSpectral(nn, 3, 1) = xt(1) * xt(3) &
+                                              + cosTheta * (yt(1) * yt(3) + zt(1) * zt(3)) &
+                                              + sinTheta * (yt(1) * zt(3) - yt(3) * zt(1))
+                rotMatrixSpectral(nn, 3, 2) = xt(2) * xt(3) &
+                                              + cosTheta * (yt(2) * yt(3) + zt(2) * zt(3)) &
+                                              + sinTheta * (yt(2) * zt(3) - yt(3) * zt(2))
+                rotMatrixSpectral(nn, 3, 3) = xt(3) * xt(3) &
+                                              + cosTheta * (yt(3) * yt(3) + zt(3) * zt(3))
+
+            end if testUnity
+
+        end do sectionLoop
+
+    end subroutine timeRotMatricesSpectral
+
+    subroutine fineGridSpectralCoor
+        !
+        !       fineGridSpectralCoor computes the coordinates of all but
+        !       the first spectral solution from the known coordinates of the
+        !       first time instance.
+        !
+        use constants
+        use block, only: flowDoms, nDom
+        use inputPhysics, only: equationMode
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use IOModule, only: IOVar
+        use iteration, only: currentLevel
+        use monitor, only: timeUnsteady
+        use section, only: nSections, sections
+        use partitionMod, only: interpolSpectral, nGridsRead
+        use utils, only: terminate
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: nn, ll, i, j, k
+        integer(kind=intType) :: il, jl, kl
+
+        real(kind=realType), dimension(nSections) :: dt, t
+
+        ! This routine is only used for the spectral solutions. Return
+        ! immediately if a different mode is solved.
+
+        if (equationMode /= timeSpectral) return
+
+        ! Also return immediately if all coordinates were already read
+        ! from the grid file.
+
+        if (.not. interpolSpectral) return
+        !
+        !       Step 1. Perform a rigid body motion of the coordinates of the
+        !               1st time instance to the other instances.
+        !
+        ! Set currentLevel to 1, such that updateCoorFineMesh
+        ! updates the coordinates of the correct level.
+
+        currentLevel = 1
+
+        ! Determine the delta t for every section. Remember it is possible
+        ! that every section has a different periodic time.
+
+        do nn = 1, nSections
+            dt(nn) = sections(nn)%timePeriod &
+                     / real(nTimeIntervalsSpectral, realType)
+        end do
+
+        ! Loop over the number of spectral solutions, starting at 2,
+        ! because the first is already known.
+
+        timeUnsteady = zero
+        spectralLoop: do ll = 2, nTimeIntervalsSpectral
+
+            ! Set the owned coordinates to the coordinates of the first
+            ! solution such that updateCoorFineMesh can modify them.
+
+            do nn = 1, nDom
+
+                do k = 1, flowDoms(nn, 1, 1)%kl
+                    do j = 1, flowDoms(nn, 1, 1)%jl
+                        do i = 1, flowDoms(nn, 1, 1)%il
+                            flowDoms(nn, 1, ll)%x(i, j, k, 1) = flowDoms(nn, 1, 1)%x(i, j, k, 1)
+                            flowDoms(nn, 1, ll)%x(i, j, k, 2) = flowDoms(nn, 1, 1)%x(i, j, k, 2)
+                            flowDoms(nn, 1, ll)%x(i, j, k, 3) = flowDoms(nn, 1, 1)%x(i, j, k, 3)
+                        end do
+                    end do
+                end do
+
+            end do
+
+            ! Compute the corresponding times for this spectral solution
+            ! and call updateCoorFineMesh to determine the coordinates.
+
+            do nn = 1, nSections
+                t(nn) = (ll - 1) * dt(nn)
+            end do
+
+            call updateCoorFineMesh(t, ll)
+
+        end do spectralLoop
+
+        ! Return if only one grid has been read.
+
+        if (nGridsRead == 1) return
+        !
+        !       Step 2. Multiple grids have been read, but the number is not
+        !               equal to the number of time instances used in the
+        !               computation. As multiple grids have been read this
+        !               means that a time spectral computation on a deforming
+        !               mesh is performed. Therefore the deformations,
+        !               relative to the first grid, must be interpolated.
+        !
+        ! First allocate the memory of IOVar(..,1)%w.
+        ! This will serve as temporary storage for the coordinates of
+        ! the 1st spectral instance, because those will be used in the
+        ! call to updateCoorFineMesh. In this way this routine can be
+        ! used without modification.
+
+        do nn = 1, nDom
+
+            kl = flowDoms(nn, 1, 1)%kl
+            jl = flowDoms(nn, 1, 1)%jl
+            il = flowDoms(nn, 1, 1)%il
+
+            allocate (IOVar(nn, 1)%w(il, jl, kl, 3), stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("fineGridSpectralCoor", &
+                 "Memory allocation failure for &
+                 &IOVar(nn,1)%w")
+
+            do k = 1, kl
+                do j = 1, jl
+                    do i = 1, il
+                        IOVar(nn, 1)%w(i, j, k, 1) = flowDoms(nn, 1, 1)%x(i, j, k, 1)
+                        IOVar(nn, 1)%w(i, j, k, 2) = flowDoms(nn, 1, 1)%x(i, j, k, 2)
+                        IOVar(nn, 1)%w(i, j, k, 3) = flowDoms(nn, 1, 1)%x(i, j, k, 3)
+                    end do
+                end do
+            end do
+
+        end do
+
+        ! Determine the delta t for every section. Remember it is possible
+        ! that every section has a different periodic time.
+
+        do nn = 1, nSections
+            dt(nn) = sections(nn)%timePeriod &
+                     / real(nGridsRead, realType)
+        end do
+
+        ! Loop over the number of spectral solutions read, starting at
+        ! 2, and determine the displacements relative to the rigid body
+        ! motion of the grid of the 1st time instance.
+
+        timeUnsteady = zero
+        spectralLoopRead: do ll = 2, nGridsRead
+
+            ! Compute the corresponding times for this spectral solution
+            ! and call updateCoorFineMesh to determine the coordinates.
+
+            do nn = 1, nSections
+                t(nn) = (ll - 1) * dt(nn)
+            end do
+
+            call updateCoorFineMesh(t, 1_intType)
+
+            ! Determine the relative displacements for this time instance
+            ! and initialize flowDoms(nn,1,1) for the next round.
+
+            do nn = 1, nDom
+
+                do k = 1, flowDoms(nn, 1, 1)%kl
+                    do j = 1, flowDoms(nn, 1, 1)%jl
+                        do i = 1, flowDoms(nn, 1, 1)%il
+                            IOVar(nn, ll)%w(i, j, k, 1) = IOVar(nn, ll)%w(i, j, k, 1) &
+                                                          - flowDoms(nn, 1, 1)%x(i, j, k, 1)
+                            IOVar(nn, ll)%w(i, j, k, 2) = IOVar(nn, ll)%w(i, j, k, 2) &
+                                                          - flowDoms(nn, 1, 1)%x(i, j, k, 2)
+                            IOVar(nn, ll)%w(i, j, k, 3) = IOVar(nn, ll)%w(i, j, k, 3) &
+                                                          - flowDoms(nn, 1, 1)%x(i, j, k, 3)
+
+                            flowDoms(nn, 1, 1)%x(i, j, k, 1) = IOVar(nn, 1)%w(i, j, k, 1)
+                            flowDoms(nn, 1, 1)%x(i, j, k, 2) = IOVar(nn, 1)%w(i, j, k, 2)
+                            flowDoms(nn, 1, 1)%x(i, j, k, 3) = IOVar(nn, 1)%w(i, j, k, 3)
+                        end do
+                    end do
+                end do
+
+            end do
+
+        end do spectralLoopRead
+
+        ! The coordinates of IOVar now contain the relative
+        ! displacements compared to the rigid body motion of the first
+        ! time instance, except for the first time instance.
+        ! Set these to zero.
+
+        do nn = 1, nDom
+            IOVar(nn, 1)%w = zero
+        end do
+
+        ! Interpolate the displacements and add them to the currently
+        ! stored coordinates.
+
+        call terminate("fineGridSpectralCoor", &
+                       "Arti should do this interpolation stuff")
+
+        ! Release the memory of the variable w in IOVar.
+
+        do nn = 1, nDom
+            do ll = 1, nGridsRead
+                deallocate (IOVar(nn, ll)%w, stat=ierr)
+                if (ierr /= 0) &
+                    call terminate("fineGridSpectralCoor", &
+                                   "Deallocation failure for IOVar%w")
+            end do
+        end do
+
+    end subroutine fineGridSpectralCoor
+    subroutine updateCoorFineMesh(dtAdvance, sps)
+        !
+        !       updateCoorFineMesh updates the coordinates of the
+        !       moving parts of the current finest mesh by the given amount of
+        !       time, possibly different per section. In unsteady mode all the
+        !       times will be equal, but in time spectral mode they can be
+        !       different.
+        !       This routine is called in the full mg cycle to put the fine
+        !       mesh to the position previously calculated on the coarser
+        !       grid levels, in the unsteady time loop to advance the
+        !       coordinates only one time step and in the partitioning part
+        !       of the spectral mode to compute the coordinates of the given
+        !       spectral solution sps. As it is used in the full MG cycle,
+        !       currentLevel points to the correct grid level and not
+        !       ground level.
+        !
+        use constants
+        use block
+        use blockPointers
+        use flowVarRefState
+        use cgnsGrid
+        use inputMotion
+        use iteration
+        use monitor
+        use utils, only: setPointers, rotMatrixRigidBody
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: sps
+
+        real(kind=realType), dimension(*), intent(in) :: dtAdvance
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, nn
+
+        real(kind=realType) :: displX, displY, displZ
+        real(kind=realType) :: tNew, tOld
+        real(kind=realType) :: angleX, angleY, angleZ, dx, dy, dz
+        real(kind=realType) :: xiX, xiY, xiZ, etaX, etaY, etaZ
+        real(kind=realType) :: zetaX, zetaY, zetaZ, xp, yp, zp, t
+        real(kind=realType) :: phi, cosPhi, sinPhi, eta, zeta
+
+        real(kind=realType), dimension(3) :: rotationPoint
+        real(kind=realType), dimension(3, 3) :: rotationMatrix
+
+        ! Compute the displacements due to the rigid motion of the mesh.
+
+        displX = zero
+        displY = zero
+        displZ = zero
+
+        ! Determine the time values of the old and new time level.
+        ! It is assumed that the rigid body rotation of the mesh is only
+        ! used when only 1 section is present.
+
+        tNew = timeUnsteady + timeUnsteadyRestart
+        tOld = tNew - dtAdvance(1)
+
+        ! Compute the rotation matrix of the rigid body rotation as
+        ! well as the rotation point; the latter may vary in time due
+        ! to rigid body translation.
+
+        call rotMatrixRigidBody(tNew, tOld, rotationMatrix, rotationPoint)
+
+        ! Loop over the number of local blocks.
+
+        blockLoop: do nn = 1, nDom
+
+            ! Set the pointers for this block on the current level.
+            ! Note that currentLevel must be used and not groundLevel,
+            ! because groundLevel is 1 level too coarse when this routine
+            ! is called in the full mg cycle.
+
+            call setPointers(nn, currentLevel, sps)
+            !
+            !         The rigid body motion of the entire mesh.
+            !
+            ! First the rotation.
+
+            do k = 1, kl
+                do j = 1, jl
+                    do i = 1, il
+
+                        ! Determine the vector relative to the rotation point.
+
+                        xp = x(i, j, k, 1) - rotationPoint(1)
+                        yp = x(i, j, k, 2) - rotationPoint(2)
+                        zp = x(i, j, k, 3) - rotationPoint(3)
+
+                        ! Apply the transformation matrix to the vector (xp,yp,zp)
+                        ! and set the new coordinates.
+
+                        x(i, j, k, 1) = rotationMatrix(1, 1) * xp &
+                                        + rotationMatrix(1, 2) * yp &
+                                        + rotationMatrix(1, 3) * zp + rotationPoint(1)
+                        x(i, j, k, 2) = rotationMatrix(2, 1) * xp &
+                                        + rotationMatrix(2, 2) * yp &
+                                        + rotationMatrix(2, 3) * zp + rotationPoint(2)
+                        x(i, j, k, 3) = rotationMatrix(3, 1) * xp &
+                                        + rotationMatrix(3, 2) * yp &
+                                        + rotationMatrix(3, 3) * zp + rotationPoint(3)
+                    end do
+                end do
+            end do
+
+            ! Add the translation.
+
+            do k = 1, kl
+                do j = 1, jl
+                    do i = 1, il
+                        x(i, j, k, 1) = x(i, j, k, 1) + displX
+                        x(i, j, k, 2) = x(i, j, k, 2) + displY
+                        x(i, j, k, 3) = x(i, j, k, 3) + displZ
+                    end do
+                end do
+            end do
+
+            !
+            !         Determine whether the corresponding cgns block is a rotating
+            !         block. If it is, apply the rotation.
+            !         Note that now the section ID of the block is taken into
+            !         account to allow for different periodic times per section.
+            !
+            if (cgnsDoms(nbkGlobal)%rotatingFrameSpecified) then
+
+                ! Compute the rotation angles.
+
+                angleX = dtAdvance(sectionID) * cgnsDoms(nbkGlobal)%rotRate(1)
+                angleY = dtAdvance(sectionID) * cgnsDoms(nbkGlobal)%rotRate(2)
+                angleZ = dtAdvance(sectionID) * cgnsDoms(nbkGlobal)%rotRate(3)
+
+                ! Compute the unit vector in the direction of the rotation
+                ! axis, which will be called the xi-direction.
+
+                t = one / max(eps, sqrt(angleX**2 + angleY**2 + angleZ**2))
+                xiX = t * angleX
+                xiY = t * angleY
+                xiZ = t * angleZ
+
+                ! Determine the rotation angle in xi-direction and its sine
+                ! and cosine. Due to the definition of the xi-direction this
+                ! angle will always be positive.
+
+                phi = xiX * angleX + xiY * angleY + xiZ * angleZ
+                cosPhi = cos(phi)
+                sinPhi = sin(phi)
+
+                ! Loop over the owned coordinates of this block.
+
+                do k = 1, kl
+                    do j = 1, jl
+                        do i = 1, il
+
+                            ! Compute the vector relative to center of rotation.
+
+                            dx = x(i, j, k, 1) - cgnsDoms(nbkGlobal)%rotCenter(1)
+                            dy = x(i, j, k, 2) - cgnsDoms(nbkGlobal)%rotCenter(2)
+                            dz = x(i, j, k, 3) - cgnsDoms(nbkGlobal)%rotCenter(3)
+
+                            ! Compute the coordinates of the point p, which is the
+                            ! closest point on the rotation axis.
+
+                            t = dx * xiX + dy * xiY + dz * xiZ
+                            xp = cgnsDoms(nbkGlobal)%rotCenter(1) + t * xiX
+                            yp = cgnsDoms(nbkGlobal)%rotCenter(2) + t * xiY
+                            zp = cgnsDoms(nbkGlobal)%rotCenter(3) + t * xiZ
+
+                            ! Determine the unit vector in eta direction, which
+                            ! is defined from point p to the current point.
+
+                            etaX = x(i, j, k, 1) - xp
+                            etaY = x(i, j, k, 2) - yp
+                            etaZ = x(i, j, k, 3) - zp
+
+                            eta = sqrt(etaX**2 + etaY**2 + etaZ**2)
+                            t = one / max(eps, eta)
+
+                            etaX = t * etaX
+                            etaY = t * etaY
+                            etaZ = t * etaZ
+
+                            ! Determine the unit vector in zeta-direction. This is
+                            ! the cross product of the unit vectors in xi and in
+                            ! eta-direction.
+
+                            zetaX = xiY * etaZ - xiZ * etaY
+                            zetaY = xiZ * etaX - xiX * etaZ
+                            zetaZ = xiX * etaY - xiY * etaX
+
+                            ! Compute the new eta and zeta coordinates.
+
+                            zeta = eta * sinPhi
+                            eta = eta * cosPhi
+
+                            ! Compute the new cartesian coordinates.
+
+                            x(i, j, k, 1) = xp + eta * etaX + zeta * zetaX
+                            x(i, j, k, 2) = yp + eta * etaY + zeta * zetaY
+                            x(i, j, k, 3) = zp + eta * etaZ + zeta * zetaZ
+
+                        end do
+                    end do
+                end do
+
+            end if
+
+        end do blockLoop
+
+    end subroutine updateCoorFineMesh
+    subroutine allocCoorFineGrid
+        !
+        !       allocCoorFineGrid allocates the memory for all the coordinates
+        !       of all local blocks. Also the memory for the derived data type
+        !       used for the reading is allocated. If an interpolation must be
+        !       performed for the time spectral method the variables of this
+        !       IO type are allocated as well. For all other cases the pointer
+        !       of the variables are set to the appropriate entry in flowDoms.
+        !
+        use constants
+        use block, only: nDom, flowDoms
+        use inputPhysics, only: equationMode
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use IOModule, only: IOVar
+        use iteration, only: nOldLevels, deforming_grid
+        use partitionMod, only: interpolSpectral, nGridsRead
+        use utils, only: terminate
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
 
-          ! Determine the magnitude of the rotation rate and the
-          ! corresponding periodic time period.
+        integer(kind=intType) :: nn, mm
+        integer(kind=intType) :: il, jl, kl, ie, je, ke
 
-          omega = sqrt(sections(nn)%rotRate(1)**2 &
-               +      sections(nn)%rotRate(2)**2 &
-               +      sections(nn)%rotRate(3)**2)
+        ! Loop over the local blocks and allocate the memory for the
+        ! coordinates.
 
-          tt = two*pi/omega
+        blockLoop: do nn = 1, nDom
 
-          ! If a time period was already determined, check if this is
-          ! identical to tt. If not print an error message and exit.
+            ! Some abbreviations of the block dimensions.
 
-          if( timeDetermined ) then
+            il = flowDoms(nn, 1, 1)%il
+            jl = flowDoms(nn, 1, 1)%jl
+            kl = flowDoms(nn, 1, 1)%kl
 
-             tt = abs(tt-timePeriod)/timePeriod
-             if(tt > tol) then
-                if(myID == 0)                          &
-                     call terminate("timePeriodSpectral", &
-                     "Rotational frequencies of the rotating &
-                     &sections are not identical.")
-                call mpi_barrier(ADflow_comm_world, ierr)
-             endif
-
-          else
-
-             ! Just copy the data.
-
-             timePeriod     = tt
-             timeDetermined = .true.
-
-          endif
-
-       endif testRotating
-    enddo sectionLoop
-
-    ! Divide the periodic time by the number of slices to get the
-    ! characteristic time for every section.
-
-    do nn=1,nSections
-       sections(nn)%timePeriod = timePeriod/sections(nn)%nSlices
-    enddo
-
-    ! Return if it was possible to determine the time.
-
-    if( timeDetermined ) return
-
-    ! Periodic time could not be determined. Print an error
-    ! message and exit.
-
-    if(myID == 0)                          &
-         call terminate("timePeriodSpectral", &
-         "Not possible to determine the periodic time &
-         &for the time spectral method")
-    call mpi_barrier(ADflow_comm_world, ierr)
-
-  end subroutine timePeriodSpectral
-
-  function commonTimeSpectral(t1, t2)
-    !
-    !       The function commonTimeSpectral determines the smallest
-    !       possible common time between t1 and t2, such that
-    !       tcommon = n1*t1 = n2*t2 and n1, n2 integers.
-    !
-    use communication
-    use utils, only : terminate
-    implicit none
-    !
-    !      Function definition
-    !
-    real(kind=realType) :: commonTimeSpectral
-    !
-    !      Function arguments.
-    !
-    real(kind=realType), intent(in) :: t1, t2
-    !
-    !      Local parameters.
-    !
-    integer(kind=intType), parameter :: nMax = 100
-    real(kind=realType),   parameter :: tol  = 1.e-5_realType
-    !
-    !      Local variables.
-    !
-    integer                :: ierr
-    integer(kind=intType) :: n1, n2
-    real(kind=realType)   :: tt1, tt2, tt, ratio
-
-    ! Store the largest time in tt1 and the smallest in tt2 and
-    ! compute the ratio tt1/tt2, which is >= 1
-
-    tt1   = max(t1, t2)
-    tt2   = min(t1, t2)
-    ratio = tt1/tt2
-
-    ! Loop to find the smallest integer values of n1 and n2, such
-    ! that n1*tt1 = n2*tt2. Note that due to the previous definition
-    ! n2 >= n1.
-
-    do n1=1,nMax
-       tt = n1*ratio
-       n2 = nint(tt)
-       tt = abs(tt-n2)
-       if(tt <= tol) exit
-    enddo
-
-    ! Check if a common time was found
-
-    if(n1 > nMax) then
-       if(myID == 0)                          &
-            call terminate("commonTimeSpectral", &
-            "No common time periodic time found. &
-            &Problem may not be periodic")
-       call mpi_barrier(ADflow_comm_world, ierr)
-    endif
-
-    ! Set the common time.
-
-    commonTimeSpectral = n1*tt1
-
-  end function commonTimeSpectral
-  subroutine timeRotMatricesSpectral
-    !
-    !       timeRotMatricesSpectral determines the rotation matrices
-    !       used in the time derivatives for the velocity components in
-    !       the time spectral method. These matrices are the identity
-    !       matrices for non-rotating sections and something different for
-    !       rotating sections. Therefore the rotation matrices are stored
-    !       for every section.
-    !
-    use constants
-    use inputPhysics, only : equationMode
-    use inputTimeSpectral, only : rotMatrixSpectral
-    use section, only : sections, nSections
-    use utils, only : terminate
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: nn
-    real(kind=realType)   :: tmp, theta, cosTheta, sinTheta
-
-    real(kind=realType), dimension(3) :: xt, yt, zt
-
-    ! This routine is only used for the spectral solutions. Return
-    ! immediately if a different mode is solved.
-
-    if(equationMode /= timeSpectral) return
-
-    ! Allocate the memory for rotMatrixSpectral, which will store
-    ! the rotation matrices for all the sections.
-
-    if( allocated(rotMatrixSpectral)) deallocate(rotMatrixSpectral)
-    allocate(rotMatrixSpectral(nSections,3,3), stat=ierr)
-
-    if(ierr /= 0)                               &
-         call terminate("timeRotMatricesSpectral", &
-         "Memory allocation failure for &
-         &rotMatrixSpectral")
-
-    ! Loop over the number of sections.
-
-    sectionLoop: do nn=1,nSections
-
-       ! Test if the rotation matrix is the unity matrix. This is the
-       ! case if this section is not rotating or if the number of
-       ! slices is only 1, i.e. the true physical model is computed.
-
-       testUnity: if(.not. sections(nn)%rotating .or. &
-            sections(nn)%nSlices == 1) then
-
-          ! Set the rotation matrix to the unity matrix.
-
-          rotMatrixSpectral(nn,1,1) = one
-          rotMatrixSpectral(nn,1,2) = zero
-          rotMatrixSpectral(nn,1,3) = zero
-
-          rotMatrixSpectral(nn,2,1) = zero
-          rotMatrixSpectral(nn,2,2) = one
-          rotMatrixSpectral(nn,2,3) = zero
-
-          rotMatrixSpectral(nn,3,1) = zero
-          rotMatrixSpectral(nn,3,2) = zero
-          rotMatrixSpectral(nn,3,3) = one
-
-       else testUnity
-
-          ! Section is rotating and only a part of the physical problem
-          ! is modelled. Consequently a rotation matrix is present for
-          ! the velocity components.
-
-          ! First transform to a frame where the xt-axis points in the
-          ! direction of the rotation vector.
-
-          xt(1) = sections(nn)%rotAxis(1)
-          xt(2) = sections(nn)%rotAxis(2)
-          xt(3) = sections(nn)%rotAxis(3)
-
-          ! Construct the yt axis. It does not matter exactly as long
-          ! as it is normal to xt.
-
-          if(abs(xt(2)) < 0.707107_realType) then
-             yt(1) = zero
-             yt(2) = one
-             yt(3) = zero
-          else
-             yt(1) = zero
-             yt(2) = zero
-             yt(3) = one
-          endif
-
-          ! Make sure that yt is normal to xt.
-
-          tmp   = xt(1)*yt(1) + xt(2)*yt(2) + xt(3)*yt(3)
-          yt(1) = yt(1) - tmp*xt(1)
-          yt(2) = yt(2) - tmp*xt(2)
-          yt(3) = yt(3) - tmp*xt(3)
-
-          ! And create a unit vector.
-
-          tmp   = one/sqrt(yt(1)**2 + yt(2)**2 + yt(3)**2)
-          yt(1) = tmp*yt(1)
-          yt(2) = tmp*yt(2)
-          yt(3) = tmp*yt(3)
-
-          ! Create the vector zt by taking the cross product xt*yt.
-
-          zt(1) = xt(2)*yt(3) - xt(3)*yt(2)
-          zt(2) = xt(3)*yt(1) - xt(1)*yt(3)
-          zt(3) = xt(1)*yt(2) - xt(2)*yt(1)
-
-          ! Compute the periodic angle theta and its sine and cosine.
-
-          theta     = two*pi/real(sections(nn)%nSlices,realType)
-          cosTheta = cos(theta)
-          sinTheta = sin(theta)
-
-          ! The rotation matrix in the xt,yt,zt frame is given by
-          !
-          ! R = | 1      0           0      |
-          !     | 0  cos(theta) -sin(theta) |
-          !     | 0  sin(theta)  cos(theta) |
-          !
-          ! The rotation matrix in the standard cartesian frame is then
-          ! given by t * r * t^t, where the colums of the transformation
-          ! matrix t are the unit vectors xt,yt,zt. One can easily check
-          ! this by checking rotation around the y- and z-axis. The
-          ! result of this is the expression below.
-
-          rotMatrixSpectral(nn,1,1) = xt(1)*xt(1)                    &
-               + cosTheta*(yt(1)*yt(1) + zt(1)*zt(1))
-          rotMatrixSpectral(nn,1,2) = xt(1)*xt(2)                    &
-               + cosTheta*(yt(1)*yt(2) + zt(1)*zt(2)) &
-               - sinTheta*(yt(1)*zt(2) - yt(2)*zt(1))
-          rotMatrixSpectral(nn,1,3) = xt(1)*xt(3)                    &
-               + cosTheta*(yt(1)*yt(3) + zt(1)*zt(3)) &
-               - sinTheta*(yt(1)*zt(3) - yt(3)*zt(1))
-
-          rotMatrixSpectral(nn,2,1) = xt(1)*xt(2)                    &
-               + cosTheta*(yt(1)*yt(2) + zt(1)*zt(2)) &
-               + sinTheta*(yt(1)*zt(2) - yt(2)*zt(1))
-          rotMatrixSpectral(nn,2,2) = xt(2)*xt(2)                    &
-               + cosTheta*(yt(2)*yt(2) + zt(2)*zt(2))
-          rotMatrixSpectral(nn,2,3) = xt(2)*xt(3)                    &
-               + cosTheta*(yt(2)*yt(3) + zt(2)*zt(3)) &
-               - sinTheta*(yt(2)*zt(3) - yt(3)*zt(2))
-
-          rotMatrixSpectral(nn,3,1) = xt(1)*xt(3)                    &
-               + cosTheta*(yt(1)*yt(3) + zt(1)*zt(3)) &
-               + sinTheta*(yt(1)*zt(3) - yt(3)*zt(1))
-          rotMatrixSpectral(nn,3,2) = xt(2)*xt(3)                    &
-               + cosTheta*(yt(2)*yt(3) + zt(2)*zt(3)) &
-               + sinTheta*(yt(2)*zt(3) - yt(3)*zt(2))
-          rotMatrixSpectral(nn,3,3) = xt(3)*xt(3)                    &
-               + cosTheta*(yt(3)*yt(3) + zt(3)*zt(3))
-
-       endif testUnity
-
-    enddo sectionLoop
-
-  end subroutine timeRotMatricesSpectral
-
-
-  subroutine fineGridSpectralCoor
-    !
-    !       fineGridSpectralCoor computes the coordinates of all but
-    !       the first spectral solution from the known coordinates of the
-    !       first time instance.
-    !
-    use constants
-    use block, only: flowDoms, nDom
-    use inputPhysics, only : equationMode
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use IOModule, only : IOVar
-    use iteration, only : currentLevel
-    use monitor, only : timeUnsteady
-    use section, only : nSections, sections
-    use partitionMod, only : interpolSpectral, nGridsRead
-    use utils, only : terminate
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: nn, ll, i, j, k
-    integer(kind=intType) :: il, jl, kl
-
-    real(kind=realType), dimension(nSections) :: dt, t
-
-    ! This routine is only used for the spectral solutions. Return
-    ! immediately if a different mode is solved.
-
-    if(equationMode /= timeSpectral) return
-
-    ! Also return immediately if all coordinates were already read
-    ! from the grid file.
-
-    if(.not. interpolSpectral) return
-    !
-    !       Step 1. Perform a rigid body motion of the coordinates of the
-    !               1st time instance to the other instances.
-    !
-    ! Set currentLevel to 1, such that updateCoorFineMesh
-    ! updates the coordinates of the correct level.
-
-    currentLevel = 1
-
-    ! Determine the delta t for every section. Remember it is possible
-    ! that every section has a different periodic time.
-
-    do nn=1,nSections
-       dt(nn) = sections(nn)%timePeriod &
-            / real(nTimeIntervalsSpectral,realType)
-    enddo
-
-    ! Loop over the number of spectral solutions, starting at 2,
-    ! because the first is already known.
-
-    timeUnsteady = zero
-    spectralLoop: do ll=2,nTimeIntervalsSpectral
-
-       ! Set the owned coordinates to the coordinates of the first
-       ! solution such that updateCoorFineMesh can modify them.
-
-       do nn=1,nDom
-
-          do k=1,flowDoms(nn,1,1)%kl
-             do j=1,flowDoms(nn,1,1)%jl
-                do i=1,flowDoms(nn,1,1)%il
-                   flowDoms(nn,1,ll)%x(i,j,k,1) = flowDoms(nn,1,1)%x(i,j,k,1)
-                   flowDoms(nn,1,ll)%x(i,j,k,2) = flowDoms(nn,1,1)%x(i,j,k,2)
-                   flowDoms(nn,1,ll)%x(i,j,k,3) = flowDoms(nn,1,1)%x(i,j,k,3)
-                enddo
-             enddo
-          enddo
-
-       enddo
-
-       ! Compute the corresponding times for this spectral solution
-       ! and call updateCoorFineMesh to determine the coordinates.
-
-       do nn=1,nSections
-          t(nn) = (ll-1)*dt(nn)
-       enddo
-
-       call updateCoorFineMesh(t, ll)
-
-    enddo spectralLoop
-
-    ! Return if only one grid has been read.
-
-    if(nGridsRead == 1) return
-    !
-    !       Step 2. Multiple grids have been read, but the number is not
-    !               equal to the number of time instances used in the
-    !               computation. As multiple grids have been read this
-    !               means that a time spectral computation on a deforming
-    !               mesh is performed. Therefore the deformations,
-    !               relative to the first grid, must be interpolated.
-    !
-    ! First allocate the memory of IOVar(..,1)%w.
-    ! This will serve as temporary storage for the coordinates of
-    ! the 1st spectral instance, because those will be used in the
-    ! call to updateCoorFineMesh. In this way this routine can be
-    ! used without modification.
-
-    do nn=1,nDom
-
-       kl = flowDoms(nn,1,1)%kl
-       jl = flowDoms(nn,1,1)%jl
-       il = flowDoms(nn,1,1)%il
-
-       allocate(IOVar(nn,1)%w(il,jl,kl,3), stat=ierr)
-       if(ierr /= 0)                            &
-            call terminate("fineGridSpectralCoor", &
-            "Memory allocation failure for &
-            &IOVar(nn,1)%w")
-
-       do k=1,kl
-          do j=1,jl
-             do i=1,il
-                IOVar(nn,1)%w(i,j,k,1) = flowDoms(nn,1,1)%x(i,j,k,1)
-                IOVar(nn,1)%w(i,j,k,2) = flowDoms(nn,1,1)%x(i,j,k,2)
-                IOVar(nn,1)%w(i,j,k,3) = flowDoms(nn,1,1)%x(i,j,k,3)
-             enddo
-          enddo
-       enddo
-
-    enddo
-
-    ! Determine the delta t for every section. Remember it is possible
-    ! that every section has a different periodic time.
-
-    do nn=1,nSections
-       dt(nn) = sections(nn)%timePeriod &
-            / real(nGridsRead,realType)
-    enddo
-
-    ! Loop over the number of spectral solutions read, starting at
-    ! 2, and determine the displacements relative to the rigid body
-    ! motion of the grid of the 1st time instance.
-
-    timeUnsteady = zero
-    spectralLoopRead: do ll=2,nGridsRead
-
-       ! Compute the corresponding times for this spectral solution
-       ! and call updateCoorFineMesh to determine the coordinates.
-
-       do nn=1,nSections
-          t(nn) = (ll-1)*dt(nn)
-       enddo
-
-       call updateCoorFineMesh(t, 1_intType)
-
-       ! Determine the relative displacements for this time instance
-       ! and initialize flowDoms(nn,1,1) for the next round.
-
-       do nn=1,nDom
-
-          do k=1,flowDoms(nn,1,1)%kl
-             do j=1,flowDoms(nn,1,1)%jl
-                do i=1,flowDoms(nn,1,1)%il
-                   IOVar(nn,ll)%w(i,j,k,1) = IOVar(nn,ll)%w(i,j,k,1)     &
-                        - flowDoms(nn,1,1)%x(i,j,k,1)
-                   IOVar(nn,ll)%w(i,j,k,2) = IOVar(nn,ll)%w(i,j,k,2)     &
-                        - flowDoms(nn,1,1)%x(i,j,k,2)
-                   IOVar(nn,ll)%w(i,j,k,3) = IOVar(nn,ll)%w(i,j,k,3)     &
-                        - flowDoms(nn,1,1)%x(i,j,k,3)
-
-                   flowDoms(nn,1,1)%x(i,j,k,1) = IOVar(nn,1)%w(i,j,k,1)
-                   flowDoms(nn,1,1)%x(i,j,k,2) = IOVar(nn,1)%w(i,j,k,2)
-                   flowDoms(nn,1,1)%x(i,j,k,3) = IOVar(nn,1)%w(i,j,k,3)
-                enddo
-             enddo
-          enddo
-
-       enddo
-
-    enddo spectralLoopRead
-
-    ! The coordinates of IOVar now contain the relative
-    ! displacements compared to the rigid body motion of the first
-    ! time instance, except for the first time instance.
-    ! Set these to zero.
-
-    do nn=1,nDom
-       IOVar(nn,1)%w = zero
-    enddo
-
-    ! Interpolate the displacements and add them to the currently
-    ! stored coordinates.
-
-    call terminate("fineGridSpectralCoor", &
-         "Arti should do this interpolation stuff")
-
-    ! Release the memory of the variable w in IOVar.
-
-    do nn=1,nDom
-       do ll=1,nGridsRead
-          deallocate(IOVar(nn,ll)%w, stat=ierr)
-          if(ierr /= 0)                            &
-               call terminate("fineGridSpectralCoor", &
-               "Deallocation failure for IOVar%w")
-       enddo
-    enddo
-
-  end subroutine fineGridSpectralCoor
- subroutine updateCoorFineMesh(dtAdvance, sps)
-    !
-    !       updateCoorFineMesh updates the coordinates of the
-    !       moving parts of the current finest mesh by the given amount of
-    !       time, possibly different per section. In unsteady mode all the
-    !       times will be equal, but in time spectral mode they can be
-    !       different.
-    !       This routine is called in the full mg cycle to put the fine
-    !       mesh to the position previously calculated on the coarser
-    !       grid levels, in the unsteady time loop to advance the
-    !       coordinates only one time step and in the partitioning part
-    !       of the spectral mode to compute the coordinates of the given
-    !       spectral solution sps. As it is used in the full MG cycle,
-    !       currentLevel points to the correct grid level and not
-    !       ground level.
-    !
-    use constants
-    use block
-    use blockPointers
-    use flowVarRefState
-    use cgnsGrid
-    use inputMotion
-    use iteration
-    use monitor
-    use utils, only : setPointers, rotMatrixRigidBody
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: sps
-
-    real(kind=realType), dimension(*), intent(in) :: dtAdvance
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, nn
-
-    real(kind=realType) :: displX, displY, displZ
-    real(kind=realType) :: tNew, tOld
-    real(kind=realType) :: angleX, angleY, angleZ, dx, dy, dz
-    real(kind=realType) :: xiX, xiY, xiZ, etaX, etaY, etaZ
-    real(kind=realType) :: zetaX, zetaY, zetaZ, xp, yp, zp, t
-    real(kind=realType) :: phi, cosPhi, sinPhi, eta, zeta
-
-    real(kind=realType), dimension(3)   :: rotationPoint
-    real(kind=realType), dimension(3,3) :: rotationMatrix
-
-    ! Compute the displacements due to the rigid motion of the mesh.
-
-    displX = zero
-    displY = zero
-    displZ = zero
-
-    ! Determine the time values of the old and new time level.
-    ! It is assumed that the rigid body rotation of the mesh is only
-    ! used when only 1 section is present.
-
-    tNew = timeUnsteady + timeUnsteadyRestart
-    tOld = tNew - dtAdvance(1)
-
-    ! Compute the rotation matrix of the rigid body rotation as
-    ! well as the rotation point; the latter may vary in time due
-    ! to rigid body translation.
-
-    call rotMatrixRigidBody(tNew, tOld, rotationMatrix, rotationPoint)
-
-    ! Loop over the number of local blocks.
-
-    blockLoop: do nn=1,nDom
-
-       ! Set the pointers for this block on the current level.
-       ! Note that currentLevel must be used and not groundLevel,
-       ! because groundLevel is 1 level too coarse when this routine
-       ! is called in the full mg cycle.
-
-       call setPointers(nn, currentLevel, sps)
-       !
-       !         The rigid body motion of the entire mesh.
-       !
-       ! First the rotation.
-
-       do k=1,kl
-          do j=1,jl
-             do i=1,il
-
-                ! Determine the vector relative to the rotation point.
-
-                xp = x(i,j,k,1) - rotationPoint(1)
-                yp = x(i,j,k,2) - rotationPoint(2)
-                zp = x(i,j,k,3) - rotationPoint(3)
-
-                ! Apply the transformation matrix to the vector (xp,yp,zp)
-                ! and set the new coordinates.
-
-                x(i,j,k,1) = rotationMatrix(1,1)*xp &
-                     + rotationMatrix(1,2)*yp &
-                     + rotationMatrix(1,3)*zp + rotationPoint(1)
-                x(i,j,k,2) = rotationMatrix(2,1)*xp &
-                     + rotationMatrix(2,2)*yp &
-                     + rotationMatrix(2,3)*zp + rotationPoint(2)
-                x(i,j,k,3) = rotationMatrix(3,1)*xp &
-                     + rotationMatrix(3,2)*yp &
-                     + rotationMatrix(3,3)*zp + rotationPoint(3)
-             enddo
-          enddo
-       enddo
-
-       ! Add the translation.
-
-       do k=1,kl
-          do j=1,jl
-             do i=1,il
-                x(i,j,k,1) = x(i,j,k,1) + displX
-                x(i,j,k,2) = x(i,j,k,2) + displY
-                x(i,j,k,3) = x(i,j,k,3) + displZ
-             enddo
-          enddo
-       enddo
-
-       !
-       !         Determine whether the corresponding cgns block is a rotating
-       !         block. If it is, apply the rotation.
-       !         Note that now the section ID of the block is taken into
-       !         account to allow for different periodic times per section.
-       !
-       if( cgnsDoms(nbkGlobal)%rotatingFrameSpecified ) then
-
-          ! Compute the rotation angles.
-
-          angleX = dtAdvance(sectionID)*cgnsDoms(nbkGlobal)%rotRate(1)
-          angleY = dtAdvance(sectionID)*cgnsDoms(nbkGlobal)%rotRate(2)
-          angleZ = dtAdvance(sectionID)*cgnsDoms(nbkGlobal)%rotRate(3)
-
-          ! Compute the unit vector in the direction of the rotation
-          ! axis, which will be called the xi-direction.
-
-          t    = one/max(eps,sqrt(angleX**2 + angleY**2 + angleZ**2))
-          xiX = t*angleX
-          xiY = t*angleY
-          xiZ = t*angleZ
-
-          ! Determine the rotation angle in xi-direction and its sine
-          ! and cosine. Due to the definition of the xi-direction this
-          ! angle will always be positive.
-
-          phi    = xiX*angleX + xiY*angleY + xiZ*angleZ
-          cosPhi = cos(phi)
-          sinPhi = sin(phi)
-
-          ! Loop over the owned coordinates of this block.
-
-          do k=1,kl
-             do j=1,jl
-                do i=1,il
-
-                   ! Compute the vector relative to center of rotation.
-
-                   dx = x(i,j,k,1) - cgnsDoms(nbkGlobal)%rotCenter(1)
-                   dy = x(i,j,k,2) - cgnsDoms(nbkGlobal)%rotCenter(2)
-                   dz = x(i,j,k,3) - cgnsDoms(nbkGlobal)%rotCenter(3)
-
-                   ! Compute the coordinates of the point p, which is the
-                   ! closest point on the rotation axis.
-
-                   t  = dx*xiX + dy*xiY + dz*xiZ
-                   xp = cgnsDoms(nbkGlobal)%rotCenter(1) + t*xiX
-                   yp = cgnsDoms(nbkGlobal)%rotCenter(2) + t*xiY
-                   zp = cgnsDoms(nbkGlobal)%rotCenter(3) + t*xiZ
-
-                   ! Determine the unit vector in eta direction, which
-                   ! is defined from point p to the current point.
-
-                   etaX = x(i,j,k,1) - xp
-                   etaY = x(i,j,k,2) - yp
-                   etaZ = x(i,j,k,3) - zp
-
-                   eta = sqrt(etaX**2 + etaY**2 + etaZ**2)
-                   t   = one/max(eps,eta)
-
-                   etaX = t*etaX
-                   etaY = t*etaY
-                   etaZ = t*etaZ
-
-                   ! Determine the unit vector in zeta-direction. This is
-                   ! the cross product of the unit vectors in xi and in
-                   ! eta-direction.
-
-                   zetaX = xiY*etaZ - xiZ*etaY
-                   zetaY = xiZ*etaX - xiX*etaZ
-                   zetaZ = xiX*etaY - xiY*etaX
-
-                   ! Compute the new eta and zeta coordinates.
-
-                   zeta = eta*sinPhi
-                   eta  = eta*cosPhi
-
-                   ! Compute the new cartesian coordinates.
-
-                   x(i,j,k,1) = xp + eta*etaX + zeta*zetaX
-                   x(i,j,k,2) = yp + eta*etaY + zeta*zetaY
-                   x(i,j,k,3) = zp + eta*etaZ + zeta*zetaZ
-
-                enddo
-             enddo
-          enddo
-
-       endif
-
-    enddo blockLoop
-
-  end subroutine updateCoorFineMesh
-  subroutine allocCoorFineGrid
-    !
-    !       allocCoorFineGrid allocates the memory for all the coordinates
-    !       of all local blocks. Also the memory for the derived data type
-    !       used for the reading is allocated. If an interpolation must be
-    !       performed for the time spectral method the variables of this
-    !       IO type are allocated as well. For all other cases the pointer
-    !       of the variables are set to the appropriate entry in flowDoms.
-    !
-    use constants
-    use block, only : nDom, flowDoms
-    use inputPhysics, only : equationMode
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use IOModule, only : IOVar
-    use iteration, only : nOldLevels, deforming_grid
-    use partitionMod, only : interpolSpectral, nGridsRead
-    use utils, only : terminate
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: nn, mm
-    integer(kind=intType) :: il, jl, kl, ie, je, ke
-
-    ! Loop over the local blocks and allocate the memory for the
-    ! coordinates.
-
-    blockLoop: do nn=1,nDom
-
-       ! Some abbreviations of the block dimensions.
-
-       il = flowDoms(nn,1,1)%il
-       jl = flowDoms(nn,1,1)%jl
-       kl = flowDoms(nn,1,1)%kl
+            ie = flowDoms(nn, 1, 1)%ie
+            je = flowDoms(nn, 1, 1)%je
+            ke = flowDoms(nn, 1, 1)%ke
 
-       ie = flowDoms(nn,1,1)%ie
-       je = flowDoms(nn,1,1)%je
-       ke = flowDoms(nn,1,1)%ke
+            ! Loop over the number of spectral modes and allocate the
+            ! memory of the coordinates, single halo's included. The halo
+            ! values will be initialized in the preprocessing part.
 
-       ! Loop over the number of spectral modes and allocate the
-       ! memory of the coordinates, single halo's included. The halo
-       ! values will be initialized in the preprocessing part.
+            do mm = 1, nTimeIntervalsSpectral
+                allocate (flowDoms(nn, 1, mm)%x(0:ie, 0:je, 0:ke, 3), stat=ierr)
+                if (ierr /= 0) &
+                    call terminate("allocCoorFineGrid", &
+                                   "Memory allocation failure for flowDoms%x")
 
-       do mm=1,nTimeIntervalsSpectral
-          allocate(flowDoms(nn,1,mm)%x(0:ie,0:je,0:ke,3), stat=ierr)
-          if(ierr /= 0)                         &
-               call terminate("allocCoorFineGrid", &
-               "Memory allocation failure for flowDoms%x")
+                flowDoms(nn, 1, mm)%x = 0.0
 
-          flowDoms(nn,1,mm)%x = 0.0
+                !allocate xInit for all time spectral intervals for meshwarping
+
+                if (ierr /= 0) &
+                    call terminate("allocCoorFineGrid", &
+                                   "Memory allocation failure for flowDoms%xInit")
+                !flowDoms(nn,1,mm)%xInit=0.0
+                !for the first grid also allocate xPlus and xMinus for the
+                !meshwarping verification...
+            end do
+
+            ! For a time accurate computation on deforming meshes, allocate
+            ! the memory for the old coordinates. As this is not the time
+            ! spectral mode, the third index of flowDoms is always 1.
+
+            if (deforming_Grid .and. equationMode == unsteady) then
+
+                allocate (flowDoms(nn, 1, 1)%xOld(nOldLevels, 0:ie, 0:je, 0:ke, 3), &
+                          stat=ierr)
+                if (ierr /= 0) &
+                    call terminate("allocCoorFineGrid", &
+                                   "Memory allocation failure for xOld")
+            end if
+
+            ! Added by HDN
+            if (deforming_Grid .and. equationMode == unsteady) then
+
+                allocate (flowDoms(nn, 1, 1)%xALE(0:ie, 0:je, 0:ke, 3), &
+                          stat=ierr)
+                if (ierr /= 0) &
+                    call terminate("allocCoorFineGrid", &
+                                   "Memory allocation failure for xALE")
+            end if
+
+        end do blockLoop
+
+        ! Allocate the memory for IOVar.
+
+        allocate (IOVar(nDom, nGridsRead), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("allocCoorFineGrid", &
+                           "Memory allocation failure for IOVar")
+
+        ! Determine the equation mode we are solving and set the pointers
+        ! of IOVar accordingly, or even allocate the memory, if needed.
+
+        select case (equationMode)
+
+        case (steady)
+
+            ! Steady computation. Only one grid needs to be read.
+            ! Loop over the number of blocks and set the pointer.
+            ! No pointer offsets are needed for the coordinates.
+
+            do nn = 1, nDom
+                IOVar(nn, 1)%pointerOffset = 0
+                IOVar(nn, 1)%w => flowDoms(nn, 1, 1)%x(1:, 1:, 1:, :)
+            end do
+
+            !===============================================================
+
+        case (unsteady)
+
+            ! Unsteady computation. The first set of coordinates should
+            ! be stored in x, other sets (if present) in xOld.
+            ! No pointer offsets are needed for the coordinates.
 
-          !allocate xInit for all time spectral intervals for meshwarping
+            do nn = 1, nDom
+                IOVar(nn, 1)%pointerOffset = 0
+                IOVar(nn, 1)%w => flowDoms(nn, 1, 1)%x(1:, 1:, 1:, :)
 
-          if(ierr /= 0)                         &
-               call terminate("allocCoorFineGrid", &
-               "Memory allocation failure for flowDoms%xInit")
-          !flowDoms(nn,1,mm)%xInit=0.0
-          !for the first grid also allocate xPlus and xMinus for the
-          !meshwarping verification...
-       enddo
+                do mm = 2, nGridsRead
+                    IOVar(nn, mm)%pointerOffset = 0
+                    IOVar(nn, mm)%w => flowDoms(nn, 1, 1)%xOld(mm - 1, 1:, 1:, 1:, :)
+                end do
+            end do
 
-       ! For a time accurate computation on deforming meshes, allocate
-       ! the memory for the old coordinates. As this is not the time
-       ! spectral mode, the third index of flowDoms is always 1.
-
-       if(deforming_Grid .and. equationMode == unsteady) then
-
-          allocate(flowDoms(nn,1,1)%xOld(nOldLevels,0:ie,0:je,0:ke,3), &
-               stat=ierr)
-          if(ierr /= 0)                         &
-               call terminate("allocCoorFineGrid", &
-               "Memory allocation failure for xOld")
-       endif
-
-
-       ! Added by HDN
-       if(deforming_Grid .and. equationMode == unsteady) then
-
-          allocate(flowDoms(nn,1,1)%xALE(0:ie,0:je,0:ke,3), &
-               stat=ierr)
-          if(ierr /= 0)                         &
-               call terminate("allocCoorFineGrid", &
-               "Memory allocation failure for xALE")
-       endif
-
-    enddo blockLoop
-
-    ! Allocate the memory for IOVar.
-
-    allocate(IOVar(nDom,nGridsRead), stat=ierr)
-    if(ierr /= 0)                         &
-         call terminate("allocCoorFineGrid", &
-         "Memory allocation failure for IOVar")
-
-    ! Determine the equation mode we are solving and set the pointers
-    ! of IOVar accordingly, or even allocate the memory, if needed.
-
-    select case(equationMode)
-
-    case (steady)
-
-       ! Steady computation. Only one grid needs to be read.
-       ! Loop over the number of blocks and set the pointer.
-       ! No pointer offsets are needed for the coordinates.
-
-       do nn=1,nDom
-          IOVar(nn,1)%pointerOffset = 0
-          IOVar(nn,1)%w => flowDoms(nn,1,1)%x(1:,1:,1:,:)
-       enddo
-
-       !===============================================================
-
-    case (unsteady)
-
-       ! Unsteady computation. The first set of coordinates should
-       ! be stored in x, other sets (if present) in xOld.
-       ! No pointer offsets are needed for the coordinates.
+            !===============================================================
 
-       do nn=1,nDom
-          IOVar(nn,1)%pointerOffset = 0
-          IOVar(nn,1)%w => flowDoms(nn,1,1)%x(1:,1:,1:,:)
+        case (timeSpectral)
+
+            ! Time spectral mode. A further check is required.
+
+            testAllocIOVar: if (interpolSpectral .and. &
+                                nGridsRead > 1) then
+
+                ! A restart is performed for a deforming mesh using a
+                ! different number of time instances than the previous
+                ! computation. Consequently the coordinates, or better
+                ! the deformations, will be interpolated later on. Hence
+                ! some additional storage is required for the coordinates
+                ! to be read and thus the memory for the variables w of
+                ! IOVar is allocated. No halo data is needed here.
+                ! Note that for the 1st time instance the pointer is set
+                ! to the coordinates of flowDoms, because these are not
+                ! interpolated. Only the higher time instances are
+                ! interpolated. No pointer offsets are needed for the
+                ! coordinates.
+
+                do nn = 1, nDom
+                    il = flowDoms(nn, 1, 1)%il
+                    jl = flowDoms(nn, 1, 1)%jl
+                    kl = flowDoms(nn, 1, 1)%kl
+
+                    IOVar(nn, 1)%pointerOffset = 0
+                    IOVar(nn, 1)%w => flowDoms(nn, 1, 1)%x(1:, 1:, 1:, :)
+
+                    do mm = 2, nGridsRead
+                        IOVar(nn, mm)%pointerOffset = 0
+                        allocate (IOVar(nn, mm)%w(il, jl, kl, 3), stat=ierr)
+                        if (ierr /= 0) &
+                             call terminate("allocCoorFineGrid", &
+                             "Memory allocation failure for &
+                             &IOVar%w")
+                    end do
+                end do
+
+                else testAllocIOVar
+
+                ! One of the following options is true.
+                ! - The computation starts from scratch.
+                ! - A restart is performed using a rigid grid, possibly
+                !   moving. The number of time instances does not have
+                !   to be the same.
+                ! - A restart with a deforming mesh is performed with the
+                !   same number of time instances.
+                !
+                ! In all these situations the pointers of IOVar are
+                ! simply set to the coordinates of flowDoms.
+
+                do nn = 1, nDom
+                    do mm = 1, nGridsRead
+                        IOVar(nn, mm)%pointerOffset = 0
+                        IOVar(nn, mm)%w => flowDoms(nn, 1, mm)%x(1:, 1:, 1:, :)
+                    end do
+                end do
+
+            end if testAllocIOVar
+
+        end select
+
+    end subroutine allocCoorFineGrid
+    subroutine checkPartitioning(np, load_inbalance, face_inbalance)
+
+        ! This subroutine runs the load balancing and partitioning algorithm
+        ! to determine what the load balancing will be for a given number of
+        ! procs np. The output is load_inbalance and face_inbalance.
+
+        use constants
+        use communication, only: nProc
+        use partitionMod, only: ubvec
+        use loadBalance, only: blockDistribution
+        implicit none
+
+        integer(kind=intType), intent(in) :: np
+        integer(kind=intType) :: nproc_save
+        real(kind=realType), intent(out) :: load_inbalance, face_inbalance
 
-          do mm=2,nGridsRead
-             IOVar(nn,mm)%pointerOffset = 0
-             IOVar(nn,mm)%w => flowDoms(nn,1,1)%xOld(mm-1,1:,1:,1:,:)
-          enddo
-       enddo
+        ! Note: This file follows mostly partitionAndReadGrid. See
+        ! partitionAndReadGrid.f90 for more infromation
 
-       !===============================================================
+        ! Trick it into thinking we have np processors:
+        nproc_save = nproc
+        nproc = np
 
-    case (timeSpectral)
-
-       ! Time spectral mode. A further check is required.
-
-       testAllocIOVar: if(interpolSpectral .and. &
-            nGridsRead > 1) then
-
-          ! A restart is performed for a deforming mesh using a
-          ! different number of time instances than the previous
-          ! computation. Consequently the coordinates, or better
-          ! the deformations, will be interpolated later on. Hence
-          ! some additional storage is required for the coordinates
-          ! to be read and thus the memory for the variables w of
-          ! IOVar is allocated. No halo data is needed here.
-          ! Note that for the 1st time instance the pointer is set
-          ! to the coordinates of flowDoms, because these are not
-          ! interpolated. Only the higher time instances are
-          ! interpolated. No pointer offsets are needed for the
-          ! coordinates.
-
-          do nn=1,nDom
-             il = flowDoms(nn,1,1)%il
-             jl = flowDoms(nn,1,1)%jl
-             kl = flowDoms(nn,1,1)%kl
-
-             IOVar(nn,1)%pointerOffset = 0
-             IOVar(nn,1)%w => flowDoms(nn,1,1)%x(1:,1:,1:,:)
-
-             do mm=2,nGridsRead
-                IOVar(nn,mm)%pointerOffset = 0
-                allocate(IOVar(nn,mm)%w(il,jl,kl,3), stat=ierr)
-                if(ierr /= 0)                         &
-                     call terminate("allocCoorFineGrid", &
-                     "Memory allocation failure for &
-                     &IOVar%w")
-             enddo
-          enddo
-
-       else testAllocIOVar
-
-          ! One of the following options is true.
-          ! - The computation starts from scratch.
-          ! - A restart is performed using a rigid grid, possibly
-          !   moving. The number of time instances does not have
-          !   to be the same.
-          ! - A restart with a deforming mesh is performed with the
-          !   same number of time instances.
-          !
-          ! In all these situations the pointers of IOVar are
-          ! simply set to the coordinates of flowDoms.
-
-          do nn=1,nDom
-             do mm=1,nGridsRead
-                IOVar(nn,mm)%pointerOffset = 0
-                IOVar(nn,mm)%w => flowDoms(nn,1,mm)%x(1:,1:,1:,:)
-             enddo
-          enddo
-
-       endif testAllocIOVar
-
-    end select
-
-  end subroutine allocCoorFineGrid
-  subroutine checkPartitioning(np,load_inbalance,face_inbalance)
-
-    ! This subroutine runs the load balancing and partitioning algorithm
-    ! to determine what the load balancing will be for a given number of
-    ! procs np. The output is load_inbalance and face_inbalance.
-
-    use constants
-    use communication, only : nProc
-    use partitionMod, only : ubvec
-    use loadBalance, only : blockDistribution
-    implicit none
-
-    integer(kind=intType),intent(in) ::np
-    integer(kind=intType) :: nproc_save
-    real(kind=realType),intent(out) :: load_inbalance,face_inbalance
+        call blockDistribution
 
-    ! Note: This file follows mostly partitionAndReadGrid. See
-    ! partitionAndReadGrid.f90 for more infromation
+        ! Restore the number of procs
+        nproc = nproc_save
 
-    ! Trick it into thinking we have np processors:
-    nproc_save = nproc
-    nproc = np
+        ! Extract the inbalance info:
+        load_inbalance = ubvec(1)
+        face_inbalance = ubvec(2)
 
-    call blockDistribution
+    end subroutine checkPartitioning
 
-    ! Restore the number of procs
-    nproc = nproc_save
+    subroutine determineSections
+        !
+        !       determineSections determines the number of sections, i.e.
+        !       grid parts between sliding mesh interfaces, present in the
+        !       entire grid.
+        !
+        use constants
+        use block
+        use cgnsGrid
+        use communication
+        use inputTimeSpectral
+        use section
+        use su_cgns
+        use sorting, only: qsortIntegers, bsearchIntegers
+        use utils, only: terminate
+        implicit none
+        !
+        !      Local parameter, threshold for allowed angle difference between
+        !                       the theoretical and true value, 0.1 degrees.
+        !
+        real(kind=realType), parameter :: threshold = 0.1_realType
+        !
+        !      Local variables.
+        !
+        integer :: ierr
 
-    ! Extract the inbalance info:
-    load_inbalance = ubvec(1)
-    face_inbalance = ubvec(2)
+        integer(kind=intType) :: nn, mm, ii, jj
+        integer(kind=intType) :: nLevel, nSlices, slideID
+        integer(kind=intType), dimension(cgnsNDom) :: sectionID, sorted
+        integer(kind=intType), dimension(cgnsNsliding, 2) :: secSliding
 
-  end subroutine checkPartitioning
+        real(kind=realType) :: cosTheta, cosPhi, cosPsi
+        real(kind=realType) :: sinTheta, sinPhi, sinPsi
+        real(kind=realType) :: r11, r12, r13, r21, r22, r23
+        real(kind=realType) :: r31, r32, r33
+        real(kind=realType) :: d1, d2, a1, a0, lamr, lami, angle, dAngle
 
-  subroutine determineSections
-    !
-    !       determineSections determines the number of sections, i.e.
-    !       grid parts between sliding mesh interfaces, present in the
-    !       entire grid.
-    !
-    use constants
-    use block
-    use cgnsGrid
-    use communication
-    use inputTimeSpectral
-    use section
-    use su_cgns
-    use sorting, only : qsortIntegers, bsearchIntegers
-    use utils, only : terminate
-    implicit none
-    !
-    !      Local parameter, threshold for allowed angle difference between
-    !                       the theoretical and true value, 0.1 degrees.
-    !
-    real(kind=realType), parameter :: threshold = 0.1_realType
-    !
-    !      Local variables.
-    !
-    integer :: ierr
+        logical :: situationChanged
 
-    integer(kind=intType) :: nn, mm, ii, jj
-    integer(kind=intType) :: nLevel, nSlices, slideID
-    integer(kind=intType), dimension(cgnsNDom) :: sectionID, sorted
-    integer(kind=intType), dimension(cgnsNsliding,2) :: secSliding
+        ! Initialize sectionID to the cgns block number.
 
-    real(kind=realType) :: cosTheta, cosPhi, cosPsi
-    real(kind=realType) :: sinTheta, sinPhi, sinPsi
-    real(kind=realType) :: r11, r12, r13, r21, r22, r23
-    real(kind=realType) :: r31, r32, r33
-    real(kind=realType) :: d1, d2, a1, a0, lamr, lami, angle, dAngle
+        do nn = 1, cgnsNDom
+            sectionID(nn) = nn
+        end do
 
-    logical :: situationChanged
+        ! Loop to determine the highest cgns block id in every section.
 
-    ! Initialize sectionID to the cgns block number.
+        loopHighestBlock: do
 
-    do nn=1,cgnsNDom
-       sectionID(nn) = nn
-    enddo
+            ! Initialize situationChanged to .false.
 
-    ! Loop to determine the highest cgns block id in every section.
+            situationChanged = .false.
 
-    loopHighestBlock: do
+            ! Loop over the internal block faces of the blocks.
 
-       ! Initialize situationChanged to .false.
+            do nn = 1, cgnsNDom
 
-       situationChanged = .false.
+                ! First the 1 to 1 block connectivities.
 
-       ! Loop over the internal block faces of the blocks.
+                do mm = 1, cgnsDoms(nn)%n1to1
 
-       do nn=1,cgnsNDom
+                    ! Store the neighboring block a bit easier and change the
+                    ! value of sectionID if the neighboring block has a
+                    ! higher value. In that case situationChanged is .true.
 
-          ! First the 1 to 1 block connectivities.
+                    ii = cgnsDoms(nn)%conn1to1(mm)%donorBlock
+                    if (sectionID(ii) > sectionID(nn)) then
+                        sectionID(nn) = sectionID(ii)
+                        situationChanged = .true.
+                    end if
 
-          do mm=1,cgnsDoms(nn)%n1to1
+                end do
 
-             ! Store the neighboring block a bit easier and change the
-             ! value of sectionID if the neighboring block has a
-             ! higher value. In that case situationChanged is .true.
+                ! No general connectivities yet.
 
-             ii = cgnsDoms(nn)%conn1to1(mm)%donorBlock
-             if(sectionID(ii) > sectionID(nn)) then
-                sectionID(nn)    = sectionID(ii)
-                situationChanged = .true.
-             endif
+            end do
 
-          enddo
+            ! Criterion to exit the loop.
 
-          ! No general connectivities yet.
+            if (.not. situationChanged) exit
 
-       enddo
+        end do loopHighestBlock
 
-       ! Criterion to exit the loop.
+        ! Copy sectionID in sorted and sort sorted in increasing order.
 
-       if(.not. situationChanged) exit
+        do nn = 1, cgnsNDom
+            sorted(nn) = sectionID(nn)
+        end do
 
-    enddo loopHighestBlock
+        call qsortIntegers(sorted, cgnsNDom)
 
-    ! Copy sectionID in sorted and sort sorted in increasing order.
+        ! Determine the number of sections. Note there is at least one
+        ! section, because there is at least one block in the grid.
 
-    do nn=1,cgnsNDom
-       sorted(nn) = sectionID(nn)
-    enddo
+        nSections = 1
+        do nn = 2, cgnsNDom
+            if (sorted(nn) > sorted(nSections)) then
+                nSections = nSections + 1
+                sorted(nSections) = sorted(nn)
+            end if
+        end do
 
-    call qsortIntegers(sorted, cgnsNDom)
+        ! Determine the sections to which the owned blocks belong.
 
-    ! Determine the number of sections. Note there is at least one
-    ! section, because there is at least one block in the grid.
+        nLevel = ubound(flowDoms, 2)
+        do nn = 1, nDom
 
-    nSections = 1
-    do nn=2,cgnsNDom
-       if(sorted(nn) > sorted(nSections)) then
-          nSections = nSections +1
-          sorted(nSections) = sorted(nn)
-       endif
-    enddo
+            ! Determine the corresponding cgns block id and search its
+            ! section id in sorted.
 
-    ! Determine the sections to which the owned blocks belong.
+            mm = flowDoms(nn, 1, 1)%cgnsBlockID
+            mm = bsearchIntegers(sectionID(mm), sorted)
 
-    nLevel = ubound(flowDoms,2)
-    do nn=1,nDom
+            if (debug) then
+                if (mm == 0) call terminate("determineSections", &
+                                            "Entry not found in sorted.")
+            end if
 
-       ! Determine the corresponding cgns block id and search its
-       ! section id in sorted.
+            ! Set the section id for all grid levels for all spectral
+            ! time intervals.
 
-       mm = flowDoms(nn,1,1)%cgnsBlockID
-       mm = bsearchIntegers(sectionID(mm), sorted)
+            do jj = 1, nTimeIntervalsSpectral
+                do ii = 1, nLevel
+                    flowDoms(nn, ii, jj)%sectionID = mm
+                end do
+            end do
 
-       if( debug ) then
-          if(mm == 0) call terminate("determineSections", &
-               "Entry not found in sorted.")
-       endif
+        end do
 
-       ! Set the section id for all grid levels for all spectral
-       ! time intervals.
+        ! Allocate the memory for sections.
 
-       do jj=1,nTimeIntervalsSpectral
-          do ii=1,nLevel
-             flowDoms(nn,ii,jj)%sectionID = mm
-          enddo
-       enddo
+        allocate (sections(nSections), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("determineSections", &
+                           "Memory allocation failure for sections.")
 
-    enddo
+        ! Initialize the number of slices for each of the sections to 1,
+        ! periodic and rotating to .false. and rotCenter, rotAxis
+        ! and rotRate to zero.
 
-    ! Allocate the memory for sections.
+        do nn = 1, nSections
+            sections(nn)%nSlices = 1
+            sections(nn)%periodic = .false.
+            sections(nn)%rotating = .false.
+            sections(nn)%rotCenter = zero
+            sections(nn)%rotAxis = zero
+            sections(nn)%rotRate = zero
+        end do
 
-    allocate(sections(nSections), stat=ierr)
-    if(ierr /= 0)                         &
-         call terminate("determineSections", &
-         "Memory allocation failure for sections.")
+        ! Determine the number of slices and the periodic transformation
+        ! for the sections.
 
-    ! Initialize the number of slices for each of the sections to 1,
-    ! periodic and rotating to .false. and rotCenter, rotAxis
-    ! and rotRate to zero.
+        loopCGNSDom: do nn = 1, cgnsNDom
 
-    do nn=1,nSections
-       sections(nn)%nSlices   = 1
-       sections(nn)%periodic  = .false.
-       sections(nn)%rotating  = .false.
-       sections(nn)%rotCenter = zero
-       sections(nn)%rotAxis   = zero
-       sections(nn)%rotRate   = zero
-    enddo
+            ! Search for the corresponding section.
 
-    ! Determine the number of slices and the periodic transformation
-    ! for the sections.
+            ii = bsearchIntegers(sectionID(nn), sorted(1:nSections))
+            if (debug) then
+                if (ii == 0) call terminate("determineSections", &
+                                            "Entry not found in sorted.")
+            end if
 
-    loopCGNSDom: do nn=1,cgnsNDom
+            ! It is assumed that periodic info is correct. So if this
+            ! section has already been treated, there is no need to do
+            ! it again.
 
-       ! Search for the corresponding section.
+            if (sections(ii)%periodic) cycle
 
-       ii = bsearchIntegers(sectionID(nn), sorted(1:nSections))
-       if( debug ) then
-          if(ii == 0) call terminate("determineSections", &
-               "Entry not found in sorted.")
-       endif
+            ! Loop over the 1 to 1 subfaces of the cgns block and try to
+            ! find a periodic one.
 
-       ! It is assumed that periodic info is correct. So if this
-       ! section has already been treated, there is no need to do
-       ! it again.
+            do mm = 1, cgnsDoms(nn)%n1to1
+                if (cgnsDoms(nn)%conn1to1(mm)%periodic) exit
+            end do
 
-       if( sections(ii)%periodic ) cycle
+            ! Continue with the next block if this block does not have
+            ! periodic subfaces.
 
-       ! Loop over the 1 to 1 subfaces of the cgns block and try to
-       ! find a periodic one.
+            if (mm > cgnsDoms(nn)%n1to1) cycle
 
-       do mm=1,cgnsDoms(nn)%n1to1
-          if( cgnsDoms(nn)%conn1to1(mm)%periodic ) exit
-       enddo
+            ! Subface mm is a periodic one. Set periodic to .true.
 
-       ! Continue with the next block if this block does not have
-       ! periodic subfaces.
+            sections(ii)%periodic = .true.
 
-       if(mm > cgnsDoms(nn)%n1to1) cycle
+            ! Set the rotation axis of the section to the rotation
+            ! angles of the periodic transformation. This may be
+            ! overwritten later on using the rotation rate, but for
+            ! some cases this is the only rotation information present.
 
-       ! Subface mm is a periodic one. Set periodic to .true.
+            sections(ii)%rotAxis = cgnsDoms(nn)%conn1to1(mm)%rotationAngles
 
-       sections(ii)%periodic = .true.
+            ! Construct the rotation matrix, where it is assumed that the
+            ! sequence of rotation is first rotation around the x-axis,
+            ! followed by rotation around the y-axis and finally rotation
+            ! around the z-axis.
 
-       ! Set the rotation axis of the section to the rotation
-       ! angles of the periodic transformation. This may be
-       ! overwritten later on using the rotation rate, but for
-       ! some cases this is the only rotation information present.
+            cosTheta = cos(cgnsDoms(nn)%conn1to1(mm)%rotationAngles(1))
+            sinTheta = sin(cgnsDoms(nn)%conn1to1(mm)%rotationAngles(1))
 
-       sections(ii)%rotAxis = cgnsDoms(nn)%conn1to1(mm)%rotationAngles
+            cosPhi = cos(cgnsDoms(nn)%conn1to1(mm)%rotationAngles(2))
+            sinPhi = sin(cgnsDoms(nn)%conn1to1(mm)%rotationAngles(2))
 
-       ! Construct the rotation matrix, where it is assumed that the
-       ! sequence of rotation is first rotation around the x-axis,
-       ! followed by rotation around the y-axis and finally rotation
-       ! around the z-axis.
+            cosPsi = cos(cgnsDoms(nn)%conn1to1(mm)%rotationAngles(3))
+            sinPsi = sin(cgnsDoms(nn)%conn1to1(mm)%rotationAngles(3))
 
-       cosTheta = cos(cgnsDoms(nn)%conn1to1(mm)%rotationAngles(1))
-       sinTheta = sin(cgnsDoms(nn)%conn1to1(mm)%rotationAngles(1))
+            r11 = cosPhi * cosPsi
+            r21 = cosPhi * sinPsi
+            r31 = -sinPhi
 
-       cosPhi = cos(cgnsDoms(nn)%conn1to1(mm)%rotationAngles(2))
-       sinPhi = sin(cgnsDoms(nn)%conn1to1(mm)%rotationAngles(2))
+            r12 = sinTheta * sinPhi * cosPsi - cosTheta * sinPsi
+            r22 = sinTheta * sinPhi * sinPsi + cosTheta * cosPsi
+            r32 = sinTheta * cosPhi
 
-       cosPsi = cos(cgnsDoms(nn)%conn1to1(mm)%rotationAngles(3))
-       sinPsi = sin(cgnsDoms(nn)%conn1to1(mm)%rotationAngles(3))
+            r13 = cosTheta * sinPhi * cosPsi + sinTheta * sinPsi
+            r23 = cosTheta * sinPhi * sinPsi - sinTheta * cosPsi
+            r33 = cosTheta * cosPhi
 
-       r11 =  cosPhi*cosPsi
-       r21 =  cosPhi*sinPsi
-       r31 = -sinPhi
+            ! Store the rotation matrix, rotation center and translation
+            ! vector for this section.
 
-       r12 = sinTheta*sinPhi*cosPsi - cosTheta*sinPsi
-       r22 = sinTheta*sinPhi*sinPsi + cosTheta*cosPsi
-       r32 = sinTheta*cosPhi
+            sections(ii)%rotCenter = &
+                cgnsDoms(nn)%conn1to1(mm)%rotationCenter
+            sections(ii)%translation = &
+                cgnsDoms(nn)%conn1to1(mm)%translation
 
-       r13 = cosTheta*sinPhi*cosPsi + sinTheta*sinPsi
-       r23 = cosTheta*sinPhi*sinPsi - sinTheta*cosPsi
-       r33 = cosTheta*cosPhi
+            sections(ii)%rotMatrix(1, 1) = r11
+            sections(ii)%rotMatrix(2, 1) = r21
+            sections(ii)%rotMatrix(3, 1) = r31
 
-       ! Store the rotation matrix, rotation center and translation
-       ! vector for this section.
+            sections(ii)%rotMatrix(1, 2) = r12
+            sections(ii)%rotMatrix(2, 2) = r22
+            sections(ii)%rotMatrix(3, 2) = r32
 
-       sections(ii)%rotCenter  = &
-            cgnsDoms(nn)%conn1to1(mm)%rotationCenter
-       sections(ii)%translation = &
-            cgnsDoms(nn)%conn1to1(mm)%translation
+            sections(ii)%rotMatrix(1, 3) = r13
+            sections(ii)%rotMatrix(2, 3) = r23
+            sections(ii)%rotMatrix(3, 3) = r33
 
-       sections(ii)%rotMatrix(1,1) = r11
-       sections(ii)%rotMatrix(2,1) = r21
-       sections(ii)%rotMatrix(3,1) = r31
+            ! Determine the coefficients of lambda and lambda^2 of the
+            ! characteristic polynomial.
 
-       sections(ii)%rotMatrix(1,2) = r12
-       sections(ii)%rotMatrix(2,2) = r22
-       sections(ii)%rotMatrix(3,2) = r32
+            d2 = -r11 - r22 - r33
+            d1 = r11 * r22 + r11 * r33 + r22 * r33 - r12 * r21 - r13 * r31 - r23 * r32
 
-       sections(ii)%rotMatrix(1,3) = r13
-       sections(ii)%rotMatrix(2,3) = r23
-       sections(ii)%rotMatrix(3,3) = r33
+            ! Make use of the fact that one eigenvalue of the transformation
+            ! matrix is 1 and determine the coefficients of the quadratic
+            ! equation for the other two eigenvalues.
 
-       ! Determine the coefficients of lambda and lambda^2 of the
-       ! characteristic polynomial.
+            a1 = d2 + one
+            a0 = a1 + d1
 
-       d2 = -r11 - r22 - r33
-       d1 =  r11*r22 + r11*r33 + r22*r33 - r12*r21 - r13*r31 - r23*r32
+            ! Determine the real and imaginary part of the two eigenvalues.
+            ! Neglect the factor 1/2 here.
 
-       ! Make use of the fact that one eigenvalue of the transformation
-       ! matrix is 1 and determine the coefficients of the quadratic
-       ! equation for the other two eigenvalues.
+            lamr = -a1
+            lami = sqrt(abs(a1 * a1 - four * a0))
 
-       a1 = d2 + one
-       a0 = a1 + d1
+            ! Determine the angle in the imaginary plane. Due to the
+            ! positive definition of lami, this angle will be between
+            ! 0 and pi. Take care of the exceptional case that the angle
+            ! is zero.
 
-       ! Determine the real and imaginary part of the two eigenvalues.
-       ! Neglect the factor 1/2 here.
+            angle = atan2(lami, lamr)
+            if (angle == zero) angle = two * pi
 
-       lamr = -a1
-       lami = sqrt(abs(a1*a1 - four*a0))
+            ! Determine the number of slices.
 
-       ! Determine the angle in the imaginary plane. Due to the
-       ! positive definition of lami, this angle will be between
-       ! 0 and pi. Take care of the exceptional case that the angle
-       ! is zero.
+            nSlices = nint(two * pi / angle)
 
-       angle = atan2(lami, lamr)
-       if(angle == zero) angle = two*pi
+            ! Determine the angle difference in degrees between
+            ! nSlices*angle and a complete rotation. If this is larger than
+            ! the threshold processor 0 will print an error message and exit.
 
-       ! Determine the number of slices.
+            dAngle = abs(180.0_realType * (two * pi - nSlices * angle) / pi)
 
-       nSlices = nint(two*pi/angle)
-
-       ! Determine the angle difference in degrees between
-       ! nSlices*angle and a complete rotation. If this is larger than
-       ! the threshold processor 0 will print an error message and exit.
-
-       dAngle = abs(180.0_realType*(two*pi - nSlices*angle)/pi)
-
-       if(dAngle >= threshold) then
-          if(myID == 0)                         &
-               call terminate("determineSections", &
-               "Periodic angle not a integer divide of &
-               &360 degrees")
-          call mpi_barrier(ADflow_comm_world, ierr)
-       endif
+            if (dAngle >= threshold) then
+                if (myID == 0) &
+                     call terminate("determineSections", &
+                     "Periodic angle not a integer divide of &
+                     &360 degrees")
+                call mpi_barrier(ADflow_comm_world, ierr)
+            end if
 
-       ! Set the number of slices for this section.
+            ! Set the number of slices for this section.
 
-       sections(ii)%nSlices = nSlices
+            sections(ii)%nSlices = nSlices
 
-    enddo loopCGNSDom
+        end do loopCGNSDom
 
-    ! Again loop over the number of block of the original mesh,
-    ! but now determine whether or not the section is rotating.
+        ! Again loop over the number of block of the original mesh,
+        ! but now determine whether or not the section is rotating.
 
-    do nn=1,cgnsNDom
+        do nn = 1, cgnsNDom
 
-       ! If the block is rotating, copy that information to
-       ! the corresponding section. If the section is not
-       ! periodic, also set the rotation center.
+            ! If the block is rotating, copy that information to
+            ! the corresponding section. If the section is not
+            ! periodic, also set the rotation center.
 
-       if( cgnsDoms(nn)%rotatingFrameSpecified ) then
+            if (cgnsDoms(nn)%rotatingFrameSpecified) then
 
-          ii = bsearchIntegers(sectionID(nn), sorted(1:nSections))
-          sections(ii)%rotating = .true.
-          sections(ii)%rotAxis  = cgnsDoms(nn)%rotRate
-          sections(ii)%rotRate  = cgnsDoms(nn)%rotRate
+                ii = bsearchIntegers(sectionID(nn), sorted(1:nSections))
+                sections(ii)%rotating = .true.
+                sections(ii)%rotAxis = cgnsDoms(nn)%rotRate
+                sections(ii)%rotRate = cgnsDoms(nn)%rotRate
 
-          if(.not. sections(ii)%periodic) &
-               sections(ii)%rotCenter = cgnsDoms(nn)%rotCenter
+                if (.not. sections(ii)%periodic) &
+                    sections(ii)%rotCenter = cgnsDoms(nn)%rotCenter
 
-       endif
-    enddo
+            end if
+        end do
 
-    ! Determine the two sections for every sliding mesh
-    ! interface.
+        ! Determine the two sections for every sliding mesh
+        ! interface.
 
-    secSliding = 0
-    do nn=1,cgnsNDom
-       do mm=1,cgnsDoms(nn)%nBocos
-          if(cgnsDoms(nn)%bocoInfo(mm)%actualFace .and. &
-               cgnsDoms(nn)%bocoInfo(mm)%BCType == SlidingInterface) then
+        secSliding = 0
+        do nn = 1, cgnsNDom
+            do mm = 1, cgnsDoms(nn)%nBocos
+                if (cgnsDoms(nn)%bocoInfo(mm)%actualFace .and. &
+                    cgnsDoms(nn)%bocoInfo(mm)%BCType == SlidingInterface) then
 
-             ! Boundary face is part of a sliding mesh interface.
-             ! Determine the ID of the interface.
+                    ! Boundary face is part of a sliding mesh interface.
+                    ! Determine the ID of the interface.
 
-             slideID = abs(cgnsDoms(nn)%bocoInfo(mm)%slidingID)
+                    slideID = abs(cgnsDoms(nn)%bocoInfo(mm)%slidingID)
 
-             ! Determine the section to which this block belongs and
-             ! store its id for this sliding interface.
+                    ! Determine the section to which this block belongs and
+                    ! store its id for this sliding interface.
 
-             ii = bsearchIntegers(sectionID(nn), sorted(1:nSections))
-             if(secSliding(slideID,1) == 0) then
-                secSliding(slideID,1) = ii
-             else if(secSliding(slideID,1) /= ii) then
-                secSliding(slideID,2) = ii
-             endif
+                    ii = bsearchIntegers(sectionID(nn), sorted(1:nSections))
+                    if (secSliding(slideID, 1) == 0) then
+                        secSliding(slideID, 1) = ii
+                    else if (secSliding(slideID, 1) /= ii) then
+                        secSliding(slideID, 2) = ii
+                    end if
 
-          endif
-       enddo
-    enddo
+                end if
+            end do
+        end do
 
-    ! Loop over the sliding mesh interfaces to set the rotation axis
-    ! for non-rotating sections.
+        ! Loop over the sliding mesh interfaces to set the rotation axis
+        ! for non-rotating sections.
 
-    do ii=1,cgnsNsliding
+        do ii = 1, cgnsNsliding
 
-       ! Store the two sections id's a bit easier.
+            ! Store the two sections id's a bit easier.
 
-       mm = secSliding(ii,1)
-       nn = secSliding(ii,2)
+            mm = secSliding(ii, 1)
+            nn = secSliding(ii, 2)
 
-       ! Print an error message if both sections are not rotating.
+            ! Print an error message if both sections are not rotating.
 
-       if((.not. sections(mm)%rotating) .and. &
-            (.not. sections(nn)%rotating) ) then
-          if(myID == 0)                         &
-               call terminate("determineSections", &
-               "Encountered sliding interface between &
-               &two non-rotating sections")
-          call mpi_barrier(ADflow_comm_world, ierr)
-       endif
+            if ((.not. sections(mm)%rotating) .and. &
+                (.not. sections(nn)%rotating)) then
+                if (myID == 0) &
+                     call terminate("determineSections", &
+                     "Encountered sliding interface between &
+                     &two non-rotating sections")
+                call mpi_barrier(ADflow_comm_world, ierr)
+            end if
 
-       ! Set the rotation axis if section mm is not rotating.
-       ! If it is not periodic also set the rotation point.
+            ! Set the rotation axis if section mm is not rotating.
+            ! If it is not periodic also set the rotation point.
 
-       if(.not. sections(mm)%rotating) then
-          sections(mm)%rotAxis = sections(nn)%rotAxis
-          if(.not. sections(mm)%periodic) &
-               sections(mm)%rotCenter = sections(nn)%rotCenter
-       endif
+            if (.not. sections(mm)%rotating) then
+                sections(mm)%rotAxis = sections(nn)%rotAxis
+                if (.not. sections(mm)%periodic) &
+                    sections(mm)%rotCenter = sections(nn)%rotCenter
+            end if
 
-       ! Idem for section nn.
+            ! Idem for section nn.
 
-       if(.not. sections(nn)%rotating) then
-          sections(nn)%rotAxis = sections(mm)%rotAxis
-          if(.not. sections(nn)%periodic) &
-               sections(nn)%rotCenter = sections(mm)%rotCenter
-       endif
+            if (.not. sections(nn)%rotating) then
+                sections(nn)%rotAxis = sections(mm)%rotAxis
+                if (.not. sections(nn)%periodic) &
+                    sections(nn)%rotCenter = sections(mm)%rotCenter
+            end if
 
-    enddo
+        end do
 
-    ! Determine the unit rotation axis for the sections.
+        ! Determine the unit rotation axis for the sections.
 
-    do nn=1,nSections
-       d1 = one/max(eps,sqrt(sections(nn)%rotAxis(1)**2 &
-            +                  sections(nn)%rotAxis(2)**2 &
-            +                  sections(nn)%rotAxis(3)**2))
+        do nn = 1, nSections
+            d1 = one / max(eps, sqrt(sections(nn)%rotAxis(1)**2 &
+                                     + sections(nn)%rotAxis(2)**2 &
+                                     + sections(nn)%rotAxis(3)**2))
 
-       sections(nn)%rotAxis(1) = d1*sections(nn)%rotAxis(1)
-       sections(nn)%rotAxis(2) = d1*sections(nn)%rotAxis(2)
-       sections(nn)%rotAxis(3) = d1*sections(nn)%rotAxis(3)
-    enddo
+            sections(nn)%rotAxis(1) = d1 * sections(nn)%rotAxis(1)
+            sections(nn)%rotAxis(2) = d1 * sections(nn)%rotAxis(2)
+            sections(nn)%rotAxis(3) = d1 * sections(nn)%rotAxis(3)
+        end do
 
-  end subroutine determineSections
+    end subroutine determineSections
 
 end module partitioning
diff --git a/src/partitioning/readCGNSGrid.F90 b/src/partitioning/readCGNSGrid.F90
index f6e7c417b..78436ad65 100644
--- a/src/partitioning/readCGNSGrid.F90
+++ b/src/partitioning/readCGNSGrid.F90
@@ -2,3268 +2,3258 @@ module readCGNSGrid
 
 contains
 
-  subroutine readBlockSizes
-    !
-    !       readBlockSizes reads the number of blocks and their size
-    !       from the given grid file. The data is stored in the module
-    !       cgnsGrid.
-    !       If multiple grids need to be read for a consistent restart, it
-    !       is checked that the number of blocks and the block sizes are
-    !       identical.
-    !
-    use constants
-    use su_cgns
-    use cgnsGrid, only : cgnsDoms, cgnsNDom, cgnsFamilies, &
-         cgnsCellDim, cgnsPhysDim, cgnsDomsd, cgnsBaseName, &
-         cgnsNFamilies
-    use communication, only : myid, adflow_comm_world
-    use inputPhysics, only: equations, equationMode
-    use iteration, only : changing_grid, deforming_grid
-    use partitionMod, only: fileIds, gridFiles, nGridsRead
-    use utils, only : terminate, nullifyCGNSDomPointers
-    use sorting, only : bsearchStrings, qsortStrings
-    use commonFormats, only : strings, stringInt1
-    implicit none
-    !
-    !      Local variables
-    !
-    integer :: cgnsInd, cgnsNbases, cgnsBase, cgnsNzones
-    integer :: i, nZone
-    integer :: ierr
-    integer :: nDoubleBoundFaces
-
-    integer(kind=intType) :: ii, nn
-
-    integer(kind=intType), dimension(:), allocatable :: famID
-
-    character(len=2*maxStringLen) :: errorMessage
-    character(len=7)              :: integerString
-
-    character(len=maxCGNSNameLen), dimension(:), allocatable :: &
-         sortedFamName
-
-    logical :: noUnits
-
-    ! Open the cgns files for reading and check if it went okay.
-    ! Later on it is assumed that the 1st file stored in gridFiles
-    ! is the "master".
-
-    do nn=1,nGridsRead
-       call cg_open_f(gridFiles(nn), mode_read, fileIDs(nn), ierr)
-       if(ierr /= CG_OK) then
-          write(errorMessage,*) "File ", trim(gridFiles(nn)), &
-               " could not be opened for reading"
-          call terminate("readBlockSizes", errorMessage)
-       endif
-    enddo
-
-    cgnsInd = fileIDs(1)
-
-    ! Determine the number of bases in the cgns file.
-    ! This must be at least 1.
-
-    call cg_nBases_f(cgnsInd, cgnsNbases, ierr)
-    if(ierr /= CG_OK)                 &
-         call terminate("readBlockSizes", &
-         "Something wrong when calling cg_nBases_f")
-
-    if(cgnsNbases < 1) then
-       write(errorMessage,*) "CGNS file ", trim(gridFiles(1)), &
-            " does not contain a base"
-       call terminate("readBlockSizes", errorMessage)
-    endif
-
-    ! Set cgnsBase explicitly to 1, because for the reading
-    ! of the connectivity and boundary info only the info
-    ! of the first base is needed. Here it is assumed that
-    ! base 1 is the primary base.
-
-    cgnsBase = 1
-
-    ! Read the cell and physical dimensions as well as the name for
-    ! this base.
-
-    call cg_base_read_f(cgnsInd, cgnsBase, cgnsBasename, &
-         cgnsCelldim, cgnsPhysdim, ierr)
-    if(ierr /= CG_OK)                 &
-         call terminate("readBlockSizes", &
-         "Something wrong when calling cg_base_read_f")
-
-    ! Check the cell and physical dimensions. Both must be 3 for
-    ! this code to work.
-
-    if(cgnsCelldim /= 3 .or. cgnsPhysdim /= 3) then
-       write(errorMessage, stringInt1) "Both the number of cell and physical dimensions should be 3, not ", &
-         cgnsCelldim, " and ", cgnsPhysdim
-       call terminate("readBlockSizes", errorMessage)
-    endif
-
-    ! Read the family info, if present.
-
-    call readFamilyInfo(cgnsInd, cgnsBase)
-
-    ! Allocate the memory for famID and sortedFamName.
-
-    allocate(famID(cgnsNFamilies), sortedFamName(cgnsNFamilies), &
-         stat=ierr)
-    if(ierr /= 0)                      &
-         call terminate("readBlockSizes", &
-         "Memory allocation failure for sorted family names")
-
-    ! Determine the sorted version of the family names and determine
-    ! the entries in the original numbering. FamID is initialized
-    ! to -1, which serves as a check later on.
-
-    do i=1,cgnsNFamilies
-       sortedFamName(i) = cgnsFamilies(i)%familyName
-       famID(i)         = -1
-    enddo
-
-    nn = cgnsNFamilies
-    call qsortStrings(sortedFamName, nn)
+    subroutine readBlockSizes
+        !
+        !       readBlockSizes reads the number of blocks and their size
+        !       from the given grid file. The data is stored in the module
+        !       cgnsGrid.
+        !       If multiple grids need to be read for a consistent restart, it
+        !       is checked that the number of blocks and the block sizes are
+        !       identical.
+        !
+        use constants
+        use su_cgns
+        use cgnsGrid, only: cgnsDoms, cgnsNDom, cgnsFamilies, &
+                            cgnsCellDim, cgnsPhysDim, cgnsDomsd, cgnsBaseName, &
+                            cgnsNFamilies
+        use communication, only: myid, adflow_comm_world
+        use inputPhysics, only: equations, equationMode
+        use iteration, only: changing_grid, deforming_grid
+        use partitionMod, only: fileIds, gridFiles, nGridsRead
+        use utils, only: terminate, nullifyCGNSDomPointers
+        use sorting, only: bsearchStrings, qsortStrings
+        use commonFormats, only: strings, stringInt1
+        implicit none
+        !
+        !      Local variables
+        !
+        integer :: cgnsInd, cgnsNbases, cgnsBase, cgnsNzones
+        integer :: i, nZone
+        integer :: ierr
+        integer :: nDoubleBoundFaces
+
+        integer(kind=intType) :: ii, nn
+
+        integer(kind=intType), dimension(:), allocatable :: famID
+
+        character(len=2*maxStringLen) :: errorMessage
+        character(len=7) :: integerString
+
+        character(len=maxCGNSNameLen), dimension(:), allocatable :: &
+            sortedFamName
+
+        logical :: noUnits
+
+        ! Open the cgns files for reading and check if it went okay.
+        ! Later on it is assumed that the 1st file stored in gridFiles
+        ! is the "master".
+
+        do nn = 1, nGridsRead
+            call cg_open_f(gridFiles(nn), mode_read, fileIDs(nn), ierr)
+            if (ierr /= CG_OK) then
+                write (errorMessage, *) "File ", trim(gridFiles(nn)), &
+                    " could not be opened for reading"
+                call terminate("readBlockSizes", errorMessage)
+            end if
+        end do
+
+        cgnsInd = fileIDs(1)
+
+        ! Determine the number of bases in the cgns file.
+        ! This must be at least 1.
+
+        call cg_nBases_f(cgnsInd, cgnsNbases, ierr)
+        if (ierr /= CG_OK) &
+            call terminate("readBlockSizes", &
+                           "Something wrong when calling cg_nBases_f")
+
+        if (cgnsNbases < 1) then
+            write (errorMessage, *) "CGNS file ", trim(gridFiles(1)), &
+                " does not contain a base"
+            call terminate("readBlockSizes", errorMessage)
+        end if
 
-    do i=1,cgnsNFamilies
-       ii = bsearchStrings(cgnsFamilies(i)%familyName, sortedFamName)
+        ! Set cgnsBase explicitly to 1, because for the reading
+        ! of the connectivity and boundary info only the info
+        ! of the first base is needed. Here it is assumed that
+        ! base 1 is the primary base.
 
-       if( debug ) then
-          if(ii == 0)                        &
-               call terminate("readBlockSizes", &
-               "Family name not found in sorted &
-               &family names.")
-       endif
+        cgnsBase = 1
 
-       ! Check if the family name is not already taken. If this is the
-       ! case the grid file is not valid.
+        ! Read the cell and physical dimensions as well as the name for
+        ! this base.
 
-       if(famID(ii) /= -1)                &
+        call cg_base_read_f(cgnsInd, cgnsBase, cgnsBasename, &
+                            cgnsCelldim, cgnsPhysdim, ierr)
+        if (ierr /= CG_OK) &
             call terminate("readBlockSizes", &
-            "Error occurs when two identical family names &
-            &are present")
+                           "Something wrong when calling cg_base_read_f")
 
-       famID(ii) = i
-    enddo
+        ! Check the cell and physical dimensions. Both must be 3 for
+        ! this code to work.
 
-    ! Initialize whether the grid is changing to whether it is
-    ! deforming
+        if (cgnsCelldim /= 3 .or. cgnsPhysdim /= 3) then
+            write (errorMessage, stringInt1) "Both the number of cell and physical dimensions should be 3, not ", &
+                cgnsCelldim, " and ", cgnsPhysdim
+            call terminate("readBlockSizes", errorMessage)
+        end if
 
-    changing_Grid = deforming_Grid
+        ! Read the family info, if present.
 
+        call readFamilyInfo(cgnsInd, cgnsBase)
 
-    ! Determine the number of zones/blocks in the grid. Note that the
-    ! reading is done using cgnsNzones (an integer variable) which is
-    ! then copied into cgnsNDom (an integer(kind=intType) variable).
-    ! cgnsNdom cannot be used directly, because a type mismatch may
-    ! occur.
+        ! Allocate the memory for famID and sortedFamName.
 
-    call cg_nZones_f(cgnsInd, cgnsBase, cgnsNzones, ierr)
-    if(ierr /= CG_OK)                 &
-         call terminate("readBlockSizes", &
-         "Something wrong when calling cg_nZones_f")
-    cgnsNDom = cgnsNzones
+        allocate (famID(cgnsNFamilies), sortedFamName(cgnsNFamilies), &
+                  stat=ierr)
+        if (ierr /= 0) &
+            call terminate("readBlockSizes", &
+                           "Memory allocation failure for sorted family names")
 
-    ! Check if the number of zones for all the grid to be read
-    ! are identical.
+        ! Determine the sorted version of the family names and determine
+        ! the entries in the original numbering. FamID is initialized
+        ! to -1, which serves as a check later on.
 
-    do nn=2,nGridsRead
-       call cg_nZones_f(fileIDs(nn), cgnsBase, cgnsNzones, ierr)
-       if(ierr /= CG_OK)                 &
-            call terminate("readBlockSizes", &
-            "Something wrong when calling cg_nZones_f")
+        do i = 1, cgnsNFamilies
+            sortedFamName(i) = cgnsFamilies(i)%familyName
+            famID(i) = -1
+        end do
 
-       if(cgnsNzones /= cgnsNDom) then
-          write(errorMessage,*) "File ", trim(gridFiles(nn)), &
-               ": Different number of blocks than&
-               & in file ", trim(gridFiles(1))
-          if(myID == 0) call terminate("readBlockSizes", errorMessage)
-          call mpi_barrier(ADflow_comm_world, ierr)
-       endif
-    enddo
+        nn = cgnsNFamilies
+        call qsortStrings(sortedFamName, nn)
 
-    ! Allocate the memory for cgnsDoms, the derived data type to
-    ! store the info of the cgns grid, and nullify its pointers.
+        do i = 1, cgnsNFamilies
+            ii = bsearchStrings(cgnsFamilies(i)%familyName, sortedFamName)
 
-    allocate(cgnsDoms(cgnsNDom), stat=ierr)
-    !and a second variable for the forwardAD
-    allocate(cgnsDomsd(cgnsNDom), stat=ierr)
-    if(ierr /= 0)                      &
-         call terminate("readBlockSizes", &
-         "Memory allocation failure for cgnsDoms")
+            if (debug) then
+                if (ii == 0) &
+                     call terminate("readBlockSizes", &
+                     "Family name not found in sorted &
+                     &family names.")
+            end if
 
-    do nn=1,cgnsNDom
-       call nullifyCGNSDomPointers(nn)
-    enddo
+            ! Check if the family name is not already taken. If this is the
+            ! case the grid file is not valid.
 
-    ! Some initializations before reading the zone info.
+            if (famID(ii) /= -1) &
+                 call terminate("readBlockSizes", &
+                 "Error occurs when two identical family names &
+                 &are present")
 
-    nDoubleBoundFaces = 0
-    noUnits           = .false.
+            famID(ii) = i
+        end do
 
-    ! Loop over the number of zones.
+        ! Initialize whether the grid is changing to whether it is
+        ! deforming
 
-    zones: do nZone=1, cgnsNDom
+        changing_Grid = deforming_Grid
 
-       ! Read the zone info
+        ! Determine the number of zones/blocks in the grid. Note that the
+        ! reading is done using cgnsNzones (an integer variable) which is
+        ! then copied into cgnsNDom (an integer(kind=intType) variable).
+        ! cgnsNdom cannot be used directly, because a type mismatch may
+        ! occur.
 
-       call readZoneInfo(cgnsBase, nZone, sortedFamName, &
-            famID, noUnits)
+        call cg_nZones_f(cgnsInd, cgnsBase, cgnsNzones, ierr)
+        if (ierr /= CG_OK) &
+            call terminate("readBlockSizes", &
+                           "Something wrong when calling cg_nZones_f")
+        cgnsNDom = cgnsNzones
+
+        ! Check if the number of zones for all the grid to be read
+        ! are identical.
+
+        do nn = 2, nGridsRead
+            call cg_nZones_f(fileIDs(nn), cgnsBase, cgnsNzones, ierr)
+            if (ierr /= CG_OK) &
+                call terminate("readBlockSizes", &
+                               "Something wrong when calling cg_nZones_f")
+
+            if (cgnsNzones /= cgnsNDom) then
+                write (errorMessage, *) "File ", trim(gridFiles(nn)), &
+                     ": Different number of blocks than&
+                     & in file ", trim(gridFiles(1))
+                if (myID == 0) call terminate("readBlockSizes", errorMessage)
+                call mpi_barrier(ADflow_comm_world, ierr)
+            end if
+        end do
 
-       ! Count the number of each connectivity for the supported
-       ! types (needed for memory allocation and to add up 1-to-1)
+        ! Allocate the memory for cgnsDoms, the derived data type to
+        ! store the info of the cgns grid, and nullify its pointers.
 
-       call countConnectivities(cgnsInd, cgnsBase, nZone)
+        allocate (cgnsDoms(cgnsNDom), stat=ierr)
+        !and a second variable for the forwardAD
+        allocate (cgnsDomsd(cgnsNDom), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("readBlockSizes", &
+                           "Memory allocation failure for cgnsDoms")
 
-       ! For this zone, read the 1-to-1 block connectivity, the
-       ! general connectivities, and the boundary conditions.
+        do nn = 1, cgnsNDom
+            call nullifyCGNSDomPointers(nn)
+        end do
 
-       call read1to1Conn(cgnsInd, cgnsBase, nZone)
-       call readGeneralConn(cgnsInd, cgnsBase, nZone)
-       call readBocos(cgnsInd, cgnsBase, nZone, &
-            ndoubleBoundFaces, sortedFamName, famId)
+        ! Some initializations before reading the zone info.
 
-    enddo zones
+        nDoubleBoundFaces = 0
+        noUnits = .false.
 
-    ! Release the memory of sortedFamName and famID.
+        ! Loop over the number of zones.
 
-    deallocate(sortedFamName, famID, stat=ierr)
-    if(ierr /= 0)                      &
-         call terminate("readBlockSizes", &
-         "Deallocation error for sortedFamName and famID")
+        zones: do nZone = 1, cgnsNDom
 
-    ! If there are double boundary faces, print a warning.
+            ! Read the zone info
 
-    if(myID == 0 .and. nDoubleBoundFaces > 0) then
-       write(integerString,"(i6)") nDoubleBoundFaces
-       integerString = adjustl(integerString)
+            call readZoneInfo(cgnsBase, nZone, sortedFamName, &
+                              famID, noUnits)
 
-       print "(a)", "#"
-       print "(a)", "#                      Warning"
-       print strings, "# ", trim(integerString), " double boundary faces found."
-       print "(a)", "# Block connectivity is kept, boundary info is neglected."
-       print "(a)", "#"
+            ! Count the number of each connectivity for the supported
+            ! types (needed for memory allocation and to add up 1-to-1)
 
-    endif
+            call countConnectivities(cgnsInd, cgnsBase, nZone)
 
-    ! If the units could not be determined, print a warning if
-    ! the viscous or unsteady equations are solved.
+            ! For this zone, read the 1-to-1 block connectivity, the
+            ! general connectivities, and the boundary conditions.
 
-    if(myID == 0 .and. noUnits) then
+            call read1to1Conn(cgnsInd, cgnsBase, nZone)
+            call readGeneralConn(cgnsInd, cgnsBase, nZone)
+            call readBocos(cgnsInd, cgnsBase, nZone, &
+                           ndoubleBoundFaces, sortedFamName, famId)
 
-       if(equations==NSEquations .or. equations==RANSEquations .or. equationMode==unsteady)then
+        end do zones
 
-          print "(a)", "#"
-          print "(a)", "#                      Warning"
-          print "(a)", "# It is assumed that the grid is given in meters."
-          print "(a)", "#"
+        ! Release the memory of sortedFamName and famID.
 
-       endif
-    endif
+        deallocate (sortedFamName, famID, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("readBlockSizes", &
+                           "Deallocation error for sortedFamName and famID")
 
+        ! If there are double boundary faces, print a warning.
 
-  end subroutine readBlockSizes
-  subroutine readFamilyInfo(cgnsInd, cgnsBase)
-    !
-    !       readFamilyInfo determines the number of families in the
-    !       given base of the cgns grid and determines their possible
-    !       boundary condition, including some user defined ones.
-    !
-    use constants
-    use su_cgns
-    use cgnsGrid, only : cgnsDoms, cgnsNDom, cgnsFamilies, cgnsNFamilies
-    use communication, only : myid, adflow_comm_world
-    use utils, only: terminate
-    use commonFormats, only : strings
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    integer, intent(in) :: cgnsInd, cgnsBase
-    !
-    !      Local variables.
-    !
-    integer :: nn, bc, nFamBC, nGeo, nUserData, ierr
+        if (myID == 0 .and. nDoubleBoundFaces > 0) then
+            write (integerString, "(i6)") nDoubleBoundFaces
+            integerString = adjustl(integerString)
 
-    character(len=maxStringLen) :: errorMessage
+            print "(a)", "#"
+            print "(a)", "#                      Warning"
+            print strings, "# ", trim(integerString), " double boundary faces found."
+            print "(a)", "# Block connectivity is kept, boundary info is neglected."
+            print "(a)", "#"
 
-    ! Determine the number of families in the given base.
+        end if
 
-    call cg_nfamilies_f(cgnsInd, cgnsBase, nn, ierr)
-    if(ierr /= CG_OK)                 &
-         call terminate("readFamilyInfo", &
-         "Something wrong when calling cg_nfamilies_f")
-    cgnsNFamilies = nn
+        ! If the units could not be determined, print a warning if
+        ! the viscous or unsteady equations are solved.
 
-    ! Allocate the memory for cgnsFamilies.
+        if (myID == 0 .and. noUnits) then
 
-    allocate(cgnsFamilies(nn), stat=ierr)
-    if(ierr /= 0)                      &
-         call terminate("readFamilyInfo", &
-         "Memory allocation failure for cgnsFamilies")
+            if (equations == NSEquations .or. equations == RANSEquations .or. equationMode == unsteady) then
 
-    ! Loop over the number of families and read the info.
+                print "(a)", "#"
+                print "(a)", "#                      Warning"
+                print "(a)", "# It is assumed that the grid is given in meters."
+                print "(a)", "#"
 
-    nFam: do nn=1,cgnsNFamilies
+            end if
+        end if
+
+    end subroutine readBlockSizes
+    subroutine readFamilyInfo(cgnsInd, cgnsBase)
+        !
+        !       readFamilyInfo determines the number of families in the
+        !       given base of the cgns grid and determines their possible
+        !       boundary condition, including some user defined ones.
+        !
+        use constants
+        use su_cgns
+        use cgnsGrid, only: cgnsDoms, cgnsNDom, cgnsFamilies, cgnsNFamilies
+        use communication, only: myid, adflow_comm_world
+        use utils, only: terminate
+        use commonFormats, only: strings
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        integer, intent(in) :: cgnsInd, cgnsBase
+        !
+        !      Local variables.
+        !
+        integer :: nn, bc, nFamBC, nGeo, nUserData, ierr
+
+        character(len=maxStringLen) :: errorMessage
+
+        ! Determine the number of families in the given base.
+
+        call cg_nfamilies_f(cgnsInd, cgnsBase, nn, ierr)
+        if (ierr /= CG_OK) &
+            call terminate("readFamilyInfo", &
+                           "Something wrong when calling cg_nfamilies_f")
+        cgnsNFamilies = nn
 
-       ! Initialize slidingID to 0 to indicate that this family does
-       ! not belong to a sliding mesh interface. Idem for the
-       ! bleedRegionID.
+        ! Allocate the memory for cgnsFamilies.
 
-       cgnsFamilies(nn)%slidingID   = 0
-       cgnsFamilies(nn)%bleedRegionID = 0
+        allocate (cgnsFamilies(nn), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("readFamilyInfo", &
+                           "Memory allocation failure for cgnsFamilies")
 
-       ! Initialize the logical to monitor the mass flow to .false.
+        ! Loop over the number of families and read the info.
 
-       cgnsFamilies(nn)%monitorMassFlow = .false.
+        nFam: do nn = 1, cgnsNFamilies
 
-       ! Nullify the pointer for the prescribed boundary data.
+            ! Initialize slidingID to 0 to indicate that this family does
+            ! not belong to a sliding mesh interface. Idem for the
+            ! bleedRegionID.
 
-       nullify(cgnsFamilies(nn)%dataSet)
+            cgnsFamilies(nn)%slidingID = 0
+            cgnsFamilies(nn)%bleedRegionID = 0
 
-       ! Read the family name and the number of boundary conditions
-       ! specified.
+            ! Initialize the logical to monitor the mass flow to .false.
 
-       call cg_family_read_f(cgnsInd, cgnsBase, nn,       &
-            cgnsFamilies(nn)%familyName, &
-            nFamBC, nGeo, ierr)
+            cgnsFamilies(nn)%monitorMassFlow = .false.
 
-       if(ierr /= CG_OK)               &
-            call terminate("readFamilyInfo", &
-            "Something wrong when calling cg_family_read_f")
+            ! Nullify the pointer for the prescribed boundary data.
 
-       ! Determine the boundary condition for this family, if specified.
+            nullify (cgnsFamilies(nn)%dataSet)
 
-       select case (nFamBC)
+            ! Read the family name and the number of boundary conditions
+            ! specified.
 
-       case (0)
-          cgnsFamilies(nn)%BCTypeCGNS = Null
-          cgnsFamilies(nn)%BCType     = BCNull
-          cgnsFamilies(nn)%bcName     = ""
+            call cg_family_read_f(cgnsInd, cgnsBase, nn, &
+                                  cgnsFamilies(nn)%familyName, &
+                                  nFamBC, nGeo, ierr)
 
-          !=============================================================
+            if (ierr /= CG_OK) &
+                call terminate("readFamilyInfo", &
+                               "Something wrong when calling cg_family_read_f")
 
-       case (1)
-          bc = 1
-          call cg_fambc_read_f(cgnsInd, cgnsBase, nn, bc, &
-               cgnsFamilies(nn)%bcName,   &
-               cgnsFamilies(nn)%BCTypeCGNS, ierr)
-          if(ierr /= CG_OK)                 &
-               call terminate("readFamilyInfo", &
-               "Something wrong when calling cg_fambc_read_f")
+            ! Determine the boundary condition for this family, if specified.
 
-          ! If this is a user defined boundary condition it must
-          ! contain more information to determine the internally
-          ! used BC.
+            select case (nFamBC)
 
-          testUserDefined: if(cgnsFamilies(nn)%BCTypeCGNS == &
-               UserDefined) then
+            case (0)
+                cgnsFamilies(nn)%BCTypeCGNS = Null
+                cgnsFamilies(nn)%BCType = BCNull
+                cgnsFamilies(nn)%bcName = ""
 
-             ! Move to the family and determine the number of
-             ! user defined data nodes.
+                !=============================================================
 
-             call cg_goto_f(cgnsInd, cgnsBase, ierr, "Family_t", nn, "end")
-             if(ierr /= CG_OK)                 &
-                  call terminate("readFamilyInfo", &
-                  "Something wrong when calling cg_goto_f")
+            case (1)
+                bc = 1
+                call cg_fambc_read_f(cgnsInd, cgnsBase, nn, bc, &
+                                     cgnsFamilies(nn)%bcName, &
+                                     cgnsFamilies(nn)%BCTypeCGNS, ierr)
+                if (ierr /= CG_OK) &
+                    call terminate("readFamilyInfo", &
+                                   "Something wrong when calling cg_fambc_read_f")
 
-             call cg_nuser_data_f(nUserData, ierr)
-             if(ierr /= CG_OK)                 &
-                  call terminate("readFamilyInfo", &
-                  "Something wrong when calling cg_nuser_data_f")
+                ! If this is a user defined boundary condition it must
+                ! contain more information to determine the internally
+                ! used BC.
 
-             ! nUserData should be 1. Check this.
+                testUserDefined: if (cgnsFamilies(nn)%BCTypeCGNS == &
+                                     UserDefined) then
 
-             if(nUserData /= 1) then
-                write(errorMessage, strings) "Family ", trim(cgnsFamilies(nn)%familyName), &
-                  ": Need 1 UserDefinedData_t node for user defined boundary condition"
-                if(myID == 0) &
-                     call terminate("readFamilyInfo", errorMessage)
-                call mpi_barrier(ADflow_comm_world, ierr)
-             endif
+                    ! Move to the family and determine the number of
+                    ! user defined data nodes.
 
-             ! Read the name of the user defined data node.
+                    call cg_goto_f(cgnsInd, cgnsBase, ierr, "Family_t", nn, "end")
+                    if (ierr /= CG_OK) &
+                        call terminate("readFamilyInfo", &
+                                       "Something wrong when calling cg_goto_f")
 
-             call cg_user_data_read_f(nUserData, cgnsFamilies(nn)%userDefinedName, ierr)
-             if(ierr /= CG_OK)                 &
-                  call terminate("readFamilyInfo", &
-                  "Something wrong when calling cg_user_data_read_f")
+                    call cg_nuser_data_f(nUserData, ierr)
+                    if (ierr /= CG_OK) &
+                        call terminate("readFamilyInfo", &
+                                       "Something wrong when calling cg_nuser_data_f")
 
-          else testUserDefined
+                    ! nUserData should be 1. Check this.
 
-             ! Set the user defined name to an empty string.
+                    if (nUserData /= 1) then
+                        write (errorMessage, strings) "Family ", trim(cgnsFamilies(nn)%familyName), &
+                            ": Need 1 UserDefinedData_t node for user defined boundary condition"
+                        if (myID == 0) &
+                            call terminate("readFamilyInfo", errorMessage)
+                        call mpi_barrier(ADflow_comm_world, ierr)
+                    end if
 
-             cgnsFamilies(nn)%userDefinedName = ""
+                    ! Read the name of the user defined data node.
 
-          endif testUserDefined
+                    call cg_user_data_read_f(nUserData, cgnsFamilies(nn)%userDefinedName, ierr)
+                    if (ierr /= CG_OK) &
+                        call terminate("readFamilyInfo", &
+                                       "Something wrong when calling cg_user_data_read_f")
 
-          ! Determine the internal BC type from the CGNS type and
-          ! possibly the user defined name.
+                    else testUserDefined
 
-          cgnsFamilies(nn)%BCType = &
-               internalBC(cgnsFamilies(nn)%BCTypeCGNS, &
-               cgnsFamilies(nn)%userDefinedName)
+                    ! Set the user defined name to an empty string.
 
-          !=============================================================
+                    cgnsFamilies(nn)%userDefinedName = ""
 
-       case default
-          write(errorMessage, strings) "Family ", trim(cgnsFamilies(nn)%familyName), &
-            ": More than 1 boundary condition specified"
-          if(myID == 0) &
-               call terminate("readFamilyInfo", errorMessage)
-          call mpi_barrier(ADflow_comm_world, ierr)
+                end if testUserDefined
 
-       end select
+                ! Determine the internal BC type from the CGNS type and
+                ! possibly the user defined name.
 
-    enddo nFam
+                cgnsFamilies(nn)%BCType = &
+                    internalBC(cgnsFamilies(nn)%BCTypeCGNS, &
+                               cgnsFamilies(nn)%userDefinedName)
 
-  end subroutine readFamilyInfo
+                !=============================================================
 
-  subroutine readZoneInfo(cgnsBase, nZone, sortedFamName, &
-       famID, noUnits)
-    !
-    !       readZoneInfo reads the general information, like zone type
-    !       and physical dimensions, for the given zone/block.
-    !
-    use constants
-    use su_cgns
-    use cgnsGrid, only : cgnsDoms, cgnsNDom, cgnsFamilies, cgnsNFamilies
-    use communication, only : adflow_comm_world, myid
-    use flowVarRefState, only : LRefSpecified, LRef
-    use iteration, only : changing_grid
-    use partitionMod, only : nGridsRead, fileIDs, gridFiles
-    use utils, only : terminate, siAngle, siLen, siAngle
-    use sorting, only: bsearchStrings
-    use commonFormats, only : strings
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    integer, intent(in) :: cgnsBase, nZone
-    character(len=*), dimension(:), intent(in) :: sortedFamName
-    integer(kind=intType), dimension(:), intent(in) :: famID
+            case default
+                write (errorMessage, strings) "Family ", trim(cgnsFamilies(nn)%familyName), &
+                    ": More than 1 boundary condition specified"
+                if (myID == 0) &
+                    call terminate("readFamilyInfo", errorMessage)
+                call mpi_barrier(ADflow_comm_world, ierr)
 
-    logical, intent(inout) :: noUnits
+            end select
 
-    !
-    !      Local variables
-    !
-    integer :: cgnsInd
-    integer :: i, ierr, nCoords
-    integer :: mass, len, time, temp, angle
+        end do nFam
 
-    integer(kind=cgsize_t), dimension(9) :: sizesBlock
+    end subroutine readFamilyInfo
 
-    integer(kind=intType) :: ii, nn
+    subroutine readZoneInfo(cgnsBase, nZone, sortedFamName, &
+                            famID, noUnits)
+        !
+        !       readZoneInfo reads the general information, like zone type
+        !       and physical dimensions, for the given zone/block.
+        !
+        use constants
+        use su_cgns
+        use cgnsGrid, only: cgnsDoms, cgnsNDom, cgnsFamilies, cgnsNFamilies
+        use communication, only: adflow_comm_world, myid
+        use flowVarRefState, only: LRefSpecified, LRef
+        use iteration, only: changing_grid
+        use partitionMod, only: nGridsRead, fileIDs, gridFiles
+        use utils, only: terminate, siAngle, siLen, siAngle
+        use sorting, only: bsearchStrings
+        use commonFormats, only: strings
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        integer, intent(in) :: cgnsBase, nZone
+        character(len=*), dimension(:), intent(in) :: sortedFamName
+        integer(kind=intType), dimension(:), intent(in) :: famID
 
-    real(kind=realType), dimension(3) :: rotCenter, rotRate
+        logical, intent(inout) :: noUnits
 
-    real(kind=realType) :: mult, trans
+        !
+        !      Local variables
+        !
+        integer :: cgnsInd
+        integer :: i, ierr, nCoords
+        integer :: mass, len, time, temp, angle
 
-    character(len=maxCGNSNameLen) :: familyName
-    character(len=maxStringLen)   :: errorMessage
-    character(len=maxStringLen)   :: coordFormat
+        integer(kind=cgsize_t), dimension(9) :: sizesBlock
 
-    logical :: overwrite
+        integer(kind=intType) :: ii, nn
 
+        real(kind=realType), dimension(3) :: rotCenter, rotRate
 
-    ! Set the cgns ID for the "master" file and read the size
-    ! of the block as well as the zone name.
+        real(kind=realType) :: mult, trans
 
-    cgnsInd = fileIDs(1)
+        character(len=maxCGNSNameLen) :: familyName
+        character(len=maxStringLen) :: errorMessage
+        character(len=maxStringLen) :: coordFormat
 
-    call cg_zone_read_f(cgnsInd, cgnsBase, nZone, &
-         cgnsDoms(nZone)%zoneName, sizesBlock, ierr)
-    if(ierr /= CG_OK)               &
-         call terminate("readZoneInfo", &
-         "Something wrong when calling cg_nZones_f")
+        logical :: overwrite
 
-    ! Check the zone type.
+        ! Set the cgns ID for the "master" file and read the size
+        ! of the block as well as the zone name.
 
-    call cg_zone_type_f(cgnsInd, cgnsBase, nZone, &
-         cgnsDoms(nZone)%zonetype, ierr)
-    if(ierr /= CG_OK)               &
-         call terminate("readZoneInfo", &
-         "Something wrong when calling cg_zone_type_f")
+        cgnsInd = fileIDs(1)
 
-    if(cgnsDoms(nZone)%zonetype /= Structured) then
-       write(errorMessage,*) "Zone ",                         &
-            trim(cgnsDoms(nZone)%zoneName), &
-            " of the grid file is not structured"
-       if(myID == 0) call terminate("readZoneInfo", errorMessage)
-       call mpi_barrier(ADflow_comm_world, ierr)
-    endif
-
-    ! Set the values for the number of nodes and cells in i, j and
-    ! k-direction.
-
-    cgnsDoms(nZone)%il = sizesBlock(1)
-    cgnsDoms(nZone)%jl = sizesBlock(2)
-    cgnsDoms(nZone)%kl = sizesBlock(3)
-
-    cgnsDoms(nZone)%nx = sizesBlock(4)
-    cgnsDoms(nZone)%ny = sizesBlock(5)
-    cgnsDoms(nZone)%nz = sizesBlock(6)
-
-    ! Check the size of this zone for the other grids to be read.
-    ! They should be equal. Note that familyName is only used as
-    ! a dummy in this call.
-
-    do nn=2,nGridsRead
-       call cg_zone_read_f(fileIDs(nn), cgnsBase, nZone, &
-            familyName, sizesBlock, ierr)
-       if(ierr /= CG_OK)               &
+        call cg_zone_read_f(cgnsInd, cgnsBase, nZone, &
+                            cgnsDoms(nZone)%zoneName, sizesBlock, ierr)
+        if (ierr /= CG_OK) &
             call terminate("readZoneInfo", &
-            "Something wrong when calling cg_nZones_f")
+                           "Something wrong when calling cg_nZones_f")
 
-       if(cgnsDoms(nZone)%il /= sizesBlock(1) .or. &
-            cgnsDoms(nZone)%jl /= sizesBlock(2) .or. &
-            cgnsDoms(nZone)%kl /= sizesBlock(3)) then
+        ! Check the zone type.
 
-          write(errorMessage,*) "File ", trim(gridFiles(nn)), &
-               ", zone ", trim(familyName), &
-               " Zone dimensions are different&
-               & than in file ", trim(gridFiles(1))
-          if(myID == 0) call terminate("readBlockSizes", errorMessage)
-          call mpi_barrier(ADflow_comm_world, ierr)
-       endif
-    enddo
+        call cg_zone_type_f(cgnsInd, cgnsBase, nZone, &
+                            cgnsDoms(nZone)%zonetype, ierr)
+        if (ierr /= CG_OK) &
+            call terminate("readZoneInfo", &
+                           "Something wrong when calling cg_zone_type_f")
+
+        if (cgnsDoms(nZone)%zonetype /= Structured) then
+            write (errorMessage, *) "Zone ", &
+                trim(cgnsDoms(nZone)%zoneName), &
+                " of the grid file is not structured"
+            if (myID == 0) call terminate("readZoneInfo", errorMessage)
+            call mpi_barrier(ADflow_comm_world, ierr)
+        end if
+
+        ! Set the values for the number of nodes and cells in i, j and
+        ! k-direction.
+
+        cgnsDoms(nZone)%il = sizesBlock(1)
+        cgnsDoms(nZone)%jl = sizesBlock(2)
+        cgnsDoms(nZone)%kl = sizesBlock(3)
+
+        cgnsDoms(nZone)%nx = sizesBlock(4)
+        cgnsDoms(nZone)%ny = sizesBlock(5)
+        cgnsDoms(nZone)%nz = sizesBlock(6)
+
+        ! Check the size of this zone for the other grids to be read.
+        ! They should be equal. Note that familyName is only used as
+        ! a dummy in this call.
+
+        do nn = 2, nGridsRead
+            call cg_zone_read_f(fileIDs(nn), cgnsBase, nZone, &
+                                familyName, sizesBlock, ierr)
+            if (ierr /= CG_OK) &
+                call terminate("readZoneInfo", &
+                               "Something wrong when calling cg_nZones_f")
+
+            if (cgnsDoms(nZone)%il /= sizesBlock(1) .or. &
+                cgnsDoms(nZone)%jl /= sizesBlock(2) .or. &
+                cgnsDoms(nZone)%kl /= sizesBlock(3)) then
+
+                write (errorMessage, *) "File ", trim(gridFiles(nn)), &
+                     ", zone ", trim(familyName), &
+                     " Zone dimensions are different&
+                     & than in file ", trim(gridFiles(1))
+                if (myID == 0) call terminate("readBlockSizes", errorMessage)
+                call mpi_barrier(ADflow_comm_world, ierr)
+            end if
+        end do
 
-    ! Goto this zone.
+        ! Goto this zone.
 
-    call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", nZone, "end")
-    if(ierr /= CG_OK)               &
-         call terminate("readZoneInfo", &
-         "Something wrong when calling cg_goto_f")
-    !
-    !       Try to read the family name.
-    !
-    call cg_famname_read_f(familyName, ierr)
-    if(ierr == error)                &
-         call terminate("readZoneInfo", &
-         "Something wrong when calling cg_famname_read_f")
+        call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", nZone, "end")
+        if (ierr /= CG_OK) &
+            call terminate("readZoneInfo", &
+                           "Something wrong when calling cg_goto_f")
+        !
+        !       Try to read the family name.
+        !
+        call cg_famname_read_f(familyName, ierr)
+        if (ierr == error) &
+            call terminate("readZoneInfo", &
+                           "Something wrong when calling cg_famname_read_f")
 
-    ! Check if a family name was specified. If so, determine the
-    ! corresponding id.
+        ! Check if a family name was specified. If so, determine the
+        ! corresponding id.
 
-    cgnsDoms(nZone)%familyID = 0
-    if(ierr == CG_OK) then
+        cgnsDoms(nZone)%familyID = 0
+        if (ierr == CG_OK) then
 
-       ! Search the family name in the sorted names. For a valid
-       ! grid this name must be found.
+            ! Search the family name in the sorted names. For a valid
+            ! grid this name must be found.
 
-       nn = cgnsNFamilies
-       ii = bsearchStrings(familyName, sortedFamName)
-       if(ii == 0) then
+            nn = cgnsNFamilies
+            ii = bsearchStrings(familyName, sortedFamName)
+            if (ii == 0) then
 
-          write(errorMessage, strings) "Family name ", trim(familyName)," not present in the grid"
-          if(myID == 0) call terminate("readZoneInfo", errorMessage)
-          call mpi_barrier(ADflow_comm_world, ierr)
+                write (errorMessage, strings) "Family name ", trim(familyName), " not present in the grid"
+                if (myID == 0) call terminate("readZoneInfo", errorMessage)
+                call mpi_barrier(ADflow_comm_world, ierr)
 
-       endif
+            end if
 
-       ! Set the family number.
+            ! Set the family number.
 
-       ii = famID(ii)
-       cgnsDoms(nZone)%familyID = ii
+            ii = famID(ii)
+            cgnsDoms(nZone)%familyID = ii
 
-    endif
-    !
-    !       Try to determine the units of the coordinates.
-    !
-    ! Determine the number of coordinates in this zone.
+        end if
+        !
+        !       Try to determine the units of the coordinates.
+        !
+        ! Determine the number of coordinates in this zone.
 
-    call cg_ncoords_f(cgnsInd, cgnsBase, nZone, nCoords, ierr)
-    if(ierr /= CG_OK)               &
-         call terminate("readZoneInfo", &
-         "Something wrong when calling cg_ncoords_f")
+        call cg_ncoords_f(cgnsInd, cgnsBase, nZone, nCoords, ierr)
+        if (ierr /= CG_OK) &
+            call terminate("readZoneInfo", &
+                           "Something wrong when calling cg_ncoords_f")
 
-    ! Check that 3 coordinates are present. If not, terminate.
+        ! Check that 3 coordinates are present. If not, terminate.
 
-    if(nCoords /= 3) then
-       coordFormat = "(3(A), I1, A)"
-       write(errorMessage, coordFormat) "The number of coordinates of zone ", trim(cgnsDoms(nZone)%zoneName), &
-         " of base 1 is", nCoords, ". This should 3."
+        if (nCoords /= 3) then
+            coordFormat = "(3(A), I1, A)"
+            write (errorMessage, coordFormat) "The number of coordinates of zone ", trim(cgnsDoms(nZone)%zoneName), &
+                " of base 1 is", nCoords, ". This should 3."
 
-       if(myID == 0) call terminate("readZoneInfo", errorMessage)
-       call mpi_barrier(ADflow_comm_world, ierr)
-    endif
+            if (myID == 0) call terminate("readZoneInfo", errorMessage)
+            call mpi_barrier(ADflow_comm_world, ierr)
+        end if
 
-    ! Loop over the three coordinates.
+        ! Loop over the three coordinates.
 
-    cgnsDoms(nZone)%gridUnitsSpecified = .false.
+        cgnsDoms(nZone)%gridUnitsSpecified = .false.
 
-    do i=1,3
+        do i = 1, 3
 
-       ! Go to the correct place in the grid file.
+            ! Go to the correct place in the grid file.
 
-       call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", nZone, &
-            "GridCoordinates_t", 1, "DataArray_t", i, &
-            "end")
-       if(ierr /= CG_OK)               &
-            call terminate("readZoneInfo", &
-            "Something wrong when calling cg_goto_f")
+            call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", nZone, &
+                           "GridCoordinates_t", 1, "DataArray_t", i, &
+                           "end")
+            if (ierr /= CG_OK) &
+                call terminate("readZoneInfo", &
+                               "Something wrong when calling cg_goto_f")
 
-       call cg_units_read_f(mass, len, time, temp, angle, ierr)
-       if(ierr == error)                &
-            call terminate("readZoneInfo", &
-            "Something wrong when calling cg_units_read_f")
+            call cg_units_read_f(mass, len, time, temp, angle, ierr)
+            if (ierr == error) &
+                call terminate("readZoneInfo", &
+                               "Something wrong when calling cg_units_read_f")
 
-       ! Check if units were specified.
+            ! Check if units were specified.
 
-       if(ierr == CG_OK .and. len /= Null) then
+            if (ierr == CG_OK .and. len /= Null) then
 
-          ! Copy the units and set gridUnitsSpecified to .true.
+                ! Copy the units and set gridUnitsSpecified to .true.
 
-          cgnsDoms(nZone)%mass  = mass
-          cgnsDoms(nZone)%len   = len
-          cgnsDoms(nZone)%time  = time
-          cgnsDoms(nZone)%temp  = temp
-          cgnsDoms(nZone)%angle = angle
+                cgnsDoms(nZone)%mass = mass
+                cgnsDoms(nZone)%len = len
+                cgnsDoms(nZone)%time = time
+                cgnsDoms(nZone)%temp = temp
+                cgnsDoms(nZone)%angle = angle
 
-          cgnsDoms(nZone)%gridUnitsSpecified = .true.
+                cgnsDoms(nZone)%gridUnitsSpecified = .true.
 
-          ! Determine the conversion factor to meters.
+                ! Determine the conversion factor to meters.
 
-          call siLen(len, mult, trans)
+                call siLen(len, mult, trans)
 
-          cgnsDoms(nZone)%LRef = mult
+                cgnsDoms(nZone)%LRef = mult
 
-       endif
+            end if
 
-    enddo
+        end do
 
-    ! Check whether units were specified or not.
+        ! Check whether units were specified or not.
 
-    if( cgnsDoms(nZone)%gridUnitsSpecified ) then
+        if (cgnsDoms(nZone)%gridUnitsSpecified) then
 
-       ! Units were specified. Check if a global reference length
-       ! was specified as well. If so, compare the conversion factor.
-       ! If not identical, processor 0 prints a warning.
+            ! Units were specified. Check if a global reference length
+            ! was specified as well. If so, compare the conversion factor.
+            ! If not identical, processor 0 prints a warning.
 
-       if(myID ==0 .and. LRefSpecified .and. &
-            cgnsDoms(nZone)%LRef /= LRef)  then
+            if (myID == 0 .and. LRefSpecified .and. &
+                cgnsDoms(nZone)%LRef /= LRef) then
 
-          print "(a)", "#"
-          print "(a)", "#                      Warning"
-          print strings, "# Zone ", trim(cgnsDoms(nZone)%zoneName), &
-            ": Conversion factor to meters not identical to global conversion factor."
-          print "(a)", "# Global conversion factor is ignored."
-          print "(a)", "#"
-       endif
+                print "(a)", "#"
+                print "(a)", "#                      Warning"
+                print strings, "# Zone ", trim(cgnsDoms(nZone)%zoneName), &
+                    ": Conversion factor to meters not identical to global conversion factor."
+                print "(a)", "# Global conversion factor is ignored."
+                print "(a)", "#"
+            end if
 
-       ! In case no global conversion factor was specified,
-       ! set LRef to the LRef of this block.
+            ! In case no global conversion factor was specified,
+            ! set LRef to the LRef of this block.
 
-       if(.not. LRefSpecified) LRef = cgnsDoms(nZone)%LRef
+            if (.not. LRefSpecified) LRef = cgnsDoms(nZone)%LRef
 
-    else
+        else
 
-       ! No units specified. Set the reference length of the block
-       ! to the global conversion factor.
+            ! No units specified. Set the reference length of the block
+            ! to the global conversion factor.
 
-       cgnsDoms(nZone)%LRef = LRef
+            cgnsDoms(nZone)%LRef = LRef
 
-       ! If the global reference length was not specified, set
-       ! noUnits to .true.
+            ! If the global reference length was not specified, set
+            ! noUnits to .true.
 
-       if(.not. LRefSpecified) noUnits = .true.
+            if (.not. LRefSpecified) noUnits = .true.
 
-    endif
-    !
-    !       Try to determine the rotation rate and center.
-    !
-    ! Some initializations.
+        end if
+        !
+        !       Try to determine the rotation rate and center.
+        !
+        ! Some initializations.
 
-    cgnsDoms(nZone)%rotatingFrameSpecified = .false.
-    cgnsDoms(nZone)%rotRate   = zero
-    cgnsDoms(nZone)%rotCenter = zero
+        cgnsDoms(nZone)%rotatingFrameSpecified = .false.
+        cgnsDoms(nZone)%rotRate = zero
+        cgnsDoms(nZone)%rotCenter = zero
 
-    mult = one   ! Assuming radians/s for the rotation rate.
+        mult = one   ! Assuming radians/s for the rotation rate.
 
-    ! Check if a family overwrite is present.
+        ! Check if a family overwrite is present.
 
-    overwrite = .false.
-    nn = cgnsDoms(nZone)%familyID
-    if(nn > 0) then
-       if( cgnsFamilies(nn)%rotatingFrameSpecified ) &
-            overwrite = .true.
-    endif
+        overwrite = .false.
+        nn = cgnsDoms(nZone)%familyID
+        if (nn > 0) then
+            if (cgnsFamilies(nn)%rotatingFrameSpecified) &
+                overwrite = .true.
+        end if
 
-    testOverwrite: if( overwrite ) then
+        testOverwrite: if (overwrite) then
 
-       ! Rotation rate is specified per family.
-       ! Set rotatingFrameSpecified to .true. and copy the data
-       ! from the corresponding family. Note that the rotation rate
-       ! is already in rad/s and therefore mult does not need to
-       ! change.
+            ! Rotation rate is specified per family.
+            ! Set rotatingFrameSpecified to .true. and copy the data
+            ! from the corresponding family. Note that the rotation rate
+            ! is already in rad/s and therefore mult does not need to
+            ! change.
 
-       cgnsDoms(nZone)%rotatingFrameSpecified = .true.
-       cgnsDoms(nZone)%rotRate   = cgnsFamilies(nn)%rotRate
-       cgnsDoms(nZone)%rotCenter = cgnsFamilies(nn)%rotCenter
+            cgnsDoms(nZone)%rotatingFrameSpecified = .true.
+            cgnsDoms(nZone)%rotRate = cgnsFamilies(nn)%rotRate
+            cgnsDoms(nZone)%rotCenter = cgnsFamilies(nn)%rotCenter
 
-    else testOverwrite
+            else testOverwrite
 
-       ! Go to the correct location in the cgns file, where
-       ! the rotation rate should be specified.
+            ! Go to the correct location in the cgns file, where
+            ! the rotation rate should be specified.
 
-       call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", nZone, &
-            "end")
-       if(ierr /= CG_OK)               &
-            call terminate("readZoneInfo", &
-            "Something wrong when calling cg_goto_f")
+            call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", nZone, &
+                           "end")
+            if (ierr /= CG_OK) &
+                call terminate("readZoneInfo", &
+                               "Something wrong when calling cg_goto_f")
 
-       ! No family information specified.
-       ! Try to read the rotation rate and center.
-       call cg_rotating_read_f(real(rotRate,cgnsPerType), real(rotCenter,cgnsPerType), ierr)
-       
-       if(ierr == error)                &
-            call terminate("readZoneInfo", &
-            "Something wrong when calling &
-            &cg_rotating_read_f")
+            ! No family information specified.
+            ! Try to read the rotation rate and center.
+            call cg_rotating_read_f(real(rotRate, cgnsPerType), real(rotCenter, cgnsPerType), ierr)
 
-       ! Check if a rotating frame is specified.
+            if (ierr == error) &
+                 call terminate("readZoneInfo", &
+                 "Something wrong when calling &
+                 &cg_rotating_read_f")
 
-       if(ierr == CG_OK) then
+            ! Check if a rotating frame is specified.
 
-          ! Set changingGrid to .true.
+            if (ierr == CG_OK) then
 
-          changing_grid = .true.
+                ! Set changingGrid to .true.
 
-          ! Set rotatingFrameSpecified to .true. and copy
-          ! to rotation center and rotation rate.
+                changing_grid = .true.
 
-          cgnsDoms(nZone)%rotatingFrameSpecified = .true.
-          cgnsDoms(nZone)%rotRate   = rotRate
-          cgnsDoms(nZone)%rotCenter = rotCenter
+                ! Set rotatingFrameSpecified to .true. and copy
+                ! to rotation center and rotation rate.
 
-          ! Determine the conversion factor to rad/s if the
-          ! dimensional units are specified. If not, it is assumed
-          ! that the dimensions are given in rad/s.
+                cgnsDoms(nZone)%rotatingFrameSpecified = .true.
+                cgnsDoms(nZone)%rotRate = rotRate
+                cgnsDoms(nZone)%rotCenter = rotCenter
 
-          call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", nZone, &
-               "RotatingCoordinates_t", 1, "DataArray_t", 2, &
-               "end")
-          if(ierr /= CG_OK)               &
-               call terminate("readZoneInfo", &
-               "Something wrong when calling cg_goto_f")
+                ! Determine the conversion factor to rad/s if the
+                ! dimensional units are specified. If not, it is assumed
+                ! that the dimensions are given in rad/s.
 
-          call cg_units_read_f(mass, len, time, temp, angle, ierr)
-          if(ierr == error)                &
-               call terminate("readZoneInfo", &
-               "Something wrong when calling &
-               &cg_units_read_f")
+                call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", nZone, &
+                               "RotatingCoordinates_t", 1, "DataArray_t", 2, &
+                               "end")
+                if (ierr /= CG_OK) &
+                    call terminate("readZoneInfo", &
+                                   "Something wrong when calling cg_goto_f")
 
-          ! Check if units were specified. If not assume radians.
+                call cg_units_read_f(mass, len, time, temp, angle, ierr)
+                if (ierr == error) &
+                     call terminate("readZoneInfo", &
+                     "Something wrong when calling &
+                     &cg_units_read_f")
 
-          if(ierr == CG_OK .and. angle /= Null) then
+                ! Check if units were specified. If not assume radians.
 
-             ! Determine the conversion factor to radians.
+                if (ierr == CG_OK .and. angle /= Null) then
 
-             call siAngle(angle, mult, trans)
+                    ! Determine the conversion factor to radians.
 
-          else
+                    call siAngle(angle, mult, trans)
 
-             ! Angle units not specified. Assume rad/s.
-             ! Processor 0 writes a warning to stdout.
+                else
 
-             if(myID == 0) then
+                    ! Angle units not specified. Assume rad/s.
+                    ! Processor 0 writes a warning to stdout.
 
-                print "(a)", "#"
-                print "(a)", "#                      Warning"
-                print strings, "# Zone ",  trim(cgnsDoms(nZone)%zoneName), &
-                  ": No unit specified for the rotation rate, assuming rad/s."
-                print "(a)", "#"
-             endif
+                    if (myID == 0) then
 
-          endif
-       endif
+                        print "(a)", "#"
+                        print "(a)", "#                      Warning"
+                        print strings, "# Zone ", trim(cgnsDoms(nZone)%zoneName), &
+                            ": No unit specified for the rotation rate, assuming rad/s."
+                        print "(a)", "#"
+                    end if
 
-    endif testOverwrite
+                end if
+            end if
+
+        end if testOverwrite
+
+        ! Multiply the rotation center with LRef to obtain the correct
+        ! coordinates in meters.
+
+        cgnsDoms(nZone)%rotCenter(1) = cgnsDoms(nZone)%LRef &
+                                       * cgnsDoms(nZone)%rotCenter(1)
+        cgnsDoms(nZone)%rotCenter(2) = cgnsDoms(nZone)%LRef &
+                                       * cgnsDoms(nZone)%rotCenter(2)
+        cgnsDoms(nZone)%rotCenter(3) = cgnsDoms(nZone)%LRef &
+                                       * cgnsDoms(nZone)%rotCenter(3)
+
+        ! Multiply the rotation rate by the conversion factor to obtain
+        ! the correct angular velocity.
+
+        cgnsDoms(nZone)%rotRate(1) = cgnsDoms(nZone)%rotRate(1) * mult
+        cgnsDoms(nZone)%rotRate(2) = cgnsDoms(nZone)%rotRate(2) * mult
+        cgnsDoms(nZone)%rotRate(3) = cgnsDoms(nZone)%rotRate(3) * mult
+
+    end subroutine readZoneInfo
+
+    subroutine countConnectivities(cgnsInd, cgnsBase, nZone)
+        !
+        !       countConnectivities determines the number of connectivities
+        !       for each of the supported types stored in 1to1 and general.
+        !
+        use constants
+        use su_cgns
+        use cgnsGrid, only: cgnsDoms, cgnsNDom, cgnsNonMatchAbuttingConnType
+        use communication, only: myid, adflow_comm_world
+        use partitionMod, only: subfaceNonMatchType, qsortSubfaceNonMatchType
+        use utils, only: terminate
+        use commonFormats, only: strings
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        integer, intent(in) :: cgnsInd, cgnsBase, nZone
+        !
+        !      Local variables.
+        !
+        integer :: i, ngeneral, ierr
+        integer :: n1to1, n1to1General, nNonMatch
+
+        integer :: location, connectType, ptsetType
+        integer(kind=cgsize_t) :: npnts, ndataDonor
+        integer :: donorZoneType, donorPtsetType, donorDatatype
+
+        integer, dimension(:), allocatable :: connIDNonMatch
+        integer(kind=cgsize_t), dimension(:, :), allocatable :: donorData
+        integer(kind=cgsize_t), dimension(:, :, :), allocatable :: myRangeNonMatch
+
+        integer(kind=intType) :: mm, nn
+
+        integer(kind=intType), dimension(:), allocatable :: multSubfaces
+
+        type(subfaceNonMatchType), dimension(:), allocatable :: subfaceNonMatch
+
+        type(cgnsNonMatchAbuttingConnType), pointer, dimension(:) :: connNonMatch
+
+        character(len=maxStringLen) :: errorMessage
+        character(len=maxCGNSNameLen) :: connectName, donorName
+
+        ! Determine the number of 1 to 1 connectivities in this zone.
+        ! Note that the reading takes place via an integer type.
+
+        call cg_n1to1_f(cgnsInd, cgnsBase, nZone, i, ierr)
+        if (ierr /= CG_OK) &
+            call terminate("countConnectivities", &
+                           "Something wrong when calling cg_n1to1_f")
 
-    ! Multiply the rotation center with LRef to obtain the correct
-    ! coordinates in meters.
+        n1to1 = i
 
-    cgnsDoms(nZone)%rotCenter(1) = cgnsDoms(nZone)%LRef &
-         * cgnsDoms(nZone)%rotCenter(1)
-    cgnsDoms(nZone)%rotCenter(2) = cgnsDoms(nZone)%LRef &
-         * cgnsDoms(nZone)%rotCenter(2)
-    cgnsDoms(nZone)%rotCenter(3) = cgnsDoms(nZone)%LRef &
-         * cgnsDoms(nZone)%rotCenter(3)
+        ! Determine the total number of general connectivities in this
+        ! zone.
 
-    ! Multiply the rotation rate by the conversion factor to obtain
-    ! the correct angular velocity.
+        call cg_nconns_f(cgnsInd, cgnsBase, nZone, ngeneral, ierr)
+        if (ierr /= CG_OK) &
+            call terminate("countConnectivities", &
+                           "Something wrong when calling cg_nconns_f")
 
-    cgnsDoms(nZone)%rotRate(1) = cgnsDoms(nZone)%rotRate(1)*mult
-    cgnsDoms(nZone)%rotRate(2) = cgnsDoms(nZone)%rotRate(2)*mult
-    cgnsDoms(nZone)%rotRate(3) = cgnsDoms(nZone)%rotRate(3)*mult
+        ! Allocate the memory for connIDNonMatch and myRangeNonMatch. Note
+        ! that this number is an upper bound, because other connectivities
+        ! may be present in general connectivities.
 
+        allocate (connIDNonMatch(ngeneral), &
+                  myRangeNonMatch(3, 2, ngeneral), stat=ierr)
+        if (ierr /= CG_OK) &
+            call terminate("countConnectivities", &
+                           "Memory allocation failure for connIDNonMatch and myRangeNonMatch")
 
-  end subroutine readZoneInfo
+        ! Loop over ngeneral to find out how many of each supported
+        ! types of connectivities are stored here.
 
-  subroutine countConnectivities(cgnsInd, cgnsBase, nZone)
-    !
-    !       countConnectivities determines the number of connectivities
-    !       for each of the supported types stored in 1to1 and general.
-    !
-    use constants
-    use su_cgns
-    use cgnsGrid, only : cgnsDoms, cgnsNDom, cgnsNonMatchAbuttingConnType
-    use communication, only : myid, adflow_comm_world
-    use partitionMod, only : subfaceNonMatchType, qsortSubfaceNonMatchType
-    use utils, only : terminate
-    use commonFormats, only : strings
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    integer, intent(in)  :: cgnsInd, cgnsBase, nZone
-    !
-    !      Local variables.
-    !
-    integer :: i, ngeneral, ierr
-    integer :: n1to1, n1to1General, nNonMatch
+        n1to1General = 0
+        nNonMatch = 0
 
-    integer :: location, connectType, ptsetType
-    integer(kind=cgsize_t) :: npnts, ndataDonor
-    integer :: donorZoneType, donorPtsetType, donorDatatype
+        do i = 1, ngeneral
 
-    integer, dimension(:),     allocatable :: connIDNonMatch
-    integer(kind=cgsize_t), dimension(:,:),   allocatable :: donorData
-    integer(kind=cgsize_t), dimension(:,:,:), allocatable :: myRangeNonMatch
+            ! Read the information of this connectivity.
 
-    integer(kind=intType) :: mm, nn
+            call cg_conn_info_f(cgnsInd, cgnsBase, nZone, i, connectName, &
+                                location, connectType, ptsetType, npnts, &
+                                donorName, donorZoneType, donorPtsetType, &
+                                donorDatatype, ndataDonor, ierr)
+            if (ierr /= CG_OK) &
+                call terminate("countConnectivities", &
+                               "Something wrong when calling cg_conn_info_f")
 
-    integer(kind=intType), dimension(:), allocatable :: multSubfaces
+            ! Check if this is a supported structured connectivity.
 
-    type(subfaceNonMatchType), dimension(:), allocatable :: subfaceNonMatch
+            select case (connectType)
 
-    type(cgnsNonMatchAbuttingConnType), pointer, dimension(:) :: connNonMatch
+            case (Abutting1to1)
 
-    character(len=maxStringLen)   :: errorMessage
-    character(len=maxCGNSNameLen) :: connectName, donorName
+                if (location == Vertex .and. &
+                    ptsetType == PointRange .and. &
+                    donorZoneType == Structured .and. &
+                    donorPtsetType == PointListDonor) then
 
-    ! Determine the number of 1 to 1 connectivities in this zone.
-    ! Note that the reading takes place via an integer type.
+                    n1to1General = n1to1General + 1
 
-    call cg_n1to1_f(cgnsInd, cgnsBase, nZone, i, ierr)
-    if(ierr /= CG_OK)                      &
-         call terminate("countConnectivities", &
-         "Something wrong when calling cg_n1to1_f")
+                else
 
-    n1to1 = i
+                    ! CGNS format not supported.
 
-    ! Determine the total number of general connectivities in this
-    ! zone.
+                    write (errorMessage, strings) "Zone ", trim(cgnsDoms(nZone)%zoneName), ", connectivity ", &
+                        trim(connectName), ": No support for this format of an abutting 1 to 1 connectivity"
 
-    call cg_nconns_f(cgnsInd, cgnsBase, nZone, ngeneral, ierr)
-    if(ierr /= CG_OK)                      &
-         call terminate("countConnectivities", &
-         "Something wrong when calling cg_nconns_f")
+                    if (myID == 0) &
+                        call terminate("countConnectivities", errorMessage)
+                    call mpi_barrier(ADflow_comm_world, ierr)
 
-    ! Allocate the memory for connIDNonMatch and myRangeNonMatch. Note
-    ! that this number is an upper bound, because other connectivities
-    ! may be present in general connectivities.
+                end if
 
-    allocate(connIDNonMatch(ngeneral), &
-         myRangeNonMatch(3,2,ngeneral),stat=ierr)
-    if(ierr /= CG_OK)                      &
-         call terminate("countConnectivities", &
-         "Memory allocation failure for connIDNonMatch and myRangeNonMatch")
+                !============================================================
 
-    ! Loop over ngeneral to find out how many of each supported
-    ! types of connectivities are stored here.
+            case (Abutting)
 
-    n1to1General = 0
-    nNonMatch    = 0
+                if (location == Vertex .and. &
+                    ptsetType == PointRange .and. &
+                    donorZoneType == Structured .and. &
+                    donorPtsetType == PointListDonor) then
 
-    do i=1,ngeneral
+                    nNonMatch = nNonMatch + 1
 
-       ! Read the information of this connectivity.
+                    connIDNonMatch(nNonMatch) = i
 
-       call cg_conn_info_f(cgnsInd, cgnsBase, nZone, i, connectName, &
-            location, connectType, ptsetType, npnts,  &
-            donorName, donorZoneType, donorPtsetType, &
-            donorDatatype, ndataDonor, ierr)
-       if(ierr /= CG_OK)                      &
-            call terminate("countConnectivities", &
-            "Something wrong when calling cg_conn_info_f")
+                    ! Allocate the memory for donorData and read the info.
+                    ! Release the memory of donorData after the read, because
+                    ! only the subface range is needed at the moment.
 
-       ! Check if this is a supported structured connectivity.
+                    allocate (donorData(3, ndataDonor), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("countConnectivities", &
+                                       "Memory allocation failure for donorData")
 
-       select case (connectType)
+                    call cg_conn_read_f(cgnsInd, cgnsBase, nZone, i, &
+                                        myRangeNonMatch(1, 1, nNonMatch), &
+                                        Integer, donorData, ierr)
+                    if (ierr /= CG_OK) &
+                        call terminate("countConnectivities", &
+                                       "Something wrong when calling cg_conn_read_f")
 
-       case (Abutting1to1)
+                    deallocate (donorData, stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("countConnectivities", &
+                                       "Deallocation failure for donorData")
+                else
 
-          if(location       == Vertex       .and. &
-               ptsetType      == PointRange   .and. &
-               donorZoneType  == Structured   .and. &
-               donorPtsetType == PointListDonor) then
+                    ! CGNS format not supported.
 
-             n1to1General = n1to1General + 1
+                    write (errorMessage, strings) "Zone ", trim(cgnsDoms(nZone)%zoneName), ", connectivity ", &
+                        trim(connectName), ": No support for this format of a non-matching abutting connectivity"
+                    if (myID == 0) &
+                        call terminate("countConnectivities", errorMessage)
+                    call mpi_barrier(ADflow_comm_world, ierr)
 
-          else
+                end if
 
-             ! CGNS format not supported.
+                !============================================================
 
-             write(errorMessage, strings) "Zone ", trim(cgnsDoms(nZone)%zoneName),", connectivity ", &
-               trim(connectName), ": No support for this format of an abutting 1 to 1 connectivity"
+            case default
 
-             if(myID == 0) &
-                  call terminate("countConnectivities", errorMessage)
-             call mpi_barrier(ADflow_comm_world, ierr)
+                call terminate("countConnectivities", &
+                               "Unsupportted general connectivity found")
 
-          endif
+            end select
 
-          !============================================================
+        end do
 
-       case (Abutting)
+        ! Update the value of n1to1 with the number stored in the
+        ! general connectivities.
 
-          if(location       == Vertex       .and. &
-               ptsetType      == PointRange   .and. &
-               donorZoneType  == Structured   .and. &
-               donorPtsetType == PointListDonor) then
+        n1to1 = n1to1 + n1to1General
 
-             nNonMatch = nNonMatch + 1
+        ! Memory allocation for the 1 to 1 connectivities.
 
-             connIDNonMatch(nNonMatch) = i
+        allocate (cgnsDoms(nZone)%conn1to1(n1to1), stat=ierr)
+        if (ierr /= 0) &
+             call terminate("countConnectivities", &
+             "Memory allocation failure for conn1to1 and &
+             &connOver")
+        !
+        !       For the non-matching abutting subfaces some more information
+        !       needs to be extracted. The reason is that a subface abuts
+        !       multiple blocks and in CGNS this info is stored in multiple
+        !       connectivities. However it is a lot easier to store that info
+        !       together. That's why the non-abbuting subfaces must be sorted
+        !       in increasing order to extract this information.
+        !
+        ! Allocate the memory for subfaceNonMatch and copy the data
+        ! from myRangeNonMatch and connIDNonMatch. Release the memory
+        ! of these two arrays afterwards. Also allocate the memory
+        ! for multSubfaces, which is needed later on to determine the
+        ! multiplicity of the subfaces.
 
-             ! Allocate the memory for donorData and read the info.
-             ! Release the memory of donorData after the read, because
-             ! only the subface range is needed at the moment.
+        allocate (subfaceNonMatch(nNonMatch), &
+                  multSubfaces(nNonMatch), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("countConnectivities", &
+                           "Memory allocation failure for subfaceNonMatch")
+
+        do i = 1, nNonMatch
+            subfaceNonMatch(i)%iBeg = min(myRangeNonMatch(1, 1, i), &
+                                          myRangeNonMatch(1, 2, i))
+            subfaceNonMatch(i)%jBeg = min(myRangeNonMatch(2, 1, i), &
+                                          myRangeNonMatch(2, 2, i))
+            subfaceNonMatch(i)%kBeg = min(myRangeNonMatch(3, 1, i), &
+                                          myRangeNonMatch(3, 2, i))
+
+            subfaceNonMatch(i)%iEnd = max(myRangeNonMatch(1, 1, i), &
+                                          myRangeNonMatch(1, 2, i))
+            subfaceNonMatch(i)%jEnd = max(myRangeNonMatch(2, 1, i), &
+                                          myRangeNonMatch(2, 2, i))
+            subfaceNonMatch(i)%kEnd = max(myRangeNonMatch(3, 1, i), &
+                                          myRangeNonMatch(3, 2, i))
+
+            subfaceNonMatch(i)%connID = connIDNonMatch(i)
+        end do
+
+        deallocate (connIDNonMatch, myRangeNonMatch, stat=ierr)
+        if (ierr /= 0) &
+             call terminate("countConnectivities", &
+             "Deallocation failure for connIDNonMatch and &
+             &myRangeNonMatch")
+
+        ! Sort subfaceNonMatch in increasing order and determine the
+        ! number of different subfaces as well as their multiplicity.
+
+        call qsortSubfaceNonMatchType(subfaceNonMatch, nNonMatch)
+
+        nn = min(nNonMatch, 1_intType)
+        multSubfaces = 1
+
+        do i = 2, nNonMatch
+            mm = i - 1
+            if (subfaceNonMatch(i)%iBeg == subfaceNonMatch(mm)%iBeg .and. &
+                subfaceNonMatch(i)%jBeg == subfaceNonMatch(mm)%jBeg .and. &
+                subfaceNonMatch(i)%kBeg == subfaceNonMatch(mm)%kBeg .and. &
+                subfaceNonMatch(i)%iEnd == subfaceNonMatch(mm)%iEnd .and. &
+                subfaceNonMatch(i)%jEnd == subfaceNonMatch(mm)%jEnd .and. &
+                subfaceNonMatch(i)%kEnd == subfaceNonMatch(mm)%kEnd) then
+                multSubfaces(nn) = multSubfaces(nn) + 1
+            else
+                nn = nn + 1
+            end if
+        end do
+
+        ! Store the number of non-matching connectivities in nNonMatch and
+        ! allocate the memory for the non-matching abutting connectivities
+
+        nNonMatch = nn
+        allocate (cgnsDoms(nZone)%connNonMatchAbutting(nn), stat=ierr)
+        if (ierr /= 0) &
+             call terminate("countConnectivities", &
+             "Memory allocation failure for &
+             &connNonMatchAbutting")
+
+        connNonMatch => cgnsDoms(nZone)%connNonMatchAbutting
+
+        ! Loop over the number of non-matching connectivities to copy
+        ! the info from subfaceNonMatch.
+
+        nn = 0
+        do i = 1, nNonMatch
+
+            ! Set the value for the number of donor blocks and allocate
+            ! the memory for connectNames, donorNames, donorBlocks and
+            ! donorFaceIDs.
+
+            mm = multSubfaces(i)
+            connNonMatch(i)%nDonorBlocks = mm
+
+            allocate (connNonMatch(i)%connectNames(mm), &
+                      connNonMatch(i)%donorNames(mm), &
+                      connNonMatch(i)%donorBlocks(mm), &
+                      connNonMatch(i)%donorFaceIDs(mm), stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("countConnectivities", &
+                 "Memory allocation failure for connectNames, &
+                 &donorNames, donorBlocks and donorFaceIDs.")
+
+            ! Loop over the number of donor blocks and copy for the moment
+            ! the connectivity ID in donorBlocks.
+
+            do mm = 1, connNonMatch(i)%nDonorBlocks
+                nn = nn + 1
+                connNonMatch(i)%donorBlocks(mm) = subfaceNonMatch(nn)%connID
+            end do
+
+            ! Copy the subface range.
+
+            connNonMatch(i)%iBeg = subfaceNonMatch(nn)%iBeg
+            connNonMatch(i)%jBeg = subfaceNonMatch(nn)%jBeg
+            connNonMatch(i)%kBeg = subfaceNonMatch(nn)%kBeg
+
+            connNonMatch(i)%iEnd = subfaceNonMatch(nn)%iEnd
+            connNonMatch(i)%jEnd = subfaceNonMatch(nn)%jEnd
+            connNonMatch(i)%kEnd = subfaceNonMatch(nn)%kEnd
+
+        end do
+
+        ! Release the memory of subfaceNonMatch again.
+
+        deallocate (subfaceNonMatch, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("countConnectivities", &
+                           "Deallocation failure for subfaceNonMatch")
+        !
+        !       Store the number of connectivities in cgnsDoms(nZone).
+        !
+        cgnsDoms(nZone)%n1to1 = n1to1
+        cgnsDoms(nZone)%n1to1General = n1to1General
+        cgnsDoms(nZone)%nNonMatchAbutting = nNonMatch
+
+    end subroutine countConnectivities
+
+    subroutine read1to1Conn(cgnsInd, cgnsBase, nZone)
+        !
+        !       read1to1Conn reads the 1 to 1 block to block, i.e.
+        !       continuous grid lines across block boundaries, connectivities
+        !       for the given zone/block.
+        !
+        use constants
+        use su_cgns
+        use cgnsGrid, only: cgnsDoms, cgnsNDom, cgns1to1ConnType
+        use communication, only: adflow_comm_world, myID
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        integer, intent(in) :: cgnsInd, cgnsBase, nZone
+        !
+        !      Local variables
+        !
+        integer :: cgnsN1to1
+        integer :: i, ierr
+
+        integer(kind=cgsize_t), dimension(3, 2) :: zoneRange, donorRange
+        integer, dimension(3) :: transform
+
+        character(len=maxCGNSNameLen) :: connectName
+
+        type(cgns1to1ConnType), pointer, dimension(:) :: conn1to1
+        real(kind=realType), dimension(3) :: rotCenter, rotAngles
+        real(kind=realType), dimension(3) :: tlation
+
+        ! Determine the number of 1 to 1 connectivities stored in the
+        ! CGNS file for this zone.
+
+        cgnsN1to1 = cgnsDoms(nZone)%n1to1 - cgnsDoms(nZone)%n1to1General
+
+        ! Loop over the number of 1 to 1 connectivities stored in the
+        ! grid file.
+
+        do i = 1, cgnsN1to1
+
+            ! Read the 1 to 1 connectivity info from the cgns file.
+
+            call cg_1to1_read_f(cgnsInd, cgnsBase, nZone, i, &
+                                cgnsDoms(nZone)%conn1to1(i)%connectName, &
+                                cgnsDoms(nZone)%conn1to1(i)%donorName, &
+                                zoneRange, donorRange, transform, ierr)
+            if (ierr /= CG_OK) &
+                call terminate("read1to1Conn", &
+                               "Something wrong when calling cg_1to1_read_f")
+
+            ! Store the zone range and donor range in cgnsDoms.
+
+            cgnsDoms(nZone)%conn1to1(i)%iBeg = zoneRange(1, 1)
+            cgnsDoms(nZone)%conn1to1(i)%jBeg = zoneRange(2, 1)
+            cgnsDoms(nZone)%conn1to1(i)%kBeg = zoneRange(3, 1)
+
+            cgnsDoms(nZone)%conn1to1(i)%iEnd = zoneRange(1, 2)
+            cgnsDoms(nZone)%conn1to1(i)%jEnd = zoneRange(2, 2)
+            cgnsDoms(nZone)%conn1to1(i)%kEnd = zoneRange(3, 2)
+
+            cgnsDoms(nZone)%conn1to1(i)%diBeg = donorRange(1, 1)
+            cgnsDoms(nZone)%conn1to1(i)%djBeg = donorRange(2, 1)
+            cgnsDoms(nZone)%conn1to1(i)%dkBeg = donorRange(3, 1)
+
+            cgnsDoms(nZone)%conn1to1(i)%diEnd = donorRange(1, 2)
+            cgnsDoms(nZone)%conn1to1(i)%djEnd = donorRange(2, 2)
+            cgnsDoms(nZone)%conn1to1(i)%dkEnd = donorRange(3, 2)
+
+            ! Check the transformation matrices between this zone and the
+            ! donor zone and store it in l1, L2 and l3.
+
+            call checkTransform(transform, nZone, i, .true.)
+
+            cgnsDoms(nZone)%conn1to1(i)%l1 = transform(1)
+            cgnsDoms(nZone)%conn1to1(i)%l2 = transform(2)
+            cgnsDoms(nZone)%conn1to1(i)%l3 = transform(3)
+
+            ! Subface is a normal boundary. Set periodic to .false. and
+            ! initialize the periodic data to zero to avoid possible
+            ! problems due to uninitialized data.
+
+            cgnsDoms(nZone)%conn1to1(i)%periodic = .false.
+
+            cgnsDoms(nZone)%conn1to1(i)%rotationCenter = zero
+            cgnsDoms(nZone)%conn1to1(i)%rotationAngles = zero
+            cgnsDoms(nZone)%conn1to1(i)%translation = zero
+
+            call cg_1to1_periodic_read_f(cgnsInd, cgnsBase, nZone, i, &
+                           real(rotCenter, cgnsPerType), real(rotAngles, cgnsPerType), real(tlation, cgnsPerType), ierr)
+            if (ierr == CG_OK) then
+                call readPeriodicSubface1to1(cgnsInd, cgnsBase, nZone, i, &
+                                             cgnsDoms(nZone)%conn1to1(i)%connectName, &
+                                             cgnsDoms(nZone)%conn1to1(i)%periodic, &
+                                             cgnsDoms(nZone)%conn1to1(i)%rotationCenter, &
+                                             cgnsDoms(nZone)%conn1to1(i)%rotationAngles, &
+                                             cgnsDoms(nZone)%conn1to1(i)%translation)
+            end if
+
+        end do
+
+    end subroutine read1to1Conn
+    subroutine checkTransform(transform, nZone, n1to1, printWarning)
+        !
+        !       checkTransform checks the transformation matrix between this
+        !       zone and the donor for the given subrange. In case an error is
+        !       found it is tried to correct this.
+        !
+        use constants
+        use cgnsGrid, only: cgnsDoms
+        use communication, only: myID, adflow_comm_world
+        use utils, only: delta, terminate
+        use commonFormats, only: strings
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer, intent(in) :: nZone, n1to1
+        integer, dimension(3), intent(inout) :: transform
+        logical, intent(in) :: printWarning
+        !
+        !      Local variables.
+        !
+        integer :: ierr
 
-             allocate(donorData(3,ndataDonor), stat=ierr)
-             if(ierr /= 0)                           &
-                  call terminate("countConnectivities", &
-                  "Memory allocation failure for donorData")
+        integer(kind=intType) :: nDirFace, nDirDonor
+        integer(kind=intType) :: sumTransform
+        integer(kind=intType) :: l1, L2, l3
+
+        integer(kind=intType), dimension(3) :: haloDir, donorDir
+        integer(kind=intType), dimension(3, 2) :: zoneRange, donorRange
+        integer(kind=intType), dimension(3, 3) :: trMat
+
+        character(len=maxCGNSNameLen) :: zoneName, connectName
+        character(len=2*maxStringLen) :: errorMessage
+        !
+        ! Copy the zoneName and connectName, just for readability later.
+
+        zoneName = cgnsDoms(nZone)%zoneName
+        connectName = cgnsDoms(nZone)%conn1to1(n1to1)%connectName
+
+        ! Copy the zone and donor range into zoneRange and donorRange.
+
+        zoneRange(1, 1) = cgnsDoms(nZone)%conn1to1(n1to1)%iBeg
+        zoneRange(2, 1) = cgnsDoms(nZone)%conn1to1(n1to1)%jBeg
+        zoneRange(3, 1) = cgnsDoms(nZone)%conn1to1(n1to1)%kBeg
+
+        zoneRange(1, 2) = cgnsDoms(nZone)%conn1to1(n1to1)%iEnd
+        zoneRange(2, 2) = cgnsDoms(nZone)%conn1to1(n1to1)%jEnd
+        zoneRange(3, 2) = cgnsDoms(nZone)%conn1to1(n1to1)%kEnd
+
+        donorRange(1, 1) = cgnsDoms(nZone)%conn1to1(n1to1)%diBeg
+        donorRange(2, 1) = cgnsDoms(nZone)%conn1to1(n1to1)%djBeg
+        donorRange(3, 1) = cgnsDoms(nZone)%conn1to1(n1to1)%dkEnd
+
+        donorRange(1, 2) = cgnsDoms(nZone)%conn1to1(n1to1)%diEnd
+        donorRange(2, 2) = cgnsDoms(nZone)%conn1to1(n1to1)%djEnd
+        donorRange(3, 2) = cgnsDoms(nZone)%conn1to1(n1to1)%dkEnd
+
+        ! Determine the normal direction for the subface and do a trivial
+        ! check to see if there is one.
+
+        do nDirFace = 1, 3
+            if (zoneRange(nDirFace, 1) == zoneRange(nDirFace, 2)) exit
+        end do
+
+        if (nDirFace > 3) then
+            if (myID == 0) then
+                write (errorMessage, strings) "1 to 1 subface ", trim(connectName), " of zone ", trim(zoneName), &
+                    ": No constant index found"
+                call terminate("checkTransform", errorMessage)
+            end if
+
+            ! Make sure that other processors wait until they are killed.
+
+            call mpi_barrier(ADflow_comm_world, ierr)
+        end if
+
+        ! Idem for the donor subface.
+
+        do nDirDonor = 1, 3
+            if (donorRange(nDirDonor, 1) == donorRange(nDirDonor, 2)) exit
+        end do
+
+        if (nDirDonor > 3) then
+            if (myID == 0) then
+                write (errorMessage, strings) "1 to 1 subface ", trim(connectName), " of zone ", trim(zoneName), &
+                    ": No constant index found for donor"
+                call terminate("checkTransform", errorMessage)
+            end if
+
+            ! Make sure that other processors wait until they are killed.
+
+            call mpi_barrier(ADflow_comm_world, ierr)
+        end if
+
+        ! Check if the sum of the absolute values of transform equals 6.
+        ! If not, assume that the normal direction is not set correctly.
+
+        sumTransform = abs(transform(1)) + abs(transform(2)) &
+                       + abs(transform(3))
+        if (sumTransform /= 6) then
+
+            ! Change the normal direction of transform and check the
+            ! sum again.
+
+            transform(nDirFace) = nDirDonor
+            sumTransform = abs(transform(1)) + abs(transform(2)) &
+                           + abs(transform(3))
+
+            if (sumTransform /= 6) then
 
-             call cg_conn_read_f(cgnsInd, cgnsBase, nZone, i,    &
-                  myRangeNonMatch(1,1,nNonMatch), &
-                  Integer, donorData, ierr)
-             if(ierr /= CG_OK)                      &
-                  call terminate("countConnectivities", &
-                  "Something wrong when calling cg_conn_read_f")
+                ! Something seriously wrong. I cannot repair this.
 
-             deallocate(donorData, stat=ierr)
-             if(ierr /= 0)                           &
-                  call terminate("countConnectivities", &
-                  "Deallocation failure for donorData")
-          else
+                if (myID == 0) then
+                    write (errorMessage, strings) "1 to 1 subface ", trim(connectName), " of zone ", trim(zoneName), &
+                        ": Something seriously wrong with the transformation matrix"
+                    call terminate("checkTransform", errorMessage)
+                end if
 
-             ! CGNS format not supported.
+                ! Make sure that other processors wait until they are killed.
 
-             write(errorMessage, strings) "Zone ", trim(cgnsDoms(nZone)%zoneName), ", connectivity ", &
-               trim(connectName), ": No support for this format of a non-matching abutting connectivity"
-             if(myID == 0) &
-                  call terminate("countConnectivities", errorMessage)
-             call mpi_barrier(ADflow_comm_world, ierr)
+                call mpi_barrier(ADflow_comm_world, ierr)
+            else
+                ! Repair successful, although the orientation might be wrong.
+                ! This will be checked later. Anyway print a warning message
+                ! if desired.
+
+                if (myID == 0 .and. printWarning) then
+                    print "(a)", "#"
+                    print "(a)", "#                          Warning"
+                    write (errorMessage, strings) "1 to 1 subface ", trim(connectName), " of zone ", trim(zoneName), &
+                        ": Normal component of the transformation matrix successfully corrected."
+                    print "(a)", "#"
+                end if
+            end if
+        end if
 
-          endif
+        ! Create the halo vector for the current zone. This vector is
+        ! pointing outwards, i.e. in direction of the donor block.
 
-          !============================================================
+        haloDir = 0
+        haloDir(nDirFace) = 1
+        if (zoneRange(nDirFace, 1) == 1) haloDir(nDirFace) = -1
 
-       case default
+        ! Idem for the donor. Also this vector points from the current
+        ! block to the donor block, albeit in donor block coordinates.
 
-          call terminate("countConnectivities", &
-               "Unsupportted general connectivity found")
+        donorDir = 0
+        donorDir(nDirDonor) = -1
+        if (donorRange(nDirDonor, 1) == 1) donorDir(nDirDonor) = 1
 
-       end select
+        ! Determine the full transformation matrix.
 
-    enddo
+        l1 = transform(1)
+        L2 = transform(2)
+        l3 = transform(3)
 
-    ! Update the value of n1to1 with the number stored in the
-    ! general connectivities.
+        trMat(1, 1) = sign(1_intType, l1) * delta(l1, 1_intType)
+        trMat(2, 1) = sign(1_intType, l1) * delta(l1, 2_intType)
+        trMat(3, 1) = sign(1_intType, l1) * delta(l1, 3_intType)
 
-    n1to1 = n1to1 + n1to1General
+        trMat(1, 2) = sign(1_intType, l2) * delta(l2, 1_intType)
+        trMat(2, 2) = sign(1_intType, l2) * delta(l2, 2_intType)
+        trMat(3, 2) = sign(1_intType, l2) * delta(l2, 3_intType)
 
-    ! Memory allocation for the 1 to 1 connectivities.
+        trMat(1, 3) = sign(1_intType, l3) * delta(l3, 1_intType)
+        trMat(2, 3) = sign(1_intType, l3) * delta(l3, 2_intType)
+        trMat(3, 3) = sign(1_intType, l3) * delta(l3, 3_intType)
 
-    allocate(cgnsDoms(nZone)%conn1to1(n1to1), stat=ierr)
-    if(ierr /= 0)                           &
-         call terminate("countConnectivities", &
-         "Memory allocation failure for conn1to1 and &
-         &connOver")
-    !
-    !       For the non-matching abutting subfaces some more information
-    !       needs to be extracted. The reason is that a subface abuts
-    !       multiple blocks and in CGNS this info is stored in multiple
-    !       connectivities. However it is a lot easier to store that info
-    !       together. That's why the non-abbuting subfaces must be sorted
-    !       in increasing order to extract this information.
-    !
-    ! Allocate the memory for subfaceNonMatch and copy the data
-    ! from myRangeNonMatch and connIDNonMatch. Release the memory
-    ! of these two arrays afterwards. Also allocate the memory
-    ! for multSubfaces, which is needed later on to determine the
-    ! multiplicity of the subfaces.
-
-    allocate(subfaceNonMatch(nNonMatch), &
-         multSubfaces(nNonMatch), stat=ierr)
-    if(ierr /= 0)                           &
-         call terminate("countConnectivities", &
-         "Memory allocation failure for subfaceNonMatch")
-
-    do i=1,nNonMatch
-       subfaceNonMatch(i)%iBeg = min(myRangeNonMatch(1,1,i), &
-            myRangeNonMatch(1,2,i))
-       subfaceNonMatch(i)%jBeg = min(myRangeNonMatch(2,1,i), &
-            myRangeNonMatch(2,2,i))
-       subfaceNonMatch(i)%kBeg = min(myRangeNonMatch(3,1,i), &
-            myRangeNonMatch(3,2,i))
-
-       subfaceNonMatch(i)%iEnd = max(myRangeNonMatch(1,1,i), &
-            myRangeNonMatch(1,2,i))
-       subfaceNonMatch(i)%jEnd = max(myRangeNonMatch(2,1,i), &
-            myRangeNonMatch(2,2,i))
-       subfaceNonMatch(i)%kEnd = max(myRangeNonMatch(3,1,i), &
-            myRangeNonMatch(3,2,i))
-
-       subfaceNonMatch(i)%connID = connIDNonMatch(i)
-    enddo
-
-    deallocate(connIDNonMatch, myRangeNonMatch, stat=ierr)
-    if(ierr /= 0)                           &
-         call terminate("countConnectivities", &
-         "Deallocation failure for connIDNonMatch and &
-         &myRangeNonMatch")
-
-    ! Sort subfaceNonMatch in increasing order and determine the
-    ! number of different subfaces as well as their multiplicity.
-
-    call qsortSubfaceNonMatchType(subfaceNonMatch, nNonMatch)
-
-    nn           = min(nNonMatch, 1_intType)
-    multSubfaces = 1
-
-    do i=2,nNonMatch
-       mm = i - 1
-       if(subfaceNonMatch(i)%iBeg == subfaceNonMatch(mm)%iBeg .and. &
-            subfaceNonMatch(i)%jBeg == subfaceNonMatch(mm)%jBeg .and. &
-            subfaceNonMatch(i)%kBeg == subfaceNonMatch(mm)%kBeg .and. &
-            subfaceNonMatch(i)%iEnd == subfaceNonMatch(mm)%iEnd .and. &
-            subfaceNonMatch(i)%jEnd == subfaceNonMatch(mm)%jEnd .and. &
-            subfaceNonMatch(i)%kEnd == subfaceNonMatch(mm)%kEnd) then
-          multSubfaces(nn) = multSubfaces(nn) + 1
-       else
-          nn = nn + 1
-       endif
-    enddo
-
-    ! Store the number of non-matching connectivities in nNonMatch and
-    ! allocate the memory for the non-matching abutting connectivities
-
-    nNonMatch = nn
-    allocate(cgnsDoms(nZone)%connNonMatchAbutting(nn), stat=ierr)
-    if(ierr /= 0)                           &
-         call terminate("countConnectivities", &
-         "Memory allocation failure for &
-         &connNonMatchAbutting")
-
-    connNonMatch => cgnsDoms(nZone)%connNonMatchAbutting
-
-    ! Loop over the number of non-matching connectivities to copy
-    ! the info from subfaceNonMatch.
-
-    nn = 0
-    do i=1,nNonMatch
-
-       ! Set the value for the number of donor blocks and allocate
-       ! the memory for connectNames, donorNames, donorBlocks and
-       ! donorFaceIDs.
-
-       mm = multSubfaces(i)
-       connNonMatch(i)%nDonorBlocks = mm
-
-       allocate(connNonMatch(i)%connectNames(mm), &
-            connNonMatch(i)%donorNames(mm),   &
-            connNonMatch(i)%donorBlocks(mm),  &
-            connNonMatch(i)%donorFaceIDs(mm), stat=ierr)
-       if(ierr /= 0)                           &
-            call terminate("countConnectivities", &
-            "Memory allocation failure for connectNames, &
-            &donorNames, donorBlocks and donorFaceIDs.")
-
-       ! Loop over the number of donor blocks and copy for the moment
-       ! the connectivity ID in donorBlocks.
-
-       do mm=1,connNonMatch(i)%nDonorBlocks
-          nn = nn + 1
-          connNonMatch(i)%donorBlocks(mm) = subfaceNonMatch(nn)%connID
-       enddo
-
-       ! Copy the subface range.
-
-       connNonMatch(i)%iBeg = subfaceNonMatch(nn)%iBeg
-       connNonMatch(i)%jBeg = subfaceNonMatch(nn)%jBeg
-       connNonMatch(i)%kBeg = subfaceNonMatch(nn)%kBeg
-
-       connNonMatch(i)%iEnd = subfaceNonMatch(nn)%iEnd
-       connNonMatch(i)%jEnd = subfaceNonMatch(nn)%jEnd
-       connNonMatch(i)%kEnd = subfaceNonMatch(nn)%kEnd
-
-    enddo
+        ! Apply the transformation matrix to haloDir.
 
-    ! Release the memory of subfaceNonMatch again.
-
-    deallocate(subfaceNonMatch, stat=ierr)
-    if(ierr /= 0)                           &
-         call terminate("countConnectivities", &
-         "Deallocation failure for subfaceNonMatch")
-    !
-    !       Store the number of connectivities in cgnsDoms(nZone).
-    !
-    cgnsDoms(nZone)%n1to1             = n1to1
-    cgnsDoms(nZone)%n1to1General      = n1to1General
-    cgnsDoms(nZone)%nNonMatchAbutting = nNonMatch
-
-  end subroutine countConnectivities
-
-  subroutine read1to1Conn(cgnsInd, cgnsBase, nZone)
-    !
-    !       read1to1Conn reads the 1 to 1 block to block, i.e.
-    !       continuous grid lines across block boundaries, connectivities
-    !       for the given zone/block.
-    !
-    use constants
-    use su_cgns
-    use cgnsGrid, only : cgnsDoms, cgnsNDom, cgns1to1ConnType
-    use communication, only : adflow_comm_world, myID
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    integer, intent(in) :: cgnsInd, cgnsBase, nZone
-    !
-    !      Local variables
-    !
-    integer :: cgnsN1to1
-    integer :: i, ierr
-
-    integer(kind=cgsize_t), dimension(3,2) :: zoneRange, donorRange
-    integer, dimension(3)   :: transform
-
-    character(len=maxCGNSNameLen) :: connectName
-
-    type(cgns1to1ConnType),    pointer, dimension(:) :: conn1to1
-    real(kind=realType), dimension(3) :: rotCenter, rotAngles
-    real(kind=realType), dimension(3) :: tlation
-
-    ! Determine the number of 1 to 1 connectivities stored in the
-    ! CGNS file for this zone.
-
-    cgnsN1to1 = cgnsDoms(nZone)%n1to1 - cgnsDoms(nZone)%n1to1General
-
-    ! Loop over the number of 1 to 1 connectivities stored in the
-    ! grid file.
-
-    do i=1,cgnsN1to1
-
-       ! Read the 1 to 1 connectivity info from the cgns file.
-
-       call cg_1to1_read_f(cgnsInd, cgnsBase, nZone, i,             &
-            cgnsDoms(nZone)%conn1to1(i)%connectName, &
-            cgnsDoms(nZone)%conn1to1(i)%donorName,   &
-            zoneRange, donorRange, transform, ierr)
-       if(ierr /= CG_OK)                 &
-            call terminate("read1to1Conn", &
-            "Something wrong when calling cg_1to1_read_f")
-
-       ! Store the zone range and donor range in cgnsDoms.
-
-       cgnsDoms(nZone)%conn1to1(i)%iBeg = zoneRange(1,1)
-       cgnsDoms(nZone)%conn1to1(i)%jBeg = zoneRange(2,1)
-       cgnsDoms(nZone)%conn1to1(i)%kBeg = zoneRange(3,1)
-
-       cgnsDoms(nZone)%conn1to1(i)%iEnd = zoneRange(1,2)
-       cgnsDoms(nZone)%conn1to1(i)%jEnd = zoneRange(2,2)
-       cgnsDoms(nZone)%conn1to1(i)%kEnd = zoneRange(3,2)
-
-       cgnsDoms(nZone)%conn1to1(i)%diBeg = donorRange(1,1)
-       cgnsDoms(nZone)%conn1to1(i)%djBeg = donorRange(2,1)
-       cgnsDoms(nZone)%conn1to1(i)%dkBeg = donorRange(3,1)
-
-       cgnsDoms(nZone)%conn1to1(i)%diEnd = donorRange(1,2)
-       cgnsDoms(nZone)%conn1to1(i)%djEnd = donorRange(2,2)
-       cgnsDoms(nZone)%conn1to1(i)%dkEnd = donorRange(3,2)
-
-       ! Check the transformation matrices between this zone and the
-       ! donor zone and store it in l1, L2 and l3.
-
-       call checkTransform(transform, nZone, i, .true.)
-
-       cgnsDoms(nZone)%conn1to1(i)%l1 = transform(1)
-       cgnsDoms(nZone)%conn1to1(i)%l2 = transform(2)
-       cgnsDoms(nZone)%conn1to1(i)%l3 = transform(3)
-
-       ! Subface is a normal boundary. Set periodic to .false. and
-       ! initialize the periodic data to zero to avoid possible
-       ! problems due to uninitialized data.
-
-       cgnsDoms(nZone)%conn1to1(i)%periodic = .false.
-
-       cgnsDoms(nZone)%conn1to1(i)%rotationCenter = zero
-       cgnsDoms(nZone)%conn1to1(i)%rotationAngles = zero
-       cgnsDoms(nZone)%conn1to1(i)%translation    = zero
-
-
-       call cg_1to1_periodic_read_f(cgnsInd, cgnsBase, nZone, i, &
-            real(rotCenter,cgnsPerType), real(rotAngles,cgnsPerType), real(tlation,cgnsPerType), ierr)
-       if(ierr == CG_OK)then
-          call readPeriodicSubface1to1(cgnsInd, cgnsBase, nZone, i,    &
-               cgnsDoms(nZone)%conn1to1(i)%connectName,                       &
-               cgnsDoms(nZone)%conn1to1(i)%periodic,          &
-               cgnsDoms(nZone)%conn1to1(i)%rotationCenter,    &
-               cgnsDoms(nZone)%conn1to1(i)%rotationAngles,    &
-               cgnsDoms(nZone)%conn1to1(i)%translation)
-       endif
-
-    enddo
-
-
-
-  end subroutine read1to1Conn
-  subroutine checkTransform(transform, nZone, n1to1, printWarning)
-    !
-    !       checkTransform checks the transformation matrix between this
-    !       zone and the donor for the given subrange. In case an error is
-    !       found it is tried to correct this.
-    !
-    use constants
-    use cgnsGrid, only : cgnsDoms
-    use communication, only : myID, adflow_comm_world
-    use utils, only : delta, terminate
-    use commonFormats, only : strings
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer, intent(in)                  :: nZone, n1to1
-    integer, dimension(3), intent(inout) :: transform
-    logical, intent(in)                  :: printWarning
-    !
-    !      Local variables.
-    !
-    integer :: ierr
+        l1 = haloDir(1)
+        L2 = haloDir(2)
+        l3 = haloDir(3)
 
-    integer(kind=intType) :: nDirFace, nDirDonor
-    integer(kind=intType) :: sumTransform
-    integer(kind=intType) :: l1, L2, l3
+        haloDir(1) = trMat(1, 1) * l1 + trMat(1, 2) * l2 + trMat(1, 3) * l3
+        haloDir(2) = trMat(2, 1) * l1 + trMat(2, 2) * l2 + trMat(2, 3) * l3
+        haloDir(3) = trMat(3, 1) * l1 + trMat(3, 2) * l2 + trMat(3, 3) * l3
 
-    integer(kind=intType), dimension(3) :: haloDir, donorDir
-    integer(kind=intType), dimension(3,2) :: zoneRange, donorRange
-    integer(kind=intType), dimension(3,3) :: trMat
+        ! If the transformation matrix is correct haloDir == donorDir.
+        ! If this is not the case, there are two possibilities. Either
+        ! the directions are just reversed, which means that the
+        ! corresponding element of transform must be reversed, or they
+        ! are really different. In the latter case it cannot be
+        ! corrected here and the grid file must be adapted.
 
-    character(len=maxCGNSNameLen) :: zoneName, connectName
-    character(len=2*maxStringLen)  :: errorMessage
-    !
-    ! Copy the zoneName and connectName, just for readability later.
-
-    zoneName    = cgnsDoms(nZone)%zoneName
-    connectName = cgnsDoms(nZone)%conn1to1(n1to1)%connectName
-
-    ! Copy the zone and donor range into zoneRange and donorRange.
-
-    zoneRange(1,1) = cgnsDoms(nZone)%conn1to1(n1to1)%iBeg
-    zoneRange(2,1) = cgnsDoms(nZone)%conn1to1(n1to1)%jBeg
-    zoneRange(3,1) = cgnsDoms(nZone)%conn1to1(n1to1)%kBeg
-
-    zoneRange(1,2) = cgnsDoms(nZone)%conn1to1(n1to1)%iEnd
-    zoneRange(2,2) = cgnsDoms(nZone)%conn1to1(n1to1)%jEnd
-    zoneRange(3,2) = cgnsDoms(nZone)%conn1to1(n1to1)%kEnd
-
-    donorRange(1,1) = cgnsDoms(nZone)%conn1to1(n1to1)%diBeg
-    donorRange(2,1) = cgnsDoms(nZone)%conn1to1(n1to1)%djBeg
-    donorRange(3,1) = cgnsDoms(nZone)%conn1to1(n1to1)%dkEnd
-
-    donorRange(1,2) = cgnsDoms(nZone)%conn1to1(n1to1)%diEnd
-    donorRange(2,2) = cgnsDoms(nZone)%conn1to1(n1to1)%djEnd
-    donorRange(3,2) = cgnsDoms(nZone)%conn1to1(n1to1)%dkEnd
-
-    ! Determine the normal direction for the subface and do a trivial
-    ! check to see if there is one.
+        if (haloDir(nDirDonor) == 0) then
 
-    do nDirFace=1,3
-       if(zoneRange(nDirFace,1) == zoneRange(nDirFace,2)) exit
-    enddo
+            ! Something seriously wrong. Exit the program.
 
-    if(nDirFace > 3) then
-       if(myID == 0) then
-          write(errorMessage, strings) "1 to 1 subface ", trim(connectName)," of zone ", trim(zoneName), &
-               ": No constant index found"
-          call terminate("checkTransform", errorMessage)
-       endif
-
-       ! Make sure that other processors wait until they are killed.
-
-       call mpi_barrier(ADflow_comm_world, ierr)
-    endif
-
-    ! Idem for the donor subface.
-
-    do nDirDonor=1,3
-       if(donorRange(nDirDonor,1) == donorRange(nDirDonor,2)) exit
-    enddo
-
-    if(nDirDonor > 3) then
-       if(myID == 0) then
-          write(errorMessage, strings) "1 to 1 subface ", trim(connectName)," of zone ", trim(zoneName), &
-          ": No constant index found for donor"
-          call terminate("checkTransform", errorMessage)
-       endif
-
-       ! Make sure that other processors wait until they are killed.
-
-       call mpi_barrier(ADflow_comm_world, ierr)
-    endif
-
-    ! Check if the sum of the absolute values of transform equals 6.
-    ! If not, assume that the normal direction is not set correctly.
+            if (myID == 0) then
+                write (errorMessage, strings) "1 to 1 subface ", trim(connectName), " of zone ", trim(zoneName), &
+                    ": Something seriously wrong with the transformation matrix"
+                call terminate("checkTransform", errorMessage)
+            end if
 
-    sumTransform = abs(transform(1)) + abs(transform(2)) &
-         + abs(transform(3))
-    if(sumTransform /= 6) then
-
-       ! Change the normal direction of transform and check the
-       ! sum again.
+            ! Make sure that other processors wait until they are killed.
 
-       transform(nDirFace) = nDirDonor
-       sumTransform = abs(transform(1)) + abs(transform(2)) &
-            + abs(transform(3))
+            call mpi_barrier(ADflow_comm_world, ierr)
 
-       if(sumTransform /= 6) then
+        else if (haloDir(nDirDonor) * donorDir(nDirDonor) < 0) then
 
-          ! Something seriously wrong. I cannot repair this.
+            ! Simply reverse the sign of the corresponding entry in
+            ! transform. Processor 0 prints a warning message if desired.
 
-          if(myID == 0) then
-             write(errorMessage, strings) "1 to 1 subface ", trim(connectName)," of zone ", trim(zoneName), &
-             ": Something seriously wrong with the transformation matrix"
-             call terminate("checkTransform", errorMessage)
-          endif
+            transform(nDirFace) = -transform(nDirFace)
 
-          ! Make sure that other processors wait until they are killed.
+            if (myID == 0 .and. printWarning) then
+                print "(a)", "#"
+                print "(a)", "#                            Warning"
+                write (errorMessage, strings) "1 to 1 subface ", trim(connectName), " of zone ", trim(zoneName), &
+                    ": Normal component of the transformation matrix reversed"
+                print "(a)", "#"
+            end if
+        end if
+
+    end subroutine checkTransform
+    subroutine readGeneralConn(cgnsInd, cgnsBase, nZone)
+        !
+        !       readGeneralConn reads and converts the cgns general
+        !       connectivities.  Supported connectivites are 1-to-1 and
+        !       non-matching abutting.
+        !
+        use constants
+        use su_cgns
+        use cgnsGrid, only: cgnsDoms, cgnsNDom, cgns1to1ConnType, &
+                            cgnsNOnMatchAbuttingConnType
+        use communication, only: myid, adflow_comm_world
+        use utils, only: terminate
+        use commonFormats, only: strings
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        integer, intent(in) :: cgnsInd, cgnsBase, nZone
+
+        !
+        !      Local variables.
+        !
+        character(len=maxStringLen) :: errorMessage
+
+        integer :: i, j, nn, nGeneral, n1to1, ierr
+        integer :: location, connectType, ptsetType
+        integer(kind=cgsize_t) :: npnts
+        integer :: donorZoneType, donorPtsetType, donorDatatype
+        integer :: id, jj
+        integer :: nArrays, dataType, dataDim
+        integer(kind=cgsize_t) :: nDataDonor
+        integer, dimension(2) :: dimVector
+        integer, dimension(3) :: ii, transform
+        integer(kind=cgsize_t), dimension(3, 2) :: myRange
+
+        integer(kind=cgsize_t), dimension(:, :), allocatable :: myData, donorData
+        integer, dimension(:, :), allocatable :: map2NonMatch
+
+        real(kind=realType), dimension(3) :: rotationCenter
+        real(kind=realType), dimension(3) :: rotationAngles
+        real(kind=realType), dimension(3) :: translation
+
+        logical :: periodic, wrongData
+
+        character(len=maxCGNSNameLen) :: connectName, donorName, arrayName
+
+        type(cgns1to1ConnType), pointer, dimension(:) :: conn1to1
+
+        type(cgnsNonMatchAbuttingConnType), pointer, dimension(:) :: connNonMatch
+        !
+        !      Function definition.
+        !
+        integer :: setCGNSRealType
+
+        ! Set some pointers for the connectivities to make the code
+        ! more readable.
+
+        conn1to1 => cgnsDoms(nZone)%conn1to1
+        connNonMatch => cgnsDoms(nZone)%connNonMatchAbutting
+
+        ! Set the counter n1to1 to the currently stored number of 1 to 1
+        ! block connectivities, and initialize other counters.
+
+        n1to1 = cgnsDoms(nZone)%n1to1 - cgnsDoms(nZone)%n1to1General
+
+        ! Determine the number of general connectivities.
+
+        call cg_nconns_f(cgnsInd, cgnsBase, nZone, ngeneral, ierr)
+        if (ierr /= CG_OK) &
+            call terminate("readGeneralConn", &
+                           "Something wrong when calling cg_nconns_f")
+
+        ! Determine the mapping from the general connectivities to the
+        ! data structure for the non-matching abutting connectivities.
+        ! There are two indices for this mapping, the first is the
+        ! index in connNonMatch and the second the neighbor index.
+        ! Note that in the data structure a non-matching abutting
+        ! subface can have multiple neighbors.
+        ! Note that the general connectivity ID is temporarily stored
+        ! in connNonMatch(i)%donorBlocks.
+
+        allocate (map2NonMatch(ngeneral, 2), stat=ierr)
+        if (ierr /= CG_OK) &
+            call terminate("readGeneralConn", &
+                           "Memory allocation failure for map2NonMatch")
 
-          call mpi_barrier(ADflow_comm_world, ierr)
-       else
-          ! Repair successful, although the orientation might be wrong.
-          ! This will be checked later. Anyway print a warning message
-          ! if desired.
+        do i = 1, cgnsDoms(nZone)%nNonMatchAbutting
+            do j = 1, connNonMatch(i)%nDonorBlocks
+                nn = connNonMatch(i)%donorBlocks(j)
+                map2NonMatch(nn, 1) = i
+                map2NonMatch(nn, 2) = j
+            end do
+        end do
 
-          if(myID == 0 .and. printWarning) then
-             print "(a)", "#"
-             print "(a)", "#                          Warning"
-             write(errorMessage, strings) "1 to 1 subface ", trim(connectName)," of zone ", trim(zoneName), &
-             ": Normal component of the transformation matrix successfully corrected."
-             print "(a)", "#"
-          endif
-       endif
-    endif
+        ! Loop over the general connectivities.
 
-    ! Create the halo vector for the current zone. This vector is
-    ! pointing outwards, i.e. in direction of the donor block.
-
-    haloDir = 0
-    haloDir(nDirFace) = 1
-    if(zoneRange(nDirFace,1) == 1) haloDir(nDirFace) = -1
-
-    ! Idem for the donor. Also this vector points from the current
-    ! block to the donor block, albeit in donor block coordinates.
-
-    donorDir = 0
-    donorDir(nDirDonor) = -1
-    if(donorRange(nDirDonor,1) == 1) donorDir(nDirDonor) = 1
-
-    ! Determine the full transformation matrix.
-
-    l1 = transform(1)
-    L2 = transform(2)
-    l3 = transform(3)
-
-    trMat(1,1) = sign(1_intType,l1) * delta(l1,1_intType)
-    trMat(2,1) = sign(1_intType,l1) * delta(l1,2_intType)
-    trMat(3,1) = sign(1_intType,l1) * delta(l1,3_intType)
-
-    trMat(1,2) = sign(1_intType,l2) * delta(l2,1_intType)
-    trMat(2,2) = sign(1_intType,l2) * delta(l2,2_intType)
-    trMat(3,2) = sign(1_intType,l2) * delta(l2,3_intType)
-
-    trMat(1,3) = sign(1_intType,l3) * delta(l3,1_intType)
-    trMat(2,3) = sign(1_intType,l3) * delta(l3,2_intType)
-    trMat(3,3) = sign(1_intType,l3) * delta(l3,3_intType)
-
-    ! Apply the transformation matrix to haloDir.
-
-    l1 = haloDir(1)
-    L2 = haloDir(2)
-    l3 = haloDir(3)
-
-    haloDir(1) = trMat(1,1)*l1 + trMat(1,2)*l2 + trMat(1,3)*l3
-    haloDir(2) = trMat(2,1)*l1 + trMat(2,2)*l2 + trMat(2,3)*l3
-    haloDir(3) = trMat(3,1)*l1 + trMat(3,2)*l2 + trMat(3,3)*l3
-
-    ! If the transformation matrix is correct haloDir == donorDir.
-    ! If this is not the case, there are two possibilities. Either
-    ! the directions are just reversed, which means that the
-    ! corresponding element of transform must be reversed, or they
-    ! are really different. In the latter case it cannot be
-    ! corrected here and the grid file must be adapted.
-
-    if(haloDir(nDirDonor) == 0) then
-
-       ! Something seriously wrong. Exit the program.
+        nConnLoop: do nn = 1, ngeneral
 
-       if(myID == 0) then
-          write(errorMessage, strings) "1 to 1 subface ", trim(connectName)," of zone ", trim(zoneName), &
-          ": Something seriously wrong with the transformation matrix"
-          call terminate("checkTransform", errorMessage)
-       endif
-
-       ! Make sure that other processors wait until they are killed.
-
-       call mpi_barrier(ADflow_comm_world, ierr)
-
-    else if(haloDir(nDirDonor)*donorDir(nDirDonor) < 0) then
-
-       ! Simply reverse the sign of the corresponding entry in
-       ! transform. Processor 0 prints a warning message if desired.
-
-       transform(nDirFace) = -transform(nDirFace)
-
-       if(myID == 0 .and. printWarning) then
-          print "(a)", "#"
-          print "(a)", "#                            Warning"
-          write(errorMessage, strings) "1 to 1 subface ", trim(connectName)," of zone ", trim(zoneName), &
-          ": Normal component of the transformation matrix reversed"
-          print "(a)", "#"
-       endif
-   endif
-
-  end subroutine checkTransform
-  subroutine readGeneralConn(cgnsInd, cgnsBase, nZone)
-    !
-    !       readGeneralConn reads and converts the cgns general
-    !       connectivities.  Supported connectivites are 1-to-1 and
-    !       non-matching abutting.
-    !
-    use constants
-    use su_cgns
-    use cgnsGrid, only : cgnsDoms, cgnsNDom, cgns1to1ConnType, &
-         cgnsNOnMatchAbuttingConnType
-    use communication, only : myid, adflow_comm_world
-    use utils, only : terminate
-    use commonFormats, only : strings
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    integer, intent(in) :: cgnsInd, cgnsBase, nZone
-
-    !
-    !      Local variables.
-    !
-    character(len=maxStringLen) :: errorMessage
-
-    integer :: i, j, nn, nGeneral, n1to1, ierr
-    integer :: location, connectType, ptsetType
-    integer(kind=cgsize_t) :: npnts
-    integer :: donorZoneType, donorPtsetType, donorDatatype
-    integer :: id, jj
-    integer :: nArrays, dataType, dataDim
-    integer(kind=cgsize_t) :: nDataDonor
-    integer, dimension(2)   :: dimVector
-    integer, dimension(3)   :: ii, transform
-    integer(kind=cgsize_t), dimension(3,2) :: myRange
-
-    integer(kind=cgsize_t), dimension(:,:), allocatable :: myData, donorData
-    integer, dimension(:,:), allocatable :: map2NonMatch
-
-    real(kind=realType), dimension(3) :: rotationCenter
-    real(kind=realType), dimension(3) :: rotationAngles
-    real(kind=realType), dimension(3) :: translation
-
-    logical :: periodic, wrongData
-
-    character(len=maxCGNSNameLen) :: connectName, donorName, arrayName
-
-    type(cgns1to1ConnType),    pointer, dimension(:) :: conn1to1
-
-    type(cgnsNonMatchAbuttingConnType), pointer, dimension(:) :: connNonMatch
-    !
-    !      Function definition.
-    !
-    integer :: setCGNSRealType
-
-    ! Set some pointers for the connectivities to make the code
-    ! more readable.
-
-    conn1to1     => cgnsDoms(nZone)%conn1to1
-    connNonMatch => cgnsDoms(nZone)%connNonMatchAbutting
-
-    ! Set the counter n1to1 to the currently stored number of 1 to 1
-    ! block connectivities, and initialize other counters.
-
-    n1to1 = cgnsDoms(nZone)%n1to1 - cgnsDoms(nZone)%n1to1General
-
-    ! Determine the number of general connectivities.
-
-    call cg_nconns_f(cgnsInd, cgnsBase, nZone, ngeneral, ierr)
-    if(ierr /= CG_OK)                  &
-         call terminate("readGeneralConn", &
-         "Something wrong when calling cg_nconns_f")
-
-    ! Determine the mapping from the general connectivities to the
-    ! data structure for the non-matching abutting connectivities.
-    ! There are two indices for this mapping, the first is the
-    ! index in connNonMatch and the second the neighbor index.
-    ! Note that in the data structure a non-matching abutting
-    ! subface can have multiple neighbors.
-    ! Note that the general connectivity ID is temporarily stored
-    ! in connNonMatch(i)%donorBlocks.
-
-    allocate(map2NonMatch(ngeneral,2), stat=ierr)
-    if(ierr /= CG_OK)                  &
-         call terminate("readGeneralConn", &
-         "Memory allocation failure for map2NonMatch")
-
-    do i=1,cgnsDoms(nZone)%nNonMatchAbutting
-       do j=1,connNonMatch(i)%nDonorBlocks
-          nn = connNonMatch(i)%donorBlocks(j)
-          map2NonMatch(nn,1) = i
-          map2NonMatch(nn,2) = j
-       enddo
-    enddo
-
-    ! Loop over the general connectivities.
-
-    nConnLoop: do nn=1,ngeneral
-
-       ! Read the information of this connectivity.
-
-       call cg_conn_info_f(cgnsInd, cgnsBase, nZone, nn, connectName, &
-            location, connectType, ptsetType, npnts,   &
-            donorName, donorZoneType, donorPtsetType,  &
-            donorDatatype, ndataDonor, ierr)
-       if(ierr /= CG_OK)                  &
-            call terminate("readGeneralConn", &
-            "Something wrong when calling cg_conn_info_f")
+            ! Read the information of this connectivity.
 
-       ! Read the data based on the type of connectivity.
+            call cg_conn_info_f(cgnsInd, cgnsBase, nZone, nn, connectName, &
+                                location, connectType, ptsetType, npnts, &
+                                donorName, donorZoneType, donorPtsetType, &
+                                donorDatatype, ndataDonor, ierr)
+            if (ierr /= CG_OK) &
+                call terminate("readGeneralConn", &
+                               "Something wrong when calling cg_conn_info_f")
 
-       connectivityType: select case(connectType)
+            ! Read the data based on the type of connectivity.
 
-       case (Abutting1to1)
-          !
-          !             1-to-1 connectivity stored as a general one. Note that
-          !             the check for a valid one has already been done in
-          !             countConnectivities.
-          !
-          ! Update the counter n1to1 and store some info in conn1to1.
+            connectivityType:select case(connectType)
 
-          n1to1 = n1to1 + 1
+            case (Abutting1to1)
+            !
+            !             1-to-1 connectivity stored as a general one. Note that
+            !             the check for a valid one has already been done in
+            !             countConnectivities.
+            !
+            ! Update the counter n1to1 and store some info in conn1to1.
 
-          conn1to1(n1to1)%connectName = connectName
-          conn1to1(n1to1)%donorName   = donorName
+            n1to1 = n1to1 + 1
 
-          ! Allocate the memory for donorData.
+            conn1to1(n1to1)%connectName = connectName
+            conn1to1(n1to1)%donorName = donorName
 
-          allocate(donorData(3,ndataDonor), stat=ierr)
-          if(ierr /= 0)                       &
-               call terminate("readGeneralConn", &
-               "Memory allocation failure for donorData")
+            ! Allocate the memory for donorData.
 
-          ! Read the ranges of the connectivities.
+            allocate (donorData(3, ndataDonor), stat=ierr)
+            if (ierr /= 0) &
+                call terminate("readGeneralConn", &
+                               "Memory allocation failure for donorData")
 
-          call cg_conn_read_f(cgnsInd, cgnsBase, nZone, nn, &
-               myRange, Integer, donorData, ierr)
-          if(ierr /= CG_OK)                  &
-               call terminate("readGeneralConn", &
-               "Something wrong when calling &
-               &cg_conn_read_f")
+            ! Read the ranges of the connectivities.
 
-          ! Store the range of the current subface and its donor.
+            call cg_conn_read_f(cgnsInd, cgnsBase, nZone, nn, &
+                                myRange, Integer, donorData, ierr)
+            if (ierr /= CG_OK) &
+                 call terminate("readGeneralConn", &
+                 "Something wrong when calling &
+                 &cg_conn_read_f")
 
-          conn1to1(n1to1)%iBeg = myRange(1,1)
-          conn1to1(n1to1)%jBeg = myRange(2,1)
-          conn1to1(n1to1)%kBeg = myRange(3,1)
+            ! Store the range of the current subface and its donor.
 
-          conn1to1(n1to1)%iEnd = myRange(1,2)
-          conn1to1(n1to1)%jEnd = myRange(2,2)
-          conn1to1(n1to1)%kEnd = myRange(3,2)
+            conn1to1(n1to1)%iBeg = myRange(1, 1)
+            conn1to1(n1to1)%jBeg = myRange(2, 1)
+            conn1to1(n1to1)%kBeg = myRange(3, 1)
 
-          conn1to1(n1to1)%diBeg = donorData(1,1)
-          conn1to1(n1to1)%djBeg = donorData(2,1)
-          conn1to1(n1to1)%dkBeg = donorData(3,1)
+            conn1to1(n1to1)%iEnd = myRange(1, 2)
+            conn1to1(n1to1)%jEnd = myRange(2, 2)
+            conn1to1(n1to1)%kEnd = myRange(3, 2)
 
-          conn1to1(n1to1)%diEnd = donorData(1,ndataDonor)
-          conn1to1(n1to1)%djEnd = donorData(2,ndataDonor)
-          conn1to1(n1to1)%dkEnd = donorData(3,ndataDonor)
+            conn1to1(n1to1)%diBeg = donorData(1, 1)
+            conn1to1(n1to1)%djBeg = donorData(2, 1)
+            conn1to1(n1to1)%dkBeg = donorData(3, 1)
 
-          ! Determine the transformation matrix between the subface
-          ! and the donor subface. Initialize it to 0.
+            conn1to1(n1to1)%diEnd = donorData(1, ndataDonor)
+            conn1to1(n1to1)%djEnd = donorData(2, ndataDonor)
+            conn1to1(n1to1)%dkEnd = donorData(3, ndataDonor)
 
-          transform = 0
+            ! Determine the transformation matrix between the subface
+            ! and the donor subface. Initialize it to 0.
 
-          ! Determine the fastest changing index in the donor.
-          ! Take negative running indices into account.
+            transform = 0
 
-          ii = donorData(:,2) - donorData(:,1)
-          do id=1,3
-             if(ii(id) /= 0) exit
-          enddo
-          if(ii(id) < 0) id = -id
+            ! Determine the fastest changing index in the donor.
+            ! Take negative running indices into account.
 
-          ! Determine the corresponding index in myRange.
+            ii = donorData(:, 2) - donorData(:, 1)
+            do id = 1, 3
+                if (ii(id) /= 0) exit
+            end do
+            if (ii(id) < 0) id = -id
 
-          do jj=1,3
-             if(myRange(jj,1) /= myRange(jj,2)) exit
-          enddo
+            ! Determine the corresponding index in myRange.
 
-          ! Set the corresponding entry of transform; take negative
-          ! running indices of myRangle into account.
+            do jj = 1, 3
+                if (myRange(jj, 1) /= myRange(jj, 2)) exit
+            end do
 
-          if(myRange(jj,1) > myRange(jj,2)) id = -id
-          transform(jj) = id
+            ! Set the corresponding entry of transform; take negative
+            ! running indices of myRangle into account.
 
-          ! Determine the index in donorData where the second index
-          ! of the subface changes for the first time and determine
-          ! this second index. Take negative running indices into
-          ! account.
+            if (myRange(jj, 1) > myRange(jj, 2)) id = -id
+            transform(jj) = id
 
-          j = abs(myRange(jj,2)-myRange(jj,1)) + 2
-          ii = donorData(:,j) - donorData(:,1)
-          do id=1,3
-             if(ii(id) /= 0) exit
-          enddo
-          if(ii(id) < 0) id = -id
+            ! Determine the index in donorData where the second index
+            ! of the subface changes for the first time and determine
+            ! this second index. Take negative running indices into
+            ! account.
 
-          ! Determine the corresponding index in myRange and set the
-          ! corresponding entry in transform.
+            j = abs(myRange(jj, 2) - myRange(jj, 1)) + 2
+            ii = donorData(:, j) - donorData(:, 1)
+            do id = 1, 3
+                if (ii(id) /= 0) exit
+            end do
+            if (ii(id) < 0) id = -id
 
-          do jj=jj+1,3
-             if(myRange(jj,1) /= myRange(jj,2)) exit
-          enddo
-          if(myRange(jj,1) > myRange(jj,2)) id = -id
-          transform(jj) = id
+            ! Determine the corresponding index in myRange and set the
+            ! corresponding entry in transform.
 
-          ! Release the memory of donorData.
+            do jj = jj + 1, 3
+                if (myRange(jj, 1) /= myRange(jj, 2)) exit
+            end do
+            if (myRange(jj, 1) > myRange(jj, 2)) id = -id
+            transform(jj) = id
 
-          deallocate(donorData, stat=ierr)
-          if(ierr /= 0)                       &
-               call terminate("readGeneralConn", &
-               "Deallocation error for donorData")
+            ! Release the memory of donorData.
 
-          ! Determine the correct third direction of the
-          ! transformation matrix. Although not intented,
-          ! it also serves as a check.
+            deallocate (donorData, stat=ierr)
+            if (ierr /= 0) &
+                call terminate("readGeneralConn", &
+                               "Deallocation error for donorData")
 
-          call checkTransform(transform, nZone, n1to1, .false.)
+            ! Determine the correct third direction of the
+            ! transformation matrix. Although not intented,
+            ! it also serves as a check.
 
-          conn1to1(n1to1)%l1 = transform(1)
-          conn1to1(n1to1)%l2 = transform(2)
-          conn1to1(n1to1)%l3 = transform(3)
+            call checkTransform(transform, nZone, n1to1, .false.)
 
-          ! Read the periodic info if this is a periodic boundary.
+            conn1to1(n1to1)%l1 = transform(1)
+            conn1to1(n1to1)%l2 = transform(2)
+            conn1to1(n1to1)%l3 = transform(3)
 
-          call readPeriodicSubface(cgnsInd, cgnsBase, nZone, nn,   &
-               connectName,                    &
-               conn1to1(n1to1)%periodic,       &
-               conn1to1(n1to1)%rotationCenter, &
-               conn1to1(n1to1)%rotationAngles, &
-               conn1to1(n1to1)%translation)
+            ! Read the periodic info if this is a periodic boundary.
 
-          !=============================================================
+            call readPeriodicSubface(cgnsInd, cgnsBase, nZone, nn, &
+                                     connectName, &
+                                     conn1to1(n1to1)%periodic, &
+                                     conn1to1(n1to1)%rotationCenter, &
+                                     conn1to1(n1to1)%rotationAngles, &
+                                     conn1to1(n1to1)%translation)
 
-       case (Abutting)
-          !
-          !             Non-matching abutting connectivity. Note that the
-          !             check for a valid one has already been done in
-          !             countConnectivities.
-          !
-          ! Determine the indices in connNonMatch where the data of
-          ! the current connectivity must be stored.
+            !=============================================================
 
-          i = map2NonMatch(nn,1)
-          j = map2NonMatch(nn,2)
+            case (Abutting)
+            !
+            !             Non-matching abutting connectivity. Note that the
+            !             check for a valid one has already been done in
+            !             countConnectivities.
+            !
+            ! Determine the indices in connNonMatch where the data of
+            ! the current connectivity must be stored.
 
-          ! Store the names of the connectivity and the donor block.
+            i = map2NonMatch(nn, 1)
+            j = map2NonMatch(nn, 2)
 
-          connNonMatch(i)%connectNames(j) = connectName
-          connNonMatch(i)%donorNames(j)   = donorName
+            ! Store the names of the connectivity and the donor block.
 
-          ! Allocate the memory for donorData.
+            connNonMatch(i)%connectNames(j) = connectName
+            connNonMatch(i)%donorNames(j) = donorName
 
-          allocate(donorData(3,ndataDonor), stat=ierr)
-          if(ierr /= 0)                       &
-               call terminate("readGeneralConn", &
-               "Memory allocation failure for donorData")
+            ! Allocate the memory for donorData.
 
-          ! Read the ranges of the connectivities.
+            allocate (donorData(3, ndataDonor), stat=ierr)
+            if (ierr /= 0) &
+                call terminate("readGeneralConn", &
+                               "Memory allocation failure for donorData")
 
-          call cg_conn_read_f(cgnsInd, cgnsBase, nZone, nn, &
-               myRange, Integer, donorData, ierr)
-          if(ierr /= CG_OK)                  &
-               call terminate("readGeneralConn", &
-               "Something wrong when calling &
-               &cg_conn_read_f")
+            ! Read the ranges of the connectivities.
 
-          ! Determine the face ID on the abutting donor block.
-          ! Note that mu subface range has already been stored in
-          ! countConnectivities.
+            call cg_conn_read_f(cgnsInd, cgnsBase, nZone, nn, &
+                                myRange, Integer, donorData, ierr)
+            if (ierr /= CG_OK) &
+                 call terminate("readGeneralConn", &
+                 "Something wrong when calling &
+                 &cg_conn_read_f")
 
-          if(donorData(1,1) == donorData(1,ndataDonor)) then
+            ! Determine the face ID on the abutting donor block.
+            ! Note that mu subface range has already been stored in
+            ! countConnectivities.
 
-             if(donorData(1,1) == 1) then
-                connNonMatch(i)%donorFaceIDs(j) = iMin
-             else
-                connNonMatch(i)%donorFaceIDs(j) = iMax
-             endif
+            if (donorData(1, 1) == donorData(1, ndataDonor)) then
 
-          else if(donorData(2,1) == donorData(2,ndataDonor)) then
+                if (donorData(1, 1) == 1) then
+                    connNonMatch(i)%donorFaceIDs(j) = iMin
+                else
+                    connNonMatch(i)%donorFaceIDs(j) = iMax
+                end if
 
-             if(donorData(2,1) == 1) then
-                connNonMatch(i)%donorFaceIDs(j) = jMin
-             else
-                connNonMatch(i)%donorFaceIDs(j) = jMax
-             endif
+            else if (donorData(2, 1) == donorData(2, ndataDonor)) then
 
-          else if(donorData(3,1) == donorData(3,ndataDonor)) then
+                if (donorData(2, 1) == 1) then
+                    connNonMatch(i)%donorFaceIDs(j) = jMin
+                else
+                    connNonMatch(i)%donorFaceIDs(j) = jMax
+                end if
 
-             if(donorData(3,1) == 1) then
-                connNonMatch(i)%donorFaceIDs(j) = kMin
-             else
-                connNonMatch(i)%donorFaceIDs(j) = kMax
-             endif
+            else if (donorData(3, 1) == donorData(3, ndataDonor)) then
 
-          else
+                if (donorData(3, 1) == 1) then
+                    connNonMatch(i)%donorFaceIDs(j) = kMin
+                else
+                    connNonMatch(i)%donorFaceIDs(j) = kMax
+                end if
 
-             write(errorMessage, strings) "Zone ", trim(cgnsDoms(nZone)%zoneName), ", connectivity ", &
-               trim(connectName), ": Invalid donor subface."
-             if(myID == 0) &
-                  call terminate("readGeneralConn", errorMessage)
-             call mpi_barrier(ADflow_comm_world, ierr)
+            else
 
-          endif
+                write (errorMessage, strings) "Zone ", trim(cgnsDoms(nZone)%zoneName), ", connectivity ", &
+                    trim(connectName), ": Invalid donor subface."
+                if (myID == 0) &
+                    call terminate("readGeneralConn", errorMessage)
+                call mpi_barrier(ADflow_comm_world, ierr)
 
-          ! Release the memory of donorData.
+            end if
 
-          deallocate(donorData, stat=ierr)
-          if(ierr /= 0)                       &
-               call terminate("readGeneralConn", &
-               "Deallocation error for donorData")
+            ! Release the memory of donorData.
 
-          ! Read the periodic info if this is a periodic boundary.
+            deallocate (donorData, stat=ierr)
+            if (ierr /= 0) &
+                call terminate("readGeneralConn", &
+                               "Deallocation error for donorData")
 
-          call readPeriodicSubface(cgnsInd, cgnsBase, nZone, nn,   &
-               connectName, periodic,          &
-               rotationCenter, rotationAngles, &
-               translation)
+            ! Read the periodic info if this is a periodic boundary.
 
-          ! If j == 1 then the info is simply copied. Otherwise it
-          ! is checked if it is okay.
+            call readPeriodicSubface(cgnsInd, cgnsBase, nZone, nn, &
+                                     connectName, periodic, &
+                                     rotationCenter, rotationAngles, &
+                                     translation)
 
-          checkConsistency: if(j == 1) then
+            ! If j == 1 then the info is simply copied. Otherwise it
+            ! is checked if it is okay.
 
-             connNonMatch(i)%periodic       = periodic
-             connNonMatch(i)%rotationCenter = rotationCenter
-             connNonMatch(i)%rotationAngles = rotationAngles
-             connNonMatch(i)%translation    = translation
+            checkConsistency: if (j == 1) then
 
-          else checkConsistency
+                connNonMatch(i)%periodic = periodic
+                connNonMatch(i)%rotationCenter = rotationCenter
+                connNonMatch(i)%rotationAngles = rotationAngles
+                connNonMatch(i)%translation = translation
 
-             ! Check for consistency.
+                else checkConsistency
 
-             wrongData = .false.
+                ! Check for consistency.
 
-             ! Some dirty stuff to compare the logicals. The FORTRAN
-             ! standard does not allow comparisons of logicals.
+                wrongData = .false.
 
-             ii(1) = 0; if( connNonMatch(i)%periodic ) ii(1) = 1
-             ii(2) = 0; if(                 periodic ) ii(2) = 1
+                ! Some dirty stuff to compare the logicals. The FORTRAN
+                ! standard does not allow comparisons of logicals.
 
-             if(ii(1) /= ii(2)) wrongData = .true.
+                ii(1) = 0; if (connNonMatch(i)%periodic) ii(1) = 1
+                ii(2) = 0; if (periodic) ii(2) = 1
 
-             ! Compare the rotation center.
+                if (ii(1) /= ii(2)) wrongData = .true.
 
-             if(abs(connNonMatch(i)%rotationCenter(1)              &
-                  - rotationCenter(1)) > eps .and. &
-                  abs(connNonMatch(i)%rotationCenter(2)              &
-                  - rotationCenter(2)) > eps .and. &
-                  abs(connNonMatch(i)%rotationCenter(3)              &
-                  - rotationCenter(3)) > eps)      &
-                  wrongData = .true.
+                ! Compare the rotation center.
 
-             ! Check the rotation angles.
+                if (abs(connNonMatch(i)%rotationCenter(1) &
+                        - rotationCenter(1)) > eps .and. &
+                    abs(connNonMatch(i)%rotationCenter(2) &
+                        - rotationCenter(2)) > eps .and. &
+                    abs(connNonMatch(i)%rotationCenter(3) &
+                        - rotationCenter(3)) > eps) &
+                    wrongData = .true.
 
-             if(abs(connNonMatch(i)%rotationAngles(1)              &
-                  - rotationAngles(1)) > eps .and. &
-                  abs(connNonMatch(i)%rotationAngles(2)              &
-                  - rotationAngles(2)) > eps .and. &
-                  abs(connNonMatch(i)%rotationAngles(3)              &
-                  - rotationAngles(3)) > eps)      &
-                  wrongData = .true.
+                ! Check the rotation angles.
 
-             ! Check the translation vector.
+                if (abs(connNonMatch(i)%rotationAngles(1) &
+                        - rotationAngles(1)) > eps .and. &
+                    abs(connNonMatch(i)%rotationAngles(2) &
+                        - rotationAngles(2)) > eps .and. &
+                    abs(connNonMatch(i)%rotationAngles(3) &
+                        - rotationAngles(3)) > eps) &
+                    wrongData = .true.
 
-             if(abs(connNonMatch(i)%translation(1)              &
-                  - translation(1)) > eps .and. &
-                  abs(connNonMatch(i)%translation(2)              &
-                  - translation(2)) > eps .and. &
-                  abs(connNonMatch(i)%translation(3)              &
-                  - translation(3)) > eps)      &
-                  wrongData = .true.
+                ! Check the translation vector.
 
-             ! Print an error message and exit if inconsistent
-             ! data was found.
+                if (abs(connNonMatch(i)%translation(1) &
+                        - translation(1)) > eps .and. &
+                    abs(connNonMatch(i)%translation(2) &
+                        - translation(2)) > eps .and. &
+                    abs(connNonMatch(i)%translation(3) &
+                        - translation(3)) > eps) &
+                    wrongData = .true.
 
-             write(errorMessage, strings) "Zone ", trim(cgnsDoms(nZone)%zoneName), ", connectivity ", &
-               trim(connectName), ": Inconsistent periodic info compared to connectivity ", &
-               trim(connNonMatch(i)%connectNames(1)), "."
+                ! Print an error message and exit if inconsistent
+                ! data was found.
 
-             if(myID == 0) &
-                  call terminate("readGeneralConn", errorMessage)
-             call mpi_barrier(ADflow_comm_world, ierr)
+                write (errorMessage, strings) "Zone ", trim(cgnsDoms(nZone)%zoneName), ", connectivity ", &
+                    trim(connectName), ": Inconsistent periodic info compared to connectivity ", &
+                    trim(connNonMatch(i)%connectNames(1)), "."
 
-          endif checkConsistency
+                if (myID == 0) &
+                    call terminate("readGeneralConn", errorMessage)
+                call mpi_barrier(ADflow_comm_world, ierr)
 
-          !=============================================================
+            end if checkConsistency
 
-       end select connectivityType
+            !=============================================================
 
-    enddo nConnLoop
+            end select connectivityType
 
-    ! Release the memory of map2NonMatch again.
+        end do nConnLoop
 
-    deallocate(map2NonMatch, stat=ierr)
-    if(ierr /= CG_OK)                  &
-         call terminate("readGeneralConn", &
-         "Deallocation failure for map2NonMatch")
+        ! Release the memory of map2NonMatch again.
 
-  end subroutine readGeneralConn
+        deallocate (map2NonMatch, stat=ierr)
+        if (ierr /= CG_OK) &
+            call terminate("readGeneralConn", &
+                           "Deallocation failure for map2NonMatch")
+
+    end subroutine readGeneralConn
+
+    subroutine readBocos(cgnsInd, cgnsBase, nZone, &
+                         nDoubleBoundFaces, sortedFamName, famID)
+        !
+        !       ReadBocos reads the boundary condition info for the given
+        !       zone/block.
+        !
+        use constants
+        use su_cgns
+        use cgnsGrid, only: cgnsDoms, cgnsNDom, cgnsBcDatasetType, &
+                            cgnsFamilies, cgnsBcdataArray, cgnsNFamilies
+        use communication, only: myID, adflow_comm_world
+        use utils, only: terminate, setcgnsRealType
+        use sorting, only: bsearchStrings
+        use commonFormats, only: strings
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        integer, intent(in) :: cgnsInd, cgnsBase, nZone
+        integer, intent(inout) :: nDoubleBoundFaces
+
+        character(len=*), dimension(:), intent(in) :: sortedFamName
+        integer(kind=intType), dimension(:), intent(in) :: famID
+        !
+        !      Local variables
+        !
+        integer :: cgnsNBocos, cgnsNDataSet, nUserData
+        integer(kind=cgsize_t) :: cgnsNpnts
+        integer :: i, j, match
+        integer :: ierr, dummy
+        integer :: dirichletFlag, neumannFlag
+
+        integer(kind=cgsize_t), dimension(3, 2) :: bcRange
+
+        integer(kind=intType) :: ii, nn
+
+        character(len=maxCGNSNameLen) :: familyName
+        character(len=maxStringLen) :: errorMessage
+
+        logical :: familySpecifiedData
+
+        type(cgnsBcDatasetType), pointer, dimension(:) :: dataSet
+
+        ! Read the number of boundary conditions in this zone/block.
+        ! Again the reading takes place via an integer.
+
+        call cg_nbocos_f(cgnsInd, cgnsBase, nZone, cgnsNBocos, ierr)
+        if (ierr /= CG_OK) &
+            call terminate("readBocos", &
+                           "Something wrong when calling cg_nbocos_f")
+        cgnsDoms(nZone)%nBocos = cgnsNBocos
 
-  subroutine readBocos(cgnsInd, cgnsBase, nZone,                &
-       nDoubleBoundFaces, sortedFamName, famID)
-    !
-    !       ReadBocos reads the boundary condition info for the given
-    !       zone/block.
-    !
-    use constants
-    use su_cgns
-    use cgnsGrid, only : cgnsDoms, cgnsNDom, cgnsBcDatasetType, &
-         cgnsFamilies, cgnsBcdataArray, cgnsNFamilies
-    use communication, only : myID, adflow_comm_world
-    use utils, only: terminate, setcgnsRealType
-    use sorting, only: bsearchStrings
-    use commonFormats, only : strings
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    integer, intent(in)    :: cgnsInd, cgnsBase, nZone
-    integer, intent(inout) :: nDoubleBoundFaces
+        ! Allocate the memory for the boundary condition info for
+        ! this zone/block.
 
-    character(len=*), dimension(:), intent(in) :: sortedFamName
-    integer(kind=intType), dimension(:), intent(in) :: famID
-    !
-    !      Local variables
-    !
-    integer :: cgnsNBocos, cgnsNDataSet, nUserData
-    integer(kind=cgsize_t) :: cgnsNpnts
-    integer :: i, j, match
-    integer :: ierr, dummy
-    integer :: dirichletFlag, neumannFlag
-
-    integer(kind=cgsize_t), dimension(3,2) :: bcRange
-
-    integer(kind=intType) :: ii, nn
-
-    character(len=maxCGNSNameLen) :: familyName
-    character(len=maxStringLen)    :: errorMessage
-
-    logical :: familySpecifiedData
-
-    type(cgnsBcDatasetType), pointer, dimension(:) :: dataSet
-
-
-    ! Read the number of boundary conditions in this zone/block.
-    ! Again the reading takes place via an integer.
-
-    call cg_nbocos_f(cgnsInd, cgnsBase, nZone, cgnsNBocos, ierr)
-    if(ierr /= CG_OK)            &
-         call terminate("readBocos", &
-         "Something wrong when calling cg_nbocos_f")
-    cgnsDoms(nZone)%nBocos = cgnsNBocos
-
-    ! Allocate the memory for the boundary condition info for
-    ! this zone/block.
-
-    allocate(cgnsDoms(nZone)%bocoInfo(cgnsNBocos), stat=ierr)
-    if(ierr /= 0) &
-         call terminate("readBocos", &
-         "Memory allocation failure for bocoInfo")
-
-    ! Loop over the boundary conditions.
-
-    bocoLoop: do i=1,cgnsNBocos
-       !
-       !         Read the general info for this boundary condition and set
-       !         the dimensions of the subface.
-       !
-       call cg_boco_info_f(cgnsInd, cgnsBase, nZone, i,                &
-            cgnsDoms(nZone)%bocoInfo(i)%bocoName,       &
-            cgnsDoms(nZone)%bocoInfo(i)%BCTypeCGNS,     &
-            cgnsDoms(nZone)%bocoInfo(i)%ptsetType,      &
-            cgnsNpnts,                                  &
-            cgnsDoms(nZone)%bocoInfo(i)%normalIndex,    &
-            cgnsDoms(nZone)%bocoInfo(i)%normalListFlag, &
-            cgnsDoms(nZone)%bocoInfo(i)%normalDataType, &
-            cgnsNDataSet, ierr)
-
-       if(ierr /= CG_OK)           &
+        allocate (cgnsDoms(nZone)%bocoInfo(cgnsNBocos), stat=ierr)
+        if (ierr /= 0) &
             call terminate("readBocos", &
-            "Something wrong when calling cg_boco_info_f")
+                           "Memory allocation failure for bocoInfo")
+
+        ! Loop over the boundary conditions.
+
+        bocoLoop: do i = 1, cgnsNBocos
+            !
+            !         Read the general info for this boundary condition and set
+            !         the dimensions of the subface.
+            !
+            call cg_boco_info_f(cgnsInd, cgnsBase, nZone, i, &
+                                cgnsDoms(nZone)%bocoInfo(i)%bocoName, &
+                                cgnsDoms(nZone)%bocoInfo(i)%BCTypeCGNS, &
+                                cgnsDoms(nZone)%bocoInfo(i)%ptsetType, &
+                                cgnsNpnts, &
+                                cgnsDoms(nZone)%bocoInfo(i)%normalIndex, &
+                                cgnsDoms(nZone)%bocoInfo(i)%normalListFlag, &
+                                cgnsDoms(nZone)%bocoInfo(i)%normalDataType, &
+                                cgnsNDataSet, ierr)
+
+            if (ierr /= CG_OK) &
+                call terminate("readBocos", &
+                               "Something wrong when calling cg_boco_info_f")
+
+            cgnsDoms(nZone)%bocoInfo(i)%npnts = cgnsNpnts
+
+            ! Nullify the pointer for dataSet.
+
+            nullify (cgnsDoms(nZone)%bocoInfo(i)%dataSet)
+
+            ! Perform some checks.
+            if (cgnsDoms(nZone)%bocoInfo(i)%normalListFlag > 0) &
+                 call terminate("readBocos", &
+                 "Currently not possible to read &
+                 &boundary normals")
+
+            ! Check how the boundary conditions are specified. Normally this
+            ! is a given point range, but there is limited support for
+            ! specifying a point list. The latter is needed for icem grids
+            ! when boundary conditions are specified on a point.
+
+            if (cgnsDoms(nZone)%bocoInfo(i)%ptsetType == pointrange) then
+
+                ! Point range specified. The usual situation.
+                ! Read the point range for this boundary condition.
+
+                call cg_boco_read_f(cgnsInd, cgnsBase, nZone, i, bcRange, &
+                                    dummy, ierr)
+                if (ierr /= CG_OK) &
+                     call terminate("readBocos", &
+                     "Something wrong when calling &
+                     &cg_boco_read_f")
 
-       cgnsDoms(nZone)%bocoInfo(i)%npnts = cgnsNpnts
+            else if (cgnsDoms(nZone)%bocoInfo(i)%ptsetType == pointlist) then
 
-       ! Nullify the pointer for dataSet.
+                ! List of points specified. This is normally undesired, because
+                ! boundary conditions are applied per face. However icem grids
+                ! tend to give boundary conditions for corner points and this
+                ! must be handled. In this case the number of points specified,
+                ! cgnsNpnts, equals 1. In all other situations print an error
+                ! message.
 
-       nullify(cgnsDoms(nZone)%bocoInfo(i)%dataSet)
+                if (cgnsNpnts > 1) &
+                    call terminate("readBocos", &
+                                   "Point list with more than 1 point specified")
 
-       ! Perform some checks.
-       if(cgnsDoms(nZone)%bocoInfo(i)%normalListFlag > 0) &
-            call terminate("readBocos", &
-            "Currently not possible to read &
-            &boundary normals")
+                ! Read the point index.
 
-       ! Check how the boundary conditions are specified. Normally this
-       ! is a given point range, but there is limited support for
-       ! specifying a point list. The latter is needed for icem grids
-       ! when boundary conditions are specified on a point.
+                call cg_boco_read_f(cgnsInd, cgnsBase, nZone, i, bcRange, &
+                                    dummy, ierr)
+                if (ierr /= CG_OK) &
+                    call terminate("readBocos", &
+                                   "Something wrong when calling cg_boco_read_f")
 
-       if(cgnsDoms(nZone)%bocoInfo(i)%ptsetType == pointrange) then
+                ! Make sure that bcRange contains a range, i.e. the point
+                ! indices are copied to the second column of bcRange.
 
-          ! Point range specified. The usual situation.
-          ! Read the point range for this boundary condition.
+                bcRange(1, 2) = bcRange(1, 1)
+                bcRange(2, 2) = bcRange(2, 1)
+                bcRange(3, 2) = bcRange(3, 1)
 
-          call cg_boco_read_f(cgnsInd, cgnsBase, nZone, i, bcRange, &
-               dummy, ierr)
-          if(ierr /= CG_OK)             &
-               call terminate("readBocos", &
-               "Something wrong when calling &
-               &cg_boco_read_f")
+            else
 
-       else if(cgnsDoms(nZone)%bocoInfo(i)%ptsetType == pointlist) then
+                ! Unknown ptsetType.
 
-          ! List of points specified. This is normally undesired, because
-          ! boundary conditions are applied per face. However icem grids
-          ! tend to give boundary conditions for corner points and this
-          ! must be handled. In this case the number of points specified,
-          ! cgnsNpnts, equals 1. In all other situations print an error
-          ! message.
+                call terminate("readBocos", "Unknown ptsetType encountered")
 
-          if(cgnsNpnts > 1)             &
-               call terminate("readBocos", &
-               "Point list with more than 1 point specified")
+            end if
 
-          ! Read the point index.
+            ! Store the range in the cgns grid type.
 
-          call cg_boco_read_f(cgnsInd, cgnsBase, nZone, i, bcRange, &
-               dummy, ierr)
-          if(ierr /= CG_OK)             &
-               call terminate("readBocos", &
-               "Something wrong when calling cg_boco_read_f")
+            cgnsDoms(nZone)%bocoInfo(i)%iBeg = bcRange(1, 1)
+            cgnsDoms(nZone)%bocoInfo(i)%jBeg = bcRange(2, 1)
+            cgnsDoms(nZone)%bocoInfo(i)%kBeg = bcRange(3, 1)
 
-          ! Make sure that bcRange contains a range, i.e. the point
-          ! indices are copied to the second column of bcRange.
+            cgnsDoms(nZone)%bocoInfo(i)%iEnd = bcRange(1, 2)
+            cgnsDoms(nZone)%bocoInfo(i)%jEnd = bcRange(2, 2)
+            cgnsDoms(nZone)%bocoInfo(i)%kEnd = bcRange(3, 2)
 
-          bcRange(1,2) = bcRange(1,1)
-          bcRange(2,2) = bcRange(2,1)
-          bcRange(3,2) = bcRange(3,1)
+            ! Check and see if this is a valid boundary condition or if it
+            ! corresponds to either an edge or a point, which must be
+            ! ignored by the flow solver.
 
-       else
+            match = 0
+            if (bcRange(1, 1) == bcRange(1, 2)) match = match + 1
+            if (bcRange(2, 1) == bcRange(2, 2)) match = match + 1
+            if (bcRange(3, 1) == bcRange(3, 2)) match = match + 1
 
-          ! Unknown ptsetType.
+            if (match == 1) then
+                cgnsDoms(nZone)%bocoInfo(i)%actualFace = .true.
+            else
+                cgnsDoms(nZone)%bocoInfo(i)%actualFace = .false.
+            end if
 
-          call terminate("readBocos", "Unknown ptsetType encountered")
+            ! It is possible that the 1 to 1 block connectivities are
+            ! repeated in the boundary section. Check for this, but only
+            ! if this an actual face.
 
-       endif
+            if (cgnsDoms(nZone)%bocoInfo(i)%actualFace) then
+                if (checkForDoubleBoundFace(nZone, i)) then
 
-       ! Store the range in the cgns grid type.
+                    ! Face is repeated. Increment nDoubleBoundFaces and set
+                    ! actualFace to .false.
 
-       cgnsDoms(nZone)%bocoInfo(i)%iBeg = bcRange(1,1)
-       cgnsDoms(nZone)%bocoInfo(i)%jBeg = bcRange(2,1)
-       cgnsDoms(nZone)%bocoInfo(i)%kBeg = bcRange(3,1)
+                    nDoubleBoundFaces = nDoubleBoundFaces + 1
+                    cgnsDoms(nZone)%bocoInfo(i)%actualFace = .false.
 
-       cgnsDoms(nZone)%bocoInfo(i)%iEnd = bcRange(1,2)
-       cgnsDoms(nZone)%bocoInfo(i)%jEnd = bcRange(2,2)
-       cgnsDoms(nZone)%bocoInfo(i)%kEnd = bcRange(3,2)
+                end if
+            end if
+            !
+            !         Determine the internally used boundary condition and whether
+            !         or not the boundary condition is given on a per family basis.
+            !
+            cgnsDoms(nZone)%bocoInfo(i)%familyID = 0
 
-       ! Check and see if this is a valid boundary condition or if it
-       ! corresponds to either an edge or a point, which must be
-       ! ignored by the flow solver.
+            checkActualFace: if (cgnsDoms(nZone)%bocoInfo(i)%actualFace) then
 
-       match = 0
-       if(bcRange(1,1) == bcRange(1,2)) match = match +1
-       if(bcRange(2,1) == bcRange(2,2)) match = match +1
-       if(bcRange(3,1) == bcRange(3,2)) match = match +1
+                ! Determine the type of CGNS boundary condition and act
+                ! accordingly.
 
-       if(match == 1) then
-          cgnsDoms(nZone)%bocoInfo(i)%actualFace = .true.
-       else
-          cgnsDoms(nZone)%bocoInfo(i)%actualFace = .false.
-       endif
+                select case (cgnsDoms(nZone)%bocoInfo(i)%BCTypeCGNS)
 
-       ! It is possible that the 1 to 1 block connectivities are
-       ! repeated in the boundary section. Check for this, but only
-       ! if this an actual face.
+                case (FamilySpecified)
 
-       if( cgnsDoms(nZone)%bocoInfo(i)%actualFace ) then
-          if( checkForDoubleBoundFace(nZone, i) ) then
+                    ! Boundary condition is specified per family.
+                    !added to accomodate case where a grid family is specified
+                    ! but BC's are specified in standard CGNS Format
+                    cgnsDoms(nZone)%BCFamilies = .True.
+                    ! Find out the family name to which this boundary
+                    ! face belongs.
 
-             ! Face is repeated. Increment nDoubleBoundFaces and set
-             ! actualFace to .false.
+                    call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", nZone, &
+                                   "ZoneBC_t", 1, "BC_t", i, "end")
+                    if (ierr /= CG_OK) &
+                        call terminate("readBocos", &
+                                       "Something wrong when calling cg_goto_f")
 
-             nDoubleBoundFaces = nDoubleBoundFaces +1
-             cgnsDoms(nZone)%bocoInfo(i)%actualFace = .false.
+                    call cg_famname_read_f(familyName, ierr)
 
-          endif
-       endif
-       !
-       !         Determine the internally used boundary condition and whether
-       !         or not the boundary condition is given on a per family basis.
-       !
-       cgnsDoms(nZone)%bocoInfo(i)%familyID = 0
+                    if (ierr /= CG_OK) then
 
-       checkActualFace: if( cgnsDoms(nZone)%bocoInfo(i)%actualFace ) then
+                        write (errorMessage, strings) "Zone ", trim(cgnsDoms(nZone)%zoneName), &
+                            ", boundary face ", trim(cgnsDoms(nZone)%bocoInfo(i)%bocoName), &
+                            ": Corresponding family name not given."
+                        if (myID == 0) call terminate("readBocos", errorMessage)
+                        call mpi_barrier(ADflow_comm_world, ierr)
 
-          ! Determine the type of CGNS boundary condition and act
-          ! accordingly.
+                    end if
 
-          select case (cgnsDoms(nZone)%bocoInfo(i)%BCTypeCGNS)
+                    ! Search the family name in the sorted names. For a valid
+                    ! grid this name must be found.
 
-          case (FamilySpecified)
+                    nn = cgnsNFamilies
+                    ii = bsearchStrings(familyName, sortedFamName)
+                    if (ii == 0) then
 
-             ! Boundary condition is specified per family.
-             !added to accomodate case where a grid family is specified
-             ! but BC's are specified in standard CGNS Format
-             cgnsDoms(nZone)%BCFamilies = .True.
-             ! Find out the family name to which this boundary
-             ! face belongs.
+                        write (errorMessage, strings) "Family name ", trim(familyName), " not present in the grid"
+                        if (myID == 0) call terminate("readBocos", errorMessage)
+                        call mpi_barrier(ADflow_comm_world, ierr)
 
-             call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", nZone, &
-                  "ZoneBC_t", 1, "BC_t", i, "end")
-             if(ierr /= CG_OK)             &
-                  call terminate("readBocos", &
-                  "Something wrong when calling cg_goto_f")
+                    end if
 
-             call cg_famname_read_f(familyName, ierr)
+                    ! Set the family number and the boundary condition types.
 
-             if(ierr /= CG_OK) then
+                    ii = famID(ii)
+                    cgnsDoms(nZone)%bocoInfo(i)%familyID = ii
+                    cgnsDoms(nZone)%bocoInfo(i)%BCTypeCGNS = &
+                        cgnsFamilies(ii)%BCTypeCGNS
+                    cgnsDoms(nZone)%bocoInfo(i)%BCType = &
+                        cgnsFamilies(ii)%BCType
 
-                write(errorMessage, strings) "Zone ", trim(cgnsDoms(nZone)%zoneName), &
-                  ", boundary face ", trim(cgnsDoms(nZone)%bocoInfo(i)%bocoName), &
-                  ": Corresponding family name not given."
-                if(myID == 0) call terminate("readBocos", errorMessage)
-                call mpi_barrier(ADflow_comm_world, ierr)
+                    !===========================================================
 
-             endif
+                case (UserDefined)
 
-             ! Search the family name in the sorted names. For a valid
-             ! grid this name must be found.
+                    ! A user defined boundary condition is prescribed.
+                    ! More information should be present. Determine the
+                    ! number of user defined data nodes.
 
-             nn = cgnsNFamilies
-             ii = bsearchStrings(familyName, sortedFamName)
-             if(ii == 0) then
+                    call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", nZone, "ZoneBC_t", 1, "BC_t", i, "end")
+                    if (ierr /= CG_OK) &
+                        call terminate("readBocos", &
+                                       "Something wrong when calling cg_goto_f")
 
-                write(errorMessage, strings) "Family name ", trim(familyName), " not present in the grid"
-                if(myID == 0) call terminate("readBocos", errorMessage)
-                call mpi_barrier(ADflow_comm_world, ierr)
+                    call cg_nuser_data_f(nUserData, ierr)
+                    if (ierr /= CG_OK) &
+                        call terminate("readBocos", &
+                                       "Something wrong when calling cg_nuser_data_f")
 
-             endif
+                    ! nUserData should be 1. Check this.
 
-             ! Set the family number and the boundary condition types.
+                    if (nUserData /= 1) then
+                        write (errorMessage, strings) "Zone ", trim(cgnsDoms(nZone)%zoneName), ", boundary face ", &
+                            trim(cgnsDoms(nZone)%bocoInfo(i)%bocoName), &
+                            ": Need 1 UserDefinedData_t node for user defined boundary condition"
+                        if (myID == 0) call terminate("readBocos", errorMessage)
+                        call mpi_barrier(ADflow_comm_world, ierr)
+                    end if
 
-             ii = famID(ii)
-             cgnsDoms(nZone)%bocoInfo(i)%familyID = ii
-             cgnsDoms(nZone)%bocoInfo(i)%BCTypeCGNS = &
-                  cgnsFamilies(ii)%BCTypeCGNS
-             cgnsDoms(nZone)%bocoInfo(i)%BCType = &
-                  cgnsFamilies(ii)%BCType
+                    ! Read the name of the user defined data node.
 
-             !===========================================================
+                    call cg_user_data_read_f(nUserData, &
+                                             cgnsDoms(nZone)%bocoInfo(i)%userDefinedName, &
+                                             ierr)
+                    if (ierr /= CG_OK) &
+                        call terminate("readBocos", &
+                                       "Something wrong when calling cg_user_data_read_f")
 
-          case (UserDefined)
+                    ! Determine the corresponding internal boundary
+                    ! condition from the name just read.
 
-             ! A user defined boundary condition is prescribed.
-             ! More information should be present. Determine the
-             ! number of user defined data nodes.
+                    cgnsDoms(nZone)%bocoInfo(i)%BCType = &
+                        internalBC(cgnsDoms(nZone)%bocoInfo(i)%BCTypeCGNS, &
+                                   cgnsDoms(nZone)%bocoInfo(i)%userDefinedName)
 
-             call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", nZone, "ZoneBC_t", 1, "BC_t", i, "end")
-             if(ierr /= CG_OK)             &
-                  call terminate("readBocos", &
-                  "Something wrong when calling cg_goto_f")
+                    ! Print an error message if the BC type was not recognized.
 
-             call cg_nuser_data_f(nUserData, ierr)
-             if(ierr /= CG_OK)            &
-                  call terminate("readBocos", &
-                  "Something wrong when calling cg_nuser_data_f")
+                    if (cgnsDoms(nZone)%bocoInfo(i)%BCType == bcNull) then
+                        write (errorMessage, strings) "Zone ", trim(cgnsDoms(nZone)%zoneName), &
+                            ", boundary face ", trim(cgnsDoms(nZone)%bocoInfo(i)%bocoName), &
+                         ": Unknown user-defined boundary condition ", trim(cgnsDoms(nZone)%bocoInfo(i)%userDefinedName)
+                        if (myID == 0) call terminate("readBocos", errorMessage)
+                        call mpi_barrier(ADflow_comm_world, ierr)
+                    end if
 
-             ! nUserData should be 1. Check this.
+                    ! At the moment the domain interfaces as well as the
+                    ! bleed flows are only possible on a per family basis.
 
-             if(nUserData /= 1) then
-                write(errorMessage, strings) "Zone ", trim(cgnsDoms(nZone)%zoneName), ", boundary face ", &
-                  trim(cgnsDoms(nZone)%bocoInfo(i)%bocoName), &
-                  ": Need 1 UserDefinedData_t node for user defined boundary condition"
-                if(myID == 0) call terminate("readBocos", errorMessage)
-                call mpi_barrier(ADflow_comm_world, ierr)
-             endif
+                    select case (cgnsDoms(nZone)%bocoInfo(i)%BCType)
 
-             ! Read the name of the user defined data node.
+                    case (MassBleedInflow, MassBleedOutflow, &
+                          DomainInterfaceAll, DomainInterfaceRhoUVW, &
+                          DomainInterfaceP, DomainInterfaceRho, &
+                          DomainInterfaceTotal)
 
-             call cg_user_data_read_f(nUserData,                      &
-                  cgnsDoms(nZone)%bocoInfo(i)%userDefinedName, &
-                  ierr)
-             if(ierr /= CG_OK)            &
-                  call terminate("readBocos", &
-                  "Something wrong when calling cg_user_data_read_f")
+                        write (errorMessage, strings) "Zone ", trim(cgnsDoms(nZone)%zoneName), &
+                            ", boundary face ", trim(cgnsDoms(nZone)%bocoInfo(i)%bocoName), &
+                            ": User-defined boundary condition ", trim(cgnsDoms(nZone)%bocoInfo(i)%userDefinedName), &
+                            " only possible for a family"
 
-             ! Determine the corresponding internal boundary
-             ! condition from the name just read.
+                        if (myID == 0) &
+                            call terminate("readBocos", errorMessage)
+                        call mpi_barrier(ADflow_comm_world, ierr)
 
-             cgnsDoms(nZone)%bocoInfo(i)%BCType = &
-                  internalBC(cgnsDoms(nZone)%bocoInfo(i)%BCTypeCGNS, &
-                  cgnsDoms(nZone)%bocoInfo(i)%userDefinedName)
+                    end select
 
-             ! Print an error message if the BC type was not recognized.
+                    ! Try to Read off the family name.
+                    call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", nZone, &
+                                   "ZoneBC_t", 1, "BC_t", i, "end")
 
-             if(cgnsDoms(nZone)%bocoInfo(i)%BCType == bcNull) then
-                write(errorMessage, strings) "Zone ", trim(cgnsDoms(nZone)%zoneName), &
-                  ", boundary face ", trim(cgnsDoms(nZone)%bocoInfo(i)%bocoName), &
-                  ": Unknown user-defined boundary condition ", trim(cgnsDoms(nZone)%bocoInfo(i)%userDefinedName)
-                if(myID == 0) call terminate("readBocos", errorMessage)
-                call mpi_barrier(ADflow_comm_world, ierr)
-             endif
+                    cgnsDoms(nZone)%bocoInfo(i)%wallBCName = ""
+                    if (ierr == 0) then ! Node exits
+                        call cg_famname_read_f(familyName, ierr)
+                        if (ierr == 0) then
+                            cgnsDoms(nZone)%bocoInfo(i)%wallBCName = familyName
+                        end if
+                    end if
+                    !===========================================================
 
-             ! At the moment the domain interfaces as well as the
-             ! bleed flows are only possible on a per family basis.
+                case default
 
-             select case (cgnsDoms(nZone)%bocoInfo(i)%BCType)
+                    ! A standard CGNS boundary condition is used. Determine
+                    ! the internally used boundary condition.
 
-             case (MassBleedInflow,    MassBleedOutflow,      &
-                  DomainInterfaceAll, DomainInterfaceRhoUVW, &
-                  DomainInterfaceP,   DomainInterfaceRho,    &
-                  DomainInterfaceTotal)
+                    cgnsDoms(nZone)%bocoInfo(i)%userDefinedName = ""
 
-                write(errorMessage, strings) "Zone ", trim(cgnsDoms(nZone)%zoneName), &
-                  ", boundary face ", trim(cgnsDoms(nZone)%bocoInfo(i)%bocoName), &
-                  ": User-defined boundary condition ", trim(cgnsDoms(nZone)%bocoInfo(i)%userDefinedName), &
-                  " only possible for a family"
+                    cgnsDoms(nZone)%bocoInfo(i)%BCType = &
+                        internalBC(cgnsDoms(nZone)%bocoInfo(i)%BCTypeCGNS, &
+                                   cgnsDoms(nZone)%bocoInfo(i)%userDefinedName)
 
-                if(myID == 0) &
-                     call terminate("readBocos", errorMessage)
-                call mpi_barrier(ADflow_comm_world, ierr)
+                    ! Print an error message if the BC type was not recognized.
 
-             end select
+                    if (cgnsDoms(nZone)%bocoInfo(i)%BCType == bcNull) then
+                        write (errorMessage, strings) "Zone ", trim(cgnsDoms(nZone)%zoneName), &
+                            ", boundary face ", trim(cgnsDoms(nZone)%bocoInfo(i)%bocoName), &
+                            ": boundary condition type missing or not supported"
+                        if (myID == 0) call terminate("readBocos", errorMessage)
+                        call mpi_barrier(ADflow_comm_world, ierr)
+                    end if
 
-             ! Try to Read off the family name.
-             call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", nZone, &
-                  "ZoneBC_t", 1, "BC_t", i, "end")
+                    !added to accomodate case where a grid family is specified
+                    ! but BC's are specified in standard CGNS Format
+                    cgnsDoms(nZone)%BCFamilies = .False.
 
-             cgnsDoms(nZone)%bocoInfo(i)%wallBCName = ""
-             if (ierr == 0) then ! Node exits
-                call cg_famname_read_f(familyName, ierr)
-                if (ierr == 0) then
-                   cgnsDoms(nZone)%bocoInfo(i)%wallBCName = familyName
-                end if
-             end if
-             !===========================================================
+                    ! Try to Read off the Boundary Condition family
+                    ! name.
+                    call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", nZone, &
+                                   "ZoneBC_t", 1, "BC_t", i, "end")
 
-          case default
+                    cgnsDoms(nZone)%bocoInfo(i)%wallBCName = ""
+                    if (ierr == 0) then ! Node exits
+                        call cg_famname_read_f(familyName, ierr)
+                        if (ierr == 0) then
+                            cgnsDoms(nZone)%bocoInfo(i)%wallBCName = familyName
+                        end if
+                    end if
 
-             ! A standard CGNS boundary condition is used. Determine
-             ! the internally used boundary condition.
+                end select
 
-             cgnsDoms(nZone)%bocoInfo(i)%userDefinedName = ""
+            end if checkActualFace
+            !
+            !         Initialize slidingID to 0 to indicate that this boco does
+            !         not belong to a sliding mesh interface. If it is, this will
+            !         be overwritten after all bocos are read for every zone.
+            !
+            cgnsDoms(nzone)%bocoInfo(i)%slidingID = 0
+            !
+            !         Determine the possible rotating rate of the boundary face.
+            !
+            ! Initialize the rotating center and the rotating rates to
+            ! the values of the corresponding block.
 
-             cgnsDoms(nZone)%bocoInfo(i)%BCType = &
-                  internalBC(cgnsDoms(nZone)%bocoInfo(i)%BCTypeCGNS, &
-                  cgnsDoms(nZone)%bocoInfo(i)%userDefinedName)
+            cgnsDoms(nZone)%bocoInfo(i)%rotCenter = &
+                cgnsDoms(nZone)%rotCenter
+            cgnsDoms(nZone)%bocoInfo(i)%rotRate = &
+                cgnsDoms(nZone)%rotRate
 
-             ! Print an error message if the BC type was not recognized.
+            ! Check if a rotating rate was specified for the family to
+            ! which this boundary face belongs.
 
-             if(cgnsDoms(nZone)%bocoInfo(i)%BCType == bcNull) then
-                write(errorMessage, strings) "Zone ", trim(cgnsDoms(nZone)%zoneName), &
-                  ", boundary face ", trim(cgnsDoms(nZone)%bocoInfo(i)%bocoName), &
-                  ": boundary condition type missing or not supported"
-                if(myID == 0) call terminate("readBocos", errorMessage)
-                call mpi_barrier(ADflow_comm_world, ierr)
-             endif
+            ii = cgnsDoms(nZone)%bocoInfo(i)%familyID
+            if (cgnsDoms(nZone)%bocoInfo(i)%actualFace .and. ii > 0) then
+
+                ! Check if a rotating rate was specified for the family ii.
 
-             !added to accomodate case where a grid family is specified
-             ! but BC's are specified in standard CGNS Format
-             cgnsDoms(nZone)%BCFamilies = .False.
+                if (cgnsFamilies(ii)%rotatingFrameSpecified) then
 
+                    ! Copy the rotation info from the family to the boundary
+                    ! face. Note the multiplication with LRef, because the
+                    ! rotation center for the families has not been scaled
+                    ! to meters.
 
-             ! Try to Read off the Boundary Condition family
-             ! name.
-             call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", nZone, &
-                  "ZoneBC_t", 1, "BC_t", i, "end")
+                    cgnsDoms(nZone)%bocoInfo(i)%rotCenter = &
+                        cgnsDoms(nZone)%LRef * cgnsFamilies(ii)%rotCenter
+                    cgnsDoms(nZone)%bocoInfo(i)%rotRate = &
+                        cgnsFamilies(ii)%rotRate
 
-             cgnsDoms(nZone)%bocoInfo(i)%wallBCName = ""
-             if (ierr == 0) then ! Node exits
-                call cg_famname_read_f(familyName, ierr)
-                if (ierr == 0) then
-                   cgnsDoms(nZone)%bocoInfo(i)%wallBCName = familyName
                 end if
-             end if
+            end if
+            !
+            !         Read and store the prescribed boundary condition data sets.
+            !
+            ! Initialize dataSetAllocated to .false. and nDataSet to 0.
 
-          end select
+            cgnsDoms(nZone)%bocoInfo(i)%dataSetAllocated = .false.
+            cgnsDoms(nZone)%bocoInfo(i)%nDataSet = 0
 
-       endif checkActualFace
-       !
-       !         Initialize slidingID to 0 to indicate that this boco does
-       !         not belong to a sliding mesh interface. If it is, this will
-       !         be overwritten after all bocos are read for every zone.
-       !
-       cgnsDoms(nzone)%bocoInfo(i)%slidingID = 0
-       !
-       !         Determine the possible rotating rate of the boundary face.
-       !
-       ! Initialize the rotating center and the rotating rates to
-       ! the values of the corresponding block.
+            ! Find out whether data has been specified for the
+            ! corresponding family.
 
-       cgnsDoms(nZone)%bocoInfo(i)%rotCenter = &
-            cgnsDoms(nZone)%rotCenter
-       cgnsDoms(nZone)%bocoInfo(i)%rotRate   = &
-            cgnsDoms(nZone)%rotRate
+            familySpecifiedData = .false.
+            ii = cgnsDoms(nZone)%bocoInfo(i)%familyID
+            if (ii > 0) then
+                if (cgnsFamilies(ii)%nDataSet > 0) &
+                    familySpecifiedData = .true.
+            end if
 
-       ! Check if a rotating rate was specified for the family to
-       ! which this boundary face belongs.
+            ! If family data is specified, set the pointer for the
+            ! data sets.
 
-       ii = cgnsDoms(nZone)%bocoInfo(i)%familyID
-       if(cgnsDoms(nZone)%bocoInfo(i)%actualFace .and. ii > 0) then
+            testFamilySpecified: if (familySpecifiedData) then
 
-          ! Check if a rotating rate was specified for the family ii.
+                cgnsDoms(nZone)%bocoInfo(i)%nDataSet = &
+                    cgnsFamilies(ii)%nDataSet
+                cgnsDoms(nZone)%bocoInfo(i)%dataSet => &
+                    cgnsFamilies(ii)%dataSet
 
-          if( cgnsFamilies(ii)%rotatingFrameSpecified ) then
+                else testFamilySpecified
 
-             ! Copy the rotation info from the family to the boundary
-             ! face. Note the multiplication with LRef, because the
-             ! rotation center for the families has not been scaled
-             ! to meters.
+                ! No family specified stuff.
+                ! Check if data sets are prescribed in the cgns file.
 
-             cgnsDoms(nZone)%bocoInfo(i)%rotCenter = &
-                  cgnsDoms(nZone)%LRef*cgnsFamilies(ii)%rotCenter
-             cgnsDoms(nZone)%bocoInfo(i)%rotRate   = &
-                  cgnsFamilies(ii)%rotRate
+                testDataSets: if (cgnsNDataSet > 0) then
 
-          endif
-       endif
-       !
-       !         Read and store the prescribed boundary condition data sets.
-       !
-       ! Initialize dataSetAllocated to .false. and nDataSet to 0.
+                    ! Set dataSetAllocated to true., allocate the memory for
+                    ! dataSet and set the pointer to make the code more
+                    ! readable.
 
-       cgnsDoms(nZone)%bocoInfo(i)%dataSetAllocated = .false.
-       cgnsDoms(nZone)%bocoInfo(i)%nDataSet         = 0
+                    cgnsDoms(nZone)%bocoInfo(i)%dataSetAllocated = .true.
+                    cgnsDoms(nZone)%bocoInfo(i)%nDataSet = cgnsNDataSet
 
-       ! Find out whether data has been specified for the
-       ! corresponding family.
+                    allocate (cgnsDoms(nZone)%bocoInfo(i)%dataSet(cgnsNDataSet), &
+                              stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("testDataSets", &
+                                       "Memory allocation failure for dataSet")
 
-       familySpecifiedData = .false.
-       ii = cgnsDoms(nZone)%bocoInfo(i)%familyID
-       if(ii > 0) then
-          if(cgnsFamilies(ii)%nDataSet > 0) &
-               familySpecifiedData = .true.
-       endif
+                    dataSet => cgnsDoms(nZone)%bocoInfo(i)%dataSet
 
-       ! If family data is specified, set the pointer for the
-       ! data sets.
+                    ! Loop over the number of data sets to extract the data.
 
-       testFamilySpecified: if( familySpecifiedData ) then
+                    loopDataSet: do j = 1, cgnsNDataSet
 
-          cgnsDoms(nZone)%bocoInfo(i)%nDataSet = &
-               cgnsFamilies(ii)%nDataSet
-          cgnsDoms(nZone)%bocoInfo(i)%dataSet => &
-               cgnsFamilies(ii)%dataSet
+                        ! Find out what kind of data, dirichlet or neumann
+                        ! (or both), are stored in this data set.
 
-       else testFamilySpecified
+                        call cg_dataset_read_f(cgnsInd, cgnsBase, nZone, i, j, &
+                                               dataSet(j)%datasetName, &
+                                               dataSet(j)%BCType, &
+                                               dirichletFlag, neumannFlag, ierr)
 
-          ! No family specified stuff.
-          ! Check if data sets are prescribed in the cgns file.
+                        ! Nullify the dirichlet and neumann arrays and initialize
+                        ! the number of presribed data to 0.
 
-          testDataSets: if(cgnsNDataSet > 0) then
+                        nullify (dataSet(j)%dirichletArrays)
+                        nullify (dataSet(j)%neumannArrays)
 
-             ! Set dataSetAllocated to true., allocate the memory for
-             ! dataSet and set the pointer to make the code more
-             ! readable.
+                        dataSet(j)%nDirichletArrays = 0
+                        dataSet(j)%nNeumannArrays = 0
 
-             cgnsDoms(nZone)%bocoInfo(i)%dataSetAllocated = .true.
-             cgnsDoms(nZone)%bocoInfo(i)%nDataSet = cgnsNDataSet
+                        ! Read the dirichlet and neumann arrays if data
+                        ! is present.
 
-             allocate(cgnsDoms(nZone)%bocoInfo(i)%dataSet(cgnsNDataSet), &
-                  stat=ierr)
-             if(ierr /= 0 ) &
-                  call terminate("testDataSets", &
-                  "Memory allocation failure for dataSet")
+                        if (dirichletFlag == 1) &
+                            call readBCDataArrays(dataSet(j)%nDirichletArrays, &
+                                                  dataSet(j)%dirichletArrays, &
+                                                  Dirichlet)
+
+                        if (neumannFlag == 1) &
+                            call readBCDataArrays(dataSet(j)%nNeumannArrays, &
+                                                  dataSet(j)%neumannArrays, &
+                                                  Neumann)
+
+                    end do loopDataSet
+                end if testDataSets
+            end if testFamilySpecified
+
+        end do bocoLoop
+
+        !=================================================================
 
-             dataSet => cgnsDoms(nZone)%bocoInfo(i)%dataSet
+    contains
 
-             ! Loop over the number of data sets to extract the data.
+        !===============================================================
 
-             loopDataSet: do j=1,cgnsNDataSet
+        subroutine readBCDataArrays(nArr, arr, DirNeu)
+            !
+            !         readBCDataArrays reads the arrays of the given data set
+            !         from the cgns file.
+            !
+            implicit none
+            !
+            !        Subroutine arguments.
+            !
+            integer, intent(in) :: DirNeu
 
-                ! Find out what kind of data, dirichlet or neumann
-                ! (or both), are stored in this data set.
+            integer(kind=intType), intent(out) :: nArr
+            type(cgnsBcdataArray), pointer, dimension(:) :: arr
+            !
+            !        Local variables.
+            !
+            integer :: ierr
+            integer :: k, l, nArrays, realTypeCGNS
+            integer :: mass, len, time, temp, angle, dataType
 
-                call cg_dataset_read_f(cgnsInd, cgnsBase, nZone, i, j, &
-                     dataSet(j)%datasetName,         &
-                     dataSet(j)%BCType,              &
-                     dirichletFlag, neumannFlag, ierr)
+            integer(kind=intType) :: nn
 
-                ! Nullify the dirichlet and neumann arrays and initialize
-                ! the number of presribed data to 0.
+            real(kind=cgnsRealType), dimension(:), allocatable :: tmp
 
-                nullify(dataSet(j)%dirichletArrays)
-                nullify(dataSet(j)%neumannArrays)
+            logical :: globalUnits
 
-                dataSet(j)%nDirichletArrays = 0
-                dataSet(j)%nNeumannArrays   = 0
+            ! Set the cgns real type.
 
-                ! Read the dirichlet and neumann arrays if data
-                ! is present.
+            realTypeCGNS = setCGNSRealType()
 
-                if(dirichletFlag == 1)                               &
-                     call readBCDataArrays(dataSet(j)%nDirichletArrays, &
-                     dataSet(j)%dirichletArrays,  &
-                     Dirichlet)
+            ! Go to the correct node of the given boundary subface.
 
-                if(neumannFlag == 1)                               &
-                     call readBCDataArrays(dataSet(j)%nNeumannArrays, &
-                     dataSet(j)%neumannArrays,  &
-                     Neumann)
+            call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", nZone, &
+                           "ZoneBC_t", 1, "BC_t", i, "BCDataSet_t", j, &
+                           "BCData_t", DirNeu, "end")
+            if (ierr /= CG_OK) &
+                call terminate("readBCDataArrays", &
+                               "Something wrong when calling cg_goto_f")
 
-             enddo loopDataSet
-          endif testDataSets
-       endif testFamilySpecified
+            ! Determine the amount of data arrays present for this node.
 
-    enddo bocoLoop
+            call cg_narrays_f(nArrays, ierr)
+            if (ierr /= CG_OK) &
+                call terminate("readBCDataArrays", &
+                               "Something wrong when calling cg_narrays_f")
+            nArr = nArrays
 
-    !=================================================================
+            ! Allocate the memory for the prescribed data sets.
 
-  contains
+            allocate (arr(nArr), stat=ierr)
+            if (ierr /= 0) &
+                call terminate("readBCDataArrays", &
+                               "Memory allocation failure for arr")
 
-    !===============================================================
+            ! Initialize the units to si units.
 
-    subroutine readBCDataArrays(nArr, arr, DirNeu)
-      !
-      !         readBCDataArrays reads the arrays of the given data set
-      !         from the cgns file.
-      !
-      implicit none
-      !
-      !        Subroutine arguments.
-      !
-      integer, intent(in) :: DirNeu
+            do k = 1, nArrays
+                arr(k)%mass = Kilogram
+                arr(k)%len = Meter
+                arr(k)%time = Second
+                arr(k)%temp = Kelvin
+                arr(k)%angle = Radian
+            end do
 
-      integer(kind=intType), intent(out) :: nArr
-      type(cgnsBcdataArray), pointer, dimension(:) :: arr
-      !
-      !        Local variables.
-      !
-      integer :: ierr
-      integer :: k, l, nArrays, realTypeCGNS
-      integer :: mass, len, time, temp, angle, dataType
+            ! Check if this "main" node contains info about the units.
+            ! If so set the units of arr to these units.
 
-      integer(kind=intType) :: nn
+            globalUnits = .false.
+            call cg_units_read_f(mass, len, time, temp, angle, ierr)
+            if (ierr == error) &
+                call terminate("readBCDataArrays", &
+                               "Something wrong when calling cg_units_read_f")
 
-      real(kind=cgnsRealType), dimension(:), allocatable :: tmp
+            if (ierr == CG_OK) then
+                globalUnits = .true.
 
-      logical :: globalUnits
+                do k = 1, nArrays
+                    arr(k)%mass = mass
+                    arr(k)%len = len
+                    arr(k)%time = time
+                    arr(k)%temp = temp
+                    arr(k)%angle = angle
+                end do
+            end if
 
-      ! Set the cgns real type.
+            ! Loop over the number of data arrays.
 
-      realTypeCGNS = setCGNSRealType()
+            loopDataArrays: do k = 1, nArrays
 
-      ! Go to the correct node of the given boundary subface.
+                ! Go to the main node of the data arrays.
 
-      call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", nZone, &
-           "ZoneBC_t", 1, "BC_t", i, "BCDataSet_t", j, &
-           "BCData_t", DirNeu, "end")
-      if(ierr /= CG_OK)                     &
-           call terminate("readBCDataArrays", &
-           "Something wrong when calling cg_goto_f")
+                call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", nZone, &
+                               "ZoneBC_t", 1, "BC_t", i, "BCDataSet_t", j, &
+                               "BCData_t", DirNeu, "end")
+                if (ierr /= CG_OK) &
+                    call terminate("readBCDataArrays", &
+                                   "Something wrong when calling cg_goto_f")
+
+                ! Determine the name and the dimensions of the array.
+
+                call cg_array_info_f(k, arr(k)%arrayName, dataType, &
+                                     arr(k)%nDimensions, arr(k)%dataDim, &
+                                     ierr)
+                if (ierr /= CG_OK) &
+                     call terminate("readBCDataArrays", &
+                     "Something wrong when calling &
+                     &cg_array_info_f")
 
-      ! Determine the amount of data arrays present for this node.
+                ! Determine the total size of the data array and allocate
+                ! the memory for dataArr and tmp, which is used to read
+                ! the data.
 
-      call cg_narrays_f(nArrays, ierr)
-      if(ierr /= CG_OK)                   &
-           call terminate("readBCDataArrays", &
-           "Something wrong when calling cg_narrays_f")
-      nArr = nArrays
+                nn = arr(k)%dataDim(1)
+                do l = 2, arr(k)%nDimensions
+                    nn = nn * arr(k)%dataDim(l)
+                end do
 
-      ! Allocate the memory for the prescribed data sets.
+                allocate (arr(k)%dataArr(nn), tmp(nn), stat=ierr)
+                if (ierr /= 0) &
+                     call terminate("readBCDataArrays", &
+                     "Memory allocation failure for dataArr &
+                     &and tmp")
 
-      allocate(arr(nArr), stat=ierr)
-      if(ierr /= 0)                        &
-           call terminate("readBCDataArrays", &
-           "Memory allocation failure for arr")
+                ! Read the data, copy it from tmp into dataArr and
+                ! deallocate tmp again.
 
-      ! Initialize the units to si units.
+                call cg_array_read_as_f(k, realTypeCGNS, tmp, ierr)
+                if (ierr /= CG_OK) &
+                     call terminate("readBCDataArrays", &
+                     "Something wrong when calling &
+                     &cg_array_read_as_f")
 
-      do k=1,nArrays
-         arr(k)%mass  = Kilogram
-         arr(k)%len   = Meter
-         arr(k)%time  = Second
-         arr(k)%temp  = Kelvin
-         arr(k)%angle = Radian
-      enddo
+                arr(k)%dataArr = tmp
 
-      ! Check if this "main" node contains info about the units.
-      ! If so set the units of arr to these units.
+                deallocate (tmp, stat=ierr)
+                if (ierr /= 0) call terminate("loopDataArrays", &
+                                              "Deallocation failure for tmp")
 
-      globalUnits = .false.
-      call cg_units_read_f(mass, len, time, temp, angle, ierr)
-      if(ierr == error) &
-           call terminate("readBCDataArrays", &
-           "Something wrong when calling cg_units_read_f")
+                ! Go to data array node to find out if the dimensions are
+                ! specified here.
 
-      if(ierr == CG_OK) then
-         globalUnits = .true.
+                call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", nZone, &
+                               "ZoneBC_t", 1, "BC_t", i, "BCDataSet_t", j, &
+                               "BCData_t", DirNeu, "DataArray_t", k, "end")
+                if (ierr /= CG_OK) &
+                    call terminate("readBCDataArrays", &
+                                   "Something wrong when calling cg_goto_f")
 
-         do k=1,nArrays
-            arr(k)%mass  = mass
-            arr(k)%len   = len
-            arr(k)%time  = time
-            arr(k)%temp  = temp
-            arr(k)%angle = angle
-         enddo
-      endif
+                ! Try to read the units.
 
-      ! Loop over the number of data arrays.
+                call cg_units_read_f(mass, len, time, temp, angle, ierr)
+                if (ierr == error) &
+                     call terminate("readBCDataArrays", &
+                     "Something wrong when calling &
+                     &cg_units_read_f")
 
-      loopDataArrays: do k=1,nArrays
+                ! If the units are specified overwrite the currently
+                ! stored units .
 
-         ! Go to the main node of the data arrays.
+                if (ierr == CG_OK) then
 
-         call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", nZone, &
-              "ZoneBC_t", 1, "BC_t", i, "BCDataSet_t", j, &
-              "BCData_t", DirNeu, "end")
-         if(ierr /= CG_OK)                     &
-              call terminate("readBCDataArrays", &
-              "Something wrong when calling cg_goto_f")
+                    arr(k)%mass = mass
+                    arr(k)%len = len
+                    arr(k)%time = time
+                    arr(k)%temp = temp
+                    arr(k)%angle = angle
 
-         ! Determine the name and the dimensions of the array.
-
-         call cg_array_info_f(k, arr(k)%arrayName, dataType,     &
-              arr(k)%nDimensions, arr(k)%dataDim, &
-              ierr)
-         if(ierr /= CG_OK)                     &
-              call terminate("readBCDataArrays", &
-              "Something wrong when calling &
-              &cg_array_info_f")
-
-         ! Determine the total size of the data array and allocate
-         ! the memory for dataArr and tmp, which is used to read
-         ! the data.
-
-         nn = arr(k)%dataDim(1)
-         do l=2,arr(k)%nDimensions
-            nn = nn*arr(k)%dataDim(l)
-         enddo
-
-         allocate(arr(k)%dataArr(nn), tmp(nn), stat=ierr)
-         if(ierr /= 0)                          &
-              call terminate("readBCDataArrays", &
-              "Memory allocation failure for dataArr &
-              &and tmp")
-
-         ! Read the data, copy it from tmp into dataArr and
-         ! deallocate tmp again.
-
-         call cg_array_read_as_f(k, realTypeCGNS, tmp, ierr)
-         if(ierr /= CG_OK)                     &
-              call terminate("readBCDataArrays", &
-              "Something wrong when calling &
-              &cg_array_read_as_f")
-
-         arr(k)%dataArr = tmp
-
-         deallocate(tmp, stat=ierr)
-         if(ierr /= 0) call terminate("loopDataArrays", &
-              "Deallocation failure for tmp")
+                else if (.not. globalUnits) then
 
-         ! Go to data array node to find out if the dimensions are
-         ! specified here.
-
-         call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", nZone, &
-              "ZoneBC_t", 1, "BC_t", i, "BCDataSet_t", j, &
-              "BCData_t", DirNeu, "DataArray_t", k, "end")
-         if(ierr /= CG_OK)                     &
-              call terminate("readBCDataArrays", &
-              "Something wrong when calling cg_goto_f")
-
-         ! Try to read the units.
-
-         call cg_units_read_f(mass, len, time, temp, angle, ierr)
-         if(ierr == error) &
-              call terminate("readBCDataArrays", &
-              "Something wrong when calling &
-              &cg_units_read_f")
-
-         ! If the units are specified overwrite the currently
-         ! stored units .
-
-         if(ierr == CG_OK) then
-
-            arr(k)%mass  = mass
-            arr(k)%len   = len
-            arr(k)%time  = time
-            arr(k)%temp  = temp
-            arr(k)%angle = angle
-
-         else if(.not. globalUnits) then
-
-            ! No local and global units specified. Processor 0 prints
-            ! a warning.
+                    ! No local and global units specified. Processor 0 prints
+                    ! a warning.
 
-            if(myID == 0) then
+                    if (myID == 0) then
 
-               print "(a)", "#"
-               print "(a)", "#                      Warning"
-               print strings, "# Zone ", trim(cgnsDoms(nZone)%zoneName), &
-                 ", boundary subface ", trim(cgnsDoms(nZone)%bocoInfo(i)%bocoName)
-               print strings, "# BC data set ", trim(arr(k)%arrayName), ": No units specified, assuming SI units"
-               print "(a)", "#"
-            endif
-         endif
-
-      enddo loopDataArrays
-
-    end subroutine readBCDataArrays
-
-
-  end subroutine readBocos
-
-  !      ==================================================================
-
-  logical function checkForDoubleBoundFace(nZone, nBound)
-    !
-    !       CheckForDoubleBoundFace checks whether the given boundary
-    !       range for the given zone has already been defined in the 1 to
-    !       1 block connectivities. If so .true. is returned, otherwise
-    !       .false.
-    !
-    use cgnsGrid
-    implicit none
-    !
-    !      Function arguments.
-    !
-    integer, intent(in) :: nZone, nBound
-    !
-    !      Local variables.
-    !
-    integer :: i
-    integer, dimension(3,2) :: rangeBound, rangeFace
-
-    ! Set the range for the boundary face.
-
-    rangeBound(1,1) = min(cgnsDoms(nZone)%bocoInfo(nBound)%iBeg, &
-         cgnsDoms(nZone)%bocoInfo(nBound)%iEnd)
-    rangeBound(1,2) = max(cgnsDoms(nZone)%bocoInfo(nBound)%iBeg, &
-         cgnsDoms(nZone)%bocoInfo(nBound)%iEnd)
-
-    rangeBound(2,1) = min(cgnsDoms(nZone)%bocoInfo(nBound)%jBeg, &
-         cgnsDoms(nZone)%bocoInfo(nBound)%jEnd)
-    rangeBound(2,2) = max(cgnsDoms(nZone)%bocoInfo(nBound)%jBeg, &
-         cgnsDoms(nZone)%bocoInfo(nBound)%jEnd)
-
-    rangeBound(3,1) = min(cgnsDoms(nZone)%bocoInfo(nBound)%kBeg, &
-         cgnsDoms(nZone)%bocoInfo(nBound)%kEnd)
-    rangeBound(3,2) = max(cgnsDoms(nZone)%bocoInfo(nBound)%kBeg, &
-         cgnsDoms(nZone)%bocoInfo(nBound)%kEnd)
-
-    ! Initialize checkForDoubleBoundFace to .false.
-
-    checkForDoubleBoundFace = .false.
+                        print "(a)", "#"
+                        print "(a)", "#                      Warning"
+                        print strings, "# Zone ", trim(cgnsDoms(nZone)%zoneName), &
+                            ", boundary subface ", trim(cgnsDoms(nZone)%bocoInfo(i)%bocoName)
+                      print strings, "# BC data set ", trim(arr(k)%arrayName), ": No units specified, assuming SI units"
+                        print "(a)", "#"
+                    end if
+                end if
 
-    ! Loop over the 1 to 1 block connectivities of this zone.
-
-    do i=1,cgnsDoms(nZone)%n1to1
-
-       ! Set the range for this subface.
+            end do loopDataArrays
+
+        end subroutine readBCDataArrays
+
+    end subroutine readBocos
+
+    !      ==================================================================
+
+    logical function checkForDoubleBoundFace(nZone, nBound)
+        !
+        !       CheckForDoubleBoundFace checks whether the given boundary
+        !       range for the given zone has already been defined in the 1 to
+        !       1 block connectivities. If so .true. is returned, otherwise
+        !       .false.
+        !
+        use cgnsGrid
+        implicit none
+        !
+        !      Function arguments.
+        !
+        integer, intent(in) :: nZone, nBound
+        !
+        !      Local variables.
+        !
+        integer :: i
+        integer, dimension(3, 2) :: rangeBound, rangeFace
+
+        ! Set the range for the boundary face.
+
+        rangeBound(1, 1) = min(cgnsDoms(nZone)%bocoInfo(nBound)%iBeg, &
+                               cgnsDoms(nZone)%bocoInfo(nBound)%iEnd)
+        rangeBound(1, 2) = max(cgnsDoms(nZone)%bocoInfo(nBound)%iBeg, &
+                               cgnsDoms(nZone)%bocoInfo(nBound)%iEnd)
+
+        rangeBound(2, 1) = min(cgnsDoms(nZone)%bocoInfo(nBound)%jBeg, &
+                               cgnsDoms(nZone)%bocoInfo(nBound)%jEnd)
+        rangeBound(2, 2) = max(cgnsDoms(nZone)%bocoInfo(nBound)%jBeg, &
+                               cgnsDoms(nZone)%bocoInfo(nBound)%jEnd)
+
+        rangeBound(3, 1) = min(cgnsDoms(nZone)%bocoInfo(nBound)%kBeg, &
+                               cgnsDoms(nZone)%bocoInfo(nBound)%kEnd)
+        rangeBound(3, 2) = max(cgnsDoms(nZone)%bocoInfo(nBound)%kBeg, &
+                               cgnsDoms(nZone)%bocoInfo(nBound)%kEnd)
+
+        ! Initialize checkForDoubleBoundFace to .false.
+
+        checkForDoubleBoundFace = .false.
+
+        ! Loop over the 1 to 1 block connectivities of this zone.
+
+        do i = 1, cgnsDoms(nZone)%n1to1
+
+            ! Set the range for this subface.
 
-       rangeFace(1,1) = min(cgnsDoms(nZone)%conn1to1(i)%iBeg, &
-            cgnsDoms(nZone)%conn1to1(i)%iEnd)
-       rangeFace(1,2) = max(cgnsDoms(nZone)%conn1to1(i)%iBeg, &
-            cgnsDoms(nZone)%conn1to1(i)%iEnd)
-
-       rangeFace(2,1) = min(cgnsDoms(nZone)%conn1to1(i)%jBeg, &
-            cgnsDoms(nZone)%conn1to1(i)%jEnd)
-       rangeFace(2,2) = max(cgnsDoms(nZone)%conn1to1(i)%jBeg, &
-            cgnsDoms(nZone)%conn1to1(i)%jEnd)
+            rangeFace(1, 1) = min(cgnsDoms(nZone)%conn1to1(i)%iBeg, &
+                                  cgnsDoms(nZone)%conn1to1(i)%iEnd)
+            rangeFace(1, 2) = max(cgnsDoms(nZone)%conn1to1(i)%iBeg, &
+                                  cgnsDoms(nZone)%conn1to1(i)%iEnd)
+
+            rangeFace(2, 1) = min(cgnsDoms(nZone)%conn1to1(i)%jBeg, &
+                                  cgnsDoms(nZone)%conn1to1(i)%jEnd)
+            rangeFace(2, 2) = max(cgnsDoms(nZone)%conn1to1(i)%jBeg, &
+                                  cgnsDoms(nZone)%conn1to1(i)%jEnd)
 
-       rangeFace(3,1) = min(cgnsDoms(nZone)%conn1to1(i)%kBeg, &
-            cgnsDoms(nZone)%conn1to1(i)%kEnd)
-       rangeFace(3,2) = max(cgnsDoms(nZone)%conn1to1(i)%kBeg, &
-            cgnsDoms(nZone)%conn1to1(i)%kEnd)
+            rangeFace(3, 1) = min(cgnsDoms(nZone)%conn1to1(i)%kBeg, &
+                                  cgnsDoms(nZone)%conn1to1(i)%kEnd)
+            rangeFace(3, 2) = max(cgnsDoms(nZone)%conn1to1(i)%kBeg, &
+                                  cgnsDoms(nZone)%conn1to1(i)%kEnd)
 
-       ! And do the check.
+            ! And do the check.
 
-       if(rangeBound(1,1) == rangeFace(1,1) .and. &
-            rangeBound(1,2) == rangeFace(1,2) .and. &
-            rangeBound(2,1) == rangeFace(2,1) .and. &
-            rangeBound(2,2) == rangeFace(2,2) .and. &
-            rangeBound(3,1) == rangeFace(3,1) .and. &
-            rangeBound(3,2) == rangeFace(3,2)) then
+            if (rangeBound(1, 1) == rangeFace(1, 1) .and. &
+                rangeBound(1, 2) == rangeFace(1, 2) .and. &
+                rangeBound(2, 1) == rangeFace(2, 1) .and. &
+                rangeBound(2, 2) == rangeFace(2, 2) .and. &
+                rangeBound(3, 1) == rangeFace(3, 1) .and. &
+                rangeBound(3, 2) == rangeFace(3, 2)) then
 
-          ! Faces are identical. Set checkForDoubleBoundFace to
-          ! .true. and exit the loop.
+                ! Faces are identical. Set checkForDoubleBoundFace to
+                ! .true. and exit the loop.
 
-          checkForDoubleBoundFace = .true.
-          exit
+                checkForDoubleBoundFace = .true.
+                exit
 
-       endif
+            end if
 
-    enddo
+        end do
 
-  end function checkForDoubleBoundFace
-  function internalBC(cgnsBocoType, userDefinedName)
-    !
-    !       internalBC determines the corresponding internally used
-    !       boundary condition type for the given CGNS boundary condition.
-    !       The flow equations to be solved are taken into account, e.g.
-    !       a viscous wall BC for the Euler equations is set to an
-    !       inviscid wall.
-    !
-    use constants
-    use su_cgns
-    use inputPhysics, only : equations, flowType
-    implicit none
-    !
-    !      Function type.
-    !
-    integer(kind=intType) :: internalBC
-    !
-    !      Function argument.
-    !
-    integer, intent(in) :: cgnsBocoType  ! Note integer and not
-    ! integer(intType).
-    ! Because of cgns.
+    end function checkForDoubleBoundFace
+    function internalBC(cgnsBocoType, userDefinedName)
+        !
+        !       internalBC determines the corresponding internally used
+        !       boundary condition type for the given CGNS boundary condition.
+        !       The flow equations to be solved are taken into account, e.g.
+        !       a viscous wall BC for the Euler equations is set to an
+        !       inviscid wall.
+        !
+        use constants
+        use su_cgns
+        use inputPhysics, only: equations, flowType
+        implicit none
+        !
+        !      Function type.
+        !
+        integer(kind=intType) :: internalBC
+        !
+        !      Function argument.
+        !
+        integer, intent(in) :: cgnsBocoType  ! Note integer and not
+        ! integer(intType).
+        ! Because of cgns.
 
-    character(len=maxCGNSNameLen), intent(in) :: userDefinedName
+        character(len=maxCGNSNameLen), intent(in) :: userDefinedName
 
-    ! Determine the CGNS boundary condition type and set
-    ! internalBC accordingly.
+        ! Determine the CGNS boundary condition type and set
+        ! internalBC accordingly.
 
-    select case (cgnsBocoType)
-    case (BCWallInviscid)
-       internalBC = EulerWall
+        select case (cgnsBocoType)
+        case (BCWallInviscid)
+            internalBC = EulerWall
 
-    case (BCWall, BCWallViscous, BCWallViscousHeatFlux)
-       internalBC = NSWallAdiabatic
-       if(equations == EulerEquations) internalBC = EulerWall
+        case (BCWall, BCWallViscous, BCWallViscousHeatFlux)
+            internalBC = NSWallAdiabatic
+            if (equations == EulerEquations) internalBC = EulerWall
 
-    case (BCWallViscousIsothermal)
-       internalBC = NSWallIsothermal
-       if(equations == EulerEquations) internalBC = EulerWall
+        case (BCWallViscousIsothermal)
+            internalBC = NSWallIsothermal
+            if (equations == EulerEquations) internalBC = EulerWall
 
-    case (BCSymmetryPlane)
-       internalBC = Symm
+        case (BCSymmetryPlane)
+            internalBC = Symm
 
-    case (BCSymmetryPolar)
-       internalBC = SymmPolar
+        case (BCSymmetryPolar)
+            internalBC = SymmPolar
 
-    case (BCExtrapolate, BCDegenerateLine, BCDegeneratePoint, &
-         BCAxisymmetricWedge)
-       internalBC = Extrap
+        case (BCExtrapolate, BCDegenerateLine, BCDegeneratePoint, &
+              BCAxisymmetricWedge)
+            internalBC = Extrap
 
-    case (BCFarfield, BCInflow, BCOutflow)
-       internalBC = FarField
+        case (BCFarfield, BCInflow, BCOutflow)
+            internalBC = FarField
 
-    case (BCInflowSubsonic)
-       internalBC = SubsonicInflow
+        case (BCInflowSubsonic)
+            internalBC = SubsonicInflow
 
-    case (BCInflowSupersonic)
-       internalBC = SupersonicInflow
+        case (BCInflowSupersonic)
+            internalBC = SupersonicInflow
 
-    case (BCOutflowSubsonic)
-       internalBC = SubsonicOutflow
+        case (BCOutflowSubsonic)
+            internalBC = SubsonicOutflow
 
-    case (BCOutflowSupersonic)
-       internalBC = SupersonicOutflow
+        case (BCOutflowSupersonic)
+            internalBC = SupersonicOutflow
 
-    case (BCTunnelInflow)
-       internalBC = SubsonicInflow
+        case (BCTunnelInflow)
+            internalBC = SubsonicInflow
 
-    case (BCTunnelOutflow)
-       internalBC = SubsonicOutflow
+        case (BCTunnelOutflow)
+            internalBC = SubsonicOutflow
 
-    case (UserDefined)
+        case (UserDefined)
 
-       ! Select the internal type base on the user defined name.
+            ! Select the internal type base on the user defined name.
 
-       select case (trim(adjustl(userDefinedName)))
+            select case (trim(adjustl(userDefinedName)))
 
-       case ("BCMassBleedInflow")
-          internalBC = MassBleedInflow
+            case ("BCMassBleedInflow")
+                internalBC = MassBleedInflow
 
-       case ("BCMassBleedOutflow")
-          internalBC = MassBleedOutflow
+            case ("BCMassBleedOutflow")
+                internalBC = MassBleedOutflow
 
-       case ("BCSlidingMesh")
-          internalBC = SlidingInterface
+            case ("BCSlidingMesh")
+                internalBC = SlidingInterface
 
-       case ("BCOverset")
-          internalBC = OversetOuterBound
+            case ("BCOverset")
+                internalBC = OversetOuterBound
 
-       case ("BCDomainInterfaceAll")
-          internalBC = DomainInterfaceAll
+            case ("BCDomainInterfaceAll")
+                internalBC = DomainInterfaceAll
 
-       case ("BCDomainInterfaceRhoUVW")
-          internalBC = DomainInterfaceRhoUVW
+            case ("BCDomainInterfaceRhoUVW")
+                internalBC = DomainInterfaceRhoUVW
 
-       case ("BCDomainInterfaceP")
-          internalBC = DomainInterfaceP
+            case ("BCDomainInterfaceP")
+                internalBC = DomainInterfaceP
 
-       case ("BCDomainInterfaceRho")
-          internalBC = DomainInterfaceRho
+            case ("BCDomainInterfaceRho")
+                internalBC = DomainInterfaceRho
 
-       case ("BCDomainInterfaceTotal")
-          internalBC = DomainInterfaceTotal
+            case ("BCDomainInterfaceTotal")
+                internalBC = DomainInterfaceTotal
 
-       case default
-          internalBC = bcNull
+            case default
+                internalBC = bcNull
 
-       end select
+            end select
 
-    case default
-       internalBC = bcNull
+        case default
+            internalBC = bcNull
 
-    end select
+        end select
 
-    ! Check whether the boundary conditions is allowed for the
-    ! type of flow problem to be solved.
+        ! Check whether the boundary conditions is allowed for the
+        ! type of flow problem to be solved.
 
-    select case (flowType)
+        select case (flowType)
 
-    case (internalFlow)
+        case (internalFlow)
 
-       ! Internal flow. Not allowed to specify a farfield
-       ! boundary condition.
+            ! Internal flow. Not allowed to specify a farfield
+            ! boundary condition.
 
-       if(internalBC == FarField) internalBC = bcNotValid
+            if (internalBC == FarField) internalBC = bcNotValid
 
-    end select
+        end select
 
-  end function internalBC
-  subroutine readPeriodicSubface(cgnsInd, cgnsBase, zone, conn,  &
-       connectName, periodic,          &
-       rotationCenter, rotationAngles, &
-       translation)
-    !
-    !       readPeriodicSubface reads the possible periodic info for the
-    !       given general subface connectivity.
-    !
-    use constants
-    use su_cgns
-    use cgnsGrid, only : cgnsDoms, cgnsNDom
-    use communication, only : adflow_comm_world, myid
-    use utils, only : siAngle, terminate
-    use commonFormats, only : strings
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    integer,          intent(in) :: cgnsInd, cgnsBase, zone, conn
-    character(len=*), intent(in) :: connectName
+    end function internalBC
+    subroutine readPeriodicSubface(cgnsInd, cgnsBase, zone, conn, &
+                                   connectName, periodic, &
+                                   rotationCenter, rotationAngles, &
+                                   translation)
+        !
+        !       readPeriodicSubface reads the possible periodic info for the
+        !       given general subface connectivity.
+        !
+        use constants
+        use su_cgns
+        use cgnsGrid, only: cgnsDoms, cgnsNDom
+        use communication, only: adflow_comm_world, myid
+        use utils, only: siAngle, terminate
+        use commonFormats, only: strings
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        integer, intent(in) :: cgnsInd, cgnsBase, zone, conn
+        character(len=*), intent(in) :: connectName
 
-    logical, intent(out) :: periodic
+        logical, intent(out) :: periodic
 
-    real(kind=realType), dimension(3), intent(out) :: rotationCenter
-    real(kind=realType), dimension(3), intent(out) :: rotationAngles
-    real(kind=realType), dimension(3), intent(out) :: translation
+        real(kind=realType), dimension(3), intent(out) :: rotationCenter
+        real(kind=realType), dimension(3), intent(out) :: rotationAngles
+        real(kind=realType), dimension(3), intent(out) :: translation
 
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-    integer :: jj
-    integer :: mass, len, time, temp, angle
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+        integer :: jj
+        integer :: mass, len, time, temp, angle
 
-    real(kind=realType), dimension(3) :: rotCenter, rotAngles
-    real(kind=realType), dimension(3) :: tlation
+        real(kind=realType), dimension(3) :: rotCenter, rotAngles
+        real(kind=realType), dimension(3) :: tlation
 
-    real(kind=realType) :: mult, trans
+        real(kind=realType) :: mult, trans
 
-    ! Check if this is a periodic boundary.
+        ! Check if this is a periodic boundary.
 
-    call cg_conn_periodic_read_f(cgnsInd, cgnsBase, zone, conn, &
-         real(rotCenter,cgnsPerType), real(rotAngles,cgnsPerType), real(tlation,cgnsPerType), ierr)
+        call cg_conn_periodic_read_f(cgnsInd, cgnsBase, zone, conn, &
+                           real(rotCenter, cgnsPerType), real(rotAngles, cgnsPerType), real(tlation, cgnsPerType), ierr)
 
-    testPeriodic: if(ierr == CG_OK) then
+        testPeriodic: if (ierr == CG_OK) then
 
-       ! Subface is a periodic boundary. Check if the unit for
-       ! the rotation angles is specified.
+            ! Subface is a periodic boundary. Check if the unit for
+            ! the rotation angles is specified.
 
-       call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", zone, &
-            "ZoneGridConnectivity_t", 1,             &
-            "GridConnectivity_t", conn,              &
-            "GridConnectivityProperty_t", 1,         &
-            "Periodic_t", 1, "DataArray_t", 2, "end")
-       if(ierr /= CG_OK)                      &
-            call terminate("readPeriodicSubface", &
-            "Something wrong when calling cg_goto_f")
+            call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", zone, &
+                           "ZoneGridConnectivity_t", 1, &
+                           "GridConnectivity_t", conn, &
+                           "GridConnectivityProperty_t", 1, &
+                           "Periodic_t", 1, "DataArray_t", 2, "end")
+            if (ierr /= CG_OK) &
+                call terminate("readPeriodicSubface", &
+                               "Something wrong when calling cg_goto_f")
 
-       call cg_units_read_f(mass, len, time, temp, angle, ierr)
-       if(ierr == error)                   &
-            call terminate("readPeriodicSubface", &
-            "Something wrong when calling cg_units_read_f")
+            call cg_units_read_f(mass, len, time, temp, angle, ierr)
+            if (ierr == error) &
+                call terminate("readPeriodicSubface", &
+                               "Something wrong when calling cg_units_read_f")
 
-       ! Check if the angle dimensions were specified.
+            ! Check if the angle dimensions were specified.
 
-       if(ierr == CG_OK .and. angle /= Null) then
+            if (ierr == CG_OK .and. angle /= Null) then
 
-          ! Determine the conversion factor to radians.
+                ! Determine the conversion factor to radians.
 
-          call siAngle(angle, mult, trans)
+                call siAngle(angle, mult, trans)
 
-       else
+            else
 
-          ! Angle units not specified. Assume radians.
-          ! Processor 0 writes a warning to stdout.
+                ! Angle units not specified. Assume radians.
+                ! Processor 0 writes a warning to stdout.
 
-          if(myID == 0) then
+                if (myID == 0) then
 
-             print "(a)", "#"
-             print "(a)", "#                      Warning"
-             print strings, "# Zone ", trim(cgnsDoms(zone)%zonename), &
-               ", General connectivity ", trim(connectName), &
-               ": No unit specified for periodic angles, assuming radians."
-             print "(a)", "#"
+                    print "(a)", "#"
+                    print "(a)", "#                      Warning"
+                    print strings, "# Zone ", trim(cgnsDoms(zone)%zonename), &
+                        ", General connectivity ", trim(connectName), &
+                        ": No unit specified for periodic angles, assuming radians."
+                    print "(a)", "#"
 
-          endif
+                end if
 
-          ! Set mult to one.
+                ! Set mult to one.
 
-          mult = one
+                mult = one
 
-       endif
+            end if
 
-       ! Store the info. Convert the rotation center and the
-       ! translation vector to meters.
+            ! Store the info. Convert the rotation center and the
+            ! translation vector to meters.
 
-       periodic = .true.
+            periodic = .true.
 
-       rotationCenter = rotCenter*cgnsDoms(zone)%LRef
-       rotationAngles = rotAngles*mult
-       translation    = tlation*cgnsDoms(zone)%LRef
+            rotationCenter = rotCenter * cgnsDoms(zone)%LRef
+            rotationAngles = rotAngles * mult
+            translation = tlation * cgnsDoms(zone)%LRef
 
-       ! Make sure that the rotation angles are such that it
-       ! corresponds to an integer value of the number of
-       ! sections per wheel.
+            ! Make sure that the rotation angles are such that it
+            ! corresponds to an integer value of the number of
+            ! sections per wheel.
 
-       mult = sqrt(rotationAngles(1)**2 &
-            +      rotationAngles(2)**2 &
-            +      rotationAngles(3)**2)
+            mult = sqrt(rotationAngles(1)**2 &
+                        + rotationAngles(2)**2 &
+                        + rotationAngles(3)**2)
 
-       if(mult > eps) then
+            if (mult > eps) then
 
-          ! Nonzero angle specified. Determine the number of
-          ! sections for the full wheel, which is an integer.
+                ! Nonzero angle specified. Determine the number of
+                ! sections for the full wheel, which is an integer.
 
-          jj = nint(two*pi/mult)
+                jj = nint(two * pi / mult)
 
-          ! Store the correction factor for the angles in
-          ! mult and correct the periodic angles accordingly.
+                ! Store the correction factor for the angles in
+                ! mult and correct the periodic angles accordingly.
 
-          mult = two*pi/(jj*mult)
+                mult = two * pi / (jj * mult)
 
-          rotationAngles = mult*rotationAngles
+                rotationAngles = mult * rotationAngles
 
-       endif
+            end if
 
-    else testPeriodic
+            else testPeriodic
 
-       ! Subface is a normal boundary. Set periodic to .false. and
-       ! initialize the periodic data to zero to avoid possible
-       ! problems due to uninitialized data.
+            ! Subface is a normal boundary. Set periodic to .false. and
+            ! initialize the periodic data to zero to avoid possible
+            ! problems due to uninitialized data.
 
-       periodic = .false.
+            periodic = .false.
 
-       rotationCenter = zero
-       rotationAngles = zero
-       translation    = zero
+            rotationCenter = zero
+            rotationAngles = zero
+            translation = zero
 
-    endif testPeriodic
+        end if testPeriodic
 
-  end subroutine readPeriodicSubface
+    end subroutine readPeriodicSubface
 
-  subroutine readPeriodicSubface1to1(cgnsInd, cgnsBase, zone, conn,  &
-       connectName, periodic,          &
-       rotationCenter, rotationAngles, &
-       translation)
-    !
-    !       readPeriodicSubface reads the possible periodic info for the
-    !       given general subface connectivity.
-    !
-    use constants
-    use su_cgns
-    use cgnsGrid, only : cgnsDoms, cgnsNDom
-    use communication, only : adflow_comm_world, myid
-    use utils, only : siAngle, terminate
-    use commonFormats, only : strings
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    integer,          intent(in) :: cgnsInd, cgnsBase, zone, conn
-    character(len=*), intent(in) :: connectName
+    subroutine readPeriodicSubface1to1(cgnsInd, cgnsBase, zone, conn, &
+                                       connectName, periodic, &
+                                       rotationCenter, rotationAngles, &
+                                       translation)
+        !
+        !       readPeriodicSubface reads the possible periodic info for the
+        !       given general subface connectivity.
+        !
+        use constants
+        use su_cgns
+        use cgnsGrid, only: cgnsDoms, cgnsNDom
+        use communication, only: adflow_comm_world, myid
+        use utils, only: siAngle, terminate
+        use commonFormats, only: strings
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        integer, intent(in) :: cgnsInd, cgnsBase, zone, conn
+        character(len=*), intent(in) :: connectName
 
-    logical, intent(out) :: periodic
+        logical, intent(out) :: periodic
 
-    real(kind=realType), dimension(3), intent(out) :: rotationCenter
-    real(kind=realType), dimension(3), intent(out) :: rotationAngles
-    real(kind=realType), dimension(3), intent(out) :: translation
+        real(kind=realType), dimension(3), intent(out) :: rotationCenter
+        real(kind=realType), dimension(3), intent(out) :: rotationAngles
+        real(kind=realType), dimension(3), intent(out) :: translation
 
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-    integer :: jj
-    integer :: mass, len, time, temp, angle
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+        integer :: jj
+        integer :: mass, len, time, temp, angle
 
-    real(kind=realType), dimension(3) :: rotCenter, rotAngles
-    real(kind=realType), dimension(3) :: tlation
+        real(kind=realType), dimension(3) :: rotCenter, rotAngles
+        real(kind=realType), dimension(3) :: tlation
 
-    real(kind=realType) :: mult, trans
+        real(kind=realType) :: mult, trans
 
-    ! Check if this is a periodic boundary.
-    call cg_1to1_periodic_read_f(cgnsInd, cgnsBase, zone, conn, &
-         real(rotCenter,cgnsPerType), real(rotAngles,cgnsPerType), real(tlation,cgnsPerType), ierr)
+        ! Check if this is a periodic boundary.
+        call cg_1to1_periodic_read_f(cgnsInd, cgnsBase, zone, conn, &
+                           real(rotCenter, cgnsPerType), real(rotAngles, cgnsPerType), real(tlation, cgnsPerType), ierr)
 
-    testPeriodic: if(ierr == CG_OK) then
+        testPeriodic: if (ierr == CG_OK) then
 
-       ! Subface is a periodic boundary. Check if the unit for
-       ! the rotation angles is specified.
+            ! Subface is a periodic boundary. Check if the unit for
+            ! the rotation angles is specified.
 
-       call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", zone, &
-            "ZoneGridConnectivity_t", 1,             &
-            "GridConnectivity1to1_t", conn,              &
-            "GridConnectivityProperty_t", 1,         &
-            "Periodic_t", 3, "DataArray_t", 3, "end")
-       if(ierr /= CG_OK)                      &
-            call terminate("readPeriodicSubface1to1", &
-            "Something wrong when calling cg_goto_f")
+            call cg_goto_f(cgnsInd, cgnsBase, ierr, "Zone_t", zone, &
+                           "ZoneGridConnectivity_t", 1, &
+                           "GridConnectivity1to1_t", conn, &
+                           "GridConnectivityProperty_t", 1, &
+                           "Periodic_t", 3, "DataArray_t", 3, "end")
+            if (ierr /= CG_OK) &
+                call terminate("readPeriodicSubface1to1", &
+                               "Something wrong when calling cg_goto_f")
 
-       call cg_units_read_f(mass, len, time, temp, angle, ierr)
-       if(ierr == error)                   &
-            call terminate("readPeriodicSubface1to1", &
-            "Something wrong when calling cg_units_read_f")
+            call cg_units_read_f(mass, len, time, temp, angle, ierr)
+            if (ierr == error) &
+                call terminate("readPeriodicSubface1to1", &
+                               "Something wrong when calling cg_units_read_f")
 
-       ! Check if the angle dimensions were specified.
+            ! Check if the angle dimensions were specified.
 
-       if(ierr == CG_OK .and. angle /= Null) then
+            if (ierr == CG_OK .and. angle /= Null) then
 
-          ! Determine the conversion factor to radians.
+                ! Determine the conversion factor to radians.
 
-          call siAngle(angle, mult, trans)
+                call siAngle(angle, mult, trans)
 
-       else
+            else
 
-          ! Angle units not specified. Assume radians.
-          ! Processor 0 writes a warning to stdout.
+                ! Angle units not specified. Assume radians.
+                ! Processor 0 writes a warning to stdout.
 
-          if(myID == 0) then
+                if (myID == 0) then
 
-             print "(a)", "#"
-             print "(a)", "#                      Warning"
-             print strings, "# Zone ", trim(cgnsDoms(zone)%zonename), ", 1to1 connectivity ", trim(connectName), &
-               ": No unit specified for periodic angles, assuming radians."
-             print "(a)", "#"
+                    print "(a)", "#"
+                    print "(a)", "#                      Warning"
+                   print strings, "# Zone ", trim(cgnsDoms(zone)%zonename), ", 1to1 connectivity ", trim(connectName), &
+                        ": No unit specified for periodic angles, assuming radians."
+                    print "(a)", "#"
 
-          endif
+                end if
 
-          ! Set mult to one.
+                ! Set mult to one.
 
-          mult = one
+                mult = one
 
-       endif
+            end if
 
-       ! Store the info. Convert the rotation center and the
-       ! translation vector to meters.
+            ! Store the info. Convert the rotation center and the
+            ! translation vector to meters.
 
-       periodic = .true.
+            periodic = .true.
 
-       rotationCenter = rotCenter*cgnsDoms(zone)%LRef
-       rotationAngles = rotAngles*mult
-       translation    = tlation*cgnsDoms(zone)%LRef
+            rotationCenter = rotCenter * cgnsDoms(zone)%LRef
+            rotationAngles = rotAngles * mult
+            translation = tlation * cgnsDoms(zone)%LRef
 
-       ! Make sure that the rotation angles are such that it
-       ! corresponds to an integer value of the number of
-       ! sections per wheel.
+            ! Make sure that the rotation angles are such that it
+            ! corresponds to an integer value of the number of
+            ! sections per wheel.
 
-       mult = sqrt(rotationAngles(1)**2 &
-            +      rotationAngles(2)**2 &
-            +      rotationAngles(3)**2)
+            mult = sqrt(rotationAngles(1)**2 &
+                        + rotationAngles(2)**2 &
+                        + rotationAngles(3)**2)
 
-       if(mult > eps) then
+            if (mult > eps) then
 
-          ! Nonzero angle specified. Determine the number of
-          ! sections for the full wheel, which is an integer.
+                ! Nonzero angle specified. Determine the number of
+                ! sections for the full wheel, which is an integer.
 
-          jj = nint(two*pi/mult)
+                jj = nint(two * pi / mult)
 
-          ! Store the correction factor for the angles in
-          ! mult and correct the periodic angles accordingly.
+                ! Store the correction factor for the angles in
+                ! mult and correct the periodic angles accordingly.
 
-          mult = two*pi/(jj*mult)
+                mult = two * pi / (jj * mult)
 
-          rotationAngles = mult*rotationAngles
+                rotationAngles = mult * rotationAngles
 
-       endif
+            end if
 
-    else testPeriodic
+            else testPeriodic
 
-       ! Subface is a normal boundary. Set periodic to .false. and
-       ! initialize the periodic data to zero to avoid possible
-       ! problems due to uninitialized data.
+            ! Subface is a normal boundary. Set periodic to .false. and
+            ! initialize the periodic data to zero to avoid possible
+            ! problems due to uninitialized data.
 
-       periodic = .false.
+            periodic = .false.
 
-       rotationCenter = zero
-       rotationAngles = zero
-       translation    = zero
+            rotationCenter = zero
+            rotationAngles = zero
+            translation = zero
 
-    endif testPeriodic
+        end if testPeriodic
 
-  end subroutine readPeriodicSubface1to1
+    end subroutine readPeriodicSubface1to1
 
-  subroutine readGrid
-    !
-    !       readGrid reads the coordinates for the blocks or block parts
-    !       to be stored on this processor.
-    !
-    use constants
-    use cgnsNames
-    use su_cgns
-    use block, only : flowDoms, nDom
-    use cgnsGrid, only : cgnsDoms, cgnsNDom
-    use communication, only : adflow_comm_world, myID
-    use flowVarRefState, only : LRef
-    use inputIO, only : writeCoorMeter
-    use IOModule, only : IOVar
-    use partitionMod, only : nGridsRead, fileIDs, gridFiles
-    use utils, only: setCGNSRealType, terminate
-    use commonFormats, only : strings
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: cgnsInd, cgnsBase, cgnsZone
-    integer :: j, nCoords
-    integer :: ierr, realTypeCGNS, datatype
+    subroutine readGrid
+        !
+        !       readGrid reads the coordinates for the blocks or block parts
+        !       to be stored on this processor.
+        !
+        use constants
+        use cgnsNames
+        use su_cgns
+        use block, only: flowDoms, nDom
+        use cgnsGrid, only: cgnsDoms, cgnsNDom
+        use communication, only: adflow_comm_world, myID
+        use flowVarRefState, only: LRef
+        use inputIO, only: writeCoorMeter
+        use IOModule, only: IOVar
+        use partitionMod, only: nGridsRead, fileIDs, gridFiles
+        use utils, only: setCGNSRealType, terminate
+        use commonFormats, only: strings
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: cgnsInd, cgnsBase, cgnsZone
+        integer :: j, nCoords
+        integer :: ierr, realTypeCGNS, datatype
 
-    integer(kind=cgsize_t), dimension(3) :: rangeMin, rangeMax
+        integer(kind=cgsize_t), dimension(3) :: rangeMin, rangeMax
 
-    integer(kind=intType) :: i, ii, jj, kk, ll, nn
-    integer(kind=intType) :: il, jl, kl
-    integer(kind=intType) :: typeMismatch
+        integer(kind=intType) :: i, ii, jj, kk, ll, nn
+        integer(kind=intType) :: il, jl, kl
+        integer(kind=intType) :: typeMismatch
 
-    character(len=7)              :: int1String
-    character(len=2*maxStringLen) :: errorMessage
-    character(len=maxCGNSNameLen) :: coordname
-    character(len=maxCGNSNameLen) :: coordFormat
+        character(len=7) :: int1String
+        character(len=2*maxStringLen) :: errorMessage
+        character(len=maxCGNSNameLen) :: coordname
+        character(len=maxCGNSNameLen) :: coordFormat
 
-    real(kind=cgnsRealType), allocatable, dimension(:,:,:) :: buffer
+        real(kind=cgnsRealType), allocatable, dimension(:, :, :) :: buffer
 
-    ! Set the cgns real type and initialize typeMismatch to 0.
-    ! Set cgnsBase to 1, because we will always read from base 1;
-    ! possible higher bases are ignored.
+        ! Set the cgns real type and initialize typeMismatch to 0.
+        ! Set cgnsBase to 1, because we will always read from base 1;
+        ! possible higher bases are ignored.
 
-    realTypeCGNS = setCGNSRealType()
-    typeMismatch = 0
-    cgnsBase     = 1
+        realTypeCGNS = setCGNSRealType()
+        typeMismatch = 0
+        cgnsBase = 1
 
-    ! Loop over the number of blocks stored on this processor.
+        ! Loop over the number of blocks stored on this processor.
 
-    domainLoop: do i=1,nDom
+        domainLoop: do i = 1, nDom
 
-       ! Abbreviate the nodal block dimensions.
+            ! Abbreviate the nodal block dimensions.
 
-       il = flowDoms(i,1,1)%il
-       jl = flowDoms(i,1,1)%jl
-       kl = flowDoms(i,1,1)%kl
+            il = flowDoms(i, 1, 1)%il
+            jl = flowDoms(i, 1, 1)%jl
+            kl = flowDoms(i, 1, 1)%kl
 
-       ! Store the zone number a bit easier and set the range
-       ! for reading the coordinates.
+            ! Store the zone number a bit easier and set the range
+            ! for reading the coordinates.
 
-       cgnsZone = flowDoms(i,1,1)%cgnsBlockID
+            cgnsZone = flowDoms(i, 1, 1)%cgnsBlockID
 
-       rangeMin(1) = flowDoms(i,1,1)%iBegor
-       rangeMin(2) = flowDoms(i,1,1)%jBegor
-       rangeMin(3) = flowDoms(i,1,1)%kBegor
+            rangeMin(1) = flowDoms(i, 1, 1)%iBegor
+            rangeMin(2) = flowDoms(i, 1, 1)%jBegor
+            rangeMin(3) = flowDoms(i, 1, 1)%kBegor
 
-       rangeMax(1) = flowDoms(i,1,1)%iEndor
-       rangeMax(2) = flowDoms(i,1,1)%jEndor
-       rangeMax(3) = flowDoms(i,1,1)%kEndor
+            rangeMax(1) = flowDoms(i, 1, 1)%iEndor
+            rangeMax(2) = flowDoms(i, 1, 1)%jEndor
+            rangeMax(3) = flowDoms(i, 1, 1)%kEndor
 
-       ! Allocate the memory for the read buffer.
+            ! Allocate the memory for the read buffer.
 
-       allocate(buffer(il,jl,kl), stat=ierr)
-       if(ierr /= 0)                &
-            call terminate("readGrid", &
-            "Memory allocation error for buffer")
+            allocate (buffer(il, jl, kl), stat=ierr)
+            if (ierr /= 0) &
+                call terminate("readGrid", &
+                               "Memory allocation error for buffer")
 
-       ! Loop over the number of grids to be read.
+            ! Loop over the number of grids to be read.
 
-       nGridLoop: do nn=1,nGridsRead
+            nGridLoop: do nn = 1, nGridsRead
 
-          ! Store the file index a bit easier.
+                ! Store the file index a bit easier.
 
-          cgnsInd = fileIDs(nn)
+                cgnsInd = fileIDs(nn)
 
-          ! Determine the number of coordinates in this zone.
+                ! Determine the number of coordinates in this zone.
 
-          call cg_ncoords_f(cgnsInd, cgnsBase, cgnsZone, &
-               nCoords, ierr)
-          if(ierr /= CG_OK)           &
-               call terminate("readGrid", &
-               "Something wrong when calling cg_ncoords_f")
+                call cg_ncoords_f(cgnsInd, cgnsBase, cgnsZone, &
+                                  nCoords, ierr)
+                if (ierr /= CG_OK) &
+                    call terminate("readGrid", &
+                                   "Something wrong when calling cg_ncoords_f")
 
-          ! The coordinates are only read if 3 coordinates are present.
+                ! The coordinates are only read if 3 coordinates are present.
 
-          checkNcoords: if(nCoords == 3) then
+                checkNcoords: if (nCoords == 3) then
 
-             ! Loop over the number of coordinates. Note that the counter j
-             ! is an integer. This is for compatibility with cgns.
+                    ! Loop over the number of coordinates. Note that the counter j
+                    ! is an integer. This is for compatibility with cgns.
 
-             coords: do j=1,nCoords
+                    coords: do j = 1, nCoords
 
-                ! Get the info for this coordinate.
+                        ! Get the info for this coordinate.
 
-                call cg_coord_info_f(cgnsInd, cgnsBase, cgnsZone, j, &
-                     datatype, coordname, ierr)
-                if(ierr /= CG_OK)           &
-                     call terminate("readGrid", &
-                     "Something wrong when calling &
-                     &cg_coord_info_f")
-
-                ! Update the value of typeMismatch if the datatype of
-                ! the coordinate is not equal to the datatype used in
-                ! the solver.
-
-                if(realTypeCGNS /= datatype) &
-                     typeMismatch = typeMismatch + 1
-
-                ! Set the value of the counter ll, depending on the name.
-                ! Normally the x-coordinate is written first, followed by
-                ! the y-coordinate and finally the z-coordinate. But you
-                ! never know.
-
-                select case(coordname)
-                case (cgnsCoorX)
-                   ll = 1
-                case (cgnsCoorY)
-                   ll = 2
-                case (cgnsCoorZ)
-                   ll = 3
-                case default
-                   write(errorMessage, strings) "Zone ", trim(cgnsDoms(cgnsZone)%zoneName), &
-                     " :Unknown coordinate name, ", trim(coordname), ", in grid file"
-                   call terminate("readGrid", errorMessage)
-                end select
+                        call cg_coord_info_f(cgnsInd, cgnsBase, cgnsZone, j, &
+                                             datatype, coordname, ierr)
+                        if (ierr /= CG_OK) &
+                             call terminate("readGrid", &
+                             "Something wrong when calling &
+                             &cg_coord_info_f")
+
+                        ! Update the value of typeMismatch if the datatype of
+                        ! the coordinate is not equal to the datatype used in
+                        ! the solver.
+
+                        if (realTypeCGNS /= datatype) &
+                            typeMismatch = typeMismatch + 1
 
-                ! Read the coordinates.
+                        ! Set the value of the counter ll, depending on the name.
+                        ! Normally the x-coordinate is written first, followed by
+                        ! the y-coordinate and finally the z-coordinate. But you
+                        ! never know.
 
-                call cg_coord_read_f(cgnsInd, cgnsBase, cgnsZone, &
-                     coordname, realTypeCGNS,     &
-                     rangeMin, rangeMax, buffer, ierr)
-                if(ierr /= CG_OK)           &
-                     call terminate("readGrid", &
-                     "Something wrong when calling cg_coord_read_f")
+                        select case (coordname)
+                        case (cgnsCoorX)
+                            ll = 1
+                        case (cgnsCoorY)
+                            ll = 2
+                        case (cgnsCoorZ)
+                            ll = 3
+                        case default
+                            write (errorMessage, strings) "Zone ", trim(cgnsDoms(cgnsZone)%zoneName), &
+                                " :Unknown coordinate name, ", trim(coordname), ", in grid file"
+                            call terminate("readGrid", errorMessage)
+                        end select
 
-                ! Copy the data into IOVar and scale it to meters.
-                ! The is effectivly copying into the variable x. w is just temporary through IOVar
+                        ! Read the coordinates.
 
-                do kk=1,kl
-                   do jj=1,jl
-                      do ii=1,il
-                         IOVar(i,nn)%w(ii,jj,kk,ll) = buffer(ii,jj,kk) &
-                              * cgnsDoms(cgnsZone)%LRef
-                         !print *,'iovar',IOVar(i,nn)%w(ii,jj,kk,ll),flowdoms(nn,1,1)%x(ii,jj,kk,ll),i,nn
-                      enddo
-                   enddo
-                enddo
+                        call cg_coord_read_f(cgnsInd, cgnsBase, cgnsZone, &
+                                             coordname, realTypeCGNS, &
+                                             rangeMin, rangeMax, buffer, ierr)
+                        if (ierr /= CG_OK) &
+                            call terminate("readGrid", &
+                                           "Something wrong when calling cg_coord_read_f")
 
-             enddo coords
+                        ! Copy the data into IOVar and scale it to meters.
+                        ! The is effectivly copying into the variable x. w is just temporary through IOVar
 
-          else checkNcoords
+                        do kk = 1, kl
+                            do jj = 1, jl
+                                do ii = 1, il
+                                    IOVar(i, nn)%w(ii, jj, kk, ll) = buffer(ii, jj, kk) &
+                                                                     * cgnsDoms(cgnsZone)%LRef
+                                    !print *,'iovar',IOVar(i,nn)%w(ii,jj,kk,ll),flowdoms(nn,1,1)%x(ii,jj,kk,ll),i,nn
+                                end do
+                            end do
+                        end do
 
-             ! There are not three coordinates present in this base.
-             ! An error message is printed and an exit is made.
+                    end do coords
 
-             coordFormat = "(5(A), I1)"
+                    else checkNcoords
 
-             write(errorMessage, coordFormat) "File ", trim(gridFiles(nn)), &
-               ": The number of coordinates of zone ", trim(cgnsDoms(cgnsZone)%zoneName), &
-               " should be 3, not ", nCoords
+                    ! There are not three coordinates present in this base.
+                    ! An error message is printed and an exit is made.
 
-             call terminate("readGrid", errorMessage)
+                    coordFormat = "(5(A), I1)"
 
-          endif checkNcoords
+                    write (errorMessage, coordFormat) "File ", trim(gridFiles(nn)), &
+                        ": The number of coordinates of zone ", trim(cgnsDoms(cgnsZone)%zoneName), &
+                        " should be 3, not ", nCoords
 
-       enddo nGridLoop
+                    call terminate("readGrid", errorMessage)
 
-       ! Release the memory of buffer.
+                end if checkNcoords
 
-       deallocate(buffer, stat=ierr)
-       if(ierr /= 0) call terminate("readGrid", &
-            "Deallocation error for buffer")
-    enddo domainLoop
+            end do nGridLoop
 
-    ! Close the cgns files.
+            ! Release the memory of buffer.
 
-    do nn=1,nGridsRead
-       call cg_close_f(fileIDs(nn), ierr)
-       if(ierr /= CG_OK)           &
-            call terminate("readGrid", &
-            "Something wrong when calling cg_close_f")
-    enddo
+            deallocate (buffer, stat=ierr)
+            if (ierr /= 0) call terminate("readGrid", &
+                                          "Deallocation error for buffer")
+        end do domainLoop
 
-    ! Determine the global sum of typeMismatch; the result only
-    ! needs to be known on processor 0. Use ii as the global buffer
-    ! to store the result. If a type mismatch occured,
-    ! print a warning.
+        ! Close the cgns files.
 
-    call mpi_reduce(typeMismatch, ii, 1, adflow_integer, &
-         mpi_sum, 0, ADflow_comm_world, ierr)
-    if(myID == 0 .and. ii > 0) then
+        do nn = 1, nGridsRead
+            call cg_close_f(fileIDs(nn), ierr)
+            if (ierr /= CG_OK) &
+                call terminate("readGrid", &
+                               "Something wrong when calling cg_close_f")
+        end do
 
-       write(int1String,"(i6)") ii
-       int1String = adjustl(int1String)
+        ! Determine the global sum of typeMismatch; the result only
+        ! needs to be known on processor 0. Use ii as the global buffer
+        ! to store the result. If a type mismatch occured,
+        ! print a warning.
 
-       print "(a)", "#"
-       print "(a)", "#                      Warning"
-       print strings, "# ", trim(int1String)," type mismatches occured when reading the coordinates of the blocks"
-       print "(a)", "#"
+        call mpi_reduce(typeMismatch, ii, 1, adflow_integer, &
+                        mpi_sum, 0, ADflow_comm_world, ierr)
+        if (myID == 0 .and. ii > 0) then
 
-    endif
+            write (int1String, "(i6)") ii
+            int1String = adjustl(int1String)
+
+            print "(a)", "#"
+            print "(a)", "#                      Warning"
+            print strings, "# ", trim(int1String), " type mismatches occured when reading the coordinates of the blocks"
+            print "(a)", "#"
 
-    ! If the coordinates in the solution files must be written in
-    ! meters, correct this info for all cgns blocks.
+        end if
+
+        ! If the coordinates in the solution files must be written in
+        ! meters, correct this info for all cgns blocks.
 
-    if( writeCoorMeter ) then
-       do i=1,cgnsNDom
-          cgnsDoms(i)%mass  = Null
-          cgnsDoms(i)%len   = Meter
-          cgnsDoms(i)%time  = Null
-          cgnsDoms(i)%temp  = Null
-          cgnsDoms(i)%angle = Null
+        if (writeCoorMeter) then
+            do i = 1, cgnsNDom
+                cgnsDoms(i)%mass = Null
+                cgnsDoms(i)%len = Meter
+                cgnsDoms(i)%time = Null
+                cgnsDoms(i)%temp = Null
+                cgnsDoms(i)%angle = Null
 
-          cgnsDoms(i)%gridUnitsSpecified = .true.
-          cgnsDoms(i)%LRef = one
-       enddo
+                cgnsDoms(i)%gridUnitsSpecified = .true.
+                cgnsDoms(i)%LRef = one
+            end do
 
-       LRef = one
-    endif
+            LRef = one
+        end if
 
-  end subroutine readGrid
+    end subroutine readGrid
 
 end module readCGNSGrid
diff --git a/src/preprocessing/coarseUtils.F90 b/src/preprocessing/coarseUtils.F90
index 7ce7e77d2..6952a1ba3 100644
--- a/src/preprocessing/coarseUtils.F90
+++ b/src/preprocessing/coarseUtils.F90
@@ -2,1546 +2,1544 @@ module coarseUtils
 
 contains
 
+    subroutine createCoarseBlocks(level)
+        !
+        !       createCoarseBlocks creates the block data structure for the
+        !       given coarse grid from the 1 level finer grid. Only direct
+        !       info is created, like owned coordinates, block sizes and
+        !       subface info. Indirect info, like face normals, volumes, wall
+        !       distances, etc. Are created later on. That info can be created
+        !       independent of the finer grid.
+        !
+        use constants
+        use block
+        use inputTimeSpectral
+        use coarse1to1Subface
+        use coarseningInfo
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: i, j, k, ii, jj, kk, n1to1
+        integer(kind=intType) :: nn, mm, iil, jjl, kkl, il, jl, kl
+        integer(kind=intType) :: iBeg, jBeg, kBeg, iEnd, jEnd, kEnd
+        integer(kind=intType) :: l1, L2, l3
+        integer(kind=intType) :: levm1, donorOffset, fact
+
+        integer(kind=intType), dimension(:), allocatable :: imap, iimap
+        integer(kind=intType), dimension(:), allocatable :: jmap, jjmap
+        integer(kind=intType), dimension(:), allocatable :: kmap, kkmap
+
+        integer(kind=intType), dimension(:), pointer :: dfine
+
+        logical, dimension(:), pointer :: iCo, jCo, kCo
+
+        ! Store the finer grid level in levm1
+
+        levm1 = level - 1
+
+        ! Determine the total number of 1 to 1 block faces on this
+        ! processor for the fine grid level.
+
+        nSubface1to1 = 0
+        do nn = 1, nDom
+            nSubface1to1 = nSubface1to1 + flowDoms(nn, levm1, 1)%n1to1
+        end do
+
+        ! Allocate the memory for subface1to1 and coarseInfo.
+
+        allocate (subface1to1(nSubface1to1), coarseInfo(nDom), stat=ierr)
+        if (ierr /= 0) &
+             call terminate("createCoarseBlocks", &
+             "Memory allocation failure for subface1to1 &
+             &and coarseInfo")
+
+        ! Loop over the number of blocks on this processor.
+
+        n1to1 = 0
+        domains: do nn = 1, nDom
+
+            ! If levm1 is 1, i.e. the finest grid, set the coarsenings to
+            ! regular, as this is a regular grid.
 
+            if (levm1 == 1) then
+                flowDoms(nn, levm1, 1)%iCoarsened = regular
+                flowDoms(nn, levm1, 1)%jCoarsened = regular
+                flowDoms(nn, levm1, 1)%kCoarsened = regular
+            end if
 
-  subroutine createCoarseBlocks(level)
-    !
-    !       createCoarseBlocks creates the block data structure for the
-    !       given coarse grid from the 1 level finer grid. Only direct
-    !       info is created, like owned coordinates, block sizes and
-    !       subface info. Indirect info, like face normals, volumes, wall
-    !       distances, etc. Are created later on. That info can be created
-    !       independent of the finer grid.
-    !
-    use constants
-    use block
-    use inputTimeSpectral
-    use coarse1to1Subface
-    use coarseningInfo
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: i, j, k, ii, jj, kk, n1to1
-    integer(kind=intType) :: nn, mm, iil, jjl, kkl, il, jl, kl
-    integer(kind=intType) :: iBeg, jBeg, kBeg, iEnd, jEnd, kEnd
-    integer(kind=intType) :: l1, L2, l3
-    integer(kind=intType) :: levm1, donorOffset, fact
-
-    integer(kind=intType), dimension(:), allocatable :: imap, iimap
-    integer(kind=intType), dimension(:), allocatable :: jmap, jjmap
-    integer(kind=intType), dimension(:), allocatable :: kmap, kkmap
-
-    integer(kind=intType), dimension(:), pointer :: dfine
-
-    logical, dimension(:), pointer :: iCo, jCo, kCo
-
-    ! Store the finer grid level in levm1
-
-    levm1 = level -1
-
-    ! Determine the total number of 1 to 1 block faces on this
-    ! processor for the fine grid level.
-
-    nSubface1to1 = 0
-    do nn=1,nDom
-       nSubface1to1 = nSubface1to1 + flowDoms(nn,levm1,1)%n1to1
-    enddo
-
-    ! Allocate the memory for subface1to1 and coarseInfo.
-
-    allocate(subface1to1(nSubface1to1), coarseInfo(nDom), stat=ierr)
-    if(ierr /= 0)                            &
-         call terminate("createCoarseBlocks", &
-         "Memory allocation failure for subface1to1 &
-         &and coarseInfo")
-
-    ! Loop over the number of blocks on this processor.
-
-    n1to1 = 0
-    domains: do nn=1,nDom
-
-       ! If levm1 is 1, i.e. the finest grid, set the coarsenings to
-       ! regular, as this is a regular grid.
-
-       if(levm1 == 1) then
-          flowDoms(nn,levm1,1)%iCoarsened = regular
-          flowDoms(nn,levm1,1)%jCoarsened = regular
-          flowDoms(nn,levm1,1)%kCoarsened = regular
-       endif
-
-       ! Copy cgns block id, blockIsMoving and addGridVelocities.
-       ! Although identical to the values on the finest grid level,
-       ! it is copied anyway for consistency reasons.
-
-       flowDoms(nn,level,1)%cgnsBlockID   = &
-            flowDoms(nn,levm1,1)%cgnsBlockID
-       flowDoms(nn,level,1)%blockIsMoving = &
-            flowDoms(nn,levm1,1)%blockIsMoving
-       flowDoms(nn,level,1)%addGridVelocities = &
-            flowDoms(nn,levm1,1)%addGridVelocities
-
-       ! Store the number of fine nodes a bit easier and allocate the
-       ! memory for the logicals iCo, jCo and kCo and for coarseIs1to1.
-
-       iil = flowDoms(nn,levm1,1)%il
-       jjl = flowDoms(nn,levm1,1)%jl
-       kkl = flowDoms(nn,levm1,1)%kl
-
-       ii = flowDoms(nn,levm1,1)%n1to1
-       allocate(flowDoms(nn,levm1,1)%iCo(iil), &
-            flowDoms(nn,levm1,1)%jCo(jjl), &
-            flowDoms(nn,levm1,1)%kCo(kkl), &
-            coarseInfo(nn)%coarseIs1to1(ii), stat=ierr)
-       if(ierr /= 0)                          &
-            call terminate("createCoarseBlocks", &
-            "Memory allocation failure for iCo, jCo, kCo &
-            &and coarseIs1to1")
-
-       ! Initialize iCo, jCo and kCo such that the block boundaries
-       ! remain, but all other nodes can disappear. Set pointers to
-       ! make the code more readable.
-
-       iCo => flowDoms(nn,levm1,1)%iCo
-       jCo => flowDoms(nn,levm1,1)%jCo
-       kCo => flowDoms(nn,levm1,1)%kCo
-
-       iCo = .false.; iCo(1) = .true.; iCo(iil) = .true.
-       jCo = .false.; jCo(1) = .true.; jCo(jjl) = .true.
-       kCo = .false.; kCo(1) = .true.; kCo(kkl) = .true.
-
-       ! Loop over the subfaces to keep their boundaries. Also internal
-       ! block boundaries are kept.
-
-       do i=1,flowDoms(nn,levm1,1)%nSubface
-          iCo(flowDoms(nn,levm1,1)%inBeg(i)) = .true.
-          iCo(flowDoms(nn,levm1,1)%inEnd(i)) = .true.
-
-          jCo(flowDoms(nn,levm1,1)%jnBeg(i)) = .true.
-          jCo(flowDoms(nn,levm1,1)%jnEnd(i)) = .true.
-
-          kCo(flowDoms(nn,levm1,1)%knBeg(i)) = .true.
-          kCo(flowDoms(nn,levm1,1)%knEnd(i)) = .true.
-       enddo
-
-       ! Create the coarser grid in i-direction. In case the fine grid
-       ! was already a nonregular coarsened block, start at the
-       ! opposite boundary. For a regular grid simply start at the
-       ! left boundary.
+            ! Copy cgns block id, blockIsMoving and addGridVelocities.
+            ! Although identical to the values on the finest grid level,
+            ! it is copied anyway for consistency reasons.
 
-       if(flowDoms(nn,levm1,1)%iCoarsened == leftStarted) then
+            flowDoms(nn, level, 1)%cgnsBlockID = &
+                flowDoms(nn, levm1, 1)%cgnsBlockID
+            flowDoms(nn, level, 1)%blockIsMoving = &
+                flowDoms(nn, levm1, 1)%blockIsMoving
+            flowDoms(nn, level, 1)%addGridVelocities = &
+                flowDoms(nn, levm1, 1)%addGridVelocities
 
-          flowDoms(nn,level,1)%iCoarsened = rightStarted
+            ! Store the number of fine nodes a bit easier and allocate the
+            ! memory for the logicals iCo, jCo and kCo and for coarseIs1to1.
 
-          do i=(iil-1),2,-1
-             if(.not. iCo(i+1)) iCo(i) = .true.
-          enddo
+            iil = flowDoms(nn, levm1, 1)%il
+            jjl = flowDoms(nn, levm1, 1)%jl
+            kkl = flowDoms(nn, levm1, 1)%kl
 
-       else
+            ii = flowDoms(nn, levm1, 1)%n1to1
+            allocate (flowDoms(nn, levm1, 1)%iCo(iil), &
+                      flowDoms(nn, levm1, 1)%jCo(jjl), &
+                      flowDoms(nn, levm1, 1)%kCo(kkl), &
+                      coarseInfo(nn)%coarseIs1to1(ii), stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("createCoarseBlocks", &
+                 "Memory allocation failure for iCo, jCo, kCo &
+                 &and coarseIs1to1")
 
-          flowDoms(nn,level,1)%iCoarsened = leftStarted
+            ! Initialize iCo, jCo and kCo such that the block boundaries
+            ! remain, but all other nodes can disappear. Set pointers to
+            ! make the code more readable.
 
-          do i=2,(iil-1)
-             if(.not. iCo(i-1)) iCo(i) = .true.
-          enddo
+            iCo => flowDoms(nn, levm1, 1)%iCo
+            jCo => flowDoms(nn, levm1, 1)%jCo
+            kCo => flowDoms(nn, levm1, 1)%kCo
 
-       endif
+            iCo = .false.; iCo(1) = .true.; iCo(iil) = .true.
+            jCo = .false.; jCo(1) = .true.; jCo(jjl) = .true.
+            kCo = .false.; kCo(1) = .true.; kCo(kkl) = .true.
 
-       ! Create the coarser grid in j-direction. Same story as in
-       ! i-direction.
+            ! Loop over the subfaces to keep their boundaries. Also internal
+            ! block boundaries are kept.
 
-       if(flowDoms(nn,levm1,1)%jCoarsened == leftStarted) then
+            do i = 1, flowDoms(nn, levm1, 1)%nSubface
+                iCo(flowDoms(nn, levm1, 1)%inBeg(i)) = .true.
+                iCo(flowDoms(nn, levm1, 1)%inEnd(i)) = .true.
 
-          flowDoms(nn,level,1)%jCoarsened = rightStarted
+                jCo(flowDoms(nn, levm1, 1)%jnBeg(i)) = .true.
+                jCo(flowDoms(nn, levm1, 1)%jnEnd(i)) = .true.
 
-          do j=(jjl-1),2,-1
-             if(.not. jCo(j+1)) jCo(j) = .true.
-          enddo
+                kCo(flowDoms(nn, levm1, 1)%knBeg(i)) = .true.
+                kCo(flowDoms(nn, levm1, 1)%knEnd(i)) = .true.
+            end do
 
-       else
+            ! Create the coarser grid in i-direction. In case the fine grid
+            ! was already a nonregular coarsened block, start at the
+            ! opposite boundary. For a regular grid simply start at the
+            ! left boundary.
 
-          flowDoms(nn,level,1)%jCoarsened = leftStarted
+            if (flowDoms(nn, levm1, 1)%iCoarsened == leftStarted) then
 
-          do j=2,(jjl-1)
-             if(.not. jCo(j-1)) jCo(j) = .true.
-          enddo
+                flowDoms(nn, level, 1)%iCoarsened = rightStarted
 
-       endif
+                do i = (iil - 1), 2, -1
+                    if (.not. iCo(i + 1)) iCo(i) = .true.
+                end do
 
-       ! Create the coarser grid in k-direction. Same story as in
-       ! i- and j-direction.
+            else
 
-       if(flowDoms(nn,levm1,1)%kCoarsened == leftStarted) then
+                flowDoms(nn, level, 1)%iCoarsened = leftStarted
 
-          flowDoms(nn,level,1)%kCoarsened = rightStarted
+                do i = 2, (iil - 1)
+                    if (.not. iCo(i - 1)) iCo(i) = .true.
+                end do
 
-          do k=(kkl-1),2,-1
-             if(.not. kCo(k+1)) kCo(k) = .true.
-          enddo
+            end if
 
-       else
+            ! Create the coarser grid in j-direction. Same story as in
+            ! i-direction.
 
-          flowDoms(nn,level,1)%kCoarsened = leftStarted
+            if (flowDoms(nn, levm1, 1)%jCoarsened == leftStarted) then
 
-          do k=2,(kkl-1)
-             if(.not. kCo(k-1)) kCo(k) = .true.
-          enddo
+                flowDoms(nn, level, 1)%jCoarsened = rightStarted
 
-       endif
+                do j = (jjl - 1), 2, -1
+                    if (.not. jCo(j + 1)) jCo(j) = .true.
+                end do
 
-       ! Determine the number of points in each direction for the
-       ! coarse grid.
+            else
 
-       il = 0
-       do i=1,iil
-          if( iCo(i) ) il = il +1
-       enddo
+                flowDoms(nn, level, 1)%jCoarsened = leftStarted
 
-       jl = 0
-       do j=1,jjl
-          if( jCo(j) ) jl = jl +1
-       enddo
+                do j = 2, (jjl - 1)
+                    if (.not. jCo(j - 1)) jCo(j) = .true.
+                end do
 
-       kl = 0
-       do k=1,kkl
-          if( kCo(k) ) kl = kl +1
-       enddo
+            end if
 
-       ! Store the number of nodes and cells in the three directions
-       ! and the dimensions for the halo based quantities.
+            ! Create the coarser grid in k-direction. Same story as in
+            ! i- and j-direction.
 
-       flowDoms(nn,level,1)%il = il
-       flowDoms(nn,level,1)%jl = jl
-       flowDoms(nn,level,1)%kl = kl
+            if (flowDoms(nn, levm1, 1)%kCoarsened == leftStarted) then
 
-       flowDoms(nn,level,1)%nx = il - 1
-       flowDoms(nn,level,1)%ny = jl - 1
-       flowDoms(nn,level,1)%nz = kl - 1
+                flowDoms(nn, level, 1)%kCoarsened = rightStarted
 
-       flowDoms(nn,level,1)%ie = il + 1
-       flowDoms(nn,level,1)%je = jl + 1
-       flowDoms(nn,level,1)%ke = kl + 1
+                do k = (kkl - 1), 2, -1
+                    if (.not. kCo(k + 1)) kCo(k) = .true.
+                end do
 
-       flowDoms(nn,level,1)%ib = il + 2
-       flowDoms(nn,level,1)%jb = jl + 2
-       flowDoms(nn,level,1)%kb = kl + 2
+            else
 
-       ! If the coarsening was regular, i.e. if the number of fine
-       ! grid cells is twice the number of coarse grid cells, reset
-       ! the coarsening to regular.
+                flowDoms(nn, level, 1)%kCoarsened = leftStarted
 
-       if(flowDoms(nn,levm1,1)%nx == 2*flowDoms(nn,level,1)%nx) &
-            flowDoms(nn,level,1)%iCoarsened = regular
-       if(flowDoms(nn,levm1,1)%ny == 2*flowDoms(nn,level,1)%ny) &
-            flowDoms(nn,level,1)%jCoarsened = regular
-       if(flowDoms(nn,levm1,1)%nz == 2*flowDoms(nn,level,1)%nz) &
-            flowDoms(nn,level,1)%kCoarsened = regular
-       !
-       !         The variables, which control the restriction to and the
-       !         interpolation from the coarser grid level.
-       !
-       ! Allocate the memory.
+                do k = 2, (kkl - 1)
+                    if (.not. kCo(k - 1)) kCo(k) = .true.
+                end do
 
-       i = flowDoms(nn,level,1)%ie
-       j = flowDoms(nn,level,1)%je
-       k = flowDoms(nn,level,1)%ke
-
-       allocate(flowDoms(nn,level,1)%mgIFine(1:i,2),     &
-            flowDoms(nn,level,1)%mgJFine(1:j,2),     &
-            flowDoms(nn,level,1)%mgKFine(1:k,2),     &
-            flowDoms(nn,level,1)%mgIWeight(2:il),    &
-            flowDoms(nn,level,1)%mgJWeight(2:jl),    &
-            flowDoms(nn,level,1)%mgKWeight(2:kl),    &
-            flowDoms(nn,levm1,1)%mgICoarse(2:iil,2), &
-            flowDoms(nn,levm1,1)%mgJCoarse(2:jjl,2), &
-            flowDoms(nn,levm1,1)%mgKCoarse(2:kkl,2), stat=ierr)
-       if(ierr /= 0)                            &
-            call terminate("createCoarseBlocks", &
-            "Memory allocation failure for interpolation &
-            &variables")
-
-       ! Set the halo values to the halo indices of the fine level.
-
-       flowDoms(nn,level,1)%mgIFine(1,1:2) = (/ 0, 1 /)
-       flowDoms(nn,level,1)%mgJFine(1,1:2) = (/ 0, 1 /)
-       flowDoms(nn,level,1)%mgKFine(1,1:2) = (/ 0, 1 /)
-
-       flowDoms(nn,level,1)%mgIFine(i,1) = flowDoms(nn,levm1,1)%ie
-       flowDoms(nn,level,1)%mgIFine(i,2) = flowDoms(nn,levm1,1)%ib
-       flowDoms(nn,level,1)%mgJFine(j,1) = flowDoms(nn,levm1,1)%je
-       flowDoms(nn,level,1)%mgJFine(j,2) = flowDoms(nn,levm1,1)%jb
-       flowDoms(nn,level,1)%mgKFine(k,1) = flowDoms(nn,levm1,1)%ke
-       flowDoms(nn,level,1)%mgKFine(k,2) = flowDoms(nn,levm1,1)%kb
-
-       ! Determine the restriction variables in i-direction.
-
-       ii = 2
-       do i=2,iil
-          if( iCo(i) ) then
-             if( iCo(i-1) ) then
-                flowDoms(nn,level,1)%mgIFine(ii,1) = i
-                flowDoms(nn,level,1)%mgIFine(ii,2) = i
-                flowDoms(nn,level,1)%mgIWeight(ii) = half
-             else
-                flowDoms(nn,level,1)%mgIFine(ii,1) = i-1
-                flowDoms(nn,level,1)%mgIFine(ii,2) = i
-                flowDoms(nn,level,1)%mgIWeight(ii) = one
-             endif
-             ii = ii+1
-          endif
-       enddo
-
-       ! Determine the restriction variables in j-direction.
-
-       jj = 2
-       do j=2,jjl
-          if( jCo(j) ) then
-             if( jCo(j-1) ) then
-                flowDoms(nn,level,1)%mgJFine(jj,1) = j
-                flowDoms(nn,level,1)%mgJFine(jj,2) = j
-                flowDoms(nn,level,1)%mgJWeight(jj) = half
-             else
-                flowDoms(nn,level,1)%mgJFine(jj,1) = j-1
-                flowDoms(nn,level,1)%mgJFine(jj,2) = j
-                flowDoms(nn,level,1)%mgJWeight(jj) = one
-             endif
-             jj = jj+1
-          endif
-       enddo
-
-       ! Determine the restriction variables in k-direction.
-
-       kk = 2
-       do k=2,kkl
-          if( kCo(k) ) then
-             if( kCo(k-1) ) then
-                flowDoms(nn,level,1)%mgKFine(kk,1) = k
-                flowDoms(nn,level,1)%mgKFine(kk,2) = k
-                flowDoms(nn,level,1)%mgKWeight(kk) = half
-             else
-                flowDoms(nn,level,1)%mgKFine(kk,1) = k-1
-                flowDoms(nn,level,1)%mgKFine(kk,2) = k
-                flowDoms(nn,level,1)%mgKWeight(kk) = one
-             endif
-             kk = kk+1
-          endif
-       enddo
-
-       ! Determine the interpolation variables in i-direction.
-
-       ii = 2
-       do i=2,iil
-          if( iCo(i) ) then
-             if( iCo(i-1) ) then
-                flowDoms(nn,levm1,1)%mgICoarse(i,1) = ii
-                flowDoms(nn,levm1,1)%mgICoarse(i,2) = ii
-             else
-                flowDoms(nn,levm1,1)%mgICoarse(i,1) = ii
-                flowDoms(nn,levm1,1)%mgICoarse(i,2) = ii+1
-             endif
-             ii = ii+1
-          else
-             flowDoms(nn,levm1,1)%mgICoarse(i,1) = ii
-             flowDoms(nn,levm1,1)%mgICoarse(i,2) = ii-1
-          endif
-       enddo
-
-       ! Determine the interpolation variables in j-direction.
-
-       jj = 2
-       do j=2,jjl
-          if( jCo(j) ) then
-             if( jCo(j-1) ) then
-                flowDoms(nn,levm1,1)%mgJCoarse(j,1) = jj
-                flowDoms(nn,levm1,1)%mgJCoarse(j,2) = jj
-             else
-                flowDoms(nn,levm1,1)%mgJCoarse(j,1) = jj
-                flowDoms(nn,levm1,1)%mgJCoarse(j,2) = jj+1
-             endif
-             jj = jj+1
-          else
-             flowDoms(nn,levm1,1)%mgJCoarse(j,1) = jj
-             flowDoms(nn,levm1,1)%mgJCoarse(j,2) = jj-1
-          endif
-       enddo
-
-       ! Determine the interpolation variables in k-direction.
-
-       kk = 2
-       do k=2,kkl
-          if( kCo(k) ) then
-             if( kCo(k-1) ) then
-                flowDoms(nn,levm1,1)%mgKCoarse(k,1) = kk
-                flowDoms(nn,levm1,1)%mgKCoarse(k,2) = kk
-             else
-                flowDoms(nn,levm1,1)%mgKCoarse(k,1) = kk
-                flowDoms(nn,levm1,1)%mgKCoarse(k,2) = kk+1
-             endif
-             kk = kk+1
-          else
-             flowDoms(nn,levm1,1)%mgKCoarse(k,1) = kk
-             flowDoms(nn,levm1,1)%mgKCoarse(k,2) = kk-1
-          endif
-       enddo
-       !
-       !         The coordinate mapping from fine to coarse and coarse to
-       !         fine. These are needed to determine the coarse grid subface
-       !         info.
-       !
-       ! Allocate the memory.
-
-       allocate(imap(iil), jmap(jjl), kmap(kkl), &
-            iimap(il), jjmap(jl), kkmap(kl), stat=ierr)
-       if(ierr /= 0)                          &
-            call terminate("createCoarseBlocks", &
-            "Memory allocation failure for imap, etc")
-
-       ! Set the values of imap and iimap.
-
-       ii = 1
-       do i=1,iil
-          if( iCo(i) ) then
-             imap(i)   = ii
-             iimap(ii) = i
-             ii = ii+1
-          endif
-       enddo
-
-       ! Set the values of jmap and jjmap.
-
-       jj = 1
-       do j=1,jjl
-          if( jCo(j) ) then
-             jmap(j)   = jj
-             jjmap(jj) = j
-             jj = jj+1
-          endif
-       enddo
-
-       ! Set the values of kmap and kkmap.
-
-       kk = 1
-       do k=1,kkl
-          if( kCo(k) ) then
-             kmap(k)   = kk
-             kkmap(kk) = k
-             kk = kk+1
-          endif
-       enddo
-       !
-       !         The subface info. Except for the subface range all other
-       !         data can be copied. The range must be adapted and the donor
-       !         range is created later, because the coarsening info of the
-       !         donor block must be known.
-       !
-       flowDoms(nn,level,1)%nSubface   = flowDoms(nn,levm1,1)%nSubface
-       flowDoms(nn,level,1)%n1to1      = flowDoms(nn,levm1,1)%n1to1
-       flowDoms(nn,level,1)%nBocos     = flowDoms(nn,levm1,1)%nBocos
-       flowDoms(nn,level,1)%nViscBocos = flowDoms(nn,levm1,1)%nViscBocos
-
-       ! Allocate the memory.
-
-       mm = flowDoms(nn,level,1)%nSubface
-       allocate(flowDoms(nn,level,1)%BCType(mm),      &
-            flowDoms(nn,level,1)%BCFaceID(mm),    &
-            flowDoms(nn,level,1)%cgnsSubface(mm), &
-            flowDoms(nn,level,1)%neighBlock(mm),  &
-            flowDoms(nn,level,1)%neighProc(mm),   &
-            flowDoms(nn,level,1)%groupNum(mm),    &
-            flowDoms(nn,level,1)%inBeg(mm),       &
-            flowDoms(nn,level,1)%jnBeg(mm),       &
-            flowDoms(nn,level,1)%knBeg(mm),       &
-            flowDoms(nn,level,1)%inEnd(mm),       &
-            flowDoms(nn,level,1)%jnEnd(mm),       &
-            flowDoms(nn,level,1)%knEnd(mm),       &
-            flowDoms(nn,level,1)%dinBeg(mm),      &
-            flowDoms(nn,level,1)%djnBeg(mm),      &
-            flowDoms(nn,level,1)%dknBeg(mm),      &
-            flowDoms(nn,level,1)%dinEnd(mm),      &
-            flowDoms(nn,level,1)%djnEnd(mm),      &
-            flowDoms(nn,level,1)%dknEnd(mm),      &
-            flowDoms(nn,level,1)%l1(mm),          &
-            flowDoms(nn,level,1)%l2(mm),          &
-            flowDoms(nn,level,1)%l3(mm),          &
-            stat=ierr)
-       if(ierr /= 0)                          &
-            call terminate("createCoarseBlocks", &
-            "Memory allocation failure for subface info")
-
-       ! Loop over the subfaces.
-
-       subfaces: do mm=1,flowDoms(nn,level,1)%nSubface
-
-          ! Determine the range of the coarse subface.
-
-          flowDoms(nn,level,1)%inBeg(mm) = &
-               imap(flowDoms(nn,levm1,1)%inBeg(mm))
-          flowDoms(nn,level,1)%jnBeg(mm) = &
-               jmap(flowDoms(nn,levm1,1)%jnBeg(mm))
-          flowDoms(nn,level,1)%knBeg(mm) = &
-               kmap(flowDoms(nn,levm1,1)%knBeg(mm))
-
-          flowDoms(nn,level,1)%inEnd(mm) = &
-               imap(flowDoms(nn,levm1,1)%inEnd(mm))
-          flowDoms(nn,level,1)%jnEnd(mm) = &
-               jmap(flowDoms(nn,levm1,1)%jnEnd(mm))
-          flowDoms(nn,level,1)%knEnd(mm) = &
-               kmap(flowDoms(nn,levm1,1)%knEnd(mm))
-
-          ! Copy the rest of the subface info.
-
-          flowDoms(nn,level,1)%BCType(mm)     = &
-               flowDoms(nn,levm1,1)%BCType(mm)
-          flowDoms(nn,level,1)%BCFaceID(mm)   = &
-               flowDoms(nn,levm1,1)%BCFaceID(mm)
-          flowDoms(nn,level,1)%neighBlock(mm) = &
-               flowDoms(nn,levm1,1)%neighBlock(mm)
-          flowDoms(nn,level,1)%neighProc(mm) = &
-               flowDoms(nn,levm1,1)%neighProc(mm)
-          flowDoms(nn,level,1)%groupNum(mm)  = &
-               flowDoms(nn,levm1,1)%groupNum(mm)
-          flowDoms(nn,level,1)%cgnsSubface(mm) = &
-               flowDoms(nn,levm1,1)%cgnsSubface(mm)
-
-          flowDoms(nn,level,1)%l1(mm) = flowDoms(nn,levm1,1)%l1(mm)
-          flowDoms(nn,level,1)%l2(mm) = flowDoms(nn,levm1,1)%l2(mm)
-          flowDoms(nn,level,1)%l3(mm) = flowDoms(nn,levm1,1)%l3(mm)
-
-          ! Create some info if this is a 1 to 1 subface. This is stored
-          ! in the array subface1to1 and is needed to determine the donor
-          ! info and to check if this is still a 1 to 1 subface on the
-          ! coarse grid.
-
-          subface_1to1: if(mm > flowDoms(nn,level,1)%nBocos .and. &
-               mm <= (flowDoms(nn,level,1)%nBocos     &
-               +  flowDoms(nn,level,1)%n1to1)) then
-
-             ! Update the counter.
-
-             n1to1 = n1to1 +1
-
-             ! Store the range of the 1 to 1 subface.
-
-             subface1to1(n1to1)%iBeg = flowDoms(nn,level,1)%inBeg(mm)
-             subface1to1(n1to1)%jBeg = flowDoms(nn,level,1)%jnBeg(mm)
-             subface1to1(n1to1)%kBeg = flowDoms(nn,level,1)%knBeg(mm)
-             subface1to1(n1to1)%iEnd = flowDoms(nn,level,1)%inEnd(mm)
-             subface1to1(n1to1)%jEnd = flowDoms(nn,level,1)%jnEnd(mm)
-             subface1to1(n1to1)%kEnd = flowDoms(nn,level,1)%knEnd(mm)
-
-             ! Store the new range of this subface a bit easier.
-             ! Make sure that i1, etc contains the lowest index.
-
-             iBeg = min(subface1to1(n1to1)%iBeg, subface1to1(n1to1)%iEnd)
-             jBeg = min(subface1to1(n1to1)%jBeg, subface1to1(n1to1)%jEnd)
-             kBeg = min(subface1to1(n1to1)%kBeg, subface1to1(n1to1)%kEnd)
-
-             iEnd = max(subface1to1(n1to1)%iBeg, subface1to1(n1to1)%iEnd)
-             jEnd = max(subface1to1(n1to1)%jBeg, subface1to1(n1to1)%jEnd)
-             kEnd = max(subface1to1(n1to1)%kBeg, subface1to1(n1to1)%kEnd)
-
-             ! And copy it back into subface1to1.
-
-             subface1to1(n1to1)%iBeg = iBeg
-             subface1to1(n1to1)%jBeg = jBeg
-             subface1to1(n1to1)%kBeg = kBeg
-             subface1to1(n1to1)%iEnd = iEnd
-             subface1to1(n1to1)%jEnd = jEnd
-             subface1to1(n1to1)%kEnd = kEnd
-
-             ! Store the processor and block ID of the donor.
-
-             subface1to1(n1to1)%neighProc  = &
-                  flowDoms(nn,level,1)%neighProc(mm)
-             subface1to1(n1to1)%neighBlock = &
-                  flowDoms(nn,level,1)%neighBlock(mm)
-
-             ! Store the shorthand for the transformation matrix a
-             ! bit easier.
-
-             l1 = flowDoms(nn,level,1)%l1(mm)
-             L2 = flowDoms(nn,level,1)%l2(mm)
-             l3 = flowDoms(nn,level,1)%l3(mm)
-
-             ! Determine the fine grid donor indices of the subface.
-             ! i-direction.
-
-             fact = 1
-             if(l1 < 0) fact = -1
-             ii = iEnd - iBeg + 1
-
-             l1 = abs(l1)
-             select case(l1)
-             case (1_intType)
-                allocate(subface1to1(n1to1)%idfine(ii), stat=ierr)
-                dfine => subface1to1(n1to1)%idfine
-                subface1to1(n1to1)%ndi = ii
-                donorOffset = flowDoms(nn,levm1,1)%dinBeg(mm)
-             case (2_intType)
-                allocate(subface1to1(n1to1)%jdfine(ii), stat=ierr)
-                dfine => subface1to1(n1to1)%jdfine
-                subface1to1(n1to1)%ndj = ii
-                donorOffset = flowDoms(nn,levm1,1)%djnBeg(mm)
-             case (3_intType)
-                allocate(subface1to1(n1to1)%kdfine(ii), stat=ierr)
-                dfine => subface1to1(n1to1)%kdfine
-                subface1to1(n1to1)%ndk = ii
-                donorOffset = flowDoms(nn,levm1,1)%dknBeg(mm)
-             end select
-
-             if(ierr /= 0)                          &
-                  call terminate("createCoarseBlocks", &
-                  "Memory allocation failure for idfine")
-
-             ii = 1
-             do i=iBeg,iEnd
-                jj = fact*(iimap(i) - flowDoms(nn,levm1,1)%inBeg(mm))
-                dfine(ii) = jj + donorOffset
-                ii = ii+1
-             enddo
-
-             ! Determine the fine grid donor indices of the subface.
-             ! j-direction.
-
-             fact = 1
-             if(l2 < 0) fact = -1
-             ii = jEnd - jBeg + 1
-
-             L2 = abs(l2)
-             select case(l2)
-             case (1_intType)
-                allocate(subface1to1(n1to1)%idfine(ii), stat=ierr)
-                dfine => subface1to1(n1to1)%idfine
-                subface1to1(n1to1)%ndi = ii
-                donorOffset = flowDoms(nn,levm1,1)%dinBeg(mm)
-             case (2_intType)
-                allocate(subface1to1(n1to1)%jdfine(ii), stat=ierr)
-                dfine => subface1to1(n1to1)%jdfine
-                subface1to1(n1to1)%ndj = ii
-                donorOffset = flowDoms(nn,levm1,1)%djnBeg(mm)
-             case (3_intType)
-                allocate(subface1to1(n1to1)%kdfine(ii), stat=ierr)
-                dfine => subface1to1(n1to1)%kdfine
-                subface1to1(n1to1)%ndk = ii
-                donorOffset = flowDoms(nn,levm1,1)%dknBeg(mm)
-             end select
-
-             if(ierr /= 0)                          &
-                  call terminate("createCoarseBlocks", &
-                  "Memory allocation failure for jdfine")
-
-             jj = 1
-             do j=jBeg,jEnd
-                ii = fact*(jjmap(j) - flowDoms(nn,levm1,1)%jnBeg(mm))
-                dfine(jj) = ii + donorOffset
-                jj = jj+1
-             enddo
-
-             ! Determine the fine grid donor indices of the subface.
-             ! k-direction.
-
-             fact = 1
-             if(l3 < 0) fact = -1
-             ii = kEnd - kBeg + 1
-
-             l3 = abs(l3)
-             select case(l3)
-             case (1_intType)
-                allocate(subface1to1(n1to1)%idfine(ii), stat=ierr)
-                dfine => subface1to1(n1to1)%idfine
-                subface1to1(n1to1)%ndi = ii
-                donorOffset = flowDoms(nn,levm1,1)%dinBeg(mm)
-             case (2_intType)
-                allocate(subface1to1(n1to1)%jdfine(ii), stat=ierr)
-                dfine => subface1to1(n1to1)%jdfine
-                subface1to1(n1to1)%ndj = ii
-                donorOffset = flowDoms(nn,levm1,1)%djnBeg(mm)
-             case (3_intType)
-                allocate(subface1to1(n1to1)%kdfine(ii), stat=ierr)
-                dfine => subface1to1(n1to1)%kdfine
-                subface1to1(n1to1)%ndk = ii
-                donorOffset = flowDoms(nn,levm1,1)%dknBeg(mm)
-             end select
-
-             if(ierr /= 0)                          &
-                  call terminate("createCoarseBlocks", &
-                  "Memory allocation failure for kdfine")
-
-             kk = 1
-             do k=kBeg,kEnd
-                ii = fact*(kkmap(k) - flowDoms(nn,levm1,1)%knBeg(mm))
-                dfine(kk) = ii + donorOffset
-                kk = kk+1
-             enddo
-
-          endif subface_1to1
-
-       enddo subfaces
-
-       ! Release the local memory allocated inside this loop.
-
-       deallocate(imap, jmap, kmap, iimap, jjmap, kkmap, stat=ierr)
-       if(ierr /= 0)                          &
-            call terminate("createCoarseBlocks", &
-            "Deallocation error for imap, iimap, etc.")
+            end if
+
+            ! Determine the number of points in each direction for the
+            ! coarse grid.
 
-       ! Allocate the memory for the coordinates of all time spectral
-       ! solutions.
+            il = 0
+            do i = 1, iil
+                if (iCo(i)) il = il + 1
+            end do
 
-       ii = flowDoms(nn,level,1)%ie
-       jj = flowDoms(nn,level,1)%je
-       kk = flowDoms(nn,level,1)%ke
+            jl = 0
+            do j = 1, jjl
+                if (jCo(j)) jl = jl + 1
+            end do
 
-       do mm=1,nTimeIntervalsSpectral
-          allocate(flowDoms(nn,level,mm)%x(0:ii,0:jj,0:kk,3), stat=ierr)
-          if(ierr /= 0)                          &
-               call terminate("createCoarseBlocks", &
-               "Memory allocation failure for x")
-       enddo
+            kl = 0
+            do k = 1, kkl
+                if (kCo(k)) kl = kl + 1
+            end do
+
+            ! Store the number of nodes and cells in the three directions
+            ! and the dimensions for the halo based quantities.
+
+            flowDoms(nn, level, 1)%il = il
+            flowDoms(nn, level, 1)%jl = jl
+            flowDoms(nn, level, 1)%kl = kl
+
+            flowDoms(nn, level, 1)%nx = il - 1
+            flowDoms(nn, level, 1)%ny = jl - 1
+            flowDoms(nn, level, 1)%nz = kl - 1
+
+            flowDoms(nn, level, 1)%ie = il + 1
+            flowDoms(nn, level, 1)%je = jl + 1
+            flowDoms(nn, level, 1)%ke = kl + 1
+
+            flowDoms(nn, level, 1)%ib = il + 2
+            flowDoms(nn, level, 1)%jb = jl + 2
+            flowDoms(nn, level, 1)%kb = kl + 2
+
+            ! If the coarsening was regular, i.e. if the number of fine
+            ! grid cells is twice the number of coarse grid cells, reset
+            ! the coarsening to regular.
+
+            if (flowDoms(nn, levm1, 1)%nx == 2 * flowDoms(nn, level, 1)%nx) &
+                flowDoms(nn, level, 1)%iCoarsened = regular
+            if (flowDoms(nn, levm1, 1)%ny == 2 * flowDoms(nn, level, 1)%ny) &
+                flowDoms(nn, level, 1)%jCoarsened = regular
+            if (flowDoms(nn, levm1, 1)%nz == 2 * flowDoms(nn, level, 1)%nz) &
+                flowDoms(nn, level, 1)%kCoarsened = regular
+            !
+            !         The variables, which control the restriction to and the
+            !         interpolation from the coarser grid level.
+            !
+            ! Allocate the memory.
+
+            i = flowDoms(nn, level, 1)%ie
+            j = flowDoms(nn, level, 1)%je
+            k = flowDoms(nn, level, 1)%ke
+
+            allocate (flowDoms(nn, level, 1)%mgIFine(1:i, 2), &
+                      flowDoms(nn, level, 1)%mgJFine(1:j, 2), &
+                      flowDoms(nn, level, 1)%mgKFine(1:k, 2), &
+                      flowDoms(nn, level, 1)%mgIWeight(2:il), &
+                      flowDoms(nn, level, 1)%mgJWeight(2:jl), &
+                      flowDoms(nn, level, 1)%mgKWeight(2:kl), &
+                      flowDoms(nn, levm1, 1)%mgICoarse(2:iil, 2), &
+                      flowDoms(nn, levm1, 1)%mgJCoarse(2:jjl, 2), &
+                      flowDoms(nn, levm1, 1)%mgKCoarse(2:kkl, 2), stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("createCoarseBlocks", &
+                 "Memory allocation failure for interpolation &
+                 &variables")
+
+            ! Set the halo values to the halo indices of the fine level.
+
+            flowDoms(nn, level, 1)%mgIFine(1, 1:2) = (/0, 1/)
+            flowDoms(nn, level, 1)%mgJFine(1, 1:2) = (/0, 1/)
+            flowDoms(nn, level, 1)%mgKFine(1, 1:2) = (/0, 1/)
+
+            flowDoms(nn, level, 1)%mgIFine(i, 1) = flowDoms(nn, levm1, 1)%ie
+            flowDoms(nn, level, 1)%mgIFine(i, 2) = flowDoms(nn, levm1, 1)%ib
+            flowDoms(nn, level, 1)%mgJFine(j, 1) = flowDoms(nn, levm1, 1)%je
+            flowDoms(nn, level, 1)%mgJFine(j, 2) = flowDoms(nn, levm1, 1)%jb
+            flowDoms(nn, level, 1)%mgKFine(k, 1) = flowDoms(nn, levm1, 1)%ke
+            flowDoms(nn, level, 1)%mgKFine(k, 2) = flowDoms(nn, levm1, 1)%kb
+
+            ! Determine the restriction variables in i-direction.
+
+            ii = 2
+            do i = 2, iil
+                if (iCo(i)) then
+                    if (iCo(i - 1)) then
+                        flowDoms(nn, level, 1)%mgIFine(ii, 1) = i
+                        flowDoms(nn, level, 1)%mgIFine(ii, 2) = i
+                        flowDoms(nn, level, 1)%mgIWeight(ii) = half
+                    else
+                        flowDoms(nn, level, 1)%mgIFine(ii, 1) = i - 1
+                        flowDoms(nn, level, 1)%mgIFine(ii, 2) = i
+                        flowDoms(nn, level, 1)%mgIWeight(ii) = one
+                    end if
+                    ii = ii + 1
+                end if
+            end do
+
+            ! Determine the restriction variables in j-direction.
+
+            jj = 2
+            do j = 2, jjl
+                if (jCo(j)) then
+                    if (jCo(j - 1)) then
+                        flowDoms(nn, level, 1)%mgJFine(jj, 1) = j
+                        flowDoms(nn, level, 1)%mgJFine(jj, 2) = j
+                        flowDoms(nn, level, 1)%mgJWeight(jj) = half
+                    else
+                        flowDoms(nn, level, 1)%mgJFine(jj, 1) = j - 1
+                        flowDoms(nn, level, 1)%mgJFine(jj, 2) = j
+                        flowDoms(nn, level, 1)%mgJWeight(jj) = one
+                    end if
+                    jj = jj + 1
+                end if
+            end do
+
+            ! Determine the restriction variables in k-direction.
+
+            kk = 2
+            do k = 2, kkl
+                if (kCo(k)) then
+                    if (kCo(k - 1)) then
+                        flowDoms(nn, level, 1)%mgKFine(kk, 1) = k
+                        flowDoms(nn, level, 1)%mgKFine(kk, 2) = k
+                        flowDoms(nn, level, 1)%mgKWeight(kk) = half
+                    else
+                        flowDoms(nn, level, 1)%mgKFine(kk, 1) = k - 1
+                        flowDoms(nn, level, 1)%mgKFine(kk, 2) = k
+                        flowDoms(nn, level, 1)%mgKWeight(kk) = one
+                    end if
+                    kk = kk + 1
+                end if
+            end do
+
+            ! Determine the interpolation variables in i-direction.
+
+            ii = 2
+            do i = 2, iil
+                if (iCo(i)) then
+                    if (iCo(i - 1)) then
+                        flowDoms(nn, levm1, 1)%mgICoarse(i, 1) = ii
+                        flowDoms(nn, levm1, 1)%mgICoarse(i, 2) = ii
+                    else
+                        flowDoms(nn, levm1, 1)%mgICoarse(i, 1) = ii
+                        flowDoms(nn, levm1, 1)%mgICoarse(i, 2) = ii + 1
+                    end if
+                    ii = ii + 1
+                else
+                    flowDoms(nn, levm1, 1)%mgICoarse(i, 1) = ii
+                    flowDoms(nn, levm1, 1)%mgICoarse(i, 2) = ii - 1
+                end if
+            end do
+
+            ! Determine the interpolation variables in j-direction.
+
+            jj = 2
+            do j = 2, jjl
+                if (jCo(j)) then
+                    if (jCo(j - 1)) then
+                        flowDoms(nn, levm1, 1)%mgJCoarse(j, 1) = jj
+                        flowDoms(nn, levm1, 1)%mgJCoarse(j, 2) = jj
+                    else
+                        flowDoms(nn, levm1, 1)%mgJCoarse(j, 1) = jj
+                        flowDoms(nn, levm1, 1)%mgJCoarse(j, 2) = jj + 1
+                    end if
+                    jj = jj + 1
+                else
+                    flowDoms(nn, levm1, 1)%mgJCoarse(j, 1) = jj
+                    flowDoms(nn, levm1, 1)%mgJCoarse(j, 2) = jj - 1
+                end if
+            end do
+
+            ! Determine the interpolation variables in k-direction.
+
+            kk = 2
+            do k = 2, kkl
+                if (kCo(k)) then
+                    if (kCo(k - 1)) then
+                        flowDoms(nn, levm1, 1)%mgKCoarse(k, 1) = kk
+                        flowDoms(nn, levm1, 1)%mgKCoarse(k, 2) = kk
+                    else
+                        flowDoms(nn, levm1, 1)%mgKCoarse(k, 1) = kk
+                        flowDoms(nn, levm1, 1)%mgKCoarse(k, 2) = kk + 1
+                    end if
+                    kk = kk + 1
+                else
+                    flowDoms(nn, levm1, 1)%mgKCoarse(k, 1) = kk
+                    flowDoms(nn, levm1, 1)%mgKCoarse(k, 2) = kk - 1
+                end if
+            end do
+            !
+            !         The coordinate mapping from fine to coarse and coarse to
+            !         fine. These are needed to determine the coarse grid subface
+            !         info.
+            !
+            ! Allocate the memory.
+
+            allocate (imap(iil), jmap(jjl), kmap(kkl), &
+                      iimap(il), jjmap(jl), kkmap(kl), stat=ierr)
+            if (ierr /= 0) &
+                call terminate("createCoarseBlocks", &
+                               "Memory allocation failure for imap, etc")
+
+            ! Set the values of imap and iimap.
+
+            ii = 1
+            do i = 1, iil
+                if (iCo(i)) then
+                    imap(i) = ii
+                    iimap(ii) = i
+                    ii = ii + 1
+                end if
+            end do
+
+            ! Set the values of jmap and jjmap.
+
+            jj = 1
+            do j = 1, jjl
+                if (jCo(j)) then
+                    jmap(j) = jj
+                    jjmap(jj) = j
+                    jj = jj + 1
+                end if
+            end do
+
+            ! Set the values of kmap and kkmap.
+
+            kk = 1
+            do k = 1, kkl
+                if (kCo(k)) then
+                    kmap(k) = kk
+                    kkmap(kk) = k
+                    kk = kk + 1
+                end if
+            end do
+            !
+            !         The subface info. Except for the subface range all other
+            !         data can be copied. The range must be adapted and the donor
+            !         range is created later, because the coarsening info of the
+            !         donor block must be known.
+            !
+            flowDoms(nn, level, 1)%nSubface = flowDoms(nn, levm1, 1)%nSubface
+            flowDoms(nn, level, 1)%n1to1 = flowDoms(nn, levm1, 1)%n1to1
+            flowDoms(nn, level, 1)%nBocos = flowDoms(nn, levm1, 1)%nBocos
+            flowDoms(nn, level, 1)%nViscBocos = flowDoms(nn, levm1, 1)%nViscBocos
+
+            ! Allocate the memory.
+
+            mm = flowDoms(nn, level, 1)%nSubface
+            allocate (flowDoms(nn, level, 1)%BCType(mm), &
+                      flowDoms(nn, level, 1)%BCFaceID(mm), &
+                      flowDoms(nn, level, 1)%cgnsSubface(mm), &
+                      flowDoms(nn, level, 1)%neighBlock(mm), &
+                      flowDoms(nn, level, 1)%neighProc(mm), &
+                      flowDoms(nn, level, 1)%groupNum(mm), &
+                      flowDoms(nn, level, 1)%inBeg(mm), &
+                      flowDoms(nn, level, 1)%jnBeg(mm), &
+                      flowDoms(nn, level, 1)%knBeg(mm), &
+                      flowDoms(nn, level, 1)%inEnd(mm), &
+                      flowDoms(nn, level, 1)%jnEnd(mm), &
+                      flowDoms(nn, level, 1)%knEnd(mm), &
+                      flowDoms(nn, level, 1)%dinBeg(mm), &
+                      flowDoms(nn, level, 1)%djnBeg(mm), &
+                      flowDoms(nn, level, 1)%dknBeg(mm), &
+                      flowDoms(nn, level, 1)%dinEnd(mm), &
+                      flowDoms(nn, level, 1)%djnEnd(mm), &
+                      flowDoms(nn, level, 1)%dknEnd(mm), &
+                      flowDoms(nn, level, 1)%l1(mm), &
+                      flowDoms(nn, level, 1)%l2(mm), &
+                      flowDoms(nn, level, 1)%l3(mm), &
+                      stat=ierr)
+            if (ierr /= 0) &
+                call terminate("createCoarseBlocks", &
+                               "Memory allocation failure for subface info")
+
+            ! Loop over the subfaces.
+
+            subfaces: do mm = 1, flowDoms(nn, level, 1)%nSubface
+
+                ! Determine the range of the coarse subface.
+
+                flowDoms(nn, level, 1)%inBeg(mm) = &
+                    imap(flowDoms(nn, levm1, 1)%inBeg(mm))
+                flowDoms(nn, level, 1)%jnBeg(mm) = &
+                    jmap(flowDoms(nn, levm1, 1)%jnBeg(mm))
+                flowDoms(nn, level, 1)%knBeg(mm) = &
+                    kmap(flowDoms(nn, levm1, 1)%knBeg(mm))
+
+                flowDoms(nn, level, 1)%inEnd(mm) = &
+                    imap(flowDoms(nn, levm1, 1)%inEnd(mm))
+                flowDoms(nn, level, 1)%jnEnd(mm) = &
+                    jmap(flowDoms(nn, levm1, 1)%jnEnd(mm))
+                flowDoms(nn, level, 1)%knEnd(mm) = &
+                    kmap(flowDoms(nn, levm1, 1)%knEnd(mm))
+
+                ! Copy the rest of the subface info.
+
+                flowDoms(nn, level, 1)%BCType(mm) = &
+                    flowDoms(nn, levm1, 1)%BCType(mm)
+                flowDoms(nn, level, 1)%BCFaceID(mm) = &
+                    flowDoms(nn, levm1, 1)%BCFaceID(mm)
+                flowDoms(nn, level, 1)%neighBlock(mm) = &
+                    flowDoms(nn, levm1, 1)%neighBlock(mm)
+                flowDoms(nn, level, 1)%neighProc(mm) = &
+                    flowDoms(nn, levm1, 1)%neighProc(mm)
+                flowDoms(nn, level, 1)%groupNum(mm) = &
+                    flowDoms(nn, levm1, 1)%groupNum(mm)
+                flowDoms(nn, level, 1)%cgnsSubface(mm) = &
+                    flowDoms(nn, levm1, 1)%cgnsSubface(mm)
+
+                flowDoms(nn, level, 1)%l1(mm) = flowDoms(nn, levm1, 1)%l1(mm)
+                flowDoms(nn, level, 1)%l2(mm) = flowDoms(nn, levm1, 1)%l2(mm)
+                flowDoms(nn, level, 1)%l3(mm) = flowDoms(nn, levm1, 1)%l3(mm)
+
+                ! Create some info if this is a 1 to 1 subface. This is stored
+                ! in the array subface1to1 and is needed to determine the donor
+                ! info and to check if this is still a 1 to 1 subface on the
+                ! coarse grid.
+
+                subface_1to1: if (mm > flowDoms(nn, level, 1)%nBocos .and. &
+                                  mm <= (flowDoms(nn, level, 1)%nBocos &
+                                         + flowDoms(nn, level, 1)%n1to1)) then
+
+                    ! Update the counter.
+
+                    n1to1 = n1to1 + 1
+
+                    ! Store the range of the 1 to 1 subface.
+
+                    subface1to1(n1to1)%iBeg = flowDoms(nn, level, 1)%inBeg(mm)
+                    subface1to1(n1to1)%jBeg = flowDoms(nn, level, 1)%jnBeg(mm)
+                    subface1to1(n1to1)%kBeg = flowDoms(nn, level, 1)%knBeg(mm)
+                    subface1to1(n1to1)%iEnd = flowDoms(nn, level, 1)%inEnd(mm)
+                    subface1to1(n1to1)%jEnd = flowDoms(nn, level, 1)%jnEnd(mm)
+                    subface1to1(n1to1)%kEnd = flowDoms(nn, level, 1)%knEnd(mm)
+
+                    ! Store the new range of this subface a bit easier.
+                    ! Make sure that i1, etc contains the lowest index.
+
+                    iBeg = min(subface1to1(n1to1)%iBeg, subface1to1(n1to1)%iEnd)
+                    jBeg = min(subface1to1(n1to1)%jBeg, subface1to1(n1to1)%jEnd)
+                    kBeg = min(subface1to1(n1to1)%kBeg, subface1to1(n1to1)%kEnd)
+
+                    iEnd = max(subface1to1(n1to1)%iBeg, subface1to1(n1to1)%iEnd)
+                    jEnd = max(subface1to1(n1to1)%jBeg, subface1to1(n1to1)%jEnd)
+                    kEnd = max(subface1to1(n1to1)%kBeg, subface1to1(n1to1)%kEnd)
+
+                    ! And copy it back into subface1to1.
+
+                    subface1to1(n1to1)%iBeg = iBeg
+                    subface1to1(n1to1)%jBeg = jBeg
+                    subface1to1(n1to1)%kBeg = kBeg
+                    subface1to1(n1to1)%iEnd = iEnd
+                    subface1to1(n1to1)%jEnd = jEnd
+                    subface1to1(n1to1)%kEnd = kEnd
+
+                    ! Store the processor and block ID of the donor.
+
+                    subface1to1(n1to1)%neighProc = &
+                        flowDoms(nn, level, 1)%neighProc(mm)
+                    subface1to1(n1to1)%neighBlock = &
+                        flowDoms(nn, level, 1)%neighBlock(mm)
+
+                    ! Store the shorthand for the transformation matrix a
+                    ! bit easier.
+
+                    l1 = flowDoms(nn, level, 1)%l1(mm)
+                    L2 = flowDoms(nn, level, 1)%l2(mm)
+                    l3 = flowDoms(nn, level, 1)%l3(mm)
+
+                    ! Determine the fine grid donor indices of the subface.
+                    ! i-direction.
+
+                    fact = 1
+                    if (l1 < 0) fact = -1
+                    ii = iEnd - iBeg + 1
+
+                    l1 = abs(l1)
+                    select case (l1)
+                    case (1_intType)
+                        allocate (subface1to1(n1to1)%idfine(ii), stat=ierr)
+                        dfine => subface1to1(n1to1)%idfine
+                        subface1to1(n1to1)%ndi = ii
+                        donorOffset = flowDoms(nn, levm1, 1)%dinBeg(mm)
+                    case (2_intType)
+                        allocate (subface1to1(n1to1)%jdfine(ii), stat=ierr)
+                        dfine => subface1to1(n1to1)%jdfine
+                        subface1to1(n1to1)%ndj = ii
+                        donorOffset = flowDoms(nn, levm1, 1)%djnBeg(mm)
+                    case (3_intType)
+                        allocate (subface1to1(n1to1)%kdfine(ii), stat=ierr)
+                        dfine => subface1to1(n1to1)%kdfine
+                        subface1to1(n1to1)%ndk = ii
+                        donorOffset = flowDoms(nn, levm1, 1)%dknBeg(mm)
+                    end select
+
+                    if (ierr /= 0) &
+                        call terminate("createCoarseBlocks", &
+                                       "Memory allocation failure for idfine")
+
+                    ii = 1
+                    do i = iBeg, iEnd
+                        jj = fact * (iimap(i) - flowDoms(nn, levm1, 1)%inBeg(mm))
+                        dfine(ii) = jj + donorOffset
+                        ii = ii + 1
+                    end do
+
+                    ! Determine the fine grid donor indices of the subface.
+                    ! j-direction.
+
+                    fact = 1
+                    if (l2 < 0) fact = -1
+                    ii = jEnd - jBeg + 1
+
+                    L2 = abs(l2)
+                    select case (l2)
+                    case (1_intType)
+                        allocate (subface1to1(n1to1)%idfine(ii), stat=ierr)
+                        dfine => subface1to1(n1to1)%idfine
+                        subface1to1(n1to1)%ndi = ii
+                        donorOffset = flowDoms(nn, levm1, 1)%dinBeg(mm)
+                    case (2_intType)
+                        allocate (subface1to1(n1to1)%jdfine(ii), stat=ierr)
+                        dfine => subface1to1(n1to1)%jdfine
+                        subface1to1(n1to1)%ndj = ii
+                        donorOffset = flowDoms(nn, levm1, 1)%djnBeg(mm)
+                    case (3_intType)
+                        allocate (subface1to1(n1to1)%kdfine(ii), stat=ierr)
+                        dfine => subface1to1(n1to1)%kdfine
+                        subface1to1(n1to1)%ndk = ii
+                        donorOffset = flowDoms(nn, levm1, 1)%dknBeg(mm)
+                    end select
+
+                    if (ierr /= 0) &
+                        call terminate("createCoarseBlocks", &
+                                       "Memory allocation failure for jdfine")
+
+                    jj = 1
+                    do j = jBeg, jEnd
+                        ii = fact * (jjmap(j) - flowDoms(nn, levm1, 1)%jnBeg(mm))
+                        dfine(jj) = ii + donorOffset
+                        jj = jj + 1
+                    end do
+
+                    ! Determine the fine grid donor indices of the subface.
+                    ! k-direction.
+
+                    fact = 1
+                    if (l3 < 0) fact = -1
+                    ii = kEnd - kBeg + 1
+
+                    l3 = abs(l3)
+                    select case (l3)
+                    case (1_intType)
+                        allocate (subface1to1(n1to1)%idfine(ii), stat=ierr)
+                        dfine => subface1to1(n1to1)%idfine
+                        subface1to1(n1to1)%ndi = ii
+                        donorOffset = flowDoms(nn, levm1, 1)%dinBeg(mm)
+                    case (2_intType)
+                        allocate (subface1to1(n1to1)%jdfine(ii), stat=ierr)
+                        dfine => subface1to1(n1to1)%jdfine
+                        subface1to1(n1to1)%ndj = ii
+                        donorOffset = flowDoms(nn, levm1, 1)%djnBeg(mm)
+                    case (3_intType)
+                        allocate (subface1to1(n1to1)%kdfine(ii), stat=ierr)
+                        dfine => subface1to1(n1to1)%kdfine
+                        subface1to1(n1to1)%ndk = ii
+                        donorOffset = flowDoms(nn, levm1, 1)%dknBeg(mm)
+                    end select
+
+                    if (ierr /= 0) &
+                        call terminate("createCoarseBlocks", &
+                                       "Memory allocation failure for kdfine")
+
+                    kk = 1
+                    do k = kBeg, kEnd
+                        ii = fact * (kkmap(k) - flowDoms(nn, levm1, 1)%knBeg(mm))
+                        dfine(kk) = ii + donorOffset
+                        kk = kk + 1
+                    end do
+
+                end if subface_1to1
+
+            end do subfaces
+
+            ! Release the local memory allocated inside this loop.
+
+            deallocate (imap, jmap, kmap, iimap, jjmap, kkmap, stat=ierr)
+            if (ierr /= 0) &
+                call terminate("createCoarseBlocks", &
+                               "Deallocation error for imap, iimap, etc.")
+
+            ! Allocate the memory for the coordinates of all time spectral
+            ! solutions.
+
+            ii = flowDoms(nn, level, 1)%ie
+            jj = flowDoms(nn, level, 1)%je
+            kk = flowDoms(nn, level, 1)%ke
+
+            do mm = 1, nTimeIntervalsSpectral
+                allocate (flowDoms(nn, level, mm)%x(0:ii, 0:jj, 0:kk, 3), stat=ierr)
+                if (ierr /= 0) &
+                    call terminate("createCoarseBlocks", &
+                                   "Memory allocation failure for x")
+            end do
+
+            ! Copy the scalars such that they are known for all time
+            ! spectral solutions.
+
+            do mm = 2, nTimeIntervalsSpectral
+
+                flowDoms(nn, level, mm)%nx = flowDoms(nn, level, 1)%nx
+                flowDoms(nn, level, mm)%ny = flowDoms(nn, level, 1)%ny
+                flowDoms(nn, level, mm)%nz = flowDoms(nn, level, 1)%nz
+
+                flowDoms(nn, level, mm)%il = flowDoms(nn, level, 1)%il
+                flowDoms(nn, level, mm)%jl = flowDoms(nn, level, 1)%jl
+                flowDoms(nn, level, mm)%kl = flowDoms(nn, level, 1)%kl
+
+                flowDoms(nn, level, mm)%ie = flowDoms(nn, level, 1)%ie
+                flowDoms(nn, level, mm)%je = flowDoms(nn, level, 1)%je
+                flowDoms(nn, level, mm)%ke = flowDoms(nn, level, 1)%ke
+
+                flowDoms(nn, level, mm)%ib = flowDoms(nn, level, 1)%ib
+                flowDoms(nn, level, mm)%jb = flowDoms(nn, level, 1)%jb
+                flowDoms(nn, level, mm)%kb = flowDoms(nn, level, 1)%kb
+
+                flowDoms(nn, level, mm)%nSubface = flowDoms(nn, level, 1)%nSubface
+                flowDoms(nn, level, mm)%n1to1 = flowDoms(nn, level, 1)%n1to1
+                flowDoms(nn, level, mm)%nBocos = flowDoms(nn, level, 1)%nBocos
+                flowDoms(nn, level, mm)%nViscBocos = flowDoms(nn, level, 1)%nViscBocos
 
-       ! Copy the scalars such that they are known for all time
-       ! spectral solutions.
+                flowDoms(nn, level, mm)%iCoarsened = flowDoms(nn, level, 1)%iCoarsened
+                flowDoms(nn, level, mm)%jCoarsened = flowDoms(nn, level, 1)%jCoarsened
+                flowDoms(nn, level, mm)%kCoarsened = flowDoms(nn, level, 1)%kCoarsened
 
-       do mm=2,nTimeIntervalsSpectral
+                flowDoms(nn, level, mm)%blockIsMoving = &
+                    flowDoms(nn, level, 1)%blockIsMoving
+                flowDoms(nn, level, mm)%cgnsBlockID = &
+                    flowDoms(nn, level, 1)%cgnsBlockID
 
-          flowDoms(nn,level,mm)%nx = flowDoms(nn,level,1)%nx
-          flowDoms(nn,level,mm)%ny = flowDoms(nn,level,1)%ny
-          flowDoms(nn,level,mm)%nz = flowDoms(nn,level,1)%nz
+                flowDoms(nn, level, mm)%addGridVelocities = &
+                    flowDoms(nn, level, 1)%addGridVelocities
 
-          flowDoms(nn,level,mm)%il = flowDoms(nn,level,1)%il
-          flowDoms(nn,level,mm)%jl = flowDoms(nn,level,1)%jl
-          flowDoms(nn,level,mm)%kl = flowDoms(nn,level,1)%kl
+            end do
 
-          flowDoms(nn,level,mm)%ie = flowDoms(nn,level,1)%ie
-          flowDoms(nn,level,mm)%je = flowDoms(nn,level,1)%je
-          flowDoms(nn,level,mm)%ke = flowDoms(nn,level,1)%ke
+        end do domains
 
-          flowDoms(nn,level,mm)%ib = flowDoms(nn,level,1)%ib
-          flowDoms(nn,level,mm)%jb = flowDoms(nn,level,1)%jb
-          flowDoms(nn,level,mm)%kb = flowDoms(nn,level,1)%kb
+        ! Determine the owned coordinates.
 
-          flowDoms(nn,level,mm)%nSubface   = flowDoms(nn,level,1)%nSubface
-          flowDoms(nn,level,mm)%n1to1      = flowDoms(nn,level,1)%n1to1
-          flowDoms(nn,level,mm)%nBocos     = flowDoms(nn,level,1)%nBocos
-          flowDoms(nn,level,mm)%nViscBocos = flowDoms(nn,level,1)%nViscBocos
+        call coarseOwnedCoordinates(level)
 
-          flowDoms(nn,level,mm)%iCoarsened = flowDoms(nn,level,1)%iCoarsened
-          flowDoms(nn,level,mm)%jCoarsened = flowDoms(nn,level,1)%jCoarsened
-          flowDoms(nn,level,mm)%kCoarsened = flowDoms(nn,level,1)%kCoarsened
+        ! Determine the donor info for the internal block boundaries.
 
-          flowDoms(nn,level,mm)%blockIsMoving = &
-               flowDoms(nn,level,1)%blockIsMoving
-          flowDoms(nn,level,mm)%cgnsBlockID   = &
-               flowDoms(nn,level,1)%cgnsBlockID
+        call coarseDonorInfo(level)
 
-          flowDoms(nn,level,mm)%addGridVelocities = &
-               flowDoms(nn,level,1)%addGridVelocities
+        ! Remove possible coarse grid non-matching block boundaries from
+        ! the 1 to 1 list.
 
-       enddo
+        call checkCoarse1to1(level)
 
-    enddo domains
+        ! Release the memory of the coarsening info.
 
-    ! Determine the owned coordinates.
+        do nn = 1, nDom
+            deallocate (coarseInfo(nn)%coarseIs1to1, stat=ierr)
+            if (ierr /= 0) &
+                call terminate("createCoarseBlocks", &
+                               "Deallocation error for coarseIs1to1")
+        end do
 
-    call coarseOwnedCoordinates(level)
-
-    ! Determine the donor info for the internal block boundaries.
-
-    call coarseDonorInfo(level)
-
-    ! Remove possible coarse grid non-matching block boundaries from
-    ! the 1 to 1 list.
-
-    call checkCoarse1to1(level)
-
-    ! Release the memory of the coarsening info.
-
-    do nn=1,nDom
-       deallocate(coarseInfo(nn)%coarseIs1to1, stat=ierr)
-       if(ierr /= 0)                          &
+        deallocate (coarseInfo, stat=ierr)
+        if (ierr /= 0) &
             call terminate("createCoarseBlocks", &
-            "Deallocation error for coarseIs1to1")
-    enddo
-
-    deallocate(coarseInfo, stat=ierr)
-    if(ierr /= 0)                          &
-         call terminate("createCoarseBlocks", &
-         "Deallocation error for coarseInfo")
-
-  end subroutine createCoarseBlocks
-
-  !      ==================================================================
-
-  subroutine coarseOwnedCoordinates(level)
-    !
-    !       coarseOwnedCoordinates determines from the coarsening info
-    !       the owned coordinates of the coarse grid. This is done in a
-    !       separate routine, because in unsteady moving mesh mode or for
-    !       deforming meshes only new coordinates need to be computed,
-    !       while the connectivity remains the same.
-    !
-    use block
-    use inputTimeSpectral
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, ii, jj, kk
-    integer(kind=intType) :: nn, mm, il, jl, kl, levm1
-
-    logical, dimension(:), pointer :: iCo, jCo, kCo
-
-    ! Store the finer grid level in levm1
-
-    levm1 = level -1
-
-    ! Loop over the number of blocks on this processor
-
-    domains: do nn=1,nDom
-
-       ! Easier storage of some variables of this block.
-
-       il = flowDoms(nn,levm1,1)%il
-       jl = flowDoms(nn,levm1,1)%jl
-       kl = flowDoms(nn,levm1,1)%kl
-
-       iCo => flowDoms(nn,levm1,1)%iCo
-       jCo => flowDoms(nn,levm1,1)%jCo
-       kCo => flowDoms(nn,levm1,1)%kCo
-
-       ! Loop over the fine grid lines in the three directions and
-       ! determine which should be kept on the coarse grid.
-
-       kk = 1
-       do k=1,kl
-          if( kCo(k) ) then
-             jj = 1
-             do j=1,jl
-                if( jCo(j) ) then
-                   ii = 1
-                   do i=1,il
-                      if( iCo(i) ) then
-
-                         ! Loop over the spectral solutions and copy the
-                         ! coordinates from the fine grid.
-
-                         do mm=1,nTimeIntervalsSpectral
-                            flowDoms(nn,level,mm)%x(ii,jj,kk,1) = &
-                                 flowDoms(nn,levm1,mm)%x(i,j,k,1)
-                            flowDoms(nn,level,mm)%x(ii,jj,kk,2) = &
-                                 flowDoms(nn,levm1,mm)%x(i,j,k,2)
-                            flowDoms(nn,level,mm)%x(ii,jj,kk,3) = &
-                                 flowDoms(nn,levm1,mm)%x(i,j,k,3)
-                         enddo
-
-                         ii = ii + 1
-                      endif
-                   enddo
-                   jj = jj + 1
-                endif
-             enddo
-             kk = kk + 1
-          endif
-       enddo
-
-    enddo domains
-
-  end subroutine coarseOwnedCoordinates
-
-  subroutine update1to1Coarse(level, subface)
-    !
-    !       update1to1Coarse determines whether or not the given 1 to 1
-    !       block boundary subface on the fine level is still a 1 to 1
-    !       subface on the coarser grid level.
-    !
-    use block
-    use coarse1to1Subface
-    use coarseningInfo
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType),        intent(in) :: level
-    type(coarse1to1SubfaceType), intent(in) :: subface
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: levm1, nn, mm, ii, jj, kk, ll
-    integer(kind=intType) :: i, j, k
-    integer(kind=intType) :: iBeg, jBeg, kBeg, iEnd, jEnd, kEnd
-
-    logical :: subfaceFound, idir, jdir, kdir
-
-    ! Store the finer grid level in levm1
-
-    levm1 = level -1
-
-    ! Store the local block id a bit easier and store the minimum and
-    ! maximum index in the three coordinate directions for the fine
-    ! grid.
-
-    nn = subface%neighBlock
-
-    mm = subface%ndi
-    iBeg = min(subface%idfine(1), subface%idfine(mm))
-    iEnd = max(subface%idfine(1), subface%idfine(mm))
-
-    mm = subface%ndj
-    jBeg = min(subface%jdfine(1), subface%jdfine(mm))
-    jEnd = max(subface%jdfine(1), subface%jdfine(mm))
-
-    mm = subface%ndk
-    kBeg = min(subface%kdfine(1), subface%kdfine(mm))
-    kEnd = max(subface%kdfine(1), subface%kdfine(mm))
-
-    ! Find the subface id of the block, which corresponds to the
-    ! given subface of the subroutine header.
-
-    subfaceFound = .false.
-    findSubface: do ii=1,flowDoms(nn,levm1,1)%n1to1
-
-       mm = ii + flowDoms(nn,levm1,1)%nBocos
-
-       ! Check if this subface is the correct one.
-
-       kk = min(flowDoms(nn,levm1,1)%inBeg(mm), &
-            flowDoms(nn,levm1,1)%inEnd(mm))
-       ll = max(flowDoms(nn,levm1,1)%inBeg(mm), &
-            flowDoms(nn,levm1,1)%inEnd(mm))
-
-       if(kk == iBeg .and. ll == iEnd) then
-          kk = min(flowDoms(nn,levm1,1)%jnBeg(mm), &
-               flowDoms(nn,levm1,1)%jnEnd(mm))
-          ll = max(flowDoms(nn,levm1,1)%jnBeg(mm), &
-               flowDoms(nn,levm1,1)%jnEnd(mm))
-
-          if(kk == jBeg .and. ll == jEnd) then
-             kk = min(flowDoms(nn,levm1,1)%knBeg(mm), &
-                  flowDoms(nn,levm1,1)%knEnd(mm))
-             ll = max(flowDoms(nn,levm1,1)%knBeg(mm), &
-                  flowDoms(nn,levm1,1)%knEnd(mm))
-
-             if(kk == kBeg .and. ll == kEnd) subfaceFound = .true.
-          endif
-       endif
-
-       ! Exit the loop if this is indeed the subface.
-
-       if( subfaceFound ) exit
-    enddo findSubface
-
-    ! The subface must be found on the fine grid. Check this.
-
-    if(.not. subfaceFound)               &
-         call terminate("update1to1Coarse", &
-         "Invalid fine grid 1 to 1 subface connectivity")
-
-    ! Check the i-direction of the coarse grid subface to see if it
-    ! is still a 1 to 1 subface. First check the number of grid lines.
-    ! If these are identical, check each coarse grid line.
-
-    idir = .true.
-    ii = abs(flowDoms(nn,level,1)%inEnd(mm) &
-         -     flowDoms(nn,level,1)%inBeg(mm)) + 1
-
-    if(ii == subface%ndi) then
-       do i=1,subface%ndi
-          ii = subface%idfine(i)
-          if(.not. flowDoms(nn,levm1,1)%ico(ii) ) idir = .false.
-       enddo
-    else
-       idir = .false.
-    endif
-
-    ! Check the j-direction of the coarse grid subface to see if it
-    ! is still a 1 to 1 subface. First check the number of grid lines.
-    ! If these are identical, check each coarse grid line.
-
-    jdir = .true.
-    jj = abs(flowDoms(nn,level,1)%jnEnd(mm) &
-         -     flowDoms(nn,level,1)%jnBeg(mm)) + 1
-
-    if(jj == subface%ndj) then
-       do j=1,subface%ndj
-          jj = subface%jdfine(j)
-          if(.not. flowDoms(nn,levm1,1)%jco(jj) ) jdir = .false.
-       enddo
-    else
-       jdir = .false.
-    endif
-
-    ! Check the k-direction of the coarse grid subface to see if it
-    ! is still a 1 to 1 subface. First check the number of grid lines.
-    ! If these are identical, check each coarse grid line.
-
-    kdir = .true.
-    kk = abs(flowDoms(nn,level,1)%knEnd(mm) &
-         -     flowDoms(nn,level,1)%knBeg(mm)) + 1
-
-    if(kk == subface%ndk) then
-       do k=1,subface%ndk
-          kk = subface%kdfine(k)
-          if(.not. flowDoms(nn,levm1,1)%kco(kk) ) kdir = .false.
-       enddo
-    else
-       kdir = .false.
-    endif
-
-    ! Set coarseIs1to1 to .true. if the subface on the coarse grid
-    ! is still a 1 to 1 subface. Otherwise set it to .false.
-
-    ii = mm - flowDoms(nn,levm1,1)%nBocos
-    if(idir .and. jdir .and. kdir) then
-       coarseInfo(nn)%coarseIs1to1(ii) = .true.
-    else
-       coarseInfo(nn)%coarseIs1to1(ii) = .false.
-    endif
-
-    ! Store the donor range. It is possible that the lower and
-    ! upper boundary must be reversed.
-
-    if(flowDoms(nn,levm1,1)%dinBeg(mm) < &
-         flowDoms(nn,levm1,1)%dinEnd(mm)) then
-       flowDoms(nn,level,1)%dinBeg(mm) = subface%iBeg
-       flowDoms(nn,level,1)%dinEnd(mm) = subface%iEnd
-    else
-       flowDoms(nn,level,1)%dinBeg(mm) = subface%iEnd
-       flowDoms(nn,level,1)%dinEnd(mm) = subface%iBeg
-    endif
-
-    if(flowDoms(nn,levm1,1)%djnBeg(mm) < &
-         flowDoms(nn,levm1,1)%djnEnd(mm)) then
-       flowDoms(nn,level,1)%djnBeg(mm) = subface%jBeg
-       flowDoms(nn,level,1)%djnEnd(mm) = subface%jEnd
-    else
-       flowDoms(nn,level,1)%djnBeg(mm) = subface%jEnd
-       flowDoms(nn,level,1)%djnEnd(mm) = subface%jBeg
-    endif
-
-    if(flowDoms(nn,levm1,1)%dknBeg(mm) < &
-         flowDoms(nn,levm1,1)%dknEnd(mm)) then
-       flowDoms(nn,level,1)%dknBeg(mm) = subface%kBeg
-       flowDoms(nn,level,1)%dknEnd(mm) = subface%kEnd
-    else
-       flowDoms(nn,level,1)%dknBeg(mm) = subface%kEnd
-       flowDoms(nn,level,1)%dknEnd(mm) = subface%kBeg
-    endif
-
-  end subroutine update1to1Coarse
-  subroutine coarseDonorInfo(level)
-    !
-    !       coarseDonorInfo creates the donor info for the internal
-    !       block boundaries on the given coarse grid level.
-    !
-    use communication
-    use coarse1to1Subface
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level
-    !
-    !      Local variables.
-    !
-    integer :: proc, size, ierr
-
-    integer, dimension(mpi_status_size) :: mpiStatus
-    integer, dimension(nProc)           :: sizeMessage
-
-    integer(kind=intType) :: nn, mm, ii, i
-    integer(kind=intType) :: nMessageReceive, nMessageSend
-
-    integer(kind=intType), dimension(0:nProc) :: size2proc
-    integer(kind=intType), dimension(nProc)   :: tmp
-
-    integer(kind=intType), dimension(:), allocatable :: sendBuf
-    integer(kind=intType), dimension(:), allocatable, target :: recvBuf
-
-    type(coarse1to1SubfaceType) :: tmpSubface
-
-    ! Determine the number of integers sent to every processor.
-
-    size2proc = 0
-    do nn=1,nSubface1to1
-       mm = subface1to1(nn)%neighProc + 1    ! Proc ID's start at 0.
-
-       size2proc(mm) = size2proc(mm) + 13                          &
-            + subface1to1(nn)%iEnd - subface1to1(nn)%iBeg &
-            + subface1to1(nn)%jEnd - subface1to1(nn)%jBeg &
-            + subface1to1(nn)%kEnd - subface1to1(nn)%kBeg
-    enddo
-
-    ! No message is sent to myself, so set the corresponding entry in
-    ! size2proc to 0. The processor id's start at 0, which explains
-    ! the +1.
-
-    size2proc(myID+1) = 0
-
-    ! Determine the number of messages i have to sent. Store in tmp
-    ! a 0 or a 1, depending whether or not a message must be sent
-    ! to the corresponding processor. This info is needed to determine
-    ! the number of messages i will receive.
-
-    nMessageSend = 0
-    do nn=1,nProc
-       if(size2proc(nn) > 0) then
-          nMessageSend = nMessageSend +1
-          tmp(nn) = 1
-       else
-          tmp(nn) = 0
-       endif
-    enddo
-
-    ! Determine the number of messages i will receive.
-
-    sizeMessage = 1
-    call mpi_reduce_scatter(tmp, nMessageReceive, sizeMessage,      &
-         adflow_integer, mpi_sum, ADflow_comm_world, &
-         ierr)
-
-    ! Put size2proc in cumulative storage format and store the
-    ! starting entry in tmp, which is used as a counter.
-
-    do nn=1,nProc
-       size2proc(nn) = size2proc(nn) + size2proc(nn-1)
-       tmp(nn)       = size2proc(nn-1)
-    enddo
-
-    ! Allocate the memory for the send buffer.
+                           "Deallocation error for coarseInfo")
+
+    end subroutine createCoarseBlocks
+
+    !      ==================================================================
+
+    subroutine coarseOwnedCoordinates(level)
+        !
+        !       coarseOwnedCoordinates determines from the coarsening info
+        !       the owned coordinates of the coarse grid. This is done in a
+        !       separate routine, because in unsteady moving mesh mode or for
+        !       deforming meshes only new coordinates need to be computed,
+        !       while the connectivity remains the same.
+        !
+        use block
+        use inputTimeSpectral
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, ii, jj, kk
+        integer(kind=intType) :: nn, mm, il, jl, kl, levm1
+
+        logical, dimension(:), pointer :: iCo, jCo, kCo
+
+        ! Store the finer grid level in levm1
+
+        levm1 = level - 1
+
+        ! Loop over the number of blocks on this processor
+
+        domains: do nn = 1, nDom
+
+            ! Easier storage of some variables of this block.
+
+            il = flowDoms(nn, levm1, 1)%il
+            jl = flowDoms(nn, levm1, 1)%jl
+            kl = flowDoms(nn, levm1, 1)%kl
+
+            iCo => flowDoms(nn, levm1, 1)%iCo
+            jCo => flowDoms(nn, levm1, 1)%jCo
+            kCo => flowDoms(nn, levm1, 1)%kCo
+
+            ! Loop over the fine grid lines in the three directions and
+            ! determine which should be kept on the coarse grid.
+
+            kk = 1
+            do k = 1, kl
+                if (kCo(k)) then
+                    jj = 1
+                    do j = 1, jl
+                        if (jCo(j)) then
+                            ii = 1
+                            do i = 1, il
+                                if (iCo(i)) then
+
+                                    ! Loop over the spectral solutions and copy the
+                                    ! coordinates from the fine grid.
+
+                                    do mm = 1, nTimeIntervalsSpectral
+                                        flowDoms(nn, level, mm)%x(ii, jj, kk, 1) = &
+                                            flowDoms(nn, levm1, mm)%x(i, j, k, 1)
+                                        flowDoms(nn, level, mm)%x(ii, jj, kk, 2) = &
+                                            flowDoms(nn, levm1, mm)%x(i, j, k, 2)
+                                        flowDoms(nn, level, mm)%x(ii, jj, kk, 3) = &
+                                            flowDoms(nn, levm1, mm)%x(i, j, k, 3)
+                                    end do
+
+                                    ii = ii + 1
+                                end if
+                            end do
+                            jj = jj + 1
+                        end if
+                    end do
+                    kk = kk + 1
+                end if
+            end do
+
+        end do domains
+
+    end subroutine coarseOwnedCoordinates
+
+    subroutine update1to1Coarse(level, subface)
+        !
+        !       update1to1Coarse determines whether or not the given 1 to 1
+        !       block boundary subface on the fine level is still a 1 to 1
+        !       subface on the coarser grid level.
+        !
+        use block
+        use coarse1to1Subface
+        use coarseningInfo
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level
+        type(coarse1to1SubfaceType), intent(in) :: subface
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: levm1, nn, mm, ii, jj, kk, ll
+        integer(kind=intType) :: i, j, k
+        integer(kind=intType) :: iBeg, jBeg, kBeg, iEnd, jEnd, kEnd
+
+        logical :: subfaceFound, idir, jdir, kdir
+
+        ! Store the finer grid level in levm1
+
+        levm1 = level - 1
+
+        ! Store the local block id a bit easier and store the minimum and
+        ! maximum index in the three coordinate directions for the fine
+        ! grid.
+
+        nn = subface%neighBlock
+
+        mm = subface%ndi
+        iBeg = min(subface%idfine(1), subface%idfine(mm))
+        iEnd = max(subface%idfine(1), subface%idfine(mm))
+
+        mm = subface%ndj
+        jBeg = min(subface%jdfine(1), subface%jdfine(mm))
+        jEnd = max(subface%jdfine(1), subface%jdfine(mm))
+
+        mm = subface%ndk
+        kBeg = min(subface%kdfine(1), subface%kdfine(mm))
+        kEnd = max(subface%kdfine(1), subface%kdfine(mm))
+
+        ! Find the subface id of the block, which corresponds to the
+        ! given subface of the subroutine header.
+
+        subfaceFound = .false.
+        findSubface: do ii = 1, flowDoms(nn, levm1, 1)%n1to1
+
+            mm = ii + flowDoms(nn, levm1, 1)%nBocos
+
+            ! Check if this subface is the correct one.
+
+            kk = min(flowDoms(nn, levm1, 1)%inBeg(mm), &
+                     flowDoms(nn, levm1, 1)%inEnd(mm))
+            ll = max(flowDoms(nn, levm1, 1)%inBeg(mm), &
+                     flowDoms(nn, levm1, 1)%inEnd(mm))
+
+            if (kk == iBeg .and. ll == iEnd) then
+                kk = min(flowDoms(nn, levm1, 1)%jnBeg(mm), &
+                         flowDoms(nn, levm1, 1)%jnEnd(mm))
+                ll = max(flowDoms(nn, levm1, 1)%jnBeg(mm), &
+                         flowDoms(nn, levm1, 1)%jnEnd(mm))
+
+                if (kk == jBeg .and. ll == jEnd) then
+                    kk = min(flowDoms(nn, levm1, 1)%knBeg(mm), &
+                             flowDoms(nn, levm1, 1)%knEnd(mm))
+                    ll = max(flowDoms(nn, levm1, 1)%knBeg(mm), &
+                             flowDoms(nn, levm1, 1)%knEnd(mm))
+
+                    if (kk == kBeg .and. ll == kEnd) subfaceFound = .true.
+                end if
+            end if
+
+            ! Exit the loop if this is indeed the subface.
+
+            if (subfaceFound) exit
+        end do findSubface
+
+        ! The subface must be found on the fine grid. Check this.
+
+        if (.not. subfaceFound) &
+            call terminate("update1to1Coarse", &
+                           "Invalid fine grid 1 to 1 subface connectivity")
+
+        ! Check the i-direction of the coarse grid subface to see if it
+        ! is still a 1 to 1 subface. First check the number of grid lines.
+        ! If these are identical, check each coarse grid line.
+
+        idir = .true.
+        ii = abs(flowDoms(nn, level, 1)%inEnd(mm) &
+                 - flowDoms(nn, level, 1)%inBeg(mm)) + 1
+
+        if (ii == subface%ndi) then
+            do i = 1, subface%ndi
+                ii = subface%idfine(i)
+                if (.not. flowDoms(nn, levm1, 1)%ico(ii)) idir = .false.
+            end do
+        else
+            idir = .false.
+        end if
+
+        ! Check the j-direction of the coarse grid subface to see if it
+        ! is still a 1 to 1 subface. First check the number of grid lines.
+        ! If these are identical, check each coarse grid line.
+
+        jdir = .true.
+        jj = abs(flowDoms(nn, level, 1)%jnEnd(mm) &
+                 - flowDoms(nn, level, 1)%jnBeg(mm)) + 1
+
+        if (jj == subface%ndj) then
+            do j = 1, subface%ndj
+                jj = subface%jdfine(j)
+                if (.not. flowDoms(nn, levm1, 1)%jco(jj)) jdir = .false.
+            end do
+        else
+            jdir = .false.
+        end if
+
+        ! Check the k-direction of the coarse grid subface to see if it
+        ! is still a 1 to 1 subface. First check the number of grid lines.
+        ! If these are identical, check each coarse grid line.
+
+        kdir = .true.
+        kk = abs(flowDoms(nn, level, 1)%knEnd(mm) &
+                 - flowDoms(nn, level, 1)%knBeg(mm)) + 1
+
+        if (kk == subface%ndk) then
+            do k = 1, subface%ndk
+                kk = subface%kdfine(k)
+                if (.not. flowDoms(nn, levm1, 1)%kco(kk)) kdir = .false.
+            end do
+        else
+            kdir = .false.
+        end if
+
+        ! Set coarseIs1to1 to .true. if the subface on the coarse grid
+        ! is still a 1 to 1 subface. Otherwise set it to .false.
+
+        ii = mm - flowDoms(nn, levm1, 1)%nBocos
+        if (idir .and. jdir .and. kdir) then
+            coarseInfo(nn)%coarseIs1to1(ii) = .true.
+        else
+            coarseInfo(nn)%coarseIs1to1(ii) = .false.
+        end if
+
+        ! Store the donor range. It is possible that the lower and
+        ! upper boundary must be reversed.
+
+        if (flowDoms(nn, levm1, 1)%dinBeg(mm) < &
+            flowDoms(nn, levm1, 1)%dinEnd(mm)) then
+            flowDoms(nn, level, 1)%dinBeg(mm) = subface%iBeg
+            flowDoms(nn, level, 1)%dinEnd(mm) = subface%iEnd
+        else
+            flowDoms(nn, level, 1)%dinBeg(mm) = subface%iEnd
+            flowDoms(nn, level, 1)%dinEnd(mm) = subface%iBeg
+        end if
+
+        if (flowDoms(nn, levm1, 1)%djnBeg(mm) < &
+            flowDoms(nn, levm1, 1)%djnEnd(mm)) then
+            flowDoms(nn, level, 1)%djnBeg(mm) = subface%jBeg
+            flowDoms(nn, level, 1)%djnEnd(mm) = subface%jEnd
+        else
+            flowDoms(nn, level, 1)%djnBeg(mm) = subface%jEnd
+            flowDoms(nn, level, 1)%djnEnd(mm) = subface%jBeg
+        end if
+
+        if (flowDoms(nn, levm1, 1)%dknBeg(mm) < &
+            flowDoms(nn, levm1, 1)%dknEnd(mm)) then
+            flowDoms(nn, level, 1)%dknBeg(mm) = subface%kBeg
+            flowDoms(nn, level, 1)%dknEnd(mm) = subface%kEnd
+        else
+            flowDoms(nn, level, 1)%dknBeg(mm) = subface%kEnd
+            flowDoms(nn, level, 1)%dknEnd(mm) = subface%kBeg
+        end if
+
+    end subroutine update1to1Coarse
+    subroutine coarseDonorInfo(level)
+        !
+        !       coarseDonorInfo creates the donor info for the internal
+        !       block boundaries on the given coarse grid level.
+        !
+        use communication
+        use coarse1to1Subface
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level
+        !
+        !      Local variables.
+        !
+        integer :: proc, size, ierr
+
+        integer, dimension(mpi_status_size) :: mpiStatus
+        integer, dimension(nProc) :: sizeMessage
+
+        integer(kind=intType) :: nn, mm, ii, i
+        integer(kind=intType) :: nMessageReceive, nMessageSend
+
+        integer(kind=intType), dimension(0:nProc) :: size2proc
+        integer(kind=intType), dimension(nProc) :: tmp
+
+        integer(kind=intType), dimension(:), allocatable :: sendBuf
+        integer(kind=intType), dimension(:), allocatable, target :: recvBuf
+
+        type(coarse1to1SubfaceType) :: tmpSubface
+
+        ! Determine the number of integers sent to every processor.
+
+        size2proc = 0
+        do nn = 1, nSubface1to1
+            mm = subface1to1(nn)%neighProc + 1    ! Proc ID's start at 0.
+
+            size2proc(mm) = size2proc(mm) + 13 &
+                            + subface1to1(nn)%iEnd - subface1to1(nn)%iBeg &
+                            + subface1to1(nn)%jEnd - subface1to1(nn)%jBeg &
+                            + subface1to1(nn)%kEnd - subface1to1(nn)%kBeg
+        end do
+
+        ! No message is sent to myself, so set the corresponding entry in
+        ! size2proc to 0. The processor id's start at 0, which explains
+        ! the +1.
+
+        size2proc(myID + 1) = 0
+
+        ! Determine the number of messages i have to sent. Store in tmp
+        ! a 0 or a 1, depending whether or not a message must be sent
+        ! to the corresponding processor. This info is needed to determine
+        ! the number of messages i will receive.
+
+        nMessageSend = 0
+        do nn = 1, nProc
+            if (size2proc(nn) > 0) then
+                nMessageSend = nMessageSend + 1
+                tmp(nn) = 1
+            else
+                tmp(nn) = 0
+            end if
+        end do
+
+        ! Determine the number of messages i will receive.
+
+        sizeMessage = 1
+        call mpi_reduce_scatter(tmp, nMessageReceive, sizeMessage, &
+                                adflow_integer, mpi_sum, ADflow_comm_world, &
+                                ierr)
+
+        ! Put size2proc in cumulative storage format and store the
+        ! starting entry in tmp, which is used as a counter.
+
+        do nn = 1, nProc
+            size2proc(nn) = size2proc(nn) + size2proc(nn - 1)
+            tmp(nn) = size2proc(nn - 1)
+        end do
+
+        ! Allocate the memory for the send buffer.
+
+        allocate (sendBuf(size2proc(nProc)), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("coarseDonorInfo", &
+                           "Memory allocation failure for sendBuf")
 
-    allocate(sendBuf(size2proc(nProc)), stat=ierr)
-    if(ierr /= 0)                       &
-         call terminate("coarseDonorInfo", &
-         "Memory allocation failure for sendBuf")
+        ! Loop over the number of 1 to 1 subfaces to fill the send buffer.
 
-    ! Loop over the number of 1 to 1 subfaces to fill the send buffer.
+        unOwnedSubfaces: do nn = 1, nSubface1to1
 
-    unOwnedSubfaces: do nn=1,nSubface1to1
+            ! Only info that must be sent to other processors must be stored.
 
-       ! Only info that must be sent to other processors must be stored.
+            if (subface1to1(nn)%neighProc /= myID) then
 
-       if(subface1to1(nn)%neighProc /= myID) then
+                ! Store the entry in tmp in mm.
+                ! Note that the proc id's start at 0.
 
-          ! Store the entry in tmp in mm.
-          ! Note that the proc id's start at 0.
+                mm = subface1to1(nn)%neighProc + 1
 
-          mm = subface1to1(nn)%neighProc + 1
+                ! Store the block id of the donor and the coarse grid range of
+                ! the current subface in the send buffer. Update the counter
+                ! tmp(mm) accordingly.
 
-          ! Store the block id of the donor and the coarse grid range of
-          ! the current subface in the send buffer. Update the counter
-          ! tmp(mm) accordingly.
+                tmp(mm) = tmp(mm) + 1
+                sendBuf(tmp(mm)) = subface1to1(nn)%neighBlock
 
-          tmp(mm) = tmp(mm) +1
-          sendBuf(tmp(mm)) = subface1to1(nn)%neighBlock
+                tmp(mm) = tmp(mm) + 1
+                sendBuf(tmp(mm)) = subface1to1(nn)%iBeg
 
-          tmp(mm) = tmp(mm) +1
-          sendBuf(tmp(mm)) = subface1to1(nn)%iBeg
+                tmp(mm) = tmp(mm) + 1
+                sendBuf(tmp(mm)) = subface1to1(nn)%jBeg
 
-          tmp(mm) = tmp(mm) +1
-          sendBuf(tmp(mm)) = subface1to1(nn)%jBeg
+                tmp(mm) = tmp(mm) + 1
+                sendBuf(tmp(mm)) = subface1to1(nn)%kBeg
 
-          tmp(mm) = tmp(mm) +1
-          sendBuf(tmp(mm)) = subface1to1(nn)%kBeg
+                tmp(mm) = tmp(mm) + 1
+                sendBuf(tmp(mm)) = subface1to1(nn)%iEnd
 
-          tmp(mm) = tmp(mm) +1
-          sendBuf(tmp(mm)) = subface1to1(nn)%iEnd
+                tmp(mm) = tmp(mm) + 1
+                sendBuf(tmp(mm)) = subface1to1(nn)%jEnd
 
-          tmp(mm) = tmp(mm) +1
-          sendBuf(tmp(mm)) = subface1to1(nn)%jEnd
+                tmp(mm) = tmp(mm) + 1
+                sendBuf(tmp(mm)) = subface1to1(nn)%kEnd
 
-          tmp(mm) = tmp(mm) +1
-          sendBuf(tmp(mm)) = subface1to1(nn)%kEnd
+                ! Store the number of points in the three coordinate
+                ! directions of the coarse grid donor face.
 
-          ! Store the number of points in the three coordinate
-          ! directions of the coarse grid donor face.
+                tmp(mm) = tmp(mm) + 1
+                sendBuf(tmp(mm)) = subface1to1(nn)%ndi
 
-          tmp(mm) = tmp(mm) +1
-          sendBuf(tmp(mm)) = subface1to1(nn)%ndi
+                tmp(mm) = tmp(mm) + 1
+                sendBuf(tmp(mm)) = subface1to1(nn)%ndj
 
-          tmp(mm) = tmp(mm) +1
-          sendBuf(tmp(mm)) = subface1to1(nn)%ndj
+                tmp(mm) = tmp(mm) + 1
+                sendBuf(tmp(mm)) = subface1to1(nn)%ndk
 
-          tmp(mm) = tmp(mm) +1
-          sendBuf(tmp(mm)) = subface1to1(nn)%ndk
+                ! Store the i-donor indices of the fine grid in sendBuf.
 
-          ! Store the i-donor indices of the fine grid in sendBuf.
+                do i = 1, subface1to1(nn)%ndi
+                    tmp(mm) = tmp(mm) + 1
+                    sendBuf(tmp(mm)) = subface1to1(nn)%idfine(i)
+                end do
 
-          do i=1,subface1to1(nn)%ndi
-             tmp(mm) = tmp(mm) +1
-             sendBuf(tmp(mm)) = subface1to1(nn)%idfine(i)
-          enddo
+                ! Store the j-donor indices of the fine grid in sendBuf.
 
-          ! Store the j-donor indices of the fine grid in sendBuf.
+                do i = 1, subface1to1(nn)%ndj
+                    tmp(mm) = tmp(mm) + 1
+                    sendBuf(tmp(mm)) = subface1to1(nn)%jdfine(i)
+                end do
 
-          do i=1,subface1to1(nn)%ndj
-             tmp(mm) = tmp(mm) +1
-             sendBuf(tmp(mm)) = subface1to1(nn)%jdfine(i)
-          enddo
+                ! Store the k-donor indices of the fine grid in sendBuf.
 
-          ! Store the k-donor indices of the fine grid in sendBuf.
+                do i = 1, subface1to1(nn)%ndk
+                    tmp(mm) = tmp(mm) + 1
+                    sendBuf(tmp(mm)) = subface1to1(nn)%kdfine(i)
+                end do
 
-          do i=1,subface1to1(nn)%ndk
-             tmp(mm) = tmp(mm) +1
-             sendBuf(tmp(mm)) = subface1to1(nn)%kdfine(i)
-          enddo
+            end if
 
-       endif
+        end do unOwnedSubfaces
 
-    enddo unOwnedSubfaces
+        ! Send the data i have to send.
 
-    ! Send the data i have to send.
+        mm = 0
+        sends: do nn = 1, nProc
 
-    mm = 0
-    sends: do nn=1,nProc
+            if (size2proc(nn) > size2proc(nn - 1)) then
 
-       if(size2proc(nn) > size2proc(nn-1)) then
+                ! Update mm and store the processor id, the size of the
+                ! message and the starting index in sendbuf.
 
-          ! Update mm and store the processor id, the size of the
-          ! message and the starting index in sendbuf.
+                mm = mm + 1
+                proc = nn - 1
+                size = size2proc(nn) - size2proc(nn - 1)
+                ii = size2proc(nn - 1) + 1
 
-          mm   = mm +1
-          proc = nn -1
-          size = size2proc(nn) - size2proc(nn-1)
-          ii   = size2proc(nn-1) +1
+                ! Send the message.
 
-          ! Send the message.
+                call mpi_isend(sendBuf(ii), size, adflow_integer, proc, proc, &
+                               ADflow_comm_world, sendRequests(mm), ierr)
 
-          call mpi_isend(sendBuf(ii), size, adflow_integer, proc, proc, &
-               ADflow_comm_world, sendRequests(mm), ierr)
+            end if
 
-       endif
+        end do sends
 
-    enddo sends
+        ! Loop over the number of 1 to 1 subfaces stored on this processor
+        ! and update the locally stored info.
 
-    ! Loop over the number of 1 to 1 subfaces stored on this processor
-    ! and update the locally stored info.
+        do nn = 1, nSubface1to1
+            if (subface1to1(nn)%neighProc == myID) &
+                call update1to1Coarse(level, subface1to1(nn))
+        end do
 
-    do nn=1,nSubface1to1
-       if(subface1to1(nn)%neighProc == myID) &
-            call update1to1Coarse(level, subface1to1(nn))
-    enddo
+        ! Release the memory of subface1to1.
 
-    ! Release the memory of subface1to1.
+        do nn = 1, nSubface1to1
+            deallocate (subface1to1(nn)%idfine, subface1to1(nn)%jdfine, &
+                        subface1to1(nn)%kdfine, stat=ierr)
+            if (ierr /= 0) &
+                call terminate("coarseDonorInfo", &
+                               "Deallocation error for idfine, etc")
+        end do
 
-    do nn=1,nSubface1to1
-       deallocate(subface1to1(nn)%idfine, subface1to1(nn)%jdfine, &
-            subface1to1(nn)%kdfine, stat=ierr)
-       if(ierr /= 0)                       &
+        deallocate (subface1to1, stat=ierr)
+        if (ierr /= 0) &
             call terminate("coarseDonorInfo", &
-            "Deallocation error for idfine, etc")
-    enddo
+                           "Deallocation error for subface1to1")
 
-    deallocate(subface1to1, stat=ierr)
-    if(ierr /= 0)                       &
-         call terminate("coarseDonorInfo", &
-         "Deallocation error for subface1to1")
+        ! Loop over the number of messages i must receive to determine
+        ! info from externally stored neighboring subfaces.
 
-    ! Loop over the number of messages i must receive to determine
-    ! info from externally stored neighboring subfaces.
+        receives: do nn = 1, nMessageReceive
 
-    receives: do nn=1,nMessageReceive
+            ! Block until a message arrives.
 
-       ! Block until a message arrives.
+            call mpi_probe(mpi_any_source, myID, ADflow_comm_world, &
+                           mpiStatus, ierr)
 
-       call mpi_probe(mpi_any_source, myID, ADflow_comm_world, &
-            mpiStatus, ierr)
+            ! Find the source and size of the message.
 
-       ! Find the source and size of the message.
+            proc = mpiStatus(mpi_source)
+            call mpi_get_count(mpiStatus, adflow_integer, size, ierr)
 
-       proc = mpiStatus(mpi_source)
-       call mpi_get_count(mpiStatus, adflow_integer, size, ierr)
+            ! Check in debug mode that the incoming message is of
+            ! correct size.
 
-       ! Check in debug mode that the incoming message is of
-       ! correct size.
+            if (debug) then
+                if (size == mpi_undefined) &
+                    call terminate("coarseDonorInfo", &
+                                   "Unexpected size of message")
+            end if
 
-       if( debug ) then
-          if(size == mpi_undefined)             &
-               call terminate("coarseDonorInfo", &
-               "Unexpected size of message")
-       endif
+            ! Allocate the memory for the receive buffer.
 
-       ! Allocate the memory for the receive buffer.
+            allocate (recvBuf(size), stat=ierr)
+            if (ierr /= 0) &
+                call terminate("coarseDonorInfo", &
+                               "Memory allocation failure for recvBuf")
 
-       allocate(recvBuf(size), stat=ierr)
-       if(ierr /= 0)                         &
-            call terminate("coarseDonorInfo", &
-            "Memory allocation failure for recvBuf")
-
-       ! Receive the messsage. As it has already arrived a blocking
-       ! receive can be used.
+            ! Receive the messsage. As it has already arrived a blocking
+            ! receive can be used.
 
-       call mpi_recv(recvBuf, size, adflow_integer, proc, myID, &
-            ADflow_comm_world, mpiStatus, ierr)
+            call mpi_recv(recvBuf, size, adflow_integer, proc, myID, &
+                          ADflow_comm_world, mpiStatus, ierr)
 
-       ! Loop to extract the 1 to 1 subface info from the buffer.
+            ! Loop to extract the 1 to 1 subface info from the buffer.
 
-       ii = 1
-       extractSubface: do
-          ! Exit the loop if no more info is present.
+            ii = 1
+            extractSubface: do
+                ! Exit the loop if no more info is present.
 
-          if(ii > size) exit
+                if (ii > size) exit
 
-          ! Store the info of this subface in tmpSubface.
+                ! Store the info of this subface in tmpSubface.
 
-          tmpSubface%neighProc  = proc
-          tmpSubface%neighBlock = recvBuf(ii); ii = ii +1
+                tmpSubface%neighProc = proc
+                tmpSubface%neighBlock = recvBuf(ii); ii = ii + 1
 
-          tmpSubface%iBeg = recvBuf(ii); ii = ii +1
-          tmpSubface%jBeg = recvBuf(ii); ii = ii +1
-          tmpSubface%kBeg = recvBuf(ii); ii = ii +1
+                tmpSubface%iBeg = recvBuf(ii); ii = ii + 1
+                tmpSubface%jBeg = recvBuf(ii); ii = ii + 1
+                tmpSubface%kBeg = recvBuf(ii); ii = ii + 1
 
-          tmpSubface%iEnd = recvBuf(ii); ii = ii +1
-          tmpSubface%jEnd = recvBuf(ii); ii = ii +1
-          tmpSubface%kEnd = recvBuf(ii); ii = ii +1
+                tmpSubface%iEnd = recvBuf(ii); ii = ii + 1
+                tmpSubface%jEnd = recvBuf(ii); ii = ii + 1
+                tmpSubface%kEnd = recvBuf(ii); ii = ii + 1
 
-          tmpSubface%ndi = recvBuf(ii); ii = ii +1
-          tmpSubface%ndj = recvBuf(ii); ii = ii +1
-          tmpSubface%ndk = recvBuf(ii); ii = ii +1
+                tmpSubface%ndi = recvBuf(ii); ii = ii + 1
+                tmpSubface%ndj = recvBuf(ii); ii = ii + 1
+                tmpSubface%ndk = recvBuf(ii); ii = ii + 1
 
-          mm = ii + tmpSubface%ndi
-          tmpSubface%idfine => recvBuf(ii:(mm-1))
-          ii = mm
+                mm = ii + tmpSubface%ndi
+                tmpSubface%idfine => recvBuf(ii:(mm - 1))
+                ii = mm
 
-          mm = ii + tmpSubface%ndj
-          tmpSubface%jdfine => recvBuf(ii:(mm-1))
-          ii = mm
+                mm = ii + tmpSubface%ndj
+                tmpSubface%jdfine => recvBuf(ii:(mm - 1))
+                ii = mm
 
-          mm = ii + tmpSubface%ndk
-          tmpSubface%kdfine => recvBuf(ii:(mm-1))
-          ii = mm
+                mm = ii + tmpSubface%ndk
+                tmpSubface%kdfine => recvBuf(ii:(mm - 1))
+                ii = mm
 
-          ! Update the 1 to 1 subface info.
+                ! Update the 1 to 1 subface info.
 
-          call update1to1Coarse(level, tmpSubface)
+                call update1to1Coarse(level, tmpSubface)
 
-       enddo extractSubface
+            end do extractSubface
 
-       ! Release the memory of the receive buffer.
+            ! Release the memory of the receive buffer.
 
-       deallocate(recvBuf, stat=ierr)
-       if(ierr /= 0)                       &
-            call terminate("coarseDonorInfo", &
-            "Deallocation failure for recvBuf")
+            deallocate (recvBuf, stat=ierr)
+            if (ierr /= 0) &
+                call terminate("coarseDonorInfo", &
+                               "Deallocation failure for recvBuf")
 
-    enddo receives
+        end do receives
 
-    ! Complete the nonblocking sends.
+        ! Complete the nonblocking sends.
 
-    size = nMessageSend
-    do nn=1,nMessageSend
-       call mpi_waitany(size, sendRequests, proc, mpiStatus, ierr)
-    enddo
+        size = nMessageSend
+        do nn = 1, nMessageSend
+            call mpi_waitany(size, sendRequests, proc, mpiStatus, ierr)
+        end do
 
-    ! Release the memory of the send buffer.
+        ! Release the memory of the send buffer.
 
-    deallocate(sendBuf, stat=ierr)
-    if(ierr /= 0)                       &
-         call terminate("coarseDonorInfo", &
-         "Deallocation failure for sendBuf")
+        deallocate (sendBuf, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("coarseDonorInfo", &
+                           "Deallocation failure for sendBuf")
 
-    ! Synchronize the processors, because wild cards have been used.
+        ! Synchronize the processors, because wild cards have been used.
 
-    call mpi_barrier(ADflow_comm_world, ierr)
+        call mpi_barrier(ADflow_comm_world, ierr)
 
-  end subroutine coarseDonorInfo
+    end subroutine coarseDonorInfo
 
-  subroutine checkCoarse1to1(level)
-    !
-    !       checkCoarse1to1 removes the nonmatching block boundaries
-    !       from the list of 1 to 1 matching ones. They are in there,
-    !       because they are 1 to 1 matching on the finer grids.
-    !
-    use constants
-    use cgnsGrid
-    use block
-    use inputTimeSpectral
-    use coarseningInfo
-    use utils, only : terminate
-    use commonFormats, only : strings
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level
-    !
-    !      Local variables.
-    !
-    character(len=maxStringLen) :: errorMessage
+    subroutine checkCoarse1to1(level)
+        !
+        !       checkCoarse1to1 removes the nonmatching block boundaries
+        !       from the list of 1 to 1 matching ones. They are in there,
+        !       because they are 1 to 1 matching on the finer grids.
+        !
+        use constants
+        use cgnsGrid
+        use block
+        use inputTimeSpectral
+        use coarseningInfo
+        use utils, only: terminate
+        use commonFormats, only: strings
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level
+        !
+        !      Local variables.
+        !
+        character(len=maxStringLen) :: errorMessage
 
-    integer(kind=intType) :: i, nn, mm, kk, ll
+        integer(kind=intType) :: i, nn, mm, kk, ll
 
-    ! Loop over the number of domains.
+        ! Loop over the number of domains.
 
-    domains: do nn=1,nDom
+        domains: do nn = 1, nDom
 
-       ! Loop over the number of 1 to 1 subfaces. As this number can
-       ! change during the loop, the control statement is done via
-       ! an exit.
+            ! Loop over the number of 1 to 1 subfaces. As this number can
+            ! change during the loop, the control statement is done via
+            ! an exit.
 
-       i  = 1
-       n1to1: do
+            i = 1
+            n1to1: do
 
-          ! Exit the loop if the counter is larger than the number
-          ! of 1 to 1 subfaces.
+                ! Exit the loop if the counter is larger than the number
+                ! of 1 to 1 subfaces.
 
-          if(i > flowDoms(nn,level,1)%n1to1) exit
+                if (i > flowDoms(nn, level, 1)%n1to1) exit
 
-          ! Add the offset to i to store the correct place in the
-          ! subface info.
+                ! Add the offset to i to store the correct place in the
+                ! subface info.
 
-          mm = i + flowDoms(nn,level,1)%nBocos
+                mm = i + flowDoms(nn, level, 1)%nBocos
 
-          ! Test if this is still a 1 to 1 subface on the coarse grid.
+                ! Test if this is still a 1 to 1 subface on the coarse grid.
 
-          is1to1: if( coarseInfo(nn)%coarseIs1to1(i) ) then
+                is1to1: if (coarseInfo(nn)%coarseIs1to1(i)) then
 
-             ! Subface is still a 1 to 1 subface on the coarse grid.
-             ! Only the counter i must be updated.
+                    ! Subface is still a 1 to 1 subface on the coarse grid.
+                    ! Only the counter i must be updated.
 
-             i = i + 1
+                    i = i + 1
 
-          else is1to1
+                    else is1to1
 
-             ! Due to the coarsening the subface is not a 1 to 1 block
-             ! boundary anymore. Swap the current entry with the entry
-             ! kk.
+                    ! Due to the coarsening the subface is not a 1 to 1 block
+                    ! boundary anymore. Swap the current entry with the entry
+                    ! kk.
 
-             kk = flowDoms(nn,level,1)%nBocos + flowDoms(nn,level,1)%n1to1
+                    kk = flowDoms(nn, level, 1)%nBocos + flowDoms(nn, level, 1)%n1to1
 
-             ! The range info.
+                    ! The range info.
 
-             ll                             = flowDoms(nn,level,1)%inBeg(mm)
-             flowDoms(nn,level,1)%inBeg(mm) = flowDoms(nn,level,1)%inBeg(kk)
-             flowDoms(nn,level,1)%inBeg(kk) = ll
+                    ll = flowDoms(nn, level, 1)%inBeg(mm)
+                    flowDoms(nn, level, 1)%inBeg(mm) = flowDoms(nn, level, 1)%inBeg(kk)
+                    flowDoms(nn, level, 1)%inBeg(kk) = ll
 
-             ll                             = flowDoms(nn,level,1)%jnBeg(mm)
-             flowDoms(nn,level,1)%jnBeg(mm) = flowDoms(nn,level,1)%jnBeg(kk)
-             flowDoms(nn,level,1)%jnBeg(kk) = ll
+                    ll = flowDoms(nn, level, 1)%jnBeg(mm)
+                    flowDoms(nn, level, 1)%jnBeg(mm) = flowDoms(nn, level, 1)%jnBeg(kk)
+                    flowDoms(nn, level, 1)%jnBeg(kk) = ll
 
-             ll                             = flowDoms(nn,level,1)%knBeg(mm)
-             flowDoms(nn,level,1)%knBeg(mm) = flowDoms(nn,level,1)%knBeg(kk)
-             flowDoms(nn,level,1)%knBeg(kk) = ll
+                    ll = flowDoms(nn, level, 1)%knBeg(mm)
+                    flowDoms(nn, level, 1)%knBeg(mm) = flowDoms(nn, level, 1)%knBeg(kk)
+                    flowDoms(nn, level, 1)%knBeg(kk) = ll
 
-             ll                             = flowDoms(nn,level,1)%inEnd(mm)
-             flowDoms(nn,level,1)%inEnd(mm) = flowDoms(nn,level,1)%inEnd(kk)
-             flowDoms(nn,level,1)%inEnd(kk) = ll
+                    ll = flowDoms(nn, level, 1)%inEnd(mm)
+                    flowDoms(nn, level, 1)%inEnd(mm) = flowDoms(nn, level, 1)%inEnd(kk)
+                    flowDoms(nn, level, 1)%inEnd(kk) = ll
 
-             ll                             = flowDoms(nn,level,1)%jnEnd(mm)
-             flowDoms(nn,level,1)%jnEnd(mm) = flowDoms(nn,level,1)%jnEnd(kk)
-             flowDoms(nn,level,1)%jnEnd(kk) = ll
+                    ll = flowDoms(nn, level, 1)%jnEnd(mm)
+                    flowDoms(nn, level, 1)%jnEnd(mm) = flowDoms(nn, level, 1)%jnEnd(kk)
+                    flowDoms(nn, level, 1)%jnEnd(kk) = ll
 
-             ll                             = flowDoms(nn,level,1)%knEnd(mm)
-             flowDoms(nn,level,1)%knEnd(mm) = flowDoms(nn,level,1)%knEnd(kk)
-             flowDoms(nn,level,1)%knEnd(kk) = ll
+                    ll = flowDoms(nn, level, 1)%knEnd(mm)
+                    flowDoms(nn, level, 1)%knEnd(mm) = flowDoms(nn, level, 1)%knEnd(kk)
+                    flowDoms(nn, level, 1)%knEnd(kk) = ll
 
-             ! The donor info.
+                    ! The donor info.
 
-             ll                              = flowDoms(nn,level,1)%dinBeg(mm)
-             flowDoms(nn,level,1)%dinBeg(mm) = flowDoms(nn,level,1)%dinBeg(kk)
-             flowDoms(nn,level,1)%dinBeg(kk) = ll
+                    ll = flowDoms(nn, level, 1)%dinBeg(mm)
+                    flowDoms(nn, level, 1)%dinBeg(mm) = flowDoms(nn, level, 1)%dinBeg(kk)
+                    flowDoms(nn, level, 1)%dinBeg(kk) = ll
 
-             ll                              = flowDoms(nn,level,1)%djnBeg(mm)
-             flowDoms(nn,level,1)%djnBeg(mm) = flowDoms(nn,level,1)%djnBeg(kk)
-             flowDoms(nn,level,1)%djnBeg(kk) = ll
+                    ll = flowDoms(nn, level, 1)%djnBeg(mm)
+                    flowDoms(nn, level, 1)%djnBeg(mm) = flowDoms(nn, level, 1)%djnBeg(kk)
+                    flowDoms(nn, level, 1)%djnBeg(kk) = ll
 
-             ll                              = flowDoms(nn,level,1)%dknBeg(mm)
-             flowDoms(nn,level,1)%dknBeg(mm) = flowDoms(nn,level,1)%dknBeg(kk)
-             flowDoms(nn,level,1)%dknBeg(kk) = ll
+                    ll = flowDoms(nn, level, 1)%dknBeg(mm)
+                    flowDoms(nn, level, 1)%dknBeg(mm) = flowDoms(nn, level, 1)%dknBeg(kk)
+                    flowDoms(nn, level, 1)%dknBeg(kk) = ll
 
-             ll                              = flowDoms(nn,level,1)%dinEnd(mm)
-             flowDoms(nn,level,1)%dinEnd(mm) = flowDoms(nn,level,1)%dinEnd(kk)
-             flowDoms(nn,level,1)%dinEnd(kk) = ll
+                    ll = flowDoms(nn, level, 1)%dinEnd(mm)
+                    flowDoms(nn, level, 1)%dinEnd(mm) = flowDoms(nn, level, 1)%dinEnd(kk)
+                    flowDoms(nn, level, 1)%dinEnd(kk) = ll
 
-             ll                              = flowDoms(nn,level,1)%djnEnd(mm)
-             flowDoms(nn,level,1)%djnEnd(mm) = flowDoms(nn,level,1)%djnEnd(kk)
-             flowDoms(nn,level,1)%djnEnd(kk) = ll
+                    ll = flowDoms(nn, level, 1)%djnEnd(mm)
+                    flowDoms(nn, level, 1)%djnEnd(mm) = flowDoms(nn, level, 1)%djnEnd(kk)
+                    flowDoms(nn, level, 1)%djnEnd(kk) = ll
 
-             ll                              = flowDoms(nn,level,1)%dknEnd(mm)
-             flowDoms(nn,level,1)%dknEnd(mm) = flowDoms(nn,level,1)%dknEnd(kk)
-             flowDoms(nn,level,1)%dknEnd(kk) = ll
+                    ll = flowDoms(nn, level, 1)%dknEnd(mm)
+                    flowDoms(nn, level, 1)%dknEnd(mm) = flowDoms(nn, level, 1)%dknEnd(kk)
+                    flowDoms(nn, level, 1)%dknEnd(kk) = ll
 
-             ! The transformation matrix.
+                    ! The transformation matrix.
 
-             ll                          = flowDoms(nn,level,1)%l1(mm)
-             flowDoms(nn,level,1)%l1(mm) = flowDoms(nn,level,1)%l1(kk)
-             flowDoms(nn,level,1)%l1(kk) = ll
+                    ll = flowDoms(nn, level, 1)%l1(mm)
+                    flowDoms(nn, level, 1)%l1(mm) = flowDoms(nn, level, 1)%l1(kk)
+                    flowDoms(nn, level, 1)%l1(kk) = ll
 
-             ll                          = flowDoms(nn,level,1)%l2(mm)
-             flowDoms(nn,level,1)%l2(mm) = flowDoms(nn,level,1)%l2(kk)
-             flowDoms(nn,level,1)%l2(kk) = ll
+                    ll = flowDoms(nn, level, 1)%l2(mm)
+                    flowDoms(nn, level, 1)%l2(mm) = flowDoms(nn, level, 1)%l2(kk)
+                    flowDoms(nn, level, 1)%l2(kk) = ll
 
-             ll                          = flowDoms(nn,level,1)%l3(mm)
-             flowDoms(nn,level,1)%l3(mm) = flowDoms(nn,level,1)%l3(kk)
-             flowDoms(nn,level,1)%l3(kk) = ll
+                    ll = flowDoms(nn, level, 1)%l3(mm)
+                    flowDoms(nn, level, 1)%l3(mm) = flowDoms(nn, level, 1)%l3(kk)
+                    flowDoms(nn, level, 1)%l3(kk) = ll
 
-             ! The rest of the subface info.
+                    ! The rest of the subface info.
 
-             ll = flowDoms(nn,level,1)%BCType(mm)
-             flowDoms(nn,level,1)%BCType(mm) = &
-                  flowDoms(nn,level,1)%BCType(kk)
-             flowDoms(nn,level,1)%BCType(kk) = ll
+                    ll = flowDoms(nn, level, 1)%BCType(mm)
+                    flowDoms(nn, level, 1)%BCType(mm) = &
+                        flowDoms(nn, level, 1)%BCType(kk)
+                    flowDoms(nn, level, 1)%BCType(kk) = ll
 
-             ll = flowDoms(nn,level,1)%BCFaceID(mm)
-             flowDoms(nn,level,1)%BCFaceID(mm) = &
-                  flowDoms(nn,level,1)%BCFaceID(kk)
-             flowDoms(nn,level,1)%BCFaceID(kk) = ll
+                    ll = flowDoms(nn, level, 1)%BCFaceID(mm)
+                    flowDoms(nn, level, 1)%BCFaceID(mm) = &
+                        flowDoms(nn, level, 1)%BCFaceID(kk)
+                    flowDoms(nn, level, 1)%BCFaceID(kk) = ll
 
-             ll = flowDoms(nn,level,1)%neighBlock(mm)
-             flowDoms(nn,level,1)%neighBlock(mm) = &
-                  flowDoms(nn,level,1)%neighBlock(kk)
-             flowDoms(nn,level,1)%neighBlock(kk) = ll
+                    ll = flowDoms(nn, level, 1)%neighBlock(mm)
+                    flowDoms(nn, level, 1)%neighBlock(mm) = &
+                        flowDoms(nn, level, 1)%neighBlock(kk)
+                    flowDoms(nn, level, 1)%neighBlock(kk) = ll
 
-             ll = flowDoms(nn,level,1)%neighProc(mm)
-             flowDoms(nn,level,1)%neighProc(mm) = &
-                  flowDoms(nn,level,1)%neighProc(kk)
-             flowDoms(nn,level,1)%neighProc(kk) = ll
+                    ll = flowDoms(nn, level, 1)%neighProc(mm)
+                    flowDoms(nn, level, 1)%neighProc(mm) = &
+                        flowDoms(nn, level, 1)%neighProc(kk)
+                    flowDoms(nn, level, 1)%neighProc(kk) = ll
 
-             ll = flowDoms(nn,level,1)%groupNum(mm)
-             flowDoms(nn,level,1)%groupNum(mm) = &
-                  flowDoms(nn,level,1)%groupNum(kk)
-             flowDoms(nn,level,1)%groupNum(kk) = ll
+                    ll = flowDoms(nn, level, 1)%groupNum(mm)
+                    flowDoms(nn, level, 1)%groupNum(mm) = &
+                        flowDoms(nn, level, 1)%groupNum(kk)
+                    flowDoms(nn, level, 1)%groupNum(kk) = ll
 
-             ll = flowDoms(nn,level,1)%cgnsSubface(mm)
-             flowDoms(nn,level,1)%cgnsSubface(mm) = &
-                  flowDoms(nn,level,1)%cgnsSubface(kk)
-             flowDoms(nn,level,1)%cgnsSubface(kk) = ll
+                    ll = flowDoms(nn, level, 1)%cgnsSubface(mm)
+                    flowDoms(nn, level, 1)%cgnsSubface(mm) = &
+                        flowDoms(nn, level, 1)%cgnsSubface(kk)
+                    flowDoms(nn, level, 1)%cgnsSubface(kk) = ll
 
-             ! Decrease the number of 1 to 1 block boundaries. Note the
-             ! counter i should not be updated.
+                    ! Decrease the number of 1 to 1 block boundaries. Note the
+                    ! counter i should not be updated.
 
-             flowDoms(nn,level,1)%n1to1 = flowDoms(nn,level,1)%n1to1 - 1
+                    flowDoms(nn, level, 1)%n1to1 = flowDoms(nn, level, 1)%n1to1 - 1
 
-             ll = flowDoms(nn,1,1)%cgnsBlockID
-             write(errorMessage, strings) "Non-matching block-to-block face on zone ", cgnsDoms(ll)%zoneName, &
-               "...Support not implemented yet."
-             call terminate("checkCoarse1to1", errorMessage)
+                    ll = flowDoms(nn, 1, 1)%cgnsBlockID
+                    write (errorMessage, strings) "Non-matching block-to-block face on zone ", cgnsDoms(ll)%zoneName, &
+                        "...Support not implemented yet."
+                    call terminate("checkCoarse1to1", errorMessage)
 
-          endif is1to1
+                end if is1to1
 
-       enddo n1to1
+            end do n1to1
 
-       ! Copy the number of internal subfaces for the rest of
-       ! the spectral solutions.
+            ! Copy the number of internal subfaces for the rest of
+            ! the spectral solutions.
 
-       do i=2,nTimeIntervalsSpectral
-          flowDoms(nn,level,i)%n1to1 = flowDoms(nn,level,1)%n1to1
-       enddo
+            do i = 2, nTimeIntervalsSpectral
+                flowDoms(nn, level, i)%n1to1 = flowDoms(nn, level, 1)%n1to1
+            end do
 
-    enddo domains
+        end do domains
 
-  end subroutine checkCoarse1to1
+    end subroutine checkCoarse1to1
 
 end module coarseUtils
diff --git a/src/preprocessing/pointMatchedCommPattern.F90 b/src/preprocessing/pointMatchedCommPattern.F90
index 8db6875e3..3bd53da1c 100644
--- a/src/preprocessing/pointMatchedCommPattern.F90
+++ b/src/preprocessing/pointMatchedCommPattern.F90
@@ -1,3799 +1,3791 @@
 module pointMatchedCommPattern
 
-
 contains
 
-  subroutine determineCommPattern(level)
-    !
-    !       determineCommPattern determines the communication pattern
-    !       for the indicated grid level from the given block distribution
-    !       and corresponding halo info. Both the first and second level
-    !       cell halo communication pattern as well as the first level
-    !       nodal halo communication pattern is determined.
-    !       A recursive algorithm is used. First the face halo's are
-    !       determined and from those the indirect halo's can be obtained
-    !       by looping over the level of indirectness.
-    !       This routine controls the creation of the communication
-    !       pattern and basically contains the function calls to the
-    !       subtasks.
-    !
-    use block
-    use communication
-    use haloList
-    use periodicInfo
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    integer(kind=intType), intent(in) :: level
-    !
-    !      Local variables.
-    !
-    integer               :: ierr
-    integer(kind=intType) :: i
-
-    ! Determine the number of periodic faces.
-
-    call determinePeriodicFaces
-
-    ! Determine the amount of 1st and 2nd level cell halo's and 1st
-    ! level node halo's.
-
-    call determineNumberOfHalos(level)
-
-    ! Allocate the memory for the help variables needed to
-    ! determine the communication pattern.
-
-    call allocMemHaloList(level)
-
-    ! Determine the face halo's for both the 1st level cell and the
-    ! 1st level node halo's.
-
-    call determineFaceHalos(level)
-
-    ! Determine the indirect 1st level node halo's.
-
-    call determineIndirectHalos(nNodeHalo1st, iinode1st,    &
-         nodeHalo1st, transformNode, &
-         nodeIndex,   1_intType,     &
-         1_intType,   0_intType, level)
-
-    ! Release the memory of transformNode and nodeIndex.
-
-    do i=1,nDom
-       deallocate(nodeIndex(i)%entryList, stat=ierr)
-       if(ierr /= 0)                           &
-            call terminate("determineCommPattern", &
-            "Deallocation error for &
-            &nodeIndex(i)%entryList")
-    enddo
+    subroutine determineCommPattern(level)
+        !
+        !       determineCommPattern determines the communication pattern
+        !       for the indicated grid level from the given block distribution
+        !       and corresponding halo info. Both the first and second level
+        !       cell halo communication pattern as well as the first level
+        !       nodal halo communication pattern is determined.
+        !       A recursive algorithm is used. First the face halo's are
+        !       determined and from those the indirect halo's can be obtained
+        !       by looping over the level of indirectness.
+        !       This routine controls the creation of the communication
+        !       pattern and basically contains the function calls to the
+        !       subtasks.
+        !
+        use block
+        use communication
+        use haloList
+        use periodicInfo
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        integer(kind=intType), intent(in) :: level
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+        integer(kind=intType) :: i
+
+        ! Determine the number of periodic faces.
+
+        call determinePeriodicFaces
+
+        ! Determine the amount of 1st and 2nd level cell halo's and 1st
+        ! level node halo's.
+
+        call determineNumberOfHalos(level)
+
+        ! Allocate the memory for the help variables needed to
+        ! determine the communication pattern.
+
+        call allocMemHaloList(level)
+
+        ! Determine the face halo's for both the 1st level cell and the
+        ! 1st level node halo's.
+
+        call determineFaceHalos(level)
+
+        ! Determine the indirect 1st level node halo's.
+
+        call determineIndirectHalos(nNodeHalo1st, iinode1st, &
+                                    nodeHalo1st, transformNode, &
+                                    nodeIndex, 1_intType, &
+                                    1_intType, 0_intType, level)
+
+        ! Release the memory of transformNode and nodeIndex.
+
+        do i = 1, nDom
+            deallocate (nodeIndex(i)%entryList, stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("determineCommPattern", &
+                 "Deallocation error for &
+                 &nodeIndex(i)%entryList")
+        end do
+
+        deallocate (nodeIndex, transformNode, stat=ierr)
+        if (ierr /= 0) &
+             call terminate("determineCommPattern", &
+             "Deallocation error for nodeIndex &
+             &and transformNode")
+
+        ! Determine the indirect 1st level cell halo's.
+
+        call determineIndirectHalos(nCellHalo1st, iicell1st, &
+                                    cellHalo1st, transformCell, &
+                                    cellIndex, 2_intType, &
+                                    1_intType, 0_intType, level)
+
+        ! Initialize the 2nd level cell halo list. Basically the 1st
+        ! level cell halo list is copied.
+
+        call init2ndLevelCellHalos
+
+        ! Determine the indirect 2nd level cell halo's. As the indirect
+        ! 1st level halo's are already treated an offset of 1 is passed.
+
+        call determineIndirectHalos(nCellHalo2nd, iicell2nd, &
+                                    cellHalo2nd, transformCell, &
+                                    cellIndex, 2_intType, &
+                                    2_intType, 1_intType, level)
+
+        ! Release the memory of transformCell and cellIndex.
+
+        do i = 1, nDom
+            deallocate (cellIndex(i)%entryList, stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("determineCommPattern", &
+                 "Deallocation error for &
+                 &cellIndex(i)%entryList")
+        end do
+
+        deallocate (cellIndex, transformCell, stat=ierr)
+        if (ierr /= 0) &
+             call terminate("determineCommPattern", &
+             "Deallocation error for cellIndex &
+             &and transformCell")
+
+        ! Sort the three lists in increasing order.
+
+        call qsortHaloListType(nodeHalo1st, nNodeHalo1st)
+        call qsortHaloListType(cellHalo1st, nCellHalo1st)
+        call qsortHaloListType(cellHalo2nd, nCellHalo2nd)
+
+        ! Determine the final communication data structures to store the
+        ! halo info.
+
+        call finalCommStructures(nodeHalo1st, nNodeHalo1st, &
+                                 commPatternNode_1st(level), &
+                                 internalNode_1st(level), 0_intType)
+
+        call finalCommStructures(cellHalo1st, nCellHalo1st, &
+                                 commPatternCell_1st(level), &
+                                 internalCell_1st(level), 0_intType)
 
-    deallocate(nodeIndex, transformNode, stat=ierr)
-    if(ierr /= 0)                           &
-         call terminate("determineCommPattern", &
-         "Deallocation error for nodeIndex &
-         &and transformNode")
+        call finalCommStructures(cellHalo2nd, nCellHalo2nd, &
+                                 commPatternCell_2nd(level), &
+                                 internalCell_2nd(level), 0_intType)
 
-    ! Determine the indirect 1st level cell halo's.
+        ! Determine the transformation for periodic halo's.
 
-    call determineIndirectHalos(nCellHalo1st, iicell1st,    &
-         cellHalo1st, transformCell, &
-         cellIndex,   2_intType,     &
-         1_intType,    0_intType, level)
+        call determinePeriodicData(nodeHalo1st, nNodeHalo1st, &
+                                   commPatternNode_1st(level), &
+                                   internalNode_1st(level))
 
-    ! Initialize the 2nd level cell halo list. Basically the 1st
-    ! level cell halo list is copied.
+        call determinePeriodicData(cellHalo1st, nCellHalo1st, &
+                                   commPatternCell_1st(level), &
+                                   internalCell_1st(level))
 
-    call init2ndLevelCellHalos
+        call determinePeriodicData(cellHalo2nd, nCellHalo2nd, &
+                                   commPatternCell_2nd(level), &
+                                   internalCell_2nd(level))
 
-    ! Determine the indirect 2nd level cell halo's. As the indirect
-    ! 1st level halo's are already treated an offset of 1 is passed.
+        ! Deallocate the memory for the 3 halo lists.
 
-    call determineIndirectHalos(nCellHalo2nd, iicell2nd,    &
-         cellHalo2nd, transformCell, &
-         cellIndex,   2_intType,     &
-         2_intType,   1_intType, level)
+        call deallocatePointersHaloList(nodeHalo1st, nNodeHalo1st)
+        call deallocatePointersHaloList(cellHalo1st, nCellHalo1st)
+        call deallocatePointersHaloList(cellHalo2nd, nCellHalo2nd)
 
-    ! Release the memory of transformCell and cellIndex.
+        deallocate (nodeHalo1st, cellHalo1st, cellHalo2nd, stat=ierr)
+        if (ierr /= 0) &
+             call terminate("determineCommPattern", &
+             "Deallocation error for nodeHalo1st, &
+             &cellHalo1st and cellHalo2nd")
 
-    do i=1,nDom
-       deallocate(cellIndex(i)%entryList, stat=ierr)
-       if(ierr /= 0)                             &
+        ! Deallocate the memory of periodicGlobal.
+
+        deallocate (periodicGlobal, stat=ierr)
+        if (ierr /= 0) &
             call terminate("determineCommPattern", &
-            "Deallocation error for &
-            &cellIndex(i)%entryList")
-    enddo
-
-    deallocate(cellIndex, transformCell, stat=ierr)
-    if(ierr /= 0)                           &
-         call terminate("determineCommPattern", &
-         "Deallocation error for cellIndex &
-         &and transformCell")
-
-    ! Sort the three lists in increasing order.
-
-    call qsortHaloListType(nodeHalo1st, nNodeHalo1st)
-    call qsortHaloListType(cellHalo1st, nCellHalo1st)
-    call qsortHaloListType(cellHalo2nd, nCellHalo2nd)
-
-    ! Determine the final communication data structures to store the
-    ! halo info.
-
-    call finalCommStructures(nodeHalo1st, nNodeHalo1st,  &
-         commPatternNode_1st(level), &
-         internalNode_1st(level), 0_intType)
-
-    call finalCommStructures(cellHalo1st, nCellHalo1st,  &
-         commPatternCell_1st(level), &
-         internalCell_1st(level), 0_intType)
-
-    call finalCommStructures(cellHalo2nd, nCellHalo2nd,  &
-         commPatternCell_2nd(level), &
-         internalCell_2nd(level), 0_intType)
-
-    ! Determine the transformation for periodic halo's.
-
-    call determinePeriodicData(nodeHalo1st, nNodeHalo1st,  &
-         commPatternNode_1st(level), &
-         internalNode_1st(level))
-
-    call determinePeriodicData(cellHalo1st, nCellHalo1st,  &
-         commPatternCell_1st(level), &
-         internalCell_1st(level))
-
-    call determinePeriodicData(cellHalo2nd, nCellHalo2nd,  &
-         commPatternCell_2nd(level), &
-         internalCell_2nd(level))
-
-    ! Deallocate the memory for the 3 halo lists.
-
-    call deallocatePointersHaloList(nodeHalo1st, nNodeHalo1st)
-    call deallocatePointersHaloList(cellHalo1st, nCellHalo1st)
-    call deallocatePointersHaloList(cellHalo2nd, nCellHalo2nd)
-
-    deallocate(nodeHalo1st, cellHalo1st, cellHalo2nd, stat=ierr)
-    if(ierr /= 0)                            &
-         call terminate("determineCommPattern", &
-         "Deallocation error for nodeHalo1st, &
-         &cellHalo1st and cellHalo2nd")
-
-    ! Deallocate the memory of periodicGlobal.
-
-    deallocate(periodicGlobal, stat=ierr)
-    if(ierr /= 0)                            &
-         call terminate("determineCommPattern", &
-         "Deallocation error for periodicGlobal")
-
-  end subroutine determineCommPattern
-
-  !-----------------------------------------------------------------
-
-  subroutine deallocatePointersHaloList(entityHalo, nHalo)
-    !
-    !       deallocatePointersHaloList deallocates the memory of the
-    !       pointer variables of entityHalo.
-    !
-    use haloList
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in)               :: nHalo
-    type(haloListType), dimension(*), intent(inout) :: entityHalo
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: i
-
-    ! Loop over the number of halo's and deallocate the memory
-    ! of the pointer variables, if allocated.
-
-    do i=1,nHalo
-       if( associated(entityHalo(i)%interp) ) then
-          deallocate(entityHalo(i)%interp, stat=ierr)
-          if(ierr /= 0)                                  &
-               call terminate("deallocatePointersHaloList", &
-               "Deallocation failure for interp")
-       endif
-
-       if( associated(entityHalo(i)%periodicSubfaces) ) then
-          deallocate(entityHalo(i)%periodicSubfaces, stat=ierr)
-          if(ierr /= 0)                                  &
-               call terminate("deallocatePointersHaloList", &
-               "Deallocation failure for periodicSubfaces")
-       endif
-    enddo
-
-  end subroutine deallocatePointersHaloList
-
-  subroutine determineIndirectHalos(nHalo, iihalo, entityHalo,  &
-       transform, entityIndex,     &
-       start, nLevel, offset,      &
-       gridLevel)
-    !
-    !       determineIndirectHalos determines the indirect halo's via a
-    !       recursive algorithm.
-    !       Step 1.
-    !       =======
-    !       Determine for every indirect halo the closest face halo.
-    !       If several options exist choose the one that does not
-    !       correspond to a boundary halo, if possible. If several
-    !       non-boundary halo's exist, just pick one. If all the closest
-    !       face halo's are boundary halo's then there is no corresponding
-    !       halo and the state is determined by the boundary conditions
-    !       and/or extrapolation.
-    !       Store the direction from the face halo to the indirect halo.
-    !       Step 2.
-    !       =======
-    !       Determine the level of indirectness of every indirect halo.
-    !       This is the sum of the absolute values of the elements of the
-    !       direction vector. For 1st level halo's the maximum level of
-    !       of indirectness is 2; for 2nd level halo's it is 5. These
-    !       numbers are for 3 space dimensions.
-    !       Step 3.
-    !       =======
-    !       Loop over the number of indirect levels.
-    !         For every halo of the current level of indirectness do:
-    !           - apply the transformation matrix of its corresponding
-    !             face halo to the direction vector.
-    !           - start in the donor cell of the face halo and travel in
-    !             the direction of the transformed direction vector.
-    !           - you either end up in an internal cell/node or in a halo.
-    !             Case internal: you're done. Internal cell/node is the
-    !                            donor.
-    !             Case halo: this is guarenteed to be a halo of at least
-    !                        one level of indirectness less than the
-    !                        current level. Thus the donor is known and
-    !                        you're done too. It is possible that it is a
-    !                        boundary halo, but this is allowed.
-    !       End loop over the number of indirect levels.
-    !
-    use haloList
-    use indirectHalo
-    use communication
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nHalo, start, nLevel, offset
-    integer(kind=intType), intent(in) :: gridLevel
-    integer(kind=intType), intent(inout) :: iihalo
-
-    integer(kind=intType), dimension(:,:), intent(in) :: transform
-
-    type(haloListType), dimension(:), intent(inout) :: entityHalo
-    type(indexListType), dimension(:), intent(inout) :: entityIndex
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: i
-
-    ! Determine for every indirect halo the closest direct halo.
-
-    call closestDirectHalos(entityHalo, entityIndex, start, &
-         nLevel, offset, gridLevel)
-
-    ! Sort the indirect halo's.
-
-    call qsortIndHaloType(indHalo, nIndHalo)
-
-    ! Abbreviate the number of indirect levels a bit easier, allocate
-    ! the memory for nHaloPerLev and nHaloPerProc, and determine
-    ! the values of nHaloPerLev.
-
-    nLevOfInd = indHalo(nIndHalo)%levOfInd
-    allocate(nHaloPerLev(0:nlevOfInd), nHaloPerProc(0:nProc), &
-         stat=ierr)
-    if(ierr /= 0)                                          &
-         call terminate("determineIndirectHalos",             &
-         "Allocation error for nHaloPerLev and &
-         &nHaloPerProc")
-
-    nHaloPerLev = 0
-    do i=1,nIndHalo
-       nHaloPerLev(indHalo(i)%levOfInd) = &
-            nHaloPerLev(indHalo(i)%levOfInd) + 1
-    enddo
-
-    ! Put nHaloPerLev in cumulative storage format.
-
-    do i=1,nlevOfInd
-       nHaloPerLev(i) = nHaloPerLev(i) + nHaloPerLev(i-1)
-    enddo
-
-    ! Loop over the number of levels of indirectness.
-
-    do i=1,nLevOfInd
-
-       ! Determine the halo's for this level of indirectness.
-
-       call indirectHalosPerLevel(i, iihalo, entityHalo, transform, &
-            entityIndex)
-
-       ! Synchronize the processors to avoid possible problems.
-
-       call mpi_barrier(ADflow_comm_world, ierr)
-    enddo
-
-    ! Release the memory of the module indirectHalo.
-
-    deallocate(indHalo, nHaloPerLev, nHaloPerProc, stat=ierr)
-    if(ierr /= 0) &
-         call terminate("determineIndirectHalos",        &
-         "Deallocation error for indHalo, &
-         &nHaloPerLev and nHaloPerProc")
-
-    ! Check in debug mode if iihalo equals nHalo, as it should be.
-
-    if( debug ) then
-       if(iihalo /= nHalo)                        &
-            call terminate("determineIndirectHalos", &
-            "iihalo differs from nHalo")
-    endif
-
-  end subroutine determineIndirectHalos
-
-
-  subroutine determinePeriodicData(entityHalo,   nHalo, &
-       externalComm, internalComm)
-    !
-    !       determinePeriodicData determines the periodic transformation
-    !       for both the external and the internal communication patterns.
-    !
-    use constants
-    use communication
-    use haloList
-    use periodicInfo
-    use utils, only : terminate
-    use sorting, only :qsortIntegers
-
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in)            :: nHalo
-    type(haloListType), dimension(:), intent(in) :: entityHalo
-
-    type(commType),         intent(inout) :: externalComm
-    type(internalCommType), intent(inout) :: internalComm
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: i, nn
-    integer(kind=intType) :: nPerHalos, nInternal, nExternal
-
-    integer(kind=intType), dimension(5) :: tmp
+                           "Deallocation error for periodicGlobal")
+
+    end subroutine determineCommPattern
+
+    !-----------------------------------------------------------------
+
+    subroutine deallocatePointersHaloList(entityHalo, nHalo)
+        !
+        !       deallocatePointersHaloList deallocates the memory of the
+        !       pointer variables of entityHalo.
+        !
+        use haloList
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nHalo
+        type(haloListType), dimension(*), intent(inout) :: entityHalo
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: i
+
+        ! Loop over the number of halo's and deallocate the memory
+        ! of the pointer variables, if allocated.
+
+        do i = 1, nHalo
+            if (associated(entityHalo(i)%interp)) then
+                deallocate (entityHalo(i)%interp, stat=ierr)
+                if (ierr /= 0) &
+                    call terminate("deallocatePointersHaloList", &
+                                   "Deallocation failure for interp")
+            end if
+
+            if (associated(entityHalo(i)%periodicSubfaces)) then
+                deallocate (entityHalo(i)%periodicSubfaces, stat=ierr)
+                if (ierr /= 0) &
+                    call terminate("deallocatePointersHaloList", &
+                                   "Deallocation failure for periodicSubfaces")
+            end if
+        end do
+
+    end subroutine deallocatePointersHaloList
+
+    subroutine determineIndirectHalos(nHalo, iihalo, entityHalo, &
+                                      transform, entityIndex, &
+                                      start, nLevel, offset, &
+                                      gridLevel)
+        !
+        !       determineIndirectHalos determines the indirect halo's via a
+        !       recursive algorithm.
+        !       Step 1.
+        !       =======
+        !       Determine for every indirect halo the closest face halo.
+        !       If several options exist choose the one that does not
+        !       correspond to a boundary halo, if possible. If several
+        !       non-boundary halo's exist, just pick one. If all the closest
+        !       face halo's are boundary halo's then there is no corresponding
+        !       halo and the state is determined by the boundary conditions
+        !       and/or extrapolation.
+        !       Store the direction from the face halo to the indirect halo.
+        !       Step 2.
+        !       =======
+        !       Determine the level of indirectness of every indirect halo.
+        !       This is the sum of the absolute values of the elements of the
+        !       direction vector. For 1st level halo's the maximum level of
+        !       of indirectness is 2; for 2nd level halo's it is 5. These
+        !       numbers are for 3 space dimensions.
+        !       Step 3.
+        !       =======
+        !       Loop over the number of indirect levels.
+        !         For every halo of the current level of indirectness do:
+        !           - apply the transformation matrix of its corresponding
+        !             face halo to the direction vector.
+        !           - start in the donor cell of the face halo and travel in
+        !             the direction of the transformed direction vector.
+        !           - you either end up in an internal cell/node or in a halo.
+        !             Case internal: you're done. Internal cell/node is the
+        !                            donor.
+        !             Case halo: this is guarenteed to be a halo of at least
+        !                        one level of indirectness less than the
+        !                        current level. Thus the donor is known and
+        !                        you're done too. It is possible that it is a
+        !                        boundary halo, but this is allowed.
+        !       End loop over the number of indirect levels.
+        !
+        use haloList
+        use indirectHalo
+        use communication
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nHalo, start, nLevel, offset
+        integer(kind=intType), intent(in) :: gridLevel
+        integer(kind=intType), intent(inout) :: iihalo
+
+        integer(kind=intType), dimension(:, :), intent(in) :: transform
+
+        type(haloListType), dimension(:), intent(inout) :: entityHalo
+        type(indexListType), dimension(:), intent(inout) :: entityIndex
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: i
+
+        ! Determine for every indirect halo the closest direct halo.
+
+        call closestDirectHalos(entityHalo, entityIndex, start, &
+                                nLevel, offset, gridLevel)
+
+        ! Sort the indirect halo's.
+
+        call qsortIndHaloType(indHalo, nIndHalo)
+
+        ! Abbreviate the number of indirect levels a bit easier, allocate
+        ! the memory for nHaloPerLev and nHaloPerProc, and determine
+        ! the values of nHaloPerLev.
+
+        nLevOfInd = indHalo(nIndHalo)%levOfInd
+        allocate (nHaloPerLev(0:nlevOfInd), nHaloPerProc(0:nProc), &
+                  stat=ierr)
+        if (ierr /= 0) &
+             call terminate("determineIndirectHalos", &
+             "Allocation error for nHaloPerLev and &
+             &nHaloPerProc")
+
+        nHaloPerLev = 0
+        do i = 1, nIndHalo
+            nHaloPerLev(indHalo(i)%levOfInd) = &
+                nHaloPerLev(indHalo(i)%levOfInd) + 1
+        end do
+
+        ! Put nHaloPerLev in cumulative storage format.
+
+        do i = 1, nlevOfInd
+            nHaloPerLev(i) = nHaloPerLev(i) + nHaloPerLev(i - 1)
+        end do
+
+        ! Loop over the number of levels of indirectness.
+
+        do i = 1, nLevOfInd
+
+            ! Determine the halo's for this level of indirectness.
+
+            call indirectHalosPerLevel(i, iihalo, entityHalo, transform, &
+                                       entityIndex)
+
+            ! Synchronize the processors to avoid possible problems.
+
+            call mpi_barrier(ADflow_comm_world, ierr)
+        end do
+
+        ! Release the memory of the module indirectHalo.
+
+        deallocate (indHalo, nHaloPerLev, nHaloPerProc, stat=ierr)
+        if (ierr /= 0) &
+             call terminate("determineIndirectHalos", &
+             "Deallocation error for indHalo, &
+             &nHaloPerLev and nHaloPerProc")
+
+        ! Check in debug mode if iihalo equals nHalo, as it should be.
+
+        if (debug) then
+            if (iihalo /= nHalo) &
+                call terminate("determineIndirectHalos", &
+                               "iihalo differs from nHalo")
+        end if
+
+    end subroutine determineIndirectHalos
+
+    subroutine determinePeriodicData(entityHalo, nHalo, &
+                                     externalComm, internalComm)
+        !
+        !       determinePeriodicData determines the periodic transformation
+        !       for both the external and the internal communication patterns.
+        !
+        use constants
+        use communication
+        use haloList
+        use periodicInfo
+        use utils, only: terminate
+        use sorting, only: qsortIntegers
+
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nHalo
+        type(haloListType), dimension(:), intent(in) :: entityHalo
+
+        type(commType), intent(inout) :: externalComm
+        type(internalCommType), intent(inout) :: internalComm
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: i, nn
+        integer(kind=intType) :: nPerHalos, nInternal, nExternal
 
-    type(periodicSubfacesHaloType), dimension(:), allocatable :: &
-         periodic
-
-    ! Loop over the halo's and determine the number of halo's
-    ! for which periodic transformations are present.
-
-    nn = 0
-    do i=1,nHalo
-       if(entityHalo(i)%nPeriodicSubfaces > 0) nn = nn + 1
-    enddo
-
-    ! Allocate the memory for periodic and copy the relevant data
-    ! from entityHalo. Note that a shallow copy is made, i.e. the
-    ! array for periodicSubfaces is not allocated. It just points
-    ! to the entry in entityHalo.
-
-    nPerHalos = nn
-    allocate(periodic(nn), stat=ierr)
-    if(ierr /= 0)                             &
-         call terminate("determinePeriodicData", &
-         "Memory allocation failure for periodic")
-
-    nn = 0
-    do i=1,nHalo
-       if(entityHalo(i)%nPeriodicSubfaces > 0) then
-          nn = nn + 1
-          periodic(nn)%indexInHaloList   = i
-          periodic(nn)%nPeriodicSubfaces = entityHalo(i)%nPeriodicSubfaces
-          periodic(nn)%periodicSubfaces => entityHalo(i)%periodicSubfaces
-
-          if(entityHalo(i)%donorProc == myID) then
-             periodic(nn)%internalHalo = .true.
-          else
-             periodic(nn)%internalHalo = .false.
-          endif
-       endif
-    enddo
-
-    ! Make sure that the numbers of the periodic subfaces are
-    ! sorted in increasing order. This is important for the sorting
-    ! of the derived datatype periodicSubfacesHaloType.
-
-    do i=1,nPerHalos
-       do nn=1,periodic(i)%nPeriodicSubfaces
-          tmp(nn) = periodic(i)%periodicSubfaces(nn)
-       enddo
-
-       call qsortIntegers(tmp, periodic(i)%nPeriodicSubfaces)
-
-       do nn=1,periodic(i)%nPeriodicSubfaces
-          periodic(i)%periodicSubfaces(nn) = tmp(nn)
-       enddo
-    enddo
-
-    ! Sort periodic in increasing order, such that the number
-    ! of different transformations can be determined.
-
-    call qsortPeriodicSubfacesHaloType(periodic, nPerHalos)
-
-    ! Determine the number of periodic halo's, which are also
-    ! internal. These are now numbered first in periodic.
-
-    nInternal = 0
-    do i=1,nPerHalos
-       if( periodic(i)%internalHalo ) nInternal = nInternal + 1
-    enddo
-
-    nExternal = nPerHalos - nInternal
-
-    ! Call the internal subroutine to do the work for the
-    ! internal and external communication pattern.
-
-    nullify(internalComm%periodicData)
-    if(nInternal == 0) then
-       internalComm%nPeriodic = 0
-    else
-       call setPeriodicData(periodic, nInternal,    &
-            internalComm%nPeriodic, &
-            internalComm%periodicData)
-    endif
-
-    nullify(externalComm%periodicData)
-    if(nExternal == 0) then
-       externalComm%nPeriodic = 0
-    else
-       call setPeriodicData(periodic(nInternal+1:), nExternal, &
-            externalComm%nPeriodic,            &
-            externalComm%periodicData)
-    endif
-
-    ! Release the memory of period again. As the pointer was not
-    ! allocated there is no need to release it either.
-
-    deallocate(periodic, stat=ierr)
-    if(ierr /= 0)                             &
-         call terminate("determinePeriodicData", &
-         "Deallocation failure for periodic")
+        integer(kind=intType), dimension(5) :: tmp
 
-    !=================================================================
+        type(periodicSubfacesHaloType), dimension(:), allocatable :: &
+            periodic
 
-  contains
-
-    !===============================================================
-
-    subroutine setPeriodicData(perHalo, nPerHalo, nPeriodic, &
-         periodicData)
-      !
-      !         setPeriodicData stores the periodic transformations and the
-      !         corresponding halo's to which it must be applied in
-      !         nPeriodic and periodicData. These variables are part of
-      !         either the internal or external communication pattern.
-      !
-      use cgnsGrid
-      implicit none
-      !
-      !        Subroutine arguments.
-      !
-      integer(kind=intType), intent(in) :: nPerHalo
-      type(periodicSubfacesHaloType), dimension(:), intent(in) :: &
-           perHalo
-
-      integer(kind=intType), intent(out) :: nPeriodic
-      type(periodicDataType), dimension(:), pointer :: periodicData
-      !
-      !        Local variables.
-      !
-      integer(kind=intType) :: nn, mm, ll, kk
-      integer(kind=intType) :: cgnsBlock, cgnsSub
-
-      integer(kind=intType), dimension(0:nPerHalo) :: nHaloPerTransform
-
-      integer(kind=intType), pointer, dimension(:)   :: blockID
-      integer(kind=intType), pointer, dimension(:,:) :: indices
-      real(kind=realType) :: theta, phi, psi
-      real(kind=realType) :: cosTheta, cosPhi, cosPsi
-      real(kind=realType) :: sinTheta, sinPhi, sinPsi
-
-      real(kind=realType), dimension(3,3) :: rotMat, rotMatrix, tmpMat
-      real(kind=realType), dimension(3)   :: trans, translation
-      real(kind=realType), dimension(3)   :: rotCenter
-
-      ! Determine the number of different periodic transformations
-      ! as well as the number of halo's per transformation.
-      ! Note that the operator == of periodicSubfacesHaloType only
-      ! compares the periodic subface information of the halos.
-
-      nHaloPerTransform(0) = 0
-
-      nn = min(1_intType, nPerHalo)
-      nHaloPerTransform(nn) = nn
-
-      do i=2,nPerHalo
-         if(perHalo(i) == perHalo(i-1)) then
-            nHaloPerTransform(nn) = nHaloPerTransform(nn) + 1
-         else
-            nn = nn + 1
-            nHaloPerTransform(nn) = nHaloPerTransform(nn-1) + 1
-         endif
-      enddo
+        ! Loop over the halo's and determine the number of halo's
+        ! for which periodic transformations are present.
+
+        nn = 0
+        do i = 1, nHalo
+            if (entityHalo(i)%nPeriodicSubfaces > 0) nn = nn + 1
+        end do
+
+        ! Allocate the memory for periodic and copy the relevant data
+        ! from entityHalo. Note that a shallow copy is made, i.e. the
+        ! array for periodicSubfaces is not allocated. It just points
+        ! to the entry in entityHalo.
+
+        nPerHalos = nn
+        allocate (periodic(nn), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("determinePeriodicData", &
+                           "Memory allocation failure for periodic")
+
+        nn = 0
+        do i = 1, nHalo
+            if (entityHalo(i)%nPeriodicSubfaces > 0) then
+                nn = nn + 1
+                periodic(nn)%indexInHaloList = i
+                periodic(nn)%nPeriodicSubfaces = entityHalo(i)%nPeriodicSubfaces
+                periodic(nn)%periodicSubfaces => entityHalo(i)%periodicSubfaces
+
+                if (entityHalo(i)%donorProc == myID) then
+                    periodic(nn)%internalHalo = .true.
+                else
+                    periodic(nn)%internalHalo = .false.
+                end if
+            end if
+        end do
+
+        ! Make sure that the numbers of the periodic subfaces are
+        ! sorted in increasing order. This is important for the sorting
+        ! of the derived datatype periodicSubfacesHaloType.
+
+        do i = 1, nPerHalos
+            do nn = 1, periodic(i)%nPeriodicSubfaces
+                tmp(nn) = periodic(i)%periodicSubfaces(nn)
+            end do
+
+            call qsortIntegers(tmp, periodic(i)%nPeriodicSubfaces)
+
+            do nn = 1, periodic(i)%nPeriodicSubfaces
+                periodic(i)%periodicSubfaces(nn) = tmp(nn)
+            end do
+        end do
+
+        ! Sort periodic in increasing order, such that the number
+        ! of different transformations can be determined.
+
+        call qsortPeriodicSubfacesHaloType(periodic, nPerHalos)
+
+        ! Determine the number of periodic halo's, which are also
+        ! internal. These are now numbered first in periodic.
+
+        nInternal = 0
+        do i = 1, nPerHalos
+            if (periodic(i)%internalHalo) nInternal = nInternal + 1
+        end do
+
+        nExternal = nPerHalos - nInternal
+
+        ! Call the internal subroutine to do the work for the
+        ! internal and external communication pattern.
+
+        nullify (internalComm%periodicData)
+        if (nInternal == 0) then
+            internalComm%nPeriodic = 0
+        else
+            call setPeriodicData(periodic, nInternal, &
+                                 internalComm%nPeriodic, &
+                                 internalComm%periodicData)
+        end if
+
+        nullify (externalComm%periodicData)
+        if (nExternal == 0) then
+            externalComm%nPeriodic = 0
+        else
+            call setPeriodicData(periodic(nInternal + 1:), nExternal, &
+                                 externalComm%nPeriodic, &
+                                 externalComm%periodicData)
+        end if
+
+        ! Release the memory of period again. As the pointer was not
+        ! allocated there is no need to release it either.
+
+        deallocate (periodic, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("determinePeriodicData", &
+                           "Deallocation failure for periodic")
+
+        !=================================================================
+
+    contains
+
+        !===============================================================
+
+        subroutine setPeriodicData(perHalo, nPerHalo, nPeriodic, &
+                                   periodicData)
+            !
+            !         setPeriodicData stores the periodic transformations and the
+            !         corresponding halo's to which it must be applied in
+            !         nPeriodic and periodicData. These variables are part of
+            !         either the internal or external communication pattern.
+            !
+            use cgnsGrid
+            implicit none
+            !
+            !        Subroutine arguments.
+            !
+            integer(kind=intType), intent(in) :: nPerHalo
+            type(periodicSubfacesHaloType), dimension(:), intent(in) :: &
+                perHalo
+
+            integer(kind=intType), intent(out) :: nPeriodic
+            type(periodicDataType), dimension(:), pointer :: periodicData
+            !
+            !        Local variables.
+            !
+            integer(kind=intType) :: nn, mm, ll, kk
+            integer(kind=intType) :: cgnsBlock, cgnsSub
+
+            integer(kind=intType), dimension(0:nPerHalo) :: nHaloPerTransform
+
+            integer(kind=intType), pointer, dimension(:) :: blockID
+            integer(kind=intType), pointer, dimension(:, :) :: indices
+            real(kind=realType) :: theta, phi, psi
+            real(kind=realType) :: cosTheta, cosPhi, cosPsi
+            real(kind=realType) :: sinTheta, sinPhi, sinPsi
+
+            real(kind=realType), dimension(3, 3) :: rotMat, rotMatrix, tmpMat
+            real(kind=realType), dimension(3) :: trans, translation
+            real(kind=realType), dimension(3) :: rotCenter
+
+            ! Determine the number of different periodic transformations
+            ! as well as the number of halo's per transformation.
+            ! Note that the operator == of periodicSubfacesHaloType only
+            ! compares the periodic subface information of the halos.
+
+            nHaloPerTransform(0) = 0
+
+            nn = min(1_intType, nPerHalo)
+            nHaloPerTransform(nn) = nn
+
+            do i = 2, nPerHalo
+                if (perHalo(i) == perHalo(i - 1)) then
+                    nHaloPerTransform(nn) = nHaloPerTransform(nn) + 1
+                else
+                    nn = nn + 1
+                    nHaloPerTransform(nn) = nHaloPerTransform(nn - 1) + 1
+                end if
+            end do
+
+            ! Set nPeriodic and allocate the memory for periodicData.
+            ! If there are no periodic transformations return after
+            ! nPeriodic is set.
+
+            nPeriodic = nn
+            if (nPeriodic == 0) return
+
+            allocate (periodicData(nn), stat=ierr)
+            if (ierr /= 0) &
+                call terminate("setPeriodicData", &
+                               "Memory allocation failure for periodicData")
+
+            ! Loop over the number of periodic transformations.
+
+            periodicLoop: do nn = 1, nPeriodic
+
+                ! Determine the number of halo's for this transformation
+                ! and allocate the memory for the block ID and indices.
+                ! Set pointers for readability.
+
+                mm = nHaloPerTransform(nn) - nHaloPerTransform(nn - 1)
+
+                periodicData(nn)%nHalos = mm
+                allocate (periodicData(nn)%block(mm), &
+                          periodicData(nn)%indices(mm, 3), stat=ierr)
+                if (ierr /= 0) &
+                     call terminate("setPeriodicData", &
+                     "Memory allocation failure for block &
+                     &and indices.")
+
+                blockID => periodicData(nn)%block
+                indices => periodicData(nn)%indices
+
+                ! Loop over the number of periodic halo's and set the
+                ! blockID and indices.
+
+                kk = 0
+                do mm = (nHaloPerTransform(nn - 1) + 1), nHaloPerTransform(nn)
+                    kk = kk + 1
+                    ll = perHalo(mm)%indexInHaloList
+
+                    blockID(kk) = entityHalo(ll)%myBlock
+                    indices(kk, 1) = entityHalo(ll)%myI
+                    indices(kk, 2) = entityHalo(ll)%myJ
+                    indices(kk, 3) = entityHalo(ll)%myK
+                end do
 
-      ! Set nPeriodic and allocate the memory for periodicData.
-      ! If there are no periodic transformations return after
-      ! nPeriodic is set.
+                ! Determine the rotation matrix, rotation center and
+                ! translation vector of this periodic transformation.
+                ! The sorting has been such that all the halo's set above
+                ! have the same transformation, so it is enough to consider
+                ! the first element.
 
-      nPeriodic = nn
-      if(nPeriodic == 0) return
+                kk = nHaloPerTransform(nn - 1) + 1
 
-      allocate(periodicData(nn), stat=ierr)
-      if(ierr /= 0)                       &
-           call terminate("setPeriodicData", &
-           "Memory allocation failure for periodicData")
+                ! Set the rotation center to the rotation center of the
+                ! first subface. This should be the same for all of them.
+                ! Initialize the rotation matrix to the identity and the
+                ! translation vector to zero.
 
-      ! Loop over the number of periodic transformations.
+                ll = perHalo(kk)%periodicSubfaces(1)
+                cgnsBlock = periodicGlobal(ll)%cgnsBlock
+                cgnsSub = periodicGlobal(ll)%cgnsSubface
 
-      periodicLoop: do nn=1,nPeriodic
+                rotCenter = cgnsDoms(cgnsBlock)%conn1to1(cgnsSub)%rotationCenter
 
-         ! Determine the number of halo's for this transformation
-         ! and allocate the memory for the block ID and indices.
-         ! Set pointers for readability.
+                rotMat(1, 1) = one; rotMat(1, 2) = zero; rotMat(1, 3) = zero
+                rotMat(2, 1) = zero; rotMat(2, 2) = one; rotMat(2, 3) = zero
+                rotMat(3, 1) = zero; rotMat(3, 2) = zero; rotMat(3, 3) = one
+
+                trans = zero
+
+                ! Loop over the number of periodic subface for the total
+                ! periodic transformation.
+
+                subfaceLoop: do mm = 1, perHalo(kk)%nPeriodicSubfaces
 
-         mm = nHaloPerTransform(nn) - nHaloPerTransform(nn-1)
+                    ll = perHalo(kk)%periodicSubfaces(1)
+                    cgnsBlock = periodicGlobal(ll)%cgnsBlock
+                    cgnsSub = periodicGlobal(ll)%cgnsSubface
+
+                    ! Store the data for the transformation of this subface
+                    ! a bit easier.
+
+                    translation = cgnsDoms(cgnsBlock)%conn1to1(cgnsSub)%translation
+
+                    theta = cgnsDoms(cgnsBlock)%conn1to1(cgnsSub)%rotationAngles(1)
+                    phi = cgnsDoms(cgnsBlock)%conn1to1(cgnsSub)%rotationAngles(2)
+                    psi = cgnsDoms(cgnsBlock)%conn1to1(cgnsSub)%rotationAngles(3)
+
+                    ! Construct the rotation matrix for this subface. Actually
+                    ! the inverse (== transpose) is constructed, because the
+                    ! cgns data is for the transformation from the current face
+                    ! to the donor and here the inverse is needed. Note
+                    ! furthermore that the sequence of rotation is first
+                    ! rotation around the x-axis, followed by rotation around
+                    ! the y-axis and finally rotation around the z-axis.
+
+                    cosTheta = cos(theta); cosPhi = cos(phi); cosPsi = cos(psi)
+                    sinTheta = sin(theta); sinPhi = sin(phi); sinPsi = sin(psi)
+
+                    rotMatrix(1, 1) = cosPhi * cosPsi
+                    rotMatrix(1, 2) = cosPhi * sinPsi
+                    rotMatrix(1, 3) = -sinPhi
+
+                    rotMatrix(2, 1) = sinTheta * sinPhi * cosPsi - cosTheta * sinPsi
+                    rotMatrix(2, 2) = sinTheta * sinPhi * sinPsi + cosTheta * cosPsi
+                    rotMatrix(2, 3) = sinTheta * cosPhi
+
+                    rotMatrix(3, 1) = cosTheta * sinPhi * cosPsi + sinTheta * sinPsi
+                    rotMatrix(3, 2) = cosTheta * sinPhi * sinPsi - sinTheta * cosPsi
+                    rotMatrix(3, 3) = cosTheta * cosPhi
+
+                    ! Create the product of rotMat and rotMatrix. Copy the
+                    ! result back into rotMat.
+
+                    tmpMat(:, 1) = rotMat(:, 1) * rotMatrix(1, 1) &
+                                   + rotMat(:, 2) * rotMatrix(2, 1) &
+                                   + rotMat(:, 3) * rotMatrix(3, 1)
+                    tmpMat(:, 2) = rotMat(:, 1) * rotMatrix(1, 2) &
+                                   + rotMat(:, 2) * rotMatrix(2, 2) &
+                                   + rotMat(:, 3) * rotMatrix(3, 2)
+                    tmpMat(:, 3) = rotMat(:, 1) * rotMatrix(1, 3) &
+                                   + rotMat(:, 2) * rotMatrix(2, 3) &
+                                   + rotMat(:, 3) * rotMatrix(3, 3)
+
+                    rotMat = tmpMat
+
+                    ! Update the translation vector. As we need the inverse
+                    ! of the transformation translation must be premultiplied
+                    ! by the rotation matrix and negated.
+
+                    trans = trans - rotMatrix(:, 1) * translation(1) &
+                            - rotMatrix(:, 2) * translation(2) &
+                            - rotMatrix(:, 3) * translation(3)
+
+                end do subfaceLoop
+
+                ! Store the results in periodicData(nn).
+
+                periodicData(nn)%rotMatrix = rotMat
+                periodicData(nn)%rotCenter = rotCenter
+                periodicData(nn)%translation = trans
+
+            end do periodicLoop
+
+        end subroutine setPeriodicData
+
+    end subroutine determinePeriodicData
+
+    subroutine allocMemHaloList(level)
+        !
+        !       allocMemHaloList allocates the memory for the variables
+        !       needed to construct the communication lists and the periodic
+        !       information. These variables are located in the module
+        !       haloList. Only the 1st level halo variables are allocated here
+        !       to avoid unnecessary memory usage. The 2nd level cell halo
+        !       are allocated later on in init2ndLevelCellHalos.
+        !
+        use block
+        use haloList
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: i
+        integer(kind=intType) :: ie, je, ke, ib, jb, kb
+
+        ! Allocate the memory for the 1st level cell and node halo lists.
+
+        allocate (cellHalo1st(nCellHalo1st), &
+                  nodeHalo1st(nNodeHalo1st), stat=ierr)
+        if (ierr /= 0) &
+             call terminate("allocMemHaloList", &
+             "Memory allocation failure for cellHalo1st &
+             &and nodeHalo1st")
+
+        ! Initialize the level of indirectness to 0. This will only be
+        ! overwritten for indirect boundary halo's. Also initialize the
+        ! number of periodid subfaces to 0.
+
+        do i = 1, nCellHalo1st
+            cellHalo1st(i)%levOfInd = 0
+            cellHalo1st(i)%nPeriodicSubfaces = 0
+            nullify (cellHalo1st(i)%periodicSubfaces)
+            nullify (cellHalo1st(i)%interp)
+        end do
+
+        do i = 1, nNodeHalo1st
+            nodeHalo1st(i)%levOfInd = 0
+            nodeHalo1st(i)%nPeriodicSubfaces = 0
+            nullify (nodeHalo1st(i)%periodicSubfaces)
+            nullify (nodeHalo1st(i)%interp)
+        end do
+
+        ! Allocate the memory to store the short hand of the transformation
+        ! matrix for both cell and nodal halo's. In principle this matrix
+        ! is only needed for the face (i.e. direct) halo's. However the
+        ! difference between the number of 1st level halo's and the cell
+        ! halo's is not so large and at this time of the program you should
+        ! not worry too much about memory, because the metrics as well as
+        ! the solution variables have not been allocated yet.
+
+        allocate (transformCell(nCellHalo1st, 3), &
+                  transformNode(nNodeHalo1st, 3), stat=ierr)
+        if (ierr /= 0) &
+             call terminate("allocMemHaloList", &
+             "Memory allocation failure for transformCell &
+             &and transformNode")
+
+        ! Allocate the memory for nodeIndex and cellIndex, which will
+        ! store the indices per block in the lists above.
+
+        allocate (nodeIndex(nDom), cellIndex(nDom), stat=ierr)
+        if (ierr /= 0) &
+             call terminate("allocMemHaloList", &
+             "Memory allocation failure for nodeIndex &
+             &and cellIndex")
+
+        ! Loop over the number of blocks to allocate and initialize
+        ! the elements of nodeIndex and cellIndex.
+
+        do i = 1, nDom
+
+            ! Store the upper indices in the allocation a bit easier.
+
+            ie = flowDoms(i, level, 1)%ie
+            je = flowDoms(i, level, 1)%je
+            ke = flowDoms(i, level, 1)%ke
+
+            ib = flowDoms(i, level, 1)%ib
+            jb = flowDoms(i, level, 1)%jb
+            kb = flowDoms(i, level, 1)%kb
+
+            ! Allocate the memory for entryList.
+
+            allocate (nodeIndex(i)%entryList(0:ie, 0:je, 0:ke), &
+                      cellIndex(i)%entryList(0:ib, 0:jb, 0:kb), stat=ierr)
+            if (ierr /= 0) &
+                call terminate("allocMemHaloList", &
+                               "Memory allocation failure for entryList")
+
+            ! Initialize entryList to zero. This serves as a check later
+            ! on. Cell halo's are uniquely defined via the 1 to 1 block
+            ! connectivity, but for node halo's (on the boundary of a
+            ! subface) several possibilities exist.
+
+            nodeIndex(i)%entryList = 0
+            cellIndex(i)%entryList = 0
+
+        end do
+
+    end subroutine allocMemHaloList
+
+    subroutine determineFaceHalos(level)
+        !
+        !       determineFaceHalos determines the 1st level direct cell and
+        !       node halo's. Direct halo means that at least one of the
+        !       neighboring cell/nodes belongs is owned by the block.
+        !       Consequently the halo can be found using the 1 to 1 block
+        !       connectivity.
+        !
+        use constants
+        use blockPointers
+        use cgnsGrid
+        use haloList
+        use periodicInfo
+        use utils, only: delta, terminate, setPointers
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: i, j, k
+        integer(kind=intType) :: ii, jj, kk, ll, mm, nn
+        integer(kind=intType) :: iH, jH, kH, iD, jD, kD
+        integer(kind=intType) :: indexPeriodic
+
+        integer(kind=intType), dimension(3) :: myOffset, donorOffset
+        integer(kind=intType), dimension(3) :: step
+
+        integer(kind=intType), dimension(3, 3) :: trMat
+        integer(kind=intType), dimension(3, 2) :: myCellRange
+        integer(kind=intType), dimension(3, 2) :: myNodeRange
+        integer(kind=intType), dimension(3, 2) :: donorCellRange
+
+        type(cgnsPeriodicType) :: key
+
+        ! Initialize the counter variables for the 1st level halo's to 0.
+
+        iicell1st = 0
+        iinode1st = 0
+
+        ! Determine the 1st level cell and node halo lists by looping over
+        ! the boundary faces of the blocks stored on this processor.
+
+        domains: do nn = 1, nDom
+
+            ! Set the pointers for this block on the given level.
+
+            call setPointers(nn, level, 1_intType)
+            !
+            !         Loop over the boundary halo's first. The reason is that a
+            !         node could belong to both a boundary and an internal subface.
+            !         By looping first over the boundaries, the internal subfaces
+            !         overwrite earlier set values by the boundary subface, which
+            !         is desirable.
+            !
+            bocos: do mm = 1, nBocos
+
+                ! Determine the cell and nodal range for the halo's of this
+                ! subface as well as the direction normal to the subface.
+
+                call haloRanges(mm, myOffset, myCellRange, myNodeRange, step)
+                !
+                !           First treat the nodes on the subface.
+                !
+                ! Loop over the nodes of the boundary subface and store
+                ! the halo info. For the edges of the subface it is possible
+                ! that the node is already stored. This must be checked;
+                ! otherwise this node will occur more than once in the list,
+                ! which is not correct.
+
+                do k = myNodeRange(3, 1), myNodeRange(3, 2), step(3)
+                    do j = myNodeRange(2, 1), myNodeRange(2, 2), step(2)
+                        do i = myNodeRange(1, 1), myNodeRange(1, 2), step(1)
+
+                            ! Determine the indices of my nodal halo node.
+
+                            iH = i + myOffset(1)
+                            jH = j + myOffset(2)
+                            kH = k + myOffset(3)
+
+                            ! Store the halo, if it has not been stored yet.
+
+                            if (nodeIndex(nn)%entryList(iH, jH, kH) == 0) then
+
+                                ! Update the counter iinode1st. Store it in ii for
+                                ! convenience. Store the index in %entryList.
+
+                                iinode1st = iinode1st + 1
+                                ii = iinode1st
+
+                                nodeIndex(nn)%entryList(iH, jH, kH) = ii
+
+                                ! Store the info in nodeHalo1st. Note that
+                                ! donorBlock contains the boundary condition, although
+                                ! this is not really important for node halo's.
+                                ! Furthermore donorProc is set to -1 to indicate a
+                                ! boundary halo and the donor indices are set to i,j,k.
+                                ! In this way the extrapolated coordinates can be
+                                ! obtained easily later on.
+
+                                nodeHalo1st(ii)%myBlock = nn
+                                nodeHalo1st(ii)%myI = iH
+                                nodeHalo1st(ii)%myJ = jH
+                                nodeHalo1st(ii)%myK = kH
+
+                                nodeHalo1st(ii)%donorProc = -1
+                                nodeHalo1st(ii)%donorBlock = BCType(mm)
+
+                                nodeHalo1st(ii)%dI = i
+                                nodeHalo1st(ii)%dJ = j
+                                nodeHalo1st(ii)%dK = k
 
-         periodicData(nn)%nHalos = mm
-         allocate(periodicData(nn)%block(mm), &
-              periodicData(nn)%indices(mm,3), stat=ierr)
-         if(ierr /= 0)                       &
-              call terminate("setPeriodicData", &
-              "Memory allocation failure for block &
-              &and indices.")
+                            end if
 
-         blockID => periodicData(nn)%block
-         indices => periodicData(nn)%indices
+                        end do
+                    end do
+                end do
+                !
+                !           The cell halo's belonging to this subface. Direct cell
+                !           halo's are unique and therefore info cannot already be
+                !           written earlier.
+                !
+                ! Loop over the halo cells located adjacent to the subface.
 
-         ! Loop over the number of periodic halo's and set the
-         ! blockID and indices.
+                do k = myCellRange(3, 1), myCellRange(3, 2), step(3)
+                    do j = myCellRange(2, 1), myCellRange(2, 2), step(2)
+                        do i = myCellRange(1, 1), myCellRange(1, 2), step(1)
 
-         kk = 0
-         do mm=(nHaloPerTransform(nn-1)+1),nHaloPerTransform(nn)
-            kk = kk + 1
-            ll = perHalo(mm)%indexInHaloList
+                            ! Check in debug mode whether this halo is already
+                            ! stored. This should not be the case.
 
-            blockID(kk)   = entityHalo(ll)%myBlock
-            indices(kk,1) = entityHalo(ll)%myI
-            indices(kk,2) = entityHalo(ll)%myJ
-            indices(kk,3) = entityHalo(ll)%myK
-         enddo
+                            if (debug) then
+                                if (cellIndex(nn)%entryList(i, j, k) /= 0) &
+                                    call terminate("determineFaceHalos", &
+                                                   "boundary cell halo already stored")
+                            end if
 
-         ! Determine the rotation matrix, rotation center and
-         ! translation vector of this periodic transformation.
-         ! The sorting has been such that all the halo's set above
-         ! have the same transformation, so it is enough to consider
-         ! the first element.
+                            ! Update the counter iicell1st and store its value a
+                            ! bit easier in ii and set entryList accordingly.
 
-         kk = nHaloPerTransform(nn-1) + 1
+                            iicell1st = iicell1st + 1
+                            ii = iicell1st
+
+                            cellIndex(nn)%entryList(i, j, k) = ii
+
+                            ! Store the info in cellHalo1st. Note that donorBlock
+                            ! contains the boundary condition and donorProc is set
+                            ! to -1 to indicate a boundary halo. The donor indices are
+                            ! set to the owned cell on the other side of the subface.
+
+                            cellHalo1st(ii)%myBlock = nn
+                            cellHalo1st(ii)%myI = i
+                            cellHalo1st(ii)%myJ = j
+                            cellHalo1st(ii)%myK = k
+
+                            cellHalo1st(ii)%donorProc = -1
+                            cellHalo1st(ii)%donorBlock = BCType(mm)
+
+                            cellHalo1st(ii)%dI = i - myOffset(1)
+                            cellHalo1st(ii)%dJ = j - myOffset(2)
+                            cellHalo1st(ii)%dK = k - myOffset(3)
+
+                        end do
+                    end do
+                end do
 
-         ! Set the rotation center to the rotation center of the
-         ! first subface. This should be the same for all of them.
-         ! Initialize the rotation matrix to the identity and the
-         ! translation vector to zero.
+            end do bocos
+            !
+            !         Loop over the 1 to 1 block to block boundaries.
+            !
+            n1to1Loop: do ll = 1, n1to1
+
+                ! Store the correct index for this subface, i.e. add the
+                ! offset from the boundary subfaces.
+
+                mm = nBocos + ll
+
+                ! Check if the original subface is a periodic subface.
+                ! Subfaces created by internal block splitting are indicated
+                ! by 0 and are certainly not periodic. This must be tested
+                ! first to avoid array overflow.
+
+                indexPeriodic = 0
+
+                kk = cgnsSubface(mm)
+                if (kk > 0) then
+                    if (cgnsDoms(nbkGlobal)%conn1to1(kk)%periodic) then
+
+                        ! Determine the corresponding index in periodicGlobal.
+
+                        key%cgnsBlock = nbkGlobal
+                        key%cgnsSubface = kk
+
+                        indexPeriodic = bsearchCGNSPeriodicType(key, periodicGlobal)
+
+                        if (debug) then
+                            if (indexPeriodic == 0) &
+                                call terminate("determineFaceHalos", &
+                                               "Entry not found in periodicGlobal")
+                        end if
+                    end if
+                end if
+
+                ! Determine the cell and nodal range for the halo's of this
+                ! subface as well as the direction normal to the subface.
+
+                call haloRanges(mm, myOffset, myCellRange, &
+                                myNodeRange, step)
+
+                ! Determine the complete transformation matrix from the
+                ! given shorthand.
+
+                trMat(1, 1) = sign(1_intType, l1(mm)) * delta(l1(mm), 1_intType)
+                trMat(2, 1) = sign(1_intType, l1(mm)) * delta(l1(mm), 2_intType)
+                trMat(3, 1) = sign(1_intType, l1(mm)) * delta(l1(mm), 3_intType)
+
+                trMat(1, 2) = sign(1_intType, l2(mm)) * delta(l2(mm), 1_intType)
+                trMat(2, 2) = sign(1_intType, l2(mm)) * delta(l2(mm), 2_intType)
+                trMat(3, 2) = sign(1_intType, l2(mm)) * delta(l2(mm), 3_intType)
+
+                trMat(1, 3) = sign(1_intType, l3(mm)) * delta(l3(mm), 1_intType)
+                trMat(2, 3) = sign(1_intType, l3(mm)) * delta(l3(mm), 2_intType)
+                trMat(3, 3) = sign(1_intType, l3(mm)) * delta(l3(mm), 3_intType)
+
+                ! Determine the offset of the donor block.
+
+                donorOffset(1) = trMat(1, 1) * myOffset(1) &
+                                 + trMat(1, 2) * myOffset(2) &
+                                 + trMat(1, 3) * myOffset(3)
+                donorOffset(2) = trMat(2, 1) * myOffset(1) &
+                                 + trMat(2, 2) * myOffset(2) &
+                                 + trMat(2, 3) * myOffset(3)
+                donorOffset(3) = trMat(3, 1) * myOffset(1) &
+                                 + trMat(3, 2) * myOffset(2) &
+                                 + trMat(3, 3) * myOffset(3)
+                !
+                !           First treat the nodes on the subface.
+                !
+                ! Loop over the nodal range for this subface.
+
+                do k = myNodeRange(3, 1), myNodeRange(3, 2), step(3)
+                    do j = myNodeRange(2, 1), myNodeRange(2, 2), step(2)
+                        do i = myNodeRange(1, 1), myNodeRange(1, 2), step(1)
+
+                            ! Determine the donor indices by applying the
+                            ! transformation matrix to i,j,k and adding the
+                            ! offset to obtain the halo.
+
+                            ii = i - myNodeRange(1, 1)
+                            jj = j - myNodeRange(2, 1)
+                            kk = k - myNodeRange(3, 1)
+
+                            iD = donorOffset(1) + dinBeg(mm) &
+                                 + trMat(1, 1) * ii + trMat(1, 2) * jj + trMat(1, 3) * kk
+                            jD = donorOffset(2) + djnBeg(mm) &
+                                 + trMat(2, 1) * ii + trMat(2, 2) * jj + trMat(2, 3) * kk
+                            kD = donorOffset(3) + dknBeg(mm) &
+                                 + trMat(3, 1) * ii + trMat(3, 2) * jj + trMat(3, 3) * kk
+
+                            ! Determine the indices of my nodal halo node.
+
+                            iH = i + myOffset(1)
+                            jH = j + myOffset(2)
+                            kH = k + myOffset(3)
+
+                            ! It is possible that this halo is already stored,
+                            ! either as a boundary or as an internal halo. In the
+                            ! former case it should be overwritten; in the latter
+                            ! this is not strictly necessary, but it does not hurt.
+                            ! Therefore simply overwrite the old index. If the
+                            ! halo has not been stored yet, update iinode1st.
+                            ! The index to store the info will be ii.
+
+                            if (nodeIndex(nn)%entryList(iH, jH, kH) == 0) then
+                                iinode1st = iinode1st + 1
+                                ii = iinode1st
+
+                                nodeIndex(nn)%entryList(iH, jH, kH) = ii
+                            else
+                                ii = nodeIndex(nn)%entryList(iH, jH, kH)
+                            end if
+
+                            ! Store the info in the correct place in nodeHalo1st.
+
+                            nodeHalo1st(ii)%myBlock = nn
+                            nodeHalo1st(ii)%myI = iH
+                            nodeHalo1st(ii)%myJ = jH
+                            nodeHalo1st(ii)%myK = kH
+
+                            nodeHalo1st(ii)%donorProc = neighProc(mm)
+                            nodeHalo1st(ii)%donorBlock = neighBlock(mm)
+
+                            nodeHalo1st(ii)%dI = iD
+                            nodeHalo1st(ii)%dJ = jD
+                            nodeHalo1st(ii)%dK = kD
+
+                            ! Store the short hand of the transformation matrix
+                            ! for this halo.
+
+                            transformNode(ii, 1) = l1(mm)
+                            transformNode(ii, 2) = l2(mm)
+                            transformNode(ii, 3) = l3(mm)
+
+                            ! It is possible that ii is treated earlier and hence
+                            ! periodic info may have been stored. Remove this.
+
+                            if (nodeHalo1st(ii)%nPeriodicSubfaces > 0) then
+                                deallocate (nodeHalo1st(ii)%periodicSubfaces, stat=ierr)
+                                if (ierr /= 0) &
+                                     call terminate("determineFaceHalos", &
+                                     "Deallocation failure for &
+                                     &periodicSubfaces")
+                                nullify (nodeHalo1st(ii)%periodicSubfaces)
+                                nodeHalo1st(ii)%nPeriodicSubfaces = 0
+                            end if
+
+                            ! If the subface is periodic store the periodic info.
+
+                            if (indexPeriodic > 0) then
+                                nodeHalo1st(ii)%nPeriodicSubfaces = 1
+                                allocate (nodeHalo1st(ii)%periodicSubfaces(1), &
+                                          stat=ierr)
+                                if (ierr /= 0) &
+                                     call terminate("determineFaceHalos", &
+                                     "Memory allocation failure for &
+                                     &periodicSubfaces")
+                                nodeHalo1st(ii)%periodicSubfaces(1) = indexPeriodic
+                            end if
+
+                        end do
+                    end do
+                end do
+                !
+                !           The cell halo's belonging to this subface. Direct cell
+                !           halo's are unique and therefore info cannot already be
+                !           written earlier.
+                !
+                ! First determine the cell range of the donor block on
+                ! the subface. This equals the nodal range, except that 1 is
+                ! added to the smallest index. As it is possible that the
+                ! index is running negatively, this should be taken into account.
+
+                donorCellRange(1, 1) = dinBeg(mm)
+                donorCellRange(2, 1) = djnBeg(mm)
+                donorCellRange(3, 1) = dknBeg(mm)
+
+                donorCellRange(1, 2) = dinEnd(mm)
+                donorCellRange(2, 2) = djnEnd(mm)
+                donorCellRange(3, 2) = dknEnd(mm)
+
+                ! The loop to add 1 to the lowest index and to correct the
+                ! index corresponding to the face we are on.
+
+                do i = 1, 3
+                    if (donorCellRange(i, 1) == donorCellRange(i, 2)) then
+
+                        ! If the face corresponds to a min face, indicated by
+                        ! donorCellRange(i,1) == 1 then 1 must be added;
+                        ! otherwise nothing needs to be done.
+
+                        if (donorCellRange(i, 1) == 1) then
+                            donorCellRange(i, 1) = 2
+                            donorCellRange(i, 2) = 2
+                        end if
+
+                    else if (donorCellRange(i, 1) > donorCellRange(i, 2)) then
+                        donorCellRange(i, 2) = donorCellRange(i, 2) + 1
+                    else
+                        donorCellRange(i, 1) = donorCellRange(i, 1) + 1
+                    end if
+                end do
+
+                ! Loop over the halo cells located adjacent to the subface.
+
+                do k = myCellRange(3, 1), myCellRange(3, 2), step(3)
+                    do j = myCellRange(2, 1), myCellRange(2, 2), step(2)
+                        do i = myCellRange(1, 1), myCellRange(1, 2), step(1)
+
+                            ! Check in debug mode whether this halo is already
+                            ! stored. This should not be the case.
+
+                            if (debug) then
+                                if (cellIndex(nn)%entryList(i, j, k) /= 0) &
+                                    call terminate("determineFaceHalos", &
+                                                   "internal cell halo already stored")
+                            end if
+
+                            ! Determine the indices of the donor point by applying
+                            ! the transformation matrix to i,j,k.
+
+                            ii = i - myCellRange(1, 1)
+                            jj = j - myCellRange(2, 1)
+                            kk = k - myCellRange(3, 1)
+
+                            iD = donorCellRange(1, 1) &
+                                 + trMat(1, 1) * ii + trMat(1, 2) * jj + trMat(1, 3) * kk
+                            jD = donorCellRange(2, 1) &
+                                 + trMat(2, 1) * ii + trMat(2, 2) * jj + trMat(2, 3) * kk
+                            kD = donorCellRange(3, 1) &
+                                 + trMat(3, 1) * ii + trMat(3, 2) * jj + trMat(3, 3) * kk
+
+                            ! Update the counter iicell1st and store its value a
+                            ! bit easier in ii and set entryList accordingly.
+
+                            iicell1st = iicell1st + 1
+                            ii = iicell1st
+
+                            cellIndex(nn)%entryList(i, j, k) = ii
+
+                            ! Store the info in the correct place in cellHalo1st.
+
+                            cellHalo1st(ii)%myBlock = nn
+                            cellHalo1st(ii)%myI = i
+                            cellHalo1st(ii)%myJ = j
+                            cellHalo1st(ii)%myK = k
+
+                            cellHalo1st(ii)%donorProc = neighProc(mm)
+                            cellHalo1st(ii)%donorBlock = neighBlock(mm)
+
+                            cellHalo1st(ii)%dI = iD
+                            cellHalo1st(ii)%dJ = jD
+                            cellHalo1st(ii)%dK = kD
+
+                            ! Store the short hand of the transformation matrix
+                            ! for this halo.
+
+                            transformCell(ii, 1) = l1(mm)
+                            transformCell(ii, 2) = l2(mm)
+                            transformCell(ii, 3) = l3(mm)
+
+                            ! If the subface is periodic store the periodic info.
+                            ! Note that for the cells it is not needed to check
+                            ! if a previous transformation was already stored,
+                            ! because cell halo's are unique.
+
+                            if (indexPeriodic > 0) then
+                                cellHalo1st(ii)%nPeriodicSubfaces = 1
+                                allocate (cellHalo1st(ii)%periodicSubfaces(1), &
+                                          stat=ierr)
+                                if (ierr /= 0) &
+                                     call terminate("determineFaceHalos", &
+                                     "Memory allocation failure for &
+                                     &periodicSubfaces")
+                                cellHalo1st(ii)%periodicSubfaces(1) = indexPeriodic
+                            end if
+
+                        end do
+                    end do
+                end do
+
+            end do n1to1Loop
+
+        end do domains
+
+    end subroutine determineFaceHalos
+
+    !      ==================================================================
+
+    subroutine haloRanges(mm, offset, cellRange, nodeRange, step)
+        !
+        !       haloRanges determines the cell and nodal ranges for the given
+        !       subface as well as the direction normal to the subface,
+        !       pointing outwards. In case of negative running indices of the
+        !       subface, step is set to -1; otherwise it is 1.
+
+        use constants
+        use blockPointers
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: mm
+        integer(kind=intType), dimension(3), intent(out) :: offset, step
+        integer(kind=intType), dimension(3, 2), intent(out) :: cellRange
+        integer(kind=intType), dimension(3, 2), intent(out) :: nodeRange
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i
+        integer(kind=intType) :: cellHaloInd, cellHaloID
+
+        ! Determine the offset in i, j and k direction, depending on the
+        ! faceID. This can be interpreted as the outward pointing normal
+        ! in the index domain. Furthermore store cellHaloInd and
+        ! cellHaloID. This info is needed to construct the cell halo
+        ! range correctly.
+
+        offset = 0
+
+        select case (BCFaceID(mm))
+        case (iMin)
+            offset(1) = -1
+            cellHaloInd = 1
+            cellHaloID = 1
+        case (iMax)
+            offset(1) = 1
+            cellHaloInd = 1
+            cellHaloID = ie
+        case (jMin)
+            offset(2) = -1
+            cellHaloInd = 2
+            cellHaloID = 1
+        case (jMax)
+            offset(2) = 1
+            cellHaloInd = 2
+            cellHaloID = je
+        case (kMin)
+            offset(3) = -1
+            cellHaloInd = 3
+            cellHaloID = 1
+        case (kMax)
+            offset(3) = 1
+            cellHaloInd = 3
+            cellHaloID = ke
+        end select
+
+        ! Copy the nodal range.
+
+        nodeRange(1, 1) = inBeg(mm)
+        nodeRange(2, 1) = jnBeg(mm)
+        nodeRange(3, 1) = knBeg(mm)
+
+        nodeRange(1, 2) = inEnd(mm)
+        nodeRange(2, 2) = jnEnd(mm)
+        nodeRange(3, 2) = knEnd(mm)
+
+        ! Determine the cell range. The cell numbering of a block starts
+        ! at index 2, i.e. 1 higher than the node numbering. Consequently
+        ! 1 must be added to the smallest indices of the nodal range.
+        ! Take negative running indices into account and set step
+        ! accordingly.
+
+        do i = 1, 3
+            if (nodeRange(i, 1) > nodeRange(i, 2)) then
+
+                ! Negative running index.
+
+                step(i) = -1
+                cellRange(i, 1) = nodeRange(i, 1)
+                cellRange(i, 2) = nodeRange(i, 2) + 1
+            else
+
+                ! Positive running index.
+
+                step(i) = 1
+                cellRange(i, 1) = nodeRange(i, 1) + 1
+                cellRange(i, 2) = nodeRange(i, 2)
+            end if
+        end do
+
+        ! Correct the cell range for the index corresponding to the face
+        ! we are on.
+
+        cellRange(cellHaloInd, 1) = cellHaloID
+        cellRange(cellHaloInd, 2) = cellHaloID
+
+    end subroutine haloRanges
+
+    subroutine init2ndLevelCellHalos
+        !
+        !       init2ndLevelCellHalos initializes the 2nd level cell halo
+        !       list. Basically the 1st level cell halo list is copied and the
+        !       counter iicell2nd is set to nCellHalo1st. This means that
+        !       the 2nd level cell halo's are appended to the first level
+        !       halo's. They are stored in a separate list, because the
+        !       communication pattern of the 2nd level halo's is separate from
+        !       the 1st level halo's. Efficiency is the reason to do this; it
+        !       is more efficient to send one big message than two smaller
+        !       ones.
+        !
+        use haloList
+        use utils, only: terminate
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: i, j, jj
+
+        ! Allocate the memory for the 2nd level halo list.
+
+        allocate (cellHalo2nd(nCellHalo2nd), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("init2ndLevelCellHalos", &
+                           "Memory allocation failure for cellHalo2nd")
+
+        ! Initialize iicell2nd to nCellHalo1st.
+
+        iiCell2nd = nCellHalo1st
+
+        ! Copy the information from the 1st level cell halo's into the
+        ! second level cell halo list. Make sure to make a deep copy.
+
+        do i = 1, nCellHalo1st
+            cellHalo2nd(i)%myBlock = cellHalo1st(i)%myBlock
+            cellHalo2nd(i)%myI = cellHalo1st(i)%myI
+            cellHalo2nd(i)%myJ = cellHalo1st(i)%myJ
+            cellHalo2nd(i)%myK = cellHalo1st(i)%myK
+            cellHalo2nd(i)%donorProc = cellHalo1st(i)%donorProc
+            cellHalo2nd(i)%donorBlock = cellHalo1st(i)%donorBlock
+            cellHalo2nd(i)%dI = cellHalo1st(i)%dI
+            cellHalo2nd(i)%dJ = cellHalo1st(i)%dJ
+            cellHalo2nd(i)%dK = cellHalo1st(i)%dK
+            cellHalo2nd(i)%levOfInd = cellHalo1st(i)%levOfInd
+
+            nullify (cellHalo2nd(i)%interp)
+
+            cellHalo2nd(i)%nPeriodicSubfaces = &
+                cellHalo1st(i)%nPeriodicSubfaces
+
+            if (cellHalo2nd(i)%nPeriodicSubfaces > 0) then
+                jj = cellHalo2nd(i)%nPeriodicSubfaces
+                allocate (cellHalo2nd(i)%periodicSubfaces(jj), stat=ierr)
+                if (ierr /= 0) &
+                     call terminate("init2ndLevelCellHalos", &
+                     "Memory allocation failure for &
+                     &periodicSubfaces")
+                do j = 1, jj
+                    cellHalo2nd(i)%periodicSubfaces(j) = &
+                        cellHalo1st(i)%periodicSubfaces(j)
+                end do
+            else
+                nullify (cellHalo2nd(i)%periodicSubfaces)
+            end if
+        end do
+
+        ! Initialize the level of indirectness for the rest of the list
+        ! to 0 and initialize the periodic data to 0 as well.
+
+        do i = (nCellHalo1st + 1), nCellHalo2nd
+            cellHalo2nd(i)%levOfInd = 0
+            cellHalo2nd(i)%nPeriodicSubfaces = 0
+            nullify (cellHalo2nd(i)%periodicSubfaces)
+            nullify (cellHalo2nd(i)%interp)
+        end do
+
+    end subroutine init2ndLevelCellHalos
+
+    subroutine qsortHaloListType(arr, nn)
+        !
+        !       qsortHaloListType sorts the given number of halo's in
+        !       increasing order based on the <= operator for this derived
+        !       data type.
+        !
+        use haloList
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nn
+
+        type(haloListType), dimension(*), intent(inout) :: arr
+        !
+        !      Local variables.
+        !
+        integer(kind=intType), parameter :: m = 7
+
+        integer(kind=intType) :: nStack
+        integer(kind=intType) :: i, j, k, r, l, jStack, ii
+
+        integer :: ierr
+
+        type(haloListType) :: a, tmp
+
+        integer(kind=intType), allocatable, dimension(:) :: stack
+        integer(kind=intType), allocatable, dimension(:) :: tmpStack
+
+        ! Allocate the memory for stack.
+
+        nStack = 100
+        allocate (stack(nStack), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("qsortHaloListType", &
+                           "Memory allocation failure for stack")
 
-         ll        = perHalo(kk)%periodicSubfaces(1)
-         cgnsBlock = periodicGlobal(ll)%cgnsBlock
-         cgnsSub   = periodicGlobal(ll)%cgnsSubface
+        ! Initialize the variables that control the sorting.
 
-         rotCenter = cgnsDoms(cgnsBlock)%conn1to1(cgnsSub)%rotationCenter
+        jStack = 0
+        l = 1
+        r = nn
+
+        ! Start of the algorithm
 
-         rotMat(1,1) =  one; rotMat(1,2) = zero; rotMat(1,3) = zero
-         rotMat(2,1) = zero; rotMat(2,2) =  one; rotMat(2,3) = zero
-         rotMat(3,1) = zero; rotMat(3,2) = zero; rotMat(3,3) =  one
+        do
 
-         trans = zero
-
-         ! Loop over the number of periodic subface for the total
-         ! periodic transformation.
+            ! Check for the size of the subarray.
 
-         subfaceLoop: do mm=1,perHalo(kk)%nPeriodicSubfaces
+            if ((r - l) < m) then
 
-            ll        = perHalo(kk)%periodicSubfaces(1)
-            cgnsBlock = periodicGlobal(ll)%cgnsBlock
-            cgnsSub   = periodicGlobal(ll)%cgnsSubface
+                ! Perform insertion sort
+
+                do j = l + 1, r
+                    a = arr(j)
+                    do i = (j - 1), l, -1
+                        if (arr(i) <= a) exit
+                        arr(i + 1) = arr(i)
+                    end do
+                    arr(i + 1) = a
+                end do
 
-            ! Store the data for the transformation of this subface
-            ! a bit easier.
-
-            translation = cgnsDoms(cgnsBlock)%conn1to1(cgnsSub)%translation
+                ! In case there are no more elements on the stack, exit from
+                ! the outermost do-loop. Algorithm has finished.
 
-            theta = cgnsDoms(cgnsBlock)%conn1to1(cgnsSub)%rotationAngles(1)
-            phi   = cgnsDoms(cgnsBlock)%conn1to1(cgnsSub)%rotationAngles(2)
-            psi   = cgnsDoms(cgnsBlock)%conn1to1(cgnsSub)%rotationAngles(3)
+                if (jStack == 0) exit
 
-            ! Construct the rotation matrix for this subface. Actually
-            ! the inverse (== transpose) is constructed, because the
-            ! cgns data is for the transformation from the current face
-            ! to the donor and here the inverse is needed. Note
-            ! furthermore that the sequence of rotation is first
-            ! rotation around the x-axis, followed by rotation around
-            ! the y-axis and finally rotation around the z-axis.
-
-            cosTheta = cos(theta); cosPhi = cos(phi); cosPsi = cos(psi)
-            sinTheta = sin(theta); sinPhi = sin(phi); sinPsi = sin(psi)
-
-            rotMatrix(1,1) =  cosPhi*cosPsi
-            rotMatrix(1,2) =  cosPhi*sinPsi
-            rotMatrix(1,3) = -sinPhi
-
-            rotMatrix(2,1) = sinTheta*sinPhi*cosPsi - cosTheta*sinPsi
-            rotMatrix(2,2) = sinTheta*sinPhi*sinPsi + cosTheta*cosPsi
-            rotMatrix(2,3) = sinTheta*cosPhi
-
-            rotMatrix(3,1) = cosTheta*sinPhi*cosPsi + sinTheta*sinPsi
-            rotMatrix(3,2) = cosTheta*sinPhi*sinPsi - sinTheta*cosPsi
-            rotMatrix(3,3) = cosTheta*cosPhi
-
-            ! Create the product of rotMat and rotMatrix. Copy the
-            ! result back into rotMat.
-
-            tmpMat(:,1) = rotMat(:,1)*rotMatrix(1,1) &
-                 + rotMat(:,2)*rotMatrix(2,1) &
-                 + rotMat(:,3)*rotMatrix(3,1)
-            tmpMat(:,2) = rotMat(:,1)*rotMatrix(1,2) &
-                 + rotMat(:,2)*rotMatrix(2,2) &
-                 + rotMat(:,3)*rotMatrix(3,2)
-            tmpMat(:,3) = rotMat(:,1)*rotMatrix(1,3) &
-                 + rotMat(:,2)*rotMatrix(2,3) &
-                 + rotMat(:,3)*rotMatrix(3,3)
-
-            rotMat = tmpMat
-
-            ! Update the translation vector. As we need the inverse
-            ! of the transformation translation must be premultiplied
-            ! by the rotation matrix and negated.
-
-            trans = trans - rotMatrix(:,1)*translation(1) &
-                 - rotMatrix(:,2)*translation(2) &
-                 - rotMatrix(:,3)*translation(3)
-
-         enddo subfaceLoop
-
-         ! Store the results in periodicData(nn).
-
-         periodicData(nn)%rotMatrix   = rotMat
-         periodicData(nn)%rotCenter   = rotCenter
-         periodicData(nn)%translation = trans
-
-      enddo periodicLoop
-
-    end subroutine setPeriodicData
-
-  end subroutine determinePeriodicData
-
-
-  subroutine allocMemHaloList(level)
-    !
-    !       allocMemHaloList allocates the memory for the variables
-    !       needed to construct the communication lists and the periodic
-    !       information. These variables are located in the module
-    !       haloList. Only the 1st level halo variables are allocated here
-    !       to avoid unnecessary memory usage. The 2nd level cell halo
-    !       are allocated later on in init2ndLevelCellHalos.
-    !
-    use block
-    use haloList
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: i
-    integer(kind=intType) :: ie, je, ke, ib, jb, kb
-
-    ! Allocate the memory for the 1st level cell and node halo lists.
-
-    allocate(cellHalo1st(nCellHalo1st), &
-         nodeHalo1st(nNodeHalo1st), stat=ierr)
-    if(ierr /= 0)                                               &
-         call terminate("allocMemHaloList",                        &
-         "Memory allocation failure for cellHalo1st &
-         &and nodeHalo1st")
-
-    ! Initialize the level of indirectness to 0. This will only be
-    ! overwritten for indirect boundary halo's. Also initialize the
-    ! number of periodid subfaces to 0.
-
-    do i=1,nCellHalo1st
-       cellHalo1st(i)%levOfInd          = 0
-       cellHalo1st(i)%nPeriodicSubfaces = 0
-       nullify(cellHalo1st(i)%periodicSubfaces)
-       nullify(cellHalo1st(i)%interp)
-    enddo
-
-    do i=1,nNodeHalo1st
-       nodeHalo1st(i)%levOfInd          = 0
-       nodeHalo1st(i)%nPeriodicSubfaces = 0
-       nullify(nodeHalo1st(i)%periodicSubfaces)
-       nullify(nodeHalo1st(i)%interp)
-    enddo
-
-    ! Allocate the memory to store the short hand of the transformation
-    ! matrix for both cell and nodal halo's. In principle this matrix
-    ! is only needed for the face (i.e. direct) halo's. However the
-    ! difference between the number of 1st level halo's and the cell
-    ! halo's is not so large and at this time of the program you should
-    ! not worry too much about memory, because the metrics as well as
-    ! the solution variables have not been allocated yet.
-
-    allocate(transformCell(nCellHalo1st,3), &
-         transformNode(nNodeHalo1st,3), stat=ierr)
-    if(ierr /= 0)                                                 &
-         call terminate("allocMemHaloList"  ,                        &
-         "Memory allocation failure for transformCell &
-         &and transformNode")
-
-    ! Allocate the memory for nodeIndex and cellIndex, which will
-    ! store the indices per block in the lists above.
-
-    allocate(nodeIndex(nDom), cellIndex(nDom), stat=ierr)
-    if(ierr /= 0)                                             &
-         call terminate("allocMemHaloList",                      &
-         "Memory allocation failure for nodeIndex &
-         &and cellIndex")
-
-    ! Loop over the number of blocks to allocate and initialize
-    ! the elements of nodeIndex and cellIndex.
-
-    do i=1,nDom
-
-       ! Store the upper indices in the allocation a bit easier.
-
-       ie = flowDoms(i,level,1)%ie
-       je = flowDoms(i,level,1)%je
-       ke = flowDoms(i,level,1)%ke
-
-       ib = flowDoms(i,level,1)%ib
-       jb = flowDoms(i,level,1)%jb
-       kb = flowDoms(i,level,1)%kb
-
-       ! Allocate the memory for entryList.
-
-       allocate(nodeIndex(i)%entryList(0:ie,0:je,0:ke), &
-            cellIndex(i)%entryList(0:ib,0:jb,0:kb), stat=ierr)
-       if(ierr /= 0)                        &
-            call terminate("allocMemHaloList", &
-            "Memory allocation failure for entryList")
-
-       ! Initialize entryList to zero. This serves as a check later
-       ! on. Cell halo's are uniquely defined via the 1 to 1 block
-       ! connectivity, but for node halo's (on the boundary of a
-       ! subface) several possibilities exist.
-
-       nodeIndex(i)%entryList = 0
-       cellIndex(i)%entryList = 0
-
-    enddo
-
-  end subroutine allocMemHaloList
-
-  subroutine determineFaceHalos(level)
-    !
-    !       determineFaceHalos determines the 1st level direct cell and
-    !       node halo's. Direct halo means that at least one of the
-    !       neighboring cell/nodes belongs is owned by the block.
-    !       Consequently the halo can be found using the 1 to 1 block
-    !       connectivity.
-    !
-    use constants
-    use blockPointers
-    use cgnsGrid
-    use haloList
-    use periodicInfo
-    use utils, only : delta, terminate, setPointers
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: i, j, k
-    integer(kind=intType) :: ii, jj, kk, ll, mm, nn
-    integer(kind=intType) :: iH, jH, kH, iD, jD, kD
-    integer(kind=intType) :: indexPeriodic
-
-    integer(kind=intType), dimension(3) :: myOffset, donorOffset
-    integer(kind=intType), dimension(3) :: step
-
-    integer(kind=intType), dimension(3,3) :: trMat
-    integer(kind=intType), dimension(3,2) :: myCellRange
-    integer(kind=intType), dimension(3,2) :: myNodeRange
-    integer(kind=intType), dimension(3,2) :: donorCellRange
-
-    type(cgnsPeriodicType) :: key
-
-    ! Initialize the counter variables for the 1st level halo's to 0.
-
-    iicell1st = 0
-    iinode1st = 0
-
-    ! Determine the 1st level cell and node halo lists by looping over
-    ! the boundary faces of the blocks stored on this processor.
-
-    domains: do nn=1,nDom
-
-       ! Set the pointers for this block on the given level.
-
-       call setPointers(nn,level,1_intType)
-       !
-       !         Loop over the boundary halo's first. The reason is that a
-       !         node could belong to both a boundary and an internal subface.
-       !         By looping first over the boundaries, the internal subfaces
-       !         overwrite earlier set values by the boundary subface, which
-       !         is desirable.
-       !
-       bocos: do mm=1,nBocos
-
-          ! Determine the cell and nodal range for the halo's of this
-          ! subface as well as the direction normal to the subface.
-
-          call haloRanges(mm, myOffset, myCellRange, myNodeRange, step)
-          !
-          !           First treat the nodes on the subface.
-          !
-          ! Loop over the nodes of the boundary subface and store
-          ! the halo info. For the edges of the subface it is possible
-          ! that the node is already stored. This must be checked;
-          ! otherwise this node will occur more than once in the list,
-          ! which is not correct.
-
-          do k=myNodeRange(3,1),myNodeRange(3,2),step(3)
-             do j=myNodeRange(2,1),myNodeRange(2,2),step(2)
-                do i=myNodeRange(1,1),myNodeRange(1,2),step(1)
-
-                   ! Determine the indices of my nodal halo node.
-
-                   iH = i + myOffset(1)
-                   jH = j + myOffset(2)
-                   kH = k + myOffset(3)
-
-                   ! Store the halo, if it has not been stored yet.
-
-                   if(nodeIndex(nn)%entryList(iH,jH,kH) == 0) then
-
-                      ! Update the counter iinode1st. Store it in ii for
-                      ! convenience. Store the index in %entryList.
-
-                      iinode1st = iinode1st +1
-                      ii        = iinode1st
-
-                      nodeIndex(nn)%entryList(iH,jH,kH) = ii
-
-                      ! Store the info in nodeHalo1st. Note that
-                      ! donorBlock contains the boundary condition, although
-                      ! this is not really important for node halo's.
-                      ! Furthermore donorProc is set to -1 to indicate a
-                      ! boundary halo and the donor indices are set to i,j,k.
-                      ! In this way the extrapolated coordinates can be
-                      ! obtained easily later on.
-
-                      nodeHalo1st(ii)%myBlock = nn
-                      nodeHalo1st(ii)%myI     = iH
-                      nodeHalo1st(ii)%myJ     = jH
-                      nodeHalo1st(ii)%myK     = kH
-
-                      nodeHalo1st(ii)%donorProc  = -1
-                      nodeHalo1st(ii)%donorBlock = BCType(mm)
-
-                      nodeHalo1st(ii)%dI = i
-                      nodeHalo1st(ii)%dJ = j
-                      nodeHalo1st(ii)%dK = k
+                ! Pop stack and begin a new round of partitioning.
 
-                   endif
+                r = stack(jStack)
+                l = stack(jStack - 1)
+                jStack = jStack - 2
+
+            else
 
-                enddo
-             enddo
-          enddo
-          !
-          !           The cell halo's belonging to this subface. Direct cell
-          !           halo's are unique and therefore info cannot already be
-          !           written earlier.
-          !
-          ! Loop over the halo cells located adjacent to the subface.
+                ! Subarray is larger than the threshold for a linear sort.
+                ! Choose median of left, center and right elements as
+                ! partitioning element a.
+                ! Also rearrange so that (l) <= (l+1) <= (r).
 
-          do k=myCellRange(3,1),myCellRange(3,2),step(3)
-             do j=myCellRange(2,1),myCellRange(2,2),step(2)
-                do i=myCellRange(1,1),myCellRange(1,2),step(1)
+                k = (l + r) / 2
+                tmp = arr(k)             ! Swap the elements
+                arr(k) = arr(l + 1)           ! k and l+1.
+                arr(l + 1) = tmp
 
-                   ! Check in debug mode whether this halo is already
-                   ! stored. This should not be the case.
+                if (arr(r) < arr(l)) then
+                    tmp = arr(l)             ! Swap the elements
+                    arr(l) = arr(r)             ! r and l.
+                    arr(r) = tmp
+                end if
 
-                   if( debug ) then
-                      if(cellIndex(nn)%entryList(i,j,k) /= 0) &
-                           call terminate("determineFaceHalos",  &
-                           "boundary cell halo already stored")
-                   endif
+                if (arr(r) < arr(l + 1)) then
+                    tmp = arr(l + 1)         ! Swap the elements
+                    arr(l + 1) = arr(r)           ! r and l+1.
+                    arr(r) = tmp
+                end if
 
-                   ! Update the counter iicell1st and store its value a
-                   ! bit easier in ii and set entryList accordingly.
+                if (arr(l + 1) < arr(l)) then
+                    tmp = arr(l + 1)         ! Swap the elements
+                    arr(l + 1) = arr(l)           ! l and l+1.
+                    arr(l) = tmp
+                end if
 
-                   iicell1st = iicell1st +1
-                   ii        = iicell1st
-
-                   cellIndex(nn)%entryList(i,j,k) = ii
-
-                   ! Store the info in cellHalo1st. Note that donorBlock
-                   ! contains the boundary condition and donorProc is set
-                   ! to -1 to indicate a boundary halo. The donor indices are
-                   ! set to the owned cell on the other side of the subface.
-
-                   cellHalo1st(ii)%myBlock = nn
-                   cellHalo1st(ii)%myI     = i
-                   cellHalo1st(ii)%myJ     = j
-                   cellHalo1st(ii)%myK     = k
-
-                   cellHalo1st(ii)%donorProc  = -1
-                   cellHalo1st(ii)%donorBlock = BCType(mm)
-
-                   cellHalo1st(ii)%dI = i - myOffset(1)
-                   cellHalo1st(ii)%dJ = j - myOffset(2)
-                   cellHalo1st(ii)%dK = k - myOffset(3)
-
-                enddo
-             enddo
-          enddo
+                ! Initialize the pointers for partitioning.
 
-       enddo bocos
-       !
-       !         Loop over the 1 to 1 block to block boundaries.
-       !
-       n1to1Loop: do ll=1,n1to1
-
-          ! Store the correct index for this subface, i.e. add the
-          ! offset from the boundary subfaces.
-
-          mm = nBocos + ll
-
-          ! Check if the original subface is a periodic subface.
-          ! Subfaces created by internal block splitting are indicated
-          ! by 0 and are certainly not periodic. This must be tested
-          ! first to avoid array overflow.
-
-          indexPeriodic = 0
-
-          kk = cgnsSubface(mm)
-          if(kk > 0) then
-             if(cgnsDoms(nbkGlobal)%conn1to1(kk)%periodic) then
-
-                ! Determine the corresponding index in periodicGlobal.
-
-                key%cgnsBlock   = nbkGlobal
-                key%cgnsSubface = kk
-
-                indexPeriodic = bsearchCGNSPeriodicType(key, periodicGlobal)
-
-                if( debug ) then
-                   if(indexPeriodic == 0)                 &
-                        call terminate("determineFaceHalos", &
-                        "Entry not found in periodicGlobal")
-                endif
-             endif
-          endif
-
-          ! Determine the cell and nodal range for the halo's of this
-          ! subface as well as the direction normal to the subface.
-
-          call haloRanges(mm, myOffset, myCellRange, &
-               myNodeRange, step)
-
-          ! Determine the complete transformation matrix from the
-          ! given shorthand.
-
-          trMat(1,1) = sign(1_intType,l1(mm)) * delta(l1(mm),1_intType)
-          trMat(2,1) = sign(1_intType,l1(mm)) * delta(l1(mm),2_intType)
-          trMat(3,1) = sign(1_intType,l1(mm)) * delta(l1(mm),3_intType)
-
-          trMat(1,2) = sign(1_intType,l2(mm)) * delta(l2(mm),1_intType)
-          trMat(2,2) = sign(1_intType,l2(mm)) * delta(l2(mm),2_intType)
-          trMat(3,2) = sign(1_intType,l2(mm)) * delta(l2(mm),3_intType)
-
-          trMat(1,3) = sign(1_intType,l3(mm)) * delta(l3(mm),1_intType)
-          trMat(2,3) = sign(1_intType,l3(mm)) * delta(l3(mm),2_intType)
-          trMat(3,3) = sign(1_intType,l3(mm)) * delta(l3(mm),3_intType)
-
-          ! Determine the offset of the donor block.
-
-          donorOffset(1) = trMat(1,1)*myOffset(1) &
-               + trMat(1,2)*myOffset(2) &
-               + trMat(1,3)*myOffset(3)
-          donorOffset(2) = trMat(2,1)*myOffset(1) &
-               + trMat(2,2)*myOffset(2) &
-               + trMat(2,3)*myOffset(3)
-          donorOffset(3) = trMat(3,1)*myOffset(1) &
-               + trMat(3,2)*myOffset(2) &
-               + trMat(3,3)*myOffset(3)
-          !
-          !           First treat the nodes on the subface.
-          !
-          ! Loop over the nodal range for this subface.
-
-          do k=myNodeRange(3,1),myNodeRange(3,2),step(3)
-             do j=myNodeRange(2,1),myNodeRange(2,2),step(2)
-                do i=myNodeRange(1,1),myNodeRange(1,2),step(1)
-
-                   ! Determine the donor indices by applying the
-                   ! transformation matrix to i,j,k and adding the
-                   ! offset to obtain the halo.
-
-                   ii = i - myNodeRange(1,1)
-                   jj = j - myNodeRange(2,1)
-                   kk = k - myNodeRange(3,1)
-
-                   iD = donorOffset(1) + dinBeg(mm) &
-                        + trMat(1,1)*ii + trMat(1,2)*jj + trMat(1,3)*kk
-                   jD = donorOffset(2) + djnBeg(mm) &
-                        + trMat(2,1)*ii + trMat(2,2)*jj + trMat(2,3)*kk
-                   kD = donorOffset(3) + dknBeg(mm) &
-                        + trMat(3,1)*ii + trMat(3,2)*jj + trMat(3,3)*kk
-
-                   ! Determine the indices of my nodal halo node.
-
-                   iH = i + myOffset(1)
-                   jH = j + myOffset(2)
-                   kH = k + myOffset(3)
-
-                   ! It is possible that this halo is already stored,
-                   ! either as a boundary or as an internal halo. In the
-                   ! former case it should be overwritten; in the latter
-                   ! this is not strictly necessary, but it does not hurt.
-                   ! Therefore simply overwrite the old index. If the
-                   ! halo has not been stored yet, update iinode1st.
-                   ! The index to store the info will be ii.
-
-                   if(nodeIndex(nn)%entryList(iH,jH,kH) == 0) then
-                      iinode1st = iinode1st +1
-                      ii        = iinode1st
-
-                      nodeIndex(nn)%entryList(iH,jH,kH) = ii
-                   else
-                      ii = nodeIndex(nn)%entryList(iH,jH,kH)
-                   endif
-
-                   ! Store the info in the correct place in nodeHalo1st.
-
-                   nodeHalo1st(ii)%myBlock = nn
-                   nodeHalo1st(ii)%myI     = iH
-                   nodeHalo1st(ii)%myJ     = jH
-                   nodeHalo1st(ii)%myK     = kH
-
-                   nodeHalo1st(ii)%donorProc  = neighProc(mm)
-                   nodeHalo1st(ii)%donorBlock = neighBlock(mm)
-
-                   nodeHalo1st(ii)%dI = iD
-                   nodeHalo1st(ii)%dJ = jD
-                   nodeHalo1st(ii)%dK = kD
-
-                   ! Store the short hand of the transformation matrix
-                   ! for this halo.
-
-                   transformNode(ii,1) = l1(mm)
-                   transformNode(ii,2) = l2(mm)
-                   transformNode(ii,3) = l3(mm)
-
-                   ! It is possible that ii is treated earlier and hence
-                   ! periodic info may have been stored. Remove this.
-
-                   if(nodeHalo1st(ii)%nPeriodicSubfaces > 0) then
-                      deallocate(nodeHalo1st(ii)%periodicSubfaces, stat=ierr)
-                      if(ierr /= 0)                              &
-                           call terminate("determineFaceHalos",     &
-                           "Deallocation failure for &
-                           &periodicSubfaces")
-                      nullify(nodeHalo1st(ii)%periodicSubfaces)
-                      nodeHalo1st(ii)%nPeriodicSubfaces = 0
-                   endif
-
-                   ! If the subface is periodic store the periodic info.
-
-                   if(indexPeriodic > 0) then
-                      nodeHalo1st(ii)%nPeriodicSubfaces = 1
-                      allocate(nodeHalo1st(ii)%periodicSubfaces(1), &
-                           stat=ierr)
-                      if(ierr /= 0)                                   &
-                           call terminate("determineFaceHalos",          &
-                           "Memory allocation failure for &
-                           &periodicSubfaces")
-                      nodeHalo1st(ii)%periodicSubfaces(1) = indexPeriodic
-                   endif
-
-                enddo
-             enddo
-          enddo
-          !
-          !           The cell halo's belonging to this subface. Direct cell
-          !           halo's are unique and therefore info cannot already be
-          !           written earlier.
-          !
-          ! First determine the cell range of the donor block on
-          ! the subface. This equals the nodal range, except that 1 is
-          ! added to the smallest index. As it is possible that the
-          ! index is running negatively, this should be taken into account.
-
-          donorCellRange(1,1) = dinBeg(mm)
-          donorCellRange(2,1) = djnBeg(mm)
-          donorCellRange(3,1) = dknBeg(mm)
-
-          donorCellRange(1,2) = dinEnd(mm)
-          donorCellRange(2,2) = djnEnd(mm)
-          donorCellRange(3,2) = dknEnd(mm)
-
-          ! The loop to add 1 to the lowest index and to correct the
-          ! index corresponding to the face we are on.
-
-          do i=1,3
-             if(donorCellRange(i,1) == donorCellRange(i,2)) then
-
-                ! If the face corresponds to a min face, indicated by
-                ! donorCellRange(i,1) == 1 then 1 must be added;
-                ! otherwise nothing needs to be done.
-
-                if(donorCellRange(i,1) == 1) then
-                   donorCellRange(i,1) = 2
-                   donorCellRange(i,2) = 2
-                endif
-
-             else if(donorCellRange(i,1) > donorCellRange(i,2)) then
-                donorCellRange(i,2) = donorCellRange(i,2) + 1
-             else
-                donorCellRange(i,1) = donorCellRange(i,1) + 1
-             endif
-          enddo
-
-          ! Loop over the halo cells located adjacent to the subface.
-
-          do k=myCellRange(3,1),myCellRange(3,2),step(3)
-             do j=myCellRange(2,1),myCellRange(2,2),step(2)
-                do i=myCellRange(1,1),myCellRange(1,2),step(1)
-
-                   ! Check in debug mode whether this halo is already
-                   ! stored. This should not be the case.
-
-                   if( debug ) then
-                      if(cellIndex(nn)%entryList(i,j,k) /= 0) &
-                           call terminate("determineFaceHalos",  &
-                           "internal cell halo already stored")
-                   endif
-
-                   ! Determine the indices of the donor point by applying
-                   ! the transformation matrix to i,j,k.
-
-                   ii = i - myCellRange(1,1)
-                   jj = j - myCellRange(2,1)
-                   kk = k - myCellRange(3,1)
-
-                   iD = donorCellRange(1,1) &
-                        + trMat(1,1)*ii + trMat(1,2)*jj + trMat(1,3)*kk
-                   jD = donorCellRange(2,1) &
-                        + trMat(2,1)*ii + trMat(2,2)*jj + trMat(2,3)*kk
-                   kD = donorCellRange(3,1) &
-                        + trMat(3,1)*ii + trMat(3,2)*jj + trMat(3,3)*kk
-
-                   ! Update the counter iicell1st and store its value a
-                   ! bit easier in ii and set entryList accordingly.
-
-                   iicell1st = iicell1st +1
-                   ii        = iicell1st
-
-                   cellIndex(nn)%entryList(i,j,k) = ii
-
-                   ! Store the info in the correct place in cellHalo1st.
-
-                   cellHalo1st(ii)%myBlock = nn
-                   cellHalo1st(ii)%myI     = i
-                   cellHalo1st(ii)%myJ     = j
-                   cellHalo1st(ii)%myK     = k
-
-                   cellHalo1st(ii)%donorProc  = neighProc(mm)
-                   cellHalo1st(ii)%donorBlock = neighBlock(mm)
-
-                   cellHalo1st(ii)%dI = iD
-                   cellHalo1st(ii)%dJ = jD
-                   cellHalo1st(ii)%dK = kD
-
-                   ! Store the short hand of the transformation matrix
-                   ! for this halo.
-
-                   transformCell(ii,1) = l1(mm)
-                   transformCell(ii,2) = l2(mm)
-                   transformCell(ii,3) = l3(mm)
-
-                   ! If the subface is periodic store the periodic info.
-                   ! Note that for the cells it is not needed to check
-                   ! if a previous transformation was already stored,
-                   ! because cell halo's are unique.
-
-                   if(indexPeriodic > 0) then
-                      cellHalo1st(ii)%nPeriodicSubfaces = 1
-                      allocate(cellHalo1st(ii)%periodicSubfaces(1), &
-                           stat=ierr)
-                      if(ierr /= 0)                                   &
-                           call terminate("determineFaceHalos",          &
-                           "Memory allocation failure for &
-                           &periodicSubfaces")
-                      cellHalo1st(ii)%periodicSubfaces(1) = indexPeriodic
-                   endif
-
-                enddo
-             enddo
-          enddo
-
-       enddo n1to1Loop
-
-    enddo domains
-
-  end subroutine determineFaceHalos
-
-  !      ==================================================================
-
-  subroutine haloRanges(mm, offset, cellRange, nodeRange, step)
-    !
-    !       haloRanges determines the cell and nodal ranges for the given
-    !       subface as well as the direction normal to the subface,
-    !       pointing outwards. In case of negative running indices of the
-    !       subface, step is set to -1; otherwise it is 1.
-
-    use constants
-    use blockPointers
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: mm
-    integer(kind=intType), dimension(3), intent(out) :: offset, step
-    integer(kind=intType), dimension(3,2), intent(out) :: cellRange
-    integer(kind=intType), dimension(3,2), intent(out) :: nodeRange
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i
-    integer(kind=intType) :: cellHaloInd, cellHaloID
-
-    ! Determine the offset in i, j and k direction, depending on the
-    ! faceID. This can be interpreted as the outward pointing normal
-    ! in the index domain. Furthermore store cellHaloInd and
-    ! cellHaloID. This info is needed to construct the cell halo
-    ! range correctly.
-
-    offset = 0
-
-    select case (BCFaceID(mm))
-    case (iMin)
-       offset(1)   = -1
-       cellHaloInd =  1
-       cellHaloID  =  1
-    case (iMax)
-       offset(1)   =  1
-       cellHaloInd =  1
-       cellHaloID  =  ie
-    case (jMin)
-       offset(2)   = -1
-       cellHaloInd =  2
-       cellHaloID  =  1
-    case (jMax)
-       offset(2)   =  1
-       cellHaloInd =  2
-       cellHaloID  =  je
-    case (kMin)
-       offset(3)   = -1
-       cellHaloInd =  3
-       cellHaloID  =  1
-    case (kMax)
-       offset(3)   =  1
-       cellHaloInd =  3
-       cellHaloID  =  ke
-    end select
-
-    ! Copy the nodal range.
-
-    nodeRange(1,1) = inBeg(mm)
-    nodeRange(2,1) = jnBeg(mm)
-    nodeRange(3,1) = knBeg(mm)
-
-    nodeRange(1,2) = inEnd(mm)
-    nodeRange(2,2) = jnEnd(mm)
-    nodeRange(3,2) = knEnd(mm)
-
-    ! Determine the cell range. The cell numbering of a block starts
-    ! at index 2, i.e. 1 higher than the node numbering. Consequently
-    ! 1 must be added to the smallest indices of the nodal range.
-    ! Take negative running indices into account and set step
-    ! accordingly.
-
-    do i=1,3
-       if(nodeRange(i,1) > nodeRange(i,2)) then
-
-          ! Negative running index.
-
-          step(i)        = -1
-          cellRange(i,1) = nodeRange(i,1)
-          cellRange(i,2) = nodeRange(i,2) + 1
-       else
-
-          ! Positive running index.
-
-          step(i)        = 1
-          cellRange(i,1) = nodeRange(i,1) + 1
-          cellRange(i,2) = nodeRange(i,2)
-       endif
-    enddo
-
-    ! Correct the cell range for the index corresponding to the face
-    ! we are on.
-
-    cellRange(cellHaloInd,1) = cellHaloID
-    cellRange(cellHaloInd,2) = cellHaloID
-
-  end subroutine haloRanges
-
-
-  subroutine init2ndLevelCellHalos
-    !
-    !       init2ndLevelCellHalos initializes the 2nd level cell halo
-    !       list. Basically the 1st level cell halo list is copied and the
-    !       counter iicell2nd is set to nCellHalo1st. This means that
-    !       the 2nd level cell halo's are appended to the first level
-    !       halo's. They are stored in a separate list, because the
-    !       communication pattern of the 2nd level halo's is separate from
-    !       the 1st level halo's. Efficiency is the reason to do this; it
-    !       is more efficient to send one big message than two smaller
-    !       ones.
-    !
-    use haloList
-    use utils, only : terminate
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    integer(kind=intType) :: i, j, jj
-
-    ! Allocate the memory for the 2nd level halo list.
-
-    allocate(cellHalo2nd(nCellHalo2nd), stat=ierr)
-    if(ierr /= 0)                             &
-         call terminate("init2ndLevelCellHalos", &
-         "Memory allocation failure for cellHalo2nd")
-
-    ! Initialize iicell2nd to nCellHalo1st.
-
-    iiCell2nd = nCellHalo1st
-
-    ! Copy the information from the 1st level cell halo's into the
-    ! second level cell halo list. Make sure to make a deep copy.
-
-    do i=1,nCellHalo1st
-       cellHalo2nd(i)%myBlock    = cellHalo1st(i)%myBlock
-       cellHalo2nd(i)%myI        = cellHalo1st(i)%myI
-       cellHalo2nd(i)%myJ        = cellHalo1st(i)%myJ
-       cellHalo2nd(i)%myK        = cellHalo1st(i)%myK
-       cellHalo2nd(i)%donorProc  = cellHalo1st(i)%donorProc
-       cellHalo2nd(i)%donorBlock = cellHalo1st(i)%donorBlock
-       cellHalo2nd(i)%dI         = cellHalo1st(i)%dI
-       cellHalo2nd(i)%dJ         = cellHalo1st(i)%dJ
-       cellHalo2nd(i)%dK         = cellHalo1st(i)%dK
-       cellHalo2nd(i)%levOfInd   = cellHalo1st(i)%levOfInd
-
-       nullify(cellHalo2nd(i)%interp)
-
-       cellHalo2nd(i)%nPeriodicSubfaces = &
-            cellHalo1st(i)%nPeriodicSubfaces
-
-       if(cellHalo2nd(i)%nPeriodicSubfaces > 0) then
-          jj = cellHalo2nd(i)%nPeriodicSubfaces
-          allocate(cellHalo2nd(i)%periodicSubfaces(jj), stat=ierr)
-          if(ierr /= 0)                             &
-               call terminate("init2ndLevelCellHalos", &
-               "Memory allocation failure for &
-               &periodicSubfaces")
-          do j=1,jj
-             cellHalo2nd(i)%periodicSubfaces(j) = &
-                  cellHalo1st(i)%periodicSubfaces(j)
-          enddo
-       else
-          nullify(cellHalo2nd(i)%periodicSubfaces)
-       endif
-    enddo
-
-    ! Initialize the level of indirectness for the rest of the list
-    ! to 0 and initialize the periodic data to 0 as well.
-
-    do i=(nCellHalo1st+1), nCellHalo2nd
-       cellHalo2nd(i)%levOfInd          = 0
-       cellHalo2nd(i)%nPeriodicSubfaces = 0
-       nullify(cellHalo2nd(i)%periodicSubfaces)
-       nullify(cellHalo2nd(i)%interp)
-    enddo
-
-  end subroutine init2ndLevelCellHalos
-
-  subroutine qsortHaloListType(arr, nn)
-    !
-    !       qsortHaloListType sorts the given number of halo's in
-    !       increasing order based on the <= operator for this derived
-    !       data type.
-    !
-    use haloList
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nn
-
-    type(haloListType), dimension(*), intent(inout) :: arr
-    !
-    !      Local variables.
-    !
-    integer(kind=intType), parameter :: m = 7
-
-    integer(kind=intType) :: nStack
-    integer(kind=intType) :: i, j, k, r, l, jStack, ii
-
-    integer :: ierr
-
-    type(haloListType) :: a, tmp
-
-    integer(kind=intType), allocatable, dimension(:) :: stack
-    integer(kind=intType), allocatable, dimension(:) :: tmpStack
-
-    ! Allocate the memory for stack.
-
-    nStack = 100
-    allocate(stack(nStack), stat=ierr)
-    if(ierr /= 0)                         &
-         call terminate("qsortHaloListType", &
-         "Memory allocation failure for stack")
+                i = l + 1
+                j = r
+                a = arr(l + 1)
 
-    ! Initialize the variables that control the sorting.
+                ! The innermost loop
 
-    jStack = 0
-    l      = 1
-    r      = nn
-
-    ! Start of the algorithm
+                do
 
-    do
+                    ! Scan up to find element >= a.
+                    do
+                        i = i + 1
+                        if (a <= arr(i)) exit
+                    end do
 
-       ! Check for the size of the subarray.
+                    ! Scan down to find element <= a.
+                    do
+                        j = j - 1
+                        if (arr(j) <= a) exit
+                    end do
 
-       if((r-l) < m) then
+                    ! Exit the loop in case the pointers i and j crossed.
 
-          ! Perform insertion sort
-
-          do j=l+1,r
-             a = arr(j)
-             do i=(j-1),l,-1
-                if(arr(i) <= a) exit
-                arr(i+1) = arr(i)
-             enddo
-             arr(i+1) = a
-          enddo
+                    if (j < i) exit
 
-          ! In case there are no more elements on the stack, exit from
-          ! the outermost do-loop. Algorithm has finished.
+                    ! Swap the element i and j.
 
-          if(jStack == 0) exit
+                    tmp = arr(i)
+                    arr(i) = arr(j)
+                    arr(j) = tmp
+                end do
 
-          ! Pop stack and begin a new round of partitioning.
+                ! Swap the entries j and l+1. Remember that a equals
+                ! arr(l+1).
 
-          r = stack(jStack)
-          l = stack(jStack-1)
-          jStack = jStack - 2
-
-       else
+                arr(l + 1) = arr(j)
+                arr(j) = a
 
-          ! Subarray is larger than the threshold for a linear sort.
-          ! Choose median of left, center and right elements as
-          ! partitioning element a.
-          ! Also rearrange so that (l) <= (l+1) <= (r).
+                ! Push pointers to larger subarray on stack,
+                ! process smaller subarray immediately.
 
-          k = (l+r)/2
-          tmp      = arr(k)             ! Swap the elements
-          arr(k)   = arr(l+1)           ! k and l+1.
-          arr(l+1) = tmp
+                jStack = jStack + 2
+                if (jStack > nStack) then
 
-          if(arr(r) < arr(l)) then
-             tmp    = arr(l)             ! Swap the elements
-             arr(l) = arr(r)             ! r and l.
-             arr(r) = tmp
-          endif
+                    ! Storage of the stack is too small. Reallocate.
 
-          if(arr(r) < arr(l+1)) then
-             tmp      = arr(l+1)         ! Swap the elements
-             arr(l+1) = arr(r)           ! r and l+1.
-             arr(r)   = tmp
-          endif
+                    allocate (tmpStack(nStack), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortHaloListType", &
+                                       "Memory allocation error for tmpStack")
+                    tmpStack = stack
 
-          if(arr(l+1) < arr(l)) then
-             tmp      = arr(l+1)         ! Swap the elements
-             arr(l+1) = arr(l)           ! l and l+1.
-             arr(l)   = tmp
-          endif
+                    ! Free the memory of stack, store the old value of nStack
+                    ! in tmp and increase nStack.
 
-          ! Initialize the pointers for partitioning.
+                    deallocate (stack, stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortHaloListType", &
+                                       "Deallocation error for stack")
+                    ii = nStack
+                    nStack = nStack + 100
 
-          i = l+1
-          j = r
-          a = arr(l+1)
+                    ! Allocate the memory for stack and copy the old values
+                    ! from tmpStack.
 
-          ! The innermost loop
+                    allocate (stack(nStack), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortHaloListType", &
+                                       "Memory reallocation error for stack")
+                    stack(1:ii) = tmpStack(1:ii)
 
-          do
+                    ! And finally release the memory of tmpStack.
 
-             ! Scan up to find element >= a.
-             do
-                i = i+1
-                if(a <= arr(i)) exit
-             enddo
+                    deallocate (tmpStack, stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortHaloListType", &
+                                       "Deallocation error for tmpStack")
+                end if
 
-             ! Scan down to find element <= a.
-             do
-                j = j-1
-                if(arr(j) <= a) exit
-             enddo
+                if ((r - i + 1) >= (j - l)) then
+                    stack(jStack) = r
+                    r = j - 1
+                    stack(jStack - 1) = j
+                else
+                    stack(jStack) = j - 1
+                    stack(jStack - 1) = l
+                    l = j
+                end if
+
+            end if
+        end do
+
+        ! Release the memory of stack.
+
+        deallocate (stack, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("qsortHaloListType", &
+                           "Deallocation error for stack")
+
+        ! Check in debug mode whether the array is really sorted.
+
+        if (debug) then
+            do i = 1, (nn - 1)
+                if (arr(i + 1) < arr(i)) &
+                    call terminate("qsortHaloListType", &
+                                   "Array is not sorted correctly")
+            end do
+        end if
+
+    end subroutine qsortHaloListType
+
+    subroutine finalCommStructures(entityHalo, nHalo, commPattern, &
+                                   internalComm, nInterp)
+        !
+        !       FinalCommStructures determines the communication data
+        !       structures used in the flow solver, commPattern and
+        !       internalComm, from the given haloList, entityHalo.
+        !
+        use communication
+        use haloList
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nHalo
+        type(haloListType), dimension(*), intent(in) :: entityHalo
+
+        type(commType), intent(out) :: commPattern
+        type(internalCommType), intent(out) :: internalComm
+
+        integer(kind=intType), intent(in) :: nInterp
+        !
+        !      Local variables.
+        !
+        integer :: ierr, count, dest, source, sizeRecv
+
+        integer, dimension(mpi_status_size) :: mpiStatus
+
+        integer(kind=intType) :: i, j
+        integer(kind=intType) :: ii, jj, kk, ll, mm, nn, pp
+        integer(kind=intType) :: sizeBuffer
+
+        integer(kind=intType), dimension(:), allocatable :: nHaloPerProc
+        integer(kind=intType), dimension(:), allocatable :: sendInfo
+
+        integer(kind=intType), dimension(:, :), allocatable :: buffer
+        integer(kind=intType), dimension(:, :), allocatable :: recvBuf
+
+        real(kind=realType), dimension(:, :), allocatable :: bufInt
+        real(kind=realType), dimension(:, :), allocatable :: recvBufInt
+
+        ! Allocate the memory for nHaloPerProc and initialize its
+        ! values to 0. Boundary halo's are stored with a processor id -1
+        ! and the array will be put in cumulative storage format later,
+        ! which explains the boundaries in the allocation.
+        ! Also allocate the memory for sendInfo, which is needed in the
+        ! all to all communication call.
+
+        allocate (nHaloPerProc(0:nProc), sendInfo(0:nProc - 1), stat=ierr)
+        if (ierr /= 0) &
+             call terminate("finalCommStructures", &
+             "Memory allocation error for nHaloPerProc &
+             &and sendInfo")
+
+        nHaloPerProc = 0
+
+        ! Loop over the number of halo's and determine the nHaloPerProc.
+        ! Note that the boundary conditions are stored in entityHalo with
+        ! a processor id -1.
+
+        do i = 1, nHalo
+            ii = entityHalo(i)%donorProc + 1
+            nHaloPerProc(ii) = nHaloPerProc(ii) + 1
+        end do
+
+        ! Perform an all to all communication, such that the processors
+        ! know how many messages will be received as well as their size.
+
+        call mpi_alltoall(nHaloPerProc(1), 1, adflow_integer, &
+                          sendInfo(0), 1, adflow_integer, &
+                          ADflow_comm_world, ierr)
+
+        ! Allocate the memory for indexRecvProc, the index in the
+        ! receive info for a certain processor. Initialize these value
+        ! to 0, indicating that nothing is received from a processor.
+
+        allocate (commPattern%indexRecvProc(0:nProc - 1), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("finalCommStructures", &
+                           "Memory allocation error for indexRecvProc")
+
+        do i = 0, (nProc - 1)
+            commPattern%indexRecvProc(i) = 0
+        end do
+
+        ! Determine the number of processors from which i receive data.
+        ! Receive data here means when an actual exchange takes place.
+        ! Furthermore determine the size of the buffer for the
+        ! nonblocking sends.
+
+        commPattern%nProcRecv = 0
+        sizeBuffer = 0
+        do i = 0, (nProc - 1)
+            ii = i + 1
+            if (nHaloPerProc(ii) > 0 .and. i /= myId) then
+                commPattern%nProcRecv = commPattern%nProcRecv + 1
+                commPattern%indexRecvProc(i) = commPattern%nProcRecv
+
+                sizeBuffer = sizeBuffer + nHaloPerProc(ii)
+            end if
+        end do
+
+        ! Allocate the memory for recvProc, nrecv, nrecvCum
+        ! and recvList.
+
+        ii = commPattern%nProcRecv
+        allocate (commPattern%recvProc(ii), &
+                  commPattern%nrecv(ii), &
+                  commPattern%nrecvCum(0:ii), &
+                  commPattern%recvList(ii), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("finalCommStructures", &
+                           "Memory allocation error for recvProc, etc")
+
+        ! Allocate memory for buffers, needed for the nonblocking sends.
+
+        allocate (buffer(4, sizeBuffer), bufInt(ninterp, sizeBuffer), &
+                  stat=ierr)
+        if (ierr /= 0) &
+            call terminate("finalCommStructures", &
+                           "Memory allocation error for buffer, bufInt.")
 
-             ! Exit the loop in case the pointers i and j crossed.
+        ! Repeat the loop over the processors, but now store receive info.
+        ! Jj is the counter for the current index in the receive processors
+        ! and mm the counter in the buffer used to send the halo info.
 
-             if(j < i) exit
+        jj = 0
+        mm = 1
+        commPattern%nrecvCum(0) = 0
+        do i = 0, (nProc - 1)
+            ii = i + 1
+            if (nHaloPerProc(ii) > 0 .and. i /= myId) then
 
-             ! Swap the element i and j.
-
-             tmp    = arr(i)
-             arr(i) = arr(j)
-             arr(j) = tmp
-          enddo
+                ! Update the counter jj, store the number of halo's a bit
+                ! easier in kk and set recvProc and nrecv.
 
-          ! Swap the entries j and l+1. Remember that a equals
-          ! arr(l+1).
-
-          arr(l+1) = arr(j)
-          arr(j)   = a
-
-          ! Push pointers to larger subarray on stack,
-          ! process smaller subarray immediately.
-
-          jStack = jStack + 2
-          if(jStack > nStack) then
-
-             ! Storage of the stack is too small. Reallocate.
-
-             allocate(tmpStack(nStack), stat=ierr)
-             if(ierr /= 0)                         &
-                  call terminate("qsortHaloListType", &
-                  "Memory allocation error for tmpStack")
-             tmpStack = stack
-
-             ! Free the memory of stack, store the old value of nStack
-             ! in tmp and increase nStack.
-
-             deallocate(stack, stat=ierr)
-             if(ierr /= 0)                         &
-                  call terminate("qsortHaloListType", &
-                  "Deallocation error for stack")
-             ii = nStack
-             nStack = nStack + 100
-
-             ! Allocate the memory for stack and copy the old values
-             ! from tmpStack.
-
-             allocate(stack(nStack), stat=ierr)
-             if(ierr /= 0)                         &
-                  call terminate("qsortHaloListType", &
-                  "Memory reallocation error for stack")
-             stack(1:ii) = tmpStack(1:ii)
-
-             ! And finally release the memory of tmpStack.
-
-             deallocate(tmpStack, stat=ierr)
-             if(ierr /= 0)                         &
-                  call terminate("qsortHaloListType", &
-                  "Deallocation error for tmpStack")
-          endif
-
-          if((r-i+1) >= (j-l)) then
-             stack(jStack)   = r
-             r               = j-1
-             stack(jStack-1) = j
-          else
-             stack(jStack)   = j-1
-             stack(jStack-1) = l
-             l               = j
-          endif
-
-       endif
-    enddo
-
-    ! Release the memory of stack.
-
-    deallocate(stack, stat=ierr)
-    if(ierr /= 0)                         &
-         call terminate("qsortHaloListType", &
-         "Deallocation error for stack")
-
-    ! Check in debug mode whether the array is really sorted.
-
-    if( debug ) then
-       do i=1,(nn-1)
-          if(arr(i+1) < arr(i))                 &
-               call terminate("qsortHaloListType", &
-               "Array is not sorted correctly")
-       enddo
-    endif
-
-  end subroutine qsortHaloListType
-
-  subroutine finalCommStructures(entityHalo, nHalo, commPattern, &
-       internalComm, nInterp)
-    !
-    !       FinalCommStructures determines the communication data
-    !       structures used in the flow solver, commPattern and
-    !       internalComm, from the given haloList, entityHalo.
-    !
-    use communication
-    use haloList
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in)            :: nHalo
-    type(haloListType), dimension(*), intent(in) :: entityHalo
-
-    type(commType),         intent(out) :: commPattern
-    type(internalCommType), intent(out) :: internalComm
-
-    integer(kind=intType), intent(in) :: nInterp
-    !
-    !      Local variables.
-    !
-    integer :: ierr, count, dest, source, sizeRecv
-
-    integer, dimension(mpi_status_size)  :: mpiStatus
-
-    integer(kind=intType) :: i, j
-    integer(kind=intType) :: ii, jj, kk, ll, mm, nn, pp
-    integer(kind=intType) :: sizeBuffer
-
-    integer(kind=intType), dimension(:), allocatable :: nHaloPerProc
-    integer(kind=intType), dimension(:), allocatable :: sendInfo
-
-    integer(kind=intType), dimension(:,:), allocatable :: buffer
-    integer(kind=intType), dimension(:,:), allocatable :: recvBuf
-
-    real(kind=realType), dimension(:,:), allocatable :: bufInt
-    real(kind=realType), dimension(:,:), allocatable :: recvBufInt
-
-    ! Allocate the memory for nHaloPerProc and initialize its
-    ! values to 0. Boundary halo's are stored with a processor id -1
-    ! and the array will be put in cumulative storage format later,
-    ! which explains the boundaries in the allocation.
-    ! Also allocate the memory for sendInfo, which is needed in the
-    ! all to all communication call.
-
-    allocate(nHaloPerProc(0:nProc), sendInfo(0:nProc-1), stat=ierr)
-    if(ierr /= 0)                             &
-         call terminate("finalCommStructures", &
-         "Memory allocation error for nHaloPerProc &
-         &and sendInfo")
-
-    nHaloPerProc = 0
-
-    ! Loop over the number of halo's and determine the nHaloPerProc.
-    ! Note that the boundary conditions are stored in entityHalo with
-    ! a processor id -1.
-
-    do i=1,nHalo
-       ii = entityHalo(i)%donorProc + 1
-       nHaloPerProc(ii) = nHaloPerProc(ii) + 1
-    enddo
-
-    ! Perform an all to all communication, such that the processors
-    ! know how many messages will be received as well as their size.
-
-    call mpi_alltoall(nHaloPerProc(1), 1,  adflow_integer, &
-         sendInfo(0),      1, adflow_integer, &
-         ADflow_comm_world, ierr)
-
-    ! Allocate the memory for indexRecvProc, the index in the
-    ! receive info for a certain processor. Initialize these value
-    ! to 0, indicating that nothing is received from a processor.
-
-    allocate(commPattern%indexRecvProc(0:nProc-1), stat=ierr)
-    if(ierr /= 0)                             &
-         call terminate("finalCommStructures", &
-         "Memory allocation error for indexRecvProc")
-
-    do i=0,(nProc-1)
-       commPattern%indexRecvProc(i) = 0
-    enddo
-
-    ! Determine the number of processors from which i receive data.
-    ! Receive data here means when an actual exchange takes place.
-    ! Furthermore determine the size of the buffer for the
-    ! nonblocking sends.
-
-    commPattern%nProcRecv = 0
-    sizeBuffer             = 0
-    do i=0,(nProc-1)
-       ii = i+1
-       if(nHaloPerProc(ii) > 0 .and. i /= myId) then
-          commPattern%nProcRecv         = commPattern%nProcRecv + 1
-          commPattern%indexRecvProc(i) = commPattern%nProcRecv
-
-          sizeBuffer = sizeBuffer + nHaloPerProc(ii)
-       endif
-    enddo
-
-    ! Allocate the memory for recvProc, nrecv, nrecvCum
-    ! and recvList.
-
-    ii = commPattern%nProcRecv
-    allocate(commPattern%recvProc(ii),   &
-         commPattern%nrecv(ii),       &
-         commPattern%nrecvCum(0:ii), &
-         commPattern%recvList(ii), stat=ierr)
-    if(ierr /= 0)                             &
-         call terminate("finalCommStructures", &
-         "Memory allocation error for recvProc, etc")
+                jj = jj + 1
+                kk = nHaloPerProc(ii)
 
-    ! Allocate memory for buffers, needed for the nonblocking sends.
+                commPattern%recvProc(jj) = i
+                commPattern%nrecv(jj) = kk
+                commPattern%nrecvCum(jj) = commPattern%nrecvCum(jj - 1) + kk
 
-    allocate(buffer(4,sizeBuffer), bufInt(ninterp,sizeBuffer), &
-         stat=ierr)
-    if(ierr /= 0)                             &
-         call terminate("finalCommStructures", &
-         "Memory allocation error for buffer, bufInt.")
+                ! Allocate the memory for the receive list.
 
-    ! Repeat the loop over the processors, but now store receive info.
-    ! Jj is the counter for the current index in the receive processors
-    ! and mm the counter in the buffer used to send the halo info.
+                allocate (commPattern%recvList(jj)%block(kk), &
+                          commPattern%recvList(jj)%indices(kk, 3), &
+                          stat=ierr)
+                if (ierr /= 0) &
+                     call terminate("finalCommStructures", &
+                     "Memory allocation failure for block and &
+                     &indices of recvlist")
 
-    jj = 0
-    mm = 1
-    commPattern%nrecvCum(0) = 0
-    do i=0,(nProc-1)
-       ii = i+1
-       if(nHaloPerProc(ii) > 0 .and. i /= myId) then
-
-          ! Update the counter jj, store the number of halo's a bit
-          ! easier in kk and set recvProc and nrecv.
-
-          jj = jj + 1
-          kk = nHaloPerProc(ii)
-
-          commPattern%recvProc(jj) = i
-          commPattern%nrecv(jj)     = kk
-          commPattern%nrecvCum(jj) = commPattern%nrecvCum(jj-1) + kk
-
-          ! Allocate the memory for the receive list.
+                ! Copy the halo info in the receive list of commPattern.
 
-          allocate(commPattern%recvList(jj)%block(kk),     &
-               commPattern%recvList(jj)%indices(kk,3), &
-               stat=ierr)
-          if(ierr /= 0)                             &
-               call terminate("finalCommStructures", &
-               "Memory allocation failure for block and &
-               &indices of recvlist")
+                do j = 1, kk
+                    ll = j + nHaloPerProc(i)
 
-          ! Copy the halo info in the receive list of commPattern.
+                    commPattern%recvList(jj)%block(j) = entityHalo(ll)%myBlock
 
-          do j=1,kk
-             ll = j + nHaloPerProc(i)
+                    commPattern%recvList(jj)%indices(j, 1) = entityHalo(ll)%myI
+                    commPattern%recvList(jj)%indices(j, 2) = entityHalo(ll)%myJ
+                    commPattern%recvList(jj)%indices(j, 3) = entityHalo(ll)%myK
+                end do
 
-             commPattern%recvList(jj)%block(j) = entityHalo(ll)%myBlock
+                ! Copy the donor info in the buffer and send it to the
+                ! appropriate processor.
 
-             commPattern%recvList(jj)%indices(j,1) = entityHalo(ll)%myI
-             commPattern%recvList(jj)%indices(j,2) = entityHalo(ll)%myJ
-             commPattern%recvList(jj)%indices(j,3) = entityHalo(ll)%myK
-          enddo
+                nn = mm - 1
+                do j = 1, kk
+                    ll = j + nHaloPerProc(i)
+                    nn = nn + 1
 
-          ! Copy the donor info in the buffer and send it to the
-          ! appropriate processor.
+                    buffer(1, nn) = entityHalo(ll)%donorBlock
+                    buffer(2, nn) = entityHalo(ll)%dI
+                    buffer(3, nn) = entityHalo(ll)%dJ
+                    buffer(4, nn) = entityHalo(ll)%dK
 
-          nn = mm - 1
-          do j=1,kk
-             ll = j + nHaloPerProc(i)
-             nn = nn +1
-
-             buffer(1,nn) = entityHalo(ll)%donorBlock
-             buffer(2,nn) = entityHalo(ll)%dI
-             buffer(3,nn) = entityHalo(ll)%dJ
-             buffer(4,nn) = entityHalo(ll)%dK
-
-             do pp = 1,ninterp
-                bufInt(pp,nn) = entityHalo(ll)%interp(pp)
-             end do
-          enddo
+                    do pp = 1, ninterp
+                        bufInt(pp, nn) = entityHalo(ll)%interp(pp)
+                    end do
+                end do
 
-          ! Copy some values to be sure integer data is used in the
-          ! MPI call.
-
-          count = 4*kk
-          dest  = i
-
-          ! And send the stuff.
-
-          call mpi_isend(buffer(1,mm), count, adflow_integer, &
-               dest, dest+2, ADflow_comm_world,     &
-               sendRequests(jj), ierr)
+                ! Copy some values to be sure integer data is used in the
+                ! MPI call.
 
-          ! Now send the interpolants, if any are being exchanged.
+                count = 4 * kk
+                dest = i
 
-          if(ninterp > 0) then
-             count = nInterp*kk
-             call mpi_isend(bufInt(1,mm), count, adflow_real, &
-                  dest, dest+3, ADflow_comm_world,  &
-                  recvRequests(jj), ierr)
-          end if
-
-          ! Update mm to the index in buffer for the next processor.
-
-          mm = mm + kk
-       endif
+                ! And send the stuff.
 
-       ! Put nHaloPerProc in cumulative storage format.
+                call mpi_isend(buffer(1, mm), count, adflow_integer, &
+                               dest, dest + 2, ADflow_comm_world, &
+                               sendRequests(jj), ierr)
 
-       nHaloPerProc(ii) = nHaloPerProc(ii) + nHaloPerProc(i)
-    enddo
+                ! Now send the interpolants, if any are being exchanged.
 
-    ! Determine the number of internal memory to memory copies and
-    ! allocate the memory for it.
-
-    ii = nHaloPerProc(myId+1) - nHaloPerProc(myId)
-    internalComm%ncopy = ii
-
-    allocate(internalComm%donorBlock(ii),          &
-         internalComm%donorIndices(ii,3),      &
-         internalComm%donorInterp(ii,ninterp), &
-         internalComm%haloBlock(ii),           &
-         internalComm%haloIndices(ii,3), stat=ierr)
-    if(ierr /= 0)                             &
-         call terminate("finalCommStructures", &
-         "Memory allocation failure for internalComm")
-
-    ! Copy the info from the halo list.
+                if (ninterp > 0) then
+                    count = nInterp * kk
+                    call mpi_isend(bufInt(1, mm), count, adflow_real, &
+                                   dest, dest + 3, ADflow_comm_world, &
+                                   recvRequests(jj), ierr)
+                end if
 
-    do i=1,internalComm%ncopy
-       ii = i + nHaloPerProc(myId)
+                ! Update mm to the index in buffer for the next processor.
 
-       internalComm%donorBlock(i) = entityHalo(ii)%donorBlock
-       internalComm%haloBlock(i)  = entityHalo(ii)%myBlock
-
-       internalComm%donorIndices(i,1) = entityHalo(ii)%dI
-       internalComm%donorIndices(i,2) = entityHalo(ii)%dJ
-       internalComm%donorIndices(i,3) = entityHalo(ii)%dK
-
-       do pp = 1,ninterp
-          internalComm%donorInterp(i,pp) = entityHalo(ii)%interp(pp)
-       end do
-
-       internalComm%haloIndices(i,1) = entityHalo(ii)%myI
-       internalComm%haloIndices(i,2) = entityHalo(ii)%myJ
-       internalComm%haloIndices(i,3) = entityHalo(ii)%myK
-    enddo
+                mm = mm + kk
+            end if
 
-    ! Determine from the earlier MPI_alltoall call the processors
-    ! to whom i must send data in a normal data exchange. Also
-    ! determine the amount i must send.
+            ! Put nHaloPerProc in cumulative storage format.
 
-    ! First determine the number of processors to which i must send
-    ! data and the size of the buffer for receiving data.
-    ! Allocate and determine indexSendProc as well.
+            nHaloPerProc(ii) = nHaloPerProc(ii) + nHaloPerProc(i)
+        end do
 
-    allocate(commPattern%indexSendProc(0:nProc-1), stat=ierr)
-    if(ierr /= 0)                             &
-         call terminate("finalCommStructures", &
-         "Memory allocation error for indexSendProc")
-
-    do i=0,(nProc-1)
-       commPattern%indexSendProc(i) = 0
-    enddo
-
-    commPattern%nProcSend = 0
-    sizeBuffer = 0
-
-    do i=0,(nProc-1)
-       if(sendInfo(i) > 0 .and. i /= myId) then
-          commPattern%nProcSend         = commPattern%nProcSend + 1
-          commPattern%indexSendProc(i) = commPattern%nProcSend
-
-          sizeBuffer = max(sizeBuffer,sendInfo(i))
-       endif
-    enddo
-
-    ! Allocate the memory for sendProc, nsend, nsendCum
-    ! and sendList
+        ! Determine the number of internal memory to memory copies and
+        ! allocate the memory for it.
 
-    ii = commPattern%nProcSend
-    allocate(commPattern%sendProc(ii),   &
-         commPattern%nsend(ii),       &
-         commPattern%nsendCum(0:ii), &
-         commPattern%sendList(ii),   stat=ierr)
-    if(ierr /= 0)                             &
-         call terminate("finalCommStructures", &
-         "Memory allocation error for sendProc, etc")
-
-    ! Repeat the loop over the number of processors, but now store
-    ! the sending info. Use ii as a counter.
-
-    ii = 0
-    commPattern%nsendCum(0) = 0
-    do i=0,(nProc-1)
-       if(sendInfo(i) > 0 .and. i /= myId) then
-          ii = ii + 1
-
-          commPattern%sendProc(ii) = i
-          commPattern%nsend(ii)     = sendInfo(i)
-          commPattern%nsendCum(ii) = commPattern%nsendCum(ii-1) &
-               + sendInfo(i)
-       endif
-    enddo
-
-    ! Allocate the memory for the send lists.
-
-    do i=1,commPattern%nProcSend
-       ii = commPattern%nsend(i)
-       allocate(commPattern%sendList(i)%block(ii),          &
-            commPattern%sendList(i)%indices(ii,3),      &
-            commPattern%sendList(i)%interp(ii,ninterp), &
-            stat=ierr)
-       if(ierr /= 0)                             &
+        ii = nHaloPerProc(myId + 1) - nHaloPerProc(myId)
+        internalComm%ncopy = ii
+
+        allocate (internalComm%donorBlock(ii), &
+                  internalComm%donorIndices(ii, 3), &
+                  internalComm%donorInterp(ii, ninterp), &
+                  internalComm%haloBlock(ii), &
+                  internalComm%haloIndices(ii, 3), stat=ierr)
+        if (ierr /= 0) &
             call terminate("finalCommStructures", &
-            "Memory allocation failure for block, interp, &
-            &and indices of sendlist")
-    enddo
+                           "Memory allocation failure for internalComm")
+
+        ! Copy the info from the halo list.
 
-    ! Allocate the memory for the receiving buffers.
+        do i = 1, internalComm%ncopy
+            ii = i + nHaloPerProc(myId)
 
-    allocate(recvBuf(4,sizeBuffer), &
-         recvBufInt(ninterp,sizeBuffer), stat=ierr)
-    if(ierr /= 0)                             &
-         call terminate("finalCommStructures", &
-         "Memory allocation failure for recvBuffers")
+            internalComm%donorBlock(i) = entityHalo(ii)%donorBlock
+            internalComm%haloBlock(i) = entityHalo(ii)%myBlock
 
-    ! Loop over the number of processors to receive my send info.
+            internalComm%donorIndices(i, 1) = entityHalo(ii)%dI
+            internalComm%donorIndices(i, 2) = entityHalo(ii)%dJ
+            internalComm%donorIndices(i, 3) = entityHalo(ii)%dK
 
-    recvSendInfo: do i=1,commPattern%nProcSend
+            do pp = 1, ninterp
+                internalComm%donorInterp(i, pp) = entityHalo(ii)%interp(pp)
+            end do
 
-       ! Block until a message arrives.
+            internalComm%haloIndices(i, 1) = entityHalo(ii)%myI
+            internalComm%haloIndices(i, 2) = entityHalo(ii)%myJ
+            internalComm%haloIndices(i, 3) = entityHalo(ii)%myK
+        end do
 
-       call mpi_probe(mpi_any_source, myId+2, ADflow_comm_world, &
-            mpiStatus, ierr)
+        ! Determine from the earlier MPI_alltoall call the processors
+        ! to whom i must send data in a normal data exchange. Also
+        ! determine the amount i must send.
 
-       ! Store the source processor a bit easier and determine the
-       ! index in the send data structure.
-
-       source = mpiStatus(mpi_source)
-       ii     = commPattern%indexSendProc(source)
+        ! First determine the number of processors to which i must send
+        ! data and the size of the buffer for receiving data.
+        ! Allocate and determine indexSendProc as well.
 
-       ! Perform some checks in debug mode.
-
-       if( debug ) then
-          if(ii <= 0 .or. ii > commPattern%nProcSend) &
-               call terminate("finalCommStructures",     &
-               "Send processor not in the list")
+        allocate (commPattern%indexSendProc(0:nProc - 1), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("finalCommStructures", &
+                           "Memory allocation error for indexSendProc")
 
-          call mpi_get_count(mpiStatus, adflow_integer, sizeRecv, ierr)
-          if(sizeRecv /= 4*commPattern%nsend(ii)) &
-               call terminate("finalCommStructures",     &
-               "Unexpected size of message")
-       endif
+        do i = 0, (nProc - 1)
+            commPattern%indexSendProc(i) = 0
+        end do
 
-       ! Receive the message. As it has already arrived a blocking
-       ! receive can be used.
+        commPattern%nProcSend = 0
+        sizeBuffer = 0
 
-       sizeRecv = 4*commPattern%nsend(ii)
+        do i = 0, (nProc - 1)
+            if (sendInfo(i) > 0 .and. i /= myId) then
+                commPattern%nProcSend = commPattern%nProcSend + 1
+                commPattern%indexSendProc(i) = commPattern%nProcSend
 
-       call mpi_recv(recvBuf, sizeRecv, adflow_integer, source, &
-            myId+2, ADflow_comm_world, mpiStatus, ierr)
+                sizeBuffer = max(sizeBuffer, sendInfo(i))
+            end if
+        end do
 
-       ! Now receive the interpolants, if any.
+        ! Allocate the memory for sendProc, nsend, nsendCum
+        ! and sendList
 
-       if(ninterp > 0) then
-          sizeRecv = nInterp*commPattern%nsend(ii)
-          call mpi_recv(recvBufInt, sizeRecv, adflow_real, source, &
-               myId+3, ADflow_comm_world, mpiStatus, ierr)
-       end if
+        ii = commPattern%nProcSend
+        allocate (commPattern%sendProc(ii), &
+                  commPattern%nsend(ii), &
+                  commPattern%nsendCum(0:ii), &
+                  commPattern%sendList(ii), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("finalCommStructures", &
+                           "Memory allocation error for sendProc, etc")
+
+        ! Repeat the loop over the number of processors, but now store
+        ! the sending info. Use ii as a counter.
+
+        ii = 0
+        commPattern%nsendCum(0) = 0
+        do i = 0, (nProc - 1)
+            if (sendInfo(i) > 0 .and. i /= myId) then
+                ii = ii + 1
+
+                commPattern%sendProc(ii) = i
+                commPattern%nsend(ii) = sendInfo(i)
+                commPattern%nsendCum(ii) = commPattern%nsendCum(ii - 1) &
+                                           + sendInfo(i)
+            end if
+        end do
+
+        ! Allocate the memory for the send lists.
+
+        do i = 1, commPattern%nProcSend
+            ii = commPattern%nsend(i)
+            allocate (commPattern%sendList(i)%block(ii), &
+                      commPattern%sendList(i)%indices(ii, 3), &
+                      commPattern%sendList(i)%interp(ii, ninterp), &
+                      stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("finalCommStructures", &
+                 "Memory allocation failure for block, interp, &
+                 &and indices of sendlist")
+        end do
+
+        ! Allocate the memory for the receiving buffers.
+
+        allocate (recvBuf(4, sizeBuffer), &
+                  recvBufInt(ninterp, sizeBuffer), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("finalCommStructures", &
+                           "Memory allocation failure for recvBuffers")
 
-       ! Store the info I must send to this processor in a normal
-       ! data exchange.
+        ! Loop over the number of processors to receive my send info.
 
-       do j=1,commPattern%nsend(ii)
-          commPattern%sendList(ii)%block(j) = recvBuf(1,j)
+        recvSendInfo: do i = 1, commPattern%nProcSend
 
-          commPattern%sendList(ii)%indices(j,1) = recvBuf(2,j)
-          commPattern%sendList(ii)%indices(j,2) = recvBuf(3,j)
-          commPattern%sendList(ii)%indices(j,3) = recvBuf(4,j)
+            ! Block until a message arrives.
 
-          do pp = 1,ninterp
-             commPattern%sendList(ii)%interp(j,pp) = recvBufInt(pp,j)
-          end do
-       enddo
+            call mpi_probe(mpi_any_source, myId + 2, ADflow_comm_world, &
+                           mpiStatus, ierr)
 
-    enddo recvSendInfo
+            ! Store the source processor a bit easier and determine the
+            ! index in the send data structure.
 
-    ! Complete the nonblocking sends. Dest is used as a temporary
-    ! storage such that an integer is passed to the MPI call
+            source = mpiStatus(mpi_source)
+            ii = commPattern%indexSendProc(source)
 
-    dest = commPattern%nProcRecv
-    do i=1,commPattern%nProcRecv
-       call mpi_waitany(dest, sendRequests, count, mpiStatus, ierr)
-    enddo
+            ! Perform some checks in debug mode.
 
-    ! Repeat the call if any interpolants were exchanged.
+            if (debug) then
+                if (ii <= 0 .or. ii > commPattern%nProcSend) &
+                    call terminate("finalCommStructures", &
+                                   "Send processor not in the list")
 
-    if(ninterp > 0) then
-       do i=1,commPattern%nProcRecv
-          call mpi_waitany(dest, recvRequests, count, mpiStatus, ierr)
-       enddo
-    end if
+                call mpi_get_count(mpiStatus, adflow_integer, sizeRecv, ierr)
+                if (sizeRecv /= 4 * commPattern%nsend(ii)) &
+                    call terminate("finalCommStructures", &
+                                   "Unexpected size of message")
+            end if
 
-    ! Release the memory of the help variables allocated in this
-    ! subroutine.
+            ! Receive the message. As it has already arrived a blocking
+            ! receive can be used.
 
-    deallocate(nHaloPerProc, sendInfo, buffer, recvBuf, &
-         bufInt, recvBufInt, stat=ierr)
-    if(ierr /= 0) &
-         call terminate("finalCommStructures", &
-         "Deallocation error for nHaloPerProc, &
-         &sendInfo, and temporary buffers")
+            sizeRecv = 4 * commPattern%nsend(ii)
 
-  end subroutine finalCommStructures
+            call mpi_recv(recvBuf, sizeRecv, adflow_integer, source, &
+                          myId + 2, ADflow_comm_world, mpiStatus, ierr)
 
-  subroutine closestDirectHalos(entityHalo, entityIndex, &
-       start, nLevel, offset, gridLevel)
-    !
-    !       closestDirectHalos determines the number of indirect halo's
-    !       to be treated and its corresponding direct halo.
-    !
-    use block
-    use bcHalo
-    use haloList
-    use indirectHalo
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: start, nLevel, offset
-    integer(kind=intType), intent(in) :: gridLevel
+            ! Now receive the interpolants, if any.
 
-    type(haloListType), dimension(:), intent(in) :: entityHalo
-    type(indexListType), dimension(:), intent(in) :: entityIndex
-    !
-    !      Local variables.
-    !
-    integer :: ierr
+            if (ninterp > 0) then
+                sizeRecv = nInterp * commPattern%nsend(ii)
+                call mpi_recv(recvBufInt, sizeRecv, adflow_real, source, &
+                              myId + 3, ADflow_comm_world, mpiStatus, ierr)
+            end if
 
-    integer(kind=intType) :: i, j, k, jj, nn, mm
-    integer(kind=intType) :: il, jl, kl
-    integer(kind=intType) :: iStart, jStart, kStart, iEnd, jEnd, kEnd
-    integer(kind=intType) :: nHaloDirI, nHaloDirJ, nHaloDirK
-    integer(kind=intType) :: iindHalo, levOfInd
+            ! Store the info I must send to this processor in a normal
+            ! data exchange.
 
-    integer(kind=intType), dimension(3) :: dir, haloDir, ii
+            do j = 1, commPattern%nsend(ii)
+                commPattern%sendList(ii)%block(j) = recvBuf(1, j)
 
-    type(bcHaloType), dimension(3) :: bcHalos
-    !
+                commPattern%sendList(ii)%indices(j, 1) = recvBuf(2, j)
+                commPattern%sendList(ii)%indices(j, 2) = recvBuf(3, j)
+                commPattern%sendList(ii)%indices(j, 3) = recvBuf(4, j)
 
-    ! Determine the number of indirect halo's for which the donor must
-    ! be determined. Initialize iindHalo, the actual counter, to 0.
+                do pp = 1, ninterp
+                    commPattern%sendList(ii)%interp(j, pp) = recvBufInt(pp, j)
+                end do
+            end do
 
-    nIndHalo = getNumberIndirectHalos(start, nLevel, offset, &
-         gridLevel)
-    iindHalo = 0
+        end do recvSendInfo
 
-    ! Allocate the memory for indHalo
+        ! Complete the nonblocking sends. Dest is used as a temporary
+        ! storage such that an integer is passed to the MPI call
 
-    allocate(indHalo(nIndHalo), stat=ierr)
-    if(ierr /= 0)                         &
-         call terminate("closestDirectHalos", &
-         "Memory allocation failure for indHalo")
+        dest = commPattern%nProcRecv
+        do i = 1, commPattern%nProcRecv
+            call mpi_waitany(dest, sendRequests, count, mpiStatus, ierr)
+        end do
 
-    ! Determine the lower bound for the block with halo's.
-    ! This is identical for all the blocks.
+        ! Repeat the call if any interpolants were exchanged.
 
-    iStart = start - nLevel
-    jStart = iStart
-    kStart = iStart
+        if (ninterp > 0) then
+            do i = 1, commPattern%nProcRecv
+                call mpi_waitany(dest, recvRequests, count, mpiStatus, ierr)
+            end do
+        end if
 
-    ! Loop over the blocks on this processor.
+        ! Release the memory of the help variables allocated in this
+        ! subroutine.
 
-    domains: do nn=1,nDom
+        deallocate (nHaloPerProc, sendInfo, buffer, recvBuf, &
+                    bufInt, recvBufInt, stat=ierr)
+        if (ierr /= 0) &
+             call terminate("finalCommStructures", &
+             "Deallocation error for nHaloPerProc, &
+             &sendInfo, and temporary buffers")
 
-       ! Store the number of nodes in the three directions of the
-       ! current block a bit easier.
+    end subroutine finalCommStructures
 
-       il = flowDoms(nn,gridLevel,1)%il
-       jl = flowDoms(nn,gridLevel,1)%jl
-       kl = flowDoms(nn,gridLevel,1)%kl
+    subroutine closestDirectHalos(entityHalo, entityIndex, &
+                                  start, nLevel, offset, gridLevel)
+        !
+        !       closestDirectHalos determines the number of indirect halo's
+        !       to be treated and its corresponding direct halo.
+        !
+        use block
+        use bcHalo
+        use haloList
+        use indirectHalo
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: start, nLevel, offset
+        integer(kind=intType), intent(in) :: gridLevel
 
-       ! Determine the upper boundaries of this block with halo's.
+        type(haloListType), dimension(:), intent(in) :: entityHalo
+        type(indexListType), dimension(:), intent(in) :: entityIndex
+        !
+        !      Local variables.
+        !
+        integer :: ierr
 
-       iEnd = il + nLevel
-       jEnd = jl + nLevel
-       kEnd = kl + nLevel
+        integer(kind=intType) :: i, j, k, jj, nn, mm
+        integer(kind=intType) :: il, jl, kl
+        integer(kind=intType) :: iStart, jStart, kStart, iEnd, jEnd, kEnd
+        integer(kind=intType) :: nHaloDirI, nHaloDirJ, nHaloDirK
+        integer(kind=intType) :: iindHalo, levOfInd
 
-       ! Loop over all the entities of the full halo block.
+        integer(kind=intType), dimension(3) :: dir, haloDir, ii
 
-       iLoop: do i=iStart,iEnd
+        type(bcHaloType), dimension(3) :: bcHalos
+        !
 
-          ! Determine the halo in i-direction.
+        ! Determine the number of indirect halo's for which the donor must
+        ! be determined. Initialize iindHalo, the actual counter, to 0.
 
-          dir(1) = min(i-start,max(i-il,0_intType))
+        nIndHalo = getNumberIndirectHalos(start, nLevel, offset, &
+                                          gridLevel)
+        iindHalo = 0
 
-          if(dir(1) == 0) then
-             nHaloDirI = 0
-          else
-             nHaloDirI = 1
-             haloDir(1) = 1
-          endif
+        ! Allocate the memory for indHalo
 
-          jLoop: do j=jStart,jEnd
+        allocate (indHalo(nIndHalo), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("closestDirectHalos", &
+                           "Memory allocation failure for indHalo")
 
-             ! Determine the halo in j-direction.
+        ! Determine the lower bound for the block with halo's.
+        ! This is identical for all the blocks.
 
-             dir(2) = min(j-start,max(j-jl,0_intType))
+        iStart = start - nLevel
+        jStart = iStart
+        kStart = iStart
 
-             if(dir(2) == 0) then
-                nHaloDirJ = nHaloDirI
-             else
-                nHaloDirJ          = nHaloDirI + 1
-                haloDir(nHaloDirJ) = 2
-             endif
+        ! Loop over the blocks on this processor.
 
-             kLoop: do k=kStart,kEnd
+        domains: do nn = 1, nDom
 
-                ! Determine the halo in k-direction.
+            ! Store the number of nodes in the three directions of the
+            ! current block a bit easier.
 
-                dir(3) = min(k-start,max(k-kl,0_intType))
+            il = flowDoms(nn, gridLevel, 1)%il
+            jl = flowDoms(nn, gridLevel, 1)%jl
+            kl = flowDoms(nn, gridLevel, 1)%kl
 
-                if(dir(3) == 0) then
-                   nHaloDirK = nHaloDirJ
-                else
-                   nHaloDirK          = nHaloDirJ + 1
-                   haloDir(nHaloDirK) = 3
-                endif
+            ! Determine the upper boundaries of this block with halo's.
 
-                ! Determine the level of indirectness relative to the
-                ! block boundary. This is the sum of the absolute values
-                ! of dir.
+            iEnd = il + nLevel
+            jEnd = jl + nLevel
+            kEnd = kl + nLevel
 
-                levOfInd = abs(dir(1)) + abs(dir(2)) + abs(dir(3))
+            ! Loop over all the entities of the full halo block.
 
-                ! If levOfInd <= 1, this means that we are either dealing
-                ! with an owned entity of the block or a direct halo.
-                ! As this is not an indirect halo continue with the next
-                ! entity.
+            iLoop: do i = iStart, iEnd
 
-                if(levOfInd <= 1) cycle
+                ! Determine the halo in i-direction.
 
-                ! A distinction must now be made between 1st level halo's
-                ! and higher level halo's. In this code it is done such
-                ! that all first level halo's are determined first,
-                ! followed by the higher level halo's. The reasons is that
-                ! the communication pattern of the first level halo's is
-                ! stored separately. The distinction between both cases is
-                ! the value of offset. Offset == 0 means 1st level halo's;
-                ! offset > 0 higher level halo's. Anyway, the level of
-                ! indirect halo's for 1st level halo's is either 2 or 3.
-                ! They can be distinguished by higher level halo's by the
-                ! fact that their number of halo directions equal their
-                ! level of indirectness. So continue with the next halo
-                ! if we are dealing with such a case here.
+                dir(1) = min(i - start, max(i - il, 0_intType))
 
-                if(offset > 0 .and. nHaloDirK == levOfInd) cycle
+                if (dir(1) == 0) then
+                    nHaloDirI = 0
+                else
+                    nHaloDirI = 1
+                    haloDir(1) = 1
+                end if
+
+                jLoop: do j = jStart, jEnd
+
+                    ! Determine the halo in j-direction.
 
-                ! This indirect halo should be stored in the indHalo.
-                ! Independent of the choice of the corresponding direct
-                ! halo quite a bit of info can already be set. Update the
-                ! counter iindHalo and do this.
-                ! Note that 1 is substracted from levOfInd, because
-                ! in the other routines levOfInd is the level of
-                ! indirectness of the halo's and not the distance to
-                ! the nearest owned entity. The difference is 1.
+                    dir(2) = min(j - start, max(j - jl, 0_intType))
 
-                iindHalo = iindHalo +1
+                    if (dir(2) == 0) then
+                        nHaloDirJ = nHaloDirI
+                    else
+                        nHaloDirJ = nHaloDirI + 1
+                        haloDir(nHaloDirJ) = 2
+                    end if
 
-                indHalo(iindHalo)%myBlock = nn
+                    kLoop: do k = kStart, kEnd
 
-                indHalo(iindHalo)%myI = i
-                indHalo(iindHalo)%myJ = j
-                indHalo(iindHalo)%myK = k
+                        ! Determine the halo in k-direction.
 
-                indHalo(iindHalo)%levOfInd = levOfInd -1
+                        dir(3) = min(k - start, max(k - kl, 0_intType))
 
-                ! The number of possibilities for the corresponding direct
-                ! halo are nHaloDirK. Therefore loop over the number of
-                ! possibilities and take an internal halo if possible. If
-                ! several options exist, just take one. If all nearest
-                ! direct halo's are boundary halo's then this indirect halo
-                ! will also be a boundary halo.
+                        if (dir(3) == 0) then
+                            nHaloDirK = nHaloDirJ
+                        else
+                            nHaloDirK = nHaloDirJ + 1
+                            haloDir(nHaloDirK) = 3
+                        end if
 
-                do mm=1,nHaloDirK
+                        ! Determine the level of indirectness relative to the
+                        ! block boundary. This is the sum of the absolute values
+                        ! of dir.
 
-                   ! Determine the corresponding direct halo. Note that in
-                   ! dir the direction from the nearest owned entity is stored.
-                   ! Consequently 1 must be added/substracted from this
-                   ! direction in the coordinate direction specified by
-                   ! haloDir(mm) to obtain the direct halo.
+                        levOfInd = abs(dir(1)) + abs(dir(2)) + abs(dir(3))
 
-                   ii(1) = i - dir(1)
-                   ii(2) = j - dir(2)
-                   ii(3) = k - dir(3)
+                        ! If levOfInd <= 1, this means that we are either dealing
+                        ! with an owned entity of the block or a direct halo.
+                        ! As this is not an indirect halo continue with the next
+                        ! entity.
 
-                   ii(haloDir(mm)) = ii(haloDir(mm)) &
-                        + sign(1_intType, dir(haloDir(mm)))
+                        if (levOfInd <= 1) cycle
 
-                   ! Store the index of the direct halo in entityHalo
-                   ! a bit easier.
+                        ! A distinction must now be made between 1st level halo's
+                        ! and higher level halo's. In this code it is done such
+                        ! that all first level halo's are determined first,
+                        ! followed by the higher level halo's. The reasons is that
+                        ! the communication pattern of the first level halo's is
+                        ! stored separately. The distinction between both cases is
+                        ! the value of offset. Offset == 0 means 1st level halo's;
+                        ! offset > 0 higher level halo's. Anyway, the level of
+                        ! indirect halo's for 1st level halo's is either 2 or 3.
+                        ! They can be distinguished by higher level halo's by the
+                        ! fact that their number of halo directions equal their
+                        ! level of indirectness. So continue with the next halo
+                        ! if we are dealing with such a case here.
 
-                   jj = entityIndex(nn)%entryList(ii(1),ii(2),ii(3))
+                        if (offset > 0 .and. nHaloDirK == levOfInd) cycle
 
-                   ! Copy the data in bcHalos for possible later use.
-                   ! Remember that donorBlock contains the boundary
-                   ! condition in case jj is a boundary halo.
+                        ! This indirect halo should be stored in the indHalo.
+                        ! Independent of the choice of the corresponding direct
+                        ! halo quite a bit of info can already be set. Update the
+                        ! counter iindHalo and do this.
+                        ! Note that 1 is substracted from levOfInd, because
+                        ! in the other routines levOfInd is the level of
+                        ! indirectness of the halo's and not the distance to
+                        ! the nearest owned entity. The difference is 1.
 
-                   bcHalos(mm)%directHalo = jj
-                   bcHalos(mm)%bc         = entityHalo(jj)%donorBlock
+                        iindHalo = iindHalo + 1
 
-                   ! Check if jj > 0. If not this means that the halo information
-                   ! in the cgns file is not correct.
+                        indHalo(iindHalo)%myBlock = nn
 
-                   if(jj == 0)                            &
-                        call terminate("closestDirectHalos", &
-                        "Closest direct halo not in halo &
-                        &list. Something wrong with BC info?")
+                        indHalo(iindHalo)%myI = i
+                        indHalo(iindHalo)%myJ = j
+                        indHalo(iindHalo)%myK = k
 
-                   if(entityHalo(jj)%donorProc >= 0) then
+                        indHalo(iindHalo)%levOfInd = levOfInd - 1
 
-                      ! Face halo is an internal block boundary halo. Store
-                      ! the rest of the info in indHalo.
+                        ! The number of possibilities for the corresponding direct
+                        ! halo are nHaloDirK. Therefore loop over the number of
+                        ! possibilities and take an internal halo if possible. If
+                        ! several options exist, just take one. If all nearest
+                        ! direct halo's are boundary halo's then this indirect halo
+                        ! will also be a boundary halo.
 
-                      indHalo(iindHalo)%myDirectHalo = jj
+                        do mm = 1, nHaloDirK
 
-                      indHalo(iindHalo)%donorProc = entityHalo(jj)%donorProc
+                            ! Determine the corresponding direct halo. Note that in
+                            ! dir the direction from the nearest owned entity is stored.
+                            ! Consequently 1 must be added/substracted from this
+                            ! direction in the coordinate direction specified by
+                            ! haloDir(mm) to obtain the direct halo.
 
-                      ! Direct halo has been set. Exit the loop.
+                            ii(1) = i - dir(1)
+                            ii(2) = j - dir(2)
+                            ii(3) = k - dir(3)
 
-                      exit
+                            ii(haloDir(mm)) = ii(haloDir(mm)) &
+                                              + sign(1_intType, dir(haloDir(mm)))
 
-                   endif
-                enddo
+                            ! Store the index of the direct halo in entityHalo
+                            ! a bit easier.
 
-                ! Check if a halo has been set. If not this is a boundary
-                ! halo. Set the direct halo with the most important
-                ! boundary condition.
+                            jj = entityIndex(nn)%entryList(ii(1), ii(2), ii(3))
 
-                if(mm > nHaloDirK) then
+                            ! Copy the data in bcHalos for possible later use.
+                            ! Remember that donorBlock contains the boundary
+                            ! condition in case jj is a boundary halo.
 
-                   ! Sort the boundary halo's in increasing order. The
-                   ! most important will be in position nHaloDirK after
-                   ! the sorting.
+                            bcHalos(mm)%directHalo = jj
+                            bcHalos(mm)%bc = entityHalo(jj)%donorBlock
 
-                   call sortBCHaloType(bcHalos, nHaloDirK)
+                            ! Check if jj > 0. If not this means that the halo information
+                            ! in the cgns file is not correct.
 
-                   ! Set the value of myDirectHalo and set donorProc
-                   ! to -1 to indicate a boundary halo.
+                            if (jj == 0) &
+                                 call terminate("closestDirectHalos", &
+                                 "Closest direct halo not in halo &
+                                 &list. Something wrong with BC info?")
 
-                   indHalo(iindHalo)%myDirectHalo = &
-                        bcHalos(nHaloDirK)%directHalo
-                   indHalo(iindHalo)%donorProc    = -1
-                endif
+                            if (entityHalo(jj)%donorProc >= 0) then
 
-             enddo kLoop
-          enddo jLoop
-       enddo iLoop
+                                ! Face halo is an internal block boundary halo. Store
+                                ! the rest of the info in indHalo.
 
-    enddo domains
+                                indHalo(iindHalo)%myDirectHalo = jj
 
-  end subroutine closestDirectHalos
+                                indHalo(iindHalo)%donorProc = entityHalo(jj)%donorProc
 
+                                ! Direct halo has been set. Exit the loop.
 
-  function getNumberIndirectHalos(start, nLevel, offset, gridLevel)
-    !
-    !       getNumberIndirectHalos determines the number of indirect
-    !       halo's for which the donor must be determined.
-    !
-    use block
-    implicit none
-    !
-    !      Function type
-    !
-    integer(kind=intType) :: getNumberIndirectHalos
-    !
-    !      Function arguments
-    !
-    integer(kind=intType), intent(in) :: start, nLevel, offset
-    integer(kind=intType), intent(in) :: gridLevel
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn, il, jl, kl
-    integer(kind=intType) :: i, j, k, ii, jj, kk
+                                exit
 
-    ! Initialize getNumberIndirectHalos and loop over the blocks.
+                            end if
+                        end do
 
-    getNumberIndirectHalos = 0
+                        ! Check if a halo has been set. If not this is a boundary
+                        ! halo. Set the direct halo with the most important
+                        ! boundary condition.
 
-    domains: do nn=1,nDom
+                        if (mm > nHaloDirK) then
 
-       ! Store the number of nodes in the three directions of the
-       ! current block a bit easier.
+                            ! Sort the boundary halo's in increasing order. The
+                            ! most important will be in position nHaloDirK after
+                            ! the sorting.
 
-       il = flowDoms(nn,gridLevel,1)%il
-       jl = flowDoms(nn,gridLevel,1)%jl
-       kl = flowDoms(nn,gridLevel,1)%kl
+                            call sortBCHaloType(bcHalos, nHaloDirK)
 
-       ! Determine the size in every coordinate direction with and
-       ! without halo's
+                            ! Set the value of myDirectHalo and set donorProc
+                            ! to -1 to indicate a boundary halo.
 
-       i = il - start + 2*offset + 1
-       j = jl - start + 2*offset + 1
-       k = kl - start + 2*offset + 1
+                            indHalo(iindHalo)%myDirectHalo = &
+                                bcHalos(nHaloDirK)%directHalo
+                            indHalo(iindHalo)%donorProc = -1
+                        end if
 
-       ii = il - start + 2*nLevel + 1
-       jj = jl - start + 2*nLevel + 1
-       kk = kl - start + 2*nLevel + 1
+                    end do kLoop
+                end do jLoop
+            end do iLoop
 
-       ! Add the total number of halo's to getNumberIndirectHalos.
+        end do domains
 
-       getNumberIndirectHalos = getNumberIndirectHalos &
-            + ii*jj*kk - i*j*k
+    end subroutine closestDirectHalos
 
-       ! Substract the direct halo's. The convention is such that in
-       ! a first loop the first level halo's are determined,
-       ! offset == 0, and the higher level halo's in a next loop,
-       ! offset > 0. Only in the former case the direct halo's must
-       ! be sustracted.
+    function getNumberIndirectHalos(start, nLevel, offset, gridLevel)
+        !
+        !       getNumberIndirectHalos determines the number of indirect
+        !       halo's for which the donor must be determined.
+        !
+        use block
+        implicit none
+        !
+        !      Function type
+        !
+        integer(kind=intType) :: getNumberIndirectHalos
+        !
+        !      Function arguments
+        !
+        integer(kind=intType), intent(in) :: start, nLevel, offset
+        integer(kind=intType), intent(in) :: gridLevel
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn, il, jl, kl
+        integer(kind=intType) :: i, j, k, ii, jj, kk
 
-       if(offset == 0) then
+        ! Initialize getNumberIndirectHalos and loop over the blocks.
 
-          ! Store the number of owned number of entities in i, j, k
+        getNumberIndirectHalos = 0
 
-          i = il - start + 1
-          j = jl - start + 1
-          k = kl - start + 1
+        domains: do nn = 1, nDom
 
-          ! Substract the direct halo's
+            ! Store the number of nodes in the three directions of the
+            ! current block a bit easier.
 
-          getNumberIndirectHalos = getNumberIndirectHalos &
-               - 2*(i*j + i*k + j*k)
-       endif
+            il = flowDoms(nn, gridLevel, 1)%il
+            jl = flowDoms(nn, gridLevel, 1)%jl
+            kl = flowDoms(nn, gridLevel, 1)%kl
 
-    enddo domains
+            ! Determine the size in every coordinate direction with and
+            ! without halo's
 
-  end function getNumberIndirectHalos
+            i = il - start + 2 * offset + 1
+            j = jl - start + 2 * offset + 1
+            k = kl - start + 2 * offset + 1
 
-  subroutine determineNumberOfHalos(level)
-    !
-    !       determineNumberOfHalos determines the amount of 1st and 2nd
-    !       level cell halo's as well as the number of 1st level node
-    !       halo's stored on this processor.
-    !
-    use blockPointers
-    use haloList
-    use utils, only : setPointers
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, nn
+            ii = il - start + 2 * nLevel + 1
+            jj = jl - start + 2 * nLevel + 1
+            kk = kl - start + 2 * nLevel + 1
 
-    ! Initialize the amount of halo cells and nodes to 0.
-
-    nCellHalo1st = 0
-    nCellHalo2nd = 0
-    nNodeHalo1st = 0
-
-    ! Loop over the number of blocks stored on this processor.
-
-    do i=1,nDom
-
-       ! Set the pointers for this block. The halo construction is
-       ! the same for all time spectral solutions, so only the 1st
-       ! needs to be considered.
-
-       call setPointers(i,level,1_intType)
-
-       ! Determine the number of 1st level halo cells for this block
-       ! and add it to nCellHalo1st. Note the ie == nx + 2, etc.
+            ! Add the total number of halo's to getNumberIndirectHalos.
 
-       nn = nx*ny*nz
+            getNumberIndirectHalos = getNumberIndirectHalos &
+                                     + ii * jj * kk - i * j * k
 
-       nCellHalo1st = nCellHalo1st - nn + ie*je*ke
+            ! Substract the direct halo's. The convention is such that in
+            ! a first loop the first level halo's are determined,
+            ! offset == 0, and the higher level halo's in a next loop,
+            ! offset > 0. Only in the former case the direct halo's must
+            ! be sustracted.
 
-       ! Idem for the second level halo's. However there is no variable
-       ! which stores nx + 4, so it is computed.
+            if (offset == 0) then
 
-       nCellHalo2nd = nCellHalo2nd - nn + (nx+4)*(ny+4)*(nz+4)
+                ! Store the number of owned number of entities in i, j, k
 
-       ! Idem for the 1st level node halo's. Use is made of the fact
-       ! that ib == il + 2, etc.
+                i = il - start + 1
+                j = jl - start + 1
+                k = kl - start + 1
 
-       nNodeHalo1st = nNodeHalo1st + ib*jb*kb - il*jl*kl
-    enddo
+                ! Substract the direct halo's
 
-  end subroutine determineNumberOfHalos
+                getNumberIndirectHalos = getNumberIndirectHalos &
+                                         - 2 * (i * j + i * k + j * k)
+            end if
 
-  subroutine indirectHalosPerLevel(level, iihalo, entityHalo, &
-       transform, entityIndex)
-    !
-    !       indirectHalosPerLevel determines the donor cells for the
-    !       halo's of the given level of indirectness. From the known
-    !       appropriate direct halo and its donor, the corresponding cell
-    !       in the donor block is determined.
-    !
-    use haloList
-    use indirectHalo
-    use communication
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in)    :: level
-    integer(kind=intType), intent(inout) :: iihalo
+        end do domains
 
-    integer(kind=intType), dimension(:,:), intent(in) :: transform
+    end function getNumberIndirectHalos
 
-    type(haloListType),  dimension(:), intent(inout) :: entityHalo
-    type(indexListType), dimension(:), intent(inout) :: entityIndex
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-    integer :: count, dest, source, sizeRecv
+    subroutine determineNumberOfHalos(level)
+        !
+        !       determineNumberOfHalos determines the amount of 1st and 2nd
+        !       level cell halo's as well as the number of 1st level node
+        !       halo's stored on this processor.
+        !
+        use blockPointers
+        use haloList
+        use utils, only: setPointers
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, nn
+
+        ! Initialize the amount of halo cells and nodes to 0.
+
+        nCellHalo1st = 0
+        nCellHalo2nd = 0
+        nNodeHalo1st = 0
+
+        ! Loop over the number of blocks stored on this processor.
+
+        do i = 1, nDom
+
+            ! Set the pointers for this block. The halo construction is
+            ! the same for all time spectral solutions, so only the 1st
+            ! needs to be considered.
+
+            call setPointers(i, level, 1_intType)
+
+            ! Determine the number of 1st level halo cells for this block
+            ! and add it to nCellHalo1st. Note the ie == nx + 2, etc.
 
-    integer, dimension(mpi_status_size)  :: mpiStatus
+            nn = nx * ny * nz
 
-    integer, allocatable, dimension(:) :: sizeMessage
+            nCellHalo1st = nCellHalo1st - nn + ie * je * ke
 
-    integer(kind=intType) :: i, ii, nn, mm, ms
-    integer(kind=intType) :: l1, l2, l3, proc
-    integer(kind=intType) :: start, eend, nProcsSend, nProcsRecv
-    integer(kind=intType) :: sizeSendBuf, sizeRecvBuf
-    integer(kind=intType) :: nLocalHalos
-    integer(kind=intType) :: nItemReturn, nItemSend, nItemAlloc
+            ! Idem for the second level halo's. However there is no variable
+            ! which stores nx + 4, so it is computed.
 
-    integer(kind=intType), dimension(2) :: tmpBuf
+            nCellHalo2nd = nCellHalo2nd - nn + (nx + 4) * (ny + 4) * (nz + 4)
 
-    integer(kind=intType), allocatable, dimension(:) :: counter
-    integer(kind=intType), allocatable, dimension(:) :: sendBuf
-    integer(kind=intType), allocatable, dimension(:) :: recvBuf
-    integer(kind=intType), allocatable, dimension(:) :: localHalos
-
-    ! Determine the number of items per entity that is returned from
-    ! the requested processor. This is 5 + level, because of the
-    ! storage of possible periodic transformations.
-    ! Also set nItemSend to 7 and determine the number of items that
-    ! should be allocated in the communication buffers.
+            ! Idem for the 1st level node halo's. Use is made of the fact
+            ! that ib == il + 2, etc.
 
-    nItemReturn = 5 + level
-    nItemSend   = 7
-    nItemAlloc  = max(nItemReturn, nItemSend)
+            nNodeHalo1st = nNodeHalo1st + ib * jb * kb - il * jl * kl
+        end do
 
-    ! Abbreviate the start and ending index for this level
-    ! a bit easier.
+    end subroutine determineNumberOfHalos
 
-    start = nHaloPerLev(level-1) +1
-    eend = nHaloPerLev(level)
+    subroutine indirectHalosPerLevel(level, iihalo, entityHalo, &
+                                     transform, entityIndex)
+        !
+        !       indirectHalosPerLevel determines the donor cells for the
+        !       halo's of the given level of indirectness. From the known
+        !       appropriate direct halo and its donor, the corresponding cell
+        !       in the donor block is determined.
+        !
+        use haloList
+        use indirectHalo
+        use communication
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level
+        integer(kind=intType), intent(inout) :: iihalo
 
-    ! Allocate the memory for sizeMessage and counter, which will
-    ! be used in reduceScatter to determine the amount of messages
-    ! and the total size.
+        integer(kind=intType), dimension(:, :), intent(in) :: transform
 
-    allocate(sizeMessage(nProc), counter(2*nProc), stat=ierr)
-    if(ierr /= 0)                             &
-         call terminate("indirectHalosPerLevel", &
-         "Memory allocation failure for sizeMessage &
-         &and counter")
+        type(haloListType), dimension(:), intent(inout) :: entityHalo
+        type(indexListType), dimension(:), intent(inout) :: entityIndex
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+        integer :: count, dest, source, sizeRecv
 
-    ! Determine the amount of halo's per processors. Here use is made
-    ! of the fact that boundary halo's are flagged with a processor
-    ! ID of -1.
+        integer, dimension(mpi_status_size) :: mpiStatus
 
-    nHaloPerProc = 0
-    do i=start,eend
-       ii               = indHalo(i)%donorProc+1
-       nHaloPerProc(ii) = nHaloPerProc(ii) +1
-    enddo
+        integer, allocatable, dimension(:) :: sizeMessage
 
-    ! Determine the amount of messages as well as the total number of
-    ! halos I have to send and put nHaloPerProc in cumulative
-    ! storage format.
+        integer(kind=intType) :: i, ii, nn, mm, ms
+        integer(kind=intType) :: l1, l2, l3, proc
+        integer(kind=intType) :: start, eend, nProcsSend, nProcsRecv
+        integer(kind=intType) :: sizeSendBuf, sizeRecvBuf
+        integer(kind=intType) :: nLocalHalos
+        integer(kind=intType) :: nItemReturn, nItemSend, nItemAlloc
 
-    nProcsSend  = 0
-    sizeSendBuf = 0
+        integer(kind=intType), dimension(2) :: tmpBuf
 
-    do i=1,nProc
-       if(nHaloPerProc(i) > 0 .and. i /= (myID+1)) then
+        integer(kind=intType), allocatable, dimension(:) :: counter
+        integer(kind=intType), allocatable, dimension(:) :: sendBuf
+        integer(kind=intType), allocatable, dimension(:) :: recvBuf
+        integer(kind=intType), allocatable, dimension(:) :: localHalos
+
+        ! Determine the number of items per entity that is returned from
+        ! the requested processor. This is 5 + level, because of the
+        ! storage of possible periodic transformations.
+        ! Also set nItemSend to 7 and determine the number of items that
+        ! should be allocated in the communication buffers.
 
-          ! Something must be send to this proc. Update nProcsSend
-          ! and sizeSendBuf and set the two elements of counter
-          ! appropriately.
+        nItemReturn = 5 + level
+        nItemSend = 7
+        nItemAlloc = max(nItemReturn, nItemSend)
 
-          nProcsSend     = nProcsSend +1
-          sizeSendBuf    = sizeSendBuf + nHaloPerProc(i)
-          counter(2*i-1) = 1
-          counter(2*i)   = nHaloPerProc(i)
+        ! Abbreviate the start and ending index for this level
+        ! a bit easier.
 
-       else
+        start = nHaloPerLev(level - 1) + 1
+        eend = nHaloPerLev(level)
 
-          ! Nothing needs to be sent. Set the two elements of
-          ! counter to 0.
+        ! Allocate the memory for sizeMessage and counter, which will
+        ! be used in reduceScatter to determine the amount of messages
+        ! and the total size.
 
-          counter(2*i-1) = 0
-          counter(2*i)   = 0
+        allocate (sizeMessage(nProc), counter(2 * nProc), stat=ierr)
+        if (ierr /= 0) &
+             call terminate("indirectHalosPerLevel", &
+             "Memory allocation failure for sizeMessage &
+             &and counter")
 
-       endif
+        ! Determine the amount of halo's per processors. Here use is made
+        ! of the fact that boundary halo's are flagged with a processor
+        ! ID of -1.
 
-       ! Set the size of the message to 2 and put nHaloPerProc in
-       ! cumulative storage format.
+        nHaloPerProc = 0
+        do i = start, eend
+            ii = indHalo(i)%donorProc + 1
+            nHaloPerProc(ii) = nHaloPerProc(ii) + 1
+        end do
 
-       sizeMessage(i)  = 2
-       nHaloPerProc(i) = nHaloPerProc(i) + nHaloPerProc(i-1)
+        ! Determine the amount of messages as well as the total number of
+        ! halos I have to send and put nHaloPerProc in cumulative
+        ! storage format.
 
-    enddo
+        nProcsSend = 0
+        sizeSendBuf = 0
 
-    ! Call reduceScatter to determine the number of processors
-    ! from which I receive data and the total amount of data.
-    ! tmpBuf is used as a temporary buffer to receive the data.
+        do i = 1, nProc
+            if (nHaloPerProc(i) > 0 .and. i /= (myID + 1)) then
 
-    call mpi_reduce_scatter(counter, tmpBuf, sizeMessage,           &
-         adflow_integer, mpi_sum, ADflow_comm_world, &
-         ierr)
+                ! Something must be send to this proc. Update nProcsSend
+                ! and sizeSendBuf and set the two elements of counter
+                ! appropriately.
 
-    nProcsRecv  = tmpBuf(1)
-    sizeRecvBuf = tmpBuf(2)
+                nProcsSend = nProcsSend + 1
+                sizeSendBuf = sizeSendBuf + nHaloPerProc(i)
+                counter(2 * i - 1) = 1
+                counter(2 * i) = nHaloPerProc(i)
 
-    ! Allocate the memory for the send buffer.
+            else
 
-    allocate(sendBuf(nItemAlloc*sizeSendBuf), stat=ierr)
-    if(ierr /= 0)                                           &
-         call terminate("indirectHalosPerLevel",               &
-         "Memory allocation error for sendBuf.")
+                ! Nothing needs to be sent. Set the two elements of
+                ! counter to 0.
 
-    ! Send the data I must send. Use nonblocking sends to
-    ! avoid deadlock. nn is used as a counter for the current
-    ! message to be sent and ms as starting index for the
-    ! active message in sendBuf.
+                counter(2 * i - 1) = 0
+                counter(2 * i) = 0
 
-    nn = 0
-    ms = 1
-    sendproc: do i=1,nProc
-       if(counter(2*i) > 0) then
+            end if
 
-          ! Something must be send to this processor. Update nn.
+            ! Set the size of the message to 2 and put nHaloPerProc in
+            ! cumulative storage format.
 
-          nn = nn +1
+            sizeMessage(i) = 2
+            nHaloPerProc(i) = nHaloPerProc(i) + nHaloPerProc(i - 1)
 
-          ! Fill this part of the send buffer
+        end do
 
-          proc = i-1
-          call fillSendBuf(sendBuf(ms:), proc, entityHalo, &
-               transform, level, mm)
+        ! Call reduceScatter to determine the number of processors
+        ! from which I receive data and the total amount of data.
+        ! tmpBuf is used as a temporary buffer to receive the data.
 
-          ! Send this buffer. Make sure that integers are used for
-          ! count, destination and tag.
+        call mpi_reduce_scatter(counter, tmpBuf, sizeMessage, &
+                                adflow_integer, mpi_sum, ADflow_comm_world, &
+                                ierr)
 
-          count = nItemSend*mm
-          dest  = i-1
-          call mpi_isend(sendBuf(ms), count, adflow_integer, dest, dest, &
-               ADflow_comm_world, sendRequests(nn), ierr)
+        nProcsRecv = tmpBuf(1)
+        sizeRecvBuf = tmpBuf(2)
 
-          ! Update ms to the starting index in buffer for the next
-          ! message to be sent.
+        ! Allocate the memory for the send buffer.
 
-          ms = ms + nItemAlloc*mm
+        allocate (sendBuf(nItemAlloc * sizeSendBuf), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("indirectHalosPerLevel", &
+                           "Memory allocation error for sendBuf.")
 
-       endif
-    enddo sendproc
+        ! Send the data I must send. Use nonblocking sends to
+        ! avoid deadlock. nn is used as a counter for the current
+        ! message to be sent and ms as starting index for the
+        ! active message in sendBuf.
 
-    ! Loop over the boundary halos for this level. The value of start
-    ! defined earlier is still okay; only end needs to be redefined.
+        nn = 0
+        ms = 1
+        sendproc: do i = 1, nProc
+            if (counter(2 * i) > 0) then
 
-    eend = nHaloPerLev(level-1) + nHaloPerProc(0)
+                ! Something must be send to this processor. Update nn.
 
-    bocos: do i=start,eend
+                nn = nn + 1
 
-       ! Determine the corresponding direct boundary halo.
+                ! Fill this part of the send buffer
 
-       ii = indHalo(i)%myDirectHalo
+                proc = i - 1
+                call fillSendBuf(sendBuf(ms:), proc, entityHalo, &
+                                 transform, level, mm)
 
-       ! Determine the vector from the direct halo to its donor.
-       ! Store this in l1, l2 and l3.
+                ! Send this buffer. Make sure that integers are used for
+                ! count, destination and tag.
 
-       l1 = entityHalo(ii)%dI - entityHalo(ii)%myI
-       l2 = entityHalo(ii)%dJ - entityHalo(ii)%myJ
-       l3 = entityHalo(ii)%dK - entityHalo(ii)%myK
+                count = nItemSend * mm
+                dest = i - 1
+                call mpi_isend(sendBuf(ms), count, adflow_integer, dest, dest, &
+                               ADflow_comm_world, sendRequests(nn), ierr)
 
-       ! Store the info for this boundary halo. Set the boundary
-       ! condition, i.e. donorBlock, to the boundary condition of the
-       ! closest direct halo. This has been chosen to be the most
-       ! important boundary condition.
-       ! DonorProc is set to -1 to indicate a boundary halo.
+                ! Update ms to the starting index in buffer for the next
+                ! message to be sent.
 
-       iihalo = iihalo +1
-       entityHalo(iihalo)%myBlock = indHalo(i)%myBlock
-       entityHalo(iihalo)%myI     = indHalo(i)%myI
-       entityHalo(iihalo)%myJ     = indHalo(i)%myJ
-       entityHalo(iihalo)%myK     = indHalo(i)%myK
+                ms = ms + nItemAlloc * mm
 
-       entityHalo(iihalo)%donorProc  = -1
-       entityHalo(iihalo)%donorBlock = entityHalo(ii)%donorBlock
+            end if
+        end do sendproc
 
-       ! Determine the vector from my indices to the indices of my
-       ! donor. This is the vector from me to the donor indices of
-       ! the closest halo, projected on the vector (l1,l2,l3).
-       ! Although in general this will not give to indices of my true
-       ! donor, this info is stored, because the halo list could be
-       ! both a node or a cell halo list. The true donor will be
-       ! extracted later.
-       ! This implies that my donor could also be a halo, but its
-       ! level of indirectness is guaranteed one less than mine.
+        ! Loop over the boundary halos for this level. The value of start
+        ! defined earlier is still okay; only end needs to be redefined.
 
-       ii = l1*(entityHalo(ii)%dI - entityHalo(iihalo)%myI) &
-            + l2*(entityHalo(ii)%dJ - entityHalo(iihalo)%myJ) &
-            + l3*(entityHalo(ii)%dK - entityHalo(iihalo)%myK)
+        eend = nHaloPerLev(level - 1) + nHaloPerProc(0)
 
-       entityHalo(iihalo)%dI = ii*l1 + indHalo(i)%myI
-       entityHalo(iihalo)%dJ = ii*l2 + indHalo(i)%myJ
-       entityHalo(iihalo)%dK = ii*l3 + indHalo(i)%myK
+        bocos: do i = start, eend
 
-       ! Copy the level of indirectness.
+            ! Determine the corresponding direct boundary halo.
 
-       entityHalo(iihalo)%levOfInd = indHalo(i)%levOfInd
+            ii = indHalo(i)%myDirectHalo
 
-       ! Store the entry of entityHalo in the i,j,k indices
-       ! of in entityIndex.
+            ! Determine the vector from the direct halo to its donor.
+            ! Store this in l1, l2 and l3.
 
-       ii = indHalo(i)%myBlock
-       l1 = indHalo(i)%myI
-       l2 = indHalo(i)%myJ
-       l3 = indHalo(i)%myK
+            l1 = entityHalo(ii)%dI - entityHalo(ii)%myI
+            l2 = entityHalo(ii)%dJ - entityHalo(ii)%myJ
+            l3 = entityHalo(ii)%dK - entityHalo(ii)%myK
 
-       entityIndex(ii)%entryList(l1,l2,l3) = iihalo
+            ! Store the info for this boundary halo. Set the boundary
+            ! condition, i.e. donorBlock, to the boundary condition of the
+            ! closest direct halo. This has been chosen to be the most
+            ! important boundary condition.
+            ! DonorProc is set to -1 to indicate a boundary halo.
 
-    enddo bocos
+            iihalo = iihalo + 1
+            entityHalo(iihalo)%myBlock = indHalo(i)%myBlock
+            entityHalo(iihalo)%myI = indHalo(i)%myI
+            entityHalo(iihalo)%myJ = indHalo(i)%myJ
+            entityHalo(iihalo)%myK = indHalo(i)%myK
 
-    ! Treat the internal block boundary halo's, whose donor is stored
-    ! on the same processor. Store this data in localHalos, for
-    ! which the memory must be allocated. First determine the number
-    ! of local halo's.
+            entityHalo(iihalo)%donorProc = -1
+            entityHalo(iihalo)%donorBlock = entityHalo(ii)%donorBlock
 
-    nLocalHalos = nHaloPerProc(myID+1) - nHaloPerProc(myID)
-    allocate(localHalos(nItemAlloc*nLocalHalos), stat=ierr)
-    if(ierr /= 0)                             &
-         call terminate("indirectHalosPerLevel", &
-         "Memory allocation error for localHalos")
+            ! Determine the vector from my indices to the indices of my
+            ! donor. This is the vector from me to the donor indices of
+            ! the closest halo, projected on the vector (l1,l2,l3).
+            ! Although in general this will not give to indices of my true
+            ! donor, this info is stored, because the halo list could be
+            ! both a node or a cell halo list. The true donor will be
+            ! extracted later.
+            ! This implies that my donor could also be a halo, but its
+            ! level of indirectness is guaranteed one less than mine.
 
-    proc = myID
-    call fillSendBuf(localHalos, proc, entityHalo, transform, &
-         level, mm)
+            ii = l1 * (entityHalo(ii)%dI - entityHalo(iihalo)%myI) &
+                 + l2 * (entityHalo(ii)%dJ - entityHalo(iihalo)%myJ) &
+                 + l3 * (entityHalo(ii)%dK - entityHalo(iihalo)%myK)
 
-    ! Receive the messages in arbitrary sequence, determine the
-    ! corresponding halo info and send the message back.
+            entityHalo(iihalo)%dI = ii * l1 + indHalo(i)%myI
+            entityHalo(iihalo)%dJ = ii * l2 + indHalo(i)%myJ
+            entityHalo(iihalo)%dK = ii * l3 + indHalo(i)%myK
 
-    allocate(recvBuf(nItemAlloc*sizeRecvBuf),  stat=ierr)
-    if(ierr /= 0)                             &
-         call terminate("indirectHalosPerLevel", &
-         "Memory allocation error for recvBuf.")
+            ! Copy the level of indirectness.
 
-    ! Initialize ms to 1 and start the loop over the number of
-    ! messages to be received.
+            entityHalo(iihalo)%levOfInd = indHalo(i)%levOfInd
 
-    ms = 1
-    recvproc: do i=1,nProcsRecv
+            ! Store the entry of entityHalo in the i,j,k indices
+            ! of in entityIndex.
 
-       ! Block until a message arrives.
+            ii = indHalo(i)%myBlock
+            l1 = indHalo(i)%myI
+            l2 = indHalo(i)%myJ
+            l3 = indHalo(i)%myK
 
-       call mpi_probe(mpi_any_source, myID, ADflow_comm_world, &
-            mpiStatus, ierr)
+            entityIndex(ii)%entryList(l1, l2, l3) = iihalo
 
-       ! Find the source and size of the message.
+        end do bocos
 
-       source = mpiStatus(mpi_source)
-       call mpi_get_count(mpiStatus, adflow_integer, sizeRecv, ierr)
+        ! Treat the internal block boundary halo's, whose donor is stored
+        ! on the same processor. Store this data in localHalos, for
+        ! which the memory must be allocated. First determine the number
+        ! of local halo's.
 
-       ! Check in debug mode that the incoming message is of
-       ! correct size.
+        nLocalHalos = nHaloPerProc(myID + 1) - nHaloPerProc(myID)
+        allocate (localHalos(nItemAlloc * nLocalHalos), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("indirectHalosPerLevel", &
+                           "Memory allocation error for localHalos")
 
-       if( debug ) then
-          if(sizeRecv == mpi_undefined .or. &
-               mod(sizeRecv,nItemSend) /= 0)  &
-               call terminate("indirectHalosPerLevel", &
-               "Unexpected size of message")
-       endif
+        proc = myID
+        call fillSendBuf(localHalos, proc, entityHalo, transform, &
+                         level, mm)
 
-       ! Receive the message. As it has already arrived a blocking
-       ! receive can be used.
+        ! Receive the messages in arbitrary sequence, determine the
+        ! corresponding halo info and send the message back.
 
-       call mpi_recv(recvBuf(ms), sizeRecv, adflow_integer, &
-            source, myID, ADflow_comm_world, mpiStatus, ierr)
+        allocate (recvBuf(nItemAlloc * sizeRecvBuf), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("indirectHalosPerLevel", &
+                           "Memory allocation error for recvBuf.")
 
-       ! Determine the number of halo's in the receive buffer.
+        ! Initialize ms to 1 and start the loop over the number of
+        ! messages to be received.
 
-       mm = sizeRecv/nItemSend
+        ms = 1
+        recvproc: do i = 1, nProcsRecv
 
-       ! Find the donors for the halo's in the receive buffer as
-       ! well as the periodic info.
+            ! Block until a message arrives.
 
-       call findDonorsRecvBuffer(recvBuf(ms:), mm, entityHalo, &
-            entityIndex, level, nItemReturn)
+            call mpi_probe(mpi_any_source, myID, ADflow_comm_world, &
+                           mpiStatus, ierr)
 
-       ! Send the modified receive buffer back to the source processor.
+            ! Find the source and size of the message.
 
-       count = nItemReturn*mm
-       call mpi_isend(recvBuf(ms), count, adflow_integer, source, &
-            source+1, ADflow_comm_world, recvRequests(i), ierr)
+            source = mpiStatus(mpi_source)
+            call mpi_get_count(mpiStatus, adflow_integer, sizeRecv, ierr)
 
-       ! Update the starting index ms for the next message.
+            ! Check in debug mode that the incoming message is of
+            ! correct size.
 
-       ms = ms + nItemAlloc*mm
+            if (debug) then
+                if (sizeRecv == mpi_undefined .or. &
+                    mod(sizeRecv, nItemSend) /= 0) &
+                    call terminate("indirectHalosPerLevel", &
+                                   "Unexpected size of message")
+            end if
 
-    enddo recvproc
+            ! Receive the message. As it has already arrived a blocking
+            ! receive can be used.
 
-    ! Find the donors for the locally stored halo's and store them
-    ! in the list.
+            call mpi_recv(recvBuf(ms), sizeRecv, adflow_integer, &
+                          source, myID, ADflow_comm_world, mpiStatus, ierr)
 
-    call findDonorsRecvBuffer(localHalos, nLocalHalos, entityHalo, &
-         entityIndex, level, nItemReturn)
+            ! Determine the number of halo's in the receive buffer.
 
-    proc = myID
-    call storeHalosInList(localHalos, nLocalHalos, proc, level, &
-         nItemReturn, entityHalo, entityIndex, &
-         iihalo)
+            mm = sizeRecv / nItemSend
 
-    ! Complete the 1st series of nonblocking sends.
+            ! Find the donors for the halo's in the receive buffer as
+            ! well as the periodic info.
 
-    do i=1,nProcsSend
-       call mpi_waitany(nProcsSend, sendRequests, count, mpiStatus, ierr)
-    enddo
+            call findDonorsRecvBuffer(recvBuf(ms:), mm, entityHalo, &
+                                      entityIndex, level, nItemReturn)
 
-    ! Loop over the processors to which I sent data to find out the
-    ! halo information. Now these messages must be received.
+            ! Send the modified receive buffer back to the source processor.
 
-    secondRecv: do i=1,nProcsSend
+            count = nItemReturn * mm
+            call mpi_isend(recvBuf(ms), count, adflow_integer, source, &
+                           source + 1, ADflow_comm_world, recvRequests(i), ierr)
 
-       ! Block until a message arrives.
+            ! Update the starting index ms for the next message.
 
-       call mpi_probe(mpi_any_source, myID+1, ADflow_comm_world, &
-            mpiStatus, ierr)
+            ms = ms + nItemAlloc * mm
 
-       ! Find the source and size of the message.
+        end do recvproc
 
-       source = mpiStatus(mpi_source)
-       call mpi_get_count(mpiStatus, adflow_integer, sizeRecv, ierr)
+        ! Find the donors for the locally stored halo's and store them
+        ! in the list.
 
-       ! Check in debug mode that the incoming message is of
-       ! correct size.
+        call findDonorsRecvBuffer(localHalos, nLocalHalos, entityHalo, &
+                                  entityIndex, level, nItemReturn)
 
-       if( debug ) then
-          if(sizeRecv == mpi_undefined .or.         &
-               mod(sizeRecv,nItemReturn) /= 0)        &
-               call terminate("indirectHalosPerLevel", &
-               "Unexpected size of message")
-       endif
+        proc = myID
+        call storeHalosInList(localHalos, nLocalHalos, proc, level, &
+                              nItemReturn, entityHalo, entityIndex, &
+                              iihalo)
 
-       ! Store the number of halo's in mm.
+        ! Complete the 1st series of nonblocking sends.
 
-       mm = sizeRecv/nItemReturn
+        do i = 1, nProcsSend
+            call mpi_waitany(nProcsSend, sendRequests, count, mpiStatus, ierr)
+        end do
 
-       ! Receive the message. Use a blocking receive, as the message
-       ! has already arrived.
+        ! Loop over the processors to which I sent data to find out the
+        ! halo information. Now these messages must be received.
 
-       call mpi_recv(sendBuf, sizeRecv, adflow_integer, source, &
-            myID+1, ADflow_comm_world, mpiStatus, ierr)
+        secondRecv: do i = 1, nProcsSend
 
-       ! Store the donors in the list.
+            ! Block until a message arrives.
 
-       proc = source
-       call storeHalosInList(sendBuf, mm, proc, level, nItemReturn, &
-            entityHalo, entityIndex, iihalo)
-    enddo secondRecv
+            call mpi_probe(mpi_any_source, myID + 1, ADflow_comm_world, &
+                           mpiStatus, ierr)
 
-    ! Complete the second series of nonblocking sends.
+            ! Find the source and size of the message.
 
-    do i=1,nProcsRecv
-       call mpi_waitany(nProcsRecv, recvRequests, count, mpiStatus, ierr)
-    enddo
+            source = mpiStatus(mpi_source)
+            call mpi_get_count(mpiStatus, adflow_integer, sizeRecv, ierr)
 
-    ! Release the memory allocated in this subroutine.
+            ! Check in debug mode that the incoming message is of
+            ! correct size.
 
-    deallocate(sizeMessage, counter, sendBuf, localHalos, &
-         recvBuf, stat=ierr)
-    if(ierr /= 0)                             &
-         call terminate("indirectHalosPerLevel", &
-         "Deallocation error for sizeMessage, etc.")
+            if (debug) then
+                if (sizeRecv == mpi_undefined .or. &
+                    mod(sizeRecv, nItemReturn) /= 0) &
+                    call terminate("indirectHalosPerLevel", &
+                                   "Unexpected size of message")
+            end if
 
+            ! Store the number of halo's in mm.
 
-  end subroutine indirectHalosPerLevel
+            mm = sizeRecv / nItemReturn
 
-  !=================================================================
+            ! Receive the message. Use a blocking receive, as the message
+            ! has already arrived.
 
-  subroutine fillSendBuf(sendBuf, proc, entityHalo, transform, &
-       level, mm)
-    !
-    !       fillSendBuf fills the buffer, which must be sent to the
-    !       given processor.
-    !
-    use haloList
-    use indirectHalo
-    use utils, only : delta
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    integer(kind=intType), intent(in)  :: proc, level
-    integer(kind=intType), intent(out) :: mm
+            call mpi_recv(sendBuf, sizeRecv, adflow_integer, source, &
+                          myID + 1, ADflow_comm_world, mpiStatus, ierr)
 
-    integer(kind=intType), dimension(:), intent(out) :: sendBuf
-    integer(kind=intType), dimension(:,:), intent(in) :: transform
+            ! Store the donors in the list.
 
-    type(haloListType), dimension(:), intent(in) :: entityHalo
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, ii, jj, m
-    integer(kind=intType) :: l1, l2, l3
+            proc = source
+            call storeHalosInList(sendBuf, mm, proc, level, nItemReturn, &
+                                  entityHalo, entityIndex, iihalo)
+        end do secondRecv
 
-    integer(kind=intType), dimension(3,3) :: trMat
+        ! Complete the second series of nonblocking sends.
 
+        do i = 1, nProcsRecv
+            call mpi_waitany(nProcsRecv, recvRequests, count, mpiStatus, ierr)
+        end do
 
-    ! Initialize m to 0.
+        ! Release the memory allocated in this subroutine.
 
-    m = 0
+        deallocate (sizeMessage, counter, sendBuf, localHalos, &
+                    recvBuf, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("indirectHalosPerLevel", &
+                           "Deallocation error for sizeMessage, etc.")
+
+    end subroutine indirectHalosPerLevel
+
+    !=================================================================
 
-    ! Loop over the number of halo's for this processor.
+    subroutine fillSendBuf(sendBuf, proc, entityHalo, transform, &
+                           level, mm)
+        !
+        !       fillSendBuf fills the buffer, which must be sent to the
+        !       given processor.
+        !
+        use haloList
+        use indirectHalo
+        use utils, only: delta
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        integer(kind=intType), intent(in) :: proc, level
+        integer(kind=intType), intent(out) :: mm
 
-    do i=nHaloPerProc(proc)+1,nHaloPerProc(proc+1)
+        integer(kind=intType), dimension(:), intent(out) :: sendBuf
+        integer(kind=intType), dimension(:, :), intent(in) :: transform
 
-       ! Abbreviate the current entry in indHalo in ii and the
-       ! entry of its corresponding direct halo in jj.
+        type(haloListType), dimension(:), intent(in) :: entityHalo
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, ii, jj, m
+        integer(kind=intType) :: l1, l2, l3
 
-       ii = i + nHaloPerLev(level-1)
-       jj = indHalo(ii)%myDirectHalo
+        integer(kind=intType), dimension(3, 3) :: trMat
 
-       ! Determine the transformation matrix between the block of
-       ! the direct halo and its donor block.
+        ! Initialize m to 0.
 
-       l1 = transform(jj,1)
-       l2 = transform(jj,2)
-       l3 = transform(jj,3)
+        m = 0
 
-       trMat(1,1) = sign(1_intType,l1) * delta(l1,1_intType)
-       trMat(2,1) = sign(1_intType,l1) * delta(l1,2_intType)
-       trMat(3,1) = sign(1_intType,l1) * delta(l1,3_intType)
+        ! Loop over the number of halo's for this processor.
 
-       trMat(1,2) = sign(1_intType,l2) * delta(l2,1_intType)
-       trMat(2,2) = sign(1_intType,l2) * delta(l2,2_intType)
-       trMat(3,2) = sign(1_intType,l2) * delta(l2,3_intType)
+        do i = nHaloPerProc(proc) + 1, nHaloPerProc(proc + 1)
 
-       trMat(1,3) = sign(1_intType,l3) * delta(l3,1_intType)
-       trMat(2,3) = sign(1_intType,l3) * delta(l3,2_intType)
-       trMat(3,3) = sign(1_intType,l3) * delta(l3,3_intType)
+            ! Abbreviate the current entry in indHalo in ii and the
+            ! entry of its corresponding direct halo in jj.
 
-       ! Store the direction from the direct to the indirect
-       ! halo in l1, l2 and l3
+            ii = i + nHaloPerLev(level - 1)
+            jj = indHalo(ii)%myDirectHalo
 
-       l1 = indHalo(ii)%myI - entityHalo(jj)%myI
-       l2 = indHalo(ii)%myJ - entityHalo(jj)%myJ
-       l3 = indHalo(ii)%myK - entityHalo(jj)%myK
+            ! Determine the transformation matrix between the block of
+            ! the direct halo and its donor block.
 
-       ! Fill the send buffer with block ID and i,j and k
-       ! indices of the donor of the direct halo and the
-       ! transformed direction to reach the donor of the
-       ! indirect halo. For these last three values the
-       ! transformation matrix must be applied to l1, l2, l3.
+            l1 = transform(jj, 1)
+            l2 = transform(jj, 2)
+            l3 = transform(jj, 3)
 
-       m = m+1; sendBuf(m) = entityHalo(jj)%donorBlock
-       m = m+1; sendBuf(m) = entityHalo(jj)%dI
-       m = m+1; sendBuf(m) = entityHalo(jj)%dJ
-       m = m+1; sendBuf(m) = entityHalo(jj)%dK
+            trMat(1, 1) = sign(1_intType, l1) * delta(l1, 1_intType)
+            trMat(2, 1) = sign(1_intType, l1) * delta(l1, 2_intType)
+            trMat(3, 1) = sign(1_intType, l1) * delta(l1, 3_intType)
 
-       m = m+1; sendBuf(m) = trMat(1,1)*l1 + trMat(1,2)*l2 + trMat(1,3)*l3
-       m = m+1; sendBuf(m) = trMat(2,1)*l1 + trMat(2,2)*l2 + trMat(2,3)*l3
-       m = m+1; sendBuf(m) = trMat(3,1)*l1 + trMat(3,2)*l2 + trMat(3,3)*l3
+            trMat(1, 2) = sign(1_intType, l2) * delta(l2, 1_intType)
+            trMat(2, 2) = sign(1_intType, l2) * delta(l2, 2_intType)
+            trMat(3, 2) = sign(1_intType, l2) * delta(l2, 3_intType)
 
-    enddo
+            trMat(1, 3) = sign(1_intType, l3) * delta(l3, 1_intType)
+            trMat(2, 3) = sign(1_intType, l3) * delta(l3, 2_intType)
+            trMat(3, 3) = sign(1_intType, l3) * delta(l3, 3_intType)
 
-    ! Set the return variable mm to the number of halos stored in
-    ! the send buffer.
+            ! Store the direction from the direct to the indirect
+            ! halo in l1, l2 and l3
 
-    mm = m/7
+            l1 = indHalo(ii)%myI - entityHalo(jj)%myI
+            l2 = indHalo(ii)%myJ - entityHalo(jj)%myJ
+            l3 = indHalo(ii)%myK - entityHalo(jj)%myK
 
-  end subroutine fillSendBuf
+            ! Fill the send buffer with block ID and i,j and k
+            ! indices of the donor of the direct halo and the
+            ! transformed direction to reach the donor of the
+            ! indirect halo. For these last three values the
+            ! transformation matrix must be applied to l1, l2, l3.
 
-  !=================================================================
+            m = m + 1; sendBuf(m) = entityHalo(jj)%donorBlock
+            m = m + 1; sendBuf(m) = entityHalo(jj)%dI
+            m = m + 1; sendBuf(m) = entityHalo(jj)%dJ
+            m = m + 1; sendBuf(m) = entityHalo(jj)%dK
 
-  subroutine findDonorsRecvBuffer(recvBuf, nHalos, entityHalo, &
-       entityIndex, level, nItemReturn)
-    !
-    !       findDonorsRecvBuffer finds the donor cells for the halo
-    !       information stored in recvBuf. On return recvBuf contains
-    !       for every halo the following information: processor ID,
-    !       block ID, the i,j,k indices of the donor cell and periodic
-    !       information. The number of periodic subfaces stored is level,
-    !       where a 0 indicates that the subface is not periodic.
-    !
-    use haloList
-    use communication
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nHalos, level, nItemReturn
+            m = m + 1; sendBuf(m) = trMat(1, 1) * l1 + trMat(1, 2) * l2 + trMat(1, 3) * l3
+            m = m + 1; sendBuf(m) = trMat(2, 1) * l1 + trMat(2, 2) * l2 + trMat(2, 3) * l3
+            m = m + 1; sendBuf(m) = trMat(3, 1) * l1 + trMat(3, 2) * l2 + trMat(3, 3) * l3
 
-    integer(kind=intType), dimension(:), intent(inout) :: recvBuf
+        end do
 
-    type(haloListType),  dimension(:), intent(in) :: entityHalo
-    type(indexListType), dimension(:), intent(in) :: entityIndex
-    !
-    !      Local variables.
-    !
-    integer :: ierr
+        ! Set the return variable mm to the number of halos stored in
+        ! the send buffer.
 
-    integer(kind=intType) :: i, j, k, ii, jj, kk, mm, nn
-    integer(kind=intType) :: db, l1, L2, l3
-    integer(kind=intType) :: nPeriodic
+        mm = m / 7
 
-    integer(kind=intType), dimension(:), allocatable :: tmpBuf
+    end subroutine fillSendBuf
 
-    ! Allocate the memory for tmpBuf to the size needed to store the
-    ! return information.
+    !=================================================================
 
-    allocate(tmpBuf(nItemReturn*nHalos), stat=ierr)
-    if(ierr /= 0)                            &
-         call terminate("findDonorsRecvBuffer", &
-         "Memory allocation failure for tmpBuf");
+    subroutine findDonorsRecvBuffer(recvBuf, nHalos, entityHalo, &
+                                    entityIndex, level, nItemReturn)
+        !
+        !       findDonorsRecvBuffer finds the donor cells for the halo
+        !       information stored in recvBuf. On return recvBuf contains
+        !       for every halo the following information: processor ID,
+        !       block ID, the i,j,k indices of the donor cell and periodic
+        !       information. The number of periodic subfaces stored is level,
+        !       where a 0 indicates that the subface is not periodic.
+        !
+        use haloList
+        use communication
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nHalos, level, nItemReturn
+
+        integer(kind=intType), dimension(:), intent(inout) :: recvBuf
+
+        type(haloListType), dimension(:), intent(in) :: entityHalo
+        type(indexListType), dimension(:), intent(in) :: entityIndex
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        integer(kind=intType) :: i, j, k, ii, jj, kk, mm, nn
+        integer(kind=intType) :: db, l1, L2, l3
+        integer(kind=intType) :: nPeriodic
+
+        integer(kind=intType), dimension(:), allocatable :: tmpBuf
+
+        ! Allocate the memory for tmpBuf to the size needed to store the
+        ! return information.
+
+        allocate (tmpBuf(nItemReturn * nHalos), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("findDonorsRecvBuffer", &
+                           "Memory allocation failure for tmpBuf"); 
+        ! Initialize nn and mm to 0. nn is the counter for the incoming
+        ! halo information (7 per halo) and mm for the outgoing info
+        ! (nItemReturn per halo).
+
+        nn = 0
+        mm = 0
+
+        ! Loop over the number of halos in the receive buffer.
+
+        do ii = 1, nHalos
+
+            ! Store the incoming information a bit easier.
+
+            db = recvBuf(nn + 1)
+            i = recvBuf(nn + 2)
+            j = recvBuf(nn + 3)
+            k = recvBuf(nn + 4)
+            l1 = recvBuf(nn + 5)
+            l2 = recvBuf(nn + 6)
+            l3 = recvBuf(nn + 7)
+
+            ! At the moment i,j,k are the indices of the donor of the direct
+            ! halo and l1,l2,l3 the path from the direct halo to the
+            ! indirect halo. Add l1, L2, l3 to i,j,k such that they store
+            ! the indices of the donor of the indirect halo.
+
+            i = i + l1
+            j = j + l2
+            k = k + l3
+
+            ! Store the entry (if there is one) in entityHalo in jj.
+
+            jj = entityIndex(db)%entryList(i, j, k)
+
+            ! Now determine the situation we are dealing with.
+
+            if (jj == 0) then
+
+                ! Donor is an owned entity of the block. Store the
+                ! appropriate info in tmpBuf. There are no periodic
+                ! subfaces for this donor.
+
+                tmpBuf(mm + 1) = myID
+                tmpBuf(mm + 2) = db
+                tmpBuf(mm + 3) = i
+                tmpBuf(mm + 4) = j
+                tmpBuf(mm + 5) = k
+                nPeriodic = 0
+
+            else
+
+                ! Donor is also a halo, but its level of indirectness is at
+                ! least one less than the one for which information is to
+                ! be found. Still there are two possibilities. Either this
+                ! is an internal block boundary halo or a physical boundary
+                ! halo. In the former case the corresponding halo is stored,
+                ! in the latter case the indices of the boundary halo are
+                ! returned.
+
+                if (entityHalo(jj)%donorProc == -1) then
+
+                    ! Physical boundary halo. Store the appropriate
+                    ! info in tmpBuf. No periodic subfaces for this donor.
+
+                    tmpBuf(mm + 1) = myID
+                    tmpBuf(mm + 2) = db
+                    tmpBuf(mm + 3) = i
+                    tmpBuf(mm + 4) = j
+                    tmpBuf(mm + 5) = k
+                    nPeriodic = 0
 
-    ! Initialize nn and mm to 0. nn is the counter for the incoming
-    ! halo information (7 per halo) and mm for the outgoing info
-    ! (nItemReturn per halo).
+                else
 
-    nn = 0
-    mm = 0
+                    ! Internal block boundary halo. Store the appropriate
+                    ! info in tmpBuf, including the possible periodic
+                    ! subfaces.
 
-    ! Loop over the number of halos in the receive buffer.
+                    tmpBuf(mm + 1) = entityHalo(jj)%donorProc
+                    tmpBuf(mm + 2) = entityHalo(jj)%donorBlock
+                    tmpBuf(mm + 3) = entityHalo(jj)%dI
+                    tmpBuf(mm + 4) = entityHalo(jj)%dJ
+                    tmpBuf(mm + 5) = entityHalo(jj)%dK
 
-    do ii=1,nHalos
+                    nPeriodic = entityHalo(jj)%nPeriodicSubfaces
+                    do kk = 1, nPeriodic
+                        tmpBuf(mm + 5 + kk) = entityHalo(jj)%periodicSubfaces(kk)
+                    end do
 
-       ! Store the incoming information a bit easier.
+                end if
 
-       db = recvBuf(nn+1)
-       i  = recvBuf(nn+2)
-       j  = recvBuf(nn+3)
-       k  = recvBuf(nn+4)
-       l1 = recvBuf(nn+5)
-       l2 = recvBuf(nn+6)
-       l3 = recvBuf(nn+7)
+            end if
 
-       ! At the moment i,j,k are the indices of the donor of the direct
-       ! halo and l1,l2,l3 the path from the direct halo to the
-       ! indirect halo. Add l1, L2, l3 to i,j,k such that they store
-       ! the indices of the donor of the indirect halo.
+            ! Fill the remaining part reserved for the periodic subfaces
+            ! with 0's. A 0 indicates that no periodic subface is crossed.
 
-       i = i + l1
-       j = j + l2
-       k = k + l3
+            do kk = (nPeriodic + 1), level
+                tmpBuf(mm + 5 + kk) = 0
+            end do
 
-       ! Store the entry (if there is one) in entityHalo in jj.
+            ! Update nn and mm for the next halo.
 
-       jj = entityIndex(db)%entryList(i,j,k)
+            nn = nn + 7
+            mm = mm + nItemReturn
 
-       ! Now determine the situation we are dealing with.
+        end do
 
-       if(jj == 0) then
+        ! Copy the data from tmpBuf into recvBuf and delete tmpBuf
+        ! afterwards.
 
-          ! Donor is an owned entity of the block. Store the
-          ! appropriate info in tmpBuf. There are no periodic
-          ! subfaces for this donor.
+        nn = nItemReturn * nHalos
+        do i = 1, nn
+            recvBuf(i) = tmpBuf(i)
+        end do
 
-          tmpBuf(mm+1) = myID
-          tmpBuf(mm+2) = db
-          tmpBuf(mm+3) = i
-          tmpBuf(mm+4) = j
-          tmpBuf(mm+5) = k
-          nPeriodic    = 0
-
-       else
-
-          ! Donor is also a halo, but its level of indirectness is at
-          ! least one less than the one for which information is to
-          ! be found. Still there are two possibilities. Either this
-          ! is an internal block boundary halo or a physical boundary
-          ! halo. In the former case the corresponding halo is stored,
-          ! in the latter case the indices of the boundary halo are
-          ! returned.
-
-          if(entityHalo(jj)%donorProc == -1) then
-
-             ! Physical boundary halo. Store the appropriate
-             ! info in tmpBuf. No periodic subfaces for this donor.
+        deallocate (tmpBuf, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("findDonorsRecvBuffer", &
+                           "Deallocation failure for tmpBuf")
 
-             tmpBuf(mm+1) = myID
-             tmpBuf(mm+2) = db
-             tmpBuf(mm+3) = i
-             tmpBuf(mm+4) = j
-             tmpBuf(mm+5) = k
-             nPeriodic    = 0
+    end subroutine findDonorsRecvBuffer
 
-          else
+    !=================================================================
 
-             ! Internal block boundary halo. Store the appropriate
-             ! info in tmpBuf, including the possible periodic
-             ! subfaces.
+    subroutine storeHalosInList(buffer, bufSize, proc, level, &
+                                nItemReturn, entityHalo, &
+                                entityIndex, iihalo)
+        !
+        !       storeHalosInList stores the halo info present in buf, which
+        !       has been retreived from the given processor, in the correct
+        !       place in entityHalo and entityIndex.
+        !
+        use haloList
+        use indirectHalo
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: bufSize, proc
+        integer(kind=intType), intent(in) :: level, nItemReturn
+        integer(kind=intType), intent(inout) :: iihalo
 
-             tmpBuf(mm+1) = entityHalo(jj)%donorProc
-             tmpBuf(mm+2) = entityHalo(jj)%donorBlock
-             tmpBuf(mm+3) = entityHalo(jj)%dI
-             tmpBuf(mm+4) = entityHalo(jj)%dJ
-             tmpBuf(mm+5) = entityHalo(jj)%dK
+        integer(kind=intType), dimension(:), intent(in) :: buffer
 
-             nPeriodic = entityHalo(jj)%nPeriodicSubfaces
-             do kk=1,nPeriodic
-                tmpBuf(mm+5+kk) = entityHalo(jj)%periodicSubfaces(kk)
-             enddo
+        type(haloListType), dimension(:), intent(inout) :: entityHalo
+        type(indexListType), dimension(:), intent(inout) :: entityIndex
+        !
+        !      Local variables.
+        !
+        integer :: ierr
 
-          endif
+        integer(kind=intType) :: i, j, k
+        integer(kind=intType) :: ii, nn, blockID, iii
+        integer(kind=intType) :: nPeriodic
 
-       endif
+        ! Store the start index (-1) for this processor in indHalo in nn.
 
-       ! Fill the remaining part reserved for the periodic subfaces
-       ! with 0's. A 0 indicates that no periodic subface is crossed.
+        nn = nHaloPerLev(level - 1) + nHaloPerProc(proc)
 
-       do kk=(nPeriodic+1),level
-          tmpBuf(mm+5+kk) = 0
-       enddo
+        ! Loop over the number of halo's stored in the buffer.
 
-       ! Update nn and mm for the next halo.
+        do ii = 1, bufSize
 
-       nn = nn + 7
-       mm = mm + nItemReturn
+            iii = (ii - 1) * nItemReturn
 
-    enddo
+            ! Update the counters iihalo and nn.
 
-    ! Copy the data from tmpBuf into recvBuf and delete tmpBuf
-    ! afterwards.
+            iihalo = iihalo + 1
+            nn = nn + 1
 
-    nn = nItemReturn*nHalos
-    do i=1,nn
-       recvBuf(i) = tmpBuf(i)
-    enddo
+            ! Store the i,j,k indices and the block ID of the current
+            ! halo a bit easier.
+
+            blockID = indHalo(nn)%myBlock
+            i = indHalo(nn)%myI
+            j = indHalo(nn)%myJ
+            k = indHalo(nn)%myK
+
+            ! Store the entry of entityHalo in the i,j,k indices
+            ! of in entityIndex.
+
+            entityIndex(blockID)%entryList(i, j, k) = iihalo
 
-    deallocate(tmpBuf, stat=ierr)
-    if(ierr /= 0)                            &
-         call terminate("findDonorsRecvBuffer", &
-         "Deallocation failure for tmpBuf")
+            ! Store the info of the current halo in entityHalo.
+
+            entityHalo(iihalo)%myBlock = blockID
+
+            entityHalo(iihalo)%myI = i
+            entityHalo(iihalo)%myJ = j
+            entityHalo(iihalo)%myK = k
 
-  end subroutine findDonorsRecvBuffer
+            entityHalo(iihalo)%donorProc = buffer(iii + 1)
+            entityHalo(iihalo)%donorBlock = buffer(iii + 2)
+
+            entityHalo(iihalo)%dI = buffer(iii + 3)
+            entityHalo(iihalo)%dJ = buffer(iii + 4)
+            entityHalo(iihalo)%dK = buffer(iii + 5)
 
-  !=================================================================
+            ! Determine the number of periodic subfaces in the buffer.
 
-  subroutine storeHalosInList(buffer, bufSize, proc, level, &
-       nItemReturn, entityHalo,      &
-       entityIndex, iihalo)
-    !
-    !       storeHalosInList stores the halo info present in buf, which
-    !       has been retreived from the given processor, in the correct
-    !       place in entityHalo and entityIndex.
-    !
-    use haloList
-    use indirectHalo
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in)    :: bufSize, proc
-    integer(kind=intType), intent(in)    :: level, nItemReturn
-    integer(kind=intType), intent(inout) :: iihalo
+            nPeriodic = 0
+            do i = 6, nItemReturn
+                if (buffer(iii + i) > 0) nPeriodic = nPeriodic + 1
+            end do
 
-    integer(kind=intType), dimension(:), intent(in) :: buffer
+            ! Check if the corresponding direct halo borders a periodic
+            ! subface and update nPeriodic accordingly.
 
-    type(haloListType),  dimension(:), intent(inout) :: entityHalo
-    type(indexListType), dimension(:), intent(inout) :: entityIndex
-    !
-    !      Local variables.
-    !
-    integer :: ierr
+            j = indHalo(nn)%myDirectHalo
+            nPeriodic = nPeriodic + entityHalo(j)%nPeriodicSubfaces
 
-    integer(kind=intType) :: i, j, k
-    integer(kind=intType) :: ii, nn, blockID, iii
-    integer(kind=intType) :: nPeriodic
+            ! If periodic subfaces are present for this halo, allocate
+            ! the memory for periodicSubfaces and copy the data from
+            ! both the buffer and the direct halo.
 
-    ! Store the start index (-1) for this processor in indHalo in nn.
+            if (nPeriodic > 0) then
+                entityHalo(iihalo)%nPeriodicSubfaces = nPeriodic
+                allocate (entityHalo(iihalo)%periodicSubfaces(nPeriodic), &
+                          stat=ierr)
+                if (ierr /= 0) &
+                     call terminate("storeHalosInList", &
+                     "Memory allocation failure for &
+                     &periodicSubfaces")
+                nPeriodic = 0
+                do i = 6, nItemReturn
+                    if (buffer(iii + i) > 0) then
+                        nPeriodic = nPeriodic + 1
+                        entityHalo(iihalo)%periodicSubfaces(nPeriodic) = &
+                            buffer(iii + i)
+                    end if
+                end do
 
-    nn = nHaloPerLev(level-1) + nHaloPerProc(proc)
+                do i = 1, entityHalo(j)%nPeriodicSubfaces
+                    nPeriodic = nPeriodic + 1
+                    entityHalo(iihalo)%periodicSubfaces(nPeriodic) = &
+                        entityHalo(j)%periodicSubfaces(i)
+                end do
+            end if
 
-    ! Loop over the number of halo's stored in the buffer.
+        end do
 
-    do ii=1,bufSize
-
-       iii = (ii-1)*nItemReturn
-
-       ! Update the counters iihalo and nn.
-
-       iihalo = iihalo +1
-       nn     = nn +1
-
-       ! Store the i,j,k indices and the block ID of the current
-       ! halo a bit easier.
-
-       blockID = indHalo(nn)%myBlock
-       i       = indHalo(nn)%myI
-       j       = indHalo(nn)%myJ
-       k       = indHalo(nn)%myK
+        ! Check in debug mode if the buffer size was correct.
 
-       ! Store the entry of entityHalo in the i,j,k indices
-       ! of in entityIndex.
+        if (debug) then
+            if (nn /= nHaloPerLev(level - 1) + nHaloPerProc(proc + 1)) &
+                call terminate("storeHalosInList", &
+                               "Something wrong with buffer size")
+        end if
 
-       entityIndex(blockID)%entryList(i,j,k) = iihalo
+    end subroutine storeHalosInList
 
-       ! Store the info of the current halo in entityHalo.
+    subroutine qsortPeriodicSubfacesHaloType(arr, nn)
+        !
+        !       qsortPeriodicSubfacesHaloType sorts the given number of halo's
+        !       with periodic subfaces in increasing order based on the
+        !       <= operator for this derived data type.
+        !
+        use periodicInfo
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nn
 
-       entityHalo(iihalo)%myBlock = blockID
+        type(periodicSubfacesHaloType), dimension(*), intent(inout) :: arr
+        !
+        !      Local variables.
+        !
+        integer(kind=intType), parameter :: m = 7
 
-       entityHalo(iihalo)%myI = i
-       entityHalo(iihalo)%myJ = j
-       entityHalo(iihalo)%myK = k
+        integer(kind=intType) :: nStack
+        integer(kind=intType) :: i, j, k, r, l, jStack, ii
 
-       entityHalo(iihalo)%donorProc  = buffer(iii+1)
-       entityHalo(iihalo)%donorBlock = buffer(iii+2)
+        integer :: ierr
 
-       entityHalo(iihalo)%dI = buffer(iii+3)
-       entityHalo(iihalo)%dJ = buffer(iii+4)
-       entityHalo(iihalo)%dK = buffer(iii+5)
+        type(periodicSubfacesHaloType) :: a, tmp
 
-       ! Determine the number of periodic subfaces in the buffer.
+        integer(kind=intType), allocatable, dimension(:) :: stack
+        integer(kind=intType), allocatable, dimension(:) :: tmpStack
 
-       nPeriodic = 0
-       do i=6,nItemReturn
-          if(buffer(iii+i) > 0) nPeriodic = nPeriodic + 1
-       enddo
+        ! Allocate the memory for stack.
 
-       ! Check if the corresponding direct halo borders a periodic
-       ! subface and update nPeriodic accordingly.
+        nStack = 100
+        allocate (stack(nStack), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("qsortPeriodicSubfacesHaloType", &
+                           "Memory allocation failure for stack")
 
-       j = indHalo(nn)%myDirectHalo
-       nPeriodic = nPeriodic + entityHalo(j)%nPeriodicSubfaces
+        ! Initialize the variables that control the sorting.
 
-       ! If periodic subfaces are present for this halo, allocate
-       ! the memory for periodicSubfaces and copy the data from
-       ! both the buffer and the direct halo.
+        jStack = 0
+        l = 1
+        r = nn
 
-       if(nPeriodic > 0) then
-          entityHalo(iihalo)%nPeriodicSubfaces = nPeriodic
-          allocate(entityHalo(iihalo)%periodicSubfaces(nPeriodic), &
-               stat=ierr)
-          if(ierr /= 0)                        &
-               call terminate("storeHalosInList", &
-               "Memory allocation failure for &
-               &periodicSubfaces")
-          nPeriodic = 0
-          do i=6,nItemReturn
-             if(buffer(iii+i) > 0) then
-                nPeriodic = nPeriodic + 1
-                entityHalo(iihalo)%periodicSubfaces(nPeriodic) = &
-                     buffer(iii+i)
-             endif
-          enddo
+        ! Start of the algorithm
 
-          do i=1,entityHalo(j)%nPeriodicSubfaces
-             nPeriodic = nPeriodic + 1
-             entityHalo(iihalo)%periodicSubfaces(nPeriodic) = &
-                  entityHalo(j)%periodicSubfaces(i)
-          enddo
-       endif
+        do
 
-    enddo
+            ! Check for the size of the subarray.
 
-    ! Check in debug mode if the buffer size was correct.
+            if ((r - l) < m) then
 
-    if( debug ) then
-       if(nn /= nHaloPerLev(level-1) + nHaloPerProc(proc+1)) &
-            call terminate("storeHalosInList",                  &
-            "Something wrong with buffer size")
-    endif
+                ! Perform insertion sort
 
-  end subroutine storeHalosInList
+                do j = l + 1, r
+                    a = arr(j)
+                    do i = (j - 1), l, -1
+                        if (arr(i) <= a) exit
+                        arr(i + 1) = arr(i)
+                    end do
+                    arr(i + 1) = a
+                end do
 
-  subroutine qsortPeriodicSubfacesHaloType(arr, nn)
-    !
-    !       qsortPeriodicSubfacesHaloType sorts the given number of halo's
-    !       with periodic subfaces in increasing order based on the
-    !       <= operator for this derived data type.
-    !
-    use periodicInfo
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nn
+                ! In case there are no more elements on the stack, exit from
+                ! the outermost do-loop. Algorithm has finished.
+
+                if (jStack == 0) exit
 
-    type(periodicSubfacesHaloType), dimension(*), intent(inout) :: arr
-    !
-    !      Local variables.
-    !
-    integer(kind=intType), parameter :: m = 7
+                ! Pop stack and begin a new round of partitioning.
 
-    integer(kind=intType) :: nStack
-    integer(kind=intType) :: i, j, k, r, l, jStack, ii
+                r = stack(jStack)
+                l = stack(jStack - 1)
+                jStack = jStack - 2
 
-    integer :: ierr
+            else
 
-    type(periodicSubfacesHaloType) :: a, tmp
+                ! Subarray is larger than the threshold for a linear sort.
+                ! Choose median of left, center and right elements as
+                ! partitioning element a.
+                ! Also rearrange so that (l) <= (l+1) <= (r).
 
-    integer(kind=intType), allocatable, dimension(:) :: stack
-    integer(kind=intType), allocatable, dimension(:) :: tmpStack
+                k = (l + r) / 2
+                tmp = arr(k)             ! Swap the elements
+                arr(k) = arr(l + 1)           ! k and l+1.
+                arr(l + 1) = tmp
 
-    ! Allocate the memory for stack.
+                if (arr(r) < arr(l)) then
+                    tmp = arr(l)             ! Swap the elements
+                    arr(l) = arr(r)             ! r and l.
+                    arr(r) = tmp
+                end if
 
-    nStack = 100
-    allocate(stack(nStack), stat=ierr)
-    if(ierr /= 0)                                     &
-         call terminate("qsortPeriodicSubfacesHaloType", &
-         "Memory allocation failure for stack")
+                if (arr(r) < arr(l + 1)) then
+                    tmp = arr(l + 1)         ! Swap the elements
+                    arr(l + 1) = arr(r)           ! r and l+1.
+                    arr(r) = tmp
+                end if
 
-    ! Initialize the variables that control the sorting.
+                if (arr(l + 1) < arr(l)) then
+                    tmp = arr(l + 1)         ! Swap the elements
+                    arr(l + 1) = arr(l)           ! l and l+1.
+                    arr(l) = tmp
+                end if
 
-    jStack = 0
-    l      = 1
-    r      = nn
+                ! Initialize the pointers for partitioning.
 
-    ! Start of the algorithm
+                i = l + 1
+                j = r
+                a = arr(l + 1)
 
-    do
+                ! The innermost loop
 
-       ! Check for the size of the subarray.
+                do
 
-       if((r-l) < m) then
+                    ! Scan up to find element >= a.
+                    do
+                        i = i + 1
+                        if (a <= arr(i)) exit
+                    end do
 
-          ! Perform insertion sort
+                    ! Scan down to find element <= a.
+                    do
+                        j = j - 1
+                        if (arr(j) <= a) exit
+                    end do
 
-          do j=l+1,r
-             a = arr(j)
-             do i=(j-1),l,-1
-                if(arr(i) <= a) exit
-                arr(i+1) = arr(i)
-             enddo
-             arr(i+1) = a
-          enddo
+                    ! Exit the loop in case the pointers i and j crossed.
 
-          ! In case there are no more elements on the stack, exit from
-          ! the outermost do-loop. Algorithm has finished.
-
-          if(jStack == 0) exit
+                    if (j < i) exit
 
-          ! Pop stack and begin a new round of partitioning.
+                    ! Swap the element i and j.
 
-          r = stack(jStack)
-          l = stack(jStack-1)
-          jStack = jStack - 2
+                    tmp = arr(i)
+                    arr(i) = arr(j)
+                    arr(j) = tmp
+                end do
 
-       else
+                ! Swap the entries j and l+1. Remember that a equals
+                ! arr(l+1).
 
-          ! Subarray is larger than the threshold for a linear sort.
-          ! Choose median of left, center and right elements as
-          ! partitioning element a.
-          ! Also rearrange so that (l) <= (l+1) <= (r).
+                arr(l + 1) = arr(j)
+                arr(j) = a
 
-          k = (l+r)/2
-          tmp      = arr(k)             ! Swap the elements
-          arr(k)   = arr(l+1)           ! k and l+1.
-          arr(l+1) = tmp
+                ! Push pointers to larger subarray on stack,
+                ! process smaller subarray immediately.
 
-          if(arr(r) < arr(l)) then
-             tmp    = arr(l)             ! Swap the elements
-             arr(l) = arr(r)             ! r and l.
-             arr(r) = tmp
-          endif
+                jStack = jStack + 2
+                if (jStack > nStack) then
 
-          if(arr(r) < arr(l+1)) then
-             tmp      = arr(l+1)         ! Swap the elements
-             arr(l+1) = arr(r)           ! r and l+1.
-             arr(r)   = tmp
-          endif
+                    ! Storage of the stack is too small. Reallocate.
 
-          if(arr(l+1) < arr(l)) then
-             tmp      = arr(l+1)         ! Swap the elements
-             arr(l+1) = arr(l)           ! l and l+1.
-             arr(l)   = tmp
-          endif
+                    allocate (tmpStack(nStack), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortPeriodicSubfacesHaloType", &
+                                       "Memory allocation error for tmpStack")
+                    tmpStack = stack
 
-          ! Initialize the pointers for partitioning.
+                    ! Free the memory of stack, store the old value of nStack
+                    ! in tmp and increase nStack.
 
-          i = l+1
-          j = r
-          a = arr(l+1)
+                    deallocate (stack, stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortPeriodicSubfacesHaloType", &
+                                       "Deallocation error for stack")
+                    ii = nStack
+                    nStack = nStack + 100
 
-          ! The innermost loop
+                    ! Allocate the memory for stack and copy the old values
+                    ! from tmpStack.
 
-          do
+                    allocate (stack(nStack), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortPeriodicSubfacesHaloType", &
+                                       "Memory reallocation error for stack")
+                    stack(1:ii) = tmpStack(1:ii)
 
-             ! Scan up to find element >= a.
-             do
-                i = i+1
-                if(a <= arr(i)) exit
-             enddo
+                    ! And finally release the memory of tmpStack.
 
-             ! Scan down to find element <= a.
-             do
-                j = j-1
-                if(arr(j) <= a) exit
-             enddo
+                    deallocate (tmpStack, stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortPeriodicSubfacesHaloType", &
+                                       "Deallocation error for tmpstack")
+                end if
 
-             ! Exit the loop in case the pointers i and j crossed.
+                if ((r - i + 1) >= (j - l)) then
+                    stack(jStack) = r
+                    r = j - 1
+                    stack(jStack - 1) = j
+                else
+                    stack(jStack) = j - 1
+                    stack(jStack - 1) = l
+                    l = j
+                end if
+
+            end if
+        end do
+
+        ! Release the memory of stack.
+
+        deallocate (stack, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("qsortPeriodicSubfacesHaloType", &
+                           "Deallocation error for stack")
+
+        ! Check in debug mode whether the array is really sorted.
+
+        if (debug) then
+            do i = 1, (nn - 1)
+                if (arr(i + 1) < arr(i)) &
+                    call terminate("qsortPeriodicSubfacesHaloType", &
+                                   "Array is not sorted correctly")
+            end do
+        end if
+
+    end subroutine qsortPeriodicSubfacesHaloType
+
+    subroutine determinePeriodicFaces
+        !
+        !       determinePeriodicFaces determines and stores the number of
+        !       periodic faces present in the complete mesh. The sequence of
+        !       storing the data is such that the array periodicGlobal is
+        !       sorted with the definition of the < operator for this datatype.
+        !
+        use cgnsGrid
+        use periodicInfo
+        use utils, only: terminate
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+        integer(kind=intType) :: nn, ii, i
+
+        ! Determine the number of periodic faces present in the cgns grid.
+
+        nPeriodicGlobal = 0
+        do nn = 1, cgnsNDom
+            do i = 1, cgnsDoms(nn)%n1to1
+                if (cgnsDoms(nn)%conn1to1(i)%periodic) &
+                    nPeriodicGlobal = nPeriodicGlobal + 1
+            end do
+        end do
+
+        ! Allocate the memory for periodicGlobal.
+
+        allocate (periodicGlobal(nPeriodicGlobal), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("determinePeriodicFaces", &
+                           "Memory allocation failure for periodicGlobal")
+
+        ! Repeat the loop over the faces of the cgns grid and store the
+        ! periodic faces.
+
+        ii = 0
+        do nn = 1, cgnsNDom
+            do i = 1, cgnsDoms(nn)%n1to1
+                if (cgnsDoms(nn)%conn1to1(i)%periodic) then
+
+                    ii = ii + 1
+                    periodicGlobal(ii)%cgnsBlock = nn
+                    periodicGlobal(ii)%cgnsSubface = i
+
+                end if
+            end do
+        end do
+
+    end subroutine determinePeriodicFaces
+
+    function bsearchCGNSPeriodicType(key, base)
+        !
+        !       bsearchCGNSPeriodicType returns the index in base where key
+        !       is stored. A binary search algorithm is used here, so it is
+        !       assumed that base is sorted in increasing order. In case key
+        !       appears more than once in base, the result is arbitrary.
+        !       If key is not found, a zero is returned.
+        !
+        use periodicInfo
+        implicit none
+        !
+        !      Function type
+        !
+        integer(kind=intType) :: bsearchCGNSPeriodicType
+        !
+        !      Function arguments.
+        !
+        type(cgnsPeriodicType), intent(in) :: key
+        type(cgnsPeriodicType), dimension(:), intent(in) :: base
+        integer(kind=intType) :: nn
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: ii, pos, start
+        logical :: entryFound
+
+        ! Initialize some values.
+
+        start = 1
+        ii = size(base)
+        entryFound = .false.
+
+        ! Binary search to find key.
+
+        do
+            ! Condition for breaking the loop
+
+            if (ii == 0) exit
+
+            ! Determine the position in the array to compare.
+
+            pos = start + ii / 2
+
+            ! In case this is the entry, break the search loop.
+
+            if (base(pos) == key) then
+                entryFound = .true.
+                exit
+            end if
+
+            ! In case the search key is larger than the current position,
+            ! only parts to the right must be searched. Remember that base
+            ! is sorted in increasing order. Nothing needs to be done if the
+            ! key is smaller than the current element.
+
+            if (base(pos) < key) then
+                start = pos + 1
+                ii = ii - 1
+            end if
+
+            ! Modify ii for the next branch to search.
+
+            ii = ii / 2
+        end do
+
+        ! Set bsearchCGNSPeriodicType.
+        ! This depends whether the key was found.
+
+        if (entryFound) then
+            bsearchCGNSPeriodicType = pos
+        else
+            bsearchCGNSPeriodicType = 0
+        end if
+
+    end function bsearchCGNSPeriodicType
+    subroutine qsortIndHaloType(arr, nn)
+        !
+        !       qsortIndHaloType sorts the given number of indirect halo's
+        !       in increasing order based on the <= operator for this derived
+        !       data type.
+        !
+        use indirectHalo
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nn
 
-             if(j < i) exit
+        type(indirectHaloType), dimension(*), intent(inout) :: arr
+        !
+        !      Local variables.
+        !
+        integer(kind=intType), parameter :: m = 7
+
+        integer(kind=intType) :: nStack
+        integer(kind=intType) :: i, j, k, r, l, jStack, ii
+
+        integer :: ierr
+
+        type(indirectHaloType) :: a, tmp
+
+        integer(kind=intType), allocatable, dimension(:) :: stack
+        integer(kind=intType), allocatable, dimension(:) :: tmpStack
 
-             ! Swap the element i and j.
-
-             tmp    = arr(i)
-             arr(i) = arr(j)
-             arr(j) = tmp
-          enddo
+        ! Allocate the memory for stack.
 
-          ! Swap the entries j and l+1. Remember that a equals
-          ! arr(l+1).
-
-          arr(l+1) = arr(j)
-          arr(j)   = a
-
-          ! Push pointers to larger subarray on stack,
-          ! process smaller subarray immediately.
-
-          jStack = jStack + 2
-          if(jStack > nStack) then
-
-             ! Storage of the stack is too small. Reallocate.
-
-             allocate(tmpStack(nStack), stat=ierr)
-             if(ierr /= 0)                                     &
-                  call terminate("qsortPeriodicSubfacesHaloType", &
-                  "Memory allocation error for tmpStack")
-             tmpStack = stack
-
-             ! Free the memory of stack, store the old value of nStack
-             ! in tmp and increase nStack.
-
-             deallocate(stack, stat=ierr)
-             if(ierr /= 0)                                     &
-                  call terminate("qsortPeriodicSubfacesHaloType", &
-                  "Deallocation error for stack")
-             ii = nStack
-             nStack = nStack + 100
-
-             ! Allocate the memory for stack and copy the old values
-             ! from tmpStack.
-
-             allocate(stack(nStack), stat=ierr)
-             if(ierr /= 0)                                     &
-                  call terminate("qsortPeriodicSubfacesHaloType", &
-                  "Memory reallocation error for stack")
-             stack(1:ii) = tmpStack(1:ii)
-
-             ! And finally release the memory of tmpStack.
-
-             deallocate(tmpStack, stat=ierr)
-             if(ierr /= 0)                                     &
-                  call terminate("qsortPeriodicSubfacesHaloType", &
-                  "Deallocation error for tmpstack")
-          endif
-
-          if((r-i+1) >= (j-l)) then
-             stack(jStack)   = r
-             r               = j-1
-             stack(jStack-1) = j
-          else
-             stack(jStack)   = j-1
-             stack(jStack-1) = l
-             l               = j
-          endif
-
-       endif
-    enddo
-
-    ! Release the memory of stack.
-
-    deallocate(stack, stat=ierr)
-    if(ierr /= 0)                                     &
-         call terminate("qsortPeriodicSubfacesHaloType", &
-         "Deallocation error for stack")
-
-    ! Check in debug mode whether the array is really sorted.
-
-    if( debug ) then
-       do i=1,(nn-1)
-          if(arr(i+1) < arr(i))                             &
-               call terminate("qsortPeriodicSubfacesHaloType", &
-               "Array is not sorted correctly")
-       enddo
-    endif
-
-  end subroutine qsortPeriodicSubfacesHaloType
-
-  subroutine determinePeriodicFaces
-    !
-    !       determinePeriodicFaces determines and stores the number of
-    !       periodic faces present in the complete mesh. The sequence of
-    !       storing the data is such that the array periodicGlobal is
-    !       sorted with the definition of the < operator for this datatype.
-    !
-    use cgnsGrid
-    use periodicInfo
-    use utils, only : terminate
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-    integer(kind=intType) :: nn, ii, i
-
-    ! Determine the number of periodic faces present in the cgns grid.
-
-    nPeriodicGlobal = 0
-    do nn=1,cgnsNDom
-       do i=1,cgnsDoms(nn)%n1to1
-          if( cgnsDoms(nn)%conn1to1(i)%periodic ) &
-               nPeriodicGlobal = nPeriodicGlobal + 1
-       enddo
-    enddo
-
-    ! Allocate the memory for periodicGlobal.
-
-    allocate(periodicGlobal(nPeriodicGlobal), stat=ierr)
-    if(ierr /= 0)                              &
-         call terminate("determinePeriodicFaces", &
-         "Memory allocation failure for periodicGlobal")
-
-    ! Repeat the loop over the faces of the cgns grid and store the
-    ! periodic faces.
-
-    ii = 0
-    do nn=1,cgnsNDom
-       do i=1,cgnsDoms(nn)%n1to1
-          if( cgnsDoms(nn)%conn1to1(i)%periodic ) then
-
-             ii = ii + 1
-             periodicGlobal(ii)%cgnsBlock   = nn
-             periodicGlobal(ii)%cgnsSubface = i
-
-          endif
-       enddo
-    enddo
-
-  end subroutine determinePeriodicFaces
-
-  function bsearchCGNSPeriodicType(key, base)
-    !
-    !       bsearchCGNSPeriodicType returns the index in base where key
-    !       is stored. A binary search algorithm is used here, so it is
-    !       assumed that base is sorted in increasing order. In case key
-    !       appears more than once in base, the result is arbitrary.
-    !       If key is not found, a zero is returned.
-    !
-    use periodicInfo
-    implicit none
-    !
-    !      Function type
-    !
-    integer(kind=intType) :: bsearchCGNSPeriodicType
-    !
-    !      Function arguments.
-    !
-    type(cgnsPeriodicType), intent(in)               :: key
-    type(cgnsPeriodicType), dimension(:), intent(in) :: base
-    integer(kind=intType)                            :: nn
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: ii, pos, start
-    logical               :: entryFound
-
-    ! Initialize some values.
-
-    start       = 1
-    ii          = size(base)
-    entryFound = .false.
-
-    ! Binary search to find key.
-
-    do
-       ! Condition for breaking the loop
-
-       if(ii == 0) exit
-
-       ! Determine the position in the array to compare.
-
-       pos = start + ii/2
-
-       ! In case this is the entry, break the search loop.
-
-       if(base(pos) == key) then
-          entryFound = .true.
-          exit
-       endif
-
-       ! In case the search key is larger than the current position,
-       ! only parts to the right must be searched. Remember that base
-       ! is sorted in increasing order. Nothing needs to be done if the
-       ! key is smaller than the current element.
-
-       if(base(pos) < key) then
-          start = pos +1
-          ii    = ii -1
-       endif
-
-       ! Modify ii for the next branch to search.
-
-       ii = ii/2
-    enddo
-
-    ! Set bsearchCGNSPeriodicType.
-    ! This depends whether the key was found.
-
-    if( entryFound ) then
-       bsearchCGNSPeriodicType = pos
-    else
-       bsearchCGNSPeriodicType = 0
-    endif
-
-  end function bsearchCGNSPeriodicType
-  subroutine qsortIndHaloType(arr, nn)
-    !
-    !       qsortIndHaloType sorts the given number of indirect halo's
-    !       in increasing order based on the <= operator for this derived
-    !       data type.
-    !
-    use indirectHalo
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nn
+        nStack = 100
+        allocate (stack(nStack), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("qsortIndHaloType", &
+                           "Memory allocation failure for stack")
 
-    type(indirectHaloType), dimension(*), intent(inout) :: arr
-    !
-    !      Local variables.
-    !
-    integer(kind=intType), parameter :: m = 7
-
-    integer(kind=intType) :: nStack
-    integer(kind=intType) :: i, j, k, r, l, jStack, ii
-
-    integer :: ierr
-
-    type(indirectHaloType) :: a, tmp
-
-    integer(kind=intType), allocatable, dimension(:) :: stack
-    integer(kind=intType), allocatable, dimension(:) :: tmpStack
+        ! Initialize the variables that control the sorting.
 
-    ! Allocate the memory for stack.
+        jStack = 0
+        l = 1
+        r = nn
 
-    nStack = 100
-    allocate(stack(nStack), stat=ierr)
-    if(ierr /= 0)                        &
-         call terminate("qsortIndHaloType", &
-         "Memory allocation failure for stack")
+        ! Start of the algorithm
 
-    ! Initialize the variables that control the sorting.
+        do
 
-    jStack = 0
-    l      = 1
-    r      = nn
+            ! Check for the size of the subarray.
 
-    ! Start of the algorithm
+            if ((r - l) < m) then
 
-    do
+                ! Perform insertion sort
 
-       ! Check for the size of the subarray.
+                do j = l + 1, r
+                    a = arr(j)
+                    do i = (j - 1), l, -1
+                        if (arr(i) <= a) exit
+                        arr(i + 1) = arr(i)
+                    end do
+                    arr(i + 1) = a
+                end do
 
-       if((r-l) < m) then
+                ! In case there are no more elements on the stack, exit from
+                ! the outermost do-loop. Algorithm has finished.
 
-          ! Perform insertion sort
+                if (jStack == 0) exit
 
-          do j=l+1,r
-             a = arr(j)
-             do i=(j-1),l,-1
-                if(arr(i) <= a) exit
-                arr(i+1) = arr(i)
-             enddo
-             arr(i+1) = a
-          enddo
+                ! Pop stack and begin a new round of partitioning.
 
-          ! In case there are no more elements on the stack, exit from
-          ! the outermost do-loop. Algorithm has finished.
+                r = stack(jStack)
+                l = stack(jStack - 1)
+                jStack = jStack - 2
 
-          if(jStack == 0) exit
+            else
 
-          ! Pop stack and begin a new round of partitioning.
+                ! Subarray is larger than the threshold for a linear sort.
+                ! Choose median of left, center and right elements as
+                ! partitioning element a.
+                ! Also rearrange so that (l) <= (l+1) <= (r).
 
-          r = stack(jStack)
-          l = stack(jStack-1)
-          jStack = jStack - 2
+                k = (l + r) / 2
+                tmp = arr(k)             ! Swap the elements
+                arr(k) = arr(l + 1)           ! k and l+1.
+                arr(l + 1) = tmp
 
-       else
+                if (arr(r) < arr(l)) then
+                    tmp = arr(l)             ! Swap the elements
+                    arr(l) = arr(r)             ! r and l.
+                    arr(r) = tmp
+                end if
 
-          ! Subarray is larger than the threshold for a linear sort.
-          ! Choose median of left, center and right elements as
-          ! partitioning element a.
-          ! Also rearrange so that (l) <= (l+1) <= (r).
+                if (arr(r) < arr(l + 1)) then
+                    tmp = arr(l + 1)         ! Swap the elements
+                    arr(l + 1) = arr(r)           ! r and l+1.
+                    arr(r) = tmp
+                end if
 
-          k = (l+r)/2
-          tmp      = arr(k)             ! Swap the elements
-          arr(k)   = arr(l+1)           ! k and l+1.
-          arr(l+1) = tmp
+                if (arr(l + 1) < arr(l)) then
+                    tmp = arr(l + 1)         ! Swap the elements
+                    arr(l + 1) = arr(l)           ! l and l+1.
+                    arr(l) = tmp
+                end if
 
-          if(arr(r) < arr(l)) then
-             tmp    = arr(l)             ! Swap the elements
-             arr(l) = arr(r)             ! r and l.
-             arr(r) = tmp
-          endif
+                ! Initialize the pointers for partitioning.
 
-          if(arr(r) < arr(l+1)) then
-             tmp      = arr(l+1)         ! Swap the elements
-             arr(l+1) = arr(r)           ! r and l+1.
-             arr(r)   = tmp
-          endif
+                i = l + 1
+                j = r
+                a = arr(l + 1)
 
-          if(arr(l+1) < arr(l)) then
-             tmp      = arr(l+1)         ! Swap the elements
-             arr(l+1) = arr(l)           ! l and l+1.
-             arr(l)   = tmp
-          endif
+                ! The innermost loop
 
-          ! Initialize the pointers for partitioning.
+                do
 
-          i = l+1
-          j = r
-          a = arr(l+1)
+                    ! Scan up to find element >= a.
+                    do
+                        i = i + 1
+                        if (a <= arr(i)) exit
+                    end do
 
-          ! The innermost loop
+                    ! Scan down to find element <= a.
+                    do
+                        j = j - 1
+                        if (arr(j) <= a) exit
+                    end do
 
-          do
+                    ! Exit the loop in case the pointers i and j crossed.
 
-             ! Scan up to find element >= a.
-             do
-                i = i+1
-                if(a <= arr(i)) exit
-             enddo
+                    if (j < i) exit
 
-             ! Scan down to find element <= a.
-             do
-                j = j-1
-                if(arr(j) <= a) exit
-             enddo
+                    ! Swap the element i and j.
 
-             ! Exit the loop in case the pointers i and j crossed.
+                    tmp = arr(i)
+                    arr(i) = arr(j)
+                    arr(j) = tmp
+                end do
 
-             if(j < i) exit
+                ! Swap the entries j and l+1. Remember that a equals
+                ! arr(l+1).
 
-             ! Swap the element i and j.
+                arr(l + 1) = arr(j)
+                arr(j) = a
 
-             tmp    = arr(i)
-             arr(i) = arr(j)
-             arr(j) = tmp
-          enddo
+                ! Push pointers to larger subarray on stack,
+                ! process smaller subarray immediately.
 
-          ! Swap the entries j and l+1. Remember that a equals
-          ! arr(l+1).
+                jStack = jStack + 2
+                if (jStack > nStack) then
 
-          arr(l+1) = arr(j)
-          arr(j)   = a
+                    ! Storage of the stack is too small. Reallocate.
 
-          ! Push pointers to larger subarray on stack,
-          ! process smaller subarray immediately.
+                    allocate (tmpStack(nStack), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortIndHaloType", &
+                                       "Memory allocation error for tmpStack")
+                    tmpStack = stack
 
-          jStack = jStack + 2
-          if(jStack > nStack) then
+                    ! Free the memory of stack, store the old value of nStack
+                    ! in tmp and increase nStack.
 
-             ! Storage of the stack is too small. Reallocate.
+                    deallocate (stack, stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortIndHaloType", &
+                                       "Deallocation error for stack")
+                    ii = nStack
+                    nStack = nStack + 100
 
-             allocate(tmpStack(nStack), stat=ierr)
-             if(ierr /= 0)                        &
-                  call terminate("qsortIndHaloType", &
-                  "Memory allocation error for tmpStack")
-             tmpStack = stack
+                    ! Allocate the memory for stack and copy the old values
+                    ! from tmpStack.
 
-             ! Free the memory of stack, store the old value of nStack
-             ! in tmp and increase nStack.
+                    allocate (stack(nStack), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortIndHaloType", &
+                                       "Memory reallocation error for stack")
+                    stack(1:ii) = tmpStack(1:ii)
 
-             deallocate(stack, stat=ierr)
-             if(ierr /= 0)                        &
-                  call terminate("qsortIndHaloType", &
-                  "Deallocation error for stack")
-             ii = nStack
-             nStack = nStack + 100
+                    ! And finally release the memory of tmpStack.
 
-             ! Allocate the memory for stack and copy the old values
-             ! from tmpStack.
+                    deallocate (tmpStack, stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortIndHaloType", &
+                                       "Deallocation error for tmpstack")
+                end if
 
-             allocate(stack(nStack), stat=ierr)
-             if(ierr /= 0)                        &
-                  call terminate("qsortIndHaloType", &
-                  "Memory reallocation error for stack")
-             stack(1:ii) = tmpStack(1:ii)
-
-             ! And finally release the memory of tmpStack.
-
-             deallocate(tmpStack, stat=ierr)
-             if(ierr /= 0)                        &
-                  call terminate("qsortIndHaloType", &
-                  "Deallocation error for tmpstack")
-          endif
-
-          if((r-i+1) >= (j-l)) then
-             stack(jStack)   = r
-             r               = j-1
-             stack(jStack-1) = j
-          else
-             stack(jStack)   = j-1
-             stack(jStack-1) = l
-             l               = j
-          endif
+                if ((r - i + 1) >= (j - l)) then
+                    stack(jStack) = r
+                    r = j - 1
+                    stack(jStack - 1) = j
+                else
+                    stack(jStack) = j - 1
+                    stack(jStack - 1) = l
+                    l = j
+                end if
 
-       endif
-    enddo
+            end if
+        end do
 
-    ! Release the memory of stack.
+        ! Release the memory of stack.
 
-    deallocate(stack, stat=ierr)
-    if(ierr /= 0)                        &
-         call terminate("qsortIndHaloType", &
-         "Deallocation error for stack")
+        deallocate (stack, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("qsortIndHaloType", &
+                           "Deallocation error for stack")
 
-    ! Check in debug mode whether the array is really sorted.
+        ! Check in debug mode whether the array is really sorted.
 
-    if( debug ) then
-       do i=1,(nn-1)
-          if(arr(i+1) < arr(i))                &
-               call terminate("qsortIndHaloType", &
-               "Array is not sorted correctly")
-       enddo
-    endif
+        if (debug) then
+            do i = 1, (nn - 1)
+                if (arr(i + 1) < arr(i)) &
+                    call terminate("qsortIndHaloType", &
+                                   "Array is not sorted correctly")
+            end do
+        end if
 
-  end subroutine qsortIndHaloType
+    end subroutine qsortIndHaloType
 
 end module pointMatchedCommPattern
diff --git a/src/preprocessing/preprocessingAPI.F90 b/src/preprocessing/preprocessingAPI.F90
index e583d0bc2..4d57131a8 100644
--- a/src/preprocessing/preprocessingAPI.F90
+++ b/src/preprocessing/preprocessingAPI.F90
@@ -3444,8 +3444,7 @@ subroutine writeNegVolumes(checkVolDoms)
                                 print "(a)", "#"
                                 print stringSpace, "# Block", trim(cgnsDoms(nbkGlobal)%zoneName), &
                                     "contains the following negative volumes"
-                                print "(a)", "#================================&
-                                &===================================="
+                                print "(a)", "#--------------------------------------------------------------------"
                                 print "(a)", "#"
 
                                 !====================================================
@@ -3458,8 +3457,7 @@ subroutine writeNegVolumes(checkVolDoms)
                                 print "(a)", "#"
                                 print stringSpace, "# Spectral solution", trim(intString1), "block", &
                                     trim(cgnsDoms(nbkGlobal)%zoneName), "contains the following negative volumes"
-                                print "(a)", "#===================================&
-                                &================================="
+                                print "(a)", "#--------------------------------------------------------------------"
                                 print "(a)", "#"
 
                             end select
diff --git a/src/preprocessing/preprocessingModules.F90 b/src/preprocessing/preprocessingModules.F90
index dd6eb2761..4269176ba 100644
--- a/src/preprocessing/preprocessingModules.F90
+++ b/src/preprocessing/preprocessingModules.F90
@@ -1,1308 +1,1308 @@
 module indirectHalo
-  !
-  !       This local module contains the derived data type used to
-  !       determine the indirect halo's as well as an array of this type.
-  !
-  use precision
-  implicit none
-  save
-
-  public
-  private :: lessEqualIndirectHaloType
-  private :: lessIndirectHaloType
-  !
-  !       The definition of the derived data type indirectHaloType.
-  !
-  type indirectHaloType
-
-     ! myBlock      : Local block ID of the halo.
-     ! myI, myJ, myK: i,j,k indices of the halo.
-     ! myDirectHalo : Index in the haloListType where the
-     !                corresponding direct halo is stored.
-     ! levOfInd     : Level of indirectness.
-     ! donorProc    : Processor where donor of the direct halo is
-     !                stored. In case this halo is a boundary
-     !                halo, donorProc is set to -1.
-
-     integer(kind=intType) :: myBlock
-     integer(kind=intType) :: myI, myJ, myK
-     integer(kind=intType) :: myDirectHalo
-     integer(kind=intType) :: levOfInd
-     integer(kind=intType) :: donorProc
-
-  end type indirectHaloType
-
-  ! Interface for the extension of the operators <= and <.
-  ! These are needed for the sorting of indirectHaloType.
-
-  interface operator(<=)
-     module procedure lessEqualIndirectHaloType
-  end interface operator(<=)
-
-  interface operator(<)
-     module procedure lessIndirectHaloType
-  end interface operator(<)
-
-  ! nIndHalo         : Number of indirect halo's to be treated.
-  ! indHalo(nIndHalo): The indirect halo's.
-
-  integer(kind=intType) :: nIndHalo
-  type(indirectHaloType), dimension(:), allocatable :: indHalo
-
-  ! nLevOfInd               : Number of levels of indirectness
-  ! nHaloPerLev(0:nLevOfInd): Number of indirect halo's per level
-  !                           of indirectness; stored in
-  !                           cumulative storage format.
-  ! nHaloPerProc(0:nProc)   : Number of indirect halo's per
-  !                           processor for a given level of
-  !                           indirectness; cumulative storage.
-  !                           nHaloPerProc(0) is not 0,
-  !                           because of the presence of boundary
-  !                           halo's, which get proc ID -1.
-
-  integer(kind=intType) :: nLevOfInd
-  integer(kind=intType), dimension(:), allocatable :: nHaloPerLev
-  integer(kind=intType), dimension(:), allocatable :: nHaloPerProc
-
-contains
-  !
-  !         Functions to simulate the operators <= and <.
-  !
-  logical function lessEqualIndirectHaloType(g1, g2)
     !
-    !         This function returns .true. if g1 <= g2 and .false.
-    !         otherwise. The comparison is firstly based on the level of
-    !         indirectness followed by the donor processor, the
-    !         corresponding direct halo, my block ID and finally the i, j
-    !         and k indices.
+    !       This local module contains the derived data type used to
+    !       determine the indirect halo's as well as an array of this type.
     !
+    use precision
     implicit none
+    save
+
+    public
+    private :: lessEqualIndirectHaloType
+    private :: lessIndirectHaloType
     !
-    !        Function arguments.
-    !
-    type(indirectHaloType), intent(in) :: g1, g2
-
-    ! Compare the level of indirectness. If not equal, set
-    ! lessEqual appropriately and return.
-
-    if(g1%levOfInd < g2%levOfInd) then
-       lessEqualIndirectHaloType = .true.
-       return
-    else if(g1%levOfInd > g2%levOfInd) then
-       lessEqualIndirectHaloType = .false.
-       return
-    endif
-
-    ! Compare the donor processors.
-
-    if(g1%donorProc < g2%donorProc) then
-       lessEqualIndirectHaloType = .true.
-       return
-    else if(g1%donorProc > g2%donorProc) then
-       lessEqualIndirectHaloType = .false.
-       return
-    endif
-
-    ! Compare the direct halo.
-
-    if(g1%myDirectHalo < g2%myDirectHalo) then
-       lessEqualIndirectHaloType = .true.
-       return
-    else if(g1%myDirectHalo > g2%myDirectHalo) then
-       lessEqualIndirectHaloType = .false.
-       return
-    endif
-
-    ! Compare my block ID.
-
-    if(g1%myBlock < g2%myBlock) then
-       lessEqualIndirectHaloType = .true.
-       return
-    else if(g1%myBlock > g2%myBlock) then
-       lessEqualIndirectHaloType = .false.
-       return
-    endif
-
-    ! Finally compare the halo indices. Start with k.
-
-    if(g1%myK < g2%myK) then
-       lessEqualIndirectHaloType = .true.
-       return
-    else if(g1%myK > g2%myK) then
-       lessEqualIndirectHaloType = .false.
-       return
-    endif
-
-    ! The j index.
-
-    if(g1%myJ < g2%myJ) then
-       lessEqualIndirectHaloType = .true.
-       return
-    else if(g1%myJ > g2%myJ) then
-       lessEqualIndirectHaloType = .false.
-       return
-    endif
-
-    ! The i index.
-
-    if(g1%myI < g2%myI) then
-       lessEqualIndirectHaloType = .true.
-       return
-    else if(g1%myI > g2%myI) then
-       lessEqualIndirectHaloType = .false.
-       return
-    endif
-
-    ! Both entities are identical. So set lessEqual to .true.
-
-    lessEqualIndirectHaloType = .true.
-
-  end function lessEqualIndirectHaloType
-
-  !        ================================================================
-
-  logical function lessIndirectHaloType(g1, g2)
-    !
-    !         This function returns .true. If g1 < g2 and .false.
-    !         otherwise. It is basically the same as the lessEqual
-    !         function, except that the equality is now considered as
-    !         .false.
+    !       The definition of the derived data type indirectHaloType.
     !
-    implicit none
+    type indirectHaloType
+
+        ! myBlock      : Local block ID of the halo.
+        ! myI, myJ, myK: i,j,k indices of the halo.
+        ! myDirectHalo : Index in the haloListType where the
+        !                corresponding direct halo is stored.
+        ! levOfInd     : Level of indirectness.
+        ! donorProc    : Processor where donor of the direct halo is
+        !                stored. In case this halo is a boundary
+        !                halo, donorProc is set to -1.
+
+        integer(kind=intType) :: myBlock
+        integer(kind=intType) :: myI, myJ, myK
+        integer(kind=intType) :: myDirectHalo
+        integer(kind=intType) :: levOfInd
+        integer(kind=intType) :: donorProc
+
+    end type indirectHaloType
+
+    ! Interface for the extension of the operators <= and <.
+    ! These are needed for the sorting of indirectHaloType.
+
+    interface operator(<=)
+        module procedure lessEqualIndirectHaloType
+    end interface operator(<=)
+
+    interface operator(<)
+        module procedure lessIndirectHaloType
+    end interface operator(<)
+
+    ! nIndHalo         : Number of indirect halo's to be treated.
+    ! indHalo(nIndHalo): The indirect halo's.
+
+    integer(kind=intType) :: nIndHalo
+    type(indirectHaloType), dimension(:), allocatable :: indHalo
+
+    ! nLevOfInd               : Number of levels of indirectness
+    ! nHaloPerLev(0:nLevOfInd): Number of indirect halo's per level
+    !                           of indirectness; stored in
+    !                           cumulative storage format.
+    ! nHaloPerProc(0:nProc)   : Number of indirect halo's per
+    !                           processor for a given level of
+    !                           indirectness; cumulative storage.
+    !                           nHaloPerProc(0) is not 0,
+    !                           because of the presence of boundary
+    !                           halo's, which get proc ID -1.
+
+    integer(kind=intType) :: nLevOfInd
+    integer(kind=intType), dimension(:), allocatable :: nHaloPerLev
+    integer(kind=intType), dimension(:), allocatable :: nHaloPerProc
+
+contains
     !
-    !        Function arguments.
+    !         Functions to simulate the operators <= and <.
     !
-    type(indirectHaloType), intent(in) :: g1, g2
-
-    ! Compare the level of indirectness. If not equal, set
-    ! lessIndirectHaloType appropriately and return.
-
-    if(g1%levOfInd < g2%levOfInd) then
-       lessIndirectHaloType = .true.
-       return
-    else if(g1%levOfInd > g2%levOfInd) then
-       lessIndirectHaloType = .false.
-       return
-    endif
-
-    ! Compare the donor processors.
-
-    if(g1%donorProc < g2%donorProc) then
-       lessIndirectHaloType = .true.
-       return
-    else if(g1%donorProc > g2%donorProc) then
-       lessIndirectHaloType = .false.
-       return
-    endif
-
-    ! Compare the direct halo.
-
-    if(g1%myDirectHalo < g2%myDirectHalo) then
-       lessIndirectHaloType = .true.
-       return
-    else if(g1%myDirectHalo > g2%myDirectHalo) then
-       lessIndirectHaloType = .false.
-       return
-    endif
-
-    ! Compare my block id.
-
-    if(g1%myBlock < g2%myBlock) then
-       lessIndirectHaloType = .true.
-       return
-    else if(g1%myBlock > g2%myBlock) then
-       lessIndirectHaloType = .false.
-       return
-    endif
-
-    ! Finally compare the halo indices. Start with k.
-
-    if(g1%myK < g2%myK) then
-       lessIndirectHaloType = .true.
-       return
-    else if(g1%myK > g2%myK) then
-       lessIndirectHaloType = .false.
-       return
-    endif
-
-    ! The j index.
-
-    if(g1%myJ < g2%myJ) then
-       lessIndirectHaloType = .true.
-       return
-    else if(g1%myJ > g2%myJ) then
-       lessIndirectHaloType = .false.
-       return
-    endif
-
-    ! The i index.
-
-    if(g1%myI < g2%myI) then
-       lessIndirectHaloType = .true.
-       return
-    else if(g1%myI > g2%myI) then
-       lessIndirectHaloType = .false.
-       return
-    endif
-
-    ! Both entities are identical.
-    ! So set lessIndirectHaloType to .false.
-
-    lessIndirectHaloType = .false.
-
-  end function lessIndirectHaloType
+    logical function lessEqualIndirectHaloType(g1, g2)
+        !
+        !         This function returns .true. if g1 <= g2 and .false.
+        !         otherwise. The comparison is firstly based on the level of
+        !         indirectness followed by the donor processor, the
+        !         corresponding direct halo, my block ID and finally the i, j
+        !         and k indices.
+        !
+        implicit none
+        !
+        !        Function arguments.
+        !
+        type(indirectHaloType), intent(in) :: g1, g2
+
+        ! Compare the level of indirectness. If not equal, set
+        ! lessEqual appropriately and return.
+
+        if (g1%levOfInd < g2%levOfInd) then
+            lessEqualIndirectHaloType = .true.
+            return
+        else if (g1%levOfInd > g2%levOfInd) then
+            lessEqualIndirectHaloType = .false.
+            return
+        end if
+
+        ! Compare the donor processors.
+
+        if (g1%donorProc < g2%donorProc) then
+            lessEqualIndirectHaloType = .true.
+            return
+        else if (g1%donorProc > g2%donorProc) then
+            lessEqualIndirectHaloType = .false.
+            return
+        end if
+
+        ! Compare the direct halo.
+
+        if (g1%myDirectHalo < g2%myDirectHalo) then
+            lessEqualIndirectHaloType = .true.
+            return
+        else if (g1%myDirectHalo > g2%myDirectHalo) then
+            lessEqualIndirectHaloType = .false.
+            return
+        end if
+
+        ! Compare my block ID.
+
+        if (g1%myBlock < g2%myBlock) then
+            lessEqualIndirectHaloType = .true.
+            return
+        else if (g1%myBlock > g2%myBlock) then
+            lessEqualIndirectHaloType = .false.
+            return
+        end if
+
+        ! Finally compare the halo indices. Start with k.
+
+        if (g1%myK < g2%myK) then
+            lessEqualIndirectHaloType = .true.
+            return
+        else if (g1%myK > g2%myK) then
+            lessEqualIndirectHaloType = .false.
+            return
+        end if
+
+        ! The j index.
+
+        if (g1%myJ < g2%myJ) then
+            lessEqualIndirectHaloType = .true.
+            return
+        else if (g1%myJ > g2%myJ) then
+            lessEqualIndirectHaloType = .false.
+            return
+        end if
+
+        ! The i index.
+
+        if (g1%myI < g2%myI) then
+            lessEqualIndirectHaloType = .true.
+            return
+        else if (g1%myI > g2%myI) then
+            lessEqualIndirectHaloType = .false.
+            return
+        end if
+
+        ! Both entities are identical. So set lessEqual to .true.
+
+        lessEqualIndirectHaloType = .true.
+
+    end function lessEqualIndirectHaloType
+
+    !        ================================================================
+
+    logical function lessIndirectHaloType(g1, g2)
+        !
+        !         This function returns .true. If g1 < g2 and .false.
+        !         otherwise. It is basically the same as the lessEqual
+        !         function, except that the equality is now considered as
+        !         .false.
+        !
+        implicit none
+        !
+        !        Function arguments.
+        !
+        type(indirectHaloType), intent(in) :: g1, g2
+
+        ! Compare the level of indirectness. If not equal, set
+        ! lessIndirectHaloType appropriately and return.
+
+        if (g1%levOfInd < g2%levOfInd) then
+            lessIndirectHaloType = .true.
+            return
+        else if (g1%levOfInd > g2%levOfInd) then
+            lessIndirectHaloType = .false.
+            return
+        end if
+
+        ! Compare the donor processors.
+
+        if (g1%donorProc < g2%donorProc) then
+            lessIndirectHaloType = .true.
+            return
+        else if (g1%donorProc > g2%donorProc) then
+            lessIndirectHaloType = .false.
+            return
+        end if
+
+        ! Compare the direct halo.
+
+        if (g1%myDirectHalo < g2%myDirectHalo) then
+            lessIndirectHaloType = .true.
+            return
+        else if (g1%myDirectHalo > g2%myDirectHalo) then
+            lessIndirectHaloType = .false.
+            return
+        end if
+
+        ! Compare my block id.
+
+        if (g1%myBlock < g2%myBlock) then
+            lessIndirectHaloType = .true.
+            return
+        else if (g1%myBlock > g2%myBlock) then
+            lessIndirectHaloType = .false.
+            return
+        end if
+
+        ! Finally compare the halo indices. Start with k.
+
+        if (g1%myK < g2%myK) then
+            lessIndirectHaloType = .true.
+            return
+        else if (g1%myK > g2%myK) then
+            lessIndirectHaloType = .false.
+            return
+        end if
+
+        ! The j index.
+
+        if (g1%myJ < g2%myJ) then
+            lessIndirectHaloType = .true.
+            return
+        else if (g1%myJ > g2%myJ) then
+            lessIndirectHaloType = .false.
+            return
+        end if
+
+        ! The i index.
+
+        if (g1%myI < g2%myI) then
+            lessIndirectHaloType = .true.
+            return
+        else if (g1%myI > g2%myI) then
+            lessIndirectHaloType = .false.
+            return
+        end if
+
+        ! Both entities are identical.
+        ! So set lessIndirectHaloType to .false.
+
+        lessIndirectHaloType = .false.
+
+    end function lessIndirectHaloType
 
 end module indirectHalo
 module haloList
-  !
-  !       This local module contains temporary variables to create the
-  !       list of halo cells and nodes.
-  !
-  use precision
-  implicit none
-  save
-
-  public
-  private :: lessEqualHaloListType
-  private :: lessHaloListType
-  !
-  !       The definition of the variables for the 3 lists.
-  !
-  type haloListType
-
-     ! myBlock:           local block ID of the halo.
-     ! myI, myJ, myK:     i,j,k indices of the halo.
-     ! donorProc :        processor where donor is stored. In case
-     !                    the halo is a boundary halo, donorProc
-     !                    is set to -1.
-     ! donorBlock:        block ID of the donor. In case the halo
-     !                    is a boundary halo donorBlock is set
-     !                    to the corresponding boundary condition.
-     ! dI, dJ, dK:        i,j,k indices of the donor.
-     ! levOfInd:          level of indirectness.
-     ! interp(..):        interpolants for the donor stencil; only
-     !                    allocated for lists requiring this info.
-     ! nPeriodicSubfaces: Number of periodic subfaces that are
-     !                    crossed when going from the halo to the
-     !                    donor. This is at most the level of
-     !                    indirectness of the halo.
-     ! periodicSubfaces:  The corresponding subfaces ID's according
-     !                    to the sequence defined in periodicGlobal.
-
-     integer(kind=intType) :: myBlock
-     integer(kind=intType) :: myI, myJ, myK
-     integer(kind=intType) :: donorProc, donorBlock
-     integer(kind=intType) :: dI, dJ, dK
-     integer(kind=intType) :: levOfInd
-     real(kind=realType), dimension(:), pointer :: interp
-
-     integer(kind=intType) :: nPeriodicSubfaces
-     integer(kind=intType), dimension(:), pointer :: periodicSubfaces
-
-  end type haloListType
-
-  ! Interface for the extension of the operators <= and <.
-  ! These are needed for the sorting of haloListType.
-
-  interface operator(<=)
-     module procedure lessEqualHaloListType
-  end interface operator(<=)
-
-  interface operator(<)
-     module procedure lessHaloListType
-  end interface operator(<)
-
-  ! nCellHalo1st: # of 1st level cell halo's
-  ! nCellHalo2nd: # of 2nd level cell halo's
-  ! nNodeHalo1st: # of 1st level node halo's
-
-  integer(kind=intType) :: nCellHalo1st, nCellHalo2nd
-  integer(kind=intType) :: nNodeHalo1st
-
-  ! iiCell1st: Counter variable for the 1st level cell halo's
-  ! iiCell2nd: Counter variable for the 2nd level cell halo's
-  ! iiNode1st: Counter variable for the 1st level node halo's
-
-  integer(kind=intType) :: iiCell1st, iiCell2nd, iiNode1st
-
-  ! cellHalo1st(nCellHalo1st) :: List of halo info for 1st
-  !                              level cell halo's.
-  ! cellHalo2nd(nCellHalo2nd) :: Idem for 2nd level cell halo's.
-  ! nodeHalo1st(nNodeHalo1st) :: Idem for 1st level node halo's.
-
-  type(haloListType), dimension(:), allocatable :: cellHalo1st
-  type(haloListType), dimension(:), allocatable :: cellHalo2nd
-  type(haloListType), dimension(:), allocatable :: nodeHalo1st
-
-  ! transformCell(nCellHalo1st,3) :: Short hand for the transformation
-  !                                  matrix between the halo and
-  !                                  the donor for cell based halo's.
-  !                                  In principle the size equals the
-  !                                  number of faces (i.e. direct)
-  !                                  halo's, but the difference is
-  !                                  not so large.
-  ! transformNode(nNodeHalo1st,3) :: Idem for the nodes.
-
-  integer(kind=intType), dimension(:,:), allocatable :: transformCell
-  integer(kind=intType), dimension(:,:), allocatable :: transformNode
-  !
-  !       The definition of the index variables, which store for each
-  !       i,j,k in the block the index in the corresponding list.
-  !       I know I'm wasting memory here (because only the halo's are
-  !       relevant), but that's not too much of a problem. The reason is
-  !       that neither the metrics nor the variables have been allocated
-  !       yet. So later on, much more memory is needed than the single
-  !       integer for each cell/node used here.
-  !
-  type indexListType
-
-     ! entryList(:,:,:)  :: Corresponding entry in the list.
-     !                      Dimensions are either 0:ie,0:je,0:ke for
-     !                      the node or 0:ib,0:jb,0:kb for the cell
-     !                      based halo's. The latter is then suited
-     !                      for the 2nd level halo's.
-
-     integer(kind=intType), dimension(:,:,:), pointer :: entryList
-
-  end type indexListType
-
-  ! nodeIndex(nDom) :: The node indices for every block.
-  ! cellIndex(nDom) :: Idem for the cells.
-
-  type(indexListType), allocatable, dimension(:) :: nodeIndex
-  type(indexListType), allocatable, dimension(:) :: cellIndex
-
-contains
-  !
-  !         Functions to simulate the operators <= and <.
-  !
-  logical function lessEqualHaloListType(g1, g2)
     !
-    !         lessEqual returns .true. if g1 <= g2 and .false. otherwise.
-    !         The comparison is firstly based on the processor ID of the
-    !         donor. After that it depends whether the halo is a boundary
-    !         halo or not. Note that boundary halo's have a donor processor
-    !         if of -1, such that they are always first in the list.
+    !       This local module contains temporary variables to create the
+    !       list of halo cells and nodes.
     !
+    use precision
     implicit none
+    save
+
+    public
+    private :: lessEqualHaloListType
+    private :: lessHaloListType
     !
-    !        Function arguments.
+    !       The definition of the variables for the 3 lists.
     !
-    type(haloListType), intent(in) :: g1, g2
-
-    ! Compare the donor processors first. If not equal,
-    ! set lessEqual appropriately and return.
-
-    if(g1%donorProc < g2%donorProc) then
-       lessEqualHaloListType = .true.
-       return
-    else if(g1%donorProc > g2%donorProc) then
-       lessEqualHaloListType = .false.
-       return
-    endif
-
-    ! Donor processors are identical. Now it depends whether we are
-    ! dealing with boundary halo's or not.
-
-    boundary: if(g1%donorProc == -1) then ! And thus
-       ! g2%donorProc == -1
-
-       ! Both halo's are boundary halo's. Compare the block ID of
-       ! the halo's.
-
-       if(g1%myBlock < g2%myBlock) then
-          lessEqualHaloListType = .true.
-          return
-       else if(g1%myBlock > g2%myBlock) then
-          lessEqualHaloListType = .false.
-          return
-       endif
-
-       ! Compare the boundary conditions, which are stored in
-       ! donorBlock. Note that the sequence in BCTypes is such that
-       ! the most important BC has the highest number.
-
-       if(g1%donorBlock < g2%donorBlock) then
-          lessEqualHaloListType = .true.
-          return
-       else if(g1%donorBlock > g2%donorBlock) then
-          lessEqualHaloListType = .false.
-          return
-       endif
-
-       ! As it is possible that indirect halo's need donor info from
-       ! direct halo's or even indirect halo's with a smaller level
-       ! of indirectness, compare the level of indirectness.
-
-       if(g1%levOfInd < g2%levOfInd) then
-          lessEqualHaloListType = .true.
-          return
-       else if(g1%levOfInd > g2%levOfInd) then
-          lessEqualHaloListType = .false.
-          return
-       endif
-
-       ! Compare the indices of the halo. First k, then j and
-       ! finally i.
-
-       if(g1%myK < g2%myK) then
-          lessEqualHaloListType = .true.
-          return
-       else if(g1%myK > g2%myK) then
-          lessEqualHaloListType = .false.
-          return
-       endif
-
-       if(g1%myJ < g2%myJ) then
-          lessEqualHaloListType = .true.
-          return
-       else if(g1%myJ > g2%myJ) then
-          lessEqualHaloListType = .false.
-          return
-       endif
-
-       if(g1%myI < g2%myI) then
-          lessEqualHaloListType = .true.
-          return
-       else if(g1%myI > g2%myI) then
-          lessEqualHaloListType = .false.
-          return
-       endif
-
-       ! No need to compare anything else; g1 == g2.
-
-    else boundary
-
-       ! Both halo's are internal halo's, whose donor is stored on
-       ! the same processor. Compare the donor blocks.
-
-       if(g1%donorBlock < g2%donorBlock) then
-          lessEqualHaloListType = .true.
-          return
-       else if(g1%donorBlock > g2%donorBlock) then
-          lessEqualHaloListType = .false.
-          return
-       endif
-
-       ! Also the blocks are identical. Compare the donor indices.
-       ! First the k index.
-
-       if(g1%dK < g2%dK) then
-          lessEqualHaloListType = .true.
-          return
-       else if(g1%dK > g2%dK) then
-          lessEqualHaloListType = .false.
-          return
-       endif
-
-       ! The j index.
-
-       if(g1%dJ < g2%dJ) then
-          lessEqualHaloListType = .true.
-          return
-       else if(g1%dJ > g2%dJ) then
-          lessEqualHaloListType = .false.
-          return
-       endif
-
-       ! And the i index.
-
-       if(g1%dI < g2%dI) then
-          lessEqualHaloListType = .true.
-          return
-       else if(g1%dI > g2%dI) then
-          lessEqualHaloListType = .false.
-          return
-       endif
-
-       ! The donors are identical. Compare the halo's.
-       ! First the block id.
-
-       if(g1%myBlock < g2%myBlock) then
-          lessEqualHaloListType = .true.
-          return
-       else if(g1%myBlock > g2%myBlock) then
-          lessEqualHaloListType = .false.
-          return
-       endif
-
-       ! Halo blocks are also identical. Finally compare the
-       ! halo indices. Start with k.
-
-       if(g1%myK < g2%myK) then
-          lessEqualHaloListType = .true.
-          return
-       else if(g1%myK > g2%myK) then
-          lessEqualHaloListType = .false.
-          return
-       endif
-
-       ! The j index.
-
-       if(g1%myJ < g2%myJ) then
-          lessEqualHaloListType = .true.
-          return
-       else if(g1%myJ > g2%myJ) then
-          lessEqualHaloListType = .false.
-          return
-       endif
-
-       ! The i index.
-
-       if(g1%myI < g2%myI) then
-          lessEqualHaloListType = .true.
-          return
-       else if(g1%myI > g2%myI) then
-          lessEqualHaloListType = .false.
-          return
-       endif
-
-    endif boundary
-
-    ! Both entities are identical. So set lessEqual to .true.
-
-    lessEqualHaloListType = .true.
-
-  end function lessEqualHaloListType
-
-  !        ================================================================
-
-  logical function lessHaloListType(g1, g2)
+    type haloListType
+
+        ! myBlock:           local block ID of the halo.
+        ! myI, myJ, myK:     i,j,k indices of the halo.
+        ! donorProc :        processor where donor is stored. In case
+        !                    the halo is a boundary halo, donorProc
+        !                    is set to -1.
+        ! donorBlock:        block ID of the donor. In case the halo
+        !                    is a boundary halo donorBlock is set
+        !                    to the corresponding boundary condition.
+        ! dI, dJ, dK:        i,j,k indices of the donor.
+        ! levOfInd:          level of indirectness.
+        ! interp(..):        interpolants for the donor stencil; only
+        !                    allocated for lists requiring this info.
+        ! nPeriodicSubfaces: Number of periodic subfaces that are
+        !                    crossed when going from the halo to the
+        !                    donor. This is at most the level of
+        !                    indirectness of the halo.
+        ! periodicSubfaces:  The corresponding subfaces ID's according
+        !                    to the sequence defined in periodicGlobal.
+
+        integer(kind=intType) :: myBlock
+        integer(kind=intType) :: myI, myJ, myK
+        integer(kind=intType) :: donorProc, donorBlock
+        integer(kind=intType) :: dI, dJ, dK
+        integer(kind=intType) :: levOfInd
+        real(kind=realType), dimension(:), pointer :: interp
+
+        integer(kind=intType) :: nPeriodicSubfaces
+        integer(kind=intType), dimension(:), pointer :: periodicSubfaces
+
+    end type haloListType
+
+    ! Interface for the extension of the operators <= and <.
+    ! These are needed for the sorting of haloListType.
+
+    interface operator(<=)
+        module procedure lessEqualHaloListType
+    end interface operator(<=)
+
+    interface operator(<)
+        module procedure lessHaloListType
+    end interface operator(<)
+
+    ! nCellHalo1st: # of 1st level cell halo's
+    ! nCellHalo2nd: # of 2nd level cell halo's
+    ! nNodeHalo1st: # of 1st level node halo's
+
+    integer(kind=intType) :: nCellHalo1st, nCellHalo2nd
+    integer(kind=intType) :: nNodeHalo1st
+
+    ! iiCell1st: Counter variable for the 1st level cell halo's
+    ! iiCell2nd: Counter variable for the 2nd level cell halo's
+    ! iiNode1st: Counter variable for the 1st level node halo's
+
+    integer(kind=intType) :: iiCell1st, iiCell2nd, iiNode1st
+
+    ! cellHalo1st(nCellHalo1st) :: List of halo info for 1st
+    !                              level cell halo's.
+    ! cellHalo2nd(nCellHalo2nd) :: Idem for 2nd level cell halo's.
+    ! nodeHalo1st(nNodeHalo1st) :: Idem for 1st level node halo's.
+
+    type(haloListType), dimension(:), allocatable :: cellHalo1st
+    type(haloListType), dimension(:), allocatable :: cellHalo2nd
+    type(haloListType), dimension(:), allocatable :: nodeHalo1st
+
+    ! transformCell(nCellHalo1st,3) :: Short hand for the transformation
+    !                                  matrix between the halo and
+    !                                  the donor for cell based halo's.
+    !                                  In principle the size equals the
+    !                                  number of faces (i.e. direct)
+    !                                  halo's, but the difference is
+    !                                  not so large.
+    ! transformNode(nNodeHalo1st,3) :: Idem for the nodes.
+
+    integer(kind=intType), dimension(:, :), allocatable :: transformCell
+    integer(kind=intType), dimension(:, :), allocatable :: transformNode
     !
-    !         This function returns .true. if g1 < g2 and .false.
-    !         otherwise. It is basically the same as the lessEqual
-    !         function, except that the equality is now considered as
-    !         .false.
+    !       The definition of the index variables, which store for each
+    !       i,j,k in the block the index in the corresponding list.
+    !       I know I'm wasting memory here (because only the halo's are
+    !       relevant), but that's not too much of a problem. The reason is
+    !       that neither the metrics nor the variables have been allocated
+    !       yet. So later on, much more memory is needed than the single
+    !       integer for each cell/node used here.
     !
-    implicit none
+    type indexListType
+
+        ! entryList(:,:,:)  :: Corresponding entry in the list.
+        !                      Dimensions are either 0:ie,0:je,0:ke for
+        !                      the node or 0:ib,0:jb,0:kb for the cell
+        !                      based halo's. The latter is then suited
+        !                      for the 2nd level halo's.
+
+        integer(kind=intType), dimension(:, :, :), pointer :: entryList
+
+    end type indexListType
+
+    ! nodeIndex(nDom) :: The node indices for every block.
+    ! cellIndex(nDom) :: Idem for the cells.
+
+    type(indexListType), allocatable, dimension(:) :: nodeIndex
+    type(indexListType), allocatable, dimension(:) :: cellIndex
+
+contains
     !
-    !        Function arguments.
+    !         Functions to simulate the operators <= and <.
     !
-    type(haloListType), intent(in) :: g1, g2
-
-    ! Compare the donor processors first. If not equal,
-    ! set the function appropriately and return.
-
-    if(g1%donorProc < g2%donorProc) then
-       lessHaloListType = .true.
-       return
-    else if(g1%donorProc > g2%donorProc) then
-       lessHaloListType = .false.
-       return
-    endif
-
-    ! Donor processors are identical. Now it depends whether we are
-    ! dealing with boundary halo's or not.
-
-    boundary: if(g1%donorProc == -1) then ! And thus
-       ! g2%donorProc == -1
-
-       ! Both halo's are boundary halo's. Compare the block ID of
-       ! the halo's.
-
-       if(g1%myBlock < g2%myBlock) then
-          lessHaloListType = .true.
-          return
-       else if(g1%myBlock > g2%myBlock) then
-          lessHaloListType = .false.
-          return
-       endif
-
-       ! Compare the boundary conditions, which are stored in
-       ! donorBlock. Note that the sequence in BCTypes is such that
-       ! the most important bc has the highest number.
-
-       if(g1%donorBlock < g2%donorBlock) then
-          lessHaloListType = .true.
-          return
-       else if(g1%donorBlock > g2%donorBlock) then
-          lessHaloListType = .false.
-          return
-       endif
-
-       ! As it is possible that indirect halo's need donor info from
-       ! direct halo's or even indirect halo's with a smaller level
-       ! of indirectness, compare the level of indirectness.
-
-       if(g1%levOfInd < g2%levOfInd) then
-          lessHaloListType = .true.
-          return
-       else if(g1%levOfInd > g2%levOfInd) then
-          lessHaloListType = .false.
-          return
-       endif
-
-       ! Compare the indices of the halo. First k, then j and
-       ! finally i.
-
-       if(g1%myK < g2%myK) then
-          lessHaloListType = .true.
-          return
-       else if(g1%myK > g2%myK) then
-          lessHaloListType = .false.
-          return
-       endif
-
-       if(g1%myJ < g2%myJ) then
-          lessHaloListType = .true.
-          return
-       else if(g1%myJ > g2%myJ) then
-          lessHaloListType = .false.
-          return
-       endif
-
-       if(g1%myI < g2%myI) then
-          lessHaloListType = .true.
-          return
-       else if(g1%myI > g2%myI) then
-          lessHaloListType = .false.
-          return
-       endif
-
-       ! No need to compare anything else. G1 == g2.
-
-    else boundary
-
-       ! Both halo's are internal halo's, whose donor is stored on
-       ! the same processor. Compare the donor blocks.
-
-       if(g1%donorBlock < g2%donorBlock) then
-          lessHaloListType = .true.
-          return
-       else if(g1%donorBlock > g2%donorBlock) then
-          lessHaloListType = .false.
-          return
-       endif
-
-       ! Also the blocks are identical. Compare the donor indices.
-       ! First the k index.
-
-       if(g1%dK < g2%dK) then
-          lessHaloListType = .true.
-          return
-       else if(g1%dK > g2%dK) then
-          lessHaloListType = .false.
-          return
-       endif
-
-       ! The j index.
-
-       if(g1%dJ < g2%dJ) then
-          lessHaloListType = .true.
-          return
-       else if(g1%dJ > g2%dJ) then
-          lessHaloListType = .false.
-          return
-       endif
-
-       ! And the i index.
-
-       if(g1%dI < g2%dI) then
-          lessHaloListType = .true.
-          return
-       else if(g1%dI > g2%dI) then
-          lessHaloListType = .false.
-          return
-       endif
-
-       ! The donors are identical. Compare the halo's.
-       ! First the block id.
-
-       if(g1%myBlock < g2%myBlock) then
-          lessHaloListType = .true.
-          return
-       else if(g1%myBlock > g2%myBlock) then
-          lessHaloListType = .false.
-          return
-       endif
-
-       ! Halo blocks are also identical. Finally compare the
-       ! halo indices. Start with k.
-
-       if(g1%myK < g2%myK) then
-          lessHaloListType = .true.
-          return
-       else if(g1%myK > g2%myK) then
-          lessHaloListType = .false.
-          return
-       endif
-
-       ! The j index.
-
-       if(g1%myJ < g2%myJ) then
-          lessHaloListType = .true.
-          return
-       else if(g1%myJ > g2%myJ) then
-          lessHaloListType = .false.
-          return
-       endif
-
-       ! The i index.
-
-       if(g1%myI < g2%myI) then
-          lessHaloListType = .true.
-          return
-       else if(g1%myI > g2%myI) then
-          lessHaloListType = .false.
-          return
-       endif
-
-    endif boundary
-
-    ! Both entities are identical.
-    ! So set lessHaloListType to .false.
-
-    lessHaloListType = .false.
-
-  end function lessHaloListType
+    logical function lessEqualHaloListType(g1, g2)
+        !
+        !         lessEqual returns .true. if g1 <= g2 and .false. otherwise.
+        !         The comparison is firstly based on the processor ID of the
+        !         donor. After that it depends whether the halo is a boundary
+        !         halo or not. Note that boundary halo's have a donor processor
+        !         if of -1, such that they are always first in the list.
+        !
+        implicit none
+        !
+        !        Function arguments.
+        !
+        type(haloListType), intent(in) :: g1, g2
+
+        ! Compare the donor processors first. If not equal,
+        ! set lessEqual appropriately and return.
+
+        if (g1%donorProc < g2%donorProc) then
+            lessEqualHaloListType = .true.
+            return
+        else if (g1%donorProc > g2%donorProc) then
+            lessEqualHaloListType = .false.
+            return
+        end if
+
+        ! Donor processors are identical. Now it depends whether we are
+        ! dealing with boundary halo's or not.
+
+        boundary: if (g1%donorProc == -1) then ! And thus
+            ! g2%donorProc == -1
+
+            ! Both halo's are boundary halo's. Compare the block ID of
+            ! the halo's.
+
+            if (g1%myBlock < g2%myBlock) then
+                lessEqualHaloListType = .true.
+                return
+            else if (g1%myBlock > g2%myBlock) then
+                lessEqualHaloListType = .false.
+                return
+            end if
+
+            ! Compare the boundary conditions, which are stored in
+            ! donorBlock. Note that the sequence in BCTypes is such that
+            ! the most important BC has the highest number.
+
+            if (g1%donorBlock < g2%donorBlock) then
+                lessEqualHaloListType = .true.
+                return
+            else if (g1%donorBlock > g2%donorBlock) then
+                lessEqualHaloListType = .false.
+                return
+            end if
+
+            ! As it is possible that indirect halo's need donor info from
+            ! direct halo's or even indirect halo's with a smaller level
+            ! of indirectness, compare the level of indirectness.
+
+            if (g1%levOfInd < g2%levOfInd) then
+                lessEqualHaloListType = .true.
+                return
+            else if (g1%levOfInd > g2%levOfInd) then
+                lessEqualHaloListType = .false.
+                return
+            end if
+
+            ! Compare the indices of the halo. First k, then j and
+            ! finally i.
+
+            if (g1%myK < g2%myK) then
+                lessEqualHaloListType = .true.
+                return
+            else if (g1%myK > g2%myK) then
+                lessEqualHaloListType = .false.
+                return
+            end if
+
+            if (g1%myJ < g2%myJ) then
+                lessEqualHaloListType = .true.
+                return
+            else if (g1%myJ > g2%myJ) then
+                lessEqualHaloListType = .false.
+                return
+            end if
+
+            if (g1%myI < g2%myI) then
+                lessEqualHaloListType = .true.
+                return
+            else if (g1%myI > g2%myI) then
+                lessEqualHaloListType = .false.
+                return
+            end if
+
+            ! No need to compare anything else; g1 == g2.
+
+            else boundary
+
+            ! Both halo's are internal halo's, whose donor is stored on
+            ! the same processor. Compare the donor blocks.
+
+            if (g1%donorBlock < g2%donorBlock) then
+                lessEqualHaloListType = .true.
+                return
+            else if (g1%donorBlock > g2%donorBlock) then
+                lessEqualHaloListType = .false.
+                return
+            end if
+
+            ! Also the blocks are identical. Compare the donor indices.
+            ! First the k index.
+
+            if (g1%dK < g2%dK) then
+                lessEqualHaloListType = .true.
+                return
+            else if (g1%dK > g2%dK) then
+                lessEqualHaloListType = .false.
+                return
+            end if
+
+            ! The j index.
+
+            if (g1%dJ < g2%dJ) then
+                lessEqualHaloListType = .true.
+                return
+            else if (g1%dJ > g2%dJ) then
+                lessEqualHaloListType = .false.
+                return
+            end if
+
+            ! And the i index.
+
+            if (g1%dI < g2%dI) then
+                lessEqualHaloListType = .true.
+                return
+            else if (g1%dI > g2%dI) then
+                lessEqualHaloListType = .false.
+                return
+            end if
+
+            ! The donors are identical. Compare the halo's.
+            ! First the block id.
+
+            if (g1%myBlock < g2%myBlock) then
+                lessEqualHaloListType = .true.
+                return
+            else if (g1%myBlock > g2%myBlock) then
+                lessEqualHaloListType = .false.
+                return
+            end if
+
+            ! Halo blocks are also identical. Finally compare the
+            ! halo indices. Start with k.
+
+            if (g1%myK < g2%myK) then
+                lessEqualHaloListType = .true.
+                return
+            else if (g1%myK > g2%myK) then
+                lessEqualHaloListType = .false.
+                return
+            end if
+
+            ! The j index.
+
+            if (g1%myJ < g2%myJ) then
+                lessEqualHaloListType = .true.
+                return
+            else if (g1%myJ > g2%myJ) then
+                lessEqualHaloListType = .false.
+                return
+            end if
+
+            ! The i index.
+
+            if (g1%myI < g2%myI) then
+                lessEqualHaloListType = .true.
+                return
+            else if (g1%myI > g2%myI) then
+                lessEqualHaloListType = .false.
+                return
+            end if
+
+        end if boundary
+
+        ! Both entities are identical. So set lessEqual to .true.
+
+        lessEqualHaloListType = .true.
+
+    end function lessEqualHaloListType
+
+    !        ================================================================
+
+    logical function lessHaloListType(g1, g2)
+        !
+        !         This function returns .true. if g1 < g2 and .false.
+        !         otherwise. It is basically the same as the lessEqual
+        !         function, except that the equality is now considered as
+        !         .false.
+        !
+        implicit none
+        !
+        !        Function arguments.
+        !
+        type(haloListType), intent(in) :: g1, g2
+
+        ! Compare the donor processors first. If not equal,
+        ! set the function appropriately and return.
+
+        if (g1%donorProc < g2%donorProc) then
+            lessHaloListType = .true.
+            return
+        else if (g1%donorProc > g2%donorProc) then
+            lessHaloListType = .false.
+            return
+        end if
+
+        ! Donor processors are identical. Now it depends whether we are
+        ! dealing with boundary halo's or not.
+
+        boundary: if (g1%donorProc == -1) then ! And thus
+            ! g2%donorProc == -1
+
+            ! Both halo's are boundary halo's. Compare the block ID of
+            ! the halo's.
+
+            if (g1%myBlock < g2%myBlock) then
+                lessHaloListType = .true.
+                return
+            else if (g1%myBlock > g2%myBlock) then
+                lessHaloListType = .false.
+                return
+            end if
+
+            ! Compare the boundary conditions, which are stored in
+            ! donorBlock. Note that the sequence in BCTypes is such that
+            ! the most important bc has the highest number.
+
+            if (g1%donorBlock < g2%donorBlock) then
+                lessHaloListType = .true.
+                return
+            else if (g1%donorBlock > g2%donorBlock) then
+                lessHaloListType = .false.
+                return
+            end if
+
+            ! As it is possible that indirect halo's need donor info from
+            ! direct halo's or even indirect halo's with a smaller level
+            ! of indirectness, compare the level of indirectness.
+
+            if (g1%levOfInd < g2%levOfInd) then
+                lessHaloListType = .true.
+                return
+            else if (g1%levOfInd > g2%levOfInd) then
+                lessHaloListType = .false.
+                return
+            end if
+
+            ! Compare the indices of the halo. First k, then j and
+            ! finally i.
+
+            if (g1%myK < g2%myK) then
+                lessHaloListType = .true.
+                return
+            else if (g1%myK > g2%myK) then
+                lessHaloListType = .false.
+                return
+            end if
+
+            if (g1%myJ < g2%myJ) then
+                lessHaloListType = .true.
+                return
+            else if (g1%myJ > g2%myJ) then
+                lessHaloListType = .false.
+                return
+            end if
+
+            if (g1%myI < g2%myI) then
+                lessHaloListType = .true.
+                return
+            else if (g1%myI > g2%myI) then
+                lessHaloListType = .false.
+                return
+            end if
+
+            ! No need to compare anything else. G1 == g2.
+
+            else boundary
+
+            ! Both halo's are internal halo's, whose donor is stored on
+            ! the same processor. Compare the donor blocks.
+
+            if (g1%donorBlock < g2%donorBlock) then
+                lessHaloListType = .true.
+                return
+            else if (g1%donorBlock > g2%donorBlock) then
+                lessHaloListType = .false.
+                return
+            end if
+
+            ! Also the blocks are identical. Compare the donor indices.
+            ! First the k index.
+
+            if (g1%dK < g2%dK) then
+                lessHaloListType = .true.
+                return
+            else if (g1%dK > g2%dK) then
+                lessHaloListType = .false.
+                return
+            end if
+
+            ! The j index.
+
+            if (g1%dJ < g2%dJ) then
+                lessHaloListType = .true.
+                return
+            else if (g1%dJ > g2%dJ) then
+                lessHaloListType = .false.
+                return
+            end if
+
+            ! And the i index.
+
+            if (g1%dI < g2%dI) then
+                lessHaloListType = .true.
+                return
+            else if (g1%dI > g2%dI) then
+                lessHaloListType = .false.
+                return
+            end if
+
+            ! The donors are identical. Compare the halo's.
+            ! First the block id.
+
+            if (g1%myBlock < g2%myBlock) then
+                lessHaloListType = .true.
+                return
+            else if (g1%myBlock > g2%myBlock) then
+                lessHaloListType = .false.
+                return
+            end if
+
+            ! Halo blocks are also identical. Finally compare the
+            ! halo indices. Start with k.
+
+            if (g1%myK < g2%myK) then
+                lessHaloListType = .true.
+                return
+            else if (g1%myK > g2%myK) then
+                lessHaloListType = .false.
+                return
+            end if
+
+            ! The j index.
+
+            if (g1%myJ < g2%myJ) then
+                lessHaloListType = .true.
+                return
+            else if (g1%myJ > g2%myJ) then
+                lessHaloListType = .false.
+                return
+            end if
+
+            ! The i index.
+
+            if (g1%myI < g2%myI) then
+                lessHaloListType = .true.
+                return
+            else if (g1%myI > g2%myI) then
+                lessHaloListType = .false.
+                return
+            end if
+
+        end if boundary
+
+        ! Both entities are identical.
+        ! So set lessHaloListType to .false.
+
+        lessHaloListType = .false.
+
+    end function lessHaloListType
 
 end module haloList
 
 module checkVolBlock
-  !
-  !       Local module, which contains the definition of the derived
-  !       datatype used to test for negative volumes in the grid.
-  !
-  implicit none
-  save
+    !
+    !       Local module, which contains the definition of the derived
+    !       datatype used to test for negative volumes in the grid.
+    !
+    implicit none
+    save
 
-  type checkVolBlockType
+    type checkVolBlockType
 
-     ! blockHasNegVol:              Whether or not the block
-     !                              contains negative volumes.
-     ! volumeIsNeg(2:il,2:jl,2:kl): Whether or not the owned volumes
-     !                              are negative.
+        ! blockHasNegVol:              Whether or not the block
+        !                              contains negative volumes.
+        ! volumeIsNeg(2:il,2:jl,2:kl): Whether or not the owned volumes
+        !                              are negative.
 
-     logical :: blockHasNegVol
-     logical, dimension(:,:,:), pointer :: volumeIsNeg
+        logical :: blockHasNegVol
+        logical, dimension(:, :, :), pointer :: volumeIsNeg
 
-  end type checkVolBlockType
+    end type checkVolBlockType
 
 end module checkVolBlock
 module periodicInfo
-  !
-  !       Local module that contains derived datatypes as well as arrays
-  !       of these derived datatypes to store information related to
-  !       periodicity.
-  !
-  use precision
-  implicit none
-  save
+    !
+    !       Local module that contains derived datatypes as well as arrays
+    !       of these derived datatypes to store information related to
+    !       periodicity.
+    !
+    use precision
+    implicit none
+    save
 
-  public
-  private :: lessCGNSPeriodicType
-  private :: equalCGNSPeriodicType
-  private :: lessPeriodicSubfacesHaloT
-  private :: lessEqualPeriodicSubfacesHaloT
-  private :: equalPeriodicSubfacesHaloT
+    public
+    private :: lessCGNSPeriodicType
+    private :: equalCGNSPeriodicType
+    private :: lessPeriodicSubfacesHaloT
+    private :: lessEqualPeriodicSubfacesHaloT
+    private :: equalPeriodicSubfacesHaloT
 
-  ! Definition of the derived data type for storing the periodic
-  ! faces of the cgns grid a bit easier.
+    ! Definition of the derived data type for storing the periodic
+    ! faces of the cgns grid a bit easier.
 
-  type cgnsPeriodicType
+    type cgnsPeriodicType
 
-     ! cgnsBlock   :: the block ID in the cgns grid.
-     ! cgnsSubface :: the suface ID in this block.
+        ! cgnsBlock   :: the block ID in the cgns grid.
+        ! cgnsSubface :: the suface ID in this block.
 
-     integer(kind=intType) :: cgnsBlock, cgnsSubface
+        integer(kind=intType) :: cgnsBlock, cgnsSubface
 
-  end type cgnsPeriodicType
+    end type cgnsPeriodicType
 
-  ! Interface for the extension of the operators < and ==.
-  ! These are needed for the sorting and searching of
-  ! cgnsPeriodicType.
+    ! Interface for the extension of the operators < and ==.
+    ! These are needed for the sorting and searching of
+    ! cgnsPeriodicType.
 
-  interface operator(<)
-     module procedure lessCGNSPeriodicType
-  end interface operator(<)
+    interface operator(<)
+        module procedure lessCGNSPeriodicType
+    end interface operator(<)
 
-  interface operator(==)
-     module procedure equalCGNSPeriodicType
-  end interface operator(==)
+    interface operator(==)
+        module procedure equalCGNSPeriodicType
+    end interface operator(==)
 
-  ! nPeriodicGlobal :: Total number of periodic faces in cgns grid.
-  ! periodicGlobal  :: The corresponding faces.
+    ! nPeriodicGlobal :: Total number of periodic faces in cgns grid.
+    ! periodicGlobal  :: The corresponding faces.
 
-  integer(kind=intType) :: nPeriodicGlobal
-  type(cgnsPeriodicType), dimension(:), allocatable :: periodicGlobal
+    integer(kind=intType) :: nPeriodicGlobal
+    type(cgnsPeriodicType), dimension(:), allocatable :: periodicGlobal
 
-  ! Definition of the derived data type to store the periodic
-  ! subfaces that are crossed when the halo and the donor are
-  ! connected. The direction is from the halo to the donor.
+    ! Definition of the derived data type to store the periodic
+    ! subfaces that are crossed when the halo and the donor are
+    ! connected. The direction is from the halo to the donor.
 
-  type periodicSubfacesHaloType
+    type periodicSubfacesHaloType
 
-     ! internalHalo:      Whether or not the halo is an internal
-     !                    halo, i.e. it is stored on the
-     !                    same processor as the donor.
-     ! indexInHaloList:   The corresponding index in either the
-     !                    node or cell halo list.
-     ! nPeriodicSubfaces: Number of periodic subfaces that are
-     !                    crossed. This is at most the level of
-     !                    indirectness of the halo.
-     ! periodicSubfaces:  The corresponding subfaces ID's according
-     !                    to the sequence defined in periodicGlobal.
+        ! internalHalo:      Whether or not the halo is an internal
+        !                    halo, i.e. it is stored on the
+        !                    same processor as the donor.
+        ! indexInHaloList:   The corresponding index in either the
+        !                    node or cell halo list.
+        ! nPeriodicSubfaces: Number of periodic subfaces that are
+        !                    crossed. This is at most the level of
+        !                    indirectness of the halo.
+        ! periodicSubfaces:  The corresponding subfaces ID's according
+        !                    to the sequence defined in periodicGlobal.
 
-     logical               :: internalHalo
-     integer(kind=intType) :: indexInHaloList
-     integer(kind=intType) :: nPeriodicSubfaces
-     integer(kind=intType), dimension(:), pointer :: periodicSubfaces
+        logical :: internalHalo
+        integer(kind=intType) :: indexInHaloList
+        integer(kind=intType) :: nPeriodicSubfaces
+        integer(kind=intType), dimension(:), pointer :: periodicSubfaces
 
-  end type periodicSubfacesHaloType
+    end type periodicSubfacesHaloType
 
-  ! Interface for the extension of the operators <, <= and ==.
-  ! This is needed for the sorting and comparing of variables of
-  ! the type periodicSubfacesHaloType
+    ! Interface for the extension of the operators <, <= and ==.
+    ! This is needed for the sorting and comparing of variables of
+    ! the type periodicSubfacesHaloType
 
-  interface operator(<)
-     module procedure lessPeriodicSubfacesHaloT
-  end interface operator(<)
+    interface operator(<)
+        module procedure lessPeriodicSubfacesHaloT
+    end interface operator(<)
 
-  interface operator(<=)
-     module procedure lessEqualPeriodicSubfacesHaloT
-  end interface operator(<=)
+    interface operator(<=)
+        module procedure lessEqualPeriodicSubfacesHaloT
+    end interface operator(<=)
 
-  interface operator(==)
-     module procedure equalPeriodicSubfacesHaloT
-  end interface operator(==)
+    interface operator(==)
+        module procedure equalPeriodicSubfacesHaloT
+    end interface operator(==)
 
-  !=================================================================
+    !=================================================================
 
 contains
 
-  !=================================================================
-  !
-  !         Functions to simulate the operators < and ==.
-  !
-  logical function lessCGNSPeriodicType(g1, g2)
-    !
-    !         lessCGNSPeriodicType returns .true. if g1 is considered
-    !         smaller than g2. This comparison is first based on the block
-    !         ID followed by the subface id.
-    !
-    implicit none
+    !=================================================================
     !
-    !        Function arguments.
+    !         Functions to simulate the operators < and ==.
     !
-    type(cgnsPeriodicType), intent(in) :: g1, g2
-
-    ! Compare the block ID. If not equal set lessCGNSPeriodicType
-    ! accordingly.
-
-    if(g1%cgnsBlock < g2%cgnsBlock) then
-       lessCGNSPeriodicType = .true.
-       return
-    else if(g1%cgnsBlock > g2%cgnsBlock) then
-       lessCGNSPeriodicType = .false.
-       return
-    endif
-
-    ! Block ID's are identical. Compare the subfaces.
-
-    if(g1%cgnsSubface < g2%cgnsSubface) then
-       lessCGNSPeriodicType = .true.
-       return
-    else if(g1%cgnsSubface > g2%cgnsSubface) then
-       lessCGNSPeriodicType = .false.
-       return
-    endif
-
-    ! Both objects are identical.
-    ! Set lessCGNSPeriodicType to .false.
+    logical function lessCGNSPeriodicType(g1, g2)
+        !
+        !         lessCGNSPeriodicType returns .true. if g1 is considered
+        !         smaller than g2. This comparison is first based on the block
+        !         ID followed by the subface id.
+        !
+        implicit none
+        !
+        !        Function arguments.
+        !
+        type(cgnsPeriodicType), intent(in) :: g1, g2
+
+        ! Compare the block ID. If not equal set lessCGNSPeriodicType
+        ! accordingly.
+
+        if (g1%cgnsBlock < g2%cgnsBlock) then
+            lessCGNSPeriodicType = .true.
+            return
+        else if (g1%cgnsBlock > g2%cgnsBlock) then
+            lessCGNSPeriodicType = .false.
+            return
+        end if
+
+        ! Block ID's are identical. Compare the subfaces.
+
+        if (g1%cgnsSubface < g2%cgnsSubface) then
+            lessCGNSPeriodicType = .true.
+            return
+        else if (g1%cgnsSubface > g2%cgnsSubface) then
+            lessCGNSPeriodicType = .false.
+            return
+        end if
+
+        ! Both objects are identical.
+        ! Set lessCGNSPeriodicType to .false.
+
+        lessCGNSPeriodicType = .false.
+
+    end function lessCGNSPeriodicType
+
+    !        ================================================================
+
+    logical function equalCGNSPeriodicType(g1, g2)
+        !
+        !         equalCGNSPeriodicType returns .true. if g1 is considered
+        !         equal to g2, i.e. both the block and subface ID must match,
+        !
+        implicit none
+        !
+        !        Function arguments.
+        !
+        type(cgnsPeriodicType), intent(in) :: g1, g2
+
+        equalCGNSPeriodicType = .false.
+        if (g1%cgnsBlock == g2%cgnsBlock .and. &
+            g1%cgnsSubface == g2%cgnsSubface) &
+            equalCGNSPeriodicType = .true.
+
+    end function equalCGNSPeriodicType
+
+    !        ================================================================
+
+    logical function lessPeriodicSubfacesHaloT(g1, g2)
+        !
+        !         lessPeriodicSubfacesHaloT returns .true. if g1 is
+        !         considered smaller than g2.
+        !
+        implicit none
+        !
+        !        Function arguments.
+        !
+        type(periodicSubfacesHaloType), intent(in) :: g1, g2
+        !
+        !        Local variables.
+        !
+        integer(kind=intType) :: nn, i1, i2
+
+        ! First compare whether or not both g1 and g2 are internal
+        ! halo's. Fortran does not allow a direct comparison of
+        ! logicals and therefore the integers i1 and i2 are used.
+
+        i1 = 1; if (g1%internalHalo) i1 = 0
+        i2 = 1; if (g2%internalHalo) i2 = 0
+
+        if (i1 < i2) then
+            lessPeriodicSubfacesHaloT = .true.
+            return
+        else if (i1 > i2) then
+            lessPeriodicSubfacesHaloT = .false.
+            return
+        end if
+
+        ! Compare the number of periodic subfaces.
+
+        if (g1%nPeriodicSubfaces < g2%nPeriodicSubfaces) then
+            lessPeriodicSubfacesHaloT = .true.
+            return
+        else if (g1%nPeriodicSubfaces > g2%nPeriodicSubfaces) then
+            lessPeriodicSubfacesHaloT = .false.
+            return
+        end if
+
+        ! The number of periodic subfaces is the same. Compare the
+        ! subfaces themselves. It is assumed that the subfaces are
+        ! sorted in increading order. This can be done, because the
+        ! periodic transformations are commuting matrices.
+
+        do nn = 1, g1%nPeriodicSubfaces
+            if (g1%periodicSubfaces(nn) < g2%periodicSubfaces(nn)) then
+                lessPeriodicSubfacesHaloT = .true.
+                return
+            else if (g1%periodicSubfaces(nn) > g2%periodicSubfaces(nn)) then
+                lessPeriodicSubfacesHaloT = .false.
+                return
+            end if
+        end do
+
+        ! The periodic subfaces are identical as well. Compare the
+        ! indices in the list.
+
+        if (g1%indexInHaloList < g2%indexInHaloList) then
+            lessPeriodicSubfacesHaloT = .true.
+            return
+        else if (g1%indexInHaloList > g2%indexInHaloList) then
+            lessPeriodicSubfacesHaloT = .false.
+            return
+        end if
+
+        ! Both objects are the same. Return .false.
+
+        lessPeriodicSubfacesHaloT = .false.
+
+    end function lessPeriodicSubfacesHaloT
+
+    !        ================================================================
+
+    logical function lessEqualPeriodicSubfacesHaloT(g1, g2)
+        !
+        !         lessEqualPeriodicSubfacesHaloT returns .true. if g1 is
+        !         considered smaller than or equal to g2.
+        !
+        implicit none
+        !
+        !        Function arguments.
+        !
+        type(periodicSubfacesHaloType), intent(in) :: g1, g2
+        !
+        !        Local variables.
+        !
+        integer(kind=intType) :: nn, i1, i2
+
+        ! First compare whether or not both g1 and g2 are internal
+        ! halo's. Fortran does not allow a direct comparison of
+        ! logicals and therefore the integers i1 and i2 are used.
+
+        i1 = 1; if (g1%internalHalo) i1 = 0
+        i2 = 1; if (g2%internalHalo) i2 = 0
+
+        if (i1 < i2) then
+            lessEqualPeriodicSubfacesHaloT = .true.
+            return
+        else if (i1 > i2) then
+            lessEqualPeriodicSubfacesHaloT = .false.
+            return
+        end if
+
+        ! Compare the number of periodic subfaces.
+
+        if (g1%nPeriodicSubfaces < g2%nPeriodicSubfaces) then
+            lessEqualPeriodicSubfacesHaloT = .true.
+            return
+        else if (g1%nPeriodicSubfaces > g2%nPeriodicSubfaces) then
+            lessEqualPeriodicSubfacesHaloT = .false.
+            return
+        end if
+
+        ! The number of periodic subfaces is the same. Compare the
+        ! subfaces themselves. It is assumed that the subfaces are
+        ! sorted in increading order. This can be done, because the
+        ! periodic transformations are commuting matrices.
+
+        do nn = 1, g1%nPeriodicSubfaces
+            if (g1%periodicSubfaces(nn) < g2%periodicSubfaces(nn)) then
+                lessEqualPeriodicSubfacesHaloT = .true.
+                return
+            else if (g1%periodicSubfaces(nn) > g2%periodicSubfaces(nn)) then
+                lessEqualPeriodicSubfacesHaloT = .false.
+                return
+            end if
+        end do
+
+        ! The periodic subfaces are identical as well. Compare the
+        ! indices in the list.
+
+        if (g1%indexInHaloList < g2%indexInHaloList) then
+            lessEqualPeriodicSubfacesHaloT = .true.
+            return
+        else if (g1%indexInHaloList > g2%indexInHaloList) then
+            lessEqualPeriodicSubfacesHaloT = .false.
+            return
+        end if
+
+        ! Both objects are the same. Return .true.
+
+        lessEqualPeriodicSubfacesHaloT = .true.
+
+    end function lessEqualPeriodicSubfacesHaloT
+
+    !        ================================================================
+
+    logical function equalPeriodicSubfacesHaloT(g1, g2)
+        !
+        !         equalPeriodicSubfacesHaloT returns .true. if g1 is
+        !         considered equal to g2. The equal operator is only used to
+        !         find the different number of periodic transformations in
+        !         determinePeriodicData. Hence only the periodic subfaces of
+        !         the halo's are compared and g1 and g2 are considered equal
+        !         if the subfaces are equal, even if other member variables
+        !         differ.
+        !
+        implicit none
+        !
+        !        Function arguments.
+        !
+        type(periodicSubfacesHaloType), intent(in) :: g1, g2
+        !
+        !        Local variables.
+        !
+        integer(kind=intType) :: nn
+
+        if (g1%nPeriodicSubfaces /= g2%nPeriodicSubfaces) then
+            equalPeriodicSubfacesHaloT = .false.
+            return
+        end if
+
+        do nn = 1, g1%nPeriodicSubfaces
+            if (g1%periodicSubfaces(nn) /= g2%periodicSubfaces(nn)) then
+                equalPeriodicSubfacesHaloT = .false.
+                return
+            end if
+        end do
+
+        equalPeriodicSubfacesHaloT = .true.
+
+    end function equalPeriodicSubfacesHaloT
 
-    lessCGNSPeriodicType = .false.
-
-  end function lessCGNSPeriodicType
-
-  !        ================================================================
-
-  logical function equalCGNSPeriodicType(g1, g2)
+end module periodicInfo
+module bcHalo
     !
-    !         equalCGNSPeriodicType returns .true. if g1 is considered
-    !         equal to g2, i.e. both the block and subface ID must match,
+    !       This local module contains the derived datatype bcHaloType,
+    !       which is used to determine the boundary condition for an
+    !       indirect halo when the nearest direct halo's are all boundary
+    !       halo's.
     !
+    use precision
     implicit none
-    !
-    !        Function arguments.
-    !
-    type(cgnsPeriodicType), intent(in) :: g1, g2
-
-    equalCGNSPeriodicType = .false.
-    if(g1%cgnsBlock   == g2%cgnsBlock .and. &
-         g1%cgnsSubface == g2%cgnsSubface)    &
-         equalCGNSPeriodicType = .true.
-
-  end function equalCGNSPeriodicType
+    save
 
-  !        ================================================================
-
-  logical function lessPeriodicSubfacesHaloT(g1, g2)
-    !
-    !         lessPeriodicSubfacesHaloT returns .true. if g1 is
-    !         considered smaller than g2.
+    public
+    private :: lessEqualBCHaloType
     !
-    implicit none
-    !
-    !        Function arguments.
-    !
-    type(periodicSubfacesHaloType), intent(in) :: g1, g2
-    !
-    !        Local variables.
-    !
-    integer(kind=intType) :: nn, i1, i2
-
-    ! First compare whether or not both g1 and g2 are internal
-    ! halo's. Fortran does not allow a direct comparison of
-    ! logicals and therefore the integers i1 and i2 are used.
-
-    i1 = 1; if( g1%internalHalo ) i1 = 0
-    i2 = 1; if( g2%internalHalo ) i2 = 0
-
-    if(i1 < i2) then
-       lessPeriodicSubfacesHaloT = .true.
-       return
-    else if(i1 > i2) then
-       lessPeriodicSubfacesHaloT = .false.
-       return
-    endif
-
-    ! Compare the number of periodic subfaces.
-
-    if(g1%nPeriodicSubfaces < g2%nPeriodicSubfaces) then
-       lessPeriodicSubfacesHaloT = .true.
-       return
-    else if(g1%nPeriodicSubfaces > g2%nPeriodicSubfaces) then
-       lessPeriodicSubfacesHaloT = .false.
-       return
-    endif
-
-    ! The number of periodic subfaces is the same. Compare the
-    ! subfaces themselves. It is assumed that the subfaces are
-    ! sorted in increading order. This can be done, because the
-    ! periodic transformations are commuting matrices.
-
-    do nn=1,g1%nPeriodicSubfaces
-       if(g1%periodicSubfaces(nn) < g2%periodicSubfaces(nn)) then
-          lessPeriodicSubfacesHaloT = .true.
-          return
-       else if(g1%periodicSubfaces(nn) > g2%periodicSubfaces(nn)) then
-          lessPeriodicSubfacesHaloT = .false.
-          return
-       endif
-    enddo
-
-    ! The periodic subfaces are identical as well. Compare the
-    ! indices in the list.
-
-    if(g1%indexInHaloList < g2%indexInHaloList) then
-       lessPeriodicSubfacesHaloT = .true.
-       return
-    else if(g1%indexInHaloList > g2%indexInHaloList) then
-       lessPeriodicSubfacesHaloT = .false.
-       return
-    endif
-
-    ! Both objects are the same. Return .false.
-
-    lessPeriodicSubfacesHaloT = .false.
-
-  end function lessPeriodicSubfacesHaloT
-
-  !        ================================================================
-
-  logical function lessEqualPeriodicSubfacesHaloT(g1, g2)
-    !
-    !         lessEqualPeriodicSubfacesHaloT returns .true. if g1 is
-    !         considered smaller than or equal to g2.
-    !
-    implicit none
-    !
-    !        Function arguments.
-    !
-    type(periodicSubfacesHaloType), intent(in) :: g1, g2
-    !
-    !        Local variables.
+    !       The definition of the derived datatype.
     !
-    integer(kind=intType) :: nn, i1, i2
-
-    ! First compare whether or not both g1 and g2 are internal
-    ! halo's. Fortran does not allow a direct comparison of
-    ! logicals and therefore the integers i1 and i2 are used.
-
-    i1 = 1; if( g1%internalHalo ) i1 = 0
-    i2 = 1; if( g2%internalHalo ) i2 = 0
-
-    if(i1 < i2) then
-       lessEqualPeriodicSubfacesHaloT = .true.
-       return
-    else if(i1 > i2) then
-       lessEqualPeriodicSubfacesHaloT = .false.
-       return
-    endif
-
-    ! Compare the number of periodic subfaces.
-
-    if(g1%nPeriodicSubfaces < g2%nPeriodicSubfaces) then
-       lessEqualPeriodicSubfacesHaloT = .true.
-       return
-    else if(g1%nPeriodicSubfaces > g2%nPeriodicSubfaces) then
-       lessEqualPeriodicSubfacesHaloT = .false.
-       return
-    endif
-
-    ! The number of periodic subfaces is the same. Compare the
-    ! subfaces themselves. It is assumed that the subfaces are
-    ! sorted in increading order. This can be done, because the
-    ! periodic transformations are commuting matrices.
-
-    do nn=1,g1%nPeriodicSubfaces
-       if(g1%periodicSubfaces(nn) < g2%periodicSubfaces(nn)) then
-          lessEqualPeriodicSubfacesHaloT = .true.
-          return
-       else if(g1%periodicSubfaces(nn) > g2%periodicSubfaces(nn)) then
-          lessEqualPeriodicSubfacesHaloT = .false.
-          return
-       endif
-    enddo
-
-    ! The periodic subfaces are identical as well. Compare the
-    ! indices in the list.
-
-    if(g1%indexInHaloList < g2%indexInHaloList) then
-       lessEqualPeriodicSubfacesHaloT = .true.
-       return
-    else if(g1%indexInHaloList > g2%indexInHaloList) then
-       lessEqualPeriodicSubfacesHaloT = .false.
-       return
-    endif
-
-    ! Both objects are the same. Return .true.
-
-    lessEqualPeriodicSubfacesHaloT = .true.
-
-  end function lessEqualPeriodicSubfacesHaloT
-
-  !        ================================================================
-
-  logical function equalPeriodicSubfacesHaloT(g1, g2)
-    !
-    !         equalPeriodicSubfacesHaloT returns .true. if g1 is
-    !         considered equal to g2. The equal operator is only used to
-    !         find the different number of periodic transformations in
-    !         determinePeriodicData. Hence only the periodic subfaces of
-    !         the halo's are compared and g1 and g2 are considered equal
-    !         if the subfaces are equal, even if other member variables
-    !         differ.
-    !
-    implicit none
-    !
-    !        Function arguments.
-    !
-    type(periodicSubfacesHaloType), intent(in) :: g1, g2
-    !
-    !        Local variables.
-    !
-    integer(kind=intType) :: nn
+    type bcHaloType
 
-    if(g1%nPeriodicSubfaces /= g2%nPeriodicSubfaces) then
-       equalPeriodicSubfacesHaloT = .false.
-       return
-    endif
+        ! directHalo: Index in the haloListType where the
+        !             corresponding direct halo is stored.
+        ! BC:         Corresponding boundary condition.
 
-    do nn=1,g1%nPeriodicSubfaces
-       if(g1%periodicSubfaces(nn) /= g2%periodicSubfaces(nn)) then
-          equalPeriodicSubfacesHaloT = .false.
-          return
-       endif
-    enddo
+        integer(kind=intType) :: directHalo, BC
 
-    equalPeriodicSubfacesHaloT = .true.
+    end type bcHaloType
 
-  end function equalPeriodicSubfacesHaloT
+    ! Interface for the extension of the operator <= needed for the
+    ! sorting of bcHaloType. Note that the = operator does not
+    ! need to be defined, because bcHaloType only contains
+    ! primitive types.
 
-end module periodicInfo
-module bcHalo
-  !
-  !       This local module contains the derived datatype bcHaloType,
-  !       which is used to determine the boundary condition for an
-  !       indirect halo when the nearest direct halo's are all boundary
-  !       halo's.
-  !
-  use precision
-  implicit none
-  save
-
-  public
-  private :: lessEqualBCHaloType
-  !
-  !       The definition of the derived datatype.
-  !
-  type bcHaloType
-
-     ! directHalo: Index in the haloListType where the
-     !             corresponding direct halo is stored.
-     ! BC:         Corresponding boundary condition.
-
-     integer(kind=intType) :: directHalo, BC
-
-  end type bcHaloType
-
-  ! Interface for the extension of the operator <= needed for the
-  ! sorting of bcHaloType. Note that the = operator does not
-  ! need to be defined, because bcHaloType only contains
-  ! primitive types.
-
-  interface operator(<=)
-     module procedure lessEqualBCHaloType
-  end interface operator(<=)
+    interface operator(<=)
+        module procedure lessEqualBCHaloType
+    end interface operator(<=)
 
 contains
-  !
-  logical function lessEqualBCHaloType(g1, g2)
-    !
-    !         Function to simulate the operator <= for bcHaloType.
-    !         It first compares the boundary condition. If equal the index
-    !         of the direct halo is compared, although this is not really
-    !         important.
-    !         LessEqual returns .true. if g1 <= g2 and .false. otherwise.
-    !
-    implicit none
-    !
-    !        Function arguments.
     !
-    type(bcHaloType), intent(in) :: g1, g2
-
-    ! First compare the boundary conditions. Note that the sequence
-    ! in BCTypes is such that the most important BC has the
-    ! highest number.
-
-    if(g1%BC < g2%BC) then
-       lessEqualBCHaloType = .true.
-       return
-    else if(g1%BC > g2%BC) then
-       lessEqualBCHaloType = .false.
-       return
-    endif
-
-    ! Boundary conditions are equal. Just compare the index.
-
-    if(g1%directHalo < g2%directHalo) then
-       lessEqualBCHaloType = .true.
-       return
-    else if(g1%directHalo > g2%directHalo) then
-       lessEqualBCHaloType = .false.
-       return
-    endif
+    logical function lessEqualBCHaloType(g1, g2)
+        !
+        !         Function to simulate the operator <= for bcHaloType.
+        !         It first compares the boundary condition. If equal the index
+        !         of the direct halo is compared, although this is not really
+        !         important.
+        !         LessEqual returns .true. if g1 <= g2 and .false. otherwise.
+        !
+        implicit none
+        !
+        !        Function arguments.
+        !
+        type(bcHaloType), intent(in) :: g1, g2
+
+        ! First compare the boundary conditions. Note that the sequence
+        ! in BCTypes is such that the most important BC has the
+        ! highest number.
+
+        if (g1%BC < g2%BC) then
+            lessEqualBCHaloType = .true.
+            return
+        else if (g1%BC > g2%BC) then
+            lessEqualBCHaloType = .false.
+            return
+        end if
+
+        ! Boundary conditions are equal. Just compare the index.
+
+        if (g1%directHalo < g2%directHalo) then
+            lessEqualBCHaloType = .true.
+            return
+        else if (g1%directHalo > g2%directHalo) then
+            lessEqualBCHaloType = .false.
+            return
+        end if
+
+        ! g1 and g2 are equal. Return .true.
+
+        lessEqualBCHaloType = .true.
+
+    end function lessEqualBCHaloType
+
+    !        ================================================================
+
+    subroutine sortBCHaloType(bcHaloArray, nn)
+        !
+        !         SortBCHaloType sorts the given number of BCHalo's in
+        !         increasing order. Note that this routine is called sort and
+        !         not qsort, because only an insertion sort is done here. The
+        !         reason is that nn <= 3 and thus an insertion sort is okay.
+        !
+        implicit none
+        !
+        !        Subroutine arguments
+        !
+        integer(kind=intType), intent(in) :: nn
+        type(bcHaloType), dimension(*), intent(inout) :: bcHaloArray
+        !
+        !        Local variables.
+        !
+        integer(kind=intType) :: i, j
+
+        type(bcHaloType) :: a
+
+        do j = 1, nn
+            a = bcHaloArray(j)
+            do i = (j - 1), 1, -1
+                if (bcHaloArray(i) <= a) exit
+                bcHaloArray(i + 1) = bcHaloArray(i)
+            end do
+            bcHaloArray(i + 1) = a
+        end do
+
+    end subroutine sortBCHaloType
 
-    ! g1 and g2 are equal. Return .true.
-
-    lessEqualBCHaloType = .true.
-
-  end function lessEqualBCHaloType
-
-  !        ================================================================
+end module bcHalo
 
-  subroutine sortBCHaloType(bcHaloArray, nn)
+module coarse1to1Subface
     !
-    !         SortBCHaloType sorts the given number of BCHalo's in
-    !         increasing order. Note that this routine is called sort and
-    !         not qsort, because only an insertion sort is done here. The
-    !         reason is that nn <= 3 and thus an insertion sort is okay.
+    !       This local module contains the derived datatype
+    !       coarse1to1SubfaceType, which is used to determine the 1 to 1
+    !       block boundaries for the coarser grids.
     !
+    use precision
     implicit none
+    save
     !
-    !        Subroutine arguments
+    !       The definition of the derived datatype.
     !
-    integer(kind=intType), intent(in) :: nn
-    type(bcHaloType), dimension(*), intent(inout) :: bcHaloArray
-    !
-    !        Local variables.
-    !
-    integer(kind=intType) :: i, j
-
-    type(bcHaloType) :: a
+    type coarse1to1SubfaceType
 
-    do j=1,nn
-       a = bcHaloArray(j)
-       do i=(j-1),1,-1
-          if(bcHaloArray(i) <= a) exit
-          bcHaloArray(i+1) = bcHaloArray(i)
-       enddo
-       bcHaloArray(i+1) = a
-    enddo
+        ! Nodal range in the three coordinates directions for the
+        ! coarse grid subface.
 
-  end subroutine sortBCHaloType
+        integer(kind=intType) :: iBeg, jBeg, kBeg, iEnd, jEnd, kEnd
 
-end module bcHalo
-
-module coarse1to1Subface
-  !
-  !       This local module contains the derived datatype
-  !       coarse1to1SubfaceType, which is used to determine the 1 to 1
-  !       block boundaries for the coarser grids.
-  !
-  use precision
-  implicit none
-  save
-  !
-  !       The definition of the derived datatype.
-  !
-  type coarse1to1SubfaceType
+        ! Processor and block id of the neighboring block.
 
-     ! Nodal range in the three coordinates directions for the
-     ! coarse grid subface.
+        integer(kind=intType) :: neighProc, neighBlock
 
-     integer(kind=intType) :: iBeg, jBeg, kBeg, iEnd, jEnd, kEnd
+        ! Number of points in the three coordinate directions for the
+        ! coarse grid donor subface.
 
-     ! Processor and block id of the neighboring block.
+        integer(kind=intType) :: ndi, ndj, ndk
 
-     integer(kind=intType) :: neighProc, neighBlock
+        ! Corresponding i, j and k indices of the fine grid donor block
+        ! for each of the coarse grid subface lines.
 
-     ! Number of points in the three coordinate directions for the
-     ! coarse grid donor subface.
+        integer(kind=intType), dimension(:), pointer :: idfine
+        integer(kind=intType), dimension(:), pointer :: jdfine
+        integer(kind=intType), dimension(:), pointer :: kdfine
 
-     integer(kind=intType) :: ndi, ndj, ndk
+    end type coarse1to1SubfaceType
 
-     ! Corresponding i, j and k indices of the fine grid donor block
-     ! for each of the coarse grid subface lines.
+    ! Number of 1 to 1 fine grid subfaces on this processor.
 
-     integer(kind=intType), dimension(:), pointer :: idfine
-     integer(kind=intType), dimension(:), pointer :: jdfine
-     integer(kind=intType), dimension(:), pointer :: kdfine
+    integer(kind=intType) :: nSubface1to1
 
-  end type coarse1to1SubfaceType
+    ! Array of 1 to 1 subfaces.
 
-  ! Number of 1 to 1 fine grid subfaces on this processor.
-
-  integer(kind=intType) :: nSubface1to1
-
-  ! Array of 1 to 1 subfaces.
-
-  type(coarse1to1SubfaceType), dimension(:), allocatable :: subface1to1
+    type(coarse1to1SubfaceType), dimension(:), allocatable :: subface1to1
 
 end module coarse1to1Subface
 
 !      ==================================================================
 
 module coarseningInfo
-  !
-  !       This local module contains the derived datatype
-  !       coarseningInfoType, which stores for a given block the grid
-  !       lines to keep for the coarse grid.
-  !
-  type coarseningInfoType
+    !
+    !       This local module contains the derived datatype
+    !       coarseningInfoType, which stores for a given block the grid
+    !       lines to keep for the coarse grid.
+    !
+    type coarseningInfoType
 
-     ! Logical, which indicate whether or not a fine grid 1 to 1
-     ! block boundary is still a 1 to 1 block boundary on the
-     ! coarse grid.
+        ! Logical, which indicate whether or not a fine grid 1 to 1
+        ! block boundary is still a 1 to 1 block boundary on the
+        ! coarse grid.
 
-     logical, dimension(:), pointer :: coarseIs1to1
+        logical, dimension(:), pointer :: coarseIs1to1
 
-  end type coarseningInfoType
+    end type coarseningInfoType
 
-  ! Array to store the info for all the blocks.
+    ! Array to store the info for all the blocks.
 
-  type(coarseningInfoType), dimension(:), allocatable :: coarseInfo
+    type(coarseningInfoType), dimension(:), allocatable :: coarseInfo
 
 end module coarseningInfo
 
diff --git a/src/solver/ALEUtils.F90 b/src/solver/ALEUtils.F90
index 339163fe3..fd58a4d81 100644
--- a/src/solver/ALEUtils.F90
+++ b/src/solver/ALEUtils.F90
@@ -1,41 +1,41 @@
 
 module ALEUtils
-  contains
+contains
 #ifndef USE_TAPENADE
-subroutine slipVelocitiesFineLevel_ALE(useOldCoor, t, sps)
-  !
-  ! Shell function to call slipVelocitiesFineLevel on all blocks
-  !
-  use constants
-  use blockPointers
-  use constants
-  use inputTimeSpectral
-  use iteration
-  use utils, only : setPointers
-  implicit none
-  !
-  !      Subroutine arguments.
-  !
-  integer(kind=intType), intent(in) :: sps
-  logical,               intent(in) :: useOldCoor
-  real(kind=realType), dimension(*), intent(in) :: t  !
-  !      Local variables.
-  !
-  integer(kind=intType) :: nn
-
-  ! Loop over the number of blocks.
-
-  domains: do nn=1,nDom
-
-     ! Set the pointers for this block.
-
-     call setPointers(nn, groundLevel, sps)
-
-     call slipVelocitiesFineLevelALE_block(useOldCoor, t, sps)
-
-  end do domains
-
-end subroutine slipVelocitiesFineLevel_ALE
+    subroutine slipVelocitiesFineLevel_ALE(useOldCoor, t, sps)
+        !
+        ! Shell function to call slipVelocitiesFineLevel on all blocks
+        !
+        use constants
+        use blockPointers
+        use constants
+        use inputTimeSpectral
+        use iteration
+        use utils, only: setPointers
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: sps
+        logical, intent(in) :: useOldCoor
+        real(kind=realType), dimension(*), intent(in) :: t  !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn
+
+        ! Loop over the number of blocks.
+
+        domains: do nn = 1, nDom
+
+            ! Set the pointers for this block.
+
+            call setPointers(nn, groundLevel, sps)
+
+            call slipVelocitiesFineLevelALE_block(useOldCoor, t, sps)
+
+        end do domains
+
+    end subroutine slipVelocitiesFineLevel_ALE
 #endif
 
 !
@@ -48,539 +48,537 @@ end subroutine slipVelocitiesFineLevel_ALE
 !      *                                                                *
 !      ******************************************************************
 !
-subroutine slipVelocitiesFineLevelALE_block(useOldCoor, t, sps)
-  !
-  !      ******************************************************************
-  !      *                                                                *
-  !      * slipVelocitiesFineLevel computes the slip velocities for       *
-  !      * viscous subfaces on all viscous boundaries on groundLevel for  *
-  !      * the given spectral solution. If useOldCoor is .true. the       *
-  !      * velocities are determined using the unsteady time integrator;  *
-  !      * otherwise the analytic form is used.                           *
-  !      *                                                                *
-  !      * Calculates the surface normal and normal velocity on BC using  *
-  !      * FIRST order BDF.                                               *
-  !      *                                                                *
-  !      ******************************************************************
-  !
-  use constants
-  use inputTimeSpectral
-  use blockPointers
-  use cgnsGrid
-  use flowVarRefState
-  use inputMotion
-  use inputUnsteady
-  use iteration
-  use inputPhysics
-  use inputTSStabDeriv
-  use monitor
-  use communication
-  use utils, only : setCoefTimeIntegrator
-  implicit none
-  !
-  !      Subroutine arguments.
-  !
-  integer(kind=intType), intent(in) :: sps
-  logical,               intent(in) :: useOldCoor
-
-  real(kind=realType), dimension(*), intent(in) :: t
-  !
-  !      Local variables.
-  !
-  integer(kind=intType) :: nn, mm, i, j, level
-
-  real(kind=realType) :: oneOver4dt
-  real(kind=realType) :: velxGrid, velyGrid, velzGrid,ainf
-  real(kind=realType) :: velxGrid0, velyGrid0, velzGrid0
-
-  real(kind=realType), dimension(3) :: xc, xxc
-  real(kind=realType), dimension(3) :: rotCenter, rotRate
-
-  real(kind=realType), dimension(3)   :: rotationPoint
-  real(kind=realType), dimension(3,3) :: rotationMatrix,&
-       derivRotationMatrix
-
-  real(kind=realType) :: tNew, tOld
-
-  real(kind=realType), dimension(:,:,:),   pointer :: uSlip
-  real(kind=realType), dimension(:,:,:),   pointer :: xFace
-  real(kind=realType), dimension(:,:,:,:), pointer :: xFaceOld
-
-  real(kind=realType) :: intervalMach,alphaTS,alphaIncrement,&
-       betaTS,betaIncrement
-  real(kind=realType), dimension(3) ::velDir
-  real(kind=realType), dimension(3) :: refDirection
-
-  !Function Definitions
-
-  real(kind=realType) :: TSAlpha,TSBeta,TSMach
-  !
-  !      ******************************************************************
-  !      *                                                                *
-  !      * Begin execution                                                *
-  !      *                                                                *
-  !      ******************************************************************
-  !
-  ! Determine the situation we are having here.
-
-     ! *******************************
-     ! REMOVED the rigid body rotation part for simplicity
-     ! *******************************
-
-     ! The velocities must be determined via a finite difference
-     ! formula using the coordinates of the old levels.
-
-     ! Set the coefficients for the time integrator and store the
-     ! inverse of the physical nonDimensional time step, divided
-     ! by 4, a bit easier.
-
-     call setCoefTimeIntegrator
-     oneOver4dt = fourth*timeRef/deltaT
-
-     ! Loop over the number of viscous subfaces.
-
-     bocoLoop1: do mm=1,nViscBocos
-
-        ! Set the pointer for uSlip to make the code more
-        ! readable.
-
-        uSlip => BCData(mm)%uSlip
-
-        ! Determine the grid face on which the subface is located
-        ! and set some variables accordingly.
-
-        select case (BCFaceID(mm))
-
-        case (iMin)
-           xFace => x(1,:,:,:);  xFaceOld => xOld(:,1,:,:,:)
-
-        case (iMax)
-           xFace => x(il,:,:,:); xFaceOld => xOld(:,il,:,:,:)
-
-        case (jMin)
-           xFace => x(:,1,:,:);  xFaceOld => xOld(:,:,1,:,:)
-
-        case (jMax)
-           xFace => x(:,jl,:,:); xFaceOld => xOld(:,:,jl,:,:)
+    subroutine slipVelocitiesFineLevelALE_block(useOldCoor, t, sps)
+        !
+        !      ******************************************************************
+        !      *                                                                *
+        !      * slipVelocitiesFineLevel computes the slip velocities for       *
+        !      * viscous subfaces on all viscous boundaries on groundLevel for  *
+        !      * the given spectral solution. If useOldCoor is .true. the       *
+        !      * velocities are determined using the unsteady time integrator;  *
+        !      * otherwise the analytic form is used.                           *
+        !      *                                                                *
+        !      * Calculates the surface normal and normal velocity on BC using  *
+        !      * FIRST order BDF.                                               *
+        !      *                                                                *
+        !      ******************************************************************
+        !
+        use constants
+        use inputTimeSpectral
+        use blockPointers
+        use cgnsGrid
+        use flowVarRefState
+        use inputMotion
+        use inputUnsteady
+        use iteration
+        use inputPhysics
+        use inputTSStabDeriv
+        use monitor
+        use communication
+        use utils, only: setCoefTimeIntegrator
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: sps
+        logical, intent(in) :: useOldCoor
+
+        real(kind=realType), dimension(*), intent(in) :: t
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn, mm, i, j, level
+
+        real(kind=realType) :: oneOver4dt
+        real(kind=realType) :: velxGrid, velyGrid, velzGrid, ainf
+        real(kind=realType) :: velxGrid0, velyGrid0, velzGrid0
+
+        real(kind=realType), dimension(3) :: xc, xxc
+        real(kind=realType), dimension(3) :: rotCenter, rotRate
+
+        real(kind=realType), dimension(3) :: rotationPoint
+        real(kind=realType), dimension(3, 3) :: rotationMatrix, &
+                                                derivRotationMatrix
+
+        real(kind=realType) :: tNew, tOld
+
+        real(kind=realType), dimension(:, :, :), pointer :: uSlip
+        real(kind=realType), dimension(:, :, :), pointer :: xFace
+        real(kind=realType), dimension(:, :, :, :), pointer :: xFaceOld
+
+        real(kind=realType) :: intervalMach, alphaTS, alphaIncrement, &
+                               betaTS, betaIncrement
+        real(kind=realType), dimension(3) :: velDir
+        real(kind=realType), dimension(3) :: refDirection
+
+        !Function Definitions
+
+        real(kind=realType) :: TSAlpha, TSBeta, TSMach
+        !
+        !      ******************************************************************
+        !      *                                                                *
+        !      * Begin execution                                                *
+        !      *                                                                *
+        !      ******************************************************************
+        !
+        ! Determine the situation we are having here.
+
+        ! *******************************
+        ! REMOVED the rigid body rotation part for simplicity
+        ! *******************************
+
+        ! The velocities must be determined via a finite difference
+        ! formula using the coordinates of the old levels.
+
+        ! Set the coefficients for the time integrator and store the
+        ! inverse of the physical nonDimensional time step, divided
+        ! by 4, a bit easier.
+
+        call setCoefTimeIntegrator
+        oneOver4dt = fourth * timeRef / deltaT
+
+        ! Loop over the number of viscous subfaces.
+
+        bocoLoop1: do mm = 1, nViscBocos
+
+            ! Set the pointer for uSlip to make the code more
+            ! readable.
+
+            uSlip => BCData(mm)%uSlip
+
+            ! Determine the grid face on which the subface is located
+            ! and set some variables accordingly.
+
+            select case (BCFaceID(mm))
+
+            case (iMin)
+                xFace => x(1, :, :, :); xFaceOld => xOld(:, 1, :, :, :)
+
+            case (iMax)
+                xFace => x(il, :, :, :); xFaceOld => xOld(:, il, :, :, :)
+
+            case (jMin)
+                xFace => x(:, 1, :, :); xFaceOld => xOld(:, :, 1, :, :)
+
+            case (jMax)
+                xFace => x(:, jl, :, :); xFaceOld => xOld(:, :, jl, :, :)
+
+            case (kMin)
+                xFace => x(:, :, 1, :); xFaceOld => xOld(:, :, :, 1, :)
+
+            case (kMax)
+                xFace => x(:, :, kl, :); xFaceOld => xOld(:, :, :, kl, :)
+
+            end select
+
+            ! Some boundary faces have a different rotation speed than
+            ! the corresponding block. This happens e.g. in the tip gap
+            ! region of turboMachinary problems where the casing does
+            ! not rotate. As the coordinate difference corresponds to
+            ! the rotation rate of the block, a correction must be
+            ! computed. Therefore compute the difference in rotation
+            ! rate and store the rotation center a bit easier. Note that
+            ! the rotation center of subface is taken, because if there
+            ! is a difference in rotation rate this info for the subface
+            ! must always be specified.
 
-        case (kMin)
-           xFace => x(:,:,1,:);  xFaceOld => xOld(:,:,:,1,:)
+            j = nbkGlobal
+            i = cgnsSubface(mm)
+
+            ! Loop over the quadrilateral faces of the viscous subface.
+            ! Note that due to the usage of the pointers xFace and
+            ! xFaceOld an offset of +1 must be used in the coordinate
+            ! arrays, because x and xOld originally start at 0 for the
+            ! i, j and k indices.
 
-        case (kMax)
-           xFace => x(:,:,kl,:); xFaceOld => xOld(:,:,:,kl,:)
+            do j = BCData(mm)%jcBeg, BCData(mm)%jcEnd
+                do i = BCData(mm)%icBeg, BCData(mm)%icEnd
 
-        end select
+                    ! Determine the coordinates of the centroid of the
+                    ! face, multiplied by 4.
+
+                    uSlip(i, j, 1) = (xFace(i + 1, j + 1, 1) + xFace(i + 1, j, 1) &
+                                      + xFace(i, j + 1, 1) + xFace(i, j, 1))
+                    uSlip(i, j, 2) = (xFace(i + 1, j + 1, 2) + xFace(i + 1, j, 2) &
+                                      + xFace(i, j + 1, 2) + xFace(i, j, 2))
+                    uSlip(i, j, 3) = (xFace(i + 1, j + 1, 3) + xFace(i + 1, j, 3) &
+                                      + xFace(i, j + 1, 3) + xFace(i, j, 3))
+
+                    ! Loop over the older time levels and take their
+                    ! contribution into account.
+
+                    level = 1 ! There was a loop over all old levels
+                    uSlip(i, j, 1) = uSlip(i, j, 1) &
+                                     + (xFaceOld(level, i + 1, j + 1, 1) &
+                                        + xFaceOld(level, i + 1, j, 1) &
+                                        + xFaceOld(level, i, j + 1, 1) &
+                                        + xFaceOld(level, i, j, 1)) &
+                                     * (-1.0_realType)
+                    uSlip(i, j, 2) = uSlip(i, j, 2) &
+                                     + (xFaceOld(level, i + 1, j + 1, 2) &
+                                        + xFaceOld(level, i + 1, j, 2) &
+                                        + xFaceOld(level, i, j + 1, 2) &
+                                        + xFaceOld(level, i, j, 2)) &
+                                     * (-1.0_realType)
+                    uSlip(i, j, 3) = uSlip(i, j, 3) &
+                                     + (xFaceOld(level, i + 1, j + 1, 3) &
+                                        + xFaceOld(level, i + 1, j, 3) &
+                                        + xFaceOld(level, i, j + 1, 3) &
+                                        + xFaceOld(level, i, j, 3)) &
+                                     * (-1.0_realType)
+
+                    ! Divide by 4 times the time step to obtain the
+                    ! correct velocity.
+
+                    uSlip(i, j, 1) = uSlip(i, j, 1) * oneOver4dt
+                    uSlip(i, j, 2) = uSlip(i, j, 2) * oneOver4dt
+                    uSlip(i, j, 3) = uSlip(i, j, 3) * oneOver4dt
+                end do
+            end do
 
-        ! Some boundary faces have a different rotation speed than
-        ! the corresponding block. This happens e.g. in the tip gap
-        ! region of turboMachinary problems where the casing does
-        ! not rotate. As the coordinate difference corresponds to
-        ! the rotation rate of the block, a correction must be
-        ! computed. Therefore compute the difference in rotation
-        ! rate and store the rotation center a bit easier. Note that
-        ! the rotation center of subface is taken, because if there
-        ! is a difference in rotation rate this info for the subface
-        ! must always be specified.
-
-        j = nbkGlobal
-        i = cgnsSubface(mm)
-
-        ! Loop over the quadrilateral faces of the viscous subface.
-        ! Note that due to the usage of the pointers xFace and
-        ! xFaceOld an offset of +1 must be used in the coordinate
-        ! arrays, because x and xOld originally start at 0 for the
-        ! i, j and k indices.
-
-        do j=BCData(mm)%jcBeg, BCData(mm)%jcEnd
-           do i=BCData(mm)%icBeg, BCData(mm)%icEnd
-
-              ! Determine the coordinates of the centroid of the
-              ! face, multiplied by 4.
-
-              uSlip(i,j,1) = (xFace(i+1,j+1,1) + xFace(i+1,j,1) &
-                            + xFace(i,  j+1,1) + xFace(i,  j,1))
-              uSlip(i,j,2) = (xFace(i+1,j+1,2) + xFace(i+1,j,2) &
-                            + xFace(i,  j+1,2) + xFace(i,  j,2))
-              uSlip(i,j,3) = (xFace(i+1,j+1,3) + xFace(i+1,j,3) &
-                            + xFace(i,  j+1,3) + xFace(i,  j,3))
-
-              ! Loop over the older time levels and take their
-              ! contribution into account.
-
-              level = 1 ! There was a loop over all old levels
-              uSlip(i,j,1) = uSlip(i,j,1) &
-                   +          (xFaceOld(level,i+1,j+1,1) &
-                   +           xFaceOld(level,i+1,j,  1) &
-                   +           xFaceOld(level,i,  j+1,1) &
-                   +           xFaceOld(level,i,  j,  1)) &
-                   * (-1.0_realType)
-              uSlip(i,j,2) = uSlip(i,j,2) &
-                   +          (xFaceOld(level,i+1,j+1,2) &
-                   +           xFaceOld(level,i+1,j,  2) &
-                   +           xFaceOld(level,i,  j+1,2) &
-                   +           xFaceOld(level,i,  j,  2)) &
-                   * (-1.0_realType)
-              uSlip(i,j,3) = uSlip(i,j,3) &
-                   +          (xFaceOld(level,i+1,j+1,3) &
-                   +           xFaceOld(level,i+1,j,  3) &
-                   +           xFaceOld(level,i,  j+1,3) &
-                   +           xFaceOld(level,i,  j,  3)) &
-                   * (-1.0_realType)
-
-              ! Divide by 4 times the time step to obtain the
-              ! correct velocity.
-
-              uSlip(i,j,1) = uSlip(i,j,1)*oneOver4dt
-              uSlip(i,j,2) = uSlip(i,j,2)*oneOver4dt
-              uSlip(i,j,3) = uSlip(i,j,3)*oneOver4dt
-           enddo
-        enddo
-
-     enddo bocoLoop1
-
-end subroutine slipVelocitiesFineLevelALE_block
+        end do bocoLoop1
+
+    end subroutine slipVelocitiesFineLevelALE_block
 
 ! ===========================================================
-subroutine interpLevelALE_block
-  !
-  !      ******************************************************************
-  !      *                                                                *
-  !      * interpLevelALE_block interpolates geometric data over the      *
-  !      * latest time step.                                              *
-  !      *                                                                *
-  !      ******************************************************************
-  !
-  use blockPointers
-  use iteration
-  use inputUnsteady
-  use inputPhysics
-  implicit none
-  !
-  !      Local variables.
-  !
-  integer(kind=intType) :: i,j,k,l,nn,mm,kk
-
-  if (.not. useALE .or. equationMode .ne. unsteady)  then
-     return
-  end if
-
-  ! --------------------------------
-  ! First store then clear current data
-  ! --------------------------------
-  clearI : do k = 1,ke
-     do j = 1,je
-        do i = 0,ie
-           sFaceIALE(0,i,j,k) = sFaceI(i,j,k)
-           sIALE(0,i,j,k,1)   = sI(i,j,k,1)
-           sIALE(0,i,j,k,2)   = sI(i,j,k,2)
-           sIALE(0,i,j,k,3)   = sI(i,j,k,3)
-           sFaceI(i,j,k)      = zero
-           sI(i,j,k,1)        = zero
-           sI(i,j,k,2)        = zero
-           sI(i,j,k,3)        = zero
-        enddo
-     enddo
-  enddo clearI
-
-  clearJ : do k = 1,ke
-     do j = 0,je
-        do i = 1,ie
-           sFaceJALE(0,i,j,k) = sFaceJ(i,j,k)
-           sJALE(0,i,j,k,1)   = sJ(i,j,k,1)
-           sJALE(0,i,j,k,2)   = sJ(i,j,k,2)
-           sJALE(0,i,j,k,3)   = sJ(i,j,k,3)
-           sFaceJ(i,j,k)      = zero
-           sJ(i,j,k,1)        = zero
-           sJ(i,j,k,2)        = zero
-           sJ(i,j,k,3)        = zero
-        enddo
-     enddo
-  enddo clearJ
-
-  clearK : do k = 0,ke
-     do j = 1,je
-        do i = 1,ie
-           sFaceKALE(0,i,j,k) = sFaceK(i,j,k)
-           sKALE(0,i,j,k,1)   = sK(i,j,k,1)
-           sKALE(0,i,j,k,2)   = sK(i,j,k,2)
-           sKALE(0,i,j,k,3)   = sK(i,j,k,3)
-           sFaceK(i,j,k)      = zero
-           sK(i,j,k,1)        = zero
-           sK(i,j,k,2)        = zero
-           sK(i,j,k,3)        = zero
-        enddo
-     enddo
-  enddo clearK
-
-  ALEloop : do l = 1,nALEsteps
-     ! --------------------------------
-     ! Then average surface normal and normal velocity from array of old variables
-     ! This eq. 10a and 10b, found paper by C.Farhat http://dx.doi.org/10.1016/S0021-9991(03)00311-5
-     ! --------------------------------
-     updateI : do k = 1,ke
-        do j = 1,je
-           do i = 0,ie
-              sFaceI(i,j,k) = sFaceI(i,j,k) + coefTimeALE(l) * sFaceIALE(l,i,j,k)
-              sI(i,j,k,1)   = sI(i,j,k,1)   + coefTimeALE(l) * sIALE(l,i,j,k,1)
-              sI(i,j,k,2)   = sI(i,j,k,2)   + coefTimeALE(l) * sIALE(l,i,j,k,2)
-              sI(i,j,k,3)   = sI(i,j,k,3)   + coefTimeALE(l) * sIALE(l,i,j,k,3)
-           enddo
-        enddo
-     enddo updateI
-
-     updateJ : do k = 1,ke
-        do j = 0,je
-           do i = 1,ie
-              sFaceJ(i,j,k) = sFaceJ(i,j,k) + coefTimeALE(l) * sFaceJALE(l,i,j,k)
-              sJ(i,j,k,1)   = sJ(i,j,k,1)   + coefTimeALE(l) * sJALE(l,i,j,k,1)
-              sJ(i,j,k,2)   = sJ(i,j,k,2)   + coefTimeALE(l) * sJALE(l,i,j,k,2)
-              sJ(i,j,k,3)   = sJ(i,j,k,3)   + coefTimeALE(l) * sJALE(l,i,j,k,3)
-           enddo
-        enddo
-     enddo updateJ
-
-     updateK : do k = 0,ke
-        do j = 1,je
-           do i = 1,ie
-              sFaceK(i,j,k) = sFaceK(i,j,k) + coefTimeALE(l) * sFaceKALE(l,i,j,k)
-              sK(i,j,k,1)   = sK(i,j,k,1)   + coefTimeALE(l) * sKALE(l,i,j,k,1)
-              sK(i,j,k,2)   = sK(i,j,k,2)   + coefTimeALE(l) * sKALE(l,i,j,k,2)
-              sK(i,j,k,3)   = sK(i,j,k,3)   + coefTimeALE(l) * sKALE(l,i,j,k,3)
-           enddo
-        enddo
-     enddo updateK
-  enddo ALEloop
-
-
-end subroutine interpLevelALE_block
+    subroutine interpLevelALE_block
+        !
+        !      ******************************************************************
+        !      *                                                                *
+        !      * interpLevelALE_block interpolates geometric data over the      *
+        !      * latest time step.                                              *
+        !      *                                                                *
+        !      ******************************************************************
+        !
+        use blockPointers
+        use iteration
+        use inputUnsteady
+        use inputPhysics
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, l, nn, mm, kk
+
+        if (.not. useALE .or. equationMode .ne. unsteady) then
+            return
+        end if
+
+        ! --------------------------------
+        ! First store then clear current data
+        ! --------------------------------
+        clearI: do k = 1, ke
+            do j = 1, je
+                do i = 0, ie
+                    sFaceIALE(0, i, j, k) = sFaceI(i, j, k)
+                    sIALE(0, i, j, k, 1) = sI(i, j, k, 1)
+                    sIALE(0, i, j, k, 2) = sI(i, j, k, 2)
+                    sIALE(0, i, j, k, 3) = sI(i, j, k, 3)
+                    sFaceI(i, j, k) = zero
+                    sI(i, j, k, 1) = zero
+                    sI(i, j, k, 2) = zero
+                    sI(i, j, k, 3) = zero
+                end do
+            end do
+        end do clearI
+
+        clearJ: do k = 1, ke
+            do j = 0, je
+                do i = 1, ie
+                    sFaceJALE(0, i, j, k) = sFaceJ(i, j, k)
+                    sJALE(0, i, j, k, 1) = sJ(i, j, k, 1)
+                    sJALE(0, i, j, k, 2) = sJ(i, j, k, 2)
+                    sJALE(0, i, j, k, 3) = sJ(i, j, k, 3)
+                    sFaceJ(i, j, k) = zero
+                    sJ(i, j, k, 1) = zero
+                    sJ(i, j, k, 2) = zero
+                    sJ(i, j, k, 3) = zero
+                end do
+            end do
+        end do clearJ
+
+        clearK: do k = 0, ke
+            do j = 1, je
+                do i = 1, ie
+                    sFaceKALE(0, i, j, k) = sFaceK(i, j, k)
+                    sKALE(0, i, j, k, 1) = sK(i, j, k, 1)
+                    sKALE(0, i, j, k, 2) = sK(i, j, k, 2)
+                    sKALE(0, i, j, k, 3) = sK(i, j, k, 3)
+                    sFaceK(i, j, k) = zero
+                    sK(i, j, k, 1) = zero
+                    sK(i, j, k, 2) = zero
+                    sK(i, j, k, 3) = zero
+                end do
+            end do
+        end do clearK
+
+        ALEloop: do l = 1, nALEsteps
+            ! --------------------------------
+            ! Then average surface normal and normal velocity from array of old variables
+            ! This eq. 10a and 10b, found paper by C.Farhat http://dx.doi.org/10.1016/S0021-9991(03)00311-5
+            ! --------------------------------
+            updateI: do k = 1, ke
+                do j = 1, je
+                    do i = 0, ie
+                        sFaceI(i, j, k) = sFaceI(i, j, k) + coefTimeALE(l) * sFaceIALE(l, i, j, k)
+                        sI(i, j, k, 1) = sI(i, j, k, 1) + coefTimeALE(l) * sIALE(l, i, j, k, 1)
+                        sI(i, j, k, 2) = sI(i, j, k, 2) + coefTimeALE(l) * sIALE(l, i, j, k, 2)
+                        sI(i, j, k, 3) = sI(i, j, k, 3) + coefTimeALE(l) * sIALE(l, i, j, k, 3)
+                    end do
+                end do
+            end do updateI
+
+            updateJ: do k = 1, ke
+                do j = 0, je
+                    do i = 1, ie
+                        sFaceJ(i, j, k) = sFaceJ(i, j, k) + coefTimeALE(l) * sFaceJALE(l, i, j, k)
+                        sJ(i, j, k, 1) = sJ(i, j, k, 1) + coefTimeALE(l) * sJALE(l, i, j, k, 1)
+                        sJ(i, j, k, 2) = sJ(i, j, k, 2) + coefTimeALE(l) * sJALE(l, i, j, k, 2)
+                        sJ(i, j, k, 3) = sJ(i, j, k, 3) + coefTimeALE(l) * sJALE(l, i, j, k, 3)
+                    end do
+                end do
+            end do updateJ
+
+            updateK: do k = 0, ke
+                do j = 1, je
+                    do i = 1, ie
+                        sFaceK(i, j, k) = sFaceK(i, j, k) + coefTimeALE(l) * sFaceKALE(l, i, j, k)
+                        sK(i, j, k, 1) = sK(i, j, k, 1) + coefTimeALE(l) * sKALE(l, i, j, k, 1)
+                        sK(i, j, k, 2) = sK(i, j, k, 2) + coefTimeALE(l) * sKALE(l, i, j, k, 2)
+                        sK(i, j, k, 3) = sK(i, j, k, 3) + coefTimeALE(l) * sKALE(l, i, j, k, 3)
+                    end do
+                end do
+            end do updateK
+        end do ALEloop
+
+    end subroutine interpLevelALE_block
 
 ! ===========================================================
-subroutine recoverLevelALE_block
-  !
-  !      ******************************************************************
-  !      *                                                                *
-  !      * recoverLevelALE_block recovers current geometric data from     *
-  !      * temporary interpolation                                        *
-  !      *                                                                *
-  !      ******************************************************************
-  !
-  use blockPointers
-  use inputUnsteady
-  use inputPhysics
-  implicit none
-  !
-  !      Local variables.
-  !
-  integer(kind=intType) :: i,j,k,nn,mm,kk
-
-  if (.not. useALE .or. equationMode .ne. unsteady)  then
-     return
-  end if
-
-  recoverI : do k = 1,ke
-     do j = 1,je
-        do i = 0,ie
-           sFaceI(i,j,k) = sFaceIALE(0,i,j,k)
-           sI(i,j,k,1)   = sIALE(0,i,j,k,1)
-           sI(i,j,k,2)   = sIALE(0,i,j,k,2)
-           sI(i,j,k,3)   = sIALE(0,i,j,k,3)
-        enddo
-     enddo
-  enddo recoverI
-
-  recoverJ : do k = 1,ke
-     do j = 0,je
-        do i = 1,ie
-           sFaceJ(i,j,k) = sFaceJALE(0,i,j,k)
-           sJ(i,j,k,1)   = sJALE(0,i,j,k,1)
-           sJ(i,j,k,2)   = sJALE(0,i,j,k,2)
-           sJ(i,j,k,3)   = sJALE(0,i,j,k,3)
-        enddo
-     enddo
-  enddo recoverJ
-
-  recoverK : do k = 0,ke
-     do j = 1,je
-        do i = 1,ie
-           sFaceK(i,j,k) = sFaceKALE(0,i,j,k)
-           sK(i,j,k,1)   = sKALE(0,i,j,k,1)
-           sK(i,j,k,2)   = sKALE(0,i,j,k,2)
-           sK(i,j,k,3)   = sKALE(0,i,j,k,3)
-        enddo
-     enddo
-  enddo recoverK
-
-
-end subroutine recoverLevelALE_block
+    subroutine recoverLevelALE_block
+        !
+        !      ******************************************************************
+        !      *                                                                *
+        !      * recoverLevelALE_block recovers current geometric data from     *
+        !      * temporary interpolation                                        *
+        !      *                                                                *
+        !      ******************************************************************
+        !
+        use blockPointers
+        use inputUnsteady
+        use inputPhysics
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, nn, mm, kk
+
+        if (.not. useALE .or. equationMode .ne. unsteady) then
+            return
+        end if
+
+        recoverI: do k = 1, ke
+            do j = 1, je
+                do i = 0, ie
+                    sFaceI(i, j, k) = sFaceIALE(0, i, j, k)
+                    sI(i, j, k, 1) = sIALE(0, i, j, k, 1)
+                    sI(i, j, k, 2) = sIALE(0, i, j, k, 2)
+                    sI(i, j, k, 3) = sIALE(0, i, j, k, 3)
+                end do
+            end do
+        end do recoverI
+
+        recoverJ: do k = 1, ke
+            do j = 0, je
+                do i = 1, ie
+                    sFaceJ(i, j, k) = sFaceJALE(0, i, j, k)
+                    sJ(i, j, k, 1) = sJALE(0, i, j, k, 1)
+                    sJ(i, j, k, 2) = sJALE(0, i, j, k, 2)
+                    sJ(i, j, k, 3) = sJALE(0, i, j, k, 3)
+                end do
+            end do
+        end do recoverJ
+
+        recoverK: do k = 0, ke
+            do j = 1, je
+                do i = 1, ie
+                    sFaceK(i, j, k) = sFaceKALE(0, i, j, k)
+                    sK(i, j, k, 1) = sKALE(0, i, j, k, 1)
+                    sK(i, j, k, 2) = sKALE(0, i, j, k, 2)
+                    sK(i, j, k, 3) = sKALE(0, i, j, k, 3)
+                end do
+            end do
+        end do recoverK
+
+    end subroutine recoverLevelALE_block
 
 ! ===========================================================
-subroutine interpLevelALEBC_block
-  !
-  !      ******************************************************************
-  !      *                                                                *
-  !      * interpLevelALEBC_block interpolates geometric data on boundary *
-  !      * over the latest time step.                                     *
-  !      *                                                                *
-  !      ******************************************************************
-  !
-  use blockPointers
-  use iteration
-  use inputUnsteady
-  use inputPhysics
-  implicit none
-  !
-  !      Local variables.
-  !
-  integer(kind=intType) :: i,j,k,l,nn,mm,kk
-
-  if (.not. useALE .or. equationMode .ne. unsteady)  then
-     return
-  end if
-
-  ! --------------------------------
-  ! First store then clear current data
-  ! --------------------------------
-  clearNM: do mm=1,nBocos
-     do j=BCData(mm)%jcBeg, BCData(mm)%jcEnd
-        do i=BCData(mm)%icBeg, BCData(mm)%icEnd
-           BCData(mm)%normALE(0,i,j,1) = BCData(mm)%norm(i,j,1)
-           BCData(mm)%normALE(0,i,j,2) = BCData(mm)%norm(i,j,2)
-           BCData(mm)%normALE(0,i,j,3) = BCData(mm)%norm(i,j,3)
-           BCData(mm)%norm(i,j,1)      = zero
-           BCData(mm)%norm(i,j,2)      = zero
-           BCData(mm)%norm(i,j,3)      = zero
-        enddo
-     enddo
-  enddo clearNM
-
-  clearRF: do mm=1,nBocos
-     testAssoc1: if( associated(BCData(mm)%rFace) ) then
-        do j=BCData(mm)%jcBeg, BCData(mm)%jcEnd
-           do i=BCData(mm)%icBeg, BCData(mm)%icEnd
-              BCData(mm)%rFaceALE(0,i,j) = BCData(mm)%rFace(i,j)
-              BCData(mm)%rFace(i,j)      = zero
-           enddo
-        enddo
-     endif testAssoc1
-  enddo clearRF
-
-  clearUS: do mm=1,nViscBocos
-     do j=BCData(mm)%jcBeg, BCData(mm)%jcEnd
-        do i=BCData(mm)%icBeg, BCData(mm)%icEnd
-           BCData(mm)%uSlipALE(0,i,j,1) = BCData(mm)%uSlip(i,j,1)
-           BCData(mm)%uSlipALE(0,i,j,2) = BCData(mm)%uSlip(i,j,2)
-           BCData(mm)%uSlipALE(0,i,j,3) = BCData(mm)%uSlip(i,j,3)
-           BCData(mm)%uSlip(i,j,1)      = zero
-           BCData(mm)%uSlip(i,j,2)      = zero
-           BCData(mm)%uSlip(i,j,3)      = zero
-        enddo
-     enddo
-  enddo clearUS
-
-  ALEloop : do l = 1,nALEsteps
-     ! --------------------------------
-     ! Then average surface normal and normal velocity from array of old variables
-     ! --------------------------------
-     updateNM: do mm=1,nBocos
-        do j=BCData(mm)%jcBeg, BCData(mm)%jcEnd
-           do i=BCData(mm)%icBeg, BCData(mm)%icEnd
-              BCData(mm)%norm(i,j,1) = BCData(mm)%norm(i,j,1) &
-                   + coefTimeALE(l) * BCData(mm)%normALE(l,i,j,1)
-              BCData(mm)%norm(i,j,2) = BCData(mm)%norm(i,j,2) &
-                   + coefTimeALE(l) * BCData(mm)%normALE(l,i,j,2)
-              BCData(mm)%norm(i,j,3) = BCData(mm)%norm(i,j,3) &
-                   + coefTimeALE(l) * BCData(mm)%normALE(l,i,j,3)
-           enddo
-        enddo
-     enddo updateNM
-
-     updateRF: do mm=1,nBocos
-        testAssoc2: if( associated(BCData(mm)%rFace) ) then
-           do j=BCData(mm)%jcBeg, BCData(mm)%jcEnd
-              do i=BCData(mm)%icBeg, BCData(mm)%icEnd
-                 BCData(mm)%rFace(i,j) = BCData(mm)%rFace(i,j) &
-                      + coefTimeALE(l) * BCData(mm)%rFaceALE(0,i,j)
-              enddo
-           enddo
-        endif testAssoc2
-     enddo updateRF
-
-     updateUS: do mm=1,nViscBocos
-        do j=BCData(mm)%jcBeg, BCData(mm)%jcEnd
-           do i=BCData(mm)%icBeg, BCData(mm)%icEnd
-              BCData(mm)%uSlip(i,j,1) = BCData(mm)%uSlip(i,j,1) &
-                   + coefTimeALE(l) * BCData(mm)%uSlipALE(l,i,j,1)
-              BCData(mm)%uSlip(i,j,2) = BCData(mm)%uSlip(i,j,2) &
-                   + coefTimeALE(l) * BCData(mm)%uSlipALE(l,i,j,2)
-              BCData(mm)%uSlip(i,j,3) = BCData(mm)%uSlip(i,j,3) &
-                   + coefTimeALE(l) * BCData(mm)%uSlipALE(l,i,j,3)
-           enddo
-        enddo
-     enddo updateUS
-  enddo ALEloop
-
-end subroutine interpLevelALEBC_block
+    subroutine interpLevelALEBC_block
+        !
+        !      ******************************************************************
+        !      *                                                                *
+        !      * interpLevelALEBC_block interpolates geometric data on boundary *
+        !      * over the latest time step.                                     *
+        !      *                                                                *
+        !      ******************************************************************
+        !
+        use blockPointers
+        use iteration
+        use inputUnsteady
+        use inputPhysics
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, l, nn, mm, kk
+
+        if (.not. useALE .or. equationMode .ne. unsteady) then
+            return
+        end if
+
+        ! --------------------------------
+        ! First store then clear current data
+        ! --------------------------------
+        clearNM: do mm = 1, nBocos
+            do j = BCData(mm)%jcBeg, BCData(mm)%jcEnd
+                do i = BCData(mm)%icBeg, BCData(mm)%icEnd
+                    BCData(mm)%normALE(0, i, j, 1) = BCData(mm)%norm(i, j, 1)
+                    BCData(mm)%normALE(0, i, j, 2) = BCData(mm)%norm(i, j, 2)
+                    BCData(mm)%normALE(0, i, j, 3) = BCData(mm)%norm(i, j, 3)
+                    BCData(mm)%norm(i, j, 1) = zero
+                    BCData(mm)%norm(i, j, 2) = zero
+                    BCData(mm)%norm(i, j, 3) = zero
+                end do
+            end do
+        end do clearNM
+
+        clearRF: do mm = 1, nBocos
+            testAssoc1: if (associated(BCData(mm)%rFace)) then
+                do j = BCData(mm)%jcBeg, BCData(mm)%jcEnd
+                    do i = BCData(mm)%icBeg, BCData(mm)%icEnd
+                        BCData(mm)%rFaceALE(0, i, j) = BCData(mm)%rFace(i, j)
+                        BCData(mm)%rFace(i, j) = zero
+                    end do
+                end do
+            end if testAssoc1
+        end do clearRF
+
+        clearUS: do mm = 1, nViscBocos
+            do j = BCData(mm)%jcBeg, BCData(mm)%jcEnd
+                do i = BCData(mm)%icBeg, BCData(mm)%icEnd
+                    BCData(mm)%uSlipALE(0, i, j, 1) = BCData(mm)%uSlip(i, j, 1)
+                    BCData(mm)%uSlipALE(0, i, j, 2) = BCData(mm)%uSlip(i, j, 2)
+                    BCData(mm)%uSlipALE(0, i, j, 3) = BCData(mm)%uSlip(i, j, 3)
+                    BCData(mm)%uSlip(i, j, 1) = zero
+                    BCData(mm)%uSlip(i, j, 2) = zero
+                    BCData(mm)%uSlip(i, j, 3) = zero
+                end do
+            end do
+        end do clearUS
+
+        ALEloop: do l = 1, nALEsteps
+            ! --------------------------------
+            ! Then average surface normal and normal velocity from array of old variables
+            ! --------------------------------
+            updateNM: do mm = 1, nBocos
+                do j = BCData(mm)%jcBeg, BCData(mm)%jcEnd
+                    do i = BCData(mm)%icBeg, BCData(mm)%icEnd
+                        BCData(mm)%norm(i, j, 1) = BCData(mm)%norm(i, j, 1) &
+                                                   + coefTimeALE(l) * BCData(mm)%normALE(l, i, j, 1)
+                        BCData(mm)%norm(i, j, 2) = BCData(mm)%norm(i, j, 2) &
+                                                   + coefTimeALE(l) * BCData(mm)%normALE(l, i, j, 2)
+                        BCData(mm)%norm(i, j, 3) = BCData(mm)%norm(i, j, 3) &
+                                                   + coefTimeALE(l) * BCData(mm)%normALE(l, i, j, 3)
+                    end do
+                end do
+            end do updateNM
+
+            updateRF: do mm = 1, nBocos
+                testAssoc2: if (associated(BCData(mm)%rFace)) then
+                    do j = BCData(mm)%jcBeg, BCData(mm)%jcEnd
+                        do i = BCData(mm)%icBeg, BCData(mm)%icEnd
+                            BCData(mm)%rFace(i, j) = BCData(mm)%rFace(i, j) &
+                                                     + coefTimeALE(l) * BCData(mm)%rFaceALE(0, i, j)
+                        end do
+                    end do
+                end if testAssoc2
+            end do updateRF
+
+            updateUS: do mm = 1, nViscBocos
+                do j = BCData(mm)%jcBeg, BCData(mm)%jcEnd
+                    do i = BCData(mm)%icBeg, BCData(mm)%icEnd
+                        BCData(mm)%uSlip(i, j, 1) = BCData(mm)%uSlip(i, j, 1) &
+                                                    + coefTimeALE(l) * BCData(mm)%uSlipALE(l, i, j, 1)
+                        BCData(mm)%uSlip(i, j, 2) = BCData(mm)%uSlip(i, j, 2) &
+                                                    + coefTimeALE(l) * BCData(mm)%uSlipALE(l, i, j, 2)
+                        BCData(mm)%uSlip(i, j, 3) = BCData(mm)%uSlip(i, j, 3) &
+                                                    + coefTimeALE(l) * BCData(mm)%uSlipALE(l, i, j, 3)
+                    end do
+                end do
+            end do updateUS
+        end do ALEloop
+
+    end subroutine interpLevelALEBC_block
 
 ! ===========================================================
-subroutine recoverLevelALEBC_block
-  !
-  !      ******************************************************************
-  !      *                                                                *
-  !      * recoverLevelALEBC_block recovers current geometric data on     *
-  !      * boundary from temporary interpolation                          *
-  !      *                                                                *
-  !      ******************************************************************
-  !
-  use blockPointers
-  use inputUnsteady
-  use inputPhysics
-  implicit none
-  !
-  !      Local variables.
-  !
-  integer(kind=intType) :: i,j,k,nn,mm,kk
-
-  if (.not. useALE .or. equationMode .ne. unsteady)  then
-     return
-  end if
-
-  recoverNM: do mm=1,nBocos
-     do j=BCData(mm)%jcBeg, BCData(mm)%jcEnd
-        do i=BCData(mm)%icBeg, BCData(mm)%icEnd
-           BCData(mm)%norm(i,j,1) = BCData(mm)%normALE(0,i,j,1)
-           BCData(mm)%norm(i,j,2) = BCData(mm)%normALE(0,i,j,2)
-           BCData(mm)%norm(i,j,3) = BCData(mm)%normALE(0,i,j,3)
-        enddo
-     enddo
-  enddo recoverNM
-
-  recoverRF: do mm=1,nBocos
-     testAssoc: if( associated(BCData(mm)%rFace) ) then
-        do j=BCData(mm)%jcBeg, BCData(mm)%jcEnd
-           do i=BCData(mm)%icBeg, BCData(mm)%icEnd
-              BCData(mm)%rFace(i,j) = BCData(mm)%rFaceALE(0,i,j)
-           enddo
-        enddo
-     endif testAssoc
-  enddo recoverRF
-
-  recoverUS: do mm=1,nViscBocos
-     do j=BCData(mm)%jcBeg, BCData(mm)%jcEnd
-        do i=BCData(mm)%icBeg, BCData(mm)%icEnd
-           BCData(mm)%uSlip(i,j,1) = BCData(mm)%uSlipALE(0,i,j,1)
-           BCData(mm)%uSlip(i,j,2) = BCData(mm)%uSlipALE(0,i,j,2)
-           BCData(mm)%uSlip(i,j,3) = BCData(mm)%uSlipALE(0,i,j,3)
-        enddo
-     enddo
-  enddo recoverUS
-
-end subroutine recoverLevelALEBC_block
-
-  ! ----------------------------------------------------------------------
-  !                                                                      |
-  !                    No Tapenade Routine below this line               |
-  !                                                                      |
-  ! ----------------------------------------------------------------------
+    subroutine recoverLevelALEBC_block
+        !
+        !      ******************************************************************
+        !      *                                                                *
+        !      * recoverLevelALEBC_block recovers current geometric data on     *
+        !      * boundary from temporary interpolation                          *
+        !      *                                                                *
+        !      ******************************************************************
+        !
+        use blockPointers
+        use inputUnsteady
+        use inputPhysics
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, nn, mm, kk
+
+        if (.not. useALE .or. equationMode .ne. unsteady) then
+            return
+        end if
+
+        recoverNM: do mm = 1, nBocos
+            do j = BCData(mm)%jcBeg, BCData(mm)%jcEnd
+                do i = BCData(mm)%icBeg, BCData(mm)%icEnd
+                    BCData(mm)%norm(i, j, 1) = BCData(mm)%normALE(0, i, j, 1)
+                    BCData(mm)%norm(i, j, 2) = BCData(mm)%normALE(0, i, j, 2)
+                    BCData(mm)%norm(i, j, 3) = BCData(mm)%normALE(0, i, j, 3)
+                end do
+            end do
+        end do recoverNM
+
+        recoverRF: do mm = 1, nBocos
+            testAssoc: if (associated(BCData(mm)%rFace)) then
+                do j = BCData(mm)%jcBeg, BCData(mm)%jcEnd
+                    do i = BCData(mm)%icBeg, BCData(mm)%icEnd
+                        BCData(mm)%rFace(i, j) = BCData(mm)%rFaceALE(0, i, j)
+                    end do
+                end do
+            end if testAssoc
+        end do recoverRF
+
+        recoverUS: do mm = 1, nViscBocos
+            do j = BCData(mm)%jcBeg, BCData(mm)%jcEnd
+                do i = BCData(mm)%icBeg, BCData(mm)%icEnd
+                    BCData(mm)%uSlip(i, j, 1) = BCData(mm)%uSlipALE(0, i, j, 1)
+                    BCData(mm)%uSlip(i, j, 2) = BCData(mm)%uSlipALE(0, i, j, 2)
+                    BCData(mm)%uSlip(i, j, 3) = BCData(mm)%uSlipALE(0, i, j, 3)
+                end do
+            end do
+        end do recoverUS
+
+    end subroutine recoverLevelALEBC_block
+
+    ! ----------------------------------------------------------------------
+    !                                                                      |
+    !                    No Tapenade Routine below this line               |
+    !                                                                      |
+    ! ----------------------------------------------------------------------
 #ifndef USE_TAPENADE
 
 !
@@ -607,186 +605,185 @@ end subroutine recoverLevelALEBC_block
 !
 ! ===========================================================
 
-subroutine fillCoor
-
-  use constants
-  use blockPointers, only : ie, je, ke, il, jl, kl, vol, volOld, nDom, xOld, x
-  use iteration, only : groundLevel
-  use inputTimeSpectral, only : nTimeIntervalsSpectral
-  use inputUnsteady, only : useALE
-  use inputPhysics, only : equationMode
-  use utils, only : setPointers
-  implicit none
-  !
-  !      Local variables.
-  !
-  integer(kind=intType) :: i,j,k,nn,kk
-
-  if (equationMode .ne. unsteady)  then
-     return
-  end if
-
-  spectralLoop: do kk=1,nTimeIntervalsSpectral
-     domains: do nn=1,nDom
-
-        ! Set the pointers for this block on the ground level.
-
-        call setPointers(nn, groundLevel,kk)
-
-        do k=0,ke
-           do j=0,je
-              do i=0,ie
-                 xOld(:,i,j,k,1) = x(i,j,k,1)
-                 xOld(:,i,j,k,2) = x(i,j,k,2)
-                 xOld(:,i,j,k,3) = x(i,j,k,3)
-              enddo
-           enddo
-        enddo
-
-        do k=2,kl
-           do j=2,jl
-              do i=2,il
-                 volOld(:,i,j,k) = vol(i,j,k)
-              enddo
-           enddo
-        enddo
-
-     end do domains
-  end do spectralLoop
-
-end subroutine fillCoor
+    subroutine fillCoor
+
+        use constants
+        use blockPointers, only: ie, je, ke, il, jl, kl, vol, volOld, nDom, xOld, x
+        use iteration, only: groundLevel
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use inputUnsteady, only: useALE
+        use inputPhysics, only: equationMode
+        use utils, only: setPointers
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, nn, kk
+
+        if (equationMode .ne. unsteady) then
+            return
+        end if
+
+        spectralLoop: do kk = 1, nTimeIntervalsSpectral
+            domains: do nn = 1, nDom
+
+                ! Set the pointers for this block on the ground level.
+
+                call setPointers(nn, groundLevel, kk)
+
+                do k = 0, ke
+                    do j = 0, je
+                        do i = 0, ie
+                            xOld(:, i, j, k, 1) = x(i, j, k, 1)
+                            xOld(:, i, j, k, 2) = x(i, j, k, 2)
+                            xOld(:, i, j, k, 3) = x(i, j, k, 3)
+                        end do
+                    end do
+                end do
+
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            volOld(:, i, j, k) = vol(i, j, k)
+                        end do
+                    end do
+                end do
+
+            end do domains
+        end do spectralLoop
+
+    end subroutine fillCoor
 
 ! ===========================================================
-subroutine storeCoor
-  use constants
-  use blockPointers, only : ie, je, ke, il, jl, kl, xAle, x, nDom
-  use iteration, only : groundLevel
-  use inputTimeSpectral, only : nTimeIntervalsSpectral
-  use inputUnsteady, only : useALE
-  use inputPhysics, only : equationMode
-  use utils, only : setPointers
-  implicit none
-  !
-  !      Local variables.
-  !
-  integer(kind=intType) :: i,j,k,nn,kk
-
-  if (.not. useALE .or. equationMode .ne. unsteady)  then
-     return
-  end if
-
-  spectralLoop: do kk=1,nTimeIntervalsSpectral
-     domains: do nn=1,nDom
-
-        ! Set the pointers for this block on the ground level.
-
-        call setPointers(nn, groundLevel,kk)
-
-        storex : do k = 0,ke
-           do j = 0,je
-              do i = 0,ie
-                 xALE(i,j,k,1) = x(i,j,k,1)
-                 xALE(i,j,k,2) = x(i,j,k,2)
-                 xALE(i,j,k,3) = x(i,j,k,3)
-              enddo
-           enddo
-        enddo storex
-
-     end do domains
-  end do spectralLoop
-
-end subroutine storeCoor
+    subroutine storeCoor
+        use constants
+        use blockPointers, only: ie, je, ke, il, jl, kl, xAle, x, nDom
+        use iteration, only: groundLevel
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use inputUnsteady, only: useALE
+        use inputPhysics, only: equationMode
+        use utils, only: setPointers
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, nn, kk
+
+        if (.not. useALE .or. equationMode .ne. unsteady) then
+            return
+        end if
+
+        spectralLoop: do kk = 1, nTimeIntervalsSpectral
+            domains: do nn = 1, nDom
+
+                ! Set the pointers for this block on the ground level.
+
+                call setPointers(nn, groundLevel, kk)
+
+                storex: do k = 0, ke
+                    do j = 0, je
+                        do i = 0, ie
+                            xALE(i, j, k, 1) = x(i, j, k, 1)
+                            xALE(i, j, k, 2) = x(i, j, k, 2)
+                            xALE(i, j, k, 3) = x(i, j, k, 3)
+                        end do
+                    end do
+                end do storex
+
+            end do domains
+        end do spectralLoop
+
+    end subroutine storeCoor
 
 ! ===========================================================
-subroutine interpCoor(lale)
-  use constants
-  use blockPointers, only : ie, je, ke, il, jl, kl, xOld, xAle, x, nDom
-  use iteration, only : groundLevel, coefMeshALE
-  use inputTimeSpectral, only : nTimeIntervalsSpectral
-  use inputUnsteady, only : useALE
-  use inputPhysics, only : equationMode
-  use utils, only : setPointers
-  implicit none
-  !
-  !      Input variables.
-  !
-  integer(kind=intType), intent(in) :: lale
-  !
-  !      Local variables.
-  !
-  integer(kind=intType) :: i,j,k,nn,kk
-
-  if (.not. useALE .or. equationMode .ne. unsteady)  then
-     return
-  end if
-
-  spectralLoop: do kk=1,nTimeIntervalsSpectral
-     domains: do nn=1,nDom
-
-        ! Set the pointers for this block on the ground level.
-        ! This eq. 11a, found paper by C.Farhat http://dx.doi.org/10.1016/S0021-9991(03)00311-5
-
-        call setPointers(nn, groundLevel,kk)
-
-        interpmesh : do k = 0,ke
-           do j = 0,je
-              do i = 0,ie
-                 x(i,j,k,1) = coefMeshALE(lale,1)*xALE(i,j,k,1) &
-                            + coefMeshALE(lale,2)*xOld(1,i,j,k,1)
-                 x(i,j,k,2) = coefMeshALE(lale,1)*xALE(i,j,k,2) &
-                            + coefMeshALE(lale,2)*xOld(1,i,j,k,2)
-                 x(i,j,k,3) = coefMeshALE(lale,1)*xALE(i,j,k,3) &
-                            + coefMeshALE(lale,2)*xOld(1,i,j,k,3)
-              enddo
-           enddo
-        enddo interpmesh
-
-     end do domains
-  end do spectralLoop
-
-end subroutine interpCoor
+    subroutine interpCoor(lale)
+        use constants
+        use blockPointers, only: ie, je, ke, il, jl, kl, xOld, xAle, x, nDom
+        use iteration, only: groundLevel, coefMeshALE
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use inputUnsteady, only: useALE
+        use inputPhysics, only: equationMode
+        use utils, only: setPointers
+        implicit none
+        !
+        !      Input variables.
+        !
+        integer(kind=intType), intent(in) :: lale
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, nn, kk
+
+        if (.not. useALE .or. equationMode .ne. unsteady) then
+            return
+        end if
+
+        spectralLoop: do kk = 1, nTimeIntervalsSpectral
+            domains: do nn = 1, nDom
+
+                ! Set the pointers for this block on the ground level.
+                ! This eq. 11a, found paper by C.Farhat http://dx.doi.org/10.1016/S0021-9991(03)00311-5
+
+                call setPointers(nn, groundLevel, kk)
+
+                interpmesh: do k = 0, ke
+                    do j = 0, je
+                        do i = 0, ie
+                            x(i, j, k, 1) = coefMeshALE(lale, 1) * xALE(i, j, k, 1) &
+                                            + coefMeshALE(lale, 2) * xOld(1, i, j, k, 1)
+                            x(i, j, k, 2) = coefMeshALE(lale, 1) * xALE(i, j, k, 2) &
+                                            + coefMeshALE(lale, 2) * xOld(1, i, j, k, 2)
+                            x(i, j, k, 3) = coefMeshALE(lale, 1) * xALE(i, j, k, 3) &
+                                            + coefMeshALE(lale, 2) * xOld(1, i, j, k, 3)
+                        end do
+                    end do
+                end do interpmesh
+
+            end do domains
+        end do spectralLoop
+
+    end subroutine interpCoor
 
 ! ===========================================================
-subroutine recoverCoor
-  use constants
-  use blockPointers, only : ie, je, ke, x, xALe, nDom
-  use iteration, only : groundLevel
-  use inputTimeSpectral, only : nTimeIntervalsSpectral
-  use inputUnsteady, only : useALE
-  use inputPhysics, only : equationMode
-  use utils, only : setPointers
-  implicit none
-  !
-  !      Local variables.
-  !
-  integer(kind=intType) :: i,j,k,nn,kk
-
-
-  if (.not. useALE .or. equationMode .ne. unsteady)  then
-     return
-  end if
-
-  spectralLoop: do kk=1,nTimeIntervalsSpectral
-     domains: do nn=1,nDom
-
-        ! Set the pointers for this block on the ground level.
-
-        call setPointers(nn, groundLevel,kk)
-
-        recoverx : do k = 0,ke
-           do j = 0,je
-              do i = 0,ie
-                 x(i,j,k,1) = xALE(i,j,k,1)
-                 x(i,j,k,2) = xALE(i,j,k,2)
-                 x(i,j,k,3) = xALE(i,j,k,3)
-              enddo
-           enddo
-        enddo recoverx
-
-     end do domains
-  end do spectralLoop
-
-end subroutine recoverCoor
+    subroutine recoverCoor
+        use constants
+        use blockPointers, only: ie, je, ke, x, xALe, nDom
+        use iteration, only: groundLevel
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use inputUnsteady, only: useALE
+        use inputPhysics, only: equationMode
+        use utils, only: setPointers
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, nn, kk
+
+        if (.not. useALE .or. equationMode .ne. unsteady) then
+            return
+        end if
+
+        spectralLoop: do kk = 1, nTimeIntervalsSpectral
+            domains: do nn = 1, nDom
+
+                ! Set the pointers for this block on the ground level.
+
+                call setPointers(nn, groundLevel, kk)
+
+                recoverx: do k = 0, ke
+                    do j = 0, je
+                        do i = 0, ie
+                            x(i, j, k, 1) = xALE(i, j, k, 1)
+                            x(i, j, k, 2) = xALE(i, j, k, 2)
+                            x(i, j, k, 3) = xALE(i, j, k, 3)
+                        end do
+                    end do
+                end do recoverx
+
+            end do domains
+        end do spectralLoop
+
+    end subroutine recoverCoor
 
 !
 !      ******************************************************************
@@ -808,251 +805,250 @@ end subroutine recoverCoor
 !      ******************************************************************
 !
 ! ===========================================================
-subroutine setLevelALE(setType)
-  !
-  !      ******************************************************************
-  !      *                                                                *
-  !      * setLevelALE sets specified ALE level(s) with current data.     *
-  !      *                                                                *
-  !      ******************************************************************
-  !
-  use blockPointers
-  use iteration
-  use inputTimeSpectral
-  use inputUnsteady
-  use inputPhysics
-  use utils, only : setPointers
-  implicit none
-  !
-  !      Input variables.
-  !
-  integer(kind=intType), intent(in) :: setType
-  !
-  !      Local variables.
-  !
-  integer(kind=intType) :: i,j,k,l, aleBeg,aleEnd, nn,mm,kk
-  integer(kind=intType) :: ll,nlvl
-
-  if (.not. useALE .or. equationMode .ne. unsteady)  then
-     return
-  end if
-
-  ! Determine what to do based on input.
-  ! -1 is used to initialize all ALE steps with inital values
-  select case (setType)
-  case (-1_intType)
-     aleBeg = 0_intType
-     aleEnd = nALEsteps
-  case (-2_intType)
-     aleBeg = 1_intType
-     aleEnd = nALEMeshes
-  case default
-     aleBeg = setType
-     aleEnd = setType
-  end select
-
-  nlvl = ubound(flowDoms,2)
-  spectralLoop: do kk=1,nTimeIntervalsSpectral
-     levels: do ll=groundLevel,nlvl
-        domains: do nn=1,nDom
-
-           ! Set the pointers for this block on the ground level.
-           call setPointers(nn, ll, kk)
-
-           blkALE : do l = aleBeg,aleEnd
-              fillI2 : do k = 1,ke
-                 do j = 1,je
-                    do i = 0,ie
-                       sFaceIALE(l,i,j,k) = sFaceI(i,j,k)
-                       sIALE(l,i,j,k,1)   = sI(i,j,k,1)
-                       sIALE(l,i,j,k,2)   = sI(i,j,k,2)
-                       sIALE(l,i,j,k,3)   = sI(i,j,k,3)
-                    enddo
-                 enddo
-              enddo fillI2
-
-              fillJ2 : do k = 1,ke
-                 do j = 0,je
-                    do i = 1,ie
-                       sFaceJALE(l,i,j,k) = sFaceJ(i,j,k)
-                       sJALE(l,i,j,k,1)   = sJ(i,j,k,1)
-                       sJALE(l,i,j,k,2)   = sJ(i,j,k,2)
-                       sJALE(l,i,j,k,3)   = sJ(i,j,k,3)
-                    enddo
-                 enddo
-              enddo fillJ2
-
-              fillK2 : do k = 0,ke
-                 do j = 1,je
-                    do i = 1,ie
-                       sFaceKALE(l,i,j,k) = sFaceK(i,j,k)
-                       sKALE(l,i,j,k,1)   = sK(i,j,k,1)
-                       sKALE(l,i,j,k,2)   = sK(i,j,k,2)
-                       sKALE(l,i,j,k,3)   = sK(i,j,k,3)
-                    enddo
-                 enddo
-              enddo fillK2
-           enddo blkALE
-
-           normLoop: do mm=1,nBocos
-              do l = aleBeg,aleEnd
-                 do j=BCData(mm)%jcBeg, BCData(mm)%jcEnd
-                    do i=BCData(mm)%icBeg, BCData(mm)%icEnd
-                       BCData(mm)%normALE(l,i,j,1) = BCData(mm)%norm(i,j,1)
-                       BCData(mm)%normALE(l,i,j,2) = BCData(mm)%norm(i,j,2)
-                       BCData(mm)%normALE(l,i,j,3) = BCData(mm)%norm(i,j,3)
-                    enddo
-                 enddo
-              enddo
-           enddo normLoop
-
-           rFaceLoop: do mm=1,nBocos
-              testAssoc: if( associated(BCData(mm)%rFace) ) then
-                 do l = aleBeg,aleEnd
-                    do j=BCData(mm)%jcBeg, BCData(mm)%jcEnd
-                       do i=BCData(mm)%icBeg, BCData(mm)%icEnd
-                          BCData(mm)%rFaceALE(l,i,j) = BCData(mm)%rFace(i,j)
-                       enddo
-                    enddo
-                 enddo
-              endif testAssoc
-           enddo rFaceLoop
-
-           uSlipLoop: do mm=1,nViscBocos
-              do l = aleBeg,aleEnd
-                 do j=BCData(mm)%jcBeg, BCData(mm)%jcEnd
-                    do i=BCData(mm)%icBeg, BCData(mm)%icEnd
-                       BCData(mm)%uSlipALE(l,i,j,1) = BCData(mm)%uSlip(i,j,1)
-                       BCData(mm)%uSlipALE(l,i,j,2) = BCData(mm)%uSlip(i,j,2)
-                       BCData(mm)%uSlipALE(l,i,j,3) = BCData(mm)%uSlip(i,j,3)
-                    enddo
-                 enddo
-              enddo
-           enddo uSlipLoop
-
-        end do domains
-     end do levels
-  end do spectralLoop
+    subroutine setLevelALE(setType)
+        !
+        !      ******************************************************************
+        !      *                                                                *
+        !      * setLevelALE sets specified ALE level(s) with current data.     *
+        !      *                                                                *
+        !      ******************************************************************
+        !
+        use blockPointers
+        use iteration
+        use inputTimeSpectral
+        use inputUnsteady
+        use inputPhysics
+        use utils, only: setPointers
+        implicit none
+        !
+        !      Input variables.
+        !
+        integer(kind=intType), intent(in) :: setType
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, l, aleBeg, aleEnd, nn, mm, kk
+        integer(kind=intType) :: ll, nlvl
+
+        if (.not. useALE .or. equationMode .ne. unsteady) then
+            return
+        end if
+
+        ! Determine what to do based on input.
+        ! -1 is used to initialize all ALE steps with inital values
+        select case (setType)
+        case (-1_intType)
+            aleBeg = 0_intType
+            aleEnd = nALEsteps
+        case (-2_intType)
+            aleBeg = 1_intType
+            aleEnd = nALEMeshes
+        case default
+            aleBeg = setType
+            aleEnd = setType
+        end select
 
-end subroutine setLevelALE
+        nlvl = ubound(flowDoms, 2)
+        spectralLoop: do kk = 1, nTimeIntervalsSpectral
+            levels: do ll = groundLevel, nlvl
+                domains: do nn = 1, nDom
+
+                    ! Set the pointers for this block on the ground level.
+                    call setPointers(nn, ll, kk)
+
+                    blkALE: do l = aleBeg, aleEnd
+                        fillI2: do k = 1, ke
+                            do j = 1, je
+                                do i = 0, ie
+                                    sFaceIALE(l, i, j, k) = sFaceI(i, j, k)
+                                    sIALE(l, i, j, k, 1) = sI(i, j, k, 1)
+                                    sIALE(l, i, j, k, 2) = sI(i, j, k, 2)
+                                    sIALE(l, i, j, k, 3) = sI(i, j, k, 3)
+                                end do
+                            end do
+                        end do fillI2
+
+                        fillJ2: do k = 1, ke
+                            do j = 0, je
+                                do i = 1, ie
+                                    sFaceJALE(l, i, j, k) = sFaceJ(i, j, k)
+                                    sJALE(l, i, j, k, 1) = sJ(i, j, k, 1)
+                                    sJALE(l, i, j, k, 2) = sJ(i, j, k, 2)
+                                    sJALE(l, i, j, k, 3) = sJ(i, j, k, 3)
+                                end do
+                            end do
+                        end do fillJ2
+
+                        fillK2: do k = 0, ke
+                            do j = 1, je
+                                do i = 1, ie
+                                    sFaceKALE(l, i, j, k) = sFaceK(i, j, k)
+                                    sKALE(l, i, j, k, 1) = sK(i, j, k, 1)
+                                    sKALE(l, i, j, k, 2) = sK(i, j, k, 2)
+                                    sKALE(l, i, j, k, 3) = sK(i, j, k, 3)
+                                end do
+                            end do
+                        end do fillK2
+                    end do blkALE
+
+                    normLoop: do mm = 1, nBocos
+                        do l = aleBeg, aleEnd
+                            do j = BCData(mm)%jcBeg, BCData(mm)%jcEnd
+                                do i = BCData(mm)%icBeg, BCData(mm)%icEnd
+                                    BCData(mm)%normALE(l, i, j, 1) = BCData(mm)%norm(i, j, 1)
+                                    BCData(mm)%normALE(l, i, j, 2) = BCData(mm)%norm(i, j, 2)
+                                    BCData(mm)%normALE(l, i, j, 3) = BCData(mm)%norm(i, j, 3)
+                                end do
+                            end do
+                        end do
+                    end do normLoop
+
+                    rFaceLoop: do mm = 1, nBocos
+                        testAssoc: if (associated(BCData(mm)%rFace)) then
+                            do l = aleBeg, aleEnd
+                                do j = BCData(mm)%jcBeg, BCData(mm)%jcEnd
+                                    do i = BCData(mm)%icBeg, BCData(mm)%icEnd
+                                        BCData(mm)%rFaceALE(l, i, j) = BCData(mm)%rFace(i, j)
+                                    end do
+                                end do
+                            end do
+                        end if testAssoc
+                    end do rFaceLoop
+
+                    uSlipLoop: do mm = 1, nViscBocos
+                        do l = aleBeg, aleEnd
+                            do j = BCData(mm)%jcBeg, BCData(mm)%jcEnd
+                                do i = BCData(mm)%icBeg, BCData(mm)%icEnd
+                                    BCData(mm)%uSlipALE(l, i, j, 1) = BCData(mm)%uSlip(i, j, 1)
+                                    BCData(mm)%uSlipALE(l, i, j, 2) = BCData(mm)%uSlip(i, j, 2)
+                                    BCData(mm)%uSlipALE(l, i, j, 3) = BCData(mm)%uSlip(i, j, 3)
+                                end do
+                            end do
+                        end do
+                    end do uSlipLoop
+
+                end do domains
+            end do levels
+        end do spectralLoop
+
+    end subroutine setLevelALE
 
 ! ===========================================================
-subroutine shiftLevelALE
-  !
-  !      ******************************************************************
-  !      *                                                                *
-  !      * shiftLevelALE move current ALE levels to older levels and      *
-  !      * update them with current data.                                 *
-  !      *                                                                *
-  !      ******************************************************************
-  !
-  use blockPointers
-  use iteration
-  use inputTimeSpectral
-  use inputUnsteady
-  use inputPhysics
-  use utils, only : setPointers
-  implicit none
-  !
-  !      Local variables.
-  !
-  integer(kind=intType) :: i,j,k,l,lo,nn,mm,kk
-
-  if (.not. useALE .or. equationMode .ne. unsteady)  then
-     return
-  end if
-
-  spectralLoop: do kk=1,nTimeIntervalsSpectral
-     domains: do nn=1,nDom
-
-        ! Set the pointers for this block on the ground level.
-
-        call setPointers(nn, groundLevel, kk)
-
-        blkALE : do l = nALEsteps, nALEMeshes+1, -1
-           lo = l - nALEMeshes
-
-           fillI2 : do k = 1,ke
-              do j = 1,je
-                 do i = 0,ie
-                    sFaceIALE(l,i,j,k) = sFaceIALE(lo,i,j,k)
-                    sIALE(l,i,j,k,1)   = sIALE(lo,i,j,k,1)
-                    sIALE(l,i,j,k,2)   = sIALE(lo,i,j,k,2)
-                    sIALE(l,i,j,k,3)   = sIALE(lo,i,j,k,3)
-                 enddo
-              enddo
-           enddo fillI2
-
-           fillJ2 : do k = 1,ke
-              do j = 0,je
-                 do i = 1,ie
-                    sFaceJALE(l,i,j,k) = sFaceJALE(lo,i,j,k)
-                    sJALE(l,i,j,k,1)   = sJALE(lo,i,j,k,1)
-                    sJALE(l,i,j,k,2)   = sJALE(lo,i,j,k,2)
-                    sJALE(l,i,j,k,3)   = sJALE(lo,i,j,k,3)
-                 enddo
-              enddo
-           enddo fillJ2
-
-           fillK2 : do k = 0,ke
-              do j = 1,je
-                 do i = 1,ie
-                    sFaceKALE(l,i,j,k) = sFaceKALE(lo,i,j,k)
-                    sKALE(l,i,j,k,1)   = sKALE(lo,i,j,k,1)
-                    sKALE(l,i,j,k,2)   = sKALE(lo,i,j,k,2)
-                    sKALE(l,i,j,k,3)   = sKALE(lo,i,j,k,3)
-                 enddo
-              enddo
-           enddo fillK2
-        enddo blkALE
-
-        normLoop: do mm=1,nBocos
-           do l = nALEsteps, nALEMeshes+1, -1
-              lo = l - nALEMeshes
-              do j=BCData(mm)%jcBeg, BCData(mm)%jcEnd
-                 do i=BCData(mm)%icBeg, BCData(mm)%icEnd
-                    BCData(mm)%normALE(l,i,j,1) = BCData(mm)%normALE(lo,i,j,1)
-                    BCData(mm)%normALE(l,i,j,2) = BCData(mm)%normALE(lo,i,j,2)
-                    BCData(mm)%normALE(l,i,j,3) = BCData(mm)%normALE(lo,i,j,3)
-                 enddo
-              enddo
-           enddo
-        enddo normLoop
-
-        rFaceLoop: do mm=1,nBocos
-           testAssoc: if( associated(BCData(mm)%rFace) ) then
-              do l = nALEsteps, nALEMeshes+1, -1
-                 lo = l - nALEMeshes
-                 do j=BCData(mm)%jcBeg, BCData(mm)%jcEnd
-                    do i=BCData(mm)%icBeg, BCData(mm)%icEnd
-                       BCData(mm)%rFaceALE(l,i,j) = BCData(mm)%rFaceALE(lo,i,j)
-                    enddo
-                 enddo
-              enddo
-           endif testAssoc
-        enddo rFaceLoop
-
-        uSlipLoop: do mm=1,nViscBocos
-           do l = nALEsteps, nALEMeshes+1, -1
-              lo = l - nALEMeshes
-              do j=BCData(mm)%jcBeg, BCData(mm)%jcEnd
-                 do i=BCData(mm)%icBeg, BCData(mm)%icEnd
-                    BCData(mm)%uSlipALE(l,i,j,1) = BCData(mm)%uSlipALE(lo,i,j,1)
-                    BCData(mm)%uSlipALE(l,i,j,2) = BCData(mm)%uSlipALE(lo,i,j,2)
-                    BCData(mm)%uSlipALE(l,i,j,3) = BCData(mm)%uSlipALE(lo,i,j,3)
-                 enddo
-              enddo
-           enddo
-        enddo uSlipLoop
-
-     end do domains
-  end do spectralLoop
-
-  ! Set latest levels with current data
-  call setLevelALE(-2_intType)
-
-end subroutine shiftLevelALE
+    subroutine shiftLevelALE
+        !
+        !      ******************************************************************
+        !      *                                                                *
+        !      * shiftLevelALE move current ALE levels to older levels and      *
+        !      * update them with current data.                                 *
+        !      *                                                                *
+        !      ******************************************************************
+        !
+        use blockPointers
+        use iteration
+        use inputTimeSpectral
+        use inputUnsteady
+        use inputPhysics
+        use utils, only: setPointers
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, l, lo, nn, mm, kk
+
+        if (.not. useALE .or. equationMode .ne. unsteady) then
+            return
+        end if
+
+        spectralLoop: do kk = 1, nTimeIntervalsSpectral
+            domains: do nn = 1, nDom
+
+                ! Set the pointers for this block on the ground level.
+
+                call setPointers(nn, groundLevel, kk)
+
+                blkALE: do l = nALEsteps, nALEMeshes + 1, -1
+                    lo = l - nALEMeshes
+
+                    fillI2: do k = 1, ke
+                        do j = 1, je
+                            do i = 0, ie
+                                sFaceIALE(l, i, j, k) = sFaceIALE(lo, i, j, k)
+                                sIALE(l, i, j, k, 1) = sIALE(lo, i, j, k, 1)
+                                sIALE(l, i, j, k, 2) = sIALE(lo, i, j, k, 2)
+                                sIALE(l, i, j, k, 3) = sIALE(lo, i, j, k, 3)
+                            end do
+                        end do
+                    end do fillI2
+
+                    fillJ2: do k = 1, ke
+                        do j = 0, je
+                            do i = 1, ie
+                                sFaceJALE(l, i, j, k) = sFaceJALE(lo, i, j, k)
+                                sJALE(l, i, j, k, 1) = sJALE(lo, i, j, k, 1)
+                                sJALE(l, i, j, k, 2) = sJALE(lo, i, j, k, 2)
+                                sJALE(l, i, j, k, 3) = sJALE(lo, i, j, k, 3)
+                            end do
+                        end do
+                    end do fillJ2
+
+                    fillK2: do k = 0, ke
+                        do j = 1, je
+                            do i = 1, ie
+                                sFaceKALE(l, i, j, k) = sFaceKALE(lo, i, j, k)
+                                sKALE(l, i, j, k, 1) = sKALE(lo, i, j, k, 1)
+                                sKALE(l, i, j, k, 2) = sKALE(lo, i, j, k, 2)
+                                sKALE(l, i, j, k, 3) = sKALE(lo, i, j, k, 3)
+                            end do
+                        end do
+                    end do fillK2
+                end do blkALE
+
+                normLoop: do mm = 1, nBocos
+                    do l = nALEsteps, nALEMeshes + 1, -1
+                        lo = l - nALEMeshes
+                        do j = BCData(mm)%jcBeg, BCData(mm)%jcEnd
+                            do i = BCData(mm)%icBeg, BCData(mm)%icEnd
+                                BCData(mm)%normALE(l, i, j, 1) = BCData(mm)%normALE(lo, i, j, 1)
+                                BCData(mm)%normALE(l, i, j, 2) = BCData(mm)%normALE(lo, i, j, 2)
+                                BCData(mm)%normALE(l, i, j, 3) = BCData(mm)%normALE(lo, i, j, 3)
+                            end do
+                        end do
+                    end do
+                end do normLoop
+
+                rFaceLoop: do mm = 1, nBocos
+                    testAssoc: if (associated(BCData(mm)%rFace)) then
+                        do l = nALEsteps, nALEMeshes + 1, -1
+                            lo = l - nALEMeshes
+                            do j = BCData(mm)%jcBeg, BCData(mm)%jcEnd
+                                do i = BCData(mm)%icBeg, BCData(mm)%icEnd
+                                    BCData(mm)%rFaceALE(l, i, j) = BCData(mm)%rFaceALE(lo, i, j)
+                                end do
+                            end do
+                        end do
+                    end if testAssoc
+                end do rFaceLoop
+
+                uSlipLoop: do mm = 1, nViscBocos
+                    do l = nALEsteps, nALEMeshes + 1, -1
+                        lo = l - nALEMeshes
+                        do j = BCData(mm)%jcBeg, BCData(mm)%jcEnd
+                            do i = BCData(mm)%icBeg, BCData(mm)%icEnd
+                                BCData(mm)%uSlipALE(l, i, j, 1) = BCData(mm)%uSlipALE(lo, i, j, 1)
+                                BCData(mm)%uSlipALE(l, i, j, 2) = BCData(mm)%uSlipALE(lo, i, j, 2)
+                                BCData(mm)%uSlipALE(l, i, j, 3) = BCData(mm)%uSlipALE(lo, i, j, 3)
+                            end do
+                        end do
+                    end do
+                end do uSlipLoop
+
+            end do domains
+        end do spectralLoop
+
+        ! Set latest levels with current data
+        call setLevelALE(-2_intType)
+
+    end subroutine shiftLevelALE
 #endif
 
-
 end module ALEUtils
diff --git a/src/solver/BCRoutines.F90 b/src/solver/BCRoutines.F90
index 9b7c66896..9ce054802 100644
--- a/src/solver/BCRoutines.F90
+++ b/src/solver/BCRoutines.F90
@@ -6,1917 +6,1915 @@
 
 module BCRoutines
 
-  implicit none
-  save
+    implicit none
+    save
 
 contains
 
 #ifndef USE_TAPENADE
-subroutine applyAllBC(secondHalo)
-  !
-  !       applyAllBC applies all boundary conditions for the all
-  !       blocks on the grid level currentLevel.
-  !
-  use constants
-  use blockPointers, only : nDom
-  use inputTimeSpectral, only : nTimeIntervalsSpectral
-  use iteration, only :currentLevel
-  use utils, only : setPointers
-  use ALEUtils, only : interpLevelALEBC_Block, recoverLevelALEBC_block
-  implicit none
-  !
-  !      Subroutine arguments.
-  !
-  logical, intent(in) :: secondHalo
-  !
-  !      Local Variables
-  integer(kind=intType) :: sps, nn
+    subroutine applyAllBC(secondHalo)
+        !
+        !       applyAllBC applies all boundary conditions for the all
+        !       blocks on the grid level currentLevel.
+        !
+        use constants
+        use blockPointers, only: nDom
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use iteration, only: currentLevel
+        use utils, only: setPointers
+        use ALEUtils, only: interpLevelALEBC_Block, recoverLevelALEBC_block
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        logical, intent(in) :: secondHalo
+        !
+        !      Local Variables
+        integer(kind=intType) :: sps, nn
+
+        ! Loop over the number of spectral solutions.
+
+        spectralLoop: do sps = 1, nTimeIntervalsSpectral
+
+            ! Loop over the number of blocks.
+
+            domains: do nn = 1, nDom
+
+                ! Set the pointers for this block.
+
+                call setPointers(nn, currentLevel, sps)
+
+                call interpLevelALEBC_block
+                call applyAllBC_block(secondHalo)
+                call recoverLevelALEBC_block
+
+            end do domains
+        end do spectralLoop
+
+    end subroutine applyAllBC
+#endif
 
+    subroutine applyAllBC_block(secondHalo)
+
+        ! Apply BC's for a single block
+        use constants
+        use blockPointers, only: nBocos, BCType, nViscBocos, w, dw, x, vol, il, jl, kl, &
+                                 sectionID, wOld, volOld, BCData, &
+                                 si, sj, sk, sfacei, sfacej, sfacek, rlv, gamma, p, rev, &
+                                 bmtj1, bmtj2, scratch, bmtk2, bmtk1, &
+                                 fw, aa, d2wall, bmti1, bmti2, s
+        use utils, only: setBCPointers, getCorrectForK
+        use BCPointers
+        implicit none
+
+        ! Subroutine arguments.
+        logical, intent(in) :: secondHalo
+
+        ! Local variables.
+        logical :: correctForK
+        integer(kind=intType) :: nn
+        !
+        ! Determine whether or not the total energy must be corrected
+        ! for the presence of the turbulent kinetic energy.
+        correctForK = getCorrectForK()
+
+        ! Apply all the boundary conditions. The order is important!  Only
+        ! some of them have been AD'ed
+
+        ! ------------------------------------
+        !  Symmetry Boundary Condition
+        ! ------------------------------------
+        !$AD II-LOOP
+        do nn = 1, nBocos
+            if (BCType(nn) == symm) then
+                call setBCPointers(nn, .False.)
+                call bcSymm1stHalo(nn)
+            end if
+        end do
+
+        if (secondHalo) then
+            !$AD II-LOOP
+            do nn = 1, nBocos
+                if (BCType(nn) == symm) then
+                    call setBCPointers(nn, .False.)
+                    call bcSymm2ndHalo(nn)
+                end if
+            end do
+        end if
+
+        ! ------------------------------------
+        !  Symmetry Polar Boundary Condition
+        ! ------------------------------------
+        !$AD II-LOOP
+        do nn = 1, nBocos
+            if (BCType(nn) == symmPolar) then
+                call setBCPointers(nn, .True.)
+                call bcSymmPolar1stHalo(nn)
+            end if
+        end do
+
+        if (secondHalo) then
+            !$AD II-LOOP
+            do nn = 1, nBocos
+                if (BCType(nn) == symmPolar) then
+                    call setBCPointers(nn, .True.)
+                    call bcSymmPolar2ndHalo(nn)
+                end if
+            end do
+        end if
+
+        ! ------------------------------------
+        !  Adibatic Wall Boundary Condition
+        ! ------------------------------------
+        !$AD II-LOOP
+        do nn = 1, nViscBocos
+            if (BCType(nn) == NSWallAdiabatic) then
+                call setBCPointers(nn, .False.)
+                call bcNSWallAdiabatic(nn, secondHalo, correctForK)
+            end if
+        end do
+
+        ! ------------------------------------
+        !  Isotermal Wall Boundary Condition
+        ! ------------------------------------
+        !$AD II-LOOP
+        do nn = 1, nViscBocos
+            if (BCType(nn) == NSWallIsoThermal) then
+                call setBCPointers(nn, .False.)
+                call bcNSWallIsothermal(nn, secondHalo, correctForK)
+            end if
+        end do
+
+        ! ------------------------------------
+        !  Farfield Boundary Condition
+        ! ------------------------------------
+        !$AD II-LOOP
+        do nn = 1, nBocos
+            if (BCType(nn) == farField) then
+                call setBCPointers(nn, .False.)
+                call bcFarField(nn, secondHalo, correctForK)
+            end if
+        end do
+
+        ! ------------------------------------
+        !  Subsonic Outflow Boundary Condition
+        ! ------------------------------------
+        do nn = 1, nBocos
+            if (BCType(nn) == subSonicOutFlow .or. &
+                BCType(nn) == MassBleedOutflow) then
+                call setBCPointers(nn, .False.)
+                call bcSubSonicOutFlow(nn, secondHalo, correctForK)
+            end if
+        end do
+
+        ! ------------------------------------
+        !  Subsonic Inflow Boundary Condition
+        ! ------------------------------------
+        do nn = 1, nBocos
+            if (BCType(nn) == subSonicInFlow) then
+                call setBCPointers(nn, .False.)
+                call bcSubSonicInflow(nn, secondHalo, correctForK)
+            end if
+        end do
+
+        ! ------------------------------------
+        !  Extrapolation Boundary Condition
+        ! ------------------------------------
+        ! Extrapolation boundary conditions; this also includes
+        ! the supersonic outflow boundary conditions. The difference
+        ! between the two is that the extrap boundary conditions
+        ! correspond to singular lines and supersonic outflow
+        ! boundaries to physical boundaries. The treatment however
+        ! is identical.
+        do nn = 1, nBocos
+            if (BCType(nn) == extrap .or. &
+                BCType(nn) == SupersonicOutFlow) then
+                call setBCPointers(nn, .False.)
+                call bcExtrap(nn, secondHalo, correctForK)
+            end if
+        end do
+
+        ! ------------------------------------
+        !  Euler Wall Boundary Condition
+        ! ------------------------------------
+        !$AD II-LOOP
+        do nn = 1, nBocos
+            if (BCType(nn) == EulerWall) then
+                call setBCPointers(nn, .True.)
+                call bcEulerWall(nn, secondHalo, correctForK)
+            end if
+        end do
+
+        ! ------------------------------------
+        !  Supersonic inflow condition
+        ! ------------------------------------
+        do nn = 1, nBocos
+            if (BCType(nn) == SupersonicInflow) then
+                call setBCPointers(nn, .False.)
+                call bcSupersonicInflow(nn, secondHalo, correctForK)
+            end if
+        end do
+
+    end subroutine applyAllBC_block
+
+    ! ===================================================================
+    !   Actual implementation of each of the boundary condition routines
+    ! ===================================================================
+    subroutine bcSymm1stHalo(nn)
+
+        !  bcSymm1stHalo applies the symmetry boundary conditions to a
+        !  block.  * It is assumed that the pointers in blockPointers are
+        !  already set to the correct block on the correct grid level.
+        !
+        !  In case also the second halo must be set, a second loop is
+        !  execulted calling bcSymm2ndhalo. This is the only correct way
+        !  in case the block contains only 1 cell between two symmetry
+        !  planes, i.e. a 2D problem.
+
+        use constants
+        use blockPointers, only: BCdata
+        use flowVarRefState, only: viscous, eddyModel
+        use BCPointers, only: gamma1, gamma2, ww1, ww2, pp1, pp2, rlv1, rlv2, &
+                              iStart, jStart, iSize, jSize, rev1, rev2
+        implicit none
+
+        ! Subroutine arguments.
+        integer(kind=intType), intent(in) :: nn
+
+        ! Local variables.
+        integer(kind=intType) :: i, j, l, ii
+        real(kind=realType) :: vn, nnx, nny, nnz
+
+        ! Loop over the generic subface to set the state in the
+        ! 1-st level halos
+
+        !$AD II-LOOP
+        do ii = 0, isize * jsize - 1
+            i = mod(ii, isize) + iStart
+            j = ii / isize + jStart
+
+            ! Determine twice the normal velocity component,
+            ! which must be substracted from the donor velocity
+            ! to obtain the halo velocity.
+
+            vn = two * (ww2(i, j, ivx) * BCData(nn)%norm(i, j, 1) + &
+                        ww2(i, j, ivy) * BCData(nn)%norm(i, j, 2) + &
+                        ww2(i, j, ivz) * BCData(nn)%norm(i, j, 3))
+
+            ! Determine the flow variables in the halo cell.
+
+            ww1(i, j, irho) = ww2(i, j, irho)
+            ww1(i, j, ivx) = ww2(i, j, ivx) - vn * BCData(nn)%norm(i, j, 1)
+            ww1(i, j, ivy) = ww2(i, j, ivy) - vn * BCData(nn)%norm(i, j, 2)
+            ww1(i, j, ivz) = ww2(i, j, ivz) - vn * BCData(nn)%norm(i, j, 3)
+            ww1(i, j, irhoE) = ww2(i, j, irhoE)
+
+            ! Set the pressure and gamma and possibly the
+            ! laminar and eddy viscosity in the halo.
+
+            gamma1(i, j) = gamma2(i, j)
+            pp1(i, j) = pp2(i, j)
+            if (viscous) rlv1(i, j) = rlv2(i, j)
+            if (eddyModel) rev1(i, j) = rev2(i, j)
+        end do
+    end subroutine bcSymm1stHalo
+
+    subroutine bcSymm2ndHalo(nn)
+
+        !  bcSymm2ndHalo applies the symmetry boundary conditions to a
+        !  block for the 2nd halo. This routine is separate as it makes
+        !  AD slightly easier.
+        use constants
+        use blockPointers, only: BCdata
+        use flowVarRefState, only: viscous, eddyModel
+        use BCPointers, only: gamma0, gamma3, ww0, ww3, pp0, pp3, rlv0, rlv3, &
+                              rev0, rev3, iStart, jStart, iSize, jSize
+        implicit none
+
+        ! Subroutine arguments.
+        integer(kind=intType), intent(in) :: nn
+
+        ! Local variables.
+        integer(kind=intType) :: i, j, l, ii
+        real(kind=realType) :: vn, nnx, nny, nnz
+
+        ! If we need the second halo, do everything again, but using ww0,
+        ! ww3 etc instead of ww2 and ww1.
+
+        !$AD II-LOOP
+        do ii = 0, isize * jsize - 1
+            i = mod(ii, isize) + iStart
+            j = ii / isize + jStart
+
+            vn = two * (ww3(i, j, ivx) * BCData(nn)%norm(i, j, 1) + &
+                        ww3(i, j, ivy) * BCData(nn)%norm(i, j, 2) + &
+                        ww3(i, j, ivz) * BCData(nn)%norm(i, j, 3))
+
+            ! Determine the flow variables in the halo cell.
+            ww0(i, j, irho) = ww3(i, j, irho)
+            ww0(i, j, ivx) = ww3(i, j, ivx) - vn * BCData(nn)%norm(i, j, 1)
+            ww0(i, j, ivy) = ww3(i, j, ivy) - vn * BCData(nn)%norm(i, j, 2)
+            ww0(i, j, ivz) = ww3(i, j, ivz) - vn * BCData(nn)%norm(i, j, 3)
+
+            ww0(i, j, irhoE) = ww3(i, j, irhoE)
+
+            ! Set the pressure and gamma and possibly the
+            ! laminar and eddy viscosity in the halo.
+
+            gamma0(i, j) = gamma3(i, j)
+            pp0(i, j) = pp3(i, j)
+            if (viscous) rlv0(i, j) = rlv3(i, j)
+            if (eddyModel) rev0(i, j) = rev3(i, j)
+        end do
+
+    end subroutine bcSymm2ndHalo
+
+    subroutine bcSymmPolar1stHalo(nn)
+
+        ! bcSymmPolar applies the polar symmetry boundary conditions to a
+        ! singular line of a block. It is assumed that the pointers in
+        ! blockPointers are already set to the correct block on the
+        ! correct grid level.  The polar symmetry condition is a special
+        ! case of a degenerate line, as this line is the axi-symmetric
+        ! centerline. This routine does just the 1st level halo.
+
+        use constants
+        use BCPointers, only: ww1, ww2, pp1, pp2, rlv1, rlv2, rev1, rev2, &
+                              xx, iStart, jStart, iSize, jSize
+        use flowVarRefState, only: viscous, eddyModel
+        implicit none
+
+        ! Subroutine arguments.
+        integer(kind=intType), intent(in) :: nn
+
+        ! Local variables.
+        integer(kind=intType) :: i, j, l, ii, mm
+        real(kind=realType) :: nnx, nny, nnz, tmp, vtx, vty, vtz
+
+        ! Loop over the generic subface to set the state in the
+        ! 1-st level halos
+        !$AD II-LOOP
+        do ii = 0, isize * jsize - 1
+            i = mod(ii, isize) + iStart
+            j = ii / isize + jStart
+
+            ! Determine the unit vector along the degenerated face.
+            ! However it is not known which is the singular
+            ! direction and therefore determine the direction along
+            ! the diagonal (i,j) -- (i-1,j-1), which is correct for
+            ! both singular i and j-direction. Note that due to the
+            ! usage of the pointer xx there is an offset of +1
+            ! in the indices and therefore (i+1,j+1) - (i,j) must
+            ! be used to determine this vector.
+
+            nnx = xx(i + 1, j + 1, 1) - xx(i, j, 1)
+            nny = xx(i + 1, j + 1, 2) - xx(i, j, 2)
+            nnz = xx(i + 1, j + 1, 3) - xx(i, j, 3)
+
+            ! Determine the unit vector in this direction.
+
+            tmp = one / sqrt(nnx * nnx + nny * nny + nnz * nnz)
+            nnx = nnx * tmp
+            nny = nny * tmp
+            nnz = nnz * tmp
+
+            ! Determine twice the tangential velocity vector of the
+            ! internal cell.
+
+            tmp = two * (ww2(i, j, ivx) * nnx + ww2(i, j, ivy) * nny &
+                         + ww2(i, j, ivz) * nnz)
+            vtx = tmp * nnx
+            vty = tmp * nny
+            vtz = tmp * nnz
+
+            ! Determine the flow variables in the halo cell. The
+            ! velocity is constructed such that the average of the
+            ! internal and the halo cell is along the centerline.
+            ! Note that the magnitude of the velocity does not
+            ! change and thus the energy is identical.
+
+            ww1(i, j, irho) = ww2(i, j, irho)
+            ww1(i, j, ivx) = vtx - ww2(i, j, ivx)
+            ww1(i, j, ivy) = vty - ww2(i, j, ivy)
+            ww1(i, j, ivz) = vtz - ww2(i, j, ivz)
+            ww1(i, j, irhoE) = ww2(i, j, irhoE)
+
+            ! Set the pressure and possibly the laminar and
+            ! eddy viscosity in the halo.
+
+            pp1(i, j) = pp2(i, j)
+            if (viscous) rlv1(i, j) = rlv2(i, j)
+            if (eddyModel) rev1(i, j) = rev2(i, j)
+        end do
+    end subroutine bcSymmPolar1stHalo
+
+    subroutine bcSymmPolar2ndHalo(nn)
+
+        ! bcSymmPolar applies the polar symmetry boundary conditions to a
+        ! singular line of a block. It is assumed that the pointers in
+        ! blockPointers are already set to the correct block on the
+        ! correct grid level.  The polar symmetry condition is a special
+        ! case of a degenerate line, as this line is the axi-symmetric
+        ! centerline. This routine does just the 2nd level halo.
+
+        use constants
+        use BCPointers, only: ww0, ww3, pp0, pp3, rlv0, rlv3, rev0, rev3, &
+                              xx, iStart, jStart, iSize, jSize
+        use flowVarRefState, only: viscous, eddyModel
+        implicit none
+
+        ! Subroutine arguments.
+        integer(kind=intType), intent(in) :: nn
+
+        ! Local variables.
+        integer(kind=intType) :: i, j, l, ii, mm
+        real(kind=realType) :: nnx, nny, nnz, tmp, vtx, vty, vtz
+
+        !$AD II-LOOP
+        do ii = 0, isize * jsize - 1
+            i = mod(ii, isize) + iStart
+            j = ii / isize + jStart
+
+            ! Determine the unit vector along the degenerated face.
+            ! However it is not known which is the singular
+            ! direction and therefore determine the direction along
+            ! the diagonal (i,j) -- (i-1,j-1), which is correct for
+            ! both singular i and j-direction. Note that due to the
+            ! usage of the pointer xx there is an offset of +1
+            ! in the indices and therefore (i+1,j+1) - (i,j) must
+            ! be used to determine this vector.
+
+            nnx = xx(i + 1, j + 1, 1) - xx(i, j, 1)
+            nny = xx(i + 1, j + 1, 2) - xx(i, j, 2)
+            nnz = xx(i + 1, j + 1, 3) - xx(i, j, 3)
+
+            ! Determine the unit vector in this direction.
+
+            tmp = one / sqrt(nnx * nnx + nny * nny + nnz * nnz)
+            nnx = nnx * tmp
+            nny = nny * tmp
+            nnz = nnz * tmp
+
+            ! Determine twice the tangential velocity vector of the
+            ! internal cell.
+
+            tmp = two * (ww3(i, j, ivx) * nnx + ww3(i, j, ivy) * nny &
+                         + ww3(i, j, ivz) * nnz)
+            vtx = tmp * nnx
+            vty = tmp * nny
+            vtz = tmp * nnz
+
+            ! Determine the flow variables in the halo cell. The
+            ! velocity is constructed such that the average of the
+            ! internal and the halo cell is along the centerline.
+            ! Note that the magnitude of the velocity does not
+            ! change and thus the energy is identical.
 
+            ww0(i, j, irho) = ww3(i, j, irho)
+            ww0(i, j, ivx) = vtx - ww3(i, j, ivx)
+            ww0(i, j, ivy) = vty - ww3(i, j, ivy)
+            ww0(i, j, ivz) = vtz - ww3(i, j, ivz)
+            ww0(i, j, irhoE) = ww3(i, j, irhoE)
 
-  ! Loop over the number of spectral solutions.
+            ! Set the pressure and possibly the laminar and
+            ! eddy viscosity in the halo.
 
-  spectralLoop: do sps=1,nTimeIntervalsSpectral
+            pp0(i, j) = pp3(i, j)
+            if (viscous) rlv0(i, j) = rlv3(i, j)
+            if (eddyModel) rev0(i, j) = rev3(i, j)
+        end do
 
-     ! Loop over the number of blocks.
+    end subroutine bcSymmPolar2ndHalo
 
-     domains: do nn=1,nDom
+    subroutine bcNSWallAdiabatic(nn, secondHalo, correctForK)
 
-        ! Set the pointers for this block.
+        ! bcNSWallAdiabatic applies the viscous adiabatic wall boundary
+        ! condition the pointers already defined.
 
-        call setPointers(nn, currentLevel, sps)
+        use constants
+        use blockPointers, only: BCData
+        use inputDiscretization, only: viscWallBCTreatment
+        use BCPointers, only: ww0, ww1, ww2, rlv0, rlv1, rlv2, pp0, pp1, pp2, pp3, rev0, &
+                              rev1, rev2, iStart, jStart, iSize, jSize
+        use flowVarRefState, only: viscous, eddyModel
+        use iteration, only: currentLevel, groundLevel
+        implicit none
 
-        call interpLevelALEBC_block
-        call applyAllBC_block(secondHalo)
-        call recoverLevelALEBC_block
+        logical, intent(in) :: secondHalo, correctForK
+        integer(kind=intType), intent(in) :: nn
+        integer(kind=intType) :: i, j, ii
+        real(kind=realType) :: rhok
+        integer(kind=intType) :: wallTreatment
 
-     enddo domains
-  enddo spectralLoop
+        ! Initialize rhok to zero. This will be overwritten if a
+        ! correction for k must be applied.
 
-end subroutine applyAllBC
-#endif
+        rhok = zero
 
-  subroutine applyAllBC_block(secondHalo)
-
-    ! Apply BC's for a single block
-    use constants
-    use blockPointers , only : nBocos, BCType, nViscBocos, w, dw, x, vol, il, jl, kl, &
-         sectionID, wOld, volOld, BCData, &
-       si, sj, sk, sfacei, sfacej, sfacek, rlv, gamma, p, rev, &
-       bmtj1, bmtj2, scratch, bmtk2, bmtk1, &
-       fw, aa, d2wall, bmti1, bmti2, s
-    use utils, only : setBCPointers, getCorrectForK
-    use BCPointers
-    implicit none
+        ! Loop over the generic subface to set the state in the
+        ! halo cells.
 
-    ! Subroutine arguments.
-    logical, intent(in) :: secondHalo
-
-    ! Local variables.
-    logical :: correctForK
-    integer(kind=intType) :: nn
-    !
-    ! Determine whether or not the total energy must be corrected
-    ! for the presence of the turbulent kinetic energy.
-    correctForK = getCorrectForK()
-
-    ! Apply all the boundary conditions. The order is important!  Only
-    ! some of them have been AD'ed
-
-    ! ------------------------------------
-    !  Symmetry Boundary Condition
-    ! ------------------------------------
-    !$AD II-LOOP
-    do nn=1, nBocos
-       if (BCType(nn) == symm) then
-          call setBCPointers(nn, .False.)
-          call bcSymm1stHalo(nn)
-       end if
-    end do
-
-    if (secondHalo) then
-       !$AD II-LOOP
-       do nn=1, nBocos
-          if (BCType(nn) == symm) then
-             call setBCPointers(nn, .False.)
-             call bcSymm2ndHalo(nn)
-          end if
-       end do
-    end if
-
-    ! ------------------------------------
-    !  Symmetry Polar Boundary Condition
-    ! ------------------------------------
-    !$AD II-LOOP
-    do nn=1, nBocos
-       if (BCType(nn) == symmPolar) then
-          call setBCPointers(nn, .True.)
-          call bcSymmPolar1stHalo(nn)
-       end if
-    end do
-
-    if (secondHalo) then
-       !$AD II-LOOP
-       do nn=1, nBocos
-          if (BCType(nn) == symmPolar) then
-             call setBCPointers(nn, .True.)
-             call bcSymmPolar2ndHalo(nn)
-          end if
-       end do
-    end if
-
-    ! ------------------------------------
-    !  Adibatic Wall Boundary Condition
-    ! ------------------------------------
-    !$AD II-LOOP
-    do nn=1, nViscBocos
-       if (BCType(nn) == NSWallAdiabatic) then
-          call setBCPointers(nn, .False.)
-          call bcNSWallAdiabatic(nn, secondHalo, correctForK)
-       end if
-    end do
-
-    ! ------------------------------------
-    !  Isotermal Wall Boundary Condition
-    ! ------------------------------------
-    !$AD II-LOOP
-    do nn=1, nViscBocos
-       if (BCType(nn) == NSWallIsoThermal) then
-          call setBCPointers(nn, .False.)
-          call bcNSWallIsothermal(nn, secondHalo, correctForK)
-       end if
-    end do
-
-    ! ------------------------------------
-    !  Farfield Boundary Condition
-    ! ------------------------------------
-    !$AD II-LOOP
-    do nn=1,nBocos
-       if (BCType(nn) == farField) then
-          call setBCPointers(nn, .False.)
-          call bcFarField(nn, secondHalo, correctForK)
-       end if
-    end do
-
-    ! ------------------------------------
-    !  Subsonic Outflow Boundary Condition
-    ! ------------------------------------
-    do nn=1,nBocos
-       if (BCType(nn) == subSonicOutFlow .or. &
-            BCType(nn) == MassBleedOutflow) then
-          call setBCPointers(nn, .False.)
-          call bcSubSonicOutFlow(nn, secondHalo, correctForK)
-       end if
-    end do
-
-    ! ------------------------------------
-    !  Subsonic Inflow Boundary Condition
-    ! ------------------------------------
-    do nn=1,nBocos
-       if (BCType(nn) == subSonicInFlow) then
-          call setBCPointers(nn, .False.)
-          call bcSubSonicInflow(nn, secondHalo, correctForK)
-       end if
-    end do
-
-    ! ------------------------------------
-    !  Extrapolation Boundary Condition
-    ! ------------------------------------
-    ! Extrapolation boundary conditions; this also includes
-    ! the supersonic outflow boundary conditions. The difference
-    ! between the two is that the extrap boundary conditions
-    ! correspond to singular lines and supersonic outflow
-    ! boundaries to physical boundaries. The treatment however
-    ! is identical.
-    do nn=1,nBocos
-       if (BCType(nn) == extrap .or. &
-            BCType(nn) == SupersonicOutFlow) then
-          call setBCPointers(nn, .False.)
-          call bcExtrap(nn, secondHalo, correctForK)
-       end if
-    end do
-
-    ! ------------------------------------
-    !  Euler Wall Boundary Condition
-    ! ------------------------------------
-    !$AD II-LOOP
-    do nn=1,nBocos
-       if (BCType(nn) == EulerWall) then
-          call setBCPointers(nn, .True.)
-          call bcEulerWall(nn, secondHalo, correctForK)
-       end if
-    end do
-
-    ! ------------------------------------
-    !  Supersonic inflow condition
-    ! ------------------------------------
-    do nn=1,nBocos
-       if (BCType(nn) == SupersonicInflow) then
-          call setBCPointers(nn, .False.)
-          call bcSupersonicInflow(nn, secondHalo, correctForK)
-       end if
-    end do
-
-  end subroutine applyAllBC_block
-
-  ! ===================================================================
-  !   Actual implementation of each of the boundary condition routines
-  ! ===================================================================
-  subroutine bcSymm1stHalo(nn)
-
-    !  bcSymm1stHalo applies the symmetry boundary conditions to a
-    !  block.  * It is assumed that the pointers in blockPointers are
-    !  already set to the correct block on the correct grid level.
-    !
-    !  In case also the second halo must be set, a second loop is
-    !  execulted calling bcSymm2ndhalo. This is the only correct way
-    !  in case the block contains only 1 cell between two symmetry
-    !  planes, i.e. a 2D problem.
-
-    use constants
-    use blockPointers, only : BCdata
-    use flowVarRefState, only : viscous, eddyModel
-    use BCPointers, only : gamma1, gamma2, ww1, ww2, pp1, pp2, rlv1, rlv2, &
-         iStart, jStart, iSize, jSize, rev1, rev2
-    implicit none
+        !$AD II-LOOP
+        do ii = 0, isize * jsize - 1
+            i = mod(ii, isize) + iStart
+            j = ii / isize + jStart
 
-    ! Subroutine arguments.
-    integer(kind=intType), intent(in) :: nn
-
-    ! Local variables.
-    integer(kind=intType) :: i, j, l, ii
-    real(kind=realType) :: vn, nnx, nny, nnz
-
-    ! Loop over the generic subface to set the state in the
-    ! 1-st level halos
-
-    !$AD II-LOOP
-    do ii=0,isize*jsize-1
-       i = mod(ii, isize) + iStart
-       j = ii/isize + jStart
-
-       ! Determine twice the normal velocity component,
-       ! which must be substracted from the donor velocity
-       ! to obtain the halo velocity.
-
-       vn = two*(ww2(i,j,ivx)*BCData(nn)%norm(i,j,1) + &
-            ww2(i,j,ivy)*BCData(nn)%norm(i,j,2) + &
-            ww2(i,j,ivz)*BCData(nn)%norm(i,j,3))
-
-       ! Determine the flow variables in the halo cell.
-
-       ww1(i,j,irho) = ww2(i,j,irho)
-       ww1(i,j,ivx) = ww2(i,j,ivx) - vn*BCData(nn)%norm(i,j,1)
-       ww1(i,j,ivy) = ww2(i,j,ivy) - vn*BCData(nn)%norm(i,j,2)
-       ww1(i,j,ivz) = ww2(i,j,ivz) - vn*BCData(nn)%norm(i,j,3)
-       ww1(i,j,irhoE) = ww2(i,j,irhoE)
-
-       ! Set the pressure and gamma and possibly the
-       ! laminar and eddy viscosity in the halo.
-
-       gamma1(i,j) = gamma2(i,j)
-       pp1(i,j)    = pp2(i,j)
-       if( viscous )   rlv1(i,j) = rlv2(i,j)
-       if( eddyModel ) rev1(i,j) = rev2(i,j)
-    enddo
-  end subroutine bcSymm1stHalo
-
-  subroutine bcSymm2ndHalo(nn)
-
-    !  bcSymm2ndHalo applies the symmetry boundary conditions to a
-    !  block for the 2nd halo. This routine is separate as it makes
-    !  AD slightly easier.
-    use constants
-    use blockPointers, only : BCdata
-    use flowVarRefState, only : viscous, eddyModel
-    use BCPointers, only : gamma0, gamma3, ww0, ww3, pp0, pp3, rlv0, rlv3, &
-         rev0, rev3, iStart, jStart, iSize, jSize
-    implicit none
+            ! Set the value of rhok if a correcton must be applied.
+            ! It probably does not matter too much, because k is very
+            ! small near the wall.
 
-    ! Subroutine arguments.
-    integer(kind=intType), intent(in) :: nn
+            if (correctForK) rhok = ww2(i, j, irho) * ww2(i, j, itu1)
 
-    ! Local variables.
-    integer(kind=intType) :: i, j, l, ii
-    real(kind=realType) :: vn, nnx, nny, nnz
+            ! Determine the variables in the halo. As the spacing
+            ! is very small a constant pressure boundary condition
+            ! (except for the k correction) is okay. Take the slip
+            ! velocity into account.
 
-    ! If we need the second halo, do everything again, but using ww0,
-    ! ww3 etc instead of ww2 and ww1.
+            ww1(i, j, irho) = ww2(i, j, irho)
+            ww1(i, j, ivx) = -ww2(i, j, ivx) + two * bcData(nn)%uSlip(i, j, 1)
+            ww1(i, j, ivy) = -ww2(i, j, ivy) + two * bcData(nn)%uSlip(i, j, 2)
+            ww1(i, j, ivz) = -ww2(i, j, ivz) + two * bcData(nn)%uSlip(i, j, 3)
 
-    !$AD II-LOOP
-    do ii=0,isize*jsize-1
-       i = mod(ii, isize) + iStart
-       j = ii/isize + jStart
+            ! Set the viscosities. There is no need to test for a
+            ! viscous problem of course. The eddy viscosity is
+            ! set to the negative value, as it should be zero on
+            ! the wall.
 
-       vn = two*(ww3(i,j,ivx)*BCData(nn)%norm(i,j,1) + &
-            ww3(i,j,ivy)*BCData(nn)%norm(i,j,2) + &
-            ww3(i,j,ivz)*BCData(nn)%norm(i,j,3))
+            rlv1(i, j) = rlv2(i, j)
+            if (eddyModel) rev1(i, j) = -rev2(i, j)
 
-       ! Determine the flow variables in the halo cell.
-       ww0(i,j,irho) = ww3(i,j,irho)
-       ww0(i,j,ivx) = ww3(i,j,ivx) - vn*BCData(nn)%norm(i,j,1)
-       ww0(i,j,ivy) = ww3(i,j,ivy) - vn*BCData(nn)%norm(i,j,2)
-       ww0(i,j,ivz) = ww3(i,j,ivz) - vn*BCData(nn)%norm(i,j,3)
+            ! Make sure that on the coarser grids the constant pressure
+            ! boundary condition is used.
 
-       ww0(i,j,irhoE) = ww3(i,j,irhoE)
+            wallTreatment = viscWallBcTreatment
+            if (currentLevel > groundLevel) wallTreatment = constantPressure
 
-       ! Set the pressure and gamma and possibly the
-       ! laminar and eddy viscosity in the halo.
+            BCTreatment:select case(wallTreatment)
 
-       gamma0(i,j) = gamma3(i,j)
-       pp0(i,j)    = pp3(i,j)
-       if( viscous )   rlv0(i,j) = rlv3(i,j)
-       if( eddyModel ) rev0(i,j) = rev3(i,j)
-    enddo
+            case (constantPressure)
 
-  end subroutine bcSymm2ndHalo
+            ! Constant pressure. Set the gradient to zero.
+            pp1(i, j) = pp2(i, j) - four * third * rhok
 
-  subroutine bcSymmPolar1stHalo(nn)
+            case default
 
-    ! bcSymmPolar applies the polar symmetry boundary conditions to a
-    ! singular line of a block. It is assumed that the pointers in
-    ! blockPointers are already set to the correct block on the
-    ! correct grid level.  The polar symmetry condition is a special
-    ! case of a degenerate line, as this line is the axi-symmetric
-    ! centerline. This routine does just the 1st level halo.
+            pp1(i, j) = 2 * pp2(i, j) - pp3(i, j)
+            ! Adjust value if pressure is negative
+            if (pp1(i, j) .le. zero) pp1(i, j) = pp2(i, j)
 
-    use constants
-    use BCPointers, only: ww1, ww2, pp1, pp2, rlv1, rlv2, rev1, rev2, &
-         xx, iStart, jStart, iSize, jSize
-    use flowVarRefState, only : viscous, eddyModel
-    implicit none
+            end select BCTreatment
+        end do
 
-    ! Subroutine arguments.
-    integer(kind=intType), intent(in) :: nn
-
-    ! Local variables.
-    integer(kind=intType) :: i, j, l, ii, mm
-    real(kind=realType) :: nnx, nny, nnz, tmp, vtx, vty, vtz
-
-    ! Loop over the generic subface to set the state in the
-    ! 1-st level halos
-    !$AD II-LOOP
-    do ii=0,isize*jsize-1
-       i = mod(ii, isize) + iStart
-       j = ii/isize + jStart
-
-       ! Determine the unit vector along the degenerated face.
-       ! However it is not known which is the singular
-       ! direction and therefore determine the direction along
-       ! the diagonal (i,j) -- (i-1,j-1), which is correct for
-       ! both singular i and j-direction. Note that due to the
-       ! usage of the pointer xx there is an offset of +1
-       ! in the indices and therefore (i+1,j+1) - (i,j) must
-       ! be used to determine this vector.
-
-       nnx = xx(i+1,j+1,1) - xx(i,j,1)
-       nny = xx(i+1,j+1,2) - xx(i,j,2)
-       nnz = xx(i+1,j+1,3) - xx(i,j,3)
-
-       ! Determine the unit vector in this direction.
-
-       tmp = one/sqrt(nnx*nnx + nny*nny + nnz*nnz)
-       nnx = nnx*tmp
-       nny = nny*tmp
-       nnz = nnz*tmp
-
-       ! Determine twice the tangential velocity vector of the
-       ! internal cell.
-
-       tmp = two*(ww2(i,j,ivx)*nnx + ww2(i,j,ivy)*nny &
-            +      ww2(i,j,ivz)*nnz)
-       vtx = tmp*nnx
-       vty = tmp*nny
-       vtz = tmp*nnz
-
-       ! Determine the flow variables in the halo cell. The
-       ! velocity is constructed such that the average of the
-       ! internal and the halo cell is along the centerline.
-       ! Note that the magnitude of the velocity does not
-       ! change and thus the energy is identical.
-
-       ww1(i,j,irho)  = ww2(i,j,irho)
-       ww1(i,j,ivx)   = vtx - ww2(i,j,ivx)
-       ww1(i,j,ivy)   = vty - ww2(i,j,ivy)
-       ww1(i,j,ivz)   = vtz - ww2(i,j,ivz)
-       ww1(i,j,irhoE) = ww2(i,j,irhoE)
-
-       ! Set the pressure and possibly the laminar and
-       ! eddy viscosity in the halo.
-
-       pp1(i,j) = pp2(i,j)
-       if( viscous )   rlv1(i,j) = rlv2(i,j)
-       if( eddyModel ) rev1(i,j) = rev2(i,j)
-    end do
-  end subroutine bcSymmPolar1stHalo
-
-  subroutine bcSymmPolar2ndHalo(nn)
-
-    ! bcSymmPolar applies the polar symmetry boundary conditions to a
-    ! singular line of a block. It is assumed that the pointers in
-    ! blockPointers are already set to the correct block on the
-    ! correct grid level.  The polar symmetry condition is a special
-    ! case of a degenerate line, as this line is the axi-symmetric
-    ! centerline. This routine does just the 2nd level halo.
-
-    use constants
-    use BCPointers, only: ww0, ww3, pp0, pp3, rlv0, rlv3, rev0, rev3, &
-         xx, iStart, jStart, iSize, jSize
-    use flowVarRefState, only : viscous, eddyModel
-    implicit none
+        ! Compute the energy for these halo's.
 
-    ! Subroutine arguments.
-    integer(kind=intType), intent(in) :: nn
-
-    ! Local variables.
-    integer(kind=intType) :: i, j, l, ii, mm
-    real(kind=realType) :: nnx, nny, nnz, tmp, vtx, vty, vtz
-
-    !$AD II-LOOP
-    do ii=0,isize*jsize-1
-       i = mod(ii, isize) + iStart
-       j = ii/isize + jStart
-
-       ! Determine the unit vector along the degenerated face.
-       ! However it is not known which is the singular
-       ! direction and therefore determine the direction along
-       ! the diagonal (i,j) -- (i-1,j-1), which is correct for
-       ! both singular i and j-direction. Note that due to the
-       ! usage of the pointer xx there is an offset of +1
-       ! in the indices and therefore (i+1,j+1) - (i,j) must
-       ! be used to determine this vector.
-
-       nnx = xx(i+1,j+1,1) - xx(i,j,1)
-       nny = xx(i+1,j+1,2) - xx(i,j,2)
-       nnz = xx(i+1,j+1,3) - xx(i,j,3)
-
-       ! Determine the unit vector in this direction.
-
-       tmp = one/sqrt(nnx*nnx + nny*nny + nnz*nnz)
-       nnx = nnx*tmp
-       nny = nny*tmp
-       nnz = nnz*tmp
-
-       ! Determine twice the tangential velocity vector of the
-       ! internal cell.
-
-       tmp = two*(ww3(i,j,ivx)*nnx + ww3(i,j,ivy)*nny &
-            +      ww3(i,j,ivz)*nnz)
-       vtx = tmp*nnx
-       vty = tmp*nny
-       vtz = tmp*nnz
-
-       ! Determine the flow variables in the halo cell. The
-       ! velocity is constructed such that the average of the
-       ! internal and the halo cell is along the centerline.
-       ! Note that the magnitude of the velocity does not
-       ! change and thus the energy is identical.
-
-       ww0(i,j,irho)  = ww3(i,j,irho)
-       ww0(i,j,ivx)   = vtx - ww3(i,j,ivx)
-       ww0(i,j,ivy)   = vty - ww3(i,j,ivy)
-       ww0(i,j,ivz)   = vtz - ww3(i,j,ivz)
-       ww0(i,j,irhoE) = ww3(i,j,irhoE)
-
-       ! Set the pressure and possibly the laminar and
-       ! eddy viscosity in the halo.
-
-       pp0(i,j) = pp3(i,j)
-       if( viscous )   rlv0(i,j) = rlv3(i,j)
-       if( eddyModel ) rev0(i,j) = rev3(i,j)
-    enddo
-
-  end subroutine bcSymmPolar2ndHalo
-
-  subroutine bcNSWallAdiabatic(nn, secondHalo, correctForK)
-
-    ! bcNSWallAdiabatic applies the viscous adiabatic wall boundary
-    ! condition the pointers already defined.
-
-    use constants
-    use blockPointers, only : BCData
-    use inputDiscretization , only : viscWallBCTreatment
-    use BCPointers, only : ww0, ww1, ww2, rlv0, rlv1, rlv2, pp0, pp1, pp2, pp3, rev0, &
-         rev1, rev2,  iStart, jStart, iSize, jSize
-    use flowVarRefState, only : viscous, eddyModel
-    use iteration, only : currentLevel, groundLevel
-    implicit none
+        call computeEtot(ww1, pp1, correctForK)
 
-    logical, intent(in) :: secondHalo, correctForK
-    integer(kind=intType), intent(in) :: nn
-    integer(kind=intType) :: i, j, ii
-    real(kind=realType) :: rhok
-    integer(kind=intType) :: wallTreatment
+        ! Extrapolate the state vectors in case a second halo
+        ! is needed.
 
-    ! Initialize rhok to zero. This will be overwritten if a
-    ! correction for k must be applied.
+        if (secondHalo) call extrapolate2ndHalo(correctForK)
 
-    rhok = zero
+    end subroutine bcNSWallAdiabatic
 
-    ! Loop over the generic subface to set the state in the
-    ! halo cells.
+    subroutine bcNSWallIsoThermal(nn, secondHalo, correctForK)
 
-    !$AD II-LOOP
-    do ii=0,isize*jsize-1
-       i = mod(ii, isize) + iStart
-       j = ii/isize + jStart
+        ! bcNSWallAdiabatic applies the viscous isothermal wall boundary
+        ! condition to a block. It is assumed that the BCPointers are
+        ! already set
 
-       ! Set the value of rhok if a correcton must be applied.
-       ! It probably does not matter too much, because k is very
-       ! small near the wall.
+        use constants
+        use blockPointers, only: BCData
+        use inputDiscretization, only: viscWallBCTreatment
+        use BCPointers, only: ww0, ww1, ww2, rlv0, rlv1, rlv2, pp0, pp1, pp2, pp3, &
+                              rev0, rev1, rev2, iStart, jStart, iSize, jSize
+        use flowVarRefState, only: viscous, eddyModel, RGas
+        use iteration, only: currentLevel, groundLevel
+        implicit none
 
-       if( correctForK ) rhok = ww2(i,j,irho)*ww2(i,j,itu1)
+        ! Subroutine arguments.
+        logical, intent(in) :: secondHalo, correctForK
+        integer(kind=intType), intent(in) :: nn
 
-       ! Determine the variables in the halo. As the spacing
-       ! is very small a constant pressure boundary condition
-       ! (except for the k correction) is okay. Take the slip
-       ! velocity into account.
+        ! Local variables.
+        integer(kind=intType) :: i, j, ii
+        integer(kind=intType) :: wallTreatment
+        real(kind=realType) :: rhok, t2, t1
 
-       ww1(i,j,irho) =  ww2(i,j,irho)
-       ww1(i,j,ivx)  = -ww2(i,j,ivx) + two*bcData(nn)%uSlip(i,j,1)
-       ww1(i,j,ivy)  = -ww2(i,j,ivy) + two*bcData(nn)%uSlip(i,j,2)
-       ww1(i,j,ivz)  = -ww2(i,j,ivz) + two*bcData(nn)%uSlip(i,j,3)
+        ! Initialize rhok to zero. This will be overwritten if a
+        ! correction for k must be applied.
 
-       ! Set the viscosities. There is no need to test for a
-       ! viscous problem of course. The eddy viscosity is
-       ! set to the negative value, as it should be zero on
-       ! the wall.
+        rhok = zero
 
-       rlv1(i,j) = rlv2(i,j)
-       if( eddyModel ) rev1(i,j) = -rev2(i,j)
+        ! Loop over the generic subface to set the state in the
+        ! halo cells.
 
-       ! Make sure that on the coarser grids the constant pressure
-       ! boundary condition is used.
+        !$AD II-LOOP
+        do ii = 0, isize * jsize - 1
+            i = mod(ii, isize) + iStart
+            j = ii / isize + jStart
 
-       wallTreatment = viscWallBcTreatment
-       if(currentLevel > groundLevel) wallTreatment = constantPressure
+            ! Set the value of rhok if a correcton must be applied.
+            ! It probably does not matter too much, because k is very
+            ! small near the wall.
 
-       BCTreatment: select case (wallTreatment)
+            if (correctForK) rhok = ww2(i, j, irho) * ww2(i, j, itu1)
 
-       case (constantPressure)
+            ! Compute the temperature in the internal cell and in the
+            ! halo cell such that the average is the wall temperature.
 
-          ! Constant pressure. Set the gradient to zero.
-          pp1(i, j) = pp2(i, j) - four*third*rhok
+            t2 = pp2(i, j) / (RGas * ww2(i, j, irho))
+            t1 = two * bcData(nn)%TNS_Wall(i, j) - t2
 
-       case default
+            ! Make sure that t1 is within reasonable bounds. These
+            ! bounds are such that the clipping is never active in the
+            ! converged solution; it is only to avoid instabilities
+            ! during the convergence.
 
-          pp1(i,j) = 2*pp2(i,j) - pp3(i,j)
-          ! Adjust value if pressure is negative
-          if (pp1(i,j) .le. zero) pp1(i,j) = pp2(i,j)
+            t1 = max(half * bcData(nn)%TNS_Wall(i, j), t1)
+            t1 = min(two * bcData(nn)%TNS_Wall(i, j), t1)
 
-       end select BCTreatment
-    end do
+            ! PRESSURE EXTRAPOLATION
 
-    ! Compute the energy for these halo's.
+            ! Make sure that on the coarser grids the constant pressure
+            ! boundary condition is used.
 
-    call computeEtot(ww1, pp1, correctForK)
+            wallTreatment = viscWallBCTreatment
+            if (currentLevel > groundLevel) wallTreatment = constantPressure
 
-    ! Extrapolate the state vectors in case a second halo
-    ! is needed.
+            BCTreatment:select case(wallTreatment)
 
-    if( secondHalo ) call extrapolate2ndHalo(correctForK)
+            case (constantPressure)
 
-  end subroutine bcNSWallAdiabatic
+            ! Constant pressure. Set the gradient to zero.
+            pp1(i, j) = pp2(i, j) - four * third * rhok
 
-  subroutine bcNSWallIsoThermal(nn, secondHalo, correctForK)
+            case default
 
-    ! bcNSWallAdiabatic applies the viscous isothermal wall boundary
-    ! condition to a block. It is assumed that the BCPointers are
-    ! already set
+            ! Linear extrapolation.
+            i = mod(ii, isize) + iStart
+            j = ii / isize + jStart
 
-    use constants
-    use blockPointers, only : BCData
-    use inputDiscretization , only : viscWallBCTreatment
-    use BCPointers, only : ww0, ww1, ww2, rlv0, rlv1, rlv2, pp0, pp1, pp2, pp3, &
-         rev0, rev1, rev2, iStart, jStart, iSize, jSize
-    use flowVarRefState, only : viscous, eddyModel, RGas
-    use iteration, only : currentLevel, groundLevel
-    implicit none
+            pp1(i, j) = 2 * pp2(i, j) - pp3(i, j)
+            ! Adjust value if pressure is negative
+            if (pp1(i, j) .le. zero) pp1(i, j) = pp2(i, j)
 
-    ! Subroutine arguments.
-    logical, intent(in) :: secondHalo, correctForK
-    integer(kind=intType), intent(in) :: nn
+            end select BCTreatment
 
-    ! Local variables.
-    integer(kind=intType) :: i, j, ii
-    integer(kind=intType) :: wallTreatment
-    real(kind=realType) :: rhok, t2, t1
+            ! Determine the variables in the halo. As the spacing
+            ! is very small a constant pressure boundary condition
+            ! (except for the k correction) is okay. Take the slip
+            ! velocity into account.
 
-    ! Initialize rhok to zero. This will be overwritten if a
-    ! correction for k must be applied.
+            ww1(i, j, irho) = pp1(i, j) / (RGas * t1)
+            ww1(i, j, ivx) = -ww2(i, j, ivx) + two * bcData(nn)%uSlip(i, j, 1)
+            ww1(i, j, ivy) = -ww2(i, j, ivy) + two * bcData(nn)%uSlip(i, j, 2)
+            ww1(i, j, ivz) = -ww2(i, j, ivz) + two * bcData(nn)%uSlip(i, j, 3)
 
-    rhok = zero
+            ! Set the viscosities. There is no need to test for a
+            ! viscous problem of course. The eddy viscosity is
+            ! set to the negative value, as it should be zero on
+            ! the wall.
 
-    ! Loop over the generic subface to set the state in the
-    ! halo cells.
+            rlv1(i, j) = rlv2(i, j)
+            if (eddyModel) rev1(i, j) = -rev2(i, j)
+        end do
 
-    !$AD II-LOOP
-    do ii=0,isize*jsize-1
-       i = mod(ii, isize) + iStart
-       j = ii/isize + jStart
+        ! Compute the energy for these halo's.
 
-       ! Set the value of rhok if a correcton must be applied.
-       ! It probably does not matter too much, because k is very
-       ! small near the wall.
+        call computeEtot(ww1, pp1, correctForK)
 
-       if( correctForK ) rhok = ww2(i,j,irho)*ww2(i,j,itu1)
+        ! Extrapolate the state vectors in case a second halo
+        ! is needed.
 
-       ! Compute the temperature in the internal cell and in the
-       ! halo cell such that the average is the wall temperature.
+        if (secondHalo) call extrapolate2ndHalo(correctForK)
 
-       t2 = pp2(i,j)/(RGas*ww2(i,j,irho))
-       t1 = two*bcData(nn)%TNS_Wall(i,j) - t2
+    end subroutine bcNSWallIsoThermal
 
-       ! Make sure that t1 is within reasonable bounds. These
-       ! bounds are such that the clipping is never active in the
-       ! converged solution; it is only to avoid instabilities
-       ! during the convergence.
+    subroutine bcSubsonicOutflow(nn, secondHalo, correctForK)
 
-       t1 = max(half*bcData(nn)%TNS_Wall(i,j), t1)
-       t1 = min(two *bcData(nn)%TNS_Wall(i,j), t1)
+        !  bcSubsonicOutflow applies the subsonic outflow boundary
+        !  condition, static pressure prescribed, to a block. It is
+        !  assumed that the pointers in blockPointers are already set to
+        !  the correct block on the correct grid level.  Exactly the same
+        !  boundary condition is also applied for an outflow mass
+        !  bleed. Therefore the test is for both a subsonic outflow and an
+        !  bleed outflow.
 
-       ! PRESSURE EXTRAPOLATION
+        use constants
+        use blockPointers, only: BCData
+        use BCPointers, only: ww0, ww1, ww2, pp0, pp1, pp2, &
+                              rlv0, rlv1, rlv2, rev0, rev1, rev2, gamma2, &
+                              iSize, jSize, iStart, jStart
+        use flowVarRefState, only: eddyModel, viscous
+        implicit none
 
-       ! Make sure that on the coarser grids the constant pressure
-       ! boundary condition is used.
+        ! Subroutine arguments.
+        logical, intent(in) :: secondHalo, correctForK
+        integer(kind=intType), intent(in) :: nn
 
-       wallTreatment = viscWallBCTreatment
-       if(currentLevel > groundLevel) wallTreatment = constantPressure
+        ! Local variables.
+        integer(kind=intType) :: i, j, l, ii
+        real(kind=realType), parameter :: twothird = two * third
+        real(kind=realType) :: ovg, ovgm1, nnx, nny, nnz
+        real(kind=realType) :: pExit, pInt, r, a2, a, ac, ss
+        real(kind=realType) :: ue, ve, we, qne, qnh
 
-       BCTreatment: select case (wallTreatment)
+        ! Loop over the generic subface to set the state in the
+        ! halo cells.
 
-       case (constantPressure)
+        !$AD II-LOOP
+        do ii = 0, isize * jsize - 1
+            i = mod(ii, isize) + iStart
+            j = ii / isize + jStart
 
-          ! Constant pressure. Set the gradient to zero.
-          pp1(i,j) = pp2(i,j) - four*third*rhok
+            ! Store a couple of variables, such as the static
+            ! pressure and grid unit outward normal, a bit easier.
 
-       case default
+            pExit = BCData(nn)%ps(i, j)
 
-          ! Linear extrapolation.
-          i = mod(ii, isize) + iStart
-          j = ii/isize + jStart
+            nnx = BCData(nn)%norm(i, j, 1)
+            nny = BCData(nn)%norm(i, j, 2)
+            nnz = BCData(nn)%norm(i, j, 3)
 
-          pp1(i,j) = 2*pp2(i,j) - pp3(i,j)
-          ! Adjust value if pressure is negative
-          if (pp1(i,j) .le. zero) pp1(i,j) = pp2(i,j)
+            ! Abbreviate 1/gamma and 1/(gamma -1) a bit easier.
 
-       end select BCTreatment
+            ovg = one / gamma2(i, j)
+            ovgm1 = one / (gamma2(i, j) - one)
 
-       ! Determine the variables in the halo. As the spacing
-       ! is very small a constant pressure boundary condition
-       ! (except for the k correction) is okay. Take the slip
-       ! velocity into account.
+            ! Store the internal pressure and correct for the
+            ! possible presence of a k-equation.
 
-       ww1(i,j,irho) =  pp1(i,j)/(RGas*t1)
-       ww1(i,j,ivx)  = -ww2(i,j,ivx) + two*bcData(nn)%uSlip(i,j,1)
-       ww1(i,j,ivy)  = -ww2(i,j,ivy) + two*bcData(nn)%uSlip(i,j,2)
-       ww1(i,j,ivz)  = -ww2(i,j,ivz) + two*bcData(nn)%uSlip(i,j,3)
+            pInt = pp2(i, j)
+            if (correctForK) &
+                pInt = pInt - twothird * ww2(i, j, irho) * ww2(i, j, itu1)
 
-       ! Set the viscosities. There is no need to test for a
-       ! viscous problem of course. The eddy viscosity is
-       ! set to the negative value, as it should be zero on
-       ! the wall.
+            ! Compute the velocity components, the normal velocity
+            ! and the speed of sound for the internal cell.
 
-       rlv1(i,j) = rlv2(i,j)
-       if( eddyModel ) rev1(i,j) = -rev2(i,j)
-    enddo
+            r = one / ww2(i, j, irho)
+            a2 = gamma2(i, j) * pInt * r
+            a = sqrt(a2)
+            ue = ww2(i, j, ivx)
+            ve = ww2(i, j, ivy)
+            we = ww2(i, j, ivz)
+            qne = ue * nnx + ve * nny + we * nnz
 
-    ! Compute the energy for these halo's.
+            ! Compute the entropy and the acoustic variable.
+            ! These riemann inVariants, as well as the tangential
+            ! velocity components, are extrapolated.
 
-    call computeEtot(ww1, pp1, correctForK)
+            ss = pInt * (r**gamma2(i, j))
+            ac = qne + two * a * ovgm1
 
-    ! Extrapolate the state vectors in case a second halo
-    ! is needed.
+            ! Compute the state in the halo.
 
-    if( secondHalo ) call extrapolate2ndHalo(correctForK)
+            ww1(i, j, irho) = (pExit / ss)**ovg
+            pp1(i, j) = pExit
+            a = sqrt(gamma2(i, j) * pExit / ww1(i, j, irho))
+            qnh = ac - two * a * ovgm1
+            ww1(i, j, ivx) = ue + (qnh - qne) * nnx
+            ww1(i, j, ivy) = ve + (qnh - qne) * nny
+            ww1(i, j, ivz) = we + (qnh - qne) * nnz
 
-  end subroutine bcNSWallIsoThermal
+            ! Correct the pressure if a k-equation is present.
 
-  subroutine bcSubsonicOutflow(nn, secondHalo, correctForK)
+            if (correctForK) &
+                pp1(i, j) = pp1(i, j) &
+                            + twothird * ww1(i, j, irho) * ww1(i, j, itu1)
 
-    !  bcSubsonicOutflow applies the subsonic outflow boundary
-    !  condition, static pressure prescribed, to a block. It is
-    !  assumed that the pointers in blockPointers are already set to
-    !  the correct block on the correct grid level.  Exactly the same
-    !  boundary condition is also applied for an outflow mass
-    !  bleed. Therefore the test is for both a subsonic outflow and an
-    !  bleed outflow.
+            ! Set the viscosities in the halo to the viscosities
+            ! in the donor cell.
 
-    use constants
-    use blockPointers, only : BCData
-    use BCPointers, only : ww0, ww1, ww2, pp0, pp1, pp2, &
-         rlv0, rlv1, rlv2, rev0, rev1, rev2, gamma2, &
-         iSize, jSize, iStart, jStart
-    use flowVarRefState, only : eddyModel, viscous
-    implicit none
+            if (viscous) rlv1(i, j) = rlv2(i, j)
+            if (eddyModel) rev1(i, j) = rev2(i, j)
 
-    ! Subroutine arguments.
-    logical, intent(in) :: secondHalo, correctForK
-    integer(kind=intType), intent(in) :: nn
+        end do
 
-    ! Local variables.
-    integer(kind=intType) :: i, j, l, ii
-    real(kind=realType), parameter :: twothird = two*third
-    real(kind=realType) :: ovg, ovgm1, nnx, nny, nnz
-    real(kind=realType) :: pExit, pInt, r, a2, a, ac, ss
-    real(kind=realType) :: ue, ve, we, qne, qnh
+        ! Compute the energy for these halo's.
 
-    ! Loop over the generic subface to set the state in the
-    ! halo cells.
+        call computeEtot(ww1, pp1, correctForK)
 
-    !$AD II-LOOP
-    do ii=0,isize*jsize-1
-       i = mod(ii, isize) + iStart
-       j = ii/isize + jStart
+        ! Extrapolate the state vectors in case a second halo
+        ! is needed.
 
-       ! Store a couple of variables, such as the static
-       ! pressure and grid unit outward normal, a bit easier.
+        if (secondHalo) call extrapolate2ndHalo(correctForK)
 
-       pExit = BCData(nn)%ps(i,j)
+    end subroutine bcSubsonicOutflow
 
-       nnx = BCData(nn)%norm(i,j,1)
-       nny = BCData(nn)%norm(i,j,2)
-       nnz = BCData(nn)%norm(i,j,3)
+    subroutine bcSubsonicInflow(nn, secondHalo, correctForK)
 
-       ! Abbreviate 1/gamma and 1/(gamma -1) a bit easier.
+        !  bcSubsonicInflow applies the subsonic outflow boundary
+        !  condition, total pressure, total density and flow direction
+        !  prescribed, to a block. It is assumed that the pointers in
+        !  blockPointers are already set to the correct block on the
+        !  correct grid level.
 
-       ovg   = one/gamma2(i,j)
-       ovgm1 = one/(gamma2(i,j)-one)
+        use constants
+        use blockPointers, only: BCData
+        use flowVarRefState, only: viscous, eddyModel, RGas
+        use inputDiscretization, only: hScalingInlet
+        use BCPointers, only: ww0, ww1, ww2, pp0, pp1, pp2, &
+                              rlv0, rlv1, rlv2, rev0, rev1, rev2, gamma2, &
+                              iSize, jSize, iStart, jStart
+        use inputPhysics, only: cpModel, gammaConstant
+        use utils, only: terminate
+        implicit none
 
-       ! Store the internal pressure and correct for the
-       ! possible presence of a k-equation.
+        ! Subroutine arguments.
+        logical, intent(in) :: secondHalo, correctForK
+        integer(kind=intType), intent(in) :: nn
 
-       pInt = pp2(i,j)
-       if( correctForK ) &
-            pInt = pInt - twothird*ww2(i,j,irho)*ww2(i,j,itu1)
+        ! Local variables.
+        integer(kind=intType) :: i, j, l, ii
+        real(kind=realType), parameter :: twoThird = two * third
+        real(kind=realType) :: gm1, ovgm1
+        real(kind=realType) :: ptot, ttot, htot, a2tot, r, alpha, beta
+        real(kind=realType) :: aa2, bb, cc, dd, q, q2, a2, m2, scaleFact
+        real(kind=realType) :: ssx, ssy, ssz, nnx, nny, nnz
+        real(kind=realType) :: rho, velx, vely, velz, ratio, ts, govgm1
 
-       ! Compute the velocity components, the normal velocity
-       ! and the speed of sound for the internal cell.
+        ! Determine the boundary treatment to be used.
+        govgm1 = gammaConstant / (gammaConstant - one)
 
-       r   = one/ww2(i,j,irho)
-       a2  = gamma2(i,j)*pInt*r
-       a   = sqrt(a2)
-       ue  = ww2(i,j,ivx)
-       ve  = ww2(i,j,ivy)
-       we  = ww2(i,j,ivz)
-       qne = ue*nnx + ve*nny + we*nnz
+        select case (BCData(nn)%subsonicInletTreatment)
 
-       ! Compute the entropy and the acoustic variable.
-       ! These riemann inVariants, as well as the tangential
-       ! velocity components, are extrapolated.
+        case (totalConditions)
 
-       ss = pInt*(r**gamma2(i,j))
-       ac = qne + two*a*ovgm1
+            ! The total conditions have been prescribed.
 
-       ! Compute the state in the halo.
+            ! Loop over the generic subface to set the state in the
+            ! halo cells.
 
-       ww1(i,j,irho) = (pExit/ss)**ovg
-       pp1(i,j)      = pExit
-       a             = sqrt(gamma2(i,j)*pExit/ww1(i,j,irho))
-       qnh           = ac - two*a*ovgm1
-       ww1(i,j,ivx)  = ue + (qnh - qne)*nnx
-       ww1(i,j,ivy)  = ve + (qnh - qne)*nny
-       ww1(i,j,ivz)  = we + (qnh - qne)*nnz
+            !$AD II-LOOP
+            do ii = 0, isize * jsize - 1
+                i = mod(ii, isize) + iStart
+                j = ii / isize + jStart
 
-       ! Correct the pressure if a k-equation is present.
+                ! Store a couple of variables, such as the total
+                ! pressure, total temperature, total enthalpy, flow
+                ! direction and grid unit outward normal, a bit easier.
 
-       if( correctForK )   &
-            pp1(i,j) = pp1(i,j) &
-            + twothird*ww1(i,j,irho)*ww1(i,j,itu1)
+                ptot = BCData(nn)%ptInlet(i, j)
+                ttot = BCData(nn)%ttInlet(i, j)
+                htot = BCData(nn)%htInlet(i, j)
 
-       ! Set the viscosities in the halo to the viscosities
-       ! in the donor cell.
+                ssx = BCData(nn)%flowXdirInlet(i, j)
+                ssy = BCData(nn)%flowYdirInlet(i, j)
+                ssz = BCData(nn)%flowZdirInlet(i, j)
 
-       if( viscous )   rlv1(i,j) = rlv2(i,j)
-       if( eddyModel ) rev1(i,j) = rev2(i,j)
+                nnx = BCData(nn)%norm(i, j, 1)
+                nny = BCData(nn)%norm(i, j, 2)
+                nnz = BCData(nn)%norm(i, j, 3)
 
-    enddo
+                ! Some abbreviations in which gamma occurs.
 
-    ! Compute the energy for these halo's.
+                gm1 = gamma2(i, j) - one
+                ovgm1 = one / gm1
 
-    call computeEtot(ww1, pp1, correctForK)
+                ! Determine the acoustic Riemann variable that must be
+                ! extrapolated from the domain.
 
-    ! Extrapolate the state vectors in case a second halo
-    ! is needed.
+                r = one / ww2(i, j, irho)
+                a2 = gamma2(i, j) * pp2(i, j) * r
+                beta = ww2(i, j, ivx) * nnx + ww2(i, j, ivy) * nny &
+                       + ww2(i, j, ivz) * nnz + two * ovgm1 * sqrt(a2)
 
-    if( secondHalo ) call extrapolate2ndHalo(correctForK)
+                ! Correct the value of the Riemann invariant if total
+                ! enthalpy scaling must be applied. This scaling may
+                ! be needed for stability if large gradients of the
+                ! total temperature are prescribed.
 
-  end subroutine bcSubsonicOutflow
+                scaleFact = one
+                if (hScalingInlet) &
+                    scaleFact = sqrt(htot / (r * (ww2(i, j, irhoE) + pp2(i, j))))
 
-  subroutine bcSubsonicInflow(nn, secondHalo, correctForK)
+                beta = beta * scaleFact
 
-    !  bcSubsonicInflow applies the subsonic outflow boundary
-    !  condition, total pressure, total density and flow direction
-    !  prescribed, to a block. It is assumed that the pointers in
-    !  blockPointers are already set to the correct block on the
-    !  correct grid level.
+                ! Compute the value of a2 + 0.5*gm1*q2, which is the
+                ! total speed of sound for constant cp. However, the
+                ! expression below is also valid for variable cp,
+                ! although a linearization around the value of the
+                ! internal cell is performed.
 
-    use constants
-    use blockPointers, only : BCData
-    use flowVarRefState, only : viscous, eddyModel, RGas
-    use inputDiscretization, only : hScalingInlet
-    use BCPointers, only : ww0, ww1, ww2, pp0, pp1, pp2, &
-         rlv0, rlv1, rlv2, rev0, rev1, rev2, gamma2, &
-         iSize, jSize, iStart, jStart
-    use inputPhysics, only : cpModel, gammaConstant
-    use utils, only : terminate
-    implicit none
+                q2 = ww2(i, j, ivx)**2 + ww2(i, j, ivy)**2 &
+                     + ww2(i, j, ivz)**2
+                a2tot = gm1 * (htot - r * (ww2(i, j, irhoE) + pp2(i, j)) &
+                               + half * q2) + a2
 
-    ! Subroutine arguments.
-    logical, intent(in) :: secondHalo, correctForK
-    integer(kind=intType), intent(in) :: nn
+                ! Compute the dot product between the normal and the
+                ! velocity direction. This value should be negative.
+
+                alpha = nnx * ssx + nny * ssy + nnz * ssz
+
+                ! Compute the coefficients in the quadratic equation
+                ! for the magnitude of the velocity.
 
-    ! Local variables.
-    integer(kind=intType) :: i, j, l, ii
-    real(kind=realType), parameter :: twoThird = two*third
-    real(kind=realType) :: gm1, ovgm1
-    real(kind=realType) :: ptot, ttot, htot, a2tot, r, alpha, beta
-    real(kind=realType) :: aa2, bb, cc, dd, q, q2, a2, m2, scaleFact
-    real(kind=realType) :: ssx, ssy, ssz, nnx, nny, nnz
-    real(kind=realType) :: rho, velx, vely, velz, ratio, ts, govgm1
+                aa2 = half * gm1 * alpha * alpha + one
+                bb = -gm1 * alpha * beta
+                cc = half * gm1 * beta * beta - two * ovgm1 * a2tot
 
-    ! Determine the boundary treatment to be used.
-    govgm1 = gammaConstant/(gammaConstant - one)
+                ! Solve the equation for the magnitude of the
+                ! velocity. As this value must be positive and both aa2
+                ! and bb are positive (alpha is negative and beta is
+                ! positive up till Mach = 5.0 or so, which is not
+                ! really subsonic anymore), it is clear which of the
+                ! two possible solutions must be taken. Some clipping
+                ! is present, but this is normally not active.
 
-    select case (BCData(nn)%subsonicInletTreatment)
+                dd = bb * bb - four * aa2 * cc
+                dd = sqrt(max(zero, dd))
+                q = (-bb + dd) / (two * aa2)
+                q = max(zero, q)
+                q2 = q * q
 
-    case (totalConditions)
+                ! Compute the speed of sound squared from the total
+                ! speed of sound equation (== total enthalpy equation
+                ! for constant cp).
 
-       ! The total conditions have been prescribed.
+                a2 = a2tot - half * gm1 * q2
 
-       ! Loop over the generic subface to set the state in the
-       ! halo cells.
+                ! Compute the Mach number squared and cut it between
+                ! 0.0 and 1.0. Adapt the velocity and speed of sound
+                ! squared accordingly.
 
-       !$AD II-LOOP
-       do ii=0,isize*jsize-1
-          i = mod(ii, isize) + iStart
-          j = ii/isize + jStart
+                m2 = q2 / a2
+                m2 = min(one, m2)
+                q2 = m2 * a2
+                q = sqrt(q2)
+                a2 = a2tot - half * gm1 * q2
 
-          ! Store a couple of variables, such as the total
-          ! pressure, total temperature, total enthalpy, flow
-          ! direction and grid unit outward normal, a bit easier.
+                ! Compute the velocities in the halo cell and use rho,
+                ! rhoe and p as temporary buffers to store the total
+                ! temperature, total pressure and static temperature.
 
-          ptot = BCData(nn)%ptInlet(i,j)
-          ttot = BCData(nn)%ttInlet(i,j)
-          htot = BCData(nn)%htInlet(i,j)
+                ww1(i, j, ivx) = q * ssx
+                ww1(i, j, ivy) = q * ssy
+                ww1(i, j, ivz) = q * ssz
 
-          ssx  = BCData(nn)%flowXdirInlet(i,j)
-          ssy  = BCData(nn)%flowYdirInlet(i,j)
-          ssz  = BCData(nn)%flowZdirInlet(i,j)
+                ! This should call prhosubsonicInlet, but it doesnt' AD
+                ! correctly, so just the constant CP model is used here.
 
-          nnx = BCData(nn)%norm(i,j,1)
-          nny = BCData(nn)%norm(i,j,2)
-          nnz = BCData(nn)%norm(i,j,3)
+                ! Compute the pressure and density for these halo's.
+                select case (cpModel)
+                case (cpConstant)
+                    ! Compute the static pressure from the total pressure
+                    ! and the temperature ratio. Compute the density using
+                    ! the gas law.
 
-          ! Some abbreviations in which gamma occurs.
-
-          gm1   = gamma2(i,j) - one
-          ovgm1 = one/gm1
-
-          ! Determine the acoustic Riemann variable that must be
-          ! extrapolated from the domain.
+                    ts = a2 / (gamma2(i, j) * RGas)
+                    ratio = (ts / ttot)**govgm1
+                    pp1(i, j) = ptot * ratio
+                    ww1(i, j, irho) = (ptot * ratio) / (RGas * ts)
+                    if (correctForK) then
+                        pp1(i, j) = pp1(i, j) &
+                                    + twoThird * ww1(i, j, irho) * ww1(i, j, itu1)
+                    end if
 
-          r    = one/ww2(i,j,irho)
-          a2   = gamma2(i,j)*pp2(i,j)*r
-          beta = ww2(i,j,ivx)*nnx + ww2(i,j,ivy)*nny  &
-               + ww2(i,j,ivz)*nnz + two*ovgm1*sqrt(a2)
-
-          ! Correct the value of the Riemann invariant if total
-          ! enthalpy scaling must be applied. This scaling may
-          ! be needed for stability if large gradients of the
-          ! total temperature are prescribed.
+                case (cpTempCurveFits)
+                    call terminate('BCRoutines', "not curve fits not implemented")
+                end select
 
-          scaleFact = one
-          if( hScalingInlet ) &
-               scaleFact = sqrt(htot/(r*(ww2(i,j,irhoE) + pp2(i,j))))
+                ! Set the viscosities in the halo to the viscosities
+                ! in the donor cell.
 
-          beta = beta*scaleFact
+                if (viscous) rlv1(i, j) = rlv2(i, j)
+                if (eddyModel) rev1(i, j) = rev2(i, j)
 
-          ! Compute the value of a2 + 0.5*gm1*q2, which is the
-          ! total speed of sound for constant cp. However, the
-          ! expression below is also valid for variable cp,
-          ! although a linearization around the value of the
-          ! internal cell is performed.
+            end do
 
-          q2    = ww2(i,j,ivx)**2 + ww2(i,j,ivy)**2 &
-               + ww2(i,j,ivz)**2
-          a2tot = gm1*(htot - r*(ww2(i,j,irhoE) + pp2(i,j)) &
-               +      half*q2) + a2
+            !===========================================================
 
-          ! Compute the dot product between the normal and the
-          ! velocity direction. This value should be negative.
+        case (massFlow)
 
-          alpha = nnx*ssx + nny*ssy + nnz*ssz
+            ! Density and velocity vector prescribed.
 
-          ! Compute the coefficients in the quadratic equation
-          ! for the magnitude of the velocity.
+            ! Loop over the generic subface to set the state in the
+            ! halo cells.
 
-          aa2 =  half*gm1*alpha*alpha + one
-          bb = -gm1*alpha*beta
-          cc =  half*gm1*beta*beta - two*ovgm1*a2tot
+            !$AD II-LOOP
+            do ii = 0, isize * jsize - 1
+                i = mod(ii, isize) + iStart
+                j = ii / isize + jStart
 
-          ! Solve the equation for the magnitude of the
-          ! velocity. As this value must be positive and both aa2
-          ! and bb are positive (alpha is negative and beta is
-          ! positive up till Mach = 5.0 or so, which is not
-          ! really subsonic anymore), it is clear which of the
-          ! two possible solutions must be taken. Some clipping
-          ! is present, but this is normally not active.
+                ! Store a couple of variables, such as the density,
+                ! velocity and grid unit outward normal, a bit easier.
 
-          dd = bb*bb - four*aa2*cc
-          dd = sqrt(max(zero,dd))
-          q  = (-bb + dd)/(two*aa2)
-          q  = max(zero,q)
-          q2 = q*q
+                rho = BCData(nn)%rho(i, j)
+                velx = BCData(nn)%velx(i, j)
+                vely = BCData(nn)%vely(i, j)
+                velz = BCData(nn)%velz(i, j)
 
-          ! Compute the speed of sound squared from the total
-          ! speed of sound equation (== total enthalpy equation
-          ! for constant cp).
+                nnx = BCData(nn)%norm(i, j, 1)
+                nny = BCData(nn)%norm(i, j, 2)
+                nnz = BCData(nn)%norm(i, j, 3)
 
-          a2 = a2tot - half*gm1*q2
+                ! Some abbreviations in which gamma occurs.
 
-          ! Compute the Mach number squared and cut it between
-          ! 0.0 and 1.0. Adapt the velocity and speed of sound
-          ! squared accordingly.
+                gm1 = gamma2(i, j) - one
+                ovgm1 = one / gm1
 
-          m2 = q2/a2
-          m2 = min(one,m2)
-          q2 = m2*a2
-          q  = sqrt(q2)
-          a2 = a2tot - half*gm1*q2
+                ! Determine the acoustic Riemann variable that must be
+                ! extrapolated from the domain.
 
-          ! Compute the velocities in the halo cell and use rho,
-          ! rhoe and p as temporary buffers to store the total
-          ! temperature, total pressure and static temperature.
+                r = one / ww2(i, j, irho)
+                a2 = gamma2(i, j) * pp2(i, j) * r
+                beta = ww2(i, j, ivx) * nnx + ww2(i, j, ivy) * nny &
+                       + ww2(i, j, ivz) * nnz + two * ovgm1 * sqrt(a2)
 
-          ww1(i,j,ivx)  = q*ssx
-          ww1(i,j,ivy)  = q*ssy
-          ww1(i,j,ivz)  = q*ssz
+                ! Compute the speed of sound squared in the halo.
 
-          ! This should call prhosubsonicInlet, but it doesnt' AD
-          ! correctly, so just the constant CP model is used here.
+                a2 = half * gm1 * (beta - velx * nnx - vely * nny - velz * nnz)
+                a2 = max(zero, a2)
+                a2 = a2 * a2
 
-          ! Compute the pressure and density for these halo's.
-          select case (cpModel)
-          case (cpConstant)
-             ! Compute the static pressure from the total pressure
-             ! and the temperature ratio. Compute the density using
-             ! the gas law.
+                ! Compute the pressure in the halo, assuming a
+                ! constant value of gamma.
 
-             ts = a2/(gamma2(i,j)*RGas)
-             ratio        = (ts/ttot)**govgm1
-             pp1(i,j)      = ptot*ratio
-             ww1(i,j,irho) = (ptot*ratio)/(RGas*ts)
-             if( correctForK ) then
-                pp1(i,j) = pp1(i,j) &
-                     + twoThird*ww1(i,j,irho)*ww1(i,j,itu1)
-             end if
+                pp1(i, j) = rho * a2 / gamma2(i, j)
 
-          case (cpTempCurveFits)
-             call terminate('BCRoutines', "not curve fits not implemented")
-          end select
+                ! Simply copy the density and velocities.
 
-          ! Set the viscosities in the halo to the viscosities
-          ! in the donor cell.
+                ww1(i, j, irho) = rho
+                ww1(i, j, ivx) = velx
+                ww1(i, j, ivy) = vely
+                ww1(i, j, ivz) = velz
 
-          if( viscous )   rlv1(i,j) = rlv2(i,j)
-          if( eddyModel ) rev1(i,j) = rev2(i,j)
+                ! Set the viscosities in the halo to the viscosities
+                ! in the donor cell.
 
-       enddo
+                if (viscous) rlv1(i, j) = rlv2(i, j)
+                if (eddyModel) rev1(i, j) = rev2(i, j)
 
-       !===========================================================
+            end do
 
-    case (massFlow)
+        end select
 
-       ! Density and velocity vector prescribed.
+        ! Compute the energy for these halo's.
 
-       ! Loop over the generic subface to set the state in the
-       ! halo cells.
+        call computeEtot(ww1, pp1, correctForK)
 
-       !$AD II-LOOP
-       do ii=0,isize*jsize-1
-          i = mod(ii, isize) + iStart
-          j = ii/isize + jStart
+        ! Extrapolate the state vectors in case a second halo
+        ! is needed.
 
-          ! Store a couple of variables, such as the density,
-          ! velocity and grid unit outward normal, a bit easier.
+        if (secondHalo) call extrapolate2ndHalo(correctForK)
 
-          rho  = BCData(nn)%rho(i,j)
-          velx = BCData(nn)%velx(i,j)
-          vely = BCData(nn)%vely(i,j)
-          velz = BCData(nn)%velz(i,j)
+    end subroutine bcSubsonicInflow
 
-          nnx = BCData(nn)%norm(i,j,1)
-          nny = BCData(nn)%norm(i,j,2)
-          nnz = BCData(nn)%norm(i,j,3)
+    subroutine bcEulerWall(nn, secondHalo, correctForK)
 
-          ! Some abbreviations in which gamma occurs.
+        !  bcEulerWall applies the inviscid wall boundary condition to a
+        !  block. It is assumed that the bcpointers are already set to the
+        !  correct block on the correct grid level.
 
-          gm1   = gamma2(i,j) - one
-          ovgm1 = one/gm1
+        use constants
+        use blockPointers, only: BCData, addGridVelocities
+        use flowVarRefState, only: viscous, eddyModel, RGas
+        use inputDiscretization, only: eulerWallBCTreatment
+        use iteration, only: currentLevel, groundLevel
+        use utils, only: myDim
+        use BCPointers, only: ww0, ww1, ww2, pp0, pp1, pp2, pp3, rlv0, rlv1, rlv2, &
+                              rev0, rev1, rev2, ss, ssi, ssj, ssk, &
+                              iStart, iSize, jStart, jSize, iEnd, jEnd
+        implicit none
 
-          ! Determine the acoustic Riemann variable that must be
-          ! extrapolated from the domain.
+        ! Subroutine arguments.
+        logical, intent(in) :: secondHalo, correctForK
+        integer(kind=intType), intent(in) :: nn
 
-          r    = one/ww2(i,j,irho)
-          a2   = gamma2(i,j)*pp2(i,j)*r
-          beta = ww2(i,j,ivx)*nnx + ww2(i,j,ivy)*nny  &
-               + ww2(i,j,ivz)*nnz + two*ovgm1*sqrt(a2)
+        ! Local variables.
+        integer(kind=intType) :: j, k, l, ii
+        integer(kind=intType) :: jm1, jp1, km1, kp1
+        integer(kind=intType) :: wallTreatment
 
-          ! Compute the speed of sound squared in the halo.
+        real(kind=realType) :: sixa, siya, siza, sjxa, sjya, sjza
+        real(kind=realType) :: skxa, skya, skza, a1, b1
+        real(kind=realType) :: rxj, ryj, rzj, rxk, ryk, rzk
+        real(kind=realType) :: dpj, dpk, ri, rj, rk, qj, qk, vn
+        real(kind=realType) :: uux, uuy, uuz
+        real(kind=realType), dimension(iStart:iEnd, jStart:jEnd) :: grad
 
-          a2 = half*gm1*(beta - velx*nnx - vely*nny - velz*nnz)
-          a2 = max(zero,a2)
-          a2 = a2*a2
+        ! Make sure that on the coarser grids the constant pressure
+        ! boundary condition is used.
 
-          ! Compute the pressure in the halo, assuming a
-          ! constant value of gamma.
+        wallTreatment = eulerWallBcTreatment
+        if (currentLevel > groundLevel) wallTreatment = constantPressure
 
-          pp1(i,j) = rho*a2/gamma2(i,j)
+        !  Determine the boundary condition treatment and compute the
+        !  undivided pressure gradient accordingly. This gradient is
+        !  temporarily stored in the halo pressure.
+        !
+        BCTreatment:select case(wallTreatment)
 
-          ! Simply copy the density and velocities.
+        case (constantPressure)
 
-          ww1(i,j,irho) = rho
-          ww1(i,j,ivx)  = velx
-          ww1(i,j,ivy)  = vely
-          ww1(i,j,ivz)  = velz
+        ! Constant pressure. Set the gradient to zero.
+        grad = zero
 
-          ! Set the viscosities in the halo to the viscosities
-          ! in the donor cell.
+        case (linExtrapolPressure)
 
-          if( viscous )   rlv1(i,j) = rlv2(i,j)
-          if( eddyModel ) rev1(i,j) = rev2(i,j)
+        ! Linear extrapolation.
+        !$AD II-LOOP
+        do ii = 0, isize * jsize - 1
+            j = mod(ii, isize) + iStart
+            k = ii / isize + jStart
+            grad(j, k) = pp3(j, k) - pp2(j, k)
+        end do
 
-       enddo
+        case (normalMomentum)
 
-    end select
+        ! Pressure gradient is computed using the normal momentum
+        ! equation. First set a couple of additional variables for
+        ! the normals, depending on the block face. Note that the
+        ! construction 1: should not be used in these pointers,
+        ! because element 0 is needed. Consequently there will be
+        ! an offset of 1 for these normals. This is commented in
+        ! the code. For moving faces also the grid velocity of
+        ! the 1st cell center from the wall is needed.
 
-    ! Compute the energy for these halo's.
+        !$AD II-LOOP
+        do ii = 0, isize * jsize - 1
+            j = mod(ii, isize) + iStart
+            k = ii / isize + jStart
 
-    call computeEtot(ww1, pp1, correctForK)
+            ! Store the indices k+1, k-1 a bit easier and make
+            ! sure that they do not exceed the range of the arrays.
 
-    ! Extrapolate the state vectors in case a second halo
-    ! is needed.
+            km1 = k - 1; km1 = max(jStart, km1)
+            kp1 = k + 1; kp1 = min(jend, kp1)
 
-    if( secondHalo ) call extrapolate2ndHalo(correctForK)
+            ! Compute the scaling factor for the central difference
+            ! in the k-direction.
 
-  end subroutine bcSubsonicInflow
+            b1 = one / max(1_intType, (kp1 - km1))
+
+            ! The indices j+1 and j-1. Make sure that they
+            ! do not exceed the range of the arrays.
+
+            jm1 = j - 1; jm1 = max(iStart, jm1)
+            jp1 = j + 1; jp1 = min(iEnd, jp1)
+
+            ! Compute the scaling factor for the central
+            ! difference in the j-direction.
+
+            a1 = one / max(1_intType, (jp1 - jm1))
+
+            ! Compute (twice) the average normal in the generic i,
+            ! j and k-direction. Note that in j and k-direction
+            ! the average in the original indices should be taken
+            ! using j-1 and j (and k-1 and k). However due to the
+            ! usage of pointers ssj and ssk there is an offset in
+            ! the indices of 1 and therefore now the correct
+            ! average is obtained with the indices j and j+1
+            ! (k and k+1).
+
+            sixa = two * ssi(j, k, 1)
+            siya = two * ssi(j, k, 2)
+            siza = two * ssi(j, k, 3)
+
+            sjxa = ssj(j, k, 1) + ssj(j + 1, k, 1)
+            sjya = ssj(j, k, 2) + ssj(j + 1, k, 2)
+            sjza = ssj(j, k, 3) + ssj(j + 1, k, 3)
 
-  subroutine bcEulerWall(nn, secondHalo, correctForK)
+            skxa = ssk(j, k, 1) + ssk(j, k + 1, 1)
+            skya = ssk(j, k, 2) + ssk(j, k + 1, 2)
+            skza = ssk(j, k, 3) + ssk(j, k + 1, 3)
 
-    !  bcEulerWall applies the inviscid wall boundary condition to a
-    !  block. It is assumed that the bcpointers are already set to the
-    !  correct block on the correct grid level.
+            ! Compute the difference of the normal vector and
+            ! pressure in j and k-direction. As the indices are
+            ! restricted to the 1st halo-layer, the computation
+            ! of the internal halo values is not consistent;
+            ! however this is not really a problem, because these
+            ! values are overwritten in the communication pattern.
 
-    use constants
-    use blockPointers, only : BCData, addGridVelocities
-    use flowVarRefState, only : viscous, eddyModel, RGas
-    use inputDiscretization, only : eulerWallBCTreatment
-    use iteration, only : currentLevel, groundLevel
-    use utils, only : myDim
-    use BCPointers, only : ww0, ww1, ww2, pp0, pp1, pp2, pp3, rlv0, rlv1, rlv2, &
-         rev0, rev1, rev2, ss, ssi, ssj, ssk, &
-         iStart, iSize, jStart, jSize, iEnd, jEnd
-    implicit none
+            rxj = a1 * (BCData(nn)%norm(jp1, k, 1) - BCData(nn)%norm(jm1, k, 1))
+            ryj = a1 * (BCData(nn)%norm(jp1, k, 2) - BCData(nn)%norm(jm1, k, 2))
+            rzj = a1 * (BCData(nn)%norm(jp1, k, 3) - BCData(nn)%norm(jm1, k, 3))
+            dpj = a1 * (pp2(jp1, k) - pp2(jm1, k))
 
-    ! Subroutine arguments.
-    logical, intent(in) :: secondHalo, correctForK
-    integer(kind=intType), intent(in) :: nn
+            rxk = b1 * (BCData(nn)%norm(j, kp1, 1) - BCData(nn)%norm(j, km1, 1))
+            ryk = b1 * (BCData(nn)%norm(j, kp1, 2) - BCData(nn)%norm(j, km1, 2))
+            rzk = b1 * (BCData(nn)%norm(j, kp1, 3) - BCData(nn)%norm(j, km1, 3))
+            dpk = b1 * (pp2(j, kp1) - pp2(j, km1))
 
-    ! Local variables.
-    integer(kind=intType) :: j, k, l, ii
-    integer(kind=intType) :: jm1, jp1, km1, kp1
-    integer(kind=intType) :: wallTreatment
+            ! Compute the dot product between the unit vector
+            ! and the normal vectors in i, j and k-direction.
 
-    real(kind=realType) :: sixa, siya, siza, sjxa, sjya, sjza
-    real(kind=realType) :: skxa, skya, skza, a1, b1
-    real(kind=realType) :: rxj, ryj, rzj, rxk, ryk, rzk
-    real(kind=realType) :: dpj, dpk, ri, rj, rk, qj, qk, vn
-    real(kind=realType) :: uux, uuy, uuz
-    real(kind=realType), dimension(iStart:iEnd, jStart:jEnd) :: grad
-
-    ! Make sure that on the coarser grids the constant pressure
-    ! boundary condition is used.
-
-    wallTreatment = eulerWallBcTreatment
-    if(currentLevel > groundLevel) wallTreatment = constantPressure
+            ri = BCData(nn)%norm(j, k, 1) * sixa + BCData(nn)%norm(j, k, 2) * siya &
+                 + BCData(nn)%norm(j, k, 3) * siza
+            rj = BCData(nn)%norm(j, k, 1) * sjxa + BCData(nn)%norm(j, k, 2) * sjya &
+                 + BCData(nn)%norm(j, k, 3) * sjza
+            rk = BCData(nn)%norm(j, k, 1) * skxa + BCData(nn)%norm(j, k, 2) * skya &
+                 + BCData(nn)%norm(j, k, 3) * skza
 
-    !  Determine the boundary condition treatment and compute the
-    !  undivided pressure gradient accordingly. This gradient is
-    !  temporarily stored in the halo pressure.
-    !
-    BCTreatment: select case (wallTreatment)
-
-    case (constantPressure)
-
-       ! Constant pressure. Set the gradient to zero.
-       grad = zero
-
-    case (linExtrapolPressure)
-
-       ! Linear extrapolation.
-       !$AD II-LOOP
-       do ii=0,isize*jsize-1
-          j = mod(ii, isize) + iStart
-          k = ii/isize + jStart
-          grad(j,k) = pp3(j,k) - pp2(j,k)
-       end do
-
-    case (normalMomentum)
-
-       ! Pressure gradient is computed using the normal momentum
-       ! equation. First set a couple of additional variables for
-       ! the normals, depending on the block face. Note that the
-       ! construction 1: should not be used in these pointers,
-       ! because element 0 is needed. Consequently there will be
-       ! an offset of 1 for these normals. This is commented in
-       ! the code. For moving faces also the grid velocity of
-       ! the 1st cell center from the wall is needed.
-
-       !$AD II-LOOP
-       do ii=0,isize*jsize-1
-          j = mod(ii, isize) + iStart
-          k = ii/isize + jStart
-
-          ! Store the indices k+1, k-1 a bit easier and make
-          ! sure that they do not exceed the range of the arrays.
-
-          km1 = k-1; km1 = max(jStart, km1)
-          kp1 = k+1; kp1 = min(jend  , kp1)
-
-          ! Compute the scaling factor for the central difference
-          ! in the k-direction.
-
-          b1 = one/max(1_intType,(kp1-km1))
-
-          ! The indices j+1 and j-1. Make sure that they
-          ! do not exceed the range of the arrays.
-
-          jm1 = j-1; jm1 = max(iStart, jm1)
-          jp1 = j+1; jp1 = min(iEnd  , jp1)
-
-          ! Compute the scaling factor for the central
-          ! difference in the j-direction.
-
-          a1 = one/max(1_intType,(jp1-jm1))
-
-          ! Compute (twice) the average normal in the generic i,
-          ! j and k-direction. Note that in j and k-direction
-          ! the average in the original indices should be taken
-          ! using j-1 and j (and k-1 and k). However due to the
-          ! usage of pointers ssj and ssk there is an offset in
-          ! the indices of 1 and therefore now the correct
-          ! average is obtained with the indices j and j+1
-          ! (k and k+1).
-
-          sixa = two*ssi(j,k,1)
-          siya = two*ssi(j,k,2)
-          siza = two*ssi(j,k,3)
-
-          sjxa = ssj(j,k,1) + ssj(j+1,k,1)
-          sjya = ssj(j,k,2) + ssj(j+1,k,2)
-          sjza = ssj(j,k,3) + ssj(j+1,k,3)
+            ! Store the velocity components in uux, uuy and uuz and
+            ! subtract the mesh velocity if the face is moving.
 
-          skxa = ssk(j,k,1) + ssk(j,k+1,1)
-          skya = ssk(j,k,2) + ssk(j,k+1,2)
-          skza = ssk(j,k,3) + ssk(j,k+1,3)
+            uux = ww2(j, k, ivx)
+            uuy = ww2(j, k, ivy)
+            uuz = ww2(j, k, ivz)
 
-          ! Compute the difference of the normal vector and
-          ! pressure in j and k-direction. As the indices are
-          ! restricted to the 1st halo-layer, the computation
-          ! of the internal halo values is not consistent;
-          ! however this is not really a problem, because these
-          ! values are overwritten in the communication pattern.
+            if (addGridVelocities) then
+                uux = uux - ss(j, k, 1)
+                uuy = uuy - ss(j, k, 2)
+                uuz = uuz - ss(j, k, 3)
+            end if
 
-          rxj = a1*(BCData(nn)%norm(jp1,k,1) - BCData(nn)%norm(jm1,k,1))
-          ryj = a1*(BCData(nn)%norm(jp1,k,2) - BCData(nn)%norm(jm1,k,2))
-          rzj = a1*(BCData(nn)%norm(jp1,k,3) - BCData(nn)%norm(jm1,k,3))
-          dpj = a1*(pp2(jp1,k)    - pp2(jm1,k))
+            ! Compute the velocity components in j and
+            ! k-direction.
 
-          rxk = b1*(BCData(nn)%norm(j,kp1,1) - BCData(nn)%norm(j,km1,1))
-          ryk = b1*(BCData(nn)%norm(j,kp1,2) - BCData(nn)%norm(j,km1,2))
-          rzk = b1*(BCData(nn)%norm(j,kp1,3) - BCData(nn)%norm(j,km1,3))
-          dpk = b1*(pp2(j,kp1)    - pp2(j,km1))
+            qj = uux * sjxa + uuy * sjya + uuz * sjza
+            qk = uux * skxa + uuy * skya + uuz * skza
 
-          ! Compute the dot product between the unit vector
-          ! and the normal vectors in i, j and k-direction.
+            ! Compute the pressure gradient, which is stored
+            ! in pp1. I'm not entirely sure whether this
+            ! formulation is correct for moving meshes. It could
+            ! be that an additional term is needed there.
 
-          ri = BCData(nn)%norm(j,k,1)*sixa + BCData(nn)%norm(j,k,2)*siya &
-               + BCData(nn)%norm(j,k,3)*siza
-          rj = BCData(nn)%norm(j,k,1)*sjxa + BCData(nn)%norm(j,k,2)*sjya &
-               + BCData(nn)%norm(j,k,3)*sjza
-          rk = BCData(nn)%norm(j,k,1)*skxa + BCData(nn)%norm(j,k,2)*skya &
-               + BCData(nn)%norm(j,k,3)*skza
+            grad(j, k) = ((qj * (uux * rxj + uuy * ryj + uuz * rzj) &
+                           + qk * (uux * rxk + uuy * ryk + uuz * rzk)) &
+                          * ww2(j, k, irho) - rj * dpj - rk * dpk) / ri
+        end do
+        end select BCTreatment
 
-          ! Store the velocity components in uux, uuy and uuz and
-          ! subtract the mesh velocity if the face is moving.
+        ! Determine the state in the halo cell. Again loop over
+        ! the cell range for this subface.
 
-          uux = ww2(j,k,ivx)
-          uuy = ww2(j,k,ivy)
-          uuz = ww2(j,k,ivz)
-
-          if( addGridVelocities ) then
-             uux = uux - ss(j,k,1)
-             uuy = uuy - ss(j,k,2)
-             uuz = uuz - ss(j,k,3)
-          endif
+        !$AD II-LOOP
+        do ii = 0, isize * jsize - 1
+            j = mod(ii, isize) + iStart
+            k = ii / isize + jStart
 
-          ! Compute the velocity components in j and
-          ! k-direction.
+            ! Compute the pressure density and velocity in the
+            ! halo cell. Note that rface is the grid velocity
+            ! component in the direction of norm, i.e. outward
+            ! pointing.
 
-          qj = uux*sjxa + uuy*sjya + uuz*sjza
-          qk = uux*skxa + uuy*skya + uuz*skza
+            pp1(j, k) = mydim(pp2(j, k), grad(j, k))
+
+            vn = two * (BCData(nn)%rface(j, k) - &
+                        ww2(j, k, ivx) * BCData(nn)%norm(j, k, 1) - &
+                        ww2(j, k, ivy) * BCData(nn)%norm(j, k, 2) - &
+                        ww2(j, k, ivz) * BCData(nn)%norm(j, k, 3))
+
+            ww1(j, k, irho) = ww2(j, k, irho)
+            ww1(j, k, ivx) = ww2(j, k, ivx) + vn * BCData(nn)%norm(j, k, 1)
+            ww1(j, k, ivy) = ww2(j, k, ivy) + vn * BCData(nn)%norm(j, k, 2)
+            ww1(j, k, ivz) = ww2(j, k, ivz) + vn * BCData(nn)%norm(j, k, 3)
+
+            ! The laminar and eddy viscosity, if present.
+
+            if (viscous) rlv1(j, k) = rlv2(j, k)
+            if (eddyModel) rev1(j, k) = rev2(j, k)
 
-          ! Compute the pressure gradient, which is stored
-          ! in pp1. I'm not entirely sure whether this
-          ! formulation is correct for moving meshes. It could
-          ! be that an additional term is needed there.
+        end do
+
+        ! Compute the energy for these halo's.
 
-          grad(j,k) = ((qj*(uux*rxj + uuy*ryj + uuz*rzj)      &
-               +   qk*(uux*rxk + uuy*ryk + uuz*rzk))     &
-               *  ww2(j,k,irho) - rj*dpj - rk*dpk)/ri
-       enddo
-    end select BCTreatment
+        call computeEtot(ww1, pp1, correctForK)
 
-    ! Determine the state in the halo cell. Again loop over
-    ! the cell range for this subface.
-    
-    !$AD II-LOOP
-    do ii=0,isize*jsize-1
-      j = mod(ii, isize) + iStart
-      k = ii/isize + jStart
+        ! Extrapolate the state vectors in case a second halo
+        ! is needed.
 
-      ! Compute the pressure density and velocity in the
-      ! halo cell. Note that rface is the grid velocity
-      ! component in the direction of norm, i.e. outward
-      ! pointing.
+        if (secondHalo) call extrapolate2ndHalo(correctForK)
 
-      pp1(j,k) = mydim(pp2(j,k), grad(j,k))
+    end subroutine bcEulerWall
 
-      vn = two*(BCData(nn)%rface(j,k) - &
-           ww2(j,k,ivx)*BCData(nn)%norm(j,k,1) - &
-           ww2(j,k,ivy)*BCData(nn)%norm(j,k,2) - &
-           ww2(j,k,ivz)*BCData(nn)%norm(j,k,3))
+    subroutine bcFarfield(nn, secondHalo, correctForK)
 
-      ww1(j,k,irho) = ww2(j,k,irho)
-      ww1(j,k,ivx)  = ww2(j,k,ivx) + vn*BCData(nn)%norm(j,k,1)
-      ww1(j,k,ivy)  = ww2(j,k,ivy) + vn*BCData(nn)%norm(j,k,2)
-      ww1(j,k,ivz)  = ww2(j,k,ivz) + vn*BCData(nn)%norm(j,k,3)
-
-      ! The laminar and eddy viscosity, if present.
+        ! bcFarfield applies the farfield boundary condition to a block.
+        ! It is assumed that the BCPointers are already set *
 
-      if( viscous )    rlv1(j,k) = rlv2(j,k)
-      if( eddyModel ) rev1(j,k) = rev2(j,k)
+        use constants
+        use blockPointers, only: BCData
+        use flowVarRefState, only: eddyModel, viscous, gammaInf, wInf, pInfCorr
+        use BCPointers, only: ww0, ww1, ww2, pp0, pp1, pp2, rlv0, rlv1, rlv2, &
+                              rev0, rev1, rev2, gamma2, iStart, jStart, iSize, jSize
+        implicit none
 
-    enddo
+        ! Subroutine arguments.
+        logical, intent(in) :: secondHalo, correctForK
 
-    ! Compute the energy for these halo's.
+        ! Local variables.
+        integer(kind=intType) :: nn, i, j, k, l, ii
 
-    call computeEtot(ww1, pp1, correctForK)
+        real(kind=realType) :: nnx, nny, nnz
+        real(kind=realType) :: gm1, ovgm1, ac1, ac2
+        real(kind=realType) :: r0, u0, v0, w0, qn0, vn0, c0, s0
+        real(kind=realType) :: re, ue, ve, we, qne, ce
+        real(kind=realType) :: qnf, cf, uf, vf, wf, sf, cc, qq
 
-    ! Extrapolate the state vectors in case a second halo
-    ! is needed.
+        ! Some constants needed to compute the riemann inVariants.
 
-    if( secondHalo ) call extrapolate2ndHalo(correctForK)
+        gm1 = gammaInf - one
+        ovgm1 = one / gm1
 
-  end subroutine bcEulerWall
+        ! Compute the three velocity components, the speed of sound and
+        ! the entropy of the free stream.
 
-  subroutine bcFarfield(nn, secondHalo, correctForK)
+        r0 = one / wInf(irho)
+        u0 = wInf(ivx)
+        v0 = wInf(ivy)
+        w0 = wInf(ivz)
+        c0 = sqrt(gammaInf * pInfCorr * r0)
+        s0 = wInf(irho)**gammaInf / pInfCorr
 
-    ! bcFarfield applies the farfield boundary condition to a block.
-    ! It is assumed that the BCPointers are already set *
+        ! Loop over the generic subface to set the state in the
+        ! halo cells.
+        !$AD II-LOOP
+        do ii = 0, isize * jsize - 1
+            i = mod(ii, isize) + iStart
+            j = ii / isize + jStart
 
-    use constants
-    use blockPointers, only : BCData
-    use flowVarRefState, only : eddyModel, viscous, gammaInf, wInf, pInfCorr
-    use BCPointers, only : ww0, ww1, ww2, pp0, pp1, pp2, rlv0, rlv1, rlv2, &
-         rev0, rev1, rev2, gamma2, iStart, jStart, iSize, jSize
-    implicit none
+            ! Compute the normal velocity of the free stream and
+            ! substract the normal velocity of the mesh.
 
-    ! Subroutine arguments.
-    logical, intent(in) :: secondHalo, correctForK
-
-    ! Local variables.
-    integer(kind=intType) :: nn, i, j, k, l, ii
-
-    real(kind=realType) :: nnx, nny, nnz
-    real(kind=realType) :: gm1, ovgm1, ac1, ac2
-    real(kind=realType) :: r0, u0, v0, w0, qn0, vn0, c0, s0
-    real(kind=realType) :: re, ue, ve, we, qne, ce
-    real(kind=realType) :: qnf, cf, uf, vf, wf, sf, cc, qq
-
-    ! Some constants needed to compute the riemann inVariants.
-
-    gm1   = gammaInf -one
-    ovgm1 = one/gm1
+            qn0 = u0 * BCData(nn)%norm(i, j, 1) + v0 * BCData(nn)%norm(i, j, 2) + w0 * BCData(nn)%norm(i, j, 3)
+            vn0 = qn0 - BCData(nn)%rface(i, j)
 
-    ! Compute the three velocity components, the speed of sound and
-    ! the entropy of the free stream.
+            ! Compute the three velocity components, the normal
+            ! velocity and the speed of sound of the current state
+            ! in the internal cell.
 
-    r0  = one/wInf(irho)
-    u0  = wInf(ivx)
-    v0  = wInf(ivy)
-    w0  = wInf(ivz)
-    c0  = sqrt(gammaInf*pInfCorr*r0)
-    s0  = wInf(irho)**gammaInf/pInfCorr
-
-    ! Loop over the generic subface to set the state in the
-    ! halo cells.
-    !$AD II-LOOP
-    do ii=0,isize*jsize-1
-       i = mod(ii, isize) + iStart
-       j = ii/isize + jStart
-
-       ! Compute the normal velocity of the free stream and
-       ! substract the normal velocity of the mesh.
-
-       qn0 = u0*BCData(nn)%norm(i,j,1) + v0*BCData(nn)%norm(i,j,2) + w0*BCData(nn)%norm(i,j,3)
-       vn0 = qn0 - BCData(nn)%rface(i,j)
-
-       ! Compute the three velocity components, the normal
-       ! velocity and the speed of sound of the current state
-       ! in the internal cell.
-
-       re  = one/ww2(i,j,irho)
-       ue  = ww2(i,j,ivx)
-       ve  = ww2(i,j,ivy)
-       we  = ww2(i,j,ivz)
-       qne = ue*BCData(nn)%norm(i,j,1) + ve*BCData(nn)%norm(i,j,2) + we*BCData(nn)%norm(i,j,3)
-       ce  = sqrt(gamma2(i,j)*pp2(i,j)*re)
-
-       ! Compute the new values of the riemann inVariants in
-       ! the halo cell. Either the value in the internal cell
-       ! is taken (positive sign of the corresponding
-       ! eigenvalue) or the free stream value is taken
-       ! (otherwise).
-
-       if(vn0 > -c0) then ! Outflow or subsonic inflow.
-          ac1 = qne + two*ovgm1*ce
-       else               ! Supersonic inflow.
-          ac1 = qn0 + two*ovgm1*c0
-       endif
-
-       if(vn0 > c0) then  ! Supersonic outflow.
-          ac2 = qne - two*ovgm1*ce
-       else                     ! Inflow or subsonic outflow.
-          ac2 = qn0 - two*ovgm1*c0
-       endif
-
-       qnf = half*  (ac1 + ac2)
-       cf  = fourth*(ac1 - ac2)*gm1
-
-       if(vn0 > zero) then ! Outflow.
-
-          uf = ue + (qnf - qne)*BCData(nn)%norm(i,j,1)
-          vf = ve + (qnf - qne)*BCData(nn)%norm(i,j,2)
-          wf = we + (qnf - qne)*BCData(nn)%norm(i,j,3)
-
-          sf = ww2(i,j,irho)**gamma2(i,j)/pp2(i,j)
-
-       else
-          ! Inflow
-          uf = u0 + (qnf - qn0)*BCData(nn)%norm(i,j,1)
-          vf = v0 + (qnf - qn0)*BCData(nn)%norm(i,j,2)
-          wf = w0 + (qnf - qn0)*BCData(nn)%norm(i,j,3)
-          sf = s0
-
-       endif
-
-       ! Compute the density, velocity and pressure in the
-       ! halo cell.
-
-       cc = cf*cf/gamma2(i,j)
-       qq = uf*uf + vf*vf + wf*wf
-       ww1(i,j,irho) = (sf*cc)**ovgm1
-       ww1(i,j,ivx)  = uf
-       ww1(i,j,ivy)  = vf
-       ww1(i,j,ivz)  = wf
-       pp1(i,j)      = ww1(i,j,irho)*cc
-
-       ! Simply set the laminar and eddy viscosity to
-       ! the value in the donor cell. Their values do
-       ! not matter too much in the far field.
-
-       if( viscous )    rlv1(i,j) = rlv2(i,j)
-       if( eddyModel )  rev1(i,j) = rev2(i,j)
-    enddo
-
-    ! Compute the energy for these halo's.
-    call computeEtot(ww1, pp1, correctForK)
-
-    ! Extrapolate the state vectors in case a second halo
-    ! is needed.
-    if( secondHalo ) call extrapolate2ndHalo(correctForK)
-
-  end subroutine bcFarfield
-
-  subroutine bcSupersonicInflow(nn, secondHalo, correctForK)
-
-    ! bcSupersonicInflow applies the supersonic inflow boundary
-    ! conditions, entire state vector is prescribed, to a block. It is
-    ! assumed that the pointers in blockPointers are already set to
-    ! the correct block on the correct grid level.
-
-    use constants
-    use blockPointers, only : BCData
-    use flowVarRefState, only : eddyModel, viscous
-    use BCPointers, only : ww0, ww1, pp0, pp1, rlv0, rlv1, rlv2, &
-         rev0, rev1, rev2, iStart, jStart, iSize, jSize
-    implicit none
+            re = one / ww2(i, j, irho)
+            ue = ww2(i, j, ivx)
+            ve = ww2(i, j, ivy)
+            we = ww2(i, j, ivz)
+            qne = ue * BCData(nn)%norm(i, j, 1) + ve * BCData(nn)%norm(i, j, 2) + we * BCData(nn)%norm(i, j, 3)
+            ce = sqrt(gamma2(i, j) * pp2(i, j) * re)
 
-    ! Subroutine arguments.
-    logical, intent(in) :: secondHalo, correctForK
-    integer(kind=intType), intent(in) :: nn
-
-    ! Local variables.
-    integer(kind=intType) :: i, j, l, kk, mm, ii
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
-
-    ! Loop over the generic subface to set the state in the
-    ! halo cells.
-    !$AD II-LOOP
-    do ii=0,isize*jsize-1
-       i = mod(ii, isize) + iStart
-       j = ii/isize + jStart
-
-       ww1(i,j,irho) = BCData(nn)%rho(i,j)
-       ww1(i,j,ivx)  = BCData(nn)%velx(i,j)
-       ww1(i,j,ivy)  = BCData(nn)%vely(i,j)
-       ww1(i,j,ivz)  = BCData(nn)%velz(i,j)
-       pp1(i,j)      = BCData(nn)%ps(i,j)
-
-       ! Set the laminar and eddy viscosity in the halo
-       ! if needed.
-
-       if( viscous )   rlv1(i,j) = rlv2(i,j)
-       if( eddyModel ) rev1(i,j) = rev2(i,j)
-    end do
-
-    call computeEtot(ww1, pp1, correctForK)
-
-    if (secondHalo) then
-       !$AD II-LOOP
-       do ii=0,isize*jsize-1
-          i = mod(ii, isize) + iStart
-          j = ii/isize + jStart
-
-          ww0(i,j,irho) = BCData(nn)%rho(i,j)
-          ww0(i,j,ivx)  = BCData(nn)%velx(i,j)
-          ww0(i,j,ivy)  = BCData(nn)%vely(i,j)
-          ww0(i,j,ivz)  = BCData(nn)%velz(i,j)
-          pp0(i,j)      = BCData(nn)%ps(i,j)
-
-          ! Set the laminar and eddy viscosity in the halo
-          ! if needed.
-
-          if( viscous )   rlv0(i,j) = rlv1(i,j)
-          if( eddyModel ) rev0(i,j) = rev1(i,j)
-       end do
-
-       call computeEtot(ww0, pp0, correctForK)
-    end if
-
-  end subroutine bcSupersonicInflow
-
-  subroutine bcExtrap(nn, secondHalo, correctForK)
-    !
-    !  ccExtrap applies the extrapolation boundary condition to a
-    !  block. It is assumed that the pointers in blockPointers are
-    !  already set to the correct block on the correct grid level.
-    !  Extrapolation boundaries are applied to both singular lines or
-    !  points of a block face and to supersonic outlets. They are
-    !  marked differently because of postprocessing reasons, but
-    !  their numerical treatment is identical.
-    !
-    use constants
-    use blockPointers, only : BCType
-    use flowVarRefState, only : viscous, eddyModel
-    use inputDiscretization, onlY : outflowTreatment
-    !use inputPhysics
-    use BCPointers, only : ww1, ww2, ww3, pp1, pp2, pp3, &
-         rlv1, rlv2, rev1, rev2, iStart, jStart, iSize, jSize
-    implicit none
+            ! Compute the new values of the riemann inVariants in
+            ! the halo cell. Either the value in the internal cell
+            ! is taken (positive sign of the corresponding
+            ! eigenvalue) or the free stream value is taken
+            ! (otherwise).
 
-    ! Subroutine arguments.
-    logical, intent(in) :: secondHalo, correctForK
-    integer(kind=intType), intent(in) :: nn
+            if (vn0 > -c0) then ! Outflow or subsonic inflow.
+                ac1 = qne + two * ovgm1 * ce
+            else               ! Supersonic inflow.
+                ac1 = qn0 + two * ovgm1 * c0
+            end if
 
-    ! Local parameter.
-    real(kind=realType), parameter :: factor = 0.5
+            if (vn0 > c0) then  ! Supersonic outflow.
+                ac2 = qne - two * ovgm1 * ce
+            else                     ! Inflow or subsonic outflow.
+                ac2 = qn0 - two * ovgm1 * c0
+            end if
 
-    ! Local variables.
-    integer(kind=intType) :: i, j, l, ii
-    real(kind=realType) :: fw2, fw3
+            qnf = half * (ac1 + ac2)
+            cf = fourth * (ac1 - ac2) * gm1
 
-    ! Set the extrapolation weights, depending on the situation.
+            if (vn0 > zero) then ! Outflow.
 
-    if(BCType(nn) == SupersonicOutflow) then
+                uf = ue + (qnf - qne) * BCData(nn)%norm(i, j, 1)
+                vf = ve + (qnf - qne) * BCData(nn)%norm(i, j, 2)
+                wf = we + (qnf - qne) * BCData(nn)%norm(i, j, 3)
 
-       ! A physical outflow face. Set the weights depending
-       ! on the input parameter.
+                sf = ww2(i, j, irho)**gamma2(i, j) / pp2(i, j)
 
-       select case (outflowTreatment)
-       case (constantExtrapol)
-          fw2 = one; fw3 = zero
-       case (linExtrapol)
-          fw2 = two; fw3 = -one
-       end select
+            else
+                ! Inflow
+                uf = u0 + (qnf - qn0) * BCData(nn)%norm(i, j, 1)
+                vf = v0 + (qnf - qn0) * BCData(nn)%norm(i, j, 2)
+                wf = w0 + (qnf - qn0) * BCData(nn)%norm(i, j, 3)
+                sf = s0
+
+            end if
+
+            ! Compute the density, velocity and pressure in the
+            ! halo cell.
+
+            cc = cf * cf / gamma2(i, j)
+            qq = uf * uf + vf * vf + wf * wf
+            ww1(i, j, irho) = (sf * cc)**ovgm1
+            ww1(i, j, ivx) = uf
+            ww1(i, j, ivy) = vf
+            ww1(i, j, ivz) = wf
+            pp1(i, j) = ww1(i, j, irho) * cc
+
+            ! Simply set the laminar and eddy viscosity to
+            ! the value in the donor cell. Their values do
+            ! not matter too much in the far field.
+
+            if (viscous) rlv1(i, j) = rlv2(i, j)
+            if (eddyModel) rev1(i, j) = rev2(i, j)
+        end do
+
+        ! Compute the energy for these halo's.
+        call computeEtot(ww1, pp1, correctForK)
+
+        ! Extrapolate the state vectors in case a second halo
+        ! is needed.
+        if (secondHalo) call extrapolate2ndHalo(correctForK)
+
+    end subroutine bcFarfield
+
+    subroutine bcSupersonicInflow(nn, secondHalo, correctForK)
+
+        ! bcSupersonicInflow applies the supersonic inflow boundary
+        ! conditions, entire state vector is prescribed, to a block. It is
+        ! assumed that the pointers in blockPointers are already set to
+        ! the correct block on the correct grid level.
+
+        use constants
+        use blockPointers, only: BCData
+        use flowVarRefState, only: eddyModel, viscous
+        use BCPointers, only: ww0, ww1, pp0, pp1, rlv0, rlv1, rlv2, &
+                              rev0, rev1, rev2, iStart, jStart, iSize, jSize
+        implicit none
+
+        ! Subroutine arguments.
+        logical, intent(in) :: secondHalo, correctForK
+        integer(kind=intType), intent(in) :: nn
+
+        ! Local variables.
+        integer(kind=intType) :: i, j, l, kk, mm, ii
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
+
+        ! Loop over the generic subface to set the state in the
+        ! halo cells.
+        !$AD II-LOOP
+        do ii = 0, isize * jsize - 1
+            i = mod(ii, isize) + iStart
+            j = ii / isize + jStart
+
+            ww1(i, j, irho) = BCData(nn)%rho(i, j)
+            ww1(i, j, ivx) = BCData(nn)%velx(i, j)
+            ww1(i, j, ivy) = BCData(nn)%vely(i, j)
+            ww1(i, j, ivz) = BCData(nn)%velz(i, j)
+            pp1(i, j) = BCData(nn)%ps(i, j)
+
+            ! Set the laminar and eddy viscosity in the halo
+            ! if needed.
+
+            if (viscous) rlv1(i, j) = rlv2(i, j)
+            if (eddyModel) rev1(i, j) = rev2(i, j)
+        end do
+
+        call computeEtot(ww1, pp1, correctForK)
+
+        if (secondHalo) then
+            !$AD II-LOOP
+            do ii = 0, isize * jsize - 1
+                i = mod(ii, isize) + iStart
+                j = ii / isize + jStart
+
+                ww0(i, j, irho) = BCData(nn)%rho(i, j)
+                ww0(i, j, ivx) = BCData(nn)%velx(i, j)
+                ww0(i, j, ivy) = BCData(nn)%vely(i, j)
+                ww0(i, j, ivz) = BCData(nn)%velz(i, j)
+                pp0(i, j) = BCData(nn)%ps(i, j)
+
+                ! Set the laminar and eddy viscosity in the halo
+                ! if needed.
 
-    else
+                if (viscous) rlv0(i, j) = rlv1(i, j)
+                if (eddyModel) rev0(i, j) = rev1(i, j)
+            end do
 
-       ! Singular block boundary. Use linear extrapolation.
+            call computeEtot(ww0, pp0, correctForK)
+        end if
 
-       fw2 = two; fw3 = -one
+    end subroutine bcSupersonicInflow
 
-    endif
+    subroutine bcExtrap(nn, secondHalo, correctForK)
+        !
+        !  ccExtrap applies the extrapolation boundary condition to a
+        !  block. It is assumed that the pointers in blockPointers are
+        !  already set to the correct block on the correct grid level.
+        !  Extrapolation boundaries are applied to both singular lines or
+        !  points of a block face and to supersonic outlets. They are
+        !  marked differently because of postprocessing reasons, but
+        !  their numerical treatment is identical.
+        !
+        use constants
+        use blockPointers, only: BCType
+        use flowVarRefState, only: viscous, eddyModel
+        use inputDiscretization, onlY: outflowTreatment
+        !use inputPhysics
+        use BCPointers, only: ww1, ww2, ww3, pp1, pp2, pp3, &
+                              rlv1, rlv2, rev1, rev2, iStart, jStart, iSize, jSize
+        implicit none
 
-    ! Loop over the generic subface to set the state in the
-    ! 1-st level halos
-    !$AD II-LOOP
-    do ii=0,isize*jsize-1
-       i = mod(ii, isize) + iStart
-       j = ii/isize + jStart
+        ! Subroutine arguments.
+        logical, intent(in) :: secondHalo, correctForK
+        integer(kind=intType), intent(in) :: nn
 
-       ! Extrapolate the density, velocities and pressure.
-       ! Make sure that a certain threshold is kept for the
-       ! density and pressure.
+        ! Local parameter.
+        real(kind=realType), parameter :: factor = 0.5
 
-       ww1(i,j,irho) = fw2*ww2(i,j,irho) + fw3*ww3(i,j,irho)
-       ww1(i,j,irho) = max(factor*ww2(i,j,irho), ww1(i,j,irho))
+        ! Local variables.
+        integer(kind=intType) :: i, j, l, ii
+        real(kind=realType) :: fw2, fw3
 
-       ww1(i,j,ivx) = fw2*ww2(i,j,ivx) + fw3*ww3(i,j,ivx)
-       ww1(i,j,ivy) = fw2*ww2(i,j,ivy) + fw3*ww3(i,j,ivy)
-       ww1(i,j,ivz) = fw2*ww2(i,j,ivz) + fw3*ww3(i,j,ivz)
+        ! Set the extrapolation weights, depending on the situation.
 
-       pp1(i,j) = fw2*pp2(i,j) + fw3*pp3(i,j)
-       pp1(i,j) = max(factor*pp2(i,j), pp1(i,j))
+        if (BCType(nn) == SupersonicOutflow) then
 
-       ! The laminar and eddy viscosity, if present. These
-       ! values are simply taken constant. Their values do
-       ! not really matter.
+            ! A physical outflow face. Set the weights depending
+            ! on the input parameter.
 
-       if( viscous )   rlv1(i,j) = rlv2(i,j)
-       if( eddyModel ) rev1(i,j) = rev2(i,j)
+            select case (outflowTreatment)
+            case (constantExtrapol)
+                fw2 = one; fw3 = zero
+            case (linExtrapol)
+                fw2 = two; fw3 = -one
+            end select
 
-    enddo
+        else
 
-    ! Compute the energy for these halo's.
+            ! Singular block boundary. Use linear extrapolation.
 
-    call computeEtot(ww1, pp1, correctForK)
+            fw2 = two; fw3 = -one
 
-    ! Extrapolate the state vectors in case a second halo
-    ! is needed.
+        end if
 
-    if( secondHalo ) call extrapolate2ndHalo(correctForK)
+        ! Loop over the generic subface to set the state in the
+        ! 1-st level halos
+        !$AD II-LOOP
+        do ii = 0, isize * jsize - 1
+            i = mod(ii, isize) + iStart
+            j = ii / isize + jStart
 
-  end subroutine bcExtrap
+            ! Extrapolate the density, velocities and pressure.
+            ! Make sure that a certain threshold is kept for the
+            ! density and pressure.
 
-  subroutine pRhoSubsonicInlet(ww, pp, correctForK)
+            ww1(i, j, irho) = fw2 * ww2(i, j, irho) + fw3 * ww3(i, j, irho)
+            ww1(i, j, irho) = max(factor * ww2(i, j, irho), ww1(i, j, irho))
 
-    !  pRhoSubsonicInlet computes the pressure and density for the
-    !  given range of the block to which the pointers in blockPointers
-    !  currently point.
+            ww1(i, j, ivx) = fw2 * ww2(i, j, ivx) + fw3 * ww3(i, j, ivx)
+            ww1(i, j, ivy) = fw2 * ww2(i, j, ivy) + fw3 * ww3(i, j, ivy)
+            ww1(i, j, ivz) = fw2 * ww2(i, j, ivz) + fw3 * ww3(i, j, ivz)
 
-    use constants
-    use cpCurveFits
-    use flowVarRefState, only : RGas, Tref
-    use inputPhysics, only : cpModel, gammaConstant
-    use BCPointers, only : iSize, jSize, iStart, jStart
-    implicit none
+            pp1(i, j) = fw2 * pp2(i, j) + fw3 * pp3(i, j)
+            pp1(i, j) = max(factor * pp2(i, j), pp1(i, j))
+
+            ! The laminar and eddy viscosity, if present. These
+            ! values are simply taken constant. Their values do
+            ! not really matter.
+
+            if (viscous) rlv1(i, j) = rlv2(i, j)
+            if (eddyModel) rev1(i, j) = rev2(i, j)
+
+        end do
+
+        ! Compute the energy for these halo's.
 
-    ! Local parameter.
-    real(kind=realType), parameter :: twoThird = two*third
+        call computeEtot(ww1, pp1, correctForK)
 
-    ! Subroutine arguments.
-    real(kind=realType), dimension(:,:,:) :: ww
-    real(kind=realType), dimension(:,:) :: pp
-    logical, intent(in)               :: correctForK
+        ! Extrapolate the state vectors in case a second halo
+        ! is needed.
 
-    ! Local variables.
-    integer(kind=intType) :: i, j, ii, mm, nns, nnt, iii
-    real(kind=realType)   :: govgm1, tt, ts, pt, ratio
-    real(kind=realType)   :: intTs, intTt, val
+        if (secondHalo) call extrapolate2ndHalo(correctForK)
 
-    ! Determine the cp model used in the computation.
-    select case (cpModel)
+    end subroutine bcExtrap
 
-    case (cpConstant)
+    subroutine pRhoSubsonicInlet(ww, pp, correctForK)
 
-       ! Constant cp and thus constant gamma. Compute the coefficient
-       ! gamma/(gamma-1), which occurs in the isentropic expression
-       ! for the total pressure.
+        !  pRhoSubsonicInlet computes the pressure and density for the
+        !  given range of the block to which the pointers in blockPointers
+        !  currently point.
 
-       govgm1 = gammaConstant/(gammaConstant - one)
+        use constants
+        use cpCurveFits
+        use flowVarRefState, only: RGas, Tref
+        use inputPhysics, only: cpModel, gammaConstant
+        use BCPointers, only: iSize, jSize, iStart, jStart
+        implicit none
 
-       ! Loop over the pointer range
-       !$AD II-LOOP
-       do ii=0,isize*jsize-1
-          i = mod(ii, isize) + iStart
-          j = ii/isize + jStart
+        ! Local parameter.
+        real(kind=realType), parameter :: twoThird = two * third
 
-          ! Store the total temperature, total pressure and
-          ! static temperature a bit easier.
+        ! Subroutine arguments.
+        real(kind=realType), dimension(:, :, :) :: ww
+        real(kind=realType), dimension(:, :) :: pp
+        logical, intent(in) :: correctForK
 
-          tt = ww(i,j,irho)
-          pt = pp(i,j)
-          ts = ww(i,j,irhoE)
+        ! Local variables.
+        integer(kind=intType) :: i, j, ii, mm, nns, nnt, iii
+        real(kind=realType) :: govgm1, tt, ts, pt, ratio
+        real(kind=realType) :: intTs, intTt, val
 
-          ! Compute the static pressure from the total pressure
-          ! and the temperature ratio. Compute the density using
-          ! the gas law.
+        ! Determine the cp model used in the computation.
+        select case (cpModel)
 
-          ratio        = (ts/tt)**govgm1
-          pp(i,j)      = pt*ratio
-          ww(i,j,irho) = pp(i,j)/(RGas*ts)
+        case (cpConstant)
 
-       enddo
+            ! Constant cp and thus constant gamma. Compute the coefficient
+            ! gamma/(gamma-1), which occurs in the isentropic expression
+            ! for the total pressure.
+
+            govgm1 = gammaConstant / (gammaConstant - one)
+
+            ! Loop over the pointer range
+            !$AD II-LOOP
+            do ii = 0, isize * jsize - 1
+                i = mod(ii, isize) + iStart
+                j = ii / isize + jStart
+
+                ! Store the total temperature, total pressure and
+                ! static temperature a bit easier.
+
+                tt = ww(i, j, irho)
+                pt = pp(i, j)
+                ts = ww(i, j, irhoE)
+
+                ! Compute the static pressure from the total pressure
+                ! and the temperature ratio. Compute the density using
+                ! the gas law.
+
+                ratio = (ts / tt)**govgm1
+                pp(i, j) = pt * ratio
+                ww(i, j, irho) = pp(i, j) / (RGas * ts)
+
+            end do
 
 #ifndef USE_TAPENADE
-    case (cpTempCurveFits)
+        case (cpTempCurveFits)
 
-       ! Cp as function of the temperature is given via curve fits.
-       ! The ratio pt/ps is given by exp(a), where a is the integral
-       ! from ts to tt of cp/(r*t).
+            ! Cp as function of the temperature is given via curve fits.
+            ! The ratio pt/ps is given by exp(a), where a is the integral
+            ! from ts to tt of cp/(r*t).
 
-       ! Loop over the pointer range
-       !$AD II-LOOP
-       do iii=0,isize*jsize-1
-          i = mod(iii, isize) + 1
-          j = iii/isize + 1
+            ! Loop over the pointer range
+            !$AD II-LOOP
+            do iii = 0, isize * jsize - 1
+                i = mod(iii, isize) + 1
+                j = iii / isize + 1
 
-          ! Store the total temperature, total pressure and
-          ! static temperature a bit easier. Note that the
-          ! temperatures get their dimensional value.
+                ! Store the total temperature, total pressure and
+                ! static temperature a bit easier. Note that the
+                ! temperatures get their dimensional value.
 
-          tt = Tref*ww(i,j,irho)
-          pt = pp(i,j)
-          ts = Tref*ww(i,j,irhoE)
+                tt = Tref * ww(i, j, irho)
+                pt = pp(i, j)
+                ts = Tref * ww(i, j, irhoE)
 
-          ! Determine the integrant of cp/(r*t) for the static
-          ! temperature ts and the total temperature tt.
+                ! Determine the integrant of cp/(r*t) for the static
+                ! temperature ts and the total temperature tt.
 
-          call cportIntegrant(ts, nns, intTs)
-          call cportIntegrant(tt, nnt, intTt)
+                call cportIntegrant(ts, nns, intTs)
+                call cportIntegrant(tt, nnt, intTt)
 
-          ! Compute the value of the integral of cp/(r*t) from
-          ! ts to tt. First part is the initialization where it
-          ! is assumed that both ts and tt lie in the same
-          ! interval of the curve fits.
+                ! Compute the value of the integral of cp/(r*t) from
+                ! ts to tt. First part is the initialization where it
+                ! is assumed that both ts and tt lie in the same
+                ! interval of the curve fits.
 
-          val = intTt - intTs
+                val = intTt - intTs
 
-          ! Correct this value if ts and tt belong to different
-          ! curve fit intervals.
+                ! Correct this value if ts and tt belong to different
+                ! curve fit intervals.
 
-          do mm=(nns+1),nnt
+                do mm = (nns + 1), nnt
 
-             ! The contribution from the interval mm-1. Add the
-             ! value of the integrant at the upper boundary.
+                    ! The contribution from the interval mm-1. Add the
+                    ! value of the integrant at the upper boundary.
 
-             ii = mm - 1
-             if(ii == 0_intType) then
-                val = val + (cv0+one)*log(cpTrange(0))
-             else
-                val = val + cpTempFit(ii)%intCpovrt_2
-             endif
+                    ii = mm - 1
+                    if (ii == 0_intType) then
+                        val = val + (cv0 + one) * log(cpTrange(0))
+                    else
+                        val = val + cpTempFit(ii)%intCpovrt_2
+                    end if
 
-             ! The contribution from the interval mm. Substract
-             ! the value of integrant at the lower boundary.
+                    ! The contribution from the interval mm. Substract
+                    ! the value of integrant at the lower boundary.
 
-             if(mm > cpNparts) then
-                val = val - (cvn+one)*log(cpTrange(cpNparts))
-             else
-                val = val - cpTempFit(mm)%intCpovrt_1
-             endif
+                    if (mm > cpNparts) then
+                        val = val - (cvn + one) * log(cpTrange(cpNparts))
+                    else
+                        val = val - cpTempFit(mm)%intCpovrt_1
+                    end if
 
-          enddo
+                end do
 
-          ! Compute the static pressure from the known
-          ! total pressure.
+                ! Compute the static pressure from the known
+                ! total pressure.
 
-          ratio   = exp(val)
-          pp(i,j) = pt/ratio
+                ratio = exp(val)
+                pp(i, j) = pt / ratio
 
-          ! Compute the density using the gas law.
+                ! Compute the density using the gas law.
 
-          ts = ww(i,j,irhoE)
-          ww(i,j,irho) = pp(i,j)/(RGas*ts)
+                ts = ww(i, j, irhoE)
+                ww(i, j, irho) = pp(i, j) / (RGas * ts)
 
-       enddo
+            end do
 #endif
-    end select
-
-    ! Add 2*rho*k/3 to the pressure if a k-equation is present.
-
-    if( correctForK ) then
-       !$AD II-LOOP
-       do ii=0,isize*jsize-1
-          i = mod(ii, isize) + iStart
-          j = ii/isize + jStart
-          pp(i,j) = pp(i,j) &
-               + twoThird*ww(i,j,irho)*ww(i,j,itu1)
-       enddo
-    end if
+        end select
+
+        ! Add 2*rho*k/3 to the pressure if a k-equation is present.
+
+        if (correctForK) then
+            !$AD II-LOOP
+            do ii = 0, isize * jsize - 1
+                i = mod(ii, isize) + iStart
+                j = ii / isize + jStart
+                pp(i, j) = pp(i, j) &
+                           + twoThird * ww(i, j, irho) * ww(i, j, itu1)
+            end do
+        end if
 #ifndef USE_TAPENADAE
-  contains
-    subroutine cportIntegrant(T, nn, int)
+    contains
+        subroutine cportIntegrant(T, nn, int)
 
-      ! cportIntegrant computes the integrant of the function cp/(r*t)
-      ! for the given temperature. It also stores the correct curve
-      ! fit interval, which is needed to determine the entire integral
-      ! in the main subroutine.
+            ! cportIntegrant computes the integrant of the function cp/(r*t)
+            ! for the given temperature. It also stores the correct curve
+            ! fit interval, which is needed to determine the entire integral
+            ! in the main subroutine.
 
-      implicit none
+            implicit none
 
-      ! Subroutine arguments.
-      integer(kind=intType), intent(out) :: nn
-      real(kind=realType),   intent(in)  :: t
-      real(kind=realType),   intent(out) :: int
+            ! Subroutine arguments.
+            integer(kind=intType), intent(out) :: nn
+            real(kind=realType), intent(in) :: t
+            real(kind=realType), intent(out) :: int
 
-      ! Local variables.
-      integer(kind=intType) :: mm, ii, start
-      real(kind=realType)   :: T2
+            ! Local variables.
+            integer(kind=intType) :: mm, ii, start
+            real(kind=realType) :: T2
 
-      ! Determine the situation we are having here for the temperature.
+            ! Determine the situation we are having here for the temperature.
 
-      if(T <= cpTrange(0)) then
+            if (T <= cpTrange(0)) then
 
-         ! Temperature is less than the smallest temperature of the
-         ! curve fits. Use extrapolation using constant cp.
-         ! Set nn to 0 to indicate this.
+                ! Temperature is less than the smallest temperature of the
+                ! curve fits. Use extrapolation using constant cp.
+                ! Set nn to 0 to indicate this.
 
-         nn  = 0
-         int = (cv0+one)*log(T)
+                nn = 0
+                int = (cv0 + one) * log(T)
 
-      else if(T >= cpTrange(cpNparts)) then
+            else if (T >= cpTrange(cpNparts)) then
 
-         ! Temperature is larger than the largest temperature of the
-         ! curve fits. Use extrapolation using constant cp.
-         ! Set nn to cpNparts+1 to indicate this.
+                ! Temperature is larger than the largest temperature of the
+                ! curve fits. Use extrapolation using constant cp.
+                ! Set nn to cpNparts+1 to indicate this.
 
-         nn  = cpNparts + 1
-         int = (cvn+one)*log(T)
+                nn = cpNparts + 1
+                int = (cvn + one) * log(T)
 
-      else
+            else
 
-         ! Temperature is within the curve fit range. Determine
-         ! the correct interval.
+                ! Temperature is within the curve fit range. Determine
+                ! the correct interval.
 
-         ii    = cpNparts
-         start = 1
-         interval: do
+                ii = cpNparts
+                start = 1
+                interval: do
 
-            ! Next guess for the interval.
+                    ! Next guess for the interval.
 
-            nn = start + ii/2
+                    nn = start + ii / 2
 
-            ! Determine the situation we are having here.
+                    ! Determine the situation we are having here.
 
-            if(T > cpTrange(nn)) then
+                    if (T > cpTrange(nn)) then
 
-               ! Temperature is larger than the upper boundary of
-               ! the current interval. Update the lower boundary.
+                        ! Temperature is larger than the upper boundary of
+                        ! the current interval. Update the lower boundary.
 
-               start = nn + 1
-               ii    = ii - 1
+                        start = nn + 1
+                        ii = ii - 1
 
-            else if(T >= cpTrange(nn-1)) then
+                    else if (T >= cpTrange(nn - 1)) then
 
-               ! This is the correct range. Exit the do-loop.
+                        ! This is the correct range. Exit the do-loop.
 
-               exit
+                        exit
 
-            endif
+                    end if
 
-            ! Modify ii for the next branch to search.
+                    ! Modify ii for the next branch to search.
 
-            ii = ii/2
+                    ii = ii / 2
 
-         enddo interval
+                end do interval
 
-         ! Nn contains the correct curve fit interval.
-         ! Compute the value of the integrant.
+                ! Nn contains the correct curve fit interval.
+                ! Compute the value of the integrant.
 
-         int = zero
-         do ii=1,cpTempFit(nn)%nterm
+                int = zero
+                do ii = 1, cpTempFit(nn)%nterm
 
-            mm = cpTempFit(nn)%exponents(ii)
-            if(mm == 0_intType) then
-               int = int + cpTempFit(nn)%constants(ii)*log(T)
-            else
-               T2  = T**mm
-               int = int + cpTempFit(nn)%constants(ii)*T2/mm
-            endif
+                    mm = cpTempFit(nn)%exponents(ii)
+                    if (mm == 0_intType) then
+                        int = int + cpTempFit(nn)%constants(ii) * log(T)
+                    else
+                        T2 = T**mm
+                        int = int + cpTempFit(nn)%constants(ii) * T2 / mm
+                    end if
 
-         enddo
+                end do
 
-      endif
-    end subroutine cportIntegrant
+            end if
+        end subroutine cportIntegrant
 #endif
-  end subroutine pRhoSubsonicInlet
+    end subroutine pRhoSubsonicInlet
 
-  subroutine computeEtot(ww, pp, correctForK)
+    subroutine computeEtot(ww, pp, correctForK)
 
-    ! Simplified total energy computation for boundary conditions.
-    ! Only implements the constant cpModel
+        ! Simplified total energy computation for boundary conditions.
+        ! Only implements the constant cpModel
 
-    use constants
-    use inputPhysics, only : gammaConstant, cpModel
-    use utils, only :terminate
-    use BCPointers, only : iSize, jSize, iStart, jStart
-    implicit none
+        use constants
+        use inputPhysics, only: gammaConstant, cpModel
+        use utils, only: terminate
+        use BCPointers, only: iSize, jSize, iStart, jStart
+        implicit none
 
-    real(kind=realType),  dimension(:,:) :: pp
-    real(kind=realType),  dimension(:,:,:) :: ww
-    logical :: correctForK
-    integer(kind=intType) :: ii, i, j
-    real(kind=realType) :: ovgm1, factK
-
-    select case (cpModel)
-
-    case (cpConstant)
-
-       ! Constant cp and thus constant gamma.
-       ! Abbreviate 1/(gamma -1) a bit easier.
-
-       ovgm1 = one/(gammaConstant - one)
-       factK = ovgm1*(five*third - gammaConstant)
-
-       ! Loop over the given array and compute the energy, possibly
-       ! correcting for K
-       !$AD II-LOOP
-       do ii=0,isize*jsize-1
-          i = mod(ii, isize) + iStart
-          j = ii/isize + jStart
-          if( .not. correctForK ) then
-             ww(i,j,iRhoE) = ovgm1*pp(i,j) &
-                  + half*ww(i,j,irho)*(ww(i,j,ivx)**2 &
-                  +                    ww(i,j,ivy)**2 &
-                  +                    ww(i,j,ivz)**2)
-
-          else
-             ww(i,j, iRhoE) = ovgm1*pp(i,j) &
-                  + half*ww(i,j,irho)*(ww(i,j,ivx)**2 &
-                  +                    ww(i,j,ivy)**2 &
-                  +                    ww(i,j,ivz)**2) &
-                  - factK*ww(i,j,irho)*ww(i,j,itu1)
-          end if
-       enddo
-
-    case (cpTempCurveFits)
-
-       call terminate("BCRoutines", "CPTempCurveFits not implemented yet.")
-    end select
-  end subroutine computeEtot
-
-  subroutine extrapolate2ndHalo(correctForK)
-
-    ! extrapolate2ndHalo determines the states of the second layer
-    ! halo cells for the given subface of the block. It is assumed
-    ! that the appropriate BCPointers are already set
-
-    use constants
-    use BCPointers, only : ww0, ww1, ww2, pp0, pp1, pp2, &
-         rlv0, rlv1, rlv2, rev0, rev1, rev2, iSize, jSize, iStart, jStart
-    use flowVarRefState, only : viscous, eddyModel
-    implicit none
+        real(kind=realType), dimension(:, :) :: pp
+        real(kind=realType), dimension(:, :, :) :: ww
+        logical :: correctForK
+        integer(kind=intType) :: ii, i, j
+        real(kind=realType) :: ovgm1, factK
+
+        select case (cpModel)
+
+        case (cpConstant)
+
+            ! Constant cp and thus constant gamma.
+            ! Abbreviate 1/(gamma -1) a bit easier.
+
+            ovgm1 = one / (gammaConstant - one)
+            factK = ovgm1 * (five * third - gammaConstant)
+
+            ! Loop over the given array and compute the energy, possibly
+            ! correcting for K
+            !$AD II-LOOP
+            do ii = 0, isize * jsize - 1
+                i = mod(ii, isize) + iStart
+                j = ii / isize + jStart
+                if (.not. correctForK) then
+                    ww(i, j, iRhoE) = ovgm1 * pp(i, j) &
+                                      + half * ww(i, j, irho) * (ww(i, j, ivx)**2 &
+                                                                 + ww(i, j, ivy)**2 &
+                                                                 + ww(i, j, ivz)**2)
+
+                else
+                    ww(i, j, iRhoE) = ovgm1 * pp(i, j) &
+                                      + half * ww(i, j, irho) * (ww(i, j, ivx)**2 &
+                                                                 + ww(i, j, ivy)**2 &
+                                                                 + ww(i, j, ivz)**2) &
+                                      - factK * ww(i, j, irho) * ww(i, j, itu1)
+                end if
+            end do
+
+        case (cpTempCurveFits)
+
+            call terminate("BCRoutines", "CPTempCurveFits not implemented yet.")
+        end select
+    end subroutine computeEtot
+
+    subroutine extrapolate2ndHalo(correctForK)
+
+        ! extrapolate2ndHalo determines the states of the second layer
+        ! halo cells for the given subface of the block. It is assumed
+        ! that the appropriate BCPointers are already set
+
+        use constants
+        use BCPointers, only: ww0, ww1, ww2, pp0, pp1, pp2, &
+                              rlv0, rlv1, rlv2, rev0, rev1, rev2, iSize, jSize, iStart, jStart
+        use flowVarRefState, only: viscous, eddyModel
+        implicit none
 
-    ! Input variables
-    logical, intent(in) :: correctForK
+        ! Input variables
+        logical, intent(in) :: correctForK
 
-    ! Working variables
-    real(kind=realType), parameter :: factor = 0.5_realType
-    integer(kind=intType) :: i, j, l, ii
+        ! Working variables
+        real(kind=realType), parameter :: factor = 0.5_realType
+        integer(kind=intType) :: i, j, l, ii
 
-    ! Loop over the generic subface to set the state in the
-    ! halo cells.
-    !$AD II-LOOP
-    do ii=0,isize*jsize-1
-       i = mod(ii, isize) + iStart
-       j = ii/isize + jStart
+        ! Loop over the generic subface to set the state in the
+        ! halo cells.
+        !$AD II-LOOP
+        do ii = 0, isize * jsize - 1
+            i = mod(ii, isize) + iStart
+            j = ii / isize + jStart
 
-       ! Extrapolate the density, momentum and pressure.
-       ! Make sure that a certain threshold is kept.
+            ! Extrapolate the density, momentum and pressure.
+            ! Make sure that a certain threshold is kept.
 
-       ww0(i,j,irho) = two*ww1(i,j,irho) - ww2(i,j,irho)
-       ww0(i,j,irho) = max(factor*ww1(i,j,irho),ww0(i,j,irho))
+            ww0(i, j, irho) = two * ww1(i, j, irho) - ww2(i, j, irho)
+            ww0(i, j, irho) = max(factor * ww1(i, j, irho), ww0(i, j, irho))
 
-       ww0(i,j,ivx) = two*ww1(i,j,ivx) - ww2(i,j,ivx)
-       ww0(i,j,ivy) = two*ww1(i,j,ivy) - ww2(i,j,ivy)
-       ww0(i,j,ivz) = two*ww1(i,j,ivz) - ww2(i,j,ivz)
+            ww0(i, j, ivx) = two * ww1(i, j, ivx) - ww2(i, j, ivx)
+            ww0(i, j, ivy) = two * ww1(i, j, ivy) - ww2(i, j, ivy)
+            ww0(i, j, ivz) = two * ww1(i, j, ivz) - ww2(i, j, ivz)
 
-       pp0(i,j) = max(factor*pp1(i,j),two*pp1(i,j) - pp2(i,j))
+            pp0(i, j) = max(factor * pp1(i, j), two * pp1(i, j) - pp2(i, j))
 
-       ! The laminar and eddy viscosity, if present. These values
-       ! are simply taken constant. Their values do not matter.
+            ! The laminar and eddy viscosity, if present. These values
+            ! are simply taken constant. Their values do not matter.
 
-       if( viscous )   rlv0(i,j) = rlv1(i,j)
-       if( eddyModel ) rev0(i,j) = rev1(i,j)
-    enddo
+            if (viscous) rlv0(i, j) = rlv1(i, j)
+            if (eddyModel) rev0(i, j) = rev1(i, j)
+        end do
 
-    ! Compute the energy for this halo range.
-    call computeEtot(ww0, pp0, correctForK)
+        ! Compute the energy for this halo range.
+        call computeEtot(ww0, pp0, correctForK)
 
-  end subroutine extrapolate2ndHalo
+    end subroutine extrapolate2ndHalo
 
 end module BCRoutines
diff --git a/src/solver/actuatorRegion.F90 b/src/solver/actuatorRegion.F90
index 5f41366bd..ca09cc122 100644
--- a/src/solver/actuatorRegion.F90
+++ b/src/solver/actuatorRegion.F90
@@ -1,576 +1,576 @@
 module actuatorRegion
 
-  use constants
-  use communication, only : commType, internalCommType
-  use actuatorRegionData
-  implicit none
-
-contains
-  subroutine addActuatorRegion(pts, conn, axis1, axis2, famName, famID, &
-       thrust, torque, heat, relaxStart, relaxEnd, nPts, nConn)
-    ! Add a user-supplied integration surface.
-
-    use communication, only : myID, adflow_comm_world
-    use constants
-    use adtBuild, only : buildSerialQuad, destroySerialQuad
-    use adtLocalSearch, only : minDistanceTreeSearchSinglePoint
-    use ADTUtils, only : stack
-    use ADTData
-    use blockPointers, only : x, il, jl, kl, nDom, iBlank, vol
-    use adjointVars, only : nCellsLocal
-    use utils, only : setPointers, EChk
-    implicit none
-
-    ! Input variables
-    integer(kind=intType), intent(in) :: nPts, nConn, famID
-    real(kind=realType), dimension(3, nPts), intent(in), target :: pts
-    integer(kind=intType), dimension(4, nConn), intent(in), target :: conn
-    real(kind=realType), intent(in), dimension(3) :: axis1, axis2
-    character(len=*) :: famName
-    real(kind=realType) :: thrust, torque, heat, relaxStart, relaxEnd
-
-    ! Working variables
-    integer(kind=intType) :: i, j, k, nn, iDim, cellID, intInfo(3), sps, level, iii, ierr
-    real(kind=realType) :: dStar, frac, volLocal
-    type(actuatorRegionType), pointer :: region
-    real(kind=realType), dimension(3) :: minX, maxX, v1, v2, v3, xCen, axisVec
-    type(adtType) :: ADT
-    real(kind=realType) :: axisVecNorm
-    real(kind=realType), dimension(:, :), allocatable :: norm
-    integer(kind=intType), dimension(:), allocatable :: normCount
-    integer(kind=intType), dimension(:, :), pointer :: tmp
-
-    ! ADT Type required data
-    integer(kind=intType), dimension(:), pointer :: frontLeaves, frontLeavesNew
-    type(adtBBoxTargetType), dimension(:), pointer :: BB
-    real(kind=realType) :: coor(4), uvw(5)
-    real(kind=realType) :: dummy(3, 2)
-
-    nActuatorRegions = nActuatorRegions + 1
-    if (nActuatorRegions > nActuatorRegionsMax) then
-       print *,"Error: Exceeded the maximum number of actuatorDiskRegions.  "&
-            &"Increase nActuatorDiskRegionsMax"
-       stop
-    end if
-
-    ! Save the input information
-    region => actuatorRegions(nActuatorRegions)
-    region%famName = famName
-    region%famID = famID
-    region%torque = torque
-    region%heat = heat
-    region%relaxStart = relaxStart
-    region%relaxEnd = relaxEnd
-    ! We use the axis to define the direction of F. Since we are
-    ! dealing with rotating machinary, it is pretty good approximation
-    ! to assume that the thrust is going to be in the direction of the
-    ! axis.
-    axisVec = axis2-axis1
-    axisVecNorm = sqrt((axisVec(1)**2 + axisvec(2)**2 + axisVec(3)**2))
-    if (axisVecNorm < 1e-12) then
-       print *,"Error: Axis cannot be determined by the supplied points. They are too close"
-       stop
-    end if
-
-    axisVec = axisVec / axisVecNorm
-
-    region%force = axisVec*thrust
-    region%axisVec = axisVec
-
-    allocate(region%blkPtr(0:nDom))
-    region%blkPtr(0) = 0
-
-    ! Next thing we need to do is to figure out if any of our cells
-    ! are inside the actuator disk region. If so we will save them in
-    ! the actuatorRegionType data structure
-
-    ! Since this is effectively a wall-distance calc it gets super
-    ! costly for the points far away. Luckly, we can do a fairly
-    ! simple shortcut: Just compute the bounding box of the region and
-    ! use that as the "already found" distance in the cloest point
-    ! search. This will eliminate all the points further away
-    ! immediately and this should be sufficiently fast.
-
-    ! So...compute that bounding box:
-    do iDim=1,3
-       minX(iDim) = minval(pts(iDim, :))
-       maxX(iDim) = maxval(pts(iDim, :))
-    end do
-
-    ! Get the max distance. This should be quite conservative.
-    dStar = (maxX(1)-minx(1))**2 + (maxX(2)-minX(2))**2  + (maxX(3)-minX(3))**2
-
-    ! Now build the tree.
-    call buildSerialQuad(size(conn, 2), size(pts, 2), pts, conn, ADT)
-
-    ! Compute the (averaged) unique nodal vectors:
-    allocate(norm(3, size(pts, 2)), normCount(size(pts, 2)))
-
-    norm = zero
-    normCount = 0
-
-    do i=1, size(conn, 2)
-
-       ! Compute cross product normal and normalize
-       v1 = pts(:, conn(3, i)) -  pts(:, conn(1, i))
-       v2 = pts(:, conn(4, i)) -  pts(:, conn(2, i))
-
-       v3(1) = (v1(2)*v2(3) - v1(3)*v2(2))
-       v3(2) = (v1(3)*v2(1) - v1(1)*v2(3))
-       v3(3) = (v1(1)*v2(2) - v1(2)*v2(1))
-       v3 = v3 / sqrt(v3(1)**2 + v3(2)**2 + v3(3)**2)
-
-       ! Add to each of the four pts and increment the number added
-       do j=1, 4
-          norm(:, conn(j, i)) = norm(:, conn(j, i)) + v3
-          normCount(conn(j, i)) = normCount(conn(j, i)) + 1
-       end do
-    end do
-
-    ! Now just divide by the norm count
-    do i=1, size(pts, 2)
-       norm(:, i) = norm(:, i) / normCount(i)
-    end do
-
-    ! Norm count is no longer needed
-    deallocate(normCount)
-
-    ! Allocate the extra data the tree search requires.
-    allocate(stack(100), BB(20), frontLeaves(25), frontLeavesNew(25))
-
-    ! Allocate sufficient space for the maximum possible number of cellIDs
-    allocate(region%cellIDs(3, nCellsLocal(1)))
-
-    ! Now search for all the coordinate. Note that We have explictly
-    ! set sps to 1 becuase it is only implemented for single grid.
-    sps = 1
-    level = 1
-
-    do nn=1, nDom
-       call setPointers(nn, level, sps)
-       do k=2, kl
-          do j=2, jl
-             do i=2, il
-                ! Only check real cells
-                if (iblank(i, j, k) == 1) then
-                   ! Compute the cell center
-                   xCen = eighth*(x(i-1,j-1,k-1,:) + x(i,j-1,k-1,:)  &
-                        +         x(i-1,j,  k-1,:) + x(i,j,  k-1,:)  &
-                        +         x(i-1,j-1,k,  :) + x(i,j-1,k,  :)  &
-                        +         x(i-1,j,  k,  :) + x(i,j,  k,  :))
-
-                   ! The current point to search for and continually
-                   ! reset the "closest point already found" variable.
-                   coor(1:3) = xCen
-                   coor(4) = dStar
-                   intInfo(3) = 0
-                   call minDistancetreeSearchSinglePoint(ADT, coor, intInfo, &
-                        uvw, dummy, 0, BB, frontLeaves, frontLeavesNew)
-                   cellID = intInfo(3)
-                   if (cellID > 0) then
-                      ! Now check if this was successful or now:
-                      if (checkInside()) then
-                         ! Whoohoo! We are inside the region. Add this cell
-                         ! to the list.
-                         region%nCellIDs = region%nCellIDs + 1
-                         region%cellIDs(:, region%nCellIDs) = (/i, j, k/)
-                      end if
-                   end if
-                end if
-             end do
-          end do
-       end do
-       ! Since we're doing all the blocks in order, simply store the
-       ! current counter into blkPtr which gives up the range of cells
-       ! we have found on this block
-       region%blkPtr(nn) = region%nCellIDs
-
-    end do
-
-    ! Resize the cellIDs to the correct size now that we know the
-    ! correct exact number.
-    tmp => region%cellIDs
-    allocate(region%cellIDs(3, region%nCellIDs))
-    region%cellIDs = tmp(:, 1:region%nCellIDs)
-    deallocate(tmp)
-
-    ! Now go back and generate the total volume of the the cells we've flagged
-    volLocal = zero
-
-    do nn=1, nDom
-       call setPointers(nn, level, sps)
-
-       ! Loop over the region for this block
-       do iii=region%blkPtr(nn-1) + 1, region%blkPtr(nn)
-          i = region%cellIDs(1, iii)
-          j = region%cellIDs(2, iii)
-          k = region%cellIDs(3, iii)
-          volLocal = volLocal + vol(i, j, k)
-       end do
-    end do
-
-    call mpi_allreduce(volLocal, region%volume, 1, adflow_real, &
-         MPI_SUM, adflow_comm_world, ierr)
-    call ECHK(ierr, __FILE__, __LINE__)
-
-    ! Final memory cleanup
-    deallocate(stack, norm, frontLeaves, frontLeavesNew, BB)
-    call destroySerialQuad(ADT)
-
-  contains
-
-    function checkInside()
-
-      implicit none
-      logical :: checkInside
-      integer(kind=intType) :: jj
-      real(kind=realType) :: shp(4), xp(3), normal(3), v1(3), dp
-
-      ! bi-linear shape functions (CCW ordering)
-      shp(1) = (one-uvw(1))*(one-uvw(2))
-      shp(2) = (    uvw(1))*(one-uvw(2))
-      shp(3) = (    uvw(1))*(    uvw(2))
-      shp(4) = (one-uvw(1))*(    uvw(2))
-
-      xp = zero
-      normal = zero
-      do jj=1, 4
-         xp = xp + shp(jj)*pts(:, conn(jj, cellID))
-         normal = normal + shp(jj)*norm(:, conn(jj, cellID))
-      end do
-
-      ! Compute the dot product of normal with cell center
-      ! (stored in coor) with the point on the surface.
-      v1 = coor(1:3) - xp
-      dp = normal(1)*v1(1) + normal(2)*v1(2) + normal(3)*v1(3)
-
-      if (dp < zero) then
-         checkInside = .True.
-      else
-         checkInside = .False.
-      end if
-    end function checkInside
-  end subroutine addActuatorRegion
-
-  subroutine writeActuatorRegions(fileName)
-
-    ! This a (mostly) debug routine that is used to verify to the user
-    ! the that the cells that the user thinks should be specified as
-    ! being inside the actuator region actually are. We will dump a
-    ! hex unstructured ascii tecplot file with all the zones we
-    ! found. We won't be super concerned about efficiency here.
-
     use constants
-    use utils, only : EChk, pointReduce, setPointers
-    use communication, only : myID, adflow_comm_world, nProc
-    use blockPointers, only : x, nDom
-    use commonFormats, only : sci12
+    use communication, only: commType, internalCommType
+    use actuatorRegionData
     implicit none
 
-    ! Input
-    character(len=*) :: fileName
-
-    ! Working
-    integer(kind=intType) :: iRegion, nn, i, j, k, ii, jj, kk, iii, kkk, iDim
-    integer(kind=intType) :: level, sps, iProc, ierr, totalCount, offset, nUnique
-    integer(kind=intType), dimension(:), allocatable :: sizesProc, cumSizesProc
-    real(kind=realType) , dimension(:), allocatable :: pts, allPts
-    real(kind=realType) , dimension(:,:), allocatable :: tmp, uniquePts
-    real(kind=realType), parameter :: tol=1e-8
-    integer(kind=intType), dimension(:), allocatable :: conn, allConn, link
-    character(80) :: zoneName
-    type(actuatorRegionType), pointer :: region
-
-    ! Before we start the main region loop the root procesoor has to
-    ! open up the tecplot file and write the header
-
-    if (myid == 0) then
-       open(unit=101, file=trim(fileName), form='formatted')
-       write(101,*) 'TITLE = "Actuator Regions"'
-       write(101,*) 'Variables = "CoordinateX", "CoordinateY", "CoordinateZ"'
-    end if
-
-    ! Region Loop
-    regionLoop: do iRegion=1, nActuatorRegions
-
-       ! Only for the finest grid level.
-       level =1
-       sps = 1
-
-       ! Do an allgather with the number of actuator cells on each
-       ! processor so that everyone knows the sizes and can compute the offsets,
-       region => actuatorRegions(iRegion)
-
-       allocate(sizesProc(nProc), cumSizesProc(0:nProc))
-
-       call mpi_allgather(region%nCellIDs, 1, adflow_integer, sizesProc, 1, &
-            adflow_integer, adflow_comm_world, ierr)
-       call ECHK(ierr, __FILE__, __LINE__)
-
-       cumSizesProc(0) = 0
-       do iProc=1, nProc
-          cumSizesProc(iProc) = cumSizesProc(iProc-1) + sizesProc(iProc)
-       end do
-
-       ! Fill up our own nodes/conn with the nodes we have here.
-       allocate(conn(8*region%nCellIDs), pts(24*region%nCellIDs))
-
-       kkk = 0
-       do nn=1, nDom
-          call setPointers(nn, level, sps)
-          ! Loop over the ranges for this block
-          do iii=region%blkPtr(nn-1) + 1, region%blkPtr(nn)
-
-             ! Carful with the conn values! They need to be in counter clock wise ordering!
-             offset = (iii-1)*8 + cumSizesProc(myID)*8
-             conn((iii-1)*8 + 1) = 1 + offset
-             conn((iii-1)*8 + 2) = 2 + offset
-             conn((iii-1)*8 + 3) = 4 + offset
-             conn((iii-1)*8 + 4) = 3 + offset
-             conn((iii-1)*8 + 5) = 5 + offset
-             conn((iii-1)*8 + 6) = 6 + offset
-             conn((iii-1)*8 + 7) = 8 + offset
-             conn((iii-1)*8 + 8) = 7 + offset
-
-             ! Add in the 24 values for the nodal coordinates in coordinate
-             ! ordering. Do all the coordinates interlaced
-             do kk=-1, 0
-                do jj=-1, 0
-                   do ii=-1, 0
-                      do iDim=1,3
-                         i = region%cellIDs(1, iii)
-                         j = region%cellIDs(2, iii)
-                         k = region%cellIDs(3, iii)
-                         kkk = kkk + 1
-                         pts(kkk) = x(i+ii, j+jj, k+kk, iDim)
-                      end do
-                   end do
+contains
+    subroutine addActuatorRegion(pts, conn, axis1, axis2, famName, famID, &
+                                 thrust, torque, heat, relaxStart, relaxEnd, nPts, nConn)
+        ! Add a user-supplied integration surface.
+
+        use communication, only: myID, adflow_comm_world
+        use constants
+        use adtBuild, only: buildSerialQuad, destroySerialQuad
+        use adtLocalSearch, only: minDistanceTreeSearchSinglePoint
+        use ADTUtils, only: stack
+        use ADTData
+        use blockPointers, only: x, il, jl, kl, nDom, iBlank, vol
+        use adjointVars, only: nCellsLocal
+        use utils, only: setPointers, EChk
+        implicit none
+
+        ! Input variables
+        integer(kind=intType), intent(in) :: nPts, nConn, famID
+        real(kind=realType), dimension(3, nPts), intent(in), target :: pts
+        integer(kind=intType), dimension(4, nConn), intent(in), target :: conn
+        real(kind=realType), intent(in), dimension(3) :: axis1, axis2
+        character(len=*) :: famName
+        real(kind=realType) :: thrust, torque, heat, relaxStart, relaxEnd
+
+        ! Working variables
+        integer(kind=intType) :: i, j, k, nn, iDim, cellID, intInfo(3), sps, level, iii, ierr
+        real(kind=realType) :: dStar, frac, volLocal
+        type(actuatorRegionType), pointer :: region
+        real(kind=realType), dimension(3) :: minX, maxX, v1, v2, v3, xCen, axisVec
+        type(adtType) :: ADT
+        real(kind=realType) :: axisVecNorm
+        real(kind=realType), dimension(:, :), allocatable :: norm
+        integer(kind=intType), dimension(:), allocatable :: normCount
+        integer(kind=intType), dimension(:, :), pointer :: tmp
+
+        ! ADT Type required data
+        integer(kind=intType), dimension(:), pointer :: frontLeaves, frontLeavesNew
+        type(adtBBoxTargetType), dimension(:), pointer :: BB
+        real(kind=realType) :: coor(4), uvw(5)
+        real(kind=realType) :: dummy(3, 2)
+
+        nActuatorRegions = nActuatorRegions + 1
+        if (nActuatorRegions > nActuatorRegionsMax) then
+            print *, "Error: Exceeded the maximum number of actuatorDiskRegions.  "&
+                 &"Increase nActuatorDiskRegionsMax"
+            stop
+        end if
+
+        ! Save the input information
+        region => actuatorRegions(nActuatorRegions)
+        region%famName = famName
+        region%famID = famID
+        region%torque = torque
+        region%heat = heat
+        region%relaxStart = relaxStart
+        region%relaxEnd = relaxEnd
+        ! We use the axis to define the direction of F. Since we are
+        ! dealing with rotating machinary, it is pretty good approximation
+        ! to assume that the thrust is going to be in the direction of the
+        ! axis.
+        axisVec = axis2 - axis1
+        axisVecNorm = sqrt((axisVec(1)**2 + axisvec(2)**2 + axisVec(3)**2))
+        if (axisVecNorm < 1e-12) then
+            print *, "Error: Axis cannot be determined by the supplied points. They are too close"
+            stop
+        end if
+
+        axisVec = axisVec / axisVecNorm
+
+        region%force = axisVec * thrust
+        region%axisVec = axisVec
+
+        allocate (region%blkPtr(0:nDom))
+        region%blkPtr(0) = 0
+
+        ! Next thing we need to do is to figure out if any of our cells
+        ! are inside the actuator disk region. If so we will save them in
+        ! the actuatorRegionType data structure
+
+        ! Since this is effectively a wall-distance calc it gets super
+        ! costly for the points far away. Luckly, we can do a fairly
+        ! simple shortcut: Just compute the bounding box of the region and
+        ! use that as the "already found" distance in the cloest point
+        ! search. This will eliminate all the points further away
+        ! immediately and this should be sufficiently fast.
+
+        ! So...compute that bounding box:
+        do iDim = 1, 3
+            minX(iDim) = minval(pts(iDim, :))
+            maxX(iDim) = maxval(pts(iDim, :))
+        end do
+
+        ! Get the max distance. This should be quite conservative.
+        dStar = (maxX(1) - minx(1))**2 + (maxX(2) - minX(2))**2 + (maxX(3) - minX(3))**2
+
+        ! Now build the tree.
+        call buildSerialQuad(size(conn, 2), size(pts, 2), pts, conn, ADT)
+
+        ! Compute the (averaged) unique nodal vectors:
+        allocate (norm(3, size(pts, 2)), normCount(size(pts, 2)))
+
+        norm = zero
+        normCount = 0
+
+        do i = 1, size(conn, 2)
+
+            ! Compute cross product normal and normalize
+            v1 = pts(:, conn(3, i)) - pts(:, conn(1, i))
+            v2 = pts(:, conn(4, i)) - pts(:, conn(2, i))
+
+            v3(1) = (v1(2) * v2(3) - v1(3) * v2(2))
+            v3(2) = (v1(3) * v2(1) - v1(1) * v2(3))
+            v3(3) = (v1(1) * v2(2) - v1(2) * v2(1))
+            v3 = v3 / sqrt(v3(1)**2 + v3(2)**2 + v3(3)**2)
+
+            ! Add to each of the four pts and increment the number added
+            do j = 1, 4
+                norm(:, conn(j, i)) = norm(:, conn(j, i)) + v3
+                normCount(conn(j, i)) = normCount(conn(j, i)) + 1
+            end do
+        end do
+
+        ! Now just divide by the norm count
+        do i = 1, size(pts, 2)
+            norm(:, i) = norm(:, i) / normCount(i)
+        end do
+
+        ! Norm count is no longer needed
+        deallocate (normCount)
+
+        ! Allocate the extra data the tree search requires.
+        allocate (stack(100), BB(20), frontLeaves(25), frontLeavesNew(25))
+
+        ! Allocate sufficient space for the maximum possible number of cellIDs
+        allocate (region%cellIDs(3, nCellsLocal(1)))
+
+        ! Now search for all the coordinate. Note that We have explictly
+        ! set sps to 1 becuase it is only implemented for single grid.
+        sps = 1
+        level = 1
+
+        do nn = 1, nDom
+            call setPointers(nn, level, sps)
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+                        ! Only check real cells
+                        if (iblank(i, j, k) == 1) then
+                            ! Compute the cell center
+                            xCen = eighth * (x(i - 1, j - 1, k - 1, :) + x(i, j - 1, k - 1, :) &
+                                             + x(i - 1, j, k - 1, :) + x(i, j, k - 1, :) &
+                                             + x(i - 1, j - 1, k, :) + x(i, j - 1, k, :) &
+                                             + x(i - 1, j, k, :) + x(i, j, k, :))
+
+                            ! The current point to search for and continually
+                            ! reset the "closest point already found" variable.
+                            coor(1:3) = xCen
+                            coor(4) = dStar
+                            intInfo(3) = 0
+                            call minDistancetreeSearchSinglePoint(ADT, coor, intInfo, &
+                                                                  uvw, dummy, 0, BB, frontLeaves, frontLeavesNew)
+                            cellID = intInfo(3)
+                            if (cellID > 0) then
+                                ! Now check if this was successful or now:
+                                if (checkInside()) then
+                                    ! Whoohoo! We are inside the region. Add this cell
+                                    ! to the list.
+                                    region%nCellIDs = region%nCellIDs + 1
+                                    region%cellIDs(:, region%nCellIDs) = (/i, j, k/)
+                                end if
+                            end if
+                        end if
+                    end do
+                end do
+            end do
+            ! Since we're doing all the blocks in order, simply store the
+            ! current counter into blkPtr which gives up the range of cells
+            ! we have found on this block
+            region%blkPtr(nn) = region%nCellIDs
+
+        end do
+
+        ! Resize the cellIDs to the correct size now that we know the
+        ! correct exact number.
+        tmp => region%cellIDs
+        allocate (region%cellIDs(3, region%nCellIDs))
+        region%cellIDs = tmp(:, 1:region%nCellIDs)
+        deallocate (tmp)
+
+        ! Now go back and generate the total volume of the the cells we've flagged
+        volLocal = zero
+
+        do nn = 1, nDom
+            call setPointers(nn, level, sps)
+
+            ! Loop over the region for this block
+            do iii = region%blkPtr(nn - 1) + 1, region%blkPtr(nn)
+                i = region%cellIDs(1, iii)
+                j = region%cellIDs(2, iii)
+                k = region%cellIDs(3, iii)
+                volLocal = volLocal + vol(i, j, k)
+            end do
+        end do
+
+        call mpi_allreduce(volLocal, region%volume, 1, adflow_real, &
+                           MPI_SUM, adflow_comm_world, ierr)
+        call ECHK(ierr, __FILE__, __LINE__)
+
+        ! Final memory cleanup
+        deallocate (stack, norm, frontLeaves, frontLeavesNew, BB)
+        call destroySerialQuad(ADT)
+
+    contains
+
+        function checkInside()
+
+            implicit none
+            logical :: checkInside
+            integer(kind=intType) :: jj
+            real(kind=realType) :: shp(4), xp(3), normal(3), v1(3), dp
+
+            ! bi-linear shape functions (CCW ordering)
+            shp(1) = (one - uvw(1)) * (one - uvw(2))
+            shp(2) = (uvw(1)) * (one - uvw(2))
+            shp(3) = (uvw(1)) * (uvw(2))
+            shp(4) = (one - uvw(1)) * (uvw(2))
+
+            xp = zero
+            normal = zero
+            do jj = 1, 4
+                xp = xp + shp(jj) * pts(:, conn(jj, cellID))
+                normal = normal + shp(jj) * norm(:, conn(jj, cellID))
+            end do
+
+            ! Compute the dot product of normal with cell center
+            ! (stored in coor) with the point on the surface.
+            v1 = coor(1:3) - xp
+            dp = normal(1) * v1(1) + normal(2) * v1(2) + normal(3) * v1(3)
+
+            if (dp < zero) then
+                checkInside = .True.
+            else
+                checkInside = .False.
+            end if
+        end function checkInside
+    end subroutine addActuatorRegion
+
+    subroutine writeActuatorRegions(fileName)
+
+        ! This a (mostly) debug routine that is used to verify to the user
+        ! the that the cells that the user thinks should be specified as
+        ! being inside the actuator region actually are. We will dump a
+        ! hex unstructured ascii tecplot file with all the zones we
+        ! found. We won't be super concerned about efficiency here.
+
+        use constants
+        use utils, only: EChk, pointReduce, setPointers
+        use communication, only: myID, adflow_comm_world, nProc
+        use blockPointers, only: x, nDom
+        use commonFormats, only: sci12
+        implicit none
+
+        ! Input
+        character(len=*) :: fileName
+
+        ! Working
+        integer(kind=intType) :: iRegion, nn, i, j, k, ii, jj, kk, iii, kkk, iDim
+        integer(kind=intType) :: level, sps, iProc, ierr, totalCount, offset, nUnique
+        integer(kind=intType), dimension(:), allocatable :: sizesProc, cumSizesProc
+        real(kind=realType), dimension(:), allocatable :: pts, allPts
+        real(kind=realType), dimension(:, :), allocatable :: tmp, uniquePts
+        real(kind=realType), parameter :: tol = 1e-8
+        integer(kind=intType), dimension(:), allocatable :: conn, allConn, link
+        character(80) :: zoneName
+        type(actuatorRegionType), pointer :: region
+
+        ! Before we start the main region loop the root procesoor has to
+        ! open up the tecplot file and write the header
+
+        if (myid == 0) then
+            open (unit=101, file=trim(fileName), form='formatted')
+            write (101, *) 'TITLE = "Actuator Regions"'
+            write (101, *) 'Variables = "CoordinateX", "CoordinateY", "CoordinateZ"'
+        end if
+
+        ! Region Loop
+        regionLoop: do iRegion = 1, nActuatorRegions
+
+            ! Only for the finest grid level.
+            level = 1
+            sps = 1
+
+            ! Do an allgather with the number of actuator cells on each
+            ! processor so that everyone knows the sizes and can compute the offsets,
+            region => actuatorRegions(iRegion)
+
+            allocate (sizesProc(nProc), cumSizesProc(0:nProc))
+
+            call mpi_allgather(region%nCellIDs, 1, adflow_integer, sizesProc, 1, &
+                               adflow_integer, adflow_comm_world, ierr)
+            call ECHK(ierr, __FILE__, __LINE__)
+
+            cumSizesProc(0) = 0
+            do iProc = 1, nProc
+                cumSizesProc(iProc) = cumSizesProc(iProc - 1) + sizesProc(iProc)
+            end do
+
+            ! Fill up our own nodes/conn with the nodes we have here.
+            allocate (conn(8 * region%nCellIDs), pts(24 * region%nCellIDs))
+
+            kkk = 0
+            do nn = 1, nDom
+                call setPointers(nn, level, sps)
+                ! Loop over the ranges for this block
+                do iii = region%blkPtr(nn - 1) + 1, region%blkPtr(nn)
+
+                    ! Carful with the conn values! They need to be in counter clock wise ordering!
+                    offset = (iii - 1) * 8 + cumSizesProc(myID) * 8
+                    conn((iii - 1) * 8 + 1) = 1 + offset
+                    conn((iii - 1) * 8 + 2) = 2 + offset
+                    conn((iii - 1) * 8 + 3) = 4 + offset
+                    conn((iii - 1) * 8 + 4) = 3 + offset
+                    conn((iii - 1) * 8 + 5) = 5 + offset
+                    conn((iii - 1) * 8 + 6) = 6 + offset
+                    conn((iii - 1) * 8 + 7) = 8 + offset
+                    conn((iii - 1) * 8 + 8) = 7 + offset
+
+                    ! Add in the 24 values for the nodal coordinates in coordinate
+                    ! ordering. Do all the coordinates interlaced
+                    do kk = -1, 0
+                        do jj = -1, 0
+                            do ii = -1, 0
+                                do iDim = 1, 3
+                                    i = region%cellIDs(1, iii)
+                                    j = region%cellIDs(2, iii)
+                                    k = region%cellIDs(3, iii)
+                                    kkk = kkk + 1
+                                    pts(kkk) = x(i + ii, j + jj, k + kk, iDim)
+                                end do
+                            end do
+                        end do
+                    end do
+                end do
+            end do
+
+            ! Now that we've filled up our array, we can allocate the total
+            ! space we need on the root proc and it
+            if (myid == 0) then
+                totalCount = sum(sizesProc)
+                allocate (allConn(8 * totalCount), allPts(24 * totalCount))
+            end if
+
+            ! Perform the two gatherV's
+            call mpi_gatherV(pts, region%nCellIDs * 24, adflow_real, &
+                             allPts, 24 * sizesProc, 24 * cumSizesProc, adflow_real, &
+                             0, adflow_comm_world, ierr)
+            call ECHK(ierr, __FILE__, __LINE__)
+
+            call mpi_gatherV(conn, region%nCellIDs * 8, adflow_integer, &
+                             allConn, 8 * sizesProc, 8 * cumSizesProc, adflow_integer, &
+                             0, adflow_comm_world, ierr)
+            call ECHK(ierr, __FILE__, __LINE__)
+
+            ! We can deallocate all the per-proc memory now
+            deallocate (sizesProc, cumSizesProc, pts, conn)
+
+            if (myid == 0) then
+
+                ! Now the poor root processor dumps everything out to a
+                ! file. To help cut down on the already bloated file size,
+                ! we'll point reduce it which will help tecplot display them
+                ! better as well.
+
+                allocate (tmp(3, totalCount * 8))
+                do i = 1, totalCount * 8
+                    do iDim = 1, 3
+                        tmp(iDim, i) = allPts((i - 1) * 3 + iDim)
+                    end do
+                end do
+                deallocate (allPts)
+                allocate (uniquePts(3, totalCount * 8), link(totalCount * 8))
+
+                ! Get unique set of nodes.
+                call pointReduce(tmp, totalCount * 8, tol, uniquePts, link, nUnique)
+
+                write (zoneName, "(a,a,a)") 'Zone T="', trim(region%famName), ' Region"'
+                write (101, *) trim(zoneName)
+                write (101, *) "Nodes = ", nUnique, " Elements= ", totalCount, " ZONETYPE=FEBRICK"
+                write (101, *) "DATAPACKING=BLOCK, VARLOCATION=([1,2,3]=NODAL, [4]=CELLCENTERED)"
+
+                ! Write all the coordinates...this is horrendously slow...
+                do iDim = 1, 3
+                    do i = 1, nUnique
+                        write (101, sci12) uniquePts(iDim, i)
+                    end do
                 end do
-             end do
-          end do
-       end do
-
-       ! Now that we've filled up our array, we can allocate the total
-       ! space we need on the root proc and it
-       if (myid == 0) then
-          totalCount = sum(sizesProc)
-          allocate(allConn(8*totalCount), allPts(24*totalCount))
-       end if
-
-       ! Perform the two gatherV's
-       call mpi_gatherV(pts, region%nCellIDs*24, adflow_real, &
-            allPts, 24*sizesProc, 24*cumSizesProc, adflow_real, &
-            0, adflow_comm_world, ierr)
-       call ECHK(ierr, __FILE__, __LINE__)
-
-       call mpi_gatherV(conn, region%nCellIDs*8, adflow_integer, &
-            allConn, 8*sizesProc, 8*cumSizesProc, adflow_integer, &
-            0, adflow_comm_world, ierr)
-       call ECHK(ierr, __FILE__, __LINE__)
-
-       ! We can deallocate all the per-proc memory now
-       deallocate(sizesProc, cumSizesProc, pts, conn)
-
-       if (myid == 0) then
-
-          ! Now the poor root processor dumps everything out to a
-          ! file. To help cut down on the already bloated file size,
-          ! we'll point reduce it which will help tecplot display them
-          ! better as well.
-
-          allocate(tmp(3, totalCount*8))
-          do i=1, totalCount*8
-             do iDim=1, 3
-                tmp(iDim, i) = allPts((i-1)*3 + iDim)
-             end do
-          end do
-          deallocate(allPts)
-          allocate(uniquePts(3, totalCount*8), link(totalCount*8))
-
-          ! Get unique set of nodes.
-          call pointReduce(tmp, totalCount*8, tol, uniquePts, link, nUnique)
-
-          write (zoneName,"(a,a,a)") 'Zone T="', trim(region%famName), ' Region"'
-          write (101, *) trim(zoneName)
-          write (101,*) "Nodes = ", nUnique, " Elements= ", totalCount, " ZONETYPE=FEBRICK"
-          write (101,*) "DATAPACKING=BLOCK, VARLOCATION=([1,2,3]=NODAL, [4]=CELLCENTERED)"
-
-          ! Write all the coordinates...this is horrendously slow...
-          do iDim=1, 3
-             do i=1, nUnique
-                write(101, sci12) uniquePts(iDim, i)
-             end do
-          end do
-
-          ! Write out the connectivity
-          do i=1, totalCount
-             do j=1,8
-                write(101, "(I8)", advance='no') link(allConn((i-1)*8 + j))
-             end do
-             write(101,"(1x)")
-          end do
-
-          ! Ditch the memory only allocated on this proc
-          deallocate(allConn, link, tmp, uniquePts)
-       end if
-    end do regionLoop
-
-    ! Close the output file on the root proc
-    if (myid == 0) then
-       close(101)
-    end if
-  end subroutine writeActuatorRegions
-
-  subroutine integrateActuatorRegions(localValues, famList, sps)
-    !--------------------------------------------------------------
-    ! Manual Differentiation Warning: Modifying this routine requires
-    ! modifying the hand-written forward and reverse routines.
-    ! --------------------------------------------------------------
-
-    ! Perform volume integrals over the actuator region.
-    use constants
-    use blockPointers, only : vol, dw, w, nDom
-    use flowVarRefState, only : Pref, uRef
-    use utils, only : setPointers
-    use sorting, only : famInList
-    use residuals, only : sourceTerms_block
-    use actuatorRegionData
-    use communication
-    ! Input/output Variables
-    real(kind=realType), dimension(nLocalValues), intent(inout) :: localValues
-    integer(kind=intType), dimension(:), intent(in) :: famList
-    integer(kind=intType), intent(in) :: sps
-
-    ! Working
-    integer(kind=intType) :: nn, iRegion
-    real(kind=realType) :: PLocal, PLocald
-    ! Zero the accumulation variable. We comptue flow power across
-    ! 'all' actuaor zones. The famInclude is used to section out which
-    ! one we want.
-    PLocal = zero
-
-    domainLoop: do nn=1, nDom
-       call setPointers(nn, 1, sps)
-
-       ! Loop over each region
-       regionLoop: do iRegion=1, nActuatorRegions
-
-          ! Check if this needs to be included:
-          famInclude: if (famInList(actuatorRegions(iRegion)%famID, famList)) then
-
-             ! If so, call the regular sourceTerms_block routine
-             call sourceTerms_block(nn, .False., iRegion, pLocal)
-
-          end if famInclude
-       end do regionLoop
-    end do domainLoop
 
-    ! Add in the contribution from this processor.
-    localValues(iPower) = localValues(iPower) + PLocal
+                ! Write out the connectivity
+                do i = 1, totalCount
+                    do j = 1, 8
+                        write (101, "(I8)", advance='no') link(allConn((i - 1) * 8 + j))
+                    end do
+                    write (101, "(1x)")
+                end do
 
-  end subroutine integrateActuatorRegions
+                ! Ditch the memory only allocated on this proc
+                deallocate (allConn, link, tmp, uniquePts)
+            end if
+        end do regionLoop
+
+        ! Close the output file on the root proc
+        if (myid == 0) then
+            close (101)
+        end if
+    end subroutine writeActuatorRegions
+
+    subroutine integrateActuatorRegions(localValues, famList, sps)
+        !--------------------------------------------------------------
+        ! Manual Differentiation Warning: Modifying this routine requires
+        ! modifying the hand-written forward and reverse routines.
+        ! --------------------------------------------------------------
+
+        ! Perform volume integrals over the actuator region.
+        use constants
+        use blockPointers, only: vol, dw, w, nDom
+        use flowVarRefState, only: Pref, uRef
+        use utils, only: setPointers
+        use sorting, only: famInList
+        use residuals, only: sourceTerms_block
+        use actuatorRegionData
+        use communication
+        ! Input/output Variables
+        real(kind=realType), dimension(nLocalValues), intent(inout) :: localValues
+        integer(kind=intType), dimension(:), intent(in) :: famList
+        integer(kind=intType), intent(in) :: sps
+
+        ! Working
+        integer(kind=intType) :: nn, iRegion
+        real(kind=realType) :: PLocal, PLocald
+        ! Zero the accumulation variable. We comptue flow power across
+        ! 'all' actuaor zones. The famInclude is used to section out which
+        ! one we want.
+        PLocal = zero
+
+        domainLoop: do nn = 1, nDom
+            call setPointers(nn, 1, sps)
+
+            ! Loop over each region
+            regionLoop: do iRegion = 1, nActuatorRegions
+
+                ! Check if this needs to be included:
+                famInclude: if (famInList(actuatorRegions(iRegion)%famID, famList)) then
+
+                    ! If so, call the regular sourceTerms_block routine
+                    call sourceTerms_block(nn, .False., iRegion, pLocal)
+
+                end if famInclude
+            end do regionLoop
+        end do domainLoop
+
+        ! Add in the contribution from this processor.
+        localValues(iPower) = localValues(iPower) + PLocal
+
+    end subroutine integrateActuatorRegions
 #ifndef USE_COMPLEX
-  subroutine integrateActuatorRegions_d(localValues, localValuesd, famList, sps)
-    !--------------------------------------------------------------
-    ! Manual Differentiation Warning: Modifying this routine requires
-    ! modifying the hand-written forward and reverse routines.
-    ! --------------------------------------------------------------
-
-    ! Perform volume integrals over the actuator region.
-    use constants
-    use blockPointers, only : vol, dw, w, nDom
-    use flowVarRefState, only : Pref, uRef
-    use utils, only : setPointers_d
-    use sorting, only : famInList
-    use actuatorRegionData
-    use residuals_d, only : sourceTerms_block_d
-
-    ! Input/output Variables
-    real(kind=realType), dimension(nLocalValues), intent(inout) :: localValues, localValuesd
-    integer(kind=intType), dimension(:), intent(in) :: famList
-    integer(kind=intType), intent(in) :: sps
-
-    ! Working
-    integer(kind=intType) :: nn, iRegion
-    real(kind=realType) :: PLocal, PLocald
-
-    ! Zero the accumulation variable. We comptue flow power across
-    ! 'all' actuaor zones. The famInclude is used to section out which
-    ! one we want.
-    PLocal = zero
-    PLocald = zero
-
-    domainLoop: do nn=1, nDom
-       call setPointers_d(nn, 1, sps)
-
-       ! Loop over each region
-       regionLoop: do iRegion=1, nActuatorRegions
-
-          ! Check if this needs to be included:
-          famInclude: if (famInList(actuatorRegions(iRegion)%famID, famList)) then
-
-             ! If so, call the regular sourceTerms_block routine
-             call sourceTerms_block_d(nn, .False., iRegion, pLocal, pLocald)
-
-          end if famInclude
-       end do regionLoop
-    end do domainLoop
-
-    ! Add in the contribution from this processor.
-    localValues(iPower) = localValues(iPower) + PLocal
-    localValuesd(iPower) = localValuesd(iPower) + PLocald
-
-  end subroutine integrateActuatorRegions_d
-
-  subroutine integrateActuatorRegions_b(localValues, localValuesd, famList, sps)
-    !--------------------------------------------------------------
-    ! Manual Differentiation Warning: Modifying this routine requires
-    ! modifying the hand-written forward and reverse routines.
-    ! --------------------------------------------------------------
-
-    ! Perform volume integrals over the actuator region.
-    use constants
-    use blockPointers, only : vol, dw, w, nDom
-    use flowVarRefState, only : Pref, uRef
-    use utils, only : setPointers_b
-    use sorting, only : famInList
-    use actuatorRegionData
-    use residuals_b, only : sourceTerms_block_b
-
-    ! Input/output Variables
-    real(kind=realType), dimension(nLocalValues), intent(inout) :: localValues, localValuesd
-    integer(kind=intType), dimension(:), intent(in) :: famList
-    integer(kind=intType), intent(in) :: sps
-
-    ! Working
-    integer(kind=intType) :: nn, iRegion
-    real(kind=realType) :: PLocal, PLocald
-
-    ! Zero the accumulation variable. We comptue flow power across
-    ! 'all' actuaor zones. The famInclude is used to section out which
-    ! one we want.
-    PLocal = zero
-    PLocald = zero
-
-    ! Pull out the seed
-    PLocald = localValuesd(iPower)
-
-    domainLoop: do nn=1, nDom
-       call setPointers_b(nn, 1, sps)
-
-       ! Loop over each region
-       regionLoop: do iRegion=1, nActuatorRegions
-
-          ! Check if this needs to be included:
-          famInclude: if (famInList(actuatorRegions(iRegion)%famID, famList)) then
-
-             ! If so, call the regular sourceTerms_block routine
-             call sourceTerms_block_b(nn, .False., iRegion, pLocal, pLocald)
-
-          end if famInclude
-       end do regionLoop
-    end do domainLoop
-  end subroutine integrateActuatorRegions_b
+    subroutine integrateActuatorRegions_d(localValues, localValuesd, famList, sps)
+        !--------------------------------------------------------------
+        ! Manual Differentiation Warning: Modifying this routine requires
+        ! modifying the hand-written forward and reverse routines.
+        ! --------------------------------------------------------------
+
+        ! Perform volume integrals over the actuator region.
+        use constants
+        use blockPointers, only: vol, dw, w, nDom
+        use flowVarRefState, only: Pref, uRef
+        use utils, only: setPointers_d
+        use sorting, only: famInList
+        use actuatorRegionData
+        use residuals_d, only: sourceTerms_block_d
+
+        ! Input/output Variables
+        real(kind=realType), dimension(nLocalValues), intent(inout) :: localValues, localValuesd
+        integer(kind=intType), dimension(:), intent(in) :: famList
+        integer(kind=intType), intent(in) :: sps
+
+        ! Working
+        integer(kind=intType) :: nn, iRegion
+        real(kind=realType) :: PLocal, PLocald
+
+        ! Zero the accumulation variable. We comptue flow power across
+        ! 'all' actuaor zones. The famInclude is used to section out which
+        ! one we want.
+        PLocal = zero
+        PLocald = zero
+
+        domainLoop: do nn = 1, nDom
+            call setPointers_d(nn, 1, sps)
+
+            ! Loop over each region
+            regionLoop: do iRegion = 1, nActuatorRegions
+
+                ! Check if this needs to be included:
+                famInclude: if (famInList(actuatorRegions(iRegion)%famID, famList)) then
+
+                    ! If so, call the regular sourceTerms_block routine
+                    call sourceTerms_block_d(nn, .False., iRegion, pLocal, pLocald)
+
+                end if famInclude
+            end do regionLoop
+        end do domainLoop
+
+        ! Add in the contribution from this processor.
+        localValues(iPower) = localValues(iPower) + PLocal
+        localValuesd(iPower) = localValuesd(iPower) + PLocald
+
+    end subroutine integrateActuatorRegions_d
+
+    subroutine integrateActuatorRegions_b(localValues, localValuesd, famList, sps)
+        !--------------------------------------------------------------
+        ! Manual Differentiation Warning: Modifying this routine requires
+        ! modifying the hand-written forward and reverse routines.
+        ! --------------------------------------------------------------
+
+        ! Perform volume integrals over the actuator region.
+        use constants
+        use blockPointers, only: vol, dw, w, nDom
+        use flowVarRefState, only: Pref, uRef
+        use utils, only: setPointers_b
+        use sorting, only: famInList
+        use actuatorRegionData
+        use residuals_b, only: sourceTerms_block_b
+
+        ! Input/output Variables
+        real(kind=realType), dimension(nLocalValues), intent(inout) :: localValues, localValuesd
+        integer(kind=intType), dimension(:), intent(in) :: famList
+        integer(kind=intType), intent(in) :: sps
+
+        ! Working
+        integer(kind=intType) :: nn, iRegion
+        real(kind=realType) :: PLocal, PLocald
+
+        ! Zero the accumulation variable. We comptue flow power across
+        ! 'all' actuaor zones. The famInclude is used to section out which
+        ! one we want.
+        PLocal = zero
+        PLocald = zero
+
+        ! Pull out the seed
+        PLocald = localValuesd(iPower)
+
+        domainLoop: do nn = 1, nDom
+            call setPointers_b(nn, 1, sps)
+
+            ! Loop over each region
+            regionLoop: do iRegion = 1, nActuatorRegions
+
+                ! Check if this needs to be included:
+                famInclude: if (famInList(actuatorRegions(iRegion)%famID, famList)) then
+
+                    ! If so, call the regular sourceTerms_block routine
+                    call sourceTerms_block_b(nn, .False., iRegion, pLocal, pLocald)
+
+                end if famInclude
+            end do regionLoop
+        end do domainLoop
+    end subroutine integrateActuatorRegions_b
 #endif
 
 end module actuatorRegion
diff --git a/src/solver/agmg.F90 b/src/solver/agmg.F90
index 5cc68a255..e53b76b7e 100644
--- a/src/solver/agmg.F90
+++ b/src/solver/agmg.F90
@@ -1,725 +1,719 @@
 module agmg
 
-  use constants
-  use utils, only : EChk
+    use constants
+    use utils, only: EChk
 #include <petsc/finclude/petsc.h>
-  use petsc
-  implicit none
-
-  ! Structure used for storing the interpolation indices
-  type arr1int4
-     integer, dimension(:), allocatable :: arr
-  end type arr1int4
-
-  ! Structure used for storing the interpolation indices
-  type arr3int4
-     integer, dimension(:, :, :), allocatable :: arr
-  end type arr3int4
-  
-  ! Structure used for storing the interpolation indices
-  type arr4int4
-     integer, dimension(:, :, :, :), allocatable :: arr
-  end type arr4int4
-  
-
-  ! The number of agmg levels
-  integer(kind=intType) agmgLevels
-
-  ! The number of outer iterations
-  integer(kind=intType) agmgOuterIts
-
-  ! The number of smoothign iteratinso
-  integer(kind=intType) agmgNSmooth
-
-  ! ASM overlap for levels
-  integer(kind=intType) :: agmgASMOverlap
-
-  ! Fill
-  integer(kind=intType) :: agmgFillLevel
-  
-  ! Ordering
-  character(len=maxStringLen) :: agmgMatrixOrdering
-
-  ! Arrays for matrices/vectors/solvers on each level. 
-  Mat, dimension(:), allocatable :: A
-  KSP, dimension(:), allocatable :: kspLevels
-  Vec, dimension(:), allocatable :: res, rhs, sol, sol2
-  PC shellPC
-
-  Mat, pointer :: fineMat
-
-  ! The interpolation arrays
-  type(arr1int4), dimension(:), allocatable :: interps
-
-  ! The coarse grid indices
-  type(arr3int4), dimension(:, :), allocatable, target :: coarseIndices
-  type(arr4int4), dimension(:, :), allocatable, target :: coarseOversetIndices
-
-  logical :: agmgSetup = .False.
-  integer :: bs
+    use petsc
+    implicit none
+
+    ! Structure used for storing the interpolation indices
+    type arr1int4
+        integer, dimension(:), allocatable :: arr
+    end type arr1int4
+
+    ! Structure used for storing the interpolation indices
+    type arr3int4
+        integer, dimension(:, :, :), allocatable :: arr
+    end type arr3int4
+
+    ! Structure used for storing the interpolation indices
+    type arr4int4
+        integer, dimension(:, :, :, :), allocatable :: arr
+    end type arr4int4
+
+    ! The number of agmg levels
+    integer(kind=intType) agmgLevels
+
+    ! The number of outer iterations
+    integer(kind=intType) agmgOuterIts
+
+    ! The number of smoothign iteratinso
+    integer(kind=intType) agmgNSmooth
+
+    ! ASM overlap for levels
+    integer(kind=intType) :: agmgASMOverlap
+
+    ! Fill
+    integer(kind=intType) :: agmgFillLevel
+
+    ! Ordering
+    character(len=maxStringLen) :: agmgMatrixOrdering
+
+    ! Arrays for matrices/vectors/solvers on each level.
+    Mat, dimension(:), allocatable :: A
+    KSP, dimension(:), allocatable :: kspLevels
+    Vec, dimension(:), allocatable :: res, rhs, sol, sol2
+    PC shellPC
+
+    Mat, pointer :: fineMat
+
+    ! The interpolation arrays
+    type(arr1int4), dimension(:), allocatable :: interps
+
+    ! The coarse grid indices
+    type(arr3int4), dimension(:, :), allocatable, target :: coarseIndices
+    type(arr4int4), dimension(:, :), allocatable, target :: coarseOversetIndices
+
+    logical :: agmgSetup = .False.
+    integer :: bs
 contains
 
-  subroutine setupAGMG(inputMat, nCell, blockSize)
-
-    use blockPointers
-    use communication
-    use utils, only : setPointers
-    use haloExchange, only : whalo1to1intgeneric
-    use adjointVars, only : nCellsLocal
-    integer(kind=intType), intent(in) :: nCell, blockSize
-    Mat, target :: inputMat
-
-    VecScatter :: scat
-    Vec :: recvVec, indexVec
-    IS  :: IS1
-
-    integer(kind=intTYpe) :: nnx, nny, nnz, l, nn, i, j, k, ii, jj, kk, lvl, n, m
-    integer(kind=intType) :: idim, count, cursize, ierr, coarseIndex
-    integer(kind=intType) :: level, nVar, sps, nextLvl, ncoarse
-    integer(kind=intType), dimension(:, :, :), allocatable :: sizes
-    integer(kind=intType), dimension(:), allocatable :: offsets, procStarts, nnzOn, nnzOff
-    real(kind=realType), dimension(:), pointer :: indPtr
-    integer(kind=intType), dimension(:), allocatable :: indicesToGet
-    
-    if (agmgSetup) then
-       return
-    end if
-
-    bs = blockSize
-    ! Set the pointer to the fine grid the AMG will be working on.
-    fineMat => inputMat
-    
-    ! The setup procedure for the agglomeration multigrid.
-
-    ! Allocate the list of the mats/vects/ksp
-    allocate(&
-         A(2:agmgLevels), &
-         kspLevels(1:agmgLevels), &
-         res(1:agmgLevels), &
-         rhs(1:agmgLevels), &
-         sol(1:agmgLevels), &
-         sol2(1:agmgLevels))
-    
-    ! First allocate the coarse grid indices. 
-    allocate(coarseIndices(nDom, 1:agmgLevels-1))
-    allocate(coarseOversetIndices(nDom, 1:agmgLevels-1))
-    allocate(sizes(3, nDom, 1:agmgLevels))
-    allocate(offsets(0:nProc), procStarts(1:agmgLevels))
-    allocate(interps(1:agmgLevels-1))
-    do lvl=1, agmgLevels
- 
-       do nn=1, nDom 
-          call setPointers(nn, 1, 1)
-          if (lvl == 1) then
-    
-             ! Set the sizes for the finest level. This is the number of
-             ! owned cells. 
-             sizes(1, nn, 1) = nx
-             sizes(2, nn, 1) = ny
-             sizes(3, nn, 1) = nz
-          end if
-       end do
-
-       if (lvl > 1) then 
-          do nn=1, nDom
-             do iDim=1, 3
-                curSize = sizes(iDim, nn, lvl-1)
-                if (curSize == 1) then
-                   sizes(iDim, nn, lvl) = curSize
-                else if (mod(curSize, 2) == 0) then
-                   ! Evenly divides
-                   sizes(iDim, nn, lvl) = curSize / 2
-                else if (mod(curSize, 2) == 1) then
-                   ! Odd, so have an extra
-                   sizes(iDim, nn, lvl) = (curSize-1) / 2 + 1
+    subroutine setupAGMG(inputMat, nCell, blockSize)
+
+        use blockPointers
+        use communication
+        use utils, only: setPointers
+        use haloExchange, only: whalo1to1intgeneric
+        use adjointVars, only: nCellsLocal
+        integer(kind=intType), intent(in) :: nCell, blockSize
+        Mat, target :: inputMat
+
+        VecScatter :: scat
+        Vec :: recvVec, indexVec
+        IS :: IS1
+
+        integer(kind=intTYpe) :: nnx, nny, nnz, l, nn, i, j, k, ii, jj, kk, lvl, n, m
+        integer(kind=intType) :: idim, count, cursize, ierr, coarseIndex
+        integer(kind=intType) :: level, nVar, sps, nextLvl, ncoarse
+        integer(kind=intType), dimension(:, :, :), allocatable :: sizes
+        integer(kind=intType), dimension(:), allocatable :: offsets, procStarts, nnzOn, nnzOff
+        real(kind=realType), dimension(:), pointer :: indPtr
+        integer(kind=intType), dimension(:), allocatable :: indicesToGet
+
+        if (agmgSetup) then
+            return
+        end if
+
+        bs = blockSize
+        ! Set the pointer to the fine grid the AMG will be working on.
+        fineMat => inputMat
+
+        ! The setup procedure for the agglomeration multigrid.
+
+        ! Allocate the list of the mats/vects/ksp
+        allocate ( &
+            A(2:agmgLevels), &
+            kspLevels(1:agmgLevels), &
+            res(1:agmgLevels), &
+            rhs(1:agmgLevels), &
+            sol(1:agmgLevels), &
+            sol2(1:agmgLevels))
+
+        ! First allocate the coarse grid indices.
+        allocate (coarseIndices(nDom, 1:agmgLevels - 1))
+        allocate (coarseOversetIndices(nDom, 1:agmgLevels - 1))
+        allocate (sizes(3, nDom, 1:agmgLevels))
+        allocate (offsets(0:nProc), procStarts(1:agmgLevels))
+        allocate (interps(1:agmgLevels - 1))
+        do lvl = 1, agmgLevels
+
+            do nn = 1, nDom
+                call setPointers(nn, 1, 1)
+                if (lvl == 1) then
+
+                    ! Set the sizes for the finest level. This is the number of
+                    ! owned cells.
+                    sizes(1, nn, 1) = nx
+                    sizes(2, nn, 1) = ny
+                    sizes(3, nn, 1) = nz
                 end if
-             end do
-          end do
-       end if
-
-       ! Get the number of cells on this proc:
-       i = 0
-       do nn=1, nDom
-          i = i + sizes(1, nn, lvl) * sizes(2, nn, lvl) * sizes(3, nn, lvl)
-       end do
-
-       call MPI_Allgather(i, 1, mpi_integer, offsets(1:nProc), 1, mpi_integer, &
-            adflow_comm_world, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ! Prefix sum
-       offsets(0) = 0
-       do i=1, nProc
-          offsets(i) = offsets(i-1) + offsets(i)
-       end do
-
-       ! Get my starting index.
-       procStarts(lvl) = offsets(myid)
-       
-    end do
-
-    ! Now setup the interp and the coarse-grid indices. Note that this
-    ! loop is the number of levels MINUS 1 since we are generating the
-    ! interpolations between levels and we already have the 1st level
-    ! of the  coarseIndices
-
-    call VecCreateMPI(adflow_comm_world, nCellsLocal(1_intType), & 
-         PETSC_DETERMINE, indexVec, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    do lvl=1, agmgLevels-1
-
-       do nn=1, nDom
-          call setPointers(nn, 1, 1)
-          allocate(coarseIndices(nn, lvl)%arr(0:ib, 0:jb, 0:kb))
-          allocate(coarseOversetIndices(nn, lvl)%arr(8, 0:ib, 0:jb, 0:kb))
-          coarseIndices(nn, lvl)%arr = -1
-          coarseOversetIndices(nn, lvl)%arr = -1
-       end do
-       
-       ! Allocate the linear algebra interpolation array for this
-       ! level (first count the nuber of nodes to be restricted on
-       ! level lvl)
-       n = 0
-       do nn=1, nDom
-          n = n + sizes(1, nn, lvl) * sizes(2, nn, lvl) * sizes(3, nn, lvl)
-       end do
-       allocate(interps(lvl)%arr(n))
-       
-       ! Interps uses the local ordering so we always start at 0.
-       n = 0
-       count = 0
-       ! Loop over the blocks
-       do nn=1, nDom
-
-          ! Sizes for next level
-          nnx = sizes(1, nn, lvl+1)
-          nny = sizes(2, nn, lvl+1)
-          nnz = sizes(3, nn, lvl+1)
-
-          ! Loop over the sizes of this level
-          do k=1, sizes(3, nn, lvl)
-             do j=1, sizes(2, nn, lvl)
-                do i=1, sizes(1, nn, lvl)
-
-                   ! These are the indices on the next level
-                   ii = (i-1)/2 + 1
-                   jj = (j-1)/2 + 1
-                   kk = (k-1)/2 + 1
-
-                   coarseIndex = (kk-1)*nnx*nny + (jj-1)*nnx + ii + count 
-
-                   ! Linear algebra info
-                   n = n + 1
-                   interps(lvl)%arr(n) = coarseIndex
-
-                   ! Block-basd info, 
+            end do
+
+            if (lvl > 1) then
+                do nn = 1, nDom
+                    do iDim = 1, 3
+                        curSize = sizes(iDim, nn, lvl - 1)
+                        if (curSize == 1) then
+                            sizes(iDim, nn, lvl) = curSize
+                        else if (mod(curSize, 2) == 0) then
+                            ! Evenly divides
+                            sizes(iDim, nn, lvl) = curSize / 2
+                        else if (mod(curSize, 2) == 1) then
+                            ! Odd, so have an extra
+                            sizes(iDim, nn, lvl) = (curSize - 1) / 2 + 1
+                        end if
+                    end do
                 end do
-             end do
-          end do
-          count = count + nnx*nny*nnz
-       end do
-
-       ! We are not done yet; We need to fill in the block-based
-       ! coarse indices and then do a halo-exchange on it so procs
-       ! know where to put their off-proc entries on the coarser
-       ! grids.
-
-       ! Loop over the blocks. 
-       n = 0
-       ii = 0
-
-       call VecGetArrayF90(indexVec, indPtr, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       do nn=1, nDom
-          call setPointers(nn, 1, 1)
-          ii = ii + (kb+1)*(jb+1)*(ib+1) ! Count maximum double halos
-          ! Loop over the sizes of this level
-          do k=2, kl
-             do j=2, jl
-                do i=2, il
-                 
-                   n = n + 1
-
-                   ! Coarse Index: This is the first coarse index for
-                   ! the current finest grid element.
-                   coarseIndex = interps(1)%arr(n)
-
-                   ! For levels higher than 2, we need to trace
-                   ! through the subsequent levels to find what the
-                   ! coarse grid index is for level lvl. 
-                   do nextLvl=2, lvl
-                      coarseIndex = interps(nextLvl)%arr(coarseIndex)
-                   end do
-
-                   ! Now we set the lvl coarse grid index into the
-                   ! coarseIndices array. Note that we make the index
-                   ! global here by adding procStarts. We also
-                   ! subtract 1 to make it zero-based for petsc. 
-                   coarseIndex = coarseIndex + procStarts(lvl+1) - 1
-                   coarseIndices(nn, lvl)%arr(i, j, k) = coarseIndex
-                   
-                   ! Now put that index into the array
-                   indPtr(n) = transfer(int(coarseIndex, 8), one)
-                   
+            end if
+
+            ! Get the number of cells on this proc:
+            i = 0
+            do nn = 1, nDom
+                i = i + sizes(1, nn, lvl) * sizes(2, nn, lvl) * sizes(3, nn, lvl)
+            end do
+
+            call MPI_Allgather(i, 1, mpi_integer, offsets(1:nProc), 1, mpi_integer, &
+                               adflow_comm_world, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Prefix sum
+            offsets(0) = 0
+            do i = 1, nProc
+                offsets(i) = offsets(i - 1) + offsets(i)
+            end do
+
+            ! Get my starting index.
+            procStarts(lvl) = offsets(myid)
+
+        end do
+
+        ! Now setup the interp and the coarse-grid indices. Note that this
+        ! loop is the number of levels MINUS 1 since we are generating the
+        ! interpolations between levels and we already have the 1st level
+        ! of the  coarseIndices
+
+        call VecCreateMPI(adflow_comm_world, nCellsLocal(1_intType), &
+                          PETSC_DETERMINE, indexVec, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        do lvl = 1, agmgLevels - 1
+
+            do nn = 1, nDom
+                call setPointers(nn, 1, 1)
+                allocate (coarseIndices(nn, lvl)%arr(0:ib, 0:jb, 0:kb))
+                allocate (coarseOversetIndices(nn, lvl)%arr(8, 0:ib, 0:jb, 0:kb))
+                coarseIndices(nn, lvl)%arr = -1
+                coarseOversetIndices(nn, lvl)%arr = -1
+            end do
+
+            ! Allocate the linear algebra interpolation array for this
+            ! level (first count the nuber of nodes to be restricted on
+            ! level lvl)
+            n = 0
+            do nn = 1, nDom
+                n = n + sizes(1, nn, lvl) * sizes(2, nn, lvl) * sizes(3, nn, lvl)
+            end do
+            allocate (interps(lvl)%arr(n))
+
+            ! Interps uses the local ordering so we always start at 0.
+            n = 0
+            count = 0
+            ! Loop over the blocks
+            do nn = 1, nDom
+
+                ! Sizes for next level
+                nnx = sizes(1, nn, lvl + 1)
+                nny = sizes(2, nn, lvl + 1)
+                nnz = sizes(3, nn, lvl + 1)
+
+                ! Loop over the sizes of this level
+                do k = 1, sizes(3, nn, lvl)
+                    do j = 1, sizes(2, nn, lvl)
+                        do i = 1, sizes(1, nn, lvl)
+
+                            ! These are the indices on the next level
+                            ii = (i - 1) / 2 + 1
+                            jj = (j - 1) / 2 + 1
+                            kk = (k - 1) / 2 + 1
+
+                            coarseIndex = (kk - 1) * nnx * nny + (jj - 1) * nnx + ii + count
+
+                            ! Linear algebra info
+                            n = n + 1
+                            interps(lvl)%arr(n) = coarseIndex
+
+                            ! Block-basd info,
+                        end do
+                    end do
                 end do
-             end do
-          end do
-       end do
-       call VecRestoreArrayF90(indexVec, indPtr, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       allocate(indicesToGet(8*ii))
-       ii = 0
-       do nn=1, nDom
-          call setPointers(nn, 1, 1)
-          do k=0, kb
-             do j=0, jb
-                do i=0, ib
-                   ! If this cell is is an interpolated cell, record
-                   ! the indices we need to get
-                   if (iblank(i, j, k) == -1) then 
-                      do m=1, 8
-                         if (flowDoms(nn, 1, 1)%gInd(m, i, j, k) >= 0) then 
-                            ii = ii + 1
-                            indicesToGet(ii) = flowDoms(nn, 1, 1)%gInd(m, i, j, k)
-                         end if
-                      end do
-                   end if
+                count = count + nnx * nny * nnz
+            end do
+
+            ! We are not done yet; We need to fill in the block-based
+            ! coarse indices and then do a halo-exchange on it so procs
+            ! know where to put their off-proc entries on the coarser
+            ! grids.
+
+            ! Loop over the blocks.
+            n = 0
+            ii = 0
+
+            call VecGetArrayF90(indexVec, indPtr, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            do nn = 1, nDom
+                call setPointers(nn, 1, 1)
+                ii = ii + (kb + 1) * (jb + 1) * (ib + 1) ! Count maximum double halos
+                ! Loop over the sizes of this level
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+
+                            n = n + 1
+
+                            ! Coarse Index: This is the first coarse index for
+                            ! the current finest grid element.
+                            coarseIndex = interps(1)%arr(n)
+
+                            ! For levels higher than 2, we need to trace
+                            ! through the subsequent levels to find what the
+                            ! coarse grid index is for level lvl.
+                            do nextLvl = 2, lvl
+                                coarseIndex = interps(nextLvl)%arr(coarseIndex)
+                            end do
+
+                            ! Now we set the lvl coarse grid index into the
+                            ! coarseIndices array. Note that we make the index
+                            ! global here by adding procStarts. We also
+                            ! subtract 1 to make it zero-based for petsc.
+                            coarseIndex = coarseIndex + procStarts(lvl + 1) - 1
+                            coarseIndices(nn, lvl)%arr(i, j, k) = coarseIndex
+
+                            ! Now put that index into the array
+                            indPtr(n) = transfer(int(coarseIndex, 8), one)
+
+                        end do
+                    end do
                 end do
-             end do
-          end do
-       end do
-
-       ! Now create the scatter to retrieve the "indicesToGet" from
-       ! the indexVec. Petsc is always annonying for this. 
-       
-       call ISCreateGeneral(adflow_comm_world, ii, indicesToGet(1:ii), PETSC_COPY_VALUES, &
-            IS1, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-       deallocate(indicesToGet)
-
-       ! Create array to dump the result
-       call VecCreateMPI(adflow_comm_world, ii, PETSC_DETERMINE, recvVec, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ! Create the scatter
-       call VecScatterCreate(indexVec, IS1, recvVec, PETSC_NULL_IS, scat, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ! Do the actual scatter
-       call VecScatterBegin(scat, indexVec, recvVec, INSERT_VALUES, SCATTER_FORWARD, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-       call VecScatterEnd(scat, indexVec, recvVec, INSERT_VALUES, SCATTER_FORWARD, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       call VecGetArrayF90(recvVec, indPtr, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ! Loop back over at set the coarse indices
-       ii = 0
-       do nn=1, nDom
-          call setPointers(nn, 1, 1)
-          ! Loop over the sizes of this level
-          do k=0, kb
-             do j=0, jb
-                do i=0, ib
-                   if (iblank(i, j, k) == -1) then 
-                      do m=1, 8
-                         if (flowDoms(nn, 1, 1)%gInd(m, i, j, k) >= 0) then 
-                            ii = ii + 1
-                            coarseOversetIndices(nn, lvl)%arr(m, i, j, k) = & 
-                                 int(transfer(indPtr(ii), 1_8), intType)
-                         end if
-                      end do
-                   end if
+            end do
+            call VecRestoreArrayF90(indexVec, indPtr, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            allocate (indicesToGet(8 * ii))
+            ii = 0
+            do nn = 1, nDom
+                call setPointers(nn, 1, 1)
+                do k = 0, kb
+                    do j = 0, jb
+                        do i = 0, ib
+                            ! If this cell is is an interpolated cell, record
+                            ! the indices we need to get
+                            if (iblank(i, j, k) == -1) then
+                                do m = 1, 8
+                                    if (flowDoms(nn, 1, 1)%gInd(m, i, j, k) >= 0) then
+                                        ii = ii + 1
+                                        indicesToGet(ii) = flowDoms(nn, 1, 1)%gInd(m, i, j, k)
+                                    end if
+                                end do
+                            end if
+                        end do
+                    end do
                 end do
-             end do
-          end do
-       end do
-
-       call VecRestoreArrayF90(recvVec, indPtr, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       call VecScatterDestroy(scat, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       call VecDestroy(recvVec, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       call ISDestroy(IS1, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ! Now we need to exchange the coarse grid indices. Set
-       ! pointers to the coarseIndices arrays and then call the
-       ! generic integer halo exchange. 
-       level = 1
-       sps = 1
-       nVar= 1
-          
-       do nn=1, nDom
-          flowDoms(nn, level, sps)%intCommVars(1)%var => coarseIndices(nn, lvl)%arr(:, :, :)
-       end do
-       
-       call whalo1to1IntGeneric(nVar, level, sps,  commPatternCell_2nd, internalCell_2nd)
-      
-    end do
-
-    call VecDestroy(indexVec, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! Next we need to setup the matrices and vectors
-
-    do lvl=1, agmgLevels
-
-
-       call KSPCreate(adflow_comm_world, kspLevels(lvl), ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-
-
-       ! Create the coarse grid.
-       if (lvl >= 2) then
-          ncoarse = maxval(interps(lvl-1)%arr)
-          
-          allocate(nnzOn(1:ncoarse), nnzOff(1:ncoarse))
-          nnzOn = 14
-          nnzOff = 7
-          call MatCreateBAIJ(adflow_comm_world, bs, ncoarse*bs, ncoarse*bs, &
-               PETSC_DETERMINE, PETSC_DETERMINE, 0, nnzOn, 0, nnzOff, &
-               A(lvl), ierr)
-          call EChk(ierr, __FILE__, __LINE__)
-          deallocate(nnzOn, nnzOff)
-          
-          call MatSetOption(A(lvl), MAT_ROW_ORIENTED, PETSC_FALSE, ierr)
-          call EChk(ierr, __FILE__, __LINE__)
-          
-          call MatSetOption(A(lvl), MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE, ierr)
-          call EChk(ierr, __FILE__, __LINE__)
-          
-          call MatCreateVecs(A(lvl), res(lvl), sol(lvl), ierr)
-          call EChk(ierr, __FILE__, __LINE__)
-
-       else
-          call vecCreateMPI(adflow_comm_world, nCell*bs, PETSC_DETERMINE, res(lvl), ierr)
-          call EChk(ierr, __FILE__, __LINE__)
-
-          call VecDuplicate(res(lvl), sol(lvl), ierr)
-          call EChk(ierr, __FILE__, __LINE__)
-          
-       end if
-       call VecDuplicate(res(lvl), rhs(lvl), ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-       
-       call VecDuplicate(res(lvl), sol2(lvl), ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-
-    end do
-    agmgSetup = .True.
-
-  end subroutine setupAGMG
-
-  subroutine destroyAGMG
-
-    integer(kind=intType) :: lvl, ierr, i, j
-    if (agmgSetup) then 
-       do lvl=1, agmgLevels
-          ! Destroy all of our vectors/matrices
-          if (lvl > 1) then 
-             call MatDestroy(A(lvl), ierr)
-             call EChk(ierr, __FILE__, __LINE__)
-          end if
-          
-          call VecDestroy(res(lvl), ierr)
-          call EChk(ierr, __FILE__, __LINE__)
-          
-          call VecDestroy(sol(lvl), ierr)
-          call EChk(ierr, __FILE__, __LINE__)
-          
-          call VecDestroy(sol2(lvl), ierr)
-          call EChk(ierr, __FILE__, __LINE__)
-          
-          call VecDestroy(rhs(lvl), ierr)
-          call EChk(ierr, __FILE__, __LINE__)
-
-          call KSPDestroy(kspLevels(lvl), ierr)
-          call EChk(ierr, __FILE__, __LINE__)
-       end do
-
-       deallocate(A, res, rhs, sol, sol2, kspLevels)
-       deallocate(coarseIndices, coarseOversetIndices, interps)
-       agmgSetup = .False.
-       
-    end if
-  end subroutine destroyAGMG
-
-  subroutine applyShellPC(pc, x, y, ierr)
-    use communication
-    ! Input/Output
-    PC      pc
-    Vec     x, y
-    integer(kind=intType) ierr
-
-    ! Working
-    integer(kind=intType) :: i
-
-    if (agmgLevels > 1) then 
-
-       if (agmgOuterIts == 1) then 
-          call MGPreCon(x, y, 1) ! y is the new approximate sol
-       else
-          
-          call VecCopy(x, rhs(1), ierr)
-          call EChk(ierr, __FILE__, __LINE__)
-
-          call VecSet(y, zero, ierr)
-          call EChk(ierr, __FILE__, __LINE__)
-
-          do i=1, agmgOuterIts
-
-             call MGPreCon(rhs(1), sol(1), 1) ! y is the new approximate sol
-
-             ! Update the solution
-             call VecAYPX(y, one, sol(1), ierr)
-             call EChk(ierr, __FILE__, __LINE__)
-
-             if (i < agmgOuterIts) then 
-
-                ! Compute new residual
-                call matMult(fineMat, y, rhs(1), ierr)
+            end do
+
+            ! Now create the scatter to retrieve the "indicesToGet" from
+            ! the indexVec. Petsc is always annonying for this.
+
+            call ISCreateGeneral(adflow_comm_world, ii, indicesToGet(1:ii), PETSC_COPY_VALUES, &
+                                 IS1, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+            deallocate (indicesToGet)
+
+            ! Create array to dump the result
+            call VecCreateMPI(adflow_comm_world, ii, PETSC_DETERMINE, recvVec, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Create the scatter
+            call VecScatterCreate(indexVec, IS1, recvVec, PETSC_NULL_IS, scat, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Do the actual scatter
+            call VecScatterBegin(scat, indexVec, recvVec, INSERT_VALUES, SCATTER_FORWARD, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+            call VecScatterEnd(scat, indexVec, recvVec, INSERT_VALUES, SCATTER_FORWARD, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            call VecGetArrayF90(recvVec, indPtr, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Loop back over at set the coarse indices
+            ii = 0
+            do nn = 1, nDom
+                call setPointers(nn, 1, 1)
+                ! Loop over the sizes of this level
+                do k = 0, kb
+                    do j = 0, jb
+                        do i = 0, ib
+                            if (iblank(i, j, k) == -1) then
+                                do m = 1, 8
+                                    if (flowDoms(nn, 1, 1)%gInd(m, i, j, k) >= 0) then
+                                        ii = ii + 1
+                                        coarseOversetIndices(nn, lvl)%arr(m, i, j, k) = &
+                                            int(transfer(indPtr(ii), 1_8), intType)
+                                    end if
+                                end do
+                            end if
+                        end do
+                    end do
+                end do
+            end do
+
+            call VecRestoreArrayF90(recvVec, indPtr, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            call VecScatterDestroy(scat, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            call VecDestroy(recvVec, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            call ISDestroy(IS1, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Now we need to exchange the coarse grid indices. Set
+            ! pointers to the coarseIndices arrays and then call the
+            ! generic integer halo exchange.
+            level = 1
+            sps = 1
+            nVar = 1
+
+            do nn = 1, nDom
+                flowDoms(nn, level, sps)%intCommVars(1)%var => coarseIndices(nn, lvl)%arr(:, :, :)
+            end do
+
+            call whalo1to1IntGeneric(nVar, level, sps, commPatternCell_2nd, internalCell_2nd)
+
+        end do
+
+        call VecDestroy(indexVec, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Next we need to setup the matrices and vectors
+
+        do lvl = 1, agmgLevels
+
+            call KSPCreate(adflow_comm_world, kspLevels(lvl), ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Create the coarse grid.
+            if (lvl >= 2) then
+                ncoarse = maxval(interps(lvl - 1)%arr)
+
+                allocate (nnzOn(1:ncoarse), nnzOff(1:ncoarse))
+                nnzOn = 14
+                nnzOff = 7
+                call MatCreateBAIJ(adflow_comm_world, bs, ncoarse * bs, ncoarse * bs, &
+                                   PETSC_DETERMINE, PETSC_DETERMINE, 0, nnzOn, 0, nnzOff, &
+                                   A(lvl), ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+                deallocate (nnzOn, nnzOff)
+
+                call MatSetOption(A(lvl), MAT_ROW_ORIENTED, PETSC_FALSE, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                call MatSetOption(A(lvl), MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                call MatCreateVecs(A(lvl), res(lvl), sol(lvl), ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+            else
+                call vecCreateMPI(adflow_comm_world, nCell * bs, PETSC_DETERMINE, res(lvl), ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                call VecDuplicate(res(lvl), sol(lvl), ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+            end if
+            call VecDuplicate(res(lvl), rhs(lvl), ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            call VecDuplicate(res(lvl), sol2(lvl), ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+        end do
+        agmgSetup = .True.
+
+    end subroutine setupAGMG
+
+    subroutine destroyAGMG
+
+        integer(kind=intType) :: lvl, ierr, i, j
+        if (agmgSetup) then
+            do lvl = 1, agmgLevels
+                ! Destroy all of our vectors/matrices
+                if (lvl > 1) then
+                    call MatDestroy(A(lvl), ierr)
+                    call EChk(ierr, __FILE__, __LINE__)
+                end if
+
+                call VecDestroy(res(lvl), ierr)
                 call EChk(ierr, __FILE__, __LINE__)
-                       
-                call VecAYPX(rhs(1), -one, x, ierr)
+
+                call VecDestroy(sol(lvl), ierr)
                 call EChk(ierr, __FILE__, __LINE__)
 
-             end if
-          end do
-       end if
-    else 
-      
-       call KSPSolve(kspLevels(1), x, y, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-    end if
-
-  end subroutine ApplyShellPC
-
-  subroutine setupShellPC(pc, ierr)
-
-    use communication, only : adflow_comm_world
-    use inputAdjoint
-    ! Input/Output
-    PC  pc
-    integer(kind=intTYpe) :: ierr
-
-    ! Working
-    PC  globalPC, subpc
-    KSP subksp
-    integer(kind=intType) :: lvl
-    integer(kind=intType) :: nlocal, first
+                call VecDestroy(sol2(lvl), ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                call VecDestroy(rhs(lvl), ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                call KSPDestroy(kspLevels(lvl), ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+            end do
+
+            deallocate (A, res, rhs, sol, sol2, kspLevels)
+            deallocate (coarseIndices, coarseOversetIndices, interps)
+            agmgSetup = .False.
+
+        end if
+    end subroutine destroyAGMG
+
+    subroutine applyShellPC(pc, x, y, ierr)
+        use communication
+        ! Input/Output
+        PC pc
+        Vec x, y
+        integer(kind=intType) ierr
+
+        ! Working
+        integer(kind=intType) :: i
+
+        if (agmgLevels > 1) then
+
+            if (agmgOuterIts == 1) then
+                call MGPreCon(x, y, 1) ! y is the new approximate sol
+            else
+
+                call VecCopy(x, rhs(1), ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                call VecSet(y, zero, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                do i = 1, agmgOuterIts
+
+                    call MGPreCon(rhs(1), sol(1), 1) ! y is the new approximate sol
+
+                    ! Update the solution
+                    call VecAYPX(y, one, sol(1), ierr)
+                    call EChk(ierr, __FILE__, __LINE__)
+
+                    if (i < agmgOuterIts) then
+
+                        ! Compute new residual
+                        call matMult(fineMat, y, rhs(1), ierr)
+                        call EChk(ierr, __FILE__, __LINE__)
+
+                        call VecAYPX(rhs(1), -one, x, ierr)
+                        call EChk(ierr, __FILE__, __LINE__)
+
+                    end if
+                end do
+            end if
+        else
+
+            call KSPSolve(kspLevels(1), x, y, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+        end if
+
+    end subroutine ApplyShellPC
+
+    subroutine setupShellPC(pc, ierr)
+
+        use communication, only: adflow_comm_world
+        use inputAdjoint
+        ! Input/Output
+        PC pc
+        integer(kind=intTYpe) :: ierr
+
+        ! Working
+        PC globalPC, subpc
+        KSP subksp
+        integer(kind=intType) :: lvl
+        integer(kind=intType) :: nlocal, first
+
+        ! Note that this has to be updated to work in parallel!
+
+        do lvl = 1, agmgLevels
+
+            if (lvl == 1) then
+                call KSPSetOperators(kspLevels(lvl), fineMat, fineMat, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+            else
+                call KSPSetOperators(kspLevels(lvl), A(lvl), A(lvl), ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+            end if
 
-    ! Note that this has to be updated to work in parallel! 
-    
-    do lvl=1, agmgLevels
-       
-       if (lvl == 1) then 
-          call KSPSetOperators(kspLevels(lvl), fineMat, fineMat, ierr)
-          call EChk(ierr, __FILE__, __LINE__)
-       else
-          call KSPSetOperators(kspLevels(lvl), A(lvl), A(lvl), ierr)
-          call EChk(ierr, __FILE__, __LINE__)
-       end if
-       
-       call kspsetnormtype(kspLevels(lvl), KSP_NORM_NONE, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            call kspsetnormtype(kspLevels(lvl), KSP_NORM_NONE, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call KSPSetType(kspLevels(lvl), 'richardson', ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            call KSPSetType(kspLevels(lvl), 'richardson', ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call KSPSetTolerances(kspLevels(lvl), PETSC_DEFAULT_REAL, &
-            PETSC_DEFAULT_REAL, PETSC_DEFAULT_REAL, &
-            agmgNSmooth, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            call KSPSetTolerances(kspLevels(lvl), PETSC_DEFAULT_REAL, &
+                                  PETSC_DEFAULT_REAL, PETSC_DEFAULT_REAL, &
+                                  agmgNSmooth, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call KSPgetPC(kspLevels(lvl), globalPC, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            call KSPgetPC(kspLevels(lvl), globalPC, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call PCSetType(globalPC, 'asm', ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            call PCSetType(globalPC, 'asm', ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call PCASMSetOverlap(globalPC, agmgASMOverlap, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            call PCASMSetOverlap(globalPC, agmgASMOverlap, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       !Setup the main ksp context before extracting the subdomains
-       call KSPSetUp(kspLevels(lvl), ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-       
-       ! Extract the ksp objects for each subdomain
-       call PCASMGetSubKSP(globalPC, nlocal, first, subksp, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            !Setup the main ksp context before extracting the subdomains
+            call KSPSetUp(kspLevels(lvl), ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call KSPSetType(subksp, 'preonly', ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            ! Extract the ksp objects for each subdomain
+            call PCASMGetSubKSP(globalPC, nlocal, first, subksp, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       ! Extract the preconditioner for subksp object.
-       call KSPGetPC(subksp, subpc, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            call KSPSetType(subksp, 'preonly', ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       ! The subpc type will almost always be ILU
-       call PCSetType(subpc, 'ilu', ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            ! Extract the preconditioner for subksp object.
+            call KSPGetPC(subksp, subpc, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       ! ! ! Setup the matrix ordering for the subpc object:
-       call PCFactorSetMatOrderingtype(subpc, agmgMatrixOrdering, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+            ! The subpc type will almost always be ILU
+            call PCSetType(subpc, 'ilu', ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       ! Set the ILU parameters
-       call PCFactorSetLevels(subpc, agmgFillLevel , ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-    end do
+            ! ! ! Setup the matrix ordering for the subpc object:
+            call PCFactorSetMatOrderingtype(subpc, agmgMatrixOrdering, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-  end subroutine setupShellPC
+            ! Set the ILU parameters
+            call PCFactorSetLevels(subpc, agmgFillLevel, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+        end do
 
-  subroutine destroyShellPC(pc, ierr)
+    end subroutine setupShellPC
 
-    ! Input/Ouput
-    PC      pc
-    integer(kind=intType) :: ierr
+    subroutine destroyShellPC(pc, ierr)
 
-    ! Working
-    integer(kind=intType) :: lvl
+        ! Input/Ouput
+        PC pc
+        integer(kind=intType) :: ierr
 
+        ! Working
+        integer(kind=intType) :: lvl
 
-  end subroutine destroyShellPC
+    end subroutine destroyShellPC
 
-  subroutine restrictVec(x, y, interp)
+    subroutine restrictVec(x, y, interp)
 
-    ! Input/Output
-    Vec x, y
-    integer(kind=intType), dimension(:), intent(in) :: interp
+        ! Input/Output
+        Vec x, y
+        integer(kind=intType), dimension(:), intent(in) :: interp
 
-    ! Working
-    real(kind=realType), pointer :: yPtr(:), xPtr(:)
-    real(kind=realType), pointer :: xPtrBlk(:, :), yPtrBlk(:, :)
-    integer(kind=intType) :: ierr, n, i, j
+        ! Working
+        real(kind=realType), pointer :: yPtr(:), xPtr(:)
+        real(kind=realType), pointer :: xPtrBlk(:, :), yPtrBlk(:, :)
+        integer(kind=intType) :: ierr, n, i, j
 
-    ! Restrict x -> y
-    call VecGetArrayF90(x, xPtr, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        ! Restrict x -> y
+        call VecGetArrayF90(x, xPtr, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    call VecGetArrayF90(y, yPtr, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        call VecGetArrayF90(y, yPtr, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    ! Number of block nodes
-    n = size(interp)
+        ! Number of block nodes
+        n = size(interp)
 
-    ! Convience block-based pointers
-    xPtrBlk(1:bs, 1:size(xPtr)/bs) => xPtr
-    yPtrBlk(1:bs, 1:size(yPtr)/bs) => yPtr
+        ! Convience block-based pointers
+        xPtrBlk(1:bs, 1:size(xPtr) / bs) => xPtr
+        yPtrBlk(1:bs, 1:size(yPtr) / bs) => yPtr
 
-    ! Zero the output array
-    yPtr = zero
+        ! Zero the output array
+        yPtr = zero
 
-    ! Loop over the interpolation array, summing into the coarse arary
-    do i=1, n
-       j = interp(i)
-       yPtrBlk(:, j) = yPtrBlk(:, j) + xPtrBlk(:, i)
-    end do
+        ! Loop over the interpolation array, summing into the coarse arary
+        do i = 1, n
+            j = interp(i)
+            yPtrBlk(:, j) = yPtrBlk(:, j) + xPtrBlk(:, i)
+        end do
 
-    call VecRestoreArrayF90(x, xPtr, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        call VecRestoreArrayF90(x, xPtr, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    call VecRestoreArrayF90(y, yPtr, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        call VecRestoreArrayF90(y, yPtr, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-  end subroutine restrictVec
+    end subroutine restrictVec
 
-  subroutine prolongVec(x, y, interp)
+    subroutine prolongVec(x, y, interp)
 
-    ! Input/Output
-    Vec x, y
-    integer(kind=intType), dimension(:), intent(in) :: interp
+        ! Input/Output
+        Vec x, y
+        integer(kind=intType), dimension(:), intent(in) :: interp
 
-    ! Working
-    real(kind=realType), pointer :: yPtr(:), xPtr(:)
-    real(kind=realType), pointer :: xPtrBlk(:, :), yPtrBlk(:, :)
-    integer(kind=intType) :: ierr, n, i, j
+        ! Working
+        real(kind=realType), pointer :: yPtr(:), xPtr(:)
+        real(kind=realType), pointer :: xPtrBlk(:, :), yPtrBlk(:, :)
+        integer(kind=intType) :: ierr, n, i, j
 
-    ! Prolong vector x -> y
+        ! Prolong vector x -> y
 
-    call VecGetArrayF90(x, xPtr, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        call VecGetArrayF90(x, xPtr, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    call VecGetArrayF90(y, yPtr, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        call VecGetArrayF90(y, yPtr, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    ! Number of block nodes
-    n = size(interp)
+        ! Number of block nodes
+        n = size(interp)
 
-    ! Convience pointers
-    yPtrBlk(1:bs, 1:size(yPtr)/bs) => yPtr
-    xPtrBlk(1:bs, 1:size(xPtr)/bs) => xPtr
+        ! Convience pointers
+        yPtrBlk(1:bs, 1:size(yPtr) / bs) => yPtr
+        xPtrBlk(1:bs, 1:size(xPtr) / bs) => xPtr
 
-    ! Loop over the interpoaltion array, injecting into the fine array
-    do i=1, n
-       j = interp(i)
-       yPtrBlk(:, i) = xPtrBlk(:, j)
-    end do
+        ! Loop over the interpoaltion array, injecting into the fine array
+        do i = 1, n
+            j = interp(i)
+            yPtrBlk(:, i) = xPtrBlk(:, j)
+        end do
 
-    call VecRestoreArrayF90(x, xPtr, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        call VecRestoreArrayF90(x, xPtr, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    call VecRestoreArrayF90(y, yPtr, ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        call VecRestoreArrayF90(y, yPtr, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-  end subroutine prolongVec
+    end subroutine prolongVec
 
-  recursive subroutine MGPreCon(r, y, k)
+    recursive subroutine MGPreCon(r, y, k)
 
-    ! r is the residual and y is the approximate solution
-    
-    ! Input/Output
-    Vec y, r
-    integer(kind=intType), intent(in) :: k
+        ! r is the residual and y is the approximate solution
 
-    ! Working
-    integer(kind=intType) ::  ierr, i
+        ! Input/Output
+        Vec y, r
+        integer(kind=intType), intent(in) :: k
 
-    ! Setp 3: Restrict the residual
-    call restrictVec(r, rhs(k+1), interps(k)%arr)
+        ! Working
+        integer(kind=intType) :: ierr, i
 
-    if (k == agmglevels-1) then 
-       ! The next level down is the bottom...break the recursion by solving:
-       call kspSolve(KSPLevels(k+1), rhs(k+1), sol(k+1), ierr)
-       call EChk(ierr, __FILE__, __LINE__)
+        ! Setp 3: Restrict the residual
+        call restrictVec(r, rhs(k + 1), interps(k)%arr)
 
-    else
+        if (k == agmglevels - 1) then
+            ! The next level down is the bottom...break the recursion by solving:
+            call kspSolve(KSPLevels(k + 1), rhs(k + 1), sol(k + 1), ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       ! Step 4: Call the next level down recursively
-       call MGPreCon(rhs(k+1), sol(k+1), k+1)
-    end if
+        else
 
-    call prolongVec(sol(k+1), y, interps(k)%arr)
+            ! Step 4: Call the next level down recursively
+            call MGPreCon(rhs(k + 1), sol(k + 1), k + 1)
+        end if
 
-    ! ! Step 6: Compute the new residual:
-    if (k==1) then
-       call matMult(fineMat, y, res(k), ierr)  ! res = A(i) * z1(k)
-    else
-       call matMult(A(k), y, res(k), ierr)  ! res = A(i) * z1(k)
-    end if
-    call EChk(ierr, __FILE__, __LINE__)
-    
-    call VecAYPX(res(k), -one, r, ierr)    ! res = -res + r
-    call EChk(ierr, __FILE__, __LINE__)
+        call prolongVec(sol(k + 1), y, interps(k)%arr)
 
+        ! ! Step 6: Compute the new residual:
+        if (k == 1) then
+            call matMult(fineMat, y, res(k), ierr)  ! res = A(i) * z1(k)
+        else
+            call matMult(A(k), y, res(k), ierr)  ! res = A(i) * z1(k)
+        end if
+        call EChk(ierr, __FILE__, __LINE__)
 
-    ! Step 7: Relax using the smoother
-    call kspSolve(KSPLevels(k), res(k), sol2(k), ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        call VecAYPX(res(k), -one, r, ierr)    ! res = -res + r
+        call EChk(ierr, __FILE__, __LINE__)
 
-    call vecAXPY(y, one, sol2(k), ierr)
-    call EChk(ierr, __FILE__, __LINE__)
+        ! Step 7: Relax using the smoother
+        call kspSolve(KSPLevels(k), res(k), sol2(k), ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-  end subroutine MGPreCon
+        call vecAXPY(y, one, sol2(k), ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
+    end subroutine MGPreCon
 
 end module agmg
diff --git a/src/solver/fluxes.F90 b/src/solver/fluxes.F90
index 0da8203e0..b05d8ec61 100644
--- a/src/solver/fluxes.F90
+++ b/src/solver/fluxes.F90
@@ -1,5674 +1,5667 @@
 module fluxes
 
 contains
-  subroutine inviscidCentralFlux
-    !
-    !       inviscidCentralFlux computes the Euler fluxes using a central
-    !       discretization for a given block. Therefore it is assumed that
-    !       the pointers in block pointer already point to the correct
-    !       block on the correct multigrid level.
-    !
-    use constants
-    use blockPointers, only : nx, il, ie, ny, jl, je, nz, kl, ke, spectralSol, &
-         w, si, sj, sk, dw, porI, porJ, porK, &
-         indFamilyI, indFamilyJ, indFamilyK, p, sFaceI, sFaceJ, sFaceK, nbkglobal, &
-         addgridVelocities, blockIsMoving, vol, factFamilyI, factFamilyJ, factFamilyK
-    use cgnsGrid, only : cgnsDoms, massFlowFamilyInv
-    use flowVarRefState, only : timeRef
-    use inputPhysics, only : equationMode
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, ind, ii
-    real(kind=realType) :: qsp, qsm, rqsp, rqsm, porVel, porFlux
-    real(kind=realType) :: pa, fs, sFace, vnp, vnm
-    real(kind=realType) :: wwx, wwy, wwz, rvol
-
-
-    continue
-    !$AD CHECKPOINT-START
-    ! Initialize sFace to zero. This value will be used if the
-    ! block is not moving.
-    sFace = zero
-    !
-    !       Advective fluxes in the i-direction.
-    !
+    subroutine inviscidCentralFlux
+        !
+        !       inviscidCentralFlux computes the Euler fluxes using a central
+        !       discretization for a given block. Therefore it is assumed that
+        !       the pointers in block pointer already point to the correct
+        !       block on the correct multigrid level.
+        !
+        use constants
+        use blockPointers, only: nx, il, ie, ny, jl, je, nz, kl, ke, spectralSol, &
+                                 w, si, sj, sk, dw, porI, porJ, porK, &
+                                 indFamilyI, indFamilyJ, indFamilyK, p, sFaceI, sFaceJ, sFaceK, nbkglobal, &
+                                 addgridVelocities, blockIsMoving, vol, factFamilyI, factFamilyJ, factFamilyK
+        use cgnsGrid, only: cgnsDoms, massFlowFamilyInv
+        use flowVarRefState, only: timeRef
+        use inputPhysics, only: equationMode
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, ind, ii
+        real(kind=realType) :: qsp, qsm, rqsp, rqsm, porVel, porFlux
+        real(kind=realType) :: pa, fs, sFace, vnp, vnm
+        real(kind=realType) :: wwx, wwy, wwz, rvol
+
+        continue
+        !$AD CHECKPOINT-START
+        ! Initialize sFace to zero. This value will be used if the
+        ! block is not moving.
+        sFace = zero
+        !
+        !       Advective fluxes in the i-direction.
+        !
 #ifdef TAPENADE_REVERSE
-    !$AD II-LOOP
-    do ii=0,il*ny*nz-1
-       i = mod(ii, il) + 1
-       j = mod(ii/il, ny) + 2
-       k = ii/(il*ny) + 2
+        !$AD II-LOOP
+        do ii = 0, il * ny * nz - 1
+            i = mod(ii, il) + 1
+            j = mod(ii / il, ny) + 2
+            k = ii / (il * ny) + 2
 #else
-       do k=2, kl
-          do j=2, jl
-             do i=1, il
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 1, il
 #endif
-                ! Set the dot product of the grid velocity and the
-                ! normal in i-direction for a moving face.
-
-                if( addGridVelocities ) sFace = sFaceI(i,j,k)
-
-                ! Compute the normal velocities of the left and right state.
-
-                vnp = w(i+1,j,k,ivx)*sI(i,j,k,1) &
-                     + w(i+1,j,k,ivy)*sI(i,j,k,2) &
-                     + w(i+1,j,k,ivz)*sI(i,j,k,3)
-                vnm = w(i,  j,k,ivx)*sI(i,j,k,1) &
-                     + w(i,  j,k,ivy)*sI(i,j,k,2) &
-                     + w(i,  j,k,ivz)*sI(i,j,k,3)
-                ! Set the values of the porosities for this face.
-                ! porVel defines the porosity w.r.t. velocity;
-                ! porFlux defines the porosity w.r.t. the entire flux.
-                ! The latter is only zero for a discontinuous block
-                ! boundary that must be treated conservatively.
-                ! The default value of porFlux is 0.5, such that the
-                ! correct central flux is scattered to both cells.
-                ! In case of a boundFlux the normal velocity is set
-                ! to sFace.
-
-                porVel  = one
-                porFlux = half
-                if(porI(i,j,k) == noFlux)    porFlux = zero
-                if(porI(i,j,k) == boundFlux) then
-                   porVel = zero
-                   vnp    = sFace
-                   vnm    = sFace
-                endif
-
-                ! Incorporate porFlux in porVel.
-
-                porVel = porVel*porFlux
-
-                ! Compute the normal velocities relative to the grid for
-                ! the face as well as the mass fluxes.
-
-                qsp = (vnp -sFace)*porVel
-                qsm = (vnm -sFace)*porVel
-
-                rqsp = qsp*w(i+1,j,k,irho)
-                rqsm = qsm*w(i,  j,k,irho)
-
-                ! Compute the sum of the pressure multiplied by porFlux.
-                ! For the default value of porFlux, 0.5, this leads to
-                ! the average pressure.
-
-                pa = porFlux*(p(i+1,j,k) + p(i,j,k))
-
-                ! Compute the fluxes and scatter them to the cells
-                ! i,j,k and i+1,j,k. Store the density flux in the
-                ! mass flow of the appropriate sliding mesh interface.
-
-                fs = rqsp + rqsm
-                dw(i+1,j,k,irho) = dw(i+1,j,k,irho) - fs
-                dw(i,  j,k,irho) = dw(i,  j,k,irho) + fs
+                        ! Set the dot product of the grid velocity and the
+                        ! normal in i-direction for a moving face.
+
+                        if (addGridVelocities) sFace = sFaceI(i, j, k)
+
+                        ! Compute the normal velocities of the left and right state.
+
+                        vnp = w(i + 1, j, k, ivx) * sI(i, j, k, 1) &
+                              + w(i + 1, j, k, ivy) * sI(i, j, k, 2) &
+                              + w(i + 1, j, k, ivz) * sI(i, j, k, 3)
+                        vnm = w(i, j, k, ivx) * sI(i, j, k, 1) &
+                              + w(i, j, k, ivy) * sI(i, j, k, 2) &
+                              + w(i, j, k, ivz) * sI(i, j, k, 3)
+                        ! Set the values of the porosities for this face.
+                        ! porVel defines the porosity w.r.t. velocity;
+                        ! porFlux defines the porosity w.r.t. the entire flux.
+                        ! The latter is only zero for a discontinuous block
+                        ! boundary that must be treated conservatively.
+                        ! The default value of porFlux is 0.5, such that the
+                        ! correct central flux is scattered to both cells.
+                        ! In case of a boundFlux the normal velocity is set
+                        ! to sFace.
+
+                        porVel = one
+                        porFlux = half
+                        if (porI(i, j, k) == noFlux) porFlux = zero
+                        if (porI(i, j, k) == boundFlux) then
+                            porVel = zero
+                            vnp = sFace
+                            vnm = sFace
+                        end if
+
+                        ! Incorporate porFlux in porVel.
+
+                        porVel = porVel * porFlux
+
+                        ! Compute the normal velocities relative to the grid for
+                        ! the face as well as the mass fluxes.
+
+                        qsp = (vnp - sFace) * porVel
+                        qsm = (vnm - sFace) * porVel
+
+                        rqsp = qsp * w(i + 1, j, k, irho)
+                        rqsm = qsm * w(i, j, k, irho)
+
+                        ! Compute the sum of the pressure multiplied by porFlux.
+                        ! For the default value of porFlux, 0.5, this leads to
+                        ! the average pressure.
+
+                        pa = porFlux * (p(i + 1, j, k) + p(i, j, k))
+
+                        ! Compute the fluxes and scatter them to the cells
+                        ! i,j,k and i+1,j,k. Store the density flux in the
+                        ! mass flow of the appropriate sliding mesh interface.
+
+                        fs = rqsp + rqsm
+                        dw(i + 1, j, k, irho) = dw(i + 1, j, k, irho) - fs
+                        dw(i, j, k, irho) = dw(i, j, k, irho) + fs
 #ifndef USE_TAPENADE
-                ind = indFamilyI(i,j,k)
-                massFlowFamilyInv(ind,spectralSol) =       &
-                     massFlowFamilyInv(ind,spectralSol) &
-                     + factFamilyI(i,j,k)*fs
+                        ind = indFamilyI(i, j, k)
+                        massFlowFamilyInv(ind, spectralSol) = &
+                            massFlowFamilyInv(ind, spectralSol) &
+                            + factFamilyI(i, j, k) * fs
 #endif
-                fs = rqsp*w(i+1,j,k,ivx) + rqsm*w(i,j,k,ivx) &
-                     + pa*sI(i,j,k,1)
-                dw(i+1,j,k,imx) = dw(i+1,j,k,imx) - fs
-                dw(i,  j,k,imx) = dw(i,  j,k,imx) + fs
-
-                fs = rqsp*w(i+1,j,k,ivy) + rqsm*w(i,j,k,ivy) &
-                     + pa*sI(i,j,k,2)
-                dw(i+1,j,k,imy) = dw(i+1,j,k,imy) - fs
-                dw(i,  j,k,imy) = dw(i,  j,k,imy) + fs
-
-                fs = rqsp*w(i+1,j,k,ivz) + rqsm*w(i,j,k,ivz) &
-                     + pa*sI(i,j,k,3)
-                dw(i+1,j,k,imz) = dw(i+1,j,k,imz) - fs
-                dw(i,  j,k,imz) = dw(i,  j,k,imz) + fs
-
-                fs = qsp*w(i+1,j,k,irhoE) + qsm*w(i,j,k,irhoE) &
-                     + porFlux*(vnp*p(i+1,j,k) + vnm*p(i,j,k))
-                dw(i+1,j,k,irhoE) = dw(i+1,j,k,irhoE) - fs
-                dw(i,  j,k,irhoE) = dw(i,  j,k,irhoE) + fs
+                        fs = rqsp * w(i + 1, j, k, ivx) + rqsm * w(i, j, k, ivx) &
+                             + pa * sI(i, j, k, 1)
+                        dw(i + 1, j, k, imx) = dw(i + 1, j, k, imx) - fs
+                        dw(i, j, k, imx) = dw(i, j, k, imx) + fs
+
+                        fs = rqsp * w(i + 1, j, k, ivy) + rqsm * w(i, j, k, ivy) &
+                             + pa * sI(i, j, k, 2)
+                        dw(i + 1, j, k, imy) = dw(i + 1, j, k, imy) - fs
+                        dw(i, j, k, imy) = dw(i, j, k, imy) + fs
+
+                        fs = rqsp * w(i + 1, j, k, ivz) + rqsm * w(i, j, k, ivz) &
+                             + pa * sI(i, j, k, 3)
+                        dw(i + 1, j, k, imz) = dw(i + 1, j, k, imz) - fs
+                        dw(i, j, k, imz) = dw(i, j, k, imz) + fs
+
+                        fs = qsp * w(i + 1, j, k, irhoE) + qsm * w(i, j, k, irhoE) &
+                             + porFlux * (vnp * p(i + 1, j, k) + vnm * p(i, j, k))
+                        dw(i + 1, j, k, irhoE) = dw(i + 1, j, k, irhoE) - fs
+                        dw(i, j, k, irhoE) = dw(i, j, k, irhoE) + fs
 #ifdef TAPENADE_REVERSE
-             end do
+                    end do
 #else
-          enddo
-       enddo
-    enddo
+                end do
+            end do
+        end do
 #endif
-    continue
-    !$AD CHECKPOINT-END
-
-    !
-    !       Advective fluxes in the j-direction.
-    !
-    continue
-    !$AD CHECKPOINT-START
-    sface = zero
+        continue
+        !$AD CHECKPOINT-END
+
+        !
+        !       Advective fluxes in the j-direction.
+        !
+        continue
+        !$AD CHECKPOINT-START
+        sface = zero
 #ifdef TAPENADE_REVERSE
-    !$AD II-LOOP
-    do ii=0,nx*jl*nz-1
-       i = mod(ii, nx) + 2
-       j = mod(ii/nx, jl) + 1
-       k = ii/(nx*jl) + 2
+        !$AD II-LOOP
+        do ii = 0, nx * jl * nz - 1
+            i = mod(ii, nx) + 2
+            j = mod(ii / nx, jl) + 1
+            k = ii / (nx * jl) + 2
 #else
-       do k=2,kl
-          do j=1,jl
-             do i=2,il
+            do k = 2, kl
+                do j = 1, jl
+                    do i = 2, il
 #endif
-                ! Set the dot product of the grid velocity and the
-                ! normal in j-direction for a moving face.
+                        ! Set the dot product of the grid velocity and the
+                        ! normal in j-direction for a moving face.
 
-                if( addGridVelocities ) sFace = sFaceJ(i,j,k)
+                        if (addGridVelocities) sFace = sFaceJ(i, j, k)
 
-                ! Compute the normal velocities of the left and right state.
+                        ! Compute the normal velocities of the left and right state.
 
-                vnp = w(i,j+1,k,ivx)*sJ(i,j,k,1) &
-                     + w(i,j+1,k,ivy)*sJ(i,j,k,2) &
-                     + w(i,j+1,k,ivz)*sJ(i,j,k,3)
-                vnm = w(i,j,  k,ivx)*sJ(i,j,k,1) &
-                     + w(i,j,  k,ivy)*sJ(i,j,k,2) &
-                     + w(i,j,  k,ivz)*sJ(i,j,k,3)
+                        vnp = w(i, j + 1, k, ivx) * sJ(i, j, k, 1) &
+                              + w(i, j + 1, k, ivy) * sJ(i, j, k, 2) &
+                              + w(i, j + 1, k, ivz) * sJ(i, j, k, 3)
+                        vnm = w(i, j, k, ivx) * sJ(i, j, k, 1) &
+                              + w(i, j, k, ivy) * sJ(i, j, k, 2) &
+                              + w(i, j, k, ivz) * sJ(i, j, k, 3)
 
-                ! Set the values of the porosities for this face.
-                ! porVel defines the porosity w.r.t. velocity;
-                ! porFlux defines the porosity w.r.t. the entire flux.
-                ! The latter is only zero for a discontinuous block
-                ! boundary that must be treated conservatively.
-                ! The default value of porFlux is 0.5, such that the
-                ! correct central flux is scattered to both cells.
-                ! In case of a boundFlux the normal velocity is set
-                ! to sFace.
+                        ! Set the values of the porosities for this face.
+                        ! porVel defines the porosity w.r.t. velocity;
+                        ! porFlux defines the porosity w.r.t. the entire flux.
+                        ! The latter is only zero for a discontinuous block
+                        ! boundary that must be treated conservatively.
+                        ! The default value of porFlux is 0.5, such that the
+                        ! correct central flux is scattered to both cells.
+                        ! In case of a boundFlux the normal velocity is set
+                        ! to sFace.
 
-                porVel  = one
-                porFlux = half
-                if(porJ(i,j,k) == noFlux)    porFlux = zero
-                if(porJ(i,j,k) == boundFlux) then
-                   porVel = zero
-                   vnp    = sFace
-                   vnm    = sFace
-                endif
+                        porVel = one
+                        porFlux = half
+                        if (porJ(i, j, k) == noFlux) porFlux = zero
+                        if (porJ(i, j, k) == boundFlux) then
+                            porVel = zero
+                            vnp = sFace
+                            vnm = sFace
+                        end if
 
-                ! Incorporate porFlux in porVel.
+                        ! Incorporate porFlux in porVel.
 
-                porVel = porVel*porFlux
+                        porVel = porVel * porFlux
 
-                ! Compute the normal velocities for the face as well as the
-                ! mass fluxes.
+                        ! Compute the normal velocities for the face as well as the
+                        ! mass fluxes.
 
-                qsp = (vnp - sFace)*porVel
-                qsm = (vnm - sFace)*porVel
+                        qsp = (vnp - sFace) * porVel
+                        qsm = (vnm - sFace) * porVel
 
-                rqsp = qsp*w(i,j+1,k,irho)
-                rqsm = qsm*w(i,j,  k,irho)
+                        rqsp = qsp * w(i, j + 1, k, irho)
+                        rqsm = qsm * w(i, j, k, irho)
 
-                ! Compute the sum of the pressure multiplied by porFlux.
-                ! For the default value of porFlux, 0.5, this leads to
-                ! the average pressure.
+                        ! Compute the sum of the pressure multiplied by porFlux.
+                        ! For the default value of porFlux, 0.5, this leads to
+                        ! the average pressure.
 
-                pa = porFlux*(p(i,j+1,k) + p(i,j,k))
+                        pa = porFlux * (p(i, j + 1, k) + p(i, j, k))
 
-                ! Compute the fluxes and scatter them to the cells
-                ! i,j,k and i,j+1,k. Store the density flux in the
-                ! mass flow of the appropriate sliding mesh interface.
+                        ! Compute the fluxes and scatter them to the cells
+                        ! i,j,k and i,j+1,k. Store the density flux in the
+                        ! mass flow of the appropriate sliding mesh interface.
 
-                fs = rqsp + rqsm
-                dw(i,j+1,k,irho) = dw(i,j+1,k,irho) - fs
-                dw(i,j,  k,irho) = dw(i,j,  k,irho) + fs
+                        fs = rqsp + rqsm
+                        dw(i, j + 1, k, irho) = dw(i, j + 1, k, irho) - fs
+                        dw(i, j, k, irho) = dw(i, j, k, irho) + fs
 #ifndef USE_TAPENADE
-                ind = indFamilyJ(i,j,k)
-                massFlowFamilyInv(ind,spectralSol) =       &
-                     massFlowFamilyInv(ind,spectralSol) &
-                     + factFamilyJ(i,j,k)*fs
+                        ind = indFamilyJ(i, j, k)
+                        massFlowFamilyInv(ind, spectralSol) = &
+                            massFlowFamilyInv(ind, spectralSol) &
+                            + factFamilyJ(i, j, k) * fs
 #endif
-                fs = rqsp*w(i,j+1,k,ivx) + rqsm*w(i,j,k,ivx) &
-                     + pa*sJ(i,j,k,1)
-                dw(i,j+1,k,imx) = dw(i,j+1,k,imx) - fs
-                dw(i,j,  k,imx) = dw(i,j,  k,imx) + fs
-
-                fs = rqsp*w(i,j+1,k,ivy) + rqsm*w(i,j,k,ivy) &
-                     + pa*sJ(i,j,k,2)
-                dw(i,j+1,k,imy) = dw(i,j+1,k,imy) - fs
-                dw(i,j,  k,imy) = dw(i,j,  k,imy) + fs
-
-                fs = rqsp*w(i,j+1,k,ivz) + rqsm*w(i,j,k,ivz) &
-                     + pa*sJ(i,j,k,3)
-                dw(i,j+1,k,imz) = dw(i,j+1,k,imz) - fs
-                dw(i,j,  k,imz) = dw(i,j,  k,imz) + fs
-
-                fs = qsp*w(i,j+1,k,irhoE) + qsm*w(i,j,k,irhoE) &
-                     + porFlux*(vnp*p(i,j+1,k) + vnm*p(i,j,k))
-                dw(i,j+1,k,irhoE) = dw(i,j+1,k,irhoE) - fs
-                dw(i,j,  k,irhoE) = dw(i,j,  k,irhoE) + fs
+                        fs = rqsp * w(i, j + 1, k, ivx) + rqsm * w(i, j, k, ivx) &
+                             + pa * sJ(i, j, k, 1)
+                        dw(i, j + 1, k, imx) = dw(i, j + 1, k, imx) - fs
+                        dw(i, j, k, imx) = dw(i, j, k, imx) + fs
+
+                        fs = rqsp * w(i, j + 1, k, ivy) + rqsm * w(i, j, k, ivy) &
+                             + pa * sJ(i, j, k, 2)
+                        dw(i, j + 1, k, imy) = dw(i, j + 1, k, imy) - fs
+                        dw(i, j, k, imy) = dw(i, j, k, imy) + fs
+
+                        fs = rqsp * w(i, j + 1, k, ivz) + rqsm * w(i, j, k, ivz) &
+                             + pa * sJ(i, j, k, 3)
+                        dw(i, j + 1, k, imz) = dw(i, j + 1, k, imz) - fs
+                        dw(i, j, k, imz) = dw(i, j, k, imz) + fs
+
+                        fs = qsp * w(i, j + 1, k, irhoE) + qsm * w(i, j, k, irhoE) &
+                             + porFlux * (vnp * p(i, j + 1, k) + vnm * p(i, j, k))
+                        dw(i, j + 1, k, irhoE) = dw(i, j + 1, k, irhoE) - fs
+                        dw(i, j, k, irhoE) = dw(i, j, k, irhoE) + fs
 
 #ifdef TAPENADE_REVERSE
-             end do
+                    end do
 #else
-          enddo
-       enddo
-    enddo
+                end do
+            end do
+        end do
 #endif
-    continue
-    !$AD CHECKPOINT-END
-
-    !
-    !       Advective fluxes in the k-direction.
-    continue
-    !$AD CHECKPOINT-START
-    sface = zero
+        continue
+        !$AD CHECKPOINT-END
+
+        !
+        !       Advective fluxes in the k-direction.
+        continue
+        !$AD CHECKPOINT-START
+        sface = zero
 #ifdef TAPENADE_REVERSE
-    !$AD II-LOOP
-    do ii=0,nx*ny*kl-1
-       i = mod(ii, nx) + 2
-       j = mod(ii/nx, ny) + 2
-       k = ii/(nx*ny) + 1
+        !$AD II-LOOP
+        do ii = 0, nx * ny * kl - 1
+            i = mod(ii, nx) + 2
+            j = mod(ii / nx, ny) + 2
+            k = ii / (nx * ny) + 1
 #else
-       do k=1,kl
-          do j=2,jl
-             do i=2,il
+            do k = 1, kl
+                do j = 2, jl
+                    do i = 2, il
 #endif
-                ! Set the dot product of the grid velocity and the
-                ! normal in k-direction for a moving face.
+                        ! Set the dot product of the grid velocity and the
+                        ! normal in k-direction for a moving face.
 
-                if( addGridVelocities ) sFace = sFaceK(i,j,k)
+                        if (addGridVelocities) sFace = sFaceK(i, j, k)
 
-                ! Compute the normal velocities of the left and right state.
+                        ! Compute the normal velocities of the left and right state.
 
-                vnp = w(i,j,k+1,ivx)*sK(i,j,k,1) &
-                     + w(i,j,k+1,ivy)*sK(i,j,k,2) &
-                     + w(i,j,k+1,ivz)*sK(i,j,k,3)
-                vnm = w(i,j,k,  ivx)*sK(i,j,k,1) &
-                     + w(i,j,k,  ivy)*sK(i,j,k,2) &
-                     + w(i,j,k,  ivz)*sK(i,j,k,3)
+                        vnp = w(i, j, k + 1, ivx) * sK(i, j, k, 1) &
+                              + w(i, j, k + 1, ivy) * sK(i, j, k, 2) &
+                              + w(i, j, k + 1, ivz) * sK(i, j, k, 3)
+                        vnm = w(i, j, k, ivx) * sK(i, j, k, 1) &
+                              + w(i, j, k, ivy) * sK(i, j, k, 2) &
+                              + w(i, j, k, ivz) * sK(i, j, k, 3)
 
-                ! Set the values of the porosities for this face.
-                ! porVel defines the porosity w.r.t. velocity;
-                ! porFlux defines the porosity w.r.t. the entire flux.
-                ! The latter is only zero for a discontinuous block
-                ! block boundary that must be treated conservatively.
-                ! The default value of porFlux is 0.5, such that the
-                ! correct central flux is scattered to both cells.
-                ! In case of a boundFlux the normal velocity is set
-                ! to sFace.
+                        ! Set the values of the porosities for this face.
+                        ! porVel defines the porosity w.r.t. velocity;
+                        ! porFlux defines the porosity w.r.t. the entire flux.
+                        ! The latter is only zero for a discontinuous block
+                        ! block boundary that must be treated conservatively.
+                        ! The default value of porFlux is 0.5, such that the
+                        ! correct central flux is scattered to both cells.
+                        ! In case of a boundFlux the normal velocity is set
+                        ! to sFace.
 
-                porVel  = one
-                porFlux = half
+                        porVel = one
+                        porFlux = half
 
-                if(porK(i,j,k) == noFlux)    porFlux = zero
-                if(porK(i,j,k) == boundFlux) then
-                   porVel = zero
-                   vnp    = sFace
-                   vnm    = sFace
-                endif
+                        if (porK(i, j, k) == noFlux) porFlux = zero
+                        if (porK(i, j, k) == boundFlux) then
+                            porVel = zero
+                            vnp = sFace
+                            vnm = sFace
+                        end if
 
-                ! Incorporate porFlux in porVel.
+                        ! Incorporate porFlux in porVel.
 
-                porVel = porVel*porFlux
+                        porVel = porVel * porFlux
 
-                ! Compute the normal velocities for the face as well as the
-                ! mass fluxes.
+                        ! Compute the normal velocities for the face as well as the
+                        ! mass fluxes.
 
-                qsp = (vnp - sFace)*porVel
-                qsm = (vnm - sFace)*porVel
+                        qsp = (vnp - sFace) * porVel
+                        qsm = (vnm - sFace) * porVel
 
-                rqsp = qsp*w(i,j,k+1,irho)
-                rqsm = qsm*w(i,j,k,  irho)
+                        rqsp = qsp * w(i, j, k + 1, irho)
+                        rqsm = qsm * w(i, j, k, irho)
 
-                ! Compute the sum of the pressure multiplied by porFlux.
-                ! For the default value of porFlux, 0.5, this leads to
-                ! the average pressure.
+                        ! Compute the sum of the pressure multiplied by porFlux.
+                        ! For the default value of porFlux, 0.5, this leads to
+                        ! the average pressure.
 
-                pa = porFlux*(p(i,j,k+1) + p(i,j,k))
+                        pa = porFlux * (p(i, j, k + 1) + p(i, j, k))
 
-                ! Compute the fluxes and scatter them to the cells
-                ! i,j,k and i,j,k+1. Store the density flux in the
-                ! mass flow of the appropriate sliding mesh interface.
+                        ! Compute the fluxes and scatter them to the cells
+                        ! i,j,k and i,j,k+1. Store the density flux in the
+                        ! mass flow of the appropriate sliding mesh interface.
 
-                fs = rqsp + rqsm
-                dw(i,j,k+1,irho) = dw(i,j,k+1,irho) - fs
-                dw(i,j,k,  irho) = dw(i,j,k,  irho) + fs
+                        fs = rqsp + rqsm
+                        dw(i, j, k + 1, irho) = dw(i, j, k + 1, irho) - fs
+                        dw(i, j, k, irho) = dw(i, j, k, irho) + fs
 #ifndef USE_TAPENADE
-                ind = indFamilyK(i,j,k)
-                massFlowFamilyInv(ind,spectralSol) =       &
-                     massFlowFamilyInv(ind,spectralSol) &
-                     + factFamilyK(i,j,k)*fs
+                        ind = indFamilyK(i, j, k)
+                        massFlowFamilyInv(ind, spectralSol) = &
+                            massFlowFamilyInv(ind, spectralSol) &
+                            + factFamilyK(i, j, k) * fs
 #endif
-                fs = rqsp*w(i,j,k+1,ivx) + rqsm*w(i,j,k,ivx) &
-                     + pa*sK(i,j,k,1)
-                dw(i,j,k+1,imx) = dw(i,j,k+1,imx) - fs
-                dw(i,j,k,  imx) = dw(i,j,k,  imx) + fs
-
-                fs = rqsp*w(i,j,k+1,ivy) + rqsm*w(i,j,k,ivy) &
-                     + pa*sK(i,j,k,2)
-                dw(i,j,k+1,imy) = dw(i,j,k+1,imy) - fs
-                dw(i,j,k,  imy) = dw(i,j,k,  imy) + fs
-
-                fs = rqsp*w(i,j,k+1,ivz) + rqsm*w(i,j,k,ivz) &
-                     + pa*sK(i,j,k,3)
-                dw(i,j,k+1,imz) = dw(i,j,k+1,imz) - fs
-                dw(i,j,k,  imz) = dw(i,j,k,  imz) + fs
-
-                fs = qsp*w(i,j,k+1,irhoE) + qsm*w(i,j,k,irhoE) &
-                     + porFlux*(vnp*p(i,j,k+1) + vnm*p(i,j,k))
-                dw(i,j,k+1,irhoE) = dw(i,j,k+1,irhoE) - fs
-                dw(i,j,k,  irhoE) = dw(i,j,k,  irhoE) + fs
+                        fs = rqsp * w(i, j, k + 1, ivx) + rqsm * w(i, j, k, ivx) &
+                             + pa * sK(i, j, k, 1)
+                        dw(i, j, k + 1, imx) = dw(i, j, k + 1, imx) - fs
+                        dw(i, j, k, imx) = dw(i, j, k, imx) + fs
+
+                        fs = rqsp * w(i, j, k + 1, ivy) + rqsm * w(i, j, k, ivy) &
+                             + pa * sK(i, j, k, 2)
+                        dw(i, j, k + 1, imy) = dw(i, j, k + 1, imy) - fs
+                        dw(i, j, k, imy) = dw(i, j, k, imy) + fs
+
+                        fs = rqsp * w(i, j, k + 1, ivz) + rqsm * w(i, j, k, ivz) &
+                             + pa * sK(i, j, k, 3)
+                        dw(i, j, k + 1, imz) = dw(i, j, k + 1, imz) - fs
+                        dw(i, j, k, imz) = dw(i, j, k, imz) + fs
+
+                        fs = qsp * w(i, j, k + 1, irhoE) + qsm * w(i, j, k, irhoE) &
+                             + porFlux * (vnp * p(i, j, k + 1) + vnm * p(i, j, k))
+                        dw(i, j, k + 1, irhoE) = dw(i, j, k + 1, irhoE) - fs
+                        dw(i, j, k, irhoE) = dw(i, j, k, irhoE) + fs
 #ifdef TAPENADE_REVERSE
-             end do
+                    end do
 #else
-          enddo
-       enddo
-    enddo
+                end do
+            end do
+        end do
 #endif
-    continue
-    !$AD CHECKPOINT-END
-
-    ! Add the rotational source terms for a moving block in a
-    ! steady state computation. These source terms account for the
-    ! centrifugal acceleration and the coriolis term. However, as
-    ! the the equations are solved in the inertial frame and not
-    ! in the moving frame, the form is different than what you
-    ! normally find in a text book.
-
-    continue
-    !$AD CHECKPOINT-START
-    rotation: if(blockIsMoving .and. equationMode == steady) then
-
-       ! Compute the three nonDimensional angular velocities.
-
-       wwx = timeRef*cgnsDoms(nbkGlobal)%rotRate(1)
-       wwy = timeRef*cgnsDoms(nbkGlobal)%rotRate(2)
-       wwz = timeRef*cgnsDoms(nbkGlobal)%rotRate(3)
-
-       ! Loop over the internal cells of this block to compute the
-       ! rotational terms for the momentum equations.
-       !$AD II-LOOP
-       do ii=0,nx*ny*nz-1
-          i = mod(ii, nx) + 2
-          j = mod(ii/nx, ny) + 2
-          k = ii/(nx*ny) + 2
-          rvol = w(i,j,k,irho)*vol(i,j,k)
-
-          dw(i,j,k,imx) = dw(i,j,k,imx) &
-               + rvol*(wwy*w(i,j,k,ivz) - wwz*w(i,j,k,ivy))
-          dw(i,j,k,imy) = dw(i,j,k,imy) &
-               + rvol*(wwz*w(i,j,k,ivx) - wwx*w(i,j,k,ivz))
-          dw(i,j,k,imz) = dw(i,j,k,imz) &
-               + rvol*(wwx*w(i,j,k,ivy) - wwy*w(i,j,k,ivx))
-       enddo
-
-    endif rotation
-
-    !$AD CHECKPOINT-END
-    continue
-  end subroutine inviscidCentralFlux
-
-  subroutine inviscidDissFluxMatrix
-    !
-    !       inviscidDissFluxMatrix computes the matrix artificial
-    !       dissipation term. Instead of the spectral radius, as used in
-    !       the scalar dissipation scheme, the absolute value of the flux
-    !       jacobian is used. This leads to a less diffusive and
-    !       consequently more accurate scheme. It is assumed that the
-    !       pointers in blockPointers already point to the correct block.
-    !
-    use constants
-    use blockPointers, only : nx, ny, nz, il, jl, kl, ie, je, ke, ib, jb, kb, &
-         w, p, porI, porJ, porK, fw, gamma, si, sj, sk, &
-         indFamilyI, indFamilyJ, indFamilyK, spectralSol, addGridVelocities, &
-         sFaceI, sfaceJ, sFacek, factFamilyI, factFamilyJ, factFamilyK
-    use flowVarRefState, only : pInfCorr
-    use inputDiscretization, only: vis2, vis4
-    use inputPhysics, only : equations
-    use iteration, only : rFil
-    use cgnsGrid, only: massFlowFamilyDiss
-    use utils, only : getCorrectForK, myDim
-    implicit none
-    !
-    !      Local parameters.
-    !
-    real(kind=realType), parameter :: dpMax        = 0.25_realType
-    real(kind=realType), parameter :: epsAcoustic  = 0.25_realType
-    real(kind=realType), parameter :: epsShear     = 0.025_realType
-    real(kind=realType), parameter :: omega        = 0.5_realType
-    real(kind=realType), parameter :: oneMinOmega = one - omega
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, ind, ii
-
-    real(kind=realType) :: plim, sface
-    real(kind=realType) :: sfil, fis2, fis4
-    real(kind=realType) :: gammaAvg, gm1, ovgm1, gm53
-    real(kind=realType) :: ppor, rrad, dis2, dis4
-    real(kind=realType) :: dp1, dp2, tmp, fs
-    real(kind=realType) :: ddw1, ddw2, ddw3, ddw4, ddw5, ddw6
-    real(kind=realType) :: dr, dru, drv, drw, dre, drk, sx, sy, sz
-    real(kind=realType) :: uAvg, vAvg, wAvg, a2Avg, aAvg, hAvg
-    real(kind=realType) :: alphaAvg, unAvg, ovaAvg, ova2Avg
-    real(kind=realType) :: kAvg, lam1, lam2, lam3, area
-    real(kind=realType) :: abv1, abv2, abv3, abv4, abv5, abv6, abv7
-    real(kind=realType),dimension(1:ie,1:je,1:ke,3) :: dss
-    logical :: correctForK
-
-    ! Check if rFil == 0. If so, the dissipative flux needs not to
-    ! be computed.
-
-    if(abs(rFil) < thresholdReal) return
-
-    ! Set the value of plim. To be fully consistent this must have
-    ! the dimension of a pressure. Therefore a fraction of pInfCorr
-    ! is used.
-
-    plim = 0.001_realType*pInfCorr
-
-    ! Determine whether or not the total energy must be corrected
-    ! for the presence of the turbulent kinetic energy.
-
-    correctForK = getCorrectForK()
-
-    ! Initialize sface to zero. This value will be used if the
-    ! block is not moving.
-
-    sface = zero
-
-    ! Set a couple of constants for the scheme.
-
-    fis2 = rFil*vis2
-    fis4 = rFil*vis4
-    sfil = one - rFil
-
-    ! Initialize the dissipative residual to a certain times,
-    ! possibly zero, the previously stored value.
-
-    fw = sfil*fw
-
-    ! Compute the pressure sensor for each cell, in each direction:
+        continue
+        !$AD CHECKPOINT-END
+
+        ! Add the rotational source terms for a moving block in a
+        ! steady state computation. These source terms account for the
+        ! centrifugal acceleration and the coriolis term. However, as
+        ! the the equations are solved in the inertial frame and not
+        ! in the moving frame, the form is different than what you
+        ! normally find in a text book.
+
+        continue
+        !$AD CHECKPOINT-START
+        rotation: if (blockIsMoving .and. equationMode == steady) then
+
+            ! Compute the three nonDimensional angular velocities.
+
+            wwx = timeRef * cgnsDoms(nbkGlobal)%rotRate(1)
+            wwy = timeRef * cgnsDoms(nbkGlobal)%rotRate(2)
+            wwz = timeRef * cgnsDoms(nbkGlobal)%rotRate(3)
+
+            ! Loop over the internal cells of this block to compute the
+            ! rotational terms for the momentum equations.
+            !$AD II-LOOP
+            do ii = 0, nx * ny * nz - 1
+                i = mod(ii, nx) + 2
+                j = mod(ii / nx, ny) + 2
+                k = ii / (nx * ny) + 2
+                rvol = w(i, j, k, irho) * vol(i, j, k)
+
+                dw(i, j, k, imx) = dw(i, j, k, imx) &
+                                   + rvol * (wwy * w(i, j, k, ivz) - wwz * w(i, j, k, ivy))
+                dw(i, j, k, imy) = dw(i, j, k, imy) &
+                                   + rvol * (wwz * w(i, j, k, ivx) - wwx * w(i, j, k, ivz))
+                dw(i, j, k, imz) = dw(i, j, k, imz) &
+                                   + rvol * (wwx * w(i, j, k, ivy) - wwy * w(i, j, k, ivx))
+            end do
+
+        end if rotation
+
+        !$AD CHECKPOINT-END
+        continue
+    end subroutine inviscidCentralFlux
+
+    subroutine inviscidDissFluxMatrix
+        !
+        !       inviscidDissFluxMatrix computes the matrix artificial
+        !       dissipation term. Instead of the spectral radius, as used in
+        !       the scalar dissipation scheme, the absolute value of the flux
+        !       jacobian is used. This leads to a less diffusive and
+        !       consequently more accurate scheme. It is assumed that the
+        !       pointers in blockPointers already point to the correct block.
+        !
+        use constants
+        use blockPointers, only: nx, ny, nz, il, jl, kl, ie, je, ke, ib, jb, kb, &
+                                 w, p, porI, porJ, porK, fw, gamma, si, sj, sk, &
+                                 indFamilyI, indFamilyJ, indFamilyK, spectralSol, addGridVelocities, &
+                                 sFaceI, sfaceJ, sFacek, factFamilyI, factFamilyJ, factFamilyK
+        use flowVarRefState, only: pInfCorr
+        use inputDiscretization, only: vis2, vis4
+        use inputPhysics, only: equations
+        use iteration, only: rFil
+        use cgnsGrid, only: massFlowFamilyDiss
+        use utils, only: getCorrectForK, myDim
+        implicit none
+        !
+        !      Local parameters.
+        !
+        real(kind=realType), parameter :: dpMax = 0.25_realType
+        real(kind=realType), parameter :: epsAcoustic = 0.25_realType
+        real(kind=realType), parameter :: epsShear = 0.025_realType
+        real(kind=realType), parameter :: omega = 0.5_realType
+        real(kind=realType), parameter :: oneMinOmega = one - omega
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, ind, ii
+
+        real(kind=realType) :: plim, sface
+        real(kind=realType) :: sfil, fis2, fis4
+        real(kind=realType) :: gammaAvg, gm1, ovgm1, gm53
+        real(kind=realType) :: ppor, rrad, dis2, dis4
+        real(kind=realType) :: dp1, dp2, tmp, fs
+        real(kind=realType) :: ddw1, ddw2, ddw3, ddw4, ddw5, ddw6
+        real(kind=realType) :: dr, dru, drv, drw, dre, drk, sx, sy, sz
+        real(kind=realType) :: uAvg, vAvg, wAvg, a2Avg, aAvg, hAvg
+        real(kind=realType) :: alphaAvg, unAvg, ovaAvg, ova2Avg
+        real(kind=realType) :: kAvg, lam1, lam2, lam3, area
+        real(kind=realType) :: abv1, abv2, abv3, abv4, abv5, abv6, abv7
+        real(kind=realType), dimension(1:ie, 1:je, 1:ke, 3) :: dss
+        logical :: correctForK
+
+        ! Check if rFil == 0. If so, the dissipative flux needs not to
+        ! be computed.
+
+        if (abs(rFil) < thresholdReal) return
+
+        ! Set the value of plim. To be fully consistent this must have
+        ! the dimension of a pressure. Therefore a fraction of pInfCorr
+        ! is used.
+
+        plim = 0.001_realType * pInfCorr
+
+        ! Determine whether or not the total energy must be corrected
+        ! for the presence of the turbulent kinetic energy.
+
+        correctForK = getCorrectForK()
+
+        ! Initialize sface to zero. This value will be used if the
+        ! block is not moving.
+
+        sface = zero
+
+        ! Set a couple of constants for the scheme.
+
+        fis2 = rFil * vis2
+        fis4 = rFil * vis4
+        sfil = one - rFil
+
+        ! Initialize the dissipative residual to a certain times,
+        ! possibly zero, the previously stored value.
+
+        fw = sfil * fw
+
+        ! Compute the pressure sensor for each cell, in each direction:
 #ifdef TAPENADE_REVERSE
-    !$AD II-LOOP
-    do ii=0,ie*je*ke-1
-       i = mod(ii, ie) + 1
-       j = mod(ii/ie, je) + 1
-       k = ii/(ie*je) + 1
+        !$AD II-LOOP
+        do ii = 0, ie * je * ke - 1
+            i = mod(ii, ie) + 1
+            j = mod(ii / ie, je) + 1
+            k = ii / (ie * je) + 1
 #else
-       do k=1,ke
-          do j=1,je
-             do i=1,ie
+            do k = 1, ke
+                do j = 1, je
+                    do i = 1, ie
 #endif
-                dss(i,j,k,1) =abs((p(i+1,j,k) - two*p(i,j,k) + p(i-1,j,k))        &
-                     /     (omega*(p(i+1,j,k) + two*p(i,j,k) + p(i-1,j,k)) &
-                     +      oneMinOmega*(abs(p(i+1,j,k) - p(i,j,k))      &
-                     +                   abs(p(i,j,k) - p(i-1,j,k))) + plim))
-
-
-                dss(i,j,k,2) =abs((p(i,j+1,k) - two*p(i,j,k) + p(i,j-1,k))        &
-                     /     (omega*(p(i,j+1,k) + two*p(i,j,k) + p(i,j-1,k)) &
-                     +      oneMinOmega*(abs(p(i,j+1,k) - p(i,j,k))      &
-                     +                   abs(p(i,j,k) - p(i,j-1,k))) + plim))
-
-                dss(i,j,k,3) =  abs((p(i,j,k+1) - two*p(i,j,k) + p(i,j,k-1))        &
-                     /     (omega*(p(i,j,k+1) + two*p(i,j,k) + p(i,j,k-1)) &
-                     +      oneMinOmega*(abs(p(i,j,k+1) - p(i,j,k))      &
-                     +                   abs(p(i,j,k) - p(i,j,k-1))) + plim))
+                        dss(i, j, k, 1) = abs((p(i + 1, j, k) - two * p(i, j, k) + p(i - 1, j, k)) &
+                                              / (omega * (p(i + 1, j, k) + two * p(i, j, k) + p(i - 1, j, k)) &
+                                                 + oneMinOmega * (abs(p(i + 1, j, k) - p(i, j, k)) &
+                                                                  + abs(p(i, j, k) - p(i - 1, j, k))) + plim))
+
+                        dss(i, j, k, 2) = abs((p(i, j + 1, k) - two * p(i, j, k) + p(i, j - 1, k)) &
+                                              / (omega * (p(i, j + 1, k) + two * p(i, j, k) + p(i, j - 1, k)) &
+                                                 + oneMinOmega * (abs(p(i, j + 1, k) - p(i, j, k)) &
+                                                                  + abs(p(i, j, k) - p(i, j - 1, k))) + plim))
+
+                        dss(i, j, k, 3) = abs((p(i, j, k + 1) - two * p(i, j, k) + p(i, j, k - 1)) &
+                                              / (omega * (p(i, j, k + 1) + two * p(i, j, k) + p(i, j, k - 1)) &
+                                                 + oneMinOmega * (abs(p(i, j, k + 1) - p(i, j, k)) &
+                                                                  + abs(p(i, j, k) - p(i, j, k - 1))) + plim))
 #ifdef TAPENADE_REVERSE
-             end do
+                    end do
 #else
-          end do
-       end do
-    end do
+                end do
+            end do
+        end do
 #endif
-    !
-    !       Dissipative fluxes in the i-direction.
-    !
+        !
+        !       Dissipative fluxes in the i-direction.
+        !
 #ifdef TAPENADE_REVERSE
-    !$AD II-LOOP
-    do ii=0,il*ny*nz-1
-       i = mod(ii, il) + 1
-       j = mod(ii/il, ny) + 2
-       k = ii/(il*ny) + 2
+        !$AD II-LOOP
+        do ii = 0, il * ny * nz - 1
+            i = mod(ii, il) + 1
+            j = mod(ii / il, ny) + 2
+            k = ii / (il * ny) + 2
 #else
-       do k=2,kl
-          do j=2,jl
-             do i=1,il
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 1, il
 #endif
-                ! Compute the dissipation coefficients for this face.
-
-                ppor = zero
-                if(porI(i,j,k) == normalFlux) ppor = one
-                dis2 = ppor*fis2*min(dpMax, max(dss(i,j,k,1), dss(i+1,j,k,1)))
-                dis4 = myDim(ppor*fis4, dis2)
-
-                ! Construct the vector of the first and third differences
-                ! multiplied by the appropriate constants.
-
-                ddw1 = w(i+1,j,k,irho) - w(i,j,k,irho)
-                dr  = dis2*ddw1 &
-                     - dis4*(w(i+2,j,k,irho) - w(i-1,j,k,irho) - three*ddw1)
-
-                ddw2 = w(i+1,j,k,irho)*w(i+1,j,k,ivx) &
-                     - w(i,j,k,irho)*w(i,j,k,ivx)
-                dru = dis2*ddw2                             &
-                     - dis4*(w(i+2,j,k,irho)*w(i+2,j,k,ivx) &
-                     -       w(i-1,j,k,irho)*w(i-1,j,k,ivx) - three*ddw2)
-
-                ddw3 = w(i+1,j,k,irho)*w(i+1,j,k,ivy) &
-                     - w(i,j,k,irho)*w(i,j,k,ivy)
-                drv = dis2*ddw3                             &
-                     - dis4*(w(i+2,j,k,irho)*w(i+2,j,k,ivy) &
-                     -       w(i-1,j,k,irho)*w(i-1,j,k,ivy) - three*ddw3)
-
-                ddw4 = w(i+1,j,k,irho)*w(i+1,j,k,ivz) &
-                     - w(i,j,k,irho)*w(i,j,k,ivz)
-                drw = dis2*ddw4                             &
-                     - dis4*(w(i+2,j,k,irho)*w(i+2,j,k,ivz) &
-                     -       w(i-1,j,k,irho)*w(i-1,j,k,ivz) - three*ddw4)
-
-                ddw5 = w(i+1,j,k,irhoE) - w(i,j,k,irhoE)
-                dre = dis2*ddw5 &
-                     - dis4*(w(i+2,j,k,irhoE) - w(i-1,j,k,irhoE) - three*ddw5)
-
-                ! In case a k-equation is present, compute the difference
-                ! of rhok and store the average value of k. If not present,
-                ! set both these values to zero, such that later on no
-                ! decision needs to be made anymore.
-
-                if( correctForK ) then
-                   ddw6 = w(i+1,j,k,irho)*w(i+1,j,k,itu1) &
-                        - w(i,j,k,irho)*w(i,j,k,itu1)
-                   drk = dis2*ddw6                              &
-                        - dis4*(w(i+2,j,k,irho)*w(i+2,j,k,itu1) &
-                        -       w(i-1,j,k,irho)*w(i-1,j,k,itu1) - three*ddw6)
-
-                   kAvg = half*(w(i,j,k,itu1) + w(i+1,j,k,itu1))
-                else
-                   drk   = zero
-                   kAvg = zero
-                endif
-
-                ! Compute the average value of gamma and compute some
-                ! expressions in which it occurs.
-
-                gammaAvg = half*(gamma(i+1,j,k) + gamma(i,j,k))
-                gm1      = gammaAvg - one
-                ovgm1    = one/gm1
-                gm53     = gammaAvg - five*third
-
-                ! Compute the average state at the interface.
-
-                uAvg  = half*(w(i+1,j,k,ivx) + w(i,j,k,ivx))
-                vAvg  = half*(w(i+1,j,k,ivy) + w(i,j,k,ivy))
-                wAvg  = half*(w(i+1,j,k,ivz) + w(i,j,k,ivz))
-                a2Avg = half*(gamma(i+1,j,k)*p(i+1,j,k)/w(i+1,j,k,irho) &
-                     +       gamma(i,  j,k)*p(i,  j,k)/w(i,  j,k,irho))
-
-                area = sqrt(si(i,j,k,1)**2 + si(i,j,k,2)**2 + si(i,j,k,3)**2)
-                tmp  = one/max(1.e-25_realType,area)
-                sx   = si(i,j,k,1)*tmp
-                sy   = si(i,j,k,2)*tmp
-                sz   = si(i,j,k,3)*tmp
-
-                alphaAvg = half*(uAvg**2 + vAvg**2 + wAvg**2)
-                hAvg     = alphaAvg + ovgm1*(a2Avg - gm53*kAvg)
-                aAvg     = sqrt(a2Avg)
-                unAvg    = uAvg*sx + vAvg*sy + wAvg*sz
-                ovaAvg   = one/aAvg
-                ova2Avg  = one/a2Avg
-
-                ! The mesh velocity if the face is moving. It must be
-                ! divided by the area to obtain a true velocity.
-
-                if( addGridVelocities ) sface = sFaceI(i,j,k)*tmp
-
-                ! Compute the absolute values of the three eigenvalues
-                ! and make sure they don't become zero by cutting them
-                ! off to a certain minimum.
-
-                lam1 = abs(unAvg - sface + aAvg)
-                lam2 = abs(unAvg - sface - aAvg)
-                lam3 = abs(unAvg - sface)
-
-                rrad = lam3 + aAvg
-
-                ! Multiply the eigenvalues by the area to obtain
-                ! the correct values for the dissipation term.
-
-                lam1 = max(lam1,epsAcoustic*rrad)*area
-                lam2 = max(lam2,epsAcoustic*rrad)*area
-                lam3 = max(lam3,epsShear*rrad)*area
-
-                ! Some abbreviations, which occur quite often in the
-                ! dissipation terms.
-
-                abv1 = half*(lam1 + lam2)
-                abv2 = half*(lam1 - lam2)
-                abv3 = abv1 - lam3
-
-                abv4 = gm1*(alphaAvg*dr - uAvg*dru -vAvg*drv &
-                     -      wAvg*drw + dre) - gm53*drk
-                abv5 = sx*dru + sy*drv + sz*drw - unAvg*dr
-
-                abv6 = abv3*abv4*ova2Avg + abv2*abv5*ovaAvg
-                abv7 = abv2*abv4*ovaAvg  + abv3*abv5
-
-                ! Compute and scatter the dissipative flux.
-                ! Density.
-
-                fs               = lam3*dr  + abv6
-                fw(i+1,j,k,irho) = fw(i+1,j,k,irho) + fs
-                fw(i,j,k,irho)   = fw(i,j,k,irho)   - fs
+                        ! Compute the dissipation coefficients for this face.
+
+                        ppor = zero
+                        if (porI(i, j, k) == normalFlux) ppor = one
+                        dis2 = ppor * fis2 * min(dpMax, max(dss(i, j, k, 1), dss(i + 1, j, k, 1)))
+                        dis4 = myDim(ppor * fis4, dis2)
+
+                        ! Construct the vector of the first and third differences
+                        ! multiplied by the appropriate constants.
+
+                        ddw1 = w(i + 1, j, k, irho) - w(i, j, k, irho)
+                        dr = dis2 * ddw1 &
+                             - dis4 * (w(i + 2, j, k, irho) - w(i - 1, j, k, irho) - three * ddw1)
+
+                        ddw2 = w(i + 1, j, k, irho) * w(i + 1, j, k, ivx) &
+                               - w(i, j, k, irho) * w(i, j, k, ivx)
+                        dru = dis2 * ddw2 &
+                              - dis4 * (w(i + 2, j, k, irho) * w(i + 2, j, k, ivx) &
+                                        - w(i - 1, j, k, irho) * w(i - 1, j, k, ivx) - three * ddw2)
+
+                        ddw3 = w(i + 1, j, k, irho) * w(i + 1, j, k, ivy) &
+                               - w(i, j, k, irho) * w(i, j, k, ivy)
+                        drv = dis2 * ddw3 &
+                              - dis4 * (w(i + 2, j, k, irho) * w(i + 2, j, k, ivy) &
+                                        - w(i - 1, j, k, irho) * w(i - 1, j, k, ivy) - three * ddw3)
+
+                        ddw4 = w(i + 1, j, k, irho) * w(i + 1, j, k, ivz) &
+                               - w(i, j, k, irho) * w(i, j, k, ivz)
+                        drw = dis2 * ddw4 &
+                              - dis4 * (w(i + 2, j, k, irho) * w(i + 2, j, k, ivz) &
+                                        - w(i - 1, j, k, irho) * w(i - 1, j, k, ivz) - three * ddw4)
+
+                        ddw5 = w(i + 1, j, k, irhoE) - w(i, j, k, irhoE)
+                        dre = dis2 * ddw5 &
+                              - dis4 * (w(i + 2, j, k, irhoE) - w(i - 1, j, k, irhoE) - three * ddw5)
+
+                        ! In case a k-equation is present, compute the difference
+                        ! of rhok and store the average value of k. If not present,
+                        ! set both these values to zero, such that later on no
+                        ! decision needs to be made anymore.
+
+                        if (correctForK) then
+                            ddw6 = w(i + 1, j, k, irho) * w(i + 1, j, k, itu1) &
+                                   - w(i, j, k, irho) * w(i, j, k, itu1)
+                            drk = dis2 * ddw6 &
+                                  - dis4 * (w(i + 2, j, k, irho) * w(i + 2, j, k, itu1) &
+                                            - w(i - 1, j, k, irho) * w(i - 1, j, k, itu1) - three * ddw6)
+
+                            kAvg = half * (w(i, j, k, itu1) + w(i + 1, j, k, itu1))
+                        else
+                            drk = zero
+                            kAvg = zero
+                        end if
+
+                        ! Compute the average value of gamma and compute some
+                        ! expressions in which it occurs.
+
+                        gammaAvg = half * (gamma(i + 1, j, k) + gamma(i, j, k))
+                        gm1 = gammaAvg - one
+                        ovgm1 = one / gm1
+                        gm53 = gammaAvg - five * third
+
+                        ! Compute the average state at the interface.
+
+                        uAvg = half * (w(i + 1, j, k, ivx) + w(i, j, k, ivx))
+                        vAvg = half * (w(i + 1, j, k, ivy) + w(i, j, k, ivy))
+                        wAvg = half * (w(i + 1, j, k, ivz) + w(i, j, k, ivz))
+                        a2Avg = half * (gamma(i + 1, j, k) * p(i + 1, j, k) / w(i + 1, j, k, irho) &
+                                        + gamma(i, j, k) * p(i, j, k) / w(i, j, k, irho))
+
+                        area = sqrt(si(i, j, k, 1)**2 + si(i, j, k, 2)**2 + si(i, j, k, 3)**2)
+                        tmp = one / max(1.e-25_realType, area)
+                        sx = si(i, j, k, 1) * tmp
+                        sy = si(i, j, k, 2) * tmp
+                        sz = si(i, j, k, 3) * tmp
+
+                        alphaAvg = half * (uAvg**2 + vAvg**2 + wAvg**2)
+                        hAvg = alphaAvg + ovgm1 * (a2Avg - gm53 * kAvg)
+                        aAvg = sqrt(a2Avg)
+                        unAvg = uAvg * sx + vAvg * sy + wAvg * sz
+                        ovaAvg = one / aAvg
+                        ova2Avg = one / a2Avg
+
+                        ! The mesh velocity if the face is moving. It must be
+                        ! divided by the area to obtain a true velocity.
+
+                        if (addGridVelocities) sface = sFaceI(i, j, k) * tmp
+
+                        ! Compute the absolute values of the three eigenvalues
+                        ! and make sure they don't become zero by cutting them
+                        ! off to a certain minimum.
+
+                        lam1 = abs(unAvg - sface + aAvg)
+                        lam2 = abs(unAvg - sface - aAvg)
+                        lam3 = abs(unAvg - sface)
+
+                        rrad = lam3 + aAvg
+
+                        ! Multiply the eigenvalues by the area to obtain
+                        ! the correct values for the dissipation term.
+
+                        lam1 = max(lam1, epsAcoustic * rrad) * area
+                        lam2 = max(lam2, epsAcoustic * rrad) * area
+                        lam3 = max(lam3, epsShear * rrad) * area
+
+                        ! Some abbreviations, which occur quite often in the
+                        ! dissipation terms.
+
+                        abv1 = half * (lam1 + lam2)
+                        abv2 = half * (lam1 - lam2)
+                        abv3 = abv1 - lam3
+
+                        abv4 = gm1 * (alphaAvg * dr - uAvg * dru - vAvg * drv &
+                                      - wAvg * drw + dre) - gm53 * drk
+                        abv5 = sx * dru + sy * drv + sz * drw - unAvg * dr
+
+                        abv6 = abv3 * abv4 * ova2Avg + abv2 * abv5 * ovaAvg
+                        abv7 = abv2 * abv4 * ovaAvg + abv3 * abv5
+
+                        ! Compute and scatter the dissipative flux.
+                        ! Density.
+
+                        fs = lam3 * dr + abv6
+                        fw(i + 1, j, k, irho) = fw(i + 1, j, k, irho) + fs
+                        fw(i, j, k, irho) = fw(i, j, k, irho) - fs
 #ifndef USE_TAPENADE
-                ind = indFamilyI(i,j,k)
-                massFlowFamilyDiss(ind,spectralSol) =       &
-                     massFlowFamilyDiss(ind,spectralSol) &
-                     - factFamilyI(i,j,k)*fs
+                        ind = indFamilyI(i, j, k)
+                        massFlowFamilyDiss(ind, spectralSol) = &
+                            massFlowFamilyDiss(ind, spectralSol) &
+                            - factFamilyI(i, j, k) * fs
 #endif
-                ! X-momentum.
+                        ! X-momentum.
 
-                fs              = lam3*dru + uAvg*abv6 + sx*abv7
-                fw(i+1,j,k,imx) = fw(i+1,j,k,imx) + fs
-                fw(i,j,k,imx)   = fw(i,j,k,imx)   - fs
+                        fs = lam3 * dru + uAvg * abv6 + sx * abv7
+                        fw(i + 1, j, k, imx) = fw(i + 1, j, k, imx) + fs
+                        fw(i, j, k, imx) = fw(i, j, k, imx) - fs
 
-                ! Y-momentum.
+                        ! Y-momentum.
 
-                fs              = lam3*drv + vAvg*abv6 + sy*abv7
-                fw(i+1,j,k,imy) = fw(i+1,j,k,imy) + fs
-                fw(i,j,k,imy)   = fw(i,j,k,imy)   - fs
+                        fs = lam3 * drv + vAvg * abv6 + sy * abv7
+                        fw(i + 1, j, k, imy) = fw(i + 1, j, k, imy) + fs
+                        fw(i, j, k, imy) = fw(i, j, k, imy) - fs
 
-                ! Z-momentum.
+                        ! Z-momentum.
 
-                fs              = lam3*drw + wAvg*abv6 + sz*abv7
-                fw(i+1,j,k,imz) = fw(i+1,j,k,imz) + fs
-                fw(i,j,k,imz)   = fw(i,j,k,imz)   - fs
+                        fs = lam3 * drw + wAvg * abv6 + sz * abv7
+                        fw(i + 1, j, k, imz) = fw(i + 1, j, k, imz) + fs
+                        fw(i, j, k, imz) = fw(i, j, k, imz) - fs
 
-                ! Energy.
+                        ! Energy.
 
-                fs                = lam3*dre + hAvg*abv6 + unAvg*abv7
-                fw(i+1,j,k,irhoE) = fw(i+1,j,k,irhoE) + fs
-                fw(i,j,k,irhoE)   = fw(i,j,k,irhoE)   - fs
+                        fs = lam3 * dre + hAvg * abv6 + unAvg * abv7
+                        fw(i + 1, j, k, irhoE) = fw(i + 1, j, k, irhoE) + fs
+                        fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - fs
 
 #ifdef TAPENADE_REVERSE
-             end do
+                    end do
 #else
-          end do
-       end do
-    end do
+                end do
+            end do
+        end do
 #endif
-    !
-    !       Dissipative fluxes in the j-direction.
-    !
+        !
+        !       Dissipative fluxes in the j-direction.
+        !
 #ifdef TAPENADE_REVERSE
-    !$AD II-LOOP
-    do ii=0,nx*jl*nz-1
-       i = mod(ii, nx) + 2
-       j = mod(ii/nx, jl) + 1
-       k = ii/(nx*jl) + 2
+        !$AD II-LOOP
+        do ii = 0, nx * jl * nz - 1
+            i = mod(ii, nx) + 2
+            j = mod(ii / nx, jl) + 1
+            k = ii / (nx * jl) + 2
 #else
-       do k=2,kl
-          do j=1,jl
-             do i=2,il
+            do k = 2, kl
+                do j = 1, jl
+                    do i = 2, il
 #endif
 
-                ! Compute the dissipation coefficients for this face.
+                        ! Compute the dissipation coefficients for this face.
 
-                ppor = zero
-                if(porJ(i,j,k) == normalFlux) ppor = one
-
-                dis2 = ppor*fis2*min(dpMax, max(dss(i,j,k,2), dss(i,j+1,k,2)))
-                dis4 = myDim(ppor*fis4, dis2)
-
-                ! Construct the vector of the first and third differences
-                ! multiplied by the appropriate constants.
-
-                ddw1 = w(i,j+1,k,irho) - w(i,j,k,irho)
-                dr  = dis2*ddw1 &
-                     - dis4*(w(i,j+2,k,irho) - w(i,j-1,k,irho) - three*ddw1)
-
-                ddw2 = w(i,j+1,k,irho)*w(i,j+1,k,ivx) &
-                     - w(i,j,k,irho)*w(i,j,k,ivx)
-                dru = dis2*ddw2                             &
-                     - dis4*(w(i,j+2,k,irho)*w(i,j+2,k,ivx) &
-                     -       w(i,j-1,k,irho)*w(i,j-1,k,ivx) - three*ddw2)
+                        ppor = zero
+                        if (porJ(i, j, k) == normalFlux) ppor = one
+
+                        dis2 = ppor * fis2 * min(dpMax, max(dss(i, j, k, 2), dss(i, j + 1, k, 2)))
+                        dis4 = myDim(ppor * fis4, dis2)
+
+                        ! Construct the vector of the first and third differences
+                        ! multiplied by the appropriate constants.
+
+                        ddw1 = w(i, j + 1, k, irho) - w(i, j, k, irho)
+                        dr = dis2 * ddw1 &
+                             - dis4 * (w(i, j + 2, k, irho) - w(i, j - 1, k, irho) - three * ddw1)
+
+                        ddw2 = w(i, j + 1, k, irho) * w(i, j + 1, k, ivx) &
+                               - w(i, j, k, irho) * w(i, j, k, ivx)
+                        dru = dis2 * ddw2 &
+                              - dis4 * (w(i, j + 2, k, irho) * w(i, j + 2, k, ivx) &
+                                        - w(i, j - 1, k, irho) * w(i, j - 1, k, ivx) - three * ddw2)
 
-                ddw3 = w(i,j+1,k,irho)*w(i,j+1,k,ivy) &
-                     - w(i,j,k,irho)*w(i,j,k,ivy)
-                drv = dis2*ddw3 &
-                     - dis4*(w(i,j+2,k,irho)*w(i,j+2,k,ivy) &
-                     -       w(i,j-1,k,irho)*w(i,j-1,k,ivy) - three*ddw3)
+                        ddw3 = w(i, j + 1, k, irho) * w(i, j + 1, k, ivy) &
+                               - w(i, j, k, irho) * w(i, j, k, ivy)
+                        drv = dis2 * ddw3 &
+                              - dis4 * (w(i, j + 2, k, irho) * w(i, j + 2, k, ivy) &
+                                        - w(i, j - 1, k, irho) * w(i, j - 1, k, ivy) - three * ddw3)
 
-                ddw4 = w(i,j+1,k,irho)*w(i,j+1,k,ivz) &
-                     - w(i,j,k,irho)*w(i,j,k,ivz)
-                drw = dis2*ddw4 &
-                     - dis4*(w(i,j+2,k,irho)*w(i,j+2,k,ivz) &
-                     -       w(i,j-1,k,irho)*w(i,j-1,k,ivz) - three*ddw4)
+                        ddw4 = w(i, j + 1, k, irho) * w(i, j + 1, k, ivz) &
+                               - w(i, j, k, irho) * w(i, j, k, ivz)
+                        drw = dis2 * ddw4 &
+                              - dis4 * (w(i, j + 2, k, irho) * w(i, j + 2, k, ivz) &
+                                        - w(i, j - 1, k, irho) * w(i, j - 1, k, ivz) - three * ddw4)
 
-                ddw5 = w(i,j+1,k,irhoE) - w(i,j,k,irhoE)
-                dre = dis2*ddw5 &
-                     - dis4*(w(i,j+2,k,irhoE) - w(i,j-1,k,irhoE) - three*ddw5)
+                        ddw5 = w(i, j + 1, k, irhoE) - w(i, j, k, irhoE)
+                        dre = dis2 * ddw5 &
+                              - dis4 * (w(i, j + 2, k, irhoE) - w(i, j - 1, k, irhoE) - three * ddw5)
 
-                ! In case a k-equation is present, compute the difference
-                ! of rhok and store the average value of k. If not present,
-                ! set both these values to zero, such that later on no
-                ! decision needs to be made anymore.
+                        ! In case a k-equation is present, compute the difference
+                        ! of rhok and store the average value of k. If not present,
+                        ! set both these values to zero, such that later on no
+                        ! decision needs to be made anymore.
 
-                if( correctForK ) then
-                   ddw6 = w(i,j+1,k,irho)*w(i,j+1,k,itu1) &
-                        - w(i,j,k,irho)*w(i,j,k,itu1)
-                   drk = dis2*ddw6                              &
-                        - dis4*(w(i,j+2,k,irho)*w(i,j+2,k,itu1) &
-                        -       w(i,j-1,k,irho)*w(i,j-1,k,itu1) - three*ddw6)
+                        if (correctForK) then
+                            ddw6 = w(i, j + 1, k, irho) * w(i, j + 1, k, itu1) &
+                                   - w(i, j, k, irho) * w(i, j, k, itu1)
+                            drk = dis2 * ddw6 &
+                                  - dis4 * (w(i, j + 2, k, irho) * w(i, j + 2, k, itu1) &
+                                            - w(i, j - 1, k, irho) * w(i, j - 1, k, itu1) - three * ddw6)
 
-                   kAvg = half*(w(i,j,k,itu1) + w(i,j+1,k,itu1))
-                else
-                   drk   = zero
-                   kAvg = zero
-                endif
+                            kAvg = half * (w(i, j, k, itu1) + w(i, j + 1, k, itu1))
+                        else
+                            drk = zero
+                            kAvg = zero
+                        end if
 
-                ! Compute the average value of gamma and compute some
-                ! expressions in which it occurs.
+                        ! Compute the average value of gamma and compute some
+                        ! expressions in which it occurs.
 
-                gammaAvg = half*(gamma(i,j+1,k) + gamma(i,j,k))
-                gm1      = gammaAvg - one
-                ovgm1    = one/gm1
-                gm53     = gammaAvg - five*third
+                        gammaAvg = half * (gamma(i, j + 1, k) + gamma(i, j, k))
+                        gm1 = gammaAvg - one
+                        ovgm1 = one / gm1
+                        gm53 = gammaAvg - five * third
 
-                ! Compute the average state at the interface.
+                        ! Compute the average state at the interface.
 
-                uAvg  = half*(w(i,j+1,k,ivx) + w(i,j,k,ivx))
-                vAvg  = half*(w(i,j+1,k,ivy) + w(i,j,k,ivy))
-                wAvg  = half*(w(i,j+1,k,ivz) + w(i,j,k,ivz))
-                a2Avg = half*(gamma(i,j+1,k)*p(i,j+1,k)/w(i,j+1,k,irho) &
-                     +       gamma(i,j,  k)*p(i,j,  k)/w(i,j,  k,irho))
+                        uAvg = half * (w(i, j + 1, k, ivx) + w(i, j, k, ivx))
+                        vAvg = half * (w(i, j + 1, k, ivy) + w(i, j, k, ivy))
+                        wAvg = half * (w(i, j + 1, k, ivz) + w(i, j, k, ivz))
+                        a2Avg = half * (gamma(i, j + 1, k) * p(i, j + 1, k) / w(i, j + 1, k, irho) &
+                                        + gamma(i, j, k) * p(i, j, k) / w(i, j, k, irho))
 
-                area = sqrt(sj(i,j,k,1)**2 + sj(i,j,k,2)**2 + sj(i,j,k,3)**2)
-                tmp  = one/max(1.e-25_realType,area)
-                sx   = sj(i,j,k,1)*tmp
-                sy   = sj(i,j,k,2)*tmp
-                sz   = sj(i,j,k,3)*tmp
+                        area = sqrt(sj(i, j, k, 1)**2 + sj(i, j, k, 2)**2 + sj(i, j, k, 3)**2)
+                        tmp = one / max(1.e-25_realType, area)
+                        sx = sj(i, j, k, 1) * tmp
+                        sy = sj(i, j, k, 2) * tmp
+                        sz = sj(i, j, k, 3) * tmp
 
-                alphaAvg = half*(uAvg**2 + vAvg**2 + wAvg**2)
-                hAvg     = alphaAvg + ovgm1*(a2Avg - gm53*kAvg)
-                aAvg     = sqrt(a2Avg)
-                unAvg    = uAvg*sx + vAvg*sy + wAvg*sz
-                ovaAvg   = one/aAvg
-                ova2Avg  = one/a2Avg
+                        alphaAvg = half * (uAvg**2 + vAvg**2 + wAvg**2)
+                        hAvg = alphaAvg + ovgm1 * (a2Avg - gm53 * kAvg)
+                        aAvg = sqrt(a2Avg)
+                        unAvg = uAvg * sx + vAvg * sy + wAvg * sz
+                        ovaAvg = one / aAvg
+                        ova2Avg = one / a2Avg
 
-                ! The mesh velocity if the face is moving. It must be
-                ! divided by the area to obtain a true velocity.
+                        ! The mesh velocity if the face is moving. It must be
+                        ! divided by the area to obtain a true velocity.
 
-                if( addGridVelocities ) sface = sFaceJ(i,j,k)*tmp
+                        if (addGridVelocities) sface = sFaceJ(i, j, k) * tmp
 
-                ! Compute the absolute values of the three eigenvalues
-                ! and make sure they don't become zero by cutting them
-                ! off to a certain minimum.
+                        ! Compute the absolute values of the three eigenvalues
+                        ! and make sure they don't become zero by cutting them
+                        ! off to a certain minimum.
 
-                lam1 = abs(unAvg - sface + aAvg)
-                lam2 = abs(unAvg - sface - aAvg)
-                lam3 = abs(unAvg - sface)
+                        lam1 = abs(unAvg - sface + aAvg)
+                        lam2 = abs(unAvg - sface - aAvg)
+                        lam3 = abs(unAvg - sface)
 
-                rrad = lam3 + aAvg
+                        rrad = lam3 + aAvg
 
-                ! Multiply the eigenvalues by the area to obtain
-                ! the correct values for the dissipation term.
-
-                lam1 = max(lam1,epsAcoustic*rrad)*area
-                lam2 = max(lam2,epsAcoustic*rrad)*area
-                lam3 = max(lam3,epsShear*rrad)*area
-
-                ! Some abbreviations, which occur quite often in the
-                ! dissipation terms.
-
-                abv1 = half*(lam1 + lam2)
-                abv2 = half*(lam1 - lam2)
-                abv3 = abv1 - lam3
-
-                abv4 = gm1*(alphaAvg*dr - uAvg*dru -vAvg*drv &
-                     -      wAvg*drw + dre) - gm53*drk
-                abv5 = sx*dru + sy*drv + sz*drw - unAvg*dr
-
-                abv6 = abv3*abv4*ova2Avg + abv2*abv5*ovaAvg
-                abv7 = abv2*abv4*ovaAvg  + abv3*abv5
-
-                ! Compute and scatter the dissipative flux.
-                ! Density.
-
-                fs               = lam3*dr  + abv6
-                fw(i,j+1,k,irho) = fw(i,j+1,k,irho) + fs
-                fw(i,j,k,irho)   = fw(i,j,k,irho)   - fs
+                        ! Multiply the eigenvalues by the area to obtain
+                        ! the correct values for the dissipation term.
+
+                        lam1 = max(lam1, epsAcoustic * rrad) * area
+                        lam2 = max(lam2, epsAcoustic * rrad) * area
+                        lam3 = max(lam3, epsShear * rrad) * area
+
+                        ! Some abbreviations, which occur quite often in the
+                        ! dissipation terms.
+
+                        abv1 = half * (lam1 + lam2)
+                        abv2 = half * (lam1 - lam2)
+                        abv3 = abv1 - lam3
+
+                        abv4 = gm1 * (alphaAvg * dr - uAvg * dru - vAvg * drv &
+                                      - wAvg * drw + dre) - gm53 * drk
+                        abv5 = sx * dru + sy * drv + sz * drw - unAvg * dr
+
+                        abv6 = abv3 * abv4 * ova2Avg + abv2 * abv5 * ovaAvg
+                        abv7 = abv2 * abv4 * ovaAvg + abv3 * abv5
+
+                        ! Compute and scatter the dissipative flux.
+                        ! Density.
+
+                        fs = lam3 * dr + abv6
+                        fw(i, j + 1, k, irho) = fw(i, j + 1, k, irho) + fs
+                        fw(i, j, k, irho) = fw(i, j, k, irho) - fs
 #ifndef USE_TAPENADE
-                ind = indFamilyJ(i,j,k)
-                massFlowFamilyDiss(ind,spectralSol) =       &
-                     massFlowFamilyDiss(ind,spectralSol) &
-                     - factFamilyJ(i,j,k)*fs
+                        ind = indFamilyJ(i, j, k)
+                        massFlowFamilyDiss(ind, spectralSol) = &
+                            massFlowFamilyDiss(ind, spectralSol) &
+                            - factFamilyJ(i, j, k) * fs
 #endif
-                ! X-momentum.
+                        ! X-momentum.
 
-                fs              = lam3*dru + uAvg*abv6 + sx*abv7
-                fw(i,j+1,k,imx) = fw(i,j+1,k,imx) + fs
-                fw(i,j,k,imx)   = fw(i,j,k,imx)   - fs
+                        fs = lam3 * dru + uAvg * abv6 + sx * abv7
+                        fw(i, j + 1, k, imx) = fw(i, j + 1, k, imx) + fs
+                        fw(i, j, k, imx) = fw(i, j, k, imx) - fs
 
-                ! Y-momentum.
+                        ! Y-momentum.
 
-                fs              = lam3*drv + vAvg*abv6 + sy*abv7
-                fw(i,j+1,k,imy) = fw(i,j+1,k,imy) + fs
-                fw(i,j,k,imy)   = fw(i,j,k,imy)   - fs
+                        fs = lam3 * drv + vAvg * abv6 + sy * abv7
+                        fw(i, j + 1, k, imy) = fw(i, j + 1, k, imy) + fs
+                        fw(i, j, k, imy) = fw(i, j, k, imy) - fs
 
-                ! Z-momentum.
+                        ! Z-momentum.
 
-                fs              = lam3*drw + wAvg*abv6 + sz*abv7
-                fw(i,j+1,k,imz) = fw(i,j+1,k,imz) + fs
-                fw(i,j,k,imz)   = fw(i,j,k,imz)   - fs
+                        fs = lam3 * drw + wAvg * abv6 + sz * abv7
+                        fw(i, j + 1, k, imz) = fw(i, j + 1, k, imz) + fs
+                        fw(i, j, k, imz) = fw(i, j, k, imz) - fs
 
-                ! Energy.
+                        ! Energy.
 
-                fs                = lam3*dre + hAvg*abv6 + unAvg*abv7
-                fw(i,j+1,k,irhoE) = fw(i,j+1,k,irhoE) + fs
-                fw(i,j,k,irhoE)   = fw(i,j,k,irhoE)   - fs
+                        fs = lam3 * dre + hAvg * abv6 + unAvg * abv7
+                        fw(i, j + 1, k, irhoE) = fw(i, j + 1, k, irhoE) + fs
+                        fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - fs
 
 #ifdef TAPENADE_REVERSE
-             end do
+                    end do
 #else
-          end do
-       end do
-    end do
+                end do
+            end do
+        end do
 #endif
-    !
-    !       Dissipative fluxes in the k-direction.
-    !
+        !
+        !       Dissipative fluxes in the k-direction.
+        !
 #ifdef TAPENADE_REVERSE
-    !$AD II-LOOP
-    do ii=0,nx*ny*kl-1
-       i = mod(ii, nx) + 2
-       j = mod(ii/nx, ny) + 2
-       k = ii/(nx*ny) + 1
+        !$AD II-LOOP
+        do ii = 0, nx * ny * kl - 1
+            i = mod(ii, nx) + 2
+            j = mod(ii / nx, ny) + 2
+            k = ii / (nx * ny) + 1
 #else
-       do k=1,kl
-          do j=2,jl
-             do i=2,il
+            do k = 1, kl
+                do j = 2, jl
+                    do i = 2, il
 #endif
-                ! Compute the dissipation coefficients for this face.
-
-                ppor = zero
-                if(porK(i,j,k) == normalFlux) ppor = one
-
-                dis2 = ppor*fis2*min(dpMax, max(dss(i,j,k,3), dss(i,j,k+1,3)))
-                dis4 = myDim(ppor*fis4, dis2)
-
-                ! Construct the vector of the first and third differences
-                ! multiplied by the appropriate constants.
-
-                ddw1 = w(i,j,k+1,irho) - w(i,j,k,irho)
-                dr  = dis2*ddw1 &
-                     - dis4*(w(i,j,k+2,irho) - w(i,j,k-1,irho) - three*ddw1)
-
-                ddw2 = w(i,j,k+1,irho)*w(i,j,k+1,ivx) &
-                     - w(i,j,k,irho)*w(i,j,k,ivx)
-                dru = dis2*ddw2                             &
-                     - dis4*(w(i,j,k+2,irho)*w(i,j,k+2,ivx) &
-                     -       w(i,j,k-1,irho)*w(i,j,k-1,ivx) - three*ddw2)
-
-                ddw3 = w(i,j,k+1,irho)*w(i,j,k+1,ivy) &
-                     - w(i,j,k,irho)*w(i,j,k,ivy)
-                drv = dis2*ddw3 &
-                     - dis4*(w(i,j,k+2,irho)*w(i,j,k+2,ivy) &
-                     -       w(i,j,k-1,irho)*w(i,j,k-1,ivy) - three*ddw3)
-
-                ddw4 = w(i,j,k+1,irho)*w(i,j,k+1,ivz) &
-                     - w(i,j,k,irho)*w(i,j,k,ivz)
-                drw = dis2*ddw4 &
-                     - dis4*(w(i,j,k+2,irho)*w(i,j,k+2,ivz) &
-                     -       w(i,j,k-1,irho)*w(i,j,k-1,ivz) - three*ddw4)
-
-                ddw5 = w(i,j,k+1,irhoE) - w(i,j,k,irhoE)
-                dre = dis2*ddw5 &
-                     - dis4*(w(i,j,k+2,irhoE) - w(i,j,k-1,irhoE) - three*ddw5)
-
-                ! In case a k-equation is present, compute the difference
-                ! of rhok and store the average value of k. If not present,
-                ! set both these values to zero, such that later on no
-                ! decision needs to be made anymore.
-
-                if( correctForK ) then
-                   ddw6 = w(i,j,k+1,irho)*w(i,j,k+1,itu1) &
-                        - w(i,j,k,irho)*w(i,j,k,itu1)
-                   drk = dis2*ddw6                              &
-                        - dis4*(w(i,j,k+2,irho)*w(i,j,k+2,itu1) &
-                        -       w(i,j,k-1,irho)*w(i,j,k-1,itu1) - three*ddw6)
-
-                   kAvg = half*(w(i,j,k+1,itu1) + w(i,j,k,itu1))
-                else
-                   drk   = zero
-                   kAvg = zero
-                endif
-
-                ! Compute the average value of gamma and compute some
-                ! expressions in which it occurs.
-
-                gammaAvg = half*(gamma(i,j,k+1) + gamma(i,j,k))
-                gm1      = gammaAvg - one
-                ovgm1    = one/gm1
-                gm53     = gammaAvg - five*third
-
-                ! Compute the average state at the interface.
-
-                uAvg  = half*(w(i,j,k+1,ivx) + w(i,j,k,ivx))
-                vAvg  = half*(w(i,j,k+1,ivy) + w(i,j,k,ivy))
-                wAvg  = half*(w(i,j,k+1,ivz) + w(i,j,k,ivz))
-                a2Avg = half*(gamma(i,j,k+1)*p(i,j,k+1)/w(i,j,k+1,irho) &
-                     +       gamma(i,j,k)  *p(i,j,k)  /w(i,j,k,  irho))
-
-                area = sqrt(sk(i,j,k,1)**2 + sk(i,j,k,2)**2 + sk(i,j,k,3)**2)
-                tmp  = one/max(1.e-25_realType,area)
-                sx   = sk(i,j,k,1)*tmp
-                sy   = sk(i,j,k,2)*tmp
-                sz   = sk(i,j,k,3)*tmp
-
-                alphaAvg = half*(uAvg**2 + vAvg**2 + wAvg**2)
-                hAvg     = alphaAvg + ovgm1*(a2Avg - gm53*kAvg)
-                aAvg     = sqrt(a2Avg)
-                unAvg    = uAvg*sx + vAvg*sy + wAvg*sz
-                ovaAvg   = one/aAvg
-                ova2Avg  = one/a2Avg
-
-                ! The mesh velocity if the face is moving. It must be
-                ! divided by the area to obtain a true velocity.
-
-                if( addGridVelocities ) sface = sFaceK(i,j,k)*tmp
-
-                ! Compute the absolute values of the three eigenvalues
-                ! and make sure they don't become zero by cutting them
-                ! off to a certain minimum.
-
-                lam1 = abs(unAvg - sface + aAvg)
-                lam2 = abs(unAvg - sface - aAvg)
-                lam3 = abs(unAvg - sface)
-
-                rrad = lam3 + aAvg
-
-                ! Multiply the eigenvalues by the area to obtain
-                ! the correct values for the dissipation term.
-
-                lam1 = max(lam1,epsAcoustic*rrad)*area
-                lam2 = max(lam2,epsAcoustic*rrad)*area
-                lam3 = max(lam3,epsShear*rrad)*area
-
-                ! Some abbreviations, which occur quite often in the
-                ! dissipation terms.
-
-                abv1 = half*(lam1 + lam2)
-                abv2 = half*(lam1 - lam2)
-                abv3 = abv1 - lam3
-
-                abv4 = gm1*(alphaAvg*dr - uAvg*dru -vAvg*drv &
-                     -      wAvg*drw + dre) - gm53*drk
-                abv5 = sx*dru + sy*drv + sz*drw - unAvg*dr
-
-                abv6 = abv3*abv4*ova2Avg + abv2*abv5*ovaAvg
-                abv7 = abv2*abv4*ovaAvg  + abv3*abv5
-
-                ! Compute and scatter the dissipative flux.
-                ! Density.
-
-                fs               = lam3*dr  + abv6
-                fw(i,j,k+1,irho) = fw(i,j,k+1,irho) + fs
-                fw(i,j,k,irho)   = fw(i,j,k,irho)   - fs
+                        ! Compute the dissipation coefficients for this face.
+
+                        ppor = zero
+                        if (porK(i, j, k) == normalFlux) ppor = one
+
+                        dis2 = ppor * fis2 * min(dpMax, max(dss(i, j, k, 3), dss(i, j, k + 1, 3)))
+                        dis4 = myDim(ppor * fis4, dis2)
+
+                        ! Construct the vector of the first and third differences
+                        ! multiplied by the appropriate constants.
+
+                        ddw1 = w(i, j, k + 1, irho) - w(i, j, k, irho)
+                        dr = dis2 * ddw1 &
+                             - dis4 * (w(i, j, k + 2, irho) - w(i, j, k - 1, irho) - three * ddw1)
+
+                        ddw2 = w(i, j, k + 1, irho) * w(i, j, k + 1, ivx) &
+                               - w(i, j, k, irho) * w(i, j, k, ivx)
+                        dru = dis2 * ddw2 &
+                              - dis4 * (w(i, j, k + 2, irho) * w(i, j, k + 2, ivx) &
+                                        - w(i, j, k - 1, irho) * w(i, j, k - 1, ivx) - three * ddw2)
+
+                        ddw3 = w(i, j, k + 1, irho) * w(i, j, k + 1, ivy) &
+                               - w(i, j, k, irho) * w(i, j, k, ivy)
+                        drv = dis2 * ddw3 &
+                              - dis4 * (w(i, j, k + 2, irho) * w(i, j, k + 2, ivy) &
+                                        - w(i, j, k - 1, irho) * w(i, j, k - 1, ivy) - three * ddw3)
+
+                        ddw4 = w(i, j, k + 1, irho) * w(i, j, k + 1, ivz) &
+                               - w(i, j, k, irho) * w(i, j, k, ivz)
+                        drw = dis2 * ddw4 &
+                              - dis4 * (w(i, j, k + 2, irho) * w(i, j, k + 2, ivz) &
+                                        - w(i, j, k - 1, irho) * w(i, j, k - 1, ivz) - three * ddw4)
+
+                        ddw5 = w(i, j, k + 1, irhoE) - w(i, j, k, irhoE)
+                        dre = dis2 * ddw5 &
+                              - dis4 * (w(i, j, k + 2, irhoE) - w(i, j, k - 1, irhoE) - three * ddw5)
+
+                        ! In case a k-equation is present, compute the difference
+                        ! of rhok and store the average value of k. If not present,
+                        ! set both these values to zero, such that later on no
+                        ! decision needs to be made anymore.
+
+                        if (correctForK) then
+                            ddw6 = w(i, j, k + 1, irho) * w(i, j, k + 1, itu1) &
+                                   - w(i, j, k, irho) * w(i, j, k, itu1)
+                            drk = dis2 * ddw6 &
+                                  - dis4 * (w(i, j, k + 2, irho) * w(i, j, k + 2, itu1) &
+                                            - w(i, j, k - 1, irho) * w(i, j, k - 1, itu1) - three * ddw6)
+
+                            kAvg = half * (w(i, j, k + 1, itu1) + w(i, j, k, itu1))
+                        else
+                            drk = zero
+                            kAvg = zero
+                        end if
+
+                        ! Compute the average value of gamma and compute some
+                        ! expressions in which it occurs.
+
+                        gammaAvg = half * (gamma(i, j, k + 1) + gamma(i, j, k))
+                        gm1 = gammaAvg - one
+                        ovgm1 = one / gm1
+                        gm53 = gammaAvg - five * third
+
+                        ! Compute the average state at the interface.
+
+                        uAvg = half * (w(i, j, k + 1, ivx) + w(i, j, k, ivx))
+                        vAvg = half * (w(i, j, k + 1, ivy) + w(i, j, k, ivy))
+                        wAvg = half * (w(i, j, k + 1, ivz) + w(i, j, k, ivz))
+                        a2Avg = half * (gamma(i, j, k + 1) * p(i, j, k + 1) / w(i, j, k + 1, irho) &
+                                        + gamma(i, j, k) * p(i, j, k) / w(i, j, k, irho))
+
+                        area = sqrt(sk(i, j, k, 1)**2 + sk(i, j, k, 2)**2 + sk(i, j, k, 3)**2)
+                        tmp = one / max(1.e-25_realType, area)
+                        sx = sk(i, j, k, 1) * tmp
+                        sy = sk(i, j, k, 2) * tmp
+                        sz = sk(i, j, k, 3) * tmp
+
+                        alphaAvg = half * (uAvg**2 + vAvg**2 + wAvg**2)
+                        hAvg = alphaAvg + ovgm1 * (a2Avg - gm53 * kAvg)
+                        aAvg = sqrt(a2Avg)
+                        unAvg = uAvg * sx + vAvg * sy + wAvg * sz
+                        ovaAvg = one / aAvg
+                        ova2Avg = one / a2Avg
+
+                        ! The mesh velocity if the face is moving. It must be
+                        ! divided by the area to obtain a true velocity.
+
+                        if (addGridVelocities) sface = sFaceK(i, j, k) * tmp
+
+                        ! Compute the absolute values of the three eigenvalues
+                        ! and make sure they don't become zero by cutting them
+                        ! off to a certain minimum.
+
+                        lam1 = abs(unAvg - sface + aAvg)
+                        lam2 = abs(unAvg - sface - aAvg)
+                        lam3 = abs(unAvg - sface)
+
+                        rrad = lam3 + aAvg
+
+                        ! Multiply the eigenvalues by the area to obtain
+                        ! the correct values for the dissipation term.
+
+                        lam1 = max(lam1, epsAcoustic * rrad) * area
+                        lam2 = max(lam2, epsAcoustic * rrad) * area
+                        lam3 = max(lam3, epsShear * rrad) * area
+
+                        ! Some abbreviations, which occur quite often in the
+                        ! dissipation terms.
+
+                        abv1 = half * (lam1 + lam2)
+                        abv2 = half * (lam1 - lam2)
+                        abv3 = abv1 - lam3
+
+                        abv4 = gm1 * (alphaAvg * dr - uAvg * dru - vAvg * drv &
+                                      - wAvg * drw + dre) - gm53 * drk
+                        abv5 = sx * dru + sy * drv + sz * drw - unAvg * dr
+
+                        abv6 = abv3 * abv4 * ova2Avg + abv2 * abv5 * ovaAvg
+                        abv7 = abv2 * abv4 * ovaAvg + abv3 * abv5
+
+                        ! Compute and scatter the dissipative flux.
+                        ! Density.
+
+                        fs = lam3 * dr + abv6
+                        fw(i, j, k + 1, irho) = fw(i, j, k + 1, irho) + fs
+                        fw(i, j, k, irho) = fw(i, j, k, irho) - fs
 #ifndef USE_TAPENADE
-                ind = indFamilyK(i,j,k)
-                massFlowFamilyDiss(ind,spectralSol) =       &
-                     massFlowFamilyDiss(ind,spectralSol) &
-                     - factFamilyK(i,j,k)*fs
+                        ind = indFamilyK(i, j, k)
+                        massFlowFamilyDiss(ind, spectralSol) = &
+                            massFlowFamilyDiss(ind, spectralSol) &
+                            - factFamilyK(i, j, k) * fs
 #endif
-                ! X-momentum.
+                        ! X-momentum.
 
-                fs              = lam3*dru + uAvg*abv6 + sx*abv7
-                fw(i,j,k+1,imx) = fw(i,j,k+1,imx) + fs
-                fw(i,j,k,imx)   = fw(i,j,k,imx)   - fs
+                        fs = lam3 * dru + uAvg * abv6 + sx * abv7
+                        fw(i, j, k + 1, imx) = fw(i, j, k + 1, imx) + fs
+                        fw(i, j, k, imx) = fw(i, j, k, imx) - fs
 
-                ! Y-momentum.
+                        ! Y-momentum.
 
-                fs              = lam3*drv + vAvg*abv6 + sy*abv7
-                fw(i,j,k+1,imy) = fw(i,j,k+1,imy) + fs
-                fw(i,j,k,imy)   = fw(i,j,k,imy)   - fs
+                        fs = lam3 * drv + vAvg * abv6 + sy * abv7
+                        fw(i, j, k + 1, imy) = fw(i, j, k + 1, imy) + fs
+                        fw(i, j, k, imy) = fw(i, j, k, imy) - fs
 
-                ! Z-momentum.
+                        ! Z-momentum.
 
-                fs              = lam3*drw + wAvg*abv6 + sz*abv7
-                fw(i,j,k+1,imz) = fw(i,j,k+1,imz) + fs
-                fw(i,j,k,imz)   = fw(i,j,k,imz)   - fs
+                        fs = lam3 * drw + wAvg * abv6 + sz * abv7
+                        fw(i, j, k + 1, imz) = fw(i, j, k + 1, imz) + fs
+                        fw(i, j, k, imz) = fw(i, j, k, imz) - fs
 
-                ! Energy.
+                        ! Energy.
 
-                fs                = lam3*dre + hAvg*abv6 + unAvg*abv7
-                fw(i,j,k+1,irhoE) = fw(i,j,k+1,irhoE) + fs
-                fw(i,j,k,irhoE)   = fw(i,j,k,irhoE)   - fs
+                        fs = lam3 * dre + hAvg * abv6 + unAvg * abv7
+                        fw(i, j, k + 1, irhoE) = fw(i, j, k + 1, irhoE) + fs
+                        fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - fs
 
 #ifdef TAPENADE_REVERSE
-             end do
+                    end do
 #else
-          end do
-       end do
-    end do
+                end do
+            end do
+        end do
 #endif
-  end subroutine inviscidDissFluxMatrix
-
-  subroutine inviscidDissFluxScalar
-    !
-    !       inviscidDissFluxScalar computes the scalar artificial
-    !       dissipation, see AIAA paper 81-1259, for a given block.
-    !       Therefore it is assumed that the pointers in  blockPointers
-    !       already point to the correct block.
-    !
-    use constants
-    use blockPointers, only : nx, ny, nz, il, jl, kl, ie, je, ke, ib, jb, kb, &
-         w, p, porI, porJ, porK, fw, radI, radJ, radK, gamma
-    use flowVarRefState, only : gammaInf, pInfCorr, rhoInf
-    use inputDiscretization, only: vis2, vis4
-    use inputPhysics, only : equations
-    use iteration, only : rFil
-    use utils, only : myDim
-    implicit none
-    !
-    !      Local parameter.
-    !
-    real(kind=realType), parameter :: dssMax = 0.25_realType
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, ind, ii
-
-    real(kind=realType) :: sslim, rhoi
-    real(kind=realType) :: sfil, fis2, fis4
-    real(kind=realType) :: ppor, rrad, dis2, dis4
-    real(kind=realType) :: ddw1,ddw2,ddw3,ddw4,ddw5,fs
-    real(kind=realType),dimension(1:ie,1:je,1:ke,3) :: dss
-    real(kind=realType), dimension(0:ib,0:jb,0:kb) :: ss
-
-    ! Check if rFil == 0. If so, the dissipative flux needs not to
-    ! be computed.
-
-    if(abs(rFil) < thresholdReal) return
-
-    ! Determine the variables used to compute the switch.
-    ! For the inviscid case this is the pressure; for the viscous
-    ! case it is the entropy.
-
-    select case (equations)
-    case (EulerEquations)
-
-       ! Inviscid case. Pressure switch is based on the pressure.
-       ! Also set the value of sslim. To be fully consistent this
-       ! must have the dimension of pressure and it is therefore
-       ! set to a fraction of the free stream value.
-
-       sslim = 0.001_realType*pInfCorr
-
-       ! Copy the pressure in ss. Only need the entries used in the
-       ! discretization, i.e. not including the corner halo's, but we'll
-       ! just copy all anyway.
-
-       ss = P
-       !===============================================================
-
-    case (NSEquations, RANSEquations)
-
-       ! Viscous case. Pressure switch is based on the entropy.
-       ! Also set the value of sslim. To be fully consistent this
-       ! must have the dimension of entropy and it is therefore
-       ! set to a fraction of the free stream value.
-
-       sslim = 0.001_realType*pInfCorr/(rhoInf**gammaInf)
-
-       ! Store the entropy in ss. See above.
+    end subroutine inviscidDissFluxMatrix
+
+    subroutine inviscidDissFluxScalar
+        !
+        !       inviscidDissFluxScalar computes the scalar artificial
+        !       dissipation, see AIAA paper 81-1259, for a given block.
+        !       Therefore it is assumed that the pointers in  blockPointers
+        !       already point to the correct block.
+        !
+        use constants
+        use blockPointers, only: nx, ny, nz, il, jl, kl, ie, je, ke, ib, jb, kb, &
+                                 w, p, porI, porJ, porK, fw, radI, radJ, radK, gamma
+        use flowVarRefState, only: gammaInf, pInfCorr, rhoInf
+        use inputDiscretization, only: vis2, vis4
+        use inputPhysics, only: equations
+        use iteration, only: rFil
+        use utils, only: myDim
+        implicit none
+        !
+        !      Local parameter.
+        !
+        real(kind=realType), parameter :: dssMax = 0.25_realType
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, ind, ii
+
+        real(kind=realType) :: sslim, rhoi
+        real(kind=realType) :: sfil, fis2, fis4
+        real(kind=realType) :: ppor, rrad, dis2, dis4
+        real(kind=realType) :: ddw1, ddw2, ddw3, ddw4, ddw5, fs
+        real(kind=realType), dimension(1:ie, 1:je, 1:ke, 3) :: dss
+        real(kind=realType), dimension(0:ib, 0:jb, 0:kb) :: ss
+
+        ! Check if rFil == 0. If so, the dissipative flux needs not to
+        ! be computed.
+
+        if (abs(rFil) < thresholdReal) return
+
+        ! Determine the variables used to compute the switch.
+        ! For the inviscid case this is the pressure; for the viscous
+        ! case it is the entropy.
+
+        select case (equations)
+        case (EulerEquations)
+
+            ! Inviscid case. Pressure switch is based on the pressure.
+            ! Also set the value of sslim. To be fully consistent this
+            ! must have the dimension of pressure and it is therefore
+            ! set to a fraction of the free stream value.
+
+            sslim = 0.001_realType * pInfCorr
+
+            ! Copy the pressure in ss. Only need the entries used in the
+            ! discretization, i.e. not including the corner halo's, but we'll
+            ! just copy all anyway.
+
+            ss = P
+            !===============================================================
+
+        case (NSEquations, RANSEquations)
+
+            ! Viscous case. Pressure switch is based on the entropy.
+            ! Also set the value of sslim. To be fully consistent this
+            ! must have the dimension of entropy and it is therefore
+            ! set to a fraction of the free stream value.
+
+            sslim = 0.001_realType * pInfCorr / (rhoInf**gammaInf)
+
+            ! Store the entropy in ss. See above.
 
 #ifdef TAPENADE_REVERSE
-       !$AD II-LOOP
-       do ii=0,(ib+1)*(jb+1)*(kb+1)-1
-          i = mod(ii, ib+1)
-          j = mod(ii/(ib+1), jb+1)
-          k = ii/((ib+1)*(jb+1))
+            !$AD II-LOOP
+            do ii = 0, (ib + 1) * (jb + 1) * (kb + 1) - 1
+                i = mod(ii, ib + 1)
+                j = mod(ii / (ib + 1), jb + 1)
+                k = ii / ((ib + 1) * (jb + 1))
 #else
-          do k=0,kb
-             do j=0,jb
-                do i=0,ib
+                do k = 0, kb
+                    do j = 0, jb
+                        do i = 0, ib
 #endif
-                   ss(i,j,k) = p(i,j,k)/(w(i,j,k,irho)**gamma(i,j,k))
+                            ss(i, j, k) = p(i, j, k) / (w(i, j, k, irho)**gamma(i, j, k))
 #ifdef TAPENADE_REVERSE
-                end do
+                        end do
 #else
-             end do
-          end do
-       end do
+                    end do
+                end do
+            end do
 #endif
-    end select
+        end select
 
-    ! Compute the pressure sensor for each cell, in each direction:
+        ! Compute the pressure sensor for each cell, in each direction:
 #ifdef TAPENADE_REVERSE
-    !$AD II-LOOP
-    do ii=0,ie*je*ke-1
-       i = mod(ii, ie) + 1
-       j = mod(ii/ie, je) + 1
-       k = ii/(ie*je) + 1
+        !$AD II-LOOP
+        do ii = 0, ie * je * ke - 1
+            i = mod(ii, ie) + 1
+            j = mod(ii / ie, je) + 1
+            k = ii / (ie * je) + 1
 #else
-       do k=1,ke
-          do j=1,je
-             do i=1,ie
+            do k = 1, ke
+                do j = 1, je
+                    do i = 1, ie
 #endif
-                dss(i,j,k,1) = abs((ss(i+1,j,k) - two*ss(i,j,k) + ss(i-1,j,k)) &
-                     /     (ss(i+1,j,k) + two*ss(i,j,k) + ss(i-1,j,k) + sslim))
+                        dss(i, j, k, 1) = abs((ss(i + 1, j, k) - two * ss(i, j, k) + ss(i - 1, j, k)) &
+                                              / (ss(i + 1, j, k) + two * ss(i, j, k) + ss(i - 1, j, k) + sslim))
 
-                dss(i,j,k,2) = abs((ss(i,j+1,k) - two*ss(i,j,k) + ss(i,j-1,k)) &
-                     /     (ss(i,j+1,k) + two*ss(i,j,k) + ss(i,j-1,k) + sslim))
+                        dss(i, j, k, 2) = abs((ss(i, j + 1, k) - two * ss(i, j, k) + ss(i, j - 1, k)) &
+                                              / (ss(i, j + 1, k) + two * ss(i, j, k) + ss(i, j - 1, k) + sslim))
 
-                dss(i,j,k,3) = abs((ss(i,j,k+1) - two*ss(i,j,k) + ss(i,j,k-1)) &
-                     /     (ss(i,j,k+1) + two*ss(i,j,k) + ss(i,j,k-1) + sslim))
+                        dss(i, j, k, 3) = abs((ss(i, j, k + 1) - two * ss(i, j, k) + ss(i, j, k - 1)) &
+                                              / (ss(i, j, k + 1) + two * ss(i, j, k) + ss(i, j, k - 1) + sslim))
 #ifdef TAPENADE_REVERSE
-             end do
+                    end do
 #else
-          end do
-       end do
-    end do
+                end do
+            end do
+        end do
 #endif
 
-    ! Set a couple of constants for the scheme.
+        ! Set a couple of constants for the scheme.
 
-    fis2 = rFil*vis2
-    fis4 = rFil*vis4
-    sfil = one - rFil
+        fis2 = rFil * vis2
+        fis4 = rFil * vis4
+        sfil = one - rFil
 
-    ! Initialize the dissipative residual to a certain times,
-    ! possibly zero, the previously stored value. Owned cells
-    ! only, because the halo values do not matter.
+        ! Initialize the dissipative residual to a certain times,
+        ! possibly zero, the previously stored value. Owned cells
+        ! only, because the halo values do not matter.
 
-    fw = sfil*fw
-    !
-    !       Dissipative fluxes in the i-direction.
-    !
+        fw = sfil * fw
+        !
+        !       Dissipative fluxes in the i-direction.
+        !
 #ifdef TAPENADE_REVERSE
-    !$AD II-LOOP
-    do ii=0,il*ny*nz-1
-       i = mod(ii, il) + 1
-       j = mod(ii/il, ny) + 2
-       k = ii/(il*ny) + 2
+        !$AD II-LOOP
+        do ii = 0, il * ny * nz - 1
+            i = mod(ii, il) + 1
+            j = mod(ii / il, ny) + 2
+            k = ii / (il * ny) + 2
 #else
-       do k=2,kl
-          do j=2,jl
-             do i=1,il
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 1, il
 #endif
-                ! Compute the dissipation coefficients for this face.
+                        ! Compute the dissipation coefficients for this face.
 
-                ppor = zero
-                if(porI(i,j,k) == normalFlux) ppor = half
-                rrad = ppor*(radI(i,j,k) + radI(i+1,j,k))
+                        ppor = zero
+                        if (porI(i, j, k) == normalFlux) ppor = half
+                        rrad = ppor * (radI(i, j, k) + radI(i + 1, j, k))
 
-                dis2 = fis2*rrad*min(dssMax, max(dss(i,j,k,1), dss(i+1,j,k,1)))
-                dis4 = myDim(fis4*rrad, dis2)
+                        dis2 = fis2 * rrad * min(dssMax, max(dss(i, j, k, 1), dss(i + 1, j, k, 1)))
+                        dis4 = myDim(fis4 * rrad, dis2)
 
-                ! Compute and scatter the dissipative flux.
-                ! Density. Store it in the mass flow of the
-                ! appropriate sliding mesh interface.
+                        ! Compute and scatter the dissipative flux.
+                        ! Density. Store it in the mass flow of the
+                        ! appropriate sliding mesh interface.
 
-                ddw1 = w(i+1,j,k,irho) - w(i,j,k,irho)
-                fs  = dis2*ddw1 &
-                     - dis4*(w(i+2,j,k,irho) - w(i-1,j,k,irho) - three*ddw1)
+                        ddw1 = w(i + 1, j, k, irho) - w(i, j, k, irho)
+                        fs = dis2 * ddw1 &
+                             - dis4 * (w(i + 2, j, k, irho) - w(i - 1, j, k, irho) - three * ddw1)
 
-                fw(i+1,j,k,irho) = fw(i+1,j,k,irho) + fs
-                fw(i,j,k,irho)   = fw(i,j,k,irho)   - fs
+                        fw(i + 1, j, k, irho) = fw(i + 1, j, k, irho) + fs
+                        fw(i, j, k, irho) = fw(i, j, k, irho) - fs
 
-                ! X-momentum.
+                        ! X-momentum.
 
-                ddw2 = w(i+1,j,k,ivx)*w(i+1,j,k,irho) - w(i,j,k,ivx)*w(i,j,k,irho)
-                fs  = dis2*ddw2 &
-                     - dis4*(w(i+2,j,k,ivx)*w(i+2,j,k,irho) - w(i-1,j,k,ivx)*w(i-1,j,k,irho) - three*ddw2)
+                        ddw2 = w(i + 1, j, k, ivx) * w(i + 1, j, k, irho) - w(i, j, k, ivx) * w(i, j, k, irho)
+                        fs = dis2 * ddw2 &
+       - dis4 * (w(i + 2, j, k, ivx) * w(i + 2, j, k, irho) - w(i - 1, j, k, ivx) * w(i - 1, j, k, irho) - three * ddw2)
 
-                fw(i+1,j,k,imx) = fw(i+1,j,k,imx) + fs
-                fw(i,j,k,imx)   = fw(i,j,k,imx)   - fs
+                        fw(i + 1, j, k, imx) = fw(i + 1, j, k, imx) + fs
+                        fw(i, j, k, imx) = fw(i, j, k, imx) - fs
 
-                ! Y-momentum.
+                        ! Y-momentum.
 
-                ddw3 = w(i+1,j,k,ivy)*w(i+1,j,k,irho) - w(i,j,k,ivy)*w(i,j,k,irho)
-                fs  = dis2*ddw3 &
-                     - dis4*(w(i+2,j,k,ivy)*w(i+2,j,k,irho) - w(i-1,j,k,ivy)*w(i-1,j,k,irho) - three*ddw3)
+                        ddw3 = w(i + 1, j, k, ivy) * w(i + 1, j, k, irho) - w(i, j, k, ivy) * w(i, j, k, irho)
+                        fs = dis2 * ddw3 &
+       - dis4 * (w(i + 2, j, k, ivy) * w(i + 2, j, k, irho) - w(i - 1, j, k, ivy) * w(i - 1, j, k, irho) - three * ddw3)
 
-                fw(i+1,j,k,imy) = fw(i+1,j,k,imy) + fs
-                fw(i,j,k,imy)   = fw(i,j,k,imy)   - fs
+                        fw(i + 1, j, k, imy) = fw(i + 1, j, k, imy) + fs
+                        fw(i, j, k, imy) = fw(i, j, k, imy) - fs
 
-                ! Z-momentum.
+                        ! Z-momentum.
 
-                ddw4 = w(i+1,j,k,ivz)*w(i+1,j,k,irho) - w(i,j,k,ivz)*w(i,j,k,irho)
-                fs  = dis2*ddw4 &
-                     - dis4*(w(i+2,j,k,ivz)*w(i+2,j,k,irho) - w(i-1,j,k,ivz)*w(i-1,j,k,irho) - three*ddw4)
+                        ddw4 = w(i + 1, j, k, ivz) * w(i + 1, j, k, irho) - w(i, j, k, ivz) * w(i, j, k, irho)
+                        fs = dis2 * ddw4 &
+       - dis4 * (w(i + 2, j, k, ivz) * w(i + 2, j, k, irho) - w(i - 1, j, k, ivz) * w(i - 1, j, k, irho) - three * ddw4)
 
-                fw(i+1,j,k,imz) = fw(i+1,j,k,imz) + fs
-                fw(i,j,k,imz)   = fw(i,j,k,imz)   - fs
+                        fw(i + 1, j, k, imz) = fw(i + 1, j, k, imz) + fs
+                        fw(i, j, k, imz) = fw(i, j, k, imz) - fs
 
-                ! Energy.
+                        ! Energy.
 
-                ddw5 = (w(i+1,j,k,irhoE) + P(i+1,j,K))- (w(i,j,k,irhoE) + P(i,j,k))
-                fs  = dis2*ddw5 &
-                     - dis4*((w(i+2,j,k,irhoE) + P(i+2,j,k)) - (w(i-1,j,k,irhoE) + P(i-1,j,k)) - three*ddw5)
+                        ddw5 = (w(i + 1, j, k, irhoE) + P(i + 1, j, K)) - (w(i, j, k, irhoE) + P(i, j, k))
+                        fs = dis2 * ddw5 &
+           - dis4 * ((w(i + 2, j, k, irhoE) + P(i + 2, j, k)) - (w(i - 1, j, k, irhoE) + P(i - 1, j, k)) - three * ddw5)
 
-                fw(i+1,j,k,irhoE) = fw(i+1,j,k,irhoE) + fs
-                fw(i,j,k,irhoE)   = fw(i,j,k,irhoE)   - fs
+                        fw(i + 1, j, k, irhoE) = fw(i + 1, j, k, irhoE) + fs
+                        fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - fs
 #ifdef TAPENADE_REVERSE
-             end do
+                    end do
 #else
-          end do
-       end do
-    end do
+                end do
+            end do
+        end do
 #endif
 
-    !
-    !       Dissipative fluxes in the j-direction.
-    !
+        !
+        !       Dissipative fluxes in the j-direction.
+        !
 #ifdef TAPENADE_REVERSE
-    !$AD II-LOOP
-    do ii=0,nx*jl*nz-1
-       i = mod(ii, nx) + 2
-       j = mod(ii/nx, jl) + 1
-       k = ii/(nx*jl) + 2
+        !$AD II-LOOP
+        do ii = 0, nx * jl * nz - 1
+            i = mod(ii, nx) + 2
+            j = mod(ii / nx, jl) + 1
+            k = ii / (nx * jl) + 2
 #else
-       do k=2,kl
-          do j=1,jl
-             do i=2,il
+            do k = 2, kl
+                do j = 1, jl
+                    do i = 2, il
 #endif
-                ! Compute the dissipation coefficients for this face.
+                        ! Compute the dissipation coefficients for this face.
 
-                ppor = zero
-                if(porJ(i,j,k) == normalFlux) ppor = half
-                rrad = ppor*(radJ(i,j,k) + radJ(i,j+1,k))
+                        ppor = zero
+                        if (porJ(i, j, k) == normalFlux) ppor = half
+                        rrad = ppor * (radJ(i, j, k) + radJ(i, j + 1, k))
 
-                dis2 = fis2*rrad*min(dssMax, max(dss(i,j,k,2),dss(i,j+1,k,2)))
-                dis4 = myDim(fis4*rrad, dis2)
+                        dis2 = fis2 * rrad * min(dssMax, max(dss(i, j, k, 2), dss(i, j + 1, k, 2)))
+                        dis4 = myDim(fis4 * rrad, dis2)
 
-                ! Compute and scatter the dissipative flux.
-                ! Density. Store it in the mass flow of the
-                ! appropriate sliding mesh interface.
+                        ! Compute and scatter the dissipative flux.
+                        ! Density. Store it in the mass flow of the
+                        ! appropriate sliding mesh interface.
 
-                ddw1 = w(i,j+1,k,irho) - w(i,j,k,irho)
-                fs  = dis2*ddw1 &
-                     - dis4*(w(i,j+2,k,irho) - w(i,j-1,k,irho) - three*ddw1)
+                        ddw1 = w(i, j + 1, k, irho) - w(i, j, k, irho)
+                        fs = dis2 * ddw1 &
+                             - dis4 * (w(i, j + 2, k, irho) - w(i, j - 1, k, irho) - three * ddw1)
 
-                fw(i,j+1,k,irho) = fw(i,j+1,k,irho) + fs
-                fw(i,j,k,irho)   = fw(i,j,k,irho)   - fs
+                        fw(i, j + 1, k, irho) = fw(i, j + 1, k, irho) + fs
+                        fw(i, j, k, irho) = fw(i, j, k, irho) - fs
 
-                ! X-momentum.
+                        ! X-momentum.
 
-                ddw2 = w(i,j+1,k,ivx)*w(i,j+1,k,irho) - w(i,j,k,ivx)*w(i,j,k,irho)
-                fs  = dis2*ddw2 &
-                     - dis4*(w(i,j+2,k,ivx)*w(i,j+2,k,irho) - w(i,j-1,k,ivx)*w(i,j-1,k,irho) - three*ddw2)
+                        ddw2 = w(i, j + 1, k, ivx) * w(i, j + 1, k, irho) - w(i, j, k, ivx) * w(i, j, k, irho)
+                        fs = dis2 * ddw2 &
+       - dis4 * (w(i, j + 2, k, ivx) * w(i, j + 2, k, irho) - w(i, j - 1, k, ivx) * w(i, j - 1, k, irho) - three * ddw2)
 
-                fw(i,j+1,k,imx) = fw(i,j+1,k,imx) + fs
-                fw(i,j,k,imx)   = fw(i,j,k,imx)   - fs
+                        fw(i, j + 1, k, imx) = fw(i, j + 1, k, imx) + fs
+                        fw(i, j, k, imx) = fw(i, j, k, imx) - fs
 
-                ! Y-momentum.
+                        ! Y-momentum.
 
-                ddw3 = w(i,j+1,k,ivy)*w(i,j+1,k,irho) - w(i,j,k,ivy)*w(i,j,k,irho)
-                fs  = dis2*ddw3 &
-                     - dis4*(w(i,j+2,k,ivy)*w(i,j+2,k,irho) - w(i,j-1,k,ivy)*w(i,j-1,k,irho) - three*ddw3)
+                        ddw3 = w(i, j + 1, k, ivy) * w(i, j + 1, k, irho) - w(i, j, k, ivy) * w(i, j, k, irho)
+                        fs = dis2 * ddw3 &
+       - dis4 * (w(i, j + 2, k, ivy) * w(i, j + 2, k, irho) - w(i, j - 1, k, ivy) * w(i, j - 1, k, irho) - three * ddw3)
 
-                fw(i,j+1,k,imy) = fw(i,j+1,k,imy) + fs
-                fw(i,j,k,imy)   = fw(i,j,k,imy)   - fs
+                        fw(i, j + 1, k, imy) = fw(i, j + 1, k, imy) + fs
+                        fw(i, j, k, imy) = fw(i, j, k, imy) - fs
 
-                ! Z-momentum.
+                        ! Z-momentum.
 
-                ddw4 = w(i,j+1,k,ivz)*w(i,j+1,k,irho) - w(i,j,k,ivz)*w(i,j,k,irho)
-                fs  = dis2*ddw4 &
-                     - dis4*(w(i,j+2,k,ivz)*w(i,j+2,k,irho) - w(i,j-1,k,ivz)*w(i,j-1,k,irho) - three*ddw4)
+                        ddw4 = w(i, j + 1, k, ivz) * w(i, j + 1, k, irho) - w(i, j, k, ivz) * w(i, j, k, irho)
+                        fs = dis2 * ddw4 &
+       - dis4 * (w(i, j + 2, k, ivz) * w(i, j + 2, k, irho) - w(i, j - 1, k, ivz) * w(i, j - 1, k, irho) - three * ddw4)
 
-                fw(i,j+1,k,imz) = fw(i,j+1,k,imz) + fs
-                fw(i,j,k,imz)   = fw(i,j,k,imz)   - fs
+                        fw(i, j + 1, k, imz) = fw(i, j + 1, k, imz) + fs
+                        fw(i, j, k, imz) = fw(i, j, k, imz) - fs
 
-                ! Energy.
+                        ! Energy.
 
-                ddw5 = (w(i,j+1,k,irhoE) + P(i,j+1,k)) - (w(i,j,k,irhoE) + P(i,j,k))
-                fs  = dis2*ddw5 &
-                     - dis4*((w(i,j+2,k,irhoE) + P(i,j+2,k)) - (w(i,j-1,k,irhoE) + P(i,j-1,k)) - three*ddw5)
+                        ddw5 = (w(i, j + 1, k, irhoE) + P(i, j + 1, k)) - (w(i, j, k, irhoE) + P(i, j, k))
+                        fs = dis2 * ddw5 &
+           - dis4 * ((w(i, j + 2, k, irhoE) + P(i, j + 2, k)) - (w(i, j - 1, k, irhoE) + P(i, j - 1, k)) - three * ddw5)
 
-                fw(i,j+1,k,irhoE) = fw(i,j+1,k,irhoE) + fs
-                fw(i,j,k,irhoE)   = fw(i,j,k,irhoE)   - fs
+                        fw(i, j + 1, k, irhoE) = fw(i, j + 1, k, irhoE) + fs
+                        fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - fs
 #ifdef TAPENADE_REVERSE
-             end do
+                    end do
 #else
-          end do
-       end do
-    end do
+                end do
+            end do
+        end do
 #endif
-    !
-    !       Dissipative fluxes in the k-direction.
-    !
+        !
+        !       Dissipative fluxes in the k-direction.
+        !
 #ifdef TAPENADE_REVERSE
-    !$AD II-LOOP
-    do ii=0,nx*ny*kl-1
-       i = mod(ii, nx) + 2
-       j = mod(ii/nx, ny) + 2
-       k = ii/(nx*ny) + 1
+        !$AD II-LOOP
+        do ii = 0, nx * ny * kl - 1
+            i = mod(ii, nx) + 2
+            j = mod(ii / nx, ny) + 2
+            k = ii / (nx * ny) + 1
 #else
-       do k=1,kl
-          do j=2,jl
-             do i=2,il
+            do k = 1, kl
+                do j = 2, jl
+                    do i = 2, il
 #endif
-                ! Compute the dissipation coefficients for this face.
+                        ! Compute the dissipation coefficients for this face.
 
-                ppor = zero
-                if(porK(i,j,k) == normalFlux) ppor = half
-                rrad = ppor*(radK(i,j,k) + radK(i,j,k+1))
+                        ppor = zero
+                        if (porK(i, j, k) == normalFlux) ppor = half
+                        rrad = ppor * (radK(i, j, k) + radK(i, j, k + 1))
 
-                dis2 = fis2*rrad*min(dssMax, max(dss(i,j,k,3), dss(i,j,k+1,3)))
-                dis4 = myDim(fis4*rrad, dis2)
+                        dis2 = fis2 * rrad * min(dssMax, max(dss(i, j, k, 3), dss(i, j, k + 1, 3)))
+                        dis4 = myDim(fis4 * rrad, dis2)
 
-                ! Compute and scatter the dissipative flux.
-                ! Density. Store it in the mass flow of the
-                ! appropriate sliding mesh interface.
+                        ! Compute and scatter the dissipative flux.
+                        ! Density. Store it in the mass flow of the
+                        ! appropriate sliding mesh interface.
 
-                ddw1 = w(i,j,k+1,irho) - w(i,j,k,irho)
-                fs  = dis2*ddw1 &
-                     - dis4*(w(i,j,k+2,irho) - w(i,j,k-1,irho) - three*ddw1)
+                        ddw1 = w(i, j, k + 1, irho) - w(i, j, k, irho)
+                        fs = dis2 * ddw1 &
+                             - dis4 * (w(i, j, k + 2, irho) - w(i, j, k - 1, irho) - three * ddw1)
 
-                fw(i,j,k+1,irho) = fw(i,j,k+1,irho) + fs
-                fw(i,j,k,irho)   = fw(i,j,k,irho)   - fs
+                        fw(i, j, k + 1, irho) = fw(i, j, k + 1, irho) + fs
+                        fw(i, j, k, irho) = fw(i, j, k, irho) - fs
 
-                ! X-momentum.
+                        ! X-momentum.
 
-                ddw2 = w(i,j,k+1,ivx)*w(i,j,k+1,irho) - w(i,j,k,ivx)*w(i,j,k,irho)
-                fs  = dis2*ddw2 &
-                     - dis4*(w(i,j,k+2,ivx)*w(i,j,k+2,irho) - w(i,j,k-1,ivx)*w(i,j,k-1,irho) - three*ddw2)
+                        ddw2 = w(i, j, k + 1, ivx) * w(i, j, k + 1, irho) - w(i, j, k, ivx) * w(i, j, k, irho)
+                        fs = dis2 * ddw2 &
+       - dis4 * (w(i, j, k + 2, ivx) * w(i, j, k + 2, irho) - w(i, j, k - 1, ivx) * w(i, j, k - 1, irho) - three * ddw2)
 
-                fw(i,j,k+1,imx) = fw(i,j,k+1,imx) + fs
-                fw(i,j,k,imx)   = fw(i,j,k,imx)   - fs
+                        fw(i, j, k + 1, imx) = fw(i, j, k + 1, imx) + fs
+                        fw(i, j, k, imx) = fw(i, j, k, imx) - fs
 
-                ! Y-momentum.
+                        ! Y-momentum.
 
-                ddw3 = w(i,j,k+1,ivy)*w(i,j,k+1,irho) - w(i,j,k,ivy)*w(i,j,k,irho)
-                fs  = dis2*ddw3 &
-                     - dis4*(w(i,j,k+2,ivy)*w(i,j,k+2,irho) - w(i,j,k-1,ivy)*w(i,j,k-1,irho) - three*ddw3)
+                        ddw3 = w(i, j, k + 1, ivy) * w(i, j, k + 1, irho) - w(i, j, k, ivy) * w(i, j, k, irho)
+                        fs = dis2 * ddw3 &
+       - dis4 * (w(i, j, k + 2, ivy) * w(i, j, k + 2, irho) - w(i, j, k - 1, ivy) * w(i, j, k - 1, irho) - three * ddw3)
 
-                fw(i,j,k+1,imy) = fw(i,j,k+1,imy) + fs
-                fw(i,j,k,imy)   = fw(i,j,k,imy)   - fs
+                        fw(i, j, k + 1, imy) = fw(i, j, k + 1, imy) + fs
+                        fw(i, j, k, imy) = fw(i, j, k, imy) - fs
 
-                ! Z-momentum.
+                        ! Z-momentum.
 
-                ddw4 = w(i,j,k+1,ivz)*w(i,j,k+1,irho) - w(i,j,k,ivz)*w(i,j,k,irho)
-                fs  = dis2*ddw4 &
-                     - dis4*(w(i,j,k+2,ivz)*w(i,j,k+2,irho) - w(i,j,k-1,ivz)*w(i,j,k-1,irho) - three*ddw4)
+                        ddw4 = w(i, j, k + 1, ivz) * w(i, j, k + 1, irho) - w(i, j, k, ivz) * w(i, j, k, irho)
+                        fs = dis2 * ddw4 &
+       - dis4 * (w(i, j, k + 2, ivz) * w(i, j, k + 2, irho) - w(i, j, k - 1, ivz) * w(i, j, k - 1, irho) - three * ddw4)
 
-                fw(i,j,k+1,imz) = fw(i,j,k+1,imz) + fs
-                fw(i,j,k,imz)   = fw(i,j,k,imz)   - fs
+                        fw(i, j, k + 1, imz) = fw(i, j, k + 1, imz) + fs
+                        fw(i, j, k, imz) = fw(i, j, k, imz) - fs
 
-                ! Energy.
+                        ! Energy.
 
-                ddw5 = (w(i,j,k+1,irhoE) + P(i,j,k+1)) - (w(i,j,k,irhoE) + P(i,j,k))
-                fs  = dis2*ddw5 &
-                     - dis4*((w(i,j,k+2,irhoE) + P(i,j,k+2)) - (w(i,j,k-1,irhoE) + P(i,j,k-1)) - three*ddw5)
+                        ddw5 = (w(i, j, k + 1, irhoE) + P(i, j, k + 1)) - (w(i, j, k, irhoE) + P(i, j, k))
+                        fs = dis2 * ddw5 &
+           - dis4 * ((w(i, j, k + 2, irhoE) + P(i, j, k + 2)) - (w(i, j, k - 1, irhoE) + P(i, j, k - 1)) - three * ddw5)
 
-                fw(i,j,k+1,irhoE) = fw(i,j,k+1,irhoE) + fs
-                fw(i,j,k,irhoE)   = fw(i,j,k,irhoE)   - fs
+                        fw(i, j, k + 1, irhoE) = fw(i, j, k + 1, irhoE) + fs
+                        fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - fs
 
 #ifdef TAPENADE_REVERSE
-             end do
+                    end do
 #else
-          end do
-       end do
-    end do
+                end do
+            end do
+        end do
 #endif
-  end subroutine inviscidDissFluxScalar
-
-  subroutine inviscidUpwindFlux(fineGrid)
-    !
-    !       inviscidUpwindFlux computes the artificial dissipation part of
-    !       the Euler fluxes by means of an approximate solution of the 1D
-    !       Riemann problem on the face. For first order schemes,
-    !       fineGrid == .false., the states in the cells are assumed to
-    !       be constant; for the second order schemes on the fine grid a
-    !       nonlinear reconstruction of the left and right state is done
-    !       for which several options exist.
-    !       It is assumed that the pointers in blockPointers already
-    !       point to the correct block.
-    !
-    use constants
-    use blockPointers, only : il, jl, kl, ie, je, ke, ib, jb, kb, w, p, &
-         porI, porJ, porK, fw, gamma, si, sj, sk, &
-         indFamilyI, indFamilyJ, indFamilyK, spectralSol, addGridVelocities, &
-         sFaceI, sfaceJ, sFacek, rotMatrixI, rotMatrixJ, rotMatrixK, &
-         factFamilyI, factFamilyJ, factFamilyK
-    use flowVarRefState, only : kPresent, nw, nwf, rgas, tref
-    use inputDiscretization, only: limiter, lumpedDiss, precond, riemann, &
-         riemannCoarse, orderTurb, kappaCoef
-    use inputPhysics, only : equations
-    use iteration, only : rFil, currentLevel, groundLevel
-    use cgnsGrid, only: massFlowFamilyDiss
-    use utils, only : getCorrectForK, terminate
-    use flowUtils, only : eTot
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    logical, intent(in) :: fineGrid
-    !
-    !      Local variables.
-    !
-    integer(kind=porType) :: por
-
-    integer(kind=intType) :: nwInt
-    integer(kind=intType) :: i, j, k, ind
-    integer(kind=intType) :: limUsed, riemannUsed
-
-    real(kind=realType) :: sx, sy, sz, omk, opk, sFil, gammaFace
-    real(kind=realType) :: factMinmod, sFace
-
-    real(kind=realType), dimension(nw)  :: left, right
-    real(kind=realType), dimension(nw)  :: du1, du2, du3
-    real(kind=realType), dimension(nwf) :: flux
-
-    logical :: firstOrderK, correctForK, rotationalPeriodic
-    !
-    ! Check if rFil == 0. If so, the dissipative flux needs not to
-    ! be computed.
-
-    if(abs(rFil) < thresholdReal) return
-
-    ! Check if the formulation for rotational periodic problems
-    ! must be used.
-
-    if( associated(rotMatrixI) ) then
-       rotationalPeriodic = .true.
-    else
-       rotationalPeriodic = .false.
-    endif
-
-    ! Initialize the dissipative residual to a certain times,
-    ! possibly zero, the previously stored value. Owned cells
-    ! only, because the halo values do not matter.
-
-    sFil = one - rFil
-
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-             fw(i,j,k,irho)  = sFil*fw(i,j,k,irho)
-             fw(i,j,k,imx)   = sFil*fw(i,j,k,imx)
-             fw(i,j,k,imy)   = sFil*fw(i,j,k,imy)
-             fw(i,j,k,imz)   = sFil*fw(i,j,k,imz)
-             fw(i,j,k,irhoE) = sFil*fw(i,j,k,irhoE)
-          enddo
-       enddo
-    enddo
-
-    ! Determine whether or not the total energy must be corrected
-    ! for the presence of the turbulent kinetic energy.
-    correctForK = getCorrectForK()
-
-    ! Compute the factor used in the minmod limiter.
-
-    factMinmod = (three-kappaCoef) &
-         / max(1.e-10_realType, one-kappaCoef)
-
-    ! Determine the limiter scheme to be used. On the fine grid the
-    ! user specified scheme is used; on the coarse grid a first order
-    ! scheme is computed.
-
-    limUsed = firstOrder
-    if( fineGrid ) limUsed = limiter
-
-    ! Lumped diss is true for doing approx PC
-    if(lumpedDiss) then
-       limUsed = firstOrder
-    end if
-
-    ! Determine the riemann solver which must be used.
-
-    riemannUsed = riemannCoarse
-    if( fineGrid ) riemannUsed = riemann
-
-    ! Store 1-kappa and 1+kappa a bit easier and multiply it by 0.25.
-
-    omk = fourth*(one - kappaCoef)
-    opk = fourth*(one + kappaCoef)
-
-    ! Initialize sFace to zero. This value will be used if the
-    ! block is not moving.
-
-    sFace = zero
-
-    ! Set the number of variables to be interpolated depending
-    ! whether or not a k-equation is present. If a k-equation is
-    ! present also set the logical firstOrderK. This indicates
-    ! whether or not only a first order approximation is to be used
-    ! for the turbulent kinetic energy.
-
-    if( correctForK ) then
-       if(orderTurb == firstOrder) then
-          nwInt       = nwf
-          firstOrderK = .true.
-       else
-          nwInt       = itu1
-          firstOrderK = .false.
-       endif
-    else
-       nwInt       = nwf
-       firstOrderK = .false.
-    endif
-    !
-    !       Flux computation. A distinction is made between first and
-    !       second order schemes to avoid the overhead for the first order
-    !       scheme.
-    !
-    orderTest: if(limUsed == firstOrder) then
-       !
-       !         First order reconstruction. The states in the cells are
-       !         constant. The left and right states are constructed easily.
-       !
-       ! Fluxes in the i-direction.
-
-       do k=2,kl
-          do j=2,jl
-             do i=1,il
-
-                ! Store the normal vector, the porosity and the
-                ! mesh velocity if present.
-
-                sx = si(i,j,k,1); sy = si(i,j,k,2); sz = si(i,j,k,3)
-                por = porI(i,j,k)
-                if( addGridVelocities ) sFace = sFaceI(i,j,k)
-
-                ! Determine the left and right state.
-
-                left(irho)  = w(i,j,k,irho)
-                left(ivx)   = w(i,j,k,ivx)
-                left(ivy)   = w(i,j,k,ivy)
-                left(ivz)   = w(i,j,k,ivz)
-                left(irhoE) = p(i,j,k)
-                if( correctForK ) left(itu1) = w(i,j,k,itu1)
-
-                right(irho)  = w(i+1,j,k,irho)
-                right(ivx)   = w(i+1,j,k,ivx)
-                right(ivy)   = w(i+1,j,k,ivy)
-                right(ivz)   = w(i+1,j,k,ivz)
-                right(irhoE) = p(i+1,j,k)
-                if( correctForK ) right(itu1) = w(i+1,j,k,itu1)
-
-                ! Compute the value of gamma on the face. Take an
-                ! arithmetic average of the two states.
-
-                gammaFace = half*(gamma(i,j,k) + gamma(i+1,j,k))
-
-                ! Compute the dissipative flux across the interface.
-
-                call riemannFlux(left, right, flux)
-
-                ! And scatter it to the left and right.
-
-                fw(i,j,k,irho)  = fw(i,j,k,irho)  + flux(irho)
-                fw(i,j,k,imx)   = fw(i,j,k,imx)   + flux(imx)
-                fw(i,j,k,imy)   = fw(i,j,k,imy)   + flux(imy)
-                fw(i,j,k,imz)   = fw(i,j,k,imz)   + flux(imz)
-                fw(i,j,k,irhoE) = fw(i,j,k,irhoE) + flux(irhoE)
-
-                fw(i+1,j,k,irho)  = fw(i+1,j,k,irho)  - flux(irho)
-                fw(i+1,j,k,imx)   = fw(i+1,j,k,imx)   - flux(imx)
-                fw(i+1,j,k,imy)   = fw(i+1,j,k,imy)   - flux(imy)
-                fw(i+1,j,k,imz)   = fw(i+1,j,k,imz)   - flux(imz)
-                fw(i+1,j,k,irhoE) = fw(i+1,j,k,irhoE) - flux(irhoE)
-
-                ! Store the density flux in the mass flow of the
-                ! appropriate sliding mesh interface.
+    end subroutine inviscidDissFluxScalar
+
+    subroutine inviscidUpwindFlux(fineGrid)
+        !
+        !       inviscidUpwindFlux computes the artificial dissipation part of
+        !       the Euler fluxes by means of an approximate solution of the 1D
+        !       Riemann problem on the face. For first order schemes,
+        !       fineGrid == .false., the states in the cells are assumed to
+        !       be constant; for the second order schemes on the fine grid a
+        !       nonlinear reconstruction of the left and right state is done
+        !       for which several options exist.
+        !       It is assumed that the pointers in blockPointers already
+        !       point to the correct block.
+        !
+        use constants
+        use blockPointers, only: il, jl, kl, ie, je, ke, ib, jb, kb, w, p, &
+                                 porI, porJ, porK, fw, gamma, si, sj, sk, &
+                                 indFamilyI, indFamilyJ, indFamilyK, spectralSol, addGridVelocities, &
+                                 sFaceI, sfaceJ, sFacek, rotMatrixI, rotMatrixJ, rotMatrixK, &
+                                 factFamilyI, factFamilyJ, factFamilyK
+        use flowVarRefState, only: kPresent, nw, nwf, rgas, tref
+        use inputDiscretization, only: limiter, lumpedDiss, precond, riemann, &
+                                       riemannCoarse, orderTurb, kappaCoef
+        use inputPhysics, only: equations
+        use iteration, only: rFil, currentLevel, groundLevel
+        use cgnsGrid, only: massFlowFamilyDiss
+        use utils, only: getCorrectForK, terminate
+        use flowUtils, only: eTot
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        logical, intent(in) :: fineGrid
+        !
+        !      Local variables.
+        !
+        integer(kind=porType) :: por
+
+        integer(kind=intType) :: nwInt
+        integer(kind=intType) :: i, j, k, ind
+        integer(kind=intType) :: limUsed, riemannUsed
+
+        real(kind=realType) :: sx, sy, sz, omk, opk, sFil, gammaFace
+        real(kind=realType) :: factMinmod, sFace
+
+        real(kind=realType), dimension(nw) :: left, right
+        real(kind=realType), dimension(nw) :: du1, du2, du3
+        real(kind=realType), dimension(nwf) :: flux
+
+        logical :: firstOrderK, correctForK, rotationalPeriodic
+        !
+        ! Check if rFil == 0. If so, the dissipative flux needs not to
+        ! be computed.
+
+        if (abs(rFil) < thresholdReal) return
+
+        ! Check if the formulation for rotational periodic problems
+        ! must be used.
+
+        if (associated(rotMatrixI)) then
+            rotationalPeriodic = .true.
+        else
+            rotationalPeriodic = .false.
+        end if
+
+        ! Initialize the dissipative residual to a certain times,
+        ! possibly zero, the previously stored value. Owned cells
+        ! only, because the halo values do not matter.
+
+        sFil = one - rFil
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    fw(i, j, k, irho) = sFil * fw(i, j, k, irho)
+                    fw(i, j, k, imx) = sFil * fw(i, j, k, imx)
+                    fw(i, j, k, imy) = sFil * fw(i, j, k, imy)
+                    fw(i, j, k, imz) = sFil * fw(i, j, k, imz)
+                    fw(i, j, k, irhoE) = sFil * fw(i, j, k, irhoE)
+                end do
+            end do
+        end do
+
+        ! Determine whether or not the total energy must be corrected
+        ! for the presence of the turbulent kinetic energy.
+        correctForK = getCorrectForK()
+
+        ! Compute the factor used in the minmod limiter.
+
+        factMinmod = (three - kappaCoef) &
+                     / max(1.e-10_realType, one - kappaCoef)
+
+        ! Determine the limiter scheme to be used. On the fine grid the
+        ! user specified scheme is used; on the coarse grid a first order
+        ! scheme is computed.
+
+        limUsed = firstOrder
+        if (fineGrid) limUsed = limiter
+
+        ! Lumped diss is true for doing approx PC
+        if (lumpedDiss) then
+            limUsed = firstOrder
+        end if
+
+        ! Determine the riemann solver which must be used.
+
+        riemannUsed = riemannCoarse
+        if (fineGrid) riemannUsed = riemann
+
+        ! Store 1-kappa and 1+kappa a bit easier and multiply it by 0.25.
+
+        omk = fourth * (one - kappaCoef)
+        opk = fourth * (one + kappaCoef)
+
+        ! Initialize sFace to zero. This value will be used if the
+        ! block is not moving.
+
+        sFace = zero
+
+        ! Set the number of variables to be interpolated depending
+        ! whether or not a k-equation is present. If a k-equation is
+        ! present also set the logical firstOrderK. This indicates
+        ! whether or not only a first order approximation is to be used
+        ! for the turbulent kinetic energy.
+
+        if (correctForK) then
+            if (orderTurb == firstOrder) then
+                nwInt = nwf
+                firstOrderK = .true.
+            else
+                nwInt = itu1
+                firstOrderK = .false.
+            end if
+        else
+            nwInt = nwf
+            firstOrderK = .false.
+        end if
+        !
+        !       Flux computation. A distinction is made between first and
+        !       second order schemes to avoid the overhead for the first order
+        !       scheme.
+        !
+        orderTest: if (limUsed == firstOrder) then
+            !
+            !         First order reconstruction. The states in the cells are
+            !         constant. The left and right states are constructed easily.
+            !
+            ! Fluxes in the i-direction.
+
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 1, il
+
+                        ! Store the normal vector, the porosity and the
+                        ! mesh velocity if present.
+
+                        sx = si(i, j, k, 1); sy = si(i, j, k, 2); sz = si(i, j, k, 3)
+                        por = porI(i, j, k)
+                        if (addGridVelocities) sFace = sFaceI(i, j, k)
+
+                        ! Determine the left and right state.
+
+                        left(irho) = w(i, j, k, irho)
+                        left(ivx) = w(i, j, k, ivx)
+                        left(ivy) = w(i, j, k, ivy)
+                        left(ivz) = w(i, j, k, ivz)
+                        left(irhoE) = p(i, j, k)
+                        if (correctForK) left(itu1) = w(i, j, k, itu1)
+
+                        right(irho) = w(i + 1, j, k, irho)
+                        right(ivx) = w(i + 1, j, k, ivx)
+                        right(ivy) = w(i + 1, j, k, ivy)
+                        right(ivz) = w(i + 1, j, k, ivz)
+                        right(irhoE) = p(i + 1, j, k)
+                        if (correctForK) right(itu1) = w(i + 1, j, k, itu1)
+
+                        ! Compute the value of gamma on the face. Take an
+                        ! arithmetic average of the two states.
+
+                        gammaFace = half * (gamma(i, j, k) + gamma(i + 1, j, k))
+
+                        ! Compute the dissipative flux across the interface.
+
+                        call riemannFlux(left, right, flux)
+
+                        ! And scatter it to the left and right.
+
+                        fw(i, j, k, irho) = fw(i, j, k, irho) + flux(irho)
+                        fw(i, j, k, imx) = fw(i, j, k, imx) + flux(imx)
+                        fw(i, j, k, imy) = fw(i, j, k, imy) + flux(imy)
+                        fw(i, j, k, imz) = fw(i, j, k, imz) + flux(imz)
+                        fw(i, j, k, irhoE) = fw(i, j, k, irhoE) + flux(irhoE)
+
+                        fw(i + 1, j, k, irho) = fw(i + 1, j, k, irho) - flux(irho)
+                        fw(i + 1, j, k, imx) = fw(i + 1, j, k, imx) - flux(imx)
+                        fw(i + 1, j, k, imy) = fw(i + 1, j, k, imy) - flux(imy)
+                        fw(i + 1, j, k, imz) = fw(i + 1, j, k, imz) - flux(imz)
+                        fw(i + 1, j, k, irhoE) = fw(i + 1, j, k, irhoE) - flux(irhoE)
+
+                        ! Store the density flux in the mass flow of the
+                        ! appropriate sliding mesh interface.
 #ifndef USE_TAPENADE
-                ind = indFamilyI(i,j,k)
-                massFlowFamilyDiss(ind,spectralSol) =       &
-                     massFlowFamilyDiss(ind,spectralSol) &
-                     + factFamilyI(i,j,k)*flux(irho)
+                        ind = indFamilyI(i, j, k)
+                        massFlowFamilyDiss(ind, spectralSol) = &
+                            massFlowFamilyDiss(ind, spectralSol) &
+                            + factFamilyI(i, j, k) * flux(irho)
 #endif
-             enddo
-          enddo
-       enddo
+                    end do
+                end do
+            end do
 
-       ! Fluxes in j-direction.
+            ! Fluxes in j-direction.
 
-       do k=2,kl
-          do j=1,jl
-             do i=2,il
+            do k = 2, kl
+                do j = 1, jl
+                    do i = 2, il
 
-                ! Store the normal vector, the porosity and the
-                ! mesh velocity if present.
+                        ! Store the normal vector, the porosity and the
+                        ! mesh velocity if present.
 
-                sx = sj(i,j,k,1); sy = sj(i,j,k,2); sz = sj(i,j,k,3)
-                por = porJ(i,j,k)
-                if( addGridVelocities ) sFace = sFaceJ(i,j,k)
+                        sx = sj(i, j, k, 1); sy = sj(i, j, k, 2); sz = sj(i, j, k, 3)
+                        por = porJ(i, j, k)
+                        if (addGridVelocities) sFace = sFaceJ(i, j, k)
 
-                ! Determine the left and right state.
+                        ! Determine the left and right state.
 
-                left(irho)  = w(i,j,k,irho)
-                left(ivx)   = w(i,j,k,ivx)
-                left(ivy)   = w(i,j,k,ivy)
-                left(ivz)   = w(i,j,k,ivz)
-                left(irhoE) = p(i,j,k)
-                if( correctForK ) left(itu1) = w(i,j,k,itu1)
+                        left(irho) = w(i, j, k, irho)
+                        left(ivx) = w(i, j, k, ivx)
+                        left(ivy) = w(i, j, k, ivy)
+                        left(ivz) = w(i, j, k, ivz)
+                        left(irhoE) = p(i, j, k)
+                        if (correctForK) left(itu1) = w(i, j, k, itu1)
 
-                right(irho)  = w(i,j+1,k,irho)
-                right(ivx)   = w(i,j+1,k,ivx)
-                right(ivy)   = w(i,j+1,k,ivy)
-                right(ivz)   = w(i,j+1,k,ivz)
-                right(irhoE) = p(i,j+1,k)
-                if( correctForK ) right(itu1) = w(i,j+1,k,itu1)
+                        right(irho) = w(i, j + 1, k, irho)
+                        right(ivx) = w(i, j + 1, k, ivx)
+                        right(ivy) = w(i, j + 1, k, ivy)
+                        right(ivz) = w(i, j + 1, k, ivz)
+                        right(irhoE) = p(i, j + 1, k)
+                        if (correctForK) right(itu1) = w(i, j + 1, k, itu1)
 
-                ! Compute the value of gamma on the face. Take an
-                ! arithmetic average of the two states.
+                        ! Compute the value of gamma on the face. Take an
+                        ! arithmetic average of the two states.
 
-                gammaFace = half*(gamma(i,j,k) + gamma(i,j+1,k))
+                        gammaFace = half * (gamma(i, j, k) + gamma(i, j + 1, k))
 
-                ! Compute the dissipative flux across the interface.
+                        ! Compute the dissipative flux across the interface.
 
-                call riemannFlux(left, right, flux)
+                        call riemannFlux(left, right, flux)
 
-                ! And scatter it to the left and right.
+                        ! And scatter it to the left and right.
 
-                fw(i,j,k,irho)  = fw(i,j,k,irho)  + flux(irho)
-                fw(i,j,k,imx)   = fw(i,j,k,imx)   + flux(imx)
-                fw(i,j,k,imy)   = fw(i,j,k,imy)   + flux(imy)
-                fw(i,j,k,imz)   = fw(i,j,k,imz)   + flux(imz)
-                fw(i,j,k,irhoE) = fw(i,j,k,irhoE) + flux(irhoE)
+                        fw(i, j, k, irho) = fw(i, j, k, irho) + flux(irho)
+                        fw(i, j, k, imx) = fw(i, j, k, imx) + flux(imx)
+                        fw(i, j, k, imy) = fw(i, j, k, imy) + flux(imy)
+                        fw(i, j, k, imz) = fw(i, j, k, imz) + flux(imz)
+                        fw(i, j, k, irhoE) = fw(i, j, k, irhoE) + flux(irhoE)
 
-                fw(i,j+1,k,irho)  = fw(i,j+1,k,irho)  - flux(irho)
-                fw(i,j+1,k,imx)   = fw(i,j+1,k,imx)   - flux(imx)
-                fw(i,j+1,k,imy)   = fw(i,j+1,k,imy)   - flux(imy)
-                fw(i,j+1,k,imz)   = fw(i,j+1,k,imz)   - flux(imz)
-                fw(i,j+1,k,irhoE) = fw(i,j+1,k,irhoE) - flux(irhoE)
+                        fw(i, j + 1, k, irho) = fw(i, j + 1, k, irho) - flux(irho)
+                        fw(i, j + 1, k, imx) = fw(i, j + 1, k, imx) - flux(imx)
+                        fw(i, j + 1, k, imy) = fw(i, j + 1, k, imy) - flux(imy)
+                        fw(i, j + 1, k, imz) = fw(i, j + 1, k, imz) - flux(imz)
+                        fw(i, j + 1, k, irhoE) = fw(i, j + 1, k, irhoE) - flux(irhoE)
 
-                ! Store the density flux in the mass flow of the
-                ! appropriate sliding mesh interface.
+                        ! Store the density flux in the mass flow of the
+                        ! appropriate sliding mesh interface.
 #ifndef USE_TAPENADE
-                ind = indFamilyJ(i,j,k)
-                massFlowFamilyDiss(ind,spectralSol) =       &
-                     massFlowFamilyDiss(ind,spectralSol) &
-                     + factFamilyJ(i,j,k)*flux(irho)
+                        ind = indFamilyJ(i, j, k)
+                        massFlowFamilyDiss(ind, spectralSol) = &
+                            massFlowFamilyDiss(ind, spectralSol) &
+                            + factFamilyJ(i, j, k) * flux(irho)
 #endif
-             enddo
-          enddo
-       enddo
+                    end do
+                end do
+            end do
 
-       ! Fluxes in k-direction.
+            ! Fluxes in k-direction.
 
-       do k=1,kl
-          do j=2,jl
-             do i=2,il
+            do k = 1, kl
+                do j = 2, jl
+                    do i = 2, il
 
-                ! Store the normal vector, the porosity and the
-                ! mesh velocity if present.
+                        ! Store the normal vector, the porosity and the
+                        ! mesh velocity if present.
 
-                sx = sk(i,j,k,1); sy = sk(i,j,k,2); sz = sk(i,j,k,3)
-                por = porK(i,j,k)
-                if( addGridVelocities ) sFace = sFaceK(i,j,k)
+                        sx = sk(i, j, k, 1); sy = sk(i, j, k, 2); sz = sk(i, j, k, 3)
+                        por = porK(i, j, k)
+                        if (addGridVelocities) sFace = sFaceK(i, j, k)
 
-                ! Determine the left and right state.
+                        ! Determine the left and right state.
 
-                left(irho)  = w(i,j,k,irho)
-                left(ivx)   = w(i,j,k,ivx)
-                left(ivy)   = w(i,j,k,ivy)
-                left(ivz)   = w(i,j,k,ivz)
-                left(irhoE) = p(i,j,k)
-                if( correctForK ) left(itu1) = w(i,j,k,itu1)
+                        left(irho) = w(i, j, k, irho)
+                        left(ivx) = w(i, j, k, ivx)
+                        left(ivy) = w(i, j, k, ivy)
+                        left(ivz) = w(i, j, k, ivz)
+                        left(irhoE) = p(i, j, k)
+                        if (correctForK) left(itu1) = w(i, j, k, itu1)
 
-                right(irho)  = w(i,j,k+1,irho)
-                right(ivx)   = w(i,j,k+1,ivx)
-                right(ivy)   = w(i,j,k+1,ivy)
-                right(ivz)   = w(i,j,k+1,ivz)
-                right(irhoE) = p(i,j,k+1)
-                if( correctForK ) right(itu1) = w(i,j,k+1,itu1)
+                        right(irho) = w(i, j, k + 1, irho)
+                        right(ivx) = w(i, j, k + 1, ivx)
+                        right(ivy) = w(i, j, k + 1, ivy)
+                        right(ivz) = w(i, j, k + 1, ivz)
+                        right(irhoE) = p(i, j, k + 1)
+                        if (correctForK) right(itu1) = w(i, j, k + 1, itu1)
 
-                ! Compute the value of gamma on the face. Take an
-                ! arithmetic average of the two states.
+                        ! Compute the value of gamma on the face. Take an
+                        ! arithmetic average of the two states.
 
-                gammaFace = half*(gamma(i,j,k) + gamma(i,j,k+1))
+                        gammaFace = half * (gamma(i, j, k) + gamma(i, j, k + 1))
 
-                ! Compute the dissipative flux across the interface.
+                        ! Compute the dissipative flux across the interface.
 
-                call riemannFlux(left, right, flux)
+                        call riemannFlux(left, right, flux)
 
-                ! And scatter it the left and right.
+                        ! And scatter it the left and right.
 
-                fw(i,j,k,irho)  = fw(i,j,k,irho)  + flux(irho)
-                fw(i,j,k,imx)   = fw(i,j,k,imx)   + flux(imx)
-                fw(i,j,k,imy)   = fw(i,j,k,imy)   + flux(imy)
-                fw(i,j,k,imz)   = fw(i,j,k,imz)   + flux(imz)
-                fw(i,j,k,irhoE) = fw(i,j,k,irhoE) + flux(irhoE)
+                        fw(i, j, k, irho) = fw(i, j, k, irho) + flux(irho)
+                        fw(i, j, k, imx) = fw(i, j, k, imx) + flux(imx)
+                        fw(i, j, k, imy) = fw(i, j, k, imy) + flux(imy)
+                        fw(i, j, k, imz) = fw(i, j, k, imz) + flux(imz)
+                        fw(i, j, k, irhoE) = fw(i, j, k, irhoE) + flux(irhoE)
 
-                fw(i,j,k+1,irho)  = fw(i,j,k+1,irho)  - flux(irho)
-                fw(i,j,k+1,imx)   = fw(i,j,k+1,imx)   - flux(imx)
-                fw(i,j,k+1,imy)   = fw(i,j,k+1,imy)   - flux(imy)
-                fw(i,j,k+1,imz)   = fw(i,j,k+1,imz)   - flux(imz)
-                fw(i,j,k+1,irhoE) = fw(i,j,k+1,irhoE) - flux(irhoE)
+                        fw(i, j, k + 1, irho) = fw(i, j, k + 1, irho) - flux(irho)
+                        fw(i, j, k + 1, imx) = fw(i, j, k + 1, imx) - flux(imx)
+                        fw(i, j, k + 1, imy) = fw(i, j, k + 1, imy) - flux(imy)
+                        fw(i, j, k + 1, imz) = fw(i, j, k + 1, imz) - flux(imz)
+                        fw(i, j, k + 1, irhoE) = fw(i, j, k + 1, irhoE) - flux(irhoE)
 
-                ! Store the density flux in the mass flow of the
-                ! appropriate sliding mesh interface.
+                        ! Store the density flux in the mass flow of the
+                        ! appropriate sliding mesh interface.
 #ifndef USE_TAPENADE
-                ind = indFamilyK(i,j,k)
-                massFlowFamilyDiss(ind,spectralSol) =       &
-                     massFlowFamilyDiss(ind,spectralSol) &
-                     + factFamilyK(i,j,k)*flux(irho)
+                        ind = indFamilyK(i, j, k)
+                        massFlowFamilyDiss(ind, spectralSol) = &
+                            massFlowFamilyDiss(ind, spectralSol) &
+                            + factFamilyK(i, j, k) * flux(irho)
 #endif
-             enddo
-          enddo
-       enddo
+                    end do
+                end do
+            end do
 
-       !      ==================================================================
+            !      ==================================================================
 
-    else orderTest
+            else orderTest
 
-       !      ==================================================================
-       !
-       !         Second order reconstruction of the left and right state.
-       !         The three differences used in the, possibly nonlinear,
-       !         interpolation are constructed here; the actual left and
-       !         right states, or at least the differences from the first
-       !         order interpolation, are computed in the subroutine
-       !         leftRightState.
-       !
-       ! Fluxes in the i-direction.
+            !      ==================================================================
+            !
+            !         Second order reconstruction of the left and right state.
+            !         The three differences used in the, possibly nonlinear,
+            !         interpolation are constructed here; the actual left and
+            !         right states, or at least the differences from the first
+            !         order interpolation, are computed in the subroutine
+            !         leftRightState.
+            !
+            ! Fluxes in the i-direction.
 
-       do k=2,kl
-          do j=2,jl
-             do i=1,il
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 1, il
 
-                ! Store the three differences used in the interpolation
-                ! in du1, du2, du3.
+                        ! Store the three differences used in the interpolation
+                        ! in du1, du2, du3.
 
-                du1(irho) = w(i,  j,k,irho) - w(i-1,j,k,irho)
-                du2(irho) = w(i+1,j,k,irho) - w(i,  j,k,irho)
-                du3(irho) = w(i+2,j,k,irho) - w(i+1,j,k,irho)
+                        du1(irho) = w(i, j, k, irho) - w(i - 1, j, k, irho)
+                        du2(irho) = w(i + 1, j, k, irho) - w(i, j, k, irho)
+                        du3(irho) = w(i + 2, j, k, irho) - w(i + 1, j, k, irho)
 
-                du1(ivx) = w(i,  j,k,ivx) - w(i-1,j,k,ivx)
-                du2(ivx) = w(i+1,j,k,ivx) - w(i,  j,k,ivx)
-                du3(ivx) = w(i+2,j,k,ivx) - w(i+1,j,k,ivx)
+                        du1(ivx) = w(i, j, k, ivx) - w(i - 1, j, k, ivx)
+                        du2(ivx) = w(i + 1, j, k, ivx) - w(i, j, k, ivx)
+                        du3(ivx) = w(i + 2, j, k, ivx) - w(i + 1, j, k, ivx)
 
-                du1(ivy) = w(i,  j,k,ivy) - w(i-1,j,k,ivy)
-                du2(ivy) = w(i+1,j,k,ivy) - w(i,  j,k,ivy)
-                du3(ivy) = w(i+2,j,k,ivy) - w(i+1,j,k,ivy)
+                        du1(ivy) = w(i, j, k, ivy) - w(i - 1, j, k, ivy)
+                        du2(ivy) = w(i + 1, j, k, ivy) - w(i, j, k, ivy)
+                        du3(ivy) = w(i + 2, j, k, ivy) - w(i + 1, j, k, ivy)
 
-                du1(ivz) = w(i,  j,k,ivz) - w(i-1,j,k,ivz)
-                du2(ivz) = w(i+1,j,k,ivz) - w(i,  j,k,ivz)
-                du3(ivz) = w(i+2,j,k,ivz) - w(i+1,j,k,ivz)
+                        du1(ivz) = w(i, j, k, ivz) - w(i - 1, j, k, ivz)
+                        du2(ivz) = w(i + 1, j, k, ivz) - w(i, j, k, ivz)
+                        du3(ivz) = w(i + 2, j, k, ivz) - w(i + 1, j, k, ivz)
 
-                du1(irhoE) = p(i,  j,k) - p(i-1,j,k)
-                du2(irhoE) = p(i+1,j,k) - p(i,  j,k)
-                du3(irhoE) = p(i+2,j,k) - p(i+1,j,k)
+                        du1(irhoE) = p(i, j, k) - p(i - 1, j, k)
+                        du2(irhoE) = p(i + 1, j, k) - p(i, j, k)
+                        du3(irhoE) = p(i + 2, j, k) - p(i + 1, j, k)
 
-                if( correctForK ) then
-                   du1(itu1) = w(i,  j,k,itu1) - w(i-1,j,k,itu1)
-                   du2(itu1) = w(i+1,j,k,itu1) - w(i,  j,k,itu1)
-                   du3(itu1) = w(i+2,j,k,itu1) - w(i+1,j,k,itu1)
-                endif
+                        if (correctForK) then
+                            du1(itu1) = w(i, j, k, itu1) - w(i - 1, j, k, itu1)
+                            du2(itu1) = w(i + 1, j, k, itu1) - w(i, j, k, itu1)
+                            du3(itu1) = w(i + 2, j, k, itu1) - w(i + 1, j, k, itu1)
+                        end if
 
-                ! Compute the differences from the first order scheme.
+                        ! Compute the differences from the first order scheme.
 
-                call leftRightState(du1, du2, du3, rotMatrixI, &
-                     left, right)
+                        call leftRightState(du1, du2, du3, rotMatrixI, &
+                                            left, right)
 
-                ! Add the first order part to the currently stored
-                ! differences, such that the correct state vector
-                ! is stored.
+                        ! Add the first order part to the currently stored
+                        ! differences, such that the correct state vector
+                        ! is stored.
 
-                left(irho)  = left(irho)  + w(i,j,k,irho)
-                left(ivx)   = left(ivx)   + w(i,j,k,ivx)
-                left(ivy)   = left(ivy)   + w(i,j,k,ivy)
-                left(ivz)   = left(ivz)   + w(i,j,k,ivz)
-                left(irhoE) = left(irhoE) + p(i,j,k)
+                        left(irho) = left(irho) + w(i, j, k, irho)
+                        left(ivx) = left(ivx) + w(i, j, k, ivx)
+                        left(ivy) = left(ivy) + w(i, j, k, ivy)
+                        left(ivz) = left(ivz) + w(i, j, k, ivz)
+                        left(irhoE) = left(irhoE) + p(i, j, k)
 
-                right(irho)  = right(irho)  + w(i+1,j,k,irho)
-                right(ivx)   = right(ivx)   + w(i+1,j,k,ivx)
-                right(ivy)   = right(ivy)   + w(i+1,j,k,ivy)
-                right(ivz)   = right(ivz)   + w(i+1,j,k,ivz)
-                right(irhoE) = right(irhoE) + p(i+1,j,k)
+                        right(irho) = right(irho) + w(i + 1, j, k, irho)
+                        right(ivx) = right(ivx) + w(i + 1, j, k, ivx)
+                        right(ivy) = right(ivy) + w(i + 1, j, k, ivy)
+                        right(ivz) = right(ivz) + w(i + 1, j, k, ivz)
+                        right(irhoE) = right(irhoE) + p(i + 1, j, k)
 
-                if( correctForK ) then
-                   left(itu1)  = left(itu1)  + w(i,j,k,itu1)
-                   right(itu1) = right(itu1) + w(i+1,j,k,itu1)
-                endif
+                        if (correctForK) then
+                            left(itu1) = left(itu1) + w(i, j, k, itu1)
+                            right(itu1) = right(itu1) + w(i + 1, j, k, itu1)
+                        end if
 
-                ! Store the normal vector, the porosity and the
-                ! mesh velocity if present.
+                        ! Store the normal vector, the porosity and the
+                        ! mesh velocity if present.
 
-                sx = si(i,j,k,1); sy = si(i,j,k,2); sz = si(i,j,k,3)
-                por = porI(i,j,k)
-                if( addGridVelocities ) sFace = sFaceI(i,j,k)
+                        sx = si(i, j, k, 1); sy = si(i, j, k, 2); sz = si(i, j, k, 3)
+                        por = porI(i, j, k)
+                        if (addGridVelocities) sFace = sFaceI(i, j, k)
 
-                ! Compute the value of gamma on the face. Take an
-                ! arithmetic average of the two states.
+                        ! Compute the value of gamma on the face. Take an
+                        ! arithmetic average of the two states.
 
-                gammaFace = half*(gamma(i,j,k) + gamma(i+1,j,k))
+                        gammaFace = half * (gamma(i, j, k) + gamma(i + 1, j, k))
 
-                ! Compute the dissipative flux across the interface.
+                        ! Compute the dissipative flux across the interface.
 
-                call riemannFlux(left, right, flux)
+                        call riemannFlux(left, right, flux)
 
-                ! And scatter it to the left and right.
+                        ! And scatter it to the left and right.
 
-                fw(i,j,k,irho)  = fw(i,j,k,irho)  + flux(irho)
-                fw(i,j,k,imx)   = fw(i,j,k,imx)   + flux(imx)
-                fw(i,j,k,imy)   = fw(i,j,k,imy)   + flux(imy)
-                fw(i,j,k,imz)   = fw(i,j,k,imz)   + flux(imz)
-                fw(i,j,k,irhoE) = fw(i,j,k,irhoE) + flux(irhoE)
+                        fw(i, j, k, irho) = fw(i, j, k, irho) + flux(irho)
+                        fw(i, j, k, imx) = fw(i, j, k, imx) + flux(imx)
+                        fw(i, j, k, imy) = fw(i, j, k, imy) + flux(imy)
+                        fw(i, j, k, imz) = fw(i, j, k, imz) + flux(imz)
+                        fw(i, j, k, irhoE) = fw(i, j, k, irhoE) + flux(irhoE)
 
-                fw(i+1,j,k,irho)  = fw(i+1,j,k,irho)  - flux(irho)
-                fw(i+1,j,k,imx)   = fw(i+1,j,k,imx)   - flux(imx)
-                fw(i+1,j,k,imy)   = fw(i+1,j,k,imy)   - flux(imy)
-                fw(i+1,j,k,imz)   = fw(i+1,j,k,imz)   - flux(imz)
-                fw(i+1,j,k,irhoE) = fw(i+1,j,k,irhoE) - flux(irhoE)
+                        fw(i + 1, j, k, irho) = fw(i + 1, j, k, irho) - flux(irho)
+                        fw(i + 1, j, k, imx) = fw(i + 1, j, k, imx) - flux(imx)
+                        fw(i + 1, j, k, imy) = fw(i + 1, j, k, imy) - flux(imy)
+                        fw(i + 1, j, k, imz) = fw(i + 1, j, k, imz) - flux(imz)
+                        fw(i + 1, j, k, irhoE) = fw(i + 1, j, k, irhoE) - flux(irhoE)
 
-                ! Store the density flux in the mass flow of the
-                ! appropriate sliding mesh interface.
+                        ! Store the density flux in the mass flow of the
+                        ! appropriate sliding mesh interface.
 #ifndef USE_TAPENADE
-                ind = indFamilyI(i,j,k)
-                massFlowFamilyDiss(ind,spectralSol) =       &
-                     massFlowFamilyDiss(ind,spectralSol) &
-                     + factFamilyI(i,j,k)*flux(irho)
+                        ind = indFamilyI(i, j, k)
+                        massFlowFamilyDiss(ind, spectralSol) = &
+                            massFlowFamilyDiss(ind, spectralSol) &
+                            + factFamilyI(i, j, k) * flux(irho)
 #endif
-             enddo
-          enddo
-       enddo
+                    end do
+                end do
+            end do
 
-       ! Fluxes in the j-direction.
+            ! Fluxes in the j-direction.
 
-       do k=2,kl
-          do j=1,jl
-             do i=2,il
+            do k = 2, kl
+                do j = 1, jl
+                    do i = 2, il
 
-                ! Store the three differences used in the interpolation
-                ! in du1, du2, du3.
+                        ! Store the three differences used in the interpolation
+                        ! in du1, du2, du3.
 
-                du1(irho) = w(i,  j,k,irho) - w(i,j-1,k,irho)
-                du2(irho) = w(i,j+1,k,irho) - w(i,  j,k,irho)
-                du3(irho) = w(i,j+2,k,irho) - w(i,j+1,k,irho)
+                        du1(irho) = w(i, j, k, irho) - w(i, j - 1, k, irho)
+                        du2(irho) = w(i, j + 1, k, irho) - w(i, j, k, irho)
+                        du3(irho) = w(i, j + 2, k, irho) - w(i, j + 1, k, irho)
 
-                du1(ivx) = w(i,  j,k,ivx) - w(i,j-1,k,ivx)
-                du2(ivx) = w(i,j+1,k,ivx) - w(i,  j,k,ivx)
-                du3(ivx) = w(i,j+2,k,ivx) - w(i,j+1,k,ivx)
+                        du1(ivx) = w(i, j, k, ivx) - w(i, j - 1, k, ivx)
+                        du2(ivx) = w(i, j + 1, k, ivx) - w(i, j, k, ivx)
+                        du3(ivx) = w(i, j + 2, k, ivx) - w(i, j + 1, k, ivx)
 
-                du1(ivy) = w(i,  j,k,ivy) - w(i,j-1,k,ivy)
-                du2(ivy) = w(i,j+1,k,ivy) - w(i,  j,k,ivy)
-                du3(ivy) = w(i,j+2,k,ivy) - w(i,j+1,k,ivy)
+                        du1(ivy) = w(i, j, k, ivy) - w(i, j - 1, k, ivy)
+                        du2(ivy) = w(i, j + 1, k, ivy) - w(i, j, k, ivy)
+                        du3(ivy) = w(i, j + 2, k, ivy) - w(i, j + 1, k, ivy)
 
-                du1(ivz) = w(i,  j,k,ivz) - w(i,j-1,k,ivz)
-                du2(ivz) = w(i,j+1,k,ivz) - w(i,  j,k,ivz)
-                du3(ivz) = w(i,j+2,k,ivz) - w(i,j+1,k,ivz)
+                        du1(ivz) = w(i, j, k, ivz) - w(i, j - 1, k, ivz)
+                        du2(ivz) = w(i, j + 1, k, ivz) - w(i, j, k, ivz)
+                        du3(ivz) = w(i, j + 2, k, ivz) - w(i, j + 1, k, ivz)
 
-                du1(irhoE) = p(i,  j,k) - p(i,j-1,k)
-                du2(irhoE) = p(i,j+1,k) - p(i,  j,k)
-                du3(irhoE) = p(i,j+2,k) - p(i,j+1,k)
+                        du1(irhoE) = p(i, j, k) - p(i, j - 1, k)
+                        du2(irhoE) = p(i, j + 1, k) - p(i, j, k)
+                        du3(irhoE) = p(i, j + 2, k) - p(i, j + 1, k)
 
-                if( correctForK ) then
-                   du1(itu1) = w(i,  j,k,itu1) - w(i,j-1,k,itu1)
-                   du2(itu1) = w(i,j+1,k,itu1) - w(i,  j,k,itu1)
-                   du3(itu1) = w(i,j+2,k,itu1) - w(i,j+1,k,itu1)
-                endif
+                        if (correctForK) then
+                            du1(itu1) = w(i, j, k, itu1) - w(i, j - 1, k, itu1)
+                            du2(itu1) = w(i, j + 1, k, itu1) - w(i, j, k, itu1)
+                            du3(itu1) = w(i, j + 2, k, itu1) - w(i, j + 1, k, itu1)
+                        end if
 
-                ! Compute the differences from the first order scheme.
+                        ! Compute the differences from the first order scheme.
 
-                call leftRightState(du1, du2, du3, rotMatrixJ, &
-                     left, right)
+                        call leftRightState(du1, du2, du3, rotMatrixJ, &
+                                            left, right)
 
-                ! Add the first order part to the currently stored
-                ! differences, such that the correct state vector
-                ! is stored.
+                        ! Add the first order part to the currently stored
+                        ! differences, such that the correct state vector
+                        ! is stored.
 
-                left(irho)  = left(irho)  + w(i,j,k,irho)
-                left(ivx)   = left(ivx)   + w(i,j,k,ivx)
-                left(ivy)   = left(ivy)   + w(i,j,k,ivy)
-                left(ivz)   = left(ivz)   + w(i,j,k,ivz)
-                left(irhoE) = left(irhoE) + p(i,j,k)
+                        left(irho) = left(irho) + w(i, j, k, irho)
+                        left(ivx) = left(ivx) + w(i, j, k, ivx)
+                        left(ivy) = left(ivy) + w(i, j, k, ivy)
+                        left(ivz) = left(ivz) + w(i, j, k, ivz)
+                        left(irhoE) = left(irhoE) + p(i, j, k)
 
-                right(irho)  = right(irho)  + w(i,j+1,k,irho)
-                right(ivx)   = right(ivx)   + w(i,j+1,k,ivx)
-                right(ivy)   = right(ivy)   + w(i,j+1,k,ivy)
-                right(ivz)   = right(ivz)   + w(i,j+1,k,ivz)
-                right(irhoE) = right(irhoE) + p(i,j+1,k)
+                        right(irho) = right(irho) + w(i, j + 1, k, irho)
+                        right(ivx) = right(ivx) + w(i, j + 1, k, ivx)
+                        right(ivy) = right(ivy) + w(i, j + 1, k, ivy)
+                        right(ivz) = right(ivz) + w(i, j + 1, k, ivz)
+                        right(irhoE) = right(irhoE) + p(i, j + 1, k)
 
-                if( correctForK ) then
-                   left(itu1)  = left(itu1)  + w(i,j,k,itu1)
-                   right(itu1) = right(itu1) + w(i,j+1,k,itu1)
-                endif
+                        if (correctForK) then
+                            left(itu1) = left(itu1) + w(i, j, k, itu1)
+                            right(itu1) = right(itu1) + w(i, j + 1, k, itu1)
+                        end if
 
-                ! Store the normal vector, the porosity and the
-                ! mesh velocity if present.
+                        ! Store the normal vector, the porosity and the
+                        ! mesh velocity if present.
 
-                sx = sj(i,j,k,1); sy = sj(i,j,k,2); sz = sj(i,j,k,3)
-                por = porJ(i,j,k)
-                if( addGridVelocities ) sFace = sFaceJ(i,j,k)
+                        sx = sj(i, j, k, 1); sy = sj(i, j, k, 2); sz = sj(i, j, k, 3)
+                        por = porJ(i, j, k)
+                        if (addGridVelocities) sFace = sFaceJ(i, j, k)
 
-                ! Compute the value of gamma on the face. Take an
-                ! arithmetic average of the two states.
+                        ! Compute the value of gamma on the face. Take an
+                        ! arithmetic average of the two states.
 
-                gammaFace = half*(gamma(i,j,k) + gamma(i,j+1,k))
+                        gammaFace = half * (gamma(i, j, k) + gamma(i, j + 1, k))
 
-                ! Compute the dissipative flux across the interface.
+                        ! Compute the dissipative flux across the interface.
 
-                call riemannFlux(left, right, flux)
+                        call riemannFlux(left, right, flux)
 
-                ! And scatter it to the left and right.
+                        ! And scatter it to the left and right.
 
-                fw(i,j,k,irho)  = fw(i,j,k,irho)  + flux(irho)
-                fw(i,j,k,imx)   = fw(i,j,k,imx)   + flux(imx)
-                fw(i,j,k,imy)   = fw(i,j,k,imy)   + flux(imy)
-                fw(i,j,k,imz)   = fw(i,j,k,imz)   + flux(imz)
-                fw(i,j,k,irhoE) = fw(i,j,k,irhoE) + flux(irhoE)
+                        fw(i, j, k, irho) = fw(i, j, k, irho) + flux(irho)
+                        fw(i, j, k, imx) = fw(i, j, k, imx) + flux(imx)
+                        fw(i, j, k, imy) = fw(i, j, k, imy) + flux(imy)
+                        fw(i, j, k, imz) = fw(i, j, k, imz) + flux(imz)
+                        fw(i, j, k, irhoE) = fw(i, j, k, irhoE) + flux(irhoE)
 
-                fw(i,j+1,k,irho)  = fw(i,j+1,k,irho)  - flux(irho)
-                fw(i,j+1,k,imx)   = fw(i,j+1,k,imx)   - flux(imx)
-                fw(i,j+1,k,imy)   = fw(i,j+1,k,imy)   - flux(imy)
-                fw(i,j+1,k,imz)   = fw(i,j+1,k,imz)   - flux(imz)
-                fw(i,j+1,k,irhoE) = fw(i,j+1,k,irhoE) - flux(irhoE)
+                        fw(i, j + 1, k, irho) = fw(i, j + 1, k, irho) - flux(irho)
+                        fw(i, j + 1, k, imx) = fw(i, j + 1, k, imx) - flux(imx)
+                        fw(i, j + 1, k, imy) = fw(i, j + 1, k, imy) - flux(imy)
+                        fw(i, j + 1, k, imz) = fw(i, j + 1, k, imz) - flux(imz)
+                        fw(i, j + 1, k, irhoE) = fw(i, j + 1, k, irhoE) - flux(irhoE)
 
-                ! Store the density flux in the mass flow of the
-                ! appropriate sliding mesh interface.
+                        ! Store the density flux in the mass flow of the
+                        ! appropriate sliding mesh interface.
 #ifndef USE_TAPENADE
-                ind = indFamilyJ(i,j,k)
-                massFlowFamilyDiss(ind,spectralSol) =       &
-                     massFlowFamilyDiss(ind,spectralSol) &
-                     + factFamilyJ(i,j,k)*flux(irho)
+                        ind = indFamilyJ(i, j, k)
+                        massFlowFamilyDiss(ind, spectralSol) = &
+                            massFlowFamilyDiss(ind, spectralSol) &
+                            + factFamilyJ(i, j, k) * flux(irho)
 #endif
-             enddo
-          enddo
-       enddo
+                    end do
+                end do
+            end do
 
-       ! Fluxes in the k-direction.
+            ! Fluxes in the k-direction.
 
-       do k=1,kl
-          do j=2,jl
-             do i=2,il
+            do k = 1, kl
+                do j = 2, jl
+                    do i = 2, il
 
-                ! Store the three differences used in the interpolation
-                ! in du1, du2, du3.
+                        ! Store the three differences used in the interpolation
+                        ! in du1, du2, du3.
 
-                du1(irho) = w(i,  j,k,irho) - w(i,j,k-1,irho)
-                du2(irho) = w(i,j,k+1,irho) - w(i,  j,k,irho)
-                du3(irho) = w(i,j,k+2,irho) - w(i,j,k+1,irho)
+                        du1(irho) = w(i, j, k, irho) - w(i, j, k - 1, irho)
+                        du2(irho) = w(i, j, k + 1, irho) - w(i, j, k, irho)
+                        du3(irho) = w(i, j, k + 2, irho) - w(i, j, k + 1, irho)
 
-                du1(ivx) = w(i,  j,k,ivx) - w(i,j,k-1,ivx)
-                du2(ivx) = w(i,j,k+1,ivx) - w(i,  j,k,ivx)
-                du3(ivx) = w(i,j,k+2,ivx) - w(i,j,k+1,ivx)
+                        du1(ivx) = w(i, j, k, ivx) - w(i, j, k - 1, ivx)
+                        du2(ivx) = w(i, j, k + 1, ivx) - w(i, j, k, ivx)
+                        du3(ivx) = w(i, j, k + 2, ivx) - w(i, j, k + 1, ivx)
 
-                du1(ivy) = w(i,  j,k,ivy) - w(i,j,k-1,ivy)
-                du2(ivy) = w(i,j,k+1,ivy) - w(i,  j,k,ivy)
-                du3(ivy) = w(i,j,k+2,ivy) - w(i,j,k+1,ivy)
+                        du1(ivy) = w(i, j, k, ivy) - w(i, j, k - 1, ivy)
+                        du2(ivy) = w(i, j, k + 1, ivy) - w(i, j, k, ivy)
+                        du3(ivy) = w(i, j, k + 2, ivy) - w(i, j, k + 1, ivy)
 
-                du1(ivz) = w(i,  j,k,ivz) - w(i,j,k-1,ivz)
-                du2(ivz) = w(i,j,k+1,ivz) - w(i,  j,k,ivz)
-                du3(ivz) = w(i,j,k+2,ivz) - w(i,j,k+1,ivz)
+                        du1(ivz) = w(i, j, k, ivz) - w(i, j, k - 1, ivz)
+                        du2(ivz) = w(i, j, k + 1, ivz) - w(i, j, k, ivz)
+                        du3(ivz) = w(i, j, k + 2, ivz) - w(i, j, k + 1, ivz)
 
-                du1(irhoE) = p(i,  j,k) - p(i,j,k-1)
-                du2(irhoE) = p(i,j,k+1) - p(i,  j,k)
-                du3(irhoE) = p(i,j,k+2) - p(i,j,k+1)
+                        du1(irhoE) = p(i, j, k) - p(i, j, k - 1)
+                        du2(irhoE) = p(i, j, k + 1) - p(i, j, k)
+                        du3(irhoE) = p(i, j, k + 2) - p(i, j, k + 1)
 
-                if( correctForK ) then
-                   du1(itu1) = w(i,  j,k,itu1) - w(i,j,k-1,itu1)
-                   du2(itu1) = w(i,j,k+1,itu1) - w(i,  j,k,itu1)
-                   du3(itu1) = w(i,j,k+2,itu1) - w(i,j,k+1,itu1)
-                endif
+                        if (correctForK) then
+                            du1(itu1) = w(i, j, k, itu1) - w(i, j, k - 1, itu1)
+                            du2(itu1) = w(i, j, k + 1, itu1) - w(i, j, k, itu1)
+                            du3(itu1) = w(i, j, k + 2, itu1) - w(i, j, k + 1, itu1)
+                        end if
 
-                ! Compute the differences from the first order scheme.
+                        ! Compute the differences from the first order scheme.
 
-                call leftRightState(du1, du2, du3, rotMatrixK, &
-                     left, right)
+                        call leftRightState(du1, du2, du3, rotMatrixK, &
+                                            left, right)
 
-                ! Add the first order part to the currently stored
-                ! differences, such that the correct state vector
-                ! is stored.
+                        ! Add the first order part to the currently stored
+                        ! differences, such that the correct state vector
+                        ! is stored.
 
-                left(irho)  = left(irho)  + w(i,j,k,irho)
-                left(ivx)   = left(ivx)   + w(i,j,k,ivx)
-                left(ivy)   = left(ivy)   + w(i,j,k,ivy)
-                left(ivz)   = left(ivz)   + w(i,j,k,ivz)
-                left(irhoE) = left(irhoE) + p(i,j,k)
+                        left(irho) = left(irho) + w(i, j, k, irho)
+                        left(ivx) = left(ivx) + w(i, j, k, ivx)
+                        left(ivy) = left(ivy) + w(i, j, k, ivy)
+                        left(ivz) = left(ivz) + w(i, j, k, ivz)
+                        left(irhoE) = left(irhoE) + p(i, j, k)
 
-                right(irho)  = right(irho)  + w(i,j,k+1,irho)
-                right(ivx)   = right(ivx)   + w(i,j,k+1,ivx)
-                right(ivy)   = right(ivy)   + w(i,j,k+1,ivy)
-                right(ivz)   = right(ivz)   + w(i,j,k+1,ivz)
-                right(irhoE) = right(irhoE) + p(i,j,k+1)
+                        right(irho) = right(irho) + w(i, j, k + 1, irho)
+                        right(ivx) = right(ivx) + w(i, j, k + 1, ivx)
+                        right(ivy) = right(ivy) + w(i, j, k + 1, ivy)
+                        right(ivz) = right(ivz) + w(i, j, k + 1, ivz)
+                        right(irhoE) = right(irhoE) + p(i, j, k + 1)
 
-                if( correctForK ) then
-                   left(itu1)  = left(itu1)  + w(i,j,k,itu1)
-                   right(itu1) = right(itu1) + w(i,j,k+1,itu1)
-                endif
+                        if (correctForK) then
+                            left(itu1) = left(itu1) + w(i, j, k, itu1)
+                            right(itu1) = right(itu1) + w(i, j, k + 1, itu1)
+                        end if
 
-                ! Store the normal vector, the porosity and the
-                ! mesh velocity if present.
+                        ! Store the normal vector, the porosity and the
+                        ! mesh velocity if present.
 
-                sx = sk(i,j,k,1); sy = sk(i,j,k,2); sz = sk(i,j,k,3)
-                por = porK(i,j,k)
-                if( addGridVelocities ) sFace = sFaceK(i,j,k)
+                        sx = sk(i, j, k, 1); sy = sk(i, j, k, 2); sz = sk(i, j, k, 3)
+                        por = porK(i, j, k)
+                        if (addGridVelocities) sFace = sFaceK(i, j, k)
 
-                ! Compute the value of gamma on the face. Take an
-                ! arithmetic average of the two states.
+                        ! Compute the value of gamma on the face. Take an
+                        ! arithmetic average of the two states.
 
-                gammaFace = half*(gamma(i,j,k) + gamma(i,j,k+1))
+                        gammaFace = half * (gamma(i, j, k) + gamma(i, j, k + 1))
 
-                ! Compute the dissipative flux across the interface.
+                        ! Compute the dissipative flux across the interface.
 
-                call riemannFlux(left, right, flux)
+                        call riemannFlux(left, right, flux)
 
-                ! And scatter it to the left and right.
+                        ! And scatter it to the left and right.
 
-                fw(i,j,k,irho)  = fw(i,j,k,irho)  + flux(irho)
-                fw(i,j,k,imx)   = fw(i,j,k,imx)   + flux(imx)
-                fw(i,j,k,imy)   = fw(i,j,k,imy)   + flux(imy)
-                fw(i,j,k,imz)   = fw(i,j,k,imz)   + flux(imz)
-                fw(i,j,k,irhoE) = fw(i,j,k,irhoE) + flux(irhoE)
+                        fw(i, j, k, irho) = fw(i, j, k, irho) + flux(irho)
+                        fw(i, j, k, imx) = fw(i, j, k, imx) + flux(imx)
+                        fw(i, j, k, imy) = fw(i, j, k, imy) + flux(imy)
+                        fw(i, j, k, imz) = fw(i, j, k, imz) + flux(imz)
+                        fw(i, j, k, irhoE) = fw(i, j, k, irhoE) + flux(irhoE)
 
-                fw(i,j,k+1,irho)  = fw(i,j,k+1,irho)  - flux(irho)
-                fw(i,j,k+1,imx)   = fw(i,j,k+1,imx)   - flux(imx)
-                fw(i,j,k+1,imy)   = fw(i,j,k+1,imy)   - flux(imy)
-                fw(i,j,k+1,imz)   = fw(i,j,k+1,imz)   - flux(imz)
-                fw(i,j,k+1,irhoE) = fw(i,j,k+1,irhoE) - flux(irhoE)
+                        fw(i, j, k + 1, irho) = fw(i, j, k + 1, irho) - flux(irho)
+                        fw(i, j, k + 1, imx) = fw(i, j, k + 1, imx) - flux(imx)
+                        fw(i, j, k + 1, imy) = fw(i, j, k + 1, imy) - flux(imy)
+                        fw(i, j, k + 1, imz) = fw(i, j, k + 1, imz) - flux(imz)
+                        fw(i, j, k + 1, irhoE) = fw(i, j, k + 1, irhoE) - flux(irhoE)
 
-                ! Store the density flux in the mass flow of the
-                ! appropriate sliding mesh interface.
+                        ! Store the density flux in the mass flow of the
+                        ! appropriate sliding mesh interface.
 #ifndef USE_TAPENADE
-                ind = indFamilyK(i,j,k)
-                massFlowFamilyDiss(ind,spectralSol) =       &
-                     massFlowFamilyDiss(ind,spectralSol) &
-                     + factFamilyK(i,j,k)*flux(irho)
+                        ind = indFamilyK(i, j, k)
+                        massFlowFamilyDiss(ind, spectralSol) = &
+                            massFlowFamilyDiss(ind, spectralSol) &
+                            + factFamilyK(i, j, k) * flux(irho)
 #endif
-             enddo
-          enddo
-       enddo
+                    end do
+                end do
+            end do
 
-    endif orderTest
+        end if orderTest
 
-    !      ==================================================================
+        !      ==================================================================
 
-  contains
+    contains
 
-    subroutine leftRightState(du1, du2, du3, rotMatrix, left, right)
-      !
-      !         leftRightState computes the differences in the left and
-      !         right state compared to the first order interpolation. For a
-      !         monotonic second order discretization the interpolations
-      !         need to be nonlinear. The linear second order scheme can be
-      !         stable (depending on the value of kappa), but it will have
-      !         oscillations near discontinuities.
-      !
-      implicit none
-      !
-      !        Local parameter.
-      !
-      real(kind=realType), parameter :: epsLim = 1.e-10_realType
-      !
-      !        Subroutine arguments.
-      !
-      real(kind=realType), dimension(:), intent(inout) :: du1, du2, du3
-      real(kind=realType), dimension(:), intent(out)   :: left, right
+        subroutine leftRightState(du1, du2, du3, rotMatrix, left, right)
+            !
+            !         leftRightState computes the differences in the left and
+            !         right state compared to the first order interpolation. For a
+            !         monotonic second order discretization the interpolations
+            !         need to be nonlinear. The linear second order scheme can be
+            !         stable (depending on the value of kappa), but it will have
+            !         oscillations near discontinuities.
+            !
+            implicit none
+            !
+            !        Local parameter.
+            !
+            real(kind=realType), parameter :: epsLim = 1.e-10_realType
+            !
+            !        Subroutine arguments.
+            !
+            real(kind=realType), dimension(:), intent(inout) :: du1, du2, du3
+            real(kind=realType), dimension(:), intent(out) :: left, right
 
-      real(kind=realType), dimension(:,:,:,:,:), pointer :: rotMatrix
-      !
-      !        Local variables.
-      !
-      integer(kind=intType) :: l
+            real(kind=realType), dimension(:, :, :, :, :), pointer :: rotMatrix
+            !
+            !        Local variables.
+            !
+            integer(kind=intType) :: l
 
-      real(kind=realType) :: rl1, rl2, rr1, rr2, tmp, dvx, dvy, dvz
+            real(kind=realType) :: rl1, rl2, rr1, rr2, tmp, dvx, dvy, dvz
 
-      real(kind=realType), dimension(3,3) :: rot
+            real(kind=realType), dimension(3, 3) :: rot
 
-      ! Check if the velocity components should be transformed to
-      ! the cylindrical frame.
+            ! Check if the velocity components should be transformed to
+            ! the cylindrical frame.
 
-      if( rotationalPeriodic ) then
+            if (rotationalPeriodic) then
 
-         ! Store the rotation matrix a bit easier. Note that the i,j,k
-         ! come from the main subroutine.
+                ! Store the rotation matrix a bit easier. Note that the i,j,k
+                ! come from the main subroutine.
 
-         rot(1,1) = rotMatrix(i,j,k,1,1)
-         rot(1,2) = rotMatrix(i,j,k,1,2)
-         rot(1,3) = rotMatrix(i,j,k,1,3)
+                rot(1, 1) = rotMatrix(i, j, k, 1, 1)
+                rot(1, 2) = rotMatrix(i, j, k, 1, 2)
+                rot(1, 3) = rotMatrix(i, j, k, 1, 3)
 
-         rot(2,1) = rotMatrix(i,j,k,2,1)
-         rot(2,2) = rotMatrix(i,j,k,2,2)
-         rot(2,3) = rotMatrix(i,j,k,2,3)
+                rot(2, 1) = rotMatrix(i, j, k, 2, 1)
+                rot(2, 2) = rotMatrix(i, j, k, 2, 2)
+                rot(2, 3) = rotMatrix(i, j, k, 2, 3)
 
-         rot(3,1) = rotMatrix(i,j,k,3,1)
-         rot(3,2) = rotMatrix(i,j,k,3,2)
-         rot(3,3) = rotMatrix(i,j,k,3,3)
+                rot(3, 1) = rotMatrix(i, j, k, 3, 1)
+                rot(3, 2) = rotMatrix(i, j, k, 3, 2)
+                rot(3, 3) = rotMatrix(i, j, k, 3, 3)
 
-         ! Apply the transformation to the velocity components
-         ! of du1, du2 and du3.
+                ! Apply the transformation to the velocity components
+                ! of du1, du2 and du3.
 
-         dvx = du1(ivx); dvy = du1(ivy); dvz = du1(ivz)
-         du1(ivx) = rot(1,1)*dvx + rot(1,2)*dvy + rot(1,3)*dvz
-         du1(ivy) = rot(2,1)*dvx + rot(2,2)*dvy + rot(2,3)*dvz
-         du1(ivz) = rot(3,1)*dvx + rot(3,2)*dvy + rot(3,3)*dvz
+                dvx = du1(ivx); dvy = du1(ivy); dvz = du1(ivz)
+                du1(ivx) = rot(1, 1) * dvx + rot(1, 2) * dvy + rot(1, 3) * dvz
+                du1(ivy) = rot(2, 1) * dvx + rot(2, 2) * dvy + rot(2, 3) * dvz
+                du1(ivz) = rot(3, 1) * dvx + rot(3, 2) * dvy + rot(3, 3) * dvz
 
-         dvx = du2(ivx); dvy = du2(ivy); dvz = du2(ivz)
-         du2(ivx) = rot(1,1)*dvx + rot(1,2)*dvy + rot(1,3)*dvz
-         du2(ivy) = rot(2,1)*dvx + rot(2,2)*dvy + rot(2,3)*dvz
-         du2(ivz) = rot(3,1)*dvx + rot(3,2)*dvy + rot(3,3)*dvz
+                dvx = du2(ivx); dvy = du2(ivy); dvz = du2(ivz)
+                du2(ivx) = rot(1, 1) * dvx + rot(1, 2) * dvy + rot(1, 3) * dvz
+                du2(ivy) = rot(2, 1) * dvx + rot(2, 2) * dvy + rot(2, 3) * dvz
+                du2(ivz) = rot(3, 1) * dvx + rot(3, 2) * dvy + rot(3, 3) * dvz
 
-         dvx = du3(ivx); dvy = du3(ivy); dvz = du3(ivz)
-         du3(ivx) = rot(1,1)*dvx + rot(1,2)*dvy + rot(1,3)*dvz
-         du3(ivy) = rot(2,1)*dvx + rot(2,2)*dvy + rot(2,3)*dvz
-         du3(ivz) = rot(3,1)*dvx + rot(3,2)*dvy + rot(3,3)*dvz
+                dvx = du3(ivx); dvy = du3(ivy); dvz = du3(ivz)
+                du3(ivx) = rot(1, 1) * dvx + rot(1, 2) * dvy + rot(1, 3) * dvz
+                du3(ivy) = rot(2, 1) * dvx + rot(2, 2) * dvy + rot(2, 3) * dvz
+                du3(ivz) = rot(3, 1) * dvx + rot(3, 2) * dvy + rot(3, 3) * dvz
 
-      endif
+            end if
 
-      ! Determine the limiter used.
+            ! Determine the limiter used.
 
-      select case (limUsed)
+            select case (limUsed)
 
-      case (noLimiter)
+            case (noLimiter)
 
-         ! Linear interpolation; no limiter.
-         ! Loop over the number of variables to be interpolated.
+                ! Linear interpolation; no limiter.
+                ! Loop over the number of variables to be interpolated.
 
-         do l=1,nwInt
-            left(l)  =  omk*du1(l) + opk*du2(l)
-            right(l) = -omk*du3(l) - opk*du2(l)
-         enddo
+                do l = 1, nwInt
+                    left(l) = omk * du1(l) + opk * du2(l)
+                    right(l) = -omk * du3(l) - opk * du2(l)
+                end do
 
-         !          ==============================================================
+                !          ==============================================================
 
-      case (vanAlbeda)
+            case (vanAlbeda)
 
-         ! Nonlinear interpolation using the van albeda limiter.
-         ! Loop over the number of variables to be interpolated.
+                ! Nonlinear interpolation using the van albeda limiter.
+                ! Loop over the number of variables to be interpolated.
 
-         do l=1,nwInt
+                do l = 1, nwInt
 
-            ! Compute the limiter argument rl1, rl2, rr1 and rr2.
-            ! Note the cut off to 0.0.
+                    ! Compute the limiter argument rl1, rl2, rr1 and rr2.
+                    ! Note the cut off to 0.0.
 
-            tmp = one/sign(max(abs(du2(l)),epsLim),du2(l))
-            rl1 = max(zero, &
-                 du2(l)/sign(max(abs(du1(l)),epsLim),du1(l)))
-            rl2 = max(zero,du1(l)*tmp)
+                    tmp = one / sign(max(abs(du2(l)), epsLim), du2(l))
+                    rl1 = max(zero, &
+                              du2(l) / sign(max(abs(du1(l)), epsLim), du1(l)))
+                    rl2 = max(zero, du1(l) * tmp)
 
-            rr1 = max(zero,du3(l)*tmp)
-            rr2 = max(zero, &
-                 du2(l)/sign(max(abs(du3(l)),epsLim),du3(l)))
+                    rr1 = max(zero, du3(l) * tmp)
+                    rr2 = max(zero, &
+                              du2(l) / sign(max(abs(du3(l)), epsLim), du3(l)))
 
-            ! Compute the corresponding limiter values.
+                    ! Compute the corresponding limiter values.
 
-            rl1 = rl1*(rl1 + one)/(rl1*rl1 + one)
-            rl2 = rl2*(rl2 + one)/(rl2*rl2 + one)
-            rr1 = rr1*(rr1 + one)/(rr1*rr1 + one)
-            rr2 = rr2*(rr2 + one)/(rr2*rr2 + one)
+                    rl1 = rl1 * (rl1 + one) / (rl1 * rl1 + one)
+                    rl2 = rl2 * (rl2 + one) / (rl2 * rl2 + one)
+                    rr1 = rr1 * (rr1 + one) / (rr1 * rr1 + one)
+                    rr2 = rr2 * (rr2 + one) / (rr2 * rr2 + one)
 
-            ! Compute the nonlinear corrections to the first order
-            ! scheme.
+                    ! Compute the nonlinear corrections to the first order
+                    ! scheme.
 
-            left(l)  =  omk*rl1*du1(l) + opk*rl2*du2(l)
-            right(l) = -opk*rr1*du2(l) - omk*rr2*du3(l)
+                    left(l) = omk * rl1 * du1(l) + opk * rl2 * du2(l)
+                    right(l) = -opk * rr1 * du2(l) - omk * rr2 * du3(l)
 
-         enddo
+                end do
+
+                !          ==============================================================
 
-         !          ==============================================================
+            case (minmod)
 
-      case (minmod)
+                ! Nonlinear interpolation using the minmod limiter.
+                ! Loop over the number of variables to be interpolated.
 
-         ! Nonlinear interpolation using the minmod limiter.
-         ! Loop over the number of variables to be interpolated.
+                do l = 1, nwInt
 
-         do l=1,nwInt
+                    ! Compute the limiter argument rl1, rl2, rr1 and rr2.
+                    ! Note the cut off to 0.0.
 
-            ! Compute the limiter argument rl1, rl2, rr1 and rr2.
-            ! Note the cut off to 0.0.
+                    tmp = one / sign(max(abs(du2(l)), epsLim), du2(l))
+                    rl1 = max(zero, &
+                              du2(l) / sign(max(abs(du1(l)), epsLim), du1(l)))
+                    rl2 = max(zero, du1(l) * tmp)
 
-            tmp = one/sign(max(abs(du2(l)),epsLim),du2(l))
-            rl1 = max(zero, &
-                 du2(l)/sign(max(abs(du1(l)),epsLim),du1(l)))
-            rl2 = max(zero,du1(l)*tmp)
+                    rr1 = max(zero, du3(l) * tmp)
+                    rr2 = max(zero, &
+                              du2(l) / sign(max(abs(du3(l)), epsLim), du3(l)))
 
-            rr1 = max(zero,du3(l)*tmp)
-            rr2 = max(zero, &
-                 du2(l)/sign(max(abs(du3(l)),epsLim),du3(l)))
+                    ! Compute the corresponding limiter values.
 
-            ! Compute the corresponding limiter values.
+                    rl1 = min(one, factMinmod * rl1)
+                    rl2 = min(one, factMinmod * rl2)
+                    rr1 = min(one, factMinmod * rr1)
+                    rr2 = min(one, factMinmod * rr2)
 
-            rl1 = min(one, factMinmod*rl1)
-            rl2 = min(one, factMinmod*rl2)
-            rr1 = min(one, factMinmod*rr1)
-            rr2 = min(one, factMinmod*rr2)
+                    ! Compute the nonlinear corrections to the first order
+                    ! scheme.
 
-            ! Compute the nonlinear corrections to the first order
-            ! scheme.
+                    left(l) = omk * rl1 * du1(l) + opk * rl2 * du2(l)
+                    right(l) = -opk * rr1 * du2(l) - omk * rr2 * du3(l)
 
-            left(l)  =  omk*rl1*du1(l) + opk*rl2*du2(l)
-            right(l) = -opk*rr1*du2(l) - omk*rr2*du3(l)
+                end do
 
-         enddo
+            end select
 
-      end select
+            ! In case only a first order scheme must be used for the
+            ! turbulent transport equations, set the correction for the
+            ! turbulent kinetic energy to 0.
 
-      ! In case only a first order scheme must be used for the
-      ! turbulent transport equations, set the correction for the
-      ! turbulent kinetic energy to 0.
+            if (firstOrderK) then
+                left(itu1) = zero
+                right(itu1) = zero
+            end if
 
-      if( firstOrderK ) then
-         left(itu1)  = zero
-         right(itu1) = zero
-      endif
+            ! For rotational periodic problems transform the velocity
+            ! differences back to Cartesian again. Note that now the
+            ! transpose of the rotation matrix must be used.
 
-      ! For rotational periodic problems transform the velocity
-      ! differences back to Cartesian again. Note that now the
-      ! transpose of the rotation matrix must be used.
+            if (rotationalPeriodic) then
 
-      if( rotationalPeriodic ) then
+                ! Left state.
 
-         ! Left state.
+                dvx = left(ivx); dvy = left(ivy); dvz = left(ivz)
+                left(ivx) = rot(1, 1) * dvx + rot(2, 1) * dvy + rot(3, 1) * dvz
+                left(ivy) = rot(1, 2) * dvx + rot(2, 2) * dvy + rot(3, 2) * dvz
+                left(ivz) = rot(1, 3) * dvx + rot(2, 3) * dvy + rot(3, 3) * dvz
 
-         dvx = left(ivx); dvy = left(ivy); dvz = left(ivz)
-         left(ivx) = rot(1,1)*dvx + rot(2,1)*dvy + rot(3,1)*dvz
-         left(ivy) = rot(1,2)*dvx + rot(2,2)*dvy + rot(3,2)*dvz
-         left(ivz) = rot(1,3)*dvx + rot(2,3)*dvy + rot(3,3)*dvz
+                ! Right state.
 
-         ! Right state.
+                dvx = right(ivx); dvy = right(ivy); dvz = right(ivz)
+                right(ivx) = rot(1, 1) * dvx + rot(2, 1) * dvy + rot(3, 1) * dvz
+                right(ivy) = rot(1, 2) * dvx + rot(2, 2) * dvy + rot(3, 2) * dvz
+                right(ivz) = rot(1, 3) * dvx + rot(2, 3) * dvy + rot(3, 3) * dvz
 
-         dvx = right(ivx); dvy = right(ivy); dvz = right(ivz)
-         right(ivx) = rot(1,1)*dvx + rot(2,1)*dvy + rot(3,1)*dvz
-         right(ivy) = rot(1,2)*dvx + rot(2,2)*dvy + rot(3,2)*dvz
-         right(ivz) = rot(1,3)*dvx + rot(2,3)*dvy + rot(3,3)*dvz
+            end if
 
-      endif
+        end subroutine leftRightState
 
-    end subroutine leftRightState
+        !        ================================================================
 
-    !        ================================================================
+        subroutine riemannFlux(left, right, flux)
+            !
+            !         riemannFlux computes the flux for the given face and left
+            !         and right states.
+            !
+            implicit none
+            !
+            !        Subroutine arguments.
+            !
+            real(kind=realType), dimension(*), intent(in) :: left, right
+            real(kind=realType), dimension(*), intent(out) :: flux
+            !
+            !        Local variables.
+            !
+            real(kind=realType) :: porFlux, rFace
+            real(kind=realType) :: Etl, Etr, z1l, z1r, tmp
+            real(kind=realType) :: dr, dru, drv, drw, drE, drk
+            real(kind=realType) :: rAvg, uAvg, vAvg, wAvg, hAvg, kAvg
+            real(kind=realType) :: alphaAvg, a2Avg, aAvg, unAvg
+            real(kind=realType) :: ovaAvg, ova2Avg, area, Eta
+            real(kind=realType) :: gm1, gm53
+            real(kind=realType) :: lam1, lam2, lam3
+            real(kind=realType) :: abv1, abv2, abv3, abv4, abv5, abv6, abv7
+            real(kind=realType), dimension(2) :: ktmp
 
-    subroutine riemannFlux(left, right, flux)
-      !
-      !         riemannFlux computes the flux for the given face and left
-      !         and right states.
-      !
-      implicit none
-      !
-      !        Subroutine arguments.
-      !
-      real(kind=realType), dimension(*), intent(in)  :: left, right
-      real(kind=realType), dimension(*), intent(out) :: flux
-      !
-      !        Local variables.
-      !
-      real(kind=realType) :: porFlux, rFace
-      real(kind=realType) :: Etl, Etr, z1l, z1r, tmp
-      real(kind=realType) :: dr, dru, drv, drw, drE, drk
-      real(kind=realType) :: rAvg, uAvg, vAvg, wAvg, hAvg, kAvg
-      real(kind=realType) :: alphaAvg, a2Avg, aAvg, unAvg
-      real(kind=realType) :: ovaAvg, ova2Avg, area, Eta
-      real(kind=realType) :: gm1, gm53
-      real(kind=realType) :: lam1, lam2, lam3
-      real(kind=realType) :: abv1, abv2, abv3, abv4, abv5, abv6, abv7
-      real(kind=realType), dimension(2) :: ktmp
+            ! Set the porosity for the flux. The default value, 0.5*rFil, is
+            ! a scaling factor where an rFil != 1 is taken into account.
 
-      ! Set the porosity for the flux. The default value, 0.5*rFil, is
-      ! a scaling factor where an rFil != 1 is taken into account.
+            porFlux = half * rFil
+            if (por == noFlux .or. por == boundFlux) porFlux = zero
 
-      porFlux = half*rFil
-      if(por == noFlux .or. por == boundFlux)    porFlux = zero
+            ! Abbreviate some expressions in which gamma occurs.
 
-      ! Abbreviate some expressions in which gamma occurs.
+            gm1 = gammaFace - one
+            gm53 = gammaFace - five * third
 
-      gm1  = gammaFace - one
-      gm53 = gammaFace - five*third
+            ! Determine which riemann solver must be solved.
 
-      ! Determine which riemann solver must be solved.
+            select case (riemannUsed)
 
-      select case (riemannUsed)
+            case (Roe)
 
-      case (Roe)
+                ! Determine the preconditioner used.
 
-         ! Determine the preconditioner used.
+                select case (precond)
 
-         select case (precond)
+                case (noPrecond)
 
-         case (noPrecond)
+                    ! No preconditioner used. Use the Roe scheme of the
+                    ! standard equations.
 
-            ! No preconditioner used. Use the Roe scheme of the
-            ! standard equations.
+                    ! Compute the square root of the left and right densities
+                    ! and the inverse of the sum.
 
-            ! Compute the square root of the left and right densities
-            ! and the inverse of the sum.
+                    z1l = sqrt(left(irho))
+                    z1r = sqrt(right(irho))
+                    tmp = one / (z1l + z1r)
 
-            z1l = sqrt(left(irho))
-            z1r = sqrt(right(irho))
-            tmp = one/(z1l + z1r)
+                    ! Compute some variables depending whether or not a
+                    ! k-equation is present.
 
-            ! Compute some variables depending whether or not a
-            ! k-equation is present.
+                    if (correctForK) then
 
-            if( correctForK ) then
+                        ! Store the left and right kinetic energy in ktmp,
+                        ! which is needed to compute the total energy.
 
-               ! Store the left and right kinetic energy in ktmp,
-               ! which is needed to compute the total energy.
+                        ktmp(1) = left(itu1)
+                        ktmp(2) = right(itu1)
 
-               ktmp(1) = left(itu1)
-               ktmp(2) = right(itu1)
+                        ! Store the difference of the turbulent kinetic energy
+                        ! per unit volume, i.e. the conserved variable.
 
-               ! Store the difference of the turbulent kinetic energy
-               ! per unit volume, i.e. the conserved variable.
+                        drk = right(irho) * right(itu1) - left(irho) * left(itu1)
 
-               drk = right(irho)*right(itu1) - left(irho)*left(itu1)
+                        ! Compute the average turbulent energy per unit mass
+                        ! using Roe averages.
 
-               ! Compute the average turbulent energy per unit mass
-               ! using Roe averages.
+                        kAvg = tmp * (z1l * left(itu1) + z1r * right(itu1))
 
-               kAvg = tmp*(z1l*left(itu1) + z1r*right(itu1))
+                    else
 
-            else
+                        ! Set the difference of the turbulent kinetic energy
+                        ! per unit volume and the averaged kinetic energy per
+                        ! unit mass to zero.
+
+                        drk = 0.0
+                        kAvg = 0.0
+
+                    end if
+
+                    ! Compute the total energy of the left and right state.
+                    call etot(left(irho), left(ivx), left(ivy), left(ivz), &
+                              left(irhoe), ktmp(1), Etl, correctForK)
+
+                    call etot(right(irho), right(ivx), right(ivy), right(ivz), &
+                              right(irhoe), ktmp(2), Etr, correctForK)
+
+                    ! Compute the difference of the conservative mean
+                    ! flow variables.
+
+                    dr = right(irho) - left(irho)
+                    dru = right(irho) * right(ivx) - left(irho) * left(ivx)
+                    drv = right(irho) * right(ivy) - left(irho) * left(ivy)
+                    drw = right(irho) * right(ivz) - left(irho) * left(ivz)
+                    drE = Etr - Etl
+
+                    ! Compute the Roe average variables, which can be
+                    ! computed directly from the average Roe vector.
+
+                    rAvg = fourth * (z1r + z1l)**2
+                    uAvg = tmp * (z1l * left(ivx) + z1r * right(ivx))
+                    vAvg = tmp * (z1l * left(ivy) + z1r * right(ivy))
+                    wAvg = tmp * (z1l * left(ivz) + z1r * right(ivz))
+                    hAvg = tmp * ((Etl + left(irhoE)) / z1l &
+                                  + (Etr + right(irhoE)) / z1r)
 
-               ! Set the difference of the turbulent kinetic energy
-               ! per unit volume and the averaged kinetic energy per
-               ! unit mass to zero.
+                    ! Compute the unit vector and store the area of the
+                    ! normal. Also compute the unit normal velocity of the face.
 
-               drk  = 0.0
-               kAvg = 0.0
+                    area = sqrt(sx**2 + sy**2 + sz**2)
+                    tmp = one / max(1.e-25_realType, area)
+                    sx = sx * tmp
+                    sy = sy * tmp
+                    sz = sz * tmp
+                    rFace = sFace * tmp
 
-            endif
+                    ! Compute some dependent variables at the Roe
+                    ! average state.
 
-            ! Compute the total energy of the left and right state.
-            call etot(left(irho), left(ivx), left(ivy), left(ivz), &
-                 left(irhoe), ktmp(1), Etl, correctForK)
+                    alphaAvg = half * (uAvg**2 + vAvg**2 + wAvg**2)
+                    a2Avg = abs(gm1 * (hAvg - alphaAvg) - gm53 * kAvg)
+                    aAvg = sqrt(a2Avg)
+                    unAvg = uAvg * sx + vAvg * sy + wAvg * sz
 
-            call etot(right(irho), right(ivx), right(ivy), right(ivz), &
-                 right(irhoe), ktmp(2), Etr, correctForK)
+                    ovaAvg = one / aAvg
+                    ova2Avg = one / a2Avg
 
-            ! Compute the difference of the conservative mean
-            ! flow variables.
+                    ! Set for a boundary the normal velocity to rFace, the
+                    ! normal velocity of the boundary.
 
-            dr  = right(irho) - left(irho)
-            dru = right(irho)*right(ivx) - left(irho)*left(ivx)
-            drv = right(irho)*right(ivy) - left(irho)*left(ivy)
-            drw = right(irho)*right(ivz) - left(irho)*left(ivz)
-            drE = Etr - Etl
-
-            ! Compute the Roe average variables, which can be
-            ! computed directly from the average Roe vector.
+                    if (por == boundFlux) unAvg = rFace
 
-            rAvg = fourth*(z1r + z1l)**2
-            uAvg = tmp*(z1l*left(ivx) + z1r*right(ivx))
-            vAvg = tmp*(z1l*left(ivy) + z1r*right(ivy))
-            wAvg = tmp*(z1l*left(ivz) + z1r*right(ivz))
-            hAvg = tmp*((Etl+left(irhoE)) /z1l &
-                 +      (Etr+right(irhoE))/z1r)
-
-            ! Compute the unit vector and store the area of the
-            ! normal. Also compute the unit normal velocity of the face.
-
-            area  = sqrt(sx**2 + sy**2 + sz**2)
-            tmp   = one/max(1.e-25_realType,area)
-            sx    = sx*tmp
-            sy    = sy*tmp
-            sz    = sz*tmp
-            rFace = sFace*tmp
+                    ! Compute the coefficient eta for the entropy correction.
+                    ! At the moment a 1D entropy correction is used, which
+                    ! removes expansion shocks. Although it also reduces the
+                    ! carbuncle phenomenon, it does not remove it completely.
+                    ! In other to do that a multi-dimensional entropy fix is
+                    ! needed, see Sanders et. al, JCP, vol. 145, 1998,
+                    ! pp. 511 - 537. Although relatively easy to implement,
+                    ! an efficient implementation requires the storage of
+                    ! all the left and right states, which is rather
+                    ! expensive in terms of memory.
 
-            ! Compute some dependent variables at the Roe
-            ! average state.
+                    eta = half * (abs((left(ivx) - right(ivx)) * sx &
+                                      + (left(ivy) - right(ivy)) * sy &
+                                      + (left(ivz) - right(ivz)) * sz) &
+                                  + abs(sqrt(gammaFace * left(irhoE) / left(irho)) &
+                                        - sqrt(gammaFace * right(irhoE) / right(irho))))
 
-            alphaAvg = half*(uAvg**2 + vAvg**2 + wAvg**2)
-            a2Avg    = abs(gm1*(hAvg - alphaAvg) - gm53*kAvg)
-            aAvg     = sqrt(a2Avg)
-            unAvg    = uAvg*sx + vAvg*sy + wAvg*sz
+                    ! Compute the absolute values of the three eigenvalues.
 
-            ovaAvg  = one/aAvg
-            ova2Avg = one/a2Avg
+                    lam1 = abs(unAvg - rFace + aAvg)
+                    lam2 = abs(unAvg - rFace - aAvg)
+                    lam3 = abs(unAvg - rFace)
 
-            ! Set for a boundary the normal velocity to rFace, the
-            ! normal velocity of the boundary.
+                    ! Apply the entropy correction to the eigenvalues.
 
-            if(por == boundFlux) unAvg = rFace
+                    tmp = two * eta
+                    if (lam1 < tmp) lam1 = eta + fourth * lam1 * lam1 / eta
+                    if (lam2 < tmp) lam2 = eta + fourth * lam2 * lam2 / eta
+                    if (lam3 < tmp) lam3 = eta + fourth * lam3 * lam3 / eta
 
-            ! Compute the coefficient eta for the entropy correction.
-            ! At the moment a 1D entropy correction is used, which
-            ! removes expansion shocks. Although it also reduces the
-            ! carbuncle phenomenon, it does not remove it completely.
-            ! In other to do that a multi-dimensional entropy fix is
-            ! needed, see Sanders et. al, JCP, vol. 145, 1998,
-            ! pp. 511 - 537. Although relatively easy to implement,
-            ! an efficient implementation requires the storage of
-            ! all the left and right states, which is rather
-            ! expensive in terms of memory.
-
-            eta = half*(abs((left(ivx) - right(ivx))*sx        &
-                 +           (left(ivy) - right(ivy))*sy        &
-                 +           (left(ivz) - right(ivz))*sz)       &
-                 +       abs(sqrt(gammaFace*left(irhoE)/left(irho)) &
-                 -           sqrt(gammaFace*right(irhoE)/right(irho))))
-
-            ! Compute the absolute values of the three eigenvalues.
-
-            lam1 = abs(unAvg - rFace + aAvg)
-            lam2 = abs(unAvg - rFace - aAvg)
-            lam3 = abs(unAvg - rFace)
-
-            ! Apply the entropy correction to the eigenvalues.
-
-            tmp = two*eta
-            if(lam1 < tmp) lam1 = eta + fourth*lam1*lam1/eta
-            if(lam2 < tmp) lam2 = eta + fourth*lam2*lam2/eta
-            if(lam3 < tmp) lam3 = eta + fourth*lam3*lam3/eta
-
-            ! Multiply the eigenvalues by the area to obtain
-            ! the correct values for the dissipation term.
-
-            lam1 = lam1*area
-            lam2 = lam2*area
-            lam3 = lam3*area
-
-            ! Some abbreviations, which occur quite often in the
-            ! dissipation terms.
-
-            abv1 = half*(lam1 + lam2)
-            abv2 = half*(lam1 - lam2)
-            abv3 = abv1 - lam3
-
-            abv4 = gm1*(alphaAvg*dr - uAvg*dru -vAvg*drv &
-                 -      wAvg*drw + drE) - gm53*drk
-            abv5 = sx*dru + sy*drv + sz*drw - unAvg*dr
-
-            abv6 = abv3*abv4*ova2Avg + abv2*abv5*ovaAvg
-            abv7 = abv2*abv4*ovaAvg  + abv3*abv5
-
-            ! Compute the dissipation term, -|a| (wr - wl), which is
-            ! multiplied by porFlux. Note that porFlux is either
-            ! 0.0 or 0.5*rFil.
-
-            flux(irho)  = -porFlux*(lam3*dr  + abv6)
-            flux(imx)   = -porFlux*(lam3*dru + uAvg*abv6 &
-                 + sx*abv7)
-            flux(imy)   = -porFlux*(lam3*drv + vAvg*abv6 &
-                 + sy*abv7)
-            flux(imz)   = -porFlux*(lam3*drw + wAvg*abv6 &
-                 + sz*abv7)
-            flux(irhoE) = -porFlux*(lam3*drE + hAvg*abv6 &
-                 + unAvg*abv7)
-
-            !          tmp = max(lam1,lam2,lam3)
-
-            !          flux(irho)  = -porFlux*(tmp*dr)
-            !          flux(imx)   = -porFlux*(tmp*dru)
-            !          flux(imy)   = -porFlux*(tmp*drv)
-            !          flux(imz)   = -porFlux*(tmp*drw)
-            !          flux(irhoE) = -porFlux*(tmp*drE)
-
-         case (Turkel)
-            call terminate(&
-                 "riemannFlux",&
-                 "Turkel preconditioner not implemented yet")
-
-         case (ChoiMerkle)
-            call terminate("riemannFlux",&
-                 "choi merkle preconditioner not implemented yet")
-
-         end select
-
-      case (vanLeer)
-         call terminate("riemannFlux", "van leer fvs not implemented yet")
-
-      case (ausmdv)
-         call terminate("riemannFlux","ausmdv fvs not implemented yet")
-
-      end select
-
-    end subroutine riemannFlux
-
-  end subroutine inviscidUpwindFlux
-
-  subroutine viscousFlux
-    !
-    !       viscousFlux computes the viscous fluxes using a central
-    !       difference scheme for a block.
-    !       It is assumed that the pointers in block pointer already point
-    !       to the correct block.
-    !
-    use constants
-    use blockPointers
-    use flowVarRefState
-    use inputPhysics
-    use iteration
+                    ! Multiply the eigenvalues by the area to obtain
+                    ! the correct values for the dissipation term.
+
+                    lam1 = lam1 * area
+                    lam2 = lam2 * area
+                    lam3 = lam3 * area
+
+                    ! Some abbreviations, which occur quite often in the
+                    ! dissipation terms.
+
+                    abv1 = half * (lam1 + lam2)
+                    abv2 = half * (lam1 - lam2)
+                    abv3 = abv1 - lam3
+
+                    abv4 = gm1 * (alphaAvg * dr - uAvg * dru - vAvg * drv &
+                                  - wAvg * drw + drE) - gm53 * drk
+                    abv5 = sx * dru + sy * drv + sz * drw - unAvg * dr
+
+                    abv6 = abv3 * abv4 * ova2Avg + abv2 * abv5 * ovaAvg
+                    abv7 = abv2 * abv4 * ovaAvg + abv3 * abv5
+
+                    ! Compute the dissipation term, -|a| (wr - wl), which is
+                    ! multiplied by porFlux. Note that porFlux is either
+                    ! 0.0 or 0.5*rFil.
+
+                    flux(irho) = -porFlux * (lam3 * dr + abv6)
+                    flux(imx) = -porFlux * (lam3 * dru + uAvg * abv6 &
+                                            + sx * abv7)
+                    flux(imy) = -porFlux * (lam3 * drv + vAvg * abv6 &
+                                            + sy * abv7)
+                    flux(imz) = -porFlux * (lam3 * drw + wAvg * abv6 &
+                                            + sz * abv7)
+                    flux(irhoE) = -porFlux * (lam3 * drE + hAvg * abv6 &
+                                              + unAvg * abv7)
+
+                    !          tmp = max(lam1,lam2,lam3)
+
+                    !          flux(irho)  = -porFlux*(tmp*dr)
+                    !          flux(imx)   = -porFlux*(tmp*dru)
+                    !          flux(imy)   = -porFlux*(tmp*drv)
+                    !          flux(imz)   = -porFlux*(tmp*drw)
+                    !          flux(irhoE) = -porFlux*(tmp*drE)
+
+                case (Turkel)
+                    call terminate( &
+                        "riemannFlux", &
+                        "Turkel preconditioner not implemented yet")
+
+                case (ChoiMerkle)
+                    call terminate("riemannFlux", &
+                                   "choi merkle preconditioner not implemented yet")
+
+                end select
+
+            case (vanLeer)
+                call terminate("riemannFlux", "van leer fvs not implemented yet")
+
+            case (ausmdv)
+                call terminate("riemannFlux", "ausmdv fvs not implemented yet")
+
+            end select
+
+        end subroutine riemannFlux
+
+    end subroutine inviscidUpwindFlux
+
+    subroutine viscousFlux
+        !
+        !       viscousFlux computes the viscous fluxes using a central
+        !       difference scheme for a block.
+        !       It is assumed that the pointers in block pointer already point
+        !       to the correct block.
+        !
+        use constants
+        use blockPointers
+        use flowVarRefState
+        use inputPhysics
+        use iteration
 #ifndef USE_TAPENADE
-    use solverUtils, only : utauWf
+        use solverUtils, only: utauWf
 #endif
-    implicit none
-    !
-    !      Local parameter.
-    !
-    real(kind=realType), parameter :: twoThird = two*third
-    real(kind=realType), parameter :: xminn = 1.e-14_realType
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, ii
-
-    real(kind=realType) :: rFilv, por, mul, mue, mut, heatCoef
-    real(kind=realType) :: gm1, factLamHeat, factTurbHeat
-    real(kind=realType) :: u_x, u_y, u_z, v_x, v_y, v_z, w_x, w_y, w_z
-    real(kind=realType) :: q_x, q_y, q_z, ubar, vbar, wbar
-    real(kind=realType) :: corr, ssx, ssy, ssz, ss, fracDiv
-    real(kind=realType) :: tauxx, tauyy, tauzz
-    real(kind=realType) :: tauxy, tauxz, tauyz
-    real(kind=realType) :: tauxxS, tauyyS, tauzzS
-    real(kind=realType) :: tauxyS, tauxzS, tauyzS
-    real(kind=realType) :: exx, eyy, ezz
-    real(kind=realType) :: exy, exz, eyz
-    real(kind=realType) :: Wxy, Wxz, Wyz, Wyx, Wzx, Wzy
-    real(kind=realType) :: den, Ccr1, fact
-    real(kind=realType) :: fmx, fmy, fmz, frhoE
-    logical :: correctForK, storeWallTensor
-
-    ! Set QCR parameters
-    Ccr1 = 0.3_realType
-
-    ! Set rFilv to rFil to indicate that this is the viscous part.
-    ! If rFilv == 0 the viscous residuals need not to be computed
-    ! and a return can be made.
-
-    rFilv = rFil
-
-    if(abs(rFilv) < thresholdReal) return
-
-    ! Determine whether or not the wall stress tensor and wall heat
-    ! flux must be stored for viscous walls.
-
-    storeWallTensor = .false.
-    if( wallFunctions ) then
-       storeWallTensor = .true.
-    else if(rkStage == 0 .and. currentLevel == groundLevel) then
-       storeWallTensor = .true.
-    endif
-
-    !
-    !         viscous fluxes in the k-direction.
-    !
-    continue
-    !$AD CHECKPOINT-START
-    mue = zero
+        implicit none
+        !
+        !      Local parameter.
+        !
+        real(kind=realType), parameter :: twoThird = two * third
+        real(kind=realType), parameter :: xminn = 1.e-14_realType
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, ii
+
+        real(kind=realType) :: rFilv, por, mul, mue, mut, heatCoef
+        real(kind=realType) :: gm1, factLamHeat, factTurbHeat
+        real(kind=realType) :: u_x, u_y, u_z, v_x, v_y, v_z, w_x, w_y, w_z
+        real(kind=realType) :: q_x, q_y, q_z, ubar, vbar, wbar
+        real(kind=realType) :: corr, ssx, ssy, ssz, ss, fracDiv
+        real(kind=realType) :: tauxx, tauyy, tauzz
+        real(kind=realType) :: tauxy, tauxz, tauyz
+        real(kind=realType) :: tauxxS, tauyyS, tauzzS
+        real(kind=realType) :: tauxyS, tauxzS, tauyzS
+        real(kind=realType) :: exx, eyy, ezz
+        real(kind=realType) :: exy, exz, eyz
+        real(kind=realType) :: Wxy, Wxz, Wyz, Wyx, Wzx, Wzy
+        real(kind=realType) :: den, Ccr1, fact
+        real(kind=realType) :: fmx, fmy, fmz, frhoE
+        logical :: correctForK, storeWallTensor
+
+        ! Set QCR parameters
+        Ccr1 = 0.3_realType
+
+        ! Set rFilv to rFil to indicate that this is the viscous part.
+        ! If rFilv == 0 the viscous residuals need not to be computed
+        ! and a return can be made.
+
+        rFilv = rFil
+
+        if (abs(rFilv) < thresholdReal) return
+
+        ! Determine whether or not the wall stress tensor and wall heat
+        ! flux must be stored for viscous walls.
+
+        storeWallTensor = .false.
+        if (wallFunctions) then
+            storeWallTensor = .true.
+        else if (rkStage == 0 .and. currentLevel == groundLevel) then
+            storeWallTensor = .true.
+        end if
+
+        !
+        !         viscous fluxes in the k-direction.
+        !
+        continue
+        !$AD CHECKPOINT-START
+        mue = zero
 #ifdef TAPENADE_REVERSE
-    !$AD II-LOOP
-    do ii=0,nx*ny*kl-1
-       i = mod(ii, nx) + 2
-       j = mod(ii/nx, ny) + 2
-       k = ii/(nx*ny) + 1
+        !$AD II-LOOP
+        do ii = 0, nx * ny * kl - 1
+            i = mod(ii, nx) + 2
+            j = mod(ii / nx, ny) + 2
+            k = ii / (nx * ny) + 1
 #else
-       do k=1,kl
-          do j=2,jl
-             do i=2,il
+            do k = 1, kl
+                do j = 2, jl
+                    do i = 2, il
 #endif
 
-                ! Set the value of the porosity. If not zero, it is set
-                ! to average the eddy-viscosity and to take the factor
-                ! rFilv into account.
-
-                por = half*rFilv
-                if(porK(i,j,k) == noFlux) por = zero
-
-                ! Compute the laminar and (if present) the eddy viscosities
-                ! multiplied by the porosity. Compute the factor in front of
-                ! the gradients of the speed of sound squared for the heat
-                ! flux.
-
-                mul = por*(rlv(i,j,k) + rlv(i,j,k+1))
-                if( eddyModel ) mue = por*(rev(i,j,k) + rev(i,j,k+1))
-                mut = mul + mue
-
-                gm1          = half*(gamma(i,j,k) + gamma(i,j,k+1)) - one
-                factLamHeat  = one/(prandtl*gm1)
-                factTurbHeat = one/(prandtlTurb*gm1)
-
-                heatCoef = mul*factLamHeat + mue*factTurbHeat
-
-                ! Compute the gradients at the face by averaging the four
-                ! nodal values.
-
-                u_x = fourth*(ux(i-1,j-1,k) + ux(i,j-1,k) &
-                     +         ux(i-1,j,  k) + ux(i,j,  k))
-                u_y = fourth*(uy(i-1,j-1,k) + uy(i,j-1,k) &
-                     +         uy(i-1,j,  k) + uy(i,j,  k))
-                u_z = fourth*(uz(i-1,j-1,k) + uz(i,j-1,k) &
-                     +         uz(i-1,j,  k) + uz(i,j,  k))
-
-                v_x = fourth*(vx(i-1,j-1,k) + vx(i,j-1,k) &
-                     +         vx(i-1,j,  k) + vx(i,j,  k))
-                v_y = fourth*(vy(i-1,j-1,k) + vy(i,j-1,k) &
-                     +         vy(i-1,j,  k) + vy(i,j,  k))
-                v_z = fourth*(vz(i-1,j-1,k) + vz(i,j-1,k) &
-                     +         vz(i-1,j,  k) + vz(i,j,  k))
-
-                w_x = fourth*(wx(i-1,j-1,k) + wx(i,j-1,k) &
-                     +         wx(i-1,j,  k) + wx(i,j,  k))
-                w_y = fourth*(wy(i-1,j-1,k) + wy(i,j-1,k) &
-                     +         wy(i-1,j,  k) + wy(i,j,  k))
-                w_z = fourth*(wz(i-1,j-1,k) + wz(i,j-1,k) &
-                     +         wz(i-1,j,  k) + wz(i,j,  k))
-
-                q_x = fourth*(qx(i-1,j-1,k) + qx(i,j-1,k) &
-                     +         qx(i-1,j,  k) + qx(i,j,  k))
-                q_y = fourth*(qy(i-1,j-1,k) + qy(i,j-1,k) &
-                     +         qy(i-1,j,  k) + qy(i,j,  k))
-                q_z = fourth*(qz(i-1,j-1,k) + qz(i,j-1,k) &
-                     +         qz(i-1,j,  k) + qz(i,j,  k))
-
-
-                ! The gradients in the normal direction are corrected, such
-                ! that no averaging takes places here.
-                ! First determine the vector in the direction from the
-                ! cell center k to cell center k+1.
-
-                ssx = eighth*(x(i-1,j-1,k+1,1) - x(i-1,j-1,k-1,1) &
-                     +         x(i-1,j,  k+1,1) - x(i-1,j,  k-1,1) &
-                     +         x(i,  j-1,k+1,1) - x(i,  j-1,k-1,1) &
-                     +         x(i,  j,  k+1,1) - x(i,  j,  k-1,1))
-                ssy = eighth*(x(i-1,j-1,k+1,2) - x(i-1,j-1,k-1,2) &
-                     +         x(i-1,j,  k+1,2) - x(i-1,j,  k-1,2) &
-                     +         x(i,  j-1,k+1,2) - x(i,  j-1,k-1,2) &
-                     +         x(i,  j,  k+1,2) - x(i,  j,  k-1,2))
-                ssz = eighth*(x(i-1,j-1,k+1,3) - x(i-1,j-1,k-1,3) &
-                     +         x(i-1,j,  k+1,3) - x(i-1,j,  k-1,3) &
-                     +         x(i,  j-1,k+1,3) - x(i,  j-1,k-1,3) &
-                     +         x(i,  j,  k+1,3) - x(i,  j,  k-1,3))
-
-                ! Determine the length of this vector and create the
-                ! unit normal.
-
-                ss  = one/sqrt(ssx*ssx + ssy*ssy + ssz*ssz)
-                ssx = ss*ssx
-                ssy = ss*ssy
-                ssz = ss*ssz
-
-                ! Correct the gradients.
-
-                corr = u_x*ssx + u_y*ssy + u_z*ssz        &
-                     - (w(i,j,k+1,ivx) - w(i,j,k,ivx))*ss
-                u_x  = u_x - corr*ssx
-                u_y  = u_y - corr*ssy
-                u_z  = u_z - corr*ssz
-
-                corr = v_x*ssx + v_y*ssy + v_z*ssz        &
-                     - (w(i,j,k+1,ivy) - w(i,j,k,ivy))*ss
-                v_x  = v_x - corr*ssx
-                v_y  = v_y - corr*ssy
-                v_z  = v_z - corr*ssz
-
-                corr = w_x*ssx + w_y*ssy + w_z*ssz        &
-                     - (w(i,j,k+1,ivz) - w(i,j,k,ivz))*ss
-                w_x  = w_x - corr*ssx
-                w_y  = w_y - corr*ssy
-                w_z  = w_z - corr*ssz
-
-                corr = q_x*ssx + q_y*ssy + q_z*ssz &
-                     + (aa(i,j,k+1) - aa(i,j,k))*ss
-                q_x  = q_x - corr*ssx
-                q_y  = q_y - corr*ssy
-                q_z  = q_z - corr*ssz
-
-                ! Compute the stress tensor and the heat flux vector.
-                ! We remove the viscosity from the stress tensor (tau)
-                ! to define tauS since we still need to separate between
-                ! laminar and turbulent stress for QCR.
-                ! Therefore, laminar tau = mue*tauS, turbulent
-                ! tau = mue*tauS, and total tau = mut*tauS.
-
-                fracDiv = twoThird*(u_x + v_y + w_z)
-
-                tauxxS = two*u_x - fracDiv
-                tauyyS = two*v_y - fracDiv
-                tauzzS = two*w_z - fracDiv
-
-                tauxyS = u_y + v_x
-                tauxzS = u_z + w_x
-                tauyzS = v_z + w_y
-
-                q_x = heatCoef*q_x
-                q_y = heatCoef*q_y
-                q_z = heatCoef*q_z
-
-                ! Add QCR corrections if necessary
-                if (useQCR) then
-
-                   ! In the QCR formulation, we add an extra term to the turbulent stress tensor:
-                   !
-                   ! tau_ij,QCR = tau_ij - e_ij
-                   !
-                   ! where, according to TMR website (http://turbmodels.larc.nasa.gov/spalart.html):
-                   !
-                   ! e_ij = Ccr1*(O_ik*tau_jk + O_jk*tau_ik)
-                   !
-                   ! We are computing O_ik as follows:
-                   !
-                   ! O_ik = 2*W_ik/den
-                   !
-                   ! Remember that the tau_ij in e_ij should use only the eddy viscosity!
-
-                   ! Compute denominator
-                   den = sqrt(u_x*u_x + u_y*u_y + u_z*u_z + &
-                        v_x*v_x + v_y*v_y + v_z*v_z + &
-                        w_x*w_x + w_y*w_y + w_z*w_z)
-
-                   ! Denominator should be limited to avoid division by zero in regions with
-                   ! no gradients
-                   den = max(den, xminn)
-
-                   ! Compute factor that will multiply all tensor components.
-                   ! Here we add the eddy viscosity that should multiply the stress tensor (tau)
-                   ! components as well.
-                   fact = mue*Ccr1/den
-
-                   ! Compute off-diagonal terms of vorticity tensor (we will ommit the 1/2)
-                   ! The diagonals of the vorticity tensor components are always zero
-                   Wxy = u_y - v_x
-                   Wxz = u_z - w_x
-                   Wyz = v_z - w_y
-                   Wyx = -Wxy
-                   Wzx = -Wxz
-                   Wzy = -Wyz
-
-                   ! Compute the extra terms of the Boussinesq relation
-                   exx = fact*(Wxy*tauxyS + Wxz*tauxzS)*two
-                   eyy = fact*(Wyx*tauxyS + Wyz*tauyzS)*two
-                   ezz = fact*(Wzx*tauxzS + Wzy*tauyzS)*two
-
-                   exy = fact*(Wxy*tauyyS + Wxz*tauyzS + &
-                               Wyx*tauxxS + Wyz*tauxzS)
-                   exz = fact*(Wxy*tauyzS + Wxz*tauzzS + &
-                               Wzx*tauxxS + Wzy*tauxyS)
-                   eyz = fact*(Wyx*tauxzS + Wyz*tauzzS + &
-                               Wzx*tauxyS + Wzy*tauyyS)
-
-                   ! Apply the total viscosity to the stress tensor and add extra terms
-                   tauxx = mut*tauxxS - exx
-                   tauyy = mut*tauyyS - eyy
-                   tauzz = mut*tauzzS - ezz
-                   tauxy = mut*tauxyS - exy
-                   tauxz = mut*tauxzS - exz
-                   tauyz = mut*tauyzS - eyz
-
-                else
-
-                   ! Just apply the total viscosity to the stress tensor
-                   tauxx = mut*tauxxS
-                   tauyy = mut*tauyyS
-                   tauzz = mut*tauzzS
-                   tauxy = mut*tauxyS
-                   tauxz = mut*tauxzS
-                   tauyz = mut*tauyzS
-
-                end if
-
-                ! Compute the average velocities for the face. Remember that
-                ! the velocities are stored and not the momentum.
-
-                ubar = half*(w(i,j,k,ivx) + w(i,j,k+1,ivx))
-                vbar = half*(w(i,j,k,ivy) + w(i,j,k+1,ivy))
-                wbar = half*(w(i,j,k,ivz) + w(i,j,k+1,ivz))
-
-                ! Compute the viscous fluxes for this k-face.
-
-                fmx   = tauxx*sk(i,j,k,1) + tauxy*sk(i,j,k,2) &
-                     + tauxz*sk(i,j,k,3)
-                fmy   = tauxy*sk(i,j,k,1) + tauyy*sk(i,j,k,2) &
-                     + tauyz*sk(i,j,k,3)
-                fmz   = tauxz*sk(i,j,k,1) + tauyz*sk(i,j,k,2) &
-                     + tauzz*sk(i,j,k,3)
-                frhoE =         (ubar*tauxx + vbar*tauxy + wbar*tauxz)*sk(i,j,k,1)
-                frhoE = frhoE + (ubar*tauxy + vbar*tauyy + wbar*tauyz)*sk(i,j,k,2)
-                frhoE = frhoE + (ubar*tauxz + vbar*tauyz + wbar*tauzz)*sk(i,j,k,3)
-                frhoE = frhoE -  q_x*sk(i,j,k,1) - q_y*sk(i,j,k,2) - q_z*sk(i,j,k,3)
-
-                ! Update the residuals of cell k and k+1.
-
-                fw(i,j,k,imx)   = fw(i,j,k,imx)   - fmx
-                fw(i,j,k,imy)   = fw(i,j,k,imy)   - fmy
-                fw(i,j,k,imz)   = fw(i,j,k,imz)   - fmz
-                fw(i,j,k,irhoE) = fw(i,j,k,irhoE) - frhoE
-
-                fw(i,j,k+1,imx)   = fw(i,j,k+1,imx)   + fmx
-                fw(i,j,k+1,imy)   = fw(i,j,k+1,imy)   + fmy
-                fw(i,j,k+1,imz)   = fw(i,j,k+1,imz)   + fmz
-                fw(i,j,k+1,irhoE) = fw(i,j,k+1,irhoE) + frhoE
-
-                ! Store the stress tensor and the heat flux vector if this
-                ! face is part of a viscous subface. Both the cases k == 1
-                ! and k == kl must be tested.
-
-                if(k == 1 .and. storeWallTensor .and. &
-                     viscKminPointer(i,j) > 0) then
-                   ! We need to index viscSubface with viscKminPointer(i,j)
-                   ! since Tapenade does not like temporary indexes
-
-                   viscSubface(viscKminPointer(i,j))%tau(i,j,1) = tauxx
-                   viscSubface(viscKminPointer(i,j))%tau(i,j,2) = tauyy
-                   viscSubface(viscKminPointer(i,j))%tau(i,j,3) = tauzz
-                   viscSubface(viscKminPointer(i,j))%tau(i,j,4) = tauxy
-                   viscSubface(viscKminPointer(i,j))%tau(i,j,5) = tauxz
-                   viscSubface(viscKminPointer(i,j))%tau(i,j,6) = tauyz
-
-                   viscSubface(viscKminPointer(i,j))%q(i,j,1) = q_x
-                   viscSubface(viscKminPointer(i,j))%q(i,j,2) = q_y
-                   viscSubface(viscKminPointer(i,j))%q(i,j,3) = q_z
-                endif
-
-                ! And the k == kl case.
-                if(k == kl .and. storeWallTensor .and. &
-                     viscKmaxPointer(i,j) > 0) then
-                   viscSubface(viscKmaxPointer(i,j))%tau(i,j,1) = tauxx
-                   viscSubface(viscKmaxPointer(i,j))%tau(i,j,2) = tauyy
-                   viscSubface(viscKmaxPointer(i,j))%tau(i,j,3) = tauzz
-                   viscSubface(viscKmaxPointer(i,j))%tau(i,j,4) = tauxy
-                   viscSubface(viscKmaxPointer(i,j))%tau(i,j,5) = tauxz
-                   viscSubface(viscKmaxPointer(i,j))%tau(i,j,6) = tauyz
-
-                   viscSubface(viscKmaxPointer(i,j))%q(i,j,1) = q_x
-                   viscSubface(viscKmaxPointer(i,j))%q(i,j,2) = q_y
-                   viscSubface(viscKmaxPointer(i,j))%q(i,j,3) = q_z
-                endif
+                        ! Set the value of the porosity. If not zero, it is set
+                        ! to average the eddy-viscosity and to take the factor
+                        ! rFilv into account.
+
+                        por = half * rFilv
+                        if (porK(i, j, k) == noFlux) por = zero
+
+                        ! Compute the laminar and (if present) the eddy viscosities
+                        ! multiplied by the porosity. Compute the factor in front of
+                        ! the gradients of the speed of sound squared for the heat
+                        ! flux.
+
+                        mul = por * (rlv(i, j, k) + rlv(i, j, k + 1))
+                        if (eddyModel) mue = por * (rev(i, j, k) + rev(i, j, k + 1))
+                        mut = mul + mue
+
+                        gm1 = half * (gamma(i, j, k) + gamma(i, j, k + 1)) - one
+                        factLamHeat = one / (prandtl * gm1)
+                        factTurbHeat = one / (prandtlTurb * gm1)
+
+                        heatCoef = mul * factLamHeat + mue * factTurbHeat
+
+                        ! Compute the gradients at the face by averaging the four
+                        ! nodal values.
+
+                        u_x = fourth * (ux(i - 1, j - 1, k) + ux(i, j - 1, k) &
+                                        + ux(i - 1, j, k) + ux(i, j, k))
+                        u_y = fourth * (uy(i - 1, j - 1, k) + uy(i, j - 1, k) &
+                                        + uy(i - 1, j, k) + uy(i, j, k))
+                        u_z = fourth * (uz(i - 1, j - 1, k) + uz(i, j - 1, k) &
+                                        + uz(i - 1, j, k) + uz(i, j, k))
+
+                        v_x = fourth * (vx(i - 1, j - 1, k) + vx(i, j - 1, k) &
+                                        + vx(i - 1, j, k) + vx(i, j, k))
+                        v_y = fourth * (vy(i - 1, j - 1, k) + vy(i, j - 1, k) &
+                                        + vy(i - 1, j, k) + vy(i, j, k))
+                        v_z = fourth * (vz(i - 1, j - 1, k) + vz(i, j - 1, k) &
+                                        + vz(i - 1, j, k) + vz(i, j, k))
+
+                        w_x = fourth * (wx(i - 1, j - 1, k) + wx(i, j - 1, k) &
+                                        + wx(i - 1, j, k) + wx(i, j, k))
+                        w_y = fourth * (wy(i - 1, j - 1, k) + wy(i, j - 1, k) &
+                                        + wy(i - 1, j, k) + wy(i, j, k))
+                        w_z = fourth * (wz(i - 1, j - 1, k) + wz(i, j - 1, k) &
+                                        + wz(i - 1, j, k) + wz(i, j, k))
+
+                        q_x = fourth * (qx(i - 1, j - 1, k) + qx(i, j - 1, k) &
+                                        + qx(i - 1, j, k) + qx(i, j, k))
+                        q_y = fourth * (qy(i - 1, j - 1, k) + qy(i, j - 1, k) &
+                                        + qy(i - 1, j, k) + qy(i, j, k))
+                        q_z = fourth * (qz(i - 1, j - 1, k) + qz(i, j - 1, k) &
+                                        + qz(i - 1, j, k) + qz(i, j, k))
+
+                        ! The gradients in the normal direction are corrected, such
+                        ! that no averaging takes places here.
+                        ! First determine the vector in the direction from the
+                        ! cell center k to cell center k+1.
+
+                        ssx = eighth * (x(i - 1, j - 1, k + 1, 1) - x(i - 1, j - 1, k - 1, 1) &
+                                        + x(i - 1, j, k + 1, 1) - x(i - 1, j, k - 1, 1) &
+                                        + x(i, j - 1, k + 1, 1) - x(i, j - 1, k - 1, 1) &
+                                        + x(i, j, k + 1, 1) - x(i, j, k - 1, 1))
+                        ssy = eighth * (x(i - 1, j - 1, k + 1, 2) - x(i - 1, j - 1, k - 1, 2) &
+                                        + x(i - 1, j, k + 1, 2) - x(i - 1, j, k - 1, 2) &
+                                        + x(i, j - 1, k + 1, 2) - x(i, j - 1, k - 1, 2) &
+                                        + x(i, j, k + 1, 2) - x(i, j, k - 1, 2))
+                        ssz = eighth * (x(i - 1, j - 1, k + 1, 3) - x(i - 1, j - 1, k - 1, 3) &
+                                        + x(i - 1, j, k + 1, 3) - x(i - 1, j, k - 1, 3) &
+                                        + x(i, j - 1, k + 1, 3) - x(i, j - 1, k - 1, 3) &
+                                        + x(i, j, k + 1, 3) - x(i, j, k - 1, 3))
+
+                        ! Determine the length of this vector and create the
+                        ! unit normal.
+
+                        ss = one / sqrt(ssx * ssx + ssy * ssy + ssz * ssz)
+                        ssx = ss * ssx
+                        ssy = ss * ssy
+                        ssz = ss * ssz
+
+                        ! Correct the gradients.
+
+                        corr = u_x * ssx + u_y * ssy + u_z * ssz &
+                               - (w(i, j, k + 1, ivx) - w(i, j, k, ivx)) * ss
+                        u_x = u_x - corr * ssx
+                        u_y = u_y - corr * ssy
+                        u_z = u_z - corr * ssz
+
+                        corr = v_x * ssx + v_y * ssy + v_z * ssz &
+                               - (w(i, j, k + 1, ivy) - w(i, j, k, ivy)) * ss
+                        v_x = v_x - corr * ssx
+                        v_y = v_y - corr * ssy
+                        v_z = v_z - corr * ssz
+
+                        corr = w_x * ssx + w_y * ssy + w_z * ssz &
+                               - (w(i, j, k + 1, ivz) - w(i, j, k, ivz)) * ss
+                        w_x = w_x - corr * ssx
+                        w_y = w_y - corr * ssy
+                        w_z = w_z - corr * ssz
+
+                        corr = q_x * ssx + q_y * ssy + q_z * ssz &
+                               + (aa(i, j, k + 1) - aa(i, j, k)) * ss
+                        q_x = q_x - corr * ssx
+                        q_y = q_y - corr * ssy
+                        q_z = q_z - corr * ssz
+
+                        ! Compute the stress tensor and the heat flux vector.
+                        ! We remove the viscosity from the stress tensor (tau)
+                        ! to define tauS since we still need to separate between
+                        ! laminar and turbulent stress for QCR.
+                        ! Therefore, laminar tau = mue*tauS, turbulent
+                        ! tau = mue*tauS, and total tau = mut*tauS.
+
+                        fracDiv = twoThird * (u_x + v_y + w_z)
+
+                        tauxxS = two * u_x - fracDiv
+                        tauyyS = two * v_y - fracDiv
+                        tauzzS = two * w_z - fracDiv
+
+                        tauxyS = u_y + v_x
+                        tauxzS = u_z + w_x
+                        tauyzS = v_z + w_y
+
+                        q_x = heatCoef * q_x
+                        q_y = heatCoef * q_y
+                        q_z = heatCoef * q_z
+
+                        ! Add QCR corrections if necessary
+                        if (useQCR) then
+
+                            ! In the QCR formulation, we add an extra term to the turbulent stress tensor:
+                            !
+                            ! tau_ij,QCR = tau_ij - e_ij
+                            !
+                            ! where, according to TMR website (http://turbmodels.larc.nasa.gov/spalart.html):
+                            !
+                            ! e_ij = Ccr1*(O_ik*tau_jk + O_jk*tau_ik)
+                            !
+                            ! We are computing O_ik as follows:
+                            !
+                            ! O_ik = 2*W_ik/den
+                            !
+                            ! Remember that the tau_ij in e_ij should use only the eddy viscosity!
+
+                            ! Compute denominator
+                            den = sqrt(u_x * u_x + u_y * u_y + u_z * u_z + &
+                                       v_x * v_x + v_y * v_y + v_z * v_z + &
+                                       w_x * w_x + w_y * w_y + w_z * w_z)
+
+                            ! Denominator should be limited to avoid division by zero in regions with
+                            ! no gradients
+                            den = max(den, xminn)
+
+                            ! Compute factor that will multiply all tensor components.
+                            ! Here we add the eddy viscosity that should multiply the stress tensor (tau)
+                            ! components as well.
+                            fact = mue * Ccr1 / den
+
+                            ! Compute off-diagonal terms of vorticity tensor (we will ommit the 1/2)
+                            ! The diagonals of the vorticity tensor components are always zero
+                            Wxy = u_y - v_x
+                            Wxz = u_z - w_x
+                            Wyz = v_z - w_y
+                            Wyx = -Wxy
+                            Wzx = -Wxz
+                            Wzy = -Wyz
+
+                            ! Compute the extra terms of the Boussinesq relation
+                            exx = fact * (Wxy * tauxyS + Wxz * tauxzS) * two
+                            eyy = fact * (Wyx * tauxyS + Wyz * tauyzS) * two
+                            ezz = fact * (Wzx * tauxzS + Wzy * tauyzS) * two
+
+                            exy = fact * (Wxy * tauyyS + Wxz * tauyzS + &
+                                          Wyx * tauxxS + Wyz * tauxzS)
+                            exz = fact * (Wxy * tauyzS + Wxz * tauzzS + &
+                                          Wzx * tauxxS + Wzy * tauxyS)
+                            eyz = fact * (Wyx * tauxzS + Wyz * tauzzS + &
+                                          Wzx * tauxyS + Wzy * tauyyS)
+
+                            ! Apply the total viscosity to the stress tensor and add extra terms
+                            tauxx = mut * tauxxS - exx
+                            tauyy = mut * tauyyS - eyy
+                            tauzz = mut * tauzzS - ezz
+                            tauxy = mut * tauxyS - exy
+                            tauxz = mut * tauxzS - exz
+                            tauyz = mut * tauyzS - eyz
+
+                        else
+
+                            ! Just apply the total viscosity to the stress tensor
+                            tauxx = mut * tauxxS
+                            tauyy = mut * tauyyS
+                            tauzz = mut * tauzzS
+                            tauxy = mut * tauxyS
+                            tauxz = mut * tauxzS
+                            tauyz = mut * tauyzS
+
+                        end if
+
+                        ! Compute the average velocities for the face. Remember that
+                        ! the velocities are stored and not the momentum.
+
+                        ubar = half * (w(i, j, k, ivx) + w(i, j, k + 1, ivx))
+                        vbar = half * (w(i, j, k, ivy) + w(i, j, k + 1, ivy))
+                        wbar = half * (w(i, j, k, ivz) + w(i, j, k + 1, ivz))
+
+                        ! Compute the viscous fluxes for this k-face.
+
+                        fmx = tauxx * sk(i, j, k, 1) + tauxy * sk(i, j, k, 2) &
+                              + tauxz * sk(i, j, k, 3)
+                        fmy = tauxy * sk(i, j, k, 1) + tauyy * sk(i, j, k, 2) &
+                              + tauyz * sk(i, j, k, 3)
+                        fmz = tauxz * sk(i, j, k, 1) + tauyz * sk(i, j, k, 2) &
+                              + tauzz * sk(i, j, k, 3)
+                        frhoE = (ubar * tauxx + vbar * tauxy + wbar * tauxz) * sk(i, j, k, 1)
+                        frhoE = frhoE + (ubar * tauxy + vbar * tauyy + wbar * tauyz) * sk(i, j, k, 2)
+                        frhoE = frhoE + (ubar * tauxz + vbar * tauyz + wbar * tauzz) * sk(i, j, k, 3)
+                        frhoE = frhoE - q_x * sk(i, j, k, 1) - q_y * sk(i, j, k, 2) - q_z * sk(i, j, k, 3)
+
+                        ! Update the residuals of cell k and k+1.
+
+                        fw(i, j, k, imx) = fw(i, j, k, imx) - fmx
+                        fw(i, j, k, imy) = fw(i, j, k, imy) - fmy
+                        fw(i, j, k, imz) = fw(i, j, k, imz) - fmz
+                        fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - frhoE
+
+                        fw(i, j, k + 1, imx) = fw(i, j, k + 1, imx) + fmx
+                        fw(i, j, k + 1, imy) = fw(i, j, k + 1, imy) + fmy
+                        fw(i, j, k + 1, imz) = fw(i, j, k + 1, imz) + fmz
+                        fw(i, j, k + 1, irhoE) = fw(i, j, k + 1, irhoE) + frhoE
+
+                        ! Store the stress tensor and the heat flux vector if this
+                        ! face is part of a viscous subface. Both the cases k == 1
+                        ! and k == kl must be tested.
+
+                        if (k == 1 .and. storeWallTensor .and. &
+                            viscKminPointer(i, j) > 0) then
+                            ! We need to index viscSubface with viscKminPointer(i,j)
+                            ! since Tapenade does not like temporary indexes
+
+                            viscSubface(viscKminPointer(i, j))%tau(i, j, 1) = tauxx
+                            viscSubface(viscKminPointer(i, j))%tau(i, j, 2) = tauyy
+                            viscSubface(viscKminPointer(i, j))%tau(i, j, 3) = tauzz
+                            viscSubface(viscKminPointer(i, j))%tau(i, j, 4) = tauxy
+                            viscSubface(viscKminPointer(i, j))%tau(i, j, 5) = tauxz
+                            viscSubface(viscKminPointer(i, j))%tau(i, j, 6) = tauyz
+
+                            viscSubface(viscKminPointer(i, j))%q(i, j, 1) = q_x
+                            viscSubface(viscKminPointer(i, j))%q(i, j, 2) = q_y
+                            viscSubface(viscKminPointer(i, j))%q(i, j, 3) = q_z
+                        end if
+
+                        ! And the k == kl case.
+                        if (k == kl .and. storeWallTensor .and. &
+                            viscKmaxPointer(i, j) > 0) then
+                            viscSubface(viscKmaxPointer(i, j))%tau(i, j, 1) = tauxx
+                            viscSubface(viscKmaxPointer(i, j))%tau(i, j, 2) = tauyy
+                            viscSubface(viscKmaxPointer(i, j))%tau(i, j, 3) = tauzz
+                            viscSubface(viscKmaxPointer(i, j))%tau(i, j, 4) = tauxy
+                            viscSubface(viscKmaxPointer(i, j))%tau(i, j, 5) = tauxz
+                            viscSubface(viscKmaxPointer(i, j))%tau(i, j, 6) = tauyz
+
+                            viscSubface(viscKmaxPointer(i, j))%q(i, j, 1) = q_x
+                            viscSubface(viscKmaxPointer(i, j))%q(i, j, 2) = q_y
+                            viscSubface(viscKmaxPointer(i, j))%q(i, j, 3) = q_z
+                        end if
 #ifdef TAPENADE_REVERSE
-             end do
+                    end do
 #else
-          enddo
-       enddo
-    enddo
+                end do
+            end do
+        end do
 #endif
-    continue
-    !$AD CHECKPOINT-END
-
-    !
-    !         Viscous fluxes in the j-direction.
-    !
-    continue
-    !$AD CHECKPOINT-START
-    mue = zero
+        continue
+        !$AD CHECKPOINT-END
+
+        !
+        !         Viscous fluxes in the j-direction.
+        !
+        continue
+        !$AD CHECKPOINT-START
+        mue = zero
 #ifdef TAPENADE_REVERSE
-    !$AD II-LOOP
-    do ii=0,nx*jl*nz-1
-       i = mod(ii, nx) + 2
-       j = mod(ii/nx, jl) + 1
-       k = ii/(nx*jl) + 2
+        !$AD II-LOOP
+        do ii = 0, nx * jl * nz - 1
+            i = mod(ii, nx) + 2
+            j = mod(ii / nx, jl) + 1
+            k = ii / (nx * jl) + 2
 #else
-       do k=2,kl
-          do j=1,jl
-             do i=2,il
+            do k = 2, kl
+                do j = 1, jl
+                    do i = 2, il
 #endif
 
-                ! Set the value of the porosity. If not zero, it is set
-                ! to average the eddy-viscosity and to take the factor
-                ! rFilv into account.
-
-                por = half*rFilv
-                if(porJ(i,j,k) == noFlux) por = zero
-
-                ! Compute the laminar and (if present) the eddy viscosities
-                ! multiplied by the porosity. Compute the factor in front of
-                ! the gradients of the speed of sound squared for the heat
-                ! flux.
-
-                mul = por*(rlv(i,j,k) + rlv(i,j+1,k))
-                if( eddyModel ) mue = por*(rev(i,j,k) + rev(i,j+1,k))
-                mut = mul + mue
-
-                gm1          = half*(gamma(i,j,k) + gamma(i,j+1,k)) - one
-                factLamHeat  = one/(prandtl*gm1)
-                factTurbHeat = one/(prandtlTurb*gm1)
-
-                heatCoef = mul*factLamHeat + mue*factTurbHeat
-
-                ! Compute the gradients at the face by averaging the four
-                ! nodal values.
-
-                u_x = fourth*(ux(i-1,j,k-1) + ux(i,j,k-1) &
-                     +        ux(i-1,j,k  ) + ux(i,j,k  ))
-                u_y = fourth*(uy(i-1,j,k-1) + uy(i,j,k-1) &
-                     +        uy(i-1,j,k  ) + uy(i,j,k  ))
-                u_z = fourth*(uz(i-1,j,k-1) + uz(i,j,k-1) &
-                     +        uz(i-1,j,k  ) + uz(i,j,k  ))
-
-                v_x = fourth*(vx(i-1,j,k-1) + vx(i,j,k-1) &
-                     +        vx(i-1,j,k  ) + vx(i,j,k  ))
-                v_y = fourth*(vy(i-1,j,k-1) + vy(i,j,k-1) &
-                     +        vy(i-1,j,k  ) + vy(i,j,k  ))
-                v_z = fourth*(vz(i-1,j,k-1) + vz(i,j,k-1) &
-                     +        vz(i-1,j,k  ) + vz(i,j,k  ))
-
-                w_x = fourth*(wx(i-1,j,k-1) + wx(i,j,k-1) &
-                     +        wx(i-1,j,k  ) + wx(i,j,k  ))
-                w_y = fourth*(wy(i-1,j,k-1) + wy(i,j,k-1) &
-                     +        wy(i-1,j,k  ) + wy(i,j,k  ))
-                w_z = fourth*(wz(i-1,j,k-1) + wz(i,j,k-1) &
-                     +        wz(i-1,j,k  ) + wz(i,j,k  ))
-
-                q_x = fourth*(qx(i-1,j,k-1) + qx(i,j,k-1) &
-                     +        qx(i-1,j,k  ) + qx(i,j,k  ))
-                q_y = fourth*(qy(i-1,j,k-1) + qy(i,j,k-1) &
-                     +        qy(i-1,j,k  ) + qy(i,j,k  ))
-                q_z = fourth*(qz(i-1,j,k-1) + qz(i,j,k-1) &
-                     +        qz(i-1,j,k  ) + qz(i,j,k  ))
-
-                ! The gradients in the normal direction are corrected, such
-                ! that no averaging takes places here.
-                ! First determine the vector in the direction from the
-                ! cell center j to cell center j+1.
-
-                ssx = eighth*(x(i-1,j+1,k-1,1) - x(i-1,j-1,k-1,1) &
-                     +         x(i-1,j+1,k,  1) - x(i-1,j-1,k,  1) &
-                     +         x(i,  j+1,k-1,1) - x(i,  j-1,k-1,1) &
-                     +         x(i,  j+1,k,  1) - x(i,  j-1,k,  1))
-                ssy = eighth*(x(i-1,j+1,k-1,2) - x(i-1,j-1,k-1,2) &
-                     +         x(i-1,j+1,k,  2) - x(i-1,j-1,k,  2) &
-                     +         x(i,  j+1,k-1,2) - x(i,  j-1,k-1,2) &
-                     +         x(i,  j+1,k,  2) - x(i,  j-1,k,  2))
-                ssz = eighth*(x(i-1,j+1,k-1,3) - x(i-1,j-1,k-1,3) &
-                     +         x(i-1,j+1,k,  3) - x(i-1,j-1,k,  3) &
-                     +         x(i,  j+1,k-1,3) - x(i,  j-1,k-1,3) &
-                     +         x(i,  j+1,k,  3) - x(i,  j-1,k,  3))
-
-                ! Determine the length of this vector and create the
-                ! unit normal.
-
-                ss  = one/sqrt(ssx*ssx + ssy*ssy + ssz*ssz)
-                ssx = ss*ssx
-                ssy = ss*ssy
-                ssz = ss*ssz
-
-                ! Correct the gradients.
-
-                corr = u_x*ssx + u_y*ssy + u_z*ssz        &
-                     - (w(i,j+1,k,ivx) - w(i,j,k,ivx))*ss
-                u_x  = u_x - corr*ssx
-                u_y  = u_y - corr*ssy
-                u_z  = u_z - corr*ssz
-
-                corr = v_x*ssx + v_y*ssy + v_z*ssz        &
-                     - (w(i,j+1,k,ivy) - w(i,j,k,ivy))*ss
-                v_x  = v_x - corr*ssx
-                v_y  = v_y - corr*ssy
-                v_z  = v_z - corr*ssz
-
-                corr = w_x*ssx + w_y*ssy + w_z*ssz        &
-                     - (w(i,j+1,k,ivz) - w(i,j,k,ivz))*ss
-                w_x  = w_x - corr*ssx
-                w_y  = w_y - corr*ssy
-                w_z  = w_z - corr*ssz
-
-                corr = q_x*ssx + q_y*ssy + q_z*ssz &
-                     + (aa(i,j+1,k) - aa(i,j,k))*ss
-                q_x  = q_x - corr*ssx
-                q_y  = q_y - corr*ssy
-                q_z  = q_z - corr*ssz
-
-                ! Compute the stress tensor and the heat flux vector.
-                ! We remove the viscosity from the stress tensor (tau)
-                ! to define tauS since we still need to separate between
-                ! laminar and turbulent stress for QCR.
-                ! Therefore, laminar tau = mue*tauS, turbulent
-                ! tau = mue*tauS, and total tau = mut*tauS.
-
-                fracDiv = twoThird*(u_x + v_y + w_z)
-
-                tauxxS = two*u_x - fracDiv
-                tauyyS = two*v_y - fracDiv
-                tauzzS = two*w_z - fracDiv
-
-                tauxyS = u_y + v_x
-                tauxzS = u_z + w_x
-                tauyzS = v_z + w_y
-
-                q_x = heatCoef*q_x
-                q_y = heatCoef*q_y
-                q_z = heatCoef*q_z
-
-                ! Add QCR corrections if necessary
-                if (useQCR) then
-
-                   ! In the QCR formulation, we add an extra term to the turbulent stress tensor:
-                   !
-                   ! tau_ij,QCR = tau_ij - e_ij
-                   !
-                   ! where, according to TMR website (http://turbmodels.larc.nasa.gov/spalart.html):
-                   !
-                   ! e_ij = Ccr1*(O_ik*tau_jk + O_jk*tau_ik)
-                   !
-                   ! We are computing O_ik as follows:
-                   !
-                   ! O_ik = 2*W_ik/den
-                   !
-                   ! Remember that the tau_ij in e_ij should use only the eddy viscosity!
-
-                   ! Compute denominator
-                   den = sqrt(u_x*u_x + u_y*u_y + u_z*u_z + &
-                        v_x*v_x + v_y*v_y + v_z*v_z + &
-                        w_x*w_x + w_y*w_y + w_z*w_z)
-
-                   ! Denominator should be limited to avoid division by zero in regions with
-                   ! no gradients
-                   den = max(den, xminn)
-
-                   ! Compute factor that will multiply all tensor components.
-                   ! Here we add the eddy viscosity that should multiply the stress tensor (tau)
-                   ! components as well.
-                   fact = mue*Ccr1/den
-
-                   ! Compute off-diagonal terms of vorticity tensor (we will ommit the 1/2)
-                   ! The diagonals of the vorticity tensor components are always zero
-                   Wxy = u_y - v_x
-                   Wxz = u_z - w_x
-                   Wyz = v_z - w_y
-                   Wyx = -Wxy
-                   Wzx = -Wxz
-                   Wzy = -Wyz
-
-                   ! Compute the extra terms of the Boussinesq relation
-                   exx = fact*(Wxy*tauxyS + Wxz*tauxzS)*two
-                   eyy = fact*(Wyx*tauxyS + Wyz*tauyzS)*two
-                   ezz = fact*(Wzx*tauxzS + Wzy*tauyzS)*two
-
-                   exy = fact*(Wxy*tauyyS + Wxz*tauyzS + &
-                               Wyx*tauxxS + Wyz*tauxzS)
-                   exz = fact*(Wxy*tauyzS + Wxz*tauzzS + &
-                               Wzx*tauxxS + Wzy*tauxyS)
-                   eyz = fact*(Wyx*tauxzS + Wyz*tauzzS + &
-                               Wzx*tauxyS + Wzy*tauyyS)
-
-                   ! Apply the total viscosity to the stress tensor and add extra terms
-                   tauxx = mut*tauxxS - exx
-                   tauyy = mut*tauyyS - eyy
-                   tauzz = mut*tauzzS - ezz
-                   tauxy = mut*tauxyS - exy
-                   tauxz = mut*tauxzS - exz
-                   tauyz = mut*tauyzS - eyz
-
-                else
-
-                   ! Just apply the total viscosity to the stress tensor
-                   tauxx = mut*tauxxS
-                   tauyy = mut*tauyyS
-                   tauzz = mut*tauzzS
-                   tauxy = mut*tauxyS
-                   tauxz = mut*tauxzS
-                   tauyz = mut*tauyzS
-
-                end if
-
-                ! Compute the average velocities for the face. Remember that
-                ! the velocities are stored and not the momentum.
-
-                ubar = half*(w(i,j,k,ivx) + w(i,j+1,k,ivx))
-                vbar = half*(w(i,j,k,ivy) + w(i,j+1,k,ivy))
-                wbar = half*(w(i,j,k,ivz) + w(i,j+1,k,ivz))
-
-                ! Compute the viscous fluxes for this j-face.
-
-                fmx   = tauxx*sj(i,j,k,1) + tauxy*sj(i,j,k,2) &
-                     + tauxz*sj(i,j,k,3)
-                fmy   = tauxy*sj(i,j,k,1) + tauyy*sj(i,j,k,2) &
-                     + tauyz*sj(i,j,k,3)
-                fmz   = tauxz*sj(i,j,k,1) + tauyz*sj(i,j,k,2) &
-                     + tauzz*sj(i,j,k,3)
-                frhoE = (ubar*tauxx + vbar*tauxy + wbar*tauxz)*sj(i,j,k,1) &
-                     + (ubar*tauxy + vbar*tauyy + wbar*tauyz)*sj(i,j,k,2) &
-                     + (ubar*tauxz + vbar*tauyz + wbar*tauzz)*sj(i,j,k,3) &
-                     - q_x*sj(i,j,k,1) - q_y*sj(i,j,k,2) - q_z*sj(i,j,k,3)
-
-                ! Update the residuals of cell j and j+1.
-
-                fw(i,j,k,imx)   = fw(i,j,k,imx)   - fmx
-                fw(i,j,k,imy)   = fw(i,j,k,imy)   - fmy
-                fw(i,j,k,imz)   = fw(i,j,k,imz)   - fmz
-                fw(i,j,k,irhoE) = fw(i,j,k,irhoE) - frhoE
-
-                fw(i,j+1,k,imx)   = fw(i,j+1,k,imx)   + fmx
-                fw(i,j+1,k,imy)   = fw(i,j+1,k,imy)   + fmy
-                fw(i,j+1,k,imz)   = fw(i,j+1,k,imz)   + fmz
-                fw(i,j+1,k,irhoE) = fw(i,j+1,k,irhoE) + frhoE
-
-                ! Store the stress tensor and the heat flux vector if this
-                ! face is part of a viscous subface. Both the cases j == 1
-                ! and j == jl must be tested.
-
-                if(j == 1 .and. storeWallTensor .and. &
-                     viscJminPointer(i,k) > 0) then
-                   ! We need to index viscSubface with viscJminPointer(i,k)
-                   ! since Tapenade does not like temporary indexes
-
-                   viscSubface(viscJminPointer(i,k))%tau(i,k,1) = tauxx
-                   viscSubface(viscJminPointer(i,k))%tau(i,k,2) = tauyy
-                   viscSubface(viscJminPointer(i,k))%tau(i,k,3) = tauzz
-                   viscSubface(viscJminPointer(i,k))%tau(i,k,4) = tauxy
-                   viscSubface(viscJminPointer(i,k))%tau(i,k,5) = tauxz
-                   viscSubface(viscJminPointer(i,k))%tau(i,k,6) = tauyz
-
-                   viscSubface(viscJminPointer(i,k))%q(i,k,1) = q_x
-                   viscSubface(viscJminPointer(i,k))%q(i,k,2) = q_y
-                   viscSubface(viscJminPointer(i,k))%q(i,k,3) = q_z
-                endif
-
-                ! And the j == jl case.
-
-                if(j == jl .and. storeWallTensor .and. &
-                     viscJmaxPointer(i,k) > 0) then
-                   viscSubface(viscJmaxPointer(i,k))%tau(i,k,1) = tauxx
-                   viscSubface(viscJmaxPointer(i,k))%tau(i,k,2) = tauyy
-                   viscSubface(viscJmaxPointer(i,k))%tau(i,k,3) = tauzz
-                   viscSubface(viscJmaxPointer(i,k))%tau(i,k,4) = tauxy
-                   viscSubface(viscJmaxPointer(i,k))%tau(i,k,5) = tauxz
-                   viscSubface(viscJmaxPointer(i,k))%tau(i,k,6) = tauyz
-
-                   viscSubface(viscJmaxPointer(i,k))%q(i,k,1) = q_x
-                   viscSubface(viscJmaxPointer(i,k))%q(i,k,2) = q_y
-                   viscSubface(viscJmaxPointer(i,k))%q(i,k,3) = q_z
-                endif
+                        ! Set the value of the porosity. If not zero, it is set
+                        ! to average the eddy-viscosity and to take the factor
+                        ! rFilv into account.
+
+                        por = half * rFilv
+                        if (porJ(i, j, k) == noFlux) por = zero
+
+                        ! Compute the laminar and (if present) the eddy viscosities
+                        ! multiplied by the porosity. Compute the factor in front of
+                        ! the gradients of the speed of sound squared for the heat
+                        ! flux.
+
+                        mul = por * (rlv(i, j, k) + rlv(i, j + 1, k))
+                        if (eddyModel) mue = por * (rev(i, j, k) + rev(i, j + 1, k))
+                        mut = mul + mue
+
+                        gm1 = half * (gamma(i, j, k) + gamma(i, j + 1, k)) - one
+                        factLamHeat = one / (prandtl * gm1)
+                        factTurbHeat = one / (prandtlTurb * gm1)
+
+                        heatCoef = mul * factLamHeat + mue * factTurbHeat
+
+                        ! Compute the gradients at the face by averaging the four
+                        ! nodal values.
+
+                        u_x = fourth * (ux(i - 1, j, k - 1) + ux(i, j, k - 1) &
+                                        + ux(i - 1, j, k) + ux(i, j, k))
+                        u_y = fourth * (uy(i - 1, j, k - 1) + uy(i, j, k - 1) &
+                                        + uy(i - 1, j, k) + uy(i, j, k))
+                        u_z = fourth * (uz(i - 1, j, k - 1) + uz(i, j, k - 1) &
+                                        + uz(i - 1, j, k) + uz(i, j, k))
+
+                        v_x = fourth * (vx(i - 1, j, k - 1) + vx(i, j, k - 1) &
+                                        + vx(i - 1, j, k) + vx(i, j, k))
+                        v_y = fourth * (vy(i - 1, j, k - 1) + vy(i, j, k - 1) &
+                                        + vy(i - 1, j, k) + vy(i, j, k))
+                        v_z = fourth * (vz(i - 1, j, k - 1) + vz(i, j, k - 1) &
+                                        + vz(i - 1, j, k) + vz(i, j, k))
+
+                        w_x = fourth * (wx(i - 1, j, k - 1) + wx(i, j, k - 1) &
+                                        + wx(i - 1, j, k) + wx(i, j, k))
+                        w_y = fourth * (wy(i - 1, j, k - 1) + wy(i, j, k - 1) &
+                                        + wy(i - 1, j, k) + wy(i, j, k))
+                        w_z = fourth * (wz(i - 1, j, k - 1) + wz(i, j, k - 1) &
+                                        + wz(i - 1, j, k) + wz(i, j, k))
+
+                        q_x = fourth * (qx(i - 1, j, k - 1) + qx(i, j, k - 1) &
+                                        + qx(i - 1, j, k) + qx(i, j, k))
+                        q_y = fourth * (qy(i - 1, j, k - 1) + qy(i, j, k - 1) &
+                                        + qy(i - 1, j, k) + qy(i, j, k))
+                        q_z = fourth * (qz(i - 1, j, k - 1) + qz(i, j, k - 1) &
+                                        + qz(i - 1, j, k) + qz(i, j, k))
+
+                        ! The gradients in the normal direction are corrected, such
+                        ! that no averaging takes places here.
+                        ! First determine the vector in the direction from the
+                        ! cell center j to cell center j+1.
+
+                        ssx = eighth * (x(i - 1, j + 1, k - 1, 1) - x(i - 1, j - 1, k - 1, 1) &
+                                        + x(i - 1, j + 1, k, 1) - x(i - 1, j - 1, k, 1) &
+                                        + x(i, j + 1, k - 1, 1) - x(i, j - 1, k - 1, 1) &
+                                        + x(i, j + 1, k, 1) - x(i, j - 1, k, 1))
+                        ssy = eighth * (x(i - 1, j + 1, k - 1, 2) - x(i - 1, j - 1, k - 1, 2) &
+                                        + x(i - 1, j + 1, k, 2) - x(i - 1, j - 1, k, 2) &
+                                        + x(i, j + 1, k - 1, 2) - x(i, j - 1, k - 1, 2) &
+                                        + x(i, j + 1, k, 2) - x(i, j - 1, k, 2))
+                        ssz = eighth * (x(i - 1, j + 1, k - 1, 3) - x(i - 1, j - 1, k - 1, 3) &
+                                        + x(i - 1, j + 1, k, 3) - x(i - 1, j - 1, k, 3) &
+                                        + x(i, j + 1, k - 1, 3) - x(i, j - 1, k - 1, 3) &
+                                        + x(i, j + 1, k, 3) - x(i, j - 1, k, 3))
+
+                        ! Determine the length of this vector and create the
+                        ! unit normal.
+
+                        ss = one / sqrt(ssx * ssx + ssy * ssy + ssz * ssz)
+                        ssx = ss * ssx
+                        ssy = ss * ssy
+                        ssz = ss * ssz
+
+                        ! Correct the gradients.
+
+                        corr = u_x * ssx + u_y * ssy + u_z * ssz &
+                               - (w(i, j + 1, k, ivx) - w(i, j, k, ivx)) * ss
+                        u_x = u_x - corr * ssx
+                        u_y = u_y - corr * ssy
+                        u_z = u_z - corr * ssz
+
+                        corr = v_x * ssx + v_y * ssy + v_z * ssz &
+                               - (w(i, j + 1, k, ivy) - w(i, j, k, ivy)) * ss
+                        v_x = v_x - corr * ssx
+                        v_y = v_y - corr * ssy
+                        v_z = v_z - corr * ssz
+
+                        corr = w_x * ssx + w_y * ssy + w_z * ssz &
+                               - (w(i, j + 1, k, ivz) - w(i, j, k, ivz)) * ss
+                        w_x = w_x - corr * ssx
+                        w_y = w_y - corr * ssy
+                        w_z = w_z - corr * ssz
+
+                        corr = q_x * ssx + q_y * ssy + q_z * ssz &
+                               + (aa(i, j + 1, k) - aa(i, j, k)) * ss
+                        q_x = q_x - corr * ssx
+                        q_y = q_y - corr * ssy
+                        q_z = q_z - corr * ssz
+
+                        ! Compute the stress tensor and the heat flux vector.
+                        ! We remove the viscosity from the stress tensor (tau)
+                        ! to define tauS since we still need to separate between
+                        ! laminar and turbulent stress for QCR.
+                        ! Therefore, laminar tau = mue*tauS, turbulent
+                        ! tau = mue*tauS, and total tau = mut*tauS.
+
+                        fracDiv = twoThird * (u_x + v_y + w_z)
+
+                        tauxxS = two * u_x - fracDiv
+                        tauyyS = two * v_y - fracDiv
+                        tauzzS = two * w_z - fracDiv
+
+                        tauxyS = u_y + v_x
+                        tauxzS = u_z + w_x
+                        tauyzS = v_z + w_y
+
+                        q_x = heatCoef * q_x
+                        q_y = heatCoef * q_y
+                        q_z = heatCoef * q_z
+
+                        ! Add QCR corrections if necessary
+                        if (useQCR) then
+
+                            ! In the QCR formulation, we add an extra term to the turbulent stress tensor:
+                            !
+                            ! tau_ij,QCR = tau_ij - e_ij
+                            !
+                            ! where, according to TMR website (http://turbmodels.larc.nasa.gov/spalart.html):
+                            !
+                            ! e_ij = Ccr1*(O_ik*tau_jk + O_jk*tau_ik)
+                            !
+                            ! We are computing O_ik as follows:
+                            !
+                            ! O_ik = 2*W_ik/den
+                            !
+                            ! Remember that the tau_ij in e_ij should use only the eddy viscosity!
+
+                            ! Compute denominator
+                            den = sqrt(u_x * u_x + u_y * u_y + u_z * u_z + &
+                                       v_x * v_x + v_y * v_y + v_z * v_z + &
+                                       w_x * w_x + w_y * w_y + w_z * w_z)
+
+                            ! Denominator should be limited to avoid division by zero in regions with
+                            ! no gradients
+                            den = max(den, xminn)
+
+                            ! Compute factor that will multiply all tensor components.
+                            ! Here we add the eddy viscosity that should multiply the stress tensor (tau)
+                            ! components as well.
+                            fact = mue * Ccr1 / den
+
+                            ! Compute off-diagonal terms of vorticity tensor (we will ommit the 1/2)
+                            ! The diagonals of the vorticity tensor components are always zero
+                            Wxy = u_y - v_x
+                            Wxz = u_z - w_x
+                            Wyz = v_z - w_y
+                            Wyx = -Wxy
+                            Wzx = -Wxz
+                            Wzy = -Wyz
+
+                            ! Compute the extra terms of the Boussinesq relation
+                            exx = fact * (Wxy * tauxyS + Wxz * tauxzS) * two
+                            eyy = fact * (Wyx * tauxyS + Wyz * tauyzS) * two
+                            ezz = fact * (Wzx * tauxzS + Wzy * tauyzS) * two
+
+                            exy = fact * (Wxy * tauyyS + Wxz * tauyzS + &
+                                          Wyx * tauxxS + Wyz * tauxzS)
+                            exz = fact * (Wxy * tauyzS + Wxz * tauzzS + &
+                                          Wzx * tauxxS + Wzy * tauxyS)
+                            eyz = fact * (Wyx * tauxzS + Wyz * tauzzS + &
+                                          Wzx * tauxyS + Wzy * tauyyS)
+
+                            ! Apply the total viscosity to the stress tensor and add extra terms
+                            tauxx = mut * tauxxS - exx
+                            tauyy = mut * tauyyS - eyy
+                            tauzz = mut * tauzzS - ezz
+                            tauxy = mut * tauxyS - exy
+                            tauxz = mut * tauxzS - exz
+                            tauyz = mut * tauyzS - eyz
+
+                        else
+
+                            ! Just apply the total viscosity to the stress tensor
+                            tauxx = mut * tauxxS
+                            tauyy = mut * tauyyS
+                            tauzz = mut * tauzzS
+                            tauxy = mut * tauxyS
+                            tauxz = mut * tauxzS
+                            tauyz = mut * tauyzS
+
+                        end if
+
+                        ! Compute the average velocities for the face. Remember that
+                        ! the velocities are stored and not the momentum.
+
+                        ubar = half * (w(i, j, k, ivx) + w(i, j + 1, k, ivx))
+                        vbar = half * (w(i, j, k, ivy) + w(i, j + 1, k, ivy))
+                        wbar = half * (w(i, j, k, ivz) + w(i, j + 1, k, ivz))
+
+                        ! Compute the viscous fluxes for this j-face.
+
+                        fmx = tauxx * sj(i, j, k, 1) + tauxy * sj(i, j, k, 2) &
+                              + tauxz * sj(i, j, k, 3)
+                        fmy = tauxy * sj(i, j, k, 1) + tauyy * sj(i, j, k, 2) &
+                              + tauyz * sj(i, j, k, 3)
+                        fmz = tauxz * sj(i, j, k, 1) + tauyz * sj(i, j, k, 2) &
+                              + tauzz * sj(i, j, k, 3)
+                        frhoE = (ubar * tauxx + vbar * tauxy + wbar * tauxz) * sj(i, j, k, 1) &
+                                + (ubar * tauxy + vbar * tauyy + wbar * tauyz) * sj(i, j, k, 2) &
+                                + (ubar * tauxz + vbar * tauyz + wbar * tauzz) * sj(i, j, k, 3) &
+                                - q_x * sj(i, j, k, 1) - q_y * sj(i, j, k, 2) - q_z * sj(i, j, k, 3)
+
+                        ! Update the residuals of cell j and j+1.
+
+                        fw(i, j, k, imx) = fw(i, j, k, imx) - fmx
+                        fw(i, j, k, imy) = fw(i, j, k, imy) - fmy
+                        fw(i, j, k, imz) = fw(i, j, k, imz) - fmz
+                        fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - frhoE
+
+                        fw(i, j + 1, k, imx) = fw(i, j + 1, k, imx) + fmx
+                        fw(i, j + 1, k, imy) = fw(i, j + 1, k, imy) + fmy
+                        fw(i, j + 1, k, imz) = fw(i, j + 1, k, imz) + fmz
+                        fw(i, j + 1, k, irhoE) = fw(i, j + 1, k, irhoE) + frhoE
+
+                        ! Store the stress tensor and the heat flux vector if this
+                        ! face is part of a viscous subface. Both the cases j == 1
+                        ! and j == jl must be tested.
+
+                        if (j == 1 .and. storeWallTensor .and. &
+                            viscJminPointer(i, k) > 0) then
+                            ! We need to index viscSubface with viscJminPointer(i,k)
+                            ! since Tapenade does not like temporary indexes
+
+                            viscSubface(viscJminPointer(i, k))%tau(i, k, 1) = tauxx
+                            viscSubface(viscJminPointer(i, k))%tau(i, k, 2) = tauyy
+                            viscSubface(viscJminPointer(i, k))%tau(i, k, 3) = tauzz
+                            viscSubface(viscJminPointer(i, k))%tau(i, k, 4) = tauxy
+                            viscSubface(viscJminPointer(i, k))%tau(i, k, 5) = tauxz
+                            viscSubface(viscJminPointer(i, k))%tau(i, k, 6) = tauyz
+
+                            viscSubface(viscJminPointer(i, k))%q(i, k, 1) = q_x
+                            viscSubface(viscJminPointer(i, k))%q(i, k, 2) = q_y
+                            viscSubface(viscJminPointer(i, k))%q(i, k, 3) = q_z
+                        end if
+
+                        ! And the j == jl case.
+
+                        if (j == jl .and. storeWallTensor .and. &
+                            viscJmaxPointer(i, k) > 0) then
+                            viscSubface(viscJmaxPointer(i, k))%tau(i, k, 1) = tauxx
+                            viscSubface(viscJmaxPointer(i, k))%tau(i, k, 2) = tauyy
+                            viscSubface(viscJmaxPointer(i, k))%tau(i, k, 3) = tauzz
+                            viscSubface(viscJmaxPointer(i, k))%tau(i, k, 4) = tauxy
+                            viscSubface(viscJmaxPointer(i, k))%tau(i, k, 5) = tauxz
+                            viscSubface(viscJmaxPointer(i, k))%tau(i, k, 6) = tauyz
+
+                            viscSubface(viscJmaxPointer(i, k))%q(i, k, 1) = q_x
+                            viscSubface(viscJmaxPointer(i, k))%q(i, k, 2) = q_y
+                            viscSubface(viscJmaxPointer(i, k))%q(i, k, 3) = q_z
+                        end if
 #ifdef TAPENADE_REVERSE
-             end do
+                    end do
 #else
-          enddo
-       enddo
-    enddo
+                end do
+            end do
+        end do
 #endif
-    continue
-    !$AD CHECKPOINT-END
-
-
-
-    !
-    !         Viscous fluxes in the i-direction.
-    !
-    continue
-    !$AD CHECKPOINT-START
-    mue= zero
+        continue
+        !$AD CHECKPOINT-END
+
+        !
+        !         Viscous fluxes in the i-direction.
+        !
+        continue
+        !$AD CHECKPOINT-START
+        mue = zero
 #ifdef TAPENADE_REVERSE
-    !$AD II-LOOP
-    do ii=0,il*ny*nz-1
-       i = mod(ii, il) + 1
-       j = mod(ii/il, ny) + 2
-       k = ii/(il*ny) + 2
+        !$AD II-LOOP
+        do ii = 0, il * ny * nz - 1
+            i = mod(ii, il) + 1
+            j = mod(ii / il, ny) + 2
+            k = ii / (il * ny) + 2
 #else
-       do k=2, kl
-          do j=2, jl
-             do i=1, il
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 1, il
 #endif
 
-                ! Set the value of the porosity. If not zero, it is set
-                ! to average the eddy-viscosity and to take the factor
-                ! rFilv into account.
-
-                por = half*rFilv
-                if(porI(i,j,k) == noFlux) por = zero
-
-                ! Compute the laminar and (if present) the eddy viscosities
-                ! multiplied the porosity. Compute the factor in front of
-                ! the gradients of the speed of sound squared for the heat
-                ! flux.
-
-                mul = por*(rlv(i,j,k) + rlv(i+1,j,k))
-                if( eddyModel ) mue = por*(rev(i,j,k) + rev(i+1,j,k))
-                mut = mul + mue
-
-                gm1          = half*(gamma(i,j,k) + gamma(i+1,j,k)) - one
-                factLamHeat  = one/(prandtl*gm1)
-                factTurbHeat = one/(prandtlTurb*gm1)
-
-                heatCoef = mul*factLamHeat + mue*factTurbHeat
-
-                ! Compute the gradients at the face by averaging the four
-                ! nodal values.
-
-                u_x = fourth*(ux(i,j-1,k-1) + ux(i,j,k-1) &
-                     +        ux(i,j-1,k  ) + ux(i,j,k  ))
-                u_y = fourth*(uy(i,j-1,k-1) + uy(i,j,k-1) &
-                     +        uy(i,j-1,k  ) + uy(i,j,k  ))
-                u_z = fourth*(uz(i,j-1,k-1) + uz(i,j,k-1) &
-                     +        uz(i,j-1,k  ) + uz(i,j,k  ))
-
-                v_x = fourth*(vx(i,j-1,k-1) + vx(i,j,k-1) &
-                     +        vx(i,j-1,k  ) + vx(i,j,k  ))
-                v_y = fourth*(vy(i,j-1,k-1) + vy(i,j,k-1) &
-                     +        vy(i,j-1,k  ) + vy(i,j,k  ))
-                v_z = fourth*(vz(i,j-1,k-1) + vz(i,j,k-1) &
-                     +        vz(i,j-1,k  ) + vz(i,j,k  ))
-
-                w_x = fourth*(wx(i,j-1,k-1) + wx(i,j,k-1) &
-                     +        wx(i,j-1,k  ) + wx(i,j,k  ))
-                w_y = fourth*(wy(i,j-1,k-1) + wy(i,j,k-1) &
-                     +        wy(i,j-1,k  ) + wy(i,j,k  ))
-                w_z = fourth*(wz(i,j-1,k-1) + wz(i,j,k-1) &
-                     +        wz(i,j-1,k  ) + wz(i,j,k  ))
-
-                q_x = fourth*(qx(i,j-1,k-1) + qx(i,j,k-1) &
-                     +        qx(i,j-1,k  ) + qx(i,j,k  ))
-                q_y = fourth*(qy(i,j-1,k-1) + qy(i,j,k-1) &
-                     +        qy(i,j-1,k  ) + qy(i,j,k  ))
-                q_z = fourth*(qz(i,j-1,k-1) + qz(i,j,k-1) &
-                     +        qz(i,j-1,k  ) + qz(i,j,k  ))
-
-                ! The gradients in the normal direction are corrected, such
-                ! that no averaging takes places here.
-                ! First determine the vector in the direction from the
-                ! cell center i to cell center i+1.
-
-                ssx = eighth*(x(i+1,j-1,k-1,1) - x(i-1,j-1,k-1,1) &
-                     +         x(i+1,j-1,k,  1) - x(i-1,j-1,k,  1) &
-                     +         x(i+1,j,  k-1,1) - x(i-1,j,  k-1,1) &
-                     +         x(i+1,j,  k,  1) - x(i-1,j,  k,  1))
-                ssy = eighth*(x(i+1,j-1,k-1,2) - x(i-1,j-1,k-1,2) &
-                     +         x(i+1,j-1,k,  2) - x(i-1,j-1,k,  2) &
-                     +         x(i+1,j,  k-1,2) - x(i-1,j,  k-1,2) &
-                     +         x(i+1,j,  k,  2) - x(i-1,j,  k,  2))
-                ssz = eighth*(x(i+1,j-1,k-1,3) - x(i-1,j-1,k-1,3) &
-                     +         x(i+1,j-1,k,  3) - x(i-1,j-1,k,  3) &
-                     +         x(i+1,j,  k-1,3) - x(i-1,j,  k-1,3) &
-                     +         x(i+1,j,  k,  3) - x(i-1,j,  k,  3))
-
-                ! Determine the length of this vector and create the
-                ! unit normal.
-
-                ss  = one/sqrt(ssx*ssx + ssy*ssy + ssz*ssz)
-                ssx = ss*ssx
-                ssy = ss*ssy
-                ssz = ss*ssz
-
-                ! Correct the gradients.
-
-                corr = u_x*ssx + u_y*ssy + u_z*ssz        &
-                     - (w(i+1,j,k,ivx) - w(i,j,k,ivx))*ss
-                u_x  = u_x - corr*ssx
-                u_y  = u_y - corr*ssy
-                u_z  = u_z - corr*ssz
-
-                corr = v_x*ssx + v_y*ssy + v_z*ssz        &
-                     - (w(i+1,j,k,ivy) - w(i,j,k,ivy))*ss
-                v_x  = v_x - corr*ssx
-                v_y  = v_y - corr*ssy
-                v_z  = v_z - corr*ssz
-
-                corr = w_x*ssx + w_y*ssy + w_z*ssz        &
-                     - (w(i+1,j,k,ivz) - w(i,j,k,ivz))*ss
-                w_x  = w_x - corr*ssx
-                w_y  = w_y - corr*ssy
-                w_z  = w_z - corr*ssz
-
-                corr = q_x*ssx + q_y*ssy + q_z*ssz &
-                     + (aa(i+1,j,k) - aa(i,j,k))*ss
-                q_x  = q_x - corr*ssx
-                q_y  = q_y - corr*ssy
-                q_z  = q_z - corr*ssz
-
-                ! Compute the stress tensor and the heat flux vector.
-                ! We remove the viscosity from the stress tensor (tau)
-                ! to define tauS since we still need to separate between
-                ! laminar and turbulent stress for QCR.
-                ! Therefore, laminar tau = mue*tauS, turbulent
-                ! tau = mue*tauS, and total tau = mut*tauS.
-
-                fracDiv = twoThird*(u_x + v_y + w_z)
-
-                tauxxS = two*u_x - fracDiv
-                tauyyS = two*v_y - fracDiv
-                tauzzS = two*w_z - fracDiv
-
-                tauxyS = u_y + v_x
-                tauxzS = u_z + w_x
-                tauyzS = v_z + w_y
-
-                q_x = heatCoef*q_x
-                q_y = heatCoef*q_y
-                q_z = heatCoef*q_z
-
-                ! Add QCR corrections if necessary
-                if (useQCR) then
-
-                   ! In the QCR formulation, we add an extra term to the turbulent stress tensor:
-                   !
-                   ! tau_ij,QCR = tau_ij - e_ij
-                   !
-                   ! where, according to TMR website (http://turbmodels.larc.nasa.gov/spalart.html):
-                   !
-                   ! e_ij = Ccr1*(O_ik*tau_jk + O_jk*tau_ik)
-                   !
-                   ! We are computing O_ik as follows:
-                   !
-                   ! O_ik = 2*W_ik/den
-                   !
-                   ! Remember that the tau_ij in e_ij should use only the eddy viscosity!
-
-                   ! Compute denominator
-                   den = sqrt(u_x*u_x + u_y*u_y + u_z*u_z + &
-                        v_x*v_x + v_y*v_y + v_z*v_z + &
-                        w_x*w_x + w_y*w_y + w_z*w_z)
-
-                   ! Denominator should be limited to avoid division by zero in regions with
-                   ! no gradients
-                   den = max(den, xminn)
-
-                   ! Compute factor that will multiply all tensor components.
-                   ! Here we add the eddy viscosity that should multiply the stress tensor (tau)
-                   ! components as well.
-                   fact = mue*Ccr1/den
-
-                   ! Compute off-diagonal terms of vorticity tensor (we will ommit the 1/2)
-                   ! The diagonals of the vorticity tensor components are always zero
-                   Wxy = u_y - v_x
-                   Wxz = u_z - w_x
-                   Wyz = v_z - w_y
-                   Wyx = -Wxy
-                   Wzx = -Wxz
-                   Wzy = -Wyz
-
-                   ! Compute the extra terms of the Boussinesq relation
-                   exx = fact*(Wxy*tauxyS + Wxz*tauxzS)*two
-                   eyy = fact*(Wyx*tauxyS + Wyz*tauyzS)*two
-                   ezz = fact*(Wzx*tauxzS + Wzy*tauyzS)*two
-
-                   exy = fact*(Wxy*tauyyS + Wxz*tauyzS + &
-                               Wyx*tauxxS + Wyz*tauxzS)
-                   exz = fact*(Wxy*tauyzS + Wxz*tauzzS + &
-                               Wzx*tauxxS + Wzy*tauxyS)
-                   eyz = fact*(Wyx*tauxzS + Wyz*tauzzS + &
-                               Wzx*tauxyS + Wzy*tauyyS)
-
-                   ! Apply the total viscosity to the stress tensor and add extra terms
-                   tauxx = mut*tauxxS - exx
-                   tauyy = mut*tauyyS - eyy
-                   tauzz = mut*tauzzS - ezz
-                   tauxy = mut*tauxyS - exy
-                   tauxz = mut*tauxzS - exz
-                   tauyz = mut*tauyzS - eyz
-
-                else
-
-                   ! Just apply the total viscosity to the stress tensor
-                   tauxx = mut*tauxxS
-                   tauyy = mut*tauyyS
-                   tauzz = mut*tauzzS
-                   tauxy = mut*tauxyS
-                   tauxz = mut*tauxzS
-                   tauyz = mut*tauyzS
-
-                end if
-
-                ! Compute the average velocities for the face. Remember that
-                ! the velocities are stored and not the momentum.
-
-                ubar = half*(w(i,j,k,ivx) + w(i+1,j,k,ivx))
-                vbar = half*(w(i,j,k,ivy) + w(i+1,j,k,ivy))
-                wbar = half*(w(i,j,k,ivz) + w(i+1,j,k,ivz))
-
-                ! Compute the viscous fluxes for this i-face.
-
-                fmx   = tauxx*si(i,j,k,1) + tauxy*si(i,j,k,2) &
-                     + tauxz*si(i,j,k,3)
-                fmy   = tauxy*si(i,j,k,1) + tauyy*si(i,j,k,2) &
-                     + tauyz*si(i,j,k,3)
-                fmz   = tauxz*si(i,j,k,1) + tauyz*si(i,j,k,2) &
-                     + tauzz*si(i,j,k,3)
-                frhoE = (ubar*tauxx + vbar*tauxy + wbar*tauxz)*si(i,j,k,1) &
-                     + (ubar*tauxy + vbar*tauyy + wbar*tauyz)*si(i,j,k,2) &
-                     + (ubar*tauxz + vbar*tauyz + wbar*tauzz)*si(i,j,k,3) &
-                     - q_x*si(i,j,k,1) - q_y*si(i,j,k,2) - q_z*si(i,j,k,3)
-
-                ! Update the residuals of cell i and i+1.
-
-                fw(i,j,k,imx)   = fw(i,j,k,imx)   - fmx
-                fw(i,j,k,imy)   = fw(i,j,k,imy)   - fmy
-                fw(i,j,k,imz)   = fw(i,j,k,imz)   - fmz
-                fw(i,j,k,irhoE) = fw(i,j,k,irhoE) - frhoE
-
-                fw(i+1,j,k,imx)   = fw(i+1,j,k,imx)   + fmx
-                fw(i+1,j,k,imy)   = fw(i+1,j,k,imy)   + fmy
-                fw(i+1,j,k,imz)   = fw(i+1,j,k,imz)   + fmz
-                fw(i+1,j,k,irhoE) = fw(i+1,j,k,irhoE) + frhoE
-
-                ! Store the stress tensor and the heat flux vector if this
-                ! face is part of a viscous subface. Both the cases i == 1
-                ! and i == il must be tested.
-
-                if(i == 1 .and. storeWallTensor .and. &
-                     viscIminPointer(j,k) > 0) then
-                   ! We need to index viscSubface with viscIminPointer(j,k)
-                   ! since Tapenade does not like temporary indexes
-
-                   viscSubface(viscIminPointer(j,k))%tau(j,k,1) = tauxx
-                   viscSubface(viscIminPointer(j,k))%tau(j,k,2) = tauyy
-                   viscSubface(viscIminPointer(j,k))%tau(j,k,3) = tauzz
-                   viscSubface(viscIminPointer(j,k))%tau(j,k,4) = tauxy
-                   viscSubface(viscIminPointer(j,k))%tau(j,k,5) = tauxz
-                   viscSubface(viscIminPointer(j,k))%tau(j,k,6) = tauyz
-
-                   viscSubface(viscIminPointer(j,k))%q(j,k,1) = q_x
-                   viscSubface(viscIminPointer(j,k))%q(j,k,2) = q_y
-                   viscSubface(viscIminPointer(j,k))%q(j,k,3) = q_z
-                endif
-
-                ! And the i == il case.
-
-                if(i == il .and. storeWallTensor .and. &
-                     viscImaxPointer(j,k) > 0) then
-                   ! We need to index viscSubface with viscImaxPointer(j,k)
-                   ! since Tapenade does not like temporary indexes
-
-                   viscSubface(viscImaxPointer(j,k))%tau(j,k,1) = tauxx
-                   viscSubface(viscImaxPointer(j,k))%tau(j,k,2) = tauyy
-                   viscSubface(viscImaxPointer(j,k))%tau(j,k,3) = tauzz
-                   viscSubface(viscImaxPointer(j,k))%tau(j,k,4) = tauxy
-                   viscSubface(viscImaxPointer(j,k))%tau(j,k,5) = tauxz
-                   viscSubface(viscImaxPointer(j,k))%tau(j,k,6) = tauyz
-
-                   viscSubface(viscImaxPointer(j,k))%q(j,k,1) = q_x
-                   viscSubface(viscImaxPointer(j,k))%q(j,k,2) = q_y
-                   viscSubface(viscImaxPointer(j,k))%q(j,k,3) = q_z
-                endif
+                        ! Set the value of the porosity. If not zero, it is set
+                        ! to average the eddy-viscosity and to take the factor
+                        ! rFilv into account.
+
+                        por = half * rFilv
+                        if (porI(i, j, k) == noFlux) por = zero
+
+                        ! Compute the laminar and (if present) the eddy viscosities
+                        ! multiplied the porosity. Compute the factor in front of
+                        ! the gradients of the speed of sound squared for the heat
+                        ! flux.
+
+                        mul = por * (rlv(i, j, k) + rlv(i + 1, j, k))
+                        if (eddyModel) mue = por * (rev(i, j, k) + rev(i + 1, j, k))
+                        mut = mul + mue
+
+                        gm1 = half * (gamma(i, j, k) + gamma(i + 1, j, k)) - one
+                        factLamHeat = one / (prandtl * gm1)
+                        factTurbHeat = one / (prandtlTurb * gm1)
+
+                        heatCoef = mul * factLamHeat + mue * factTurbHeat
+
+                        ! Compute the gradients at the face by averaging the four
+                        ! nodal values.
+
+                        u_x = fourth * (ux(i, j - 1, k - 1) + ux(i, j, k - 1) &
+                                        + ux(i, j - 1, k) + ux(i, j, k))
+                        u_y = fourth * (uy(i, j - 1, k - 1) + uy(i, j, k - 1) &
+                                        + uy(i, j - 1, k) + uy(i, j, k))
+                        u_z = fourth * (uz(i, j - 1, k - 1) + uz(i, j, k - 1) &
+                                        + uz(i, j - 1, k) + uz(i, j, k))
+
+                        v_x = fourth * (vx(i, j - 1, k - 1) + vx(i, j, k - 1) &
+                                        + vx(i, j - 1, k) + vx(i, j, k))
+                        v_y = fourth * (vy(i, j - 1, k - 1) + vy(i, j, k - 1) &
+                                        + vy(i, j - 1, k) + vy(i, j, k))
+                        v_z = fourth * (vz(i, j - 1, k - 1) + vz(i, j, k - 1) &
+                                        + vz(i, j - 1, k) + vz(i, j, k))
+
+                        w_x = fourth * (wx(i, j - 1, k - 1) + wx(i, j, k - 1) &
+                                        + wx(i, j - 1, k) + wx(i, j, k))
+                        w_y = fourth * (wy(i, j - 1, k - 1) + wy(i, j, k - 1) &
+                                        + wy(i, j - 1, k) + wy(i, j, k))
+                        w_z = fourth * (wz(i, j - 1, k - 1) + wz(i, j, k - 1) &
+                                        + wz(i, j - 1, k) + wz(i, j, k))
+
+                        q_x = fourth * (qx(i, j - 1, k - 1) + qx(i, j, k - 1) &
+                                        + qx(i, j - 1, k) + qx(i, j, k))
+                        q_y = fourth * (qy(i, j - 1, k - 1) + qy(i, j, k - 1) &
+                                        + qy(i, j - 1, k) + qy(i, j, k))
+                        q_z = fourth * (qz(i, j - 1, k - 1) + qz(i, j, k - 1) &
+                                        + qz(i, j - 1, k) + qz(i, j, k))
+
+                        ! The gradients in the normal direction are corrected, such
+                        ! that no averaging takes places here.
+                        ! First determine the vector in the direction from the
+                        ! cell center i to cell center i+1.
+
+                        ssx = eighth * (x(i + 1, j - 1, k - 1, 1) - x(i - 1, j - 1, k - 1, 1) &
+                                        + x(i + 1, j - 1, k, 1) - x(i - 1, j - 1, k, 1) &
+                                        + x(i + 1, j, k - 1, 1) - x(i - 1, j, k - 1, 1) &
+                                        + x(i + 1, j, k, 1) - x(i - 1, j, k, 1))
+                        ssy = eighth * (x(i + 1, j - 1, k - 1, 2) - x(i - 1, j - 1, k - 1, 2) &
+                                        + x(i + 1, j - 1, k, 2) - x(i - 1, j - 1, k, 2) &
+                                        + x(i + 1, j, k - 1, 2) - x(i - 1, j, k - 1, 2) &
+                                        + x(i + 1, j, k, 2) - x(i - 1, j, k, 2))
+                        ssz = eighth * (x(i + 1, j - 1, k - 1, 3) - x(i - 1, j - 1, k - 1, 3) &
+                                        + x(i + 1, j - 1, k, 3) - x(i - 1, j - 1, k, 3) &
+                                        + x(i + 1, j, k - 1, 3) - x(i - 1, j, k - 1, 3) &
+                                        + x(i + 1, j, k, 3) - x(i - 1, j, k, 3))
+
+                        ! Determine the length of this vector and create the
+                        ! unit normal.
+
+                        ss = one / sqrt(ssx * ssx + ssy * ssy + ssz * ssz)
+                        ssx = ss * ssx
+                        ssy = ss * ssy
+                        ssz = ss * ssz
+
+                        ! Correct the gradients.
+
+                        corr = u_x * ssx + u_y * ssy + u_z * ssz &
+                               - (w(i + 1, j, k, ivx) - w(i, j, k, ivx)) * ss
+                        u_x = u_x - corr * ssx
+                        u_y = u_y - corr * ssy
+                        u_z = u_z - corr * ssz
+
+                        corr = v_x * ssx + v_y * ssy + v_z * ssz &
+                               - (w(i + 1, j, k, ivy) - w(i, j, k, ivy)) * ss
+                        v_x = v_x - corr * ssx
+                        v_y = v_y - corr * ssy
+                        v_z = v_z - corr * ssz
+
+                        corr = w_x * ssx + w_y * ssy + w_z * ssz &
+                               - (w(i + 1, j, k, ivz) - w(i, j, k, ivz)) * ss
+                        w_x = w_x - corr * ssx
+                        w_y = w_y - corr * ssy
+                        w_z = w_z - corr * ssz
+
+                        corr = q_x * ssx + q_y * ssy + q_z * ssz &
+                               + (aa(i + 1, j, k) - aa(i, j, k)) * ss
+                        q_x = q_x - corr * ssx
+                        q_y = q_y - corr * ssy
+                        q_z = q_z - corr * ssz
+
+                        ! Compute the stress tensor and the heat flux vector.
+                        ! We remove the viscosity from the stress tensor (tau)
+                        ! to define tauS since we still need to separate between
+                        ! laminar and turbulent stress for QCR.
+                        ! Therefore, laminar tau = mue*tauS, turbulent
+                        ! tau = mue*tauS, and total tau = mut*tauS.
+
+                        fracDiv = twoThird * (u_x + v_y + w_z)
+
+                        tauxxS = two * u_x - fracDiv
+                        tauyyS = two * v_y - fracDiv
+                        tauzzS = two * w_z - fracDiv
+
+                        tauxyS = u_y + v_x
+                        tauxzS = u_z + w_x
+                        tauyzS = v_z + w_y
+
+                        q_x = heatCoef * q_x
+                        q_y = heatCoef * q_y
+                        q_z = heatCoef * q_z
+
+                        ! Add QCR corrections if necessary
+                        if (useQCR) then
+
+                            ! In the QCR formulation, we add an extra term to the turbulent stress tensor:
+                            !
+                            ! tau_ij,QCR = tau_ij - e_ij
+                            !
+                            ! where, according to TMR website (http://turbmodels.larc.nasa.gov/spalart.html):
+                            !
+                            ! e_ij = Ccr1*(O_ik*tau_jk + O_jk*tau_ik)
+                            !
+                            ! We are computing O_ik as follows:
+                            !
+                            ! O_ik = 2*W_ik/den
+                            !
+                            ! Remember that the tau_ij in e_ij should use only the eddy viscosity!
+
+                            ! Compute denominator
+                            den = sqrt(u_x * u_x + u_y * u_y + u_z * u_z + &
+                                       v_x * v_x + v_y * v_y + v_z * v_z + &
+                                       w_x * w_x + w_y * w_y + w_z * w_z)
+
+                            ! Denominator should be limited to avoid division by zero in regions with
+                            ! no gradients
+                            den = max(den, xminn)
+
+                            ! Compute factor that will multiply all tensor components.
+                            ! Here we add the eddy viscosity that should multiply the stress tensor (tau)
+                            ! components as well.
+                            fact = mue * Ccr1 / den
+
+                            ! Compute off-diagonal terms of vorticity tensor (we will ommit the 1/2)
+                            ! The diagonals of the vorticity tensor components are always zero
+                            Wxy = u_y - v_x
+                            Wxz = u_z - w_x
+                            Wyz = v_z - w_y
+                            Wyx = -Wxy
+                            Wzx = -Wxz
+                            Wzy = -Wyz
+
+                            ! Compute the extra terms of the Boussinesq relation
+                            exx = fact * (Wxy * tauxyS + Wxz * tauxzS) * two
+                            eyy = fact * (Wyx * tauxyS + Wyz * tauyzS) * two
+                            ezz = fact * (Wzx * tauxzS + Wzy * tauyzS) * two
+
+                            exy = fact * (Wxy * tauyyS + Wxz * tauyzS + &
+                                          Wyx * tauxxS + Wyz * tauxzS)
+                            exz = fact * (Wxy * tauyzS + Wxz * tauzzS + &
+                                          Wzx * tauxxS + Wzy * tauxyS)
+                            eyz = fact * (Wyx * tauxzS + Wyz * tauzzS + &
+                                          Wzx * tauxyS + Wzy * tauyyS)
+
+                            ! Apply the total viscosity to the stress tensor and add extra terms
+                            tauxx = mut * tauxxS - exx
+                            tauyy = mut * tauyyS - eyy
+                            tauzz = mut * tauzzS - ezz
+                            tauxy = mut * tauxyS - exy
+                            tauxz = mut * tauxzS - exz
+                            tauyz = mut * tauyzS - eyz
+
+                        else
+
+                            ! Just apply the total viscosity to the stress tensor
+                            tauxx = mut * tauxxS
+                            tauyy = mut * tauyyS
+                            tauzz = mut * tauzzS
+                            tauxy = mut * tauxyS
+                            tauxz = mut * tauxzS
+                            tauyz = mut * tauyzS
+
+                        end if
+
+                        ! Compute the average velocities for the face. Remember that
+                        ! the velocities are stored and not the momentum.
+
+                        ubar = half * (w(i, j, k, ivx) + w(i + 1, j, k, ivx))
+                        vbar = half * (w(i, j, k, ivy) + w(i + 1, j, k, ivy))
+                        wbar = half * (w(i, j, k, ivz) + w(i + 1, j, k, ivz))
+
+                        ! Compute the viscous fluxes for this i-face.
+
+                        fmx = tauxx * si(i, j, k, 1) + tauxy * si(i, j, k, 2) &
+                              + tauxz * si(i, j, k, 3)
+                        fmy = tauxy * si(i, j, k, 1) + tauyy * si(i, j, k, 2) &
+                              + tauyz * si(i, j, k, 3)
+                        fmz = tauxz * si(i, j, k, 1) + tauyz * si(i, j, k, 2) &
+                              + tauzz * si(i, j, k, 3)
+                        frhoE = (ubar * tauxx + vbar * tauxy + wbar * tauxz) * si(i, j, k, 1) &
+                                + (ubar * tauxy + vbar * tauyy + wbar * tauyz) * si(i, j, k, 2) &
+                                + (ubar * tauxz + vbar * tauyz + wbar * tauzz) * si(i, j, k, 3) &
+                                - q_x * si(i, j, k, 1) - q_y * si(i, j, k, 2) - q_z * si(i, j, k, 3)
+
+                        ! Update the residuals of cell i and i+1.
+
+                        fw(i, j, k, imx) = fw(i, j, k, imx) - fmx
+                        fw(i, j, k, imy) = fw(i, j, k, imy) - fmy
+                        fw(i, j, k, imz) = fw(i, j, k, imz) - fmz
+                        fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - frhoE
+
+                        fw(i + 1, j, k, imx) = fw(i + 1, j, k, imx) + fmx
+                        fw(i + 1, j, k, imy) = fw(i + 1, j, k, imy) + fmy
+                        fw(i + 1, j, k, imz) = fw(i + 1, j, k, imz) + fmz
+                        fw(i + 1, j, k, irhoE) = fw(i + 1, j, k, irhoE) + frhoE
+
+                        ! Store the stress tensor and the heat flux vector if this
+                        ! face is part of a viscous subface. Both the cases i == 1
+                        ! and i == il must be tested.
+
+                        if (i == 1 .and. storeWallTensor .and. &
+                            viscIminPointer(j, k) > 0) then
+                            ! We need to index viscSubface with viscIminPointer(j,k)
+                            ! since Tapenade does not like temporary indexes
+
+                            viscSubface(viscIminPointer(j, k))%tau(j, k, 1) = tauxx
+                            viscSubface(viscIminPointer(j, k))%tau(j, k, 2) = tauyy
+                            viscSubface(viscIminPointer(j, k))%tau(j, k, 3) = tauzz
+                            viscSubface(viscIminPointer(j, k))%tau(j, k, 4) = tauxy
+                            viscSubface(viscIminPointer(j, k))%tau(j, k, 5) = tauxz
+                            viscSubface(viscIminPointer(j, k))%tau(j, k, 6) = tauyz
+
+                            viscSubface(viscIminPointer(j, k))%q(j, k, 1) = q_x
+                            viscSubface(viscIminPointer(j, k))%q(j, k, 2) = q_y
+                            viscSubface(viscIminPointer(j, k))%q(j, k, 3) = q_z
+                        end if
+
+                        ! And the i == il case.
+
+                        if (i == il .and. storeWallTensor .and. &
+                            viscImaxPointer(j, k) > 0) then
+                            ! We need to index viscSubface with viscImaxPointer(j,k)
+                            ! since Tapenade does not like temporary indexes
+
+                            viscSubface(viscImaxPointer(j, k))%tau(j, k, 1) = tauxx
+                            viscSubface(viscImaxPointer(j, k))%tau(j, k, 2) = tauyy
+                            viscSubface(viscImaxPointer(j, k))%tau(j, k, 3) = tauzz
+                            viscSubface(viscImaxPointer(j, k))%tau(j, k, 4) = tauxy
+                            viscSubface(viscImaxPointer(j, k))%tau(j, k, 5) = tauxz
+                            viscSubface(viscImaxPointer(j, k))%tau(j, k, 6) = tauyz
+
+                            viscSubface(viscImaxPointer(j, k))%q(j, k, 1) = q_x
+                            viscSubface(viscImaxPointer(j, k))%q(j, k, 2) = q_y
+                            viscSubface(viscImaxPointer(j, k))%q(j, k, 3) = q_z
+                        end if
 #ifdef TAPENADE_REVERSE
-             end do
+                    end do
 #else
-          enddo
-       enddo
-    enddo
+                end do
+            end do
+        end do
 #endif
-    !$AD CHECKPOINT-END
-    continue
-    ! Possibly correct the wall shear stress.
-    ! Wall function is not ADed
+        !$AD CHECKPOINT-END
+        continue
+        ! Possibly correct the wall shear stress.
+        ! Wall function is not ADed
 #ifndef USE_TAPENADE
-    call utauWF(rFilv)
+        call utauWF(rFilv)
 #endif
-  end subroutine viscousFlux
-
-  subroutine viscousFluxApprox
-    use constants
-    use blockPointers
-    use flowVarRefState
-    use inputPhysics
-    use iteration
-    implicit none
-    !
-    !      Local parameter.
-    !
-    real(kind=realType), parameter :: twoThird = two*third
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k
-    integer(kind=intType) :: ii, jj, kk
-
-    real(kind=realType) :: rFilv, por, mul, mue, mut, heatCoef
-    real(kind=realType) :: gm1, factLamHeat, factTurbHeat
-    real(kind=realType) :: u_x, u_y, u_z, v_x, v_y, v_z, w_x, w_y, w_z
-    real(kind=realType) :: q_x, q_y, q_z, ubar, vbar, wbar
-    real(kind=realType) :: corr, ssx, ssy, ssz, ss, fracDiv
-    real(kind=realType) :: tauxx, tauyy, tauzz
-    real(kind=realType) :: tauxy, tauxz, tauyz
-    real(kind=realType) :: fmx, fmy, fmz, frhoE
-    real(kind=realType) :: dd
-    logical :: correctForK
-
-    mue = zero
-    rFilv = rFil
-
-    ! Viscous fluxes in the I-direction
-
-    do k=2,kl
-       do j=2,jl
-          do i=1,il
-
-             ! Compute the vector from the center of cell i to cell i+1
-             ssx = eighth*(x(i+1,j-1,k-1,1) - x(i-1,j-1,k-1,1) &
-                  +         x(i+1,j-1,k,  1) - x(i-1,j-1,k,  1) &
-                  +         x(i+1,j,  k-1,1) - x(i-1,j,  k-1,1) &
-                  +         x(i+1,j,  k,  1) - x(i-1,j,  k,  1))
-             ssy = eighth*(x(i+1,j-1,k-1,2) - x(i-1,j-1,k-1,2) &
-                  +         x(i+1,j-1,k,  2) - x(i-1,j-1,k,  2) &
-                  +         x(i+1,j,  k-1,2) - x(i-1,j,  k-1,2) &
-                  +         x(i+1,j,  k,  2) - x(i-1,j,  k,  2))
-             ssz = eighth*(x(i+1,j-1,k-1,3) - x(i-1,j-1,k-1,3) &
-                  +         x(i+1,j-1,k,  3) - x(i-1,j-1,k,  3) &
-                  +         x(i+1,j,  k-1,3) - x(i-1,j,  k-1,3) &
-                  +         x(i+1,j,  k,  3) - x(i-1,j,  k,  3))
-
-             ! And determine one/ length of vector squared
-             ss  = one/(ssx*ssx + ssy*ssy + ssz*ssz)
-             ssx = ss*ssx
-             ssy = ss*ssy
-             ssz = ss*ssz
-
-             ! Now compute each gradient
-             dd = w(i+1, j, k, ivx)-w(i, j, k, ivx)
-             u_x = dd*ssx
-             u_y = dd*ssy
-             u_z = dd*ssz
-
-             dd = w(i+1, j, k, ivy)-w(i, j, k, ivy)
-             v_x = dd*ssx
-             v_y = dd*ssy
-             v_z = dd*ssz
-
-             dd = w(i+1, j, k, ivz)-w(i, j, k, ivz)
-             w_x = dd*ssx
-             w_y = dd*ssy
-             w_z = dd*ssz
-
-             dd = aa(i+1, j, k)-aa(i, j, k)
-             q_x = -dd*ssx
-             q_y = -dd*ssy
-             q_z = -dd*ssz
-
-             por = half*rFilv
-             if(porI(i,j,k) == noFlux) por = zero
-
-             ! Compute the laminar and (if present) the eddy viscosities
-             ! multiplied by the porosity. Compute the factor in front of
-             ! the gradients of the speed of sound squared for the heat
-             ! flux.
-
-             mul = por*(rlv(i,j,k) + rlv(i+1,j,k))
-             if( eddyModel ) mue = por*(rev(i,j,k) + rev(i+1,j,k))
-             mut = mul + mue
-
-             gm1          = half*(gamma(i,j,k) +gamma(i+1,j,k))- one
-             factLamHeat  = one/(prandtl*gm1)
-             factTurbHeat = one/(prandtlTurb*gm1)
-
-             heatCoef = mul*factLamHeat + mue*factTurbHeat
-
-             ! Compute the stress tensor and the heat flux vector.
-
-             fracDiv = twoThird*(u_x + v_y + w_z)
-
-             tauxx = mut*(two*u_x - fracDiv)
-             tauyy = mut*(two*v_y - fracDiv)
-             tauzz = mut*(two*w_z - fracDiv)
-
-             tauxy = mut*(u_y + v_x)
-             tauxz = mut*(u_z + w_x)
-             tauyz = mut*(v_z + w_y)
-
-             q_x = heatCoef*q_x
-             q_y = heatCoef*q_y
-             q_z = heatCoef*q_z
-
-             ! Compute the average velocities for the face. Remember that
-             ! the velocities are stored and not the momentum.
-
-             ubar = half*(w(i,j,k,ivx) + w(i+1,j,k,ivx))
-             vbar = half*(w(i,j,k,ivy) + w(i+1,j,k,ivy))
-             wbar = half*(w(i,j,k,ivz) + w(i+1,j,k,ivz))
-
-             ! Compute the viscous fluxes for this i-face.
-
-             fmx   = tauxx*si(i,j,k,1) + tauxy*si(i,j,k,2) + tauxz*si(i,j,k,3)
-             fmy   = tauxy*si(i,j,k,1) + tauyy*si(i,j,k,2) + tauyz*si(i,j,k,3)
-             fmz   = tauxz*si(i,j,k,1) + tauyz*si(i,j,k,2) + tauzz*si(i,j,k,3)
-             frhoE = (ubar*tauxx + vbar*tauxy + wbar*tauxz)*si(i,j,k,1) &
-                  + (ubar*tauxy + vbar*tauyy + wbar*tauyz)*si(i,j,k,2) &
-                  + (ubar*tauxz + vbar*tauyz + wbar*tauzz)*si(i,j,k,3) &
-                  - q_x*si(i,j,k,1) - q_y*si(i,j,k,2) - q_z*si(i,j,k,3)
-
-             ! Update the residuals of cell i and i+1.
-
-             fw(i,j,k,imx)   = fw(i,j,k,imx)   - fmx
-             fw(i,j,k,imy)   = fw(i,j,k,imy)   - fmy
-             fw(i,j,k,imz)   = fw(i,j,k,imz)   - fmz
-             fw(i,j,k,irhoE) = fw(i,j,k,irhoE) - frhoE
-
-             fw(i+1,j,k,imx)   = fw(i+1,j,k,imx)   + fmx
-             fw(i+1,j,k,imy)   = fw(i+1,j,k,imy)   + fmy
-             fw(i+1,j,k,imz)   = fw(i+1,j,k,imz)   + fmz
-             fw(i+1,j,k,irhoE) = fw(i+1,j,k,irhoE) + frhoE
-
-          end do
-       end do
-    end do
-
-    ! Viscous fluxes in the J-direction
-
-    do k=2,kl
-       do j=1,jl
-          do i=2,il
-
-             ! Compute the vector from the center of cell j to cell j+1
-             ssx = eighth*(x(i-1,j+1,k-1,1) - x(i-1,j-1,k-1,1) &
-                  +         x(i-1,j+1,k,  1) - x(i-1,j-1,k,  1) &
-                  +         x(i,  j+1,k-1,1) - x(i,  j-1,k-1,1) &
-                  +         x(i,  j+1,k,  1) - x(i,  j-1,k,  1))
-             ssy = eighth*(x(i-1,j+1,k-1,2) - x(i-1,j-1,k-1,2) &
-                  +         x(i-1,j+1,k,  2) - x(i-1,j-1,k,  2) &
-                  +         x(i,  j+1,k-1,2) - x(i,  j-1,k-1,2) &
-                  +         x(i,  j+1,k,  2) - x(i,  j-1,k,  2))
-             ssz = eighth*(x(i-1,j+1,k-1,3) - x(i-1,j-1,k-1,3) &
-                  +         x(i-1,j+1,k,  3) - x(i-1,j-1,k,  3) &
-                  +         x(i,  j+1,k-1,3) - x(i,  j-1,k-1,3) &
-                  +         x(i,  j+1,k,  3) - x(i,  j-1,k,  3))
-
-             ! And determine one/ length of vector squared
-             ss  = one/(ssx*ssx + ssy*ssy + ssz*ssz)
-             ssx = ss*ssx
-             ssy = ss*ssy
-             ssz = ss*ssz
-
-             ! Now compute each gradient
-             dd = w(i, j+1, k, ivx)-w(i, j, k, ivx)
-             u_x = dd*ssx
-             u_y = dd*ssy
-             u_z = dd*ssz
-
-             dd = w(i, j+1, k, ivy)-w(i, j, k, ivy)
-             v_x = dd*ssx
-             v_y = dd*ssy
-             v_z = dd*ssz
-
-             dd = w(i, j+1, k, ivz)-w(i, j, k, ivz)
-             w_x = dd*ssx
-             w_y = dd*ssy
-             w_z = dd*ssz
-
-             dd = aa(i, j+1, k)-aa(i, j, k)
-             q_x = -dd*ssx
-             q_y = -dd*ssy
-             q_z = -dd*ssz
-
-             por = half*rFilv
-             if(porJ(i,j,k) == noFlux) por = zero
-
-             ! Compute the laminar and (if present) the eddy viscosities
-             ! multiplied by the porosity. Compute the factor in front of
-             ! the gradients of the speed of sound squared for the heat
-             ! flux.
-
-             mul = por*(rlv(i,j,k) + rlv(i,j+1,k))
-             if( eddyModel ) mue = por*(rev(i,j,k) + rev(i,j+1,k))
-             mut = mul + mue
-
-             gm1          = half*(gamma(i,j,k) + gamma(i,j+1,k)) - one
-             factLamHeat  = one/(prandtl*gm1)
-             factTurbHeat = one/(prandtlTurb*gm1)
-
-             heatCoef = mul*factLamHeat + mue*factTurbHeat
-
-             ! Compute the stress tensor and the heat flux vector.
-
-             fracDiv = twoThird*(u_x + v_y + w_z)
-
-             tauxx = mut*(two*u_x - fracDiv)
-             tauyy = mut*(two*v_y - fracDiv)
-             tauzz = mut*(two*w_z - fracDiv)
-
-             tauxy = mut*(u_y + v_x)
-             tauxz = mut*(u_z + w_x)
-             tauyz = mut*(v_z + w_y)
-
-             q_x = heatCoef*q_x
-             q_y = heatCoef*q_y
-             q_z = heatCoef*q_z
-
-             ! Compute the average velocities for the face. Remember that
-             ! the velocities are stored and not the momentum.
-
-             ubar = half*(w(i,j,k,ivx) + w(i,j+1,k,ivx))
-             vbar = half*(w(i,j,k,ivy) + w(i,j+1,k,ivy))
-             wbar = half*(w(i,j,k,ivz) + w(i,j+1,k,ivz))
-
-             ! Compute the viscous fluxes for this j-face.
-
-             fmx   = tauxx*sj(i,j,k,1) + tauxy*sj(i,j,k,2) + tauxz*sj(i,j,k,3)
-             fmy   = tauxy*sj(i,j,k,1) + tauyy*sj(i,j,k,2) + tauyz*sj(i,j,k,3)
-             fmz   = tauxz*sj(i,j,k,1) + tauyz*sj(i,j,k,2) + tauzz*sj(i,j,k,3)
-             frhoE = (ubar*tauxx + vbar*tauxy + wbar*tauxz)*sj(i,j,k,1) &
-                  + (ubar*tauxy + vbar*tauyy + wbar*tauyz)*sj(i,j,k,2) &
-                  + (ubar*tauxz + vbar*tauyz + wbar*tauzz)*sj(i,j,k,3) &
-                  - q_x*sj(i,j,k,1) - q_y*sj(i,j,k,2) - q_z*sj(i,j,k,3)
-
-             ! Update the residuals of cell j and j+1.
-
-             fw(i,j,k,imx)   = fw(i,j,k,imx)   - fmx
-             fw(i,j,k,imy)   = fw(i,j,k,imy)   - fmy
-             fw(i,j,k,imz)   = fw(i,j,k,imz)   - fmz
-             fw(i,j,k,irhoE) = fw(i,j,k,irhoE) - frhoE
-
-             fw(i,j+1,k,imx)   = fw(i,j+1,k,imx)   + fmx
-             fw(i,j+1,k,imy)   = fw(i,j+1,k,imy)   + fmy
-             fw(i,j+1,k,imz)   = fw(i,j+1,k,imz)   + fmz
-             fw(i,j+1,k,irhoE) = fw(i,j+1,k,irhoE) + frhoE
-
-          end do
-       end do
-    end do
-
-    ! Viscous fluxes in the K-direction
-
-    do k=1,kl
-       do j=2,jl
-          do i=2,il
-
-             ! Compute the vector from the center of cell k to cell k+1
-             ssx = eighth*(x(i-1,j-1,k+1,1) - x(i-1,j-1,k-1,1) &
-                  +         x(i-1,j,  k+1,1) - x(i-1,j,  k-1,1) &
-                  +         x(i,  j-1,k+1,1) - x(i,  j-1,k-1,1) &
-                  +         x(i,  j,  k+1,1) - x(i,  j,  k-1,1))
-             ssy = eighth*(x(i-1,j-1,k+1,2) - x(i-1,j-1,k-1,2) &
-                  +         x(i-1,j,  k+1,2) - x(i-1,j,  k-1,2) &
-                  +         x(i,  j-1,k+1,2) - x(i,  j-1,k-1,2) &
-                  +         x(i,  j,  k+1,2) - x(i,  j,  k-1,2))
-             ssz = eighth*(x(i-1,j-1,k+1,3) - x(i-1,j-1,k-1,3) &
-                  +         x(i-1,j,  k+1,3) - x(i-1,j,  k-1,3) &
-                  +         x(i,  j-1,k+1,3) - x(i,  j-1,k-1,3) &
-                  +         x(i,  j,  k+1,3) - x(i,  j,  k-1,3))
-             ! And determine one/ length of vector squared
-             ss  = one/(ssx*ssx + ssy*ssy + ssz*ssz)
-             ssx = ss*ssx
-             ssy = ss*ssy
-             ssz = ss*ssz
-
-             ! Now compute each gradient
-             dd = w(i, j, k+1, ivx)-w(i, j, k, ivx)
-             u_x = dd*ssx
-             u_y = dd*ssy
-             u_z = dd*ssz
-
-             dd = w(i, j, k+1, ivy)-w(i, j, k, ivy)
-             v_x = dd*ssx
-             v_y = dd*ssy
-             v_z = dd*ssz
-
-             dd = w(i, j, k+1, ivz)-w(i, j, k, ivz)
-             w_x = dd*ssx
-             w_y = dd*ssy
-             w_z = dd*ssz
-
-             dd = aa(i, j, k+1)-aa(i, j, k)
-             q_x = -dd*ssx
-             q_y = -dd*ssy
-             q_z = -dd*ssz
-
-             por = half*rFilv
-             if(porK(i,j,k) == noFlux) por = zero
-
-             ! Compute the laminar and (if present) the eddy viscosities
-             ! multiplied by the porosity. Compute the factor in front of
-             ! the gradients of the speed of sound squared for the heat
-             ! flux.
-
-             mul = por*(rlv(i,j,k) + rlv(i,j,k+1))
-             if( eddyModel ) mue = por*(rev(i,j,k) + rev(i,j,k+1))
-             mut = mul + mue
-
-             gm1          = half*(gamma(i,j,k) + gamma(i,j,k+1)) - one
-             factLamHeat  = one/(prandtl*gm1)
-             factTurbHeat = one/(prandtlTurb*gm1)
-
-             heatCoef = mul*factLamHeat + mue*factTurbHeat
-
-             ! Compute the stress tensor and the heat flux vector.
-
-             fracDiv = twoThird*(u_x + v_y + w_z)
-
-             tauxx = mut*(two*u_x - fracDiv)
-             tauyy = mut*(two*v_y - fracDiv)
-             tauzz = mut*(two*w_z - fracDiv)
-
-             tauxy = mut*(u_y + v_x)
-             tauxz = mut*(u_z + w_x)
-             tauyz = mut*(v_z + w_y)
-
-             q_x = heatCoef*q_x
-             q_y = heatCoef*q_y
-             q_z = heatCoef*q_z
-
-             ! Compute the average velocities for the face. Remember that
-             ! the velocities are stored and not the momentum.
-
-             ubar = half*(w(i,j,k,ivx) + w(i,j,k+1,ivx))
-             vbar = half*(w(i,j,k,ivy) + w(i,j,k+1,ivy))
-             wbar = half*(w(i,j,k,ivz) + w(i,j,k+1,ivz))
-
-             ! Compute the viscous fluxes for this j-face.
-
-             fmx   = tauxx*sk(i,j,k,1) + tauxy*sk(i,j,k,2) + tauxz*sk(i,j,k,3)
-             fmy   = tauxy*sk(i,j,k,1) + tauyy*sk(i,j,k,2) + tauyz*sk(i,j,k,3)
-             fmz   = tauxz*sk(i,j,k,1) + tauyz*sk(i,j,k,2) + tauzz*sk(i,j,k,3)
-             frhoE = (ubar*tauxx + vbar*tauxy + wbar*tauxz)*sk(i,j,k,1) &
-                  + (ubar*tauxy + vbar*tauyy + wbar*tauyz)*sk(i,j,k,2) &
-                  + (ubar*tauxz + vbar*tauyz + wbar*tauzz)*sk(i,j,k,3) &
-                  - q_x*sk(i,j,k,1) - q_y*sk(i,j,k,2) - q_z*sk(i,j,k,3)
-
-             ! Update the residuals of cell j and j+1.
-
-             fw(i,j,k,imx)   = fw(i,j,k,imx)   - fmx
-             fw(i,j,k,imy)   = fw(i,j,k,imy)   - fmy
-             fw(i,j,k,imz)   = fw(i,j,k,imz)   - fmz
-             fw(i,j,k,irhoE) = fw(i,j,k,irhoE) - frhoE
-
-             fw(i,j,k+1,imx)   = fw(i,j,k+1,imx)   + fmx
-             fw(i,j,k+1,imy)   = fw(i,j,k+1,imy)   + fmy
-             fw(i,j,k+1,imz)   = fw(i,j,k+1,imz)   + fmz
-             fw(i,j,k+1,irhoE) = fw(i,j,k+1,irhoE) + frhoE
-
-          end do
-       end do
-    end do
-
-  end subroutine viscousFluxApprox
-
-  subroutine inviscidDissFluxScalarApprox
-    !
-    !       inviscidDissFluxScalar computes the scalar artificial
-    !       dissipation, see AIAA paper 81-1259, for a given block.
-    !       Therefore it is assumed that the pointers in  blockPointers
-    !       already point to the correct block.
-    !
-    use blockPointers
-    use cgnsGrid
-    use constants
-    use flowVarRefState
-    use inputDiscretization
-    use inputPhysics
-    use iteration
-    implicit none
-    !
-    !      Local parameter.
-    !
-    real(kind=realType), parameter :: dssMax = 0.25_realType
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, ind
-
-    real(kind=realType) :: sslim, rhoi
-    real(kind=realType) :: sfil, fis2, fis4
-    real(kind=realType) :: ppor, rrad, dis2
-    real(kind=realType) :: dss1, dss2, ddw, fs
-
-
-    ! Check if rFil == 0. If so, the dissipative flux needs not to
-    ! be computed.
-
-    if(abs(rFil) < thresholdReal) return
-
-    ! Determine the variables used to compute the switch.
-    ! For the inviscid case this is the pressure; for the viscous
-    ! case it is the entropy.
-
-    select case (equations)
-    case (EulerEquations)
-
-       ! Inviscid case. Pressure switch is based on the pressure.
-       ! Also set the value of sslim. To be fully consistent this
-       ! must have the dimension of pressure and it is therefore
-       ! set to a fraction of the free stream value.
-
-       sslim = 0.001_realType*pInfCorr
-
-
-       !===============================================================
-
-    case (NSEquations, RANSEquations)
-
-       ! Viscous case. Pressure switch is based on the entropy.
-       ! Also set the value of sslim. To be fully consistent this
-       ! must have the dimension of entropy and it is therefore
-       ! set to a fraction of the free stream value.
-
-       sslim = 0.001_realType*pInfCorr/(rhoInf**gammaInf)
-
-    end select
-
-    ! Set a couple of constants for the scheme.
-
-    fis2 = rFil*vis2
-    fis4 = rFil*vis4
-    sfil = one - rFil
-
-    ! Replace the total energy by rho times the total enthalpy.
-    ! In this way the numerical solution is total enthalpy preserving
-    ! for the steady Euler equations. Also replace the velocities by
-    ! the momentum. Only done for the entries used in the
-    ! discretization, i.e. ignore the corner halo's.
-
-    do k=0,kb
-       do j=2,jl
-          do i=2,il
-             w(i,j,k,ivx)   = w(i,j,k,irho)*w(i,j,k,ivx)
-             w(i,j,k,ivy)   = w(i,j,k,irho)*w(i,j,k,ivy)
-             w(i,j,k,ivz)   = w(i,j,k,irho)*w(i,j,k,ivz)
-             w(i,j,k,irhoE) = w(i,j,k,irhoE) + p(i,j,k)
-          enddo
-       enddo
-    enddo
-
-    do k=2,kl
-       do j=2,jl
-          w(0,j,k,ivx)   = w(0,j,k,irho)*w(0,j,k,ivx)
-          w(0,j,k,ivy)   = w(0,j,k,irho)*w(0,j,k,ivy)
-          w(0,j,k,ivz)   = w(0,j,k,irho)*w(0,j,k,ivz)
-          w(0,j,k,irhoE) = w(0,j,k,irhoE) + p(0,j,k)
-
-          w(1,j,k,ivx)   = w(1,j,k,irho)*w(1,j,k,ivx)
-          w(1,j,k,ivy)   = w(1,j,k,irho)*w(1,j,k,ivy)
-          w(1,j,k,ivz)   = w(1,j,k,irho)*w(1,j,k,ivz)
-          w(1,j,k,irhoE) = w(1,j,k,irhoE) + p(1,j,k)
-
-          w(ie,j,k,ivx)   = w(ie,j,k,irho)*w(ie,j,k,ivx)
-          w(ie,j,k,ivy)   = w(ie,j,k,irho)*w(ie,j,k,ivy)
-          w(ie,j,k,ivz)   = w(ie,j,k,irho)*w(ie,j,k,ivz)
-          w(ie,j,k,irhoE) = w(ie,j,k,irhoE) + p(ie,j,k)
-
-          w(ib,j,k,ivx)   = w(ib,j,k,irho)*w(ib,j,k,ivx)
-          w(ib,j,k,ivy)   = w(ib,j,k,irho)*w(ib,j,k,ivy)
-          w(ib,j,k,ivz)   = w(ib,j,k,irho)*w(ib,j,k,ivz)
-          w(ib,j,k,irhoE) = w(ib,j,k,irhoE) + p(ib,j,k)
-       enddo
-    enddo
-
-    do k=2,kl
-       do i=2,il
-          w(i,0,k,ivx)   = w(i,0,k,irho)*w(i,0,k,ivx)
-          w(i,0,k,ivy)   = w(i,0,k,irho)*w(i,0,k,ivy)
-          w(i,0,k,ivz)   = w(i,0,k,irho)*w(i,0,k,ivz)
-          w(i,0,k,irhoE) = w(i,0,k,irhoE) + p(i,0,k)
+    end subroutine viscousFlux
+
+    subroutine viscousFluxApprox
+        use constants
+        use blockPointers
+        use flowVarRefState
+        use inputPhysics
+        use iteration
+        implicit none
+        !
+        !      Local parameter.
+        !
+        real(kind=realType), parameter :: twoThird = two * third
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k
+        integer(kind=intType) :: ii, jj, kk
+
+        real(kind=realType) :: rFilv, por, mul, mue, mut, heatCoef
+        real(kind=realType) :: gm1, factLamHeat, factTurbHeat
+        real(kind=realType) :: u_x, u_y, u_z, v_x, v_y, v_z, w_x, w_y, w_z
+        real(kind=realType) :: q_x, q_y, q_z, ubar, vbar, wbar
+        real(kind=realType) :: corr, ssx, ssy, ssz, ss, fracDiv
+        real(kind=realType) :: tauxx, tauyy, tauzz
+        real(kind=realType) :: tauxy, tauxz, tauyz
+        real(kind=realType) :: fmx, fmy, fmz, frhoE
+        real(kind=realType) :: dd
+        logical :: correctForK
+
+        mue = zero
+        rFilv = rFil
+
+        ! Viscous fluxes in the I-direction
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 1, il
+
+                    ! Compute the vector from the center of cell i to cell i+1
+                    ssx = eighth * (x(i + 1, j - 1, k - 1, 1) - x(i - 1, j - 1, k - 1, 1) &
+                                    + x(i + 1, j - 1, k, 1) - x(i - 1, j - 1, k, 1) &
+                                    + x(i + 1, j, k - 1, 1) - x(i - 1, j, k - 1, 1) &
+                                    + x(i + 1, j, k, 1) - x(i - 1, j, k, 1))
+                    ssy = eighth * (x(i + 1, j - 1, k - 1, 2) - x(i - 1, j - 1, k - 1, 2) &
+                                    + x(i + 1, j - 1, k, 2) - x(i - 1, j - 1, k, 2) &
+                                    + x(i + 1, j, k - 1, 2) - x(i - 1, j, k - 1, 2) &
+                                    + x(i + 1, j, k, 2) - x(i - 1, j, k, 2))
+                    ssz = eighth * (x(i + 1, j - 1, k - 1, 3) - x(i - 1, j - 1, k - 1, 3) &
+                                    + x(i + 1, j - 1, k, 3) - x(i - 1, j - 1, k, 3) &
+                                    + x(i + 1, j, k - 1, 3) - x(i - 1, j, k - 1, 3) &
+                                    + x(i + 1, j, k, 3) - x(i - 1, j, k, 3))
+
+                    ! And determine one/ length of vector squared
+                    ss = one / (ssx * ssx + ssy * ssy + ssz * ssz)
+                    ssx = ss * ssx
+                    ssy = ss * ssy
+                    ssz = ss * ssz
+
+                    ! Now compute each gradient
+                    dd = w(i + 1, j, k, ivx) - w(i, j, k, ivx)
+                    u_x = dd * ssx
+                    u_y = dd * ssy
+                    u_z = dd * ssz
+
+                    dd = w(i + 1, j, k, ivy) - w(i, j, k, ivy)
+                    v_x = dd * ssx
+                    v_y = dd * ssy
+                    v_z = dd * ssz
+
+                    dd = w(i + 1, j, k, ivz) - w(i, j, k, ivz)
+                    w_x = dd * ssx
+                    w_y = dd * ssy
+                    w_z = dd * ssz
+
+                    dd = aa(i + 1, j, k) - aa(i, j, k)
+                    q_x = -dd * ssx
+                    q_y = -dd * ssy
+                    q_z = -dd * ssz
+
+                    por = half * rFilv
+                    if (porI(i, j, k) == noFlux) por = zero
+
+                    ! Compute the laminar and (if present) the eddy viscosities
+                    ! multiplied by the porosity. Compute the factor in front of
+                    ! the gradients of the speed of sound squared for the heat
+                    ! flux.
+
+                    mul = por * (rlv(i, j, k) + rlv(i + 1, j, k))
+                    if (eddyModel) mue = por * (rev(i, j, k) + rev(i + 1, j, k))
+                    mut = mul + mue
+
+                    gm1 = half * (gamma(i, j, k) + gamma(i + 1, j, k)) - one
+                    factLamHeat = one / (prandtl * gm1)
+                    factTurbHeat = one / (prandtlTurb * gm1)
+
+                    heatCoef = mul * factLamHeat + mue * factTurbHeat
+
+                    ! Compute the stress tensor and the heat flux vector.
+
+                    fracDiv = twoThird * (u_x + v_y + w_z)
+
+                    tauxx = mut * (two * u_x - fracDiv)
+                    tauyy = mut * (two * v_y - fracDiv)
+                    tauzz = mut * (two * w_z - fracDiv)
+
+                    tauxy = mut * (u_y + v_x)
+                    tauxz = mut * (u_z + w_x)
+                    tauyz = mut * (v_z + w_y)
+
+                    q_x = heatCoef * q_x
+                    q_y = heatCoef * q_y
+                    q_z = heatCoef * q_z
+
+                    ! Compute the average velocities for the face. Remember that
+                    ! the velocities are stored and not the momentum.
+
+                    ubar = half * (w(i, j, k, ivx) + w(i + 1, j, k, ivx))
+                    vbar = half * (w(i, j, k, ivy) + w(i + 1, j, k, ivy))
+                    wbar = half * (w(i, j, k, ivz) + w(i + 1, j, k, ivz))
+
+                    ! Compute the viscous fluxes for this i-face.
+
+                    fmx = tauxx * si(i, j, k, 1) + tauxy * si(i, j, k, 2) + tauxz * si(i, j, k, 3)
+                    fmy = tauxy * si(i, j, k, 1) + tauyy * si(i, j, k, 2) + tauyz * si(i, j, k, 3)
+                    fmz = tauxz * si(i, j, k, 1) + tauyz * si(i, j, k, 2) + tauzz * si(i, j, k, 3)
+                    frhoE = (ubar * tauxx + vbar * tauxy + wbar * tauxz) * si(i, j, k, 1) &
+                            + (ubar * tauxy + vbar * tauyy + wbar * tauyz) * si(i, j, k, 2) &
+                            + (ubar * tauxz + vbar * tauyz + wbar * tauzz) * si(i, j, k, 3) &
+                            - q_x * si(i, j, k, 1) - q_y * si(i, j, k, 2) - q_z * si(i, j, k, 3)
+
+                    ! Update the residuals of cell i and i+1.
+
+                    fw(i, j, k, imx) = fw(i, j, k, imx) - fmx
+                    fw(i, j, k, imy) = fw(i, j, k, imy) - fmy
+                    fw(i, j, k, imz) = fw(i, j, k, imz) - fmz
+                    fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - frhoE
+
+                    fw(i + 1, j, k, imx) = fw(i + 1, j, k, imx) + fmx
+                    fw(i + 1, j, k, imy) = fw(i + 1, j, k, imy) + fmy
+                    fw(i + 1, j, k, imz) = fw(i + 1, j, k, imz) + fmz
+                    fw(i + 1, j, k, irhoE) = fw(i + 1, j, k, irhoE) + frhoE
+
+                end do
+            end do
+        end do
+
+        ! Viscous fluxes in the J-direction
+
+        do k = 2, kl
+            do j = 1, jl
+                do i = 2, il
+
+                    ! Compute the vector from the center of cell j to cell j+1
+                    ssx = eighth * (x(i - 1, j + 1, k - 1, 1) - x(i - 1, j - 1, k - 1, 1) &
+                                    + x(i - 1, j + 1, k, 1) - x(i - 1, j - 1, k, 1) &
+                                    + x(i, j + 1, k - 1, 1) - x(i, j - 1, k - 1, 1) &
+                                    + x(i, j + 1, k, 1) - x(i, j - 1, k, 1))
+                    ssy = eighth * (x(i - 1, j + 1, k - 1, 2) - x(i - 1, j - 1, k - 1, 2) &
+                                    + x(i - 1, j + 1, k, 2) - x(i - 1, j - 1, k, 2) &
+                                    + x(i, j + 1, k - 1, 2) - x(i, j - 1, k - 1, 2) &
+                                    + x(i, j + 1, k, 2) - x(i, j - 1, k, 2))
+                    ssz = eighth * (x(i - 1, j + 1, k - 1, 3) - x(i - 1, j - 1, k - 1, 3) &
+                                    + x(i - 1, j + 1, k, 3) - x(i - 1, j - 1, k, 3) &
+                                    + x(i, j + 1, k - 1, 3) - x(i, j - 1, k - 1, 3) &
+                                    + x(i, j + 1, k, 3) - x(i, j - 1, k, 3))
+
+                    ! And determine one/ length of vector squared
+                    ss = one / (ssx * ssx + ssy * ssy + ssz * ssz)
+                    ssx = ss * ssx
+                    ssy = ss * ssy
+                    ssz = ss * ssz
+
+                    ! Now compute each gradient
+                    dd = w(i, j + 1, k, ivx) - w(i, j, k, ivx)
+                    u_x = dd * ssx
+                    u_y = dd * ssy
+                    u_z = dd * ssz
+
+                    dd = w(i, j + 1, k, ivy) - w(i, j, k, ivy)
+                    v_x = dd * ssx
+                    v_y = dd * ssy
+                    v_z = dd * ssz
+
+                    dd = w(i, j + 1, k, ivz) - w(i, j, k, ivz)
+                    w_x = dd * ssx
+                    w_y = dd * ssy
+                    w_z = dd * ssz
+
+                    dd = aa(i, j + 1, k) - aa(i, j, k)
+                    q_x = -dd * ssx
+                    q_y = -dd * ssy
+                    q_z = -dd * ssz
+
+                    por = half * rFilv
+                    if (porJ(i, j, k) == noFlux) por = zero
+
+                    ! Compute the laminar and (if present) the eddy viscosities
+                    ! multiplied by the porosity. Compute the factor in front of
+                    ! the gradients of the speed of sound squared for the heat
+                    ! flux.
+
+                    mul = por * (rlv(i, j, k) + rlv(i, j + 1, k))
+                    if (eddyModel) mue = por * (rev(i, j, k) + rev(i, j + 1, k))
+                    mut = mul + mue
+
+                    gm1 = half * (gamma(i, j, k) + gamma(i, j + 1, k)) - one
+                    factLamHeat = one / (prandtl * gm1)
+                    factTurbHeat = one / (prandtlTurb * gm1)
+
+                    heatCoef = mul * factLamHeat + mue * factTurbHeat
+
+                    ! Compute the stress tensor and the heat flux vector.
+
+                    fracDiv = twoThird * (u_x + v_y + w_z)
+
+                    tauxx = mut * (two * u_x - fracDiv)
+                    tauyy = mut * (two * v_y - fracDiv)
+                    tauzz = mut * (two * w_z - fracDiv)
+
+                    tauxy = mut * (u_y + v_x)
+                    tauxz = mut * (u_z + w_x)
+                    tauyz = mut * (v_z + w_y)
+
+                    q_x = heatCoef * q_x
+                    q_y = heatCoef * q_y
+                    q_z = heatCoef * q_z
+
+                    ! Compute the average velocities for the face. Remember that
+                    ! the velocities are stored and not the momentum.
+
+                    ubar = half * (w(i, j, k, ivx) + w(i, j + 1, k, ivx))
+                    vbar = half * (w(i, j, k, ivy) + w(i, j + 1, k, ivy))
+                    wbar = half * (w(i, j, k, ivz) + w(i, j + 1, k, ivz))
+
+                    ! Compute the viscous fluxes for this j-face.
+
+                    fmx = tauxx * sj(i, j, k, 1) + tauxy * sj(i, j, k, 2) + tauxz * sj(i, j, k, 3)
+                    fmy = tauxy * sj(i, j, k, 1) + tauyy * sj(i, j, k, 2) + tauyz * sj(i, j, k, 3)
+                    fmz = tauxz * sj(i, j, k, 1) + tauyz * sj(i, j, k, 2) + tauzz * sj(i, j, k, 3)
+                    frhoE = (ubar * tauxx + vbar * tauxy + wbar * tauxz) * sj(i, j, k, 1) &
+                            + (ubar * tauxy + vbar * tauyy + wbar * tauyz) * sj(i, j, k, 2) &
+                            + (ubar * tauxz + vbar * tauyz + wbar * tauzz) * sj(i, j, k, 3) &
+                            - q_x * sj(i, j, k, 1) - q_y * sj(i, j, k, 2) - q_z * sj(i, j, k, 3)
+
+                    ! Update the residuals of cell j and j+1.
+
+                    fw(i, j, k, imx) = fw(i, j, k, imx) - fmx
+                    fw(i, j, k, imy) = fw(i, j, k, imy) - fmy
+                    fw(i, j, k, imz) = fw(i, j, k, imz) - fmz
+                    fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - frhoE
+
+                    fw(i, j + 1, k, imx) = fw(i, j + 1, k, imx) + fmx
+                    fw(i, j + 1, k, imy) = fw(i, j + 1, k, imy) + fmy
+                    fw(i, j + 1, k, imz) = fw(i, j + 1, k, imz) + fmz
+                    fw(i, j + 1, k, irhoE) = fw(i, j + 1, k, irhoE) + frhoE
+
+                end do
+            end do
+        end do
+
+        ! Viscous fluxes in the K-direction
+
+        do k = 1, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Compute the vector from the center of cell k to cell k+1
+                    ssx = eighth * (x(i - 1, j - 1, k + 1, 1) - x(i - 1, j - 1, k - 1, 1) &
+                                    + x(i - 1, j, k + 1, 1) - x(i - 1, j, k - 1, 1) &
+                                    + x(i, j - 1, k + 1, 1) - x(i, j - 1, k - 1, 1) &
+                                    + x(i, j, k + 1, 1) - x(i, j, k - 1, 1))
+                    ssy = eighth * (x(i - 1, j - 1, k + 1, 2) - x(i - 1, j - 1, k - 1, 2) &
+                                    + x(i - 1, j, k + 1, 2) - x(i - 1, j, k - 1, 2) &
+                                    + x(i, j - 1, k + 1, 2) - x(i, j - 1, k - 1, 2) &
+                                    + x(i, j, k + 1, 2) - x(i, j, k - 1, 2))
+                    ssz = eighth * (x(i - 1, j - 1, k + 1, 3) - x(i - 1, j - 1, k - 1, 3) &
+                                    + x(i - 1, j, k + 1, 3) - x(i - 1, j, k - 1, 3) &
+                                    + x(i, j - 1, k + 1, 3) - x(i, j - 1, k - 1, 3) &
+                                    + x(i, j, k + 1, 3) - x(i, j, k - 1, 3))
+                    ! And determine one/ length of vector squared
+                    ss = one / (ssx * ssx + ssy * ssy + ssz * ssz)
+                    ssx = ss * ssx
+                    ssy = ss * ssy
+                    ssz = ss * ssz
+
+                    ! Now compute each gradient
+                    dd = w(i, j, k + 1, ivx) - w(i, j, k, ivx)
+                    u_x = dd * ssx
+                    u_y = dd * ssy
+                    u_z = dd * ssz
+
+                    dd = w(i, j, k + 1, ivy) - w(i, j, k, ivy)
+                    v_x = dd * ssx
+                    v_y = dd * ssy
+                    v_z = dd * ssz
+
+                    dd = w(i, j, k + 1, ivz) - w(i, j, k, ivz)
+                    w_x = dd * ssx
+                    w_y = dd * ssy
+                    w_z = dd * ssz
+
+                    dd = aa(i, j, k + 1) - aa(i, j, k)
+                    q_x = -dd * ssx
+                    q_y = -dd * ssy
+                    q_z = -dd * ssz
+
+                    por = half * rFilv
+                    if (porK(i, j, k) == noFlux) por = zero
+
+                    ! Compute the laminar and (if present) the eddy viscosities
+                    ! multiplied by the porosity. Compute the factor in front of
+                    ! the gradients of the speed of sound squared for the heat
+                    ! flux.
+
+                    mul = por * (rlv(i, j, k) + rlv(i, j, k + 1))
+                    if (eddyModel) mue = por * (rev(i, j, k) + rev(i, j, k + 1))
+                    mut = mul + mue
+
+                    gm1 = half * (gamma(i, j, k) + gamma(i, j, k + 1)) - one
+                    factLamHeat = one / (prandtl * gm1)
+                    factTurbHeat = one / (prandtlTurb * gm1)
+
+                    heatCoef = mul * factLamHeat + mue * factTurbHeat
+
+                    ! Compute the stress tensor and the heat flux vector.
+
+                    fracDiv = twoThird * (u_x + v_y + w_z)
+
+                    tauxx = mut * (two * u_x - fracDiv)
+                    tauyy = mut * (two * v_y - fracDiv)
+                    tauzz = mut * (two * w_z - fracDiv)
+
+                    tauxy = mut * (u_y + v_x)
+                    tauxz = mut * (u_z + w_x)
+                    tauyz = mut * (v_z + w_y)
+
+                    q_x = heatCoef * q_x
+                    q_y = heatCoef * q_y
+                    q_z = heatCoef * q_z
+
+                    ! Compute the average velocities for the face. Remember that
+                    ! the velocities are stored and not the momentum.
+
+                    ubar = half * (w(i, j, k, ivx) + w(i, j, k + 1, ivx))
+                    vbar = half * (w(i, j, k, ivy) + w(i, j, k + 1, ivy))
+                    wbar = half * (w(i, j, k, ivz) + w(i, j, k + 1, ivz))
+
+                    ! Compute the viscous fluxes for this j-face.
+
+                    fmx = tauxx * sk(i, j, k, 1) + tauxy * sk(i, j, k, 2) + tauxz * sk(i, j, k, 3)
+                    fmy = tauxy * sk(i, j, k, 1) + tauyy * sk(i, j, k, 2) + tauyz * sk(i, j, k, 3)
+                    fmz = tauxz * sk(i, j, k, 1) + tauyz * sk(i, j, k, 2) + tauzz * sk(i, j, k, 3)
+                    frhoE = (ubar * tauxx + vbar * tauxy + wbar * tauxz) * sk(i, j, k, 1) &
+                            + (ubar * tauxy + vbar * tauyy + wbar * tauyz) * sk(i, j, k, 2) &
+                            + (ubar * tauxz + vbar * tauyz + wbar * tauzz) * sk(i, j, k, 3) &
+                            - q_x * sk(i, j, k, 1) - q_y * sk(i, j, k, 2) - q_z * sk(i, j, k, 3)
+
+                    ! Update the residuals of cell j and j+1.
+
+                    fw(i, j, k, imx) = fw(i, j, k, imx) - fmx
+                    fw(i, j, k, imy) = fw(i, j, k, imy) - fmy
+                    fw(i, j, k, imz) = fw(i, j, k, imz) - fmz
+                    fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - frhoE
+
+                    fw(i, j, k + 1, imx) = fw(i, j, k + 1, imx) + fmx
+                    fw(i, j, k + 1, imy) = fw(i, j, k + 1, imy) + fmy
+                    fw(i, j, k + 1, imz) = fw(i, j, k + 1, imz) + fmz
+                    fw(i, j, k + 1, irhoE) = fw(i, j, k + 1, irhoE) + frhoE
+
+                end do
+            end do
+        end do
+
+    end subroutine viscousFluxApprox
+
+    subroutine inviscidDissFluxScalarApprox
+        !
+        !       inviscidDissFluxScalar computes the scalar artificial
+        !       dissipation, see AIAA paper 81-1259, for a given block.
+        !       Therefore it is assumed that the pointers in  blockPointers
+        !       already point to the correct block.
+        !
+        use blockPointers
+        use cgnsGrid
+        use constants
+        use flowVarRefState
+        use inputDiscretization
+        use inputPhysics
+        use iteration
+        implicit none
+        !
+        !      Local parameter.
+        !
+        real(kind=realType), parameter :: dssMax = 0.25_realType
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, ind
+
+        real(kind=realType) :: sslim, rhoi
+        real(kind=realType) :: sfil, fis2, fis4
+        real(kind=realType) :: ppor, rrad, dis2
+        real(kind=realType) :: dss1, dss2, ddw, fs
+
+        ! Check if rFil == 0. If so, the dissipative flux needs not to
+        ! be computed.
+
+        if (abs(rFil) < thresholdReal) return
+
+        ! Determine the variables used to compute the switch.
+        ! For the inviscid case this is the pressure; for the viscous
+        ! case it is the entropy.
+
+        select case (equations)
+        case (EulerEquations)
+
+            ! Inviscid case. Pressure switch is based on the pressure.
+            ! Also set the value of sslim. To be fully consistent this
+            ! must have the dimension of pressure and it is therefore
+            ! set to a fraction of the free stream value.
+
+            sslim = 0.001_realType * pInfCorr
+
+            !===============================================================
+
+        case (NSEquations, RANSEquations)
+
+            ! Viscous case. Pressure switch is based on the entropy.
+            ! Also set the value of sslim. To be fully consistent this
+            ! must have the dimension of entropy and it is therefore
+            ! set to a fraction of the free stream value.
+
+            sslim = 0.001_realType * pInfCorr / (rhoInf**gammaInf)
+
+        end select
+
+        ! Set a couple of constants for the scheme.
+
+        fis2 = rFil * vis2
+        fis4 = rFil * vis4
+        sfil = one - rFil
+
+        ! Replace the total energy by rho times the total enthalpy.
+        ! In this way the numerical solution is total enthalpy preserving
+        ! for the steady Euler equations. Also replace the velocities by
+        ! the momentum. Only done for the entries used in the
+        ! discretization, i.e. ignore the corner halo's.
+
+        do k = 0, kb
+            do j = 2, jl
+                do i = 2, il
+                    w(i, j, k, ivx) = w(i, j, k, irho) * w(i, j, k, ivx)
+                    w(i, j, k, ivy) = w(i, j, k, irho) * w(i, j, k, ivy)
+                    w(i, j, k, ivz) = w(i, j, k, irho) * w(i, j, k, ivz)
+                    w(i, j, k, irhoE) = w(i, j, k, irhoE) + p(i, j, k)
+                end do
+            end do
+        end do
+
+        do k = 2, kl
+            do j = 2, jl
+                w(0, j, k, ivx) = w(0, j, k, irho) * w(0, j, k, ivx)
+                w(0, j, k, ivy) = w(0, j, k, irho) * w(0, j, k, ivy)
+                w(0, j, k, ivz) = w(0, j, k, irho) * w(0, j, k, ivz)
+                w(0, j, k, irhoE) = w(0, j, k, irhoE) + p(0, j, k)
+
+                w(1, j, k, ivx) = w(1, j, k, irho) * w(1, j, k, ivx)
+                w(1, j, k, ivy) = w(1, j, k, irho) * w(1, j, k, ivy)
+                w(1, j, k, ivz) = w(1, j, k, irho) * w(1, j, k, ivz)
+                w(1, j, k, irhoE) = w(1, j, k, irhoE) + p(1, j, k)
+
+                w(ie, j, k, ivx) = w(ie, j, k, irho) * w(ie, j, k, ivx)
+                w(ie, j, k, ivy) = w(ie, j, k, irho) * w(ie, j, k, ivy)
+                w(ie, j, k, ivz) = w(ie, j, k, irho) * w(ie, j, k, ivz)
+                w(ie, j, k, irhoE) = w(ie, j, k, irhoE) + p(ie, j, k)
+
+                w(ib, j, k, ivx) = w(ib, j, k, irho) * w(ib, j, k, ivx)
+                w(ib, j, k, ivy) = w(ib, j, k, irho) * w(ib, j, k, ivy)
+                w(ib, j, k, ivz) = w(ib, j, k, irho) * w(ib, j, k, ivz)
+                w(ib, j, k, irhoE) = w(ib, j, k, irhoE) + p(ib, j, k)
+            end do
+        end do
+
+        do k = 2, kl
+            do i = 2, il
+                w(i, 0, k, ivx) = w(i, 0, k, irho) * w(i, 0, k, ivx)
+                w(i, 0, k, ivy) = w(i, 0, k, irho) * w(i, 0, k, ivy)
+                w(i, 0, k, ivz) = w(i, 0, k, irho) * w(i, 0, k, ivz)
+                w(i, 0, k, irhoE) = w(i, 0, k, irhoE) + p(i, 0, k)
+
+                w(i, 1, k, ivx) = w(i, 1, k, irho) * w(i, 1, k, ivx)
+                w(i, 1, k, ivy) = w(i, 1, k, irho) * w(i, 1, k, ivy)
+                w(i, 1, k, ivz) = w(i, 1, k, irho) * w(i, 1, k, ivz)
+                w(i, 1, k, irhoE) = w(i, 1, k, irhoE) + p(i, 1, k)
+
+                w(i, je, k, ivx) = w(i, je, k, irho) * w(i, je, k, ivx)
+                w(i, je, k, ivy) = w(i, je, k, irho) * w(i, je, k, ivy)
+                w(i, je, k, ivz) = w(i, je, k, irho) * w(i, je, k, ivz)
+                w(i, je, k, irhoE) = w(i, je, k, irhoE) + p(i, je, k)
+
+                w(i, jb, k, ivx) = w(i, jb, k, irho) * w(i, jb, k, ivx)
+                w(i, jb, k, ivy) = w(i, jb, k, irho) * w(i, jb, k, ivy)
+                w(i, jb, k, ivz) = w(i, jb, k, irho) * w(i, jb, k, ivz)
+                w(i, jb, k, irhoE) = w(i, jb, k, irhoE) + p(i, jb, k)
+            end do
+        end do
+
+        ! Initialize the dissipative residual to a certain times,
+        ! possibly zero, the previously stored value. Owned cells
+        ! only, because the halo values do not matter.
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    fw(i, j, k, irho) = sfil * fw(i, j, k, irho)
+                    fw(i, j, k, imx) = sfil * fw(i, j, k, imx)
+                    fw(i, j, k, imy) = sfil * fw(i, j, k, imy)
+                    fw(i, j, k, imz) = sfil * fw(i, j, k, imz)
+                    fw(i, j, k, irhoE) = sfil * fw(i, j, k, irhoE)
+                end do
+            end do
+        end do
+        !
+        !       Dissipative fluxes in the i-direction.
+        !
+        do k = 2, kl
+            do j = 2, jl
+
+                ! Compute the pressure sensor in the first cell, which
+                ! is a halo cell.
+
+                dss1 = abs((shockSensor(2, j, k) - two * shockSensor(1, j, k) + shockSensor(0, j, k)) &
+                           / (shockSensor(2, j, k) + two * shockSensor(1, j, k) + shockSensor(0, j, k) + sslim))
+
+                ! Loop in i-direction.
 
-          w(i,1,k,ivx)   = w(i,1,k,irho)*w(i,1,k,ivx)
-          w(i,1,k,ivy)   = w(i,1,k,irho)*w(i,1,k,ivy)
-          w(i,1,k,ivz)   = w(i,1,k,irho)*w(i,1,k,ivz)
-          w(i,1,k,irhoE) = w(i,1,k,irhoE) + p(i,1,k)
+                do i = 1, il
 
-          w(i,je,k,ivx)   = w(i,je,k,irho)*w(i,je,k,ivx)
-          w(i,je,k,ivy)   = w(i,je,k,irho)*w(i,je,k,ivy)
-          w(i,je,k,ivz)   = w(i,je,k,irho)*w(i,je,k,ivz)
-          w(i,je,k,irhoE) = w(i,je,k,irhoE) + p(i,je,k)
+                    ! Compute the pressure sensor in the cell to the right
+                    ! of the face.
 
-          w(i,jb,k,ivx)   = w(i,jb,k,irho)*w(i,jb,k,ivx)
-          w(i,jb,k,ivy)   = w(i,jb,k,irho)*w(i,jb,k,ivy)
-          w(i,jb,k,ivz)   = w(i,jb,k,irho)*w(i,jb,k,ivz)
-          w(i,jb,k,irhoE) = w(i,jb,k,irhoE) + p(i,jb,k)
-       enddo
-    enddo
+                    dss2 = abs((shockSensor(i + 2, j, k) - two * shockSensor(i + 1, j, k) + shockSensor(i, j, k)) &
+                           / (shockSensor(i + 2, j, k) + two * shockSensor(i + 1, j, k) + shockSensor(i, j, k) + sslim))
 
-    ! Initialize the dissipative residual to a certain times,
-    ! possibly zero, the previously stored value. Owned cells
-    ! only, because the halo values do not matter.
+                    ! Compute the dissipation coefficients for this face.
 
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-             fw(i,j,k,irho)  = sfil*fw(i,j,k,irho)
-             fw(i,j,k,imx)   = sfil*fw(i,j,k,imx)
-             fw(i,j,k,imy)   = sfil*fw(i,j,k,imy)
-             fw(i,j,k,imz)   = sfil*fw(i,j,k,imz)
-             fw(i,j,k,irhoE) = sfil*fw(i,j,k,irhoE)
-          enddo
-       enddo
-    enddo
-    !
-    !       Dissipative fluxes in the i-direction.
-    !
-    do k=2,kl
-       do j=2,jl
+                    ppor = zero
+                    if (porI(i, j, k) == normalFlux) ppor = half
+                    rrad = ppor * (radI(i, j, k) + radI(i + 1, j, k))
 
-          ! Compute the pressure sensor in the first cell, which
-          ! is a halo cell.
+                    ! Modification for FD Preconditioner Note: This lumping
+                    ! actually still results in a greater than 3 cell stencil
+                    ! in any direction. Since this seems to work slightly
+                    ! better than the dis2=sigma*fis4*rrad, we will just use
+                    ! a 5-cell stencil for doing the PC
 
-          dss1 = abs((shockSensor(2,j,k) - two*shockSensor(1,j,k) + shockSensor(0,j,k)) &
-               /     (shockSensor(2,j,k) + two*shockSensor(1,j,k) + shockSensor(0,j,k) + sslim))
+                    dis2 = fis2 * rrad * min(dssMax, max(dss1, dss2)) + sigma * fis4 * rrad
 
-          ! Loop in i-direction.
+                    ! Compute and scatter the dissipative flux.
+                    ! Density. Store it in the mass flow of the
+                    ! appropriate sliding mesh interface.
 
-          do i=1,il
+                    ddw = w(i + 1, j, k, irho) - w(i, j, k, irho)
+                    fs = dis2 * ddw
 
-             ! Compute the pressure sensor in the cell to the right
-             ! of the face.
+                    fw(i + 1, j, k, irho) = fw(i + 1, j, k, irho) + fs
+                    fw(i, j, k, irho) = fw(i, j, k, irho) - fs
 
-             dss2 = abs((shockSensor(i+2,j,k) - two*shockSensor(i+1,j,k) + shockSensor(i,j,k)) &
-                  /     (shockSensor(i+2,j,k) + two*shockSensor(i+1,j,k) + shockSensor(i,j,k) + sslim))
+                    ! X-momentum.
 
-             ! Compute the dissipation coefficients for this face.
+                    ddw = w(i + 1, j, k, ivx) - w(i, j, k, ivx)
+                    fs = dis2 * ddw
 
-             ppor = zero
-             if(porI(i,j,k) == normalFlux) ppor = half
-             rrad = ppor*(radI(i,j,k) + radI(i+1,j,k))
+                    fw(i + 1, j, k, imx) = fw(i + 1, j, k, imx) + fs
+                    fw(i, j, k, imx) = fw(i, j, k, imx) - fs
 
-             ! Modification for FD Preconditioner Note: This lumping
-             ! actually still results in a greater than 3 cell stencil
-             ! in any direction. Since this seems to work slightly
-             ! better than the dis2=sigma*fis4*rrad, we will just use
-             ! a 5-cell stencil for doing the PC
+                    ! Y-momentum.
 
-             dis2 = fis2*rrad*min(dssMax, max(dss1,dss2))+sigma*fis4*rrad
+                    ddw = w(i + 1, j, k, ivy) - w(i, j, k, ivy)
+                    fs = dis2 * ddw
 
-             ! Compute and scatter the dissipative flux.
-             ! Density. Store it in the mass flow of the
-             ! appropriate sliding mesh interface.
+                    fw(i + 1, j, k, imy) = fw(i + 1, j, k, imy) + fs
+                    fw(i, j, k, imy) = fw(i, j, k, imy) - fs
 
-             ddw = w(i+1,j,k,irho) - w(i,j,k,irho)
-             fs  = dis2*ddw
+                    ! Z-momentum.
 
-             fw(i+1,j,k,irho) = fw(i+1,j,k,irho) + fs
-             fw(i,j,k,irho)   = fw(i,j,k,irho)   - fs
+                    ddw = w(i + 1, j, k, ivz) - w(i, j, k, ivz)
+                    fs = dis2 * ddw
 
-             ! X-momentum.
+                    fw(i + 1, j, k, imz) = fw(i + 1, j, k, imz) + fs
+                    fw(i, j, k, imz) = fw(i, j, k, imz) - fs
 
-             ddw = w(i+1,j,k,ivx) - w(i,j,k,ivx)
-             fs  = dis2*ddw
+                    ! Energy.
 
-             fw(i+1,j,k,imx) = fw(i+1,j,k,imx) + fs
-             fw(i,j,k,imx)   = fw(i,j,k,imx)   - fs
+                    ddw = w(i + 1, j, k, irhoE) - w(i, j, k, irhoE)
+                    fs = dis2 * ddw
 
-             ! Y-momentum.
+                    fw(i + 1, j, k, irhoE) = fw(i + 1, j, k, irhoE) + fs
+                    fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - fs
 
-             ddw = w(i+1,j,k,ivy) - w(i,j,k,ivy)
-             fs  = dis2*ddw
+                    ! Set dss1 to dss2 for the next face.
 
-             fw(i+1,j,k,imy) = fw(i+1,j,k,imy) + fs
-             fw(i,j,k,imy)   = fw(i,j,k,imy)   - fs
+                    dss1 = dss2
 
-             ! Z-momentum.
+                end do
+            end do
+        end do
+        !
+        !       Dissipative fluxes in the j-direction.
+        !
+        do k = 2, kl
+            do i = 2, il
 
-             ddw = w(i+1,j,k,ivz) - w(i,j,k,ivz)
-             fs  = dis2*ddw
+                ! Compute the pressure sensor in the first cell, which
+                ! is a halo cell.
 
-             fw(i+1,j,k,imz) = fw(i+1,j,k,imz) + fs
-             fw(i,j,k,imz)   = fw(i,j,k,imz)   - fs
+                dss1 = abs((shockSensor(i, 2, k) - two * shockSensor(i, 1, k) + shockSensor(i, 0, k)) &
+                           / (shockSensor(i, 2, k) + two * shockSensor(i, 1, k) + shockSensor(i, 0, k) + sslim))
 
-             ! Energy.
+                ! Loop in j-direction.
 
-             ddw = w(i+1,j,k,irhoE) - w(i,j,k,irhoE)
-             fs  = dis2*ddw
+                do j = 1, jl
 
-             fw(i+1,j,k,irhoE) = fw(i+1,j,k,irhoE) + fs
-             fw(i,j,k,irhoE)   = fw(i,j,k,irhoE)   - fs
+                    ! Compute the pressure sensor in the cell to the right
+                    ! of the face.
 
-             ! Set dss1 to dss2 for the next face.
+                    dss2 = abs((shockSensor(i, j + 2, k) - two * shockSensor(i, j + 1, k) + shockSensor(i, j, k)) &
+                           / (shockSensor(i, j + 2, k) + two * shockSensor(i, j + 1, k) + shockSensor(i, j, k) + sslim))
 
-             dss1 = dss2
+                    ! Compute the dissipation coefficients for this face.
 
-          enddo
-       enddo
-    enddo
-    !
-    !       Dissipative fluxes in the j-direction.
-    !
-    do k=2,kl
-       do i=2,il
+                    ppor = zero
+                    if (porJ(i, j, k) == normalFlux) ppor = half
+                    rrad = ppor * (radJ(i, j, k) + radJ(i, j + 1, k))
 
-          ! Compute the pressure sensor in the first cell, which
-          ! is a halo cell.
+                    ! Modification for FD Preconditioner
+                    dis2 = fis2 * rrad * min(dssMax, max(dss1, dss2)) + sigma * fis4 * rrad
 
-          dss1 = abs((shockSensor(i,2,k) - two*shockSensor(i,1,k) + shockSensor(i,0,k)) &
-               /     (shockSensor(i,2,k) + two*shockSensor(i,1,k) + shockSensor(i,0,k) + sslim))
+                    ! Compute and scatter the dissipative flux.
+                    ! Density. Store it in the mass flow of the
+                    ! appropriate sliding mesh interface.
 
-          ! Loop in j-direction.
+                    ddw = w(i, j + 1, k, irho) - w(i, j, k, irho)
+                    fs = dis2 * ddw
 
-          do j=1,jl
+                    fw(i, j + 1, k, irho) = fw(i, j + 1, k, irho) + fs
+                    fw(i, j, k, irho) = fw(i, j, k, irho) - fs
 
-             ! Compute the pressure sensor in the cell to the right
-             ! of the face.
+                    ! X-momentum.
 
-             dss2 = abs((shockSensor(i,j+2,k) - two*shockSensor(i,j+1,k) + shockSensor(i,j,k)) &
-                  /     (shockSensor(i,j+2,k) + two*shockSensor(i,j+1,k) + shockSensor(i,j,k) + sslim))
+                    ddw = w(i, j + 1, k, ivx) - w(i, j, k, ivx)
+                    fs = dis2 * ddw
 
-             ! Compute the dissipation coefficients for this face.
+                    fw(i, j + 1, k, imx) = fw(i, j + 1, k, imx) + fs
+                    fw(i, j, k, imx) = fw(i, j, k, imx) - fs
 
-             ppor = zero
-             if(porJ(i,j,k) == normalFlux) ppor = half
-             rrad = ppor*(radJ(i,j,k) + radJ(i,j+1,k))
+                    ! Y-momentum.
 
-             ! Modification for FD Preconditioner
-             dis2 = fis2*rrad*min(dssMax, max(dss1,dss2))+sigma*fis4*rrad
+                    ddw = w(i, j + 1, k, ivy) - w(i, j, k, ivy)
+                    fs = dis2 * ddw
 
-             ! Compute and scatter the dissipative flux.
-             ! Density. Store it in the mass flow of the
-             ! appropriate sliding mesh interface.
+                    fw(i, j + 1, k, imy) = fw(i, j + 1, k, imy) + fs
+                    fw(i, j, k, imy) = fw(i, j, k, imy) - fs
 
-             ddw = w(i,j+1,k,irho) - w(i,j,k,irho)
-             fs  = dis2*ddw
+                    ! Z-momentum.
 
-             fw(i,j+1,k,irho) = fw(i,j+1,k,irho) + fs
-             fw(i,j,k,irho)   = fw(i,j,k,irho)   - fs
+                    ddw = w(i, j + 1, k, ivz) - w(i, j, k, ivz)
+                    fs = dis2 * ddw
 
-             ! X-momentum.
+                    fw(i, j + 1, k, imz) = fw(i, j + 1, k, imz) + fs
+                    fw(i, j, k, imz) = fw(i, j, k, imz) - fs
 
-             ddw = w(i,j+1,k,ivx) - w(i,j,k,ivx)
-             fs  = dis2*ddw
+                    ! Energy.
 
-             fw(i,j+1,k,imx) = fw(i,j+1,k,imx) + fs
-             fw(i,j,k,imx)   = fw(i,j,k,imx)   - fs
+                    ddw = w(i, j + 1, k, irhoE) - w(i, j, k, irhoE)
+                    fs = dis2 * ddw
 
-             ! Y-momentum.
+                    fw(i, j + 1, k, irhoE) = fw(i, j + 1, k, irhoE) + fs
+                    fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - fs
 
-             ddw = w(i,j+1,k,ivy) - w(i,j,k,ivy)
-             fs  = dis2*ddw
+                    ! Set dss1 to dss2 for the next face.
 
-             fw(i,j+1,k,imy) = fw(i,j+1,k,imy) + fs
-             fw(i,j,k,imy)   = fw(i,j,k,imy)   - fs
+                    dss1 = dss2
 
-             ! Z-momentum.
+                end do
+            end do
+        end do
+        !
+        !       Dissipative fluxes in the k-direction.
+        !
+        do j = 2, jl
+            do i = 2, il
 
-             ddw = w(i,j+1,k,ivz) - w(i,j,k,ivz)
-             fs  = dis2*ddw
+                ! Compute the pressure sensor in the first cell, which
+                ! is a halo cell.
 
-             fw(i,j+1,k,imz) = fw(i,j+1,k,imz) + fs
-             fw(i,j,k,imz)   = fw(i,j,k,imz)   - fs
+                dss1 = abs((shockSensor(i, j, 2) - two * shockSensor(i, j, 1) + shockSensor(i, j, 0)) &
+                           / (shockSensor(i, j, 2) + two * shockSensor(i, j, 1) + shockSensor(i, j, 0) + sslim))
 
-             ! Energy.
+                ! Loop in k-direction.
 
-             ddw = w(i,j+1,k,irhoE) - w(i,j,k,irhoE)
-             fs  = dis2*ddw
+                do k = 1, kl
 
-             fw(i,j+1,k,irhoE) = fw(i,j+1,k,irhoE) + fs
-             fw(i,j,k,irhoE)   = fw(i,j,k,irhoE)   - fs
+                    ! Compute the pressure sensor in the cell to the right
+                    ! of the face.
 
-             ! Set dss1 to dss2 for the next face.
+                    dss2 = abs((shockSensor(i, j, k + 2) - two * shockSensor(i, j, k + 1) + shockSensor(i, j, k)) &
+                           / (shockSensor(i, j, k + 2) + two * shockSensor(i, j, k + 1) + shockSensor(i, j, k) + sslim))
 
-             dss1 = dss2
+                    ! Compute the dissipation coefficients for this face.
 
-          enddo
-       enddo
-    enddo
-    !
-    !       Dissipative fluxes in the k-direction.
-    !
-    do j=2,jl
-       do i=2,il
+                    ppor = zero
+                    if (porK(i, j, k) == normalFlux) ppor = half
+                    rrad = ppor * (radK(i, j, k) + radK(i, j, k + 1))
 
-          ! Compute the pressure sensor in the first cell, which
-          ! is a halo cell.
+                    ! Modification for FD Preconditioner
+                    dis2 = fis2 * rrad * min(dssMax, max(dss1, dss2)) + sigma * fis4 * rrad
 
-          dss1 = abs((shockSensor(i,j,2) - two*shockSensor(i,j,1) + shockSensor(i,j,0)) &
-               /     (shockSensor(i,j,2) + two*shockSensor(i,j,1) + shockSensor(i,j,0) + sslim))
+                    ! Compute and scatter the dissipative flux.
+                    ! Density. Store it in the mass flow of the
+                    ! appropriate sliding mesh interface.
 
-          ! Loop in k-direction.
+                    ddw = w(i, j, k + 1, irho) - w(i, j, k, irho)
+                    fs = dis2 * ddw
 
-          do k=1,kl
+                    fw(i, j, k + 1, irho) = fw(i, j, k + 1, irho) + fs
+                    fw(i, j, k, irho) = fw(i, j, k, irho) - fs
 
-             ! Compute the pressure sensor in the cell to the right
-             ! of the face.
+                    ! X-momentum.
 
-             dss2 = abs((shockSensor(i,j,k+2) - two*shockSensor(i,j,k+1) + shockSensor(i,j,k)) &
-                  /     (shockSensor(i,j,k+2) + two*shockSensor(i,j,k+1) + shockSensor(i,j,k) + sslim))
+                    ddw = w(i, j, k + 1, ivx) - w(i, j, k, ivx)
+                    fs = dis2 * ddw
 
-             ! Compute the dissipation coefficients for this face.
+                    fw(i, j, k + 1, imx) = fw(i, j, k + 1, imx) + fs
+                    fw(i, j, k, imx) = fw(i, j, k, imx) - fs
 
-             ppor = zero
-             if(porK(i,j,k) == normalFlux) ppor = half
-             rrad = ppor*(radK(i,j,k) + radK(i,j,k+1))
+                    ! Y-momentum.
 
-             ! Modification for FD Preconditioner
-             dis2 = fis2*rrad*min(dssMax, max(dss1,dss2))+sigma*fis4*rrad
+                    ddw = w(i, j, k + 1, ivy) - w(i, j, k, ivy)
+                    fs = dis2 * ddw
 
-             ! Compute and scatter the dissipative flux.
-             ! Density. Store it in the mass flow of the
-             ! appropriate sliding mesh interface.
+                    fw(i, j, k + 1, imy) = fw(i, j, k + 1, imy) + fs
+                    fw(i, j, k, imy) = fw(i, j, k, imy) - fs
 
-             ddw = w(i,j,k+1,irho) - w(i,j,k,irho)
-             fs  = dis2*ddw
+                    ! Z-momentum.
 
-             fw(i,j,k+1,irho) = fw(i,j,k+1,irho) + fs
-             fw(i,j,k,irho)   = fw(i,j,k,irho)   - fs
+                    ddw = w(i, j, k + 1, ivz) - w(i, j, k, ivz)
+                    fs = dis2 * ddw
 
-             ! X-momentum.
+                    fw(i, j, k + 1, imz) = fw(i, j, k + 1, imz) + fs
+                    fw(i, j, k, imz) = fw(i, j, k, imz) - fs
 
-             ddw = w(i,j,k+1,ivx) - w(i,j,k,ivx)
-             fs  = dis2*ddw
+                    ! Energy.
 
-             fw(i,j,k+1,imx) = fw(i,j,k+1,imx) + fs
-             fw(i,j,k,imx)   = fw(i,j,k,imx)   - fs
+                    ddw = w(i, j, k + 1, irhoE) - w(i, j, k, irhoE)
+                    fs = dis2 * ddw
 
-             ! Y-momentum.
+                    fw(i, j, k + 1, irhoE) = fw(i, j, k + 1, irhoE) + fs
+                    fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - fs
 
-             ddw = w(i,j,k+1,ivy) - w(i,j,k,ivy)
-             fs  = dis2*ddw
+                    ! Set dss1 to dss2 for the next face.
 
-             fw(i,j,k+1,imy) = fw(i,j,k+1,imy) + fs
-             fw(i,j,k,imy)   = fw(i,j,k,imy)   - fs
+                    dss1 = dss2
 
-             ! Z-momentum.
+                end do
+            end do
+        end do
+
+        ! Replace rho times the total enthalpy by the total energy and
+        ! store the velocities again instead of the momentum. Only for
+        ! those entries that have been altered, i.e. ignore the
+        ! corner halo's.
+
+        do k = 0, kb
+            do j = 2, jl
+                do i = 2, il
+                    rhoi = one / w(i, j, k, irho)
+                    w(i, j, k, ivx) = w(i, j, k, ivx) * rhoi
+                    w(i, j, k, ivy) = w(i, j, k, ivy) * rhoi
+                    w(i, j, k, ivz) = w(i, j, k, ivz) * rhoi
+                    w(i, j, k, irhoE) = w(i, j, k, irhoE) - p(i, j, k)
+                end do
+            end do
+        end do
+
+        do k = 2, kl
+            do j = 2, jl
+                rhoi = one / w(0, j, k, irho)
+                w(0, j, k, ivx) = w(0, j, k, ivx) * rhoi
+                w(0, j, k, ivy) = w(0, j, k, ivy) * rhoi
+                w(0, j, k, ivz) = w(0, j, k, ivz) * rhoi
+                w(0, j, k, irhoE) = w(0, j, k, irhoE) - p(0, j, k)
+
+                rhoi = one / w(1, j, k, irho)
+                w(1, j, k, ivx) = w(1, j, k, ivx) * rhoi
+                w(1, j, k, ivy) = w(1, j, k, ivy) * rhoi
+                w(1, j, k, ivz) = w(1, j, k, ivz) * rhoi
+                w(1, j, k, irhoE) = w(1, j, k, irhoE) - p(1, j, k)
+
+                rhoi = one / w(ie, j, k, irho)
+                w(ie, j, k, ivx) = w(ie, j, k, ivx) * rhoi
+                w(ie, j, k, ivy) = w(ie, j, k, ivy) * rhoi
+                w(ie, j, k, ivz) = w(ie, j, k, ivz) * rhoi
+                w(ie, j, k, irhoE) = w(ie, j, k, irhoE) - p(ie, j, k)
+
+                rhoi = one / w(ib, j, k, irho)
+                w(ib, j, k, ivx) = w(ib, j, k, ivx) * rhoi
+                w(ib, j, k, ivy) = w(ib, j, k, ivy) * rhoi
+                w(ib, j, k, ivz) = w(ib, j, k, ivz) * rhoi
+                w(ib, j, k, irhoE) = w(ib, j, k, irhoE) - p(ib, j, k)
+            end do
+        end do
+
+        do k = 2, kl
+            do i = 2, il
+                rhoi = one / w(i, 0, k, irho)
+                w(i, 0, k, ivx) = w(i, 0, k, ivx) * rhoi
+                w(i, 0, k, ivy) = w(i, 0, k, ivy) * rhoi
+                w(i, 0, k, ivz) = w(i, 0, k, ivz) * rhoi
+                w(i, 0, k, irhoE) = w(i, 0, k, irhoE) - p(i, 0, k)
+
+                rhoi = one / w(i, 1, k, irho)
+                w(i, 1, k, ivx) = w(i, 1, k, ivx) * rhoi
+                w(i, 1, k, ivy) = w(i, 1, k, ivy) * rhoi
+                w(i, 1, k, ivz) = w(i, 1, k, ivz) * rhoi
+                w(i, 1, k, irhoE) = w(i, 1, k, irhoE) - p(i, 1, k)
+
+                rhoi = one / w(i, je, k, irho)
+                w(i, je, k, ivx) = w(i, je, k, ivx) * rhoi
+                w(i, je, k, ivy) = w(i, je, k, ivy) * rhoi
+                w(i, je, k, ivz) = w(i, je, k, ivz) * rhoi
+                w(i, je, k, irhoE) = w(i, je, k, irhoE) - p(i, je, k)
+
+                rhoi = one / w(i, jb, k, irho)
+                w(i, jb, k, ivx) = w(i, jb, k, ivx) * rhoi
+                w(i, jb, k, ivy) = w(i, jb, k, ivy) * rhoi
+                w(i, jb, k, ivz) = w(i, jb, k, ivz) * rhoi
+                w(i, jb, k, irhoE) = w(i, jb, k, irhoE) - p(i, jb, k)
+            end do
+        end do
+
+    end subroutine inviscidDissFluxScalarApprox
+
+    subroutine inviscidDissFluxMatrixApprox
+        !
+        !       inviscidDissFluxMatrix computes the matrix artificial
+        !       dissipation term. Instead of the spectral radius, as used in
+        !       the scalar dissipation scheme, the absolute value of the flux
+        !       jacobian is used. This leads to a less diffusive and
+        !       consequently more accurate scheme. It is assumed that the
+        !       pointers in blockPointers already point to the correct block.
+        !
+        use blockPointers
+        use cgnsGrid
+        use constants
+        use flowVarRefState
+        use inputDiscretization
+        use inputPhysics
+        use iteration
+        use utils, only: getCorrectForK
+        implicit none
+        !
+        !      Local parameters.
+        !
+        real(kind=realType), parameter :: dpMax = 0.25_realType
+        real(kind=realType), parameter :: epsAcoustic = 0.25_realType
+        real(kind=realType), parameter :: epsShear = 0.025_realType
+        real(kind=realType), parameter :: omega = 0.5_realType
+        real(kind=realType), parameter :: oneMinOmega = one - omega
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, ind
+
+        real(kind=realType) :: plim, sface
+        real(kind=realType) :: sfil, fis2, fis4
+        real(kind=realType) :: gammaAvg, gm1, ovgm1, gm53
+        real(kind=realType) :: ppor, rrad, dis2
+        real(kind=realType) :: dp1, dp2, ddw, tmp, fs
+        real(kind=realType) :: dr, dru, drv, drw, dre, drk, sx, sy, sz
+        real(kind=realType) :: uAvg, vAvg, wAvg, a2Avg, aAvg, hAvg
+        real(kind=realType) :: alphaAvg, unAvg, ovaAvg, ova2Avg
+        real(kind=realType) :: kAvg, lam1, lam2, lam3, area
+        real(kind=realType) :: abv1, abv2, abv3, abv4, abv5, abv6, abv7
+        logical :: correctForK
+
+        ! Check if rFil == 0. If so, the dissipative flux needs not to
+        ! be computed.
+
+        if (abs(rFil) < thresholdReal) return
+
+        ! Set the value of plim. To be fully consistent this must have
+        ! the dimension of a pressure. Therefore a fraction of pInfCorr
+        ! is used.
+
+        plim = 0.001_realType * pInfCorr
+
+        ! Determine whether or not the total energy must be corrected
+        ! for the presence of the turbulent kinetic energy.
+
+        correctForK = getCorrectForK()
+
+        ! Initialize sface to zero. This value will be used if the
+        ! block is not moving.
+
+        sface = zero
+
+        ! Set a couple of constants for the scheme.
+
+        fis2 = rFil * vis2
+        fis4 = rFil * vis4
+        sfil = one - rFil
+
+        ! Initialize the dissipative residual to a certain times,
+        ! possibly zero, the previously stored value. Owned cells
+        ! only, because the halo values do not matter.
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    fw(i, j, k, irho) = sfil * fw(i, j, k, irho)
+                    fw(i, j, k, imx) = sfil * fw(i, j, k, imx)
+                    fw(i, j, k, imy) = sfil * fw(i, j, k, imy)
+                    fw(i, j, k, imz) = sfil * fw(i, j, k, imz)
+                    fw(i, j, k, irhoE) = sfil * fw(i, j, k, irhoE)
+                end do
+            end do
+        end do
 
-             ddw = w(i,j,k+1,ivz) - w(i,j,k,ivz)
-             fs  = dis2*ddw
+        !
+        !       Dissipative fluxes in the i-direction.
+        !
+        do k = 2, kl
+            do j = 2, jl
 
-             fw(i,j,k+1,imz) = fw(i,j,k+1,imz) + fs
-             fw(i,j,k,imz)   = fw(i,j,k,imz)   - fs
+                ! Compute the pressure sensor in the first cell, which
+                ! is a halo cell.
 
-             ! Energy.
+                dp1 = abs((shockSensor(2, j, k) - two * shockSensor(1, j, k) + shockSensor(0, j, k)) &
+                          / (omega * (shockSensor(2, j, k) + two * shockSensor(1, j, k) + shockSensor(0, j, k)) &
+                             + oneMinOmega * (abs(shockSensor(2, j, k) - shockSensor(1, j, k)) &
+                                              + abs(shockSensor(1, j, k) - shockSensor(0, j, k))) + plim))
 
-             ddw = w(i,j,k+1,irhoE) - w(i,j,k,irhoE)
-             fs  = dis2*ddw
+                ! Loop in i-direction.
 
-             fw(i,j,k+1,irhoE) = fw(i,j,k+1,irhoE) + fs
-             fw(i,j,k,irhoE)   = fw(i,j,k,irhoE)   - fs
-
-             ! Set dss1 to dss2 for the next face.
-
-             dss1 = dss2
-
-          enddo
-       enddo
-    enddo
-
-    ! Replace rho times the total enthalpy by the total energy and
-    ! store the velocities again instead of the momentum. Only for
-    ! those entries that have been altered, i.e. ignore the
-    ! corner halo's.
-
-    do k=0,kb
-       do j=2,jl
-          do i=2,il
-             rhoi           = one/w(i,j,k,irho)
-             w(i,j,k,ivx)   = w(i,j,k,ivx)*rhoi
-             w(i,j,k,ivy)   = w(i,j,k,ivy)*rhoi
-             w(i,j,k,ivz)   = w(i,j,k,ivz)*rhoi
-             w(i,j,k,irhoE) = w(i,j,k,irhoE) - p(i,j,k)
-          enddo
-       enddo
-    enddo
-
-    do k=2,kl
-       do j=2,jl
-          rhoi           = one/w(0,j,k,irho)
-          w(0,j,k,ivx)   = w(0,j,k,ivx)*rhoi
-          w(0,j,k,ivy)   = w(0,j,k,ivy)*rhoi
-          w(0,j,k,ivz)   = w(0,j,k,ivz)*rhoi
-          w(0,j,k,irhoE) = w(0,j,k,irhoE) - p(0,j,k)
-
-          rhoi           = one/w(1,j,k,irho)
-          w(1,j,k,ivx)   = w(1,j,k,ivx)*rhoi
-          w(1,j,k,ivy)   = w(1,j,k,ivy)*rhoi
-          w(1,j,k,ivz)   = w(1,j,k,ivz)*rhoi
-          w(1,j,k,irhoE) = w(1,j,k,irhoE) - p(1,j,k)
-
-          rhoi            = one/w(ie,j,k,irho)
-          w(ie,j,k,ivx)   = w(ie,j,k,ivx)*rhoi
-          w(ie,j,k,ivy)   = w(ie,j,k,ivy)*rhoi
-          w(ie,j,k,ivz)   = w(ie,j,k,ivz)*rhoi
-          w(ie,j,k,irhoE) = w(ie,j,k,irhoE) - p(ie,j,k)
-
-          rhoi            = one/w(ib,j,k,irho)
-          w(ib,j,k,ivx)   = w(ib,j,k,ivx)*rhoi
-          w(ib,j,k,ivy)   = w(ib,j,k,ivy)*rhoi
-          w(ib,j,k,ivz)   = w(ib,j,k,ivz)*rhoi
-          w(ib,j,k,irhoE) = w(ib,j,k,irhoE) - p(ib,j,k)
-       enddo
-    enddo
-
-    do k=2,kl
-       do i=2,il
-          rhoi           = one/w(i,0,k,irho)
-          w(i,0,k,ivx)   = w(i,0,k,ivx)*rhoi
-          w(i,0,k,ivy)   = w(i,0,k,ivy)*rhoi
-          w(i,0,k,ivz)   = w(i,0,k,ivz)*rhoi
-          w(i,0,k,irhoE) = w(i,0,k,irhoE) - p(i,0,k)
-
-          rhoi           = one/w(i,1,k,irho)
-          w(i,1,k,ivx)   = w(i,1,k,ivx)*rhoi
-          w(i,1,k,ivy)   = w(i,1,k,ivy)*rhoi
-          w(i,1,k,ivz)   = w(i,1,k,ivz)*rhoi
-          w(i,1,k,irhoE) = w(i,1,k,irhoE) - p(i,1,k)
-
-          rhoi            = one/w(i,je,k,irho)
-          w(i,je,k,ivx)   = w(i,je,k,ivx)*rhoi
-          w(i,je,k,ivy)   = w(i,je,k,ivy)*rhoi
-          w(i,je,k,ivz)   = w(i,je,k,ivz)*rhoi
-          w(i,je,k,irhoE) = w(i,je,k,irhoE) - p(i,je,k)
-
-          rhoi            = one/w(i,jb,k,irho)
-          w(i,jb,k,ivx)   = w(i,jb,k,ivx)*rhoi
-          w(i,jb,k,ivy)   = w(i,jb,k,ivy)*rhoi
-          w(i,jb,k,ivz)   = w(i,jb,k,ivz)*rhoi
-          w(i,jb,k,irhoE) = w(i,jb,k,irhoE) - p(i,jb,k)
-       enddo
-    enddo
-
-  end subroutine inviscidDissFluxScalarApprox
-
-  subroutine inviscidDissFluxMatrixApprox
-    !
-    !       inviscidDissFluxMatrix computes the matrix artificial
-    !       dissipation term. Instead of the spectral radius, as used in
-    !       the scalar dissipation scheme, the absolute value of the flux
-    !       jacobian is used. This leads to a less diffusive and
-    !       consequently more accurate scheme. It is assumed that the
-    !       pointers in blockPointers already point to the correct block.
-    !
-    use blockPointers
-    use cgnsGrid
-    use constants
-    use flowVarRefState
-    use inputDiscretization
-    use inputPhysics
-    use iteration
-    use utils, only : getCorrectForK
-    implicit none
-    !
-    !      Local parameters.
-    !
-    real(kind=realType), parameter :: dpMax        = 0.25_realType
-    real(kind=realType), parameter :: epsAcoustic  = 0.25_realType
-    real(kind=realType), parameter :: epsShear     = 0.025_realType
-    real(kind=realType), parameter :: omega        = 0.5_realType
-    real(kind=realType), parameter :: oneMinOmega = one - omega
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, ind
-
-    real(kind=realType) :: plim, sface
-    real(kind=realType) :: sfil, fis2, fis4
-    real(kind=realType) :: gammaAvg, gm1, ovgm1, gm53
-    real(kind=realType) :: ppor, rrad, dis2
-    real(kind=realType) :: dp1, dp2, ddw, tmp, fs
-    real(kind=realType) :: dr, dru, drv, drw, dre, drk, sx, sy, sz
-    real(kind=realType) :: uAvg, vAvg, wAvg, a2Avg, aAvg, hAvg
-    real(kind=realType) :: alphaAvg, unAvg, ovaAvg, ova2Avg
-    real(kind=realType) :: kAvg, lam1, lam2, lam3, area
-    real(kind=realType) :: abv1, abv2, abv3, abv4, abv5, abv6, abv7
-    logical :: correctForK
-
-    ! Check if rFil == 0. If so, the dissipative flux needs not to
-    ! be computed.
-
-    if(abs(rFil) < thresholdReal) return
-
-    ! Set the value of plim. To be fully consistent this must have
-    ! the dimension of a pressure. Therefore a fraction of pInfCorr
-    ! is used.
-
-    plim = 0.001_realType*pInfCorr
-
-    ! Determine whether or not the total energy must be corrected
-    ! for the presence of the turbulent kinetic energy.
-
-    correctForK = getCorrectForK()
-
-    ! Initialize sface to zero. This value will be used if the
-    ! block is not moving.
-
-    sface = zero
-
-    ! Set a couple of constants for the scheme.
-
-    fis2 = rFil*vis2
-    fis4 = rFil*vis4
-    sfil = one - rFil
-
-    ! Initialize the dissipative residual to a certain times,
-    ! possibly zero, the previously stored value. Owned cells
-    ! only, because the halo values do not matter.
-
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-             fw(i,j,k,irho)  = sfil*fw(i,j,k,irho)
-             fw(i,j,k,imx)   = sfil*fw(i,j,k,imx)
-             fw(i,j,k,imy)   = sfil*fw(i,j,k,imy)
-             fw(i,j,k,imz)   = sfil*fw(i,j,k,imz)
-             fw(i,j,k,irhoE) = sfil*fw(i,j,k,irhoE)
-          enddo
-       enddo
-    enddo
-
-    !
-    !       Dissipative fluxes in the i-direction.
-    !
-    do k=2,kl
-       do j=2,jl
-
-          ! Compute the pressure sensor in the first cell, which
-          ! is a halo cell.
-
-          dp1 = abs((shockSensor(2,j,k) - two*shockSensor(1,j,k) + shockSensor(0,j,k))        &
-               /     (omega*(shockSensor(2,j,k) + two*shockSensor(1,j,k) + shockSensor(0,j,k)) &
-               +      oneMinOmega*(abs(shockSensor(2,j,k)-shockSensor(1,j,k))        &
-               +                   abs(shockSensor(1,j,k)-shockSensor(0,j,k))) + plim))
-
-          ! Loop in i-direction.
-
-          do i=1,il
-
-             ! Compute the pressure sensor in the cell to the right
-             ! of the face.
-
-             dp2 = abs((shockSensor(i+2,j,k) - two*shockSensor(i+1,j,k) + shockSensor(i,j,k))        &
-                  /     (omega*(shockSensor(i+2,j,k) + two*shockSensor(i+1,j,k) + shockSensor(i,j,k)) &
-                  +      oneMinOmega*(abs(shockSensor(i+2,j,k) - shockSensor(i+1,j,k))      &
-                  +                   abs(shockSensor(i+1,j,k) - shockSensor(i,j,k))) + plim))
-
-             ! Compute the dissipation coefficients for this face.
-
-             ppor = zero
-             if(porI(i,j,k) == normalFlux) ppor = one
-
-             dis2 = fis2*ppor*min(dpMax,max(dp1,dp2))+sigma*fis4*ppor
-
-             ! Construct the vector of the first and third differences
-             ! multiplied by the appropriate constants.
-
-             ddw = w(i+1,j,k,irho) - w(i,j,k,irho)
-             dr  = dis2*ddw
+                do i = 1, il
 
-             ddw = w(i+1,j,k,irho)*w(i+1,j,k,ivx) &
-                  - w(i,j,k,irho)*w(i,j,k,ivx)
-             dru = dis2*ddw
+                    ! Compute the pressure sensor in the cell to the right
+                    ! of the face.
 
-             ddw = w(i+1,j,k,irho)*w(i+1,j,k,ivy) &
-                  - w(i,j,k,irho)*w(i,j,k,ivy)
-             drv = dis2*ddw
+                    dp2 = abs((shockSensor(i + 2, j, k) - two * shockSensor(i + 1, j, k) + shockSensor(i, j, k)) &
+                         / (omega * (shockSensor(i + 2, j, k) + two * shockSensor(i + 1, j, k) + shockSensor(i, j, k)) &
+                                 + oneMinOmega * (abs(shockSensor(i + 2, j, k) - shockSensor(i + 1, j, k)) &
+                                                  + abs(shockSensor(i + 1, j, k) - shockSensor(i, j, k))) + plim))
 
-             ddw = w(i+1,j,k,irho)*w(i+1,j,k,ivz) &
-                  - w(i,j,k,irho)*w(i,j,k,ivz)
-             drw = dis2*ddw
+                    ! Compute the dissipation coefficients for this face.
 
-             ddw = w(i+1,j,k,irhoE) - w(i,j,k,irhoE)
-             dre = dis2*ddw
+                    ppor = zero
+                    if (porI(i, j, k) == normalFlux) ppor = one
 
-             ! In case a k-equation is present, compute the difference
-             ! of rhok and store the average value of k. If not present,
-             ! set both these values to zero, such that later on no
-             ! decision needs to be made anymore.
+                    dis2 = fis2 * ppor * min(dpMax, max(dp1, dp2)) + sigma * fis4 * ppor
 
-             if( correctForK ) then
-                ddw = w(i+1,j,k,irho)*w(i+1,j,k,itu1) &
-                     - w(i,j,k,irho)*w(i,j,k,itu1)
-                drk = dis2*ddw
+                    ! Construct the vector of the first and third differences
+                    ! multiplied by the appropriate constants.
 
-                kAvg = half*(w(i,j,k,itu1) + w(i+1,j,k,itu1))
-             else
-                drk   = zero
-                kAvg = zero
-             endif
+                    ddw = w(i + 1, j, k, irho) - w(i, j, k, irho)
+                    dr = dis2 * ddw
 
-             ! Compute the average value of gamma and compute some
-             ! expressions in which it occurs.
+                    ddw = w(i + 1, j, k, irho) * w(i + 1, j, k, ivx) &
+                          - w(i, j, k, irho) * w(i, j, k, ivx)
+                    dru = dis2 * ddw
 
-             gammaAvg = half*(gamma(i+1,j,k) + gamma(i,j,k))
-             gm1      = gammaAvg - one
-             ovgm1    = one/gm1
-             gm53     = gammaAvg - five*third
+                    ddw = w(i + 1, j, k, irho) * w(i + 1, j, k, ivy) &
+                          - w(i, j, k, irho) * w(i, j, k, ivy)
+                    drv = dis2 * ddw
 
-             ! Compute the average state at the interface.
+                    ddw = w(i + 1, j, k, irho) * w(i + 1, j, k, ivz) &
+                          - w(i, j, k, irho) * w(i, j, k, ivz)
+                    drw = dis2 * ddw
 
-             uAvg  = half*(w(i+1,j,k,ivx) + w(i,j,k,ivx))
-             vAvg  = half*(w(i+1,j,k,ivy) + w(i,j,k,ivy))
-             wAvg  = half*(w(i+1,j,k,ivz) + w(i,j,k,ivz))
-             a2Avg = half*(gamma(i+1,j,k)*p(i+1,j,k)/w(i+1,j,k,irho) &
-                  +       gamma(i,  j,k)*p(i,  j,k)/w(i,  j,k,irho))
+                    ddw = w(i + 1, j, k, irhoE) - w(i, j, k, irhoE)
+                    dre = dis2 * ddw
 
-             sx = si(i,j,k,1); sy = si(i,j,k,2); sz = si(i,j,k,3)
-             area = sqrt(sx**2 + sy**2 + sz**2)
-             tmp  = one/max(1.e-25_realType,area)
-             sx   = sx*tmp
-             sy   = sy*tmp
-             sz   = sz*tmp
+                    ! In case a k-equation is present, compute the difference
+                    ! of rhok and store the average value of k. If not present,
+                    ! set both these values to zero, such that later on no
+                    ! decision needs to be made anymore.
 
-             alphaAvg = half*(uAvg**2 + vAvg**2 + wAvg**2)
-             hAvg     = alphaAvg + ovgm1*(a2Avg - gm53*kAvg)
-             aAvg     = sqrt(a2Avg)
-             unAvg    = uAvg*sx + vAvg*sy + wAvg*sz
-             ovaAvg   = one/aAvg
-             ova2Avg  = one/a2Avg
+                    if (correctForK) then
+                        ddw = w(i + 1, j, k, irho) * w(i + 1, j, k, itu1) &
+                              - w(i, j, k, irho) * w(i, j, k, itu1)
+                        drk = dis2 * ddw
 
-             ! The mesh velocity if the face is moving. It must be
-             ! divided by the area to obtain a true velocity.
+                        kAvg = half * (w(i, j, k, itu1) + w(i + 1, j, k, itu1))
+                    else
+                        drk = zero
+                        kAvg = zero
+                    end if
 
-             if( addGridVelocities ) sface = sFaceI(i,j,k)*tmp
+                    ! Compute the average value of gamma and compute some
+                    ! expressions in which it occurs.
 
-             ! Compute the absolute values of the three eigenvalues
-             ! and make sure they don't become zero by cutting them
-             ! off to a certain minimum.
+                    gammaAvg = half * (gamma(i + 1, j, k) + gamma(i, j, k))
+                    gm1 = gammaAvg - one
+                    ovgm1 = one / gm1
+                    gm53 = gammaAvg - five * third
 
-             lam1 = abs(unAvg - sface + aAvg)
-             lam2 = abs(unAvg - sface - aAvg)
-             lam3 = abs(unAvg - sface)
+                    ! Compute the average state at the interface.
 
-             rrad = lam3 + aAvg
+                    uAvg = half * (w(i + 1, j, k, ivx) + w(i, j, k, ivx))
+                    vAvg = half * (w(i + 1, j, k, ivy) + w(i, j, k, ivy))
+                    wAvg = half * (w(i + 1, j, k, ivz) + w(i, j, k, ivz))
+                    a2Avg = half * (gamma(i + 1, j, k) * p(i + 1, j, k) / w(i + 1, j, k, irho) &
+                                    + gamma(i, j, k) * p(i, j, k) / w(i, j, k, irho))
 
-             lam1 = max(lam1,epsAcoustic*rrad)
-             lam2 = max(lam2,epsAcoustic*rrad)
-             lam3 = max(lam3,epsShear*rrad)
+                    sx = si(i, j, k, 1); sy = si(i, j, k, 2); sz = si(i, j, k, 3)
+                    area = sqrt(sx**2 + sy**2 + sz**2)
+                    tmp = one / max(1.e-25_realType, area)
+                    sx = sx * tmp
+                    sy = sy * tmp
+                    sz = sz * tmp
 
-             ! Multiply the eigenvalues by the area to obtain
-             ! the correct values for the dissipation term.
+                    alphaAvg = half * (uAvg**2 + vAvg**2 + wAvg**2)
+                    hAvg = alphaAvg + ovgm1 * (a2Avg - gm53 * kAvg)
+                    aAvg = sqrt(a2Avg)
+                    unAvg = uAvg * sx + vAvg * sy + wAvg * sz
+                    ovaAvg = one / aAvg
+                    ova2Avg = one / a2Avg
 
-             lam1 = lam1*area
-             lam2 = lam2*area
-             lam3 = lam3*area
+                    ! The mesh velocity if the face is moving. It must be
+                    ! divided by the area to obtain a true velocity.
 
-             ! Some abbreviations, which occur quite often in the
-             ! dissipation terms.
+                    if (addGridVelocities) sface = sFaceI(i, j, k) * tmp
 
-             abv1 = half*(lam1 + lam2)
-             abv2 = half*(lam1 - lam2)
-             abv3 = abv1 - lam3
+                    ! Compute the absolute values of the three eigenvalues
+                    ! and make sure they don't become zero by cutting them
+                    ! off to a certain minimum.
 
-             abv4 = gm1*(alphaAvg*dr - uAvg*dru -vAvg*drv &
-                  -      wAvg*drw + dre) - gm53*drk
-             abv5 = sx*dru + sy*drv + sz*drw - unAvg*dr
+                    lam1 = abs(unAvg - sface + aAvg)
+                    lam2 = abs(unAvg - sface - aAvg)
+                    lam3 = abs(unAvg - sface)
 
-             abv6 = abv3*abv4*ova2Avg + abv2*abv5*ovaAvg
-             abv7 = abv2*abv4*ovaAvg  + abv3*abv5
+                    rrad = lam3 + aAvg
 
-             ! Compute and scatter the dissipative flux.
-             ! Density.
+                    lam1 = max(lam1, epsAcoustic * rrad)
+                    lam2 = max(lam2, epsAcoustic * rrad)
+                    lam3 = max(lam3, epsShear * rrad)
 
-             fs               = lam3*dr  + abv6
-             fw(i+1,j,k,irho) = fw(i+1,j,k,irho) + fs
-             fw(i,j,k,irho)   = fw(i,j,k,irho)   - fs
+                    ! Multiply the eigenvalues by the area to obtain
+                    ! the correct values for the dissipation term.
 
-             ! X-momentum.
+                    lam1 = lam1 * area
+                    lam2 = lam2 * area
+                    lam3 = lam3 * area
 
-             fs              = lam3*dru + uAvg*abv6 + sx*abv7
-             fw(i+1,j,k,imx) = fw(i+1,j,k,imx) + fs
-             fw(i,j,k,imx)   = fw(i,j,k,imx)   - fs
+                    ! Some abbreviations, which occur quite often in the
+                    ! dissipation terms.
 
-             ! Y-momentum.
+                    abv1 = half * (lam1 + lam2)
+                    abv2 = half * (lam1 - lam2)
+                    abv3 = abv1 - lam3
 
-             fs              = lam3*drv + vAvg*abv6 + sy*abv7
-             fw(i+1,j,k,imy) = fw(i+1,j,k,imy) + fs
-             fw(i,j,k,imy)   = fw(i,j,k,imy)   - fs
+                    abv4 = gm1 * (alphaAvg * dr - uAvg * dru - vAvg * drv &
+                                  - wAvg * drw + dre) - gm53 * drk
+                    abv5 = sx * dru + sy * drv + sz * drw - unAvg * dr
 
-             ! Z-momentum.
+                    abv6 = abv3 * abv4 * ova2Avg + abv2 * abv5 * ovaAvg
+                    abv7 = abv2 * abv4 * ovaAvg + abv3 * abv5
 
-             fs              = lam3*drw + wAvg*abv6 + sz*abv7
-             fw(i+1,j,k,imz) = fw(i+1,j,k,imz) + fs
-             fw(i,j,k,imz)   = fw(i,j,k,imz)   - fs
+                    ! Compute and scatter the dissipative flux.
+                    ! Density.
 
-             ! Energy.
+                    fs = lam3 * dr + abv6
+                    fw(i + 1, j, k, irho) = fw(i + 1, j, k, irho) + fs
+                    fw(i, j, k, irho) = fw(i, j, k, irho) - fs
 
-             fs                = lam3*dre + hAvg*abv6 + unAvg*abv7
-             fw(i+1,j,k,irhoE) = fw(i+1,j,k,irhoE) + fs
-             fw(i,j,k,irhoE)   = fw(i,j,k,irhoE)   - fs
+                    ! X-momentum.
 
-             ! Set dp1 to dp2 for the next face.
+                    fs = lam3 * dru + uAvg * abv6 + sx * abv7
+                    fw(i + 1, j, k, imx) = fw(i + 1, j, k, imx) + fs
+                    fw(i, j, k, imx) = fw(i, j, k, imx) - fs
 
-             dp1 = dp2
+                    ! Y-momentum.
 
-          enddo
-       enddo
-    enddo
-    !
-    !       Dissipative fluxes in the j-direction.
-    !
-    do k=2,kl
-       do i=2,il
+                    fs = lam3 * drv + vAvg * abv6 + sy * abv7
+                    fw(i + 1, j, k, imy) = fw(i + 1, j, k, imy) + fs
+                    fw(i, j, k, imy) = fw(i, j, k, imy) - fs
 
-          ! Compute the pressure sensor in the first cell, which
-          ! is a halo cell.
+                    ! Z-momentum.
 
-          dp1 = abs((shockSensor(i,2,k) - two*shockSensor(i,1,k) + shockSensor(i,0,k))        &
-               /     (omega*(shockSensor(i,2,k) + two*shockSensor(i,1,k) + shockSensor(i,0,k)) &
-               +      oneMinOmega*(abs(shockSensor(i,2,k)-shockSensor(i,1,k))        &
-               +                    abs(shockSensor(i,1,k)-shockSensor(i,0,k))) + plim))
+                    fs = lam3 * drw + wAvg * abv6 + sz * abv7
+                    fw(i + 1, j, k, imz) = fw(i + 1, j, k, imz) + fs
+                    fw(i, j, k, imz) = fw(i, j, k, imz) - fs
 
-          ! Loop in j-direction.
+                    ! Energy.
 
-          do j=1,jl
+                    fs = lam3 * dre + hAvg * abv6 + unAvg * abv7
+                    fw(i + 1, j, k, irhoE) = fw(i + 1, j, k, irhoE) + fs
+                    fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - fs
 
-             ! Compute the pressure sensor in the cell to the right
-             ! of the face.
+                    ! Set dp1 to dp2 for the next face.
 
-             dp2 = abs((shockSensor(i,j+2,k) - two*shockSensor(i,j+1,k) + shockSensor(i,j,k))        &
-                  /     (omega*(shockSensor(i,j+2,k) + two*shockSensor(i,j+1,k) + shockSensor(i,j,k)) &
-                  +      oneMinOmega*(abs(shockSensor(i,j+2,k) - shockSensor(i,j+1,k))      &
-                  +                   abs(shockSensor(i,j+1,k) - shockSensor(i,j,k))) + plim))
+                    dp1 = dp2
 
-             ! Compute the dissipation coefficients for this face.
+                end do
+            end do
+        end do
+        !
+        !       Dissipative fluxes in the j-direction.
+        !
+        do k = 2, kl
+            do i = 2, il
+
+                ! Compute the pressure sensor in the first cell, which
+                ! is a halo cell.
 
-             ppor = zero
-             if(porJ(i,j,k) == normalFlux) ppor = one
+                dp1 = abs((shockSensor(i, 2, k) - two * shockSensor(i, 1, k) + shockSensor(i, 0, k)) &
+                          / (omega * (shockSensor(i, 2, k) + two * shockSensor(i, 1, k) + shockSensor(i, 0, k)) &
+                             + oneMinOmega * (abs(shockSensor(i, 2, k) - shockSensor(i, 1, k)) &
+                                              + abs(shockSensor(i, 1, k) - shockSensor(i, 0, k))) + plim))
 
-             dis2 = fis2*ppor*min(dpMax,max(dp1,dp2))+sigma*fis4*ppor
+                ! Loop in j-direction.
 
-             ! Construct the vector of the first and third differences
-             ! multiplied by the appropriate constants.
+                do j = 1, jl
 
-             ddw = w(i,j+1,k,irho) - w(i,j,k,irho)
-             dr  = dis2*ddw
+                    ! Compute the pressure sensor in the cell to the right
+                    ! of the face.
 
-             ddw = w(i,j+1,k,irho)*w(i,j+1,k,ivx) &
-                  - w(i,j,k,irho)*w(i,j,k,ivx)
-             dru = dis2*ddw
+                    dp2 = abs((shockSensor(i, j + 2, k) - two * shockSensor(i, j + 1, k) + shockSensor(i, j, k)) &
+                         / (omega * (shockSensor(i, j + 2, k) + two * shockSensor(i, j + 1, k) + shockSensor(i, j, k)) &
+                                 + oneMinOmega * (abs(shockSensor(i, j + 2, k) - shockSensor(i, j + 1, k)) &
+                                                  + abs(shockSensor(i, j + 1, k) - shockSensor(i, j, k))) + plim))
 
-             ddw = w(i,j+1,k,irho)*w(i,j+1,k,ivy) &
-                  - w(i,j,k,irho)*w(i,j,k,ivy)
-             drv = dis2*ddw
+                    ! Compute the dissipation coefficients for this face.
 
-             ddw = w(i,j+1,k,irho)*w(i,j+1,k,ivz) &
-                  - w(i,j,k,irho)*w(i,j,k,ivz)
-             drw = dis2*ddw
+                    ppor = zero
+                    if (porJ(i, j, k) == normalFlux) ppor = one
 
-             ddw = w(i,j+1,k,irhoE) - w(i,j,k,irhoE)
-             dre = dis2*ddw
+                    dis2 = fis2 * ppor * min(dpMax, max(dp1, dp2)) + sigma * fis4 * ppor
 
-             ! In case a k-equation is present, compute the difference
-             ! of rhok and store the average value of k. If not present,
-             ! set both these values to zero, such that later on no
-             ! decision needs to be made anymore.
+                    ! Construct the vector of the first and third differences
+                    ! multiplied by the appropriate constants.
 
-             if( correctForK ) then
-                ddw = w(i,j+1,k,irho)*w(i,j+1,k,itu1) &
-                     - w(i,j,k,irho)*w(i,j,k,itu1)
-                drk = dis2*ddw
+                    ddw = w(i, j + 1, k, irho) - w(i, j, k, irho)
+                    dr = dis2 * ddw
 
-                kAvg = half*(w(i,j,k,itu1) + w(i,j+1,k,itu1))
-             else
-                drk   = zero
-                kAvg = zero
-             endif
+                    ddw = w(i, j + 1, k, irho) * w(i, j + 1, k, ivx) &
+                          - w(i, j, k, irho) * w(i, j, k, ivx)
+                    dru = dis2 * ddw
 
-             ! Compute the average value of gamma and compute some
-             ! expressions in which it occurs.
+                    ddw = w(i, j + 1, k, irho) * w(i, j + 1, k, ivy) &
+                          - w(i, j, k, irho) * w(i, j, k, ivy)
+                    drv = dis2 * ddw
 
-             gammaAvg = half*(gamma(i,j+1,k) + gamma(i,j,k))
-             gm1      = gammaAvg - one
-             ovgm1    = one/gm1
-             gm53     = gammaAvg - five*third
+                    ddw = w(i, j + 1, k, irho) * w(i, j + 1, k, ivz) &
+                          - w(i, j, k, irho) * w(i, j, k, ivz)
+                    drw = dis2 * ddw
 
-             ! Compute the average state at the interface.
+                    ddw = w(i, j + 1, k, irhoE) - w(i, j, k, irhoE)
+                    dre = dis2 * ddw
 
-             uAvg  = half*(w(i,j+1,k,ivx) + w(i,j,k,ivx))
-             vAvg  = half*(w(i,j+1,k,ivy) + w(i,j,k,ivy))
-             wAvg  = half*(w(i,j+1,k,ivz) + w(i,j,k,ivz))
-             a2Avg = half*(gamma(i,j+1,k)*p(i,j+1,k)/w(i,j+1,k,irho) &
-                  +       gamma(i,j,  k)*p(i,j,  k)/w(i,j,  k,irho))
+                    ! In case a k-equation is present, compute the difference
+                    ! of rhok and store the average value of k. If not present,
+                    ! set both these values to zero, such that later on no
+                    ! decision needs to be made anymore.
 
-             sx = sj(i,j,k,1); sy = sj(i,j,k,2); sz = sj(i,j,k,3)
-             area = sqrt(sx**2 + sy**2 + sz**2)
-             tmp  = one/max(1.e-25_realType,area)
-             sx   = sx*tmp
-             sy   = sy*tmp
-             sz   = sz*tmp
+                    if (correctForK) then
+                        ddw = w(i, j + 1, k, irho) * w(i, j + 1, k, itu1) &
+                              - w(i, j, k, irho) * w(i, j, k, itu1)
+                        drk = dis2 * ddw
 
-             alphaAvg = half*(uAvg**2 + vAvg**2 + wAvg**2)
-             hAvg     = alphaAvg + ovgm1*(a2Avg - gm53*kAvg)
-             aAvg     = sqrt(a2Avg)
-             unAvg    = uAvg*sx + vAvg*sy + wAvg*sz
-             ovaAvg   = one/aAvg
-             ova2Avg  = one/a2Avg
+                        kAvg = half * (w(i, j, k, itu1) + w(i, j + 1, k, itu1))
+                    else
+                        drk = zero
+                        kAvg = zero
+                    end if
 
-             ! The mesh velocity if the face is moving. It must be
-             ! divided by the area to obtain a true velocity.
+                    ! Compute the average value of gamma and compute some
+                    ! expressions in which it occurs.
 
-             if( addGridVelocities ) sface = sFaceJ(i,j,k)*tmp
+                    gammaAvg = half * (gamma(i, j + 1, k) + gamma(i, j, k))
+                    gm1 = gammaAvg - one
+                    ovgm1 = one / gm1
+                    gm53 = gammaAvg - five * third
 
-             ! Compute the absolute values of the three eigenvalues
-             ! and make sure they don't become zero by cutting them
-             ! off to a certain minimum.
+                    ! Compute the average state at the interface.
 
-             lam1 = abs(unAvg - sface + aAvg)
-             lam2 = abs(unAvg - sface - aAvg)
-             lam3 = abs(unAvg - sface)
+                    uAvg = half * (w(i, j + 1, k, ivx) + w(i, j, k, ivx))
+                    vAvg = half * (w(i, j + 1, k, ivy) + w(i, j, k, ivy))
+                    wAvg = half * (w(i, j + 1, k, ivz) + w(i, j, k, ivz))
+                    a2Avg = half * (gamma(i, j + 1, k) * p(i, j + 1, k) / w(i, j + 1, k, irho) &
+                                    + gamma(i, j, k) * p(i, j, k) / w(i, j, k, irho))
 
-             rrad = lam3 + aAvg
+                    sx = sj(i, j, k, 1); sy = sj(i, j, k, 2); sz = sj(i, j, k, 3)
+                    area = sqrt(sx**2 + sy**2 + sz**2)
+                    tmp = one / max(1.e-25_realType, area)
+                    sx = sx * tmp
+                    sy = sy * tmp
+                    sz = sz * tmp
 
-             lam1 = max(lam1,epsAcoustic*rrad)
-             lam2 = max(lam2,epsAcoustic*rrad)
-             lam3 = max(lam3,epsShear*rrad)
+                    alphaAvg = half * (uAvg**2 + vAvg**2 + wAvg**2)
+                    hAvg = alphaAvg + ovgm1 * (a2Avg - gm53 * kAvg)
+                    aAvg = sqrt(a2Avg)
+                    unAvg = uAvg * sx + vAvg * sy + wAvg * sz
+                    ovaAvg = one / aAvg
+                    ova2Avg = one / a2Avg
 
-             ! Multiply the eigenvalues by the area to obtain
-             ! the correct values for the dissipation term.
+                    ! The mesh velocity if the face is moving. It must be
+                    ! divided by the area to obtain a true velocity.
 
-             lam1 = lam1*area
-             lam2 = lam2*area
-             lam3 = lam3*area
+                    if (addGridVelocities) sface = sFaceJ(i, j, k) * tmp
 
-             ! Some abbreviations, which occur quite often in the
-             ! dissipation terms.
+                    ! Compute the absolute values of the three eigenvalues
+                    ! and make sure they don't become zero by cutting them
+                    ! off to a certain minimum.
 
-             abv1 = half*(lam1 + lam2)
-             abv2 = half*(lam1 - lam2)
-             abv3 = abv1 - lam3
+                    lam1 = abs(unAvg - sface + aAvg)
+                    lam2 = abs(unAvg - sface - aAvg)
+                    lam3 = abs(unAvg - sface)
 
-             abv4 = gm1*(alphaAvg*dr - uAvg*dru -vAvg*drv &
-                  -      wAvg*drw + dre) - gm53*drk
-             abv5 = sx*dru + sy*drv + sz*drw - unAvg*dr
+                    rrad = lam3 + aAvg
 
-             abv6 = abv3*abv4*ova2Avg + abv2*abv5*ovaAvg
-             abv7 = abv2*abv4*ovaAvg  + abv3*abv5
+                    lam1 = max(lam1, epsAcoustic * rrad)
+                    lam2 = max(lam2, epsAcoustic * rrad)
+                    lam3 = max(lam3, epsShear * rrad)
 
-             ! Compute and scatter the dissipative flux.
-             ! Density.
+                    ! Multiply the eigenvalues by the area to obtain
+                    ! the correct values for the dissipation term.
 
-             fs               = lam3*dr  + abv6
-             fw(i,j+1,k,irho) = fw(i,j+1,k,irho) + fs
-             fw(i,j,k,irho)   = fw(i,j,k,irho)   - fs
+                    lam1 = lam1 * area
+                    lam2 = lam2 * area
+                    lam3 = lam3 * area
 
-             ! X-momentum.
+                    ! Some abbreviations, which occur quite often in the
+                    ! dissipation terms.
 
-             fs              = lam3*dru + uAvg*abv6 + sx*abv7
-             fw(i,j+1,k,imx) = fw(i,j+1,k,imx) + fs
-             fw(i,j,k,imx)   = fw(i,j,k,imx)   - fs
+                    abv1 = half * (lam1 + lam2)
+                    abv2 = half * (lam1 - lam2)
+                    abv3 = abv1 - lam3
 
-             ! Y-momentum.
+                    abv4 = gm1 * (alphaAvg * dr - uAvg * dru - vAvg * drv &
+                                  - wAvg * drw + dre) - gm53 * drk
+                    abv5 = sx * dru + sy * drv + sz * drw - unAvg * dr
 
-             fs              = lam3*drv + vAvg*abv6 + sy*abv7
-             fw(i,j+1,k,imy) = fw(i,j+1,k,imy) + fs
-             fw(i,j,k,imy)   = fw(i,j,k,imy)   - fs
+                    abv6 = abv3 * abv4 * ova2Avg + abv2 * abv5 * ovaAvg
+                    abv7 = abv2 * abv4 * ovaAvg + abv3 * abv5
 
-             ! Z-momentum.
+                    ! Compute and scatter the dissipative flux.
+                    ! Density.
 
-             fs              = lam3*drw + wAvg*abv6 + sz*abv7
-             fw(i,j+1,k,imz) = fw(i,j+1,k,imz) + fs
-             fw(i,j,k,imz)   = fw(i,j,k,imz)   - fs
+                    fs = lam3 * dr + abv6
+                    fw(i, j + 1, k, irho) = fw(i, j + 1, k, irho) + fs
+                    fw(i, j, k, irho) = fw(i, j, k, irho) - fs
 
-             ! Energy.
+                    ! X-momentum.
 
-             fs                = lam3*dre + hAvg*abv6 + unAvg*abv7
-             fw(i,j+1,k,irhoE) = fw(i,j+1,k,irhoE) + fs
-             fw(i,j,k,irhoE)   = fw(i,j,k,irhoE)   - fs
+                    fs = lam3 * dru + uAvg * abv6 + sx * abv7
+                    fw(i, j + 1, k, imx) = fw(i, j + 1, k, imx) + fs
+                    fw(i, j, k, imx) = fw(i, j, k, imx) - fs
 
-             ! Set dp1 to dp2 for the next face.
+                    ! Y-momentum.
 
-             dp1 = dp2
+                    fs = lam3 * drv + vAvg * abv6 + sy * abv7
+                    fw(i, j + 1, k, imy) = fw(i, j + 1, k, imy) + fs
+                    fw(i, j, k, imy) = fw(i, j, k, imy) - fs
 
-          enddo
-       enddo
-    enddo
-    !
-    !       Dissipative fluxes in the k-direction.
-    !
-    do j=2,jl
-       do i=2,il
+                    ! Z-momentum.
 
-          ! Compute the pressure sensor in the first cell, which
-          ! is a halo cell.
+                    fs = lam3 * drw + wAvg * abv6 + sz * abv7
+                    fw(i, j + 1, k, imz) = fw(i, j + 1, k, imz) + fs
+                    fw(i, j, k, imz) = fw(i, j, k, imz) - fs
 
-          dp1 = abs((shockSensor(i,j,2) - two*shockSensor(i,j,1) + shockSensor(i,j,0))        &
-               /     (omega*(shockSensor(i,j,2) + two*shockSensor(i,j,1) + shockSensor(i,j,0)) &
-               +      oneMinOmega*(abs(shockSensor(i,j,2)-shockSensor(i,j,1))        &
-               +                   abs(shockSensor(i,j,1)-shockSensor(i,j,0))) + plim))
+                    ! Energy.
 
-          ! Loop in k-direction.
+                    fs = lam3 * dre + hAvg * abv6 + unAvg * abv7
+                    fw(i, j + 1, k, irhoE) = fw(i, j + 1, k, irhoE) + fs
+                    fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - fs
 
-          do k=1,kl
+                    ! Set dp1 to dp2 for the next face.
 
-             ! Compute the pressure sensor in the cell to the right
-             ! of the face.
+                    dp1 = dp2
+
+                end do
+            end do
+        end do
+        !
+        !       Dissipative fluxes in the k-direction.
+        !
+        do j = 2, jl
+            do i = 2, il
+
+                ! Compute the pressure sensor in the first cell, which
+                ! is a halo cell.
 
-             dp2 = abs((shockSensor(i,j,k+2) - two*shockSensor(i,j,k+1) + shockSensor(i,j,k))        &
-                  /     (omega*(shockSensor(i,j,k+2) + two*shockSensor(i,j,k+1) + shockSensor(i,j,k)) &
-                  +      oneMinOmega*(abs(shockSensor(i,j,k+2) - shockSensor(i,j,k+1))      &
-                  +                   abs(shockSensor(i,j,k+1) - shockSensor(i,j,k))) + plim))
+                dp1 = abs((shockSensor(i, j, 2) - two * shockSensor(i, j, 1) + shockSensor(i, j, 0)) &
+                          / (omega * (shockSensor(i, j, 2) + two * shockSensor(i, j, 1) + shockSensor(i, j, 0)) &
+                             + oneMinOmega * (abs(shockSensor(i, j, 2) - shockSensor(i, j, 1)) &
+                                              + abs(shockSensor(i, j, 1) - shockSensor(i, j, 0))) + plim))
 
-             ! Compute the dissipation coefficients for this face.
+                ! Loop in k-direction.
 
-             ppor = zero
-             if(porK(i,j,k) == normalFlux) ppor = one
+                do k = 1, kl
 
-             dis2 = fis2*ppor*min(dpMax,max(dp1,dp2))+sigma*fis4*ppor
+                    ! Compute the pressure sensor in the cell to the right
+                    ! of the face.
 
-             ! Construct the vector of the first and third differences
-             ! multiplied by the appropriate constants.
+                    dp2 = abs((shockSensor(i, j, k + 2) - two * shockSensor(i, j, k + 1) + shockSensor(i, j, k)) &
+                         / (omega * (shockSensor(i, j, k + 2) + two * shockSensor(i, j, k + 1) + shockSensor(i, j, k)) &
+                                 + oneMinOmega * (abs(shockSensor(i, j, k + 2) - shockSensor(i, j, k + 1)) &
+                                                  + abs(shockSensor(i, j, k + 1) - shockSensor(i, j, k))) + plim))
 
-             ddw = w(i,j,k+1,irho) - w(i,j,k,irho)
-             dr  = dis2*ddw
+                    ! Compute the dissipation coefficients for this face.
 
-             ddw = w(i,j,k+1,irho)*w(i,j,k+1,ivx) &
-                  - w(i,j,k,irho)*w(i,j,k,ivx)
-             dru = dis2*ddw
+                    ppor = zero
+                    if (porK(i, j, k) == normalFlux) ppor = one
 
-             ddw = w(i,j,k+1,irho)*w(i,j,k+1,ivy) &
-                  - w(i,j,k,irho)*w(i,j,k,ivy)
-             drv = dis2*ddw
+                    dis2 = fis2 * ppor * min(dpMax, max(dp1, dp2)) + sigma * fis4 * ppor
 
-             ddw = w(i,j,k+1,irho)*w(i,j,k+1,ivz) &
-                  - w(i,j,k,irho)*w(i,j,k,ivz)
-             drw = dis2*ddw
+                    ! Construct the vector of the first and third differences
+                    ! multiplied by the appropriate constants.
 
-             ddw = w(i,j,k+1,irhoE) - w(i,j,k,irhoE)
-             dre = dis2*ddw
+                    ddw = w(i, j, k + 1, irho) - w(i, j, k, irho)
+                    dr = dis2 * ddw
 
-             ! In case a k-equation is present, compute the difference
-             ! of rhok and store the average value of k. If not present,
-             ! set both these values to zero, such that later on no
-             ! decision needs to be made anymore.
+                    ddw = w(i, j, k + 1, irho) * w(i, j, k + 1, ivx) &
+                          - w(i, j, k, irho) * w(i, j, k, ivx)
+                    dru = dis2 * ddw
 
-             if( correctForK ) then
-                ddw = w(i,j,k+1,irho)*w(i,j,k+1,itu1) &
-                     - w(i,j,k,irho)*w(i,j,k,itu1)
-                drk = dis2*ddw
-                kAvg = half*(w(i,j,k+1,itu1) + w(i,j,k,itu1))
-             else
-                drk   = zero
-                kAvg = zero
-             endif
+                    ddw = w(i, j, k + 1, irho) * w(i, j, k + 1, ivy) &
+                          - w(i, j, k, irho) * w(i, j, k, ivy)
+                    drv = dis2 * ddw
 
-             ! Compute the average value of gamma and compute some
-             ! expressions in which it occurs.
+                    ddw = w(i, j, k + 1, irho) * w(i, j, k + 1, ivz) &
+                          - w(i, j, k, irho) * w(i, j, k, ivz)
+                    drw = dis2 * ddw
 
-             gammaAvg = half*(gamma(i,j,k+1) + gamma(i,j,k))
-             gm1      = gammaAvg - one
-             ovgm1    = one/gm1
-             gm53     = gammaAvg - five*third
+                    ddw = w(i, j, k + 1, irhoE) - w(i, j, k, irhoE)
+                    dre = dis2 * ddw
 
-             ! Compute the average state at the interface.
+                    ! In case a k-equation is present, compute the difference
+                    ! of rhok and store the average value of k. If not present,
+                    ! set both these values to zero, such that later on no
+                    ! decision needs to be made anymore.
 
-             uAvg  = half*(w(i,j,k+1,ivx) + w(i,j,k,ivx))
-             vAvg  = half*(w(i,j,k+1,ivy) + w(i,j,k,ivy))
-             wAvg  = half*(w(i,j,k+1,ivz) + w(i,j,k,ivz))
-             a2Avg = half*(gamma(i,j,k+1)*p(i,j,k+1)/w(i,j,k+1,irho) &
-                  +       gamma(i,j,k)  *p(i,j,k)  /w(i,j,k,  irho))
+                    if (correctForK) then
+                        ddw = w(i, j, k + 1, irho) * w(i, j, k + 1, itu1) &
+                              - w(i, j, k, irho) * w(i, j, k, itu1)
+                        drk = dis2 * ddw
+                        kAvg = half * (w(i, j, k + 1, itu1) + w(i, j, k, itu1))
+                    else
+                        drk = zero
+                        kAvg = zero
+                    end if
 
-             sx = sk(i,j,k,1); sy = sk(i,j,k,2); sz = sk(i,j,k,3)
-             area = sqrt(sx**2 + sy**2 + sz**2)
-             tmp  = one/max(1.e-25_realType,area)
-             sx   = sx*tmp
-             sy   = sy*tmp
-             sz   = sz*tmp
+                    ! Compute the average value of gamma and compute some
+                    ! expressions in which it occurs.
 
-             alphaAvg = half*(uAvg**2 + vAvg**2 + wAvg**2)
-             hAvg     = alphaAvg + ovgm1*(a2Avg - gm53*kAvg)
-             aAvg     = sqrt(a2Avg)
-             unAvg    = uAvg*sx + vAvg*sy + wAvg*sz
-             ovaAvg   = one/aAvg
-             ova2Avg  = one/a2Avg
+                    gammaAvg = half * (gamma(i, j, k + 1) + gamma(i, j, k))
+                    gm1 = gammaAvg - one
+                    ovgm1 = one / gm1
+                    gm53 = gammaAvg - five * third
 
-             ! The mesh velocity if the face is moving. It must be
-             ! divided by the area to obtain a true velocity.
+                    ! Compute the average state at the interface.
 
-             if( addGridVelocities ) sface = sFaceK(i,j,k)*tmp
+                    uAvg = half * (w(i, j, k + 1, ivx) + w(i, j, k, ivx))
+                    vAvg = half * (w(i, j, k + 1, ivy) + w(i, j, k, ivy))
+                    wAvg = half * (w(i, j, k + 1, ivz) + w(i, j, k, ivz))
+                    a2Avg = half * (gamma(i, j, k + 1) * p(i, j, k + 1) / w(i, j, k + 1, irho) &
+                                    + gamma(i, j, k) * p(i, j, k) / w(i, j, k, irho))
 
-             ! Compute the absolute values of the three eigenvalues
-             ! and make sure they don't become zero by cutting them
-             ! off to a certain minimum.
+                    sx = sk(i, j, k, 1); sy = sk(i, j, k, 2); sz = sk(i, j, k, 3)
+                    area = sqrt(sx**2 + sy**2 + sz**2)
+                    tmp = one / max(1.e-25_realType, area)
+                    sx = sx * tmp
+                    sy = sy * tmp
+                    sz = sz * tmp
 
-             lam1 = abs(unAvg - sface + aAvg)
-             lam2 = abs(unAvg - sface - aAvg)
-             lam3 = abs(unAvg - sface)
+                    alphaAvg = half * (uAvg**2 + vAvg**2 + wAvg**2)
+                    hAvg = alphaAvg + ovgm1 * (a2Avg - gm53 * kAvg)
+                    aAvg = sqrt(a2Avg)
+                    unAvg = uAvg * sx + vAvg * sy + wAvg * sz
+                    ovaAvg = one / aAvg
+                    ova2Avg = one / a2Avg
 
-             rrad = lam3 + aAvg
+                    ! The mesh velocity if the face is moving. It must be
+                    ! divided by the area to obtain a true velocity.
 
-             lam1 = max(lam1,epsAcoustic*rrad)
-             lam2 = max(lam2,epsAcoustic*rrad)
-             lam3 = max(lam3,epsShear*rrad)
+                    if (addGridVelocities) sface = sFaceK(i, j, k) * tmp
 
-             ! Multiply the eigenvalues by the area to obtain
-             ! the correct values for the dissipation term.
+                    ! Compute the absolute values of the three eigenvalues
+                    ! and make sure they don't become zero by cutting them
+                    ! off to a certain minimum.
 
-             lam1 = lam1*area
-             lam2 = lam2*area
-             lam3 = lam3*area
+                    lam1 = abs(unAvg - sface + aAvg)
+                    lam2 = abs(unAvg - sface - aAvg)
+                    lam3 = abs(unAvg - sface)
 
-             ! Some abbreviations, which occur quite often in the
-             ! dissipation terms.
+                    rrad = lam3 + aAvg
 
-             abv1 = half*(lam1 + lam2)
-             abv2 = half*(lam1 - lam2)
-             abv3 = abv1 - lam3
+                    lam1 = max(lam1, epsAcoustic * rrad)
+                    lam2 = max(lam2, epsAcoustic * rrad)
+                    lam3 = max(lam3, epsShear * rrad)
 
-             abv4 = gm1*(alphaAvg*dr - uAvg*dru -vAvg*drv &
-                  -      wAvg*drw + dre) - gm53*drk
-             abv5 = sx*dru + sy*drv + sz*drw - unAvg*dr
+                    ! Multiply the eigenvalues by the area to obtain
+                    ! the correct values for the dissipation term.
 
-             abv6 = abv3*abv4*ova2Avg + abv2*abv5*ovaAvg
-             abv7 = abv2*abv4*ovaAvg  + abv3*abv5
+                    lam1 = lam1 * area
+                    lam2 = lam2 * area
+                    lam3 = lam3 * area
 
-             ! Compute and scatter the dissipative flux.
-             ! Density.
+                    ! Some abbreviations, which occur quite often in the
+                    ! dissipation terms.
 
-             fs               = lam3*dr  + abv6
-             fw(i,j,k+1,irho) = fw(i,j,k+1,irho) + fs
-             fw(i,j,k,irho)   = fw(i,j,k,irho)   - fs
+                    abv1 = half * (lam1 + lam2)
+                    abv2 = half * (lam1 - lam2)
+                    abv3 = abv1 - lam3
 
-             ! X-momentum.
+                    abv4 = gm1 * (alphaAvg * dr - uAvg * dru - vAvg * drv &
+                                  - wAvg * drw + dre) - gm53 * drk
+                    abv5 = sx * dru + sy * drv + sz * drw - unAvg * dr
 
-             fs              = lam3*dru + uAvg*abv6 + sx*abv7
-             fw(i,j,k+1,imx) = fw(i,j,k+1,imx) + fs
-             fw(i,j,k,imx)   = fw(i,j,k,imx)   - fs
+                    abv6 = abv3 * abv4 * ova2Avg + abv2 * abv5 * ovaAvg
+                    abv7 = abv2 * abv4 * ovaAvg + abv3 * abv5
 
-             ! Y-momentum.
+                    ! Compute and scatter the dissipative flux.
+                    ! Density.
 
-             fs              = lam3*drv + vAvg*abv6 + sy*abv7
-             fw(i,j,k+1,imy) = fw(i,j,k+1,imy) + fs
-             fw(i,j,k,imy)   = fw(i,j,k,imy)   - fs
+                    fs = lam3 * dr + abv6
+                    fw(i, j, k + 1, irho) = fw(i, j, k + 1, irho) + fs
+                    fw(i, j, k, irho) = fw(i, j, k, irho) - fs
 
-             ! Z-momentum.
+                    ! X-momentum.
 
-             fs              = lam3*drw + wAvg*abv6 + sz*abv7
-             fw(i,j,k+1,imz) = fw(i,j,k+1,imz) + fs
-             fw(i,j,k,imz)   = fw(i,j,k,imz)   - fs
+                    fs = lam3 * dru + uAvg * abv6 + sx * abv7
+                    fw(i, j, k + 1, imx) = fw(i, j, k + 1, imx) + fs
+                    fw(i, j, k, imx) = fw(i, j, k, imx) - fs
 
-             ! Energy.
+                    ! Y-momentum.
 
-             fs                = lam3*dre + hAvg*abv6 + unAvg*abv7
-             fw(i,j,k+1,irhoE) = fw(i,j,k+1,irhoE) + fs
-             fw(i,j,k,irhoE)   = fw(i,j,k,irhoE)   - fs
+                    fs = lam3 * drv + vAvg * abv6 + sy * abv7
+                    fw(i, j, k + 1, imy) = fw(i, j, k + 1, imy) + fs
+                    fw(i, j, k, imy) = fw(i, j, k, imy) - fs
 
-             ! Set dp1 to dp2 for the next face.
+                    ! Z-momentum.
 
-             dp1 = dp2
+                    fs = lam3 * drw + wAvg * abv6 + sz * abv7
+                    fw(i, j, k + 1, imz) = fw(i, j, k + 1, imz) + fs
+                    fw(i, j, k, imz) = fw(i, j, k, imz) - fs
 
-          enddo
-       enddo
-    enddo
+                    ! Energy.
 
-  end subroutine inviscidDissFluxMatrixApprox
+                    fs = lam3 * dre + hAvg * abv6 + unAvg * abv7
+                    fw(i, j, k + 1, irhoE) = fw(i, j, k + 1, irhoE) + fs
+                    fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - fs
 
-  ! ----------------------------------------------------------------------
-  !                                                                      |
-  !                    No Tapenade Routine below this line               |
-  !                                                                      |
-  ! ----------------------------------------------------------------------
+                    ! Set dp1 to dp2 for the next face.
+
+                    dp1 = dp2
+
+                end do
+            end do
+        end do
+
+    end subroutine inviscidDissFluxMatrixApprox
+
+    ! ----------------------------------------------------------------------
+    !                                                                      |
+    !                    No Tapenade Routine below this line               |
+    !                                                                      |
+    ! ----------------------------------------------------------------------
 
 #ifndef  USE_TAPENADE
 
-  subroutine inviscidDissFluxScalarCoarse
-    !
-    !       inviscidDissFluxScalarCoarse computes the coarse grid, i.e.
-    !       1st order, artificial dissipation flux for the scalar
-    !       dissipation scheme for a given block. Therefore it is assumed
-    !       that the pointers in blockPointers already point to the
-    !       correct block.
-    !
-    use constants
-    use blockPointers, only : il, jl, kl, ie, je, ke, w, p, &
-         porI, porJ, porK, fw, radI, radJ, radK, gamma
-    use inputDiscretization, only: vis2Coarse
-    use iteration, only : rFil
-
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k
-
-    real(kind=realType) :: sfil, fis0, dis0, ppor, fs, rhoi
-
-    ! Check if rFil == 0. If so, the dissipative flux needs not to
-    ! be computed.
-
-    if(abs(rFil) < thresholdReal) return
-
-    ! Set a couple of constants for the scheme.
-
-    fis0 = rFil*vis2Coarse
-    sfil = one - rFil
-
-    ! Replace the total energy by rho times the total enthalpy.
-    ! In this way the numerical solution is total enthalpy preserving
-    ! for the steady Euler equations. Also replace the velocities by
-    ! the momentum. As only first order halo's are needed, only include
-    ! the first order halo's.
-
-    do k=1,ke
-       do j=1,je
-          do i=1,ie
-             w(i,j,k,ivx)   = w(i,j,k,irho)*w(i,j,k,ivx)
-             w(i,j,k,ivy)   = w(i,j,k,irho)*w(i,j,k,ivy)
-             w(i,j,k,ivz)   = w(i,j,k,irho)*w(i,j,k,ivz)
-             w(i,j,k,irhoE) = w(i,j,k,irhoE) + p(i,j,k)
-          enddo
-       enddo
-    enddo
-
-    ! Initialize the dissipative residual to a certain times,
-    ! possibly zero, the previously stored value. Owned cells
-    ! only, because the halo values do not matter.
+    subroutine inviscidDissFluxScalarCoarse
+        !
+        !       inviscidDissFluxScalarCoarse computes the coarse grid, i.e.
+        !       1st order, artificial dissipation flux for the scalar
+        !       dissipation scheme for a given block. Therefore it is assumed
+        !       that the pointers in blockPointers already point to the
+        !       correct block.
+        !
+        use constants
+        use blockPointers, only: il, jl, kl, ie, je, ke, w, p, &
+                                 porI, porJ, porK, fw, radI, radJ, radK, gamma
+        use inputDiscretization, only: vis2Coarse
+        use iteration, only: rFil
+
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k
+
+        real(kind=realType) :: sfil, fis0, dis0, ppor, fs, rhoi
+
+        ! Check if rFil == 0. If so, the dissipative flux needs not to
+        ! be computed.
+
+        if (abs(rFil) < thresholdReal) return
+
+        ! Set a couple of constants for the scheme.
+
+        fis0 = rFil * vis2Coarse
+        sfil = one - rFil
+
+        ! Replace the total energy by rho times the total enthalpy.
+        ! In this way the numerical solution is total enthalpy preserving
+        ! for the steady Euler equations. Also replace the velocities by
+        ! the momentum. As only first order halo's are needed, only include
+        ! the first order halo's.
+
+        do k = 1, ke
+            do j = 1, je
+                do i = 1, ie
+                    w(i, j, k, ivx) = w(i, j, k, irho) * w(i, j, k, ivx)
+                    w(i, j, k, ivy) = w(i, j, k, irho) * w(i, j, k, ivy)
+                    w(i, j, k, ivz) = w(i, j, k, irho) * w(i, j, k, ivz)
+                    w(i, j, k, irhoE) = w(i, j, k, irhoE) + p(i, j, k)
+                end do
+            end do
+        end do
+
+        ! Initialize the dissipative residual to a certain times,
+        ! possibly zero, the previously stored value. Owned cells
+        ! only, because the halo values do not matter.
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    fw(i, j, k, irho) = sfil * fw(i, j, k, irho)
+                    fw(i, j, k, imx) = sfil * fw(i, j, k, imx)
+                    fw(i, j, k, imy) = sfil * fw(i, j, k, imy)
+                    fw(i, j, k, imz) = sfil * fw(i, j, k, imz)
+                    fw(i, j, k, irhoE) = sfil * fw(i, j, k, irhoE)
+                end do
+            end do
+        end do
+        !
+        !       Dissipative fluxes in the i-direction.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 1, il
+
+                    ! Compute the dissipation coefficients for this face.
+
+                    ppor = zero
+                    if (porI(i, j, k) == normalFlux) ppor = half
+
+                    dis0 = fis0 * ppor * (radI(i, j, k) + radI(i + 1, j, k))
+
+                    ! Compute and scatter the dissipative flux.
+                    ! Density.
+
+                    fs = dis0 * (w(i + 1, j, k, irho) - w(i, j, k, irho))
+                    fw(i + 1, j, k, irho) = fw(i + 1, j, k, irho) + fs
+                    fw(i, j, k, irho) = fw(i, j, k, irho) - fs
+
+                    ! X-momentum.
+
+                    fs = dis0 * (w(i + 1, j, k, ivx) - w(i, j, k, ivx))
+                    fw(i + 1, j, k, imx) = fw(i + 1, j, k, imx) + fs
+                    fw(i, j, k, imx) = fw(i, j, k, imx) - fs
+
+                    ! Y-momentum.
+
+                    fs = dis0 * (w(i + 1, j, k, ivy) - w(i, j, k, ivy))
+                    fw(i + 1, j, k, imy) = fw(i + 1, j, k, imy) + fs
+                    fw(i, j, k, imy) = fw(i, j, k, imy) - fs
+
+                    ! Z-momentum.
+
+                    fs = dis0 * (w(i + 1, j, k, ivz) - w(i, j, k, ivz))
+                    fw(i + 1, j, k, imz) = fw(i + 1, j, k, imz) + fs
+                    fw(i, j, k, imz) = fw(i, j, k, imz) - fs
+
+                    ! Energy.
+
+                    fs = dis0 * (w(i + 1, j, k, irhoE) - w(i, j, k, irhoE))
+                    fw(i + 1, j, k, irhoE) = fw(i + 1, j, k, irhoE) + fs
+                    fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - fs
+
+                end do
+            end do
+        end do
+        !
+        !       Dissipative fluxes in the j-direction.
+        !
+        do k = 2, kl
+            do j = 1, jl
+                do i = 2, il
+
+                    ! Compute the dissipation coefficients for this face.
+
+                    ppor = zero
+                    if (porJ(i, j, k) == normalFlux) ppor = half
+
+                    dis0 = fis0 * ppor * (radJ(i, j, k) + radJ(i, j + 1, k))
 
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-             fw(i,j,k,irho)  = sfil*fw(i,j,k,irho)
-             fw(i,j,k,imx)   = sfil*fw(i,j,k,imx)
-             fw(i,j,k,imy)   = sfil*fw(i,j,k,imy)
-             fw(i,j,k,imz)   = sfil*fw(i,j,k,imz)
-             fw(i,j,k,irhoE) = sfil*fw(i,j,k,irhoE)
-          enddo
-       enddo
-    enddo
-    !
-    !       Dissipative fluxes in the i-direction.
-    !
-    do k=2,kl
-       do j=2,jl
-          do i=1,il
-
-             ! Compute the dissipation coefficients for this face.
-
-             ppor = zero
-             if(porI(i,j,k) == normalFlux) ppor = half
-
-             dis0 = fis0*ppor*(radI(i,j,k) + radI(i+1,j,k))
+                    ! Compute and scatter the dissipative flux.
+                    ! Density.
 
-             ! Compute and scatter the dissipative flux.
-             ! Density.
+                    fs = dis0 * (w(i, j + 1, k, irho) - w(i, j, k, irho))
+                    fw(i, j + 1, k, irho) = fw(i, j + 1, k, irho) + fs
+                    fw(i, j, k, irho) = fw(i, j, k, irho) - fs
 
-             fs = dis0*(w(i+1,j,k,irho) - w(i,j,k,irho))
-             fw(i+1,j,k,irho) = fw(i+1,j,k,irho) + fs
-             fw(i,j,k,irho)   = fw(i,j,k,irho)   - fs
+                    ! X-momentum.
 
-             ! X-momentum.
+                    fs = dis0 * (w(i, j + 1, k, ivx) - w(i, j, k, ivx))
+                    fw(i, j + 1, k, imx) = fw(i, j + 1, k, imx) + fs
+                    fw(i, j, k, imx) = fw(i, j, k, imx) - fs
 
-             fs = dis0*(w(i+1,j,k,ivx) - w(i,j,k,ivx))
-             fw(i+1,j,k,imx) = fw(i+1,j,k,imx) + fs
-             fw(i,j,k,imx)   = fw(i,j,k,imx)   - fs
+                    ! Y-momentum.
 
-             ! Y-momentum.
+                    fs = dis0 * (w(i, j + 1, k, ivy) - w(i, j, k, ivy))
+                    fw(i, j + 1, k, imy) = fw(i, j + 1, k, imy) + fs
+                    fw(i, j, k, imy) = fw(i, j, k, imy) - fs
 
-             fs = dis0*(w(i+1,j,k,ivy) - w(i,j,k,ivy))
-             fw(i+1,j,k,imy) = fw(i+1,j,k,imy) + fs
-             fw(i,j,k,imy)   = fw(i,j,k,imy)   - fs
+                    ! Z-momentum.
 
-             ! Z-momentum.
+                    fs = dis0 * (w(i, j + 1, k, ivz) - w(i, j, k, ivz))
+                    fw(i, j + 1, k, imz) = fw(i, j + 1, k, imz) + fs
+                    fw(i, j, k, imz) = fw(i, j, k, imz) - fs
 
-             fs = dis0*(w(i+1,j,k,ivz) - w(i,j,k,ivz))
-             fw(i+1,j,k,imz) = fw(i+1,j,k,imz) + fs
-             fw(i,j,k,imz)   = fw(i,j,k,imz)   - fs
+                    ! Energy
 
-             ! Energy.
+                    fs = dis0 * (w(i, j + 1, k, irhoE) - w(i, j, k, irhoE))
+                    fw(i, j + 1, k, irhoE) = fw(i, j + 1, k, irhoE) + fs
+                    fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - fs
 
-             fs = dis0*(w(i+1,j,k,irhoE) - w(i,j,k,irhoE))
-             fw(i+1,j,k,irhoE) = fw(i+1,j,k,irhoE) + fs
-             fw(i,j,k,irhoE)   = fw(i,j,k,irhoE)   - fs
+                end do
+            end do
+        end do
+        !
+        !       Dissipative fluxes in the k-direction.
+        !
+        do k = 1, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Compute the dissipation coefficients for this face.
+
+                    ppor = zero
+                    if (porK(i, j, k) == normalFlux) ppor = half
+
+                    dis0 = fis0 * ppor * (radK(i, j, k) + radK(i, j, k + 1))
+
+                    ! Compute and scatter the dissipative flux.
+                    ! Density.
+
+                    fs = dis0 * (w(i, j, k + 1, irho) - w(i, j, k, irho))
+                    fw(i, j, k + 1, irho) = fw(i, j, k + 1, irho) + fs
+                    fw(i, j, k, irho) = fw(i, j, k, irho) - fs
+
+                    ! X-momentum.
+
+                    fs = dis0 * (w(i, j, k + 1, ivx) - w(i, j, k, ivx))
+                    fw(i, j, k + 1, imx) = fw(i, j, k + 1, imx) + fs
+                    fw(i, j, k, imx) = fw(i, j, k, imx) - fs
+
+                    ! Y-momentum.
+
+                    fs = dis0 * (w(i, j, k + 1, ivy) - w(i, j, k, ivy))
+                    fw(i, j, k + 1, imy) = fw(i, j, k + 1, imy) + fs
+                    fw(i, j, k, imy) = fw(i, j, k, imy) - fs
+
+                    ! Z-momentum.
+
+                    fs = dis0 * (w(i, j, k + 1, ivz) - w(i, j, k, ivz))
+                    fw(i, j, k + 1, imz) = fw(i, j, k + 1, imz) + fs
+                    fw(i, j, k, imz) = fw(i, j, k, imz) - fs
+
+                    ! Energy
+
+                    fs = dis0 * (w(i, j, k + 1, irhoE) - w(i, j, k, irhoE))
+                    fw(i, j, k + 1, irhoE) = fw(i, j, k + 1, irhoE) + fs
+                    fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - fs
+
+                end do
+            end do
+        end do
+
+        ! Replace rho times the total enthalpy by the total energy and
+        ! store the velocities again instead of the momentum. As only
+        ! the first halo cells are included, this must be done here again.
+
+        do k = 1, ke
+            do j = 1, je
+                do i = 1, ie
+                    rhoi = one / w(i, j, k, irho)
+                    w(i, j, k, ivx) = w(i, j, k, ivx) * rhoi
+                    w(i, j, k, ivy) = w(i, j, k, ivy) * rhoi
+                    w(i, j, k, ivz) = w(i, j, k, ivz) * rhoi
+
+                    w(i, j, k, irhoE) = w(i, j, k, irhoE) - p(i, j, k)
+                end do
+            end do
+        end do
+
+    end subroutine inviscidDissFluxScalarCoarse
+
+    subroutine inviscidDissFluxMatrixCoarse
+        !
+        !       inviscidDissFluxMatrixCoarse computes the matrix artificial
+        !       dissipation term. Instead of the spectral radius, as used in
+        !       the scalar dissipation scheme, the absolute value of the flux
+        !       jacobian is used. This routine is used on the coarser grids in
+        !       the multigrid cycle and only computes the first order
+        !       dissipation term. It is assumed that the pointers in
+        !       blockPointers already point to the correct block.
+        !
+        use constants
+        use blockPointers, only: il, jl, kl, ie, je, ke, ib, jb, kb, w, p, &
+                                 porI, porJ, porK, fw, gamma, si, sj, sk, &
+                                 addGridVelocities, sFaceI, sfaceJ, sFacek
+        use inputDiscretization, only: vis2Coarse
+        use inputPhysics, only: equations
+        use iteration, only: rFil
+        use utils, only: getCorrectForK
+        implicit none
+        !
+        !      Local parameters.
+        !
+        real(kind=realType), parameter :: epsAcoustic = 0.25_realType
+        real(kind=realType), parameter :: epsShear = 0.025_realType
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k
+
+        real(kind=realType) :: sfil, fis0, dis0, ppor, rrad, sface
+        real(kind=realType) :: gammaAvg, gm1, ovgm1, gm53, tmp, fs
+        real(kind=realType) :: dr, dru, drv, drw, dre, drk, sx, sy, sz
+        real(kind=realType) :: uAvg, vAvg, wAvg, a2Avg, aAvg, hAvg
+        real(kind=realType) :: alphaAvg, unAvg, ovaAvg, ova2Avg
+        real(kind=realType) :: kAvg, lam1, lam2, lam3, area
+        real(kind=realType) :: abv1, abv2, abv3, abv4, abv5, abv6, abv7
+        logical :: correctForK
+
+        ! Check if rFil == 0. If so, the dissipative flux needs not to
+        ! be computed.
+
+        if (abs(rFil) < thresholdReal) return
+
+        ! Determine whether or not the total energy must be corrected
+        ! for the presence of the turbulent kinetic energy.
+
+        correctForK = getCorrectForK()
+
+        ! Initialize sface to zero. This value will be used if the
+        ! block is not moving.
+
+        sface = zero
+
+        ! Set a couple of constants for the scheme.
+
+        fis0 = rFil * vis2Coarse
+        sfil = one - rFil
+
+        ! Initialize the dissipative residual to a certain times,
+        ! possibly zero, the previously stored value. Owned cells
+        ! only, because the halo values do not matter.
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    fw(i, j, k, irho) = sfil * fw(i, j, k, irho)
+                    fw(i, j, k, imx) = sfil * fw(i, j, k, imx)
+                    fw(i, j, k, imy) = sfil * fw(i, j, k, imy)
+                    fw(i, j, k, imz) = sfil * fw(i, j, k, imz)
+                    fw(i, j, k, irhoE) = sfil * fw(i, j, k, irhoE)
+                end do
+            end do
+        end do
+        !
+        !       Dissipative fluxes in the i-direction.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 1, il
+
+                    ! Compute the dissipation coefficient for this face.
+
+                    ppor = zero
+                    if (porI(i, j, k) == normalFlux) ppor = one
+
+                    dis0 = fis0 * ppor
+
+                    ! Construct the vector of the first differences multiplied
+                    ! by dis0.
+
+                    dr = dis0 * (w(i + 1, j, k, irho) - w(i, j, k, irho))
+                    dru = dis0 * (w(i + 1, j, k, irho) * w(i + 1, j, k, ivx) &
+                                  - w(i, j, k, irho) * w(i, j, k, ivx))
+                    drv = dis0 * (w(i + 1, j, k, irho) * w(i + 1, j, k, ivy) &
+                                  - w(i, j, k, irho) * w(i, j, k, ivy))
+                    drw = dis0 * (w(i + 1, j, k, irho) * w(i + 1, j, k, ivz) &
+                                  - w(i, j, k, irho) * w(i, j, k, ivz))
+                    dre = dis0 * (w(i + 1, j, k, irhoE) - w(i, j, k, irhoE))
+
+                    ! In case a k-equation is present, compute the difference
+                    ! of rhok and store the average value of k. If not present,
+                    ! set both these values to zero, such that later on no
+                    ! decision needs to be made anymore.
+
+                    if (correctForK) then
+                        drk = dis0 * (w(i + 1, j, k, irho) * w(i + 1, j, k, itu1) &
+                                      - w(i, j, k, irho) * w(i, j, k, itu1))
+                        kAvg = half * (w(i + 1, j, k, itu1) + w(i, j, k, itu1))
+                    else
+                        drk = zero
+                        kAvg = zero
+                    end if
+
+                    ! Compute the average value of gamma and compute some
+                    ! expressions in which it occurs.
 
-          enddo
-       enddo
-    enddo
-    !
-    !       Dissipative fluxes in the j-direction.
-    !
-    do k=2,kl
-       do j=1,jl
-          do i=2,il
+                    gammaAvg = half * (gamma(i + 1, j, k) + gamma(i, j, k))
+                    gm1 = gammaAvg - one
+                    ovgm1 = one / gm1
+                    gm53 = gammaAvg - five * third
 
-             ! Compute the dissipation coefficients for this face.
+                    ! Compute the average state at the interface.
 
-             ppor = zero
-             if(porJ(i,j,k) == normalFlux) ppor = half
+                    uAvg = half * (w(i + 1, j, k, ivx) + w(i, j, k, ivx))
+                    vAvg = half * (w(i + 1, j, k, ivy) + w(i, j, k, ivy))
+                    wAvg = half * (w(i + 1, j, k, ivz) + w(i, j, k, ivz))
+                    a2Avg = half * (gamma(i + 1, j, k) * p(i + 1, j, k) / w(i + 1, j, k, irho) &
+                                    + gamma(i, j, k) * p(i, j, k) / w(i, j, k, irho))
 
-             dis0 = fis0*ppor*(radJ(i,j,k) + radJ(i,j+1,k))
+                    sx = si(i, j, k, 1); sy = si(i, j, k, 2); sz = si(i, j, k, 3)
+                    area = sqrt(sx**2 + sy**2 + sz**2)
+                    tmp = one / max(1.e-25_realType, area)
+                    sx = sx * tmp
+                    sy = sy * tmp
+                    sz = sz * tmp
 
-             ! Compute and scatter the dissipative flux.
-             ! Density.
+                    alphaAvg = half * (uAvg**2 + vAvg**2 + wAvg**2)
+                    hAvg = alphaAvg + ovgm1 * (a2Avg - gm53 * kAvg)
+                    aAvg = sqrt(a2Avg)
+                    unAvg = uAvg * sx + vAvg * sy + wAvg * sz
+                    ovaAvg = one / aAvg
+                    ova2Avg = one / a2Avg
 
-             fs = dis0*(w(i,j+1,k,irho) - w(i,j,k,irho))
-             fw(i,j+1,k,irho) = fw(i,j+1,k,irho) + fs
-             fw(i,j,k,irho)   = fw(i,j,k,irho)   - fs
+                    ! The mesh velocity if the face is moving. It must be
+                    ! divided by the area to obtain a true velocity.
 
-             ! X-momentum.
+                    if (addGridVelocities) sface = sFaceI(i, j, k) * tmp
 
-             fs = dis0*(w(i,j+1,k,ivx) - w(i,j,k,ivx))
-             fw(i,j+1,k,imx) = fw(i,j+1,k,imx) + fs
-             fw(i,j,k,imx)   = fw(i,j,k,imx)   - fs
-
-             ! Y-momentum.
-
-             fs = dis0*(w(i,j+1,k,ivy) - w(i,j,k,ivy))
-             fw(i,j+1,k,imy) = fw(i,j+1,k,imy) + fs
-             fw(i,j,k,imy)   = fw(i,j,k,imy)   - fs
-
-             ! Z-momentum.
-
-             fs = dis0*(w(i,j+1,k,ivz) - w(i,j,k,ivz))
-             fw(i,j+1,k,imz) = fw(i,j+1,k,imz) + fs
-             fw(i,j,k,imz)   = fw(i,j,k,imz)   - fs
-
-             ! Energy
-
-             fs = dis0*(w(i,j+1,k,irhoE) - w(i,j,k,irhoE))
-             fw(i,j+1,k,irhoE) = fw(i,j+1,k,irhoE) + fs
-             fw(i,j,k,irhoE)   = fw(i,j,k,irhoE)   - fs
-
-          enddo
-       enddo
-    enddo
-    !
-    !       Dissipative fluxes in the k-direction.
-    !
-    do k=1,kl
-       do j=2,jl
-          do i=2,il
-
-             ! Compute the dissipation coefficients for this face.
+                    ! Compute the absolute values of the three eigenvalues
+                    ! and make sure they don't become zero by cutting them
+                    ! off to a certain minimum.
 
-             ppor = zero
-             if(porK(i,j,k) == normalFlux) ppor = half
+                    lam1 = abs(unAvg - sface + aAvg)
+                    lam2 = abs(unAvg - sface - aAvg)
+                    lam3 = abs(unAvg - sface)
 
-             dis0 = fis0*ppor*(radK(i,j,k) + radK(i,j,k+1))
+                    rrad = lam3 + aAvg
 
-             ! Compute and scatter the dissipative flux.
-             ! Density.
+                    lam1 = max(lam1, epsAcoustic * rrad)
+                    lam2 = max(lam2, epsAcoustic * rrad)
+                    lam3 = max(lam3, epsShear * rrad)
 
-             fs = dis0*(w(i,j,k+1,irho) - w(i,j,k,irho))
-             fw(i,j,k+1,irho) = fw(i,j,k+1,irho) + fs
-             fw(i,j,k,irho)   = fw(i,j,k,irho)   - fs
+                    ! Multiply the eigenvalues by the area to obtain
+                    ! the correct values for the dissipation term.
 
-             ! X-momentum.
+                    lam1 = lam1 * area
+                    lam2 = lam2 * area
+                    lam3 = lam3 * area
 
-             fs = dis0*(w(i,j,k+1,ivx) - w(i,j,k,ivx))
-             fw(i,j,k+1,imx) = fw(i,j,k+1,imx) + fs
-             fw(i,j,k,imx)   = fw(i,j,k,imx)   - fs
+                    ! Some abbreviations, which occur quite often in the
+                    ! dissipation terms.
 
-             ! Y-momentum.
+                    abv1 = half * (lam1 + lam2)
+                    abv2 = half * (lam1 - lam2)
+                    abv3 = abv1 - lam3
 
-             fs = dis0*(w(i,j,k+1,ivy) - w(i,j,k,ivy))
-             fw(i,j,k+1,imy) = fw(i,j,k+1,imy) + fs
-             fw(i,j,k,imy)   = fw(i,j,k,imy)   - fs
+                    abv4 = gm1 * (alphaAvg * dr - uAvg * dru - vAvg * drv &
+                                  - wAvg * drw + dre) - gm53 * drk
+                    abv5 = sx * dru + sy * drv + sz * drw - unAvg * dr
 
-             ! Z-momentum.
-
-             fs = dis0*(w(i,j,k+1,ivz) - w(i,j,k,ivz))
-             fw(i,j,k+1,imz) = fw(i,j,k+1,imz) + fs
-             fw(i,j,k,imz)   = fw(i,j,k,imz)   - fs
-
-             ! Energy
-
-             fs = dis0*(w(i,j,k+1,irhoE) - w(i,j,k,irhoE))
-             fw(i,j,k+1,irhoE) = fw(i,j,k+1,irhoE) + fs
-             fw(i,j,k,irhoE)   = fw(i,j,k,irhoE)   - fs
-
-          enddo
-       enddo
-    enddo
-
-    ! Replace rho times the total enthalpy by the total energy and
-    ! store the velocities again instead of the momentum. As only
-    ! the first halo cells are included, this must be done here again.
-
-    do k=1,ke
-       do j=1,je
-          do i=1,ie
-             rhoi = one/w(i,j,k,irho)
-             w(i,j,k,ivx) = w(i,j,k,ivx)*rhoi
-             w(i,j,k,ivy) = w(i,j,k,ivy)*rhoi
-             w(i,j,k,ivz) = w(i,j,k,ivz)*rhoi
-
-             w(i,j,k,irhoE) = w(i,j,k,irhoE) - p(i,j,k)
-          enddo
-       enddo
-    enddo
-
-  end subroutine inviscidDissFluxScalarCoarse
-
-  subroutine inviscidDissFluxMatrixCoarse
-    !
-    !       inviscidDissFluxMatrixCoarse computes the matrix artificial
-    !       dissipation term. Instead of the spectral radius, as used in
-    !       the scalar dissipation scheme, the absolute value of the flux
-    !       jacobian is used. This routine is used on the coarser grids in
-    !       the multigrid cycle and only computes the first order
-    !       dissipation term. It is assumed that the pointers in
-    !       blockPointers already point to the correct block.
-    !
-    use constants
-    use blockPointers, only : il, jl, kl, ie, je, ke, ib, jb, kb, w, p, &
-         porI, porJ, porK, fw, gamma, si, sj, sk, &
-         addGridVelocities, sFaceI, sfaceJ, sFacek
-    use inputDiscretization, only: vis2Coarse
-    use inputPhysics, only : equations
-    use iteration, only : rFil
-    use utils, only : getCorrectForK
-    implicit none
-    !
-    !      Local parameters.
-    !
-    real(kind=realType), parameter :: epsAcoustic = 0.25_realType
-    real(kind=realType), parameter :: epsShear    = 0.025_realType
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k
-
-    real(kind=realType) :: sfil, fis0, dis0, ppor, rrad, sface
-    real(kind=realType) :: gammaAvg, gm1, ovgm1, gm53, tmp, fs
-    real(kind=realType) :: dr, dru, drv, drw, dre, drk, sx, sy, sz
-    real(kind=realType) :: uAvg, vAvg, wAvg, a2Avg, aAvg, hAvg
-    real(kind=realType) :: alphaAvg, unAvg, ovaAvg, ova2Avg
-    real(kind=realType) :: kAvg, lam1, lam2, lam3, area
-    real(kind=realType) :: abv1, abv2, abv3, abv4, abv5, abv6, abv7
-    logical :: correctForK
-
-    ! Check if rFil == 0. If so, the dissipative flux needs not to
-    ! be computed.
-
-    if(abs(rFil) < thresholdReal) return
-
-    ! Determine whether or not the total energy must be corrected
-    ! for the presence of the turbulent kinetic energy.
-
-    correctForK = getCorrectForK()
-
-    ! Initialize sface to zero. This value will be used if the
-    ! block is not moving.
-
-    sface = zero
-
-    ! Set a couple of constants for the scheme.
-
-    fis0 = rFil*vis2Coarse
-    sfil = one - rFil
-
-    ! Initialize the dissipative residual to a certain times,
-    ! possibly zero, the previously stored value. Owned cells
-    ! only, because the halo values do not matter.
-
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-             fw(i,j,k,irho)  = sfil*fw(i,j,k,irho)
-             fw(i,j,k,imx)   = sfil*fw(i,j,k,imx)
-             fw(i,j,k,imy)   = sfil*fw(i,j,k,imy)
-             fw(i,j,k,imz)   = sfil*fw(i,j,k,imz)
-             fw(i,j,k,irhoE) = sfil*fw(i,j,k,irhoE)
-          enddo
-       enddo
-    enddo
-    !
-    !       Dissipative fluxes in the i-direction.
-    !
-    do k=2,kl
-       do j=2,jl
-          do i=1,il
-
-             ! Compute the dissipation coefficient for this face.
-
-             ppor = zero
-             if(porI(i,j,k) == normalFlux) ppor = one
-
-             dis0 = fis0*ppor
-
-             ! Construct the vector of the first differences multiplied
-             ! by dis0.
-
-             dr  = dis0*(w(i+1,j,k,irho) - w(i,j,k,irho))
-             dru = dis0*(w(i+1,j,k,irho)*w(i+1,j,k,ivx) &
-                  -       w(i,  j,k,irho)*w(i,  j,k,ivx))
-             drv = dis0*(w(i+1,j,k,irho)*w(i+1,j,k,ivy) &
-                  -       w(i,  j,k,irho)*w(i,j,k,ivy))
-             drw = dis0*(w(i+1,j,k,irho)*w(i+1,j,k,ivz) &
-                  -       w(i,  j,k,irho)*w(i,j,k,ivz))
-             dre = dis0*(w(i+1,j,k,irhoE) - w(i,j,k,irhoE))
-
-             ! In case a k-equation is present, compute the difference
-             ! of rhok and store the average value of k. If not present,
-             ! set both these values to zero, such that later on no
-             ! decision needs to be made anymore.
-
-             if( correctForK ) then
-                drk  = dis0*(w(i+1,j,k,irho)*w(i+1,j,k,itu1) &
-                     -       w(i,  j,k,irho)*w(i,  j,k,itu1))
-                kAvg = half*(w(i+1,j,k,itu1) + w(i,j,k,itu1))
-             else
-                drk  = zero
-                kAvg = zero
-             endif
-
-             ! Compute the average value of gamma and compute some
-             ! expressions in which it occurs.
-
-             gammaAvg = half*(gamma(i+1,j,k) + gamma(i,j,k))
-             gm1      = gammaAvg - one
-             ovgm1    = one/gm1
-             gm53     = gammaAvg - five*third
-
-             ! Compute the average state at the interface.
-
-             uAvg  = half*(w(i+1,j,k,ivx) + w(i,j,k,ivx))
-             vAvg  = half*(w(i+1,j,k,ivy) + w(i,j,k,ivy))
-             wAvg  = half*(w(i+1,j,k,ivz) + w(i,j,k,ivz))
-             a2Avg = half*(gamma(i+1,j,k)*p(i+1,j,k)/w(i+1,j,k,irho) &
-                  +       gamma(i,  j,k)*p(i,  j,k)/w(i,  j,k,irho))
+                    abv6 = abv3 * abv4 * ova2Avg + abv2 * abv5 * ovaAvg
+                    abv7 = abv2 * abv4 * ovaAvg + abv3 * abv5
 
-             sx = si(i,j,k,1); sy = si(i,j,k,2); sz = si(i,j,k,3)
-             area = sqrt(sx**2 + sy**2 + sz**2)
-             tmp  = one/max(1.e-25_realType,area)
-             sx   = sx*tmp
-             sy   = sy*tmp
-             sz   = sz*tmp
+                    ! Compute and scatter the dissipative flux.
+                    ! Density.
 
-             alphaAvg = half*(uAvg**2 + vAvg**2 + wAvg**2)
-             hAvg     = alphaAvg + ovgm1*(a2Avg - gm53*kAvg)
-             aAvg     = sqrt(a2Avg)
-             unAvg    = uAvg*sx + vAvg*sy + wAvg*sz
-             ovaAvg   = one/aAvg
-             ova2Avg  = one/a2Avg
+                    fs = lam3 * dr + abv6
+                    fw(i + 1, j, k, irho) = fw(i + 1, j, k, irho) + fs
+                    fw(i, j, k, irho) = fw(i, j, k, irho) - fs
 
-             ! The mesh velocity if the face is moving. It must be
-             ! divided by the area to obtain a true velocity.
-
-             if( addGridVelocities ) sface = sFaceI(i,j,k)*tmp
-
-             ! Compute the absolute values of the three eigenvalues
-             ! and make sure they don't become zero by cutting them
-             ! off to a certain minimum.
-
-             lam1 = abs(unAvg - sface + aAvg)
-             lam2 = abs(unAvg - sface - aAvg)
-             lam3 = abs(unAvg - sface)
-
-             rrad = lam3 + aAvg
-
-             lam1 = max(lam1,epsAcoustic*rrad)
-             lam2 = max(lam2,epsAcoustic*rrad)
-             lam3 = max(lam3,epsShear*rrad)
-
-             ! Multiply the eigenvalues by the area to obtain
-             ! the correct values for the dissipation term.
-
-             lam1 = lam1*area
-             lam2 = lam2*area
-             lam3 = lam3*area
-
-             ! Some abbreviations, which occur quite often in the
-             ! dissipation terms.
-
-             abv1 = half*(lam1 + lam2)
-             abv2 = half*(lam1 - lam2)
-             abv3 = abv1 - lam3
-
-             abv4 = gm1*(alphaAvg*dr - uAvg*dru -vAvg*drv &
-                  -      wAvg*drw + dre) - gm53*drk
-             abv5 = sx*dru + sy*drv + sz*drw - unAvg*dr
-
-             abv6 = abv3*abv4*ova2Avg + abv2*abv5*ovaAvg
-             abv7 = abv2*abv4*ovaAvg  + abv3*abv5
-
-             ! Compute and scatter the dissipative flux.
-             ! Density.
-
-             fs               = lam3*dr  + abv6
-             fw(i+1,j,k,irho) = fw(i+1,j,k,irho) + fs
-             fw(i,j,k,irho)   = fw(i,j,k,irho)   - fs
-
-             ! X-momentum.
-
-             fs              = lam3*dru + uAvg*abv6 + sx*abv7
-             fw(i+1,j,k,imx) = fw(i+1,j,k,imx) + fs
-             fw(i,j,k,imx)   = fw(i,j,k,imx)   - fs
-
-             ! Y-momentum.
-
-             fs              = lam3*drv + vAvg*abv6 + sy*abv7
-             fw(i+1,j,k,imy) = fw(i+1,j,k,imy) + fs
-             fw(i,j,k,imy)   = fw(i,j,k,imy)   - fs
-
-             ! Z-momentum.
-
-             fs              = lam3*drw + wAvg*abv6 + sz*abv7
-             fw(i+1,j,k,imz) = fw(i+1,j,k,imz) + fs
-             fw(i,j,k,imz)   = fw(i,j,k,imz)   - fs
-
-             ! Energy.
+                    ! X-momentum.
 
-             fs                = lam3*dre + hAvg*abv6 + unAvg*abv7
-             fw(i+1,j,k,irhoE) = fw(i+1,j,k,irhoE) + fs
-             fw(i,j,k,irhoE)   = fw(i,j,k,irhoE)   - fs
+                    fs = lam3 * dru + uAvg * abv6 + sx * abv7
+                    fw(i + 1, j, k, imx) = fw(i + 1, j, k, imx) + fs
+                    fw(i, j, k, imx) = fw(i, j, k, imx) - fs
 
-          enddo
-       enddo
-    enddo
-    !
-    !       Dissipative fluxes in the j-direction.
-    !
-    do k=2,kl
-       do j=1,jl
-          do i=2,il
+                    ! Y-momentum.
 
-             ! Compute the dissipation coefficient for this face.
+                    fs = lam3 * drv + vAvg * abv6 + sy * abv7
+                    fw(i + 1, j, k, imy) = fw(i + 1, j, k, imy) + fs
+                    fw(i, j, k, imy) = fw(i, j, k, imy) - fs
 
-             ppor = zero
-             if(porJ(i,j,k) == normalFlux) ppor = one
+                    ! Z-momentum.
 
-             dis0 = fis0*ppor
+                    fs = lam3 * drw + wAvg * abv6 + sz * abv7
+                    fw(i + 1, j, k, imz) = fw(i + 1, j, k, imz) + fs
+                    fw(i, j, k, imz) = fw(i, j, k, imz) - fs
 
-             ! Construct the vector of the first differences multiplied
-             ! by dis0.
+                    ! Energy.
 
-             dr  = dis0*(w(i,j+1,k,irho) - w(i,j,k,irho))
-             dru = dis0*(w(i,j+1,k,irho)*w(i,j+1,k,ivx) &
-                  -       w(i,j,  k,irho)*w(i,j,  k,ivx))
-             drv = dis0*(w(i,j+1,k,irho)*w(i,j+1,k,ivy) &
-                  -       w(i,j,  k,irho)*w(i,j,  k,ivy))
-             drw = dis0*(w(i,j+1,k,irho)*w(i,j+1,k,ivz) &
-                  -       w(i,j,  k,irho)*w(i,j,  k,ivz))
-             dre = dis0*(w(i,j+1,k,irhoE) - w(i,j,k,irhoE))
+                    fs = lam3 * dre + hAvg * abv6 + unAvg * abv7
+                    fw(i + 1, j, k, irhoE) = fw(i + 1, j, k, irhoE) + fs
+                    fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - fs
+
+                end do
+            end do
+        end do
+        !
+        !       Dissipative fluxes in the j-direction.
+        !
+        do k = 2, kl
+            do j = 1, jl
+                do i = 2, il
+
+                    ! Compute the dissipation coefficient for this face.
+
+                    ppor = zero
+                    if (porJ(i, j, k) == normalFlux) ppor = one
+
+                    dis0 = fis0 * ppor
+
+                    ! Construct the vector of the first differences multiplied
+                    ! by dis0.
+
+                    dr = dis0 * (w(i, j + 1, k, irho) - w(i, j, k, irho))
+                    dru = dis0 * (w(i, j + 1, k, irho) * w(i, j + 1, k, ivx) &
+                                  - w(i, j, k, irho) * w(i, j, k, ivx))
+                    drv = dis0 * (w(i, j + 1, k, irho) * w(i, j + 1, k, ivy) &
+                                  - w(i, j, k, irho) * w(i, j, k, ivy))
+                    drw = dis0 * (w(i, j + 1, k, irho) * w(i, j + 1, k, ivz) &
+                                  - w(i, j, k, irho) * w(i, j, k, ivz))
+                    dre = dis0 * (w(i, j + 1, k, irhoE) - w(i, j, k, irhoE))
+
+                    ! In case a k-equation is present, compute the difference
+                    ! of rhok and store the average value of k. If not present,
+                    ! set both these values to zero, such that later on no
+                    ! decision needs to be made anymore.
 
-             ! In case a k-equation is present, compute the difference
-             ! of rhok and store the average value of k. If not present,
-             ! set both these values to zero, such that later on no
-             ! decision needs to be made anymore.
+                    if (correctForK) then
+                        drk = dis0 * (w(i, j + 1, k, irho) * w(i, j + 1, k, itu1) &
+                                      - w(i, j, k, irho) * w(i, j, k, itu1))
+                        kAvg = half * (w(i, j + 1, k, itu1) + w(i, j, k, itu1))
+                    else
+                        drk = zero
+                        kAvg = zero
+                    end if
 
-             if( correctForK ) then
-                drk  = dis0*(w(i,j+1,k,irho)*w(i,j+1,k,itu1) &
-                     -       w(i,j,  k,irho)*w(i,j,  k,itu1))
-                kAvg = half*(w(i,j+1,k,itu1) + w(i,j,k,itu1))
-             else
-                drk  = zero
-                kAvg = zero
-             endif
+                    ! Compute the average value of gamma and compute some
+                    ! expressions in which it occurs.
 
-             ! Compute the average value of gamma and compute some
-             ! expressions in which it occurs.
+                    gammaAvg = half * (gamma(i, j + 1, k) + gamma(i, j, k))
+                    gm1 = gammaAvg - one
+                    ovgm1 = one / gm1
+                    gm53 = gammaAvg - five * third
 
-             gammaAvg = half*(gamma(i,j+1,k) + gamma(i,j,k))
-             gm1      = gammaAvg - one
-             ovgm1    = one/gm1
-             gm53     = gammaAvg - five*third
+                    ! Compute the average state at the interface.
 
-             ! Compute the average state at the interface.
+                    uAvg = half * (w(i, j + 1, k, ivx) + w(i, j, k, ivx))
+                    vAvg = half * (w(i, j + 1, k, ivy) + w(i, j, k, ivy))
+                    wAvg = half * (w(i, j + 1, k, ivz) + w(i, j, k, ivz))
+                    a2Avg = half * (gamma(i, j + 1, k) * p(i, j + 1, k) / w(i, j + 1, k, irho) &
+                                    + gamma(i, j, k) * p(i, j, k) / w(i, j, k, irho))
 
-             uAvg  = half*(w(i,j+1,k,ivx) + w(i,j,k,ivx))
-             vAvg  = half*(w(i,j+1,k,ivy) + w(i,j,k,ivy))
-             wAvg  = half*(w(i,j+1,k,ivz) + w(i,j,k,ivz))
-             a2Avg = half*(gamma(i,j+1,k)*p(i,j+1,k)/w(i,j+1,k,irho) &
-                  +      gamma(i,j,  k)*p(i,j,  k)/w(i,j,  k,irho))
+                    sx = sj(i, j, k, 1); sy = sj(i, j, k, 2); sz = sj(i, j, k, 3)
+                    area = sqrt(sx**2 + sy**2 + sz**2)
+                    tmp = one / max(1.e-25_realType, area)
+                    sx = sx * tmp
+                    sy = sy * tmp
+                    sz = sz * tmp
 
-             sx = sj(i,j,k,1); sy = sj(i,j,k,2); sz = sj(i,j,k,3)
-             area = sqrt(sx**2 + sy**2 + sz**2)
-             tmp  = one/max(1.e-25_realType,area)
-             sx   = sx*tmp
-             sy   = sy*tmp
-             sz   = sz*tmp
+                    alphaAvg = half * (uAvg**2 + vAvg**2 + wAvg**2)
+                    hAvg = alphaAvg + ovgm1 * (a2Avg - gm53 * kAvg)
+                    aAvg = sqrt(a2Avg)
+                    unAvg = uAvg * sx + vAvg * sy + wAvg * sz
+                    ovaAvg = one / aAvg
+                    ova2Avg = one / a2Avg
 
-             alphaAvg = half*(uAvg**2 + vAvg**2 + wAvg**2)
-             hAvg     = alphaAvg + ovgm1*(a2Avg - gm53*kAvg)
-             aAvg     = sqrt(a2Avg)
-             unAvg    = uAvg*sx + vAvg*sy + wAvg*sz
-             ovaAvg   = one/aAvg
-             ova2Avg  = one/a2Avg
+                    ! The mesh velocity if the face is moving. It must be
+                    ! divided by the area to obtain a true velocity.
 
-             ! The mesh velocity if the face is moving. It must be
-             ! divided by the area to obtain a true velocity.
+                    if (addGridVelocities) sface = sFaceJ(i, j, k) * tmp
 
-             if( addGridVelocities ) sface = sFaceJ(i,j,k)*tmp
+                    ! Compute the absolute values of the three eigenvalues
+                    ! and make sure they don't become zero by cutting them
+                    ! off to a certain minimum.
 
-             ! Compute the absolute values of the three eigenvalues
-             ! and make sure they don't become zero by cutting them
-             ! off to a certain minimum.
+                    lam1 = abs(unAvg - sface + aAvg)
+                    lam2 = abs(unAvg - sface - aAvg)
+                    lam3 = abs(unAvg - sface)
 
-             lam1 = abs(unAvg - sface + aAvg)
-             lam2 = abs(unAvg - sface - aAvg)
-             lam3 = abs(unAvg - sface)
-
-             rrad = lam3 + aAvg
-
-             lam1 = max(lam1,epsAcoustic*rrad)
-             lam2 = max(lam2,epsAcoustic*rrad)
-             lam3 = max(lam3,epsShear*rrad)
-
-             ! Multiply the eigenvalues by the area to obtain
-             ! the correct values for the dissipation term.
-
-             lam1 = lam1*area
-             lam2 = lam2*area
-             lam3 = lam3*area
-
-             ! Some abbreviations, which occur quite often in the
-             ! dissipation terms.
-
-             abv1 = half*(lam1 + lam2)
-             abv2 = half*(lam1 - lam2)
-             abv3 = abv1 - lam3
-
-             abv4 = gm1*(alphaAvg*dr - uAvg*dru -vAvg*drv &
-                  -      wAvg*drw + dre) - gm53*drk
-             abv5 = sx*dru + sy*drv + sz*drw - unAvg*dr
-
-             abv6 = abv3*abv4*ova2Avg + abv2*abv5*ovaAvg
-             abv7 = abv2*abv4*ovaAvg  + abv3*abv5
-
-             ! Compute and scatter the dissipative flux.
-             ! Density.
-
-             fs               = lam3*dr  + abv6
-             fw(i,j+1,k,irho) = fw(i,j+1,k,irho) + fs
-             fw(i,j,k,irho)   = fw(i,j,k,irho)   - fs
-
-             ! X-momentum.
-
-             fs              = lam3*dru + uAvg*abv6 + sx*abv7
-             fw(i,j+1,k,imx) = fw(i,j+1,k,imx) + fs
-             fw(i,j,k,imx)   = fw(i,j,k,imx)   - fs
-
-             ! Y-momentum.
-
-             fs              = lam3*drv + vAvg*abv6 + sy*abv7
-             fw(i,j+1,k,imy) = fw(i,j+1,k,imy) + fs
-             fw(i,j,k,imy)   = fw(i,j,k,imy)   - fs
-
-             ! Z-momentum.
-
-             fs              = lam3*drw + wAvg*abv6 + sz*abv7
-             fw(i,j+1,k,imz) = fw(i,j+1,k,imz) + fs
-             fw(i,j,k,imz)   = fw(i,j,k,imz)   - fs
-
-             ! Energy.
+                    rrad = lam3 + aAvg
 
-             fs                = lam3*dre + hAvg*abv6 + unAvg*abv7
-             fw(i,j+1,k,irhoE) = fw(i,j+1,k,irhoE) + fs
-             fw(i,j,k,irhoE)   = fw(i,j,k,irhoE)   - fs
+                    lam1 = max(lam1, epsAcoustic * rrad)
+                    lam2 = max(lam2, epsAcoustic * rrad)
+                    lam3 = max(lam3, epsShear * rrad)
 
-          enddo
-       enddo
-    enddo
-    !
-    !       Dissipative fluxes in the k-direction.
-    !
-    do k=1,kl
-       do j=2,jl
-          do i=2,il
+                    ! Multiply the eigenvalues by the area to obtain
+                    ! the correct values for the dissipation term.
 
-             ! Compute the dissipation coefficient for this face.
+                    lam1 = lam1 * area
+                    lam2 = lam2 * area
+                    lam3 = lam3 * area
 
-             ppor = zero
-             if(porK(i,j,k) == normalFlux) ppor = one
+                    ! Some abbreviations, which occur quite often in the
+                    ! dissipation terms.
 
-             dis0 = fis0*ppor
+                    abv1 = half * (lam1 + lam2)
+                    abv2 = half * (lam1 - lam2)
+                    abv3 = abv1 - lam3
 
-             ! Construct the vector of the first differences multiplied
-             ! by dis0.
+                    abv4 = gm1 * (alphaAvg * dr - uAvg * dru - vAvg * drv &
+                                  - wAvg * drw + dre) - gm53 * drk
+                    abv5 = sx * dru + sy * drv + sz * drw - unAvg * dr
 
-             dr  = dis0*(w(i,j,k+1,irho) - w(i,j,k,irho))
-             dru = dis0*(w(i,j,k+1,irho)*w(i,j,k+1,ivx) &
-                  -       w(i,j,k,  irho)*w(i,j,k,  ivx))
-             drv = dis0*(w(i,j,k+1,irho)*w(i,j,k+1,ivy) &
-                  -       w(i,j,k,  irho)*w(i,j,k,  ivy))
-             drw = dis0*(w(i,j,k+1,irho)*w(i,j,k+1,ivz) &
-                  -       w(i,j,k,  irho)*w(i,j,k,  ivz))
-             dre = dis0*(w(i,j,k+1,irhoE) - w(i,j,k,irhoE))
+                    abv6 = abv3 * abv4 * ova2Avg + abv2 * abv5 * ovaAvg
+                    abv7 = abv2 * abv4 * ovaAvg + abv3 * abv5
 
-             ! In case a k-equation is present, compute the difference
-             ! of rhok and store the average value of k. If not present,
-             ! set both these values to zero, such that later on no
-             ! decision needs to be made anymore.
+                    ! Compute and scatter the dissipative flux.
+                    ! Density.
 
-             if( correctForK ) then
-                drk  = dis0*(w(i,j,k+1,irho)*w(i,j,k+1,itu1) &
-                     -       w(i,j,k,  irho)*w(i,j,k,  itu1))
-                kAvg = half*(w(i,j,k+1,itu1) + w(i,j,k,itu1))
-             else
-                drk  = zero
-                kAvg = zero
-             endif
+                    fs = lam3 * dr + abv6
+                    fw(i, j + 1, k, irho) = fw(i, j + 1, k, irho) + fs
+                    fw(i, j, k, irho) = fw(i, j, k, irho) - fs
 
-             ! Compute the average value of gamma and compute some
-             ! expressions in which it occurs.
+                    ! X-momentum.
 
-             gammaAvg = half*(gamma(i,j,k+1) + gamma(i,j,k))
-             gm1       = gammaAvg - one
-             ovgm1     = one/gm1
-             gm53      = gammaAvg - five*third
+                    fs = lam3 * dru + uAvg * abv6 + sx * abv7
+                    fw(i, j + 1, k, imx) = fw(i, j + 1, k, imx) + fs
+                    fw(i, j, k, imx) = fw(i, j, k, imx) - fs
 
-             ! Compute the average state at the interface.
+                    ! Y-momentum.
 
-             uAvg  = half*(w(i,j,k+1,ivx) + w(i,j,k,ivx))
-             vAvg  = half*(w(i,j,k+1,ivy) + w(i,j,k,ivy))
-             wAvg  = half*(w(i,j,k+1,ivz) + w(i,j,k,ivz))
-             a2Avg = half*(gamma(i,j,k+1)*p(i,j,k+1)/w(i,j,k+1,irho) &
-                  +       gamma(i,j,k)  *p(i,j,k)  /w(i,j,k,  irho))
+                    fs = lam3 * drv + vAvg * abv6 + sy * abv7
+                    fw(i, j + 1, k, imy) = fw(i, j + 1, k, imy) + fs
+                    fw(i, j, k, imy) = fw(i, j, k, imy) - fs
 
-             sx = sk(i,j,k,1); sy = sk(i,j,k,2); sz = sk(i,j,k,3)
-             area = sqrt(sx**2 + sy**2 + sz**2)
-             tmp  = one/max(1.e-25_realType,area)
-             sx   = sx*tmp
-             sy   = sy*tmp
-             sz   = sz*tmp
+                    ! Z-momentum.
 
-             alphaAvg = half*(uAvg**2 + vAvg**2 + wAvg**2)
-             hAvg     = alphaAvg + ovgm1*(a2Avg - gm53*kAvg)
-             aAvg     = sqrt(a2Avg)
-             unAvg    = uAvg*sx + vAvg*sy + wAvg*sz
-             ovaAvg   = one/aAvg
-             ova2Avg  = one/a2Avg
+                    fs = lam3 * drw + wAvg * abv6 + sz * abv7
+                    fw(i, j + 1, k, imz) = fw(i, j + 1, k, imz) + fs
+                    fw(i, j, k, imz) = fw(i, j, k, imz) - fs
+
+                    ! Energy.
+
+                    fs = lam3 * dre + hAvg * abv6 + unAvg * abv7
+                    fw(i, j + 1, k, irhoE) = fw(i, j + 1, k, irhoE) + fs
+                    fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - fs
+
+                end do
+            end do
+        end do
+        !
+        !       Dissipative fluxes in the k-direction.
+        !
+        do k = 1, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Compute the dissipation coefficient for this face.
+
+                    ppor = zero
+                    if (porK(i, j, k) == normalFlux) ppor = one
+
+                    dis0 = fis0 * ppor
+
+                    ! Construct the vector of the first differences multiplied
+                    ! by dis0.
+
+                    dr = dis0 * (w(i, j, k + 1, irho) - w(i, j, k, irho))
+                    dru = dis0 * (w(i, j, k + 1, irho) * w(i, j, k + 1, ivx) &
+                                  - w(i, j, k, irho) * w(i, j, k, ivx))
+                    drv = dis0 * (w(i, j, k + 1, irho) * w(i, j, k + 1, ivy) &
+                                  - w(i, j, k, irho) * w(i, j, k, ivy))
+                    drw = dis0 * (w(i, j, k + 1, irho) * w(i, j, k + 1, ivz) &
+                                  - w(i, j, k, irho) * w(i, j, k, ivz))
+                    dre = dis0 * (w(i, j, k + 1, irhoE) - w(i, j, k, irhoE))
+
+                    ! In case a k-equation is present, compute the difference
+                    ! of rhok and store the average value of k. If not present,
+                    ! set both these values to zero, such that later on no
+                    ! decision needs to be made anymore.
 
-             ! The mesh velocity if the face is moving. It must be
-             ! divided by the area to obtain a true velocity.
+                    if (correctForK) then
+                        drk = dis0 * (w(i, j, k + 1, irho) * w(i, j, k + 1, itu1) &
+                                      - w(i, j, k, irho) * w(i, j, k, itu1))
+                        kAvg = half * (w(i, j, k + 1, itu1) + w(i, j, k, itu1))
+                    else
+                        drk = zero
+                        kAvg = zero
+                    end if
 
-             if( addGridVelocities ) sface = sFaceK(i,j,k)*tmp
+                    ! Compute the average value of gamma and compute some
+                    ! expressions in which it occurs.
 
-             ! Compute the absolute values of the three eigenvalues
-             ! and make sure they don't become zero by cutting them
-             ! off to a certain minimum.
+                    gammaAvg = half * (gamma(i, j, k + 1) + gamma(i, j, k))
+                    gm1 = gammaAvg - one
+                    ovgm1 = one / gm1
+                    gm53 = gammaAvg - five * third
 
-             lam1 = abs(unAvg - sface + aAvg)
-             lam2 = abs(unAvg - sface - aAvg)
-             lam3 = abs(unAvg - sface)
+                    ! Compute the average state at the interface.
 
-             rrad = lam3 + aAvg
+                    uAvg = half * (w(i, j, k + 1, ivx) + w(i, j, k, ivx))
+                    vAvg = half * (w(i, j, k + 1, ivy) + w(i, j, k, ivy))
+                    wAvg = half * (w(i, j, k + 1, ivz) + w(i, j, k, ivz))
+                    a2Avg = half * (gamma(i, j, k + 1) * p(i, j, k + 1) / w(i, j, k + 1, irho) &
+                                    + gamma(i, j, k) * p(i, j, k) / w(i, j, k, irho))
 
-             lam1 = max(lam1,epsAcoustic*rrad)
-             lam2 = max(lam2,epsAcoustic*rrad)
-             lam3 = max(lam3,epsShear*rrad)
+                    sx = sk(i, j, k, 1); sy = sk(i, j, k, 2); sz = sk(i, j, k, 3)
+                    area = sqrt(sx**2 + sy**2 + sz**2)
+                    tmp = one / max(1.e-25_realType, area)
+                    sx = sx * tmp
+                    sy = sy * tmp
+                    sz = sz * tmp
 
-             ! Multiply the eigenvalues by the area to obtain
-             ! the correct values for the dissipation term.
+                    alphaAvg = half * (uAvg**2 + vAvg**2 + wAvg**2)
+                    hAvg = alphaAvg + ovgm1 * (a2Avg - gm53 * kAvg)
+                    aAvg = sqrt(a2Avg)
+                    unAvg = uAvg * sx + vAvg * sy + wAvg * sz
+                    ovaAvg = one / aAvg
+                    ova2Avg = one / a2Avg
 
-             lam1 = lam1*area
-             lam2 = lam2*area
-             lam3 = lam3*area
+                    ! The mesh velocity if the face is moving. It must be
+                    ! divided by the area to obtain a true velocity.
 
-             ! Some abbreviations, which occur quite often in the
-             ! dissipation terms.
+                    if (addGridVelocities) sface = sFaceK(i, j, k) * tmp
 
-             abv1 = half*(lam1 + lam2)
-             abv2 = half*(lam1 - lam2)
-             abv3 = abv1 - lam3
+                    ! Compute the absolute values of the three eigenvalues
+                    ! and make sure they don't become zero by cutting them
+                    ! off to a certain minimum.
 
-             abv4 = gm1*(alphaAvg*dr - uAvg*dru -vAvg*drv &
-                  -      wAvg*drw + dre) - gm53*drk
-             abv5 = sx*dru + sy*drv + sz*drw - unAvg*dr
+                    lam1 = abs(unAvg - sface + aAvg)
+                    lam2 = abs(unAvg - sface - aAvg)
+                    lam3 = abs(unAvg - sface)
 
-             abv6 = abv3*abv4*ova2Avg + abv2*abv5*ovaAvg
-             abv7 = abv2*abv4*ovaAvg  + abv3*abv5
+                    rrad = lam3 + aAvg
 
-             ! Compute and scatter the dissipative flux.
-             ! Density.
+                    lam1 = max(lam1, epsAcoustic * rrad)
+                    lam2 = max(lam2, epsAcoustic * rrad)
+                    lam3 = max(lam3, epsShear * rrad)
 
-             fs               = lam3*dr  + abv6
-             fw(i,j,k+1,irho) = fw(i,j,k+1,irho) + fs
-             fw(i,j,k,irho)   = fw(i,j,k,irho)   - fs
+                    ! Multiply the eigenvalues by the area to obtain
+                    ! the correct values for the dissipation term.
 
-             ! X-momentum.
+                    lam1 = lam1 * area
+                    lam2 = lam2 * area
+                    lam3 = lam3 * area
 
-             fs              = lam3*dru + uAvg*abv6 + sx*abv7
-             fw(i,j,k+1,imx) = fw(i,j,k+1,imx) + fs
-             fw(i,j,k,imx)   = fw(i,j,k,imx)   - fs
+                    ! Some abbreviations, which occur quite often in the
+                    ! dissipation terms.
 
-             ! Y-momentum.
+                    abv1 = half * (lam1 + lam2)
+                    abv2 = half * (lam1 - lam2)
+                    abv3 = abv1 - lam3
 
-             fs              = lam3*drv + vAvg*abv6 + sy*abv7
-             fw(i,j,k+1,imy) = fw(i,j,k+1,imy) + fs
-             fw(i,j,k,imy)   = fw(i,j,k,imy)   - fs
+                    abv4 = gm1 * (alphaAvg * dr - uAvg * dru - vAvg * drv &
+                                  - wAvg * drw + dre) - gm53 * drk
+                    abv5 = sx * dru + sy * drv + sz * drw - unAvg * dr
 
-             ! Z-momentum.
+                    abv6 = abv3 * abv4 * ova2Avg + abv2 * abv5 * ovaAvg
+                    abv7 = abv2 * abv4 * ovaAvg + abv3 * abv5
 
-             fs              = lam3*drw + wAvg*abv6 + sz*abv7
-             fw(i,j,k+1,imz) = fw(i,j,k+1,imz) + fs
-             fw(i,j,k,imz)   = fw(i,j,k,imz)   - fs
+                    ! Compute and scatter the dissipative flux.
+                    ! Density.
 
-             ! Energy.
+                    fs = lam3 * dr + abv6
+                    fw(i, j, k + 1, irho) = fw(i, j, k + 1, irho) + fs
+                    fw(i, j, k, irho) = fw(i, j, k, irho) - fs
 
-             fs                = lam3*dre + hAvg*abv6 + unAvg*abv7
-             fw(i,j,k+1,irhoE) = fw(i,j,k+1,irhoE) + fs
-             fw(i,j,k,irhoE)   = fw(i,j,k,irhoE)   - fs
+                    ! X-momentum.
 
-          enddo
-       enddo
-    enddo
+                    fs = lam3 * dru + uAvg * abv6 + sx * abv7
+                    fw(i, j, k + 1, imx) = fw(i, j, k + 1, imx) + fs
+                    fw(i, j, k, imx) = fw(i, j, k, imx) - fs
+
+                    ! Y-momentum.
+
+                    fs = lam3 * drv + vAvg * abv6 + sy * abv7
+                    fw(i, j, k + 1, imy) = fw(i, j, k + 1, imy) + fs
+                    fw(i, j, k, imy) = fw(i, j, k, imy) - fs
+
+                    ! Z-momentum.
+
+                    fs = lam3 * drw + wAvg * abv6 + sz * abv7
+                    fw(i, j, k + 1, imz) = fw(i, j, k + 1, imz) + fs
+                    fw(i, j, k, imz) = fw(i, j, k, imz) - fs
+
+                    ! Energy.
+
+                    fs = lam3 * dre + hAvg * abv6 + unAvg * abv7
+                    fw(i, j, k + 1, irhoE) = fw(i, j, k + 1, irhoE) + fs
+                    fw(i, j, k, irhoE) = fw(i, j, k, irhoE) - fs
+
+                end do
+            end do
+        end do
 
-  end subroutine inviscidDissFluxMatrixCoarse
+    end subroutine inviscidDissFluxMatrixCoarse
 #endif
 end module fluxes
diff --git a/src/solver/multiGrid.F90 b/src/solver/multiGrid.F90
index e7be07ff0..e12c559bb 100644
--- a/src/solver/multiGrid.F90
+++ b/src/solver/multiGrid.F90
@@ -2,1505 +2,1504 @@ module multigrid
 
 contains
 
-  subroutine transferToCoarseGrid
-    !
-    !       transferToCoarseGrid restricts both the solution and the
-    !       residual to the next coarser grid level and computes the
-    !       residual forcing term on this level.
-    !
-    use constants
-    use blockPointers, only : flowDoms, dw, il, jl, kl, ie, je, ke, w, &
-         p1, p, rev, w1, mgIFine, mgJFine, mgKFine, nDom, wr, mgIWeight, &
-         mgJWeight, mgKWeight, iblank
-    use flowVarRefState, only : nwf, kPresent
-    use inputIteration, only: fcoll
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use iteration, only : currentLevel, rkStage
-    use utils, only : setPointers
-    use haloExchange, only : whalo1
-    use flowUtils, only : computeEtotBlock, computeLamViscosity
-    use turbutils, only : computeeddyviscosity
-    use solverUtils, only : timeStep
-    use residuals, only : residual, initRes, sourceTerms
-    use BCRoutines, only : applyAllBC
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn, sps, i, j, k, l
-    integer(kind=intType) :: ii, jj, kk, ii1, jj1, kk1
-    integer(kind=intType) :: fineLevel
-
-    integer(kind=intType), dimension(:,:,:), pointer :: iiblank
-
-    real(kind=realType) :: vola, tmp, weigth, blankFact
-
-    real(kind=realType), dimension(:,:,:,:), pointer :: ww
-    real(kind=realType), dimension(:,:,:), pointer :: pp, vvol, rrev
-
-    logical :: correctForK
-
-    ! Compute the residual on the fine grid. It is assumed that the
-    ! halo's contain the correct values. The time step is computed,
-    ! because this routine also computes the spectral radii for the
-    ! artificial dissipation terms.
-
-    rkStage = 0
-    call timeStep(.true.)
-
-    call initres(1_intType, nwf)
-    call sourceTerms()
-    call residual
-
-    ! Store the fine grid level and update the current level such
-    ! that it corresponds to the coarse grid
-
-    fineLevel    = currentLevel
-    currentLevel = currentLevel +1
-
-    ! Set the logical correctForK.
-    correctForK = .false.
-
-    ! Set the value of the blanking factor for the restricted
-    ! residual to 1. This will be overwritten below if needed.
-
-    blankFact = one
-
-    ! Loop over the number of spectral solutions and domains.
-
-    spectralLoop1: do sps=1,nTimeIntervalsSpectral
-       domains1: do nn=1,nDom
-
-          ! Set the pointers to the coarse block and to the fine grid
-          ! solution and volumes. Note that this is not needed for the,
-          ! residual, because only the fine grid residual is allocated.
-
-          call setPointers(nn, currentLevel, sps)
-
-          ww      => flowDoms(nn,fineLevel,sps)%w
-          pp      => flowDoms(nn,fineLevel,sps)%p
-          vvol    => flowDoms(nn,fineLevel,sps)%vol
-          rrev    => flowDoms(nn,fineLevel,sps)%rev
-          iiblank => flowDoms(nn,fineLevel,sps)%iblank
-
-          ! Restrict the solution and the residual to the coarser
-          ! meshes. The solution is done in a volume averaged way. Note
-          ! that the sum of the fine grid volumes is used to divide and
-          ! not the coarse grid volume; these are not necessarily the
-          ! same, especially for the flexible mg used.
-
-          do k=2,kl
-             kk  = mgKFine(k,1)
-             kk1 = mgKFine(k,2)
-             do j=2,jl
-                jj  = mgJFine(j,1)
-                jj1 = mgJFine(j,2)
-                do i=2,il
-                   ii  = mgIFine(i,1)
-                   ii1 = mgIFine(i,2)
-
-                   ! Determine the weight for the restricted residual. This
-                   ! weight is less than 1.0 if in at least 1 direction an
-                   ! irregular coarsening is used. This weight is not
-                   ! applied to the solution, because volume weighting is
-                   ! used there.
-
-                   weigth = mgKWeight(k)*mgJWeight(j)*mgIWeight(i)
-
-                   ! Compute the sum of the fine grid volumes.
-
-                   vola = vvol(ii,jj,kk)   + vvol(ii1,jj,kk)   &
-                        + vvol(ii,jj1,kk)  + vvol(ii1,jj1,kk)  &
-                        + vvol(ii,jj,kk1)  + vvol(ii1,jj,kk1)  &
-                        + vvol(ii,jj1,kk1) + vvol(ii1,jj1,kk1)
-
-                   ! Invert the sum of the fine grid volumes.
-
-                   vola = one/vola
-
-                   ! Store the restricted residual in wr, the residual
-                   ! forcing term, and the solution in ww.
-
-                   do l=1,nwf
-                      wr(i,j,k,l) = (dw(ii, jj,kk, l) + dw(ii, jj1,kk, l)  &
-                           +  dw(ii1,jj,kk, l) + dw(ii1,jj1,kk, l)  &
-                           +  dw(ii, jj,kk1,l) + dw(ii, jj1,kk1,l)  &
-                           +  dw(ii1,jj,kk1,l) + dw(ii1,jj1,kk1,l)) &
-                           *  weigth*blankFact
-                   enddo
-
-                   ! Restrict the solution.
-
-                   ! Density.
-
-                   w(i,j,k,irho) = (vvol(ii, jj, kk) *ww(ii, jj, kk, irho)  &
-                        +  vvol(ii, jj1,kk) *ww(ii, jj1,kk, irho)  &
-                        +  vvol(ii1,jj, kk) *ww(ii1,jj, kk, irho)  &
-                        +  vvol(ii1,jj1,kk) *ww(ii1,jj1,kk, irho)  &
-                        +  vvol(ii, jj, kk1)*ww(ii, jj, kk1,irho)  &
-                        +  vvol(ii, jj1,kk1)*ww(ii, jj1,kk1,irho)  &
-                        +  vvol(ii1,jj, kk1)*ww(ii1,jj, kk1,irho)  &
-                        +  vvol(ii1,jj1,kk1)*ww(ii1,jj1,kk1,irho)) &
-                        * vola
-
-                   ! X-velocity.
-
-                   w(i,j,k,ivx) = (vvol(ii, jj, kk) *ww(ii, jj, kk, ivx)  &
-                        +  vvol(ii, jj1,kk) *ww(ii, jj1,kk, ivx)  &
-                        +  vvol(ii1,jj, kk) *ww(ii1,jj, kk, ivx)  &
-                        +  vvol(ii1,jj1,kk) *ww(ii1,jj1,kk, ivx)  &
-                        +  vvol(ii, jj, kk1)*ww(ii, jj, kk1,ivx)  &
-                        +  vvol(ii, jj1,kk1)*ww(ii, jj1,kk1,ivx)  &
-                        +  vvol(ii1,jj, kk1)*ww(ii1,jj, kk1,ivx)  &
-                        +  vvol(ii1,jj1,kk1)*ww(ii1,jj1,kk1,ivx)) &
-                        * vola
-
-                   ! Y-velocity.
-
-                   w(i,j,k,ivy) = (vvol(ii, jj, kk) *ww(ii, jj, kk, ivy)  &
-                        +  vvol(ii, jj1,kk) *ww(ii, jj1,kk, ivy)  &
-                        +  vvol(ii1,jj, kk) *ww(ii1,jj, kk, ivy)  &
-                        +  vvol(ii1,jj1,kk) *ww(ii1,jj1,kk, ivy)  &
-                        +  vvol(ii, jj, kk1)*ww(ii, jj, kk1,ivy)  &
-                        +  vvol(ii, jj1,kk1)*ww(ii, jj1,kk1,ivy)  &
-                        +  vvol(ii1,jj, kk1)*ww(ii1,jj, kk1,ivy)  &
-                        +  vvol(ii1,jj1,kk1)*ww(ii1,jj1,kk1,ivy)) &
-                        * vola
-
-                   ! Z-velocity.
-
-                   w(i,j,k,ivz) = (vvol(ii, jj, kk) *ww(ii, jj, kk, ivz)  &
-                        +  vvol(ii, jj1,kk) *ww(ii, jj1,kk, ivz)  &
-                        +  vvol(ii1,jj, kk) *ww(ii1,jj, kk, ivz)  &
-                        +  vvol(ii1,jj1,kk) *ww(ii1,jj1,kk, ivz)  &
-                        +  vvol(ii, jj, kk1)*ww(ii, jj, kk1,ivz)  &
-                        +  vvol(ii, jj1,kk1)*ww(ii, jj1,kk1,ivz)  &
-                        +  vvol(ii1,jj, kk1)*ww(ii1,jj, kk1,ivz)  &
-                        +  vvol(ii1,jj1,kk1)*ww(ii1,jj1,kk1,ivz)) &
-                        * vola
-
-                   ! Pressure.
-
-                   p(i,j,k) = (vvol(ii, jj, kk) *pp(ii, jj, kk)  &
-                        +  vvol(ii, jj1,kk) *pp(ii, jj1,kk)  &
-                        +  vvol(ii1,jj, kk) *pp(ii1,jj, kk)  &
-                        +  vvol(ii1,jj1,kk) *pp(ii1,jj1,kk)  &
-                        +  vvol(ii, jj, kk1)*pp(ii, jj, kk1) &
-                        +  vvol(ii, jj1,kk1)*pp(ii, jj1,kk1) &
-                        +  vvol(ii1,jj, kk1)*pp(ii1,jj, kk1) &
-                        +  vvol(ii1,jj1,kk1)*pp(ii1,jj1,kk1))*vola
-
-                   ! Restrict the eddy viscosity if needed.
-                   rev(i,j,k) = (vvol(ii, jj, kk) *rrev(ii, jj, kk)  &
-                        +  vvol(ii, jj1,kk) *rrev(ii, jj1,kk)  &
-                        +  vvol(ii1,jj, kk) *rrev(ii1,jj, kk)  &
-                        +  vvol(ii1,jj1,kk) *rrev(ii1,jj1,kk)  &
-                        +  vvol(ii, jj, kk1)*rrev(ii, jj, kk1) &
-                        +  vvol(ii, jj1,kk1)*rrev(ii, jj1,kk1) &
-                        +  vvol(ii1,jj, kk1)*rrev(ii1,jj, kk1) &
-                        +  vvol(ii1,jj1,kk1)*rrev(ii1,jj1,kk1))*vola
-                enddo
-             enddo
-          enddo
-
-          ! Compute the total energy, laminar viscosity and eddy viscosity
-          ! for the owned cells of this block.
-
-          call computeEtotBlock(2_intType,il, 2_intType,jl, &
-               2_intType,kl, correctForK)
-          call computeLamViscosity(.False.)
-          call computeEddyViscosity(.False.)
-
-          ! Set the values of the 1st layer of corner row halo's to avoid
-          ! divisions by zero and uninitialized variables.
-
-          call setCornerRowHalos(nwf)
-
-       enddo domains1
-    enddo spectralLoop1
-
-    ! Apply all boundary conditions to all blocks on this level.
-    ! No need to exchange the pressure before, because on the coarser
-    ! grids a constant pressure boundary condition is used for the
-    ! inviscid walls.
-
-    call applyAllBC(.false.)
-
-    ! Exchange the solution. As on the coarse grid only the first
-    ! layer of halo's is needed, whalo1 is called.
-
-    call whalo1(currentLevel, 1_intType, nwf, .true., &
-         .true., .true.)
-
-    ! The second part of the residual forcing term is the residual
-    ! of the just restricted solution.
-    ! First compute the time step.
-
-    rkStage = 0
-    call timeStep(.false.)
-
-    ! The second part of the residual forcing term for the mean
-    ! flow equations. Furthermore the solution, primitive variables,
-    ! is stored in w1 and p1, also of the 1st level halo's. These
-    ! may be needed to determine the fine grid corrections.
-
-    spectralLoop3: do sps=1,nTimeIntervalsSpectral
-       domains3: do nn=1,nDom
-
-          ! Have the pointers point to this block.
-
-          call setPointers(nn, currentLevel, sps)
-
-          ! Initialize the mean flow residual to zero.
-
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   dw(i,j,k,irho)  = zero
-                   dw(i,j,k,imx)   = zero
-                   dw(i,j,k,imy)   = zero
-                   dw(i,j,k,imz)   = zero
-                   dw(i,j,k,irhoE) = zero
-                enddo
-             enddo
-          enddo
-
-          ! Store the restricted solution in w1 and p1.
-
-          ! Flow field variables.
-
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-                   w1(i,j,k,irho)  = w(i,j,k,irho)
-                   w1(i,j,k,ivx)   = w(i,j,k,ivx)
-                   w1(i,j,k,ivy)   = w(i,j,k,ivy)
-                   w1(i,j,k,ivz)   = w(i,j,k,ivz)
-                   w1(i,j,k,irhoE) = w(i,j,k,irhoE)
-
-                   p1(i,j,k)       = p(i,j,k)
-                enddo
-             enddo
-          enddo
-
-       enddo domains3
-    enddo spectralLoop3
-
-    ! Compute the mean flow residual.
-
-    call residual
-
-    ! Substract the residual from the restricted residual and form
-    ! the residual forcing term, where the relaxation factor fcoll is
-    ! taken into account. Store the restricted residual, currently
-    ! stored in wr, in residual afterwards. This is the net result
-    ! of adding the residual to the residual forcing term.
-
-    spectralLoop4: do sps=1,nTimeIntervalsSpectral
-       domains4: do nn=1,nDom
-
-          ! Have the pointers point to this block.
-
-          call setPointers(nn, currentLevel, sps)
-
-          ! Loop over the owned cells. No need to do anything on the
-          ! halo's.
-
-          do l=1,nwf
-             do k=2,kl
-                do j=2,jl
-                   do i=2,il
-                      tmp = fcoll*wr(i,j,k,l)
-                      wr(i,j,k,l) = tmp - dw(i,j,k,l)
-                      dw(i,j,k,l) = tmp
-                   enddo
-                enddo
-             enddo
-          enddo
-
-       enddo domains4
-    enddo spectralLoop4
-
-  end subroutine transferToCoarseGrid
-
-  subroutine transferToFineGrid(corrections)
-    !
-    !       transferToFineGrid interpolates either the corrections or the
-    !       solution to the next finer grid level. A standard trilinear
-    !       interpolation is used.
-    !
-    use constants
-    use blockPointers, only : flowDoms, dw, il, jl, kl, ie, je, ke, w, &
-         p1, p, rev, w1, mgICoarse, mgJCoarse, mgKCoarse, nDom, wr, mgIWeight, &
-         mgJWeight, mgKWeight, iblank
-    use flowVarRefState, only : nwf, kPresent, pInfCorr, nw, rhoInf, nt1
-    use inputPhysics, only : equations
-    use inputIteration, only: fcoll, mgBoundCorr
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use iteration, only : currentLevel, rkStage, groundLevel, exchangePressureEarly
-    use utils, only : setPointers, getCorrectForK
-    use haloExchange, only : whalo1, whalo2
-    use flowUtils, only : computeEtotBlock, computeLamViscosity
-    use turbutils, only : computeeddyviscosity
-    use turbBCRoutines, only : applyAllTurbBC
-    use BCRoutines, only : applyAllBC
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    logical, intent(in) :: corrections
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: sps, nn, i, j, k, l
-    integer(kind=intType) :: ii, jj, kk, ii1, jj1, kk1
-    integer(kind=intType) :: coarseLevel, nVarInt
-
-    real(kind=realType) :: fact
-    real(kind=realType), dimension(:,:,:,:), pointer :: ww, ww1, res
-    real(kind=realType), dimension(:,:,:),   pointer :: pp, pp1
-
-    logical :: secondHalo, correctForK
-
-    ! Store the coarse grid level in coarseLevel.
-
-    coarseLevel = currentLevel +1
-
-    ! Set the number of variables for which either the corrections
-    ! or the solution must be interpolated. If the corrections must
-    ! be interpolated, this value is set to the number of variables
-    ! to which multigrid must be applied, otherwise all conservative
-    ! variables are interpolated.
-
-    nVarInt = nw
-    if( corrections ) nVarInt = nwf
-
-    ! Set the value of secondHalo, depending on the situation.
-    ! In the full MG (currentLevel < groundLevel) the second halo is
-    ! always set; otherwise only on the finest mesh in the current mg
-    ! cycle.
-
-    if(currentLevel <= groundLevel) then
-       secondHalo = .true.
-    else
-       secondHalo = .false.
-    endif
-
-    ! Determine whether or not the total energy must be corrected
-    ! for the presence of the turbulent kinetic energy.
-    correctForK = getCorrectForK()
-
-    ! Set fact to either 0.0 or 1.0, depending whether neumann or
-    ! dirichlet boundary conditions should be used for the boundary
-    ! halo's when interpolating.
-
-    fact = one
-    if(corrections .and. mgBoundCorr == bcDirichlet0) fact = zero
-
-    ! Loop over the number of spectral solutions and local blocks.
-
-    spectralLoop: do sps=1,nTimeIntervalsSpectral
-       domains: do nn=1,nDom
-
-          ! Set the pointers to the fine level, i.e. currentLevel.
-          ! Also set the pointers for ww, pp, ww1 and pp1 to the
-          ! coarse grid values.
-
-          call setPointers(nn, currentLevel,sps)
-          ww  => flowDoms(nn,coarseLevel,sps)%w
-          pp  => flowDoms(nn,coarseLevel,sps)%p
-          ww1 => flowDoms(nn,coarseLevel,sps)%w1
-          pp1 => flowDoms(nn,coarseLevel,sps)%p1
-
-          ! Store the correction in ww if the corrections must be
-          ! interpolated. The 1st level halo's are included, because
-          ! these values are needed for the interpolation. Note that
-          ! flowDoms(nn,coarseLevel)%ie, etc. must be used, because
-          ! ie is equal to the fine grid value.
-
-          testCorrections1: if( corrections ) then
-
-             ! Flow field variables. Have res point to dw and compute the
-             ! corrections. Note that the pressure correction must be
-             ! stored instead of the total energy.
-
-             res => dw
-
-             do k=1,flowDoms(nn,coarseLevel,sps)%ke
-                do j=1,flowDoms(nn,coarseLevel,sps)%je
-                   do i=1,flowDoms(nn,coarseLevel,sps)%ie
-                      ww(i,j,k,irho)  = ww(i,j,k,irho) - ww1(i,j,k,irho)
-                      ww(i,j,k,ivx)   = ww(i,j,k,ivx)  - ww1(i,j,k,ivx)
-                      ww(i,j,k,ivy)   = ww(i,j,k,ivy)  - ww1(i,j,k,ivy)
-                      ww(i,j,k,ivz)   = ww(i,j,k,ivz)  - ww1(i,j,k,ivz)
-                      ww(i,j,k,irhoE) = pp(i,j,k)      - pp1(i,j,k)
-                   enddo
-                enddo
-             enddo
-
-             ! The possible turbulent variables.
-
-             do l=nt1,nVarInt
-                do k=1,flowDoms(nn,coarseLevel,sps)%ke
-                   do j=1,flowDoms(nn,coarseLevel,sps)%je
-                      do i=1,flowDoms(nn,coarseLevel,sps)%ie
-                         ww(i,j,k,l) = ww(i,j,k,l) - ww1(i,j,k,l)
-                      enddo
-                   enddo
-                enddo
-             enddo
+    subroutine transferToCoarseGrid
+        !
+        !       transferToCoarseGrid restricts both the solution and the
+        !       residual to the next coarser grid level and computes the
+        !       residual forcing term on this level.
+        !
+        use constants
+        use blockPointers, only: flowDoms, dw, il, jl, kl, ie, je, ke, w, &
+                                 p1, p, rev, w1, mgIFine, mgJFine, mgKFine, nDom, wr, mgIWeight, &
+                                 mgJWeight, mgKWeight, iblank
+        use flowVarRefState, only: nwf, kPresent
+        use inputIteration, only: fcoll
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use iteration, only: currentLevel, rkStage
+        use utils, only: setPointers
+        use haloExchange, only: whalo1
+        use flowUtils, only: computeEtotBlock, computeLamViscosity
+        use turbutils, only: computeeddyviscosity
+        use solverUtils, only: timeStep
+        use residuals, only: residual, initRes, sourceTerms
+        use BCRoutines, only: applyAllBC
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn, sps, i, j, k, l
+        integer(kind=intType) :: ii, jj, kk, ii1, jj1, kk1
+        integer(kind=intType) :: fineLevel
+
+        integer(kind=intType), dimension(:, :, :), pointer :: iiblank
+
+        real(kind=realType) :: vola, tmp, weigth, blankFact
+
+        real(kind=realType), dimension(:, :, :, :), pointer :: ww
+        real(kind=realType), dimension(:, :, :), pointer :: pp, vvol, rrev
+
+        logical :: correctForK
+
+        ! Compute the residual on the fine grid. It is assumed that the
+        ! halo's contain the correct values. The time step is computed,
+        ! because this routine also computes the spectral radii for the
+        ! artificial dissipation terms.
+
+        rkStage = 0
+        call timeStep(.true.)
+
+        call initres(1_intType, nwf)
+        call sourceTerms()
+        call residual
+
+        ! Store the fine grid level and update the current level such
+        ! that it corresponds to the coarse grid
+
+        fineLevel = currentLevel
+        currentLevel = currentLevel + 1
+
+        ! Set the logical correctForK.
+        correctForK = .false.
+
+        ! Set the value of the blanking factor for the restricted
+        ! residual to 1. This will be overwritten below if needed.
+
+        blankFact = one
+
+        ! Loop over the number of spectral solutions and domains.
+
+        spectralLoop1: do sps = 1, nTimeIntervalsSpectral
+            domains1: do nn = 1, nDom
+
+                ! Set the pointers to the coarse block and to the fine grid
+                ! solution and volumes. Note that this is not needed for the,
+                ! residual, because only the fine grid residual is allocated.
+
+                call setPointers(nn, currentLevel, sps)
+
+                ww => flowDoms(nn, fineLevel, sps)%w
+                pp => flowDoms(nn, fineLevel, sps)%p
+                vvol => flowDoms(nn, fineLevel, sps)%vol
+                rrev => flowDoms(nn, fineLevel, sps)%rev
+                iiblank => flowDoms(nn, fineLevel, sps)%iblank
+
+                ! Restrict the solution and the residual to the coarser
+                ! meshes. The solution is done in a volume averaged way. Note
+                ! that the sum of the fine grid volumes is used to divide and
+                ! not the coarse grid volume; these are not necessarily the
+                ! same, especially for the flexible mg used.
+
+                do k = 2, kl
+                    kk = mgKFine(k, 1)
+                    kk1 = mgKFine(k, 2)
+                    do j = 2, jl
+                        jj = mgJFine(j, 1)
+                        jj1 = mgJFine(j, 2)
+                        do i = 2, il
+                            ii = mgIFine(i, 1)
+                            ii1 = mgIFine(i, 2)
+
+                            ! Determine the weight for the restricted residual. This
+                            ! weight is less than 1.0 if in at least 1 direction an
+                            ! irregular coarsening is used. This weight is not
+                            ! applied to the solution, because volume weighting is
+                            ! used there.
+
+                            weigth = mgKWeight(k) * mgJWeight(j) * mgIWeight(i)
+
+                            ! Compute the sum of the fine grid volumes.
+
+                            vola = vvol(ii, jj, kk) + vvol(ii1, jj, kk) &
+                                   + vvol(ii, jj1, kk) + vvol(ii1, jj1, kk) &
+                                   + vvol(ii, jj, kk1) + vvol(ii1, jj, kk1) &
+                                   + vvol(ii, jj1, kk1) + vvol(ii1, jj1, kk1)
+
+                            ! Invert the sum of the fine grid volumes.
+
+                            vola = one / vola
+
+                            ! Store the restricted residual in wr, the residual
+                            ! forcing term, and the solution in ww.
+
+                            do l = 1, nwf
+                                wr(i, j, k, l) = (dw(ii, jj, kk, l) + dw(ii, jj1, kk, l) &
+                                                  + dw(ii1, jj, kk, l) + dw(ii1, jj1, kk, l) &
+                                                  + dw(ii, jj, kk1, l) + dw(ii, jj1, kk1, l) &
+                                                  + dw(ii1, jj, kk1, l) + dw(ii1, jj1, kk1, l)) &
+                                                 * weigth * blankFact
+                            end do
+
+                            ! Restrict the solution.
+
+                            ! Density.
+
+                            w(i, j, k, irho) = (vvol(ii, jj, kk) * ww(ii, jj, kk, irho) &
+                                                + vvol(ii, jj1, kk) * ww(ii, jj1, kk, irho) &
+                                                + vvol(ii1, jj, kk) * ww(ii1, jj, kk, irho) &
+                                                + vvol(ii1, jj1, kk) * ww(ii1, jj1, kk, irho) &
+                                                + vvol(ii, jj, kk1) * ww(ii, jj, kk1, irho) &
+                                                + vvol(ii, jj1, kk1) * ww(ii, jj1, kk1, irho) &
+                                                + vvol(ii1, jj, kk1) * ww(ii1, jj, kk1, irho) &
+                                                + vvol(ii1, jj1, kk1) * ww(ii1, jj1, kk1, irho)) &
+                                               * vola
+
+                            ! X-velocity.
+
+                            w(i, j, k, ivx) = (vvol(ii, jj, kk) * ww(ii, jj, kk, ivx) &
+                                               + vvol(ii, jj1, kk) * ww(ii, jj1, kk, ivx) &
+                                               + vvol(ii1, jj, kk) * ww(ii1, jj, kk, ivx) &
+                                               + vvol(ii1, jj1, kk) * ww(ii1, jj1, kk, ivx) &
+                                               + vvol(ii, jj, kk1) * ww(ii, jj, kk1, ivx) &
+                                               + vvol(ii, jj1, kk1) * ww(ii, jj1, kk1, ivx) &
+                                               + vvol(ii1, jj, kk1) * ww(ii1, jj, kk1, ivx) &
+                                               + vvol(ii1, jj1, kk1) * ww(ii1, jj1, kk1, ivx)) &
+                                              * vola
+
+                            ! Y-velocity.
+
+                            w(i, j, k, ivy) = (vvol(ii, jj, kk) * ww(ii, jj, kk, ivy) &
+                                               + vvol(ii, jj1, kk) * ww(ii, jj1, kk, ivy) &
+                                               + vvol(ii1, jj, kk) * ww(ii1, jj, kk, ivy) &
+                                               + vvol(ii1, jj1, kk) * ww(ii1, jj1, kk, ivy) &
+                                               + vvol(ii, jj, kk1) * ww(ii, jj, kk1, ivy) &
+                                               + vvol(ii, jj1, kk1) * ww(ii, jj1, kk1, ivy) &
+                                               + vvol(ii1, jj, kk1) * ww(ii1, jj, kk1, ivy) &
+                                               + vvol(ii1, jj1, kk1) * ww(ii1, jj1, kk1, ivy)) &
+                                              * vola
+
+                            ! Z-velocity.
+
+                            w(i, j, k, ivz) = (vvol(ii, jj, kk) * ww(ii, jj, kk, ivz) &
+                                               + vvol(ii, jj1, kk) * ww(ii, jj1, kk, ivz) &
+                                               + vvol(ii1, jj, kk) * ww(ii1, jj, kk, ivz) &
+                                               + vvol(ii1, jj1, kk) * ww(ii1, jj1, kk, ivz) &
+                                               + vvol(ii, jj, kk1) * ww(ii, jj, kk1, ivz) &
+                                               + vvol(ii, jj1, kk1) * ww(ii, jj1, kk1, ivz) &
+                                               + vvol(ii1, jj, kk1) * ww(ii1, jj, kk1, ivz) &
+                                               + vvol(ii1, jj1, kk1) * ww(ii1, jj1, kk1, ivz)) &
+                                              * vola
+
+                            ! Pressure.
+
+                            p(i, j, k) = (vvol(ii, jj, kk) * pp(ii, jj, kk) &
+                                          + vvol(ii, jj1, kk) * pp(ii, jj1, kk) &
+                                          + vvol(ii1, jj, kk) * pp(ii1, jj, kk) &
+                                          + vvol(ii1, jj1, kk) * pp(ii1, jj1, kk) &
+                                          + vvol(ii, jj, kk1) * pp(ii, jj, kk1) &
+                                          + vvol(ii, jj1, kk1) * pp(ii, jj1, kk1) &
+                                          + vvol(ii1, jj, kk1) * pp(ii1, jj, kk1) &
+                                          + vvol(ii1, jj1, kk1) * pp(ii1, jj1, kk1)) * vola
+
+                            ! Restrict the eddy viscosity if needed.
+                            rev(i, j, k) = (vvol(ii, jj, kk) * rrev(ii, jj, kk) &
+                                            + vvol(ii, jj1, kk) * rrev(ii, jj1, kk) &
+                                            + vvol(ii1, jj, kk) * rrev(ii1, jj, kk) &
+                                            + vvol(ii1, jj1, kk) * rrev(ii1, jj1, kk) &
+                                            + vvol(ii, jj, kk1) * rrev(ii, jj, kk1) &
+                                            + vvol(ii, jj1, kk1) * rrev(ii, jj1, kk1) &
+                                            + vvol(ii1, jj, kk1) * rrev(ii1, jj, kk1) &
+                                            + vvol(ii1, jj1, kk1) * rrev(ii1, jj1, kk1)) * vola
+                        end do
+                    end do
+                end do
+
+                ! Compute the total energy, laminar viscosity and eddy viscosity
+                ! for the owned cells of this block.
+
+                call computeEtotBlock(2_intType, il, 2_intType, jl, &
+                                      2_intType, kl, correctForK)
+                call computeLamViscosity(.False.)
+                call computeEddyViscosity(.False.)
+
+                ! Set the values of the 1st layer of corner row halo's to avoid
+                ! divisions by zero and uninitialized variables.
+
+                call setCornerRowHalos(nwf)
+
+            end do domains1
+        end do spectralLoop1
+
+        ! Apply all boundary conditions to all blocks on this level.
+        ! No need to exchange the pressure before, because on the coarser
+        ! grids a constant pressure boundary condition is used for the
+        ! inviscid walls.
+
+        call applyAllBC(.false.)
+
+        ! Exchange the solution. As on the coarse grid only the first
+        ! layer of halo's is needed, whalo1 is called.
+
+        call whalo1(currentLevel, 1_intType, nwf, .true., &
+                    .true., .true.)
+
+        ! The second part of the residual forcing term is the residual
+        ! of the just restricted solution.
+        ! First compute the time step.
+
+        rkStage = 0
+        call timeStep(.false.)
+
+        ! The second part of the residual forcing term for the mean
+        ! flow equations. Furthermore the solution, primitive variables,
+        ! is stored in w1 and p1, also of the 1st level halo's. These
+        ! may be needed to determine the fine grid corrections.
+
+        spectralLoop3: do sps = 1, nTimeIntervalsSpectral
+            domains3: do nn = 1, nDom
+
+                ! Have the pointers point to this block.
+
+                call setPointers(nn, currentLevel, sps)
+
+                ! Initialize the mean flow residual to zero.
+
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            dw(i, j, k, irho) = zero
+                            dw(i, j, k, imx) = zero
+                            dw(i, j, k, imy) = zero
+                            dw(i, j, k, imz) = zero
+                            dw(i, j, k, irhoE) = zero
+                        end do
+                    end do
+                end do
+
+                ! Store the restricted solution in w1 and p1.
+
+                ! Flow field variables.
+
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            w1(i, j, k, irho) = w(i, j, k, irho)
+                            w1(i, j, k, ivx) = w(i, j, k, ivx)
+                            w1(i, j, k, ivy) = w(i, j, k, ivy)
+                            w1(i, j, k, ivz) = w(i, j, k, ivz)
+                            w1(i, j, k, irhoE) = w(i, j, k, irhoE)
+
+                            p1(i, j, k) = p(i, j, k)
+                        end do
+                    end do
+                end do
+
+            end do domains3
+        end do spectralLoop3
+
+        ! Compute the mean flow residual.
+
+        call residual
+
+        ! Substract the residual from the restricted residual and form
+        ! the residual forcing term, where the relaxation factor fcoll is
+        ! taken into account. Store the restricted residual, currently
+        ! stored in wr, in residual afterwards. This is the net result
+        ! of adding the residual to the residual forcing term.
+
+        spectralLoop4: do sps = 1, nTimeIntervalsSpectral
+            domains4: do nn = 1, nDom
+
+                ! Have the pointers point to this block.
+
+                call setPointers(nn, currentLevel, sps)
+
+                ! Loop over the owned cells. No need to do anything on the
+                ! halo's.
+
+                do l = 1, nwf
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 2, il
+                                tmp = fcoll * wr(i, j, k, l)
+                                wr(i, j, k, l) = tmp - dw(i, j, k, l)
+                                dw(i, j, k, l) = tmp
+                            end do
+                        end do
+                    end do
+                end do
+
+            end do domains4
+        end do spectralLoop4
+
+    end subroutine transferToCoarseGrid
+
+    subroutine transferToFineGrid(corrections)
+        !
+        !       transferToFineGrid interpolates either the corrections or the
+        !       solution to the next finer grid level. A standard trilinear
+        !       interpolation is used.
+        !
+        use constants
+        use blockPointers, only: flowDoms, dw, il, jl, kl, ie, je, ke, w, &
+                                 p1, p, rev, w1, mgICoarse, mgJCoarse, mgKCoarse, nDom, wr, mgIWeight, &
+                                 mgJWeight, mgKWeight, iblank
+        use flowVarRefState, only: nwf, kPresent, pInfCorr, nw, rhoInf, nt1
+        use inputPhysics, only: equations
+        use inputIteration, only: fcoll, mgBoundCorr
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use iteration, only: currentLevel, rkStage, groundLevel, exchangePressureEarly
+        use utils, only: setPointers, getCorrectForK
+        use haloExchange, only: whalo1, whalo2
+        use flowUtils, only: computeEtotBlock, computeLamViscosity
+        use turbutils, only: computeeddyviscosity
+        use turbBCRoutines, only: applyAllTurbBC
+        use BCRoutines, only: applyAllBC
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        logical, intent(in) :: corrections
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: sps, nn, i, j, k, l
+        integer(kind=intType) :: ii, jj, kk, ii1, jj1, kk1
+        integer(kind=intType) :: coarseLevel, nVarInt
+
+        real(kind=realType) :: fact
+        real(kind=realType), dimension(:, :, :, :), pointer :: ww, ww1, res
+        real(kind=realType), dimension(:, :, :), pointer :: pp, pp1
+
+        logical :: secondHalo, correctForK
+
+        ! Store the coarse grid level in coarseLevel.
+
+        coarseLevel = currentLevel + 1
+
+        ! Set the number of variables for which either the corrections
+        ! or the solution must be interpolated. If the corrections must
+        ! be interpolated, this value is set to the number of variables
+        ! to which multigrid must be applied, otherwise all conservative
+        ! variables are interpolated.
+
+        nVarInt = nw
+        if (corrections) nVarInt = nwf
+
+        ! Set the value of secondHalo, depending on the situation.
+        ! In the full MG (currentLevel < groundLevel) the second halo is
+        ! always set; otherwise only on the finest mesh in the current mg
+        ! cycle.
+
+        if (currentLevel <= groundLevel) then
+            secondHalo = .true.
+        else
+            secondHalo = .false.
+        end if
+
+        ! Determine whether or not the total energy must be corrected
+        ! for the presence of the turbulent kinetic energy.
+        correctForK = getCorrectForK()
+
+        ! Set fact to either 0.0 or 1.0, depending whether neumann or
+        ! dirichlet boundary conditions should be used for the boundary
+        ! halo's when interpolating.
+
+        fact = one
+        if (corrections .and. mgBoundCorr == bcDirichlet0) fact = zero
+
+        ! Loop over the number of spectral solutions and local blocks.
+
+        spectralLoop: do sps = 1, nTimeIntervalsSpectral
+            domains: do nn = 1, nDom
+
+                ! Set the pointers to the fine level, i.e. currentLevel.
+                ! Also set the pointers for ww, pp, ww1 and pp1 to the
+                ! coarse grid values.
+
+                call setPointers(nn, currentLevel, sps)
+                ww => flowDoms(nn, coarseLevel, sps)%w
+                pp => flowDoms(nn, coarseLevel, sps)%p
+                ww1 => flowDoms(nn, coarseLevel, sps)%w1
+                pp1 => flowDoms(nn, coarseLevel, sps)%p1
+
+                ! Store the correction in ww if the corrections must be
+                ! interpolated. The 1st level halo's are included, because
+                ! these values are needed for the interpolation. Note that
+                ! flowDoms(nn,coarseLevel)%ie, etc. must be used, because
+                ! ie is equal to the fine grid value.
+
+                testCorrections1: if (corrections) then
+
+                    ! Flow field variables. Have res point to dw and compute the
+                    ! corrections. Note that the pressure correction must be
+                    ! stored instead of the total energy.
+
+                    res => dw
+
+                    do k = 1, flowDoms(nn, coarseLevel, sps)%ke
+                        do j = 1, flowDoms(nn, coarseLevel, sps)%je
+                            do i = 1, flowDoms(nn, coarseLevel, sps)%ie
+                                ww(i, j, k, irho) = ww(i, j, k, irho) - ww1(i, j, k, irho)
+                                ww(i, j, k, ivx) = ww(i, j, k, ivx) - ww1(i, j, k, ivx)
+                                ww(i, j, k, ivy) = ww(i, j, k, ivy) - ww1(i, j, k, ivy)
+                                ww(i, j, k, ivz) = ww(i, j, k, ivz) - ww1(i, j, k, ivz)
+                                ww(i, j, k, irhoE) = pp(i, j, k) - pp1(i, j, k)
+                            end do
+                        end do
+                    end do
+
+                    ! The possible turbulent variables.
+
+                    do l = nt1, nVarInt
+                        do k = 1, flowDoms(nn, coarseLevel, sps)%ke
+                            do j = 1, flowDoms(nn, coarseLevel, sps)%je
+                                do i = 1, flowDoms(nn, coarseLevel, sps)%ie
+                                    ww(i, j, k, l) = ww(i, j, k, l) - ww1(i, j, k, l)
+                                end do
+                            end do
+                        end do
+                    end do
 
-          else testCorrections1
+                    else testCorrections1
 
-             ! The solution must be interpolated. Have res point to w and
-             ! store the pressure instead of the total energy.
+                    ! The solution must be interpolated. Have res point to w and
+                    ! store the pressure instead of the total energy.
 
-             res => w
+                    res => w
+
+                    do k = 1, flowDoms(nn, coarseLevel, sps)%ke
+                        do j = 1, flowDoms(nn, coarseLevel, sps)%je
+                            do i = 1, flowDoms(nn, coarseLevel, sps)%ie
+                                ww(i, j, k, irhoE) = pp(i, j, k)
+                            end do
+                        end do
+                    end do
+
+                end if testCorrections1
+
+                ! Set the values of the coarse grid boundary halo cells.
+
+                call setCorrectionsCoarseHalos(sps, nn, coarseLevel, &
+                                               fact, nVarInt)
+
+                ! Loop over the owned fine grid cells and determine res
+                ! by trilinear interpolation. Note that the coarse grid cell
+                ! ii (and jj and kk) are such that they are closest to the
+                ! fine grid cell center and ii1 is further away. This means
+                ! that in 1d ii get the weight 3/4 and ii1 1/4.
+
+                do k = 2, kl
+
+                    ! Determine the coarse grid cells kk and kk1.
+
+                    kk = mgKCoarse(k, 1)
+                    kk1 = mgKCoarse(k, 2)
+
+                    do j = 2, jl
+
+                        ! Determine the coarse grid cells jj and jj1.
+
+                        jj = mgJCoarse(j, 1)
+                        jj1 = mgJCoarse(j, 2)
+
+                        do i = 2, il
+
+                            ! Determine the coarse grid cells ii and ii1.
+
+                            ii = mgICoarse(i, 1)
+                            ii1 = mgICoarse(i, 2)
+
+                            ! Loop over the number of variables and interpolate them.
+                            ! The weights involved are 27/64, 9/64, 3/64 and 1/64.
+                            ! For computational efficiency their (exact) decimal
+                            ! counterparts are used in the loop below, which are
+                            ! 0.421875, 0.140625, 0.046875 and 0.015625 respectively.
+
+                            do l = 1, nVarInt
+                                res(i, j, k, l) = 0.421875_realType * ww(ii, jj, kk, l) &
+                                                  + 0.140625_realType * (ww(ii1, jj, kk, l) &
+                                                                         + ww(ii, jj1, kk, l) &
+                                                                         + ww(ii, jj, kk1, l)) &
+                                                  + 0.046875_realType * (ww(ii1, jj1, kk, l) &
+                                                                         + ww(ii1, jj, kk1, l) &
+                                                                         + ww(ii, jj1, kk1, l)) &
+                                                  + 0.015625_realType * ww(ii1, jj1, kk1, l)
+                            end do
 
-             do k=1,flowDoms(nn,coarseLevel,sps)%ke
-                do j=1,flowDoms(nn,coarseLevel,sps)%je
-                   do i=1,flowDoms(nn,coarseLevel,sps)%ie
-                      ww(i,j,k,irhoE) = pp(i,j,k)
-                   enddo
-                enddo
-             enddo
-
-          endif testCorrections1
-
-          ! Set the values of the coarse grid boundary halo cells.
-
-          call setCorrectionsCoarseHalos(sps, nn, coarseLevel, &
-               fact, nVarInt)
-
-          ! Loop over the owned fine grid cells and determine res
-          ! by trilinear interpolation. Note that the coarse grid cell
-          ! ii (and jj and kk) are such that they are closest to the
-          ! fine grid cell center and ii1 is further away. This means
-          ! that in 1d ii get the weight 3/4 and ii1 1/4.
-
-          do k=2,kl
-
-             ! Determine the coarse grid cells kk and kk1.
-
-             kk  = mgKCoarse(k,1)
-             kk1 = mgKCoarse(k,2)
+                        end do
+                    end do
+                end do
 
-             do j=2,jl
-
-                ! Determine the coarse grid cells jj and jj1.
+                ! Possibility to do smoothing on the corrections, if desired.
 
-                jj  = mgJCoarse(j,1)
-                jj1 = mgJCoarse(j,2)
-
-                do i=2,il
+                ! Compute the new state vector on the fine mesh in case the
+                ! corrections have just been interpolated.
 
-                   ! Determine the coarse grid cells ii and ii1.
+                testCorrections2: if (corrections) then
+
+                    ! Flow field variables. Again the pressure is updated
+                    ! and not the total energy. Make sure that the pressure and
+                    ! density do not become negative.
+
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 2, il
+                                w(i, j, k, irho) = w(i, j, k, irho) + dw(i, j, k, irho)
+                                w(i, j, k, ivx) = w(i, j, k, ivx) + dw(i, j, k, ivx)
+                                w(i, j, k, ivy) = w(i, j, k, ivy) + dw(i, j, k, ivy)
+                                w(i, j, k, ivz) = w(i, j, k, ivz) + dw(i, j, k, ivz)
+                                p(i, j, k) = p(i, j, k) + dw(i, j, k, irhoE)
 
-                   ii  = mgICoarse(i,1)
-                   ii1 = mgICoarse(i,2)
-
-                   ! Loop over the number of variables and interpolate them.
-                   ! The weights involved are 27/64, 9/64, 3/64 and 1/64.
-                   ! For computational efficiency their (exact) decimal
-                   ! counterparts are used in the loop below, which are
-                   ! 0.421875, 0.140625, 0.046875 and 0.015625 respectively.
-
-                   do l=1,nVarInt
-                      res(i,j,k,l) = 0.421875_realType* ww(ii,jj,kk,l)    &
-                           + 0.140625_realType*(ww(ii1,jj,kk,l)   &
-                           +                    ww(ii,jj1,kk,l)   &
-                           +                    ww(ii,jj,kk1,l))  &
-                           + 0.046875_realType*(ww(ii1,jj1,kk,l)  &
-                           +                    ww(ii1,jj,kk1,l)  &
-                           +                    ww(ii,jj1,kk1,l)) &
-                           + 0.015625_realType* ww(ii1,jj1,kk1,l)
-                   enddo
+                                w(i, j, k, irho) = max(w(i, j, k, irho), &
+                                                       1.e-4_realType * rhoInf)
+                                p(i, j, k) = max(p(i, j, k), &
+                                                 1.e-4_realType * pInfCorr)
+                            end do
+                        end do
+                    end do
 
-                enddo
-             enddo
-          enddo
+                    ! The possible turbulent variables.
 
-          ! Possibility to do smoothing on the corrections, if desired.
+                    do l = nt1, nVarInt
+                        do k = 2, kl
+                            do j = 2, jl
+                                do i = 2, il
+                                    w(i, j, k, l) = w(i, j, k, l) + dw(i, j, k, l)
+                                end do
+                            end do
+                        end do
+                    end do
+
+                    else testCorrections2
+
+                    ! The solution must be interpolated. At the moment the
+                    ! pressure is stored at the place of the total energy.
+                    ! Copy this value in the pressure array.
+
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 2, il
+                                p(i, j, k) = w(i, j, k, irhoE)
+                            end do
+                        end do
+                    end do
+
+                end if testCorrections2
+
+                ! Compute the total energy for the owned cells of this block.
+                ! If the solution must be interpolated, extrapolate the
+                ! solution in the halo's.
+
+                call computeEtotBlock(2_intType, il, 2_intType, jl, &
+                                      2_intType, kl, correctForK)
+                if (.not. corrections) call extrapolateSolution
+
+                ! Compute the laminar viscosity and eddy viscosity for the
+                ! owned cells of this block. If the solution must be
+                ! interpolated, extrapolate the viscosities in the halo's.
+
+                call computeLamViscosity(.False.)
+                call computeEddyViscosity(.False.)
+                if (.not. corrections) call extrapolateViscosities
+
+            end do domains
+        end do spectralLoop
+
+        ! Exchange the pressure if the pressure must be exchanged early.
+        ! Only the first halo's are needed, thus whalo1 is called.
+        ! On the finest mesh only.
+
+        if (exchangePressureEarly .and. currentLevel <= groundLevel) &
+            call whalo1(currentLevel, 1_intType, 0_intType, &
+                        .true., .false., .false.)
+
+        ! Apply all boundary conditions to all blocks on this level.
+        ! In case of a full mg mode, and a segegated turbulent solver,
+        ! first call the turbulent boundary conditions, such that the
+        ! turbulent kinetic energy is properly initialized in the halo's.
+
+        if (.not. corrections .and. equations == RANSEquations) then
+            call applyAllTurbBC(secondHalo)
+        end if
+
+        ! Apply all boundary conditions of the mean flow.
+
+        call applyAllBC(secondHalo)
+
+        ! If case this routine is called in full mg mode call the mean
+        ! flow boundary conditions again such that the normal momentum
+        ! boundary condition is treated correctly.
+
+        if (.not. corrections) call applyAllBC(secondHalo)
+
+        ! Exchange the solution. Either whalo1 or whalo2
+        ! must be called.
+
+        if (secondHalo) then
+            call whalo2(currentLevel, 1_intType, nVarInt, .true., &
+                        .true., .true.)
+        else
+            call whalo1(currentLevel, 1_intType, nVarInt, .true., &
+                        .true., .true.)
+        end if
+
+        ! For full multigrid mode the bleeds must be determined, the
+        ! boundary conditions must be applied one more time and the
+        ! solution must be exchanged again.
+
+        if (.not. corrections) then
+            call applyAllBC(secondHalo)
+
+            if (secondHalo) then
+                call whalo2(currentLevel, 1_intType, nVarInt, .true., &
+                            .true., .true.)
+            else
+                call whalo1(currentLevel, 1_intType, nVarInt, .true., &
+                            .true., .true.)
+            end if
+        end if
+
+    end subroutine transferToFineGrid
+
+    !      ==================================================================
+
+    subroutine extrapolateSolution
+        !
+        !       extrapolateSolution sets the solution of the cell halos by a
+        !       constant extrapolation. This routine is called after the
+        !       solution has been interpolated to the next finer grid and this
+        !       routine makes sure that the halo's are initialized.
+        !       Only the block to which the pointers in blockPointers
+        !       currently point is treated.
+        !
+        use blockPointers
+        use flowVarRefState
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, l
+
+        ! Constant extrapolation in i-direction.
+
+        do k = 2, kl
+            do j = 2, jl
+
+                do l = 1, nw
+                    w(0, j, k, l) = w(2, j, k, l)
+                    w(1, j, k, l) = w(2, j, k, l)
+                    w(ie, j, k, l) = w(il, j, k, l)
+                    w(ib, j, k, l) = w(il, j, k, l)
+                end do
+
+                p(0, j, k) = p(2, j, k)
+                p(1, j, k) = p(2, j, k)
+                p(ie, j, k) = p(il, j, k)
+                p(ib, j, k) = p(il, j, k)
+
+            end do
+        end do
+
+        ! Constant extrapolation in the j-direction. Take the just
+        ! interpolated values in i-direction into account.
+
+        do k = 2, kl
+            do i = 0, ib
+
+                do l = 1, nw
+                    w(i, 0, k, l) = w(i, 2, k, l)
+                    w(i, 1, k, l) = w(i, 2, k, l)
+                    w(i, je, k, l) = w(i, jl, k, l)
+                    w(i, jb, k, l) = w(i, jl, k, l)
+                end do
+
+                p(i, 0, k) = p(i, 2, k)
+                p(i, 1, k) = p(i, 2, k)
+                p(i, je, k) = p(i, jl, k)
+                p(i, jb, k) = p(i, jl, k)
+
+            end do
+        end do
+
+        ! Constant extrapolation in the k-direction. Take the just
+        ! interpolated values in i- and j-direction into account.
+
+        do j = 0, jb
+            do i = 0, ib
+
+                do l = 1, nw
+                    w(i, j, 0, l) = w(i, j, 2, l)
+                    w(i, j, 1, l) = w(i, j, 2, l)
+                    w(i, j, ke, l) = w(i, j, kl, l)
+                    w(i, j, kb, l) = w(i, j, kl, l)
+                end do
+
+                p(i, j, 0) = p(i, j, 2)
+                p(i, j, 1) = p(i, j, 2)
+                p(i, j, ke) = p(i, j, kl)
+                p(i, j, kb) = p(i, j, kl)
+
+            end do
+        end do
+
+    end subroutine extrapolateSolution
+
+    !      ==================================================================
+
+    subroutine extrapolateViscosities
+        !
+        !       extrapolateViscosities sets the laminar and eddy viscosities
+        !       of the cell halos by a constant extrapolation. This routine is
+        !       called after the solution has been interpolated to the next
+        !       finer grid and this routine makes sure that the halo's are
+        !       initialized.
+        !       Only the block to which the pointers in blockPointers
+        !       currently point is treated and only for a viscous problem.
+        !
+        use blockPointers
+        use flowVarRefState
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k
+
+        ! Return immediately if this is not a viscous problem.
+
+        if (.not. viscous) return
+
+        ! Constant extrapolation in i-direction.
+
+        do k = 2, kl
+            do j = 2, jl
+
+                rlv(0, j, k) = rlv(2, j, k)
+                rlv(1, j, k) = rlv(2, j, k)
+                rlv(ie, j, k) = rlv(il, j, k)
+                rlv(ib, j, k) = rlv(il, j, k)
+
+                if (eddyModel) then
+                    rev(0, j, k) = rev(2, j, k)
+                    rev(1, j, k) = rev(2, j, k)
+                    rev(ie, j, k) = rev(il, j, k)
+                    rev(ib, j, k) = rev(il, j, k)
+                end if
+
+            end do
+        end do
+
+        ! Constant extrapolation in the j-direction. Take the just
+        ! interpolated values in i-direction into account.
 
+        do k = 2, kl
+            do i = 0, ib
 
-          ! Compute the new state vector on the fine mesh in case the
-          ! corrections have just been interpolated.
+                rlv(i, 0, k) = rlv(i, 2, k)
+                rlv(i, 1, k) = rlv(i, 2, k)
+                rlv(i, je, k) = rlv(i, jl, k)
+                rlv(i, jb, k) = rlv(i, jl, k)
 
-          testCorrections2: if( corrections ) then
+                if (eddyModel) then
+                    rev(i, 0, k) = rev(i, 2, k)
+                    rev(i, 1, k) = rev(i, 2, k)
+                    rev(i, je, k) = rev(i, jl, k)
+                    rev(i, jb, k) = rev(i, jl, k)
+                end if
 
-             ! Flow field variables. Again the pressure is updated
-             ! and not the total energy. Make sure that the pressure and
-             ! density do not become negative.
+            end do
+        end do
+
+        ! Constant extrapolation in the k-direction. Take the just
+        ! interpolated values in i- and j-direction into account.
+
+        do j = 0, jb
+            do i = 0, ib
+
+                rlv(i, j, 0) = rlv(i, j, 2)
+                rlv(i, j, 1) = rlv(i, j, 2)
+                rlv(i, j, ke) = rlv(i, j, kl)
+                rlv(i, j, kb) = rlv(i, j, kl)
+
+                if (eddyModel) then
+                    rev(i, j, 0) = rev(i, j, 2)
+                    rev(i, j, 1) = rev(i, j, 2)
+                    rev(i, j, ke) = rev(i, j, kl)
+                    rev(i, j, kb) = rev(i, j, kl)
+                end if
+
+            end do
+        end do
+
+    end subroutine extrapolateViscosities
 
-             do k=2,kl
-                do j=2,jl
-                   do i=2,il
-                      w(i,j,k,irho) = w(i,j,k,irho) + dw(i,j,k,irho)
-                      w(i,j,k,ivx)  = w(i,j,k,ivx)  + dw(i,j,k,ivx)
-                      w(i,j,k,ivy)  = w(i,j,k,ivy)  + dw(i,j,k,ivy)
-                      w(i,j,k,ivz)  = w(i,j,k,ivz)  + dw(i,j,k,ivz)
-                      p(i,j,k)      = p(i,j,k)      + dw(i,j,k,irhoE)
+    subroutine executeMGCycle
+        !
+        !       executeMGCycle performs a multigrid cycle defined by
+        !       cycling, see the module iteration.
+        !
+        use flowVarRefState
+        use iteration
+        use inputIteration
+        use inputPhysics
+        use utils, only: terminate
+        use turbAPI, only: turbSolveDDADI
+        use solverUtils, only: timeStep, computeUtau
+        use smoothers, only: rungeKuttaSmoother, DADISmoother
+        use residuals, only: residual, initRes, sourceTerms
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn
 
-                      w(i,j,k,irho) = max(w(i,j,k,irho), &
-                           1.e-4_realType*rhoInf)
-                      p(i,j,k)      = max(p(i,j,k), &
-                           1.e-4_realType*pInfCorr)
-                   enddo
-                enddo
-             enddo
+        ! Initialize currentLevel to groundLevel, the ground level
+        ! of this multigrid cycle.
 
-             ! The possible turbulent variables.
+        currentLevel = groundLevel
 
-             do l=nt1,nVarInt
-                do k=2,kl
-                   do j=2,jl
-                      do i=2,il
-                         w(i,j,k,l) = w(i,j,k,l) + dw(i,j,k,l)
-                      enddo
-                   enddo
-                enddo
-             enddo
+        ! Loop over the number of steps in cycling.
 
-          else testCorrections2
+        mgLoop: do nn = 1, nstepsCycling
 
-             ! The solution must be interpolated. At the moment the
-             ! pressure is stored at the place of the total energy.
-             ! Copy this value in the pressure array.
-
-             do k=2,kl
-                do j=2,jl
-                   do i=2,il
-                      p(i,j,k) = w(i,j,k,irhoE)
-                   enddo
-                enddo
-             enddo
-
-          endif testCorrections2
-
-          ! Compute the total energy for the owned cells of this block.
-          ! If the solution must be interpolated, extrapolate the
-          ! solution in the halo's.
-
-          call computeEtotBlock(2_intType,il, 2_intType,jl, &
-               2_intType,kl, correctForK)
-          if(.not. corrections) call extrapolateSolution
-
-          ! Compute the laminar viscosity and eddy viscosity for the
-          ! owned cells of this block. If the solution must be
-          ! interpolated, extrapolate the viscosities in the halo's.
-
-          call computeLamViscosity(.False.)
-          call computeEddyViscosity(.False.)
-          if(.not. corrections) call extrapolateViscosities
-
-       enddo domains
-    enddo spectralLoop
-
-    ! Exchange the pressure if the pressure must be exchanged early.
-    ! Only the first halo's are needed, thus whalo1 is called.
-    ! On the finest mesh only.
-
-    if(exchangePressureEarly .and. currentLevel <= groundLevel) &
-         call whalo1(currentLevel, 1_intType, 0_intType, &
-         .true., .false., .false.)
-
-    ! Apply all boundary conditions to all blocks on this level.
-    ! In case of a full mg mode, and a segegated turbulent solver,
-    ! first call the turbulent boundary conditions, such that the
-    ! turbulent kinetic energy is properly initialized in the halo's.
-
-    if(.not. corrections .and. equations==RANSEquations) then
-       call applyAllTurbBC(secondHalo)
-    end if
-
-    ! Apply all boundary conditions of the mean flow.
-
-    call applyAllBC(secondHalo)
-
-    ! If case this routine is called in full mg mode call the mean
-    ! flow boundary conditions again such that the normal momentum
-    ! boundary condition is treated correctly.
-
-    if(.not. corrections) call applyAllBC(secondHalo)
-
-    ! Exchange the solution. Either whalo1 or whalo2
-    ! must be called.
-
-    if( secondHalo ) then
-       call whalo2(currentLevel, 1_intType, nVarInt, .true., &
-            .true., .true.)
-    else
-       call whalo1(currentLevel, 1_intType, nVarInt, .true., &
-            .true., .true.)
-    endif
-
-    ! For full multigrid mode the bleeds must be determined, the
-    ! boundary conditions must be applied one more time and the
-    ! solution must be exchanged again.
-
-    if(.not. corrections) then
-       call applyAllBC(secondHalo)
-
-       if( secondHalo ) then
-          call whalo2(currentLevel, 1_intType, nVarInt, .true., &
-               .true., .true.)
-       else
-          call whalo1(currentLevel, 1_intType, nVarInt, .true., &
-               .true., .true.)
-       endif
-    endif
-
-  end subroutine transferToFineGrid
-
-  !      ==================================================================
-
-  subroutine extrapolateSolution
-    !
-    !       extrapolateSolution sets the solution of the cell halos by a
-    !       constant extrapolation. This routine is called after the
-    !       solution has been interpolated to the next finer grid and this
-    !       routine makes sure that the halo's are initialized.
-    !       Only the block to which the pointers in blockPointers
-    !       currently point is treated.
-    !
-    use blockPointers
-    use flowVarRefState
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, l
-
-    ! Constant extrapolation in i-direction.
-
-    do k=2,kl
-       do j=2,jl
-
-          do l=1,nw
-             w(0,j,k,l)  = w(2,j,k,l)
-             w(1,j,k,l)  = w(2,j,k,l)
-             w(ie,j,k,l) = w(il,j,k,l)
-             w(ib,j,k,l) = w(il,j,k,l)
-          enddo
-
-          p(0,j,k)  = p(2,j,k)
-          p(1,j,k)  = p(2,j,k)
-          p(ie,j,k) = p(il,j,k)
-          p(ib,j,k) = p(il,j,k)
-
-       enddo
-    enddo
-
-    ! Constant extrapolation in the j-direction. Take the just
-    ! interpolated values in i-direction into account.
-
-    do k=2,kl
-       do i=0,ib
-
-          do l=1,nw
-             w(i,0,k,l)  = w(i,2,k,l)
-             w(i,1,k,l)  = w(i,2,k,l)
-             w(i,je,k,l) = w(i,jl,k,l)
-             w(i,jb,k,l) = w(i,jl,k,l)
-          enddo
-
-          p(i,0,k)  = p(i,2,k)
-          p(i,1,k)  = p(i,2,k)
-          p(i,je,k) = p(i,jl,k)
-          p(i,jb,k) = p(i,jl,k)
-
-       enddo
-    enddo
-
-    ! Constant extrapolation in the k-direction. Take the just
-    ! interpolated values in i- and j-direction into account.
-
-    do j=0,jb
-       do i=0,ib
-
-          do l=1,nw
-             w(i,j,0,l)  = w(i,j,2,l)
-             w(i,j,1,l)  = w(i,j,2,l)
-             w(i,j,ke,l) = w(i,j,kl,l)
-             w(i,j,kb,l) = w(i,j,kl,l)
-          enddo
-
-          p(i,j,0)  = p(i,j,2)
-          p(i,j,1)  = p(i,j,2)
-          p(i,j,ke) = p(i,j,kl)
-          p(i,j,kb) = p(i,j,kl)
-
-       enddo
-    enddo
-
-  end subroutine extrapolateSolution
-
-  !      ==================================================================
-
-  subroutine extrapolateViscosities
-    !
-    !       extrapolateViscosities sets the laminar and eddy viscosities
-    !       of the cell halos by a constant extrapolation. This routine is
-    !       called after the solution has been interpolated to the next
-    !       finer grid and this routine makes sure that the halo's are
-    !       initialized.
-    !       Only the block to which the pointers in blockPointers
-    !       currently point is treated and only for a viscous problem.
-    !
-    use blockPointers
-    use flowVarRefState
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k
-
-    ! Return immediately if this is not a viscous problem.
-
-    if(.not. viscous) return
-
-    ! Constant extrapolation in i-direction.
-
-    do k=2,kl
-       do j=2,jl
-
-          rlv(0,j,k)  = rlv(2,j,k)
-          rlv(1,j,k)  = rlv(2,j,k)
-          rlv(ie,j,k) = rlv(il,j,k)
-          rlv(ib,j,k) = rlv(il,j,k)
-
-          if( eddyModel ) then
-             rev(0,j,k)  = rev(2,j,k)
-             rev(1,j,k)  = rev(2,j,k)
-             rev(ie,j,k) = rev(il,j,k)
-             rev(ib,j,k) = rev(il,j,k)
-          endif
-
-       enddo
-    enddo
-
-    ! Constant extrapolation in the j-direction. Take the just
-    ! interpolated values in i-direction into account.
+            ! Determine what must be done.
 
-    do k=2,kl
-       do i=0,ib
+            select case (cycling(nn))
 
-          rlv(i,0,k)  = rlv(i,2,k)
-          rlv(i,1,k)  = rlv(i,2,k)
-          rlv(i,je,k) = rlv(i,jl,k)
-          rlv(i,jb,k) = rlv(i,jl,k)
+            case (-1_intType)
 
-          if( eddyModel ) then
-             rev(i,0,k)  = rev(i,2,k)
-             rev(i,1,k)  = rev(i,2,k)
-             rev(i,je,k) = rev(i,jl,k)
-             rev(i,jb,k) = rev(i,jl,k)
-          endif
+                ! Set the new currentLevel and and prolongate the
+                ! the corrections to this grid level.
 
-       enddo
-    enddo
-
-    ! Constant extrapolation in the k-direction. Take the just
-    ! interpolated values in i- and j-direction into account.
-
-    do j=0,jb
-       do i=0,ib
-
-          rlv(i,j,0)  = rlv(i,j,2)
-          rlv(i,j,1)  = rlv(i,j,2)
-          rlv(i,j,ke) = rlv(i,j,kl)
-          rlv(i,j,kb) = rlv(i,j,kl)
-
-          if( eddyModel ) then
-             rev(i,j,0)  = rev(i,j,2)
-             rev(i,j,1)  = rev(i,j,2)
-             rev(i,j,ke) = rev(i,j,kl)
-             rev(i,j,kb) = rev(i,j,kl)
-          endif
-
-       enddo
-    enddo
-
-  end subroutine extrapolateViscosities
+                currentLevel = currentLevel - 1
+                call transferToFineGrid(.true.)
 
-  subroutine executeMGCycle
-    !
-    !       executeMGCycle performs a multigrid cycle defined by
-    !       cycling, see the module iteration.
-    !
-    use flowVarRefState
-    use iteration
-    use inputIteration
-    use inputPhysics
-    use utils, only : terminate
-    use turbAPI, only : turbSolveDDADI
-    use solverUtils, only : timeStep, computeUtau
-    use smoothers, only : rungeKuttaSmoother, DADISmoother
-    use residuals, only : residual, initRes, sourceTerms
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn
+            case (0_intType)
 
-    ! Initialize currentLevel to groundLevel, the ground level
-    ! of this multigrid cycle.
+                ! Perform a smoothing iteration on this grid level.
+                ! First determine the current situation. If this is the
+                ! first entry in cycling the residual already contains the
+                ! correct values and therefore it needs not to be computed.
 
-    currentLevel = groundLevel
+                if (nn > 1) then
 
-    ! Loop over the number of steps in cycling.
+                    ! Compute the residual if the previous action was not a
+                    ! restriction. In that case the residual already contains
+                    ! the correct values.
 
-    mgLoop: do nn=1,nstepsCycling
+                    if (cycling(nn - 1) /= 1_intType) then
 
-       ! Determine what must be done.
+                        ! Initialize and compute the residual.
 
-       select case (cycling(nn))
+                        rkStage = 0
+                        call timeStep(.false.)
 
-       case (-1_intType)
+                        call initres(1_intType, nwf)
+                        call sourceTerms()
+                        call residual
 
-          ! Set the new currentLevel and and prolongate the
-          ! the corrections to this grid level.
+                    end if
+                end if
 
-          currentLevel = currentLevel -1
-          call transferToFineGrid(.true.)
+                ! Perform a smoothing step. Determine the smoother to
+                ! be used and call the appropriate routine.
 
-       case ( 0_intType)
+                select case (smoother)
+                case (RungeKutta)
+                    call RungeKuttaSmoother
+                    iterType = "      RK"
+                case (DADI)
+                    call DADISmoother
+                    iterType = "    DADI"
+                case (nlLusgs)
+                    call terminate("executeMGCycle", &
+                                   "nlLusgs smoother not implemented yet")
+                case (nlLusgsLine)
+                    call terminate("executeMGCycle", &
+                         "nlLusgsLine smoother not implemented &
+                         &yet")
+                end select
 
-          ! Perform a smoothing iteration on this grid level.
-          ! First determine the current situation. If this is the
-          ! first entry in cycling the residual already contains the
-          ! correct values and therefore it needs not to be computed.
+            case (1_intType)
 
-          if(nn > 1) then
+                ! Restrict the solution and residual to the next coarser
+                ! grid level. Inside transferToCoarseGrid currentLevel
+                ! is updated.
 
-             ! Compute the residual if the previous action was not a
-             ! restriction. In that case the residual already contains
-             ! the correct values.
+                call transferToCoarseGrid
 
-             if(cycling(nn-1) /= 1_intType) then
+            end select
 
-                ! Initialize and compute the residual.
+        end do mgLoop
 
-                rkStage = 0
-                call timeStep(.false.)
+        ! Reset the values of rkStage and currentLevel, such that
+        ! they correspond to a new iteration.
 
-                call initres(1_intType, nwf)
-                call sourceTerms()
-                call residual
+        rkStage = 0
+        currentLevel = groundLevel
 
-             endif
-          endif
+        ! Compute the latest values of the skin friction velocity.
+        ! The currently stored values are of the previous iteration.
 
-          ! Perform a smoothing step. Determine the smoother to
-          ! be used and call the appropriate routine.
+        call computeUtau
 
-          select case (smoother)
-          case (RungeKutta)
-             call RungeKuttaSmoother
-             iterType = "      RK"
-          case (DADI)
-             call DADISmoother
-             iterType = "    DADI"
-          case (nlLusgs)
-             call terminate("executeMGCycle", &
-                  "nlLusgs smoother not implemented yet")
-          case (nlLusgsLine)
-             call terminate("executeMGCycle", &
-                  "nlLusgsLine smoother not implemented &
-                  &yet")
-          end select
+        ! Apply an iteration to the turbulent transport equations in
+        ! case these must be solved separately.
 
-       case ( 1_intType)
+        if (equations == RANSEquations) then
+            call turbSolveDDADI
+        end if
 
-          ! Restrict the solution and residual to the next coarser
-          ! grid level. Inside transferToCoarseGrid currentLevel
-          ! is updated.
+        ! Compute the time step.
 
-          call transferToCoarseGrid
+        call timeStep(.false.)
 
-       end select
+        ! Compute the residual of the new solution on the ground level.
 
-    enddo mgLoop
+        call initres(1_intType, nwf)
+        call sourceTerms()
+        call residual
 
-    ! Reset the values of rkStage and currentLevel, such that
-    ! they correspond to a new iteration.
+        ! Set some information for monitoring purposes
+        approxTotalIts = approxTotalIts + 1
 
-    rkStage = 0
-    currentLevel = groundLevel
+    end subroutine executeMGCycle
 
-    ! Compute the latest values of the skin friction velocity.
-    ! The currently stored values are of the previous iteration.
+    subroutine setCycleStrategy
+        !
+        !       setCycleStrategy sets the multigrid cycling strategy for the
+        !       multigrid level groundLevel. It is cycle strategy for the
+        !       fine grid cut off at the current grid level. If the grid level
+        !       is not in the range of the fine grid cycle strategy, cycling
+        !       will be set to a single grid strategy.
+        !
+        use constants
+        use inputIteration, only: nMGSteps, cycleStrategy
+        use iteration, only: cycling, groundLevel, nStepsCycling
+        use utils, only: returnFail
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i
+        integer(kind=intType) :: thisLevel, maxLevel
+
+        ! Initialize thisLevel and maxLevel to 1, i.e. the finest grid.
+
+        thisLevel = 1
+        maxLevel = 1
+
+        ! Determine the cycling strategy for groundLevel by looping over
+        ! the fine grid cycling strategy and picking the correct entries.
+
+        nStepsCycling = 0
+        do i = 1, nMGSteps
+            thisLevel = thisLevel + cycleStrategy(i)
+            maxLevel = max(maxLevel, thisLevel)
+
+            ! Store this entry in cycling if a) we are on a coarser grid
+            ! than groundLevel or b) if we are on groundLevel and
+            ! cycleStrategy(i) does not correspond to a restriction,
+            ! i.e. 1.
+
+            if ((thisLevel == groundLevel .and. cycleStrategy(i) /= 1) .or. &
+                thisLevel > groundLevel) then
+                nStepsCycling = nStepsCycling + 1
+                cycling(nstepsCycling) = cycleStrategy(i)
+            end if
+
+            ! Break the loop if a cycle on the current grid level has
+            ! been completed.
+
+            if (thisLevel == groundLevel .and. cycleStrategy(i) == -1) exit
+
+        end do
+
+        ! Take care of the case that groundLevel >= maxLevel.
+        ! In this case a single grid strategy is used.
+
+        if (groundLevel >= maxLevel) then
+            nStepsCycling = 1
+            cycling(1) = 0
+        end if
+
+        ! Check in debug mode if the multigrid strategy created is
+        ! a valid one.
+
+        if (debug) then
+
+            thisLevel = 0
+            do i = 1, nstepsCycling
+                thisLevel = thisLevel + cycling(i)
+            end do
+
+            if (thisLevel /= 0) &
+                call returnFail("setCyleStrategy", "Invalid strategy created")
+
+        end if
+
+    end subroutine setCycleStrategy
+
+    subroutine setCornerRowHalos(nVar)
+        !
+        !       setCornerRowHalos initializes the halo's next to corner row
+        !       halo's, such that it contains some values. Otherwise it may
+        !       be uninitialized or cause a floating point exception, as this
+        !       memory is also used to compute the mg corrections.
+        !       It is assumed that the pointers in blockPointers already
+        !       point to the correct block.
+        !
+        use constants
+        use blockPointers, only: w, p, rlv, rev, nx, ny, nz, &
+                                 il, ie, jl, je, kl, ke
+        use flowVarRefState, only: eddyModel, viscous
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        integer(kind=intType), intent(in) :: nVar
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, l, mm, ll
+
+        !       Halo's on the i=iMin and i=iMax plane.
+        !
+        ! K-rows.
+
+        mm = min(3_intType, jl)
+        ll = max(2_intType, ny)
+
+        do k = 2, kl
+            do l = 1, nVar
+                w(1, 2, k, l) = w(2, 2, k, l)
+                w(1, mm, k, l) = w(2, mm, k, l)
+                w(1, jl, k, l) = w(2, jl, k, l)
+                w(1, ll, k, l) = w(2, ll, k, l)
+                w(ie, 2, k, l) = w(il, 2, k, l)
+                w(ie, mm, k, l) = w(il, mm, k, l)
+                w(ie, jl, k, l) = w(il, jl, k, l)
+                w(ie, ll, k, l) = w(il, ll, k, l)
+            end do
+
+            p(1, 2, k) = p(2, 2, k)
+            p(1, mm, k) = p(2, mm, k)
+            p(1, jl, k) = p(2, jl, k)
+            p(1, ll, k) = p(2, ll, k)
+            p(ie, 2, k) = p(il, 2, k)
+            p(ie, mm, k) = p(il, mm, k)
+            p(ie, jl, k) = p(il, jl, k)
+            p(ie, ll, k) = p(il, ll, k)
+
+            if (viscous) then
+                rlv(1, 2, k) = rlv(2, 2, k)
+                rlv(1, mm, k) = rlv(2, mm, k)
+                rlv(1, jl, k) = rlv(2, jl, k)
+                rlv(1, ll, k) = rlv(2, ll, k)
+                rlv(ie, 2, k) = rlv(il, 2, k)
+                rlv(ie, mm, k) = rlv(il, mm, k)
+                rlv(ie, jl, k) = rlv(il, jl, k)
+                rlv(ie, ll, k) = rlv(il, ll, k)
+            end if
+
+            if (eddyModel) then
+                rev(1, 2, k) = rev(2, 2, k)
+                rev(1, mm, k) = rev(2, mm, k)
+                rev(1, jl, k) = rev(2, jl, k)
+                rev(1, ll, k) = rev(2, ll, k)
+                rev(ie, 2, k) = rev(il, 2, k)
+                rev(ie, mm, k) = rev(il, mm, k)
+                rev(ie, jl, k) = rev(il, jl, k)
+                rev(ie, ll, k) = rev(il, ll, k)
+            end if
+        end do
+
+        ! J-rows; no need to include the corners. These have been set in
+        ! the previous k-loop.
+
+        mm = min(3_intType, kl)
+        ll = max(2_intType, nz)
+
+        do j = 3, ny
+            do l = 1, nVar
+                w(1, j, 2, l) = w(2, j, 2, l)
+                w(1, j, mm, l) = w(2, j, mm, l)
+                w(1, j, kl, l) = w(2, j, kl, l)
+                w(1, j, ll, l) = w(2, j, ll, l)
+                w(ie, j, 2, l) = w(il, j, 2, l)
+                w(ie, j, mm, l) = w(il, j, mm, l)
+                w(ie, j, kl, l) = w(il, j, kl, l)
+                w(ie, j, ll, l) = w(il, j, ll, l)
+            end do
+
+            p(1, j, 2) = p(2, j, 2)
+            p(1, j, mm) = p(2, j, mm)
+            p(1, j, kl) = p(2, j, kl)
+            p(1, j, ll) = p(2, j, ll)
+            p(ie, j, 2) = p(il, j, 2)
+            p(ie, j, mm) = p(il, j, mm)
+            p(ie, j, kl) = p(il, j, kl)
+            p(ie, j, ll) = p(il, j, ll)
+
+            if (viscous) then
+                rlv(1, j, 2) = rlv(2, j, 2)
+                rlv(1, j, mm) = rlv(2, j, mm)
+                rlv(1, j, kl) = rlv(2, j, kl)
+                rlv(1, j, ll) = rlv(2, j, ll)
+                rlv(ie, j, 2) = rlv(il, j, 2)
+                rlv(ie, j, mm) = rlv(il, j, mm)
+                rlv(ie, j, kl) = rlv(il, j, kl)
+                rlv(ie, j, ll) = rlv(il, j, ll)
+            end if
+
+            if (eddyModel) then
+                rev(1, j, 2) = rev(2, j, 2)
+                rev(1, j, mm) = rev(2, j, mm)
+                rev(1, j, kl) = rev(2, j, kl)
+                rev(1, j, ll) = rev(2, j, ll)
+                rev(ie, j, 2) = rev(il, j, 2)
+                rev(ie, j, mm) = rev(il, j, mm)
+                rev(ie, j, kl) = rev(il, j, kl)
+                rev(ie, j, ll) = rev(il, j, ll)
+            end if
+        end do
+        !
+        !       Halo's on the j=jMin and j=jMax plane.
+        !
+        ! K-rows; no need to include the corners; this is done in the
+        ! next i-loop.
+
+        mm = min(3_intType, il)
+        ll = max(2_intType, nx)
+
+        do k = 3, nz
+            do l = 1, nVar
+                w(2, 1, k, l) = w(2, 2, k, l)
+                w(mm, 1, k, l) = w(mm, 2, k, l)
+                w(il, 1, k, l) = w(il, 2, k, l)
+                w(ll, 1, k, l) = w(ll, 2, k, l)
+                w(2, je, k, l) = w(2, jl, k, l)
+                w(mm, je, k, l) = w(mm, jl, k, l)
+                w(il, je, k, l) = w(il, jl, k, l)
+                w(ll, je, k, l) = w(ll, jl, k, l)
+            end do
+
+            p(2, 1, k) = p(2, 2, k)
+            p(mm, 1, k) = p(mm, 2, k)
+            p(il, 1, k) = p(il, 2, k)
+            p(ll, 1, k) = p(ll, 2, k)
+            p(2, je, k) = p(2, jl, k)
+            p(mm, je, k) = p(mm, jl, k)
+            p(il, je, k) = p(il, jl, k)
+            p(ll, je, k) = p(ll, jl, k)
+
+            if (viscous) then
+                rlv(2, 1, k) = rlv(2, 2, k)
+                rlv(mm, 1, k) = rlv(mm, 2, k)
+                rlv(il, 1, k) = rlv(il, 2, k)
+                rlv(ll, 1, k) = rlv(ll, 2, k)
+                rlv(2, je, k) = rlv(2, jl, k)
+                rlv(mm, je, k) = rlv(mm, jl, k)
+                rlv(il, je, k) = rlv(il, jl, k)
+                rlv(ll, je, k) = rlv(ll, jl, k)
+            end if
+
+            if (eddyModel) then
+                rev(2, 1, k) = rev(2, 2, k)
+                rev(mm, 1, k) = rev(mm, 2, k)
+                rev(il, 1, k) = rev(il, 2, k)
+                rev(ll, 1, k) = rev(ll, 2, k)
+                rev(2, je, k) = rev(2, jl, k)
+                rev(mm, je, k) = rev(mm, jl, k)
+                rev(il, je, k) = rev(il, jl, k)
+                rev(ll, je, k) = rev(ll, jl, k)
+            end if
+        end do
+
+        ! I-rows, including halo's set on the iMin and iMax plane.
+
+        mm = min(3_intType, kl)
+        ll = max(2_intType, nz)
+
+        do i = 1, ie
+            do l = 1, nVar
+                w(i, 1, 2, l) = w(i, 2, 2, l)
+                w(i, 1, mm, l) = w(i, 2, mm, l)
+                w(i, 1, kl, l) = w(i, 2, kl, l)
+                w(i, 1, ll, l) = w(i, 2, ll, l)
+                w(i, je, 2, l) = w(i, jl, 2, l)
+                w(i, je, mm, l) = w(i, jl, mm, l)
+                w(i, je, kl, l) = w(i, jl, kl, l)
+                w(i, je, ll, l) = w(i, jl, ll, l)
+            end do
+
+            p(i, 1, 2) = p(i, 2, 2)
+            p(i, 1, mm) = p(i, 2, mm)
+            p(i, 1, kl) = p(i, 2, kl)
+            p(i, 1, ll) = p(i, 2, ll)
+            p(i, je, 2) = p(i, jl, 2)
+            p(i, je, mm) = p(i, jl, mm)
+            p(i, je, kl) = p(i, jl, kl)
+            p(i, je, ll) = p(i, jl, ll)
+
+            if (viscous) then
+                rlv(i, 1, 2) = rlv(i, 2, 2)
+                rlv(i, 1, mm) = rlv(i, 2, mm)
+                rlv(i, 1, kl) = rlv(i, 2, kl)
+                rlv(i, 1, ll) = rlv(i, 2, ll)
+                rlv(i, je, 2) = rlv(i, jl, 2)
+                rlv(i, je, mm) = rlv(i, jl, mm)
+                rlv(i, je, kl) = rlv(i, jl, kl)
+                rlv(i, je, ll) = rlv(i, jl, ll)
+            end if
+
+            if (eddyModel) then
+                rev(i, 1, 2) = rev(i, 2, 2)
+                rev(i, 1, mm) = rev(i, 2, mm)
+                rev(i, 1, kl) = rev(i, 2, kl)
+                rev(i, 1, ll) = rev(i, 2, ll)
+                rev(i, je, 2) = rev(i, jl, 2)
+                rev(i, je, mm) = rev(i, jl, mm)
+                rev(i, je, kl) = rev(i, jl, kl)
+                rev(i, je, ll) = rev(i, jl, ll)
+            end if
+        end do
+        !
+        !       Halo's on the k=kMin and k=kMax plane.
+        !
+        ! J-rows, including halo's set on the jMin and jMax plane.
+
+        mm = min(3_intType, il)
+        ll = max(2_intType, nx)
+
+        do j = 1, je
+            do l = 1, nVar
+                w(2, j, 1, l) = w(2, j, 2, l)
+                w(mm, j, 1, l) = w(mm, j, 2, l)
+                w(il, j, 1, l) = w(il, j, 2, l)
+                w(ll, j, 1, l) = w(ll, j, 2, l)
+                w(2, j, ke, l) = w(2, j, kl, l)
+                w(mm, j, ke, l) = w(mm, j, kl, l)
+                w(il, j, ke, l) = w(il, j, kl, l)
+                w(ll, j, ke, l) = w(ll, j, kl, l)
+            end do
+
+            p(2, j, 1) = p(2, j, 2)
+            p(mm, j, 1) = p(mm, j, 2)
+            p(il, j, 1) = p(il, j, 2)
+            p(ll, j, 1) = p(ll, j, 2)
+            p(2, j, ke) = p(2, j, kl)
+            p(mm, j, ke) = p(mm, j, kl)
+            p(il, j, ke) = p(il, j, kl)
+            p(ll, j, ke) = p(ll, j, kl)
+
+            if (viscous) then
+                rlv(2, j, 1) = rlv(2, j, 2)
+                rlv(mm, j, 1) = rlv(mm, j, 2)
+                rlv(il, j, 1) = rlv(il, j, 2)
+                rlv(ll, j, 1) = rlv(ll, j, 2)
+                rlv(2, j, ke) = rlv(2, j, kl)
+                rlv(mm, j, ke) = rlv(mm, j, kl)
+                rlv(il, j, ke) = rlv(il, j, kl)
+                rlv(ll, j, ke) = rlv(ll, j, kl)
+            end if
+
+            if (eddyModel) then
+                rev(2, j, 1) = rev(2, j, 2)
+                rev(mm, j, 1) = rev(mm, j, 2)
+                rev(il, j, 1) = rev(il, j, 2)
+                rev(ll, j, 1) = rev(ll, j, 2)
+                rev(2, j, ke) = rev(2, j, kl)
+                rev(mm, j, ke) = rev(mm, j, kl)
+                rev(il, j, ke) = rev(il, j, kl)
+                rev(ll, j, ke) = rev(ll, j, kl)
+            end if
+        end do
+
+        ! I-rows, including halo's set on the iMin and iMax plane.
+
+        mm = min(3_intType, jl)
+        ll = max(2_intType, ny)
+
+        do i = 1, ie
+            do l = 1, nVar
+                w(i, 2, 1, l) = w(i, 2, 2, l)
+                w(i, mm, 1, l) = w(i, mm, 2, l)
+                w(i, jl, 1, l) = w(i, jl, 2, l)
+                w(i, ll, 1, l) = w(i, ll, 2, l)
+                w(i, 2, ke, l) = w(i, 2, kl, l)
+                w(i, mm, ke, l) = w(i, mm, kl, l)
+                w(i, jl, ke, l) = w(i, jl, kl, l)
+                w(i, ll, ke, l) = w(i, ll, kl, l)
+            end do
+
+            p(i, 2, 1) = p(i, 2, 2)
+            p(i, mm, 1) = p(i, mm, 2)
+            p(i, jl, 1) = p(i, jl, 2)
+            p(i, ll, 1) = p(i, ll, 2)
+            p(i, 2, ke) = p(i, 2, kl)
+            p(i, mm, ke) = p(i, mm, kl)
+            p(i, jl, ke) = p(i, jl, kl)
+            p(i, ll, ke) = p(i, ll, kl)
+
+            if (viscous) then
+                rlv(i, 2, 1) = rlv(i, 2, 2)
+                rlv(i, mm, 1) = rlv(i, mm, 2)
+                rlv(i, jl, 1) = rlv(i, jl, 2)
+                rlv(i, ll, 1) = rlv(i, ll, 2)
+                rlv(i, 2, ke) = rlv(i, 2, kl)
+                rlv(i, mm, ke) = rlv(i, mm, kl)
+                rlv(i, jl, ke) = rlv(i, jl, kl)
+                rlv(i, ll, ke) = rlv(i, ll, kl)
+            end if
+
+            if (eddyModel) then
+                rev(i, 2, 1) = rev(i, 2, 2)
+                rev(i, mm, 1) = rev(i, mm, 2)
+                rev(i, jl, 1) = rev(i, jl, 2)
+                rev(i, ll, 1) = rev(i, ll, 2)
+                rev(i, 2, ke) = rev(i, 2, kl)
+                rev(i, mm, ke) = rev(i, mm, kl)
+                rev(i, jl, ke) = rev(i, jl, kl)
+                rev(i, ll, ke) = rev(i, ll, kl)
+            end if
+        end do
+
+    end subroutine setCornerRowHalos
+
+    subroutine setCorrectionsCoarseHalos(sps, nn, coarseLevel, &
+                                         fact, nVarInt)
+        !
+        !       setCorrectionsCoarseHalos sets the values of the coarse
+        !       grid boundary halo corrections. For all boundaries, either a
+        !       homogeneous Dirichlet condition (fact = 0.0) or a Neumann
+        !       condition (fact = 1.0) is used. Exception are symmetry planes,
+        !       where a mirroring takes place.
+        !
+        use constants
+        use block, only: BCDataType, flowDoms
+        use flowVarRefState, only: nt1
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: sps, nn, coarseLevel
+        integer(kind=intType), intent(in) :: nVarInt
+        real(kind=realType), intent(in) :: fact
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, l, mm
+        integer(kind=intType) :: il, jl, kl, ie, je, ke
+
+        real(kind=realType) :: nnx, nny, nnz, vn
+
+        real(kind=realType), dimension(:, :, :, :), pointer :: ww
+        real(kind=realType), dimension(:, :, :), pointer :: ww1, ww2
+
+        type(BCDataType), dimension(:), pointer :: BCData
+
+        ! Set the pointer ww to the coarse grid variables. At the moment
+        ! when this routine is called, these contain the corrections in a
+        ! normal grid cycle and the true variables when used to
+        ! interpolate the solution to the next finer mesh level in full
+        ! multigrid mode. Also set the pointer for BCData, such that the
+        ! unit normals are accessed easier.
+
+        ww => flowDoms(nn, coarseLevel, sps)%w
+        BCData => flowDoms(nn, coarseLevel, sps)%BCData
+
+        ! Easier storage of the upper coarse block range.
+
+        il = flowDoms(nn, coarseLevel, sps)%il
+        jl = flowDoms(nn, coarseLevel, sps)%jl
+        kl = flowDoms(nn, coarseLevel, sps)%kl
+
+        ie = flowDoms(nn, coarseLevel, sps)%ie
+        je = flowDoms(nn, coarseLevel, sps)%je
+        ke = flowDoms(nn, coarseLevel, sps)%ke
+
+        ! Loop over the number of boundary subfaces to correct the
+        ! boundary halo values.
+
+        subfacesCoarse: do mm = 1, flowDoms(nn, coarseLevel, sps)%nBocos
+
+            ! Set the pointers for ww1 and ww2, depending on the block face
+            ! on which this subface is located. Note that bcFaceID is the
+            ! same for all spectral modes and only the 1st is allocated.
+            ! Therefore the value of the 1st spectral mode is used here.
+
+            select case (flowDoms(nn, coarseLevel, 1)%BCFaceID(mm))
+
+            case (iMin)
+                ww1 => ww(1, 1:, 1:, :); ww2 => ww(2, 1:, 1:, :)
+            case (iMax)
+                ww1 => ww(ie, 1:, 1:, :); ww2 => ww(il, 1:, 1:, :)
+            case (jMin)
+                ww1 => ww(1:, 1, 1:, :); ww2 => ww(1:, 2, 1:, :)
+            case (jMax)
+                ww1 => ww(1:, je, 1:, :); ww2 => ww(1:, jl, 1:, :)
+            case (kMin)
+                ww1 => ww(1:, 1:, 1, :); ww2 => ww(1:, 1:, 2, :)
+            case (kMax)
+                ww1 => ww(1:, 1:, ke, :); ww2 => ww(1:, 1:, kl, :)
+
+            end select
+
+            ! Choose what to do based on the BC type. Note that BCType is
+            ! the same for all spectral modes and only the 1st is allocated.
+            ! Therefore the value of the 1st spectral mode is used here.
+
+            select case (flowDoms(nn, coarseLevel, 1)%BCType(mm))
+
+            case (SlidingInterface, OversetOuterBound, &
+                  DomainInterfaceAll, DomainInterfaceRhoUVW, &
+                  DomainInterfaceP, DomainInterfaceRho, &
+                  DomainInterfaceTotal)
+
+                ! None of these are physical boundary conditions and thus
+                ! nothing needs to be done. The halos already contain the
+                ! corrections or solution.
+
+            case (Symm)
+
+                ! This is a symmetry plane. Loop over the faces of this
+                ! subface.
+
+                do j = BCData(mm)%jcBeg, BCData(mm)%jcEnd
+                    do i = BCData(mm)%icBeg, BCData(mm)%icEnd
+
+                        ! Compute twice the normal velocity component of the
+                        ! internal cell.
+
+                        nnx = BCData(mm)%norm(i, j, 1)
+                        nny = BCData(mm)%norm(i, j, 2)
+                        nnz = BCData(mm)%norm(i, j, 3)
+
+                        vn = two * (ww2(i, j, ivx) * nnx + ww2(i, j, ivy) * nny &
+                                    + ww2(i, j, ivz) * nnz)
 
-    call computeUtau
+                        ! Compute the halo state. Make sure that the average
+                        ! normal velocity component is zero.
 
-    ! Apply an iteration to the turbulent transport equations in
-    ! case these must be solved separately.
+                        ww1(i, j, irho) = ww2(i, j, irho)
+                        ww1(i, j, ivx) = ww2(i, j, ivx) - vn * nnx
+                        ww1(i, j, ivy) = ww2(i, j, ivy) - vn * nny
+                        ww1(i, j, ivz) = ww2(i, j, ivz) - vn * nnz
+                        ww1(i, j, irhoE) = ww2(i, j, irhoE)
 
-    if (equations == RANSEquations) then
-       call turbSolveDDADI
-    end if
+                        do l = nt1, nVarInt
+                            ww1(i, j, l) = ww2(i, j, l)
+                        end do
 
-    ! Compute the time step.
+                    end do
+                end do
 
-    call timeStep(.false.)
+            case default
 
-    ! Compute the residual of the new solution on the ground level.
+                ! Other type of boundary subface. Set the boundary halo's
+                ! as fact times the internal cell value.
 
-    call initres(1_intType, nwf)
-    call sourceTerms()
-    call residual
+                do l = 1, nVarInt
+                    do j = BCData(mm)%jcBeg, BCData(mm)%jcEnd
+                        do i = BCData(mm)%icBeg, BCData(mm)%icEnd
+                            ww1(i, j, l) = fact * ww2(i, j, l)
+                        end do
+                    end do
+                end do
 
-    ! Set some information for monitoring purposes
-    approxTotalIts = approxTotalIts + 1
+            end select
+        end do subfacesCoarse
 
-  end subroutine executeMGCycle
-
-  subroutine setCycleStrategy
-    !
-    !       setCycleStrategy sets the multigrid cycling strategy for the
-    !       multigrid level groundLevel. It is cycle strategy for the
-    !       fine grid cut off at the current grid level. If the grid level
-    !       is not in the range of the fine grid cycle strategy, cycling
-    !       will be set to a single grid strategy.
-    !
-    use constants
-    use inputIteration, only : nMGSteps, cycleStrategy
-    use iteration, only : cycling, groundLevel, nStepsCycling
-    use utils, only : returnFail
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i
-    integer(kind=intType) :: thisLevel, maxLevel
-
-    ! Initialize thisLevel and maxLevel to 1, i.e. the finest grid.
-
-    thisLevel = 1
-    maxLevel  = 1
-
-    ! Determine the cycling strategy for groundLevel by looping over
-    ! the fine grid cycling strategy and picking the correct entries.
-
-    nStepsCycling = 0
-    do i=1,nMGSteps
-       thisLevel = thisLevel + cycleStrategy(i)
-       maxLevel  = max(maxLevel, thisLevel)
-
-       ! Store this entry in cycling if a) we are on a coarser grid
-       ! than groundLevel or b) if we are on groundLevel and
-       ! cycleStrategy(i) does not correspond to a restriction,
-       ! i.e. 1.
-
-       if((thisLevel == groundLevel .and. cycleStrategy(i) /= 1) .or. &
-            thisLevel > groundLevel) then
-          nStepsCycling = nStepsCycling + 1
-          cycling(nstepsCycling) = cycleStrategy(i)
-       endif
-
-       ! Break the loop if a cycle on the current grid level has
-       ! been completed.
-
-       if(thisLevel == groundLevel .and. cycleStrategy(i) == -1) exit
-
-    enddo
-
-    ! Take care of the case that groundLevel >= maxLevel.
-    ! In this case a single grid strategy is used.
-
-    if(groundLevel >= maxLevel) then
-       nStepsCycling = 1
-       cycling(1)    = 0
-    endif
-
-    ! Check in debug mode if the multigrid strategy created is
-    ! a valid one.
-
-    if( debug ) then
-
-       thisLevel = 0
-       do i=1,nstepsCycling
-          thisLevel = thisLevel + cycling(i)
-       enddo
-
-       if(thisLevel /= 0) &
-            call returnFail("setCyleStrategy", "Invalid strategy created")
-
-    endif
-
-  end subroutine setCycleStrategy
-
-  subroutine setCornerRowHalos(nVar)
-    !
-    !       setCornerRowHalos initializes the halo's next to corner row
-    !       halo's, such that it contains some values. Otherwise it may
-    !       be uninitialized or cause a floating point exception, as this
-    !       memory is also used to compute the mg corrections.
-    !       It is assumed that the pointers in blockPointers already
-    !       point to the correct block.
-    !
-    use constants
-    use blockPointers, only : w, p, rlv, rev, nx, ny, nz,  &
-         il, ie, jl, je, kl, ke
-    use flowVarRefState, only : eddyModel, viscous
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    integer(kind=intType), intent(in) :: nVar
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, l, mm, ll
-
-    !       Halo's on the i=iMin and i=iMax plane.
-    !
-    ! K-rows.
-
-    mm = min(3_intType,jl)
-    ll = max(2_intType,ny)
-
-    do k=2,kl
-       do l=1,nVar
-          w(1, 2, k,l) = w(2, 2, k,l)
-          w(1, mm,k,l) = w(2, mm,k,l)
-          w(1, jl,k,l) = w(2, jl,k,l)
-          w(1, ll,k,l) = w(2, ll,k,l)
-          w(ie,2, k,l) = w(il,2, k,l)
-          w(ie,mm,k,l) = w(il,mm,k,l)
-          w(ie,jl,k,l) = w(il,jl,k,l)
-          w(ie,ll,k,l) = w(il,ll,k,l)
-       enddo
-
-       p(1, 2, k) = p(2, 2, k)
-       p(1, mm,k) = p(2, mm,k)
-       p(1, jl,k) = p(2, jl,k)
-       p(1, ll,k) = p(2, ll,k)
-       p(ie,2, k) = p(il,2, k)
-       p(ie,mm,k) = p(il,mm,k)
-       p(ie,jl,k) = p(il,jl,k)
-       p(ie,ll,k) = p(il,ll,k)
-
-       if( viscous) then
-          rlv(1, 2, k) = rlv(2, 2, k)
-          rlv(1, mm,k) = rlv(2, mm,k)
-          rlv(1, jl,k) = rlv(2, jl,k)
-          rlv(1, ll,k) = rlv(2, ll,k)
-          rlv(ie,2, k) = rlv(il,2, k)
-          rlv(ie,mm,k) = rlv(il,mm,k)
-          rlv(ie,jl,k) = rlv(il,jl,k)
-          rlv(ie,ll,k) = rlv(il,ll,k)
-       endif
-
-       if( eddyModel ) then
-          rev(1, 2, k) = rev(2, 2, k)
-          rev(1, mm,k) = rev(2, mm,k)
-          rev(1, jl,k) = rev(2, jl,k)
-          rev(1, ll,k) = rev(2, ll,k)
-          rev(ie,2, k) = rev(il,2, k)
-          rev(ie,mm,k) = rev(il,mm,k)
-          rev(ie,jl,k) = rev(il,jl,k)
-          rev(ie,ll,k) = rev(il,ll,k)
-       endif
-    enddo
-
-    ! J-rows; no need to include the corners. These have been set in
-    ! the previous k-loop.
-
-    mm = min(3_intType,kl)
-    ll = max(2_intType,nz)
-
-    do j=3,ny
-       do l=1,nVar
-          w(1, j,2, l) = w(2, j,2, l)
-          w(1, j,mm,l) = w(2, j,mm,l)
-          w(1, j,kl,l) = w(2, j,kl,l)
-          w(1, j,ll,l) = w(2, j,ll,l)
-          w(ie,j,2, l) = w(il,j,2, l)
-          w(ie,j,mm,l) = w(il,j,mm,l)
-          w(ie,j,kl,l) = w(il,j,kl,l)
-          w(ie,j,ll,l) = w(il,j,ll,l)
-       enddo
-
-       p(1, j, 2) = p(2, j, 2)
-       p(1, j,mm) = p(2, j,mm)
-       p(1, j,kl) = p(2, j,kl)
-       p(1, j,ll) = p(2, j,ll)
-       p(ie,j, 2) = p(il,j, 2)
-       p(ie,j,mm) = p(il,j,mm)
-       p(ie,j,kl) = p(il,j,kl)
-       p(ie,j,ll) = p(il,j,ll)
-
-       if( viscous) then
-          rlv(1, j, 2) = rlv(2, j, 2)
-          rlv(1, j,mm) = rlv(2, j,mm)
-          rlv(1, j,kl) = rlv(2, j,kl)
-          rlv(1, j,ll) = rlv(2, j,ll)
-          rlv(ie,j, 2) = rlv(il,j, 2)
-          rlv(ie,j,mm) = rlv(il,j,mm)
-          rlv(ie,j,kl) = rlv(il,j,kl)
-          rlv(ie,j,ll) = rlv(il,j,ll)
-       endif
-
-       if( eddyModel ) then
-          rev(1, j, 2) = rev(2, j, 2)
-          rev(1, j,mm) = rev(2, j,mm)
-          rev(1, j,kl) = rev(2, j,kl)
-          rev(1, j,ll) = rev(2, j,ll)
-          rev(ie,j, 2) = rev(il,j, 2)
-          rev(ie,j,mm) = rev(il,j,mm)
-          rev(ie,j,kl) = rev(il,j,kl)
-          rev(ie,j,ll) = rev(il,j,ll)
-       endif
-    enddo
-    !
-    !       Halo's on the j=jMin and j=jMax plane.
-    !
-    ! K-rows; no need to include the corners; this is done in the
-    ! next i-loop.
-
-    mm = min(3_intType,il)
-    ll = max(2_intType,nx)
-
-    do k=3,nz
-       do l=1,nVar
-          w(2, 1, k,l) = w(2, 2, k,l)
-          w(mm,1, k,l) = w(mm,2, k,l)
-          w(il,1, k,l) = w(il,2, k,l)
-          w(ll,1, k,l) = w(ll,2, k,l)
-          w(2, je,k,l) = w(2, jl,k,l)
-          w(mm,je,k,l) = w(mm,jl,k,l)
-          w(il,je,k,l) = w(il,jl,k,l)
-          w(ll,je,k,l) = w(ll,jl,k,l)
-       enddo
-
-       p(2, 1, k) = p(2, 2, k)
-       p(mm,1, k) = p(mm,2, k)
-       p(il,1, k) = p(il,2, k)
-       p(ll,1, k) = p(ll,2, k)
-       p(2, je,k) = p(2, jl,k)
-       p(mm,je,k) = p(mm,jl,k)
-       p(il,je,k) = p(il,jl,k)
-       p(ll,je,k) = p(ll,jl,k)
-
-       if( viscous) then
-          rlv(2, 1, k) = rlv(2, 2, k)
-          rlv(mm,1, k) = rlv(mm,2, k)
-          rlv(il,1, k) = rlv(il,2, k)
-          rlv(ll,1, k) = rlv(ll,2, k)
-          rlv(2, je,k) = rlv(2, jl,k)
-          rlv(mm,je,k) = rlv(mm,jl,k)
-          rlv(il,je,k) = rlv(il,jl,k)
-          rlv(ll,je,k) = rlv(ll,jl,k)
-       endif
-
-       if( eddyModel ) then
-          rev(2, 1, k) = rev(2, 2, k)
-          rev(mm,1, k) = rev(mm,2, k)
-          rev(il,1, k) = rev(il,2, k)
-          rev(ll,1, k) = rev(ll,2, k)
-          rev(2, je,k) = rev(2, jl,k)
-          rev(mm,je,k) = rev(mm,jl,k)
-          rev(il,je,k) = rev(il,jl,k)
-          rev(ll,je,k) = rev(ll,jl,k)
-       endif
-    enddo
-
-    ! I-rows, including halo's set on the iMin and iMax plane.
-
-    mm = min(3_intType,kl)
-    ll = max(2_intType,nz)
-
-    do i=1,ie
-       do l=1,nVar
-          w(i,1, 2, l) = w(i,2, 2, l)
-          w(i,1, mm,l) = w(i,2, mm,l)
-          w(i,1, kl,l) = w(i,2, kl,l)
-          w(i,1, ll,l) = w(i,2, ll,l)
-          w(i,je,2, l) = w(i,jl,2, l)
-          w(i,je,mm,l) = w(i,jl,mm,l)
-          w(i,je,kl,l) = w(i,jl,kl,l)
-          w(i,je,ll,l) = w(i,jl,ll,l)
-       enddo
-
-       p(i, 1, 2) = p(i, 2, 2)
-       p(i, 1,mm) = p(i, 2,mm)
-       p(i, 1,kl) = p(i, 2,kl)
-       p(i, 1,ll) = p(i, 2,ll)
-       p(i,je, 2) = p(i,jl, 2)
-       p(i,je,mm) = p(i,jl,mm)
-       p(i,je,kl) = p(i,jl,kl)
-       p(i,je,ll) = p(i,jl,ll)
-
-       if( viscous) then
-          rlv(i, 1, 2) = rlv(i, 2, 2)
-          rlv(i, 1,mm) = rlv(i, 2,mm)
-          rlv(i, 1,kl) = rlv(i, 2,kl)
-          rlv(i, 1,ll) = rlv(i, 2,ll)
-          rlv(i,je, 2) = rlv(i,jl, 2)
-          rlv(i,je,mm) = rlv(i,jl,mm)
-          rlv(i,je,kl) = rlv(i,jl,kl)
-          rlv(i,je,ll) = rlv(i,jl,ll)
-       endif
-
-       if( eddyModel ) then
-          rev(i, 1, 2) = rev(i, 2, 2)
-          rev(i, 1,mm) = rev(i, 2,mm)
-          rev(i, 1,kl) = rev(i, 2,kl)
-          rev(i, 1,ll) = rev(i, 2,ll)
-          rev(i,je, 2) = rev(i,jl, 2)
-          rev(i,je,mm) = rev(i,jl,mm)
-          rev(i,je,kl) = rev(i,jl,kl)
-          rev(i,je,ll) = rev(i,jl,ll)
-       endif
-    enddo
-    !
-    !       Halo's on the k=kMin and k=kMax plane.
-    !
-    ! J-rows, including halo's set on the jMin and jMax plane.
-
-    mm = min(3_intType,il)
-    ll = max(2_intType,nx)
-
-    do j=1,je
-       do l=1,nVar
-          w(2, j,1, l) = w(2, j,2, l)
-          w(mm,j,1, l) = w(mm,j,2, l)
-          w(il,j,1, l) = w(il,j,2, l)
-          w(ll,j,1, l) = w(ll,j,2, l)
-          w(2, j,ke,l) = w(2, j,kl,l)
-          w(mm,j,ke,l) = w(mm,j,kl,l)
-          w(il,j,ke,l) = w(il,j,kl,l)
-          w(ll,j,ke,l) = w(ll,j,kl,l)
-       enddo
-
-       p( 2,j, 1) = p( 2,j, 2)
-       p(mm,j, 1) = p(mm,j, 2)
-       p(il,j, 1) = p(il,j, 2)
-       p(ll,j, 1) = p(ll,j, 2)
-       p( 2,j,ke) = p( 2,j,kl)
-       p(mm,j,ke) = p(mm,j,kl)
-       p(il,j,ke) = p(il,j,kl)
-       p(ll,j,ke) = p(ll,j,kl)
-
-       if( viscous) then
-          rlv( 2,j, 1) = rlv( 2,j, 2)
-          rlv(mm,j, 1) = rlv(mm,j, 2)
-          rlv(il,j, 1) = rlv(il,j, 2)
-          rlv(ll,j, 1) = rlv(ll,j, 2)
-          rlv( 2,j,ke) = rlv( 2,j,kl)
-          rlv(mm,j,ke) = rlv(mm,j,kl)
-          rlv(il,j,ke) = rlv(il,j,kl)
-          rlv(ll,j,ke) = rlv(ll,j,kl)
-       endif
-
-       if( eddyModel ) then
-          rev( 2,j, 1) = rev( 2,j, 2)
-          rev(mm,j, 1) = rev(mm,j, 2)
-          rev(il,j, 1) = rev(il,j, 2)
-          rev(ll,j, 1) = rev(ll,j, 2)
-          rev( 2,j,ke) = rev( 2,j,kl)
-          rev(mm,j,ke) = rev(mm,j,kl)
-          rev(il,j,ke) = rev(il,j,kl)
-          rev(ll,j,ke) = rev(ll,j,kl)
-       endif
-    enddo
-
-    ! I-rows, including halo's set on the iMin and iMax plane.
-
-    mm = min(3_intType,jl)
-    ll = max(2_intType,ny)
-
-    do i=1,ie
-       do l=1,nVar
-          w(i, 2, 1,l) = w(i, 2, 2,l)
-          w(i,mm, 1,l) = w(i,mm, 2,l)
-          w(i,jl, 1,l) = w(i,jl, 2,l)
-          w(i,ll, 1,l) = w(i,ll, 2,l)
-          w(i, 2,ke,l) = w(i, 2,kl,l)
-          w(i,mm,ke,l) = w(i,mm,kl,l)
-          w(i,jl,ke,l) = w(i,jl,kl,l)
-          w(i,ll,ke,l) = w(i,ll,kl,l)
-       enddo
-
-       p(i, 2, 1) = p(i, 2, 2)
-       p(i,mm, 1) = p(i,mm, 2)
-       p(i,jl, 1) = p(i,jl, 2)
-       p(i,ll, 1) = p(i,ll, 2)
-       p(i, 2,ke) = p(i, 2,kl)
-       p(i,mm,ke) = p(i,mm,kl)
-       p(i,jl,ke) = p(i,jl,kl)
-       p(i,ll,ke) = p(i,ll,kl)
-
-       if( viscous) then
-          rlv(i, 2, 1) = rlv(i, 2, 2)
-          rlv(i,mm, 1) = rlv(i,mm, 2)
-          rlv(i,jl, 1) = rlv(i,jl, 2)
-          rlv(i,ll, 1) = rlv(i,ll, 2)
-          rlv(i, 2,ke) = rlv(i, 2,kl)
-          rlv(i,mm,ke) = rlv(i,mm,kl)
-          rlv(i,jl,ke) = rlv(i,jl,kl)
-          rlv(i,ll,ke) = rlv(i,ll,kl)
-       endif
-
-       if( eddyModel ) then
-          rev(i, 2, 1) = rev(i, 2, 2)
-          rev(i,mm, 1) = rev(i,mm, 2)
-          rev(i,jl, 1) = rev(i,jl, 2)
-          rev(i,ll, 1) = rev(i,ll, 2)
-          rev(i, 2,ke) = rev(i, 2,kl)
-          rev(i,mm,ke) = rev(i,mm,kl)
-          rev(i,jl,ke) = rev(i,jl,kl)
-          rev(i,ll,ke) = rev(i,ll,kl)
-       endif
-    enddo
-
-  end subroutine setCornerRowHalos
-
-  subroutine setCorrectionsCoarseHalos(sps, nn, coarseLevel, &
-       fact, nVarInt)
-    !
-    !       setCorrectionsCoarseHalos sets the values of the coarse
-    !       grid boundary halo corrections. For all boundaries, either a
-    !       homogeneous Dirichlet condition (fact = 0.0) or a Neumann
-    !       condition (fact = 1.0) is used. Exception are symmetry planes,
-    !       where a mirroring takes place.
-    !
-    use constants
-    use block, only : BCDataType, flowDoms
-    use flowVarRefState, only : nt1
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: sps, nn, coarseLevel
-    integer(kind=intType), intent(in) :: nVarInt
-    real(kind=realType), intent(in)   :: fact
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, l, mm
-    integer(kind=intType) :: il, jl, kl, ie, je, ke
-
-    real(kind=realType) :: nnx, nny, nnz, vn
-
-    real(kind=realType), dimension(:,:,:,:), pointer :: ww
-    real(kind=realType), dimension(:,:,:),   pointer :: ww1, ww2
-
-    type(BCDataType), dimension(:), pointer :: BCData
-
-    ! Set the pointer ww to the coarse grid variables. At the moment
-    ! when this routine is called, these contain the corrections in a
-    ! normal grid cycle and the true variables when used to
-    ! interpolate the solution to the next finer mesh level in full
-    ! multigrid mode. Also set the pointer for BCData, such that the
-    ! unit normals are accessed easier.
-
-    ww     => flowDoms(nn,coarseLevel,sps)%w
-    BCData => flowDoms(nn,coarseLevel,sps)%BCData
-
-    ! Easier storage of the upper coarse block range.
-
-    il = flowDoms(nn,coarseLevel,sps)%il
-    jl = flowDoms(nn,coarseLevel,sps)%jl
-    kl = flowDoms(nn,coarseLevel,sps)%kl
-
-    ie = flowDoms(nn,coarseLevel,sps)%ie
-    je = flowDoms(nn,coarseLevel,sps)%je
-    ke = flowDoms(nn,coarseLevel,sps)%ke
-
-    ! Loop over the number of boundary subfaces to correct the
-    ! boundary halo values.
-
-    subfacesCoarse: do mm=1,flowDoms(nn,coarseLevel,sps)%nBocos
-
-       ! Set the pointers for ww1 and ww2, depending on the block face
-       ! on which this subface is located. Note that bcFaceID is the
-       ! same for all spectral modes and only the 1st is allocated.
-       ! Therefore the value of the 1st spectral mode is used here.
-
-       select case (flowDoms(nn,coarseLevel,1)%BCFaceID(mm))
-
-       case (iMin)
-          ww1 => ww(1, 1:,1:,:); ww2 => ww(2, 1:,1:,:)
-       case (iMax)
-          ww1 => ww(ie,1:,1:,:); ww2 => ww(il,1:,1:,:)
-       case (jMin)
-          ww1 => ww(1:,1 ,1:,:); ww2 => ww(1:,2 ,1:,:)
-       case (jMax)
-          ww1 => ww(1:,je,1:,:); ww2 => ww(1:,jl,1:,:)
-       case (kMin)
-          ww1 => ww(1:,1:,1 ,:); ww2 => ww(1:,1:,2 ,:)
-       case (kMax)
-          ww1 => ww(1:,1:,ke,:); ww2 => ww(1:,1:,kl,:)
-
-       end select
-
-       ! Choose what to do based on the BC type. Note that BCType is
-       ! the same for all spectral modes and only the 1st is allocated.
-       ! Therefore the value of the 1st spectral mode is used here.
-
-       select case (flowDoms(nn,coarseLevel,1)%BCType(mm))
-
-       case (SlidingInterface,   OversetOuterBound,     &
-            DomainInterfaceAll, DomainInterfaceRhoUVW, &
-            DomainInterfaceP,   DomainInterfaceRho,    &
-            DomainInterfaceTotal)
-
-          ! None of these are physical boundary conditions and thus
-          ! nothing needs to be done. The halos already contain the
-          ! corrections or solution.
-
-       case (Symm)
-
-          ! This is a symmetry plane. Loop over the faces of this
-          ! subface.
-
-          do j=BCData(mm)%jcBeg, BCData(mm)%jcEnd
-             do i=BCData(mm)%icBeg, BCData(mm)%icEnd
-
-                ! Compute twice the normal velocity component of the
-                ! internal cell.
-
-                nnx = BCData(mm)%norm(i,j,1)
-                nny = BCData(mm)%norm(i,j,2)
-                nnz = BCData(mm)%norm(i,j,3)
-
-                vn = two*(ww2(i,j,ivx)*nnx + ww2(i,j,ivy)*nny &
-                     +      ww2(i,j,ivz)*nnz)
-
-                ! Compute the halo state. Make sure that the average
-                ! normal velocity component is zero.
-
-                ww1(i,j,irho)  = ww2(i,j,irho)
-                ww1(i,j,ivx)   = ww2(i,j,ivx) - vn*nnx
-                ww1(i,j,ivy)   = ww2(i,j,ivy) - vn*nny
-                ww1(i,j,ivz)   = ww2(i,j,ivz) - vn*nnz
-                ww1(i,j,irhoE) = ww2(i,j,irhoE)
-
-                do l=nt1,nVarInt
-                   ww1(i,j,l) = ww2(i,j,l)
-                enddo
-
-             enddo
-          enddo
-
-       case default
-
-          ! Other type of boundary subface. Set the boundary halo's
-          ! as fact times the internal cell value.
-
-          do l=1,nVarInt
-             do j=BCData(mm)%jcBeg, BCData(mm)%jcEnd
-                do i=BCData(mm)%icBeg, BCData(mm)%icEnd
-                   ww1(i,j,l) = fact*ww2(i,j,l)
-                enddo
-             enddo
-          enddo
-
-       end select
-    enddo subfacesCoarse
-
-  end subroutine setCorrectionsCoarseHalos
+    end subroutine setCorrectionsCoarseHalos
 
 end module multigrid
diff --git a/src/solver/residuals.F90 b/src/solver/residuals.F90
index 6635aa8bf..e12c01109 100644
--- a/src/solver/residuals.F90
+++ b/src/solver/residuals.F90
@@ -1,2094 +1,2073 @@
 module residuals
 contains
 
-  subroutine residual_block
-    !
-    !       residual computes the residual of the mean flow equations on
-    !       the current MG level.
-    !
-    use blockPointers
-    use cgnsGrid
-    use flowVarRefState
-    use inputIteration
-    use inputDiscretization
-    use inputTimeSpectral
-    use inputUnsteady ! Added by HDN
-    use iteration
-    use inputAdjoint
-    use flowUtils, only : computeSpeedOfSoundSquared, allNodalGradients
-    use fluxes
+    subroutine residual_block
+        !
+        !       residual computes the residual of the mean flow equations on
+        !       the current MG level.
+        !
+        use blockPointers
+        use cgnsGrid
+        use flowVarRefState
+        use inputIteration
+        use inputDiscretization
+        use inputTimeSpectral
+        use inputUnsteady ! Added by HDN
+        use iteration
+        use inputAdjoint
+        use flowUtils, only: computeSpeedOfSoundSquared, allNodalGradients
+        use fluxes
 #ifndef USE_TAPENADE
-    use ALEUtils, only : interpLevelALE_block, recoverLevelALE_block
+        use ALEUtils, only: interpLevelALE_block, recoverLevelALE_block
 #endif
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: discr
-    integer(kind=intType) :: i, j, k, l
-    integer(kind=intType) :: iale, jale, kale, lale, male ! For loops of ALE
-    real(kind=realType), parameter :: K1 = 1.05_realType
-    ! The line below is only used for the low-speed preconditioner part of this routine
-    real(kind=realType), parameter :: K2 = 0.6_realType ! Random given number
-
-    real(kind=realType), parameter :: M0 = 0.2_realType ! Mach number preconditioner activation
-    real(kind=realType), parameter :: alpha = 0_realType
-    real(kind=realType), parameter :: delta = 0_realType
-    !real(kind=realType), parameter :: hinf = 2_realType ! Test phase
-    real(kind=realType), parameter :: Cpres = 4.18_realType ! Test phase
-    real(kind=realType), parameter :: TEMP= 297.15_realType
-
-    !
-    !     Local variables
-    !
-    real(kind=realType) :: K3, h, velXrho, velYrho, velZrho,SoS,hinf
-    real(kind=realType) :: resM,A11,A12,A13,A14,A15,A21,A22,A23,A24,A25,A31,A32,A33,A34,A35
-    real(kind=realType) :: A41,A42,A43,A44,A45,A51,A52,A53,A54,A55,B11,B12,B13,B14,B15
-    real(kind=realType) :: B21,B22,B23,B24,B25,B31,B32,B33,B34,B35
-    real(kind=realType) :: B41,B42,B43,B44,B45,B51,B52,B53,B54,B55
-    real(kind=realType) :: rhoHdash, betaMr2
-    real(kind=realType) :: G, q
-    real(kind=realType) :: b1, b2, b3, b4, b5
-    real(kind=realType) :: dwo(nwf)
-    logical :: fineGrid
-
-    ! Set the value of rFil, which controls the fraction of the old
-    ! dissipation residual to be used. This is only for the runge-kutta
-    ! schemes; for other smoothers rFil is simply set to 1.0.
-    ! Note the index rkStage+1 for cdisRK. The reason is that the
-    ! residual computation is performed before rkStage is incremented.
-
-    if(smoother == RungeKutta) then
-       rFil = cdisRK(rkStage+1)
-    else
-       rFil = one
-    endif
-
-    ! Set the value of the discretization, depending on the grid level,
-    ! and the logical fineGrid, which indicates whether or not this
-    ! is the finest grid level of the current mg cycle.
-
-    discr = spaceDiscrCoarse
-    if(currentLevel == 1) discr = spaceDiscr
-
-    fineGrid = .false.
-    if(currentLevel == groundLevel) fineGrid = .true.
-
-
-    ! ===========================================================
-    !
-    ! Assuming ALE has nothing to do with MG
-    ! The geometric data will be interpolated if in MD mode
-    !
-    ! ===========================================================
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: discr
+        integer(kind=intType) :: i, j, k, l
+        integer(kind=intType) :: iale, jale, kale, lale, male ! For loops of ALE
+        real(kind=realType), parameter :: K1 = 1.05_realType
+        ! The line below is only used for the low-speed preconditioner part of this routine
+        real(kind=realType), parameter :: K2 = 0.6_realType ! Random given number
+
+        real(kind=realType), parameter :: M0 = 0.2_realType ! Mach number preconditioner activation
+        real(kind=realType), parameter :: alpha = 0_realType
+        real(kind=realType), parameter :: delta = 0_realType
+        !real(kind=realType), parameter :: hinf = 2_realType ! Test phase
+        real(kind=realType), parameter :: Cpres = 4.18_realType ! Test phase
+        real(kind=realType), parameter :: TEMP = 297.15_realType
+
+        !
+        !     Local variables
+        !
+        real(kind=realType) :: K3, h, velXrho, velYrho, velZrho, SoS, hinf
+        real(kind=realType) :: resM, A11, A12, A13, A14, A15, A21, A22, A23, A24, A25, A31, A32, A33, A34, A35
+        real(kind=realType) :: A41, A42, A43, A44, A45, A51, A52, A53, A54, A55, B11, B12, B13, B14, B15
+        real(kind=realType) :: B21, B22, B23, B24, B25, B31, B32, B33, B34, B35
+        real(kind=realType) :: B41, B42, B43, B44, B45, B51, B52, B53, B54, B55
+        real(kind=realType) :: rhoHdash, betaMr2
+        real(kind=realType) :: G, q
+        real(kind=realType) :: b1, b2, b3, b4, b5
+        real(kind=realType) :: dwo(nwf)
+        logical :: fineGrid
+
+        ! Set the value of rFil, which controls the fraction of the old
+        ! dissipation residual to be used. This is only for the runge-kutta
+        ! schemes; for other smoothers rFil is simply set to 1.0.
+        ! Note the index rkStage+1 for cdisRK. The reason is that the
+        ! residual computation is performed before rkStage is incremented.
+
+        if (smoother == RungeKutta) then
+            rFil = cdisRK(rkStage + 1)
+        else
+            rFil = one
+        end if
+
+        ! Set the value of the discretization, depending on the grid level,
+        ! and the logical fineGrid, which indicates whether or not this
+        ! is the finest grid level of the current mg cycle.
+
+        discr = spaceDiscrCoarse
+        if (currentLevel == 1) discr = spaceDiscr
+
+        fineGrid = .false.
+        if (currentLevel == groundLevel) fineGrid = .true.
+
+        ! ===========================================================
+        !
+        ! Assuming ALE has nothing to do with MG
+        ! The geometric data will be interpolated if in MD mode
+        !
+        ! ===========================================================
 #ifndef USE_TAPENADE
-    call interpLevelALE_block
+        call interpLevelALE_block
 #endif
-    ! ===========================================================
-    !
-    ! The fluxes are calculated as usual
-    !
-    ! ===========================================================
+        ! ===========================================================
+        !
+        ! The fluxes are calculated as usual
+        !
+        ! ===========================================================
 
-    call inviscidCentralFlux
+        call inviscidCentralFlux
 
-    select case (discr)
+        select case (discr)
 
-    case (dissScalar) ! Standard scalar dissipation scheme.
+        case (dissScalar) ! Standard scalar dissipation scheme.
 
-       if( fineGrid) then
-          if (.not. lumpedDiss) then
-             call inviscidDissFluxScalar
-          else
-             call inviscidDissFluxScalarApprox
-          end if
-       else
+            if (fineGrid) then
+                if (.not. lumpedDiss) then
+                    call inviscidDissFluxScalar
+                else
+                    call inviscidDissFluxScalarApprox
+                end if
+            else
 #ifndef USE_TAPENADE
-          call inviscidDissFluxScalarCoarse
+                call inviscidDissFluxScalarCoarse
 #endif
-       endif
+            end if
 
-       !===========================================================
+            !===========================================================
 
-    case (dissMatrix) ! Matrix dissipation scheme.
+        case (dissMatrix) ! Matrix dissipation scheme.
 
-       if( fineGrid ) then
-          if (.not. lumpedDiss) then
-             call inviscidDissFluxMatrix
-          else
-             call inviscidDissFluxMatrixApprox
-          end if
-       else
+            if (fineGrid) then
+                if (.not. lumpedDiss) then
+                    call inviscidDissFluxMatrix
+                else
+                    call inviscidDissFluxMatrixApprox
+                end if
+            else
 #ifndef USE_TAPENADE
-          call inviscidDissFluxMatrixCoarse
+                call inviscidDissFluxMatrixCoarse
 #endif
-       endif
+            end if
 
-       !===========================================================
+            !===========================================================
 
-    case (upwind) ! Dissipation via an upwind scheme.
+        case (upwind) ! Dissipation via an upwind scheme.
 
-       call inviscidUpwindFlux(fineGrid)
+            call inviscidUpwindFlux(fineGrid)
 
-    end select
+        end select
 
-    !-------------------------------------------------------
-    ! Lastly, recover the old s[I,J,K], sFace[I,J,K]
-    ! This shall be done before difussive and source terms
-    ! are computed.
-    !-------------------------------------------------------
+        !-------------------------------------------------------
+        ! Lastly, recover the old s[I,J,K], sFace[I,J,K]
+        ! This shall be done before difussive and source terms
+        ! are computed.
+        !-------------------------------------------------------
 #ifndef USE_TAPENADE
-    call recoverLevelALE_block
+        call recoverLevelALE_block
 #endif
 
-
-    if( viscous ) then
-       ! Only compute viscous fluxes if rFil > 0
-       if(abs(rFil) > thresholdReal) then
-          ! not lumpedDiss means it isn't the PC...call the vicousFlux
-          if (.not. lumpedDiss) then
-             call computeSpeedOfSoundSquared
-             call allNodalGradients
-             call viscousFlux
-          else
-             ! This is a PC calc...only include viscous fluxes if viscPC
-             ! is used
-             ! if full visc is true, also need full viscous terms, even if
-             ! lumpedDiss is true
-             call computeSpeedOfSoundSquared
-             if (viscPC) then
-                call allNodalGradients
-                call viscousFlux
-             else
-                call viscousFluxApprox
-             end if
-          end if
-       end if
-    end if
-
-    !===========================================================
-
-    ! Add the dissipative and possibly viscous fluxes to the
-    ! Euler fluxes. Loop over the owned cells and add fw to dw.
-    ! Also multiply by iblank so that no updates occur in holes
-    if ( lowspeedpreconditioner ) then
-       do k=2,kl
-          do j=2,jl
-             do i=2,il
-                !    Compute speed of sound
-                SoS = sqrt(gamma(i,j,k)*p(i,j,k)/w(i,j,k,irho))
-
-                ! Compute velocities without rho from state vector 
-                !      (w is pointer.. see type blockType setup in block.F90)
-                !      w(0:ib,0:jb,0:kb,1:nw) is allocated in block.F90 
-                !      these are per definition nw=[rho,u,v,w,rhoeE] 
-                !      so the velocity is simply just taken out below... 
-                !      we do not have to divide with rho since it is already 
-                !      without rho... 
-                velXrho = w(i,j,k,ivx) ! ivx: l. 60 in constants.F90
-                velYrho = w(i,j,k,ivy)
-                velZrho = w(i,j,k,ivz)
-
-                q = (velXrho**2 + velYrho**2 + velZrho**2)
- 
-                resM=sqrt(q)/SoS 
-                ! resM above is used as M_a (thesis) and M (paper 2015) 
-                ! and is the Free Stream Mach number 
- 
-                ! see routine setup above: 
-                ! l. 30: real(kind=realType), parameter :: K1 = 1.05_realType 
-                ! Random given number for K2: 
-                ! l. 31: real(kind=realType), parameter :: K2 = 0.6_realType 
-                ! Mach number preconditioner activation for K3: 
-                ! l. 32: real(kind=realType), parameter :: M0 = 0.2_realType 
-                ! 
-                !    Compute K3 
-                ! eq. 2.7 in Garg 2015. K1, M0 and resM are scalars 
-                ! 
-                ! unfortunately, Garg has switched the K1 and K3 here in the 
-                ! code. In both paper and thesis it is K3 that is used to det- 
-                ! ermine K1 below 
-
-                !
-                !    Compute K3
-                 
-                K3 = K1 * ( 1 + ((1-K1*M0**2)*resM**2)/(K1*M0**4) )
-                !    Compute BetaMr2 
-                ! betaMr2 -> eq. 7 in Garg 2015 
-                ! (use eq. 2.6 in thesis thesis since paper has an error) 
-                ! where a==SoS 
-                ! 
-                ! again, K1 and K3 are switched compared with paper/thesis
-                !    Compute BetaMr2
-                betaMr2 = min( max( K3*(velXrho**2 + velYrho**2  &
-                     + velZrho**2), ((K2)*(wInf(ivx)**2 &
-                     + wInf(ivy)**2 + wInf(ivz)**2))) , SoS**2 )
-
-
-                ! above, the wInf is the free stream velocity 
-                ! 
-                ! Should this first line's first element have SoS^4 or SoS^2  
-
-                A11=  (betaMr2)*(1/SoS**4)
-                A12 = zero
-                A13 = zero
-                A14 = zero
-                A15 = (-betaMr2)/SoS**4
-
-                A21 =one*velXrho/SoS**2
-                A22 = one*w(i,j,k,irho)
-                A23 = zero
-                A24 = zero
-                A25 = one*(-velXrho)/SoS**2
-
-                A31 = one*velYrho/SoS**2
-                A32 = zero
-                A33 = one*w(i,j,k,irho)
-                A34 = zero
-                A35 = one*(-velYrho)/SoS**2
-
-                A41 = one*velZrho/SoS**2
-                A42 = zero
-                A43 = zero
-                A44 = one*w(i,j,k,irho)
-                A45 = zero + one *(-velZrho)/SoS**2
-
-                ! mham: seems he fixed the above line an irregular way?
-
-
-                A51=  one*((1/(gamma(i,j,k)-1))+(resM**2)/2)
-                A52 = one * w(i,j,k,irho)*velXrho
-                A53 = one * w(i,j,k,irho)*velYrho
-                A54 = one * w(i,j,k,irho)*velzrho
-                A55 = one * ((-(resM**2))/2)
-
-                B11 = A11*(gamma(i,j,k)-1)*q/2+A12*(-velXrho)&
-                     /w(i,j,k,irho)+A13*(-velYrho)/w(i,j,k,irho)+A14*(-velZrho)/w(i,j,k,irho)&
-                     + A15* (((gamma(i,j,k)-1)*q/2)-SoS**2)
-                B12 = A11*(1-gamma(i,j,k))*velXrho+A12*1/w(i,j,k,irho)&
-                     +A15*(1-gamma(i,j,k))*velXrho
-                B13 = A11*(1-gamma(i,j,k))*velYrho+A13&
-                     /w(i,j,k,irho)+A15*(1-gamma(i,j,k))*velYrho
-                B14 = A11*(1-gamma(i,j,k))*velZrho&
-                     +A14/w(i,j,k,irho)+A15*(1-gamma(i,j,k))*velZrho
-                B15 = A11*(gamma(i,j,k)-1)+A15*(gamma(i,j,k)-1)
-
-                B21 = A21*(gamma(i,j,k)-1)*q/2+A22*(-velXrho)&
-                     /w(i,j,k,irho)+A23*(-velYrho)/w(i,j,k,irho)+A24*(-velZrho)&
-                     /w(i,j,k,irho)+ A25* (((gamma(i,j,k)-1)*q/2)-SoS**2)
-                B22 = A21*(1-gamma(i,j,k))*velXrho+A22&
-                     /w(i,j,k,irho)+A25*(1-gamma(i,j,k))*velXrho
-                B23 = A21*(1-gamma(i,j,k))*velYrho&
-                     +A23*1/w(i,j,k,irho)+A25*(1-gamma(i,j,k))*velYrho
-                B24 = A21*(1-gamma(i,j,k))*velZrho&
-                     +A24*1/w(i,j,k,irho)+A25*(1-gamma(i,j,k))*velZrho
-                B25 = A21*(gamma(i,j,k)-1)+A25*(gamma(i,j,k)-1)
-
-                B31 = A31*(gamma(i,j,k)-1)*q/2+A32*(-velXrho)&
-                     /w(i,j,k,irho)+A33*(-velYrho)/w(i,j,k,irho)+A34*(-velZrho)/w(i,j,k,irho)&
-                     + A35* (((gamma(i,j,k)-1)*q/2)-SoS**2)
-                B32 = A31*(1-gamma(i,j,k))*velXrho+A32&
-                     /w(i,j,k,irho)+A35*(1-gamma(i,j,k))*velXrho
-                B33 = A31*(1-gamma(i,j,k))*velYrho&
-                     +A33*1/w(i,j,k,irho)+A35*(1-gamma(i,j,k))*velYrho
-                B34 = A31*(1-gamma(i,j,k))*velZrho&
-                     +A34*1/w(i,j,k,irho)+A35*(1-gamma(i,j,k))*velZrho
-                B35 = A31*(gamma(i,j,k)-1)+A35*(gamma(i,j,k)-1)
-
-                B41 = A41*(gamma(i,j,k)-1)*q/2+A42*(-velXrho)&
-                     /w(i,j,k,irho)+A43*(-velYrho)/w(i,j,k,irho)+A44*(-velZrho) &
-                     /w(i,j,k,irho)+ A45* (((gamma(i,j,k)-1)*q/2)-SoS**2)
-                B42 = A41*(1-gamma(i,j,k))*velXrho+A42&
-                     /w(i,j,k,irho)+A45*(1-gamma(i,j,k))*velXrho
-                B43 = A41*(1-gamma(i,j,k))*velYrho&
-                     +A43*1/w(i,j,k,irho)+A45*(1-gamma(i,j,k))*velYrho
-                B44 = A41*(1-gamma(i,j,k))*velZrho&
-                     +A44*1/w(i,j,k,irho)+A45*(1-gamma(i,j,k))*velZrho
-                B45 = A41*(gamma(i,j,k)-1)+A45*(gamma(i,j,k)-1)
-
-                B51 = A51*(gamma(i,j,k)-1)*q/2+A52*(-velXrho)&
-                     /w(i,j,k,irho)+A53*(-velYrho)/w(i,j,k,irho)+A54*(-velZrho) &
-                     /w(i,j,k,irho)+ A55* (((gamma(i,j,k)-1)*q/2)-SoS**2)
-                B52 = A51*(1-gamma(i,j,k))*velXrho+A52&
-                     /w(i,j,k,irho)+A55*(1-gamma(i,j,k))*velXrho
-                B53 = A51*(1-gamma(i,j,k))*velYrho&
-                     +A53*1/w(i,j,k,irho)+A55*(1-gamma(i,j,k))*velYrho
-                B54 = A51*(1-gamma(i,j,k))*velZrho&
-                     +A54*1/w(i,j,k,irho)+A55*(1-gamma(i,j,k))*velZrho
-                B55 = A51*(gamma(i,j,k)-1)+A55*(gamma(i,j,k)-1)
-
-                ! dwo is the orginal redisual
-                do l=1,nwf
-                   dwo(l) = (dw(i,j,k,l) + fw(i,j,k,l))* max(real(iblank(i,j,k), realType), zero)
+        if (viscous) then
+            ! Only compute viscous fluxes if rFil > 0
+            if (abs(rFil) > thresholdReal) then
+                ! not lumpedDiss means it isn't the PC...call the vicousFlux
+                if (.not. lumpedDiss) then
+                    call computeSpeedOfSoundSquared
+                    call allNodalGradients
+                    call viscousFlux
+                else
+                    ! This is a PC calc...only include viscous fluxes if viscPC
+                    ! is used
+                    ! if full visc is true, also need full viscous terms, even if
+                    ! lumpedDiss is true
+                    call computeSpeedOfSoundSquared
+                    if (viscPC) then
+                        call allNodalGradients
+                        call viscousFlux
+                    else
+                        call viscousFluxApprox
+                    end if
+                end if
+            end if
+        end if
+
+        !===========================================================
+
+        ! Add the dissipative and possibly viscous fluxes to the
+        ! Euler fluxes. Loop over the owned cells and add fw to dw.
+        ! Also multiply by iblank so that no updates occur in holes
+        if (lowspeedpreconditioner) then
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+                        !    Compute speed of sound
+                        SoS = sqrt(gamma(i, j, k) * p(i, j, k) / w(i, j, k, irho))
+
+                        ! Compute velocities without rho from state vector
+                        !      (w is pointer.. see type blockType setup in block.F90)
+                        !      w(0:ib,0:jb,0:kb,1:nw) is allocated in block.F90
+                        !      these are per definition nw=[rho,u,v,w,rhoeE]
+                        !      so the velocity is simply just taken out below...
+                        !      we do not have to divide with rho since it is already
+                        !      without rho...
+                        velXrho = w(i, j, k, ivx) ! ivx: l. 60 in constants.F90
+                        velYrho = w(i, j, k, ivy)
+                        velZrho = w(i, j, k, ivz)
+
+                        q = (velXrho**2 + velYrho**2 + velZrho**2)
+
+                        resM = sqrt(q) / SoS
+                        ! resM above is used as M_a (thesis) and M (paper 2015)
+                        ! and is the Free Stream Mach number
+
+                        ! see routine setup above:
+                        ! l. 30: real(kind=realType), parameter :: K1 = 1.05_realType
+                        ! Random given number for K2:
+                        ! l. 31: real(kind=realType), parameter :: K2 = 0.6_realType
+                        ! Mach number preconditioner activation for K3:
+                        ! l. 32: real(kind=realType), parameter :: M0 = 0.2_realType
+                        !
+                        !    Compute K3
+                        ! eq. 2.7 in Garg 2015. K1, M0 and resM are scalars
+                        !
+                        ! unfortunately, Garg has switched the K1 and K3 here in the
+                        ! code. In both paper and thesis it is K3 that is used to det-
+                        ! ermine K1 below
+
+                        !
+                        !    Compute K3
+
+                        K3 = K1 * (1 + ((1 - K1 * M0**2) * resM**2) / (K1 * M0**4))
+                        !    Compute BetaMr2
+                        ! betaMr2 -> eq. 7 in Garg 2015
+                        ! (use eq. 2.6 in thesis thesis since paper has an error)
+                        ! where a==SoS
+                        !
+                        ! again, K1 and K3 are switched compared with paper/thesis
+                        !    Compute BetaMr2
+                        betaMr2 = min(max(K3 * (velXrho**2 + velYrho**2 &
+                                                + velZrho**2), ((K2) * (wInf(ivx)**2 &
+                                                                        + wInf(ivy)**2 + wInf(ivz)**2))), SoS**2)
+
+                        ! above, the wInf is the free stream velocity
+                        !
+                        ! Should this first line's first element have SoS^4 or SoS^2
+
+                        A11 = (betaMr2) * (1 / SoS**4)
+                        A12 = zero
+                        A13 = zero
+                        A14 = zero
+                        A15 = (-betaMr2) / SoS**4
+
+                        A21 = one * velXrho / SoS**2
+                        A22 = one * w(i, j, k, irho)
+                        A23 = zero
+                        A24 = zero
+                        A25 = one * (-velXrho) / SoS**2
+
+                        A31 = one * velYrho / SoS**2
+                        A32 = zero
+                        A33 = one * w(i, j, k, irho)
+                        A34 = zero
+                        A35 = one * (-velYrho) / SoS**2
+
+                        A41 = one * velZrho / SoS**2
+                        A42 = zero
+                        A43 = zero
+                        A44 = one * w(i, j, k, irho)
+                        A45 = zero + one * (-velZrho) / SoS**2
+
+                        ! mham: seems he fixed the above line an irregular way?
+
+                        A51 = one * ((1 / (gamma(i, j, k) - 1)) + (resM**2) / 2)
+                        A52 = one * w(i, j, k, irho) * velXrho
+                        A53 = one * w(i, j, k, irho) * velYrho
+                        A54 = one * w(i, j, k, irho) * velzrho
+                        A55 = one * ((-(resM**2)) / 2)
+
+                        B11 = A11 * (gamma(i, j, k) - 1) * q / 2 + A12 * (-velXrho) &
+                        / w(i, j, k, irho) + A13 * (-velYrho) / w(i, j, k, irho) + A14 * (-velZrho) / w(i, j, k, irho) &
+                              + A15 * (((gamma(i, j, k) - 1) * q / 2) - SoS**2)
+                        B12 = A11 * (1 - gamma(i, j, k)) * velXrho + A12 * 1 / w(i, j, k, irho) &
+                              + A15 * (1 - gamma(i, j, k)) * velXrho
+                        B13 = A11 * (1 - gamma(i, j, k)) * velYrho + A13 &
+                              / w(i, j, k, irho) + A15 * (1 - gamma(i, j, k)) * velYrho
+                        B14 = A11 * (1 - gamma(i, j, k)) * velZrho &
+                              + A14 / w(i, j, k, irho) + A15 * (1 - gamma(i, j, k)) * velZrho
+                        B15 = A11 * (gamma(i, j, k) - 1) + A15 * (gamma(i, j, k) - 1)
+
+                        B21 = A21 * (gamma(i, j, k) - 1) * q / 2 + A22 * (-velXrho) &
+                              / w(i, j, k, irho) + A23 * (-velYrho) / w(i, j, k, irho) + A24 * (-velZrho) &
+                              / w(i, j, k, irho) + A25 * (((gamma(i, j, k) - 1) * q / 2) - SoS**2)
+                        B22 = A21 * (1 - gamma(i, j, k)) * velXrho + A22 &
+                              / w(i, j, k, irho) + A25 * (1 - gamma(i, j, k)) * velXrho
+                        B23 = A21 * (1 - gamma(i, j, k)) * velYrho &
+                              + A23 * 1 / w(i, j, k, irho) + A25 * (1 - gamma(i, j, k)) * velYrho
+                        B24 = A21 * (1 - gamma(i, j, k)) * velZrho &
+                              + A24 * 1 / w(i, j, k, irho) + A25 * (1 - gamma(i, j, k)) * velZrho
+                        B25 = A21 * (gamma(i, j, k) - 1) + A25 * (gamma(i, j, k) - 1)
+
+                        B31 = A31 * (gamma(i, j, k) - 1) * q / 2 + A32 * (-velXrho) &
+                        / w(i, j, k, irho) + A33 * (-velYrho) / w(i, j, k, irho) + A34 * (-velZrho) / w(i, j, k, irho) &
+                              + A35 * (((gamma(i, j, k) - 1) * q / 2) - SoS**2)
+                        B32 = A31 * (1 - gamma(i, j, k)) * velXrho + A32 &
+                              / w(i, j, k, irho) + A35 * (1 - gamma(i, j, k)) * velXrho
+                        B33 = A31 * (1 - gamma(i, j, k)) * velYrho &
+                              + A33 * 1 / w(i, j, k, irho) + A35 * (1 - gamma(i, j, k)) * velYrho
+                        B34 = A31 * (1 - gamma(i, j, k)) * velZrho &
+                              + A34 * 1 / w(i, j, k, irho) + A35 * (1 - gamma(i, j, k)) * velZrho
+                        B35 = A31 * (gamma(i, j, k) - 1) + A35 * (gamma(i, j, k) - 1)
+
+                        B41 = A41 * (gamma(i, j, k) - 1) * q / 2 + A42 * (-velXrho) &
+                              / w(i, j, k, irho) + A43 * (-velYrho) / w(i, j, k, irho) + A44 * (-velZrho) &
+                              / w(i, j, k, irho) + A45 * (((gamma(i, j, k) - 1) * q / 2) - SoS**2)
+                        B42 = A41 * (1 - gamma(i, j, k)) * velXrho + A42 &
+                              / w(i, j, k, irho) + A45 * (1 - gamma(i, j, k)) * velXrho
+                        B43 = A41 * (1 - gamma(i, j, k)) * velYrho &
+                              + A43 * 1 / w(i, j, k, irho) + A45 * (1 - gamma(i, j, k)) * velYrho
+                        B44 = A41 * (1 - gamma(i, j, k)) * velZrho &
+                              + A44 * 1 / w(i, j, k, irho) + A45 * (1 - gamma(i, j, k)) * velZrho
+                        B45 = A41 * (gamma(i, j, k) - 1) + A45 * (gamma(i, j, k) - 1)
+
+                        B51 = A51 * (gamma(i, j, k) - 1) * q / 2 + A52 * (-velXrho) &
+                              / w(i, j, k, irho) + A53 * (-velYrho) / w(i, j, k, irho) + A54 * (-velZrho) &
+                              / w(i, j, k, irho) + A55 * (((gamma(i, j, k) - 1) * q / 2) - SoS**2)
+                        B52 = A51 * (1 - gamma(i, j, k)) * velXrho + A52 &
+                              / w(i, j, k, irho) + A55 * (1 - gamma(i, j, k)) * velXrho
+                        B53 = A51 * (1 - gamma(i, j, k)) * velYrho &
+                              + A53 * 1 / w(i, j, k, irho) + A55 * (1 - gamma(i, j, k)) * velYrho
+                        B54 = A51 * (1 - gamma(i, j, k)) * velZrho &
+                              + A54 * 1 / w(i, j, k, irho) + A55 * (1 - gamma(i, j, k)) * velZrho
+                        B55 = A51 * (gamma(i, j, k) - 1) + A55 * (gamma(i, j, k) - 1)
+
+                        ! dwo is the orginal redisual
+                        do l = 1, nwf
+                            dwo(l) = (dw(i, j, k, l) + fw(i, j, k, l)) * max(real(iblank(i, j, k), realType), zero)
+                        end do
+
+                        dw(i, j, k, 1) = B11 * dwo(1) + B12 * dwo(2) + B13 * dwo(3) + B14 * dwo(4) + B15 * dwo(5)
+                        dw(i, j, k, 2) = B21 * dwo(1) + B22 * dwo(2) + B23 * dwo(3) + B24 * dwo(4) + B25 * dwo(5)
+                        dw(i, j, k, 3) = B31 * dwo(1) + B32 * dwo(2) + B33 * dwo(3) + B34 * dwo(4) + B35 * dwo(5)
+                        dw(i, j, k, 4) = B41 * dwo(1) + B42 * dwo(2) + B43 * dwo(3) + B44 * dwo(4) + B45 * dwo(5)
+                        dw(i, j, k, 5) = B51 * dwo(1) + B52 * dwo(2) + B53 * dwo(3) + B54 * dwo(4) + B55 * dwo(5)
+
+                    end do
+                end do
+            end do ! end of lowspeedpreconditioners three cells loops
+
+        else ! else.. i.e. if we do not have preconditioner turned on...
+            do l = 1, nwf
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            dw(i, j, k, l) = (dw(i, j, k, l) + fw(i, j, k, l)) &
+                                             * max(real(iblank(i, j, k), realType), zero)
+                        end do
+                    end do
                 end do
+            end do
+        end if
+
+    end subroutine residual_block
+
+    subroutine sourceTerms_block(nn, res, iRegion, pLocal)
+
+        ! Apply the source terms for the given block. Assume that the
+        ! block pointers are already set.
+        use constants
+        use actuatorRegionData
+        use blockPointers, only: volRef, dw, w
+        use flowVarRefState, only: pRef, uRef, LRef
+        use communication
+        use iteration, only: ordersConverged
+        implicit none
+
+        ! Input
+        integer(kind=intType), intent(in) :: nn, iRegion
+        logical, intent(in) :: res
+        real(kind=realType), intent(inout) :: pLocal
+
+        ! Working
+        integer(kind=intType) :: i, j, k, ii, iStart, iEnd
+        real(kind=realType) :: Ftmp(3), Vx, Vy, Vz, F_fact(3), Q_fact, Qtmp, reDim, factor, oStart, oEnd
+
+        reDim = pRef * uRef
+
+        ! Compute the relaxation factor based on the ordersConverged
+
+        ! How far we are into the ramp:
+        if (ordersConverged < actuatorRegions(iRegion)%relaxStart) then
+            factor = zero
+        else if (ordersConverged > actuatorRegions(iRegion)%relaxEnd) then
+            factor = one
+        else ! In between
+            oStart = actuatorRegions(iRegion)%relaxStart
+            oEnd = actuatorRegions(iRegion)%relaxEnd
+            factor = (ordersConverged - oStart) / (oEnd - oStart)
+        end if
+
+        ! Compute the constant force factor
+        F_fact = factor * actuatorRegions(iRegion)%force / actuatorRegions(iRegion)%volume / pRef
+
+        ! Heat factor. This is heat added per unit volume per unit time
+        Q_fact = factor * actuatorRegions(iRegion)%heat / actuatorRegions(iRegion)%volume / (pRef * uRef * LRef * LRef)
+
+        ! Loop over the ranges for this block
+        iStart = actuatorRegions(iRegion)%blkPtr(nn - 1) + 1
+        iEnd = actuatorRegions(iRegion)%blkPtr(nn)
+
+        !$AD II-LOOP
+        do ii = iStart, iEnd
+
+            ! Extract the cell ID.
+            i = actuatorRegions(iRegion)%cellIDs(1, ii)
+            j = actuatorRegions(iRegion)%cellIDs(2, ii)
+            k = actuatorRegions(iRegion)%cellIDs(3, ii)
+
+            ! This actually gets the force
+            FTmp = volRef(i, j, k) * F_fact
+
+            Vx = w(i, j, k, iVx)
+            Vy = w(i, j, k, iVy)
+            Vz = w(i, j, k, iVz)
+
+            ! this gets the heat addition rate
+            QTmp = volRef(i, j, k) * Q_fact
+
+            if (res) then
+                ! Momentum residuals
+                dw(i, j, k, imx:imz) = dw(i, j, k, imx:imz) - Ftmp
+
+                ! energy residuals
+                dw(i, j, k, iRhoE) = dw(i, j, k, iRhoE) - &
+                                     Ftmp(1) * Vx - Ftmp(2) * Vy - Ftmp(3) * Vz - Qtmp
+            else
+                ! Add in the local power contribution:
+                pLocal = pLocal + (Vx * Ftmp(1) + Vy * FTmp(2) + Vz * Ftmp(3)) * reDim
+            end if
+        end do
+
+    end subroutine sourceTerms_block
+
+    subroutine initres_block(varStart, varEnd, nn, sps)
+        !
+        !       initres initializes the given range of the residual. Either to
+        !       zero, steady computation, or to an unsteady term for the time
+        !       spectral and unsteady modes. For the coarser grid levels the
+        !       residual forcing term is taken into account.
+        !
+        use blockPointers
+        use flowVarRefState
+        use inputIteration
+        use inputPhysics
+        use inputTimeSpectral
+        use inputUnsteady
+        use iteration
+
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: varStart, varEnd, nn, sps
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: mm, ll, ii, jj, i, j, k, l, m
+        real(kind=realType) :: oneOverDt, tmp
+
+        real(kind=realType), dimension(:, :, :, :), pointer :: ww, wsp, wsp1
+        real(kind=realType), dimension(:, :, :), pointer :: volsp
+
+        ! Return immediately of no variables are in the range.
+
+        if (varEnd < varStart) return
+
+        ! Determine the equation mode and act accordingly.
+
+        select case (equationMode)
+        case (steady)
+
+            ! Steady state computation.
+            ! Determine the currently active multigrid level.
+
+            steadyLevelTest: if (currentLevel == groundLevel) then
+
+                ! Ground level of the multigrid cycle. Initialize the
+                ! owned residuals to zero.
+
+                do l = varStart, varEnd
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 2, il
+                                dw(i, j, k, l) = zero
+                            end do
+                        end do
+                    end do
+                end do
+
+                else steadyLevelTest
 
-                dw(i,j,k,1)=B11*dwo(1) + B12*dwo(2)+ B13*dwo(3) + B14*dwo(4) + B15*dwo(5)
-                dw(i,j,k,2)=B21*dwo(1) + B22*dwo(2)+ B23*dwo(3) + B24*dwo(4) + B25*dwo(5)
-                dw(i,j,k,3)=B31*dwo(1) + B32*dwo(2)+ B33*dwo(3) + B34*dwo(4) + B35*dwo(5)
-                dw(i,j,k,4)=B41*dwo(1) + B42*dwo(2)+ B43*dwo(3) + B44*dwo(4) + B45*dwo(5)
-                dw(i,j,k,5)=B51*dwo(1) + B52*dwo(2)+ B53*dwo(3) + B54*dwo(4) + B55*dwo(5)
-
-             enddo
-          enddo
-       enddo ! end of lowspeedpreconditioners three cells loops
-
-    else ! else.. i.e. if we do not have preconditioner turned on...
-       do l=1,nwf
-          do k=2,kl
-             do j=2,jl
-                do i=2,il
-                   dw(i,j,k,l) = (dw(i,j,k,l) + fw(i,j,k,l)) &
-                        * max(real(iblank(i,j,k), realType), zero)
-                enddo
-             enddo
-          enddo
-       enddo
-    endif
-
-  end subroutine residual_block
-
-  subroutine sourceTerms_block(nn, res, iRegion, pLocal)
-
-    ! Apply the source terms for the given block. Assume that the
-    ! block pointers are already set.
-    use constants
-    use actuatorRegionData
-    use blockPointers, only : volRef, dw, w
-    use flowVarRefState, only : pRef, uRef, LRef
-    use communication
-    use iteration, only : ordersConverged
-    implicit none
-
-    ! Input
-    integer(kind=intType), intent(in) ::  nn, iRegion
-    logical, intent(in) :: res
-    real(kind=realType), intent(inout) :: pLocal
-
-    ! Working
-    integer(kind=intType) :: i, j, k, ii, iStart, iEnd
-    real(kind=realType) :: Ftmp(3), Vx, Vy, Vz, F_fact(3), Q_fact, Qtmp, reDim, factor, oStart, oEnd
-
-    reDim = pRef*uRef
-
-    ! Compute the relaxation factor based on the ordersConverged
-
-    ! How far we are into the ramp:
-    if (ordersConverged < actuatorRegions(iRegion)%relaxStart) then
-       factor = zero
-    else if (ordersConverged > actuatorRegions(iRegion)%relaxEnd) then
-       factor = one
-    else ! In between
-       oStart = actuatorRegions(iRegion)%relaxStart
-       oEnd   = actuatorRegions(iRegion)%relaxEnd
-       factor = (ordersConverged - oStart)/(oEnd - oStart)
-    end if
-
-    ! Compute the constant force factor
-    F_fact = factor*actuatorRegions(iRegion)%force / actuatorRegions(iRegion)%volume / pRef
-
-    ! Heat factor. This is heat added per unit volume per unit time
-    Q_fact = factor * actuatorRegions(iRegion)%heat / actuatorRegions(iRegion)%volume / (pRef * uRef * LRef * LRef)
-
-    ! Loop over the ranges for this block
-    iStart = actuatorRegions(iRegion)%blkPtr(nn-1) + 1
-    iEnd =  actuatorRegions(iRegion)%blkPtr(nn)
-
-    !$AD II-LOOP
-    do ii=iStart, iEnd
-
-       ! Extract the cell ID.
-       i = actuatorRegions(iRegion)%cellIDs(1, ii)
-       j = actuatorRegions(iRegion)%cellIDs(2, ii)
-       k = actuatorRegions(iRegion)%cellIDs(3, ii)
-
-       ! This actually gets the force
-       FTmp = volRef(i, j, k) * F_fact
-
-       Vx = w(i, j, k, iVx)
-       Vy = w(i, j, k, iVy)
-       Vz = w(i, j, k, iVz)
-
-       ! this gets the heat addition rate
-       QTmp = volRef(i, j, k) * Q_fact
-
-       if (res) then
-          ! Momentum residuals
-          dw(i, j, k, imx:imz) = dw(i, j, k, imx:imz) - Ftmp
-
-          ! energy residuals
-          dw(i, j, k, iRhoE) = dw(i, j, k, iRhoE)  - &
-               Ftmp(1)*Vx - Ftmp(2)*Vy - Ftmp(3)*Vz - Qtmp
-       else
-          ! Add in the local power contribution:
-          pLocal = pLocal + (Vx*Ftmp(1) + Vy*FTmp(2) + Vz*Ftmp(3))*reDim
-       end if
-    end do
-
-  end subroutine sourceTerms_block
-
-  subroutine initres_block(varStart, varEnd, nn, sps)
-   !
-   !       initres initializes the given range of the residual. Either to
-   !       zero, steady computation, or to an unsteady term for the time
-   !       spectral and unsteady modes. For the coarser grid levels the
-   !       residual forcing term is taken into account.
-   !
-   use blockPointers
-   use flowVarRefState
-   use inputIteration
-   use inputPhysics
-   use inputTimeSpectral
-   use inputUnsteady
-   use iteration
-
-   implicit none
-   !
-   !      Subroutine arguments.
-   !
-   integer(kind=intType), intent(in) :: varStart, varEnd, nn, sps
-   !
-   !      Local variables.
-   !
-   integer(kind=intType) :: mm, ll, ii, jj, i, j, k, l, m
-   real(kind=realType)   :: oneOverDt, tmp
-
-   real(kind=realType), dimension(:,:,:,:), pointer :: ww, wsp, wsp1
-   real(kind=realType), dimension(:,:,:),   pointer :: volsp
-
-   ! Return immediately of no variables are in the range.
-
-   if(varEnd < varStart) return
-
-   ! Determine the equation mode and act accordingly.
-
-   select case (equationMode)
-   case (steady)
-
-      ! Steady state computation.
-      ! Determine the currently active multigrid level.
-
-      steadyLevelTest: if(currentLevel == groundLevel) then
-
-         ! Ground level of the multigrid cycle. Initialize the
-         ! owned residuals to zero.
-
-         do l=varStart,varEnd
-            do k=2,kl
-               do j=2,jl
-                  do i=2,il
-                     dw(i,j,k,l) = zero
-                  enddo
-               enddo
-            enddo
-         enddo
-
-      else steadyLevelTest
-
-         ! Coarse grid level. Initialize the owned cells to the
-         ! residual forcing terms.
-
-         do l=varStart,varEnd
-            do k=2,kl
-               do j=2,jl
-                  do i=2,il
-                     dw(i,j,k,l) = wr(i,j,k,l)
-                  enddo
-               enddo
-            enddo
-         enddo
-      endif steadyLevelTest
+                ! Coarse grid level. Initialize the owned cells to the
+                ! residual forcing terms.
+
+                do l = varStart, varEnd
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 2, il
+                                dw(i, j, k, l) = wr(i, j, k, l)
+                            end do
+                        end do
+                    end do
+                end do
+            end if steadyLevelTest
 
 #ifndef USE_TAPENADE
 
-   case (unsteady)
+        case (unsteady)
 
-      ! Unsteady computation.
-      ! A further distinction must be made.
+            ! Unsteady computation.
+            ! A further distinction must be made.
 
-      select case(timeIntegrationScheme)
+            select case (timeIntegrationScheme)
 
-      case (explicitRK)
+            case (explicitRK)
 
-         ! We are always on the finest grid.
-         ! Initialize the residual to zero.
+                ! We are always on the finest grid.
+                ! Initialize the residual to zero.
 
-         do l=varStart,varEnd
-            do k=2,kl
-               do j=2,jl
-                  do i=2,il
-                     dw(i,j,k,l) = zero
-                  enddo
-               enddo
-            enddo
-         enddo
+                do l = varStart, varEnd
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 2, il
+                                dw(i, j, k, l) = zero
+                            end do
+                        end do
+                    end do
+                end do
 
-         !=======================================================
+                !=======================================================
 
-      case (MD,BDF) ! Modified by HDN
+            case (MD, BDF) ! Modified by HDN
+
+                ! Store the inverse of the physical nonDimensional
+                ! time step a bit easier.
+
+                oneOverDt = timeRef / deltaT
 
-         ! Store the inverse of the physical nonDimensional
-         ! time step a bit easier.
+                ! Store the pointer for the variable to be used to compute
+                ! the unsteady source term. For a runge-kutta smoother this
+                ! is the solution of the zeroth runge-kutta stage. As for
+                ! rkStage == 0 this variable is not yet set w is used.
+                ! For other smoothers w is to be used as well.
+
+                if (smoother == RungeKutta .and. rkStage > 0) then
+                    ww => wn
+                else
+                    ww => w
+                end if
+
+                ! Determine the currently active multigrid level.
+
+                unsteadyLevelTest: if (currentLevel == groundLevel) then
+
+                    ! Ground level of the multigrid cycle. Initialize the
+                    ! owned cells to the unsteady source term. First the
+                    ! term for the current time level. Note that in w the
+                    ! velocities are stored and not the momentum variables.
+                    ! Therefore the if-statement is present to correct this.
+
+                    do l = varStart, varEnd
+
+                        if (l == ivx .or. l == ivy .or. l == ivz) then
+
+                            ! Momentum variables.
+
+                            do k = 2, kl
+                                do j = 2, jl
+                                    do i = 2, il
+                                        dw(i, j, k, l) = coefTime(0) * vol(i, j, k) &
+                                                         * ww(i, j, k, l) * ww(i, j, k, irho)
+                                    end do
+                                end do
+                            end do
+
+                        else
+
+                            ! Non-momentum variables, for which the variable
+                            ! to be solved is stored; for the flow equations this
+                            ! is the conservative variable, for the turbulent
+                            ! equations the primitive variable.
+
+                            do k = 2, kl
+                                do j = 2, jl
+                                    do i = 2, il
+                                        dw(i, j, k, l) = coefTime(0) * vol(i, j, k) &
+                                                         * ww(i, j, k, l)
+                                    end do
+                                end do
+                            end do
+
+                        end if
+
+                    end do
+
+                    ! The terms from the older time levels. Here the
+                    ! conservative variables are stored. In case of a
+                    ! deforming mesh, also the old volumes must be taken.
+
+                    deformingTest: if (deforming_Grid) then
+
+                        ! Mesh is deforming and thus the volumes can change.
+                        ! Use the old volumes as well.
+
+                        do m = 1, nOldLevels
+                            do l = varStart, varEnd
+                                do k = 2, kl
+                                    do j = 2, jl
+                                        do i = 2, il
+                                            dw(i, j, k, l) = dw(i, j, k, l) &
+                                                             + coefTime(m) * volOld(m, i, j, k) &
+                                                             * wOld(m, i, j, k, l)
+                                        end do
+                                    end do
+                                end do
+                            end do
+                        end do
 
-         oneOverDt = timeRef/deltaT
+                        else deformingTest
 
-         ! Store the pointer for the variable to be used to compute
-         ! the unsteady source term. For a runge-kutta smoother this
-         ! is the solution of the zeroth runge-kutta stage. As for
-         ! rkStage == 0 this variable is not yet set w is used.
-         ! For other smoothers w is to be used as well.
+                        ! Rigid mesh. The volumes remain constant.
 
-         if(smoother == RungeKutta .and. rkStage > 0) then
-            ww => wn
-         else
-            ww => w
-         endif
-
-         ! Determine the currently active multigrid level.
-
-         unsteadyLevelTest: if(currentLevel == groundLevel) then
-
-            ! Ground level of the multigrid cycle. Initialize the
-            ! owned cells to the unsteady source term. First the
-            ! term for the current time level. Note that in w the
-            ! velocities are stored and not the momentum variables.
-            ! Therefore the if-statement is present to correct this.
-
-            do l=varStart,varEnd
-
-               if(l == ivx .or. l == ivy .or. l == ivz) then
-
-                  ! Momentum variables.
-
-                  do k=2,kl
-                     do j=2,jl
-                        do i=2,il
-                           dw(i,j,k,l) = coefTime(0)*vol(i,j,k) &
-                                * ww(i,j,k,l)*ww(i,j,k,irho)
-                        enddo
-                     enddo
-                  enddo
-
-               else
-
-                  ! Non-momentum variables, for which the variable
-                  ! to be solved is stored; for the flow equations this
-                  ! is the conservative variable, for the turbulent
-                  ! equations the primitive variable.
-
-                  do k=2,kl
-                     do j=2,jl
-                        do i=2,il
-                           dw(i,j,k,l) = coefTime(0)*vol(i,j,k) &
-                                * ww(i,j,k,l)
-                        enddo
-                     enddo
-                  enddo
-
-               endif
-
-            enddo
-
-            ! The terms from the older time levels. Here the
-            ! conservative variables are stored. In case of a
-            ! deforming mesh, also the old volumes must be taken.
-
-            deformingTest: if( deforming_Grid ) then
+                        do m = 1, nOldLevels
+                            do l = varStart, varEnd
+                                do k = 2, kl
+                                    do j = 2, jl
+                                        do i = 2, il
+                                            dw(i, j, k, l) = dw(i, j, k, l) &
+                                                             + coefTime(m) * vol(i, j, k) &
+                                                             * wOld(m, i, j, k, l)
+                                        end do
+                                    end do
+                                end do
+                            end do
+                        end do
 
-               ! Mesh is deforming and thus the volumes can change.
-               ! Use the old volumes as well.
+                    end if deformingTest
+
+                    ! Multiply the time derivative by the inverse of the
+                    ! time step to obtain the true time derivative.
+                    ! This is done after the summation has been done, because
+                    ! otherwise you run into finite accuracy problems for
+                    ! very small time steps.
 
-               do m=1,nOldLevels
-                  do l=varStart,varEnd
-                     do k=2,kl
-                        do j=2,jl
-                           do i=2,il
-                              dw(i,j,k,l) = dw(i,j,k,l)                 &
-                                   + coefTime(m)*volOld(m,i,j,k) &
-                                   * wOld(m,i,j,k,l)
-                           enddo
-                        enddo
-                     enddo
-                  enddo
-               enddo
+                    do l = varStart, varEnd
+                        do k = 2, kl
+                            do j = 2, jl
+                                do i = 2, il
+                                    dw(i, j, k, l) = oneOverDt * dw(i, j, k, l)
+                                end do
+                            end do
+                        end do
+                    end do
+
+                    else unsteadyLevelTest
+
+                    ! Coarse grid level. Initialize the owned cells to the
+                    ! residual forcing term plus a correction for the
+                    ! multigrid treatment of the time derivative term.
+                    ! As the velocities are stored instead of the momentum,
+                    ! these terms must be multiplied by the density.
+
+                    tmp = oneOverDt * coefTime(0)
+
+                    do l = varStart, varEnd
+
+                        if (l == ivx .or. l == ivy .or. l == ivz) then
+
+                            ! Momentum variables.
+
+                            do k = 2, kl
+                                do j = 2, jl
+                                    do i = 2, il
+                                        dw(i, j, k, l) = tmp * vol(i, j, k) &
+                                                         * (ww(i, j, k, l) * ww(i, j, k, irho) &
+                                                            - w1(i, j, k, l) * w1(i, j, k, irho))
+                                        dw(i, j, k, l) = dw(i, j, k, l) + wr(i, j, k, l)
+                                    end do
+                                end do
+                            end do
+
+                        else
+
+                            ! Non-momentum variables.
+
+                            do k = 2, kl
+                                do j = 2, jl
+                                    do i = 2, il
+                                        dw(i, j, k, l) = tmp * vol(i, j, k) &
+                                                         * (ww(i, j, k, l) - w1(i, j, k, l))
+                                        dw(i, j, k, l) = dw(i, j, k, l) + wr(i, j, k, l)
+                                    end do
+                                end do
+                            end do
 
-            else deformingTest
+                        end if
+
+                    end do
 
-               ! Rigid mesh. The volumes remain constant.
+                end if unsteadyLevelTest
+
+            end select
+
+            !===========================================================
+
+        case (timeSpectral)
+
+            ! Time spectral computation. The time derivative of the
+            ! current solution is given by a linear combination of
+            ! all other solutions, i.e. a matrix vector product.
 
-               do m=1,nOldLevels
-                  do l=varStart,varEnd
-                     do k=2,kl
-                        do j=2,jl
-                           do i=2,il
-                              dw(i,j,k,l) = dw(i,j,k,l)            &
-                                   + coefTime(m)*vol(i,j,k) &
-                                   * wOld(m,i,j,k,l)
-                           enddo
-                        enddo
-                     enddo
-                  enddo
-               enddo
+            ! First store the section to which this block belongs
+            ! in jj.
 
-            endif deformingTest
+            jj = sectionID
 
-            ! Multiply the time derivative by the inverse of the
-            ! time step to obtain the true time derivative.
-            ! This is done after the summation has been done, because
-            ! otherwise you run into finite accuracy problems for
-            ! very small time steps.
+            ! Determine the currently active multigrid level.
 
-            do l=varStart,varEnd
-               do k=2,kl
-                  do j=2,jl
-                     do i=2,il
-                        dw(i,j,k,l) = oneOverDt*dw(i,j,k,l)
-                     enddo
-                  enddo
-               enddo
-            enddo
+            spectralLevelTest: if (currentLevel == groundLevel) then
 
-         else unsteadyLevelTest
+                ! Finest multigrid level. The residual must be
+                ! initialized to the time derivative.
 
-            ! Coarse grid level. Initialize the owned cells to the
-            ! residual forcing term plus a correction for the
-            ! multigrid treatment of the time derivative term.
-            ! As the velocities are stored instead of the momentum,
-            ! these terms must be multiplied by the density.
+                ! Initialize it to zero.
 
-            tmp = oneOverDt*coefTime(0)
+                do l = varStart, varEnd
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 2, il
+                                dw(i, j, k, l) = zero
+                            end do
+                        end do
+                    end do
+                end do
 
-            do l=varStart,varEnd
+                ! Loop over the number of terms which contribute
+                ! to the time derivative.
 
-               if(l == ivx .or. l == ivy .or. l == ivz) then
+                timeLoopFine: do mm = 1, nTimeIntervalsSpectral
 
-                  ! Momentum variables.
+                    ! Store the pointer for the variable to be used to
+                    ! compute the unsteady source term and the volume.
+                    ! Also store in ii the offset needed for vector
+                    ! quantities.
 
-                  do k=2,kl
-                     do j=2,jl
-                        do i=2,il
-                           dw(i,j,k,l) = tmp*vol(i,j,k)               &
-                                * (ww(i,j,k,l)*ww(i,j,k,irho)  &
-                                -  w1(i,j,k,l)*w1(i,j,k,irho))
-                           dw(i,j,k,l) = dw(i,j,k,l) + wr(i,j,k,l)
-                        enddo
-                     enddo
-                  enddo
+                    wsp => flowDoms(nn, currentLevel, mm)%w
+                    volsp => flowDoms(nn, currentLevel, mm)%vol
+                    ii = 3 * (mm - 1)
 
-               else
+                    ! Loop over the number of variables to be set.
 
-                  ! Non-momentum variables.
+                    varLoopFine: do l = varStart, varEnd
 
-                  do k=2,kl
-                     do j=2,jl
-                        do i=2,il
-                           dw(i,j,k,l) = tmp*vol(i,j,k)             &
-                                * (ww(i,j,k,l) - w1(i,j,k,l))
-                           dw(i,j,k,l) =  dw(i,j,k,l) + wr(i,j,k,l)
-                        enddo
-                     enddo
-                  enddo
+                        ! Test for a momentum variable.
 
-               endif
+                        if (l == ivx .or. l == ivy .or. l == ivz) then
 
-            enddo
+                            ! Momentum variable. A special treatment is
+                            ! needed because it is a vector and the velocities
+                            ! are stored instead of the momentum. Set the
+                            ! coefficient ll, which defines the row of the
+                            ! matrix used later on.
 
-         endif unsteadyLevelTest
+                            if (l == ivx) ll = 3 * sps - 2
+                            if (l == ivy) ll = 3 * sps - 1
+                            if (l == ivz) ll = 3 * sps
 
-      end select
+                            ! Loop over the owned cell centers to add the
+                            ! contribution from wsp.
 
-      !===========================================================
+                            do k = 2, kl
+                                do j = 2, jl
+                                    do i = 2, il
 
-   case (timeSpectral)
+                                        ! Store the matrix vector product with the
+                                        ! velocity in tmp.
 
-      ! Time spectral computation. The time derivative of the
-      ! current solution is given by a linear combination of
-      ! all other solutions, i.e. a matrix vector product.
+                                        tmp = dvector(jj, ll, ii + 1) * wsp(i, j, k, ivx) &
+                                              + dvector(jj, ll, ii + 2) * wsp(i, j, k, ivy) &
+                                              + dvector(jj, ll, ii + 3) * wsp(i, j, k, ivz)
 
-      ! First store the section to which this block belongs
-      ! in jj.
+                                        ! Update the residual. Note the
+                                        ! multiplication with the density to obtain
+                                        ! the correct time derivative for the
+                                        ! momentum variable.
 
-      jj = sectionID
+                                        dw(i, j, k, l) = dw(i, j, k, l) &
+                                                         + tmp * volsp(i, j, k) * wsp(i, j, k, irho)
 
-      ! Determine the currently active multigrid level.
+                                    end do
+                                end do
+                            end do
 
-      spectralLevelTest: if(currentLevel == groundLevel) then
+                        else
 
-         ! Finest multigrid level. The residual must be
-         ! initialized to the time derivative.
+                            ! Scalar variable.  Loop over the owned cells to
+                            ! add the contribution of wsp to the time
+                            ! derivative.
 
-         ! Initialize it to zero.
+                            do k = 2, kl
+                                do j = 2, jl
+                                    do i = 2, il
+                                        dw(i, j, k, l) = dw(i, j, k, l) &
+                                                         + dscalar(jj, sps, mm) &
+                                                         * volsp(i, j, k) * wsp(i, j, k, l)
 
-         do l=varStart,varEnd
-            do k=2,kl
-               do j=2,jl
-                  do i=2,il
-                     dw(i,j,k,l) = zero
-                  enddo
-               enddo
-            enddo
-         enddo
+                                    end do
+                                end do
+                            end do
 
-         ! Loop over the number of terms which contribute
-         ! to the time derivative.
+                        end if
 
-         timeLoopFine: do mm=1,nTimeIntervalsSpectral
+                    end do varLoopFine
 
-            ! Store the pointer for the variable to be used to
-            ! compute the unsteady source term and the volume.
-            ! Also store in ii the offset needed for vector
-            ! quantities.
+                end do timeLoopFine
+                else spectralLevelTest
 
-            wsp   => flowDoms(nn,currentLevel,mm)%w
-            volsp => flowDoms(nn,currentLevel,mm)%vol
-            ii    =  3*(mm-1)
+                ! Coarse grid level. Initialize the owned cells to the
+                ! residual forcing term plus a correction for the
+                ! multigrid treatment of the time derivative term.
 
-            ! Loop over the number of variables to be set.
+                ! Initialization to the residual forcing term.
 
-            varLoopFine: do l=varStart,varEnd
+                do l = varStart, varEnd
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 2, il
+                                dw(i, j, k, l) = wr(i, j, k, l)
+                            end do
+                        end do
+                    end do
+                end do
 
-               ! Test for a momentum variable.
+                ! Loop over the number of terms which contribute
+                ! to the time derivative.
 
-               if(l == ivx .or. l == ivy .or. l == ivz) then
+                timeLoopCoarse: do mm = 1, nTimeIntervalsSpectral
 
-                  ! Momentum variable. A special treatment is
-                  ! needed because it is a vector and the velocities
-                  ! are stored instead of the momentum. Set the
-                  ! coefficient ll, which defines the row of the
-                  ! matrix used later on.
+                    ! Store the pointer for the variable to be used to
+                    ! compute the unsteady source term and the pointers
+                    ! for wsp1, the solution when entering this MG level
+                    ! and for the volume.
+                    ! Furthermore store in ii the offset needed for
+                    ! vector quantities.
 
-                  if(l == ivx) ll = 3*sps - 2
-                  if(l == ivy) ll = 3*sps - 1
-                  if(l == ivz) ll = 3*sps
+                    wsp => flowDoms(nn, currentLevel, mm)%w
+                    wsp1 => flowDoms(nn, currentLevel, mm)%w1
+                    volsp => flowDoms(nn, currentLevel, mm)%vol
+                    ii = 3 * (mm - 1)
 
-                  ! Loop over the owned cell centers to add the
-                  ! contribution from wsp.
+                    ! Loop over the number of variables to be set.
 
-                  do k=2,kl
-                     do j=2,jl
-                        do i=2,il
+                    varLoopCoarse: do l = varStart, varEnd
 
-                           ! Store the matrix vector product with the
-                           ! velocity in tmp.
+                        ! Test for a momentum variable.
 
-                           tmp = dvector(jj,ll,ii+1)*wsp(i,j,k,ivx) &
-                                + dvector(jj,ll,ii+2)*wsp(i,j,k,ivy) &
-                                + dvector(jj,ll,ii+3)*wsp(i,j,k,ivz)
+                        if (l == ivx .or. l == ivy .or. l == ivz) then
 
-                           ! Update the residual. Note the
-                           ! multiplication with the density to obtain
-                           ! the correct time derivative for the
-                           ! momentum variable.
+                            ! Momentum variable. A special treatment is
+                            ! needed because it is a vector and the velocities
+                            ! are stored instead of the momentum. Set the
+                            ! coefficient ll, which defines the row of the
+                            ! matrix used later on.
 
-                           dw(i,j,k,l) = dw(i,j,k,l) &
-                                + tmp*volsp(i,j,k)*wsp(i,j,k,irho)
+                            if (l == ivx) ll = 3 * sps - 2
+                            if (l == ivy) ll = 3 * sps - 1
+                            if (l == ivz) ll = 3 * sps
 
-                        enddo
-                     enddo
-                  enddo
+                            ! Add the contribution of wps to the correction
+                            ! of the time derivative. The difference between
+                            ! the current time derivative and the one when
+                            ! entering this grid level must be added, because
+                            ! the residual forcing term only takes the spatial
+                            ! part of the coarse grid residual into account.
 
-               else
+                            do k = 2, kl
+                                do j = 2, jl
+                                    do i = 2, il
 
-                  ! Scalar variable.  Loop over the owned cells to
-                  ! add the contribution of wsp to the time
-                  ! derivative.
+                                        ! Store the matrix vector product with the
+                                        ! momentum in tmp.
 
-                  do k=2,kl
-                     do j=2,jl
-                        do i=2,il
-                           dw(i,j,k,l) = dw(i,j,k,l)        &
-                                + dscalar(jj,sps,mm) &
-                                * volsp(i,j,k)*wsp(i,j,k,l)
+                                        tmp = dvector(jj, ll, ii + 1) &
+                                              * (wsp(i, j, k, irho) * wsp(i, j, k, ivx) &
+                                                 - wsp1(i, j, k, irho) * wsp1(i, j, k, ivx)) &
+                                              + dvector(jj, ll, ii + 2) &
+                                              * (wsp(i, j, k, irho) * wsp(i, j, k, ivy) &
+                                                 - wsp1(i, j, k, irho) * wsp1(i, j, k, ivy)) &
+                                              + dvector(jj, ll, ii + 3) &
+                                              * (wsp(i, j, k, irho) * wsp(i, j, k, ivz) &
+                                                 - wsp1(i, j, k, irho) * wsp1(i, j, k, ivz))
 
-                        enddo
-                     enddo
-                  enddo
+                                        ! Add tmp to the residual. Multiply it by
+                                        ! the volume to obtain the finite volume
+                                        ! formulation of the  derivative of the
+                                        ! momentum.
 
-               endif
+                                        dw(i, j, k, l) = dw(i, j, k, l) + tmp * volsp(i, j, k)
 
-            enddo varLoopFine
+                                    end do
+                                end do
+                            end do
 
-         enddo timeLoopFine
-      else spectralLevelTest
+                        else
 
-         ! Coarse grid level. Initialize the owned cells to the
-         ! residual forcing term plus a correction for the
-         ! multigrid treatment of the time derivative term.
+                            ! Scalar variable. Loop over the owned cells
+                            ! to add the contribution of wsp to the correction
+                            ! of the time derivative.
 
-         ! Initialization to the residual forcing term.
+                            do k = 2, kl
+                                do j = 2, jl
+                                    do i = 2, il
+                                        dw(i, j, k, l) = dw(i, j, k, l) &
+                                                         + dscalar(jj, sps, mm) &
+                                                         * volsp(i, j, k) &
+                                                         * (wsp(i, j, k, l) - wsp1(i, j, k, l))
+                                    end do
+                                end do
+                            end do
 
-         do l=varStart,varEnd
-            do k=2,kl
-               do j=2,jl
-                  do i=2,il
-                     dw(i,j,k,l) = wr(i,j,k,l)
-                  enddo
-               enddo
-            enddo
-         enddo
+                        end if
 
-         ! Loop over the number of terms which contribute
-         ! to the time derivative.
+                    end do varLoopCoarse
 
-         timeLoopCoarse: do mm=1,nTimeIntervalsSpectral
+                end do timeLoopCoarse
+            end if spectralLevelTest
+#endif
+        end select
+
+        ! Set the residual in the halo cells to zero. This is just
+        ! to avoid possible problems. Their values do not matter.
+
+        do l = varStart, varEnd
+            do k = 0, kb
+                do j = 0, jb
+                    dw(0, j, k, l) = zero
+                    dw(1, j, k, l) = zero
+                    dw(ie, j, k, l) = zero
+                    dw(ib, j, k, l) = zero
+                end do
+            end do
+
+            do k = 0, kb
+                do i = 2, il
+                    dw(i, 0, k, l) = zero
+                    dw(i, 1, k, l) = zero
+                    dw(i, je, k, l) = zero
+                    dw(i, jb, k, l) = zero
+                end do
+            end do
+
+            do j = 2, jl
+                do i = 2, il
+                    dw(i, j, 0, l) = zero
+                    dw(i, j, 1, l) = zero
+                    dw(i, j, ke, l) = zero
+                    dw(i, j, kb, l) = zero
+                end do
+            end do
+        end do
 
-            ! Store the pointer for the variable to be used to
-            ! compute the unsteady source term and the pointers
-            ! for wsp1, the solution when entering this MG level
-            ! and for the volume.
-            ! Furthermore store in ii the offset needed for
-            ! vector quantities.
+    end subroutine initres_block
 
-            wsp   => flowDoms(nn,currentLevel,mm)%w
-            wsp1  => flowDoms(nn,currentLevel,mm)%w1
-            volsp => flowDoms(nn,currentLevel,mm)%vol
-            ii    =  3*(mm-1)
+    ! ----------------------------------------------------------------------
+    !                                                                      |
+    !                    No Tapenade Routine below this line               |
+    !                                                                      |
+    ! ----------------------------------------------------------------------
 
-            ! Loop over the number of variables to be set.
+#ifndef USE_TAPENADE
+    subroutine initres(varStart, varEnd)
+        !
+        ! Shell function to call initRes_block on all blocks
+        !
+        use blockPointers
+        use constants
+        use inputTimeSpectral
+        use iteration
+        use section
+        use utils, only: setPointers
+        !
+        !      Subroutine argument.
+        !
+        integer(kind=intType), intent(in) :: varStart, varEnd
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: sps, nn
+
+        ! Loop over the number of spectral solutions.
+
+        spectralLoop: do sps = 1, nTimeIntervalsSpectral
+
+            ! Loop over the number of blocks.
+
+            domains: do nn = 1, nDom
+
+                ! Set the pointers for this block.
+
+                call setPointers(nn, currentLevel, sps)
+
+                call initres_block(varStart, varEnd, nn, sps)
+
+            end do domains
+
+        end do spectralLoop
+
+    end subroutine initRes
+
+    subroutine sourceTerms
+
+        ! Shell function to call sourceTerms_block on all blocks
+
+        use constants
+        use blockPointers
+        use utils, only: setPointers
+        use actuatorRegionData
+        implicit none
+
+        integer(kind=intType) :: nn, iRegion
+        real(kind=realType) :: dummy
+
+        ! Loop over the number of domains.
+        domains: do nn = 1, nDom
+
+            ! Set the pointers for this block.
+            call setPointers(nn, 1, 1)
+            do iRegion = 1, nActuatorRegions
+                call sourceTerms_block(nn, .True., iRegion, dummy)
+            end do
+        end do domains
+
+    end subroutine sourceTerms
+
+    subroutine residual
+        !
+        ! Shell function to call residual_block on all blocks
+        !
+        use blockPointers
+        use constants
+        use inputTimeSpectral
+        use Iteration
+        use utils, only: setPointers
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: sps, nn
+
+        ! Loop over the number of spectral solutions.
+
+        spectralLoop: do sps = 1, nTimeIntervalsSpectral
+
+            ! Loop over the number of blocks.
+
+            domains: do nn = 1, nDom
+
+                ! Set the pointers for this block.
+
+                call setPointers(nn, currentLevel, sps)
+
+                call residual_block
+
+            end do domains
+
+        end do spectralLoop
+    end subroutine residual
+
+    subroutine computedwDADI
+        !
+        !       executeRkStage executes one runge kutta stage. The stage
+        !       number, rkStage, is defined in the local module iteration.
+        !
+        use blockPointers
+        use constants
+        use flowVarRefState
+        use inputIteration
+        use inputPhysics
+        use inputTimeSpectral
+        use inputUnsteady
+        use iteration
+        implicit none
+        !
+        !      Local parameter.
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, n
+        real(kind=realType) :: gm1, epsval, fac, eps2
+        real(kind=realType) :: uvel, vvel, wvel, cijk, cijkinv, c2inv, uvw
+        real(kind=realType) :: ri1, ri2, ri3, rj1, rj2, rj3, rk1, rk2, rk3, uu
+        real(kind=realType) :: ri, rj, rk, qsi, qsj, qsk, currentCfl
+        real(kind=realType) :: xfact, cInf, cInf2
+        real(kind=realType) :: dw1, dw2, dw3, dw4, dw5
+        real(kind=realType) :: a1, a2, a3, a4, a5, a6, a7, mut, ge
+        real(kind=realType) :: viscTerm1, viscTerm2, viscTerm3
+        real(kind=realType) :: metterm
+        real(kind=realType) :: unsteadyImpl, mult
+        real(kind=realType) :: sqrt2, sqrt2inv, alph, alphinv
+        real(kind=realType) :: volhalf, volhalfrho, volfact
+        real(kind=realType) :: mutpI, mutmI, mutpJ, mutmJ, mutpK, mutmK, &
+                               mettermp, mettermm, &
+                               viscTermI, viscTermJ, viscTermK
+
+        real(kind=realType), dimension(:, :, :), pointer :: qq_i, qq_j, qq_k, &
+                                                           cc_i, cc_j, cc_k, spectral_i, spectral_j, spectral_k, dual_dt
+        real(kind=realType), dimension(ie, 5) :: bbi, cci, ddi, ffi
+        real(kind=realType), dimension(je, 5) :: bbj, ccj, ddj, ffj
+        real(kind=realType), dimension(ke, 5) :: bbk, cck, ddk, ffk
+        real(kind=realType), dimension(ie) :: mettermi
+        real(kind=realType), dimension(je) :: mettermj
+        real(kind=realType), dimension(ke) :: mettermk
+        real(kind=realType), dimension(5) :: diagPlus, diagMinus
+
+        ! Set the value of the current cfl number,
+        ! depending on the situation. On the finest grid in the mg cycle
+        ! the second halo is computed, otherwise not.
+
+        currentCfl = cflCoarse
+        if (currentLevel == 1) then
+            currentCfl = cfl
+        end if
+        qq_i => scratch(:, :, :, 1)
+        qq_j => scratch(:, :, :, 2)
+        qq_k => scratch(:, :, :, 3)
+        cc_i => scratch(:, :, :, 4)
+        cc_j => scratch(:, :, :, 5)
+        cc_k => scratch(:, :, :, 6)
+        spectral_i => scratch(:, :, :, 7)
+        spectral_j => scratch(:, :, :, 8)
+        spectral_k => scratch(:, :, :, 9)
+        dual_dt => scratch(:, :, :, 10)
+
+        !  havent thought about iblank
+
+        !   these are factors for robustness.
+
+        epsval = 0.08
+        fac = 1.05
+        mut = zero
+        cInf2 = gammaInf * pInf / rhoInf
+        cInf = sqrt(cInf2)
+
+        qsi = zero
+        qsj = zero
+        qsk = zero
+        viscTermi = zero
+        viscTermj = zero
+        viscTermk = zero
+        sqrt2 = sqrt(two)
+        sqrt2inv = one / sqrt2
+
+        if (equationMode .eq. steady) then
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+                        dual_dt(i, j, k) = currentCfl * dtl(i, j, k) * vol(i, j, k)
+                    end do
+                end do
+            end do
+        else
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+                        unsteadyImpl = coefTime(0) * timeRef / deltaT
+                        mult = currentCfl * dtl(i, j, k)
+                        mult = mult / (mult * unsteadyImpl * vol(i, j, k) + one)
+                        dual_dt(i, j, k) = mult * vol(i, j, k)
+                    end do
+                end do
+            end do
+        end if
+
+        !     Set up some arrays
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    volhalf = half / vol(i, j, k)
+                    volhalfrho = volhalf / w(i, j, k, irho)
+                    uvel = w(i, j, k, ivx)
+                    vvel = w(i, j, k, ivy)
+                    wvel = w(i, j, k, ivz)
+
+                    cijk = sqrt(gamma(i, j, k) * p(i, j, k) / w(i, j, k, irho))
+
+                    ri1 = volhalf * (si(i, j, k, 1) + si(i - 1, j, k, 1))
+                    ri2 = volhalf * (si(i, j, k, 2) + si(i - 1, j, k, 2))
+                    ri3 = volhalf * (si(i, j, k, 3) + si(i - 1, j, k, 3))
+
+                    rj1 = volhalf * (sj(i, j, k, 1) + sj(i, j - 1, k, 1))
+                    rj2 = volhalf * (sj(i, j, k, 2) + sj(i, j - 1, k, 2))
+                    rj3 = volhalf * (sj(i, j, k, 3) + sj(i, j - 1, k, 3))
+
+                    rk1 = volhalf * (sk(i, j, k, 1) + sk(i, j, k - 1, 1))
+                    rk2 = volhalf * (sk(i, j, k, 2) + sk(i, j, k - 1, 2))
+                    rk3 = volhalf * (sk(i, j, k, 3) + sk(i, j, k - 1, 3))
+
+                    if (addGridVelocities) then
+                        qsi = (sFaceI(i - 1, j, k) + sFaceI(i, j, k)) * volhalf
+                        qsj = (sFaceJ(i, j - 1, k) + sFaceJ(i, j, k)) * volhalf
+                        qsk = (sFaceK(i, j, k - 1) + sFaceK(i, j, k)) * volhalf
+                    end if
+
+                    qq_i(i, j, k) = ri1 * w(i, j, k, ivx) + ri2 * w(i, j, k, ivy) + &
+                                    ri3 * w(i, j, k, ivz) - qsi
+                    qq_j(i, j, k) = rj1 * w(i, j, k, ivx) + rj2 * w(i, j, k, ivy) + &
+                                    rj3 * w(i, j, k, ivz) - qsj
+                    qq_k(i, j, k) = rk1 * w(i, j, k, ivx) + rk2 * w(i, j, k, ivy) + &
+                                    rk3 * w(i, j, k, ivz) - qsk
+
+                    ri = sqrt(ri1 * ri1 + ri2 * ri2 + ri3 * ri3)
+                    rj = sqrt(rj1 * rj1 + rj2 * rj2 + rj3 * rj3)
+                    rk = sqrt(rk1 * rk1 + rk2 * rk2 + rk3 * rk3)
+
+                    cc_i(i, j, k) = cijk * ri
+                    cc_j(i, j, k) = cijk * rj
+                    cc_k(i, j, k) = cijk * rk
+                    if (viscous) then
+                        mutpI = rlv(i, j, k) + rlv(i + 1, j, k)
+                        mutmI = rlv(i, j, k) + rlv(i - 1, j, k)
+                        mutpJ = rlv(i, j, k) + rlv(i, j + 1, k)
+                        mutmJ = rlv(i, j, k) + rlv(i, j - 1, k)
+                        mutpK = rlv(i, j, k) + rlv(i, j, k + 1)
+                        mutmK = rlv(i, j, k) + rlv(i, j, k - 1)
+                        if (eddyModel) then
+                            mutpI = mutpI + rev(i, j, k) + rev(i + 1, j, k)
+                            mutmI = mutpI + rev(i, j, k) + rev(i - 1, j, k)
+                            mutpJ = mutpJ + rev(i, j, k) + rev(i, j + 1, k)
+                            mutmJ = mutpJ + rev(i, j, k) + rev(i, j - 1, k)
+                            mutpK = mutpK + rev(i, j, k) + rev(i, j, k + 1)
+                            mutmK = mutpK + rev(i, j, k) + rev(i, j, k - 1)
+                        end if
+
+                        volfact = two / (vol(i, j, k) + vol(i + 1, j, k))
+                        mettermp = (si(i, j, k, 1) * si(i, j, k, 1) &
+                                    + si(i, j, k, 2) * si(i, j, k, 2) &
+                                    + si(i, j, k, 3) * si(i, j, k, 3)) * mutpI * volfact
+                        volfact = two / (vol(i, j, k) + vol(i - 1, j, k))
+                        mettermm = (si(i - 1, j, k, 1) * si(i - 1, j, k, 1) &
+                                    + si(i - 1, j, k, 2) * si(i - 1, j, k, 2) &
+                                    + si(i - 1, j, k, 3) * si(i - 1, j, k, 3)) * mutmI * volfact
+                        viscTermi = (mettermp + mettermm) * volhalfrho
+
+                        volfact = two / (vol(i, j, k) + vol(i, j + 1, k))
+                        mettermp = (sj(i, j, k, 1) * sj(i, j, k, 1) &
+                                    + sj(i, j, k, 2) * sj(i, j, k, 2) &
+                                    + sj(i, j, k, 3) * sj(i, j, k, 3)) * mutpJ * volfact
+                        volfact = two / (vol(i, j, k) + vol(i, j - 1, k))
+                        mettermm = (sj(i, j - 1, k, 1) * sj(i, j - 1, k, 1) &
+                                    + sj(i, j - 1, k, 2) * sj(i, j - 1, k, 2) &
+                                    + sj(i, j - 1, k, 3) * sj(i, j - 1, k, 3)) * mutmJ * volfact
+                        viscTermj = (mettermp + mettermm) * volhalfrho
+
+                        volfact = two / (vol(i, j, k) + vol(i, j, k + 1))
+                        mettermp = (sk(i, j, k, 1) * sk(i, j, k, 1) &
+                                    + sk(i, j, k, 2) * sk(i, j, k, 2) &
+                                    + sk(i, j, k, 3) * sk(i, j, k, 3)) * mutpK * volfact
+                        volfact = two / (vol(i, j, k) + vol(i, j, k - 1))
+                        mettermm = (sk(i, j, k - 1, 1) * sk(i, j, k - 1, 1) &
+                                    + sk(i, j, k - 1, 2) * sk(i, j, k - 1, 2) &
+                                    + sk(i, j, k - 1, 3) * sk(i, j, k - 1, 3)) * mutmK * volfact
+                        viscTermk = (mettermp + mettermm) * volhalfrho
+
+                    end if
+
+                    eps2 = ri * cInf * epsval
+                    spectral_i(i, j, k) = (fac * sqrt((abs(qq_i(i, j, k)) + cc_i(i, j, k))**2 &
+                                                      + eps2**2) + viscTermi) * dual_dt(i, j, k)
+                    eps2 = rj * cInf * epsval
+                    spectral_j(i, j, k) = (fac * sqrt((abs(qq_j(i, j, k)) + cc_j(i, j, k))**2 &
+                                                      + eps2**2) + viscTermj) * dual_dt(i, j, k)
+                    eps2 = rk * cInf * epsval
+                    spectral_k(i, j, k) = (fac * sqrt((abs(qq_k(i, j, k)) + cc_k(i, j, k))**2 &
+                                                      + eps2**2) + viscTermk) * dual_dt(i, j, k)
+                    spectral_i(i, j, k) = spectral_i(i, j, k) * zero
+                    spectral_j(i, j, k) = spectral_j(i, j, k) * zero
+                    spectral_k(i, j, k) = spectral_k(i, j, k) * zero
+                end do
+            end do
+        end do
+
+        !       Multiply by T_eta^inv
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    gm1 = gamma(i, j, k) - one
+                    cijk = sqrt(gamma(i, j, k) * p(i, j, k) / w(i, j, k, irho))
+                    c2inv = one / (cijk * cijk)
+                    xfact = two * cijk
+                    alphinv = sqrt2 * cijk / w(i, j, k, irho)
+
+                    uvel = w(i, j, k, ivx)
+                    vvel = w(i, j, k, ivy)
+                    wvel = w(i, j, k, ivz)
+                    uvw = half * (uvel * uvel + vvel * vvel + wvel * wvel)
+
+                    rj1 = half * (sj(i, j, k, 1) + sj(i, j - 1, k, 1))
+                    rj2 = half * (sj(i, j, k, 2) + sj(i, j - 1, k, 2))
+                    rj3 = half * (sj(i, j, k, 3) + sj(i, j - 1, k, 3))
+                    rj = sqrt(rj1 * rj1 + rj2 * rj2 + rj3 * rj3)
+                    uu = uvel * rj1 + vvel * rj2 + wvel * rj3
+                    rj1 = rj1 / rj
+                    rj2 = rj2 / rj
+                    rj3 = rj3 / rj
+
+                    dw1 = dw(i, j, k, 1)
+                    dw2 = dw(i, j, k, 2)
+                    dw3 = dw(i, j, k, 3)
+                    dw4 = dw(i, j, k, 4)
+                    dw5 = dw(i, j, k, 5)
+
+                    a1 = dw2 * uvel + dw3 * vvel + dw4 * wvel - dw5
+                    a1 = a1 * gm1 * c2inv + dw1 * (one - uvw * gm1 * c2inv)
+
+                    a2 = (rj2 * wvel - rj3 * vvel) * dw1 + rj3 * dw3 - rj2 * dw4
+                    a3 = (rj3 * uvel - rj1 * wvel) * dw1 + rj1 * dw4 - rj3 * dw2
+                    a4 = (rj1 * vvel - rj2 * uvel) * dw1 + rj2 * dw2 - rj1 * dw3
+
+                    a5 = uvw * dw1 - uvel * dw2 - vvel * dw3 - wvel * dw4 + dw5
+                    a5 = a5 * gm1 * c2inv
+
+                    a6 = uu * dw1 / rj - rj1 * dw2 - rj2 * dw3 - rj3 * dw4
+
+                    dw(i, j, k, 1) = a1 * rj1 + a2 / w(i, j, k, irho)
+                    dw(i, j, k, 2) = a1 * rj2 + a3 / w(i, j, k, irho)
+                    dw(i, j, k, 3) = a1 * rj3 + a4 / w(i, j, k, irho)
+                    dw(i, j, k, 4) = (half * a5 - a6 / xfact) * alphinv
+                    dw(i, j, k, 5) = (half * a5 + a6 / xfact) * alphinv
 
-            varLoopCoarse: do l=varStart,varEnd
+                end do
+            end do
+        end do
+
+        if (jl .gt. 2) then
+
+            !  Inversion in j
+
+            do k = 2, kl
+                do i = 2, il
+                    do j = 1, jl
+                        if (viscous) mut = rlv(i, j, k) + rlv(i, j + 1, k)
+                        if (eddyModel) mut = mut + rev(i, j, k) + rev(i, j + 1, k)
+
+                        volfact = one / (vol(i, j, k) + vol(i, j + 1, k))
+                        metterm = (sj(i, j, k, 1) * sj(i, j, k, 1) &
+                                   + sj(i, j, k, 2) * sj(i, j, k, 2) &
+                                   + sj(i, j, k, 3) * sj(i, j, k, 3))
+                        mettermj(j) = metterm * mut * volfact
+                    end do
+
+                    do j = 2, jl
+
+                        viscTerm1 = mettermj(j) / vol(i, j, k) / w(i, j, k, irho)
+                        viscTerm3 = mettermj(j - 1) / vol(i, j, k) / w(i, j, k, irho)
+                        viscTerm2 = viscTerm1 + viscTerm3
+
+                        volhalf = half / vol(i, j, k)
+                        rj1 = volhalf * (sj(i, j, k, 1) + sj(i, j - 1, k, 1))
+                        rj2 = volhalf * (sj(i, j, k, 2) + sj(i, j - 1, k, 2))
+                        rj3 = volhalf * (sj(i, j, k, 3) + sj(i, j - 1, k, 3))
+                        metterm = rj1 * rj1 + rj2 * rj2 + rj3 * rj3
+                        eps2 = epsval * epsval * cInf2 * metterm
+                        diagPlus(1) = half * (qq_j(i, j, k) + fac * sqrt(qq_j(i, j, k)**2 + eps2))
+                        diagPlus(2) = diagPlus(1)
+                        diagPlus(3) = diagPlus(1)
+                        diagPlus(4) = half * (qq_j(i, j, k) + cc_j(i, j, k) &
+                                              + fac * sqrt((qq_j(i, j, k) + cc_j(i, j, k))**2 + eps2))
+                        diagPlus(5) = half * (qq_j(i, j, k) - cc_j(i, j, k) &
+                                              + fac * sqrt((qq_j(i, j, k) - cc_j(i, j, k))**2 + eps2))
+                        diagMinus(1) = half * (qq_j(i, j, k) - fac * sqrt(qq_j(i, j, k)**2 + eps2))
+                        diagMinus(2) = diagMinus(1)
+                        diagMinus(3) = diagMinus(1)
+                        diagMinus(4) = half * (qq_j(i, j, k) + cc_j(i, j, k) &
+                                               - fac * sqrt((qq_j(i, j, k) + cc_j(i, j, k))**2 + eps2))
+                        diagMinus(5) = half * (qq_j(i, j, k) - cc_j(i, j, k) &
+                                               - fac * sqrt((qq_j(i, j, k) - cc_j(i, j, k))**2 + eps2))
+
+                        do n = 1, 5
+                            bbj(j + 1, n) = -viscTerm1 - diagPlus(n)
+                            ddj(j - 1, n) = -viscTerm3 + diagMinus(n)
+                            ccj(j, n) = viscTerm2 + diagPlus(n) - diagMinus(n)
+                        end do
+                    end do
+
+                    do n = 1, 5
+                        bbj(je, n) = zero
+                        ddj(1, n) = zero
+                        do j = 2, jl
+                            bbj(j, n) = bbj(j, n) * dual_dt(i, j, k) * max(real(iblank(i, j, k), realType), zero)
+                            ddj(j, n) = ddj(j, n) * dual_dt(i, j, k) * max(real(iblank(i, j, k), realType), zero)
+         ccj(j,n)=one+ccj(j,n)*dual_dt(i,j,k)*max(real(iblank(i,j,k),realType),zero)+spectral_i(i,j,k)+spectral_k(i,j,k)
+                            ffj(j, n) = dw(i, j, k, n)
+                        end do
+                    end do
+
+                    call tridiagsolve(bbj, ccj, ddj, ffj, jl)
+
+                    do n = 1, 5
+                        do j = 2, jl
+                            dw(i, j, k, n) = ffj(j, n)
+                        end do
+                    end do
 
-               ! Test for a momentum variable.
+                end do
+            end do
+
+        end if
+        !       Multiply by T_xi^inv T_eta
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    ri1 = half * (si(i, j, k, 1) + si(i - 1, j, k, 1))
+                    ri2 = half * (si(i, j, k, 2) + si(i - 1, j, k, 2))
+                    ri3 = half * (si(i, j, k, 3) + si(i - 1, j, k, 3))
+                    ri = sqrt(ri1 * ri1 + ri2 * ri2 + ri3 * ri3)
+                    ri1 = ri1 / ri
+                    ri2 = ri2 / ri
+                    ri3 = ri3 / ri
+
+                    rj1 = half * (sj(i, j, k, 1) + sj(i, j - 1, k, 1))
+                    rj2 = half * (sj(i, j, k, 2) + sj(i, j - 1, k, 2))
+                    rj3 = half * (sj(i, j, k, 3) + sj(i, j - 1, k, 3))
+                    rj = sqrt(rj1 * rj1 + rj2 * rj2 + rj3 * rj3)
+                    rj1 = rj1 / rj
+                    rj2 = rj2 / rj
+                    rj3 = rj3 / rj
+
+                    dw1 = dw(i, j, k, 1)
+                    dw2 = dw(i, j, k, 2)
+                    dw3 = dw(i, j, k, 3)
+                    dw4 = dw(i, j, k, 4)
+                    dw5 = dw(i, j, k, 5)
+
+                    a1 = (ri1 * rj1 + ri2 * rj2 + ri3 * rj3)
+                    a2 = (ri1 * rj2 - rj1 * ri2)
+                    a3 = (ri3 * rj2 - rj3 * ri2)
+                    a4 = (ri1 * rj3 - rj1 * ri3)
+                    a5 = (dw4 - dw5) * sqrt2inv
+                    a6 = (dw4 + dw5) * half
+                    a7 = (a3 * dw1 + a4 * dw2 - a2 * dw3 - a5 * a1) * sqrt2inv
+
+                    dw(i, j, k, 1) = a1 * dw1 + a2 * dw2 + a4 * dw3 + a5 * a3
+                    dw(i, j, k, 2) = -a2 * dw1 + a1 * dw2 - a3 * dw3 + a5 * a4
+                    dw(i, j, k, 3) = -a4 * dw1 + a3 * dw2 + a1 * dw3 - a5 * a2
+                    dw(i, j, k, 4) = -a7 + a6
+                    dw(i, j, k, 5) = a7 + a6
 
-               if(l == ivx .or. l == ivy .or. l == ivz) then
-
-                  ! Momentum variable. A special treatment is
-                  ! needed because it is a vector and the velocities
-                  ! are stored instead of the momentum. Set the
-                  ! coefficient ll, which defines the row of the
-                  ! matrix used later on.
+                end do
+            end do
+        end do
+
+        !  Multiply by diagonal
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    xfact = one + spectral_i(i, j, k) + spectral_j(i, j, k) &
+                            + spectral_k(i, j, k)
+                    dw(i, j, k, 1) = dw(i, j, k, 1) * xfact
+                    dw(i, j, k, 2) = dw(i, j, k, 2) * xfact
+                    dw(i, j, k, 3) = dw(i, j, k, 3) * xfact
+                    dw(i, j, k, 4) = dw(i, j, k, 4) * xfact
+                    dw(i, j, k, 5) = dw(i, j, k, 5) * xfact
+                end do
+            end do
+        end do
+
+        if (il .gt. 2) then
+
+            !  Inversion in i
+
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 1, il
+                        if (viscous) mut = rlv(i, j, k) + rlv(i + 1, j, k)
+                        if (eddyModel) mut = mut + rev(i, j, k) + rev(i + 1, j, k)
+
+                        volfact = one / (vol(i, j, k) + vol(i + 1, j, k))
+                        metterm = (si(i, j, k, 1) * si(i, j, k, 1) &
+                                   + si(i, j, k, 2) * si(i, j, k, 2) &
+                                   + si(i, j, k, 3) * si(i, j, k, 3))
+                        mettermi(i) = metterm * mut * volfact
+                    end do
+
+                    do i = 2, il
+
+                        viscTerm1 = mettermi(i) / vol(i, j, k) / w(i, j, k, irho)
+                        viscTerm3 = mettermi(i - 1) / vol(i, j, k) / w(i, j, k, irho)
+                        viscTerm2 = viscTerm1 + viscTerm3
+
+                        volhalf = half / vol(i, j, k)
+                        ri1 = volhalf * (si(i, j, k, 1) + si(i - 1, j, k, 1))
+                        ri2 = volhalf * (si(i, j, k, 2) + si(i - 1, j, k, 2))
+                        ri3 = volhalf * (si(i, j, k, 3) + si(i - 1, j, k, 3))
+                        metterm = ri1 * ri1 + ri2 * ri2 + ri3 * ri3
+                        eps2 = epsval * epsval * cInf2 * metterm
+                        diagPlus(1) = half * (qq_i(i, j, k) + fac * sqrt(qq_i(i, j, k)**2 + eps2))
+                        diagPlus(2) = diagPlus(1)
+                        diagPlus(3) = diagPlus(1)
+                        diagPlus(4) = half * (qq_i(i, j, k) + cc_i(i, j, k) &
+                                              + fac * sqrt((qq_i(i, j, k) + cc_i(i, j, k))**2 + eps2))
+                        diagPlus(5) = half * (qq_i(i, j, k) - cc_i(i, j, k) &
+                                              + fac * sqrt((qq_i(i, j, k) - cc_i(i, j, k))**2 + eps2))
+                        diagMinus(1) = half * (qq_i(i, j, k) - fac * sqrt(qq_i(i, j, k)**2 + eps2))
+                        diagMinus(2) = diagMinus(1)
+                        diagMinus(3) = diagMinus(1)
+                        diagMinus(4) = half * (qq_i(i, j, k) + cc_i(i, j, k) &
+                                               - fac * sqrt((qq_i(i, j, k) + cc_i(i, j, k))**2 + eps2))
+                        diagMinus(5) = half * (qq_i(i, j, k) - cc_i(i, j, k) &
+                                               - fac * sqrt((qq_i(i, j, k) - cc_i(i, j, k))**2 + eps2))
+                        do n = 1, 5
+                            bbi(i + 1, n) = -viscTerm1 - diagPlus(n)
+                            ddi(i - 1, n) = -viscTerm3 + diagMinus(n)
+                            cci(i, n) = viscTerm2 + diagPlus(n) - diagMinus(n)
+                        end do
+                    end do
+
+                    do n = 1, 5
+                        bbi(ie, n) = zero
+                        ddi(1, n) = zero
+                        do i = 2, il
+                            bbi(i, n) = bbi(i, n) * dual_dt(i, j, k) * max(real(iblank(i, j, k), realType), zero)
+                            ddi(i, n) = ddi(i, n) * dual_dt(i, j, k) * max(real(iblank(i, j, k), realType), zero)
+         cci(i,n)=one+cci(i,n)*dual_dt(i,j,k)*max(real(iblank(i,j,k),realType),zero)+spectral_j(i,j,k)+spectral_k(i,j,k)
+                            ffi(i, n) = dw(i, j, k, n)
+                        end do
+                    end do
+
+                    call tridiagsolve(bbi, cci, ddi, ffi, il)
+
+                    do n = 1, 5
+                        do i = 2, il
+                            dw(i, j, k, n) = ffi(i, n)
+                        end do
+                    end do
 
-                  if(l == ivx) ll = 3*sps - 2
-                  if(l == ivy) ll = 3*sps - 1
-                  if(l == ivz) ll = 3*sps
+                end do
+            end do
+
+        end if
+        !       Multiply by T_zeta^inv T_xi
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    ri1 = half * (si(i, j, k, 1) + si(i - 1, j, k, 1))
+                    ri2 = half * (si(i, j, k, 2) + si(i - 1, j, k, 2))
+                    ri3 = half * (si(i, j, k, 3) + si(i - 1, j, k, 3))
+                    ri = sqrt(ri1 * ri1 + ri2 * ri2 + ri3 * ri3)
+                    ri1 = ri1 / ri
+                    ri2 = ri2 / ri
+                    ri3 = ri3 / ri
+
+                    rk1 = half * (sk(i, j, k, 1) + sk(i, j, k - 1, 1))
+                    rk2 = half * (sk(i, j, k, 2) + sk(i, j, k - 1, 2))
+                    rk3 = half * (sk(i, j, k, 3) + sk(i, j, k - 1, 3))
+                    rk = sqrt(rk1 * rk1 + rk2 * rk2 + rk3 * rk3)
+                    rk1 = rk1 / rk
+                    rk2 = rk2 / rk
+                    rk3 = rk3 / rk
+
+                    dw1 = dw(i, j, k, 1)
+                    dw2 = dw(i, j, k, 2)
+                    dw3 = dw(i, j, k, 3)
+                    dw4 = dw(i, j, k, 4)
+                    dw5 = dw(i, j, k, 5)
+
+                    a1 = ri1 * rk1 + ri2 * rk2 + ri3 * rk3
+                    a2 = rk1 * ri2 - ri1 * rk2
+                    a3 = rk3 * ri2 - ri3 * rk2
+                    a4 = rk1 * ri3 - ri1 * rk3
+                    a5 = (dw4 - dw5) * sqrt2inv
+                    a6 = (dw4 + dw5) * half
+                    a7 = (a3 * dw1 + a4 * dw2 - a2 * dw3 - a5 * a1) * sqrt2inv
+
+                    dw(i, j, k, 1) = a1 * dw1 + a2 * dw2 + a4 * dw3 + a5 * a3
+                    dw(i, j, k, 2) = -a2 * dw1 + a1 * dw2 - a3 * dw3 + a5 * a4
+                    dw(i, j, k, 3) = -a4 * dw1 + a3 * dw2 + a1 * dw3 - a5 * a2
+                    dw(i, j, k, 4) = -a7 + a6
+                    dw(i, j, k, 5) = a7 + a6
+                end do
+            end do
+        end do
+
+        !  Multiply by diagonal
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    xfact = one + spectral_i(i, j, k) + spectral_j(i, j, k) &
+                            + spectral_k(i, j, k)
+                    dw(i, j, k, 1) = dw(i, j, k, 1) * xfact
+                    dw(i, j, k, 2) = dw(i, j, k, 2) * xfact
+                    dw(i, j, k, 3) = dw(i, j, k, 3) * xfact
+                    dw(i, j, k, 4) = dw(i, j, k, 4) * xfact
+                    dw(i, j, k, 5) = dw(i, j, k, 5) * xfact
+                end do
+            end do
+        end do
+
+        if (kl .gt. 2) then
+
+            !  Inversion in k
+
+            do j = 2, jl
+                do i = 2, il
+                    do k = 1, kl
+                        if (viscous) mut = rlv(i, j, k) + rlv(i, j, k + 1)
+                        if (eddyModel) mut = mut + rev(i, j, k) + rev(i, j, k + 1)
+
+                        volfact = 1./(vol(i, j, k) + vol(i, j, k + 1))
+                        metterm = sk(i, j, k, 1) * sk(i, j, k, 1) &
+                                  + sk(i, j, k, 2) * sk(i, j, k, 2) &
+                                  + sk(i, j, k, 3) * sk(i, j, k, 3)
+                        mettermk(k) = metterm * mut * volfact
+                    end do
+
+                    do k = 2, kl
+
+                        viscTerm1 = mettermk(k) / vol(i, j, k) / w(i, j, k, irho)
+                        viscTerm3 = mettermk(k - 1) / vol(i, j, k) / w(i, j, k, irho)
+                        viscTerm2 = viscTerm1 + viscTerm3
+
+                        volhalf = half / vol(i, j, k)
+                        rk1 = volhalf * (sk(i, j, k, 1) + sj(i, j, k - 1, 1))
+                        rk2 = volhalf * (sk(i, j, k, 2) + sj(i, j, k - 1, 2))
+                        rk3 = volhalf * (sk(i, j, k, 3) + sj(i, j, k - 1, 3))
+                        metterm = rk1 * rk1 + rk2 * rk2 + rk3 * rk3
+                        eps2 = epsval * epsval * cInf2 * metterm
+                        diagPlus(1) = half * (qq_k(i, j, k) + fac * sqrt(qq_k(i, j, k)**2 + eps2))
+                        diagPlus(2) = diagPlus(1)
+                        diagPlus(3) = diagPlus(1)
+                        diagPlus(4) = half * (qq_k(i, j, k) + cc_k(i, j, k) &
+                                              + fac * sqrt((qq_k(i, j, k) + cc_k(i, j, k))**2 + eps2))
+                        diagPlus(5) = half * (qq_k(i, j, k) - cc_k(i, j, k) &
+                                              + fac * sqrt((qq_k(i, j, k) - cc_k(i, j, k))**2 + eps2))
+                        diagMinus(1) = half * (qq_k(i, j, k) - fac * sqrt(qq_k(i, j, k)**2 + eps2))
+                        diagMinus(2) = diagMinus(1)
+                        diagMinus(3) = diagMinus(1)
+                        diagMinus(4) = half * (qq_k(i, j, k) + cc_k(i, j, k) &
+                                               - fac * sqrt((qq_k(i, j, k) + cc_k(i, j, k))**2 + eps2))
+                        diagMinus(5) = half * (qq_k(i, j, k) - cc_k(i, j, k) &
+                                               - fac * sqrt((qq_k(i, j, k) - cc_k(i, j, k))**2 + eps2))
+
+                        do n = 1, 5
+                            bbk(k + 1, n) = -viscTerm1 - diagPlus(n)
+                            ddk(k - 1, n) = -viscTerm3 + diagMinus(n)
+                            cck(k, n) = viscTerm2 + diagPlus(n) - diagMinus(n)
+                        end do
+                    end do
+
+                    do n = 1, 5
+                        bbk(ke, n) = zero
+                        ddk(1, n) = zero
+                        do k = 2, kl
+                            bbk(k, n) = bbk(k, n) * dual_dt(i, j, k) * max(real(iblank(i, j, k), realType), zero)
+                            ddk(k, n) = ddk(k, n) * dual_dt(i, j, k) * max(real(iblank(i, j, k), realType), zero)
+         cck(k,n)=one+cck(k,n)*dual_dt(i,j,k)*max(real(iblank(i,j,k),realType),zero)+spectral_i(i,j,k)+spectral_j(i,j,k)
+                            ffk(k, n) = dw(i, j, k, n)
+                        end do
+                    end do
+
+                    call tridiagsolve(bbk, cck, ddk, ffk, kl)
+
+                    do n = 1, 5
+                        do k = 2, kl
+                            dw(i, j, k, n) = ffk(k, n)
+                        end do
+                    end do
 
-                  ! Add the contribution of wps to the correction
-                  ! of the time derivative. The difference between
-                  ! the current time derivative and the one when
-                  ! entering this grid level must be added, because
-                  ! the residual forcing term only takes the spatial
-                  ! part of the coarse grid residual into account.
-
-                  do k=2,kl
-                     do j=2,jl
-                        do i=2,il
-
-                           ! Store the matrix vector product with the
-                           ! momentum in tmp.
-
-                           tmp = dvector(jj,ll,ii+1)                &
-                                * (wsp( i,j,k,irho)*wsp( i,j,k,ivx)  &
-                                -  wsp1(i,j,k,irho)*wsp1(i,j,k,ivx)) &
-                                +  dvector(jj,ll,ii+2)               &
-                                * (wsp( i,j,k,irho)*wsp( i,j,k,ivy)  &
-                                -  wsp1(i,j,k,irho)*wsp1(i,j,k,ivy)) &
-                                + dvector(jj,ll,ii+3)                &
-                                * (wsp( i,j,k,irho)*wsp( i,j,k,ivz)  &
-                                -  wsp1(i,j,k,irho)*wsp1(i,j,k,ivz))
+                end do
+            end do
+        end if
+
+        !       Multiply by T_zeta
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    uvel = w(i, j, k, ivx)
+                    vvel = w(i, j, k, ivy)
+                    wvel = w(i, j, k, ivz)
+
+                    rk1 = half * (sk(i, j, k, 1) + sk(i, j, k - 1, 1))
+                    rk2 = half * (sk(i, j, k, 2) + sk(i, j, k - 1, 2))
+                    rk3 = half * (sk(i, j, k, 3) + sk(i, j, k - 1, 3))
+
+                    rk = sqrt(rk1 * rk1 + rk2 * rk2 + rk3 * rk3)
+                    uu = uvel * rk1 + vvel * rk2 + wvel * rk3
+
+                    rk1 = rk1 / rk
+                    rk2 = rk2 / rk
+                    rk3 = rk3 / rk
+
+                    uvw = half * (uvel * uvel + vvel * vvel + wvel * wvel)
+                    cijkinv = sqrt(w(i, j, k, irho) / gamma(i, j, k) / p(i, j, k))
+                    alph = w(i, j, k, irho) * cijkinv * sqrt2inv
+                    xfact = two / cijkinv
+
+                    ge = gamma(i, j, k) * w(i, j, k, irhoE) / w(i, j, k, irho) - &
+                         (gamma(i, j, k) - one) * uvw
+
+                    dw1 = dw(i, j, k, 1)
+                    dw2 = dw(i, j, k, 2)
+                    dw3 = dw(i, j, k, 3)
+                    dw4 = dw(i, j, k, 4) * alph
+                    dw5 = dw(i, j, k, 5) * alph
+
+                    a1 = dw1 * rk1 + dw2 * rk2 + dw3 * rk3 + dw4 + dw5
+                    a2 = half * xfact * (dw4 - dw5)
+                    a3 = uvw * (rk1 * dw1 + rk2 * dw2 + rk3 * dw3)
+
+                    dw(i, j, k, 1) = a1
+                    dw(i, j, k, 2) = a1 * uvel - w(i, j, k, irho) * (rk3 * dw2 - rk2 * dw3) &
+                                     + a2 * rk1
+                    dw(i, j, k, 3) = a1 * vvel - w(i, j, k, irho) * (rk1 * dw3 - rk3 * dw1) &
+                                     + a2 * rk2
+                    dw(i, j, k, 4) = a1 * wvel - w(i, j, k, irho) * (rk2 * dw1 - rk1 * dw2) &
+                                     + a2 * rk3
+                    dw(i, j, k, 5) = a3 + w(i, j, k, irho) * &
+                                     ((vvel * rk3 - wvel * rk2) * dw1 &
+                                      + (wvel * rk1 - uvel * rk3) * dw2 &
+                                      + (uvel * rk2 - vvel * rk1) * dw3) &
+                                     + (ge + half * xfact * uu / rk) * dw4 &
+                                     + (ge - half * xfact * uu / rk) * dw5
 
-                           ! Add tmp to the residual. Multiply it by
-                           ! the volume to obtain the finite volume
-                           ! formulation of the  derivative of the
-                           ! momentum.
-
-                           dw(i,j,k,l) = dw(i,j,k,l) + tmp*volsp(i,j,k)
-
-                        enddo
-                     enddo
-                  enddo
-
-               else
+                end do
+            end do
+        end do
+
+        !      For consistency with update.
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    volfact = -one / vol(i, j, k)
+                    dw(i, j, k, 1) = dw(i, j, k, 1) * volfact
+                    dw(i, j, k, 2) = dw(i, j, k, 2) * volfact
+                    dw(i, j, k, 3) = dw(i, j, k, 3) * volfact
+                    dw(i, j, k, 4) = dw(i, j, k, 4) * volfact
+                    dw(i, j, k, 5) = dw(i, j, k, 5) * volfact
+                end do
+            end do
+        end do
+
+    end subroutine computedwDADI
+
+    subroutine tridiagsolve(bb, cc, dd, ff, nn)
+        use precision
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        integer(kind=intType) :: nn
+        real(kind=realType), dimension(nn + 1, 5) :: bb, cc, dd, ff
+
+        !     local variables
+
+        integer(kind=intType) :: m, n
+        real(kind=realType) :: d0, d2
+
+        do n = 1, 5
+            m = 2
+            d0 = 1./cc(m, n)
+            dd(m, n) = dd(m, n) * d0
+            ff(m, n) = ff(m, n) * d0
+
+            do m = 3, nn
+                d2 = bb(m, n)
+                d0 = 1./(cc(m, n) - d2 * dd(m - 1, n))
+                ff(m, n) = (ff(m, n) - d2 * ff(m - 1, n)) * d0
+                dd(m, n) = dd(m, n) * d0
+            end do
+
+            do m = nn - 1, 2, -1
+                ff(m, n) = ff(m, n) - dd(m, n) * ff(m + 1, n)
+            end do
+
+        end do
+
+    end subroutine tridiagsolve
+
+    subroutine residualAveraging
+        !
+        !       Implicit residual smoothing is a simple procedure that
+        !       replaces the residual at each point by a weighted sum of all
+        !       of the residuals in the block (although the residuals that are
+        !       closer to the cell under consideration are weighted more
+        !       heavily). This smoothing can be applied explicitly, but may
+        !       result in zero smoothed residuals for non-zero initial
+        !       residual modes.  For this reason, the smoothing is applied
+        !       implicitly in the following form:
+        !       -epz R{i+1} + (1 + 2 epz) R{i} -epz R{i-1} = r{i}
+        !       Where r{i} is the original residual at point i, and R{i} is
+        !       the implicitly smoothed residual at point i.  The analysis for
+        !       the 1-D scalar convection-diffusion equation shows that if
+        !       Epz >= (1/4)*((lambda/lambda*)^2 -1),
+        !       where lambda is the cfl number desired to be used, and
+        !       lambda* is the CFL limit of the unsmoothed scheme, the scheme
+        !       can be made unconditionally stable (arbitrarily large lambda).
+        !       In practice, lambda = 6-8 is common for Euler solutions.  For
+        !       RANS solutions lambda = 3-4 is what we have used in practice
+        !       resulting in a slight improvement in convergence rate, but,
+        !       more importantly, an increase in the robustness of the solver.
+        !       Note that this theoretical result can be shown for infinite
+        !       1-D problems, but also for finite-periodic 1-D problems and
+        !       finite-size 1-D problems (i.e. with block boundaries).  Such
+        !       theoretical results are not available for 3-D cases, where the
+        !       scheme is applied in an ADI fashion:
+        !       (1 -epzI d_ii)(1 -epzJ d_jj)(1 -epzK d_kk) r = r
+        !       Where d_ii, d_jj, d_kk are second difference operators in each
+        !       of the mesh coordinate directions.
+        !       For each of the coordinate direction solves, the initial
+        !       matrix problem is of the form:
+        !         -                                             - - -   - -
+        !         | (1+2 epz)   -epz                            | |r|   |r|
+        !         |   -epz    (1 +2 epz)    -epz                | |r|   |r|
+        !         |             -epz       (1 + 2 epz)   -epz   | |r| = |r|
+        !         |                 .           .           .   | |.|   |.|
+        !         |                   .            .          . | |.|   |.|
+        !         -                                             - - -   - -
+        !       And after the forward elimination phase a normalization is
+        !       applied so the result looks like:
+        !         -                   - - -   - -
+        !         |  1  -d            | |r|   |r|
+        !         |  0   1  -d        | |r|   |r|
+        !         |      0   1  -d    | |r| = |r|
+        !         |       .   .   .   | |.|   |.|
+        !         |          .  .   . | |.|   |.|
+        !         -                   - - -   - -
+        !       Which can then be used with a straightforward backsolve to
+        !       obtain the answer.
+        !       It is assumed that the pointers in blockPointers already
+        !       point to the correct block.
+        !
+        use constants
+        use blockPointers, only: nx, ny, nz, il, jl, kl, dw, p, iBlank
+        use inputIteration, only: cfl, cflCoarse, cflLimit, smoop
+        use flowVarRefState, only: pInfCorr, nwf
+        use iteration, only: currentLevel
+
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, l
+
+        real(kind=realType), parameter :: b = 2.0_realType
+
+        real(kind=realType) :: currentCfl, rfl0, plim
+        real(kind=realType) :: dpi, dpj, dpk, r
+        real(kind=realType), dimension(il, max(jl, kl)) :: epz, d, t, rfl
+
+        !      rfl0 is a measure of the ratio lambda/lambda*
+        !
+        currentCfl = cflCoarse
+        if (currentLevel == 1) then
+            currentCfl = cfl
+        end if
+
+        rfl0 = half * currentCfl / cflLimit
+
+        plim = 0.001_realType * pInfCorr
+
+        !       Smoothing in the i-direction. Only done when enough cells are
+        !       present in the i-direction.
+        !
+        if (nx > 1) then
+
+            do k = 2, kl
+
+                ! Compute smoothing coeficients
+
+                do j = 2, jl
+                    do i = 2, il
+                        dpi = abs(p(i + 1, j, k) - two * p(i, j, k) + p(i - 1, j, k)) &
+                              / (p(i + 1, j, k) + two * p(i, j, k) + p(i - 1, j, k) + plim)
+                        dpj = abs(p(i, j + 1, k) - two * p(i, j, k) + p(i, j - 1, k)) &
+                              / (p(i, j + 1, k) + two * p(i, j, k) + p(i, j - 1, k) + plim)
+                        dpk = abs(p(i, j, k + 1) - two * p(i, j, k) + p(i, j, k - 1)) &
+                              / (p(i, j, k + 1) + two * p(i, j, k) + p(i, j, k - 1) + plim)
+                        rfl(i, j) = one / (one + b * (dpi + dpj + dpk))
+                    end do
+                end do
 
-                  ! Scalar variable. Loop over the owned cells
-                  ! to add the contribution of wsp to the correction
-                  ! of the time derivative.
+                do j = 2, jl
+                    do i = 2, nx
+                        r = rfl0 * (rfl(i, j) + rfl(i + 1, j))
+                        epz(i, j) = fourth * smoop * dim(r * r, one) * max(real(iblank(i, j, k), realType), zero)
+                    end do
+                end do
 
-                  do k=2,kl
-                     do j=2,jl
-                        do i=2,il
-                           dw(i,j,k,l) = dw(i,j,k,l)        &
-                                + dscalar(jj,sps,mm) &
-                                * volsp(i,j,k)       &
-                                * (wsp(i,j,k,l) - wsp1(i,j,k,l))
-                        enddo
-                     enddo
-                  enddo
-
-               endif
-
-            enddo varLoopCoarse
-
-         enddo timeLoopCoarse
-      endif spectralLevelTest
-#endif
-   end select
-
-   ! Set the residual in the halo cells to zero. This is just
-   ! to avoid possible problems. Their values do not matter.
-
-   do l=varStart,varEnd
-      do k=0,kb
-         do j=0,jb
-            dw(0,j,k,l)  = zero
-            dw(1,j,k,l)  = zero
-            dw(ie,j,k,l) = zero
-            dw(ib,j,k,l) = zero
-         enddo
-      enddo
-
-      do k=0,kb
-         do i=2,il
-            dw(i,0,k,l)  = zero
-            dw(i,1,k,l)  = zero
-            dw(i,je,k,l) = zero
-            dw(i,jb,k,l) = zero
-         enddo
-      enddo
-
-      do j=2,jl
-         do i=2,il
-            dw(i,j,0,l)  = zero
-            dw(i,j,1,l)  = zero
-            dw(i,j,ke,l) = zero
-            dw(i,j,kb,l) = zero
-         enddo
-      enddo
-   enddo
-
- end subroutine initres_block
-
-
-  ! ----------------------------------------------------------------------
-  !                                                                      |
-  !                    No Tapenade Routine below this line               |
-  !                                                                      |
-  ! ----------------------------------------------------------------------
+                ! Zero out coefficients for boundary condition treatment
 
-#ifndef USE_TAPENADE
-  subroutine initres(varStart, varEnd)
-    !
-    ! Shell function to call initRes_block on all blocks
-    !
-    use blockPointers
-    use constants
-    use inputTimeSpectral
-    use iteration
-    use section
-    use utils, only : setPointers
-    !
-    !      Subroutine argument.
-    !
-    integer(kind=intType), intent(in) :: varStart, varEnd
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: sps, nn
-
-    ! Loop over the number of spectral solutions.
-
-    spectralLoop: do sps=1,nTimeIntervalsSpectral
-
-       ! Loop over the number of blocks.
-
-       domains: do nn=1,nDom
-
-          ! Set the pointers for this block.
-
-          call setPointers(nn, currentLevel, sps)
-
-          call initres_block(varStart, varEnd, nn, sps)
-
-       end do domains
-
-    end do spectralLoop
-
-  end subroutine initRes
-
-  subroutine sourceTerms
-
-    ! Shell function to call sourceTerms_block on all blocks
-
-    use constants
-    use blockPointers
-    use utils, only : setPointers
-    use actuatorRegionData
-    implicit none
-
-    integer(kind=intType) :: nn, iRegion
-    real(kind=realType) :: dummy
-
-    ! Loop over the number of domains.
-    domains: do nn=1,nDom
-
-       ! Set the pointers for this block.
-       call setPointers(nn, 1, 1)
-       do iRegion=1, nActuatorRegions
-          call sourceTerms_block(nn, .True., iRegion, dummy)
-       end do
-    end do domains
-
-  end subroutine sourceTerms
-
-  subroutine residual
-    !
-    ! Shell function to call residual_block on all blocks
-    !
-    use blockPointers
-    use constants
-    use inputTimeSpectral
-    use Iteration
-    use utils, only : setPointers
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: sps, nn
-
-    ! Loop over the number of spectral solutions.
-
-    spectralLoop: do sps=1,nTimeIntervalsSpectral
-
-       ! Loop over the number of blocks.
-
-       domains: do nn=1,nDom
-
-          ! Set the pointers for this block.
-
-          call setPointers(nn, currentLevel, sps)
-
-          call residual_block
-
-       end do domains
-
-    end do spectralLoop
-  end subroutine residual
-
-  subroutine computedwDADI
-    !
-    !       executeRkStage executes one runge kutta stage. The stage
-    !       number, rkStage, is defined in the local module iteration.
-    !
-    use blockPointers
-    use constants
-    use flowVarRefState
-    use inputIteration
-    use inputPhysics
-    use inputTimeSpectral
-    use inputUnsteady
-    use iteration
-    implicit none
-    !
-    !      Local parameter.
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, n
-    real(kind=realType) :: gm1, epsval, fac, eps2
-    real(kind=realType) :: uvel, vvel, wvel, cijk, cijkinv, c2inv, uvw
-    real(kind=realType) :: ri1, ri2, ri3, rj1, rj2, rj3, rk1, rk2, rk3, uu
-    real(kind=realType) :: ri, rj, rk, qsi, qsj, qsk, currentCfl
-    real(kind=realType) :: xfact,  cInf, cInf2
-    real(kind=realType) :: dw1, dw2, dw3, dw4, dw5
-    real(kind=realType) :: a1, a2, a3, a4, a5, a6, a7, mut, ge
-    real(kind=realType) :: viscTerm1, viscTerm2, viscTerm3
-    real(kind=realType) :: metterm
-    real(kind=realType) :: unsteadyImpl, mult
-    real(kind=realType) :: sqrt2, sqrt2inv, alph, alphinv
-    real(kind=realType) :: volhalf, volhalfrho, volfact
-    real(kind=realType) :: mutpI, mutmI, mutpJ, mutmJ, mutpK, mutmK, &
-         mettermp,mettermm, &
-         viscTermI,viscTermJ,viscTermK
-
-    real(kind=realType), dimension(:,:,:), pointer:: qq_i,qq_j,qq_k, &
-         cc_i,cc_j,cc_k,spectral_i,spectral_j,spectral_k,dual_dt
-    real(kind=realType), dimension(ie,5) :: bbi,cci,ddi,ffi
-    real(kind=realType), dimension(je,5) :: bbj,ccj,ddj,ffj
-    real(kind=realType), dimension(ke,5) :: bbk,cck,ddk,ffk
-    real(kind=realType), dimension(ie) :: mettermi
-    real(kind=realType), dimension(je) :: mettermj
-    real(kind=realType), dimension(ke) :: mettermk
-    real(kind=realType), dimension(5) :: diagPlus,diagMinus
-
-
-    ! Set the value of the current cfl number,
-    ! depending on the situation. On the finest grid in the mg cycle
-    ! the second halo is computed, otherwise not.
-
-    currentCfl = cflCoarse
-    if (currentLevel == 1) then
-       currentCfl = cfl
-    end if
-    qq_i => scratch(:,:,:,1)
-    qq_j => scratch(:,:,:,2)
-    qq_k => scratch(:,:,:,3)
-    cc_i => scratch(:,:,:,4)
-    cc_j => scratch(:,:,:,5)
-    cc_k => scratch(:,:,:,6)
-    spectral_i => scratch(:,:,:,7)
-    spectral_j => scratch(:,:,:,8)
-    spectral_k => scratch(:,:,:,9)
-    dual_dt => scratch(:,:,:,10)
-
-    !  havent thought about iblank
-
-    !   these are factors for robustness.
-
-    epsval = 0.08
-    fac    = 1.05
-    mut    = zero
-    cInf2  = gammaInf*pInf/rhoInf
-    cInf   = sqrt(cInf2)
-
-
-    qsi = zero
-    qsj = zero
-    qsk = zero
-    viscTermi=zero
-    viscTermj=zero
-    viscTermk=zero
-    sqrt2=sqrt(two)
-    sqrt2inv=one/sqrt2
-
-
-    if(equationMode.eq.steady) then
-       do k=2,kl
-          do j=2,jl
-             do i=2,il
-                dual_dt(i,j,k)= currentCfl*dtl(i,j,k)*vol(i,j,k)
-             enddo
-          enddo
-       enddo
-    else
-       do k=2,kl
-          do j=2,jl
-             do i=2,il
-                unsteadyImpl = coefTime(0)*timeRef/deltaT
-                mult = currentCfl*dtl(i,j,k)
-                mult = mult/(mult*unsteadyImpl*vol(i,j,k) + one)
-                dual_dt(i,j,k)   = mult*vol(i,j,k)
-             enddo
-          enddo
-       enddo
-    endif
-
-    !     Set up some arrays
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-             volhalf    = half/vol(i,j,k)
-             volhalfrho = volhalf/w(i,j,k,irho)
-             uvel  = w(i,j,k,ivx)
-             vvel  = w(i,j,k,ivy)
-             wvel  = w(i,j,k,ivz)
-
-             cijk  = sqrt(gamma(i,j,k)*p(i,j,k)/w(i,j,k,irho))
-
-             ri1  = volhalf*(si(i,j,k,1) + si(i-1,j,k,1))
-             ri2  = volhalf*(si(i,j,k,2) + si(i-1,j,k,2))
-             ri3  = volhalf*(si(i,j,k,3) + si(i-1,j,k,3))
-
-             rj1  = volhalf*(sj(i,j,k,1) + sj(i,j-1,k,1))
-             rj2  = volhalf*(sj(i,j,k,2) + sj(i,j-1,k,2))
-             rj3  = volhalf*(sj(i,j,k,3) + sj(i,j-1,k,3))
-
-             rk1  = volhalf*(sk(i,j,k,1) + sk(i,j,k-1,1))
-             rk2  = volhalf*(sk(i,j,k,2) + sk(i,j,k-1,2))
-             rk3  = volhalf*(sk(i,j,k,3) + sk(i,j,k-1,3))
-
-             if( addGridVelocities ) then
-                qsi = (sFaceI(i-1,j,k) + sFaceI(i,j,k))*volhalf
-                qsj = (sFaceJ(i,j-1,k) + sFaceJ(i,j,k))*volhalf
-                qsk = (sFaceK(i,j,k-1) + sFaceK(i,j,k))*volhalf
-             endif
-
-
-             qq_i(i,j,k) = ri1*w(i,j,k,ivx) + ri2*w(i,j,k,ivy) +  &
-                  ri3*w(i,j,k,ivz) - qsi
-             qq_j(i,j,k) = rj1*w(i,j,k,ivx) + rj2*w(i,j,k,ivy) +  &
-                  rj3*w(i,j,k,ivz) - qsj
-             qq_k(i,j,k) = rk1*w(i,j,k,ivx) + rk2*w(i,j,k,ivy) +  &
-                  rk3*w(i,j,k,ivz) - qsk
-
-             ri   = sqrt(ri1*ri1+ri2*ri2+ri3*ri3)
-             rj   = sqrt(rj1*rj1+rj2*rj2+rj3*rj3)
-             rk   = sqrt(rk1*rk1+rk2*rk2+rk3*rk3)
-
-             cc_i(i,j,k) = cijk*ri
-             cc_j(i,j,k) = cijk*rj
-             cc_k(i,j,k) = cijk*rk
-             if( viscous )  then
-                mutpI = rlv(i,j,k)+rlv(i+1,j,k)
-                mutmI = rlv(i,j,k)+rlv(i-1,j,k)
-                mutpJ = rlv(i,j,k)+rlv(i,j+1,k)
-                mutmJ = rlv(i,j,k)+rlv(i,j-1,k)
-                mutpK = rlv(i,j,k)+rlv(i,j,k+1)
-                mutmK = rlv(i,j,k)+rlv(i,j,k-1)
-                if( eddyModel ) then
-                   mutpI = mutpI+rev(i,j,k)+rev(i+1,j,k)
-                   mutmI = mutpI+rev(i,j,k)+rev(i-1,j,k)
-                   mutpJ = mutpJ+rev(i,j,k)+rev(i,j+1,k)
-                   mutmJ = mutpJ+rev(i,j,k)+rev(i,j-1,k)
-                   mutpK = mutpK+rev(i,j,k)+rev(i,j,k+1)
-                   mutmK = mutpK+rev(i,j,k)+rev(i,j,k-1)
-                endif
-
-                volfact  =   two/(vol(i,j,k)+vol(i+1,j,k))
-                mettermp =   (si(i,j,k,1)*si(i,j,k,1) &
-                     +si(i,j,k,2)*si(i,j,k,2) &
-                     +si(i,j,k,3)*si(i,j,k,3))*mutpI*volfact
-                volfact  =   two/(vol(i,j,k)+vol(i-1,j,k))
-                mettermm =   (si(i-1,j,k,1)*si(i-1,j,k,1) &
-                     +si(i-1,j,k,2)*si(i-1,j,k,2) &
-                     +si(i-1,j,k,3)*si(i-1,j,k,3))*mutmI*volfact
-                viscTermi=   (mettermp+mettermm)*volhalfrho
-
-                volfact  =   two/(vol(i,j,k)+vol(i,j+1,k))
-                mettermp =   (sj(i,j,k,1)*sj(i,j,k,1) &
-                     +sj(i,j,k,2)*sj(i,j,k,2) &
-                     +sj(i,j,k,3)*sj(i,j,k,3))*mutpJ*volfact
-                volfact  =   two/(vol(i,j,k)+vol(i,j-1,k))
-                mettermm =   (sj(i,j-1,k,1)*sj(i,j-1,k,1) &
-                     +sj(i,j-1,k,2)*sj(i,j-1,k,2) &
-                     +sj(i,j-1,k,3)*sj(i,j-1,k,3))*mutmJ*volfact
-                viscTermj=   (mettermp+mettermm)*volhalfrho
-
-                volfact  =   two/(vol(i,j,k)+vol(i,j,k+1))
-                mettermp =   (sk(i,j,k,1)*sk(i,j,k,1) &
-                     +sk(i,j,k,2)*sk(i,j,k,2) &
-                     +sk(i,j,k,3)*sk(i,j,k,3))*mutpK*volfact
-                volfact  =   two/(vol(i,j,k)+vol(i,j,k-1))
-                mettermm =   (sk(i,j,k-1,1)*sk(i,j,k-1,1) &
-                     +sk(i,j,k-1,2)*sk(i,j,k-1,2) &
-                     +sk(i,j,k-1,3)*sk(i,j,k-1,3))*mutmK*volfact
-                viscTermk=   (mettermp+mettermm)*volhalfrho
-
-             endif
-
-             eps2 = ri*cInf*epsval
-             spectral_i(i,j,k) = (fac*sqrt((abs(qq_i(i,j,k))+cc_i(i,j,k))**2 &
-                  + eps2**2)+viscTermi)*dual_dt(i,j,k)
-             eps2 = rj*cInf*epsval
-             spectral_j(i,j,k) = (fac*sqrt((abs(qq_j(i,j,k))+cc_j(i,j,k))**2 &
-                  + eps2**2)+viscTermj)*dual_dt(i,j,k)
-             eps2 = rk*cInf*epsval
-             spectral_k(i,j,k) = (fac*sqrt((abs(qq_k(i,j,k))+cc_k(i,j,k))**2 &
-                  + eps2**2)+viscTermk)*dual_dt(i,j,k)
-             spectral_i(i,j,k)=spectral_i(i,j,k)*zero
-             spectral_j(i,j,k)=spectral_j(i,j,k)*zero
-             spectral_k(i,j,k)=spectral_k(i,j,k)*zero
-          enddo
-       enddo
-    enddo
-
-    !       Multiply by T_eta^inv
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-
-             gm1   = gamma(i,j,k)-one
-             cijk  = sqrt(gamma(i,j,k)*p(i,j,k)/w(i,j,k,irho))
-             c2inv = one/(cijk*cijk)
-             xfact = two*cijk
-             alphinv  = sqrt2*cijk/w(i,j,k,irho)
-
-             uvel  = w(i,j,k,ivx)
-             vvel  = w(i,j,k,ivy)
-             wvel  = w(i,j,k,ivz)
-             uvw   = half*(uvel*uvel+vvel*vvel+wvel*wvel)
-
-             rj1  = half*(sj(i,j,k,1) + sj(i,j-1,k,1))
-             rj2  = half*(sj(i,j,k,2) + sj(i,j-1,k,2))
-             rj3  = half*(sj(i,j,k,3) + sj(i,j-1,k,3))
-             rj   = sqrt(rj1*rj1+rj2*rj2+rj3*rj3)
-             uu   = uvel*rj1+vvel*rj2+wvel*rj3
-             rj1  = rj1/rj
-             rj2  = rj2/rj
-             rj3  = rj3/rj
-
-             dw1   = dw(i,j,k,1)
-             dw2   = dw(i,j,k,2)
-             dw3   = dw(i,j,k,3)
-             dw4   = dw(i,j,k,4)
-             dw5   = dw(i,j,k,5)
-
-             a1    =  dw2*uvel+dw3*vvel+dw4*wvel-dw5
-             a1    =  a1*gm1*c2inv+dw1*(one-uvw*gm1*c2inv)
-
-             a2    =  (rj2*wvel-rj3*vvel)*dw1+rj3*dw3-rj2*dw4
-             a3    =  (rj3*uvel-rj1*wvel)*dw1+rj1*dw4-rj3*dw2
-             a4    =  (rj1*vvel-rj2*uvel)*dw1+rj2*dw2-rj1*dw3
-
-             a5    =  uvw*dw1-uvel*dw2-vvel*dw3-wvel*dw4+dw5
-             a5    =  a5*gm1*c2inv
-
-             a6    =  uu*dw1/rj-rj1*dw2-rj2*dw3-rj3*dw4
-
-             dw(i,j,k,1)  = a1*rj1+a2/w(i,j,k,irho)
-             dw(i,j,k,2)  = a1*rj2+a3/w(i,j,k,irho)
-             dw(i,j,k,3)  = a1*rj3+a4/w(i,j,k,irho)
-             dw(i,j,k,4)  = (half*a5-a6/xfact)*alphinv
-             dw(i,j,k,5)  = (half*a5+a6/xfact)*alphinv
-
-          enddo
-       enddo
-    enddo
-
-    if(jl.gt.2) then
-
-       !  Inversion in j
-
-       do k=2,kl
-          do i=2,il
-             do j=1,jl
-                if( viscous )   mut = rlv(i,j,k)+rlv(i,j+1,k)
-                if( eddyModel ) mut = mut+rev(i,j,k)+rev(i,j+1,k)
-
-                volfact=one/(vol(i,j,k)+vol(i,j+1,k))
-                metterm =   (sj(i,j,k,1)*sj(i,j,k,1) &
-                     +sj(i,j,k,2)*sj(i,j,k,2) &
-                     +sj(i,j,k,3)*sj(i,j,k,3))
-                mettermj(j)= metterm*mut*volfact
-             enddo
-
-             do j=2,jl
-
-                viscTerm1 = mettermj(j)  /vol(i,j,k)/w(i,j,k,irho)
-                viscTerm3 = mettermj(j-1)/vol(i,j,k)/w(i,j,k,irho)
-                viscTerm2 = viscTerm1+viscTerm3
-
-                volhalf = half/vol(i,j,k)
-                rj1  = volhalf*(sj(i,j,k,1) + sj(i,j-1,k,1))
-                rj2  = volhalf*(sj(i,j,k,2) + sj(i,j-1,k,2))
-                rj3  = volhalf*(sj(i,j,k,3) + sj(i,j-1,k,3))
-                metterm     = rj1*rj1+rj2*rj2+rj3*rj3
-                eps2        = epsval*epsval*cInf2*metterm
-                diagPlus(1) =half*(qq_j(i,j,k)+fac*sqrt(qq_j(i,j,k)**2+eps2))
-                diagPlus(2) =diagPlus(1)
-                diagPlus(3) =diagPlus(1)
-                diagPlus(4) =half*(qq_j(i,j,k)+cc_j(i,j,k)   &
-                     +fac*sqrt((qq_j(i,j,k)+cc_j(i,j,k))**2+eps2))
-                diagPlus(5) =half*(qq_j(i,j,k)-cc_j(i,j,k)   &
-                     +fac*sqrt((qq_j(i,j,k)-cc_j(i,j,k))**2+eps2))
-                diagMinus(1) =half*(qq_j(i,j,k)-fac*sqrt(qq_j(i,j,k)**2+eps2))
-                diagMinus(2) =diagMinus(1)
-                diagMinus(3) =diagMinus(1)
-                diagMinus(4) =half*(qq_j(i,j,k)+cc_j(i,j,k)   &
-                     -fac*sqrt((qq_j(i,j,k)+cc_j(i,j,k))**2+eps2))
-                diagMinus(5) =half*(qq_j(i,j,k)-cc_j(i,j,k)   &
-                     -fac*sqrt((qq_j(i,j,k)-cc_j(i,j,k))**2+eps2))
-
-                do n=1,5
-                   bbj(j+1,n)= -viscTerm1-diagPlus(n)
-                   ddj(j-1,n)= -viscTerm3+diagMinus(n)
-                   ccj(j  ,n)=  viscTerm2+diagPlus(n)-diagMinus(n)
-                enddo
-             enddo
-
-             do n=1,5
-                bbj(je,n)=zero
-                ddj(1 ,n)=zero
-                do j=2,jl
-                   bbj(j,n)=bbj(j,n)*dual_dt(i,j,k)*max(real(iblank(i,j,k),realType),zero)
-                   ddj(j,n)=ddj(j,n)*dual_dt(i,j,k)*max(real(iblank(i,j,k),realType),zero)
-                   ccj(j,n)=one+ccj(j,n)*dual_dt(i,j,k)*max(real(iblank(i,j,k),realType),zero)+spectral_i(i,j,k)+spectral_k(i,j,k)
-                   ffj(j,n)=dw(i,j,k,n)
-                enddo
-             enddo
-
-             call tridiagsolve(bbj,ccj,ddj,ffj,jl)
-
-             do n=1,5
-                do j=2,jl
-                   dw(i,j,k,n)=ffj(j,n)
-                enddo
-             enddo
-
-          enddo
-       enddo
-
-    endif
-    !       Multiply by T_xi^inv T_eta
-
-
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-             ri1  = half*(si(i,j,k,1) + si(i-1,j,k,1))
-             ri2  = half*(si(i,j,k,2) + si(i-1,j,k,2))
-             ri3  = half*(si(i,j,k,3) + si(i-1,j,k,3))
-             ri   = sqrt(ri1*ri1+ri2*ri2+ri3*ri3)
-             ri1  = ri1/ri
-             ri2  = ri2/ri
-             ri3  = ri3/ri
-
-             rj1  = half*(sj(i,j,k,1) + sj(i,j-1,k,1))
-             rj2  = half*(sj(i,j,k,2) + sj(i,j-1,k,2))
-             rj3  = half*(sj(i,j,k,3) + sj(i,j-1,k,3))
-             rj   = sqrt(rj1*rj1+rj2*rj2+rj3*rj3)
-             rj1  = rj1/rj
-             rj2  = rj2/rj
-             rj3  = rj3/rj
-
-             dw1   = dw(i,j,k,1)
-             dw2   = dw(i,j,k,2)
-             dw3   = dw(i,j,k,3)
-             dw4   = dw(i,j,k,4)
-             dw5   = dw(i,j,k,5)
-
-             a1 = ( ri1*rj1 + ri2*rj2 + ri3*rj3 )
-             a2 = ( ri1*rj2 - rj1*ri2 )
-             a3 = ( ri3*rj2 - rj3*ri2 )
-             a4 = ( ri1*rj3 - rj1*ri3 )
-             a5 = (dw4-dw5)*sqrt2inv
-             a6 = (dw4+dw5)*half
-             a7 = (a3*dw1+a4*dw2-a2*dw3-a5*a1)*sqrt2inv
-
-             dw(i,j,k,1)=  a1*dw1 + a2*dw2 + a4*dw3 + a5*a3
-             dw(i,j,k,2)=- a2*dw1 + a1*dw2 - a3*dw3 + a5*a4
-             dw(i,j,k,3)=- a4*dw1 + a3*dw2 + a1*dw3 - a5*a2
-             dw(i,j,k,4)= -a7+a6
-             dw(i,j,k,5)=  a7+a6
-
-          enddo
-       enddo
-    enddo
-
-    !  Multiply by diagonal
-
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-             xfact = one+spectral_i(i,j,k)+spectral_j(i,j,k) &
-                  +spectral_k(i,j,k)
-             dw(i,j,k,1)=dw(i,j,k,1)*xfact
-             dw(i,j,k,2)=dw(i,j,k,2)*xfact
-             dw(i,j,k,3)=dw(i,j,k,3)*xfact
-             dw(i,j,k,4)=dw(i,j,k,4)*xfact
-             dw(i,j,k,5)=dw(i,j,k,5)*xfact
-          enddo
-       enddo
-    enddo
-
-
-
-    if(il.gt.2) then
-
-       !  Inversion in i
-
-       do k=2,kl
-          do j=2,jl
-             do i=1,il
-                if( viscous )   mut = rlv(i,j,k)+rlv(i+1,j,k)
-                if( eddyModel ) mut = mut+rev(i,j,k)+rev(i+1,j,k)
-
-                volfact=one/(vol(i,j,k)+vol(i+1,j,k))
-                metterm    =(si(i,j,k,1)*si(i,j,k,1) &
-                     +si(i,j,k,2)*si(i,j,k,2) &
-                     +si(i,j,k,3)*si(i,j,k,3))
-                mettermi(i)=metterm*mut*volfact
-             enddo
-
-             do i=2,il
-
-                viscTerm1 = mettermi(i)  /vol(i,j,k)/w(i,j,k,irho)
-                viscTerm3 = mettermi(i-1)/vol(i,j,k)/w(i,j,k,irho)
-                viscTerm2 = viscTerm1+viscTerm3
-
-                volhalf = half/vol(i,j,k)
-                ri1  = volhalf*(si(i,j,k,1) + si(i-1,j,k,1))
-                ri2  = volhalf*(si(i,j,k,2) + si(i-1,j,k,2))
-                ri3  = volhalf*(si(i,j,k,3) + si(i-1,j,k,3))
-                metterm     = ri1*ri1+ri2*ri2+ri3*ri3
-                eps2        = epsval*epsval*cInf2*metterm
-                diagPlus(1) =half*(qq_i(i,j,k)+fac*sqrt(qq_i(i,j,k)**2+eps2))
-                diagPlus(2) =diagPlus(1)
-                diagPlus(3) =diagPlus(1)
-                diagPlus(4) =half*(qq_i(i,j,k)+cc_i(i,j,k)   &
-                     +fac*sqrt((qq_i(i,j,k)+cc_i(i,j,k))**2+eps2))
-                diagPlus(5) =half*(qq_i(i,j,k)-cc_i(i,j,k)   &
-                     +fac*sqrt((qq_i(i,j,k)-cc_i(i,j,k))**2+eps2))
-                diagMinus(1) =half*(qq_i(i,j,k)-fac*sqrt(qq_i(i,j,k)**2+eps2))
-                diagMinus(2) =diagMinus(1)
-                diagMinus(3) =diagMinus(1)
-                diagMinus(4) =half*(qq_i(i,j,k)+cc_i(i,j,k)   &
-                     -fac*sqrt((qq_i(i,j,k)+cc_i(i,j,k))**2+eps2))
-                diagMinus(5) =half*(qq_i(i,j,k)-cc_i(i,j,k)   &
-                     -fac*sqrt((qq_i(i,j,k)-cc_i(i,j,k))**2+eps2))
-                do n=1,5
-                   bbi(i+1,n)= -viscTerm1-diagPlus(n)
-                   ddi(i-1,n)= -viscTerm3+diagMinus(n)
-                   cci(i  ,n)=  viscTerm2+diagPlus(n)-diagMinus(n)
-                enddo
-             enddo
-
-             do n=1,5
-                bbi(ie ,n)=zero
-                ddi(1 ,n)=zero
-                do i=2,il
-                   bbi(i,n)=bbi(i,n)*dual_dt(i,j,k)*max(real(iblank(i,j,k),realType),zero)
-                   ddi(i,n)=ddi(i,n)*dual_dt(i,j,k)*max(real(iblank(i,j,k),realType),zero)
-                   cci(i,n)=one+cci(i,n)*dual_dt(i,j,k)*max(real(iblank(i,j,k),realType),zero)+spectral_j(i,j,k)+spectral_k(i,j,k)
-                   ffi(i,n)=dw(i,j,k,n)
-                enddo
-             enddo
-
-
-             call tridiagsolve(bbi,cci,ddi,ffi,il)
-
-             do n=1,5
-                do i=2,il
-                   dw(i,j,k,n)=ffi(i,n)
-                enddo
-             enddo
-
-          enddo
-       enddo
-
-    endif
-    !       Multiply by T_zeta^inv T_xi
-
-
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-             ri1  = half*(si(i,j,k,1) + si(i-1,j,k,1))
-             ri2  = half*(si(i,j,k,2) + si(i-1,j,k,2))
-             ri3  = half*(si(i,j,k,3) + si(i-1,j,k,3))
-             ri   = sqrt(ri1*ri1+ri2*ri2+ri3*ri3)
-             ri1  = ri1/ri
-             ri2  = ri2/ri
-             ri3  = ri3/ri
-
-             rk1  = half*(sk(i,j,k,1) + sk(i,j,k-1,1))
-             rk2  = half*(sk(i,j,k,2) + sk(i,j,k-1,2))
-             rk3  = half*(sk(i,j,k,3) + sk(i,j,k-1,3))
-             rk   = sqrt(rk1*rk1+rk2*rk2+rk3*rk3)
-             rk1  = rk1/rk
-             rk2  = rk2/rk
-             rk3  = rk3/rk
-
-             dw1   = dw(i,j,k,1)
-             dw2   = dw(i,j,k,2)
-             dw3   = dw(i,j,k,3)
-             dw4   = dw(i,j,k,4)
-             dw5   = dw(i,j,k,5)
-
-             a1 =  ri1*rk1 + ri2*rk2 + ri3*rk3
-             a2 =  rk1*ri2 - ri1*rk2
-             a3 =  rk3*ri2 - ri3*rk2
-             a4 =  rk1*ri3 - ri1*rk3
-             a5 = (dw4-dw5)*sqrt2inv
-             a6 = (dw4+dw5)*half
-             a7 = (a3*dw1+a4*dw2-a2*dw3-a5*a1)*sqrt2inv
-
-             dw(i,j,k,1)=  a1*dw1 + a2*dw2 + a4*dw3 + a5*a3
-             dw(i,j,k,2)=- a2*dw1 + a1*dw2 - a3*dw3 + a5*a4
-             dw(i,j,k,3)=- a4*dw1 + a3*dw2 + a1*dw3 - a5*a2
-             dw(i,j,k,4)= -a7+a6
-             dw(i,j,k,5)=  a7+a6
-          enddo
-       enddo
-    enddo
-
-    !  Multiply by diagonal
-
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-             xfact = one+spectral_i(i,j,k)+spectral_j(i,j,k) &
-                  +spectral_k(i,j,k)
-             dw(i,j,k,1)=dw(i,j,k,1)*xfact
-             dw(i,j,k,2)=dw(i,j,k,2)*xfact
-             dw(i,j,k,3)=dw(i,j,k,3)*xfact
-             dw(i,j,k,4)=dw(i,j,k,4)*xfact
-             dw(i,j,k,5)=dw(i,j,k,5)*xfact
-          enddo
-       enddo
-    enddo
-
-    if(kl.gt.2) then
-
-       !  Inversion in k
-
-       do j=2,jl
-          do i=2,il
-             do k=1,kl
-                if( viscous )   mut = rlv(i,j,k)+rlv(i,j,k+1)
-                if( eddyModel ) mut = mut+rev(i,j,k)+rev(i,j,k+1)
-
-                volfact=1./(vol(i,j,k)+vol(i,j,k+1))
-                metterm    = sk(i,j,k,1)*sk(i,j,k,1)  &
-                     +sk(i,j,k,2)*sk(i,j,k,2)  &
-                     +sk(i,j,k,3)*sk(i,j,k,3)
-                mettermk(k)= metterm*mut*volfact
-             enddo
-
-             do k=2,kl
-
-                viscTerm1 = mettermk(k)  /vol(i,j,k)/w(i,j,k,irho)
-                viscTerm3 = mettermk(k-1)/vol(i,j,k)/w(i,j,k,irho)
-                viscTerm2 = viscTerm1+viscTerm3
-
-                volhalf = half/vol(i,j,k)
-                rk1  = volhalf*(sk(i,j,k,1) + sj(i,j,k-1,1))
-                rk2  = volhalf*(sk(i,j,k,2) + sj(i,j,k-1,2))
-                rk3  = volhalf*(sk(i,j,k,3) + sj(i,j,k-1,3))
-                metterm     = rk1*rk1+rk2*rk2+rk3*rk3
-                eps2        = epsval*epsval*cInf2*metterm
-                diagPlus(1) =half*(qq_k(i,j,k)+fac*sqrt(qq_k(i,j,k)**2+eps2))
-                diagPlus(2) =diagPlus(1)
-                diagPlus(3) =diagPlus(1)
-                diagPlus(4) =half*(qq_k(i,j,k)+cc_k(i,j,k)   &
-                     +fac*sqrt((qq_k(i,j,k)+cc_k(i,j,k))**2+eps2))
-                diagPlus(5) =half*(qq_k(i,j,k)-cc_k(i,j,k)   &
-                     +fac*sqrt((qq_k(i,j,k)-cc_k(i,j,k))**2+eps2))
-                diagMinus(1) =half*(qq_k(i,j,k)-fac*sqrt(qq_k(i,j,k)**2+eps2))
-                diagMinus(2) =diagMinus(1)
-                diagMinus(3) =diagMinus(1)
-                diagMinus(4) =half*(qq_k(i,j,k)+cc_k(i,j,k)   &
-                     -fac*sqrt((qq_k(i,j,k)+cc_k(i,j,k))**2+eps2))
-                diagMinus(5) =half*(qq_k(i,j,k)-cc_k(i,j,k)   &
-                     -fac*sqrt((qq_k(i,j,k)-cc_k(i,j,k))**2+eps2))
-
-                do n=1,5
-                   bbk(k+1,n)= -viscTerm1-diagPlus(n)
-                   ddk(k-1,n)= -viscTerm3+diagMinus(n)
-                   cck(k  ,n)=  viscTerm2+diagPlus(n)-diagMinus(n)
-                enddo
-             enddo
-
-             do n=1,5
-                bbk(ke,n)=zero
-                ddk(1 ,n)=zero
-                do k=2,kl
-                   bbk(k,n)=bbk(k,n)*dual_dt(i,j,k)*max(real(iblank(i,j,k),realType),zero)
-                   ddk(k,n)=ddk(k,n)*dual_dt(i,j,k)*max(real(iblank(i,j,k),realType),zero)
-                   cck(k,n)=one+cck(k,n)*dual_dt(i,j,k)*max(real(iblank(i,j,k),realType),zero)+spectral_i(i,j,k)+spectral_j(i,j,k)
-                   ffk(k,n)=dw(i,j,k,n)
-                enddo
-             enddo
-
-             call tridiagsolve(bbk,cck,ddk,ffk,kl)
-
-             do n=1,5
-                do k=2,kl
-                   dw(i,j,k,n)=ffk(k,n)
-                enddo
-             enddo
-
-          enddo
-       enddo
-    endif
-
-    !       Multiply by T_zeta
-
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-             uvel  = w(i,j,k,ivx)
-             vvel  = w(i,j,k,ivy)
-             wvel  = w(i,j,k,ivz)
-
-             rk1  = half*(sk(i,j,k,1) + sk(i,j,k-1,1))
-             rk2  = half*(sk(i,j,k,2) + sk(i,j,k-1,2))
-             rk3  = half*(sk(i,j,k,3) + sk(i,j,k-1,3))
-
-             rk = sqrt(rk1*rk1+rk2*rk2+rk3*rk3)
-             uu = uvel*rk1+vvel*rk2+wvel*rk3
-
-             rk1  = rk1/rk
-             rk2  = rk2/rk
-             rk3  = rk3/rk
-
-             uvw   = half*(uvel*uvel+vvel*vvel+wvel*wvel)
-             cijkinv = sqrt(w(i,j,k,irho)/gamma(i,j,k)/p(i,j,k))
-             alph  = w(i,j,k,irho)*cijkinv*sqrt2inv
-             xfact = two/cijkinv
-
-             ge=gamma(i,j,k)*w(i,j,k,irhoE)/w(i,j,k,irho)-  &
-                  (gamma(i,j,k)-one)*uvw
-
-             dw1   = dw(i,j,k,1)
-             dw2   = dw(i,j,k,2)
-             dw3   = dw(i,j,k,3)
-             dw4   = dw(i,j,k,4)*alph
-             dw5   = dw(i,j,k,5)*alph
-
-             a1    =  dw1*rk1+dw2*rk2+dw3*rk3+dw4+dw5
-             a2    =  half*xfact*(dw4-dw5)
-             a3    =  uvw*(rk1*dw1+rk2*dw2+rk3*dw3)
-
-             dw(i,j,k,1)  = a1
-             dw(i,j,k,2)  = a1*uvel-w(i,j,k,irho)*(rk3*dw2-rk2*dw3) &
-                  +a2*rk1
-             dw(i,j,k,3)  = a1*vvel-w(i,j,k,irho)*(rk1*dw3-rk3*dw1) &
-                  +a2*rk2
-             dw(i,j,k,4)  = a1*wvel-w(i,j,k,irho)*(rk2*dw1-rk1*dw2) &
-                  +a2*rk3
-             dw(i,j,k,5)  = a3+ w(i,j,k,irho)*       &
-                  ((vvel*rk3-wvel*rk2)*dw1            &
-                  +(wvel*rk1-uvel*rk3)*dw2            &
-                  +(uvel*rk2-vvel*rk1)*dw3)           &
-                  +(ge+half*xfact*uu/rk)*dw4 &
-                  +(ge-half*xfact*uu/rk)*dw5
-
-          enddo
-       enddo
-    enddo
-
-
-    !      For consistency with update.
-
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-             volfact=-one/vol(i,j,k)
-             dw(i,j,k,1)=dw(i,j,k,1)*volfact
-             dw(i,j,k,2)=dw(i,j,k,2)*volfact
-             dw(i,j,k,3)=dw(i,j,k,3)*volfact
-             dw(i,j,k,4)=dw(i,j,k,4)*volfact
-             dw(i,j,k,5)=dw(i,j,k,5)*volfact
-          enddo
-       enddo
-    enddo
-
-
-  end subroutine computedwDADI
-
-  subroutine tridiagsolve(bb,cc,dd,ff,nn)
-    use precision
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    integer(kind=intType) :: nn
-    real(kind=realType), dimension(nn+1,5) :: bb,cc,dd,ff
-
-    !     local variables
-
-    integer(kind=intType) :: m,n
-    real(kind=realType) :: d0,d2
-
-    do n=1,5
-       m=2
-       d0=1./cc(m,n)
-       dd(m,n)=dd(m,n)*d0
-       ff(m,n)=ff(m,n)*d0
-
-
-       do m=3,nn
-          d2=bb(m,n)
-          d0=1./(cc(m,n)-d2*dd(m-1,n))
-          ff(m,n)=(ff(m,n)-d2*ff(m-1,n))*d0
-          dd(m,n)=dd(m,n)*d0
-       enddo
-
-
-       do m=nn-1,2,-1
-          ff(m,n)=ff(m,n)-dd(m,n)*ff(m+1,n)
-       enddo
-
-    enddo
-
-
-  end subroutine tridiagsolve
-
-
-  subroutine residualAveraging
-    !
-    !       Implicit residual smoothing is a simple procedure that
-    !       replaces the residual at each point by a weighted sum of all
-    !       of the residuals in the block (although the residuals that are
-    !       closer to the cell under consideration are weighted more
-    !       heavily). This smoothing can be applied explicitly, but may
-    !       result in zero smoothed residuals for non-zero initial
-    !       residual modes.  For this reason, the smoothing is applied
-    !       implicitly in the following form:
-    !       -epz R{i+1} + (1 + 2 epz) R{i} -epz R{i-1} = r{i}
-    !       Where r{i} is the original residual at point i, and R{i} is
-    !       the implicitly smoothed residual at point i.  The analysis for
-    !       the 1-D scalar convection-diffusion equation shows that if
-    !       Epz >= (1/4)*((lambda/lambda*)^2 -1),
-    !       where lambda is the cfl number desired to be used, and
-    !       lambda* is the CFL limit of the unsmoothed scheme, the scheme
-    !       can be made unconditionally stable (arbitrarily large lambda).
-    !       In practice, lambda = 6-8 is common for Euler solutions.  For
-    !       RANS solutions lambda = 3-4 is what we have used in practice
-    !       resulting in a slight improvement in convergence rate, but,
-    !       more importantly, an increase in the robustness of the solver.
-    !       Note that this theoretical result can be shown for infinite
-    !       1-D problems, but also for finite-periodic 1-D problems and
-    !       finite-size 1-D problems (i.e. with block boundaries).  Such
-    !       theoretical results are not available for 3-D cases, where the
-    !       scheme is applied in an ADI fashion:
-    !       (1 -epzI d_ii)(1 -epzJ d_jj)(1 -epzK d_kk) r = r
-    !       Where d_ii, d_jj, d_kk are second difference operators in each
-    !       of the mesh coordinate directions.
-    !       For each of the coordinate direction solves, the initial
-    !       matrix problem is of the form:
-    !         -                                             - - -   - -
-    !         | (1+2 epz)   -epz                            | |r|   |r|
-    !         |   -epz    (1 +2 epz)    -epz                | |r|   |r|
-    !         |             -epz       (1 + 2 epz)   -epz   | |r| = |r|
-    !         |                 .           .           .   | |.|   |.|
-    !         |                   .            .          . | |.|   |.|
-    !         -                                             - - -   - -
-    !       And after the forward elimination phase a normalization is
-    !       applied so the result looks like:
-    !         -                   - - -   - -
-    !         |  1  -d            | |r|   |r|
-    !         |  0   1  -d        | |r|   |r|
-    !         |      0   1  -d    | |r| = |r|
-    !         |       .   .   .   | |.|   |.|
-    !         |          .  .   . | |.|   |.|
-    !         -                   - - -   - -
-    !       Which can then be used with a straightforward backsolve to
-    !       obtain the answer.
-    !       It is assumed that the pointers in blockPointers already
-    !       point to the correct block.
-    !
-    use constants
-    use blockPointers, only : nx, ny, nz, il, jl, kl, dw, p, iBlank
-    use inputIteration, only: cfl, cflCoarse, cflLimit, smoop
-    use flowVarRefState, only : pInfCorr, nwf
-    use iteration, only : currentLevel
-
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, l
-
-    real(kind=realType), parameter :: b = 2.0_realType
-
-    real(kind=realType) :: currentCfl, rfl0, plim
-    real(kind=realType) :: dpi, dpj, dpk, r
-    real(kind=realType), dimension(il,max(jl,kl)) :: epz, d, t, rfl
-
-
-    !      rfl0 is a measure of the ratio lambda/lambda*
-    !
-    currentCfl = cflCoarse
-    if (currentLevel == 1) then
-       currentCfl = cfl
-    end if
-
-    rfl0  = half*currentCfl/cflLimit
-
-    plim  = 0.001_realType*pInfCorr
-
-    !       Smoothing in the i-direction. Only done when enough cells are
-    !       present in the i-direction.
-    !
-    if(nx > 1) then
-
-       do k=2,kl
-
-          ! Compute smoothing coeficients
-
-          do j=2,jl
-             do i=2,il
-                dpi = abs(p(i+1,j,k) - two*p(i,j,k) + p(i-1,j,k)) &
-                     /    (p(i+1,j,k) + two*p(i,j,k) + p(i-1,j,k) + plim)
-                dpj = abs(p(i,j+1,k) - two*p(i,j,k) + p(i,j-1,k)) &
-                     /    (p(i,j+1,k) + two*p(i,j,k) + p(i,j-1,k) + plim)
-                dpk = abs(p(i,j,k+1) - two*p(i,j,k) + p(i,j,k-1)) &
-                     /    (p(i,j,k+1) + two*p(i,j,k) + p(i,j,k-1) + plim)
-                rfl(i,j) = one/(one + b*(dpi  +dpj  +dpk))
-             end do
-          end do
-
-          do j=2,jl
-             do i=2,nx
-                r = rfl0*(rfl(i,j) +rfl(i+1,j))
-                epz(i,j) = fourth*smoop*dim(r*r,one)*max(real(iblank(i,j,k), realType), zero)
-             end do
-          end do
-
-          ! Zero out coefficients for boundary condition treatment
-
-          do j=2,jl
-             epz(1,j)  = zero
-             epz(il,j) = zero
-             d(1,j)    = zero
-          end do
-
-          ! Compute coefficients for forward elimination process
-
-          do i=2,il
-             do j=2,jl
-                t(i,j) = one &
-                     / (one +epz(i,j) +epz(i-1,j) -epz(i-1,j)*d(i-1,j))
-                d(i,j) = t(i,j)*epz(i,j)
-             end do
-          end do
-
-          ! Apply same transformation to the rhs vector of residuals
-
-          do i=2,il
-             do j=2,jl
-                do l=1,nwf
-                   dw(i,j,k,l) = t(i,j) &
-                        * (dw(i,j,k,l) +epz(i-1,j)*dw(i-1,j,k,l))
+                do j = 2, jl
+                    epz(1, j) = zero
+                    epz(il, j) = zero
+                    d(1, j) = zero
                 end do
-             end do
-          end do
 
-          ! Backsolve operation.  Smoothed residuals are left
-          !  in dw(i,j,k,l)
+                ! Compute coefficients for forward elimination process
 
-          do i=nx,2,-1
-             do j=2,jl
-                do l=1,nwf
-                   dw(i,j,k,l) = dw(i,j,k,l) +d(i,j)*dw(i+1,j,k,l)
+                do i = 2, il
+                    do j = 2, jl
+                        t(i, j) = one &
+                                  / (one + epz(i, j) + epz(i - 1, j) - epz(i - 1, j) * d(i - 1, j))
+                        d(i, j) = t(i, j) * epz(i, j)
+                    end do
                 end do
-             end do
-          end do
-
-       enddo
-    endif
-    !
-    !       Smoothing in the j-direction. Only done when enough cells are
-    !       present in the j-direction.
-    !
-    if(ny > 1) then
-
-       do k=2,kl
-
-          ! Compute smoothing coeficients
-
-          do j=2,jl
-             do i=2,il
-                dpi = abs(p(i+1,j,k) - two*p(i,j,k) + p(i-1,j,k)) &
-                     /    (p(i+1,j,k) + two*p(i,j,k) + p(i-1,j,k) + plim)
-                dpj = abs(p(i,j+1,k) - two*p(i,j,k) + p(i,j-1,k)) &
-                     /    (p(i,j+1,k) + two*p(i,j,k) + p(i,j-1,k) + plim)
-                dpk = abs(p(i,j,k+1) - two*p(i,j,k) + p(i,j,k-1)) &
-                     /    (p(i,j,k+1) + two*p(i,j,k) + p(i,j,k-1) + plim)
-                rfl(i,j) = one/(one + b*(dpi  +dpj  +dpk))
-             end do
-          end do
-
-          do j=2,ny
-             do i=2,il
-                r = rfl0*(rfl(i,j) +rfl(i,j+1))
-                epz(i,j) = fourth*smoop*dim(r*r,one)*max(real(iblank(i,j,k), realType), zero)
-             end do
-          end do
-
-          ! Zero out coefficients for boundary condition treatment
-
-          do i=2,il
-             epz(i,1)  = zero
-             epz(i,jl) = zero
-             d(i,1)    = zero
-          end do
-
-          ! Compute coefficients for forward eliMination process
-
-          do j=2,jl
-             do i=2,il
-                t(i,j) = one &
-                     / (one +epz(i,j) +epz(i,j-1) -epz(i,j-1)*d(i,j-1))
-                d(i,j) = t(i,j)*epz(i,j)
-             end do
-          end do
-
-          ! Apply same transformation to the rhs vector of residuals
-
-          do j=2,jl
-             do i=2,il
-                do l=1,nwf
-                   dw(i,j,k,l) = t(i,j) &
-                        * (dw(i,j,k,l) +epz(i,j-1)*dw(i,j-1,k,l))
+
+                ! Apply same transformation to the rhs vector of residuals
+
+                do i = 2, il
+                    do j = 2, jl
+                        do l = 1, nwf
+                            dw(i, j, k, l) = t(i, j) &
+                                             * (dw(i, j, k, l) + epz(i - 1, j) * dw(i - 1, j, k, l))
+                        end do
+                    end do
+                end do
+
+                ! Backsolve operation.  Smoothed residuals are left
+                !  in dw(i,j,k,l)
+
+                do i = nx, 2, -1
+                    do j = 2, jl
+                        do l = 1, nwf
+                            dw(i, j, k, l) = dw(i, j, k, l) + d(i, j) * dw(i + 1, j, k, l)
+                        end do
+                    end do
+                end do
+
+            end do
+        end if
+        !
+        !       Smoothing in the j-direction. Only done when enough cells are
+        !       present in the j-direction.
+        !
+        if (ny > 1) then
+
+            do k = 2, kl
+
+                ! Compute smoothing coeficients
+
+                do j = 2, jl
+                    do i = 2, il
+                        dpi = abs(p(i + 1, j, k) - two * p(i, j, k) + p(i - 1, j, k)) &
+                              / (p(i + 1, j, k) + two * p(i, j, k) + p(i - 1, j, k) + plim)
+                        dpj = abs(p(i, j + 1, k) - two * p(i, j, k) + p(i, j - 1, k)) &
+                              / (p(i, j + 1, k) + two * p(i, j, k) + p(i, j - 1, k) + plim)
+                        dpk = abs(p(i, j, k + 1) - two * p(i, j, k) + p(i, j, k - 1)) &
+                              / (p(i, j, k + 1) + two * p(i, j, k) + p(i, j, k - 1) + plim)
+                        rfl(i, j) = one / (one + b * (dpi + dpj + dpk))
+                    end do
+                end do
+
+                do j = 2, ny
+                    do i = 2, il
+                        r = rfl0 * (rfl(i, j) + rfl(i, j + 1))
+                        epz(i, j) = fourth * smoop * dim(r * r, one) * max(real(iblank(i, j, k), realType), zero)
+                    end do
+                end do
+
+                ! Zero out coefficients for boundary condition treatment
+
+                do i = 2, il
+                    epz(i, 1) = zero
+                    epz(i, jl) = zero
+                    d(i, 1) = zero
+                end do
+
+                ! Compute coefficients for forward eliMination process
+
+                do j = 2, jl
+                    do i = 2, il
+                        t(i, j) = one &
+                                  / (one + epz(i, j) + epz(i, j - 1) - epz(i, j - 1) * d(i, j - 1))
+                        d(i, j) = t(i, j) * epz(i, j)
+                    end do
                 end do
-             end do
-          end do
 
-          ! Backsolve operation.  Smoothed residuals are left
-          !  in dw(i,j,k,l)
+                ! Apply same transformation to the rhs vector of residuals
 
-          do j=ny,2,-1
-             do i=2,il
-                do l=1,nwf
-                   dw(i,j,k,l) = dw(i,j,k,l) +d(i,j)*dw(i,j+1,k,l)
+                do j = 2, jl
+                    do i = 2, il
+                        do l = 1, nwf
+                            dw(i, j, k, l) = t(i, j) &
+                                             * (dw(i, j, k, l) + epz(i, j - 1) * dw(i, j - 1, k, l))
+                        end do
+                    end do
                 end do
-             end do
-          end do
-
-       enddo
-    endif
-    !
-    !       Smoothing in the k-direction. Only done when enough cells are
-    !       present in the k-direction.
-    !
-    if(nz > 1) then
-
-       do j=2,jl
-
-          ! Compute smoothing coeficients
-
-          do k=2,kl
-             do i=2,il
-                dpi = abs(p(i+1,j,k) - two*p(i,j,k) + p(i-1,j,k)) &
-                     /    (p(i+1,j,k) + two*p(i,j,k) + p(i-1,j,k) + plim)
-                dpj = abs(p(i,j+1,k) - two*p(i,j,k) + p(i,j-1,k)) &
-                     /    (p(i,j+1,k) + two*p(i,j,k) + p(i,j-1,k) + plim)
-                dpk = abs(p(i,j,k+1) - two*p(i,j,k) + p(i,j,k-1)) &
-                     /    (p(i,j,k+1) + two*p(i,j,k) + p(i,j,k-1) + plim)
-                rfl(i,k) = one/(one + b*(dpi  +dpj  +dpk))
-             end do
-          end do
-
-          do k=2,nz
-             do i=2,il
-                r = rfl0*(rfl(i,k) +rfl(i,k+1))
-                epz(i,k) = fourth*smoop*dim(r*r,one)*max(real(iblank(i,j,k), realType), zero)
-             end do
-          end do
-
-          ! Zero out coefficients for boundary condition treatment
-
-          do i=2,il
-             epz(i,1)  = zero
-             epz(i,kl) = zero
-             d(i,1)    = zero
-          end do
-
-          ! Compute coefficients for forward eliMination process
-
-          do k=2,kl
-             do i=2,il
-                t(i,k) = one &
-                     / (one +epz(i,k) +epz(i,k-1) -epz(i,k-1)*d(i,k-1))
-                d(i,k) = t(i,k)*epz(i,k)
-             end do
-          end do
-
-          ! Apply same transformation to the rhs vector of residuals
-
-          do k=2,kl
-             do i=2,il
-                do l=1,nwf
-                   dw(i,j,k,l) = t(i,k) &
-                        * (dw(i,j,k,l) +epz(i,k-1)*dw(i,j,k-1,l))
+
+                ! Backsolve operation.  Smoothed residuals are left
+                !  in dw(i,j,k,l)
+
+                do j = ny, 2, -1
+                    do i = 2, il
+                        do l = 1, nwf
+                            dw(i, j, k, l) = dw(i, j, k, l) + d(i, j) * dw(i, j + 1, k, l)
+                        end do
+                    end do
+                end do
+
+            end do
+        end if
+        !
+        !       Smoothing in the k-direction. Only done when enough cells are
+        !       present in the k-direction.
+        !
+        if (nz > 1) then
+
+            do j = 2, jl
+
+                ! Compute smoothing coeficients
+
+                do k = 2, kl
+                    do i = 2, il
+                        dpi = abs(p(i + 1, j, k) - two * p(i, j, k) + p(i - 1, j, k)) &
+                              / (p(i + 1, j, k) + two * p(i, j, k) + p(i - 1, j, k) + plim)
+                        dpj = abs(p(i, j + 1, k) - two * p(i, j, k) + p(i, j - 1, k)) &
+                              / (p(i, j + 1, k) + two * p(i, j, k) + p(i, j - 1, k) + plim)
+                        dpk = abs(p(i, j, k + 1) - two * p(i, j, k) + p(i, j, k - 1)) &
+                              / (p(i, j, k + 1) + two * p(i, j, k) + p(i, j, k - 1) + plim)
+                        rfl(i, k) = one / (one + b * (dpi + dpj + dpk))
+                    end do
+                end do
+
+                do k = 2, nz
+                    do i = 2, il
+                        r = rfl0 * (rfl(i, k) + rfl(i, k + 1))
+                        epz(i, k) = fourth * smoop * dim(r * r, one) * max(real(iblank(i, j, k), realType), zero)
+                    end do
+                end do
+
+                ! Zero out coefficients for boundary condition treatment
+
+                do i = 2, il
+                    epz(i, 1) = zero
+                    epz(i, kl) = zero
+                    d(i, 1) = zero
+                end do
+
+                ! Compute coefficients for forward eliMination process
+
+                do k = 2, kl
+                    do i = 2, il
+                        t(i, k) = one &
+                                  / (one + epz(i, k) + epz(i, k - 1) - epz(i, k - 1) * d(i, k - 1))
+                        d(i, k) = t(i, k) * epz(i, k)
+                    end do
+                end do
+
+                ! Apply same transformation to the rhs vector of residuals
+
+                do k = 2, kl
+                    do i = 2, il
+                        do l = 1, nwf
+                            dw(i, j, k, l) = t(i, k) &
+                                             * (dw(i, j, k, l) + epz(i, k - 1) * dw(i, j, k - 1, l))
+                        end do
+                    end do
                 end do
-             end do
-          end do
 
-          ! Backsolve operation.  Smoothed residuals are left
-          !  in dw(i,j,k,l)
+                ! Backsolve operation.  Smoothed residuals are left
+                !  in dw(i,j,k,l)
 
-          do k=nz,2,-1
-             do i=2,il
-                do l=1,nwf
-                   dw(i,j,k,l) = dw(i,j,k,l) +d(i,k)*dw(i,j,k+1,l)
+                do k = nz, 2, -1
+                    do i = 2, il
+                        do l = 1, nwf
+                            dw(i, j, k, l) = dw(i, j, k, l) + d(i, k) * dw(i, j, k + 1, l)
+                        end do
+                    end do
                 end do
-             end do
-          end do
 
-       enddo
-    endif
+            end do
+        end if
 
-  end subroutine residualAveraging
+    end subroutine residualAveraging
 #endif
 end module residuals
diff --git a/src/solver/smoothers.F90 b/src/solver/smoothers.F90
index 3f165bfb1..650c33932 100644
--- a/src/solver/smoothers.F90
+++ b/src/solver/smoothers.F90
@@ -1,697 +1,695 @@
 module smoothers
 
 contains
-  subroutine RungeKuttaSmoother
-    !
-    !       RungeKuttaSmoother performs one multi-stage runge kutta
-    !       explicit time step for the current multigrid level. On
-    !       entrance it is assumed that the residual and time step are
-    !       already computed. On exit the solution in the halo's contain
-    !       the latest values. However, the residual corresponding to
-    !       these values is not computed.
-    !
-    use constants
-    use blockPointers, only : w, p, wn, pn, il, jl, kl, nDom
-    use flowVarRefState, only: nwf
-    use inputIteration, only : nRKStages
-    use inputTimeSpectral, only :ntimeIntervalsSpectral
-    use iteration, only: currentLevel, rkStage
-    use utils, only : setPointers
-    use residuals, only : initRes, residual, residualAveraging, sourceTerms
-    use BCRoutines, only : applyAllBC
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: sps, nn, i, j, k, l
-
-    ! Store the variables of the zeroth runge kutta stage.
-
-    spectralLoop: do sps=1,nTimeIntervalsSpectral
-       domains: do nn=1,nDom
-
-          ! Set the pointers to this block.
-
-          call setPointers(nn, currentLevel, sps)
-
-          ! The variables stored in w.
-
-          do l=1,nwf
-             do k=2,kl
-                do j=2,jl
-                   do i=2,il
-                      wn(i,j,k,l) = w(i,j,k,l)
-                   enddo
-                enddo
-             enddo
-          enddo
-
-          ! And the pressure.
-
-          do k=2,kl
-             do j=2,jl
-                do i=2,il
-                   pn(i,j,k) = p(i,j,k)
-                enddo
-             enddo
-          enddo
-
-       enddo domains
-    enddo spectralLoop
-
-    ! Loop over all but the last Runge Kutta stages. Note that the
-    ! counter variable rkStage is defined in the module iteration.
-
-    do rkStage=1,(nRKStages-1)
-
-       ! Execute a Runge Kutta stage and exchange the externals.
-
-       call executeRkStage
-
-       ! Compute the residuals for the next stage.
-
-       call initres(1_intType, nwf)
-       call sourceTerms()
-       call residual
-
-    enddo
-
-    ! Execute the last RK stage. Set rkStage to nRKStages, for
-    ! clarity; after the previous loop rkStage == nRKStages.
-
-    rkStage = nRKStages
-
-    call executeRkStage
-
-  end subroutine RungeKuttaSmoother
-
-  !      ==================================================================
-
-  subroutine executeRkStage
-    !
-    !       executeRkStage executes one runge kutta stage. The stage
-    !       number, rkStage, is defined in the local module iteration.
-    !
-    use blockPointers
-    use constants
-    use flowVarRefState
-    use inputIteration
-    use inputPhysics
-    use inputTimeSpectral
-    use inputUnsteady
-    use iteration
-    use inputDiscretization
-    use haloExchange, only : whalo1, whalo2
-    use utils, only : setPointers
-    use flowUtils, only : computeEtotBlock, computeLamViscosity
-    use turbutils, only : computeeddyviscosity
-    use residuals, only : residualAveraging
-    use BCRoutines, only : applyAllBC
-
-    implicit none
-    !
-    !      Local parameter.
-    !
-    real(kind=realType), parameter :: fiveThird = five*third
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: sps, nn, i, j, k, l
-
-    real(kind=realType) :: tmp, unsteadyImpl, mult
-    real(kind=realType) :: dt, currentCfl, gm1, gm53
-    real(kind=realType) :: v2, ovr, dp, factK, ru, rv, rw
+    subroutine RungeKuttaSmoother
+        !
+        !       RungeKuttaSmoother performs one multi-stage runge kutta
+        !       explicit time step for the current multigrid level. On
+        !       entrance it is assumed that the residual and time step are
+        !       already computed. On exit the solution in the halo's contain
+        !       the latest values. However, the residual corresponding to
+        !       these values is not computed.
+        !
+        use constants
+        use blockPointers, only: w, p, wn, pn, il, jl, kl, nDom
+        use flowVarRefState, only: nwf
+        use inputIteration, only: nRKStages
+        use inputTimeSpectral, only: ntimeIntervalsSpectral
+        use iteration, only: currentLevel, rkStage
+        use utils, only: setPointers
+        use residuals, only: initRes, residual, residualAveraging, sourceTerms
+        use BCRoutines, only: applyAllBC
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: sps, nn, i, j, k, l
+
+        ! Store the variables of the zeroth runge kutta stage.
+
+        spectralLoop: do sps = 1, nTimeIntervalsSpectral
+            domains: do nn = 1, nDom
+
+                ! Set the pointers to this block.
+
+                call setPointers(nn, currentLevel, sps)
+
+                ! The variables stored in w.
+
+                do l = 1, nwf
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 2, il
+                                wn(i, j, k, l) = w(i, j, k, l)
+                            end do
+                        end do
+                    end do
+                end do
+
+                ! And the pressure.
+
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            pn(i, j, k) = p(i, j, k)
+                        end do
+                    end do
+                end do
+
+            end do domains
+        end do spectralLoop
+
+        ! Loop over all but the last Runge Kutta stages. Note that the
+        ! counter variable rkStage is defined in the module iteration.
+
+        do rkStage = 1, (nRKStages - 1)
+
+            ! Execute a Runge Kutta stage and exchange the externals.
+
+            call executeRkStage
+
+            ! Compute the residuals for the next stage.
+
+            call initres(1_intType, nwf)
+            call sourceTerms()
+            call residual
+
+        end do
+
+        ! Execute the last RK stage. Set rkStage to nRKStages, for
+        ! clarity; after the previous loop rkStage == nRKStages.
+
+        rkStage = nRKStages
+
+        call executeRkStage
+
+    end subroutine RungeKuttaSmoother
+
+    !      ==================================================================
+
+    subroutine executeRkStage
+        !
+        !       executeRkStage executes one runge kutta stage. The stage
+        !       number, rkStage, is defined in the local module iteration.
+        !
+        use blockPointers
+        use constants
+        use flowVarRefState
+        use inputIteration
+        use inputPhysics
+        use inputTimeSpectral
+        use inputUnsteady
+        use iteration
+        use inputDiscretization
+        use haloExchange, only: whalo1, whalo2
+        use utils, only: setPointers
+        use flowUtils, only: computeEtotBlock, computeLamViscosity
+        use turbutils, only: computeeddyviscosity
+        use residuals, only: residualAveraging
+        use BCRoutines, only: applyAllBC
+
+        implicit none
+        !
+        !      Local parameter.
+        !
+        real(kind=realType), parameter :: fiveThird = five * third
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: sps, nn, i, j, k, l
+
+        real(kind=realType) :: tmp, unsteadyImpl, mult
+        real(kind=realType) :: dt, currentCfl, gm1, gm53
+        real(kind=realType) :: v2, ovr, dp, factK, ru, rv, rw
 
-    logical :: secondHalo, smoothResidual, correctForK
+        logical :: secondHalo, smoothResidual, correctForK
 
-    ! Set the value of secondHalo and the current cfl number,
-    ! depending on the situation. On the finest grid in the mg cycle
-    ! the second halo is computed, otherwise not.
+        ! Set the value of secondHalo and the current cfl number,
+        ! depending on the situation. On the finest grid in the mg cycle
+        ! the second halo is computed, otherwise not.
 
-    if(currentLevel <= groundLevel) then
-       secondHalo = .true.
-    else
-       secondHalo = .false.
-    endif
+        if (currentLevel <= groundLevel) then
+            secondHalo = .true.
+        else
+            secondHalo = .false.
+        end if
 
-    currentCfl = cflCoarse
-    if (currentLevel == 1) then
-       currentCfl = cfl
-    end if
-
-    ! Determine whether or not residual averaging must be applied.
+        currentCfl = cflCoarse
+        if (currentLevel == 1) then
+            currentCfl = cfl
+        end if
+
+        ! Determine whether or not residual averaging must be applied.
 
-    if(resAveraging == noResAveraging) then
-       smoothResidual = .false.
-    else if(resAveraging == alwaysResAveraging) then
-       smoothResidual = .true.
-    else if(mod(rkStage,2_intType) == 1) then
-       smoothResidual = .true.
-    else
-       smoothResidual = .false.
-    endif
+        if (resAveraging == noResAveraging) then
+            smoothResidual = .false.
+        else if (resAveraging == alwaysResAveraging) then
+            smoothResidual = .true.
+        else if (mod(rkStage, 2_intType) == 1) then
+            smoothResidual = .true.
+        else
+            smoothResidual = .false.
+        end if
 
-    ! Determine whether or not the total energy must be corrected
-    ! for the presence of the turbulent kinetic energy.
+        ! Determine whether or not the total energy must be corrected
+        ! for the presence of the turbulent kinetic energy.
 
-    if( kPresent ) then
-       if((currentLevel <= groundLevel)) then
-          correctForK = .true.
-       else
-          correctForK = .false.
-       endif
-    else
-       correctForK = .false.
-    endif
-    !
-    !       Compute the updates of the conservative variables.
-    !
-    ! Loop over the local number of blocks.
+        if (kPresent) then
+            if ((currentLevel <= groundLevel)) then
+                correctForK = .true.
+            else
+                correctForK = .false.
+            end if
+        else
+            correctForK = .false.
+        end if
+        !
+        !       Compute the updates of the conservative variables.
+        !
+        ! Loop over the local number of blocks.
 
-    domainsUpdate: do nn=1,nDom
+        domainsUpdate: do nn = 1, nDom
 
-       ! Determine the equation mode solved.
+            ! Determine the equation mode solved.
 
-       select case (equationMode)
+            select case (equationMode)
 
-       case (steady, timeSpectral)
+            case (steady, timeSpectral)
 
-          ! Steady equations, including time spectral. Everything is
-          ! solved explicitly. Store the cfl number times the RK
-          ! coefficient in tmp.
+                ! Steady equations, including time spectral. Everything is
+                ! solved explicitly. Store the cfl number times the RK
+                ! coefficient in tmp.
 
-          tmp = currentCfl*etaRk(rkStage)
-
-          ! Loop over the number of spectral solutions. Note that
-          ! for the steady mode this value is 1.
+                tmp = currentCfl * etaRk(rkStage)
+
+                ! Loop over the number of spectral solutions. Note that
+                ! for the steady mode this value is 1.
 
-          spectralSteady: do sps=1,nTimeIntervalsSpectral
+                spectralSteady: do sps = 1, nTimeIntervalsSpectral
 
-             ! Set the pointers to this block.
+                    ! Set the pointers to this block.
 
-             call setPointers(nn, currentLevel, sps)
+                    call setPointers(nn, currentLevel, sps)
 
-             ! Loop over the owned cells of this block.
+                    ! Loop over the owned cells of this block.
 
-             do k=2,kl
-                do j=2,jl
-                   do i=2,il
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 2, il
 
-                      ! Determine the local time step (multiplied by the
-                      ! current rk coefficient).
-                      if ( lowspeedpreconditioner) then
-                         dt = 0.8*tmp*dtl(i,j,k)
-                      else
-                         dt = tmp*dtl(i,j,k)
-                      end if
+                                ! Determine the local time step (multiplied by the
+                                ! current rk coefficient).
+                                if (lowspeedpreconditioner) then
+                                    dt = 0.8 * tmp * dtl(i, j, k)
+                                else
+                                    dt = tmp * dtl(i, j, k)
+                                end if
 
-                      ! Compute the updates of the flow field variables.
+                                ! Compute the updates of the flow field variables.
 
-                      dw(i,j,k,irho)  = dw(i,j,k,irho)*dt
-                      dw(i,j,k,imx)   = dw(i,j,k,imx)*dt
-                      dw(i,j,k,imy)   = dw(i,j,k,imy)*dt
-                      dw(i,j,k,imz)   = dw(i,j,k,imz)*dt
-                      dw(i,j,k,irhoE) = dw(i,j,k,irhoE)*dt
+                                dw(i, j, k, irho) = dw(i, j, k, irho) * dt
+                                dw(i, j, k, imx) = dw(i, j, k, imx) * dt
+                                dw(i, j, k, imy) = dw(i, j, k, imy) * dt
+                                dw(i, j, k, imz) = dw(i, j, k, imz) * dt
+                                dw(i, j, k, irhoE) = dw(i, j, k, irhoE) * dt
 
-                   enddo
-                enddo
-             enddo
+                            end do
+                        end do
+                    end do
 
-          enddo spectralSteady
+                end do spectralSteady
 
-          !=============================================================
+                !=============================================================
 
-       case (unsteady)
+            case (unsteady)
 
-          ! Unsteady equations are solved via the dual time
-          ! stepping technique. The leading term of the time
-          ! integrator must be treated implicitly for stability
-          ! reasons. This leads to a different multiplication
-          ! factor of the residual compared to the steady case.
-          ! Compute this additional term and store the cfl number
-          ! times the rk coefficient in tmp.
+                ! Unsteady equations are solved via the dual time
+                ! stepping technique. The leading term of the time
+                ! integrator must be treated implicitly for stability
+                ! reasons. This leads to a different multiplication
+                ! factor of the residual compared to the steady case.
+                ! Compute this additional term and store the cfl number
+                ! times the rk coefficient in tmp.
 
-          unsteadyImpl = coefTime(0)*timeRef/deltaT
-          tmp          = currentCfl*etaRk(rkStage)
+                unsteadyImpl = coefTime(0) * timeRef / deltaT
+                tmp = currentCfl * etaRk(rkStage)
 
-          ! Loop over the number of spectral modes, although this is
-          ! always 1 for the unsteady mode. The loop is executed for
-          ! consistency reasons.
+                ! Loop over the number of spectral modes, although this is
+                ! always 1 for the unsteady mode. The loop is executed for
+                ! consistency reasons.
 
-          spectralUnsteady: do sps=1,nTimeIntervalsSpectral
+                spectralUnsteady: do sps = 1, nTimeIntervalsSpectral
 
-             ! Set the pointers to this block.
+                    ! Set the pointers to this block.
 
-             call setPointers(nn, currentLevel, sps)
+                    call setPointers(nn, currentLevel, sps)
 
-             ! Determine the updates of the flow field variables.
-             ! Owned cells only. The result is stored in dw.
+                    ! Determine the updates of the flow field variables.
+                    ! Owned cells only. The result is stored in dw.
 
-             do k=2,kl
-                do j=2,jl
-                   do i=2,il
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 2, il
 
-                      ! Determine the local time step (multiplied by the
-                      ! current rk coefficient) and the multiplication
-                      ! factor for the residuals.
+                                ! Determine the local time step (multiplied by the
+                                ! current rk coefficient) and the multiplication
+                                ! factor for the residuals.
 
-                      dt   = tmp*dtl(i,j,k)
-                      mult = dt/(dt*unsteadyImpl*vol(i,j,k) + one)
+                                dt = tmp * dtl(i, j, k)
+                                mult = dt / (dt * unsteadyImpl * vol(i, j, k) + one)
 
-                      ! Compute the updates of the flow field variables.
+                                ! Compute the updates of the flow field variables.
 
-                      dw(i,j,k,irho)  = dw(i,j,k,irho)*mult
-                      dw(i,j,k,imx)   = dw(i,j,k,imx)*mult
-                      dw(i,j,k,imy)   = dw(i,j,k,imy)*mult
-                      dw(i,j,k,imz)   = dw(i,j,k,imz)*mult
-                      dw(i,j,k,irhoE) = dw(i,j,k,irhoE)*mult
+                                dw(i, j, k, irho) = dw(i, j, k, irho) * mult
+                                dw(i, j, k, imx) = dw(i, j, k, imx) * mult
+                                dw(i, j, k, imy) = dw(i, j, k, imy) * mult
+                                dw(i, j, k, imz) = dw(i, j, k, imz) * mult
+                                dw(i, j, k, irhoE) = dw(i, j, k, irhoE) * mult
 
-                   enddo
-                enddo
-             enddo
+                            end do
+                        end do
+                    end do
 
-          enddo spectralUnsteady
+                end do spectralUnsteady
 
-       end select
+            end select
 
-    enddo domainsUpdate
-    !
-    !       Compute the new state vector.
-    !
-    ! Loop over the number of spectral solutions and local blocks.
+        end do domainsUpdate
+        !
+        !       Compute the new state vector.
+        !
+        ! Loop over the number of spectral solutions and local blocks.
 
-    spectralLoop: do sps=1,nTimeIntervalsSpectral
-       domainsState: do nn=1,nDom
+        spectralLoop: do sps = 1, nTimeIntervalsSpectral
+            domainsState: do nn = 1, nDom
 
-          ! Set the pointers to this block.
+                ! Set the pointers to this block.
 
-          call setPointers(nn, currentLevel, sps)
+                call setPointers(nn, currentLevel, sps)
 
-          ! Possibility to smooth the updates.
+                ! Possibility to smooth the updates.
 
-          if( smoothResidual ) then
-             call residualAveraging
-          end if
-          ! Flow variables.
+                if (smoothResidual) then
+                    call residualAveraging
+                end if
+                ! Flow variables.
 
-          factK = zero
-          do k=2,kl
-             do j=2,jl
-                do i=2,il
+                factK = zero
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
 
-                   ! Store gamma -1 and gamma - 5/3 a bit easier.
+                            ! Store gamma -1 and gamma - 5/3 a bit easier.
 
-                   gm1  = gamma(i,j,k) - one
-                   gm53 = gamma(i,j,k) - fiveThird
+                            gm1 = gamma(i, j, k) - one
+                            gm53 = gamma(i, j, k) - fiveThird
 
-                   ! Compute the pressure update from the conservative
-                   ! updates. The expression used below is valid even if
-                   ! cp is not constant. For the calorically perfect case,
-                   ! cp is constant, it can be simplified to the usual
-                   ! expression, but not for variable cp.
+                            ! Compute the pressure update from the conservative
+                            ! updates. The expression used below is valid even if
+                            ! cp is not constant. For the calorically perfect case,
+                            ! cp is constant, it can be simplified to the usual
+                            ! expression, but not for variable cp.
 
-                   ovr = one/w(i,j,k,irho)
-                   v2  = w(i,j,k,ivx)**2 + w(i,j,k,ivy)**2 + w(i,j,k,ivz)**2
-                   if( correctForK ) factK = gm53*w(i,j,k,itu1)
+                            ovr = one / w(i, j, k, irho)
+                            v2 = w(i, j, k, ivx)**2 + w(i, j, k, ivy)**2 + w(i, j, k, ivz)**2
+                            if (correctForK) factK = gm53 * w(i, j, k, itu1)
 
-                   dp = (ovr*p(i,j,k) + factK                             &
-                        -  gm1*(ovr*w(i,j,k,irhoE) - v2))*dw(i,j,k,irho)    &
-                        + gm1*(dw(i,j,k,irhoE) - w(i,j,k,ivx)*dw(i,j,k,imx) &
-                        - w(i,j,k,ivy)*dw(i,j,k,imy) &
-                        - w(i,j,k,ivz)*dw(i,j,k,imz))
+                            dp = (ovr * p(i, j, k) + factK &
+                                  - gm1 * (ovr * w(i, j, k, irhoE) - v2)) * dw(i, j, k, irho) &
+                                 + gm1 * (dw(i, j, k, irhoE) - w(i, j, k, ivx) * dw(i, j, k, imx) &
+                                          - w(i, j, k, ivy) * dw(i, j, k, imy) &
+                                          - w(i, j, k, ivz) * dw(i, j, k, imz))
 
-                   ! Compute the density. Correct for negative values.
+                            ! Compute the density. Correct for negative values.
 
-                   w(i,j,k,irho) = wn(i,j,k,irho) - dw(i,j,k,irho)
-                   w(i,j,k,irho) = max(w(i,j,k,irho), 1.e-4_realType*rhoInf)
+                            w(i, j, k, irho) = wn(i, j, k, irho) - dw(i, j, k, irho)
+                            w(i, j, k, irho) = max(w(i, j, k, irho), 1.e-4_realType * rhoInf)
 
-                   ! Compute the velocities.
+                            ! Compute the velocities.
 
-                   ru  = wn(i,j,k,irho)*wn(i,j,k,ivx) - dw(i,j,k,imx)
-                   rv  = wn(i,j,k,irho)*wn(i,j,k,ivy) - dw(i,j,k,imy)
-                   rw  = wn(i,j,k,irho)*wn(i,j,k,ivz) - dw(i,j,k,imz)
+                            ru = wn(i, j, k, irho) * wn(i, j, k, ivx) - dw(i, j, k, imx)
+                            rv = wn(i, j, k, irho) * wn(i, j, k, ivy) - dw(i, j, k, imy)
+                            rw = wn(i, j, k, irho) * wn(i, j, k, ivz) - dw(i, j, k, imz)
 
-                   ovr = one/w(i,j,k,irho)
-                   w(i,j,k,ivx) = ovr*ru
-                   w(i,j,k,ivy) = ovr*rv
-                   w(i,j,k,ivz) = ovr*rw
+                            ovr = one / w(i, j, k, irho)
+                            w(i, j, k, ivx) = ovr * ru
+                            w(i, j, k, ivy) = ovr * rv
+                            w(i, j, k, ivz) = ovr * rw
 
-                   ! Compute the pressure. Correct for negative values.
+                            ! Compute the pressure. Correct for negative values.
 
-                   p(i,j,k) = pn(i,j,k) - dp
-                   p(i,j,k) = max(p(i,j,k), 1.e-4_realType*pInfCorr)
+                            p(i, j, k) = pn(i, j, k) - dp
+                            p(i, j, k) = max(p(i, j, k), 1.e-4_realType * pInfCorr)
 
-                enddo
-             enddo
-          enddo
-
-          ! Compute the total energy and possibly the laminar and eddy
-          ! viscosity in the owned cells.
-
-          call computeEtotBlock(2_intType,il, 2_intType,jl, &
-               2_intType,kl, correctForK)
-          call computeLamViscosity(.False.)
-          call computeEddyViscosity(.False.)
-
-       enddo domainsState
-    enddo spectralLoop
-
-    ! Exchange the pressure if the pressure must be exchanged early.
-    ! Only the first halo's are needed, thus whalo1 is called.
-    ! Only on the fine grid.
-
-    if(exchangePressureEarly .and. currentLevel <= groundLevel) &
-         call whalo1(currentLevel, 1_intType, 0_intType, .true.,&
-         .false., .false.)
-
-    ! Apply all boundary conditions to all blocks on this level.
-
-    call applyAllBC(secondHalo)
-
-    ! Exchange the solution. Either whalo1 or whalo2
-    ! must be called.
-
-    if( secondHalo ) then
-       call whalo2(currentLevel, 1_intType, nwf, .true., &
-            .true., .true.)
-    else
-       call whalo1(currentLevel, 1_intType, nwf, .true., &
-            .true., .true.)
-    endif
-
-  end subroutine executeRkStage
-  subroutine DADISmoother
-    !
-    !       RungeKuttaSmoother performs one multi-stage runge kutta
-    !       explicit time step for the current multigrid level. On
-    !       entrance it is assumed that the residual and time step are
-    !       already computed. On exit the solution in the halo's contain
-    !       the latest values. However, the residual corresponding to
-    !       these values is not computed.
-    !
-    use blockPointers
-    use flowVarRefState
-    use inputIteration
-    use inputTimeSpectral
-    use iteration
-    use residuals, only : initRes, residual, sourceTerms
-    implicit none
-
-
-    if (groundLevel == 1) then
-       do Subit=1,nSubiterations-1
-
-          ! Execute a DADI step and exchange the externals.
-
-          call executeDADIStep
-
-          ! Compute the residuals for the next stage.
-          call initres(1_intType, nwf)
-          call sourceTerms()
-          call residual
-
-       enddo
-
-       ! Set Subit to nSubiterations, for clarity;
-       Subit = nSubiterations
-    end if
-
-    ! Execute the last subiteration.
-    call executeDADIStep
+                        end do
+                    end do
+                end do
+
+                ! Compute the total energy and possibly the laminar and eddy
+                ! viscosity in the owned cells.
+
+                call computeEtotBlock(2_intType, il, 2_intType, jl, &
+                                      2_intType, kl, correctForK)
+                call computeLamViscosity(.False.)
+                call computeEddyViscosity(.False.)
+
+            end do domainsState
+        end do spectralLoop
+
+        ! Exchange the pressure if the pressure must be exchanged early.
+        ! Only the first halo's are needed, thus whalo1 is called.
+        ! Only on the fine grid.
+
+        if (exchangePressureEarly .and. currentLevel <= groundLevel) &
+            call whalo1(currentLevel, 1_intType, 0_intType, .true., &
+                        .false., .false.)
+
+        ! Apply all boundary conditions to all blocks on this level.
+
+        call applyAllBC(secondHalo)
+
+        ! Exchange the solution. Either whalo1 or whalo2
+        ! must be called.
+
+        if (secondHalo) then
+            call whalo2(currentLevel, 1_intType, nwf, .true., &
+                        .true., .true.)
+        else
+            call whalo1(currentLevel, 1_intType, nwf, .true., &
+                        .true., .true.)
+        end if
+
+    end subroutine executeRkStage
+    subroutine DADISmoother
+        !
+        !       RungeKuttaSmoother performs one multi-stage runge kutta
+        !       explicit time step for the current multigrid level. On
+        !       entrance it is assumed that the residual and time step are
+        !       already computed. On exit the solution in the halo's contain
+        !       the latest values. However, the residual corresponding to
+        !       these values is not computed.
+        !
+        use blockPointers
+        use flowVarRefState
+        use inputIteration
+        use inputTimeSpectral
+        use iteration
+        use residuals, only: initRes, residual, sourceTerms
+        implicit none
+
+        if (groundLevel == 1) then
+            do Subit = 1, nSubiterations - 1
+
+                ! Execute a DADI step and exchange the externals.
+
+                call executeDADIStep
+
+                ! Compute the residuals for the next stage.
+                call initres(1_intType, nwf)
+                call sourceTerms()
+                call residual
+
+            end do
+
+            ! Set Subit to nSubiterations, for clarity;
+            Subit = nSubiterations
+        end if
+
+        ! Execute the last subiteration.
+        call executeDADIStep
+
+    end subroutine DADISmoother
 
-  end subroutine DADISmoother
+    !      ==================================================================
 
-  !      ==================================================================
+    subroutine executeDADIStep
+        !
+        !       executeDADIStep executes one DADI step.
+        !
+        use blockPointers
+        use constants
+        use flowVarRefState
+        use inputIteration
+        use inputPhysics
+        use inputTimeSpectral
+        use inputUnsteady
+        use iteration
+        use utils, only: getCorrectForK, setPointers
+        use haloExchange, only: whalo1, whalo2
+        use flowUtils, only: computeETotBlock, computeLamViscosity
+        use turbutils, only: computeeddyviscosity
+        use residuals, only: residualAveraging, computeDwDADI
+        use BCRoutines, only: applyAllBC
+        implicit none
+        !
+        !      Local parameter.
+        !
+        real(kind=realType), parameter :: fiveThird = five * third
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: sps, nn, i, j, k, l
+
+        real(kind=realType) :: unsteadyImpl, mult
+        real(kind=realType) :: dt, currentCfl, gm1, gm53
+        real(kind=realType) :: v2, ovr, dp, factK, ru, rv, rw
 
-  subroutine executeDADIStep
-    !
-    !       executeDADIStep executes one DADI step.
-    !
-    use blockPointers
-    use constants
-    use flowVarRefState
-    use inputIteration
-    use inputPhysics
-    use inputTimeSpectral
-    use inputUnsteady
-    use iteration
-    use utils, only : getCorrectForK, setPointers
-    use haloExchange, only : whalo1, whalo2
-    use flowUtils, only : computeETotBlock, computeLamViscosity
-    use turbutils, only : computeeddyviscosity
-    use residuals, only : residualAveraging, computeDwDADI
-    use BCRoutines, only : applyAllBC
-    implicit none
-    !
-    !      Local parameter.
-    !
-    real(kind=realType), parameter :: fiveThird = five*third
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: sps, nn, i, j, k, l
+        logical :: secondHalo, smoothResidual, correctForK
 
-    real(kind=realType) :: unsteadyImpl, mult
-    real(kind=realType) :: dt, currentCfl, gm1, gm53
-    real(kind=realType) :: v2, ovr, dp, factK, ru, rv, rw
+        ! Set the value of secondHalo and the current cfl number,
+        ! depending on the situation. On the finest grid in the mg cycle
+        ! the second halo is computed, otherwise not.
 
-    logical :: secondHalo, smoothResidual, correctForK
+        if (currentLevel <= groundLevel) then
+            secondHalo = .true.
+        else
+            secondHalo = .false.
+        end if
 
-    ! Set the value of secondHalo and the current cfl number,
-    ! depending on the situation. On the finest grid in the mg cycle
-    ! the second halo is computed, otherwise not.
+        currentCfl = cflCoarse
+        if (currentLevel == 1) then
+            currentCfl = cfl
+        end if
 
-    if(currentLevel <= groundLevel) then
-       secondHalo = .true.
-    else
-       secondHalo = .false.
-    endif
+        ! Determine whether or not residual averaging must be applied.
 
-    currentCfl = cflCoarse
-    if (currentLevel == 1) then
-       currentCfl = cfl
-    end if
+        if (resAveraging == noResAveraging) then
+            smoothResidual = .false.
+        else if (resAveraging == alwaysResAveraging) then
+            smoothResidual = .true.
+        else if (mod(rkStage, 2_intType) == 1) then
+            smoothResidual = .true.
+        else
+            smoothResidual = .false.
+        end if
 
-    ! Determine whether or not residual averaging must be applied.
+        ! Determine whether or not the total energy must be corrected
+        ! for the presence of the turbulent kinetic energy.
+        correctForK = getCorrectForK()
 
-    if(resAveraging == noResAveraging) then
-       smoothResidual = .false.
-    else if(resAveraging == alwaysResAveraging) then
-       smoothResidual = .true.
-    else if(mod(rkStage,2_intType) == 1) then
-       smoothResidual = .true.
-    else
-       smoothResidual = .false.
-    endif
+        !
+        !       Compute the updates of the conservative variables.
+        !
+        ! Loop over the local number of blocks.
 
-    ! Determine whether or not the total energy must be corrected
-    ! for the presence of the turbulent kinetic energy.
-    correctForK = getCorrectForK()
+        domainsUpdate: do nn = 1, nDom
 
-    !
-    !       Compute the updates of the conservative variables.
-    !
-    ! Loop over the local number of blocks.
+            ! Determine the equation mode solved.
 
-    domainsUpdate: do nn=1,nDom
+            select case (equationMode)
 
-       ! Determine the equation mode solved.
+            case (steady, timeSpectral)
 
-       select case (equationMode)
+                ! Loop over the number of spectral solutions. Note that
+                ! for the steady mode this value is 1.
 
-       case (steady, timeSpectral)
+                spectralSteady: do sps = 1, nTimeIntervalsSpectral
 
-          ! Loop over the number of spectral solutions. Note that
-          ! for the steady mode this value is 1.
+                    ! Set the pointers to this block.
 
-          spectralSteady: do sps=1,nTimeIntervalsSpectral
+                    call setPointers(nn, currentLevel, sps)
 
-             ! Set the pointers to this block.
+                    ! Loop over the owned cells of this block.
 
-             call setPointers(nn, currentLevel, sps)
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 2, il
 
-             ! Loop over the owned cells of this block.
+                                ! Determine the local time step
 
-             do k=2,kl
-                do j=2,jl
-                   do i=2,il
+                                dt = -currentCfl * dtl(i, j, k) * vol(i, j, k)
 
-                      ! Determine the local time step
+                                ! Compute the updates of the flow field variables.
 
-                      dt =-currentCfl*dtl(i,j,k)*vol(i,j,k)
+                                dw(i, j, k, irho) = dw(i, j, k, irho) * dt
+                                dw(i, j, k, imx) = dw(i, j, k, imx) * dt
+                                dw(i, j, k, imy) = dw(i, j, k, imy) * dt
+                                dw(i, j, k, imz) = dw(i, j, k, imz) * dt
+                                dw(i, j, k, irhoE) = dw(i, j, k, irhoE) * dt
 
-                      ! Compute the updates of the flow field variables.
+                            end do
+                        end do
+                    end do
 
-                      dw(i,j,k,irho)  =dw(i,j,k,irho)*dt
-                      dw(i,j,k,imx)   =dw(i,j,k,imx)*dt
-                      dw(i,j,k,imy)   =dw(i,j,k,imy)*dt
-                      dw(i,j,k,imz)   =dw(i,j,k,imz)*dt
-                      dw(i,j,k,irhoE) =dw(i,j,k,irhoE)*dt
+                    call computedwDADI
 
-                   enddo
-                enddo
-             enddo
+                end do spectralSteady
 
-             call computedwDADI
+                !=============================================================
 
-          enddo spectralSteady
+            case (unsteady)
 
-          !=============================================================
+                ! Unsteady equations are solved via the dual time
+                ! stepping technique. The leading term of the time
+                ! integrator must be treated implicitly for stability
+                ! reasons. This leads to a different multiplication
+                ! factor of the residual compared to the steady case.
 
-       case (unsteady)
+                unsteadyImpl = coefTime(0) * timeRef / deltaT
 
-          ! Unsteady equations are solved via the dual time
-          ! stepping technique. The leading term of the time
-          ! integrator must be treated implicitly for stability
-          ! reasons. This leads to a different multiplication
-          ! factor of the residual compared to the steady case.
+                ! Loop over the number of spectral modes, although this is
+                ! always 1 for the unsteady mode. The loop is executed for
+                ! consistency reasons.
 
-          unsteadyImpl = coefTime(0)*timeRef/deltaT
+                spectralUnsteady: do sps = 1, nTimeIntervalsSpectral
 
-          ! Loop over the number of spectral modes, although this is
-          ! always 1 for the unsteady mode. The loop is executed for
-          ! consistency reasons.
+                    ! Set the pointers to this block.
 
-          spectralUnsteady: do sps=1,nTimeIntervalsSpectral
+                    call setPointers(nn, currentLevel, sps)
 
-             ! Set the pointers to this block.
+                    ! Determine the updates of the flow field variables.
+                    ! Owned cells only. The result is stored in dw.
 
-             call setPointers(nn, currentLevel, sps)
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 2, il
 
-             ! Determine the updates of the flow field variables.
-             ! Owned cells only. The result is stored in dw.
+                                ! Determine the local time step
 
-             do k=2,kl
-                do j=2,jl
-                   do i=2,il
+                                dt = currentCfl * dtl(i, j, k)
+                                mult = dt / (dt * unsteadyImpl * vol(i, j, k) + one)
+                                mult = -mult * vol(i, j, k)
 
-                      ! Determine the local time step
+                                dw(i, j, k, irho) = dw(i, j, k, irho) * mult
+                                dw(i, j, k, imx) = dw(i, j, k, imx) * mult
+                                dw(i, j, k, imy) = dw(i, j, k, imy) * mult
+                                dw(i, j, k, imz) = dw(i, j, k, imz) * mult
+                                dw(i, j, k, irhoE) = dw(i, j, k, irhoE) * mult
 
-                      dt   = currentCfl*dtl(i,j,k)
-                      mult = dt/(dt*unsteadyImpl*vol(i,j,k) + one)
-                      mult = -mult*vol(i,j,k)
+                            end do
+                        end do
+                    end do
 
-                      dw(i,j,k,irho)  = dw(i,j,k,irho)*mult
-                      dw(i,j,k,imx)   = dw(i,j,k,imx)*mult
-                      dw(i,j,k,imy)   = dw(i,j,k,imy)*mult
-                      dw(i,j,k,imz)   = dw(i,j,k,imz)*mult
-                      dw(i,j,k,irhoE) = dw(i,j,k,irhoE)*mult
+                    call computedwDADI
 
-                   enddo
-                enddo
-             enddo
+                end do spectralUnsteady
 
-             call computedwDADI
+            end select
 
-          enddo spectralUnsteady
+        end do domainsUpdate
+        !
+        !       Compute the new state vector.
+        !
+        ! Loop over the number of spectral solutions and local blocks.
 
-       end select
+        spectralLoop: do sps = 1, nTimeIntervalsSpectral
+            domainsState: do nn = 1, nDom
 
-    enddo domainsUpdate
-    !
-    !       Compute the new state vector.
-    !
-    ! Loop over the number of spectral solutions and local blocks.
+                ! Set the pointers to this block.
 
-    spectralLoop: do sps=1,nTimeIntervalsSpectral
-       domainsState: do nn=1,nDom
+                call setPointers(nn, currentLevel, sps)
 
-          ! Set the pointers to this block.
+                ! Possibility to smooth the updates.
 
-          call setPointers(nn, currentLevel, sps)
+                if (smoothResidual) call residualAveraging
 
-          ! Possibility to smooth the updates.
+                ! Flow variables.
 
-          if( smoothResidual ) call residualAveraging
+                factK = zero
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
 
-          ! Flow variables.
+                            ! Store gamma -1 and gamma - 5/3 a bit easier.
 
-          factK = zero
-          do k=2,kl
-             do j=2,jl
-                do i=2,il
+                            gm1 = gamma(i, j, k) - one
+                            gm53 = gamma(i, j, k) - fiveThird
 
-                   ! Store gamma -1 and gamma - 5/3 a bit easier.
+                            ! Compute the pressure update from the conservative
+                            ! updates. The expression used below is valid even if
+                            ! cp is not constant. For the calorically perfect case,
+                            ! cp is constant, it can be simplified to the usual
+                            ! expression, but not for variable cp.
 
-                   gm1  = gamma(i,j,k) - one
-                   gm53 = gamma(i,j,k) - fiveThird
+                            ovr = one / w(i, j, k, irho)
+                            v2 = w(i, j, k, ivx)**2 + w(i, j, k, ivy)**2 + w(i, j, k, ivz)**2
+                            if (correctForK) factK = gm53 * w(i, j, k, itu1)
 
-                   ! Compute the pressure update from the conservative
-                   ! updates. The expression used below is valid even if
-                   ! cp is not constant. For the calorically perfect case,
-                   ! cp is constant, it can be simplified to the usual
-                   ! expression, but not for variable cp.
+                            dp = (ovr * p(i, j, k) + factK &
+                                  - gm1 * (ovr * w(i, j, k, irhoE) - v2)) * dw(i, j, k, irho) &
+                                 + gm1 * (dw(i, j, k, irhoE) - w(i, j, k, ivx) * dw(i, j, k, imx) &
+                                          - w(i, j, k, ivy) * dw(i, j, k, imy) &
+                                          - w(i, j, k, ivz) * dw(i, j, k, imz))
 
-                   ovr = one/w(i,j,k,irho)
-                   v2  = w(i,j,k,ivx)**2 + w(i,j,k,ivy)**2 + w(i,j,k,ivz)**2
-                   if( correctForK ) factK = gm53*w(i,j,k,itu1)
+                            ! Compute the velocities.
 
-                   dp = (ovr*p(i,j,k) + factK                             &
-                        -  gm1*(ovr*w(i,j,k,irhoE) - v2))*dw(i,j,k,irho)    &
-                        + gm1*(dw(i,j,k,irhoE) - w(i,j,k,ivx)*dw(i,j,k,imx) &
-                        - w(i,j,k,ivy)*dw(i,j,k,imy) &
-                        - w(i,j,k,ivz)*dw(i,j,k,imz))
+                            ru = w(i, j, k, irho) * w(i, j, k, ivx) - dw(i, j, k, imx)
+                            rv = w(i, j, k, irho) * w(i, j, k, ivy) - dw(i, j, k, imy)
+                            rw = w(i, j, k, irho) * w(i, j, k, ivz) - dw(i, j, k, imz)
 
+                            ! Compute the density. Correct for negative values.
 
-                   ! Compute the velocities.
+                            w(i, j, k, irho) = w(i, j, k, irho) - dw(i, j, k, irho)
+                            w(i, j, k, irho) = max(w(i, j, k, irho), 1.e-4_realType * rhoInf)
 
-                   ru  = w(i,j,k,irho)*w(i,j,k,ivx) - dw(i,j,k,imx)
-                   rv  = w(i,j,k,irho)*w(i,j,k,ivy) - dw(i,j,k,imy)
-                   rw  = w(i,j,k,irho)*w(i,j,k,ivz) - dw(i,j,k,imz)
+                            ovr = one / w(i, j, k, irho)
+                            w(i, j, k, ivx) = ovr * ru
+                            w(i, j, k, ivy) = ovr * rv
+                            w(i, j, k, ivz) = ovr * rw
 
-                   ! Compute the density. Correct for negative values.
+                            ! Compute the pressure. Correct for negative values.
 
-                   w(i,j,k,irho) = w(i,j,k,irho) - dw(i,j,k,irho)
-                   w(i,j,k,irho) = max(w(i,j,k,irho), 1.e-4_realType*rhoInf)
+                            p(i, j, k) = p(i, j, k) - dp
+                            p(i, j, k) = max(p(i, j, k), 1.e-4_realType * pInfCorr)
 
-                   ovr = one/w(i,j,k,irho)
-                   w(i,j,k,ivx) = ovr*ru
-                   w(i,j,k,ivy) = ovr*rv
-                   w(i,j,k,ivz) = ovr*rw
+                        end do
+                    end do
+                end do
 
-                   ! Compute the pressure. Correct for negative values.
+                ! Compute the total energy and possibly the laminar and eddy
+                ! viscosity in the owned cells.
 
-                   p(i,j,k) = p(i,j,k) - dp
-                   p(i,j,k) = max(p(i,j,k), 1.e-4_realType*pInfCorr)
+                call computeEtotBlock(2_intType, il, 2_intType, jl, &
+                                      2_intType, kl, correctForK)
+                call computeLamViscosity(.False.)
+                call computeEddyViscosity(.False.)
 
-                enddo
-             enddo
-          enddo
+            end do domainsState
+        end do spectralLoop
 
-          ! Compute the total energy and possibly the laminar and eddy
-          ! viscosity in the owned cells.
+        ! Exchange the pressure if the pressure must be exchanged early.
+        ! Only the first halo's are needed, thus whalo1 is called.
+        ! Only on the fine grid.
 
-          call computeEtotBlock(2_intType,il, 2_intType,jl, &
-               2_intType,kl, correctForK)
-          call computeLamViscosity(.False.)
-          call computeEddyViscosity(.False.)
+        if (exchangePressureEarly .and. currentLevel <= groundLevel) &
+            call whalo1(currentLevel, 1_intType, 0_intType, .true., &
+                        .false., .false.)
 
-       enddo domainsState
-    enddo spectralLoop
+        ! Apply all boundary conditions to all blocks on this level.
 
-    ! Exchange the pressure if the pressure must be exchanged early.
-    ! Only the first halo's are needed, thus whalo1 is called.
-    ! Only on the fine grid.
+        call applyAllBC(secondHalo)
 
-    if(exchangePressureEarly .and. currentLevel <= groundLevel) &
-         call whalo1(currentLevel, 1_intType, 0_intType, .true.,&
-         .false., .false.)
+        ! Exchange the solution. Either whalo1 or whalo2
+        ! must be called.
 
-    ! Apply all boundary conditions to all blocks on this level.
+        if (secondHalo) then
+            call whalo2(currentLevel, 1_intType, nwf, .true., &
+                        .true., .true.)
+        else
+            call whalo1(currentLevel, 1_intType, nwf, .true., &
+                        .true., .true.)
+        end if
 
-    call applyAllBC(secondHalo)
-
-    ! Exchange the solution. Either whalo1 or whalo2
-    ! must be called.
-
-    if( secondHalo ) then
-       call whalo2(currentLevel, 1_intType, nwf, .true., &
-            .true., .true.)
-    else
-       call whalo1(currentLevel, 1_intType, nwf, .true., &
-            .true., .true.)
-    endif
-
-  end subroutine executeDADIStep
+    end subroutine executeDADIStep
 
 end module smoothers
diff --git a/src/solver/solverUtils.F90 b/src/solver/solverUtils.F90
index 3d78f759b..2a52fb9c2 100644
--- a/src/solver/solverUtils.F90
+++ b/src/solver/solverUtils.F90
@@ -1,4056 +1,4035 @@
 module solverUtils
 contains
 #ifndef USE_TAPENADE
-  subroutine timeStep(onlyRadii)
-    !
-    ! Shell function to call timeStep_block on all blocks
-    !
-    use constants
-    use blockPointers, only : nDom
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use iteration, only : currentLevel
-    use utils, only : setPointers
-    implicit none
-    !
-    !      Subroutine argument.
-    !
-    logical, intent(in) :: onlyRadii
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: sps, nn
+    subroutine timeStep(onlyRadii)
+        !
+        ! Shell function to call timeStep_block on all blocks
+        !
+        use constants
+        use blockPointers, only: nDom
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use iteration, only: currentLevel
+        use utils, only: setPointers
+        implicit none
+        !
+        !      Subroutine argument.
+        !
+        logical, intent(in) :: onlyRadii
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: sps, nn
 
-    ! Loop over the number of spectral solutions.
+        ! Loop over the number of spectral solutions.
 
-    spectralLoop: do sps=1,nTimeIntervalsSpectral
+        spectralLoop: do sps = 1, nTimeIntervalsSpectral
 
-       ! Loop over the number of blocks.
+            ! Loop over the number of blocks.
 
-       domains: do nn=1,nDom
+            domains: do nn = 1, nDom
 
-          ! Set the pointers for this block.
+                ! Set the pointers for this block.
 
-          call setPointers(nn, currentLevel, sps)
+                call setPointers(nn, currentLevel, sps)
 
-          call timeStep_Block(onlyRadii)
+                call timeStep_Block(onlyRadii)
 
-       end do domains
+            end do domains
 
-    end do spectralLoop
+        end do spectralLoop
 
-  end subroutine timeStep
+    end subroutine timeStep
 #endif
-  subroutine timeStep_block(onlyRadii)
-    !
-    !       timeStep computes the time step, or more precisely the time
-    !       step divided by the volume per unit CFL, in the owned cells.
-    !       However, for the artificial dissipation schemes, the spectral
-    !       radIi in the halo's are needed. Therefore the loop is taken
-    !       over the the first level of halo cells. The spectral radIi are
-    !       stored and possibly modified for high aspect ratio cells.
-    !
-    use constants
-    use blockPointers, only : ie, je, ke, il, jl, kl, w, p, rlv, rev, &
-         radi, radj, radk, si, sj, sk, sFaceI, sfaceJ, sfaceK, dtl, gamma, vol, &
-         addGridVelocities, sectionID
-    use flowVarRefState, only : timeRef, eddyModel, gammaInf, pInfCorr, &
-         viscous, rhoInf
-    use inputDiscretization, only : adis, dirScaling, radiiNeededCoarse, &
-         radiiNeededFine, precond
-    use inputPhysics, only : equationMode
-    use iteration, only : groundLevel, currentLevel
-    use section, only : sections
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine argument.
-    !
-    logical, intent(in) :: onlyRadii
-    !
-    !      Local parameters.
-    !
-    real(kind=realType), parameter :: b = 2.0_realType
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, ii
-
-    real(kind=realType) :: plim, rlim, clim2
-    real(kind=realType) :: uux, uuy, uuz, cc2, qsi, qsj, qsk, sx, sy, sz, rmu
-    real(kind=realType) :: ri, rj, rk, rij, rjk, rki
-    real(kind=realType) :: vsi, vsj, vsk, rfl, dpi, dpj, dpk
-    real(kind=realType) :: sFace, tmp
-
-    logical :: radiiNeeded, doScaling
-
-    ! Determine whether or not the spectral radii are needed for the
-    ! flux computation.
-
-    radiiNeeded = radiiNeededCoarse
-    if(currentLevel <= groundLevel) radiiNeeded = radiiNeededFine
-
-    ! Return immediately if only the spectral radii must be computed
-    ! and these are not needed for the flux computation.
-
-    if(onlyRadii .and. (.not. radiiNeeded)) return
-
-    ! Set the value of plim. To be fully consistent this must have
-    ! the dimension of a pressure. Therefore a fraction of pInfCorr
-    ! is used. Idem for rlim; compute clim2 as well.
-
-    plim  = 0.001_realType*pInfCorr
-    rlim  = 0.001_realType*rhoInf
-    clim2 = 0.000001_realType*gammaInf*pInfCorr/rhoInf
-
-    doScaling = (dirScaling .and. currentLevel <= groundLevel)
-
-    ! Initialize sFace to zero. This value will be used if the
-    ! block is not moving.
-
-    sFace = zero
-    !
-    !           Inviscid contribution, depending on the preconditioner.
-    !           Compute the cell centered values of the spectral radii.
-    !
-    select case (precond)
-
-    case (noPrecond)
-
-       ! No preconditioner. Simply the standard spectral radius.
-       ! Loop over the cells, including the first level halo.
+    subroutine timeStep_block(onlyRadii)
+        !
+        !       timeStep computes the time step, or more precisely the time
+        !       step divided by the volume per unit CFL, in the owned cells.
+        !       However, for the artificial dissipation schemes, the spectral
+        !       radIi in the halo's are needed. Therefore the loop is taken
+        !       over the the first level of halo cells. The spectral radIi are
+        !       stored and possibly modified for high aspect ratio cells.
+        !
+        use constants
+        use blockPointers, only: ie, je, ke, il, jl, kl, w, p, rlv, rev, &
+                                 radi, radj, radk, si, sj, sk, sFaceI, sfaceJ, sfaceK, dtl, gamma, vol, &
+                                 addGridVelocities, sectionID
+        use flowVarRefState, only: timeRef, eddyModel, gammaInf, pInfCorr, &
+                                   viscous, rhoInf
+        use inputDiscretization, only: adis, dirScaling, radiiNeededCoarse, &
+                                       radiiNeededFine, precond
+        use inputPhysics, only: equationMode
+        use iteration, only: groundLevel, currentLevel
+        use section, only: sections
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine argument.
+        !
+        logical, intent(in) :: onlyRadii
+        !
+        !      Local parameters.
+        !
+        real(kind=realType), parameter :: b = 2.0_realType
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, ii
+
+        real(kind=realType) :: plim, rlim, clim2
+        real(kind=realType) :: uux, uuy, uuz, cc2, qsi, qsj, qsk, sx, sy, sz, rmu
+        real(kind=realType) :: ri, rj, rk, rij, rjk, rki
+        real(kind=realType) :: vsi, vsj, vsk, rfl, dpi, dpj, dpk
+        real(kind=realType) :: sFace, tmp
+
+        logical :: radiiNeeded, doScaling
+
+        ! Determine whether or not the spectral radii are needed for the
+        ! flux computation.
+
+        radiiNeeded = radiiNeededCoarse
+        if (currentLevel <= groundLevel) radiiNeeded = radiiNeededFine
+
+        ! Return immediately if only the spectral radii must be computed
+        ! and these are not needed for the flux computation.
+
+        if (onlyRadii .and. (.not. radiiNeeded)) return
+
+        ! Set the value of plim. To be fully consistent this must have
+        ! the dimension of a pressure. Therefore a fraction of pInfCorr
+        ! is used. Idem for rlim; compute clim2 as well.
+
+        plim = 0.001_realType * pInfCorr
+        rlim = 0.001_realType * rhoInf
+        clim2 = 0.000001_realType * gammaInf * pInfCorr / rhoInf
+
+        doScaling = (dirScaling .and. currentLevel <= groundLevel)
+
+        ! Initialize sFace to zero. This value will be used if the
+        ! block is not moving.
+
+        sFace = zero
+        !
+        !           Inviscid contribution, depending on the preconditioner.
+        !           Compute the cell centered values of the spectral radii.
+        !
+        select case (precond)
+
+        case (noPrecond)
+
+            ! No preconditioner. Simply the standard spectral radius.
+            ! Loop over the cells, including the first level halo.
 
 #ifdef TAPENADE_REVERSE
-       !$AD II-LOOP
-       do ii=0,ie*je*ke-1
-          i = mod(ii, ie) + 1
-          j = mod(ii/ie, je) + 1
-          k = ii/(ie*je) + 1
+            !$AD II-LOOP
+            do ii = 0, ie * je * ke - 1
+                i = mod(ii, ie) + 1
+                j = mod(ii / ie, je) + 1
+                k = ii / (ie * je) + 1
 #else
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
 #endif
-                   ! Compute the velocities and speed of sound squared.
+                            ! Compute the velocities and speed of sound squared.
 
-                   uux  = w(i,j,k,ivx)
-                   uuy  = w(i,j,k,ivy)
-                   uuz  = w(i,j,k,ivz)
-                   cc2 = gamma(i,j,k)*p(i,j,k)/w(i,j,k,irho)
-                   cc2 = max(cc2,clim2)
+                            uux = w(i, j, k, ivx)
+                            uuy = w(i, j, k, ivy)
+                            uuz = w(i, j, k, ivz)
+                            cc2 = gamma(i, j, k) * p(i, j, k) / w(i, j, k, irho)
+                            cc2 = max(cc2, clim2)
 
-                   ! Set the dot product of the grid velocity and the
-                   ! normal in i-direction for a moving face. To avoid
-                   ! a number of multiplications by 0.5 simply the sum
-                   ! is taken.
+                            ! Set the dot product of the grid velocity and the
+                            ! normal in i-direction for a moving face. To avoid
+                            ! a number of multiplications by 0.5 simply the sum
+                            ! is taken.
 
-                   if( addGridVelocities ) &
-                        sFace = sFaceI(i-1,j,k) + sFaceI(i,j,k)
+                            if (addGridVelocities) &
+                                sFace = sFaceI(i - 1, j, k) + sFaceI(i, j, k)
 
-                   ! Spectral radius in i-direction.
+                            ! Spectral radius in i-direction.
 
-                   sx = si(i-1,j,k,1) + si(i,j,k,1)
-                   sy = si(i-1,j,k,2) + si(i,j,k,2)
-                   sz = si(i-1,j,k,3) + si(i,j,k,3)
+                            sx = si(i - 1, j, k, 1) + si(i, j, k, 1)
+                            sy = si(i - 1, j, k, 2) + si(i, j, k, 2)
+                            sz = si(i - 1, j, k, 3) + si(i, j, k, 3)
 
-                   qsi = uux*sx + uuy*sy + uuz*sz - sFace
+                            qsi = uux * sx + uuy * sy + uuz * sz - sFace
 
-                   ri = half*(abs(qsi) &
-                        +       sqrt(cc2*(sx**2 + sy**2 + sz**2)))
+                            ri = half * (abs(qsi) &
+                                         + sqrt(cc2 * (sx**2 + sy**2 + sz**2)))
 
-                   ! The grid velocity in j-direction.
+                            ! The grid velocity in j-direction.
 
-                   if( addGridVelocities ) &
-                        sFace = sFaceJ(i,j-1,k) + sFaceJ(i,j,k)
+                            if (addGridVelocities) &
+                                sFace = sFaceJ(i, j - 1, k) + sFaceJ(i, j, k)
 
-                   ! Spectral radius in j-direction.
+                            ! Spectral radius in j-direction.
 
-                   sx = sj(i,j-1,k,1) + sj(i,j,k,1)
-                   sy = sj(i,j-1,k,2) + sj(i,j,k,2)
-                   sz = sj(i,j-1,k,3) + sj(i,j,k,3)
+                            sx = sj(i, j - 1, k, 1) + sj(i, j, k, 1)
+                            sy = sj(i, j - 1, k, 2) + sj(i, j, k, 2)
+                            sz = sj(i, j - 1, k, 3) + sj(i, j, k, 3)
 
-                   qsj = uux*sx + uuy*sy + uuz*sz - sFace
+                            qsj = uux * sx + uuy * sy + uuz * sz - sFace
 
-                   rj = half*(abs(qsj) &
-                        +       sqrt(cc2*(sx**2 + sy**2 + sz**2)))
+                            rj = half * (abs(qsj) &
+                                         + sqrt(cc2 * (sx**2 + sy**2 + sz**2)))
 
-                   ! The grid velocity in k-direction.
+                            ! The grid velocity in k-direction.
 
-                   if( addGridVelocities ) &
-                        sFace = sFaceK(i,j,k-1) + sFaceK(i,j,k)
+                            if (addGridVelocities) &
+                                sFace = sFaceK(i, j, k - 1) + sFaceK(i, j, k)
 
-                   ! Spectral radius in k-direction.
+                            ! Spectral radius in k-direction.
 
-                   sx = sk(i,j,k-1,1) + sk(i,j,k,1)
-                   sy = sk(i,j,k-1,2) + sk(i,j,k,2)
-                   sz = sk(i,j,k-1,3) + sk(i,j,k,3)
+                            sx = sk(i, j, k - 1, 1) + sk(i, j, k, 1)
+                            sy = sk(i, j, k - 1, 2) + sk(i, j, k, 2)
+                            sz = sk(i, j, k - 1, 3) + sk(i, j, k, 3)
 
-                   qsk = uux*sx + uuy*sy + uuz*sz - sFace
+                            qsk = uux * sx + uuy * sy + uuz * sz - sFace
 
-                   rk = half*(abs(qsk) &
-                        +       sqrt(cc2*(sx**2 + sy**2 + sz**2)))
+                            rk = half * (abs(qsk) &
+                                         + sqrt(cc2 * (sx**2 + sy**2 + sz**2)))
 
-                   ! Compute the inviscid contribution to the time step.
+                            ! Compute the inviscid contribution to the time step.
 
-                   if (.not. onlyRadii) dtl(i,j,k) = ri + rj + rk
+                            if (.not. onlyRadii) dtl(i, j, k) = ri + rj + rk
 
-                   !
-                   !           Adapt the spectral radii if directional scaling must be
-                   !           applied.
-                   !
-                   if(doScaling) then
+                            !
+                            !           Adapt the spectral radii if directional scaling must be
+                            !           applied.
+                            !
+                            if (doScaling) then
 
-                      ! Avoid division by zero by clipping radi, radJ and
-                      ! radK.
+                                ! Avoid division by zero by clipping radi, radJ and
+                                ! radK.
 
-                      ri = max(ri, eps)
-                      rj = max(rj, eps)
-                      rk = max(rk, eps)
+                                ri = max(ri, eps)
+                                rj = max(rj, eps)
+                                rk = max(rk, eps)
 
-                      ! Compute the scaling in the three coordinate
-                      ! directions.
+                                ! Compute the scaling in the three coordinate
+                                ! directions.
 
-                      rij = (ri/rj)**adis
-                      rjk = (rj/rk)**adis
-                      rki = (rk/ri)**adis
+                                rij = (ri / rj)**adis
+                                rjk = (rj / rk)**adis
+                                rki = (rk / ri)**adis
 
-                      ! Create the scaled versions of the aspect ratios.
-                      ! Note that the multiplication is done with radi, radJ
-                      ! and radK, such that the influence of the clipping
-                      ! is negligible.
+                                ! Create the scaled versions of the aspect ratios.
+                                ! Note that the multiplication is done with radi, radJ
+                                ! and radK, such that the influence of the clipping
+                                ! is negligible.
 
-                      radi(i,j,k) = ri*(one + one/rij + rki)
-                      radJ(i,j,k) = rj*(one + one/rjk + rij)
-                      radK(i,j,k) = rk*(one + one/rki + rjk)
-                   else
-                      radi(i,j,k) = ri
-                      radj(i,j,k) = rj
-                      radk(i,j,k) = rk
-                   end if
+                                radi(i, j, k) = ri * (one + one / rij + rki)
+                                radJ(i, j, k) = rj * (one + one / rjk + rij)
+                                radK(i, j, k) = rk * (one + one / rki + rjk)
+                            else
+                                radi(i, j, k) = ri
+                                radj(i, j, k) = rj
+                                radk(i, j, k) = rk
+                            end if
 #ifdef TAPENADE_REVERSE
-                end do
+                        end do
 #else
-             enddo
-          enddo
-       enddo
+                    end do
+                end do
+            end do
 #endif
 
-    case (Turkel)
-       call terminate("timeStep","Turkel preconditioner not implemented yet")
-
+        case (Turkel)
+            call terminate("timeStep", "Turkel preconditioner not implemented yet")
 
-    case (ChoiMerkle)
-       call terminate("timeStep", &
-            "choi merkle preconditioner not implemented yet")
-    end select
+        case (ChoiMerkle)
+            call terminate("timeStep", &
+                           "choi merkle preconditioner not implemented yet")
+        end select
 
+        ! The rest of this file can be skipped if only the spectral
+        ! radii need to be computed.
+        testRadiiOnly: if (.not. onlyRadii) then
 
-    ! The rest of this file can be skipped if only the spectral
-    ! radii need to be computed.
-    testRadiiOnly: if(.not. onlyRadii) then
+            ! The viscous contribution, if needed.
 
-       ! The viscous contribution, if needed.
+            viscousTerm: if (viscous) then
 
-       viscousTerm: if( viscous ) then
+                ! Loop over the owned cell centers.
 
-          ! Loop over the owned cell centers.
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
 
-          do k=2,kl
-             do j=2,jl
-                do i=2,il
+                            ! Compute the effective viscosity coefficient. The
+                            ! factor 0.5 is a combination of two things. In the
+                            ! standard central discretization of a second
+                            ! derivative there is a factor 2 multiplying the
+                            ! central node. However in the code below not the
+                            ! average but the sum of the left and the right face
+                            ! is taken and squared. This leads to a factor 4.
+                            ! Combining both effects leads to 0.5. Furthermore,
+                            ! it is divided by the volume and density to obtain
+                            ! the correct dimensions and multiplied by the
+                            ! non-dimensional factor factVis.
 
-                   ! Compute the effective viscosity coefficient. The
-                   ! factor 0.5 is a combination of two things. In the
-                   ! standard central discretization of a second
-                   ! derivative there is a factor 2 multiplying the
-                   ! central node. However in the code below not the
-                   ! average but the sum of the left and the right face
-                   ! is taken and squared. This leads to a factor 4.
-                   ! Combining both effects leads to 0.5. Furthermore,
-                   ! it is divided by the volume and density to obtain
-                   ! the correct dimensions and multiplied by the
-                   ! non-dimensional factor factVis.
+                            rmu = rlv(i, j, k)
+                            if (eddyModel) rmu = rmu + rev(i, j, k)
+                            rmu = half * rmu / (w(i, j, k, irho) * vol(i, j, k))
 
-                   rmu = rlv(i,j,k)
-                   if( eddyModel ) rmu = rmu + rev(i,j,k)
-                   rmu = half*rmu/(w(i,j,k,irho)*vol(i,j,k))
+                            ! Add the viscous contribution in i-direction to the
+                            ! (inverse) of the time step.
 
-                   ! Add the viscous contribution in i-direction to the
-                   ! (inverse) of the time step.
+                            sx = si(i, j, k, 1) + si(i - 1, j, k, 1)
+                            sy = si(i, j, k, 2) + si(i - 1, j, k, 2)
+                            sz = si(i, j, k, 3) + si(i - 1, j, k, 3)
 
-                   sx = si(i,j,k,1) + si(i-1,j,k,1)
-                   sy = si(i,j,k,2) + si(i-1,j,k,2)
-                   sz = si(i,j,k,3) + si(i-1,j,k,3)
+                            vsi = rmu * (sx * sx + sy * sy + sz * sz)
+                            dtl(i, j, k) = dtl(i, j, k) + vsi
 
-                   vsi        = rmu*(sx*sx + sy*sy + sz*sz)
-                   dtl(i,j,k) = dtl(i,j,k) + vsi
+                            ! Add the viscous contribution in j-direction to the
+                            ! (inverse) of the time step.
 
-                   ! Add the viscous contribution in j-direction to the
-                   ! (inverse) of the time step.
+                            sx = sj(i, j, k, 1) + sj(i, j - 1, k, 1)
+                            sy = sj(i, j, k, 2) + sj(i, j - 1, k, 2)
+                            sz = sj(i, j, k, 3) + sj(i, j - 1, k, 3)
 
-                   sx = sj(i,j,k,1) + sj(i,j-1,k,1)
-                   sy = sj(i,j,k,2) + sj(i,j-1,k,2)
-                   sz = sj(i,j,k,3) + sj(i,j-1,k,3)
+                            vsj = rmu * (sx * sx + sy * sy + sz * sz)
+                            dtl(i, j, k) = dtl(i, j, k) + vsj
 
-                   vsj        = rmu*(sx*sx + sy*sy + sz*sz)
-                   dtl(i,j,k) = dtl(i,j,k) + vsj
+                            ! Add the viscous contribution in k-direction to the
+                            ! (inverse) of the time step.
 
-                   ! Add the viscous contribution in k-direction to the
-                   ! (inverse) of the time step.
+                            sx = sk(i, j, k, 1) + sk(i, j, k - 1, 1)
+                            sy = sk(i, j, k, 2) + sk(i, j, k - 1, 2)
+                            sz = sk(i, j, k, 3) + sk(i, j, k - 1, 3)
 
-                   sx = sk(i,j,k,1) + sk(i,j,k-1,1)
-                   sy = sk(i,j,k,2) + sk(i,j,k-1,2)
-                   sz = sk(i,j,k,3) + sk(i,j,k-1,3)
+                            vsk = rmu * (sx * sx + sy * sy + sz * sz)
+                            dtl(i, j, k) = dtl(i, j, k) + vsk
 
-                   vsk        = rmu*(sx*sx + sy*sy + sz*sz)
-                   dtl(i,j,k) = dtl(i,j,k) + vsk
+                        end do
+                    end do
+                end do
 
-                enddo
-             enddo
-          enddo
+            end if viscousTerm
 
-       endif viscousTerm
+            ! For the spectral mode an additional term term must be
+            ! taken into account, which corresponds to the contribution
+            ! of the highest frequency.
 
-       ! For the spectral mode an additional term term must be
-       ! taken into account, which corresponds to the contribution
-       ! of the highest frequency.
+            if (equationMode == timeSpectral) then
 
-       if(equationMode == timeSpectral) then
+                tmp = nTimeIntervalsSpectral * pi * timeRef &
+                      / sections(sectionID)%timePeriod
 
-          tmp = nTimeIntervalsSpectral*pi*timeRef &
-               / sections(sectionID)%timePeriod
+                ! Loop over the owned cell centers and add the term.
 
-          ! Loop over the owned cell centers and add the term.
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            dtl(i, j, k) = dtl(i, j, k) + tmp * vol(i, j, k)
+                        end do
+                    end do
+                end do
 
-          do k=2,kl
-             do j=2,jl
-                do i=2,il
-                   dtl(i,j,k) = dtl(i,j,k) + tmp*vol(i,j,k)
-                enddo
-             enddo
-          enddo
+            end if
+
+            ! Currently the inverse of dt/vol is stored in dtl. Invert
+            ! this value such that the time step per unit cfl number is
+            ! stored and correct in cases of high gradients.
+
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+                        dpi = abs(p(i + 1, j, k) - two * p(i, j, k) + p(i - 1, j, k)) &
+                              / (p(i + 1, j, k) + two * p(i, j, k) + p(i - 1, j, k) + plim)
+                        dpj = abs(p(i, j + 1, k) - two * p(i, j, k) + p(i, j - 1, k)) &
+                              / (p(i, j + 1, k) + two * p(i, j, k) + p(i, j - 1, k) + plim)
+                        dpk = abs(p(i, j, k + 1) - two * p(i, j, k) + p(i, j, k - 1)) &
+                              / (p(i, j, k + 1) + two * p(i, j, k) + p(i, j, k - 1) + plim)
+                        rfl = one / (one + b * (dpi + dpj + dpk))
+
+                        dtl(i, j, k) = rfl / dtl(i, j, k)
+                    end do
+                end do
+            end do
 
-       endif
+        end if testRadiiOnly
 
-       ! Currently the inverse of dt/vol is stored in dtl. Invert
-       ! this value such that the time step per unit cfl number is
-       ! stored and correct in cases of high gradients.
+    end subroutine timeStep_block
 
-       do k=2,kl
-          do j=2,jl
-             do i=2,il
-                dpi = abs(p(i+1,j,k) - two*p(i,j,k) + p(i-1,j,k)) &
-                     /    (p(i+1,j,k) + two*p(i,j,k) + p(i-1,j,k) + plim)
-                dpj = abs(p(i,j+1,k) - two*p(i,j,k) + p(i,j-1,k)) &
-                     /    (p(i,j+1,k) + two*p(i,j,k) + p(i,j-1,k) + plim)
-                dpk = abs(p(i,j,k+1) - two*p(i,j,k) + p(i,j,k-1)) &
-                     /    (p(i,j,k+1) + two*p(i,j,k) + p(i,j,k-1) + plim)
-                rfl = one/(one + b*(dpi  +dpj  +dpk))
+#ifndef USE_TAPENADE
+    subroutine gridVelocitiesFineLevel(useOldCoor, t, sps)
+        !
+        ! Shell function to call gridVelocitiesFineLevel on all blocks
+        !
+        use blockPointers
+        use constants
+        use inputTimeSpectral
+        use iteration
+        use utils, only: setPointers
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: sps
+        logical, intent(in) :: useOldCoor
+        real(kind=realType), dimension(*), intent(in) :: t  !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn
+
+        ! Loop over the number of blocks.
+
+        domains: do nn = 1, nDom
+
+            ! Set the pointers for this block.
+
+            call setPointers(nn, groundLevel, sps)
+            if (.NOT. useTSInterpolatedGridVelocity) then
+                call gridVelocitiesFineLevel_block(useOldCoor, t, sps, nn)
+            else
+                call gridVelocitiesFineLevel_TS_block(nn, sps)
+            end if
+
+        end do domains
+
+    end subroutine gridVelocitiesFineLevel
+#endif
+#ifndef USE_TAPENADE
+    subroutine gridVelocitiesFineLevel_TS_block(nn, sps)
+
+        use precision
+        use constants
+        use blockPointers
+        use inputPhysics, only: machgrid, velDirFreestream
+        use flowVarRefState, only: gammaInf, pInf, rhoInf
+        use inputTimeSpectral, only: dscalar, nTimeIntervalsSpectral
+
+        integer(kind=intType), intent(in) :: nn, sps
+        integer :: i, j, k, mm, ii, ie_l, je_l, ke_l
+        real(kind=realType) :: x_vc, y_vc, z_vc
+        real(kind=realType) :: x_fc, y_fc, z_fc
+        real(kind=realType) :: aInf
+        real(kind=realType) :: velxFreestream, velyFreestream, velzFreestream
+
+        ! get the grid free stream velocity
+        aInf = sqrt(gammaInf * pInf / rhoInf)
+        velxFreestream = (aInf * machgrid) * (-velDirFreestream(1))
+        velyFreestream = (aInf * machgrid) * (-velDirFreestream(2))
+        velzFreestream = (aInf * machgrid) * (-velDirFreestream(3))
+
+        ! Grid velocities of the cell centers, including the
+        ! 1st level halo cells.
+
+        ! Initialize with free stream velocity
+        ie_l = flowDoms(nn, 1, sps)%ie
+        je_l = flowDoms(nn, 1, sps)%je
+        ke_l = flowDoms(nn, 1, sps)%ke
+
+        do k = 1, ke_l
+            do j = 1, je_l
+                do i = 1, ie_l
+
+                    s(i, j, k, 1) = velxFreestream
+                    s(i, j, k, 2) = velyFreestream
+                    s(i, j, k, 3) = velzFreestream
 
-                dtl(i,j,k) = rfl/dtl(i,j,k)
-             enddo
-          enddo
-       enddo
+                end do
+            end do
+        end do
 
-    endif testRadiiOnly
+        ! The velocity contributed from mesh deformation
+        do mm = 1, nTimeIntervalsSpectral
 
-  end subroutine timeStep_block
+            ie_l = flowDoms(nn, 1, mm)%ie
+            je_l = flowDoms(nn, 1, mm)%je
+            ke_l = flowDoms(nn, 1, mm)%ke
 
-#ifndef USE_TAPENADE
-  subroutine gridVelocitiesFineLevel(useOldCoor, t, sps)
-    !
-    ! Shell function to call gridVelocitiesFineLevel on all blocks
-    !
-    use blockPointers
-    use constants
-    use inputTimeSpectral
-    use iteration
-    use utils, only : setPointers
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: sps
-    logical,               intent(in) :: useOldCoor
-    real(kind=realType), dimension(*), intent(in) :: t  !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn
+            do k = 1, ke_l
+                do j = 1, je_l
+                    do i = 1, ie_l
 
-    ! Loop over the number of blocks.
+            x_vc = eighth * (flowDoms(nn, 1, mm)%x(i - 1, j - 1, k - 1, 1) + flowDoms(nn, 1, mm)%x(i, j - 1, k - 1, 1) &
+                                   + flowDoms(nn, 1, mm)%x(i - 1, j, k - 1, 1) + flowDoms(nn, 1, mm)%x(i, j, k - 1, 1) &
+                                   + flowDoms(nn, 1, mm)%x(i - 1, j - 1, k, 1) + flowDoms(nn, 1, mm)%x(i, j - 1, k, 1) &
+                                         + flowDoms(nn, 1, mm)%x(i - 1, j, k, 1) + flowDoms(nn, 1, mm)%x(i, j, k, 1))
 
-    domains: do nn=1,nDom
+            y_vc = eighth * (flowDoms(nn, 1, mm)%x(i - 1, j - 1, k - 1, 2) + flowDoms(nn, 1, mm)%x(i, j - 1, k - 1, 2) &
+                                   + flowDoms(nn, 1, mm)%x(i - 1, j, k - 1, 2) + flowDoms(nn, 1, mm)%x(i, j, k - 1, 2) &
+                                   + flowDoms(nn, 1, mm)%x(i - 1, j - 1, k, 2) + flowDoms(nn, 1, mm)%x(i, j - 1, k, 2) &
+                                         + flowDoms(nn, 1, mm)%x(i - 1, j, k, 2) + flowDoms(nn, 1, mm)%x(i, j, k, 2))
 
-       ! Set the pointers for this block.
+            z_vc = eighth * (flowDoms(nn, 1, mm)%x(i - 1, j - 1, k - 1, 3) + flowDoms(nn, 1, mm)%x(i, j - 1, k - 1, 3) &
+                                   + flowDoms(nn, 1, mm)%x(i - 1, j, k - 1, 3) + flowDoms(nn, 1, mm)%x(i, j, k - 1, 3) &
+                                   + flowDoms(nn, 1, mm)%x(i - 1, j - 1, k, 3) + flowDoms(nn, 1, mm)%x(i, j - 1, k, 3) &
+                                         + flowDoms(nn, 1, mm)%x(i - 1, j, k, 3) + flowDoms(nn, 1, mm)%x(i, j, k, 3))
 
-       call setPointers(nn, groundLevel, sps)
-       if (.NOT. useTSInterpolatedGridVelocity) then
-         call gridVelocitiesFineLevel_block(useOldCoor, t, sps, nn)
-       else
-         call gridVelocitiesFineLevel_TS_block(nn, sps)
-       end if
+                        s(i, j, k, 1) = s(i, j, k, 1) + dscalar(1, sps, mm) * x_vc
+                        s(i, j, k, 2) = s(i, j, k, 2) + dscalar(1, sps, mm) * y_vc
+                        s(i, j, k, 3) = s(i, j, k, 3) + dscalar(1, sps, mm) * z_vc
 
-    end do domains
+                    end do
+                end do
+            end do
 
-  end subroutine gridVelocitiesFineLevel
-#endif
-#ifndef USE_TAPENADE
-  subroutine gridVelocitiesFineLevel_TS_block(nn, sps)
-
-    use precision
-    use constants
-    use blockPointers
-    use inputPhysics, only: machgrid, velDirFreestream
-    use flowVarRefState, only: gammaInf, pInf, rhoInf
-    use inputTimeSpectral, only: dscalar, nTimeIntervalsSpectral
-
-    integer(kind=intType), intent(in) :: nn, sps
-    integer :: i, j, k, mm, ii, ie_l, je_l, ke_l
-    real(kind=realType) :: x_vc, y_vc, z_vc
-    real(kind=realType) :: x_fc, y_fc, z_fc
-    real(kind=realType) :: aInf
-    real(kind=realType) :: velxFreestream, velyFreestream, velzFreestream
-     
-    ! get the grid free stream velocity
-    aInf = sqrt(gammaInf*pInf/rhoInf)
-    velxFreestream = (aInf*machgrid)*(-velDirFreestream(1))
-    velyFreestream = (aInf*machgrid)*(-velDirFreestream(2))
-    velzFreestream = (aInf*machgrid)*(-velDirFreestream(3))
-
-    ! Grid velocities of the cell centers, including the
-    ! 1st level halo cells.
-
-    ! Initialize with free stream velocity
-    ie_l = flowDoms(nn, 1, sps)%ie
-    je_l = flowDoms(nn, 1, sps)%je
-    ke_l = flowDoms(nn, 1, sps)%ke  
+        end do
 
-    do k=1, ke_l
-       do j=1, je_l
-          do i=1, ie_l
+        ! Normal grid velocities of the faces.
 
-             s(i, j, k, 1) = velxFreestream
-             s(i, j, k, 2) = velyFreestream             
-             s(i, j, k, 3) = velzFreestream
+        ! sFaceI=dot(sI, v)
+        ! =dot(sI, v_freestream + v_meshmotion)
+        ! =dot(sI, v_freestream) + dot(sI, v_meshmotion)
 
-          end do
-       end do
-    end do
+        ! sFaceJ, sFaceK follow the same rule.
 
-    ! The velocity contributed from mesh deformation 
-    do mm=1, nTimeIntervalsSpectral
+        ! dot(sI, v_freestream)
+        ie_l = flowDoms(nn, 1, sps)%ie
+        je_l = flowDoms(nn, 1, sps)%je
+        ke_l = flowDoms(nn, 1, sps)%ke
 
-       ie_l = flowDoms(nn, 1, mm)%ie
-       je_l = flowDoms(nn, 1, mm)%je
-       ke_l = flowDoms(nn, 1, mm)%ke
-       
-       do k=1,ke_l
-          do j=1,je_l
-             do i=1,ie_l
+        ! i
+        do k = 1, ke_l
+            do j = 1, je_l
+                do i = 0, ie_l
 
-                x_vc = eighth*(flowDoms(nn, 1, mm)%x(i-1,j-1,k-1,1) + flowDoms(nn, 1, mm)%x(i,j-1,k-1,1) &
-                     +         flowDoms(nn, 1, mm)%x(i-1,j,  k-1,1) + flowDoms(nn, 1, mm)%x(i,j,  k-1,1) &
-                     +         flowDoms(nn, 1, mm)%x(i-1,j-1,k,  1) + flowDoms(nn, 1, mm)%x(i,j-1,k,  1) &
-                     +         flowDoms(nn, 1, mm)%x(i-1,j,  k,  1) + flowDoms(nn, 1, mm)%x(i,j,  k,  1))
+                    sFaceI(i, j, k) = velxFreestream * sI(i, j, k, 1) + velyFreestream * sI(i, j, k, 2) &
+                                      + velzFreestream * sI(i, j, k, 3)
 
-                y_vc = eighth*(flowDoms(nn, 1, mm)%x(i-1,j-1,k-1,2) + flowDoms(nn, 1, mm)%x(i,j-1,k-1,2) &
-                     +         flowDoms(nn, 1, mm)%x(i-1,j,  k-1,2) + flowDoms(nn, 1, mm)%x(i,j,  k-1,2) &
-                     +         flowDoms(nn, 1, mm)%x(i-1,j-1,k,  2) + flowDoms(nn, 1, mm)%x(i,j-1,k,  2) &
-                     +         flowDoms(nn, 1, mm)%x(i-1,j,  k,  2) + flowDoms(nn, 1, mm)%x(i,j,  k,  2))
+                end do
+            end do
+        end do
 
-                z_vc = eighth*(flowDoms(nn, 1, mm)%x(i-1,j-1,k-1,3) + flowDoms(nn, 1, mm)%x(i,j-1,k-1,3) &
-                     +         flowDoms(nn, 1, mm)%x(i-1,j,  k-1,3) + flowDoms(nn, 1, mm)%x(i,j,  k-1,3) &
-                     +         flowDoms(nn, 1, mm)%x(i-1,j-1,k,  3) + flowDoms(nn, 1, mm)%x(i,j-1,k,  3) &
-                     +         flowDoms(nn, 1, mm)%x(i-1,j,  k,  3) + flowDoms(nn, 1, mm)%x(i,j,  k,  3))
-
-
-                s(i,j,k,1) = s(i,j,k,1) + dscalar(1, sps, mm)*x_vc
-                s(i,j,k,2) = s(i,j,k,2) + dscalar(1, sps, mm)*y_vc
-                s(i,j,k,3) = s(i,j,k,3) + dscalar(1, sps, mm)*z_vc
-
-             end do
-          end do
-       end do
-
-    end do
-     
-    ! Normal grid velocities of the faces.
-
-    ! sFaceI=dot(sI, v)
-    ! =dot(sI, v_freestream + v_meshmotion)
-    ! =dot(sI, v_freestream) + dot(sI, v_meshmotion)
-
-    ! sFaceJ, sFaceK follow the same rule.
-
-    ! dot(sI, v_freestream)
-    ie_l = flowDoms(nn, 1, sps)%ie
-    je_l = flowDoms(nn, 1, sps)%je
-    ke_l = flowDoms(nn, 1, sps)%ke  
-
-
-    ! i
-    do k=1, ke_l
-       do j=1, je_l
-          do i=0, ie_l
-
-             sFaceI(i, j, k) = velxFreestream*sI(i, j, k, 1) + velyFreestream*sI(i, j, k, 2) &
-                             + velzFreestream*sI(i, j, k, 3)
-
-
-          end do
-       end do
-    end do
-
-    ! j
-    do k=1, ke_l
-       do j=0, je_l
-          do i=1, ie_l
-
-             sFaceJ(i, j, k) = velxFreestream*sJ(i, j, k, 1) + velyFreestream*sJ(i, j, k, 2) &
-                             + velzFreestream*sJ(i, j, k, 3)
-
-
-          end do
-       end do
-    end do
-
-    ! k
-    do k=0, ke_l
-       do j=1, je_l
-          do i=1, ie_l
-
-             sFaceK(i, j, k) = velxFreestream*sK(i, j, k, 1) + velyFreestream*sK(i, j, k, 2) &
-                             + velzFreestream*sK(i, j, k, 3)
-
-
-          end do
-       end do
-    end do
-
-
-    !  dot(sI, v_meshmotion)
-
-    do mm=1,nTimeIntervalsSpectral
-
-       ie_l = flowDoms(nn, 1, mm)%ie
-       je_l = flowDoms(nn, 1, mm)%je
-       ke_l = flowDoms(nn, 1, mm)%ke
-
-
-       ! i
-       do k=1, ke_l
-          do j=1, je_l
-             do i=0, ie_l
-   
-                x_fc = fourth*(flowDoms(nn, 1, mm)%x(  i,j-1,k-1, 1) + flowDoms(nn, 1, mm)%x(  i,  j,  k, 1)&
-                             + flowDoms(nn, 1, mm)%x(  i,j-1,  k, 1) + flowDoms(nn, 1, mm)%x(  i,  j,k-1, 1))
-                y_fc = fourth*(flowDoms(nn, 1, mm)%x(  i,j-1,k-1, 2) + flowDoms(nn, 1, mm)%x(  i,  j,  k, 2)&
-                             + flowDoms(nn, 1, mm)%x(  i,j-1,  k, 2) + flowDoms(nn, 1, mm)%x(  i,  j,k-1, 2))
-                z_fc = fourth*(flowDoms(nn, 1, mm)%x(  i,j-1,k-1, 3) + flowDoms(nn, 1, mm)%x(  i,  j,  k, 3)&
-                             + flowDoms(nn, 1, mm)%x(  i,j-1,  k, 3) + flowDoms(nn, 1, mm)%x(  i,  j,k-1, 3))
-
-                sFaceI(i, j, k) = sFaceI(i, j, k) & 
-                                + dscalar(1, sps, mm)*x_fc*sI(i, j, k, 1) &
-                                + dscalar(1, sps, mm)*y_fc*sI(i, j, k, 2) &
-                                + dscalar(1, sps, mm)*z_fc*sI(i, j, k, 3)                
-
-             end do
-          end do
-       end do       
+        ! j
+        do k = 1, ke_l
+            do j = 0, je_l
+                do i = 1, ie_l
 
-       ! j
-       do k=1, ke_l
-          do j=0, je_l
-             do i=1, ie_l
-   
-                x_fc = fourth*(flowDoms(nn, 1, mm)%x(i-1,  j,k-1, 1) + flowDoms(nn, 1, mm)%x(  i,  j,  k, 1)&
-                             + flowDoms(nn, 1, mm)%x(i-1,  j,  k, 1) + flowDoms(nn, 1, mm)%x(  i,  j,k-1, 1))
-                y_fc = fourth*(flowDoms(nn, 1, mm)%x(i-1,  j,k-1, 2) + flowDoms(nn, 1, mm)%x(  i,  j,  k, 2)&
-                             + flowDoms(nn, 1, mm)%x(i-1,  j,  k, 2) + flowDoms(nn, 1, mm)%x(  i,  j,k-1, 2))
-                z_fc = fourth*(flowDoms(nn, 1, mm)%x(i-1,  j,k-1, 3) + flowDoms(nn, 1, mm)%x(  i,  j,  k, 3)&
-                             + flowDoms(nn, 1, mm)%x(i-1,  j,  k, 3) + flowDoms(nn, 1, mm)%x(  i,  j,k-1, 3))
-
-                sFaceJ(i, j, k) = sFaceJ(i, j, k)                          & 
-                                + dscalar(1, sps, mm)*x_fc*sJ(i, j, k, 1) &
-                                + dscalar(1, sps, mm)*y_fc*sJ(i, j, k, 2) &
-                                + dscalar(1, sps, mm)*z_fc*sJ(i, j, k, 3)              
-
-             end do
-          end do
-       end do
-
-       ! k
-       do k=0, ke_l
-          do j=1, je_l
-             do i=1, ie_l
-   
-                x_fc = fourth*(flowDoms(nn, 1, mm)%x(i-1,j-1,  k, 1) + flowDoms(nn, 1, mm)%x(  i,  j,  k, 1)&
-                             + flowDoms(nn, 1, mm)%x(  i,j-1,  k, 1) + flowDoms(nn, 1, mm)%x(i-1,  j,  k, 1))
-                y_fc = fourth*(flowDoms(nn, 1, mm)%x(i-1,j-1,  k, 2) + flowDoms(nn, 1, mm)%x(  i,  j,  k, 2)&
-                             + flowDoms(nn, 1, mm)%x(  i,j-1,  k, 2) + flowDoms(nn, 1, mm)%x(i-1,  j,  k, 2))
-                z_fc = fourth*(flowDoms(nn, 1, mm)%x(i-1,j-1,  k, 3) + flowDoms(nn, 1, mm)%x(  i,  j,  k, 3)&
-                             + flowDoms(nn, 1, mm)%x(  i,j-1,  k, 3) + flowDoms(nn, 1, mm)%x(i-1,  j,  k, 3))
-
-                sFaceK(i, j, k) = sFaceK(i, j, k)                          & 
-                                + dscalar(1, sps, mm)*x_fc*sK(i, j, k, 1) &
-                                + dscalar(1, sps, mm)*y_fc*sK(i, j, k, 2) &
-                                + dscalar(1, sps, mm)*z_fc*sK(i, j, k, 3)               
-
-             end do
-          end do
-       end do  
-     
-    end do
-
-  end subroutine gridVelocitiesFineLevel_TS_block
-#endif
-  subroutine gridVelocitiesFineLevel_block(useOldCoor, t, sps, nn)
-    !
-    !       gridVelocitiesFineLevel computes the grid velocities for
-    !       the cell centers and the normal grid velocities for the faces
-    !       of moving blocks for the currently finest grid, i.e.
-    !       groundLevel. The velocities are computed at time t for
-    !       spectral mode sps. If useOldCoor is .true. the velocities
-    !       are determined using the unsteady time integrator in
-    !       combination with the old coordinates; otherwise the analytic
-    !       form is used.
-    !
-    use blockPointers
-    use cgnsGrid
-    use flowVarRefState
-    use inputMotion
-    use inputUnsteady
-    use iteration
-    use inputPhysics
-    use inputTSStabDeriv
-    use monitor
-    use communication
-    use flowUtils, only :  derivativeRotMatrixRigid, getDirVector
-    use utils, only : setCoefTimeIntegrator,tsAlpha, tsBeta, tsMach, terminate, &
-         rotMatrixRigidBody, getDirAngle
-
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: sps, nn
-    logical,               intent(in) :: useOldCoor
+                    sFaceJ(i, j, k) = velxFreestream * sJ(i, j, k, 1) + velyFreestream * sJ(i, j, k, 2) &
+                                      + velzFreestream * sJ(i, j, k, 3)
 
-    real(kind=realType), dimension(*), intent(in) :: t
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: mm
-    integer(kind=intType) :: i, j, k, ii, iie, jje, kke
+                end do
+            end do
+        end do
 
-    real(kind=realType) :: oneOver4dt, oneOver8dt
-    real(kind=realType) :: velxGrid, velyGrid, velzGrid,ainf
-    real(kind=realType) :: velxGrid0, velyGrid0, velzGrid0
+        ! k
+        do k = 0, ke_l
+            do j = 1, je_l
+                do i = 1, ie_l
 
-    real(kind=realType), dimension(3) :: sc, xc, xxc
-    real(kind=realType), dimension(3) :: rotCenter, rotRate
+                    sFaceK(i, j, k) = velxFreestream * sK(i, j, k, 1) + velyFreestream * sK(i, j, k, 2) &
+                                      + velzFreestream * sK(i, j, k, 3)
 
-    real(kind=realType), dimension(3)   :: rotationPoint
-    real(kind=realType), dimension(3,3) :: rotationMatrix,&
-         derivRotationMatrix
+                end do
+            end do
+        end do
 
-    real(kind=realType) :: tNew, tOld
-    real(kind=realType), dimension(:,:), pointer :: sFace
+        !  dot(sI, v_meshmotion)
 
-    real(kind=realType), dimension(:,:,:),   pointer :: xx, ss
-    real(kind=realType), dimension(:,:,:,:), pointer :: xxOld
+        do mm = 1, nTimeIntervalsSpectral
 
-    real(kind=realType) :: intervalMach,alphaTS,alphaIncrement,&
-         betaTS,betaIncrement
-    real(kind=realType), dimension(3) ::velDir
-    real(kind=realType), dimension(3) :: refDirection
+            ie_l = flowDoms(nn, 1, mm)%ie
+            je_l = flowDoms(nn, 1, mm)%je
+            ke_l = flowDoms(nn, 1, mm)%ke
 
+            ! i
+            do k = 1, ke_l
+                do j = 1, je_l
+                    do i = 0, ie_l
 
-    ! Compute the mesh velocity from the given mesh Mach number.
+                        x_fc = fourth * (flowDoms(nn, 1, mm)%x(i, j - 1, k - 1, 1) + flowDoms(nn, 1, mm)%x(i, j, k, 1) &
+                                        + flowDoms(nn, 1, mm)%x(i, j - 1, k, 1) + flowDoms(nn, 1, mm)%x(i, j, k - 1, 1))
+                        y_fc = fourth * (flowDoms(nn, 1, mm)%x(i, j - 1, k - 1, 2) + flowDoms(nn, 1, mm)%x(i, j, k, 2) &
+                                        + flowDoms(nn, 1, mm)%x(i, j - 1, k, 2) + flowDoms(nn, 1, mm)%x(i, j, k - 1, 2))
+                        z_fc = fourth * (flowDoms(nn, 1, mm)%x(i, j - 1, k - 1, 3) + flowDoms(nn, 1, mm)%x(i, j, k, 3) &
+                                        + flowDoms(nn, 1, mm)%x(i, j - 1, k, 3) + flowDoms(nn, 1, mm)%x(i, j, k - 1, 3))
 
-    ! vel{x,y,z}Grid0 is the ACTUAL velocity you want at the
-    ! geometry.
-    aInf = sqrt(gammaInf*pInf/rhoInf)
-    velxGrid0 = (aInf*machgrid)*(-velDirFreestream(1))
-    velyGrid0 = (aInf*machgrid)*(-velDirFreestream(2))
-    velzGrid0 = (aInf*machgrid)*(-velDirFreestream(3))
+                        sFaceI(i, j, k) = sFaceI(i, j, k) &
+                                          + dscalar(1, sps, mm) * x_fc * sI(i, j, k, 1) &
+                                          + dscalar(1, sps, mm) * y_fc * sI(i, j, k, 2) &
+                                          + dscalar(1, sps, mm) * z_fc * sI(i, j, k, 3)
 
-    ! Compute the derivative of the rotation matrix and the rotation
-    ! point; needed for velocity due to the rigid body rotation of
-    ! the entire grid. It is assumed that the rigid body motion of
-    ! the grid is only specified if there is only 1 section present.
+                    end do
+                end do
+            end do
+
+            ! j
+            do k = 1, ke_l
+                do j = 0, je_l
+                    do i = 1, ie_l
+
+                        x_fc = fourth * (flowDoms(nn, 1, mm)%x(i - 1, j, k - 1, 1) + flowDoms(nn, 1, mm)%x(i, j, k, 1) &
+                                        + flowDoms(nn, 1, mm)%x(i - 1, j, k, 1) + flowDoms(nn, 1, mm)%x(i, j, k - 1, 1))
+                        y_fc = fourth * (flowDoms(nn, 1, mm)%x(i - 1, j, k - 1, 2) + flowDoms(nn, 1, mm)%x(i, j, k, 2) &
+                                        + flowDoms(nn, 1, mm)%x(i - 1, j, k, 2) + flowDoms(nn, 1, mm)%x(i, j, k - 1, 2))
+                        z_fc = fourth * (flowDoms(nn, 1, mm)%x(i - 1, j, k - 1, 3) + flowDoms(nn, 1, mm)%x(i, j, k, 3) &
+                                        + flowDoms(nn, 1, mm)%x(i - 1, j, k, 3) + flowDoms(nn, 1, mm)%x(i, j, k - 1, 3))
+
+                        sFaceJ(i, j, k) = sFaceJ(i, j, k) &
+                                          + dscalar(1, sps, mm) * x_fc * sJ(i, j, k, 1) &
+                                          + dscalar(1, sps, mm) * y_fc * sJ(i, j, k, 2) &
+                                          + dscalar(1, sps, mm) * z_fc * sJ(i, j, k, 3)
+
+                    end do
+                end do
+            end do
+
+            ! k
+            do k = 0, ke_l
+                do j = 1, je_l
+                    do i = 1, ie_l
+
+                        x_fc = fourth * (flowDoms(nn, 1, mm)%x(i - 1, j - 1, k, 1) + flowDoms(nn, 1, mm)%x(i, j, k, 1) &
+                                        + flowDoms(nn, 1, mm)%x(i, j - 1, k, 1) + flowDoms(nn, 1, mm)%x(i - 1, j, k, 1))
+                        y_fc = fourth * (flowDoms(nn, 1, mm)%x(i - 1, j - 1, k, 2) + flowDoms(nn, 1, mm)%x(i, j, k, 2) &
+                                        + flowDoms(nn, 1, mm)%x(i, j - 1, k, 2) + flowDoms(nn, 1, mm)%x(i - 1, j, k, 2))
+                        z_fc = fourth * (flowDoms(nn, 1, mm)%x(i - 1, j - 1, k, 3) + flowDoms(nn, 1, mm)%x(i, j, k, 3) &
+                                        + flowDoms(nn, 1, mm)%x(i, j - 1, k, 3) + flowDoms(nn, 1, mm)%x(i - 1, j, k, 3))
+
+                        sFaceK(i, j, k) = sFaceK(i, j, k) &
+                                          + dscalar(1, sps, mm) * x_fc * sK(i, j, k, 1) &
+                                          + dscalar(1, sps, mm) * y_fc * sK(i, j, k, 2) &
+                                          + dscalar(1, sps, mm) * z_fc * sK(i, j, k, 3)
+
+                    end do
+                end do
+            end do
 
-    call derivativeRotMatrixRigid(derivRotationMatrix, rotationPoint, t(1))
+        end do
 
-    !compute the rotation matrix to update the velocities for the time
-    !spectral stability derivative case...
+    end subroutine gridVelocitiesFineLevel_TS_block
+#endif
+    subroutine gridVelocitiesFineLevel_block(useOldCoor, t, sps, nn)
+        !
+        !       gridVelocitiesFineLevel computes the grid velocities for
+        !       the cell centers and the normal grid velocities for the faces
+        !       of moving blocks for the currently finest grid, i.e.
+        !       groundLevel. The velocities are computed at time t for
+        !       spectral mode sps. If useOldCoor is .true. the velocities
+        !       are determined using the unsteady time integrator in
+        !       combination with the old coordinates; otherwise the analytic
+        !       form is used.
+        !
+        use blockPointers
+        use cgnsGrid
+        use flowVarRefState
+        use inputMotion
+        use inputUnsteady
+        use iteration
+        use inputPhysics
+        use inputTSStabDeriv
+        use monitor
+        use communication
+        use flowUtils, only: derivativeRotMatrixRigid, getDirVector
+        use utils, only: setCoefTimeIntegrator, tsAlpha, tsBeta, tsMach, terminate, &
+                         rotMatrixRigidBody, getDirAngle
+
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: sps, nn
+        logical, intent(in) :: useOldCoor
+
+        real(kind=realType), dimension(*), intent(in) :: t
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: mm
+        integer(kind=intType) :: i, j, k, ii, iie, jje, kke
+
+        real(kind=realType) :: oneOver4dt, oneOver8dt
+        real(kind=realType) :: velxGrid, velyGrid, velzGrid, ainf
+        real(kind=realType) :: velxGrid0, velyGrid0, velzGrid0
+
+        real(kind=realType), dimension(3) :: sc, xc, xxc
+        real(kind=realType), dimension(3) :: rotCenter, rotRate
+
+        real(kind=realType), dimension(3) :: rotationPoint
+        real(kind=realType), dimension(3, 3) :: rotationMatrix, &
+                                                derivRotationMatrix
+
+        real(kind=realType) :: tNew, tOld
+        real(kind=realType), dimension(:, :), pointer :: sFace
+
+        real(kind=realType), dimension(:, :, :), pointer :: xx, ss
+        real(kind=realType), dimension(:, :, :, :), pointer :: xxOld
+
+        real(kind=realType) :: intervalMach, alphaTS, alphaIncrement, &
+                               betaTS, betaIncrement
+        real(kind=realType), dimension(3) :: velDir
+        real(kind=realType), dimension(3) :: refDirection
+
+        ! Compute the mesh velocity from the given mesh Mach number.
+
+        ! vel{x,y,z}Grid0 is the ACTUAL velocity you want at the
+        ! geometry.
+        aInf = sqrt(gammaInf * pInf / rhoInf)
+        velxGrid0 = (aInf * machgrid) * (-velDirFreestream(1))
+        velyGrid0 = (aInf * machgrid) * (-velDirFreestream(2))
+        velzGrid0 = (aInf * machgrid) * (-velDirFreestream(3))
+
+        ! Compute the derivative of the rotation matrix and the rotation
+        ! point; needed for velocity due to the rigid body rotation of
+        ! the entire grid. It is assumed that the rigid body motion of
+        ! the grid is only specified if there is only 1 section present.
+
+        call derivativeRotMatrixRigid(derivRotationMatrix, rotationPoint, t(1))
+
+        !compute the rotation matrix to update the velocities for the time
+        !spectral stability derivative case...
 
 #ifndef USE_TAPENADE
 ! We do not differentiate the time spectral stability stuff for now
-    if(TSStability)then
-       ! Determine the time values of the old and new time level.
-       ! It is assumed that the rigid body rotation of the mesh is only
-       ! used when only 1 section is present.
-
-       tNew = timeUnsteady + timeUnsteadyRestart
-       tOld = tNew - t(1)
-
-       if(TSpMode.or. TSqMode .or.TSrMode) then
-          ! Compute the rotation matrix of the rigid body rotation as
-          ! well as the rotation point; the latter may vary in time due
-          ! to rigid body translation.
-
-          call rotMatrixRigidBody(tNew, tOld, rotationMatrix, rotationPoint)
-
-          if(TSAlphaFollowing) then
-
-             velxgrid0 = rotationMatrix(1,1)*velxgrid0 &
-                  + rotationMatrix(1,2)*velygrid0 &
-                  + rotationMatrix(1,3)*velzgrid0
-             velygrid0 = rotationMatrix(2,1)*velxgrid0 &
-                  + rotationMatrix(2,2)*velygrid0 &
-                  + rotationMatrix(2,3)*velzgrid0
-             velzgrid0 = rotationMatrix(3,1)*velxgrid0 &
-                  + rotationMatrix(3,2)*velygrid0 &
-                  + rotationMatrix(3,3)*velzgrid0
-
-          end if
-
-       elseif(tsAlphaMode)then
-
-          !Determine the alpha for this time instance
-          alphaIncrement = TSAlpha(degreePolAlpha,   coefPolAlpha,       &
-               degreeFourAlpha,  omegaFourAlpha,     &
-               cosCoefFourAlpha, sinCoefFourAlpha, t(1))
-
-          alphaTS = alpha+alphaIncrement
-          !Determine the grid velocity for this alpha
-          refDirection(:) = zero
-          refDirection(1) = one
-          call getDirVector(refDirection, alphaTS, beta, velDir, liftIndex)
-
-          !do I need to update the lift direction and drag direction as well?
-          !set the effictive grid velocity for this time interval
-          velxGrid0 = (aInf*machgrid)*(-velDir(1))
-          velyGrid0 = (aInf*machgrid)*(-velDir(2))
-          velzGrid0 = (aInf*machgrid)*(-velDir(3))
-
-       elseif(tsBetaMode)then
-
-          !Determine the alpha for this time instance
-          betaIncrement = TSBeta(degreePolBeta,   coefPolBeta,       &
-               degreeFourBeta,  omegaFourBeta,     &
-               cosCoefFourBeta, sinCoefFourBeta, t(1))
-
-          betaTS = beta+betaIncrement
-          !Determine the grid velocity for this alpha
-          refDirection(:) = zero
-          refDirection(1) = one
-          call getDirVector(refDirection, alpha, betaTS, velDir, liftIndex)
-
-          !do I need to update the lift direction and drag direction as well?
-          !set the effictive grid velocity for this time interval
-          velxGrid0 = (aInf*machgrid)*(-velDir(1))
-          velyGrid0 = (aInf*machgrid)*(-velDir(2))
-          velzGrid0 = (aInf*machgrid)*(-velDir(3))
-       elseif(TSMachMode)then
-          !determine the mach number at this time interval
-          IntervalMach = TSMach(degreePolMach,   coefPolMach,       &
-               degreeFourMach,  omegaFourMach,     &
-               cosCoefFourMach, sinCoefFourMach, t(1))
-          !set the effective grid velocity
-          velxGrid0 = (aInf*(IntervalMach+machgrid))*(-velDirFreestream(1))
-          velyGrid0 = (aInf*(IntervalMach+machgrid))*(-velDirFreestream(2))
-          velzGrid0 = (aInf*(IntervalMach+machgrid))*(-velDirFreestream(3))
-
-       elseif(TSAltitudeMode)then
-          call terminate('gridVelocityFineLevel','altitude motion not yet implemented...')
-       else
-          call terminate('gridVelocityFineLevel','Not a recognized Stability Motion')
-       end if
-    endif
+        if (TSStability) then
+            ! Determine the time values of the old and new time level.
+            ! It is assumed that the rigid body rotation of the mesh is only
+            ! used when only 1 section is present.
+
+            tNew = timeUnsteady + timeUnsteadyRestart
+            tOld = tNew - t(1)
+
+            if (TSpMode .or. TSqMode .or. TSrMode) then
+                ! Compute the rotation matrix of the rigid body rotation as
+                ! well as the rotation point; the latter may vary in time due
+                ! to rigid body translation.
+
+                call rotMatrixRigidBody(tNew, tOld, rotationMatrix, rotationPoint)
+
+                if (TSAlphaFollowing) then
+
+                    velxgrid0 = rotationMatrix(1, 1) * velxgrid0 &
+                                + rotationMatrix(1, 2) * velygrid0 &
+                                + rotationMatrix(1, 3) * velzgrid0
+                    velygrid0 = rotationMatrix(2, 1) * velxgrid0 &
+                                + rotationMatrix(2, 2) * velygrid0 &
+                                + rotationMatrix(2, 3) * velzgrid0
+                    velzgrid0 = rotationMatrix(3, 1) * velxgrid0 &
+                                + rotationMatrix(3, 2) * velygrid0 &
+                                + rotationMatrix(3, 3) * velzgrid0
+
+                end if
+
+            elseif (tsAlphaMode) then
+
+                !Determine the alpha for this time instance
+                alphaIncrement = TSAlpha(degreePolAlpha, coefPolAlpha, &
+                                         degreeFourAlpha, omegaFourAlpha, &
+                                         cosCoefFourAlpha, sinCoefFourAlpha, t(1))
+
+                alphaTS = alpha + alphaIncrement
+                !Determine the grid velocity for this alpha
+                refDirection(:) = zero
+                refDirection(1) = one
+                call getDirVector(refDirection, alphaTS, beta, velDir, liftIndex)
+
+                !do I need to update the lift direction and drag direction as well?
+                !set the effictive grid velocity for this time interval
+                velxGrid0 = (aInf * machgrid) * (-velDir(1))
+                velyGrid0 = (aInf * machgrid) * (-velDir(2))
+                velzGrid0 = (aInf * machgrid) * (-velDir(3))
+
+            elseif (tsBetaMode) then
+
+                !Determine the alpha for this time instance
+                betaIncrement = TSBeta(degreePolBeta, coefPolBeta, &
+                                       degreeFourBeta, omegaFourBeta, &
+                                       cosCoefFourBeta, sinCoefFourBeta, t(1))
+
+                betaTS = beta + betaIncrement
+                !Determine the grid velocity for this alpha
+                refDirection(:) = zero
+                refDirection(1) = one
+                call getDirVector(refDirection, alpha, betaTS, velDir, liftIndex)
+
+                !do I need to update the lift direction and drag direction as well?
+                !set the effictive grid velocity for this time interval
+                velxGrid0 = (aInf * machgrid) * (-velDir(1))
+                velyGrid0 = (aInf * machgrid) * (-velDir(2))
+                velzGrid0 = (aInf * machgrid) * (-velDir(3))
+            elseif (TSMachMode) then
+                !determine the mach number at this time interval
+                IntervalMach = TSMach(degreePolMach, coefPolMach, &
+                                      degreeFourMach, omegaFourMach, &
+                                      cosCoefFourMach, sinCoefFourMach, t(1))
+                !set the effective grid velocity
+                velxGrid0 = (aInf * (IntervalMach + machgrid)) * (-velDirFreestream(1))
+                velyGrid0 = (aInf * (IntervalMach + machgrid)) * (-velDirFreestream(2))
+                velzGrid0 = (aInf * (IntervalMach + machgrid)) * (-velDirFreestream(3))
+
+            elseif (TSAltitudeMode) then
+                call terminate('gridVelocityFineLevel', 'altitude motion not yet implemented...')
+            else
+                call terminate('gridVelocityFineLevel', 'Not a recognized Stability Motion')
+            end if
+        end if
 #endif
 
-    testMoving: if( blockIsMoving ) then
-       ! Determine the situation we are having here.
+        testMoving: if (blockIsMoving) then
+            ! Determine the situation we are having here.
 
-       testUseOldCoor: if( useOldCoor ) then
+            testUseOldCoor: if (useOldCoor) then
 #ifndef USE_TAPENADE
 ! We do not consider the finite difference based velocity computation for now
-          !
-          !             The velocities must be determined via a finite
-          !             difference formula using the coordinates of the old
-          !             levels.
-          !
-          ! Set the coefficients for the time integrator and store
-          ! the inverse of the physical nonDimensional time step,
-          ! divided by 4 and 8, a bit easier.
-
-          call setCoefTimeIntegrator
-          oneOver4dt = fourth*timeRef/deltaT
-          oneOver8dt = half*oneOver4dt
-          !
-          !             Grid velocities of the cell centers, including the
-          !             1st level halo cells.
-          !
-          ! Loop over the cells, including the 1st level halo's.
-
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-
-                   ! The velocity of the cell center is determined
-                   ! by a finite difference formula. First store
-                   ! the current coordinate, multiplied by 8 and
-                   ! coefTime(0) in sc.
-
-                   sc(1) = (x(i-1,j-1,k-1,1) + x(i,j-1,k-1,1)  &
-                        +  x(i-1,j,  k-1,1) + x(i,j,  k-1,1)  &
-                        +  x(i-1,j-1,k,  1) + x(i,j-1,k,  1)  &
-                        +  x(i-1,j,  k,  1) + x(i,j,  k,  1)) &
-                        * coefTime(0)
-                   sc(2) = (x(i-1,j-1,k-1,2) + x(i,j-1,k-1,2)  &
-                        +  x(i-1,j,  k-1,2) + x(i,j,  k-1,2)  &
-                        +  x(i-1,j-1,k,  2) + x(i,j-1,k,  2)  &
-                        +  x(i-1,j,  k,  2) + x(i,j,  k,  2)) &
-                        * coefTime(0)
-                   sc(3) = (x(i-1,j-1,k-1,3) + x(i,j-1,k-1,3)  &
-                        +  x(i-1,j,  k-1,3) + x(i,j,  k-1,3)  &
-                        +  x(i-1,j-1,k,  3) + x(i,j-1,k,  3)  &
-                        +  x(i-1,j,  k,  3) + x(i,j,  k,  3)) &
-                        * coefTime(0)
-
-                   ! Loop over the older levels to complete the
-                   ! finite difference formula.
-
-                   do ii=1,nOldLevels
-                      sc(1) = sc(1) + (xOld(ii,i-1,j-1,k-1,1)  &
-                           +          xOld(ii,i,  j-1,k-1,1)  &
-                           +          xOld(ii,i-1,j,  k-1,1)  &
-                           +          xOld(ii,i,  j,  k-1,1)  &
-                           +          xOld(ii,i-1,j-1,k,  1)  &
-                           +          xOld(ii,i,  j-1,k,  1)  &
-                           +          xOld(ii,i-1,j,  k,  1)  &
-                           +          xOld(ii,i,  j,  k,  1)) &
-                           * coefTime(ii)
-                      sc(2) = sc(2) + (xOld(ii,i-1,j-1,k-1,2)  &
-                           +          xOld(ii,i,  j-1,k-1,2)  &
-                           +          xOld(ii,i-1,j,  k-1,2)  &
-                           +          xOld(ii,i,  j,  k-1,2)  &
-                           +          xOld(ii,i-1,j-1,k,  2)  &
-                           +          xOld(ii,i,  j-1,k,  2)  &
-                           +          xOld(ii,i-1,j,  k,  2)  &
-                           +          xOld(ii,i,  j,  k,  2)) &
-                           * coefTime(ii)
-                      sc(3) = sc(3) + (xOld(ii,i-1,j-1,k-1,3)  &
-                           +          xOld(ii,i,  j-1,k-1,3)  &
-                           +          xOld(ii,i-1,j,  k-1,3)  &
-                           +          xOld(ii,i,  j,  k-1,3)  &
-                           +          xOld(ii,i-1,j-1,k,  3)  &
-                           +          xOld(ii,i,  j-1,k,  3)  &
-                           +          xOld(ii,i-1,j,  k,  3)  &
-                           +          xOld(ii,i,  j,  k,  3)) &
-                           * coefTime(ii)
-                   enddo
-
-                   ! Divide by 8 delta t to obtain the correct
-                   ! velocities.
-
-                   s(i,j,k,1) = sc(1)*oneOver8dt
-                   s(i,j,k,2) = sc(2)*oneOver8dt
-                   s(i,j,k,3) = sc(3)*oneOver8dt
-                enddo
-             enddo
-          enddo
-          !
-          !             Normal grid velocities of the faces.
-          !
-          ! Loop over the three directions.
-
-          loopDir: do mm=1,3
-
-             ! Set the upper boundaries depending on the direction.
-
-             select case (mm)
-             case (1_intType)       ! normals in i-direction
-                iie = ie; jje = je; kke = ke
-
-             case (2_intType)       ! normals in j-direction
-                iie = je; jje = ie; kke = ke
-
-             case (3_intType)       ! normals in k-direction
-                iie = ke; jje = ie; kke = je
-             end select
-             !
-             !               Normal grid velocities in generalized i-direction.
-             !               Mm == 1: i-direction
-             !               mm == 2: j-direction
-             !               mm == 3: k-direction
-             !
-             do i=0,iie
-
-                ! Set the pointers for the coordinates, normals and
-                ! normal velocities for this generalized i-plane.
-                ! This depends on the value of mm.
-
-                select case (mm)
-                case (1_intType)       ! normals in i-direction
-                   xx =>  x(i,:,:,:);  xxOld => xOld(:,i,:,:,:)
-                   ss => si(i,:,:,:);  sFace => sFaceI(i,:,:)
-
-                case (2_intType)       ! normals in j-direction
-                   xx =>  x(:,i,:,:);  xxOld => xOld(:,:,i,:,:)
-                   ss => sj(:,i,:,:);  sFace => sFaceJ(:,i,:)
-
-                case (3_intType)       ! normals in k-direction
-                   xx =>  x(:,:,i,:);  xxOld => xOld(:,:,:,i,:)
-                   ss => sk(:,:,i,:);  sFace => sFaceK(:,:,i)
-                end select
-
-                ! Loop over the k and j-direction of this
-                ! generalized i-face. Note that due to the usage of
-                ! the pointers xx and xxOld an offset of +1 must be
-                ! used in the coordinate arrays, because x and xOld
-                ! originally start at 0 for the i, j and k indices.
-
-                do k=1,kke
-                   do j=1,jje
-
-                      ! The velocity of the face center is determined
-                      ! by a finite difference formula. First store
-                      ! the current coordinate, multiplied by 4 and
-                      ! coefTime(0) in sc.
-
-                      sc(1) = coefTime(0)*(xx(j+1,k+1,1) + xx(j,k+1,1) &
-                           +              xx(j+1,k,  1) + xx(j,k,  1))
-                      sc(2) = coefTime(0)*(xx(j+1,k+1,2) + xx(j,k+1,2) &
-                           +              xx(j+1,k,  2) + xx(j,k,  2))
-                      sc(3) = coefTime(0)*(xx(j+1,k+1,3) + xx(j,k+1,3) &
-                           +              xx(j+1,k,  3) + xx(j,k,  3))
-
-                      ! Loop over the older levels to complete the
-                      ! finite difference.
-
-                      do ii=1,nOldLevels
-
-                         sc(1) = sc(1) + coefTime(ii)         &
-                              *         (xxOld(ii,j+1,k+1,1) &
-                              +          xxOld(ii,j,  k+1,1) &
-                              +          xxOld(ii,j+1,k,  1) &
-                              +          xxOld(ii,j,  k,  1))
-                         sc(2) = sc(2) + coefTime(ii)         &
-                              *         (xxOld(ii,j+1,k+1,2) &
-                              +          xxOld(ii,j,  k+1,2) &
-                              +          xxOld(ii,j+1,k,  2) &
-                              +          xxOld(ii,j,  k,  2))
-                         sc(3) = sc(3) + coefTime(ii)         &
-                              *         (xxOld(ii,j+1,k+1,3) &
-                              +          xxOld(ii,j,  k+1,3) &
-                              +          xxOld(ii,j+1,k,  3) &
-                              +          xxOld(ii,j,  k,  3))
-                      enddo
-
-                      ! Determine the dot product of sc and the normal
-                      ! and divide by 4 deltaT to obtain the correct
-                      ! value of the normal velocity.
-
-                      sFace(j,k) = sc(1)*ss(j,k,1) + sc(2)*ss(j,k,2) &
-                           + sc(3)*ss(j,k,3)
-                      sFace(j,k) = sFace(j,k)*oneOver4dt
-
-                   enddo
-                enddo
-             enddo
-
-          enddo loopDir
-#endif
-       else testUseOldCoor
-          !
-          !             The velocities must be determined analytically.
-          !
-          ! Store the rotation center and determine the
-          ! nonDimensional rotation rate of this block. As the
-          ! reference length is 1 timeRef == 1/uRef and at the end
-          ! the nonDimensional velocity is computed.
-
-          j = nbkGlobal
-
-          rotCenter = cgnsDoms(j)%rotCenter
-          rotRate   = timeRef*cgnsDoms(j)%rotRate
-
-          velXgrid = velXGrid0
-          velYgrid = velYGrid0
-          velZgrid = velZGrid0
-          !
-          !             Grid velocities of the cell centers, including the
-          !             1st level halo cells.
-          !
-          ! Loop over the cells, including the 1st level halo's.
-
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
-
-                   ! Determine the coordinates of the cell center,
-                   ! which are stored in xc.
-
-                   xc(1) = eighth*(flowDoms(nn, groundLevel, sps)%x(i-1,j-1,k-1,1) &
-                         + flowDoms(nn, groundLevel, sps)%x(i,j-1,k-1,1) &
-                         + flowDoms(nn, groundLevel, sps)%x(i-1,j,  k-1,1) &
-                         + flowDoms(nn, groundLevel, sps)%x(i,j,  k-1,1) &
-                         + flowDoms(nn, groundLevel, sps)%x(i-1,j-1,k,  1) &
-                         + flowDoms(nn, groundLevel, sps)%x(i,j-1,k,  1) &
-                         + flowDoms(nn, groundLevel, sps)%x(i-1,j,  k,  1) &
-                         + flowDoms(nn, groundLevel, sps)%x(i,j,  k,  1))
-                   xc(2) = eighth*(flowDoms(nn, groundLevel, sps)%x(i-1,j-1,k-1,2) &
-                         + flowDoms(nn, groundLevel, sps)%x(i,j-1,k-1,2) &
-                         + flowDoms(nn, groundLevel, sps)%x(i-1,j,  k-1,2) &
-                         + flowDoms(nn, groundLevel, sps)%x(i,j,  k-1,2) &
-                         + flowDoms(nn, groundLevel, sps)%x(i-1,j-1,k,  2) &
-                         + flowDoms(nn, groundLevel, sps)%x(i,j-1,k,  2) &
-                         + flowDoms(nn, groundLevel, sps)%x(i-1,j,  k,  2) &
-                         + flowDoms(nn, groundLevel, sps)%x(i,j,  k,  2))
-                   xc(3) = eighth*(flowDoms(nn, groundLevel, sps)%x(i-1,j-1,k-1,3) &
-                         + flowDoms(nn, groundLevel, sps)%x(i,j-1,k-1,3) &
-                         + flowDoms(nn, groundLevel, sps)%x(i-1,j,  k-1,3) &
-                         + flowDoms(nn, groundLevel, sps)%x(i,j,  k-1,3) &
-                         + flowDoms(nn, groundLevel, sps)%x(i-1,j-1,k,  3) &
-                         + flowDoms(nn, groundLevel, sps)%x(i,j-1,k,  3) &
-                         + flowDoms(nn, groundLevel, sps)%x(i-1,j,  k,  3) &
-                         + flowDoms(nn, groundLevel, sps)%x(i,j,  k,  3))
-
-                   ! Determine the coordinates relative to the
-                   ! center of rotation.
-
-                   xxc(1) = xc(1) - rotCenter(1)
-                   xxc(2) = xc(2) - rotCenter(2)
-                   xxc(3) = xc(3) - rotCenter(3)
-
-                   ! Determine the rotation speed of the cell center,
-                   ! which is omega*r.
-
-                   sc(1) = rotRate(2)*xxc(3) - rotRate(3)*xxc(2)
-                   sc(2) = rotRate(3)*xxc(1) - rotRate(1)*xxc(3)
-                   sc(3) = rotRate(1)*xxc(2) - rotRate(2)*xxc(1)
-
-                   ! Determine the coordinates relative to the
-                   ! rigid body rotation point.
-
-                   xxc(1) = xc(1) - rotationPoint(1)
-                   xxc(2) = xc(2) - rotationPoint(2)
-                   xxc(3) = xc(3) - rotationPoint(3)
-
-                   ! Determine the total velocity of the cell center.
-                   ! This is a combination of rotation speed of this
-                   ! block and the entire rigid body rotation.
-
-                   s(i,j,k,1) = sc(1) + velxGrid           &
-                        + derivRotationMatrix(1,1)*xxc(1) &
-                        + derivRotationMatrix(1,2)*xxc(2) &
-                        + derivRotationMatrix(1,3)*xxc(3)
-                   s(i,j,k,2) = sc(2) + velyGrid           &
-                        + derivRotationMatrix(2,1)*xxc(1) &
-                        + derivRotationMatrix(2,2)*xxc(2) &
-                        + derivRotationMatrix(2,3)*xxc(3)
-                   s(i,j,k,3) = sc(3) + velzGrid           &
-                        + derivRotationMatrix(3,1)*xxc(1) &
-                        + derivRotationMatrix(3,2)*xxc(2) &
-                        + derivRotationMatrix(3,3)*xxc(3)
-                enddo
-             enddo
-          enddo
-          !
-          !             Normal grid velocities of the faces.
-          !
-          ! Loop over the three directions.
-
-          ! The original code is elegant but the Tapenade has a difficult time 
-          ! to understand it. Thus, we unfold it and make it easier for the 
-          ! Tapenade.
-
-          ! i-direction
-          do k=1,ke
-             do j=1,je
-                do i=0,ie
-             
-                   ! Determine the coordinates of the face center,
-                   ! which are stored in xc.
- 
-                   xc(1) = fourth*(flowDoms(nn, groundLevel, sps)%x(i,j-1,k-1,1) + flowDoms(nn, groundLevel, sps)%x(i,j,k-1,1) &
-                         +         flowDoms(nn, groundLevel, sps)%x(i,j-1,k,  1) + flowDoms(nn, groundLevel, sps)%x(i,j,k,  1))
-                   xc(2) = fourth*(flowDoms(nn, groundLevel, sps)%x(i,j-1,k-1,2) + flowDoms(nn, groundLevel, sps)%x(i,j,k-1,2) &
-                         +         flowDoms(nn, groundLevel, sps)%x(i,j-1,k,  2) + flowDoms(nn, groundLevel, sps)%x(i,j,k,  2))
-                   xc(3) = fourth*(flowDoms(nn, groundLevel, sps)%x(i,j-1,k-1,3) + flowDoms(nn, groundLevel, sps)%x(i,j,k-1,3) &
-                         +         flowDoms(nn, groundLevel, sps)%x(i,j-1,k,  3) + flowDoms(nn, groundLevel, sps)%x(i,j,k,  3))
- 
-                   call cellFaceVelocities(xc, rotCenter, rotRate, velxGrid, velyGrid, velzGrid, derivRotationMatrix, sc)
- 
-                   ! Store the dot product of grid velocity sc and
-                   ! the normal ss in sFace.
- 
-                   sFaceI(i,j,k) = sc(1)*si(i,j,k,1) + sc(2)*si(i,j,k,2) &
-                      + sc(3)*si(i,j,k,3)
-                enddo
-             enddo
-          enddo
-
-          ! j-direction
-          do k=1,ke
-             do j=0,je
-                do i=1,ie
-              
-                   ! Determine the coordinates of the face center,
-                   ! which are stored in xc.
- 
-                   xc(1) = fourth*(flowDoms(nn, groundLevel, sps)%x(i-1,j,k,  1) + flowDoms(nn, groundLevel, sps)%x(i,j,k-1,1) &
-                         +         flowDoms(nn, groundLevel, sps)%x(i-1,j,k-1,1) + flowDoms(nn, groundLevel, sps)%x(i,j,k,  1))
-                   xc(2) = fourth*(flowDoms(nn, groundLevel, sps)%x(i-1,j,k,  2) + flowDoms(nn, groundLevel, sps)%x(i,j,k-1,2) &
-                         +         flowDoms(nn, groundLevel, sps)%x(i-1,j,k-1,2) + flowDoms(nn, groundLevel, sps)%x(i,j,k,  2))
-                   xc(3) = fourth*(flowDoms(nn, groundLevel, sps)%x(i-1,j,k,  3) + flowDoms(nn, groundLevel, sps)%x(i,j,k-1,3) &
-                         +         flowDoms(nn, groundLevel, sps)%x(i-1,j,k-1,3) + flowDoms(nn, groundLevel, sps)%x(i,j,k,  3))
- 
-                   call cellFaceVelocities(xc, rotCenter, rotRate, velxGrid, velyGrid, velzGrid, derivRotationMatrix, sc)
- 
-                   ! Store the dot product of grid velocity sc and
-                   ! the normal ss in sFace.
- 
-                   sFaceJ(i,j,k) = sc(1)*sj(i,j,k,1) + sc(2)*sj(i,j,k,2) &
-                      + sc(3)*sj(i,j,k,3)
-                enddo
-             enddo
-          enddo
-
-          ! k-direction
-          do k=0,ke
-             do j=1,je
-                do i=1,ie
-                
-                   ! Determine the coordinates of the face center,
-                   ! which are stored in xc.
- 
-                   xc(1) = fourth*(flowDoms(nn, groundLevel, sps)%x(i,  j-1,k,1) + flowDoms(nn, groundLevel, sps)%x(i-1,j,k,1) &
-                         +         flowDoms(nn, groundLevel, sps)%x(i-1,j-1,k,1) + flowDoms(nn, groundLevel, sps)%x(i,  j,k,1))
-                   xc(2) = fourth*(flowDoms(nn, groundLevel, sps)%x(i,  j-1,k,2) + flowDoms(nn, groundLevel, sps)%x(i-1,j,k,2) &
-                         +         flowDoms(nn, groundLevel, sps)%x(i-1,j-1,k,2) + flowDoms(nn, groundLevel, sps)%x(i,  j,k,2))
-                   xc(3) = fourth*(flowDoms(nn, groundLevel, sps)%x(i,  j-1,k,3) + flowDoms(nn, groundLevel, sps)%x(i-1,j,k,3) &
-                         +         flowDoms(nn, groundLevel, sps)%x(i-1,j-1,k,3) + flowDoms(nn, groundLevel, sps)%x(i,  j,k,3))
- 
-                  
-                   call cellFaceVelocities(xc, rotCenter, rotRate, velxGrid, velyGrid, velzGrid, derivRotationMatrix, sc)
- 
-                   ! Store the dot product of grid velocity sc and
-                   ! the normal ss in sFace.
- 
-                   sFaceK(i,j,k) = sc(1)*sk(i,j,k,1) + sc(2)*sk(i,j,k,2) &
-                      + sc(3)*sk(i,j,k,3)
-                enddo
-             enddo
-          enddo
-
-       endif testUseOldCoor
-    endif testMoving
-
-  end subroutine gridVelocitiesFineLevel_block
-
-  subroutine cellFaceVelocities(xc, rotCenter, rotRate, velxGrid, velyGrid, velzGrid, derivRotationMatrix, sc)
-   !
-   !  Returns the cell face velocities for a given face center
-   !
-   use constants
-
-   implicit none
-   !
-   !      Subroutine arguments.
-   !
-   real(kind=realType), dimension(3), intent(in) :: xc, rotCenter, rotRate
-   real(kind=realType), intent(in) :: velxGrid, velyGrid, velzGrid
-   real(kind=realType), dimension(3,3), intent(in) :: derivRotationMatrix
-   real(kind=realType), dimension(3), intent(out) :: sc
-   !
-   !      Local variables.
-   !
-   real(kind=realType), dimension(3) :: rotationPoint, xxc
-
-   ! Determine the coordinates relative to the
-   ! center of rotation.
- 
-   xxc(1) = xc(1) - rotCenter(1)
-   xxc(2) = xc(2) - rotCenter(2)
-   xxc(3) = xc(3) - rotCenter(3)
-
-   ! Determine the rotation speed of the face center,
-   ! which is omega*r.
-
-   sc(1) = rotRate(2)*xxc(3) - rotRate(3)*xxc(2)
-   sc(2) = rotRate(3)*xxc(1) - rotRate(1)*xxc(3)
-   sc(3) = rotRate(1)*xxc(2) - rotRate(2)*xxc(1)
-
-   ! Determine the coordinates relative to the
-   ! rigid body rotation point.
-
-   xxc(1) = xc(1) - rotationPoint(1)
-   xxc(2) = xc(2) - rotationPoint(2)
-   xxc(3) = xc(3) - rotationPoint(3)
-
-   ! Determine the total velocity of the cell face.
-   ! This is a combination of rotation speed of this
-   ! block and the entire rigid body rotation.
-
-   sc(1) = sc(1) + velxGrid           &
-      + derivRotationMatrix(1,1)*xxc(1) &
-      + derivRotationMatrix(1,2)*xxc(2) &
-      + derivRotationMatrix(1,3)*xxc(3)
-   sc(2) = sc(2) + velyGrid           &
-      + derivRotationMatrix(2,1)*xxc(1) &
-      + derivRotationMatrix(2,2)*xxc(2) &
-      + derivRotationMatrix(2,3)*xxc(3)
-   sc(3) = sc(3) + velzGrid           &
-      + derivRotationMatrix(3,1)*xxc(1) &
-      + derivRotationMatrix(3,2)*xxc(2) &
-      + derivRotationMatrix(3,3)*xxc(3)
-   
-  end subroutine cellFaceVelocities
-
-#ifndef USE_TAPENADE
-  subroutine slipVelocitiesFineLevel(useOldCoor, t, sps)
-    !
-    ! Shell function to call slipVelocitiesFineLevel on all blocks
-    !
-    use constants
-    use blockPointers, only : nDom
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use iteration, only : groundLevel
-    use utils, only : setPointers
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: sps
-    logical,               intent(in) :: useOldCoor
-    real(kind=realType), dimension(*), intent(in) :: t  !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn
-
-    ! Loop over the number of blocks.
-
-    domains: do nn=1,nDom
-
-       ! Set the pointers for this block.
-
-       call setPointers(nn, groundLevel, sps)
-
-       call slipVelocitiesFineLevel_block(useOldCoor, t, sps, nn)
-
-    end do domains
+                !
+                !             The velocities must be determined via a finite
+                !             difference formula using the coordinates of the old
+                !             levels.
+                !
+                ! Set the coefficients for the time integrator and store
+                ! the inverse of the physical nonDimensional time step,
+                ! divided by 4 and 8, a bit easier.
 
-  end subroutine slipVelocitiesFineLevel
+                call setCoefTimeIntegrator
+                oneOver4dt = fourth * timeRef / deltaT
+                oneOver8dt = half * oneOver4dt
+                !
+                !             Grid velocities of the cell centers, including the
+                !             1st level halo cells.
+                !
+                ! Loop over the cells, including the 1st level halo's.
+
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+
+                            ! The velocity of the cell center is determined
+                            ! by a finite difference formula. First store
+                            ! the current coordinate, multiplied by 8 and
+                            ! coefTime(0) in sc.
+
+                            sc(1) = (x(i - 1, j - 1, k - 1, 1) + x(i, j - 1, k - 1, 1) &
+                                     + x(i - 1, j, k - 1, 1) + x(i, j, k - 1, 1) &
+                                     + x(i - 1, j - 1, k, 1) + x(i, j - 1, k, 1) &
+                                     + x(i - 1, j, k, 1) + x(i, j, k, 1)) &
+                                    * coefTime(0)
+                            sc(2) = (x(i - 1, j - 1, k - 1, 2) + x(i, j - 1, k - 1, 2) &
+                                     + x(i - 1, j, k - 1, 2) + x(i, j, k - 1, 2) &
+                                     + x(i - 1, j - 1, k, 2) + x(i, j - 1, k, 2) &
+                                     + x(i - 1, j, k, 2) + x(i, j, k, 2)) &
+                                    * coefTime(0)
+                            sc(3) = (x(i - 1, j - 1, k - 1, 3) + x(i, j - 1, k - 1, 3) &
+                                     + x(i - 1, j, k - 1, 3) + x(i, j, k - 1, 3) &
+                                     + x(i - 1, j - 1, k, 3) + x(i, j - 1, k, 3) &
+                                     + x(i - 1, j, k, 3) + x(i, j, k, 3)) &
+                                    * coefTime(0)
+
+                            ! Loop over the older levels to complete the
+                            ! finite difference formula.
+
+                            do ii = 1, nOldLevels
+                                sc(1) = sc(1) + (xOld(ii, i - 1, j - 1, k - 1, 1) &
+                                                 + xOld(ii, i, j - 1, k - 1, 1) &
+                                                 + xOld(ii, i - 1, j, k - 1, 1) &
+                                                 + xOld(ii, i, j, k - 1, 1) &
+                                                 + xOld(ii, i - 1, j - 1, k, 1) &
+                                                 + xOld(ii, i, j - 1, k, 1) &
+                                                 + xOld(ii, i - 1, j, k, 1) &
+                                                 + xOld(ii, i, j, k, 1)) &
+                                        * coefTime(ii)
+                                sc(2) = sc(2) + (xOld(ii, i - 1, j - 1, k - 1, 2) &
+                                                 + xOld(ii, i, j - 1, k - 1, 2) &
+                                                 + xOld(ii, i - 1, j, k - 1, 2) &
+                                                 + xOld(ii, i, j, k - 1, 2) &
+                                                 + xOld(ii, i - 1, j - 1, k, 2) &
+                                                 + xOld(ii, i, j - 1, k, 2) &
+                                                 + xOld(ii, i - 1, j, k, 2) &
+                                                 + xOld(ii, i, j, k, 2)) &
+                                        * coefTime(ii)
+                                sc(3) = sc(3) + (xOld(ii, i - 1, j - 1, k - 1, 3) &
+                                                 + xOld(ii, i, j - 1, k - 1, 3) &
+                                                 + xOld(ii, i - 1, j, k - 1, 3) &
+                                                 + xOld(ii, i, j, k - 1, 3) &
+                                                 + xOld(ii, i - 1, j - 1, k, 3) &
+                                                 + xOld(ii, i, j - 1, k, 3) &
+                                                 + xOld(ii, i - 1, j, k, 3) &
+                                                 + xOld(ii, i, j, k, 3)) &
+                                        * coefTime(ii)
+                            end do
+
+                            ! Divide by 8 delta t to obtain the correct
+                            ! velocities.
+
+                            s(i, j, k, 1) = sc(1) * oneOver8dt
+                            s(i, j, k, 2) = sc(2) * oneOver8dt
+                            s(i, j, k, 3) = sc(3) * oneOver8dt
+                        end do
+                    end do
+                end do
+                !
+                !             Normal grid velocities of the faces.
+                !
+                ! Loop over the three directions.
+
+                loopDir: do mm = 1, 3
+
+                    ! Set the upper boundaries depending on the direction.
+
+                    select case (mm)
+                    case (1_intType)       ! normals in i-direction
+                        iie = ie; jje = je; kke = ke
+
+                    case (2_intType)       ! normals in j-direction
+                        iie = je; jje = ie; kke = ke
+
+                    case (3_intType)       ! normals in k-direction
+                        iie = ke; jje = ie; kke = je
+                    end select
+                    !
+                    !               Normal grid velocities in generalized i-direction.
+                    !               Mm == 1: i-direction
+                    !               mm == 2: j-direction
+                    !               mm == 3: k-direction
+                    !
+                    do i = 0, iie
+
+                        ! Set the pointers for the coordinates, normals and
+                        ! normal velocities for this generalized i-plane.
+                        ! This depends on the value of mm.
+
+                        select case (mm)
+                        case (1_intType)       ! normals in i-direction
+                            xx => x(i, :, :, :); xxOld => xOld(:, i, :, :, :)
+                            ss => si(i, :, :, :); sFace => sFaceI(i, :, :)
+
+                        case (2_intType)       ! normals in j-direction
+                            xx => x(:, i, :, :); xxOld => xOld(:, :, i, :, :)
+                            ss => sj(:, i, :, :); sFace => sFaceJ(:, i, :)
+
+                        case (3_intType)       ! normals in k-direction
+                            xx => x(:, :, i, :); xxOld => xOld(:, :, :, i, :)
+                            ss => sk(:, :, i, :); sFace => sFaceK(:, :, i)
+                        end select
+
+                        ! Loop over the k and j-direction of this
+                        ! generalized i-face. Note that due to the usage of
+                        ! the pointers xx and xxOld an offset of +1 must be
+                        ! used in the coordinate arrays, because x and xOld
+                        ! originally start at 0 for the i, j and k indices.
+
+                        do k = 1, kke
+                            do j = 1, jje
+
+                                ! The velocity of the face center is determined
+                                ! by a finite difference formula. First store
+                                ! the current coordinate, multiplied by 4 and
+                                ! coefTime(0) in sc.
+
+                                sc(1) = coefTime(0) * (xx(j + 1, k + 1, 1) + xx(j, k + 1, 1) &
+                                                       + xx(j + 1, k, 1) + xx(j, k, 1))
+                                sc(2) = coefTime(0) * (xx(j + 1, k + 1, 2) + xx(j, k + 1, 2) &
+                                                       + xx(j + 1, k, 2) + xx(j, k, 2))
+                                sc(3) = coefTime(0) * (xx(j + 1, k + 1, 3) + xx(j, k + 1, 3) &
+                                                       + xx(j + 1, k, 3) + xx(j, k, 3))
+
+                                ! Loop over the older levels to complete the
+                                ! finite difference.
+
+                                do ii = 1, nOldLevels
+
+                                    sc(1) = sc(1) + coefTime(ii) &
+                                            * (xxOld(ii, j + 1, k + 1, 1) &
+                                               + xxOld(ii, j, k + 1, 1) &
+                                               + xxOld(ii, j + 1, k, 1) &
+                                               + xxOld(ii, j, k, 1))
+                                    sc(2) = sc(2) + coefTime(ii) &
+                                            * (xxOld(ii, j + 1, k + 1, 2) &
+                                               + xxOld(ii, j, k + 1, 2) &
+                                               + xxOld(ii, j + 1, k, 2) &
+                                               + xxOld(ii, j, k, 2))
+                                    sc(3) = sc(3) + coefTime(ii) &
+                                            * (xxOld(ii, j + 1, k + 1, 3) &
+                                               + xxOld(ii, j, k + 1, 3) &
+                                               + xxOld(ii, j + 1, k, 3) &
+                                               + xxOld(ii, j, k, 3))
+                                end do
+
+                                ! Determine the dot product of sc and the normal
+                                ! and divide by 4 deltaT to obtain the correct
+                                ! value of the normal velocity.
+
+                                sFace(j, k) = sc(1) * ss(j, k, 1) + sc(2) * ss(j, k, 2) &
+                                              + sc(3) * ss(j, k, 3)
+                                sFace(j, k) = sFace(j, k) * oneOver4dt
+
+                            end do
+                        end do
+                    end do
+
+                end do loopDir
 #endif
+                else testUseOldCoor
+                !
+                !             The velocities must be determined analytically.
+                !
+                ! Store the rotation center and determine the
+                ! nonDimensional rotation rate of this block. As the
+                ! reference length is 1 timeRef == 1/uRef and at the end
+                ! the nonDimensional velocity is computed.
 
-  subroutine slipVelocitiesFineLevel_block(useOldCoor, t, sps, nn)
-    !
-    !       slipVelocitiesFineLevel computes the slip velocities for
-    !       viscous subfaces on all viscous boundaries on groundLevel for
-    !       the given spectral solution. If useOldCoor is .true. the
-    !       velocities are determined using the unsteady time integrator;
-    !       otherwise the analytic form is used.
-    !
-    use constants
-    use inputTimeSpectral
-    use blockPointers
-    use cgnsGrid
-    use flowVarRefState
-    use inputMotion
-    use inputUnsteady
-    use iteration
-    use inputPhysics
-    use inputTSStabDeriv
-    use monitor
-    use communication
-    use flowUtils, only :  derivativeRotMatrixRigid, getDirVector
-    use utils, only : tsAlpha, tsBeta, tsMach, terminate, rotMatrixRigidBody, &
-         setCoefTimeIntegrator, getDirAngle
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: sps, nn
-    logical,               intent(in) :: useOldCoor
+                j = nbkGlobal
 
-    real(kind=realType), dimension(*), intent(in) :: t
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: mm, i, j, level, ii
+                rotCenter = cgnsDoms(j)%rotCenter
+                rotRate = timeRef * cgnsDoms(j)%rotRate
 
-    real(kind=realType) :: oneOver4dt
-    real(kind=realType) :: velxGrid, velyGrid, velzGrid,ainf
-    real(kind=realType) :: velxGrid0, velyGrid0, velzGrid0
+                velXgrid = velXGrid0
+                velYgrid = velYGrid0
+                velZgrid = velZGrid0
+                !
+                !             Grid velocities of the cell centers, including the
+                !             1st level halo cells.
+                !
+                ! Loop over the cells, including the 1st level halo's.
+
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+
+                            ! Determine the coordinates of the cell center,
+                            ! which are stored in xc.
+
+                            xc(1) = eighth * (flowDoms(nn, groundLevel, sps)%x(i - 1, j - 1, k - 1, 1) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i, j - 1, k - 1, 1) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i - 1, j, k - 1, 1) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i, j, k - 1, 1) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i - 1, j - 1, k, 1) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i, j - 1, k, 1) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i - 1, j, k, 1) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i, j, k, 1))
+                            xc(2) = eighth * (flowDoms(nn, groundLevel, sps)%x(i - 1, j - 1, k - 1, 2) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i, j - 1, k - 1, 2) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i - 1, j, k - 1, 2) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i, j, k - 1, 2) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i - 1, j - 1, k, 2) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i, j - 1, k, 2) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i - 1, j, k, 2) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i, j, k, 2))
+                            xc(3) = eighth * (flowDoms(nn, groundLevel, sps)%x(i - 1, j - 1, k - 1, 3) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i, j - 1, k - 1, 3) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i - 1, j, k - 1, 3) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i, j, k - 1, 3) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i - 1, j - 1, k, 3) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i, j - 1, k, 3) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i - 1, j, k, 3) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i, j, k, 3))
+
+                            ! Determine the coordinates relative to the
+                            ! center of rotation.
+
+                            xxc(1) = xc(1) - rotCenter(1)
+                            xxc(2) = xc(2) - rotCenter(2)
+                            xxc(3) = xc(3) - rotCenter(3)
+
+                            ! Determine the rotation speed of the cell center,
+                            ! which is omega*r.
+
+                            sc(1) = rotRate(2) * xxc(3) - rotRate(3) * xxc(2)
+                            sc(2) = rotRate(3) * xxc(1) - rotRate(1) * xxc(3)
+                            sc(3) = rotRate(1) * xxc(2) - rotRate(2) * xxc(1)
+
+                            ! Determine the coordinates relative to the
+                            ! rigid body rotation point.
+
+                            xxc(1) = xc(1) - rotationPoint(1)
+                            xxc(2) = xc(2) - rotationPoint(2)
+                            xxc(3) = xc(3) - rotationPoint(3)
+
+                            ! Determine the total velocity of the cell center.
+                            ! This is a combination of rotation speed of this
+                            ! block and the entire rigid body rotation.
+
+                            s(i, j, k, 1) = sc(1) + velxGrid &
+                                            + derivRotationMatrix(1, 1) * xxc(1) &
+                                            + derivRotationMatrix(1, 2) * xxc(2) &
+                                            + derivRotationMatrix(1, 3) * xxc(3)
+                            s(i, j, k, 2) = sc(2) + velyGrid &
+                                            + derivRotationMatrix(2, 1) * xxc(1) &
+                                            + derivRotationMatrix(2, 2) * xxc(2) &
+                                            + derivRotationMatrix(2, 3) * xxc(3)
+                            s(i, j, k, 3) = sc(3) + velzGrid &
+                                            + derivRotationMatrix(3, 1) * xxc(1) &
+                                            + derivRotationMatrix(3, 2) * xxc(2) &
+                                            + derivRotationMatrix(3, 3) * xxc(3)
+                        end do
+                    end do
+                end do
+                !
+                !             Normal grid velocities of the faces.
+                !
+                ! Loop over the three directions.
 
-    real(kind=realType), dimension(3) :: xc, xxc
-    real(kind=realType), dimension(3) :: rotCenter, rotRate
+                ! The original code is elegant but the Tapenade has a difficult time
+                ! to understand it. Thus, we unfold it and make it easier for the
+                ! Tapenade.
 
-    real(kind=realType), dimension(3)   :: rotationPoint
-    real(kind=realType), dimension(3,3) :: rotationMatrix,&
-         derivRotationMatrix
+                ! i-direction
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 0, ie
 
-    real(kind=realType) :: tNew, tOld
+                            ! Determine the coordinates of the face center,
+                            ! which are stored in xc.
 
-    real(kind=realType), dimension(:,:,:),   pointer :: uSlip
-    real(kind=realType), dimension(:,:,:),   pointer :: xFace
-    real(kind=realType), dimension(:,:,:,:), pointer :: xFaceOld
+           xc(1) = fourth*(flowDoms(nn, groundLevel, sps)%x(i,j-1,k-1,1) + flowDoms(nn, groundLevel, sps)%x(i,j,k-1,1) &
+                      + flowDoms(nn, groundLevel, sps)%x(i, j - 1, k, 1) + flowDoms(nn, groundLevel, sps)%x(i, j, k, 1))
+           xc(2) = fourth*(flowDoms(nn, groundLevel, sps)%x(i,j-1,k-1,2) + flowDoms(nn, groundLevel, sps)%x(i,j,k-1,2) &
+                      + flowDoms(nn, groundLevel, sps)%x(i, j - 1, k, 2) + flowDoms(nn, groundLevel, sps)%x(i, j, k, 2))
+           xc(3) = fourth*(flowDoms(nn, groundLevel, sps)%x(i,j-1,k-1,3) + flowDoms(nn, groundLevel, sps)%x(i,j,k-1,3) &
+                      + flowDoms(nn, groundLevel, sps)%x(i, j - 1, k, 3) + flowDoms(nn, groundLevel, sps)%x(i, j, k, 3))
 
-    real(kind=realType) :: intervalMach,alphaTS,alphaIncrement,&
-         betaTS,betaIncrement
-    real(kind=realType), dimension(3) ::velDir
-    real(kind=realType), dimension(3) :: refDirection
+                  call cellFaceVelocities(xc, rotCenter, rotRate, velxGrid, velyGrid, velzGrid, derivRotationMatrix, sc)
 
-    ! Determine the situation we are having here.
+                            ! Store the dot product of grid velocity sc and
+                            ! the normal ss in sFace.
 
-    testUseOldCoor: if( useOldCoor ) then
+                            sFaceI(i, j, k) = sc(1) * si(i, j, k, 1) + sc(2) * si(i, j, k, 2) &
+                                              + sc(3) * si(i, j, k, 3)
+                        end do
+                    end do
+                end do
 
-#ifndef USE_TAPENADE
-! the time-stepping portion is not ADed.
+                ! j-direction
+                do k = 1, ke
+                    do j = 0, je
+                        do i = 1, ie
 
-       ! The velocities must be determined via a finite difference
-       ! formula using the coordinates of the old levels.
+                            ! Determine the coordinates of the face center,
+                            ! which are stored in xc.
 
-       ! Set the coefficients for the time integrator and store the
-       ! inverse of the physical nonDimensional time step, divided
-       ! by 4, a bit easier.
+ xc(1) = fourth * (flowDoms(nn, groundLevel, sps)%x(i - 1, j, k, 1) + flowDoms(nn, groundLevel, sps)%x(i, j, k - 1, 1) &
+                  + flowDoms(nn, groundLevel, sps)%x(i - 1, j, k - 1, 1) + flowDoms(nn, groundLevel, sps)%x(i, j, k, 1))
+ xc(2) = fourth * (flowDoms(nn, groundLevel, sps)%x(i - 1, j, k, 2) + flowDoms(nn, groundLevel, sps)%x(i, j, k - 1, 2) &
+                  + flowDoms(nn, groundLevel, sps)%x(i - 1, j, k - 1, 2) + flowDoms(nn, groundLevel, sps)%x(i, j, k, 2))
+ xc(3) = fourth * (flowDoms(nn, groundLevel, sps)%x(i - 1, j, k, 3) + flowDoms(nn, groundLevel, sps)%x(i, j, k - 1, 3) &
+                  + flowDoms(nn, groundLevel, sps)%x(i - 1, j, k - 1, 3) + flowDoms(nn, groundLevel, sps)%x(i, j, k, 3))
 
-       call setCoefTimeIntegrator
-       oneOver4dt = fourth*timeRef/deltaT
+                  call cellFaceVelocities(xc, rotCenter, rotRate, velxGrid, velyGrid, velzGrid, derivRotationMatrix, sc)
 
-       ! Loop over the number of viscous subfaces.
+                            ! Store the dot product of grid velocity sc and
+                            ! the normal ss in sFace.
 
-       bocoLoop1: do mm=1,nViscBocos
+                            sFaceJ(i, j, k) = sc(1) * sj(i, j, k, 1) + sc(2) * sj(i, j, k, 2) &
+                                              + sc(3) * sj(i, j, k, 3)
+                        end do
+                    end do
+                end do
 
-          ! Set the pointer for uSlip to make the code more
-          ! readable.
+                ! k-direction
+                do k = 0, ke
+                    do j = 1, je
+                        do i = 1, ie
 
-          uSlip => BCData(mm)%uSlip
+                            ! Determine the coordinates of the face center,
+                            ! which are stored in xc.
 
-          ! Determine the grid face on which the subface is located
-          ! and set some variables accordingly.
+ xc(1) = fourth * (flowDoms(nn, groundLevel, sps)%x(i, j - 1, k, 1) + flowDoms(nn, groundLevel, sps)%x(i - 1, j, k, 1) &
+                  + flowDoms(nn, groundLevel, sps)%x(i - 1, j - 1, k, 1) + flowDoms(nn, groundLevel, sps)%x(i, j, k, 1))
+ xc(2) = fourth * (flowDoms(nn, groundLevel, sps)%x(i, j - 1, k, 2) + flowDoms(nn, groundLevel, sps)%x(i - 1, j, k, 2) &
+                  + flowDoms(nn, groundLevel, sps)%x(i - 1, j - 1, k, 2) + flowDoms(nn, groundLevel, sps)%x(i, j, k, 2))
+ xc(3) = fourth * (flowDoms(nn, groundLevel, sps)%x(i, j - 1, k, 3) + flowDoms(nn, groundLevel, sps)%x(i - 1, j, k, 3) &
+                  + flowDoms(nn, groundLevel, sps)%x(i - 1, j - 1, k, 3) + flowDoms(nn, groundLevel, sps)%x(i, j, k, 3))
 
-          select case (BCFaceID(mm))
+                  call cellFaceVelocities(xc, rotCenter, rotRate, velxGrid, velyGrid, velzGrid, derivRotationMatrix, sc)
 
-          case (iMin)
-             xFace => x(1,:,:,:);  xFaceOld => xOld(:,1,:,:,:)
+                            ! Store the dot product of grid velocity sc and
+                            ! the normal ss in sFace.
 
-          case (iMax)
-             xFace => x(il,:,:,:); xFaceOld => xOld(:,il,:,:,:)
+                            sFaceK(i, j, k) = sc(1) * sk(i, j, k, 1) + sc(2) * sk(i, j, k, 2) &
+                                              + sc(3) * sk(i, j, k, 3)
+                        end do
+                    end do
+                end do
 
-          case (jMin)
-             xFace => x(:,1,:,:);  xFaceOld => xOld(:,:,1,:,:)
+            end if testUseOldCoor
+        end if testMoving
+
+    end subroutine gridVelocitiesFineLevel_block
+
+    subroutine cellFaceVelocities(xc, rotCenter, rotRate, velxGrid, velyGrid, velzGrid, derivRotationMatrix, sc)
+        !
+        !  Returns the cell face velocities for a given face center
+        !
+        use constants
+
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        real(kind=realType), dimension(3), intent(in) :: xc, rotCenter, rotRate
+        real(kind=realType), intent(in) :: velxGrid, velyGrid, velzGrid
+        real(kind=realType), dimension(3, 3), intent(in) :: derivRotationMatrix
+        real(kind=realType), dimension(3), intent(out) :: sc
+        !
+        !      Local variables.
+        !
+        real(kind=realType), dimension(3) :: rotationPoint, xxc
+
+        ! Determine the coordinates relative to the
+        ! center of rotation.
+
+        xxc(1) = xc(1) - rotCenter(1)
+        xxc(2) = xc(2) - rotCenter(2)
+        xxc(3) = xc(3) - rotCenter(3)
+
+        ! Determine the rotation speed of the face center,
+        ! which is omega*r.
+
+        sc(1) = rotRate(2) * xxc(3) - rotRate(3) * xxc(2)
+        sc(2) = rotRate(3) * xxc(1) - rotRate(1) * xxc(3)
+        sc(3) = rotRate(1) * xxc(2) - rotRate(2) * xxc(1)
+
+        ! Determine the coordinates relative to the
+        ! rigid body rotation point.
+
+        xxc(1) = xc(1) - rotationPoint(1)
+        xxc(2) = xc(2) - rotationPoint(2)
+        xxc(3) = xc(3) - rotationPoint(3)
+
+        ! Determine the total velocity of the cell face.
+        ! This is a combination of rotation speed of this
+        ! block and the entire rigid body rotation.
+
+        sc(1) = sc(1) + velxGrid &
+                + derivRotationMatrix(1, 1) * xxc(1) &
+                + derivRotationMatrix(1, 2) * xxc(2) &
+                + derivRotationMatrix(1, 3) * xxc(3)
+        sc(2) = sc(2) + velyGrid &
+                + derivRotationMatrix(2, 1) * xxc(1) &
+                + derivRotationMatrix(2, 2) * xxc(2) &
+                + derivRotationMatrix(2, 3) * xxc(3)
+        sc(3) = sc(3) + velzGrid &
+                + derivRotationMatrix(3, 1) * xxc(1) &
+                + derivRotationMatrix(3, 2) * xxc(2) &
+                + derivRotationMatrix(3, 3) * xxc(3)
+
+    end subroutine cellFaceVelocities
 
-          case (jMax)
-             xFace => x(:,jl,:,:); xFaceOld => xOld(:,:,jl,:,:)
+#ifndef USE_TAPENADE
+    subroutine slipVelocitiesFineLevel(useOldCoor, t, sps)
+        !
+        ! Shell function to call slipVelocitiesFineLevel on all blocks
+        !
+        use constants
+        use blockPointers, only: nDom
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use iteration, only: groundLevel
+        use utils, only: setPointers
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: sps
+        logical, intent(in) :: useOldCoor
+        real(kind=realType), dimension(*), intent(in) :: t  !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn
+
+        ! Loop over the number of blocks.
+
+        domains: do nn = 1, nDom
+
+            ! Set the pointers for this block.
+
+            call setPointers(nn, groundLevel, sps)
+
+            call slipVelocitiesFineLevel_block(useOldCoor, t, sps, nn)
+
+        end do domains
+
+    end subroutine slipVelocitiesFineLevel
+#endif
 
-          case (kMin)
-             xFace => x(:,:,1,:);  xFaceOld => xOld(:,:,:,1,:)
+    subroutine slipVelocitiesFineLevel_block(useOldCoor, t, sps, nn)
+        !
+        !       slipVelocitiesFineLevel computes the slip velocities for
+        !       viscous subfaces on all viscous boundaries on groundLevel for
+        !       the given spectral solution. If useOldCoor is .true. the
+        !       velocities are determined using the unsteady time integrator;
+        !       otherwise the analytic form is used.
+        !
+        use constants
+        use inputTimeSpectral
+        use blockPointers
+        use cgnsGrid
+        use flowVarRefState
+        use inputMotion
+        use inputUnsteady
+        use iteration
+        use inputPhysics
+        use inputTSStabDeriv
+        use monitor
+        use communication
+        use flowUtils, only: derivativeRotMatrixRigid, getDirVector
+        use utils, only: tsAlpha, tsBeta, tsMach, terminate, rotMatrixRigidBody, &
+                         setCoefTimeIntegrator, getDirAngle
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: sps, nn
+        logical, intent(in) :: useOldCoor
+
+        real(kind=realType), dimension(*), intent(in) :: t
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: mm, i, j, level, ii
+
+        real(kind=realType) :: oneOver4dt
+        real(kind=realType) :: velxGrid, velyGrid, velzGrid, ainf
+        real(kind=realType) :: velxGrid0, velyGrid0, velzGrid0
+
+        real(kind=realType), dimension(3) :: xc, xxc
+        real(kind=realType), dimension(3) :: rotCenter, rotRate
+
+        real(kind=realType), dimension(3) :: rotationPoint
+        real(kind=realType), dimension(3, 3) :: rotationMatrix, &
+                                                derivRotationMatrix
+
+        real(kind=realType) :: tNew, tOld
+
+        real(kind=realType), dimension(:, :, :), pointer :: uSlip
+        real(kind=realType), dimension(:, :, :), pointer :: xFace
+        real(kind=realType), dimension(:, :, :, :), pointer :: xFaceOld
+
+        real(kind=realType) :: intervalMach, alphaTS, alphaIncrement, &
+                               betaTS, betaIncrement
+        real(kind=realType), dimension(3) :: velDir
+        real(kind=realType), dimension(3) :: refDirection
+
+        ! Determine the situation we are having here.
+
+        testUseOldCoor: if (useOldCoor) then
 
-          case (kMax)
-             xFace => x(:,:,kl,:); xFaceOld => xOld(:,:,:,kl,:)
+#ifndef USE_TAPENADE
+! the time-stepping portion is not ADed.
 
-          end select
+            ! The velocities must be determined via a finite difference
+            ! formula using the coordinates of the old levels.
 
-          ! Some boundary faces have a different rotation speed than
-          ! the corresponding block. This happens e.g. in the tip gap
-          ! region of turboMachinary problems where the casing does
-          ! not rotate. As the coordinate difference corresponds to
-          ! the rotation rate of the block, a correction must be
-          ! computed. Therefore compute the difference in rotation
-          ! rate and store the rotation center a bit easier. Note that
-          ! the rotation center of subface is taken, because if there
-          ! is a difference in rotation rate this info for the subface
-          ! must always be specified.
+            ! Set the coefficients for the time integrator and store the
+            ! inverse of the physical nonDimensional time step, divided
+            ! by 4, a bit easier.
 
-          j = nbkGlobal
-          i = cgnsSubface(mm)
+            call setCoefTimeIntegrator
+            oneOver4dt = fourth * timeRef / deltaT
 
-          rotCenter = cgnsDoms(j)%bocoInfo(i)%rotCenter
-          rotRate   = timeRef*(cgnsDoms(j)%bocoInfo(i)%rotRate &
-               -          cgnsDoms(j)%rotRate)
+            ! Loop over the number of viscous subfaces.
 
-          ! Loop over the quadrilateral faces of the viscous subface.
-          ! Note that due to the usage of the pointers xFace and
-          ! xFaceOld an offset of +1 must be used in the coordinate
-          ! arrays, because x and xOld originally start at 0 for the
-          ! i, j and k indices.
+            bocoLoop1: do mm = 1, nViscBocos
 
-          do j=BCData(mm)%jcBeg, BCData(mm)%jcEnd
-             do i=BCData(mm)%icBeg, BCData(mm)%icEnd
+                ! Set the pointer for uSlip to make the code more
+                ! readable.
 
-                ! Determine the coordinates of the centroid of the
-                ! face, multiplied by 4.
+                uSlip => BCData(mm)%uSlip
 
-                xc(1) = xFace(i+1,j+1,1) + xFace(i+1,j,1) &
-                     + xFace(i,  j+1,1) + xFace(i,  j,1)
-                xc(2) = xFace(i+1,j+1,2) + xFace(i+1,j,2) &
-                     + xFace(i,  j+1,2) + xFace(i,  j,2)
-                xc(3) = xFace(i+1,j+1,3) + xFace(i+1,j,3) &
-                     + xFace(i,  j+1,3) + xFace(i,  j,3)
+                ! Determine the grid face on which the subface is located
+                ! and set some variables accordingly.
 
-                ! Multiply the sum of the 4 vertex coordinates with
-                ! coefTime(0) to obtain the contribution for the
-                ! current time level. The division by 4*deltaT will
-                ! take place later. This is both more efficient and
-                ! more accurate for extremely small time steps.
+                select case (BCFaceID(mm))
 
-                uSlip(i,j,1) = coefTime(0)*xc(1)
-                uSlip(i,j,2) = coefTime(0)*xc(2)
-                uSlip(i,j,3) = coefTime(0)*xc(3)
+                case (iMin)
+                    xFace => x(1, :, :, :); xFaceOld => xOld(:, 1, :, :, :)
 
-                ! Loop over the older time levels and take their
-                ! contribution into account.
+                case (iMax)
+                    xFace => x(il, :, :, :); xFaceOld => xOld(:, il, :, :, :)
 
-                do level=1,nOldLevels
+                case (jMin)
+                    xFace => x(:, 1, :, :); xFaceOld => xOld(:, :, 1, :, :)
 
-                   uSlip(i,j,1) = uSlip(i,j,1) + coefTime(level)      &
-                        *          (xFaceOld(level,i+1,j+1,1) &
-                        +           xFaceOld(level,i+1,j,  1) &
-                        +           xFaceOld(level,i,  j+1,1) &
-                        +           xFaceOld(level,i,  j,  1))
+                case (jMax)
+                    xFace => x(:, jl, :, :); xFaceOld => xOld(:, :, jl, :, :)
 
-                   uSlip(i,j,2) = uSlip(i,j,2) + coefTime(level)      &
-                        *          (xFaceOld(level,i+1,j+1,2) &
-                        +           xFaceOld(level,i+1,j,  2) &
-                        +           xFaceOld(level,i,  j+1,2) &
-                        +           xFaceOld(level,i,  j,  2))
+                case (kMin)
+                    xFace => x(:, :, 1, :); xFaceOld => xOld(:, :, :, 1, :)
 
-                   uSlip(i,j,3) = uSlip(i,j,3) + coefTime(level)      &
-                        *          (xFaceOld(level,i+1,j+1,3) &
-                        +           xFaceOld(level,i+1,j,  3) &
-                        +           xFaceOld(level,i,  j+1,3) &
-                        +           xFaceOld(level,i,  j,  3))
-                enddo
+                case (kMax)
+                    xFace => x(:, :, kl, :); xFaceOld => xOld(:, :, :, kl, :)
 
-                ! Divide by 4 times the time step to obtain the
-                ! correct velocity.
+                end select
 
-                uSlip(i,j,1) = uSlip(i,j,1)*oneOver4dt
-                uSlip(i,j,2) = uSlip(i,j,2)*oneOver4dt
-                uSlip(i,j,3) = uSlip(i,j,3)*oneOver4dt
+                ! Some boundary faces have a different rotation speed than
+                ! the corresponding block. This happens e.g. in the tip gap
+                ! region of turboMachinary problems where the casing does
+                ! not rotate. As the coordinate difference corresponds to
+                ! the rotation rate of the block, a correction must be
+                ! computed. Therefore compute the difference in rotation
+                ! rate and store the rotation center a bit easier. Note that
+                ! the rotation center of subface is taken, because if there
+                ! is a difference in rotation rate this info for the subface
+                ! must always be specified.
+
+                j = nbkGlobal
+                i = cgnsSubface(mm)
+
+                rotCenter = cgnsDoms(j)%bocoInfo(i)%rotCenter
+                rotRate = timeRef * (cgnsDoms(j)%bocoInfo(i)%rotRate &
+                                     - cgnsDoms(j)%rotRate)
+
+                ! Loop over the quadrilateral faces of the viscous subface.
+                ! Note that due to the usage of the pointers xFace and
+                ! xFaceOld an offset of +1 must be used in the coordinate
+                ! arrays, because x and xOld originally start at 0 for the
+                ! i, j and k indices.
+
+                do j = BCData(mm)%jcBeg, BCData(mm)%jcEnd
+                    do i = BCData(mm)%icBeg, BCData(mm)%icEnd
+
+                        ! Determine the coordinates of the centroid of the
+                        ! face, multiplied by 4.
+
+                        xc(1) = xFace(i + 1, j + 1, 1) + xFace(i + 1, j, 1) &
+                                + xFace(i, j + 1, 1) + xFace(i, j, 1)
+                        xc(2) = xFace(i + 1, j + 1, 2) + xFace(i + 1, j, 2) &
+                                + xFace(i, j + 1, 2) + xFace(i, j, 2)
+                        xc(3) = xFace(i + 1, j + 1, 3) + xFace(i + 1, j, 3) &
+                                + xFace(i, j + 1, 3) + xFace(i, j, 3)
+
+                        ! Multiply the sum of the 4 vertex coordinates with
+                        ! coefTime(0) to obtain the contribution for the
+                        ! current time level. The division by 4*deltaT will
+                        ! take place later. This is both more efficient and
+                        ! more accurate for extremely small time steps.
+
+                        uSlip(i, j, 1) = coefTime(0) * xc(1)
+                        uSlip(i, j, 2) = coefTime(0) * xc(2)
+                        uSlip(i, j, 3) = coefTime(0) * xc(3)
+
+                        ! Loop over the older time levels and take their
+                        ! contribution into account.
+
+                        do level = 1, nOldLevels
+
+                            uSlip(i, j, 1) = uSlip(i, j, 1) + coefTime(level) &
+                                             * (xFaceOld(level, i + 1, j + 1, 1) &
+                                                + xFaceOld(level, i + 1, j, 1) &
+                                                + xFaceOld(level, i, j + 1, 1) &
+                                                + xFaceOld(level, i, j, 1))
+
+                            uSlip(i, j, 2) = uSlip(i, j, 2) + coefTime(level) &
+                                             * (xFaceOld(level, i + 1, j + 1, 2) &
+                                                + xFaceOld(level, i + 1, j, 2) &
+                                                + xFaceOld(level, i, j + 1, 2) &
+                                                + xFaceOld(level, i, j, 2))
+
+                            uSlip(i, j, 3) = uSlip(i, j, 3) + coefTime(level) &
+                                             * (xFaceOld(level, i + 1, j + 1, 3) &
+                                                + xFaceOld(level, i + 1, j, 3) &
+                                                + xFaceOld(level, i, j + 1, 3) &
+                                                + xFaceOld(level, i, j, 3))
+                        end do
+
+                        ! Divide by 4 times the time step to obtain the
+                        ! correct velocity.
+
+                        uSlip(i, j, 1) = uSlip(i, j, 1) * oneOver4dt
+                        uSlip(i, j, 2) = uSlip(i, j, 2) * oneOver4dt
+                        uSlip(i, j, 3) = uSlip(i, j, 3) * oneOver4dt
+
+                        ! Determine the correction due to the difference
+                        ! in rotation rate between the block and subface.
+
+                        ! First determine the coordinates relative to the
+                        ! rotation center. Remember that 4 times this value
+                        ! is currently stored in xc.
+
+                        xc(1) = fourth * xc(1) - rotCenter(1)
+                        xc(2) = fourth * xc(2) - rotCenter(2)
+                        xc(3) = fourth * xc(3) - rotCenter(3)
+
+                        ! Compute the velocity, which is the cross product
+                        ! of rotRate and xc and add it to uSlip.
+
+                        uSlip(i, j, 1) = uSlip(i, j, 1) &
+                                         + rotRate(2) * xc(3) - rotRate(3) * xc(2)
+                        uSlip(i, j, 2) = uSlip(i, j, 2) &
+                                         + rotRate(3) * xc(1) - rotRate(1) * xc(3)
+                        uSlip(i, j, 3) = uSlip(i, j, 3) &
+                                         + rotRate(1) * xc(2) - rotRate(2) * xc(1)
+
+                    end do
+                end do
 
-                ! Determine the correction due to the difference
-                ! in rotation rate between the block and subface.
-
-                ! First determine the coordinates relative to the
-                ! rotation center. Remember that 4 times this value
-                ! is currently stored in xc.
-
-                xc(1) = fourth*xc(1) - rotCenter(1)
-                xc(2) = fourth*xc(2) - rotCenter(2)
-                xc(3) = fourth*xc(3) - rotCenter(3)
-
-                ! Compute the velocity, which is the cross product
-                ! of rotRate and xc and add it to uSlip.
-
-                uSlip(i,j,1) = uSlip(i,j,1) &
-                     + rotRate(2)*xc(3) - rotRate(3)*xc(2)
-                uSlip(i,j,2) = uSlip(i,j,2) &
-                     + rotRate(3)*xc(1) - rotRate(1)*xc(3)
-                uSlip(i,j,3) = uSlip(i,j,3) &
-                     + rotRate(1)*xc(2) - rotRate(2)*xc(1)
-
-             enddo
-          enddo
-
-       enddo bocoLoop1
+            end do bocoLoop1
 #else
-       continue
+            continue
 #endif
-    else
+        else
 
-       ! The velocities must be determined analytically.
+            ! The velocities must be determined analytically.
 
-       ! Compute the mesh velocity from the given mesh Mach number.
+            ! Compute the mesh velocity from the given mesh Mach number.
 
-       !  aInf = sqrt(gammaInf*pInf/rhoInf)
-       !  velxGrid = aInf*MachGrid(1)
-       !  velyGrid = aInf*MachGrid(2)
-       !  velzGrid = aInf*MachGrid(3)
+            !  aInf = sqrt(gammaInf*pInf/rhoInf)
+            !  velxGrid = aInf*MachGrid(1)
+            !  velyGrid = aInf*MachGrid(2)
+            !  velzGrid = aInf*MachGrid(3)
 
-       aInf = sqrt(gammaInf*pInf/rhoInf)
-       velxGrid0 = (aInf*machgrid)*(-velDirFreestream(1))
-       velyGrid0 = (aInf*machgrid)*(-velDirFreestream(2))
-       velzGrid0 = (aInf*machgrid)*(-velDirFreestream(3))
+            aInf = sqrt(gammaInf * pInf / rhoInf)
+            velxGrid0 = (aInf * machgrid) * (-velDirFreestream(1))
+            velyGrid0 = (aInf * machgrid) * (-velDirFreestream(2))
+            velzGrid0 = (aInf * machgrid) * (-velDirFreestream(3))
 
-       ! Compute the derivative of the rotation matrix and the rotation
-       ! point; needed for velocity due to the rigid body rotation of
-       ! the entire grid. It is assumed that the rigid body motion of
-       ! the grid is only specified if there is only 1 section present.
+            ! Compute the derivative of the rotation matrix and the rotation
+            ! point; needed for velocity due to the rigid body rotation of
+            ! the entire grid. It is assumed that the rigid body motion of
+            ! the grid is only specified if there is only 1 section present.
 
-       call derivativeRotMatrixRigid(derivRotationMatrix, rotationPoint, &
-            t(1))
+            call derivativeRotMatrixRigid(derivRotationMatrix, rotationPoint, &
+                                          t(1))
 
-       !compute the rotation matrix to update the velocities for the time
-       !spectral stability derivative case...
+            !compute the rotation matrix to update the velocities for the time
+            !spectral stability derivative case...
 
 #ifndef USE_TAPENADE
 ! the stability portion is not ADed.
 
-       if(TSStability)then
-          ! Determine the time values of the old and new time level.
-          ! It is assumed that the rigid body rotation of the mesh is only
-          ! used when only 1 section is present.
-
-          tNew = timeUnsteady + timeUnsteadyRestart
-          tOld = tNew - t(1)
-
-          if(TSpMode.or. TSqMode .or.TSrMode) then
-             ! Compute the rotation matrix of the rigid body rotation as
-             ! well as the rotation point; the latter may vary in time due
-             ! to rigid body translation.
-
-             call rotMatrixRigidBody(tNew, tOld, rotationMatrix, rotationPoint)
-
-             if(TSAlphaFollowing) then
-
-                velxgrid0 = rotationMatrix(1,1)*velxgrid0 &
-                     + rotationMatrix(1,2)*velygrid0 &
-                     + rotationMatrix(1,3)*velzgrid0
-                velygrid0 = rotationMatrix(2,1)*velxgrid0 &
-                     + rotationMatrix(2,2)*velygrid0 &
-                     + rotationMatrix(2,3)*velzgrid0
-                velzgrid0 = rotationMatrix(3,1)*velxgrid0 &
-                     + rotationMatrix(3,2)*velygrid0 &
-                     + rotationMatrix(3,3)*velzgrid0
-
-             endif
-          elseif(tsAlphaMode)then
-             !Determine the alpha for this time instance
-             alphaIncrement = TSAlpha(degreePolAlpha,   coefPolAlpha,       &
-                  degreeFourAlpha,  omegaFourAlpha,     &
-                  cosCoefFourAlpha, sinCoefFourAlpha, t(1))
-
-             alphaTS = alpha+alphaIncrement
-             !Determine the grid velocity for this alpha
-             refDirection(:) = zero
-             refDirection(1) = one
-             call getDirVector(refDirection, alphaTS, beta, velDir, liftIndex)
-
-             !do I need to update the lift direction and drag direction as well?
-             !set the effictive grid velocity for this time interval
-             velxGrid0 = (aInf*machgrid)*(-velDir(1))
-             velyGrid0 = (aInf*machgrid)*(-velDir(2))
-             velzGrid0 = (aInf*machgrid)*(-velDir(3))
-
-          elseif(tsBetaMode)then
-
-             !Determine the alpha for this time instance
-             betaIncrement = TSBeta(degreePolBeta,   coefPolBeta,       &
-                  degreeFourBeta,  omegaFourBeta,     &
-                  cosCoefFourBeta, sinCoefFourBeta, t(1))
-
-             betaTS = beta+betaIncrement
-             !Determine the grid velocity for this alpha
-             refDirection(:) = zero
-             refDirection(1) = one
-             call getDirVector(refDirection, alpha, betaTS, velDir, liftIndex)
-
-             !do I need to update the lift direction and drag direction as well?
-             !set the effictive grid velocity for this time interval
-             velxGrid0 = (aInf*machgrid)*(-velDir(1))
-             velyGrid0 = (aInf*machgrid)*(-velDir(2))
-             velzGrid0 = (aInf*machgrid)*(-velDir(3))
-          elseif(TSMachMode)then
-             !determine the mach number at this time interval
-             IntervalMach = TSMach(degreePolMach,   coefPolMach,       &
-                  degreeFourMach,  omegaFourMach,     &
-                  cosCoefFourMach, sinCoefFourMach, t(1))
-             !set the effective grid velocity
-             velxGrid0 = (aInf*(IntervalMach+machgrid))*(-velDirFreestream(1))
-             velyGrid0 = (aInf*(IntervalMach+machgrid))*(-velDirFreestream(2))
-             velzGrid0 = (aInf*(IntervalMach+machgrid))*(-velDirFreestream(3))
-
-          elseif(TSAltitudeMode)then
-             call terminate('gridVelocityFineLevel','altitude motion not yet implemented...')
-          else
-             call terminate('gridVelocityFineLevel','Not a recognized Stability Motion')
-          end if
-       endif
+            if (TSStability) then
+                ! Determine the time values of the old and new time level.
+                ! It is assumed that the rigid body rotation of the mesh is only
+                ! used when only 1 section is present.
+
+                tNew = timeUnsteady + timeUnsteadyRestart
+                tOld = tNew - t(1)
+
+                if (TSpMode .or. TSqMode .or. TSrMode) then
+                    ! Compute the rotation matrix of the rigid body rotation as
+                    ! well as the rotation point; the latter may vary in time due
+                    ! to rigid body translation.
+
+                    call rotMatrixRigidBody(tNew, tOld, rotationMatrix, rotationPoint)
+
+                    if (TSAlphaFollowing) then
+
+                        velxgrid0 = rotationMatrix(1, 1) * velxgrid0 &
+                                    + rotationMatrix(1, 2) * velygrid0 &
+                                    + rotationMatrix(1, 3) * velzgrid0
+                        velygrid0 = rotationMatrix(2, 1) * velxgrid0 &
+                                    + rotationMatrix(2, 2) * velygrid0 &
+                                    + rotationMatrix(2, 3) * velzgrid0
+                        velzgrid0 = rotationMatrix(3, 1) * velxgrid0 &
+                                    + rotationMatrix(3, 2) * velygrid0 &
+                                    + rotationMatrix(3, 3) * velzgrid0
+
+                    end if
+                elseif (tsAlphaMode) then
+                    !Determine the alpha for this time instance
+                    alphaIncrement = TSAlpha(degreePolAlpha, coefPolAlpha, &
+                                             degreeFourAlpha, omegaFourAlpha, &
+                                             cosCoefFourAlpha, sinCoefFourAlpha, t(1))
+
+                    alphaTS = alpha + alphaIncrement
+                    !Determine the grid velocity for this alpha
+                    refDirection(:) = zero
+                    refDirection(1) = one
+                    call getDirVector(refDirection, alphaTS, beta, velDir, liftIndex)
+
+                    !do I need to update the lift direction and drag direction as well?
+                    !set the effictive grid velocity for this time interval
+                    velxGrid0 = (aInf * machgrid) * (-velDir(1))
+                    velyGrid0 = (aInf * machgrid) * (-velDir(2))
+                    velzGrid0 = (aInf * machgrid) * (-velDir(3))
+
+                elseif (tsBetaMode) then
+
+                    !Determine the alpha for this time instance
+                    betaIncrement = TSBeta(degreePolBeta, coefPolBeta, &
+                                           degreeFourBeta, omegaFourBeta, &
+                                           cosCoefFourBeta, sinCoefFourBeta, t(1))
+
+                    betaTS = beta + betaIncrement
+                    !Determine the grid velocity for this alpha
+                    refDirection(:) = zero
+                    refDirection(1) = one
+                    call getDirVector(refDirection, alpha, betaTS, velDir, liftIndex)
+
+                    !do I need to update the lift direction and drag direction as well?
+                    !set the effictive grid velocity for this time interval
+                    velxGrid0 = (aInf * machgrid) * (-velDir(1))
+                    velyGrid0 = (aInf * machgrid) * (-velDir(2))
+                    velzGrid0 = (aInf * machgrid) * (-velDir(3))
+                elseif (TSMachMode) then
+                    !determine the mach number at this time interval
+                    IntervalMach = TSMach(degreePolMach, coefPolMach, &
+                                          degreeFourMach, omegaFourMach, &
+                                          cosCoefFourMach, sinCoefFourMach, t(1))
+                    !set the effective grid velocity
+                    velxGrid0 = (aInf * (IntervalMach + machgrid)) * (-velDirFreestream(1))
+                    velyGrid0 = (aInf * (IntervalMach + machgrid)) * (-velDirFreestream(2))
+                    velzGrid0 = (aInf * (IntervalMach + machgrid)) * (-velDirFreestream(3))
+
+                elseif (TSAltitudeMode) then
+                    call terminate('gridVelocityFineLevel', 'altitude motion not yet implemented...')
+                else
+                    call terminate('gridVelocityFineLevel', 'Not a recognized Stability Motion')
+                end if
+            end if
 #endif
 
-       ! Loop over the number of viscous subfaces.
-
-       bocoLoop2: do mm=1,nViscBocos
-
-          ! Store the rotation center and the rotation rate
-          ! for this subface.
-
-          ii = cgnsSubface(mm)
-
-          rotCenter = cgnsDoms(nbkGlobal)%bocoInfo(ii)%rotCenter
-          rotRate   = timeRef*cgnsDoms(nbkGlobal)%bocoInfo(ii)%rotRate
-
-          ! useWindAxis should go back here!
-          velXgrid = velXGrid0
-          velYgrid = velYGrid0
-          velZgrid = velZGrid0
-
-          ! Loop over the quadrilateral faces of the viscous
-          ! subface.
-
-          ! The new procedure is less elegant as the previous one.
-          ! But the new stands up to Tapenade.
-          if (BCFaceID(mm) == iMin) then
-
-            do j=BCData(mm)%jcBeg, BCData(mm)%jcEnd
-               do i=BCData(mm)%icBeg, BCData(mm)%icEnd
-
-                  ! Compute the coordinates of the centroid of the face.
-                  ! Normally this would be an average of i-1 and i, but
-                  ! due to the usage of the pointer xFace and the fact
-                  ! that x starts at index 0 this is shifted 1 index.
-
-                  xc(1) = fourth*(flowDoms(nn, groundLevel, sps)%x(1, i,  j,  1) &
-                        +         flowDoms(nn, groundLevel, sps)%x(1, i,  j-1,1) &
-                        +         flowDoms(nn, groundLevel, sps)%x(1, i-1,j,  1) &
-                        +         flowDoms(nn, groundLevel, sps)%x(1, i-1,j-1,1))
-                  xc(2) = fourth*(flowDoms(nn, groundLevel, sps)%x(1, i,  j,  2) &
-                        +         flowDoms(nn, groundLevel, sps)%x(1, i,  j-1,2) &
-                        +         flowDoms(nn, groundLevel, sps)%x(1, i-1,j,  2) &
-                        +         flowDoms(nn, groundLevel, sps)%x(1, i-1,j-1,2))
-                  xc(3) = fourth*(flowDoms(nn, groundLevel, sps)%x(1, i,  j,  3) &
-                        +         flowDoms(nn, groundLevel, sps)%x(1, i,  j-1,3) &
-                        +         flowDoms(nn, groundLevel, sps)%x(1, i-1,j,  3) &
-                        +         flowDoms(nn, groundLevel, sps)%x(1, i-1,j-1,3))
-
-                  ! Determine the coordinates relative to the center
-                  ! of rotation.
-
-                  xxc(1) = xc(1) - rotCenter(1)
-                  xxc(2) = xc(2) - rotCenter(2)
-                  xxc(3) = xc(3) - rotCenter(3)
-
-                  ! Compute the velocity, which is the cross product
-                  ! of rotRate and xc.
-
-                  BCData(mm)%uSlip(i,j,1) = rotRate(2)*xxc(3) - rotRate(3)*xxc(2)
-                  BCData(mm)%uSlip(i,j,2) = rotRate(3)*xxc(1) - rotRate(1)*xxc(3)
-                  BCData(mm)%uSlip(i,j,3) = rotRate(1)*xxc(2) - rotRate(2)*xxc(1)
-
-                  ! Determine the coordinates relative to the
-                  ! rigid body rotation point.
-
-                  xxc(1) = xc(1) - rotationPoint(1)
-                  xxc(2) = xc(2) - rotationPoint(2)
-                  xxc(3) = xc(3) - rotationPoint(3)
-
-                  ! Determine the total velocity of the cell center.
-                  ! This is a combination of rotation speed of this
-                  ! block and the entire rigid body rotation.
-
-                  BCData(mm)%uSlip(i,j,1) = BCData(mm)%uSlip(i,j,1) + velxGrid    &
-                        + derivRotationMatrix(1,1)*xxc(1) &
-                        + derivRotationMatrix(1,2)*xxc(2) &
-                        + derivRotationMatrix(1,3)*xxc(3)
-                  BCData(mm)%uSlip(i,j,2) = BCData(mm)%uSlip(i,j,2) + velyGrid    &
-                        + derivRotationMatrix(2,1)*xxc(1) &
-                        + derivRotationMatrix(2,2)*xxc(2) &
-                        + derivRotationMatrix(2,3)*xxc(3)
-                  BCData(mm)%uSlip(i,j,3) = BCData(mm)%uSlip(i,j,3) + velzGrid    &
-                        + derivRotationMatrix(3,1)*xxc(1) &
-                        + derivRotationMatrix(3,2)*xxc(2) &
-                        + derivRotationMatrix(3,3)*xxc(3)
-               enddo
-            enddo
-          
-          else if (BCFaceID(mm) == iMax) then
-
-            do j=BCData(mm)%jcBeg, BCData(mm)%jcEnd
-               do i=BCData(mm)%icBeg, BCData(mm)%icEnd
-
-                  ! Compute the coordinates of the centroid of the face.
-                  ! Normally this would be an average of i-1 and i, but
-                  ! due to the usage of the pointer xFace and the fact
-                  ! that x starts at index 0 this is shifted 1 index.
-
-                  xc(1) = fourth*(flowDoms(nn, groundLevel, sps)%x(il, i,  j,  1) &
-                        +         flowDoms(nn, groundLevel, sps)%x(il, i,  j-1,1) &
-                        +         flowDoms(nn, groundLevel, sps)%x(il, i-1,j,  1) &
-                        +         flowDoms(nn, groundLevel, sps)%x(il, i-1,j-1,1))
-                  xc(2) = fourth*(flowDoms(nn, groundLevel, sps)%x(il, i,  j,  2) &
-                        +         flowDoms(nn, groundLevel, sps)%x(il, i,  j-1,2) &
-                        +         flowDoms(nn, groundLevel, sps)%x(il, i-1,j,  2) &
-                        +         flowDoms(nn, groundLevel, sps)%x(il, i-1,j-1,2))
-                  xc(3) = fourth*(flowDoms(nn, groundLevel, sps)%x(il, i,  j,  3) &
-                        +         flowDoms(nn, groundLevel, sps)%x(il, i,  j-1,3) &
-                        +         flowDoms(nn, groundLevel, sps)%x(il, i-1,j,  3) &
-                        +         flowDoms(nn, groundLevel, sps)%x(il, i-1,j-1,3))
-
-                  ! Determine the coordinates relative to the center
-                  ! of rotation.
-
-                  xxc(1) = xc(1) - rotCenter(1)
-                  xxc(2) = xc(2) - rotCenter(2)
-                  xxc(3) = xc(3) - rotCenter(3)
-
-                  ! Compute the velocity, which is the cross product
-                  ! of rotRate and xc.
-
-                  BCData(mm)%uSlip(i,j,1) = rotRate(2)*xxc(3) - rotRate(3)*xxc(2)
-                  BCData(mm)%uSlip(i,j,2) = rotRate(3)*xxc(1) - rotRate(1)*xxc(3)
-                  BCData(mm)%uSlip(i,j,3) = rotRate(1)*xxc(2) - rotRate(2)*xxc(1)
-
-                  ! Determine the coordinates relative to the
-                  ! rigid body rotation point.
-
-                  xxc(1) = xc(1) - rotationPoint(1)
-                  xxc(2) = xc(2) - rotationPoint(2)
-                  xxc(3) = xc(3) - rotationPoint(3)
-
-                  ! Determine the total velocity of the cell center.
-                  ! This is a combination of rotation speed of this
-                  ! block and the entire rigid body rotation.
-
-                  BCData(mm)%uSlip(i,j,1) = BCData(mm)%uSlip(i,j,1) + velxGrid    &
-                        + derivRotationMatrix(1,1)*xxc(1) &
-                        + derivRotationMatrix(1,2)*xxc(2) &
-                        + derivRotationMatrix(1,3)*xxc(3)
-                  BCData(mm)%uSlip(i,j,2) = BCData(mm)%uSlip(i,j,2) + velyGrid    &
-                        + derivRotationMatrix(2,1)*xxc(1) &
-                        + derivRotationMatrix(2,2)*xxc(2) &
-                        + derivRotationMatrix(2,3)*xxc(3)
-                  BCData(mm)%uSlip(i,j,3) = BCData(mm)%uSlip(i,j,3) + velzGrid    &
-                        + derivRotationMatrix(3,1)*xxc(1) &
-                        + derivRotationMatrix(3,2)*xxc(2) &
-                        + derivRotationMatrix(3,3)*xxc(3)
-               enddo
-            enddo
-
-          else if (BCFaceID(mm) == jMin) then
-
-            do j=BCData(mm)%jcBeg, BCData(mm)%jcEnd
-               do i=BCData(mm)%icBeg, BCData(mm)%icEnd
-
-                  ! Compute the coordinates of the centroid of the face.
-                  ! Normally this would be an average of i-1 and i, but
-                  ! due to the usage of the pointer xFace and the fact
-                  ! that x starts at index 0 this is shifted 1 index.
-
-                  xc(1) = fourth*(flowDoms(nn, groundLevel, sps)%x(i,  1,j,  1) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i,  1,j-1,1) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i-1,1,j,  1) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i-1,1,j-1,1))
-                  xc(2) = fourth*(flowDoms(nn, groundLevel, sps)%x(i,  1,j,  2) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i,  1,j-1,2) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i-1,1,j,  2) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i-1,1,j-1,2))
-                  xc(3) = fourth*(flowDoms(nn, groundLevel, sps)%x(i,  1,j,  3) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i,  1,j-1,3) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i-1,1,j,  3) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i-1,1,j-1,3))
-
-                  ! Determine the coordinates relative to the center
-                  ! of rotation.
-
-                  xxc(1) = xc(1) - rotCenter(1)
-                  xxc(2) = xc(2) - rotCenter(2)
-                  xxc(3) = xc(3) - rotCenter(3)
-
-                  ! Compute the velocity, which is the cross product
-                  ! of rotRate and xc.
-
-                  BCData(mm)%uSlip(i,j,1) = rotRate(2)*xxc(3) - rotRate(3)*xxc(2)
-                  BCData(mm)%uSlip(i,j,2) = rotRate(3)*xxc(1) - rotRate(1)*xxc(3)
-                  BCData(mm)%uSlip(i,j,3) = rotRate(1)*xxc(2) - rotRate(2)*xxc(1)
-
-                  ! Determine the coordinates relative to the
-                  ! rigid body rotation point.
-
-                  xxc(1) = xc(1) - rotationPoint(1)
-                  xxc(2) = xc(2) - rotationPoint(2)
-                  xxc(3) = xc(3) - rotationPoint(3)
-
-                  ! Determine the total velocity of the cell center.
-                  ! This is a combination of rotation speed of this
-                  ! block and the entire rigid body rotation.
-
-                  BCData(mm)%uSlip(i,j,1) = BCData(mm)%uSlip(i,j,1) + velxGrid    &
-                        + derivRotationMatrix(1,1)*xxc(1) &
-                        + derivRotationMatrix(1,2)*xxc(2) &
-                        + derivRotationMatrix(1,3)*xxc(3)
-                  BCData(mm)%uSlip(i,j,2) = BCData(mm)%uSlip(i,j,2) + velyGrid    &
-                        + derivRotationMatrix(2,1)*xxc(1) &
-                        + derivRotationMatrix(2,2)*xxc(2) &
-                        + derivRotationMatrix(2,3)*xxc(3)
-                  BCData(mm)%uSlip(i,j,3) = BCData(mm)%uSlip(i,j,3) + velzGrid    &
-                        + derivRotationMatrix(3,1)*xxc(1) &
-                        + derivRotationMatrix(3,2)*xxc(2) &
-                        + derivRotationMatrix(3,3)*xxc(3)
-               enddo
-            enddo
-
-          else if (BCFaceID(mm) == jMax) then
-
-            do j=BCData(mm)%jcBeg, BCData(mm)%jcEnd
-               do i=BCData(mm)%icBeg, BCData(mm)%icEnd
-
-                  ! Compute the coordinates of the centroid of the face.
-                  ! Normally this would be an average of i-1 and i, but
-                  ! due to the usage of the pointer xFace and the fact
-                  ! that x starts at index 0 this is shifted 1 index.
-
-                  xc(1) = fourth*(flowDoms(nn, groundLevel, sps)%x(i,  jl,j,  1) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i,  jl,j-1,1) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i-1,jl,j,  1) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i-1,jl,j-1,1))
-                  xc(2) = fourth*(flowDoms(nn, groundLevel, sps)%x(i,  jl,j,  2) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i,  jl,j-1,2) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i-1,jl,j,  2) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i-1,jl,j-1,2))
-                  xc(3) = fourth*(flowDoms(nn, groundLevel, sps)%x(i,  jl,j,  3) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i,  jl,j-1,3) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i-1,jl,j,  3) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i-1,jl,j-1,3))
-
-                  ! Determine the coordinates relative to the center
-                  ! of rotation.
-
-                  xxc(1) = xc(1) - rotCenter(1)
-                  xxc(2) = xc(2) - rotCenter(2)
-                  xxc(3) = xc(3) - rotCenter(3)
-
-                  ! Compute the velocity, which is the cross product
-                  ! of rotRate and xc.
-
-                  BCData(mm)%uSlip(i,j,1) = rotRate(2)*xxc(3) - rotRate(3)*xxc(2)
-                  BCData(mm)%uSlip(i,j,2) = rotRate(3)*xxc(1) - rotRate(1)*xxc(3)
-                  BCData(mm)%uSlip(i,j,3) = rotRate(1)*xxc(2) - rotRate(2)*xxc(1)
-
-                  ! Determine the coordinates relative to the
-                  ! rigid body rotation point.
-
-                  xxc(1) = xc(1) - rotationPoint(1)
-                  xxc(2) = xc(2) - rotationPoint(2)
-                  xxc(3) = xc(3) - rotationPoint(3)
-
-                  ! Determine the total velocity of the cell center.
-                  ! This is a combination of rotation speed of this
-                  ! block and the entire rigid body rotation.
-
-                  BCData(mm)%uSlip(i,j,1) = BCData(mm)%uSlip(i,j,1) + velxGrid    &
-                        + derivRotationMatrix(1,1)*xxc(1) &
-                        + derivRotationMatrix(1,2)*xxc(2) &
-                        + derivRotationMatrix(1,3)*xxc(3)
-                  BCData(mm)%uSlip(i,j,2) = BCData(mm)%uSlip(i,j,2) + velyGrid    &
-                        + derivRotationMatrix(2,1)*xxc(1) &
-                        + derivRotationMatrix(2,2)*xxc(2) &
-                        + derivRotationMatrix(2,3)*xxc(3)
-                  BCData(mm)%uSlip(i,j,3) = BCData(mm)%uSlip(i,j,3) + velzGrid    &
-                        + derivRotationMatrix(3,1)*xxc(1) &
-                        + derivRotationMatrix(3,2)*xxc(2) &
-                        + derivRotationMatrix(3,3)*xxc(3)
-               enddo
-            enddo
-
-          else if (BCFaceID(mm) == kMin) then
-
-            do j=BCData(mm)%jcBeg, BCData(mm)%jcEnd
-               do i=BCData(mm)%icBeg, BCData(mm)%icEnd
-
-                  ! Compute the coordinates of the centroid of the face.
-                  ! Normally this would be an average of i-1 and i, but
-                  ! due to the usage of the pointer xFace and the fact
-                  ! that x starts at index 0 this is shifted 1 index.
-
-                  xc(1) = fourth*(flowDoms(nn, groundLevel, sps)%x(i,  j,  1,1) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i,  j-1,1,1) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i-1,j,  1,1) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i-1,j-1,1,1))
-                  xc(2) = fourth*(flowDoms(nn, groundLevel, sps)%x(i,  j,  1,2) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i,  j-1,1,2) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i-1,j,  1,2) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i-1,j-1,1,2))
-                  xc(3) = fourth*(flowDoms(nn, groundLevel, sps)%x(i,  j,  1,3) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i,  j-1,1,3) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i-1,j,  1,3) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i-1,j-1,1,3))
-
-                  ! Determine the coordinates relative to the center
-                  ! of rotation.
-
-                  xxc(1) = xc(1) - rotCenter(1)
-                  xxc(2) = xc(2) - rotCenter(2)
-                  xxc(3) = xc(3) - rotCenter(3)
-
-                  ! Compute the velocity, which is the cross product
-                  ! of rotRate and xc.
-
-                  BCData(mm)%uSlip(i,j,1) = rotRate(2)*xxc(3) - rotRate(3)*xxc(2)
-                  BCData(mm)%uSlip(i,j,2) = rotRate(3)*xxc(1) - rotRate(1)*xxc(3)
-                  BCData(mm)%uSlip(i,j,3) = rotRate(1)*xxc(2) - rotRate(2)*xxc(1)
-
-                  ! Determine the coordinates relative to the
-                  ! rigid body rotation point.
-
-                  xxc(1) = xc(1) - rotationPoint(1)
-                  xxc(2) = xc(2) - rotationPoint(2)
-                  xxc(3) = xc(3) - rotationPoint(3)
-
-                  ! Determine the total velocity of the cell center.
-                  ! This is a combination of rotation speed of this
-                  ! block and the entire rigid body rotation.
-
-                  BCData(mm)%uSlip(i,j,1) = BCData(mm)%uSlip(i,j,1) + velxGrid    &
-                        + derivRotationMatrix(1,1)*xxc(1) &
-                        + derivRotationMatrix(1,2)*xxc(2) &
-                        + derivRotationMatrix(1,3)*xxc(3)
-                  BCData(mm)%uSlip(i,j,2) = BCData(mm)%uSlip(i,j,2) + velyGrid    &
-                        + derivRotationMatrix(2,1)*xxc(1) &
-                        + derivRotationMatrix(2,2)*xxc(2) &
-                        + derivRotationMatrix(2,3)*xxc(3)
-                  BCData(mm)%uSlip(i,j,3) = BCData(mm)%uSlip(i,j,3) + velzGrid    &
-                        + derivRotationMatrix(3,1)*xxc(1) &
-                        + derivRotationMatrix(3,2)*xxc(2) &
-                        + derivRotationMatrix(3,3)*xxc(3)
-               enddo
-            enddo
-
-          else if (BCFaceID(mm) == kMax) then
-
-            do j=BCData(mm)%jcBeg, BCData(mm)%jcEnd
-               do i=BCData(mm)%icBeg, BCData(mm)%icEnd
-
-                  ! Compute the coordinates of the centroid of the face.
-                  ! Normally this would be an average of i-1 and i, but
-                  ! due to the usage of the pointer xFace and the fact
-                  ! that x starts at index 0 this is shifted 1 index.
-
-                  xc(1) = fourth*(flowDoms(nn, groundLevel, sps)%x(i,  j,  kl,1) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i,  j-1,kl,1) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i-1,j,  kl,1) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i-1,j-1,kl,1))
-                  xc(2) = fourth*(flowDoms(nn, groundLevel, sps)%x(i,  j,  kl,2) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i,  j-1,kl,2) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i-1,j,  kl,2) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i-1,j-1,kl,2))
-                  xc(3) = fourth*(flowDoms(nn, groundLevel, sps)%x(i,  j,  kl,3) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i,  j-1,kl,3) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i-1,j,  kl,3) &
-                        +         flowDoms(nn, groundLevel, sps)%x(i-1,j-1,kl,3))
-
-                  ! Determine the coordinates relative to the center
-                  ! of rotation.
-
-                  xxc(1) = xc(1) - rotCenter(1)
-                  xxc(2) = xc(2) - rotCenter(2)
-                  xxc(3) = xc(3) - rotCenter(3)
-
-                  ! Compute the velocity, which is the cross product
-                  ! of rotRate and xc.
-
-                  BCData(mm)%uSlip(i,j,1) = rotRate(2)*xxc(3) - rotRate(3)*xxc(2)
-                  BCData(mm)%uSlip(i,j,2) = rotRate(3)*xxc(1) - rotRate(1)*xxc(3)
-                  BCData(mm)%uSlip(i,j,3) = rotRate(1)*xxc(2) - rotRate(2)*xxc(1)
-
-                  ! Determine the coordinates relative to the
-                  ! rigid body rotation point.
-
-                  xxc(1) = xc(1) - rotationPoint(1)
-                  xxc(2) = xc(2) - rotationPoint(2)
-                  xxc(3) = xc(3) - rotationPoint(3)
-
-                  ! Determine the total velocity of the cell center.
-                  ! This is a combination of rotation speed of this
-                  ! block and the entire rigid body rotation.
-
-                  BCData(mm)%uSlip(i,j,1) = BCData(mm)%uSlip(i,j,1) + velxGrid    &
-                        + derivRotationMatrix(1,1)*xxc(1) &
-                        + derivRotationMatrix(1,2)*xxc(2) &
-                        + derivRotationMatrix(1,3)*xxc(3)
-                  BCData(mm)%uSlip(i,j,2) = BCData(mm)%uSlip(i,j,2) + velyGrid    &
-                        + derivRotationMatrix(2,1)*xxc(1) &
-                        + derivRotationMatrix(2,2)*xxc(2) &
-                        + derivRotationMatrix(2,3)*xxc(3)
-                  BCData(mm)%uSlip(i,j,3) = BCData(mm)%uSlip(i,j,3) + velzGrid    &
-                        + derivRotationMatrix(3,1)*xxc(1) &
-                        + derivRotationMatrix(3,2)*xxc(2) &
-                        + derivRotationMatrix(3,3)*xxc(3)
-               enddo
-            enddo
-
-          end if
-
-       enddo bocoLoop2
-
-
-    endif testUseOldCoor
-
-  end subroutine slipVelocitiesFineLevel_block
-
-
-
+            ! Loop over the number of viscous subfaces.
+
+            bocoLoop2: do mm = 1, nViscBocos
+
+                ! Store the rotation center and the rotation rate
+                ! for this subface.
+
+                ii = cgnsSubface(mm)
+
+                rotCenter = cgnsDoms(nbkGlobal)%bocoInfo(ii)%rotCenter
+                rotRate = timeRef * cgnsDoms(nbkGlobal)%bocoInfo(ii)%rotRate
+
+                ! useWindAxis should go back here!
+                velXgrid = velXGrid0
+                velYgrid = velYGrid0
+                velZgrid = velZGrid0
+
+                ! Loop over the quadrilateral faces of the viscous
+                ! subface.
+
+                ! The new procedure is less elegant as the previous one.
+                ! But the new stands up to Tapenade.
+                if (BCFaceID(mm) == iMin) then
+
+                    do j = BCData(mm)%jcBeg, BCData(mm)%jcEnd
+                        do i = BCData(mm)%icBeg, BCData(mm)%icEnd
+
+                            ! Compute the coordinates of the centroid of the face.
+                            ! Normally this would be an average of i-1 and i, but
+                            ! due to the usage of the pointer xFace and the fact
+                            ! that x starts at index 0 this is shifted 1 index.
+
+                            xc(1) = fourth * (flowDoms(nn, groundLevel, sps)%x(1, i, j, 1) &
+                                              + flowDoms(nn, groundLevel, sps)%x(1, i, j - 1, 1) &
+                                              + flowDoms(nn, groundLevel, sps)%x(1, i - 1, j, 1) &
+                                              + flowDoms(nn, groundLevel, sps)%x(1, i - 1, j - 1, 1))
+                            xc(2) = fourth * (flowDoms(nn, groundLevel, sps)%x(1, i, j, 2) &
+                                              + flowDoms(nn, groundLevel, sps)%x(1, i, j - 1, 2) &
+                                              + flowDoms(nn, groundLevel, sps)%x(1, i - 1, j, 2) &
+                                              + flowDoms(nn, groundLevel, sps)%x(1, i - 1, j - 1, 2))
+                            xc(3) = fourth * (flowDoms(nn, groundLevel, sps)%x(1, i, j, 3) &
+                                              + flowDoms(nn, groundLevel, sps)%x(1, i, j - 1, 3) &
+                                              + flowDoms(nn, groundLevel, sps)%x(1, i - 1, j, 3) &
+                                              + flowDoms(nn, groundLevel, sps)%x(1, i - 1, j - 1, 3))
+
+                            ! Determine the coordinates relative to the center
+                            ! of rotation.
+
+                            xxc(1) = xc(1) - rotCenter(1)
+                            xxc(2) = xc(2) - rotCenter(2)
+                            xxc(3) = xc(3) - rotCenter(3)
+
+                            ! Compute the velocity, which is the cross product
+                            ! of rotRate and xc.
+
+                            BCData(mm)%uSlip(i, j, 1) = rotRate(2) * xxc(3) - rotRate(3) * xxc(2)
+                            BCData(mm)%uSlip(i, j, 2) = rotRate(3) * xxc(1) - rotRate(1) * xxc(3)
+                            BCData(mm)%uSlip(i, j, 3) = rotRate(1) * xxc(2) - rotRate(2) * xxc(1)
+
+                            ! Determine the coordinates relative to the
+                            ! rigid body rotation point.
+
+                            xxc(1) = xc(1) - rotationPoint(1)
+                            xxc(2) = xc(2) - rotationPoint(2)
+                            xxc(3) = xc(3) - rotationPoint(3)
+
+                            ! Determine the total velocity of the cell center.
+                            ! This is a combination of rotation speed of this
+                            ! block and the entire rigid body rotation.
+
+                            BCData(mm)%uSlip(i, j, 1) = BCData(mm)%uSlip(i, j, 1) + velxGrid &
+                                                        + derivRotationMatrix(1, 1) * xxc(1) &
+                                                        + derivRotationMatrix(1, 2) * xxc(2) &
+                                                        + derivRotationMatrix(1, 3) * xxc(3)
+                            BCData(mm)%uSlip(i, j, 2) = BCData(mm)%uSlip(i, j, 2) + velyGrid &
+                                                        + derivRotationMatrix(2, 1) * xxc(1) &
+                                                        + derivRotationMatrix(2, 2) * xxc(2) &
+                                                        + derivRotationMatrix(2, 3) * xxc(3)
+                            BCData(mm)%uSlip(i, j, 3) = BCData(mm)%uSlip(i, j, 3) + velzGrid &
+                                                        + derivRotationMatrix(3, 1) * xxc(1) &
+                                                        + derivRotationMatrix(3, 2) * xxc(2) &
+                                                        + derivRotationMatrix(3, 3) * xxc(3)
+                        end do
+                    end do
+
+                else if (BCFaceID(mm) == iMax) then
+
+                    do j = BCData(mm)%jcBeg, BCData(mm)%jcEnd
+                        do i = BCData(mm)%icBeg, BCData(mm)%icEnd
+
+                            ! Compute the coordinates of the centroid of the face.
+                            ! Normally this would be an average of i-1 and i, but
+                            ! due to the usage of the pointer xFace and the fact
+                            ! that x starts at index 0 this is shifted 1 index.
+
+                            xc(1) = fourth * (flowDoms(nn, groundLevel, sps)%x(il, i, j, 1) &
+                                              + flowDoms(nn, groundLevel, sps)%x(il, i, j - 1, 1) &
+                                              + flowDoms(nn, groundLevel, sps)%x(il, i - 1, j, 1) &
+                                              + flowDoms(nn, groundLevel, sps)%x(il, i - 1, j - 1, 1))
+                            xc(2) = fourth * (flowDoms(nn, groundLevel, sps)%x(il, i, j, 2) &
+                                              + flowDoms(nn, groundLevel, sps)%x(il, i, j - 1, 2) &
+                                              + flowDoms(nn, groundLevel, sps)%x(il, i - 1, j, 2) &
+                                              + flowDoms(nn, groundLevel, sps)%x(il, i - 1, j - 1, 2))
+                            xc(3) = fourth * (flowDoms(nn, groundLevel, sps)%x(il, i, j, 3) &
+                                              + flowDoms(nn, groundLevel, sps)%x(il, i, j - 1, 3) &
+                                              + flowDoms(nn, groundLevel, sps)%x(il, i - 1, j, 3) &
+                                              + flowDoms(nn, groundLevel, sps)%x(il, i - 1, j - 1, 3))
+
+                            ! Determine the coordinates relative to the center
+                            ! of rotation.
+
+                            xxc(1) = xc(1) - rotCenter(1)
+                            xxc(2) = xc(2) - rotCenter(2)
+                            xxc(3) = xc(3) - rotCenter(3)
+
+                            ! Compute the velocity, which is the cross product
+                            ! of rotRate and xc.
+
+                            BCData(mm)%uSlip(i, j, 1) = rotRate(2) * xxc(3) - rotRate(3) * xxc(2)
+                            BCData(mm)%uSlip(i, j, 2) = rotRate(3) * xxc(1) - rotRate(1) * xxc(3)
+                            BCData(mm)%uSlip(i, j, 3) = rotRate(1) * xxc(2) - rotRate(2) * xxc(1)
+
+                            ! Determine the coordinates relative to the
+                            ! rigid body rotation point.
+
+                            xxc(1) = xc(1) - rotationPoint(1)
+                            xxc(2) = xc(2) - rotationPoint(2)
+                            xxc(3) = xc(3) - rotationPoint(3)
+
+                            ! Determine the total velocity of the cell center.
+                            ! This is a combination of rotation speed of this
+                            ! block and the entire rigid body rotation.
+
+                            BCData(mm)%uSlip(i, j, 1) = BCData(mm)%uSlip(i, j, 1) + velxGrid &
+                                                        + derivRotationMatrix(1, 1) * xxc(1) &
+                                                        + derivRotationMatrix(1, 2) * xxc(2) &
+                                                        + derivRotationMatrix(1, 3) * xxc(3)
+                            BCData(mm)%uSlip(i, j, 2) = BCData(mm)%uSlip(i, j, 2) + velyGrid &
+                                                        + derivRotationMatrix(2, 1) * xxc(1) &
+                                                        + derivRotationMatrix(2, 2) * xxc(2) &
+                                                        + derivRotationMatrix(2, 3) * xxc(3)
+                            BCData(mm)%uSlip(i, j, 3) = BCData(mm)%uSlip(i, j, 3) + velzGrid &
+                                                        + derivRotationMatrix(3, 1) * xxc(1) &
+                                                        + derivRotationMatrix(3, 2) * xxc(2) &
+                                                        + derivRotationMatrix(3, 3) * xxc(3)
+                        end do
+                    end do
+
+                else if (BCFaceID(mm) == jMin) then
+
+                    do j = BCData(mm)%jcBeg, BCData(mm)%jcEnd
+                        do i = BCData(mm)%icBeg, BCData(mm)%icEnd
+
+                            ! Compute the coordinates of the centroid of the face.
+                            ! Normally this would be an average of i-1 and i, but
+                            ! due to the usage of the pointer xFace and the fact
+                            ! that x starts at index 0 this is shifted 1 index.
+
+                            xc(1) = fourth * (flowDoms(nn, groundLevel, sps)%x(i, 1, j, 1) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i, 1, j - 1, 1) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i - 1, 1, j, 1) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i - 1, 1, j - 1, 1))
+                            xc(2) = fourth * (flowDoms(nn, groundLevel, sps)%x(i, 1, j, 2) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i, 1, j - 1, 2) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i - 1, 1, j, 2) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i - 1, 1, j - 1, 2))
+                            xc(3) = fourth * (flowDoms(nn, groundLevel, sps)%x(i, 1, j, 3) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i, 1, j - 1, 3) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i - 1, 1, j, 3) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i - 1, 1, j - 1, 3))
+
+                            ! Determine the coordinates relative to the center
+                            ! of rotation.
+
+                            xxc(1) = xc(1) - rotCenter(1)
+                            xxc(2) = xc(2) - rotCenter(2)
+                            xxc(3) = xc(3) - rotCenter(3)
+
+                            ! Compute the velocity, which is the cross product
+                            ! of rotRate and xc.
+
+                            BCData(mm)%uSlip(i, j, 1) = rotRate(2) * xxc(3) - rotRate(3) * xxc(2)
+                            BCData(mm)%uSlip(i, j, 2) = rotRate(3) * xxc(1) - rotRate(1) * xxc(3)
+                            BCData(mm)%uSlip(i, j, 3) = rotRate(1) * xxc(2) - rotRate(2) * xxc(1)
+
+                            ! Determine the coordinates relative to the
+                            ! rigid body rotation point.
+
+                            xxc(1) = xc(1) - rotationPoint(1)
+                            xxc(2) = xc(2) - rotationPoint(2)
+                            xxc(3) = xc(3) - rotationPoint(3)
+
+                            ! Determine the total velocity of the cell center.
+                            ! This is a combination of rotation speed of this
+                            ! block and the entire rigid body rotation.
+
+                            BCData(mm)%uSlip(i, j, 1) = BCData(mm)%uSlip(i, j, 1) + velxGrid &
+                                                        + derivRotationMatrix(1, 1) * xxc(1) &
+                                                        + derivRotationMatrix(1, 2) * xxc(2) &
+                                                        + derivRotationMatrix(1, 3) * xxc(3)
+                            BCData(mm)%uSlip(i, j, 2) = BCData(mm)%uSlip(i, j, 2) + velyGrid &
+                                                        + derivRotationMatrix(2, 1) * xxc(1) &
+                                                        + derivRotationMatrix(2, 2) * xxc(2) &
+                                                        + derivRotationMatrix(2, 3) * xxc(3)
+                            BCData(mm)%uSlip(i, j, 3) = BCData(mm)%uSlip(i, j, 3) + velzGrid &
+                                                        + derivRotationMatrix(3, 1) * xxc(1) &
+                                                        + derivRotationMatrix(3, 2) * xxc(2) &
+                                                        + derivRotationMatrix(3, 3) * xxc(3)
+                        end do
+                    end do
+
+                else if (BCFaceID(mm) == jMax) then
+
+                    do j = BCData(mm)%jcBeg, BCData(mm)%jcEnd
+                        do i = BCData(mm)%icBeg, BCData(mm)%icEnd
+
+                            ! Compute the coordinates of the centroid of the face.
+                            ! Normally this would be an average of i-1 and i, but
+                            ! due to the usage of the pointer xFace and the fact
+                            ! that x starts at index 0 this is shifted 1 index.
+
+                            xc(1) = fourth * (flowDoms(nn, groundLevel, sps)%x(i, jl, j, 1) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i, jl, j - 1, 1) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i - 1, jl, j, 1) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i - 1, jl, j - 1, 1))
+                            xc(2) = fourth * (flowDoms(nn, groundLevel, sps)%x(i, jl, j, 2) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i, jl, j - 1, 2) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i - 1, jl, j, 2) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i - 1, jl, j - 1, 2))
+                            xc(3) = fourth * (flowDoms(nn, groundLevel, sps)%x(i, jl, j, 3) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i, jl, j - 1, 3) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i - 1, jl, j, 3) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i - 1, jl, j - 1, 3))
+
+                            ! Determine the coordinates relative to the center
+                            ! of rotation.
+
+                            xxc(1) = xc(1) - rotCenter(1)
+                            xxc(2) = xc(2) - rotCenter(2)
+                            xxc(3) = xc(3) - rotCenter(3)
+
+                            ! Compute the velocity, which is the cross product
+                            ! of rotRate and xc.
+
+                            BCData(mm)%uSlip(i, j, 1) = rotRate(2) * xxc(3) - rotRate(3) * xxc(2)
+                            BCData(mm)%uSlip(i, j, 2) = rotRate(3) * xxc(1) - rotRate(1) * xxc(3)
+                            BCData(mm)%uSlip(i, j, 3) = rotRate(1) * xxc(2) - rotRate(2) * xxc(1)
+
+                            ! Determine the coordinates relative to the
+                            ! rigid body rotation point.
+
+                            xxc(1) = xc(1) - rotationPoint(1)
+                            xxc(2) = xc(2) - rotationPoint(2)
+                            xxc(3) = xc(3) - rotationPoint(3)
+
+                            ! Determine the total velocity of the cell center.
+                            ! This is a combination of rotation speed of this
+                            ! block and the entire rigid body rotation.
+
+                            BCData(mm)%uSlip(i, j, 1) = BCData(mm)%uSlip(i, j, 1) + velxGrid &
+                                                        + derivRotationMatrix(1, 1) * xxc(1) &
+                                                        + derivRotationMatrix(1, 2) * xxc(2) &
+                                                        + derivRotationMatrix(1, 3) * xxc(3)
+                            BCData(mm)%uSlip(i, j, 2) = BCData(mm)%uSlip(i, j, 2) + velyGrid &
+                                                        + derivRotationMatrix(2, 1) * xxc(1) &
+                                                        + derivRotationMatrix(2, 2) * xxc(2) &
+                                                        + derivRotationMatrix(2, 3) * xxc(3)
+                            BCData(mm)%uSlip(i, j, 3) = BCData(mm)%uSlip(i, j, 3) + velzGrid &
+                                                        + derivRotationMatrix(3, 1) * xxc(1) &
+                                                        + derivRotationMatrix(3, 2) * xxc(2) &
+                                                        + derivRotationMatrix(3, 3) * xxc(3)
+                        end do
+                    end do
+
+                else if (BCFaceID(mm) == kMin) then
+
+                    do j = BCData(mm)%jcBeg, BCData(mm)%jcEnd
+                        do i = BCData(mm)%icBeg, BCData(mm)%icEnd
+
+                            ! Compute the coordinates of the centroid of the face.
+                            ! Normally this would be an average of i-1 and i, but
+                            ! due to the usage of the pointer xFace and the fact
+                            ! that x starts at index 0 this is shifted 1 index.
+
+                            xc(1) = fourth * (flowDoms(nn, groundLevel, sps)%x(i, j, 1, 1) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i, j - 1, 1, 1) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i - 1, j, 1, 1) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i - 1, j - 1, 1, 1))
+                            xc(2) = fourth * (flowDoms(nn, groundLevel, sps)%x(i, j, 1, 2) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i, j - 1, 1, 2) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i - 1, j, 1, 2) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i - 1, j - 1, 1, 2))
+                            xc(3) = fourth * (flowDoms(nn, groundLevel, sps)%x(i, j, 1, 3) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i, j - 1, 1, 3) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i - 1, j, 1, 3) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i - 1, j - 1, 1, 3))
+
+                            ! Determine the coordinates relative to the center
+                            ! of rotation.
+
+                            xxc(1) = xc(1) - rotCenter(1)
+                            xxc(2) = xc(2) - rotCenter(2)
+                            xxc(3) = xc(3) - rotCenter(3)
+
+                            ! Compute the velocity, which is the cross product
+                            ! of rotRate and xc.
+
+                            BCData(mm)%uSlip(i, j, 1) = rotRate(2) * xxc(3) - rotRate(3) * xxc(2)
+                            BCData(mm)%uSlip(i, j, 2) = rotRate(3) * xxc(1) - rotRate(1) * xxc(3)
+                            BCData(mm)%uSlip(i, j, 3) = rotRate(1) * xxc(2) - rotRate(2) * xxc(1)
+
+                            ! Determine the coordinates relative to the
+                            ! rigid body rotation point.
+
+                            xxc(1) = xc(1) - rotationPoint(1)
+                            xxc(2) = xc(2) - rotationPoint(2)
+                            xxc(3) = xc(3) - rotationPoint(3)
+
+                            ! Determine the total velocity of the cell center.
+                            ! This is a combination of rotation speed of this
+                            ! block and the entire rigid body rotation.
+
+                            BCData(mm)%uSlip(i, j, 1) = BCData(mm)%uSlip(i, j, 1) + velxGrid &
+                                                        + derivRotationMatrix(1, 1) * xxc(1) &
+                                                        + derivRotationMatrix(1, 2) * xxc(2) &
+                                                        + derivRotationMatrix(1, 3) * xxc(3)
+                            BCData(mm)%uSlip(i, j, 2) = BCData(mm)%uSlip(i, j, 2) + velyGrid &
+                                                        + derivRotationMatrix(2, 1) * xxc(1) &
+                                                        + derivRotationMatrix(2, 2) * xxc(2) &
+                                                        + derivRotationMatrix(2, 3) * xxc(3)
+                            BCData(mm)%uSlip(i, j, 3) = BCData(mm)%uSlip(i, j, 3) + velzGrid &
+                                                        + derivRotationMatrix(3, 1) * xxc(1) &
+                                                        + derivRotationMatrix(3, 2) * xxc(2) &
+                                                        + derivRotationMatrix(3, 3) * xxc(3)
+                        end do
+                    end do
+
+                else if (BCFaceID(mm) == kMax) then
+
+                    do j = BCData(mm)%jcBeg, BCData(mm)%jcEnd
+                        do i = BCData(mm)%icBeg, BCData(mm)%icEnd
+
+                            ! Compute the coordinates of the centroid of the face.
+                            ! Normally this would be an average of i-1 and i, but
+                            ! due to the usage of the pointer xFace and the fact
+                            ! that x starts at index 0 this is shifted 1 index.
+
+                            xc(1) = fourth * (flowDoms(nn, groundLevel, sps)%x(i, j, kl, 1) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i, j - 1, kl, 1) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i - 1, j, kl, 1) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i - 1, j - 1, kl, 1))
+                            xc(2) = fourth * (flowDoms(nn, groundLevel, sps)%x(i, j, kl, 2) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i, j - 1, kl, 2) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i - 1, j, kl, 2) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i - 1, j - 1, kl, 2))
+                            xc(3) = fourth * (flowDoms(nn, groundLevel, sps)%x(i, j, kl, 3) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i, j - 1, kl, 3) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i - 1, j, kl, 3) &
+                                              + flowDoms(nn, groundLevel, sps)%x(i - 1, j - 1, kl, 3))
+
+                            ! Determine the coordinates relative to the center
+                            ! of rotation.
+
+                            xxc(1) = xc(1) - rotCenter(1)
+                            xxc(2) = xc(2) - rotCenter(2)
+                            xxc(3) = xc(3) - rotCenter(3)
+
+                            ! Compute the velocity, which is the cross product
+                            ! of rotRate and xc.
+
+                            BCData(mm)%uSlip(i, j, 1) = rotRate(2) * xxc(3) - rotRate(3) * xxc(2)
+                            BCData(mm)%uSlip(i, j, 2) = rotRate(3) * xxc(1) - rotRate(1) * xxc(3)
+                            BCData(mm)%uSlip(i, j, 3) = rotRate(1) * xxc(2) - rotRate(2) * xxc(1)
+
+                            ! Determine the coordinates relative to the
+                            ! rigid body rotation point.
+
+                            xxc(1) = xc(1) - rotationPoint(1)
+                            xxc(2) = xc(2) - rotationPoint(2)
+                            xxc(3) = xc(3) - rotationPoint(3)
+
+                            ! Determine the total velocity of the cell center.
+                            ! This is a combination of rotation speed of this
+                            ! block and the entire rigid body rotation.
+
+                            BCData(mm)%uSlip(i, j, 1) = BCData(mm)%uSlip(i, j, 1) + velxGrid &
+                                                        + derivRotationMatrix(1, 1) * xxc(1) &
+                                                        + derivRotationMatrix(1, 2) * xxc(2) &
+                                                        + derivRotationMatrix(1, 3) * xxc(3)
+                            BCData(mm)%uSlip(i, j, 2) = BCData(mm)%uSlip(i, j, 2) + velyGrid &
+                                                        + derivRotationMatrix(2, 1) * xxc(1) &
+                                                        + derivRotationMatrix(2, 2) * xxc(2) &
+                                                        + derivRotationMatrix(2, 3) * xxc(3)
+                            BCData(mm)%uSlip(i, j, 3) = BCData(mm)%uSlip(i, j, 3) + velzGrid &
+                                                        + derivRotationMatrix(3, 1) * xxc(1) &
+                                                        + derivRotationMatrix(3, 2) * xxc(2) &
+                                                        + derivRotationMatrix(3, 3) * xxc(3)
+                        end do
+                    end do
+
+                end if
+
+            end do bocoLoop2
+
+        end if testUseOldCoor
+
+    end subroutine slipVelocitiesFineLevel_block
 
 #ifndef USE_TAPENADE
-  subroutine normalVelocitiesAllLevels(sps)
-    !
-    ! Shell function to call normalVelocities_block on all blocks/levels
-    !
-    use constants
-    use blockPointers, only : nDom, flowDoms
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use iteration, only : groundLevel
-    use utils, only : setPointers
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: sps
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn, level, nLevels
-
-    nLevels = ubound(flowDoms,2)
-    levelLoop: do level=groundLevel, nLevels
-       domains: do nn=1,nDom
-
-          ! Set the pointers for this block.
-
-          call setPointers(nn, level, sps)
-
-          call normalVelocities_block(sps)
-
-       end do domains
-    end do levelLoop
-  end subroutine normalVelocitiesAllLevels
+    subroutine normalVelocitiesAllLevels(sps)
+        !
+        ! Shell function to call normalVelocities_block on all blocks/levels
+        !
+        use constants
+        use blockPointers, only: nDom, flowDoms
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use iteration, only: groundLevel
+        use utils, only: setPointers
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: sps
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn, level, nLevels
+
+        nLevels = ubound(flowDoms, 2)
+        levelLoop: do level = groundLevel, nLevels
+            domains: do nn = 1, nDom
+
+                ! Set the pointers for this block.
+
+                call setPointers(nn, level, sps)
+
+                call normalVelocities_block(sps)
+
+            end do domains
+        end do levelLoop
+    end subroutine normalVelocitiesAllLevels
 #endif
 
-  subroutine normalVelocities_block(sps)
-    !
-    !       normalVelocitiesAllLevels computes the normal grid
-    !       velocities of some boundary faces of the moving blocks for
-    !       spectral mode sps. All grid levels from ground level to the
-    !       coarsest level are considered.
-    !
-    use constants
-    use blockPointers, only : il, jl, kl, addGridVelocities, nBocos, BCData, &
-         sfaceI, sfaceJ, sfaceK, bcFaceID, si, sj, sk
-    !use iteration
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: sps
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: mm
-    integer(kind=intType) :: i, j
+    subroutine normalVelocities_block(sps)
+        !
+        !       normalVelocitiesAllLevels computes the normal grid
+        !       velocities of some boundary faces of the moving blocks for
+        !       spectral mode sps. All grid levels from ground level to the
+        !       coarsest level are considered.
+        !
+        use constants
+        use blockPointers, only: il, jl, kl, addGridVelocities, nBocos, BCData, &
+                                 sfaceI, sfaceJ, sfaceK, bcFaceID, si, sj, sk
+        !use iteration
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: sps
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: mm
+        integer(kind=intType) :: i, j
 
-    real(kind=realType) :: weight, mult
+        real(kind=realType) :: weight, mult
 
-    real(kind=realType), dimension(:,:),   pointer :: sFace
-    real(kind=realType), dimension(:,:,:), pointer :: ss
+        real(kind=realType), dimension(:, :), pointer :: sFace
+        real(kind=realType), dimension(:, :, :), pointer :: ss
 
-    ! Check for a moving block. As it is possible that in a
-    ! multidisicplinary environment additional grid velocities
-    ! are set, the test should be done on addGridVelocities
-    ! and not on blockIsMoving.
+        ! Check for a moving block. As it is possible that in a
+        ! multidisicplinary environment additional grid velocities
+        ! are set, the test should be done on addGridVelocities
+        ! and not on blockIsMoving.
 
-    testMoving: if( addGridVelocities ) then
-       !
-       !             Determine the normal grid velocities of the boundaries.
-       !             As these values are based on the unit normal. A division
-       !             by the length of the normal is needed.
-       !             Furthermore the boundary unit normals are per definition
-       !             outward pointing, while on the iMin, jMin and kMin
-       !             boundaries the face normals are inward pointing. This
-       !             is taken into account by the factor mult.
-       !
-       ! Loop over the boundary subfaces.
+        testMoving: if (addGridVelocities) then
+            !
+            !             Determine the normal grid velocities of the boundaries.
+            !             As these values are based on the unit normal. A division
+            !             by the length of the normal is needed.
+            !             Furthermore the boundary unit normals are per definition
+            !             outward pointing, while on the iMin, jMin and kMin
+            !             boundaries the face normals are inward pointing. This
+            !             is taken into account by the factor mult.
+            !
+            ! Loop over the boundary subfaces.
 
-       bocoLoop: do mm=1,nBocos
+            bocoLoop: do mm = 1, nBocos
 
-          ! Check whether rFace is allocated.
+                ! Check whether rFace is allocated.
 
-          testAssoc: if( associated(BCData(mm)%rFace) ) then
+                testAssoc: if (associated(BCData(mm)%rFace)) then
 
-             ! Determine the block face on which the subface is
-             ! located and set some variables accordingly.
-             
-             ! The new procedure is less elegant as the previous one.
-             ! But the new stands up to Tapenade.
-             if (BCFaceID(mm) == iMin) then
+                    ! Determine the block face on which the subface is
+                    ! located and set some variables accordingly.
 
-               mult = -one
+                    ! The new procedure is less elegant as the previous one.
+                    ! But the new stands up to Tapenade.
+                    if (BCFaceID(mm) == iMin) then
 
-               do j=BCData(mm)%jcBeg, BCData(mm)%jcEnd
-                  do i=BCData(mm)%icBeg, BCData(mm)%icEnd
+                        mult = -one
 
-                     ! Compute the inverse of the length of the normal
-                     ! vector and possibly correct for inward pointing.
+                        do j = BCData(mm)%jcBeg, BCData(mm)%jcEnd
+                            do i = BCData(mm)%icBeg, BCData(mm)%icEnd
 
-                     weight = sqrt(si(1,i,j,1)**2 + si(1,i,j,2)**2 &
-                        +      si(1,i,j,3)**2)
-                     if(weight > zero) weight = mult/weight
+                                ! Compute the inverse of the length of the normal
+                                ! vector and possibly correct for inward pointing.
 
-                     ! Compute the normal velocity based on the outward
-                     ! pointing unit normal.
+                                weight = sqrt(si(1, i, j, 1)**2 + si(1, i, j, 2)**2 &
+                                              + si(1, i, j, 3)**2)
+                                if (weight > zero) weight = mult / weight
 
-                     BCData(mm)%rFace(i,j) = weight*sFaceI(1,i,j)
-                  enddo
-               enddo
+                                ! Compute the normal velocity based on the outward
+                                ! pointing unit normal.
 
-             else if (BCFaceID(mm) == iMax) then 
+                                BCData(mm)%rFace(i, j) = weight * sFaceI(1, i, j)
+                            end do
+                        end do
 
-               mult = one
+                    else if (BCFaceID(mm) == iMax) then
 
-               do j=BCData(mm)%jcBeg, BCData(mm)%jcEnd
-                  do i=BCData(mm)%icBeg, BCData(mm)%icEnd
+                        mult = one
 
-                     ! Compute the inverse of the length of the normal
-                     ! vector and possibly correct for inward pointing.
+                        do j = BCData(mm)%jcBeg, BCData(mm)%jcEnd
+                            do i = BCData(mm)%icBeg, BCData(mm)%icEnd
 
-                     weight = sqrt(si(il,i,j,1)**2 + si(il,i,j,2)**2 &
-                        +      si(il,i,j,3)**2)
-                     if(weight > zero) weight = mult/weight
+                                ! Compute the inverse of the length of the normal
+                                ! vector and possibly correct for inward pointing.
 
-                     ! Compute the normal velocity based on the outward
-                     ! pointing unit normal.
+                                weight = sqrt(si(il, i, j, 1)**2 + si(il, i, j, 2)**2 &
+                                              + si(il, i, j, 3)**2)
+                                if (weight > zero) weight = mult / weight
 
-                     BCData(mm)%rFace(i,j) = weight*sFaceI(il,i,j)
-                  enddo
-               enddo
+                                ! Compute the normal velocity based on the outward
+                                ! pointing unit normal.
 
-             else if (BCFaceID(mm) == jMin) then 
+                                BCData(mm)%rFace(i, j) = weight * sFaceI(il, i, j)
+                            end do
+                        end do
 
-               mult = -one
+                    else if (BCFaceID(mm) == jMin) then
 
-               do j=BCData(mm)%jcBeg, BCData(mm)%jcEnd
-                  do i=BCData(mm)%icBeg, BCData(mm)%icEnd
+                        mult = -one
 
-                     ! Compute the inverse of the length of the normal
-                     ! vector and possibly correct for inward pointing.
+                        do j = BCData(mm)%jcBeg, BCData(mm)%jcEnd
+                            do i = BCData(mm)%icBeg, BCData(mm)%icEnd
 
-                     weight = sqrt(sj(i,1,j,1)**2 + sj(i,1,j,2)**2 &
-                        +      sj(i,1,j,3)**2)
-                     if(weight > zero) weight = mult/weight
+                                ! Compute the inverse of the length of the normal
+                                ! vector and possibly correct for inward pointing.
 
-                     ! Compute the normal velocity based on the outward
-                     ! pointing unit normal.
+                                weight = sqrt(sj(i, 1, j, 1)**2 + sj(i, 1, j, 2)**2 &
+                                              + sj(i, 1, j, 3)**2)
+                                if (weight > zero) weight = mult / weight
 
-                     BCData(mm)%rFace(i,j) = weight*sFaceJ(i,1,j)
-                  enddo
-               enddo
+                                ! Compute the normal velocity based on the outward
+                                ! pointing unit normal.
 
-             else if (BCFaceID(mm) == jMax) then 
+                                BCData(mm)%rFace(i, j) = weight * sFaceJ(i, 1, j)
+                            end do
+                        end do
 
-               mult = one
+                    else if (BCFaceID(mm) == jMax) then
 
-               do j=BCData(mm)%jcBeg, BCData(mm)%jcEnd
-                  do i=BCData(mm)%icBeg, BCData(mm)%icEnd
+                        mult = one
 
-                     ! Compute the inverse of the length of the normal
-                     ! vector and possibly correct for inward pointing.
+                        do j = BCData(mm)%jcBeg, BCData(mm)%jcEnd
+                            do i = BCData(mm)%icBeg, BCData(mm)%icEnd
 
-                     weight = sqrt(sj(i,jl,j,1)**2 + sj(i,jl,j,2)**2 &
-                        +      sj(i,jl,j,3)**2)
-                     if(weight > zero) weight = mult/weight
+                                ! Compute the inverse of the length of the normal
+                                ! vector and possibly correct for inward pointing.
 
-                     ! Compute the normal velocity based on the outward
-                     ! pointing unit normal.
+                                weight = sqrt(sj(i, jl, j, 1)**2 + sj(i, jl, j, 2)**2 &
+                                              + sj(i, jl, j, 3)**2)
+                                if (weight > zero) weight = mult / weight
 
-                     BCData(mm)%rFace(i,j) = weight*sFaceJ(i,jl,j)
-                  enddo
-               enddo
+                                ! Compute the normal velocity based on the outward
+                                ! pointing unit normal.
 
-             else if (BCFaceID(mm) == kMin) then 
+                                BCData(mm)%rFace(i, j) = weight * sFaceJ(i, jl, j)
+                            end do
+                        end do
 
-               mult = -one
+                    else if (BCFaceID(mm) == kMin) then
 
-               do j=BCData(mm)%jcBeg, BCData(mm)%jcEnd
-                  do i=BCData(mm)%icBeg, BCData(mm)%icEnd
+                        mult = -one
 
-                     ! Compute the inverse of the length of the normal
-                     ! vector and possibly correct for inward pointing.
+                        do j = BCData(mm)%jcBeg, BCData(mm)%jcEnd
+                            do i = BCData(mm)%icBeg, BCData(mm)%icEnd
 
-                     weight = sqrt(sk(i,j,1,1)**2 + sk(i,j,1,2)**2 &
-                        +      sk(i,j,1,3)**2)
-                     if(weight > zero) weight = mult/weight
+                                ! Compute the inverse of the length of the normal
+                                ! vector and possibly correct for inward pointing.
 
-                     ! Compute the normal velocity based on the outward
-                     ! pointing unit normal.
+                                weight = sqrt(sk(i, j, 1, 1)**2 + sk(i, j, 1, 2)**2 &
+                                              + sk(i, j, 1, 3)**2)
+                                if (weight > zero) weight = mult / weight
 
-                     BCData(mm)%rFace(i,j) = weight*sFaceK(i,j,1)
-                  enddo
-               enddo
+                                ! Compute the normal velocity based on the outward
+                                ! pointing unit normal.
 
-             else if (BCFaceID(mm) == kMax) then 
+                                BCData(mm)%rFace(i, j) = weight * sFaceK(i, j, 1)
+                            end do
+                        end do
 
-               mult = one
+                    else if (BCFaceID(mm) == kMax) then
 
-               do j=BCData(mm)%jcBeg, BCData(mm)%jcEnd
-                  do i=BCData(mm)%icBeg, BCData(mm)%icEnd
+                        mult = one
 
-                     ! Compute the inverse of the length of the normal
-                     ! vector and possibly correct for inward pointing.
+                        do j = BCData(mm)%jcBeg, BCData(mm)%jcEnd
+                            do i = BCData(mm)%icBeg, BCData(mm)%icEnd
 
-                     weight = sqrt(sk(i,j,kl,1)**2 + sk(i,j,kl,2)**2 &
-                        +      sk(i,j,kl,3)**2)
-                     if(weight > zero) weight = mult/weight
+                                ! Compute the inverse of the length of the normal
+                                ! vector and possibly correct for inward pointing.
 
-                     ! Compute the normal velocity based on the outward
-                     ! pointing unit normal.
+                                weight = sqrt(sk(i, j, kl, 1)**2 + sk(i, j, kl, 2)**2 &
+                                              + sk(i, j, kl, 3)**2)
+                                if (weight > zero) weight = mult / weight
 
-                     BCData(mm)%rFace(i,j) = weight*sFaceK(i,j,kl)
-                  enddo
-               enddo
+                                ! Compute the normal velocity based on the outward
+                                ! pointing unit normal.
 
-             endif
+                                BCData(mm)%rFace(i, j) = weight * sFaceK(i, j, kl)
+                            end do
+                        end do
 
-          endif testAssoc
-       enddo bocoLoop
+                    end if
 
-    else testMoving
+                end if testAssoc
+            end do bocoLoop
 
-       ! Block is not moving. Loop over the boundary faces and set
-       ! the normal grid velocity to zero if allocated.
+            else testMoving
 
-       do mm=1,nBocos
-          if( associated(BCData(mm)%rFace) ) &
-               BCData(mm)%rFace = zero
-       enddo
+            ! Block is not moving. Loop over the boundary faces and set
+            ! the normal grid velocity to zero if allocated.
 
-    endif testMoving
+            do mm = 1, nBocos
+                if (associated(BCData(mm)%rFace)) &
+                    BCData(mm)%rFace = zero
+            end do
 
-  end subroutine normalVelocities_block
+        end if testMoving
 
-  ! ----------------------------------------------------------------------
-  !                                                                      |
-  !                    No Tapenade Routine below this line               |
-  !                                                                      |
-  ! ----------------------------------------------------------------------
+    end subroutine normalVelocities_block
 
-#ifndef USE_TAPENADE
+    ! ----------------------------------------------------------------------
+    !                                                                      |
+    !                    No Tapenade Routine below this line               |
+    !                                                                      |
+    ! ----------------------------------------------------------------------
 
-  subroutine shiftSolution
-    !
-    !       shiftSolution shifts the solution of the older time levels,
-    !       such that a new time step can be started.
-    !
-    use constants
-    use blockPointers, only: il, jl, kl, nbkglobal, wOld, w, nDom
-    use cgnsGrid, only : cgnsDoms
-    use flowvarrefstate, only : nw
-    use iteration, only : groundLevel, nOldLevels
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use inputUnsteady, only : deltaT
-    use monitor, only : timeUnsteadyRestart, timeUnsteady
-    use utils, only : setPointers, rotMatrixRigidBody
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, l, sps, nn, mm, ll
-
-    real(kind=realType) :: tOld, tNew
-    real(kind=realType) :: vvX, vvY, vvZ, vXi, vEta, vZeta
-    real(kind=realType) :: t, angleX, angleY, angleZ
-    real(kind=realType) :: phi, cosPhi, sinPhi
-    real(kind=realType) :: xiX, xiY, xiZ, etaX, etaY, etaZ
-    real(kind=realType) :: zetaX, zetaY, zetaZ
-
-    real(kind=realType), dimension(3)   :: rotationPoint
-    real(kind=realType), dimension(3,3) :: rotationMatrix
-
-    ! Compute the rotation matrix of the rigid body rotation as well
-    ! as the rotation point; the latter is not needed to correct the
-    ! velocities, but the routine rotMatrixRigidBody is also used
-    ! for coordinates.
-
-    tNew = timeUnsteady + timeUnsteadyRestart
-    tOld = tNew         - deltaT
-
-    call rotMatrixRigidBody(tNew, tOld, rotationMatrix, rotationPoint)
-
-    ! Loop over the number of spectral solutions and local blocks.
-    ! Although this routine is only called in unsteady mode where the
-    ! number of spectral solutions is 1, this loop is there just for
-    ! consistency.
-
-    spectralLoop: do sps=1,nTimeIntervalsSpectral
-       domains: do nn=1,nDom
-
-          ! Set the pointers for this block on the ground level.
-
-          call setPointers(nn, groundLevel, sps)
-
-          ! Shift the solution already stored in wOld.
-
-          loopOldLevels: do mm=nOldLevels,2,-1
-
-             ! Shift the owned solution variables from level mm-1 to mm.
-
-             ll = mm - 1
-
-             do l=1,nw
-                do k=2,kl
-                   do j=2,jl
-                      do i=2,il
-                         wOld(mm,i,j,k,l) = wOld(ll,i,j,k,l)
-                      enddo
-                   enddo
-                enddo
-             enddo
-
-          enddo loopOldLevels
-
-          ! Shift the current solution into the 1st level of wOld.
-          ! Note that in wOld the conservative flow variables are stored,
-          ! while in w the velocity components are stored and not
-          ! the momentum. Therefore this must be corrected.
-          ! Also the turbulent primitive variables are stored, but this
-          ! is okay, because the quasi-linear form of the turbulent
-          ! transport equations is solved and not the conservative one.
-
-          do l=1,nw
-             do k=2,kl
-                do j=2,jl
-                   do i=2,il
-                      wOld(1,i,j,k,l) = w(i,j,k,l)
-                   enddo
-                enddo
-             enddo
-          enddo
-
-          ! Make sure that the momentum variables are stored in wOld.
-
-          do k=2,kl
-             do j=2,jl
-                do i=2,il
-                   wOld(1,i,j,k,ivx) = wOld(1,i,j,k,ivx)*wOld(1,i,j,k,irho)
-                   wOld(1,i,j,k,ivy) = wOld(1,i,j,k,ivy)*wOld(1,i,j,k,irho)
-                   wOld(1,i,j,k,ivz) = wOld(1,i,j,k,ivz)*wOld(1,i,j,k,irho)
-                enddo
-             enddo
-          enddo
-
-          ! To improve the initial guess of the velocity field the
-          ! velocity of rotating parts is rotated. First the rigid
-          ! body motion.
-
-          do k=2,kl
-             do j=2,jl
-                do i=2,il
-                   vvX = w(i,j,k,ivx)
-                   vvY = w(i,j,k,ivy)
-                   vvZ = w(i,j,k,ivz)
-
-                   w(i,j,k,ivx) = rotationMatrix(1,1)*vvX &
-                        + rotationMatrix(1,2)*vvY &
-                        + rotationMatrix(1,3)*vvZ
-                   w(i,j,k,ivy) = rotationMatrix(2,1)*vvX &
-                        + rotationMatrix(2,2)*vvY &
-                        + rotationMatrix(2,3)*vvZ
-                   w(i,j,k,ivz) = rotationMatrix(3,1)*vvX &
-                        + rotationMatrix(3,2)*vvY &
-                        + rotationMatrix(3,3)*vvZ
-                enddo
-             enddo
-          enddo
-
-          ! Apply an additional correction for the velocity components
-          ! if a rotation rate is prescribed for this block.
+#ifndef USE_TAPENADE
 
-          rotTest: if( cgnsDoms(nbkGlobal)%rotatingFrameSpecified ) then
+    subroutine shiftSolution
+        !
+        !       shiftSolution shifts the solution of the older time levels,
+        !       such that a new time step can be started.
+        !
+        use constants
+        use blockPointers, only: il, jl, kl, nbkglobal, wOld, w, nDom
+        use cgnsGrid, only: cgnsDoms
+        use flowvarrefstate, only: nw
+        use iteration, only: groundLevel, nOldLevels
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use inputUnsteady, only: deltaT
+        use monitor, only: timeUnsteadyRestart, timeUnsteady
+        use utils, only: setPointers, rotMatrixRigidBody
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, l, sps, nn, mm, ll
+
+        real(kind=realType) :: tOld, tNew
+        real(kind=realType) :: vvX, vvY, vvZ, vXi, vEta, vZeta
+        real(kind=realType) :: t, angleX, angleY, angleZ
+        real(kind=realType) :: phi, cosPhi, sinPhi
+        real(kind=realType) :: xiX, xiY, xiZ, etaX, etaY, etaZ
+        real(kind=realType) :: zetaX, zetaY, zetaZ
+
+        real(kind=realType), dimension(3) :: rotationPoint
+        real(kind=realType), dimension(3, 3) :: rotationMatrix
+
+        ! Compute the rotation matrix of the rigid body rotation as well
+        ! as the rotation point; the latter is not needed to correct the
+        ! velocities, but the routine rotMatrixRigidBody is also used
+        ! for coordinates.
+
+        tNew = timeUnsteady + timeUnsteadyRestart
+        tOld = tNew - deltaT
+
+        call rotMatrixRigidBody(tNew, tOld, rotationMatrix, rotationPoint)
+
+        ! Loop over the number of spectral solutions and local blocks.
+        ! Although this routine is only called in unsteady mode where the
+        ! number of spectral solutions is 1, this loop is there just for
+        ! consistency.
+
+        spectralLoop: do sps = 1, nTimeIntervalsSpectral
+            domains: do nn = 1, nDom
+
+                ! Set the pointers for this block on the ground level.
+
+                call setPointers(nn, groundLevel, sps)
+
+                ! Shift the solution already stored in wOld.
+
+                loopOldLevels: do mm = nOldLevels, 2, -1
+
+                    ! Shift the owned solution variables from level mm-1 to mm.
+
+                    ll = mm - 1
+
+                    do l = 1, nw
+                        do k = 2, kl
+                            do j = 2, jl
+                                do i = 2, il
+                                    wOld(mm, i, j, k, l) = wOld(ll, i, j, k, l)
+                                end do
+                            end do
+                        end do
+                    end do
+
+                end do loopOldLevels
+
+                ! Shift the current solution into the 1st level of wOld.
+                ! Note that in wOld the conservative flow variables are stored,
+                ! while in w the velocity components are stored and not
+                ! the momentum. Therefore this must be corrected.
+                ! Also the turbulent primitive variables are stored, but this
+                ! is okay, because the quasi-linear form of the turbulent
+                ! transport equations is solved and not the conservative one.
+
+                do l = 1, nw
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 2, il
+                                wOld(1, i, j, k, l) = w(i, j, k, l)
+                            end do
+                        end do
+                    end do
+                end do
 
-             ! Compute the rotation angles.
+                ! Make sure that the momentum variables are stored in wOld.
 
-             angleX = deltaT*cgnsDoms(nbkGlobal)%rotRate(1)
-             angleY = deltaT*cgnsDoms(nbkGlobal)%rotRate(2)
-             angleZ = deltaT*cgnsDoms(nbkGlobal)%rotRate(3)
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            wOld(1, i, j, k, ivx) = wOld(1, i, j, k, ivx) * wOld(1, i, j, k, irho)
+                            wOld(1, i, j, k, ivy) = wOld(1, i, j, k, ivy) * wOld(1, i, j, k, irho)
+                            wOld(1, i, j, k, ivz) = wOld(1, i, j, k, ivz) * wOld(1, i, j, k, irho)
+                        end do
+                    end do
+                end do
 
-             ! Compute the unit vector in the direction of the rotation
-             ! axis, which will be called the xi-direction.
+                ! To improve the initial guess of the velocity field the
+                ! velocity of rotating parts is rotated. First the rigid
+                ! body motion.
+
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            vvX = w(i, j, k, ivx)
+                            vvY = w(i, j, k, ivy)
+                            vvZ = w(i, j, k, ivz)
+
+                            w(i, j, k, ivx) = rotationMatrix(1, 1) * vvX &
+                                              + rotationMatrix(1, 2) * vvY &
+                                              + rotationMatrix(1, 3) * vvZ
+                            w(i, j, k, ivy) = rotationMatrix(2, 1) * vvX &
+                                              + rotationMatrix(2, 2) * vvY &
+                                              + rotationMatrix(2, 3) * vvZ
+                            w(i, j, k, ivz) = rotationMatrix(3, 1) * vvX &
+                                              + rotationMatrix(3, 2) * vvY &
+                                              + rotationMatrix(3, 3) * vvZ
+                        end do
+                    end do
+                end do
 
-             t   = one/max(eps,sqrt(angleX**2 + angleY**2 + angleZ**2))
-             xiX = t*angleX
-             xiY = t*angleY
-             xiZ = t*angleZ
+                ! Apply an additional correction for the velocity components
+                ! if a rotation rate is prescribed for this block.
 
-             ! Determine the rotation angle in xi-direction and its sine
-             ! and cosine. Due to the definition of the xi-direction this
-             ! angle will always be positive.
+                rotTest: if (cgnsDoms(nbkGlobal)%rotatingFrameSpecified) then
 
-             phi    = xiX*angleX + xiY*angleY + xiZ*angleZ
-             cosPhi = cos(phi)
-             sinPhi = sin(phi)
+                    ! Compute the rotation angles.
 
-             ! Loop over the cell centers.
+                    angleX = deltaT * cgnsDoms(nbkGlobal)%rotRate(1)
+                    angleY = deltaT * cgnsDoms(nbkGlobal)%rotRate(2)
+                    angleZ = deltaT * cgnsDoms(nbkGlobal)%rotRate(3)
 
-             do k=2,kl
-                do j=2,jl
-                   do i=2,il
-
-                      ! Abbreviate the velocity components a bit easier.
-
-                      vvX = w(i,j,k,ivx)
-                      vvY = w(i,j,k,ivy)
-                      vvZ = w(i,j,k,ivz)
-
-                      ! Determine the component of the velocity vector
-                      ! in xi direction and determine the direction eta,
-                      ! the direction of the velocity when the xi component
-                      ! is substracted.
+                    ! Compute the unit vector in the direction of the rotation
+                    ! axis, which will be called the xi-direction.
 
-                      vXi = vvX*xiX + vvY*xiY + vvZ*xiZ
+                    t = one / max(eps, sqrt(angleX**2 + angleY**2 + angleZ**2))
+                    xiX = t * angleX
+                    xiY = t * angleY
+                    xiZ = t * angleZ
 
-                      etaX = vvX - vXi*xiX
-                      etaY = vvY - vXi*xiY
-                      etaZ = vvZ - vXi*xiZ
+                    ! Determine the rotation angle in xi-direction and its sine
+                    ! and cosine. Due to the definition of the xi-direction this
+                    ! angle will always be positive.
 
-                      t    = one/max(eps,sqrt(etaX**2 + etaY**2 + etaZ**2))
-                      etaX = t*etaX
-                      etaY = t*etaY
-                      etaZ = t*etaZ
+                    phi = xiX * angleX + xiY * angleY + xiZ * angleZ
+                    cosPhi = cos(phi)
+                    sinPhi = sin(phi)
 
-                      ! Determine the velocity component in eta direction.
+                    ! Loop over the cell centers.
 
-                      vEta = vvX*etaX + vvY*etaY + vvZ*etaZ
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 2, il
 
-                      ! Determine the unit vector in zeta-direction. This is
-                      ! the cross product of the unit vectors in xi and in
-                      ! eta-direction.
+                                ! Abbreviate the velocity components a bit easier.
 
-                      zetaX = xiY*etaZ - xiZ*etaY
-                      zetaY = xiZ*etaX - xiX*etaZ
-                      zetaZ = xiX*etaY - xiY*etaX
+                                vvX = w(i, j, k, ivx)
+                                vvY = w(i, j, k, ivy)
+                                vvZ = w(i, j, k, ivz)
 
-                      ! Determine the velocity components in eta and zeta
-                      ! direction after the rotation.
+                                ! Determine the component of the velocity vector
+                                ! in xi direction and determine the direction eta,
+                                ! the direction of the velocity when the xi component
+                                ! is substracted.
 
-                      vZeta = vEta*sinPhi
-                      vEta  = vEta*cosPhi
+                                vXi = vvX * xiX + vvY * xiY + vvZ * xiZ
 
-                      ! Compute the new Cartesian velocity components.
+                                etaX = vvX - vXi * xiX
+                                etaY = vvY - vXi * xiY
+                                etaZ = vvZ - vXi * xiZ
 
-                      w(i,j,k,ivx) = vXi*xiX + vEta*etaX + vZeta*zetaX
-                      w(i,j,k,ivy) = vXi*xiY + vEta*etaY + vZeta*zetaY
-                      w(i,j,k,ivz) = vXi*xiZ + vEta*etaZ + vZeta*zetaZ
+                                t = one / max(eps, sqrt(etaX**2 + etaY**2 + etaZ**2))
+                                etaX = t * etaX
+                                etaY = t * etaY
+                                etaZ = t * etaZ
 
-                   enddo
-                enddo
-             enddo
+                                ! Determine the velocity component in eta direction.
 
-          endif rotTest
+                                vEta = vvX * etaX + vvY * etaY + vvZ * etaZ
 
-       enddo domains
-    enddo spectralLoop
+                                ! Determine the unit vector in zeta-direction. This is
+                                ! the cross product of the unit vectors in xi and in
+                                ! eta-direction.
 
-  end subroutine shiftSolution
+                                zetaX = xiY * etaZ - xiZ * etaY
+                                zetaY = xiZ * etaX - xiX * etaZ
+                                zetaZ = xiX * etaY - xiY * etaX
 
-  subroutine computeUtau
-    !
-    ! Shell function to call computUTau  on all blocks
-    !
-    use constants
-    use blockPointers, only : nDom
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use Iteration, only : groundLevel
-    use utils, only : setPointers
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: sps, nn
+                                ! Determine the velocity components in eta and zeta
+                                ! direction after the rotation.
 
-    ! Loop over the number of spectral solutions.
+                                vZeta = vEta * sinPhi
+                                vEta = vEta * cosPhi
 
-    spectralLoop: do sps=1,nTimeIntervalsSpectral
+                                ! Compute the new Cartesian velocity components.
 
-       ! Loop over the number of blocks.
+                                w(i, j, k, ivx) = vXi * xiX + vEta * etaX + vZeta * zetaX
+                                w(i, j, k, ivy) = vXi * xiY + vEta * etaY + vZeta * zetaY
+                                w(i, j, k, ivz) = vXi * xiZ + vEta * etaZ + vZeta * zetaZ
 
-       domains: do nn=1,nDom
+                            end do
+                        end do
+                    end do
 
-          ! Set the pointers for this block.
+                end if rotTest
 
-          call setPointers(nn, groundLevel, sps)
+            end do domains
+        end do spectralLoop
 
-          call computeUtau_block
+    end subroutine shiftSolution
 
-       end do domains
+    subroutine computeUtau
+        !
+        ! Shell function to call computUTau  on all blocks
+        !
+        use constants
+        use blockPointers, only: nDom
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use Iteration, only: groundLevel
+        use utils, only: setPointers
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: sps, nn
 
-    end do spectralLoop
+        ! Loop over the number of spectral solutions.
 
-  end subroutine computeUtau
+        spectralLoop: do sps = 1, nTimeIntervalsSpectral
 
-  subroutine computeUtau_block
-    !
-    !       computeUtau computes the skin friction velocity for the
-    !       viscous subfaces. This data is only needed if wall functions
-    !       are used.
-    !
-    use constants
-    use blockPointers
-    use inputPhysics
-    use inputTimeSpectral
-    use iteration
-    use turbCurveFits, only : curveUpRe
-
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: mm, i, j
+            ! Loop over the number of blocks.
 
-    real(kind=realType) :: re, vvx, vvy, vvz, veln, veltmag
+            domains: do nn = 1, nDom
 
-    real(kind=realType), dimension(:,:,:), pointer :: ww, norm, uSlip
-    real(kind=realType), dimension(:,:),   pointer :: dd2Wall, rrlv
-    real(kind=realType), dimension(:,:),   pointer :: utau
+                ! Set the pointers for this block.
 
-    ! Return immediately if no wall functions must be used.
+                call setPointers(nn, groundLevel, sps)
 
-    if(.not. wallFunctions) return
+                call computeUtau_block
 
-    ! Loop over the viscous subfaces of this block.
+            end do domains
 
-    viscSubfaces: do mm=1,nViscBocos
+        end do spectralLoop
 
-       ! Set a bunch of pointers depending on the face id to make
-       ! a generic treatment possible.
+    end subroutine computeUtau
 
-       select case (BCFaceID(mm))
+    subroutine computeUtau_block
+        !
+        !       computeUtau computes the skin friction velocity for the
+        !       viscous subfaces. This data is only needed if wall functions
+        !       are used.
+        !
+        use constants
+        use blockPointers
+        use inputPhysics
+        use inputTimeSpectral
+        use iteration
+        use turbCurveFits, only: curveUpRe
 
-       case (iMin)
-          ww => w(2,1:,1:,:);
-          dd2Wall => d2Wall(2,:,:); rrlv => rlv(2,1:,1:)
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: mm, i, j
 
-          !=========================================================
+        real(kind=realType) :: re, vvx, vvy, vvz, veln, veltmag
 
-       case (iMax)
-          ww => w(il,1:,1:,:)
-          dd2Wall => d2Wall(il,:,:); rrlv => rlv(il,1:,1:)
+        real(kind=realType), dimension(:, :, :), pointer :: ww, norm, uSlip
+        real(kind=realType), dimension(:, :), pointer :: dd2Wall, rrlv
+        real(kind=realType), dimension(:, :), pointer :: utau
 
-          !=========================================================
+        ! Return immediately if no wall functions must be used.
 
-       case (jMin)
-          ww => w(1:,2,1:,:)
-          dd2Wall => d2Wall(:,2,:); rrlv => rlv(1:,2,1:)
+        if (.not. wallFunctions) return
 
-          !=========================================================
+        ! Loop over the viscous subfaces of this block.
 
-       case (jMax)
-          ww => w(1:,jl,1:,:)
-          dd2Wall => d2Wall(:,jl,:); rrlv => rlv(1:,jl,1:)
+        viscSubfaces: do mm = 1, nViscBocos
 
-          !=========================================================
+            ! Set a bunch of pointers depending on the face id to make
+            ! a generic treatment possible.
 
-       case (kMin)
-          ww => w(1:,1:,2,:)
-          dd2Wall => d2Wall(:,:,2); rrlv => rlv(1:,1:,2)
+            select case (BCFaceID(mm))
 
-          !=========================================================
+            case (iMin)
+                ww => w(2, 1:, 1:, :); 
+                dd2Wall => d2Wall(2, :, :); rrlv => rlv(2, 1:, 1:)
 
-       case (kMax)
-          ww => w(1:,1:,kl,:)
-          dd2Wall => d2Wall(:,:,kl); rrlv => rlv(1:,1:,kl)
+                !=========================================================
 
-       end select
+            case (iMax)
+                ww => w(il, 1:, 1:, :)
+                dd2Wall => d2Wall(il, :, :); rrlv => rlv(il, 1:, 1:)
 
-       ! Set the pointers for the unit outward normals, uSlip
-       ! and utau to make the code more readable.
+                !=========================================================
 
-       norm  => BCData(mm)%norm
-       uSlip => BCData(mm)%uSlip
-       utau  => viscSubface(mm)%utau
+            case (jMin)
+                ww => w(1:, 2, 1:, :)
+                dd2Wall => d2Wall(:, 2, :); rrlv => rlv(1:, 2, 1:)
 
-       ! Loop over the quadrilateral faces of the subface. Note
-       ! that the nodal range of BCData must be used and not the
-       ! cell range, because the latter may include the halo's in i
-       ! and j-direction. The offset +1 is there, because inBeg and
-       ! jnBeg refer to nodal ranges and not to cell ranges.
-       ! Note that an offset of -1 must be used in dd2Wall, because
-       ! the original array d2Wall starts at 2.
+                !=========================================================
 
-       do j=(BCData(mm)%jnBeg+1),BCData(mm)%jnEnd
-          do i=(BCData(mm)%inBeg+1),BCData(mm)%inEnd
+            case (jMax)
+                ww => w(1:, jl, 1:, :)
+                dd2Wall => d2Wall(:, jl, :); rrlv => rlv(1:, jl, 1:)
 
-             ! Compute the velocity difference between the internal
-             ! cell and the wall.
+                !=========================================================
 
-             vvx = ww(i,j,ivx) - uSlip(i,j,1)
-             vvy = ww(i,j,ivy) - uSlip(i,j,2)
-             vvz = ww(i,j,ivz) - uSlip(i,j,3)
+            case (kMin)
+                ww => w(1:, 1:, 2, :)
+                dd2Wall => d2Wall(:, :, 2); rrlv => rlv(1:, 1:, 2)
 
-             ! Compute the normal velocity of the internal cell.
+                !=========================================================
 
-             veln  = vvx*norm(i,j,1) + vvy*norm(i,j,2) + vvz*norm(i,j,3)
+            case (kMax)
+                ww => w(1:, 1:, kl, :)
+                dd2Wall => d2Wall(:, :, kl); rrlv => rlv(1:, 1:, kl)
 
-             ! Compute the magnitude of the tangential velocity.
+            end select
 
-             veltmag = max(eps,sqrt(vvx*vvx + vvy*vvy + vvz*vvz - veln*veln))
+            ! Set the pointers for the unit outward normals, uSlip
+            ! and utau to make the code more readable.
 
-             ! Compute the Reynolds number. Note that an offset of -1
-             ! must be used in dd2Wall, because the original array
-             ! d2Wall starts at 2.
-             ! Afterwards compute utau.
+            norm => BCData(mm)%norm
+            uSlip => BCData(mm)%uSlip
+            utau => viscSubface(mm)%utau
 
-             re = ww(i,j,irho)*veltmag*dd2Wall(i-1,j-1)/rrlv(i,j)
-             utau(i,j) = veltmag/max(curveUpRe(re),eps)
+            ! Loop over the quadrilateral faces of the subface. Note
+            ! that the nodal range of BCData must be used and not the
+            ! cell range, because the latter may include the halo's in i
+            ! and j-direction. The offset +1 is there, because inBeg and
+            ! jnBeg refer to nodal ranges and not to cell ranges.
+            ! Note that an offset of -1 must be used in dd2Wall, because
+            ! the original array d2Wall starts at 2.
 
-          enddo
-       enddo
+            do j = (BCData(mm)%jnBeg + 1), BCData(mm)%jnEnd
+                do i = (BCData(mm)%inBeg + 1), BCData(mm)%inEnd
 
-    enddo viscSubfaces
+                    ! Compute the velocity difference between the internal
+                    ! cell and the wall.
 
-  end subroutine computeUtau_block
+                    vvx = ww(i, j, ivx) - uSlip(i, j, 1)
+                    vvy = ww(i, j, ivy) - uSlip(i, j, 2)
+                    vvz = ww(i, j, ivz) - uSlip(i, j, 3)
 
+                    ! Compute the normal velocity of the internal cell.
 
-  subroutine gridVelocitiesFineLevelPart1(useOldCoor, t, sps)
-    !
-    ! Shell function to call gridVelocitiesFineLevel on all blocks
-    !
-    use blockPointers
-    use constants
-    use inputTimeSpectral
-    use iteration
-    use utils, only : setPointers
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: sps
-    logical,               intent(in) :: useOldCoor
-    real(kind=realType), dimension(*), intent(in) :: t  !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn
+                    veln = vvx * norm(i, j, 1) + vvy * norm(i, j, 2) + vvz * norm(i, j, 3)
 
-    ! Loop over the number of blocks.
+                    ! Compute the magnitude of the tangential velocity.
 
-    domains: do nn=1,nDom
+                    veltmag = max(eps, sqrt(vvx * vvx + vvy * vvy + vvz * vvz - veln * veln))
 
-       ! Set the pointers for this block.
+                    ! Compute the Reynolds number. Note that an offset of -1
+                    ! must be used in dd2Wall, because the original array
+                    ! d2Wall starts at 2.
+                    ! Afterwards compute utau.
 
-       call setPointers(nn, groundLevel, sps)
-       call gridVelocitiesFineLevelPart1_block(useOldCoor, t, sps)
+                    re = ww(i, j, irho) * veltmag * dd2Wall(i - 1, j - 1) / rrlv(i, j)
+                    utau(i, j) = veltmag / max(curveUpRe(re), eps)
 
-    end do domains
+                end do
+            end do
+
+        end do viscSubfaces
+
+    end subroutine computeUtau_block
+
+    subroutine gridVelocitiesFineLevelPart1(useOldCoor, t, sps)
+        !
+        ! Shell function to call gridVelocitiesFineLevel on all blocks
+        !
+        use blockPointers
+        use constants
+        use inputTimeSpectral
+        use iteration
+        use utils, only: setPointers
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: sps
+        logical, intent(in) :: useOldCoor
+        real(kind=realType), dimension(*), intent(in) :: t  !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn
+
+        ! Loop over the number of blocks.
+
+        domains: do nn = 1, nDom
+
+            ! Set the pointers for this block.
+
+            call setPointers(nn, groundLevel, sps)
+            call gridVelocitiesFineLevelPart1_block(useOldCoor, t, sps)
+
+        end do domains
+
+    end subroutine gridVelocitiesFineLevelPart1
+
+    subroutine gridVelocitiesFineLevelPart1_block(useOldCoor, t, sps)
+        !
+        !       gridVelocitiesFineLevel computes the grid velocities for
+        !       the cell centers and the normal grid velocities for the faces
+        !       of moving blocks for the currently finest grid, i.e.
+        !       groundLevel. The velocities are computed at time t for
+        !       spectral mode sps. If useOldCoor is .true. the velocities
+        !       are determined using the unsteady time integrator in
+        !       combination with the old coordinates; otherwise the analytic
+        !       form is used.
+        !       Now it is split up into two parts.
+        !       First part calculate the grid velocity using FIRST order BDF.
+        !       Second part calculate the surface normal and normal velocity.
+        !
+        use blockPointers
+        use cgnsGrid
+        use flowVarRefState
+        use inputMotion
+        use inputUnsteady
+        use iteration
+        use inputPhysics
+        use inputTSStabDeriv
+        use monitor
+        use communication
+        use utils, only: TSAlpha, TSBeta, TSMach, terminate, rotMatrixRigidBody, &
+                         setCoefTimeIntegrator, getDirAngle
+        use flowUtils, only: derivativeRotMatrixRigid, getDirVector
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: sps
+        logical, intent(in) :: useOldCoor
+
+        real(kind=realType), dimension(*), intent(in) :: t
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn, mm
+        integer(kind=intType) :: i, j, k, ii, iie, jje, kke
+
+        real(kind=realType) :: oneOver4dt, oneOver8dt
+        real(kind=realType) :: velxGrid, velyGrid, velzGrid, ainf
+        real(kind=realType) :: velxGrid0, velyGrid0, velzGrid0
+
+        real(kind=realType), dimension(3) :: sc, xc, xxc
+        real(kind=realType), dimension(3) :: rotCenter, rotRate
+
+        real(kind=realType), dimension(3) :: rotationPoint
+        real(kind=realType), dimension(3, 3) :: rotationMatrix, &
+                                                derivRotationMatrix
+
+        real(kind=realType) :: tNew, tOld
+        real(kind=realType), dimension(:, :), pointer :: sFace
+        real(kind=realType), dimension(:, :, :), pointer :: sVelo
+
+        real(kind=realType), dimension(:, :, :), pointer :: xx, ss
+        real(kind=realType), dimension(:, :, :, :), pointer :: xxOld
+
+        real(kind=realType) :: intervalMach, alphaTS, alphaIncrement, &
+                               betaTS, betaIncrement
+        real(kind=realType), dimension(3) :: velDir
+        real(kind=realType), dimension(3) :: refDirection
+
+        ! Compute the mesh velocity from the given mesh Mach number.
+
+        ! vel{x,y,z}Grid0 is the ACTUAL velocity you want at the
+        ! geometry.
+        aInf = sqrt(gammaInf * pInf / rhoInf)
+        velxGrid0 = (aInf * machgrid) * (-velDirFreestream(1))
+        velyGrid0 = (aInf * machgrid) * (-velDirFreestream(2))
+        velzGrid0 = (aInf * machgrid) * (-velDirFreestream(3))
+
+        ! Compute the derivative of the rotation matrix and the rotation
+        ! point; needed for velocity due to the rigid body rotation of
+        ! the entire grid. It is assumed that the rigid body motion of
+        ! the grid is only specified if there is only 1 section present.
+
+        call derivativeRotMatrixRigid(derivRotationMatrix, rotationPoint, t(1))
+
+        !compute the rotation matrix to update the velocities for the time
+        !spectral stability derivative case...
+
+        if (TSStability) then
+            ! Determine the time values of the old and new time level.
+            ! It is assumed that the rigid body rotation of the mesh is only
+            ! used when only 1 section is present.
+
+            tNew = timeUnsteady + timeUnsteadyRestart
+            tOld = tNew - t(1)
+
+            if (TSpMode .or. TSqMode .or. TSrMode) then
+                ! Compute the rotation matrix of the rigid body rotation as
+                ! well as the rotation point; the latter may vary in time due
+                ! to rigid body translation.
+
+                call rotMatrixRigidBody(tNew, tOld, rotationMatrix, rotationPoint)
+
+                velxgrid0 = rotationMatrix(1, 1) * velxgrid0 &
+                            + rotationMatrix(1, 2) * velygrid0 &
+                            + rotationMatrix(1, 3) * velzgrid0
+                velygrid0 = rotationMatrix(2, 1) * velxgrid0 &
+                            + rotationMatrix(2, 2) * velygrid0 &
+                            + rotationMatrix(2, 3) * velzgrid0
+                velzgrid0 = rotationMatrix(3, 1) * velxgrid0 &
+                            + rotationMatrix(3, 2) * velygrid0 &
+                            + rotationMatrix(3, 3) * velzgrid0
+
+            elseif (tsAlphaMode) then
+                !Determine the alpha for this time instance
+                alphaIncrement = TSAlpha(degreePolAlpha, coefPolAlpha, &
+                                         degreeFourAlpha, omegaFourAlpha, &
+                                         cosCoefFourAlpha, sinCoefFourAlpha, t(1))
+
+                alphaTS = alpha + alphaIncrement
+                !Determine the grid velocity for this alpha
+                refDirection(:) = zero
+                refDirection(1) = one
+                call getDirVector(refDirection, alphaTS, beta, velDir, liftIndex)
+
+                !do I need to update the lift direction and drag direction as well?
+                !set the effictive grid velocity for this time interval
+                velxGrid0 = (aInf * machgrid) * (-velDir(1))
+                velyGrid0 = (aInf * machgrid) * (-velDir(2))
+                velzGrid0 = (aInf * machgrid) * (-velDir(3))
+
+            elseif (tsBetaMode) then
+
+                !Determine the alpha for this time instance
+                betaIncrement = TSBeta(degreePolBeta, coefPolBeta, &
+                                       degreeFourBeta, omegaFourBeta, &
+                                       cosCoefFourBeta, sinCoefFourBeta, t(1))
+
+                betaTS = beta + betaIncrement
+                !Determine the grid velocity for this alpha
+                refDirection(:) = zero
+                refDirection(1) = one
+                call getDirVector(refDirection, alpha, betaTS, velDir, liftIndex)
+
+                !do I need to update the lift direction and drag direction as well?
+                !set the effictive grid velocity for this time interval
+                velxGrid0 = (aInf * machgrid) * (-velDir(1))
+                velyGrid0 = (aInf * machgrid) * (-velDir(2))
+                velzGrid0 = (aInf * machgrid) * (-velDir(3))
+            elseif (TSMachMode) then
+                !determine the mach number at this time interval
+                IntervalMach = TSMach(degreePolMach, coefPolMach, &
+                                      degreeFourMach, omegaFourMach, &
+                                      cosCoefFourMach, sinCoefFourMach, t(1))
+                !set the effective grid velocity
+                velxGrid0 = (aInf * (IntervalMach + machgrid)) * (-velDirFreestream(1))
+                velyGrid0 = (aInf * (IntervalMach + machgrid)) * (-velDirFreestream(2))
+                velzGrid0 = (aInf * (IntervalMach + machgrid)) * (-velDirFreestream(3))
+
+            elseif (TSAltitudeMode) then
+                call terminate('gridVelocityFineLevel', 'altitude motion not yet implemented...')
+            else
+                call terminate('gridVelocityFineLevel', 'Not a recognized Stability Motion')
+            end if
+        end if
+
+        testMoving: if (blockIsMoving) then
+            ! REMOVED the rigid body rotation part for simplicity
+
+            !
+            !             The velocities must be determined via a finite
+            !             difference formula using the coordinates of the old
+            !             levels.
+            !
+            ! Set the coefficients for the time integrator and store
+            ! the inverse of the physical nonDimensional time step,
+            ! divided by 4 and 8, a bit easier.
+
+            call setCoefTimeIntegrator
+            oneOver4dt = fourth * timeRef / deltaT
+            oneOver8dt = half * oneOver4dt
+            !
+            !             Grid velocities of the cell centers, including the
+            !             1st level halo cells.
+            !
+            ! Loop over the cells, including the 1st level halo's.
+
+            do k = 1, ke
+                do j = 1, je
+                    do i = 1, ie
+
+                        ! Using FIRST order BDF for all cases
+                        ! Refer to eq. 11b, found paper by C.Farhat http://dx.doi.org/10.1016/S0021-9991(03)00311-5
+                        ! Same applies for the velocities of the faces below. Theta(n+1) = 1, Theta(n) = -1 therfore
+                        ! it becoms a first order scheme.
+
+                        ! The velocity of the cell center is determined
+                        ! by a finite difference formula. First store
+                        ! the current coordinate, multiplied by 8 and
+                        ! coefTime(0) in sc.
+
+                        sc(1) = (x(i - 1, j - 1, k - 1, 1) + x(i, j - 1, k - 1, 1) &
+                                 + x(i - 1, j, k - 1, 1) + x(i, j, k - 1, 1) &
+                                 + x(i - 1, j - 1, k, 1) + x(i, j - 1, k, 1) &
+                                 + x(i - 1, j, k, 1) + x(i, j, k, 1))
+                        sc(2) = (x(i - 1, j - 1, k - 1, 2) + x(i, j - 1, k - 1, 2) &
+                                 + x(i - 1, j, k - 1, 2) + x(i, j, k - 1, 2) &
+                                 + x(i - 1, j - 1, k, 2) + x(i, j - 1, k, 2) &
+                                 + x(i - 1, j, k, 2) + x(i, j, k, 2))
+                        sc(3) = (x(i - 1, j - 1, k - 1, 3) + x(i, j - 1, k - 1, 3) &
+                                 + x(i - 1, j, k - 1, 3) + x(i, j, k - 1, 3) &
+                                 + x(i - 1, j - 1, k, 3) + x(i, j - 1, k, 3) &
+                                 + x(i - 1, j, k, 3) + x(i, j, k, 3))
+
+                        ! Loop over the older levels to complete the
+                        ! finite difference formula.
+
+                        ii = 1 ! There was a loop over all old levels
+                        sc(1) = sc(1) + (xOld(ii, i - 1, j - 1, k - 1, 1) &
+                                         + xOld(ii, i, j - 1, k - 1, 1) &
+                                         + xOld(ii, i - 1, j, k - 1, 1) &
+                                         + xOld(ii, i, j, k - 1, 1) &
+                                         + xOld(ii, i - 1, j - 1, k, 1) &
+                                         + xOld(ii, i, j - 1, k, 1) &
+                                         + xOld(ii, i - 1, j, k, 1) &
+                                         + xOld(ii, i, j, k, 1)) &
+                                * (-1.0_realType)
+                        sc(2) = sc(2) + (xOld(ii, i - 1, j - 1, k - 1, 2) &
+                                         + xOld(ii, i, j - 1, k - 1, 2) &
+                                         + xOld(ii, i - 1, j, k - 1, 2) &
+                                         + xOld(ii, i, j, k - 1, 2) &
+                                         + xOld(ii, i - 1, j - 1, k, 2) &
+                                         + xOld(ii, i, j - 1, k, 2) &
+                                         + xOld(ii, i - 1, j, k, 2) &
+                                         + xOld(ii, i, j, k, 2)) &
+                                * (-1.0_realType)
+                        sc(3) = sc(3) + (xOld(ii, i - 1, j - 1, k - 1, 3) &
+                                         + xOld(ii, i, j - 1, k - 1, 3) &
+                                         + xOld(ii, i - 1, j, k - 1, 3) &
+                                         + xOld(ii, i, j, k - 1, 3) &
+                                         + xOld(ii, i - 1, j - 1, k, 3) &
+                                         + xOld(ii, i, j - 1, k, 3) &
+                                         + xOld(ii, i - 1, j, k, 3) &
+                                         + xOld(ii, i, j, k, 3)) &
+                                * (-1.0_realType)
+
+                        ! Divide by 8 delta t to obtain the correct
+                        ! velocities.
+
+                        s(i, j, k, 1) = sc(1) * oneOver8dt
+                        s(i, j, k, 2) = sc(2) * oneOver8dt
+                        s(i, j, k, 3) = sc(3) * oneOver8dt
+                    end do
+                end do
+            end do
 
-  end subroutine gridVelocitiesFineLevelPart1
+            !
+            !             Velocities of the faces, vector.
+            !
+            ! Loop over the three directions.
 
-  subroutine gridVelocitiesFineLevelPart1_block(useOldCoor, t, sps)
-    !
-    !       gridVelocitiesFineLevel computes the grid velocities for
-    !       the cell centers and the normal grid velocities for the faces
-    !       of moving blocks for the currently finest grid, i.e.
-    !       groundLevel. The velocities are computed at time t for
-    !       spectral mode sps. If useOldCoor is .true. the velocities
-    !       are determined using the unsteady time integrator in
-    !       combination with the old coordinates; otherwise the analytic
-    !       form is used.
-    !       Now it is split up into two parts.
-    !       First part calculate the grid velocity using FIRST order BDF.
-    !       Second part calculate the surface normal and normal velocity.
-    !
-    use blockPointers
-    use cgnsGrid
-    use flowVarRefState
-    use inputMotion
-    use inputUnsteady
-    use iteration
-    use inputPhysics
-    use inputTSStabDeriv
-    use monitor
-    use communication
-    use utils, only : TSAlpha, TSBeta, TSMach, terminate, rotMatrixRigidBody, &
-         setCoefTimeIntegrator, getDirAngle
-    use flowUtils, only : derivativeRotMatrixRigid, getDirVector
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: sps
-    logical,               intent(in) :: useOldCoor
+            loopDir: do mm = 1, 3
 
-    real(kind=realType), dimension(*), intent(in) :: t
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn, mm
-    integer(kind=intType) :: i, j, k, ii, iie, jje, kke
-
-    real(kind=realType) :: oneOver4dt, oneOver8dt
-    real(kind=realType) :: velxGrid, velyGrid, velzGrid,ainf
-    real(kind=realType) :: velxGrid0, velyGrid0, velzGrid0
-
-    real(kind=realType), dimension(3) :: sc, xc, xxc
-    real(kind=realType), dimension(3) :: rotCenter, rotRate
-
-    real(kind=realType), dimension(3)   :: rotationPoint
-    real(kind=realType), dimension(3,3) :: rotationMatrix,&
-         derivRotationMatrix
-
-    real(kind=realType) :: tNew, tOld
-    real(kind=realType), dimension(:,:), pointer :: sFace
-    real(kind=realType), dimension(:,:,:),   pointer :: sVelo
-
-    real(kind=realType), dimension(:,:,:),   pointer :: xx, ss
-    real(kind=realType), dimension(:,:,:,:), pointer :: xxOld
-
-    real(kind=realType) :: intervalMach,alphaTS,alphaIncrement,&
-         betaTS,betaIncrement
-    real(kind=realType), dimension(3) ::velDir
-    real(kind=realType), dimension(3) :: refDirection
-
-    ! Compute the mesh velocity from the given mesh Mach number.
-
-    ! vel{x,y,z}Grid0 is the ACTUAL velocity you want at the
-    ! geometry.
-    aInf = sqrt(gammaInf*pInf/rhoInf)
-    velxGrid0 = (aInf*machgrid)*(-velDirFreestream(1))
-    velyGrid0 = (aInf*machgrid)*(-velDirFreestream(2))
-    velzGrid0 = (aInf*machgrid)*(-velDirFreestream(3))
-
-    ! Compute the derivative of the rotation matrix and the rotation
-    ! point; needed for velocity due to the rigid body rotation of
-    ! the entire grid. It is assumed that the rigid body motion of
-    ! the grid is only specified if there is only 1 section present.
-
-    call derivativeRotMatrixRigid(derivRotationMatrix, rotationPoint, t(1))
-
-    !compute the rotation matrix to update the velocities for the time
-    !spectral stability derivative case...
-
-    if(TSStability)then
-       ! Determine the time values of the old and new time level.
-       ! It is assumed that the rigid body rotation of the mesh is only
-       ! used when only 1 section is present.
-
-       tNew = timeUnsteady + timeUnsteadyRestart
-       tOld = tNew - t(1)
-
-       if(TSpMode.or. TSqMode .or.TSrMode) then
-          ! Compute the rotation matrix of the rigid body rotation as
-          ! well as the rotation point; the latter may vary in time due
-          ! to rigid body translation.
-
-          call rotMatrixRigidBody(tNew, tOld, rotationMatrix, rotationPoint)
-
-          velxgrid0 = rotationMatrix(1,1)*velxgrid0 &
-               + rotationMatrix(1,2)*velygrid0 &
-               + rotationMatrix(1,3)*velzgrid0
-          velygrid0 = rotationMatrix(2,1)*velxgrid0 &
-               + rotationMatrix(2,2)*velygrid0 &
-               + rotationMatrix(2,3)*velzgrid0
-          velzgrid0 = rotationMatrix(3,1)*velxgrid0 &
-               + rotationMatrix(3,2)*velygrid0 &
-               + rotationMatrix(3,3)*velzgrid0
-
-       elseif(tsAlphaMode)then
-          !Determine the alpha for this time instance
-          alphaIncrement = TSAlpha(degreePolAlpha,   coefPolAlpha,       &
-               degreeFourAlpha,  omegaFourAlpha,     &
-               cosCoefFourAlpha, sinCoefFourAlpha, t(1))
-
-          alphaTS = alpha+alphaIncrement
-          !Determine the grid velocity for this alpha
-          refDirection(:) = zero
-          refDirection(1) = one
-          call getDirVector(refDirection, alphaTS, beta, velDir, liftIndex)
-
-          !do I need to update the lift direction and drag direction as well?
-          !set the effictive grid velocity for this time interval
-          velxGrid0 = (aInf*machgrid)*(-velDir(1))
-          velyGrid0 = (aInf*machgrid)*(-velDir(2))
-          velzGrid0 = (aInf*machgrid)*(-velDir(3))
-
-       elseif(tsBetaMode)then
-
-          !Determine the alpha for this time instance
-          betaIncrement = TSBeta(degreePolBeta,   coefPolBeta,       &
-               degreeFourBeta,  omegaFourBeta,     &
-               cosCoefFourBeta, sinCoefFourBeta, t(1))
-
-          betaTS = beta+betaIncrement
-          !Determine the grid velocity for this alpha
-          refDirection(:) = zero
-          refDirection(1) = one
-          call getDirVector(refDirection, alpha, betaTS, velDir, liftIndex)
-
-          !do I need to update the lift direction and drag direction as well?
-          !set the effictive grid velocity for this time interval
-          velxGrid0 = (aInf*machgrid)*(-velDir(1))
-          velyGrid0 = (aInf*machgrid)*(-velDir(2))
-          velzGrid0 = (aInf*machgrid)*(-velDir(3))
-       elseif(TSMachMode)then
-          !determine the mach number at this time interval
-          IntervalMach = TSMach(degreePolMach,   coefPolMach,       &
-               degreeFourMach,  omegaFourMach,     &
-               cosCoefFourMach, sinCoefFourMach, t(1))
-          !set the effective grid velocity
-          velxGrid0 = (aInf*(IntervalMach+machgrid))*(-velDirFreestream(1))
-          velyGrid0 = (aInf*(IntervalMach+machgrid))*(-velDirFreestream(2))
-          velzGrid0 = (aInf*(IntervalMach+machgrid))*(-velDirFreestream(3))
-
-       elseif(TSAltitudeMode)then
-          call terminate('gridVelocityFineLevel','altitude motion not yet implemented...')
-       else
-          call terminate('gridVelocityFineLevel','Not a recognized Stability Motion')
-       end if
-    endif
-
-    testMoving: if( blockIsMoving ) then
-       ! REMOVED the rigid body rotation part for simplicity
-
-       !
-       !             The velocities must be determined via a finite
-       !             difference formula using the coordinates of the old
-       !             levels.
-       !
-       ! Set the coefficients for the time integrator and store
-       ! the inverse of the physical nonDimensional time step,
-       ! divided by 4 and 8, a bit easier.
-
-       call setCoefTimeIntegrator
-       oneOver4dt = fourth*timeRef/deltaT
-       oneOver8dt = half*oneOver4dt
-       !
-       !             Grid velocities of the cell centers, including the
-       !             1st level halo cells.
-       !
-       ! Loop over the cells, including the 1st level halo's.
-
-       do k=1,ke
-          do j=1,je
-             do i=1,ie
-
-                ! Using FIRST order BDF for all cases
-                ! Refer to eq. 11b, found paper by C.Farhat http://dx.doi.org/10.1016/S0021-9991(03)00311-5
-                ! Same applies for the velocities of the faces below. Theta(n+1) = 1, Theta(n) = -1 therfore
-                ! it becoms a first order scheme.
-
-                ! The velocity of the cell center is determined
-                ! by a finite difference formula. First store
-                ! the current coordinate, multiplied by 8 and
-                ! coefTime(0) in sc.
-
-                sc(1) = (x(i-1,j-1,k-1,1) + x(i,j-1,k-1,1)  &
-                     +  x(i-1,j,  k-1,1) + x(i,j,  k-1,1)  &
-                     +  x(i-1,j-1,k,  1) + x(i,j-1,k,  1)  &
-                     +  x(i-1,j,  k,  1) + x(i,j,  k,  1))
-                sc(2) = (x(i-1,j-1,k-1,2) + x(i,j-1,k-1,2)  &
-                     +  x(i-1,j,  k-1,2) + x(i,j,  k-1,2)  &
-                     +  x(i-1,j-1,k,  2) + x(i,j-1,k,  2)  &
-                     +  x(i-1,j,  k,  2) + x(i,j,  k,  2))
-                sc(3) = (x(i-1,j-1,k-1,3) + x(i,j-1,k-1,3)  &
-                     +  x(i-1,j,  k-1,3) + x(i,j,  k-1,3)  &
-                     +  x(i-1,j-1,k,  3) + x(i,j-1,k,  3)  &
-                     +  x(i-1,j,  k,  3) + x(i,j,  k,  3))
-
-                ! Loop over the older levels to complete the
-                ! finite difference formula.
-
-                ii = 1 ! There was a loop over all old levels
-                sc(1) = sc(1) + (xOld(ii,i-1,j-1,k-1,1)  &
-                     +          xOld(ii,i,  j-1,k-1,1)  &
-                     +          xOld(ii,i-1,j,  k-1,1)  &
-                     +          xOld(ii,i,  j,  k-1,1)  &
-                     +          xOld(ii,i-1,j-1,k,  1)  &
-                     +          xOld(ii,i,  j-1,k,  1)  &
-                     +          xOld(ii,i-1,j,  k,  1)  &
-                     +          xOld(ii,i,  j,  k,  1)) &
-                     * (-1.0_realType)
-                sc(2) = sc(2) + (xOld(ii,i-1,j-1,k-1,2)  &
-                     +          xOld(ii,i,  j-1,k-1,2)  &
-                     +          xOld(ii,i-1,j,  k-1,2)  &
-                     +          xOld(ii,i,  j,  k-1,2)  &
-                     +          xOld(ii,i-1,j-1,k,  2)  &
-                     +          xOld(ii,i,  j-1,k,  2)  &
-                     +          xOld(ii,i-1,j,  k,  2)  &
-                     +          xOld(ii,i,  j,  k,  2)) &
-                     * (-1.0_realType)
-                sc(3) = sc(3) + (xOld(ii,i-1,j-1,k-1,3)  &
-                     +          xOld(ii,i,  j-1,k-1,3)  &
-                     +          xOld(ii,i-1,j,  k-1,3)  &
-                     +          xOld(ii,i,  j,  k-1,3)  &
-                     +          xOld(ii,i-1,j-1,k,  3)  &
-                     +          xOld(ii,i,  j-1,k,  3)  &
-                     +          xOld(ii,i-1,j,  k,  3)  &
-                     +          xOld(ii,i,  j,  k,  3)) &
-                     * (-1.0_realType)
-
-                ! Divide by 8 delta t to obtain the correct
-                ! velocities.
-
-                s(i,j,k,1) = sc(1)*oneOver8dt
-                s(i,j,k,2) = sc(2)*oneOver8dt
-                s(i,j,k,3) = sc(3)*oneOver8dt
-             enddo
-          enddo
-       enddo
-
-       !
-       !             Velocities of the faces, vector.
-       !
-       ! Loop over the three directions.
-
-       loopDir: do mm=1,3
-
-          ! Set the upper boundaries depending on the direction.
-
-          select case (mm)
-          case (1_intType)       ! normals in i-direction
-             iie = ie; jje = je; kke = ke
-
-          case (2_intType)       ! normals in j-direction
-             iie = je; jje = ie; kke = ke
-
-          case (3_intType)       ! normals in k-direction
-             iie = ke; jje = ie; kke = je
-          end select
-          !
-          !               Face velocities in generalized i-direction.
-          !               mm == 1: i-direction
-          !               mm == 2: j-direction
-          !               mm == 3: k-direction
-          !
-          do i=0,iie
-
-             ! Set the pointers for the coordinates, normals and
-             ! normal velocities for this generalized i-plane.
-             ! This depends on the value of mm.
-
-             select case (mm)
-             case (1_intType)       ! normals in i-direction
-                xx =>  x(i,:,:,:);  xxOld => xOld(:,i,:,:,:)
-                sVelo => sVeloIALE(i,:,:,:)
-
-             case (2_intType)       ! normals in j-direction
-                xx =>  x(:,i,:,:);  xxOld => xOld(:,:,i,:,:)
-                sVelo => sVeloJALE(:,i,:,:)
-
-             case (3_intType)       ! normals in k-direction
-                xx =>  x(:,:,i,:);  xxOld => xOld(:,:,:,i,:)
-                sVelo => sVeloKALE(:,:,i,:)
-             end select
-
-             ! Loop over the k and j-direction of this
-             ! generalized i-face. Note that due to the usage of
-             ! the pointers xx and xxOld an offset of +1 must be
-             ! used in the coordinate arrays, because x and xOld
-             ! originally start at 0 for the i, j and k indices.
-             ! print *, mm
-             do k=1,kke
-                do j=1,jje
-
-                   ! The velocity of the face center is determined
-                   ! by a finite difference formula. First store
-                   ! the current coordinate, multiplied by 4 and
-                   ! coefTime(0) in sc.
-
-                   sc(1) = (xx(j+1,k+1,1) + xx(j,k+1,1) &
-                        +  xx(j+1,k,  1) + xx(j,k,  1))
-                   sc(2) = (xx(j+1,k+1,2) + xx(j,k+1,2) &
-                        +  xx(j+1,k,  2) + xx(j,k,  2))
-                   sc(3) = (xx(j+1,k+1,3) + xx(j,k+1,3) &
-                        +  xx(j+1,k,  3) + xx(j,k,  3))
-
-                   ii = 1 ! There was a loop who looped over nOldLevels
-                   sc(1) = sc(1) + (xxOld(ii,j+1,k+1,1) &
-                        +          xxOld(ii,j,  k+1,1) &
-                        +          xxOld(ii,j+1,k,  1) &
-                        +          xxOld(ii,j,  k,  1)) &
-                        * (-1.0_realType)
-                   sc(2) = sc(2) + (xxOld(ii,j+1,k+1,2) &
-                        +          xxOld(ii,j,  k+1,2) &
-                        +          xxOld(ii,j+1,k,  2) &
-                        +          xxOld(ii,j,  k,  2)) &
-                        * (-1.0_realType)
-                   sc(3) = sc(3) + (xxOld(ii,j+1,k+1,3) &
-                        +          xxOld(ii,j,  k+1,3) &
-                        +          xxOld(ii,j+1,k,  3) &
-                        +          xxOld(ii,j,  k,  3)) &
-                        * (-1.0_realType)
-
-                   ! Determine the dot product of sc and the normal
-                   ! and divide by 4 deltaT to obtain the correct
-                   ! value of the normal velocity.
-
-                   sVelo(j,k,1) = sc(1)*oneOver4dt
-                   sVelo(j,k,2) = sc(2)*oneOver4dt
-                   sVelo(j,k,3) = sc(3)*oneOver4dt
-
-                   ! if ((i.ge.2) .and. (i.le.3) .and. (j.ge.2) .and. (j.le.3) .and. (k.ge.2) .and. (k.le.3)) then
-                   !    print *, i,j,k, sVelo(j,k,:)
-                   !    print *, '                                   ', xx(j,k,:)
-                   !    print *, '                                   ', xxOld(1,j,k,:)
-                   ! end if
-
-                enddo
-             enddo
-          enddo
-
-       enddo loopDir
-
-    endif testMoving
-
-  end subroutine gridVelocitiesFineLevelPart1_block
-
-  !
-  !      Here begins the second part
-  !
-
-  subroutine gridVelocitiesFineLevelPart2(useOldCoor, t, sps)
-    !
-    ! Shell function to call gridVelocitiesFineLevel on all blocks
-    !
-    use blockPointers
-    use constants
-    use inputTimeSpectral
-    use iteration
-    use utils, only : setPointers
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: sps
-    logical,               intent(in) :: useOldCoor
-    real(kind=realType), dimension(*), intent(in) :: t  !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn
+                ! Set the upper boundaries depending on the direction.
 
-    ! Loop over the number of blocks.
-
-    domains: do nn=1,nDom
+                select case (mm)
+                case (1_intType)       ! normals in i-direction
+                    iie = ie; jje = je; kke = ke
 
-       ! Set the pointers for this block.
+                case (2_intType)       ! normals in j-direction
+                    iie = je; jje = ie; kke = ke
 
-       call setPointers(nn, groundLevel, sps)
-       call gridVelocitiesFineLevelPart2_block(useOldCoor, t, sps)
+                case (3_intType)       ! normals in k-direction
+                    iie = ke; jje = ie; kke = je
+                end select
+                !
+                !               Face velocities in generalized i-direction.
+                !               mm == 1: i-direction
+                !               mm == 2: j-direction
+                !               mm == 3: k-direction
+                !
+                do i = 0, iie
+
+                    ! Set the pointers for the coordinates, normals and
+                    ! normal velocities for this generalized i-plane.
+                    ! This depends on the value of mm.
+
+                    select case (mm)
+                    case (1_intType)       ! normals in i-direction
+                        xx => x(i, :, :, :); xxOld => xOld(:, i, :, :, :)
+                        sVelo => sVeloIALE(i, :, :, :)
+
+                    case (2_intType)       ! normals in j-direction
+                        xx => x(:, i, :, :); xxOld => xOld(:, :, i, :, :)
+                        sVelo => sVeloJALE(:, i, :, :)
+
+                    case (3_intType)       ! normals in k-direction
+                        xx => x(:, :, i, :); xxOld => xOld(:, :, :, i, :)
+                        sVelo => sVeloKALE(:, :, i, :)
+                    end select
+
+                    ! Loop over the k and j-direction of this
+                    ! generalized i-face. Note that due to the usage of
+                    ! the pointers xx and xxOld an offset of +1 must be
+                    ! used in the coordinate arrays, because x and xOld
+                    ! originally start at 0 for the i, j and k indices.
+                    ! print *, mm
+                    do k = 1, kke
+                        do j = 1, jje
+
+                            ! The velocity of the face center is determined
+                            ! by a finite difference formula. First store
+                            ! the current coordinate, multiplied by 4 and
+                            ! coefTime(0) in sc.
+
+                            sc(1) = (xx(j + 1, k + 1, 1) + xx(j, k + 1, 1) &
+                                     + xx(j + 1, k, 1) + xx(j, k, 1))
+                            sc(2) = (xx(j + 1, k + 1, 2) + xx(j, k + 1, 2) &
+                                     + xx(j + 1, k, 2) + xx(j, k, 2))
+                            sc(3) = (xx(j + 1, k + 1, 3) + xx(j, k + 1, 3) &
+                                     + xx(j + 1, k, 3) + xx(j, k, 3))
+
+                            ii = 1 ! There was a loop who looped over nOldLevels
+                            sc(1) = sc(1) + (xxOld(ii, j + 1, k + 1, 1) &
+                                             + xxOld(ii, j, k + 1, 1) &
+                                             + xxOld(ii, j + 1, k, 1) &
+                                             + xxOld(ii, j, k, 1)) &
+                                    * (-1.0_realType)
+                            sc(2) = sc(2) + (xxOld(ii, j + 1, k + 1, 2) &
+                                             + xxOld(ii, j, k + 1, 2) &
+                                             + xxOld(ii, j + 1, k, 2) &
+                                             + xxOld(ii, j, k, 2)) &
+                                    * (-1.0_realType)
+                            sc(3) = sc(3) + (xxOld(ii, j + 1, k + 1, 3) &
+                                             + xxOld(ii, j, k + 1, 3) &
+                                             + xxOld(ii, j + 1, k, 3) &
+                                             + xxOld(ii, j, k, 3)) &
+                                    * (-1.0_realType)
+
+                            ! Determine the dot product of sc and the normal
+                            ! and divide by 4 deltaT to obtain the correct
+                            ! value of the normal velocity.
+
+                            sVelo(j, k, 1) = sc(1) * oneOver4dt
+                            sVelo(j, k, 2) = sc(2) * oneOver4dt
+                            sVelo(j, k, 3) = sc(3) * oneOver4dt
+
+                            ! if ((i.ge.2) .and. (i.le.3) .and. (j.ge.2) .and. (j.le.3) .and. (k.ge.2) .and. (k.le.3)) then
+                            !    print *, i,j,k, sVelo(j,k,:)
+                            !    print *, '                                   ', xx(j,k,:)
+                            !    print *, '                                   ', xxOld(1,j,k,:)
+                            ! end if
+
+                        end do
+                    end do
+                end do
 
-    end do domains
+            end do loopDir
+
+        end if testMoving
+
+    end subroutine gridVelocitiesFineLevelPart1_block
+
+    !
+    !      Here begins the second part
+    !
+
+    subroutine gridVelocitiesFineLevelPart2(useOldCoor, t, sps)
+        !
+        ! Shell function to call gridVelocitiesFineLevel on all blocks
+        !
+        use blockPointers
+        use constants
+        use inputTimeSpectral
+        use iteration
+        use utils, only: setPointers
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: sps
+        logical, intent(in) :: useOldCoor
+        real(kind=realType), dimension(*), intent(in) :: t  !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn
+
+        ! Loop over the number of blocks.
+
+        domains: do nn = 1, nDom
+
+            ! Set the pointers for this block.
+
+            call setPointers(nn, groundLevel, sps)
+            call gridVelocitiesFineLevelPart2_block(useOldCoor, t, sps)
+
+        end do domains
+
+    end subroutine gridVelocitiesFineLevelPart2
+
+    subroutine gridVelocitiesFineLevelPart2_block(useOldCoor, t, sps)
+        !
+        use blockPointers
+        use cgnsGrid
+        use flowVarRefState
+        use inputMotion
+        use inputUnsteady
+        use iteration
+        use inputPhysics
+        use inputTSStabDeriv
+        use monitor
+        use communication
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: sps
+        logical, intent(in) :: useOldCoor
+        real(kind=realType), dimension(*), intent(in) :: t
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn, mm
+        integer(kind=intType) :: i, j, k, ii, iie, jje, kke
+
+        real(kind=realType) :: oneOver4dt, oneOver8dt
+        real(kind=realType), dimension(3) :: sc, xc, xxc
+        real(kind=realType), dimension(:, :), pointer :: sFace
+        real(kind=realType), dimension(:, :, :), pointer :: sVelo
+        real(kind=realType), dimension(:, :, :), pointer :: xx, ss
+        real(kind=realType), dimension(:, :, :, :), pointer :: xxOld
+
+        testMoving: if (blockIsMoving) then
+            !
+            !             Normal grid velocities of the faces.
+            !
+            ! Loop over the three directions.
+
+            loopDir: do mm = 1, 3
+
+                ! Set the upper boundaries depending on the direction.
 
-  end subroutine gridVelocitiesFineLevelPart2
+                select case (mm)
+                case (1_intType)       ! normals in i-direction
+                    iie = ie; jje = je; kke = ke
 
-  subroutine gridVelocitiesFineLevelPart2_block(useOldCoor, t, sps)
-    !
-    use blockPointers
-    use cgnsGrid
-    use flowVarRefState
-    use inputMotion
-    use inputUnsteady
-    use iteration
-    use inputPhysics
-    use inputTSStabDeriv
-    use monitor
-    use communication
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: sps
-    logical,               intent(in) :: useOldCoor
-    real(kind=realType), dimension(*), intent(in) :: t
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn, mm
-    integer(kind=intType) :: i, j, k, ii, iie, jje, kke
-
-    real(kind=realType) :: oneOver4dt, oneOver8dt
-    real(kind=realType), dimension(3) :: sc, xc, xxc
-    real(kind=realType), dimension(:,:), pointer :: sFace
-    real(kind=realType), dimension(:,:,:),   pointer :: sVelo
-    real(kind=realType), dimension(:,:,:),   pointer :: xx, ss
-    real(kind=realType), dimension(:,:,:,:), pointer :: xxOld
-
-    testMoving: if( blockIsMoving ) then
-       !
-       !             Normal grid velocities of the faces.
-       !
-       ! Loop over the three directions.
-
-       loopDir: do mm=1,3
-
-          ! Set the upper boundaries depending on the direction.
-
-          select case (mm)
-          case (1_intType)       ! normals in i-direction
-             iie = ie; jje = je; kke = ke
-
-          case (2_intType)       ! normals in j-direction
-             iie = je; jje = ie; kke = ke
-
-          case (3_intType)       ! normals in k-direction
-             iie = ke; jje = ie; kke = je
-          end select
-          !
-          !               Normal grid velocities in generalized i-direction.
-          !               Mm == 1: i-direction
-          !               mm == 2: j-direction
-          !               mm == 3: k-direction
-          !
-          do i=0,iie
-
-             ! Set the pointers for the coordinates, normals and
-             ! normal velocities for this generalized i-plane.
-             ! This depends on the value of mm.
-
-             select case (mm)
-             case (1_intType)       ! normals in i-direction
-                ss => si(i,:,:,:);  sFace => sFaceI(i,:,:)
-                sVelo => sVeloIALE(i,:,:,:)
-
-             case (2_intType)       ! normals in j-direction
-                ss => sj(:,i,:,:);  sFace => sFaceJ(:,i,:)
-                sVelo => sVeloJALE(:,i,:,:)
-
-             case (3_intType)       ! normals in k-direction
-                ss => sk(:,:,i,:);  sFace => sFaceK(:,:,i)
-                sVelo => sVeloKALE(:,:,i,:)
-             end select
-
-             ! Loop over the k and j-direction of this
-             ! generalized i-face. Note that due to the usage of
-             ! the pointers xx and xxOld an offset of +1 must be
-             ! used in the coordinate arrays, because x and xOld
-             ! originally start at 0 for the i, j and k indices.
-
-             do k=1,kke
-                do j=1,jje
-
-                   ! Determine the dot product of sc and the normal
-                   ! and divide by 4 deltaT to obtain the correct
-                   ! value of the normal velocity.
-
-                   sFace(j,k) = sVelo(j,k,1)*ss(j,k,1) &
-                        + sVelo(j,k,2)*ss(j,k,2) &
-                        + sVelo(j,k,3)*ss(j,k,3)
-
-                enddo
-             enddo
-          enddo
-
-       enddo loopDir
-    endif testMoving
-
-  end subroutine gridVelocitiesFineLevelPart2_block
-
-  subroutine utauWF(rFilv)
-    !
-    !       utauWF substitutes the wall shear stress with values from a
-    !       look-up table, if desired.
-    !
-    use constants
-    use blockPointers, only : si, sj, sk, fw, rlv, d2wall, w, BCData, viscSubFace, &
-         ie, je, ke, il, jl, kl, nViscBocos, BCFaceID
-    use inputPhysics, only : wallFunctions
-    use turbCurveFits, only : curveUpRe
-    implicit none
-    !
-    !      Subroutine argument.
-    !
-    real(kind=realType), intent(in) :: rFilv
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, nn
+                case (2_intType)       ! normals in j-direction
+                    iie = je; jje = ie; kke = ke
 
-    real(kind=realType) :: fact
-    real(kind=realType) :: tauxx, tauyy, tauzz
-    real(kind=realType) :: tauxy, tauxz, tauyz
-    real(kind=realType) :: rbar, ubar, vbar, wbar, vvx, vvy, vvz
-    real(kind=realType) :: fmx, fmy, fmz, frhoe
-    real(kind=realType) :: veln, velnx, velny, velnz, tx, ty, tz
-    real(kind=realType) :: veltx, velty, veltz, veltmag
-    real(kind=realType) :: txnx, txny, txnz, tynx, tyny, tynz
-    real(kind=realType) :: tznx, tzny, tznz
-    real(kind=realType) :: tautn, tauWall, utau, re
+                case (3_intType)       ! normals in k-direction
+                    iie = ke; jje = ie; kke = je
+                end select
+                !
+                !               Normal grid velocities in generalized i-direction.
+                !               Mm == 1: i-direction
+                !               mm == 2: j-direction
+                !               mm == 3: k-direction
+                !
+                do i = 0, iie
 
-    real(kind=realType), dimension(:,:,:), pointer :: ww1, ww2
-    real(kind=realType), dimension(:,:,:), pointer :: ss, rres
-    real(kind=realType), dimension(:,:,:), pointer :: norm
-    real(kind=realType), dimension(:,:),   pointer :: rrlv2, dd2Wall2
+                    ! Set the pointers for the coordinates, normals and
+                    ! normal velocities for this generalized i-plane.
+                    ! This depends on the value of mm.
 
-    ! Return immediately if no wall functions must be used.
+                    select case (mm)
+                    case (1_intType)       ! normals in i-direction
+                        ss => si(i, :, :, :); sFace => sFaceI(i, :, :)
+                        sVelo => sVeloIALE(i, :, :, :)
 
-    if(.not. wallFunctions) return
+                    case (2_intType)       ! normals in j-direction
+                        ss => sj(:, i, :, :); sFace => sFaceJ(:, i, :)
+                        sVelo => sVeloJALE(:, i, :, :)
 
-    ! Loop over the viscous subfaces of this block.
+                    case (3_intType)       ! normals in k-direction
+                        ss => sk(:, :, i, :); sFace => sFaceK(:, :, i)
+                        sVelo => sVeloKALE(:, :, i, :)
+                    end select
 
-    viscSubfaces: do nn=1,nViscBocos
+                    ! Loop over the k and j-direction of this
+                    ! generalized i-face. Note that due to the usage of
+                    ! the pointers xx and xxOld an offset of +1 must be
+                    ! used in the coordinate arrays, because x and xOld
+                    ! originally start at 0 for the i, j and k indices.
 
-       ! Set a bunch of variables depending on the face id to make
-       ! a generic treatment possible.
+                    do k = 1, kke
+                        do j = 1, jje
 
-       select case (BCFaceID(nn))
+                            ! Determine the dot product of sc and the normal
+                            ! and divide by 4 deltaT to obtain the correct
+                            ! value of the normal velocity.
 
-       case (iMin)
-          fact = -one
+                            sFace(j, k) = sVelo(j, k, 1) * ss(j, k, 1) &
+                                          + sVelo(j, k, 2) * ss(j, k, 2) &
+                                          + sVelo(j, k, 3) * ss(j, k, 3)
 
-          ss  => si(1,:,:,:);  rres => fw(2,1:,1:,:)
-          ww2 => w(2,1:,1:,:); ww1  => w(1,1:,1:,:)
-          dd2Wall2 => d2Wall(2,:,:); rrlv2 => rlv(2,1:,1:)
+                        end do
+                    end do
+                end do
 
-          !===========================================================
+            end do loopDir
+        end if testMoving
 
-       case (iMax)
-          fact = one
+    end subroutine gridVelocitiesFineLevelPart2_block
 
-          ss  => si(il,:,:,:);  rres => fw(il,1:,1:,:)
-          ww2 => w(il,1:,1:,:); ww1  => w(ie,1:,1:,:)
-          dd2Wall2 => d2Wall(il,:,:); rrlv2 => rlv(il,1:,1:)
+    subroutine utauWF(rFilv)
+        !
+        !       utauWF substitutes the wall shear stress with values from a
+        !       look-up table, if desired.
+        !
+        use constants
+        use blockPointers, only: si, sj, sk, fw, rlv, d2wall, w, BCData, viscSubFace, &
+                                 ie, je, ke, il, jl, kl, nViscBocos, BCFaceID
+        use inputPhysics, only: wallFunctions
+        use turbCurveFits, only: curveUpRe
+        implicit none
+        !
+        !      Subroutine argument.
+        !
+        real(kind=realType), intent(in) :: rFilv
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, nn
 
-          !===========================================================
+        real(kind=realType) :: fact
+        real(kind=realType) :: tauxx, tauyy, tauzz
+        real(kind=realType) :: tauxy, tauxz, tauyz
+        real(kind=realType) :: rbar, ubar, vbar, wbar, vvx, vvy, vvz
+        real(kind=realType) :: fmx, fmy, fmz, frhoe
+        real(kind=realType) :: veln, velnx, velny, velnz, tx, ty, tz
+        real(kind=realType) :: veltx, velty, veltz, veltmag
+        real(kind=realType) :: txnx, txny, txnz, tynx, tyny, tynz
+        real(kind=realType) :: tznx, tzny, tznz
+        real(kind=realType) :: tautn, tauWall, utau, re
 
-       case (jMin)
-          fact = -one
+        real(kind=realType), dimension(:, :, :), pointer :: ww1, ww2
+        real(kind=realType), dimension(:, :, :), pointer :: ss, rres
+        real(kind=realType), dimension(:, :, :), pointer :: norm
+        real(kind=realType), dimension(:, :), pointer :: rrlv2, dd2Wall2
 
-          ss  => sj(:,1,:,:);  rres => fw(1:,2,1:,:)
-          ww2 => w(1:,2,1:,:); ww1  => w(1:,1,1:,:)
-          dd2Wall2 => d2Wall(:,2,:); rrlv2 => rlv(1:,2,1:)
+        ! Return immediately if no wall functions must be used.
 
-          !===========================================================
+        if (.not. wallFunctions) return
 
-       case (jMax)
-          fact = one
+        ! Loop over the viscous subfaces of this block.
 
-          ss  => sj(:,jl,:,:);  rres => fw(1:,jl,1:,:)
-          ww2 => w(1:,jl,1:,:); ww1  => w(1:,je,1:,:)
-          dd2Wall2 => d2Wall(:,jl,:); rrlv2 => rlv(1:,jl,1:)
+        viscSubfaces: do nn = 1, nViscBocos
 
-          !===========================================================
+            ! Set a bunch of variables depending on the face id to make
+            ! a generic treatment possible.
 
-       case (kMin)
-          fact = -one
+            select case (BCFaceID(nn))
 
-          ss  => sk(:,:,1,:);  rres => fw(1:,1:,2,:)
-          ww2 => w(1:,1:,2,:); ww1  => w(1:,1:,1,:)
-          dd2Wall2 => d2Wall(:,:,2); rrlv2 => rlv(1:,1:,2)
+            case (iMin)
+                fact = -one
 
-          !===========================================================
+                ss => si(1, :, :, :); rres => fw(2, 1:, 1:, :)
+                ww2 => w(2, 1:, 1:, :); ww1 => w(1, 1:, 1:, :)
+                dd2Wall2 => d2Wall(2, :, :); rrlv2 => rlv(2, 1:, 1:)
 
-       case (kMax)
-          fact = one
+                !===========================================================
 
-          ss  => sk(:,:,kl,:);  rres => fw(1:,1:,kl,:)
-          ww2 => w(1:,1:,kl,:); ww1  => w(1:,1:,ke,:)
-          dd2Wall2 => d2Wall(:,:,kl); rrlv2 => rlv(1:,1:,kl)
+            case (iMax)
+                fact = one
 
-       end select
+                ss => si(il, :, :, :); rres => fw(il, 1:, 1:, :)
+                ww2 => w(il, 1:, 1:, :); ww1 => w(ie, 1:, 1:, :)
+                dd2Wall2 => d2Wall(il, :, :); rrlv2 => rlv(il, 1:, 1:)
 
-       ! Set the pointer for the unit outward normals.
+                !===========================================================
 
-       norm => BCData(nn)%norm
+            case (jMin)
+                fact = -one
 
-       ! Loop over the quadrilateral faces of the subface. Note
-       ! that the nodal range of BCData must be used and not the
-       ! cell range, because the latter may include the halo's in i
-       ! and j-direction. The offset +1 is there, because inBeg and
-       ! jnBeg refer to nodal ranges and not to cell ranges.
+                ss => sj(:, 1, :, :); rres => fw(1:, 2, 1:, :)
+                ww2 => w(1:, 2, 1:, :); ww1 => w(1:, 1, 1:, :)
+                dd2Wall2 => d2Wall(:, 2, :); rrlv2 => rlv(1:, 2, 1:)
 
-       do j=(BCData(nn)%jnBeg+1),BCData(nn)%jnEnd
-          do i=(BCData(nn)%inBeg+1),BCData(nn)%inEnd
+                !===========================================================
 
-             ! Store the viscous stress tensor a bit easier.
+            case (jMax)
+                fact = one
 
-             tauxx = viscSubface(nn)%tau(i,j,1)
-             tauyy = viscSubface(nn)%tau(i,j,2)
-             tauzz = viscSubface(nn)%tau(i,j,3)
-             tauxy = viscSubface(nn)%tau(i,j,4)
-             tauxz = viscSubface(nn)%tau(i,j,5)
-             tauyz = viscSubface(nn)%tau(i,j,6)
+                ss => sj(:, jl, :, :); rres => fw(1:, jl, 1:, :)
+                ww2 => w(1:, jl, 1:, :); ww1 => w(1:, je, 1:, :)
+                dd2Wall2 => d2Wall(:, jl, :); rrlv2 => rlv(1:, jl, 1:)
 
-             ! Compute the velocities at the wall face; these are only
-             ! non-zero for moving a block. Also compute the density,
-             ! which is needed to compute the wall shear stress via
-             ! wall functions.
+                !===========================================================
 
-             rbar = half*(ww2(i,j,irho) + ww1(i,j,irho))
-             ubar = half*(ww2(i,j,ivx)  + ww1(i,j,ivx))
-             vbar = half*(ww2(i,j,ivy)  + ww1(i,j,ivy))
-             wbar = half*(ww2(i,j,ivz)  + ww1(i,j,ivz))
+            case (kMin)
+                fact = -one
 
-             ! Compute the velocity difference between the internal cell
-             ! and the wall.
+                ss => sk(:, :, 1, :); rres => fw(1:, 1:, 2, :)
+                ww2 => w(1:, 1:, 2, :); ww1 => w(1:, 1:, 1, :)
+                dd2Wall2 => d2Wall(:, :, 2); rrlv2 => rlv(1:, 1:, 2)
 
-             vvx = ww2(i,j,ivx) - ubar
-             vvy = ww2(i,j,ivy) - vbar
-             vvz = ww2(i,j,ivz) - wbar
+                !===========================================================
 
-             ! Compute the normal velocity of the internal cell.
+            case (kMax)
+                fact = one
 
-             veln  = vvx*norm(i,j,1) + vvy*norm(i,j,2) + vvz*norm(i,j,3)
-             velnx = veln*norm(i,j,1)
-             velny = veln*norm(i,j,2)
-             velnz = veln*norm(i,j,3)
+                ss => sk(:, :, kl, :); rres => fw(1:, 1:, kl, :)
+                ww2 => w(1:, 1:, kl, :); ww1 => w(1:, 1:, ke, :)
+                dd2Wall2 => d2Wall(:, :, kl); rrlv2 => rlv(1:, 1:, kl)
 
-             ! Compute the tangential velocity, its magnitude and its
-             ! unit vector of the internal cell.
+            end select
 
-             veltx = vvx - velnx
-             velty = vvy - velny
-             veltz = vvz - velnz
+            ! Set the pointer for the unit outward normals.
 
-             veltmag = max(eps,sqrt(veltx**2 + velty**2 + veltz**2))
+            norm => BCData(nn)%norm
 
-             tx = veltx/veltmag
-             ty = velty/veltmag
-             tz = veltz/veltmag
+            ! Loop over the quadrilateral faces of the subface. Note
+            ! that the nodal range of BCData must be used and not the
+            ! cell range, because the latter may include the halo's in i
+            ! and j-direction. The offset +1 is there, because inBeg and
+            ! jnBeg refer to nodal ranges and not to cell ranges.
 
-             ! Compute some coefficients needed for the transformation
-             ! between the cartesian frame and the frame defined by the
-             ! tangential direction (tx,ty,tz) and the normal direction.
-             ! The minus sign is present, because for this transformation
-             ! the normal direction should be inward pointing and norm
-             ! is outward pointing.
+            do j = (BCData(nn)%jnBeg + 1), BCData(nn)%jnEnd
+                do i = (BCData(nn)%inBeg + 1), BCData(nn)%inEnd
 
-             txnx = -tx*norm(i,j,1)
-             txny = -tx*norm(i,j,2)
-             txnz = -tx*norm(i,j,3)
+                    ! Store the viscous stress tensor a bit easier.
 
-             tynx = -ty*norm(i,j,1)
-             tyny = -ty*norm(i,j,2)
-             tynz = -ty*norm(i,j,3)
+                    tauxx = viscSubface(nn)%tau(i, j, 1)
+                    tauyy = viscSubface(nn)%tau(i, j, 2)
+                    tauzz = viscSubface(nn)%tau(i, j, 3)
+                    tauxy = viscSubface(nn)%tau(i, j, 4)
+                    tauxz = viscSubface(nn)%tau(i, j, 5)
+                    tauyz = viscSubface(nn)%tau(i, j, 6)
 
-             tznx = -tz*norm(i,j,1)
-             tzny = -tz*norm(i,j,2)
-             tznz = -tz*norm(i,j,3)
+                    ! Compute the velocities at the wall face; these are only
+                    ! non-zero for moving a block. Also compute the density,
+                    ! which is needed to compute the wall shear stress via
+                    ! wall functions.
 
-             ! Compute the tn component of the wall shear stress
-             ! tensor. Normally this is the only nonzero shear
-             ! stress component in the t-n frame.
+                    rbar = half * (ww2(i, j, irho) + ww1(i, j, irho))
+                    ubar = half * (ww2(i, j, ivx) + ww1(i, j, ivx))
+                    vbar = half * (ww2(i, j, ivy) + ww1(i, j, ivy))
+                    wbar = half * (ww2(i, j, ivz) + ww1(i, j, ivz))
 
-             tautn = tauxx*txnx + tauyy*tyny + tauzz*tznz &
-                  + tauxy*(txny + tynx)                  &
-                  + tauxz*(txnz + tznx)                  &
-                  + tauyz*(tynz + tzny)
+                    ! Compute the velocity difference between the internal cell
+                    ! and the wall.
 
-             ! Compute the Reynolds number using the velocity, density,
-             ! laminar viscosity and wall distance. Note that an offset
-             ! of -1 must be used in dd2Wall2, because the original array
-             ! d2Wall starts at 2.
+                    vvx = ww2(i, j, ivx) - ubar
+                    vvy = ww2(i, j, ivy) - vbar
+                    vvz = ww2(i, j, ivz) - wbar
 
-             re = ww2(i,j,irho)*veltmag*dd2Wall2(i-1,j-1)/rrlv2(i,j)
+                    ! Compute the normal velocity of the internal cell.
 
-             ! Determine the friction velocity from the table and
-             ! compute the wall shear stress from it.
+                    veln = vvx * norm(i, j, 1) + vvy * norm(i, j, 2) + vvz * norm(i, j, 3)
+                    velnx = veln * norm(i, j, 1)
+                    velny = veln * norm(i, j, 2)
+                    velnz = veln * norm(i, j, 3)
 
-             utau    = veltmag/max(curveUpRe(re),eps)
-             tauWall = rbar*utau*utau
+                    ! Compute the tangential velocity, its magnitude and its
+                    ! unit vector of the internal cell.
 
-             ! Compute the correction to the wall shear stress tautn and
-             ! transform this correction back to the cartesian frame.
-             ! Take rFilv into account, such that the correction to the
-             ! stress tensor is computed correctly.
+                    veltx = vvx - velnx
+                    velty = vvy - velny
+                    veltz = vvz - velnz
 
-             tautn = rFilv*tauWall - tautn
+                    veltmag = max(eps, sqrt(veltx**2 + velty**2 + veltz**2))
 
-             tauxx = two*tautn*txnx
-             tauyy = two*tautn*tyny
-             tauzz = two*tautn*tznz
+                    tx = veltx / veltmag
+                    ty = velty / veltmag
+                    tz = veltz / veltmag
 
-             tauxy = tautn*(txny + tynx)
-             tauxz = tautn*(txnz + tznx)
-             tauyz = tautn*(tynz + tzny)
+                    ! Compute some coefficients needed for the transformation
+                    ! between the cartesian frame and the frame defined by the
+                    ! tangential direction (tx,ty,tz) and the normal direction.
+                    ! The minus sign is present, because for this transformation
+                    ! the normal direction should be inward pointing and norm
+                    ! is outward pointing.
 
-             ! Compute the correction to the viscous flux at the wall.
+                    txnx = -tx * norm(i, j, 1)
+                    txny = -tx * norm(i, j, 2)
+                    txnz = -tx * norm(i, j, 3)
 
-             fmx   = tauxx*ss(i,j,1) + tauxy*ss(i,j,2) &
-                  + tauxz*ss(i,j,3)
-             fmy   = tauxy*ss(i,j,1) + tauyy*ss(i,j,2) &
-                  + tauyz*ss(i,j,3)
-             fmz   = tauxz*ss(i,j,1) + tauyz*ss(i,j,2) &
-                  + tauzz*ss(i,j,3)
-             frhoE = (ubar*tauxx + vbar*tauxy + wbar*tauxz)*ss(i,j,1) &
-                  + (ubar*tauxy + vbar*tauyy + wbar*tauyz)*ss(i,j,2) &
-                  + (ubar*tauxz + vbar*tauyz + wbar*tauzz)*ss(i,j,3)
+                    tynx = -ty * norm(i, j, 1)
+                    tyny = -ty * norm(i, j, 2)
+                    tynz = -ty * norm(i, j, 3)
 
-             ! Add them to the residual. Note that now the factor rFilv
-             ! is already taken into account via tau. Fact is present to
-             ! take inward/outward pointing normals into account
+                    tznx = -tz * norm(i, j, 1)
+                    tzny = -tz * norm(i, j, 2)
+                    tznz = -tz * norm(i, j, 3)
 
-             rres(i,j,imx)   = rres(i,j,imx)   - fact*fmx
-             rres(i,j,imy)   = rres(i,j,imy)   - fact*fmy
-             rres(i,j,imz)   = rres(i,j,imz)   - fact*fmz
-             rres(i,j,irhoE) = rres(i,j,irhoE) - fact*frhoE
+                    ! Compute the tn component of the wall shear stress
+                    ! tensor. Normally this is the only nonzero shear
+                    ! stress component in the t-n frame.
 
-             ! Store the friction velocity for later use.
+                    tautn = tauxx * txnx + tauyy * tyny + tauzz * tznz &
+                            + tauxy * (txny + tynx) &
+                            + tauxz * (txnz + tznx) &
+                            + tauyz * (tynz + tzny)
 
-             viscSubface(nn)%utau(i,j) = utau
+                    ! Compute the Reynolds number using the velocity, density,
+                    ! laminar viscosity and wall distance. Note that an offset
+                    ! of -1 must be used in dd2Wall2, because the original array
+                    ! d2Wall starts at 2.
 
-             ! Also add the correction to the wall stress tensor.
+                    re = ww2(i, j, irho) * veltmag * dd2Wall2(i - 1, j - 1) / rrlv2(i, j)
 
-             viscSubface(nn)%tau(i,j,1) = &
-                  viscSubface(nn)%tau(i,j,1) + tauxx
-             viscSubface(nn)%tau(i,j,2) = &
-                  viscSubface(nn)%tau(i,j,2) + tauyy
-             viscSubface(nn)%tau(i,j,3) = &
-                  viscSubface(nn)%tau(i,j,3) + tauzz
-             viscSubface(nn)%tau(i,j,4) = &
-                  viscSubface(nn)%tau(i,j,4) + tauxy
-             viscSubface(nn)%tau(i,j,5) = &
-                  viscSubface(nn)%tau(i,j,5) + tauxz
-             viscSubface(nn)%tau(i,j,6) = &
-                  viscSubface(nn)%tau(i,j,6) + tauyz
-          enddo
-       enddo
+                    ! Determine the friction velocity from the table and
+                    ! compute the wall shear stress from it.
 
-    enddo viscSubfaces
+                    utau = veltmag / max(curveUpRe(re), eps)
+                    tauWall = rbar * utau * utau
 
-  end subroutine utauWF
+                    ! Compute the correction to the wall shear stress tautn and
+                    ! transform this correction back to the cartesian frame.
+                    ! Take rFilv into account, such that the correction to the
+                    ! stress tensor is computed correctly.
+
+                    tautn = rFilv * tauWall - tautn
+
+                    tauxx = two * tautn * txnx
+                    tauyy = two * tautn * tyny
+                    tauzz = two * tautn * tznz
+
+                    tauxy = tautn * (txny + tynx)
+                    tauxz = tautn * (txnz + tznx)
+                    tauyz = tautn * (tynz + tzny)
+
+                    ! Compute the correction to the viscous flux at the wall.
+
+                    fmx = tauxx * ss(i, j, 1) + tauxy * ss(i, j, 2) &
+                          + tauxz * ss(i, j, 3)
+                    fmy = tauxy * ss(i, j, 1) + tauyy * ss(i, j, 2) &
+                          + tauyz * ss(i, j, 3)
+                    fmz = tauxz * ss(i, j, 1) + tauyz * ss(i, j, 2) &
+                          + tauzz * ss(i, j, 3)
+                    frhoE = (ubar * tauxx + vbar * tauxy + wbar * tauxz) * ss(i, j, 1) &
+                            + (ubar * tauxy + vbar * tauyy + wbar * tauyz) * ss(i, j, 2) &
+                            + (ubar * tauxz + vbar * tauyz + wbar * tauzz) * ss(i, j, 3)
+
+                    ! Add them to the residual. Note that now the factor rFilv
+                    ! is already taken into account via tau. Fact is present to
+                    ! take inward/outward pointing normals into account
+
+                    rres(i, j, imx) = rres(i, j, imx) - fact * fmx
+                    rres(i, j, imy) = rres(i, j, imy) - fact * fmy
+                    rres(i, j, imz) = rres(i, j, imz) - fact * fmz
+                    rres(i, j, irhoE) = rres(i, j, irhoE) - fact * frhoE
+
+                    ! Store the friction velocity for later use.
+
+                    viscSubface(nn)%utau(i, j) = utau
+
+                    ! Also add the correction to the wall stress tensor.
+
+                    viscSubface(nn)%tau(i, j, 1) = &
+                        viscSubface(nn)%tau(i, j, 1) + tauxx
+                    viscSubface(nn)%tau(i, j, 2) = &
+                        viscSubface(nn)%tau(i, j, 2) + tauyy
+                    viscSubface(nn)%tau(i, j, 3) = &
+                        viscSubface(nn)%tau(i, j, 3) + tauzz
+                    viscSubface(nn)%tau(i, j, 4) = &
+                        viscSubface(nn)%tau(i, j, 4) + tauxy
+                    viscSubface(nn)%tau(i, j, 5) = &
+                        viscSubface(nn)%tau(i, j, 5) + tauxz
+                    viscSubface(nn)%tau(i, j, 6) = &
+                        viscSubface(nn)%tau(i, j, 6) + tauyz
+                end do
+            end do
 
+        end do viscSubfaces
 
+    end subroutine utauWF
 
 #ifndef USE_TAPENADE
-  subroutine slipVelocitiesCoarseLevels(sps)
-    !
-    !       slipVelocitiesCoarseLevels determines the slip velocities
-    !       for the given spectral solution starting from the known
-    !       velocities on the finer level.
-    !
-    use constants
-    use blockPointers
-    use iteration
-    use utils, only : setPointers
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: sps
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, iiMax, jjMax
-    integer(kind=intType) :: if1, if2, jf1, jf2
-    integer(kind=intType) :: nLevels, level, levm1, nn, mm
+    subroutine slipVelocitiesCoarseLevels(sps)
+        !
+        !       slipVelocitiesCoarseLevels determines the slip velocities
+        !       for the given spectral solution starting from the known
+        !       velocities on the finer level.
+        !
+        use constants
+        use blockPointers
+        use iteration
+        use utils, only: setPointers
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: sps
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, iiMax, jjMax
+        integer(kind=intType) :: if1, if2, jf1, jf2
+        integer(kind=intType) :: nLevels, level, levm1, nn, mm
 
-    integer(kind=intType), dimension(:,:), pointer :: iFine, jFine
+        integer(kind=intType), dimension(:, :), pointer :: iFine, jFine
 
-    real(kind=realType), dimension(:,:,:), pointer :: uSlip
-    real(kind=realType), dimension(:,:,:), pointer :: uSlipFine
+        real(kind=realType), dimension(:, :, :), pointer :: uSlip
+        real(kind=realType), dimension(:, :, :), pointer :: uSlipFine
 
-    ! Determine the number of multigrid levels.
+        ! Determine the number of multigrid levels.
 
-    nLevels = ubound(flowDoms,2)
+        nLevels = ubound(flowDoms, 2)
 
-    ! Loop over coarser grid levels, where ground level is considered
-    ! as the finest grid.
+        ! Loop over coarser grid levels, where ground level is considered
+        ! as the finest grid.
 
-    levelLoop: do level=(groundLevel+1),nLevels
+        levelLoop: do level = (groundLevel + 1), nLevels
 
-       ! Set levm1 for the finer level.
+            ! Set levm1 for the finer level.
 
-       levm1 = level - 1
+            levm1 = level - 1
 
-       ! Loop over the number of local blocks.
+            ! Loop over the number of local blocks.
 
-       domains: do nn=1,nDom
+            domains: do nn = 1, nDom
 
-          ! Set the pointers to the coarse block.
+                ! Set the pointers to the coarse block.
 
-          call setPointers(nn, level, sps)
+                call setPointers(nn, level, sps)
 
-          ! Loop over the number of viscous subfaces.
+                ! Loop over the number of viscous subfaces.
 
-          bocoLoop: do mm=1,nViscBocos
+                bocoLoop: do mm = 1, nViscBocos
 
-             ! Set the pointers for uSlip and uSlipFine to make the
-             ! code more readable.
+                    ! Set the pointers for uSlip and uSlipFine to make the
+                    ! code more readable.
 
-             uSlip     => BCData(mm)%uSlip
-             uSlipFine => flowDoms(nn,levm1,sps)%BCData(mm)%uSlip
+                    uSlip => BCData(mm)%uSlip
+                    uSlipFine => flowDoms(nn, levm1, sps)%BCData(mm)%uSlip
 
-             ! Determine the grid face on which the subface is located
-             ! and set some variables accordingly.
+                    ! Determine the grid face on which the subface is located
+                    ! and set some variables accordingly.
 
-             select case (BCFaceID(mm))
+                    select case (BCFaceID(mm))
 
-             case (iMin,iMax)
-                iiMax = jl; jjMax = kl
-                iFine => mgJFine; jFine => mgKFine
+                    case (iMin, iMax)
+                        iiMax = jl; jjMax = kl
+                        iFine => mgJFine; jFine => mgKFine
 
-             case (jMin,jMax)
-                iiMax = il; jjMax = kl
-                iFine => mgIFine; jFine => mgKFine
+                    case (jMin, jMax)
+                        iiMax = il; jjMax = kl
+                        iFine => mgIFine; jFine => mgKFine
 
-             case (kMin,kMax)
-                iiMax = il; jjMax = jl
-                iFine => mgIFine; jFine => mgJFine
+                    case (kMin, kMax)
+                        iiMax = il; jjMax = jl
+                        iFine => mgIFine; jFine => mgJFine
 
-             end select
+                    end select
 
-             ! Loop over the number of faces of the viscous subface.
-             ! First in the generalized j-direction.
+                    ! Loop over the number of faces of the viscous subface.
+                    ! First in the generalized j-direction.
 
-             do j=BCData(mm)%jcBeg, BCData(mm)%jcEnd
+                    do j = BCData(mm)%jcBeg, BCData(mm)%jcEnd
 
-                ! Determine the two children in this direction.
-                ! Take care of the halo's, as this info is only
-                ! available for owned cells.
+                        ! Determine the two children in this direction.
+                        ! Take care of the halo's, as this info is only
+                        ! available for owned cells.
 
-                if(j < 2) then
-                   jf1 = 1; jf2 = 1
-                else if(j > jjMax) then
-                   jf1 = jFine(jjMax,2) +1; jf2 = jf1
-                else
-                   jf1 = jFine(j,1); jf2 = jFine(j,2)
-                endif
+                        if (j < 2) then
+                            jf1 = 1; jf2 = 1
+                        else if (j > jjMax) then
+                            jf1 = jFine(jjMax, 2) + 1; jf2 = jf1
+                        else
+                            jf1 = jFine(j, 1); jf2 = jFine(j, 2)
+                        end if
 
-                ! Loop in the generalized i-direction.
+                        ! Loop in the generalized i-direction.
 
-                do i=BCData(mm)%icBeg, BCData(mm)%icEnd
+                        do i = BCData(mm)%icBeg, BCData(mm)%icEnd
 
-                   ! Determine the two children in this direction.
-                   ! Same story as in j-direction.
+                            ! Determine the two children in this direction.
+                            ! Same story as in j-direction.
 
-                   if(i < 2) then
-                      if1 = 1; if2 = 1
-                   else if(i > iiMax) then
-                      if1 = iFine(iiMax,2) +1; if2 = if1
-                   else
-                      if1 = iFine(i,1); if2 = iFine(i,2)
-                   endif
+                            if (i < 2) then
+                                if1 = 1; if2 = 1
+                            else if (i > iiMax) then
+                                if1 = iFine(iiMax, 2) + 1; if2 = if1
+                            else
+                                if1 = iFine(i, 1); if2 = iFine(i, 2)
+                            end if
 
-                   ! Average the fine grid velocities to the
-                   ! coarse grid velocities.
+                            ! Average the fine grid velocities to the
+                            ! coarse grid velocities.
 
-                   uSlip(i,j,1) = fourth*(uSlipFine(if1,jf1,1) &
-                        +         uSlipFine(if2,jf1,1) &
-                        +         uSlipFine(if1,jf2,1) &
-                        +         uSlipFine(if2,jf2,1))
+                            uSlip(i, j, 1) = fourth * (uSlipFine(if1, jf1, 1) &
+                                                       + uSlipFine(if2, jf1, 1) &
+                                                       + uSlipFine(if1, jf2, 1) &
+                                                       + uSlipFine(if2, jf2, 1))
 
-                   uSlip(i,j,2) = fourth*(uSlipFine(if1,jf1,2) &
-                        +         uSlipFine(if2,jf1,2) &
-                        +         uSlipFine(if1,jf2,2) &
-                        +         uSlipFine(if2,jf2,2))
+                            uSlip(i, j, 2) = fourth * (uSlipFine(if1, jf1, 2) &
+                                                       + uSlipFine(if2, jf1, 2) &
+                                                       + uSlipFine(if1, jf2, 2) &
+                                                       + uSlipFine(if2, jf2, 2))
 
-                   uSlip(i,j,3) = fourth*(uSlipFine(if1,jf1,3) &
-                        +         uSlipFine(if2,jf1,3) &
-                        +         uSlipFine(if1,jf2,3) &
-                        +         uSlipFine(if2,jf2,3))
-                enddo
-             enddo
+                            uSlip(i, j, 3) = fourth * (uSlipFine(if1, jf1, 3) &
+                                                       + uSlipFine(if2, jf1, 3) &
+                                                       + uSlipFine(if1, jf2, 3) &
+                                                       + uSlipFine(if2, jf2, 3))
+                        end do
+                    end do
 
-          enddo bocoLoop
-       enddo domains
-    enddo levelLoop
+                end do bocoLoop
+            end do domains
+        end do levelLoop
 
-  end subroutine slipVelocitiesCoarseLevels
+    end subroutine slipVelocitiesCoarseLevels
 #endif
 
-  subroutine gridVelocitiesCoarseLevels(sps)
-    !
-    !       gridVelocitiesCoarseLevels computes the grid velocities for
-    !       the cell centers and the normal grid velocities for the faces
-    !       of moving blocks on the coarser grid levels. GroundLevel is
-    !       considered the fine grid level.
-    !
-    use constants
-    use blockPointers
-    use iteration
-    use utils, only : setPointers
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: sps
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nLevels, level, levm1, nn, mm
-    integer(kind=intType) :: i, j, k, iie, jje, kke
-    integer(kind=intType) :: ii, ii1, jj, jj1, kk, kk1
-
-    integer(kind=intType), dimension(:,:), pointer :: iFine, jFine
-    integer(kind=intType), dimension(:,:), pointer :: kFine
-
-    real(kind=realType) :: jjWeight, kkWeight, weight
-
-    real(kind=realType), dimension(:), pointer :: jWeight, kWeight
-
-    real(kind=realType), dimension(:,:),     pointer :: sFine, sFace
-    real(kind=realType), dimension(:,:,:,:), pointer :: sf
-
-    ! Loop over the number of coarse grid levels, starting at
-    ! groundLevel+1,
-
-    nLevels = ubound(flowDoms,2)
-    levelLoop: do level=groundLevel+1,nLevels
-
-       ! Loop over the number of local blocks.
-
-       domains: do nn=1,nDom
-
-          ! Set the pointers for this block.
-
-          call setPointers(nn, level, sps)
-
-          ! Check for a moving block. As it is possible that in a
-          ! multidisicplinary environment additional grid velocities
-          ! are set, the test should be done on addGridVelocities
-          ! and not on blockIsMoving.
-
-          testMoving: if( addGridVelocities ) then
-             !
-             !             Grid velocities of the cell centers, including the 1st
-             !             level halo cells. These are determined by accumulating
-             !             the fine grid values. At the end the internal halo's are
-             !             communicated to obtain the correct values.
-             !
-             levm1 = level - 1
-
-             ! Set the pointer sf to the cell velocities on the fine mesh.
-
-             sf => flowDoms(nn,levm1,sps)%s
-
-             ! Loop over the cells, including the 1st level halo's.
-             ! The indices kk, kk1 contain the corresponding fine
-             ! grid indices in k-direction. Idem for jj, jj1, ii
-             ! and ii1.
-
-             do k=1,ke
-                if(k == 1) then
-                   kk = 1; kk1 = 1
-                else if(k == ke) then
-                   kk = flowDoms(nn,levm1,sps)%ke; kk1 = kk
+    subroutine gridVelocitiesCoarseLevels(sps)
+        !
+        !       gridVelocitiesCoarseLevels computes the grid velocities for
+        !       the cell centers and the normal grid velocities for the faces
+        !       of moving blocks on the coarser grid levels. GroundLevel is
+        !       considered the fine grid level.
+        !
+        use constants
+        use blockPointers
+        use iteration
+        use utils, only: setPointers
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: sps
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nLevels, level, levm1, nn, mm
+        integer(kind=intType) :: i, j, k, iie, jje, kke
+        integer(kind=intType) :: ii, ii1, jj, jj1, kk, kk1
+
+        integer(kind=intType), dimension(:, :), pointer :: iFine, jFine
+        integer(kind=intType), dimension(:, :), pointer :: kFine
+
+        real(kind=realType) :: jjWeight, kkWeight, weight
+
+        real(kind=realType), dimension(:), pointer :: jWeight, kWeight
+
+        real(kind=realType), dimension(:, :), pointer :: sFine, sFace
+        real(kind=realType), dimension(:, :, :, :), pointer :: sf
+
+        ! Loop over the number of coarse grid levels, starting at
+        ! groundLevel+1,
+
+        nLevels = ubound(flowDoms, 2)
+        levelLoop: do level = groundLevel + 1, nLevels
+
+            ! Loop over the number of local blocks.
+
+            domains: do nn = 1, nDom
+
+                ! Set the pointers for this block.
+
+                call setPointers(nn, level, sps)
+
+                ! Check for a moving block. As it is possible that in a
+                ! multidisicplinary environment additional grid velocities
+                ! are set, the test should be done on addGridVelocities
+                ! and not on blockIsMoving.
+
+                testMoving: if (addGridVelocities) then
+                    !
+                    !             Grid velocities of the cell centers, including the 1st
+                    !             level halo cells. These are determined by accumulating
+                    !             the fine grid values. At the end the internal halo's are
+                    !             communicated to obtain the correct values.
+                    !
+                    levm1 = level - 1
+
+                    ! Set the pointer sf to the cell velocities on the fine mesh.
+
+                    sf => flowDoms(nn, levm1, sps)%s
+
+                    ! Loop over the cells, including the 1st level halo's.
+                    ! The indices kk, kk1 contain the corresponding fine
+                    ! grid indices in k-direction. Idem for jj, jj1, ii
+                    ! and ii1.
+
+                    do k = 1, ke
+                        if (k == 1) then
+                            kk = 1; kk1 = 1
+                        else if (k == ke) then
+                            kk = flowDoms(nn, levm1, sps)%ke; kk1 = kk
+                        else
+                            kk = mgKFine(k, 1); kk1 = mgKFine(k, 2)
+                        end if
+
+                        do j = 1, je
+                            if (j == 1) then
+                                jj = 1; jj1 = 1
+                            else if (j == je) then
+                                jj = flowDoms(nn, levm1, sps)%je; jj1 = jj
+                            else
+                                jj = mgJFine(j, 1); jj1 = mgJFine(j, 2)
+                            end if
+
+                            do i = 1, ie
+                                if (i == 1) then
+                                    ii = 1; ii1 = 1
+                                else if (i == ie) then
+                                    ii = flowDoms(nn, levm1, sps)%ie; ii1 = ii
+                                else
+                                    ii = mgIFine(i, 1); ii1 = mgIFine(i, 2)
+                                end if
+
+                                ! Determine the coarse grid velocity by
+                                ! averaging the fine grid values.
+
+                                s(i, j, k, 1) = (sf(ii1, jj1, kk1, 1) + sf(ii, jj1, kk1, 1) &
+                                                 + sf(ii1, jj, kk1, 1) + sf(ii, jj, kk1, 1) &
+                                                 + sf(ii1, jj1, kk, 1) + sf(ii, jj1, kk, 1) &
+                                                 + sf(ii1, jj, kk, 1) + sf(ii, jj, kk, 1)) &
+                                                * eighth
+                                s(i, j, k, 2) = (sf(ii1, jj1, kk1, 2) + sf(ii, jj1, kk1, 2) &
+                                                 + sf(ii1, jj, kk1, 2) + sf(ii, jj, kk1, 2) &
+                                                 + sf(ii1, jj1, kk, 2) + sf(ii, jj1, kk, 2) &
+                                                 + sf(ii1, jj, kk, 2) + sf(ii, jj, kk, 2)) &
+                                                * eighth
+                                s(i, j, k, 3) = (sf(ii1, jj1, kk1, 3) + sf(ii, jj1, kk1, 3) &
+                                                 + sf(ii1, jj, kk1, 3) + sf(ii, jj, kk1, 3) &
+                                                 + sf(ii1, jj1, kk, 3) + sf(ii, jj1, kk, 3) &
+                                                 + sf(ii1, jj, kk, 3) + sf(ii, jj, kk, 3)) &
+                                                * eighth
+                            end do
+                        end do
+                    end do
+                    !
+                    !             Normal grid velocities of the faces.
+                    !
+                    ! Loop over the three directions.
+
+                    loopCoarseDir: do mm = 1, 3
+
+                        ! Set some values depending on the situation.
+
+                        select case (mm)
+
+                        case (1_intType)       ! Normals in i-direction
+                            iie = ie; jje = je; kke = ke
+                            iFine => mgIFine; jFine => mgJFine; kFine => mgKFine
+                            jWeight => mgJWeight; kWeight => mgKWeight
+
+                        case (2_intType)       ! Normals in j-direction
+                            iie = je; jje = ie; kke = ke
+                            iFine => mgJFine; jFine => mgIFine; kFine => mgKFine
+                            jWeight => mgIWeight; kWeight => mgKWeight
+
+                        case (3_intType)       ! Normals in k-direction
+                            iie = ke; jje = ie; kke = je
+                            iFine => mgKFine; jFine => mgIFine; kFine => mgJFine
+                            jWeight => mgIWeight; kWeight => mgJWeight
+
+                        end select
+                        !
+                        !               Normal grid velocities in generalized i-direction.
+                        !               mm == 1: i-direction
+                        !               mm == 2: j-direction
+                        !               mm == 3: k-direction
+                        !
+                        do i = 0, iie
+
+                            ! Determine the i-index of the corresponding plane on
+                            ! the fine grid. Note that halo planes are not entirely
+                            ! correct. This is not really a problem.
+
+                            if (i < 2) then
+                                ii = i
+                            else if (i < iie) then
+                                ii = iFine(i, 2)
+                            else
+                                ii = iFine(iie - 1, 2) + 1
+                            end if
+
+                            ! Set the pointers for sFine and sFace, which will
+                            ! contain the mesh velocities for this particular
+                            ! plane. The index depends on the value of mm.
+
+                            select case (mm)
+                            case (1_intType)
+                                sFine => flowDoms(nn, levm1, sps)%sFaceI(ii, :, :)
+                                sFace => sFaceI(i, :, :)
+                            case (2_intType)
+                                sFine => flowDoms(nn, levm1, sps)%sFaceJ(:, ii, :)
+                                sFace => sFaceJ(:, i, :)
+                            case (3_intType)
+                                sFine => flowDoms(nn, levm1, sps)%sFaceK(:, :, ii)
+                                sFace => sFaceK(:, :, i)
+                            end select
+
+                            ! Loop over the k and j faces for this general i-plane.
+                            ! Again the halo's are not entirely correct. Kk, kk1,
+                            ! jj and jj1 are the children in k and j-direction
+                            ! respectively.
+
+                            do k = 1, kke
+                                if (k == 1) then
+                                    kk = 1; kk1 = 1
+                                    kkWeight = kWeight(2)
+                                else if (k == kke) then
+                                    kk = kFine(kke - 1, 2) + 1; kk1 = kk
+                                    kkWeight = kWeight(kke - 1)
+                                else
+                                    kk = kFine(k, 1); kk1 = kFine(k, 2)
+                                    kWeight = kWeight(k)
+                                end if
+
+                                do j = 1, jje
+                                    if (j == 1) then
+                                        jj = 1; jj1 = 1
+                                        jjWeight = jWeight(2)
+                                    else if (j == jje) then
+                                        jj = jFine(jje - 1, 2) + 1; jj1 = jj
+                                        jWeight = jWeight(jje - 1)
+                                    else
+                                        jj = jFine(j, 1); jj1 = jFine(j, 2)
+                                        jjWeight = jWeight(j)
+                                    end if
+
+                                    ! Determine the coarse grid normal velocity.
+                                    ! Take the averaging weight into account; for
+                                    ! a normal coarsening this weight is 1.0.
+
+                                    weight = kkWeight * jjWeight
+                                    sFace(j, k) = weight * (sFine(jj1, kk1) &
+                                                            + sFine(jj, kk1) &
+                                                            + sFine(jj1, kk) &
+                                                            + sFine(jj, kk))
+                                end do
+                            end do
+                        end do
+
+                    end do loopCoarseDir
+                end if testMoving
+            end do domains
+
+            ! Exchange the cell centered velocities.
+
+            call exchangeCellGridVelocities(level, sps)
+
+        end do levelLoop
+
+    end subroutine gridVelocitiesCoarseLevels
+
+    !      ==================================================================
+
+    subroutine exchangeCellGridVelocities(level, sps)
+        !
+        !       exchangeCellGridVelocities exchanges the grid velocities in
+        !       the cell centers for the given grid level and spectral
+        !       solution.
+        !
+        use constants
+        use block
+        use communication
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level, sps
+        !
+        !      Local variables.
+        !
+        integer :: size, procID, ierr, index
+        integer, dimension(mpi_status_size) :: mpiStatus
+
+        integer(kind=intType) :: i, j, ii, jj
+        integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2
+
+        real(kind=realType) :: alp
+        real(kind=realType), dimension(3) :: vv
+
+        !       The 1 to 1 communication.
+        !
+        ! Send the variables. The data is first copied into
+        ! the send buffer after which the buffer is sent asap.
+
+        ii = 1
+        sends: do i = 1, commPatternCell_1st(level)%nProcSend
+
+            ! Store the processor id and the size of the message
+            ! a bit easier.
+
+            procID = commPatternCell_1st(level)%sendProc(i)
+            size = 3 * commPatternCell_1st(level)%nSend(i)
+
+            ! Copy the data in the correct part of the send buffer.
+
+            jj = ii
+            do j = 1, commPatternCell_1st(level)%nSend(i)
+
+                ! Store the block id and the indices of the donor
+                ! a bit easier.
+
+                d1 = commPatternCell_1st(level)%sendList(i)%block(j)
+                i1 = commPatternCell_1st(level)%sendList(i)%indices(j, 1)
+                j1 = commPatternCell_1st(level)%sendList(i)%indices(j, 2)
+                k1 = commPatternCell_1st(level)%sendList(i)%indices(j, 3)
+
+                ! Store the grid velocities in sendBuffer, if they
+                ! are allocated. Otherwise they are simply zero.
+
+                if (flowDoms(d1, level, sps)%addGridVelocities) then
+                    sendBuffer(jj) = flowDoms(d1, level, sps)%s(i1, j1, k1, 1)
+                    sendBuffer(jj + 1) = flowDoms(d1, level, sps)%s(i1, j1, k1, 2)
+                    sendBuffer(jj + 2) = flowDoms(d1, level, sps)%s(i1, j1, k1, 3)
                 else
-                   kk = mgKFine(k,1); kk1 = mgKFine(k,2)
-                endif
-
-                do j=1,je
-                   if(j == 1) then
-                      jj = 1; jj1 = 1
-                   else if(j == je) then
-                      jj = flowDoms(nn,levm1,sps)%je; jj1 = jj
-                   else
-                      jj = mgJFine(j,1); jj1 = mgJFine(j,2)
-                   endif
-
-                   do i=1,ie
-                      if(i == 1) then
-                         ii = 1; ii1 = 1
-                      else if(i == ie) then
-                         ii = flowDoms(nn,levm1,sps)%ie; ii1 = ii
-                      else
-                         ii = mgIFine(i,1); ii1 = mgIFine(i,2)
-                      endif
-
-                      ! Determine the coarse grid velocity by
-                      ! averaging the fine grid values.
-
-                      s(i,j,k,1) = (sf(ii1,jj1,kk1,1) + sf(ii,jj1,kk1,1)  &
-                           +  sf(ii1,jj, kk1,1) + sf(ii,jj, kk1,1)  &
-                           +  sf(ii1,jj1,kk, 1) + sf(ii,jj1,kk, 1)  &
-                           +  sf(ii1,jj, kk, 1) + sf(ii,jj, kk, 1)) &
-                           * eighth
-                      s(i,j,k,2) = (sf(ii1,jj1,kk1,2) + sf(ii,jj1,kk1,2)  &
-                           +  sf(ii1,jj, kk1,2) + sf(ii,jj, kk1,2)  &
-                           +  sf(ii1,jj1,kk, 2) + sf(ii,jj1,kk, 2)  &
-                           +  sf(ii1,jj, kk, 2) + sf(ii,jj, kk, 2)) &
-                           * eighth
-                      s(i,j,k,3) = (sf(ii1,jj1,kk1,3) + sf(ii,jj1,kk1,3)  &
-                           +  sf(ii1,jj, kk1,3) + sf(ii,jj, kk1,3)  &
-                           +  sf(ii1,jj1,kk, 3) + sf(ii,jj1,kk, 3)  &
-                           +  sf(ii1,jj, kk, 3) + sf(ii,jj, kk, 3)) &
-                           * eighth
-                   enddo
-                enddo
-             enddo
-             !
-             !             Normal grid velocities of the faces.
-             !
-             ! Loop over the three directions.
-
-             loopCoarseDir: do mm=1,3
-
-                ! Set some values depending on the situation.
+                    sendBuffer(jj) = zero
+                    sendBuffer(jj + 1) = zero
+                    sendBuffer(jj + 2) = zero
+                end if
 
-                select case (mm)
+                jj = jj + 3
+            end do
 
-                case (1_intType)       ! Normals in i-direction
-                   iie = ie; jje = je; kke = ke
-                   iFine => mgIFine; jFine => mgJFine; kFine => mgKFine
-                   jWeight => mgJWeight; kWeight => mgKWeight
+            ! Send the data.
 
-                case (2_intType)       ! Normals in j-direction
-                   iie = je; jje = ie; kke = ke
-                   iFine => mgJFine; jFine => mgIFine; kFine => mgKFine
-                   jWeight => mgIWeight; kWeight => mgKWeight
+            call mpi_isend(sendBuffer(ii), size, adflow_real, procID, &
+                           procID, ADflow_comm_world, sendRequests(i), &
+                           ierr)
 
-                case (3_intType)       ! Normals in k-direction
-                   iie = ke; jje = ie; kke = je
-                   iFine => mgKFine; jFine => mgIFine; kFine => mgJFine
-                   jWeight => mgIWeight; kWeight => mgJWeight
+            ! Set ii to jj for the next processor.
 
-                end select
-                !
-                !               Normal grid velocities in generalized i-direction.
-                !               mm == 1: i-direction
-                !               mm == 2: j-direction
-                !               mm == 3: k-direction
-                !
-                do i=0,iie
-
-                   ! Determine the i-index of the corresponding plane on
-                   ! the fine grid. Note that halo planes are not entirely
-                   ! correct. This is not really a problem.
-
-                   if(i < 2) then
-                      ii = i
-                   else if(i < iie) then
-                      ii = iFine(i,2)
-                   else
-                      ii = iFine(iie-1,2) + 1
-                   endif
-
-                   ! Set the pointers for sFine and sFace, which will
-                   ! contain the mesh velocities for this particular
-                   ! plane. The index depends on the value of mm.
-
-                   select case (mm)
-                   case (1_intType)
-                      sFine => flowDoms(nn,levm1,sps)%sFaceI(ii,:,:)
-                      sFace => sFaceI(i,:,:)
-                   case (2_intType)
-                      sFine => flowDoms(nn,levm1,sps)%sFaceJ(:,ii,:)
-                      sFace => sFaceJ(:,i,:)
-                   case (3_intType)
-                      sFine => flowDoms(nn,levm1,sps)%sFaceK(:,:,ii)
-                      sFace => sFaceK(:,:,i)
-                   end select
-
-                   ! Loop over the k and j faces for this general i-plane.
-                   ! Again the halo's are not entirely correct. Kk, kk1,
-                   ! jj and jj1 are the children in k and j-direction
-                   ! respectively.
-
-                   do k=1,kke
-                      if(k == 1) then
-                         kk = 1; kk1 = 1
-                         kkWeight = kWeight(2)
-                      else if(k == kke) then
-                         kk = kFine(kke-1,2) + 1; kk1 = kk
-                         kkWeight = kWeight(kke-1)
-                      else
-                         kk = kFine(k,1); kk1 = kFine(k,2)
-                         kWeight = kWeight(k)
-                      endif
-
-                      do j=1,jje
-                         if(j == 1) then
-                            jj = 1; jj1 = 1
-                            jjWeight = jWeight(2)
-                         else if(j == jje) then
-                            jj = jFine(jje-1,2) + 1; jj1 = jj
-                            jWeight = jWeight(jje-1)
-                         else
-                            jj = jFine(j,1); jj1 = jFine(j,2)
-                            jjWeight = jWeight(j)
-                         endif
-
-                         ! Determine the coarse grid normal velocity.
-                         ! Take the averaging weight into account; for
-                         ! a normal coarsening this weight is 1.0.
-
-                         weight = kkWeight*jjWeight
-                         sFace(j,k) = weight*(sFine(jj1,kk1) &
-                              +         sFine(jj ,kk1) &
-                              +         sFine(jj1,kk)  &
-                              +         sFine(jj ,kk))
-                      enddo
-                   enddo
-                enddo
-
-             enddo loopCoarseDir
-          endif testMoving
-       enddo domains
-
-       ! Exchange the cell centered velocities.
-
-       call exchangeCellGridVelocities(level,sps)
-
-    enddo levelLoop
-
-  end subroutine gridVelocitiesCoarseLevels
-
-  !      ==================================================================
-
-  subroutine exchangeCellGridVelocities(level,sps)
-    !
-    !       exchangeCellGridVelocities exchanges the grid velocities in
-    !       the cell centers for the given grid level and spectral
-    !       solution.
-    !
-    use constants
-    use block
-    use communication
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level, sps
-    !
-    !      Local variables.
-    !
-    integer :: size, procID, ierr, index
-    integer, dimension(mpi_status_size) :: mpiStatus
-
-    integer(kind=intType) :: i, j, ii, jj
-    integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2
-
-    real(kind=realType) :: alp
-    real(kind=realType), dimension(3) :: vv
-
-    !       The 1 to 1 communication.
-    !
-    ! Send the variables. The data is first copied into
-    ! the send buffer after which the buffer is sent asap.
-
-    ii = 1
-    sends: do i=1,commPatternCell_1st(level)%nProcSend
-
-       ! Store the processor id and the size of the message
-       ! a bit easier.
-
-       procID = commPatternCell_1st(level)%sendProc(i)
-       size   = 3*commPatternCell_1st(level)%nSend(i)
-
-       ! Copy the data in the correct part of the send buffer.
-
-       jj = ii
-       do j=1,commPatternCell_1st(level)%nSend(i)
-
-          ! Store the block id and the indices of the donor
-          ! a bit easier.
-
-          d1 = commPatternCell_1st(level)%sendList(i)%block(j)
-          i1 = commPatternCell_1st(level)%sendList(i)%indices(j,1)
-          j1 = commPatternCell_1st(level)%sendList(i)%indices(j,2)
-          k1 = commPatternCell_1st(level)%sendList(i)%indices(j,3)
+            ii = jj
+        end do sends
 
-          ! Store the grid velocities in sendBuffer, if they
-          ! are allocated. Otherwise they are simply zero.
+        ! Post the nonblocking receives.
 
-          if( flowDoms(d1,level,sps)%addGridVelocities ) then
-             sendBuffer(jj)   = flowDoms(d1,level,sps)%s(i1,j1,k1,1)
-             sendBuffer(jj+1) = flowDoms(d1,level,sps)%s(i1,j1,k1,2)
-             sendBuffer(jj+2) = flowDoms(d1,level,sps)%s(i1,j1,k1,3)
-          else
-             sendBuffer(jj)   = zero
-             sendBuffer(jj+1) = zero
-             sendBuffer(jj+2) = zero
-          endif
+        ii = 1
+        receives: do i = 1, commPatternCell_1st(level)%nProcRecv
 
-          jj = jj + 3
-       enddo
+            ! Store the processor id and the size of the message
+            ! a bit easier.
 
-       ! Send the data.
+            procID = commPatternCell_1st(level)%recvProc(i)
+            size = 3 * commPatternCell_1st(level)%nRecv(i)
 
-       call mpi_isend(sendBuffer(ii), size, adflow_real, procID,  &
-            procID, ADflow_comm_world, sendRequests(i), &
-            ierr)
+            ! Post the receive.
 
-       ! Set ii to jj for the next processor.
+            call mpi_irecv(recvBuffer(ii), size, adflow_real, procID, &
+                           myID, ADflow_comm_world, recvRequests(i), ierr)
 
-       ii = jj
-    enddo sends
+            ! And update ii.
 
-    ! Post the nonblocking receives.
+            ii = ii + size
+        end do receives
 
-    ii = 1
-    receives: do i=1,commPatternCell_1st(level)%nProcRecv
+        ! Copy the local data.
 
-       ! Store the processor id and the size of the message
-       ! a bit easier.
+        localCopy: do i = 1, internalCell_1st(level)%nCopy
 
-       procID = commPatternCell_1st(level)%recvProc(i)
-       size   = 3*commPatternCell_1st(level)%nRecv(i)
+            ! Store the block and the indices of the donor a bit easier.
 
-       ! Post the receive.
+            d1 = internalCell_1st(level)%donorBlock(i)
+            i1 = internalCell_1st(level)%donorIndices(i, 1)
+            j1 = internalCell_1st(level)%donorIndices(i, 2)
+            k1 = internalCell_1st(level)%donorIndices(i, 3)
 
-       call mpi_irecv(recvBuffer(ii), size, adflow_real, procID, &
-            myID, ADflow_comm_world, recvRequests(i), ierr)
+            ! Idem for the halo's.
 
-       ! And update ii.
+            d2 = internalCell_1st(level)%haloBlock(i)
+            i2 = internalCell_1st(level)%haloIndices(i, 1)
+            j2 = internalCell_1st(level)%haloIndices(i, 2)
+            k2 = internalCell_1st(level)%haloIndices(i, 3)
 
-       ii = ii + size
-    enddo receives
+            ! Copy the grid velocities, if they are both allocated.
+            ! Otherwise they are either set to zero or nothing is done.
 
-    ! Copy the local data.
-
-    localCopy: do i=1,internalCell_1st(level)%nCopy
-
-       ! Store the block and the indices of the donor a bit easier.
-
-       d1 = internalCell_1st(level)%donorBlock(i)
-       i1 = internalCell_1st(level)%donorIndices(i,1)
-       j1 = internalCell_1st(level)%donorIndices(i,2)
-       k1 = internalCell_1st(level)%donorIndices(i,3)
-
-       ! Idem for the halo's.
-
-       d2 = internalCell_1st(level)%haloBlock(i)
-       i2 = internalCell_1st(level)%haloIndices(i,1)
-       j2 = internalCell_1st(level)%haloIndices(i,2)
-       k2 = internalCell_1st(level)%haloIndices(i,3)
-
-       ! Copy the grid velocities, if they are both allocated.
-       ! Otherwise they are either set to zero or nothing is done.
-
-       if( flowDoms(d2,level,sps)%addGridVelocities ) then
-          if( flowDoms(d1,level,sps)%addGridVelocities ) then
-             flowDoms(d2,level,sps)%s(i2,j2,k2,1) = &
-                  flowDoms(d1,level,sps)%s(i1,j1,k1,1)
-             flowDoms(d2,level,sps)%s(i2,j2,k2,2) = &
-                  flowDoms(d1,level,sps)%s(i1,j1,k1,2)
-             flowDoms(d2,level,sps)%s(i2,j2,k2,3) = &
-                  flowDoms(d1,level,sps)%s(i1,j1,k1,3)
-          else
-             flowDoms(d2,level,sps)%s(i2,j2,k2,1) = zero
-             flowDoms(d2,level,sps)%s(i2,j2,k2,2) = zero
-             flowDoms(d2,level,sps)%s(i2,j2,k2,3) = zero
-          endif
-       endif
-
-    enddo localCopy
-
-    ! Correct the periodic halo's of the internal communication
-    ! pattern, if present.
-
-    if(internalCell_1st(level)%nPeriodic > 0) &
-         call correctPeriodicGridVel(level, sps,                        &
-         internalCell_1st(level)%nPeriodic, &
-         internalCell_1st(level)%periodicData)
+            if (flowDoms(d2, level, sps)%addGridVelocities) then
+                if (flowDoms(d1, level, sps)%addGridVelocities) then
+                    flowDoms(d2, level, sps)%s(i2, j2, k2, 1) = &
+                        flowDoms(d1, level, sps)%s(i1, j1, k1, 1)
+                    flowDoms(d2, level, sps)%s(i2, j2, k2, 2) = &
+                        flowDoms(d1, level, sps)%s(i1, j1, k1, 2)
+                    flowDoms(d2, level, sps)%s(i2, j2, k2, 3) = &
+                        flowDoms(d1, level, sps)%s(i1, j1, k1, 3)
+                else
+                    flowDoms(d2, level, sps)%s(i2, j2, k2, 1) = zero
+                    flowDoms(d2, level, sps)%s(i2, j2, k2, 2) = zero
+                    flowDoms(d2, level, sps)%s(i2, j2, k2, 3) = zero
+                end if
+            end if
 
-    ! Complete the nonblocking receives in an arbitrary sequence and
-    ! copy the variables from the buffer into the halo's.
+        end do localCopy
 
-    size = commPatternCell_1st(level)%nProcRecv
-    completeRecvs: do i=1,commPatternCell_1st(level)%nProcRecv
+        ! Correct the periodic halo's of the internal communication
+        ! pattern, if present.
 
-       ! Complete any of the requests.
+        if (internalCell_1st(level)%nPeriodic > 0) &
+            call correctPeriodicGridVel(level, sps, &
+                                        internalCell_1st(level)%nPeriodic, &
+                                        internalCell_1st(level)%periodicData)
 
-       call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
+        ! Complete the nonblocking receives in an arbitrary sequence and
+        ! copy the variables from the buffer into the halo's.
 
-       ! Copy the data just arrived in the halo's.
+        size = commPatternCell_1st(level)%nProcRecv
+        completeRecvs: do i = 1, commPatternCell_1st(level)%nProcRecv
 
-       ii = index
-       jj = 3*commPatternCell_1st(level)%nRecvCum(ii-1)
-       do j=1,commPatternCell_1st(level)%nRecv(ii)
+            ! Complete any of the requests.
 
-          ! Store the block and the indices of the halo a bit easier.
+            call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
 
-          d2 = commPatternCell_1st(level)%recvList(ii)%block(j)
-          i2 = commPatternCell_1st(level)%recvList(ii)%indices(j,1)
-          j2 = commPatternCell_1st(level)%recvList(ii)%indices(j,2)
-          k2 = commPatternCell_1st(level)%recvList(ii)%indices(j,3)
+            ! Copy the data just arrived in the halo's.
 
-          ! Copy the grid velocities from recvBuffer if they are
-          ! both allocated.
+            ii = index
+            jj = 3 * commPatternCell_1st(level)%nRecvCum(ii - 1)
+            do j = 1, commPatternCell_1st(level)%nRecv(ii)
 
-          if( flowDoms(d2,level,sps)%addGridVelocities ) then
-             flowDoms(d2,level,sps)%s(i2,j2,k2,1) = recvBuffer(jj+1)
-             flowDoms(d2,level,sps)%s(i2,j2,k2,2) = recvBuffer(jj+2)
-             flowDoms(d2,level,sps)%s(i2,j2,k2,3) = recvBuffer(jj+3)
-          endif
+                ! Store the block and the indices of the halo a bit easier.
 
-          jj = jj + 3
-       enddo
+                d2 = commPatternCell_1st(level)%recvList(ii)%block(j)
+                i2 = commPatternCell_1st(level)%recvList(ii)%indices(j, 1)
+                j2 = commPatternCell_1st(level)%recvList(ii)%indices(j, 2)
+                k2 = commPatternCell_1st(level)%recvList(ii)%indices(j, 3)
 
-    enddo completeRecvs
+                ! Copy the grid velocities from recvBuffer if they are
+                ! both allocated.
 
-    ! Correct the periodic halo's of the external communication
-    ! pattern, if present.
+                if (flowDoms(d2, level, sps)%addGridVelocities) then
+                    flowDoms(d2, level, sps)%s(i2, j2, k2, 1) = recvBuffer(jj + 1)
+                    flowDoms(d2, level, sps)%s(i2, j2, k2, 2) = recvBuffer(jj + 2)
+                    flowDoms(d2, level, sps)%s(i2, j2, k2, 3) = recvBuffer(jj + 3)
+                end if
 
-    if(commPatternCell_1st(level)%nPeriodic > 0) &
-         call correctPeriodicGridVel(level, sps,                           &
-         commPatternCell_1st(level)%nPeriodic, &
-         commPatternCell_1st(level)%periodicData)
+                jj = jj + 3
+            end do
 
-    ! Complete the nonblocking sends.
+        end do completeRecvs
 
-    size = commPatternCell_1st(level)%nProcSend
-    do i=1,commPatternCell_1st(level)%nProcSend
-       call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
-    enddo
+        ! Correct the periodic halo's of the external communication
+        ! pattern, if present.
 
-  end subroutine exchangeCellGridVelocities
+        if (commPatternCell_1st(level)%nPeriodic > 0) &
+            call correctPeriodicGridVel(level, sps, &
+                                        commPatternCell_1st(level)%nPeriodic, &
+                                        commPatternCell_1st(level)%periodicData)
 
-  !      ==================================================================
+        ! Complete the nonblocking sends.
 
-  subroutine correctPeriodicGridVel(level, sps, nPeriodic, &
-       periodicData)
-    !
-    !       correctPeriodicGridVel applies the periodic transformation
-    !       to the grid velocities of the cell halo's in periodicData.
-    !
-    use block
-    use communication
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level, sps, nPeriodic
-    type(periodicDataType), dimension(:), pointer :: periodicData
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn, mm, ii, i, j, k
-    real(kind=realType)   :: vx, vy, vz
+        size = commPatternCell_1st(level)%nProcSend
+        do i = 1, commPatternCell_1st(level)%nProcSend
+            call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
+        end do
 
-    real(kind=realType), dimension(3,3) :: rotMatrix
+    end subroutine exchangeCellGridVelocities
 
-    ! Loop over the number of periodic transformations.
+    !      ==================================================================
 
-    do nn=1,nPeriodic
+    subroutine correctPeriodicGridVel(level, sps, nPeriodic, &
+                                      periodicData)
+        !
+        !       correctPeriodicGridVel applies the periodic transformation
+        !       to the grid velocities of the cell halo's in periodicData.
+        !
+        use block
+        use communication
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level, sps, nPeriodic
+        type(periodicDataType), dimension(:), pointer :: periodicData
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn, mm, ii, i, j, k
+        real(kind=realType) :: vx, vy, vz
 
-       ! Store the rotation matrix a bit easier.
+        real(kind=realType), dimension(3, 3) :: rotMatrix
 
-       rotMatrix = periodicData(nn)%rotMatrix
+        ! Loop over the number of periodic transformations.
 
-       ! Loop over the number of halo cells for this transformation.
+        do nn = 1, nPeriodic
 
-       do ii=1,periodicData(nn)%nHalos
+            ! Store the rotation matrix a bit easier.
 
-          ! Store the block and the indices a bit easier.
+            rotMatrix = periodicData(nn)%rotMatrix
 
-          mm = periodicData(nn)%block(ii)
-          i  = periodicData(nn)%indices(ii,1)
-          j  = periodicData(nn)%indices(ii,2)
-          k  = periodicData(nn)%indices(ii,3)
+            ! Loop over the number of halo cells for this transformation.
 
-          ! Check if the grid velocities have been allocated.
+            do ii = 1, periodicData(nn)%nHalos
 
-          if( flowDoms(mm,level,sps)%addGridVelocities ) then
+                ! Store the block and the indices a bit easier.
 
-             ! Store the original velocities in vx, vy, vz.
+                mm = periodicData(nn)%block(ii)
+                i = periodicData(nn)%indices(ii, 1)
+                j = periodicData(nn)%indices(ii, 2)
+                k = periodicData(nn)%indices(ii, 3)
 
-             vx = flowDoms(mm,level,sps)%s(i,j,k,1)
-             vy = flowDoms(mm,level,sps)%s(i,j,k,2)
-             vz = flowDoms(mm,level,sps)%s(i,j,k,3)
+                ! Check if the grid velocities have been allocated.
 
-             ! Compute the new velocity vector.
+                if (flowDoms(mm, level, sps)%addGridVelocities) then
 
-             flowDoms(mm,level,sps)%s(i,j,k,1) = rotMatrix(1,1)*vx &
-                  + rotMatrix(1,2)*vy &
-                  + rotMatrix(1,3)*vz
-             flowDoms(mm,level,sps)%s(i,j,k,2) = rotMatrix(2,1)*vx &
-                  + rotMatrix(2,2)*vy &
-                  + rotMatrix(2,3)*vz
-             flowDoms(mm,level,sps)%s(i,j,k,3) = rotMatrix(3,1)*vx &
-                  + rotMatrix(3,2)*vy &
-                  + rotMatrix(3,3)*vz
+                    ! Store the original velocities in vx, vy, vz.
 
-          endif
-       enddo
+                    vx = flowDoms(mm, level, sps)%s(i, j, k, 1)
+                    vy = flowDoms(mm, level, sps)%s(i, j, k, 2)
+                    vz = flowDoms(mm, level, sps)%s(i, j, k, 3)
 
-    enddo
+                    ! Compute the new velocity vector.
 
-  end subroutine correctPeriodicGridVel
+                    flowDoms(mm, level, sps)%s(i, j, k, 1) = rotMatrix(1, 1) * vx &
+                                                             + rotMatrix(1, 2) * vy &
+                                                             + rotMatrix(1, 3) * vz
+                    flowDoms(mm, level, sps)%s(i, j, k, 2) = rotMatrix(2, 1) * vx &
+                                                             + rotMatrix(2, 2) * vy &
+                                                             + rotMatrix(2, 3) * vz
+                    flowDoms(mm, level, sps)%s(i, j, k, 3) = rotMatrix(3, 1) * vx &
+                                                             + rotMatrix(3, 2) * vy &
+                                                             + rotMatrix(3, 3) * vz
 
+                end if
+            end do
 
+        end do
 
-  !      ==================================================================
+    end subroutine correctPeriodicGridVel
 
+    !      ==================================================================
 
 #endif
 end module solverUtils
diff --git a/src/solver/solvers.F90 b/src/solver/solvers.F90
index aa36284d4..40d254bbb 100644
--- a/src/solver/solvers.F90
+++ b/src/solver/solvers.F90
@@ -1,1935 +1,1927 @@
 module solvers
 contains
 
-  subroutine solver
-    !
-    !       solver is the main subroutine of the solver library.
-    !       It controls the full multigrid and the kill signals.
-    !
-    use constants
-    use communication, only : myid
-    use inputDiscretization, only : eulerWallBCTreatment
-    use inputIteration, only: mgStartLevel, printIterations
-    use inputPhysics, only : equationMode
-    use inputUnsteady, only : timeIntegrationScheme
-    use killSignals, only : localSignal, noSignal
-    use iteration, only : changing_grid, currentLevel, exchangePressureEarly, &
-         groundLevel, nOldSolAvail, t0Solver
-    use monitor, only : timeUnsteady
-    use section, only : nSections
-    use utils, only : eulerWallsPresent
-    use multiGrid, only : transferToFineGrid
-    use partitioning, only : updateCoorFineMesh
-    use commonFormats, only : stringInt1
-    implicit none
-    !
-    !      Local variables.
-    !
-    real(kind=realType), dimension(nSections) :: dtAdvance
-
-    ! If the normal momentum equation should be used to determine
-    ! the pressure in the halo for inviscid walls, find out if there
-    ! actually are inviscid walls. If so, set the logical
-    ! exchangePressureEarly to .true.; otherwise set it to .false.
-
-    if(eulerWallBcTreatment == normalMomentum) then
-       exchangePressureEarly = EulerWallsPresent()
-    else
-       exchangePressureEarly = .false.
-    endif
-
-    ! Connect the kill signals with the appropriate functions.
-    ! Initialize localSignal for safety.
-    ! Only if signalling is supported.
+    subroutine solver
+        !
+        !       solver is the main subroutine of the solver library.
+        !       It controls the full multigrid and the kill signals.
+        !
+        use constants
+        use communication, only: myid
+        use inputDiscretization, only: eulerWallBCTreatment
+        use inputIteration, only: mgStartLevel, printIterations
+        use inputPhysics, only: equationMode
+        use inputUnsteady, only: timeIntegrationScheme
+        use killSignals, only: localSignal, noSignal
+        use iteration, only: changing_grid, currentLevel, exchangePressureEarly, &
+                             groundLevel, nOldSolAvail, t0Solver
+        use monitor, only: timeUnsteady
+        use section, only: nSections
+        use utils, only: eulerWallsPresent
+        use multiGrid, only: transferToFineGrid
+        use partitioning, only: updateCoorFineMesh
+        use commonFormats, only: stringInt1
+        implicit none
+        !
+        !      Local variables.
+        !
+        real(kind=realType), dimension(nSections) :: dtAdvance
+
+        ! If the normal momentum equation should be used to determine
+        ! the pressure in the halo for inviscid walls, find out if there
+        ! actually are inviscid walls. If so, set the logical
+        ! exchangePressureEarly to .true.; otherwise set it to .false.
+
+        if (eulerWallBcTreatment == normalMomentum) then
+            exchangePressureEarly = EulerWallsPresent()
+        else
+            exchangePressureEarly = .false.
+        end if
+
+        ! Connect the kill signals with the appropriate functions.
+        ! Initialize localSignal for safety.
+        ! Only if signalling is supported.
 
 #ifndef USE_NO_SIGNALS
-    localSignal = noSignal
-    call connect_signals
+        localSignal = noSignal
+        call connect_signals
 #endif
 
-    ! Determine the reference time for the solver.
+        ! Determine the reference time for the solver.
 
-    t0Solver = mpi_wtime()
+        t0Solver = mpi_wtime()
 
-    ! Set timeUnsteady to zero; this is amount of time simulated
-    ! in unsteady mode.
+        ! Set timeUnsteady to zero; this is amount of time simulated
+        ! in unsteady mode.
 
-    timeUnsteady = zero
+        timeUnsteady = zero
 
-    ! Loop over the number of grid levels in the current computation.
-    ! Note that the counter variable, groundLevel, is defined in
-    ! the module iteration.
+        ! Loop over the number of grid levels in the current computation.
+        ! Note that the counter variable, groundLevel, is defined in
+        ! the module iteration.
 
-    do groundLevel=mgStartlevel,1,-1
+        do groundLevel = mgStartlevel, 1, -1
 
-       ! Solve either the steady or the unsteady equations for this
-       ! grid level. The time spectral method can be considered as
-       ! a special kind of steady mode.
-       select case (equationMode)
-       case (steady, timeSpectral)
-          call solveState
-       case (unsteady)
-          select case (timeIntegrationScheme)
-          case (explicitRK)
-             call solverUnsteadyExplicitRK
-          end select
-       end select
+            ! Solve either the steady or the unsteady equations for this
+            ! grid level. The time spectral method can be considered as
+            ! a special kind of steady mode.
+            select case (equationMode)
+            case (steady, timeSpectral)
+                call solveState
+            case (unsteady)
+                select case (timeIntegrationScheme)
+                case (explicitRK)
+                    call solverUnsteadyExplicitRK
+                end select
+            end select
 
-       ! If this is not the finest grid level, interpolate the
-       ! solution to the next finer mesh and write a message that
-       ! this is happening. Only processor 0 performs this latter
-       ! task.
+            ! If this is not the finest grid level, interpolate the
+            ! solution to the next finer mesh and write a message that
+            ! this is happening. Only processor 0 performs this latter
+            ! task.
 
-       if(groundLevel > 1) then
+            if (groundLevel > 1) then
 
-          currentLevel = groundLevel - 1
+                currentLevel = groundLevel - 1
 
-          if(myID == 0) then
-             if (printIterations) then
-                print "(a)", "#"
-                print stringInt1, "# Going down to grid level ", currentLevel
-                print "(a)", "#"
-             end if
-          endif
-
-          call transferToFineGrid(.false.)
-
-          ! Move the coordinates of the new fine grid level into the
-          ! correct position. Only for unsteady problems with changing
-          ! meshes. Note that the first argument of updateCoorFineMesh
-          ! is an array with the time step for section.
-
-          if(equationMode == unsteady .and. changing_Grid) then
-             dtAdvance = timeUnsteady
-             call updateCoorFineMesh(dtAdvance, 1_intType)
-          endif
-
-          ! Reset nOldSolAvail to 1, such that the unsteady
-          ! computation on the finer mesh starts with a lower
-          ! order scheme.
-
-          nOldSolAvail = 1
-       endif
-
-    enddo
-
-    ! Explictly set groundlevel to 1
-    groundLevel = 1
-
-  end subroutine solver
-
-  ! ================================================
-  ! Utilities for unsteady simulation
-  ! ================================================
-  subroutine solverUnsteadyInit
-    !
-    !       Initialize variables related to unsteady simulation.
-    !       Some are the same as those in the *solver* subroutine,
-    !       while others are specific for unsteady problems.
-    !
-    use ALEUtils, only : fillCoor, setLevelALE
-    use constants
-    use inputDiscretization, only : eulerWallBCTreatment
-    use iteration, only : exchangePressureEarly, t0Solver
-    use killSignals, only : localSignal, noSignal
-    use monitor, only : timeUnsteady, timeStepUnsteady, writeVolume, writeSurface, writeGrid
-    use utils, only : eulerWallsPresent
-    implicit none
-
-    ! BC treatment for normal momentum equation
-    if(eulerWallBcTreatment == normalMomentum) then
-       exchangePressureEarly = EulerWallsPresent()
-    else
-       exchangePressureEarly = .false.
-    endif
-
-    ! Connect the kill signals
+                if (myID == 0) then
+                    if (printIterations) then
+                        print "(a)", "#"
+                        print stringInt1, "# Going down to grid level ", currentLevel
+                        print "(a)", "#"
+                    end if
+                end if
+
+                call transferToFineGrid(.false.)
+
+                ! Move the coordinates of the new fine grid level into the
+                ! correct position. Only for unsteady problems with changing
+                ! meshes. Note that the first argument of updateCoorFineMesh
+                ! is an array with the time step for section.
+
+                if (equationMode == unsteady .and. changing_Grid) then
+                    dtAdvance = timeUnsteady
+                    call updateCoorFineMesh(dtAdvance, 1_intType)
+                end if
+
+                ! Reset nOldSolAvail to 1, such that the unsteady
+                ! computation on the finer mesh starts with a lower
+                ! order scheme.
+
+                nOldSolAvail = 1
+            end if
+
+        end do
+
+        ! Explictly set groundlevel to 1
+        groundLevel = 1
+
+    end subroutine solver
+
+    ! ================================================
+    ! Utilities for unsteady simulation
+    ! ================================================
+    subroutine solverUnsteadyInit
+        !
+        !       Initialize variables related to unsteady simulation.
+        !       Some are the same as those in the *solver* subroutine,
+        !       while others are specific for unsteady problems.
+        !
+        use ALEUtils, only: fillCoor, setLevelALE
+        use constants
+        use inputDiscretization, only: eulerWallBCTreatment
+        use iteration, only: exchangePressureEarly, t0Solver
+        use killSignals, only: localSignal, noSignal
+        use monitor, only: timeUnsteady, timeStepUnsteady, writeVolume, writeSurface, writeGrid
+        use utils, only: eulerWallsPresent
+        implicit none
+
+        ! BC treatment for normal momentum equation
+        if (eulerWallBcTreatment == normalMomentum) then
+            exchangePressureEarly = EulerWallsPresent()
+        else
+            exchangePressureEarly = .false.
+        end if
+
+        ! Connect the kill signals
 #ifndef USE_NO_SIGNALS
-    localSignal = noSignal
-    call connect_signals
+        localSignal = noSignal
+        call connect_signals
 #endif
 
-    ! Determine the reference time for the solver.
-    t0Solver = mpi_wtime()
-
-    ! Set time to zero
-    timeUnsteady = zero
-    timeStepUnsteady = 0
-
-    ! Fill up old, xold and volold
-    call fillCoor
-
-    ! Set all ALE levels by initial configuration
-    call setLevelALE(-1_intType)
-  end subroutine solverUnsteadyInit
-
-  subroutine updateUnsteadyGeometry
-    !
-    !       Update quantities related to geometry due to modification of mesh
-    !       That could happen when
-    !       - Steady mode with mesh modification
-    !       - Unsteady mode with non-moving mesh but with prescribed mesh motion
-    !       - Unsteady mode with warping and/or rigidly moving mesh
-    !       - Unsteady mode coupled with an external solver
-    !
-    use ALEUtils, only : storeCoor, interpCoor, recoverCoor, setLevelALE, &
-         slipVelocitiesFineLevel_ALE
-    use bcdata, only : setbcdataFineGrid, setBCDataCoarseGrid
-    use constants
-    use inputMotion, only : gridMotionSpecified
-    use inputUnsteady, only : deltaT, updateWallDistanceUnsteady, useALE
-    use iteration, only : changing_grid, deforming_grid, currentLevel, groundLevel, nALEMeshes
-    use monitor, only : timeUnsteady, timeUnsteadyRestart
-    use partitioning, only : updateCoorFineMesh
-    use preprocessingAPI, only : shiftCoorAndVolumes, metric, &
-         updateCoordinatesAllLevels, updateMetricsAllLevels, faceRotationMatrices
-    use section, only : nSections
-    use solverUtils, only : gridVelocitiesFineLevel, gridVelocitiesCoarseLevels, &
-         gridvelocitiesfinelevelpart1, gridvelocitiesfinelevelpart2, &
-         normalVelocitiesAllLevels, slipVelocitiesFineLevel, slipVelocitiesCoarseLevels
-    use wallDistance, only : updateWallDistanceAllLevels
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn
-    real(kind=realType), dimension(nSections) :: tNewSec, deltaTSec
-    integer(kind=intType) :: lale
-
-    testChanging: if(changing_Grid .or. gridMotionSpecified) then
-
-       ! Set the new time for all sections
-       do nn=1,nSections
-          tNewSec(nn)   = timeUnsteady + timeUnsteadyRestart
-          deltaTSec(nn) = deltaT
-       enddo
-
-       ! For prescribed motion only
-       if (gridMotionSpecified) &
-            call updateCoorFineMesh(deltaTSec, 1_intType)
+        ! Determine the reference time for the solver.
+        t0Solver = mpi_wtime()
+
+        ! Set time to zero
+        timeUnsteady = zero
+        timeStepUnsteady = 0
+
+        ! Fill up old, xold and volold
+        call fillCoor
+
+        ! Set all ALE levels by initial configuration
+        call setLevelALE(-1_intType)
+    end subroutine solverUnsteadyInit
+
+    subroutine updateUnsteadyGeometry
+        !
+        !       Update quantities related to geometry due to modification of mesh
+        !       That could happen when
+        !       - Steady mode with mesh modification
+        !       - Unsteady mode with non-moving mesh but with prescribed mesh motion
+        !       - Unsteady mode with warping and/or rigidly moving mesh
+        !       - Unsteady mode coupled with an external solver
+        !
+        use ALEUtils, only: storeCoor, interpCoor, recoverCoor, setLevelALE, &
+                            slipVelocitiesFineLevel_ALE
+        use bcdata, only: setbcdataFineGrid, setBCDataCoarseGrid
+        use constants
+        use inputMotion, only: gridMotionSpecified
+        use inputUnsteady, only: deltaT, updateWallDistanceUnsteady, useALE
+        use iteration, only: changing_grid, deforming_grid, currentLevel, groundLevel, nALEMeshes
+        use monitor, only: timeUnsteady, timeUnsteadyRestart
+        use partitioning, only: updateCoorFineMesh
+        use preprocessingAPI, only: shiftCoorAndVolumes, metric, &
+                                    updateCoordinatesAllLevels, updateMetricsAllLevels, faceRotationMatrices
+        use section, only: nSections
+        use solverUtils, only: gridVelocitiesFineLevel, gridVelocitiesCoarseLevels, &
+                               gridvelocitiesfinelevelpart1, gridvelocitiesfinelevelpart2, &
+                               normalVelocitiesAllLevels, slipVelocitiesFineLevel, slipVelocitiesCoarseLevels
+        use wallDistance, only: updateWallDistanceAllLevels
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn
+        real(kind=realType), dimension(nSections) :: tNewSec, deltaTSec
+        integer(kind=intType) :: lale
+
+        testChanging: if (changing_Grid .or. gridMotionSpecified) then
+
+            ! Set the new time for all sections
+            do nn = 1, nSections
+                tNewSec(nn) = timeUnsteady + timeUnsteadyRestart
+                deltaTSec(nn) = deltaT
+            end do
+
+            ! For prescribed motion only
+            if (gridMotionSpecified) &
+                call updateCoorFineMesh(deltaTSec, 1_intType)
+
+            ! Adapt the geometric info on all grid levels needed for the
+            ! current ground level and multigrid cycle.
+            ! The wall distance only needs to be recomputed when the
+            ! grid is changing; not when a rigid body motion is
+            ! specified. Furthermore, the user can choose not to update
+            ! the wall distance, because he may know a priori that the
+            ! changes in geometry happen quite far away from the boundary
+            ! layer. This is accomplished via updateWallDistanceUnsteady.
 
-       ! Adapt the geometric info on all grid levels needed for the
-       ! current ground level and multigrid cycle.
-       ! The wall distance only needs to be recomputed when the
-       ! grid is changing; not when a rigid body motion is
-       ! specified. Furthermore, the user can choose not to update
-       ! the wall distance, because he may know a priori that the
-       ! changes in geometry happen quite far away from the boundary
-       ! layer. This is accomplished via updateWallDistanceUnsteady.
-
-       call updateCoordinatesAllLevels
-       if (changing_Grid .and. updateWallDistanceUnsteady) &
-            call updateWallDistanceAllLevels
-       call updateMetricsAllLevels
-
-       ! Update the rotation matrices of the faces. Only needed
-       ! on the finest grid level.
-       ! For prescribed motion only
-
-       if (gridMotionSpecified) &
-            call faceRotationMatrices(currentLevel, .false.)
+            call updateCoordinatesAllLevels
+            if (changing_Grid .and. updateWallDistanceUnsteady) &
+                call updateWallDistanceAllLevels
+            call updateMetricsAllLevels
+
+            ! Update the rotation matrices of the faces. Only needed
+            ! on the finest grid level.
+            ! For prescribed motion only
 
-       if (useALE) then
-          ! Update the velocities using ALE scheme if moving mesh is present
+            if (gridMotionSpecified) &
+                call faceRotationMatrices(currentLevel, .false.)
 
-          ! First update cell and surface velocity, both are vectors
-          ! Only quantities in blocks are updated, and they will not
-          ! be interpolated
+            if (useALE) then
+                ! Update the velocities using ALE scheme if moving mesh is present
 
-          call gridVelocitiesFineLevelPart1(deforming_Grid, tNewSec, 1_intType)
+                ! First update cell and surface velocity, both are vectors
+                ! Only quantities in blocks are updated, and they will not
+                ! be interpolated
 
-          ! Secondly store x to a temporary variable xALE
+                call gridVelocitiesFineLevelPart1(deforming_Grid, tNewSec, 1_intType)
 
-          call storeCoor
+                ! Secondly store x to a temporary variable xALE
 
-          ! Thirdly update surface normal and normal velocity
+                call storeCoor
 
-          ALEloop : do lale = 1, nALEMeshes
-             ! Interpolate mesh over latest time step for all ALE Meshes
-             call interpCoor(lale)
+                ! Thirdly update surface normal and normal velocity
 
-             ! Update s[I,J,K], norm
-             call metric(groundLevel)
+                ALEloop: do lale = 1, nALEMeshes
+                    ! Interpolate mesh over latest time step for all ALE Meshes
+                    call interpCoor(lale)
 
-             ! Update sFace[I,J,K]
-             call gridVelocitiesFineLevelPart2(deforming_Grid, tNewSec, 1_intType)
+                    ! Update s[I,J,K], norm
+                    call metric(groundLevel)
+
+                    ! Update sFace[I,J,K]
+                    call gridVelocitiesFineLevelPart2(deforming_Grid, tNewSec, 1_intType)
+
+                    ! Update uSlip
+                    call slipVelocitiesFineLevel_ALE(deforming_Grid, tNewSec, 1_intType)
+
+                    ! Update coarse level quantities to make sure multigrid is working
+                    call gridVelocitiesCoarseLevels(1_intType)
+                    call slipVelocitiesCoarseLevels(1_intType)
+
+                    ! Update rFace
+                    call normalVelocitiesAllLevels(1_intType)
+
+                    ! Store data to *lale* ALE level
+                    call setLevelALE(lale)
+                end do ALEloop
+
+                ! Lastly recover x from temporary variable
+                ! Then compute data for current level
+
+                call recoverCoor
+
+                ! Finish the rest of the update
+                call metric(groundLevel)
+                call gridVelocitiesFineLevelPart2(deforming_Grid, tNewSec, 1_intType)
+                call slipVelocitiesFineLevel_ALE(deforming_Grid, tNewSec, 1_intType)
 
-             ! Update uSlip
-             call slipVelocitiesFineLevel_ALE(deforming_Grid, tNewSec, 1_intType)
+            else
+                ! Otherwise update the velocities naively
 
-             ! Update coarse level quantities to make sure multigrid is working
-             call gridVelocitiesCoarseLevels(1_intType)
-             call slipVelocitiesCoarseLevels(1_intType)
+                ! Determine the velocities of the cell centers and faces
+                ! for the current ground level. Note that the spectral mode
+                ! is always 1 for unsteady mode.
 
-             ! Update rFace
-             call normalVelocitiesAllLevels(1_intType)
+                call gridVelocitiesFineLevel(deforming_Grid, tNewSec, 1_intType)
 
-             ! Store data to *lale* ALE level
-             call setLevelALE(lale)
-          enddo ALEloop
+                ! Determine the new slip velocities on the viscous walls.
 
-          ! Lastly recover x from temporary variable
-          ! Then compute data for current level
+                call slipVelocitiesFineLevel(deforming_Grid, tNewSec, 1_intType)
+            end if
 
-          call recoverCoor
+            ! After velocity computations on finest level are done,
+            ! Update those on coarser levels
 
-          ! Finish the rest of the update
-          call metric(groundLevel)
-          call gridVelocitiesFineLevelPart2(deforming_Grid, tNewSec, 1_intType)
-          call slipVelocitiesFineLevel_ALE(deforming_Grid, tNewSec, 1_intType)
+            call gridVelocitiesCoarseLevels(1_intType)
+            call slipVelocitiesCoarseLevels(1_intType)
 
-       else
-          ! Otherwise update the velocities naively
+            ! Compute the normal velocities of the boundaries, if
+            ! needed for the corresponding boundary condition.
 
-          ! Determine the velocities of the cell centers and faces
-          ! for the current ground level. Note that the spectral mode
-          ! is always 1 for unsteady mode.
+            call normalVelocitiesAllLevels(1_intType)
 
-          call gridVelocitiesFineLevel(deforming_Grid, tNewSec, 1_intType)
+            ! Determine the prescribed boundary condition data for the
+            ! boundary subfaces. First for the (currently) finest grid
+            ! and then iteratively for the coarser grid levels.
 
-          ! Determine the new slip velocities on the viscous walls.
+            call setBCDataFineGrid(.false.)
+            call setBCDataCoarseGrid
 
-          call slipVelocitiesFineLevel(deforming_Grid, tNewSec, 1_intType)
-       endif
+        end if testChanging
 
-       ! After velocity computations on finest level are done,
-       ! Update those on coarser levels
+    end subroutine updateUnsteadyGeometry
 
-       call gridVelocitiesCoarseLevels(1_intType)
-       call slipVelocitiesCoarseLevels(1_intType)
+    subroutine solverUnsteadyStep
+        !
+        !       Solve for next time step in unsteady simulation
+        !
+        use iteration, only: nOldSolAvail
+        use solverUtils, only: shiftsolution
+        use utils, only: setCoefTimeIntegrator
+        use wallDistance, only: updateWallDistanceAllLevels
+        implicit none
 
-       ! Compute the normal velocities of the boundaries, if
-       ! needed for the corresponding boundary condition.
+        ! Shift the old solution for the new time step.
 
-       call normalVelocitiesAllLevels(1_intType)
+        call shiftSolution
 
-       ! Determine the prescribed boundary condition data for the
-       ! boundary subfaces. First for the (currently) finest grid
-       ! and then iteratively for the coarser grid levels.
+        ! Set the coefficients for the time integrator.
 
-       call setBCDataFineGrid(.false.)
-       call setBCDataCoarseGrid
+        call setCoefTimeIntegrator
 
-    endif testChanging
+        ! Solve the state for the current time step and
+        ! update nOldSolAvail.
+        call solveState
+        nOldSolAvail = nOldSolAvail + 1
 
-  end subroutine updateUnsteadyGeometry
+    end subroutine solverUnsteadyStep
 
-  subroutine solverUnsteadyStep
-    !
-    !       Solve for next time step in unsteady simulation
-    !
-    use iteration, only : nOldSolAvail
-    use solverUtils, only : shiftsolution
-    use utils, only : setCoefTimeIntegrator
-    use wallDistance, only : updateWallDistanceAllLevels
-    implicit none
+    ! ================================================
+    ! The following are not interfaced with Python
+    ! ================================================
 
-    ! Shift the old solution for the new time step.
+    subroutine checkWriteUnsteadyInLoop
+        !
+        !       checkWriteUnsteadyInLoop checks if a solution must be
+        !       written inside the time loop and if so write it.
+        !
+        use communication, only: ADflow_comm_world
+        use constants
+        use inputIO, only: liftDistributionFile, sliceSolFile
+        use inputIteration, only: nSaveSurface, nSaveVolume
+        use inputMotion, only: gridMotionSpecified
+        use iteration, only: changing_grid, groundLevel, nOldLevels, oldSolWritten
+        use killSignals, only: localSignal, globalSignal, signalWrite, signalWriteQuit
+        use monitor, only: nTimeStepsRestart, timeStepUnsteady, timeUnsteadyRestart, &
+                           writeVolume, writeSurface, writeGrid
+        use tecplotIO, only: writeLiftDistributionFile, writeSlicesFile
+        use surfaceFamilies, only: fullFamList
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
 
-    call shiftSolution
+        integer(kind=intType) :: nn
+        character(len=7) :: intString
 
-    ! Set the coefficients for the time integrator.
+        ! Determine the global kill parameter if signals are supported.
 
-    call setCoefTimeIntegrator
+#ifndef USE_NO_SIGNALS
+        call mpi_allreduce(localSignal, globalSignal, 1, adflow_integer, &
+                           mpi_max, ADflow_comm_world, ierr)
+#endif
 
-    ! Solve the state for the current time step and
-    ! update nOldSolAvail.
-    call solveState
-    nOldSolAvail = nOldSolAvail + 1
+        ! Initialize the logicals for the writing to .false.
 
-  end subroutine solverUnsteadyStep
+        writeVolume = .false.
+        writeSurface = .false.
+        writeGrid = .false.
 
+        ! Check whether a solution file, either volume or surface, must
+        ! be written. Only on the finest grid level in stand alone mode.
 
-  ! ================================================
-  ! The following are not interfaced with Python
-  ! ================================================
+        !if(standAloneMode .and. groundLevel == 1) then
+        if (groundLevel == 1) then
+            if (mod(timeStepUnsteady, nSaveVolume) == 0) &
+                writeVolume = .true.
+            if (mod(timeStepUnsteady, nSaveSurface) == 0) &
+                writeSurface = .true.
 
-  subroutine checkWriteUnsteadyInLoop
-    !
-    !       checkWriteUnsteadyInLoop checks if a solution must be
-    !       written inside the time loop and if so write it.
-    !
-    use communication, only : ADflow_comm_world
-    use constants
-    use inputIO, only : liftDistributionFile, sliceSolFile
-    use inputIteration, only : nSaveSurface, nSaveVolume
-    use inputMotion, only : gridMotionSpecified
-    use iteration, only : changing_grid, groundLevel, nOldLevels, oldSolWritten
-    use killSignals, only : localSignal, globalSignal, signalWrite, signalWriteQuit
-    use monitor, only : nTimeStepsRestart, timeStepUnsteady, timeUnsteadyRestart, &
-         writeVolume, writeSurface, writeGrid
-    use tecplotIO, only : writeLiftDistributionFile, writeSlicesFile
-    use surfaceFamilies, only : fullFamList
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
+            if (globalSignal == signalWrite .or. &
+                globalSignal == signalWriteQuit) then
+                writeVolume = .true.
+                writeSurface = .true.
+            end if
 
-    integer(kind=intType) :: nn
-    character(len=7) :: intString
+            ! Determine whether or not a grid file must be written.
 
-    ! Determine the global kill parameter if signals are supported.
+            if (changing_Grid .or. gridMotionSpecified) &
+                writeGrid = writeVolume
 
-#ifndef USE_NO_SIGNALS
-    call mpi_allreduce(localSignal, globalSignal, 1, adflow_integer, &
-         mpi_max, ADflow_comm_world, ierr)
-#endif
+            ! Write the solution.
 
-    ! Initialize the logicals for the writing to .false.
+            if (writeGrid .or. writeVolume .or. writeSurface) &
+                call writeSol(fullFamList, size(fullFamList))
 
-    writeVolume  = .false.
-    writeSurface = .false.
-    writeGrid    = .false.
+            ! Write the slice files for this timestep if they have been specified. TEMPORARY
+            ! Write lift distribution TEMPORARY
+            write (intString, "(i4.4)") timeStepUnsteady + nTimeStepsRestart
+            intString = adjustl(intString)
+            !call writeSlicesFile(trim(slicesolfile)//"_Timestep"//trim(intString)//".dat", .True.)
+            !call writeLiftDistributionFile(trim(liftDistributionFile)//"_Timestep"//trim(intString)//".dat", .True.)
+
+        end if
+
+        ! Update the variable oldSolWritten.
+
+        do nn = (nOldLevels - 1), 2, -1
+            oldSolWritten(nn) = oldSolWritten(nn - 1)
+        end do
+
+        oldSolWritten(1) = writeVolume
+
+    end subroutine checkWriteUnsteadyInLoop
+
+    !      ==================================================================
+
+    subroutine checkWriteUnsteadyEndLoop
+        !
+        !       checkWriteUnsteadyEndLoop checks if a solution must be
+        !       written at the end of the time loop and if so write it.
+        !
+        use constants
+        use inputMotion, only: gridMotionSpecified
+        use iteration, only: changing_grid, groundLevel, nOldLevels, &
+                             oldSolWritten, standAloneMode
+        use monitor, only: writeVolume, writeSurface, writeGrid
+        use surfaceFamilies, only: fullFamList
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn
 
-    ! Check whether a solution file, either volume or surface, must
-    ! be written. Only on the finest grid level in stand alone mode.
+        ! Write a solution. Only on the finest grid in stand alone mode
+        ! and if some time steps have been performed since the last write.
 
-    !if(standAloneMode .and. groundLevel == 1) then
-    if (groundLevel == 1) then
-       if(mod(timeStepUnsteady, nSaveVolume) == 0)  &
-            writeVolume  = .true.
-       if(mod(timeStepUnsteady, nSaveSurface) == 0) &
-            writeSurface = .true.
+        if (standAloneMode .and. groundLevel == 1) then
 
-       if(globalSignal == signalWrite .or. &
-            globalSignal == signalWriteQuit) then
-          writeVolume  = .true.
-          writeSurface = .true.
-       endif
+            writeVolume = .not. writeVolume
+            writeSurface = .not. writeSurface
 
-       ! Determine whether or not a grid file must be written.
+            if (writeVolume .or. writeSurface) then
 
-       if(changing_Grid .or. gridMotionSpecified) &
-            writeGrid = writeVolume
+                ! Determine whether or not a grid file must be written.
 
-       ! Write the solution.
+                writeGrid = .false.
+                if (changing_Grid .or. gridMotionSpecified) &
+                    writeGrid = writeVolume
 
-       if(writeGrid .or. writeVolume .or. writeSurface) &
-            call writeSol(fullFamList, size(fullFamList))
+                ! Write the solution.
 
-       ! Write the slice files for this timestep if they have been specified. TEMPORARY
-       ! Write lift distribution TEMPORARY
-       write(intString,"(i4.4)") timeStepUnsteady + nTimeStepsRestart
-       intString = adjustl(intString)
-       !call writeSlicesFile(trim(slicesolfile)//"_Timestep"//trim(intString)//".dat", .True.)
-       !call writeLiftDistributionFile(trim(liftDistributionFile)//"_Timestep"//trim(intString)//".dat", .True.)
-
-    endif
-
-    ! Update the variable oldSolWritten.
-
-    do nn=(nOldLevels-1),2,-1
-       oldSolWritten(nn) = oldSolWritten(nn-1)
-    enddo
-
-    oldSolWritten(1) = writeVolume
-
-  end subroutine checkWriteUnsteadyInLoop
-
-  !      ==================================================================
-
-  subroutine checkWriteUnsteadyEndLoop
-    !
-    !       checkWriteUnsteadyEndLoop checks if a solution must be
-    !       written at the end of the time loop and if so write it.
-    !
-    use constants
-    use inputMotion, only : gridMotionSpecified
-    use iteration, only : changing_grid, groundLevel, nOldLevels, &
-         oldSolWritten, standAloneMode
-    use monitor, only : writeVolume, writeSurface, writeGrid
-    use surfaceFamilies, only : fullFamList
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn
+                call writeSol(fullFamList, size(fullFamList))
 
-    ! Write a solution. Only on the finest grid in stand alone mode
-    ! and if some time steps have been performed since the last write.
+                ! Update the variable oldSolWritten.
 
-    if(standAloneMode .and. groundLevel == 1) then
+                do nn = (nOldLevels - 1), 2, -1
+                    oldSolWritten(nn) = oldSolWritten(nn - 1)
+                end do
 
-       writeVolume  = .not. writeVolume
-       writeSurface = .not. writeSurface
+                oldSolWritten(1) = writeVolume
 
-       if(writeVolume .or. writeSurface) then
+            end if
 
-          ! Determine whether or not a grid file must be written.
+        end if
 
-          writeGrid = .false.
-          if(changing_Grid .or. gridMotionSpecified) &
-               writeGrid = writeVolume
+    end subroutine checkWriteUnsteadyEndLoop
+
+    ! ================================================
+    ! Utilities for explicit RK solver
+    ! ================================================
+
+    subroutine solverUnsteadyExplicitRK
+        !
+        !       solverUnsteadyExplicitRK solves the unsteady equations using
+        !       the explicit Runge-Kutta schemes for the multigrid level
+        !       groundLevel.
+        !
+        use constants
+        use blockPointers, only: nDom, il, jl, kl, vol, dw, w, dwOldRK, p
+        use communication
+        use flowVarRefState
+        use inputPhysics
+        use inputUnsteady
+        use iteration
+        use killSignals
+        use monitor
+        use utils, only: setPointers
+        use flowUtils, only: computePressure
+        use haloExchange, only: whalo1, whalo2
+        use turbutils, only: computeeddyviscosity
+        use turbAPI, only: turbResidual
+        use turbBCRoutines, only: applyAllTurbBC
+        use utils, only: convergenceHeader
+        use residuals, only: initRes, residual, sourceTerms
+        use flowUtils, only: computeLamViscosity
+        use BCRoutines, only: applyAllBC
+        use preprocessingAPI, only: updateCoordinatesAllLevels
+        implicit none
+        !
+        !      Local parameter.
+        !
+        integer(kind=intType), parameter :: nWriteConvHeader = 50
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: iter, nTimeSteps, stage
+        integer(kind=intType) :: i, j, k, l, m, nn
+
+        real(kind=realType) :: tmp, timeUnsteadyOld
+
+        character(len=7) :: numberString
+
+        ! Set rkStage to 0. This variable is only relevant if Runge-Kutta
+        ! smoothers are used as an iterative algorithm, but rkStage needs
+        ! to be initialized.
+
+        rkStage = 0
+
+        ! Initializations of the write and signal parameters.
+
+        writeVolume = .false.
+        writeSurface = .false.
+        writeGrid = .false.
+        globalSignal = noSignal
+        localSignal = noSignal
+
+        ! Initialize timeStepUnsteady to 0 and set the number of
+        ! time steps depending on the grid level.
 
-          ! Write the solution.
+        timeStepUnsteady = 0
 
-          call writeSol(fullFamList, size(fullFamList))
+        nTimeSteps = nTimeStepsCoarse
+        if (groundLevel == 1) nTimeSteps = nTimeStepsFine
 
-          ! Update the variable oldSolWritten.
+        ! Write a message to stdout about how many time steps will
+        ! be taken on the current grid. Only processor 0 does this.
 
-          do nn=(nOldLevels-1),2,-1
-             oldSolWritten(nn) = oldSolWritten(nn-1)
-          enddo
+        if (myID == 0) then
+            write (numberString, "(i6)") nTimeSteps
+            numberString = adjustl(numberString)
 
-          oldSolWritten(1) = writeVolume
+            print "(a)", "#"
+            print "(A, I1, 3(A))", "# Grid ", groundLevel, ": Performing ", trim(numberString), &
+                " explicit Runge Kutta time steps."
+            print "(a)", "#"
+
+            ! Also write the convergence header. Technically this is
+            ! not really a convergence history for explicit RK, but
+            ! the routine is called this way.
 
-       endif
+            call convergenceHeader
+        end if
 
-    endif
+        ! Determine and write the initial convergence info. Again not
+        ! really convergence for explicit RK.
 
-  end subroutine checkWriteUnsteadyEndLoop
+        call convergenceInfo
+        !
+        !       Loop over the number of time steps to be computed.
+        !
+        ! Initialize the unsteady time.
 
-  ! ================================================
-  ! Utilities for explicit RK solver
-  ! ================================================
+        timeUnsteadyOld = timeUnsteady
+        timeUnsteady = timeUnsteadyOld + gammaRKUnsteady(1) * deltaT
 
-  subroutine solverUnsteadyExplicitRK
-    !
-    !       solverUnsteadyExplicitRK solves the unsteady equations using
-    !       the explicit Runge-Kutta schemes for the multigrid level
-    !       groundLevel.
-    !
-    use constants
-    use blockPointers, only: nDom, il, jl, kl, vol, dw, w, dwOldRK, p
-    use communication
-    use flowVarRefState
-    use inputPhysics
-    use inputUnsteady
-    use iteration
-    use killSignals
-    use monitor
-    use utils, only : setPointers
-    use flowUtils, only : computePressure
-    use haloExchange, only : whalo1, whalo2
-    use turbutils, only : computeeddyviscosity
-    use turbAPI, only : turbResidual
-    use turbBCRoutines, only : applyAllTurbBC
-    use utils, only : convergenceHeader
-    use residuals, only :initRes, residual, sourceTerms
-    use flowUtils, only : computeLamViscosity
-    use BCRoutines, only : applyAllBC
-    use preprocessingAPI, only : updateCoordinatesAllLevels
-    implicit none
-    !
-    !      Local parameter.
-    !
-    integer(kind=intType), parameter :: nWriteConvHeader = 50
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: iter, nTimeSteps, stage
-    integer(kind=intType) :: i, j, k, l, m, nn
-
-    real(kind=realType) :: tmp, timeUnsteadyOld
+        timeLoop: do iter = 1, nTimeSteps
 
-    character(len=7) :: numberString
-
-    ! Set rkStage to 0. This variable is only relevant if Runge-Kutta
-    ! smoothers are used as an iterative algorithm, but rkStage needs
-    ! to be initialized.
-
-    rkStage = 0
-
-    ! Initializations of the write and signal parameters.
-
-    writeVolume  = .false.
-    writeSurface = .false.
-    writeGrid    = .false.
-    globalSignal = noSignal
-    localSignal  = noSignal
+            ! Rewrite the convergence header after a certain number of
+            ! iterations. Only processor 0 does this.
 
-    ! Initialize timeStepUnsteady to 0 and set the number of
-    ! time steps depending on the grid level.
+            if (myID == 0 .and. mod(iter, nWriteConvHeader) == 0) &
+                call convergenceHeader
 
-    timeStepUnsteady = 0
+            ! Loop over the number of RK stages.
 
-    nTimeSteps = nTimeStepsCoarse
-    if(groundLevel == 1) nTimeSteps = nTimeStepsFine
+            stageLoop: do stage = 1, nRKStagesUnsteady
 
-    ! Write a message to stdout about how many time steps will
-    ! be taken on the current grid. Only processor 0 does this.
+                ! Compute the residual. Note that the turbulent residual must
+                ! be computed first, because it uses the array of the flow
+                ! field residuals as temporary buffers.
 
-    if(myID == 0) then
-       write(numberString, "(i6)") nTimeSteps
-       numberString = adjustl(numberString)
+                if (equations == RANSEquations) then
+                    call initres(nt1, nt2)
+                    call turbResidual
+                end if
 
-       print "(a)", "#"
-       print "(A, I1, 3(A))", "# Grid ", groundLevel,": Performing ", trim(numberString), &
-         " explicit Runge Kutta time steps."
-       print "(a)", "#"
-
-       ! Also write the convergence header. Technically this is
-       ! not really a convergence history for explicit RK, but
-       ! the routine is called this way.
-
-       call convergenceHeader
-    endif
-
-    ! Determine and write the initial convergence info. Again not
-    ! really convergence for explicit RK.
-
-    call convergenceInfo
-    !
-    !       Loop over the number of time steps to be computed.
-    !
-    ! Initialize the unsteady time.
-
-    timeUnsteadyOld = timeUnsteady
-    timeUnsteady    = timeUnsteadyOld + gammaRKUnsteady(1)*deltaT
-
-    timeLoop: do iter=1,nTimeSteps
-
-       ! Rewrite the convergence header after a certain number of
-       ! iterations. Only processor 0 does this.
-
-       if(myID == 0 .and. mod(iter,nWriteConvHeader) == 0) &
-            call convergenceHeader
+                call initres(1_intType, nwf)
+                call sourceTerms()
+                call residual
+
+                ! Loop over the number of domains.
+
+                domainLoop: do nn = 1, nDom
+
+                    ! Set the pointers to this domain. Note that there is only
+                    ! one time instance for a time accurate computation.
+
+                    call setPointers(nn, currentLevel, 1)
+
+                    ! Step 1: Store in dw the conservative update for the
+                    !         flow field variables. This means a multiplication
+                    !         with deltaT/vol. Also store in w the conservative
+                    !         flow field variables.
+
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 2, il
+                                tmp = deltaT / vol(i, j, k)
+                                do l = 1, nw
+                                    dw(i, j, k, l) = tmp * dw(i, j, k, l)
+                                end do
+
+                                w(i, j, k, ivx) = w(i, j, k, ivx) * w(i, j, k, irho)
+                                w(i, j, k, ivy) = w(i, j, k, ivy) * w(i, j, k, irho)
+                                w(i, j, k, ivz) = w(i, j, k, ivz) * w(i, j, k, irho)
+
+                            end do
+                        end do
+                    end do
+
+                    ! Step 2: Compute the new conservative flow field variables
+                    !         and primitive turbulence variables.
+
+                    do m = 1, (stage - 1)
+                        tmp = betaRKUnsteady(stage, m)
+                        if (tmp /= zero) then
+                            do l = 1, nw
+                                do k = 2, kl
+                                    do j = 2, jl
+                                        do i = 2, il
+                                            w(i, j, k, l) = w(i, j, k, l) - tmp * dwOldRK(m, i, j, k, l)
+                                        end do
+                                    end do
+                                end do
+                            end do
+                        end if
+                    end do
+
+                    tmp = betaRKUnsteady(stage, stage)
+                    do l = 1, nw
+                        do k = 2, kl
+                            do j = 2, jl
+                                do i = 2, il
+                                    w(i, j, k, l) = w(i, j, k, l) - tmp * dw(i, j, k, l)
+                                end do
+                            end do
+                        end do
+                    end do
+
+                    ! Step 3. Convert the conservative variables back to
+                    !         primitive variables for this block. Compute the
+                    !         laminar and eddy viscosities as well.
+
+                    ! Convert the momentum into velocities.
+                    ! Also store the total energy in the pressure, such that
+                    ! the routine computePressure can be used.
+
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 2, il
+                                tmp = one / w(i, j, k, irho)
+                                w(i, j, k, ivx) = w(i, j, k, ivx) * tmp
+                                w(i, j, k, ivy) = w(i, j, k, ivy) * tmp
+                                w(i, j, k, ivz) = w(i, j, k, ivz) * tmp
+                                p(i, j, k) = w(i, j, k, irhoE)
+                            end do
+                        end do
+                    end do
+
+                    ! Compute the pressure.
+
+                    call computePressure(2_intType, il, 2_intType, jl, &
+                                         2_intType, kl, 0_intType)
+
+                    ! Swap the pressure and total energy, because
+                    ! computePressure stores the pressure in the position of
+                    ! the total energy.
+
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 2, il
+                                tmp = p(i, j, k)
+                                p(i, j, k) = w(i, j, k, irhoE)
+                                w(i, j, k, irhoE) = tmp
+                            end do
+                        end do
+                    end do
+
+                    ! Compute the laminar and eddy viscosities.
+
+                    call computeLamViscosity(.False.)
+                    call computeEddyViscosity(.False.)
+
+                    ! Step 4. Store dw in dwOldRK if this is not the last
+                    !         RK stage.
+
+                    if (stage < nRKStagesUnsteady) then
+                        do l = 1, nw
+                            do k = 2, kl
+                                do j = 2, jl
+                                    do i = 2, il
+                                        dwOldRK(stage, i, j, k, l) = dw(i, j, k, l)
+                                    end do
+                                end do
+                            end do
+                        end do
+                    end if
+
+                end do domainLoop
+
+                ! Exchange the pressure if the pressure must be exchanged
+                ! early. Only the first halo's are needed, thus whalo1 is
+                ! called.
+
+                call whalo1(currentLevel, 1_intType, 0_intType, .true., &
+                            .false., .false.)
+
+                ! Apply the boundary conditions to all blocks.
+
+                call applyAllBC(.true.)
+                if (equations == RANSEquations) call applyAllTurbBC(.true.)
+
+                ! Exchange the halo data. As we are on the fine grid
+                ! the second halo is needed.
+
+                call whalo2(currentLevel, 1_intType, nw, .true., &
+                            .true., .true.)
+
+                ! Determine the time and initialize the geometrical and
+                ! boundary info for next stage, if needed.
+
+                if (stage < nRKStagesUnsteady) then
+                    timeUnsteady = timeUnsteadyOld &
+                                   + gammaRKUnsteady(stage + 1) * deltaT
+
+                    call initStageRK(stage + 1)
+                end if
 
-       ! Loop over the number of RK stages.
-
-       stageLoop: do stage=1,nRKStagesUnsteady
-
-          ! Compute the residual. Note that the turbulent residual must
-          ! be computed first, because it uses the array of the flow
-          ! field residuals as temporary buffers.
-
-          if(equations == RANSEquations) then
-             call initres(nt1, nt2)
-             call turbResidual
-          endif
-
-          call initres(1_intType, nwf)
-          call sourceTerms()
-          call residual
-
-          ! Loop over the number of domains.
-
-          domainLoop: do nn=1,nDom
-
-             ! Set the pointers to this domain. Note that there is only
-             ! one time instance for a time accurate computation.
-
-             call setPointers(nn,currentLevel,1)
-
-             ! Step 1: Store in dw the conservative update for the
-             !         flow field variables. This means a multiplication
-             !         with deltaT/vol. Also store in w the conservative
-             !         flow field variables.
-
-             do k=2,kl
-                do j=2,jl
-                   do i=2,il
-                      tmp = deltaT/vol(i,j,k)
-                      do l=1,nw
-                         dw(i,j,k,l) = tmp*dw(i,j,k,l)
-                      enddo
-
-                      w(i,j,k,ivx) = w(i,j,k,ivx)*w(i,j,k,irho)
-                      w(i,j,k,ivy) = w(i,j,k,ivy)*w(i,j,k,irho)
-                      w(i,j,k,ivz) = w(i,j,k,ivz)*w(i,j,k,irho)
-
-                   enddo
-                enddo
-             enddo
-
-             ! Step 2: Compute the new conservative flow field variables
-             !         and primitive turbulence variables.
-
-             do m=1,(stage-1)
-                tmp = betaRKUnsteady(stage,m)
-                if(tmp /= zero) then
-                   do l=1,nw
-                      do k=2,kl
-                         do j=2,jl
-                            do i=2,il
-                               w(i,j,k,l) = w(i,j,k,l) - tmp*dwOldRK(m,i,j,k,l)
-                            enddo
-                         enddo
-                      enddo
-                   enddo
-                endif
-             enddo
-
-             tmp = betaRKUnsteady(stage,stage)
-             do l=1,nw
-                do k=2,kl
-                   do j=2,jl
-                      do i=2,il
-                         w(i,j,k,l) = w(i,j,k,l) - tmp*dw(i,j,k,l)
-                      enddo
-                   enddo
-                enddo
-             enddo
-
-             ! Step 3. Convert the conservative variables back to
-             !         primitive variables for this block. Compute the
-             !         laminar and eddy viscosities as well.
-
-             ! Convert the momentum into velocities.
-             ! Also store the total energy in the pressure, such that
-             ! the routine computePressure can be used.
-
-             do k=2,kl
-                do j=2,jl
-                   do i=2,il
-                      tmp          = one/w(i,j,k,irho)
-                      w(i,j,k,ivx) = w(i,j,k,ivx)*tmp
-                      w(i,j,k,ivy) = w(i,j,k,ivy)*tmp
-                      w(i,j,k,ivz) = w(i,j,k,ivz)*tmp
-                      p(i,j,k)     = w(i,j,k,irhoE)
-                   enddo
-                enddo
-             enddo
+            end do stageLoop
 
-             ! Compute the pressure.
+            ! Increment timeStepUnsteady and update
+            ! timeUnsteady with the current time step.
 
-             call computePressure(2_intType, il, 2_intType, jl, &
-                  2_intType, kl, 0_intType)
+            timeStepUnsteady = timeStepUnsteady + 1
+            timeUnsteady = timeUnsteadyOld + deltaT
 
-             ! Swap the pressure and total energy, because
-             ! computePressure stores the pressure in the position of
-             ! the total energy.
+            ! Write the convergence info.
 
-             do k=2,kl
-                do j=2,jl
-                   do i=2,il
-                      tmp            = p(i,j,k)
-                      p(i,j,k)       = w(i,j,k,irhoE)
-                      w(i,j,k,irhoE) = tmp
-                   enddo
-                enddo
-             enddo
+            call convergenceInfo
 
-             ! Compute the laminar and eddy viscosities.
+            ! Determine the time and initialize the geometrical and
+            ! boundary info for the next time step, such that the writing
+            ! of grid files with moving geometries is done correctly.
 
-             call computeLamViscosity(.False.)
-             call computeEddyViscosity(.False.)
-
-             ! Step 4. Store dw in dwOldRK if this is not the last
-             !         RK stage.
-
-             if(stage < nRKStagesUnsteady) then
-                do l=1,nw
-                   do k=2,kl
-                      do j=2,jl
-                         do i=2,il
-                            dwOldRK(stage,i,j,k,l) = dw(i,j,k,l)
-                         enddo
-                      enddo
-                   enddo
-                enddo
-             endif
-
-          enddo domainLoop
-
-          ! Exchange the pressure if the pressure must be exchanged
-          ! early. Only the first halo's are needed, thus whalo1 is
-          ! called.
-
-          call whalo1(currentLevel, 1_intType, 0_intType, .true.,&
-               .false., .false.)
-
-          ! Apply the boundary conditions to all blocks.
-
-          call applyAllBC(.true.)
-          if(equations == RANSEquations) call applyAllTurbBC(.true.)
-
-          ! Exchange the halo data. As we are on the fine grid
-          ! the second halo is needed.
-
-          call whalo2(currentLevel, 1_intType, nw, .true., &
-               .true., .true.)
-
-          ! Determine the time and initialize the geometrical and
-          ! boundary info for next stage, if needed.
-
-          if(stage < nRKStagesUnsteady) then
-             timeUnsteady = timeUnsteadyOld &
-                  + gammaRKUnsteady(stage+1)*deltaT
+            timeUnsteadyOld = timeUnsteady
+            timeUnsteady = timeUnsteadyOld + gammaRKUnsteady(1) * deltaT
 
-             call initStageRK(stage+1)
-          endif
+            call initStageRK(1_intType)
 
-       enddo stageLoop
-
-       ! Increment timeStepUnsteady and update
-       ! timeUnsteady with the current time step.
-
-       timeStepUnsteady = timeStepUnsteady + 1
-       timeUnsteady     = timeUnsteadyOld  + deltaT
+            ! Determine whether or not solution files must be written.
 
-       ! Write the convergence info.
+            call checkWriteUnsteadyInLoop
 
-       call convergenceInfo
+            ! Exit the loop if the corresponding kill signal
+            ! has been received.
 
-       ! Determine the time and initialize the geometrical and
-       ! boundary info for the next time step, such that the writing
-       ! of grid files with moving geometries is done correctly.
+            if (globalSignal == signalWriteQuit) exit
 
-       timeUnsteadyOld = timeUnsteady
-       timeUnsteady    = timeUnsteadyOld + gammaRKUnsteady(1)*deltaT
+        end do timeLoop
 
-       call initStageRK(1_intType)
+        ! Determine whether or not the final solution must be written.
 
-       ! Determine whether or not solution files must be written.
+        call checkWriteUnsteadyEndLoop
 
-       call checkWriteUnsteadyInLoop
+    end subroutine solverUnsteadyExplicitRK
 
-       ! Exit the loop if the corresponding kill signal
-       ! has been received.
+    !=================================================================
 
-       if(globalSignal == signalWriteQuit) exit
+    subroutine initStageRK(stage)
+        !
+        !       initStageRK performs the initialization tasks for the
+        !       Runge-Kutta schemes in unsteady mode.
+        !
+        use inputMotion
+        use inputUnsteady
+        use iteration
+        use monitor
+        use section
+        use BCData, only: setBCDataFineGrid
+        use wallDistance, only: updateWallDistanceAllLevels
+        use solverUtils
+        use ALEUtils
+        use preprocessingAPI, only: updateMetricsAllLevels, &
+                                    updateCoordinatesAllLevels, faceRotationMatrices
+        use partitioning, only: updateCoorFineMesh
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: stage
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn
+        real(kind=realType) :: fact
 
-    enddo timeLoop
+        real(kind=realType), dimension(nSections) :: tNewSec, deltaTSec
 
-    ! Determine whether or not the final solution must be written.
+        ! If the grid is changing a whole lot of geometric
+        ! info must be adapted.
 
-    call checkWriteUnsteadyEndLoop
+        testChanging: if (changing_Grid .or. gridMotionSpecified) then
 
-  end subroutine solverUnsteadyExplicitRK
+            ! Determine the time step relative to the previous stage.
 
-  !=================================================================
+            if (stage == 1) then
+                fact = deltaT * (one - gammaRKUnsteady(nRKStagesUnsteady))
+            else
+                fact = deltaT * (gammaRKUnsteady(stage) &
+                                 - gammaRKUnsteady(stage - 1))
+            end if
 
-  subroutine initStageRK(stage)
-    !
-    !       initStageRK performs the initialization tasks for the
-    !       Runge-Kutta schemes in unsteady mode.
-    !
-    use inputMotion
-    use inputUnsteady
-    use iteration
-    use monitor
-    use section
-    use BCData, only : setBCDataFineGrid
-    use wallDistance, only : updateWallDistanceAllLevels
-    use solverUtils
-    use ALEUtils
-    use preprocessingAPI, only :  updateMetricsAllLevels, &
-         updateCoordinatesAllLevels, faceRotationMatrices
-    use partitioning, only : updateCoorFineMesh
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: stage
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn
-    real(kind=realType)   :: fact
+            ! Set the time for all sections; also store their
+            ! time step. They are the same for all sections, but all
+            ! of them should be updated because of consistency.
 
-    real(kind=realType), dimension(nSections) :: tNewSec, deltaTSec
+            do nn = 1, nSections
+                tNewSec(nn) = timeUnsteady + timeUnsteadyRestart
+                deltaTSec(nn) = fact
+            end do
 
-    ! If the grid is changing a whole lot of geometric
-    ! info must be adapted.
-
-    testChanging: if(changing_Grid .or. gridMotionSpecified) then
-
-       ! Determine the time step relative to the previous stage.
-
-       if(stage == 1) then
-          fact = deltaT*(one - gammaRKUnsteady(nRKStagesUnsteady))
-       else
-          fact = deltaT*(gammaRKUnsteady(stage) &
-               -         gammaRKUnsteady(stage-1))
-       endif
-
-       ! Set the time for all sections; also store their
-       ! time step. They are the same for all sections, but all
-       ! of them should be updated because of consistency.
-
-       do nn=1,nSections
-          tNewSec(nn)   = timeUnsteady + timeUnsteadyRestart
-          deltaTSec(nn) = fact
-       enddo
-
-       ! Advance the coordinates 1 time step.
-
-       call updateCoorFineMesh(deltaTSec, 1_intType)
-
-       ! Adapt the geometric info on all grid levels needed for the
-       ! current ground level and multigrid cycle.
-       ! The wall distance only needs to be recomputed when the
-       ! grid is changing; not when a rigid body motion is
-       ! specified. Furthermore, the user can choose not to update
-       ! the wall distance, because he may know a priori that the
-       ! changes in geometry happen quite far away from the boundary
-       ! layer. This is accomplished via updateWallDistanceUnsteady.
-
-       call updateCoordinatesAllLevels
-       if(changing_Grid .and. updateWallDistanceUnsteady) &
-            call updateWallDistanceAllLevels
-
-       call updateMetricsAllLevels
-
-       ! Update the rotation matrices of the faces. Only needed
-       ! on the finest grid level.
-
-       call faceRotationMatrices(currentLevel, .false.)
-
-       ! Determine the velocities of the cell centers and faces
-       ! for the current ground level. Note that the spectral mode
-       ! is always 1 for unsteady mode.
-
-       call gridVelocitiesFineLevel(deforming_Grid, tNewSec, 1_intType)
-
-       ! Determine the new slip velocities on the viscous walls.
-
-       call slipVelocitiesFineLevel(deforming_Grid, tNewSec, 1_intType)
-
-    endif testChanging
-
-    ! Determine the prescribed boundary condition data for the
-    ! boundary subfaces for the (currently) finest grid.
-
-    call setBCDataFineGrid(.false.)
-
-  end subroutine initStageRK
-
-  ! ================================================
-  ! Internal utilities
-  ! ================================================
-  subroutine solveState
-    !
-    !       solveState computes either the steady or unsteady state
-    !       vector for the multigrid level groundLevel, which can be
-    !       found in the module iteration.
-    !
-    use constants
-    use communication, only : myID, adflow_comm_world
-    use NKSolver, only : NKLSFuncEvals, freestreamResset, NK_LS, &
-         NK_switchTol, useNKSolver, NK_CFL, getFreeStreamResidual, &
-         getCurrentResidual, NKStep, computeResidualNK
-    use anksolver, only : ANK_switchTol, useANKSolver, ANK_CFL, ANKStep, destroyANKSolver
-    use inputio, only : forcedLiftFile, forcedSliceFile, forcedVolumeFile, &
-         forcedSurfaceFile, solFile, newGridFile, surfaceSolFile, convSolFileBasename
-    use inputIteration, only: CFL, CFLCoarse, minIterNum, nCycles, &
-         nCyclesCoarse, nMGSteps, nUpdateBleeds, printIterations, rkReset, timeLimit
-    use iteration, only : cycling, approxTotalIts, converged, CFLMonitor, &
-         groundLevel, iterTot, iterType, currentLevel, rhoRes0, totalR, t0Solver,&
-         rhoResStart, totalR0, totalRFinal, totalRStart, stepMonitor, linResMonitor, ordersConverged
-    use killSignals, only : globalSignal, localSignal, noSignal, routineFailed, signalWrite, &
-         signalWriteQuit
-    use monitor, only : writeGrid, writeSurface, writeVolume, writeSolEachIter
-    use utils, only: allocConvArrays, convergenceHeader
-    use tecplotIO, only : writeTecplot
-    use multiGrid, only : setCycleStrategy, executeMGCycle
-    use surfaceFamilies, only : fullFamList
-    use flowVarRefState, only : nwf
-    use residuals, only : residual, initres, sourceTerms
-    use solverUtils, only : timeStep
-    implicit none
-    !
-    !      Local parameter
-    !
-    integer(kind=intType), parameter :: nWriteConvHeader = 50
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-    integer(kind=intType) ::  nMGCycles
-    character(len=7) :: numberString
-    logical :: absConv, relConv, firstNK, firstANK, old_writeGrid
-    real(kind=realType) :: nk_switchtol_save, curTime, ordersConvergedOld
-    character(len=maxStringLen) :: iterFormat
-
-    ! Allocate the memory for cycling.
-    if (allocated(cycling)) then
-       deallocate(cycling)
-    end if
-    allocate(cycling(nMGSteps), stat=ierr)
-
-    ! Some initializations.
-
-    converged    = .false.
-    globalSignal = noSignal
-    localSignal  = noSignal
-    iterTot      = 0
-
-    ! Determine the cycling strategy for this level.
-
-    call setCycleStrategy
-
-    ! Determine the number of multigrid cycles, which depends
-    ! on the current multigrid level.
-
-    nMGCycles = nCycles
-    if(groundLevel > 1) nMGCycles = nCyclesCoarse
-
-    ! Allocate (or reallocate) the convergence array for this solveState.
-    call allocConvArrays(nMGCycles)
-
-    ! Allocate space for storing hisotry of function evaluations for NK
-    ! solver with non-monotone line search if necessary
-    if (currentLevel == 1 .and. useNKSolver .and. NK_LS==nonMonotoneLineSearch) then
-       allocate(NKLSFuncEvals(nMGCycles))
-    end if
-
-    ! Only compute the free stream resisudal once for efficiency on the
-    ! fine grid only.
-    if (groundLevel == 1)  then
-       if (.not. freeStreamResSet)  then
-          call getFreeStreamResidual(rhoRes0, totalR0)
-          freeStreamResSet = .True.
-       end if
-    end if
-    ! Write a message. Only done by processor 0.
-
-    if(myID == 0) then
-
-       ! Write a message about the number of multigrid iterations
-       ! to be performed.
+            ! Advance the coordinates 1 time step.
+
+            call updateCoorFineMesh(deltaTSec, 1_intType)
+
+            ! Adapt the geometric info on all grid levels needed for the
+            ! current ground level and multigrid cycle.
+            ! The wall distance only needs to be recomputed when the
+            ! grid is changing; not when a rigid body motion is
+            ! specified. Furthermore, the user can choose not to update
+            ! the wall distance, because he may know a priori that the
+            ! changes in geometry happen quite far away from the boundary
+            ! layer. This is accomplished via updateWallDistanceUnsteady.
+
+            call updateCoordinatesAllLevels
+            if (changing_Grid .and. updateWallDistanceUnsteady) &
+                call updateWallDistanceAllLevels
+
+            call updateMetricsAllLevels
+
+            ! Update the rotation matrices of the faces. Only needed
+            ! on the finest grid level.
+
+            call faceRotationMatrices(currentLevel, .false.)
+
+            ! Determine the velocities of the cell centers and faces
+            ! for the current ground level. Note that the spectral mode
+            ! is always 1 for unsteady mode.
+
+            call gridVelocitiesFineLevel(deforming_Grid, tNewSec, 1_intType)
+
+            ! Determine the new slip velocities on the viscous walls.
+
+            call slipVelocitiesFineLevel(deforming_Grid, tNewSec, 1_intType)
+
+        end if testChanging
+
+        ! Determine the prescribed boundary condition data for the
+        ! boundary subfaces for the (currently) finest grid.
+
+        call setBCDataFineGrid(.false.)
+
+    end subroutine initStageRK
+
+    ! ================================================
+    ! Internal utilities
+    ! ================================================
+    subroutine solveState
+        !
+        !       solveState computes either the steady or unsteady state
+        !       vector for the multigrid level groundLevel, which can be
+        !       found in the module iteration.
+        !
+        use constants
+        use communication, only: myID, adflow_comm_world
+        use NKSolver, only: NKLSFuncEvals, freestreamResset, NK_LS, &
+                            NK_switchTol, useNKSolver, NK_CFL, getFreeStreamResidual, &
+                            getCurrentResidual, NKStep, computeResidualNK
+        use anksolver, only: ANK_switchTol, useANKSolver, ANK_CFL, ANKStep, destroyANKSolver
+        use inputio, only: forcedLiftFile, forcedSliceFile, forcedVolumeFile, &
+                           forcedSurfaceFile, solFile, newGridFile, surfaceSolFile, convSolFileBasename
+        use inputIteration, only: CFL, CFLCoarse, minIterNum, nCycles, &
+                                  nCyclesCoarse, nMGSteps, nUpdateBleeds, printIterations, rkReset, timeLimit
+        use iteration, only: cycling, approxTotalIts, converged, CFLMonitor, &
+                             groundLevel, iterTot, iterType, currentLevel, rhoRes0, totalR, t0Solver, &
+                             rhoResStart, totalR0, totalRFinal, totalRStart, stepMonitor, linResMonitor, ordersConverged
+        use killSignals, only: globalSignal, localSignal, noSignal, routineFailed, signalWrite, &
+                               signalWriteQuit
+        use monitor, only: writeGrid, writeSurface, writeVolume, writeSolEachIter
+        use utils, only: allocConvArrays, convergenceHeader
+        use tecplotIO, only: writeTecplot
+        use multiGrid, only: setCycleStrategy, executeMGCycle
+        use surfaceFamilies, only: fullFamList
+        use flowVarRefState, only: nwf
+        use residuals, only: residual, initres, sourceTerms
+        use solverUtils, only: timeStep
+        implicit none
+        !
+        !      Local parameter
+        !
+        integer(kind=intType), parameter :: nWriteConvHeader = 50
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+        integer(kind=intType) :: nMGCycles
+        character(len=7) :: numberString
+        logical :: absConv, relConv, firstNK, firstANK, old_writeGrid
+        real(kind=realType) :: nk_switchtol_save, curTime, ordersConvergedOld
+        character(len=maxStringLen) :: iterFormat
+
+        ! Allocate the memory for cycling.
+        if (allocated(cycling)) then
+            deallocate (cycling)
+        end if
+        allocate (cycling(nMGSteps), stat=ierr)
+
+        ! Some initializations.
+
+        converged = .false.
+        globalSignal = noSignal
+        localSignal = noSignal
+        iterTot = 0
+
+        ! Determine the cycling strategy for this level.
+
+        call setCycleStrategy
+
+        ! Determine the number of multigrid cycles, which depends
+        ! on the current multigrid level.
+
+        nMGCycles = nCycles
+        if (groundLevel > 1) nMGCycles = nCyclesCoarse
+
+        ! Allocate (or reallocate) the convergence array for this solveState.
+        call allocConvArrays(nMGCycles)
+
+        ! Allocate space for storing hisotry of function evaluations for NK
+        ! solver with non-monotone line search if necessary
+        if (currentLevel == 1 .and. useNKSolver .and. NK_LS == nonMonotoneLineSearch) then
+            allocate (NKLSFuncEvals(nMGCycles))
+        end if
+
+        ! Only compute the free stream resisudal once for efficiency on the
+        ! fine grid only.
+        if (groundLevel == 1) then
+            if (.not. freeStreamResSet) then
+                call getFreeStreamResidual(rhoRes0, totalR0)
+                freeStreamResSet = .True.
+            end if
+        end if
+        ! Write a message. Only done by processor 0.
+
+        if (myID == 0) then
+
+            ! Write a message about the number of multigrid iterations
+            ! to be performed.
 
 #ifndef USE_COMPLEX
-      iterFormat = "(A, I1, 3(A), I6, A, ES10.2)"
+            iterFormat = "(A, I1, 3(A), I6, A, ES10.2)"
 #else
-      iterFormat = "(A, I1, 3(A), I6, A, 2ES10.2)"
+            iterFormat = "(A, I1, 3(A), I6, A, 2ES10.2)"
 #endif
 
-       write(numberString,"(i6)") nMGCycles
-       numberString = adjustl(numberString)
-       numberString = trim(numberString)
-       if (printIterations) then
-          print "(a)", "#"
-          print iterFormat, "# Grid ", groundLevel,": Performing ", trim(numberString), &
-            " iterations, unless converged earlier. Minimum required iteration before NK switch: ", &
-            minIterNum,". Switch to NK at totalR of: ", (NK_switchTol * totalR0)
-          print "(a)", "#"
-       end if
+            write (numberString, "(i6)") nMGCycles
+            numberString = adjustl(numberString)
+            numberString = trim(numberString)
+            if (printIterations) then
+                print "(a)", "#"
+                print iterFormat, "# Grid ", groundLevel, ": Performing ", trim(numberString), &
+                    " iterations, unless converged earlier. Minimum required iteration before NK switch: ", &
+                    minIterNum, ". Switch to NK at totalR of: ", (NK_switchTol * totalR0)
+                print "(a)", "#"
+            end if
 
-       if (printIterations)  then
-          call convergenceHeader
-       end if
-    end if
+            if (printIterations) then
+                call convergenceHeader
+            end if
+        end if
 
+        ! Initialize the approxiate iteration count. We won't count this
+        ! first residual evaluation. This way on the first convergence info
+        ! call, "Iter" and "Iter Total" will both display 0.
+        approxTotalIts = 0
 
-    ! Initialize the approxiate iteration count. We won't count this
-    ! first residual evaluation. This way on the first convergence info
-    ! call, "Iter" and "Iter Total" will both display 0.
-    approxTotalIts = 0
+        ! we need to re-set the orders converged to 16 as it might have been modified in the previous iteration
+        ordersConverged = 16.0_realType
 
-    ! we need to re-set the orders converged to 16 as it might have been modified in the previous iteration
-    ordersConverged = 16.0_realType
+        ! Evaluate the initial residual
+        call computeResidualNK
 
-    ! Evaluate the initial residual
-    call computeResidualNK
+        ! Need to run the time step here since the RK/DADI is expecting
+        ! the rad{i,j,k} to be computed.
+        call timeStep(.False.)
 
-    ! Need to run the time step here since the RK/DADI is expecting
-    ! the rad{i,j,k} to be computed.
-    call timeStep(.False.)
+        ! Extract the rhoResStart and totalRStart
+        call getCurrentResidual(rhoResStart, totalRStart)
 
-    ! Extract the rhoResStart and totalRStart
-    call getCurrentResidual(rhoResStart, totalRStart)
+        ! No iteration type for first residual evaluation
+        iterType = "    None"
+        ! also no CFL, step size, or linear residual for this iteration
+        CFLMonitor = -1
+        stepMonitor = -1
+        linResMonitor = -1
 
-    ! No iteration type for first residual evaluation
-    iterType = "    None"
-    ! also no CFL, step size, or linear residual for this iteration
-    CFLMonitor = -1
-    stepMonitor = -1
-    linResMonitor = -1
+        ! Determine and write the initial convergence info.
+        call convergenceInfo
 
-    ! Determine and write the initial convergence info.
-    call convergenceInfo
+        ! Loop over the maximum number of nonlinear iterations
+        firstANK = .True.
+        firstNK = .True.
 
-    ! Loop over the maximum number of nonlinear iterations
-    firstANK = .True.
-    firstNK = .True.
+        ! Save the NKSwitch tol since it may be modified in the loop
+        NK_SwitchTol_save = NK_switchtol
 
-    ! Save the NKSwitch tol since it may be modified in the loop
-    NK_SwitchTol_save = NK_switchtol
+        ! Set the converged order factor now:
+        ordersConverged = log10(totalR0 / totalRStart)
+        ordersConvergedOld = ordersConverged
 
-    ! Set the converged order factor now:
-    ordersConverged = log10(totalR0/totalRStart)
-    ordersConvergedOld = ordersConverged
+        nonlinearIteration: do while (approxTotalIts < nMGCycles)
 
-    nonlinearIteration: do while (approxTotalIts < nMGCycles)
+            ! Update iterTot
+            iterTot = iterTot + 1
 
-       ! Update iterTot
-       iterTot = iterTot + 1
+            if (mod(iterTot, nWriteConvHeader) == 0 .and. &
+                myID == 0 .and. printIterations) then
+                call convergenceHeader
+            end if
 
-       if(mod(iterTot, nWriteConvHeader) == 0 .and. &
-            myID == 0 .and. printIterations) then
-          call convergenceHeader
-       endif
+            ! Defaults for the monitor
+            CFLMonitor = CFL
+            stepMonitor = 1.0
+            linResMonitor = -1
 
-       ! Defaults for the monitor
-       CFLMonitor = CFL
-       stepMonitor = 1.0
-       linResMonitor = -1
+            ! Determine what type of update to do:
+            if (currentLevel > 1) then
 
-       ! Determine what type of update to do:
-       if (currentLevel > 1) then
+                ! Coarse grids do RK/DADI always
+                call executeMGCycle
+                CFLMonitor = CFLCoarse
+            else
+                if (.not. useANKSolver .and. .not. useNKSolver .or. (iterTot <= minIterNum .and. rkreset)) then
 
-          ! Coarse grids do RK/DADI always
-          call executeMGCycle
-          CFLMonitor = CFLCoarse
-       else
-          if (.not. useANKSolver .and. .not. useNKSolver .or. (iterTot <= minIterNum .and. rkreset)) then
+                    ! Always RK/DADI or a RK startup. Run the MG Cycle
 
-             ! Always RK/DADI or a RK startup. Run the MG Cycle
+                    call executeMGCycle
 
-             call executeMGCycle
+                else if (useANKSolver .and. .not. useNKSolver) then
 
-          else if (useANKSolver .and. .not. useNKSolver) then
+                    ! Approx solver, no NKSolver
 
-             ! Approx solver, no NKSolver
+                    if (totalR > ANK_switchTol * totalR0) then
 
-             if (totalR > ANK_switchTol * totalR0) then
+                        call executeMGCycle
 
-                call executeMGCycle
+                    else
+                        call ANKStep(firstANK)
+                        firstANK = .False.
+                        CFLMonitor = ANK_CFL
 
-             else
-                call ANKStep(firstANK)
-                firstANK = .False.
-                CFLMonitor = ANK_CFL
+                    end if
 
-             end if
+                else if (.not. useANKSolver .and. useNKSolver) then
 
-          else if (.not. useANKSolver .and. useNKSolver) then
+                    ! NK Solver no approx solver
 
-             ! NK Solver no approx solver
+                    if (totalR > NK_switchTol * totalR0) then
 
-             if (totalR > NK_switchTol * totalR0) then
+                        call executeMGCycle
 
-                call executeMGCycle
+                    else
 
-             else
+                        call NKStep(firstNK)
+                        firstNK = .False.
+                        CFLMonitor = -1
 
-                call NKStep(firstNK)
-                firstNK = .False.
-                CFLMonitor = -1
+                    end if
 
-             end if
+                else if (useANKSolver .and. useNKSolver) then
 
-          else if (useANKSolver .and. useNKSolver) then
+                    ! Both approximate and NK solver.
 
-             ! Both approximate and NK solver.
+                    if (totalR > ANK_switchTol * totalR0) then
 
-             if (totalR > ANK_switchTol*totalR0) then
+                        call executeMGCycle
 
-                call executeMGCycle
+                    else if (totalR <= ANK_switchTol * totalR0 .and. &
+                             totalR > NK_switchTol * totalR0) then
+
+                        call ANKStep(firstANK)
+                        firstANK = .False.
+                        firstNK = .True.
+                        CFLMonitor = ANK_CFL
 
-             else if (totalR <= ANK_switchTol*totalR0 .and. &
-                  totalR > NK_switchTol*totalR0) then
+                    else
 
-                call ANKStep(firstANK)
-                firstANK = .False.
-                firstNK = .True.
-                CFLMonitor = ANK_CFL
+                        ! We free the memory for the ANK solver here because ANK solver
+                        ! module depends on the NK solver module and we cannot call
+                        ! destroyANKSolver within NKSolver itself.
+                        if (firstNK) then
+                            call destroyANKSolver()
+                        end if
 
-             else
+                        call NKStep(firstNK)
+                        firstNK = .False.
+                        firstANK = .True.
+                        CFLMonitor = -1
 
-                ! We free the memory for the ANK solver here because ANK solver
-                ! module depends on the NK solver module and we cannot call
-                ! destroyANKSolver within NKSolver itself.
-                if (firstNK) then
-                    call destroyANKSolver()
+                    end if
                 end if
+            end if
 
-                call NKStep(firstNK)
-                firstNK = .False.
-                firstANK = .True.
-                CFLMonitor = -1
+            if (timeLimit > zero) then
+                ! Check if we ran out of time but only if we are required to use the timeLimit
+                if (myid == 0) then
+                    curTime = mpi_wtime() - t0solver
+                end if
 
-             end if
-          end if
-       end if
+                call mpi_bcast(curTime, 1, adflow_real, 0, adflow_comm_world, ierr)
 
-       if (timeLimit > zero) then
-          ! Check if we ran out of time but only if we are required to use the timeLimit
-          if (myid == 0) then
-             curTime = mpi_wtime() - t0solver
-          end if
+                if (curTime > timeLimit) then
+                    ! Set the iterTot to the limit directly so that convergence
+                    ! info thinks we are just out of cycles
+                    approxTotalIts = nMGCycles
+                end if
+            end if
 
-          call mpi_bcast(curTime, 1, adflow_real, 0, adflow_comm_world, ierr)
+            ! Determine and write the convergence info.
+            call convergenceInfo
 
-          if (curTime > timeLimit) then
-             ! Set the iterTot to the limit directly so that convergence
-             ! info thinks we are just out of cycles
-             approxTotalIts = nMGCycles
-          end if
-       end if
+            totalRFinal = totalR
 
-       ! Determine and write the convergence info.
-       call convergenceInfo
+            ! Update how far we are converged:
+            ordersConverged = max(log10(totalR0 / totalR), ordersConvergedOld)
+            ordersConvergedOld = ordersConverged
 
-       totalRFinal = totalR
+            ! Check for divergence or nan here
+            if (routineFailed) then
+                exit NonLinearIteration
+            end if
 
-       ! Update how far we are converged:
-       ordersConverged = max(log10(totalR0/totalR), ordersConvergedOld)
-       ordersConvergedOld = ordersConverged
+            ! Exit the loop if we are converged
+            if (converged) then
+                exit nonLinearIteration
+            end if
 
-       ! Check for divergence or nan here
-       if(routineFailed) then
-          exit NonLinearIteration
-       endif
+            ! Check if the bleed boundary conditions must be updated and
+            ! do so if needed.
 
-       ! Exit the loop if we are converged
-       if (converged) then
-          exit nonLinearIteration
-       end if
+            ! Check if we've received a signal:
+#ifndef USE_NO_SIGNALS
+            call mpi_allreduce(localSignal, globalSignal, 1, &
+                               adflow_integer, mpi_max, ADflow_comm_world, &
+                               ierr)
+#endif
 
-       ! Check if the bleed boundary conditions must be updated and
-       ! do so if needed.
+            if (globalSignal == signalWrite .or. writeSolEachIter) then
+                ! We have been told to write the solution even though we are not done iterating
 
+                ! The grid must be written along with the volume solution solution
+                if (writeVolume) then
+                    ! temporary change the writeGrid option
+                    old_writeGrid = writeGrid
+                    writeGrid = .True.
+                end if
 
-       ! Check if we've received a signal:
-#ifndef USE_NO_SIGNALS
-       call mpi_allreduce(localSignal, globalSignal, 1,         &
-            adflow_integer, mpi_max, ADflow_comm_world, &
-            ierr)
-#endif
+                if (writeSolEachIter) then
+                    write (numberString, "(i7)") iterTot
+                    numberString = adjustl(numberString)
+                    numberString = trim(numberString)
+                    surfaceSolFile = trim(convSolFileBasename)//"_"//trim(numberString)//"_surf.cgns"
+                    newGridFile = trim(convSolFileBasename)//"_"//trim(numberString)//"_vol.cgns"
+                    solFile = trim(convSolFileBasename)//"_"//trim(numberString)//"_vol.cgns"
+                else
+                    surfaceSolFile = forcedSurfaceFile
+                    newGridFile = forcedVolumeFile
+                    solFile = forcedVolumeFile
+                end if
+
+                call writeSol(fullFamList, size(fullFamList))
+
+                ! Also write potential tecplot files. Note that we are not
+                ! writing the tecplot surface file so pass in an empty file
+                ! name and a nonsense family list.
+                call writeTecplot(forcedSliceFile, .True., forcedLiftFile, .True., &
+                                  "", .False., [0], 1)
 
-       if (globalSignal == signalWrite .or. writeSolEachIter) then
-          ! We have been told to write the solution even though we are not done iterating
-          
-          ! The grid must be written along with the volume solution solution         
-         if (writeVolume) then
-            ! temporary change the writeGrid option
-            old_writeGrid = writeGrid
-            writeGrid = .True.
-         end if
-          
-          if (writeSolEachIter) then
-               write(numberString,"(i7)") iterTot
-               numberString = adjustl(numberString)
-               numberString = trim(numberString)
-               surfaceSolFile = trim(convSolFileBasename)//"_"//trim(numberString)//"_surf.cgns"
-               newGridFile = trim(convSolFileBasename)//"_"//trim(numberString)//"_vol.cgns"
-               solFile = trim(convSolFileBasename)//"_"//trim(numberString)//"_vol.cgns"
-          else
-               surfaceSolFile = forcedSurfaceFile
-               newGridFile = forcedVolumeFile
-               solFile = forcedVolumeFile
-          end if
-          
-          call writeSol(fullFamList, size(fullFamList))
-
-          ! Also write potential tecplot files. Note that we are not
-          ! writing the tecplot surface file so pass in an empty file
-          ! name and a nonsense family list.
-          call writeTecplot(forcedSliceFile, .True., forcedLiftFile, .True., &
-               "", .False., [0], 1)
-
-         if (writeVolume) then
-            ! change the writeGrid option back
-            writeGrid = old_writeGrid
-         end if
-               
-          ! Reset the signal
-          localSignal = noSignal
-       end if
-
-       if (globalSignal == signalWriteQuit) then
-          exit nonLinearIteration
-       end if
-
-
-    enddo nonLinearIteration
-
-    ! Restore the switch tol in case it was changed
-    NK_switchtol = NK_SwitchTol_save
-
-    ! deallocate space for storing hisotry of function evaluations for NK
-    ! solver with non-monotone line search if necessary
-    if (currentLevel == 1 .and. useNKSolver .and. NK_LS==nonMonotoneLineSearch) then
-       deallocate(NKLSFuncEvals)
-    end if
-
-  end subroutine solveState
-
-
-  subroutine convergenceInfo
-    !
-    !       convergenceInfo computes and writes the convergence info to
-    !       standard output. In spectral mode a convergence history for
-    !       every mode is written.
-    !
-    use constants
-    use cgnsNames
-    use block, only : nCellGlobal
-    use blockPointers, only : nDom
-    use communication, only : adflow_comm_world, myid
-    use inputIteration, only : printIterations, l2convcoarse, l2conv, l2convrel, &
-         minIterNum, maxL2DeviationFactor, ncycles, RKReset
-    use inputPhysics, only : liftDirection, dragDirection, equationMode, &
-         lengthRef, machCoef, surfaceRef
-    use flowVarRefState, only : pRef, lRef, gammaInf
-    use inputIO, only : storeConvInnerIter
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use inputUnsteady, only : timeIntegrationScheme
-    use monitor, only : monLoc, monGlob, nMon, nMonMax, nMonSum, monNames, timeDataArray, &
-         showCPU, monRef, convArray, timeUnsteadyRestart, timeArray, timeStepUnsteady, &
-         timeUnsteady, nTimeStepsRestart, solverDataArray, solverTypeArray
-    use iteration, only : groundLevel, currentLevel, iterTot, iterType, approxTotalIts, &
-         CFLMonitor, stepMonitor, t0solver, converged, linResMonitor
-    use killSignals, only : routineFailed, fromPython
-    use iteration, only : rhoRes, rhoResStart, totalR, totalRStart, totalR0
-    use oversetData, only: oversetPresent
-    use utils, only : setPointers, returnFail, maxHDiffMach, maxEddyv, sumResiduals, sumAllResiduals
-    use genericISNAN, only : myisnan
-    use surfaceIntegrations, only : integrateSurfaces
-    use zipperIntegrations, only : integrateZippers
-    use surfaceFamilies, only : fullFamLIst
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr, iConvStdout
-
-    integer(kind=intType) :: sps, nn, mm, iConv
-
-    real(kind=realType) :: hdiffMax, MachMax
-    real(kind=realType) :: eddyvisMax, yplusMax, sepSensor, Cavitation, axisMoment
-    real(kind=realType) :: sepSensorAvg(3)
-
-    real(kind=realType) :: L2ConvThisLevel, fact
-    real(kind=realType), dimension(3) :: cfp, cfv, cmp, cmv
-    real(kind=realType) :: cmpaxis, cmvaxis
-    logical :: nanOccurred, writeIterations
-    logical :: absConv, relConv
-    real(kind=realType) :: localValues(nLocalValues)
-    real(kind=realType) :: funcValues(nCostFunction)
-    ! Determine whether or not the iterations must be written.
-
-    writeIterations = .true.
-    if(equationMode            == unsteady .and. &
-         timeIntegrationScheme == explicitRK) writeIterations = .false.
-
-    ! Initializations
-    converged   = .False.
-    nanOccurred = .false.
-
-    ! Set the L2 norm for convergence for this level.
-
-    L2ConvThisLevel = L2ConvCoarse
-    if(groundLevel == 1) L2ConvThisLevel = L2Conv
-
-    ! Loop over the number of spectral solutions.
-
-    spectralLoop: do sps=1,nTimeIntervalsSpectral
-
-       ! Initialize the local monitoring variables to zero.
-
-       monLoc = zero
-
-       ! Loop over the blocks.
-
-       domains: do nn=1,nDom
-
-          ! Set the pointers for this block.
-
-          call setPointers(nn, groundLevel, sps)
-
-          ! Compute the forces and moments for this block.  Note that
-          ! we zero localValues before each call becuase we are
-          ! summing into momLocal.
-          localvalues = zero
-          call integrateSurfaces(localValues, fullFamList)
-
-          ! Convert to coefficients for monitoring:
-          fact = two/(gammaInf*MachCoef*MachCoef &
-               *surfaceRef*LRef*LRef*pRef)
-          cfp = fact*localValues(iFp:iFp+2)
-          cfv = fact*localValues(iFv:iFv+2)
-          fact = fact/(lengthRef*Lref)
-          cmp = fact*localValues(iMp:iMp+2)
-          cmv = fact*localValues(iMv:iMv+2)
-          yplusmax = localValues(iYplus)
-          ! Determine the maximum values of the monitoring variables
-          ! of this block.
-
-          call maxHdiffMach(hdiffMax, MachMax)
-          call maxEddyv(eddyvisMax)
-
-          ! Loop over the number of monitoring variables.
-          nMonitoringVar: do mm=1,nMon
-
-             ! Determine the monitoring variable and act accordingly.
+                if (writeVolume) then
+                    ! change the writeGrid option back
+                    writeGrid = old_writeGrid
+                end if
+
+                ! Reset the signal
+                localSignal = noSignal
+            end if
+
+            if (globalSignal == signalWriteQuit) then
+                exit nonLinearIteration
+            end if
+
+        end do nonLinearIteration
+
+        ! Restore the switch tol in case it was changed
+        NK_switchtol = NK_SwitchTol_save
+
+        ! deallocate space for storing hisotry of function evaluations for NK
+        ! solver with non-monotone line search if necessary
+        if (currentLevel == 1 .and. useNKSolver .and. NK_LS == nonMonotoneLineSearch) then
+            deallocate (NKLSFuncEvals)
+        end if
+
+    end subroutine solveState
+
+    subroutine convergenceInfo
+        !
+        !       convergenceInfo computes and writes the convergence info to
+        !       standard output. In spectral mode a convergence history for
+        !       every mode is written.
+        !
+        use constants
+        use cgnsNames
+        use block, only: nCellGlobal
+        use blockPointers, only: nDom
+        use communication, only: adflow_comm_world, myid
+        use inputIteration, only: printIterations, l2convcoarse, l2conv, l2convrel, &
+                                  minIterNum, maxL2DeviationFactor, ncycles, RKReset
+        use inputPhysics, only: liftDirection, dragDirection, equationMode, &
+                                lengthRef, machCoef, surfaceRef
+        use flowVarRefState, only: pRef, lRef, gammaInf
+        use inputIO, only: storeConvInnerIter
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use inputUnsteady, only: timeIntegrationScheme
+        use monitor, only: monLoc, monGlob, nMon, nMonMax, nMonSum, monNames, timeDataArray, &
+                           showCPU, monRef, convArray, timeUnsteadyRestart, timeArray, timeStepUnsteady, &
+                           timeUnsteady, nTimeStepsRestart, solverDataArray, solverTypeArray
+        use iteration, only: groundLevel, currentLevel, iterTot, iterType, approxTotalIts, &
+                             CFLMonitor, stepMonitor, t0solver, converged, linResMonitor
+        use killSignals, only: routineFailed, fromPython
+        use iteration, only: rhoRes, rhoResStart, totalR, totalRStart, totalR0
+        use oversetData, only: oversetPresent
+        use utils, only: setPointers, returnFail, maxHDiffMach, maxEddyv, sumResiduals, sumAllResiduals
+        use genericISNAN, only: myisnan
+        use surfaceIntegrations, only: integrateSurfaces
+        use zipperIntegrations, only: integrateZippers
+        use surfaceFamilies, only: fullFamLIst
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr, iConvStdout
+
+        integer(kind=intType) :: sps, nn, mm, iConv
+
+        real(kind=realType) :: hdiffMax, MachMax
+        real(kind=realType) :: eddyvisMax, yplusMax, sepSensor, Cavitation, axisMoment
+        real(kind=realType) :: sepSensorAvg(3)
+
+        real(kind=realType) :: L2ConvThisLevel, fact
+        real(kind=realType), dimension(3) :: cfp, cfv, cmp, cmv
+        real(kind=realType) :: cmpaxis, cmvaxis
+        logical :: nanOccurred, writeIterations
+        logical :: absConv, relConv
+        real(kind=realType) :: localValues(nLocalValues)
+        real(kind=realType) :: funcValues(nCostFunction)
+        ! Determine whether or not the iterations must be written.
+
+        writeIterations = .true.
+        if (equationMode == unsteady .and. &
+            timeIntegrationScheme == explicitRK) writeIterations = .false.
+
+        ! Initializations
+        converged = .False.
+        nanOccurred = .false.
+
+        ! Set the L2 norm for convergence for this level.
+
+        L2ConvThisLevel = L2ConvCoarse
+        if (groundLevel == 1) L2ConvThisLevel = L2Conv
+
+        ! Loop over the number of spectral solutions.
+
+        spectralLoop: do sps = 1, nTimeIntervalsSpectral
+
+            ! Initialize the local monitoring variables to zero.
+
+            monLoc = zero
+
+            ! Loop over the blocks.
+
+            domains: do nn = 1, nDom
+
+                ! Set the pointers for this block.
+
+                call setPointers(nn, groundLevel, sps)
+
+                ! Compute the forces and moments for this block.  Note that
+                ! we zero localValues before each call becuase we are
+                ! summing into momLocal.
+                localvalues = zero
+                call integrateSurfaces(localValues, fullFamList)
+
+                ! Convert to coefficients for monitoring:
+                fact = two / (gammaInf * MachCoef * MachCoef &
+                              * surfaceRef * LRef * LRef * pRef)
+                cfp = fact * localValues(iFp:iFp + 2)
+                cfv = fact * localValues(iFv:iFv + 2)
+                fact = fact / (lengthRef * Lref)
+                cmp = fact * localValues(iMp:iMp + 2)
+                cmv = fact * localValues(iMv:iMv + 2)
+                yplusmax = localValues(iYplus)
+                ! Determine the maximum values of the monitoring variables
+                ! of this block.
+
+                call maxHdiffMach(hdiffMax, MachMax)
+                call maxEddyv(eddyvisMax)
+
+                ! Loop over the number of monitoring variables.
+                nMonitoringVar: do mm = 1, nMon
+
+                    ! Determine the monitoring variable and act accordingly.
 
-             select case (monNames(mm))
+                    select case (monNames(mm))
 
-             case ('totalR')
-                call sumAllResiduals(mm)
+                    case ('totalR')
+                        call sumAllResiduals(mm)
 
-             case (cgnsL2resRho)
-                call sumResiduals(irho, mm)
+                    case (cgnsL2resRho)
+                        call sumResiduals(irho, mm)
 
-             case (cgnsL2resMomx)
-                call sumResiduals(imx, mm)
+                    case (cgnsL2resMomx)
+                        call sumResiduals(imx, mm)
 
-             case (cgnsL2resMomy)
-                call sumResiduals(imy, mm)
+                    case (cgnsL2resMomy)
+                        call sumResiduals(imy, mm)
 
-             case (cgnsL2resMomz)
-                call sumResiduals(imz, mm)
+                    case (cgnsL2resMomz)
+                        call sumResiduals(imz, mm)
 
-             case (cgnsL2resRhoe)
-                call sumResiduals(irhoE, mm)
+                    case (cgnsL2resRhoe)
+                        call sumResiduals(irhoE, mm)
 
-             case (cgnsL2resNu, cgnsL2resK)
-                call sumResiduals(itu1, mm)
+                    case (cgnsL2resNu, cgnsL2resK)
+                        call sumResiduals(itu1, mm)
 
-             case (cgnsL2resOmega, cgnsL2resTau, cgnsL2resEpsilon)
-                call sumResiduals(itu2, mm)
+                    case (cgnsL2resOmega, cgnsL2resTau, cgnsL2resEpsilon)
+                        call sumResiduals(itu2, mm)
 
-             case (cgnsL2resV2)
-                call sumResiduals(itu3, mm)
+                    case (cgnsL2resV2)
+                        call sumResiduals(itu3, mm)
 
-             case (cgnsL2resF)
-                call sumResiduals(itu4, mm)
+                    case (cgnsL2resF)
+                        call sumResiduals(itu4, mm)
 
-             case (cgnsCl)
-                monLoc(mm) = monLoc(mm)                         &
-                     + (cfp(1) + cfv(1))*liftDirection(1) &
-                     + (cfp(2) + cfv(2))*liftDirection(2) &
-                     + (cfp(3) + cfv(3))*liftDirection(3)
+                    case (cgnsCl)
+                        monLoc(mm) = monLoc(mm) &
+                                     + (cfp(1) + cfv(1)) * liftDirection(1) &
+                                     + (cfp(2) + cfv(2)) * liftDirection(2) &
+                                     + (cfp(3) + cfv(3)) * liftDirection(3)
 
-             case (cgnsClp)
-                monLoc(mm) = monLoc(mm) + cfp(1)*liftDirection(1) &
-                     +              cfp(2)*liftDirection(2) &
-                     +              cfp(3)*liftDirection(3)
+                    case (cgnsClp)
+                        monLoc(mm) = monLoc(mm) + cfp(1) * liftDirection(1) &
+                                     + cfp(2) * liftDirection(2) &
+                                     + cfp(3) * liftDirection(3)
 
-             case (cgnsClv)
-                monLoc(mm) = monLoc(mm) + cfv(1)*liftDirection(1) &
-                     +              cfv(2)*liftDirection(2) &
-                     +              cfv(3)*liftDirection(3)
+                    case (cgnsClv)
+                        monLoc(mm) = monLoc(mm) + cfv(1) * liftDirection(1) &
+                                     + cfv(2) * liftDirection(2) &
+                                     + cfv(3) * liftDirection(3)
 
-             case (cgnsCd)
-                monLoc(mm) = monLoc(mm)                         &
-                     + (cfp(1) + cfv(1))*dragDirection(1) &
-                     + (cfp(2) + cfv(2))*dragDirection(2) &
-                     + (cfp(3) + cfv(3))*dragDirection(3)
+                    case (cgnsCd)
+                        monLoc(mm) = monLoc(mm) &
+                                     + (cfp(1) + cfv(1)) * dragDirection(1) &
+                                     + (cfp(2) + cfv(2)) * dragDirection(2) &
+                                     + (cfp(3) + cfv(3)) * dragDirection(3)
 
-             case (cgnsCdp)
-                monLoc(mm) = monLoc(mm) + cfp(1)*dragDirection(1) &
-                     +              cfp(2)*dragDirection(2) &
-                     +              cfp(3)*dragDirection(3)
+                    case (cgnsCdp)
+                        monLoc(mm) = monLoc(mm) + cfp(1) * dragDirection(1) &
+                                     + cfp(2) * dragDirection(2) &
+                                     + cfp(3) * dragDirection(3)
 
-             case (cgnsCdv)
-                monLoc(mm) = monLoc(mm) + cfv(1)*dragDirection(1) &
-                     +              cfv(2)*dragDirection(2) &
-                     +              cfv(3)*dragDirection(3)
+                    case (cgnsCdv)
+                        monLoc(mm) = monLoc(mm) + cfv(1) * dragDirection(1) &
+                                     + cfv(2) * dragDirection(2) &
+                                     + cfv(3) * dragDirection(3)
 
-             case (cgnsCfx)
-                monLoc(mm) = monLoc(mm) + cfp(1) + cfv(1)
+                    case (cgnsCfx)
+                        monLoc(mm) = monLoc(mm) + cfp(1) + cfv(1)
 
-             case (cgnsCfy)
-                monLoc(mm) = monLoc(mm) + cfp(2) + cfv(2)
+                    case (cgnsCfy)
+                        monLoc(mm) = monLoc(mm) + cfp(2) + cfv(2)
 
-             case (cgnsCfz)
-                monLoc(mm) = monLoc(mm) + cfp(3) + cfv(3)
+                    case (cgnsCfz)
+                        monLoc(mm) = monLoc(mm) + cfp(3) + cfv(3)
 
-             case (cgnsCmx)
-                monLoc(mm) = monLoc(mm) + cmp(1) + cmv(1)
+                    case (cgnsCmx)
+                        monLoc(mm) = monLoc(mm) + cmp(1) + cmv(1)
 
-             case (cgnsCmy)
-                monLoc(mm) = monLoc(mm) + cmp(2) + cmv(2)
+                    case (cgnsCmy)
+                        monLoc(mm) = monLoc(mm) + cmp(2) + cmv(2)
 
-             case (cgnsCmz)
-                monLoc(mm) = monLoc(mm) + cmp(3) + cmv(3)
+                    case (cgnsCmz)
+                        monLoc(mm) = monLoc(mm) + cmp(3) + cmv(3)
 
-             case (cgnsHdiffMax)
-                monLoc(mm) = max(monLoc(mm), hdiffMax)
+                    case (cgnsHdiffMax)
+                        monLoc(mm) = max(monLoc(mm), hdiffMax)
 
-             case (cgnsMachMax)
-                monLoc(mm) = max(monLoc(mm), MachMax)
+                    case (cgnsMachMax)
+                        monLoc(mm) = max(monLoc(mm), MachMax)
 
-             case (cgnsYplusMax)
-                monLoc(mm) = max(monLoc(mm), yplusMax)
+                    case (cgnsYplusMax)
+                        monLoc(mm) = max(monLoc(mm), yplusMax)
 
-             case (cgnsEddyMax)
-                monLoc(mm) = max(monLoc(mm), eddyvisMax)
+                    case (cgnsEddyMax)
+                        monLoc(mm) = max(monLoc(mm), eddyvisMax)
 
-             case (cgnsSepSensor)
-                monLoc(mm) = monLoc(mm) + localValues(isepSensor)
+                    case (cgnsSepSensor)
+                        monLoc(mm) = monLoc(mm) + localValues(isepSensor)
 
-             case (cgnsCavitation)
-                monLoc(mm) = monLoc(mm) + localValues(iCavitation)
+                    case (cgnsCavitation)
+                        monLoc(mm) = monLoc(mm) + localValues(iCavitation)
 
-             case (cgnsAxisMoment)
-                monLoc(mm) = monLoc(mm) + localValues(iaxisMoment)
+                    case (cgnsAxisMoment)
+                        monLoc(mm) = monLoc(mm) + localValues(iaxisMoment)
 
-             end select
+                    end select
 
-          end do nMonitoringVar
-       end do domains
+                end do nMonitoringVar
+            end do domains
 
-       ! Add the corrections from zipper meshes from proc 0
-       if (oversetPresent) then
-          localValues = zero
-          call integrateZippers(localValues, fullFamList, sps)
+            ! Add the corrections from zipper meshes from proc 0
+            if (oversetPresent) then
+                localValues = zero
+                call integrateZippers(localValues, fullFamList, sps)
 
-          fact = two/(gammaInf*MachCoef*MachCoef &
-               *surfaceRef*LRef*LRef*pRef)
-          cfp = localValues(iFp:iFp+2)*fact
-          cfv = localValues(iFv:iFv+2)*fact
-          fact = fact/(lengthRef*Lref)
-          cmp = localValues(iMp:iMp+2)*fact
-          cmv = localValues(iMv:iMv+2)*fact
+                fact = two / (gammaInf * MachCoef * MachCoef &
+                              * surfaceRef * LRef * LRef * pRef)
+                cfp = localValues(iFp:iFp + 2) * fact
+                cfv = localValues(iFv:iFv + 2) * fact
+                fact = fact / (lengthRef * Lref)
+                cmp = localValues(iMp:iMp + 2) * fact
+                cmv = localValues(iMv:iMv + 2) * fact
 
-          !Loop over the number of monitoring variables and just modify
-          !the ones that need to be updated with the zipper forces we just
-          !computed.
-          nMonitoringVarZip: do mm=1,nMon
+                !Loop over the number of monitoring variables and just modify
+                !the ones that need to be updated with the zipper forces we just
+                !computed.
+                nMonitoringVarZip: do mm = 1, nMon
 
-             ! Determine the monitoring variable and act accordingly.
+                    ! Determine the monitoring variable and act accordingly.
 
-             select case (monNames(mm))
+                    select case (monNames(mm))
 
-             case (cgnsCl)
-                monLoc(mm) = monLoc(mm)                         &
-                     + (cfp(1) + cfv(1))*liftDirection(1) &
-                     + (cfp(2) + cfv(2))*liftDirection(2) &
-                     + (cfp(3) + cfv(3))*liftDirection(3)
+                    case (cgnsCl)
+                        monLoc(mm) = monLoc(mm) &
+                                     + (cfp(1) + cfv(1)) * liftDirection(1) &
+                                     + (cfp(2) + cfv(2)) * liftDirection(2) &
+                                     + (cfp(3) + cfv(3)) * liftDirection(3)
 
-             case (cgnsClp)
-                monLoc(mm) = monLoc(mm) + cfp(1)*liftDirection(1) &
-                     +              cfp(2)*liftDirection(2) &
-                     +              cfp(3)*liftDirection(3)
+                    case (cgnsClp)
+                        monLoc(mm) = monLoc(mm) + cfp(1) * liftDirection(1) &
+                                     + cfp(2) * liftDirection(2) &
+                                     + cfp(3) * liftDirection(3)
 
-             case (cgnsClv)
-                monLoc(mm) = monLoc(mm) + cfv(1)*liftDirection(1) &
-                     +              cfv(2)*liftDirection(2) &
-                     +              cfv(3)*liftDirection(3)
+                    case (cgnsClv)
+                        monLoc(mm) = monLoc(mm) + cfv(1) * liftDirection(1) &
+                                     + cfv(2) * liftDirection(2) &
+                                     + cfv(3) * liftDirection(3)
 
-             case (cgnsCd)
-                monLoc(mm) = monLoc(mm)                         &
-                     + (cfp(1) + cfv(1))*dragDirection(1) &
-                     + (cfp(2) + cfv(2))*dragDirection(2) &
-                     + (cfp(3) + cfv(3))*dragDirection(3)
+                    case (cgnsCd)
+                        monLoc(mm) = monLoc(mm) &
+                                     + (cfp(1) + cfv(1)) * dragDirection(1) &
+                                     + (cfp(2) + cfv(2)) * dragDirection(2) &
+                                     + (cfp(3) + cfv(3)) * dragDirection(3)
 
-             case (cgnsCdp)
-                monLoc(mm) = monLoc(mm) + cfp(1)*dragDirection(1) &
-                     +              cfp(2)*dragDirection(2) &
-                     +              cfp(3)*dragDirection(3)
+                    case (cgnsCdp)
+                        monLoc(mm) = monLoc(mm) + cfp(1) * dragDirection(1) &
+                                     + cfp(2) * dragDirection(2) &
+                                     + cfp(3) * dragDirection(3)
 
-             case (cgnsCdv)
-                monLoc(mm) = monLoc(mm) + cfv(1)*dragDirection(1) &
-                     +              cfv(2)*dragDirection(2) &
-                     +              cfv(3)*dragDirection(3)
+                    case (cgnsCdv)
+                        monLoc(mm) = monLoc(mm) + cfv(1) * dragDirection(1) &
+                                     + cfv(2) * dragDirection(2) &
+                                     + cfv(3) * dragDirection(3)
 
-             case (cgnsCfx)
-                monLoc(mm) = monLoc(mm) + cfp(1) + cfv(1)
+                    case (cgnsCfx)
+                        monLoc(mm) = monLoc(mm) + cfp(1) + cfv(1)
 
-             case (cgnsCfy)
-                monLoc(mm) = monLoc(mm) + cfp(2) + cfv(2)
+                    case (cgnsCfy)
+                        monLoc(mm) = monLoc(mm) + cfp(2) + cfv(2)
 
-             case (cgnsCfz)
-                monLoc(mm) = monLoc(mm) + cfp(3) + cfv(3)
+                    case (cgnsCfz)
+                        monLoc(mm) = monLoc(mm) + cfp(3) + cfv(3)
 
-             case (cgnsCmx)
-                monLoc(mm) = monLoc(mm) + cmp(1) + cmv(1)
+                    case (cgnsCmx)
+                        monLoc(mm) = monLoc(mm) + cmp(1) + cmv(1)
 
-             case (cgnsCmy)
-                monLoc(mm) = monLoc(mm) + cmp(2) + cmv(2)
+                    case (cgnsCmy)
+                        monLoc(mm) = monLoc(mm) + cmp(2) + cmv(2)
 
-             case (cgnsCmz)
-                monLoc(mm) = monLoc(mm) + cmp(3) + cmv(3)
+                    case (cgnsCmz)
+                        monLoc(mm) = monLoc(mm) + cmp(3) + cmv(3)
 
-             end select
+                    end select
 
-          end do nMonitoringVarZip
-       end if
-       ! Determine the global sum of the summation monitoring
-       ! variables. This is an all reduce since every processor needs to
-       ! know the residual to make the same descisions.
+                end do nMonitoringVarZip
+            end if
+            ! Determine the global sum of the summation monitoring
+            ! variables. This is an all reduce since every processor needs to
+            ! know the residual to make the same descisions.
 
-       if(nMonSum > 0) &
-            call mpi_allreduce(monLoc, monGlob, nMonSum, adflow_real, &
-            mpi_sum, ADflow_comm_world, ierr)
+            if (nMonSum > 0) &
+                call mpi_allreduce(monLoc, monGlob, nMonSum, adflow_real, &
+                                   mpi_sum, ADflow_comm_world, ierr)
 
-       ! Idem for the maximum monitoring variables.
+            ! Idem for the maximum monitoring variables.
 #ifndef USE_COMPLEX
-       if(nMonMax > 0) &
-            call mpi_allreduce(monLoc(nMonSum+1), monGlob(nMonSum+1), &
-            nMonMax, adflow_real, mpi_max, ADflow_comm_world, ierr)
+            if (nMonMax > 0) &
+                call mpi_allreduce(monLoc(nMonSum + 1), monGlob(nMonSum + 1), &
+                                   nMonMax, adflow_real, mpi_max, ADflow_comm_world, ierr)
 #else
-       if (nMonMax < 0) &
-            monGlob(nMonSum+1) = zero
+            if (nMonMax < 0) &
+                monGlob(nMonSum + 1) = zero
 #endif
 
-       ! Write the convergence info; only processor 0 does this.
+            ! Write the convergence info; only processor 0 does this.
 
-       testRootProc: if(myID == 0) then
+            testRootProc: if (myID == 0) then
 
-          ! The variables which must always be written.
+                ! The variables which must always be written.
 
-          if(printIterations) then
-             write(*,"(1x,i6,2x)",advance="no") groundLevel
+                if (printIterations) then
+                    write (*, "(1x,i6,2x)", advance="no") groundLevel
 
-             if(equationMode == unsteady) then
+                    if (equationMode == unsteady) then
 
-                write(*,"(i6,1x)",advance="no") timeStepUnsteady + &
-                     nTimeStepsRestart
-                write(*,"(es12.5,1x)",advance="no") timeUnsteady + &
-                     timeUnsteadyRestart
+                        write (*, "(i6,1x)", advance="no") timeStepUnsteady + &
+                            nTimeStepsRestart
+                        write (*, "(es12.5,1x)", advance="no") timeUnsteady + &
+                            timeUnsteadyRestart
 
-             else if(equationMode == timeSpectral) then
+                    else if (equationMode == timeSpectral) then
 
-                write(*,"(i8,3x)",advance="no") sps
+                        write (*, "(i8,3x)", advance="no") sps
 
-             endif
+                    end if
 
-             if( writeIterations ) then
-                write(*,"(i6,1x)",advance="no") iterTot
-                write(*,"(i6,1x)",advance="no") approxTotalIts
-                write(*,"(a,1x)", advance="no") iterType
-                
-                if( storeConvInnerIter ) then
-                   solverDataArray(iterTot, sps, 1) = approxTotalIts
-                   solverTypeArray(iterTot, sps) = iterType
-                endif
-                
-                if (CFLMonitor < zero) then
-                   ! Print dashes if no cfl term is used, i.e. NK solver
-                   write(*,"(a,1x)", advance="no") "    ----  "
-                else
+                    if (writeIterations) then
+                        write (*, "(i6,1x)", advance="no") iterTot
+                        write (*, "(i6,1x)", advance="no") approxTotalIts
+                        write (*, "(a,1x)", advance="no") iterType
+
+                        if (storeConvInnerIter) then
+                            solverDataArray(iterTot, sps, 1) = approxTotalIts
+                            solverTypeArray(iterTot, sps) = iterType
+                        end if
+
+                        if (CFLMonitor < zero) then
+                            ! Print dashes if no cfl term is used, i.e. NK solver
+                            write (*, "(a,1x)", advance="no") "    ----  "
+                        else
 #ifndef USE_COMPLEX
-                   write(*,"(es10.2,1x)",advance="no") CFLMonitor
-                   if( storeConvInnerIter ) then
-                     solverDataArray(iterTot, sps, 2) = CFLMonitor
-                   endif
+                            write (*, "(es10.2,1x)", advance="no") CFLMonitor
+                            if (storeConvInnerIter) then
+                                solverDataArray(iterTot, sps, 2) = CFLMonitor
+                            end if
 #else
-                   write(*,"(es10.2,1x)",advance="no") real(CFLMonitor)
+                            write (*, "(es10.2,1x)", advance="no") real(CFLMonitor)
 #endif
-                end if
+                        end if
 
-                if (stepMonitor < zero) then
-                  ! Print dashes in the first None iteration
-                  write(*,"(a,1x)", advance="no") " ---- "
-                else
+                        if (stepMonitor < zero) then
+                            ! Print dashes in the first None iteration
+                            write (*, "(a,1x)", advance="no") " ---- "
+                        else
 #ifndef USE_COMPLEX
-                  write(*,"(f5.2,2x)",advance="no") stepMonitor
-                  if( storeConvInnerIter ) then
-                     solverDataArray(iterTot, sps, 3) = stepMonitor
-                  endif
+                            write (*, "(f5.2,2x)", advance="no") stepMonitor
+                            if (storeConvInnerIter) then
+                                solverDataArray(iterTot, sps, 3) = stepMonitor
+                            end if
 #else
-                  write(*,"(f5.2,2x)",advance="no") real(stepMonitor)
+                            write (*, "(f5.2,2x)", advance="no") real(stepMonitor)
 #endif
-                end if
+                        end if
 
-                if (linResMonitor < zero) then
-                   ! For RK/DADI just print dashes
-                   write(*,"(a,1x)", advance="no") " ----"
-                else
+                        if (linResMonitor < zero) then
+                            ! For RK/DADI just print dashes
+                            write (*, "(a,1x)", advance="no") " ----"
+                        else
 
 #ifndef USE_COMPLEX
-                   write(*,"(f5.3,1x)",advance="no") linResMonitor
-                   if( storeConvInnerIter ) then
-                     solverDataArray(iterTot, sps, 4) = linResMonitor
-                  endif
+                            write (*, "(f5.3,1x)", advance="no") linResMonitor
+                            if (storeConvInnerIter) then
+                                solverDataArray(iterTot, sps, 4) = linResMonitor
+                            end if
 #else
-                   write(*,"(f5.3,1x)",advance="no") real(linResMonitor)
+                            write (*, "(f5.3,1x)", advance="no") real(linResMonitor)
 #endif
-                end if
+                        end if
 
-                if( showCPU ) then
+                        if (showCPU) then
 #ifndef USE_COMPLEX
-                   write(*,"(es12.5,1x)",advance="no") mpi_wtime() - t0Solver
-                   if( storeConvInnerIter ) then
-                     solverDataArray(iterTot, sps, 5) = mpi_wtime() - t0Solver
-                  endif
+                            write (*, "(es12.5,1x)", advance="no") mpi_wtime() - t0Solver
+                            if (storeConvInnerIter) then
+                                solverDataArray(iterTot, sps, 5) = mpi_wtime() - t0Solver
+                            end if
 #else
-                   write(*,"(es12.5,1x)",advance="no") real(mpi_wtime() - t0Solver)
+                            write (*, "(es12.5,1x)", advance="no") real(mpi_wtime() - t0Solver)
 #endif
-                   
+
+                        end if
+                    end if
                 end if
-             end if
-          end if
-       end if testRootProc
+            end if testRootProc
 
-       ! Loop over the number of monitoring values.
-       variableLoop: do mm=1, nMon
+            ! Loop over the number of monitoring values.
+            variableLoop: do mm = 1, nMon
 
-          ! The residual variables must be corrected.
+                ! The residual variables must be corrected.
 
-          select case (monNames(mm))
+                select case (monNames(mm))
 
-          case (cgnsL2resRho,  cgnsL2resMomx,    &
-               cgnsL2resMomy, cgnsL2resMomz,    &
-               cgnsL2resRhoe, cgnsL2resNu,      &
-               cgnsL2resK,    cgnsL2resOmega,   &
-               cgnsL2resTau,  cgnsL2resEpsilon, &
-               cgnsL2resV2,   cgnsL2resF        )
+                case (cgnsL2resRho, cgnsL2resMomx, &
+                      cgnsL2resMomy, cgnsL2resMomz, &
+                      cgnsL2resRhoe, cgnsL2resNu, &
+                      cgnsL2resK, cgnsL2resOmega, &
+                      cgnsL2resTau, cgnsL2resEpsilon, &
+                      cgnsL2resV2, cgnsL2resF)
 #ifndef USE_COMPLEX
-             monGlob(mm) = sqrt(monGlob(mm)/nCellGlobal(groundLevel))
+                    monGlob(mm) = sqrt(monGlob(mm) / nCellGlobal(groundLevel))
 #else
-            ! take the square roots separately in complex mode
-            monGlob(mm) = cmplx(sqrt(real(monGlob(mm)/nCellGlobal(groundLevel))), &
-                               sqrt(aimag(monGlob(mm)/nCellGlobal(groundLevel))))
+                    ! take the square roots separately in complex mode
+                    monGlob(mm) = cmplx(sqrt(real(monGlob(mm) / nCellGlobal(groundLevel))), &
+                                        sqrt(aimag(monGlob(mm) / nCellGlobal(groundLevel))))
 #endif
 
-             if (monNames(mm) == cgnsL2resRho) then
-                rhoRes = monGlob(mm)
-             end if
-          case ('totalR')
+                    if (monNames(mm) == cgnsL2resRho) then
+                        rhoRes = monGlob(mm)
+                    end if
+                case ('totalR')
 #ifndef USE_COMPLEX
-             monGlob(mm) = sqrt(monGlob(mm))
+                    monGlob(mm) = sqrt(monGlob(mm))
 #else
-             ! take the square roots separately in complex mode
-             monGlob(mm) = cmplx(sqrt(real(monGlob(mm))), sqrt(aimag(monGlob(mm))))
+                    ! take the square roots separately in complex mode
+                    monGlob(mm) = cmplx(sqrt(real(monGlob(mm))), sqrt(aimag(monGlob(mm))))
 #endif
-             totalR = monGlob(mm)
-          end select
+                    totalR = monGlob(mm)
+                end select
 
-          if( myIsNAN(monGlob(mm)) ) nanOccurred = .true.
+                if (myIsNAN(monGlob(mm))) nanOccurred = .true.
 
-          if (myid == 0 .and. printIterations) then
-             ! Write the convergence info to stdout.
+                if (myid == 0 .and. printIterations) then
+                    ! Write the convergence info to stdout.
 #ifndef USE_COMPLEX
-             write(*,"(es24.16,1x)",advance="no") monGlob(mm)
+                    write (*, "(es24.16,1x)", advance="no") monGlob(mm)
 #else
-            select case (monNames(mm))
-
-            case (cgnsL2resRho,  cgnsL2resMomx,    &
-                  cgnsL2resMomy, cgnsL2resMomz,    &
-                  cgnsL2resRhoe, cgnsL2resNu,      &
-                  cgnsL2resK,    cgnsL2resOmega,   &
-                  cgnsL2resTau,  cgnsL2resEpsilon, &
-                  cgnsL2resV2,   cgnsL2resF, 'totalR')
-
-               ! we can do a shorter print for residuals because only the leading few digits
-               ! and the exponents are important anyways
-               write(*,'(es16.8,SP,es17.8E3,"i")',advance="no") monGlob(mm)
-
-            case default
-               ! we need to do the regular full print for functionals because they are useful
-               write(*,'(es24.16,SP,es25.16E3,"i")',advance="no") monGlob(mm)
-            end select
+                    select case (monNames(mm))
+
+                    case (cgnsL2resRho, cgnsL2resMomx, &
+                          cgnsL2resMomy, cgnsL2resMomz, &
+                          cgnsL2resRhoe, cgnsL2resNu, &
+                          cgnsL2resK, cgnsL2resOmega, &
+                          cgnsL2resTau, cgnsL2resEpsilon, &
+                          cgnsL2resV2, cgnsL2resF, 'totalR')
+
+                        ! we can do a shorter print for residuals because only the leading few digits
+                        ! and the exponents are important anyways
+                        write (*, '(es16.8,SP,es17.8E3,"i")', advance="no") monGlob(mm)
+
+                    case default
+                        ! we need to do the regular full print for functionals because they are useful
+                        write (*, '(es24.16,SP,es25.16E3,"i")', advance="no") monGlob(mm)
+                    end select
 #endif
-          end if
-
-          ! Store the convergence info in convArray, if desired.
-          if( storeConvInnerIter ) then
-             convArray(iterTot, sps, mm) = monGlob(mm)
-          endif
-       end do variableLoop
-
-       if (myid == 0 .and. printIterations) then
-          ! Write the carriage return.
-          print "(1x)"
-       end if
-
-       ! Determine whether or not the solution is converged.
-       ! A distinction must be made between unsteady mode and the
-       ! other modes, because in unsteady mode the convergence of
-       ! the inner iterations is not necessarily stored.
-
-       select case (equationMode)
-       case (steady, timeSpectral)
-
-          ! Steady or time spectral mode. The convergence histories
-          ! are stored and this info can be used. The logical
-          ! converged is set to .false. if the density residual
-          ! has not converged yet.
-
-          absConv = .False.
-          relConv = .False.
-
-          ! We make a split here based on if we are operating on a
-          ! coarse grid or the finest. On the coarse grid levels, we
-          ! l2convThisLevel refers to the relative convergence of
-          ! rhoRes.
-
-          if (currentLevel /= 1) then
-             if (rhoRes < L2ConvThisLevel * rhoResStart) then
-                relConv = .True.
-             end if
-          else
-             ! We are on the fine level. All checking is done using
-             ! the total residual.
-
-             ! Absolute convergence check
-             if (totalR < L2ConvThisLevel * totalR0) then
-                absConv = .True.
-             end if
-
-             ! Relative check only done on finest level
-             if (totalR < L2ConvRel*totalRStart) then
-                relConv = .True.
-             end if
-
-             ! If the totla number of iterations is less than the
-             ! RKReset, don't check the residual
-             if (iterTot < minIterNum .and. rkreset) then
-                relConv = .False.
-                absConv = .False.
-             end if
-          end if
+                end if
+
+                ! Store the convergence info in convArray, if desired.
+                if (storeConvInnerIter) then
+                    convArray(iterTot, sps, mm) = monGlob(mm)
+                end if
+            end do variableLoop
+
+            if (myid == 0 .and. printIterations) then
+                ! Write the carriage return.
+                print "(1x)"
+            end if
 
-          ! Combine the two flags.
-          if (absConv .or. relConv)  then
-             converged = .True.
-          end if
+            ! Determine whether or not the solution is converged.
+            ! A distinction must be made between unsteady mode and the
+            ! other modes, because in unsteady mode the convergence of
+            ! the inner iterations is not necessarily stored.
 
-          !===========================================================
+            select case (equationMode)
+            case (steady, timeSpectral)
 
-       case (unsteady)
+                ! Steady or time spectral mode. The convergence histories
+                ! are stored and this info can be used. The logical
+                ! converged is set to .false. if the density residual
+                ! has not converged yet.
 
-          ! Unsteady mode. The array convArray may not be present
-          ! and therefore something else must be done.
-          ! First determine the position in the array timeDataArray
-          ! that can be used. For the coarser grids this is 1,
-          ! because the time evolution is overwritten. For the fine
-          ! mesh the actual position is determined.
+                absConv = .False.
+                relConv = .False.
 
-          nn = 1
-          if(groundLevel == 1) &
-               nn = timeStepUnsteady + nTimeStepsRestart
-          nn = max(nn,1_intType)
+                ! We make a split here based on if we are operating on a
+                ! coarse grid or the finest. On the coarse grid levels, we
+                ! l2convThisLevel refers to the relative convergence of
+                ! rhoRes.
 
-          ! Make a distinction between the time integration
-          ! schemes.
+                if (currentLevel /= 1) then
+                    if (rhoRes < L2ConvThisLevel * rhoResStart) then
+                        relConv = .True.
+                    end if
+                else
+                    ! We are on the fine level. All checking is done using
+                    ! the total residual.
+
+                    ! Absolute convergence check
+                    if (totalR < L2ConvThisLevel * totalR0) then
+                        absConv = .True.
+                    end if
+
+                    ! Relative check only done on finest level
+                    if (totalR < L2ConvRel * totalRStart) then
+                        relConv = .True.
+                    end if
+
+                    ! If the totla number of iterations is less than the
+                    ! RKReset, don't check the residual
+                    if (iterTot < minIterNum .and. rkreset) then
+                        relConv = .False.
+                        absConv = .False.
+                    end if
+                end if
 
-          select case(timeIntegrationScheme)
+                ! Combine the two flags.
+                if (absConv .or. relConv) then
+                    converged = .True.
+                end if
 
-          case (explicitRK)
+                !===========================================================
 
-             ! Explicit scheme. Simply store the data in the
-             ! convergence arrays.
+            case (unsteady)
 
-             timeArray(nn) = timeUnsteady + timeUnsteadyRestart
+                ! Unsteady mode. The array convArray may not be present
+                ! and therefore something else must be done.
+                ! First determine the position in the array timeDataArray
+                ! that can be used. For the coarser grids this is 1,
+                ! because the time evolution is overwritten. For the fine
+                ! mesh the actual position is determined.
 
-             ! For explicit schemes the residuals are not
-             ! monitored and therefore the monitoring variables
-             ! can simply be copied.
+                nn = 1
+                if (groundLevel == 1) &
+                    nn = timeStepUnsteady + nTimeStepsRestart
+                nn = max(nn, 1_intType)
 
-             do mm=1,nMon
-                timeDataArray(nn,mm) = monGlob(mm)
-             enddo
+                ! Make a distinction between the time integration
+                ! schemes.
 
-             !=======================================================
+                select case (timeIntegrationScheme)
 
-          case (BDF,implicitRK)
+                case (explicitRK)
 
-             ! An implicit scheme is used and therefore an
-             ! iterative algorithm within every time step.
-             ! The array convArray may not be present and
-             ! therefore
-             ! something else must be done. First determine the
-             ! position in the array timeDataArray that can be
-             ! used.
-             ! For the coarser grids this is 1, because the time
-             ! evolution is overwritten. For the fine mesh the
-             ! actual position is determined.
+                    ! Explicit scheme. Simply store the data in the
+                    ! convergence arrays.
 
-             ! Determine the situation we have here.
+                    timeArray(nn) = timeUnsteady + timeUnsteadyRestart
 
-             testInitUnsteady: if(iterTot == 0) then
+                    ! For explicit schemes the residuals are not
+                    ! monitored and therefore the monitoring variables
+                    ! can simply be copied.
 
-                ! This is the initialization phase for this time step.
-                ! Simply copy monGlob into monRef, store the value
-                ! of the physical time and set converged to .false..
+                    do mm = 1, nMon
+                        timeDataArray(nn, mm) = monGlob(mm)
+                    end do
 
+                    !=======================================================
 
-                do mm=1,nMon
-                   monRef(mm) = monGlob(mm)
-                enddo
+                case (BDF, implicitRK)
 
-                timeArray(nn) = timeUnsteady + timeUnsteadyRestart
-                converged     = .false.
+                    ! An implicit scheme is used and therefore an
+                    ! iterative algorithm within every time step.
+                    ! The array convArray may not be present and
+                    ! therefore
+                    ! something else must be done. First determine the
+                    ! position in the array timeDataArray that can be
+                    ! used.
+                    ! For the coarser grids this is 1, because the time
+                    ! evolution is overwritten. For the fine mesh the
+                    ! actual position is determined.
 
-             else testInitUnsteady
+                    ! Determine the situation we have here.
 
-                ! Iteration for this time step. Store the relative
-                ! convergence for the residual compared to the start
-                ! of this time step; an absolute norm does not give
-                ! any information here. For all other monitoring
-                ! variables the current value is stored.
+                    testInitUnsteady: if (iterTot == 0) then
 
-                do mm=1,nMon
-                   select case (monNames(mm))
+                        ! This is the initialization phase for this time step.
+                        ! Simply copy monGlob into monRef, store the value
+                        ! of the physical time and set converged to .false..
 
-                   case (cgnsL2resRho,  cgnsL2resMomx,    &
-                        cgnsL2resMomy, cgnsL2resMomz,    &
-                        cgnsL2resRhoE, cgnsL2resNu,      &
-                        cgnsL2resK,    cgnsL2resOmega,   &
-                        cgnsL2resTau,  cgnsL2resEpsilon, &
-                        cgnsL2resV2,   cgnsL2resF)
+                        do mm = 1, nMon
+                            monRef(mm) = monGlob(mm)
+                        end do
 
-                      timeDataArray(nn,mm) = monGlob(mm) &
-                           / max(monRef(mm),eps)
+                        timeArray(nn) = timeUnsteady + timeUnsteadyRestart
+                        converged = .false.
 
-                      !===============================================
+                        else testInitUnsteady
 
-                   case default
+                        ! Iteration for this time step. Store the relative
+                        ! convergence for the residual compared to the start
+                        ! of this time step; an absolute norm does not give
+                        ! any information here. For all other monitoring
+                        ! variables the current value is stored.
 
+                        do mm = 1, nMon
+                            select case (monNames(mm))
 
-                      timeDataArray(nn,mm) = monGlob(mm)
+                            case (cgnsL2resRho, cgnsL2resMomx, &
+                                  cgnsL2resMomy, cgnsL2resMomz, &
+                                  cgnsL2resRhoE, cgnsL2resNu, &
+                                  cgnsL2resK, cgnsL2resOmega, &
+                                  cgnsL2resTau, cgnsL2resEpsilon, &
+                                  cgnsL2resV2, cgnsL2resF)
 
-                   end select
-                enddo
+                                timeDataArray(nn, mm) = monGlob(mm) &
+                                                        / max(monRef(mm), eps)
 
-                ! Set the logical converged to .false. if the density
-                ! residual has not converged yet.
+                                !===============================================
 
-                if(timeDataArray(nn,1) < L2ConvThisLevel) &
-                     converged = .True.
+                            case default
 
-             endif testInitUnsteady
-          end select ! temporal integration scheme
+                                timeDataArray(nn, mm) = monGlob(mm)
 
-       end select ! unsteady
+                            end select
+                        end do
 
-    end do spectralLoop
+                        ! Set the logical converged to .false. if the density
+                        ! residual has not converged yet.
 
+                        if (timeDataArray(nn, 1) < L2ConvThisLevel) &
+                            converged = .True.
 
-    if( nanOccurred )then
+                    end if testInitUnsteady
+                end select ! temporal integration scheme
 
-       if (myid == 0) then
-          print *,'Nan occured in Convergence Info on proc:',myid
-       end if
+            end select ! unsteady
 
-       routineFailed = .True.
+        end do spectralLoop
 
-       call returnFail("convergenceInfo", &
-            "A NaN occurred during the computation.")
+        if (nanOccurred) then
 
-       ! in a normal computation, code will simply exit.
-       ! in a python based computation, code will set
-       ! routinedFailed to .True. and return to the
-       ! python level...
-       return
-    endif
+            if (myid == 0) then
+                print *, 'Nan occured in Convergence Info on proc:', myid
+            end if
 
-    ! ! If we are at the max iteration limit but the residual is
-    ! ! *close*, ie within maxL2DeviationFactor we say that's fine
+            routineFailed = .True.
 
-    if(fromPython .and. groundLevel ==1 .and. approxTotalIts >= nCycles) then
+            call returnFail("convergenceInfo", &
+                            "A NaN occurred during the computation.")
 
-       !Check to see if residuals are diverging or stalled for python
-       select case (equationMode)
+            ! in a normal computation, code will simply exit.
+            ! in a python based computation, code will set
+            ! routinedFailed to .True. and return to the
+            ! python level...
+            return
+        end if
 
-       case (steady, timeSpectral)
+        ! ! If we are at the max iteration limit but the residual is
+        ! ! *close*, ie within maxL2DeviationFactor we say that's fine
 
-          ! Steady or time spectral mode. The convergence histories
-          ! are stored and this info can be used. If the residuals
-          ! are diverging the, logical routineFailed in killSignals
-          ! is set to true and the progress is halted.
-          !only check on root porcessor
-          if (myID == 0) then
+        if (fromPython .and. groundLevel == 1 .and. approxTotalIts >= nCycles) then
 
-             ! If we made it to ncycles, check to see if we're
-             ! "close" to being converged.
-             do sps = 1,nTimeIntervalsSpectral
-                if (totalR > maxL2DeviationFactor * totalR0 * L2ConvThisLevel) then
-                   routineFailed = .True.
+            !Check to see if residuals are diverging or stalled for python
+            select case (equationMode)
+
+            case (steady, timeSpectral)
+
+                ! Steady or time spectral mode. The convergence histories
+                ! are stored and this info can be used. If the residuals
+                ! are diverging the, logical routineFailed in killSignals
+                ! is set to true and the progress is halted.
+                !only check on root porcessor
+                if (myID == 0) then
+
+                    ! If we made it to ncycles, check to see if we're
+                    ! "close" to being converged.
+                    do sps = 1, nTimeIntervalsSpectral
+                        if (totalR > maxL2DeviationFactor * totalR0 * L2ConvThisLevel) then
+                            routineFailed = .True.
+                        end if
+                    end do
                 end if
-             enddo
-          endif
-          call mpi_bcast(routineFailed, 1, MPI_LOGICAL, 0, ADflow_comm_world, ierr)
+                call mpi_bcast(routineFailed, 1, MPI_LOGICAL, 0, ADflow_comm_world, ierr)
 
-       end select
-    end if
+            end select
+        end if
 
-    ! Determine whether or not the solution is considered
-    ! converged.  This info is only known at processor 0 and must
-    ! therefore be broadcast to the other processors.
+        ! Determine whether or not the solution is considered
+        ! converged.  This info is only known at processor 0 and must
+        ! therefore be broadcast to the other processors.
 
-    call mpi_bcast(converged, 1, MPI_LOGICAL, 0, ADflow_comm_world, ierr)
+        call mpi_bcast(converged, 1, MPI_LOGICAL, 0, ADflow_comm_world, ierr)
 
-  end subroutine convergenceInfo
+    end subroutine convergenceInfo
 
 end module solvers
diff --git a/src/solver/surfaceIntegrations.F90 b/src/solver/surfaceIntegrations.F90
index 860f44686..d39af2d5f 100644
--- a/src/solver/surfaceIntegrations.F90
+++ b/src/solver/surfaceIntegrations.F90
@@ -2,1588 +2,1584 @@ module surfaceIntegrations
 
 contains
 
-  subroutine getCostFunctions(globalVals, funcValues)
-
-    use constants
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use flowVarRefState, only : pRef, rhoRef, tRef, LRef, gammaInf, pInf, uRef, uInf
-    use inputPhysics, only : liftDirection, dragDirection, surfaceRef, &
-    machCoef, lengthRef, alpha, beta, liftIndex, cpmin_family, &
-    cpmin_rho
-    use inputCostFunctions, only : computeCavitation
-    use inputTSStabDeriv, only : TSstability
-    use utils, only : computeTSDerivatives
-    use flowUtils, only : getDirVector
-    implicit none
-
-    ! Input/Output
-    real(kind=realType), intent(in), dimension(:, :) :: globalVals
-    real(Kind=realType), intent(out), dimension(:) :: funcValues
-
-    ! Working
-    real(kind=realType) :: fact, factMoment, ovrNTS
-    real(kind=realType), dimension(3, nTimeIntervalsSpectral) :: force, forceP, forceV, forceM, &
-         moment, cForce, cForceP, cForceV, cForceM, cMoment, coFx, coFy, coFz
-    real(kind=realType), dimension(3) :: VcoordRef, VFreestreamRef
-    real(kind=realType) ::  mAvgPtot, mAvgTtot, mAvgRho, mAvgPs, mFlow, mAvgMn, mAvga, &
-                            mAvgVx, mAvgVy, mAvgVz, gArea, mAvgVi
-
-    real(kind=realType) ::  vdotn, mag, u, v, w
-    integer(kind=intType) :: sps
-    real(kind=realType), dimension(8):: dcdq, dcdqdot
-    real(kind=realType), dimension(8):: dcdalpha,dcdalphadot
-    real(kind=realType), dimension(8):: Coef0
-
-    ! Factor used for time-averaged quantities.
-    ovrNTS = one/nTimeIntervalsSpectral
-
-    ! Sum pressure and viscous contributions
-    force = globalvals(iFp:iFp+2, :) + globalvals(iFv:iFv+2, :) + globalvals(iFlowFm:iFlowFm+2, :)
-    forceP = globalvals(iFp:iFp+2, :)
-    forceV = globalvals(iFv:iFv+2, :)
-    forceM = globalvals(iFlowFm:iFlowFm+2, :)
-
-    coFx = globalvals(iCoForceX:iCoForceX+2, :)
-    coFy = globalvals(iCoForceY:iCoForceY+2, :)
-    coFz = globalvals(iCoForceZ:iCoForceZ+2, :)
-
-    Moment = globalvals(iMp:iMp+2, :) + globalvals(iMv:iMv+2, :) + globalvals(iFlowMm:iFlowMm+2, :)
-
-    fact = two/(gammaInf*MachCoef*MachCoef &
-         *surfaceRef*LRef*LRef*pRef)
-    cForce = fact*force
-    cForceP = fact*forceP
-    cForceV = fact*forceV
-    cForceM = fact*forceM
-
-    ! Moment factor has an extra lengthRef
-    fact = fact/(lengthRef*LRef)
-    cMoment = fact*Moment
-
-    ! Zero values since we are summing.
-    funcValues = zero
-
-    ! Here we finally assign the final function values
-    !$AD II-LOOP
-    do sps=1, nTimeIntervalsSpectral
-       funcValues(costFuncForceX) = funcValues(costFuncForceX) + ovrNTS*force(1, sps)
-       funcValues(costFuncForceY) = funcValues(costFuncForceY) + ovrNTS*force(2, sps)
-       funcValues(costFuncForceZ) = funcValues(costFuncForceZ) + ovrNTS*force(3, sps)
-
-       funcValues(costFuncForceXPressure) = funcValues(costFuncForceXPressure) + ovrNTS*forceP(1, sps)
-       funcValues(costFuncForceYPressure) = funcValues(costFuncForceYPressure) + ovrNTS*forceP(2, sps)
-       funcValues(costFuncForceZPressure) = funcValues(costFuncForceZPressure) + ovrNTS*forceP(3, sps)
-
-       funcValues(costFuncForceXViscous) = funcValues(costFuncForceXViscous) + ovrNTS*forceV(1, sps)
-       funcValues(costFuncForceYViscous) = funcValues(costFuncForceYViscous) + ovrNTS*forceV(2, sps)
-       funcValues(costFuncForceZViscous) = funcValues(costFuncForceZViscous) + ovrNTS*forceV(3, sps)
-
-       funcValues(costFuncForceXMomentum) = funcValues(costFuncForceXMomentum) + ovrNTS*forceM(1, sps)
-       funcValues(costFuncForceYMomentum) = funcValues(costFuncForceYMomentum) + ovrNTS*forceM(2, sps)
-       funcValues(costFuncForceZMomentum) = funcValues(costFuncForceZMomentum) + ovrNTS*forceM(3, sps)
-
-       ! ------------
-
-       funcValues(costFuncForceXCoef) = funcValues(costFuncForceXCoef) + ovrNTS*cForce(1, sps)
-       funcValues(costFuncForceYCoef) = funcValues(costFuncForceYCoef) + ovrNTS*cForce(2, sps)
-       funcValues(costFuncForceZCoef) = funcValues(costFuncForceZCoef) + ovrNTS*cForce(3, sps)
-
-       funcValues(costFuncForceXCoefPressure) = funcValues(costFuncForceXCoefPressure) + ovrNTS*cForceP(1, sps)
-       funcValues(costFuncForceYCoefPressure) = funcValues(costFuncForceYCoefPressure) + ovrNTS*cForceP(2, sps)
-       funcValues(costFuncForceZCoefPressure) = funcValues(costFuncForceZCoefPressure) + ovrNTS*cForceP(3, sps)
-
-       funcValues(costFuncForceXCoefViscous) = funcValues(costFuncForceXCoefViscous) + ovrNTS*cForceV(1, sps)
-       funcValues(costFuncForceYCoefViscous) = funcValues(costFuncForceYCoefViscous) + ovrNTS*cForceV(2, sps)
-       funcValues(costFuncForceZCoefViscous) = funcValues(costFuncForceZCoefViscous) + ovrNTS*cForceV(3, sps)
-
-       funcValues(costFuncForceXCoefMomentum) = funcValues(costFuncForceXCoefMomentum) + ovrNTS*cForceM(1, sps)
-       funcValues(costFuncForceYCoefMomentum) = funcValues(costFuncForceYCoefMomentum) + ovrNTS*cForceM(2, sps)
-       funcValues(costFuncForceZCoefMomentum) = funcValues(costFuncForceZCoefMomentum) + ovrNTS*cForceM(3, sps)
-
-       ! ------------
-
-       ! center of pressure (these are actually center of all forces)
-       ! protect the divisions against zero, and divide the weighed sum by the force magnitude
-       ! for this time spectral instance before we add it to the sum
-       if (force(1, sps) /= zero) then
-          coFx(:, sps) = coFx(:, sps) / force(1, sps)
-       else
-          coFx(:, sps) = zero
-       end if
-
-       if (force(2, sps) /= zero) then
-          coFy(:, sps) = coFy(:, sps) / force(2, sps)
-       else
-          coFy(:, sps) = zero
-       end if
-
-       if (force(3, sps) /= zero) then
-          coFz(:, sps) = coFz(:, sps) / force(3, sps)
-       else
-          coFz(:, sps) = zero
-       end if
-
-       ! Fx
-       funcValues(costFuncCOForceXX) = funcValues(costFuncCOForceXX) + ovrNTS*coFx(1, sps)
-       funcValues(costFuncCOForceXY) = funcValues(costFuncCOForceXY) + ovrNTS*coFx(2, sps)
-       funcValues(costFuncCOForceXZ) = funcValues(costFuncCOForceXZ) + ovrNTS*coFx(3, sps)
-
-       ! Fy
-       funcValues(costFuncCOForceYX) = funcValues(costFuncCOForceYX) + ovrNTS*coFy(1, sps)
-       funcValues(costFuncCOForceYY) = funcValues(costFuncCOForceYY) + ovrNTS*coFy(2, sps)
-       funcValues(costFuncCOForceYZ) = funcValues(costFuncCOForceYZ) + ovrNTS*coFy(3, sps)
-
-       ! Fz
-       funcValues(costFuncCOForceZX) = funcValues(costFuncCOForceZX) + ovrNTS*coFz(1, sps)
-       funcValues(costFuncCOForceZY) = funcValues(costFuncCOForceZY) + ovrNTS*coFz(2, sps)
-       funcValues(costFuncCOForceZZ) = funcValues(costFuncCOForceZZ) + ovrNTS*coFz(3, sps)
-
-       ! ------------
-
-       funcValues(costFuncMomX) = funcValues(costFuncMomX) + ovrNTS*moment(1, sps)
-       funcValues(costFuncMomY) = funcValues(costFuncMomY) + ovrNTS*moment(2, sps)
-       funcValues(costFuncMomZ) = funcValues(costFuncMomZ) + ovrNTS*moment(3, sps)
-
-       funcValues(costFuncMomXCoef) = funcValues(costFuncMomXCoef) + ovrNTS*cMoment(1, sps)
-       funcValues(costFuncMomYCoef) = funcValues(costFuncMomYCoef) + ovrNTS*cMoment(2, sps)
-       funcValues(costFuncMomZCoef) = funcValues(costFuncMomZCoef) + ovrNTS*cMoment(3, sps)
-
-       funcValues(costFuncSepSensor) = funcValues(costFuncSepSensor) + ovrNTS*globalVals(iSepSensor, sps)
-       funcValues(costFuncCavitation) = funcValues(costFuncCavitation) + ovrNTS*globalVals(iCavitation, sps)
-       ! final part of the KS computation
-       if (computeCavitation) then
-          ! only calculate the log part if we are actually computing for cavitation.
-          ! If we are not computing cavitation, the iCpMin in globalVals will be zero,
-          ! which doesn't play well with log. we just want to return zero here.
-          funcValues(costfunccpmin) = funcValues(costfunccpmin) + ovrNTS * &
-               (cpmin_family(sps) - log(globalVals(iCpMin, sps)) / cpmin_rho)
-       endif
-
-       funcValues(costFuncAxisMoment) = funcValues(costFuncAxisMoment) + ovrNTS*globalVals(iAxisMoment, sps)
-       funcValues(costFuncSepSensorAvgX) = funcValues(costFuncSepSensorAvgX) + ovrNTS*globalVals(iSepAvg  , sps)
-       funcValues(costFuncSepSensorAvgY) = funcValues(costFuncSepSensorAvgY) + ovrNTS*globalVals(iSepAvg+1, sps)
-       funcValues(costFuncSepSensorAvgZ) = funcValues(costFuncSepSensorAvgZ) + ovrNTS*globalVals(iSepAvg+2, sps)
-       funcValues(costFuncArea)    = funcValues(costFuncArea) + ovrNTS*globalVals(iArea, sps)
-       funcValues(costFuncFlowPower) = funcValues(costFuncFlowPower) + ovrNTS*globalVals(iPower, sps)
-
-       funcValues(costFuncCpError2) = funcValues(costFuncCpError2) + ovrNTS*globalVals(iCpError2,sps)
-
-       ! Mass flow like objective
-       mFlow = globalVals(iMassFlow, sps)
-       if (mFlow /= zero) then
-          mAvgPtot = globalVals(iMassPtot, sps)/mFlow
-          mAvgTtot = globalVals(iMassTtot, sps)/mFlow
-          mAvgrho   = globalVals(iMassRho, sps)/mFlow
-          mAvgPs   = globalVals(iMassPs, sps)/mFlow
-          mAvgMn   = globalVals(iMassMn, sps)/mFlow
-          mAvga   = globalVals(iMassa, sps)/mFlow
-
-          mAvgVx   = globalVals(iMassVx, sps)/mFlow
-          mAvgVy   = globalVals(iMassVy, sps)/mFlow
-          mAvgVz   = globalVals(iMassVz, sps)/mFlow
-
-          mAvgVi = (globalVals(iMassVi, sps) / mFlow)
-
-          mag = sqrt(globalVals(iMassnx, sps)**2 + &
-                    globalVals(iMassny, sps)**2 + &
-                    globalVals(iMassnz, sps)**2)
-
-       else
-          mAvgPtot = zero
-          mAvgTtot = zero
-          mAvgrho = zero
-          mAvgPs = zero
-          mAvgMn = zero
-          mAvga = zero
-          mAvgVx   = zero
-          mAvgVy   = zero
-          mAvgVz   = zero
-          mAvgVi = zero
-
-       end if
-
-       ! area averaged objectives
-       gArea = globalVals(iArea, sps)
-       if (gArea /= zero) then
-          ! area averaged pressure
-          funcValues(costFuncAAvgPTot) = funcValues(costFuncAAvgPTot) + ovrNTS*globalVals(iAreaPTot, sps) / gArea
-          funcValues(costFuncAAvgPs) = funcValues(costFuncAAvgPs) + ovrNTS*globalVals(iAreaPs, sps) / gArea
-       end if
-
-       funcValues(costFuncMdot)      = funcValues(costFuncMdot) + ovrNTS*mFlow
-       funcValues(costFuncMavgPtot ) = funcValues(costFuncMavgPtot) + ovrNTS*mAvgPtot
-       funcValues(costFuncMavgTtot)  = funcValues(costFuncMavgTtot) + ovrNTS*mAvgTtot
-       funcValues(costFuncMavgRho)  = funcValues(costFuncMavgRho) + ovrNTS*mAvgRho
-       funcValues(costFuncMavgPs)    = funcValues(costFuncMAvgPs) + ovrNTS*mAvgPs
-       funcValues(costFuncMavgMn)    = funcValues(costFuncMAvgMn) + ovrNTS*mAvgMn
-       funcValues(costFuncMavga)    = funcValues(costFuncMAvga) + ovrNTS*mAvga
-       funcValues(costfuncmavgvx)    = funcValues(costfuncmavgvx) + ovrNTS*mAvgVx
-       funcValues(costfuncmavgvy)    = funcValues(costfuncmavgvy) + ovrNTS*mAvgVy
-       funcValues(costfuncmavgvz)    = funcValues(costfuncmavgvz) + ovrNTS*mAvgVz
-       funcValues(costfuncmavgvi)    = funcValues(costfuncmavgvi) + ovrNTS*mAvgVi
-       ! Bending moment calc - also broken.
-       ! call computeRootBendingMoment(cForce, cMoment, liftIndex, bendingMoment)
-       ! funcValues(costFuncBendingCoef) = funcValues(costFuncBendingCoef) + ovrNTS*bendingMoment
-
-    end do
-
-    ! Lift and Drag (coefficients): Dot product with the lift/drag direction.
-    funcValues(costFuncLift) = &
-         funcValues(costFuncForceX)*liftDirection(1) + &
-         funcValues(costFuncForceY)*liftDirection(2) + &
-         funcValues(costFuncForceZ)*liftDirection(3)
-
-    funcValues(costFuncLiftPressure) = &
-         funcValues(costFuncForceXPressure)*liftDirection(1) + &
-         funcValues(costFuncForceYPressure)*liftDirection(2) + &
-         funcValues(costFuncForceZPressure)*liftDirection(3)
-
-    funcValues(costFuncLiftViscous) = &
-         funcValues(costFuncForceXViscous)*liftDirection(1) + &
-         funcValues(costFuncForceYViscous)*liftDirection(2) + &
-         funcValues(costFuncForceZViscous)*liftDirection(3)
-
-    funcValues(costFuncLiftMomentum) = &
-         funcValues(costFuncForceXMomentum)*liftDirection(1) + &
-         funcValues(costFuncForceYMomentum)*liftDirection(2) + &
-         funcValues(costFuncForceZMomentum)*liftDirection(3)
-
-    !-----
-
-    funcValues(costFuncDrag) = &
-         funcValues(costFuncForceX)*dragDirection(1) + &
-         funcValues(costFuncForceY)*dragDirection(2) + &
-         funcValues(costFuncForceZ)*dragDirection(3)
-
-    funcValues(costFuncDragPressure) = &
-         funcValues(costFuncForceXPressure)*dragDirection(1) + &
-         funcValues(costFuncForceYPressure)*dragDirection(2) + &
-         funcValues(costFuncForceZPressure)*dragDirection(3)
-
-    funcValues(costFuncDragViscous) = &
-         funcValues(costFuncForceXViscous)*dragDirection(1) + &
-         funcValues(costFuncForceYViscous)*dragDirection(2) + &
-         funcValues(costFuncForceZViscous)*dragDirection(3)
-
-    funcValues(costFuncDragMomentum) = &
-         funcValues(costFuncForceXMomentum)*dragDirection(1) + &
-         funcValues(costFuncForceYMomentum)*dragDirection(2) + &
-         funcValues(costFuncForceZMomentum)*dragDirection(3)
-
-    !-----
-
-    funcValues(costFuncLiftCoef) = &
-         funcValues(costFuncForceXCoef)*liftDirection(1) + &
-         funcValues(costFuncForceYCoef)*liftDirection(2) + &
-         funcValues(costFuncForceZCoef)*liftDirection(3)
-
-    funcValues(costFuncLiftCoefPressure) = &
-         funcValues(costFuncForceXCoefPressure)*liftDirection(1) + &
-         funcValues(costFuncForceYCoefPressure)*liftDirection(2) + &
-         funcValues(costFuncForceZCoefPressure)*liftDirection(3)
-
-    funcValues(costFuncLiftCoefViscous) = &
-         funcValues(costFuncForceXCoefViscous)*liftDirection(1) + &
-         funcValues(costFuncForceYCoefViscous)*liftDirection(2) + &
-         funcValues(costFuncForceZCoefViscous)*liftDirection(3)
-
-    funcValues(costFuncLiftCoefMomentum) = &
-         funcValues(costFuncForceXCoefMomentum)*liftDirection(1) + &
-         funcValues(costFuncForceYCoefMomentum)*liftDirection(2) + &
-         funcValues(costFuncForceZCoefMomentum)*liftDirection(3)
-
-    !-----
-
-    funcValues(costFuncDragCoef) = &
-         funcValues(costFuncForceXCoef)*dragDirection(1) + &
-         funcValues(costFuncForceYCoef)*dragDirection(2) + &
-         funcValues(costFuncForceZCoef)*dragDirection(3)
-
-    funcValues(costFuncDragCoefPressure) = &
-         funcValues(costFuncForceXCoefPressure)*dragDirection(1) + &
-         funcValues(costFuncForceYCoefPressure)*dragDirection(2) + &
-         funcValues(costFuncForceZCoefPressure)*dragDirection(3)
-
-    funcValues(costFuncDragCoefViscous) = &
-         funcValues(costFuncForceXCoefViscous)*dragDirection(1) + &
-         funcValues(costFuncForceYCoefViscous)*dragDirection(2) + &
-         funcValues(costFuncForceZCoefViscous)*dragDirection(3)
-
-    funcValues(costFuncDragCoefMomentum) = &
-         funcValues(costFuncForceXCoefMomentum)*dragDirection(1) + &
-         funcValues(costFuncForceYCoefMomentum)*dragDirection(2) + &
-         funcValues(costFuncForceZCoefMomentum)*dragDirection(3)
-
-    ! -------------------- Time Spectral Objectives ------------------
-
-    if (TSSTability) then
-       print *,'Error: TSStabilityDerivatives are *BROKEN*. They need to be '&
-            &'completely verifed from scratch'
-       stop
-
-       call computeTSDerivatives(force, moment, coef0, dcdalpha, &
-            dcdalphadot, dcdq, dcdqdot)
-
-       funcValues( costFuncCl0  )       = coef0(1)
-       funcValues( costFuncCd0 )        = coef0(2)
-       funcValues( costFuncCFy0 )       = coef0(4)
-       funcValues( costFuncCm0 )        = coef0(8)
-
-       funcValues( costFuncClAlpha)     = dcdalpha(1)
-       funcValues( costFuncCdAlpha)     = dcdalpha(2)
-       funcValues( costFuncCFyAlpha)    = dcdalpha(4)
-       funcValues( costFuncCmzAlpha)    = dcdalpha(8)
-
-       funcValues( costFuncClAlphaDot)     = dcdalphadot(1)
-       funcValues( costFuncCdAlphaDot)     = dcdalphadot(2)
-       funcValues( costFuncCFyAlphaDot)    = dcdalphadot(4)
-       funcValues( costFuncCmzAlphaDot)    = dcdalphadot(8)
-
-       funcValues( costFuncClq)         = dcdq(1)
-       funcValues( costFuncCdq)         = dcdq(2)
-       funcValues( costFuncCfyq)        = dcdq(4)
-       funcValues( costFuncCmzq)        = dcdq(8)
-
-       funcValues( costFuncClqDot)         = dcdqdot(1)
-       funcValues( costFuncCdqDot)         = dcdqdot(2)
-       funcValues( costFuncCfyqDot)        = dcdqdot(4)
-       funcValues( costFuncCmzqDot)        = dcdqdot(8)
-    end if
-
-  end subroutine getCostFunctions
-
-  subroutine wallIntegrationFace(localValues, mm)
-    !
-    !       wallIntegrations computes the contribution of the block
-    !       given by the pointers in blockPointers to the force and
-    !       moment of the geometry. A distinction is made
-    !       between the inviscid and viscous parts. In case the maximum
-    !       yplus value must be monitored (only possible for rans), this
-    !       value is also computed. The separation sensor and the cavita-
-    !       tion sensor is also computed
-    !       here.
-    !
-    use constants
-    use communication
-    use blockPointers
-    use flowVarRefState
-    use inputCostFunctions
-    use inputPhysics, only : MachCoef, pointRef, velDirFreeStream, &
-          equations, momentAxis, cpmin_family, cpmin_rho, cavitationnumber
-    use BCPointers
-    implicit none
-
-    ! Input/output variables
-    real(kind=realType), dimension(nLocalValues), intent(inout) :: localValues
-    integer(kind=intType) :: mm
-
-    ! Local variables.
-    real(kind=realType), dimension(3)  :: Fp, Fv, Mp, Mv
-    real(kind=realType), dimension(3)  :: COFSumFx, COFSumFy, COFSumFz
-    real(kind=realType)  :: yplusMax, sepSensor, sepSensorAvg(3), Cavitation, cpmin_ks_sum
-    integer(kind=intType) :: i, j, ii, blk
-
-    real(kind=realType) :: pm1, fx, fy, fz, fn
-    real(kind=realType) :: xc, xco, yc, yco, zc, zco, qf(3), r(3), n(3), L
-    real(kind=realType) :: fact, rho, mul, yplus, dwall
-    real(kind=realType) :: V(3), sensor, sensor1, Cp, tmp, plocal, ks_exponent
-    real(kind=realType) :: tauXx, tauYy, tauZz
-    real(kind=realType) :: tauXy, tauXz, tauYz
-
-    real(kind=realType), dimension(3) :: refPoint
-    real(kind=realType), dimension(3,2) :: axisPoints
-    real(kind=realType) :: mx, my, mz, cellArea, m0x, m0y, m0z, Mvaxis, Mpaxis
-    real(kind=realType) :: CpError, CpError2
-
-    select case (BCFaceID(mm))
-    case (iMin, jMin, kMin)
-       fact = -one
-    case (iMax, jMax, kMax)
-       fact = one
-    end select
-
-    ! Determine the reference point for the moment computation in
-    ! meters.
-
-    refPoint(1) = LRef*pointRef(1)
-    refPoint(2) = LRef*pointRef(2)
-    refPoint(3) = LRef*pointRef(3)
-
-    ! Determine the points defining the axis about which to compute a moment
-    axisPoints(1,1) = LRef*momentAxis(1,1)
-    axisPoints(1,2) = LRef*momentAxis(1,2)
-    axisPoints(2,1) = LRef*momentAxis(2,1)
-    axisPoints(2,2) = LRef*momentAxis(2,2)
-    axisPoints(3,1) = LRef*momentAxis(3,1)
-    axisPoints(3,2) = LRef*momentAxis(3,2)
-
-    ! Initialize the force and moment coefficients to 0 as well as
-    ! yplusMax.
-
-    Fp = zero; Fv = zero;
-    Mp = zero; Mv = zero;
-    COFSumFx = zero; COFSumFy = zero; COFSumFz = zero
-    yplusMax = zero
-    sepSensor = zero
-    Cavitation = zero
-    cpmin_ks_sum = zero
-    sepSensorAvg = zero
-    Mpaxis = zero; Mvaxis = zero;
-    CpError2 = zero;
-
-    !
-    !         Integrate the inviscid contribution over the solid walls,
-    !         either inviscid or viscous. The integration is done with
-    !         cp. For closed contours this is equal to the integration
-    !         of p; for open contours this is not the case anymore.
-    !         Question is whether a force for an open contour is
-    !         meaningful anyway.
-    !
-
-
-    ! Loop over the quadrilateral faces of the subface. Note that
-    ! the nodal range of BCData must be used and not the cell
-    ! range, because the latter may include the halo's in i and
-    ! j-direction. The offset +1 is there, because inBeg and jnBeg
-    ! refer to nodal ranges and not to cell ranges. The loop
-    ! (without the AD stuff) would look like:
-    !
-    ! do j=(BCData(mm)%jnBeg+1),BCData(mm)%jnEnd
-    !    do i=(BCData(mm)%inBeg+1),BCData(mm)%inEnd
-
-    !$AD II-LOOP
-    do ii=0,(BCData(mm)%jnEnd - bcData(mm)%jnBeg)*(bcData(mm)%inEnd - bcData(mm)%inBeg) -1
-       i = mod(ii, (bcData(mm)%inEnd-bcData(mm)%inBeg)) + bcData(mm)%inBeg + 1
-       j = ii/(bcData(mm)%inEnd-bcData(mm)%inBeg) + bcData(mm)%jnBeg + 1
-
-       ! Compute the average pressure minus 1 and the coordinates
-       ! of the centroid of the face relative from from the
-       ! moment reference point. Due to the usage of pointers for
-       ! the coordinates, whose original array starts at 0, an
-       ! offset of 1 must be used. The pressure is multipled by
-       ! fact to account for the possibility of an inward or
-       ! outward pointing normal.
-
-       pm1 = fact*(half*(pp2(i,j) + pp1(i,j)) - pInf)*pRef
-
-       tmp = two/(gammaInf*pInf*MachCoef*MachCoef)
-       cp = (half*(pp2(i,j) + pp1(i,j)) - pInf)*tmp
-       CpError = (cp - BCData(mm)%CpTarget(i,j))
-       CPError2 = CpError2 + CpError*CpError
-
-       xc = fourth*(xx(i,j,  1) + xx(i+1,j,  1) &
-            +         xx(i,j+1,1) + xx(i+1,j+1,1)) - refPoint(1)
-       yc = fourth*(xx(i,j,  2) + xx(i+1,j,  2) &
-            +         xx(i,j+1,2) + xx(i+1,j+1,2)) - refPoint(2)
-       zc = fourth*(xx(i,j,  3) + xx(i+1,j,  3) &
-            +         xx(i,j+1,3) + xx(i+1,j+1,3)) - refPoint(3)
-
-       ! Compute the force components.
-       blk = max(BCData(mm)%iblank(i,j), 0)
-       fx = pm1*ssi(i,j,1)
-       fy = pm1*ssi(i,j,2)
-       fz = pm1*ssi(i,j,3)
-
-       ! Note from AY: Technically, we can just compute the moments using the center of force
-       ! terms. However, the moment computations coded here distinguish pressure,
-       ! viscous, and momentum contributions to moment. Even though these individual
-       ! contributions are not exposed to python, I still wanted to keep how it's done in the
-       ! code in case its useful in the future. This is also true for the face integrations
-
-       ! Update the inviscid force and moment coefficients. Iblank as we sum
-       Fp(1) = Fp(1) + fx*blk
-       Fp(2) = Fp(2) + fy*blk
-       Fp(3) = Fp(3) + fz*blk
-
-       mx = yc*fz - zc*fy
-       my = zc*fx - xc*fz
-       mz = xc*fy - yc*fx
-
-       Mp(1) = Mp(1) + mx*blk
-       Mp(2) = Mp(2) + my*blk
-       Mp(3) = Mp(3) + mz*blk
-
-       ! the force integral for the center of pressure computation.
-       ! We need the cell centers wrt origin
-       xco = fourth*(xx(i,j,  1) + xx(i+1,j,  1) &
-            +         xx(i,j+1,1) + xx(i+1,j+1,1))
-       yco = fourth*(xx(i,j,  2) + xx(i+1,j,  2) &
-            +         xx(i,j+1,2) + xx(i+1,j+1,2))
-       zco = fourth*(xx(i,j,  3) + xx(i+1,j,  3) &
-            +         xx(i,j+1,3) + xx(i+1,j+1,3))
-
-       ! accumulate in the sums. each force component is tracked separately
-
-       ! Force-X
-       COFSumFx(1) = COFSumFx(1) + xco * fx * blk
-       COFSumFx(2) = COFSumFx(2) + yco * fx * blk
-       COFSumFx(3) = COFSumFx(3) + zco * fx * blk
-
-       ! Force-Y
-       COFSumFy(1) = COFSumFy(1) + xco * fy * blk
-       COFSumFy(2) = COFSumFy(2) + yco * fy * blk
-       COFSumFy(3) = COFSumFy(3) + zco * fy * blk
-
-       ! Force-Z
-       COFSumFz(1) = COFSumFz(1) + xco * fz * blk
-       COFSumFz(2) = COFSumFz(2) + yco * fz * blk
-       COFSumFz(3) = COFSumFz(3) + zco * fz * blk
-
-       ! Compute the r and n vectores for the moment around an
-       ! axis computation where r is the distance from the
-       ! force to the first point on the axis and n is a unit
-       ! normal in the direction of the axis
-       r(1) = fourth*(xx(i,j,   1) + xx(i+1,j,   1)&
-            + xx(i,j+1,1) + xx(i+1,j+1,1)) - axisPoints(1,1)
-       r(2) = fourth*(xx(i,j,   2) + xx(i+1,j,   2)&
-            + xx(i,j+1,2) + xx(i+1,j+1,2)) - axisPoints(2,1)
-       r(3) = fourth*(xx(i,j,   3) + xx(i+1,j,   3)&
-            + xx(i,j+1,3) + xx(i+1,j+1,3)) - axisPoints(3,1)
-
-       L = sqrt((axisPoints(1,2) - axisPoints(1,1)) **2 &
-            + (axisPoints(2,2) - axisPoints(2,1)) **2 &
-            + (axisPoints(3,2) - axisPoints(3,1)) **2)
-
-       n(1) = (axisPoints(1,2) - axisPoints(1,1)) / L
-       n(2) = (axisPoints(2,2) - axisPoints(2,1)) / L
-       n(3) = (axisPoints(3,2) - axisPoints(3,1)) / L
-
-       ! Compute the moment of the force about the first point
-       ! used to define the axis, and the project that axis in
-       ! the n direction
-       m0x = r(2)*fz - r(3)*fy
-       m0y = r(3)*fx - r(1)*fz
-       m0z = r(1)*fy - r(2)*fx
-       Mpaxis = Mpaxis +(m0x*n(1) + m0y*n(2) + m0z*n(3))*blk
-
-       ! Save the face-based forces and area
-       bcData(mm)%Fp(i, j, 1) = fx
-       bcData(mm)%Fp(i, j, 2) = fy
-       bcData(mm)%Fp(i, j, 3) = fz
-       cellArea = sqrt(ssi(i,j,1)**2 + ssi(i,j,2)**2 + ssi(i,j,3)**2)
-
-       bcData(mm)%area(i, j) = cellArea
-
-       ! Get normalized surface velocity:
-       v(1) = ww2(i, j, ivx)
-       v(2) = ww2(i, j, ivy)
-       v(3) = ww2(i, j, ivz)
-       v = v / (sqrt(v(1)**2 + v(2)**2 + v(3)**2) + 1e-16)
-
-       ! Dot product with free stream
-       sensor = -(v(1)*velDirFreeStream(1) + v(2)*velDirFreeStream(2) + &
-            v(3)*velDirFreeStream(3))
-
-       !Now run through a smooth heaviside function:
-       sensor = one/(one + exp(-2*sepSensorSharpness*(sensor-sepSensorOffset)))
-
-       ! And integrate over the area of this cell and save, blanking as we go.
-       sensor = sensor * cellArea * blk
-       sepSensor = sepSensor + sensor
-
-       ! Also accumulate into the sepSensorAvg
-       ! x-y-zco are computed above for center of force computations
-       sepSensorAvg(1) = sepSensorAvg(1)  + sensor * xco
-       sepSensorAvg(2) = sepSensorAvg(2)  + sensor * yco
-       sepSensorAvg(3) = sepSensorAvg(3)  + sensor * zco
-
-       if (computeCavitation) then
-          plocal = pp2(i,j)
-          tmp = two/(gammaInf*MachCoef*MachCoef)
-          Cp = tmp*(plocal-pinf)
-          Sensor1 = -Cp - cavitationnumber
-          Sensor1 = (Sensor1**cavExponent)/(one+exp(2*cavSensorSharpness*(-Sensor1+cavSensorOffset)))
-          Sensor1 = Sensor1 * cellArea * blk
-          Cavitation = Cavitation + Sensor1
-
-          ! also do the ks-based cpmin computation
-          ks_exponent = exp(cpmin_rho * (-Cp + cpmin_family(spectralSol)))
-          cpmin_ks_sum = cpmin_ks_sum + ks_exponent * blk
-       end if
-    enddo
-
-    !
-    ! Integration of the viscous forces.
-    ! Only for viscous boundaries.
-    !
-    visForce: if( BCType(mm) == NSWallAdiabatic .or. &
-         BCType(mm) == NSWallIsoThermal) then
-
-       ! Initialize dwall for the laminar case and set the pointer
-       ! for the unit normals.
-
-       dwall = zero
-
-       ! Loop over the quadrilateral faces of the subface and
-       ! compute the viscous contribution to the force and
-       ! moment and update the maximum value of y+.
-
-       !$AD II-LOOP
-       do ii=0,(BCData(mm)%jnEnd - bcData(mm)%jnBeg)*(bcData(mm)%inEnd - bcData(mm)%inBeg) -1
-          i = mod(ii, (bcData(mm)%inEnd-bcData(mm)%inBeg)) + bcData(mm)%inBeg + 1
-          j = ii/(bcData(mm)%inEnd-bcData(mm)%inBeg) + bcData(mm)%jnBeg + 1
-
-          ! Store the viscous stress tensor a bit easier.
-          blk = max(BCData(mm)%iblank(i,j), 0)
-
-          tauXx = viscSubface(mm)%tau(i,j,1)
-          tauYy = viscSubface(mm)%tau(i,j,2)
-          tauZz = viscSubface(mm)%tau(i,j,3)
-          tauXy = viscSubface(mm)%tau(i,j,4)
-          tauXz = viscSubface(mm)%tau(i,j,5)
-          tauYz = viscSubface(mm)%tau(i,j,6)
-
-          ! Compute the viscous force on the face. A minus sign
-          ! is now present, due to the definition of this force.
-
-          fx = -fact*(tauXx*ssi(i,j,1) + tauXy*ssi(i,j,2) &
-               +        tauXz*ssi(i,j,3))*pRef
-          fy = -fact*(tauXy*ssi(i,j,1) + tauYy*ssi(i,j,2) &
-               +        tauYz*ssi(i,j,3))*pRef
-          fz = -fact*(tauXz*ssi(i,j,1) + tauYz*ssi(i,j,2) &
-               +        tauZz*ssi(i,j,3))*pRef
-
-          ! Compute the coordinates of the centroid of the face
-          ! relative from the moment reference point. Due to the
-          ! usage of pointers for xx and offset of 1 is present,
-          ! because x originally starts at 0.
-
-          xc = fourth*(xx(i,j,  1) + xx(i+1,j,  1) &
-               +         xx(i,j+1,1) + xx(i+1,j+1,1)) - refPoint(1)
-          yc = fourth*(xx(i,j,  2) + xx(i+1,j,  2) &
-               +         xx(i,j+1,2) + xx(i+1,j+1,2)) - refPoint(2)
-          zc = fourth*(xx(i,j,  3) + xx(i+1,j,  3) &
-               +         xx(i,j+1,3) + xx(i+1,j+1,3)) - refPoint(3)
-
-          ! Update the viscous force and moment coefficients, blanking as we go.
-
-          Fv(1) = Fv(1) + fx * blk
-          Fv(2) = Fv(2) + fy * blk
-          Fv(3) = Fv(3) + fz * blk
-
-          mx = yc*fz - zc*fy
-          my = zc*fx - xc*fz
-          mz = xc*fy - yc*fx
-
-          Mv(1) = Mv(1) + mx * blk
-          Mv(2) = Mv(2) + my * blk
-          Mv(3) = Mv(3) + mz * blk
-
-          ! the force integral for the center of pressure computation.
-          ! We need the cell centers wrt origin
-          xco = fourth*(xx(i,j,  1) + xx(i+1,j,  1) &
-               +         xx(i,j+1,1) + xx(i+1,j+1,1))
-          yco = fourth*(xx(i,j,  2) + xx(i+1,j,  2) &
-               +         xx(i,j+1,2) + xx(i+1,j+1,2))
-          zco = fourth*(xx(i,j,  3) + xx(i+1,j,  3) &
-               +         xx(i,j+1,3) + xx(i+1,j+1,3))
-
-          ! accumulate in the sums. each force component is tracked separately
-
-          ! Force-X
-          COFSumFx(1) = COFSumFx(1) + xco * fx * blk
-          COFSumFx(2) = COFSumFx(2) + yco * fx * blk
-          COFSumFx(3) = COFSumFx(3) + zco * fx * blk
-
-          ! Force-Y
-          COFSumFy(1) = COFSumFy(1) + xco * fy * blk
-          COFSumFy(2) = COFSumFy(2) + yco * fy * blk
-          COFSumFy(3) = COFSumFy(3) + zco * fy * blk
-
-          ! Force-Z
-          COFSumFz(1) = COFSumFz(1) + xco * fz * blk
-          COFSumFz(2) = COFSumFz(2) + yco * fz * blk
-          COFSumFz(3) = COFSumFz(3) + zco * fz * blk
-
-          ! Compute the r and n vectors for the moment around an
-          ! axis computation where r is the distance from the
-          ! force to the first point on the axis and n is a unit
-          ! normal in the direction of the axis
-          r(1) = fourth*(xx(i,j,   1) + xx(i+1,j,   1)&
-               + xx(i,j+1,1) + xx(i+1,j+1,1)) - axisPoints(1,1)
-          r(2) = fourth*(xx(i,j,   2) + xx(i+1,j,   2)&
-               + xx(i,j+1,2) + xx(i+1,j+1,2)) - axisPoints(2,1)
-          r(3) = fourth*(xx(i,j,   3) + xx(i+1,j,   3)&
-               + xx(i,j+1,3) + xx(i+1,j+1,3)) - axisPoints(3,1)
-
-          L = sqrt((axisPoints(1,2) - axisPoints(1,1)) ** 2 &
-               + (axisPoints(2,2) - axisPoints(2,1)) ** 2 &
-               + (axisPoints(3,2) - axisPoints(3,1)) ** 2 )
-
-          n(1) = (axisPoints(1,2) - axisPoints(1,1)) / L
-          n(2) = (axisPoints(2,2) - axisPoints(2,1)) / L
-          n(3) = (axisPoints(3,2) - axisPoints(3,1)) / L
-
-          ! Compute the moment of the force about the first point
-          ! used to define the axis, and then project that axis in
-          ! the n direction
-          m0x = r(2)*fz - r(3)*fy
-          m0y = r(3)*fx - r(1)*fz
-          m0z = r(1)*fy - r(2)*fx
-          Mvaxis = Mvaxis + (m0x*n(1) + m0y*n(2) + m0z*n(3))*blk
-
-          ! Save the face based forces for the slice operations
-          bcData(mm)%Fv(i, j, 1) = fx
-          bcData(mm)%Fv(i, j, 2) = fy
-          bcData(mm)%Fv(i, j, 3) = fz
-
-          ! Compute the tangential component of the stress tensor,
-          ! which is needed to monitor y+. The result is stored
-          ! in fx, fy, fz, although it is not really a force.
-          ! As later on only the magnitude of the tangential
-          ! component is important, there is no need to take the
-          ! sign into account (it should be a minus sign).
-
-          fx = tauXx*BCData(mm)%norm(i,j,1) + tauXy*BCData(mm)%norm(i,j,2) &
-               + tauXz*BCData(mm)%norm(i,j,3)
-          fy = tauXy*BCData(mm)%norm(i,j,1) + tauYy*BCData(mm)%norm(i,j,2) &
-               + tauYz*BCData(mm)%norm(i,j,3)
-          fz = tauXz*BCData(mm)%norm(i,j,1) + tauYz*BCData(mm)%norm(i,j,2) &
-               + tauZz*BCData(mm)%norm(i,j,3)
-
-          fn = fx*BCData(mm)%norm(i,j,1) + fy*BCData(mm)%norm(i,j,2) + fz*BCData(mm)%norm(i,j,3)
-
-          fx = fx - fn*BCData(mm)%norm(i,j,1)
-          fy = fy - fn*BCData(mm)%norm(i,j,2)
-          fz = fz - fn*BCData(mm)%norm(i,j,3)
-
-          ! Compute the local value of y+. Due to the usage
-          ! of pointers there is on offset of -1 in dd2Wall..
+    subroutine getCostFunctions(globalVals, funcValues)
+
+        use constants
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use flowVarRefState, only: pRef, rhoRef, tRef, LRef, gammaInf, pInf, uRef, uInf
+        use inputPhysics, only: liftDirection, dragDirection, surfaceRef, &
+                                machCoef, lengthRef, alpha, beta, liftIndex, cpmin_family, &
+                                cpmin_rho
+        use inputCostFunctions, only: computeCavitation
+        use inputTSStabDeriv, only: TSstability
+        use utils, only: computeTSDerivatives
+        use flowUtils, only: getDirVector
+        implicit none
+
+        ! Input/Output
+        real(kind=realType), intent(in), dimension(:, :) :: globalVals
+        real(Kind=realType), intent(out), dimension(:) :: funcValues
+
+        ! Working
+        real(kind=realType) :: fact, factMoment, ovrNTS
+        real(kind=realType), dimension(3, nTimeIntervalsSpectral) :: force, forceP, forceV, forceM, &
+                                                    moment, cForce, cForceP, cForceV, cForceM, cMoment, coFx, coFy, coFz
+        real(kind=realType), dimension(3) :: VcoordRef, VFreestreamRef
+        real(kind=realType) :: mAvgPtot, mAvgTtot, mAvgRho, mAvgPs, mFlow, mAvgMn, mAvga, &
+                               mAvgVx, mAvgVy, mAvgVz, gArea, mAvgVi
+
+        real(kind=realType) :: vdotn, mag, u, v, w
+        integer(kind=intType) :: sps
+        real(kind=realType), dimension(8) :: dcdq, dcdqdot
+        real(kind=realType), dimension(8) :: dcdalpha, dcdalphadot
+        real(kind=realType), dimension(8) :: Coef0
+
+        ! Factor used for time-averaged quantities.
+        ovrNTS = one / nTimeIntervalsSpectral
+
+        ! Sum pressure and viscous contributions
+        force = globalvals(iFp:iFp + 2, :) + globalvals(iFv:iFv + 2, :) + globalvals(iFlowFm:iFlowFm + 2, :)
+        forceP = globalvals(iFp:iFp + 2, :)
+        forceV = globalvals(iFv:iFv + 2, :)
+        forceM = globalvals(iFlowFm:iFlowFm + 2, :)
+
+        coFx = globalvals(iCoForceX:iCoForceX + 2, :)
+        coFy = globalvals(iCoForceY:iCoForceY + 2, :)
+        coFz = globalvals(iCoForceZ:iCoForceZ + 2, :)
+
+        Moment = globalvals(iMp:iMp + 2, :) + globalvals(iMv:iMv + 2, :) + globalvals(iFlowMm:iFlowMm + 2, :)
+
+        fact = two / (gammaInf * MachCoef * MachCoef &
+                      * surfaceRef * LRef * LRef * pRef)
+        cForce = fact * force
+        cForceP = fact * forceP
+        cForceV = fact * forceV
+        cForceM = fact * forceM
+
+        ! Moment factor has an extra lengthRef
+        fact = fact / (lengthRef * LRef)
+        cMoment = fact * Moment
+
+        ! Zero values since we are summing.
+        funcValues = zero
+
+        ! Here we finally assign the final function values
+        !$AD II-LOOP
+        do sps = 1, nTimeIntervalsSpectral
+            funcValues(costFuncForceX) = funcValues(costFuncForceX) + ovrNTS * force(1, sps)
+            funcValues(costFuncForceY) = funcValues(costFuncForceY) + ovrNTS * force(2, sps)
+            funcValues(costFuncForceZ) = funcValues(costFuncForceZ) + ovrNTS * force(3, sps)
+
+            funcValues(costFuncForceXPressure) = funcValues(costFuncForceXPressure) + ovrNTS * forceP(1, sps)
+            funcValues(costFuncForceYPressure) = funcValues(costFuncForceYPressure) + ovrNTS * forceP(2, sps)
+            funcValues(costFuncForceZPressure) = funcValues(costFuncForceZPressure) + ovrNTS * forceP(3, sps)
+
+            funcValues(costFuncForceXViscous) = funcValues(costFuncForceXViscous) + ovrNTS * forceV(1, sps)
+            funcValues(costFuncForceYViscous) = funcValues(costFuncForceYViscous) + ovrNTS * forceV(2, sps)
+            funcValues(costFuncForceZViscous) = funcValues(costFuncForceZViscous) + ovrNTS * forceV(3, sps)
+
+            funcValues(costFuncForceXMomentum) = funcValues(costFuncForceXMomentum) + ovrNTS * forceM(1, sps)
+            funcValues(costFuncForceYMomentum) = funcValues(costFuncForceYMomentum) + ovrNTS * forceM(2, sps)
+            funcValues(costFuncForceZMomentum) = funcValues(costFuncForceZMomentum) + ovrNTS * forceM(3, sps)
+
+            ! ------------
+
+            funcValues(costFuncForceXCoef) = funcValues(costFuncForceXCoef) + ovrNTS * cForce(1, sps)
+            funcValues(costFuncForceYCoef) = funcValues(costFuncForceYCoef) + ovrNTS * cForce(2, sps)
+            funcValues(costFuncForceZCoef) = funcValues(costFuncForceZCoef) + ovrNTS * cForce(3, sps)
+
+            funcValues(costFuncForceXCoefPressure) = funcValues(costFuncForceXCoefPressure) + ovrNTS * cForceP(1, sps)
+            funcValues(costFuncForceYCoefPressure) = funcValues(costFuncForceYCoefPressure) + ovrNTS * cForceP(2, sps)
+            funcValues(costFuncForceZCoefPressure) = funcValues(costFuncForceZCoefPressure) + ovrNTS * cForceP(3, sps)
+
+            funcValues(costFuncForceXCoefViscous) = funcValues(costFuncForceXCoefViscous) + ovrNTS * cForceV(1, sps)
+            funcValues(costFuncForceYCoefViscous) = funcValues(costFuncForceYCoefViscous) + ovrNTS * cForceV(2, sps)
+            funcValues(costFuncForceZCoefViscous) = funcValues(costFuncForceZCoefViscous) + ovrNTS * cForceV(3, sps)
+
+            funcValues(costFuncForceXCoefMomentum) = funcValues(costFuncForceXCoefMomentum) + ovrNTS * cForceM(1, sps)
+            funcValues(costFuncForceYCoefMomentum) = funcValues(costFuncForceYCoefMomentum) + ovrNTS * cForceM(2, sps)
+            funcValues(costFuncForceZCoefMomentum) = funcValues(costFuncForceZCoefMomentum) + ovrNTS * cForceM(3, sps)
+
+            ! ------------
+
+            ! center of pressure (these are actually center of all forces)
+            ! protect the divisions against zero, and divide the weighed sum by the force magnitude
+            ! for this time spectral instance before we add it to the sum
+            if (force(1, sps) /= zero) then
+                coFx(:, sps) = coFx(:, sps) / force(1, sps)
+            else
+                coFx(:, sps) = zero
+            end if
+
+            if (force(2, sps) /= zero) then
+                coFy(:, sps) = coFy(:, sps) / force(2, sps)
+            else
+                coFy(:, sps) = zero
+            end if
+
+            if (force(3, sps) /= zero) then
+                coFz(:, sps) = coFz(:, sps) / force(3, sps)
+            else
+                coFz(:, sps) = zero
+            end if
+
+            ! Fx
+            funcValues(costFuncCOForceXX) = funcValues(costFuncCOForceXX) + ovrNTS * coFx(1, sps)
+            funcValues(costFuncCOForceXY) = funcValues(costFuncCOForceXY) + ovrNTS * coFx(2, sps)
+            funcValues(costFuncCOForceXZ) = funcValues(costFuncCOForceXZ) + ovrNTS * coFx(3, sps)
+
+            ! Fy
+            funcValues(costFuncCOForceYX) = funcValues(costFuncCOForceYX) + ovrNTS * coFy(1, sps)
+            funcValues(costFuncCOForceYY) = funcValues(costFuncCOForceYY) + ovrNTS * coFy(2, sps)
+            funcValues(costFuncCOForceYZ) = funcValues(costFuncCOForceYZ) + ovrNTS * coFy(3, sps)
+
+            ! Fz
+            funcValues(costFuncCOForceZX) = funcValues(costFuncCOForceZX) + ovrNTS * coFz(1, sps)
+            funcValues(costFuncCOForceZY) = funcValues(costFuncCOForceZY) + ovrNTS * coFz(2, sps)
+            funcValues(costFuncCOForceZZ) = funcValues(costFuncCOForceZZ) + ovrNTS * coFz(3, sps)
+
+            ! ------------
+
+            funcValues(costFuncMomX) = funcValues(costFuncMomX) + ovrNTS * moment(1, sps)
+            funcValues(costFuncMomY) = funcValues(costFuncMomY) + ovrNTS * moment(2, sps)
+            funcValues(costFuncMomZ) = funcValues(costFuncMomZ) + ovrNTS * moment(3, sps)
+
+            funcValues(costFuncMomXCoef) = funcValues(costFuncMomXCoef) + ovrNTS * cMoment(1, sps)
+            funcValues(costFuncMomYCoef) = funcValues(costFuncMomYCoef) + ovrNTS * cMoment(2, sps)
+            funcValues(costFuncMomZCoef) = funcValues(costFuncMomZCoef) + ovrNTS * cMoment(3, sps)
+
+            funcValues(costFuncSepSensor) = funcValues(costFuncSepSensor) + ovrNTS * globalVals(iSepSensor, sps)
+            funcValues(costFuncCavitation) = funcValues(costFuncCavitation) + ovrNTS * globalVals(iCavitation, sps)
+            ! final part of the KS computation
+            if (computeCavitation) then
+                ! only calculate the log part if we are actually computing for cavitation.
+                ! If we are not computing cavitation, the iCpMin in globalVals will be zero,
+                ! which doesn't play well with log. we just want to return zero here.
+                funcValues(costfunccpmin) = funcValues(costfunccpmin) + ovrNTS * &
+                                            (cpmin_family(sps) - log(globalVals(iCpMin, sps)) / cpmin_rho)
+            end if
+
+            funcValues(costFuncAxisMoment) = funcValues(costFuncAxisMoment) + ovrNTS * globalVals(iAxisMoment, sps)
+            funcValues(costFuncSepSensorAvgX) = funcValues(costFuncSepSensorAvgX) + ovrNTS * globalVals(iSepAvg, sps)
+           funcValues(costFuncSepSensorAvgY) = funcValues(costFuncSepSensorAvgY) + ovrNTS * globalVals(iSepAvg + 1, sps)
+           funcValues(costFuncSepSensorAvgZ) = funcValues(costFuncSepSensorAvgZ) + ovrNTS * globalVals(iSepAvg + 2, sps)
+            funcValues(costFuncArea) = funcValues(costFuncArea) + ovrNTS * globalVals(iArea, sps)
+            funcValues(costFuncFlowPower) = funcValues(costFuncFlowPower) + ovrNTS * globalVals(iPower, sps)
+
+            funcValues(costFuncCpError2) = funcValues(costFuncCpError2) + ovrNTS * globalVals(iCpError2, sps)
+
+            ! Mass flow like objective
+            mFlow = globalVals(iMassFlow, sps)
+            if (mFlow /= zero) then
+                mAvgPtot = globalVals(iMassPtot, sps) / mFlow
+                mAvgTtot = globalVals(iMassTtot, sps) / mFlow
+                mAvgrho = globalVals(iMassRho, sps) / mFlow
+                mAvgPs = globalVals(iMassPs, sps) / mFlow
+                mAvgMn = globalVals(iMassMn, sps) / mFlow
+                mAvga = globalVals(iMassa, sps) / mFlow
+
+                mAvgVx = globalVals(iMassVx, sps) / mFlow
+                mAvgVy = globalVals(iMassVy, sps) / mFlow
+                mAvgVz = globalVals(iMassVz, sps) / mFlow
+
+                mAvgVi = (globalVals(iMassVi, sps) / mFlow)
+
+                mag = sqrt(globalVals(iMassnx, sps)**2 + &
+                           globalVals(iMassny, sps)**2 + &
+                           globalVals(iMassnz, sps)**2)
+
+            else
+                mAvgPtot = zero
+                mAvgTtot = zero
+                mAvgrho = zero
+                mAvgPs = zero
+                mAvgMn = zero
+                mAvga = zero
+                mAvgVx = zero
+                mAvgVy = zero
+                mAvgVz = zero
+                mAvgVi = zero
+
+            end if
+
+            ! area averaged objectives
+            gArea = globalVals(iArea, sps)
+            if (gArea /= zero) then
+                ! area averaged pressure
+               funcValues(costFuncAAvgPTot) = funcValues(costFuncAAvgPTot) + ovrNTS * globalVals(iAreaPTot, sps) / gArea
+                funcValues(costFuncAAvgPs) = funcValues(costFuncAAvgPs) + ovrNTS * globalVals(iAreaPs, sps) / gArea
+            end if
+
+            funcValues(costFuncMdot) = funcValues(costFuncMdot) + ovrNTS * mFlow
+            funcValues(costFuncMavgPtot) = funcValues(costFuncMavgPtot) + ovrNTS * mAvgPtot
+            funcValues(costFuncMavgTtot) = funcValues(costFuncMavgTtot) + ovrNTS * mAvgTtot
+            funcValues(costFuncMavgRho) = funcValues(costFuncMavgRho) + ovrNTS * mAvgRho
+            funcValues(costFuncMavgPs) = funcValues(costFuncMAvgPs) + ovrNTS * mAvgPs
+            funcValues(costFuncMavgMn) = funcValues(costFuncMAvgMn) + ovrNTS * mAvgMn
+            funcValues(costFuncMavga) = funcValues(costFuncMAvga) + ovrNTS * mAvga
+            funcValues(costfuncmavgvx) = funcValues(costfuncmavgvx) + ovrNTS * mAvgVx
+            funcValues(costfuncmavgvy) = funcValues(costfuncmavgvy) + ovrNTS * mAvgVy
+            funcValues(costfuncmavgvz) = funcValues(costfuncmavgvz) + ovrNTS * mAvgVz
+            funcValues(costfuncmavgvi) = funcValues(costfuncmavgvi) + ovrNTS * mAvgVi
+            ! Bending moment calc - also broken.
+            ! call computeRootBendingMoment(cForce, cMoment, liftIndex, bendingMoment)
+            ! funcValues(costFuncBendingCoef) = funcValues(costFuncBendingCoef) + ovrNTS*bendingMoment
+
+        end do
+
+        ! Lift and Drag (coefficients): Dot product with the lift/drag direction.
+        funcValues(costFuncLift) = &
+            funcValues(costFuncForceX) * liftDirection(1) + &
+            funcValues(costFuncForceY) * liftDirection(2) + &
+            funcValues(costFuncForceZ) * liftDirection(3)
+
+        funcValues(costFuncLiftPressure) = &
+            funcValues(costFuncForceXPressure) * liftDirection(1) + &
+            funcValues(costFuncForceYPressure) * liftDirection(2) + &
+            funcValues(costFuncForceZPressure) * liftDirection(3)
+
+        funcValues(costFuncLiftViscous) = &
+            funcValues(costFuncForceXViscous) * liftDirection(1) + &
+            funcValues(costFuncForceYViscous) * liftDirection(2) + &
+            funcValues(costFuncForceZViscous) * liftDirection(3)
+
+        funcValues(costFuncLiftMomentum) = &
+            funcValues(costFuncForceXMomentum) * liftDirection(1) + &
+            funcValues(costFuncForceYMomentum) * liftDirection(2) + &
+            funcValues(costFuncForceZMomentum) * liftDirection(3)
+
+        !-----
+
+        funcValues(costFuncDrag) = &
+            funcValues(costFuncForceX) * dragDirection(1) + &
+            funcValues(costFuncForceY) * dragDirection(2) + &
+            funcValues(costFuncForceZ) * dragDirection(3)
+
+        funcValues(costFuncDragPressure) = &
+            funcValues(costFuncForceXPressure) * dragDirection(1) + &
+            funcValues(costFuncForceYPressure) * dragDirection(2) + &
+            funcValues(costFuncForceZPressure) * dragDirection(3)
+
+        funcValues(costFuncDragViscous) = &
+            funcValues(costFuncForceXViscous) * dragDirection(1) + &
+            funcValues(costFuncForceYViscous) * dragDirection(2) + &
+            funcValues(costFuncForceZViscous) * dragDirection(3)
+
+        funcValues(costFuncDragMomentum) = &
+            funcValues(costFuncForceXMomentum) * dragDirection(1) + &
+            funcValues(costFuncForceYMomentum) * dragDirection(2) + &
+            funcValues(costFuncForceZMomentum) * dragDirection(3)
+
+        !-----
+
+        funcValues(costFuncLiftCoef) = &
+            funcValues(costFuncForceXCoef) * liftDirection(1) + &
+            funcValues(costFuncForceYCoef) * liftDirection(2) + &
+            funcValues(costFuncForceZCoef) * liftDirection(3)
+
+        funcValues(costFuncLiftCoefPressure) = &
+            funcValues(costFuncForceXCoefPressure) * liftDirection(1) + &
+            funcValues(costFuncForceYCoefPressure) * liftDirection(2) + &
+            funcValues(costFuncForceZCoefPressure) * liftDirection(3)
+
+        funcValues(costFuncLiftCoefViscous) = &
+            funcValues(costFuncForceXCoefViscous) * liftDirection(1) + &
+            funcValues(costFuncForceYCoefViscous) * liftDirection(2) + &
+            funcValues(costFuncForceZCoefViscous) * liftDirection(3)
+
+        funcValues(costFuncLiftCoefMomentum) = &
+            funcValues(costFuncForceXCoefMomentum) * liftDirection(1) + &
+            funcValues(costFuncForceYCoefMomentum) * liftDirection(2) + &
+            funcValues(costFuncForceZCoefMomentum) * liftDirection(3)
+
+        !-----
+
+        funcValues(costFuncDragCoef) = &
+            funcValues(costFuncForceXCoef) * dragDirection(1) + &
+            funcValues(costFuncForceYCoef) * dragDirection(2) + &
+            funcValues(costFuncForceZCoef) * dragDirection(3)
+
+        funcValues(costFuncDragCoefPressure) = &
+            funcValues(costFuncForceXCoefPressure) * dragDirection(1) + &
+            funcValues(costFuncForceYCoefPressure) * dragDirection(2) + &
+            funcValues(costFuncForceZCoefPressure) * dragDirection(3)
+
+        funcValues(costFuncDragCoefViscous) = &
+            funcValues(costFuncForceXCoefViscous) * dragDirection(1) + &
+            funcValues(costFuncForceYCoefViscous) * dragDirection(2) + &
+            funcValues(costFuncForceZCoefViscous) * dragDirection(3)
+
+        funcValues(costFuncDragCoefMomentum) = &
+            funcValues(costFuncForceXCoefMomentum) * dragDirection(1) + &
+            funcValues(costFuncForceYCoefMomentum) * dragDirection(2) + &
+            funcValues(costFuncForceZCoefMomentum) * dragDirection(3)
+
+        ! -------------------- Time Spectral Objectives ------------------
+
+        if (TSSTability) then
+            print *, 'Error: TSStabilityDerivatives are *BROKEN*. They need to be '&
+                 &'completely verifed from scratch'
+            stop
+
+            call computeTSDerivatives(force, moment, coef0, dcdalpha, &
+                                      dcdalphadot, dcdq, dcdqdot)
+
+            funcValues(costFuncCl0) = coef0(1)
+            funcValues(costFuncCd0) = coef0(2)
+            funcValues(costFuncCFy0) = coef0(4)
+            funcValues(costFuncCm0) = coef0(8)
+
+            funcValues(costFuncClAlpha) = dcdalpha(1)
+            funcValues(costFuncCdAlpha) = dcdalpha(2)
+            funcValues(costFuncCFyAlpha) = dcdalpha(4)
+            funcValues(costFuncCmzAlpha) = dcdalpha(8)
+
+            funcValues(costFuncClAlphaDot) = dcdalphadot(1)
+            funcValues(costFuncCdAlphaDot) = dcdalphadot(2)
+            funcValues(costFuncCFyAlphaDot) = dcdalphadot(4)
+            funcValues(costFuncCmzAlphaDot) = dcdalphadot(8)
+
+            funcValues(costFuncClq) = dcdq(1)
+            funcValues(costFuncCdq) = dcdq(2)
+            funcValues(costFuncCfyq) = dcdq(4)
+            funcValues(costFuncCmzq) = dcdq(8)
+
+            funcValues(costFuncClqDot) = dcdqdot(1)
+            funcValues(costFuncCdqDot) = dcdqdot(2)
+            funcValues(costFuncCfyqDot) = dcdqdot(4)
+            funcValues(costFuncCmzqDot) = dcdqdot(8)
+        end if
+
+    end subroutine getCostFunctions
+
+    subroutine wallIntegrationFace(localValues, mm)
+        !
+        !       wallIntegrations computes the contribution of the block
+        !       given by the pointers in blockPointers to the force and
+        !       moment of the geometry. A distinction is made
+        !       between the inviscid and viscous parts. In case the maximum
+        !       yplus value must be monitored (only possible for rans), this
+        !       value is also computed. The separation sensor and the cavita-
+        !       tion sensor is also computed
+        !       here.
+        !
+        use constants
+        use communication
+        use blockPointers
+        use flowVarRefState
+        use inputCostFunctions
+        use inputPhysics, only: MachCoef, pointRef, velDirFreeStream, &
+                                equations, momentAxis, cpmin_family, cpmin_rho, cavitationnumber
+        use BCPointers
+        implicit none
+
+        ! Input/output variables
+        real(kind=realType), dimension(nLocalValues), intent(inout) :: localValues
+        integer(kind=intType) :: mm
+
+        ! Local variables.
+        real(kind=realType), dimension(3) :: Fp, Fv, Mp, Mv
+        real(kind=realType), dimension(3) :: COFSumFx, COFSumFy, COFSumFz
+        real(kind=realType) :: yplusMax, sepSensor, sepSensorAvg(3), Cavitation, cpmin_ks_sum
+        integer(kind=intType) :: i, j, ii, blk
+
+        real(kind=realType) :: pm1, fx, fy, fz, fn
+        real(kind=realType) :: xc, xco, yc, yco, zc, zco, qf(3), r(3), n(3), L
+        real(kind=realType) :: fact, rho, mul, yplus, dwall
+        real(kind=realType) :: V(3), sensor, sensor1, Cp, tmp, plocal, ks_exponent
+        real(kind=realType) :: tauXx, tauYy, tauZz
+        real(kind=realType) :: tauXy, tauXz, tauYz
+
+        real(kind=realType), dimension(3) :: refPoint
+        real(kind=realType), dimension(3, 2) :: axisPoints
+        real(kind=realType) :: mx, my, mz, cellArea, m0x, m0y, m0z, Mvaxis, Mpaxis
+        real(kind=realType) :: CpError, CpError2
+
+        select case (BCFaceID(mm))
+        case (iMin, jMin, kMin)
+            fact = -one
+        case (iMax, jMax, kMax)
+            fact = one
+        end select
+
+        ! Determine the reference point for the moment computation in
+        ! meters.
+
+        refPoint(1) = LRef * pointRef(1)
+        refPoint(2) = LRef * pointRef(2)
+        refPoint(3) = LRef * pointRef(3)
+
+        ! Determine the points defining the axis about which to compute a moment
+        axisPoints(1, 1) = LRef * momentAxis(1, 1)
+        axisPoints(1, 2) = LRef * momentAxis(1, 2)
+        axisPoints(2, 1) = LRef * momentAxis(2, 1)
+        axisPoints(2, 2) = LRef * momentAxis(2, 2)
+        axisPoints(3, 1) = LRef * momentAxis(3, 1)
+        axisPoints(3, 2) = LRef * momentAxis(3, 2)
+
+        ! Initialize the force and moment coefficients to 0 as well as
+        ! yplusMax.
+
+        Fp = zero; Fv = zero; 
+        Mp = zero; Mv = zero; 
+        COFSumFx = zero; COFSumFy = zero; COFSumFz = zero
+        yplusMax = zero
+        sepSensor = zero
+        Cavitation = zero
+        cpmin_ks_sum = zero
+        sepSensorAvg = zero
+        Mpaxis = zero; Mvaxis = zero; 
+        CpError2 = zero; 
+        !
+        !         Integrate the inviscid contribution over the solid walls,
+        !         either inviscid or viscous. The integration is done with
+        !         cp. For closed contours this is equal to the integration
+        !         of p; for open contours this is not the case anymore.
+        !         Question is whether a force for an open contour is
+        !         meaningful anyway.
+        !
+
+        ! Loop over the quadrilateral faces of the subface. Note that
+        ! the nodal range of BCData must be used and not the cell
+        ! range, because the latter may include the halo's in i and
+        ! j-direction. The offset +1 is there, because inBeg and jnBeg
+        ! refer to nodal ranges and not to cell ranges. The loop
+        ! (without the AD stuff) would look like:
+        !
+        ! do j=(BCData(mm)%jnBeg+1),BCData(mm)%jnEnd
+        !    do i=(BCData(mm)%inBeg+1),BCData(mm)%inEnd
+
+        !$AD II-LOOP
+        do ii = 0, (BCData(mm)%jnEnd - bcData(mm)%jnBeg) * (bcData(mm)%inEnd - bcData(mm)%inBeg) - 1
+            i = mod(ii, (bcData(mm)%inEnd - bcData(mm)%inBeg)) + bcData(mm)%inBeg + 1
+            j = ii / (bcData(mm)%inEnd - bcData(mm)%inBeg) + bcData(mm)%jnBeg + 1
+
+            ! Compute the average pressure minus 1 and the coordinates
+            ! of the centroid of the face relative from from the
+            ! moment reference point. Due to the usage of pointers for
+            ! the coordinates, whose original array starts at 0, an
+            ! offset of 1 must be used. The pressure is multipled by
+            ! fact to account for the possibility of an inward or
+            ! outward pointing normal.
+
+            pm1 = fact * (half * (pp2(i, j) + pp1(i, j)) - pInf) * pRef
+
+            tmp = two / (gammaInf * pInf * MachCoef * MachCoef)
+            cp = (half * (pp2(i, j) + pp1(i, j)) - pInf) * tmp
+            CpError = (cp - BCData(mm)%CpTarget(i, j))
+            CPError2 = CpError2 + CpError * CpError
+
+            xc = fourth * (xx(i, j, 1) + xx(i + 1, j, 1) &
+                           + xx(i, j + 1, 1) + xx(i + 1, j + 1, 1)) - refPoint(1)
+            yc = fourth * (xx(i, j, 2) + xx(i + 1, j, 2) &
+                           + xx(i, j + 1, 2) + xx(i + 1, j + 1, 2)) - refPoint(2)
+            zc = fourth * (xx(i, j, 3) + xx(i + 1, j, 3) &
+                           + xx(i, j + 1, 3) + xx(i + 1, j + 1, 3)) - refPoint(3)
+
+            ! Compute the force components.
+            blk = max(BCData(mm)%iblank(i, j), 0)
+            fx = pm1 * ssi(i, j, 1)
+            fy = pm1 * ssi(i, j, 2)
+            fz = pm1 * ssi(i, j, 3)
+
+            ! Note from AY: Technically, we can just compute the moments using the center of force
+            ! terms. However, the moment computations coded here distinguish pressure,
+            ! viscous, and momentum contributions to moment. Even though these individual
+            ! contributions are not exposed to python, I still wanted to keep how it's done in the
+            ! code in case its useful in the future. This is also true for the face integrations
+
+            ! Update the inviscid force and moment coefficients. Iblank as we sum
+            Fp(1) = Fp(1) + fx * blk
+            Fp(2) = Fp(2) + fy * blk
+            Fp(3) = Fp(3) + fz * blk
+
+            mx = yc * fz - zc * fy
+            my = zc * fx - xc * fz
+            mz = xc * fy - yc * fx
+
+            Mp(1) = Mp(1) + mx * blk
+            Mp(2) = Mp(2) + my * blk
+            Mp(3) = Mp(3) + mz * blk
+
+            ! the force integral for the center of pressure computation.
+            ! We need the cell centers wrt origin
+            xco = fourth * (xx(i, j, 1) + xx(i + 1, j, 1) &
+                            + xx(i, j + 1, 1) + xx(i + 1, j + 1, 1))
+            yco = fourth * (xx(i, j, 2) + xx(i + 1, j, 2) &
+                            + xx(i, j + 1, 2) + xx(i + 1, j + 1, 2))
+            zco = fourth * (xx(i, j, 3) + xx(i + 1, j, 3) &
+                            + xx(i, j + 1, 3) + xx(i + 1, j + 1, 3))
+
+            ! accumulate in the sums. each force component is tracked separately
+
+            ! Force-X
+            COFSumFx(1) = COFSumFx(1) + xco * fx * blk
+            COFSumFx(2) = COFSumFx(2) + yco * fx * blk
+            COFSumFx(3) = COFSumFx(3) + zco * fx * blk
+
+            ! Force-Y
+            COFSumFy(1) = COFSumFy(1) + xco * fy * blk
+            COFSumFy(2) = COFSumFy(2) + yco * fy * blk
+            COFSumFy(3) = COFSumFy(3) + zco * fy * blk
+
+            ! Force-Z
+            COFSumFz(1) = COFSumFz(1) + xco * fz * blk
+            COFSumFz(2) = COFSumFz(2) + yco * fz * blk
+            COFSumFz(3) = COFSumFz(3) + zco * fz * blk
+
+            ! Compute the r and n vectores for the moment around an
+            ! axis computation where r is the distance from the
+            ! force to the first point on the axis and n is a unit
+            ! normal in the direction of the axis
+            r(1) = fourth * (xx(i, j, 1) + xx(i + 1, j, 1) &
+                             + xx(i, j + 1, 1) + xx(i + 1, j + 1, 1)) - axisPoints(1, 1)
+            r(2) = fourth * (xx(i, j, 2) + xx(i + 1, j, 2) &
+                             + xx(i, j + 1, 2) + xx(i + 1, j + 1, 2)) - axisPoints(2, 1)
+            r(3) = fourth * (xx(i, j, 3) + xx(i + 1, j, 3) &
+                             + xx(i, j + 1, 3) + xx(i + 1, j + 1, 3)) - axisPoints(3, 1)
+
+            L = sqrt((axisPoints(1, 2) - axisPoints(1, 1))**2 &
+                     + (axisPoints(2, 2) - axisPoints(2, 1))**2 &
+                     + (axisPoints(3, 2) - axisPoints(3, 1))**2)
+
+            n(1) = (axisPoints(1, 2) - axisPoints(1, 1)) / L
+            n(2) = (axisPoints(2, 2) - axisPoints(2, 1)) / L
+            n(3) = (axisPoints(3, 2) - axisPoints(3, 1)) / L
+
+            ! Compute the moment of the force about the first point
+            ! used to define the axis, and the project that axis in
+            ! the n direction
+            m0x = r(2) * fz - r(3) * fy
+            m0y = r(3) * fx - r(1) * fz
+            m0z = r(1) * fy - r(2) * fx
+            Mpaxis = Mpaxis + (m0x * n(1) + m0y * n(2) + m0z * n(3)) * blk
+
+            ! Save the face-based forces and area
+            bcData(mm)%Fp(i, j, 1) = fx
+            bcData(mm)%Fp(i, j, 2) = fy
+            bcData(mm)%Fp(i, j, 3) = fz
+            cellArea = sqrt(ssi(i, j, 1)**2 + ssi(i, j, 2)**2 + ssi(i, j, 3)**2)
+
+            bcData(mm)%area(i, j) = cellArea
+
+            ! Get normalized surface velocity:
+            v(1) = ww2(i, j, ivx)
+            v(2) = ww2(i, j, ivy)
+            v(3) = ww2(i, j, ivz)
+            v = v / (sqrt(v(1)**2 + v(2)**2 + v(3)**2) + 1e-16)
+
+            ! Dot product with free stream
+            sensor = -(v(1) * velDirFreeStream(1) + v(2) * velDirFreeStream(2) + &
+                       v(3) * velDirFreeStream(3))
+
+            !Now run through a smooth heaviside function:
+            sensor = one / (one + exp(-2 * sepSensorSharpness * (sensor - sepSensorOffset)))
+
+            ! And integrate over the area of this cell and save, blanking as we go.
+            sensor = sensor * cellArea * blk
+            sepSensor = sepSensor + sensor
+
+            ! Also accumulate into the sepSensorAvg
+            ! x-y-zco are computed above for center of force computations
+            sepSensorAvg(1) = sepSensorAvg(1) + sensor * xco
+            sepSensorAvg(2) = sepSensorAvg(2) + sensor * yco
+            sepSensorAvg(3) = sepSensorAvg(3) + sensor * zco
+
+            if (computeCavitation) then
+                plocal = pp2(i, j)
+                tmp = two / (gammaInf * MachCoef * MachCoef)
+                Cp = tmp * (plocal - pinf)
+                Sensor1 = -Cp - cavitationnumber
+                Sensor1 = (Sensor1**cavExponent) / (one + exp(2 * cavSensorSharpness * (-Sensor1 + cavSensorOffset)))
+                Sensor1 = Sensor1 * cellArea * blk
+                Cavitation = Cavitation + Sensor1
+
+                ! also do the ks-based cpmin computation
+                ks_exponent = exp(cpmin_rho * (-Cp + cpmin_family(spectralSol)))
+                cpmin_ks_sum = cpmin_ks_sum + ks_exponent * blk
+            end if
+        end do
+
+        !
+        ! Integration of the viscous forces.
+        ! Only for viscous boundaries.
+        !
+        visForce: if (BCType(mm) == NSWallAdiabatic .or. &
+                      BCType(mm) == NSWallIsoThermal) then
+
+            ! Initialize dwall for the laminar case and set the pointer
+            ! for the unit normals.
+
+            dwall = zero
+
+            ! Loop over the quadrilateral faces of the subface and
+            ! compute the viscous contribution to the force and
+            ! moment and update the maximum value of y+.
+
+            !$AD II-LOOP
+            do ii = 0, (BCData(mm)%jnEnd - bcData(mm)%jnBeg) * (bcData(mm)%inEnd - bcData(mm)%inBeg) - 1
+                i = mod(ii, (bcData(mm)%inEnd - bcData(mm)%inBeg)) + bcData(mm)%inBeg + 1
+                j = ii / (bcData(mm)%inEnd - bcData(mm)%inBeg) + bcData(mm)%jnBeg + 1
+
+                ! Store the viscous stress tensor a bit easier.
+                blk = max(BCData(mm)%iblank(i, j), 0)
+
+                tauXx = viscSubface(mm)%tau(i, j, 1)
+                tauYy = viscSubface(mm)%tau(i, j, 2)
+                tauZz = viscSubface(mm)%tau(i, j, 3)
+                tauXy = viscSubface(mm)%tau(i, j, 4)
+                tauXz = viscSubface(mm)%tau(i, j, 5)
+                tauYz = viscSubface(mm)%tau(i, j, 6)
+
+                ! Compute the viscous force on the face. A minus sign
+                ! is now present, due to the definition of this force.
+
+                fx = -fact * (tauXx * ssi(i, j, 1) + tauXy * ssi(i, j, 2) &
+                              + tauXz * ssi(i, j, 3)) * pRef
+                fy = -fact * (tauXy * ssi(i, j, 1) + tauYy * ssi(i, j, 2) &
+                              + tauYz * ssi(i, j, 3)) * pRef
+                fz = -fact * (tauXz * ssi(i, j, 1) + tauYz * ssi(i, j, 2) &
+                              + tauZz * ssi(i, j, 3)) * pRef
+
+                ! Compute the coordinates of the centroid of the face
+                ! relative from the moment reference point. Due to the
+                ! usage of pointers for xx and offset of 1 is present,
+                ! because x originally starts at 0.
+
+                xc = fourth * (xx(i, j, 1) + xx(i + 1, j, 1) &
+                               + xx(i, j + 1, 1) + xx(i + 1, j + 1, 1)) - refPoint(1)
+                yc = fourth * (xx(i, j, 2) + xx(i + 1, j, 2) &
+                               + xx(i, j + 1, 2) + xx(i + 1, j + 1, 2)) - refPoint(2)
+                zc = fourth * (xx(i, j, 3) + xx(i + 1, j, 3) &
+                               + xx(i, j + 1, 3) + xx(i + 1, j + 1, 3)) - refPoint(3)
+
+                ! Update the viscous force and moment coefficients, blanking as we go.
+
+                Fv(1) = Fv(1) + fx * blk
+                Fv(2) = Fv(2) + fy * blk
+                Fv(3) = Fv(3) + fz * blk
+
+                mx = yc * fz - zc * fy
+                my = zc * fx - xc * fz
+                mz = xc * fy - yc * fx
+
+                Mv(1) = Mv(1) + mx * blk
+                Mv(2) = Mv(2) + my * blk
+                Mv(3) = Mv(3) + mz * blk
+
+                ! the force integral for the center of pressure computation.
+                ! We need the cell centers wrt origin
+                xco = fourth * (xx(i, j, 1) + xx(i + 1, j, 1) &
+                                + xx(i, j + 1, 1) + xx(i + 1, j + 1, 1))
+                yco = fourth * (xx(i, j, 2) + xx(i + 1, j, 2) &
+                                + xx(i, j + 1, 2) + xx(i + 1, j + 1, 2))
+                zco = fourth * (xx(i, j, 3) + xx(i + 1, j, 3) &
+                                + xx(i, j + 1, 3) + xx(i + 1, j + 1, 3))
+
+                ! accumulate in the sums. each force component is tracked separately
+
+                ! Force-X
+                COFSumFx(1) = COFSumFx(1) + xco * fx * blk
+                COFSumFx(2) = COFSumFx(2) + yco * fx * blk
+                COFSumFx(3) = COFSumFx(3) + zco * fx * blk
+
+                ! Force-Y
+                COFSumFy(1) = COFSumFy(1) + xco * fy * blk
+                COFSumFy(2) = COFSumFy(2) + yco * fy * blk
+                COFSumFy(3) = COFSumFy(3) + zco * fy * blk
+
+                ! Force-Z
+                COFSumFz(1) = COFSumFz(1) + xco * fz * blk
+                COFSumFz(2) = COFSumFz(2) + yco * fz * blk
+                COFSumFz(3) = COFSumFz(3) + zco * fz * blk
+
+                ! Compute the r and n vectors for the moment around an
+                ! axis computation where r is the distance from the
+                ! force to the first point on the axis and n is a unit
+                ! normal in the direction of the axis
+                r(1) = fourth * (xx(i, j, 1) + xx(i + 1, j, 1) &
+                                 + xx(i, j + 1, 1) + xx(i + 1, j + 1, 1)) - axisPoints(1, 1)
+                r(2) = fourth * (xx(i, j, 2) + xx(i + 1, j, 2) &
+                                 + xx(i, j + 1, 2) + xx(i + 1, j + 1, 2)) - axisPoints(2, 1)
+                r(3) = fourth * (xx(i, j, 3) + xx(i + 1, j, 3) &
+                                 + xx(i, j + 1, 3) + xx(i + 1, j + 1, 3)) - axisPoints(3, 1)
+
+                L = sqrt((axisPoints(1, 2) - axisPoints(1, 1))**2 &
+                         + (axisPoints(2, 2) - axisPoints(2, 1))**2 &
+                         + (axisPoints(3, 2) - axisPoints(3, 1))**2)
+
+                n(1) = (axisPoints(1, 2) - axisPoints(1, 1)) / L
+                n(2) = (axisPoints(2, 2) - axisPoints(2, 1)) / L
+                n(3) = (axisPoints(3, 2) - axisPoints(3, 1)) / L
+
+                ! Compute the moment of the force about the first point
+                ! used to define the axis, and then project that axis in
+                ! the n direction
+                m0x = r(2) * fz - r(3) * fy
+                m0y = r(3) * fx - r(1) * fz
+                m0z = r(1) * fy - r(2) * fx
+                Mvaxis = Mvaxis + (m0x * n(1) + m0y * n(2) + m0z * n(3)) * blk
+
+                ! Save the face based forces for the slice operations
+                bcData(mm)%Fv(i, j, 1) = fx
+                bcData(mm)%Fv(i, j, 2) = fy
+                bcData(mm)%Fv(i, j, 3) = fz
+
+                ! Compute the tangential component of the stress tensor,
+                ! which is needed to monitor y+. The result is stored
+                ! in fx, fy, fz, although it is not really a force.
+                ! As later on only the magnitude of the tangential
+                ! component is important, there is no need to take the
+                ! sign into account (it should be a minus sign).
+
+                fx = tauXx * BCData(mm)%norm(i, j, 1) + tauXy * BCData(mm)%norm(i, j, 2) &
+                     + tauXz * BCData(mm)%norm(i, j, 3)
+                fy = tauXy * BCData(mm)%norm(i, j, 1) + tauYy * BCData(mm)%norm(i, j, 2) &
+                     + tauYz * BCData(mm)%norm(i, j, 3)
+                fz = tauXz * BCData(mm)%norm(i, j, 1) + tauYz * BCData(mm)%norm(i, j, 2) &
+                     + tauZz * BCData(mm)%norm(i, j, 3)
+
+                fn = fx * BCData(mm)%norm(i, j, 1) + fy * BCData(mm)%norm(i, j, 2) + fz * BCData(mm)%norm(i, j, 3)
+
+                fx = fx - fn * BCData(mm)%norm(i, j, 1)
+                fy = fy - fn * BCData(mm)%norm(i, j, 2)
+                fz = fz - fn * BCData(mm)%norm(i, j, 3)
+
+                ! Compute the local value of y+. Due to the usage
+                ! of pointers there is on offset of -1 in dd2Wall..
 #ifndef USE_TAPENADE
-          if(equations == RANSEquations) then
-             dwall = dd2Wall(i-1,j-1)
-             rho   = half*(ww2(i,j,irho) + ww1(i,j,irho))
-             mul   = half*(rlv2(i,j) + rlv1(i,j))
-             yplus = sqrt(rho*sqrt(fx*fx + fy*fy + fz*fz))*dwall/mul
+                if (equations == RANSEquations) then
+                    dwall = dd2Wall(i - 1, j - 1)
+                    rho = half * (ww2(i, j, irho) + ww1(i, j, irho))
+                    mul = half * (rlv2(i, j) + rlv1(i, j))
+                    yplus = sqrt(rho * sqrt(fx * fx + fy * fy + fz * fz)) * dwall / mul
 
-             ! Store this value if this value is larger than the
-             ! currently stored value. Blank non-active cells.
+                    ! Store this value if this value is larger than the
+                    ! currently stored value. Blank non-active cells.
 
-             yplusMax = max(yplusMax, yplus*blk)
-          end if
+                    yplusMax = max(yplusMax, yplus * blk)
+                end if
 #endif
-       enddo
-    else
-       ! If we had no viscous force, set the viscous component to zero
-       bcData(mm)%Fv = zero
-    end if visForce
-
-    ! Increment the local values array with the values we computed here.
-    localValues(iFp:iFp+2) = localValues(iFp:iFp+2) + Fp
-    localValues(iFv:iFv+2) = localValues(iFv:iFv+2) + Fv
-    localValues(iMp:iMp+2) = localValues(iMp:iMp+2) + Mp
-    localValues(iMv:iMv+2) = localValues(iMv:iMv+2) + Mv
-    localValues(iCoForceX:iCoForceX+2) = localValues(iCoForceX:iCoForceX+2) + COFSumFx
-    localValues(iCoForceY:iCoForceY+2) = localValues(iCoForceY:iCoForceY+2) + COFSumFy
-    localValues(iCoForceZ:iCoForceZ+2) = localValues(iCoForceZ:iCoForceZ+2) + COFSumFz
-    localValues(iSepSensor) = localValues(iSepSensor) + sepSensor
-    localValues(iCavitation) = localValues(iCavitation) + cavitation
-    localValues(iCpMin) = localValues(iCpMin) + cpmin_ks_sum
-    localValues(iSepAvg:iSepAvg+2) = localValues(iSepAvg:iSepAvg+2) + sepSensorAvg
-    localValues(iAxisMoment) = localValues(iAxisMoment) + Mpaxis + Mvaxis
-    localValues(iCpError2) = localValues(iCpError2) + CpError2
+            end do
+        else
+            ! If we had no viscous force, set the viscous component to zero
+            bcData(mm)%Fv = zero
+        end if visForce
+
+        ! Increment the local values array with the values we computed here.
+        localValues(iFp:iFp + 2) = localValues(iFp:iFp + 2) + Fp
+        localValues(iFv:iFv + 2) = localValues(iFv:iFv + 2) + Fv
+        localValues(iMp:iMp + 2) = localValues(iMp:iMp + 2) + Mp
+        localValues(iMv:iMv + 2) = localValues(iMv:iMv + 2) + Mv
+        localValues(iCoForceX:iCoForceX + 2) = localValues(iCoForceX:iCoForceX + 2) + COFSumFx
+        localValues(iCoForceY:iCoForceY + 2) = localValues(iCoForceY:iCoForceY + 2) + COFSumFy
+        localValues(iCoForceZ:iCoForceZ + 2) = localValues(iCoForceZ:iCoForceZ + 2) + COFSumFz
+        localValues(iSepSensor) = localValues(iSepSensor) + sepSensor
+        localValues(iCavitation) = localValues(iCavitation) + cavitation
+        localValues(iCpMin) = localValues(iCpMin) + cpmin_ks_sum
+        localValues(iSepAvg:iSepAvg + 2) = localValues(iSepAvg:iSepAvg + 2) + sepSensorAvg
+        localValues(iAxisMoment) = localValues(iAxisMoment) + Mpaxis + Mvaxis
+        localValues(iCpError2) = localValues(iCpError2) + CpError2
 
 #ifndef USE_TAPENADE
-    localValues(iyPlus) = max(localValues(iyPlus), yplusMax)
+        localValues(iyPlus) = max(localValues(iyPlus), yplusMax)
 #endif
-  end subroutine wallIntegrationFace
-
-  subroutine flowIntegrationFace(isInflow, localValues, mm)
-
-    use constants
-    use blockPointers, only : BCType, BCFaceID, BCData, addGridVelocities
-    use flowVarRefState, only : pRef, pInf, rhoRef, timeRef, LRef, TRef, RGas, uRef, uInf, rhoInf, gammaInf
-    use inputPhysics, only : pointRef, flowType, rGasDim
-    use flowUtils, only : computePtot, computeTtot
-    use BCPointers, only : ssi, sFace, ww1, ww2, pp1, pp2, xx, gamma1, gamma2
-    use utils, only : mynorm2
-    implicit none
-
-    ! Input/Output variables
-    logical, intent(in) :: isInflow
-    real(kind=realType), dimension(nLocalValues), intent(inout) :: localValues
-    integer(kind=intType), intent(in) :: mm
-
-    ! Local variables
-    real(kind=realType) ::  massFlowRate, mass_Ptot, mass_Ttot, mass_Ps, mass_MN, mass_a, mass_rho, &
-                            mass_Vx, mass_Vy, mass_Vz, mass_nx, mass_ny, mass_nz, mass_Vi
-    real(kind=realType) ::  area_Ptot, area_Ps
-    real(kind=realType) ::  govgm1, gm1ovg, viConst, viLocal, pratio
-    real(kind=realType) ::  mReDim
-    integer(kind=intType) :: i, j, ii, blk
-    real(kind=realType) :: internalFlowFact, inFlowFact, fact, xc, xco, yc, yco, zc, zco, mx, my, mz
-    real(kind=realType) :: sF, vmag, vnm, vnmFreeStreamRef, vxm, vym, vzm, Fx, Fy, Fz, u, v, w
-    real(kind=realType) :: pm, Ptot, Ttot, rhom, gammam, am
-    real(kind=realType) :: area, cellArea, overCellArea
-    real(kind=realType), dimension(3) :: Fp, Mp, FMom, MMom, refPoint, sFaceCoordRef
-    real(kind=realType), dimension(3)  :: COFSumFx, COFSumFy, COFSumFz
-    real(kind=realType) :: MNm, massFlowRateLocal
-
-    refPoint(1) = LRef*pointRef(1)
-    refPoint(2) = LRef*pointRef(2)
-    refPoint(3) = LRef*pointRef(3)
-
-    ! Note that these are *opposite* of force integrations. The reason
-    ! is that we want positive mass flow into the domain and negative
-    ! mass flow out of the domain. Since the low faces have ssi
-    ! vectors pointining into the domain, this is correct. The high
-    ! end faces need to flip this.
-    select case (BCFaceID(mm))
-    case (iMin, jMin, kMin)
-       fact = one
-    case (iMax, jMax, kMax)
-       fact = -one
-    end select
-
-    ! the sign of momentum forces are flipped for internal flows
-    internalFlowFact = one
-    if (flowType == internalFlow) then
-      internalFlowFact = -one
-    end if
-
-    inFlowFact = one
-    if (isInflow) then
-      inflowFact= -one
-    end if
-
-    ! Loop over the quadrilateral faces of the subface. Note that
-    ! the nodal range of BCData must be used and not the cell
-    ! range, because the latter may include the halo's in i and
-    ! j-direction. The offset +1 is there, because inBeg and jnBeg
-    ! refer to nodal ranges and not to cell ranges. The loop
-    ! (without the AD stuff) would look like:
-    !
-    ! do j=(BCData(mm)%jnBeg+1),BCData(mm)%jnEnd
-    !    do i=(BCData(mm)%inBeg+1),BCData(mm)%inEnd
-
-    mReDim = sqrt(pRef*rhoRef)
-    Fp = zero
-    Mp = zero
-    FMom = zero
-    MMom = zero
-
-    COFSumFx = zero
-    COFSumFy = zero
-    COFSumFz = zero
-
-    massFlowRate = zero
-    area = zero
-    mass_Ptot = zero
-    mass_Ttot = zero
-    mass_Ps = zero
-    mass_MN = zero
-    mass_a = zero
-    mass_rho = zero
-
-    mass_Vx = zero
-    mass_Vy = zero
-    mass_Vz = zero
-    mass_nx = zero
-    mass_ny = zero
-    mass_nz = zero
-    mass_vi = zero
-
-    area_Ptot = zero
-    area_Ps   = zero
-
-    !$AD II-LOOP
-    do ii=0,(BCData(mm)%jnEnd - bcData(mm)%jnBeg)*(bcData(mm)%inEnd - bcData(mm)%inBeg) -1
-      i = mod(ii, (bcData(mm)%inEnd-bcData(mm)%inBeg)) + bcData(mm)%inBeg + 1
-      j = ii/(bcData(mm)%inEnd-bcData(mm)%inBeg) + bcData(mm)%jnBeg + 1
-
-      if( addGridVelocities ) then
-        sF = sFace(i,j)
-      else
-        sF = zero
-      end if
-
-      ! Compute the force components.
-      blk = max(BCData(mm)%iblank(i,j), 0) ! iBlank forces for overset stuff
-
-      vxm = half*(ww1(i,j,ivx) + ww2(i,j,ivx))
-      vym = half*(ww1(i,j,ivy) + ww2(i,j,ivy))
-      vzm = half*(ww1(i,j,ivz) + ww2(i,j,ivz))
-      rhom = half*(ww1(i,j,irho) + ww2(i,j,irho))
-      pm = half*(pp1(i,j)+ pp2(i,j))
-      gammam = half*(gamma1(i,j) + gamma2(i,j))
-
-      vnm = vxm*ssi(i,j,1) + vym*ssi(i,j,2) + vzm*ssi(i,j,3)  - sF
-      vmag = sqrt((vxm**2 + vym**2 + vzm**2)) - sF
-      am = sqrt(gammam*pm/rhom)
-      MNm = vmag/am
-
-      cellArea = sqrt(ssi(i,j,1)**2 + ssi(i,j,2)**2 + ssi(i,j,3)**2)
-      area = area + cellArea*blk
-      overCellArea = 1/cellArea
-
-      call computePtot(rhom, vxm, vym, vzm, pm, Ptot)
-      call computeTtot(rhom, vxm, vym, vzm, pm, Ttot)
-
-      massFlowRateLocal = rhom*vnm*blk*fact*mReDim
-
-      massFlowRate = massFlowRate + massFlowRateLocal
-
-      ! re-dimentionalize quantities
-      pm = pm*pRef
-
-      mass_Ptot = mass_pTot + Ptot * massFlowRateLocal * Pref
-      mass_Ttot = mass_Ttot + Ttot * massFlowRateLocal * Tref
-      mass_rho  = mass_rho + rhom * massFlowRateLocal * rhoRef
-      mass_a  = mass_a + am * massFlowRateLocal * uRef
-
-      mass_Ps = mass_Ps + pm*massFlowRateLocal
-      mass_MN = mass_MN + MNm*massFlowRateLocal
-
-      area_pTot = area_pTot + Ptot * Pref * cellArea * blk
-      area_Ps = area_Ps + pm * cellArea * blk
-
-      sFaceCoordRef(1) = sF * ssi(i,j,1)*overCellArea
-      sFaceCoordRef(2) = sF * ssi(i,j,2)*overCellArea
-      sFaceCoordRef(3) = sF * ssi(i,j,3)*overCellArea
-
-      mass_Vx = mass_Vx + (vxm*uRef - sFaceCoordRef(1)) *massFlowRateLocal
-      mass_Vy = mass_Vy + (vym*uRef - sFaceCoordRef(2)) *massFlowRateLocal
-      mass_Vz = mass_Vz + (vzm*uRef - sFaceCoordRef(3)) *massFlowRateLocal
-
-      govgm1 = gammaInf/(gammaInf-one)
-      gm1ovg = one/govgm1
-      viConst = two * govgm1 * rGasDim
-      ! the prefs in psinf / ptot cancel out so we can just take the ratio
-      ! we need to clip the ratio to stay under one. right next to the wall,
-      ! the pTot can go below the static free stream pressure. To prevent
-      ! nans from the sqrt, we just clip this. This does not affect the computation
-      ! because when pTot is this small, the velocities are also small, and the
-      ! mdot is almost zero, so the cells in this area don't contribute much
-      ! to the mass weighed sum.
-      pratio = min(one, one / pTot)
-      viLocal = sqrt(viConst * (one - (pratio) ** gm1ovg) * Ttot * Tref)
-      mass_vi = mass_vi + viLocal * massFlowRateLocal
-
-      mass_nx = mass_nx + ssi(i,j,1)*overCellArea * massFlowRateLocal
-      mass_ny = mass_ny + ssi(i,j,2)*overCellArea * massFlowRateLocal
-      mass_nz = mass_nz + ssi(i,j,3)*overCellArea * massFlowRateLocal
-
-      xc = fourth*(xx(i,j,  1) + xx(i+1,j,  1) &
-          +         xx(i,j+1,1) + xx(i+1,j+1,1)) - refPoint(1)
-      yc = fourth*(xx(i,j,  2) + xx(i+1,j,  2) &
-          +         xx(i,j+1,2) + xx(i+1,j+1,2)) - refPoint(2)
-      zc = fourth*(xx(i,j,  3) + xx(i+1,j,  3) &
-          +         xx(i,j+1,3) + xx(i+1,j+1,3)) - refPoint(3)
-
-      ! Pressure forces. Note that these need a *negative* and to subtract
-      ! the reference pressure sign to be consistent with the force
-      ! computation on the walls.
-      pm = -(pm-pInf*pRef)*fact*blk
-
-      fx = pm*ssi(i,j,1)
-      fy = pm*ssi(i,j,2)
-      fz = pm*ssi(i,j,3)
-
-      ! Update the pressure force and moment coefficients.
-      Fp(1) = Fp(1) + fx
-      Fp(2) = Fp(2) + fy
-      Fp(3) = Fp(3) + fz
-
-      mx = yc*fz - zc*fy
-      my = zc*fx - xc*fz
-      mz = xc*fy - yc*fx
-
-      Mp(1) = Mp(1) + mx
-      Mp(2) = Mp(2) + my
-      Mp(3) = Mp(3) + mz
-
-      ! the force integral for the center of pressure computation.
-      ! We need the cell centers wrt origin
-      xco = fourth*(xx(i,j,  1) + xx(i+1,j,  1) &
-           +         xx(i,j+1,1) + xx(i+1,j+1,1))
-      yco = fourth*(xx(i,j,  2) + xx(i+1,j,  2) &
-           +         xx(i,j+1,2) + xx(i+1,j+1,2))
-      zco = fourth*(xx(i,j,  3) + xx(i+1,j,  3) &
-           +         xx(i,j+1,3) + xx(i+1,j+1,3))
-
-      ! Center of force computations. Here we accumulate in the sums.
-      ! accumulate in the sums. each force component is tracked separately
-      ! blanking is included in the mdot multiplier for the force.
-
-      ! Force-X
-      COFSumFx(1) = COFSumFx(1) + xco * fx
-      COFSumFx(2) = COFSumFx(2) + yco * fx
-      COFSumFx(3) = COFSumFx(3) + zco * fx
-
-      ! Force-Y
-      COFSumFy(1) = COFSumFy(1) + xco * fy
-      COFSumFy(2) = COFSumFy(2) + yco * fy
-      COFSumFy(3) = COFSumFy(3) + zco * fy
-
-      ! Force-Z
-      COFSumFz(1) = COFSumFz(1) + xco * fz
-      COFSumFz(2) = COFSumFz(2) + yco * fz
-      COFSumFz(3) = COFSumFz(3) + zco * fz
-
-      ! Momentum forces are a little tricky.  We negate because
-      ! have to re-apply fact to massFlowRateLocal to undoo it, because
-      ! we need the signed behavior of ssi to get the momentum forces correct.
-      ! Also, the sign is flipped between inflow and outflow types
-
-      massFlowRateLocal = massFlowRateLocal*fact/timeRef*blk/cellArea*internalFlowFact*inFlowFact
-
-      fx = massFlowRateLocal * ssi(i,j,1)*vxm
-      fy = massFlowRateLocal * ssi(i,j,2)*vym
-      fz = massFlowRateLocal * ssi(i,j,3)*vzm
-
-      FMom(1) = FMom(1) + fx
-      FMom(2) = FMom(2) + fy
-      FMom(3) = FMom(3) + fz
-
-      mx = yc*fz - zc*fy
-      my = zc*fx - xc*fz
-      mz = xc*fy - yc*fx
-
-      MMom(1) = MMom(1) + mx
-      MMom(2) = MMom(2) + my
-      MMom(3) = MMom(3) + mz
-
-      ! Center of force computations. Here we accumulate in the sums.
-      ! each force component is tracked separately
-      ! blanking is included in the mdot multiplier for the force.
-
-      ! Force-X
-      COFSumFx(1) = COFSumFx(1) + xco * fx
-      COFSumFx(2) = COFSumFx(2) + yco * fx
-      COFSumFx(3) = COFSumFx(3) + zco * fx
-
-      ! Force-Y
-      COFSumFy(1) = COFSumFy(1) + xco * fy
-      COFSumFy(2) = COFSumFy(2) + yco * fy
-      COFSumFy(3) = COFSumFy(3) + zco * fy
-
-      ! Force-Z
-      COFSumFz(1) = COFSumFz(1) + xco * fz
-      COFSumFz(2) = COFSumFz(2) + yco * fz
-      COFSumFz(3) = COFSumFz(3) + zco * fz
-
-    enddo
-
-    ! Increment the local values array with what we computed here
-    localValues(iMassFlow) = localValues(iMassFlow) + massFlowRate
-    localValues(iArea) = localValues(iArea) + area
-    localValues(iMassRho) = localValues(iMassRho) + mass_rho
-    localValues(iMassa) = localValues(iMassa) + mass_a
-    localValues(iMassPtot) = localValues(iMassPtot) + mass_Ptot
-    localValues(iMassTtot) = localValues(iMassTtot) + mass_Ttot
-    localValues(iMassPs)   = localValues(iMassPs)   + mass_Ps
-    localValues(iMassMN)   = localValues(iMassMN)   + mass_MN
-    localValues(iFp:iFp+2)   = localValues(iFp:iFp+2) + Fp
-    localValues(iFlowFm:iFlowFm+2)   = localValues(iFlowFm:iFlowFm+2) + FMom
-    localValues(iFlowMp:iFlowMp+2)   = localValues(iFlowMp:iFlowMp+2) + Mp
-    localValues(iFlowMm:iFlowMm+2)   = localValues(iFlowMm:iFlowMm+2) + MMom
-
-    localValues(iCoForceX:iCoForceX+2) = localValues(iCoForceX:iCoForceX+2) + COFSumFx
-    localValues(iCoForceY:iCoForceY+2) = localValues(iCoForceY:iCoForceY+2) + COFSumFy
-    localValues(iCoForceZ:iCoForceZ+2) = localValues(iCoForceZ:iCoForceZ+2) + COFSumFz
-
-    localValues(iAreaPTot) = localValues(iAreaPTot) + area_pTot
-    localValues(iAreaPs) = localValues(iAreaPs) + area_Ps
-
-    localValues(iMassVx)   = localValues(iMassVx)   + mass_Vx
-    localValues(iMassVy)   = localValues(iMassVy)   + mass_Vy
-    localValues(iMassVz)   = localValues(iMassVz)   + mass_Vz
-    localValues(iMassnx)   = localValues(iMassnx)   + mass_nx
-    localValues(iMassny)   = localValues(iMassny)   + mass_ny
-    localValues(iMassnz)   = localValues(iMassnz)   + mass_nz
-
-    localValues(iMassVi) = localValues(iMassVi) + mass_Vi
-
-  end subroutine flowIntegrationFace
-
-
-  ! ----------------------------------------------------------------------
-  !                                                                      |
-  !                    No Tapenade Routine below this line               |
-  !                                                                      |
-  ! ----------------------------------------------------------------------
+    end subroutine wallIntegrationFace
+
+    subroutine flowIntegrationFace(isInflow, localValues, mm)
+
+        use constants
+        use blockPointers, only: BCType, BCFaceID, BCData, addGridVelocities
+        use flowVarRefState, only: pRef, pInf, rhoRef, timeRef, LRef, TRef, RGas, uRef, uInf, rhoInf, gammaInf
+        use inputPhysics, only: pointRef, flowType, rGasDim
+        use flowUtils, only: computePtot, computeTtot
+        use BCPointers, only: ssi, sFace, ww1, ww2, pp1, pp2, xx, gamma1, gamma2
+        use utils, only: mynorm2
+        implicit none
+
+        ! Input/Output variables
+        logical, intent(in) :: isInflow
+        real(kind=realType), dimension(nLocalValues), intent(inout) :: localValues
+        integer(kind=intType), intent(in) :: mm
+
+        ! Local variables
+        real(kind=realType) :: massFlowRate, mass_Ptot, mass_Ttot, mass_Ps, mass_MN, mass_a, mass_rho, &
+                               mass_Vx, mass_Vy, mass_Vz, mass_nx, mass_ny, mass_nz, mass_Vi
+        real(kind=realType) :: area_Ptot, area_Ps
+        real(kind=realType) :: govgm1, gm1ovg, viConst, viLocal, pratio
+        real(kind=realType) :: mReDim
+        integer(kind=intType) :: i, j, ii, blk
+        real(kind=realType) :: internalFlowFact, inFlowFact, fact, xc, xco, yc, yco, zc, zco, mx, my, mz
+        real(kind=realType) :: sF, vmag, vnm, vnmFreeStreamRef, vxm, vym, vzm, Fx, Fy, Fz, u, v, w
+        real(kind=realType) :: pm, Ptot, Ttot, rhom, gammam, am
+        real(kind=realType) :: area, cellArea, overCellArea
+        real(kind=realType), dimension(3) :: Fp, Mp, FMom, MMom, refPoint, sFaceCoordRef
+        real(kind=realType), dimension(3) :: COFSumFx, COFSumFy, COFSumFz
+        real(kind=realType) :: MNm, massFlowRateLocal
+
+        refPoint(1) = LRef * pointRef(1)
+        refPoint(2) = LRef * pointRef(2)
+        refPoint(3) = LRef * pointRef(3)
+
+        ! Note that these are *opposite* of force integrations. The reason
+        ! is that we want positive mass flow into the domain and negative
+        ! mass flow out of the domain. Since the low faces have ssi
+        ! vectors pointining into the domain, this is correct. The high
+        ! end faces need to flip this.
+        select case (BCFaceID(mm))
+        case (iMin, jMin, kMin)
+            fact = one
+        case (iMax, jMax, kMax)
+            fact = -one
+        end select
+
+        ! the sign of momentum forces are flipped for internal flows
+        internalFlowFact = one
+        if (flowType == internalFlow) then
+            internalFlowFact = -one
+        end if
+
+        inFlowFact = one
+        if (isInflow) then
+            inflowFact = -one
+        end if
+
+        ! Loop over the quadrilateral faces of the subface. Note that
+        ! the nodal range of BCData must be used and not the cell
+        ! range, because the latter may include the halo's in i and
+        ! j-direction. The offset +1 is there, because inBeg and jnBeg
+        ! refer to nodal ranges and not to cell ranges. The loop
+        ! (without the AD stuff) would look like:
+        !
+        ! do j=(BCData(mm)%jnBeg+1),BCData(mm)%jnEnd
+        !    do i=(BCData(mm)%inBeg+1),BCData(mm)%inEnd
+
+        mReDim = sqrt(pRef * rhoRef)
+        Fp = zero
+        Mp = zero
+        FMom = zero
+        MMom = zero
+
+        COFSumFx = zero
+        COFSumFy = zero
+        COFSumFz = zero
+
+        massFlowRate = zero
+        area = zero
+        mass_Ptot = zero
+        mass_Ttot = zero
+        mass_Ps = zero
+        mass_MN = zero
+        mass_a = zero
+        mass_rho = zero
+
+        mass_Vx = zero
+        mass_Vy = zero
+        mass_Vz = zero
+        mass_nx = zero
+        mass_ny = zero
+        mass_nz = zero
+        mass_vi = zero
+
+        area_Ptot = zero
+        area_Ps = zero
+
+        !$AD II-LOOP
+        do ii = 0, (BCData(mm)%jnEnd - bcData(mm)%jnBeg) * (bcData(mm)%inEnd - bcData(mm)%inBeg) - 1
+            i = mod(ii, (bcData(mm)%inEnd - bcData(mm)%inBeg)) + bcData(mm)%inBeg + 1
+            j = ii / (bcData(mm)%inEnd - bcData(mm)%inBeg) + bcData(mm)%jnBeg + 1
+
+            if (addGridVelocities) then
+                sF = sFace(i, j)
+            else
+                sF = zero
+            end if
+
+            ! Compute the force components.
+            blk = max(BCData(mm)%iblank(i, j), 0) ! iBlank forces for overset stuff
+
+            vxm = half * (ww1(i, j, ivx) + ww2(i, j, ivx))
+            vym = half * (ww1(i, j, ivy) + ww2(i, j, ivy))
+            vzm = half * (ww1(i, j, ivz) + ww2(i, j, ivz))
+            rhom = half * (ww1(i, j, irho) + ww2(i, j, irho))
+            pm = half * (pp1(i, j) + pp2(i, j))
+            gammam = half * (gamma1(i, j) + gamma2(i, j))
+
+            vnm = vxm * ssi(i, j, 1) + vym * ssi(i, j, 2) + vzm * ssi(i, j, 3) - sF
+            vmag = sqrt((vxm**2 + vym**2 + vzm**2)) - sF
+            am = sqrt(gammam * pm / rhom)
+            MNm = vmag / am
+
+            cellArea = sqrt(ssi(i, j, 1)**2 + ssi(i, j, 2)**2 + ssi(i, j, 3)**2)
+            area = area + cellArea * blk
+            overCellArea = 1 / cellArea
+
+            call computePtot(rhom, vxm, vym, vzm, pm, Ptot)
+            call computeTtot(rhom, vxm, vym, vzm, pm, Ttot)
+
+            massFlowRateLocal = rhom * vnm * blk * fact * mReDim
+
+            massFlowRate = massFlowRate + massFlowRateLocal
+
+            ! re-dimentionalize quantities
+            pm = pm * pRef
+
+            mass_Ptot = mass_pTot + Ptot * massFlowRateLocal * Pref
+            mass_Ttot = mass_Ttot + Ttot * massFlowRateLocal * Tref
+            mass_rho = mass_rho + rhom * massFlowRateLocal * rhoRef
+            mass_a = mass_a + am * massFlowRateLocal * uRef
+
+            mass_Ps = mass_Ps + pm * massFlowRateLocal
+            mass_MN = mass_MN + MNm * massFlowRateLocal
+
+            area_pTot = area_pTot + Ptot * Pref * cellArea * blk
+            area_Ps = area_Ps + pm * cellArea * blk
+
+            sFaceCoordRef(1) = sF * ssi(i, j, 1) * overCellArea
+            sFaceCoordRef(2) = sF * ssi(i, j, 2) * overCellArea
+            sFaceCoordRef(3) = sF * ssi(i, j, 3) * overCellArea
+
+            mass_Vx = mass_Vx + (vxm * uRef - sFaceCoordRef(1)) * massFlowRateLocal
+            mass_Vy = mass_Vy + (vym * uRef - sFaceCoordRef(2)) * massFlowRateLocal
+            mass_Vz = mass_Vz + (vzm * uRef - sFaceCoordRef(3)) * massFlowRateLocal
+
+            govgm1 = gammaInf / (gammaInf - one)
+            gm1ovg = one / govgm1
+            viConst = two * govgm1 * rGasDim
+            ! the prefs in psinf / ptot cancel out so we can just take the ratio
+            ! we need to clip the ratio to stay under one. right next to the wall,
+            ! the pTot can go below the static free stream pressure. To prevent
+            ! nans from the sqrt, we just clip this. This does not affect the computation
+            ! because when pTot is this small, the velocities are also small, and the
+            ! mdot is almost zero, so the cells in this area don't contribute much
+            ! to the mass weighed sum.
+            pratio = min(one, one / pTot)
+            viLocal = sqrt(viConst * (one - (pratio)**gm1ovg) * Ttot * Tref)
+            mass_vi = mass_vi + viLocal * massFlowRateLocal
+
+            mass_nx = mass_nx + ssi(i, j, 1) * overCellArea * massFlowRateLocal
+            mass_ny = mass_ny + ssi(i, j, 2) * overCellArea * massFlowRateLocal
+            mass_nz = mass_nz + ssi(i, j, 3) * overCellArea * massFlowRateLocal
+
+            xc = fourth * (xx(i, j, 1) + xx(i + 1, j, 1) &
+                           + xx(i, j + 1, 1) + xx(i + 1, j + 1, 1)) - refPoint(1)
+            yc = fourth * (xx(i, j, 2) + xx(i + 1, j, 2) &
+                           + xx(i, j + 1, 2) + xx(i + 1, j + 1, 2)) - refPoint(2)
+            zc = fourth * (xx(i, j, 3) + xx(i + 1, j, 3) &
+                           + xx(i, j + 1, 3) + xx(i + 1, j + 1, 3)) - refPoint(3)
+
+            ! Pressure forces. Note that these need a *negative* and to subtract
+            ! the reference pressure sign to be consistent with the force
+            ! computation on the walls.
+            pm = -(pm - pInf * pRef) * fact * blk
+
+            fx = pm * ssi(i, j, 1)
+            fy = pm * ssi(i, j, 2)
+            fz = pm * ssi(i, j, 3)
+
+            ! Update the pressure force and moment coefficients.
+            Fp(1) = Fp(1) + fx
+            Fp(2) = Fp(2) + fy
+            Fp(3) = Fp(3) + fz
+
+            mx = yc * fz - zc * fy
+            my = zc * fx - xc * fz
+            mz = xc * fy - yc * fx
+
+            Mp(1) = Mp(1) + mx
+            Mp(2) = Mp(2) + my
+            Mp(3) = Mp(3) + mz
+
+            ! the force integral for the center of pressure computation.
+            ! We need the cell centers wrt origin
+            xco = fourth * (xx(i, j, 1) + xx(i + 1, j, 1) &
+                            + xx(i, j + 1, 1) + xx(i + 1, j + 1, 1))
+            yco = fourth * (xx(i, j, 2) + xx(i + 1, j, 2) &
+                            + xx(i, j + 1, 2) + xx(i + 1, j + 1, 2))
+            zco = fourth * (xx(i, j, 3) + xx(i + 1, j, 3) &
+                            + xx(i, j + 1, 3) + xx(i + 1, j + 1, 3))
+
+            ! Center of force computations. Here we accumulate in the sums.
+            ! accumulate in the sums. each force component is tracked separately
+            ! blanking is included in the mdot multiplier for the force.
+
+            ! Force-X
+            COFSumFx(1) = COFSumFx(1) + xco * fx
+            COFSumFx(2) = COFSumFx(2) + yco * fx
+            COFSumFx(3) = COFSumFx(3) + zco * fx
+
+            ! Force-Y
+            COFSumFy(1) = COFSumFy(1) + xco * fy
+            COFSumFy(2) = COFSumFy(2) + yco * fy
+            COFSumFy(3) = COFSumFy(3) + zco * fy
+
+            ! Force-Z
+            COFSumFz(1) = COFSumFz(1) + xco * fz
+            COFSumFz(2) = COFSumFz(2) + yco * fz
+            COFSumFz(3) = COFSumFz(3) + zco * fz
+
+            ! Momentum forces are a little tricky.  We negate because
+            ! have to re-apply fact to massFlowRateLocal to undoo it, because
+            ! we need the signed behavior of ssi to get the momentum forces correct.
+            ! Also, the sign is flipped between inflow and outflow types
+
+            massFlowRateLocal = massFlowRateLocal * fact / timeRef * blk / cellArea * internalFlowFact * inFlowFact
+
+            fx = massFlowRateLocal * ssi(i, j, 1) * vxm
+            fy = massFlowRateLocal * ssi(i, j, 2) * vym
+            fz = massFlowRateLocal * ssi(i, j, 3) * vzm
+
+            FMom(1) = FMom(1) + fx
+            FMom(2) = FMom(2) + fy
+            FMom(3) = FMom(3) + fz
+
+            mx = yc * fz - zc * fy
+            my = zc * fx - xc * fz
+            mz = xc * fy - yc * fx
+
+            MMom(1) = MMom(1) + mx
+            MMom(2) = MMom(2) + my
+            MMom(3) = MMom(3) + mz
+
+            ! Center of force computations. Here we accumulate in the sums.
+            ! each force component is tracked separately
+            ! blanking is included in the mdot multiplier for the force.
+
+            ! Force-X
+            COFSumFx(1) = COFSumFx(1) + xco * fx
+            COFSumFx(2) = COFSumFx(2) + yco * fx
+            COFSumFx(3) = COFSumFx(3) + zco * fx
+
+            ! Force-Y
+            COFSumFy(1) = COFSumFy(1) + xco * fy
+            COFSumFy(2) = COFSumFy(2) + yco * fy
+            COFSumFy(3) = COFSumFy(3) + zco * fy
+
+            ! Force-Z
+            COFSumFz(1) = COFSumFz(1) + xco * fz
+            COFSumFz(2) = COFSumFz(2) + yco * fz
+            COFSumFz(3) = COFSumFz(3) + zco * fz
+
+        end do
+
+        ! Increment the local values array with what we computed here
+        localValues(iMassFlow) = localValues(iMassFlow) + massFlowRate
+        localValues(iArea) = localValues(iArea) + area
+        localValues(iMassRho) = localValues(iMassRho) + mass_rho
+        localValues(iMassa) = localValues(iMassa) + mass_a
+        localValues(iMassPtot) = localValues(iMassPtot) + mass_Ptot
+        localValues(iMassTtot) = localValues(iMassTtot) + mass_Ttot
+        localValues(iMassPs) = localValues(iMassPs) + mass_Ps
+        localValues(iMassMN) = localValues(iMassMN) + mass_MN
+        localValues(iFp:iFp + 2) = localValues(iFp:iFp + 2) + Fp
+        localValues(iFlowFm:iFlowFm + 2) = localValues(iFlowFm:iFlowFm + 2) + FMom
+        localValues(iFlowMp:iFlowMp + 2) = localValues(iFlowMp:iFlowMp + 2) + Mp
+        localValues(iFlowMm:iFlowMm + 2) = localValues(iFlowMm:iFlowMm + 2) + MMom
+
+        localValues(iCoForceX:iCoForceX + 2) = localValues(iCoForceX:iCoForceX + 2) + COFSumFx
+        localValues(iCoForceY:iCoForceY + 2) = localValues(iCoForceY:iCoForceY + 2) + COFSumFy
+        localValues(iCoForceZ:iCoForceZ + 2) = localValues(iCoForceZ:iCoForceZ + 2) + COFSumFz
+
+        localValues(iAreaPTot) = localValues(iAreaPTot) + area_pTot
+        localValues(iAreaPs) = localValues(iAreaPs) + area_Ps
+
+        localValues(iMassVx) = localValues(iMassVx) + mass_Vx
+        localValues(iMassVy) = localValues(iMassVy) + mass_Vy
+        localValues(iMassVz) = localValues(iMassVz) + mass_Vz
+        localValues(iMassnx) = localValues(iMassnx) + mass_nx
+        localValues(iMassny) = localValues(iMassny) + mass_ny
+        localValues(iMassnz) = localValues(iMassnz) + mass_nz
+
+        localValues(iMassVi) = localValues(iMassVi) + mass_Vi
+
+    end subroutine flowIntegrationFace
+
+    ! ----------------------------------------------------------------------
+    !                                                                      |
+    !                    No Tapenade Routine below this line               |
+    !                                                                      |
+    ! ----------------------------------------------------------------------
 
 #ifndef USE_TAPENADE
 
-  subroutine getSolutionWrap(famLists, funcValues, nCost, nGroups, nFamMax)
-
-    use constants
-    use inputTimeSpectral , only : nTimeIntervalsSpectral
-    implicit none
-    ! Input/output Variables
-    integer(kind=intType) :: nGroups, nCost, nFamMax
-    integer(kind=intType), dimension(nGroups, nFamMax) :: famLists
-    real(kind=realType), dimension(nCost, nGroups), intent(out)  :: funcValues
-
-    ! Local variable
-
-    call getSolution(famLists, funcValues)
-  end subroutine getSolutionWrap
-
-  subroutine getSolution(famLists, funcValues, globalValues)
-    !--------------------------------------------------------------
-    ! Manual Differentiation Warning: Modifying this routine requires
-    ! modifying the hand-written forward and reverse routines.
-    ! --------------------------------------------------------------
-
-    use constants
-    use inputTimeSpectral , only : nTimeIntervalsSpectral
-    use communication, only : adflow_comm_world
-    use blockPointers, only : nDom
-    use utils, only : setPointers, EChk
-    use zipperIntegrations, only :integrateZippers
-    use userSurfaceIntegrations, only : integrateUserSurfaces
-    use actuatorRegion, only : integrateActuatorRegions
-    use inputCostFunctions, only : computeCavitation
-    implicit none
-
-    ! Input/Output Variables
-    integer(kind=intType), dimension(:, :), intent(in), target :: famLists
-    real(kind=realType), dimension(:, :), intent(out) :: funcValues
-    real(kind=realType), optional, intent(out), dimension(:, :, :) :: globalValues
-
-    ! Working
-    real(kind=realType), dimension(nLocalValues, nTimeIntervalsSpectral) :: localVal, globalVal
-    integer(kind=intType) :: nn, sps, ierr, iGroup, nFam
-    integer(kind=intType), dimension(:), pointer :: famList
-    ! Master loop over the each of the groups we have
-
-    groupLoop: do iGroup=1, size(famLists, 1)
-
-       ! Extract the current family list
-       nFam = famLists(iGroup, 1)
-       famList => famLists(iGroup, 2:2+nFam-1)
-       funcValues(:, iGroup) = zero
-       localVal = zero
-
-       ! compute the current cp min value for the cavitation computation with KS aggregation
-       if (computeCavitation) then
-          call computeCpMinFamily(famList)
-       end if
-
-       do sps=1, nTimeIntervalsSpectral
-          ! Integrate the normal block surfaces.
-          do nn=1, nDom
-             call setPointers(nn, 1, sps)
-             call integrateSurfaces(localval(:, sps), famList)
-          end do
-
-          ! Integrate any zippers we have
-          call integrateZippers(localVal(:, sps), famList, sps)
-
-          ! Integrate any user-supplied surfaces as have as well.
-          call integrateUserSurfaces(localVal(:, sps), famList, sps)
-
-          ! Integrate any actuator regions we have
-          call integrateActuatorRegions(localVal(:, sps), famList, sps)
-       end do
-
-       ! Now we need to reduce all the cost functions
-       call mpi_allreduce(localval, globalVal, nLocalValues*nTimeIntervalsSpectral, adflow_real, &
-            MPI_SUM, adflow_comm_world, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-
-       ! Call the final routine that will comptue all of our functions of
-       ! interest.
-       call getCostFunctions(globalVal, funcValues(:, iGroup))
-
-       if (present(globalValues)) then
-          globalValues(:, :, iGroup) = globalVal
-       end if
-    end do groupLoop
-  end subroutine getSolution
-
-  subroutine integrateSurfaces(localValues, famList)
-    !--------------------------------------------------------------
-    ! Manual Differentiation Warning: Modifying this routine requires
-    ! modifying the hand-written forward and reverse routines.
-    ! --------------------------------------------------------------
-    !
-    ! This is a shell routine that calls the specific surface
-    ! integration routines. Currently we have have the forceAndMoment
-    ! routine as well as the flow properties routine. This routine
-    ! takes care of setting pointers, while the actual computational
-    ! routine just acts on a specific fast pointed to by pointers.
-
-    use constants
-    use blockPointers, only : nBocos, BCData, BCType, sk, sj, si, x, rlv, &
-         sfacei, sfacej, sfacek, gamma, rev, p, viscSubface
-    use utils, only : setBCPointers, isWallType
-    use sorting, only : famInList
-    ! Tapenade needs to see these modules that the callees use.
-    use BCPointers
-    use flowVarRefState
-    use inputPhysics
-
-    implicit none
-
-    ! Input/output Variables
-    real(kind=realType), dimension(nLocalValues), intent(inout) :: localValues
-    integer(kind=intType), dimension(:), intent(in) :: famList
-
-    ! Working variables
-    integer(kind=intType) :: mm
-
-    ! Loop over all possible boundary conditions
-    bocos: do mm=1, nBocos
-
-       ! Determine if this boundary condition is to be incldued in the
-       ! currently active group
-       famInclude: if (famInList(BCData(mm)%famID, famList)) then
-
-          ! Set a bunch of pointers depending on the face id to make
-          ! a generic treatment possible.
-          call setBCPointers(mm, .True.)
-
-          ! no post gathered integrations currently
-          isWall: if( isWallType(BCType(mm)) ) then
-             call wallIntegrationFace(localvalues, mm)
-          end if isWall
-
-          isInflowOutflow: if (BCType(mm) == SubsonicInflow .or. &
-               BCType(mm) == SupersonicInflow) then
-             call flowIntegrationFace(.true., localValues, mm)
-          else if (BCType(mm) == SubsonicOutflow .or. &
-               BCType(mm) == SupersonicOutflow) then
-             call flowIntegrationFace(.false., localValues, mm)
-          end if isInflowOutflow
-
-       end if famInclude
-    end do bocos
-
-  end subroutine integrateSurfaces
-
-  subroutine computeCpMinFamily(famList)
-
-     use constants
-     use inputTimeSpectral, only : nTimeIntervalsSpectral
-     use communication, only : ADflow_comm_world, myID
-     use blockPointers, only : nDom
-     use inputPhysics, only : cpmin_family, MachCoef
-     use blockPointers
-     use flowVarRefState
-     use BCPointers
-     use utils, only : setPointers, setBCPointers, isWallType, EChk
-     use sorting, only : famInList
-
-    implicit none
-
-     integer(kind=intType), dimension(:), intent(in) :: famList
-     integer(kind=intType) :: mm, nn, sps
-     integer(kind=intType) :: i, j, ii, blk, ierr
-     real(kind=realType) :: Cp, plocal, tmp, cpmin_local
-
-     ! this routine loops over the surface cells in the given family
-     ! and computes the true minimum Cp value.
-     ! this is then used in the surface integration routine to compute
-     ! the cpmin using KS aggregation.
-     ! the goal is to get a differentiable cpmin output.
-
-     ! loop over the TS instances and compute cpmin_family for each TS instance
-     do sps=1, nTimeIntervalsSpectral
-       ! set the local cp min to a large value so that we get the actual min
-       cpmin_local = 10000.0_realType
-       do nn=1, nDom
-           call setPointers(nn, 1, sps)
-
-           ! Loop over all possible boundary conditions
-           bocos: do mm=1, nBocos
-              ! Determine if this boundary condition is to be incldued in the
-              ! currently active group
+    subroutine getSolutionWrap(famLists, funcValues, nCost, nGroups, nFamMax)
+
+        use constants
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        implicit none
+        ! Input/output Variables
+        integer(kind=intType) :: nGroups, nCost, nFamMax
+        integer(kind=intType), dimension(nGroups, nFamMax) :: famLists
+        real(kind=realType), dimension(nCost, nGroups), intent(out) :: funcValues
+
+        ! Local variable
+
+        call getSolution(famLists, funcValues)
+    end subroutine getSolutionWrap
+
+    subroutine getSolution(famLists, funcValues, globalValues)
+        !--------------------------------------------------------------
+        ! Manual Differentiation Warning: Modifying this routine requires
+        ! modifying the hand-written forward and reverse routines.
+        ! --------------------------------------------------------------
+
+        use constants
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use communication, only: adflow_comm_world
+        use blockPointers, only: nDom
+        use utils, only: setPointers, EChk
+        use zipperIntegrations, only: integrateZippers
+        use userSurfaceIntegrations, only: integrateUserSurfaces
+        use actuatorRegion, only: integrateActuatorRegions
+        use inputCostFunctions, only: computeCavitation
+        implicit none
+
+        ! Input/Output Variables
+        integer(kind=intType), dimension(:, :), intent(in), target :: famLists
+        real(kind=realType), dimension(:, :), intent(out) :: funcValues
+        real(kind=realType), optional, intent(out), dimension(:, :, :) :: globalValues
+
+        ! Working
+        real(kind=realType), dimension(nLocalValues, nTimeIntervalsSpectral) :: localVal, globalVal
+        integer(kind=intType) :: nn, sps, ierr, iGroup, nFam
+        integer(kind=intType), dimension(:), pointer :: famList
+        ! Master loop over the each of the groups we have
+
+        groupLoop: do iGroup = 1, size(famLists, 1)
+
+            ! Extract the current family list
+            nFam = famLists(iGroup, 1)
+            famList => famLists(iGroup, 2:2 + nFam - 1)
+            funcValues(:, iGroup) = zero
+            localVal = zero
+
+            ! compute the current cp min value for the cavitation computation with KS aggregation
+            if (computeCavitation) then
+                call computeCpMinFamily(famList)
+            end if
+
+            do sps = 1, nTimeIntervalsSpectral
+                ! Integrate the normal block surfaces.
+                do nn = 1, nDom
+                    call setPointers(nn, 1, sps)
+                    call integrateSurfaces(localval(:, sps), famList)
+                end do
+
+                ! Integrate any zippers we have
+                call integrateZippers(localVal(:, sps), famList, sps)
+
+                ! Integrate any user-supplied surfaces as have as well.
+                call integrateUserSurfaces(localVal(:, sps), famList, sps)
+
+                ! Integrate any actuator regions we have
+                call integrateActuatorRegions(localVal(:, sps), famList, sps)
+            end do
+
+            ! Now we need to reduce all the cost functions
+            call mpi_allreduce(localval, globalVal, nLocalValues * nTimeIntervalsSpectral, adflow_real, &
+                               MPI_SUM, adflow_comm_world, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Call the final routine that will comptue all of our functions of
+            ! interest.
+            call getCostFunctions(globalVal, funcValues(:, iGroup))
+
+            if (present(globalValues)) then
+                globalValues(:, :, iGroup) = globalVal
+            end if
+        end do groupLoop
+    end subroutine getSolution
+
+    subroutine integrateSurfaces(localValues, famList)
+        !--------------------------------------------------------------
+        ! Manual Differentiation Warning: Modifying this routine requires
+        ! modifying the hand-written forward and reverse routines.
+        ! --------------------------------------------------------------
+        !
+        ! This is a shell routine that calls the specific surface
+        ! integration routines. Currently we have have the forceAndMoment
+        ! routine as well as the flow properties routine. This routine
+        ! takes care of setting pointers, while the actual computational
+        ! routine just acts on a specific fast pointed to by pointers.
+
+        use constants
+        use blockPointers, only: nBocos, BCData, BCType, sk, sj, si, x, rlv, &
+                                 sfacei, sfacej, sfacek, gamma, rev, p, viscSubface
+        use utils, only: setBCPointers, isWallType
+        use sorting, only: famInList
+        ! Tapenade needs to see these modules that the callees use.
+        use BCPointers
+        use flowVarRefState
+        use inputPhysics
+
+        implicit none
+
+        ! Input/output Variables
+        real(kind=realType), dimension(nLocalValues), intent(inout) :: localValues
+        integer(kind=intType), dimension(:), intent(in) :: famList
+
+        ! Working variables
+        integer(kind=intType) :: mm
+
+        ! Loop over all possible boundary conditions
+        bocos: do mm = 1, nBocos
+
+            ! Determine if this boundary condition is to be incldued in the
+            ! currently active group
             famInclude: if (famInList(BCData(mm)%famID, famList)) then
 
-               ! Set a bunch of pointers depending on the face id to make
-               ! a generic treatment possible.
-               call setBCPointers(mm, .True.)
+                ! Set a bunch of pointers depending on the face id to make
+                ! a generic treatment possible.
+                call setBCPointers(mm, .True.)
 
-               ! no post gathered integrations currently
-               isWall: if( isWallType(BCType(mm)) ) then
+                ! no post gathered integrations currently
+                isWall: if (isWallType(BCType(mm))) then
+                    call wallIntegrationFace(localvalues, mm)
+                end if isWall
 
-                  !$AD II-LOOP
-                  do ii=0,(BCData(mm)%jnEnd - bcData(mm)%jnBeg)*(bcData(mm)%inEnd - bcData(mm)%inBeg) -1
-                     i = mod(ii, (bcData(mm)%inEnd-bcData(mm)%inBeg)) + bcData(mm)%inBeg + 1
-                     j = ii/(bcData(mm)%inEnd-bcData(mm)%inBeg) + bcData(mm)%jnBeg + 1
+                isInflowOutflow: if (BCType(mm) == SubsonicInflow .or. &
+                                     BCType(mm) == SupersonicInflow) then
+                    call flowIntegrationFace(.true., localValues, mm)
+                else if (BCType(mm) == SubsonicOutflow .or. &
+                         BCType(mm) == SupersonicOutflow) then
+                    call flowIntegrationFace(.false., localValues, mm)
+                end if isInflowOutflow
 
-                     ! only take this if its a compute cell
-                     if (BCData(mm)%iblank(i,j) .eq. one) then
+            end if famInclude
+        end do bocos
+
+    end subroutine integrateSurfaces
+
+    subroutine computeCpMinFamily(famList)
+
+        use constants
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use communication, only: ADflow_comm_world, myID
+        use blockPointers, only: nDom
+        use inputPhysics, only: cpmin_family, MachCoef
+        use blockPointers
+        use flowVarRefState
+        use BCPointers
+        use utils, only: setPointers, setBCPointers, isWallType, EChk
+        use sorting, only: famInList
+
+        implicit none
+
+        integer(kind=intType), dimension(:), intent(in) :: famList
+        integer(kind=intType) :: mm, nn, sps
+        integer(kind=intType) :: i, j, ii, blk, ierr
+        real(kind=realType) :: Cp, plocal, tmp, cpmin_local
+
+        ! this routine loops over the surface cells in the given family
+        ! and computes the true minimum Cp value.
+        ! this is then used in the surface integration routine to compute
+        ! the cpmin using KS aggregation.
+        ! the goal is to get a differentiable cpmin output.
+
+        ! loop over the TS instances and compute cpmin_family for each TS instance
+        do sps = 1, nTimeIntervalsSpectral
+            ! set the local cp min to a large value so that we get the actual min
+            cpmin_local = 10000.0_realType
+            do nn = 1, nDom
+                call setPointers(nn, 1, sps)
+
+                ! Loop over all possible boundary conditions
+                bocos: do mm = 1, nBocos
+                    ! Determine if this boundary condition is to be incldued in the
+                    ! currently active group
+                    famInclude: if (famInList(BCData(mm)%famID, famList)) then
+
+                        ! Set a bunch of pointers depending on the face id to make
+                        ! a generic treatment possible.
+                        call setBCPointers(mm, .True.)
+
+                        ! no post gathered integrations currently
+                        isWall: if (isWallType(BCType(mm))) then
+
+                            !$AD II-LOOP
+                            do ii = 0, (BCData(mm)%jnEnd - bcData(mm)%jnBeg) * (bcData(mm)%inEnd - bcData(mm)%inBeg) - 1
+                                i = mod(ii, (bcData(mm)%inEnd - bcData(mm)%inBeg)) + bcData(mm)%inBeg + 1
+                                j = ii / (bcData(mm)%inEnd - bcData(mm)%inBeg) + bcData(mm)%jnBeg + 1
+
+                                ! only take this if its a compute cell
+                                if (BCData(mm)%iblank(i, j) .eq. one) then
+
+                                    ! compute local CP
+                                    plocal = pp2(i, j)
+                                    tmp = two / (gammaInf * MachCoef * MachCoef)
+                                    Cp = tmp * (plocal - pinf)
+
+                                    ! compare it against the current value on this proc
+                                    cpmin_local = min(cpmin_local, Cp)
+                                end if
+                            end do
+                        end if isWall
+
+                    end if famInclude
+                end do bocos
+            end do
+            ! finally communicate across all processors for this time spectral instance
+            call mpi_allreduce(cpmin_local, cpmin_family(sps), 1, MPI_DOUBLE, &
+                               MPI_MIN, adflow_comm_world, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+        end do
+
+    end subroutine computeCpMinFamily
 
-                         ! compute local CP
-                         plocal = pp2(i,j)
-                         tmp = two/(gammaInf*MachCoef*MachCoef)
-                         Cp = tmp*(plocal-pinf)
+#ifndef USE_COMPLEX
+    subroutine integrateSurfaces_d(localValues, localValuesd, famList)
+        !------------------------------------------------------------------------
+        ! Manual Differentiation Warning: This routine is differentiated by hand.
+        ! -----------------------------------------------------------------------
+
+        ! Forward mode linearization of integrateSurfaces
+        use constants
+        use blockPointers, only: nBocos, BCData, BCType
+        use utils, only: setBCPointers_d, isWallType
+        use sorting, only: famInList
+        use surfaceIntegrations_d, only: wallIntegrationFace_d, flowIntegrationFace_d
+        implicit none
+
+        ! Input/output Variables
+        real(kind=realType), dimension(nLocalValues), intent(inout) :: localValues, localValuesd
+        integer(kind=intType), dimension(:), intent(in) :: famList
+
+        ! Working variables
+        integer(kind=intType) :: mm
+
+        ! Loop over all possible boundary conditions
+        do mm = 1, nBocos
+            ! Determine if this boundary condition is to be incldued in the
+            ! currently active group
+            famInclude: if (famInList(BCData(mm)%famID, famList)) then
 
-                         ! compare it against the current value on this proc
-                         cpmin_local = min(cpmin_local, Cp)
-                     end if
-                  enddo
-               end if isWall
+                ! Set a bunch of pointers depending on the face id to make
+                ! a generic treatment possible.
+                call setBCPointers_d(mm, .True.)
 
-            end if famInclude
-         end do bocos
-       end do
-       ! finally communicate across all processors for this time spectral instance
-       call mpi_allreduce(cpmin_local, cpmin_family(sps), 1, MPI_DOUBLE, &
-            MPI_MIN, adflow_comm_world, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-     end do
+                ! not post gathered integrations currently
+                isWall: if (isWallType(BCType(mm))) then
+                    call wallIntegrationFace_d(localValues, localValuesd, mm)
+                end if isWall
 
-  end subroutine computeCpMinFamily
+                isInflowOutflow: if (BCType(mm) == SubsonicInflow .or. &
+                                     BCType(mm) == SupersonicInflow) then
+                    call flowIntegrationFace_d(.true., localValues, localValuesd, mm)
+                else if (BCType(mm) == SubsonicOutflow .or. &
+                         BCType(mm) == SupersonicOutflow) then
 
-#ifndef USE_COMPLEX
-  subroutine integrateSurfaces_d(localValues, localValuesd, famList)
-    !------------------------------------------------------------------------
-    ! Manual Differentiation Warning: This routine is differentiated by hand.
-    ! -----------------------------------------------------------------------
-
-    ! Forward mode linearization of integrateSurfaces
-    use constants
-    use blockPointers, only : nBocos, BCData, BCType
-    use utils, only : setBCPointers_d, isWallType
-    use sorting, only : famInList
-    use surfaceIntegrations_d, only : wallIntegrationFace_d, flowIntegrationFace_d
-    implicit none
-
-    ! Input/output Variables
-    real(kind=realType), dimension(nLocalValues), intent(inout) :: localValues, localValuesd
-    integer(kind=intType), dimension(:), intent(in) :: famList
-
-    ! Working variables
-    integer(kind=intType) :: mm
-
-    ! Loop over all possible boundary conditions
-    do mm=1, nBocos
-       ! Determine if this boundary condition is to be incldued in the
-       ! currently active group
-       famInclude: if (famInList(BCData(mm)%famID, famList)) then
-
-          ! Set a bunch of pointers depending on the face id to make
-          ! a generic treatment possible.
-          call setBCPointers_d(mm, .True.)
-
-          ! not post gathered integrations currently
-          isWall: if( isWallType(BCType(mm)) ) then
-             call wallIntegrationFace_d(localValues, localValuesd, mm)
-          end if isWall
-
-          isInflowOutflow: if (BCType(mm) == SubsonicInflow .or. &
-               BCType(mm) == SupersonicInflow) then
-             call flowIntegrationFace_d(.true., localValues, localValuesd, mm)
-            else if (BCType(mm) == SubsonicOutflow .or. &
-               BCType(mm) == SupersonicOutflow) then
-
-               call flowIntegrationFace_d(.false., localValues, localValuesd, mm)
-
-          end if isInflowOutflow
-       end if famInclude
-    end do
-  end subroutine integrateSurfaces_d
-
-  subroutine integrateSurfaces_b(localValues, localValuesd, famList)
-    !------------------------------------------------------------------------
-    ! Manual Differentiation Warning: This routine is differentiated by hand.
-    ! -----------------------------------------------------------------------
-
-    ! Reverse mode linearization of integrateSurfaces
-    use constants
-    use blockPointers, only : nBocos, BCData, BCType, bcDatad
-    use utils, only : setBCPointers_d, isWallType
-    use sorting, only : famInList
-    use surfaceIntegrations_b, only : wallIntegrationFace_b, flowIntegrationFace_b
-    implicit none
-
-    ! Input/output Variables
-    real(kind=realType), dimension(nLocalValues), intent(inout) :: localValues, localValuesd
-    integer(kind=intType), dimension(:), intent(in) :: famList
-    ! Working variables
-    integer(kind=intType) :: mm
-
-    ! Call the individual integration routines.
-    do mm=1, nBocos
-       ! Determine if this boundary condition is to be incldued in the
-       ! currently active group
-       famInclude: if (famInList(BCData(mm)%famID, famList)) then
-
-          ! Set a bunch of pointers depending on the face id to make
-          ! a generic treatment possible.
-          call setBCPointers_d(mm, .True.)
-
-          ! not post gathered integrations currently
-          isWall: if( isWallType(BCType(mm)) ) then
-             call wallIntegrationFace_b(localValues, localValuesd, mm)
-          end if isWall
-
-          isInflowOutflow: if (BCType(mm) == SubsonicInflow .or. &
-               BCType(mm) == SupersonicInflow) then
-             call flowIntegrationFace_b(.true., localValues, localValuesd, mm)
-          else if (BCType(mm) == SubsonicOutflow .or. &
-               BCType(mm) == SupersonicOutflow) then
-             call flowIntegrationFace_b(.false., localValues, localValuesd, mm)
-          end if isInflowOutflow
-       end if famInclude
-    end do
-  end subroutine integrateSurfaces_b
-
-  subroutine getSolution_d(famLists, funcValues, funcValuesd)
-    !------------------------------------------------------------------------
-    ! Manual Differentiation Warning: This routine is differentiated by hand.
-    ! -----------------------------------------------------------------------
-
-    use constants
-    use inputTSStabDeriv, only : TSSTability
-    use inputTimeSpectral , only : nTimeIntervalsSpectral
-    use communication, only : adflow_comm_world
-    use blockPointers, only : nDom
-    use utils, only : setPointers_d, EChk
-    use surfaceIntegrations_d, only : getCostFunctions_d
-    use zipperIntegrations, only :integrateZippers_d
-    use userSurfaceIntegrations, only : integrateUserSurfaces_d
-    use actuatorRegion, only : integrateActuatorRegions_d
-    use inputCostFunctions, only : computeCavitation
-    implicit none
-
-    ! Input/Output Variables
-    integer(kind=intType), dimension(:, :), target, intent(in) :: famLists
-    real(kind=realType), dimension(:, :), intent(out) :: funcValues, funcValuesd
-
-    ! Working
-    real(kind=realType), dimension(nLocalValues, nTimeIntervalsSpectral) :: localVal, globalVal
-    real(kind=realType), dimension(nLocalValues, nTimeIntervalsSpectral) :: localVald, globalVald
-    integer(kind=intType) :: nn, sps, ierr, iGroup, nFam
-    integer(kind=intType), dimension(:), pointer :: famList
-
-    groupLoop: do iGroup=1, size(famLists, 1)
-
-       ! Extract the current family list
-       nFam = famLists(iGroup, 1)
-       famList => famLists(iGroup, 2:2+nFam-1)
-
-       ! compute the current cp min value for the cavitation computation with KS aggregation
-       if (computeCavitation) then
-          call computeCpMinFamily(famList)
-       end if
-
-       localVal = zero
-       localVald = zero
-       do sps=1, nTimeIntervalsSpectral
-          ! Integrate the normal block surfaces.
-          do nn=1, nDom
-             call setPointers_d(nn, 1, sps)
-             call integrateSurfaces_d(localval(:, sps), localvald(:, sps), famList)
-          end do
-
-          ! Integrate any zippers we have
-          call integrateZippers_d(localVal(:, sps), localVald(:, sps), famList, sps)
-
-          ! Integrate any user-supplied surface as have as well.
-          call integrateUserSurfaces_d(localVal(:, sps), localVald(:, sps), famList, sps)
-
-          ! Integrate any actuator regions we have
-          call integrateActuatorRegions_d(localVal(:, sps), localVald(:, sps), famList, sps)
-       end do
-
-       ! Now we need to reduce all the cost functions
-       call mpi_allreduce(localval, globalVal, nLocalValues*nTimeIntervalsSpectral, adflow_real, &
-            MPI_SUM, adflow_comm_world, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-
-       ! Now we need to reduce all the cost functions
-       call mpi_allreduce(localvald, globalVald, nLocalValues*nTimeIntervalsSpectral, adflow_real, &
-            MPI_SUM, adflow_comm_world, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-
-       ! Call the final routine that will comptue all of our functions of
-       ! interest.
-
-       call getCostFunctions_d(globalVal, globalVald, funcValues(:, iGroup), funcValuesd(:, iGroup))
-
-       ! if (present(globalValues)) then
-       !    globalValues = globalVal
-       !    globalValuesd = globalVald
-       ! end if
-    end do groupLoop
-
-  end subroutine getSolution_d
-
-  subroutine getSolution_b(famLists, funcValues, funcValuesd)
-    ! -----------------------------------------------------------------------
-    ! Manual Differentiation Warning: This routine is differentiated by hand.
-    ! -----------------------------------------------------------------------
-
-    use constants
-    use communication, only : myid
-    use inputTSStabDeriv, only : TSSTability
-    use inputTimeSpectral , only : nTimeIntervalsSpectral
-    use communication, only : adflow_comm_world
-    use blockPointers, only : nDom
-    use utils, only : setPointers_b, EChk, setPointers
-    use surfaceIntegrations_b, only : getCostFunctions_b
-    use zipperIntegrations, only :integrateZippers_b
-    use userSurfaceIntegrations, only : integrateUserSurfaces_b
-    use actuatorRegion, only : integrateActuatorRegions_b
-    use inputCostFunctions, only : computeCavitation
-    implicit none
-
-    ! Input/Output Variables
-    integer(kind=intType), dimension(:, :), target, intent(in) :: famLists
-    real(kind=realType), dimension(:, :) :: funcValues, funcValuesd
-
-    ! Working
-    real(kind=realType), dimension(nLocalValues, nTimeIntervalsSpectral) :: localVal, globalVal
-    real(kind=realType), dimension(nLocalValues, nTimeIntervalsSpectral) :: localVald, globalVald
-    real(kind=realType), dimension(nLocalValues, nTimeIntervalsSpectral, size(famLists, 1)) :: globalValues
-    integer(kind=intType) :: nn, sps, ierr, iGroup, nFam
-    integer(kind=intType), dimension(:), pointer :: famList
-
-
-    call getSolution(famLists, funcValues, globalValues)
-
-    groupLoop: do iGroup=1, size(famLists, 1)
-
-       ! Extract the current family list
-       nFam = famLists(iGroup, 1)
-       famList => famLists(iGroup, 2:2+nFam-1)
-
-       ! compute the current cp min value for the cavitation computation with KS aggregation
-       if (computeCavitation) then
-          call computeCpMinFamily(famList)
-       end if
-
-       localVal = zero
-       localVald = zero
-
-       ! Retrive the forward pass values from getSolution
-       globalVal = globalValues(:, :, iGroup)
-
-       if (myid == 0) then
-          call getCostFunctions_b(globalVal, globalVald, funcValues(:, iGroup), funcValuesd(:, iGroup))
-          localVald = globalVald
-       end if
-
-       ! Now we need to bcast out the localValues to all procs.
-       call mpi_bcast(localVald, nLocalValues*nTimeIntervalsSpectral, &
-            adflow_real, 0, adflow_comm_world, ierr)
-       call EChk(ierr, __FILE__, __LINE__)
-
-       do sps=1, nTimeIntervalsSpectral
-
-          ! Integrate any actuator regions we have:
-          call integrateActuatorRegions_b(localVal(:, sps), localVald(:, sps), famList, sps)
-
-          ! Integrate any user-supplied planes as have as well.
-          call integrateUserSurfaces_b(localVal(:, sps), localVald(:, sps), famList, sps)
-
-          ! Integrate any zippers we have
-          call integrateZippers_b(localVal(:, sps), localVald(:, sps), famList, sps)
-
-          ! Integrate the normal block surfaces.
-          do nn=1, nDom
-             call setPointers_b(nn, 1, sps)
-             call integrateSurfaces_b(localval(:, sps), localVald(:, sps), famList)
-          end do
-
-       end do
-    end do groupLoop
-  end subroutine getSolution_b
+                    call flowIntegrationFace_d(.false., localValues, localValuesd, mm)
+
+                end if isInflowOutflow
+            end if famInclude
+        end do
+    end subroutine integrateSurfaces_d
+
+    subroutine integrateSurfaces_b(localValues, localValuesd, famList)
+        !------------------------------------------------------------------------
+        ! Manual Differentiation Warning: This routine is differentiated by hand.
+        ! -----------------------------------------------------------------------
+
+        ! Reverse mode linearization of integrateSurfaces
+        use constants
+        use blockPointers, only: nBocos, BCData, BCType, bcDatad
+        use utils, only: setBCPointers_d, isWallType
+        use sorting, only: famInList
+        use surfaceIntegrations_b, only: wallIntegrationFace_b, flowIntegrationFace_b
+        implicit none
+
+        ! Input/output Variables
+        real(kind=realType), dimension(nLocalValues), intent(inout) :: localValues, localValuesd
+        integer(kind=intType), dimension(:), intent(in) :: famList
+        ! Working variables
+        integer(kind=intType) :: mm
+
+        ! Call the individual integration routines.
+        do mm = 1, nBocos
+            ! Determine if this boundary condition is to be incldued in the
+            ! currently active group
+            famInclude: if (famInList(BCData(mm)%famID, famList)) then
+
+                ! Set a bunch of pointers depending on the face id to make
+                ! a generic treatment possible.
+                call setBCPointers_d(mm, .True.)
+
+                ! not post gathered integrations currently
+                isWall: if (isWallType(BCType(mm))) then
+                    call wallIntegrationFace_b(localValues, localValuesd, mm)
+                end if isWall
+
+                isInflowOutflow: if (BCType(mm) == SubsonicInflow .or. &
+                                     BCType(mm) == SupersonicInflow) then
+                    call flowIntegrationFace_b(.true., localValues, localValuesd, mm)
+                else if (BCType(mm) == SubsonicOutflow .or. &
+                         BCType(mm) == SupersonicOutflow) then
+                    call flowIntegrationFace_b(.false., localValues, localValuesd, mm)
+                end if isInflowOutflow
+            end if famInclude
+        end do
+    end subroutine integrateSurfaces_b
+
+    subroutine getSolution_d(famLists, funcValues, funcValuesd)
+        !------------------------------------------------------------------------
+        ! Manual Differentiation Warning: This routine is differentiated by hand.
+        ! -----------------------------------------------------------------------
+
+        use constants
+        use inputTSStabDeriv, only: TSSTability
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use communication, only: adflow_comm_world
+        use blockPointers, only: nDom
+        use utils, only: setPointers_d, EChk
+        use surfaceIntegrations_d, only: getCostFunctions_d
+        use zipperIntegrations, only: integrateZippers_d
+        use userSurfaceIntegrations, only: integrateUserSurfaces_d
+        use actuatorRegion, only: integrateActuatorRegions_d
+        use inputCostFunctions, only: computeCavitation
+        implicit none
+
+        ! Input/Output Variables
+        integer(kind=intType), dimension(:, :), target, intent(in) :: famLists
+        real(kind=realType), dimension(:, :), intent(out) :: funcValues, funcValuesd
+
+        ! Working
+        real(kind=realType), dimension(nLocalValues, nTimeIntervalsSpectral) :: localVal, globalVal
+        real(kind=realType), dimension(nLocalValues, nTimeIntervalsSpectral) :: localVald, globalVald
+        integer(kind=intType) :: nn, sps, ierr, iGroup, nFam
+        integer(kind=intType), dimension(:), pointer :: famList
+
+        groupLoop: do iGroup = 1, size(famLists, 1)
+
+            ! Extract the current family list
+            nFam = famLists(iGroup, 1)
+            famList => famLists(iGroup, 2:2 + nFam - 1)
+
+            ! compute the current cp min value for the cavitation computation with KS aggregation
+            if (computeCavitation) then
+                call computeCpMinFamily(famList)
+            end if
+
+            localVal = zero
+            localVald = zero
+            do sps = 1, nTimeIntervalsSpectral
+                ! Integrate the normal block surfaces.
+                do nn = 1, nDom
+                    call setPointers_d(nn, 1, sps)
+                    call integrateSurfaces_d(localval(:, sps), localvald(:, sps), famList)
+                end do
+
+                ! Integrate any zippers we have
+                call integrateZippers_d(localVal(:, sps), localVald(:, sps), famList, sps)
+
+                ! Integrate any user-supplied surface as have as well.
+                call integrateUserSurfaces_d(localVal(:, sps), localVald(:, sps), famList, sps)
+
+                ! Integrate any actuator regions we have
+                call integrateActuatorRegions_d(localVal(:, sps), localVald(:, sps), famList, sps)
+            end do
+
+            ! Now we need to reduce all the cost functions
+            call mpi_allreduce(localval, globalVal, nLocalValues * nTimeIntervalsSpectral, adflow_real, &
+                               MPI_SUM, adflow_comm_world, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Now we need to reduce all the cost functions
+            call mpi_allreduce(localvald, globalVald, nLocalValues * nTimeIntervalsSpectral, adflow_real, &
+                               MPI_SUM, adflow_comm_world, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Call the final routine that will comptue all of our functions of
+            ! interest.
+
+            call getCostFunctions_d(globalVal, globalVald, funcValues(:, iGroup), funcValuesd(:, iGroup))
+
+            ! if (present(globalValues)) then
+            !    globalValues = globalVal
+            !    globalValuesd = globalVald
+            ! end if
+        end do groupLoop
+
+    end subroutine getSolution_d
+
+    subroutine getSolution_b(famLists, funcValues, funcValuesd)
+        ! -----------------------------------------------------------------------
+        ! Manual Differentiation Warning: This routine is differentiated by hand.
+        ! -----------------------------------------------------------------------
+
+        use constants
+        use communication, only: myid
+        use inputTSStabDeriv, only: TSSTability
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use communication, only: adflow_comm_world
+        use blockPointers, only: nDom
+        use utils, only: setPointers_b, EChk, setPointers
+        use surfaceIntegrations_b, only: getCostFunctions_b
+        use zipperIntegrations, only: integrateZippers_b
+        use userSurfaceIntegrations, only: integrateUserSurfaces_b
+        use actuatorRegion, only: integrateActuatorRegions_b
+        use inputCostFunctions, only: computeCavitation
+        implicit none
+
+        ! Input/Output Variables
+        integer(kind=intType), dimension(:, :), target, intent(in) :: famLists
+        real(kind=realType), dimension(:, :) :: funcValues, funcValuesd
+
+        ! Working
+        real(kind=realType), dimension(nLocalValues, nTimeIntervalsSpectral) :: localVal, globalVal
+        real(kind=realType), dimension(nLocalValues, nTimeIntervalsSpectral) :: localVald, globalVald
+        real(kind=realType), dimension(nLocalValues, nTimeIntervalsSpectral, size(famLists, 1)) :: globalValues
+        integer(kind=intType) :: nn, sps, ierr, iGroup, nFam
+        integer(kind=intType), dimension(:), pointer :: famList
+
+        call getSolution(famLists, funcValues, globalValues)
+
+        groupLoop: do iGroup = 1, size(famLists, 1)
+
+            ! Extract the current family list
+            nFam = famLists(iGroup, 1)
+            famList => famLists(iGroup, 2:2 + nFam - 1)
+
+            ! compute the current cp min value for the cavitation computation with KS aggregation
+            if (computeCavitation) then
+                call computeCpMinFamily(famList)
+            end if
+
+            localVal = zero
+            localVald = zero
+
+            ! Retrive the forward pass values from getSolution
+            globalVal = globalValues(:, :, iGroup)
+
+            if (myid == 0) then
+                call getCostFunctions_b(globalVal, globalVald, funcValues(:, iGroup), funcValuesd(:, iGroup))
+                localVald = globalVald
+            end if
+
+            ! Now we need to bcast out the localValues to all procs.
+            call mpi_bcast(localVald, nLocalValues * nTimeIntervalsSpectral, &
+                           adflow_real, 0, adflow_comm_world, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            do sps = 1, nTimeIntervalsSpectral
+
+                ! Integrate any actuator regions we have:
+                call integrateActuatorRegions_b(localVal(:, sps), localVald(:, sps), famList, sps)
+
+                ! Integrate any user-supplied planes as have as well.
+                call integrateUserSurfaces_b(localVal(:, sps), localVald(:, sps), famList, sps)
+
+                ! Integrate any zippers we have
+                call integrateZippers_b(localVal(:, sps), localVald(:, sps), famList, sps)
+
+                ! Integrate the normal block surfaces.
+                do nn = 1, nDom
+                    call setPointers_b(nn, 1, sps)
+                    call integrateSurfaces_b(localval(:, sps), localVald(:, sps), famList)
+                end do
+
+            end do
+        end do groupLoop
+    end subroutine getSolution_b
 #endif
 #endif
 end module surfaceIntegrations
diff --git a/src/solver/userSurfaceIntegrations.F90 b/src/solver/userSurfaceIntegrations.F90
index 1462416b1..af72b0070 100644
--- a/src/solver/userSurfaceIntegrations.F90
+++ b/src/solver/userSurfaceIntegrations.F90
@@ -1,1625 +1,1621 @@
 module userSurfaceIntegrations
 
-  use constants
-  use communication, only : commType, internalCommType
-  use userSurfaceIntegrationData
+    use constants
+    use communication, only: commType, internalCommType
+    use userSurfaceIntegrationData
 contains
 
-  subroutine integrateUserSurfaces(localValues, famList, sps)
+    subroutine integrateUserSurfaces(localValues, famList, sps)
+
+        use constants
+        use block, onlY: flowDoms, nDom
+        use flowVarRefState, only: pRef, rhoRef, pRef, timeRef, LRef, TRef
+        use communication, only: myid, adflow_comm_world
+        use utils, only: EChk, mynorm2
+        use flowUtils, only: computePtot, computeTtot
+        use sorting, only: famInList
+        use zipperIntegrations, only: flowIntegrationZipper
+        use utils, only: terminate
+        implicit none
+
+        ! Input Parameters
+        real(kind=realType), dimension(nLocalValues), intent(inout) :: localValues
+        integer(kind=intType), dimension(:), intent(in) :: famList
+        integer(kind=intType), intent(in) :: sps
+
+        ! Working parameters
+        integer(kind=intType) :: iSurf, i, j, k, jj, ierr, nn, iDim, nPts
+        real(kind=realType), dimension(:), allocatable :: recvBuffer1, recvBuffer2
+        real(kind=realType), dimension(:, :), allocatable :: vars
+        integer(kind=intType), dimension(:), allocatable :: fams
+        logical, dimension(:), allocatable :: ptValid
+        type(userIntSurf), pointer :: surf
+
+        if (nUserIntSurfs == 0) then
+            return ! Nothing to do
+        end if
+
+        ! Set the pointers for the required communication variables
+        domainLoop: do nn = 1, nDom
+            if (flowDoms(nn, 1, sps)%addGridVelocities) then
+                call terminate("userSurfaceIntegrations", "Cannot use user-supplied surface integrations"&
+                     &"on with moving grids")
+            end if
+
+            flowDoms(nn, 1, sps)%realCommVars(iRho)%var => flowDoms(nn, 1, sps)%w(:, :, :, iRho)
+            flowDoms(nn, 1, sps)%realCommVars(iVx)%var => flowDoms(nn, 1, sps)%w(:, :, :, iVx)
+            flowDoms(nn, 1, sps)%realCommVars(iVy)%var => flowDoms(nn, 1, sps)%w(:, :, :, iVy)
+            flowDoms(nn, 1, sps)%realCommVars(iVz)%var => flowDoms(nn, 1, sps)%w(:, :, :, iVz)
+            flowDoms(nn, 1, sps)%realCommVars(iZippFlowP)%var => flowDoms(nn, 1, sps)%P(:, :, :)
+            flowDoms(nn, 1, sps)%realCommVars(iZippFlowGamma)%var => flowDoms(nn, 1, sps)%gamma(:, :, :)
+            ! flowDoms(nn, 1, sps)%realCommVars(iZippFlowSface)%var => Not Implemented
+
+            flowDoms(nn, 1, sps)%realCommVars(iZippFlowX)%var => flowDoms(nn, 1, sps)%x(:, :, :, 1)
+            flowDoms(nn, 1, sps)%realCommVars(iZippFlowY)%var => flowDoms(nn, 1, sps)%x(:, :, :, 2)
+            flowDoms(nn, 1, sps)%realCommVars(iZippFlowZ)%var => flowDoms(nn, 1, sps)%x(:, :, :, 3)
+        end do domainLoop
+
+        masterLoop: do iSurf = 1, nUserIntSurfs
+
+            ! Pointer for easier reading
+            surf => userIntSurfs(iSurf)
+
+            ! We will make a short-cut here: By definition user supplied
+            ! surfaces have a fixed family, we won't do anything if we
+            ! are not told to deal with this surface.
+
+            famInclude: if (famInList(surf%famID, famList)) then
+                nPts = size(surf%pts, 2)
+
+                ! Communicate the face values and the nodal values
+                if (myid == 0) then
+                    allocate (recvBuffer1(6 * nPts), recvBuffer2(3 * nPts))
+                else
+                    allocate (recvBuffer1(0), recvBuffer2(0))
+                end if
 
-    use constants
-    use block, onlY : flowDoms, nDom
-    use flowVarRefState, only : pRef, rhoRef, pRef, timeRef, LRef, TRef
-    use communication, only : myid, adflow_comm_world
-    use utils, only : EChk, mynorm2
-    use flowUtils, only : computePtot, computeTtot
-    use sorting, only : famInList
-    use zipperIntegrations, only : flowIntegrationZipper
-    use utils, only : terminate
-    implicit none
-
-    ! Input Parameters
-    real(kind=realType), dimension(nLocalValues), intent(inout) :: localValues
-    integer(kind=intType), dimension(:), intent(in) :: famList
-    integer(kind=intType), intent(in) :: sps
-
-    ! Working parameters
-    integer(kind=intType) :: iSurf, i, j, k, jj, ierr, nn, iDim, nPts
-    real(kind=realType), dimension(:), allocatable :: recvBuffer1, recvBuffer2
-    real(kind=realType), dimension(:, :), allocatable :: vars
-    integer(kind=intType), dimension(:), allocatable :: fams
-    logical, dimension(:), allocatable :: ptValid
-    type(userIntSurf), pointer :: surf
-
-    if (nUserIntSurfs == 0) then
-       return ! Nothing to do
-    end if
-
-    ! Set the pointers for the required communication variables
-    domainLoop:do nn=1, nDom
-       if (flowDoms(nn, 1, sps)%addGridVelocities) then
-          call terminate("userSurfaceIntegrations", "Cannot use user-supplied surface integrations"&
-               &"on with moving grids")
-       end if
-
-       flowDoms(nn, 1, sps)%realCommVars(iRho)%var => flowDoms(nn, 1, sps)%w(:, :, :, iRho)
-       flowDoms(nn, 1, sps)%realCommVars(iVx)%var => flowDoms(nn, 1, sps)%w(:, :, :, iVx)
-       flowDoms(nn, 1, sps)%realCommVars(iVy)%var => flowDoms(nn, 1, sps)%w(:, :, :, iVy)
-       flowDoms(nn, 1, sps)%realCommVars(iVz)%var => flowDoms(nn, 1, sps)%w(:, :, :, iVz)
-       flowDoms(nn, 1, sps)%realCommVars(iZippFlowP)%var => flowDoms(nn, 1, sps)%P(:, :, :)
-       flowDoms(nn, 1, sps)%realCommVars(iZippFlowGamma)%var => flowDoms(nn, 1, sps)%gamma(:, :, :)
-       ! flowDoms(nn, 1, sps)%realCommVars(iZippFlowSface)%var => Not Implemented
-
-       flowDoms(nn, 1, sps)%realCommVars(iZippFlowX)%var => flowDoms(nn, 1, sps)%x(:, :, :, 1)
-       flowDoms(nn, 1, sps)%realCommVars(iZippFlowY)%var => flowDoms(nn, 1, sps)%x(:, :, :, 2)
-       flowDoms(nn, 1, sps)%realCommVars(iZippFlowZ)%var => flowDoms(nn, 1, sps)%x(:, :, :, 3)
-    end do domainLoop
-
-    masterLoop: do iSurf=1, nUserIntSurfs
-
-       ! Pointer for easier reading
-       surf => userIntSurfs(iSurf)
-
-       ! We will make a short-cut here: By definition user supplied
-       ! surfaces have a fixed family, we won't do anything if we
-       ! are not told to deal with this surface.
-
-       famInclude: if (famInList(surf%famID, famList)) then
-          nPts = size(surf%pts, 2)
-
-          ! Communicate the face values and the nodal values
-          if (myid == 0) then
-             allocate(recvBuffer1(6*nPts), recvBuffer2(3*nPts))
-          else
-             allocate(recvBuffer1(0), recvBuffer2(0))
-          end if
-
-          call commUserIntegrationSurfaceVars(recvBuffer1, iRho, iZippFlowGamma, surf%flowComm)
-          call commUserIntegrationSurfaceVars(recvBuffer2, iZippFlowX, iZippFlowZ, surf%nodeComm)
-
-          ! *Finally* we can do the actual integrations
-          if (myid == 0)  then
-
-             ! Allocate some temporary data needed to supply to the
-             ! zipper integration routine.
-             allocate(ptValid(npts), vars(npts, nZippFlowComm), fams(size(surf%conn, 2)))
-
-             ! Initialize ptValid to True. If we find that it isn't,
-             ! we'll permenantly set that point to false. This could
-             ! come from either the node or the flow comms.
-
-             ptValid = .True.
-             ! Prepare for the "zipper" integration call. We have to
-             ! re-order the data according to the "inv" array in each
-             ! of the two comms.
-             do i=1, nPts
-
-                ! Flow Variables
-                j = surf%flowComm%inv(i)
-                vars(j, iRho:iZippFlowGamma) = recvBuffer1(6*(i-1) + iRho : 6*(i-1) + iZippFlowGamma)
-
-                if (.not. surf%flowComm%valid(i)) then
-                   ptValid(j) = .False.
+                call commUserIntegrationSurfaceVars(recvBuffer1, iRho, iZippFlowGamma, surf%flowComm)
+                call commUserIntegrationSurfaceVars(recvBuffer2, iZippFlowX, iZippFlowZ, surf%nodeComm)
+
+                ! *Finally* we can do the actual integrations
+                if (myid == 0) then
+
+                    ! Allocate some temporary data needed to supply to the
+                    ! zipper integration routine.
+                    allocate (ptValid(npts), vars(npts, nZippFlowComm), fams(size(surf%conn, 2)))
+
+                    ! Initialize ptValid to True. If we find that it isn't,
+                    ! we'll permenantly set that point to false. This could
+                    ! come from either the node or the flow comms.
+
+                    ptValid = .True.
+                    ! Prepare for the "zipper" integration call. We have to
+                    ! re-order the data according to the "inv" array in each
+                    ! of the two comms.
+                    do i = 1, nPts
+
+                        ! Flow Variables
+                        j = surf%flowComm%inv(i)
+                        vars(j, iRho:iZippFlowGamma) = recvBuffer1(6 * (i - 1) + iRho:6 * (i - 1) + iZippFlowGamma)
+
+                        if (.not. surf%flowComm%valid(i)) then
+                            ptValid(j) = .False.
+                        end if
+
+                        ! Sface is not implemented. To correctly do this,
+                        ! interpolate the three components of 's', do the dot
+                        ! product with the local normal to get the sFace value.
+                        vars(j, iZippFlowSface) = zero
+
+                        ! Node Comm Values
+                        j = surf%nodeComm%inv(i)
+                        vars(j, iZippFlowX:iZippFlowZ) = recvBuffer2(3 * i - 2:3 * i)
+
+                        ! The additional pt-valid array
+                        if (.not. surf%nodeComm%valid(i)) then
+                            ptValid(j) = .False.
+                        end if
+                    end do
+
+                    ! The family array is all the same value:
+                    fams = surf%famID
+                    ! Perform the actual integration
+                    call flowIntegrationZipper(surf%isInflow, surf%conn, fams, vars, localValues, famList, sps, ptValid)
+                    deallocate (ptValid, vars, fams)
+                end if
+                deallocate (recvBuffer1, recvBuffer2)
+            end if famInclude
+        end do masterLoop
+    end subroutine integrateUserSurfaces
+#ifndef USE_COMPLEX
+    subroutine integrateUserSurfaces_d(localValues, localValuesd, famList, sps)
+
+        use constants
+        use block, onlY: flowDoms, flowDomsd, nDom
+        use flowVarRefState, only: pRef, rhoRef, pRef, timeRef, LRef, TRef
+        use communication, only: myid, adflow_comm_world
+        use utils, only: EChk, mynorm2
+        use flowUtils, only: computePtot, computeTtot
+        use sorting, only: famInList
+        use zipperIntegrations_d, only: flowIntegrationZipper_d
+        use utils, only: terminate
+        implicit none
+
+        ! Input Parameters
+        real(kind=realType), dimension(nLocalValues), intent(inout) :: localValues, localValuesd
+        integer(kind=intType), dimension(:), intent(in) :: famList
+        integer(kind=intType), intent(in) :: sps
+
+        ! Working parameters
+        integer(kind=intType) :: iSurf, i, j, k, jj, ierr, nn, iDim, nPts
+        real(kind=realType), dimension(:), allocatable :: recvBuffer1, recvBuffer2
+        real(kind=realType), dimension(:), allocatable :: recvBuffer1d, recvBuffer2d
+        real(kind=realType), dimension(:, :), allocatable :: vars, varsd
+        integer(kind=intType), dimension(:), allocatable :: fams
+        logical, dimension(:), allocatable :: ptValid
+        type(userIntSurf), pointer :: surf
+
+        if (nUserIntSurfs == 0) then
+            return ! Nothing to do
+        end if
+
+        ! Set the pointers for the required communication variables
+        domainLoop: do nn = 1, nDom
+            if (flowDoms(nn, 1, sps)%addGridVelocities) then
+                call terminate("userSurfaceIntegrations", "Cannot use user-supplied surface integrations"&
+                     &"on with moving grids")
+            end if
+
+            flowDoms(nn, 1, sps)%realCommVars(iRho)%var => flowDoms(nn, 1, sps)%w(:, :, :, iRho)
+            flowDoms(nn, 1, sps)%realCommVars(iVx)%var => flowDoms(nn, 1, sps)%w(:, :, :, iVx)
+            flowDoms(nn, 1, sps)%realCommVars(iVy)%var => flowDoms(nn, 1, sps)%w(:, :, :, iVy)
+            flowDoms(nn, 1, sps)%realCommVars(iVz)%var => flowDoms(nn, 1, sps)%w(:, :, :, iVz)
+            flowDoms(nn, 1, sps)%realCommVars(iZippFlowP)%var => flowDoms(nn, 1, sps)%P(:, :, :)
+            flowDoms(nn, 1, sps)%realCommVars(iZippFlowGamma)%var => flowDoms(nn, 1, sps)%gamma(:, :, :)
+            ! flowDoms(nn, 1, sps)%realCommVars(iZippFlowSface)%var => Not Implemented
+
+            flowDoms(nn, 1, sps)%realCommVars(iZippFlowX)%var => flowDoms(nn, 1, sps)%x(:, :, :, 1)
+            flowDoms(nn, 1, sps)%realCommVars(iZippFlowY)%var => flowDoms(nn, 1, sps)%x(:, :, :, 2)
+            flowDoms(nn, 1, sps)%realCommVars(iZippFlowZ)%var => flowDoms(nn, 1, sps)%x(:, :, :, 3)
+
+            flowDoms(nn, 1, sps)%realCommVars(iRho + nZippFlowComm)%var => flowDomsd(nn, 1, sps)%w(:, :, :, iRho)
+            flowDoms(nn, 1, sps)%realCommVars(iVx + nZippFlowComm)%var => flowDomsd(nn, 1, sps)%w(:, :, :, iVx)
+            flowDoms(nn, 1, sps)%realCommVars(iVy + nZippFlowComm)%var => flowDomsd(nn, 1, sps)%w(:, :, :, iVy)
+            flowDoms(nn, 1, sps)%realCommVars(iVz + nZippFlowComm)%var => flowDomsd(nn, 1, sps)%w(:, :, :, iVz)
+            flowDoms(nn, 1, sps)%realCommVars(iZippFlowP + nZippFlowComm)%var => flowDomsd(nn, 1, sps)%P(:, :, :)
+           flowDoms(nn, 1, sps)%realCommVars(iZippFlowGamma + nZippFlowComm)%var => flowDomsd(nn, 1, sps)%gamma(:, :, :)
+            ! flowDoms(nn, 1, sps)%realCommVars(iZippFlowSface+nZippFlowComm)%var => Not Implemented
+
+            flowDoms(nn, 1, sps)%realCommVars(iZippFlowX + nZippFlowComm)%var => flowDomsd(nn, 1, sps)%x(:, :, :, 1)
+            flowDoms(nn, 1, sps)%realCommVars(iZippFlowY + nZippFlowComm)%var => flowDomsd(nn, 1, sps)%x(:, :, :, 2)
+            flowDoms(nn, 1, sps)%realCommVars(iZippFlowZ + nZippFlowComm)%var => flowDomsd(nn, 1, sps)%x(:, :, :, 3)
+
+        end do domainLoop
+
+        masterLoop: do iSurf = 1, nUserIntSurfs
+
+            ! Pointer for easier reading
+            surf => userIntSurfs(iSurf)
+
+            ! We will make a short-cut here: By definition user supplied
+            ! surfaces have a fixed family, we won't do anything if we
+            ! are not told to deal with this surface.
+
+            famInclude: if (famInList(surf%famID, famList)) then
+                nPts = size(surf%pts, 2)
+
+                ! Communicate the face values and the nodal values
+                if (myid == 0) then
+                    allocate (recvBuffer1(6 * nPts), recvBuffer2(3 * nPts))
+                    allocate (recvBuffer1d(6 * nPts), recvBuffer2d(3 * nPts))
+                else
+                    allocate (recvBuffer1(0), recvBuffer2(0))
+                    allocate (recvBuffer1d(0), recvBuffer2d(0))
                 end if
 
-                ! Sface is not implemented. To correctly do this,
-                ! interpolate the three components of 's', do the dot
-                ! product with the local normal to get the sFace value.
-                vars(j, iZippFlowSface)  = zero
+                call commUserIntegrationSurfaceVars_d(recvBuffer1, recvBuffer1d, iRho, iZippFlowGamma, surf%flowComm)
+                call commUserIntegrationSurfaceVars_d(recvBuffer2, recvBuffer2d, iZippFlowX, iZippFlowZ, surf%nodeComm)
 
-                ! Node Comm Values
-                j = surf%nodeComm%inv(i)
-                vars(j, iZippFlowX:iZippFlowZ) = recvBuffer2(3*i-2:3*i)
+                ! *Finally* we can do the actual integrations
+                if (myid == 0) then
 
-                ! The additional pt-valid array
-                if (.not. surf%nodeComm%valid(i)) then
-                   ptValid(j) = .False.
-                end if
-             end do
-
-             ! The family array is all the same value:
-             fams = surf%famID
-             ! Perform the actual integration
-             call flowIntegrationZipper(surf%isInflow, surf%conn, fams, vars, localValues, famList, sps, ptValid)
-             deallocate(ptValid, vars, fams)
-          end if
-          deallocate(recvBuffer1, recvBuffer2)
-       end if famInclude
-    end do masterLoop
-  end subroutine integrateUserSurfaces
-#ifndef USE_COMPLEX
-  subroutine integrateUserSurfaces_d(localValues, localValuesd, famList, sps)
+                    ! Allocate some temporary data needed to supply to the
+                    ! zipper integration routine.
+                    allocate (ptValid(npts), vars(npts, nZippFlowComm), &
+                              varsd(npts, nZippFlowComm), fams(size(surf%conn, 2)))
 
-    use constants
-    use block, onlY : flowDoms, flowDomsd, nDom
-    use flowVarRefState, only : pRef, rhoRef, pRef, timeRef, LRef, TRef
-    use communication, only : myid, adflow_comm_world
-    use utils, only : EChk, mynorm2
-    use flowUtils, only : computePtot, computeTtot
-    use sorting, only : famInList
-    use zipperIntegrations_d, only : flowIntegrationZipper_d
-    use utils, only : terminate
-    implicit none
-
-    ! Input Parameters
-    real(kind=realType), dimension(nLocalValues), intent(inout) :: localValues, localValuesd
-    integer(kind=intType), dimension(:), intent(in) :: famList
-    integer(kind=intType), intent(in) :: sps
-
-    ! Working parameters
-    integer(kind=intType) :: iSurf, i, j, k, jj, ierr, nn, iDim, nPts
-    real(kind=realType), dimension(:), allocatable :: recvBuffer1, recvBuffer2
-    real(kind=realType), dimension(:), allocatable :: recvBuffer1d, recvBuffer2d
-    real(kind=realType), dimension(:, :), allocatable :: vars, varsd
-    integer(kind=intType), dimension(:), allocatable :: fams
-    logical, dimension(:), allocatable :: ptValid
-    type(userIntSurf), pointer :: surf
-
-    if (nUserIntSurfs == 0) then
-       return ! Nothing to do
-    end if
-
-    ! Set the pointers for the required communication variables
-    domainLoop:do nn=1, nDom
-       if (flowDoms(nn, 1, sps)%addGridVelocities) then
-          call terminate("userSurfaceIntegrations", "Cannot use user-supplied surface integrations"&
-               &"on with moving grids")
-       end if
-
-       flowDoms(nn, 1, sps)%realCommVars(iRho)%var => flowDoms(nn, 1, sps)%w(:, :, :, iRho)
-       flowDoms(nn, 1, sps)%realCommVars(iVx)%var => flowDoms(nn, 1, sps)%w(:, :, :, iVx)
-       flowDoms(nn, 1, sps)%realCommVars(iVy)%var => flowDoms(nn, 1, sps)%w(:, :, :, iVy)
-       flowDoms(nn, 1, sps)%realCommVars(iVz)%var => flowDoms(nn, 1, sps)%w(:, :, :, iVz)
-       flowDoms(nn, 1, sps)%realCommVars(iZippFlowP)%var => flowDoms(nn, 1, sps)%P(:, :, :)
-       flowDoms(nn, 1, sps)%realCommVars(iZippFlowGamma)%var => flowDoms(nn, 1, sps)%gamma(:, :, :)
-       ! flowDoms(nn, 1, sps)%realCommVars(iZippFlowSface)%var => Not Implemented
-
-       flowDoms(nn, 1, sps)%realCommVars(iZippFlowX)%var => flowDoms(nn, 1, sps)%x(:, :, :, 1)
-       flowDoms(nn, 1, sps)%realCommVars(iZippFlowY)%var => flowDoms(nn, 1, sps)%x(:, :, :, 2)
-       flowDoms(nn, 1, sps)%realCommVars(iZippFlowZ)%var => flowDoms(nn, 1, sps)%x(:, :, :, 3)
-
-       flowDoms(nn, 1, sps)%realCommVars(iRho+nZippFlowComm)%var => flowDomsd(nn, 1, sps)%w(:, :, :, iRho)
-       flowDoms(nn, 1, sps)%realCommVars(iVx+nZippFlowComm)%var => flowDomsd(nn, 1, sps)%w(:, :, :, iVx)
-       flowDoms(nn, 1, sps)%realCommVars(iVy+nZippFlowComm)%var => flowDomsd(nn, 1, sps)%w(:, :, :, iVy)
-       flowDoms(nn, 1, sps)%realCommVars(iVz+nZippFlowComm)%var => flowDomsd(nn, 1, sps)%w(:, :, :, iVz)
-       flowDoms(nn, 1, sps)%realCommVars(iZippFlowP+nZippFlowComm)%var => flowDomsd(nn, 1, sps)%P(:, :, :)
-       flowDoms(nn, 1, sps)%realCommVars(iZippFlowGamma+nZippFlowComm)%var => flowDomsd(nn, 1, sps)%gamma(:, :, :)
-       ! flowDoms(nn, 1, sps)%realCommVars(iZippFlowSface+nZippFlowComm)%var => Not Implemented
-
-       flowDoms(nn, 1, sps)%realCommVars(iZippFlowX+nZippFlowComm)%var => flowDomsd(nn, 1, sps)%x(:, :, :, 1)
-       flowDoms(nn, 1, sps)%realCommVars(iZippFlowY+nZippFlowComm)%var => flowDomsd(nn, 1, sps)%x(:, :, :, 2)
-       flowDoms(nn, 1, sps)%realCommVars(iZippFlowZ+nZippFlowComm)%var => flowDomsd(nn, 1, sps)%x(:, :, :, 3)
-
-    end do domainLoop
-
-    masterLoop: do iSurf=1, nUserIntSurfs
-
-       ! Pointer for easier reading
-       surf => userIntSurfs(iSurf)
-
-       ! We will make a short-cut here: By definition user supplied
-       ! surfaces have a fixed family, we won't do anything if we
-       ! are not told to deal with this surface.
-
-       famInclude: if (famInList(surf%famID, famList)) then
-          nPts = size(surf%pts, 2)
-
-          ! Communicate the face values and the nodal values
-          if (myid == 0) then
-             allocate(recvBuffer1(6*nPts), recvBuffer2(3*nPts))
-             allocate(recvBuffer1d(6*nPts), recvBuffer2d(3*nPts))
-          else
-             allocate(recvBuffer1(0), recvBuffer2(0))
-             allocate(recvBuffer1d(0), recvBuffer2d(0))
-          end if
-
-          call commUserIntegrationSurfaceVars_d(recvBuffer1, recvBuffer1d, iRho, iZippFlowGamma, surf%flowComm)
-          call commUserIntegrationSurfaceVars_d(recvBuffer2, recvBuffer2d, iZippFlowX, iZippFlowZ, surf%nodeComm)
-
-          ! *Finally* we can do the actual integrations
-          if (myid == 0)  then
-
-             ! Allocate some temporary data needed to supply to the
-             ! zipper integration routine.
-             allocate(ptValid(npts), vars(npts, nZippFlowComm), &
-                  varsd(npts, nZippFlowComm), fams(size(surf%conn, 2)))
-
-             ! Initialize ptValid to True. If we find that it isn't,
-             ! we'll permenantly set that point to false. This could
-             ! come from either the node or the flow comms.
-
-             ptValid = .True.
-
-             ! Prepare for the "zipper" integration call. We have to
-             ! re-order the data according to the "inv" array in each
-             ! of the two comms.
-             do i=1, nPts
-
-                ! Flow Variables
-                j = surf%flowComm%inv(i)
-
-                vars(j, iRho:iZippFlowGamma) = recvBuffer1(6*(i-1) + iRho : 6*(i-1) + iZippFlowGamma)
-                varsd(j, iRho:iZippFlowGamma) = recvBuffer1d(6*(i-1) + iRho : 6*(i-1) + iZippFlowGamma)
-
-                if (.not. surf%flowComm%valid(i)) then
-                   ptValid(j) = .False.
-                end if
+                    ! Initialize ptValid to True. If we find that it isn't,
+                    ! we'll permenantly set that point to false. This could
+                    ! come from either the node or the flow comms.
 
-                ! Sface is not implemented. To correctly do this,
-                ! interpolate the three components of 's', do the dot
-                ! product with the local normal to get the sFace value.
-                vars(j, iZippFlowSface)  = zero
-                varsd(j, iZippFlowSface)  = zero
+                    ptValid = .True.
 
-                ! Node Comm Values
-                j = surf%nodeComm%inv(i)
-                vars(j, iZippFlowX:iZippFlowZ) = recvBuffer2(3*i-2:3*i)
-                varsd(j, iZippFlowX:iZippFlowZ) = recvBuffer2d(3*i-2:3*i)
+                    ! Prepare for the "zipper" integration call. We have to
+                    ! re-order the data according to the "inv" array in each
+                    ! of the two comms.
+                    do i = 1, nPts
 
-                ! The additional pt-valid array
-                if (.not. surf%nodeComm%valid(i)) then
-                   ptValid(j) = .False.
-                end if
-             end do
+                        ! Flow Variables
+                        j = surf%flowComm%inv(i)
 
-             ! The family array is all the same value:
-             fams = surf%famID
+                        vars(j, iRho:iZippFlowGamma) = recvBuffer1(6 * (i - 1) + iRho:6 * (i - 1) + iZippFlowGamma)
+                        varsd(j, iRho:iZippFlowGamma) = recvBuffer1d(6 * (i - 1) + iRho:6 * (i - 1) + iZippFlowGamma)
 
-             ! Perform the actual integration
-             call flowIntegrationZipper_d(surf%isInflow, surf%conn, fams, vars, varsd, localValues, localValuesd, &
-                  famList, sps, ptValid)
-             deallocate(ptValid, vars, varsd, fams)
-          end if
-          deallocate(recvBuffer1, recvBuffer2, recvBuffer1d, recvBuffer2d)
-       end if famInclude
-    end do masterLoop
-  end subroutine integrateUserSurfaces_d
+                        if (.not. surf%flowComm%valid(i)) then
+                            ptValid(j) = .False.
+                        end if
 
- subroutine integrateUserSurfaces_b(localValues, localValuesd, famList, sps)
+                        ! Sface is not implemented. To correctly do this,
+                        ! interpolate the three components of 's', do the dot
+                        ! product with the local normal to get the sFace value.
+                        vars(j, iZippFlowSface) = zero
+                        varsd(j, iZippFlowSface) = zero
 
-    use constants
-    use block, onlY : flowDoms, flowDomsd, nDom
-    use flowVarRefState, only : pRef, rhoRef, pRef, timeRef, LRef, TRef
-    use communication, only : myid, adflow_comm_world
-    use utils, only : EChk, mynorm2
-    use flowUtils, only : computePtot, computeTtot
-    use sorting, only : famInList
-    use zipperIntegrations_b, only : flowIntegrationZipper_b
-    use utils, only : terminate
-    implicit none
-
-    ! Input Parameters
-    real(kind=realType), dimension(nLocalValues), intent(inout) :: localValues, localValuesd
-    integer(kind=intType), dimension(:), intent(in) :: famList
-    integer(kind=intType), intent(in) :: sps
-
-    ! Working parameters
-    integer(kind=intType) :: iSurf, i, j, k, jj, ierr, nn, iDim, nPts
-    real(kind=realType), dimension(:), allocatable :: recvBuffer1, recvBuffer2
-    real(kind=realType), dimension(:), allocatable :: recvBuffer1d, recvBuffer2d
-    real(kind=realType), dimension(:, :), allocatable :: vars, varsd
-    integer(kind=intType), dimension(:), allocatable :: fams
-    logical, dimension(:), allocatable :: ptValid
-    type(userIntSurf), pointer :: surf
-
-    if (nUserIntSurfs == 0) then
-       return ! Nothing to do
-    end if
-
-    ! Run the foward mode code pass:
-    call IntegrateUserSurfaces(localValues, famLIst, sps)
-
-    ! Set the pointers for the required communication variables
-    domainLoop:do nn=1, nDom
-       if (flowDoms(nn, 1, sps)%addGridVelocities) then
-          call terminate("userSurfaceIntegrations", "Cannot use user-supplied surface integrations"&
-               &"on with moving grids")
-       end if
-
-       flowDoms(nn, 1, sps)%realCommVars(iRho)%var => flowDoms(nn, 1, sps)%w(:, :, :, iRho)
-       flowDoms(nn, 1, sps)%realCommVars(iVx)%var => flowDoms(nn, 1, sps)%w(:, :, :, iVx)
-       flowDoms(nn, 1, sps)%realCommVars(iVy)%var => flowDoms(nn, 1, sps)%w(:, :, :, iVy)
-       flowDoms(nn, 1, sps)%realCommVars(iVz)%var => flowDoms(nn, 1, sps)%w(:, :, :, iVz)
-       flowDoms(nn, 1, sps)%realCommVars(iZippFlowP)%var => flowDoms(nn, 1, sps)%P(:, :, :)
-       flowDoms(nn, 1, sps)%realCommVars(iZippFlowGamma)%var => flowDoms(nn, 1, sps)%gamma(:, :, :)
-       ! flowDoms(nn, 1, sps)%realCommVars(iZippFlowSface)%var => Not Implemented
-
-       flowDoms(nn, 1, sps)%realCommVars(iZippFlowX)%var => flowDoms(nn, 1, sps)%x(:, :, :, 1)
-       flowDoms(nn, 1, sps)%realCommVars(iZippFlowY)%var => flowDoms(nn, 1, sps)%x(:, :, :, 2)
-       flowDoms(nn, 1, sps)%realCommVars(iZippFlowZ)%var => flowDoms(nn, 1, sps)%x(:, :, :, 3)
-
-       flowDoms(nn, 1, sps)%realCommVars(iRho+nZippFlowComm)%var => flowDomsd(nn, 1, sps)%w(:, :, :, iRho)
-       flowDoms(nn, 1, sps)%realCommVars(iVx+nZippFlowComm)%var => flowDomsd(nn, 1, sps)%w(:, :, :, iVx)
-       flowDoms(nn, 1, sps)%realCommVars(iVy+nZippFlowComm)%var => flowDomsd(nn, 1, sps)%w(:, :, :, iVy)
-       flowDoms(nn, 1, sps)%realCommVars(iVz+nZippFlowComm)%var => flowDomsd(nn, 1, sps)%w(:, :, :, iVz)
-       flowDoms(nn, 1, sps)%realCommVars(iZippFlowP+nZippFlowComm)%var => flowDomsd(nn, 1, sps)%P(:, :, :)
-       flowDoms(nn, 1, sps)%realCommVars(iZippFlowGamma+nZippFlowComm)%var => flowDomsd(nn, 1, sps)%gamma(:, :, :)
-       ! flowDoms(nn, 1, sps)%realCommVars(iZippFlowSface+nZippFlowComm)%var => Not Implemented
-
-       flowDoms(nn, 1, sps)%realCommVars(iZippFlowX+nZippFlowComm)%var => flowDomsd(nn, 1, sps)%x(:, :, :, 1)
-       flowDoms(nn, 1, sps)%realCommVars(iZippFlowY+nZippFlowComm)%var => flowDomsd(nn, 1, sps)%x(:, :, :, 2)
-       flowDoms(nn, 1, sps)%realCommVars(iZippFlowZ+nZippFlowComm)%var => flowDomsd(nn, 1, sps)%x(:, :, :, 3)
-
-    end do domainLoop
-
-    masterLoop: do iSurf=1, nUserIntSurfs
-
-       ! Pointer for easier reading
-       surf => userIntSurfs(iSurf)
-
-       ! We will make a short-cut here: By definition user supplied
-       ! surfaces have a fixed family, we won't do anything if we
-       ! are not told to deal with this surface.
-
-       famInclude: if (famInList(surf%famID, famList)) then
-          nPts = size(surf%pts, 2)
-
-          ! Communicate the face values and the nodal values
-          if (myid == 0) then
-             allocate(recvBuffer1(6*nPts), recvBuffer2(3*nPts))
-             allocate(recvBuffer1d(6*nPts), recvBuffer2d(3*nPts))
-          else
-             allocate(recvBuffer1(0), recvBuffer2(0))
-             allocate(recvBuffer1d(0), recvBuffer2d(0))
-          end if
-
-          call commUserIntegrationSurfaceVars(recvBuffer1, iRho, iZippFlowGamma, surf%flowComm)
-          call commUserIntegrationSurfaceVars(recvBuffer2, iZippFlowX, iZippFlowZ, surf%nodeComm)
-
-          ! *Finally* we can do the actual integrations
-          if (myid == 0)  then
-
-             ! Allocate some temporary data needed to supply to the
-             ! zipper integration routine.
-             allocate(ptValid(npts), vars(npts, nZippFlowComm), &
-                  varsd(npts, nZippFlowComm), fams(size(surf%conn, 2)))
-
-             ! Initialize ptValid to True. If we find that it isn't,
-             ! we'll permenantly set that point to false. This could
-             ! come from either the node or the flow comms.
-
-             ptValid = .True.
-
-             varsd= zero
-             ! Prepare for the "zipper" integration call. We have to
-             ! re-order the data according to the "inv" array in each
-             ! of the two comms.
-             do i=1, nPts
-
-                ! Flow Variables
-                j = surf%flowComm%inv(i)
-                vars(j, iRho:iZippFlowGamma) = recvBuffer1(6*(i-1) + iRho : 6*(i-1) + iZippFlowGamma)
-
-                if (.not. surf%flowComm%valid(i)) then
-                   ptValid(j) = .False.
-                end if
+                        ! Node Comm Values
+                        j = surf%nodeComm%inv(i)
+                        vars(j, iZippFlowX:iZippFlowZ) = recvBuffer2(3 * i - 2:3 * i)
+                        varsd(j, iZippFlowX:iZippFlowZ) = recvBuffer2d(3 * i - 2:3 * i)
 
-                ! Sface is not implemented. To correctly do this,
-                ! interpolate the three components of 's', do the dot
-                ! product with the local normal to get the sFace value.
-                vars(j, iZippFlowSface)  = zero
+                        ! The additional pt-valid array
+                        if (.not. surf%nodeComm%valid(i)) then
+                            ptValid(j) = .False.
+                        end if
+                    end do
 
-                ! Node Comm Values
-                j = surf%nodeComm%inv(i)
-                vars(j, iZippFlowX:iZippFlowZ) = recvBuffer2(3*i-2:3*i)
+                    ! The family array is all the same value:
+                    fams = surf%famID
 
-                ! The additional pt-valid array
-                if (.not. surf%nodeComm%valid(i)) then
-                   ptValid(j) = .False.
+                    ! Perform the actual integration
+                  call flowIntegrationZipper_d(surf%isInflow, surf%conn, fams, vars, varsd, localValues, localValuesd, &
+                                                 famList, sps, ptValid)
+                    deallocate (ptValid, vars, varsd, fams)
+                end if
+                deallocate (recvBuffer1, recvBuffer2, recvBuffer1d, recvBuffer2d)
+            end if famInclude
+        end do masterLoop
+    end subroutine integrateUserSurfaces_d
+
+    subroutine integrateUserSurfaces_b(localValues, localValuesd, famList, sps)
+
+        use constants
+        use block, onlY: flowDoms, flowDomsd, nDom
+        use flowVarRefState, only: pRef, rhoRef, pRef, timeRef, LRef, TRef
+        use communication, only: myid, adflow_comm_world
+        use utils, only: EChk, mynorm2
+        use flowUtils, only: computePtot, computeTtot
+        use sorting, only: famInList
+        use zipperIntegrations_b, only: flowIntegrationZipper_b
+        use utils, only: terminate
+        implicit none
+
+        ! Input Parameters
+        real(kind=realType), dimension(nLocalValues), intent(inout) :: localValues, localValuesd
+        integer(kind=intType), dimension(:), intent(in) :: famList
+        integer(kind=intType), intent(in) :: sps
+
+        ! Working parameters
+        integer(kind=intType) :: iSurf, i, j, k, jj, ierr, nn, iDim, nPts
+        real(kind=realType), dimension(:), allocatable :: recvBuffer1, recvBuffer2
+        real(kind=realType), dimension(:), allocatable :: recvBuffer1d, recvBuffer2d
+        real(kind=realType), dimension(:, :), allocatable :: vars, varsd
+        integer(kind=intType), dimension(:), allocatable :: fams
+        logical, dimension(:), allocatable :: ptValid
+        type(userIntSurf), pointer :: surf
+
+        if (nUserIntSurfs == 0) then
+            return ! Nothing to do
+        end if
+
+        ! Run the foward mode code pass:
+        call IntegrateUserSurfaces(localValues, famLIst, sps)
+
+        ! Set the pointers for the required communication variables
+        domainLoop: do nn = 1, nDom
+            if (flowDoms(nn, 1, sps)%addGridVelocities) then
+                call terminate("userSurfaceIntegrations", "Cannot use user-supplied surface integrations"&
+                     &"on with moving grids")
+            end if
+
+            flowDoms(nn, 1, sps)%realCommVars(iRho)%var => flowDoms(nn, 1, sps)%w(:, :, :, iRho)
+            flowDoms(nn, 1, sps)%realCommVars(iVx)%var => flowDoms(nn, 1, sps)%w(:, :, :, iVx)
+            flowDoms(nn, 1, sps)%realCommVars(iVy)%var => flowDoms(nn, 1, sps)%w(:, :, :, iVy)
+            flowDoms(nn, 1, sps)%realCommVars(iVz)%var => flowDoms(nn, 1, sps)%w(:, :, :, iVz)
+            flowDoms(nn, 1, sps)%realCommVars(iZippFlowP)%var => flowDoms(nn, 1, sps)%P(:, :, :)
+            flowDoms(nn, 1, sps)%realCommVars(iZippFlowGamma)%var => flowDoms(nn, 1, sps)%gamma(:, :, :)
+            ! flowDoms(nn, 1, sps)%realCommVars(iZippFlowSface)%var => Not Implemented
+
+            flowDoms(nn, 1, sps)%realCommVars(iZippFlowX)%var => flowDoms(nn, 1, sps)%x(:, :, :, 1)
+            flowDoms(nn, 1, sps)%realCommVars(iZippFlowY)%var => flowDoms(nn, 1, sps)%x(:, :, :, 2)
+            flowDoms(nn, 1, sps)%realCommVars(iZippFlowZ)%var => flowDoms(nn, 1, sps)%x(:, :, :, 3)
+
+            flowDoms(nn, 1, sps)%realCommVars(iRho + nZippFlowComm)%var => flowDomsd(nn, 1, sps)%w(:, :, :, iRho)
+            flowDoms(nn, 1, sps)%realCommVars(iVx + nZippFlowComm)%var => flowDomsd(nn, 1, sps)%w(:, :, :, iVx)
+            flowDoms(nn, 1, sps)%realCommVars(iVy + nZippFlowComm)%var => flowDomsd(nn, 1, sps)%w(:, :, :, iVy)
+            flowDoms(nn, 1, sps)%realCommVars(iVz + nZippFlowComm)%var => flowDomsd(nn, 1, sps)%w(:, :, :, iVz)
+            flowDoms(nn, 1, sps)%realCommVars(iZippFlowP + nZippFlowComm)%var => flowDomsd(nn, 1, sps)%P(:, :, :)
+           flowDoms(nn, 1, sps)%realCommVars(iZippFlowGamma + nZippFlowComm)%var => flowDomsd(nn, 1, sps)%gamma(:, :, :)
+            ! flowDoms(nn, 1, sps)%realCommVars(iZippFlowSface+nZippFlowComm)%var => Not Implemented
+
+            flowDoms(nn, 1, sps)%realCommVars(iZippFlowX + nZippFlowComm)%var => flowDomsd(nn, 1, sps)%x(:, :, :, 1)
+            flowDoms(nn, 1, sps)%realCommVars(iZippFlowY + nZippFlowComm)%var => flowDomsd(nn, 1, sps)%x(:, :, :, 2)
+            flowDoms(nn, 1, sps)%realCommVars(iZippFlowZ + nZippFlowComm)%var => flowDomsd(nn, 1, sps)%x(:, :, :, 3)
+
+        end do domainLoop
+
+        masterLoop: do iSurf = 1, nUserIntSurfs
+
+            ! Pointer for easier reading
+            surf => userIntSurfs(iSurf)
+
+            ! We will make a short-cut here: By definition user supplied
+            ! surfaces have a fixed family, we won't do anything if we
+            ! are not told to deal with this surface.
+
+            famInclude: if (famInList(surf%famID, famList)) then
+                nPts = size(surf%pts, 2)
+
+                ! Communicate the face values and the nodal values
+                if (myid == 0) then
+                    allocate (recvBuffer1(6 * nPts), recvBuffer2(3 * nPts))
+                    allocate (recvBuffer1d(6 * nPts), recvBuffer2d(3 * nPts))
+                else
+                    allocate (recvBuffer1(0), recvBuffer2(0))
+                    allocate (recvBuffer1d(0), recvBuffer2d(0))
                 end if
-             end do
 
-             ! The family array is all the same value:
-             fams = surf%famID
+                call commUserIntegrationSurfaceVars(recvBuffer1, iRho, iZippFlowGamma, surf%flowComm)
+                call commUserIntegrationSurfaceVars(recvBuffer2, iZippFlowX, iZippFlowZ, surf%nodeComm)
 
-             ! Perform the actual (reverse) integration
-             call flowIntegrationZipper_b(surf%isInflow, surf%conn, fams, vars, varsd, localValues, localValuesd, &
-                  famList, sps, ptValid)
+                ! *Finally* we can do the actual integrations
+                if (myid == 0) then
 
-             ! Accumulate into the receive buffers
-             recvBuffer1d = zero
-             recvBuffer2d = zero
-             do i=1, nPts
+                    ! Allocate some temporary data needed to supply to the
+                    ! zipper integration routine.
+                    allocate (ptValid(npts), vars(npts, nZippFlowComm), &
+                              varsd(npts, nZippFlowComm), fams(size(surf%conn, 2)))
 
-                ! Accumulte back to the buffer
-                j = surf%flowComm%inv(i)
+                    ! Initialize ptValid to True. If we find that it isn't,
+                    ! we'll permenantly set that point to false. This could
+                    ! come from either the node or the flow comms.
 
-                recvBuffer1d(6*(i-1) + iRho : 6*(i-1) + iZippFlowGamma) = &
-                     recvBuffer1d(6*(i-1) + iRho : 6*(i-1) + iZippFlowGamma) + &
-                     varsd(j, iRho:iZippFlowGamma)
+                    ptValid = .True.
 
-                ! Sface is not implemented. No reverse seed. Just zero
+                    varsd = zero
+                    ! Prepare for the "zipper" integration call. We have to
+                    ! re-order the data according to the "inv" array in each
+                    ! of the two comms.
+                    do i = 1, nPts
 
-                ! Node Comm Values
-                j = surf%nodeComm%inv(i)
-                recvBuffer2d(3*i-2:3*i) = recvBuffer2d(3*i-2:3*i) + varsd(j, iZippFlowX:iZippFlowZ)
-             end do
+                        ! Flow Variables
+                        j = surf%flowComm%inv(i)
+                        vars(j, iRho:iZippFlowGamma) = recvBuffer1(6 * (i - 1) + iRho:6 * (i - 1) + iZippFlowGamma)
 
-             deallocate(ptValid, vars, varsd, fams)
-          end if
+                        if (.not. surf%flowComm%valid(i)) then
+                            ptValid(j) = .False.
+                        end if
 
-          ! Finish the reverse scatter
-          call commUserIntegrationSurfaceVars_b(recvBuffer1, recvBuffer1d, iRho, iZippFlowGamma, surf%flowComm)
-          call commUserIntegrationSurfaceVars_b(recvBuffer2, recvBuffer2d, iZippFlowX, iZippFlowZ, surf%nodeComm)
+                        ! Sface is not implemented. To correctly do this,
+                        ! interpolate the three components of 's', do the dot
+                        ! product with the local normal to get the sFace value.
+                        vars(j, iZippFlowSface) = zero
 
-          deallocate(recvBuffer1, recvBuffer2, recvBuffer1d, recvBuffer2d)
-       end if famInclude
-    end do masterLoop
-  end subroutine integrateUserSurfaces_b
+                        ! Node Comm Values
+                        j = surf%nodeComm%inv(i)
+                        vars(j, iZippFlowX:iZippFlowZ) = recvBuffer2(3 * i - 2:3 * i)
 
+                        ! The additional pt-valid array
+                        if (.not. surf%nodeComm%valid(i)) then
+                            ptValid(j) = .False.
+                        end if
+                    end do
 
-#endif
+                    ! The family array is all the same value:
+                    fams = surf%famID
 
-  subroutine addIntegrationSurface(pts, conn, famName, famID, isInflow, nPts, nConn)
-    ! Add a user-supplied integration surface.
+                    ! Perform the actual (reverse) integration
+                  call flowIntegrationZipper_b(surf%isInflow, surf%conn, fams, vars, varsd, localValues, localValuesd, &
+                                                 famList, sps, ptValid)
 
-    use communication, only : myID
-    use constants
+                    ! Accumulate into the receive buffers
+                    recvBuffer1d = zero
+                    recvBuffer2d = zero
+                    do i = 1, nPts
 
-    implicit none
-
-    ! Input variables
-    integer(kind=intType), intent(in) :: nPts, nConn, famID
-    real(kind=realType), dimension(3, nPts), intent(in) :: pts
-    integer(kind=intType), dimension(3, nConn), intent(in) :: conn
-    logical, intent(in) :: isInflow
-    character(len=*) :: famName
-    type(userIntSurf), pointer :: surf
-
-    ! Not really much to do here...we just have to save the data
-    ! into the data structure untilly we actual have to do the
-    ! search.
-    nUserIntSurfs = nUserIntSurfs + 1
-    if (nUserIntSurfs > nUserIntSurfsMax) then
-       print *,"Error: Exceeded the maximum number of user-supplied "&
-            &"integration slices. Increase nUserIntSurfsMax"
-       stop
-    end if
-
-    surf => userIntSurfs(nUserIntSurfs)
-    if (myid == 0) then
-       allocate(surf%pts(3, nPts), surf%conn(3, nConn))
-       surf%pts = pts
-       surf%conn = conn
-    end if
-    surf%famName = famName
-    surf%famID = famID
-    surf%isInflow = isInflow
-  end subroutine addIntegrationSurface
-
-  subroutine buildVolumeADTs(oBlocks, useDual)
-
-    ! This builds volume ADTs for the the owned blocks. It will build
-    ! either the dual mesh or the primal mesh depening on the flag
-    ! useDual.
+                        ! Accumulte back to the buffer
+                        j = surf%flowComm%inv(i)
 
-    use constants
-    use oversetData, only : oversetBlock
-    use blockPointers, only : nDom, x, ie, je, ke, il, jl, kl,  vol, ib, jb, kb,&
-         iBlank, BCData, nBocos, BCFaceID, BCType
-    use adtBuild, only : buildSerialHex
-    use utils, only : setPointers, EChk
-    implicit none
-
-    ! Input/Output Parameters
-    type(oversetBlock), dimension(:), target :: oBlocks
-    logical :: useDual
-
-    ! Working Parameters
-    integer(kind=intType) :: nInterpol, nn, i, j, k, iii, jjj, kkk
-    integer(kind=intType) :: iStart, jStart, kStart, iEnd, jEnd, kEnd
-    integer(kind=intType) :: ii, jj, kk, mm, nADT, nHexa, planeOffset
-    type(oversetBlock), pointer :: oBlock
-
-    nInterpol = 1 ! we get the ADT to compute the interpolated volume for us.
-
-    domainLoop: do nn=1, nDom
-
-       call setPointers(nn, 1, 1)
-       oBlock => oBlocks(nn)
-
-       primalOrDual: if (useDual) then
-
-          ! Now setup the data for the ADT
-          nHexa = il * jl * kl
-          nADT = ie * je * ke
-          oBlock%il = il
-          oBlock%jl = jl
-          oBlock%kl = kl
-
-          allocate(oBlock%xADT(3, nADT), oBlock%hexaConn(8, nHexa), &
-               oBlock%qualDonor(1, nADT))
-          ! Fill up the xADT using cell centers (dual mesh)
-          mm = 0
-          do k=1, ke
-             do j=1, je
-                do i=1, ie
-                   mm = mm + 1
-                   oBlock%xADT(:, mm) = eighth*(&
-                        x(i-1, j-1, k-1, :) + &
-                        x(i  , j-1, k-1, :) + &
-                        x(i-1, j  , k-1, :) + &
-                        x(i  , j  , k-1, :) + &
-                        x(i-1, j-1, k  , :) + &
-                        x(i  , j-1, k  , :) + &
-                        x(i-1, j  , k  , :) + &
-                        x(i  , j  , k  , :))
-                   oBlock%qualDonor(1, mm) = vol(i, j, k)
+                        recvBuffer1d(6 * (i - 1) + iRho:6 * (i - 1) + iZippFlowGamma) = &
+                            recvBuffer1d(6 * (i - 1) + iRho:6 * (i - 1) + iZippFlowGamma) + &
+                            varsd(j, iRho:iZippFlowGamma)
+
+                        ! Sface is not implemented. No reverse seed. Just zero
+
+                        ! Node Comm Values
+                        j = surf%nodeComm%inv(i)
+                        recvBuffer2d(3 * i - 2:3 * i) = recvBuffer2d(3 * i - 2:3 * i) + varsd(j, iZippFlowX:iZippFlowZ)
+                    end do
+
+                    deallocate (ptValid, vars, varsd, fams)
+                end if
+
+                ! Finish the reverse scatter
+                call commUserIntegrationSurfaceVars_b(recvBuffer1, recvBuffer1d, iRho, iZippFlowGamma, surf%flowComm)
+                call commUserIntegrationSurfaceVars_b(recvBuffer2, recvBuffer2d, iZippFlowX, iZippFlowZ, surf%nodeComm)
+
+                deallocate (recvBuffer1, recvBuffer2, recvBuffer1d, recvBuffer2d)
+            end if famInclude
+        end do masterLoop
+    end subroutine integrateUserSurfaces_b
+
+#endif
+
+    subroutine addIntegrationSurface(pts, conn, famName, famID, isInflow, nPts, nConn)
+        ! Add a user-supplied integration surface.
+
+        use communication, only: myID
+        use constants
+
+        implicit none
+
+        ! Input variables
+        integer(kind=intType), intent(in) :: nPts, nConn, famID
+        real(kind=realType), dimension(3, nPts), intent(in) :: pts
+        integer(kind=intType), dimension(3, nConn), intent(in) :: conn
+        logical, intent(in) :: isInflow
+        character(len=*) :: famName
+        type(userIntSurf), pointer :: surf
+
+        ! Not really much to do here...we just have to save the data
+        ! into the data structure untilly we actual have to do the
+        ! search.
+        nUserIntSurfs = nUserIntSurfs + 1
+        if (nUserIntSurfs > nUserIntSurfsMax) then
+            print *, "Error: Exceeded the maximum number of user-supplied "&
+                 &"integration slices. Increase nUserIntSurfsMax"
+            stop
+        end if
+
+        surf => userIntSurfs(nUserIntSurfs)
+        if (myid == 0) then
+            allocate (surf%pts(3, nPts), surf%conn(3, nConn))
+            surf%pts = pts
+            surf%conn = conn
+        end if
+        surf%famName = famName
+        surf%famID = famID
+        surf%isInflow = isInflow
+    end subroutine addIntegrationSurface
+
+    subroutine buildVolumeADTs(oBlocks, useDual)
+
+        ! This builds volume ADTs for the the owned blocks. It will build
+        ! either the dual mesh or the primal mesh depening on the flag
+        ! useDual.
+
+        use constants
+        use oversetData, only: oversetBlock
+        use blockPointers, only: nDom, x, ie, je, ke, il, jl, kl, vol, ib, jb, kb, &
+                                 iBlank, BCData, nBocos, BCFaceID, BCType
+        use adtBuild, only: buildSerialHex
+        use utils, only: setPointers, EChk
+        implicit none
+
+        ! Input/Output Parameters
+        type(oversetBlock), dimension(:), target :: oBlocks
+        logical :: useDual
+
+        ! Working Parameters
+        integer(kind=intType) :: nInterpol, nn, i, j, k, iii, jjj, kkk
+        integer(kind=intType) :: iStart, jStart, kStart, iEnd, jEnd, kEnd
+        integer(kind=intType) :: ii, jj, kk, mm, nADT, nHexa, planeOffset
+        type(oversetBlock), pointer :: oBlock
+
+        nInterpol = 1 ! we get the ADT to compute the interpolated volume for us.
+
+        domainLoop: do nn = 1, nDom
+
+            call setPointers(nn, 1, 1)
+            oBlock => oBlocks(nn)
+
+            primalOrDual: if (useDual) then
+
+                ! Now setup the data for the ADT
+                nHexa = il * jl * kl
+                nADT = ie * je * ke
+                oBlock%il = il
+                oBlock%jl = jl
+                oBlock%kl = kl
+
+                allocate (oBlock%xADT(3, nADT), oBlock%hexaConn(8, nHexa), &
+                          oBlock%qualDonor(1, nADT))
+                ! Fill up the xADT using cell centers (dual mesh)
+                mm = 0
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            mm = mm + 1
+                            oBlock%xADT(:, mm) = eighth * ( &
+                                                 x(i - 1, j - 1, k - 1, :) + &
+                                                 x(i, j - 1, k - 1, :) + &
+                                                 x(i - 1, j, k - 1, :) + &
+                                                 x(i, j, k - 1, :) + &
+                                                 x(i - 1, j - 1, k, :) + &
+                                                 x(i, j - 1, k, :) + &
+                                                 x(i - 1, j, k, :) + &
+                                                 x(i, j, k, :))
+                            oBlock%qualDonor(1, mm) = vol(i, j, k)
+                        end do
+                    end do
                 end do
-             end do
-          end do
-
-          do mm=1,nBocos
-             ! We need to make sure that the interpolation does not
-             ! use a halo behind an overset outer bound. This could
-             ! happen because the last cell is interpolated (-1) and
-             ! the iblank on the halos are still 1. Just set the
-             ! quality very high so it is not accepted.
-
-             select case (BCFaceID(mm))
-             case (iMin)
-                iStart=1; iEnd=1;
-                jStart=BCData(mm)%icBeg; jEnd=BCData(mm)%icEnd
-                kStart=BCData(mm)%jcBeg; kEnd=BCData(mm)%jcEnd
-             case (iMax)
-                iStart=ie; iEnd=ie;
-                jStart=BCData(mm)%icBeg; jEnd=BCData(mm)%icEnd
-                kStart=BCData(mm)%jcBeg; kEnd=BCData(mm)%jcEnd
-             case (jMin)
-                iStart=BCData(mm)%icBeg; iEnd=BCData(mm)%icEnd
-                jStart=1; jEnd=1
-                kStart=BCData(mm)%jcBeg; kEnd=BCData(mm)%jcEnd
-             case (jMax)
-                iStart=BCData(mm)%icBeg; iEnd=BCData(mm)%icEnd
-                jStart=je; jEnd=je;
-                kStart=BCData(mm)%jcBeg; kEnd=BCData(mm)%jcEnd
-             case (kMin)
-                iStart=BCData(mm)%icBeg; iEnd=BCData(mm)%icEnd
-                jStart=BCData(mm)%jcBeg; jEnd=BCData(mm)%jcEnd
-                kStart=1; kEnd=1;
-             case (kMax)
-                iStart=BCData(mm)%icBeg; iEnd=BCData(mm)%icEnd
-                jStart=BCData(mm)%jcBeg; jEnd=BCData(mm)%jcEnd
-                kStart=ke; kEnd=ke;
-             end select
-
-             if (BCType(mm) == OversetOuterBound) then
-                do k=kStart, kEnd
-                   do j=jStart, jEnd
-                      do i=iStart, iEnd
-                         ! recompute the index
-                         kk = (k-1)*ie*je + (j-1)*ie + i
-                         oBlock%qualDonor(1, kk) = large
-                      end do
-                   end do
+
+                do mm = 1, nBocos
+                    ! We need to make sure that the interpolation does not
+                    ! use a halo behind an overset outer bound. This could
+                    ! happen because the last cell is interpolated (-1) and
+                    ! the iblank on the halos are still 1. Just set the
+                    ! quality very high so it is not accepted.
+
+                    select case (BCFaceID(mm))
+                    case (iMin)
+                        iStart = 1; iEnd = 1; 
+                        jStart = BCData(mm)%icBeg; jEnd = BCData(mm)%icEnd
+                        kStart = BCData(mm)%jcBeg; kEnd = BCData(mm)%jcEnd
+                    case (iMax)
+                        iStart = ie; iEnd = ie; 
+                        jStart = BCData(mm)%icBeg; jEnd = BCData(mm)%icEnd
+                        kStart = BCData(mm)%jcBeg; kEnd = BCData(mm)%jcEnd
+                    case (jMin)
+                        iStart = BCData(mm)%icBeg; iEnd = BCData(mm)%icEnd
+                        jStart = 1; jEnd = 1
+                        kStart = BCData(mm)%jcBeg; kEnd = BCData(mm)%jcEnd
+                    case (jMax)
+                        iStart = BCData(mm)%icBeg; iEnd = BCData(mm)%icEnd
+                        jStart = je; jEnd = je; 
+                        kStart = BCData(mm)%jcBeg; kEnd = BCData(mm)%jcEnd
+                    case (kMin)
+                        iStart = BCData(mm)%icBeg; iEnd = BCData(mm)%icEnd
+                        jStart = BCData(mm)%jcBeg; jEnd = BCData(mm)%jcEnd
+                        kStart = 1; kEnd = 1; 
+                    case (kMax)
+                        iStart = BCData(mm)%icBeg; iEnd = BCData(mm)%icEnd
+                        jStart = BCData(mm)%jcBeg; jEnd = BCData(mm)%jcEnd
+                        kStart = ke; kEnd = ke; 
+                    end select
+
+                    if (BCType(mm) == OversetOuterBound) then
+                        do k = kStart, kEnd
+                            do j = jStart, jEnd
+                                do i = iStart, iEnd
+                                    ! recompute the index
+                                    kk = (k - 1) * ie * je + (j - 1) * ie + i
+                                    oBlock%qualDonor(1, kk) = large
+                                end do
+                            end do
+                        end do
+                    end if
                 end do
-             end if
-          end do
-
-
-          mm = 0
-          ! These are the 'elements' of the dual mesh.
-          planeOffset = ie * je
-          do k=2, ke
-             do j=2, je
-                do i=2, ie
-                   mm = mm + 1
-                   oBlock%hexaConn(1, mm) = (k-2)*planeOffset + (j-2)*ie + (i-2) + 1
-                   oBlock%hexaConn(2, mm) = oBlock%hexaConn(1, mm) + 1
-                   oBlock%hexaConn(3, mm) = oBlock%hexaConn(2, mm) + ie
-                   oBlock%hexaConn(4, mm) = oBlock%hexaConn(3, mm) - 1
-
-                   oBlock%hexaConn(5, mm) = oBlock%hexaConn(1, mm) + planeOffset
-                   oBlock%hexaConn(6, mm) = oBlock%hexaConn(2, mm) + planeOffset
-                   oBlock%hexaConn(7, mm) = oBlock%hexaConn(3, mm) + planeOffset
-                   oBlock%hexaConn(8, mm) = oBlock%hexaConn(4, mm) + planeOffset
+
+                mm = 0
+                ! These are the 'elements' of the dual mesh.
+                planeOffset = ie * je
+                do k = 2, ke
+                    do j = 2, je
+                        do i = 2, ie
+                            mm = mm + 1
+                            oBlock%hexaConn(1, mm) = (k - 2) * planeOffset + (j - 2) * ie + (i - 2) + 1
+                            oBlock%hexaConn(2, mm) = oBlock%hexaConn(1, mm) + 1
+                            oBlock%hexaConn(3, mm) = oBlock%hexaConn(2, mm) + ie
+                            oBlock%hexaConn(4, mm) = oBlock%hexaConn(3, mm) - 1
+
+                            oBlock%hexaConn(5, mm) = oBlock%hexaConn(1, mm) + planeOffset
+                            oBlock%hexaConn(6, mm) = oBlock%hexaConn(2, mm) + planeOffset
+                            oBlock%hexaConn(7, mm) = oBlock%hexaConn(3, mm) + planeOffset
+                            oBlock%hexaConn(8, mm) = oBlock%hexaConn(4, mm) + planeOffset
+                        end do
+                    end do
                 end do
-             end do
-          end do
-       else
-          ! Note that we will be including the halo primal cells. This
-          ! should slightly increase robusness for viscous off-wall
-          ! spacing. This means the primal mesh has 1 MORE node/cell
-          ! in each direction.
-
-          ! Now setup the data for the ADT
-          nHexa = ie * je * ke
-          nADT = ib * jb * kb
-          oBlock%il = ie
-          oBlock%jl = je
-          oBlock%kl = ke
-
-          allocate(oBlock%xADT(3, nADT), oBlock%hexaConn(8, nHexa), &
-               oBlock%qualDonor(1, nADT))
-
-          oBlock%qualDonor = zero
-          ! Fill up the xADT using the primal nodes
-          mm = 0
-          do k=0, ke
-             do j=0, je
-                do i=0, ie
-                   mm = mm + 1
-                   oBlock%xADT(:, mm) = x(i, j, k, :)
-
-                   ! Since we don't have all 8 volumes surrounding the
-                   ! halo nodes, clip the volumes to be between 0 and ib etc.
-                   do iii=0,1
-                      do jjj=0,1
-                         do kkk=0,1
-                            ii = min(max(0, iii+i), ib)
-                            jj = min(max(0, jjj+j), jb)
-                            kk = min(max(0, kkk+k), kb)
-
-                            oBlock%qualDonor(1, mm) = oBlock%qualDonor(1, mm) + &
-                                 vol(ii, jj, kk)
-                         end do
-                      end do
-                   end do
-
-                   ! Dividing by 8 isn't strictly necessary but we'll
-                   ! do it anyway.
-                   oBlock%qualDonor(1, mm) = oBlock%qualDonor(1, mm) * eighth
+            else
+                ! Note that we will be including the halo primal cells. This
+                ! should slightly increase robusness for viscous off-wall
+                ! spacing. This means the primal mesh has 1 MORE node/cell
+                ! in each direction.
+
+                ! Now setup the data for the ADT
+                nHexa = ie * je * ke
+                nADT = ib * jb * kb
+                oBlock%il = ie
+                oBlock%jl = je
+                oBlock%kl = ke
+
+                allocate (oBlock%xADT(3, nADT), oBlock%hexaConn(8, nHexa), &
+                          oBlock%qualDonor(1, nADT))
+
+                oBlock%qualDonor = zero
+                ! Fill up the xADT using the primal nodes
+                mm = 0
+                do k = 0, ke
+                    do j = 0, je
+                        do i = 0, ie
+                            mm = mm + 1
+                            oBlock%xADT(:, mm) = x(i, j, k, :)
+
+                            ! Since we don't have all 8 volumes surrounding the
+                            ! halo nodes, clip the volumes to be between 0 and ib etc.
+                            do iii = 0, 1
+                                do jjj = 0, 1
+                                    do kkk = 0, 1
+                                        ii = min(max(0, iii + i), ib)
+                                        jj = min(max(0, jjj + j), jb)
+                                        kk = min(max(0, kkk + k), kb)
+
+                                        oBlock%qualDonor(1, mm) = oBlock%qualDonor(1, mm) + &
+                                                                  vol(ii, jj, kk)
+                                    end do
+                                end do
+                            end do
+
+                            ! Dividing by 8 isn't strictly necessary but we'll
+                            ! do it anyway.
+                            oBlock%qualDonor(1, mm) = oBlock%qualDonor(1, mm) * eighth
+                        end do
+                    end do
                 end do
-             end do
-          end do
-
-          mm = 0
-          ! These are the 'elements' of the dual mesh.
-          planeOffset = ib * jb
-          do k=1, ke
-             do j=1, je
-                do i=1, ie
-                   mm = mm + 1
-                   oBlock%hexaConn(1, mm) = (k-1)*planeOffset + (j-1)*ib + (i-1) + 1
-                   oBlock%hexaConn(2, mm) = oBlock%hexaConn(1, mm) + 1
-                   oBlock%hexaConn(3, mm) = oBlock%hexaConn(2, mm) + ib
-                   oBlock%hexaConn(4, mm) = oBlock%hexaConn(3, mm) - 1
-
-                   oBlock%hexaConn(5, mm) = oBlock%hexaConn(1, mm) + planeOffset
-                   oBlock%hexaConn(6, mm) = oBlock%hexaConn(2, mm) + planeOffset
-                   oBlock%hexaConn(7, mm) = oBlock%hexaConn(3, mm) + planeOffset
-                   oBlock%hexaConn(8, mm) = oBlock%hexaConn(4, mm) + planeOffset
+
+                mm = 0
+                ! These are the 'elements' of the dual mesh.
+                planeOffset = ib * jb
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
+                            mm = mm + 1
+                            oBlock%hexaConn(1, mm) = (k - 1) * planeOffset + (j - 1) * ib + (i - 1) + 1
+                            oBlock%hexaConn(2, mm) = oBlock%hexaConn(1, mm) + 1
+                            oBlock%hexaConn(3, mm) = oBlock%hexaConn(2, mm) + ib
+                            oBlock%hexaConn(4, mm) = oBlock%hexaConn(3, mm) - 1
+
+                            oBlock%hexaConn(5, mm) = oBlock%hexaConn(1, mm) + planeOffset
+                            oBlock%hexaConn(6, mm) = oBlock%hexaConn(2, mm) + planeOffset
+                            oBlock%hexaConn(7, mm) = oBlock%hexaConn(3, mm) + planeOffset
+                            oBlock%hexaConn(8, mm) = oBlock%hexaConn(4, mm) + planeOffset
+                        end do
+                    end do
                 end do
-             end do
-          end do
-       end if primalOrDual
+            end if primalOrDual
+
+            ! Call the custom build routine -- Serial only, only Hexa volumes,
+            ! we supply our own ADT Type
+
+            call buildSerialHex(nHexa, nADT, oBlock%xADT, oBlock%hexaConn, oBlock%ADT)
+        end do domainLoop
+
+    end subroutine buildVolumeADTs
+
+    subroutine performInterpolation(pts, oBlocks, useDual, comm)
+
+        ! This routine performs the actual searches for the slices. It is
+        ! generic in the sense that it will search an arbtitrary set of
+        ! points on either the primal or dual meshes. The final required
+        ! communication data is then written into the supplied comm.
+
+        use constants
+        use block, only: interpPtType
+        use communication, only: adflow_comm_world, myid, nProc
+        use oversetData, only: oversetBlock
+        use blockPointers, only: nDom, x, ie, je, ke, il, jl, kl, x, iBlank, vol
+        use adtLocalSearch, only: mindistancetreesearchsinglepoint, &
+                                  containmenttreesearchsinglepoint
+        use adtUtils, only: stack
+        use adtData, only: adtBBoxTargetType
+        use utils, only: setPointers, mynorm2, EChk
+        use inputOverset, only: oversetProjTol
+        use oversetUtilities, only: fracToWeights2, getCumulativeForm
+
+        implicit none
+
+        ! Input parameters:
+        real(kind=realType), dimension(:, :), intent(in) :: pts
+        type(userIntSurf) :: surf
+        type(oversetBlock), dimension(:), target, intent(in) :: oBlocks
+        logical, intent(in) :: useDual
+        !type(oversetSurf), dimension(:), intent(in) :: oSurfs
+        type(userSurfCommType) :: comm
+
+        ! Working parameters
+        type(oversetBlock), pointer :: oBlock
+        type(interpPtType), dimension(:), allocatable :: surfFringes
+
+        integer(Kind=intType) :: i, j, k, ii, jj, kk, iii, jjj, kkk, nn, mm
+        integer(kind=intType) :: iSurf, ierr, nInterpol, iProc
+        integer(kind=intType) :: nHexa, nAdt, planeOffset, elemID, nPts
+        real(kind=realType) :: xc(4), weight(8)
+        integer mpiStatus(MPI_STATUS_SIZE)
+
+        real(kind=realType) :: uvw(5), uvw2(5), donorQual, xcheck(3)
+        integer(kind=intType) :: intInfo(3), intInfo2(3)
+        logical :: failed, invalid
+
+        integer(kind=intType), dimension(:, :), allocatable :: donorInfo, intSend
+        integer(kind=intType), dimension(:), allocatable :: procSizes
+        real(kind=realType), dimension(:, :), allocatable :: donorFrac, realSend
+
+        ! Variables we have to pass the ADT search routine
+        integer(kind=intType), dimension(:), pointer :: BB
+        type(adtBBoxTargetType), dimension(:), pointer :: BB2
+        integer(kind=intType), dimension(:), pointer :: frontLeaves
+        integer(kind=intType), dimension(:), pointer :: frontLeavesNew
+
+        ! Data for the search
+        allocate (BB(20), frontLeaves(25), frontLeavesNew(25), stack(100), BB2(20))
+
+        nPts = size(pts, 2)
+
+        ! Allocate donor information arrays
+        allocate (donorFrac(4, nPts), donorInfo(5, nPts))
+        donorInfo = -1
+        donorFrac(4, :) = large
+        nInterpol = 1
+        domainSearch: do nn = 1, nDom
+            oBlock => oBlocks(nn)
+            call setPointers(nn, 1, 1)
+
+            ! Search the supplied pts one at a time
+            elemLoop: do i = 1, nPts
+
+                xc(1:3) = pts(:, i)
+
+                ! Call the standard tree search
+                call containmentTreeSearchSinglePoint(oBlock%ADT, xc, intInfo, uvw, &
+                                                   oBlock%qualDonor, nInterpol, BB, frontLeaves, frontLeavesNew, failed)
+
+                ! Make sure this point is not garbage.
+                if (intInfo(1) >= 0) then
+                    call fracToWeights2(uvw(1:3), weight)
+                    xcheck = zero
+                    do j = 1, 8
+                        xcheck = xcheck + weight(j) * oBlock%xADT(:, oBlock%hexaConn(j, intInfo(3)))
+                    end do
+
+                    if (mynorm2(xcheck - xc(1:3)) > oversetProjTol) then
+                        failed = .True.
+                    end if
+                end if
 
-       ! Call the custom build routine -- Serial only, only Hexa volumes,
-       ! we supply our own ADT Type
+                if (intInfo(1) >= 0 .and. failed) then
+                    ! we "found" a point but it is garbage. Do the failsafe search
+                    xc(4) = large
+                    call minDistanceTreeSearchSinglePoint(oBlock%ADT, xc, intInfo, uvw, &
+                                                          oBlock%qualDonor, nInterpol, BB2, frontLeaves, frontLeavesNew)
+
+                    ! Check this one:
+                    call fracToWeights2(uvw(1:3), weight)
+                    xcheck = zero
+                    do j = 1, 8
+                        xcheck = xcheck + weight(j) * oBlock%xADT(:, oBlock%hexaConn(j, intInfo(3)))
+                    end do
+
+                    ! Since this is the last line of defence, relax the tolerance a bit
+                    if (mynorm2(xcheck - xc(1:3)) > 100 * oversetProjTol) then
+                        ! This fringe has not found a donor
+                        intInfo(1) = -1
+                    else
+                        ! This one has now passed.
+
+                        ! Important! uvw(4) is the distance squared for this search
+                        ! not interpolated value
+                        uvw(4) = uvw(5)
+                    end if
+                end if
 
-       call buildSerialHex(nHexa, nADT, oBlock%xADT, oBlock%hexaConn, oBlock%ADT)
-    end do domainLoop
+                elemFound: if (intInfo(1) >= 0) then
+
+                    ! Donor and block and index information for this donor.
+                    donorQual = uvw(4)
+                    elemID = intInfo(3) - 1 ! Make it zero based
+
+                    ! The dual mesh needs an offset of 1 becuase it only used
+                    ! 1:ie values. This is not necessary for the primal.
+                    if (useDual) then
+                        ii = mod(elemID, oBlock%il) + 1
+                        jj = mod(elemID / oBlock%il, oBlock%jl) + 1
+                        kk = elemID / (oBlock%il * oBlock%jl) + 1
+                    else
+                        ii = mod(elemID, oBlock%il)
+                        jj = mod(elemID / oBlock%il, oBlock%jl)
+                        kk = elemID / (oBlock%il * oBlock%jl)
+                    end if
+                    ! Rememebr donorFrac(4, i) is the current best quality
+                    if (donorQual < donorFrac(4, i)) then
+
+                        invalid = .False.
+
+                        ! For the dual mesh search, we have to make sure the
+                        ! potential donors are valid. Such a check is not
+                        ! necessary for the primal search since all nodes are
+                        ! considered valid.
+
+                        if (useDual) then
+                            ! Check if the point is invalid. We can do this
+                            ! with i-blank array. We can only accept compute
+                            ! cells (iblank=1) or interpolated
+                            ! cells(iblank=-1).
+                            do kkk = 0, 1
+                                do jjj = 0, 1
+                                    do iii = 0, 1
+                                        if (.not. (iblank(ii + iii, jj + jjj, kk + kkk) == 1 .or. &
+                                                   iblank(ii + iii, jj + jjj, kk + kkk) == -1)) then
+                                            invalid = .True.
+                                        end if
+                                    end do
+                                end do
+                            end do
+                        end if
+
+                        if (.not. invalid) then
+
+                            ! Set the quality of the donor to the one we
+                            ! just found. Save the rest of the necessary
+                            ! information.
+                            donorInfo(1, i) = myid
+                            donorInfo(2, i) = nn
+                            donorInfo(3, i) = ii
+                            donorInfo(4, i) = jj
+                            donorInfo(5, i) = kk
+                            donorFrac(1:3, i) = uvw(1:3)
+                            donorFrac(4, i) = donorQual
+                        end if
+                    end if
+                end if elemFound
+            end do elemLoop
+        end do domainSearch
+
+        ! Next count up the number of valid donors we've found and compact
+        ! the info back to that length.
+        if (myid /= 0) then
+            j = 0
+            do i = 1, nPts
+                if (donorInfo(1, i) /= -1) then
+                    j = j + 1
+                end if
+            end do
+            allocate (intSend(6, j), realSend(4, i))
+            if (j > 0) then
+                j = 0
+                do i = 1, nPts
+                    if (donorInfo(1, i) /= -1) then
+                        j = j + 1
+                        intSend(1:5, j) = donorInfo(:, i)
+                        intSend(6, j) = i
+                        realSend(:, j) = donorFrac(:, i)
+                    end if
+                end do
+            end if
+        else
+            ! On the root proc, use intSend and realSend as the receiver
+            ! buffer. These can be at most nPts sized.
+            allocate (intSend(6, nPts), realSend(4, nPts))
+        end if
+
+        ! Gather up the sizes (j) to the root processor so he know who to
+        ! expect data from.
+        allocate (procSizes(0:nProc - 1))
+
+        call mpi_gather(j, 1, adflow_integer, procSizes, 1, &
+                        adflow_integer, 0, adflow_comm_world, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Next all the procs need to send all the information back to the
+        ! root processor where we will determine the proper donors for
+        ! each of the cells
+
+        ! All procs except root fire off their data.
+        if (myid >= 1) then
+            if (j > 0) then
+                call mpi_send(intSend, j * 6, adflow_integer, 0, myid, &
+                              adflow_comm_world, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                call mpi_send(realSend, j * 4, adflow_real, 0, myid, &
+                              adflow_comm_world, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+            end if
+        end if
+
+        ! And the root processor recieves it...
+        if (myid == 0) then
+            do iProc = 1, nProc - 1
+                ! Determine if this proc has sent anything:
+                if (procSizes(iProc) /= 0) then
+
+                    call MPI_recv(intSend, 6 * nPts, adflow_integer, iProc, iProc, &
+                                  adflow_comm_world, mpiStatus, ierr)
+                    call EChk(ierr, __FILE__, __LINE__)
+
+                    call MPI_recv(realSend, 4 * nPts, adflow_real, iProc, iProc, &
+                                  adflow_comm_world, mpiStatus, ierr)
+                    call EChk(ierr, __FILE__, __LINE__)
+
+                    ! Now process the data (intSend and realSend) that we
+                    ! just received. We don't need to check the status for
+                    ! the sizes becuase we already know the sizes from the
+                    ! initial gather we did.
+
+                    do i = 1, procSizes(iProc)
+                        ii = intSend(6, i)
+
+                        if (realSend(4, i) < donorFrac(4, ii)) then
+                            ! The incoming quality is better. Accept it.
+                            donorInfo(1:5, ii) = intSend(1:5, i)
+                            donorFrac(:, ii) = realSend(:, i)
+                        end if
+                    end do
+                end if
+            end do
+
+            ! To make this easier, convert the information we have to a
+            ! 'fringeType' array so we can use the pre-existing sorting
+            ! routine.
+            allocate (surfFringes(nPts))
+            do i = 1, nPts
+                surfFringes(i)%donorProc = donorInfo(1, i)
+                surfFringes(i)%donorBlock = donorInfo(2, i)
+                surfFringes(i)%dI = donorInfo(3, i)
+                surfFringes(i)%dJ = donorInfo(4, i)
+                surfFringes(i)%dK = donorInfo(5, i)
+                surfFringes(i)%donorFrac = donorFrac(1:3, i)
+                ! Use the myBlock attribute to keep track of the original
+                ! index. When we sort the fringes, they will no longer be
+                ! in the same order
+                surfFringes(i)%myBlock = i
+            end do
+
+            ! Perform the actual sort.
+            call qsortInterpPtType(surfFringes, nPts)
+
+            ! We will reuse-proc sizes to now mean the number of elements
+            ! that the processor *actually* has to send. We will include
+            ! the root proc itself in the calc becuase that will tell us
+            ! the size of the internal comm structure.
+
+            procSizes = 0
+            allocate (comm%valid(nPts))
+            comm%valid = .True.
+            do i = 1, nPts
+                if (surfFringes(i)%donorProc < 0) then
+                    ! We dont have a donor. Flag this point as invalid
+                    comm%valid(i) = .False.
+                end if
+
+                ! Dump the points without donors on the root proc by making
+                ! sure j is at least 0 for the root proc. These will just
+                ! simply be ignored during the comm.
+                j = max(surfFringes(i)%donorProc, 0)
+                procSizes(j) = procSizes(j) + 1
+            end do
+        end if
+
+        ! Simply broadcast out the the proc sizes back to everyone so all
+        ! processors know if they are to receive anything back.
+        call mpi_bcast(procSizes, nProc, adflow_Integer, 0, adflow_comm_world, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! We can now save some of the final comm data required on the
+        ! root proc for this surf.
+        allocate (comm%procSizes(0:nProc - 1), comm%procDisps(0:nProc))
+
+        ! Copy over procSizes and generate the cumulative form of
+        ! the size array, procDisps
+        comm%procSizes = procSizes
+        call getCumulativeForm(comm%procSizes, nProc, comm%procDisps)
+
+        ! Record the elemInverse which is necessary to index into
+        ! the original conn array.
+        if (myid == 0) then
+            allocate (comm%Inv(nPts))
+            do i = 1, nPts
+                comm%Inv(i) = surfFringes(i)%myBlock
+            end do
+        end if
+
+        ! Now we can send out the final donor information to the
+        ! processors that must supply it.
+        comm%nDonor = procSizes(myID)
+        allocate (comm%frac(3, comm%nDonor), comm%donorInfo(4, comm%nDonor))
+
+        if (myid >= 1) then
+            if (comm%nDonor > 0) then
+                ! We are responible for at least 1 donor. We have to make
+                ! use of the intSend and realSend buffers again (which
+                ! are guaranteed to be big enough). The reason we can't
+                ! dump the data in directlyis that intSend and realSend
+                ! have a different leading index than we need on the
+                ! final data structure.
+
+                call MPI_recv(intSend, 6 * comm%nDonor, adflow_integer, 0, myid, &
+                              adflow_comm_world, mpiStatus, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                call MPI_recv(realSend, 4 * comm%nDonor, adflow_real, 0, myID, &
+                              adflow_comm_world, mpiStatus, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                ! Copy into final structure
+                do i = 1, comm%nDonor
+                    comm%donorInfo(:, i) = intSend(1:4, i)
+                    comm%frac(:, i) = realSend(1:3, i)
+                end do
+            end if
+        else
+            ! We are the root processor.
+            if (comm%nDonor > 0) then
+                ! We need to copy out our donor info on the root proc if we have any
+                do i = comm%procDisps(myID) + 1, comm%procDisps(myID + 1)
+                    comm%donorInfo(1, i) = surfFringes(i)%donorBlock
+                    comm%donorInfo(2, i) = surfFringes(i)%dI
+                    comm%donorInfo(3, i) = surfFringes(i)%dJ
+                    comm%donorInfo(4, i) = surfFringes(i)%dK
+                    comm%frac(1:3, i) = surfFringes(i)%donorFrac
+                end do
+            end if
+
+            ! Now loop over the rest of the procs and send out the info we
+            ! need. We have to temporarily copy the data back out of
+            ! fringes to the intSend and realSend arrays
+            do iProc = 1, nProc - 1
+
+                if (comm%procSizes(iProc) > 0) then
+                    ! Have something to send here:
+                    j = 0
+                    do i = comm%procDisps(iProc) + 1, comm%procDisps(iProc + 1)
+                        j = j + 1
+
+                        intSend(1, j) = surfFringes(i)%donorBlock
+                        intSend(2, j) = surfFringes(i)%dI
+                        intSend(3, j) = surfFringes(i)%dJ
+                        intSend(4, j) = surfFringes(i)%dK
+                        realSend(1:3, j) = surfFringes(i)%donorFrac
+                    end do
+
+                    call mpi_send(intSend, j * 6, adflow_integer, iProc, iProc, &
+                                  adflow_comm_world, ierr)
+                    call EChk(ierr, __FILE__, __LINE__)
+
+                    call mpi_send(realSend, j * 4, adflow_real, iProc, iProc, &
+                                  adflow_comm_world, ierr)
+                    call EChk(ierr, __FILE__, __LINE__)
+                end if
+            end do
+
+            ! Deallocate data allocatd only on root proc
+            deallocate (surfFringes)
+        end if
+
+        ! Nuke rest of allocated on all procs
+        deallocate (intSend, realSend, procSizes, donorInfo, donorFrac)
+        deallocate (BB, frontLeaves, frontLeavesNew, stack, BB2)
+    end subroutine performInterpolation
+
+    subroutine interpolateIntegrationSurfaces
+
+        ! This routine performs the actual searches for the slices. We
+        ! reuse much of the same machinery as is used in the overset code.
+
+        use constants
+        use communication, only: adflow_comm_world, myid
+        use oversetData, only: oversetBlock
+        use blockPointers, only: nDom, ie, je, ke, il, jl, kl
+        use adtBuild, only: buildSerialHex, destroySerialHex
+        use utils, only: setPointers, EChk
+
+        implicit none
+
+        ! Working parameters
+        type(oversetBlock), dimension(nDom), target :: oBlocks
+        type(userIntSurf), pointer :: surf
+
+        integer(Kind=intType) :: iSurf, ii, i, nn, nPts, ierr
+        real(kind=realType), dimension(:, :), allocatable :: pts
+        logical :: useDual
+        if (nUserIntSurfs == 0) then
+            return
+        end if
+        primalDualLoop: do ii = 1, 2
+            if (ii == 1) then
+                useDual = .True.
+            else
+                useDual = .False.
+            end if
+
+            call buildVolumeADTs(oBlocks, useDual)
+
+            masterLoop: do iSurf = 1, nUserIntSurfs
+
+                surf => userIntSurfs(iSurf)
+
+                if (myid == 0) then
+
+                    ! We are interpolating the nodal values for both the
+                    ! nodes and the solution variables.
+                    nPts = size(surf%pts, 2)
+                    allocate (pts(3, nPts))
+                    nodeLoop: do i = 1, nPts
+                        pts(:, i) = surf%pts(:, i)
+                    end do nodeLoop
+                end if
 
-  end subroutine buildVolumeADTs
+                ! Send the number of points back to all procs:
+                call mpi_bcast(nPts, 1, adflow_integer, 0, adflow_comm_world, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-  subroutine performInterpolation(pts, oBlocks, useDual, comm)
+                ! All other procs except the root allocate space and receive
+                ! the pt array.
+                if (myid /= 0) then
+                    allocate (pts(3, nPts))
+                end if
 
-    ! This routine performs the actual searches for the slices. It is
-    ! generic in the sense that it will search an arbtitrary set of
-    ! points on either the primal or dual meshes. The final required
-    ! communication data is then written into the supplied comm.
+                call mpi_bcast(pts, 3 * nPts, adflow_real, 0, adflow_comm_world, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-    use constants
-    use block, only : interpPtType
-    use communication, only : adflow_comm_world, myid, nProc
-    use oversetData, only : oversetBlock
-    use blockPointers, only : nDom, x, ie, je, ke, il, jl, kl, x, iBlank, vol
-    use adtLocalSearch, only :  mindistancetreesearchsinglepoint, &
-         containmenttreesearchsinglepoint
-    use adtUtils, only : stack
-    use adtData, only : adtBBoxTargetType
-    use utils, only : setPointers, mynorm2, EChk
-    use inputOverset, only : oversetProjTol
-    use oversetUtilities, only : fracToWeights2, getCumulativeForm
-
-    implicit none
-
-    ! Input parameters:
-    real(kind=realType), dimension(:, :), intent(in) :: pts
-    type(userIntSurf) :: surf
-    type(oversetBlock), dimension(:), target, intent(in) :: oBlocks
-    logical, intent(in) :: useDual
-    !type(oversetSurf), dimension(:), intent(in) :: oSurfs
-    type(userSurfCommType) :: comm
-
-    ! Working parameters
-    type(oversetBlock), pointer :: oBlock
-    type(interpPtType), dimension(:), allocatable :: surfFringes
-
-    integer(Kind=intType) :: i, j, k, ii, jj, kk, iii, jjj, kkk, nn, mm
-    integer(kind=intType) :: iSurf, ierr, nInterpol, iProc
-    integer(kind=intType) :: nHexa, nAdt, planeOffset, elemID, nPts
-    real(kind=realType) :: xc(4), weight(8)
-    integer mpiStatus(MPI_STATUS_SIZE)
-
-    real(kind=realType) :: uvw(5), uvw2(5), donorQual, xcheck(3)
-    integer(kind=intType) :: intInfo(3), intInfo2(3)
-    logical :: failed, invalid
-
-    integer(kind=intType), dimension(:, :), allocatable :: donorInfo, intSend
-    integer(kind=intType), dimension(:), allocatable :: procSizes
-    real(kind=realType), dimension(:, :), allocatable :: donorFrac, realSend
-
-    ! Variables we have to pass the ADT search routine
-    integer(kind=intType), dimension(:), pointer :: BB
-    type(adtBBoxTargetType), dimension(:), pointer :: BB2
-    integer(kind=intType), dimension(:), pointer :: frontLeaves
-    integer(kind=intType), dimension(:), pointer :: frontLeavesNew
-
-    ! Data for the search
-    allocate(BB(20), frontLeaves(25), frontLeavesNew(25), stack(100), BB2(20))
-
-    nPts = size(pts, 2)
-
-    ! Allocate donor information arrays
-    allocate(donorFrac(4, nPts), donorInfo(5, nPts))
-    donorInfo = -1
-    donorFrac(4, :) = large
-    nInterpol = 1
-    domainSearch: do nn=1, nDom
-       oBlock => oBlocks(nn)
-       call setPointers(nn, 1, 1)
-
-       ! Search the supplied pts one at a time
-       elemLoop: do i=1, nPts
-
-          xc(1:3) = pts(:, i)
-
-          ! Call the standard tree search
-          call containmentTreeSearchSinglePoint(oBlock%ADT, xc, intInfo, uvw, &
-               oBlock%qualDonor, nInterpol, BB, frontLeaves, frontLeavesNew, failed)
-
-          ! Make sure this point is not garbage.
-          if (intInfo(1) >= 0) then
-             call fracToWeights2(uvw(1:3), weight)
-             xcheck = zero
-             do j=1,8
-                xcheck = xcheck + weight(j)*oBlock%xADT(:, oBlock%hexaConn(j, intInfo(3)))
-             end do
-
-             if (mynorm2(xcheck - xc(1:3)) > oversetProjTol) then
-                failed = .True.
-             end if
-          end if
-
-          if (intInfo(1) >= 0 .and. failed) then
-             ! we "found" a point but it is garbage. Do the failsafe search
-             xc(4) = large
-             call minDistanceTreeSearchSinglePoint(oBlock%ADT, xc, intInfo, uvw, &
-                  oBlock%qualDonor, nInterpol, BB2, frontLeaves, frontLeavesNew)
-
-             ! Check this one:
-             call fracToWeights2(uvw(1:3), weight)
-             xcheck = zero
-             do j=1,8
-                xcheck = xcheck + weight(j)*oBlock%xADT(:, oBlock%hexaConn(j, intInfo(3)))
-             end do
-
-             ! Since this is the last line of defence, relax the tolerance a bit
-             if (mynorm2(xcheck - xc(1:3)) > 100*oversetProjTol) then
-                ! This fringe has not found a donor
-                intInfo(1) = -1
-             else
-                ! This one has now passed.
-
-                ! Important! uvw(4) is the distance squared for this search
-                ! not interpolated value
-                uvw(4) = uvw(5)
-             end if
-          end if
-
-          elemFound: if (intInfo(1) >= 0) then
-
-             ! Donor and block and index information for this donor.
-             donorQual = uvw(4)
-             elemID = intInfo(3) - 1 ! Make it zero based
-
-             ! The dual mesh needs an offset of 1 becuase it only used
-             ! 1:ie values. This is not necessary for the primal.
-             if (useDual) then
-                ii = mod(elemID, oBlock%il) + 1
-                jj = mod(elemID/oBlock%il, oBlock%jl) + 1
-                kk = elemID/(oBlock%il*oBlock%jl) + 1
-             else
-                ii = mod(elemID, oBlock%il)
-                jj = mod(elemID/oBlock%il, oBlock%jl)
-                kk = elemID/(oBlock%il*oBlock%jl)
-             end if
-             ! Rememebr donorFrac(4, i) is the current best quality
-             if ( donorQual < donorFrac(4, i)) then
-
-                invalid = .False.
-
-                ! For the dual mesh search, we have to make sure the
-                ! potential donors are valid. Such a check is not
-                ! necessary for the primal search since all nodes are
-                ! considered valid.
-
-                if (useDual) then
-                   ! Check if the point is invalid. We can do this
-                   ! with i-blank array. We can only accept compute
-                   ! cells (iblank=1) or interpolated
-                   ! cells(iblank=-1).
-                   do kkk=0,1
-                      do jjj=0,1
-                         do iii=0,1
-                            if (.not. (iblank(ii+iii, jj+jjj, kk+kkk) == 1 .or. &
-                                 iblank(ii+iii, jj+jjj, kk+kkk) == -1)) then
-                               invalid = .True.
-                            end if
-                         end do
-                      end do
-                   end do
+                ! Call the actual interpolation routine
+                if (ii == 1) then
+                    call performInterpolation(pts, oBlocks, .True., surf%flowComm)
+                else
+                    call performInterpolation(pts, oBlocks, .False., surf%nodeComm)
                 end if
 
-                if (.not. invalid) then
-
-                   ! Set the quality of the donor to the one we
-                   ! just found. Save the rest of the necessary
-                   ! information.
-                   donorInfo(1, i) = myid
-                   donorInfo(2, i) = nn
-                   donorInfo(3, i) = ii
-                   donorInfo(4, i) = jj
-                   donorInfo(5, i) = kk
-                   donorFrac(1:3, i) = uvw(1:3)
-                   donorFrac(4, i) = donorQual
+                deallocate (pts)
+            end do masterLoop
+
+            ! Destroy the ADT Data and allocated values
+            do nn = 1, nDom
+                call destroySerialHex(oBlocks(nn)%ADT)
+                deallocate (oBlocks(nn)%xADT, oBlocks(nn)%hexaConn, oBlocks(nn)%qualDonor)
+            end do
+        end do primalDualLoop
+    end subroutine interpolateIntegrationSurfaces
+
+    subroutine commUserIntegrationSurfaceVars(recvBuffer, varStart, varEnd, comm)
+
+        use constants
+        use block, onlY: flowDoms, nDom
+        use communication, only: myid, adflow_comm_world
+        use utils, only: EChk
+        use oversetUtilities, only: fracToWeights
+
+        implicit none
+
+        ! Input/Output
+        real(kind=realType), dimension(:) :: recvBuffer
+        integer(kind=intType), intent(in) :: varStart, varEnd
+        type(userSurfCommType) :: comm
+
+        ! Working
+        real(kind=realType), dimension(:), allocatable :: sendBuffer
+        integer(Kind=intType) :: d1, i1, j1, k1, jj, k, nvar, i, ierr
+        real(kind=realType), dimension(8) :: weight
+
+        ! The number of variables we are transferring:
+        nVar = varEnd - varStart + 1
+
+        ! We assume that the pointers to the realCommVars have already been set.
+
+        allocate (sendBuffer(nVar * comm%nDonor))
+        ! initialize the sendBuffer to an arbitrary value. If the
+        ! integration tries to use this value, you know something is
+        ! wrong.
+        sendBuffer = -99999
+        ! First generate the interpolated data necessary
+        jj = 0
+        donorLoop: do i = 1, comm%nDonor
+            ! Convert the frac to weights
+            call fracToWeights(comm%frac(:, i), weight)
+
+            ! Block and indices for easier reading. The +1 is due to the
+            ! pointer offset on realCommVars.
+
+            d1 = comm%donorInfo(1, i) ! Block Index
+            i1 = comm%donorInfo(2, i) + 1 ! donor I index
+            j1 = comm%donorInfo(3, i) + 1 ! donor J index
+            k1 = comm%donorInfo(4, i) + 1 ! donor K index
+
+            ! We are interpolating nVar variables
+            do k = varStart, varEnd
+                jj = jj + 1
+                if (d1 > 0) then ! Is this pt valid?
+                    sendBuffer(jj) = &
+                        weight(1) * flowDoms(d1, 1, 1)%realCommVars(k)%var(i1, j1, k1) + &
+                        weight(2) * flowDoms(d1, 1, 1)%realCommVars(k)%var(i1 + 1, j1, k1) + &
+                        weight(3) * flowDoms(d1, 1, 1)%realCommVars(k)%var(i1, j1 + 1, k1) + &
+                        weight(4) * flowDoms(d1, 1, 1)%realCommVars(k)%var(i1 + 1, j1 + 1, k1) + &
+                        weight(5) * flowDoms(d1, 1, 1)%realCommVars(k)%var(i1, j1, k1 + 1) + &
+                        weight(6) * flowDoms(d1, 1, 1)%realCommVars(k)%var(i1 + 1, j1, k1 + 1) + &
+                        weight(7) * flowDoms(d1, 1, 1)%realCommVars(k)%var(i1, j1 + 1, k1 + 1) + &
+                        weight(8) * flowDoms(d1, 1, 1)%realCommVars(k)%var(i1 + 1, j1 + 1, k1 + 1)
                 end if
-             end if
-          end if elemFound
-       end do elemLoop
-    end do domainSearch
-
-    ! Next count up the number of valid donors we've found and compact
-    ! the info back to that length.
-    if (myid /=0) then
-       j = 0
-       do i=1, nPts
-          if (donorInfo(1, i) /= -1) then
-             j = j + 1
-          end if
-       end do
-       allocate(intSend(6, j), realSend(4, i))
-       if (j > 0) then
-          j = 0
-          do i=1, nPts
-             if (donorInfo(1, i) /= -1) then
-                j = j + 1
-                intSend(1:5, j) = donorInfo(:, i)
-                intSend(6, j) = i
-                realSend(:, j) = donorFrac(:, i)
-             end if
-          end do
-       end if
-    else
-       ! On the root proc, use intSend and realSend as the receiver
-       ! buffer. These can be at most nPts sized.
-       allocate(intSend(6, nPts), realSend(4, nPts))
-    end if
-
-    ! Gather up the sizes (j) to the root processor so he know who to
-    ! expect data from.
-    allocate(procSizes(0:nProc-1))
-
-    call mpi_gather(j, 1, adflow_integer, procSizes, 1, &
-         adflow_integer, 0, adflow_comm_world, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! Next all the procs need to send all the information back to the
-    ! root processor where we will determine the proper donors for
-    ! each of the cells
-
-    ! All procs except root fire off their data.
-    if (myid >= 1) then
-       if (j > 0) then
-          call mpi_send(intSend, j*6, adflow_integer, 0, myid, &
-               adflow_comm_world, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          call mpi_send(realSend, j*4, adflow_real, 0, myid, &
-               adflow_comm_world, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-       end if
-    end if
-
-    ! And the root processor recieves it...
-    if (myid == 0) then
-       do iProc=1, nProc-1
-          ! Determine if this proc has sent anything:
-          if (procSizes(iProc) /= 0) then
-
-             call MPI_recv(intSend, 6*nPts, adflow_integer, iProc, iProc,&
-                  adflow_comm_world, mpiStatus, ierr)
-             call EChk(ierr,__FILE__,__LINE__)
-
-             call MPI_recv(realSend, 4*nPts, adflow_real, iProc, iProc,&
-                  adflow_comm_world, mpiStatus, ierr)
-             call EChk(ierr,__FILE__,__LINE__)
-
-             ! Now process the data (intSend and realSend) that we
-             ! just received. We don't need to check the status for
-             ! the sizes becuase we already know the sizes from the
-             ! initial gather we did.
-
-             do i=1, procSizes(iProc)
-                ii = intSend(6, i)
-
-                if (realSend(4, i) < donorFrac(4, ii)) then
-                   ! The incoming quality is better. Accept it.
-                   donorInfo(1:5, ii) = intSend(1:5, i)
-                   donorFrac(:, ii) = realSend(:, i)
+            end do
+        end do donorLoop
+
+        ! Now we can do an mpi_gatherv to the root proc:
+        call mpi_gatherv(sendBuffer, nVar * comm%nDonor, adflow_real, recvBuffer, &
+                         nVar * comm%procSizes, nVar * comm%procDisps, adflow_real, 0, adflow_comm_world, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+        deallocate (sendBuffer)
+
+    end subroutine commUserIntegrationSurfaceVars
+
+    subroutine commUserIntegrationSurfaceVars_d(recvBuffer, recvBufferd, varStart, varEnd, comm)
+
+        use constants
+        use block, onlY: flowDoms, nDom
+        use communication, only: myid, adflow_comm_world
+        use utils, only: EChk
+        use oversetUtilities, only: fracToWeights
+
+        implicit none
+
+        ! Input/Output
+        real(kind=realType), dimension(:) :: recvBuffer, recvBufferd
+        integer(kind=intType), intent(in) :: varStart, varEnd
+        type(userSurfCommType) :: comm
+
+        ! Working
+        real(kind=realType), dimension(:), allocatable :: sendBuffer, sendBufferd
+        integer(Kind=intType) :: d1, i1, j1, k1, jj, k, nvar, i, ierr
+        real(kind=realType), dimension(8) :: weight
+
+        ! The number of variables we are transferring:
+        nVar = varEnd - varStart + 1
+
+        ! We assume that the pointers to the realCommVars have already been set.
+
+        allocate (sendBuffer(nVar * comm%nDonor), &
+                  sendBufferd(nVar * comm%nDonor))
+
+        ! First generate the interpolated data necessary
+        jj = 0
+        donorLoop: do i = 1, comm%nDonor
+            ! Convert the frac to weights
+            call fracToWeights(comm%frac(:, i), weight)
+
+            ! Block and indices for easier reading. The +1 is due to the
+            ! pointer offset on realCommVars.
+
+            d1 = comm%donorInfo(1, i) ! Block Index
+            i1 = comm%donorInfo(2, i) + 1 ! donor I index
+            j1 = comm%donorInfo(3, i) + 1 ! donor J index
+            k1 = comm%donorInfo(4, i) + 1 ! donor K index
+
+            ! We are interpolating nVar variables
+            do k = varStart, varEnd
+                jj = jj + 1
+                if (d1 > 0) then ! Is this pt valid?
+                    sendBuffer(jj) = &
+                        weight(1) * flowDoms(d1, 1, 1)%realCommVars(k)%var(i1, j1, k1) + &
+                        weight(2) * flowDoms(d1, 1, 1)%realCommVars(k)%var(i1 + 1, j1, k1) + &
+                        weight(3) * flowDoms(d1, 1, 1)%realCommVars(k)%var(i1, j1 + 1, k1) + &
+                        weight(4) * flowDoms(d1, 1, 1)%realCommVars(k)%var(i1 + 1, j1 + 1, k1) + &
+                        weight(5) * flowDoms(d1, 1, 1)%realCommVars(k)%var(i1, j1, k1 + 1) + &
+                        weight(6) * flowDoms(d1, 1, 1)%realCommVars(k)%var(i1 + 1, j1, k1 + 1) + &
+                        weight(7) * flowDoms(d1, 1, 1)%realCommVars(k)%var(i1, j1 + 1, k1 + 1) + &
+                        weight(8) * flowDoms(d1, 1, 1)%realCommVars(k)%var(i1 + 1, j1 + 1, k1 + 1)
+                    sendBufferd(jj) = &
+                        weight(1) * flowDoms(d1, 1, 1)%realCommVars(k + nZippFlowComm)%var(i1, j1, k1) + &
+                        weight(2) * flowDoms(d1, 1, 1)%realCommVars(k + nZippFlowComm)%var(i1 + 1, j1, k1) + &
+                        weight(3) * flowDoms(d1, 1, 1)%realCommVars(k + nZippFlowComm)%var(i1, j1 + 1, k1) + &
+                        weight(4) * flowDoms(d1, 1, 1)%realCommVars(k + nZippFlowComm)%var(i1 + 1, j1 + 1, k1) + &
+                        weight(5) * flowDoms(d1, 1, 1)%realCommVars(k + nZippFlowComm)%var(i1, j1, k1 + 1) + &
+                        weight(6) * flowDoms(d1, 1, 1)%realCommVars(k + nZippFlowComm)%var(i1 + 1, j1, k1 + 1) + &
+                        weight(7) * flowDoms(d1, 1, 1)%realCommVars(k + nZippFlowComm)%var(i1, j1 + 1, k1 + 1) + &
+                        weight(8) * flowDoms(d1, 1, 1)%realCommVars(k + nZippFlowComm)%var(i1 + 1, j1 + 1, k1 + 1)
                 end if
-             end do
-          end if
-       end do
-
-       ! To make this easier, convert the information we have to a
-       ! 'fringeType' array so we can use the pre-existing sorting
-       ! routine.
-       allocate(surfFringes(nPts))
-       do i=1, nPts
-          surfFringes(i)%donorProc = donorInfo(1, i)
-          surfFringes(i)%donorBlock = donorInfo(2, i)
-          surfFringes(i)%dI = donorInfo(3, i)
-          surfFringes(i)%dJ = donorInfo(4, i)
-          surfFringes(i)%dK = donorInfo(5, i)
-          surfFringes(i)%donorFrac = donorFrac(1:3, i)
-          ! Use the myBlock attribute to keep track of the original
-          ! index. When we sort the fringes, they will no longer be
-          ! in the same order
-          surfFringes(i)%myBlock = i
-       end do
-
-       ! Perform the actual sort.
-       call qsortInterpPtType(surfFringes, nPts)
-
-       ! We will reuse-proc sizes to now mean the number of elements
-       ! that the processor *actually* has to send. We will include
-       ! the root proc itself in the calc becuase that will tell us
-       ! the size of the internal comm structure.
-
-       procSizes = 0
-       allocate(comm%valid(nPts))
-       comm%valid = .True.
-       do i=1, nPts
-          if (surfFringes(i)%donorProc < 0) then
-             ! We dont have a donor. Flag this point as invalid
-             comm%valid(i) = .False.
-          end if
-
-          ! Dump the points without donors on the root proc by making
-          ! sure j is at least 0 for the root proc. These will just
-          ! simply be ignored during the comm.
-          j = max(surfFringes(i)%donorProc, 0)
-          procSizes(j) = procSizes(j) + 1
-       end do
-    end if
-
-    ! Simply broadcast out the the proc sizes back to everyone so all
-    ! processors know if they are to receive anything back.
-    call mpi_bcast(procSizes, nProc, adflow_Integer, 0, adflow_comm_world, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    ! We can now save some of the final comm data required on the
-    ! root proc for this surf.
-    allocate(comm%procSizes(0:nProc-1), comm%procDisps(0:nProc))
-
-    ! Copy over procSizes and generate the cumulative form of
-    ! the size array, procDisps
-    comm%procSizes = procSizes
-    call getCumulativeForm(comm%procSizes, nProc, comm%procDisps)
-
-    ! Record the elemInverse which is necessary to index into
-    ! the original conn array.
-    if (myid == 0) then
-       allocate(comm%Inv(nPts))
-       do i=1, nPts
-          comm%Inv(i) = surfFringes(i)%myBlock
-       end do
-    end if
-
-    ! Now we can send out the final donor information to the
-    ! processors that must supply it.
-    comm%nDonor = procSizes(myID)
-    allocate(comm%frac(3, comm%nDonor), comm%donorInfo(4, comm%nDonor))
-
-    if (myid >= 1) then
-       if (comm%nDonor > 0) then
-          ! We are responible for at least 1 donor. We have to make
-          ! use of the intSend and realSend buffers again (which
-          ! are guaranteed to be big enough). The reason we can't
-          ! dump the data in directlyis that intSend and realSend
-          ! have a different leading index than we need on the
-          ! final data structure.
-
-          call MPI_recv(intSend, 6*comm%nDonor, adflow_integer, 0, myid, &
-               adflow_comm_world, mpiStatus, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          call MPI_recv(realSend, 4*comm%nDonor, adflow_real, 0, myID, &
-               adflow_comm_world, mpiStatus, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          ! Copy into final structure
-          do i=1, comm%nDonor
-             comm%donorInfo(:, i) = intSend(1:4, i)
-             comm%frac(:, i) = realSend(1:3, i)
-          end do
-       end if
-    else
-       ! We are the root processor.
-       if (comm%nDonor > 0) then
-          ! We need to copy out our donor info on the root proc if we have any
-          do i= comm%procDisps(myID)+1, comm%procDisps(myID+1)
-             comm%donorInfo(1, i) = surfFringes(i)%donorBlock
-             comm%donorInfo(2, i) = surfFringes(i)%dI
-             comm%donorInfo(3, i) = surfFringes(i)%dJ
-             comm%donorInfo(4, i) = surfFringes(i)%dK
-             comm%frac(1:3, i)    = surfFringes(i)%donorFrac
-          end do
-       end if
-
-       ! Now loop over the rest of the procs and send out the info we
-       ! need. We have to temporarily copy the data back out of
-       ! fringes to the intSend and realSend arrays
-       do iProc=1, nProc-1
-
-          if (comm%procSizes(iProc) > 0) then
-             ! Have something to send here:
-             j = 0
-             do i=comm%procDisps(iProc)+1, comm%procDisps(iProc+1)
-                j = j + 1
-
-                intSend(1, j) = surfFringes(i)%donorBlock
-                intSend(2, j) = surfFringes(i)%dI
-                intSend(3, j) = surfFringes(i)%dJ
-                intSend(4, j) = surfFringes(i)%dK
-                realSend(1:3, j) = surfFringes(i)%donorFrac
-             end do
-
-             call mpi_send(intSend, j*6, adflow_integer, iProc, iProc, &
-                  adflow_comm_world, ierr)
-             call EChk(ierr,__FILE__,__LINE__)
-
-             call mpi_send(realSend, j*4, adflow_real, iProc, iProc, &
-                  adflow_comm_world, ierr)
-             call EChk(ierr,__FILE__,__LINE__)
-          end if
-       end do
-
-       ! Deallocate data allocatd only on root proc
-       deallocate(surfFringes)
-    end if
-
-    ! Nuke rest of allocated on all procs
-    deallocate(intSend, realSend, procSizes, donorInfo, donorFrac)
-    deallocate(BB, frontLeaves, frontLeavesNew, stack, BB2)
-  end subroutine performInterpolation
-
-  subroutine interpolateIntegrationSurfaces
-
-    ! This routine performs the actual searches for the slices. We
-    ! reuse much of the same machinery as is used in the overset code.
+            end do
+        end do donorLoop
 
-    use constants
-    use communication, only : adflow_comm_world, myid
-    use oversetData, only : oversetBlock
-    use blockPointers, only : nDom, ie, je, ke, il, jl, kl
-    use adtBuild, only : buildSerialHex, destroySerialHex
-    use utils, only : setPointers, EChk
-
-    implicit none
-
-    ! Working parameters
-    type(oversetBlock), dimension(nDom), target :: oBlocks
-    type(userIntSurf), pointer :: surf
-
-    integer(Kind=intType) :: iSurf, ii, i, nn, nPts, ierr
-    real(kind=realType), dimension(:, :), allocatable :: pts
-    logical :: useDual
-    if (nUserIntSurfs == 0) then
-       return
-    end if
-    primalDualLoop: do ii=1, 2
-       if (ii==1) then
-          useDual = .True.
-       else
-          useDual = .False.
-       end if
-
-       call buildVolumeADTs(oBlocks, useDual)
-
-       masterLoop: do iSurf=1, nUserIntSurfs
-
-          surf => userIntSurfs(iSurf)
-
-          if (myid == 0) then
-
-             ! We are interpolating the nodal values for both the
-             ! nodes and the solution variables.
-             nPts = size(surf%pts, 2)
-             allocate(pts(3, nPts))
-             nodeLoop: do i=1, nPts
-                pts(:, i) = surf%pts(:, i)
-             end do nodeLoop
-          end if
-
-          ! Send the number of points back to all procs:
-          call mpi_bcast(nPts, 1, adflow_integer, 0, adflow_comm_world, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          ! All other procs except the root allocate space and receive
-          ! the pt array.
-          if (myid /= 0) then
-             allocate(pts(3, nPts))
-          end if
-
-          call mpi_bcast(pts, 3*nPts, adflow_real, 0, adflow_comm_world, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          ! Call the actual interpolation routine
-          if (ii==1) then
-             call performInterpolation(pts, oBlocks, .True., surf%flowComm)
-          else
-             call performInterpolation(pts, oBlocks, .False., surf%nodeComm)
-          end if
-
-          deallocate(pts)
-       end do masterLoop
-
-       ! Destroy the ADT Data and allocated values
-       do nn=1, nDom
-          call destroySerialHex(oBlocks(nn)%ADT)
-          deallocate(oBlocks(nn)%xADT, oBlocks(nn)%hexaConn, oBlocks(nn)%qualDonor)
-       end do
-    end do primalDualLoop
-  end subroutine interpolateIntegrationSurfaces
-
-  subroutine commUserIntegrationSurfaceVars(recvBuffer, varStart, varEnd, comm)
+        ! Now we can do an mpi_gatherv to the root proc:
+        call mpi_gatherv(sendBuffer, nVar * comm%nDonor, adflow_real, recvBuffer, &
+                         nVar * comm%procSizes, nVar * comm%procDisps, adflow_real, 0, adflow_comm_world, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    use constants
-    use block, onlY : flowDoms, nDom
-    use communication, only : myid, adflow_comm_world
-    use utils, only : EChk
-    use oversetUtilities, only :fracToWeights
-
-    implicit none
-
-    ! Input/Output
-    real(kind=realType), dimension(:) :: recvBuffer
-    integer(kind=intType), intent(in) :: varStart, varEnd
-    type(userSurfCommType) :: comm
-
-    ! Working
-    real(kind=realType), dimension(:), allocatable :: sendBuffer
-    integer(Kind=intType) :: d1, i1, j1, k1, jj, k, nvar, i, ierr
-    real(kind=realType), dimension(8) :: weight
-
-    ! The number of variables we are transferring:
-    nVar = varEnd - varStart + 1
-
-    ! We assume that the pointers to the realCommVars have already been set.
-
-    allocate(sendBuffer(nVar*comm%nDonor))
-    ! initialize the sendBuffer to an arbitrary value. If the
-    ! integration tries to use this value, you know something is
-    ! wrong.
-    sendBuffer = -99999
-    ! First generate the interpolated data necessary
-    jj = 0
-    donorLoop: do i=1, comm%nDonor
-       ! Convert the frac to weights
-       call fracToWeights(comm%frac(:, i), weight)
-
-       ! Block and indices for easier reading. The +1 is due to the
-       ! pointer offset on realCommVars.
-
-       d1 = comm%donorInfo(1, i) ! Block Index
-       i1 = comm%donorInfo(2, i)+1 ! donor I index
-       j1 = comm%donorInfo(3, i)+1 ! donor J index
-       k1 = comm%donorInfo(4, i)+1 ! donor K index
-
-       ! We are interpolating nVar variables
-       do k=varStart, varEnd
-          jj = jj + 1
-          if (d1 > 0) then ! Is this pt valid?
-             sendBuffer(jj) = &
-                  weight(1)*flowDoms(d1,1,1)%realCommVars(k)%var(i1  ,j1  ,k1  ) + &
-                  weight(2)*flowDoms(d1,1,1)%realCommVars(k)%var(i1+1,j1  ,k1  ) + &
-                  weight(3)*flowDoms(d1,1,1)%realCommVars(k)%var(i1  ,j1+1,k1  ) + &
-                  weight(4)*flowDoms(d1,1,1)%realCommVars(k)%var(i1+1,j1+1,k1  ) + &
-                  weight(5)*flowDoms(d1,1,1)%realCommVars(k)%var(i1  ,j1  ,k1+1) + &
-                  weight(6)*flowDoms(d1,1,1)%realCommVars(k)%var(i1+1,j1  ,k1+1) + &
-                  weight(7)*flowDoms(d1,1,1)%realCommVars(k)%var(i1  ,j1+1,k1+1) + &
-                  weight(8)*flowDoms(d1,1,1)%realCommVars(k)%var(i1+1,j1+1,k1+1)
-          end if
-       end do
-    end do donorLoop
-
-    ! Now we can do an mpi_gatherv to the root proc:
-    call mpi_gatherv(sendBuffer, nVar*comm%nDonor, adflow_real, recvBuffer, &
-         nVar*comm%procSizes, nVar*comm%procDisps, adflow_real, 0, adflow_comm_world, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-    deallocate(sendBuffer)
-
-  end subroutine commUserIntegrationSurfaceVars
-
- subroutine commUserIntegrationSurfaceVars_d(recvBuffer, recvBufferd, varStart, varEnd, comm)
+        call mpi_gatherv(sendBufferd, nVar * comm%nDonor, adflow_real, recvBufferd, &
+                         nVar * comm%procSizes, nVar * comm%procDisps, adflow_real, 0, adflow_comm_world, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-    use constants
-    use block, onlY : flowDoms, nDom
-    use communication, only : myid, adflow_comm_world
-    use utils, only : EChk
-    use oversetUtilities, only :fracToWeights
-
-    implicit none
-
-    ! Input/Output
-    real(kind=realType), dimension(:) :: recvBuffer, recvBufferd
-    integer(kind=intType), intent(in) :: varStart, varEnd
-    type(userSurfCommType) :: comm
-
-    ! Working
-    real(kind=realType), dimension(:), allocatable :: sendBuffer, sendBufferd
-    integer(Kind=intType) :: d1, i1, j1, k1, jj, k, nvar, i, ierr
-    real(kind=realType), dimension(8) :: weight
-
-    ! The number of variables we are transferring:
-    nVar = varEnd - varStart + 1
-
-    ! We assume that the pointers to the realCommVars have already been set.
-
-    allocate(sendBuffer(nVar*comm%nDonor), &
-         sendBufferd(nVar*comm%nDonor))
-
-    ! First generate the interpolated data necessary
-    jj = 0
-    donorLoop: do i=1, comm%nDonor
-       ! Convert the frac to weights
-       call fracToWeights(comm%frac(:, i), weight)
-
-       ! Block and indices for easier reading. The +1 is due to the
-       ! pointer offset on realCommVars.
-
-       d1 = comm%donorInfo(1, i) ! Block Index
-       i1 = comm%donorInfo(2, i)+1 ! donor I index
-       j1 = comm%donorInfo(3, i)+1 ! donor J index
-       k1 = comm%donorInfo(4, i)+1 ! donor K index
-
-       ! We are interpolating nVar variables
-       do k=varStart, varEnd
-          jj = jj + 1
-          if (d1 > 0) then ! Is this pt valid?
-             sendBuffer(jj) = &
-                  weight(1)*flowDoms(d1,1,1)%realCommVars(k)%var(i1  ,j1  ,k1  ) + &
-                  weight(2)*flowDoms(d1,1,1)%realCommVars(k)%var(i1+1,j1  ,k1  ) + &
-                  weight(3)*flowDoms(d1,1,1)%realCommVars(k)%var(i1  ,j1+1,k1  ) + &
-                  weight(4)*flowDoms(d1,1,1)%realCommVars(k)%var(i1+1,j1+1,k1  ) + &
-                  weight(5)*flowDoms(d1,1,1)%realCommVars(k)%var(i1  ,j1  ,k1+1) + &
-                  weight(6)*flowDoms(d1,1,1)%realCommVars(k)%var(i1+1,j1  ,k1+1) + &
-                  weight(7)*flowDoms(d1,1,1)%realCommVars(k)%var(i1  ,j1+1,k1+1) + &
-                  weight(8)*flowDoms(d1,1,1)%realCommVars(k)%var(i1+1,j1+1,k1+1)
-             sendBufferd(jj) = &
-                  weight(1)*flowDoms(d1,1,1)%realCommVars(k+nZippFlowComm)%var(i1  ,j1  ,k1  ) + &
-                  weight(2)*flowDoms(d1,1,1)%realCommVars(k+nZippFlowComm)%var(i1+1,j1  ,k1  ) + &
-                  weight(3)*flowDoms(d1,1,1)%realCommVars(k+nZippFlowComm)%var(i1  ,j1+1,k1  ) + &
-                  weight(4)*flowDoms(d1,1,1)%realCommVars(k+nZippFlowComm)%var(i1+1,j1+1,k1  ) + &
-                  weight(5)*flowDoms(d1,1,1)%realCommVars(k+nZippFlowComm)%var(i1  ,j1  ,k1+1) + &
-                  weight(6)*flowDoms(d1,1,1)%realCommVars(k+nZippFlowComm)%var(i1+1,j1  ,k1+1) + &
-                  weight(7)*flowDoms(d1,1,1)%realCommVars(k+nZippFlowComm)%var(i1  ,j1+1,k1+1) + &
-                  weight(8)*flowDoms(d1,1,1)%realCommVars(k+nZippFlowComm)%var(i1+1,j1+1,k1+1)
-          end if
-       end do
-    end do donorLoop
-
-    ! Now we can do an mpi_gatherv to the root proc:
-    call mpi_gatherv(sendBuffer, nVar*comm%nDonor, adflow_real, recvBuffer, &
-         nVar*comm%procSizes, nVar*comm%procDisps, adflow_real, 0, adflow_comm_world, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    call mpi_gatherv(sendBufferd, nVar*comm%nDonor, adflow_real, recvBufferd, &
-         nVar*comm%procSizes, nVar*comm%procDisps, adflow_real, 0, adflow_comm_world, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
-
-    deallocate(sendBuffer, sendBufferd)
-
-  end subroutine commUserIntegrationSurfaceVars_d
-
-subroutine commUserIntegrationSurfaceVars_b(recvBuffer, recvBufferd, varStart, varEnd, comm)
+        deallocate (sendBuffer, sendBufferd)
 
-    use constants
-    use block, onlY : flowDoms, nDom
-    use communication, only : myid, adflow_comm_world
-    use utils, only : EChk
-    use oversetUtilities, only :fracToWeights
+    end subroutine commUserIntegrationSurfaceVars_d
 
-    implicit none
+    subroutine commUserIntegrationSurfaceVars_b(recvBuffer, recvBufferd, varStart, varEnd, comm)
 
-    ! Input/Output
-    real(kind=realType), dimension(:) :: recvBuffer, recvBufferd
-    integer(kind=intType), intent(in) :: varStart, varEnd
-    type(userSurfCommType) :: comm
+        use constants
+        use block, onlY: flowDoms, nDom
+        use communication, only: myid, adflow_comm_world
+        use utils, only: EChk
+        use oversetUtilities, only: fracToWeights
 
-    ! Working
-    real(kind=realType), dimension(:), allocatable :: sendBuffer, sendBufferd
-    integer(Kind=intType) :: d1, i1, j1, k1, jj, k, nvar, i, ierr
-    real(kind=realType), dimension(8) :: weight
+        implicit none
 
-    ! The number of variables we are transferring:
-    nVar = varEnd - varStart + 1
+        ! Input/Output
+        real(kind=realType), dimension(:) :: recvBuffer, recvBufferd
+        integer(kind=intType), intent(in) :: varStart, varEnd
+        type(userSurfCommType) :: comm
 
-    allocate(sendBufferd(nVar*comm%nDonor))
+        ! Working
+        real(kind=realType), dimension(:), allocatable :: sendBuffer, sendBufferd
+        integer(Kind=intType) :: d1, i1, j1, k1, jj, k, nvar, i, ierr
+        real(kind=realType), dimension(8) :: weight
 
-    ! Adjoint of a gatherv is a scatterv. Flip the send/recv relative
-    ! to the forward call.
-    call mpi_scatterv(recvBufferd, nVar*comm%procSizes, nVar*comm%procDisps, adflow_real, &
-         sendBufferd, nVar*comm%nDonor, adflow_real, 0, adflow_comm_world, ierr)
-    call EChk(ierr,__FILE__,__LINE__)
+        ! The number of variables we are transferring:
+        nVar = varEnd - varStart + 1
 
-    ! First generate the interpolated data necessary
-    jj = 0
-    donorLoop: do i=1, comm%nDonor
-       ! Convert the frac to weights
-       call fracToWeights(comm%frac(:, i), weight)
+        allocate (sendBufferd(nVar * comm%nDonor))
 
-       ! Block and indices for easier reading. The +1 is due to the
-       ! pointer offset on realCommVars.
+        ! Adjoint of a gatherv is a scatterv. Flip the send/recv relative
+        ! to the forward call.
+        call mpi_scatterv(recvBufferd, nVar * comm%procSizes, nVar * comm%procDisps, adflow_real, &
+                          sendBufferd, nVar * comm%nDonor, adflow_real, 0, adflow_comm_world, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-       d1 = comm%donorInfo(1, i) ! Block Index
-       i1 = comm%donorInfo(2, i)+1 ! donor I index
-       j1 = comm%donorInfo(3, i)+1 ! donor J index
-       k1 = comm%donorInfo(4, i)+1 ! donor K index
+        ! First generate the interpolated data necessary
+        jj = 0
+        donorLoop: do i = 1, comm%nDonor
+            ! Convert the frac to weights
+            call fracToWeights(comm%frac(:, i), weight)
 
-       ! We are interpolating nVar variables
-       do k=varStart, varEnd
-          jj = jj + 1
-          if (d1 > 0) then ! Is this pt valid?
+            ! Block and indices for easier reading. The +1 is due to the
+            ! pointer offset on realCommVars.
 
-             ! Accumulate back onto the derivative variables.
-             flowDoms(d1,1,1)%realCommVars(k+nZippFlowComm)%var(i1  , j1  , k1) = &
-                  flowDoms(d1,1,1)%realCommVars(k+nZippFlowComm)%var(i1  , j1  , k1) + weight(1)*sendBufferd(jj)
+            d1 = comm%donorInfo(1, i) ! Block Index
+            i1 = comm%donorInfo(2, i) + 1 ! donor I index
+            j1 = comm%donorInfo(3, i) + 1 ! donor J index
+            k1 = comm%donorInfo(4, i) + 1 ! donor K index
 
-             flowDoms(d1,1,1)%realCommVars(k+nZippFlowComm)%var(i1+1, j1  , k1) = &
-                  flowDoms(d1,1,1)%realCommVars(k+nZippFlowComm)%var(i1+1, j1  , k1) + weight(2)*sendBufferd(jj)
+            ! We are interpolating nVar variables
+            do k = varStart, varEnd
+                jj = jj + 1
+                if (d1 > 0) then ! Is this pt valid?
 
-             flowDoms(d1,1,1)%realCommVars(k+nZippFlowComm)%var(i1  , j1+1, k1) = &
-                  flowDoms(d1,1,1)%realCommVars(k+nZippFlowComm)%var(i1  , j1+1, k1) + weight(3)*sendBufferd(jj)
+                    ! Accumulate back onto the derivative variables.
+                    flowDoms(d1, 1, 1)%realCommVars(k + nZippFlowComm)%var(i1, j1, k1) = &
+                        flowDoms(d1, 1, 1)%realCommVars(k + nZippFlowComm)%var(i1, j1, k1) + weight(1) * sendBufferd(jj)
 
-             flowDoms(d1,1,1)%realCommVars(k+nZippFlowComm)%var(i1+1, j1+1, k1) = &
-                  flowDoms(d1,1,1)%realCommVars(k+nZippFlowComm)%var(i1+1, j1+1, k1) + weight(4)*sendBufferd(jj)
+                    flowDoms(d1, 1, 1)%realCommVars(k + nZippFlowComm)%var(i1 + 1, j1, k1) = &
+                    flowDoms(d1, 1, 1)%realCommVars(k + nZippFlowComm)%var(i1 + 1, j1, k1) + weight(2) * sendBufferd(jj)
 
-             flowDoms(d1,1,1)%realCommVars(k+nZippFlowComm)%var(i1  , j1  , k1+1) = &
-                  flowDoms(d1,1,1)%realCommVars(k+nZippFlowComm)%var(i1  , j1  , k1+1) + weight(5)*sendBufferd(jj)
+                    flowDoms(d1, 1, 1)%realCommVars(k + nZippFlowComm)%var(i1, j1 + 1, k1) = &
+                    flowDoms(d1, 1, 1)%realCommVars(k + nZippFlowComm)%var(i1, j1 + 1, k1) + weight(3) * sendBufferd(jj)
 
-             flowDoms(d1,1,1)%realCommVars(k+nZippFlowComm)%var(i1+1, j1  , k1+1) = &
-                  flowDoms(d1,1,1)%realCommVars(k+nZippFlowComm)%var(i1+1, j1  , k1+1) + weight(6)*sendBufferd(jj)
+                    flowDoms(d1, 1, 1)%realCommVars(k + nZippFlowComm)%var(i1 + 1, j1 + 1, k1) = &
+                flowDoms(d1, 1, 1)%realCommVars(k + nZippFlowComm)%var(i1 + 1, j1 + 1, k1) + weight(4) * sendBufferd(jj)
 
-             flowDoms(d1,1,1)%realCommVars(k+nZippFlowComm)%var(i1  , j1+1, k1+1) = &
-                  flowDoms(d1,1,1)%realCommVars(k+nZippFlowComm)%var(i1  , j1+1, k1+1) + weight(7)*sendBufferd(jj)
+                    flowDoms(d1, 1, 1)%realCommVars(k + nZippFlowComm)%var(i1, j1, k1 + 1) = &
+                    flowDoms(d1, 1, 1)%realCommVars(k + nZippFlowComm)%var(i1, j1, k1 + 1) + weight(5) * sendBufferd(jj)
 
-             flowDoms(d1,1,1)%realCommVars(k+nZippFlowComm)%var(i1+1, j1+1, k1+1) = &
-                  flowDoms(d1,1,1)%realCommVars(k+nZippFlowComm)%var(i1+1, j1+1, k1+1) + weight(8)*sendBufferd(jj)
-          end if
-       end do
-    end do donorLoop
+                    flowDoms(d1, 1, 1)%realCommVars(k + nZippFlowComm)%var(i1 + 1, j1, k1 + 1) = &
+                flowDoms(d1, 1, 1)%realCommVars(k + nZippFlowComm)%var(i1 + 1, j1, k1 + 1) + weight(6) * sendBufferd(jj)
 
-    deallocate(sendBufferd)
+                    flowDoms(d1, 1, 1)%realCommVars(k + nZippFlowComm)%var(i1, j1 + 1, k1 + 1) = &
+                flowDoms(d1, 1, 1)%realCommVars(k + nZippFlowComm)%var(i1, j1 + 1, k1 + 1) + weight(7) * sendBufferd(jj)
 
-  end subroutine commUserIntegrationSurfaceVars_b
+                    flowDoms(d1, 1, 1)%realCommVars(k + nZippFlowComm)%var(i1 + 1, j1 + 1, k1 + 1) = &
+            flowDoms(d1, 1, 1)%realCommVars(k + nZippFlowComm)%var(i1 + 1, j1 + 1, k1 + 1) + weight(8) * sendBufferd(jj)
+                end if
+            end do
+        end do donorLoop
 
+        deallocate (sendBufferd)
 
+    end subroutine commUserIntegrationSurfaceVars_b
 
-  subroutine qsortInterpPtType(arr, nn)
+    subroutine qsortInterpPtType(arr, nn)
 
-    use constants
-    use block ! Cannot use-only becuase of <= operator
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nn
+        use constants
+        use block ! Cannot use-only becuase of <= operator
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nn
+
+        type(interpPtType), dimension(*), intent(inout) :: arr
+        !
+        !      Local variables.
+        !
+        integer(kind=intType), parameter :: m = 7
+
+        integer(kind=intType) :: nStack
+        integer(kind=intType) :: i, j, k, r, l, jStack, ii
+
+        integer :: ierr
 
-    type(interpPtType), dimension(*), intent(inout) :: arr
-    !
-    !      Local variables.
-    !
-    integer(kind=intType), parameter :: m = 7
+        type(interpPtType) :: a, tmp
 
-    integer(kind=intType) :: nStack
-    integer(kind=intType) :: i, j, k, r, l, jStack, ii
+        integer(kind=intType), allocatable, dimension(:) :: stack
+        integer(kind=intType), allocatable, dimension(:) :: tmpStack
 
-    integer :: ierr
+        ! Allocate the memory for stack.
 
-    type(interpPtType) :: a, tmp
+        nStack = 100
+        allocate (stack(nStack), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("qsortinterpPt", &
+                           "Memory allocation failure for stack")
 
-    integer(kind=intType), allocatable, dimension(:) :: stack
-    integer(kind=intType), allocatable, dimension(:) :: tmpStack
+        ! Initialize the variables that control the sorting.
 
-    ! Allocate the memory for stack.
+        jStack = 0
+        l = 1
+        r = nn
 
-    nStack = 100
-    allocate(stack(nStack), stat=ierr)
-    if(ierr /= 0)                         &
-         call terminate("qsortinterpPt", &
-         "Memory allocation failure for stack")
+        ! Start of the algorithm
 
-    ! Initialize the variables that control the sorting.
+        do
 
-    jStack = 0
-    l      = 1
-    r      = nn
+            ! Check for the size of the subarray.
 
-    ! Start of the algorithm
+            if ((r - l) < m) then
 
-    do
+                ! Perform insertion sort
 
-       ! Check for the size of the subarray.
+                do j = l + 1, r
+                    a = arr(j)
+                    do i = (j - 1), l, -1
+                        if (arr(i) <= a) exit
+                        arr(i + 1) = arr(i)
+                    end do
+                    arr(i + 1) = a
+                end do
 
-       if((r-l) < m) then
+                ! In case there are no more elements on the stack, exit from
+                ! the outermost do-loop. Algorithm has finished.
 
-          ! Perform insertion sort
+                if (jStack == 0) exit
 
-          do j=l+1,r
-             a = arr(j)
-             do i=(j-1),l,-1
-                if(arr(i) <= a) exit
-                arr(i+1) = arr(i)
-             enddo
-             arr(i+1) = a
-          enddo
+                ! Pop stack and begin a new round of partitioning.
 
-          ! In case there are no more elements on the stack, exit from
-          ! the outermost do-loop. Algorithm has finished.
+                r = stack(jStack)
+                l = stack(jStack - 1)
+                jStack = jStack - 2
 
-          if(jStack == 0) exit
+            else
 
-          ! Pop stack and begin a new round of partitioning.
+                ! Subarray is larger than the threshold for a linear sort.
+                ! Choose median of left, center and right elements as
+                ! partitioning element a.
+                ! Also rearrange so that (l) <= (l+1) <= (r).
 
-          r = stack(jStack)
-          l = stack(jStack-1)
-          jStack = jStack - 2
+                k = (l + r) / 2
+                tmp = arr(k)             ! Swap the elements
+                arr(k) = arr(l + 1)           ! k and l+1.
+                arr(l + 1) = tmp
 
-       else
+                if (arr(r) < arr(l)) then
+                    tmp = arr(l)             ! Swap the elements
+                    arr(l) = arr(r)             ! r and l.
+                    arr(r) = tmp
+                end if
 
-          ! Subarray is larger than the threshold for a linear sort.
-          ! Choose median of left, center and right elements as
-          ! partitioning element a.
-          ! Also rearrange so that (l) <= (l+1) <= (r).
+                if (arr(r) < arr(l + 1)) then
+                    tmp = arr(l + 1)         ! Swap the elements
+                    arr(l + 1) = arr(r)           ! r and l+1.
+                    arr(r) = tmp
+                end if
 
-          k = (l+r)/2
-          tmp      = arr(k)             ! Swap the elements
-          arr(k)   = arr(l+1)           ! k and l+1.
-          arr(l+1) = tmp
+                if (arr(l + 1) < arr(l)) then
+                    tmp = arr(l + 1)         ! Swap the elements
+                    arr(l + 1) = arr(l)           ! l and l+1.
+                    arr(l) = tmp
+                end if
 
-          if(arr(r) < arr(l)) then
-             tmp    = arr(l)             ! Swap the elements
-             arr(l) = arr(r)             ! r and l.
-             arr(r) = tmp
-          endif
+                ! Initialize the pointers for partitioning.
 
-          if(arr(r) < arr(l+1)) then
-             tmp      = arr(l+1)         ! Swap the elements
-             arr(l+1) = arr(r)           ! r and l+1.
-             arr(r)   = tmp
-          endif
+                i = l + 1
+                j = r
+                a = arr(l + 1)
 
-          if(arr(l+1) < arr(l)) then
-             tmp      = arr(l+1)         ! Swap the elements
-             arr(l+1) = arr(l)           ! l and l+1.
-             arr(l)   = tmp
-          endif
+                ! The innermost loop
 
-          ! Initialize the pointers for partitioning.
+                do
 
-          i = l+1
-          j = r
-          a = arr(l+1)
+                    ! Scan up to find element >= a.
+                    do
+                        i = i + 1
+                        if (a <= arr(i)) exit
+                    end do
 
-          ! The innermost loop
+                    ! Scan down to find element <= a.
+                    do
+                        j = j - 1
+                        if (arr(j) <= a) exit
+                    end do
 
-          do
+                    ! Exit the loop in case the pointers i and j crossed.
 
-             ! Scan up to find element >= a.
-             do
-                i = i+1
-                if(a <= arr(i)) exit
-             enddo
+                    if (j < i) exit
 
-             ! Scan down to find element <= a.
-             do
-                j = j-1
-                if(arr(j) <= a) exit
-             enddo
+                    ! Swap the element i and j.
 
-             ! Exit the loop in case the pointers i and j crossed.
+                    tmp = arr(i)
+                    arr(i) = arr(j)
+                    arr(j) = tmp
+                end do
 
-             if(j < i) exit
+                ! Swap the entries j and l+1. Remember that a equals
+                ! arr(l+1).
 
-             ! Swap the element i and j.
+                arr(l + 1) = arr(j)
+                arr(j) = a
 
-             tmp    = arr(i)
-             arr(i) = arr(j)
-             arr(j) = tmp
-          enddo
+                ! Push pointers to larger subarray on stack,
+                ! process smaller subarray immediately.
 
-          ! Swap the entries j and l+1. Remember that a equals
-          ! arr(l+1).
+                jStack = jStack + 2
+                if (jStack > nStack) then
 
-          arr(l+1) = arr(j)
-          arr(j)   = a
+                    ! Storage of the stack is too small. Reallocate.
 
-          ! Push pointers to larger subarray on stack,
-          ! process smaller subarray immediately.
+                    allocate (tmpStack(nStack), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortinterpPt", &
+                                       "Memory allocation error for tmpStack")
+                    tmpStack = stack
 
-          jStack = jStack + 2
-          if(jStack > nStack) then
+                    ! Free the memory of stack, store the old value of nStack
+                    ! in tmp and increase nStack.
 
-             ! Storage of the stack is too small. Reallocate.
+                    deallocate (stack, stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortinterpPt", &
+                                       "Deallocation error for stack")
+                    ii = nStack
+                    nStack = nStack + 100
 
-             allocate(tmpStack(nStack), stat=ierr)
-             if(ierr /= 0)                         &
-                  call terminate("qsortinterpPt", &
-                  "Memory allocation error for tmpStack")
-             tmpStack = stack
+                    ! Allocate the memory for stack and copy the old values
+                    ! from tmpStack.
 
-             ! Free the memory of stack, store the old value of nStack
-             ! in tmp and increase nStack.
+                    allocate (stack(nStack), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortinterpPt", &
+                                       "Memory reallocation error for stack")
+                    stack(1:ii) = tmpStack(1:ii)
 
-             deallocate(stack, stat=ierr)
-             if(ierr /= 0)                         &
-                  call terminate("qsortinterpPt", &
-                  "Deallocation error for stack")
-             ii = nStack
-             nStack = nStack + 100
+                    ! And finally release the memory of tmpStack.
 
-             ! Allocate the memory for stack and copy the old values
-             ! from tmpStack.
+                    deallocate (tmpStack, stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortinterpPt", &
+                                       "Deallocation error for tmpStack")
+                end if
 
-             allocate(stack(nStack), stat=ierr)
-             if(ierr /= 0)                         &
-                  call terminate("qsortinterpPt", &
-                  "Memory reallocation error for stack")
-             stack(1:ii) = tmpStack(1:ii)
-
-             ! And finally release the memory of tmpStack.
-
-             deallocate(tmpStack, stat=ierr)
-             if(ierr /= 0)                         &
-                  call terminate("qsortinterpPt", &
-                  "Deallocation error for tmpStack")
-          endif
-
-          if((r-i+1) >= (j-l)) then
-             stack(jStack)   = r
-             r               = j-1
-             stack(jStack-1) = j
-          else
-             stack(jStack)   = j-1
-             stack(jStack-1) = l
-             l               = j
-          endif
+                if ((r - i + 1) >= (j - l)) then
+                    stack(jStack) = r
+                    r = j - 1
+                    stack(jStack - 1) = j
+                else
+                    stack(jStack) = j - 1
+                    stack(jStack - 1) = l
+                    l = j
+                end if
 
-       endif
-    enddo
+            end if
+        end do
 
-    ! Release the memory of stack.
+        ! Release the memory of stack.
 
-    deallocate(stack, stat=ierr)
-    if(ierr /= 0)                         &
-         call terminate("qsortinterpPt", &
-         "Deallocation error for stack")
+        deallocate (stack, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("qsortinterpPt", &
+                           "Deallocation error for stack")
 
-    ! Check in debug mode whether the array is really sorted.
+        ! Check in debug mode whether the array is really sorted.
 
-    if( debug ) then
-       do i=1,(nn-1)
-          if(arr(i+1) < arr(i))                 &
-               call terminate("qsortinterpPt", &
-               "Array is not sorted correctly")
-       enddo
-    endif
+        if (debug) then
+            do i = 1, (nn - 1)
+                if (arr(i + 1) < arr(i)) &
+                    call terminate("qsortinterpPt", &
+                                   "Array is not sorted correctly")
+            end do
+        end if
 
-  end subroutine qsortInterpPtType
+    end subroutine qsortInterpPtType
 
 end module userSurfaceIntegrations
diff --git a/src/solver/zipperIntegrations.F90 b/src/solver/zipperIntegrations.F90
index c9fe2c4b6..672360364 100644
--- a/src/solver/zipperIntegrations.F90
+++ b/src/solver/zipperIntegrations.F90
@@ -1,738 +1,733 @@
 module zipperIntegrations
 contains
 
-  subroutine flowIntegrationZipper(isInflow, conn, fams, vars, localValues, famList, sps, ptValid)
-
-    ! Integrate over the trianges for the inflow/outflow conditions.
-
-    use constants
-    use blockPointers, only : BCType
-    use sorting, only : famInList
-    use flowVarRefState, only : pRef, pInf, rhoRef, pRef, timeRef, LRef, TRef, rGas, uRef, uInf, rhoInf, gammaInf
-    use inputPhysics, only : pointRef, flowType, rGasDim
-    use flowUtils, only : computePtot, computeTtot
-    use surfaceFamilies, only : familyExchange, BCFamExchange
-    use utils, only : mynorm2, cross_prod
-    implicit none
-
-    ! Input/output Variables
-    logical, intent(in) :: isInflow
-    integer(kind=intType), intent(in), dimension(:, :) :: conn
-    integer(kind=intType), intent(in), dimension(:) :: fams
-    real(kind=realType), dimension(:, :), intent(in) :: vars
-    real(kind=realType), dimension(nLocalValues), intent(inout) :: localValues
-    integer(kind=intType), dimension(:), intent(in) :: famList
-    integer(kind=intType), intent(in) :: sps
-    logical(kind=intType), dimension(:), optional, intent(in) :: ptValid
-
-    ! Working variables
-    integer(kind=intType) :: i, j
-    real(kind=realType) :: sF, vmag, vnm, vxm, vym, vzm, Fx, Fy, Fz, u, v, w, vnmFreeStreamRef
-    real(kind=realType), dimension(3) :: Fp, Mp, FMom, MMom, refPoint, ss, x1, x2, x3, &
-      norm, sFaceCoordRef
-    real(kind=realType) :: pm, Ptot, Ttot, rhom, gammam, MNm, massFlowRateLocal, am
-    real(kind=realType) ::  massFlowRate, mass_Ptot, mass_Ttot, mass_Ps, mass_MN, mass_a, mass_rho, &
-                            mass_Vx, mass_Vy, mass_Vz, mass_nx, mass_ny, mass_nz, mass_Vi
-    real(kind=realType) :: area, cellArea, overCellArea
-    real(kind=realType) :: area_Ptot, area_Ps
-    real(kind=realType) ::  govgm1, gm1ovg, viConst, viLocal, pratio
-    real(kind=realType) ::  mReDim
-    real(kind=realType) :: internalFlowFact, inflowFact, xc, xco, yc, yco, zc, zco, mx, my, mz
-    real(kind=realType), dimension(3)  :: COFSumFx, COFSumFy, COFSumFz
-
-    logical :: triIsValid
-
-    mReDim = sqrt(pRef*rhoRef)
-    Fp = zero
-    Mp = zero
-    FMom = zero
-    MMom = zero
-
-    COFSumFx = zero
-    COFSumFy = zero
-    COFSumFz = zero
-
-    massFlowRate = zero
-    area = zero
-    mass_Ptot = zero
-    mass_Ttot = zero
-    mass_Ps = zero
-    mass_MN = zero
-    mass_a = zero
-    mass_rho = zero
-
-    mass_Vx = zero
-    mass_Vy = zero
-    mass_Vz = zero
-    mass_nx = zero
-    mass_ny = zero
-    mass_nz = zero
-    mass_vi = zero
-
-    area_Ptot = zero
-    area_Ps   = zero
-
-    refPoint(1) = LRef*pointRef(1)
-    refPoint(2) = LRef*pointRef(2)
-    refPoint(3) = LRef*pointRef(3)
-
-    internalFlowFact = one
-    if (flowType == internalFlow) then
-       internalFlowFact = -one
-    end if
-
-    inFlowFact = one
-    if (isInflow) then
-       inflowFact=-one
-    end if
-
-
-    !$AD II-LOOP
-    do i=1, size(conn, 2)
-       if (famInList(fams(i), famList)) then
-
-          ! If the ptValid list is given, check if we should integrate
-          ! this triangle.
-          triIsValid = .True.
-          if (present(ptValid)) then
-             ! Check if each of the three nodes are valid
-             if ((ptValid(conn(1, i)) .eqv. .False.) .or. &
-                 (ptValid(conn(2, i)) .eqv. .False.) .or. &
-                 (ptValid(conn(3, i)) .eqv. .False.)) then
-                triIsValid = .False.
-             end if
-          end if
-
-          validTrianlge: if (triIsValid) then
-
-             ! Compute the averaged values for this triangle
-             vxm = zero; vym = zero; vzm = zero; rhom = zero; pm = zero; MNm = zero; gammam = zero;
-             sF = zero
-             do j=1,3
-                rhom = rhom + vars(conn(j, i), iRho)
-                vxm = vxm + vars(conn(j, i), iVx)
-                vym = vym + vars(conn(j, i), iVy)
-                vzm = vzm + vars(conn(j, i), iVz)
-                pm = pm + vars(conn(j, i), iRhoE)
-                gammam = gammam + vars(conn(j, i), iZippFlowGamma)
-                sF = sF + vars(conn(j, i), iZippFlowSFace)
-             end do
-
-             ! Divide by 3 due to the summation above:
-             rhom = third*rhom
-             vxm = third*vxm
-             vym = third*vym
-             vzm = third*vzm
-             pm = third*pm
-             gammam = third*gammam
-             sF = third*sF
-
-             ! Get the nodes of triangle.
-             x1 = vars(conn(1, i), iZippFLowX:iZippFlowZ)
-             x2 = vars(conn(2, i), iZippFlowX:iZippFlowZ)
-             x3 = vars(conn(3, i), iZippFlowX:iZippFlowZ)
-             call cross_prod(x2-x1, x3-x1, norm)
-             ss = half*norm
-
-             call computePtot(rhom, vxm, vym, vzm, pm, Ptot)
-             call computeTtot(rhom, vxm, vym, vzm, pm, Ttot)
-
-             vnm = vxm*ss(1) + vym*ss(2) + vzm*ss(3)  - sF
-
-             vmag = sqrt((vxm**2 + vym**2 + vzm**2)) - sF
-             am = sqrt(gammam*pm/rhom)
-             MNm = vmag/sqrt(gammam*pm/rhom)
-
-             cellArea = sqrt(ss(1)**2 + ss(2)**2 + ss(3)**2)
-             area = area + cellArea
-             overCellArea = 1/cellArea
-
-             massFlowRateLocal = rhom*vnm*mReDim
-             massFlowRate = massFlowRate + massFlowRateLocal
-
-             pm = pm*pRef
-
-             mass_Ptot = mass_pTot + Ptot * massFlowRateLocal * Pref
-             mass_Ttot = mass_Ttot + Ttot * massFlowRateLocal * Tref
-             mass_rho  = mass_rho + rhom * massFlowRateLocal * rhoRef
-             mass_a  = mass_a + am * massFlowRateLocal * uRef
-
-             mass_Ps = mass_Ps + pm*massFlowRateLocal
-             mass_MN = mass_MN + MNm*massFlowRateLocal
-
-             area_pTot = area_pTot + Ptot * Pref * cellArea
-             area_Ps = area_Ps + pm * cellArea
-
-             sFaceCoordRef(1) = sF * ss(1)*overCellArea
-             sFaceCoordRef(2) = sF * ss(2)*overCellArea
-             sFaceCoordRef(3) = sF * ss(3)*overCellArea
-
-             mass_Vx = mass_Vx + (vxm*uRef - sFaceCoordRef(1)) *massFlowRateLocal
-             mass_Vy = mass_Vy + (vym*uRef - sFaceCoordRef(2)) *massFlowRateLocal
-             mass_Vz = mass_Vz + (vzm*uRef - sFaceCoordRef(3)) *massFlowRateLocal
-
-             govgm1 = gammaInf/(gammaInf-one)
-             gm1ovg = one/govgm1
-             viConst = two * govgm1 * rGasDim
-             ! the prefs in psinf / ptot cancel out so we can just take the ratio
-             ! we need to clip the ratio to stay under one. right next to the wall,
-             ! the pTot can go below the static free stream pressure. To prevent
-             ! nans from the sqrt, we just clip this. This does not affect the computation
-             ! because when pTot is this small, the velocities are also small, and the
-             ! mdot is almost zero, so the cells in this area don't contribute much
-             ! to the mass weighed sum.
-             pratio = min(one, one / pTot)
-             viLocal = sqrt(viConst * (one - (pRatio) ** gm1ovg) * Ttot * Tref)
-             mass_vi = mass_vi + viLocal * massFlowRateLocal
-
-             mass_nx = mass_nx + ss(1)*overCellArea * massFlowRateLocal
-             mass_ny = mass_ny + ss(2)*overCellArea * massFlowRateLocal
-             mass_nz = mass_nz + ss(3)*overCellArea * massFlowRateLocal
-
-
-             ! Compute the average cell center.
-             xco = zero
-             yco = zero
-             zco = zero
-             do j=1,3
-                xco = xco + (vars(conn(1, i), iZippFlowX))
-                yco = yco + (vars(conn(2, i), iZippFlowY))
-                zco = zco + (vars(conn(3, i), iZippFlowZ))
-             end do
-
-             ! Finish average for cell center
-             xco = third*xco
-             yco = third*yco
-             zco = third*zco
-
-             ! x-y-zco is the cell center w.r.t. the origin, x-y-zc is w.r.t. the reference point
-             xc = xco - refPoint(1)
-             yc = yco - refPoint(2)
-             zc = zco - refPoint(3)
-
-             pm = -(pm-pInf*pRef)
-             fx = pm*ss(1)
-             fy = pm*ss(2)
-             fz = pm*ss(3)
-
-             ! Update the pressure force and moment coefficients.
-             Fp(1) = Fp(1) + fx
-             Fp(2) = Fp(2) + fy
-             Fp(3) = Fp(3) + fz
-
-             mx = yc*fz - zc*fy
-             my = zc*fx - xc*fz
-             mz = xc*fy - yc*fx
-
-             Mp(1) = Mp(1) + mx
-             Mp(2) = Mp(2) + my
-             Mp(3) = Mp(3) + mz
-
-             ! Center of force computations. Here we accumulate in the sums.
-             ! Force-X
-             COFSumFx(1) = COFSumFx(1) + xco * fx
-             COFSumFx(2) = COFSumFx(2) + yco * fx
-             COFSumFx(3) = COFSumFx(3) + zco * fx
-
-             ! Force-Y
-             COFSumFy(1) = COFSumFy(1) + xco * fy
-             COFSumFy(2) = COFSumFy(2) + yco * fy
-             COFSumFy(3) = COFSumFy(3) + zco * fy
-
-             ! Force-Z
-             COFSumFz(1) = COFSumFz(1) + xco * fz
-             COFSumFz(2) = COFSumFz(2) + yco * fz
-             COFSumFz(3) = COFSumFz(3) + zco * fz
-
-             ! Momentum forces
-
-             ! Get unit normal vector.
-             ss = ss/cellArea
-             massFlowRateLocal = massFlowRateLocal/timeRef*internalFlowFact*inflowFact
-
-             fx = massFlowRateLocal*ss(1) * vxm
-             fy = massFlowRateLocal*ss(2) * vym
-             fz = massFlowRateLocal*ss(3) * vzm
-
-             FMom(1) = FMom(1) - fx
-             FMom(2) = FMom(2) - fy
-             FMom(3) = FMom(3) - fz
-
-             mx = yc*fz - zc*fy
-             my = zc*fx - xc*fz
-             mz = xc*fy - yc*fx
-
-             MMom(1) = MMom(1) + mx
-             MMom(2) = MMom(2) + my
-             MMom(3) = MMom(3) + mz
-
-             ! Center of force computations. Here we accumulate in the sums.
-             ! Force-X
-             COFSumFx(1) = COFSumFx(1) + xco * fx
-             COFSumFx(2) = COFSumFx(2) + yco * fx
-             COFSumFx(3) = COFSumFx(3) + zco * fx
-
-             ! Force-Y
-             COFSumFy(1) = COFSumFy(1) + xco * fy
-             COFSumFy(2) = COFSumFy(2) + yco * fy
-             COFSumFy(3) = COFSumFy(3) + zco * fy
-
-             ! Force-Z
-             COFSumFz(1) = COFSumFz(1) + xco * fz
-             COFSumFz(2) = COFSumFz(2) + yco * fz
-             COFSumFz(3) = COFSumFz(3) + zco * fz
-          end if validTrianlge
-       end if
-    enddo
-
-    ! Increment the local values array with what we computed here
-    localValues(iMassFlow) = localValues(iMassFlow) + massFlowRate
-    localValues(iArea) = localValues(iArea) + area
-    localValues(iMassRho) = localValues(iMassRho) + mass_rho
-    localValues(iMassa) = localValues(iMassa) + mass_a
-    localValues(iMassPtot) = localValues(iMassPtot) + mass_Ptot
-    localValues(iMassTtot) = localValues(iMassTtot) + mass_Ttot
-    localValues(iMassPs)   = localValues(iMassPs)   + mass_Ps
-    localValues(iMassMN)   = localValues(iMassMN)   + mass_MN
-    localValues(iFp:iFp+2)   = localValues(iFp:iFp+2) + Fp
-    localValues(iFlowFm:iFlowFm+2)   = localValues(iFlowFm:iFlowFm+2) + FMom
-    localValues(iFlowMp:iFlowMp+2)   = localValues(iFlowMp:iFlowMp+2) + Mp
-    localValues(iFlowMm:iFlowMm+2)   = localValues(iFlowMm:iFlowMm+2) + MMom
-
-    localValues(iCoForceX:iCoForceX+2) = localValues(iCoForceX:iCoForceX+2) + COFSumFx
-    localValues(iCoForceY:iCoForceY+2) = localValues(iCoForceY:iCoForceY+2) + COFSumFy
-    localValues(iCoForceZ:iCoForceZ+2) = localValues(iCoForceZ:iCoForceZ+2) + COFSumFz
-
-    localValues(iAreaPTot) = localValues(iAreaPTot) + area_pTot
-    localValues(iAreaPs) = localValues(iAreaPs) + area_Ps
-
-    localValues(iMassVx)   = localValues(iMassVx)   + mass_Vx
-    localValues(iMassVy)   = localValues(iMassVy)   + mass_Vy
-    localValues(iMassVz)   = localValues(iMassVz)   + mass_Vz
-    localValues(iMassnx)   = localValues(iMassnx)   + mass_nx
-    localValues(iMassny)   = localValues(iMassny)   + mass_ny
-    localValues(iMassnz)   = localValues(iMassnz)   + mass_nz
-    localValues(iMassVi) = localValues(iMassVi) + mass_Vi
-
-
-  end subroutine flowIntegrationZipper
-
-  subroutine wallIntegrationZipper(conn, fams, vars, localValues, famList, sps)
-
-    use constants
-    use sorting, only : famInList
-    use flowVarRefState, only : LRef
-    use inputPhysics, only : pointRef
-    use utils, only : mynorm2, cross_prod
-    implicit none
-
-    ! Input/Output
-    integer(kind=intType), intent(in), dimension(:, :) :: conn
-    integer(kind=intType), intent(in), dimension(:) :: fams
-    real(kind=realType), intent(in), dimension(:, :) :: vars
-    real(kind=realType), intent(inout) :: localValues(nLocalValues)
-    integer(kind=intType), dimension(:), intent(in) :: famList
-    integer(kind=intType), intent(in) :: sps
-
-    ! Working
-    real(kind=realType), dimension(3) :: Fp, Fv, Mp, Mv
-    real(kind=realType), dimension(3) :: COFSumFx, COFSumFy, COFSumFz
-
-
-    integer(kind=intType) :: i, j
-    real(kind=realType), dimension(3) :: ss, norm, refPoint
-    real(kind=realType), dimension(3) :: p1, p2, p3, v1, v2, v3, x1, x2, x3
-    real(kind=realType) :: fact, triArea, fx, fy, fz, mx, my, mz, xc, xco, yc, yco, zc, zco
-
-    ! Determine the reference point for the moment computation in
-    ! meters.
-    refPoint(1) = LRef*pointRef(1)
-    refPoint(2) = LRef*pointRef(2)
-    refPoint(3) = LRef*pointRef(3)
-    Fp = zero
-    Fv = zero
-    Mp = zero
-    Mv = zero
-    COFSumFx = zero
-    COFSumFy = zero
-    COFSumFz = zero
-
-    !$AD II-LOOP
-    do i=1, size(conn, 2)
-       if (famInList(fams(i), famList)) then
-
-          ! Get the nodes of triangle.
-          x1 = vars(conn(1, i), iZippWallX:iZIppWallZ)
-          x2 = vars(conn(2, i), iZippWallX:iZIppWallZ)
-          x3 = vars(conn(3, i), iZippWallX:iZIppWallZ)
-          call cross_prod(x2-x1, x3-x1, norm)
-          ss = half*norm
-          ! The third here is to account for the summation of P1, p2
-          ! and P3
-          triArea = mynorm2(ss)*third
-
-          ! Compute the average cell center.
-          xco = third*(x1(1)+x2(1)+x3(1))
-          yco = third*(x1(2)+x2(2)+x3(2))
-          zco = third*(x1(3)+x2(3)+x3(3))
-
-          xc = xco - refPoint(1)
-          yc = yco - refPoint(2)
-          zc = zco - refPoint(3)
-
-          ! Update the pressure force and moment coefficients.
-          p1 = vars(conn(1, i), iZippWallTpx:iZippWallTpz)
-          p2 = vars(conn(2, i), iZippWallTpx:iZippWallTpz)
-          p3 = vars(conn(3, i), iZippWallTpx:iZippWallTpz)
-
-          fx = (p1(1) + p2(1) + p3(1))*triArea
-          fy = (p1(2) + p2(2) + p3(2))*triArea
-          fz = (p1(3) + p2(3) + p3(3))*triArea
-
-          Fp(1) = Fp(1) + fx
-          Fp(2) = Fp(2) + fy
-          Fp(3) = Fp(3) + fz
-
-          mx = yc*fz - zc*fy
-          my = zc*fx - xc*fz
-          mz = xc*fy - yc*fx
-
-          Mp(1) = Mp(1) + mx
-          Mp(2) = Mp(2) + my
-          Mp(3) = Mp(3) + mz
-
-          ! accumulate in the sums. each force component is tracked separately
-
-          ! Force-X
-          COFSumFx(1) = COFSumFx(1) + xco * fx
-          COFSumFx(2) = COFSumFx(2) + yco * fx
-          COFSumFx(3) = COFSumFx(3) + zco * fx
-
-          ! Force-Y
-          COFSumFy(1) = COFSumFy(1) + xco * fy
-          COFSumFy(2) = COFSumFy(2) + yco * fy
-          COFSumFy(3) = COFSumFy(3) + zco * fy
-
-          ! Force-Z
-          COFSumFz(1) = COFSumFz(1) + xco * fz
-          COFSumFz(2) = COFSumFz(2) + yco * fz
-          COFSumFz(3) = COFSumFz(3) + zco * fz
-
-          ! Update the viscous force and moment coefficients
-          v1 = vars(conn(1, i), iZippWallTvx:iZippWallTvz)
-          v2 = vars(conn(2, i), iZippWallTvx:iZippWallTvz)
-          v3 = vars(conn(3, i), iZippWallTvx:iZippWallTvz)
-
-          fx = (v1(1) + v2(1) + v3(1))*triArea
-          fy = (v1(2) + v2(2) + v3(2))*triArea
-          fz = (v1(3) + v2(3) + v3(3))*triArea
-
-          ! Note: momentum forces have opposite sign to pressure forces
-          Fv(1) = Fv(1) + fx
-          Fv(2) = Fv(2) + fy
-          Fv(3) = Fv(3) + fz
-
-          mx = yc*fz - zc*fy
-          my = zc*fx - xc*fz
-          mz = xc*fy - yc*fx
-
-          Mv(1) = Mv(1) + mx
-          Mv(2) = Mv(2) + my
-          Mv(3) = Mv(3) + mz
-
-          ! accumulate in the sums. each force component is tracked separately
-
-          ! Force-X
-          COFSumFx(1) = COFSumFx(1) + xco * fx
-          COFSumFx(2) = COFSumFx(2) + yco * fx
-          COFSumFx(3) = COFSumFx(3) + zco * fx
-
-          ! Force-Y
-          COFSumFy(1) = COFSumFy(1) + xco * fy
-          COFSumFy(2) = COFSumFy(2) + yco * fy
-          COFSumFy(3) = COFSumFy(3) + zco * fy
-
-          ! Force-Z
-          COFSumFz(1) = COFSumFz(1) + xco * fz
-          COFSumFz(2) = COFSumFz(2) + yco * fz
-          COFSumFz(3) = COFSumFz(3) + zco * fz
-       end if
-    enddo
-
-    ! Increment into the local vector
-    localValues(iFp:iFp+2) = localValues(iFp:iFp+2) + Fp
-    localValues(iFv:iFv+2) = localValues(iFv:iFv+2) + Fv
-    localValues(iMp:iMp+2) = localValues(iMp:iMp+2) + Mp
-    localValues(iMv:iMv+2) = localValues(iMv:iMv+2) + Mv
-    localValues(iCoForceX:iCoForceX+2) = localValues(iCoForceX:iCoForceX+2) + COFSumFx
-    localValues(iCoForceY:iCoForceY+2) = localValues(iCoForceY:iCoForceY+2) + COFSumFy
-    localValues(iCoForceZ:iCoForceZ+2) = localValues(iCoForceZ:iCoForceZ+2) + COFSumFz
-
-  end subroutine wallIntegrationZipper
+    subroutine flowIntegrationZipper(isInflow, conn, fams, vars, localValues, famList, sps, ptValid)
+
+        ! Integrate over the trianges for the inflow/outflow conditions.
+
+        use constants
+        use blockPointers, only: BCType
+        use sorting, only: famInList
+        use flowVarRefState, only: pRef, pInf, rhoRef, pRef, timeRef, LRef, TRef, rGas, uRef, uInf, rhoInf, gammaInf
+        use inputPhysics, only: pointRef, flowType, rGasDim
+        use flowUtils, only: computePtot, computeTtot
+        use surfaceFamilies, only: familyExchange, BCFamExchange
+        use utils, only: mynorm2, cross_prod
+        implicit none
+
+        ! Input/output Variables
+        logical, intent(in) :: isInflow
+        integer(kind=intType), intent(in), dimension(:, :) :: conn
+        integer(kind=intType), intent(in), dimension(:) :: fams
+        real(kind=realType), dimension(:, :), intent(in) :: vars
+        real(kind=realType), dimension(nLocalValues), intent(inout) :: localValues
+        integer(kind=intType), dimension(:), intent(in) :: famList
+        integer(kind=intType), intent(in) :: sps
+        logical(kind=intType), dimension(:), optional, intent(in) :: ptValid
+
+        ! Working variables
+        integer(kind=intType) :: i, j
+        real(kind=realType) :: sF, vmag, vnm, vxm, vym, vzm, Fx, Fy, Fz, u, v, w, vnmFreeStreamRef
+        real(kind=realType), dimension(3) :: Fp, Mp, FMom, MMom, refPoint, ss, x1, x2, x3, &
+                                             norm, sFaceCoordRef
+        real(kind=realType) :: pm, Ptot, Ttot, rhom, gammam, MNm, massFlowRateLocal, am
+        real(kind=realType) :: massFlowRate, mass_Ptot, mass_Ttot, mass_Ps, mass_MN, mass_a, mass_rho, &
+                               mass_Vx, mass_Vy, mass_Vz, mass_nx, mass_ny, mass_nz, mass_Vi
+        real(kind=realType) :: area, cellArea, overCellArea
+        real(kind=realType) :: area_Ptot, area_Ps
+        real(kind=realType) :: govgm1, gm1ovg, viConst, viLocal, pratio
+        real(kind=realType) :: mReDim
+        real(kind=realType) :: internalFlowFact, inflowFact, xc, xco, yc, yco, zc, zco, mx, my, mz
+        real(kind=realType), dimension(3) :: COFSumFx, COFSumFy, COFSumFz
+
+        logical :: triIsValid
+
+        mReDim = sqrt(pRef * rhoRef)
+        Fp = zero
+        Mp = zero
+        FMom = zero
+        MMom = zero
+
+        COFSumFx = zero
+        COFSumFy = zero
+        COFSumFz = zero
+
+        massFlowRate = zero
+        area = zero
+        mass_Ptot = zero
+        mass_Ttot = zero
+        mass_Ps = zero
+        mass_MN = zero
+        mass_a = zero
+        mass_rho = zero
+
+        mass_Vx = zero
+        mass_Vy = zero
+        mass_Vz = zero
+        mass_nx = zero
+        mass_ny = zero
+        mass_nz = zero
+        mass_vi = zero
+
+        area_Ptot = zero
+        area_Ps = zero
+
+        refPoint(1) = LRef * pointRef(1)
+        refPoint(2) = LRef * pointRef(2)
+        refPoint(3) = LRef * pointRef(3)
+
+        internalFlowFact = one
+        if (flowType == internalFlow) then
+            internalFlowFact = -one
+        end if
+
+        inFlowFact = one
+        if (isInflow) then
+            inflowFact = -one
+        end if
+
+        !$AD II-LOOP
+        do i = 1, size(conn, 2)
+            if (famInList(fams(i), famList)) then
+
+                ! If the ptValid list is given, check if we should integrate
+                ! this triangle.
+                triIsValid = .True.
+                if (present(ptValid)) then
+                    ! Check if each of the three nodes are valid
+                    if ((ptValid(conn(1, i)) .eqv. .False.) .or. &
+                        (ptValid(conn(2, i)) .eqv. .False.) .or. &
+                        (ptValid(conn(3, i)) .eqv. .False.)) then
+                        triIsValid = .False.
+                    end if
+                end if
+
+                validTrianlge: if (triIsValid) then
+
+                    ! Compute the averaged values for this triangle
+                    vxm = zero; vym = zero; vzm = zero; rhom = zero; pm = zero; MNm = zero; gammam = zero; 
+                    sF = zero
+                    do j = 1, 3
+                        rhom = rhom + vars(conn(j, i), iRho)
+                        vxm = vxm + vars(conn(j, i), iVx)
+                        vym = vym + vars(conn(j, i), iVy)
+                        vzm = vzm + vars(conn(j, i), iVz)
+                        pm = pm + vars(conn(j, i), iRhoE)
+                        gammam = gammam + vars(conn(j, i), iZippFlowGamma)
+                        sF = sF + vars(conn(j, i), iZippFlowSFace)
+                    end do
+
+                    ! Divide by 3 due to the summation above:
+                    rhom = third * rhom
+                    vxm = third * vxm
+                    vym = third * vym
+                    vzm = third * vzm
+                    pm = third * pm
+                    gammam = third * gammam
+                    sF = third * sF
+
+                    ! Get the nodes of triangle.
+                    x1 = vars(conn(1, i), iZippFLowX:iZippFlowZ)
+                    x2 = vars(conn(2, i), iZippFlowX:iZippFlowZ)
+                    x3 = vars(conn(3, i), iZippFlowX:iZippFlowZ)
+                    call cross_prod(x2 - x1, x3 - x1, norm)
+                    ss = half * norm
+
+                    call computePtot(rhom, vxm, vym, vzm, pm, Ptot)
+                    call computeTtot(rhom, vxm, vym, vzm, pm, Ttot)
+
+                    vnm = vxm * ss(1) + vym * ss(2) + vzm * ss(3) - sF
+
+                    vmag = sqrt((vxm**2 + vym**2 + vzm**2)) - sF
+                    am = sqrt(gammam * pm / rhom)
+                    MNm = vmag / sqrt(gammam * pm / rhom)
+
+                    cellArea = sqrt(ss(1)**2 + ss(2)**2 + ss(3)**2)
+                    area = area + cellArea
+                    overCellArea = 1 / cellArea
+
+                    massFlowRateLocal = rhom * vnm * mReDim
+                    massFlowRate = massFlowRate + massFlowRateLocal
+
+                    pm = pm * pRef
+
+                    mass_Ptot = mass_pTot + Ptot * massFlowRateLocal * Pref
+                    mass_Ttot = mass_Ttot + Ttot * massFlowRateLocal * Tref
+                    mass_rho = mass_rho + rhom * massFlowRateLocal * rhoRef
+                    mass_a = mass_a + am * massFlowRateLocal * uRef
+
+                    mass_Ps = mass_Ps + pm * massFlowRateLocal
+                    mass_MN = mass_MN + MNm * massFlowRateLocal
+
+                    area_pTot = area_pTot + Ptot * Pref * cellArea
+                    area_Ps = area_Ps + pm * cellArea
+
+                    sFaceCoordRef(1) = sF * ss(1) * overCellArea
+                    sFaceCoordRef(2) = sF * ss(2) * overCellArea
+                    sFaceCoordRef(3) = sF * ss(3) * overCellArea
+
+                    mass_Vx = mass_Vx + (vxm * uRef - sFaceCoordRef(1)) * massFlowRateLocal
+                    mass_Vy = mass_Vy + (vym * uRef - sFaceCoordRef(2)) * massFlowRateLocal
+                    mass_Vz = mass_Vz + (vzm * uRef - sFaceCoordRef(3)) * massFlowRateLocal
+
+                    govgm1 = gammaInf / (gammaInf - one)
+                    gm1ovg = one / govgm1
+                    viConst = two * govgm1 * rGasDim
+                    ! the prefs in psinf / ptot cancel out so we can just take the ratio
+                    ! we need to clip the ratio to stay under one. right next to the wall,
+                    ! the pTot can go below the static free stream pressure. To prevent
+                    ! nans from the sqrt, we just clip this. This does not affect the computation
+                    ! because when pTot is this small, the velocities are also small, and the
+                    ! mdot is almost zero, so the cells in this area don't contribute much
+                    ! to the mass weighed sum.
+                    pratio = min(one, one / pTot)
+                    viLocal = sqrt(viConst * (one - (pRatio)**gm1ovg) * Ttot * Tref)
+                    mass_vi = mass_vi + viLocal * massFlowRateLocal
+
+                    mass_nx = mass_nx + ss(1) * overCellArea * massFlowRateLocal
+                    mass_ny = mass_ny + ss(2) * overCellArea * massFlowRateLocal
+                    mass_nz = mass_nz + ss(3) * overCellArea * massFlowRateLocal
+
+                    ! Compute the average cell center.
+                    xco = zero
+                    yco = zero
+                    zco = zero
+                    do j = 1, 3
+                        xco = xco + (vars(conn(1, i), iZippFlowX))
+                        yco = yco + (vars(conn(2, i), iZippFlowY))
+                        zco = zco + (vars(conn(3, i), iZippFlowZ))
+                    end do
+
+                    ! Finish average for cell center
+                    xco = third * xco
+                    yco = third * yco
+                    zco = third * zco
+
+                    ! x-y-zco is the cell center w.r.t. the origin, x-y-zc is w.r.t. the reference point
+                    xc = xco - refPoint(1)
+                    yc = yco - refPoint(2)
+                    zc = zco - refPoint(3)
+
+                    pm = -(pm - pInf * pRef)
+                    fx = pm * ss(1)
+                    fy = pm * ss(2)
+                    fz = pm * ss(3)
+
+                    ! Update the pressure force and moment coefficients.
+                    Fp(1) = Fp(1) + fx
+                    Fp(2) = Fp(2) + fy
+                    Fp(3) = Fp(3) + fz
+
+                    mx = yc * fz - zc * fy
+                    my = zc * fx - xc * fz
+                    mz = xc * fy - yc * fx
+
+                    Mp(1) = Mp(1) + mx
+                    Mp(2) = Mp(2) + my
+                    Mp(3) = Mp(3) + mz
+
+                    ! Center of force computations. Here we accumulate in the sums.
+                    ! Force-X
+                    COFSumFx(1) = COFSumFx(1) + xco * fx
+                    COFSumFx(2) = COFSumFx(2) + yco * fx
+                    COFSumFx(3) = COFSumFx(3) + zco * fx
+
+                    ! Force-Y
+                    COFSumFy(1) = COFSumFy(1) + xco * fy
+                    COFSumFy(2) = COFSumFy(2) + yco * fy
+                    COFSumFy(3) = COFSumFy(3) + zco * fy
+
+                    ! Force-Z
+                    COFSumFz(1) = COFSumFz(1) + xco * fz
+                    COFSumFz(2) = COFSumFz(2) + yco * fz
+                    COFSumFz(3) = COFSumFz(3) + zco * fz
+
+                    ! Momentum forces
+
+                    ! Get unit normal vector.
+                    ss = ss / cellArea
+                    massFlowRateLocal = massFlowRateLocal / timeRef * internalFlowFact * inflowFact
+
+                    fx = massFlowRateLocal * ss(1) * vxm
+                    fy = massFlowRateLocal * ss(2) * vym
+                    fz = massFlowRateLocal * ss(3) * vzm
+
+                    FMom(1) = FMom(1) - fx
+                    FMom(2) = FMom(2) - fy
+                    FMom(3) = FMom(3) - fz
+
+                    mx = yc * fz - zc * fy
+                    my = zc * fx - xc * fz
+                    mz = xc * fy - yc * fx
+
+                    MMom(1) = MMom(1) + mx
+                    MMom(2) = MMom(2) + my
+                    MMom(3) = MMom(3) + mz
+
+                    ! Center of force computations. Here we accumulate in the sums.
+                    ! Force-X
+                    COFSumFx(1) = COFSumFx(1) + xco * fx
+                    COFSumFx(2) = COFSumFx(2) + yco * fx
+                    COFSumFx(3) = COFSumFx(3) + zco * fx
+
+                    ! Force-Y
+                    COFSumFy(1) = COFSumFy(1) + xco * fy
+                    COFSumFy(2) = COFSumFy(2) + yco * fy
+                    COFSumFy(3) = COFSumFy(3) + zco * fy
+
+                    ! Force-Z
+                    COFSumFz(1) = COFSumFz(1) + xco * fz
+                    COFSumFz(2) = COFSumFz(2) + yco * fz
+                    COFSumFz(3) = COFSumFz(3) + zco * fz
+                end if validTrianlge
+            end if
+        end do
+
+        ! Increment the local values array with what we computed here
+        localValues(iMassFlow) = localValues(iMassFlow) + massFlowRate
+        localValues(iArea) = localValues(iArea) + area
+        localValues(iMassRho) = localValues(iMassRho) + mass_rho
+        localValues(iMassa) = localValues(iMassa) + mass_a
+        localValues(iMassPtot) = localValues(iMassPtot) + mass_Ptot
+        localValues(iMassTtot) = localValues(iMassTtot) + mass_Ttot
+        localValues(iMassPs) = localValues(iMassPs) + mass_Ps
+        localValues(iMassMN) = localValues(iMassMN) + mass_MN
+        localValues(iFp:iFp + 2) = localValues(iFp:iFp + 2) + Fp
+        localValues(iFlowFm:iFlowFm + 2) = localValues(iFlowFm:iFlowFm + 2) + FMom
+        localValues(iFlowMp:iFlowMp + 2) = localValues(iFlowMp:iFlowMp + 2) + Mp
+        localValues(iFlowMm:iFlowMm + 2) = localValues(iFlowMm:iFlowMm + 2) + MMom
+
+        localValues(iCoForceX:iCoForceX + 2) = localValues(iCoForceX:iCoForceX + 2) + COFSumFx
+        localValues(iCoForceY:iCoForceY + 2) = localValues(iCoForceY:iCoForceY + 2) + COFSumFy
+        localValues(iCoForceZ:iCoForceZ + 2) = localValues(iCoForceZ:iCoForceZ + 2) + COFSumFz
+
+        localValues(iAreaPTot) = localValues(iAreaPTot) + area_pTot
+        localValues(iAreaPs) = localValues(iAreaPs) + area_Ps
+
+        localValues(iMassVx) = localValues(iMassVx) + mass_Vx
+        localValues(iMassVy) = localValues(iMassVy) + mass_Vy
+        localValues(iMassVz) = localValues(iMassVz) + mass_Vz
+        localValues(iMassnx) = localValues(iMassnx) + mass_nx
+        localValues(iMassny) = localValues(iMassny) + mass_ny
+        localValues(iMassnz) = localValues(iMassnz) + mass_nz
+        localValues(iMassVi) = localValues(iMassVi) + mass_Vi
+
+    end subroutine flowIntegrationZipper
+
+    subroutine wallIntegrationZipper(conn, fams, vars, localValues, famList, sps)
+
+        use constants
+        use sorting, only: famInList
+        use flowVarRefState, only: LRef
+        use inputPhysics, only: pointRef
+        use utils, only: mynorm2, cross_prod
+        implicit none
+
+        ! Input/Output
+        integer(kind=intType), intent(in), dimension(:, :) :: conn
+        integer(kind=intType), intent(in), dimension(:) :: fams
+        real(kind=realType), intent(in), dimension(:, :) :: vars
+        real(kind=realType), intent(inout) :: localValues(nLocalValues)
+        integer(kind=intType), dimension(:), intent(in) :: famList
+        integer(kind=intType), intent(in) :: sps
+
+        ! Working
+        real(kind=realType), dimension(3) :: Fp, Fv, Mp, Mv
+        real(kind=realType), dimension(3) :: COFSumFx, COFSumFy, COFSumFz
+
+        integer(kind=intType) :: i, j
+        real(kind=realType), dimension(3) :: ss, norm, refPoint
+        real(kind=realType), dimension(3) :: p1, p2, p3, v1, v2, v3, x1, x2, x3
+        real(kind=realType) :: fact, triArea, fx, fy, fz, mx, my, mz, xc, xco, yc, yco, zc, zco
+
+        ! Determine the reference point for the moment computation in
+        ! meters.
+        refPoint(1) = LRef * pointRef(1)
+        refPoint(2) = LRef * pointRef(2)
+        refPoint(3) = LRef * pointRef(3)
+        Fp = zero
+        Fv = zero
+        Mp = zero
+        Mv = zero
+        COFSumFx = zero
+        COFSumFy = zero
+        COFSumFz = zero
+
+        !$AD II-LOOP
+        do i = 1, size(conn, 2)
+            if (famInList(fams(i), famList)) then
+
+                ! Get the nodes of triangle.
+                x1 = vars(conn(1, i), iZippWallX:iZIppWallZ)
+                x2 = vars(conn(2, i), iZippWallX:iZIppWallZ)
+                x3 = vars(conn(3, i), iZippWallX:iZIppWallZ)
+                call cross_prod(x2 - x1, x3 - x1, norm)
+                ss = half * norm
+                ! The third here is to account for the summation of P1, p2
+                ! and P3
+                triArea = mynorm2(ss) * third
+
+                ! Compute the average cell center.
+                xco = third * (x1(1) + x2(1) + x3(1))
+                yco = third * (x1(2) + x2(2) + x3(2))
+                zco = third * (x1(3) + x2(3) + x3(3))
+
+                xc = xco - refPoint(1)
+                yc = yco - refPoint(2)
+                zc = zco - refPoint(3)
+
+                ! Update the pressure force and moment coefficients.
+                p1 = vars(conn(1, i), iZippWallTpx:iZippWallTpz)
+                p2 = vars(conn(2, i), iZippWallTpx:iZippWallTpz)
+                p3 = vars(conn(3, i), iZippWallTpx:iZippWallTpz)
+
+                fx = (p1(1) + p2(1) + p3(1)) * triArea
+                fy = (p1(2) + p2(2) + p3(2)) * triArea
+                fz = (p1(3) + p2(3) + p3(3)) * triArea
+
+                Fp(1) = Fp(1) + fx
+                Fp(2) = Fp(2) + fy
+                Fp(3) = Fp(3) + fz
+
+                mx = yc * fz - zc * fy
+                my = zc * fx - xc * fz
+                mz = xc * fy - yc * fx
+
+                Mp(1) = Mp(1) + mx
+                Mp(2) = Mp(2) + my
+                Mp(3) = Mp(3) + mz
+
+                ! accumulate in the sums. each force component is tracked separately
+
+                ! Force-X
+                COFSumFx(1) = COFSumFx(1) + xco * fx
+                COFSumFx(2) = COFSumFx(2) + yco * fx
+                COFSumFx(3) = COFSumFx(3) + zco * fx
+
+                ! Force-Y
+                COFSumFy(1) = COFSumFy(1) + xco * fy
+                COFSumFy(2) = COFSumFy(2) + yco * fy
+                COFSumFy(3) = COFSumFy(3) + zco * fy
+
+                ! Force-Z
+                COFSumFz(1) = COFSumFz(1) + xco * fz
+                COFSumFz(2) = COFSumFz(2) + yco * fz
+                COFSumFz(3) = COFSumFz(3) + zco * fz
+
+                ! Update the viscous force and moment coefficients
+                v1 = vars(conn(1, i), iZippWallTvx:iZippWallTvz)
+                v2 = vars(conn(2, i), iZippWallTvx:iZippWallTvz)
+                v3 = vars(conn(3, i), iZippWallTvx:iZippWallTvz)
+
+                fx = (v1(1) + v2(1) + v3(1)) * triArea
+                fy = (v1(2) + v2(2) + v3(2)) * triArea
+                fz = (v1(3) + v2(3) + v3(3)) * triArea
+
+                ! Note: momentum forces have opposite sign to pressure forces
+                Fv(1) = Fv(1) + fx
+                Fv(2) = Fv(2) + fy
+                Fv(3) = Fv(3) + fz
+
+                mx = yc * fz - zc * fy
+                my = zc * fx - xc * fz
+                mz = xc * fy - yc * fx
+
+                Mv(1) = Mv(1) + mx
+                Mv(2) = Mv(2) + my
+                Mv(3) = Mv(3) + mz
+
+                ! accumulate in the sums. each force component is tracked separately
+
+                ! Force-X
+                COFSumFx(1) = COFSumFx(1) + xco * fx
+                COFSumFx(2) = COFSumFx(2) + yco * fx
+                COFSumFx(3) = COFSumFx(3) + zco * fx
+
+                ! Force-Y
+                COFSumFy(1) = COFSumFy(1) + xco * fy
+                COFSumFy(2) = COFSumFy(2) + yco * fy
+                COFSumFy(3) = COFSumFy(3) + zco * fy
+
+                ! Force-Z
+                COFSumFz(1) = COFSumFz(1) + xco * fz
+                COFSumFz(2) = COFSumFz(2) + yco * fz
+                COFSumFz(3) = COFSumFz(3) + zco * fz
+            end if
+        end do
+
+        ! Increment into the local vector
+        localValues(iFp:iFp + 2) = localValues(iFp:iFp + 2) + Fp
+        localValues(iFv:iFv + 2) = localValues(iFv:iFv + 2) + Fv
+        localValues(iMp:iMp + 2) = localValues(iMp:iMp + 2) + Mp
+        localValues(iMv:iMv + 2) = localValues(iMv:iMv + 2) + Mv
+        localValues(iCoForceX:iCoForceX + 2) = localValues(iCoForceX:iCoForceX + 2) + COFSumFx
+        localValues(iCoForceY:iCoForceY + 2) = localValues(iCoForceY:iCoForceY + 2) + COFSumFy
+        localValues(iCoForceZ:iCoForceZ + 2) = localValues(iCoForceZ:iCoForceZ + 2) + COFSumFz
+
+    end subroutine wallIntegrationZipper
 
 #ifndef USE_TAPENADE
 
-  subroutine integrateZippers(localValues, famList, sps)
-    !--------------------------------------------------------------
-    ! Manual Differentiation Warning: Modifying this routine requires
-    ! modifying the hand-written forward and reverse routines.
-    ! --------------------------------------------------------------
-
-    ! Integrate over the triangles formed by the zipper mesh. This
-    ! will perform both all necesasry zipper integrations. Currently
-    ! this includes the wall force integrations as well as the
-    ! flow-though surface integration.
-
-    use constants
-    use oversetData, only : zipperMeshes, zipperMesh
-    use haloExchange, only : wallIntegrationZipperComm, flowIntegrationZipperComm
-    implicit none
-
-    ! Input Variables
-    real(kind=realType), dimension(nLocalValues), intent(inout) :: localValues
-    integer(kind=intType), dimension(:), intent(in) :: famList
-    integer(kind=intType), intent(in) :: sps
-    real(kind=realType), dimension(:, :), allocatable :: vars
-    type(zipperMesh), pointer :: zipper
-
-    ! Determine if we have a wall Zipper:
-    zipper => zipperMeshes(iBCGroupWalls)
-    if (zipper%allocated) then
-
-       ! Allocate space necessary to store variables. Only non-zero on
-       ! root proc.
-       allocate(vars(size(zipper%indices), nZippWallComm))
-
-       ! Gather up the required variables in "vars" on the root
-       ! proc. This routine is differientated by hand.
-       call wallIntegrationZipperComm(vars, sps)
-
-       ! Perform actual integration. Tapenade ADs this routine.
-       call wallIntegrationZipper(zipper%conn, zipper%fam, vars, localValues, famList, sps)
-
-       ! Cleanup vars
-       deallocate(vars)
-    end if
-
-    zipper => zipperMeshes(iBCGroupInflow)
-    ! Determine if we have a flowthrough Zipper:
-    if (zipper%allocated) then
-
-       ! Allocate space necessary to store variables. Only non-zero on
-       ! root proc.
-       allocate(vars(size(zipper%indices), nZippFlowComm))
-
-       ! Gather up the required variables in "vars" on the root
-       ! proc. This routine is differientated by hand.
-       call flowIntegrationZipperComm(.true., vars, sps)
-
-       ! Perform actual integration. Tapenade ADs this routine.
-       call flowIntegrationZipper(.true., zipper%conn, zipper%fam, vars, &
-            localValues, famList, sps)
-
-       ! Cleanup vars
-       deallocate(vars)
-    end if
-
-    zipper => zipperMeshes(iBCGroupOutflow)
-    ! Determine if we have a flowthrough Zipper:
-    if (zipper%allocated) then
-
-       ! Allocate space necessary to store variables. Only non-zero on
-       ! root proc.
-       allocate(vars(size(zipper%indices), nZippFlowComm))
-
-       ! Gather up the required variables in "vars" on the root
-       ! proc. This routine is differientated by hand.
-       call flowIntegrationZipperComm(.false., vars, sps)
-
-       ! Perform actual integration. Tapenade ADs this routine.
-       call flowIntegrationZipper(.false., zipper%conn, zipper%fam, vars, &
-            localValues, famList, sps)
-
-       ! Cleanup vars
-       deallocate(vars)
-    end if
-  end subroutine integrateZippers
-
+    subroutine integrateZippers(localValues, famList, sps)
+        !--------------------------------------------------------------
+        ! Manual Differentiation Warning: Modifying this routine requires
+        ! modifying the hand-written forward and reverse routines.
+        ! --------------------------------------------------------------
+
+        ! Integrate over the triangles formed by the zipper mesh. This
+        ! will perform both all necesasry zipper integrations. Currently
+        ! this includes the wall force integrations as well as the
+        ! flow-though surface integration.
+
+        use constants
+        use oversetData, only: zipperMeshes, zipperMesh
+        use haloExchange, only: wallIntegrationZipperComm, flowIntegrationZipperComm
+        implicit none
+
+        ! Input Variables
+        real(kind=realType), dimension(nLocalValues), intent(inout) :: localValues
+        integer(kind=intType), dimension(:), intent(in) :: famList
+        integer(kind=intType), intent(in) :: sps
+        real(kind=realType), dimension(:, :), allocatable :: vars
+        type(zipperMesh), pointer :: zipper
+
+        ! Determine if we have a wall Zipper:
+        zipper => zipperMeshes(iBCGroupWalls)
+        if (zipper%allocated) then
+
+            ! Allocate space necessary to store variables. Only non-zero on
+            ! root proc.
+            allocate (vars(size(zipper%indices), nZippWallComm))
+
+            ! Gather up the required variables in "vars" on the root
+            ! proc. This routine is differientated by hand.
+            call wallIntegrationZipperComm(vars, sps)
+
+            ! Perform actual integration. Tapenade ADs this routine.
+            call wallIntegrationZipper(zipper%conn, zipper%fam, vars, localValues, famList, sps)
+
+            ! Cleanup vars
+            deallocate (vars)
+        end if
+
+        zipper => zipperMeshes(iBCGroupInflow)
+        ! Determine if we have a flowthrough Zipper:
+        if (zipper%allocated) then
+
+            ! Allocate space necessary to store variables. Only non-zero on
+            ! root proc.
+            allocate (vars(size(zipper%indices), nZippFlowComm))
+
+            ! Gather up the required variables in "vars" on the root
+            ! proc. This routine is differientated by hand.
+            call flowIntegrationZipperComm(.true., vars, sps)
+
+            ! Perform actual integration. Tapenade ADs this routine.
+            call flowIntegrationZipper(.true., zipper%conn, zipper%fam, vars, &
+                                       localValues, famList, sps)
+
+            ! Cleanup vars
+            deallocate (vars)
+        end if
+
+        zipper => zipperMeshes(iBCGroupOutflow)
+        ! Determine if we have a flowthrough Zipper:
+        if (zipper%allocated) then
+
+            ! Allocate space necessary to store variables. Only non-zero on
+            ! root proc.
+            allocate (vars(size(zipper%indices), nZippFlowComm))
+
+            ! Gather up the required variables in "vars" on the root
+            ! proc. This routine is differientated by hand.
+            call flowIntegrationZipperComm(.false., vars, sps)
+
+            ! Perform actual integration. Tapenade ADs this routine.
+            call flowIntegrationZipper(.false., zipper%conn, zipper%fam, vars, &
+                                       localValues, famList, sps)
+
+            ! Cleanup vars
+            deallocate (vars)
+        end if
+    end subroutine integrateZippers
 
 #ifndef USE_COMPLEX
-  subroutine integrateZippers_d(localValues, localValuesd, famList, sps)
-    !------------------------------------------------------------------------
-    ! Manual Differentiation Warning: This routine is differentiated by hand.
-    ! -----------------------------------------------------------------------
-
-    ! Forward mode linearization of the zipper integration.
-
-    use constants
-    use oversetData, only : zipperMeshes, zipperMesh
-    use haloExchange, only : wallIntegrationZipperComm_d, flowIntegrationZipperComm_d
-    use zipperIntegrations_d, only : flowIntegrationZipper_d, wallIntegrationZipper_d
-    implicit none
-
-    ! Input Variables
-    real(kind=realType), dimension(nLocalValues), intent(inout) :: localValues, localValuesd
-    integer(kind=intType), dimension(:), intent(in) :: famList
-    integer(kind=intType), intent(in) :: sps
-    real(kind=realType), dimension(:, :), allocatable :: vars, varsd
-    type(zipperMesh), pointer :: zipper
-
-    zipper => zipperMeshes(iBCGroupWalls)
-    if (zipper%allocated) then
-
-       ! Allocate space necessary to store variables. Only non-zero on
-       ! root proc.
-       allocate(vars(size(zipper%indices), nZippWallComm), varsd(size(zipper%indices), nZippWallComm))
-
-       ! Gather up the required variables in "vars" on the root
-       ! proc. This routine is differientated by hand.
-       call wallIntegrationZipperComm_d(vars, varsd, sps)
-
-       ! Perform actual integration. Tapenade ADs this routine.
-       call wallIntegrationZipper_d(zipper%conn, zipper%fam, vars, varsd, &
-            localValues, localValuesd, famList, sps)
-
-       ! Cleanup vars
-       deallocate(vars, varsd)
-    end if
-
-    zipper => zipperMeshes(iBCGroupInflow)
-    ! Determine if we have a flowthrough Zipper:
-    if (zipper%allocated) then
-
-       ! Allocate space necessary to store variables. Only non-zero on
-       ! root proc.
-       allocate(vars(size(zipper%indices), nZippFlowComm), varsd(size(zipper%indices), nZippFlowComm))
-
-       ! Gather up the required variables in "vars" on the root
-       ! proc. This routine is differientated by hand.
-       call flowIntegrationZipperComm_d(.true., vars, varsd, sps)
-
-       ! Perform actual integration. Tapenade ADs this routine.
-       call flowIntegrationZipper_d(.true., zipper%conn, zipper%fam, vars, varsd, &
-            localValues, localValuesd, famList, sps)
-
-       ! Cleanup vars
-       deallocate(vars, varsd)
-    end if
-
-    zipper => zipperMeshes(iBCGroupOutflow)
-    ! Determine if we have a flowthrough Zipper:
-    if (zipper%allocated) then
-
-       ! Allocate space necessary to store variables. Only non-zero on
-       ! root proc.
-       allocate(vars(size(zipper%indices), nZippFlowComm), varsd(size(zipper%indices), nZippFlowComm))
-
-       ! Gather up the required variables in "vars" on the root
-       ! proc. This routine is differientated by hand.
-       call flowIntegrationZipperComm_d(.false., vars, varsd, sps)
-
-       ! Perform actual integration. Tapenade ADs this routine.
-       call flowIntegrationZipper_d(.false., zipper%conn, zipper%fam, vars, varsd, &
-            localValues, localValuesd, famList, sps)
-
-       ! Cleanup vars
-       deallocate(vars, varsd)
-    end if
-  end subroutine integrateZippers_d
-
-  subroutine integrateZippers_b(localValues, localValuesd, famList, sps)
-    !------------------------------------------------------------------------
-    ! Manual Differentiation Warning: This routine is differentiated by hand.
-    ! -----------------------------------------------------------------------
-
-    ! Reverse mode linearization of the zipper integration.
-
-    use constants
-    use oversetData, only : zipperMeshes, zipperMesh
-    use haloExchange, only : wallIntegrationZipperComm_b, flowIntegrationZipperComm_b, &
-         wallIntegrationZipperComm, flowIntegrationZipperComm
-    use zipperIntegrations_b, only : wallIntegrationZipper_b, flowIntegrationZipper_b
-    implicit none
-
-    ! Input Variables
-    real(kind=realType), dimension(nLocalValues), intent(inout) :: localValues, localValuesd
-    integer(kind=intType), dimension(:), intent(in) :: famList
-    integer(kind=intType), intent(in) :: sps
-    real(kind=realType), dimension(:, :), allocatable :: vars, varsd
-    type(zipperMesh), pointer :: zipper
-
-    ! Determine if we have a wall Zipper:
-    zipper => zipperMeshes(iBCGroupWalls)
-    if (zipper%allocated) then
-
-       ! Allocate space necessary to store variables. Only non-zero on
-       ! root proc.
-       allocate(vars(size(zipper%indices), nZippWallComm), varsd(size(zipper%indices), nZippWallComm))
-
-       ! Set the forward variables
-       call wallIntegrationZipperComm(vars, sps)
-
-       ! Perform actual integration. Tapenade ADs this routine.
-       varsd = zero
-       call wallIntegrationZipper_b(zipper%conn, zipper%fam, vars, varsd, &
-            localValues, localValuesd, famList, sps)
-
-       ! Scatter (becuase we are reverse) the values from the root
-       ! back out to all necessary procs.  This routine is
-       ! differientated by hand.
-       call wallIntegrationZipperComm_b(vars, varsd, sps)
-
-       ! Cleanup vars
-       deallocate(vars, varsd)
-    end if
-
-    zipper => zipperMeshes(iBCGroupInflow)
-    ! Determine if we have a flowthrough Zipper:
-    if (zipper%allocated) then
-
-       ! Allocate space necessary to store variables. Only non-zero on
-       ! root proc.
-       allocate(vars(size(zipper%indices), nZippFlowComm), varsd(size(zipper%indices), nZippFlowComm))
-
-       ! Set the forward variables
-       call flowIntegrationZipperComm(.true., vars, sps)
-
-       ! Perform actual integration. Tapenade ADs this routine.
-       varsd = zero
-       call flowIntegrationZipper_b(.true., zipper%conn, zipper%fam, vars, varsd, &
-            localValues, localValuesd, famList, sps)
-
-       ! Scatter (becuase we are reverse) the values from the root
-       ! back out to all necessary procs.  This routine is
-       ! differientated by hand.
-       call flowIntegrationZipperComm_b(.true., vars, varsd, sps)
-
-       ! Cleanup vars
-       deallocate(vars, varsd)
-    end if
-
-    zipper => zipperMeshes(iBCGroupOutflow)
-    ! Determine if we have a flowthrough Zipper:
-    if (zipper%allocated) then
-
-       ! Allocate space necessary to store variables. Only non-zero on
-       ! root proc.
-       allocate(vars(size(zipper%indices), nZippFlowComm), varsd(size(zipper%indices), nZippFlowComm))
-
-       ! Set the forward variables
-       call flowIntegrationZipperComm(.false., vars, sps)
-
-       ! Perform actual integration. Tapenade ADs this routine.
-       varsd = zero
-       call flowIntegrationZipper_b(.false., zipper%conn, zipper%fam, vars, varsd, &
-            localValues, localValuesd, famList, sps)
-
-       ! Scatter (becuase we are reverse) the values from the root
-       ! back out to all necessary procs.  This routine is
-       ! differientated by hand.
-       call flowIntegrationZipperComm_b(.false., vars, varsd, sps)
-
-       ! Cleanup vars
-       deallocate(vars, varsd)
-    end if
-  end subroutine integrateZippers_b
+    subroutine integrateZippers_d(localValues, localValuesd, famList, sps)
+        !------------------------------------------------------------------------
+        ! Manual Differentiation Warning: This routine is differentiated by hand.
+        ! -----------------------------------------------------------------------
+
+        ! Forward mode linearization of the zipper integration.
+
+        use constants
+        use oversetData, only: zipperMeshes, zipperMesh
+        use haloExchange, only: wallIntegrationZipperComm_d, flowIntegrationZipperComm_d
+        use zipperIntegrations_d, only: flowIntegrationZipper_d, wallIntegrationZipper_d
+        implicit none
+
+        ! Input Variables
+        real(kind=realType), dimension(nLocalValues), intent(inout) :: localValues, localValuesd
+        integer(kind=intType), dimension(:), intent(in) :: famList
+        integer(kind=intType), intent(in) :: sps
+        real(kind=realType), dimension(:, :), allocatable :: vars, varsd
+        type(zipperMesh), pointer :: zipper
+
+        zipper => zipperMeshes(iBCGroupWalls)
+        if (zipper%allocated) then
+
+            ! Allocate space necessary to store variables. Only non-zero on
+            ! root proc.
+            allocate (vars(size(zipper%indices), nZippWallComm), varsd(size(zipper%indices), nZippWallComm))
+
+            ! Gather up the required variables in "vars" on the root
+            ! proc. This routine is differientated by hand.
+            call wallIntegrationZipperComm_d(vars, varsd, sps)
+
+            ! Perform actual integration. Tapenade ADs this routine.
+            call wallIntegrationZipper_d(zipper%conn, zipper%fam, vars, varsd, &
+                                         localValues, localValuesd, famList, sps)
+
+            ! Cleanup vars
+            deallocate (vars, varsd)
+        end if
+
+        zipper => zipperMeshes(iBCGroupInflow)
+        ! Determine if we have a flowthrough Zipper:
+        if (zipper%allocated) then
+
+            ! Allocate space necessary to store variables. Only non-zero on
+            ! root proc.
+            allocate (vars(size(zipper%indices), nZippFlowComm), varsd(size(zipper%indices), nZippFlowComm))
+
+            ! Gather up the required variables in "vars" on the root
+            ! proc. This routine is differientated by hand.
+            call flowIntegrationZipperComm_d(.true., vars, varsd, sps)
+
+            ! Perform actual integration. Tapenade ADs this routine.
+            call flowIntegrationZipper_d(.true., zipper%conn, zipper%fam, vars, varsd, &
+                                         localValues, localValuesd, famList, sps)
+
+            ! Cleanup vars
+            deallocate (vars, varsd)
+        end if
+
+        zipper => zipperMeshes(iBCGroupOutflow)
+        ! Determine if we have a flowthrough Zipper:
+        if (zipper%allocated) then
+
+            ! Allocate space necessary to store variables. Only non-zero on
+            ! root proc.
+            allocate (vars(size(zipper%indices), nZippFlowComm), varsd(size(zipper%indices), nZippFlowComm))
+
+            ! Gather up the required variables in "vars" on the root
+            ! proc. This routine is differientated by hand.
+            call flowIntegrationZipperComm_d(.false., vars, varsd, sps)
+
+            ! Perform actual integration. Tapenade ADs this routine.
+            call flowIntegrationZipper_d(.false., zipper%conn, zipper%fam, vars, varsd, &
+                                         localValues, localValuesd, famList, sps)
+
+            ! Cleanup vars
+            deallocate (vars, varsd)
+        end if
+    end subroutine integrateZippers_d
+
+    subroutine integrateZippers_b(localValues, localValuesd, famList, sps)
+        !------------------------------------------------------------------------
+        ! Manual Differentiation Warning: This routine is differentiated by hand.
+        ! -----------------------------------------------------------------------
+
+        ! Reverse mode linearization of the zipper integration.
+
+        use constants
+        use oversetData, only: zipperMeshes, zipperMesh
+        use haloExchange, only: wallIntegrationZipperComm_b, flowIntegrationZipperComm_b, &
+                                wallIntegrationZipperComm, flowIntegrationZipperComm
+        use zipperIntegrations_b, only: wallIntegrationZipper_b, flowIntegrationZipper_b
+        implicit none
+
+        ! Input Variables
+        real(kind=realType), dimension(nLocalValues), intent(inout) :: localValues, localValuesd
+        integer(kind=intType), dimension(:), intent(in) :: famList
+        integer(kind=intType), intent(in) :: sps
+        real(kind=realType), dimension(:, :), allocatable :: vars, varsd
+        type(zipperMesh), pointer :: zipper
+
+        ! Determine if we have a wall Zipper:
+        zipper => zipperMeshes(iBCGroupWalls)
+        if (zipper%allocated) then
+
+            ! Allocate space necessary to store variables. Only non-zero on
+            ! root proc.
+            allocate (vars(size(zipper%indices), nZippWallComm), varsd(size(zipper%indices), nZippWallComm))
+
+            ! Set the forward variables
+            call wallIntegrationZipperComm(vars, sps)
+
+            ! Perform actual integration. Tapenade ADs this routine.
+            varsd = zero
+            call wallIntegrationZipper_b(zipper%conn, zipper%fam, vars, varsd, &
+                                         localValues, localValuesd, famList, sps)
+
+            ! Scatter (becuase we are reverse) the values from the root
+            ! back out to all necessary procs.  This routine is
+            ! differientated by hand.
+            call wallIntegrationZipperComm_b(vars, varsd, sps)
+
+            ! Cleanup vars
+            deallocate (vars, varsd)
+        end if
+
+        zipper => zipperMeshes(iBCGroupInflow)
+        ! Determine if we have a flowthrough Zipper:
+        if (zipper%allocated) then
+
+            ! Allocate space necessary to store variables. Only non-zero on
+            ! root proc.
+            allocate (vars(size(zipper%indices), nZippFlowComm), varsd(size(zipper%indices), nZippFlowComm))
+
+            ! Set the forward variables
+            call flowIntegrationZipperComm(.true., vars, sps)
+
+            ! Perform actual integration. Tapenade ADs this routine.
+            varsd = zero
+            call flowIntegrationZipper_b(.true., zipper%conn, zipper%fam, vars, varsd, &
+                                         localValues, localValuesd, famList, sps)
+
+            ! Scatter (becuase we are reverse) the values from the root
+            ! back out to all necessary procs.  This routine is
+            ! differientated by hand.
+            call flowIntegrationZipperComm_b(.true., vars, varsd, sps)
+
+            ! Cleanup vars
+            deallocate (vars, varsd)
+        end if
+
+        zipper => zipperMeshes(iBCGroupOutflow)
+        ! Determine if we have a flowthrough Zipper:
+        if (zipper%allocated) then
+
+            ! Allocate space necessary to store variables. Only non-zero on
+            ! root proc.
+            allocate (vars(size(zipper%indices), nZippFlowComm), varsd(size(zipper%indices), nZippFlowComm))
+
+            ! Set the forward variables
+            call flowIntegrationZipperComm(.false., vars, sps)
+
+            ! Perform actual integration. Tapenade ADs this routine.
+            varsd = zero
+            call flowIntegrationZipper_b(.false., zipper%conn, zipper%fam, vars, varsd, &
+                                         localValues, localValuesd, famList, sps)
+
+            ! Scatter (becuase we are reverse) the values from the root
+            ! back out to all necessary procs.  This routine is
+            ! differientated by hand.
+            call flowIntegrationZipperComm_b(.false., vars, varsd, sps)
+
+            ! Cleanup vars
+            deallocate (vars, varsd)
+        end if
+    end subroutine integrateZippers_b
 #endif
 #endif
 
diff --git a/src/turbulence/SST.F90 b/src/turbulence/SST.F90
index a1202f271..0b4731715 100644
--- a/src/turbulence/SST.F90
+++ b/src/turbulence/SST.F90
@@ -1,1650 +1,1647 @@
 module SST
 
-  ! This module contains the source code related to the SST turbulence
-  ! model. It is slightly more modularized than the original which makes
-  ! performing reverse mode AD simplier.
-
+    ! This module contains the source code related to the SST turbulence
+    ! model. It is slightly more modularized than the original which makes
+    ! performing reverse mode AD simplier.
 
 contains
 
-  subroutine SST_block(resOnly)
-
-    use constants
-    use blockPointers, only : il, jl, kl
-    use inputTimeSpectral
-    use iteration
-    use turbUtils, only : SSTEddyViscosity
-    use turbBCRoutines, only : bcTurbTreatment, applyAllTurbBCThisBlock
-    implicit none
-
-    !
-    !      Subroutine argument.
-    !
-    logical, intent(in) :: resOnly
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn, sps
-
-
-    ! Set the arrays for the boundary condition treatment.
-
-    call bcTurbTreatment
-
-    ! Solve the transport equations for k and omega.
-
-    call SSTSolve(resOnly)
-
-    ! The eddy viscosity and the boundary conditions are only
-    ! applied if an actual update has been computed in SSTSolve.
-
-    if(.not. resOnly ) then
-
-       ! Compute the corresponding eddy viscosity.
-
-       call SSTEddyViscosity(2, il, 2, jl, 2, kl)
-
-       ! Set the halo values for the turbulent variables.
-       ! We are on the finest mesh, so the second layer of halo
-       ! cells must be computed as well.
-
-       call applyAllTurbBCThisBlock(.true.)
-    endif
-
-  end subroutine SST_block
-
-  subroutine SSTSolve(resOnly)
-    !
-    !       SSTSolve solves the turbulent transport equations for
-    !       menter's SST variant of the k-omega model in a decoupled
-    !       manner using a diagonal dominant ADI-scheme.
-    !
-    use blockPointers
-    use constants
-    use flowVarRefState
-    use inputIteration
-    use inputPhysics
-    use paramTurb
-    use turbMod, only : dvt, vort, prod, kwCD, f1
-    use turbUtils, only : prodSmag2, prodWmag2, prodKatoLaunder, &
-         turbAdvection, unsteadyTurbTerm, tdia3, kwCDterm
-    use turbCurveFits, only : curveTupYp
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    logical, intent(in) :: resOnly
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, nn
-
-    real(kind=realType) :: rSSTGam1, rSSTGam2, t1, t2
-    real(kind=realType) :: rSSTGam, rSSTBeta
-    real(kind=realType) :: rhoi, ss, spk, sdk
-    real(kind=realType) :: voli, volmi, volpi
-    real(kind=realType) :: xm, ym, zm, xp, yp, zp, xa, ya, za
-    real(kind=realType) :: ttm, ttp, mulm, mulp, muem, muep
-    real(kind=realType) :: rSSTSigkp1, rSSTSigk, rSSTSigkm1
-    real(kind=realType) :: rSSTSigwp1, rSSTSigw, rSSTSigwm1
-    real(kind=realType) :: c1m, c1p, c10, c2m, c2p, c20
-    real(kind=realType) :: b1, b2, c1, c2, d1, d2
-    real(kind=realType) :: utau, qs, uu, um, up, factor, rblank
-
-    real(kind=realType), dimension(itu1:itu2) :: tup
-
-    real(kind=realType), dimension(2:il,2:jl,2:kl,2,2)  :: qq
-    real(kind=realType), dimension(2,2:max(il,jl,kl))   :: bb, dd, ff
-    real(kind=realType), dimension(2,2,2:max(il,jl,kl)) :: cc
-
-    real(kind=realType), dimension(:,:,:), pointer :: ddw, ww, ddvt
-    real(kind=realType), dimension(:,:),   pointer :: rrlv
-    real(kind=realType), dimension(:,:),   pointer :: dd2Wall
-
-    logical, dimension(2:jl,2:kl), target :: flagI2, flagIl
-    logical, dimension(2:il,2:kl), target :: flagJ2, flagJl
-    logical, dimension(2:il,2:jl), target :: flagK2, flagKl
-
-    logical, dimension(:,:), pointer :: flag
-
-    ! Set model constants
-
-    rSSTGam1 = rSSTBeta1/rSSTBetas &
-         - rSSTSigw1*rSSTK*rSSTK/sqrt(rSSTBetas)
-    rSSTGam2 = rSSTBeta2/rSSTBetas &
-         - rSSTSigw2*rSSTK*rSSTK/sqrt(rSSTBetas)
-
-    ! Set a couple of pointers to the correct entries in dw to
-    ! make the code more readable.
-
-    dvt  => scratch(1:,1:,1:,idvt:)
-    prod => scratch(1:,1:,1:,iprod)
-    vort => prod
-    kwCD => scratch(1:,1:,1:,icd)
-    f1   => scratch(1:,1:,1:,if1SST)
-    !
-    !       Production term.
-    !
-    select case (turbProd)
-    case (strain)
-       call prodSmag2
-
-    case (vorticity)
-       call prodWmag2
-
-    case (katoLaunder)
-       call prodKatoLaunder
-
-    end select
-    !
-    !       Source terms.
-    !       Determine the source term and its derivative w.r.t. k and
-    !       omega for all internal cells of the block.
-    !       Note that the blending function f1 and the cross diffusion
-    !       were computed earlier in f1SST.
-    !
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-
-             ! Compute the blended value of rSSTGam and rSSTBeta,
-             ! which occur in the production terms of k and omega.
-
-             t1 = f1(i,j,k);  t2 = one - t1
-             rSSTGam  = t1*rSSTGam1  + t2*rSSTGam2
-             rSSTBeta = t1*rSSTBeta1 + t2*rSSTBeta2
-
-             ! Compute the source terms for both the k and the omega
-             ! equation. Note that dw(i,j,k,iprod) currently contains the
-             ! unscaled source term. Furthermore the production term of
-             ! k is limited to a certain times the destruction term.
-
-             rhoi = one/w(i,j,k,irho)
-             ss   = prod(i,j,k)
-             spk  = rev(i,j,k)*ss*rhoi
-             sdk  = rSSTBetas*w(i,j,k,itu1)*w(i,j,k,itu2)
-             spk  = min(spk, pklim*sdk)
-
-             dvt(i,j,k,1) = spk - sdk
-             dvt(i,j,k,2) = rSSTGam*ss + two*t2*rSSTSigw2*kwCD(i,j,k) &
-                  - rSSTBeta*w(i,j,k,itu2)**2
-
-             ! Compute the source term jacobian. Note that only the
-             ! destruction terms are linearized to increase the diagonal
-             ! dominance of the matrix. Furthermore minus the source
-             ! term jacobian is stored.
-
-             qq(i,j,k,1,1) = rSSTBetas*w(i,j,k,itu2)
-             qq(i,j,k,1,2) = zero
-             qq(i,j,k,2,1) = zero
-             qq(i,j,k,2,2) = two*rSSTBeta*w(i,j,k,itu2)
-
-          enddo
-       enddo
-    enddo
-    !
-    !       Advection and unsteady terms.
-    !
-    nn = itu1 - 1
-    call turbAdvection(2_intType, 2_intType, nn, qq)
-
-    call unsteadyTurbTerm(2_intType, 2_intType, nn, qq)
-    !
-    !       Viscous terms in k-direction.
-    !
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-
-             ! Compute the metrics in zeta-direction, i.e. along the
-             ! line k = constant.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i,j,k-1))
-             volpi = two/(vol(i,j,k) + vol(i,j,k+1))
-
-             xm = sk(i,j,k-1,1)*volmi
-             ym = sk(i,j,k-1,2)*volmi
-             zm = sk(i,j,k-1,3)*volmi
-             xp = sk(i,j,k,  1)*volpi
-             yp = sk(i,j,k,  2)*volpi
-             zp = sk(i,j,k,  3)*volpi
-
-             xa  = half*(sk(i,j,k,1) + sk(i,j,k-1,1))*voli
-             ya  = half*(sk(i,j,k,2) + sk(i,j,k-1,2))*voli
-             za  = half*(sk(i,j,k,3) + sk(i,j,k-1,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             ! Compute the blended diffusion coefficients for k-1,
-             ! k and k+1.
-
-             t1 = f1(i,j,k+1); t2 = one - t1
-             rSSTSigkp1 = t1*rSSTSigk1 + t2*rSSTSigk2
-             rSSTSigwp1 = t1*rSSTSigw1 + t2*rSSTSigw2
-
-             t1 = f1(i,j,k); t2 = one - t1
-             rSSTSigk = t1*rSSTSigk1 + t2*rSSTSigk2
-             rSSTSigw = t1*rSSTSigw1 + t2*rSSTSigw2
-
-             t1 = f1(i,j,k-1); t2 = one - t1
-             rSSTSigkm1 = t1*rSSTSigk1 + t2*rSSTSigk2
-             rSSTSigwm1 = t1*rSSTSigw1 + t2*rSSTSigw2
-
-             ! Computation of the viscous terms in zeta-direction; note
-             ! that cross-derivatives are neglected, i.e. the mesh is
-             ! assumed to be orthogonal.
-             ! The second derivative in zeta-direction is constructed as
-             ! the central difference of the first order derivatives, i.e.
-             ! d^2/dzeta^2 = d/dzeta (d/dzeta k+1/2 - d/dzeta k-1/2).
-             ! In this way the metric as well as the varying viscosity
-             ! can be taken into account; the latter appears inside the
-             ! d/dzeta derivative. The whole term is divided by rho to
-             ! obtain the diffusion term for k and omega.
-
-             ! First the k-term.
-
-             rhoi = one/w(i,j,k,irho)
-             mulm = half*(rlv(i,j,k-1) + rlv(i,j,k))
-             mulp = half*(rlv(i,j,k+1) + rlv(i,j,k))
-             muem = half*(rSSTSigkm1*rev(i,j,k-1) + rSSTSigk*rev(i,j,k))
-             muep = half*(rSSTSigkp1*rev(i,j,k+1) + rSSTSigk*rev(i,j,k))
-
-             c1m = ttm*(mulm + muem)*rhoi
-             c1p = ttp*(mulp + muep)*rhoi
-             c10 = c1m + c1p
-
-             ! And the omega term.
-
-             muem = half*(rSSTSigwm1*rev(i,j,k-1) + rSSTSigw*rev(i,j,k))
-             muep = half*(rSSTSigwp1*rev(i,j,k+1) + rSSTSigw*rev(i,j,k))
-
-             c2m = ttm*(mulm + muem)*rhoi
-             c2p = ttp*(mulp + muep)*rhoi
-             c20 = c2m + c2p
-
-             ! Update the residual for this cell and store the possible
-             ! coefficients for the matrix in b1, b2, c1, c2, d1 and d2.
-
-             dvt(i,j,k,1) = dvt(i,j,k,1)      + c1m*w(i,j,k-1,itu1) &
-                  - c10*w(i,j,k,itu1) + c1p*w(i,j,k+1,itu1)
-             dvt(i,j,k,2) = dvt(i,j,k,2)      + c2m*w(i,j,k-1,itu2) &
-                  - c20*w(i,j,k,itu2) + c2p*w(i,j,k+1,itu2)
-
-             b1 = -c1m
-             c1 =  c10
-             d1 = -c1p
-
-             b2 = -c2m
-             c2 =  c20
-             d2 = -c2p
-
-             ! Update the central jacobian. For nonboundary cells this
-             ! is simply c1 and c2. For boundary cells this is slightly
-             ! more complicated, because the boundary conditions are
-             ! treated implicitly and the off-diagonal terms b1, b2 and
-             ! d1, d2 must be taken into account.
-             ! The boundary conditions are only treated implicitly if
-             ! the diagonal dominance of the matrix is increased.
-
-             if(k == 2) then
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1 &
-                     - b1*max(bmtk1(i,j,itu1,itu1),zero)
-                qq(i,j,k,1,2) = qq(i,j,k,1,2) - b1*bmtk1(i,j,itu1,itu2)
-                qq(i,j,k,2,1) = qq(i,j,k,2,1) - b2*bmtk1(i,j,itu2,itu1)
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2 &
-                     - b2*max(bmtk1(i,j,itu2,itu2),zero)
-             else if(k == kl) then
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1 &
-                     - d1*max(bmtk2(i,j,itu1,itu1),zero)
-                qq(i,j,k,1,2) = qq(i,j,k,1,2) - d1*bmtk2(i,j,itu1,itu2)
-                qq(i,j,k,2,1) = qq(i,j,k,2,1) - d2*bmtk2(i,j,itu2,itu1)
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2 &
-                     - d2*max(bmtk2(i,j,itu2,itu2),zero)
-             else
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2
-             endif
-
-          enddo
-       enddo
-    enddo
-    !
-    !       Viscous terms in j-direction.
-    !
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-
-             ! Compute the metrics in eta-direction, i.e. along the
-             ! line j = constant.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i,j-1,k))
-             volpi = two/(vol(i,j,k) + vol(i,j+1,k))
-
-             xm = sj(i,j-1,k,1)*volmi
-             ym = sj(i,j-1,k,2)*volmi
-             zm = sj(i,j-1,k,3)*volmi
-             xp = sj(i,j,  k,1)*volpi
-             yp = sj(i,j,  k,2)*volpi
-             zp = sj(i,j,  k,3)*volpi
-
-             xa  = half*(sj(i,j,k,1) + sj(i,j-1,k,1))*voli
-             ya  = half*(sj(i,j,k,2) + sj(i,j-1,k,2))*voli
-             za  = half*(sj(i,j,k,3) + sj(i,j-1,k,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             ! Compute the blended diffusion coefficients for j-1,
-             ! j and j+1.
-
-             t1 = f1(i,j+1,k); t2 = one - t1
-             rSSTSigkp1 = t1*rSSTSigk1 + t2*rSSTSigk2
-             rSSTSigwp1 = t1*rSSTSigw1 + t2*rSSTSigw2
-
-             t1 = f1(i,j,k); t2 = one - t1
-             rSSTSigk = t1*rSSTSigk1 + t2*rSSTSigk2
-             rSSTSigw = t1*rSSTSigw1 + t2*rSSTSigw2
-
-             t1 = f1(i,j-1,k); t2 = one - t1
-             rSSTSigkm1 = t1*rSSTSigk1 + t2*rSSTSigk2
-             rSSTSigwm1 = t1*rSSTSigw1 + t2*rSSTSigw2
-
-             ! Computation of the viscous terms in eta-direction; note
-             ! that cross-derivatives are neglected, i.e. the mesh is
-             ! assumed to be orthogonal.
-             ! The second derivative in eta-direction is constructed as
-             ! the central difference of the first order derivatives, i.e.
-             ! d^2/deta^2 = d/deta (d/deta j+1/2 - d/deta j-1/2).
-             ! In this way the metric as well as the varying viscosity
-             ! can be taken into account; the latter appears inside the
-             ! d/deta derivative. The whole term is divided by rho to
-             ! obtain the diffusion term for k and omega.
-
-             ! First the k-term.
-
-             rhoi = one/w(i,j,k,irho)
-             mulm = half*(rlv(i,j-1,k) + rlv(i,j,k))
-             mulp = half*(rlv(i,j+1,k) + rlv(i,j,k))
-             muem = half*(rSSTSigkm1*rev(i,j-1,k) + rSSTSigk*rev(i,j,k))
-             muep = half*(rSSTSigkp1*rev(i,j+1,k) + rSSTSigk*rev(i,j,k))
-
-             c1m = ttm*(mulm + muem)*rhoi
-             c1p = ttp*(mulp + muep)*rhoi
-             c10 = c1m + c1p
-
-             ! And the omega term.
-
-             muem = half*(rSSTSigwm1*rev(i,j-1,k) + rSSTSigw*rev(i,j,k))
-             muep = half*(rSSTSigwp1*rev(i,j+1,k) + rSSTSigw*rev(i,j,k))
-
-             c2m = ttm*(mulm + muem)*rhoi
-             c2p = ttp*(mulp + muep)*rhoi
-             c20 = c2m + c2p
-
-             ! Update the residual for this cell and store the possible
-             ! coefficients for the matrix in b1, b2, c1, c2, d1 and d2.
-
-             dvt(i,j,k,1) = dvt(i,j,k,1)      + c1m*w(i,j-1,k,itu1) &
-                  - c10*w(i,j,k,itu1) + c1p*w(i,j+1,k,itu1)
-             dvt(i,j,k,2) = dvt(i,j,k,2)      + c2m*w(i,j-1,k,itu2) &
-                  - c20*w(i,j,k,itu2) + c2p*w(i,j+1,k,itu2)
-
-             b1 = -c1m
-             c1 =  c10
-             d1 = -c1p
-
-             b2 = -c2m
-             c2 =  c20
-             d2 = -c2p
-
-             ! Update the central jacobian. For nonboundary cells this
-             ! is simply c1 and c2. For boundary cells this is slightly
-             ! more complicated, because the boundary conditions are
-             ! treated implicitly and the off-diagonal terms b1, b2 and
-             ! d1, d2 must be taken into account.
-             ! The boundary conditions are only treated implicitly if
-             ! the diagonal dominance of the matrix is increased.
-
-             if(j == 2) then
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1 &
-                     - b1*max(bmtj1(i,k,itu1,itu1),zero)
-                qq(i,j,k,1,2) = qq(i,j,k,1,2) - b1*bmtj1(i,k,itu1,itu2)
-                qq(i,j,k,2,1) = qq(i,j,k,2,1) - b2*bmtj1(i,k,itu2,itu1)
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2 &
-                     - b2*max(bmtj1(i,k,itu2,itu2),zero)
-             else if(j == jl) then
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1 &
-                     - d1*max(bmtj2(i,k,itu1,itu1),zero)
-                qq(i,j,k,1,2) = qq(i,j,k,1,2) - d1*bmtj2(i,k,itu1,itu2)
-                qq(i,j,k,2,1) = qq(i,j,k,2,1) - d2*bmtj2(i,k,itu2,itu1)
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2 &
-                     - d2*max(bmtj2(i,k,itu2,itu2),zero)
-             else
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2
-             endif
-
-          enddo
-       enddo
-    enddo
-    !
-    !       Viscous terms in i-direction.
-    !
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-
-             ! Compute the metrics in xi-direction, i.e. along the
-             ! line i = constant.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i-1,j,k))
-             volpi = two/(vol(i,j,k) + vol(i+1,j,k))
-
-             xm = si(i-1,j,k,1)*volmi
-             ym = si(i-1,j,k,2)*volmi
-             zm = si(i-1,j,k,3)*volmi
-             xp = si(i,  j,k,1)*volpi
-             yp = si(i,  j,k,2)*volpi
-             zp = si(i,  j,k,3)*volpi
-
-             xa  = half*(si(i,j,k,1) + si(i-1,j,k,1))*voli
-             ya  = half*(si(i,j,k,2) + si(i-1,j,k,2))*voli
-             za  = half*(si(i,j,k,3) + si(i-1,j,k,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             ! Compute the blended diffusion coefficients for i-1,
-             ! i and i+1.
-
-             t1 = f1(i+1,j,k); t2 = one - t1
-             rSSTSigkp1 = t1*rSSTSigk1 + t2*rSSTSigk2
-             rSSTSigwp1 = t1*rSSTSigw1 + t2*rSSTSigw2
-
-             t1 = f1(i,j,k); t2 = one - t1
-             rSSTSigk = t1*rSSTSigk1 + t2*rSSTSigk2
-             rSSTSigw = t1*rSSTSigw1 + t2*rSSTSigw2
-
-             t1 = f1(i-1,j,k); t2 = one - t1
-             rSSTSigkm1 = t1*rSSTSigk1 + t2*rSSTSigk2
-             rSSTSigwm1 = t1*rSSTSigw1 + t2*rSSTSigw2
-
-             ! Computation of the viscous terms in xi-direction; note
-             ! that cross-derivatives are neglected, i.e. the mesh is
-             ! assumed to be orthogonal.
-             ! The second derivative in xi-direction is constructed as
-             ! the central difference of the first order derivatives, i.e.
-             ! d^2/dxi^2 = d/dxi (d/dxi i+1/2 - d/dxi i-1/2).
-             ! In this way the metric as well as the varying viscosity
-             ! can be taken into account; the latter appears inside the
-             ! d/dxi derivative. The whole term is divided by rho to
-             ! obtain the diffusion term for k and omega.
-
-             ! First the k-term.
-
-             rhoi = one/w(i,j,k,irho)
-             mulm = half*(rlv(i-1,j,k) + rlv(i,j,k))
-             mulp = half*(rlv(i+1,j,k) + rlv(i,j,k))
-             muem = half*(rSSTSigkm1*rev(i-1,j,k) + rSSTSigk*rev(i,j,k))
-             muep = half*(rSSTSigkp1*rev(i+1,j,k) + rSSTSigk*rev(i,j,k))
-
-             c1m = ttm*(mulm + muem)*rhoi
-             c1p = ttp*(mulp + muep)*rhoi
-             c10 = c1m + c1p
-
-             ! And the omega term.
-
-             muem = half*(rSSTSigwm1*rev(i-1,j,k) + rSSTSigw*rev(i,j,k))
-             muep = half*(rSSTSigwp1*rev(i+1,j,k) + rSSTSigw*rev(i,j,k))
-
-             c2m = ttm*(mulm + muem)*rhoi
-             c2p = ttp*(mulp + muep)*rhoi
-             c20 = c2m + c2p
-
-             ! Update the residual for this cell and store the possible
-             ! coefficients for the matrix in b1, b2, c1, c2, d1 and d2.
-
-             dvt(i,j,k,1) = dvt(i,j,k,1)      + c1m*w(i-1,j,k,itu1) &
-                  - c10*w(i,j,k,itu1) + c1p*w(i+1,j,k,itu1)
-             dvt(i,j,k,2) = dvt(i,j,k,2)      + c2m*w(i-1,j,k,itu2) &
-                  - c20*w(i,j,k,itu2) + c2p*w(i+1,j,k,itu2)
-
-             b1 = -c1m
-             c1 =  c10
-             d1 = -c1p
-
-             b2 = -c2m
-             c2 =  c20
-             d2 = -c2p
-
-             ! Update the central jacobian. For nonboundary cells this
-             ! is simply c1 and c2. For boundary cells this is slightly
-             ! more complicated, because the boundary conditions are
-             ! treated implicitly and the off-diagonal terms b1, b2 and
-             ! d1, d2 must be taken into account.
-             ! The boundary conditions are only treated implicitly if
-             ! the diagonal dominance of the matrix is increased.
-
-             if(i == 2) then
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1 &
-                     - b1*max(bmti1(j,k,itu1,itu1),zero)
-                qq(i,j,k,1,2) = qq(i,j,k,1,2) - b1*bmti1(j,k,itu1,itu2)
-                qq(i,j,k,2,1) = qq(i,j,k,2,1) - b2*bmti1(j,k,itu2,itu1)
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2 &
-                     - b2*max(bmti1(j,k,itu2,itu2),zero)
-             else if(i == il) then
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1 &
-                     - d1*max(bmti2(j,k,itu1,itu1),zero)
-                qq(i,j,k,1,2) = qq(i,j,k,1,2) - d1*bmti2(j,k,itu1,itu2)
-                qq(i,j,k,2,1) = qq(i,j,k,2,1) - d2*bmti2(j,k,itu2,itu1)
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2 &
-                     - d2*max(bmti2(j,k,itu2,itu2),zero)
-             else
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2
-             endif
-
-          enddo
-       enddo
-    enddo
-
-    ! Multiply the residual by the volume and store this in dw; this
-    ! is done for monitoring reasons only. The multiplication with the
-    ! volume is present to be consistent with the flow residuals; also
-    ! the negative value is taken, again to be consistent with the
-    ! flow equations. Also multiply by iblank so that no updates occur
-    ! in holes or the overset boundary.
-
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-             rblank = real(iblank(i,j,k), realType)
-             dw(i,j,k,itu1) = -volRef(i,j,k)*dvt(i,j,k,1)*rblank
-             dw(i,j,k,itu2) = -volRef(i,j,k)*dvt(i,j,k,2)*rblank
-          enddo
-       enddo
-    enddo
-
-    ! Initialize the wall function flags to .false.
-
-    flagI2 = .false.
-    flagIl = .false.
-    flagJ2 = .false.
-    flagJl = .false.
-    flagK2 = .false.
-    flagKl = .false.
-
-    ! Modify the rhs of the 1st internal cell, if wall functions
-    ! are used; their value is determined by the table.
-
-    testWallFunctions: if( wallFunctions ) then
-
-       bocos: do nn=1,nViscBocos
-
-          ! Determine the block face on which the subface is located
-          ! and set some variables. As flag points to the entire array
-          ! flagI2, etc., its starting indices are the starting indices
-          ! of its target and not 1.
-
-          select case (BCFaceID(nn))
-          case (iMin)
-             flag    => flagI2
-             ddw     => dw(2,1:,1:,1:);  ddvt => dvt(2,1:,1:,1:)
-             ww      => w(2,1:,1:,1:);   rrlv => rlv(2,1:,1:)
-             dd2Wall => d2Wall(2,:,:)
-
-          case (iMax)
-             flag    => flagIl
-             ddw     => dw(il,1:,1:,1:);  ddvt => dvt(il,1:,1:,1:)
-             ww      => w(il,1:,1:,1:);   rrlv => rlv(il,1:,1:)
-             dd2Wall => d2Wall(il,:,:)
-
-          case (jMin)
-             flag    => flagJ2
-             ddw     => dw(1:,2,1:,1:);  ddvt => dvt(1:,2,1:,1:)
-             ww      => w(1:,2,1:,1:);   rrlv => rlv(1:,2,1:)
-             dd2Wall => d2Wall(:,2,:)
-
-          case (jMax)
-             flag    => flagJl
-             ddw     => dw(1:,jl,1:,1:);  ddvt => dvt(1:,jl,1:,1:)
-             ww      => w(1:,jl,1:,1:);   rrlv => rlv(1:,jl,1:)
-             dd2Wall => d2Wall(:,jl,:)
-
-          case (kMin)
-             flag    => flagK2
-             ddw     => dw(1:,1:,2,1:);  ddvt => dvt(1:,1:,2,1:)
-             ww      => w(1:,1:,2,1:);   rrlv => rlv(1:,1:,2)
-             dd2Wall => d2Wall(:,:,2)
-
-          case (kMax)
-             flag    => flagKl
-             ddw     => dw(1:,1:,kl,1:);  ddvt => dvt(1:,1:,kl,1:)
-             ww      => w(1:,1:,kl,1:);   rrlv => rlv(1:,1:,kl)
-             dd2Wall => d2Wall(:,:,kl)
-
-          end select
-
-          ! Loop over the owned faces of this subface. Therefore the
-          ! nodal range of BCData must be used. The offset of +1 is
-          ! present, because the starting index of the cell range is
-          ! 1 larger than the starting index of the nodal range.
-
-          do j=(BCData(nn)%jnBeg+1),BCData(nn)%jnEnd
-             do i=(BCData(nn)%inBeg+1),BCData(nn)%inEnd
-
-                ! Set ddw to zero.
-
-                ddw(i,j,itu1) = zero
-                ddw(i,j,itu2) = zero
-
-                ! Enforce k and omega in the 1st internal cell from
-                ! the wall function table. There is an offset of -1 in
-                ! the wall distance. Note that the offset compared to
-                ! the current value must be stored. Also note that the
-                ! curve fits contain the non-dimensional values.
-
-                utau = viscSubface(nn)%utau(i,j)
-                yp = ww(i,j,irho)*dd2Wall(i-1,j-1)*utau/rrlv(i,j)
-
-                call curveTupYp(tup, yp, itu1, itu2)
-
-                tup(itu1) = tup(itu1)*utau**2
-                tup(itu2) = tup(itu2)*utau**2/rrlv(i,j)*ww(i,j,irho)
-
-                ddvt(i,j,1) = tup(itu1) - ww(i,j,itu1)
-                ddvt(i,j,2) = tup(itu2) - ww(i,j,itu2)
-
-                ! Set the wall flag to .true.
-
-                flag(i,j) = .true.
-
-             enddo
-          enddo
-
-       enddo bocos
-    endif testWallFunctions
-
-    ! Return if only the residual must be computed.
-
-    if( resOnly ) return
-
-    ! For implicit relaxation take the local time step into account,
-    ! where dt is the inverse of the central jacobian times the cfl
-    ! number. The following system is solved:
-    ! (I/dt + cc + bb + dd)*dw = rhs, in which I/dt = cc/cfl. As in
-    ! the rest of the algorithm only the modified central jacobian is
-    ! used, stored it now.
-
-    ! Compute the factor multiplying the central jacobian, which
-    ! is 1 + 1/cfl (implicit relaxation only).
-
-    factor = one
-    if(turbRelax == turbRelaxImplicit) &
-         factor = one + (one-alfaTurb)/alfaTurb
-
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-             qq(i,j,k,1,1) = factor*qq(i,j,k,1,1)
-             qq(i,j,k,1,2) = factor*qq(i,j,k,1,2)
-             qq(i,j,k,2,1) = factor*qq(i,j,k,2,1)
-             qq(i,j,k,2,2) = factor*qq(i,j,k,2,2)
-
-             ! Set qq to i if the value is determined by the table.
-
-             if((i == 2  .and. flagI2(j,k)) .or. &
-                  (i == il .and. flagIl(j,k)) .or. &
-                  (j == 2  .and. flagJ2(i,k)) .or. &
-                  (j == jl .and. flagJl(i,k)) .or. &
-                  (k == 2  .and. flagK2(i,j)) .or. &
-                  (k == kl .and. flagKl(i,j))) then
-                qq(i,j,k,1,1) = one
-                qq(i,j,k,1,2) = zero
-                qq(i,j,k,2,1) = zero
-                qq(i,j,k,2,2) = one
-             endif
-          enddo
-       enddo
-    enddo
-
-    ! Initialize the grid velocity to zero. This value will be used
-    ! if the block is not moving.
-
-    qs = zero
-    !
-    !       dd-ADI step in j-direction. There is no particular reason to
-    !       start in j-direction, it just happened to be so. As we solve
-    !       in j-direction, the j-loop is the innermost loop.
-    !
-    do k=2,kl
-       do i=2,il
-          do j=2,jl
-
-             ! More or less the same code is executed here as above when
-             ! the residual was built. However, now the off-diagonal
-             ! terms for the dd-ADI must be built and stored. This could
-             ! have been done earlier, but then all the coefficients had
-             ! to be stored. To save memory, they are recomputed.
-             ! Consequently, see the j-loop to build the residual for
-             ! the comments.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i,j-1,k))
-             volpi = two/(vol(i,j,k) + vol(i,j+1,k))
-
-             xm = sj(i,j-1,k,1)*volmi
-             ym = sj(i,j-1,k,2)*volmi
-             zm = sj(i,j-1,k,3)*volmi
-             xp = sj(i,j,  k,1)*volpi
-             yp = sj(i,j,  k,2)*volpi
-             zp = sj(i,j,  k,3)*volpi
-
-             xa  = half*(sj(i,j,k,1) + sj(i,j-1,k,1))*voli
-             ya  = half*(sj(i,j,k,2) + sj(i,j-1,k,2))*voli
-             za  = half*(sj(i,j,k,3) + sj(i,j-1,k,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             ! Compute the blended diffusion coefficients for j-1,
-             ! j and j+1.
-
-             t1 = f1(i,j+1,k); t2 = one - t1
-             rSSTSigkp1 = t1*rSSTSigk1 + t2*rSSTSigk2
-             rSSTSigwp1 = t1*rSSTSigw1 + t2*rSSTSigw2
-
-             t1 = f1(i,j,k); t2 = one - t1
-             rSSTSigk = t1*rSSTSigk1 + t2*rSSTSigk2
-             rSSTSigw = t1*rSSTSigw1 + t2*rSSTSigw2
-
-             t1 = f1(i,j-1,k); t2 = one - t1
-             rSSTSigkm1 = t1*rSSTSigk1 + t2*rSSTSigk2
-             rSSTSigwm1 = t1*rSSTSigw1 + t2*rSSTSigw2
-
-             ! Off-diagonal terms due to the diffusion terms
-             ! in j-direction.
-
-             rhoi = one/w(i,j,k,irho)
-             mulm = half*(rlv(i,j-1,k) + rlv(i,j,k))
-             mulp = half*(rlv(i,j+1,k) + rlv(i,j,k))
-             muem = half*(rSSTSigkm1*rev(i,j-1,k) + rSSTSigk*rev(i,j,k))
-             muep = half*(rSSTSigkp1*rev(i,j+1,k) + rSSTSigk*rev(i,j,k))
-
-             c1m = ttm*(mulm + muem)*rhoi
-             c1p = ttp*(mulp + muep)*rhoi
-
-             muem = half*(rSSTSigwm1*rev(i,j-1,k) + rSSTSigw*rev(i,j,k))
-             muep = half*(rSSTSigwp1*rev(i,j+1,k) + rSSTSigw*rev(i,j,k))
-
-             c2m = ttm*(mulm + muem)*rhoi
-             c2p = ttp*(mulp + muep)*rhoi
-
-             bb(1,j) = -c1m
-             dd(1,j) = -c1p
-             bb(2,j) = -c2m
-             dd(2,j) = -c2p
-
-             ! Compute the grid velocity if present.
-             ! It is taken as the average of j and j-1,
-
-             if( addGridVelocities ) &
-                  qs = half*(sFaceJ(i,j,k) + sFaceJ(i,j-1,k))*voli
-
-             ! Off-diagonal terms due to the advection term in
-             ! j-direction. First order approximation.
-
-             uu = xa*w(i,j,k,ivx) + ya*w(i,j,k,ivy) + za*w(i,j,k,ivz) - qs
-             um = zero
-             up = zero
-             if(uu < zero) um = uu
-             if(uu > zero) up = uu
-
-             bb(1,j) = bb(1,j) - up
-             dd(1,j) = dd(1,j) + um
-             bb(2,j) = bb(2,j) - up
-             dd(2,j) = dd(2,j) + um
-
-             ! Store the central jacobian and rhs in cc and ff.
-             ! Multiply the off-diagonal terms and rhs by the iblank
-             ! value so the update determined for iblank = 0 is zero.
-
-             rblank = real(iblank(i,j,k), realType)
-
-             cc(1,1,j) = qq(i,j,k,1,1)
-             cc(1,2,j) = qq(i,j,k,1,2)*rblank
-             cc(2,1,j) = qq(i,j,k,2,1)*rblank
-             cc(2,2,j) = qq(i,j,k,2,2)
-
-             ff(1,j) = dvt(i,j,k,1)*rblank
-             ff(2,j) = dvt(i,j,k,2)*rblank
-
-             bb(:,j) = bb(:,j)*rblank
-             dd(:,j) = dd(:,j)*rblank
-
-             ! Set off diagonal terms to zero if wall function are used.
-
-             if((i == 2  .and. flagI2(j,k)) .or. &
-                  (i == il .and. flagIl(j,k)) .or. &
-                  (j == 2  .and. flagJ2(i,k)) .or. &
-                  (j == jl .and. flagJl(i,k)) .or. &
-                  (k == 2  .and. flagK2(i,j)) .or. &
-                  (k == kl .and. flagKl(i,j))) then
-                bb(1,j) = zero
-                dd(1,j) = zero
-                bb(2,j) = zero
-                dd(2,j) = zero
-             endif
-
-          enddo
-
-          ! Solve the tri-diagonal system in j-direction.
-
-          call tdia3(2_intType, jl, bb, cc, dd, ff)
-
-          ! Determine the new rhs for the next direction.
-
-          do j=2,jl
-             dvt(i,j,k,1) = qq(i,j,k,1,1)*ff(1,j) + qq(i,j,k,1,2)*ff(2,j)
-             dvt(i,j,k,2) = qq(i,j,k,2,1)*ff(1,j) + qq(i,j,k,2,2)*ff(2,j)
-          enddo
-
-       enddo
-    enddo
-    !
-    !       dd-ADI step in i-direction. As we solve in i-direction, the
-    !       i-loop is the innermost loop.
-    !
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-
-             ! More or less the same code is executed here as above when
-             ! the residual was built. However, now the off-diagonal
-             ! terms for the dd-ADI must be built and stored. This could
-             ! have been done earlier, but then all the coefficients had
-             ! to be stored. To save memory, they are recomputed.
-             ! Consequently, see the i-loop to build the residual for
-             ! the comments.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i-1,j,k))
-             volpi = two/(vol(i,j,k) + vol(i+1,j,k))
-
-             xm = si(i-1,j,k,1)*volmi
-             ym = si(i-1,j,k,2)*volmi
-             zm = si(i-1,j,k,3)*volmi
-             xp = si(i,  j,k,1)*volpi
-             yp = si(i,  j,k,2)*volpi
-             zp = si(i,  j,k,3)*volpi
-
-             xa  = half*(si(i,j,k,1) + si(i-1,j,k,1))*voli
-             ya  = half*(si(i,j,k,2) + si(i-1,j,k,2))*voli
-             za  = half*(si(i,j,k,3) + si(i-1,j,k,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             ! Compute the blended diffusion coefficients for i-1,
-             ! i and i+1.
-
-             t1 = f1(i+1,j,k); t2 = one - t1
-             rSSTSigkp1 = t1*rSSTSigk1 + t2*rSSTSigk2
-             rSSTSigwp1 = t1*rSSTSigw1 + t2*rSSTSigw2
-
-             t1 = f1(i,j,k); t2 = one - t1
-             rSSTSigk = t1*rSSTSigk1 + t2*rSSTSigk2
-             rSSTSigw = t1*rSSTSigw1 + t2*rSSTSigw2
-
-             t1 = f1(i-1,j,k); t2 = one - t1
-             rSSTSigkm1 = t1*rSSTSigk1 + t2*rSSTSigk2
-             rSSTSigwm1 = t1*rSSTSigw1 + t2*rSSTSigw2
-
-             ! Off-diagonal terms due to the diffusion terms
-             ! in i-direction.
-
-             rhoi = one/w(i,j,k,irho)
-             mulm = half*(rlv(i-1,j,k) + rlv(i,j,k))
-             mulp = half*(rlv(i+1,j,k) + rlv(i,j,k))
-             muem = half*(rSSTSigkm1*rev(i-1,j,k) + rSSTSigk*rev(i,j,k))
-             muep = half*(rSSTSigkp1*rev(i+1,j,k) + rSSTSigk*rev(i,j,k))
-
-             c1m = ttm*(mulm + muem)*rhoi
-             c1p = ttp*(mulp + muep)*rhoi
-
-             muem = half*(rSSTSigwm1*rev(i-1,j,k) + rSSTSigw*rev(i,j,k))
-             muep = half*(rSSTSigwp1*rev(i+1,j,k) + rSSTSigw*rev(i,j,k))
-
-             c2m = ttm*(mulm + muem)*rhoi
-             c2p = ttp*(mulp + muep)*rhoi
-             c20 = c2m + c2p
-
-             bb(1,i) = -c1m
-             dd(1,i) = -c1p
-             bb(2,i) = -c2m
-             dd(2,i) = -c2p
-
-             ! Compute the grid velocity if present.
-             ! It is taken as the average of i and i-1,
-
-             if( addGridVelocities ) &
-                  qs = half*(sFaceI(i,j,k) + sFaceI(i-1,j,k))*voli
-
-             ! Off-diagonal terms due to the advection term in
-             ! i-direction. First order approximation.
-
-             uu = xa*w(i,j,k,ivx) + ya*w(i,j,k,ivy) + za*w(i,j,k,ivz) - qs
-             um = zero
-             up = zero
-             if(uu < zero) um = uu
-             if(uu > zero) up = uu
-
-             bb(1,i) = bb(1,i) - up
-             dd(1,i) = dd(1,i) + um
-             bb(2,i) = bb(2,i) - up
-             dd(2,i) = dd(2,i) + um
-
-             ! Store the central jacobian and rhs in cc and ff.
-             ! Multiply the off-diagonal terms and rhs by the iblank
-             ! value so the update determined for iblank = 0 is zero.
-
-             rblank = real(iblank(i,j,k), realType)
-
-             cc(1,1,i) = qq(i,j,k,1,1)
-             cc(1,2,i) = qq(i,j,k,1,2)*rblank
-             cc(2,1,i) = qq(i,j,k,2,1)*rblank
-             cc(2,2,i) = qq(i,j,k,2,2)
-
-             ff(1,i) = dvt(i,j,k,1)*rblank
-             ff(2,i) = dvt(i,j,k,2)*rblank
-
-             bb(:,i) = bb(:,i)*rblank
-             dd(:,i) = dd(:,i)*rblank
-
-             ! Set off diagonal terms to zero if wall function are used.
-
-             if((i == 2  .and. flagI2(j,k)) .or. &
-                  (i == il .and. flagIl(j,k)) .or. &
-                  (j == 2  .and. flagJ2(i,k)) .or. &
-                  (j == jl .and. flagJl(i,k)) .or. &
-                  (k == 2  .and. flagK2(i,j)) .or. &
-                  (k == kl .and. flagKl(i,j))) then
-                bb(1,i) = zero
-                dd(1,i) = zero
-                bb(2,i) = zero
-                dd(2,i) = zero
-             endif
-
-          enddo
-
-          ! Solve the tri-diagonal system in i-direction.
-
-          call tdia3(2_intType, il, bb, cc, dd, ff)
-
-          ! Determine the new rhs for the next direction.
-
-          do i=2,il
-             dvt(i,j,k,1) = qq(i,j,k,1,1)*ff(1,i) + qq(i,j,k,1,2)*ff(2,i)
-             dvt(i,j,k,2) = qq(i,j,k,2,1)*ff(1,i) + qq(i,j,k,2,2)*ff(2,i)
-          enddo
-
-       enddo
-    enddo
-    !
-    !       dd-ADI step in k-direction. As we solve in k-direction, the
-    !       k-loop is the innermost loop.
-    !
-    do j=2,jl
-       do i=2,il
-          do k=2,kl
-
-             ! More or less the same code is executed here as above when
-             ! the residual was built. However, now the off-diagonal
-             ! terms for the dd-ADI must be built and stored. This could
-             ! have been done earlier, but then all the coefficients had
-             ! to be stored. To save memory, they are recomputed.
-             ! Consequently, see the k-loop to build the residual for
-             ! the comments.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i,j,k-1))
-             volpi = two/(vol(i,j,k) + vol(i,j,k+1))
-
-             xm = sk(i,j,k-1,1)*volmi
-             ym = sk(i,j,k-1,2)*volmi
-             zm = sk(i,j,k-1,3)*volmi
-             xp = sk(i,j,k,  1)*volpi
-             yp = sk(i,j,k,  2)*volpi
-             zp = sk(i,j,k,  3)*volpi
-
-             xa  = half*(sk(i,j,k,1) + sk(i,j,k-1,1))*voli
-             ya  = half*(sk(i,j,k,2) + sk(i,j,k-1,2))*voli
-             za  = half*(sk(i,j,k,3) + sk(i,j,k-1,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             ! Compute the blended diffusion coefficients for k-1,
-             ! k and k+1.
-
-             t1 = f1(i,j,k+1); t2 = one - t1
-             rSSTSigkp1 = t1*rSSTSigk1 + t2*rSSTSigk2
-             rSSTSigwp1 = t1*rSSTSigw1 + t2*rSSTSigw2
-
-             t1 = f1(i,j,k); t2 = one - t1
-             rSSTSigk = t1*rSSTSigk1 + t2*rSSTSigk2
-             rSSTSigw = t1*rSSTSigw1 + t2*rSSTSigw2
-
-             t1 = f1(i,j,k-1); t2 = one - t1
-             rSSTSigkm1 = t1*rSSTSigk1 + t2*rSSTSigk2
-             rSSTSigwm1 = t1*rSSTSigw1 + t2*rSSTSigw2
-
-             ! Off-diagonal terms due to the diffusion terms
-             ! in k-direction.
-
-             rhoi = one/w(i,j,k,irho)
-             mulm = half*(rlv(i,j,k-1) + rlv(i,j,k))
-             mulp = half*(rlv(i,j,k+1) + rlv(i,j,k))
-             muem = half*(rSSTSigkm1*rev(i,j,k-1) + rSSTSigk*rev(i,j,k))
-             muep = half*(rSSTSigkp1*rev(i,j,k+1) + rSSTSigk*rev(i,j,k))
-
-             c1m = ttm*(mulm + muem)*rhoi
-             c1p = ttp*(mulp + muep)*rhoi
-
-             muem = half*(rSSTSigwm1*rev(i,j,k-1) + rSSTSigw*rev(i,j,k))
-             muep = half*(rSSTSigwp1*rev(i,j,k+1) + rSSTSigw*rev(i,j,k))
-
-             c2m = ttm*(mulm + muem)*rhoi
-             c2p = ttp*(mulp + muep)*rhoi
-
-             bb(1,k) = -c1m
-             dd(1,k) = -c1p
-             bb(2,k) = -c2m
-             dd(2,k) = -c2p
-
-             ! Compute the grid velocity if present.
-             ! It is taken as the average of k and k-1,
-
-             if( addGridVelocities ) &
-                  qs = half*(sFaceK(i,j,k) + sFaceK(i,j,k-1))*voli
-
-             ! Off-diagonal terms due to the advection term in
-             ! k-direction. First order approximation.
-
-             uu = xa*w(i,j,k,ivx) + ya*w(i,j,k,ivy) + za*w(i,j,k,ivz) - qs
-             um = zero
-             up = zero
-             if(uu < zero) um = uu
-             if(uu > zero) up = uu
-
-             bb(1,k) = bb(1,k) - up
-             dd(1,k) = dd(1,k) + um
-             bb(2,k) = bb(2,k) - up
-             dd(2,k) = dd(2,k) + um
-
-             ! Store the central jacobian and rhs in cc and ff.
-             ! Multiply the off-diagonal terms and rhs by the iblank
-             ! value so the update determined for iblank = 0 is zero.
-
-             rblank = real(iblank(i,j,k), realType)
-
-             cc(1,1,k) = qq(i,j,k,1,1)
-             cc(1,2,k) = qq(i,j,k,1,2)*rblank
-             cc(2,1,k) = qq(i,j,k,2,1)*rblank
-             cc(2,2,k) = qq(i,j,k,2,2)
-
-             ff(1,k) = dvt(i,j,k,1)*rblank
-             ff(2,k) = dvt(i,j,k,2)*rblank
-
-             bb(:,k) = bb(:,k)*rblank
-             dd(:,k) = dd(:,k)*rblank
-
-             ! Set off diagonal terms to zero if wall function are used.
-
-             if((i == 2  .and. flagI2(j,k)) .or. &
-                  (i == il .and. flagIl(j,k)) .or. &
-                  (j == 2  .and. flagJ2(i,k)) .or. &
-                  (j == jl .and. flagJl(i,k)) .or. &
-                  (k == 2  .and. flagK2(i,j)) .or. &
-                  (k == kl .and. flagKl(i,j))) then
-                bb(1,k) = zero
-                dd(1,k) = zero
-                bb(2,k) = zero
-                dd(2,k) = zero
-             endif
-
-          enddo
-
-          ! Solve the tri-diagonal system in k-direction.
-
-          call tdia3(2_intType, kl, bb, cc, dd, ff)
-
-          ! Store the update in dvt.
-
-          do k=2,kl
-             dvt(i,j,k,1) = ff(1,k)
-             dvt(i,j,k,2) = ff(2,k)
-          enddo
-
-       enddo
-    enddo
-    !
-    !       Update the turbulent variables. For explicit relaxation the
-    !       update must be relaxed; for implicit relaxation this has been
-    !       done via the time step.
-    !
-    factor = one
-    if(turbRelax == turbRelaxExplicit) factor = alfaTurb
-
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-             w(i,j,k,itu1) = w(i,j,k,itu1) + factor*dvt(i,j,k,1)
-             w(i,j,k,itu1) = max(w(i,j,k,itu1), zero)
-
-             w(i,j,k,itu2) = w(i,j,k,itu2) + factor*dvt(i,j,k,2)
-             w(i,j,k,itu2) = max(w(i,j,k,itu2), 1.e-5_realType*wInf(itu2))
-          enddo
-       enddo
-    enddo
+    subroutine SST_block(resOnly)
+
+        use constants
+        use blockPointers, only: il, jl, kl
+        use inputTimeSpectral
+        use iteration
+        use turbUtils, only: SSTEddyViscosity
+        use turbBCRoutines, only: bcTurbTreatment, applyAllTurbBCThisBlock
+        implicit none
+
+        !
+        !      Subroutine argument.
+        !
+        logical, intent(in) :: resOnly
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn, sps
+
+        ! Set the arrays for the boundary condition treatment.
+
+        call bcTurbTreatment
+
+        ! Solve the transport equations for k and omega.
+
+        call SSTSolve(resOnly)
+
+        ! The eddy viscosity and the boundary conditions are only
+        ! applied if an actual update has been computed in SSTSolve.
+
+        if (.not. resOnly) then
+
+            ! Compute the corresponding eddy viscosity.
+
+            call SSTEddyViscosity(2, il, 2, jl, 2, kl)
+
+            ! Set the halo values for the turbulent variables.
+            ! We are on the finest mesh, so the second layer of halo
+            ! cells must be computed as well.
+
+            call applyAllTurbBCThisBlock(.true.)
+        end if
+
+    end subroutine SST_block
+
+    subroutine SSTSolve(resOnly)
+        !
+        !       SSTSolve solves the turbulent transport equations for
+        !       menter's SST variant of the k-omega model in a decoupled
+        !       manner using a diagonal dominant ADI-scheme.
+        !
+        use blockPointers
+        use constants
+        use flowVarRefState
+        use inputIteration
+        use inputPhysics
+        use paramTurb
+        use turbMod, only: dvt, vort, prod, kwCD, f1
+        use turbUtils, only: prodSmag2, prodWmag2, prodKatoLaunder, &
+                             turbAdvection, unsteadyTurbTerm, tdia3, kwCDterm
+        use turbCurveFits, only: curveTupYp
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        logical, intent(in) :: resOnly
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, nn
+
+        real(kind=realType) :: rSSTGam1, rSSTGam2, t1, t2
+        real(kind=realType) :: rSSTGam, rSSTBeta
+        real(kind=realType) :: rhoi, ss, spk, sdk
+        real(kind=realType) :: voli, volmi, volpi
+        real(kind=realType) :: xm, ym, zm, xp, yp, zp, xa, ya, za
+        real(kind=realType) :: ttm, ttp, mulm, mulp, muem, muep
+        real(kind=realType) :: rSSTSigkp1, rSSTSigk, rSSTSigkm1
+        real(kind=realType) :: rSSTSigwp1, rSSTSigw, rSSTSigwm1
+        real(kind=realType) :: c1m, c1p, c10, c2m, c2p, c20
+        real(kind=realType) :: b1, b2, c1, c2, d1, d2
+        real(kind=realType) :: utau, qs, uu, um, up, factor, rblank
+
+        real(kind=realType), dimension(itu1:itu2) :: tup
+
+        real(kind=realType), dimension(2:il, 2:jl, 2:kl, 2, 2) :: qq
+        real(kind=realType), dimension(2, 2:max(il, jl, kl)) :: bb, dd, ff
+        real(kind=realType), dimension(2, 2, 2:max(il, jl, kl)) :: cc
+
+        real(kind=realType), dimension(:, :, :), pointer :: ddw, ww, ddvt
+        real(kind=realType), dimension(:, :), pointer :: rrlv
+        real(kind=realType), dimension(:, :), pointer :: dd2Wall
+
+        logical, dimension(2:jl, 2:kl), target :: flagI2, flagIl
+        logical, dimension(2:il, 2:kl), target :: flagJ2, flagJl
+        logical, dimension(2:il, 2:jl), target :: flagK2, flagKl
+
+        logical, dimension(:, :), pointer :: flag
+
+        ! Set model constants
+
+        rSSTGam1 = rSSTBeta1 / rSSTBetas &
+                   - rSSTSigw1 * rSSTK * rSSTK / sqrt(rSSTBetas)
+        rSSTGam2 = rSSTBeta2 / rSSTBetas &
+                   - rSSTSigw2 * rSSTK * rSSTK / sqrt(rSSTBetas)
+
+        ! Set a couple of pointers to the correct entries in dw to
+        ! make the code more readable.
+
+        dvt => scratch(1:, 1:, 1:, idvt:)
+        prod => scratch(1:, 1:, 1:, iprod)
+        vort => prod
+        kwCD => scratch(1:, 1:, 1:, icd)
+        f1 => scratch(1:, 1:, 1:, if1SST)
+        !
+        !       Production term.
+        !
+        select case (turbProd)
+        case (strain)
+            call prodSmag2
+
+        case (vorticity)
+            call prodWmag2
+
+        case (katoLaunder)
+            call prodKatoLaunder
+
+        end select
+        !
+        !       Source terms.
+        !       Determine the source term and its derivative w.r.t. k and
+        !       omega for all internal cells of the block.
+        !       Note that the blending function f1 and the cross diffusion
+        !       were computed earlier in f1SST.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Compute the blended value of rSSTGam and rSSTBeta,
+                    ! which occur in the production terms of k and omega.
+
+                    t1 = f1(i, j, k); t2 = one - t1
+                    rSSTGam = t1 * rSSTGam1 + t2 * rSSTGam2
+                    rSSTBeta = t1 * rSSTBeta1 + t2 * rSSTBeta2
+
+                    ! Compute the source terms for both the k and the omega
+                    ! equation. Note that dw(i,j,k,iprod) currently contains the
+                    ! unscaled source term. Furthermore the production term of
+                    ! k is limited to a certain times the destruction term.
+
+                    rhoi = one / w(i, j, k, irho)
+                    ss = prod(i, j, k)
+                    spk = rev(i, j, k) * ss * rhoi
+                    sdk = rSSTBetas * w(i, j, k, itu1) * w(i, j, k, itu2)
+                    spk = min(spk, pklim * sdk)
+
+                    dvt(i, j, k, 1) = spk - sdk
+                    dvt(i, j, k, 2) = rSSTGam * ss + two * t2 * rSSTSigw2 * kwCD(i, j, k) &
+                                      - rSSTBeta * w(i, j, k, itu2)**2
+
+                    ! Compute the source term jacobian. Note that only the
+                    ! destruction terms are linearized to increase the diagonal
+                    ! dominance of the matrix. Furthermore minus the source
+                    ! term jacobian is stored.
+
+                    qq(i, j, k, 1, 1) = rSSTBetas * w(i, j, k, itu2)
+                    qq(i, j, k, 1, 2) = zero
+                    qq(i, j, k, 2, 1) = zero
+                    qq(i, j, k, 2, 2) = two * rSSTBeta * w(i, j, k, itu2)
+
+                end do
+            end do
+        end do
+        !
+        !       Advection and unsteady terms.
+        !
+        nn = itu1 - 1
+        call turbAdvection(2_intType, 2_intType, nn, qq)
+
+        call unsteadyTurbTerm(2_intType, 2_intType, nn, qq)
+        !
+        !       Viscous terms in k-direction.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Compute the metrics in zeta-direction, i.e. along the
+                    ! line k = constant.
+
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i, j, k - 1))
+                    volpi = two / (vol(i, j, k) + vol(i, j, k + 1))
+
+                    xm = sk(i, j, k - 1, 1) * volmi
+                    ym = sk(i, j, k - 1, 2) * volmi
+                    zm = sk(i, j, k - 1, 3) * volmi
+                    xp = sk(i, j, k, 1) * volpi
+                    yp = sk(i, j, k, 2) * volpi
+                    zp = sk(i, j, k, 3) * volpi
+
+                    xa = half * (sk(i, j, k, 1) + sk(i, j, k - 1, 1)) * voli
+                    ya = half * (sk(i, j, k, 2) + sk(i, j, k - 1, 2)) * voli
+                    za = half * (sk(i, j, k, 3) + sk(i, j, k - 1, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
+
+                    ! Compute the blended diffusion coefficients for k-1,
+                    ! k and k+1.
+
+                    t1 = f1(i, j, k + 1); t2 = one - t1
+                    rSSTSigkp1 = t1 * rSSTSigk1 + t2 * rSSTSigk2
+                    rSSTSigwp1 = t1 * rSSTSigw1 + t2 * rSSTSigw2
+
+                    t1 = f1(i, j, k); t2 = one - t1
+                    rSSTSigk = t1 * rSSTSigk1 + t2 * rSSTSigk2
+                    rSSTSigw = t1 * rSSTSigw1 + t2 * rSSTSigw2
+
+                    t1 = f1(i, j, k - 1); t2 = one - t1
+                    rSSTSigkm1 = t1 * rSSTSigk1 + t2 * rSSTSigk2
+                    rSSTSigwm1 = t1 * rSSTSigw1 + t2 * rSSTSigw2
+
+                    ! Computation of the viscous terms in zeta-direction; note
+                    ! that cross-derivatives are neglected, i.e. the mesh is
+                    ! assumed to be orthogonal.
+                    ! The second derivative in zeta-direction is constructed as
+                    ! the central difference of the first order derivatives, i.e.
+                    ! d^2/dzeta^2 = d/dzeta (d/dzeta k+1/2 - d/dzeta k-1/2).
+                    ! In this way the metric as well as the varying viscosity
+                    ! can be taken into account; the latter appears inside the
+                    ! d/dzeta derivative. The whole term is divided by rho to
+                    ! obtain the diffusion term for k and omega.
+
+                    ! First the k-term.
+
+                    rhoi = one / w(i, j, k, irho)
+                    mulm = half * (rlv(i, j, k - 1) + rlv(i, j, k))
+                    mulp = half * (rlv(i, j, k + 1) + rlv(i, j, k))
+                    muem = half * (rSSTSigkm1 * rev(i, j, k - 1) + rSSTSigk * rev(i, j, k))
+                    muep = half * (rSSTSigkp1 * rev(i, j, k + 1) + rSSTSigk * rev(i, j, k))
+
+                    c1m = ttm * (mulm + muem) * rhoi
+                    c1p = ttp * (mulp + muep) * rhoi
+                    c10 = c1m + c1p
+
+                    ! And the omega term.
+
+                    muem = half * (rSSTSigwm1 * rev(i, j, k - 1) + rSSTSigw * rev(i, j, k))
+                    muep = half * (rSSTSigwp1 * rev(i, j, k + 1) + rSSTSigw * rev(i, j, k))
+
+                    c2m = ttm * (mulm + muem) * rhoi
+                    c2p = ttp * (mulp + muep) * rhoi
+                    c20 = c2m + c2p
+
+                    ! Update the residual for this cell and store the possible
+                    ! coefficients for the matrix in b1, b2, c1, c2, d1 and d2.
+
+                    dvt(i, j, k, 1) = dvt(i, j, k, 1) + c1m * w(i, j, k - 1, itu1) &
+                                      - c10 * w(i, j, k, itu1) + c1p * w(i, j, k + 1, itu1)
+                    dvt(i, j, k, 2) = dvt(i, j, k, 2) + c2m * w(i, j, k - 1, itu2) &
+                                      - c20 * w(i, j, k, itu2) + c2p * w(i, j, k + 1, itu2)
+
+                    b1 = -c1m
+                    c1 = c10
+                    d1 = -c1p
+
+                    b2 = -c2m
+                    c2 = c20
+                    d2 = -c2p
+
+                    ! Update the central jacobian. For nonboundary cells this
+                    ! is simply c1 and c2. For boundary cells this is slightly
+                    ! more complicated, because the boundary conditions are
+                    ! treated implicitly and the off-diagonal terms b1, b2 and
+                    ! d1, d2 must be taken into account.
+                    ! The boundary conditions are only treated implicitly if
+                    ! the diagonal dominance of the matrix is increased.
+
+                    if (k == 2) then
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1 &
+                                            - b1 * max(bmtk1(i, j, itu1, itu1), zero)
+                        qq(i, j, k, 1, 2) = qq(i, j, k, 1, 2) - b1 * bmtk1(i, j, itu1, itu2)
+                        qq(i, j, k, 2, 1) = qq(i, j, k, 2, 1) - b2 * bmtk1(i, j, itu2, itu1)
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2 &
+                                            - b2 * max(bmtk1(i, j, itu2, itu2), zero)
+                    else if (k == kl) then
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1 &
+                                            - d1 * max(bmtk2(i, j, itu1, itu1), zero)
+                        qq(i, j, k, 1, 2) = qq(i, j, k, 1, 2) - d1 * bmtk2(i, j, itu1, itu2)
+                        qq(i, j, k, 2, 1) = qq(i, j, k, 2, 1) - d2 * bmtk2(i, j, itu2, itu1)
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2 &
+                                            - d2 * max(bmtk2(i, j, itu2, itu2), zero)
+                    else
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2
+                    end if
+
+                end do
+            end do
+        end do
+        !
+        !       Viscous terms in j-direction.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Compute the metrics in eta-direction, i.e. along the
+                    ! line j = constant.
+
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i, j - 1, k))
+                    volpi = two / (vol(i, j, k) + vol(i, j + 1, k))
+
+                    xm = sj(i, j - 1, k, 1) * volmi
+                    ym = sj(i, j - 1, k, 2) * volmi
+                    zm = sj(i, j - 1, k, 3) * volmi
+                    xp = sj(i, j, k, 1) * volpi
+                    yp = sj(i, j, k, 2) * volpi
+                    zp = sj(i, j, k, 3) * volpi
+
+                    xa = half * (sj(i, j, k, 1) + sj(i, j - 1, k, 1)) * voli
+                    ya = half * (sj(i, j, k, 2) + sj(i, j - 1, k, 2)) * voli
+                    za = half * (sj(i, j, k, 3) + sj(i, j - 1, k, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
+
+                    ! Compute the blended diffusion coefficients for j-1,
+                    ! j and j+1.
+
+                    t1 = f1(i, j + 1, k); t2 = one - t1
+                    rSSTSigkp1 = t1 * rSSTSigk1 + t2 * rSSTSigk2
+                    rSSTSigwp1 = t1 * rSSTSigw1 + t2 * rSSTSigw2
+
+                    t1 = f1(i, j, k); t2 = one - t1
+                    rSSTSigk = t1 * rSSTSigk1 + t2 * rSSTSigk2
+                    rSSTSigw = t1 * rSSTSigw1 + t2 * rSSTSigw2
+
+                    t1 = f1(i, j - 1, k); t2 = one - t1
+                    rSSTSigkm1 = t1 * rSSTSigk1 + t2 * rSSTSigk2
+                    rSSTSigwm1 = t1 * rSSTSigw1 + t2 * rSSTSigw2
+
+                    ! Computation of the viscous terms in eta-direction; note
+                    ! that cross-derivatives are neglected, i.e. the mesh is
+                    ! assumed to be orthogonal.
+                    ! The second derivative in eta-direction is constructed as
+                    ! the central difference of the first order derivatives, i.e.
+                    ! d^2/deta^2 = d/deta (d/deta j+1/2 - d/deta j-1/2).
+                    ! In this way the metric as well as the varying viscosity
+                    ! can be taken into account; the latter appears inside the
+                    ! d/deta derivative. The whole term is divided by rho to
+                    ! obtain the diffusion term for k and omega.
+
+                    ! First the k-term.
+
+                    rhoi = one / w(i, j, k, irho)
+                    mulm = half * (rlv(i, j - 1, k) + rlv(i, j, k))
+                    mulp = half * (rlv(i, j + 1, k) + rlv(i, j, k))
+                    muem = half * (rSSTSigkm1 * rev(i, j - 1, k) + rSSTSigk * rev(i, j, k))
+                    muep = half * (rSSTSigkp1 * rev(i, j + 1, k) + rSSTSigk * rev(i, j, k))
+
+                    c1m = ttm * (mulm + muem) * rhoi
+                    c1p = ttp * (mulp + muep) * rhoi
+                    c10 = c1m + c1p
+
+                    ! And the omega term.
+
+                    muem = half * (rSSTSigwm1 * rev(i, j - 1, k) + rSSTSigw * rev(i, j, k))
+                    muep = half * (rSSTSigwp1 * rev(i, j + 1, k) + rSSTSigw * rev(i, j, k))
+
+                    c2m = ttm * (mulm + muem) * rhoi
+                    c2p = ttp * (mulp + muep) * rhoi
+                    c20 = c2m + c2p
+
+                    ! Update the residual for this cell and store the possible
+                    ! coefficients for the matrix in b1, b2, c1, c2, d1 and d2.
+
+                    dvt(i, j, k, 1) = dvt(i, j, k, 1) + c1m * w(i, j - 1, k, itu1) &
+                                      - c10 * w(i, j, k, itu1) + c1p * w(i, j + 1, k, itu1)
+                    dvt(i, j, k, 2) = dvt(i, j, k, 2) + c2m * w(i, j - 1, k, itu2) &
+                                      - c20 * w(i, j, k, itu2) + c2p * w(i, j + 1, k, itu2)
+
+                    b1 = -c1m
+                    c1 = c10
+                    d1 = -c1p
+
+                    b2 = -c2m
+                    c2 = c20
+                    d2 = -c2p
+
+                    ! Update the central jacobian. For nonboundary cells this
+                    ! is simply c1 and c2. For boundary cells this is slightly
+                    ! more complicated, because the boundary conditions are
+                    ! treated implicitly and the off-diagonal terms b1, b2 and
+                    ! d1, d2 must be taken into account.
+                    ! The boundary conditions are only treated implicitly if
+                    ! the diagonal dominance of the matrix is increased.
+
+                    if (j == 2) then
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1 &
+                                            - b1 * max(bmtj1(i, k, itu1, itu1), zero)
+                        qq(i, j, k, 1, 2) = qq(i, j, k, 1, 2) - b1 * bmtj1(i, k, itu1, itu2)
+                        qq(i, j, k, 2, 1) = qq(i, j, k, 2, 1) - b2 * bmtj1(i, k, itu2, itu1)
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2 &
+                                            - b2 * max(bmtj1(i, k, itu2, itu2), zero)
+                    else if (j == jl) then
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1 &
+                                            - d1 * max(bmtj2(i, k, itu1, itu1), zero)
+                        qq(i, j, k, 1, 2) = qq(i, j, k, 1, 2) - d1 * bmtj2(i, k, itu1, itu2)
+                        qq(i, j, k, 2, 1) = qq(i, j, k, 2, 1) - d2 * bmtj2(i, k, itu2, itu1)
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2 &
+                                            - d2 * max(bmtj2(i, k, itu2, itu2), zero)
+                    else
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2
+                    end if
+
+                end do
+            end do
+        end do
+        !
+        !       Viscous terms in i-direction.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Compute the metrics in xi-direction, i.e. along the
+                    ! line i = constant.
+
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i - 1, j, k))
+                    volpi = two / (vol(i, j, k) + vol(i + 1, j, k))
+
+                    xm = si(i - 1, j, k, 1) * volmi
+                    ym = si(i - 1, j, k, 2) * volmi
+                    zm = si(i - 1, j, k, 3) * volmi
+                    xp = si(i, j, k, 1) * volpi
+                    yp = si(i, j, k, 2) * volpi
+                    zp = si(i, j, k, 3) * volpi
+
+                    xa = half * (si(i, j, k, 1) + si(i - 1, j, k, 1)) * voli
+                    ya = half * (si(i, j, k, 2) + si(i - 1, j, k, 2)) * voli
+                    za = half * (si(i, j, k, 3) + si(i - 1, j, k, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
+
+                    ! Compute the blended diffusion coefficients for i-1,
+                    ! i and i+1.
+
+                    t1 = f1(i + 1, j, k); t2 = one - t1
+                    rSSTSigkp1 = t1 * rSSTSigk1 + t2 * rSSTSigk2
+                    rSSTSigwp1 = t1 * rSSTSigw1 + t2 * rSSTSigw2
+
+                    t1 = f1(i, j, k); t2 = one - t1
+                    rSSTSigk = t1 * rSSTSigk1 + t2 * rSSTSigk2
+                    rSSTSigw = t1 * rSSTSigw1 + t2 * rSSTSigw2
+
+                    t1 = f1(i - 1, j, k); t2 = one - t1
+                    rSSTSigkm1 = t1 * rSSTSigk1 + t2 * rSSTSigk2
+                    rSSTSigwm1 = t1 * rSSTSigw1 + t2 * rSSTSigw2
+
+                    ! Computation of the viscous terms in xi-direction; note
+                    ! that cross-derivatives are neglected, i.e. the mesh is
+                    ! assumed to be orthogonal.
+                    ! The second derivative in xi-direction is constructed as
+                    ! the central difference of the first order derivatives, i.e.
+                    ! d^2/dxi^2 = d/dxi (d/dxi i+1/2 - d/dxi i-1/2).
+                    ! In this way the metric as well as the varying viscosity
+                    ! can be taken into account; the latter appears inside the
+                    ! d/dxi derivative. The whole term is divided by rho to
+                    ! obtain the diffusion term for k and omega.
+
+                    ! First the k-term.
+
+                    rhoi = one / w(i, j, k, irho)
+                    mulm = half * (rlv(i - 1, j, k) + rlv(i, j, k))
+                    mulp = half * (rlv(i + 1, j, k) + rlv(i, j, k))
+                    muem = half * (rSSTSigkm1 * rev(i - 1, j, k) + rSSTSigk * rev(i, j, k))
+                    muep = half * (rSSTSigkp1 * rev(i + 1, j, k) + rSSTSigk * rev(i, j, k))
+
+                    c1m = ttm * (mulm + muem) * rhoi
+                    c1p = ttp * (mulp + muep) * rhoi
+                    c10 = c1m + c1p
+
+                    ! And the omega term.
+
+                    muem = half * (rSSTSigwm1 * rev(i - 1, j, k) + rSSTSigw * rev(i, j, k))
+                    muep = half * (rSSTSigwp1 * rev(i + 1, j, k) + rSSTSigw * rev(i, j, k))
+
+                    c2m = ttm * (mulm + muem) * rhoi
+                    c2p = ttp * (mulp + muep) * rhoi
+                    c20 = c2m + c2p
+
+                    ! Update the residual for this cell and store the possible
+                    ! coefficients for the matrix in b1, b2, c1, c2, d1 and d2.
+
+                    dvt(i, j, k, 1) = dvt(i, j, k, 1) + c1m * w(i - 1, j, k, itu1) &
+                                      - c10 * w(i, j, k, itu1) + c1p * w(i + 1, j, k, itu1)
+                    dvt(i, j, k, 2) = dvt(i, j, k, 2) + c2m * w(i - 1, j, k, itu2) &
+                                      - c20 * w(i, j, k, itu2) + c2p * w(i + 1, j, k, itu2)
+
+                    b1 = -c1m
+                    c1 = c10
+                    d1 = -c1p
+
+                    b2 = -c2m
+                    c2 = c20
+                    d2 = -c2p
+
+                    ! Update the central jacobian. For nonboundary cells this
+                    ! is simply c1 and c2. For boundary cells this is slightly
+                    ! more complicated, because the boundary conditions are
+                    ! treated implicitly and the off-diagonal terms b1, b2 and
+                    ! d1, d2 must be taken into account.
+                    ! The boundary conditions are only treated implicitly if
+                    ! the diagonal dominance of the matrix is increased.
+
+                    if (i == 2) then
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1 &
+                                            - b1 * max(bmti1(j, k, itu1, itu1), zero)
+                        qq(i, j, k, 1, 2) = qq(i, j, k, 1, 2) - b1 * bmti1(j, k, itu1, itu2)
+                        qq(i, j, k, 2, 1) = qq(i, j, k, 2, 1) - b2 * bmti1(j, k, itu2, itu1)
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2 &
+                                            - b2 * max(bmti1(j, k, itu2, itu2), zero)
+                    else if (i == il) then
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1 &
+                                            - d1 * max(bmti2(j, k, itu1, itu1), zero)
+                        qq(i, j, k, 1, 2) = qq(i, j, k, 1, 2) - d1 * bmti2(j, k, itu1, itu2)
+                        qq(i, j, k, 2, 1) = qq(i, j, k, 2, 1) - d2 * bmti2(j, k, itu2, itu1)
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2 &
+                                            - d2 * max(bmti2(j, k, itu2, itu2), zero)
+                    else
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2
+                    end if
+
+                end do
+            end do
+        end do
+
+        ! Multiply the residual by the volume and store this in dw; this
+        ! is done for monitoring reasons only. The multiplication with the
+        ! volume is present to be consistent with the flow residuals; also
+        ! the negative value is taken, again to be consistent with the
+        ! flow equations. Also multiply by iblank so that no updates occur
+        ! in holes or the overset boundary.
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    rblank = real(iblank(i, j, k), realType)
+                    dw(i, j, k, itu1) = -volRef(i, j, k) * dvt(i, j, k, 1) * rblank
+                    dw(i, j, k, itu2) = -volRef(i, j, k) * dvt(i, j, k, 2) * rblank
+                end do
+            end do
+        end do
+
+        ! Initialize the wall function flags to .false.
+
+        flagI2 = .false.
+        flagIl = .false.
+        flagJ2 = .false.
+        flagJl = .false.
+        flagK2 = .false.
+        flagKl = .false.
+
+        ! Modify the rhs of the 1st internal cell, if wall functions
+        ! are used; their value is determined by the table.
+
+        testWallFunctions: if (wallFunctions) then
+
+            bocos: do nn = 1, nViscBocos
+
+                ! Determine the block face on which the subface is located
+                ! and set some variables. As flag points to the entire array
+                ! flagI2, etc., its starting indices are the starting indices
+                ! of its target and not 1.
+
+                select case (BCFaceID(nn))
+                case (iMin)
+                    flag => flagI2
+                    ddw => dw(2, 1:, 1:, 1:); ddvt => dvt(2, 1:, 1:, 1:)
+                    ww => w(2, 1:, 1:, 1:); rrlv => rlv(2, 1:, 1:)
+                    dd2Wall => d2Wall(2, :, :)
+
+                case (iMax)
+                    flag => flagIl
+                    ddw => dw(il, 1:, 1:, 1:); ddvt => dvt(il, 1:, 1:, 1:)
+                    ww => w(il, 1:, 1:, 1:); rrlv => rlv(il, 1:, 1:)
+                    dd2Wall => d2Wall(il, :, :)
+
+                case (jMin)
+                    flag => flagJ2
+                    ddw => dw(1:, 2, 1:, 1:); ddvt => dvt(1:, 2, 1:, 1:)
+                    ww => w(1:, 2, 1:, 1:); rrlv => rlv(1:, 2, 1:)
+                    dd2Wall => d2Wall(:, 2, :)
+
+                case (jMax)
+                    flag => flagJl
+                    ddw => dw(1:, jl, 1:, 1:); ddvt => dvt(1:, jl, 1:, 1:)
+                    ww => w(1:, jl, 1:, 1:); rrlv => rlv(1:, jl, 1:)
+                    dd2Wall => d2Wall(:, jl, :)
+
+                case (kMin)
+                    flag => flagK2
+                    ddw => dw(1:, 1:, 2, 1:); ddvt => dvt(1:, 1:, 2, 1:)
+                    ww => w(1:, 1:, 2, 1:); rrlv => rlv(1:, 1:, 2)
+                    dd2Wall => d2Wall(:, :, 2)
+
+                case (kMax)
+                    flag => flagKl
+                    ddw => dw(1:, 1:, kl, 1:); ddvt => dvt(1:, 1:, kl, 1:)
+                    ww => w(1:, 1:, kl, 1:); rrlv => rlv(1:, 1:, kl)
+                    dd2Wall => d2Wall(:, :, kl)
+
+                end select
+
+                ! Loop over the owned faces of this subface. Therefore the
+                ! nodal range of BCData must be used. The offset of +1 is
+                ! present, because the starting index of the cell range is
+                ! 1 larger than the starting index of the nodal range.
+
+                do j = (BCData(nn)%jnBeg + 1), BCData(nn)%jnEnd
+                    do i = (BCData(nn)%inBeg + 1), BCData(nn)%inEnd
+
+                        ! Set ddw to zero.
+
+                        ddw(i, j, itu1) = zero
+                        ddw(i, j, itu2) = zero
+
+                        ! Enforce k and omega in the 1st internal cell from
+                        ! the wall function table. There is an offset of -1 in
+                        ! the wall distance. Note that the offset compared to
+                        ! the current value must be stored. Also note that the
+                        ! curve fits contain the non-dimensional values.
+
+                        utau = viscSubface(nn)%utau(i, j)
+                        yp = ww(i, j, irho) * dd2Wall(i - 1, j - 1) * utau / rrlv(i, j)
+
+                        call curveTupYp(tup, yp, itu1, itu2)
+
+                        tup(itu1) = tup(itu1) * utau**2
+                        tup(itu2) = tup(itu2) * utau**2 / rrlv(i, j) * ww(i, j, irho)
+
+                        ddvt(i, j, 1) = tup(itu1) - ww(i, j, itu1)
+                        ddvt(i, j, 2) = tup(itu2) - ww(i, j, itu2)
+
+                        ! Set the wall flag to .true.
+
+                        flag(i, j) = .true.
+
+                    end do
+                end do
+
+            end do bocos
+        end if testWallFunctions
+
+        ! Return if only the residual must be computed.
+
+        if (resOnly) return
+
+        ! For implicit relaxation take the local time step into account,
+        ! where dt is the inverse of the central jacobian times the cfl
+        ! number. The following system is solved:
+        ! (I/dt + cc + bb + dd)*dw = rhs, in which I/dt = cc/cfl. As in
+        ! the rest of the algorithm only the modified central jacobian is
+        ! used, stored it now.
+
+        ! Compute the factor multiplying the central jacobian, which
+        ! is 1 + 1/cfl (implicit relaxation only).
+
+        factor = one
+        if (turbRelax == turbRelaxImplicit) &
+            factor = one + (one - alfaTurb) / alfaTurb
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    qq(i, j, k, 1, 1) = factor * qq(i, j, k, 1, 1)
+                    qq(i, j, k, 1, 2) = factor * qq(i, j, k, 1, 2)
+                    qq(i, j, k, 2, 1) = factor * qq(i, j, k, 2, 1)
+                    qq(i, j, k, 2, 2) = factor * qq(i, j, k, 2, 2)
+
+                    ! Set qq to i if the value is determined by the table.
+
+                    if ((i == 2 .and. flagI2(j, k)) .or. &
+                        (i == il .and. flagIl(j, k)) .or. &
+                        (j == 2 .and. flagJ2(i, k)) .or. &
+                        (j == jl .and. flagJl(i, k)) .or. &
+                        (k == 2 .and. flagK2(i, j)) .or. &
+                        (k == kl .and. flagKl(i, j))) then
+                        qq(i, j, k, 1, 1) = one
+                        qq(i, j, k, 1, 2) = zero
+                        qq(i, j, k, 2, 1) = zero
+                        qq(i, j, k, 2, 2) = one
+                    end if
+                end do
+            end do
+        end do
+
+        ! Initialize the grid velocity to zero. This value will be used
+        ! if the block is not moving.
+
+        qs = zero
+        !
+        !       dd-ADI step in j-direction. There is no particular reason to
+        !       start in j-direction, it just happened to be so. As we solve
+        !       in j-direction, the j-loop is the innermost loop.
+        !
+        do k = 2, kl
+            do i = 2, il
+                do j = 2, jl
+
+                    ! More or less the same code is executed here as above when
+                    ! the residual was built. However, now the off-diagonal
+                    ! terms for the dd-ADI must be built and stored. This could
+                    ! have been done earlier, but then all the coefficients had
+                    ! to be stored. To save memory, they are recomputed.
+                    ! Consequently, see the j-loop to build the residual for
+                    ! the comments.
+
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i, j - 1, k))
+                    volpi = two / (vol(i, j, k) + vol(i, j + 1, k))
+
+                    xm = sj(i, j - 1, k, 1) * volmi
+                    ym = sj(i, j - 1, k, 2) * volmi
+                    zm = sj(i, j - 1, k, 3) * volmi
+                    xp = sj(i, j, k, 1) * volpi
+                    yp = sj(i, j, k, 2) * volpi
+                    zp = sj(i, j, k, 3) * volpi
+
+                    xa = half * (sj(i, j, k, 1) + sj(i, j - 1, k, 1)) * voli
+                    ya = half * (sj(i, j, k, 2) + sj(i, j - 1, k, 2)) * voli
+                    za = half * (sj(i, j, k, 3) + sj(i, j - 1, k, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
+
+                    ! Compute the blended diffusion coefficients for j-1,
+                    ! j and j+1.
+
+                    t1 = f1(i, j + 1, k); t2 = one - t1
+                    rSSTSigkp1 = t1 * rSSTSigk1 + t2 * rSSTSigk2
+                    rSSTSigwp1 = t1 * rSSTSigw1 + t2 * rSSTSigw2
+
+                    t1 = f1(i, j, k); t2 = one - t1
+                    rSSTSigk = t1 * rSSTSigk1 + t2 * rSSTSigk2
+                    rSSTSigw = t1 * rSSTSigw1 + t2 * rSSTSigw2
+
+                    t1 = f1(i, j - 1, k); t2 = one - t1
+                    rSSTSigkm1 = t1 * rSSTSigk1 + t2 * rSSTSigk2
+                    rSSTSigwm1 = t1 * rSSTSigw1 + t2 * rSSTSigw2
+
+                    ! Off-diagonal terms due to the diffusion terms
+                    ! in j-direction.
+
+                    rhoi = one / w(i, j, k, irho)
+                    mulm = half * (rlv(i, j - 1, k) + rlv(i, j, k))
+                    mulp = half * (rlv(i, j + 1, k) + rlv(i, j, k))
+                    muem = half * (rSSTSigkm1 * rev(i, j - 1, k) + rSSTSigk * rev(i, j, k))
+                    muep = half * (rSSTSigkp1 * rev(i, j + 1, k) + rSSTSigk * rev(i, j, k))
+
+                    c1m = ttm * (mulm + muem) * rhoi
+                    c1p = ttp * (mulp + muep) * rhoi
+
+                    muem = half * (rSSTSigwm1 * rev(i, j - 1, k) + rSSTSigw * rev(i, j, k))
+                    muep = half * (rSSTSigwp1 * rev(i, j + 1, k) + rSSTSigw * rev(i, j, k))
+
+                    c2m = ttm * (mulm + muem) * rhoi
+                    c2p = ttp * (mulp + muep) * rhoi
+
+                    bb(1, j) = -c1m
+                    dd(1, j) = -c1p
+                    bb(2, j) = -c2m
+                    dd(2, j) = -c2p
+
+                    ! Compute the grid velocity if present.
+                    ! It is taken as the average of j and j-1,
+
+                    if (addGridVelocities) &
+                        qs = half * (sFaceJ(i, j, k) + sFaceJ(i, j - 1, k)) * voli
+
+                    ! Off-diagonal terms due to the advection term in
+                    ! j-direction. First order approximation.
+
+                    uu = xa * w(i, j, k, ivx) + ya * w(i, j, k, ivy) + za * w(i, j, k, ivz) - qs
+                    um = zero
+                    up = zero
+                    if (uu < zero) um = uu
+                    if (uu > zero) up = uu
+
+                    bb(1, j) = bb(1, j) - up
+                    dd(1, j) = dd(1, j) + um
+                    bb(2, j) = bb(2, j) - up
+                    dd(2, j) = dd(2, j) + um
+
+                    ! Store the central jacobian and rhs in cc and ff.
+                    ! Multiply the off-diagonal terms and rhs by the iblank
+                    ! value so the update determined for iblank = 0 is zero.
+
+                    rblank = real(iblank(i, j, k), realType)
+
+                    cc(1, 1, j) = qq(i, j, k, 1, 1)
+                    cc(1, 2, j) = qq(i, j, k, 1, 2) * rblank
+                    cc(2, 1, j) = qq(i, j, k, 2, 1) * rblank
+                    cc(2, 2, j) = qq(i, j, k, 2, 2)
+
+                    ff(1, j) = dvt(i, j, k, 1) * rblank
+                    ff(2, j) = dvt(i, j, k, 2) * rblank
+
+                    bb(:, j) = bb(:, j) * rblank
+                    dd(:, j) = dd(:, j) * rblank
+
+                    ! Set off diagonal terms to zero if wall function are used.
+
+                    if ((i == 2 .and. flagI2(j, k)) .or. &
+                        (i == il .and. flagIl(j, k)) .or. &
+                        (j == 2 .and. flagJ2(i, k)) .or. &
+                        (j == jl .and. flagJl(i, k)) .or. &
+                        (k == 2 .and. flagK2(i, j)) .or. &
+                        (k == kl .and. flagKl(i, j))) then
+                        bb(1, j) = zero
+                        dd(1, j) = zero
+                        bb(2, j) = zero
+                        dd(2, j) = zero
+                    end if
+
+                end do
+
+                ! Solve the tri-diagonal system in j-direction.
+
+                call tdia3(2_intType, jl, bb, cc, dd, ff)
+
+                ! Determine the new rhs for the next direction.
+
+                do j = 2, jl
+                    dvt(i, j, k, 1) = qq(i, j, k, 1, 1) * ff(1, j) + qq(i, j, k, 1, 2) * ff(2, j)
+                    dvt(i, j, k, 2) = qq(i, j, k, 2, 1) * ff(1, j) + qq(i, j, k, 2, 2) * ff(2, j)
+                end do
+
+            end do
+        end do
+        !
+        !       dd-ADI step in i-direction. As we solve in i-direction, the
+        !       i-loop is the innermost loop.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! More or less the same code is executed here as above when
+                    ! the residual was built. However, now the off-diagonal
+                    ! terms for the dd-ADI must be built and stored. This could
+                    ! have been done earlier, but then all the coefficients had
+                    ! to be stored. To save memory, they are recomputed.
+                    ! Consequently, see the i-loop to build the residual for
+                    ! the comments.
+
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i - 1, j, k))
+                    volpi = two / (vol(i, j, k) + vol(i + 1, j, k))
+
+                    xm = si(i - 1, j, k, 1) * volmi
+                    ym = si(i - 1, j, k, 2) * volmi
+                    zm = si(i - 1, j, k, 3) * volmi
+                    xp = si(i, j, k, 1) * volpi
+                    yp = si(i, j, k, 2) * volpi
+                    zp = si(i, j, k, 3) * volpi
+
+                    xa = half * (si(i, j, k, 1) + si(i - 1, j, k, 1)) * voli
+                    ya = half * (si(i, j, k, 2) + si(i - 1, j, k, 2)) * voli
+                    za = half * (si(i, j, k, 3) + si(i - 1, j, k, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
+
+                    ! Compute the blended diffusion coefficients for i-1,
+                    ! i and i+1.
+
+                    t1 = f1(i + 1, j, k); t2 = one - t1
+                    rSSTSigkp1 = t1 * rSSTSigk1 + t2 * rSSTSigk2
+                    rSSTSigwp1 = t1 * rSSTSigw1 + t2 * rSSTSigw2
+
+                    t1 = f1(i, j, k); t2 = one - t1
+                    rSSTSigk = t1 * rSSTSigk1 + t2 * rSSTSigk2
+                    rSSTSigw = t1 * rSSTSigw1 + t2 * rSSTSigw2
+
+                    t1 = f1(i - 1, j, k); t2 = one - t1
+                    rSSTSigkm1 = t1 * rSSTSigk1 + t2 * rSSTSigk2
+                    rSSTSigwm1 = t1 * rSSTSigw1 + t2 * rSSTSigw2
+
+                    ! Off-diagonal terms due to the diffusion terms
+                    ! in i-direction.
+
+                    rhoi = one / w(i, j, k, irho)
+                    mulm = half * (rlv(i - 1, j, k) + rlv(i, j, k))
+                    mulp = half * (rlv(i + 1, j, k) + rlv(i, j, k))
+                    muem = half * (rSSTSigkm1 * rev(i - 1, j, k) + rSSTSigk * rev(i, j, k))
+                    muep = half * (rSSTSigkp1 * rev(i + 1, j, k) + rSSTSigk * rev(i, j, k))
+
+                    c1m = ttm * (mulm + muem) * rhoi
+                    c1p = ttp * (mulp + muep) * rhoi
+
+                    muem = half * (rSSTSigwm1 * rev(i - 1, j, k) + rSSTSigw * rev(i, j, k))
+                    muep = half * (rSSTSigwp1 * rev(i + 1, j, k) + rSSTSigw * rev(i, j, k))
+
+                    c2m = ttm * (mulm + muem) * rhoi
+                    c2p = ttp * (mulp + muep) * rhoi
+                    c20 = c2m + c2p
+
+                    bb(1, i) = -c1m
+                    dd(1, i) = -c1p
+                    bb(2, i) = -c2m
+                    dd(2, i) = -c2p
+
+                    ! Compute the grid velocity if present.
+                    ! It is taken as the average of i and i-1,
+
+                    if (addGridVelocities) &
+                        qs = half * (sFaceI(i, j, k) + sFaceI(i - 1, j, k)) * voli
+
+                    ! Off-diagonal terms due to the advection term in
+                    ! i-direction. First order approximation.
+
+                    uu = xa * w(i, j, k, ivx) + ya * w(i, j, k, ivy) + za * w(i, j, k, ivz) - qs
+                    um = zero
+                    up = zero
+                    if (uu < zero) um = uu
+                    if (uu > zero) up = uu
+
+                    bb(1, i) = bb(1, i) - up
+                    dd(1, i) = dd(1, i) + um
+                    bb(2, i) = bb(2, i) - up
+                    dd(2, i) = dd(2, i) + um
+
+                    ! Store the central jacobian and rhs in cc and ff.
+                    ! Multiply the off-diagonal terms and rhs by the iblank
+                    ! value so the update determined for iblank = 0 is zero.
+
+                    rblank = real(iblank(i, j, k), realType)
+
+                    cc(1, 1, i) = qq(i, j, k, 1, 1)
+                    cc(1, 2, i) = qq(i, j, k, 1, 2) * rblank
+                    cc(2, 1, i) = qq(i, j, k, 2, 1) * rblank
+                    cc(2, 2, i) = qq(i, j, k, 2, 2)
+
+                    ff(1, i) = dvt(i, j, k, 1) * rblank
+                    ff(2, i) = dvt(i, j, k, 2) * rblank
+
+                    bb(:, i) = bb(:, i) * rblank
+                    dd(:, i) = dd(:, i) * rblank
+
+                    ! Set off diagonal terms to zero if wall function are used.
+
+                    if ((i == 2 .and. flagI2(j, k)) .or. &
+                        (i == il .and. flagIl(j, k)) .or. &
+                        (j == 2 .and. flagJ2(i, k)) .or. &
+                        (j == jl .and. flagJl(i, k)) .or. &
+                        (k == 2 .and. flagK2(i, j)) .or. &
+                        (k == kl .and. flagKl(i, j))) then
+                        bb(1, i) = zero
+                        dd(1, i) = zero
+                        bb(2, i) = zero
+                        dd(2, i) = zero
+                    end if
+
+                end do
+
+                ! Solve the tri-diagonal system in i-direction.
+
+                call tdia3(2_intType, il, bb, cc, dd, ff)
+
+                ! Determine the new rhs for the next direction.
+
+                do i = 2, il
+                    dvt(i, j, k, 1) = qq(i, j, k, 1, 1) * ff(1, i) + qq(i, j, k, 1, 2) * ff(2, i)
+                    dvt(i, j, k, 2) = qq(i, j, k, 2, 1) * ff(1, i) + qq(i, j, k, 2, 2) * ff(2, i)
+                end do
+
+            end do
+        end do
+        !
+        !       dd-ADI step in k-direction. As we solve in k-direction, the
+        !       k-loop is the innermost loop.
+        !
+        do j = 2, jl
+            do i = 2, il
+                do k = 2, kl
+
+                    ! More or less the same code is executed here as above when
+                    ! the residual was built. However, now the off-diagonal
+                    ! terms for the dd-ADI must be built and stored. This could
+                    ! have been done earlier, but then all the coefficients had
+                    ! to be stored. To save memory, they are recomputed.
+                    ! Consequently, see the k-loop to build the residual for
+                    ! the comments.
+
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i, j, k - 1))
+                    volpi = two / (vol(i, j, k) + vol(i, j, k + 1))
+
+                    xm = sk(i, j, k - 1, 1) * volmi
+                    ym = sk(i, j, k - 1, 2) * volmi
+                    zm = sk(i, j, k - 1, 3) * volmi
+                    xp = sk(i, j, k, 1) * volpi
+                    yp = sk(i, j, k, 2) * volpi
+                    zp = sk(i, j, k, 3) * volpi
+
+                    xa = half * (sk(i, j, k, 1) + sk(i, j, k - 1, 1)) * voli
+                    ya = half * (sk(i, j, k, 2) + sk(i, j, k - 1, 2)) * voli
+                    za = half * (sk(i, j, k, 3) + sk(i, j, k - 1, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
+
+                    ! Compute the blended diffusion coefficients for k-1,
+                    ! k and k+1.
+
+                    t1 = f1(i, j, k + 1); t2 = one - t1
+                    rSSTSigkp1 = t1 * rSSTSigk1 + t2 * rSSTSigk2
+                    rSSTSigwp1 = t1 * rSSTSigw1 + t2 * rSSTSigw2
+
+                    t1 = f1(i, j, k); t2 = one - t1
+                    rSSTSigk = t1 * rSSTSigk1 + t2 * rSSTSigk2
+                    rSSTSigw = t1 * rSSTSigw1 + t2 * rSSTSigw2
+
+                    t1 = f1(i, j, k - 1); t2 = one - t1
+                    rSSTSigkm1 = t1 * rSSTSigk1 + t2 * rSSTSigk2
+                    rSSTSigwm1 = t1 * rSSTSigw1 + t2 * rSSTSigw2
+
+                    ! Off-diagonal terms due to the diffusion terms
+                    ! in k-direction.
+
+                    rhoi = one / w(i, j, k, irho)
+                    mulm = half * (rlv(i, j, k - 1) + rlv(i, j, k))
+                    mulp = half * (rlv(i, j, k + 1) + rlv(i, j, k))
+                    muem = half * (rSSTSigkm1 * rev(i, j, k - 1) + rSSTSigk * rev(i, j, k))
+                    muep = half * (rSSTSigkp1 * rev(i, j, k + 1) + rSSTSigk * rev(i, j, k))
+
+                    c1m = ttm * (mulm + muem) * rhoi
+                    c1p = ttp * (mulp + muep) * rhoi
+
+                    muem = half * (rSSTSigwm1 * rev(i, j, k - 1) + rSSTSigw * rev(i, j, k))
+                    muep = half * (rSSTSigwp1 * rev(i, j, k + 1) + rSSTSigw * rev(i, j, k))
+
+                    c2m = ttm * (mulm + muem) * rhoi
+                    c2p = ttp * (mulp + muep) * rhoi
+
+                    bb(1, k) = -c1m
+                    dd(1, k) = -c1p
+                    bb(2, k) = -c2m
+                    dd(2, k) = -c2p
+
+                    ! Compute the grid velocity if present.
+                    ! It is taken as the average of k and k-1,
+
+                    if (addGridVelocities) &
+                        qs = half * (sFaceK(i, j, k) + sFaceK(i, j, k - 1)) * voli
+
+                    ! Off-diagonal terms due to the advection term in
+                    ! k-direction. First order approximation.
+
+                    uu = xa * w(i, j, k, ivx) + ya * w(i, j, k, ivy) + za * w(i, j, k, ivz) - qs
+                    um = zero
+                    up = zero
+                    if (uu < zero) um = uu
+                    if (uu > zero) up = uu
+
+                    bb(1, k) = bb(1, k) - up
+                    dd(1, k) = dd(1, k) + um
+                    bb(2, k) = bb(2, k) - up
+                    dd(2, k) = dd(2, k) + um
+
+                    ! Store the central jacobian and rhs in cc and ff.
+                    ! Multiply the off-diagonal terms and rhs by the iblank
+                    ! value so the update determined for iblank = 0 is zero.
+
+                    rblank = real(iblank(i, j, k), realType)
+
+                    cc(1, 1, k) = qq(i, j, k, 1, 1)
+                    cc(1, 2, k) = qq(i, j, k, 1, 2) * rblank
+                    cc(2, 1, k) = qq(i, j, k, 2, 1) * rblank
+                    cc(2, 2, k) = qq(i, j, k, 2, 2)
+
+                    ff(1, k) = dvt(i, j, k, 1) * rblank
+                    ff(2, k) = dvt(i, j, k, 2) * rblank
+
+                    bb(:, k) = bb(:, k) * rblank
+                    dd(:, k) = dd(:, k) * rblank
+
+                    ! Set off diagonal terms to zero if wall function are used.
+
+                    if ((i == 2 .and. flagI2(j, k)) .or. &
+                        (i == il .and. flagIl(j, k)) .or. &
+                        (j == 2 .and. flagJ2(i, k)) .or. &
+                        (j == jl .and. flagJl(i, k)) .or. &
+                        (k == 2 .and. flagK2(i, j)) .or. &
+                        (k == kl .and. flagKl(i, j))) then
+                        bb(1, k) = zero
+                        dd(1, k) = zero
+                        bb(2, k) = zero
+                        dd(2, k) = zero
+                    end if
+
+                end do
+
+                ! Solve the tri-diagonal system in k-direction.
+
+                call tdia3(2_intType, kl, bb, cc, dd, ff)
+
+                ! Store the update in dvt.
+
+                do k = 2, kl
+                    dvt(i, j, k, 1) = ff(1, k)
+                    dvt(i, j, k, 2) = ff(2, k)
+                end do
 
-  end subroutine SSTSolve
+            end do
+        end do
+        !
+        !       Update the turbulent variables. For explicit relaxation the
+        !       update must be relaxed; for implicit relaxation this has been
+        !       done via the time step.
+        !
+        factor = one
+        if (turbRelax == turbRelaxExplicit) factor = alfaTurb
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    w(i, j, k, itu1) = w(i, j, k, itu1) + factor * dvt(i, j, k, 1)
+                    w(i, j, k, itu1) = max(w(i, j, k, itu1), zero)
+
+                    w(i, j, k, itu2) = w(i, j, k, itu2) + factor * dvt(i, j, k, 2)
+                    w(i, j, k, itu2) = max(w(i, j, k, itu2), 1.e-5_realType * wInf(itu2))
+                end do
+            end do
+        end do
 
-  subroutine f1SST
-    !
-    !       f1SST computes the blending function f1 in both the owned
-    !       cells and the first layer of halo's. The result is stored in
-    !       scratch(:,:,:,if1SST). For the computation of f1 also the cross
-    !       diffusion term is needed. This is stored in scratch(:,:,:,icd) such
-    !       that it can be used in SSTSolve later on.
-    !
-    use constants
-    use blockPointers
-    use inputTimeSpectral
-    use iteration
-    use turbMod
-    use utils, only : setPointers
-    use turbUtils, only :kwCDTerm
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: sps, nn, mm, i, j, k
+    end subroutine SSTSolve
 
-    real(kind=realType) :: t1, t2, arg1
+    subroutine f1SST
+        !
+        !       f1SST computes the blending function f1 in both the owned
+        !       cells and the first layer of halo's. The result is stored in
+        !       scratch(:,:,:,if1SST). For the computation of f1 also the cross
+        !       diffusion term is needed. This is stored in scratch(:,:,:,icd) such
+        !       that it can be used in SSTSolve later on.
+        !
+        use constants
+        use blockPointers
+        use inputTimeSpectral
+        use iteration
+        use turbMod
+        use utils, only: setPointers
+        use turbUtils, only: kwCDTerm
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: sps, nn, mm, i, j, k
 
-    ! First part. Compute the values of the blending function f1
-    ! for each block and spectral solution.
+        real(kind=realType) :: t1, t2, arg1
 
-    spectralLoop: do sps=1,nTimeIntervalsSpectral
-       domains: do mm=1,nDom
+        ! First part. Compute the values of the blending function f1
+        ! for each block and spectral solution.
 
-          ! Set the pointers to this block.
+        spectralLoop: do sps = 1, nTimeIntervalsSpectral
+            domains: do mm = 1, nDom
 
-          call setPointers(mm, currentLevel, sps)
+                ! Set the pointers to this block.
 
-          ! Set the pointers for f1 and kwCD to the correct entries
-          ! in scratch which are currently not used.
+                call setPointers(mm, currentLevel, sps)
 
-          f1   => scratch(1:,1:,1:,if1SST)
-          kwCD => scratch(1:,1:,1:,icd)
+                ! Set the pointers for f1 and kwCD to the correct entries
+                ! in scratch which are currently not used.
 
-          ! Compute the cross diffusion term.
+                f1 => scratch(1:, 1:, 1:, if1SST)
+                kwCD => scratch(1:, 1:, 1:, icd)
 
-          call kwCDterm
+                ! Compute the cross diffusion term.
 
-          ! Compute the blending function f1 for all owned cells.
+                call kwCDterm
 
-          do k=2,kl
-             do j=2,jl
-                do i=2,il
+                ! Compute the blending function f1 for all owned cells.
 
-                   t1 = sqrt(w(i,j,k,itu1)) &
-                        / (0.09_realType*w(i,j,k,itu2)*d2Wall(i,j,k))
-                   t2 = 500.0_realType*rlv(i,j,k) &
-                        / (w(i,j,k,irho)*w(i,j,k,itu2)*d2Wall(i,j,k)**2)
-                   t1 = max(t1,t2)
-                   t2 = two*w(i,j,k,itu1)&
-                        / (max(eps,kwCD(i,j,k))*d2Wall(i,j,k)**2)
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
 
-                   arg1      = min(t1,t2)
-                   f1(i,j,k) = tanh(arg1**4)
+                            t1 = sqrt(w(i, j, k, itu1)) &
+                                 / (0.09_realType * w(i, j, k, itu2) * d2Wall(i, j, k))
+                            t2 = 500.0_realType * rlv(i, j, k) &
+                                 / (w(i, j, k, irho) * w(i, j, k, itu2) * d2Wall(i, j, k)**2)
+                            t1 = max(t1, t2)
+                            t2 = two * w(i, j, k, itu1) &
+                                 / (max(eps, kwCD(i, j, k)) * d2Wall(i, j, k)**2)
 
-                enddo
-             enddo
-          enddo
+                            arg1 = min(t1, t2)
+                            f1(i, j, k) = tanh(arg1**4)
 
-          ! Loop over the boundary conditions to set f1 in the boundary
-          ! halo's. A Neumann boundary condition is used for all BC's.
+                        end do
+                    end do
+                end do
 
-          bocos: do nn=1,nBocos
+                ! Loop over the boundary conditions to set f1 in the boundary
+                ! halo's. A Neumann boundary condition is used for all BC's.
 
-             ! Determine the face on which this subface is located, loop
-             ! over its range and copy f1. Although the range may include
-             ! indirect halo's which are not computed, this is no problem,
-             ! because in SSTSolve only direct halo's are used.
+                bocos: do nn = 1, nBocos
 
-             select case (BCFaceID(nn))
+                    ! Determine the face on which this subface is located, loop
+                    ! over its range and copy f1. Although the range may include
+                    ! indirect halo's which are not computed, this is no problem,
+                    ! because in SSTSolve only direct halo's are used.
 
-             case (iMin)
-                do k=kcBeg(nn),kcEnd(nn)
-                   do j=jcBeg(nn),jcEnd(nn)
-                      f1(1,j,k) = f1(2,j,k)
-                   enddo
-                enddo
+                    select case (BCFaceID(nn))
 
-                !              ==========================================================
+                    case (iMin)
+                        do k = kcBeg(nn), kcEnd(nn)
+                            do j = jcBeg(nn), jcEnd(nn)
+                                f1(1, j, k) = f1(2, j, k)
+                            end do
+                        end do
 
-             case (iMax)
+                        !              ==========================================================
 
-                do k=kcBeg(nn),kcEnd(nn)
-                   do j=jcBeg(nn),jcEnd(nn)
-                      f1(ie,j,k) = f1(il,j,k)
-                   enddo
-                enddo
+                    case (iMax)
 
-                !              ==========================================================
+                        do k = kcBeg(nn), kcEnd(nn)
+                            do j = jcBeg(nn), jcEnd(nn)
+                                f1(ie, j, k) = f1(il, j, k)
+                            end do
+                        end do
 
-             case (jMin)
+                        !              ==========================================================
 
-                do k=kcBeg(nn),kcEnd(nn)
-                   do i=icBeg(nn),icEnd(nn)
-                      f1(i,1,k) = f1(i,2,k)
-                   enddo
-                enddo
+                    case (jMin)
 
-                !              ==========================================================
+                        do k = kcBeg(nn), kcEnd(nn)
+                            do i = icBeg(nn), icEnd(nn)
+                                f1(i, 1, k) = f1(i, 2, k)
+                            end do
+                        end do
 
-             case (jMax)
+                        !              ==========================================================
 
-                do k=kcBeg(nn),kcEnd(nn)
-                   do i=icBeg(nn),icEnd(nn)
-                      f1(i,je,k) = f1(i,jl,k)
-                   enddo
-                enddo
+                    case (jMax)
 
-                !              ==========================================================
+                        do k = kcBeg(nn), kcEnd(nn)
+                            do i = icBeg(nn), icEnd(nn)
+                                f1(i, je, k) = f1(i, jl, k)
+                            end do
+                        end do
 
-             case (kMin)
+                        !              ==========================================================
 
-                do j=jcBeg(nn),jcEnd(nn)
-                   do i=icBeg(nn),icEnd(nn)
-                      f1(i,j,1) = f1(i,j,2)
-                   enddo
-                enddo
+                    case (kMin)
 
-                !              ==========================================================
+                        do j = jcBeg(nn), jcEnd(nn)
+                            do i = icBeg(nn), icEnd(nn)
+                                f1(i, j, 1) = f1(i, j, 2)
+                            end do
+                        end do
 
-             case (kMax)
+                        !              ==========================================================
 
-                do j=jcBeg(nn),jcEnd(nn)
-                   do i=icBeg(nn),icEnd(nn)
-                      f1(i,j,ke) = f1(i,j,kl)
-                   enddo
-                enddo
+                    case (kMax)
 
-             end select
+                        do j = jcBeg(nn), jcEnd(nn)
+                            do i = icBeg(nn), icEnd(nn)
+                                f1(i, j, ke) = f1(i, j, kl)
+                            end do
+                        end do
 
-          enddo bocos
+                    end select
 
-       enddo domains
-    enddo spectralLoop
+                end do bocos
 
-    ! Exchange the values of f1.
+            end do domains
+        end do spectralLoop
 
-    call exchangeF1SST1to1
-    call exchangeF1SSTOverset
+        ! Exchange the values of f1.
 
-  end subroutine f1SST
+        call exchangeF1SST1to1
+        call exchangeF1SSTOverset
 
-  !      ==================================================================
+    end subroutine f1SST
 
-  subroutine exchangeF1SST1to1
-    !
-    !       exchangeF1SST1to1 communicates the 1st layer of halo values
-    !       for the blending function f1 of the SST model for 1 to 1
-    !       matching halo's. This variable is stored in scratch(:,:,:,if1SST).
-    !
-    use constants
-    use block
-    use communication
-    use inputTimeSpectral
-    use iteration
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: size, procID, ierr, index
-    integer, dimension(mpi_status_size) :: mpiStatus
+    !      ==================================================================
 
-    integer(kind=intType) :: i, j, ii, jj, sps, ll
-    integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2
+    subroutine exchangeF1SST1to1
+        !
+        !       exchangeF1SST1to1 communicates the 1st layer of halo values
+        !       for the blending function f1 of the SST model for 1 to 1
+        !       matching halo's. This variable is stored in scratch(:,:,:,if1SST).
+        !
+        use constants
+        use block
+        use communication
+        use inputTimeSpectral
+        use iteration
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: size, procID, ierr, index
+        integer, dimension(mpi_status_size) :: mpiStatus
 
-    ! Easier storage of the current mg level.
+        integer(kind=intType) :: i, j, ii, jj, sps, ll
+        integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2
 
-    ll = currentLevel
+        ! Easier storage of the current mg level.
 
-    ! Loop over the number of spectral solutions.
+        ll = currentLevel
 
-    spectralModes: do sps=1,nTimeIntervalsSpectral
+        ! Loop over the number of spectral solutions.
 
-       ii = 1
-       sends: do i=1,commPatternCell_1st(ll)%nProcSend
+        spectralModes: do sps = 1, nTimeIntervalsSpectral
 
-          ! Store the processor id and the size of the message
-          ! a bit easier.
+            ii = 1
+            sends: do i = 1, commPatternCell_1st(ll)%nProcSend
 
-          procID = commPatternCell_1st(ll)%sendProc(i)
-          size   = commPatternCell_1st(ll)%nSend(i)
+                ! Store the processor id and the size of the message
+                ! a bit easier.
 
-          ! Copy the data in the correct part of the send buffer.
+                procID = commPatternCell_1st(ll)%sendProc(i)
+                size = commPatternCell_1st(ll)%nSend(i)
 
-          jj = ii
-          do j=1,commPatternCell_1st(ll)%nSend(i)
+                ! Copy the data in the correct part of the send buffer.
 
-             ! Store the block id and the indices of the donor a
-             !  bit easier.
+                jj = ii
+                do j = 1, commPatternCell_1st(ll)%nSend(i)
 
-             d1 = commPatternCell_1st(ll)%sendList(i)%block(j)
-             i1 = commPatternCell_1st(ll)%sendList(i)%indices(j,1)
-             j1 = commPatternCell_1st(ll)%sendList(i)%indices(j,2)
-             k1 = commPatternCell_1st(ll)%sendList(i)%indices(j,3)
+                    ! Store the block id and the indices of the donor a
+                    !  bit easier.
 
-             ! Store the value of f1 in the send buffer. Note that the
-             ! level is 1 and not ll (= currentLevel).
+                    d1 = commPatternCell_1st(ll)%sendList(i)%block(j)
+                    i1 = commPatternCell_1st(ll)%sendList(i)%indices(j, 1)
+                    j1 = commPatternCell_1st(ll)%sendList(i)%indices(j, 2)
+                    k1 = commPatternCell_1st(ll)%sendList(i)%indices(j, 3)
 
-             sendBuffer(jj) = flowDoms(d1,1,sps)%scratch(i1,j1,k1,if1SST)
-             jj = jj + 1
+                    ! Store the value of f1 in the send buffer. Note that the
+                    ! level is 1 and not ll (= currentLevel).
 
-          enddo
+                    sendBuffer(jj) = flowDoms(d1, 1, sps)%scratch(i1, j1, k1, if1SST)
+                    jj = jj + 1
 
-          ! Send the data.
+                end do
 
-          call mpi_isend(sendBuffer(ii), size, adflow_real, procID,  &
-               procID, ADflow_comm_world, sendRequests(i), &
-               ierr)
+                ! Send the data.
 
-          ! Set ii to jj for the next processor.
+                call mpi_isend(sendBuffer(ii), size, adflow_real, procID, &
+                               procID, ADflow_comm_world, sendRequests(i), &
+                               ierr)
 
-          ii = jj
+                ! Set ii to jj for the next processor.
 
-       enddo sends
+                ii = jj
 
-       ! Post the nonblocking receives.
+            end do sends
 
-       ii = 1
-       receives: do i=1,commPatternCell_1st(ll)%nProcRecv
+            ! Post the nonblocking receives.
 
-          ! Store the processor id and the size of the message
-          ! a bit easier.
+            ii = 1
+            receives: do i = 1, commPatternCell_1st(ll)%nProcRecv
 
-          procID = commPatternCell_1st(ll)%recvProc(i)
-          size   = commPatternCell_1st(ll)%nRecv(i)
+                ! Store the processor id and the size of the message
+                ! a bit easier.
 
-          ! Post the receive.
+                procID = commPatternCell_1st(ll)%recvProc(i)
+                size = commPatternCell_1st(ll)%nRecv(i)
 
-          call mpi_irecv(recvBuffer(ii), size, adflow_real, procID, &
-               myID, ADflow_comm_world, recvRequests(i),  &
-               ierr)
+                ! Post the receive.
 
-          ! And update ii.
+                call mpi_irecv(recvBuffer(ii), size, adflow_real, procID, &
+                               myID, ADflow_comm_world, recvRequests(i), &
+                               ierr)
 
-          ii = ii + size
+                ! And update ii.
 
-       enddo receives
+                ii = ii + size
 
-       ! Copy the local data.
+            end do receives
 
-       localCopy: do i=1,internalCell_1st(ll)%ncopy
+            ! Copy the local data.
 
-          ! Store the block and the indices of the donor a bit easier.
+            localCopy: do i = 1, internalCell_1st(ll)%ncopy
 
-          d1 = internalCell_1st(ll)%donorBlock(i)
-          i1 = internalCell_1st(ll)%donorIndices(i,1)
-          j1 = internalCell_1st(ll)%donorIndices(i,2)
-          k1 = internalCell_1st(ll)%donorIndices(i,3)
+                ! Store the block and the indices of the donor a bit easier.
 
-          ! Idem for the halo's.
+                d1 = internalCell_1st(ll)%donorBlock(i)
+                i1 = internalCell_1st(ll)%donorIndices(i, 1)
+                j1 = internalCell_1st(ll)%donorIndices(i, 2)
+                k1 = internalCell_1st(ll)%donorIndices(i, 3)
 
-          d2 = internalCell_1st(ll)%haloBlock(i)
-          i2 = internalCell_1st(ll)%haloIndices(i,1)
-          j2 = internalCell_1st(ll)%haloIndices(i,2)
-          k2 = internalCell_1st(ll)%haloIndices(i,3)
+                ! Idem for the halo's.
 
-          ! Copy the values. Note that level is 1 and not
-          ! ll (= currentLevel).
+                d2 = internalCell_1st(ll)%haloBlock(i)
+                i2 = internalCell_1st(ll)%haloIndices(i, 1)
+                j2 = internalCell_1st(ll)%haloIndices(i, 2)
+                k2 = internalCell_1st(ll)%haloIndices(i, 3)
 
-          flowDoms(d2,1,sps)%scratch(i2,j2,k2,if1SST) = &
-               flowDoms(d1,1,sps)%scratch(i1,j1,k1,if1SST)
+                ! Copy the values. Note that level is 1 and not
+                ! ll (= currentLevel).
 
-       enddo localCopy
+                flowDoms(d2, 1, sps)%scratch(i2, j2, k2, if1SST) = &
+                    flowDoms(d1, 1, sps)%scratch(i1, j1, k1, if1SST)
 
-       ! Complete the nonblocking receives in an arbitrary sequence and
-       ! copy the variables from the buffer into the halo's.
+            end do localCopy
 
-       size = commPatternCell_1st(ll)%nProcRecv
-       completeRecvs: do i=1,commPatternCell_1st(ll)%nProcRecv
+            ! Complete the nonblocking receives in an arbitrary sequence and
+            ! copy the variables from the buffer into the halo's.
 
-          ! Complete any of the requests.
+            size = commPatternCell_1st(ll)%nProcRecv
+            completeRecvs: do i = 1, commPatternCell_1st(ll)%nProcRecv
 
-          call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
+                ! Complete any of the requests.
 
-          ! Copy the data just arrived in the halo's.
+                call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
 
-          ii = index
-          jj = commPatternCell_1st(ll)%nRecvCum(ii-1) +1
-          do j=1,commPatternCell_1st(ll)%nRecv(ii)
+                ! Copy the data just arrived in the halo's.
 
-             ! Store the block and the indices of the halo a bit easier.
+                ii = index
+                jj = commPatternCell_1st(ll)%nRecvCum(ii - 1) + 1
+                do j = 1, commPatternCell_1st(ll)%nRecv(ii)
 
-             d2 = commPatternCell_1st(ll)%recvList(ii)%block(j)
-             i2 = commPatternCell_1st(ll)%recvList(ii)%indices(j,1)
-             j2 = commPatternCell_1st(ll)%recvList(ii)%indices(j,2)
-             k2 = commPatternCell_1st(ll)%recvList(ii)%indices(j,3)
+                    ! Store the block and the indices of the halo a bit easier.
 
-             ! And copy the data in the appropriate place in scratch. Note
-             ! that level == 1 and not ll (= currentLevel).
+                    d2 = commPatternCell_1st(ll)%recvList(ii)%block(j)
+                    i2 = commPatternCell_1st(ll)%recvList(ii)%indices(j, 1)
+                    j2 = commPatternCell_1st(ll)%recvList(ii)%indices(j, 2)
+                    k2 = commPatternCell_1st(ll)%recvList(ii)%indices(j, 3)
 
-             flowDoms(d2,1,sps)%scratch(i2,j2,k2,if1SST) = recvBuffer(jj)
-             jj = jj + 1
+                    ! And copy the data in the appropriate place in scratch. Note
+                    ! that level == 1 and not ll (= currentLevel).
 
-          enddo
+                    flowDoms(d2, 1, sps)%scratch(i2, j2, k2, if1SST) = recvBuffer(jj)
+                    jj = jj + 1
 
-       enddo completeRecvs
+                end do
 
-       ! Complete the nonblocking sends.
+            end do completeRecvs
 
-       size = commPatternCell_1st(ll)%nProcSend
-       do i=1,commPatternCell_1st(ll)%nProcSend
-          call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
-       enddo
+            ! Complete the nonblocking sends.
 
-    enddo spectralModes
+            size = commPatternCell_1st(ll)%nProcSend
+            do i = 1, commPatternCell_1st(ll)%nProcSend
+                call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
+            end do
 
-  end subroutine exchangeF1SST1to1
+        end do spectralModes
 
+    end subroutine exchangeF1SST1to1
 
-  subroutine exchangeF1SSTOverset
-    !
-    !       exchangeF1SSTOverset communicates the overset boundary values
-    !       for the blending function f1 of the SST model. This variable
-    !       is stored in scratch(:,:,:,if1SST).
-    use constants
-    use block
-    use communication
-    use inputTimeSpectral
-    use iteration
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: size, procID, ierr, index
-    integer, dimension(mpi_status_size) :: mpiStatus
+    subroutine exchangeF1SSTOverset
+        !
+        !       exchangeF1SSTOverset communicates the overset boundary values
+        !       for the blending function f1 of the SST model. This variable
+        !       is stored in scratch(:,:,:,if1SST).
+        use constants
+        use block
+        use communication
+        use inputTimeSpectral
+        use iteration
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: size, procID, ierr, index
+        integer, dimension(mpi_status_size) :: mpiStatus
 
-    integer(kind=intType) :: i, j, ii, jj, sps, ll
-    integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2
+        integer(kind=intType) :: i, j, ii, jj, sps, ll
+        integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2
 
-    real(kind=realType), dimension(:), pointer :: weight
+        real(kind=realType), dimension(:), pointer :: weight
 
-    ! Easier storage of the current mg level.
+        ! Easier storage of the current mg level.
 
-    ll = currentLevel
+        ll = currentLevel
 
-    ! Loop over the number of spectral solutions.
+        ! Loop over the number of spectral solutions.
 
-    spectralModes: do sps=1,nTimeIntervalsSpectral
+        spectralModes: do sps = 1, nTimeIntervalsSpectral
 
-       ii = 1
-       sends: do i=1,commPatternOverset(ll,sps)%nProcSend
+            ii = 1
+            sends: do i = 1, commPatternOverset(ll, sps)%nProcSend
 
-          ! Store the processor id and the size of the message
-          ! a bit easier.
+                ! Store the processor id and the size of the message
+                ! a bit easier.
 
-          procID = commPatternOverset(ll,sps)%sendProc(i)
-          size   = commPatternOverset(ll,sps)%nSend(i)
+                procID = commPatternOverset(ll, sps)%sendProc(i)
+                size = commPatternOverset(ll, sps)%nSend(i)
 
-          ! Copy the data in the correct part of the send buffer.
+                ! Copy the data in the correct part of the send buffer.
 
-          jj = ii
-          do j=1,commPatternOverset(ll,sps)%nSend(i)
+                jj = ii
+                do j = 1, commPatternOverset(ll, sps)%nSend(i)
 
-             ! Store the block id and the indices of the donor a
-             !  bit easier.
+                    ! Store the block id and the indices of the donor a
+                    !  bit easier.
 
-             d1 = commPatternOverset(ll,sps)%sendList(i)%block(j)
-             i1 = commPatternOverset(ll,sps)%sendList(i)%indices(j,1)
-             j1 = commPatternOverset(ll,sps)%sendList(i)%indices(j,2)
-             k1 = commPatternOverset(ll,sps)%sendList(i)%indices(j,3)
+                    d1 = commPatternOverset(ll, sps)%sendList(i)%block(j)
+                    i1 = commPatternOverset(ll, sps)%sendList(i)%indices(j, 1)
+                    j1 = commPatternOverset(ll, sps)%sendList(i)%indices(j, 2)
+                    k1 = commPatternOverset(ll, sps)%sendList(i)%indices(j, 3)
 
-             weight => commPatternOverset(ll,sps)%sendList(i)%interp(j,:)
+                    weight => commPatternOverset(ll, sps)%sendList(i)%interp(j, :)
 
-             ! Store the value of f1 in the send buffer. Note that the
-             ! level is 1 and not ll (= currentLevel).
+                    ! Store the value of f1 in the send buffer. Note that the
+                    ! level is 1 and not ll (= currentLevel).
 
-             sendBuffer(jj) = &
-                  weight(1)*flowDoms(d1,1,sps)%scratch(i1  ,j1  ,k1  ,if1SST) + &
-                  weight(2)*flowDoms(d1,1,sps)%scratch(i1+1,j1  ,k1  ,if1SST) + &
-                  weight(3)*flowDoms(d1,1,sps)%scratch(i1  ,j1+1,k1  ,if1SST) + &
-                  weight(4)*flowDoms(d1,1,sps)%scratch(i1+1,j1+1,k1  ,if1SST) + &
-                  weight(5)*flowDoms(d1,1,sps)%scratch(i1  ,j1  ,k1+1,if1SST) + &
-                  weight(6)*flowDoms(d1,1,sps)%scratch(i1+1,j1  ,k1+1,if1SST) + &
-                  weight(7)*flowDoms(d1,1,sps)%scratch(i1  ,j1+1,k1+1,if1SST) + &
-                  weight(8)*flowDoms(d1,1,sps)%scratch(i1+1,j1+1,k1+1,if1SST)
-             jj = jj + 1
+                    sendBuffer(jj) = &
+                        weight(1) * flowDoms(d1, 1, sps)%scratch(i1, j1, k1, if1SST) + &
+                        weight(2) * flowDoms(d1, 1, sps)%scratch(i1 + 1, j1, k1, if1SST) + &
+                        weight(3) * flowDoms(d1, 1, sps)%scratch(i1, j1 + 1, k1, if1SST) + &
+                        weight(4) * flowDoms(d1, 1, sps)%scratch(i1 + 1, j1 + 1, k1, if1SST) + &
+                        weight(5) * flowDoms(d1, 1, sps)%scratch(i1, j1, k1 + 1, if1SST) + &
+                        weight(6) * flowDoms(d1, 1, sps)%scratch(i1 + 1, j1, k1 + 1, if1SST) + &
+                        weight(7) * flowDoms(d1, 1, sps)%scratch(i1, j1 + 1, k1 + 1, if1SST) + &
+                        weight(8) * flowDoms(d1, 1, sps)%scratch(i1 + 1, j1 + 1, k1 + 1, if1SST)
+                    jj = jj + 1
 
-          enddo
+                end do
 
-          ! Send the data.
+                ! Send the data.
 
-          call mpi_isend(sendBuffer(ii), size, adflow_real, procID,  &
-               procID, ADflow_comm_world, sendRequests(i), &
-               ierr)
+                call mpi_isend(sendBuffer(ii), size, adflow_real, procID, &
+                               procID, ADflow_comm_world, sendRequests(i), &
+                               ierr)
 
-          ! Set ii to jj for the next processor.
+                ! Set ii to jj for the next processor.
 
-          ii = jj
+                ii = jj
 
-       enddo sends
+            end do sends
 
-       ! Post the nonblocking receives.
+            ! Post the nonblocking receives.
 
-       ii = 1
-       receives: do i=1,commPatternOverset(ll,sps)%nProcRecv
+            ii = 1
+            receives: do i = 1, commPatternOverset(ll, sps)%nProcRecv
 
-          ! Store the processor id and the size of the message
-          ! a bit easier.
+                ! Store the processor id and the size of the message
+                ! a bit easier.
 
-          procID = commPatternOverset(ll,sps)%recvProc(i)
-          size   = commPatternOverset(ll,sps)%nRecv(i)
+                procID = commPatternOverset(ll, sps)%recvProc(i)
+                size = commPatternOverset(ll, sps)%nRecv(i)
 
-          ! Post the receive.
+                ! Post the receive.
 
-          call mpi_irecv(recvBuffer(ii), size, adflow_real, procID, &
-               myID, ADflow_comm_world, recvRequests(i),  &
-               ierr)
+                call mpi_irecv(recvBuffer(ii), size, adflow_real, procID, &
+                               myID, ADflow_comm_world, recvRequests(i), &
+                               ierr)
 
-          ! And update ii.
+                ! And update ii.
 
-          ii = ii + size
+                ii = ii + size
 
-       enddo receives
+            end do receives
 
-       ! Copy the local data.
+            ! Copy the local data.
 
-       localCopy: do i=1,internalOverset(ll,sps)%ncopy
+            localCopy: do i = 1, internalOverset(ll, sps)%ncopy
 
-          ! Store the block and the indices of the donor a bit easier.
+                ! Store the block and the indices of the donor a bit easier.
 
-          d1 = internalOverset(ll,sps)%donorBlock(i)
-          i1 = internalOverset(ll,sps)%donorIndices(i,1)
-          j1 = internalOverset(ll,sps)%donorIndices(i,2)
-          k1 = internalOverset(ll,sps)%donorIndices(i,3)
+                d1 = internalOverset(ll, sps)%donorBlock(i)
+                i1 = internalOverset(ll, sps)%donorIndices(i, 1)
+                j1 = internalOverset(ll, sps)%donorIndices(i, 2)
+                k1 = internalOverset(ll, sps)%donorIndices(i, 3)
 
-          weight => internalOverset(ll,sps)%donorInterp(i,:)
+                weight => internalOverset(ll, sps)%donorInterp(i, :)
 
-          ! Idem for the halo's.
+                ! Idem for the halo's.
 
-          d2 = internalOverset(ll,sps)%haloBlock(i)
-          i2 = internalOverset(ll,sps)%haloIndices(i,1)
-          j2 = internalOverset(ll,sps)%haloIndices(i,2)
-          k2 = internalOverset(ll,sps)%haloIndices(i,3)
+                d2 = internalOverset(ll, sps)%haloBlock(i)
+                i2 = internalOverset(ll, sps)%haloIndices(i, 1)
+                j2 = internalOverset(ll, sps)%haloIndices(i, 2)
+                k2 = internalOverset(ll, sps)%haloIndices(i, 3)
 
-          ! Copy the values. Note that level is 1 and not
-          ! ll (= currentLevel).
+                ! Copy the values. Note that level is 1 and not
+                ! ll (= currentLevel).
 
-          flowDoms(d2,1,sps)%scratch(i2,j2,k2,if1SST) = &
-               weight(1)*flowDoms(d1,1,sps)%scratch(i1  ,j1  ,k1  ,if1SST) + &
-               weight(2)*flowDoms(d1,1,sps)%scratch(i1+1,j1  ,k1  ,if1SST) + &
-               weight(3)*flowDoms(d1,1,sps)%scratch(i1  ,j1+1,k1  ,if1SST) + &
-               weight(4)*flowDoms(d1,1,sps)%scratch(i1+1,j1+1,k1  ,if1SST) + &
-               weight(5)*flowDoms(d1,1,sps)%scratch(i1  ,j1  ,k1+1,if1SST) + &
-               weight(6)*flowDoms(d1,1,sps)%scratch(i1+1,j1  ,k1+1,if1SST) + &
-               weight(7)*flowDoms(d1,1,sps)%scratch(i1  ,j1+1,k1+1,if1SST) + &
-               weight(8)*flowDoms(d1,1,sps)%scratch(i1+1,j1+1,k1+1,if1SST)
+                flowDoms(d2, 1, sps)%scratch(i2, j2, k2, if1SST) = &
+                    weight(1) * flowDoms(d1, 1, sps)%scratch(i1, j1, k1, if1SST) + &
+                    weight(2) * flowDoms(d1, 1, sps)%scratch(i1 + 1, j1, k1, if1SST) + &
+                    weight(3) * flowDoms(d1, 1, sps)%scratch(i1, j1 + 1, k1, if1SST) + &
+                    weight(4) * flowDoms(d1, 1, sps)%scratch(i1 + 1, j1 + 1, k1, if1SST) + &
+                    weight(5) * flowDoms(d1, 1, sps)%scratch(i1, j1, k1 + 1, if1SST) + &
+                    weight(6) * flowDoms(d1, 1, sps)%scratch(i1 + 1, j1, k1 + 1, if1SST) + &
+                    weight(7) * flowDoms(d1, 1, sps)%scratch(i1, j1 + 1, k1 + 1, if1SST) + &
+                    weight(8) * flowDoms(d1, 1, sps)%scratch(i1 + 1, j1 + 1, k1 + 1, if1SST)
 
-       enddo localCopy
+            end do localCopy
 
-       ! Complete the nonblocking receives in an arbitrary sequence and
-       ! copy the variables from the buffer into the halo's.
+            ! Complete the nonblocking receives in an arbitrary sequence and
+            ! copy the variables from the buffer into the halo's.
 
-       size = commPatternOverset(ll,sps)%nProcRecv
-       completeRecvs: do i=1,commPatternOverset(ll,sps)%nProcRecv
+            size = commPatternOverset(ll, sps)%nProcRecv
+            completeRecvs: do i = 1, commPatternOverset(ll, sps)%nProcRecv
 
-          ! Complete any of the requests.
+                ! Complete any of the requests.
 
-          call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
+                call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
 
-          ! Copy the data just arrived in the halo's.
+                ! Copy the data just arrived in the halo's.
 
-          ii = index
-          jj = commPatternOverset(ll,sps)%nRecvCum(ii-1) +1
-          do j=1,commPatternOverset(ll,sps)%nRecv(ii)
+                ii = index
+                jj = commPatternOverset(ll, sps)%nRecvCum(ii - 1) + 1
+                do j = 1, commPatternOverset(ll, sps)%nRecv(ii)
 
-             ! Store the block and the indices of the halo a bit easier.
+                    ! Store the block and the indices of the halo a bit easier.
 
-             d2 = commPatternOverset(ll,sps)%recvList(ii)%block(j)
-             i2 = commPatternOverset(ll,sps)%recvList(ii)%indices(j,1)
-             j2 = commPatternOverset(ll,sps)%recvList(ii)%indices(j,2)
-             k2 = commPatternOverset(ll,sps)%recvList(ii)%indices(j,3)
+                    d2 = commPatternOverset(ll, sps)%recvList(ii)%block(j)
+                    i2 = commPatternOverset(ll, sps)%recvList(ii)%indices(j, 1)
+                    j2 = commPatternOverset(ll, sps)%recvList(ii)%indices(j, 2)
+                    k2 = commPatternOverset(ll, sps)%recvList(ii)%indices(j, 3)
 
-             ! And copy the data in the appropriate place in scratch. Note
-             ! that level == 1 and not ll (= currentLevel).
+                    ! And copy the data in the appropriate place in scratch. Note
+                    ! that level == 1 and not ll (= currentLevel).
 
-             flowDoms(d2,1,sps)%scratch(i2,j2,k2,if1SST) = recvBuffer(jj)
-             jj = jj + 1
+                    flowDoms(d2, 1, sps)%scratch(i2, j2, k2, if1SST) = recvBuffer(jj)
+                    jj = jj + 1
 
-          enddo
+                end do
 
-       enddo completeRecvs
+            end do completeRecvs
 
-       ! Complete the nonblocking sends.
+            ! Complete the nonblocking sends.
 
-       size = commPatternOverset(ll,sps)%nProcSend
-       do i=1,commPatternOverset(ll,sps)%nProcSend
-          call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
-       enddo
+            size = commPatternOverset(ll, sps)%nProcSend
+            do i = 1, commPatternOverset(ll, sps)%nProcSend
+                call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
+            end do
 
-    enddo spectralModes
+        end do spectralModes
 
-  end subroutine exchangeF1SSTOverset
+    end subroutine exchangeF1SSTOverset
 
 end module SST
diff --git a/src/turbulence/kt.F90 b/src/turbulence/kt.F90
index 05d38881a..af7eaf6bd 100644
--- a/src/turbulence/kt.F90
+++ b/src/turbulence/kt.F90
@@ -2,1228 +2,1225 @@ module kt
 
 contains
 
-  subroutine kt_block(resOnly)
-    !
-    !       kt solves the transport equations for the k-tau turbulence
-    !       model, including a cross-diffusion term, in a decoupled
-    !       manner using a diagonal dominant ADI-scheme.
-    !
-    use constants
-    use blockPointers, only : il, jl, kl
-    use inputTimeSpectral
-    use iteration
-    use utils, only : setPointers
-    use turbUtils, only : ktEddyViscosity
-    use turbBCRoutines, only : bcTurbTreatment, applyAllTurbBCThisBlock
-    implicit none
-    !
-    !      Subroutine argument.
-    !
-    logical, intent(in) :: resOnly
-    !
-
-    ! Set the arrays for the boundary condition treatment.
-
-    call bcTurbTreatment
-
-    ! Solve the transport equations for k and tau.
-
-    call ktSolve(resOnly)
-
-    ! The eddy viscosity and the boundary conditions are only
-    ! applied if an actual update has been computed in SSTSolve.
-
-    if(.not. resOnly ) then
-
-       ! Compute the corresponding eddy viscosity.
-
-       call ktEddyViscosity(2, il, 2, jl, 2, kl)
-
-       ! Set the halo values for the turbulent variables.
-       ! We are on the finest mesh, so the second layer of halo
-       ! cells must be computed as well.
-
-       call applyAllTurbBCThisBlock(.true.)
-
-    endif
-  end subroutine kt_block
-
-  subroutine ktCDterm
-    !
-    !       ktCdterm computes the cross-diffusion term in the tau-eqn
-    !       for the k-tau turbulence model for the given block.
-    !
-    use constants
-    use blockPointers
-    use turbMod
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k
-    real(kind=realType)   :: kx, ky, kz, tx, ty, tz, cd
-
-    ! Loop over the cell centers of the given block. It may be more
-    ! efficient to loop over the faces and to scatter the gradient,
-    ! but in that case the gradients for k and tau must be stored.
-    ! In the current approach no extra memory is needed.
-
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-
-             ! Compute the gradient of k in the cell center. Use is made
-             ! of the fact that the surrounding normals sum up to zero,
-             ! such that the cell i,j,k does not give a contribution.
-             ! The gradient is scaled by a factor 1/2vol.
-
-             kx = w(i+1,j,k,itu1)*si(i,j,k,1) - w(i-1,j,k,itu1)*si(i-1,j,k,1) &
-                  + w(i,j+1,k,itu1)*sj(i,j,k,1) - w(i,j-1,k,itu1)*sj(i,j-1,k,1) &
-                  + w(i,j,k+1,itu1)*sk(i,j,k,1) - w(i,j,k-1,itu1)*sk(i,j,k-1,1)
-             ky = w(i+1,j,k,itu1)*si(i,j,k,2) - w(i-1,j,k,itu1)*si(i-1,j,k,2) &
-                  + w(i,j+1,k,itu1)*sj(i,j,k,2) - w(i,j-1,k,itu1)*sj(i,j-1,k,2) &
-                  + w(i,j,k+1,itu1)*sk(i,j,k,2) - w(i,j,k-1,itu1)*sk(i,j,k-1,2)
-             kz = w(i+1,j,k,itu1)*si(i,j,k,3) - w(i-1,j,k,itu1)*si(i-1,j,k,3) &
-                  + w(i,j+1,k,itu1)*sj(i,j,k,3) - w(i,j-1,k,itu1)*sj(i,j-1,k,3) &
-                  + w(i,j,k+1,itu1)*sk(i,j,k,3) - w(i,j,k-1,itu1)*sk(i,j,k-1,3)
-
-             ! Idem for tau.
-
-             tx = w(i+1,j,k,itu2)*si(i,j,k,1) - w(i-1,j,k,itu2)*si(i-1,j,k,1) &
-                  + w(i,j+1,k,itu2)*sj(i,j,k,1) - w(i,j-1,k,itu2)*sj(i,j-1,k,1) &
-                  + w(i,j,k+1,itu2)*sk(i,j,k,1) - w(i,j,k-1,itu2)*sk(i,j,k-1,1)
-             ty = w(i+1,j,k,itu2)*si(i,j,k,2) - w(i-1,j,k,itu2)*si(i-1,j,k,2) &
-                  + w(i,j+1,k,itu2)*sj(i,j,k,2) - w(i,j-1,k,itu2)*sj(i,j-1,k,2) &
-                  + w(i,j,k+1,itu2)*sk(i,j,k,2) - w(i,j,k-1,itu2)*sk(i,j,k-1,2)
-             tz = w(i+1,j,k,itu2)*si(i,j,k,3) - w(i-1,j,k,itu2)*si(i-1,j,k,3) &
-                  + w(i,j+1,k,itu2)*sj(i,j,k,3) - w(i,j-1,k,itu2)*sj(i,j-1,k,3) &
-                  + w(i,j,k+1,itu2)*sk(i,j,k,3) - w(i,j,k-1,itu2)*sk(i,j,k-1,3)
-
-             ! Compute the dot product grad k grad tau. Multiply it by
-             ! the correct scaling factor.
-
-             cd = fourth*(kx*tx + ky*ty + kz*tz)/(vol(i,j,k)**2)
-
-             ! Cd must only be taken into account when it is negative.
-             ! Take care of that here.
-
-             ktCD(i,j,k) = min(zero, cd)
-
-          enddo
-       enddo
-    enddo
-
-  end subroutine ktCDterm
-
-  subroutine ktSolve(resOnly)
-    !
-    !       ktSolve solves the k-tau equations of the k-tau model
-    !       in a coupled manner using a diagonal dominant ADI-scheme.
-    !
-    use blockPointers
-    use constants
-    use flowVarRefState
-    use inputIteration
-    use inputPhysics
-    use paramTurb
-    use turbMod, only : prod, dvt, sct, ktCD, sig1, sig2, vort
-    use turbUtils, only : prodSmag2, prodWmag2, prodKatoLaunder, &
-         turbAdvection, unsteadyTurbTerm, tdia3
-    use turbCurveFits, only : curveTupYp
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    logical, intent(in) :: resOnly
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, nn
-
-    real(kind=realType) :: rktGam1
-    real(kind=realType) :: rhoi, ss, spk, sdk, tau, tau2, cd
-    real(kind=realType) :: voli, volmi, volpi
-    real(kind=realType) :: xm, ym, zm, xp, yp, zp, xa, ya, za
-    real(kind=realType) :: ttm, ttp, mulm, mulp, muem, muep
-    real(kind=realType) :: c1m, c1p, c10, c2m, c2p, c20
-    real(kind=realType) :: nui, voli2, sp2, sm2, spm
-    real(kind=realType) :: taup, taum, gp, gm
-    real(kind=realType) :: b1, b2, c1, c2, d1, d2
-    real(kind=realType) :: qs, uu, um, up, factor, utau, rblank
-
-    real(kind=realType), dimension(itu1:itu2) :: tup
-
-    real(kind=realType), dimension(2:il,2:jl,2:kl,2,2)  :: qq
-    real(kind=realType), dimension(2,2:max(il,jl,kl))   :: bb, dd, ff
-    real(kind=realType), dimension(2,2,2:max(il,jl,kl)) :: cc
-
-    real(kind=realType), dimension(:,:,:), pointer :: ddw, ww, ddvt
-    real(kind=realType), dimension(:,:),   pointer :: rrlv
-    real(kind=realType), dimension(:,:),   pointer :: dd2Wall
-
-    logical, dimension(2:jl,2:kl), target :: flagI2, flagIl
-    logical, dimension(2:il,2:kl), target :: flagJ2, flagJl
-    logical, dimension(2:il,2:jl), target :: flagK2, flagKl
-
-    logical, dimension(:,:), pointer :: flag
-
-    ! Set model constants
-
-    rktGam1 = rktBeta1/rktBetas &
-         - rktSigt1*rktK*rktK/sqrt(rktBetas)
-    sig1    = rktSigk1
-    sig2    = rktSigt1
-
-    ! Set a couple of pointers to the correct entries in dw to
-    ! make the code more readable.
-
-    dvt  => scratch(1:,1:,1:,idvt:)
-    prod => scratch(1:,1:,1:,iprod)
-    vort => prod
-    ktCD => scratch(1:,1:,1:,icd)
-    !
-    !       Production term.
-    !
-    select case (turbProd)
-    case (strain)
-       call prodSmag2
-
-    case (vorticity)
-       call prodWmag2
-
-    case (katoLaunder)
-       call prodKatoLaunder
-
-    end select
-    !
-    !       The cross diffusion term of the k-tau equation.
-    !
-    call ktCDterm
-    !
-    !       Source terms.
-    !       Determine the source term and its derivative w.r.t. k and
-    !       tau for all internal cells of the block.
-    !
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-
-             ! Compute the source terms for both the k and the tau
-             ! equation. The production term of k is limited to a
-             ! certain times the destruction term.
-
-             rhoi = one/w(i,j,k,irho)
-             tau  = w(i,j,k,itu2)
-             tau2 = tau*tau
-             ss   = prod(i,j,k)
-             cd   = rktSigd1*ktCD(i,j,k)
-             spk  = rev(i,j,k)*ss*rhoi
-             sdk  = rktBetas*w(i,j,k,itu1)/tau
-             spk  = min(spk, pklim*sdk)
-
-             dvt(i,j,k,1) = spk - sdk
-             dvt(i,j,k,2) = rktBeta1 - rktGam1*ss*tau2 + cd*tau
-
-             ! Compute the source term jacobian. Note that only the
-             ! destruction terms are linearized to increase the diagonal
-             ! dominance of the matrix. Furthermore minus the source
-             ! term jacobian is stored. The cross diffusion term is also
-             ! a destruction term, because cd <= 0.
-
-             qq(i,j,k,1,1) = rktBetas/tau
-             ! qq(i,j,k,1,2) =-rktBetas*w(i,j,k,itu1)/tau2
-             qq(i,j,k,1,2) = zero
-             qq(i,j,k,2,1) = zero
-             qq(i,j,k,2,2) = two*rktGam1*ss*tau - cd
-
-          enddo
-       enddo
-    enddo
-    !
-    !       Advection and unsteady terms.
-    !
-    nn = itu1 - 1
-    call turbAdvection(2_intType, 2_intType, nn, qq)
-
-    call unsteadyTurbTerm(2_intType, 2_intType, nn, qq)
-    !
-    !       Viscous terms in k-direction.
-    !
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-
-             ! Compute the metrics in zeta-direction, i.e. along the
-             ! line k = constant.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i,j,k-1))
-             volpi = two/(vol(i,j,k) + vol(i,j,k+1))
-
-             xm = sk(i,j,k-1,1)*volmi
-             ym = sk(i,j,k-1,2)*volmi
-             zm = sk(i,j,k-1,3)*volmi
-             xp = sk(i,j,k,  1)*volpi
-             yp = sk(i,j,k,  2)*volpi
-             zp = sk(i,j,k,  3)*volpi
-
-             xa  = half*(sk(i,j,k,1) + sk(i,j,k-1,1))*voli
-             ya  = half*(sk(i,j,k,2) + sk(i,j,k-1,2))*voli
-             za  = half*(sk(i,j,k,3) + sk(i,j,k-1,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             ! Computation of the viscous terms in zeta-direction; note
-             ! that cross-derivatives are neglected, i.e. the mesh is
-             ! assumed to be orthogonal.
-             ! The second derivative in zeta-direction is constructed as
-             ! the central difference of the first order derivatives, i.e.
-             ! d^2/dzeta^2 = d/dzeta (d/dzeta k+1/2 - d/dzeta k-1/2).
-             ! In this way the metric as well as the varying viscosity
-             ! can be taken into account; the latter appears inside the
-             ! d/dzeta derivative. The whole term is divided by rho to
-             ! obtain the diffusion term for k and tau.
-
-             rhoi = one/w(i,j,k,irho)
-             mulm = half*(rlv(i,j,k-1) + rlv(i,j,k))
-             mulp = half*(rlv(i,j,k+1) + rlv(i,j,k))
-             muem = half*(rev(i,j,k-1) + rev(i,j,k))
-             muep = half*(rev(i,j,k+1) + rev(i,j,k))
-
-             c1m = ttm*(mulm + sig1*muem)*rhoi
-             c1p = ttp*(mulp + sig1*muep)*rhoi
-             c10 = c1m + c1p
-
-             c2m = ttm*(mulm + sig2*muem)*rhoi
-             c2p = ttp*(mulp + sig2*muep)*rhoi
-             c20 = c2m + c2p
-
-             ! Due to the transformation to the tau variable an
-             ! additional term, coming from the diffusion term, appears
-             ! in the tau-equation. This additional diffusion term can
-             ! be rewritten as -8*(nu + sigmat*nut)*grad(sqrt(tau))**2.
-             ! Below is the discretized form in zeta-direction.
-             ! Also here the cross derivatives are neglected, i.e. the
-             ! assumption is made that the grid is orthogonal.
-
-             nui   = eight*(rlv(i,j,k) + sig2*rev(i,j,k))*rhoi
-             voli2 = voli*voli
-
-             sp2 = voli2*(sk(i,j,k,1)**2 + sk(i,j,k,2)**2 &
-                  +        sk(i,j,k,3)**2)
-             sm2 = voli2*(sk(i,j,k-1,1)**2 + sk(i,j,k-1,2)**2 &
-                  +        sk(i,j,k-1,3)**2)
-             spm = voli2*(sk(i,j,k,1)*sk(i,j,k-1,1) &
-                  +        sk(i,j,k,2)*sk(i,j,k-1,2) &
-                  +        sk(i,j,k,3)*sk(i,j,k-1,3))
-
-             taup = half*(w(i,j,k+1,itu2) + w(i,j,k,itu2))
-             taum = half*(w(i,j,k-1,itu2) + w(i,j,k,itu2))
-             gp   = sqrt(max(zero,taup))
-             gm   = sqrt(max(zero,taum))
-
-             ! Update the residual for this cell and store the possible
-             ! coefficients for the matrix in b1, b2, c1, c2, d1 and d2.
-             ! The derivatives of the additional diffusion term are an
-             ! approximation to avoid numerical difficulties near a
-             ! viscous wall. You can prove analytically that due to the
-             ! implicit treatment of the BC's two singular terms cancel,
-             ! but numerically this leads to difficulties. Therefore the
-             ! assumption is made that gp == gm in the implicit part.
-
-             dvt(i,j,k,1) = dvt(i,j,k,1)      + c1m*w(i,j,k-1,itu1) &
-                  - c10*w(i,j,k,itu1) + c1p*w(i,j,k+1,itu1)
-             dvt(i,j,k,2) = dvt(i,j,k,2)      + c2m*w(i,j,k-1,itu2) &
-                  - c20*w(i,j,k,itu2) + c2p*w(i,j,k+1,itu2) &
-                  - nui*(taup*sp2 + taum*sm2 - two*gp*gm*spm)
-
-             b1 = -c1m
-             c1 =  c10
-             d1 = -c1p
-
-             b2 = -c2m + half*nui*(sp2 - spm)
-             c2 =  c20 + half*nui*(sp2 + sm2 - two*spm)
-             d2 = -c2p + half*nui*(sm2 - spm)
-
-             ! Update the central jacobian. For nonboundary cells this
-             ! is simply c1 and c2. For boundary cells this is slightly
-             ! more complicated, because the boundary conditions are
-             ! treated implicitly and the off-diagonal terms b1, b2 and
-             ! d1, d2 must be taken into account.
-             ! The boundary conditions are only treated implicitly if
-             ! the diagonal dominance of the matrix is increased.
-
-             if(k == 2) then
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1 &
-                     - b1*max(bmtk1(i,j,itu1,itu1),zero)
-                qq(i,j,k,1,2) = qq(i,j,k,1,2) - b1*bmtk1(i,j,itu1,itu2)
-                qq(i,j,k,2,1) = qq(i,j,k,2,1) - b2*bmtk1(i,j,itu2,itu1)
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2 &
-                     - b2*max(bmtk1(i,j,itu2,itu2),zero)
-             else if(k == kl) then
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1 &
-                     - d1*max(bmtk2(i,j,itu1,itu1),zero)
-                qq(i,j,k,1,2) = qq(i,j,k,1,2) - d1*bmtk2(i,j,itu1,itu2)
-                qq(i,j,k,2,1) = qq(i,j,k,2,1) - d2*bmtk2(i,j,itu2,itu1)
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2 &
-                     - d2*max(bmtk2(i,j,itu2,itu2),zero)
-             else
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2
-             endif
-
-          enddo
-       enddo
-    enddo
-    !
-    !       Viscous terms in j-direction.
-    !
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-
-             ! Compute the metrics in eta-direction, i.e. along the
-             ! line j = constant.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i,j-1,k))
-             volpi = two/(vol(i,j,k) + vol(i,j+1,k))
-
-             xm = sj(i,j-1,k,1)*volmi
-             ym = sj(i,j-1,k,2)*volmi
-             zm = sj(i,j-1,k,3)*volmi
-             xp = sj(i,j,  k,1)*volpi
-             yp = sj(i,j,  k,2)*volpi
-             zp = sj(i,j,  k,3)*volpi
-
-             xa  = half*(sj(i,j,k,1) + sj(i,j-1,k,1))*voli
-             ya  = half*(sj(i,j,k,2) + sj(i,j-1,k,2))*voli
-             za  = half*(sj(i,j,k,3) + sj(i,j-1,k,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             ! Computation of the viscous terms in eta-direction; note
-             ! that cross-derivatives are neglected, i.e. the mesh is
-             ! assumed to be orthogonal.
-             ! The second derivative in eta-direction is constructed as
-             ! the central difference of the first order derivatives, i.e.
-             ! d^2/deta^2 = d/deta (d/deta j+1/2 - d/deta j-1/2).
-             ! In this way the metric as well as the varying viscosity
-             ! can be taken into account; the latter appears inside the
-             ! d/deta derivative. The whole term is divided by rho to
-             ! obtain the diffusion term for k and omega.
-
-             rhoi = one/w(i,j,k,irho)
-             mulm = half*(rlv(i,j-1,k) + rlv(i,j,k))
-             mulp = half*(rlv(i,j+1,k) + rlv(i,j,k))
-             muem = half*(rev(i,j-1,k) + rev(i,j,k))
-             muep = half*(rev(i,j+1,k) + rev(i,j,k))
-
-             c1m = ttm*(mulm + sig1*muem)*rhoi
-             c1p = ttp*(mulp + sig1*muep)*rhoi
-             c10 = c1m + c1p
-
-             c2m = ttm*(mulm + sig2*muem)*rhoi
-             c2p = ttp*(mulp + sig2*muep)*rhoi
-             c20 = c2m + c2p
-
-             ! Due to the transformation to the tau variable an
-             ! additional term, coming from the diffusion term, appears
-             ! in the tau-equation. This additional diffusion term can
-             ! be rewritten as -8*(nu + sigmat*nut)*grad(sqrt(tau))**2.
-             ! Below is the discretized form in eta-direction.
-             ! Also here the cross derivatives are neglected, i.e. the
-             ! assumption is made that the grid is orthogonal.
-
-             nui   = eight*(rlv(i,j,k) + sig2*rev(i,j,k))*rhoi
-             voli2 = voli*voli
-
-             sp2 = voli2*(sj(i,j,k,1)**2 + sj(i,j,k,2)**2 &
-                  +        sj(i,j,k,3)**2)
-             sm2 = voli2*(sj(i,j-1,k,1)**2 + sj(i,j-1,k,2)**2 &
-                  +        sj(i,j-1,k,3)**2)
-             spm = voli2*(sj(i,j,k,1)*sj(i,j-1,k,1) &
-                  +        sj(i,j,k,2)*sj(i,j-1,k,2) &
-                  +        sj(i,j,k,3)*sj(i,j-1,k,3))
-
-             taup = half*(w(i,j+1,k,itu2) + w(i,j,k,itu2))
-             taum = half*(w(i,j-1,k,itu2) + w(i,j,k,itu2))
-             gp   = sqrt(max(zero,taup))
-             gm   = sqrt(max(zero,taum))
-
-             ! Update the residual for this cell and store the possible
-             ! coefficients for the matrix in b1, b2, c1, c2, d1 and d2.
-             ! The derivatives of the additional diffusion term are an
-             ! approximation to avoid numerical difficulties near a
-             ! viscous wall. You can prove analytically that due to the
-             ! implicit treatment of the BC's two singular terms cancel,
-             ! but numerically this leads to difficulties. Therefore the
-             ! assumption is made that gp == gm in the implicit part.
-
-             dvt(i,j,k,1) = dvt(i,j,k,1)      + c1m*w(i,j-1,k,itu1) &
-                  - c10*w(i,j,k,itu1) + c1p*w(i,j+1,k,itu1)
-             dvt(i,j,k,2) = dvt(i,j,k,2)      + c2m*w(i,j-1,k,itu2) &
-                  - c20*w(i,j,k,itu2) + c2p*w(i,j+1,k,itu2) &
-                  - nui*(taup*sp2 + taum*sm2 - two*gp*gm*spm)
-
-             b1 = -c1m
-             c1 =  c10
-             d1 = -c1p
-
-             b2 = -c2m + half*nui*(sp2 - spm)
-             c2 =  c20 + half*nui*(sp2 + sm2 - two*spm)
-             d2 = -c2p + half*nui*(sm2 - spm)
-
-             ! Update the central jacobian. For nonboundary cells this
-             ! is simply c1 and c2. For boundary cells this is slightly
-             ! more complicated, because the boundary conditions are
-             ! treated implicitly and the off-diagonal terms b1, b2 and
-             ! d1, d2 must be taken into account.
-             ! The boundary conditions are only treated implicitly if
-             ! the diagonal dominance of the matrix is increased.
-
-             if(j == 2) then
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1 &
-                     - b1*max(bmtj1(i,k,itu1,itu1),zero)
-                qq(i,j,k,1,2) = qq(i,j,k,1,2) - b1*bmtj1(i,k,itu1,itu2)
-                qq(i,j,k,2,1) = qq(i,j,k,2,1) - b2*bmtj1(i,k,itu2,itu1)
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2 &
-                     - b2*max(bmtj1(i,k,itu2,itu2),zero)
-             else if(j == jl) then
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1 &
-                     - d1*max(bmtj2(i,k,itu1,itu1),zero)
-                qq(i,j,k,1,2) = qq(i,j,k,1,2) - d1*bmtj2(i,k,itu1,itu2)
-                qq(i,j,k,2,1) = qq(i,j,k,2,1) - d2*bmtj2(i,k,itu2,itu1)
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2 &
-                     - d2*max(bmtj2(i,k,itu2,itu2),zero)
-             else
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2
-             endif
-
-          enddo
-       enddo
-    enddo
-    !
-    !       Viscous terms in i-direction.
-    !
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-
-             ! Compute the metrics in xi-direction, i.e. along the
-             ! line i = constant.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i-1,j,k))
-             volpi = two/(vol(i,j,k) + vol(i+1,j,k))
-
-             xm = si(i-1,j,k,1)*volmi
-             ym = si(i-1,j,k,2)*volmi
-             zm = si(i-1,j,k,3)*volmi
-             xp = si(i,  j,k,1)*volpi
-             yp = si(i,  j,k,2)*volpi
-             zp = si(i,  j,k,3)*volpi
-
-             xa  = half*(si(i,j,k,1) + si(i-1,j,k,1))*voli
-             ya  = half*(si(i,j,k,2) + si(i-1,j,k,2))*voli
-             za  = half*(si(i,j,k,3) + si(i-1,j,k,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             ! Computation of the viscous terms in xi-direction; note
-             ! that cross-derivatives are neglected, i.e. the mesh is
-             ! assumed to be orthogonal.
-             ! The second derivative in xi-direction is constructed as
-             ! the central difference of the first order derivatives, i.e.
-             ! d^2/dxi^2 = d/dxi (d/dxi i+1/2 - d/dxi i-1/2).
-             ! In this way the metric as well as the varying viscosity
-             ! can be taken into account; the latter appears inside the
-             ! d/dxi derivative. The whole term is divided by rho to
-             ! obtain the diffusion term for k and omega.
-
-             rhoi = one/w(i,j,k,irho)
-             mulm = half*(rlv(i-1,j,k) + rlv(i,j,k))
-             mulp = half*(rlv(i+1,j,k) + rlv(i,j,k))
-             muem = half*(rev(i-1,j,k) + rev(i,j,k))
-             muep = half*(rev(i+1,j,k) + rev(i,j,k))
-
-             c1m = ttm*(mulm + sig1*muem)*rhoi
-             c1p = ttp*(mulp + sig1*muep)*rhoi
-             c10 = c1m + c1p
-
-             c2m = ttm*(mulm + sig2*muem)*rhoi
-             c2p = ttp*(mulp + sig2*muep)*rhoi
-             c20 = c2m + c2p
-
-             ! Due to the transformation to the tau variable an
-             ! additional term, coming from the diffusion term, appears
-             ! in the tau-equation. This additional diffusion term can
-             ! be rewritten as -8*(nu + sigmat*nut)*grad(sqrt(tau))**2.
-             ! Below is the discretized form in xi-direction.
-             ! Also here the cross derivatives are neglected, i.e. the
-             ! assumption is made that the grid is orthogonal.
-
-             nui   = eight*(rlv(i,j,k) + sig2*rev(i,j,k))*rhoi
-             voli2 = voli*voli
-
-             sp2 = voli2*(si(i,j,k,1)**2 + si(i,j,k,2)**2 &
-                  +        si(i,j,k,3)**2)
-             sm2 = voli2*(si(i-1,j,k,1)**2 + si(i-1,j,k,2)**2 &
-                  +        si(i-1,j,k,3)**2)
-             spm = voli2*(si(i,j,k,1)*si(i-1,j,k,1) &
-                  +        si(i,j,k,2)*si(i-1,j,k,2) &
-                  +        si(i,j,k,3)*si(i-1,j,k,3))
-
-             taup = half*(w(i+1,j,k,itu2) + w(i,j,k,itu2))
-             taum = half*(w(i-1,j,k,itu2) + w(i,j,k,itu2))
-             gp   = sqrt(max(zero,taup))
-             gm   = sqrt(max(zero,taum))
-
-             ! Update the residual for this cell and store the possible
-             ! coefficients for the matrix in b1, b2, c1, c2, d1 and d2.
-             ! The derivatives of the additional diffusion term are an
-             ! approximation to avoid numerical difficulties near a
-             ! viscous wall. You can prove analytically that due to the
-             ! implicit treatment of the BC's two singular terms cancel,
-             ! but numerically this leads to difficulties. Therefore the
-             ! assumption is made that gp == gm in the implicit part.
-
-             dvt(i,j,k,1) = dvt(i,j,k,1)      + c1m*w(i-1,j,k,itu1) &
-                  - c10*w(i,j,k,itu1) + c1p*w(i+1,j,k,itu1)
-             dvt(i,j,k,2) = dvt(i,j,k,2)      + c2m*w(i-1,j,k,itu2) &
-                  - c20*w(i,j,k,itu2) + c2p*w(i+1,j,k,itu2) &
-                  - nui*(taup*sp2 + taum*sm2 - two*gp*gm*spm)
-
-             b1 = -c1m
-             c1 =  c10
-             d1 = -c1p
-
-             b2 = -c2m + half*nui*(sp2 - spm)
-             c2 =  c20 + half*nui*(sp2 + sm2 - two*spm)
-             d2 = -c2p + half*nui*(sm2 - spm)
-
-             ! Update the central jacobian. For nonboundary cells this
-             ! is simply c1 and c2. For boundary cells this is slightly
-             ! more complicated, because the boundary conditions are
-             ! treated implicitly and the off-diagonal terms b1, b2 and
-             ! d1, d2 must be taken into account.
-             ! The boundary conditions are only treated implicitly if
-             ! the diagonal dominance of the matrix is increased.
-
-             if(i == 2) then
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1 &
-                     - b1*max(bmti1(j,k,itu1,itu1),zero)
-                qq(i,j,k,1,2) = qq(i,j,k,1,2) - b1*bmti1(j,k,itu1,itu2)
-                qq(i,j,k,2,1) = qq(i,j,k,2,1) - b2*bmti1(j,k,itu2,itu1)
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2 &
-                     - b2*max(bmti1(j,k,itu2,itu2),zero)
-             else if(i == il) then
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1 &
-                     - d1*max(bmti2(j,k,itu1,itu1),zero)
-                qq(i,j,k,1,2) = qq(i,j,k,1,2) - d1*bmti2(j,k,itu1,itu2)
-                qq(i,j,k,2,1) = qq(i,j,k,2,1) - d2*bmti2(j,k,itu2,itu1)
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2 &
-                     - d2*max(bmti2(j,k,itu2,itu2),zero)
-             else
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2
-             endif
-
-          enddo
-       enddo
-    enddo
-
-    ! Multiply the residual by the volume and store this in dw; this
-    ! is done for monitoring reasons only. The multiplication with the
-    ! volume is present to be consistent with the flow residuals; also
-    ! the negative value is taken, again to be consistent with the
-    ! flow equations. Also multiply by iblank so that no updates occur
-    ! in holes or the overset boundary.
-
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-             rblank = real(iblank(i,j,k), realType)
-             dw(i,j,k,itu1) = -volRef(i,j,k)*dvt(i,j,k,1)*rblank
-             dw(i,j,k,itu2) = -volRef(i,j,k)*dvt(i,j,k,2)*rblank
-          enddo
-       enddo
-    enddo
-
-    ! Initialize the wall function flags to .false.
-
-    flagI2 = .false.
-    flagIl = .false.
-    flagJ2 = .false.
-    flagJl = .false.
-    flagK2 = .false.
-    flagKl = .false.
-
-    ! Modify the rhs of the 1st internal cell, if wall functions
-    ! are used; their value is determined by the table.
-
-    testWallFunctions: if( wallFunctions ) then
-
-       bocos: do nn=1,nViscBocos
-
-          ! Determine the block face on which the subface is located
-          ! and set some variables. As flag points to the entire array
-          ! flagI2, etc., its starting indices are the starting indices
-          ! of its target and not 1.
-
-          select case (BCFaceID(nn))
-          case (iMin)
-             flag    => flagI2
-             ddw     => dw(2,1:,1:,1:);  ddvt => dvt(2,1:,1:,1:)
-             ww      => w(2,1:,1:,1:);   rrlv => rlv(2,1:,1:)
-             dd2Wall => d2Wall(2,:,:)
-
-          case (iMax)
-             flag    => flagIl
-             ddw     => dw(il,1:,1:,1:);  ddvt => dvt(il,1:,1:,1:)
-             ww      => w(il,1:,1:,1:);   rrlv => rlv(il,1:,1:)
-             dd2Wall => d2Wall(il,:,:)
-
-          case (jMin)
-             flag    => flagJ2
-             ddw     => dw(1:,2,1:,1:);  ddvt => dvt(1:,2,1:,1:)
-             ww      => w(1:,2,1:,1:);   rrlv => rlv(1:,2,1:)
-             dd2Wall => d2Wall(:,2,:)
-
-          case (jMax)
-             flag    => flagJl
-             ddw     => dw(1:,jl,1:,1:);  ddvt => dvt(1:,jl,1:,1:)
-             ww      => w(1:,jl,1:,1:);   rrlv => rlv(1:,jl,1:)
-             dd2Wall => d2Wall(:,jl,:)
-
-          case (kMin)
-             flag    => flagK2
-             ddw     => dw(1:,1:,2,1:);  ddvt => dvt(1:,1:,2,1:)
-             ww      => w(1:,1:,2,1:);   rrlv => rlv(1:,1:,2)
-             dd2Wall => d2Wall(:,:,2)
-
-          case (kMax)
-             flag    => flagKl
-             ddw     => dw(1:,1:,kl,1:);  ddvt => dvt(1:,1:,kl,1:)
-             ww      => w(1:,1:,kl,1:);   rrlv => rlv(1:,1:,kl)
-             dd2Wall => d2Wall(:,:,kl)
-
-          end select
-
-          ! Loop over the owned faces of this subface. Therefore the
-          ! nodal range of BCData must be used. The offset of +1 is
-          ! present, because the starting index of the cell range is
-          ! 1 larger than the starting index of the nodal range.
-
-          do j=(BCData(nn)%jnBeg+1),BCData(nn)%jnEnd
-             do i=(BCData(nn)%inBeg+1),BCData(nn)%inEnd
-
-                ! Set ddw to zero.
-
-                ddw(i,j,itu1) = zero
-                ddw(i,j,itu2) = zero
-
-                ! Enforce k and tau in the 1st internal cell from
-                ! the wall function table. There is an offset of -1 in
-                ! the wall distance. Note that the offset compared to
-                ! the current value must be stored. Also note that the
-                ! curve fits contain the non-dimensional values.
-
-                utau = viscSubface(nn)%utau(i,j)
-                yp = ww(i,j,irho)*dd2Wall(i-1,j-1)*utau/rrlv(i,j)
-
-                call curveTupYp(tup, yp, itu1, itu2)
-
-                tup(itu1) = tup(itu1)*utau**2
-                tup(itu2) = tup(itu2)*rrlv(i,j)/(ww(i,j,irho)*utau**2)
-
-                ddvt(i,j,1) = tup(itu1) - ww(i,j,itu1)
-                ddvt(i,j,2) = tup(itu2) - ww(i,j,itu2)
-
-                ! Set the wall flag to .true.
-
-                flag(i,j) = .true.
-
-             enddo
-          enddo
-
-       enddo bocos
-    endif testWallFunctions
-
-    ! Return if only the residual must be computed.
-
-    if( resOnly ) return
-
-    ! For implicit relaxation take the local time step into account,
-    ! where dt is the inverse of the central jacobian times the cfl
-    ! number. The following system is solved:
-    ! (I/dt + cc + bb + dd)*dw = rhs, in which I/dt = cc/cfl. As in
-    ! the rest of the algorithm only the modified central jacobian is
-    ! used, stored it now.
-
-    ! Compute the factor multiplying the central jacobian, which
-    ! is 1 + 1/cfl (implicit relaxation only).
-
-    factor = one
-    if(turbRelax == turbRelaxImplicit) &
-         factor = one + (one-alfaTurb)/alfaTurb
-
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-             qq(i,j,k,1,1) = factor*qq(i,j,k,1,1)
-             qq(i,j,k,1,2) = factor*qq(i,j,k,1,2)
-             qq(i,j,k,2,1) = factor*qq(i,j,k,2,1)
-             qq(i,j,k,2,2) = factor*qq(i,j,k,2,2)
-
-             ! Set qq to 1 if the value is determined by the table.
-
-             if((i == 2  .and. flagI2(j,k)) .or. &
-                  (i == il .and. flagIl(j,k)) .or. &
-                  (j == 2  .and. flagJ2(i,k)) .or. &
-                  (j == jl .and. flagJl(i,k)) .or. &
-                  (k == 2  .and. flagK2(i,j)) .or. &
-                  (k == kl .and. flagKl(i,j))) then
-                qq(i,j,k,1,1) = one
-                qq(i,j,k,2,2) = one
-                qq(i,j,k,1,2) = zero
-                qq(i,j,k,2,1) = zero
-             endif
-          enddo
-       enddo
-    enddo
-
-    ! Initialize the grid velocity to zero. This value will be used
-    ! if the block is not moving.
-
-    qs = zero
-    !
-    !       dd-ADI step in j-direction. There is no particular reason to
-    !       start in j-direction, it just happened to be so. As we solve
-    !       in j-direction, the j-loop is the innermost loop.
-    !
-    do k=2,kl
-       do i=2,il
-          do j=2,jl
-
-             ! More or less the same code is executed here as above when
-             ! the residual was built. However, now the off-diagonal
-             ! terms for the dd-ADI must be built and stored. This could
-             ! have been done earlier, but then all the coefficients had
-             ! to be stored. To save memory, they are recomputed.
-             ! Consequently, see the j-loop to build the residual for
-             ! the comments.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i,j-1,k))
-             volpi = two/(vol(i,j,k) + vol(i,j+1,k))
-
-             xm = sj(i,j-1,k,1)*volmi
-             ym = sj(i,j-1,k,2)*volmi
-             zm = sj(i,j-1,k,3)*volmi
-             xp = sj(i,j,  k,1)*volpi
-             yp = sj(i,j,  k,2)*volpi
-             zp = sj(i,j,  k,3)*volpi
-
-             xa  = half*(sj(i,j,k,1) + sj(i,j-1,k,1))*voli
-             ya  = half*(sj(i,j,k,2) + sj(i,j-1,k,2))*voli
-             za  = half*(sj(i,j,k,3) + sj(i,j-1,k,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             ! Off-diagonal terms due to the diffusion terms
-             ! in j-direction.
-
-             rhoi = one/w(i,j,k,irho)
-             mulm = half*(rlv(i,j-1,k) + rlv(i,j,k))
-             mulp = half*(rlv(i,j+1,k) + rlv(i,j,k))
-             muem = half*(rev(i,j-1,k) + rev(i,j,k))
-             muep = half*(rev(i,j+1,k) + rev(i,j,k))
-
-             c1m = ttm*(mulm + sig1*muem)*rhoi
-             c1p = ttp*(mulp + sig1*muep)*rhoi
-
-             c2m = ttm*(mulm + sig2*muem)*rhoi
-             c2p = ttp*(mulp + sig2*muep)*rhoi
-
-             ! Terms due to the additional diffusion term in j-direction.
-
-             nui   = eight*(rlv(i,j,k) + sig2*rev(i,j,k))*rhoi
-             voli2 = voli*voli
-
-             sp2 = voli2*(sj(i,j,k,1)**2 + sj(i,j,k,2)**2 &
-                  +        sj(i,j,k,3)**2)
-             sm2 = voli2*(sj(i,j-1,k,1)**2 + sj(i,j-1,k,2)**2 &
-                  +        sj(i,j-1,k,3)**2)
-             spm = voli2*(sj(i,j,k,1)*sj(i,j-1,k,1) &
-                  +        sj(i,j,k,2)*sj(i,j-1,k,2) &
-                  +        sj(i,j,k,3)*sj(i,j-1,k,3))
-
-             ! Store the off-diagonal terms.
-
-             bb(1,j) = -c1m
-             dd(1,j) = -c1p
-             bb(2,j) = -c2m + half*nui*(sp2 - spm)
-             dd(2,j) = -c2p + half*nui*(sm2 - spm)
-
-             ! Compute the grid velocity if present.
-             ! It is taken as the average of j and j-1,
-
-             if( addGridVelocities ) &
-                  qs = half*(sFaceJ(i,j,k) + sFaceJ(i,j-1,k))*voli
-
-             ! Off-diagonal terms due to the advection term in
-             ! j-direction. First order approximation.
-
-             uu = xa*w(i,j,k,ivx) + ya*w(i,j,k,ivy) + za*w(i,j,k,ivz) - qs
-             um = zero
-             up = zero
-             if(uu < zero) um = uu
-             if(uu > zero) up = uu
-
-             bb(1,j) = bb(1,j) - up
-             dd(1,j) = dd(1,j) + um
-             bb(2,j) = bb(2,j) - up
-             dd(2,j) = dd(2,j) + um
-
-             ! Store the central jacobian and rhs in cc and ff.
-             ! Multiply the off-diagonal terms and rhs by the iblank
-             ! value so the update determined for iblank = 0 is zero.
-
-             rblank = real(iblank(i,j,k), realType)
-
-             cc(1,1,j) = qq(i,j,k,1,1)
-             cc(1,2,j) = qq(i,j,k,1,2)*rblank
-             cc(2,1,j) = qq(i,j,k,2,1)*rblank
-             cc(2,2,j) = qq(i,j,k,2,2)
-
-             ff(1,j) = dvt(i,j,k,1)*rblank
-             ff(2,j) = dvt(i,j,k,2)*rblank
-
-             bb(:,j) = bb(:,j)*rblank
-             dd(:,j) = dd(:,j)*rblank
-
-             ! Set off diagonal terms to zero if wall function are used.
-
-             if((i == 2  .and. flagI2(j,k)) .or. &
-                  (i == il .and. flagIl(j,k)) .or. &
-                  (j == 2  .and. flagJ2(i,k)) .or. &
-                  (j == jl .and. flagJl(i,k)) .or. &
-                  (k == 2  .and. flagK2(i,j)) .or. &
-                  (k == kl .and. flagKl(i,j))) then
-                bb(1,j) = zero
-                dd(1,j) = zero
-                bb(2,j) = zero
-                dd(2,j) = zero
-             endif
-
-          enddo
-
-          ! Solve the tri-diagonal system in j-direction.
-
-          call tdia3(2_intType, jl, bb, cc, dd, ff)
-
-          ! Determine the new rhs for the next direction.
-
-          do j=2,jl
-             dvt(i,j,k,1) = qq(i,j,k,1,1)*ff(1,j) + qq(i,j,k,1,2)*ff(2,j)
-             dvt(i,j,k,2) = qq(i,j,k,2,1)*ff(1,j) + qq(i,j,k,2,2)*ff(2,j)
-          enddo
-
-       enddo
-    enddo
-    !
-    !       dd-ADI step in i-direction. As we solve in i-direction, the
-    !       i-loop is the innermost loop.
-    !
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
+    subroutine kt_block(resOnly)
+        !
+        !       kt solves the transport equations for the k-tau turbulence
+        !       model, including a cross-diffusion term, in a decoupled
+        !       manner using a diagonal dominant ADI-scheme.
+        !
+        use constants
+        use blockPointers, only: il, jl, kl
+        use inputTimeSpectral
+        use iteration
+        use utils, only: setPointers
+        use turbUtils, only: ktEddyViscosity
+        use turbBCRoutines, only: bcTurbTreatment, applyAllTurbBCThisBlock
+        implicit none
+        !
+        !      Subroutine argument.
+        !
+        logical, intent(in) :: resOnly
+        !
+
+        ! Set the arrays for the boundary condition treatment.
+
+        call bcTurbTreatment
+
+        ! Solve the transport equations for k and tau.
+
+        call ktSolve(resOnly)
+
+        ! The eddy viscosity and the boundary conditions are only
+        ! applied if an actual update has been computed in SSTSolve.
+
+        if (.not. resOnly) then
+
+            ! Compute the corresponding eddy viscosity.
+
+            call ktEddyViscosity(2, il, 2, jl, 2, kl)
+
+            ! Set the halo values for the turbulent variables.
+            ! We are on the finest mesh, so the second layer of halo
+            ! cells must be computed as well.
+
+            call applyAllTurbBCThisBlock(.true.)
+
+        end if
+    end subroutine kt_block
+
+    subroutine ktCDterm
+        !
+        !       ktCdterm computes the cross-diffusion term in the tau-eqn
+        !       for the k-tau turbulence model for the given block.
+        !
+        use constants
+        use blockPointers
+        use turbMod
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k
+        real(kind=realType) :: kx, ky, kz, tx, ty, tz, cd
+
+        ! Loop over the cell centers of the given block. It may be more
+        ! efficient to loop over the faces and to scatter the gradient,
+        ! but in that case the gradients for k and tau must be stored.
+        ! In the current approach no extra memory is needed.
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Compute the gradient of k in the cell center. Use is made
+                    ! of the fact that the surrounding normals sum up to zero,
+                    ! such that the cell i,j,k does not give a contribution.
+                    ! The gradient is scaled by a factor 1/2vol.
+
+                    kx = w(i + 1, j, k, itu1) * si(i, j, k, 1) - w(i - 1, j, k, itu1) * si(i - 1, j, k, 1) &
+                         + w(i, j + 1, k, itu1) * sj(i, j, k, 1) - w(i, j - 1, k, itu1) * sj(i, j - 1, k, 1) &
+                         + w(i, j, k + 1, itu1) * sk(i, j, k, 1) - w(i, j, k - 1, itu1) * sk(i, j, k - 1, 1)
+                    ky = w(i + 1, j, k, itu1) * si(i, j, k, 2) - w(i - 1, j, k, itu1) * si(i - 1, j, k, 2) &
+                         + w(i, j + 1, k, itu1) * sj(i, j, k, 2) - w(i, j - 1, k, itu1) * sj(i, j - 1, k, 2) &
+                         + w(i, j, k + 1, itu1) * sk(i, j, k, 2) - w(i, j, k - 1, itu1) * sk(i, j, k - 1, 2)
+                    kz = w(i + 1, j, k, itu1) * si(i, j, k, 3) - w(i - 1, j, k, itu1) * si(i - 1, j, k, 3) &
+                         + w(i, j + 1, k, itu1) * sj(i, j, k, 3) - w(i, j - 1, k, itu1) * sj(i, j - 1, k, 3) &
+                         + w(i, j, k + 1, itu1) * sk(i, j, k, 3) - w(i, j, k - 1, itu1) * sk(i, j, k - 1, 3)
+
+                    ! Idem for tau.
+
+                    tx = w(i + 1, j, k, itu2) * si(i, j, k, 1) - w(i - 1, j, k, itu2) * si(i - 1, j, k, 1) &
+                         + w(i, j + 1, k, itu2) * sj(i, j, k, 1) - w(i, j - 1, k, itu2) * sj(i, j - 1, k, 1) &
+                         + w(i, j, k + 1, itu2) * sk(i, j, k, 1) - w(i, j, k - 1, itu2) * sk(i, j, k - 1, 1)
+                    ty = w(i + 1, j, k, itu2) * si(i, j, k, 2) - w(i - 1, j, k, itu2) * si(i - 1, j, k, 2) &
+                         + w(i, j + 1, k, itu2) * sj(i, j, k, 2) - w(i, j - 1, k, itu2) * sj(i, j - 1, k, 2) &
+                         + w(i, j, k + 1, itu2) * sk(i, j, k, 2) - w(i, j, k - 1, itu2) * sk(i, j, k - 1, 2)
+                    tz = w(i + 1, j, k, itu2) * si(i, j, k, 3) - w(i - 1, j, k, itu2) * si(i - 1, j, k, 3) &
+                         + w(i, j + 1, k, itu2) * sj(i, j, k, 3) - w(i, j - 1, k, itu2) * sj(i, j - 1, k, 3) &
+                         + w(i, j, k + 1, itu2) * sk(i, j, k, 3) - w(i, j, k - 1, itu2) * sk(i, j, k - 1, 3)
+
+                    ! Compute the dot product grad k grad tau. Multiply it by
+                    ! the correct scaling factor.
+
+                    cd = fourth * (kx * tx + ky * ty + kz * tz) / (vol(i, j, k)**2)
+
+                    ! Cd must only be taken into account when it is negative.
+                    ! Take care of that here.
+
+                    ktCD(i, j, k) = min(zero, cd)
+
+                end do
+            end do
+        end do
+
+    end subroutine ktCDterm
+
+    subroutine ktSolve(resOnly)
+        !
+        !       ktSolve solves the k-tau equations of the k-tau model
+        !       in a coupled manner using a diagonal dominant ADI-scheme.
+        !
+        use blockPointers
+        use constants
+        use flowVarRefState
+        use inputIteration
+        use inputPhysics
+        use paramTurb
+        use turbMod, only: prod, dvt, sct, ktCD, sig1, sig2, vort
+        use turbUtils, only: prodSmag2, prodWmag2, prodKatoLaunder, &
+                             turbAdvection, unsteadyTurbTerm, tdia3
+        use turbCurveFits, only: curveTupYp
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        logical, intent(in) :: resOnly
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, nn
+
+        real(kind=realType) :: rktGam1
+        real(kind=realType) :: rhoi, ss, spk, sdk, tau, tau2, cd
+        real(kind=realType) :: voli, volmi, volpi
+        real(kind=realType) :: xm, ym, zm, xp, yp, zp, xa, ya, za
+        real(kind=realType) :: ttm, ttp, mulm, mulp, muem, muep
+        real(kind=realType) :: c1m, c1p, c10, c2m, c2p, c20
+        real(kind=realType) :: nui, voli2, sp2, sm2, spm
+        real(kind=realType) :: taup, taum, gp, gm
+        real(kind=realType) :: b1, b2, c1, c2, d1, d2
+        real(kind=realType) :: qs, uu, um, up, factor, utau, rblank
+
+        real(kind=realType), dimension(itu1:itu2) :: tup
+
+        real(kind=realType), dimension(2:il, 2:jl, 2:kl, 2, 2) :: qq
+        real(kind=realType), dimension(2, 2:max(il, jl, kl)) :: bb, dd, ff
+        real(kind=realType), dimension(2, 2, 2:max(il, jl, kl)) :: cc
+
+        real(kind=realType), dimension(:, :, :), pointer :: ddw, ww, ddvt
+        real(kind=realType), dimension(:, :), pointer :: rrlv
+        real(kind=realType), dimension(:, :), pointer :: dd2Wall
+
+        logical, dimension(2:jl, 2:kl), target :: flagI2, flagIl
+        logical, dimension(2:il, 2:kl), target :: flagJ2, flagJl
+        logical, dimension(2:il, 2:jl), target :: flagK2, flagKl
+
+        logical, dimension(:, :), pointer :: flag
+
+        ! Set model constants
+
+        rktGam1 = rktBeta1 / rktBetas &
+                  - rktSigt1 * rktK * rktK / sqrt(rktBetas)
+        sig1 = rktSigk1
+        sig2 = rktSigt1
+
+        ! Set a couple of pointers to the correct entries in dw to
+        ! make the code more readable.
+
+        dvt => scratch(1:, 1:, 1:, idvt:)
+        prod => scratch(1:, 1:, 1:, iprod)
+        vort => prod
+        ktCD => scratch(1:, 1:, 1:, icd)
+        !
+        !       Production term.
+        !
+        select case (turbProd)
+        case (strain)
+            call prodSmag2
+
+        case (vorticity)
+            call prodWmag2
+
+        case (katoLaunder)
+            call prodKatoLaunder
+
+        end select
+        !
+        !       The cross diffusion term of the k-tau equation.
+        !
+        call ktCDterm
+        !
+        !       Source terms.
+        !       Determine the source term and its derivative w.r.t. k and
+        !       tau for all internal cells of the block.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Compute the source terms for both the k and the tau
+                    ! equation. The production term of k is limited to a
+                    ! certain times the destruction term.
+
+                    rhoi = one / w(i, j, k, irho)
+                    tau = w(i, j, k, itu2)
+                    tau2 = tau * tau
+                    ss = prod(i, j, k)
+                    cd = rktSigd1 * ktCD(i, j, k)
+                    spk = rev(i, j, k) * ss * rhoi
+                    sdk = rktBetas * w(i, j, k, itu1) / tau
+                    spk = min(spk, pklim * sdk)
+
+                    dvt(i, j, k, 1) = spk - sdk
+                    dvt(i, j, k, 2) = rktBeta1 - rktGam1 * ss * tau2 + cd * tau
+
+                    ! Compute the source term jacobian. Note that only the
+                    ! destruction terms are linearized to increase the diagonal
+                    ! dominance of the matrix. Furthermore minus the source
+                    ! term jacobian is stored. The cross diffusion term is also
+                    ! a destruction term, because cd <= 0.
+
+                    qq(i, j, k, 1, 1) = rktBetas / tau
+                    ! qq(i,j,k,1,2) =-rktBetas*w(i,j,k,itu1)/tau2
+                    qq(i, j, k, 1, 2) = zero
+                    qq(i, j, k, 2, 1) = zero
+                    qq(i, j, k, 2, 2) = two * rktGam1 * ss * tau - cd
+
+                end do
+            end do
+        end do
+        !
+        !       Advection and unsteady terms.
+        !
+        nn = itu1 - 1
+        call turbAdvection(2_intType, 2_intType, nn, qq)
+
+        call unsteadyTurbTerm(2_intType, 2_intType, nn, qq)
+        !
+        !       Viscous terms in k-direction.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Compute the metrics in zeta-direction, i.e. along the
+                    ! line k = constant.
+
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i, j, k - 1))
+                    volpi = two / (vol(i, j, k) + vol(i, j, k + 1))
+
+                    xm = sk(i, j, k - 1, 1) * volmi
+                    ym = sk(i, j, k - 1, 2) * volmi
+                    zm = sk(i, j, k - 1, 3) * volmi
+                    xp = sk(i, j, k, 1) * volpi
+                    yp = sk(i, j, k, 2) * volpi
+                    zp = sk(i, j, k, 3) * volpi
+
+                    xa = half * (sk(i, j, k, 1) + sk(i, j, k - 1, 1)) * voli
+                    ya = half * (sk(i, j, k, 2) + sk(i, j, k - 1, 2)) * voli
+                    za = half * (sk(i, j, k, 3) + sk(i, j, k - 1, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
+
+                    ! Computation of the viscous terms in zeta-direction; note
+                    ! that cross-derivatives are neglected, i.e. the mesh is
+                    ! assumed to be orthogonal.
+                    ! The second derivative in zeta-direction is constructed as
+                    ! the central difference of the first order derivatives, i.e.
+                    ! d^2/dzeta^2 = d/dzeta (d/dzeta k+1/2 - d/dzeta k-1/2).
+                    ! In this way the metric as well as the varying viscosity
+                    ! can be taken into account; the latter appears inside the
+                    ! d/dzeta derivative. The whole term is divided by rho to
+                    ! obtain the diffusion term for k and tau.
+
+                    rhoi = one / w(i, j, k, irho)
+                    mulm = half * (rlv(i, j, k - 1) + rlv(i, j, k))
+                    mulp = half * (rlv(i, j, k + 1) + rlv(i, j, k))
+                    muem = half * (rev(i, j, k - 1) + rev(i, j, k))
+                    muep = half * (rev(i, j, k + 1) + rev(i, j, k))
+
+                    c1m = ttm * (mulm + sig1 * muem) * rhoi
+                    c1p = ttp * (mulp + sig1 * muep) * rhoi
+                    c10 = c1m + c1p
+
+                    c2m = ttm * (mulm + sig2 * muem) * rhoi
+                    c2p = ttp * (mulp + sig2 * muep) * rhoi
+                    c20 = c2m + c2p
+
+                    ! Due to the transformation to the tau variable an
+                    ! additional term, coming from the diffusion term, appears
+                    ! in the tau-equation. This additional diffusion term can
+                    ! be rewritten as -8*(nu + sigmat*nut)*grad(sqrt(tau))**2.
+                    ! Below is the discretized form in zeta-direction.
+                    ! Also here the cross derivatives are neglected, i.e. the
+                    ! assumption is made that the grid is orthogonal.
+
+                    nui = eight * (rlv(i, j, k) + sig2 * rev(i, j, k)) * rhoi
+                    voli2 = voli * voli
+
+                    sp2 = voli2 * (sk(i, j, k, 1)**2 + sk(i, j, k, 2)**2 &
+                                   + sk(i, j, k, 3)**2)
+                    sm2 = voli2 * (sk(i, j, k - 1, 1)**2 + sk(i, j, k - 1, 2)**2 &
+                                   + sk(i, j, k - 1, 3)**2)
+                    spm = voli2 * (sk(i, j, k, 1) * sk(i, j, k - 1, 1) &
+                                   + sk(i, j, k, 2) * sk(i, j, k - 1, 2) &
+                                   + sk(i, j, k, 3) * sk(i, j, k - 1, 3))
+
+                    taup = half * (w(i, j, k + 1, itu2) + w(i, j, k, itu2))
+                    taum = half * (w(i, j, k - 1, itu2) + w(i, j, k, itu2))
+                    gp = sqrt(max(zero, taup))
+                    gm = sqrt(max(zero, taum))
+
+                    ! Update the residual for this cell and store the possible
+                    ! coefficients for the matrix in b1, b2, c1, c2, d1 and d2.
+                    ! The derivatives of the additional diffusion term are an
+                    ! approximation to avoid numerical difficulties near a
+                    ! viscous wall. You can prove analytically that due to the
+                    ! implicit treatment of the BC's two singular terms cancel,
+                    ! but numerically this leads to difficulties. Therefore the
+                    ! assumption is made that gp == gm in the implicit part.
+
+                    dvt(i, j, k, 1) = dvt(i, j, k, 1) + c1m * w(i, j, k - 1, itu1) &
+                                      - c10 * w(i, j, k, itu1) + c1p * w(i, j, k + 1, itu1)
+                    dvt(i, j, k, 2) = dvt(i, j, k, 2) + c2m * w(i, j, k - 1, itu2) &
+                                      - c20 * w(i, j, k, itu2) + c2p * w(i, j, k + 1, itu2) &
+                                      - nui * (taup * sp2 + taum * sm2 - two * gp * gm * spm)
+
+                    b1 = -c1m
+                    c1 = c10
+                    d1 = -c1p
+
+                    b2 = -c2m + half * nui * (sp2 - spm)
+                    c2 = c20 + half * nui * (sp2 + sm2 - two * spm)
+                    d2 = -c2p + half * nui * (sm2 - spm)
+
+                    ! Update the central jacobian. For nonboundary cells this
+                    ! is simply c1 and c2. For boundary cells this is slightly
+                    ! more complicated, because the boundary conditions are
+                    ! treated implicitly and the off-diagonal terms b1, b2 and
+                    ! d1, d2 must be taken into account.
+                    ! The boundary conditions are only treated implicitly if
+                    ! the diagonal dominance of the matrix is increased.
+
+                    if (k == 2) then
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1 &
+                                            - b1 * max(bmtk1(i, j, itu1, itu1), zero)
+                        qq(i, j, k, 1, 2) = qq(i, j, k, 1, 2) - b1 * bmtk1(i, j, itu1, itu2)
+                        qq(i, j, k, 2, 1) = qq(i, j, k, 2, 1) - b2 * bmtk1(i, j, itu2, itu1)
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2 &
+                                            - b2 * max(bmtk1(i, j, itu2, itu2), zero)
+                    else if (k == kl) then
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1 &
+                                            - d1 * max(bmtk2(i, j, itu1, itu1), zero)
+                        qq(i, j, k, 1, 2) = qq(i, j, k, 1, 2) - d1 * bmtk2(i, j, itu1, itu2)
+                        qq(i, j, k, 2, 1) = qq(i, j, k, 2, 1) - d2 * bmtk2(i, j, itu2, itu1)
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2 &
+                                            - d2 * max(bmtk2(i, j, itu2, itu2), zero)
+                    else
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2
+                    end if
+
+                end do
+            end do
+        end do
+        !
+        !       Viscous terms in j-direction.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Compute the metrics in eta-direction, i.e. along the
+                    ! line j = constant.
+
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i, j - 1, k))
+                    volpi = two / (vol(i, j, k) + vol(i, j + 1, k))
+
+                    xm = sj(i, j - 1, k, 1) * volmi
+                    ym = sj(i, j - 1, k, 2) * volmi
+                    zm = sj(i, j - 1, k, 3) * volmi
+                    xp = sj(i, j, k, 1) * volpi
+                    yp = sj(i, j, k, 2) * volpi
+                    zp = sj(i, j, k, 3) * volpi
+
+                    xa = half * (sj(i, j, k, 1) + sj(i, j - 1, k, 1)) * voli
+                    ya = half * (sj(i, j, k, 2) + sj(i, j - 1, k, 2)) * voli
+                    za = half * (sj(i, j, k, 3) + sj(i, j - 1, k, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
+
+                    ! Computation of the viscous terms in eta-direction; note
+                    ! that cross-derivatives are neglected, i.e. the mesh is
+                    ! assumed to be orthogonal.
+                    ! The second derivative in eta-direction is constructed as
+                    ! the central difference of the first order derivatives, i.e.
+                    ! d^2/deta^2 = d/deta (d/deta j+1/2 - d/deta j-1/2).
+                    ! In this way the metric as well as the varying viscosity
+                    ! can be taken into account; the latter appears inside the
+                    ! d/deta derivative. The whole term is divided by rho to
+                    ! obtain the diffusion term for k and omega.
+
+                    rhoi = one / w(i, j, k, irho)
+                    mulm = half * (rlv(i, j - 1, k) + rlv(i, j, k))
+                    mulp = half * (rlv(i, j + 1, k) + rlv(i, j, k))
+                    muem = half * (rev(i, j - 1, k) + rev(i, j, k))
+                    muep = half * (rev(i, j + 1, k) + rev(i, j, k))
+
+                    c1m = ttm * (mulm + sig1 * muem) * rhoi
+                    c1p = ttp * (mulp + sig1 * muep) * rhoi
+                    c10 = c1m + c1p
+
+                    c2m = ttm * (mulm + sig2 * muem) * rhoi
+                    c2p = ttp * (mulp + sig2 * muep) * rhoi
+                    c20 = c2m + c2p
+
+                    ! Due to the transformation to the tau variable an
+                    ! additional term, coming from the diffusion term, appears
+                    ! in the tau-equation. This additional diffusion term can
+                    ! be rewritten as -8*(nu + sigmat*nut)*grad(sqrt(tau))**2.
+                    ! Below is the discretized form in eta-direction.
+                    ! Also here the cross derivatives are neglected, i.e. the
+                    ! assumption is made that the grid is orthogonal.
+
+                    nui = eight * (rlv(i, j, k) + sig2 * rev(i, j, k)) * rhoi
+                    voli2 = voli * voli
+
+                    sp2 = voli2 * (sj(i, j, k, 1)**2 + sj(i, j, k, 2)**2 &
+                                   + sj(i, j, k, 3)**2)
+                    sm2 = voli2 * (sj(i, j - 1, k, 1)**2 + sj(i, j - 1, k, 2)**2 &
+                                   + sj(i, j - 1, k, 3)**2)
+                    spm = voli2 * (sj(i, j, k, 1) * sj(i, j - 1, k, 1) &
+                                   + sj(i, j, k, 2) * sj(i, j - 1, k, 2) &
+                                   + sj(i, j, k, 3) * sj(i, j - 1, k, 3))
+
+                    taup = half * (w(i, j + 1, k, itu2) + w(i, j, k, itu2))
+                    taum = half * (w(i, j - 1, k, itu2) + w(i, j, k, itu2))
+                    gp = sqrt(max(zero, taup))
+                    gm = sqrt(max(zero, taum))
+
+                    ! Update the residual for this cell and store the possible
+                    ! coefficients for the matrix in b1, b2, c1, c2, d1 and d2.
+                    ! The derivatives of the additional diffusion term are an
+                    ! approximation to avoid numerical difficulties near a
+                    ! viscous wall. You can prove analytically that due to the
+                    ! implicit treatment of the BC's two singular terms cancel,
+                    ! but numerically this leads to difficulties. Therefore the
+                    ! assumption is made that gp == gm in the implicit part.
+
+                    dvt(i, j, k, 1) = dvt(i, j, k, 1) + c1m * w(i, j - 1, k, itu1) &
+                                      - c10 * w(i, j, k, itu1) + c1p * w(i, j + 1, k, itu1)
+                    dvt(i, j, k, 2) = dvt(i, j, k, 2) + c2m * w(i, j - 1, k, itu2) &
+                                      - c20 * w(i, j, k, itu2) + c2p * w(i, j + 1, k, itu2) &
+                                      - nui * (taup * sp2 + taum * sm2 - two * gp * gm * spm)
+
+                    b1 = -c1m
+                    c1 = c10
+                    d1 = -c1p
+
+                    b2 = -c2m + half * nui * (sp2 - spm)
+                    c2 = c20 + half * nui * (sp2 + sm2 - two * spm)
+                    d2 = -c2p + half * nui * (sm2 - spm)
+
+                    ! Update the central jacobian. For nonboundary cells this
+                    ! is simply c1 and c2. For boundary cells this is slightly
+                    ! more complicated, because the boundary conditions are
+                    ! treated implicitly and the off-diagonal terms b1, b2 and
+                    ! d1, d2 must be taken into account.
+                    ! The boundary conditions are only treated implicitly if
+                    ! the diagonal dominance of the matrix is increased.
+
+                    if (j == 2) then
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1 &
+                                            - b1 * max(bmtj1(i, k, itu1, itu1), zero)
+                        qq(i, j, k, 1, 2) = qq(i, j, k, 1, 2) - b1 * bmtj1(i, k, itu1, itu2)
+                        qq(i, j, k, 2, 1) = qq(i, j, k, 2, 1) - b2 * bmtj1(i, k, itu2, itu1)
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2 &
+                                            - b2 * max(bmtj1(i, k, itu2, itu2), zero)
+                    else if (j == jl) then
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1 &
+                                            - d1 * max(bmtj2(i, k, itu1, itu1), zero)
+                        qq(i, j, k, 1, 2) = qq(i, j, k, 1, 2) - d1 * bmtj2(i, k, itu1, itu2)
+                        qq(i, j, k, 2, 1) = qq(i, j, k, 2, 1) - d2 * bmtj2(i, k, itu2, itu1)
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2 &
+                                            - d2 * max(bmtj2(i, k, itu2, itu2), zero)
+                    else
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2
+                    end if
+
+                end do
+            end do
+        end do
+        !
+        !       Viscous terms in i-direction.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Compute the metrics in xi-direction, i.e. along the
+                    ! line i = constant.
+
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i - 1, j, k))
+                    volpi = two / (vol(i, j, k) + vol(i + 1, j, k))
+
+                    xm = si(i - 1, j, k, 1) * volmi
+                    ym = si(i - 1, j, k, 2) * volmi
+                    zm = si(i - 1, j, k, 3) * volmi
+                    xp = si(i, j, k, 1) * volpi
+                    yp = si(i, j, k, 2) * volpi
+                    zp = si(i, j, k, 3) * volpi
+
+                    xa = half * (si(i, j, k, 1) + si(i - 1, j, k, 1)) * voli
+                    ya = half * (si(i, j, k, 2) + si(i - 1, j, k, 2)) * voli
+                    za = half * (si(i, j, k, 3) + si(i - 1, j, k, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
+
+                    ! Computation of the viscous terms in xi-direction; note
+                    ! that cross-derivatives are neglected, i.e. the mesh is
+                    ! assumed to be orthogonal.
+                    ! The second derivative in xi-direction is constructed as
+                    ! the central difference of the first order derivatives, i.e.
+                    ! d^2/dxi^2 = d/dxi (d/dxi i+1/2 - d/dxi i-1/2).
+                    ! In this way the metric as well as the varying viscosity
+                    ! can be taken into account; the latter appears inside the
+                    ! d/dxi derivative. The whole term is divided by rho to
+                    ! obtain the diffusion term for k and omega.
+
+                    rhoi = one / w(i, j, k, irho)
+                    mulm = half * (rlv(i - 1, j, k) + rlv(i, j, k))
+                    mulp = half * (rlv(i + 1, j, k) + rlv(i, j, k))
+                    muem = half * (rev(i - 1, j, k) + rev(i, j, k))
+                    muep = half * (rev(i + 1, j, k) + rev(i, j, k))
+
+                    c1m = ttm * (mulm + sig1 * muem) * rhoi
+                    c1p = ttp * (mulp + sig1 * muep) * rhoi
+                    c10 = c1m + c1p
+
+                    c2m = ttm * (mulm + sig2 * muem) * rhoi
+                    c2p = ttp * (mulp + sig2 * muep) * rhoi
+                    c20 = c2m + c2p
+
+                    ! Due to the transformation to the tau variable an
+                    ! additional term, coming from the diffusion term, appears
+                    ! in the tau-equation. This additional diffusion term can
+                    ! be rewritten as -8*(nu + sigmat*nut)*grad(sqrt(tau))**2.
+                    ! Below is the discretized form in xi-direction.
+                    ! Also here the cross derivatives are neglected, i.e. the
+                    ! assumption is made that the grid is orthogonal.
+
+                    nui = eight * (rlv(i, j, k) + sig2 * rev(i, j, k)) * rhoi
+                    voli2 = voli * voli
+
+                    sp2 = voli2 * (si(i, j, k, 1)**2 + si(i, j, k, 2)**2 &
+                                   + si(i, j, k, 3)**2)
+                    sm2 = voli2 * (si(i - 1, j, k, 1)**2 + si(i - 1, j, k, 2)**2 &
+                                   + si(i - 1, j, k, 3)**2)
+                    spm = voli2 * (si(i, j, k, 1) * si(i - 1, j, k, 1) &
+                                   + si(i, j, k, 2) * si(i - 1, j, k, 2) &
+                                   + si(i, j, k, 3) * si(i - 1, j, k, 3))
+
+                    taup = half * (w(i + 1, j, k, itu2) + w(i, j, k, itu2))
+                    taum = half * (w(i - 1, j, k, itu2) + w(i, j, k, itu2))
+                    gp = sqrt(max(zero, taup))
+                    gm = sqrt(max(zero, taum))
+
+                    ! Update the residual for this cell and store the possible
+                    ! coefficients for the matrix in b1, b2, c1, c2, d1 and d2.
+                    ! The derivatives of the additional diffusion term are an
+                    ! approximation to avoid numerical difficulties near a
+                    ! viscous wall. You can prove analytically that due to the
+                    ! implicit treatment of the BC's two singular terms cancel,
+                    ! but numerically this leads to difficulties. Therefore the
+                    ! assumption is made that gp == gm in the implicit part.
+
+                    dvt(i, j, k, 1) = dvt(i, j, k, 1) + c1m * w(i - 1, j, k, itu1) &
+                                      - c10 * w(i, j, k, itu1) + c1p * w(i + 1, j, k, itu1)
+                    dvt(i, j, k, 2) = dvt(i, j, k, 2) + c2m * w(i - 1, j, k, itu2) &
+                                      - c20 * w(i, j, k, itu2) + c2p * w(i + 1, j, k, itu2) &
+                                      - nui * (taup * sp2 + taum * sm2 - two * gp * gm * spm)
+
+                    b1 = -c1m
+                    c1 = c10
+                    d1 = -c1p
+
+                    b2 = -c2m + half * nui * (sp2 - spm)
+                    c2 = c20 + half * nui * (sp2 + sm2 - two * spm)
+                    d2 = -c2p + half * nui * (sm2 - spm)
+
+                    ! Update the central jacobian. For nonboundary cells this
+                    ! is simply c1 and c2. For boundary cells this is slightly
+                    ! more complicated, because the boundary conditions are
+                    ! treated implicitly and the off-diagonal terms b1, b2 and
+                    ! d1, d2 must be taken into account.
+                    ! The boundary conditions are only treated implicitly if
+                    ! the diagonal dominance of the matrix is increased.
+
+                    if (i == 2) then
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1 &
+                                            - b1 * max(bmti1(j, k, itu1, itu1), zero)
+                        qq(i, j, k, 1, 2) = qq(i, j, k, 1, 2) - b1 * bmti1(j, k, itu1, itu2)
+                        qq(i, j, k, 2, 1) = qq(i, j, k, 2, 1) - b2 * bmti1(j, k, itu2, itu1)
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2 &
+                                            - b2 * max(bmti1(j, k, itu2, itu2), zero)
+                    else if (i == il) then
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1 &
+                                            - d1 * max(bmti2(j, k, itu1, itu1), zero)
+                        qq(i, j, k, 1, 2) = qq(i, j, k, 1, 2) - d1 * bmti2(j, k, itu1, itu2)
+                        qq(i, j, k, 2, 1) = qq(i, j, k, 2, 1) - d2 * bmti2(j, k, itu2, itu1)
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2 &
+                                            - d2 * max(bmti2(j, k, itu2, itu2), zero)
+                    else
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2
+                    end if
+
+                end do
+            end do
+        end do
+
+        ! Multiply the residual by the volume and store this in dw; this
+        ! is done for monitoring reasons only. The multiplication with the
+        ! volume is present to be consistent with the flow residuals; also
+        ! the negative value is taken, again to be consistent with the
+        ! flow equations. Also multiply by iblank so that no updates occur
+        ! in holes or the overset boundary.
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    rblank = real(iblank(i, j, k), realType)
+                    dw(i, j, k, itu1) = -volRef(i, j, k) * dvt(i, j, k, 1) * rblank
+                    dw(i, j, k, itu2) = -volRef(i, j, k) * dvt(i, j, k, 2) * rblank
+                end do
+            end do
+        end do
+
+        ! Initialize the wall function flags to .false.
+
+        flagI2 = .false.
+        flagIl = .false.
+        flagJ2 = .false.
+        flagJl = .false.
+        flagK2 = .false.
+        flagKl = .false.
+
+        ! Modify the rhs of the 1st internal cell, if wall functions
+        ! are used; their value is determined by the table.
+
+        testWallFunctions: if (wallFunctions) then
+
+            bocos: do nn = 1, nViscBocos
+
+                ! Determine the block face on which the subface is located
+                ! and set some variables. As flag points to the entire array
+                ! flagI2, etc., its starting indices are the starting indices
+                ! of its target and not 1.
+
+                select case (BCFaceID(nn))
+                case (iMin)
+                    flag => flagI2
+                    ddw => dw(2, 1:, 1:, 1:); ddvt => dvt(2, 1:, 1:, 1:)
+                    ww => w(2, 1:, 1:, 1:); rrlv => rlv(2, 1:, 1:)
+                    dd2Wall => d2Wall(2, :, :)
+
+                case (iMax)
+                    flag => flagIl
+                    ddw => dw(il, 1:, 1:, 1:); ddvt => dvt(il, 1:, 1:, 1:)
+                    ww => w(il, 1:, 1:, 1:); rrlv => rlv(il, 1:, 1:)
+                    dd2Wall => d2Wall(il, :, :)
+
+                case (jMin)
+                    flag => flagJ2
+                    ddw => dw(1:, 2, 1:, 1:); ddvt => dvt(1:, 2, 1:, 1:)
+                    ww => w(1:, 2, 1:, 1:); rrlv => rlv(1:, 2, 1:)
+                    dd2Wall => d2Wall(:, 2, :)
+
+                case (jMax)
+                    flag => flagJl
+                    ddw => dw(1:, jl, 1:, 1:); ddvt => dvt(1:, jl, 1:, 1:)
+                    ww => w(1:, jl, 1:, 1:); rrlv => rlv(1:, jl, 1:)
+                    dd2Wall => d2Wall(:, jl, :)
+
+                case (kMin)
+                    flag => flagK2
+                    ddw => dw(1:, 1:, 2, 1:); ddvt => dvt(1:, 1:, 2, 1:)
+                    ww => w(1:, 1:, 2, 1:); rrlv => rlv(1:, 1:, 2)
+                    dd2Wall => d2Wall(:, :, 2)
+
+                case (kMax)
+                    flag => flagKl
+                    ddw => dw(1:, 1:, kl, 1:); ddvt => dvt(1:, 1:, kl, 1:)
+                    ww => w(1:, 1:, kl, 1:); rrlv => rlv(1:, 1:, kl)
+                    dd2Wall => d2Wall(:, :, kl)
+
+                end select
+
+                ! Loop over the owned faces of this subface. Therefore the
+                ! nodal range of BCData must be used. The offset of +1 is
+                ! present, because the starting index of the cell range is
+                ! 1 larger than the starting index of the nodal range.
+
+                do j = (BCData(nn)%jnBeg + 1), BCData(nn)%jnEnd
+                    do i = (BCData(nn)%inBeg + 1), BCData(nn)%inEnd
+
+                        ! Set ddw to zero.
+
+                        ddw(i, j, itu1) = zero
+                        ddw(i, j, itu2) = zero
+
+                        ! Enforce k and tau in the 1st internal cell from
+                        ! the wall function table. There is an offset of -1 in
+                        ! the wall distance. Note that the offset compared to
+                        ! the current value must be stored. Also note that the
+                        ! curve fits contain the non-dimensional values.
+
+                        utau = viscSubface(nn)%utau(i, j)
+                        yp = ww(i, j, irho) * dd2Wall(i - 1, j - 1) * utau / rrlv(i, j)
+
+                        call curveTupYp(tup, yp, itu1, itu2)
+
+                        tup(itu1) = tup(itu1) * utau**2
+                        tup(itu2) = tup(itu2) * rrlv(i, j) / (ww(i, j, irho) * utau**2)
+
+                        ddvt(i, j, 1) = tup(itu1) - ww(i, j, itu1)
+                        ddvt(i, j, 2) = tup(itu2) - ww(i, j, itu2)
+
+                        ! Set the wall flag to .true.
+
+                        flag(i, j) = .true.
+
+                    end do
+                end do
+
+            end do bocos
+        end if testWallFunctions
+
+        ! Return if only the residual must be computed.
+
+        if (resOnly) return
+
+        ! For implicit relaxation take the local time step into account,
+        ! where dt is the inverse of the central jacobian times the cfl
+        ! number. The following system is solved:
+        ! (I/dt + cc + bb + dd)*dw = rhs, in which I/dt = cc/cfl. As in
+        ! the rest of the algorithm only the modified central jacobian is
+        ! used, stored it now.
+
+        ! Compute the factor multiplying the central jacobian, which
+        ! is 1 + 1/cfl (implicit relaxation only).
+
+        factor = one
+        if (turbRelax == turbRelaxImplicit) &
+            factor = one + (one - alfaTurb) / alfaTurb
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    qq(i, j, k, 1, 1) = factor * qq(i, j, k, 1, 1)
+                    qq(i, j, k, 1, 2) = factor * qq(i, j, k, 1, 2)
+                    qq(i, j, k, 2, 1) = factor * qq(i, j, k, 2, 1)
+                    qq(i, j, k, 2, 2) = factor * qq(i, j, k, 2, 2)
+
+                    ! Set qq to 1 if the value is determined by the table.
+
+                    if ((i == 2 .and. flagI2(j, k)) .or. &
+                        (i == il .and. flagIl(j, k)) .or. &
+                        (j == 2 .and. flagJ2(i, k)) .or. &
+                        (j == jl .and. flagJl(i, k)) .or. &
+                        (k == 2 .and. flagK2(i, j)) .or. &
+                        (k == kl .and. flagKl(i, j))) then
+                        qq(i, j, k, 1, 1) = one
+                        qq(i, j, k, 2, 2) = one
+                        qq(i, j, k, 1, 2) = zero
+                        qq(i, j, k, 2, 1) = zero
+                    end if
+                end do
+            end do
+        end do
+
+        ! Initialize the grid velocity to zero. This value will be used
+        ! if the block is not moving.
+
+        qs = zero
+        !
+        !       dd-ADI step in j-direction. There is no particular reason to
+        !       start in j-direction, it just happened to be so. As we solve
+        !       in j-direction, the j-loop is the innermost loop.
+        !
+        do k = 2, kl
+            do i = 2, il
+                do j = 2, jl
+
+                    ! More or less the same code is executed here as above when
+                    ! the residual was built. However, now the off-diagonal
+                    ! terms for the dd-ADI must be built and stored. This could
+                    ! have been done earlier, but then all the coefficients had
+                    ! to be stored. To save memory, they are recomputed.
+                    ! Consequently, see the j-loop to build the residual for
+                    ! the comments.
+
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i, j - 1, k))
+                    volpi = two / (vol(i, j, k) + vol(i, j + 1, k))
+
+                    xm = sj(i, j - 1, k, 1) * volmi
+                    ym = sj(i, j - 1, k, 2) * volmi
+                    zm = sj(i, j - 1, k, 3) * volmi
+                    xp = sj(i, j, k, 1) * volpi
+                    yp = sj(i, j, k, 2) * volpi
+                    zp = sj(i, j, k, 3) * volpi
+
+                    xa = half * (sj(i, j, k, 1) + sj(i, j - 1, k, 1)) * voli
+                    ya = half * (sj(i, j, k, 2) + sj(i, j - 1, k, 2)) * voli
+                    za = half * (sj(i, j, k, 3) + sj(i, j - 1, k, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
+
+                    ! Off-diagonal terms due to the diffusion terms
+                    ! in j-direction.
+
+                    rhoi = one / w(i, j, k, irho)
+                    mulm = half * (rlv(i, j - 1, k) + rlv(i, j, k))
+                    mulp = half * (rlv(i, j + 1, k) + rlv(i, j, k))
+                    muem = half * (rev(i, j - 1, k) + rev(i, j, k))
+                    muep = half * (rev(i, j + 1, k) + rev(i, j, k))
+
+                    c1m = ttm * (mulm + sig1 * muem) * rhoi
+                    c1p = ttp * (mulp + sig1 * muep) * rhoi
+
+                    c2m = ttm * (mulm + sig2 * muem) * rhoi
+                    c2p = ttp * (mulp + sig2 * muep) * rhoi
+
+                    ! Terms due to the additional diffusion term in j-direction.
+
+                    nui = eight * (rlv(i, j, k) + sig2 * rev(i, j, k)) * rhoi
+                    voli2 = voli * voli
+
+                    sp2 = voli2 * (sj(i, j, k, 1)**2 + sj(i, j, k, 2)**2 &
+                                   + sj(i, j, k, 3)**2)
+                    sm2 = voli2 * (sj(i, j - 1, k, 1)**2 + sj(i, j - 1, k, 2)**2 &
+                                   + sj(i, j - 1, k, 3)**2)
+                    spm = voli2 * (sj(i, j, k, 1) * sj(i, j - 1, k, 1) &
+                                   + sj(i, j, k, 2) * sj(i, j - 1, k, 2) &
+                                   + sj(i, j, k, 3) * sj(i, j - 1, k, 3))
+
+                    ! Store the off-diagonal terms.
+
+                    bb(1, j) = -c1m
+                    dd(1, j) = -c1p
+                    bb(2, j) = -c2m + half * nui * (sp2 - spm)
+                    dd(2, j) = -c2p + half * nui * (sm2 - spm)
+
+                    ! Compute the grid velocity if present.
+                    ! It is taken as the average of j and j-1,
+
+                    if (addGridVelocities) &
+                        qs = half * (sFaceJ(i, j, k) + sFaceJ(i, j - 1, k)) * voli
+
+                    ! Off-diagonal terms due to the advection term in
+                    ! j-direction. First order approximation.
+
+                    uu = xa * w(i, j, k, ivx) + ya * w(i, j, k, ivy) + za * w(i, j, k, ivz) - qs
+                    um = zero
+                    up = zero
+                    if (uu < zero) um = uu
+                    if (uu > zero) up = uu
+
+                    bb(1, j) = bb(1, j) - up
+                    dd(1, j) = dd(1, j) + um
+                    bb(2, j) = bb(2, j) - up
+                    dd(2, j) = dd(2, j) + um
+
+                    ! Store the central jacobian and rhs in cc and ff.
+                    ! Multiply the off-diagonal terms and rhs by the iblank
+                    ! value so the update determined for iblank = 0 is zero.
+
+                    rblank = real(iblank(i, j, k), realType)
+
+                    cc(1, 1, j) = qq(i, j, k, 1, 1)
+                    cc(1, 2, j) = qq(i, j, k, 1, 2) * rblank
+                    cc(2, 1, j) = qq(i, j, k, 2, 1) * rblank
+                    cc(2, 2, j) = qq(i, j, k, 2, 2)
+
+                    ff(1, j) = dvt(i, j, k, 1) * rblank
+                    ff(2, j) = dvt(i, j, k, 2) * rblank
+
+                    bb(:, j) = bb(:, j) * rblank
+                    dd(:, j) = dd(:, j) * rblank
+
+                    ! Set off diagonal terms to zero if wall function are used.
+
+                    if ((i == 2 .and. flagI2(j, k)) .or. &
+                        (i == il .and. flagIl(j, k)) .or. &
+                        (j == 2 .and. flagJ2(i, k)) .or. &
+                        (j == jl .and. flagJl(i, k)) .or. &
+                        (k == 2 .and. flagK2(i, j)) .or. &
+                        (k == kl .and. flagKl(i, j))) then
+                        bb(1, j) = zero
+                        dd(1, j) = zero
+                        bb(2, j) = zero
+                        dd(2, j) = zero
+                    end if
+
+                end do
+
+                ! Solve the tri-diagonal system in j-direction.
+
+                call tdia3(2_intType, jl, bb, cc, dd, ff)
+
+                ! Determine the new rhs for the next direction.
+
+                do j = 2, jl
+                    dvt(i, j, k, 1) = qq(i, j, k, 1, 1) * ff(1, j) + qq(i, j, k, 1, 2) * ff(2, j)
+                    dvt(i, j, k, 2) = qq(i, j, k, 2, 1) * ff(1, j) + qq(i, j, k, 2, 2) * ff(2, j)
+                end do
+
+            end do
+        end do
+        !
+        !       dd-ADI step in i-direction. As we solve in i-direction, the
+        !       i-loop is the innermost loop.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
 
-             ! More or less the same code is executed here as above when
-             ! the residual was built. However, now the off-diagonal
-             ! terms for the dd-ADI must be built and stored. This could
-             ! have been done earlier, but then all the coefficients had
-             ! to be stored. To save memory, they are recomputed.
-             ! Consequently, see the i-loop to build the residual for
-             ! the comments.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i-1,j,k))
-             volpi = two/(vol(i,j,k) + vol(i+1,j,k))
-
-             xm = si(i-1,j,k,1)*volmi
-             ym = si(i-1,j,k,2)*volmi
-             zm = si(i-1,j,k,3)*volmi
-             xp = si(i,  j,k,1)*volpi
-             yp = si(i,  j,k,2)*volpi
-             zp = si(i,  j,k,3)*volpi
+                    ! More or less the same code is executed here as above when
+                    ! the residual was built. However, now the off-diagonal
+                    ! terms for the dd-ADI must be built and stored. This could
+                    ! have been done earlier, but then all the coefficients had
+                    ! to be stored. To save memory, they are recomputed.
+                    ! Consequently, see the i-loop to build the residual for
+                    ! the comments.
+
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i - 1, j, k))
+                    volpi = two / (vol(i, j, k) + vol(i + 1, j, k))
+
+                    xm = si(i - 1, j, k, 1) * volmi
+                    ym = si(i - 1, j, k, 2) * volmi
+                    zm = si(i - 1, j, k, 3) * volmi
+                    xp = si(i, j, k, 1) * volpi
+                    yp = si(i, j, k, 2) * volpi
+                    zp = si(i, j, k, 3) * volpi
 
-             xa  = half*(si(i,j,k,1) + si(i-1,j,k,1))*voli
-             ya  = half*(si(i,j,k,2) + si(i-1,j,k,2))*voli
-             za  = half*(si(i,j,k,3) + si(i-1,j,k,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             ! Off-diagonal terms due to the diffusion terms
-             ! in i-direction.
-
-             rhoi = one/w(i,j,k,irho)
-             mulm = half*(rlv(i-1,j,k) + rlv(i,j,k))
-             mulp = half*(rlv(i+1,j,k) + rlv(i,j,k))
-             muem = half*(rev(i-1,j,k) + rev(i,j,k))
-             muep = half*(rev(i+1,j,k) + rev(i,j,k))
-
-             c1m = ttm*(mulm + sig1*muem)*rhoi
-             c1p = ttp*(mulp + sig1*muep)*rhoi
-
-             c2m = ttm*(mulm + sig2*muem)*rhoi
-             c2p = ttp*(mulp + sig2*muep)*rhoi
-
-             ! Terms due to the additional diffusion term in j-direction.
-
-             nui   = eight*(rlv(i,j,k) + sig2*rev(i,j,k))*rhoi
-             voli2 = voli*voli
-
-             sp2 = voli2*(si(i,j,k,1)**2 + si(i,j,k,2)**2 &
-                  +        si(i,j,k,3)**2)
-             sm2 = voli2*(si(i-1,j,k,1)**2 + si(i-1,j,k,2)**2 &
-                  +        si(i-1,j,k,3)**2)
-             spm = voli2*(si(i,j,k,1)*si(i-1,j,k,1) &
-                  +        si(i,j,k,2)*si(i-1,j,k,2) &
-                  +        si(i,j,k,3)*si(i-1,j,k,3))
-
-             ! Store the off-diagonal terms.
-
-             bb(1,i) = -c1m
-             dd(1,i) = -c1p
-             bb(2,i) = -c2m + half*nui*(sp2 - spm)
-             dd(2,i) = -c2p + half*nui*(sm2 - spm)
-
-             ! Compute the grid velocity if present.
-             ! It is taken as the average of i and i-1,
-
-             if( addGridVelocities ) &
-                  qs = half*(sFaceI(i,j,k) + sFaceI(i-1,j,k))*voli
-
-             ! Off-diagonal terms due to the advection term in
-             ! i-direction. First order approximation.
-
-             uu = xa*w(i,j,k,ivx) + ya*w(i,j,k,ivy) + za*w(i,j,k,ivz) - qs
-             um = zero
-             up = zero
-             if(uu < zero) um = uu
-             if(uu > zero) up = uu
-
-             bb(1,i) = bb(1,i) - up
-             dd(1,i) = dd(1,i) + um
-             bb(2,i) = bb(2,i) - up
-             dd(2,i) = dd(2,i) + um
-
-             ! Store the central jacobian and rhs in cc and ff.
-             ! Multiply the off-diagonal terms and rhs by the iblank
-             ! value so the update determined for iblank = 0 is zero.
-
-             rblank = real(iblank(i,j,k), realType)
-
-             cc(1,1,i) = qq(i,j,k,1,1)
-             cc(1,2,i) = qq(i,j,k,1,2)*rblank
-             cc(2,1,i) = qq(i,j,k,2,1)*rblank
-             cc(2,2,i) = qq(i,j,k,2,2)
-
-             ff(1,i) = dvt(i,j,k,1)*rblank
-             ff(2,i) = dvt(i,j,k,2)*rblank
-
-             bb(:,i) = bb(:,i)*rblank
-             dd(:,i) = dd(:,i)*rblank
-
-             ! Set off diagonal terms to zero if wall function are used.
-
-             if((i == 2  .and. flagI2(j,k)) .or. &
-                  (i == il .and. flagIl(j,k)) .or. &
-                  (j == 2  .and. flagJ2(i,k)) .or. &
-                  (j == jl .and. flagJl(i,k)) .or. &
-                  (k == 2  .and. flagK2(i,j)) .or. &
-                  (k == kl .and. flagKl(i,j))) then
-                bb(1,i) = zero
-                dd(1,i) = zero
-                bb(2,i) = zero
-                dd(2,i) = zero
-             endif
-
-          enddo
-
-          ! Solve the tri-diagonal system in i-direction.
-
-          call tdia3(2_intType, il, bb, cc, dd, ff)
-
-          ! Determine the new rhs for the next direction.
-
-          do i=2,il
-             dvt(i,j,k,1) = qq(i,j,k,1,1)*ff(1,i) + qq(i,j,k,1,2)*ff(2,i)
-             dvt(i,j,k,2) = qq(i,j,k,2,1)*ff(1,i) + qq(i,j,k,2,2)*ff(2,i)
-          enddo
-
-       enddo
-    enddo
-    !
-    !       dd-ADI step in k-direction. As we solve in k-direction, the
-    !       k-loop is the innermost loop.
-    !
-    do j=2,jl
-       do i=2,il
-          do k=2,kl
-
-             ! More or less the same code is executed here as above when
-             ! the residual was built. However, now the off-diagonal
-             ! terms for the dd-ADI must be built and stored. This could
-             ! have been done earlier, but then all the coefficients had
-             ! to be stored. To save memory, they are recomputed.
-             ! Consequently, see the k-loop to build the residual for
-             ! the comments.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i,j,k-1))
-             volpi = two/(vol(i,j,k) + vol(i,j,k+1))
-
-             xm = sk(i,j,k-1,1)*volmi
-             ym = sk(i,j,k-1,2)*volmi
-             zm = sk(i,j,k-1,3)*volmi
-             xp = sk(i,j,k,  1)*volpi
-             yp = sk(i,j,k,  2)*volpi
-             zp = sk(i,j,k,  3)*volpi
-
-             xa  = half*(sk(i,j,k,1) + sk(i,j,k-1,1))*voli
-             ya  = half*(sk(i,j,k,2) + sk(i,j,k-1,2))*voli
-             za  = half*(sk(i,j,k,3) + sk(i,j,k-1,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             ! Off-diagonal terms due to the diffusion terms
-             ! in k-direction.
-
-             rhoi = one/w(i,j,k,irho)
-             mulm = half*(rlv(i,j,k-1) + rlv(i,j,k))
-             mulp = half*(rlv(i,j,k+1) + rlv(i,j,k))
-             muem = half*(rev(i,j,k-1) + rev(i,j,k))
-             muep = half*(rev(i,j,k+1) + rev(i,j,k))
-
-             c1m = ttm*(mulm + sig1*muem)*rhoi
-             c1p = ttp*(mulp + sig1*muep)*rhoi
-
-             c2m = ttm*(mulm + sig2*muem)*rhoi
-             c2p = ttp*(mulp + sig2*muep)*rhoi
-
-             ! Terms due to the additional diffusion term in j-direction.
-
-             nui   = eight*(rlv(i,j,k) + sig2*rev(i,j,k))*rhoi
-             voli2 = voli*voli
-
-             sp2 = voli2*(sk(i,j,k,1)**2 + sk(i,j,k,2)**2 &
-                  +        sk(i,j,k,3)**2)
-             sm2 = voli2*(sk(i,j,k-1,1)**2 + sk(i,j,k-1,2)**2 &
-                  +        sk(i,j,k-1,3)**2)
-             spm = voli2*(sk(i,j,k,1)*sk(i,j,k-1,1) &
-                  +        sk(i,j,k,2)*sk(i,j,k-1,2) &
-                  +        sk(i,j,k,3)*sk(i,j,k-1,3))
-
-             ! Store the off-diagonal terms.
-
-             bb(1,k) = -c1m
-             dd(1,k) = -c1p
-             bb(2,k) = -c2m + half*nui*(sp2 - spm)
-             dd(2,k) = -c2p + half*nui*(sm2 - spm)
-
-             ! Compute the grid velocity if present.
-             ! It is taken as the average of k and k-1,
-
-             if( addGridVelocities ) &
-                  qs = half*(sFaceK(i,j,k) + sFaceK(i,j,k-1))*voli
-
-             ! Off-diagonal terms due to the advection term in
-             ! k-direction. First order approximation.
-
-             uu = xa*w(i,j,k,ivx) + ya*w(i,j,k,ivy) + za*w(i,j,k,ivz) - qs
-             um = zero
-             up = zero
-             if(uu < zero) um = uu
-             if(uu > zero) up = uu
-
-             bb(1,k) = bb(1,k) - up
-             dd(1,k) = dd(1,k) + um
-             bb(2,k) = bb(2,k) - up
-             dd(2,k) = dd(2,k) + um
-
-             ! Store the central jacobian and rhs in cc and ff.
-             ! Multiply the off-diagonal terms and rhs by the iblank
-             ! value so the update determined for iblank = 0 is zero.
-
-             rblank = real(iblank(i,j,k), realType)
-
-             cc(1,1,k) = qq(i,j,k,1,1)
-             cc(1,2,k) = qq(i,j,k,1,2)*rblank
-             cc(2,1,k) = qq(i,j,k,2,1)*rblank
-             cc(2,2,k) = qq(i,j,k,2,2)
-
-             ff(1,k) = dvt(i,j,k,1)*rblank
-             ff(2,k) = dvt(i,j,k,2)*rblank
-
-             bb(:,k) = bb(:,k)*rblank
-             dd(:,k) = dd(:,k)*rblank
-
-             ! Set off diagonal terms to zero if wall function are used.
-
-             if((i == 2  .and. flagI2(j,k)) .or. &
-                  (i == il .and. flagIl(j,k)) .or. &
-                  (j == 2  .and. flagJ2(i,k)) .or. &
-                  (j == jl .and. flagJl(i,k)) .or. &
-                  (k == 2  .and. flagK2(i,j)) .or. &
-                  (k == kl .and. flagKl(i,j))) then
-                bb(1,k) = zero
-                dd(1,k) = zero
-                bb(2,k) = zero
-                dd(2,k) = zero
-             endif
-
-          enddo
-
-          ! Solve the tri-diagonal system in k-direction.
-
-          call tdia3(2_intType, kl, bb, cc, dd, ff)
-
-          ! Store the update in dvt.
-
-          do k=2,kl
-             dvt(i,j,k,1) = ff(1,k)
-             dvt(i,j,k,2) = ff(2,k)
-          enddo
-
-       enddo
-    enddo
-    !
-    !       Update the turbulent variables. For explicit relaxation the
-    !       update must be relaxed; for implicit relaxation this has been
-    !       done via the time step.
-    !
-    factor = one
-    if(turbRelax == turbRelaxExplicit) factor = alfaTurb
-
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-             w(i,j,k,itu1) = w(i,j,k,itu1) + factor*dvt(i,j,k,1)
-             w(i,j,k,itu1) = max(w(i,j,k,itu1), zero)
-
-             w(i,j,k,itu2) = w(i,j,k,itu2) + factor*dvt(i,j,k,2)
-             w(i,j,k,itu2) = max(w(i,j,k,itu2), 1.e-10_realType*wInf(itu2))
-          enddo
-       enddo
-    enddo
-
-  end subroutine ktSolve
+                    xa = half * (si(i, j, k, 1) + si(i - 1, j, k, 1)) * voli
+                    ya = half * (si(i, j, k, 2) + si(i - 1, j, k, 2)) * voli
+                    za = half * (si(i, j, k, 3) + si(i - 1, j, k, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
+
+                    ! Off-diagonal terms due to the diffusion terms
+                    ! in i-direction.
+
+                    rhoi = one / w(i, j, k, irho)
+                    mulm = half * (rlv(i - 1, j, k) + rlv(i, j, k))
+                    mulp = half * (rlv(i + 1, j, k) + rlv(i, j, k))
+                    muem = half * (rev(i - 1, j, k) + rev(i, j, k))
+                    muep = half * (rev(i + 1, j, k) + rev(i, j, k))
+
+                    c1m = ttm * (mulm + sig1 * muem) * rhoi
+                    c1p = ttp * (mulp + sig1 * muep) * rhoi
+
+                    c2m = ttm * (mulm + sig2 * muem) * rhoi
+                    c2p = ttp * (mulp + sig2 * muep) * rhoi
+
+                    ! Terms due to the additional diffusion term in j-direction.
+
+                    nui = eight * (rlv(i, j, k) + sig2 * rev(i, j, k)) * rhoi
+                    voli2 = voli * voli
+
+                    sp2 = voli2 * (si(i, j, k, 1)**2 + si(i, j, k, 2)**2 &
+                                   + si(i, j, k, 3)**2)
+                    sm2 = voli2 * (si(i - 1, j, k, 1)**2 + si(i - 1, j, k, 2)**2 &
+                                   + si(i - 1, j, k, 3)**2)
+                    spm = voli2 * (si(i, j, k, 1) * si(i - 1, j, k, 1) &
+                                   + si(i, j, k, 2) * si(i - 1, j, k, 2) &
+                                   + si(i, j, k, 3) * si(i - 1, j, k, 3))
+
+                    ! Store the off-diagonal terms.
+
+                    bb(1, i) = -c1m
+                    dd(1, i) = -c1p
+                    bb(2, i) = -c2m + half * nui * (sp2 - spm)
+                    dd(2, i) = -c2p + half * nui * (sm2 - spm)
+
+                    ! Compute the grid velocity if present.
+                    ! It is taken as the average of i and i-1,
+
+                    if (addGridVelocities) &
+                        qs = half * (sFaceI(i, j, k) + sFaceI(i - 1, j, k)) * voli
+
+                    ! Off-diagonal terms due to the advection term in
+                    ! i-direction. First order approximation.
+
+                    uu = xa * w(i, j, k, ivx) + ya * w(i, j, k, ivy) + za * w(i, j, k, ivz) - qs
+                    um = zero
+                    up = zero
+                    if (uu < zero) um = uu
+                    if (uu > zero) up = uu
+
+                    bb(1, i) = bb(1, i) - up
+                    dd(1, i) = dd(1, i) + um
+                    bb(2, i) = bb(2, i) - up
+                    dd(2, i) = dd(2, i) + um
+
+                    ! Store the central jacobian and rhs in cc and ff.
+                    ! Multiply the off-diagonal terms and rhs by the iblank
+                    ! value so the update determined for iblank = 0 is zero.
+
+                    rblank = real(iblank(i, j, k), realType)
+
+                    cc(1, 1, i) = qq(i, j, k, 1, 1)
+                    cc(1, 2, i) = qq(i, j, k, 1, 2) * rblank
+                    cc(2, 1, i) = qq(i, j, k, 2, 1) * rblank
+                    cc(2, 2, i) = qq(i, j, k, 2, 2)
+
+                    ff(1, i) = dvt(i, j, k, 1) * rblank
+                    ff(2, i) = dvt(i, j, k, 2) * rblank
+
+                    bb(:, i) = bb(:, i) * rblank
+                    dd(:, i) = dd(:, i) * rblank
+
+                    ! Set off diagonal terms to zero if wall function are used.
+
+                    if ((i == 2 .and. flagI2(j, k)) .or. &
+                        (i == il .and. flagIl(j, k)) .or. &
+                        (j == 2 .and. flagJ2(i, k)) .or. &
+                        (j == jl .and. flagJl(i, k)) .or. &
+                        (k == 2 .and. flagK2(i, j)) .or. &
+                        (k == kl .and. flagKl(i, j))) then
+                        bb(1, i) = zero
+                        dd(1, i) = zero
+                        bb(2, i) = zero
+                        dd(2, i) = zero
+                    end if
+
+                end do
+
+                ! Solve the tri-diagonal system in i-direction.
+
+                call tdia3(2_intType, il, bb, cc, dd, ff)
+
+                ! Determine the new rhs for the next direction.
+
+                do i = 2, il
+                    dvt(i, j, k, 1) = qq(i, j, k, 1, 1) * ff(1, i) + qq(i, j, k, 1, 2) * ff(2, i)
+                    dvt(i, j, k, 2) = qq(i, j, k, 2, 1) * ff(1, i) + qq(i, j, k, 2, 2) * ff(2, i)
+                end do
+
+            end do
+        end do
+        !
+        !       dd-ADI step in k-direction. As we solve in k-direction, the
+        !       k-loop is the innermost loop.
+        !
+        do j = 2, jl
+            do i = 2, il
+                do k = 2, kl
 
+                    ! More or less the same code is executed here as above when
+                    ! the residual was built. However, now the off-diagonal
+                    ! terms for the dd-ADI must be built and stored. This could
+                    ! have been done earlier, but then all the coefficients had
+                    ! to be stored. To save memory, they are recomputed.
+                    ! Consequently, see the k-loop to build the residual for
+                    ! the comments.
+
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i, j, k - 1))
+                    volpi = two / (vol(i, j, k) + vol(i, j, k + 1))
+
+                    xm = sk(i, j, k - 1, 1) * volmi
+                    ym = sk(i, j, k - 1, 2) * volmi
+                    zm = sk(i, j, k - 1, 3) * volmi
+                    xp = sk(i, j, k, 1) * volpi
+                    yp = sk(i, j, k, 2) * volpi
+                    zp = sk(i, j, k, 3) * volpi
 
+                    xa = half * (sk(i, j, k, 1) + sk(i, j, k - 1, 1)) * voli
+                    ya = half * (sk(i, j, k, 2) + sk(i, j, k - 1, 2)) * voli
+                    za = half * (sk(i, j, k, 3) + sk(i, j, k - 1, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
+
+                    ! Off-diagonal terms due to the diffusion terms
+                    ! in k-direction.
+
+                    rhoi = one / w(i, j, k, irho)
+                    mulm = half * (rlv(i, j, k - 1) + rlv(i, j, k))
+                    mulp = half * (rlv(i, j, k + 1) + rlv(i, j, k))
+                    muem = half * (rev(i, j, k - 1) + rev(i, j, k))
+                    muep = half * (rev(i, j, k + 1) + rev(i, j, k))
+
+                    c1m = ttm * (mulm + sig1 * muem) * rhoi
+                    c1p = ttp * (mulp + sig1 * muep) * rhoi
+
+                    c2m = ttm * (mulm + sig2 * muem) * rhoi
+                    c2p = ttp * (mulp + sig2 * muep) * rhoi
+
+                    ! Terms due to the additional diffusion term in j-direction.
+
+                    nui = eight * (rlv(i, j, k) + sig2 * rev(i, j, k)) * rhoi
+                    voli2 = voli * voli
+
+                    sp2 = voli2 * (sk(i, j, k, 1)**2 + sk(i, j, k, 2)**2 &
+                                   + sk(i, j, k, 3)**2)
+                    sm2 = voli2 * (sk(i, j, k - 1, 1)**2 + sk(i, j, k - 1, 2)**2 &
+                                   + sk(i, j, k - 1, 3)**2)
+                    spm = voli2 * (sk(i, j, k, 1) * sk(i, j, k - 1, 1) &
+                                   + sk(i, j, k, 2) * sk(i, j, k - 1, 2) &
+                                   + sk(i, j, k, 3) * sk(i, j, k - 1, 3))
+
+                    ! Store the off-diagonal terms.
+
+                    bb(1, k) = -c1m
+                    dd(1, k) = -c1p
+                    bb(2, k) = -c2m + half * nui * (sp2 - spm)
+                    dd(2, k) = -c2p + half * nui * (sm2 - spm)
+
+                    ! Compute the grid velocity if present.
+                    ! It is taken as the average of k and k-1,
+
+                    if (addGridVelocities) &
+                        qs = half * (sFaceK(i, j, k) + sFaceK(i, j, k - 1)) * voli
+
+                    ! Off-diagonal terms due to the advection term in
+                    ! k-direction. First order approximation.
+
+                    uu = xa * w(i, j, k, ivx) + ya * w(i, j, k, ivy) + za * w(i, j, k, ivz) - qs
+                    um = zero
+                    up = zero
+                    if (uu < zero) um = uu
+                    if (uu > zero) up = uu
+
+                    bb(1, k) = bb(1, k) - up
+                    dd(1, k) = dd(1, k) + um
+                    bb(2, k) = bb(2, k) - up
+                    dd(2, k) = dd(2, k) + um
+
+                    ! Store the central jacobian and rhs in cc and ff.
+                    ! Multiply the off-diagonal terms and rhs by the iblank
+                    ! value so the update determined for iblank = 0 is zero.
+
+                    rblank = real(iblank(i, j, k), realType)
+
+                    cc(1, 1, k) = qq(i, j, k, 1, 1)
+                    cc(1, 2, k) = qq(i, j, k, 1, 2) * rblank
+                    cc(2, 1, k) = qq(i, j, k, 2, 1) * rblank
+                    cc(2, 2, k) = qq(i, j, k, 2, 2)
+
+                    ff(1, k) = dvt(i, j, k, 1) * rblank
+                    ff(2, k) = dvt(i, j, k, 2) * rblank
+
+                    bb(:, k) = bb(:, k) * rblank
+                    dd(:, k) = dd(:, k) * rblank
+
+                    ! Set off diagonal terms to zero if wall function are used.
+
+                    if ((i == 2 .and. flagI2(j, k)) .or. &
+                        (i == il .and. flagIl(j, k)) .or. &
+                        (j == 2 .and. flagJ2(i, k)) .or. &
+                        (j == jl .and. flagJl(i, k)) .or. &
+                        (k == 2 .and. flagK2(i, j)) .or. &
+                        (k == kl .and. flagKl(i, j))) then
+                        bb(1, k) = zero
+                        dd(1, k) = zero
+                        bb(2, k) = zero
+                        dd(2, k) = zero
+                    end if
+
+                end do
+
+                ! Solve the tri-diagonal system in k-direction.
+
+                call tdia3(2_intType, kl, bb, cc, dd, ff)
+
+                ! Store the update in dvt.
+
+                do k = 2, kl
+                    dvt(i, j, k, 1) = ff(1, k)
+                    dvt(i, j, k, 2) = ff(2, k)
+                end do
+
+            end do
+        end do
+        !
+        !       Update the turbulent variables. For explicit relaxation the
+        !       update must be relaxed; for implicit relaxation this has been
+        !       done via the time step.
+        !
+        factor = one
+        if (turbRelax == turbRelaxExplicit) factor = alfaTurb
 
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    w(i, j, k, itu1) = w(i, j, k, itu1) + factor * dvt(i, j, k, 1)
+                    w(i, j, k, itu1) = max(w(i, j, k, itu1), zero)
+
+                    w(i, j, k, itu2) = w(i, j, k, itu2) + factor * dvt(i, j, k, 2)
+                    w(i, j, k, itu2) = max(w(i, j, k, itu2), 1.e-10_realType * wInf(itu2))
+                end do
+            end do
+        end do
+
+    end subroutine ktSolve
 
 end module kt
diff --git a/src/turbulence/kw.F90 b/src/turbulence/kw.F90
index 0135f1059..f2d8e8285 100644
--- a/src/turbulence/kw.F90
+++ b/src/turbulence/kw.F90
@@ -2,1036 +2,1036 @@ module kw
 
 contains
 
-  subroutine kw_block(resOnly)
-    !
-    !       kw solves the transport equations for the standard and
-    !       modified k-omega turbulence models in a decoupled manner
-    !       using a diagonal dominant adi-scheme.
-    !
-    use constants
-    use blockPointers, only : il, jl, kl
-    use inputTimeSpectral
-    use iteration
-    use utils, only : setPointers
-    use turbUtils, only : kwEddyViscosity
-    use turbBCRoutines, only : bcTurbTreatment, applyAllTurbBCThisBlock
-    implicit none
-    !
-    !      Subroutine argument.
-    !
-    logical, intent(in) :: resOnly
-
-    ! Set the arrays for the boundary condition treatment.
-
-    call bcTurbTreatment
-
-    ! Solve the transport equations for k and omega.
-
-    call kwSolve(resOnly)
-
-    ! The eddy viscosity and the boundary conditions are only
-    ! applied if an actual update has been computed in kwSolve.
-
-    if(.not. resOnly ) then
-
-       ! Compute the corresponding eddy viscosity.
-
-       call kwEddyViscosity(2, il, 2, jl, 2, kl)
-
-       ! Set the halo values for the turbulent variables.
-       ! We are on the finest mesh, so the second layer of halo
-       ! cells must be computed as well.
-
-       call applyAllTurbBCThisBlock(.true.)
-
-    endif
-
-  end subroutine kw_block
-
-  subroutine kwSolve(resOnly)
-    !
-    !       kwSolve solves the k-omega transport equations of both
-    !       the original and modified k-omega models
-    !       in a coupled manner using a diagonal dominant ADI-scheme.
-    !
-    use blockPointers
-    use constants
-    use flowVarRefState
-    use inputIteration
-    use inputPhysics
-    use paramTurb
-    use turbMod, only : prod, dvt, sct, kwCD, sig1, sig2, vort
-    use turbUtils, only : prodSmag2, prodWmag2, prodKatoLaunder, &
-         turbAdvection, unsteadyTurbTerm, tdia3, kwCDterm
-    use turbCurveFits, only : curveTupYp
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    logical, intent(in) :: resOnly
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, nn
-
-    real(kind=realType) :: rkwGam1
-    real(kind=realType) :: rhoi, ss, spk, sdk
-    real(kind=realType) :: voli, volmi, volpi
-    real(kind=realType) :: xm, ym, zm, xp, yp, zp, xa, ya, za
-    real(kind=realType) :: ttm, ttp, mulm, mulp, muem, muep
-    real(kind=realType) :: c1m, c1p, c10, c2m, c2p, c20
-    real(kind=realType) :: b1, b2, c1, c2, d1, d2
-    real(kind=realType) :: qs, uu, um, up, factor, utau, rblank
-
-    real(kind=realType), dimension(itu1:itu2) :: tup
-
-    real(kind=realType), dimension(2:il,2:jl,2:kl,2,2)  :: qq
-    real(kind=realType), dimension(2,2:max(il,jl,kl))   :: bb, dd, ff
-    real(kind=realType), dimension(2,2,2:max(il,jl,kl)) :: cc
-
-    real(kind=realType), dimension(:,:,:), pointer :: ddw, ww, ddvt
-    real(kind=realType), dimension(:,:),   pointer :: rrlv
-    real(kind=realType), dimension(:,:),   pointer :: dd2Wall
-
-    logical, dimension(2:jl,2:kl), target :: flagI2, flagIl
-    logical, dimension(2:il,2:kl), target :: flagJ2, flagJl
-    logical, dimension(2:il,2:jl), target :: flagK2, flagKl
-
-    logical, dimension(:,:), pointer :: flag
-
-    ! Set model constants
-
-    rkwGam1  = rkwBeta1/rkwBetas - rkwSigw1*rkwK*rkwK/sqrt(rkwBetas)
-    sig1     = rkwSigk1
-    sig2     = rkwSigw1
-
-    ! Set the pointer for dvt in dw, such that the code is more
-    ! readable. Also set the pointers for the production term,
-    ! vorticity and the cross diffusion term.
-
-    dvt  => scratch(1:,1:,1:,idvt:)
-    prod => scratch(1:,1:,1:,iprod)
-    vort => prod
-    kwCD => scratch(1:,1:,1:,icd)
-    !
-    !       Production term.
-    !
-    select case (turbProd)
-    case (strain)
-       call prodSmag2
-
-    case (vorticity)
-       call prodWmag2
-
-    case (katoLaunder)
-       call prodKatoLaunder
-
-    end select
-    !
-    !       Source terms.
-    !       Determine the source term and its derivative w.r.t. k and
-    !       omega for all internal cells of the block.
-    !
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-
-             ! Compute the source terms for both the k and the omega
-             ! equation. Note that prod contains the unscaled production
-             ! term. Furthermore the production term of k is limited to
-             ! a certain times the destruction term.
-
-             rhoi = one/w(i,j,k,irho)
-             ss   = prod(i,j,k)
-             spk  = rev(i,j,k)*ss*rhoi
-             sdk  = rkwBetas*w(i,j,k,itu1)*w(i,j,k,itu2)
-             spk  = min(spk, pklim*sdk)
-
-             dvt(i,j,k,1) = spk - sdk
-             dvt(i,j,k,2) = rkwGam1*ss - rkwBeta1*w(i,j,k,itu2)**2
-
-             ! Compute the source term jacobian. Note that only the
-             ! destruction terms are linearized to increase the diagonal
-             ! dominance of the matrix. Furthermore minus the source
-             ! term jacobian is stored.
-
-             qq(i,j,k,1,1) = rkwBetas*w(i,j,k,itu2)
-             ! qq(i,j,k,1,2) = rkwBetas*w(i,j,k,itu1)
-             qq(i,j,k,1,2) = zero
-             qq(i,j,k,2,1) = zero
-             qq(i,j,k,2,2) = two*rkwBeta1*w(i,j,k,itu2)
-
-          enddo
-       enddo
-    enddo
-    !
-    !       Compute the cross-diffusion term for the modified version of
-    !       the k-omega model. It should cure the free-stream dependency
-    !       of the original model.
-    !
-    if(turbModel == komegaModified) then
-
-       ! Compute the cross diffusion term.
-
-       call kwCDterm
-
-       ! Multiply it with the correct constant and take it only into
-       ! account if it is positive.
-
-       do k=2,kl
-          do j=2,jl
-             do i=2,il
-                dvt(i,j,k,2) = dvt(i,j,k,2) &
-                     + rkwSigd1*max(zero, kwCD(i,j,k))
-             enddo
-          enddo
-       enddo
-
-    endif
-    !
-    !       Advection and unsteady terms.
-    !
-    nn = itu1 - 1
-    call turbAdvection(2_intType, 2_intType, nn, qq)
-
-    call unsteadyTurbTerm(2_intType, 2_intType, nn, qq)
-    !
-    !       Viscous terms in k-direction.
-    !
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-
-             ! Compute the metrics in zeta-direction, i.e. along the
-             ! line k = constant.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i,j,k-1))
-             volpi = two/(vol(i,j,k) + vol(i,j,k+1))
-
-             xm = sk(i,j,k-1,1)*volmi
-             ym = sk(i,j,k-1,2)*volmi
-             zm = sk(i,j,k-1,3)*volmi
-             xp = sk(i,j,k,  1)*volpi
-             yp = sk(i,j,k,  2)*volpi
-             zp = sk(i,j,k,  3)*volpi
-
-             xa  = half*(sk(i,j,k,1) + sk(i,j,k-1,1))*voli
-             ya  = half*(sk(i,j,k,2) + sk(i,j,k-1,2))*voli
-             za  = half*(sk(i,j,k,3) + sk(i,j,k-1,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             ! Computation of the viscous terms in zeta-direction; note
-             ! that cross-derivatives are neglected, i.e. the mesh is
-             ! assumed to be orthogonal.
-             ! The second derivative in zeta-direction is constructed as
-             ! the central difference of the first order derivatives, i.e.
-             ! d^2/dzeta^2 = d/dzeta (d/dzeta k+1/2 - d/dzeta k-1/2).
-             ! In this way the metric as well as the varying viscosity
-             ! can be taken into account; the latter appears inside the
-             ! d/dzeta derivative. The whole term is divided by rho to
-             ! obtain the diffusion term for k and omega.
-
-             rhoi = one/w(i,j,k,irho)
-             mulm = half*(rlv(i,j,k-1) + rlv(i,j,k))
-             mulp = half*(rlv(i,j,k+1) + rlv(i,j,k))
-             muem = half*(rev(i,j,k-1) + rev(i,j,k))
-             muep = half*(rev(i,j,k+1) + rev(i,j,k))
-
-             c1m = ttm*(mulm + sig1*muem)*rhoi
-             c1p = ttp*(mulp + sig1*muep)*rhoi
-             c10 = c1m + c1p
-
-             c2m = ttm*(mulm + sig2*muem)*rhoi
-             c2p = ttp*(mulp + sig2*muep)*rhoi
-             c20 = c2m + c2p
-
-             ! Update the residual for this cell and store the possible
-             ! coefficients for the matrix in b1, b2, c1, c2, d1 and d2.
-
-             dvt(i,j,k,1) = dvt(i,j,k,1)      + c1m*w(i,j,k-1,itu1) &
-                  - c10*w(i,j,k,itu1) + c1p*w(i,j,k+1,itu1)
-             dvt(i,j,k,2) = dvt(i,j,k,2)      + c2m*w(i,j,k-1,itu2) &
-                  - c20*w(i,j,k,itu2) + c2p*w(i,j,k+1,itu2)
-
-             b1 = -c1m
-             c1 =  c10
-             d1 = -c1p
-
-             b2 = -c2m
-             c2 =  c20
-             d2 = -c2p
-
-             ! Update the central jacobian. For nonboundary cells this
-             ! is simply c1 and c2. For boundary cells this is slightly
-             ! more complicated, because the boundary conditions are
-             ! treated implicitly and the off-diagonal terms b1, b2 and
-             ! d1, d2 must be taken into account.
-             ! The boundary conditions are only treated implicitly if
-             ! the diagonal dominance of the matrix is increased.
-
-             if(k == 2) then
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1 &
-                     - b1*max(bmtk1(i,j,itu1,itu1),zero)
-                qq(i,j,k,1,2) = qq(i,j,k,1,2) - b1*bmtk1(i,j,itu1,itu2)
-                qq(i,j,k,2,1) = qq(i,j,k,2,1) - b2*bmtk1(i,j,itu2,itu1)
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2 &
-                     - b2*max(bmtk1(i,j,itu2,itu2),zero)
-             else if(k == kl) then
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1 &
-                     - d1*max(bmtk2(i,j,itu1,itu1),zero)
-                qq(i,j,k,1,2) = qq(i,j,k,1,2) - d1*bmtk2(i,j,itu1,itu2)
-                qq(i,j,k,2,1) = qq(i,j,k,2,1) - d2*bmtk2(i,j,itu2,itu1)
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2 &
-                     - d2*max(bmtk2(i,j,itu2,itu2),zero)
-             else
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2
-             endif
-
-          enddo
-       enddo
-    enddo
-    !
-    !       Viscous terms in j-direction.
-    !
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-
-             ! Compute the metrics in eta-direction, i.e. along the
-             ! line j = constant.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i,j-1,k))
-             volpi = two/(vol(i,j,k) + vol(i,j+1,k))
-
-             xm = sj(i,j-1,k,1)*volmi
-             ym = sj(i,j-1,k,2)*volmi
-             zm = sj(i,j-1,k,3)*volmi
-             xp = sj(i,j,  k,1)*volpi
-             yp = sj(i,j,  k,2)*volpi
-             zp = sj(i,j,  k,3)*volpi
-
-             xa  = half*(sj(i,j,k,1) + sj(i,j-1,k,1))*voli
-             ya  = half*(sj(i,j,k,2) + sj(i,j-1,k,2))*voli
-             za  = half*(sj(i,j,k,3) + sj(i,j-1,k,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             ! Computation of the viscous terms in eta-direction; note
-             ! that cross-derivatives are neglected, i.e. the mesh is
-             ! assumed to be orthogonal.
-             ! The second derivative in eta-direction is constructed as
-             ! the central difference of the first order derivatives, i.e.
-             ! d^2/deta^2 = d/deta (d/deta j+1/2 - d/deta j-1/2).
-             ! In this way the metric as well as the varying viscosity
-             ! can be taken into account; the latter appears inside the
-             ! d/deta derivative. The whole term is divided by rho to
-             ! obtain the diffusion term for k and omega.
-
-             rhoi = one/w(i,j,k,irho)
-             mulm = half*(rlv(i,j-1,k) + rlv(i,j,k))
-             mulp = half*(rlv(i,j+1,k) + rlv(i,j,k))
-             muem = half*(rev(i,j-1,k) + rev(i,j,k))
-             muep = half*(rev(i,j+1,k) + rev(i,j,k))
-
-             c1m = ttm*(mulm + sig1*muem)*rhoi
-             c1p = ttp*(mulp + sig1*muep)*rhoi
-             c10 = c1m + c1p
-
-             c2m = ttm*(mulm + sig2*muem)*rhoi
-             c2p = ttp*(mulp + sig2*muep)*rhoi
-             c20 = c2m + c2p
-
-             ! Update the residual for this cell and store the possible
-             ! coefficients for the matrix in b1, b2, c1, c2, d1 and d2.
-
-             dvt(i,j,k,1) = dvt(i,j,k,1)      + c1m*w(i,j-1,k,itu1) &
-                  - c10*w(i,j,k,itu1) + c1p*w(i,j+1,k,itu1)
-             dvt(i,j,k,2) = dvt(i,j,k,2)      + c2m*w(i,j-1,k,itu2) &
-                  - c20*w(i,j,k,itu2) + c2p*w(i,j+1,k,itu2)
-
-             b1 = -c1m
-             c1 =  c10
-             d1 = -c1p
-
-             b2 = -c2m
-             c2 =  c20
-             d2 = -c2p
-
-             ! Update the central jacobian. For nonboundary cells this
-             ! is simply c1 and c2. For boundary cells this is slightly
-             ! more complicated, because the boundary conditions are
-             ! treated implicitly and the off-diagonal terms b1, b2 and
-             ! d1, d2 must be taken into account.
-             ! The boundary conditions are only treated implicitly if
-             ! the diagonal dominance of the matrix is increased.
-
-             if(j == 2) then
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1 &
-                     - b1*max(bmtj1(i,k,itu1,itu1),zero)
-                qq(i,j,k,1,2) = qq(i,j,k,1,2) - b1*bmtj1(i,k,itu1,itu2)
-                qq(i,j,k,2,1) = qq(i,j,k,2,1) - b2*bmtj1(i,k,itu2,itu1)
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2 &
-                     - b2*max(bmtj1(i,k,itu2,itu2),zero)
-             else if(j == jl) then
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1 &
-                     - d1*max(bmtj2(i,k,itu1,itu1),zero)
-                qq(i,j,k,1,2) = qq(i,j,k,1,2) - d1*bmtj2(i,k,itu1,itu2)
-                qq(i,j,k,2,1) = qq(i,j,k,2,1) - d2*bmtj2(i,k,itu2,itu1)
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2 &
-                     - d2*max(bmtj2(i,k,itu2,itu2),zero)
-             else
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2
-             endif
-
-          enddo
-       enddo
-    enddo
-    !
-    !       Viscous terms in i-direction.
-    !
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-
-             ! Compute the metrics in xi-direction, i.e. along the
-             ! line i = constant.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i-1,j,k))
-             volpi = two/(vol(i,j,k) + vol(i+1,j,k))
-
-             xm = si(i-1,j,k,1)*volmi
-             ym = si(i-1,j,k,2)*volmi
-             zm = si(i-1,j,k,3)*volmi
-             xp = si(i,  j,k,1)*volpi
-             yp = si(i,  j,k,2)*volpi
-             zp = si(i,  j,k,3)*volpi
-
-             xa  = half*(si(i,j,k,1) + si(i-1,j,k,1))*voli
-             ya  = half*(si(i,j,k,2) + si(i-1,j,k,2))*voli
-             za  = half*(si(i,j,k,3) + si(i-1,j,k,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             ! Computation of the viscous terms in xi-direction; note
-             ! that cross-derivatives are neglected, i.e. the mesh is
-             ! assumed to be orthogonal.
-             ! The second derivative in xi-direction is constructed as
-             ! the central difference of the first order derivatives, i.e.
-             ! d^2/dxi^2 = d/dxi (d/dxi i+1/2 - d/dxi i-1/2).
-             ! In this way the metric as well as the varying viscosity
-             ! can be taken into account; the latter appears inside the
-             ! d/dxi derivative. The whole term is divided by rho to
-             ! obtain the diffusion term for k and omega.
-
-             rhoi = one/w(i,j,k,irho)
-             mulm = half*(rlv(i-1,j,k) + rlv(i,j,k))
-             mulp = half*(rlv(i+1,j,k) + rlv(i,j,k))
-             muem = half*(rev(i-1,j,k) + rev(i,j,k))
-             muep = half*(rev(i+1,j,k) + rev(i,j,k))
-
-             c1m = ttm*(mulm + sig1*muem)*rhoi
-             c1p = ttp*(mulp + sig1*muep)*rhoi
-             c10 = c1m + c1p
-
-             c2m = ttm*(mulm + sig2*muem)*rhoi
-             c2p = ttp*(mulp + sig2*muep)*rhoi
-             c20 = c2m + c2p
-
-             ! Update the residual for this cell and store the possible
-             ! coefficients for the matrix in b1, b2, c1, c2, d1 and d2.
-
-             dvt(i,j,k,1) = dvt(i,j,k,1)      + c1m*w(i-1,j,k,itu1) &
-                  - c10*w(i,j,k,itu1) + c1p*w(i+1,j,k,itu1)
-             dvt(i,j,k,2) = dvt(i,j,k,2)      + c2m*w(i-1,j,k,itu2) &
-                  - c20*w(i,j,k,itu2) + c2p*w(i+1,j,k,itu2)
-
-             b1 = -c1m
-             c1 =  c10
-             d1 = -c1p
-
-             b2 = -c2m
-             c2 =  c20
-             d2 = -c2p
-
-             ! Update the central jacobian. For nonboundary cells this
-             ! is simply c1 and c2. For boundary cells this is slightly
-             ! more complicated, because the boundary conditions are
-             ! treated implicitly and the off-diagonal terms b1, b2 and
-             ! d1, d2 must be taken into account.
-             ! The boundary conditions are only treated implicitly if
-             ! the diagonal dominance of the matrix is increased.
-
-             if(i == 2) then
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1 &
-                     - b1*max(bmti1(j,k,itu1,itu1),zero)
-                qq(i,j,k,1,2) = qq(i,j,k,1,2) - b1*bmti1(j,k,itu1,itu2)
-                qq(i,j,k,2,1) = qq(i,j,k,2,1) - b2*bmti1(j,k,itu2,itu1)
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2 &
-                     - b2*max(bmti1(j,k,itu2,itu2),zero)
-             else if(i == il) then
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1 &
-                     - d1*max(bmti2(j,k,itu1,itu1),zero)
-                qq(i,j,k,1,2) = qq(i,j,k,1,2) - d1*bmti2(j,k,itu1,itu2)
-                qq(i,j,k,2,1) = qq(i,j,k,2,1) - d2*bmti2(j,k,itu2,itu1)
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2 &
-                     - d2*max(bmti2(j,k,itu2,itu2),zero)
-             else
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2
-             endif
-
-          enddo
-       enddo
-    enddo
-
-    ! Multiply the residual by the volume and store this in dw; this
-    ! is done for monitoring reasons only. The multiplication with the
-    ! volume is present to be consistent with the flow residuals; also
-    ! the negative value is taken, again to be consistent with the
-    ! flow equations. Also multiply by iblank so that no updates occur
-    ! in holes or the overset boundary.
-
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-             rblank = real(iblank(i,j,k), realType)
-             dw(i,j,k,itu1) = -volRef(i,j,k)*dvt(i,j,k,1)*rblank
-             dw(i,j,k,itu2) = -volRef(i,j,k)*dvt(i,j,k,2)*rblank
-          enddo
-       enddo
-    enddo
-
-    ! Initialize the wall function flags to .false.
-
-    flagI2 = .false.
-    flagIl = .false.
-    flagJ2 = .false.
-    flagJl = .false.
-    flagK2 = .false.
-    flagKl = .false.
-
-    ! Modify the rhs of the 1st internal cell, if wall functions
-    ! are used; their value is determined by the table.
-
-    testWallFunctions: if( wallFunctions ) then
-
-       bocos: do nn=1,nViscBocos
-
-          ! Determine the block face on which the subface is located
-          ! and set some variables. As flag points to the entire array
-          ! flagI2, etc., its starting indices are the starting indices
-          ! of its target and not 1.
-
-          select case (BCFaceID(nn))
-          case (iMin)
-             flag    => flagI2
-             ddw     => dw(2,1:,1:,1:);  ddvt => dvt(2,1:,1:,1:)
-             ww      => w(2,1:,1:,1:);   rrlv => rlv(2,1:,1:)
-             dd2Wall => d2Wall(2,:,:)
-
-          case (iMax)
-             flag    => flagIl
-             ddw     => dw(il,1:,1:,1:);  ddvt => dvt(il,1:,1:,1:)
-             ww      => w(il,1:,1:,1:);   rrlv => rlv(il,1:,1:)
-             dd2Wall => d2Wall(il,:,:)
-
-          case (jMin)
-             flag    => flagJ2
-             ddw     => dw(1:,2,1:,1:);  ddvt => dvt(1:,2,1:,1:)
-             ww      => w(1:,2,1:,1:);   rrlv => rlv(1:,2,1:)
-             dd2Wall => d2Wall(:,2,:)
-
-          case (jMax)
-             flag    => flagJl
-             ddw     => dw(1:,jl,1:,1:);  ddvt => dvt(1:,jl,1:,1:)
-             ww      => w(1:,jl,1:,1:);   rrlv => rlv(1:,jl,1:)
-             dd2Wall => d2Wall(:,jl,:)
-
-          case (kMin)
-             flag    => flagK2
-             ddw     => dw(1:,1:,2,1:);  ddvt => dvt(1:,1:,2,1:)
-             ww      => w(1:,1:,2,1:);   rrlv => rlv(1:,1:,2)
-             dd2Wall => d2Wall(:,:,2)
-
-          case (kMax)
-             flag    => flagKl
-             ddw     => dw(1:,1:,kl,1:);  ddvt => dvt(1:,1:,kl,1:)
-             ww      => w(1:,1:,kl,1:);   rrlv => rlv(1:,1:,kl)
-             dd2Wall => d2Wall(:,:,kl)
-
-          end select
-
-          ! Loop over the owned faces of this subface. Therefore the
-          ! nodal range of BCData must be used. The offset of +1 is
-          ! present, because the starting index of the cell range is
-          ! 1 larger than the starting index of the nodal range.
-
-          do j=(BCData(nn)%jnBeg+1),BCData(nn)%jnEnd
-             do i=(BCData(nn)%inBeg+1),BCData(nn)%inEnd
-
-                ! Set ddw to zero for proper monitoring of the
-                ! convergence.
-
-                ddw(i,j,itu1) = zero
-                ddw(i,j,itu2) = zero
-
-                ! Enforce k and omega in the 1st internal cell from
-                ! the wall function table. There is an offset of -1 in
-                ! the wall distance. Note that the offset compared to
-                ! the current value must be stored. Also note that the
-                ! curve fits contain the non-dimensional values.
-
-                utau = viscSubface(nn)%utau(i,j)
-                yp = ww(i,j,irho)*dd2Wall(i-1,j-1)*utau/rrlv(i,j)
-
-                call curveTupYp(tup, yp, itu1, itu2)
-
-                tup(itu1) = tup(itu1)*utau**2
-                tup(itu2) = tup(itu2)*utau**2/rrlv(i,j)*ww(i,j,irho)
-
-                ddvt(i,j,1) = tup(itu1) - ww(i,j,itu1)
-                ddvt(i,j,2) = tup(itu2) - ww(i,j,itu2)
-
-                ! Set the wall flag to .true.
-
-                flag(i,j) = .true.
-
-             enddo
-          enddo
-
-       enddo bocos
-    endif testWallFunctions
-
-    ! Return if only the residual must be computed.
-
-    if( resOnly ) return
-
-    ! For implicit relaxation take the local time step into account,
-    ! where dt is the inverse of the central jacobian times the cfl
-    ! number. The following system is solved:
-    ! (i/dt + cc + bb + dd)*dw = rhs, in which i/dt = cc/cfl. As in
-    ! the rest of the algorithm only the modified central jacobian is
-    ! used, stored it now.
-
-    ! Compute the factor multiplying the central jacobian, which
-    ! is 1 + 1/cfl (implicit relaxation only).
-
-    factor = one
-    if(turbRelax == turbRelaxImplicit) &
-         factor = one + (one-alfaTurb)/alfaTurb
-
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-             qq(i,j,k,1,1) = factor*qq(i,j,k,1,1)
-             qq(i,j,k,1,2) = factor*qq(i,j,k,1,2)
-             qq(i,j,k,2,1) = factor*qq(i,j,k,2,1)
-             qq(i,j,k,2,2) = factor*qq(i,j,k,2,2)
-
-             ! Set qq to 1 if the value is determined by the table.
-
-             if((i == 2  .and. flagI2(j,k)) .or. &
-                  (i == il .and. flagIl(j,k)) .or. &
-                  (j == 2  .and. flagJ2(i,k)) .or. &
-                  (j == jl .and. flagJl(i,k)) .or. &
-                  (k == 2  .and. flagK2(i,j)) .or. &
-                  (k == kl .and. flagKl(i,j))) then
-                qq(i,j,k,1,1) = one
-                qq(i,j,k,1,2) = zero
-                qq(i,j,k,2,1) = zero
-                qq(i,j,k,2,2) = one
-             endif
-          enddo
-       enddo
-    enddo
-
-    ! Initialize the grid velocity to zero. This value will be used
-    ! if the block is not moving.
-
-    qs = zero
-    !
-    !       dd-ADI step in j-direction. There is no particular reason to
-    !       start in j-direction, it just happened to be so. As we solve
-    !       in j-direction, the j-loop is the innermost loop.
-    !
-    do k=2,kl
-       do i=2,il
-          do j=2,jl
-
-             ! More or less the same code is executed here as above when
-             ! the residual was built. However, now the off-diagonal
-             ! terms for the dd-ADI must be built and stored. This could
-             ! have been done earlier, but then all the coefficients had
-             ! to be stored. To save memory, they are recomputed.
-             ! Consequently, see the j-loop to build the residual for
-             ! the comments.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i,j-1,k))
-             volpi = two/(vol(i,j,k) + vol(i,j+1,k))
-
-             xm = sj(i,j-1,k,1)*volmi
-             ym = sj(i,j-1,k,2)*volmi
-             zm = sj(i,j-1,k,3)*volmi
-             xp = sj(i,j,  k,1)*volpi
-             yp = sj(i,j,  k,2)*volpi
-             zp = sj(i,j,  k,3)*volpi
-
-             xa  = half*(sj(i,j,k,1) + sj(i,j-1,k,1))*voli
-             ya  = half*(sj(i,j,k,2) + sj(i,j-1,k,2))*voli
-             za  = half*(sj(i,j,k,3) + sj(i,j-1,k,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             ! Off-diagonal terms due to the diffusion terms
-             ! in j-direction.
-
-             rhoi = one/w(i,j,k,irho)
-             mulm = half*(rlv(i,j-1,k) + rlv(i,j,k))
-             mulp = half*(rlv(i,j+1,k) + rlv(i,j,k))
-             muem = half*(rev(i,j-1,k) + rev(i,j,k))
-             muep = half*(rev(i,j+1,k) + rev(i,j,k))
-
-             c1m = ttm*(mulm + sig1*muem)*rhoi
-             c1p = ttp*(mulp + sig1*muep)*rhoi
-
-             c2m = ttm*(mulm + sig2*muem)*rhoi
-             c2p = ttp*(mulp + sig2*muep)*rhoi
-
-             bb(1,j) = -c1m
-             dd(1,j) = -c1p
-             bb(2,j) = -c2m
-             dd(2,j) = -c2p
-
-             ! Compute the grid velocity if present.
-             ! It is taken as the average of j and j-1,
-
-             if( addGridVelocities ) &
-                  qs = half*(sFaceJ(i,j,k) + sFaceJ(i,j-1,k))*voli
-
-             ! Off-diagonal terms due to the advection term in
-             ! j-direction. First order approximation.
-
-             uu = xa*w(i,j,k,ivx) + ya*w(i,j,k,ivy) + za*w(i,j,k,ivz) - qs
-             um = zero
-             up = zero
-             if(uu < zero) um = uu
-             if(uu > zero) up = uu
-
-             bb(1,j) = bb(1,j) - up
-             dd(1,j) = dd(1,j) + um
-             bb(2,j) = bb(2,j) - up
-             dd(2,j) = dd(2,j) + um
-
-             ! Store the central jacobian and rhs in cc and ff.
-             ! Multiply the off-diagonal terms and rhs by the iblank
-             ! value so the update determined for iblank = 0 is zero.
-
-             rblank = real(iblank(i,j,k), realType)
-
-             cc(1,1,j) = qq(i,j,k,1,1)
-             cc(1,2,j) = qq(i,j,k,1,2)*rblank
-             cc(2,1,j) = qq(i,j,k,2,1)*rblank
-             cc(2,2,j) = qq(i,j,k,2,2)
-
-             ff(1,j) = dvt(i,j,k,1)*rblank
-             ff(2,j) = dvt(i,j,k,2)*rblank
-
-             bb(:,j) = bb(:,j)*rblank
-             dd(:,j) = dd(:,j)*rblank
-
-             ! Set off diagonal terms to zero if wall function are used.
-
-             if((i == 2  .and. flagI2(j,k)) .or. &
-                  (i == il .and. flagIl(j,k)) .or. &
-                  (j == 2  .and. flagJ2(i,k)) .or. &
-                  (j == jl .and. flagJl(i,k)) .or. &
-                  (k == 2  .and. flagK2(i,j)) .or. &
-                  (k == kl .and. flagKl(i,j))) then
-                bb(1,j) = zero
-                dd(1,j) = zero
-                bb(2,j) = zero
-                dd(2,j) = zero
-             endif
-
-          enddo
-
-          ! Solve the tri-diagonal system in j-direction.
-
-          call tdia3(2_intType, jl, bb, cc, dd, ff)
-
-          ! Determine the new rhs for the next direction.
-
-          do j=2,jl
-             dvt(i,j,k,1) = qq(i,j,k,1,1)*ff(1,j) + qq(i,j,k,1,2)*ff(2,j)
-             dvt(i,j,k,2) = qq(i,j,k,2,1)*ff(1,j) + qq(i,j,k,2,2)*ff(2,j)
-          enddo
-
-       enddo
-    enddo
-    !
-    !       dd-ADI step in i-direction. As we solve in i-direction, the
-    !       i-loop is the innermost loop.
-    !
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-
-             ! More or less the same code is executed here as above when
-             ! the residual was built. However, now the off-diagonal
-             ! terms for the dd-ADI must be built and stored. This could
-             ! have been done earlier, but then all the coefficients had
-             ! to be stored. To save memory, they are recomputed.
-             ! Consequently, see the i-loop to build the residual for
-             ! the comments.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i-1,j,k))
-             volpi = two/(vol(i,j,k) + vol(i+1,j,k))
-
-             xm = si(i-1,j,k,1)*volmi
-             ym = si(i-1,j,k,2)*volmi
-             zm = si(i-1,j,k,3)*volmi
-             xp = si(i,  j,k,1)*volpi
-             yp = si(i,  j,k,2)*volpi
-             zp = si(i,  j,k,3)*volpi
-
-             xa  = half*(si(i,j,k,1) + si(i-1,j,k,1))*voli
-             ya  = half*(si(i,j,k,2) + si(i-1,j,k,2))*voli
-             za  = half*(si(i,j,k,3) + si(i-1,j,k,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             ! Off-diagonal terms due to the diffusion terms
-             ! in i-direction.
-
-             rhoi = one/w(i,j,k,irho)
-             mulm = half*(rlv(i-1,j,k) + rlv(i,j,k))
-             mulp = half*(rlv(i+1,j,k) + rlv(i,j,k))
-             muem = half*(rev(i-1,j,k) + rev(i,j,k))
-             muep = half*(rev(i+1,j,k) + rev(i,j,k))
-
-             c1m = ttm*(mulm + sig1*muem)*rhoi
-             c1p = ttp*(mulp + sig1*muep)*rhoi
-
-             c2m = ttm*(mulm + sig2*muem)*rhoi
-             c2p = ttp*(mulp + sig2*muep)*rhoi
-
-             bb(1,i) = -c1m
-             dd(1,i) = -c1p
-             bb(2,i) = -c2m
-             dd(2,i) = -c2p
-
-             ! Compute the grid velocity if present.
-             ! It is taken as the average of i and i-1,
-
-             if( addGridVelocities ) &
-                  qs = half*(sFaceI(i,j,k) + sFaceI(i-1,j,k))*voli
-
-             ! Off-diagonal terms due to the advection term in
-             ! i-direction. First order approximation.
-
-             uu = xa*w(i,j,k,ivx) + ya*w(i,j,k,ivy) + za*w(i,j,k,ivz) - qs
-             um = zero
-             up = zero
-             if(uu < zero) um = uu
-             if(uu > zero) up = uu
-
-             bb(1,i) = bb(1,i) - up
-             dd(1,i) = dd(1,i) + um
-             bb(2,i) = bb(2,i) - up
-             dd(2,i) = dd(2,i) + um
-
-             ! Store the central jacobian and rhs in cc and ff.
-             ! Multiply the off-diagonal terms and rhs by the iblank
-             ! value so the update determined for iblank = 0 is zero.
-
-             rblank = real(iblank(i,j,k), realType)
-
-             cc(1,1,i) = qq(i,j,k,1,1)
-             cc(1,2,i) = qq(i,j,k,1,2)*rblank
-             cc(2,1,i) = qq(i,j,k,2,1)*rblank
-             cc(2,2,i) = qq(i,j,k,2,2)
-
-             ff(1,i) = dvt(i,j,k,1)*rblank
-             ff(2,i) = dvt(i,j,k,2)*rblank
-
-             bb(:,i) = bb(:,i)*rblank
-             dd(:,i) = dd(:,i)*rblank
-
-             ! Set off diagonal terms to zero if wall function are used.
-
-             if((i == 2  .and. flagI2(j,k)) .or. &
-                  (i == il .and. flagIl(j,k)) .or. &
-                  (j == 2  .and. flagJ2(i,k)) .or. &
-                  (j == jl .and. flagJl(i,k)) .or. &
-                  (k == 2  .and. flagK2(i,j)) .or. &
-                  (k == kl .and. flagKl(i,j))) then
-                bb(1,i) = zero
-                dd(1,i) = zero
-                bb(2,i) = zero
-                dd(2,i) = zero
-             endif
-
-          enddo
-
-          ! Solve the tri-diagonal system in i-direction.
-
-          call tdia3(2_intType, il, bb, cc, dd, ff)
-
-          ! Determine the new rhs for the next direction.
-
-          do i=2,il
-             dvt(i,j,k,1) = qq(i,j,k,1,1)*ff(1,i) + qq(i,j,k,1,2)*ff(2,i)
-             dvt(i,j,k,2) = qq(i,j,k,2,1)*ff(1,i) + qq(i,j,k,2,2)*ff(2,i)
-          enddo
-
-       enddo
-    enddo
-    !
-    !       dd-adi step in k-direction. As we solve in k-direction, the
-    !       k-loop is the innermost loop.
-    !
-    do j=2,jl
-       do i=2,il
-          do k=2,kl
-
-             ! More or less the same code is executed here as above when
-             ! the residual was built. However, now the off-diagonal
-             ! terms for the dd-ADI must be built and stored. This could
-             ! have been done earlier, but then all the coefficients had
-             ! to be stored. To save memory, they are recomputed.
-             ! Consequently, see the k-loop to build the residual for
-             ! the comments.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i,j,k-1))
-             volpi = two/(vol(i,j,k) + vol(i,j,k+1))
-
-             xm = sk(i,j,k-1,1)*volmi
-             ym = sk(i,j,k-1,2)*volmi
-             zm = sk(i,j,k-1,3)*volmi
-             xp = sk(i,j,k,  1)*volpi
-             yp = sk(i,j,k,  2)*volpi
-             zp = sk(i,j,k,  3)*volpi
-
-             xa  = half*(sk(i,j,k,1) + sk(i,j,k-1,1))*voli
-             ya  = half*(sk(i,j,k,2) + sk(i,j,k-1,2))*voli
-             za  = half*(sk(i,j,k,3) + sk(i,j,k-1,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             ! Off-diagonal terms due to the diffusion terms
-             ! in k-direction.
-
-             rhoi = one/w(i,j,k,irho)
-             mulm = half*(rlv(i,j,k-1) + rlv(i,j,k))
-             mulp = half*(rlv(i,j,k+1) + rlv(i,j,k))
-             muem = half*(rev(i,j,k-1) + rev(i,j,k))
-             muep = half*(rev(i,j,k+1) + rev(i,j,k))
-
-             c1m = ttm*(mulm + sig1*muem)*rhoi
-             c1p = ttp*(mulp + sig1*muep)*rhoi
-
-             c2m = ttm*(mulm + sig2*muem)*rhoi
-             c2p = ttp*(mulp + sig2*muep)*rhoi
-
-             bb(1,k) = -c1m
-             dd(1,k) = -c1p
-             bb(2,k) = -c2m
-             dd(2,k) = -c2p
-
-             ! Compute the grid velocity if present.
-             ! It is taken as the average of k and k-1,
-
-             if( addGridVelocities ) &
-                  qs = half*(sFaceK(i,j,k) + sFaceK(i,j,k-1))*voli
-
-             ! Off-diagonal terms due to the advection term in
-             ! k-direction. First order approximation.
-
-             uu = xa*w(i,j,k,ivx) + ya*w(i,j,k,ivy) + za*w(i,j,k,ivz) - qs
-             um = zero
-             up = zero
-             if(uu < zero) um = uu
-             if(uu > zero) up = uu
-
-             bb(1,k) = bb(1,k) - up
-             dd(1,k) = dd(1,k) + um
-             bb(2,k) = bb(2,k) - up
-             dd(2,k) = dd(2,k) + um
-
-             ! Store the central jacobian and rhs in cc and ff.
-             ! Multiply the off-diagonal terms and rhs by the iblank
-             ! value so the update determined for iblank = 0 is zero.
-
-             rblank = real(iblank(i,j,k), realType)
-
-             cc(1,1,k) = qq(i,j,k,1,1)
-             cc(1,2,k) = qq(i,j,k,1,2)*rblank
-             cc(2,1,k) = qq(i,j,k,2,1)*rblank
-             cc(2,2,k) = qq(i,j,k,2,2)
-
-             ff(1,k) = dvt(i,j,k,1)*rblank
-             ff(2,k) = dvt(i,j,k,2)*rblank
-
-             bb(:,k) = bb(:,k)*rblank
-             dd(:,k) = dd(:,k)*rblank
-
-             ! Set off diagonal terms to zero if wall function are used.
-
-             if((i == 2  .and. flagI2(j,k)) .or. &
-                  (i == il .and. flagIl(j,k)) .or. &
-                  (j == 2  .and. flagJ2(i,k)) .or. &
-                  (j == jl .and. flagJl(i,k)) .or. &
-                  (k == 2  .and. flagK2(i,j)) .or. &
-                  (k == kl .and. flagKl(i,j))) then
-                bb(1,k) = zero
-                dd(1,k) = zero
-                bb(2,k) = zero
-                dd(2,k) = zero
-             endif
-
-          enddo
-
-          ! Solve the tri-diagonal system in k-direction.
-
-          call tdia3(2_intType, kl, bb, cc, dd, ff)
-
-          ! Store the update in dvt.
-
-          do k=2,kl
-             dvt(i,j,k,1) = ff(1,k)
-             dvt(i,j,k,2) = ff(2,k)
-          enddo
-
-       enddo
-    enddo
-    !
-    !       Update the turbulent variables. For explicit relaxation the
-    !       update must be relaxed; for implicit relaxation this has been
-    !       done via the time step.
-    !
-    factor = one
-    if(turbRelax == turbRelaxExplicit) factor = alfaTurb
-
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-             w(i,j,k,itu1) = w(i,j,k,itu1) + factor*dvt(i,j,k,1)
-             w(i,j,k,itu1) = max(w(i,j,k,itu1), zero)
-
-             w(i,j,k,itu2) = w(i,j,k,itu2) + factor*dvt(i,j,k,2)
-             w(i,j,k,itu2) = max(w(i,j,k,itu2), 1.e-5_realType*wInf(itu2))
-          enddo
-       enddo
-    enddo
-
-  end subroutine kwSolve
+    subroutine kw_block(resOnly)
+        !
+        !       kw solves the transport equations for the standard and
+        !       modified k-omega turbulence models in a decoupled manner
+        !       using a diagonal dominant adi-scheme.
+        !
+        use constants
+        use blockPointers, only: il, jl, kl
+        use inputTimeSpectral
+        use iteration
+        use utils, only: setPointers
+        use turbUtils, only: kwEddyViscosity
+        use turbBCRoutines, only: bcTurbTreatment, applyAllTurbBCThisBlock
+        implicit none
+        !
+        !      Subroutine argument.
+        !
+        logical, intent(in) :: resOnly
+
+        ! Set the arrays for the boundary condition treatment.
+
+        call bcTurbTreatment
+
+        ! Solve the transport equations for k and omega.
+
+        call kwSolve(resOnly)
+
+        ! The eddy viscosity and the boundary conditions are only
+        ! applied if an actual update has been computed in kwSolve.
+
+        if (.not. resOnly) then
+
+            ! Compute the corresponding eddy viscosity.
+
+            call kwEddyViscosity(2, il, 2, jl, 2, kl)
+
+            ! Set the halo values for the turbulent variables.
+            ! We are on the finest mesh, so the second layer of halo
+            ! cells must be computed as well.
+
+            call applyAllTurbBCThisBlock(.true.)
+
+        end if
+
+    end subroutine kw_block
+
+    subroutine kwSolve(resOnly)
+        !
+        !       kwSolve solves the k-omega transport equations of both
+        !       the original and modified k-omega models
+        !       in a coupled manner using a diagonal dominant ADI-scheme.
+        !
+        use blockPointers
+        use constants
+        use flowVarRefState
+        use inputIteration
+        use inputPhysics
+        use paramTurb
+        use turbMod, only: prod, dvt, sct, kwCD, sig1, sig2, vort
+        use turbUtils, only: prodSmag2, prodWmag2, prodKatoLaunder, &
+                             turbAdvection, unsteadyTurbTerm, tdia3, kwCDterm
+        use turbCurveFits, only: curveTupYp
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        logical, intent(in) :: resOnly
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, nn
+
+        real(kind=realType) :: rkwGam1
+        real(kind=realType) :: rhoi, ss, spk, sdk
+        real(kind=realType) :: voli, volmi, volpi
+        real(kind=realType) :: xm, ym, zm, xp, yp, zp, xa, ya, za
+        real(kind=realType) :: ttm, ttp, mulm, mulp, muem, muep
+        real(kind=realType) :: c1m, c1p, c10, c2m, c2p, c20
+        real(kind=realType) :: b1, b2, c1, c2, d1, d2
+        real(kind=realType) :: qs, uu, um, up, factor, utau, rblank
+
+        real(kind=realType), dimension(itu1:itu2) :: tup
+
+        real(kind=realType), dimension(2:il, 2:jl, 2:kl, 2, 2) :: qq
+        real(kind=realType), dimension(2, 2:max(il, jl, kl)) :: bb, dd, ff
+        real(kind=realType), dimension(2, 2, 2:max(il, jl, kl)) :: cc
+
+        real(kind=realType), dimension(:, :, :), pointer :: ddw, ww, ddvt
+        real(kind=realType), dimension(:, :), pointer :: rrlv
+        real(kind=realType), dimension(:, :), pointer :: dd2Wall
+
+        logical, dimension(2:jl, 2:kl), target :: flagI2, flagIl
+        logical, dimension(2:il, 2:kl), target :: flagJ2, flagJl
+        logical, dimension(2:il, 2:jl), target :: flagK2, flagKl
+
+        logical, dimension(:, :), pointer :: flag
+
+        ! Set model constants
+
+        rkwGam1 = rkwBeta1 / rkwBetas - rkwSigw1 * rkwK * rkwK / sqrt(rkwBetas)
+        sig1 = rkwSigk1
+        sig2 = rkwSigw1
+
+        ! Set the pointer for dvt in dw, such that the code is more
+        ! readable. Also set the pointers for the production term,
+        ! vorticity and the cross diffusion term.
+
+        dvt => scratch(1:, 1:, 1:, idvt:)
+        prod => scratch(1:, 1:, 1:, iprod)
+        vort => prod
+        kwCD => scratch(1:, 1:, 1:, icd)
+        !
+        !       Production term.
+        !
+        select case (turbProd)
+        case (strain)
+            call prodSmag2
+
+        case (vorticity)
+            call prodWmag2
+
+        case (katoLaunder)
+            call prodKatoLaunder
+
+        end select
+        !
+        !       Source terms.
+        !       Determine the source term and its derivative w.r.t. k and
+        !       omega for all internal cells of the block.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Compute the source terms for both the k and the omega
+                    ! equation. Note that prod contains the unscaled production
+                    ! term. Furthermore the production term of k is limited to
+                    ! a certain times the destruction term.
+
+                    rhoi = one / w(i, j, k, irho)
+                    ss = prod(i, j, k)
+                    spk = rev(i, j, k) * ss * rhoi
+                    sdk = rkwBetas * w(i, j, k, itu1) * w(i, j, k, itu2)
+                    spk = min(spk, pklim * sdk)
+
+                    dvt(i, j, k, 1) = spk - sdk
+                    dvt(i, j, k, 2) = rkwGam1 * ss - rkwBeta1 * w(i, j, k, itu2)**2
+
+                    ! Compute the source term jacobian. Note that only the
+                    ! destruction terms are linearized to increase the diagonal
+                    ! dominance of the matrix. Furthermore minus the source
+                    ! term jacobian is stored.
+
+                    qq(i, j, k, 1, 1) = rkwBetas * w(i, j, k, itu2)
+                    ! qq(i,j,k,1,2) = rkwBetas*w(i,j,k,itu1)
+                    qq(i, j, k, 1, 2) = zero
+                    qq(i, j, k, 2, 1) = zero
+                    qq(i, j, k, 2, 2) = two * rkwBeta1 * w(i, j, k, itu2)
+
+                end do
+            end do
+        end do
+        !
+        !       Compute the cross-diffusion term for the modified version of
+        !       the k-omega model. It should cure the free-stream dependency
+        !       of the original model.
+        !
+        if (turbModel == komegaModified) then
+
+            ! Compute the cross diffusion term.
+
+            call kwCDterm
+
+            ! Multiply it with the correct constant and take it only into
+            ! account if it is positive.
+
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+                        dvt(i, j, k, 2) = dvt(i, j, k, 2) &
+                                          + rkwSigd1 * max(zero, kwCD(i, j, k))
+                    end do
+                end do
+            end do
+
+        end if
+        !
+        !       Advection and unsteady terms.
+        !
+        nn = itu1 - 1
+        call turbAdvection(2_intType, 2_intType, nn, qq)
+
+        call unsteadyTurbTerm(2_intType, 2_intType, nn, qq)
+        !
+        !       Viscous terms in k-direction.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Compute the metrics in zeta-direction, i.e. along the
+                    ! line k = constant.
+
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i, j, k - 1))
+                    volpi = two / (vol(i, j, k) + vol(i, j, k + 1))
+
+                    xm = sk(i, j, k - 1, 1) * volmi
+                    ym = sk(i, j, k - 1, 2) * volmi
+                    zm = sk(i, j, k - 1, 3) * volmi
+                    xp = sk(i, j, k, 1) * volpi
+                    yp = sk(i, j, k, 2) * volpi
+                    zp = sk(i, j, k, 3) * volpi
+
+                    xa = half * (sk(i, j, k, 1) + sk(i, j, k - 1, 1)) * voli
+                    ya = half * (sk(i, j, k, 2) + sk(i, j, k - 1, 2)) * voli
+                    za = half * (sk(i, j, k, 3) + sk(i, j, k - 1, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
+
+                    ! Computation of the viscous terms in zeta-direction; note
+                    ! that cross-derivatives are neglected, i.e. the mesh is
+                    ! assumed to be orthogonal.
+                    ! The second derivative in zeta-direction is constructed as
+                    ! the central difference of the first order derivatives, i.e.
+                    ! d^2/dzeta^2 = d/dzeta (d/dzeta k+1/2 - d/dzeta k-1/2).
+                    ! In this way the metric as well as the varying viscosity
+                    ! can be taken into account; the latter appears inside the
+                    ! d/dzeta derivative. The whole term is divided by rho to
+                    ! obtain the diffusion term for k and omega.
+
+                    rhoi = one / w(i, j, k, irho)
+                    mulm = half * (rlv(i, j, k - 1) + rlv(i, j, k))
+                    mulp = half * (rlv(i, j, k + 1) + rlv(i, j, k))
+                    muem = half * (rev(i, j, k - 1) + rev(i, j, k))
+                    muep = half * (rev(i, j, k + 1) + rev(i, j, k))
+
+                    c1m = ttm * (mulm + sig1 * muem) * rhoi
+                    c1p = ttp * (mulp + sig1 * muep) * rhoi
+                    c10 = c1m + c1p
+
+                    c2m = ttm * (mulm + sig2 * muem) * rhoi
+                    c2p = ttp * (mulp + sig2 * muep) * rhoi
+                    c20 = c2m + c2p
+
+                    ! Update the residual for this cell and store the possible
+                    ! coefficients for the matrix in b1, b2, c1, c2, d1 and d2.
+
+                    dvt(i, j, k, 1) = dvt(i, j, k, 1) + c1m * w(i, j, k - 1, itu1) &
+                                      - c10 * w(i, j, k, itu1) + c1p * w(i, j, k + 1, itu1)
+                    dvt(i, j, k, 2) = dvt(i, j, k, 2) + c2m * w(i, j, k - 1, itu2) &
+                                      - c20 * w(i, j, k, itu2) + c2p * w(i, j, k + 1, itu2)
+
+                    b1 = -c1m
+                    c1 = c10
+                    d1 = -c1p
+
+                    b2 = -c2m
+                    c2 = c20
+                    d2 = -c2p
+
+                    ! Update the central jacobian. For nonboundary cells this
+                    ! is simply c1 and c2. For boundary cells this is slightly
+                    ! more complicated, because the boundary conditions are
+                    ! treated implicitly and the off-diagonal terms b1, b2 and
+                    ! d1, d2 must be taken into account.
+                    ! The boundary conditions are only treated implicitly if
+                    ! the diagonal dominance of the matrix is increased.
+
+                    if (k == 2) then
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1 &
+                                            - b1 * max(bmtk1(i, j, itu1, itu1), zero)
+                        qq(i, j, k, 1, 2) = qq(i, j, k, 1, 2) - b1 * bmtk1(i, j, itu1, itu2)
+                        qq(i, j, k, 2, 1) = qq(i, j, k, 2, 1) - b2 * bmtk1(i, j, itu2, itu1)
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2 &
+                                            - b2 * max(bmtk1(i, j, itu2, itu2), zero)
+                    else if (k == kl) then
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1 &
+                                            - d1 * max(bmtk2(i, j, itu1, itu1), zero)
+                        qq(i, j, k, 1, 2) = qq(i, j, k, 1, 2) - d1 * bmtk2(i, j, itu1, itu2)
+                        qq(i, j, k, 2, 1) = qq(i, j, k, 2, 1) - d2 * bmtk2(i, j, itu2, itu1)
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2 &
+                                            - d2 * max(bmtk2(i, j, itu2, itu2), zero)
+                    else
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2
+                    end if
+
+                end do
+            end do
+        end do
+        !
+        !       Viscous terms in j-direction.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Compute the metrics in eta-direction, i.e. along the
+                    ! line j = constant.
+
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i, j - 1, k))
+                    volpi = two / (vol(i, j, k) + vol(i, j + 1, k))
+
+                    xm = sj(i, j - 1, k, 1) * volmi
+                    ym = sj(i, j - 1, k, 2) * volmi
+                    zm = sj(i, j - 1, k, 3) * volmi
+                    xp = sj(i, j, k, 1) * volpi
+                    yp = sj(i, j, k, 2) * volpi
+                    zp = sj(i, j, k, 3) * volpi
+
+                    xa = half * (sj(i, j, k, 1) + sj(i, j - 1, k, 1)) * voli
+                    ya = half * (sj(i, j, k, 2) + sj(i, j - 1, k, 2)) * voli
+                    za = half * (sj(i, j, k, 3) + sj(i, j - 1, k, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
+
+                    ! Computation of the viscous terms in eta-direction; note
+                    ! that cross-derivatives are neglected, i.e. the mesh is
+                    ! assumed to be orthogonal.
+                    ! The second derivative in eta-direction is constructed as
+                    ! the central difference of the first order derivatives, i.e.
+                    ! d^2/deta^2 = d/deta (d/deta j+1/2 - d/deta j-1/2).
+                    ! In this way the metric as well as the varying viscosity
+                    ! can be taken into account; the latter appears inside the
+                    ! d/deta derivative. The whole term is divided by rho to
+                    ! obtain the diffusion term for k and omega.
+
+                    rhoi = one / w(i, j, k, irho)
+                    mulm = half * (rlv(i, j - 1, k) + rlv(i, j, k))
+                    mulp = half * (rlv(i, j + 1, k) + rlv(i, j, k))
+                    muem = half * (rev(i, j - 1, k) + rev(i, j, k))
+                    muep = half * (rev(i, j + 1, k) + rev(i, j, k))
+
+                    c1m = ttm * (mulm + sig1 * muem) * rhoi
+                    c1p = ttp * (mulp + sig1 * muep) * rhoi
+                    c10 = c1m + c1p
+
+                    c2m = ttm * (mulm + sig2 * muem) * rhoi
+                    c2p = ttp * (mulp + sig2 * muep) * rhoi
+                    c20 = c2m + c2p
+
+                    ! Update the residual for this cell and store the possible
+                    ! coefficients for the matrix in b1, b2, c1, c2, d1 and d2.
+
+                    dvt(i, j, k, 1) = dvt(i, j, k, 1) + c1m * w(i, j - 1, k, itu1) &
+                                      - c10 * w(i, j, k, itu1) + c1p * w(i, j + 1, k, itu1)
+                    dvt(i, j, k, 2) = dvt(i, j, k, 2) + c2m * w(i, j - 1, k, itu2) &
+                                      - c20 * w(i, j, k, itu2) + c2p * w(i, j + 1, k, itu2)
+
+                    b1 = -c1m
+                    c1 = c10
+                    d1 = -c1p
+
+                    b2 = -c2m
+                    c2 = c20
+                    d2 = -c2p
+
+                    ! Update the central jacobian. For nonboundary cells this
+                    ! is simply c1 and c2. For boundary cells this is slightly
+                    ! more complicated, because the boundary conditions are
+                    ! treated implicitly and the off-diagonal terms b1, b2 and
+                    ! d1, d2 must be taken into account.
+                    ! The boundary conditions are only treated implicitly if
+                    ! the diagonal dominance of the matrix is increased.
+
+                    if (j == 2) then
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1 &
+                                            - b1 * max(bmtj1(i, k, itu1, itu1), zero)
+                        qq(i, j, k, 1, 2) = qq(i, j, k, 1, 2) - b1 * bmtj1(i, k, itu1, itu2)
+                        qq(i, j, k, 2, 1) = qq(i, j, k, 2, 1) - b2 * bmtj1(i, k, itu2, itu1)
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2 &
+                                            - b2 * max(bmtj1(i, k, itu2, itu2), zero)
+                    else if (j == jl) then
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1 &
+                                            - d1 * max(bmtj2(i, k, itu1, itu1), zero)
+                        qq(i, j, k, 1, 2) = qq(i, j, k, 1, 2) - d1 * bmtj2(i, k, itu1, itu2)
+                        qq(i, j, k, 2, 1) = qq(i, j, k, 2, 1) - d2 * bmtj2(i, k, itu2, itu1)
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2 &
+                                            - d2 * max(bmtj2(i, k, itu2, itu2), zero)
+                    else
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2
+                    end if
+
+                end do
+            end do
+        end do
+        !
+        !       Viscous terms in i-direction.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Compute the metrics in xi-direction, i.e. along the
+                    ! line i = constant.
+
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i - 1, j, k))
+                    volpi = two / (vol(i, j, k) + vol(i + 1, j, k))
+
+                    xm = si(i - 1, j, k, 1) * volmi
+                    ym = si(i - 1, j, k, 2) * volmi
+                    zm = si(i - 1, j, k, 3) * volmi
+                    xp = si(i, j, k, 1) * volpi
+                    yp = si(i, j, k, 2) * volpi
+                    zp = si(i, j, k, 3) * volpi
+
+                    xa = half * (si(i, j, k, 1) + si(i - 1, j, k, 1)) * voli
+                    ya = half * (si(i, j, k, 2) + si(i - 1, j, k, 2)) * voli
+                    za = half * (si(i, j, k, 3) + si(i - 1, j, k, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
+
+                    ! Computation of the viscous terms in xi-direction; note
+                    ! that cross-derivatives are neglected, i.e. the mesh is
+                    ! assumed to be orthogonal.
+                    ! The second derivative in xi-direction is constructed as
+                    ! the central difference of the first order derivatives, i.e.
+                    ! d^2/dxi^2 = d/dxi (d/dxi i+1/2 - d/dxi i-1/2).
+                    ! In this way the metric as well as the varying viscosity
+                    ! can be taken into account; the latter appears inside the
+                    ! d/dxi derivative. The whole term is divided by rho to
+                    ! obtain the diffusion term for k and omega.
+
+                    rhoi = one / w(i, j, k, irho)
+                    mulm = half * (rlv(i - 1, j, k) + rlv(i, j, k))
+                    mulp = half * (rlv(i + 1, j, k) + rlv(i, j, k))
+                    muem = half * (rev(i - 1, j, k) + rev(i, j, k))
+                    muep = half * (rev(i + 1, j, k) + rev(i, j, k))
+
+                    c1m = ttm * (mulm + sig1 * muem) * rhoi
+                    c1p = ttp * (mulp + sig1 * muep) * rhoi
+                    c10 = c1m + c1p
+
+                    c2m = ttm * (mulm + sig2 * muem) * rhoi
+                    c2p = ttp * (mulp + sig2 * muep) * rhoi
+                    c20 = c2m + c2p
+
+                    ! Update the residual for this cell and store the possible
+                    ! coefficients for the matrix in b1, b2, c1, c2, d1 and d2.
+
+                    dvt(i, j, k, 1) = dvt(i, j, k, 1) + c1m * w(i - 1, j, k, itu1) &
+                                      - c10 * w(i, j, k, itu1) + c1p * w(i + 1, j, k, itu1)
+                    dvt(i, j, k, 2) = dvt(i, j, k, 2) + c2m * w(i - 1, j, k, itu2) &
+                                      - c20 * w(i, j, k, itu2) + c2p * w(i + 1, j, k, itu2)
+
+                    b1 = -c1m
+                    c1 = c10
+                    d1 = -c1p
+
+                    b2 = -c2m
+                    c2 = c20
+                    d2 = -c2p
+
+                    ! Update the central jacobian. For nonboundary cells this
+                    ! is simply c1 and c2. For boundary cells this is slightly
+                    ! more complicated, because the boundary conditions are
+                    ! treated implicitly and the off-diagonal terms b1, b2 and
+                    ! d1, d2 must be taken into account.
+                    ! The boundary conditions are only treated implicitly if
+                    ! the diagonal dominance of the matrix is increased.
+
+                    if (i == 2) then
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1 &
+                                            - b1 * max(bmti1(j, k, itu1, itu1), zero)
+                        qq(i, j, k, 1, 2) = qq(i, j, k, 1, 2) - b1 * bmti1(j, k, itu1, itu2)
+                        qq(i, j, k, 2, 1) = qq(i, j, k, 2, 1) - b2 * bmti1(j, k, itu2, itu1)
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2 &
+                                            - b2 * max(bmti1(j, k, itu2, itu2), zero)
+                    else if (i == il) then
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1 &
+                                            - d1 * max(bmti2(j, k, itu1, itu1), zero)
+                        qq(i, j, k, 1, 2) = qq(i, j, k, 1, 2) - d1 * bmti2(j, k, itu1, itu2)
+                        qq(i, j, k, 2, 1) = qq(i, j, k, 2, 1) - d2 * bmti2(j, k, itu2, itu1)
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2 &
+                                            - d2 * max(bmti2(j, k, itu2, itu2), zero)
+                    else
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2
+                    end if
+
+                end do
+            end do
+        end do
+
+        ! Multiply the residual by the volume and store this in dw; this
+        ! is done for monitoring reasons only. The multiplication with the
+        ! volume is present to be consistent with the flow residuals; also
+        ! the negative value is taken, again to be consistent with the
+        ! flow equations. Also multiply by iblank so that no updates occur
+        ! in holes or the overset boundary.
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    rblank = real(iblank(i, j, k), realType)
+                    dw(i, j, k, itu1) = -volRef(i, j, k) * dvt(i, j, k, 1) * rblank
+                    dw(i, j, k, itu2) = -volRef(i, j, k) * dvt(i, j, k, 2) * rblank
+                end do
+            end do
+        end do
+
+        ! Initialize the wall function flags to .false.
+
+        flagI2 = .false.
+        flagIl = .false.
+        flagJ2 = .false.
+        flagJl = .false.
+        flagK2 = .false.
+        flagKl = .false.
+
+        ! Modify the rhs of the 1st internal cell, if wall functions
+        ! are used; their value is determined by the table.
+
+        testWallFunctions: if (wallFunctions) then
+
+            bocos: do nn = 1, nViscBocos
+
+                ! Determine the block face on which the subface is located
+                ! and set some variables. As flag points to the entire array
+                ! flagI2, etc., its starting indices are the starting indices
+                ! of its target and not 1.
+
+                select case (BCFaceID(nn))
+                case (iMin)
+                    flag => flagI2
+                    ddw => dw(2, 1:, 1:, 1:); ddvt => dvt(2, 1:, 1:, 1:)
+                    ww => w(2, 1:, 1:, 1:); rrlv => rlv(2, 1:, 1:)
+                    dd2Wall => d2Wall(2, :, :)
+
+                case (iMax)
+                    flag => flagIl
+                    ddw => dw(il, 1:, 1:, 1:); ddvt => dvt(il, 1:, 1:, 1:)
+                    ww => w(il, 1:, 1:, 1:); rrlv => rlv(il, 1:, 1:)
+                    dd2Wall => d2Wall(il, :, :)
+
+                case (jMin)
+                    flag => flagJ2
+                    ddw => dw(1:, 2, 1:, 1:); ddvt => dvt(1:, 2, 1:, 1:)
+                    ww => w(1:, 2, 1:, 1:); rrlv => rlv(1:, 2, 1:)
+                    dd2Wall => d2Wall(:, 2, :)
+
+                case (jMax)
+                    flag => flagJl
+                    ddw => dw(1:, jl, 1:, 1:); ddvt => dvt(1:, jl, 1:, 1:)
+                    ww => w(1:, jl, 1:, 1:); rrlv => rlv(1:, jl, 1:)
+                    dd2Wall => d2Wall(:, jl, :)
+
+                case (kMin)
+                    flag => flagK2
+                    ddw => dw(1:, 1:, 2, 1:); ddvt => dvt(1:, 1:, 2, 1:)
+                    ww => w(1:, 1:, 2, 1:); rrlv => rlv(1:, 1:, 2)
+                    dd2Wall => d2Wall(:, :, 2)
+
+                case (kMax)
+                    flag => flagKl
+                    ddw => dw(1:, 1:, kl, 1:); ddvt => dvt(1:, 1:, kl, 1:)
+                    ww => w(1:, 1:, kl, 1:); rrlv => rlv(1:, 1:, kl)
+                    dd2Wall => d2Wall(:, :, kl)
+
+                end select
+
+                ! Loop over the owned faces of this subface. Therefore the
+                ! nodal range of BCData must be used. The offset of +1 is
+                ! present, because the starting index of the cell range is
+                ! 1 larger than the starting index of the nodal range.
+
+                do j = (BCData(nn)%jnBeg + 1), BCData(nn)%jnEnd
+                    do i = (BCData(nn)%inBeg + 1), BCData(nn)%inEnd
+
+                        ! Set ddw to zero for proper monitoring of the
+                        ! convergence.
+
+                        ddw(i, j, itu1) = zero
+                        ddw(i, j, itu2) = zero
+
+                        ! Enforce k and omega in the 1st internal cell from
+                        ! the wall function table. There is an offset of -1 in
+                        ! the wall distance. Note that the offset compared to
+                        ! the current value must be stored. Also note that the
+                        ! curve fits contain the non-dimensional values.
+
+                        utau = viscSubface(nn)%utau(i, j)
+                        yp = ww(i, j, irho) * dd2Wall(i - 1, j - 1) * utau / rrlv(i, j)
+
+                        call curveTupYp(tup, yp, itu1, itu2)
+
+                        tup(itu1) = tup(itu1) * utau**2
+                        tup(itu2) = tup(itu2) * utau**2 / rrlv(i, j) * ww(i, j, irho)
+
+                        ddvt(i, j, 1) = tup(itu1) - ww(i, j, itu1)
+                        ddvt(i, j, 2) = tup(itu2) - ww(i, j, itu2)
+
+                        ! Set the wall flag to .true.
+
+                        flag(i, j) = .true.
+
+                    end do
+                end do
+
+            end do bocos
+        end if testWallFunctions
+
+        ! Return if only the residual must be computed.
+
+        if (resOnly) return
+
+        ! For implicit relaxation take the local time step into account,
+        ! where dt is the inverse of the central jacobian times the cfl
+        ! number. The following system is solved:
+        ! (i/dt + cc + bb + dd)*dw = rhs, in which i/dt = cc/cfl. As in
+        ! the rest of the algorithm only the modified central jacobian is
+        ! used, stored it now.
+
+        ! Compute the factor multiplying the central jacobian, which
+        ! is 1 + 1/cfl (implicit relaxation only).
+
+        factor = one
+        if (turbRelax == turbRelaxImplicit) &
+            factor = one + (one - alfaTurb) / alfaTurb
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    qq(i, j, k, 1, 1) = factor * qq(i, j, k, 1, 1)
+                    qq(i, j, k, 1, 2) = factor * qq(i, j, k, 1, 2)
+                    qq(i, j, k, 2, 1) = factor * qq(i, j, k, 2, 1)
+                    qq(i, j, k, 2, 2) = factor * qq(i, j, k, 2, 2)
+
+                    ! Set qq to 1 if the value is determined by the table.
+
+                    if ((i == 2 .and. flagI2(j, k)) .or. &
+                        (i == il .and. flagIl(j, k)) .or. &
+                        (j == 2 .and. flagJ2(i, k)) .or. &
+                        (j == jl .and. flagJl(i, k)) .or. &
+                        (k == 2 .and. flagK2(i, j)) .or. &
+                        (k == kl .and. flagKl(i, j))) then
+                        qq(i, j, k, 1, 1) = one
+                        qq(i, j, k, 1, 2) = zero
+                        qq(i, j, k, 2, 1) = zero
+                        qq(i, j, k, 2, 2) = one
+                    end if
+                end do
+            end do
+        end do
+
+        ! Initialize the grid velocity to zero. This value will be used
+        ! if the block is not moving.
+
+        qs = zero
+        !
+        !       dd-ADI step in j-direction. There is no particular reason to
+        !       start in j-direction, it just happened to be so. As we solve
+        !       in j-direction, the j-loop is the innermost loop.
+        !
+        do k = 2, kl
+            do i = 2, il
+                do j = 2, jl
+
+                    ! More or less the same code is executed here as above when
+                    ! the residual was built. However, now the off-diagonal
+                    ! terms for the dd-ADI must be built and stored. This could
+                    ! have been done earlier, but then all the coefficients had
+                    ! to be stored. To save memory, they are recomputed.
+                    ! Consequently, see the j-loop to build the residual for
+                    ! the comments.
+
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i, j - 1, k))
+                    volpi = two / (vol(i, j, k) + vol(i, j + 1, k))
+
+                    xm = sj(i, j - 1, k, 1) * volmi
+                    ym = sj(i, j - 1, k, 2) * volmi
+                    zm = sj(i, j - 1, k, 3) * volmi
+                    xp = sj(i, j, k, 1) * volpi
+                    yp = sj(i, j, k, 2) * volpi
+                    zp = sj(i, j, k, 3) * volpi
+
+                    xa = half * (sj(i, j, k, 1) + sj(i, j - 1, k, 1)) * voli
+                    ya = half * (sj(i, j, k, 2) + sj(i, j - 1, k, 2)) * voli
+                    za = half * (sj(i, j, k, 3) + sj(i, j - 1, k, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
+
+                    ! Off-diagonal terms due to the diffusion terms
+                    ! in j-direction.
+
+                    rhoi = one / w(i, j, k, irho)
+                    mulm = half * (rlv(i, j - 1, k) + rlv(i, j, k))
+                    mulp = half * (rlv(i, j + 1, k) + rlv(i, j, k))
+                    muem = half * (rev(i, j - 1, k) + rev(i, j, k))
+                    muep = half * (rev(i, j + 1, k) + rev(i, j, k))
+
+                    c1m = ttm * (mulm + sig1 * muem) * rhoi
+                    c1p = ttp * (mulp + sig1 * muep) * rhoi
+
+                    c2m = ttm * (mulm + sig2 * muem) * rhoi
+                    c2p = ttp * (mulp + sig2 * muep) * rhoi
+
+                    bb(1, j) = -c1m
+                    dd(1, j) = -c1p
+                    bb(2, j) = -c2m
+                    dd(2, j) = -c2p
+
+                    ! Compute the grid velocity if present.
+                    ! It is taken as the average of j and j-1,
+
+                    if (addGridVelocities) &
+                        qs = half * (sFaceJ(i, j, k) + sFaceJ(i, j - 1, k)) * voli
+
+                    ! Off-diagonal terms due to the advection term in
+                    ! j-direction. First order approximation.
+
+                    uu = xa * w(i, j, k, ivx) + ya * w(i, j, k, ivy) + za * w(i, j, k, ivz) - qs
+                    um = zero
+                    up = zero
+                    if (uu < zero) um = uu
+                    if (uu > zero) up = uu
+
+                    bb(1, j) = bb(1, j) - up
+                    dd(1, j) = dd(1, j) + um
+                    bb(2, j) = bb(2, j) - up
+                    dd(2, j) = dd(2, j) + um
+
+                    ! Store the central jacobian and rhs in cc and ff.
+                    ! Multiply the off-diagonal terms and rhs by the iblank
+                    ! value so the update determined for iblank = 0 is zero.
+
+                    rblank = real(iblank(i, j, k), realType)
+
+                    cc(1, 1, j) = qq(i, j, k, 1, 1)
+                    cc(1, 2, j) = qq(i, j, k, 1, 2) * rblank
+                    cc(2, 1, j) = qq(i, j, k, 2, 1) * rblank
+                    cc(2, 2, j) = qq(i, j, k, 2, 2)
+
+                    ff(1, j) = dvt(i, j, k, 1) * rblank
+                    ff(2, j) = dvt(i, j, k, 2) * rblank
+
+                    bb(:, j) = bb(:, j) * rblank
+                    dd(:, j) = dd(:, j) * rblank
+
+                    ! Set off diagonal terms to zero if wall function are used.
+
+                    if ((i == 2 .and. flagI2(j, k)) .or. &
+                        (i == il .and. flagIl(j, k)) .or. &
+                        (j == 2 .and. flagJ2(i, k)) .or. &
+                        (j == jl .and. flagJl(i, k)) .or. &
+                        (k == 2 .and. flagK2(i, j)) .or. &
+                        (k == kl .and. flagKl(i, j))) then
+                        bb(1, j) = zero
+                        dd(1, j) = zero
+                        bb(2, j) = zero
+                        dd(2, j) = zero
+                    end if
+
+                end do
+
+                ! Solve the tri-diagonal system in j-direction.
+
+                call tdia3(2_intType, jl, bb, cc, dd, ff)
+
+                ! Determine the new rhs for the next direction.
+
+                do j = 2, jl
+                    dvt(i, j, k, 1) = qq(i, j, k, 1, 1) * ff(1, j) + qq(i, j, k, 1, 2) * ff(2, j)
+                    dvt(i, j, k, 2) = qq(i, j, k, 2, 1) * ff(1, j) + qq(i, j, k, 2, 2) * ff(2, j)
+                end do
+
+            end do
+        end do
+        !
+        !       dd-ADI step in i-direction. As we solve in i-direction, the
+        !       i-loop is the innermost loop.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! More or less the same code is executed here as above when
+                    ! the residual was built. However, now the off-diagonal
+                    ! terms for the dd-ADI must be built and stored. This could
+                    ! have been done earlier, but then all the coefficients had
+                    ! to be stored. To save memory, they are recomputed.
+                    ! Consequently, see the i-loop to build the residual for
+                    ! the comments.
+
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i - 1, j, k))
+                    volpi = two / (vol(i, j, k) + vol(i + 1, j, k))
+
+                    xm = si(i - 1, j, k, 1) * volmi
+                    ym = si(i - 1, j, k, 2) * volmi
+                    zm = si(i - 1, j, k, 3) * volmi
+                    xp = si(i, j, k, 1) * volpi
+                    yp = si(i, j, k, 2) * volpi
+                    zp = si(i, j, k, 3) * volpi
+
+                    xa = half * (si(i, j, k, 1) + si(i - 1, j, k, 1)) * voli
+                    ya = half * (si(i, j, k, 2) + si(i - 1, j, k, 2)) * voli
+                    za = half * (si(i, j, k, 3) + si(i - 1, j, k, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
+
+                    ! Off-diagonal terms due to the diffusion terms
+                    ! in i-direction.
+
+                    rhoi = one / w(i, j, k, irho)
+                    mulm = half * (rlv(i - 1, j, k) + rlv(i, j, k))
+                    mulp = half * (rlv(i + 1, j, k) + rlv(i, j, k))
+                    muem = half * (rev(i - 1, j, k) + rev(i, j, k))
+                    muep = half * (rev(i + 1, j, k) + rev(i, j, k))
+
+                    c1m = ttm * (mulm + sig1 * muem) * rhoi
+                    c1p = ttp * (mulp + sig1 * muep) * rhoi
+
+                    c2m = ttm * (mulm + sig2 * muem) * rhoi
+                    c2p = ttp * (mulp + sig2 * muep) * rhoi
+
+                    bb(1, i) = -c1m
+                    dd(1, i) = -c1p
+                    bb(2, i) = -c2m
+                    dd(2, i) = -c2p
+
+                    ! Compute the grid velocity if present.
+                    ! It is taken as the average of i and i-1,
+
+                    if (addGridVelocities) &
+                        qs = half * (sFaceI(i, j, k) + sFaceI(i - 1, j, k)) * voli
+
+                    ! Off-diagonal terms due to the advection term in
+                    ! i-direction. First order approximation.
+
+                    uu = xa * w(i, j, k, ivx) + ya * w(i, j, k, ivy) + za * w(i, j, k, ivz) - qs
+                    um = zero
+                    up = zero
+                    if (uu < zero) um = uu
+                    if (uu > zero) up = uu
+
+                    bb(1, i) = bb(1, i) - up
+                    dd(1, i) = dd(1, i) + um
+                    bb(2, i) = bb(2, i) - up
+                    dd(2, i) = dd(2, i) + um
+
+                    ! Store the central jacobian and rhs in cc and ff.
+                    ! Multiply the off-diagonal terms and rhs by the iblank
+                    ! value so the update determined for iblank = 0 is zero.
+
+                    rblank = real(iblank(i, j, k), realType)
+
+                    cc(1, 1, i) = qq(i, j, k, 1, 1)
+                    cc(1, 2, i) = qq(i, j, k, 1, 2) * rblank
+                    cc(2, 1, i) = qq(i, j, k, 2, 1) * rblank
+                    cc(2, 2, i) = qq(i, j, k, 2, 2)
+
+                    ff(1, i) = dvt(i, j, k, 1) * rblank
+                    ff(2, i) = dvt(i, j, k, 2) * rblank
+
+                    bb(:, i) = bb(:, i) * rblank
+                    dd(:, i) = dd(:, i) * rblank
+
+                    ! Set off diagonal terms to zero if wall function are used.
+
+                    if ((i == 2 .and. flagI2(j, k)) .or. &
+                        (i == il .and. flagIl(j, k)) .or. &
+                        (j == 2 .and. flagJ2(i, k)) .or. &
+                        (j == jl .and. flagJl(i, k)) .or. &
+                        (k == 2 .and. flagK2(i, j)) .or. &
+                        (k == kl .and. flagKl(i, j))) then
+                        bb(1, i) = zero
+                        dd(1, i) = zero
+                        bb(2, i) = zero
+                        dd(2, i) = zero
+                    end if
+
+                end do
+
+                ! Solve the tri-diagonal system in i-direction.
+
+                call tdia3(2_intType, il, bb, cc, dd, ff)
+
+                ! Determine the new rhs for the next direction.
+
+                do i = 2, il
+                    dvt(i, j, k, 1) = qq(i, j, k, 1, 1) * ff(1, i) + qq(i, j, k, 1, 2) * ff(2, i)
+                    dvt(i, j, k, 2) = qq(i, j, k, 2, 1) * ff(1, i) + qq(i, j, k, 2, 2) * ff(2, i)
+                end do
+
+            end do
+        end do
+        !
+        !       dd-adi step in k-direction. As we solve in k-direction, the
+        !       k-loop is the innermost loop.
+        !
+        do j = 2, jl
+            do i = 2, il
+                do k = 2, kl
+
+                    ! More or less the same code is executed here as above when
+                    ! the residual was built. However, now the off-diagonal
+                    ! terms for the dd-ADI must be built and stored. This could
+                    ! have been done earlier, but then all the coefficients had
+                    ! to be stored. To save memory, they are recomputed.
+                    ! Consequently, see the k-loop to build the residual for
+                    ! the comments.
+
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i, j, k - 1))
+                    volpi = two / (vol(i, j, k) + vol(i, j, k + 1))
+
+                    xm = sk(i, j, k - 1, 1) * volmi
+                    ym = sk(i, j, k - 1, 2) * volmi
+                    zm = sk(i, j, k - 1, 3) * volmi
+                    xp = sk(i, j, k, 1) * volpi
+                    yp = sk(i, j, k, 2) * volpi
+                    zp = sk(i, j, k, 3) * volpi
+
+                    xa = half * (sk(i, j, k, 1) + sk(i, j, k - 1, 1)) * voli
+                    ya = half * (sk(i, j, k, 2) + sk(i, j, k - 1, 2)) * voli
+                    za = half * (sk(i, j, k, 3) + sk(i, j, k - 1, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
+
+                    ! Off-diagonal terms due to the diffusion terms
+                    ! in k-direction.
+
+                    rhoi = one / w(i, j, k, irho)
+                    mulm = half * (rlv(i, j, k - 1) + rlv(i, j, k))
+                    mulp = half * (rlv(i, j, k + 1) + rlv(i, j, k))
+                    muem = half * (rev(i, j, k - 1) + rev(i, j, k))
+                    muep = half * (rev(i, j, k + 1) + rev(i, j, k))
+
+                    c1m = ttm * (mulm + sig1 * muem) * rhoi
+                    c1p = ttp * (mulp + sig1 * muep) * rhoi
+
+                    c2m = ttm * (mulm + sig2 * muem) * rhoi
+                    c2p = ttp * (mulp + sig2 * muep) * rhoi
+
+                    bb(1, k) = -c1m
+                    dd(1, k) = -c1p
+                    bb(2, k) = -c2m
+                    dd(2, k) = -c2p
+
+                    ! Compute the grid velocity if present.
+                    ! It is taken as the average of k and k-1,
+
+                    if (addGridVelocities) &
+                        qs = half * (sFaceK(i, j, k) + sFaceK(i, j, k - 1)) * voli
+
+                    ! Off-diagonal terms due to the advection term in
+                    ! k-direction. First order approximation.
+
+                    uu = xa * w(i, j, k, ivx) + ya * w(i, j, k, ivy) + za * w(i, j, k, ivz) - qs
+                    um = zero
+                    up = zero
+                    if (uu < zero) um = uu
+                    if (uu > zero) up = uu
+
+                    bb(1, k) = bb(1, k) - up
+                    dd(1, k) = dd(1, k) + um
+                    bb(2, k) = bb(2, k) - up
+                    dd(2, k) = dd(2, k) + um
+
+                    ! Store the central jacobian and rhs in cc and ff.
+                    ! Multiply the off-diagonal terms and rhs by the iblank
+                    ! value so the update determined for iblank = 0 is zero.
+
+                    rblank = real(iblank(i, j, k), realType)
+
+                    cc(1, 1, k) = qq(i, j, k, 1, 1)
+                    cc(1, 2, k) = qq(i, j, k, 1, 2) * rblank
+                    cc(2, 1, k) = qq(i, j, k, 2, 1) * rblank
+                    cc(2, 2, k) = qq(i, j, k, 2, 2)
+
+                    ff(1, k) = dvt(i, j, k, 1) * rblank
+                    ff(2, k) = dvt(i, j, k, 2) * rblank
+
+                    bb(:, k) = bb(:, k) * rblank
+                    dd(:, k) = dd(:, k) * rblank
+
+                    ! Set off diagonal terms to zero if wall function are used.
+
+                    if ((i == 2 .and. flagI2(j, k)) .or. &
+                        (i == il .and. flagIl(j, k)) .or. &
+                        (j == 2 .and. flagJ2(i, k)) .or. &
+                        (j == jl .and. flagJl(i, k)) .or. &
+                        (k == 2 .and. flagK2(i, j)) .or. &
+                        (k == kl .and. flagKl(i, j))) then
+                        bb(1, k) = zero
+                        dd(1, k) = zero
+                        bb(2, k) = zero
+                        dd(2, k) = zero
+                    end if
+
+                end do
+
+                ! Solve the tri-diagonal system in k-direction.
+
+                call tdia3(2_intType, kl, bb, cc, dd, ff)
+
+                ! Store the update in dvt.
+
+                do k = 2, kl
+                    dvt(i, j, k, 1) = ff(1, k)
+                    dvt(i, j, k, 2) = ff(2, k)
+                end do
+
+            end do
+        end do
+        !
+        !       Update the turbulent variables. For explicit relaxation the
+        !       update must be relaxed; for implicit relaxation this has been
+        !       done via the time step.
+        !
+        factor = one
+        if (turbRelax == turbRelaxExplicit) factor = alfaTurb
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    w(i, j, k, itu1) = w(i, j, k, itu1) + factor * dvt(i, j, k, 1)
+                    w(i, j, k, itu1) = max(w(i, j, k, itu1), zero)
+
+                    w(i, j, k, itu2) = w(i, j, k, itu2) + factor * dvt(i, j, k, 2)
+                    w(i, j, k, itu2) = max(w(i, j, k, itu2), 1.e-5_realType * wInf(itu2))
+                end do
+            end do
+        end do
+
+    end subroutine kwSolve
 end module kw
diff --git a/src/turbulence/sa.F90 b/src/turbulence/sa.F90
index 53ae30bb4..0651b182b 100644
--- a/src/turbulence/sa.F90
+++ b/src/turbulence/sa.F90
@@ -321,11 +321,13 @@ subroutine saSource
                         ! treatment.
 
                         dfv1 = three * chi2 * cv13 / ((chi3 + cv13)**2)
+
                         if (.not. useRoughSA) then
                             dfv2 = (chi2 * dfv1 - one) / (nu * ((one + chi * fv1)**2))
                         else
                             dfv2 = (w(i, j, k, itu1) * dfv1 - nu) / (nu + w(i, j, k, itu1) * fv1)**2
                         end if
+
                         dft2 = -two * rsaCt4 * chi * ft2 / nu
 
                         drr = (one - rr * (fv2 + w(i, j, k, itu1) * dfv2)) &
diff --git a/src/turbulence/turbAPI.F90 b/src/turbulence/turbAPI.F90
index 9393efbf8..ea8882c66 100644
--- a/src/turbulence/turbAPI.F90
+++ b/src/turbulence/turbAPI.F90
@@ -1,163 +1,162 @@
 module turbAPI
 
 contains
-  subroutine turbSolveDDADI
-    !
-    !       turbSolveDDADI solves the turbulent transport equations
-    !       separately, i.e. the mean flow variables are kept constant
-    !       and the turbulent variables are updated.
-    !
-    use constants
-    use blockPointers, only : nDom
-    use flowVarRefState
-    use inputDiscretization
-    use inputIteration
-    use inputPhysics
-    use iteration
-    use turbMod
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use sa
-    use kw
-    use kt
-    use SST
-    use vf
-    use haloExchange, only : whalo2
-    use utils, only : setPointers
-    use turbUtils, only : unsteadyTurbSpectral
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: iter, sps, nn
-
-    ! Loop over the number of iterations for the turbulence.
-
-    do iter=1,nSubIterTurb
-
-       ! Compute the quantities for certain turbulence models that
-       ! need to be communicated between blocks.
-
-       if (turbModel == menterSST) then
-          call f1SST
-       end if
-
-       ! Compute the time derivative for the time spectral mode.
-       select case(turbModel)
-       case(spalartAllmaras)
-          call unsteadyTurbSpectral(itu1,itu1)
-       case (komegaWilcox, komegaModified, menterSST, ktau)
-          call unsteadyTurbSpectral(itu1,itu2)
-       case (v2f)
-          call unsteadyTurbSpectral(itu1,itu3)
-       end select
+    subroutine turbSolveDDADI
+        !
+        !       turbSolveDDADI solves the turbulent transport equations
+        !       separately, i.e. the mean flow variables are kept constant
+        !       and the turbulent variables are updated.
+        !
+        use constants
+        use blockPointers, only: nDom
+        use flowVarRefState
+        use inputDiscretization
+        use inputIteration
+        use inputPhysics
+        use iteration
+        use turbMod
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use sa
+        use kw
+        use kt
+        use SST
+        use vf
+        use haloExchange, only: whalo2
+        use utils, only: setPointers
+        use turbUtils, only: unsteadyTurbSpectral
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: iter, sps, nn
+
+        ! Loop over the number of iterations for the turbulence.
+
+        do iter = 1, nSubIterTurb
+
+            ! Compute the quantities for certain turbulence models that
+            ! need to be communicated between blocks.
+
+            if (turbModel == menterSST) then
+                call f1SST
+            end if
+
+            ! Compute the time derivative for the time spectral mode.
+            select case (turbModel)
+            case (spalartAllmaras)
+                call unsteadyTurbSpectral(itu1, itu1)
+            case (komegaWilcox, komegaModified, menterSST, ktau)
+                call unsteadyTurbSpectral(itu1, itu2)
+            case (v2f)
+                call unsteadyTurbSpectral(itu1, itu3)
+            end select
 
-       ! Loop over the number of spectral solutions.
+            ! Loop over the number of spectral solutions.
 
-       spectralLoop: do sps=1,nTimeIntervalsSpectral
+            spectralLoop: do sps = 1, nTimeIntervalsSpectral
 
-          ! Loop over the number of blocks.
+                ! Loop over the number of blocks.
 
-          domains: do nn=1,nDom
+                domains: do nn = 1, nDom
 
-             ! setPointers for this block:
-             call setPointers(nn, currentLevel, sps)
+                    ! setPointers for this block:
+                    call setPointers(nn, currentLevel, sps)
 
-             ! Now call the selected turbulence model
-             select case (turbModel)
+                    ! Now call the selected turbulence model
+                    select case (turbModel)
 
-             case (spalartAllmaras)
-                call sa_block(.false.)
+                    case (spalartAllmaras)
+                        call sa_block(.false.)
 
-             case (komegaWilcox, komegaModified)
-                call kw_block(.false.)
+                    case (komegaWilcox, komegaModified)
+                        call kw_block(.false.)
 
-             case (menterSST)
-                call SST_block(.false.)
+                    case (menterSST)
+                        call SST_block(.false.)
 
-             case (ktau)
-                call kt_block(.false.)
+                    case (ktau)
+                        call kt_block(.false.)
 
-             case (v2f)
-                call vf_block(.false.)
+                    case (v2f)
+                        call vf_block(.false.)
 
-             end select
+                    end select
 
-          end do domains
-       end do spectralLoop
+                end do domains
+            end do spectralLoop
 
-       ! Exchange the halo data. As it is guaranteed that we are on the
-       ! finest mesh, exchange both layers of halo's.
+            ! Exchange the halo data. As it is guaranteed that we are on the
+            ! finest mesh, exchange both layers of halo's.
 
-       call whalo2(groundLevel, nt1, nt2, .false., .false., .true.)
+            call whalo2(groundLevel, nt1, nt2, .false., .false., .true.)
 
-    enddo
+        end do
 
-  end subroutine turbSolveDDADI
+    end subroutine turbSolveDDADI
 
-  subroutine turbResidual
-    !
-    !       turbResidual computes the residual of the residual of the
-    !       turbulent transport equations on the current multigrid level.
-    !
-    use constants
-    use blockPointers, only : nDom
-    use inputDiscretization
-    use inputPhysics
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use iteration
-    use turbMod
-    use sa
-    use kt
-    use kw
-    use SST
-    use vf
-    use utils, only : setPointers
-    implicit none
+    subroutine turbResidual
+        !
+        !       turbResidual computes the residual of the residual of the
+        !       turbulent transport equations on the current multigrid level.
+        !
+        use constants
+        use blockPointers, only: nDom
+        use inputDiscretization
+        use inputPhysics
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use iteration
+        use turbMod
+        use sa
+        use kt
+        use kw
+        use SST
+        use vf
+        use utils, only: setPointers
+        implicit none
 
-    integer(kind=intType) :: nn, sps
+        integer(kind=intType) :: nn, sps
 
-    ! Compute the quantities for certain turbulence models that
-    ! need to be communicated between blocks.
+        ! Compute the quantities for certain turbulence models that
+        ! need to be communicated between blocks.
 
-    if (turbModel == menterSST) then
-       call f1SST
-    end if
+        if (turbModel == menterSST) then
+            call f1SST
+        end if
 
-    ! Loop over the number of spectral solutions.
+        ! Loop over the number of spectral solutions.
 
-    spectralLoop: do sps=1,nTimeIntervalsSpectral
+        spectralLoop: do sps = 1, nTimeIntervalsSpectral
 
-       ! Loop over the number of blocks.
+            ! Loop over the number of blocks.
 
-       domains: do nn=1,nDom
+            domains: do nn = 1, nDom
 
-          ! setPointers for this block:
-          call setPointers(nn, currentLevel, sps)
+                ! setPointers for this block:
+                call setPointers(nn, currentLevel, sps)
 
-          ! Now call the selected turbulence model
-          select case (turbModel)
+                ! Now call the selected turbulence model
+                select case (turbModel)
 
-          case (spalartAllmaras)
-             call sa_block(.True.)
+                case (spalartAllmaras)
+                    call sa_block(.True.)
 
-          case (komegaWilcox, komegaModified)
-             call kw_block(.True.)
+                case (komegaWilcox, komegaModified)
+                    call kw_block(.True.)
 
-          case (menterSST)
-             call SST_block(.True.)
+                case (menterSST)
+                    call SST_block(.True.)
 
-          case (ktau)
-             call kt_block(.True.)
+                case (ktau)
+                    call kt_block(.True.)
 
-          case (v2f)
-             call vf_block(.True.)
+                case (v2f)
+                    call vf_block(.True.)
 
-          end select
+                end select
 
-       end do domains
-    end do spectralLoop
-
-  end subroutine turbResidual
+            end do domains
+        end do spectralLoop
 
+    end subroutine turbResidual
 
 end module turbAPI
diff --git a/src/turbulence/turbCurveFits.F90 b/src/turbulence/turbCurveFits.F90
index 52d6f2c7c..582a9b3c4 100644
--- a/src/turbulence/turbCurveFits.F90
+++ b/src/turbulence/turbCurveFits.F90
@@ -1,3950 +1,3950 @@
 module turbCurveFits
 contains
-  function curveUpRe(Re)
-    !
-    !       curveUpRe determines the value of the nonDimensional
-    !       tangential velocity (made nonDimensional with the skin
-    !       friction velocity) for the given Reynolds number.
-    !       This data has been curve fitted with cubic splines.
-    !
-    use paramTurb
-    implicit none
-    !
-    !      Function type.
-    !
-    real(kind=realType) :: curveUpRe
-    !
-    !      Function arguments.
-    !
-    real(kind=realType), intent(in) :: Re
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: ii, nn, start
-    real(kind=realType)   :: x, x2, x3, upRe
+    function curveUpRe(Re)
+        !
+        !       curveUpRe determines the value of the nonDimensional
+        !       tangential velocity (made nonDimensional with the skin
+        !       friction velocity) for the given Reynolds number.
+        !       This data has been curve fitted with cubic splines.
+        !
+        use paramTurb
+        implicit none
+        !
+        !      Function type.
+        !
+        real(kind=realType) :: curveUpRe
+        !
+        !      Function arguments.
+        !
+        real(kind=realType), intent(in) :: Re
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: ii, nn, start
+        real(kind=realType) :: x, x2, x3, upRe
 
-    ! Determine the situation we are dealing with.
+        ! Determine the situation we are dealing with.
 
-    if(Re <= reT(0)) then
+        if (Re <= reT(0)) then
 
-       ! Reynolds number is less than the smallest number in the curve
-       ! fit. Use extrapolation.
+            ! Reynolds number is less than the smallest number in the curve
+            ! fit. Use extrapolation.
 
-       x     = sqrt(Re/reT(0))
-       upRe = x*up0(1)
+            x = sqrt(Re / reT(0))
+            upRe = x * up0(1)
 
-    else if(Re >= reT(nFit)) then
+        else if (Re >= reT(nFit)) then
 
-       ! Reynolds number is larger than the largest number in the curve
-       ! fit. Set upRe to the largest value available.
+            ! Reynolds number is larger than the largest number in the curve
+            ! fit. Set upRe to the largest value available.
 
-       nn = nFit
-       x  = reT(nn) - reT(nn-1)
-       x2 = x*x
-       x3 = x*x2
+            nn = nFit
+            x = reT(nn) - reT(nn - 1)
+            x2 = x * x
+            x3 = x * x2
 
-       upRe = up0(nn) + up1(nn)*x + up2(nn)*x2 + up3(nn)*x3
+            upRe = up0(nn) + up1(nn) * x + up2(nn) * x2 + up3(nn) * x3
 
-    else
+        else
 
-       ! Reynolds number is in the range of the curve fits.
-       ! First find the correct interval.
+            ! Reynolds number is in the range of the curve fits.
+            ! First find the correct interval.
 
-       ii    = nFit
-       start = 1
-       interval: do
+            ii = nFit
+            start = 1
+            interval: do
 
-          ! Next guess for the interval.
+                ! Next guess for the interval.
 
-          nn = start + ii/2
+                nn = start + ii / 2
 
-          ! Determine the situation we are having here.
+                ! Determine the situation we are having here.
 
-          if(Re > reT(nn)) then
+                if (Re > reT(nn)) then
 
-             ! Reynoldls number is larger than the upper boundary of
-             ! the current interval. Update the lower boundary.
+                    ! Reynoldls number is larger than the upper boundary of
+                    ! the current interval. Update the lower boundary.
 
-             start = nn + 1
-             ii    = ii - 1
+                    start = nn + 1
+                    ii = ii - 1
 
-          else if(Re >= reT(nn-1)) then
+                else if (Re >= reT(nn - 1)) then
 
-             ! This is the correct range. Exit the do-loop.
+                    ! This is the correct range. Exit the do-loop.
 
-             exit
+                    exit
 
-          endif
+                end if
 
-          ! Modify ii for the next branch to search.
+                ! Modify ii for the next branch to search.
 
-          ii = ii/2
+                ii = ii / 2
 
-       enddo interval
+            end do interval
 
-       ! Compute upRe using the cubic polynomial for this interval.
+            ! Compute upRe using the cubic polynomial for this interval.
 
-       x  = Re - reT(nn-1)
-       x2 = x*x
-       x3 = x*x2
+            x = Re - reT(nn - 1)
+            x2 = x * x
+            x3 = x * x2
 
-       upRe = up0(nn) + up1(nn)*x + up2(nn)*x2 + up3(nn)*x3
+            upRe = up0(nn) + up1(nn) * x + up2(nn) * x2 + up3(nn) * x3
 
-    endif
+        end if
 
-    ! And set the function value.
+        ! And set the function value.
 
-    curveUpRe = upRe
+        curveUpRe = upRe
 
-  end function curveUpRe
-  !
-  !      ==================================================================
-  !
-  subroutine curveTupYp(tup, yp, ntu1, ntu2)
-    !
-    !       CurveTupYp determines the value of the turbulent variables
-    !       ntu1 to ntu2 for the given yplus.
-    !       This data has been curve fitted with cubic splines.
+    end function curveUpRe
     !
-    use constants
-    use inputPhysics
-    use paramTurb
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: ntu1, ntu2
-    real(kind=realType),   intent(in) :: yp
-
-    real(kind=realType), dimension(ntu1:ntu2), intent(out) :: tup
+    !      ==================================================================
     !
-    !      Local variables.
-    !
-    integer(kind=intType) :: ii, nn, start, mm
-    real(kind=realType)   :: x, x2, x3, epsWall, fWall
-
-    ! Determine the situation we are dealing with.
-
-    if(yp <= ypT(0)) then
-
-       ! Yplus is less than the smallest number in the curve
-       ! fit. The treatment is turbulence model dependent.
-
-       select case(turbModel)
-
-       case (spalartAllmaras, spalartAllmarasEdwards)
-
-          ! Transport variable is zero on the wall. Use linear
-          ! interpolation.
-
-          x = yp/ypT(0)
-          do mm=ntu1,ntu2
-             tup(mm) = x*tup0(1,mm)
-          enddo
-
-          !=============================================================
-
-       case (komegaWilcox, komegaModified, menterSST)
-
-          ! Use the near wall expressions for k and omega.
-
-          x = yp/ypT(0)
-          do mm=ntu1,ntu2
-             select case(mm)
-             case (itu1)
-                if( tuLogFit(mm) ) then
-                   tup(mm) = exp(tup0(1,mm))*(x**3.23_realType)
-                else
-                   tup(mm) = tup0(1,mm)*(x**3.23_realType)
-                endif
-
-             case (itu2)
-                if( tuLogFit(mm) ) then
-                   tup(mm) = exp(tup0(1,mm))/(max(x,eps)**2)
-                else
-                   tup(mm) = tup0(1,mm)/(max(x,eps)**2)
-                endif
-             end select
-          enddo
-
-          !=============================================================
-
-       case (ktau)
-
-          ! Use the near wall expressions for k and tau.
-
-          x = yp/ypT(0)
-          do mm=ntu1,ntu2
-             select case(mm)
-             case (itu1)
-                if( tuLogFit(mm) ) then
-                   tup(mm) = exp(tup0(1,mm))*(x**3.23_realType)
-                else
-                   tup(mm) = tup0(1,mm)*(x**3.23_realType)
-                endif
-
-             case (itu2)
-                if( tuLogFit(mm) ) then
-                   tup(mm) = exp(tup0(1,mm))*x*x
-                else
-                   tup(mm) = tup0(1,mm)*x*x
-                endif
-             end select
-          enddo
-
-          !=============================================================
-
-       case (v2f)
-
-          ! Use the near wall expressions for k, epsilon, v2 and f.
-
-          x = yp/ypT(0)
-          do mm=ntu1,ntu2
-             select case(mm)
-             case (itu1)
-                if( tuLogFit(mm) ) then
-                   tup(mm) = exp(tup0(1,mm))*x**2
-                else
-                   tup(mm) = tup0(1,mm)*x**2
-                endif
-
-             case (itu2)  ! epsilon cannot be fitted logarithmically.
-                if( tuLogFit(mm) ) then
-                   call terminate(&
-                        "curveTupYp", &
-                        "Check curveFit, epsilon cannot be fitted with log")
-                else
-                   if(rvfN == 1) epsWall = 0.33_realType
-                   if(rvfN == 6) epsWall = 0.27_realType
-                   tup(mm) = epsWall + (tup0(1,mm)-epsWall)*x
-                endif
-
-             case (itu3)
-                if( tuLogFit(mm) ) then
-                   tup(mm) = exp(tup0(1,mm))*x**4
-                else
-                   tup(mm) = tup0(1,mm)*x**4
-                endif
-
-             case (itu4)
-                if( tuLogFit(mm) ) then
-                   if(rvfN == 1) &
-                        call terminate(&
-                        "curveTupYp", &
-                        "Check curveFit, f cannot be fitted with log")
-                   if(rvfN == 6) tup(mm) = exp(tup(mm))*x
-                else
-                   if(rvfN == 1) fWall =-0.0035_realType
-                   if(rvfN == 6) fWall = zero
-                   tup(mm) = fWall + (tup0(1,mm)-fWall)*x
-                endif
-
-             case (itu5)
-                if( tuLogFit(mm) ) then
-                   tup(mm) = exp(tup(mm))*x**4
-                else
-                   tup(mm) = tup0(1,mm)*x**4
-                endif
-             end select
-          enddo
-
-       end select
-
-       !=================================================================
-
-    else if(yp >= ypT(nFit)) then
-
-       ! Yplus is larger than the largest number in the curve
-       ! fit. Set tup to the largest value available.
-
-       nn = nFit
-       x  = ypT(nn) - ypT(nn-1)
-       x2 = x*x
-       x3 = x*x2
+    subroutine curveTupYp(tup, yp, ntu1, ntu2)
+        !
+        !       CurveTupYp determines the value of the turbulent variables
+        !       ntu1 to ntu2 for the given yplus.
+        !       This data has been curve fitted with cubic splines.
+        !
+        use constants
+        use inputPhysics
+        use paramTurb
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: ntu1, ntu2
+        real(kind=realType), intent(in) :: yp
+
+        real(kind=realType), dimension(ntu1:ntu2), intent(out) :: tup
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: ii, nn, start, mm
+        real(kind=realType) :: x, x2, x3, epsWall, fWall
+
+        ! Determine the situation we are dealing with.
+
+        if (yp <= ypT(0)) then
+
+            ! Yplus is less than the smallest number in the curve
+            ! fit. The treatment is turbulence model dependent.
+
+            select case (turbModel)
+
+            case (spalartAllmaras, spalartAllmarasEdwards)
+
+                ! Transport variable is zero on the wall. Use linear
+                ! interpolation.
+
+                x = yp / ypT(0)
+                do mm = ntu1, ntu2
+                    tup(mm) = x * tup0(1, mm)
+                end do
+
+                !=============================================================
+
+            case (komegaWilcox, komegaModified, menterSST)
+
+                ! Use the near wall expressions for k and omega.
+
+                x = yp / ypT(0)
+                do mm = ntu1, ntu2
+                    select case (mm)
+                    case (itu1)
+                        if (tuLogFit(mm)) then
+                            tup(mm) = exp(tup0(1, mm)) * (x**3.23_realType)
+                        else
+                            tup(mm) = tup0(1, mm) * (x**3.23_realType)
+                        end if
+
+                    case (itu2)
+                        if (tuLogFit(mm)) then
+                            tup(mm) = exp(tup0(1, mm)) / (max(x, eps)**2)
+                        else
+                            tup(mm) = tup0(1, mm) / (max(x, eps)**2)
+                        end if
+                    end select
+                end do
+
+                !=============================================================
+
+            case (ktau)
+
+                ! Use the near wall expressions for k and tau.
+
+                x = yp / ypT(0)
+                do mm = ntu1, ntu2
+                    select case (mm)
+                    case (itu1)
+                        if (tuLogFit(mm)) then
+                            tup(mm) = exp(tup0(1, mm)) * (x**3.23_realType)
+                        else
+                            tup(mm) = tup0(1, mm) * (x**3.23_realType)
+                        end if
+
+                    case (itu2)
+                        if (tuLogFit(mm)) then
+                            tup(mm) = exp(tup0(1, mm)) * x * x
+                        else
+                            tup(mm) = tup0(1, mm) * x * x
+                        end if
+                    end select
+                end do
+
+                !=============================================================
+
+            case (v2f)
+
+                ! Use the near wall expressions for k, epsilon, v2 and f.
+
+                x = yp / ypT(0)
+                do mm = ntu1, ntu2
+                    select case (mm)
+                    case (itu1)
+                        if (tuLogFit(mm)) then
+                            tup(mm) = exp(tup0(1, mm)) * x**2
+                        else
+                            tup(mm) = tup0(1, mm) * x**2
+                        end if
 
-       do mm=ntu1,ntu2
-          tup(mm) = tup0(nn,mm)    + tup1(nn,mm)*x &
-               + tup2(nn,mm)*x2 + tup3(nn,mm)*x3
-          if( tuLogFit(mm) ) tup(mm) = exp(tup(mm))
-       enddo
+                    case (itu2)  ! epsilon cannot be fitted logarithmically.
+                        if (tuLogFit(mm)) then
+                            call terminate( &
+                                "curveTupYp", &
+                                "Check curveFit, epsilon cannot be fitted with log")
+                        else
+                            if (rvfN == 1) epsWall = 0.33_realType
+                            if (rvfN == 6) epsWall = 0.27_realType
+                            tup(mm) = epsWall + (tup0(1, mm) - epsWall) * x
+                        end if
 
-       !=================================================================
+                    case (itu3)
+                        if (tuLogFit(mm)) then
+                            tup(mm) = exp(tup0(1, mm)) * x**4
+                        else
+                            tup(mm) = tup0(1, mm) * x**4
+                        end if
 
-    else
-
-       ! y-plus is in the range of the curve fits.
-       ! First find the correct interval.
-
-       ii    = nFit
-       start = 1
-       interval: do
-
-          ! Next guess for the interval.
-
-          nn = start + ii/2
-
-          ! Determine the situation we are having here.
-
-          if(yp > ypT(nn)) then
-
-             ! Yplus is larger than the upper boundary of
-             ! the current interval. Update the lower boundary.
-
-             start = nn + 1
-             ii    = ii - 1
-
-          else if(yp >= ypT(nn-1)) then
-
-             ! This is the correct range. Exit the do-loop.
-
-             exit
-
-          endif
-
-          ! Modify ii for the next branch to search.
-
-          ii = ii/2
-
-       enddo interval
-
-       ! Compute tup using the cubic polynomial for this interval.
-
-       x  = yp - ypT(nn-1)
-       x2 = x*x
-       x3 = x*x2
-
-       do mm=ntu1,ntu2
-          tup(mm) = tup0(nn,mm)    + tup1(nn,mm)*x &
-               + tup2(nn,mm)*x2 + tup3(nn,mm)*x3
-          if( tuLogFit(mm) ) tup(mm) = exp(tup(mm))
-       enddo
-
-    endif
-
-  end subroutine curveTupYp
-
-  subroutine initCurveFitDataKtau
-    !
-    !       initCurveFitDataKtau contains the curve fit constants for
-    !       the wall function data for the k-tau turbulence model.
-    !
-    use flowVarRefState
-    use paramTurb
-    implicit none
-    !
-    !      Local variables.
-    !
-    ! integer :: ierr
-
-    call terminate("initCurveFitDataKtau", &
-         "Not implemented yet")
-
-  end subroutine initCurveFitDataKtau
-
-  subroutine initCurveFitDataKw
-    !
-    !       initCurveFitDataKw contains the curve fit constants for
-    !       the wall function data for the standard Wilcox k-omega model.
-    !
-    use constants
-    use flowVarRefState
-    use paramTurb
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    ! Set the number of data points and allocate the memory for the
-    ! arrays of the curve fits.
-
-    nFit = 34
-
-    allocate(ypT(0:nFit), reT(0:nFit),                     &
-         up0(nFit),   up1(nFit), up2(nFit), up3(nFit), &
-         tup0(nFit,nt1:nt2), tup1(nFit,nt1:nt2),       &
-         tup2(nFit,nt1:nt2), tup3(nFit,nt1:nt2),       &
-         tuLogFit(nt1:nt2), stat=ierr)
-    if(ierr /= 0)                              &
-         call terminate("initCurveFitDataKw", &
-         "Memory allocation failure for curve fit &
-         &coefficients")
-
-    ! Set the values of the Reynolds numbers at interval boundaries.
-
-    reT(0)  = 0.12529547e+00_realType
-    reT(1)  = 0.44996057e+00_realType
-    reT(2)  = 0.11581311e+01_realType
-    reT(3)  = 0.25353238e+01_realType
-    reT(4)  = 0.50446282e+01_realType
-    reT(5)  = 0.94194631e+01_realType
-    reT(6)  = 0.16766555e+02_realType
-    reT(7)  = 0.28556753e+02_realType
-    reT(8)  = 0.46274930e+02_realType
-    reT(9)  = 0.71021000e+02_realType
-    reT(10) = 0.10383163e+03_realType
-    reT(11) = 0.14621738e+03_realType
-    reT(12) = 0.20028019e+03_realType
-    reT(13) = 0.26868298e+03_realType
-    reT(14) = 0.35467049e+03_realType
-    reT(15) = 0.46212508e+03_realType
-    reT(16) = 0.59566097e+03_realType
-    reT(17) = 0.76073076e+03_realType
-    reT(18) = 0.96373333e+03_realType
-    reT(19) = 0.12121761e+04_realType
-    reT(20) = 0.15147917e+04_realType
-    reT(21) = 0.18817196e+04_realType
-    reT(22) = 0.23247121e+04_realType
-    reT(23) = 0.28572322e+04_realType
-    reT(24) = 0.34947840e+04_realType
-    reT(25) = 0.42551444e+04_realType
-    reT(26) = 0.51584529e+04_realType
-    reT(27) = 0.62277581e+04_realType
-    reT(28) = 0.74889831e+04_realType
-    reT(29) = 0.89716314e+04_realType
-    reT(30) = 0.10708764e+05_realType
-    reT(31) = 0.12737815e+05_realType
-    reT(32) = 0.15100490e+05_realType
-    reT(33) = 0.17843939e+05_realType
-    reT(34) = 0.21020534e+05_realType
-
-    ! Set the values of the y+ values at interval boundaries.
-
-    ypT(0)  = 0.35397100e+00_realType
-    ypT(1)  = 0.67079200e+00_realType
-    ypT(2)  = 0.10761700e+01_realType
-    ypT(3)  = 0.15923000e+01_realType
-    ypT(4)  = 0.22463000e+01_realType
-    ypT(5)  = 0.30710600e+01_realType
-    ypT(6)  = 0.41063800e+01_realType
-    ypT(7)  = 0.54001200e+01_realType
-    ypT(8)  = 0.70095900e+01_realType
-    ypT(9)  = 0.90031400e+01_realType
-    ypT(10) = 0.11461900e+02_realType
-    ypT(11) = 0.14481700e+02_realType
-    ypT(12) = 0.18175400e+02_realType
-    ypT(13) = 0.22675200e+02_realType
-    ypT(14) = 0.28135500e+02_realType
-    ypT(15) = 0.34735800e+02_realType
-    ypT(16) = 0.42683800e+02_realType
-    ypT(17) = 0.52219300e+02_realType
-    ypT(18) = 0.63617800e+02_realType
-    ypT(19) = 0.77194900e+02_realType
-    ypT(20) = 0.93310400e+02_realType
-    ypT(21) = 0.11237300e+03_realType
-    ypT(22) = 0.13484800e+03_realType
-    ypT(23) = 0.16125700e+03_realType
-    ypT(24) = 0.19218900e+03_realType
-    ypT(25) = 0.22830600e+03_realType
-    ypT(26) = 0.27034500e+03_realType
-    ypT(27) = 0.31913000e+03_realType
-    ypT(28) = 0.37557400e+03_realType
-    ypT(29) = 0.44069100e+03_realType
-    ypT(30) = 0.51559800e+03_realType
-    ypT(31) = 0.60152700e+03_realType
-    ypT(32) = 0.69982900e+03_realType
-    ypT(33) = 0.81198500e+03_realType
-    ypT(34) = 0.93960800e+03_realType
-
-    ! Set the values of constants for the cubic fits of the
-    ! non-dimensional tangential velocity.
-
-    up0(1)  = 0.35397100e+00_realType
-    up0(2)  = 0.67079000e+00_realType
-    up0(3)  = 0.10761600e+01_realType
-    up0(4)  = 0.15922400e+01_realType
-    up0(5)  = 0.22457500e+01_realType
-    up0(6)  = 0.30671700e+01_realType
-    up0(7)  = 0.40830500e+01_realType
-    up0(8)  = 0.52881700e+01_realType
-    up0(9)  = 0.66016600e+01_realType
-    up0(10) = 0.78884700e+01_realType
-    up0(11) = 0.90588500e+01_realType
-    up0(12) = 0.10096700e+02_realType
-    up0(13) = 0.11019300e+02_realType
-    up0(14) = 0.11849200e+02_realType
-    up0(15) = 0.12605800e+02_realType
-    up0(16) = 0.13304000e+02_realType
-    up0(17) = 0.13955200e+02_realType
-    up0(18) = 0.14568000e+02_realType
-    up0(19) = 0.15148800e+02_realType
-    up0(20) = 0.15702800e+02_realType
-    up0(21) = 0.16233900e+02_realType
-    up0(22) = 0.16745300e+02_realType
-    up0(23) = 0.17239500e+02_realType
-    up0(24) = 0.17718500e+02_realType
-    up0(25) = 0.18184100e+02_realType
-    up0(26) = 0.18637900e+02_realType
-    up0(27) = 0.19081000e+02_realType
-    up0(28) = 0.19514800e+02_realType
-    up0(29) = 0.19940100e+02_realType
-    up0(30) = 0.20358100e+02_realType
-    up0(31) = 0.20769600e+02_realType
-    up0(32) = 0.21175800e+02_realType
-    up0(33) = 0.21577400e+02_realType
-    up0(34) = 0.21975700e+02_realType
-
-    up1(1)  = 0.12846958e+01_realType
-    up1(2)  = 0.69922936e+00_realType
-    up1(3)  = 0.44186548e+00_realType
-    up1(4)  = 0.30093680e+00_realType
-    up1(5)  = 0.21425046e+00_realType
-    up1(6)  = 0.15674045e+00_realType
-    up1(7)  = 0.11605614e+00_realType
-    up1(8)  = 0.85352379e-01_realType
-    up1(9)  = 0.61235043e-01_realType
-    up1(10) = 0.42691639e-01_realType
-    up1(11) = 0.29366174e-01_realType
-    up1(12) = 0.20326381e-01_realType
-    up1(13) = 0.14310141e-01_realType
-    up1(14) = 0.10275905e-01_realType
-    up1(15) = 0.75205965e-02_realType
-    up1(16) = 0.55993913e-02_realType
-    up1(17) = 0.42330072e-02_realType
-    up1(18) = 0.32428406e-02_realType
-    up1(19) = 0.25137042e-02_realType
-    up1(20) = 0.19691199e-02_realType
-    up1(21) = 0.15570310e-02_realType
-    up1(22) = 0.12416035e-02_realType
-    up1(23) = 0.99762939e-03_realType
-    up1(24) = 0.80730082e-03_realType
-    up1(25) = 0.65769508e-03_realType
-    up1(26) = 0.53910966e-03_realType
-    up1(27) = 0.44453711e-03_realType
-    up1(28) = 0.36862857e-03_realType
-    up1(29) = 0.30733926e-03_realType
-    up1(30) = 0.25762621e-03_realType
-    up1(31) = 0.21711632e-03_realType
-    up1(32) = 0.18393679e-03_realType
-    up1(33) = 0.15665505e-03_realType
-    up1(34) = 0.13415441e-03_realType
-
-    up2(1)  = -0.10506864e+01_realType
-    up2(2)  = -0.17378349e+00_realType
-    up2(3)  = -0.43906517e-01_realType
-    up2(4)  = -0.13876317e-01_realType
-    up2(5)  = -0.50197713e-02_realType
-    up2(6)  = -0.20046033e-02_realType
-    up2(7)  = -0.91800803e-03_realType
-    up2(8)  = -0.53858650e-03_realType
-    up2(9)  = -0.37015912e-03_realType
-    up2(10) = -0.23581357e-03_realType
-    up2(11) = -0.13214946e-03_realType
-    up2(12) = -0.69676197e-04_realType
-    up2(13) = -0.36527030e-04_realType
-    up2(14) = -0.19485941e-04_realType
-    up2(15) = -0.10680793e-04_realType
-    up2(16) = -0.60059830e-05_realType
-    up2(17) = -0.34636741e-05_realType
-    up2(18) = -0.20504308e-05_realType
-    up2(19) = -0.12351270e-05_realType
-    up2(20) = -0.76062105e-06_realType
-    up2(21) = -0.47546804e-06_realType
-    up2(22) = -0.30260755e-06_realType
-    up2(23) = -0.19542870e-06_realType
-    up2(24) = -0.12770106e-06_realType
-    up2(25) = -0.84214126e-07_realType
-    up2(26) = -0.56643139e-07_realType
-    up2(27) = -0.38016078e-07_realType
-    up2(28) = -0.26134020e-07_realType
-    up2(29) = -0.17887512e-07_realType
-    up2(30) = -0.12500449e-07_realType
-    up2(31) = -0.86706361e-08_realType
-    up2(32) = -0.61786381e-08_realType
-    up2(33) = -0.43440887e-08_realType
-    up2(34) = -0.31212378e-08_realType
-
-    up3(1)  =  0.30603769e+00_realType
-    up3(2)  = -0.74623173e-02_realType
-    up3(3)  = -0.35137590e-02_realType
-    up3(4)  = -0.90241883e-03_realType
-    up3(5)  = -0.23666410e-03_realType
-    up3(6)  = -0.69336451e-04_realType
-    up3(7)  = -0.21717508e-04_realType
-    up3(8)  = -0.53427330e-05_realType
-    up3(9)  = -0.12162441e-06_realType
-    up3(10) =  0.66537991e-06_realType
-    up3(11) =  0.40127135e-06_realType
-    up3(12) =  0.17307016e-06_realType
-    up3(13) =  0.68595664e-07_realType
-    up3(14) =  0.26859566e-07_realType
-    up3(15) =  0.10802572e-07_realType
-    up3(16) =  0.44423253e-08_realType
-    up3(17) =  0.18757232e-08_realType
-    up3(18) =  0.83595370e-09_realType
-    up3(19) =  0.37334228e-09_realType
-    up3(20) =  0.17567414e-09_realType
-    up3(21) =  0.82933451e-10_realType
-    up3(22) =  0.40989510e-10_realType
-    up3(23) =  0.20935863e-10_realType
-    up3(24) =  0.10846455e-10_realType
-    up3(25) =  0.54661649e-11_realType
-    up3(26) =  0.31700296e-11_realType
-    up3(27) =  0.15722041e-11_realType
-    up3(28) =  0.97074333e-12_realType
-    up3(29) =  0.50475514e-12_realType
-    up3(30) =  0.32254746e-12_realType
-    up3(31) =  0.16247920e-12_realType
-    up3(32) =  0.11432002e-12_realType
-    up3(33) =  0.59121027e-13_realType
-    up3(34) =  0.38726995e-13_realType
-
-    ! Set the values of tuLogFit. Both for k and omega the
-    ! logarithm has been fitted.
-
-    tuLogFit(itu1) = .true.
-    tuLogFit(itu2) = .true.
-
-    ! Set the values of constants for the cubic fits of the
-    ! non-dimensional k and omega values.
-
-    ! Constants for k.
-
-    tup0(1,itu1)  = -0.10178274e+02_realType
-    tup0(2,itu1)  = -0.79134047e+01_realType
-    tup0(3,itu1)  = -0.62154735e+01_realType
-    tup0(4,itu1)  = -0.48268972e+01_realType
-    tup0(5,itu1)  = -0.36279650e+01_realType
-    tup0(6,itu1)  = -0.25597781e+01_realType
-    tup0(7,itu1)  = -0.16005079e+01_realType
-    tup0(8,itu1)  = -0.76521262e+00_realType
-    tup0(9,itu1)  = -0.10076775e+00_realType
-    tup0(10,itu1) =  0.36262719e+00_realType
-    tup0(11,itu1) =  0.65553877e+00_realType
-    tup0(12,itu1) =  0.83590897e+00_realType
-    tup0(13,itu1) =  0.94909088e+00_realType
-    tup0(14,itu1) =  0.10224941e+01_realType
-    tup0(15,itu1) =  0.10717000e+01_realType
-    tup0(16,itu1) =  0.11056409e+01_realType
-    tup0(17,itu1) =  0.11295908e+01_realType
-    tup0(18,itu1) =  0.11467673e+01_realType
-    tup0(19,itu1) =  0.11591867e+01_realType
-    tup0(20,itu1) =  0.11681570e+01_realType
-    tup0(21,itu1) =  0.11745296e+01_realType
-    tup0(22,itu1) =  0.11788734e+01_realType
-    tup0(23,itu1) =  0.11815615e+01_realType
-    tup0(24,itu1) =  0.11828278e+01_realType
-    tup0(25,itu1) =  0.11828094e+01_realType
-    tup0(26,itu1) =  0.11815707e+01_realType
-    tup0(27,itu1) =  0.11791103e+01_realType
-    tup0(28,itu1) =  0.11753665e+01_realType
-    tup0(29,itu1) =  0.11702319e+01_realType
-    tup0(30,itu1) =  0.11635476e+01_realType
-    tup0(31,itu1) =  0.11550903e+01_realType
-    tup0(32,itu1) =  0.11445826e+01_realType
-    tup0(33,itu1) =  0.11316601e+01_realType
-    tup0(34,itu1) =  0.11158659e+01_realType
-
-    ! Constants for omega.
-
-    tup0(1,itu2)  =  0.68385895e+01_realType
-    tup0(2,itu2)  =  0.55423492e+01_realType
-    tup0(3,itu2)  =  0.45364394e+01_realType
-    tup0(4,itu2)  =  0.37003435e+01_realType
-    tup0(5,itu2)  =  0.29762436e+01_realType
-    tup0(6,itu2)  =  0.23400254e+01_realType
-    tup0(7,itu2)  =  0.17897909e+01_realType
-    tup0(8,itu2)  =  0.13296526e+01_realType
-    tup0(9,itu2)  =  0.94313517e+00_realType
-    tup0(10,itu2) =  0.59512633e+00_realType
-    tup0(11,itu2) =  0.26383242e+00_realType
-    tup0(12,itu2) = -0.54289357e-01_realType
-    tup0(13,itu2) = -0.35764684e+00_realType
-    tup0(14,itu2) = -0.64548336e+00_realType
-    tup0(15,itu2) = -0.91832029e+00_realType
-    tup0(16,itu2) = -0.11773601e+01_realType
-    tup0(17,itu2) = -0.14240004e+01_realType
-    tup0(18,itu2) = -0.16596108e+01_realType
-    tup0(19,itu2) = -0.18854088e+01_realType
-    tup0(20,itu2) = -0.21024564e+01_realType
-    tup0(21,itu2) = -0.23116299e+01_realType
-    tup0(22,itu2) = -0.25136741e+01_realType
-    tup0(23,itu2) = -0.27091934e+01_realType
-    tup0(24,itu2) = -0.28986818e+01_realType
-    tup0(25,itu2) = -0.30825349e+01_realType
-    tup0(26,itu2) = -0.32610659e+01_realType
-    tup0(27,itu2) = -0.34345194e+01_realType
-    tup0(28,itu2) = -0.36030725e+01_realType
-    tup0(29,itu2) = -0.37668496e+01_realType
-    tup0(30,itu2) = -0.39259191e+01_realType
-    tup0(31,itu2) = -0.40803056e+01_realType
-    tup0(32,itu2) = -0.42299856e+01_realType
-    tup0(33,itu2) = -0.43749001e+01_realType
-    tup0(34,itu2) = -0.45149548e+01_realType
-
-    ! Constants for k.
-
-    tup1(1,itu1)  =  0.10151083e+02_realType
-    tup1(2,itu1)  =  0.54871316e+01_realType
-    tup1(3,itu1)  =  0.33494093e+01_realType
-    tup1(4,itu1)  =  0.22113000e+01_realType
-    tup1(5,itu1)  =  0.15331218e+01_realType
-    tup1(6,itu1)  =  0.10899838e+01_realType
-    tup1(7,itu1)  =  0.77051060e+00_realType
-    tup1(8,itu1)  =  0.51657998e+00_realType
-    tup1(9,itu1)  =  0.31302624e+00_realType
-    tup1(10,itu1) =  0.16986834e+00_realType
-    tup1(11,itu1) =  0.86387987e-01_realType
-    tup1(12,itu1) =  0.43725644e-01_realType
-    tup1(13,itu1) =  0.22772335e-01_realType
-    tup1(14,itu1) =  0.12310034e-01_realType
-    tup1(15,itu1) =  0.68940825e-02_realType
-    tup1(16,itu1) =  0.39792104e-02_realType
-    tup1(17,itu1) =  0.23523017e-02_realType
-    tup1(18,itu1) =  0.14137727e-02_realType
-    tup1(19,itu1) =  0.85642296e-03_realType
-    tup1(20,itu1) =  0.51672343e-03_realType
-    tup1(21,itu1) =  0.30463346e-03_realType
-    tup1(22,itu1) =  0.16929149e-03_realType
-    tup1(23,itu1) =  0.80893185e-04_realType
-    tup1(24,itu1) =  0.21762685e-04_realType
-    tup1(25,itu1) = -0.18748602e-04_realType
-    tup1(26,itu1) = -0.47330395e-04_realType
-    tup1(27,itu1) = -0.68310393e-04_realType
-    tup1(28,itu1) = -0.84371684e-04_realType
-    tup1(29,itu1) = -0.97226185e-04_realType
-    tup1(30,itu1) = -0.10813606e-03_realType
-    tup1(31,itu1) = -0.11791513e-03_realType
-    tup1(32,itu1) = -0.12717807e-03_realType
-    tup1(33,itu1) = -0.13644855e-03_realType
-    tup1(34,itu1) = -0.14611996e-03_realType
-
-    ! Constants for omega.
-
-    tup1(1,itu2)  = -0.55838269e+01_realType
-    tup1(2,itu2)  = -0.31876950e+01_realType
-    tup1(3,itu2)  = -0.19989037e+01_realType
-    tup1(4,itu2)  = -0.13333526e+01_realType
-    tup1(5,itu2)  = -0.91990459e+00_realType
-    tup1(6,itu2)  = -0.63785038e+00_realType
-    tup1(7,itu2)  = -0.43381141e+00_realType
-    tup1(8,itu2)  = -0.29162744e+00_realType
-    tup1(9,itu2)  = -0.20386405e+00_realType
-    tup1(10,itu2) = -0.15257310e+00_realType
-    tup1(11,itu2) = -0.11853766e+00_realType
-    tup1(12,itu2) = -0.92571574e-01_realType
-    tup1(13,itu2) = -0.72154025e-01_realType
-    tup1(14,itu2) = -0.56291949e-01_realType
-    tup1(15,itu2) = -0.44100353e-01_realType
-    tup1(16,itu2) = -0.34758707e-01_realType
-    tup1(17,itu2) = -0.27583190e-01_realType
-    tup1(18,itu2) = -0.22041103e-01_realType
-    tup1(19,itu2) = -0.17731129e-01_realType
-    tup1(20,itu2) = -0.14354453e-01_realType
-    tup1(21,itu2) = -0.11689595e-01_realType
-    tup1(22,itu2) = -0.95711712e-02_realType
-    tup1(23,itu2) = -0.78759450e-02_realType
-    tup1(24,itu2) = -0.65109013e-02_realType
-    tup1(25,itu2) = -0.54047659e-02_realType
-    tup1(26,itu2) = -0.45036146e-02_realType
-    tup1(27,itu2) = -0.37655964e-02_realType
-    tup1(28,itu2) = -0.31581617e-02_realType
-    tup1(29,itu2) = -0.26558406e-02_realType
-    tup1(30,itu2) = -0.22385872e-02_realType
-    tup1(31,itu2) = -0.18905375e-02_realType
-    tup1(32,itu2) = -0.15990497e-02_realType
-    tup1(33,itu2) = -0.13540430e-02_realType
-    tup1(34,itu2) = -0.11473506e-02_realType
-
-    ! Constants for k.
-
-    tup2(1,itu1)  = -0.13708334e+02_realType
-    tup2(2,itu1)  = -0.43370192e+01_realType
-    tup2(3,itu1)  = -0.16256260e+01_realType
-    tup2(4,itu1)  = -0.69729756e+00_realType
-    tup2(5,itu1)  = -0.32831484e+00_realType
-    tup2(6,itu1)  = -0.16501608e+00_realType
-    tup2(7,itu1)  = -0.93271929e-01_realType
-    tup2(8,itu1)  = -0.66905532e-01_realType
-    tup2(9,itu1)  = -0.49449221e-01_realType
-    tup2(10,itu1) = -0.27955265e-01_realType
-    tup2(11,itu1) = -0.12356466e-01_realType
-    tup2(12,itu1) = -0.49538360e-02_realType
-    tup2(13,itu1) = -0.19816552e-02_realType
-    tup2(14,itu1) = -0.82035726e-03_realType
-    tup2(15,itu1) = -0.35459496e-03_realType
-    tup2(16,itu1) = -0.15988154e-03_realType
-    tup2(17,itu1) = -0.74920111e-04_realType
-    tup2(18,itu1) = -0.36432844e-04_realType
-    tup2(19,itu1) = -0.18228517e-04_realType
-    tup2(20,itu1) = -0.94187249e-05_realType
-    tup2(21,itu1) = -0.49803455e-05_realType
-    tup2(22,itu1) = -0.26991628e-05_realType
-    tup2(23,itu1) = -0.15033797e-05_realType
-    tup2(24,itu1) = -0.85865308e-06_realType
-    tup2(25,itu1) = -0.50010225e-06_realType
-    tup2(26,itu1) = -0.30003597e-06_realType
-    tup2(27,itu1) = -0.18914449e-06_realType
-    tup2(28,itu1) = -0.12284699e-06_realType
-    tup2(29,itu1) = -0.82382282e-07_realType
-    tup2(30,itu1) = -0.60415725e-07_realType
-    tup2(31,itu1) = -0.44704797e-07_realType
-    tup2(32,itu1) = -0.36272580e-07_realType
-    tup2(33,itu1) = -0.30797090e-07_realType
-    tup2(34,itu1) = -0.27066472e-07_realType
-
-    ! Constants for omega.
-
-    tup2(1,itu2)  =  0.65688815e+01_realType
-    tup2(2,itu2)  =  0.22942977e+01_realType
-    tup2(3,itu2)  =  0.91326107e+00_realType
-    tup2(4,itu2)  =  0.40527609e+00_realType
-    tup2(5,itu2)  =  0.19819770e+00_realType
-    tup2(6,itu2)  =  0.11119507e+00_realType
-    tup2(7,itu2)  =  0.71308510e-01_realType
-    tup2(8,itu2)  =  0.41419218e-01_realType
-    tup2(9,itu2)  =  0.18358706e-01_realType
-    tup2(10,itu2) =  0.79158389e-02_realType
-    tup2(11,itu2) =  0.45072385e-02_realType
-    tup2(12,itu2) =  0.29542525e-02_realType
-    tup2(13,itu2) =  0.19335211e-02_realType
-    tup2(14,itu2) =  0.12420728e-02_realType
-    tup2(15,itu2) =  0.79078278e-03_realType
-    tup2(16,itu2) =  0.50394724e-03_realType
-    tup2(17,itu2) =  0.32312896e-03_realType
-    tup2(18,itu2) =  0.20923598e-03_realType
-    tup2(19,itu2) =  0.13683514e-03_realType
-    tup2(20,itu2) =  0.90568628e-04_realType
-    tup2(21,itu2) =  0.60506139e-04_realType
-    tup2(22,itu2) =  0.40936800e-04_realType
-    tup2(23,itu2) =  0.27920509e-04_realType
-    tup2(24,itu2) =  0.19242656e-04_realType
-    tup2(25,itu2) =  0.13394215e-04_realType
-    tup2(26,itu2) =  0.93908861e-05_realType
-    tup2(27,itu2) =  0.66475834e-05_realType
-    tup2(28,itu2) =  0.47374937e-05_realType
-    tup2(29,itu2) =  0.34060533e-05_realType
-    tup2(30,itu2) =  0.24642067e-05_realType
-    tup2(31,itu2) =  0.17969941e-05_realType
-    tup2(32,itu2) =  0.13185213e-05_realType
-    tup2(33,itu2) =  0.97355039e-06_realType
-    tup2(34,itu2) =  0.72438303e-06_realType
-
-    ! Constants for k.
-
-    tup3(1,itu1)  =  0.13357255e+02_realType
-    tup3(2,itu1)  =  0.27962653e+01_realType
-    tup3(3,itu1)  =  0.67564983e+00_realType
-    tup3(4,itu1)  =  0.18227593e+00_realType
-    tup3(5,itu1)  =  0.48230769e-01_realType
-    tup3(6,itu1)  =  0.69085885e-02_realType
-    tup3(7,itu1)  = -0.25075980e-02_realType
-    tup3(8,itu1)  =  0.15198665e-02_realType
-    tup3(9,itu1)  =  0.45292571e-02_realType
-    tup3(10,itu1) =  0.29768816e-02_realType
-    tup3(11,itu1) =  0.11684431e-02_realType
-    tup3(12,itu1) =  0.38217835e-03_realType
-    tup3(13,itu1) =  0.12135735e-03_realType
-    tup3(14,itu1) =  0.39609332e-04_realType
-    tup3(15,itu1) =  0.13512654e-04_realType
-    tup3(16,itu1) =  0.48259094e-05_realType
-    tup3(17,itu1) =  0.17973362e-05_realType
-    tup3(18,itu1) =  0.70093792e-06_realType
-    tup3(19,itu1) =  0.28079141e-06_realType
-    tup3(20,itu1) =  0.11741966e-06_realType
-    tup3(21,itu1) =  0.50025034e-07_realType
-    tup3(22,itu1) =  0.21729950e-07_realType
-    tup3(23,itu1) =  0.96902710e-08_realType
-    tup3(24,itu1) =  0.43926199e-08_realType
-    tup3(25,itu1) =  0.19274189e-08_realType
-    tup3(26,itu1) =  0.80093554e-09_realType
-    tup3(27,itu1) =  0.33523262e-09_realType
-    tup3(28,itu1) =  0.10603408e-09_realType
-    tup3(29,itu1) = -0.14220954e-10_realType
-    tup3(30,itu1) = -0.43245103e-10_realType
-    tup3(31,itu1) = -0.71330147e-10_realType
-    tup3(32,itu1) = -0.73789519e-10_realType
-    tup3(33,itu1) = -0.73223977e-10_realType
-    tup3(34,itu1) = -0.73679919e-10_realType
-
-    ! Constants for omega.
-
-    tup3(1,itu2)  = -0.58652639e+01_realType
-    tup3(2,itu2)  = -0.13617293e+01_realType
-    tup3(3,itu2)  = -0.34682427e+00_realType
-    tup3(4,itu2)  = -0.90911692e-01_realType
-    tup3(5,itu2)  = -0.21991054e-01_realType
-    tup3(6,itu2)  = -0.81494825e-02_realType
-    tup3(7,itu2)  = -0.84291839e-02_realType
-    tup3(8,itu2)  = -0.58630201e-02_realType
-    tup3(9,itu2)  = -0.18374198e-02_realType
-    tup3(10,itu2) = -0.26966903e-03_realType
-    tup3(11,itu2) = -0.45904311e-04_realType
-    tup3(12,itu2) = -0.34368125e-04_realType
-    tup3(13,itu2) = -0.25332953e-04_realType
-    tup3(14,itu2) = -0.15345625e-04_realType
-    tup3(15,itu2) = -0.83950168e-05_realType
-    tup3(16,itu2) = -0.44072558e-05_realType
-    tup3(17,itu2) = -0.22740368e-05_realType
-    tup3(18,itu2) = -0.11801059e-05_realType
-    tup3(19,itu2) = -0.61295741e-06_realType
-    tup3(20,itu2) = -0.32633745e-06_realType
-    tup3(21,itu2) = -0.17280693e-06_realType
-    tup3(22,itu2) = -0.95608569e-07_realType
-    tup3(23,itu2) = -0.52411878e-07_realType
-    tup3(24,itu2) = -0.29366409e-07_realType
-    tup3(25,itu2) = -0.16959125e-07_realType
-    tup3(26,itu2) = -0.97228139e-08_realType
-    tup3(27,itu2) = -0.57661683e-08_realType
-    tup3(28,itu2) = -0.33988144e-08_realType
-    tup3(29,itu2) = -0.20698972e-08_realType
-    tup3(30,itu2) = -0.12548468e-08_realType
-    tup3(31,itu2) = -0.78279227e-09_realType
-    tup3(32,itu2) = -0.49051180e-09_realType
-    tup3(33,itu2) = -0.30968731e-09_realType
-    tup3(34,itu2) = -0.20495795e-09_realType
-
-  end subroutine initCurveFitDataKw
-
-  subroutine initCurveFitDataKwMod
-    !
-    !       initCurveFitDataKwMod contains the curve fit constants
-    !       for the wall function data for the modified k-omega turbulence
-    !       model.
-    !
-    use flowVarRefState
-    use paramTurb
-    implicit none
-    !
-    !      Local variables.
-    !
-    ! integer :: ierr
-
-    call terminate("initCurveFitDataKwMod", &
-         "Not implemented yet")
-
-  end subroutine initCurveFitDataKwMod
-
-  subroutine initCurveFitDataSST
-    !
-    !       initCurveFitDataSST contains the curve fit constants for
-    !       the wall function data for Menter's SST turbulence model.
-    !       Warning: Wall function data developed for k-omega model
-    !
-    use constants
-    use flowVarRefState
-    use paramTurb
-
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    ! Set the number of data points and allocate the memory for the
-    ! arrays of the curve fits.
-
-    nFit = 34
-
-    allocate(ypT(0:nFit), reT(0:nFit),                     &
-         up0(nFit),   up1(nFit), up2(nFit), up3(nFit), &
-         tup0(nFit,nt1:nt2), tup1(nFit,nt1:nt2),       &
-         tup2(nFit,nt1:nt2), tup3(nFit,nt1:nt2),       &
-         tuLogFit(nt1:nt2), stat=ierr)
-    if(ierr /= 0)                           &
-         call terminate("initCurveFitDataSST", &
-         "Memory allocation failure for curve fit &
-         &coefficients")
-
-    ! Set the values of the Reynolds numbers at interval boundaries.
-
-    reT(0)  = 0.12529547e+00_realType
-    reT(1)  = 0.44996057e+00_realType
-    reT(2)  = 0.11581311e+01_realType
-    reT(3)  = 0.25353238e+01_realType
-    reT(4)  = 0.50446282e+01_realType
-    reT(5)  = 0.94194631e+01_realType
-    reT(6)  = 0.16766555e+02_realType
-    reT(7)  = 0.28556753e+02_realType
-    reT(8)  = 0.46274930e+02_realType
-    reT(9)  = 0.71021000e+02_realType
-    reT(10) = 0.10383163e+03_realType
-    reT(11) = 0.14621738e+03_realType
-    reT(12) = 0.20028019e+03_realType
-    reT(13) = 0.26868298e+03_realType
-    reT(14) = 0.35467049e+03_realType
-    reT(15) = 0.46212508e+03_realType
-    reT(16) = 0.59566097e+03_realType
-    reT(17) = 0.76073076e+03_realType
-    reT(18) = 0.96373333e+03_realType
-    reT(19) = 0.12121761e+04_realType
-    reT(20) = 0.15147917e+04_realType
-    reT(21) = 0.18817196e+04_realType
-    reT(22) = 0.23247121e+04_realType
-    reT(23) = 0.28572322e+04_realType
-    reT(24) = 0.34947840e+04_realType
-    reT(25) = 0.42551444e+04_realType
-    reT(26) = 0.51584529e+04_realType
-    reT(27) = 0.62277581e+04_realType
-    reT(28) = 0.74889831e+04_realType
-    reT(29) = 0.89716314e+04_realType
-    reT(30) = 0.10708764e+05_realType
-    reT(31) = 0.12737815e+05_realType
-    reT(32) = 0.15100490e+05_realType
-    reT(33) = 0.17843939e+05_realType
-    reT(34) = 0.21020534e+05_realType
-
-    ! Set the values of the y+ values at interval boundaries.
-
-    ypT(0)  = 0.35397100e+00_realType
-    ypT(1)  = 0.67079200e+00_realType
-    ypT(2)  = 0.10761700e+01_realType
-    ypT(3)  = 0.15923000e+01_realType
-    ypT(4)  = 0.22463000e+01_realType
-    ypT(5)  = 0.30710600e+01_realType
-    ypT(6)  = 0.41063800e+01_realType
-    ypT(7)  = 0.54001200e+01_realType
-    ypT(8)  = 0.70095900e+01_realType
-    ypT(9)  = 0.90031400e+01_realType
-    ypT(10) = 0.11461900e+02_realType
-    ypT(11) = 0.14481700e+02_realType
-    ypT(12) = 0.18175400e+02_realType
-    ypT(13) = 0.22675200e+02_realType
-    ypT(14) = 0.28135500e+02_realType
-    ypT(15) = 0.34735800e+02_realType
-    ypT(16) = 0.42683800e+02_realType
-    ypT(17) = 0.52219300e+02_realType
-    ypT(18) = 0.63617800e+02_realType
-    ypT(19) = 0.77194900e+02_realType
-    ypT(20) = 0.93310400e+02_realType
-    ypT(21) = 0.11237300e+03_realType
-    ypT(22) = 0.13484800e+03_realType
-    ypT(23) = 0.16125700e+03_realType
-    ypT(24) = 0.19218900e+03_realType
-    ypT(25) = 0.22830600e+03_realType
-    ypT(26) = 0.27034500e+03_realType
-    ypT(27) = 0.31913000e+03_realType
-    ypT(28) = 0.37557400e+03_realType
-    ypT(29) = 0.44069100e+03_realType
-    ypT(30) = 0.51559800e+03_realType
-    ypT(31) = 0.60152700e+03_realType
-    ypT(32) = 0.69982900e+03_realType
-    ypT(33) = 0.81198500e+03_realType
-    ypT(34) = 0.93960800e+03_realType
-
-    ! Set the values of constants for the cubic fits of the
-    ! non-dimensional tangential velocity.
-
-    up0(1)  = 0.35397100e+00_realType
-    up0(2)  = 0.67079000e+00_realType
-    up0(3)  = 0.10761600e+01_realType
-    up0(4)  = 0.15922400e+01_realType
-    up0(5)  = 0.22457500e+01_realType
-    up0(6)  = 0.30671700e+01_realType
-    up0(7)  = 0.40830500e+01_realType
-    up0(8)  = 0.52881700e+01_realType
-    up0(9)  = 0.66016600e+01_realType
-    up0(10) = 0.78884700e+01_realType
-    up0(11) = 0.90588500e+01_realType
-    up0(12) = 0.10096700e+02_realType
-    up0(13) = 0.11019300e+02_realType
-    up0(14) = 0.11849200e+02_realType
-    up0(15) = 0.12605800e+02_realType
-    up0(16) = 0.13304000e+02_realType
-    up0(17) = 0.13955200e+02_realType
-    up0(18) = 0.14568000e+02_realType
-    up0(19) = 0.15148800e+02_realType
-    up0(20) = 0.15702800e+02_realType
-    up0(21) = 0.16233900e+02_realType
-    up0(22) = 0.16745300e+02_realType
-    up0(23) = 0.17239500e+02_realType
-    up0(24) = 0.17718500e+02_realType
-    up0(25) = 0.18184100e+02_realType
-    up0(26) = 0.18637900e+02_realType
-    up0(27) = 0.19081000e+02_realType
-    up0(28) = 0.19514800e+02_realType
-    up0(29) = 0.19940100e+02_realType
-    up0(30) = 0.20358100e+02_realType
-    up0(31) = 0.20769600e+02_realType
-    up0(32) = 0.21175800e+02_realType
-    up0(33) = 0.21577400e+02_realType
-    up0(34) = 0.21975700e+02_realType
-
-    up1(1)  = 0.12846958e+01_realType
-    up1(2)  = 0.69922936e+00_realType
-    up1(3)  = 0.44186548e+00_realType
-    up1(4)  = 0.30093680e+00_realType
-    up1(5)  = 0.21425046e+00_realType
-    up1(6)  = 0.15674045e+00_realType
-    up1(7)  = 0.11605614e+00_realType
-    up1(8)  = 0.85352379e-01_realType
-    up1(9)  = 0.61235043e-01_realType
-    up1(10) = 0.42691639e-01_realType
-    up1(11) = 0.29366174e-01_realType
-    up1(12) = 0.20326381e-01_realType
-    up1(13) = 0.14310141e-01_realType
-    up1(14) = 0.10275905e-01_realType
-    up1(15) = 0.75205965e-02_realType
-    up1(16) = 0.55993913e-02_realType
-    up1(17) = 0.42330072e-02_realType
-    up1(18) = 0.32428406e-02_realType
-    up1(19) = 0.25137042e-02_realType
-    up1(20) = 0.19691199e-02_realType
-    up1(21) = 0.15570310e-02_realType
-    up1(22) = 0.12416035e-02_realType
-    up1(23) = 0.99762939e-03_realType
-    up1(24) = 0.80730082e-03_realType
-    up1(25) = 0.65769508e-03_realType
-    up1(26) = 0.53910966e-03_realType
-    up1(27) = 0.44453711e-03_realType
-    up1(28) = 0.36862857e-03_realType
-    up1(29) = 0.30733926e-03_realType
-    up1(30) = 0.25762621e-03_realType
-    up1(31) = 0.21711632e-03_realType
-    up1(32) = 0.18393679e-03_realType
-    up1(33) = 0.15665505e-03_realType
-    up1(34) = 0.13415441e-03_realType
-
-    up2(1)  = -0.10506864e+01_realType
-    up2(2)  = -0.17378349e+00_realType
-    up2(3)  = -0.43906517e-01_realType
-    up2(4)  = -0.13876317e-01_realType
-    up2(5)  = -0.50197713e-02_realType
-    up2(6)  = -0.20046033e-02_realType
-    up2(7)  = -0.91800803e-03_realType
-    up2(8)  = -0.53858650e-03_realType
-    up2(9)  = -0.37015912e-03_realType
-    up2(10) = -0.23581357e-03_realType
-    up2(11) = -0.13214946e-03_realType
-    up2(12) = -0.69676197e-04_realType
-    up2(13) = -0.36527030e-04_realType
-    up2(14) = -0.19485941e-04_realType
-    up2(15) = -0.10680793e-04_realType
-    up2(16) = -0.60059830e-05_realType
-    up2(17) = -0.34636741e-05_realType
-    up2(18) = -0.20504308e-05_realType
-    up2(19) = -0.12351270e-05_realType
-    up2(20) = -0.76062105e-06_realType
-    up2(21) = -0.47546804e-06_realType
-    up2(22) = -0.30260755e-06_realType
-    up2(23) = -0.19542870e-06_realType
-    up2(24) = -0.12770106e-06_realType
-    up2(25) = -0.84214126e-07_realType
-    up2(26) = -0.56643139e-07_realType
-    up2(27) = -0.38016078e-07_realType
-    up2(28) = -0.26134020e-07_realType
-    up2(29) = -0.17887512e-07_realType
-    up2(30) = -0.12500449e-07_realType
-    up2(31) = -0.86706361e-08_realType
-    up2(32) = -0.61786381e-08_realType
-    up2(33) = -0.43440887e-08_realType
-    up2(34) = -0.31212378e-08_realType
-
-    up3(1)  =  0.30603769e+00_realType
-    up3(2)  = -0.74623173e-02_realType
-    up3(3)  = -0.35137590e-02_realType
-    up3(4)  = -0.90241883e-03_realType
-    up3(5)  = -0.23666410e-03_realType
-    up3(6)  = -0.69336451e-04_realType
-    up3(7)  = -0.21717508e-04_realType
-    up3(8)  = -0.53427330e-05_realType
-    up3(9)  = -0.12162441e-06_realType
-    up3(10) =  0.66537991e-06_realType
-    up3(11) =  0.40127135e-06_realType
-    up3(12) =  0.17307016e-06_realType
-    up3(13) =  0.68595664e-07_realType
-    up3(14) =  0.26859566e-07_realType
-    up3(15) =  0.10802572e-07_realType
-    up3(16) =  0.44423253e-08_realType
-    up3(17) =  0.18757232e-08_realType
-    up3(18) =  0.83595370e-09_realType
-    up3(19) =  0.37334228e-09_realType
-    up3(20) =  0.17567414e-09_realType
-    up3(21) =  0.82933451e-10_realType
-    up3(22) =  0.40989510e-10_realType
-    up3(23) =  0.20935863e-10_realType
-    up3(24) =  0.10846455e-10_realType
-    up3(25) =  0.54661649e-11_realType
-    up3(26) =  0.31700296e-11_realType
-    up3(27) =  0.15722041e-11_realType
-    up3(28) =  0.97074333e-12_realType
-    up3(29) =  0.50475514e-12_realType
-    up3(30) =  0.32254746e-12_realType
-    up3(31) =  0.16247920e-12_realType
-    up3(32) =  0.11432002e-12_realType
-    up3(33) =  0.59121027e-13_realType
-    up3(34) =  0.38726995e-13_realType
-
-    ! Set the values of tuLogFit. Both for k and omega the
-    ! logarithm has been fitted.
-
-    tuLogFit(itu1) = .true.
-    tuLogFit(itu2) = .true.
-
-    ! Set the values of constants for the cubic fits of the
-    ! non-dimensional k and omega values.
-
-    ! Constants for k.
-
-    tup0(1,itu1)  = -0.10178274e+02_realType
-    tup0(2,itu1)  = -0.79134047e+01_realType
-    tup0(3,itu1)  = -0.62154735e+01_realType
-    tup0(4,itu1)  = -0.48268972e+01_realType
-    tup0(5,itu1)  = -0.36279650e+01_realType
-    tup0(6,itu1)  = -0.25597781e+01_realType
-    tup0(7,itu1)  = -0.16005079e+01_realType
-    tup0(8,itu1)  = -0.76521262e+00_realType
-    tup0(9,itu1)  = -0.10076775e+00_realType
-    tup0(10,itu1) =  0.36262719e+00_realType
-    tup0(11,itu1) =  0.65553877e+00_realType
-    tup0(12,itu1) =  0.83590897e+00_realType
-    tup0(13,itu1) =  0.94909088e+00_realType
-    tup0(14,itu1) =  0.10224941e+01_realType
-    tup0(15,itu1) =  0.10717000e+01_realType
-    tup0(16,itu1) =  0.11056409e+01_realType
-    tup0(17,itu1) =  0.11295908e+01_realType
-    tup0(18,itu1) =  0.11467673e+01_realType
-    tup0(19,itu1) =  0.11591867e+01_realType
-    tup0(20,itu1) =  0.11681570e+01_realType
-    tup0(21,itu1) =  0.11745296e+01_realType
-    tup0(22,itu1) =  0.11788734e+01_realType
-    tup0(23,itu1) =  0.11815615e+01_realType
-    tup0(24,itu1) =  0.11828278e+01_realType
-    tup0(25,itu1) =  0.11828094e+01_realType
-    tup0(26,itu1) =  0.11815707e+01_realType
-    tup0(27,itu1) =  0.11791103e+01_realType
-    tup0(28,itu1) =  0.11753665e+01_realType
-    tup0(29,itu1) =  0.11702319e+01_realType
-    tup0(30,itu1) =  0.11635476e+01_realType
-    tup0(31,itu1) =  0.11550903e+01_realType
-    tup0(32,itu1) =  0.11445826e+01_realType
-    tup0(33,itu1) =  0.11316601e+01_realType
-    tup0(34,itu1) =  0.11158659e+01_realType
-
-    ! Constants for omega.
-
-    tup0(1,itu2)  =  0.68385895e+01_realType
-    tup0(2,itu2)  =  0.55423492e+01_realType
-    tup0(3,itu2)  =  0.45364394e+01_realType
-    tup0(4,itu2)  =  0.37003435e+01_realType
-    tup0(5,itu2)  =  0.29762436e+01_realType
-    tup0(6,itu2)  =  0.23400254e+01_realType
-    tup0(7,itu2)  =  0.17897909e+01_realType
-    tup0(8,itu2)  =  0.13296526e+01_realType
-    tup0(9,itu2)  =  0.94313517e+00_realType
-    tup0(10,itu2) =  0.59512633e+00_realType
-    tup0(11,itu2) =  0.26383242e+00_realType
-    tup0(12,itu2) = -0.54289357e-01_realType
-    tup0(13,itu2) = -0.35764684e+00_realType
-    tup0(14,itu2) = -0.64548336e+00_realType
-    tup0(15,itu2) = -0.91832029e+00_realType
-    tup0(16,itu2) = -0.11773601e+01_realType
-    tup0(17,itu2) = -0.14240004e+01_realType
-    tup0(18,itu2) = -0.16596108e+01_realType
-    tup0(19,itu2) = -0.18854088e+01_realType
-    tup0(20,itu2) = -0.21024564e+01_realType
-    tup0(21,itu2) = -0.23116299e+01_realType
-    tup0(22,itu2) = -0.25136741e+01_realType
-    tup0(23,itu2) = -0.27091934e+01_realType
-    tup0(24,itu2) = -0.28986818e+01_realType
-    tup0(25,itu2) = -0.30825349e+01_realType
-    tup0(26,itu2) = -0.32610659e+01_realType
-    tup0(27,itu2) = -0.34345194e+01_realType
-    tup0(28,itu2) = -0.36030725e+01_realType
-    tup0(29,itu2) = -0.37668496e+01_realType
-    tup0(30,itu2) = -0.39259191e+01_realType
-    tup0(31,itu2) = -0.40803056e+01_realType
-    tup0(32,itu2) = -0.42299856e+01_realType
-    tup0(33,itu2) = -0.43749001e+01_realType
-    tup0(34,itu2) = -0.45149548e+01_realType
-
-    ! Constants for k.
-
-    tup1(1,itu1)  =  0.10151083e+02_realType
-    tup1(2,itu1)  =  0.54871316e+01_realType
-    tup1(3,itu1)  =  0.33494093e+01_realType
-    tup1(4,itu1)  =  0.22113000e+01_realType
-    tup1(5,itu1)  =  0.15331218e+01_realType
-    tup1(6,itu1)  =  0.10899838e+01_realType
-    tup1(7,itu1)  =  0.77051060e+00_realType
-    tup1(8,itu1)  =  0.51657998e+00_realType
-    tup1(9,itu1)  =  0.31302624e+00_realType
-    tup1(10,itu1) =  0.16986834e+00_realType
-    tup1(11,itu1) =  0.86387987e-01_realType
-    tup1(12,itu1) =  0.43725644e-01_realType
-    tup1(13,itu1) =  0.22772335e-01_realType
-    tup1(14,itu1) =  0.12310034e-01_realType
-    tup1(15,itu1) =  0.68940825e-02_realType
-    tup1(16,itu1) =  0.39792104e-02_realType
-    tup1(17,itu1) =  0.23523017e-02_realType
-    tup1(18,itu1) =  0.14137727e-02_realType
-    tup1(19,itu1) =  0.85642296e-03_realType
-    tup1(20,itu1) =  0.51672343e-03_realType
-    tup1(21,itu1) =  0.30463346e-03_realType
-    tup1(22,itu1) =  0.16929149e-03_realType
-    tup1(23,itu1) =  0.80893185e-04_realType
-    tup1(24,itu1) =  0.21762685e-04_realType
-    tup1(25,itu1) = -0.18748602e-04_realType
-    tup1(26,itu1) = -0.47330395e-04_realType
-    tup1(27,itu1) = -0.68310393e-04_realType
-    tup1(28,itu1) = -0.84371684e-04_realType
-    tup1(29,itu1) = -0.97226185e-04_realType
-    tup1(30,itu1) = -0.10813606e-03_realType
-    tup1(31,itu1) = -0.11791513e-03_realType
-    tup1(32,itu1) = -0.12717807e-03_realType
-    tup1(33,itu1) = -0.13644855e-03_realType
-    tup1(34,itu1) = -0.14611996e-03_realType
-
-    ! Constants for omega.
-
-    tup1(1,itu2)  = -0.55838269e+01_realType
-    tup1(2,itu2)  = -0.31876950e+01_realType
-    tup1(3,itu2)  = -0.19989037e+01_realType
-    tup1(4,itu2)  = -0.13333526e+01_realType
-    tup1(5,itu2)  = -0.91990459e+00_realType
-    tup1(6,itu2)  = -0.63785038e+00_realType
-    tup1(7,itu2)  = -0.43381141e+00_realType
-    tup1(8,itu2)  = -0.29162744e+00_realType
-    tup1(9,itu2)  = -0.20386405e+00_realType
-    tup1(10,itu2) = -0.15257310e+00_realType
-    tup1(11,itu2) = -0.11853766e+00_realType
-    tup1(12,itu2) = -0.92571574e-01_realType
-    tup1(13,itu2) = -0.72154025e-01_realType
-    tup1(14,itu2) = -0.56291949e-01_realType
-    tup1(15,itu2) = -0.44100353e-01_realType
-    tup1(16,itu2) = -0.34758707e-01_realType
-    tup1(17,itu2) = -0.27583190e-01_realType
-    tup1(18,itu2) = -0.22041103e-01_realType
-    tup1(19,itu2) = -0.17731129e-01_realType
-    tup1(20,itu2) = -0.14354453e-01_realType
-    tup1(21,itu2) = -0.11689595e-01_realType
-    tup1(22,itu2) = -0.95711712e-02_realType
-    tup1(23,itu2) = -0.78759450e-02_realType
-    tup1(24,itu2) = -0.65109013e-02_realType
-    tup1(25,itu2) = -0.54047659e-02_realType
-    tup1(26,itu2) = -0.45036146e-02_realType
-    tup1(27,itu2) = -0.37655964e-02_realType
-    tup1(28,itu2) = -0.31581617e-02_realType
-    tup1(29,itu2) = -0.26558406e-02_realType
-    tup1(30,itu2) = -0.22385872e-02_realType
-    tup1(31,itu2) = -0.18905375e-02_realType
-    tup1(32,itu2) = -0.15990497e-02_realType
-    tup1(33,itu2) = -0.13540430e-02_realType
-    tup1(34,itu2) = -0.11473506e-02_realType
-
-    ! Constants for k.
-
-    tup2(1,itu1)  = -0.13708334e+02_realType
-    tup2(2,itu1)  = -0.43370192e+01_realType
-    tup2(3,itu1)  = -0.16256260e+01_realType
-    tup2(4,itu1)  = -0.69729756e+00_realType
-    tup2(5,itu1)  = -0.32831484e+00_realType
-    tup2(6,itu1)  = -0.16501608e+00_realType
-    tup2(7,itu1)  = -0.93271929e-01_realType
-    tup2(8,itu1)  = -0.66905532e-01_realType
-    tup2(9,itu1)  = -0.49449221e-01_realType
-    tup2(10,itu1) = -0.27955265e-01_realType
-    tup2(11,itu1) = -0.12356466e-01_realType
-    tup2(12,itu1) = -0.49538360e-02_realType
-    tup2(13,itu1) = -0.19816552e-02_realType
-    tup2(14,itu1) = -0.82035726e-03_realType
-    tup2(15,itu1) = -0.35459496e-03_realType
-    tup2(16,itu1) = -0.15988154e-03_realType
-    tup2(17,itu1) = -0.74920111e-04_realType
-    tup2(18,itu1) = -0.36432844e-04_realType
-    tup2(19,itu1) = -0.18228517e-04_realType
-    tup2(20,itu1) = -0.94187249e-05_realType
-    tup2(21,itu1) = -0.49803455e-05_realType
-    tup2(22,itu1) = -0.26991628e-05_realType
-    tup2(23,itu1) = -0.15033797e-05_realType
-    tup2(24,itu1) = -0.85865308e-06_realType
-    tup2(25,itu1) = -0.50010225e-06_realType
-    tup2(26,itu1) = -0.30003597e-06_realType
-    tup2(27,itu1) = -0.18914449e-06_realType
-    tup2(28,itu1) = -0.12284699e-06_realType
-    tup2(29,itu1) = -0.82382282e-07_realType
-    tup2(30,itu1) = -0.60415725e-07_realType
-    tup2(31,itu1) = -0.44704797e-07_realType
-    tup2(32,itu1) = -0.36272580e-07_realType
-    tup2(33,itu1) = -0.30797090e-07_realType
-    tup2(34,itu1) = -0.27066472e-07_realType
-
-    ! Constants for omega.
-
-    tup2(1,itu2)  =  0.65688815e+01_realType
-    tup2(2,itu2)  =  0.22942977e+01_realType
-    tup2(3,itu2)  =  0.91326107e+00_realType
-    tup2(4,itu2)  =  0.40527609e+00_realType
-    tup2(5,itu2)  =  0.19819770e+00_realType
-    tup2(6,itu2)  =  0.11119507e+00_realType
-    tup2(7,itu2)  =  0.71308510e-01_realType
-    tup2(8,itu2)  =  0.41419218e-01_realType
-    tup2(9,itu2)  =  0.18358706e-01_realType
-    tup2(10,itu2) =  0.79158389e-02_realType
-    tup2(11,itu2) =  0.45072385e-02_realType
-    tup2(12,itu2) =  0.29542525e-02_realType
-    tup2(13,itu2) =  0.19335211e-02_realType
-    tup2(14,itu2) =  0.12420728e-02_realType
-    tup2(15,itu2) =  0.79078278e-03_realType
-    tup2(16,itu2) =  0.50394724e-03_realType
-    tup2(17,itu2) =  0.32312896e-03_realType
-    tup2(18,itu2) =  0.20923598e-03_realType
-    tup2(19,itu2) =  0.13683514e-03_realType
-    tup2(20,itu2) =  0.90568628e-04_realType
-    tup2(21,itu2) =  0.60506139e-04_realType
-    tup2(22,itu2) =  0.40936800e-04_realType
-    tup2(23,itu2) =  0.27920509e-04_realType
-    tup2(24,itu2) =  0.19242656e-04_realType
-    tup2(25,itu2) =  0.13394215e-04_realType
-    tup2(26,itu2) =  0.93908861e-05_realType
-    tup2(27,itu2) =  0.66475834e-05_realType
-    tup2(28,itu2) =  0.47374937e-05_realType
-    tup2(29,itu2) =  0.34060533e-05_realType
-    tup2(30,itu2) =  0.24642067e-05_realType
-    tup2(31,itu2) =  0.17969941e-05_realType
-    tup2(32,itu2) =  0.13185213e-05_realType
-    tup2(33,itu2) =  0.97355039e-06_realType
-    tup2(34,itu2) =  0.72438303e-06_realType
-
-    ! Constants for k.
-
-    tup3(1,itu1)  =  0.13357255e+02_realType
-    tup3(2,itu1)  =  0.27962653e+01_realType
-    tup3(3,itu1)  =  0.67564983e+00_realType
-    tup3(4,itu1)  =  0.18227593e+00_realType
-    tup3(5,itu1)  =  0.48230769e-01_realType
-    tup3(6,itu1)  =  0.69085885e-02_realType
-    tup3(7,itu1)  = -0.25075980e-02_realType
-    tup3(8,itu1)  =  0.15198665e-02_realType
-    tup3(9,itu1)  =  0.45292571e-02_realType
-    tup3(10,itu1) =  0.29768816e-02_realType
-    tup3(11,itu1) =  0.11684431e-02_realType
-    tup3(12,itu1) =  0.38217835e-03_realType
-    tup3(13,itu1) =  0.12135735e-03_realType
-    tup3(14,itu1) =  0.39609332e-04_realType
-    tup3(15,itu1) =  0.13512654e-04_realType
-    tup3(16,itu1) =  0.48259094e-05_realType
-    tup3(17,itu1) =  0.17973362e-05_realType
-    tup3(18,itu1) =  0.70093792e-06_realType
-    tup3(19,itu1) =  0.28079141e-06_realType
-    tup3(20,itu1) =  0.11741966e-06_realType
-    tup3(21,itu1) =  0.50025034e-07_realType
-    tup3(22,itu1) =  0.21729950e-07_realType
-    tup3(23,itu1) =  0.96902710e-08_realType
-    tup3(24,itu1) =  0.43926199e-08_realType
-    tup3(25,itu1) =  0.19274189e-08_realType
-    tup3(26,itu1) =  0.80093554e-09_realType
-    tup3(27,itu1) =  0.33523262e-09_realType
-    tup3(28,itu1) =  0.10603408e-09_realType
-    tup3(29,itu1) = -0.14220954e-10_realType
-    tup3(30,itu1) = -0.43245103e-10_realType
-    tup3(31,itu1) = -0.71330147e-10_realType
-    tup3(32,itu1) = -0.73789519e-10_realType
-    tup3(33,itu1) = -0.73223977e-10_realType
-    tup3(34,itu1) = -0.73679919e-10_realType
-
-    ! Constants for omega.
-
-    tup3(1,itu2)  = -0.58652639e+01_realType
-    tup3(2,itu2)  = -0.13617293e+01_realType
-    tup3(3,itu2)  = -0.34682427e+00_realType
-    tup3(4,itu2)  = -0.90911692e-01_realType
-    tup3(5,itu2)  = -0.21991054e-01_realType
-    tup3(6,itu2)  = -0.81494825e-02_realType
-    tup3(7,itu2)  = -0.84291839e-02_realType
-    tup3(8,itu2)  = -0.58630201e-02_realType
-    tup3(9,itu2)  = -0.18374198e-02_realType
-    tup3(10,itu2) = -0.26966903e-03_realType
-    tup3(11,itu2) = -0.45904311e-04_realType
-    tup3(12,itu2) = -0.34368125e-04_realType
-    tup3(13,itu2) = -0.25332953e-04_realType
-    tup3(14,itu2) = -0.15345625e-04_realType
-    tup3(15,itu2) = -0.83950168e-05_realType
-    tup3(16,itu2) = -0.44072558e-05_realType
-    tup3(17,itu2) = -0.22740368e-05_realType
-    tup3(18,itu2) = -0.11801059e-05_realType
-    tup3(19,itu2) = -0.61295741e-06_realType
-    tup3(20,itu2) = -0.32633745e-06_realType
-    tup3(21,itu2) = -0.17280693e-06_realType
-    tup3(22,itu2) = -0.95608569e-07_realType
-    tup3(23,itu2) = -0.52411878e-07_realType
-    tup3(24,itu2) = -0.29366409e-07_realType
-    tup3(25,itu2) = -0.16959125e-07_realType
-    tup3(26,itu2) = -0.97228139e-08_realType
-    tup3(27,itu2) = -0.57661683e-08_realType
-    tup3(28,itu2) = -0.33988144e-08_realType
-    tup3(29,itu2) = -0.20698972e-08_realType
-    tup3(30,itu2) = -0.12548468e-08_realType
-    tup3(31,itu2) = -0.78279227e-09_realType
-    tup3(32,itu2) = -0.49051180e-09_realType
-    tup3(33,itu2) = -0.30968731e-09_realType
-    tup3(34,itu2) = -0.20495795e-09_realType
-
-  end subroutine initCurveFitDataSST
-
-  subroutine initCurveFitDataSa
-    !
-    !       initCurveFitDataSa contains the curve fit constants for
-    !       the wall function data for the Spalart-Allmaras turbulence
-    !       model.
-    !
-    use constants
-    use flowVarRefState
-    use paramTurb
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    ! Set the number of data points and allocate the memory for the
-    ! arrays of the curve fits.
-
-    nFit = 34
-
-    allocate(ypT(0:nFit), reT(0:nFit),                     &
-         up0(nFit),   up1(nFit), up2(nFit), up3(nFit), &
-         tup0(nFit,nt1:nt2), tup1(nFit,nt1:nt2),       &
-         tup2(nFit,nt1:nt2), tup3(nFit,nt1:nt2),       &
-         tuLogFit(nt1:nt2), stat=ierr)
-    if(ierr /= 0)                          &
-         call terminate("initCurveFitDataSa", &
-         "Memory allocation failure for curve fit &
-         &coefficients")
-
-    ! Set the values of the Reynolds numbers at interval boundaries.
-
-    reT(0)  = 0.12361553e+00_realType
-    reT(1)  = 0.44392837e+00_realType
-    reT(2)  = 0.11425793e+01_realType
-    reT(3)  = 0.25011739e+01_realType
-    reT(4)  = 0.49762007e+01_realType
-    reT(5)  = 0.92920979e+01_realType
-    reT(6)  = 0.16564578e+02_realType
-    reT(7)  = 0.28414621e+02_realType
-    reT(8)  = 0.46909987e+02_realType
-    reT(9)  = 0.73988906e+02_realType
-    reT(10) = 0.11046933e+03_realType
-    reT(11) = 0.15636562e+03_realType
-    reT(12) = 0.21263059e+03_realType
-    reT(13) = 0.28162960e+03_realType
-    reT(14) = 0.36666795e+03_realType
-    reT(15) = 0.47173270e+03_realType
-    reT(16) = 0.60148482e+03_realType
-    reT(17) = 0.76135031e+03_realType
-    reT(18) = 0.95763636e+03_realType
-    reT(19) = 0.11976883e+04_realType
-    reT(20) = 0.14900416e+04_realType
-    reT(21) = 0.18445991e+04_realType
-    reT(22) = 0.22728484e+04_realType
-    reT(23) = 0.27879873e+04_realType
-    reT(24) = 0.34052767e+04_realType
-    reT(25) = 0.41422400e+04_realType
-    reT(26) = 0.50189226e+04_realType
-    reT(27) = 0.60583758e+04_realType
-    reT(28) = 0.72868976e+04_realType
-    reT(29) = 0.87344007e+04_realType
-    reT(30) = 0.10435120e+05_realType
-    reT(31) = 0.12427867e+05_realType
-    reT(32) = 0.14756830e+05_realType
-    reT(33) = 0.17471977e+05_realType
-    reT(34) = 0.20629717e+05_realType
-
-    ! Set the values of the y+ values at interval boundaries.
-
-    ypT(0)  = 0.35159200e+00_realType
-    ypT(1)  = 0.66628400e+00_realType
-    ypT(2)  = 0.10689300e+01_realType
-    ypT(3)  = 0.15816000e+01_realType
-    ypT(4)  = 0.22312000e+01_realType
-    ypT(5)  = 0.30504200e+01_realType
-    ypT(6)  = 0.40787900e+01_realType
-    ypT(7)  = 0.53638300e+01_realType
-    ypT(8)  = 0.69624800e+01_realType
-    ypT(9)  = 0.89426300e+01_realType
-    ypT(10) = 0.11384800e+02_realType
-    ypT(11) = 0.14384400e+02_realType
-    ypT(12) = 0.18053200e+02_realType
-    ypT(13) = 0.22522800e+02_realType
-    ypT(14) = 0.27946400e+02_realType
-    ypT(15) = 0.34502300e+02_realType
-    ypT(16) = 0.42396900e+02_realType
-    ypT(17) = 0.51868400e+02_realType
-    ypT(18) = 0.63190300e+02_realType
-    ypT(19) = 0.76676100e+02_realType
-    ypT(20) = 0.92683300e+02_realType
-    ypT(21) = 0.11161800e+03_realType
-    ypT(22) = 0.13394200e+03_realType
-    ypT(23) = 0.16017300e+03_realType
-    ypT(24) = 0.19089800e+03_realType
-    ypT(25) = 0.22677200e+03_realType
-    ypT(26) = 0.26852800e+03_realType
-    ypT(27) = 0.31698500e+03_realType
-    ypT(28) = 0.37305000e+03_realType
-    ypT(29) = 0.43772900e+03_realType
-    ypT(30) = 0.51213300e+03_realType
-    ypT(31) = 0.59748500e+03_realType
-    ypT(32) = 0.69512600e+03_realType
-    ypT(33) = 0.80652800e+03_realType
-    ypT(34) = 0.93329400e+03_realType
-
-    ! Set the values of constants for the cubic fits of the
-    ! non-dimensional tangential velocity.
-
-    up0(1)  = 0.35158800e+00_realType
-    up0(2)  = 0.66627500e+00_realType
-    up0(3)  = 0.10689000e+01_realType
-    up0(4)  = 0.15814200e+01_realType
-    up0(5)  = 0.22302800e+01_realType
-    up0(6)  = 0.30461700e+01_realType
-    up0(7)  = 0.40611500e+01_realType
-    up0(8)  = 0.52974500e+01_realType
-    up0(9)  = 0.67375400e+01_realType
-    up0(10) = 0.82737300e+01_realType
-    up0(11) = 0.97032300e+01_realType
-    up0(12) = 0.10870500e+02_realType
-    up0(13) = 0.11778000e+02_realType
-    up0(14) = 0.12504200e+02_realType
-    up0(15) = 0.13120400e+02_realType
-    up0(16) = 0.13672500e+02_realType
-    up0(17) = 0.14187000e+02_realType
-    up0(18) = 0.14678500e+02_realType
-    up0(19) = 0.15154800e+02_realType
-    up0(20) = 0.15620100e+02_realType
-    up0(21) = 0.16076700e+02_realType
-    up0(22) = 0.16526000e+02_realType
-    up0(23) = 0.16968900e+02_realType
-    up0(24) = 0.17406100e+02_realType
-    up0(25) = 0.17838200e+02_realType
-    up0(26) = 0.18266100e+02_realType
-    up0(27) = 0.18690500e+02_realType
-    up0(28) = 0.19112500e+02_realType
-    up0(29) = 0.19533300e+02_realType
-    up0(30) = 0.19953900e+02_realType
-    up0(31) = 0.20375800e+02_realType
-    up0(32) = 0.20800300e+02_realType
-    up0(33) = 0.21229000e+02_realType
-    up0(34) = 0.21663200e+02_realType
-
-    up1(1)  = 0.12933934e+01_realType
-    up1(2)  = 0.70396224e+00_realType
-    up1(3)  = 0.44483996e+00_realType
-    up1(4)  = 0.30294593e+00_realType
-    up1(5)  = 0.21569230e+00_realType
-    up1(6)  = 0.15799192e+00_realType
-    up1(7)  = 0.11772923e+00_realType
-    up1(8)  = 0.88197525e-01_realType
-    up1(9)  = 0.65306126e-01_realType
-    up1(10) = 0.46660172e-01_realType
-    up1(11) = 0.31523107e-01_realType
-    up1(12) = 0.20308775e-01_realType
-    up1(13) = 0.13042058e-01_realType
-    up1(14) = 0.87147691e-02_realType
-    up1(15) = 0.61456125e-02_realType
-    up1(16) = 0.45422630e-02_realType
-    up1(17) = 0.34735457e-02_realType
-    up1(18) = 0.27173826e-02_realType
-    up1(19) = 0.21579599e-02_realType
-    up1(20) = 0.17315757e-02_realType
-    up1(21) = 0.14003478e-02_realType
-    up1(22) = 0.11397448e-02_realType
-    up1(23) = 0.93291395e-03_realType
-    up1(24) = 0.76764242e-03_realType
-    up1(25) = 0.63503654e-03_realType
-    up1(26) = 0.52818280e-03_realType
-    up1(27) = 0.44172235e-03_realType
-    up1(28) = 0.37160904e-03_realType
-    up1(29) = 0.31442159e-03_realType
-    up1(30) = 0.26761137e-03_realType
-    up1(31) = 0.22916141e-03_realType
-    up1(32) = 0.19742184e-03_realType
-    up1(33) = 0.17107081e-03_realType
-    up1(34) = 0.14902380e-03_realType
-
-    up2(1)  = -0.10722013e+01_realType
-    up2(2)  = -0.17733707e+00_realType
-    up2(3)  = -0.44823897e-01_realType
-    up2(4)  = -0.14179933e-01_realType
-    up2(5)  = -0.51548071e-02_realType
-    up2(6)  = -0.20652649e-02_realType
-    up2(7)  = -0.90040092e-03_realType
-    up2(8)  = -0.43873478e-03_realType
-    up2(9)  = -0.26153548e-03_realType
-    up2(10) = -0.19975811e-03_realType
-    up2(11) = -0.15375234e-03_realType
-    up2(12) = -0.93708131e-04_realType
-    up2(13) = -0.46732800e-04_realType
-    up2(14) = -0.21598767e-04_realType
-    up2(15) = -0.10173953e-04_realType
-    up2(16) = -0.51044453e-05_realType
-    up2(17) = -0.27591627e-05_realType
-    up2(18) = -0.15948319e-05_realType
-    up2(19) = -0.96856201e-06_realType
-    up2(20) = -0.60909779e-06_realType
-    up2(21) = -0.39147369e-06_realType
-    up2(22) = -0.25632692e-06_realType
-    up2(23) = -0.16958665e-06_realType
-    up2(24) = -0.11394020e-06_realType
-    up2(25) = -0.76500636e-07_realType
-    up2(26) = -0.52236558e-07_realType
-    up2(27) = -0.35697343e-07_realType
-    up2(28) = -0.24471063e-07_realType
-    up2(29) = -0.17096052e-07_realType
-    up2(30) = -0.11859363e-07_realType
-    up2(31) = -0.83689945e-08_realType
-    up2(32) = -0.58800329e-08_realType
-    up2(33) = -0.42032634e-08_realType
-    up2(34) = -0.30343729e-08_realType
-
-    up3(1)  =  0.31659592e+00_realType
-    up3(2)  = -0.77365039e-02_realType
-    up3(3)  = -0.36297270e-02_realType
-    up3(4)  = -0.92844018e-03_realType
-    up3(5)  = -0.23630863e-03_realType
-    up3(6)  = -0.64433685e-04_realType
-    up3(7)  = -0.19446239e-04_realType
-    up3(8)  = -0.64919565e-05_realType
-    up3(9)  = -0.20373446e-05_realType
-    up3(10) = -0.14090103e-06_realType
-    up3(11) =  0.45874410e-06_realType
-    up3(12) =  0.34517950e-06_realType
-    up3(13) =  0.14855464e-06_realType
-    up3(14) =  0.50901715e-07_realType
-    up3(15) =  0.16140276e-07_realType
-    up3(16) =  0.50667962e-08_realType
-    up3(17) =  0.16437361e-08_realType
-    up3(18) =  0.57675302e-09_realType
-    up3(19) =  0.22343487e-09_realType
-    up3(20) =  0.97170203e-10_realType
-    up3(21) =  0.45068635e-10_realType
-    up3(22) =  0.23106070e-10_realType
-    up3(23) =  0.11870070e-10_realType
-    up3(24) =  0.70527690e-11_realType
-    up3(25) =  0.36226728e-11_realType
-    up3(26) =  0.22246040e-11_realType
-    up3(27) =  0.12643111e-11_realType
-    up3(28) =  0.64910588e-12_realType
-    up3(29) =  0.42682810e-12_realType
-    up3(30) =  0.21768527e-12_realType
-    up3(31) =  0.13556644e-12_realType
-    up3(32) =  0.63772628e-13_realType
-    up3(33) =  0.35175874e-13_realType
-    up3(34) =  0.21542623e-13_realType
-
-    ! Set the values of tuLogFit to .false., because a linear
-    ! fit has been used.
-
-    tuLogFit(itu1) = .false.
-
-    ! Set the values of constants for the cubic fits of the
-    ! non-dimensional spalart-allmaras viscosity.
-
-    tup0(1,itu1)  = 0.14399200e+00_realType
-    tup0(2,itu1)  = 0.27285000e+00_realType
-    tup0(3,itu1)  = 0.43767100e+00_realType
-    tup0(4,itu1)  = 0.64739300e+00_realType
-    tup0(5,itu1)  = 0.91283700e+00_realType
-    tup0(6,itu1)  = 0.12469600e+01_realType
-    tup0(7,itu1)  = 0.16651800e+01_realType
-    tup0(8,itu1)  = 0.21861800e+01_realType
-    tup0(9,itu1)  = 0.28347900e+01_realType
-    tup0(10,itu1) = 0.36492600e+01_realType
-    tup0(11,itu1) = 0.46812500e+01_realType
-    tup0(12,itu1) = 0.59588800e+01_realType
-    tup0(13,itu1) = 0.74961200e+01_realType
-    tup0(14,itu1) = 0.93387200e+01_realType
-    tup0(15,itu1) = 0.11555500e+02_realType
-    tup0(16,itu1) = 0.14225700e+02_realType
-    tup0(17,itu1) = 0.17436000e+02_realType
-    tup0(18,itu1) = 0.21280700e+02_realType
-    tup0(19,itu1) = 0.25863100e+02_realType
-    tup0(20,itu1) = 0.31296700e+02_realType
-    tup0(21,itu1) = 0.37704800e+02_realType
-    tup0(22,itu1) = 0.45218300e+02_realType
-    tup0(23,itu1) = 0.53972900e+02_realType
-    tup0(24,itu1) = 0.64103200e+02_realType
-    tup0(25,itu1) = 0.75735500e+02_realType
-    tup0(26,itu1) = 0.88977700e+02_realType
-    tup0(27,itu1) = 0.10390800e+03_realType
-    tup0(28,itu1) = 0.12056400e+03_realType
-    tup0(29,itu1) = 0.13892700e+03_realType
-    tup0(30,itu1) = 0.15892000e+03_realType
-    tup0(31,itu1) = 0.18039200e+03_realType
-    tup0(32,itu1) = 0.20312000e+03_realType
-    tup0(33,itu1) = 0.22680300e+03_realType
-    tup0(34,itu1) = 0.25105400e+03_realType
-
-    tup1(1,itu1)  = 0.40950260e+00_realType
-    tup1(2,itu1)  = 0.40940115e+00_realType
-    tup1(3,itu1)  = 0.40919529e+00_realType
-    tup1(4,itu1)  = 0.40882583e+00_realType
-    tup1(5,itu1)  = 0.40819638e+00_realType
-    tup1(6,itu1)  = 0.40720236e+00_realType
-    tup1(7,itu1)  = 0.40598943e+00_realType
-    tup1(8,itu1)  = 0.40559491e+00_realType
-    tup1(9,itu1)  = 0.40881860e+00_realType
-    tup1(10,itu1) = 0.41753197e+00_realType
-    tup1(11,itu1) = 0.42442441e+00_realType
-    tup1(12,itu1) = 0.42212075e+00_realType
-    tup1(13,itu1) = 0.41529539e+00_realType
-    tup1(14,itu1) = 0.41032022e+00_realType
-    tup1(15,itu1) = 0.40794524e+00_realType
-    tup1(16,itu1) = 0.40694094e+00_realType
-    tup1(17,itu1) = 0.40625126e+00_realType
-    tup1(18,itu1) = 0.40527764e+00_realType
-    tup1(19,itu1) = 0.40374561e+00_realType
-    tup1(20,itu1) = 0.40150883e+00_realType
-    tup1(21,itu1) = 0.39842138e+00_realType
-    tup1(22,itu1) = 0.39429502e+00_realType
-    tup1(23,itu1) = 0.38893832e+00_realType
-    tup1(24,itu1) = 0.38209495e+00_realType
-    tup1(25,itu1) = 0.37349660e+00_realType
-    tup1(26,itu1) = 0.36290738e+00_realType
-    tup1(27,itu1) = 0.35013025e+00_realType
-    tup1(28,itu1) = 0.33503951e+00_realType
-    tup1(29,itu1) = 0.31766382e+00_realType
-    tup1(30,itu1) = 0.29813133e+00_realType
-    tup1(31,itu1) = 0.27667192e+00_realType
-    tup1(32,itu1) = 0.25362172e+00_realType
-    tup1(33,itu1) = 0.22930211e+00_realType
-    tup1(34,itu1) = 0.20402405e+00_realType
-
-    tup2(1,itu1)  =  0.43946228e-04_realType
-    tup2(2,itu1)  =  0.90566870e-04_realType
-    tup2(3,itu1)  =  0.34081055e-04_realType
-    tup2(4,itu1)  =  0.50034075e-04_realType
-    tup2(5,itu1)  = -0.36629521e-04_realType
-    tup2(6,itu1)  = -0.33730915e-03_realType
-    tup2(7,itu1)  = -0.98768734e-03_realType
-    tup2(8,itu1)  = -0.17750541e-02_realType
-    tup2(9,itu1)  = -0.61469972e-03_realType
-    tup2(10,itu1) =  0.33676511e-02_realType
-    tup2(11,itu1) =  0.22772412e-02_realType
-    tup2(12,itu1) = -0.68862307e-03_realType
-    tup2(13,itu1) = -0.92984441e-03_realType
-    tup2(14,itu1) = -0.44253269e-03_realType
-    tup2(15,itu1) = -0.14333421e-03_realType
-    tup2(16,itu1) = -0.25078749e-04_realType
-    tup2(17,itu1) = -0.11675748e-05_realType
-    tup2(18,itu1) = -0.77479474e-05_realType
-    tup2(19,itu1) = -0.19425991e-04_realType
-    tup2(20,itu1) = -0.28782981e-04_realType
-    tup2(21,itu1) = -0.37198549e-04_realType
-    tup2(22,itu1) = -0.46839769e-04_realType
-    tup2(23,itu1) = -0.52777911e-04_realType
-    tup2(24,itu1) = -0.61987420e-04_realType
-    tup2(25,itu1) = -0.69912435e-04_realType
-    tup2(26,itu1) = -0.78150190e-04_realType
-    tup2(27,itu1) = -0.84976835e-04_realType
-    tup2(28,itu1) = -0.91879236e-04_realType
-    tup2(29,itu1) = -0.94706478e-04_realType
-    tup2(30,itu1) = -0.96428722e-04_realType
-    tup2(31,itu1) = -0.95007410e-04_realType
-    tup2(32,itu1) = -0.91049468e-04_realType
-    tup2(33,itu1) = -0.85824231e-04_realType
-    tup2(34,itu1) = -0.79305036e-04_realType
-
-    tup3(1,itu1)  = -0.43457258e-03_realType
-    tup3(2,itu1)  = -0.57319466e-03_realType
-    tup3(3,itu1)  = -0.51288659e-03_realType
-    tup3(4,itu1)  = -0.54857331e-03_realType
-    tup3(5,itu1)  = -0.46390243e-03_realType
-    tup3(6,itu1)  = -0.16364047e-03_realType
-    tup3(7,itu1)  =  0.43276759e-03_realType
-    tup3(8,itu1)  =  0.11606902e-02_realType
-    tup3(9,itu1)  =  0.94769940e-03_realType
-    tup3(10,itu1) = -0.53409400e-03_realType
-    tup3(11,itu1) = -0.59146449e-03_realType
-    tup3(12,itu1) = -0.43895639e-04_realType
-    tup3(13,itu1) =  0.55678050e-04_realType
-    tup3(14,itu1) =  0.27482853e-04_realType
-    tup3(15,itu1) =  0.67866418e-05_realType
-    tup3(16,itu1) = -0.15708231e-05_realType
-    tup3(17,itu1) = -0.35355159e-05_realType
-    tup3(18,itu1) = -0.35276554e-05_realType
-    tup3(19,itu1) = -0.31393462e-05_realType
-    tup3(20,itu1) = -0.28177541e-05_realType
-    tup3(21,itu1) = -0.25267299e-05_realType
-    tup3(22,itu1) = -0.21840943e-05_realType
-    tup3(23,itu1) = -0.19739075e-05_realType
-    tup3(24,itu1) = -0.16910644e-05_realType
-    tup3(25,itu1) = -0.14435078e-05_realType
-    tup3(26,itu1) = -0.11949962e-05_realType
-    tup3(27,itu1) = -0.97317619e-06_realType
-    tup3(28,itu1) = -0.75009409e-06_realType
-    tup3(29,itu1) = -0.58018935e-06_realType
-    tup3(30,itu1) = -0.42811291e-06_realType
-    tup3(31,itu1) = -0.31260995e-06_realType
-    tup3(32,itu1) = -0.22863635e-06_realType
-    tup3(33,itu1) = -0.16534708e-06_realType
-    tup3(34,itu1) = -0.12169915e-06_realType
-
-  end subroutine initCurveFitDataSa
-  subroutine initCurveFitDataSae
-    !
-    !       initCurveFitDataSae contains the curve fit constants for
-    !       the wall function data for the Spalart-Allmaras (Edwards
-    !       modification) turbulence model.
-    !
-    use flowVarRefState
-    use paramTurb
-    implicit none
-    !
-    !      Local variables.
-    !
-    !   integer :: ierr
-
-    call terminate("initCurveFitDataSae", &
-         "Not implemented yet")
-
-  end subroutine initCurveFitDataSae
-
-  subroutine initCurveFitDataVf
-    !
-    !       initCurveFitDataVf contains the curve fit constants for
-    !       the wall function data for the v2-f turbulence model.
-    !
-    use constants
-    use flowVarRefState
-    use inputPhysics
-    use paramTurb
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: ierr
-
-    ! Determine the version of the v2-f model.
-
-    select case (rvfN)
-
-    case (1_intType)
-
-       ! Version 1 of the model.
-
-       ! Set the number of data points and allocate the memory for
-       ! the arrays of the curve fits.
-
-       nFit = 34
-
-       allocate(ypT(0:nFit),          reT(0:nFit),          &
-            up0(nFit), up1(nFit), up2(nFit), up3(nFit), &
-            tup0(nFit,nt1:nt2+1), tup1(nFit,nt1:nt2+1), &
-            tup2(nFit,nt1:nt2+1), tup3(nFit,nt1:nt2+1), &
-            tuLogFit(nt1:nt2+1),  stat=ierr)
-       if(ierr /= 0)                          &
-            call terminate("initCurveFitDataVf", &
-            "Memory allocation failure for curve fit &
-            &coefficients")
-
-       ! Set the values of the Reynolds numbers at interval
-       ! boundaries.
-
-       reT( 0) = 0.12795537e+00_realType
-       reT( 1) = 0.45951248e+00_realType
-       reT( 2) = 0.11826997e+01_realType
-       reT( 3) = 0.25888649e+01_realType
-       reT( 4) = 0.51497900e+01_realType
-       reT( 5) = 0.96108242e+01_realType
-       reT( 6) = 0.17107619e+02_realType
-       reT( 7) = 0.29176338e+02_realType
-       reT( 8) = 0.47402263e+02_realType
-       reT( 9) = 0.73016653e+02_realType
-       reT(10) = 0.10712161e+03_realType
-       reT(11) = 0.15119025e+03_realType
-       reT(12) = 0.20726939e+03_realType
-       reT(13) = 0.27799459e+03_realType
-       reT(14) = 0.36660352e+03_realType
-       reT(15) = 0.47698645e+03_realType
-       reT(16) = 0.61377565e+03_realType
-       reT(17) = 0.78244065e+03_realType
-       reT(18) = 0.98942933e+03_realType
-       reT(19) = 0.12422738e+04_realType
-       reT(20) = 0.15497731e+04_realType
-       reT(21) = 0.19221837e+04_realType
-       reT(22) = 0.23713440e+04_realType
-       reT(23) = 0.29109888e+04_realType
-       reT(24) = 0.35569669e+04_realType
-       reT(25) = 0.43274783e+04_realType
-       reT(26) = 0.52434023e+04_realType
-       reT(27) = 0.63287428e+04_realType
-       reT(28) = 0.76109132e+04_realType
-       reT(29) = 0.91213906e+04_realType
-       reT(30) = 0.10896077e+05_realType
-       reT(31) = 0.12976000e+05_realType
-       reT(32) = 0.15408257e+05_realType
-       reT(33) = 0.18246449e+05_realType
-       reT(34) = 0.21552478e+05_realType
-
-       ! Set the values of the y+ values at interval boundaries.
-
-       ypT( 0) = 0.35770800e+00_realType
-       ypT( 1) = 0.67787500e+00_realType
-       ypT( 2) = 0.10875400e+01_realType
-       ypT( 3) = 0.16091400e+01_realType
-       ypT( 4) = 0.22700900e+01_realType
-       ypT( 5) = 0.31037100e+01_realType
-       ypT( 6) = 0.41503100e+01_realType
-       ypT( 7) = 0.54585000e+01_realType
-       ypT( 8) = 0.70866100e+01_realType
-       ypT( 9) = 0.91042300e+01_realType
-       ypT(10) = 0.11593900e+02_realType
-       ypT(11) = 0.14653200e+02_realType
-       ypT(12) = 0.18396800e+02_realType
-       ypT(13) = 0.22959200e+02_realType
-       ypT(14) = 0.28497300e+02_realType
-       ypT(15) = 0.35193900e+02_realType
-       ypT(16) = 0.43260500e+02_realType
-       ypT(17) = 0.52941300e+02_realType
-       ypT(18) = 0.64517200e+02_realType
-       ypT(19) = 0.78309700e+02_realType
-       ypT(20) = 0.94686000e+02_realType
-       ypT(21) = 0.11406400e+03_realType
-       ypT(22) = 0.13691600e+03_realType
-       ypT(23) = 0.16377700e+03_realType
-       ypT(24) = 0.19525000e+03_realType
-       ypT(25) = 0.23200900e+03_realType
-       ypT(26) = 0.27481000e+03_realType
-       ypT(27) = 0.32449600e+03_realType
-       ypT(28) = 0.38200300e+03_realType
-       ypT(29) = 0.44837100e+03_realType
-       ypT(30) = 0.52474800e+03_realType
-       ypT(31) = 0.61239900e+03_realType
-       ypT(32) = 0.71271500e+03_realType
-       ypT(33) = 0.82722200e+03_realType
-       ypT(34) = 0.95759000e+03_realType
-
-       ! Set the values of constants for the cubic fits of the
-       ! non-dimensional tangential velocity.
-
-       up0( 1) = 0.35770900e+00_realType
-       up0( 2) = 0.67787200e+00_realType
-       up0( 3) = 0.10875000e+01_realType
-       up0( 4) = 0.16088500e+01_realType
-       up0( 5) = 0.22685400e+01_realType
-       up0( 6) = 0.30965600e+01_realType
-       up0( 7) = 0.41220100e+01_realType
-       up0( 8) = 0.53451200e+01_realType
-       up0( 9) = 0.66889900e+01_realType
-       up0(10) = 0.80200800e+01_realType
-       up0(11) = 0.92394800e+01_realType
-       up0(12) = 0.10317900e+02_realType
-       up0(13) = 0.11266600e+02_realType
-       up0(14) = 0.12108200e+02_realType
-       up0(15) = 0.12864500e+02_realType
-       up0(16) = 0.13553100e+02_realType
-       up0(17) = 0.14187900e+02_realType
-       up0(18) = 0.14779400e+02_realType
-       up0(19) = 0.15335900e+02_realType
-       up0(20) = 0.15863600e+02_realType
-       up0(21) = 0.16367500e+02_realType
-       up0(22) = 0.16851800e+02_realType
-       up0(23) = 0.17319700e+02_realType
-       up0(24) = 0.17774100e+02_realType
-       up0(25) = 0.18217500e+02_realType
-       up0(26) = 0.18652200e+02_realType
-       up0(27) = 0.19080100e+02_realType
-       up0(28) = 0.19503300e+02_realType
-       up0(29) = 0.19923700e+02_realType
-       up0(30) = 0.20343400e+02_realType
-       up0(31) = 0.20764400e+02_realType
-       up0(32) = 0.21188800e+02_realType
-       up0(33) = 0.21619100e+02_realType
-       up0(34) = 0.22057500e+02_realType
-
-       up1( 1) = 0.12712722e+01_realType
-       up1( 2) = 0.69191267e+00_realType
-       up1( 3) = 0.43721180e+00_realType
-       up1( 4) = 0.29770939e+00_realType
-       up1( 5) = 0.21186537e+00_realType
-       up1( 6) = 0.15500054e+00_realType
-       up1( 7) = 0.11492466e+00_realType
-       up1( 8) = 0.84733790e-01_realType
-       up1( 9) = 0.61015984e-01_realType
-       up1(10) = 0.42707937e-01_realType
-       up1(11) = 0.29393811e-01_realType
-       up1(12) = 0.20241287e-01_realType
-       up1(13) = 0.14118603e-01_realType
-       up1(14) = 0.10028611e-01_realType
-       up1(15) = 0.72611010e-02_realType
-       up1(16) = 0.53541635e-02_realType
-       up1(17) = 0.40146771e-02_realType
-       up1(18) = 0.30560063e-02_realType
-       up1(19) = 0.23578120e-02_realType
-       up1(20) = 0.18410127e-02_realType
-       up1(21) = 0.14534277e-02_realType
-       up1(22) = 0.11589991e-02_realType
-       up1(23) = 0.93274199e-03_realType
-       up1(24) = 0.75723913e-03_realType
-       up1(25) = 0.61991282e-03_realType
-       up1(26) = 0.51149306e-03_realType
-       up1(27) = 0.42528110e-03_realType
-       up1(28) = 0.35632360e-03_realType
-       up1(29) = 0.30082562e-03_realType
-       up1(30) = 0.25590806e-03_realType
-       up1(31) = 0.21932184e-03_realType
-       up1(32) = 0.18942066e-03_realType
-       up1(33) = 0.16482466e-03_realType
-       up1(34) = 0.14450977e-03_realType
-
-       up2( 1) = -0.10180856e+01_realType
-       up2( 2) = -0.16838796e+00_realType
-       up2( 3) = -0.42564371e-01_realType
-       up2( 4) = -0.13467470e-01_realType
-       up2( 5) = -0.49083413e-02_realType
-       up2( 6) = -0.19435488e-02_realType
-       up2( 7) = -0.87388594e-03_realType
-       up2( 8) = -0.50925439e-03_realType
-       up2( 9) = -0.34513516e-03_realType
-       up2(10) = -0.22127804e-03_realType
-       up2(11) = -0.12741049e-03_realType
-       up2(12) = -0.68647301e-04_realType
-       up2(13) = -0.36296584e-04_realType
-       up2(14) = -0.19327079e-04_realType
-       up2(15) = -0.10522599e-04_realType
-       up2(16) = -0.58546470e-05_realType
-       up2(17) = -0.33469044e-05_realType
-       up2(18) = -0.19525994e-05_realType
-       up2(19) = -0.11686013e-05_realType
-       up2(20) = -0.71331900e-06_realType
-       up2(21) = -0.44175896e-06_realType
-       up2(22) = -0.27957847e-06_realType
-       up2(23) = -0.17903765e-06_realType
-       up2(24) = -0.11639426e-06_realType
-       up2(25) = -0.76321282e-07_realType
-       up2(26) = -0.51021408e-07_realType
-       up2(27) = -0.34196379e-07_realType
-       up2(28) = -0.23263805e-07_realType
-       up2(29) = -0.15876970e-07_realType
-       up2(30) = -0.10967048e-07_realType
-       up2(31) = -0.76570908e-08_realType
-       up2(32) = -0.53137237e-08_realType
-       up2(33) = -0.37931015e-08_realType
-       up2(34) = -0.26536226e-08_realType
-
-       up3( 1) =  0.29032828e+00_realType
-       up3( 2) = -0.71056794e-02_realType
-       up3( 3) = -0.33374366e-02_realType
-       up3( 4) = -0.85721208e-03_realType
-       up3( 5) = -0.21895691e-03_realType
-       up3( 6) = -0.64856031e-04_realType
-       up3( 7) = -0.20819908e-04_realType
-       up3( 8) = -0.51723681e-05_realType
-       up3( 9) = -0.31864903e-06_realType
-       up3(10) =  0.50988875e-06_realType
-       up3(11) =  0.35651270e-06_realType
-       up3(12) =  0.16711639e-06_realType
-       up3(13) =  0.69583056e-07_realType
-       up3(14) =  0.27917810e-07_realType
-       up3(15) =  0.11383228e-07_realType
-       up3(16) =  0.46713098e-08_realType
-       up3(17) =  0.19959356e-08_realType
-       up3(18) =  0.85688237e-09_realType
-       up3(19) =  0.38661576e-09_realType
-       up3(20) =  0.18015921e-09_realType
-       up3(21) =  0.83166064e-10_realType
-       up3(22) =  0.41131094e-10_realType
-       up3(23) =  0.20294857e-10_realType
-       up3(24) =  0.10424447e-10_realType
-       up3(25) =  0.51615620e-11_realType
-       up3(26) =  0.28812993e-11_realType
-       up3(27) =  0.14918197e-11_realType
-       up3(28) =  0.84315572e-12_realType
-       up3(29) =  0.44502764e-12_realType
-       up3(30) =  0.24764964e-12_realType
-       up3(31) =  0.15033872e-12_realType
-       up3(32) =  0.70583114e-13_realType
-       up3(33) =  0.50327579e-13_realType
-       up3(34) =  0.20760485e-13_realType
-
-       ! Set the values of tuLogFit.All variables have been
-       ! fitted linearly; the fifth variable is the eddy viscosity.
-
-       tuLogFit(itu1) = .false.
-       tuLogFit(itu2) = .false.
-       tuLogFit(itu3) = .false.
-       tuLogFit(itu4) = .false.
-       tuLogFit(itu5) = .false.
-
-       ! Set the values of constants for the cubic fits of the
-       ! non-dimensional k, eps, v2 and f values.
-
-       ! Constants for k.
-
-       tup0( 1,itu1) = 0.23837900e-01_realType
-       tup0( 2,itu1) = 0.78315200e-01_realType
-       tup0( 3,itu1) = 0.18685000e+00_realType
-       tup0( 4,itu1) = 0.38036300e+00_realType
-       tup0( 5,itu1) = 0.70148100e+00_realType
-       tup0( 6,itu1) = 0.12056400e+01_realType
-       tup0( 7,itu1) = 0.19570000e+01_realType
-       tup0( 8,itu1) = 0.29949800e+01_realType
-       tup0( 9,itu1) = 0.42181500e+01_realType
-       tup0(10,itu1) = 0.53378900e+01_realType
-       tup0(11,itu1) = 0.61118500e+01_realType
-       tup0(12,itu1) = 0.65046800e+01_realType
-       tup0(13,itu1) = 0.66026900e+01_realType
-       tup0(14,itu1) = 0.65082900e+01_realType
-       tup0(15,itu1) = 0.63012300e+01_realType
-       tup0(16,itu1) = 0.60354500e+01_realType
-       tup0(17,itu1) = 0.57452900e+01_realType
-       tup0(18,itu1) = 0.54518200e+01_realType
-       tup0(19,itu1) = 0.51675400e+01_realType
-       tup0(20,itu1) = 0.48994200e+01_realType
-       tup0(21,itu1) = 0.46509700e+01_realType
-       tup0(22,itu1) = 0.44234600e+01_realType
-       tup0(23,itu1) = 0.42167200e+01_realType
-       tup0(24,itu1) = 0.40297300e+01_realType
-       tup0(25,itu1) = 0.38609000e+01_realType
-       tup0(26,itu1) = 0.37082900e+01_realType
-       tup0(27,itu1) = 0.35697500e+01_realType
-       tup0(28,itu1) = 0.34429600e+01_realType
-       tup0(29,itu1) = 0.33254900e+01_realType
-       tup0(30,itu1) = 0.32147800e+01_realType
-       tup0(31,itu1) = 0.31080800e+01_realType
-       tup0(32,itu1) = 0.30023700e+01_realType
-       tup0(33,itu1) = 0.28942800e+01_realType
-       tup0(34,itu1) = 0.27799400e+01_realType
-
-       tup1( 1,itu1) =  0.13413856e+00_realType
-       tup1( 2,itu1) =  0.22335565e+00_realType
-       tup1( 3,itu1) =  0.32434141e+00_realType
-       tup1( 4,itu1) =  0.43518752e+00_realType
-       tup1( 5,itu1) =  0.55218357e+00_realType
-       tup1( 6,itu1) =  0.66775111e+00_realType
-       tup1( 7,itu1) =  0.75987243e+00_realType
-       tup1( 8,itu1) =  0.77006777e+00_realType
-       tup1( 9,itu1) =  0.64264496e+00_realType
-       tup1(10,itu1) =  0.42014159e+00_realType
-       tup1(11,itu1) =  0.21027146e+00_realType
-       tup1(12,itu1) =  0.72151582e-01_realType
-       tup1(13,itu1) =  0.43462557e-03_realType
-       tup1(14,itu1) = -0.29846047e-01_realType
-       tup1(15,itu1) = -0.38647454e-01_realType
-       tup1(16,itu1) = -0.37657148e-01_realType
-       tup1(17,itu1) = -0.32885380e-01_realType
-       tup1(18,itu1) = -0.27179666e-01_realType
-       tup1(19,itu1) = -0.21775122e-01_realType
-       tup1(20,itu1) = -0.17122657e-01_realType
-       tup1(21,itu1) = -0.13311965e-01_realType
-       tup1(22,itu1) = -0.10282974e-01_realType
-       tup1(23,itu1) = -0.79200612e-02_realType
-       tup1(24,itu1) = -0.60997017e-02_realType
-       tup1(25,itu1) = -0.47109860e-02_realType
-       tup1(26,itu1) = -0.36595023e-02_realType
-       tup1(27,itu1) = -0.28688356e-02_realType
-       tup1(28,itu1) = -0.22786936e-02_realType
-       tup1(29,itu1) = -0.18420182e-02_realType
-       tup1(30,itu1) = -0.15230656e-02_realType
-       tup1(31,itu1) = -0.12949618e-02_realType
-       tup1(32,itu1) = -0.11374337e-02_realType
-       tup1(33,itu1) = -0.10354105e-02_realType
-       tup1(34,itu1) = -0.97825421e-03_realType
-
-       tup2( 1,itu1) =  0.58799048e-01_realType
-       tup2( 2,itu1) =  0.57983435e-01_realType
-       tup2( 3,itu1) =  0.55840141e-01_realType
-       tup2( 4,itu1) =  0.52909760e-01_realType
-       tup2( 5,itu1) =  0.50658583e-01_realType
-       tup2( 6,itu1) =  0.55744447e-01_realType
-       tup2( 7,itu1) =  0.69202235e-01_realType
-       tup2( 8,itu1) =  0.43649374e-01_realType
-       tup2( 9,itu1) = -0.20068023e-01_realType
-       tup2(10,itu1) = -0.47375401e-01_realType
-       tup2(11,itu1) = -0.35131867e-01_realType
-       tup2(12,itu1) = -0.17682377e-01_realType
-       tup2(13,itu1) = -0.72540324e-02_realType
-       tup2(14,itu1) = -0.24963930e-02_realType
-       tup2(15,itu1) = -0.61439197e-03_realType
-       tup2(16,itu1) =  0.35707704e-04_realType
-       tup2(17,itu1) =  0.20726554e-03_realType
-       tup2(18,itu1) =  0.21257094e-03_realType
-       tup2(19,itu1) =  0.17069050e-03_realType
-       tup2(20,itu1) =  0.12477387e-03_realType
-       tup2(21,itu1) =  0.86954464e-04_realType
-       tup2(22,itu1) =  0.58869152e-04_realType
-       tup2(23,itu1) =  0.39300193e-04_realType
-       tup2(24,itu1) =  0.25976170e-04_realType
-       tup2(25,itu1) =  0.17045223e-04_realType
-       tup2(26,itu1) =  0.11152061e-04_realType
-       tup2(27,itu1) =  0.72633824e-05_realType
-       tup2(28,itu1) =  0.47173696e-05_realType
-       tup2(29,itu1) =  0.30546703e-05_realType
-       tup2(30,itu1) =  0.19644758e-05_realType
-       tup2(31,itu1) =  0.12465154e-05_realType
-       tup2(32,itu1) =  0.77547548e-06_realType
-       tup2(33,itu1) =  0.46678879e-06_realType
-       tup2(34,itu1) =  0.25787187e-06_realType
-
-       tup3( 1,itu1) =  0.16768319e+00_realType
-       tup3( 2,itu1) =  0.10621790e+00_realType
-       tup3( 3,itu1) =  0.64437267e-01_realType
-       tup3( 4,itu1) =  0.35904096e-01_realType
-       tup3( 5,itu1) =  0.14921346e-01_realType
-       tup3( 6,itu1) = -0.74747729e-02_realType
-       tup3( 7,itu1) = -0.33280334e-01_realType
-       tup3( 8,itu1) = -0.33896768e-01_realType
-       tup3( 9,itu1) = -0.11588583e-01_realType
-       tup3(10,itu1) =  0.13997083e-02_realType
-       tup3(11,itu1) =  0.27365948e-02_realType
-       tup3(12,itu1) =  0.14431329e-02_realType
-       tup3(13,itu1) =  0.57506718e-03_realType
-       tup3(14,itu1) =  0.20485602e-03_realType
-       tup3(15,itu1) =  0.68525613e-04_realType
-       tup3(16,itu1) =  0.21493188e-04_realType
-       tup3(17,itu1) =  0.60206301e-05_realType
-       tup3(18,itu1) =  0.12018375e-05_realType
-       tup3(19,itu1) = -0.98181484e-07_realType
-       tup3(20,itu1) = -0.34302113e-06_realType
-       tup3(21,itu1) = -0.30271628e-06_realType
-       tup3(22,itu1) = -0.20913696e-06_realType
-       tup3(23,itu1) = -0.13440526e-06_realType
-       tup3(24,itu1) = -0.82910301e-07_realType
-       tup3(25,itu1) = -0.49744260e-07_realType
-       tup3(26,itu1) = -0.29836016e-07_realType
-       tup3(27,itu1) = -0.17773844e-07_realType
-       tup3(28,itu1) = -0.10672954e-07_realType
-       tup3(29,itu1) = -0.65469411e-08_realType
-       tup3(30,itu1) = -0.41129357e-08_realType
-       tup3(31,itu1) = -0.26461407e-08_realType
-       tup3(32,itu1) = -0.17741710e-08_realType
-       tup3(33,itu1) = -0.12646276e-08_realType
-       tup3(34,itu1) = -0.92958789e-09_realType
-
-       ! Constants for epsilon.
-
-       tup0( 1,itu2) = 0.29354900e+00_realType
-       tup0( 2,itu2) = 0.26347600e+00_realType
-       tup0( 3,itu2) = 0.23131100e+00_realType
-       tup0( 4,itu2) = 0.19880500e+00_realType
-       tup0( 5,itu2) = 0.16843900e+00_realType
-       tup0( 6,itu2) = 0.14348600e+00_realType
-       tup0( 7,itu2) = 0.12800300e+00_realType
-       tup0( 8,itu2) = 0.12650200e+00_realType
-       tup0( 9,itu2) = 0.13424600e+00_realType
-       tup0(10,itu2) = 0.14121500e+00_realType
-       tup0(11,itu2) = 0.14022500e+00_realType
-       tup0(12,itu2) = 0.13108600e+00_realType
-       tup0(13,itu2) = 0.11710400e+00_realType
-       tup0(14,itu2) = 0.10147100e+00_realType
-       tup0(15,itu2) = 0.86210400e-01_realType
-       tup0(16,itu2) = 0.72337500e-01_realType
-       tup0(17,itu2) = 0.60231700e-01_realType
-       tup0(18,itu2) = 0.49927500e-01_realType
-       tup0(19,itu2) = 0.41292900e-01_realType
-       tup0(20,itu2) = 0.34129200e-01_realType
-       tup0(21,itu2) = 0.28223300e-01_realType
-       tup0(22,itu2) = 0.23372700e-01_realType
-       tup0(23,itu2) = 0.19397100e-01_realType
-       tup0(24,itu2) = 0.16140700e-01_realType
-       tup0(25,itu2) = 0.13472400e-01_realType
-       tup0(26,itu2) = 0.11283300e-01_realType
-       tup0(27,itu2) = 0.94837500e-02_realType
-       tup0(28,itu2) = 0.80004600e-02_realType
-       tup0(29,itu2) = 0.67736800e-02_realType
-       tup0(30,itu2) = 0.57548200e-02_realType
-       tup0(31,itu2) = 0.49043300e-02_realType
-       tup0(32,itu2) = 0.41899500e-02_realType
-       tup0(33,itu2) = 0.35852000e-02_realType
-       tup0(34,itu2) = 0.30682200e-02_realType
-
-       tup1( 1,itu2) = -0.10010771e+00_realType
-       tup1( 2,itu2) = -0.85277160e-01_realType
-       tup1( 3,itu2) = -0.69444251e-01_realType
-       tup1( 4,itu2) = -0.53166462e-01_realType
-       tup1( 5,itu2) = -0.37013322e-01_realType
-       tup1( 6,itu2) = -0.21505994e-01_realType
-       tup1( 7,itu2) = -0.72125328e-02_realType
-       tup1( 8,itu2) =  0.21261451e-02_realType
-       tup1( 9,itu2) =  0.40356801e-02_realType
-       tup1(10,itu2) =  0.13265177e-02_realType
-       tup1(11,itu2) = -0.18253838e-02_realType
-       tup1(12,itu2) = -0.33986976e-02_realType
-       tup1(13,itu2) = -0.35654948e-02_realType
-       tup1(14,itu2) = -0.30586209e-02_realType
-       tup1(15,itu2) = -0.23812190e-02_realType
-       tup1(16,itu2) = -0.17596930e-02_realType
-       tup1(17,itu2) = -0.12627202e-02_realType
-       tup1(18,itu2) = -0.89095673e-03_realType
-       tup1(19,itu2) = -0.62275508e-03_realType
-       tup1(20,itu2) = -0.43321577e-03_realType
-       tup1(21,itu2) = -0.30084493e-03_realType
-       tup1(22,itu2) = -0.20900308e-03_realType
-       tup1(23,itu2) = -0.14547503e-03_realType
-       tup1(24,itu2) = -0.10156512e-03_realType
-       tup1(25,itu2) = -0.71189471e-04_realType
-       tup1(26,itu2) = -0.50133861e-04_realType
-       tup1(27,itu2) = -0.35495151e-04_realType
-       tup1(28,itu2) = -0.25282155e-04_realType
-       tup1(29,itu2) = -0.18128274e-04_realType
-       tup1(30,itu2) = -0.13095730e-04_realType
-       tup1(31,itu2) = -0.95402614e-05_realType
-       tup1(32,itu2) = -0.70178808e-05_realType
-       tup1(33,itu2) = -0.52216476e-05_realType
-       tup1(34,itu2) = -0.39388668e-05_realType
-
-       tup2( 1,itu2) =  0.11572886e-01_realType
-       tup2( 2,itu2) =  0.10868522e-01_realType
-       tup2( 3,itu2) =  0.97691864e-02_realType
-       tup2( 4,itu2) =  0.83476261e-02_realType
-       tup2( 5,itu2) =  0.68769083e-02_realType
-       tup2( 6,itu2) =  0.55834783e-02_realType
-       tup2( 7,itu2) =  0.67702397e-02_realType
-       tup2( 8,itu2) =  0.36737909e-02_realType
-       tup2( 9,itu2) =  0.47795474e-03_realType
-       tup2(10,itu2) = -0.81158579e-03_realType
-       tup2(11,itu2) = -0.62510665e-03_realType
-       tup2(12,itu2) = -0.22487250e-03_realType
-       tup2(13,itu2) = -0.19693072e-04_realType
-       tup2(14,itu2) =  0.41848783e-04_realType
-       tup2(15,itu2) =  0.45878641e-04_realType
-       tup2(16,itu2) =  0.34700119e-04_realType
-       tup2(17,itu2) =  0.23057037e-04_realType
-       tup2(18,itu2) =  0.14420758e-04_realType
-       tup2(19,itu2) =  0.87404760e-05_realType
-       tup2(20,itu2) =  0.52127418e-05_realType
-       tup2(21,itu2) =  0.30833200e-05_realType
-       tup2(22,itu2) =  0.18189328e-05_realType
-       tup2(23,itu2) =  0.10729534e-05_realType
-       tup2(24,itu2) =  0.63476327e-06_realType
-       tup2(25,itu2) =  0.37690133e-06_realType
-       tup2(26,itu2) =  0.22497460e-06_realType
-       tup2(27,itu2) =  0.13510084e-06_realType
-       tup2(28,itu2) =  0.81632875e-07_realType
-       tup2(29,itu2) =  0.49681361e-07_realType
-       tup2(30,itu2) =  0.30447133e-07_realType
-       tup2(31,itu2) =  0.18796936e-07_realType
-       tup2(32,itu2) =  0.11683669e-07_realType
-       tup2(33,itu2) =  0.73153823e-08_realType
-       tup2(34,itu2) =  0.46047384e-08_realType
-
-       tup3( 1,itu2) =  0.24128568e-01_realType
-       tup3( 2,itu2) =  0.13760338e-01_realType
-       tup3( 3,itu2) =  0.74572108e-02_realType
-       tup3( 4,itu2) =  0.39055025e-02_realType
-       tup3( 5,itu2) =  0.19387640e-02_realType
-       tup3( 6,itu2) =  0.79307164e-03_realType
-       tup3( 7,itu2) = -0.16312244e-02_realType
-       tup3( 8,itu2) = -0.12641914e-02_realType
-       tup3( 9,itu2) = -0.37976445e-03_realType
-       tup3(10,itu2) =  0.47821584e-04_realType
-       tup3(11,itu2) =  0.80186189e-04_realType
-       tup3(12,itu2) =  0.36078436e-04_realType
-       tup3(13,itu2) =  0.10994524e-04_realType
-       tup3(14,itu2) =  0.23244487e-05_realType
-       tup3(15,itu2) =  0.52508614e-07_realType
-       tup3(16,itu2) = -0.32196650e-06_realType
-       tup3(17,itu2) = -0.26554053e-06_realType
-       tup3(18,itu2) = -0.16334363e-06_realType
-       tup3(19,itu2) = -0.90357098e-07_realType
-       tup3(20,itu2) = -0.47678861e-07_realType
-       tup3(21,itu2) = -0.24549302e-07_realType
-       tup3(22,itu2) = -0.12513680e-07_realType
-       tup3(23,itu2) = -0.63437513e-08_realType
-       tup3(24,itu2) = -0.32238513e-08_realType
-       tup3(25,itu2) = -0.16413320e-08_realType
-       tup3(26,itu2) = -0.84056640e-09_realType
-       tup3(27,itu2) = -0.43372978e-09_realType
-       tup3(28,itu2) = -0.22528033e-09_realType
-       tup3(29,itu2) = -0.11820379e-09_realType
-       tup3(30,itu2) = -0.62596081e-10_realType
-       tup3(31,itu2) = -0.33528145e-10_realType
-       tup3(32,itu2) = -0.18147943e-10_realType
-       tup3(33,itu2) = -0.99793712e-11_realType
-       tup3(34,itu2) = -0.55553743e-11_realType
-
-       ! Constants for v2.
-
-       tup0( 1,itu3) = 0.47502000e-06_realType
-       tup0( 2,itu3) = 0.89181800e-05_realType
-       tup0( 3,itu3) = 0.62272000e-04_realType
-       tup0( 4,itu3) = 0.29003500e-03_realType
-       tup0( 5,itu3) = 0.10734400e-02_realType
-       tup0( 6,itu3) = 0.34009200e-02_realType
-       tup0( 7,itu3) = 0.95514600e-02_realType
-       tup0( 8,itu3) = 0.23917300e-01_realType
-       tup0( 9,itu3) = 0.52481200e-01_realType
-       tup0(10,itu3) = 0.99458700e-01_realType
-       tup0(11,itu3) = 0.16446900e+00_realType
-       tup0(12,itu3) = 0.24385400e+00_realType
-       tup0(13,itu3) = 0.33343500e+00_realType
-       tup0(14,itu3) = 0.42968800e+00_realType
-       tup0(15,itu3) = 0.52978700e+00_realType
-       tup0(16,itu3) = 0.63143400e+00_realType
-       tup0(17,itu3) = 0.73270600e+00_realType
-       tup0(18,itu3) = 0.83194900e+00_realType
-       tup0(19,itu3) = 0.92772000e+00_realType
-       tup0(20,itu3) = 0.10187500e+01_realType
-       tup0(21,itu3) = 0.11039500e+01_realType
-       tup0(22,itu3) = 0.11823500e+01_realType
-       tup0(23,itu3) = 0.12531300e+01_realType
-       tup0(24,itu3) = 0.13155900e+01_realType
-       tup0(25,itu3) = 0.13691300e+01_realType
-       tup0(26,itu3) = 0.14132300e+01_realType
-       tup0(27,itu3) = 0.14474100e+01_realType
-       tup0(28,itu3) = 0.14712200e+01_realType
-       tup0(29,itu3) = 0.14842200e+01_realType
-       tup0(30,itu3) = 0.14859400e+01_realType
-       tup0(31,itu3) = 0.14758600e+01_realType
-       tup0(32,itu3) = 0.14533600e+01_realType
-       tup0(33,itu3) = 0.14177100e+01_realType
-       tup0(34,itu3) = 0.13680600e+01_realType
-
-       tup1( 1,itu3) =  0.15655101e-04_realType
-       tup1( 2,itu3) =  0.84672884e-04_realType
-       tup1( 3,itu3) =  0.30186555e-03_realType
-       tup1( 4,itu3) =  0.85507420e-03_realType
-       tup1( 5,itu3) =  0.20814582e-02_realType
-       tup1( 6,itu3) =  0.45090575e-02_realType
-       tup1( 7,itu3) =  0.87126156e-02_realType
-       tup1( 8,itu3) =  0.14620352e-01_realType
-       tup1( 9,itu3) =  0.20720514e-01_realType
-       tup1(10,itu3) =  0.24845927e-01_realType
-       tup1(11,itu3) =  0.26022000e-01_realType
-       tup1(12,itu3) =  0.24837349e-01_realType
-       tup1(13,itu3) =  0.22373465e-01_realType
-       tup1(14,itu3) =  0.19439830e-01_realType
-       tup1(15,itu3) =  0.16489656e-01_realType
-       tup1(16,itu3) =  0.13744920e-01_realType
-       tup1(17,itu3) =  0.11298275e-01_realType
-       tup1(18,itu3) =  0.91742368e-02_realType
-       tup1(19,itu3) =  0.73635310e-02_realType
-       tup1(20,itu3) =  0.58414654e-02_realType
-       tup1(21,itu3) =  0.45756734e-02_realType
-       tup1(22,itu3) =  0.35325598e-02_realType
-       tup1(23,itu3) =  0.26801843e-02_realType
-       tup1(24,itu3) =  0.19885487e-02_realType
-       tup1(25,itu3) =  0.14310001e-02_realType
-       tup1(26,itu3) =  0.98391151e-03_realType
-       tup1(27,itu3) =  0.62700704e-03_realType
-       tup1(28,itu3) =  0.34339929e-03_realType
-       tup1(29,itu3) =  0.11882947e-03_realType
-       tup1(30,itu3) = -0.58565974e-04_realType
-       tup1(31,itu3) = -0.19862463e-03_realType
-       tup1(32,itu3) = -0.30936281e-03_realType
-       tup1(33,itu3) = -0.39707108e-03_realType
-       tup1(34,itu3) = -0.46709546e-03_realType
-
-       tup2( 1,itu3) = -0.11515786e-03_realType
-       tup2( 2,itu3) = -0.19649780e-03_realType
-       tup2( 3,itu3) = -0.28531197e-03_realType
-       tup2( 4,itu3) = -0.35674865e-03_realType
-       tup2( 5,itu3) = -0.35499057e-03_realType
-       tup2( 6,itu3) = -0.96188825e-04_realType
-       tup2( 7,itu3) =  0.68706040e-03_realType
-       tup2( 8,itu3) =  0.16408011e-02_realType
-       tup2( 9,itu3) =  0.17663922e-02_realType
-       tup2(10,itu3) =  0.10532277e-02_realType
-       tup2(11,itu3) =  0.31539555e-03_realType
-       tup2(12,itu3) = -0.69677717e-04_realType
-       tup2(13,itu3) = -0.19632838e-03_realType
-       tup2(14,itu3) = -0.20683711e-03_realType
-       tup2(15,itu3) = -0.17733514e-03_realType
-       tup2(16,itu3) = -0.13942213e-03_realType
-       tup2(17,itu3) = -0.10497064e-03_realType
-       tup2(18,itu3) = -0.77063300e-04_realType
-       tup2(19,itu3) = -0.55728557e-04_realType
-       tup2(20,itu3) = -0.39733207e-04_realType
-       tup2(21,itu3) = -0.28198531e-04_realType
-       tup2(22,itu3) = -0.19838008e-04_realType
-       tup2(23,itu3) = -0.13886459e-04_realType
-       tup2(24,itu3) = -0.96805370e-05_realType
-       tup2(25,itu3) = -0.67138161e-05_realType
-       tup2(26,itu3) = -0.46515653e-05_realType
-       tup2(27,itu3) = -0.32158971e-05_realType
-       tup2(28,itu3) = -0.22162473e-05_realType
-       tup2(29,itu3) = -0.15270107e-05_realType
-       tup2(30,itu3) = -0.10497157e-05_realType
-       tup2(31,itu3) = -0.72432186e-06_realType
-       tup2(32,itu3) = -0.50175766e-06_realType
-       tup2(33,itu3) = -0.34544811e-06_realType
-       tup2(34,itu3) = -0.24279564e-06_realType
-
-       tup3( 1,itu3) =  0.46422002e-03_realType
-       tup3( 2,itu3) =  0.75115602e-03_realType
-       tup3( 3,itu3) =  0.10424485e-02_realType
-       tup3( 4,itu3) =  0.12956018e-02_realType
-       tup3( 5,itu3) =  0.14483411e-02_realType
-       tup3( 6,itu3) =  0.13404586e-02_realType
-       tup3( 7,itu3) =  0.80055685e-03_realType
-       tup3( 8,itu3) =  0.95237809e-04_realType
-       tup3( 9,itu3) = -0.24584932e-03_realType
-       tup3(10,itu3) = -0.21878056e-03_realType
-       tup3(11,itu3) = -0.11092088e-03_realType
-       tup3(12,itu3) = -0.46194706e-04_realType
-       tup3(13,itu3) = -0.18290511e-04_realType
-       tup3(14,itu3) = -0.71643534e-05_realType
-       tup3(15,itu3) = -0.27476585e-05_realType
-       tup3(16,itu3) = -0.10108088e-05_realType
-       tup3(17,itu3) = -0.32593663e-06_realType
-       tup3(18,itu3) = -0.66048306e-07_realType
-       tup3(19,itu3) =  0.26645554e-07_realType
-       tup3(20,itu3) =  0.44215925e-07_realType
-       tup3(21,itu3) =  0.44161326e-07_realType
-       tup3(22,itu3) =  0.34660614e-07_realType
-       tup3(23,itu3) =  0.25119801e-07_realType
-       tup3(24,itu3) =  0.17432238e-07_realType
-       tup3(25,itu3) =  0.11470519e-07_realType
-       tup3(26,itu3) =  0.75110077e-08_realType
-       tup3(27,itu3) =  0.48557801e-08_realType
-       tup3(28,itu3) =  0.30570649e-08_realType
-       tup3(29,itu3) =  0.19141510e-08_realType
-       tup3(30,itu3) =  0.11593848e-08_realType
-       tup3(31,itu3) =  0.70447835e-09_realType
-       tup3(32,itu3) =  0.42929522e-09_realType
-       tup3(33,itu3) =  0.23103965e-09_realType
-       tup3(34,itu3) =  0.13124926e-09_realType
-
-       ! Constants for f.
-
-       tup0( 1,itu4) = -0.33990e-02_realType
-       tup0( 2,itu4) = -0.32796e-02_realType
-       tup0( 3,itu4) = -0.31317e-02_realType
-       tup0( 4,itu4) = -0.29501e-02_realType
-       tup0( 5,itu4) = -0.27297e-02_realType
-       tup0( 6,itu4) = -0.24652e-02_realType
-       tup0( 7,itu4) = -0.21525e-02_realType
-       tup0( 8,itu4) = -0.17911e-02_realType
-       tup0( 9,itu4) = -0.13852e-02_realType
-       tup0(10,itu4) = -0.95384e-03_realType
-       tup0(11,itu4) = -0.52641e-03_realType
-       tup0(12,itu4) = -0.13576e-03_realType
-       tup0(13,itu4) =  0.21413e-03_realType
-       tup0(14,itu4) =  0.52393e-03_realType
-       tup0(15,itu4) =  0.79302e-03_realType
-       tup0(16,itu4) =  0.10205e-02_realType
-       tup0(17,itu4) =  0.12056e-02_realType
-       tup0(18,itu4) =  0.13485e-02_realType
-       tup0(19,itu4) =  0.14505e-02_realType
-       tup0(20,itu4) =  0.15137e-02_realType
-       tup0(21,itu4) =  0.15414e-02_realType
-       tup0(22,itu4) =  0.15379e-02_realType
-       tup0(23,itu4) =  0.15079e-02_realType
-       tup0(24,itu4) =  0.14563e-02_realType
-       tup0(25,itu4) =  0.13881e-02_realType
-       tup0(26,itu4) =  0.13079e-02_realType
-       tup0(27,itu4) =  0.12200e-02_realType
-       tup0(28,itu4) =  0.11280e-02_realType
-       tup0(29,itu4) =  0.10348e-02_realType
-       tup0(30,itu4) =  0.94297e-03_realType
-       tup0(31,itu4) =  0.85414e-03_realType
-       tup0(32,itu4) =  0.76956e-03_realType
-       tup0(33,itu4) =  0.68990e-03_realType
-       tup0(34,itu4) =  0.61535e-03_realType
-
-       tup1( 1,itu4) =  0.36489e-03_realType
-       tup1( 2,itu4) =  0.35429e-03_realType
-       tup1( 3,itu4) =  0.34223e-03_realType
-       tup1( 4,itu4) =  0.32877e-03_realType
-       tup1( 5,itu4) =  0.31384e-03_realType
-       tup1( 6,itu4) =  0.29698e-03_realType
-       tup1( 7,itu4) =  0.27691e-03_realType
-       tup1( 8,itu4) =  0.25273e-03_realType
-       tup1( 9,itu4) =  0.22211e-03_realType
-       tup1(10,itu4) =  0.18427e-03_realType
-       tup1(11,itu4) =  0.14258e-03_realType
-       tup1(12,itu4) =  0.10527e-03_realType
-       tup1(13,itu4) =  0.76806e-04_realType
-       tup1(14,itu4) =  0.55424e-04_realType
-       tup1(15,itu4) =  0.39245e-04_realType
-       tup1(16,itu4) =  0.27022e-04_realType
-       tup1(17,itu4) =  0.17875e-04_realType
-       tup1(18,itu4) =  0.11139e-04_realType
-       tup1(19,itu4) =  0.62935e-05_realType
-       tup1(20,itu4) =  0.29155e-05_realType
-       tup1(21,itu4) =  0.65684e-06_realType
-       tup1(22,itu4) = -0.76742e-06_realType
-       tup1(23,itu4) = -0.15875e-05_realType
-       tup1(24,itu4) = -0.19855e-05_realType
-       tup1(25,itu4) = -0.21022e-05_realType
-       tup1(26,itu4) = -0.20422e-05_realType
-       tup1(27,itu4) = -0.18806e-05_realType
-       tup1(28,itu4) = -0.16696e-05_realType
-       tup1(29,itu4) = -0.14434e-05_realType
-       tup1(30,itu4) = -0.12233e-05_realType
-       tup1(31,itu4) = -0.10215e-05_realType
-       tup1(32,itu4) = -0.84412e-06_realType
-       tup1(33,itu4) = -0.69337e-06_realType
-       tup1(34,itu4) = -0.56947e-06_realType
-
-       tup2( 1,itu4) = -0.66773e-05_realType
-       tup2( 2,itu4) = -0.65637e-05_realType
-       tup2( 3,itu4) = -0.61090e-05_realType
-       tup2( 4,itu4) = -0.54483e-05_realType
-       tup2( 5,itu4) = -0.44808e-05_realType
-       tup2( 6,itu4) = -0.34229e-05_realType
-       tup2( 7,itu4) = -0.36764e-05_realType
-       tup2( 8,itu4) = -0.25226e-05_realType
-       tup2( 9,itu4) = -0.39077e-05_realType
-       tup2(10,itu4) = -0.50583e-05_realType
-       tup2(11,itu4) = -0.63087e-05_realType
-       tup2(12,itu4) = -0.41839e-05_realType
-       tup2(13,itu4) = -0.25527e-05_realType
-       tup2(14,itu4) = -0.15942e-05_realType
-       tup2(15,itu4) = -0.10087e-05_realType
-       tup2(16,itu4) = -0.64089e-06_realType
-       tup2(17,itu4) = -0.40493e-06_realType
-       tup2(18,itu4) = -0.25305e-06_realType
-       tup2(19,itu4) = -0.15515e-06_realType
-       tup2(20,itu4) = -0.92540e-07_realType
-       tup2(21,itu4) = -0.53318e-07_realType
-       tup2(22,itu4) = -0.29160e-07_realType
-       tup2(23,itu4) = -0.14786e-07_realType
-       tup2(24,itu4) = -0.65163e-08_realType
-       tup2(25,itu4) = -0.20578e-08_realType
-       tup2(26,itu4) =  0.19849e-09_realType
-       tup2(27,itu4) =  0.11643e-08_realType
-       tup2(28,itu4) =  0.14562e-08_realType
-       tup2(29,itu4) =  0.14088e-08_realType
-       tup2(30,itu4) =  0.12218e-08_realType
-       tup2(31,itu4) =  0.99318e-09_realType
-       tup2(32,itu4) =  0.77547e-09_realType
-       tup2(33,itu4) =  0.58737e-09_realType
-       tup2(34,itu4) =  0.43430e-09_realType
-
-       tup3( 1,itu4) = -0.18797e-04_realType
-       tup3( 2,itu4) = -0.12078e-04_realType
-       tup3( 3,itu4) = -0.78817e-05_realType
-       tup3( 4,itu4) = -0.53414e-05_realType
-       tup3( 5,itu4) = -0.40995e-05_realType
-       tup3( 6,itu4) = -0.36062e-05_realType
-       tup3( 7,itu4) = -0.25946e-05_realType
-       tup3( 8,itu4) = -0.26029e-05_realType
-       tup3( 9,itu4) = -0.16491e-05_realType
-       tup3(10,itu4) = -0.78717e-06_realType
-       tup3(11,itu4) =  0.86868e-07_realType
-       tup3(12,itu4) =  0.87323e-07_realType
-       tup3(13,itu4) =  0.40496e-07_realType
-       tup3(14,itu4) =  0.21155e-07_realType
-       tup3(15,itu4) =  0.12152e-07_realType
-       tup3(16,itu4) =  0.74005e-08_realType
-       tup3(17,itu4) =  0.45651e-08_realType
-       tup3(18,itu4) =  0.28215e-08_realType
-       tup3(19,itu4) =  0.17171e-08_realType
-       tup3(20,itu4) =  0.10180e-08_realType
-       tup3(21,itu4) =  0.59166e-09_realType
-       tup3(22,itu4) =  0.33318e-09_realType
-       tup3(23,itu4) =  0.18290e-09_realType
-       tup3(24,itu4) =  0.96740e-10_realType
-       tup3(25,itu4) =  0.49927e-10_realType
-       tup3(26,itu4) =  0.24485e-10_realType
-       tup3(27,itu4) =  0.11513e-10_realType
-       tup3(28,itu4) =  0.49778e-11_realType
-       tup3(29,itu4) =  0.18786e-11_realType
-       tup3(30,itu4) =  0.46227e-12_realType
-       tup3(31,itu4) = -0.11281e-12_realType
-       tup3(32,itu4) = -0.31872e-12_realType
-       tup3(33,itu4) = -0.36442e-12_realType
-       tup3(34,itu4) = -0.35534e-12_realType
-
-       ! Constants for nut.
-
-       tup0( 1,itu5) =  0.11403e-05_realType
-       tup0( 2,itu5) =  0.22598e-04_realType
-       tup0( 3,itu5) =  0.16840e-03_realType
-       tup0( 4,itu5) =  0.84569e-03_realType
-       tup0( 5,itu5) =  0.33954e-02_realType
-       tup0( 6,itu5) =  0.11611e-01_realType
-       tup0( 7,itu5) =  0.34239e-01_realType
-       tup0( 8,itu5) =  0.12517e+00_realType
-       tup0( 9,itu5) =  0.35370e+00_realType
-       tup0(10,itu5) =  0.78688e+00_realType
-       tup0(11,itu5) =  0.14743e+01_realType
-       tup0(12,itu5) =  0.24581e+01_realType
-       tup0(13,itu5) =  0.37861e+01_realType
-       tup0(14,itu5) =  0.55146e+01_realType
-       tup0(15,itu5) =  0.77089e+01_realType
-       tup0(16,itu5) =  0.10444e+02_realType
-       tup0(17,itu5) =  0.13801e+02_realType
-       tup0(18,itu5) =  0.17873e+02_realType
-       tup0(19,itu5) =  0.22754e+02_realType
-       tup0(20,itu5) =  0.28545e+02_realType
-       tup0(21,itu5) =  0.35342e+02_realType
-       tup0(22,itu5) =  0.43237e+02_realType
-       tup0(23,itu5) =  0.52301e+02_realType
-       tup0(24,itu5) =  0.62582e+02_realType
-       tup0(25,itu5) =  0.74081e+02_realType
-       tup0(26,itu5) =  0.86742e+02_realType
-       tup0(27,itu5) =  0.10042e+03_realType
-       tup0(28,itu5) =  0.11489e+03_realType
-       tup0(29,itu5) =  0.12977e+03_realType
-       tup0(30,itu5) =  0.14455e+03_realType
-       tup0(31,itu5) =  0.15854e+03_realType
-       tup0(32,itu5) =  0.17088e+03_realType
-       tup0(33,itu5) =  0.18051e+03_realType
-       tup0(34,itu5) =  0.18619e+03_realType
-
-       tup1( 1,itu5) =  0.38376e-04_realType
-       tup1( 2,itu5) =  0.22169e-03_realType
-       tup1( 3,itu5) =  0.85495e-03_realType
-       tup1( 4,itu5) =  0.26396e-02_realType
-       tup1( 5,itu5) =  0.69671e-02_realType
-       tup1( 6,itu5) =  0.15867e-01_realType
-       tup1( 7,itu5) =  0.46643e-01_realType
-       tup1( 8,itu5) =  0.10523e+00_realType
-       tup1( 9,itu5) =  0.17554e+00_realType
-       tup1(10,itu5) =  0.24044e+00_realType
-       tup1(11,itu5) =  0.29128e+00_realType
-       tup1(12,itu5) =  0.32865e+00_realType
-       tup1(13,itu5) =  0.35586e+00_realType
-       tup1(14,itu5) =  0.37557e+00_realType
-       tup1(15,itu5) =  0.38958e+00_realType
-       tup1(16,itu5) =  0.39905e+00_realType
-       tup1(17,itu5) =  0.40478e+00_realType
-       tup1(18,itu5) =  0.40725e+00_realType
-       tup1(19,itu5) =  0.40676e+00_realType
-       tup1(20,itu5) =  0.40343e+00_realType
-       tup1(21,itu5) =  0.39729e+00_realType
-       tup1(22,itu5) =  0.38827e+00_realType
-       tup1(23,itu5) =  0.37621e+00_realType
-       tup1(24,itu5) =  0.36094e+00_realType
-       tup1(25,itu5) =  0.34229e+00_realType
-       tup1(26,itu5) =  0.32008e+00_realType
-       tup1(27,itu5) =  0.29419e+00_realType
-       tup1(28,itu5) =  0.26461e+00_realType
-       tup1(29,itu5) =  0.23139e+00_realType
-       tup1(30,itu5) =  0.19476e+00_realType
-       tup1(31,itu5) =  0.15509e+00_realType
-       tup1(32,itu5) =  0.11289e+00_realType
-       tup1(33,itu5) =  0.68853e-01_realType
-       tup1(34,itu5) =  0.23785e-01_realType
-
-       tup2( 1,itu5) = -0.31406e-03_realType
-       tup2( 2,itu5) = -0.62688e-03_realType
-       tup2( 3,itu5) = -0.10783e-02_realType
-       tup2( 4,itu5) = -0.15394e-02_realType
-       tup2( 5,itu5) = -0.13969e-02_realType
-       tup2( 6,itu5) = -0.14454e-01_realType
-       tup2( 7,itu5) =  0.23424e-02_realType
-       tup2( 8,itu5) =  0.12646e-01_realType
-       tup2( 9,itu5) =  0.15053e-01_realType
-       tup2(10,itu5) =  0.11222e-01_realType
-       tup2(11,itu5) =  0.69106e-02_realType
-       tup2(12,itu5) =  0.41344e-02_realType
-       tup2(13,itu5) =  0.25022e-02_realType
-       tup2(14,itu5) =  0.15280e-02_realType
-       tup2(15,itu5) =  0.93303e-03_realType
-       tup2(16,itu5) =  0.55503e-03_realType
-       tup2(17,itu5) =  0.32030e-03_realType
-       tup2(18,itu5) =  0.16369e-03_realType
-       tup2(19,itu5) =  0.59340e-04_realType
-       tup2(20,itu5) = -0.10107e-04_realType
-       tup2(21,itu5) = -0.60320e-04_realType
-       tup2(22,itu5) = -0.93922e-04_realType
-       tup2(23,itu5) = -0.12076e-03_realType
-       tup2(24,itu5) = -0.13968e-03_realType
-       tup2(25,itu5) = -0.15343e-03_realType
-       tup2(26,itu5) = -0.16392e-03_realType
-       tup2(27,itu5) = -0.17035e-03_realType
-       tup2(28,itu5) = -0.17361e-03_realType
-       tup2(29,itu5) = -0.17458e-03_realType
-       tup2(30,itu5) = -0.17152e-03_realType
-       tup2(31,itu5) = -0.16658e-03_realType
-       tup2(32,itu5) = -0.15883e-03_realType
-       tup2(33,itu5) = -0.14875e-03_realType
-       tup2(34,itu5) = -0.13710e-03_realType
-
-       tup3( 1,itu5) =  0.11903e-02_realType
-       tup3( 2,itu5) =  0.21637e-02_realType
-       tup3( 3,itu5) =  0.33791e-02_realType
-       tup3( 4,itu5) =  0.45914e-02_realType
-       tup3( 5,itu5) =  0.50750e-02_realType
-       tup3( 6,itu5) =  0.17668e-01_realType
-       tup3( 7,itu5) =  0.95211e-02_realType
-       tup3( 8,itu5) =  0.32633e-02_realType
-       tup3( 9,itu5) =  0.15990e-03_realType
-       tup3(10,itu5) = -0.34910e-03_realType
-       tup3(11,itu5) = -0.21152e-03_realType
-       tup3(12,itu5) = -0.10676e-03_realType
-       tup3(13,itu5) = -0.58327e-04_realType
-       tup3(14,itu5) = -0.35522e-04_realType
-       tup3(15,itu5) = -0.24007e-04_realType
-       tup3(16,itu5) = -0.16925e-04_realType
-       tup3(17,itu5) = -0.13086e-04_realType
-       tup3(18,itu5) = -0.10268e-04_realType
-       tup3(19,itu5) = -0.82175e-05_realType
-       tup3(20,itu5) = -0.67361e-05_realType
-       tup3(21,itu5) = -0.54809e-05_realType
-       tup3(22,itu5) = -0.45452e-05_realType
-       tup3(23,itu5) = -0.36958e-05_realType
-       tup3(24,itu5) = -0.30062e-05_realType
-       tup3(25,itu5) = -0.24308e-05_realType
-       tup3(26,itu5) = -0.19327e-05_realType
-       tup3(27,itu5) = -0.15225e-05_realType
-       tup3(28,itu5) = -0.11825e-05_realType
-       tup3(29,itu5) = -0.89374e-06_realType
-       tup3(30,itu5) = -0.66991e-06_realType
-       tup3(31,itu5) = -0.48509e-06_realType
-       tup3(32,itu5) = -0.34165e-06_realType
-       tup3(33,itu5) = -0.23235e-06_realType
-       tup3(34,itu5) = -0.14885e-06_realType
-
-       !===============================================================
-
-    case (6_intType)
-
-       ! Version 6 of the model.
-
-       ! Set the number of data points and allocate the memory for
-       ! the arrays of the curve fits.
-
-       nFit = 34
-
-       allocate(ypT(0:nFit),          reT(0:nFit),          &
-            up0(nFit), up1(nFit), up2(nFit), up3(nFit), &
-            tup0(nFit,nt1:nt2+1), tup1(nFit,nt1:nt2+1), &
-            tup2(nFit,nt1:nt2+1), tup3(nFit,nt1:nt2+1), &
-            tuLogFit(nt1:nt2+1), stat=ierr)
-       if(ierr /= 0)                              &
-            call terminate("initCurveFitDataVf", &
-            "Memory allocation failure for curve fit &
-            &coefficients")
-
-       ! Set the values of the Reynolds numbers at interval
-       ! boundaries.
-
-       reT( 0) =     0.13341_realType
-       reT( 1) =     0.47908_realType
-       reT( 2) =     1.2328_realType
-       reT( 3) =     2.6968_realType
-       reT( 4) =     5.3545_realType
-       reT( 5) =     9.9485_realType
-       reT( 6) =    17.557_realType
-       reT( 7) =    29.618_realType
-       reT( 8) =    47.649_realType
-       reT( 9) =    73.015_realType
-       reT(10) =   107.22_realType
-       reT(11) =   152.01_realType
-       reT(12) =   209.51_realType
-       reT(13) =   282.32_realType
-       reT(14) =   373.64_realType
-       reT(15) =   487.34_realType
-       reT(16) =   628.02_realType
-       reT(17) =   801.15_realType
-       reT(18) =  1013.1_realType
-       reT(19) =  1271.3_realType
-       reT(20) =  1584.4_realType
-       reT(21) =  1962.4_realType
-       reT(22) =  2416.6_realType
-       reT(23) =  2960.4_realType
-       reT(24) =  3608.7_realType
-       reT(25) =  4378.7_realType
-       reT(26) =  5289.8_realType
-       reT(27) =  6364.3_realType
-       reT(28) =  7627.3_realType
-       reT(29) =  9107.0_realType
-       reT(30) = 10835.0_realType
-       reT(31) = 12849.0_realType
-       reT(32) = 15187.0_realType
-       reT(33) = 17896.0_realType
-       reT(34) = 21028.0_realType
-
-       ! Set the values of the y+ values at interval boundaries.
-
-       ypT( 0) =   0.36526_realType
-       ypT( 1) =   0.69218_realType
-       ypT( 2) =   1.1105_realType
-       ypT( 3) =   1.6431_realType
-       ypT( 4) =   2.3179_realType
-       ypT( 5) =   3.169_realType
-       ypT( 6) =   4.2373_realType
-       ypT( 7) =   5.5723_realType
-       ypT( 8) =   7.2331_realType
-       ypT( 9) =   9.2902_realType
-       ypT(10) =  11.827_realType
-       ypT(11) =  14.943_realType
-       ypT(12) =  18.755_realType
-       ypT(13) =  23.398_realType
-       ypT(14) =  29.033_realType
-       ypT(15) =  35.843_realType
-       ypT(16) =  44.045_realType
-       ypT(17) =  53.884_realType
-       ypT(18) =  65.646_realType
-       ypT(19) =  79.656_realType
-       ypT(20) =  96.285_realType
-       ypT(21) = 115.96_realType
-       ypT(22) = 139.15_realType
-       ypT(23) = 166.4_realType
-       ypT(24) = 198.32_realType
-       ypT(25) = 235.59_realType
-       ypT(26) = 278.96_realType
-       ypT(27) = 329.31_realType
-       ypT(28) = 387.55_realType
-       ypT(29) = 454.74_realType
-       ypT(30) = 532.04_realType
-       ypT(31) = 620.71_realType
-       ypT(32) = 722.14_realType
-       ypT(33) = 837.87_realType
-       ypT(34) = 969.57_realType
-
-       ! Set the values of constants for the cubic fits of the
-       ! non-dimensional tangential velocity.
-
-       up0( 1) =  0.36525e+00_realType
-       up0( 2) =  0.69214e+00_realType
-       up0( 3) =  0.11102e+01_realType
-       up0( 4) =  0.16413e+01_realType
-       up0( 5) =  0.23101e+01_realType
-       up0( 6) =  0.31393e+01_realType
-       up0( 7) =  0.41434e+01_realType
-       up0( 8) =  0.53152e+01_realType
-       up0( 9) =  0.65876e+01_realType
-       up0(10) =  0.78593e+01_realType
-       up0(11) =  0.90657e+01_realType
-       up0(12) =  0.10173e+02_realType
-       up0(13) =  0.11171e+02_realType
-       up0(14) =  0.12066e+02_realType
-       up0(15) =  0.12870e+02_realType
-       up0(16) =  0.13596e+02_realType
-       up0(17) =  0.14259e+02_realType
-       up0(18) =  0.14868e+02_realType
-       up0(19) =  0.15433e+02_realType
-       up0(20) =  0.15960e+02_realType
-       up0(21) =  0.16455e+02_realType
-       up0(22) =  0.16923e+02_realType
-       up0(23) =  0.17367e+02_realType
-       up0(24) =  0.17791e+02_realType
-       up0(25) =  0.18197e+02_realType
-       up0(26) =  0.18586e+02_realType
-       up0(27) =  0.18962e+02_realType
-       up0(28) =  0.19327e+02_realType
-       up0(29) =  0.19681e+02_realType
-       up0(30) =  0.20027e+02_realType
-       up0(31) =  0.20366e+02_realType
-       up0(32) =  0.20700e+02_realType
-       up0(33) =  0.21030e+02_realType
-       up0(34) =  0.21359e+02_realType
-
-       up1( 1) =  0.12450e+01_realType
-       up1( 2) =  0.67756e+00_realType
-       up1( 3) =  0.42800e+00_realType
-       up1( 4) =  0.29111e+00_realType
-       up1( 5) =  0.20657e+00_realType
-       up1( 6) =  0.15024e+00_realType
-       up1( 7) =  0.11062e+00_realType
-       up1( 8) =  0.81225e-01_realType
-       up1( 9) =  0.58625e-01_realType
-       up1(10) =  0.41597e-01_realType
-       up1(11) =  0.29283e-01_realType
-       up1(12) =  0.20582e-01_realType
-       up1(13) =  0.14529e-01_realType
-       up1(14) =  0.10350e-01_realType
-       up1(15) =  0.74648e-02_realType
-       up1(16) =  0.54606e-02_realType
-       up1(17) =  0.40524e-02_realType
-       up1(18) =  0.30487e-02_realType
-       up1(19) =  0.23228e-02_realType
-       up1(20) =  0.17899e-02_realType
-       up1(21) =  0.13940e-02_realType
-       up1(22) =  0.10960e-02_realType
-       up1(23) =  0.86932e-03_realType
-       up1(24) =  0.69545e-03_realType
-       up1(25) =  0.56075e-03_realType
-       up1(26) =  0.45552e-03_realType
-       up1(27) =  0.37282e-03_realType
-       up1(28) =  0.30742e-03_realType
-       up1(29) =  0.25530e-03_realType
-       up1(30) =  0.21352e-03_realType
-       up1(31) =  0.17992e-03_realType
-       up1(32) =  0.15273e-03_realType
-       up1(33) =  0.13061e-03_realType
-       up1(34) =  0.11257e-03_realType
-
-       up2( 1) = -0.95638e+00_realType
-       up2( 2) = -0.15826e+00_realType
-       up2( 3) = -0.40071e-01_realType
-       up2( 4) = -0.12775e-01_realType
-       up2( 5) = -0.47553e-02_realType
-       up2( 6) = -0.19994e-02_realType
-       up2( 7) = -0.91167e-03_realType
-       up2( 8) = -0.51977e-03_realType
-       up2( 9) = -0.33276e-03_realType
-       up2(10) = -0.19526e-03_realType
-       up2(11) = -0.11176e-03_realType
-       up2(12) = -0.62687e-04_realType
-       up2(13) = -0.34811e-04_realType
-       up2(14) = -0.19255e-04_realType
-       up2(15) = -0.10712e-04_realType
-       up2(16) = -0.60184e-05_realType
-       up2(17) = -0.34485e-05_realType
-       up2(18) = -0.20101e-05_realType
-       up2(19) = -0.11974e-05_realType
-       up2(20) = -0.72812e-06_realType
-       up2(21) = -0.44763e-06_realType
-       up2(22) = -0.28271e-06_realType
-       up2(23) = -0.17940e-06_realType
-       up2(24) = -0.11595e-06_realType
-       up2(25) = -0.75702e-07_realType
-       up2(26) = -0.50362e-07_realType
-       up2(27) = -0.33465e-07_realType
-       up2(28) = -0.22609e-07_realType
-       up2(29) = -0.15431e-07_realType
-       up2(30) = -0.10636e-07_realType
-       up2(31) = -0.73064e-08_realType
-       up2(32) = -0.51550e-08_realType
-       up2(33) = -0.35885e-08_realType
-       up2(34) = -0.25596e-08_realType
-
-       up3( 1) =  0.26155e+00_realType
-       up3( 2) = -0.64433e-02_realType
-       up3( 3) = -0.30415e-02_realType
-       up3( 4) = -0.78504e-03_realType
-       up3( 5) = -0.19960e-03_realType
-       up3( 6) = -0.52972e-04_realType
-       up3( 7) = -0.16968e-04_realType
-       up3( 8) = -0.39545e-05_realType
-       up3( 9) = -0.75520e-07_realType
-       up3(10) =  0.29768e-06_realType
-       up3(11) =  0.21782e-06_realType
-       up3(12) =  0.11651e-06_realType
-       up3(13) =  0.55958e-07_realType
-       up3(14) =  0.25252e-07_realType
-       up3(15) =  0.11133e-07_realType
-       up3(16) =  0.48030e-08_realType
-       up3(17) =  0.21171e-08_realType
-       up3(18) =  0.93600e-09_realType
-       up3(19) =  0.42750e-09_realType
-       up3(20) =  0.20397e-09_realType
-       up3(21) =  0.94244e-10_realType
-       up3(22) =  0.48769e-10_realType
-       up3(23) =  0.23922e-10_realType
-       up3(24) =  0.12409e-10_realType
-       up3(25) =  0.63795e-11_realType
-       up3(26) =  0.36413e-11_realType
-       up3(27) =  0.18837e-11_realType
-       up3(28) =  0.10413e-11_realType
-       up3(29) =  0.59232e-12_realType
-       up3(30) =  0.35361e-12_realType
-       up3(31) =  0.18262e-12_realType
-       up3(32) =  0.12163e-12_realType
-       up3(33) =  0.63587e-13_realType
-       up3(34) =  0.41966e-13_realType
-
-       ! Set the values of tuLogFit. All variables have been
-       ! fitted linearly; the fifth variable is the eddy viscosity.
-
-       tuLogFit(itu1) = .false.
-       tuLogFit(itu2) = .false.
-       tuLogFit(itu3) = .false.
-       tuLogFit(itu4) = .false.
-       tuLogFit(itu5) = .false.
-
-       ! Set the values of constants for the cubic fits of the
-       ! non-dimensional k, eps, v2 and f values.
-
-       ! Constants for k.
-
-       tup0( 1,itu1) =  0.20232e-01_realType
-       tup0( 2,itu1) =  0.66720e-01_realType
-       tup0( 3,itu1) =  0.15994e+00_realType
-       tup0( 4,itu1) =  0.32733e+00_realType
-       tup0( 5,itu1) =  0.60629e+00_realType
-       tup0( 6,itu1) =  0.10415e+01_realType
-       tup0( 7,itu1) =  0.16715e+01_realType
-       tup0( 8,itu1) =  0.24989e+01_realType
-       tup0( 9,itu1) =  0.34343e+01_realType
-       tup0(10,itu1) =  0.42683e+01_realType
-       tup0(11,itu1) =  0.48591e+01_realType
-       tup0(12,itu1) =  0.51750e+01_realType
-       tup0(13,itu1) =  0.52611e+01_realType
-       tup0(14,itu1) =  0.51878e+01_realType
-       tup0(15,itu1) =  0.50187e+01_realType
-       tup0(16,itu1) =  0.48000e+01_realType
-       tup0(17,itu1) =  0.45620e+01_realType
-       tup0(18,itu1) =  0.43226e+01_realType
-       tup0(19,itu1) =  0.40922e+01_realType
-       tup0(20,itu1) =  0.38760e+01_realType
-       tup0(21,itu1) =  0.36764e+01_realType
-       tup0(22,itu1) =  0.34942e+01_realType
-       tup0(23,itu1) =  0.33290e+01_realType
-       tup0(24,itu1) =  0.31798e+01_realType
-       tup0(25,itu1) =  0.30456e+01_realType
-       tup0(26,itu1) =  0.29248e+01_realType
-       tup0(27,itu1) =  0.28163e+01_realType
-       tup0(28,itu1) =  0.27186e+01_realType
-       tup0(29,itu1) =  0.26304e+01_realType
-       tup0(30,itu1) =  0.25505e+01_realType
-       tup0(31,itu1) =  0.24775e+01_realType
-       tup0(32,itu1) =  0.24103e+01_realType
-       tup0(33,itu1) =  0.23475e+01_realType
-       tup0(34,itu1) =  0.22880e+01_realType
-
-       tup1( 1,itu1) =  0.11194e+00_realType
-       tup1( 2,itu1) =  0.18748e+00_realType
-       tup1( 3,itu1) =  0.27407e+00_realType
-       tup1( 4,itu1) =  0.36966e+00_realType
-       tup1( 5,itu1) =  0.46803e+00_realType
-       tup1( 6,itu1) =  0.55499e+00_realType
-       tup1( 7,itu1) =  0.60642e+00_realType
-       tup1( 8,itu1) =  0.58843e+00_realType
-       tup1( 9,itu1) =  0.47590e+00_realType
-       tup1(10,itu1) =  0.31013e+00_realType
-       tup1(11,itu1) =  0.16039e+00_realType
-       tup1(12,itu1) =  0.58026e-01_realType
-       tup1(13,itu1) =  0.15163e-02_realType
-       tup1(14,itu1) = -0.23590e-01_realType
-       tup1(15,itu1) = -0.31162e-01_realType
-       tup1(16,itu1) = -0.30422e-01_realType
-       tup1(17,itu1) = -0.26463e-01_realType
-       tup1(18,itu1) = -0.21749e-01_realType
-       tup1(19,itu1) = -0.17331e-01_realType
-       tup1(20,itu1) = -0.13569e-01_realType
-       tup1(21,itu1) = -0.10516e-01_realType
-       tup1(22,itu1) = -0.81061e-02_realType
-       tup1(23,itu1) = -0.62327e-02_realType
-       tup1(24,itu1) = -0.47901e-02_realType
-       tup1(25,itu1) = -0.36855e-02_realType
-       tup1(26,itu1) = -0.28427e-02_realType
-       tup1(27,itu1) = -0.22005e-02_realType
-       tup1(28,itu1) = -0.17118e-02_realType
-       tup1(29,itu1) = -0.13404e-02_realType
-       tup1(30,itu1) = -0.10584e-02_realType
-       tup1(31,itu1) = -0.84476e-03_realType
-       tup1(32,itu1) = -0.68362e-03_realType
-       tup1(33,itu1) = -0.56284e-03_realType
-       tup1(34,itu1) = -0.47348e-03_realType
-
-       tup2( 1,itu1) =  0.46642e-01_realType
-       tup2( 2,itu1) =  0.46777e-01_realType
-       tup2( 3,itu1) =  0.47060e-01_realType
-       tup2( 4,itu1) =  0.48495e-01_realType
-       tup2( 5,itu1) =  0.50649e-01_realType
-       tup2( 6,itu1) =  0.49396e-01_realType
-       tup2( 7,itu1) =  0.43509e-01_realType
-       tup2( 8,itu1) =  0.22224e-01_realType
-       tup2( 9,itu1) = -0.22230e-01_realType
-       tup2(10,itu1) = -0.32317e-01_realType
-       tup2(11,itu1) = -0.23975e-01_realType
-       tup2(12,itu1) = -0.13064e-01_realType
-       tup2(13,itu1) = -0.57706e-02_realType
-       tup2(14,itu1) = -0.20813e-02_realType
-       tup2(15,itu1) = -0.52362e-03_realType
-       tup2(16,itu1) =  0.28081e-04_realType
-       tup2(17,itu1) =  0.17226e-03_realType
-       tup2(18,itu1) =  0.17433e-03_realType
-       tup2(19,itu1) =  0.13808e-03_realType
-       tup2(20,itu1) =  0.99662e-04_realType
-       tup2(21,itu1) =  0.68603e-04_realType
-       tup2(22,itu1) =  0.46166e-04_realType
-       tup2(23,itu1) =  0.30634e-04_realType
-       tup2(24,itu1) =  0.20237e-04_realType
-       tup2(25,itu1) =  0.13310e-04_realType
-       tup2(26,itu1) =  0.87532e-05_realType
-       tup2(27,itu1) =  0.57793e-05_realType
-       tup2(28,itu1) =  0.38107e-05_realType
-       tup2(29,itu1) =  0.25226e-05_realType
-       tup2(30,itu1) =  0.16733e-05_realType
-       tup2(31,itu1) =  0.11109e-05_realType
-       tup2(32,itu1) =  0.73765e-06_realType
-       tup2(33,itu1) =  0.49068e-06_realType
-       tup2(34,itu1) =  0.32170e-06_realType
-
-       tup3( 1,itu1) =  0.14048e+00_realType
-       tup3( 2,itu1) =  0.90412e-01_realType
-       tup3( 3,itu1) =  0.53426e-01_realType
-       tup3( 4,itu1) =  0.24095e-01_realType
-       tup3( 5,itu1) =  0.34292e-03_realType
-       tup3( 6,itu1) = -0.15803e-01_realType
-       tup3( 7,itu1) = -0.25093e-01_realType
-       tup3( 8,itu1) = -0.22520e-01_realType
-       tup3( 9,itu1) = -0.58532e-02_realType
-       tup3(10,itu1) =  0.73777e-03_realType
-       tup3(11,itu1) =  0.16152e-02_realType
-       tup3(12,itu1) =  0.98832e-03_realType
-       tup3(13,itu1) =  0.44037e-03_realType
-       tup3(14,itu1) =  0.16675e-03_realType
-       tup3(15,itu1) =  0.56569e-04_realType
-       tup3(16,itu1) =  0.17339e-04_realType
-       tup3(17,itu1) =  0.45579e-05_realType
-       tup3(18,itu1) =  0.76381e-06_realType
-       tup3(19,itu1) = -0.18121e-06_realType
-       tup3(20,itu1) = -0.31622e-06_realType
-       tup3(21,itu1) = -0.24874e-06_realType
-       tup3(22,itu1) = -0.16604e-06_realType
-       tup3(23,itu1) = -0.10191e-06_realType
-       tup3(24,itu1) = -0.61291e-07_realType
-       tup3(25,itu1) = -0.35829e-07_realType
-       tup3(26,itu1) = -0.20759e-07_realType
-       tup3(27,itu1) = -0.12251e-07_realType
-       tup3(28,itu1) = -0.71234e-08_realType
-       tup3(29,itu1) = -0.42082e-08_realType
-       tup3(30,itu1) = -0.25154e-08_realType
-       tup3(31,itu1) = -0.15209e-08_realType
-       tup3(32,itu1) = -0.93515e-09_realType
-       tup3(33,itu1) = -0.60259e-09_realType
-       tup3(34,itu1) = -0.38327e-09_realType
-
-       ! Constants for epsilon.
-
-       tup0( 1,itu2) =  0.24164e+00_realType
-       tup0( 2,itu2) =  0.22008e+00_realType
-       tup0( 3,itu2) =  0.19751e+00_realType
-       tup0( 4,itu2) =  0.17571e+00_realType
-       tup0( 5,itu2) =  0.15723e+00_realType
-       tup0( 6,itu2) =  0.14536e+00_realType
-       tup0( 7,itu2) =  0.14328e+00_realType
-       tup0( 8,itu2) =  0.15220e+00_realType
-       tup0( 9,itu2) =  0.16784e+00_realType
-       tup0(10,itu2) =  0.17712e+00_realType
-       tup0(11,itu2) =  0.17387e+00_realType
-       tup0(12,itu2) =  0.15954e+00_realType
-       tup0(13,itu2) =  0.13909e+00_realType
-       tup0(14,itu2) =  0.11713e+00_realType
-       tup0(15,itu2) =  0.96528e-01_realType
-       tup0(16,itu2) =  0.78571e-01_realType
-       tup0(17,itu2) =  0.63547e-01_realType
-       tup0(18,itu2) =  0.51265e-01_realType
-       tup0(19,itu2) =  0.41354e-01_realType
-       tup0(20,itu2) =  0.33411e-01_realType
-       tup0(21,itu2) =  0.27064e-01_realType
-       tup0(22,itu2) =  0.21997e-01_realType
-       tup0(23,itu2) =  0.17947e-01_realType
-       tup0(24,itu2) =  0.14703e-01_realType
-       tup0(25,itu2) =  0.12100e-01_realType
-       tup0(26,itu2) =  0.10003e-01_realType
-       tup0(27,itu2) =  0.83083e-02_realType
-       tup0(28,itu2) =  0.69337e-02_realType
-       tup0(29,itu2) =  0.58144e-02_realType
-       tup0(30,itu2) =  0.48991e-02_realType
-       tup0(31,itu2) =  0.41476e-02_realType
-       tup0(32,itu2) =  0.35278e-02_realType
-       tup0(33,itu2) =  0.30142e-02_realType
-       tup0(34,itu2) =  0.25866e-02_realType
-
-       tup1( 1,itu2) = -0.70667e-01_realType
-       tup1( 2,itu2) = -0.59210e-01_realType
-       tup1( 3,itu2) = -0.46663e-01_realType
-       tup1( 4,itu2) = -0.33366e-01_realType
-       tup1( 5,itu2) = -0.19886e-01_realType
-       tup1( 6,itu2) = -0.72638e-02_realType
-       tup1( 7,itu2) =  0.28452e-02_realType
-       tup1( 8,itu2) =  0.81949e-02_realType
-       tup1( 9,itu2) =  0.67022e-02_realType
-       tup1(10,itu2) =  0.13143e-02_realType
-       tup1(11,itu2) = -0.31089e-02_realType
-       tup1(12,itu2) = -0.50215e-02_realType
-       tup1(13,itu2) = -0.50170e-02_realType
-       tup1(14,itu2) = -0.41408e-02_realType
-       tup1(15,itu2) = -0.30979e-02_realType
-       tup1(16,itu2) = -0.21970e-02_realType
-       tup1(17,itu2) = -0.15136e-02_realType
-       tup1(18,itu2) = -0.10274e-02_realType
-       tup1(19,itu2) = -0.69277e-03_realType
-       tup1(20,itu2) = -0.46639e-03_realType
-       tup1(21,itu2) = -0.31444e-03_realType
-       tup1(22,itu2) = -0.21272e-03_realType
-       tup1(23,itu2) = -0.14458e-03_realType
-       tup1(24,itu2) = -0.98812e-04_realType
-       tup1(25,itu2) = -0.67941e-04_realType
-       tup1(26,itu2) = -0.47013e-04_realType
-       tup1(27,itu2) = -0.32748e-04_realType
-       tup1(28,itu2) = -0.22968e-04_realType
-       tup1(29,itu2) = -0.16220e-04_realType
-       tup1(30,itu2) = -0.11536e-04_realType
-       tup1(31,itu2) = -0.82628e-05_realType
-       tup1(32,itu2) = -0.59619e-05_realType
-       tup1(33,itu2) = -0.43340e-05_realType
-       tup1(34,itu2) = -0.31753e-05_realType
-
-       tup2( 1,itu2) =  0.82009e-02_realType
-       tup2( 2,itu2) =  0.78044e-02_realType
-       tup2( 3,itu2) =  0.72279e-02_realType
-       tup2( 4,itu2) =  0.66236e-02_realType
-       tup2( 5,itu2) =  0.61231e-02_realType
-       tup2( 6,itu2) =  0.54757e-02_realType
-       tup2( 7,itu2) =  0.46058e-02_realType
-       tup2( 8,itu2) =  0.31013e-02_realType
-       tup2( 9,itu2) = -0.57395e-03_realType
-       tup2(10,itu2) = -0.13230e-02_realType
-       tup2(11,itu2) = -0.82054e-03_realType
-       tup2(12,itu2) = -0.27325e-03_realType
-       tup2(13,itu2) = -0.29977e-05_realType
-       tup2(14,itu2) =  0.73241e-04_realType
-       tup2(15,itu2) =  0.70951e-04_realType
-       tup2(16,itu2) =  0.50202e-04_realType
-       tup2(17,itu2) =  0.31490e-04_realType
-       tup2(18,itu2) =  0.18675e-04_realType
-       tup2(19,itu2) =  0.10781e-04_realType
-       tup2(20,itu2) =  0.61493e-05_realType
-       tup2(21,itu2) =  0.34945e-05_realType
-       tup2(22,itu2) =  0.19882e-05_realType
-       tup2(23,itu2) =  0.11363e-05_realType
-       tup2(24,itu2) =  0.65315e-06_realType
-       tup2(25,itu2) =  0.37828e-06_realType
-       tup2(26,itu2) =  0.22089e-06_realType
-       tup2(27,itu2) =  0.13005e-06_realType
-       tup2(28,itu2) =  0.77269e-07_realType
-       tup2(29,itu2) =  0.46321e-07_realType
-       tup2(30,itu2) =  0.28022e-07_realType
-       tup2(31,itu2) =  0.17109e-07_realType
-       tup2(32,itu2) =  0.10536e-07_realType
-       tup2(33,itu2) =  0.65500e-08_realType
-       tup2(34,itu2) =  0.41065e-08_realType
-
-       tup3( 1,itu2) =  0.19008e-01_realType
-       tup3( 2,itu2) =  0.11466e-01_realType
-       tup3( 3,itu2) =  0.65783e-02_realType
-       tup3( 4,itu2) =  0.33228e-02_realType
-       tup3( 5,itu2) =  0.10124e-02_realType
-       tup3( 6,itu2) = -0.46456e-03_realType
-       tup3( 7,itu2) = -0.12995e-02_realType
-       tup3( 8,itu2) = -0.14253e-02_realType
-       tup3( 9,itu2) = -0.23840e-03_realType
-       tup3(10,itu2) =  0.11860e-03_realType
-       tup3(11,itu2) =  0.10989e-03_realType
-       tup3(12,itu2) =  0.47896e-04_realType
-       tup3(13,itu2) =  0.13977e-04_realType
-       tup3(14,itu2) =  0.22846e-05_realType
-       tup3(15,itu2) = -0.47051e-06_realType
-       tup3(16,itu2) = -0.69412e-06_realType
-       tup3(17,itu2) = -0.45961e-06_realType
-       tup3(18,itu2) = -0.25229e-06_realType
-       tup3(19,itu2) = -0.12854e-06_realType
-       tup3(20,itu2) = -0.63372e-07_realType
-       tup3(21,itu2) = -0.30806e-07_realType
-       tup3(22,itu2) = -0.14923e-07_realType
-       tup3(23,itu2) = -0.72529e-08_realType
-       tup3(24,itu2) = -0.35415e-08_realType
-       tup3(25,itu2) = -0.17443e-08_realType
-       tup3(26,itu2) = -0.86787e-09_realType
-       tup3(27,itu2) = -0.43581e-09_realType
-       tup3(28,itu2) = -0.22141e-09_realType
-       tup3(29,itu2) = -0.11372e-09_realType
-       tup3(30,itu2) = -0.59087e-10_realType
-       tup3(31,itu2) = -0.31082e-10_realType
-       tup3(32,itu2) = -0.16513e-10_realType
-       tup3(33,itu2) = -0.88934e-11_realType
-       tup3(34,itu2) = -0.48436e-11_realType
-
-       ! Constants for v2.
-
-       tup0( 1,itu3) =  0.86118e-05_realType
-       tup0( 2,itu3) =  0.76561e-04_realType
-       tup0( 3,itu3) =  0.38909e-03_realType
-       tup0( 4,itu3) =  0.14605e-02_realType
-       tup0( 5,itu3) =  0.44732e-02_realType
-       tup0( 6,itu3) =  0.11704e-01_realType
-       tup0( 7,itu3) =  0.26786e-01_realType
-       tup0( 8,itu3) =  0.54333e-01_realType
-       tup0( 9,itu3) =  0.97821e-01_realType
-       tup0(10,itu3) =  0.15774e+00_realType
-       tup0(11,itu3) =  0.23273e+00_realType
-       tup0(12,itu3) =  0.32009e+00_realType
-       tup0(13,itu3) =  0.41667e+00_realType
-       tup0(14,itu3) =  0.51933e+00_realType
-       tup0(15,itu3) =  0.62515e+00_realType
-       tup0(16,itu3) =  0.73157e+00_realType
-       tup0(17,itu3) =  0.83660e+00_realType
-       tup0(18,itu3) =  0.93915e+00_realType
-       tup0(19,itu3) =  0.10385e+01_realType
-       tup0(20,itu3) =  0.11342e+01_realType
-       tup0(21,itu3) =  0.12256e+01_realType
-       tup0(22,itu3) =  0.13125e+01_realType
-       tup0(23,itu3) =  0.13944e+01_realType
-       tup0(24,itu3) =  0.14708e+01_realType
-       tup0(25,itu3) =  0.15412e+01_realType
-       tup0(26,itu3) =  0.16053e+01_realType
-       tup0(27,itu3) =  0.16624e+01_realType
-       tup0(28,itu3) =  0.17120e+01_realType
-       tup0(29,itu3) =  0.17534e+01_realType
-       tup0(30,itu3) =  0.17862e+01_realType
-       tup0(31,itu3) =  0.18095e+01_realType
-       tup0(32,itu3) =  0.18227e+01_realType
-       tup0(33,itu3) =  0.18250e+01_realType
-       tup0(34,itu3) =  0.18158e+01_realType
-
-       tup1( 1,itu3) =  0.13148e-03_realType
-       tup1( 2,itu3) =  0.51056e-03_realType
-       tup1( 3,itu3) =  0.14555e-02_realType
-       tup1( 4,itu3) =  0.33824e-02_realType
-       tup1( 5,itu3) =  0.67133e-02_realType
-       tup1( 6,itu3) =  0.11625e-01_realType
-       tup1( 7,itu3) =  0.17737e-01_realType
-       tup1( 8,itu3) =  0.23712e-01_realType
-       tup1( 9,itu3) =  0.27813e-01_realType
-       tup1(10,itu3) =  0.29364e-01_realType
-       tup1(11,itu3) =  0.28719e-01_realType
-       tup1(12,itu3) =  0.26553e-01_realType
-       tup1(13,itu3) =  0.23565e-01_realType
-       tup1(14,itu3) =  0.20284e-01_realType
-       tup1(15,itu3) =  0.17054e-01_realType
-       tup1(16,itu3) =  0.14085e-01_realType
-       tup1(17,itu3) =  0.11506e-01_realType
-       tup1(18,itu3) =  0.93475e-02_realType
-       tup1(19,itu3) =  0.75674e-02_realType
-       tup1(20,itu3) =  0.61075e-02_realType
-       tup1(21,itu3) =  0.49132e-02_realType
-       tup1(22,itu3) =  0.39366e-02_realType
-       tup1(23,itu3) =  0.31369e-02_realType
-       tup1(24,itu3) =  0.24817e-02_realType
-       tup1(25,itu3) =  0.19440e-02_realType
-       tup1(26,itu3) =  0.15022e-02_realType
-       tup1(27,itu3) =  0.11385e-02_realType
-       tup1(28,itu3) =  0.83880e-03_realType
-       tup1(29,itu3) =  0.59169e-03_realType
-       tup1(30,itu3) =  0.38785e-03_realType
-       tup1(31,itu3) =  0.21987e-03_realType
-       tup1(32,itu3) =  0.81692e-04_realType
-       tup1(33,itu3) = -0.31589e-04_realType
-       tup1(34,itu3) = -0.12412e-03_realType
-
-       tup2( 1,itu3) = -0.45876e-03_realType
-       tup2( 2,itu3) = -0.56213e-03_realType
-       tup2( 3,itu3) = -0.48447e-03_realType
-       tup2( 4,itu3) = -0.12712e-03_realType
-       tup2( 5,itu3) =  0.51606e-03_realType
-       tup2( 6,itu3) =  0.12754e-02_realType
-       tup2( 7,itu3) =  0.20365e-02_realType
-       tup2( 8,itu3) =  0.19980e-02_realType
-       tup2( 9,itu3) =  0.11629e-02_realType
-       tup2(10,itu3) =  0.48125e-03_realType
-       tup2(11,itu3) =  0.38876e-04_realType
-       tup2(12,itu3) = -0.17127e-03_realType
-       tup2(13,itu3) = -0.23448e-03_realType
-       tup2(14,itu3) = -0.22707e-03_realType
-       tup2(15,itu3) = -0.19347e-03_realType
-       tup2(16,itu3) = -0.15335e-03_realType
-       tup2(17,itu3) = -0.11100e-03_realType
-       tup2(18,itu3) = -0.78059e-04_realType
-       tup2(19,itu3) = -0.54121e-04_realType
-       tup2(20,itu3) = -0.37687e-04_realType
-       tup2(21,itu3) = -0.26073e-04_realType
-       tup2(22,itu3) = -0.18195e-04_realType
-       tup2(23,itu3) = -0.12730e-04_realType
-       tup2(24,itu3) = -0.89271e-05_realType
-       tup2(25,itu3) = -0.63091e-05_realType
-       tup2(26,itu3) = -0.44582e-05_realType
-       tup2(27,itu3) = -0.31881e-05_realType
-       tup2(28,itu3) = -0.22705e-05_realType
-       tup2(29,itu3) = -0.16359e-05_realType
-       tup2(30,itu3) = -0.11754e-05_realType
-       tup2(31,itu3) = -0.85145e-06_realType
-       tup2(32,itu3) = -0.61411e-06_realType
-       tup2(33,itu3) = -0.44454e-06_realType
-       tup2(34,itu3) = -0.32118e-06_realType
-
-       tup3( 1,itu3) =  0.21178e-02_realType
-       tup3( 2,itu3) =  0.26960e-02_realType
-       tup3( 3,itu3) =  0.28709e-02_realType
-       tup3( 4,itu3) =  0.25635e-02_realType
-       tup3( 5,itu3) =  0.18560e-02_realType
-       tup3( 6,itu3) =  0.98939e-03_realType
-       tup3( 7,itu3) =  0.10046e-03_realType
-       tup3( 8,itu3) = -0.30638e-03_realType
-       tup3( 9,itu3) = -0.25468e-03_realType
-       tup3(10,itu3) = -0.15987e-03_realType
-       tup3(11,itu3) = -0.82673e-04_realType
-       tup3(12,itu3) = -0.38599e-04_realType
-       tup3(13,itu3) = -0.17054e-04_realType
-       tup3(14,itu3) = -0.70519e-05_realType
-       tup3(15,itu3) = -0.23971e-05_realType
-       tup3(16,itu3) = -0.31545e-06_realType
-       tup3(17,itu3) =  0.88126e-07_realType
-       tup3(18,itu3) =  0.13543e-06_realType
-       tup3(19,itu3) =  0.96125e-07_realType
-       tup3(20,itu3) =  0.71267e-07_realType
-       tup3(21,itu3) =  0.42315e-07_realType
-       tup3(22,itu3) =  0.27372e-07_realType
-       tup3(23,itu3) =  0.17327e-07_realType
-       tup3(24,itu3) =  0.10546e-07_realType
-       tup3(25,itu3) =  0.68294e-08_realType
-       tup3(26,itu3) =  0.40887e-08_realType
-       tup3(27,itu3) =  0.27989e-08_realType
-       tup3(28,itu3) =  0.17076e-08_realType
-       tup3(29,itu3) =  0.11817e-08_realType
-       tup3(30,itu3) =  0.76583e-09_realType
-       tup3(31,itu3) =  0.54352e-09_realType
-       tup3(32,itu3) =  0.36625e-09_realType
-       tup3(33,itu3) =  0.25793e-09_realType
-       tup3(34,itu3) =  0.18203e-09_realType
-
-       ! Constants for f.
-
-       tup0( 1,itu4) =  0.36932e-03_realType
-       tup0( 2,itu4) =  0.69498e-03_realType
-       tup0( 3,itu4) =  0.11057e-02_realType
-       tup0( 4,itu4) =  0.16193e-02_realType
-       tup0( 5,itu4) =  0.22555e-02_realType
-       tup0( 6,itu4) =  0.30335e-02_realType
-       tup0( 7,itu4) =  0.39685e-02_realType
-       tup0( 8,itu4) =  0.50643e-02_realType
-       tup0( 9,itu4) =  0.63046e-02_realType
-       tup0(10,itu4) =  0.76520e-02_realType
-       tup0(11,itu4) =  0.90512e-02_realType
-       tup0(12,itu4) =  0.10438e-01_realType
-       tup0(13,itu4) =  0.11748e-01_realType
-       tup0(14,itu4) =  0.12920e-01_realType
-       tup0(15,itu4) =  0.13898e-01_realType
-       tup0(16,itu4) =  0.14631e-01_realType
-       tup0(17,itu4) =  0.15089e-01_realType
-       tup0(18,itu4) =  0.15297e-01_realType
-       tup0(19,itu4) =  0.15285e-01_realType
-       tup0(20,itu4) =  0.15080e-01_realType
-       tup0(21,itu4) =  0.14711e-01_realType
-       tup0(22,itu4) =  0.14207e-01_realType
-       tup0(23,itu4) =  0.13596e-01_realType
-       tup0(24,itu4) =  0.12904e-01_realType
-       tup0(25,itu4) =  0.12154e-01_realType
-       tup0(26,itu4) =  0.11369e-01_realType
-       tup0(27,itu4) =  0.10567e-01_realType
-       tup0(28,itu4) =  0.97653e-02_realType
-       tup0(29,itu4) =  0.89762e-02_realType
-       tup0(30,itu4) =  0.82104e-02_realType
-       tup0(31,itu4) =  0.74760e-02_realType
-       tup0(32,itu4) =  0.67787e-02_realType
-       tup0(33,itu4) =  0.61223e-02_realType
-       tup0(34,itu4) =  0.55092e-02_realType
-
-       tup1( 1,itu4) =  0.10014e-02_realType
-       tup1( 2,itu4) =  0.98811e-03_realType
-       tup1( 3,itu4) =  0.97209e-03_realType
-       tup1( 4,itu4) =  0.95225e-03_realType
-       tup1( 5,itu4) =  0.92674e-03_realType
-       tup1( 6,itu4) =  0.89247e-03_realType
-       tup1( 7,itu4) =  0.84501e-03_realType
-       tup1( 8,itu4) =  0.77980e-03_realType
-       tup1( 9,itu4) =  0.69604e-03_realType
-       tup1(10,itu4) =  0.59785e-03_realType
-       tup1(11,itu4) =  0.49284e-03_realType
-       tup1(12,itu4) =  0.38934e-03_realType
-       tup1(13,itu4) =  0.29358e-03_realType
-       tup1(14,itu4) =  0.20915e-03_realType
-       tup1(15,itu4) =  0.13748e-03_realType
-       tup1(16,itu4) =  0.79309e-04_realType
-       tup1(17,itu4) =  0.36899e-04_realType
-       tup1(18,itu4) =  0.90827e-05_realType
-       tup1(19,itu4) = -0.84239e-05_realType
-       tup1(20,itu4) = -0.18715e-04_realType
-       tup1(21,itu4) = -0.24036e-04_realType
-       tup1(22,itu4) = -0.26019e-04_realType
-       tup1(23,itu4) = -0.25851e-04_realType
-       tup1(24,itu4) = -0.24377e-04_realType
-       tup1(25,itu4) = -0.22183e-04_realType
-       tup1(26,itu4) = -0.19673e-04_realType
-       tup1(27,itu4) = -0.17109e-04_realType
-       tup1(28,itu4) = -0.14652e-04_realType
-       tup1(29,itu4) = -0.12395e-04_realType
-       tup1(30,itu4) = -0.10383e-04_realType
-       tup1(31,itu4) = -0.86269e-05_realType
-       tup1(32,itu4) = -0.71206e-05_realType
-       tup1(33,itu4) = -0.58456e-05_realType
-       tup1(34,itu4) = -0.47776e-05_realType
-
-       tup2( 1,itu4) = -0.73381e-05_realType
-       tup2( 2,itu4) = -0.67793e-05_realType
-       tup2( 3,itu4) = -0.58449e-05_realType
-       tup2( 4,itu4) = -0.49347e-05_realType
-       tup2( 5,itu4) = -0.41793e-05_realType
-       tup2( 6,itu4) = -0.40232e-05_realType
-       tup2( 7,itu4) = -0.54838e-05_realType
-       tup2( 8,itu4) = -0.91450e-05_realType
-       tup2( 9,itu4) = -0.12056e-04_realType
-       tup2(10,itu4) = -0.13441e-04_realType
-       tup2(11,itu4) = -0.12747e-04_realType
-       tup2(12,itu4) = -0.10761e-04_realType
-       tup2(13,itu4) = -0.84210e-05_realType
-       tup2(14,itu4) = -0.62599e-05_realType
-       tup2(15,itu4) = -0.45849e-05_realType
-       tup2(16,itu4) = -0.34480e-05_realType
-       tup2(17,itu4) = -0.19626e-05_realType
-       tup2(18,itu4) = -0.10949e-05_realType
-       tup2(19,itu4) = -0.59189e-06_realType
-       tup2(20,itu4) = -0.30260e-06_realType
-       tup2(21,itu4) = -0.14033e-06_realType
-       tup2(22,itu4) = -0.51286e-07_realType
-       tup2(23,itu4) = -0.65167e-08_realType
-       tup2(24,itu4) =  0.14883e-07_realType
-       tup2(25,itu4) =  0.22563e-07_realType
-       tup2(26,itu4) =  0.23657e-07_realType
-       tup2(27,itu4) =  0.21434e-07_realType
-       tup2(28,itu4) =  0.18140e-07_realType
-       tup2(29,itu4) =  0.14649e-07_realType
-       tup2(30,itu4) =  0.11482e-07_realType
-       tup2(31,itu4) =  0.88134e-08_realType
-       tup2(32,itu4) =  0.66562e-08_realType
-       tup2(33,itu4) =  0.49705e-08_realType
-       tup2(34,itu4) =  0.36802e-08_realType
-
-       tup3( 1,itu4) = -0.26409e-04_realType
-       tup3( 2,itu4) = -0.19723e-04_realType
-       tup3( 3,itu4) = -0.15989e-04_realType
-       tup3( 4,itu4) = -0.13800e-04_realType
-       tup3( 5,itu4) = -0.12497e-04_realType
-       tup3( 6,itu4) = -0.11352e-04_realType
-       tup3( 7,itu4) = -0.94581e-05_realType
-       tup3( 8,itu4) = -0.64516e-05_realType
-       tup3( 9,itu4) = -0.38268e-05_realType
-       tup3(10,itu4) = -0.19060e-05_realType
-       tup3(11,itu4) = -0.82598e-06_realType
-       tup3(12,itu4) = -0.31494e-06_realType
-       tup3(13,itu4) = -0.96327e-07_realType
-       tup3(14,itu4) = -0.11829e-07_realType
-       tup3(15,itu4) =  0.30746e-07_realType
-       tup3(16,itu4) =  0.70108e-07_realType
-       tup3(17,itu4) =  0.37205e-07_realType
-       tup3(18,itu4) =  0.19880e-07_realType
-       tup3(19,itu4) =  0.10689e-07_realType
-       tup3(20,itu4) =  0.57169e-08_realType
-       tup3(21,itu4) =  0.30479e-08_realType
-       tup3(22,itu4) =  0.15779e-08_realType
-       tup3(23,itu4) =  0.82116e-09_realType
-       tup3(24,itu4) =  0.40692e-09_realType
-       tup3(25,itu4) =  0.19883e-09_realType
-       tup3(26,itu4) =  0.90676e-10_realType
-       tup3(27,itu4) =  0.39255e-10_realType
-       tup3(28,itu4) =  0.14131e-10_realType
-       tup3(29,itu4) =  0.32481e-11_realType
-       tup3(30,itu4) = -0.10665e-11_realType
-       tup3(31,itu4) = -0.24047e-11_realType
-       tup3(32,itu4) = -0.24412e-11_realType
-       tup3(33,itu4) = -0.20525e-11_realType
-       tup3(34,itu4) = -0.15829e-11_realType
-
-       ! Constants for nut.
-
-       tup0( 1,itu5) =  0.23125e-04_realType
-       tup0( 2,itu5) =  0.21542e-03_realType
-       tup0( 3,itu5) =  0.11557e-02_realType
-       tup0( 4,itu5) =  0.45993e-02_realType
-       tup0( 5,itu5) =  0.14891e-01_realType
-       tup0( 6,itu5) =  0.40523e-01_realType
-       tup0( 7,itu5) =  0.93406e-01_realType
-       tup0( 8,itu5) =  0.19626e+00_realType
-       tup0( 9,itu5) =  0.44036e+00_realType
-       tup0(10,itu5) =  0.83627e+00_realType
-       tup0(11,itu5) =  0.14308e+01_realType
-       tup0(12,itu5) =  0.22842e+01_realType
-       tup0(13,itu5) =  0.34675e+01_realType
-       tup0(14,itu5) =  0.50606e+01_realType
-       tup0(15,itu5) =  0.71506e+01_realType
-       tup0(16,itu5) =  0.98324e+01_realType
-       tup0(17,itu5) =  0.13213e+02_realType
-       tup0(18,itu5) =  0.17422e+02_realType
-       tup0(19,itu5) =  0.22609e+02_realType
-       tup0(20,itu5) =  0.28946e+02_realType
-       tup0(21,itu5) =  0.36629e+02_realType
-       tup0(22,itu5) =  0.45870e+02_realType
-       tup0(23,itu5) =  0.56903e+02_realType
-       tup0(24,itu5) =  0.69976e+02_realType
-       tup0(25,itu5) =  0.85344e+02_realType
-       tup0(26,itu5) =  0.10326e+03_realType
-       tup0(27,itu5) =  0.12397e+03_realType
-       tup0(28,itu5) =  0.14767e+03_realType
-       tup0(29,itu5) =  0.17452e+03_realType
-       tup0(30,itu5) =  0.20457e+03_realType
-       tup0(31,itu5) =  0.23779e+03_realType
-       tup0(32,itu5) =  0.27396e+03_realType
-       tup0(33,itu5) =  0.31270e+03_realType
-       tup0(34,itu5) =  0.35337e+03_realType
-
-       tup1( 1,itu5) =  0.37001e-03_realType
-       tup1( 2,itu5) =  0.15197e-02_realType
-       tup1( 3,itu5) =  0.46103e-02_realType
-       tup1( 4,itu5) =  0.11376e-01_realType
-       tup1( 5,itu5) =  0.23543e-01_realType
-       tup1( 6,itu5) =  0.40906e-01_realType
-       tup1( 7,itu5) =  0.64800e-01_realType
-       tup1( 8,itu5) =  0.11582e+00_realType
-       tup1( 9,itu5) =  0.17214e+00_realType
-       tup1(10,itu5) =  0.21559e+00_realType
-       tup1(11,itu5) =  0.25612e+00_realType
-       tup1(12,itu5) =  0.29399e+00_realType
-       tup1(13,itu5) =  0.32838e+00_realType
-       tup1(14,itu5) =  0.35836e+00_realType
-       tup1(15,itu5) =  0.38343e+00_realType
-       tup1(16,itu5) =  0.40383e+00_realType
-       tup1(17,itu5) =  0.42066e+00_realType
-       tup1(18,itu5) =  0.43496e+00_realType
-       tup1(19,itu5) =  0.44719e+00_realType
-       tup1(20,itu5) =  0.45758e+00_realType
-       tup1(21,itu5) =  0.46621e+00_realType
-       tup1(22,itu5) =  0.47302e+00_realType
-       tup1(23,itu5) =  0.47789e+00_realType
-       tup1(24,itu5) =  0.48067e+00_realType
-       tup1(25,itu5) =  0.48112e+00_realType
-       tup1(26,itu5) =  0.47894e+00_realType
-       tup1(27,itu5) =  0.47384e+00_realType
-       tup1(28,itu5) =  0.46551e+00_realType
-       tup1(29,itu5) =  0.45364e+00_realType
-       tup1(30,itu5) =  0.43792e+00_realType
-       tup1(31,itu5) =  0.41811e+00_realType
-       tup1(32,itu5) =  0.39406e+00_realType
-       tup1(33,itu5) =  0.36565e+00_realType
-       tup1(34,itu5) =  0.33291e+00_realType
-
-       tup2( 1,itu5) = -0.15144e-02_realType
-       tup2( 2,itu5) = -0.21673e-02_realType
-       tup2( 3,itu5) = -0.22502e-02_realType
-       tup2( 4,itu5) = -0.80447e-03_realType
-       tup2( 5,itu5) =  0.27762e-02_realType
-       tup2( 6,itu5) =  0.17677e-02_realType
-       tup2( 7,itu5) = -0.10701e-01_realType
-       tup2( 8,itu5) =  0.22380e-01_realType
-       tup2( 9,itu5) =  0.85050e-02_realType
-       tup2(10,itu5) =  0.62055e-02_realType
-       tup2(11,itu5) =  0.49141e-02_realType
-       tup2(12,itu5) =  0.39356e-02_realType
-       tup2(13,itu5) =  0.30516e-02_realType
-       tup2(14,itu5) =  0.22492e-02_realType
-       tup2(15,itu5) =  0.15552e-02_realType
-       tup2(16,itu5) =  0.10053e-02_realType
-       tup2(17,itu5) =  0.70099e-03_realType
-       tup2(18,itu5) =  0.50284e-03_realType
-       tup2(19,itu5) =  0.36969e-03_realType
-       tup2(20,itu5) =  0.27228e-03_realType
-       tup2(21,itu5) =  0.20061e-03_realType
-       tup2(22,itu5) =  0.14330e-03_realType
-       tup2(23,itu5) =  0.98897e-04_realType
-       tup2(24,itu5) =  0.62505e-04_realType
-       tup2(25,itu5) =  0.33283e-04_realType
-       tup2(26,itu5) =  0.68299e-05_realType
-       tup2(27,itu5) = -0.14485e-04_realType
-       tup2(28,itu5) = -0.32453e-04_realType
-       tup2(29,itu5) = -0.47992e-04_realType
-       tup2(30,itu5) = -0.61021e-04_realType
-       tup2(31,itu5) = -0.71877e-04_realType
-       tup2(32,itu5) = -0.79636e-04_realType
-       tup2(33,itu5) = -0.86125e-04_realType
-       tup2(34,itu5) = -0.90063e-04_realType
-
-       tup3( 1,itu5) =  0.66740e-02_realType
-       tup3( 2,itu5) =  0.93418e-02_realType
-       tup3( 3,itu5) =  0.10767e-01_realType
-       tup3( 4,itu5) =  0.97000e-02_realType
-       tup3( 5,itu5) =  0.58162e-02_realType
-       tup3( 6,itu5) =  0.58753e-02_realType
-       tup3( 7,itu5) =  0.14886e-01_realType
-       tup3( 8,itu5) = -0.21765e-02_realType
-       tup3( 9,itu5) =  0.66623e-03_realType
-       tup3(10,itu5) =  0.46817e-03_realType
-       tup3(11,itu5) =  0.24852e-03_realType
-       tup3(12,itu5) =  0.10079e-03_realType
-       tup3(13,itu5) =  0.25392e-04_realType
-       tup3(14,itu5) = -0.29448e-05_realType
-       tup3(15,itu5) = -0.56548e-05_realType
-       tup3(16,itu5) =  0.17092e-05_realType
-       tup3(17,itu5) =  0.17225e-05_realType
-       tup3(18,itu5) =  0.97328e-06_realType
-       tup3(19,itu5) =  0.61893e-07_realType
-       tup3(20,itu5) = -0.52008e-06_realType
-       tup3(21,itu5) = -0.93452e-06_realType
-       tup3(22,itu5) = -0.10961e-05_realType
-       tup3(23,itu5) = -0.11719e-05_realType
-       tup3(24,itu5) = -0.11584e-05_realType
-       tup3(25,itu5) = -0.11184e-05_realType
-       tup3(26,itu5) = -0.10094e-05_realType
-       tup3(27,itu5) = -0.90341e-06_realType
-       tup3(28,itu5) = -0.79513e-06_realType
-       tup3(29,itu5) = -0.68439e-06_realType
-       tup3(30,itu5) = -0.57895e-06_realType
-       tup3(31,itu5) = -0.47935e-06_realType
-       tup3(32,itu5) = -0.39672e-06_realType
-       tup3(33,itu5) = -0.31868e-06_realType
-       tup3(34,itu5) = -0.25374e-06_realType
-
-    end select
-
-  end subroutine initCurveFitDataVf
-
-subroutine terminate(routineName, errorMessage)
-    !
-    !       terminate writes an error message to standard output and
-    !       terminates the execution of the program.
-    !
-    use constants
-    use communication, only : adflow_comm_world, myid
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    character(len=*), intent(in) :: routineName
-    character(len=*), intent(in) :: errorMessage
+                    case (itu4)
+                        if (tuLogFit(mm)) then
+                            if (rvfN == 1) &
+                                call terminate( &
+                                "curveTupYp", &
+                                "Check curveFit, f cannot be fitted with log")
+                            if (rvfN == 6) tup(mm) = exp(tup(mm)) * x
+                        else
+                            if (rvfN == 1) fWall = -0.0035_realType
+                            if (rvfN == 6) fWall = zero
+                            tup(mm) = fWall + (tup0(1, mm) - fWall) * x
+                        end if
+
+                    case (itu5)
+                        if (tuLogFit(mm)) then
+                            tup(mm) = exp(tup(mm)) * x**4
+                        else
+                            tup(mm) = tup0(1, mm) * x**4
+                        end if
+                    end select
+                end do
+
+            end select
+
+            !=================================================================
+
+        else if (yp >= ypT(nFit)) then
+
+            ! Yplus is larger than the largest number in the curve
+            ! fit. Set tup to the largest value available.
+
+            nn = nFit
+            x = ypT(nn) - ypT(nn - 1)
+            x2 = x * x
+            x3 = x * x2
+
+            do mm = ntu1, ntu2
+                tup(mm) = tup0(nn, mm) + tup1(nn, mm) * x &
+                          + tup2(nn, mm) * x2 + tup3(nn, mm) * x3
+                if (tuLogFit(mm)) tup(mm) = exp(tup(mm))
+            end do
+
+            !=================================================================
+
+        else
+
+            ! y-plus is in the range of the curve fits.
+            ! First find the correct interval.
+
+            ii = nFit
+            start = 1
+            interval: do
+
+                ! Next guess for the interval.
+
+                nn = start + ii / 2
+
+                ! Determine the situation we are having here.
+
+                if (yp > ypT(nn)) then
+
+                    ! Yplus is larger than the upper boundary of
+                    ! the current interval. Update the lower boundary.
+
+                    start = nn + 1
+                    ii = ii - 1
+
+                else if (yp >= ypT(nn - 1)) then
+
+                    ! This is the correct range. Exit the do-loop.
+
+                    exit
+
+                end if
+
+                ! Modify ii for the next branch to search.
+
+                ii = ii / 2
+
+            end do interval
+
+            ! Compute tup using the cubic polynomial for this interval.
+
+            x = yp - ypT(nn - 1)
+            x2 = x * x
+            x3 = x * x2
+
+            do mm = ntu1, ntu2
+                tup(mm) = tup0(nn, mm) + tup1(nn, mm) * x &
+                          + tup2(nn, mm) * x2 + tup3(nn, mm) * x3
+                if (tuLogFit(mm)) tup(mm) = exp(tup(mm))
+            end do
+
+        end if
+
+    end subroutine curveTupYp
+
+    subroutine initCurveFitDataKtau
+        !
+        !       initCurveFitDataKtau contains the curve fit constants for
+        !       the wall function data for the k-tau turbulence model.
+        !
+        use flowVarRefState
+        use paramTurb
+        implicit none
+        !
+        !      Local variables.
+        !
+        ! integer :: ierr
+
+        call terminate("initCurveFitDataKtau", &
+                       "Not implemented yet")
+
+    end subroutine initCurveFitDataKtau
+
+    subroutine initCurveFitDataKw
+        !
+        !       initCurveFitDataKw contains the curve fit constants for
+        !       the wall function data for the standard Wilcox k-omega model.
+        !
+        use constants
+        use flowVarRefState
+        use paramTurb
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        ! Set the number of data points and allocate the memory for the
+        ! arrays of the curve fits.
+
+        nFit = 34
+
+        allocate (ypT(0:nFit), reT(0:nFit), &
+                  up0(nFit), up1(nFit), up2(nFit), up3(nFit), &
+                  tup0(nFit, nt1:nt2), tup1(nFit, nt1:nt2), &
+                  tup2(nFit, nt1:nt2), tup3(nFit, nt1:nt2), &
+                  tuLogFit(nt1:nt2), stat=ierr)
+        if (ierr /= 0) &
+             call terminate("initCurveFitDataKw", &
+             "Memory allocation failure for curve fit &
+             &coefficients")
+
+        ! Set the values of the Reynolds numbers at interval boundaries.
+
+        reT(0) = 0.12529547e+00_realType
+        reT(1) = 0.44996057e+00_realType
+        reT(2) = 0.11581311e+01_realType
+        reT(3) = 0.25353238e+01_realType
+        reT(4) = 0.50446282e+01_realType
+        reT(5) = 0.94194631e+01_realType
+        reT(6) = 0.16766555e+02_realType
+        reT(7) = 0.28556753e+02_realType
+        reT(8) = 0.46274930e+02_realType
+        reT(9) = 0.71021000e+02_realType
+        reT(10) = 0.10383163e+03_realType
+        reT(11) = 0.14621738e+03_realType
+        reT(12) = 0.20028019e+03_realType
+        reT(13) = 0.26868298e+03_realType
+        reT(14) = 0.35467049e+03_realType
+        reT(15) = 0.46212508e+03_realType
+        reT(16) = 0.59566097e+03_realType
+        reT(17) = 0.76073076e+03_realType
+        reT(18) = 0.96373333e+03_realType
+        reT(19) = 0.12121761e+04_realType
+        reT(20) = 0.15147917e+04_realType
+        reT(21) = 0.18817196e+04_realType
+        reT(22) = 0.23247121e+04_realType
+        reT(23) = 0.28572322e+04_realType
+        reT(24) = 0.34947840e+04_realType
+        reT(25) = 0.42551444e+04_realType
+        reT(26) = 0.51584529e+04_realType
+        reT(27) = 0.62277581e+04_realType
+        reT(28) = 0.74889831e+04_realType
+        reT(29) = 0.89716314e+04_realType
+        reT(30) = 0.10708764e+05_realType
+        reT(31) = 0.12737815e+05_realType
+        reT(32) = 0.15100490e+05_realType
+        reT(33) = 0.17843939e+05_realType
+        reT(34) = 0.21020534e+05_realType
+
+        ! Set the values of the y+ values at interval boundaries.
+
+        ypT(0) = 0.35397100e+00_realType
+        ypT(1) = 0.67079200e+00_realType
+        ypT(2) = 0.10761700e+01_realType
+        ypT(3) = 0.15923000e+01_realType
+        ypT(4) = 0.22463000e+01_realType
+        ypT(5) = 0.30710600e+01_realType
+        ypT(6) = 0.41063800e+01_realType
+        ypT(7) = 0.54001200e+01_realType
+        ypT(8) = 0.70095900e+01_realType
+        ypT(9) = 0.90031400e+01_realType
+        ypT(10) = 0.11461900e+02_realType
+        ypT(11) = 0.14481700e+02_realType
+        ypT(12) = 0.18175400e+02_realType
+        ypT(13) = 0.22675200e+02_realType
+        ypT(14) = 0.28135500e+02_realType
+        ypT(15) = 0.34735800e+02_realType
+        ypT(16) = 0.42683800e+02_realType
+        ypT(17) = 0.52219300e+02_realType
+        ypT(18) = 0.63617800e+02_realType
+        ypT(19) = 0.77194900e+02_realType
+        ypT(20) = 0.93310400e+02_realType
+        ypT(21) = 0.11237300e+03_realType
+        ypT(22) = 0.13484800e+03_realType
+        ypT(23) = 0.16125700e+03_realType
+        ypT(24) = 0.19218900e+03_realType
+        ypT(25) = 0.22830600e+03_realType
+        ypT(26) = 0.27034500e+03_realType
+        ypT(27) = 0.31913000e+03_realType
+        ypT(28) = 0.37557400e+03_realType
+        ypT(29) = 0.44069100e+03_realType
+        ypT(30) = 0.51559800e+03_realType
+        ypT(31) = 0.60152700e+03_realType
+        ypT(32) = 0.69982900e+03_realType
+        ypT(33) = 0.81198500e+03_realType
+        ypT(34) = 0.93960800e+03_realType
+
+        ! Set the values of constants for the cubic fits of the
+        ! non-dimensional tangential velocity.
+
+        up0(1) = 0.35397100e+00_realType
+        up0(2) = 0.67079000e+00_realType
+        up0(3) = 0.10761600e+01_realType
+        up0(4) = 0.15922400e+01_realType
+        up0(5) = 0.22457500e+01_realType
+        up0(6) = 0.30671700e+01_realType
+        up0(7) = 0.40830500e+01_realType
+        up0(8) = 0.52881700e+01_realType
+        up0(9) = 0.66016600e+01_realType
+        up0(10) = 0.78884700e+01_realType
+        up0(11) = 0.90588500e+01_realType
+        up0(12) = 0.10096700e+02_realType
+        up0(13) = 0.11019300e+02_realType
+        up0(14) = 0.11849200e+02_realType
+        up0(15) = 0.12605800e+02_realType
+        up0(16) = 0.13304000e+02_realType
+        up0(17) = 0.13955200e+02_realType
+        up0(18) = 0.14568000e+02_realType
+        up0(19) = 0.15148800e+02_realType
+        up0(20) = 0.15702800e+02_realType
+        up0(21) = 0.16233900e+02_realType
+        up0(22) = 0.16745300e+02_realType
+        up0(23) = 0.17239500e+02_realType
+        up0(24) = 0.17718500e+02_realType
+        up0(25) = 0.18184100e+02_realType
+        up0(26) = 0.18637900e+02_realType
+        up0(27) = 0.19081000e+02_realType
+        up0(28) = 0.19514800e+02_realType
+        up0(29) = 0.19940100e+02_realType
+        up0(30) = 0.20358100e+02_realType
+        up0(31) = 0.20769600e+02_realType
+        up0(32) = 0.21175800e+02_realType
+        up0(33) = 0.21577400e+02_realType
+        up0(34) = 0.21975700e+02_realType
+
+        up1(1) = 0.12846958e+01_realType
+        up1(2) = 0.69922936e+00_realType
+        up1(3) = 0.44186548e+00_realType
+        up1(4) = 0.30093680e+00_realType
+        up1(5) = 0.21425046e+00_realType
+        up1(6) = 0.15674045e+00_realType
+        up1(7) = 0.11605614e+00_realType
+        up1(8) = 0.85352379e-01_realType
+        up1(9) = 0.61235043e-01_realType
+        up1(10) = 0.42691639e-01_realType
+        up1(11) = 0.29366174e-01_realType
+        up1(12) = 0.20326381e-01_realType
+        up1(13) = 0.14310141e-01_realType
+        up1(14) = 0.10275905e-01_realType
+        up1(15) = 0.75205965e-02_realType
+        up1(16) = 0.55993913e-02_realType
+        up1(17) = 0.42330072e-02_realType
+        up1(18) = 0.32428406e-02_realType
+        up1(19) = 0.25137042e-02_realType
+        up1(20) = 0.19691199e-02_realType
+        up1(21) = 0.15570310e-02_realType
+        up1(22) = 0.12416035e-02_realType
+        up1(23) = 0.99762939e-03_realType
+        up1(24) = 0.80730082e-03_realType
+        up1(25) = 0.65769508e-03_realType
+        up1(26) = 0.53910966e-03_realType
+        up1(27) = 0.44453711e-03_realType
+        up1(28) = 0.36862857e-03_realType
+        up1(29) = 0.30733926e-03_realType
+        up1(30) = 0.25762621e-03_realType
+        up1(31) = 0.21711632e-03_realType
+        up1(32) = 0.18393679e-03_realType
+        up1(33) = 0.15665505e-03_realType
+        up1(34) = 0.13415441e-03_realType
+
+        up2(1) = -0.10506864e+01_realType
+        up2(2) = -0.17378349e+00_realType
+        up2(3) = -0.43906517e-01_realType
+        up2(4) = -0.13876317e-01_realType
+        up2(5) = -0.50197713e-02_realType
+        up2(6) = -0.20046033e-02_realType
+        up2(7) = -0.91800803e-03_realType
+        up2(8) = -0.53858650e-03_realType
+        up2(9) = -0.37015912e-03_realType
+        up2(10) = -0.23581357e-03_realType
+        up2(11) = -0.13214946e-03_realType
+        up2(12) = -0.69676197e-04_realType
+        up2(13) = -0.36527030e-04_realType
+        up2(14) = -0.19485941e-04_realType
+        up2(15) = -0.10680793e-04_realType
+        up2(16) = -0.60059830e-05_realType
+        up2(17) = -0.34636741e-05_realType
+        up2(18) = -0.20504308e-05_realType
+        up2(19) = -0.12351270e-05_realType
+        up2(20) = -0.76062105e-06_realType
+        up2(21) = -0.47546804e-06_realType
+        up2(22) = -0.30260755e-06_realType
+        up2(23) = -0.19542870e-06_realType
+        up2(24) = -0.12770106e-06_realType
+        up2(25) = -0.84214126e-07_realType
+        up2(26) = -0.56643139e-07_realType
+        up2(27) = -0.38016078e-07_realType
+        up2(28) = -0.26134020e-07_realType
+        up2(29) = -0.17887512e-07_realType
+        up2(30) = -0.12500449e-07_realType
+        up2(31) = -0.86706361e-08_realType
+        up2(32) = -0.61786381e-08_realType
+        up2(33) = -0.43440887e-08_realType
+        up2(34) = -0.31212378e-08_realType
+
+        up3(1) = 0.30603769e+00_realType
+        up3(2) = -0.74623173e-02_realType
+        up3(3) = -0.35137590e-02_realType
+        up3(4) = -0.90241883e-03_realType
+        up3(5) = -0.23666410e-03_realType
+        up3(6) = -0.69336451e-04_realType
+        up3(7) = -0.21717508e-04_realType
+        up3(8) = -0.53427330e-05_realType
+        up3(9) = -0.12162441e-06_realType
+        up3(10) = 0.66537991e-06_realType
+        up3(11) = 0.40127135e-06_realType
+        up3(12) = 0.17307016e-06_realType
+        up3(13) = 0.68595664e-07_realType
+        up3(14) = 0.26859566e-07_realType
+        up3(15) = 0.10802572e-07_realType
+        up3(16) = 0.44423253e-08_realType
+        up3(17) = 0.18757232e-08_realType
+        up3(18) = 0.83595370e-09_realType
+        up3(19) = 0.37334228e-09_realType
+        up3(20) = 0.17567414e-09_realType
+        up3(21) = 0.82933451e-10_realType
+        up3(22) = 0.40989510e-10_realType
+        up3(23) = 0.20935863e-10_realType
+        up3(24) = 0.10846455e-10_realType
+        up3(25) = 0.54661649e-11_realType
+        up3(26) = 0.31700296e-11_realType
+        up3(27) = 0.15722041e-11_realType
+        up3(28) = 0.97074333e-12_realType
+        up3(29) = 0.50475514e-12_realType
+        up3(30) = 0.32254746e-12_realType
+        up3(31) = 0.16247920e-12_realType
+        up3(32) = 0.11432002e-12_realType
+        up3(33) = 0.59121027e-13_realType
+        up3(34) = 0.38726995e-13_realType
+
+        ! Set the values of tuLogFit. Both for k and omega the
+        ! logarithm has been fitted.
+
+        tuLogFit(itu1) = .true.
+        tuLogFit(itu2) = .true.
+
+        ! Set the values of constants for the cubic fits of the
+        ! non-dimensional k and omega values.
+
+        ! Constants for k.
+
+        tup0(1, itu1) = -0.10178274e+02_realType
+        tup0(2, itu1) = -0.79134047e+01_realType
+        tup0(3, itu1) = -0.62154735e+01_realType
+        tup0(4, itu1) = -0.48268972e+01_realType
+        tup0(5, itu1) = -0.36279650e+01_realType
+        tup0(6, itu1) = -0.25597781e+01_realType
+        tup0(7, itu1) = -0.16005079e+01_realType
+        tup0(8, itu1) = -0.76521262e+00_realType
+        tup0(9, itu1) = -0.10076775e+00_realType
+        tup0(10, itu1) = 0.36262719e+00_realType
+        tup0(11, itu1) = 0.65553877e+00_realType
+        tup0(12, itu1) = 0.83590897e+00_realType
+        tup0(13, itu1) = 0.94909088e+00_realType
+        tup0(14, itu1) = 0.10224941e+01_realType
+        tup0(15, itu1) = 0.10717000e+01_realType
+        tup0(16, itu1) = 0.11056409e+01_realType
+        tup0(17, itu1) = 0.11295908e+01_realType
+        tup0(18, itu1) = 0.11467673e+01_realType
+        tup0(19, itu1) = 0.11591867e+01_realType
+        tup0(20, itu1) = 0.11681570e+01_realType
+        tup0(21, itu1) = 0.11745296e+01_realType
+        tup0(22, itu1) = 0.11788734e+01_realType
+        tup0(23, itu1) = 0.11815615e+01_realType
+        tup0(24, itu1) = 0.11828278e+01_realType
+        tup0(25, itu1) = 0.11828094e+01_realType
+        tup0(26, itu1) = 0.11815707e+01_realType
+        tup0(27, itu1) = 0.11791103e+01_realType
+        tup0(28, itu1) = 0.11753665e+01_realType
+        tup0(29, itu1) = 0.11702319e+01_realType
+        tup0(30, itu1) = 0.11635476e+01_realType
+        tup0(31, itu1) = 0.11550903e+01_realType
+        tup0(32, itu1) = 0.11445826e+01_realType
+        tup0(33, itu1) = 0.11316601e+01_realType
+        tup0(34, itu1) = 0.11158659e+01_realType
+
+        ! Constants for omega.
+
+        tup0(1, itu2) = 0.68385895e+01_realType
+        tup0(2, itu2) = 0.55423492e+01_realType
+        tup0(3, itu2) = 0.45364394e+01_realType
+        tup0(4, itu2) = 0.37003435e+01_realType
+        tup0(5, itu2) = 0.29762436e+01_realType
+        tup0(6, itu2) = 0.23400254e+01_realType
+        tup0(7, itu2) = 0.17897909e+01_realType
+        tup0(8, itu2) = 0.13296526e+01_realType
+        tup0(9, itu2) = 0.94313517e+00_realType
+        tup0(10, itu2) = 0.59512633e+00_realType
+        tup0(11, itu2) = 0.26383242e+00_realType
+        tup0(12, itu2) = -0.54289357e-01_realType
+        tup0(13, itu2) = -0.35764684e+00_realType
+        tup0(14, itu2) = -0.64548336e+00_realType
+        tup0(15, itu2) = -0.91832029e+00_realType
+        tup0(16, itu2) = -0.11773601e+01_realType
+        tup0(17, itu2) = -0.14240004e+01_realType
+        tup0(18, itu2) = -0.16596108e+01_realType
+        tup0(19, itu2) = -0.18854088e+01_realType
+        tup0(20, itu2) = -0.21024564e+01_realType
+        tup0(21, itu2) = -0.23116299e+01_realType
+        tup0(22, itu2) = -0.25136741e+01_realType
+        tup0(23, itu2) = -0.27091934e+01_realType
+        tup0(24, itu2) = -0.28986818e+01_realType
+        tup0(25, itu2) = -0.30825349e+01_realType
+        tup0(26, itu2) = -0.32610659e+01_realType
+        tup0(27, itu2) = -0.34345194e+01_realType
+        tup0(28, itu2) = -0.36030725e+01_realType
+        tup0(29, itu2) = -0.37668496e+01_realType
+        tup0(30, itu2) = -0.39259191e+01_realType
+        tup0(31, itu2) = -0.40803056e+01_realType
+        tup0(32, itu2) = -0.42299856e+01_realType
+        tup0(33, itu2) = -0.43749001e+01_realType
+        tup0(34, itu2) = -0.45149548e+01_realType
+
+        ! Constants for k.
+
+        tup1(1, itu1) = 0.10151083e+02_realType
+        tup1(2, itu1) = 0.54871316e+01_realType
+        tup1(3, itu1) = 0.33494093e+01_realType
+        tup1(4, itu1) = 0.22113000e+01_realType
+        tup1(5, itu1) = 0.15331218e+01_realType
+        tup1(6, itu1) = 0.10899838e+01_realType
+        tup1(7, itu1) = 0.77051060e+00_realType
+        tup1(8, itu1) = 0.51657998e+00_realType
+        tup1(9, itu1) = 0.31302624e+00_realType
+        tup1(10, itu1) = 0.16986834e+00_realType
+        tup1(11, itu1) = 0.86387987e-01_realType
+        tup1(12, itu1) = 0.43725644e-01_realType
+        tup1(13, itu1) = 0.22772335e-01_realType
+        tup1(14, itu1) = 0.12310034e-01_realType
+        tup1(15, itu1) = 0.68940825e-02_realType
+        tup1(16, itu1) = 0.39792104e-02_realType
+        tup1(17, itu1) = 0.23523017e-02_realType
+        tup1(18, itu1) = 0.14137727e-02_realType
+        tup1(19, itu1) = 0.85642296e-03_realType
+        tup1(20, itu1) = 0.51672343e-03_realType
+        tup1(21, itu1) = 0.30463346e-03_realType
+        tup1(22, itu1) = 0.16929149e-03_realType
+        tup1(23, itu1) = 0.80893185e-04_realType
+        tup1(24, itu1) = 0.21762685e-04_realType
+        tup1(25, itu1) = -0.18748602e-04_realType
+        tup1(26, itu1) = -0.47330395e-04_realType
+        tup1(27, itu1) = -0.68310393e-04_realType
+        tup1(28, itu1) = -0.84371684e-04_realType
+        tup1(29, itu1) = -0.97226185e-04_realType
+        tup1(30, itu1) = -0.10813606e-03_realType
+        tup1(31, itu1) = -0.11791513e-03_realType
+        tup1(32, itu1) = -0.12717807e-03_realType
+        tup1(33, itu1) = -0.13644855e-03_realType
+        tup1(34, itu1) = -0.14611996e-03_realType
+
+        ! Constants for omega.
+
+        tup1(1, itu2) = -0.55838269e+01_realType
+        tup1(2, itu2) = -0.31876950e+01_realType
+        tup1(3, itu2) = -0.19989037e+01_realType
+        tup1(4, itu2) = -0.13333526e+01_realType
+        tup1(5, itu2) = -0.91990459e+00_realType
+        tup1(6, itu2) = -0.63785038e+00_realType
+        tup1(7, itu2) = -0.43381141e+00_realType
+        tup1(8, itu2) = -0.29162744e+00_realType
+        tup1(9, itu2) = -0.20386405e+00_realType
+        tup1(10, itu2) = -0.15257310e+00_realType
+        tup1(11, itu2) = -0.11853766e+00_realType
+        tup1(12, itu2) = -0.92571574e-01_realType
+        tup1(13, itu2) = -0.72154025e-01_realType
+        tup1(14, itu2) = -0.56291949e-01_realType
+        tup1(15, itu2) = -0.44100353e-01_realType
+        tup1(16, itu2) = -0.34758707e-01_realType
+        tup1(17, itu2) = -0.27583190e-01_realType
+        tup1(18, itu2) = -0.22041103e-01_realType
+        tup1(19, itu2) = -0.17731129e-01_realType
+        tup1(20, itu2) = -0.14354453e-01_realType
+        tup1(21, itu2) = -0.11689595e-01_realType
+        tup1(22, itu2) = -0.95711712e-02_realType
+        tup1(23, itu2) = -0.78759450e-02_realType
+        tup1(24, itu2) = -0.65109013e-02_realType
+        tup1(25, itu2) = -0.54047659e-02_realType
+        tup1(26, itu2) = -0.45036146e-02_realType
+        tup1(27, itu2) = -0.37655964e-02_realType
+        tup1(28, itu2) = -0.31581617e-02_realType
+        tup1(29, itu2) = -0.26558406e-02_realType
+        tup1(30, itu2) = -0.22385872e-02_realType
+        tup1(31, itu2) = -0.18905375e-02_realType
+        tup1(32, itu2) = -0.15990497e-02_realType
+        tup1(33, itu2) = -0.13540430e-02_realType
+        tup1(34, itu2) = -0.11473506e-02_realType
+
+        ! Constants for k.
+
+        tup2(1, itu1) = -0.13708334e+02_realType
+        tup2(2, itu1) = -0.43370192e+01_realType
+        tup2(3, itu1) = -0.16256260e+01_realType
+        tup2(4, itu1) = -0.69729756e+00_realType
+        tup2(5, itu1) = -0.32831484e+00_realType
+        tup2(6, itu1) = -0.16501608e+00_realType
+        tup2(7, itu1) = -0.93271929e-01_realType
+        tup2(8, itu1) = -0.66905532e-01_realType
+        tup2(9, itu1) = -0.49449221e-01_realType
+        tup2(10, itu1) = -0.27955265e-01_realType
+        tup2(11, itu1) = -0.12356466e-01_realType
+        tup2(12, itu1) = -0.49538360e-02_realType
+        tup2(13, itu1) = -0.19816552e-02_realType
+        tup2(14, itu1) = -0.82035726e-03_realType
+        tup2(15, itu1) = -0.35459496e-03_realType
+        tup2(16, itu1) = -0.15988154e-03_realType
+        tup2(17, itu1) = -0.74920111e-04_realType
+        tup2(18, itu1) = -0.36432844e-04_realType
+        tup2(19, itu1) = -0.18228517e-04_realType
+        tup2(20, itu1) = -0.94187249e-05_realType
+        tup2(21, itu1) = -0.49803455e-05_realType
+        tup2(22, itu1) = -0.26991628e-05_realType
+        tup2(23, itu1) = -0.15033797e-05_realType
+        tup2(24, itu1) = -0.85865308e-06_realType
+        tup2(25, itu1) = -0.50010225e-06_realType
+        tup2(26, itu1) = -0.30003597e-06_realType
+        tup2(27, itu1) = -0.18914449e-06_realType
+        tup2(28, itu1) = -0.12284699e-06_realType
+        tup2(29, itu1) = -0.82382282e-07_realType
+        tup2(30, itu1) = -0.60415725e-07_realType
+        tup2(31, itu1) = -0.44704797e-07_realType
+        tup2(32, itu1) = -0.36272580e-07_realType
+        tup2(33, itu1) = -0.30797090e-07_realType
+        tup2(34, itu1) = -0.27066472e-07_realType
+
+        ! Constants for omega.
+
+        tup2(1, itu2) = 0.65688815e+01_realType
+        tup2(2, itu2) = 0.22942977e+01_realType
+        tup2(3, itu2) = 0.91326107e+00_realType
+        tup2(4, itu2) = 0.40527609e+00_realType
+        tup2(5, itu2) = 0.19819770e+00_realType
+        tup2(6, itu2) = 0.11119507e+00_realType
+        tup2(7, itu2) = 0.71308510e-01_realType
+        tup2(8, itu2) = 0.41419218e-01_realType
+        tup2(9, itu2) = 0.18358706e-01_realType
+        tup2(10, itu2) = 0.79158389e-02_realType
+        tup2(11, itu2) = 0.45072385e-02_realType
+        tup2(12, itu2) = 0.29542525e-02_realType
+        tup2(13, itu2) = 0.19335211e-02_realType
+        tup2(14, itu2) = 0.12420728e-02_realType
+        tup2(15, itu2) = 0.79078278e-03_realType
+        tup2(16, itu2) = 0.50394724e-03_realType
+        tup2(17, itu2) = 0.32312896e-03_realType
+        tup2(18, itu2) = 0.20923598e-03_realType
+        tup2(19, itu2) = 0.13683514e-03_realType
+        tup2(20, itu2) = 0.90568628e-04_realType
+        tup2(21, itu2) = 0.60506139e-04_realType
+        tup2(22, itu2) = 0.40936800e-04_realType
+        tup2(23, itu2) = 0.27920509e-04_realType
+        tup2(24, itu2) = 0.19242656e-04_realType
+        tup2(25, itu2) = 0.13394215e-04_realType
+        tup2(26, itu2) = 0.93908861e-05_realType
+        tup2(27, itu2) = 0.66475834e-05_realType
+        tup2(28, itu2) = 0.47374937e-05_realType
+        tup2(29, itu2) = 0.34060533e-05_realType
+        tup2(30, itu2) = 0.24642067e-05_realType
+        tup2(31, itu2) = 0.17969941e-05_realType
+        tup2(32, itu2) = 0.13185213e-05_realType
+        tup2(33, itu2) = 0.97355039e-06_realType
+        tup2(34, itu2) = 0.72438303e-06_realType
+
+        ! Constants for k.
+
+        tup3(1, itu1) = 0.13357255e+02_realType
+        tup3(2, itu1) = 0.27962653e+01_realType
+        tup3(3, itu1) = 0.67564983e+00_realType
+        tup3(4, itu1) = 0.18227593e+00_realType
+        tup3(5, itu1) = 0.48230769e-01_realType
+        tup3(6, itu1) = 0.69085885e-02_realType
+        tup3(7, itu1) = -0.25075980e-02_realType
+        tup3(8, itu1) = 0.15198665e-02_realType
+        tup3(9, itu1) = 0.45292571e-02_realType
+        tup3(10, itu1) = 0.29768816e-02_realType
+        tup3(11, itu1) = 0.11684431e-02_realType
+        tup3(12, itu1) = 0.38217835e-03_realType
+        tup3(13, itu1) = 0.12135735e-03_realType
+        tup3(14, itu1) = 0.39609332e-04_realType
+        tup3(15, itu1) = 0.13512654e-04_realType
+        tup3(16, itu1) = 0.48259094e-05_realType
+        tup3(17, itu1) = 0.17973362e-05_realType
+        tup3(18, itu1) = 0.70093792e-06_realType
+        tup3(19, itu1) = 0.28079141e-06_realType
+        tup3(20, itu1) = 0.11741966e-06_realType
+        tup3(21, itu1) = 0.50025034e-07_realType
+        tup3(22, itu1) = 0.21729950e-07_realType
+        tup3(23, itu1) = 0.96902710e-08_realType
+        tup3(24, itu1) = 0.43926199e-08_realType
+        tup3(25, itu1) = 0.19274189e-08_realType
+        tup3(26, itu1) = 0.80093554e-09_realType
+        tup3(27, itu1) = 0.33523262e-09_realType
+        tup3(28, itu1) = 0.10603408e-09_realType
+        tup3(29, itu1) = -0.14220954e-10_realType
+        tup3(30, itu1) = -0.43245103e-10_realType
+        tup3(31, itu1) = -0.71330147e-10_realType
+        tup3(32, itu1) = -0.73789519e-10_realType
+        tup3(33, itu1) = -0.73223977e-10_realType
+        tup3(34, itu1) = -0.73679919e-10_realType
+
+        ! Constants for omega.
+
+        tup3(1, itu2) = -0.58652639e+01_realType
+        tup3(2, itu2) = -0.13617293e+01_realType
+        tup3(3, itu2) = -0.34682427e+00_realType
+        tup3(4, itu2) = -0.90911692e-01_realType
+        tup3(5, itu2) = -0.21991054e-01_realType
+        tup3(6, itu2) = -0.81494825e-02_realType
+        tup3(7, itu2) = -0.84291839e-02_realType
+        tup3(8, itu2) = -0.58630201e-02_realType
+        tup3(9, itu2) = -0.18374198e-02_realType
+        tup3(10, itu2) = -0.26966903e-03_realType
+        tup3(11, itu2) = -0.45904311e-04_realType
+        tup3(12, itu2) = -0.34368125e-04_realType
+        tup3(13, itu2) = -0.25332953e-04_realType
+        tup3(14, itu2) = -0.15345625e-04_realType
+        tup3(15, itu2) = -0.83950168e-05_realType
+        tup3(16, itu2) = -0.44072558e-05_realType
+        tup3(17, itu2) = -0.22740368e-05_realType
+        tup3(18, itu2) = -0.11801059e-05_realType
+        tup3(19, itu2) = -0.61295741e-06_realType
+        tup3(20, itu2) = -0.32633745e-06_realType
+        tup3(21, itu2) = -0.17280693e-06_realType
+        tup3(22, itu2) = -0.95608569e-07_realType
+        tup3(23, itu2) = -0.52411878e-07_realType
+        tup3(24, itu2) = -0.29366409e-07_realType
+        tup3(25, itu2) = -0.16959125e-07_realType
+        tup3(26, itu2) = -0.97228139e-08_realType
+        tup3(27, itu2) = -0.57661683e-08_realType
+        tup3(28, itu2) = -0.33988144e-08_realType
+        tup3(29, itu2) = -0.20698972e-08_realType
+        tup3(30, itu2) = -0.12548468e-08_realType
+        tup3(31, itu2) = -0.78279227e-09_realType
+        tup3(32, itu2) = -0.49051180e-09_realType
+        tup3(33, itu2) = -0.30968731e-09_realType
+        tup3(34, itu2) = -0.20495795e-09_realType
+
+    end subroutine initCurveFitDataKw
+
+    subroutine initCurveFitDataKwMod
+        !
+        !       initCurveFitDataKwMod contains the curve fit constants
+        !       for the wall function data for the modified k-omega turbulence
+        !       model.
+        !
+        use flowVarRefState
+        use paramTurb
+        implicit none
+        !
+        !      Local variables.
+        !
+        ! integer :: ierr
+
+        call terminate("initCurveFitDataKwMod", &
+                       "Not implemented yet")
+
+    end subroutine initCurveFitDataKwMod
+
+    subroutine initCurveFitDataSST
+        !
+        !       initCurveFitDataSST contains the curve fit constants for
+        !       the wall function data for Menter's SST turbulence model.
+        !       Warning: Wall function data developed for k-omega model
+        !
+        use constants
+        use flowVarRefState
+        use paramTurb
+
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        ! Set the number of data points and allocate the memory for the
+        ! arrays of the curve fits.
+
+        nFit = 34
+
+        allocate (ypT(0:nFit), reT(0:nFit), &
+                  up0(nFit), up1(nFit), up2(nFit), up3(nFit), &
+                  tup0(nFit, nt1:nt2), tup1(nFit, nt1:nt2), &
+                  tup2(nFit, nt1:nt2), tup3(nFit, nt1:nt2), &
+                  tuLogFit(nt1:nt2), stat=ierr)
+        if (ierr /= 0) &
+             call terminate("initCurveFitDataSST", &
+             "Memory allocation failure for curve fit &
+             &coefficients")
+
+        ! Set the values of the Reynolds numbers at interval boundaries.
+
+        reT(0) = 0.12529547e+00_realType
+        reT(1) = 0.44996057e+00_realType
+        reT(2) = 0.11581311e+01_realType
+        reT(3) = 0.25353238e+01_realType
+        reT(4) = 0.50446282e+01_realType
+        reT(5) = 0.94194631e+01_realType
+        reT(6) = 0.16766555e+02_realType
+        reT(7) = 0.28556753e+02_realType
+        reT(8) = 0.46274930e+02_realType
+        reT(9) = 0.71021000e+02_realType
+        reT(10) = 0.10383163e+03_realType
+        reT(11) = 0.14621738e+03_realType
+        reT(12) = 0.20028019e+03_realType
+        reT(13) = 0.26868298e+03_realType
+        reT(14) = 0.35467049e+03_realType
+        reT(15) = 0.46212508e+03_realType
+        reT(16) = 0.59566097e+03_realType
+        reT(17) = 0.76073076e+03_realType
+        reT(18) = 0.96373333e+03_realType
+        reT(19) = 0.12121761e+04_realType
+        reT(20) = 0.15147917e+04_realType
+        reT(21) = 0.18817196e+04_realType
+        reT(22) = 0.23247121e+04_realType
+        reT(23) = 0.28572322e+04_realType
+        reT(24) = 0.34947840e+04_realType
+        reT(25) = 0.42551444e+04_realType
+        reT(26) = 0.51584529e+04_realType
+        reT(27) = 0.62277581e+04_realType
+        reT(28) = 0.74889831e+04_realType
+        reT(29) = 0.89716314e+04_realType
+        reT(30) = 0.10708764e+05_realType
+        reT(31) = 0.12737815e+05_realType
+        reT(32) = 0.15100490e+05_realType
+        reT(33) = 0.17843939e+05_realType
+        reT(34) = 0.21020534e+05_realType
+
+        ! Set the values of the y+ values at interval boundaries.
+
+        ypT(0) = 0.35397100e+00_realType
+        ypT(1) = 0.67079200e+00_realType
+        ypT(2) = 0.10761700e+01_realType
+        ypT(3) = 0.15923000e+01_realType
+        ypT(4) = 0.22463000e+01_realType
+        ypT(5) = 0.30710600e+01_realType
+        ypT(6) = 0.41063800e+01_realType
+        ypT(7) = 0.54001200e+01_realType
+        ypT(8) = 0.70095900e+01_realType
+        ypT(9) = 0.90031400e+01_realType
+        ypT(10) = 0.11461900e+02_realType
+        ypT(11) = 0.14481700e+02_realType
+        ypT(12) = 0.18175400e+02_realType
+        ypT(13) = 0.22675200e+02_realType
+        ypT(14) = 0.28135500e+02_realType
+        ypT(15) = 0.34735800e+02_realType
+        ypT(16) = 0.42683800e+02_realType
+        ypT(17) = 0.52219300e+02_realType
+        ypT(18) = 0.63617800e+02_realType
+        ypT(19) = 0.77194900e+02_realType
+        ypT(20) = 0.93310400e+02_realType
+        ypT(21) = 0.11237300e+03_realType
+        ypT(22) = 0.13484800e+03_realType
+        ypT(23) = 0.16125700e+03_realType
+        ypT(24) = 0.19218900e+03_realType
+        ypT(25) = 0.22830600e+03_realType
+        ypT(26) = 0.27034500e+03_realType
+        ypT(27) = 0.31913000e+03_realType
+        ypT(28) = 0.37557400e+03_realType
+        ypT(29) = 0.44069100e+03_realType
+        ypT(30) = 0.51559800e+03_realType
+        ypT(31) = 0.60152700e+03_realType
+        ypT(32) = 0.69982900e+03_realType
+        ypT(33) = 0.81198500e+03_realType
+        ypT(34) = 0.93960800e+03_realType
+
+        ! Set the values of constants for the cubic fits of the
+        ! non-dimensional tangential velocity.
+
+        up0(1) = 0.35397100e+00_realType
+        up0(2) = 0.67079000e+00_realType
+        up0(3) = 0.10761600e+01_realType
+        up0(4) = 0.15922400e+01_realType
+        up0(5) = 0.22457500e+01_realType
+        up0(6) = 0.30671700e+01_realType
+        up0(7) = 0.40830500e+01_realType
+        up0(8) = 0.52881700e+01_realType
+        up0(9) = 0.66016600e+01_realType
+        up0(10) = 0.78884700e+01_realType
+        up0(11) = 0.90588500e+01_realType
+        up0(12) = 0.10096700e+02_realType
+        up0(13) = 0.11019300e+02_realType
+        up0(14) = 0.11849200e+02_realType
+        up0(15) = 0.12605800e+02_realType
+        up0(16) = 0.13304000e+02_realType
+        up0(17) = 0.13955200e+02_realType
+        up0(18) = 0.14568000e+02_realType
+        up0(19) = 0.15148800e+02_realType
+        up0(20) = 0.15702800e+02_realType
+        up0(21) = 0.16233900e+02_realType
+        up0(22) = 0.16745300e+02_realType
+        up0(23) = 0.17239500e+02_realType
+        up0(24) = 0.17718500e+02_realType
+        up0(25) = 0.18184100e+02_realType
+        up0(26) = 0.18637900e+02_realType
+        up0(27) = 0.19081000e+02_realType
+        up0(28) = 0.19514800e+02_realType
+        up0(29) = 0.19940100e+02_realType
+        up0(30) = 0.20358100e+02_realType
+        up0(31) = 0.20769600e+02_realType
+        up0(32) = 0.21175800e+02_realType
+        up0(33) = 0.21577400e+02_realType
+        up0(34) = 0.21975700e+02_realType
+
+        up1(1) = 0.12846958e+01_realType
+        up1(2) = 0.69922936e+00_realType
+        up1(3) = 0.44186548e+00_realType
+        up1(4) = 0.30093680e+00_realType
+        up1(5) = 0.21425046e+00_realType
+        up1(6) = 0.15674045e+00_realType
+        up1(7) = 0.11605614e+00_realType
+        up1(8) = 0.85352379e-01_realType
+        up1(9) = 0.61235043e-01_realType
+        up1(10) = 0.42691639e-01_realType
+        up1(11) = 0.29366174e-01_realType
+        up1(12) = 0.20326381e-01_realType
+        up1(13) = 0.14310141e-01_realType
+        up1(14) = 0.10275905e-01_realType
+        up1(15) = 0.75205965e-02_realType
+        up1(16) = 0.55993913e-02_realType
+        up1(17) = 0.42330072e-02_realType
+        up1(18) = 0.32428406e-02_realType
+        up1(19) = 0.25137042e-02_realType
+        up1(20) = 0.19691199e-02_realType
+        up1(21) = 0.15570310e-02_realType
+        up1(22) = 0.12416035e-02_realType
+        up1(23) = 0.99762939e-03_realType
+        up1(24) = 0.80730082e-03_realType
+        up1(25) = 0.65769508e-03_realType
+        up1(26) = 0.53910966e-03_realType
+        up1(27) = 0.44453711e-03_realType
+        up1(28) = 0.36862857e-03_realType
+        up1(29) = 0.30733926e-03_realType
+        up1(30) = 0.25762621e-03_realType
+        up1(31) = 0.21711632e-03_realType
+        up1(32) = 0.18393679e-03_realType
+        up1(33) = 0.15665505e-03_realType
+        up1(34) = 0.13415441e-03_realType
+
+        up2(1) = -0.10506864e+01_realType
+        up2(2) = -0.17378349e+00_realType
+        up2(3) = -0.43906517e-01_realType
+        up2(4) = -0.13876317e-01_realType
+        up2(5) = -0.50197713e-02_realType
+        up2(6) = -0.20046033e-02_realType
+        up2(7) = -0.91800803e-03_realType
+        up2(8) = -0.53858650e-03_realType
+        up2(9) = -0.37015912e-03_realType
+        up2(10) = -0.23581357e-03_realType
+        up2(11) = -0.13214946e-03_realType
+        up2(12) = -0.69676197e-04_realType
+        up2(13) = -0.36527030e-04_realType
+        up2(14) = -0.19485941e-04_realType
+        up2(15) = -0.10680793e-04_realType
+        up2(16) = -0.60059830e-05_realType
+        up2(17) = -0.34636741e-05_realType
+        up2(18) = -0.20504308e-05_realType
+        up2(19) = -0.12351270e-05_realType
+        up2(20) = -0.76062105e-06_realType
+        up2(21) = -0.47546804e-06_realType
+        up2(22) = -0.30260755e-06_realType
+        up2(23) = -0.19542870e-06_realType
+        up2(24) = -0.12770106e-06_realType
+        up2(25) = -0.84214126e-07_realType
+        up2(26) = -0.56643139e-07_realType
+        up2(27) = -0.38016078e-07_realType
+        up2(28) = -0.26134020e-07_realType
+        up2(29) = -0.17887512e-07_realType
+        up2(30) = -0.12500449e-07_realType
+        up2(31) = -0.86706361e-08_realType
+        up2(32) = -0.61786381e-08_realType
+        up2(33) = -0.43440887e-08_realType
+        up2(34) = -0.31212378e-08_realType
+
+        up3(1) = 0.30603769e+00_realType
+        up3(2) = -0.74623173e-02_realType
+        up3(3) = -0.35137590e-02_realType
+        up3(4) = -0.90241883e-03_realType
+        up3(5) = -0.23666410e-03_realType
+        up3(6) = -0.69336451e-04_realType
+        up3(7) = -0.21717508e-04_realType
+        up3(8) = -0.53427330e-05_realType
+        up3(9) = -0.12162441e-06_realType
+        up3(10) = 0.66537991e-06_realType
+        up3(11) = 0.40127135e-06_realType
+        up3(12) = 0.17307016e-06_realType
+        up3(13) = 0.68595664e-07_realType
+        up3(14) = 0.26859566e-07_realType
+        up3(15) = 0.10802572e-07_realType
+        up3(16) = 0.44423253e-08_realType
+        up3(17) = 0.18757232e-08_realType
+        up3(18) = 0.83595370e-09_realType
+        up3(19) = 0.37334228e-09_realType
+        up3(20) = 0.17567414e-09_realType
+        up3(21) = 0.82933451e-10_realType
+        up3(22) = 0.40989510e-10_realType
+        up3(23) = 0.20935863e-10_realType
+        up3(24) = 0.10846455e-10_realType
+        up3(25) = 0.54661649e-11_realType
+        up3(26) = 0.31700296e-11_realType
+        up3(27) = 0.15722041e-11_realType
+        up3(28) = 0.97074333e-12_realType
+        up3(29) = 0.50475514e-12_realType
+        up3(30) = 0.32254746e-12_realType
+        up3(31) = 0.16247920e-12_realType
+        up3(32) = 0.11432002e-12_realType
+        up3(33) = 0.59121027e-13_realType
+        up3(34) = 0.38726995e-13_realType
+
+        ! Set the values of tuLogFit. Both for k and omega the
+        ! logarithm has been fitted.
+
+        tuLogFit(itu1) = .true.
+        tuLogFit(itu2) = .true.
+
+        ! Set the values of constants for the cubic fits of the
+        ! non-dimensional k and omega values.
+
+        ! Constants for k.
+
+        tup0(1, itu1) = -0.10178274e+02_realType
+        tup0(2, itu1) = -0.79134047e+01_realType
+        tup0(3, itu1) = -0.62154735e+01_realType
+        tup0(4, itu1) = -0.48268972e+01_realType
+        tup0(5, itu1) = -0.36279650e+01_realType
+        tup0(6, itu1) = -0.25597781e+01_realType
+        tup0(7, itu1) = -0.16005079e+01_realType
+        tup0(8, itu1) = -0.76521262e+00_realType
+        tup0(9, itu1) = -0.10076775e+00_realType
+        tup0(10, itu1) = 0.36262719e+00_realType
+        tup0(11, itu1) = 0.65553877e+00_realType
+        tup0(12, itu1) = 0.83590897e+00_realType
+        tup0(13, itu1) = 0.94909088e+00_realType
+        tup0(14, itu1) = 0.10224941e+01_realType
+        tup0(15, itu1) = 0.10717000e+01_realType
+        tup0(16, itu1) = 0.11056409e+01_realType
+        tup0(17, itu1) = 0.11295908e+01_realType
+        tup0(18, itu1) = 0.11467673e+01_realType
+        tup0(19, itu1) = 0.11591867e+01_realType
+        tup0(20, itu1) = 0.11681570e+01_realType
+        tup0(21, itu1) = 0.11745296e+01_realType
+        tup0(22, itu1) = 0.11788734e+01_realType
+        tup0(23, itu1) = 0.11815615e+01_realType
+        tup0(24, itu1) = 0.11828278e+01_realType
+        tup0(25, itu1) = 0.11828094e+01_realType
+        tup0(26, itu1) = 0.11815707e+01_realType
+        tup0(27, itu1) = 0.11791103e+01_realType
+        tup0(28, itu1) = 0.11753665e+01_realType
+        tup0(29, itu1) = 0.11702319e+01_realType
+        tup0(30, itu1) = 0.11635476e+01_realType
+        tup0(31, itu1) = 0.11550903e+01_realType
+        tup0(32, itu1) = 0.11445826e+01_realType
+        tup0(33, itu1) = 0.11316601e+01_realType
+        tup0(34, itu1) = 0.11158659e+01_realType
+
+        ! Constants for omega.
+
+        tup0(1, itu2) = 0.68385895e+01_realType
+        tup0(2, itu2) = 0.55423492e+01_realType
+        tup0(3, itu2) = 0.45364394e+01_realType
+        tup0(4, itu2) = 0.37003435e+01_realType
+        tup0(5, itu2) = 0.29762436e+01_realType
+        tup0(6, itu2) = 0.23400254e+01_realType
+        tup0(7, itu2) = 0.17897909e+01_realType
+        tup0(8, itu2) = 0.13296526e+01_realType
+        tup0(9, itu2) = 0.94313517e+00_realType
+        tup0(10, itu2) = 0.59512633e+00_realType
+        tup0(11, itu2) = 0.26383242e+00_realType
+        tup0(12, itu2) = -0.54289357e-01_realType
+        tup0(13, itu2) = -0.35764684e+00_realType
+        tup0(14, itu2) = -0.64548336e+00_realType
+        tup0(15, itu2) = -0.91832029e+00_realType
+        tup0(16, itu2) = -0.11773601e+01_realType
+        tup0(17, itu2) = -0.14240004e+01_realType
+        tup0(18, itu2) = -0.16596108e+01_realType
+        tup0(19, itu2) = -0.18854088e+01_realType
+        tup0(20, itu2) = -0.21024564e+01_realType
+        tup0(21, itu2) = -0.23116299e+01_realType
+        tup0(22, itu2) = -0.25136741e+01_realType
+        tup0(23, itu2) = -0.27091934e+01_realType
+        tup0(24, itu2) = -0.28986818e+01_realType
+        tup0(25, itu2) = -0.30825349e+01_realType
+        tup0(26, itu2) = -0.32610659e+01_realType
+        tup0(27, itu2) = -0.34345194e+01_realType
+        tup0(28, itu2) = -0.36030725e+01_realType
+        tup0(29, itu2) = -0.37668496e+01_realType
+        tup0(30, itu2) = -0.39259191e+01_realType
+        tup0(31, itu2) = -0.40803056e+01_realType
+        tup0(32, itu2) = -0.42299856e+01_realType
+        tup0(33, itu2) = -0.43749001e+01_realType
+        tup0(34, itu2) = -0.45149548e+01_realType
+
+        ! Constants for k.
+
+        tup1(1, itu1) = 0.10151083e+02_realType
+        tup1(2, itu1) = 0.54871316e+01_realType
+        tup1(3, itu1) = 0.33494093e+01_realType
+        tup1(4, itu1) = 0.22113000e+01_realType
+        tup1(5, itu1) = 0.15331218e+01_realType
+        tup1(6, itu1) = 0.10899838e+01_realType
+        tup1(7, itu1) = 0.77051060e+00_realType
+        tup1(8, itu1) = 0.51657998e+00_realType
+        tup1(9, itu1) = 0.31302624e+00_realType
+        tup1(10, itu1) = 0.16986834e+00_realType
+        tup1(11, itu1) = 0.86387987e-01_realType
+        tup1(12, itu1) = 0.43725644e-01_realType
+        tup1(13, itu1) = 0.22772335e-01_realType
+        tup1(14, itu1) = 0.12310034e-01_realType
+        tup1(15, itu1) = 0.68940825e-02_realType
+        tup1(16, itu1) = 0.39792104e-02_realType
+        tup1(17, itu1) = 0.23523017e-02_realType
+        tup1(18, itu1) = 0.14137727e-02_realType
+        tup1(19, itu1) = 0.85642296e-03_realType
+        tup1(20, itu1) = 0.51672343e-03_realType
+        tup1(21, itu1) = 0.30463346e-03_realType
+        tup1(22, itu1) = 0.16929149e-03_realType
+        tup1(23, itu1) = 0.80893185e-04_realType
+        tup1(24, itu1) = 0.21762685e-04_realType
+        tup1(25, itu1) = -0.18748602e-04_realType
+        tup1(26, itu1) = -0.47330395e-04_realType
+        tup1(27, itu1) = -0.68310393e-04_realType
+        tup1(28, itu1) = -0.84371684e-04_realType
+        tup1(29, itu1) = -0.97226185e-04_realType
+        tup1(30, itu1) = -0.10813606e-03_realType
+        tup1(31, itu1) = -0.11791513e-03_realType
+        tup1(32, itu1) = -0.12717807e-03_realType
+        tup1(33, itu1) = -0.13644855e-03_realType
+        tup1(34, itu1) = -0.14611996e-03_realType
+
+        ! Constants for omega.
+
+        tup1(1, itu2) = -0.55838269e+01_realType
+        tup1(2, itu2) = -0.31876950e+01_realType
+        tup1(3, itu2) = -0.19989037e+01_realType
+        tup1(4, itu2) = -0.13333526e+01_realType
+        tup1(5, itu2) = -0.91990459e+00_realType
+        tup1(6, itu2) = -0.63785038e+00_realType
+        tup1(7, itu2) = -0.43381141e+00_realType
+        tup1(8, itu2) = -0.29162744e+00_realType
+        tup1(9, itu2) = -0.20386405e+00_realType
+        tup1(10, itu2) = -0.15257310e+00_realType
+        tup1(11, itu2) = -0.11853766e+00_realType
+        tup1(12, itu2) = -0.92571574e-01_realType
+        tup1(13, itu2) = -0.72154025e-01_realType
+        tup1(14, itu2) = -0.56291949e-01_realType
+        tup1(15, itu2) = -0.44100353e-01_realType
+        tup1(16, itu2) = -0.34758707e-01_realType
+        tup1(17, itu2) = -0.27583190e-01_realType
+        tup1(18, itu2) = -0.22041103e-01_realType
+        tup1(19, itu2) = -0.17731129e-01_realType
+        tup1(20, itu2) = -0.14354453e-01_realType
+        tup1(21, itu2) = -0.11689595e-01_realType
+        tup1(22, itu2) = -0.95711712e-02_realType
+        tup1(23, itu2) = -0.78759450e-02_realType
+        tup1(24, itu2) = -0.65109013e-02_realType
+        tup1(25, itu2) = -0.54047659e-02_realType
+        tup1(26, itu2) = -0.45036146e-02_realType
+        tup1(27, itu2) = -0.37655964e-02_realType
+        tup1(28, itu2) = -0.31581617e-02_realType
+        tup1(29, itu2) = -0.26558406e-02_realType
+        tup1(30, itu2) = -0.22385872e-02_realType
+        tup1(31, itu2) = -0.18905375e-02_realType
+        tup1(32, itu2) = -0.15990497e-02_realType
+        tup1(33, itu2) = -0.13540430e-02_realType
+        tup1(34, itu2) = -0.11473506e-02_realType
+
+        ! Constants for k.
+
+        tup2(1, itu1) = -0.13708334e+02_realType
+        tup2(2, itu1) = -0.43370192e+01_realType
+        tup2(3, itu1) = -0.16256260e+01_realType
+        tup2(4, itu1) = -0.69729756e+00_realType
+        tup2(5, itu1) = -0.32831484e+00_realType
+        tup2(6, itu1) = -0.16501608e+00_realType
+        tup2(7, itu1) = -0.93271929e-01_realType
+        tup2(8, itu1) = -0.66905532e-01_realType
+        tup2(9, itu1) = -0.49449221e-01_realType
+        tup2(10, itu1) = -0.27955265e-01_realType
+        tup2(11, itu1) = -0.12356466e-01_realType
+        tup2(12, itu1) = -0.49538360e-02_realType
+        tup2(13, itu1) = -0.19816552e-02_realType
+        tup2(14, itu1) = -0.82035726e-03_realType
+        tup2(15, itu1) = -0.35459496e-03_realType
+        tup2(16, itu1) = -0.15988154e-03_realType
+        tup2(17, itu1) = -0.74920111e-04_realType
+        tup2(18, itu1) = -0.36432844e-04_realType
+        tup2(19, itu1) = -0.18228517e-04_realType
+        tup2(20, itu1) = -0.94187249e-05_realType
+        tup2(21, itu1) = -0.49803455e-05_realType
+        tup2(22, itu1) = -0.26991628e-05_realType
+        tup2(23, itu1) = -0.15033797e-05_realType
+        tup2(24, itu1) = -0.85865308e-06_realType
+        tup2(25, itu1) = -0.50010225e-06_realType
+        tup2(26, itu1) = -0.30003597e-06_realType
+        tup2(27, itu1) = -0.18914449e-06_realType
+        tup2(28, itu1) = -0.12284699e-06_realType
+        tup2(29, itu1) = -0.82382282e-07_realType
+        tup2(30, itu1) = -0.60415725e-07_realType
+        tup2(31, itu1) = -0.44704797e-07_realType
+        tup2(32, itu1) = -0.36272580e-07_realType
+        tup2(33, itu1) = -0.30797090e-07_realType
+        tup2(34, itu1) = -0.27066472e-07_realType
+
+        ! Constants for omega.
+
+        tup2(1, itu2) = 0.65688815e+01_realType
+        tup2(2, itu2) = 0.22942977e+01_realType
+        tup2(3, itu2) = 0.91326107e+00_realType
+        tup2(4, itu2) = 0.40527609e+00_realType
+        tup2(5, itu2) = 0.19819770e+00_realType
+        tup2(6, itu2) = 0.11119507e+00_realType
+        tup2(7, itu2) = 0.71308510e-01_realType
+        tup2(8, itu2) = 0.41419218e-01_realType
+        tup2(9, itu2) = 0.18358706e-01_realType
+        tup2(10, itu2) = 0.79158389e-02_realType
+        tup2(11, itu2) = 0.45072385e-02_realType
+        tup2(12, itu2) = 0.29542525e-02_realType
+        tup2(13, itu2) = 0.19335211e-02_realType
+        tup2(14, itu2) = 0.12420728e-02_realType
+        tup2(15, itu2) = 0.79078278e-03_realType
+        tup2(16, itu2) = 0.50394724e-03_realType
+        tup2(17, itu2) = 0.32312896e-03_realType
+        tup2(18, itu2) = 0.20923598e-03_realType
+        tup2(19, itu2) = 0.13683514e-03_realType
+        tup2(20, itu2) = 0.90568628e-04_realType
+        tup2(21, itu2) = 0.60506139e-04_realType
+        tup2(22, itu2) = 0.40936800e-04_realType
+        tup2(23, itu2) = 0.27920509e-04_realType
+        tup2(24, itu2) = 0.19242656e-04_realType
+        tup2(25, itu2) = 0.13394215e-04_realType
+        tup2(26, itu2) = 0.93908861e-05_realType
+        tup2(27, itu2) = 0.66475834e-05_realType
+        tup2(28, itu2) = 0.47374937e-05_realType
+        tup2(29, itu2) = 0.34060533e-05_realType
+        tup2(30, itu2) = 0.24642067e-05_realType
+        tup2(31, itu2) = 0.17969941e-05_realType
+        tup2(32, itu2) = 0.13185213e-05_realType
+        tup2(33, itu2) = 0.97355039e-06_realType
+        tup2(34, itu2) = 0.72438303e-06_realType
+
+        ! Constants for k.
+
+        tup3(1, itu1) = 0.13357255e+02_realType
+        tup3(2, itu1) = 0.27962653e+01_realType
+        tup3(3, itu1) = 0.67564983e+00_realType
+        tup3(4, itu1) = 0.18227593e+00_realType
+        tup3(5, itu1) = 0.48230769e-01_realType
+        tup3(6, itu1) = 0.69085885e-02_realType
+        tup3(7, itu1) = -0.25075980e-02_realType
+        tup3(8, itu1) = 0.15198665e-02_realType
+        tup3(9, itu1) = 0.45292571e-02_realType
+        tup3(10, itu1) = 0.29768816e-02_realType
+        tup3(11, itu1) = 0.11684431e-02_realType
+        tup3(12, itu1) = 0.38217835e-03_realType
+        tup3(13, itu1) = 0.12135735e-03_realType
+        tup3(14, itu1) = 0.39609332e-04_realType
+        tup3(15, itu1) = 0.13512654e-04_realType
+        tup3(16, itu1) = 0.48259094e-05_realType
+        tup3(17, itu1) = 0.17973362e-05_realType
+        tup3(18, itu1) = 0.70093792e-06_realType
+        tup3(19, itu1) = 0.28079141e-06_realType
+        tup3(20, itu1) = 0.11741966e-06_realType
+        tup3(21, itu1) = 0.50025034e-07_realType
+        tup3(22, itu1) = 0.21729950e-07_realType
+        tup3(23, itu1) = 0.96902710e-08_realType
+        tup3(24, itu1) = 0.43926199e-08_realType
+        tup3(25, itu1) = 0.19274189e-08_realType
+        tup3(26, itu1) = 0.80093554e-09_realType
+        tup3(27, itu1) = 0.33523262e-09_realType
+        tup3(28, itu1) = 0.10603408e-09_realType
+        tup3(29, itu1) = -0.14220954e-10_realType
+        tup3(30, itu1) = -0.43245103e-10_realType
+        tup3(31, itu1) = -0.71330147e-10_realType
+        tup3(32, itu1) = -0.73789519e-10_realType
+        tup3(33, itu1) = -0.73223977e-10_realType
+        tup3(34, itu1) = -0.73679919e-10_realType
+
+        ! Constants for omega.
+
+        tup3(1, itu2) = -0.58652639e+01_realType
+        tup3(2, itu2) = -0.13617293e+01_realType
+        tup3(3, itu2) = -0.34682427e+00_realType
+        tup3(4, itu2) = -0.90911692e-01_realType
+        tup3(5, itu2) = -0.21991054e-01_realType
+        tup3(6, itu2) = -0.81494825e-02_realType
+        tup3(7, itu2) = -0.84291839e-02_realType
+        tup3(8, itu2) = -0.58630201e-02_realType
+        tup3(9, itu2) = -0.18374198e-02_realType
+        tup3(10, itu2) = -0.26966903e-03_realType
+        tup3(11, itu2) = -0.45904311e-04_realType
+        tup3(12, itu2) = -0.34368125e-04_realType
+        tup3(13, itu2) = -0.25332953e-04_realType
+        tup3(14, itu2) = -0.15345625e-04_realType
+        tup3(15, itu2) = -0.83950168e-05_realType
+        tup3(16, itu2) = -0.44072558e-05_realType
+        tup3(17, itu2) = -0.22740368e-05_realType
+        tup3(18, itu2) = -0.11801059e-05_realType
+        tup3(19, itu2) = -0.61295741e-06_realType
+        tup3(20, itu2) = -0.32633745e-06_realType
+        tup3(21, itu2) = -0.17280693e-06_realType
+        tup3(22, itu2) = -0.95608569e-07_realType
+        tup3(23, itu2) = -0.52411878e-07_realType
+        tup3(24, itu2) = -0.29366409e-07_realType
+        tup3(25, itu2) = -0.16959125e-07_realType
+        tup3(26, itu2) = -0.97228139e-08_realType
+        tup3(27, itu2) = -0.57661683e-08_realType
+        tup3(28, itu2) = -0.33988144e-08_realType
+        tup3(29, itu2) = -0.20698972e-08_realType
+        tup3(30, itu2) = -0.12548468e-08_realType
+        tup3(31, itu2) = -0.78279227e-09_realType
+        tup3(32, itu2) = -0.49051180e-09_realType
+        tup3(33, itu2) = -0.30968731e-09_realType
+        tup3(34, itu2) = -0.20495795e-09_realType
+
+    end subroutine initCurveFitDataSST
+
+    subroutine initCurveFitDataSa
+        !
+        !       initCurveFitDataSa contains the curve fit constants for
+        !       the wall function data for the Spalart-Allmaras turbulence
+        !       model.
+        !
+        use constants
+        use flowVarRefState
+        use paramTurb
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        ! Set the number of data points and allocate the memory for the
+        ! arrays of the curve fits.
+
+        nFit = 34
+
+        allocate (ypT(0:nFit), reT(0:nFit), &
+                  up0(nFit), up1(nFit), up2(nFit), up3(nFit), &
+                  tup0(nFit, nt1:nt2), tup1(nFit, nt1:nt2), &
+                  tup2(nFit, nt1:nt2), tup3(nFit, nt1:nt2), &
+                  tuLogFit(nt1:nt2), stat=ierr)
+        if (ierr /= 0) &
+             call terminate("initCurveFitDataSa", &
+             "Memory allocation failure for curve fit &
+             &coefficients")
+
+        ! Set the values of the Reynolds numbers at interval boundaries.
+
+        reT(0) = 0.12361553e+00_realType
+        reT(1) = 0.44392837e+00_realType
+        reT(2) = 0.11425793e+01_realType
+        reT(3) = 0.25011739e+01_realType
+        reT(4) = 0.49762007e+01_realType
+        reT(5) = 0.92920979e+01_realType
+        reT(6) = 0.16564578e+02_realType
+        reT(7) = 0.28414621e+02_realType
+        reT(8) = 0.46909987e+02_realType
+        reT(9) = 0.73988906e+02_realType
+        reT(10) = 0.11046933e+03_realType
+        reT(11) = 0.15636562e+03_realType
+        reT(12) = 0.21263059e+03_realType
+        reT(13) = 0.28162960e+03_realType
+        reT(14) = 0.36666795e+03_realType
+        reT(15) = 0.47173270e+03_realType
+        reT(16) = 0.60148482e+03_realType
+        reT(17) = 0.76135031e+03_realType
+        reT(18) = 0.95763636e+03_realType
+        reT(19) = 0.11976883e+04_realType
+        reT(20) = 0.14900416e+04_realType
+        reT(21) = 0.18445991e+04_realType
+        reT(22) = 0.22728484e+04_realType
+        reT(23) = 0.27879873e+04_realType
+        reT(24) = 0.34052767e+04_realType
+        reT(25) = 0.41422400e+04_realType
+        reT(26) = 0.50189226e+04_realType
+        reT(27) = 0.60583758e+04_realType
+        reT(28) = 0.72868976e+04_realType
+        reT(29) = 0.87344007e+04_realType
+        reT(30) = 0.10435120e+05_realType
+        reT(31) = 0.12427867e+05_realType
+        reT(32) = 0.14756830e+05_realType
+        reT(33) = 0.17471977e+05_realType
+        reT(34) = 0.20629717e+05_realType
+
+        ! Set the values of the y+ values at interval boundaries.
+
+        ypT(0) = 0.35159200e+00_realType
+        ypT(1) = 0.66628400e+00_realType
+        ypT(2) = 0.10689300e+01_realType
+        ypT(3) = 0.15816000e+01_realType
+        ypT(4) = 0.22312000e+01_realType
+        ypT(5) = 0.30504200e+01_realType
+        ypT(6) = 0.40787900e+01_realType
+        ypT(7) = 0.53638300e+01_realType
+        ypT(8) = 0.69624800e+01_realType
+        ypT(9) = 0.89426300e+01_realType
+        ypT(10) = 0.11384800e+02_realType
+        ypT(11) = 0.14384400e+02_realType
+        ypT(12) = 0.18053200e+02_realType
+        ypT(13) = 0.22522800e+02_realType
+        ypT(14) = 0.27946400e+02_realType
+        ypT(15) = 0.34502300e+02_realType
+        ypT(16) = 0.42396900e+02_realType
+        ypT(17) = 0.51868400e+02_realType
+        ypT(18) = 0.63190300e+02_realType
+        ypT(19) = 0.76676100e+02_realType
+        ypT(20) = 0.92683300e+02_realType
+        ypT(21) = 0.11161800e+03_realType
+        ypT(22) = 0.13394200e+03_realType
+        ypT(23) = 0.16017300e+03_realType
+        ypT(24) = 0.19089800e+03_realType
+        ypT(25) = 0.22677200e+03_realType
+        ypT(26) = 0.26852800e+03_realType
+        ypT(27) = 0.31698500e+03_realType
+        ypT(28) = 0.37305000e+03_realType
+        ypT(29) = 0.43772900e+03_realType
+        ypT(30) = 0.51213300e+03_realType
+        ypT(31) = 0.59748500e+03_realType
+        ypT(32) = 0.69512600e+03_realType
+        ypT(33) = 0.80652800e+03_realType
+        ypT(34) = 0.93329400e+03_realType
+
+        ! Set the values of constants for the cubic fits of the
+        ! non-dimensional tangential velocity.
+
+        up0(1) = 0.35158800e+00_realType
+        up0(2) = 0.66627500e+00_realType
+        up0(3) = 0.10689000e+01_realType
+        up0(4) = 0.15814200e+01_realType
+        up0(5) = 0.22302800e+01_realType
+        up0(6) = 0.30461700e+01_realType
+        up0(7) = 0.40611500e+01_realType
+        up0(8) = 0.52974500e+01_realType
+        up0(9) = 0.67375400e+01_realType
+        up0(10) = 0.82737300e+01_realType
+        up0(11) = 0.97032300e+01_realType
+        up0(12) = 0.10870500e+02_realType
+        up0(13) = 0.11778000e+02_realType
+        up0(14) = 0.12504200e+02_realType
+        up0(15) = 0.13120400e+02_realType
+        up0(16) = 0.13672500e+02_realType
+        up0(17) = 0.14187000e+02_realType
+        up0(18) = 0.14678500e+02_realType
+        up0(19) = 0.15154800e+02_realType
+        up0(20) = 0.15620100e+02_realType
+        up0(21) = 0.16076700e+02_realType
+        up0(22) = 0.16526000e+02_realType
+        up0(23) = 0.16968900e+02_realType
+        up0(24) = 0.17406100e+02_realType
+        up0(25) = 0.17838200e+02_realType
+        up0(26) = 0.18266100e+02_realType
+        up0(27) = 0.18690500e+02_realType
+        up0(28) = 0.19112500e+02_realType
+        up0(29) = 0.19533300e+02_realType
+        up0(30) = 0.19953900e+02_realType
+        up0(31) = 0.20375800e+02_realType
+        up0(32) = 0.20800300e+02_realType
+        up0(33) = 0.21229000e+02_realType
+        up0(34) = 0.21663200e+02_realType
+
+        up1(1) = 0.12933934e+01_realType
+        up1(2) = 0.70396224e+00_realType
+        up1(3) = 0.44483996e+00_realType
+        up1(4) = 0.30294593e+00_realType
+        up1(5) = 0.21569230e+00_realType
+        up1(6) = 0.15799192e+00_realType
+        up1(7) = 0.11772923e+00_realType
+        up1(8) = 0.88197525e-01_realType
+        up1(9) = 0.65306126e-01_realType
+        up1(10) = 0.46660172e-01_realType
+        up1(11) = 0.31523107e-01_realType
+        up1(12) = 0.20308775e-01_realType
+        up1(13) = 0.13042058e-01_realType
+        up1(14) = 0.87147691e-02_realType
+        up1(15) = 0.61456125e-02_realType
+        up1(16) = 0.45422630e-02_realType
+        up1(17) = 0.34735457e-02_realType
+        up1(18) = 0.27173826e-02_realType
+        up1(19) = 0.21579599e-02_realType
+        up1(20) = 0.17315757e-02_realType
+        up1(21) = 0.14003478e-02_realType
+        up1(22) = 0.11397448e-02_realType
+        up1(23) = 0.93291395e-03_realType
+        up1(24) = 0.76764242e-03_realType
+        up1(25) = 0.63503654e-03_realType
+        up1(26) = 0.52818280e-03_realType
+        up1(27) = 0.44172235e-03_realType
+        up1(28) = 0.37160904e-03_realType
+        up1(29) = 0.31442159e-03_realType
+        up1(30) = 0.26761137e-03_realType
+        up1(31) = 0.22916141e-03_realType
+        up1(32) = 0.19742184e-03_realType
+        up1(33) = 0.17107081e-03_realType
+        up1(34) = 0.14902380e-03_realType
+
+        up2(1) = -0.10722013e+01_realType
+        up2(2) = -0.17733707e+00_realType
+        up2(3) = -0.44823897e-01_realType
+        up2(4) = -0.14179933e-01_realType
+        up2(5) = -0.51548071e-02_realType
+        up2(6) = -0.20652649e-02_realType
+        up2(7) = -0.90040092e-03_realType
+        up2(8) = -0.43873478e-03_realType
+        up2(9) = -0.26153548e-03_realType
+        up2(10) = -0.19975811e-03_realType
+        up2(11) = -0.15375234e-03_realType
+        up2(12) = -0.93708131e-04_realType
+        up2(13) = -0.46732800e-04_realType
+        up2(14) = -0.21598767e-04_realType
+        up2(15) = -0.10173953e-04_realType
+        up2(16) = -0.51044453e-05_realType
+        up2(17) = -0.27591627e-05_realType
+        up2(18) = -0.15948319e-05_realType
+        up2(19) = -0.96856201e-06_realType
+        up2(20) = -0.60909779e-06_realType
+        up2(21) = -0.39147369e-06_realType
+        up2(22) = -0.25632692e-06_realType
+        up2(23) = -0.16958665e-06_realType
+        up2(24) = -0.11394020e-06_realType
+        up2(25) = -0.76500636e-07_realType
+        up2(26) = -0.52236558e-07_realType
+        up2(27) = -0.35697343e-07_realType
+        up2(28) = -0.24471063e-07_realType
+        up2(29) = -0.17096052e-07_realType
+        up2(30) = -0.11859363e-07_realType
+        up2(31) = -0.83689945e-08_realType
+        up2(32) = -0.58800329e-08_realType
+        up2(33) = -0.42032634e-08_realType
+        up2(34) = -0.30343729e-08_realType
+
+        up3(1) = 0.31659592e+00_realType
+        up3(2) = -0.77365039e-02_realType
+        up3(3) = -0.36297270e-02_realType
+        up3(4) = -0.92844018e-03_realType
+        up3(5) = -0.23630863e-03_realType
+        up3(6) = -0.64433685e-04_realType
+        up3(7) = -0.19446239e-04_realType
+        up3(8) = -0.64919565e-05_realType
+        up3(9) = -0.20373446e-05_realType
+        up3(10) = -0.14090103e-06_realType
+        up3(11) = 0.45874410e-06_realType
+        up3(12) = 0.34517950e-06_realType
+        up3(13) = 0.14855464e-06_realType
+        up3(14) = 0.50901715e-07_realType
+        up3(15) = 0.16140276e-07_realType
+        up3(16) = 0.50667962e-08_realType
+        up3(17) = 0.16437361e-08_realType
+        up3(18) = 0.57675302e-09_realType
+        up3(19) = 0.22343487e-09_realType
+        up3(20) = 0.97170203e-10_realType
+        up3(21) = 0.45068635e-10_realType
+        up3(22) = 0.23106070e-10_realType
+        up3(23) = 0.11870070e-10_realType
+        up3(24) = 0.70527690e-11_realType
+        up3(25) = 0.36226728e-11_realType
+        up3(26) = 0.22246040e-11_realType
+        up3(27) = 0.12643111e-11_realType
+        up3(28) = 0.64910588e-12_realType
+        up3(29) = 0.42682810e-12_realType
+        up3(30) = 0.21768527e-12_realType
+        up3(31) = 0.13556644e-12_realType
+        up3(32) = 0.63772628e-13_realType
+        up3(33) = 0.35175874e-13_realType
+        up3(34) = 0.21542623e-13_realType
+
+        ! Set the values of tuLogFit to .false., because a linear
+        ! fit has been used.
+
+        tuLogFit(itu1) = .false.
+
+        ! Set the values of constants for the cubic fits of the
+        ! non-dimensional spalart-allmaras viscosity.
+
+        tup0(1, itu1) = 0.14399200e+00_realType
+        tup0(2, itu1) = 0.27285000e+00_realType
+        tup0(3, itu1) = 0.43767100e+00_realType
+        tup0(4, itu1) = 0.64739300e+00_realType
+        tup0(5, itu1) = 0.91283700e+00_realType
+        tup0(6, itu1) = 0.12469600e+01_realType
+        tup0(7, itu1) = 0.16651800e+01_realType
+        tup0(8, itu1) = 0.21861800e+01_realType
+        tup0(9, itu1) = 0.28347900e+01_realType
+        tup0(10, itu1) = 0.36492600e+01_realType
+        tup0(11, itu1) = 0.46812500e+01_realType
+        tup0(12, itu1) = 0.59588800e+01_realType
+        tup0(13, itu1) = 0.74961200e+01_realType
+        tup0(14, itu1) = 0.93387200e+01_realType
+        tup0(15, itu1) = 0.11555500e+02_realType
+        tup0(16, itu1) = 0.14225700e+02_realType
+        tup0(17, itu1) = 0.17436000e+02_realType
+        tup0(18, itu1) = 0.21280700e+02_realType
+        tup0(19, itu1) = 0.25863100e+02_realType
+        tup0(20, itu1) = 0.31296700e+02_realType
+        tup0(21, itu1) = 0.37704800e+02_realType
+        tup0(22, itu1) = 0.45218300e+02_realType
+        tup0(23, itu1) = 0.53972900e+02_realType
+        tup0(24, itu1) = 0.64103200e+02_realType
+        tup0(25, itu1) = 0.75735500e+02_realType
+        tup0(26, itu1) = 0.88977700e+02_realType
+        tup0(27, itu1) = 0.10390800e+03_realType
+        tup0(28, itu1) = 0.12056400e+03_realType
+        tup0(29, itu1) = 0.13892700e+03_realType
+        tup0(30, itu1) = 0.15892000e+03_realType
+        tup0(31, itu1) = 0.18039200e+03_realType
+        tup0(32, itu1) = 0.20312000e+03_realType
+        tup0(33, itu1) = 0.22680300e+03_realType
+        tup0(34, itu1) = 0.25105400e+03_realType
+
+        tup1(1, itu1) = 0.40950260e+00_realType
+        tup1(2, itu1) = 0.40940115e+00_realType
+        tup1(3, itu1) = 0.40919529e+00_realType
+        tup1(4, itu1) = 0.40882583e+00_realType
+        tup1(5, itu1) = 0.40819638e+00_realType
+        tup1(6, itu1) = 0.40720236e+00_realType
+        tup1(7, itu1) = 0.40598943e+00_realType
+        tup1(8, itu1) = 0.40559491e+00_realType
+        tup1(9, itu1) = 0.40881860e+00_realType
+        tup1(10, itu1) = 0.41753197e+00_realType
+        tup1(11, itu1) = 0.42442441e+00_realType
+        tup1(12, itu1) = 0.42212075e+00_realType
+        tup1(13, itu1) = 0.41529539e+00_realType
+        tup1(14, itu1) = 0.41032022e+00_realType
+        tup1(15, itu1) = 0.40794524e+00_realType
+        tup1(16, itu1) = 0.40694094e+00_realType
+        tup1(17, itu1) = 0.40625126e+00_realType
+        tup1(18, itu1) = 0.40527764e+00_realType
+        tup1(19, itu1) = 0.40374561e+00_realType
+        tup1(20, itu1) = 0.40150883e+00_realType
+        tup1(21, itu1) = 0.39842138e+00_realType
+        tup1(22, itu1) = 0.39429502e+00_realType
+        tup1(23, itu1) = 0.38893832e+00_realType
+        tup1(24, itu1) = 0.38209495e+00_realType
+        tup1(25, itu1) = 0.37349660e+00_realType
+        tup1(26, itu1) = 0.36290738e+00_realType
+        tup1(27, itu1) = 0.35013025e+00_realType
+        tup1(28, itu1) = 0.33503951e+00_realType
+        tup1(29, itu1) = 0.31766382e+00_realType
+        tup1(30, itu1) = 0.29813133e+00_realType
+        tup1(31, itu1) = 0.27667192e+00_realType
+        tup1(32, itu1) = 0.25362172e+00_realType
+        tup1(33, itu1) = 0.22930211e+00_realType
+        tup1(34, itu1) = 0.20402405e+00_realType
+
+        tup2(1, itu1) = 0.43946228e-04_realType
+        tup2(2, itu1) = 0.90566870e-04_realType
+        tup2(3, itu1) = 0.34081055e-04_realType
+        tup2(4, itu1) = 0.50034075e-04_realType
+        tup2(5, itu1) = -0.36629521e-04_realType
+        tup2(6, itu1) = -0.33730915e-03_realType
+        tup2(7, itu1) = -0.98768734e-03_realType
+        tup2(8, itu1) = -0.17750541e-02_realType
+        tup2(9, itu1) = -0.61469972e-03_realType
+        tup2(10, itu1) = 0.33676511e-02_realType
+        tup2(11, itu1) = 0.22772412e-02_realType
+        tup2(12, itu1) = -0.68862307e-03_realType
+        tup2(13, itu1) = -0.92984441e-03_realType
+        tup2(14, itu1) = -0.44253269e-03_realType
+        tup2(15, itu1) = -0.14333421e-03_realType
+        tup2(16, itu1) = -0.25078749e-04_realType
+        tup2(17, itu1) = -0.11675748e-05_realType
+        tup2(18, itu1) = -0.77479474e-05_realType
+        tup2(19, itu1) = -0.19425991e-04_realType
+        tup2(20, itu1) = -0.28782981e-04_realType
+        tup2(21, itu1) = -0.37198549e-04_realType
+        tup2(22, itu1) = -0.46839769e-04_realType
+        tup2(23, itu1) = -0.52777911e-04_realType
+        tup2(24, itu1) = -0.61987420e-04_realType
+        tup2(25, itu1) = -0.69912435e-04_realType
+        tup2(26, itu1) = -0.78150190e-04_realType
+        tup2(27, itu1) = -0.84976835e-04_realType
+        tup2(28, itu1) = -0.91879236e-04_realType
+        tup2(29, itu1) = -0.94706478e-04_realType
+        tup2(30, itu1) = -0.96428722e-04_realType
+        tup2(31, itu1) = -0.95007410e-04_realType
+        tup2(32, itu1) = -0.91049468e-04_realType
+        tup2(33, itu1) = -0.85824231e-04_realType
+        tup2(34, itu1) = -0.79305036e-04_realType
+
+        tup3(1, itu1) = -0.43457258e-03_realType
+        tup3(2, itu1) = -0.57319466e-03_realType
+        tup3(3, itu1) = -0.51288659e-03_realType
+        tup3(4, itu1) = -0.54857331e-03_realType
+        tup3(5, itu1) = -0.46390243e-03_realType
+        tup3(6, itu1) = -0.16364047e-03_realType
+        tup3(7, itu1) = 0.43276759e-03_realType
+        tup3(8, itu1) = 0.11606902e-02_realType
+        tup3(9, itu1) = 0.94769940e-03_realType
+        tup3(10, itu1) = -0.53409400e-03_realType
+        tup3(11, itu1) = -0.59146449e-03_realType
+        tup3(12, itu1) = -0.43895639e-04_realType
+        tup3(13, itu1) = 0.55678050e-04_realType
+        tup3(14, itu1) = 0.27482853e-04_realType
+        tup3(15, itu1) = 0.67866418e-05_realType
+        tup3(16, itu1) = -0.15708231e-05_realType
+        tup3(17, itu1) = -0.35355159e-05_realType
+        tup3(18, itu1) = -0.35276554e-05_realType
+        tup3(19, itu1) = -0.31393462e-05_realType
+        tup3(20, itu1) = -0.28177541e-05_realType
+        tup3(21, itu1) = -0.25267299e-05_realType
+        tup3(22, itu1) = -0.21840943e-05_realType
+        tup3(23, itu1) = -0.19739075e-05_realType
+        tup3(24, itu1) = -0.16910644e-05_realType
+        tup3(25, itu1) = -0.14435078e-05_realType
+        tup3(26, itu1) = -0.11949962e-05_realType
+        tup3(27, itu1) = -0.97317619e-06_realType
+        tup3(28, itu1) = -0.75009409e-06_realType
+        tup3(29, itu1) = -0.58018935e-06_realType
+        tup3(30, itu1) = -0.42811291e-06_realType
+        tup3(31, itu1) = -0.31260995e-06_realType
+        tup3(32, itu1) = -0.22863635e-06_realType
+        tup3(33, itu1) = -0.16534708e-06_realType
+        tup3(34, itu1) = -0.12169915e-06_realType
+
+    end subroutine initCurveFitDataSa
+    subroutine initCurveFitDataSae
+        !
+        !       initCurveFitDataSae contains the curve fit constants for
+        !       the wall function data for the Spalart-Allmaras (Edwards
+        !       modification) turbulence model.
+        !
+        use flowVarRefState
+        use paramTurb
+        implicit none
+        !
+        !      Local variables.
+        !
+        !   integer :: ierr
+
+        call terminate("initCurveFitDataSae", &
+                       "Not implemented yet")
+
+    end subroutine initCurveFitDataSae
+
+    subroutine initCurveFitDataVf
+        !
+        !       initCurveFitDataVf contains the curve fit constants for
+        !       the wall function data for the v2-f turbulence model.
+        !
+        use constants
+        use flowVarRefState
+        use inputPhysics
+        use paramTurb
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: ierr
+
+        ! Determine the version of the v2-f model.
+
+        select case (rvfN)
+
+        case (1_intType)
+
+            ! Version 1 of the model.
+
+            ! Set the number of data points and allocate the memory for
+            ! the arrays of the curve fits.
+
+            nFit = 34
+
+            allocate (ypT(0:nFit), reT(0:nFit), &
+                      up0(nFit), up1(nFit), up2(nFit), up3(nFit), &
+                      tup0(nFit, nt1:nt2 + 1), tup1(nFit, nt1:nt2 + 1), &
+                      tup2(nFit, nt1:nt2 + 1), tup3(nFit, nt1:nt2 + 1), &
+                      tuLogFit(nt1:nt2 + 1), stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("initCurveFitDataVf", &
+                 "Memory allocation failure for curve fit &
+                 &coefficients")
+
+            ! Set the values of the Reynolds numbers at interval
+            ! boundaries.
+
+            reT(0) = 0.12795537e+00_realType
+            reT(1) = 0.45951248e+00_realType
+            reT(2) = 0.11826997e+01_realType
+            reT(3) = 0.25888649e+01_realType
+            reT(4) = 0.51497900e+01_realType
+            reT(5) = 0.96108242e+01_realType
+            reT(6) = 0.17107619e+02_realType
+            reT(7) = 0.29176338e+02_realType
+            reT(8) = 0.47402263e+02_realType
+            reT(9) = 0.73016653e+02_realType
+            reT(10) = 0.10712161e+03_realType
+            reT(11) = 0.15119025e+03_realType
+            reT(12) = 0.20726939e+03_realType
+            reT(13) = 0.27799459e+03_realType
+            reT(14) = 0.36660352e+03_realType
+            reT(15) = 0.47698645e+03_realType
+            reT(16) = 0.61377565e+03_realType
+            reT(17) = 0.78244065e+03_realType
+            reT(18) = 0.98942933e+03_realType
+            reT(19) = 0.12422738e+04_realType
+            reT(20) = 0.15497731e+04_realType
+            reT(21) = 0.19221837e+04_realType
+            reT(22) = 0.23713440e+04_realType
+            reT(23) = 0.29109888e+04_realType
+            reT(24) = 0.35569669e+04_realType
+            reT(25) = 0.43274783e+04_realType
+            reT(26) = 0.52434023e+04_realType
+            reT(27) = 0.63287428e+04_realType
+            reT(28) = 0.76109132e+04_realType
+            reT(29) = 0.91213906e+04_realType
+            reT(30) = 0.10896077e+05_realType
+            reT(31) = 0.12976000e+05_realType
+            reT(32) = 0.15408257e+05_realType
+            reT(33) = 0.18246449e+05_realType
+            reT(34) = 0.21552478e+05_realType
+
+            ! Set the values of the y+ values at interval boundaries.
+
+            ypT(0) = 0.35770800e+00_realType
+            ypT(1) = 0.67787500e+00_realType
+            ypT(2) = 0.10875400e+01_realType
+            ypT(3) = 0.16091400e+01_realType
+            ypT(4) = 0.22700900e+01_realType
+            ypT(5) = 0.31037100e+01_realType
+            ypT(6) = 0.41503100e+01_realType
+            ypT(7) = 0.54585000e+01_realType
+            ypT(8) = 0.70866100e+01_realType
+            ypT(9) = 0.91042300e+01_realType
+            ypT(10) = 0.11593900e+02_realType
+            ypT(11) = 0.14653200e+02_realType
+            ypT(12) = 0.18396800e+02_realType
+            ypT(13) = 0.22959200e+02_realType
+            ypT(14) = 0.28497300e+02_realType
+            ypT(15) = 0.35193900e+02_realType
+            ypT(16) = 0.43260500e+02_realType
+            ypT(17) = 0.52941300e+02_realType
+            ypT(18) = 0.64517200e+02_realType
+            ypT(19) = 0.78309700e+02_realType
+            ypT(20) = 0.94686000e+02_realType
+            ypT(21) = 0.11406400e+03_realType
+            ypT(22) = 0.13691600e+03_realType
+            ypT(23) = 0.16377700e+03_realType
+            ypT(24) = 0.19525000e+03_realType
+            ypT(25) = 0.23200900e+03_realType
+            ypT(26) = 0.27481000e+03_realType
+            ypT(27) = 0.32449600e+03_realType
+            ypT(28) = 0.38200300e+03_realType
+            ypT(29) = 0.44837100e+03_realType
+            ypT(30) = 0.52474800e+03_realType
+            ypT(31) = 0.61239900e+03_realType
+            ypT(32) = 0.71271500e+03_realType
+            ypT(33) = 0.82722200e+03_realType
+            ypT(34) = 0.95759000e+03_realType
+
+            ! Set the values of constants for the cubic fits of the
+            ! non-dimensional tangential velocity.
+
+            up0(1) = 0.35770900e+00_realType
+            up0(2) = 0.67787200e+00_realType
+            up0(3) = 0.10875000e+01_realType
+            up0(4) = 0.16088500e+01_realType
+            up0(5) = 0.22685400e+01_realType
+            up0(6) = 0.30965600e+01_realType
+            up0(7) = 0.41220100e+01_realType
+            up0(8) = 0.53451200e+01_realType
+            up0(9) = 0.66889900e+01_realType
+            up0(10) = 0.80200800e+01_realType
+            up0(11) = 0.92394800e+01_realType
+            up0(12) = 0.10317900e+02_realType
+            up0(13) = 0.11266600e+02_realType
+            up0(14) = 0.12108200e+02_realType
+            up0(15) = 0.12864500e+02_realType
+            up0(16) = 0.13553100e+02_realType
+            up0(17) = 0.14187900e+02_realType
+            up0(18) = 0.14779400e+02_realType
+            up0(19) = 0.15335900e+02_realType
+            up0(20) = 0.15863600e+02_realType
+            up0(21) = 0.16367500e+02_realType
+            up0(22) = 0.16851800e+02_realType
+            up0(23) = 0.17319700e+02_realType
+            up0(24) = 0.17774100e+02_realType
+            up0(25) = 0.18217500e+02_realType
+            up0(26) = 0.18652200e+02_realType
+            up0(27) = 0.19080100e+02_realType
+            up0(28) = 0.19503300e+02_realType
+            up0(29) = 0.19923700e+02_realType
+            up0(30) = 0.20343400e+02_realType
+            up0(31) = 0.20764400e+02_realType
+            up0(32) = 0.21188800e+02_realType
+            up0(33) = 0.21619100e+02_realType
+            up0(34) = 0.22057500e+02_realType
+
+            up1(1) = 0.12712722e+01_realType
+            up1(2) = 0.69191267e+00_realType
+            up1(3) = 0.43721180e+00_realType
+            up1(4) = 0.29770939e+00_realType
+            up1(5) = 0.21186537e+00_realType
+            up1(6) = 0.15500054e+00_realType
+            up1(7) = 0.11492466e+00_realType
+            up1(8) = 0.84733790e-01_realType
+            up1(9) = 0.61015984e-01_realType
+            up1(10) = 0.42707937e-01_realType
+            up1(11) = 0.29393811e-01_realType
+            up1(12) = 0.20241287e-01_realType
+            up1(13) = 0.14118603e-01_realType
+            up1(14) = 0.10028611e-01_realType
+            up1(15) = 0.72611010e-02_realType
+            up1(16) = 0.53541635e-02_realType
+            up1(17) = 0.40146771e-02_realType
+            up1(18) = 0.30560063e-02_realType
+            up1(19) = 0.23578120e-02_realType
+            up1(20) = 0.18410127e-02_realType
+            up1(21) = 0.14534277e-02_realType
+            up1(22) = 0.11589991e-02_realType
+            up1(23) = 0.93274199e-03_realType
+            up1(24) = 0.75723913e-03_realType
+            up1(25) = 0.61991282e-03_realType
+            up1(26) = 0.51149306e-03_realType
+            up1(27) = 0.42528110e-03_realType
+            up1(28) = 0.35632360e-03_realType
+            up1(29) = 0.30082562e-03_realType
+            up1(30) = 0.25590806e-03_realType
+            up1(31) = 0.21932184e-03_realType
+            up1(32) = 0.18942066e-03_realType
+            up1(33) = 0.16482466e-03_realType
+            up1(34) = 0.14450977e-03_realType
+
+            up2(1) = -0.10180856e+01_realType
+            up2(2) = -0.16838796e+00_realType
+            up2(3) = -0.42564371e-01_realType
+            up2(4) = -0.13467470e-01_realType
+            up2(5) = -0.49083413e-02_realType
+            up2(6) = -0.19435488e-02_realType
+            up2(7) = -0.87388594e-03_realType
+            up2(8) = -0.50925439e-03_realType
+            up2(9) = -0.34513516e-03_realType
+            up2(10) = -0.22127804e-03_realType
+            up2(11) = -0.12741049e-03_realType
+            up2(12) = -0.68647301e-04_realType
+            up2(13) = -0.36296584e-04_realType
+            up2(14) = -0.19327079e-04_realType
+            up2(15) = -0.10522599e-04_realType
+            up2(16) = -0.58546470e-05_realType
+            up2(17) = -0.33469044e-05_realType
+            up2(18) = -0.19525994e-05_realType
+            up2(19) = -0.11686013e-05_realType
+            up2(20) = -0.71331900e-06_realType
+            up2(21) = -0.44175896e-06_realType
+            up2(22) = -0.27957847e-06_realType
+            up2(23) = -0.17903765e-06_realType
+            up2(24) = -0.11639426e-06_realType
+            up2(25) = -0.76321282e-07_realType
+            up2(26) = -0.51021408e-07_realType
+            up2(27) = -0.34196379e-07_realType
+            up2(28) = -0.23263805e-07_realType
+            up2(29) = -0.15876970e-07_realType
+            up2(30) = -0.10967048e-07_realType
+            up2(31) = -0.76570908e-08_realType
+            up2(32) = -0.53137237e-08_realType
+            up2(33) = -0.37931015e-08_realType
+            up2(34) = -0.26536226e-08_realType
+
+            up3(1) = 0.29032828e+00_realType
+            up3(2) = -0.71056794e-02_realType
+            up3(3) = -0.33374366e-02_realType
+            up3(4) = -0.85721208e-03_realType
+            up3(5) = -0.21895691e-03_realType
+            up3(6) = -0.64856031e-04_realType
+            up3(7) = -0.20819908e-04_realType
+            up3(8) = -0.51723681e-05_realType
+            up3(9) = -0.31864903e-06_realType
+            up3(10) = 0.50988875e-06_realType
+            up3(11) = 0.35651270e-06_realType
+            up3(12) = 0.16711639e-06_realType
+            up3(13) = 0.69583056e-07_realType
+            up3(14) = 0.27917810e-07_realType
+            up3(15) = 0.11383228e-07_realType
+            up3(16) = 0.46713098e-08_realType
+            up3(17) = 0.19959356e-08_realType
+            up3(18) = 0.85688237e-09_realType
+            up3(19) = 0.38661576e-09_realType
+            up3(20) = 0.18015921e-09_realType
+            up3(21) = 0.83166064e-10_realType
+            up3(22) = 0.41131094e-10_realType
+            up3(23) = 0.20294857e-10_realType
+            up3(24) = 0.10424447e-10_realType
+            up3(25) = 0.51615620e-11_realType
+            up3(26) = 0.28812993e-11_realType
+            up3(27) = 0.14918197e-11_realType
+            up3(28) = 0.84315572e-12_realType
+            up3(29) = 0.44502764e-12_realType
+            up3(30) = 0.24764964e-12_realType
+            up3(31) = 0.15033872e-12_realType
+            up3(32) = 0.70583114e-13_realType
+            up3(33) = 0.50327579e-13_realType
+            up3(34) = 0.20760485e-13_realType
+
+            ! Set the values of tuLogFit.All variables have been
+            ! fitted linearly; the fifth variable is the eddy viscosity.
+
+            tuLogFit(itu1) = .false.
+            tuLogFit(itu2) = .false.
+            tuLogFit(itu3) = .false.
+            tuLogFit(itu4) = .false.
+            tuLogFit(itu5) = .false.
+
+            ! Set the values of constants for the cubic fits of the
+            ! non-dimensional k, eps, v2 and f values.
+
+            ! Constants for k.
+
+            tup0(1, itu1) = 0.23837900e-01_realType
+            tup0(2, itu1) = 0.78315200e-01_realType
+            tup0(3, itu1) = 0.18685000e+00_realType
+            tup0(4, itu1) = 0.38036300e+00_realType
+            tup0(5, itu1) = 0.70148100e+00_realType
+            tup0(6, itu1) = 0.12056400e+01_realType
+            tup0(7, itu1) = 0.19570000e+01_realType
+            tup0(8, itu1) = 0.29949800e+01_realType
+            tup0(9, itu1) = 0.42181500e+01_realType
+            tup0(10, itu1) = 0.53378900e+01_realType
+            tup0(11, itu1) = 0.61118500e+01_realType
+            tup0(12, itu1) = 0.65046800e+01_realType
+            tup0(13, itu1) = 0.66026900e+01_realType
+            tup0(14, itu1) = 0.65082900e+01_realType
+            tup0(15, itu1) = 0.63012300e+01_realType
+            tup0(16, itu1) = 0.60354500e+01_realType
+            tup0(17, itu1) = 0.57452900e+01_realType
+            tup0(18, itu1) = 0.54518200e+01_realType
+            tup0(19, itu1) = 0.51675400e+01_realType
+            tup0(20, itu1) = 0.48994200e+01_realType
+            tup0(21, itu1) = 0.46509700e+01_realType
+            tup0(22, itu1) = 0.44234600e+01_realType
+            tup0(23, itu1) = 0.42167200e+01_realType
+            tup0(24, itu1) = 0.40297300e+01_realType
+            tup0(25, itu1) = 0.38609000e+01_realType
+            tup0(26, itu1) = 0.37082900e+01_realType
+            tup0(27, itu1) = 0.35697500e+01_realType
+            tup0(28, itu1) = 0.34429600e+01_realType
+            tup0(29, itu1) = 0.33254900e+01_realType
+            tup0(30, itu1) = 0.32147800e+01_realType
+            tup0(31, itu1) = 0.31080800e+01_realType
+            tup0(32, itu1) = 0.30023700e+01_realType
+            tup0(33, itu1) = 0.28942800e+01_realType
+            tup0(34, itu1) = 0.27799400e+01_realType
+
+            tup1(1, itu1) = 0.13413856e+00_realType
+            tup1(2, itu1) = 0.22335565e+00_realType
+            tup1(3, itu1) = 0.32434141e+00_realType
+            tup1(4, itu1) = 0.43518752e+00_realType
+            tup1(5, itu1) = 0.55218357e+00_realType
+            tup1(6, itu1) = 0.66775111e+00_realType
+            tup1(7, itu1) = 0.75987243e+00_realType
+            tup1(8, itu1) = 0.77006777e+00_realType
+            tup1(9, itu1) = 0.64264496e+00_realType
+            tup1(10, itu1) = 0.42014159e+00_realType
+            tup1(11, itu1) = 0.21027146e+00_realType
+            tup1(12, itu1) = 0.72151582e-01_realType
+            tup1(13, itu1) = 0.43462557e-03_realType
+            tup1(14, itu1) = -0.29846047e-01_realType
+            tup1(15, itu1) = -0.38647454e-01_realType
+            tup1(16, itu1) = -0.37657148e-01_realType
+            tup1(17, itu1) = -0.32885380e-01_realType
+            tup1(18, itu1) = -0.27179666e-01_realType
+            tup1(19, itu1) = -0.21775122e-01_realType
+            tup1(20, itu1) = -0.17122657e-01_realType
+            tup1(21, itu1) = -0.13311965e-01_realType
+            tup1(22, itu1) = -0.10282974e-01_realType
+            tup1(23, itu1) = -0.79200612e-02_realType
+            tup1(24, itu1) = -0.60997017e-02_realType
+            tup1(25, itu1) = -0.47109860e-02_realType
+            tup1(26, itu1) = -0.36595023e-02_realType
+            tup1(27, itu1) = -0.28688356e-02_realType
+            tup1(28, itu1) = -0.22786936e-02_realType
+            tup1(29, itu1) = -0.18420182e-02_realType
+            tup1(30, itu1) = -0.15230656e-02_realType
+            tup1(31, itu1) = -0.12949618e-02_realType
+            tup1(32, itu1) = -0.11374337e-02_realType
+            tup1(33, itu1) = -0.10354105e-02_realType
+            tup1(34, itu1) = -0.97825421e-03_realType
+
+            tup2(1, itu1) = 0.58799048e-01_realType
+            tup2(2, itu1) = 0.57983435e-01_realType
+            tup2(3, itu1) = 0.55840141e-01_realType
+            tup2(4, itu1) = 0.52909760e-01_realType
+            tup2(5, itu1) = 0.50658583e-01_realType
+            tup2(6, itu1) = 0.55744447e-01_realType
+            tup2(7, itu1) = 0.69202235e-01_realType
+            tup2(8, itu1) = 0.43649374e-01_realType
+            tup2(9, itu1) = -0.20068023e-01_realType
+            tup2(10, itu1) = -0.47375401e-01_realType
+            tup2(11, itu1) = -0.35131867e-01_realType
+            tup2(12, itu1) = -0.17682377e-01_realType
+            tup2(13, itu1) = -0.72540324e-02_realType
+            tup2(14, itu1) = -0.24963930e-02_realType
+            tup2(15, itu1) = -0.61439197e-03_realType
+            tup2(16, itu1) = 0.35707704e-04_realType
+            tup2(17, itu1) = 0.20726554e-03_realType
+            tup2(18, itu1) = 0.21257094e-03_realType
+            tup2(19, itu1) = 0.17069050e-03_realType
+            tup2(20, itu1) = 0.12477387e-03_realType
+            tup2(21, itu1) = 0.86954464e-04_realType
+            tup2(22, itu1) = 0.58869152e-04_realType
+            tup2(23, itu1) = 0.39300193e-04_realType
+            tup2(24, itu1) = 0.25976170e-04_realType
+            tup2(25, itu1) = 0.17045223e-04_realType
+            tup2(26, itu1) = 0.11152061e-04_realType
+            tup2(27, itu1) = 0.72633824e-05_realType
+            tup2(28, itu1) = 0.47173696e-05_realType
+            tup2(29, itu1) = 0.30546703e-05_realType
+            tup2(30, itu1) = 0.19644758e-05_realType
+            tup2(31, itu1) = 0.12465154e-05_realType
+            tup2(32, itu1) = 0.77547548e-06_realType
+            tup2(33, itu1) = 0.46678879e-06_realType
+            tup2(34, itu1) = 0.25787187e-06_realType
+
+            tup3(1, itu1) = 0.16768319e+00_realType
+            tup3(2, itu1) = 0.10621790e+00_realType
+            tup3(3, itu1) = 0.64437267e-01_realType
+            tup3(4, itu1) = 0.35904096e-01_realType
+            tup3(5, itu1) = 0.14921346e-01_realType
+            tup3(6, itu1) = -0.74747729e-02_realType
+            tup3(7, itu1) = -0.33280334e-01_realType
+            tup3(8, itu1) = -0.33896768e-01_realType
+            tup3(9, itu1) = -0.11588583e-01_realType
+            tup3(10, itu1) = 0.13997083e-02_realType
+            tup3(11, itu1) = 0.27365948e-02_realType
+            tup3(12, itu1) = 0.14431329e-02_realType
+            tup3(13, itu1) = 0.57506718e-03_realType
+            tup3(14, itu1) = 0.20485602e-03_realType
+            tup3(15, itu1) = 0.68525613e-04_realType
+            tup3(16, itu1) = 0.21493188e-04_realType
+            tup3(17, itu1) = 0.60206301e-05_realType
+            tup3(18, itu1) = 0.12018375e-05_realType
+            tup3(19, itu1) = -0.98181484e-07_realType
+            tup3(20, itu1) = -0.34302113e-06_realType
+            tup3(21, itu1) = -0.30271628e-06_realType
+            tup3(22, itu1) = -0.20913696e-06_realType
+            tup3(23, itu1) = -0.13440526e-06_realType
+            tup3(24, itu1) = -0.82910301e-07_realType
+            tup3(25, itu1) = -0.49744260e-07_realType
+            tup3(26, itu1) = -0.29836016e-07_realType
+            tup3(27, itu1) = -0.17773844e-07_realType
+            tup3(28, itu1) = -0.10672954e-07_realType
+            tup3(29, itu1) = -0.65469411e-08_realType
+            tup3(30, itu1) = -0.41129357e-08_realType
+            tup3(31, itu1) = -0.26461407e-08_realType
+            tup3(32, itu1) = -0.17741710e-08_realType
+            tup3(33, itu1) = -0.12646276e-08_realType
+            tup3(34, itu1) = -0.92958789e-09_realType
+
+            ! Constants for epsilon.
+
+            tup0(1, itu2) = 0.29354900e+00_realType
+            tup0(2, itu2) = 0.26347600e+00_realType
+            tup0(3, itu2) = 0.23131100e+00_realType
+            tup0(4, itu2) = 0.19880500e+00_realType
+            tup0(5, itu2) = 0.16843900e+00_realType
+            tup0(6, itu2) = 0.14348600e+00_realType
+            tup0(7, itu2) = 0.12800300e+00_realType
+            tup0(8, itu2) = 0.12650200e+00_realType
+            tup0(9, itu2) = 0.13424600e+00_realType
+            tup0(10, itu2) = 0.14121500e+00_realType
+            tup0(11, itu2) = 0.14022500e+00_realType
+            tup0(12, itu2) = 0.13108600e+00_realType
+            tup0(13, itu2) = 0.11710400e+00_realType
+            tup0(14, itu2) = 0.10147100e+00_realType
+            tup0(15, itu2) = 0.86210400e-01_realType
+            tup0(16, itu2) = 0.72337500e-01_realType
+            tup0(17, itu2) = 0.60231700e-01_realType
+            tup0(18, itu2) = 0.49927500e-01_realType
+            tup0(19, itu2) = 0.41292900e-01_realType
+            tup0(20, itu2) = 0.34129200e-01_realType
+            tup0(21, itu2) = 0.28223300e-01_realType
+            tup0(22, itu2) = 0.23372700e-01_realType
+            tup0(23, itu2) = 0.19397100e-01_realType
+            tup0(24, itu2) = 0.16140700e-01_realType
+            tup0(25, itu2) = 0.13472400e-01_realType
+            tup0(26, itu2) = 0.11283300e-01_realType
+            tup0(27, itu2) = 0.94837500e-02_realType
+            tup0(28, itu2) = 0.80004600e-02_realType
+            tup0(29, itu2) = 0.67736800e-02_realType
+            tup0(30, itu2) = 0.57548200e-02_realType
+            tup0(31, itu2) = 0.49043300e-02_realType
+            tup0(32, itu2) = 0.41899500e-02_realType
+            tup0(33, itu2) = 0.35852000e-02_realType
+            tup0(34, itu2) = 0.30682200e-02_realType
+
+            tup1(1, itu2) = -0.10010771e+00_realType
+            tup1(2, itu2) = -0.85277160e-01_realType
+            tup1(3, itu2) = -0.69444251e-01_realType
+            tup1(4, itu2) = -0.53166462e-01_realType
+            tup1(5, itu2) = -0.37013322e-01_realType
+            tup1(6, itu2) = -0.21505994e-01_realType
+            tup1(7, itu2) = -0.72125328e-02_realType
+            tup1(8, itu2) = 0.21261451e-02_realType
+            tup1(9, itu2) = 0.40356801e-02_realType
+            tup1(10, itu2) = 0.13265177e-02_realType
+            tup1(11, itu2) = -0.18253838e-02_realType
+            tup1(12, itu2) = -0.33986976e-02_realType
+            tup1(13, itu2) = -0.35654948e-02_realType
+            tup1(14, itu2) = -0.30586209e-02_realType
+            tup1(15, itu2) = -0.23812190e-02_realType
+            tup1(16, itu2) = -0.17596930e-02_realType
+            tup1(17, itu2) = -0.12627202e-02_realType
+            tup1(18, itu2) = -0.89095673e-03_realType
+            tup1(19, itu2) = -0.62275508e-03_realType
+            tup1(20, itu2) = -0.43321577e-03_realType
+            tup1(21, itu2) = -0.30084493e-03_realType
+            tup1(22, itu2) = -0.20900308e-03_realType
+            tup1(23, itu2) = -0.14547503e-03_realType
+            tup1(24, itu2) = -0.10156512e-03_realType
+            tup1(25, itu2) = -0.71189471e-04_realType
+            tup1(26, itu2) = -0.50133861e-04_realType
+            tup1(27, itu2) = -0.35495151e-04_realType
+            tup1(28, itu2) = -0.25282155e-04_realType
+            tup1(29, itu2) = -0.18128274e-04_realType
+            tup1(30, itu2) = -0.13095730e-04_realType
+            tup1(31, itu2) = -0.95402614e-05_realType
+            tup1(32, itu2) = -0.70178808e-05_realType
+            tup1(33, itu2) = -0.52216476e-05_realType
+            tup1(34, itu2) = -0.39388668e-05_realType
+
+            tup2(1, itu2) = 0.11572886e-01_realType
+            tup2(2, itu2) = 0.10868522e-01_realType
+            tup2(3, itu2) = 0.97691864e-02_realType
+            tup2(4, itu2) = 0.83476261e-02_realType
+            tup2(5, itu2) = 0.68769083e-02_realType
+            tup2(6, itu2) = 0.55834783e-02_realType
+            tup2(7, itu2) = 0.67702397e-02_realType
+            tup2(8, itu2) = 0.36737909e-02_realType
+            tup2(9, itu2) = 0.47795474e-03_realType
+            tup2(10, itu2) = -0.81158579e-03_realType
+            tup2(11, itu2) = -0.62510665e-03_realType
+            tup2(12, itu2) = -0.22487250e-03_realType
+            tup2(13, itu2) = -0.19693072e-04_realType
+            tup2(14, itu2) = 0.41848783e-04_realType
+            tup2(15, itu2) = 0.45878641e-04_realType
+            tup2(16, itu2) = 0.34700119e-04_realType
+            tup2(17, itu2) = 0.23057037e-04_realType
+            tup2(18, itu2) = 0.14420758e-04_realType
+            tup2(19, itu2) = 0.87404760e-05_realType
+            tup2(20, itu2) = 0.52127418e-05_realType
+            tup2(21, itu2) = 0.30833200e-05_realType
+            tup2(22, itu2) = 0.18189328e-05_realType
+            tup2(23, itu2) = 0.10729534e-05_realType
+            tup2(24, itu2) = 0.63476327e-06_realType
+            tup2(25, itu2) = 0.37690133e-06_realType
+            tup2(26, itu2) = 0.22497460e-06_realType
+            tup2(27, itu2) = 0.13510084e-06_realType
+            tup2(28, itu2) = 0.81632875e-07_realType
+            tup2(29, itu2) = 0.49681361e-07_realType
+            tup2(30, itu2) = 0.30447133e-07_realType
+            tup2(31, itu2) = 0.18796936e-07_realType
+            tup2(32, itu2) = 0.11683669e-07_realType
+            tup2(33, itu2) = 0.73153823e-08_realType
+            tup2(34, itu2) = 0.46047384e-08_realType
+
+            tup3(1, itu2) = 0.24128568e-01_realType
+            tup3(2, itu2) = 0.13760338e-01_realType
+            tup3(3, itu2) = 0.74572108e-02_realType
+            tup3(4, itu2) = 0.39055025e-02_realType
+            tup3(5, itu2) = 0.19387640e-02_realType
+            tup3(6, itu2) = 0.79307164e-03_realType
+            tup3(7, itu2) = -0.16312244e-02_realType
+            tup3(8, itu2) = -0.12641914e-02_realType
+            tup3(9, itu2) = -0.37976445e-03_realType
+            tup3(10, itu2) = 0.47821584e-04_realType
+            tup3(11, itu2) = 0.80186189e-04_realType
+            tup3(12, itu2) = 0.36078436e-04_realType
+            tup3(13, itu2) = 0.10994524e-04_realType
+            tup3(14, itu2) = 0.23244487e-05_realType
+            tup3(15, itu2) = 0.52508614e-07_realType
+            tup3(16, itu2) = -0.32196650e-06_realType
+            tup3(17, itu2) = -0.26554053e-06_realType
+            tup3(18, itu2) = -0.16334363e-06_realType
+            tup3(19, itu2) = -0.90357098e-07_realType
+            tup3(20, itu2) = -0.47678861e-07_realType
+            tup3(21, itu2) = -0.24549302e-07_realType
+            tup3(22, itu2) = -0.12513680e-07_realType
+            tup3(23, itu2) = -0.63437513e-08_realType
+            tup3(24, itu2) = -0.32238513e-08_realType
+            tup3(25, itu2) = -0.16413320e-08_realType
+            tup3(26, itu2) = -0.84056640e-09_realType
+            tup3(27, itu2) = -0.43372978e-09_realType
+            tup3(28, itu2) = -0.22528033e-09_realType
+            tup3(29, itu2) = -0.11820379e-09_realType
+            tup3(30, itu2) = -0.62596081e-10_realType
+            tup3(31, itu2) = -0.33528145e-10_realType
+            tup3(32, itu2) = -0.18147943e-10_realType
+            tup3(33, itu2) = -0.99793712e-11_realType
+            tup3(34, itu2) = -0.55553743e-11_realType
+
+            ! Constants for v2.
+
+            tup0(1, itu3) = 0.47502000e-06_realType
+            tup0(2, itu3) = 0.89181800e-05_realType
+            tup0(3, itu3) = 0.62272000e-04_realType
+            tup0(4, itu3) = 0.29003500e-03_realType
+            tup0(5, itu3) = 0.10734400e-02_realType
+            tup0(6, itu3) = 0.34009200e-02_realType
+            tup0(7, itu3) = 0.95514600e-02_realType
+            tup0(8, itu3) = 0.23917300e-01_realType
+            tup0(9, itu3) = 0.52481200e-01_realType
+            tup0(10, itu3) = 0.99458700e-01_realType
+            tup0(11, itu3) = 0.16446900e+00_realType
+            tup0(12, itu3) = 0.24385400e+00_realType
+            tup0(13, itu3) = 0.33343500e+00_realType
+            tup0(14, itu3) = 0.42968800e+00_realType
+            tup0(15, itu3) = 0.52978700e+00_realType
+            tup0(16, itu3) = 0.63143400e+00_realType
+            tup0(17, itu3) = 0.73270600e+00_realType
+            tup0(18, itu3) = 0.83194900e+00_realType
+            tup0(19, itu3) = 0.92772000e+00_realType
+            tup0(20, itu3) = 0.10187500e+01_realType
+            tup0(21, itu3) = 0.11039500e+01_realType
+            tup0(22, itu3) = 0.11823500e+01_realType
+            tup0(23, itu3) = 0.12531300e+01_realType
+            tup0(24, itu3) = 0.13155900e+01_realType
+            tup0(25, itu3) = 0.13691300e+01_realType
+            tup0(26, itu3) = 0.14132300e+01_realType
+            tup0(27, itu3) = 0.14474100e+01_realType
+            tup0(28, itu3) = 0.14712200e+01_realType
+            tup0(29, itu3) = 0.14842200e+01_realType
+            tup0(30, itu3) = 0.14859400e+01_realType
+            tup0(31, itu3) = 0.14758600e+01_realType
+            tup0(32, itu3) = 0.14533600e+01_realType
+            tup0(33, itu3) = 0.14177100e+01_realType
+            tup0(34, itu3) = 0.13680600e+01_realType
+
+            tup1(1, itu3) = 0.15655101e-04_realType
+            tup1(2, itu3) = 0.84672884e-04_realType
+            tup1(3, itu3) = 0.30186555e-03_realType
+            tup1(4, itu3) = 0.85507420e-03_realType
+            tup1(5, itu3) = 0.20814582e-02_realType
+            tup1(6, itu3) = 0.45090575e-02_realType
+            tup1(7, itu3) = 0.87126156e-02_realType
+            tup1(8, itu3) = 0.14620352e-01_realType
+            tup1(9, itu3) = 0.20720514e-01_realType
+            tup1(10, itu3) = 0.24845927e-01_realType
+            tup1(11, itu3) = 0.26022000e-01_realType
+            tup1(12, itu3) = 0.24837349e-01_realType
+            tup1(13, itu3) = 0.22373465e-01_realType
+            tup1(14, itu3) = 0.19439830e-01_realType
+            tup1(15, itu3) = 0.16489656e-01_realType
+            tup1(16, itu3) = 0.13744920e-01_realType
+            tup1(17, itu3) = 0.11298275e-01_realType
+            tup1(18, itu3) = 0.91742368e-02_realType
+            tup1(19, itu3) = 0.73635310e-02_realType
+            tup1(20, itu3) = 0.58414654e-02_realType
+            tup1(21, itu3) = 0.45756734e-02_realType
+            tup1(22, itu3) = 0.35325598e-02_realType
+            tup1(23, itu3) = 0.26801843e-02_realType
+            tup1(24, itu3) = 0.19885487e-02_realType
+            tup1(25, itu3) = 0.14310001e-02_realType
+            tup1(26, itu3) = 0.98391151e-03_realType
+            tup1(27, itu3) = 0.62700704e-03_realType
+            tup1(28, itu3) = 0.34339929e-03_realType
+            tup1(29, itu3) = 0.11882947e-03_realType
+            tup1(30, itu3) = -0.58565974e-04_realType
+            tup1(31, itu3) = -0.19862463e-03_realType
+            tup1(32, itu3) = -0.30936281e-03_realType
+            tup1(33, itu3) = -0.39707108e-03_realType
+            tup1(34, itu3) = -0.46709546e-03_realType
+
+            tup2(1, itu3) = -0.11515786e-03_realType
+            tup2(2, itu3) = -0.19649780e-03_realType
+            tup2(3, itu3) = -0.28531197e-03_realType
+            tup2(4, itu3) = -0.35674865e-03_realType
+            tup2(5, itu3) = -0.35499057e-03_realType
+            tup2(6, itu3) = -0.96188825e-04_realType
+            tup2(7, itu3) = 0.68706040e-03_realType
+            tup2(8, itu3) = 0.16408011e-02_realType
+            tup2(9, itu3) = 0.17663922e-02_realType
+            tup2(10, itu3) = 0.10532277e-02_realType
+            tup2(11, itu3) = 0.31539555e-03_realType
+            tup2(12, itu3) = -0.69677717e-04_realType
+            tup2(13, itu3) = -0.19632838e-03_realType
+            tup2(14, itu3) = -0.20683711e-03_realType
+            tup2(15, itu3) = -0.17733514e-03_realType
+            tup2(16, itu3) = -0.13942213e-03_realType
+            tup2(17, itu3) = -0.10497064e-03_realType
+            tup2(18, itu3) = -0.77063300e-04_realType
+            tup2(19, itu3) = -0.55728557e-04_realType
+            tup2(20, itu3) = -0.39733207e-04_realType
+            tup2(21, itu3) = -0.28198531e-04_realType
+            tup2(22, itu3) = -0.19838008e-04_realType
+            tup2(23, itu3) = -0.13886459e-04_realType
+            tup2(24, itu3) = -0.96805370e-05_realType
+            tup2(25, itu3) = -0.67138161e-05_realType
+            tup2(26, itu3) = -0.46515653e-05_realType
+            tup2(27, itu3) = -0.32158971e-05_realType
+            tup2(28, itu3) = -0.22162473e-05_realType
+            tup2(29, itu3) = -0.15270107e-05_realType
+            tup2(30, itu3) = -0.10497157e-05_realType
+            tup2(31, itu3) = -0.72432186e-06_realType
+            tup2(32, itu3) = -0.50175766e-06_realType
+            tup2(33, itu3) = -0.34544811e-06_realType
+            tup2(34, itu3) = -0.24279564e-06_realType
+
+            tup3(1, itu3) = 0.46422002e-03_realType
+            tup3(2, itu3) = 0.75115602e-03_realType
+            tup3(3, itu3) = 0.10424485e-02_realType
+            tup3(4, itu3) = 0.12956018e-02_realType
+            tup3(5, itu3) = 0.14483411e-02_realType
+            tup3(6, itu3) = 0.13404586e-02_realType
+            tup3(7, itu3) = 0.80055685e-03_realType
+            tup3(8, itu3) = 0.95237809e-04_realType
+            tup3(9, itu3) = -0.24584932e-03_realType
+            tup3(10, itu3) = -0.21878056e-03_realType
+            tup3(11, itu3) = -0.11092088e-03_realType
+            tup3(12, itu3) = -0.46194706e-04_realType
+            tup3(13, itu3) = -0.18290511e-04_realType
+            tup3(14, itu3) = -0.71643534e-05_realType
+            tup3(15, itu3) = -0.27476585e-05_realType
+            tup3(16, itu3) = -0.10108088e-05_realType
+            tup3(17, itu3) = -0.32593663e-06_realType
+            tup3(18, itu3) = -0.66048306e-07_realType
+            tup3(19, itu3) = 0.26645554e-07_realType
+            tup3(20, itu3) = 0.44215925e-07_realType
+            tup3(21, itu3) = 0.44161326e-07_realType
+            tup3(22, itu3) = 0.34660614e-07_realType
+            tup3(23, itu3) = 0.25119801e-07_realType
+            tup3(24, itu3) = 0.17432238e-07_realType
+            tup3(25, itu3) = 0.11470519e-07_realType
+            tup3(26, itu3) = 0.75110077e-08_realType
+            tup3(27, itu3) = 0.48557801e-08_realType
+            tup3(28, itu3) = 0.30570649e-08_realType
+            tup3(29, itu3) = 0.19141510e-08_realType
+            tup3(30, itu3) = 0.11593848e-08_realType
+            tup3(31, itu3) = 0.70447835e-09_realType
+            tup3(32, itu3) = 0.42929522e-09_realType
+            tup3(33, itu3) = 0.23103965e-09_realType
+            tup3(34, itu3) = 0.13124926e-09_realType
+
+            ! Constants for f.
+
+            tup0(1, itu4) = -0.33990e-02_realType
+            tup0(2, itu4) = -0.32796e-02_realType
+            tup0(3, itu4) = -0.31317e-02_realType
+            tup0(4, itu4) = -0.29501e-02_realType
+            tup0(5, itu4) = -0.27297e-02_realType
+            tup0(6, itu4) = -0.24652e-02_realType
+            tup0(7, itu4) = -0.21525e-02_realType
+            tup0(8, itu4) = -0.17911e-02_realType
+            tup0(9, itu4) = -0.13852e-02_realType
+            tup0(10, itu4) = -0.95384e-03_realType
+            tup0(11, itu4) = -0.52641e-03_realType
+            tup0(12, itu4) = -0.13576e-03_realType
+            tup0(13, itu4) = 0.21413e-03_realType
+            tup0(14, itu4) = 0.52393e-03_realType
+            tup0(15, itu4) = 0.79302e-03_realType
+            tup0(16, itu4) = 0.10205e-02_realType
+            tup0(17, itu4) = 0.12056e-02_realType
+            tup0(18, itu4) = 0.13485e-02_realType
+            tup0(19, itu4) = 0.14505e-02_realType
+            tup0(20, itu4) = 0.15137e-02_realType
+            tup0(21, itu4) = 0.15414e-02_realType
+            tup0(22, itu4) = 0.15379e-02_realType
+            tup0(23, itu4) = 0.15079e-02_realType
+            tup0(24, itu4) = 0.14563e-02_realType
+            tup0(25, itu4) = 0.13881e-02_realType
+            tup0(26, itu4) = 0.13079e-02_realType
+            tup0(27, itu4) = 0.12200e-02_realType
+            tup0(28, itu4) = 0.11280e-02_realType
+            tup0(29, itu4) = 0.10348e-02_realType
+            tup0(30, itu4) = 0.94297e-03_realType
+            tup0(31, itu4) = 0.85414e-03_realType
+            tup0(32, itu4) = 0.76956e-03_realType
+            tup0(33, itu4) = 0.68990e-03_realType
+            tup0(34, itu4) = 0.61535e-03_realType
+
+            tup1(1, itu4) = 0.36489e-03_realType
+            tup1(2, itu4) = 0.35429e-03_realType
+            tup1(3, itu4) = 0.34223e-03_realType
+            tup1(4, itu4) = 0.32877e-03_realType
+            tup1(5, itu4) = 0.31384e-03_realType
+            tup1(6, itu4) = 0.29698e-03_realType
+            tup1(7, itu4) = 0.27691e-03_realType
+            tup1(8, itu4) = 0.25273e-03_realType
+            tup1(9, itu4) = 0.22211e-03_realType
+            tup1(10, itu4) = 0.18427e-03_realType
+            tup1(11, itu4) = 0.14258e-03_realType
+            tup1(12, itu4) = 0.10527e-03_realType
+            tup1(13, itu4) = 0.76806e-04_realType
+            tup1(14, itu4) = 0.55424e-04_realType
+            tup1(15, itu4) = 0.39245e-04_realType
+            tup1(16, itu4) = 0.27022e-04_realType
+            tup1(17, itu4) = 0.17875e-04_realType
+            tup1(18, itu4) = 0.11139e-04_realType
+            tup1(19, itu4) = 0.62935e-05_realType
+            tup1(20, itu4) = 0.29155e-05_realType
+            tup1(21, itu4) = 0.65684e-06_realType
+            tup1(22, itu4) = -0.76742e-06_realType
+            tup1(23, itu4) = -0.15875e-05_realType
+            tup1(24, itu4) = -0.19855e-05_realType
+            tup1(25, itu4) = -0.21022e-05_realType
+            tup1(26, itu4) = -0.20422e-05_realType
+            tup1(27, itu4) = -0.18806e-05_realType
+            tup1(28, itu4) = -0.16696e-05_realType
+            tup1(29, itu4) = -0.14434e-05_realType
+            tup1(30, itu4) = -0.12233e-05_realType
+            tup1(31, itu4) = -0.10215e-05_realType
+            tup1(32, itu4) = -0.84412e-06_realType
+            tup1(33, itu4) = -0.69337e-06_realType
+            tup1(34, itu4) = -0.56947e-06_realType
+
+            tup2(1, itu4) = -0.66773e-05_realType
+            tup2(2, itu4) = -0.65637e-05_realType
+            tup2(3, itu4) = -0.61090e-05_realType
+            tup2(4, itu4) = -0.54483e-05_realType
+            tup2(5, itu4) = -0.44808e-05_realType
+            tup2(6, itu4) = -0.34229e-05_realType
+            tup2(7, itu4) = -0.36764e-05_realType
+            tup2(8, itu4) = -0.25226e-05_realType
+            tup2(9, itu4) = -0.39077e-05_realType
+            tup2(10, itu4) = -0.50583e-05_realType
+            tup2(11, itu4) = -0.63087e-05_realType
+            tup2(12, itu4) = -0.41839e-05_realType
+            tup2(13, itu4) = -0.25527e-05_realType
+            tup2(14, itu4) = -0.15942e-05_realType
+            tup2(15, itu4) = -0.10087e-05_realType
+            tup2(16, itu4) = -0.64089e-06_realType
+            tup2(17, itu4) = -0.40493e-06_realType
+            tup2(18, itu4) = -0.25305e-06_realType
+            tup2(19, itu4) = -0.15515e-06_realType
+            tup2(20, itu4) = -0.92540e-07_realType
+            tup2(21, itu4) = -0.53318e-07_realType
+            tup2(22, itu4) = -0.29160e-07_realType
+            tup2(23, itu4) = -0.14786e-07_realType
+            tup2(24, itu4) = -0.65163e-08_realType
+            tup2(25, itu4) = -0.20578e-08_realType
+            tup2(26, itu4) = 0.19849e-09_realType
+            tup2(27, itu4) = 0.11643e-08_realType
+            tup2(28, itu4) = 0.14562e-08_realType
+            tup2(29, itu4) = 0.14088e-08_realType
+            tup2(30, itu4) = 0.12218e-08_realType
+            tup2(31, itu4) = 0.99318e-09_realType
+            tup2(32, itu4) = 0.77547e-09_realType
+            tup2(33, itu4) = 0.58737e-09_realType
+            tup2(34, itu4) = 0.43430e-09_realType
+
+            tup3(1, itu4) = -0.18797e-04_realType
+            tup3(2, itu4) = -0.12078e-04_realType
+            tup3(3, itu4) = -0.78817e-05_realType
+            tup3(4, itu4) = -0.53414e-05_realType
+            tup3(5, itu4) = -0.40995e-05_realType
+            tup3(6, itu4) = -0.36062e-05_realType
+            tup3(7, itu4) = -0.25946e-05_realType
+            tup3(8, itu4) = -0.26029e-05_realType
+            tup3(9, itu4) = -0.16491e-05_realType
+            tup3(10, itu4) = -0.78717e-06_realType
+            tup3(11, itu4) = 0.86868e-07_realType
+            tup3(12, itu4) = 0.87323e-07_realType
+            tup3(13, itu4) = 0.40496e-07_realType
+            tup3(14, itu4) = 0.21155e-07_realType
+            tup3(15, itu4) = 0.12152e-07_realType
+            tup3(16, itu4) = 0.74005e-08_realType
+            tup3(17, itu4) = 0.45651e-08_realType
+            tup3(18, itu4) = 0.28215e-08_realType
+            tup3(19, itu4) = 0.17171e-08_realType
+            tup3(20, itu4) = 0.10180e-08_realType
+            tup3(21, itu4) = 0.59166e-09_realType
+            tup3(22, itu4) = 0.33318e-09_realType
+            tup3(23, itu4) = 0.18290e-09_realType
+            tup3(24, itu4) = 0.96740e-10_realType
+            tup3(25, itu4) = 0.49927e-10_realType
+            tup3(26, itu4) = 0.24485e-10_realType
+            tup3(27, itu4) = 0.11513e-10_realType
+            tup3(28, itu4) = 0.49778e-11_realType
+            tup3(29, itu4) = 0.18786e-11_realType
+            tup3(30, itu4) = 0.46227e-12_realType
+            tup3(31, itu4) = -0.11281e-12_realType
+            tup3(32, itu4) = -0.31872e-12_realType
+            tup3(33, itu4) = -0.36442e-12_realType
+            tup3(34, itu4) = -0.35534e-12_realType
+
+            ! Constants for nut.
+
+            tup0(1, itu5) = 0.11403e-05_realType
+            tup0(2, itu5) = 0.22598e-04_realType
+            tup0(3, itu5) = 0.16840e-03_realType
+            tup0(4, itu5) = 0.84569e-03_realType
+            tup0(5, itu5) = 0.33954e-02_realType
+            tup0(6, itu5) = 0.11611e-01_realType
+            tup0(7, itu5) = 0.34239e-01_realType
+            tup0(8, itu5) = 0.12517e+00_realType
+            tup0(9, itu5) = 0.35370e+00_realType
+            tup0(10, itu5) = 0.78688e+00_realType
+            tup0(11, itu5) = 0.14743e+01_realType
+            tup0(12, itu5) = 0.24581e+01_realType
+            tup0(13, itu5) = 0.37861e+01_realType
+            tup0(14, itu5) = 0.55146e+01_realType
+            tup0(15, itu5) = 0.77089e+01_realType
+            tup0(16, itu5) = 0.10444e+02_realType
+            tup0(17, itu5) = 0.13801e+02_realType
+            tup0(18, itu5) = 0.17873e+02_realType
+            tup0(19, itu5) = 0.22754e+02_realType
+            tup0(20, itu5) = 0.28545e+02_realType
+            tup0(21, itu5) = 0.35342e+02_realType
+            tup0(22, itu5) = 0.43237e+02_realType
+            tup0(23, itu5) = 0.52301e+02_realType
+            tup0(24, itu5) = 0.62582e+02_realType
+            tup0(25, itu5) = 0.74081e+02_realType
+            tup0(26, itu5) = 0.86742e+02_realType
+            tup0(27, itu5) = 0.10042e+03_realType
+            tup0(28, itu5) = 0.11489e+03_realType
+            tup0(29, itu5) = 0.12977e+03_realType
+            tup0(30, itu5) = 0.14455e+03_realType
+            tup0(31, itu5) = 0.15854e+03_realType
+            tup0(32, itu5) = 0.17088e+03_realType
+            tup0(33, itu5) = 0.18051e+03_realType
+            tup0(34, itu5) = 0.18619e+03_realType
+
+            tup1(1, itu5) = 0.38376e-04_realType
+            tup1(2, itu5) = 0.22169e-03_realType
+            tup1(3, itu5) = 0.85495e-03_realType
+            tup1(4, itu5) = 0.26396e-02_realType
+            tup1(5, itu5) = 0.69671e-02_realType
+            tup1(6, itu5) = 0.15867e-01_realType
+            tup1(7, itu5) = 0.46643e-01_realType
+            tup1(8, itu5) = 0.10523e+00_realType
+            tup1(9, itu5) = 0.17554e+00_realType
+            tup1(10, itu5) = 0.24044e+00_realType
+            tup1(11, itu5) = 0.29128e+00_realType
+            tup1(12, itu5) = 0.32865e+00_realType
+            tup1(13, itu5) = 0.35586e+00_realType
+            tup1(14, itu5) = 0.37557e+00_realType
+            tup1(15, itu5) = 0.38958e+00_realType
+            tup1(16, itu5) = 0.39905e+00_realType
+            tup1(17, itu5) = 0.40478e+00_realType
+            tup1(18, itu5) = 0.40725e+00_realType
+            tup1(19, itu5) = 0.40676e+00_realType
+            tup1(20, itu5) = 0.40343e+00_realType
+            tup1(21, itu5) = 0.39729e+00_realType
+            tup1(22, itu5) = 0.38827e+00_realType
+            tup1(23, itu5) = 0.37621e+00_realType
+            tup1(24, itu5) = 0.36094e+00_realType
+            tup1(25, itu5) = 0.34229e+00_realType
+            tup1(26, itu5) = 0.32008e+00_realType
+            tup1(27, itu5) = 0.29419e+00_realType
+            tup1(28, itu5) = 0.26461e+00_realType
+            tup1(29, itu5) = 0.23139e+00_realType
+            tup1(30, itu5) = 0.19476e+00_realType
+            tup1(31, itu5) = 0.15509e+00_realType
+            tup1(32, itu5) = 0.11289e+00_realType
+            tup1(33, itu5) = 0.68853e-01_realType
+            tup1(34, itu5) = 0.23785e-01_realType
+
+            tup2(1, itu5) = -0.31406e-03_realType
+            tup2(2, itu5) = -0.62688e-03_realType
+            tup2(3, itu5) = -0.10783e-02_realType
+            tup2(4, itu5) = -0.15394e-02_realType
+            tup2(5, itu5) = -0.13969e-02_realType
+            tup2(6, itu5) = -0.14454e-01_realType
+            tup2(7, itu5) = 0.23424e-02_realType
+            tup2(8, itu5) = 0.12646e-01_realType
+            tup2(9, itu5) = 0.15053e-01_realType
+            tup2(10, itu5) = 0.11222e-01_realType
+            tup2(11, itu5) = 0.69106e-02_realType
+            tup2(12, itu5) = 0.41344e-02_realType
+            tup2(13, itu5) = 0.25022e-02_realType
+            tup2(14, itu5) = 0.15280e-02_realType
+            tup2(15, itu5) = 0.93303e-03_realType
+            tup2(16, itu5) = 0.55503e-03_realType
+            tup2(17, itu5) = 0.32030e-03_realType
+            tup2(18, itu5) = 0.16369e-03_realType
+            tup2(19, itu5) = 0.59340e-04_realType
+            tup2(20, itu5) = -0.10107e-04_realType
+            tup2(21, itu5) = -0.60320e-04_realType
+            tup2(22, itu5) = -0.93922e-04_realType
+            tup2(23, itu5) = -0.12076e-03_realType
+            tup2(24, itu5) = -0.13968e-03_realType
+            tup2(25, itu5) = -0.15343e-03_realType
+            tup2(26, itu5) = -0.16392e-03_realType
+            tup2(27, itu5) = -0.17035e-03_realType
+            tup2(28, itu5) = -0.17361e-03_realType
+            tup2(29, itu5) = -0.17458e-03_realType
+            tup2(30, itu5) = -0.17152e-03_realType
+            tup2(31, itu5) = -0.16658e-03_realType
+            tup2(32, itu5) = -0.15883e-03_realType
+            tup2(33, itu5) = -0.14875e-03_realType
+            tup2(34, itu5) = -0.13710e-03_realType
+
+            tup3(1, itu5) = 0.11903e-02_realType
+            tup3(2, itu5) = 0.21637e-02_realType
+            tup3(3, itu5) = 0.33791e-02_realType
+            tup3(4, itu5) = 0.45914e-02_realType
+            tup3(5, itu5) = 0.50750e-02_realType
+            tup3(6, itu5) = 0.17668e-01_realType
+            tup3(7, itu5) = 0.95211e-02_realType
+            tup3(8, itu5) = 0.32633e-02_realType
+            tup3(9, itu5) = 0.15990e-03_realType
+            tup3(10, itu5) = -0.34910e-03_realType
+            tup3(11, itu5) = -0.21152e-03_realType
+            tup3(12, itu5) = -0.10676e-03_realType
+            tup3(13, itu5) = -0.58327e-04_realType
+            tup3(14, itu5) = -0.35522e-04_realType
+            tup3(15, itu5) = -0.24007e-04_realType
+            tup3(16, itu5) = -0.16925e-04_realType
+            tup3(17, itu5) = -0.13086e-04_realType
+            tup3(18, itu5) = -0.10268e-04_realType
+            tup3(19, itu5) = -0.82175e-05_realType
+            tup3(20, itu5) = -0.67361e-05_realType
+            tup3(21, itu5) = -0.54809e-05_realType
+            tup3(22, itu5) = -0.45452e-05_realType
+            tup3(23, itu5) = -0.36958e-05_realType
+            tup3(24, itu5) = -0.30062e-05_realType
+            tup3(25, itu5) = -0.24308e-05_realType
+            tup3(26, itu5) = -0.19327e-05_realType
+            tup3(27, itu5) = -0.15225e-05_realType
+            tup3(28, itu5) = -0.11825e-05_realType
+            tup3(29, itu5) = -0.89374e-06_realType
+            tup3(30, itu5) = -0.66991e-06_realType
+            tup3(31, itu5) = -0.48509e-06_realType
+            tup3(32, itu5) = -0.34165e-06_realType
+            tup3(33, itu5) = -0.23235e-06_realType
+            tup3(34, itu5) = -0.14885e-06_realType
+
+            !===============================================================
+
+        case (6_intType)
+
+            ! Version 6 of the model.
+
+            ! Set the number of data points and allocate the memory for
+            ! the arrays of the curve fits.
+
+            nFit = 34
+
+            allocate (ypT(0:nFit), reT(0:nFit), &
+                      up0(nFit), up1(nFit), up2(nFit), up3(nFit), &
+                      tup0(nFit, nt1:nt2 + 1), tup1(nFit, nt1:nt2 + 1), &
+                      tup2(nFit, nt1:nt2 + 1), tup3(nFit, nt1:nt2 + 1), &
+                      tuLogFit(nt1:nt2 + 1), stat=ierr)
+            if (ierr /= 0) &
+                 call terminate("initCurveFitDataVf", &
+                 "Memory allocation failure for curve fit &
+                 &coefficients")
+
+            ! Set the values of the Reynolds numbers at interval
+            ! boundaries.
+
+            reT(0) = 0.13341_realType
+            reT(1) = 0.47908_realType
+            reT(2) = 1.2328_realType
+            reT(3) = 2.6968_realType
+            reT(4) = 5.3545_realType
+            reT(5) = 9.9485_realType
+            reT(6) = 17.557_realType
+            reT(7) = 29.618_realType
+            reT(8) = 47.649_realType
+            reT(9) = 73.015_realType
+            reT(10) = 107.22_realType
+            reT(11) = 152.01_realType
+            reT(12) = 209.51_realType
+            reT(13) = 282.32_realType
+            reT(14) = 373.64_realType
+            reT(15) = 487.34_realType
+            reT(16) = 628.02_realType
+            reT(17) = 801.15_realType
+            reT(18) = 1013.1_realType
+            reT(19) = 1271.3_realType
+            reT(20) = 1584.4_realType
+            reT(21) = 1962.4_realType
+            reT(22) = 2416.6_realType
+            reT(23) = 2960.4_realType
+            reT(24) = 3608.7_realType
+            reT(25) = 4378.7_realType
+            reT(26) = 5289.8_realType
+            reT(27) = 6364.3_realType
+            reT(28) = 7627.3_realType
+            reT(29) = 9107.0_realType
+            reT(30) = 10835.0_realType
+            reT(31) = 12849.0_realType
+            reT(32) = 15187.0_realType
+            reT(33) = 17896.0_realType
+            reT(34) = 21028.0_realType
+
+            ! Set the values of the y+ values at interval boundaries.
+
+            ypT(0) = 0.36526_realType
+            ypT(1) = 0.69218_realType
+            ypT(2) = 1.1105_realType
+            ypT(3) = 1.6431_realType
+            ypT(4) = 2.3179_realType
+            ypT(5) = 3.169_realType
+            ypT(6) = 4.2373_realType
+            ypT(7) = 5.5723_realType
+            ypT(8) = 7.2331_realType
+            ypT(9) = 9.2902_realType
+            ypT(10) = 11.827_realType
+            ypT(11) = 14.943_realType
+            ypT(12) = 18.755_realType
+            ypT(13) = 23.398_realType
+            ypT(14) = 29.033_realType
+            ypT(15) = 35.843_realType
+            ypT(16) = 44.045_realType
+            ypT(17) = 53.884_realType
+            ypT(18) = 65.646_realType
+            ypT(19) = 79.656_realType
+            ypT(20) = 96.285_realType
+            ypT(21) = 115.96_realType
+            ypT(22) = 139.15_realType
+            ypT(23) = 166.4_realType
+            ypT(24) = 198.32_realType
+            ypT(25) = 235.59_realType
+            ypT(26) = 278.96_realType
+            ypT(27) = 329.31_realType
+            ypT(28) = 387.55_realType
+            ypT(29) = 454.74_realType
+            ypT(30) = 532.04_realType
+            ypT(31) = 620.71_realType
+            ypT(32) = 722.14_realType
+            ypT(33) = 837.87_realType
+            ypT(34) = 969.57_realType
+
+            ! Set the values of constants for the cubic fits of the
+            ! non-dimensional tangential velocity.
+
+            up0(1) = 0.36525e+00_realType
+            up0(2) = 0.69214e+00_realType
+            up0(3) = 0.11102e+01_realType
+            up0(4) = 0.16413e+01_realType
+            up0(5) = 0.23101e+01_realType
+            up0(6) = 0.31393e+01_realType
+            up0(7) = 0.41434e+01_realType
+            up0(8) = 0.53152e+01_realType
+            up0(9) = 0.65876e+01_realType
+            up0(10) = 0.78593e+01_realType
+            up0(11) = 0.90657e+01_realType
+            up0(12) = 0.10173e+02_realType
+            up0(13) = 0.11171e+02_realType
+            up0(14) = 0.12066e+02_realType
+            up0(15) = 0.12870e+02_realType
+            up0(16) = 0.13596e+02_realType
+            up0(17) = 0.14259e+02_realType
+            up0(18) = 0.14868e+02_realType
+            up0(19) = 0.15433e+02_realType
+            up0(20) = 0.15960e+02_realType
+            up0(21) = 0.16455e+02_realType
+            up0(22) = 0.16923e+02_realType
+            up0(23) = 0.17367e+02_realType
+            up0(24) = 0.17791e+02_realType
+            up0(25) = 0.18197e+02_realType
+            up0(26) = 0.18586e+02_realType
+            up0(27) = 0.18962e+02_realType
+            up0(28) = 0.19327e+02_realType
+            up0(29) = 0.19681e+02_realType
+            up0(30) = 0.20027e+02_realType
+            up0(31) = 0.20366e+02_realType
+            up0(32) = 0.20700e+02_realType
+            up0(33) = 0.21030e+02_realType
+            up0(34) = 0.21359e+02_realType
+
+            up1(1) = 0.12450e+01_realType
+            up1(2) = 0.67756e+00_realType
+            up1(3) = 0.42800e+00_realType
+            up1(4) = 0.29111e+00_realType
+            up1(5) = 0.20657e+00_realType
+            up1(6) = 0.15024e+00_realType
+            up1(7) = 0.11062e+00_realType
+            up1(8) = 0.81225e-01_realType
+            up1(9) = 0.58625e-01_realType
+            up1(10) = 0.41597e-01_realType
+            up1(11) = 0.29283e-01_realType
+            up1(12) = 0.20582e-01_realType
+            up1(13) = 0.14529e-01_realType
+            up1(14) = 0.10350e-01_realType
+            up1(15) = 0.74648e-02_realType
+            up1(16) = 0.54606e-02_realType
+            up1(17) = 0.40524e-02_realType
+            up1(18) = 0.30487e-02_realType
+            up1(19) = 0.23228e-02_realType
+            up1(20) = 0.17899e-02_realType
+            up1(21) = 0.13940e-02_realType
+            up1(22) = 0.10960e-02_realType
+            up1(23) = 0.86932e-03_realType
+            up1(24) = 0.69545e-03_realType
+            up1(25) = 0.56075e-03_realType
+            up1(26) = 0.45552e-03_realType
+            up1(27) = 0.37282e-03_realType
+            up1(28) = 0.30742e-03_realType
+            up1(29) = 0.25530e-03_realType
+            up1(30) = 0.21352e-03_realType
+            up1(31) = 0.17992e-03_realType
+            up1(32) = 0.15273e-03_realType
+            up1(33) = 0.13061e-03_realType
+            up1(34) = 0.11257e-03_realType
+
+            up2(1) = -0.95638e+00_realType
+            up2(2) = -0.15826e+00_realType
+            up2(3) = -0.40071e-01_realType
+            up2(4) = -0.12775e-01_realType
+            up2(5) = -0.47553e-02_realType
+            up2(6) = -0.19994e-02_realType
+            up2(7) = -0.91167e-03_realType
+            up2(8) = -0.51977e-03_realType
+            up2(9) = -0.33276e-03_realType
+            up2(10) = -0.19526e-03_realType
+            up2(11) = -0.11176e-03_realType
+            up2(12) = -0.62687e-04_realType
+            up2(13) = -0.34811e-04_realType
+            up2(14) = -0.19255e-04_realType
+            up2(15) = -0.10712e-04_realType
+            up2(16) = -0.60184e-05_realType
+            up2(17) = -0.34485e-05_realType
+            up2(18) = -0.20101e-05_realType
+            up2(19) = -0.11974e-05_realType
+            up2(20) = -0.72812e-06_realType
+            up2(21) = -0.44763e-06_realType
+            up2(22) = -0.28271e-06_realType
+            up2(23) = -0.17940e-06_realType
+            up2(24) = -0.11595e-06_realType
+            up2(25) = -0.75702e-07_realType
+            up2(26) = -0.50362e-07_realType
+            up2(27) = -0.33465e-07_realType
+            up2(28) = -0.22609e-07_realType
+            up2(29) = -0.15431e-07_realType
+            up2(30) = -0.10636e-07_realType
+            up2(31) = -0.73064e-08_realType
+            up2(32) = -0.51550e-08_realType
+            up2(33) = -0.35885e-08_realType
+            up2(34) = -0.25596e-08_realType
+
+            up3(1) = 0.26155e+00_realType
+            up3(2) = -0.64433e-02_realType
+            up3(3) = -0.30415e-02_realType
+            up3(4) = -0.78504e-03_realType
+            up3(5) = -0.19960e-03_realType
+            up3(6) = -0.52972e-04_realType
+            up3(7) = -0.16968e-04_realType
+            up3(8) = -0.39545e-05_realType
+            up3(9) = -0.75520e-07_realType
+            up3(10) = 0.29768e-06_realType
+            up3(11) = 0.21782e-06_realType
+            up3(12) = 0.11651e-06_realType
+            up3(13) = 0.55958e-07_realType
+            up3(14) = 0.25252e-07_realType
+            up3(15) = 0.11133e-07_realType
+            up3(16) = 0.48030e-08_realType
+            up3(17) = 0.21171e-08_realType
+            up3(18) = 0.93600e-09_realType
+            up3(19) = 0.42750e-09_realType
+            up3(20) = 0.20397e-09_realType
+            up3(21) = 0.94244e-10_realType
+            up3(22) = 0.48769e-10_realType
+            up3(23) = 0.23922e-10_realType
+            up3(24) = 0.12409e-10_realType
+            up3(25) = 0.63795e-11_realType
+            up3(26) = 0.36413e-11_realType
+            up3(27) = 0.18837e-11_realType
+            up3(28) = 0.10413e-11_realType
+            up3(29) = 0.59232e-12_realType
+            up3(30) = 0.35361e-12_realType
+            up3(31) = 0.18262e-12_realType
+            up3(32) = 0.12163e-12_realType
+            up3(33) = 0.63587e-13_realType
+            up3(34) = 0.41966e-13_realType
+
+            ! Set the values of tuLogFit. All variables have been
+            ! fitted linearly; the fifth variable is the eddy viscosity.
+
+            tuLogFit(itu1) = .false.
+            tuLogFit(itu2) = .false.
+            tuLogFit(itu3) = .false.
+            tuLogFit(itu4) = .false.
+            tuLogFit(itu5) = .false.
+
+            ! Set the values of constants for the cubic fits of the
+            ! non-dimensional k, eps, v2 and f values.
+
+            ! Constants for k.
+
+            tup0(1, itu1) = 0.20232e-01_realType
+            tup0(2, itu1) = 0.66720e-01_realType
+            tup0(3, itu1) = 0.15994e+00_realType
+            tup0(4, itu1) = 0.32733e+00_realType
+            tup0(5, itu1) = 0.60629e+00_realType
+            tup0(6, itu1) = 0.10415e+01_realType
+            tup0(7, itu1) = 0.16715e+01_realType
+            tup0(8, itu1) = 0.24989e+01_realType
+            tup0(9, itu1) = 0.34343e+01_realType
+            tup0(10, itu1) = 0.42683e+01_realType
+            tup0(11, itu1) = 0.48591e+01_realType
+            tup0(12, itu1) = 0.51750e+01_realType
+            tup0(13, itu1) = 0.52611e+01_realType
+            tup0(14, itu1) = 0.51878e+01_realType
+            tup0(15, itu1) = 0.50187e+01_realType
+            tup0(16, itu1) = 0.48000e+01_realType
+            tup0(17, itu1) = 0.45620e+01_realType
+            tup0(18, itu1) = 0.43226e+01_realType
+            tup0(19, itu1) = 0.40922e+01_realType
+            tup0(20, itu1) = 0.38760e+01_realType
+            tup0(21, itu1) = 0.36764e+01_realType
+            tup0(22, itu1) = 0.34942e+01_realType
+            tup0(23, itu1) = 0.33290e+01_realType
+            tup0(24, itu1) = 0.31798e+01_realType
+            tup0(25, itu1) = 0.30456e+01_realType
+            tup0(26, itu1) = 0.29248e+01_realType
+            tup0(27, itu1) = 0.28163e+01_realType
+            tup0(28, itu1) = 0.27186e+01_realType
+            tup0(29, itu1) = 0.26304e+01_realType
+            tup0(30, itu1) = 0.25505e+01_realType
+            tup0(31, itu1) = 0.24775e+01_realType
+            tup0(32, itu1) = 0.24103e+01_realType
+            tup0(33, itu1) = 0.23475e+01_realType
+            tup0(34, itu1) = 0.22880e+01_realType
+
+            tup1(1, itu1) = 0.11194e+00_realType
+            tup1(2, itu1) = 0.18748e+00_realType
+            tup1(3, itu1) = 0.27407e+00_realType
+            tup1(4, itu1) = 0.36966e+00_realType
+            tup1(5, itu1) = 0.46803e+00_realType
+            tup1(6, itu1) = 0.55499e+00_realType
+            tup1(7, itu1) = 0.60642e+00_realType
+            tup1(8, itu1) = 0.58843e+00_realType
+            tup1(9, itu1) = 0.47590e+00_realType
+            tup1(10, itu1) = 0.31013e+00_realType
+            tup1(11, itu1) = 0.16039e+00_realType
+            tup1(12, itu1) = 0.58026e-01_realType
+            tup1(13, itu1) = 0.15163e-02_realType
+            tup1(14, itu1) = -0.23590e-01_realType
+            tup1(15, itu1) = -0.31162e-01_realType
+            tup1(16, itu1) = -0.30422e-01_realType
+            tup1(17, itu1) = -0.26463e-01_realType
+            tup1(18, itu1) = -0.21749e-01_realType
+            tup1(19, itu1) = -0.17331e-01_realType
+            tup1(20, itu1) = -0.13569e-01_realType
+            tup1(21, itu1) = -0.10516e-01_realType
+            tup1(22, itu1) = -0.81061e-02_realType
+            tup1(23, itu1) = -0.62327e-02_realType
+            tup1(24, itu1) = -0.47901e-02_realType
+            tup1(25, itu1) = -0.36855e-02_realType
+            tup1(26, itu1) = -0.28427e-02_realType
+            tup1(27, itu1) = -0.22005e-02_realType
+            tup1(28, itu1) = -0.17118e-02_realType
+            tup1(29, itu1) = -0.13404e-02_realType
+            tup1(30, itu1) = -0.10584e-02_realType
+            tup1(31, itu1) = -0.84476e-03_realType
+            tup1(32, itu1) = -0.68362e-03_realType
+            tup1(33, itu1) = -0.56284e-03_realType
+            tup1(34, itu1) = -0.47348e-03_realType
+
+            tup2(1, itu1) = 0.46642e-01_realType
+            tup2(2, itu1) = 0.46777e-01_realType
+            tup2(3, itu1) = 0.47060e-01_realType
+            tup2(4, itu1) = 0.48495e-01_realType
+            tup2(5, itu1) = 0.50649e-01_realType
+            tup2(6, itu1) = 0.49396e-01_realType
+            tup2(7, itu1) = 0.43509e-01_realType
+            tup2(8, itu1) = 0.22224e-01_realType
+            tup2(9, itu1) = -0.22230e-01_realType
+            tup2(10, itu1) = -0.32317e-01_realType
+            tup2(11, itu1) = -0.23975e-01_realType
+            tup2(12, itu1) = -0.13064e-01_realType
+            tup2(13, itu1) = -0.57706e-02_realType
+            tup2(14, itu1) = -0.20813e-02_realType
+            tup2(15, itu1) = -0.52362e-03_realType
+            tup2(16, itu1) = 0.28081e-04_realType
+            tup2(17, itu1) = 0.17226e-03_realType
+            tup2(18, itu1) = 0.17433e-03_realType
+            tup2(19, itu1) = 0.13808e-03_realType
+            tup2(20, itu1) = 0.99662e-04_realType
+            tup2(21, itu1) = 0.68603e-04_realType
+            tup2(22, itu1) = 0.46166e-04_realType
+            tup2(23, itu1) = 0.30634e-04_realType
+            tup2(24, itu1) = 0.20237e-04_realType
+            tup2(25, itu1) = 0.13310e-04_realType
+            tup2(26, itu1) = 0.87532e-05_realType
+            tup2(27, itu1) = 0.57793e-05_realType
+            tup2(28, itu1) = 0.38107e-05_realType
+            tup2(29, itu1) = 0.25226e-05_realType
+            tup2(30, itu1) = 0.16733e-05_realType
+            tup2(31, itu1) = 0.11109e-05_realType
+            tup2(32, itu1) = 0.73765e-06_realType
+            tup2(33, itu1) = 0.49068e-06_realType
+            tup2(34, itu1) = 0.32170e-06_realType
+
+            tup3(1, itu1) = 0.14048e+00_realType
+            tup3(2, itu1) = 0.90412e-01_realType
+            tup3(3, itu1) = 0.53426e-01_realType
+            tup3(4, itu1) = 0.24095e-01_realType
+            tup3(5, itu1) = 0.34292e-03_realType
+            tup3(6, itu1) = -0.15803e-01_realType
+            tup3(7, itu1) = -0.25093e-01_realType
+            tup3(8, itu1) = -0.22520e-01_realType
+            tup3(9, itu1) = -0.58532e-02_realType
+            tup3(10, itu1) = 0.73777e-03_realType
+            tup3(11, itu1) = 0.16152e-02_realType
+            tup3(12, itu1) = 0.98832e-03_realType
+            tup3(13, itu1) = 0.44037e-03_realType
+            tup3(14, itu1) = 0.16675e-03_realType
+            tup3(15, itu1) = 0.56569e-04_realType
+            tup3(16, itu1) = 0.17339e-04_realType
+            tup3(17, itu1) = 0.45579e-05_realType
+            tup3(18, itu1) = 0.76381e-06_realType
+            tup3(19, itu1) = -0.18121e-06_realType
+            tup3(20, itu1) = -0.31622e-06_realType
+            tup3(21, itu1) = -0.24874e-06_realType
+            tup3(22, itu1) = -0.16604e-06_realType
+            tup3(23, itu1) = -0.10191e-06_realType
+            tup3(24, itu1) = -0.61291e-07_realType
+            tup3(25, itu1) = -0.35829e-07_realType
+            tup3(26, itu1) = -0.20759e-07_realType
+            tup3(27, itu1) = -0.12251e-07_realType
+            tup3(28, itu1) = -0.71234e-08_realType
+            tup3(29, itu1) = -0.42082e-08_realType
+            tup3(30, itu1) = -0.25154e-08_realType
+            tup3(31, itu1) = -0.15209e-08_realType
+            tup3(32, itu1) = -0.93515e-09_realType
+            tup3(33, itu1) = -0.60259e-09_realType
+            tup3(34, itu1) = -0.38327e-09_realType
+
+            ! Constants for epsilon.
+
+            tup0(1, itu2) = 0.24164e+00_realType
+            tup0(2, itu2) = 0.22008e+00_realType
+            tup0(3, itu2) = 0.19751e+00_realType
+            tup0(4, itu2) = 0.17571e+00_realType
+            tup0(5, itu2) = 0.15723e+00_realType
+            tup0(6, itu2) = 0.14536e+00_realType
+            tup0(7, itu2) = 0.14328e+00_realType
+            tup0(8, itu2) = 0.15220e+00_realType
+            tup0(9, itu2) = 0.16784e+00_realType
+            tup0(10, itu2) = 0.17712e+00_realType
+            tup0(11, itu2) = 0.17387e+00_realType
+            tup0(12, itu2) = 0.15954e+00_realType
+            tup0(13, itu2) = 0.13909e+00_realType
+            tup0(14, itu2) = 0.11713e+00_realType
+            tup0(15, itu2) = 0.96528e-01_realType
+            tup0(16, itu2) = 0.78571e-01_realType
+            tup0(17, itu2) = 0.63547e-01_realType
+            tup0(18, itu2) = 0.51265e-01_realType
+            tup0(19, itu2) = 0.41354e-01_realType
+            tup0(20, itu2) = 0.33411e-01_realType
+            tup0(21, itu2) = 0.27064e-01_realType
+            tup0(22, itu2) = 0.21997e-01_realType
+            tup0(23, itu2) = 0.17947e-01_realType
+            tup0(24, itu2) = 0.14703e-01_realType
+            tup0(25, itu2) = 0.12100e-01_realType
+            tup0(26, itu2) = 0.10003e-01_realType
+            tup0(27, itu2) = 0.83083e-02_realType
+            tup0(28, itu2) = 0.69337e-02_realType
+            tup0(29, itu2) = 0.58144e-02_realType
+            tup0(30, itu2) = 0.48991e-02_realType
+            tup0(31, itu2) = 0.41476e-02_realType
+            tup0(32, itu2) = 0.35278e-02_realType
+            tup0(33, itu2) = 0.30142e-02_realType
+            tup0(34, itu2) = 0.25866e-02_realType
+
+            tup1(1, itu2) = -0.70667e-01_realType
+            tup1(2, itu2) = -0.59210e-01_realType
+            tup1(3, itu2) = -0.46663e-01_realType
+            tup1(4, itu2) = -0.33366e-01_realType
+            tup1(5, itu2) = -0.19886e-01_realType
+            tup1(6, itu2) = -0.72638e-02_realType
+            tup1(7, itu2) = 0.28452e-02_realType
+            tup1(8, itu2) = 0.81949e-02_realType
+            tup1(9, itu2) = 0.67022e-02_realType
+            tup1(10, itu2) = 0.13143e-02_realType
+            tup1(11, itu2) = -0.31089e-02_realType
+            tup1(12, itu2) = -0.50215e-02_realType
+            tup1(13, itu2) = -0.50170e-02_realType
+            tup1(14, itu2) = -0.41408e-02_realType
+            tup1(15, itu2) = -0.30979e-02_realType
+            tup1(16, itu2) = -0.21970e-02_realType
+            tup1(17, itu2) = -0.15136e-02_realType
+            tup1(18, itu2) = -0.10274e-02_realType
+            tup1(19, itu2) = -0.69277e-03_realType
+            tup1(20, itu2) = -0.46639e-03_realType
+            tup1(21, itu2) = -0.31444e-03_realType
+            tup1(22, itu2) = -0.21272e-03_realType
+            tup1(23, itu2) = -0.14458e-03_realType
+            tup1(24, itu2) = -0.98812e-04_realType
+            tup1(25, itu2) = -0.67941e-04_realType
+            tup1(26, itu2) = -0.47013e-04_realType
+            tup1(27, itu2) = -0.32748e-04_realType
+            tup1(28, itu2) = -0.22968e-04_realType
+            tup1(29, itu2) = -0.16220e-04_realType
+            tup1(30, itu2) = -0.11536e-04_realType
+            tup1(31, itu2) = -0.82628e-05_realType
+            tup1(32, itu2) = -0.59619e-05_realType
+            tup1(33, itu2) = -0.43340e-05_realType
+            tup1(34, itu2) = -0.31753e-05_realType
+
+            tup2(1, itu2) = 0.82009e-02_realType
+            tup2(2, itu2) = 0.78044e-02_realType
+            tup2(3, itu2) = 0.72279e-02_realType
+            tup2(4, itu2) = 0.66236e-02_realType
+            tup2(5, itu2) = 0.61231e-02_realType
+            tup2(6, itu2) = 0.54757e-02_realType
+            tup2(7, itu2) = 0.46058e-02_realType
+            tup2(8, itu2) = 0.31013e-02_realType
+            tup2(9, itu2) = -0.57395e-03_realType
+            tup2(10, itu2) = -0.13230e-02_realType
+            tup2(11, itu2) = -0.82054e-03_realType
+            tup2(12, itu2) = -0.27325e-03_realType
+            tup2(13, itu2) = -0.29977e-05_realType
+            tup2(14, itu2) = 0.73241e-04_realType
+            tup2(15, itu2) = 0.70951e-04_realType
+            tup2(16, itu2) = 0.50202e-04_realType
+            tup2(17, itu2) = 0.31490e-04_realType
+            tup2(18, itu2) = 0.18675e-04_realType
+            tup2(19, itu2) = 0.10781e-04_realType
+            tup2(20, itu2) = 0.61493e-05_realType
+            tup2(21, itu2) = 0.34945e-05_realType
+            tup2(22, itu2) = 0.19882e-05_realType
+            tup2(23, itu2) = 0.11363e-05_realType
+            tup2(24, itu2) = 0.65315e-06_realType
+            tup2(25, itu2) = 0.37828e-06_realType
+            tup2(26, itu2) = 0.22089e-06_realType
+            tup2(27, itu2) = 0.13005e-06_realType
+            tup2(28, itu2) = 0.77269e-07_realType
+            tup2(29, itu2) = 0.46321e-07_realType
+            tup2(30, itu2) = 0.28022e-07_realType
+            tup2(31, itu2) = 0.17109e-07_realType
+            tup2(32, itu2) = 0.10536e-07_realType
+            tup2(33, itu2) = 0.65500e-08_realType
+            tup2(34, itu2) = 0.41065e-08_realType
+
+            tup3(1, itu2) = 0.19008e-01_realType
+            tup3(2, itu2) = 0.11466e-01_realType
+            tup3(3, itu2) = 0.65783e-02_realType
+            tup3(4, itu2) = 0.33228e-02_realType
+            tup3(5, itu2) = 0.10124e-02_realType
+            tup3(6, itu2) = -0.46456e-03_realType
+            tup3(7, itu2) = -0.12995e-02_realType
+            tup3(8, itu2) = -0.14253e-02_realType
+            tup3(9, itu2) = -0.23840e-03_realType
+            tup3(10, itu2) = 0.11860e-03_realType
+            tup3(11, itu2) = 0.10989e-03_realType
+            tup3(12, itu2) = 0.47896e-04_realType
+            tup3(13, itu2) = 0.13977e-04_realType
+            tup3(14, itu2) = 0.22846e-05_realType
+            tup3(15, itu2) = -0.47051e-06_realType
+            tup3(16, itu2) = -0.69412e-06_realType
+            tup3(17, itu2) = -0.45961e-06_realType
+            tup3(18, itu2) = -0.25229e-06_realType
+            tup3(19, itu2) = -0.12854e-06_realType
+            tup3(20, itu2) = -0.63372e-07_realType
+            tup3(21, itu2) = -0.30806e-07_realType
+            tup3(22, itu2) = -0.14923e-07_realType
+            tup3(23, itu2) = -0.72529e-08_realType
+            tup3(24, itu2) = -0.35415e-08_realType
+            tup3(25, itu2) = -0.17443e-08_realType
+            tup3(26, itu2) = -0.86787e-09_realType
+            tup3(27, itu2) = -0.43581e-09_realType
+            tup3(28, itu2) = -0.22141e-09_realType
+            tup3(29, itu2) = -0.11372e-09_realType
+            tup3(30, itu2) = -0.59087e-10_realType
+            tup3(31, itu2) = -0.31082e-10_realType
+            tup3(32, itu2) = -0.16513e-10_realType
+            tup3(33, itu2) = -0.88934e-11_realType
+            tup3(34, itu2) = -0.48436e-11_realType
+
+            ! Constants for v2.
+
+            tup0(1, itu3) = 0.86118e-05_realType
+            tup0(2, itu3) = 0.76561e-04_realType
+            tup0(3, itu3) = 0.38909e-03_realType
+            tup0(4, itu3) = 0.14605e-02_realType
+            tup0(5, itu3) = 0.44732e-02_realType
+            tup0(6, itu3) = 0.11704e-01_realType
+            tup0(7, itu3) = 0.26786e-01_realType
+            tup0(8, itu3) = 0.54333e-01_realType
+            tup0(9, itu3) = 0.97821e-01_realType
+            tup0(10, itu3) = 0.15774e+00_realType
+            tup0(11, itu3) = 0.23273e+00_realType
+            tup0(12, itu3) = 0.32009e+00_realType
+            tup0(13, itu3) = 0.41667e+00_realType
+            tup0(14, itu3) = 0.51933e+00_realType
+            tup0(15, itu3) = 0.62515e+00_realType
+            tup0(16, itu3) = 0.73157e+00_realType
+            tup0(17, itu3) = 0.83660e+00_realType
+            tup0(18, itu3) = 0.93915e+00_realType
+            tup0(19, itu3) = 0.10385e+01_realType
+            tup0(20, itu3) = 0.11342e+01_realType
+            tup0(21, itu3) = 0.12256e+01_realType
+            tup0(22, itu3) = 0.13125e+01_realType
+            tup0(23, itu3) = 0.13944e+01_realType
+            tup0(24, itu3) = 0.14708e+01_realType
+            tup0(25, itu3) = 0.15412e+01_realType
+            tup0(26, itu3) = 0.16053e+01_realType
+            tup0(27, itu3) = 0.16624e+01_realType
+            tup0(28, itu3) = 0.17120e+01_realType
+            tup0(29, itu3) = 0.17534e+01_realType
+            tup0(30, itu3) = 0.17862e+01_realType
+            tup0(31, itu3) = 0.18095e+01_realType
+            tup0(32, itu3) = 0.18227e+01_realType
+            tup0(33, itu3) = 0.18250e+01_realType
+            tup0(34, itu3) = 0.18158e+01_realType
+
+            tup1(1, itu3) = 0.13148e-03_realType
+            tup1(2, itu3) = 0.51056e-03_realType
+            tup1(3, itu3) = 0.14555e-02_realType
+            tup1(4, itu3) = 0.33824e-02_realType
+            tup1(5, itu3) = 0.67133e-02_realType
+            tup1(6, itu3) = 0.11625e-01_realType
+            tup1(7, itu3) = 0.17737e-01_realType
+            tup1(8, itu3) = 0.23712e-01_realType
+            tup1(9, itu3) = 0.27813e-01_realType
+            tup1(10, itu3) = 0.29364e-01_realType
+            tup1(11, itu3) = 0.28719e-01_realType
+            tup1(12, itu3) = 0.26553e-01_realType
+            tup1(13, itu3) = 0.23565e-01_realType
+            tup1(14, itu3) = 0.20284e-01_realType
+            tup1(15, itu3) = 0.17054e-01_realType
+            tup1(16, itu3) = 0.14085e-01_realType
+            tup1(17, itu3) = 0.11506e-01_realType
+            tup1(18, itu3) = 0.93475e-02_realType
+            tup1(19, itu3) = 0.75674e-02_realType
+            tup1(20, itu3) = 0.61075e-02_realType
+            tup1(21, itu3) = 0.49132e-02_realType
+            tup1(22, itu3) = 0.39366e-02_realType
+            tup1(23, itu3) = 0.31369e-02_realType
+            tup1(24, itu3) = 0.24817e-02_realType
+            tup1(25, itu3) = 0.19440e-02_realType
+            tup1(26, itu3) = 0.15022e-02_realType
+            tup1(27, itu3) = 0.11385e-02_realType
+            tup1(28, itu3) = 0.83880e-03_realType
+            tup1(29, itu3) = 0.59169e-03_realType
+            tup1(30, itu3) = 0.38785e-03_realType
+            tup1(31, itu3) = 0.21987e-03_realType
+            tup1(32, itu3) = 0.81692e-04_realType
+            tup1(33, itu3) = -0.31589e-04_realType
+            tup1(34, itu3) = -0.12412e-03_realType
+
+            tup2(1, itu3) = -0.45876e-03_realType
+            tup2(2, itu3) = -0.56213e-03_realType
+            tup2(3, itu3) = -0.48447e-03_realType
+            tup2(4, itu3) = -0.12712e-03_realType
+            tup2(5, itu3) = 0.51606e-03_realType
+            tup2(6, itu3) = 0.12754e-02_realType
+            tup2(7, itu3) = 0.20365e-02_realType
+            tup2(8, itu3) = 0.19980e-02_realType
+            tup2(9, itu3) = 0.11629e-02_realType
+            tup2(10, itu3) = 0.48125e-03_realType
+            tup2(11, itu3) = 0.38876e-04_realType
+            tup2(12, itu3) = -0.17127e-03_realType
+            tup2(13, itu3) = -0.23448e-03_realType
+            tup2(14, itu3) = -0.22707e-03_realType
+            tup2(15, itu3) = -0.19347e-03_realType
+            tup2(16, itu3) = -0.15335e-03_realType
+            tup2(17, itu3) = -0.11100e-03_realType
+            tup2(18, itu3) = -0.78059e-04_realType
+            tup2(19, itu3) = -0.54121e-04_realType
+            tup2(20, itu3) = -0.37687e-04_realType
+            tup2(21, itu3) = -0.26073e-04_realType
+            tup2(22, itu3) = -0.18195e-04_realType
+            tup2(23, itu3) = -0.12730e-04_realType
+            tup2(24, itu3) = -0.89271e-05_realType
+            tup2(25, itu3) = -0.63091e-05_realType
+            tup2(26, itu3) = -0.44582e-05_realType
+            tup2(27, itu3) = -0.31881e-05_realType
+            tup2(28, itu3) = -0.22705e-05_realType
+            tup2(29, itu3) = -0.16359e-05_realType
+            tup2(30, itu3) = -0.11754e-05_realType
+            tup2(31, itu3) = -0.85145e-06_realType
+            tup2(32, itu3) = -0.61411e-06_realType
+            tup2(33, itu3) = -0.44454e-06_realType
+            tup2(34, itu3) = -0.32118e-06_realType
+
+            tup3(1, itu3) = 0.21178e-02_realType
+            tup3(2, itu3) = 0.26960e-02_realType
+            tup3(3, itu3) = 0.28709e-02_realType
+            tup3(4, itu3) = 0.25635e-02_realType
+            tup3(5, itu3) = 0.18560e-02_realType
+            tup3(6, itu3) = 0.98939e-03_realType
+            tup3(7, itu3) = 0.10046e-03_realType
+            tup3(8, itu3) = -0.30638e-03_realType
+            tup3(9, itu3) = -0.25468e-03_realType
+            tup3(10, itu3) = -0.15987e-03_realType
+            tup3(11, itu3) = -0.82673e-04_realType
+            tup3(12, itu3) = -0.38599e-04_realType
+            tup3(13, itu3) = -0.17054e-04_realType
+            tup3(14, itu3) = -0.70519e-05_realType
+            tup3(15, itu3) = -0.23971e-05_realType
+            tup3(16, itu3) = -0.31545e-06_realType
+            tup3(17, itu3) = 0.88126e-07_realType
+            tup3(18, itu3) = 0.13543e-06_realType
+            tup3(19, itu3) = 0.96125e-07_realType
+            tup3(20, itu3) = 0.71267e-07_realType
+            tup3(21, itu3) = 0.42315e-07_realType
+            tup3(22, itu3) = 0.27372e-07_realType
+            tup3(23, itu3) = 0.17327e-07_realType
+            tup3(24, itu3) = 0.10546e-07_realType
+            tup3(25, itu3) = 0.68294e-08_realType
+            tup3(26, itu3) = 0.40887e-08_realType
+            tup3(27, itu3) = 0.27989e-08_realType
+            tup3(28, itu3) = 0.17076e-08_realType
+            tup3(29, itu3) = 0.11817e-08_realType
+            tup3(30, itu3) = 0.76583e-09_realType
+            tup3(31, itu3) = 0.54352e-09_realType
+            tup3(32, itu3) = 0.36625e-09_realType
+            tup3(33, itu3) = 0.25793e-09_realType
+            tup3(34, itu3) = 0.18203e-09_realType
+
+            ! Constants for f.
+
+            tup0(1, itu4) = 0.36932e-03_realType
+            tup0(2, itu4) = 0.69498e-03_realType
+            tup0(3, itu4) = 0.11057e-02_realType
+            tup0(4, itu4) = 0.16193e-02_realType
+            tup0(5, itu4) = 0.22555e-02_realType
+            tup0(6, itu4) = 0.30335e-02_realType
+            tup0(7, itu4) = 0.39685e-02_realType
+            tup0(8, itu4) = 0.50643e-02_realType
+            tup0(9, itu4) = 0.63046e-02_realType
+            tup0(10, itu4) = 0.76520e-02_realType
+            tup0(11, itu4) = 0.90512e-02_realType
+            tup0(12, itu4) = 0.10438e-01_realType
+            tup0(13, itu4) = 0.11748e-01_realType
+            tup0(14, itu4) = 0.12920e-01_realType
+            tup0(15, itu4) = 0.13898e-01_realType
+            tup0(16, itu4) = 0.14631e-01_realType
+            tup0(17, itu4) = 0.15089e-01_realType
+            tup0(18, itu4) = 0.15297e-01_realType
+            tup0(19, itu4) = 0.15285e-01_realType
+            tup0(20, itu4) = 0.15080e-01_realType
+            tup0(21, itu4) = 0.14711e-01_realType
+            tup0(22, itu4) = 0.14207e-01_realType
+            tup0(23, itu4) = 0.13596e-01_realType
+            tup0(24, itu4) = 0.12904e-01_realType
+            tup0(25, itu4) = 0.12154e-01_realType
+            tup0(26, itu4) = 0.11369e-01_realType
+            tup0(27, itu4) = 0.10567e-01_realType
+            tup0(28, itu4) = 0.97653e-02_realType
+            tup0(29, itu4) = 0.89762e-02_realType
+            tup0(30, itu4) = 0.82104e-02_realType
+            tup0(31, itu4) = 0.74760e-02_realType
+            tup0(32, itu4) = 0.67787e-02_realType
+            tup0(33, itu4) = 0.61223e-02_realType
+            tup0(34, itu4) = 0.55092e-02_realType
+
+            tup1(1, itu4) = 0.10014e-02_realType
+            tup1(2, itu4) = 0.98811e-03_realType
+            tup1(3, itu4) = 0.97209e-03_realType
+            tup1(4, itu4) = 0.95225e-03_realType
+            tup1(5, itu4) = 0.92674e-03_realType
+            tup1(6, itu4) = 0.89247e-03_realType
+            tup1(7, itu4) = 0.84501e-03_realType
+            tup1(8, itu4) = 0.77980e-03_realType
+            tup1(9, itu4) = 0.69604e-03_realType
+            tup1(10, itu4) = 0.59785e-03_realType
+            tup1(11, itu4) = 0.49284e-03_realType
+            tup1(12, itu4) = 0.38934e-03_realType
+            tup1(13, itu4) = 0.29358e-03_realType
+            tup1(14, itu4) = 0.20915e-03_realType
+            tup1(15, itu4) = 0.13748e-03_realType
+            tup1(16, itu4) = 0.79309e-04_realType
+            tup1(17, itu4) = 0.36899e-04_realType
+            tup1(18, itu4) = 0.90827e-05_realType
+            tup1(19, itu4) = -0.84239e-05_realType
+            tup1(20, itu4) = -0.18715e-04_realType
+            tup1(21, itu4) = -0.24036e-04_realType
+            tup1(22, itu4) = -0.26019e-04_realType
+            tup1(23, itu4) = -0.25851e-04_realType
+            tup1(24, itu4) = -0.24377e-04_realType
+            tup1(25, itu4) = -0.22183e-04_realType
+            tup1(26, itu4) = -0.19673e-04_realType
+            tup1(27, itu4) = -0.17109e-04_realType
+            tup1(28, itu4) = -0.14652e-04_realType
+            tup1(29, itu4) = -0.12395e-04_realType
+            tup1(30, itu4) = -0.10383e-04_realType
+            tup1(31, itu4) = -0.86269e-05_realType
+            tup1(32, itu4) = -0.71206e-05_realType
+            tup1(33, itu4) = -0.58456e-05_realType
+            tup1(34, itu4) = -0.47776e-05_realType
+
+            tup2(1, itu4) = -0.73381e-05_realType
+            tup2(2, itu4) = -0.67793e-05_realType
+            tup2(3, itu4) = -0.58449e-05_realType
+            tup2(4, itu4) = -0.49347e-05_realType
+            tup2(5, itu4) = -0.41793e-05_realType
+            tup2(6, itu4) = -0.40232e-05_realType
+            tup2(7, itu4) = -0.54838e-05_realType
+            tup2(8, itu4) = -0.91450e-05_realType
+            tup2(9, itu4) = -0.12056e-04_realType
+            tup2(10, itu4) = -0.13441e-04_realType
+            tup2(11, itu4) = -0.12747e-04_realType
+            tup2(12, itu4) = -0.10761e-04_realType
+            tup2(13, itu4) = -0.84210e-05_realType
+            tup2(14, itu4) = -0.62599e-05_realType
+            tup2(15, itu4) = -0.45849e-05_realType
+            tup2(16, itu4) = -0.34480e-05_realType
+            tup2(17, itu4) = -0.19626e-05_realType
+            tup2(18, itu4) = -0.10949e-05_realType
+            tup2(19, itu4) = -0.59189e-06_realType
+            tup2(20, itu4) = -0.30260e-06_realType
+            tup2(21, itu4) = -0.14033e-06_realType
+            tup2(22, itu4) = -0.51286e-07_realType
+            tup2(23, itu4) = -0.65167e-08_realType
+            tup2(24, itu4) = 0.14883e-07_realType
+            tup2(25, itu4) = 0.22563e-07_realType
+            tup2(26, itu4) = 0.23657e-07_realType
+            tup2(27, itu4) = 0.21434e-07_realType
+            tup2(28, itu4) = 0.18140e-07_realType
+            tup2(29, itu4) = 0.14649e-07_realType
+            tup2(30, itu4) = 0.11482e-07_realType
+            tup2(31, itu4) = 0.88134e-08_realType
+            tup2(32, itu4) = 0.66562e-08_realType
+            tup2(33, itu4) = 0.49705e-08_realType
+            tup2(34, itu4) = 0.36802e-08_realType
+
+            tup3(1, itu4) = -0.26409e-04_realType
+            tup3(2, itu4) = -0.19723e-04_realType
+            tup3(3, itu4) = -0.15989e-04_realType
+            tup3(4, itu4) = -0.13800e-04_realType
+            tup3(5, itu4) = -0.12497e-04_realType
+            tup3(6, itu4) = -0.11352e-04_realType
+            tup3(7, itu4) = -0.94581e-05_realType
+            tup3(8, itu4) = -0.64516e-05_realType
+            tup3(9, itu4) = -0.38268e-05_realType
+            tup3(10, itu4) = -0.19060e-05_realType
+            tup3(11, itu4) = -0.82598e-06_realType
+            tup3(12, itu4) = -0.31494e-06_realType
+            tup3(13, itu4) = -0.96327e-07_realType
+            tup3(14, itu4) = -0.11829e-07_realType
+            tup3(15, itu4) = 0.30746e-07_realType
+            tup3(16, itu4) = 0.70108e-07_realType
+            tup3(17, itu4) = 0.37205e-07_realType
+            tup3(18, itu4) = 0.19880e-07_realType
+            tup3(19, itu4) = 0.10689e-07_realType
+            tup3(20, itu4) = 0.57169e-08_realType
+            tup3(21, itu4) = 0.30479e-08_realType
+            tup3(22, itu4) = 0.15779e-08_realType
+            tup3(23, itu4) = 0.82116e-09_realType
+            tup3(24, itu4) = 0.40692e-09_realType
+            tup3(25, itu4) = 0.19883e-09_realType
+            tup3(26, itu4) = 0.90676e-10_realType
+            tup3(27, itu4) = 0.39255e-10_realType
+            tup3(28, itu4) = 0.14131e-10_realType
+            tup3(29, itu4) = 0.32481e-11_realType
+            tup3(30, itu4) = -0.10665e-11_realType
+            tup3(31, itu4) = -0.24047e-11_realType
+            tup3(32, itu4) = -0.24412e-11_realType
+            tup3(33, itu4) = -0.20525e-11_realType
+            tup3(34, itu4) = -0.15829e-11_realType
+
+            ! Constants for nut.
+
+            tup0(1, itu5) = 0.23125e-04_realType
+            tup0(2, itu5) = 0.21542e-03_realType
+            tup0(3, itu5) = 0.11557e-02_realType
+            tup0(4, itu5) = 0.45993e-02_realType
+            tup0(5, itu5) = 0.14891e-01_realType
+            tup0(6, itu5) = 0.40523e-01_realType
+            tup0(7, itu5) = 0.93406e-01_realType
+            tup0(8, itu5) = 0.19626e+00_realType
+            tup0(9, itu5) = 0.44036e+00_realType
+            tup0(10, itu5) = 0.83627e+00_realType
+            tup0(11, itu5) = 0.14308e+01_realType
+            tup0(12, itu5) = 0.22842e+01_realType
+            tup0(13, itu5) = 0.34675e+01_realType
+            tup0(14, itu5) = 0.50606e+01_realType
+            tup0(15, itu5) = 0.71506e+01_realType
+            tup0(16, itu5) = 0.98324e+01_realType
+            tup0(17, itu5) = 0.13213e+02_realType
+            tup0(18, itu5) = 0.17422e+02_realType
+            tup0(19, itu5) = 0.22609e+02_realType
+            tup0(20, itu5) = 0.28946e+02_realType
+            tup0(21, itu5) = 0.36629e+02_realType
+            tup0(22, itu5) = 0.45870e+02_realType
+            tup0(23, itu5) = 0.56903e+02_realType
+            tup0(24, itu5) = 0.69976e+02_realType
+            tup0(25, itu5) = 0.85344e+02_realType
+            tup0(26, itu5) = 0.10326e+03_realType
+            tup0(27, itu5) = 0.12397e+03_realType
+            tup0(28, itu5) = 0.14767e+03_realType
+            tup0(29, itu5) = 0.17452e+03_realType
+            tup0(30, itu5) = 0.20457e+03_realType
+            tup0(31, itu5) = 0.23779e+03_realType
+            tup0(32, itu5) = 0.27396e+03_realType
+            tup0(33, itu5) = 0.31270e+03_realType
+            tup0(34, itu5) = 0.35337e+03_realType
+
+            tup1(1, itu5) = 0.37001e-03_realType
+            tup1(2, itu5) = 0.15197e-02_realType
+            tup1(3, itu5) = 0.46103e-02_realType
+            tup1(4, itu5) = 0.11376e-01_realType
+            tup1(5, itu5) = 0.23543e-01_realType
+            tup1(6, itu5) = 0.40906e-01_realType
+            tup1(7, itu5) = 0.64800e-01_realType
+            tup1(8, itu5) = 0.11582e+00_realType
+            tup1(9, itu5) = 0.17214e+00_realType
+            tup1(10, itu5) = 0.21559e+00_realType
+            tup1(11, itu5) = 0.25612e+00_realType
+            tup1(12, itu5) = 0.29399e+00_realType
+            tup1(13, itu5) = 0.32838e+00_realType
+            tup1(14, itu5) = 0.35836e+00_realType
+            tup1(15, itu5) = 0.38343e+00_realType
+            tup1(16, itu5) = 0.40383e+00_realType
+            tup1(17, itu5) = 0.42066e+00_realType
+            tup1(18, itu5) = 0.43496e+00_realType
+            tup1(19, itu5) = 0.44719e+00_realType
+            tup1(20, itu5) = 0.45758e+00_realType
+            tup1(21, itu5) = 0.46621e+00_realType
+            tup1(22, itu5) = 0.47302e+00_realType
+            tup1(23, itu5) = 0.47789e+00_realType
+            tup1(24, itu5) = 0.48067e+00_realType
+            tup1(25, itu5) = 0.48112e+00_realType
+            tup1(26, itu5) = 0.47894e+00_realType
+            tup1(27, itu5) = 0.47384e+00_realType
+            tup1(28, itu5) = 0.46551e+00_realType
+            tup1(29, itu5) = 0.45364e+00_realType
+            tup1(30, itu5) = 0.43792e+00_realType
+            tup1(31, itu5) = 0.41811e+00_realType
+            tup1(32, itu5) = 0.39406e+00_realType
+            tup1(33, itu5) = 0.36565e+00_realType
+            tup1(34, itu5) = 0.33291e+00_realType
+
+            tup2(1, itu5) = -0.15144e-02_realType
+            tup2(2, itu5) = -0.21673e-02_realType
+            tup2(3, itu5) = -0.22502e-02_realType
+            tup2(4, itu5) = -0.80447e-03_realType
+            tup2(5, itu5) = 0.27762e-02_realType
+            tup2(6, itu5) = 0.17677e-02_realType
+            tup2(7, itu5) = -0.10701e-01_realType
+            tup2(8, itu5) = 0.22380e-01_realType
+            tup2(9, itu5) = 0.85050e-02_realType
+            tup2(10, itu5) = 0.62055e-02_realType
+            tup2(11, itu5) = 0.49141e-02_realType
+            tup2(12, itu5) = 0.39356e-02_realType
+            tup2(13, itu5) = 0.30516e-02_realType
+            tup2(14, itu5) = 0.22492e-02_realType
+            tup2(15, itu5) = 0.15552e-02_realType
+            tup2(16, itu5) = 0.10053e-02_realType
+            tup2(17, itu5) = 0.70099e-03_realType
+            tup2(18, itu5) = 0.50284e-03_realType
+            tup2(19, itu5) = 0.36969e-03_realType
+            tup2(20, itu5) = 0.27228e-03_realType
+            tup2(21, itu5) = 0.20061e-03_realType
+            tup2(22, itu5) = 0.14330e-03_realType
+            tup2(23, itu5) = 0.98897e-04_realType
+            tup2(24, itu5) = 0.62505e-04_realType
+            tup2(25, itu5) = 0.33283e-04_realType
+            tup2(26, itu5) = 0.68299e-05_realType
+            tup2(27, itu5) = -0.14485e-04_realType
+            tup2(28, itu5) = -0.32453e-04_realType
+            tup2(29, itu5) = -0.47992e-04_realType
+            tup2(30, itu5) = -0.61021e-04_realType
+            tup2(31, itu5) = -0.71877e-04_realType
+            tup2(32, itu5) = -0.79636e-04_realType
+            tup2(33, itu5) = -0.86125e-04_realType
+            tup2(34, itu5) = -0.90063e-04_realType
+
+            tup3(1, itu5) = 0.66740e-02_realType
+            tup3(2, itu5) = 0.93418e-02_realType
+            tup3(3, itu5) = 0.10767e-01_realType
+            tup3(4, itu5) = 0.97000e-02_realType
+            tup3(5, itu5) = 0.58162e-02_realType
+            tup3(6, itu5) = 0.58753e-02_realType
+            tup3(7, itu5) = 0.14886e-01_realType
+            tup3(8, itu5) = -0.21765e-02_realType
+            tup3(9, itu5) = 0.66623e-03_realType
+            tup3(10, itu5) = 0.46817e-03_realType
+            tup3(11, itu5) = 0.24852e-03_realType
+            tup3(12, itu5) = 0.10079e-03_realType
+            tup3(13, itu5) = 0.25392e-04_realType
+            tup3(14, itu5) = -0.29448e-05_realType
+            tup3(15, itu5) = -0.56548e-05_realType
+            tup3(16, itu5) = 0.17092e-05_realType
+            tup3(17, itu5) = 0.17225e-05_realType
+            tup3(18, itu5) = 0.97328e-06_realType
+            tup3(19, itu5) = 0.61893e-07_realType
+            tup3(20, itu5) = -0.52008e-06_realType
+            tup3(21, itu5) = -0.93452e-06_realType
+            tup3(22, itu5) = -0.10961e-05_realType
+            tup3(23, itu5) = -0.11719e-05_realType
+            tup3(24, itu5) = -0.11584e-05_realType
+            tup3(25, itu5) = -0.11184e-05_realType
+            tup3(26, itu5) = -0.10094e-05_realType
+            tup3(27, itu5) = -0.90341e-06_realType
+            tup3(28, itu5) = -0.79513e-06_realType
+            tup3(29, itu5) = -0.68439e-06_realType
+            tup3(30, itu5) = -0.57895e-06_realType
+            tup3(31, itu5) = -0.47935e-06_realType
+            tup3(32, itu5) = -0.39672e-06_realType
+            tup3(33, itu5) = -0.31868e-06_realType
+            tup3(34, itu5) = -0.25374e-06_realType
+
+        end select
+
+    end subroutine initCurveFitDataVf
+
+    subroutine terminate(routineName, errorMessage)
+        !
+        !       terminate writes an error message to standard output and
+        !       terminates the execution of the program.
+        !
+        use constants
+        use communication, only: adflow_comm_world, myid
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        character(len=*), intent(in) :: routineName
+        character(len=*), intent(in) :: errorMessage
 #ifndef USE_TAPENADE
 
-    !
-    !      Local parameter
-    !
-    integer, parameter :: maxCharLine = 55
-    !
-    !      Local variables
-    !
-    integer :: ierr, len, i2
-    logical :: firstTime
+        !
+        !      Local parameter
+        !
+        integer, parameter :: maxCharLine = 55
+        !
+        !      Local variables
+        !
+        integer :: ierr, len, i2
+        logical :: firstTime
 
-    character(len=len_trim(errorMessage)) :: message
-    character(len=8) :: integerString
+        character(len=len_trim(errorMessage)) :: message
+        character(len=8) :: integerString
 
-    !
-    ! Copy the errorMessage into message. It is not possible to work
-    ! with errorMessage directly, because it is modified in this
-    ! routine. Sometimes a constant string is passed to this routine
-    ! and some compilers simply fail then.
+        !
+        ! Copy the errorMessage into message. It is not possible to work
+        ! with errorMessage directly, because it is modified in this
+        ! routine. Sometimes a constant string is passed to this routine
+        ! and some compilers simply fail then.
 
-    message = errorMessage
+        message = errorMessage
 
-    ! Print a nice error message. In case of a parallel executable
-    ! also the processor id is printed.
+        ! Print a nice error message. In case of a parallel executable
+        ! also the processor id is printed.
 
-    print "(a)", "#"
-    print "(a)", "#--------------------------- !!! Error !!! &
-         &----------------------------"
+        print "(a)", "#"
+        print "(a)", "#--------------------------- !!! Error !!! &
+             &----------------------------"
 
-    write(integerString,"(i8)") myID
-    integerString = adjustl(integerString)
+        write (integerString, "(i8)") myID
+        integerString = adjustl(integerString)
 
-    print "(2a)", "#* Terminate called by processor ", &
-         trim(integerString)
+        print "(2a)", "#* Terminate called by processor ", &
+            trim(integerString)
 
-    ! Write the header of the error message.
+        ! Write the header of the error message.
 
-    print "(2a)", "#* Run-time error in procedure ", &
-         trim(routineName)
+        print "(2a)", "#* Run-time error in procedure ", &
+            trim(routineName)
 
-    ! Loop to write the error message. If the message is too long it
-    ! is split over several lines.
+        ! Loop to write the error message. If the message is too long it
+        ! is split over several lines.
 
-    firstTime = .true.
-    do
-       ! Determine the remaining error message to be written.
-       ! If longer than the maximum number of characters allowed
-       ! on a line, it is attempted to split the message.
+        firstTime = .true.
+        do
+            ! Determine the remaining error message to be written.
+            ! If longer than the maximum number of characters allowed
+            ! on a line, it is attempted to split the message.
 
-       message = adjustl(message)
-       len = len_trim(message)
-       i2  = min(maxCharLine,len)
+            message = adjustl(message)
+            len = len_trim(message)
+            i2 = min(maxCharLine, len)
 
-       if(i2 < len) i2 = index(message(:i2), " ", .true.) - 1
-       if(i2 < 0)   i2 = index(message, " ") - 1
-       if(i2 < 0)   i2 = len
+            if (i2 < len) i2 = index(message(:i2), " ", .true.) - 1
+            if (i2 < 0) i2 = index(message, " ") - 1
+            if (i2 < 0) i2 = len
 
-       ! Write this part of the error message. If it is the first
-       ! line of the message some additional stuff is printed.
+            ! Write this part of the error message. If it is the first
+            ! line of the message some additional stuff is printed.
 
-       if( firstTime ) then
-          print "(2a)", "#* Error message: ", &
-               trim(message(:i2))
-          firstTime = .false.
-       else
-          print "(2a)", "#*                ", &
-               trim(message(:i2))
-       endif
+            if (firstTime) then
+                print "(2a)", "#* Error message: ", &
+                    trim(message(:i2))
+                firstTime = .false.
+            else
+                print "(2a)", "#*                ", &
+                    trim(message(:i2))
+            end if
 
-       ! Exit the loop if the entire message has been written.
+            ! Exit the loop if the entire message has been written.
 
-       if(i2 == len) exit
+            if (i2 == len) exit
 
-       ! Adapt the string for the next part to be written.
+            ! Adapt the string for the next part to be written.
 
-       message = message(i2+1:)
+            message = message(i2 + 1:)
 
-    enddo
+        end do
 
-    ! Write the trailing message.
+        ! Write the trailing message.
 
-    print "(a)", "#*"
-    print "(a)", "#* Now exiting"
-    print "(a)", "#------------------------------------------&
-         &----------------------------"
-    print "(a)", "#"
+        print "(a)", "#*"
+        print "(a)", "#* Now exiting"
+        print "(a)", "#------------------------------------------&
+             &----------------------------"
+        print "(a)", "#"
 
-    ! Call abort and stop the program. This stop should be done in
-    ! abort, but just to be sure.
+        ! Call abort and stop the program. This stop should be done in
+        ! abort, but just to be sure.
 
-    call mpi_abort(ADflow_comm_world, 1, ierr)
-    stop
+        call mpi_abort(ADflow_comm_world, 1, ierr)
+        stop
 
 #endif
 
-  end subroutine terminate
+    end subroutine terminate
 
 end module turbCurveFits
diff --git a/src/turbulence/turbMod.F90 b/src/turbulence/turbMod.F90
index b2dd90e64..31ef814f5 100644
--- a/src/turbulence/turbMod.F90
+++ b/src/turbulence/turbMod.F90
@@ -1,55 +1,55 @@
 module turbMod
-  !
-  !       This local module contains variables used when the turbulence
-  !       equations are solved.
-  !
-  use precision
-  implicit none
-  save
+    !
+    !       This local module contains variables used when the turbulence
+    !       equations are solved.
+    !
+    use precision
+    implicit none
+    save
 
-  ! secondOrd:  whether or not a second order discretization for
-  !             the advective terms must be used.
-  ! sig1, sig2: Sigma coefficients in the diffusion terms of the
-  !             different turbulence models.
+    ! secondOrd:  whether or not a second order discretization for
+    !             the advective terms must be used.
+    ! sig1, sig2: Sigma coefficients in the diffusion terms of the
+    !             different turbulence models.
 
-  logical :: secondOrd
-  real(kind=realType) :: sig1, sig2
+    logical :: secondOrd
+    real(kind=realType) :: sig1, sig2
 
-  ! dvt:     Pointer, which points to an unused part of dw. It is
-  !          used for temporary storage of residual.
-  ! vort:    Pointer, which points to an unused part of dw. It is
-  !          used for temporary storage of the magnitude of
-  !          vorticity squared.
-  ! prod:    Pointer, which points to an unused part of dw. It is
-  !          used for temporary storage of the unscaled production
-  !          term.
-  ! f1:      F1 blending function in the SST model.
-  ! kwCD:   Cross diffusion term in the k-omega type models.
-  ! ktCD:   Cross diffusion term in the k-tau model
-  ! sct:     Time scale in the v2-f model.
-  ! scl2:    Length scale in the v2-f model.
-  ! strain2: Square of the strain.
+    ! dvt:     Pointer, which points to an unused part of dw. It is
+    !          used for temporary storage of residual.
+    ! vort:    Pointer, which points to an unused part of dw. It is
+    !          used for temporary storage of the magnitude of
+    !          vorticity squared.
+    ! prod:    Pointer, which points to an unused part of dw. It is
+    !          used for temporary storage of the unscaled production
+    !          term.
+    ! f1:      F1 blending function in the SST model.
+    ! kwCD:   Cross diffusion term in the k-omega type models.
+    ! ktCD:   Cross diffusion term in the k-tau model
+    ! sct:     Time scale in the v2-f model.
+    ! scl2:    Length scale in the v2-f model.
+    ! strain2: Square of the strain.
 
-  real(kind=realType), dimension(:,:,:,:), pointer :: dvt
-  real(kind=realType), dimension(:,:,:),   pointer :: vort
-  real(kind=realType), dimension(:,:,:),   pointer :: prod
-  real(kind=realType), dimension(:,:,:),   pointer :: f1
-  real(kind=realType), dimension(:,:,:),   pointer :: kwCD
-  real(kind=realType), dimension(:,:,:),   pointer :: ktCD
-  real(kind=realType), dimension(:,:,:),   pointer :: sct
-  real(kind=realType), dimension(:,:,:),   pointer :: scl2
-  real(kind=realType), dimension(:,:,:),   pointer :: strain2
+    real(kind=realType), dimension(:, :, :, :), pointer :: dvt
+    real(kind=realType), dimension(:, :, :), pointer :: vort
+    real(kind=realType), dimension(:, :, :), pointer :: prod
+    real(kind=realType), dimension(:, :, :), pointer :: f1
+    real(kind=realType), dimension(:, :, :), pointer :: kwCD
+    real(kind=realType), dimension(:, :, :), pointer :: ktCD
+    real(kind=realType), dimension(:, :, :), pointer :: sct
+    real(kind=realType), dimension(:, :, :), pointer :: scl2
+    real(kind=realType), dimension(:, :, :), pointer :: strain2
 
 #ifndef USE_TAPENADE
-  real(kind=realType), dimension(:,:,:,:), pointer :: dvtd
-  real(kind=realType), dimension(:,:,:),   pointer :: vortd
-  real(kind=realType), dimension(:,:,:),   pointer :: prodd
-  real(kind=realType), dimension(:,:,:),   pointer :: f1d
-  real(kind=realType), dimension(:,:,:),   pointer :: kwCDd
-  real(kind=realType), dimension(:,:,:),   pointer :: ktCDd
-  real(kind=realType), dimension(:,:,:),   pointer :: sctd
-  real(kind=realType), dimension(:,:,:),   pointer :: scl2d
-  real(kind=realType), dimension(:,:,:),   pointer :: strain2d
+    real(kind=realType), dimension(:, :, :, :), pointer :: dvtd
+    real(kind=realType), dimension(:, :, :), pointer :: vortd
+    real(kind=realType), dimension(:, :, :), pointer :: prodd
+    real(kind=realType), dimension(:, :, :), pointer :: f1d
+    real(kind=realType), dimension(:, :, :), pointer :: kwCDd
+    real(kind=realType), dimension(:, :, :), pointer :: ktCDd
+    real(kind=realType), dimension(:, :, :), pointer :: sctd
+    real(kind=realType), dimension(:, :, :), pointer :: scl2d
+    real(kind=realType), dimension(:, :, :), pointer :: strain2d
 #endif
 end module turbMod
 
diff --git a/src/turbulence/turbUtils.F90 b/src/turbulence/turbUtils.F90
index 257d916e3..b8159a0b2 100644
--- a/src/turbulence/turbUtils.F90
+++ b/src/turbulence/turbUtils.F90
@@ -2,2148 +2,2143 @@ module turbUtils
 
 contains
 
-  subroutine prodKatoLaunder
-    !
-    !       prodKatoLaunder computes the turbulent production term using
-    !       the Kato-Launder formulation.
-    !
-    use constants
-    use blockPointers, only : nx, ny, nz, il, jl, kl, w, si, sj, sk, vol, sectionID, scratch
-    use flowVarRefState, only : timeRef
-    use section, only : sections
-    use turbMod, only : prod
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, ii
-
-    real(kind=realType) :: uux, uuy, uuz, vvx, vvy, vvz, wwx, wwy, wwz
-    real(kind=realType) :: qxx, qyy, qzz, qxy, qxz, qyz, sijsij
-    real(kind=realType) :: oxy, oxz, oyz, oijoij
-    real(kind=realType) :: fact, omegax, omegay, omegaz
-
-    ! Determine the non-dimensional wheel speed of this block.
-    ! The vorticity term, which appears in Kato-Launder is of course
-    ! not frame invariant. To approximate frame invariance the wheel
-    ! speed should be substracted from oxy, oxz and oyz, which results
-    ! in the vorticity in the rotating frame. However some people
-    ! claim that the absolute vorticity should be used to obtain the
-    ! best results. In that omega should be set to zero.
-
-    omegax = timeRef*sections(sectionID)%rotRate(1)
-    omegay = timeRef*sections(sectionID)%rotRate(2)
-    omegaz = timeRef*sections(sectionID)%rotRate(3)
-
-    ! Loop over the cell centers of the given block. It may be more
-    ! efficient to loop over the faces and to scatter the gradient,
-    ! but in that case the gradients for u, v and w must be stored.
-    ! In the current approach no extra memory is needed.
+    subroutine prodKatoLaunder
+        !
+        !       prodKatoLaunder computes the turbulent production term using
+        !       the Kato-Launder formulation.
+        !
+        use constants
+        use blockPointers, only: nx, ny, nz, il, jl, kl, w, si, sj, sk, vol, sectionID, scratch
+        use flowVarRefState, only: timeRef
+        use section, only: sections
+        use turbMod, only: prod
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, ii
+
+        real(kind=realType) :: uux, uuy, uuz, vvx, vvy, vvz, wwx, wwy, wwz
+        real(kind=realType) :: qxx, qyy, qzz, qxy, qxz, qyz, sijsij
+        real(kind=realType) :: oxy, oxz, oyz, oijoij
+        real(kind=realType) :: fact, omegax, omegay, omegaz
+
+        ! Determine the non-dimensional wheel speed of this block.
+        ! The vorticity term, which appears in Kato-Launder is of course
+        ! not frame invariant. To approximate frame invariance the wheel
+        ! speed should be substracted from oxy, oxz and oyz, which results
+        ! in the vorticity in the rotating frame. However some people
+        ! claim that the absolute vorticity should be used to obtain the
+        ! best results. In that omega should be set to zero.
+
+        omegax = timeRef * sections(sectionID)%rotRate(1)
+        omegay = timeRef * sections(sectionID)%rotRate(2)
+        omegaz = timeRef * sections(sectionID)%rotRate(3)
+
+        ! Loop over the cell centers of the given block. It may be more
+        ! efficient to loop over the faces and to scatter the gradient,
+        ! but in that case the gradients for u, v and w must be stored.
+        ! In the current approach no extra memory is needed.
 #ifdef TAPENADE_REVERSE
-    !$AD II-LOOP
-    do ii=0,nx*ny*nz-1
-       i = mod(ii, nx) + 2
-       j = mod(ii/nx, ny) + 2
-       k = ii/(nx*ny) + 2
+        !$AD II-LOOP
+        do ii = 0, nx * ny * nz - 1
+            i = mod(ii, nx) + 2
+            j = mod(ii / nx, ny) + 2
+            k = ii / (nx * ny) + 2
 #else
-       do k=2, kl
-          do j=2, jl
-             do i=2, il
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
 #endif
 
-                ! Compute the gradient of u in the cell center. Use is made
-                ! of the fact that the surrounding normals sum up to zero,
-                ! such that the cell i,j,k does not give a contribution.
-                ! The gradient is scaled by a factor 2*vol.
-
-                uux = w(i+1,j,k,ivx)*si(i,j,k,1) - w(i-1,j,k,ivx)*si(i-1,j,k,1) &
-                     + w(i,j+1,k,ivx)*sj(i,j,k,1) - w(i,j-1,k,ivx)*sj(i,j-1,k,1) &
-                     + w(i,j,k+1,ivx)*sk(i,j,k,1) - w(i,j,k-1,ivx)*sk(i,j,k-1,1)
-                uuy = w(i+1,j,k,ivx)*si(i,j,k,2) - w(i-1,j,k,ivx)*si(i-1,j,k,2) &
-                     + w(i,j+1,k,ivx)*sj(i,j,k,2) - w(i,j-1,k,ivx)*sj(i,j-1,k,2) &
-                     + w(i,j,k+1,ivx)*sk(i,j,k,2) - w(i,j,k-1,ivx)*sk(i,j,k-1,2)
-                uuz = w(i+1,j,k,ivx)*si(i,j,k,3) - w(i-1,j,k,ivx)*si(i-1,j,k,3) &
-                     + w(i,j+1,k,ivx)*sj(i,j,k,3) - w(i,j-1,k,ivx)*sj(i,j-1,k,3) &
-                     + w(i,j,k+1,ivx)*sk(i,j,k,3) - w(i,j,k-1,ivx)*sk(i,j,k-1,3)
-
-                ! Idem for the gradient of v.
-
-                vvx = w(i+1,j,k,ivy)*si(i,j,k,1) - w(i-1,j,k,ivy)*si(i-1,j,k,1) &
-                     + w(i,j+1,k,ivy)*sj(i,j,k,1) - w(i,j-1,k,ivy)*sj(i,j-1,k,1) &
-                     + w(i,j,k+1,ivy)*sk(i,j,k,1) - w(i,j,k-1,ivy)*sk(i,j,k-1,1)
-                vvy = w(i+1,j,k,ivy)*si(i,j,k,2) - w(i-1,j,k,ivy)*si(i-1,j,k,2) &
-                     + w(i,j+1,k,ivy)*sj(i,j,k,2) - w(i,j-1,k,ivy)*sj(i,j-1,k,2) &
-                     + w(i,j,k+1,ivy)*sk(i,j,k,2) - w(i,j,k-1,ivy)*sk(i,j,k-1,2)
-                vvz = w(i+1,j,k,ivy)*si(i,j,k,3) - w(i-1,j,k,ivy)*si(i-1,j,k,3) &
-                     + w(i,j+1,k,ivy)*sj(i,j,k,3) - w(i,j-1,k,ivy)*sj(i,j-1,k,3) &
-                     + w(i,j,k+1,ivy)*sk(i,j,k,3) - w(i,j,k-1,ivy)*sk(i,j,k-1,3)
-
-                ! And for the gradient of w.
-
-                wwx = w(i+1,j,k,ivz)*si(i,j,k,1) - w(i-1,j,k,ivz)*si(i-1,j,k,1) &
-                     + w(i,j+1,k,ivz)*sj(i,j,k,1) - w(i,j-1,k,ivz)*sj(i,j-1,k,1) &
-                     + w(i,j,k+1,ivz)*sk(i,j,k,1) - w(i,j,k-1,ivz)*sk(i,j,k-1,1)
-                wwy = w(i+1,j,k,ivz)*si(i,j,k,2) - w(i-1,j,k,ivz)*si(i-1,j,k,2) &
-                     + w(i,j+1,k,ivz)*sj(i,j,k,2) - w(i,j-1,k,ivz)*sj(i,j-1,k,2) &
-                     + w(i,j,k+1,ivz)*sk(i,j,k,2) - w(i,j,k-1,ivz)*sk(i,j,k-1,2)
-                wwz = w(i+1,j,k,ivz)*si(i,j,k,3) - w(i-1,j,k,ivz)*si(i-1,j,k,3) &
-                     + w(i,j+1,k,ivz)*sj(i,j,k,3) - w(i,j-1,k,ivz)*sj(i,j-1,k,3) &
-                     + w(i,j,k+1,ivz)*sk(i,j,k,3) - w(i,j,k-1,ivz)*sk(i,j,k-1,3)
-
-                ! Compute the strain and vorticity terms. The multiplication
-                ! is present to obtain the correct gradients. Note that
-                ! the wheel speed is substracted from the vorticity terms.
-
-                fact = half/vol(i,j,k)
-
-                qxx = fact*uux
-                qyy = fact*vvy
-                qzz = fact*wwz
-
-                qxy = fact*half*(uuy + vvx)
-                qxz = fact*half*(uuz + wwx)
-                qyz = fact*half*(vvz + wwy)
-
-                oxy = fact*half*(vvx - uuy) - omegaz
-                oxz = fact*half*(uuz - wwx) - omegay
-                oyz = fact*half*(wwy - vvz) - omegax
-
-                ! Compute the summation of the strain and vorticity tensors.
-
-                sijsij = two*(qxy**2 + qxz**2 + qyz**2) &
-                     +      qxx**2 + qyy**2 + qzz**2
-                oijoij = two*(oxy**2 + oxz**2 + oyz**2)
-
-                ! Compute the production term.
-
-                scratch(i,j,k, iprod) = two*sqrt(sijsij*oijoij)
+                        ! Compute the gradient of u in the cell center. Use is made
+                        ! of the fact that the surrounding normals sum up to zero,
+                        ! such that the cell i,j,k does not give a contribution.
+                        ! The gradient is scaled by a factor 2*vol.
+
+                        uux = w(i + 1, j, k, ivx) * si(i, j, k, 1) - w(i - 1, j, k, ivx) * si(i - 1, j, k, 1) &
+                              + w(i, j + 1, k, ivx) * sj(i, j, k, 1) - w(i, j - 1, k, ivx) * sj(i, j - 1, k, 1) &
+                              + w(i, j, k + 1, ivx) * sk(i, j, k, 1) - w(i, j, k - 1, ivx) * sk(i, j, k - 1, 1)
+                        uuy = w(i + 1, j, k, ivx) * si(i, j, k, 2) - w(i - 1, j, k, ivx) * si(i - 1, j, k, 2) &
+                              + w(i, j + 1, k, ivx) * sj(i, j, k, 2) - w(i, j - 1, k, ivx) * sj(i, j - 1, k, 2) &
+                              + w(i, j, k + 1, ivx) * sk(i, j, k, 2) - w(i, j, k - 1, ivx) * sk(i, j, k - 1, 2)
+                        uuz = w(i + 1, j, k, ivx) * si(i, j, k, 3) - w(i - 1, j, k, ivx) * si(i - 1, j, k, 3) &
+                              + w(i, j + 1, k, ivx) * sj(i, j, k, 3) - w(i, j - 1, k, ivx) * sj(i, j - 1, k, 3) &
+                              + w(i, j, k + 1, ivx) * sk(i, j, k, 3) - w(i, j, k - 1, ivx) * sk(i, j, k - 1, 3)
+
+                        ! Idem for the gradient of v.
+
+                        vvx = w(i + 1, j, k, ivy) * si(i, j, k, 1) - w(i - 1, j, k, ivy) * si(i - 1, j, k, 1) &
+                              + w(i, j + 1, k, ivy) * sj(i, j, k, 1) - w(i, j - 1, k, ivy) * sj(i, j - 1, k, 1) &
+                              + w(i, j, k + 1, ivy) * sk(i, j, k, 1) - w(i, j, k - 1, ivy) * sk(i, j, k - 1, 1)
+                        vvy = w(i + 1, j, k, ivy) * si(i, j, k, 2) - w(i - 1, j, k, ivy) * si(i - 1, j, k, 2) &
+                              + w(i, j + 1, k, ivy) * sj(i, j, k, 2) - w(i, j - 1, k, ivy) * sj(i, j - 1, k, 2) &
+                              + w(i, j, k + 1, ivy) * sk(i, j, k, 2) - w(i, j, k - 1, ivy) * sk(i, j, k - 1, 2)
+                        vvz = w(i + 1, j, k, ivy) * si(i, j, k, 3) - w(i - 1, j, k, ivy) * si(i - 1, j, k, 3) &
+                              + w(i, j + 1, k, ivy) * sj(i, j, k, 3) - w(i, j - 1, k, ivy) * sj(i, j - 1, k, 3) &
+                              + w(i, j, k + 1, ivy) * sk(i, j, k, 3) - w(i, j, k - 1, ivy) * sk(i, j, k - 1, 3)
+
+                        ! And for the gradient of w.
+
+                        wwx = w(i + 1, j, k, ivz) * si(i, j, k, 1) - w(i - 1, j, k, ivz) * si(i - 1, j, k, 1) &
+                              + w(i, j + 1, k, ivz) * sj(i, j, k, 1) - w(i, j - 1, k, ivz) * sj(i, j - 1, k, 1) &
+                              + w(i, j, k + 1, ivz) * sk(i, j, k, 1) - w(i, j, k - 1, ivz) * sk(i, j, k - 1, 1)
+                        wwy = w(i + 1, j, k, ivz) * si(i, j, k, 2) - w(i - 1, j, k, ivz) * si(i - 1, j, k, 2) &
+                              + w(i, j + 1, k, ivz) * sj(i, j, k, 2) - w(i, j - 1, k, ivz) * sj(i, j - 1, k, 2) &
+                              + w(i, j, k + 1, ivz) * sk(i, j, k, 2) - w(i, j, k - 1, ivz) * sk(i, j, k - 1, 2)
+                        wwz = w(i + 1, j, k, ivz) * si(i, j, k, 3) - w(i - 1, j, k, ivz) * si(i - 1, j, k, 3) &
+                              + w(i, j + 1, k, ivz) * sj(i, j, k, 3) - w(i, j - 1, k, ivz) * sj(i, j - 1, k, 3) &
+                              + w(i, j, k + 1, ivz) * sk(i, j, k, 3) - w(i, j, k - 1, ivz) * sk(i, j, k - 1, 3)
+
+                        ! Compute the strain and vorticity terms. The multiplication
+                        ! is present to obtain the correct gradients. Note that
+                        ! the wheel speed is substracted from the vorticity terms.
+
+                        fact = half / vol(i, j, k)
+
+                        qxx = fact * uux
+                        qyy = fact * vvy
+                        qzz = fact * wwz
+
+                        qxy = fact * half * (uuy + vvx)
+                        qxz = fact * half * (uuz + wwx)
+                        qyz = fact * half * (vvz + wwy)
+
+                        oxy = fact * half * (vvx - uuy) - omegaz
+                        oxz = fact * half * (uuz - wwx) - omegay
+                        oyz = fact * half * (wwy - vvz) - omegax
+
+                        ! Compute the summation of the strain and vorticity tensors.
+
+                        sijsij = two * (qxy**2 + qxz**2 + qyz**2) &
+                                 + qxx**2 + qyy**2 + qzz**2
+                        oijoij = two * (oxy**2 + oxz**2 + oyz**2)
+
+                        ! Compute the production term.
+
+                        scratch(i, j, k, iprod) = two * sqrt(sijsij * oijoij)
 #ifdef TAPENADE_REVERSE
-             end do
+                    end do
 #else
-          enddo
-       enddo
-    enddo
+                end do
+            end do
+        end do
 #endif
-  end subroutine prodKatoLaunder
-
-  subroutine prodSmag2
-    !
-    !       prodSmag2 computes the term:
-    !              2*sij*sij - 2/3 div(u)**2 with  sij=0.5*(duidxj+dujdxi)
-    !       which is used for the turbulence equations.
-    !       It is assumed that the pointer prod, stored in turbMod, is
-    !       already set to the correct entry.
-    !
-    use constants
-    use blockPointers, only : nx, ny, nz, il, jl, kl, w, si, sj, sk, vol, sectionID, scratch
-    implicit none
-    !
-    !      Local parameter
-    !
-    real(kind=realType), parameter :: f23 = two*third
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, ii
-    real(kind=realType)   :: uux, uuy, uuz, vvx, vvy, vvz, wwx, wwy, wwz
-    real(kind=realType)   :: div2, fact, sxx, syy, szz, sxy, sxz, syz
-
-    ! Loop over the cell centers of the given block. It may be more
-    ! efficient to loop over the faces and to scatter the gradient,
-    ! but in that case the gradients for u, v and w must be stored.
-    ! In the current approach no extra memory is needed.
+    end subroutine prodKatoLaunder
+
+    subroutine prodSmag2
+        !
+        !       prodSmag2 computes the term:
+        !              2*sij*sij - 2/3 div(u)**2 with  sij=0.5*(duidxj+dujdxi)
+        !       which is used for the turbulence equations.
+        !       It is assumed that the pointer prod, stored in turbMod, is
+        !       already set to the correct entry.
+        !
+        use constants
+        use blockPointers, only: nx, ny, nz, il, jl, kl, w, si, sj, sk, vol, sectionID, scratch
+        implicit none
+        !
+        !      Local parameter
+        !
+        real(kind=realType), parameter :: f23 = two * third
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, ii
+        real(kind=realType) :: uux, uuy, uuz, vvx, vvy, vvz, wwx, wwy, wwz
+        real(kind=realType) :: div2, fact, sxx, syy, szz, sxy, sxz, syz
+
+        ! Loop over the cell centers of the given block. It may be more
+        ! efficient to loop over the faces and to scatter the gradient,
+        ! but in that case the gradients for u, v and w must be stored.
+        ! In the current approach no extra memory is needed.
 
 #ifdef TAPENADE_REVERSE
-    !$AD II-LOOP
-    do ii=0,nx*ny*nz-1
-       i = mod(ii, nx) + 2
-       j = mod(ii/nx, ny) + 2
-       k = ii/(nx*ny) + 2
+        !$AD II-LOOP
+        do ii = 0, nx * ny * nz - 1
+            i = mod(ii, nx) + 2
+            j = mod(ii / nx, ny) + 2
+            k = ii / (nx * ny) + 2
 #else
-       do k=2, kl
-          do j=2, jl
-             do i=2, il
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
 #endif
 
-                ! Compute the gradient of u in the cell center. Use is made
-                ! of the fact that the surrounding normals sum up to zero,
-                ! such that the cell i,j,k does not give a contribution.
-                ! The gradient is scaled by the factor 2*vol.
-
-                uux = w(i+1,j,k,ivx)*si(i,j,k,1) - w(i-1,j,k,ivx)*si(i-1,j,k,1) &
-                     + w(i,j+1,k,ivx)*sj(i,j,k,1) - w(i,j-1,k,ivx)*sj(i,j-1,k,1) &
-                     + w(i,j,k+1,ivx)*sk(i,j,k,1) - w(i,j,k-1,ivx)*sk(i,j,k-1,1)
-                uuy = w(i+1,j,k,ivx)*si(i,j,k,2) - w(i-1,j,k,ivx)*si(i-1,j,k,2) &
-                     + w(i,j+1,k,ivx)*sj(i,j,k,2) - w(i,j-1,k,ivx)*sj(i,j-1,k,2) &
-                     + w(i,j,k+1,ivx)*sk(i,j,k,2) - w(i,j,k-1,ivx)*sk(i,j,k-1,2)
-                uuz = w(i+1,j,k,ivx)*si(i,j,k,3) - w(i-1,j,k,ivx)*si(i-1,j,k,3) &
-                     + w(i,j+1,k,ivx)*sj(i,j,k,3) - w(i,j-1,k,ivx)*sj(i,j-1,k,3) &
-                     + w(i,j,k+1,ivx)*sk(i,j,k,3) - w(i,j,k-1,ivx)*sk(i,j,k-1,3)
-
-                ! Idem for the gradient of v.
-
-                vvx = w(i+1,j,k,ivy)*si(i,j,k,1) - w(i-1,j,k,ivy)*si(i-1,j,k,1) &
-                     + w(i,j+1,k,ivy)*sj(i,j,k,1) - w(i,j-1,k,ivy)*sj(i,j-1,k,1) &
-                     + w(i,j,k+1,ivy)*sk(i,j,k,1) - w(i,j,k-1,ivy)*sk(i,j,k-1,1)
-                vvy = w(i+1,j,k,ivy)*si(i,j,k,2) - w(i-1,j,k,ivy)*si(i-1,j,k,2) &
-                     + w(i,j+1,k,ivy)*sj(i,j,k,2) - w(i,j-1,k,ivy)*sj(i,j-1,k,2) &
-                     + w(i,j,k+1,ivy)*sk(i,j,k,2) - w(i,j,k-1,ivy)*sk(i,j,k-1,2)
-                vvz = w(i+1,j,k,ivy)*si(i,j,k,3) - w(i-1,j,k,ivy)*si(i-1,j,k,3) &
-                     + w(i,j+1,k,ivy)*sj(i,j,k,3) - w(i,j-1,k,ivy)*sj(i,j-1,k,3) &
-                     + w(i,j,k+1,ivy)*sk(i,j,k,3) - w(i,j,k-1,ivy)*sk(i,j,k-1,3)
-
-                ! And for the gradient of w.
-
-                wwx = w(i+1,j,k,ivz)*si(i,j,k,1) - w(i-1,j,k,ivz)*si(i-1,j,k,1) &
-                     + w(i,j+1,k,ivz)*sj(i,j,k,1) - w(i,j-1,k,ivz)*sj(i,j-1,k,1) &
-                     + w(i,j,k+1,ivz)*sk(i,j,k,1) - w(i,j,k-1,ivz)*sk(i,j,k-1,1)
-                wwy = w(i+1,j,k,ivz)*si(i,j,k,2) - w(i-1,j,k,ivz)*si(i-1,j,k,2) &
-                     + w(i,j+1,k,ivz)*sj(i,j,k,2) - w(i,j-1,k,ivz)*sj(i,j-1,k,2) &
-                     + w(i,j,k+1,ivz)*sk(i,j,k,2) - w(i,j,k-1,ivz)*sk(i,j,k-1,2)
-                wwz = w(i+1,j,k,ivz)*si(i,j,k,3) - w(i-1,j,k,ivz)*si(i-1,j,k,3) &
-                     + w(i,j+1,k,ivz)*sj(i,j,k,3) - w(i,j-1,k,ivz)*sj(i,j-1,k,3) &
-                     + w(i,j,k+1,ivz)*sk(i,j,k,3) - w(i,j,k-1,ivz)*sk(i,j,k-1,3)
-
-                ! Compute the components of the stress tensor.
-                ! The combination of the current scaling of the velocity
-                ! gradients (2*vol) and the definition of the stress tensor,
-                ! leads to the factor 1/(4*vol).
-
-                fact = fourth/vol(i,j,k)
-
-                sxx = two*fact*uux
-                syy = two*fact*vvy
-                szz = two*fact*wwz
-
-                sxy = fact*(uuy + vvx)
-                sxz = fact*(uuz + wwx)
-                syz = fact*(vvz + wwy)
-
-                ! Compute 2/3 * divergence of velocity squared
-
-                div2 = f23*(sxx+syy+szz)**2
-
-                ! Store the square of strain as the production term.
-
-                scratch(i,j,k, iprod) = two*(two*(sxy**2 + sxz**2 + syz**2) &
-                     +           sxx**2 + syy**2 + szz**2) - div2
+                        ! Compute the gradient of u in the cell center. Use is made
+                        ! of the fact that the surrounding normals sum up to zero,
+                        ! such that the cell i,j,k does not give a contribution.
+                        ! The gradient is scaled by the factor 2*vol.
+
+                        uux = w(i + 1, j, k, ivx) * si(i, j, k, 1) - w(i - 1, j, k, ivx) * si(i - 1, j, k, 1) &
+                              + w(i, j + 1, k, ivx) * sj(i, j, k, 1) - w(i, j - 1, k, ivx) * sj(i, j - 1, k, 1) &
+                              + w(i, j, k + 1, ivx) * sk(i, j, k, 1) - w(i, j, k - 1, ivx) * sk(i, j, k - 1, 1)
+                        uuy = w(i + 1, j, k, ivx) * si(i, j, k, 2) - w(i - 1, j, k, ivx) * si(i - 1, j, k, 2) &
+                              + w(i, j + 1, k, ivx) * sj(i, j, k, 2) - w(i, j - 1, k, ivx) * sj(i, j - 1, k, 2) &
+                              + w(i, j, k + 1, ivx) * sk(i, j, k, 2) - w(i, j, k - 1, ivx) * sk(i, j, k - 1, 2)
+                        uuz = w(i + 1, j, k, ivx) * si(i, j, k, 3) - w(i - 1, j, k, ivx) * si(i - 1, j, k, 3) &
+                              + w(i, j + 1, k, ivx) * sj(i, j, k, 3) - w(i, j - 1, k, ivx) * sj(i, j - 1, k, 3) &
+                              + w(i, j, k + 1, ivx) * sk(i, j, k, 3) - w(i, j, k - 1, ivx) * sk(i, j, k - 1, 3)
+
+                        ! Idem for the gradient of v.
+
+                        vvx = w(i + 1, j, k, ivy) * si(i, j, k, 1) - w(i - 1, j, k, ivy) * si(i - 1, j, k, 1) &
+                              + w(i, j + 1, k, ivy) * sj(i, j, k, 1) - w(i, j - 1, k, ivy) * sj(i, j - 1, k, 1) &
+                              + w(i, j, k + 1, ivy) * sk(i, j, k, 1) - w(i, j, k - 1, ivy) * sk(i, j, k - 1, 1)
+                        vvy = w(i + 1, j, k, ivy) * si(i, j, k, 2) - w(i - 1, j, k, ivy) * si(i - 1, j, k, 2) &
+                              + w(i, j + 1, k, ivy) * sj(i, j, k, 2) - w(i, j - 1, k, ivy) * sj(i, j - 1, k, 2) &
+                              + w(i, j, k + 1, ivy) * sk(i, j, k, 2) - w(i, j, k - 1, ivy) * sk(i, j, k - 1, 2)
+                        vvz = w(i + 1, j, k, ivy) * si(i, j, k, 3) - w(i - 1, j, k, ivy) * si(i - 1, j, k, 3) &
+                              + w(i, j + 1, k, ivy) * sj(i, j, k, 3) - w(i, j - 1, k, ivy) * sj(i, j - 1, k, 3) &
+                              + w(i, j, k + 1, ivy) * sk(i, j, k, 3) - w(i, j, k - 1, ivy) * sk(i, j, k - 1, 3)
+
+                        ! And for the gradient of w.
+
+                        wwx = w(i + 1, j, k, ivz) * si(i, j, k, 1) - w(i - 1, j, k, ivz) * si(i - 1, j, k, 1) &
+                              + w(i, j + 1, k, ivz) * sj(i, j, k, 1) - w(i, j - 1, k, ivz) * sj(i, j - 1, k, 1) &
+                              + w(i, j, k + 1, ivz) * sk(i, j, k, 1) - w(i, j, k - 1, ivz) * sk(i, j, k - 1, 1)
+                        wwy = w(i + 1, j, k, ivz) * si(i, j, k, 2) - w(i - 1, j, k, ivz) * si(i - 1, j, k, 2) &
+                              + w(i, j + 1, k, ivz) * sj(i, j, k, 2) - w(i, j - 1, k, ivz) * sj(i, j - 1, k, 2) &
+                              + w(i, j, k + 1, ivz) * sk(i, j, k, 2) - w(i, j, k - 1, ivz) * sk(i, j, k - 1, 2)
+                        wwz = w(i + 1, j, k, ivz) * si(i, j, k, 3) - w(i - 1, j, k, ivz) * si(i - 1, j, k, 3) &
+                              + w(i, j + 1, k, ivz) * sj(i, j, k, 3) - w(i, j - 1, k, ivz) * sj(i, j - 1, k, 3) &
+                              + w(i, j, k + 1, ivz) * sk(i, j, k, 3) - w(i, j, k - 1, ivz) * sk(i, j, k - 1, 3)
+
+                        ! Compute the components of the stress tensor.
+                        ! The combination of the current scaling of the velocity
+                        ! gradients (2*vol) and the definition of the stress tensor,
+                        ! leads to the factor 1/(4*vol).
+
+                        fact = fourth / vol(i, j, k)
+
+                        sxx = two * fact * uux
+                        syy = two * fact * vvy
+                        szz = two * fact * wwz
+
+                        sxy = fact * (uuy + vvx)
+                        sxz = fact * (uuz + wwx)
+                        syz = fact * (vvz + wwy)
+
+                        ! Compute 2/3 * divergence of velocity squared
+
+                        div2 = f23 * (sxx + syy + szz)**2
+
+                        ! Store the square of strain as the production term.
+
+                        scratch(i, j, k, iprod) = two * (two * (sxy**2 + sxz**2 + syz**2) &
+                                                         + sxx**2 + syy**2 + szz**2) - div2
 #ifdef TAPENADE_REVERSE
-             end do
+                    end do
 #else
-          enddo
-       enddo
-    enddo
+                end do
+            end do
+        end do
 #endif
-  end subroutine prodSmag2
-
-  subroutine prodWmag2
-    !
-    !       prodWmag2 computes the term:
-    !          2*oij*oij  with oij=0.5*(duidxj - dujdxi).
-    !       This is equal to the magnitude squared of the vorticity.
-    !       It is assumed that the pointer vort, stored in turbMod, is
-    !       already set to the correct entry.
-    !
-    use constants
-    use blockPointers, only : nx, ny, nz, il, jl, kl, w, si, sj, sk, vol, sectionID, scratch
-    use flowVarRefState, only : timeRef
-    use section, only : sections
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer :: i, j, k, ii
-
-    real(kind=realType) :: uuy, uuz, vvx, vvz, wwx, wwy
-    real(kind=realType) :: fact, vortx, vorty, vortz
-    real(kind=realType) :: omegax, omegay, omegaz
-
-    ! Determine the non-dimensional wheel speed of this block.
-
-    omegax = timeRef*sections(sectionID)%rotRate(1)
-    omegay = timeRef*sections(sectionID)%rotRate(2)
-    omegaz = timeRef*sections(sectionID)%rotRate(3)
-
-    ! Loop over the cell centers of the given block. It may be more
-    ! efficient to loop over the faces and to scatter the gradient,
-    ! but in that case the gradients for u, v and w must be stored.
-    ! In the current approach no extra memory is needed.
+    end subroutine prodSmag2
+
+    subroutine prodWmag2
+        !
+        !       prodWmag2 computes the term:
+        !          2*oij*oij  with oij=0.5*(duidxj - dujdxi).
+        !       This is equal to the magnitude squared of the vorticity.
+        !       It is assumed that the pointer vort, stored in turbMod, is
+        !       already set to the correct entry.
+        !
+        use constants
+        use blockPointers, only: nx, ny, nz, il, jl, kl, w, si, sj, sk, vol, sectionID, scratch
+        use flowVarRefState, only: timeRef
+        use section, only: sections
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer :: i, j, k, ii
+
+        real(kind=realType) :: uuy, uuz, vvx, vvz, wwx, wwy
+        real(kind=realType) :: fact, vortx, vorty, vortz
+        real(kind=realType) :: omegax, omegay, omegaz
+
+        ! Determine the non-dimensional wheel speed of this block.
+
+        omegax = timeRef * sections(sectionID)%rotRate(1)
+        omegay = timeRef * sections(sectionID)%rotRate(2)
+        omegaz = timeRef * sections(sectionID)%rotRate(3)
+
+        ! Loop over the cell centers of the given block. It may be more
+        ! efficient to loop over the faces and to scatter the gradient,
+        ! but in that case the gradients for u, v and w must be stored.
+        ! In the current approach no extra memory is needed.
 #ifdef TAPENADE_REVERSE
-    !$AD II-LOOP
-    do ii=0,nx*ny*nz-1
-       i = mod(ii, nx) + 2
-       j = mod(ii/nx, ny) + 2
-       k = ii/(nx*ny) + 2
+        !$AD II-LOOP
+        do ii = 0, nx * ny * nz - 1
+            i = mod(ii, nx) + 2
+            j = mod(ii / nx, ny) + 2
+            k = ii / (nx * ny) + 2
 #else
-       do k=2, kl
-          do j=2, jl
-             do i=2, il
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
 #endif
 
-                ! Compute the necessary derivatives of u in the cell center.
-                ! Use is made of the fact that the surrounding normals sum up
-                ! to zero, such that the cell i,j,k does not give a
-                ! contribution. The gradient is scaled by a factor 2*vol.
+                        ! Compute the necessary derivatives of u in the cell center.
+                        ! Use is made of the fact that the surrounding normals sum up
+                        ! to zero, such that the cell i,j,k does not give a
+                        ! contribution. The gradient is scaled by a factor 2*vol.
 
-                uuy = w(i+1,j,k,ivx)*si(i,j,k,2) - w(i-1,j,k,ivx)*si(i-1,j,k,2) &
-                     + w(i,j+1,k,ivx)*sj(i,j,k,2) - w(i,j-1,k,ivx)*sj(i,j-1,k,2) &
-                     + w(i,j,k+1,ivx)*sk(i,j,k,2) - w(i,j,k-1,ivx)*sk(i,j,k-1,2)
-                uuz = w(i+1,j,k,ivx)*si(i,j,k,3) - w(i-1,j,k,ivx)*si(i-1,j,k,3) &
-                     + w(i,j+1,k,ivx)*sj(i,j,k,3) - w(i,j-1,k,ivx)*sj(i,j-1,k,3) &
-                     + w(i,j,k+1,ivx)*sk(i,j,k,3) - w(i,j,k-1,ivx)*sk(i,j,k-1,3)
+                        uuy = w(i + 1, j, k, ivx) * si(i, j, k, 2) - w(i - 1, j, k, ivx) * si(i - 1, j, k, 2) &
+                              + w(i, j + 1, k, ivx) * sj(i, j, k, 2) - w(i, j - 1, k, ivx) * sj(i, j - 1, k, 2) &
+                              + w(i, j, k + 1, ivx) * sk(i, j, k, 2) - w(i, j, k - 1, ivx) * sk(i, j, k - 1, 2)
+                        uuz = w(i + 1, j, k, ivx) * si(i, j, k, 3) - w(i - 1, j, k, ivx) * si(i - 1, j, k, 3) &
+                              + w(i, j + 1, k, ivx) * sj(i, j, k, 3) - w(i, j - 1, k, ivx) * sj(i, j - 1, k, 3) &
+                              + w(i, j, k + 1, ivx) * sk(i, j, k, 3) - w(i, j, k - 1, ivx) * sk(i, j, k - 1, 3)
 
-                ! Idem for the gradient of v.
+                        ! Idem for the gradient of v.
 
-                vvx = w(i+1,j,k,ivy)*si(i,j,k,1) - w(i-1,j,k,ivy)*si(i-1,j,k,1) &
-                     + w(i,j+1,k,ivy)*sj(i,j,k,1) - w(i,j-1,k,ivy)*sj(i,j-1,k,1) &
-                     + w(i,j,k+1,ivy)*sk(i,j,k,1) - w(i,j,k-1,ivy)*sk(i,j,k-1,1)
-                vvz = w(i+1,j,k,ivy)*si(i,j,k,3) - w(i-1,j,k,ivy)*si(i-1,j,k,3) &
-                     + w(i,j+1,k,ivy)*sj(i,j,k,3) - w(i,j-1,k,ivy)*sj(i,j-1,k,3) &
-                     + w(i,j,k+1,ivy)*sk(i,j,k,3) - w(i,j,k-1,ivy)*sk(i,j,k-1,3)
+                        vvx = w(i + 1, j, k, ivy) * si(i, j, k, 1) - w(i - 1, j, k, ivy) * si(i - 1, j, k, 1) &
+                              + w(i, j + 1, k, ivy) * sj(i, j, k, 1) - w(i, j - 1, k, ivy) * sj(i, j - 1, k, 1) &
+                              + w(i, j, k + 1, ivy) * sk(i, j, k, 1) - w(i, j, k - 1, ivy) * sk(i, j, k - 1, 1)
+                        vvz = w(i + 1, j, k, ivy) * si(i, j, k, 3) - w(i - 1, j, k, ivy) * si(i - 1, j, k, 3) &
+                              + w(i, j + 1, k, ivy) * sj(i, j, k, 3) - w(i, j - 1, k, ivy) * sj(i, j - 1, k, 3) &
+                              + w(i, j, k + 1, ivy) * sk(i, j, k, 3) - w(i, j, k - 1, ivy) * sk(i, j, k - 1, 3)
 
-                ! And for the gradient of w.
+                        ! And for the gradient of w.
 
-                wwx = w(i+1,j,k,ivz)*si(i,j,k,1) - w(i-1,j,k,ivz)*si(i-1,j,k,1) &
-                     + w(i,j+1,k,ivz)*sj(i,j,k,1) - w(i,j-1,k,ivz)*sj(i,j-1,k,1) &
-                     + w(i,j,k+1,ivz)*sk(i,j,k,1) - w(i,j,k-1,ivz)*sk(i,j,k-1,1)
-                wwy = w(i+1,j,k,ivz)*si(i,j,k,2) - w(i-1,j,k,ivz)*si(i-1,j,k,2) &
-                     + w(i,j+1,k,ivz)*sj(i,j,k,2) - w(i,j-1,k,ivz)*sj(i,j-1,k,2) &
-                     + w(i,j,k+1,ivz)*sk(i,j,k,2) - w(i,j,k-1,ivz)*sk(i,j,k-1,2)
+                        wwx = w(i + 1, j, k, ivz) * si(i, j, k, 1) - w(i - 1, j, k, ivz) * si(i - 1, j, k, 1) &
+                              + w(i, j + 1, k, ivz) * sj(i, j, k, 1) - w(i, j - 1, k, ivz) * sj(i, j - 1, k, 1) &
+                              + w(i, j, k + 1, ivz) * sk(i, j, k, 1) - w(i, j, k - 1, ivz) * sk(i, j, k - 1, 1)
+                        wwy = w(i + 1, j, k, ivz) * si(i, j, k, 2) - w(i - 1, j, k, ivz) * si(i - 1, j, k, 2) &
+                              + w(i, j + 1, k, ivz) * sj(i, j, k, 2) - w(i, j - 1, k, ivz) * sj(i, j - 1, k, 2) &
+                              + w(i, j, k + 1, ivz) * sk(i, j, k, 2) - w(i, j, k - 1, ivz) * sk(i, j, k - 1, 2)
 
-                ! Compute the three components of the vorticity vector.
-                ! Substract the part coming from the rotating frame.
+                        ! Compute the three components of the vorticity vector.
+                        ! Substract the part coming from the rotating frame.
 
-                fact = half/vol(i,j,k)
+                        fact = half / vol(i, j, k)
 
-                vortx = fact*(wwy - vvz) - two*omegax
-                vorty = fact*(uuz - wwx) - two*omegay
-                vortz = fact*(vvx - uuy) - two*omegaz
+                        vortx = fact * (wwy - vvz) - two * omegax
+                        vorty = fact * (uuz - wwx) - two * omegay
+                        vortz = fact * (vvx - uuy) - two * omegaz
 
-                ! Compute the magnitude squared of the vorticity.
+                        ! Compute the magnitude squared of the vorticity.
 
-                scratch(i,j,k,ivort) = vortx**2 + vorty**2 + vortz**2
+                        scratch(i, j, k, ivort) = vortx**2 + vorty**2 + vortz**2
 #ifdef TAPENADE_REVERSE
-             end do
+                    end do
 #else
-          enddo
-       enddo
-    enddo
+                end do
+            end do
+        end do
 #endif
-  end subroutine prodWmag2
-  function saNuKnownEddyRatio(eddyRatio, nuLam)
-    !
-    !       saNuKnownEddyRatio computes the Spalart-Allmaras transport
-    !       variable nu for the given eddy viscosity ratio.
-    !
-    use constants
-    use paramTurb
-    implicit none
-    !
-    !      Function type.
-    !
-    real(kind=realType) :: saNuKnownEddyRatio
-    !
-    !      Function arguments.
-    !
-    real(kind=realType), intent(in) :: eddyRatio, nuLam
-    !
-    !      Local variables.
-    !
-    real(kind=realType) :: cv13, chi, chi2, chi3, chi4, f, df, dchi
-
-    ! Take care of the exceptional cases.
-
-    if(eddyRatio <= zero) then
-       saNuKnownEddyRatio = zero
-       return
-    endif
-
-    ! Set the value of cv1^3, which is the constant appearing in the
-    ! sa function fv1 to compute the eddy viscosity
-
-    cv13 = rsaCv1**3
-
-    ! Determine the value of chi, which is given by the quartic
-    ! polynomial chi^4 - ratio*(chi^3 + cv1^3) = 0.
-    ! First determine the start value, depending on the eddyRatio.
-
-    if(eddyRatio < 1.e-4_realType) then
-       chi = 0.5_realType
-    else if(eddyRatio < 1.0_realType) then
-       chi = 5.0_realType
-    else if(eddyRatio < 10.0_realType) then
-       chi = 10.0_realType
-    else
-       chi = eddyRatio
-    endif
-
-    ! The actual newton algorithm.
-
-    do
-       ! Compute the function value and the derivative.
-
-       chi2 = chi*chi
-       chi3 = chi*chi2
-       chi4 = chi*chi3
+    end subroutine prodWmag2
+    function saNuKnownEddyRatio(eddyRatio, nuLam)
+        !
+        !       saNuKnownEddyRatio computes the Spalart-Allmaras transport
+        !       variable nu for the given eddy viscosity ratio.
+        !
+        use constants
+        use paramTurb
+        implicit none
+        !
+        !      Function type.
+        !
+        real(kind=realType) :: saNuKnownEddyRatio
+        !
+        !      Function arguments.
+        !
+        real(kind=realType), intent(in) :: eddyRatio, nuLam
+        !
+        !      Local variables.
+        !
+        real(kind=realType) :: cv13, chi, chi2, chi3, chi4, f, df, dchi
+
+        ! Take care of the exceptional cases.
+
+        if (eddyRatio <= zero) then
+            saNuKnownEddyRatio = zero
+            return
+        end if
+
+        ! Set the value of cv1^3, which is the constant appearing in the
+        ! sa function fv1 to compute the eddy viscosity
+
+        cv13 = rsaCv1**3
+
+        ! Determine the value of chi, which is given by the quartic
+        ! polynomial chi^4 - ratio*(chi^3 + cv1^3) = 0.
+        ! First determine the start value, depending on the eddyRatio.
+
+        if (eddyRatio < 1.e-4_realType) then
+            chi = 0.5_realType
+        else if (eddyRatio < 1.0_realType) then
+            chi = 5.0_realType
+        else if (eddyRatio < 10.0_realType) then
+            chi = 10.0_realType
+        else
+            chi = eddyRatio
+        end if
+
+        ! The actual newton algorithm.
+
+        do
+            ! Compute the function value and the derivative.
+
+            chi2 = chi * chi
+            chi3 = chi * chi2
+            chi4 = chi * chi3
 
-       f  = chi4 - eddyRatio*(chi3 + cv13)
-       df = four*chi3 - three*eddyRatio*chi2
+            f = chi4 - eddyRatio * (chi3 + cv13)
+            df = four * chi3 - three * eddyRatio * chi2
 
-       ! Compute the negative update and the new value of chi.
+            ! Compute the negative update and the new value of chi.
 
-       dchi = f/df
-       chi  = chi - dchi
+            dchi = f / df
+            chi = chi - dchi
 
-       ! Condition to exit the loop.
+            ! Condition to exit the loop.
 
-       if(abs(dchi/chi) <= thresholdReal) exit
-    enddo
+            if (abs(dchi / chi) <= thresholdReal) exit
+        end do
 
-    ! Chi is the ratio of the spalart allmaras transport variable and
-    ! the laminar viscosity. So multiply chi with the laminar viscosity
-    ! to obtain the correct value.
+        ! Chi is the ratio of the spalart allmaras transport variable and
+        ! the laminar viscosity. So multiply chi with the laminar viscosity
+        ! to obtain the correct value.
 
-    saNuKnownEddyRatio = nuLam*chi
+        saNuKnownEddyRatio = nuLam * chi
 
-  end function saNuKnownEddyRatio
+    end function saNuKnownEddyRatio
 
-  subroutine unsteadyTurbTerm(mAdv, nAdv, offset, qq)
-    !
-    !       unsteadyTurbTerm discretizes the time derivative of the
-    !       turbulence transport equations and add it to the residual.
-    !       As the time derivative is the same for all turbulence models,
-    !       this generic routine can be used; both the discretization of
-    !       the time derivative and its contribution to the central
-    !       jacobian are computed by this routine.
-    !       Only nAdv equations are treated, while the actual system has
-    !       size mAdv. The reason is that some equations for some
-    !       turbulence equations do not have a time derivative, e.g. the
-    !       f equation in the v2-f model. The argument offset indicates
-    !       the offset in the w vector where this subsystem starts. As a
-    !       consequence it is assumed that the indices of the current
-    !       subsystem are contiguous, e.g. if a 2*2 system is solved the
-    !       Last index in w is offset+1 and offset+2 respectively.
-    !
-    use blockPointers
-    use flowVarRefState
-    use inputPhysics
-    use inputTimeSpectral
-    use inputUnsteady
-    use iteration
-    use section
-    use turbMod
+    subroutine unsteadyTurbTerm(mAdv, nAdv, offset, qq)
+        !
+        !       unsteadyTurbTerm discretizes the time derivative of the
+        !       turbulence transport equations and add it to the residual.
+        !       As the time derivative is the same for all turbulence models,
+        !       this generic routine can be used; both the discretization of
+        !       the time derivative and its contribution to the central
+        !       jacobian are computed by this routine.
+        !       Only nAdv equations are treated, while the actual system has
+        !       size mAdv. The reason is that some equations for some
+        !       turbulence equations do not have a time derivative, e.g. the
+        !       f equation in the v2-f model. The argument offset indicates
+        !       the offset in the w vector where this subsystem starts. As a
+        !       consequence it is assumed that the indices of the current
+        !       subsystem are contiguous, e.g. if a 2*2 system is solved the
+        !       Last index in w is offset+1 and offset+2 respectively.
+        !
+        use blockPointers
+        use flowVarRefState
+        use inputPhysics
+        use inputTimeSpectral
+        use inputUnsteady
+        use iteration
+        use section
+        use turbMod
 
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: mAdv, nAdv, offset
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: mAdv, nAdv, offset
 
-    real(kind=realType), dimension(2:il,2:jl,2:kl,mAdv,mAdv), &
-         intent(inout) :: qq
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, ii, jj, nn
+        real(kind=realType), dimension(2:il, 2:jl, 2:kl, mAdv, mAdv), &
+            intent(inout) :: qq
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, ii, jj, nn
 
-    real(kind=realType) :: oneOverDt, tmp
+        real(kind=realType) :: oneOverDt, tmp
 
-    ! Determine the equation mode.
+        ! Determine the equation mode.
 
-    select case (equationMode)
+        select case (equationMode)
 
-    case (steady)
+        case (steady)
 
-       ! Steady computation. No time derivative present.
+            ! Steady computation. No time derivative present.
 
-       return
+            return
 
-       !===============================================================
+            !===============================================================
 
-    case (unsteady)
+        case (unsteady)
 
-       ! The time deritvative term depends on the integration
-       ! scheme used.
+            ! The time deritvative term depends on the integration
+            ! scheme used.
 
-       select case (timeIntegrationScheme)
+            select case (timeIntegrationScheme)
 
-       case (BDF)
+            case (BDF)
 
-          ! Backward difference formula is used as time
-          ! integration scheme.
+                ! Backward difference formula is used as time
+                ! integration scheme.
 
-          ! Store the inverse of the physical nonDimensional
-          ! time step a bit easier.
+                ! Store the inverse of the physical nonDimensional
+                ! time step a bit easier.
 
-          oneOverDt = timeRef/deltaT
+                oneOverDt = timeRef / deltaT
 
-          ! Loop over the number of turbulent transport equations.
+                ! Loop over the number of turbulent transport equations.
 
-          nAdvLoopUnsteady: do ii=1,nAdv
+                nAdvLoopUnsteady: do ii = 1, nAdv
 
-             ! Store the index of the current turbulent variable in jj.
+                    ! Store the index of the current turbulent variable in jj.
 
-             jj = ii + offset
+                    jj = ii + offset
 
-             ! Loop over the owned cells of this block to compute the
-             ! time derivative.
+                    ! Loop over the owned cells of this block to compute the
+                    ! time derivative.
 
-             do k=2,kl
-                do j=2,jl
-                   do i=2,il
+                    do k = 2, kl
+                        do j = 2, jl
+                            do i = 2, il
 
-                      ! Initialize tmp to the value of the current
-                      ! level multiplied by the corresponding coefficient
-                      ! in the time integration scheme.
+                                ! Initialize tmp to the value of the current
+                                ! level multiplied by the corresponding coefficient
+                                ! in the time integration scheme.
 
-                      tmp = coefTime(0)*w(i,j,k,jj)
+                                tmp = coefTime(0) * w(i, j, k, jj)
 
-                      ! Loop over the old time levels and add the
-                      ! corresponding contribution to tmp.
+                                ! Loop over the old time levels and add the
+                                ! corresponding contribution to tmp.
 
-                      do nn=1,noldLevels
-                         tmp = tmp + coefTime(nn)*wold(nn,i,j,k,jj)
-                      enddo
+                                do nn = 1, noldLevels
+                                    tmp = tmp + coefTime(nn) * wold(nn, i, j, k, jj)
+                                end do
 
-                      ! Update the residual. Note that in the turbulent
-                      ! routines the residual is defined with an opposite
-                      ! sign compared to the residual of the flow equations.
-                      ! Therefore the time derivative must be substracted
-                      ! from dvt.
+                                ! Update the residual. Note that in the turbulent
+                                ! routines the residual is defined with an opposite
+                                ! sign compared to the residual of the flow equations.
+                                ! Therefore the time derivative must be substracted
+                                ! from dvt.
 
-                      scratch(i,j,k,idvt+ii-1) = scratch(i,j,k,idvt+ii-1) - oneOverDt*tmp
+                                scratch(i, j, k, idvt + ii - 1) = scratch(i, j, k, idvt + ii - 1) - oneOverDt * tmp
 
-                      ! Update the central jacobian.
+                                ! Update the central jacobian.
 
-                      qq(i,j,k,ii,ii) = qq(i,j,k,ii,ii) &
-                           + coefTime(0)*oneOverDt
-                   enddo
-                enddo
-             enddo
+                                qq(i, j, k, ii, ii) = qq(i, j, k, ii, ii) &
+                                                      + coefTime(0) * oneOverDt
+                            end do
+                        end do
+                    end do
 
-          enddo nAdvLoopUnsteady
+                end do nAdvLoopUnsteady
 
-          !===========================================================
+                !===========================================================
 
-       case (explicitRK)
+            case (explicitRK)
 
-          ! Explicit time integration scheme. The time derivative
-          ! is handled differently.
+                ! Explicit time integration scheme. The time derivative
+                ! is handled differently.
 
-          return
+                return
 
-       end select
+            end select
 
-       !===============================================================
+            !===============================================================
 
-    case (timeSpectral)
+        case (timeSpectral)
 
-       ! Time spectral method.
+            ! Time spectral method.
 
-       ! Loop over the number of turbulent transport equations.
+            ! Loop over the number of turbulent transport equations.
 
-       nAdvLoopSpectral: do ii=1,nAdv
+            nAdvLoopSpectral: do ii = 1, nAdv
 
-          ! Store the index of the current turbulent variable in jj.
+                ! Store the index of the current turbulent variable in jj.
 
-          jj = ii + offset
+                jj = ii + offset
 
-          ! The time derivative has been computed earlier in
-          ! unsteadyTurbSpectral and stored in entry jj of scratch.
-          ! Substract this value for all owned cells. It must be
-          ! substracted, because in the turbulent routines the
-          ! residual is defined with an opposite sign compared to
-          ! the residual of the flow equations.
-          ! Also add a term to the diagonal matrix, which corresponds
-          ! to to the contribution of the highest frequency. This is
-          ! equivalent to an explicit treatment of the time derivative
-          ! and may need to be changed.
+                ! The time derivative has been computed earlier in
+                ! unsteadyTurbSpectral and stored in entry jj of scratch.
+                ! Substract this value for all owned cells. It must be
+                ! substracted, because in the turbulent routines the
+                ! residual is defined with an opposite sign compared to
+                ! the residual of the flow equations.
+                ! Also add a term to the diagonal matrix, which corresponds
+                ! to to the contribution of the highest frequency. This is
+                ! equivalent to an explicit treatment of the time derivative
+                ! and may need to be changed.
 
-          tmp = nTimeIntervalsSpectral*pi*timeRef &
-               / sections(sectionID)%timePeriod
+                tmp = nTimeIntervalsSpectral * pi * timeRef &
+                      / sections(sectionID)%timePeriod
 
-          do k=2,kl
-             do j=2,jl
-                do i=2,il
-                   scratch(i,j,k,idvt+ii-1)   = scratch(i,j,k,idvt+ii-1)   - dw(i,j,k,jj)
-                   qq(i,j,k,ii,ii) = qq(i,j,k,ii,ii) + tmp
-                enddo
-             enddo
-          enddo
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            scratch(i, j, k, idvt + ii - 1) = scratch(i, j, k, idvt + ii - 1) - dw(i, j, k, jj)
+                            qq(i, j, k, ii, ii) = qq(i, j, k, ii, ii) + tmp
+                        end do
+                    end do
+                end do
 
-       enddo nAdvLoopSpectral
+            end do nAdvLoopSpectral
 
-    end select
+        end select
 
-  end subroutine unsteadyTurbTerm
+    end subroutine unsteadyTurbTerm
 
-  subroutine computeEddyViscosity(includeHalos)
-    !
-    !       computeEddyViscosity computes the eddy viscosity in the
-    !       owned cell centers of the given block. It is assumed that the
-    !       pointes already point to the correct block before entering
-    !       this subroutine.
-    !
-    use constants
-    use flowVarRefState
-    use inputPhysics
-    use iteration
-    use blockPointers
-    implicit none
+    subroutine computeEddyViscosity(includeHalos)
+        !
+        !       computeEddyViscosity computes the eddy viscosity in the
+        !       owned cell centers of the given block. It is assumed that the
+        !       pointes already point to the correct block before entering
+        !       this subroutine.
+        !
+        use constants
+        use flowVarRefState
+        use inputPhysics
+        use iteration
+        use blockPointers
+        implicit none
 
-    ! Input Parameter
-    logical, intent(in) :: includeHalos
+        ! Input Parameter
+        logical, intent(in) :: includeHalos
 
-    !
-    !      Local variables.
-    !
-    logical :: returnImmediately
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
-
-    ! Check if an immediate return can be made.
-
-    if( eddyModel ) then
-       if((currentLevel <= groundLevel)) then
-          returnImmediately = .false.
-       else
-          returnImmediately = .true.
-       endif
-    else
-       returnImmediately = .true.
-    endif
-
-    if( returnImmediately ) return
-
-    ! Determine the turbulence model and call the appropriate
-    ! routine to compute the eddy viscosity.
-    if (includeHalos) then
-       iBeg = 1
-       iEnd = ie
-       jBeg = 1
-       jEnd = je
-       kBeg = 1
-       kEnd = ke
-    else
-       iBeg = 2
-       iEnd = il
-       jBeg = 2
-       jEnd = jl
-       kBeg = 2
-       kEnd = kl
-    end if
-
-    select case (turbModel)
-
-    case (spalartAllmaras, spalartAllmarasEdwards)
-       call saEddyViscosity(iBeg, iEnd, jBeg, jEnd, kBeg, kEnd)
+        !
+        !      Local variables.
+        !
+        logical :: returnImmediately
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
+
+        ! Check if an immediate return can be made.
+
+        if (eddyModel) then
+            if ((currentLevel <= groundLevel)) then
+                returnImmediately = .false.
+            else
+                returnImmediately = .true.
+            end if
+        else
+            returnImmediately = .true.
+        end if
+
+        if (returnImmediately) return
+
+        ! Determine the turbulence model and call the appropriate
+        ! routine to compute the eddy viscosity.
+        if (includeHalos) then
+            iBeg = 1
+            iEnd = ie
+            jBeg = 1
+            jEnd = je
+            kBeg = 1
+            kEnd = ke
+        else
+            iBeg = 2
+            iEnd = il
+            jBeg = 2
+            jEnd = jl
+            kBeg = 2
+            kEnd = kl
+        end if
+
+        select case (turbModel)
+
+        case (spalartAllmaras, spalartAllmarasEdwards)
+            call saEddyViscosity(iBeg, iEnd, jBeg, jEnd, kBeg, kEnd)
 #ifndef USE_TAPENADE
 
-    case (v2f)
-       call vfEddyViscosity(iBeg, iEnd, jBeg, jEnd, kBeg, kEnd)
-    case (komegaWilcox, komegaModified)
-       call kwEddyViscosity(iBeg, iEnd, jBeg, jEnd, kBeg, kEnd)
+        case (v2f)
+            call vfEddyViscosity(iBeg, iEnd, jBeg, jEnd, kBeg, kEnd)
+        case (komegaWilcox, komegaModified)
+            call kwEddyViscosity(iBeg, iEnd, jBeg, jEnd, kBeg, kEnd)
 
-    case (menterSST)
-       call SSTEddyViscosity(iBeg, iEnd, jBeg, jEnd, kBeg, kEnd)
+        case (menterSST)
+            call SSTEddyViscosity(iBeg, iEnd, jBeg, jEnd, kBeg, kEnd)
 
-    case (ktau)
-       call ktEddyViscosity(iBeg, iEnd, jBeg, jEnd, kBeg, kEnd)
+        case (ktau)
+            call ktEddyViscosity(iBeg, iEnd, jBeg, jEnd, kBeg, kEnd)
 #endif
-    end select
-
-  end subroutine computeEddyViscosity
-
-  subroutine saEddyViscosity(iBeg, iEnd, jBeg, jEnd, kBeg, kEnd)
-    !
-    !       saEddyViscosity computes the eddy-viscosity according to the
-    !       Spalart-Allmaras model for the block given in blockPointers.
-    !       This routine for both the original version as well as the
-    !       modified version according to Edwards.
-    !
-    use constants
-    use blockPointers
-    use constants
-    use paramTurb
-    implicit none
-    ! Input variables
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
-
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, ii, iSize, jSize, kSize
-    real(kind=realType)   :: chi, chi3, fv1, rnuSA, cv13
-
-    ! Store the cv1^3; cv1 is a constant of the Spalart-Allmaras model.
-
-    cv13 = rsaCv1**3
-
-    ! Loop over the cells of this block and compute the eddy viscosity.
-    ! Do not include halo's.
+        end select
+
+    end subroutine computeEddyViscosity
+
+    subroutine saEddyViscosity(iBeg, iEnd, jBeg, jEnd, kBeg, kEnd)
+        !
+        !       saEddyViscosity computes the eddy-viscosity according to the
+        !       Spalart-Allmaras model for the block given in blockPointers.
+        !       This routine for both the original version as well as the
+        !       modified version according to Edwards.
+        !
+        use constants
+        use blockPointers
+        use constants
+        use paramTurb
+        implicit none
+        ! Input variables
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
+
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, ii, iSize, jSize, kSize
+        real(kind=realType) :: chi, chi3, fv1, rnuSA, cv13
+
+        ! Store the cv1^3; cv1 is a constant of the Spalart-Allmaras model.
+
+        cv13 = rsaCv1**3
+
+        ! Loop over the cells of this block and compute the eddy viscosity.
+        ! Do not include halo's.
 #ifdef TAPENADE_REVERSE
-    iSize = (iEnd-iBeg)+1
-    jSize = (jEnd-jBeg)+1
-    kSize = (kEnd-kBeg)+1
-
-    !$AD II-LOOP
-    do ii=0, iSize*jSize*kSize-1
-       i = mod(ii, iSize) + iBeg
-       j = mod(ii/iSize, jSize) + jBeg
-       k = ii/((iSize*jSize)) + kBeg
+        iSize = (iEnd - iBeg) + 1
+        jSize = (jEnd - jBeg) + 1
+        kSize = (kEnd - kBeg) + 1
+
+        !$AD II-LOOP
+        do ii = 0, iSize * jSize * kSize - 1
+            i = mod(ii, iSize) + iBeg
+            j = mod(ii / iSize, jSize) + jBeg
+            k = ii / ((iSize * jSize)) + kBeg
 #else
-       do k=kBeg, kEnd
-          do j=jBeg, jEnd
-             do i=iBeg, iEnd
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
 #endif
-                rnuSA      = w(i,j,k,itu1)*w(i,j,k,irho)
-                chi        = rnuSA/rlv(i,j,k)
-                chi3       = chi**3
-                fv1        = chi3/(chi3+cv13)
-                rev(i,j,k) = fv1*rnuSA
+                        rnuSA = w(i, j, k, itu1) * w(i, j, k, irho)
+                        chi = rnuSA / rlv(i, j, k)
+                        chi3 = chi**3
+                        fv1 = chi3 / (chi3 + cv13)
+                        rev(i, j, k) = fv1 * rnuSA
 #ifdef TAPENADE_REVERSE
-             end do
+                    end do
 #else
-          enddo
-       enddo
-    enddo
+                end do
+            end do
+        end do
 #endif
-  end subroutine saEddyViscosity
-
-  subroutine kwEddyViscosity(iBeg, iEnd, jBeg, jEnd, kBeg, kEnd)
-    !
-    !       kwEddyViscosity computes the eddy viscosity according to the
-    !       k-omega models (both the original Wilcox as well as the
-    !       modified version) for the block given in blockPointers.
-    !
-    use constants
-    use blockPointers
-    implicit none
-    ! Input variables
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, ii, iSize, jSize, kSize
-
-
-    ! Loop over the cells of this block and compute the eddy viscosity.
-    ! Do not include halo's.
+    end subroutine saEddyViscosity
+
+    subroutine kwEddyViscosity(iBeg, iEnd, jBeg, jEnd, kBeg, kEnd)
+        !
+        !       kwEddyViscosity computes the eddy viscosity according to the
+        !       k-omega models (both the original Wilcox as well as the
+        !       modified version) for the block given in blockPointers.
+        !
+        use constants
+        use blockPointers
+        implicit none
+        ! Input variables
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, ii, iSize, jSize, kSize
+
+        ! Loop over the cells of this block and compute the eddy viscosity.
+        ! Do not include halo's.
 #ifdef TAPENADE_REVERSE
-    iSize = (iEnd-iBeg)+1
-    jSize = (jEnd-jBeg)+1
-    kSize = (kEnd-kBeg)+1
-
-    !$AD II-LOOP
-    do ii=0, iSize*jSize*kSize-1
-       i = mod(ii, iSize) + iBeg
-       j = mod(ii/iSize, jSize) + jBeg
-       k = ii/((iSize*jSize)) + kBeg
+        iSize = (iEnd - iBeg) + 1
+        jSize = (jEnd - jBeg) + 1
+        kSize = (kEnd - kBeg) + 1
+
+        !$AD II-LOOP
+        do ii = 0, iSize * jSize * kSize - 1
+            i = mod(ii, iSize) + iBeg
+            j = mod(ii / iSize, jSize) + jBeg
+            k = ii / ((iSize * jSize)) + kBeg
 #else
-       do k=kBeg, kEnd
-          do j=jBeg, jEnd
-             do i=iBeg, iEnd
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
 #endif
-                rev(i,j,k) = abs(w(i,j,k,irho)*w(i,j,k,itu1)/w(i,j,k,itu2))
+                        rev(i, j, k) = abs(w(i, j, k, irho) * w(i, j, k, itu1) / w(i, j, k, itu2))
 #ifdef TAPENADE_REVERSE
-             end do
+                    end do
 #else
-          enddo
-       enddo
-    enddo
+                end do
+            end do
+        end do
 #endif
 
-  end subroutine kwEddyViscosity
-
-  subroutine SSTEddyViscosity(iBeg, iEnd, jBeg, jEnd, kBeg, kEnd)
-    !
-    !       SSTEddyViscosity computes the eddy viscosity according to
-    !       menter's SST variant of the k-omega turbulence model for the
-    !       block given in blockPointers.
-    !
-    use constants
-    use blockPointers
-    use paramTurb
-    use turbMod
-    implicit none
-    ! Input variables
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, ii, iSize, jSize, kSize
-    real(kind=realType) :: t1, t2, arg2, f2, vortMag
-
-    ! Compute the vorticity squared in the cell centers. The reason
-    ! for computing the vorticity squared is that a routine exists
-    ! for it; for the actual eddy viscosity computation the vorticity
-    ! itself is needed.
-
-    call prodWmag2
-
-    ! Loop over the cells of this block and compute the eddy viscosity.
-    ! Do not include halo's.
+    end subroutine kwEddyViscosity
+
+    subroutine SSTEddyViscosity(iBeg, iEnd, jBeg, jEnd, kBeg, kEnd)
+        !
+        !       SSTEddyViscosity computes the eddy viscosity according to
+        !       menter's SST variant of the k-omega turbulence model for the
+        !       block given in blockPointers.
+        !
+        use constants
+        use blockPointers
+        use paramTurb
+        use turbMod
+        implicit none
+        ! Input variables
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, ii, iSize, jSize, kSize
+        real(kind=realType) :: t1, t2, arg2, f2, vortMag
+
+        ! Compute the vorticity squared in the cell centers. The reason
+        ! for computing the vorticity squared is that a routine exists
+        ! for it; for the actual eddy viscosity computation the vorticity
+        ! itself is needed.
+
+        call prodWmag2
+
+        ! Loop over the cells of this block and compute the eddy viscosity.
+        ! Do not include halo's.
 #ifdef TAPENADE_REVERSE
-    iSize = (iEnd-iBeg)+1
-    jSize = (jEnd-jBeg)+1
-    kSize = (kEnd-kBeg)+1
-
-    !$AD II-LOOP
-    do ii=0, iSize*jSize*kSize-1
-       i = mod(ii, iSize) + iBeg
-       j = mod(ii/iSize, jSize) + jBeg
-       k = ii/((iSize*jSize)) + kBeg
+        iSize = (iEnd - iBeg) + 1
+        jSize = (jEnd - jBeg) + 1
+        kSize = (kEnd - kBeg) + 1
+
+        !$AD II-LOOP
+        do ii = 0, iSize * jSize * kSize - 1
+            i = mod(ii, iSize) + iBeg
+            j = mod(ii / iSize, jSize) + jBeg
+            k = ii / ((iSize * jSize)) + kBeg
 #else
-       do k=kBeg, kEnd
-          do j=jBeg, jEnd
-             do i=iBeg, iEnd
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
 #endif
-                ! Compute the value of the function f2, which occurs in the
-                ! eddy-viscosity computation.
+                        ! Compute the value of the function f2, which occurs in the
+                        ! eddy-viscosity computation.
 
-                t1 = two*sqrt(w(i,j,k,itu1)) &
-                     / (0.09_realType*w(i,j,k,itu2)*d2Wall(i,j,k))
-                t2 = 500.0_realType*rlv(i,j,k) &
-                     / (w(i,j,k,irho)*w(i,j,k,itu2)*d2Wall(i,j,k)**2)
+                        t1 = two * sqrt(w(i, j, k, itu1)) &
+                             / (0.09_realType * w(i, j, k, itu2) * d2Wall(i, j, k))
+                        t2 = 500.0_realType * rlv(i, j, k) &
+                             / (w(i, j, k, irho) * w(i, j, k, itu2) * d2Wall(i, j, k)**2)
 
-                arg2 = max(t1,t2)
-                f2   = tanh(arg2**2)
+                        arg2 = max(t1, t2)
+                        f2 = tanh(arg2**2)
 
-                ! And compute the eddy viscosity.
+                        ! And compute the eddy viscosity.
 
-                vortMag    = sqrt(scratch(i,j,k,iprod))
-                rev(i,j,k) = w(i,j,k,irho)*rSSTA1*w(i,j,k,itu1) &
-                     / max(rSSTA1*w(i,j,k,itu2), f2*vortMag)
+                        vortMag = sqrt(scratch(i, j, k, iprod))
+                        rev(i, j, k) = w(i, j, k, irho) * rSSTA1 * w(i, j, k, itu1) &
+                                       / max(rSSTA1 * w(i, j, k, itu2), f2 * vortMag)
 #ifdef TAPENADE_REVERSE
-             end do
+                    end do
 #else
-          enddo
-       enddo
-    enddo
+                end do
+            end do
+        end do
 #endif
 
-  end subroutine SSTEddyViscosity
-
-  subroutine turbAdvection(mAdv, nAdv, offset, qq)
-    !
-    !       turbAdvection discretizes the advection part of the turbulent
-    !       transport equations. As the advection part is the same for all
-    !       models, this generic routine can be used. Both the
-    !       discretization and the central jacobian are computed in this
-    !       subroutine. The former can either be 1st or 2nd order
-    !       accurate; the latter is always based on the 1st order upwind
-    !       discretization. When the discretization must be second order
-    !       accurate, the fully upwind (kappa = -1) scheme in combination
-    !       with the minmod limiter is used.
-    !       Only nAdv equations are treated, while the actual system has
-    !       size mAdv. The reason is that some equations for some
-    !       turbulence equations do not have an advection part, e.g. the
-    !       f equation in the v2-f model. The argument offset indicates
-    !       the offset in the w vector where this subsystem starts. As a
-    !       consequence it is assumed that the indices of the current
-    !       subsystem are contiguous, e.g. if a 2*2 system is solved the
-    !       Last index in w is offset+1 and offset+2 respectively.
-    !
-    use constants
-    use blockPointers, only : nx, ny, nz, il, jl, kl, vol, sfaceI, sfaceJ, sfaceK, &
-         w, si, sj, sk, addGridVelocities, bmti1, bmti2, bmtj1, bmtj2, &
-         bmtk1, bmtk2, scratch
-    use inputDiscretization, only : orderTurb
-    use iteration, only : groundLevel
-    use turbMod, only : secondOrd
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nAdv, mAdv, offset
-
-    real(kind=realType), dimension(2:il,2:jl,2:kl,mAdv,mAdv), &
-         intent(inout) :: qq
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, ii, jj, kk, iii
-
-    real(kind=realType) :: qs, voli, xa, ya, za
-    real(kind=realType) :: uu, dwt, dwtm1, dwtp1, dwti, dwtj, dwtk
-
-    real(kind=realType), dimension(mAdv) :: impl
-
-    ! Determine whether or not a second order discretization for the
-    ! advective terms must be used.
-    secondOrd = .false.
-    if(groundLevel == 1_intType .and. &
-         orderTurb == secondOrder) secondOrd = .true.
-
-    ! Initialize the grid velocity to zero. This value will be used
-    ! if the block is not moving.
-    continue
-    !$AD CHECKPOINT-START
-    qs = zero
-    !
-    !       Upwind discretization of the convective term in k (zeta)
-    !       direction. Either the 1st order upwind or the second order
-    !       fully upwind interpolation scheme, kappa = -1, is used in
-    !       combination with the minmod limiter.
-    !       The possible grid velocity must be taken into account.
-    !
+    end subroutine SSTEddyViscosity
+
+    subroutine turbAdvection(mAdv, nAdv, offset, qq)
+        !
+        !       turbAdvection discretizes the advection part of the turbulent
+        !       transport equations. As the advection part is the same for all
+        !       models, this generic routine can be used. Both the
+        !       discretization and the central jacobian are computed in this
+        !       subroutine. The former can either be 1st or 2nd order
+        !       accurate; the latter is always based on the 1st order upwind
+        !       discretization. When the discretization must be second order
+        !       accurate, the fully upwind (kappa = -1) scheme in combination
+        !       with the minmod limiter is used.
+        !       Only nAdv equations are treated, while the actual system has
+        !       size mAdv. The reason is that some equations for some
+        !       turbulence equations do not have an advection part, e.g. the
+        !       f equation in the v2-f model. The argument offset indicates
+        !       the offset in the w vector where this subsystem starts. As a
+        !       consequence it is assumed that the indices of the current
+        !       subsystem are contiguous, e.g. if a 2*2 system is solved the
+        !       Last index in w is offset+1 and offset+2 respectively.
+        !
+        use constants
+        use blockPointers, only: nx, ny, nz, il, jl, kl, vol, sfaceI, sfaceJ, sfaceK, &
+                                 w, si, sj, sk, addGridVelocities, bmti1, bmti2, bmtj1, bmtj2, &
+                                 bmtk1, bmtk2, scratch
+        use inputDiscretization, only: orderTurb
+        use iteration, only: groundLevel
+        use turbMod, only: secondOrd
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nAdv, mAdv, offset
+
+        real(kind=realType), dimension(2:il, 2:jl, 2:kl, mAdv, mAdv), &
+            intent(inout) :: qq
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, ii, jj, kk, iii
+
+        real(kind=realType) :: qs, voli, xa, ya, za
+        real(kind=realType) :: uu, dwt, dwtm1, dwtp1, dwti, dwtj, dwtk
+
+        real(kind=realType), dimension(mAdv) :: impl
+
+        ! Determine whether or not a second order discretization for the
+        ! advective terms must be used.
+        secondOrd = .false.
+        if (groundLevel == 1_intType .and. &
+            orderTurb == secondOrder) secondOrd = .true.
+
+        ! Initialize the grid velocity to zero. This value will be used
+        ! if the block is not moving.
+        continue
+        !$AD CHECKPOINT-START
+        qs = zero
+        !
+        !       Upwind discretization of the convective term in k (zeta)
+        !       direction. Either the 1st order upwind or the second order
+        !       fully upwind interpolation scheme, kappa = -1, is used in
+        !       combination with the minmod limiter.
+        !       The possible grid velocity must be taken into account.
+        !
 #ifdef TAPENADE_REVERSE
-    !$AD II-LOOP
-    do iii=0,nx*ny*nz-1
-       i = mod(iii, nx) + 2
-       j = mod(iii/nx, ny) + 2
-       k = iii/(nx*ny) + 2
+        !$AD II-LOOP
+        do iii = 0, nx * ny * nz - 1
+            i = mod(iii, nx) + 2
+            j = mod(iii / nx, ny) + 2
+            k = iii / (nx * ny) + 2
 #else
-       do k=2, kl
-          do j=2, jl
-             do i=2, il
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
 #endif
-                ! Compute the grid velocity if present.
-                ! It is taken as the average of k and k-1,
+                        ! Compute the grid velocity if present.
+                        ! It is taken as the average of k and k-1,
 
-                voli = half/vol(i,j,k)
-                if( addGridVelocities ) &
-                     qs = (sFaceK(i,j,k) + sFaceK(i,j,k-1))*voli
+                        voli = half / vol(i, j, k)
+                        if (addGridVelocities) &
+                            qs = (sFaceK(i, j, k) + sFaceK(i, j, k - 1)) * voli
 
-                ! Compute the normal velocity, where the normal direction
-                ! is taken as the average of faces k and k-1.
+                        ! Compute the normal velocity, where the normal direction
+                        ! is taken as the average of faces k and k-1.
 
-                xa = (sk(i,j,k,1) + sk(i,j,k-1,1))*voli
-                ya = (sk(i,j,k,2) + sk(i,j,k-1,2))*voli
-                za = (sk(i,j,k,3) + sk(i,j,k-1,3))*voli
+                        xa = (sk(i, j, k, 1) + sk(i, j, k - 1, 1)) * voli
+                        ya = (sk(i, j, k, 2) + sk(i, j, k - 1, 2)) * voli
+                        za = (sk(i, j, k, 3) + sk(i, j, k - 1, 3)) * voli
 
-                uu = xa*w(i,j,k,ivx) + ya*w(i,j,k,ivy) + za*w(i,j,k,ivz) - qs
-                ! This term has unit: velocity/length
+                        uu = xa * w(i, j, k, ivx) + ya * w(i, j, k, ivy) + za * w(i, j, k, ivz) - qs
+                        ! This term has unit: velocity/length
 
-                ! Determine the situation we are having here, i.e. positive
-                ! or negative normal velocity.
+                        ! Determine the situation we are having here, i.e. positive
+                        ! or negative normal velocity.
 
-                velKdir: if(uu > zero) then
+                        velKdir: if (uu > zero) then
 
-                   ! Velocity has a component in positive k-direction.
-                   ! Loop over the number of advection equations.
-                   !$AD II-LOOP
-                   do ii=1,nAdv
+                            ! Velocity has a component in positive k-direction.
+                            ! Loop over the number of advection equations.
+                            !$AD II-LOOP
+                            do ii = 1, nAdv
 
-                      ! Set the value of jj such that it corresponds to the
-                      ! turbulent entry in w.
+                                ! Set the value of jj such that it corresponds to the
+                                ! turbulent entry in w.
 
-                      jj = ii + offset
+                                jj = ii + offset
 
-                      ! Check whether a first or a second order discretization
-                      ! must be used.
+                                ! Check whether a first or a second order discretization
+                                ! must be used.
 
-                      if( secondOrd ) then
+                                if (secondOrd) then
 
-                         ! Second order; store the three differences for the
-                         ! discretization of the derivative in k-direction.
+                                    ! Second order; store the three differences for the
+                                    ! discretization of the derivative in k-direction.
 
-                         dwtm1 = w(i,j,k-1,jj) - w(i,j,k-2,jj)
-                         dwt   = w(i,j,k,  jj) - w(i,j,k-1,jj)
-                         dwtp1 = w(i,j,k+1,jj) - w(i,j,k,  jj)
+                                    dwtm1 = w(i, j, k - 1, jj) - w(i, j, k - 2, jj)
+                                    dwt = w(i, j, k, jj) - w(i, j, k - 1, jj)
+                                    dwtp1 = w(i, j, k + 1, jj) - w(i, j, k, jj)
 
-                         ! Construct the derivative in this cell center. This
-                         ! is the first order upwind derivative with two
-                         ! nonlinear corrections.
+                                    ! Construct the derivative in this cell center. This
+                                    ! is the first order upwind derivative with two
+                                    ! nonlinear corrections.
 
-                         dwtk = dwt
+                                    dwtk = dwt
 
-                         if(dwt*dwtp1 > zero) then
-                            if(abs(dwt) < abs(dwtp1)) then
-                               dwtk = dwtk + half*dwt
-                            else
-                               dwtk = dwtk + half*dwtp1
-                            endif
-                         endif
+                                    if (dwt * dwtp1 > zero) then
+                                        if (abs(dwt) < abs(dwtp1)) then
+                                            dwtk = dwtk + half * dwt
+                                        else
+                                            dwtk = dwtk + half * dwtp1
+                                        end if
+                                    end if
 
-                         if(dwt*dwtm1 > zero) then
-                            if(abs(dwt) < abs(dwtm1)) then
-                               dwtk = dwtk - half*dwt
-                            else
-                               dwtk = dwtk - half*dwtm1
-                            endif
-                         endif
+                                    if (dwt * dwtm1 > zero) then
+                                        if (abs(dwt) < abs(dwtm1)) then
+                                            dwtk = dwtk - half * dwt
+                                        else
+                                            dwtk = dwtk - half * dwtm1
+                                        end if
+                                    end if
 
-                      else
+                                else
 
-                         ! 1st order upwind scheme.
+                                    ! 1st order upwind scheme.
 
-                         dwtk = w(i,j,k,jj) - w(i,j,k-1,jj)
+                                    dwtk = w(i, j, k, jj) - w(i, j, k - 1, jj)
 
-                      endif
+                                end if
 
-                      ! Update the residual. The convective term must be
-                      ! substracted, because it appears on the other side of
-                      ! the equation as the source and viscous terms.
-                      ! uu*dwtk = (V.dot.face_normal)*delta(nuTilde)/delta(x)
+                                ! Update the residual. The convective term must be
+                                ! substracted, because it appears on the other side of
+                                ! the equation as the source and viscous terms.
+                                ! uu*dwtk = (V.dot.face_normal)*delta(nuTilde)/delta(x)
 
-                      scratch(i,j,k,idvt+ii-1) = scratch(i,j,k,idvt+ii-1) - uu*dwtk
+                                scratch(i, j, k, idvt + ii - 1) = scratch(i, j, k, idvt + ii - 1) - uu * dwtk
 #ifndef USE_TAPENADE
-                      ! Update the central jacobian. First the term which is
-                      ! always present, i.e. uu.
+                                ! Update the central jacobian. First the term which is
+                                ! always present, i.e. uu.
 
-                      qq(i,j,k,ii,ii) = qq(i,j,k,ii,ii) + uu
+                                qq(i, j, k, ii, ii) = qq(i, j, k, ii, ii) + uu
 
-                      ! For boundary cells k == 2, the implicit treatment must
-                      ! be taken into account. Note that the implicit part
-                      ! is only based on the 1st order discretization.
-                      ! To improve stability the diagonal term is only taken
-                      ! into account when it improves stability, i.e. when
-                      ! it is positive.
+                                ! For boundary cells k == 2, the implicit treatment must
+                                ! be taken into account. Note that the implicit part
+                                ! is only based on the 1st order discretization.
+                                ! To improve stability the diagonal term is only taken
+                                ! into account when it improves stability, i.e. when
+                                ! it is positive.
 
-                      if(k == 2) then
-                         do kk=1,mAdv
-                            impl(kk) = bmtk1(i,j,jj,kk+offset)
-                         enddo
+                                if (k == 2) then
+                                    do kk = 1, mAdv
+                                        impl(kk) = bmtk1(i, j, jj, kk + offset)
+                                    end do
 
-                         impl(ii) = max(impl(ii),zero)
+                                    impl(ii) = max(impl(ii), zero)
 
-                         do kk=1,mAdv
-                            qq(i,j,k,ii,kk) = qq(i,j,k,ii,kk) + uu*impl(kk)
-                         enddo
-                      endif
+                                    do kk = 1, mAdv
+                                        qq(i, j, k, ii, kk) = qq(i, j, k, ii, kk) + uu * impl(kk)
+                                    end do
+                                end if
 #endif
 
-                   enddo
+                            end do
 
-                else velKdir
+                            else velKdir
 
-                   ! Velocity has a component in negative k-direction.
-                   ! Loop over the number of advection equations
-                   !$AD II-LOOP
-                   do ii=1,nAdv
+                            ! Velocity has a component in negative k-direction.
+                            ! Loop over the number of advection equations
+                            !$AD II-LOOP
+                            do ii = 1, nAdv
 
-                      ! Set the value of jj such that it corresponds to the
-                      ! turbulent entry in w.
+                                ! Set the value of jj such that it corresponds to the
+                                ! turbulent entry in w.
 
-                      jj = ii + offset
+                                jj = ii + offset
 
-                      ! Check whether a first or a second order discretization
-                      ! must be used.
+                                ! Check whether a first or a second order discretization
+                                ! must be used.
 
-                      if( secondOrd ) then
+                                if (secondOrd) then
 
-                         ! Store the three differences for the discretization of
-                         ! the derivative in k-direction.
+                                    ! Store the three differences for the discretization of
+                                    ! the derivative in k-direction.
 
-                         dwtm1 = w(i,j,k,  jj) - w(i,j,k-1,jj)
-                         dwt   = w(i,j,k+1,jj) - w(i,j,k,  jj)
-                         dwtp1 = w(i,j,k+2,jj) - w(i,j,k+1,jj)
+                                    dwtm1 = w(i, j, k, jj) - w(i, j, k - 1, jj)
+                                    dwt = w(i, j, k + 1, jj) - w(i, j, k, jj)
+                                    dwtp1 = w(i, j, k + 2, jj) - w(i, j, k + 1, jj)
 
-                         ! Construct the derivative in this cell center. This is
-                         ! the first order upwind derivative with two nonlinear
-                         ! corrections.
+                                    ! Construct the derivative in this cell center. This is
+                                    ! the first order upwind derivative with two nonlinear
+                                    ! corrections.
 
-                         dwtk = dwt
+                                    dwtk = dwt
 
-                         if(dwt*dwtp1 > zero) then
-                            if(abs(dwt) < abs(dwtp1)) then
-                               dwtk = dwtk - half*dwt
-                            else
-                               dwtk = dwtk - half*dwtp1
-                            endif
-                         endif
+                                    if (dwt * dwtp1 > zero) then
+                                        if (abs(dwt) < abs(dwtp1)) then
+                                            dwtk = dwtk - half * dwt
+                                        else
+                                            dwtk = dwtk - half * dwtp1
+                                        end if
+                                    end if
 
-                         if(dwt*dwtm1 > zero) then
-                            if(abs(dwt) < abs(dwtm1)) then
-                               dwtk = dwtk + half*dwt
-                            else
-                               dwtk = dwtk + half*dwtm1
-                            endif
-                         endif
+                                    if (dwt * dwtm1 > zero) then
+                                        if (abs(dwt) < abs(dwtm1)) then
+                                            dwtk = dwtk + half * dwt
+                                        else
+                                            dwtk = dwtk + half * dwtm1
+                                        end if
+                                    end if
 
-                      else
+                                else
 
-                         ! 1st order upwind scheme.
+                                    ! 1st order upwind scheme.
 
-                         dwtk = w(i,j,k+1,jj) - w(i,j,k,jj)
+                                    dwtk = w(i, j, k + 1, jj) - w(i, j, k, jj)
 
-                      endif
+                                end if
 
-                      ! Update the residual. The convective term must be
-                      ! substracted, because it appears on the other side
-                      ! of the equation as the source and viscous terms.
+                                ! Update the residual. The convective term must be
+                                ! substracted, because it appears on the other side
+                                ! of the equation as the source and viscous terms.
 
-                      scratch(i,j,k,idvt+ii-1) = scratch(i,j,k,idvt+ii-1) - uu*dwtk
+                                scratch(i, j, k, idvt + ii - 1) = scratch(i, j, k, idvt + ii - 1) - uu * dwtk
 
-                      ! Update the central jacobian. First the term which is
-                      ! always present, i.e. -uu.
+                                ! Update the central jacobian. First the term which is
+                                ! always present, i.e. -uu.
 #ifndef USE_TAPENADE
-                      qq(i,j,k,ii,ii) = qq(i,j,k,ii,ii) - uu
-
-                      ! For boundary cells k == kl, the implicit treatment must
-                      ! be taken into account. Note that the implicit part
-                      ! is only based on the 1st order discretization.
-                      ! To improve stability the diagonal term is only taken
-                      ! into account when it improves stability, i.e. when
-                      ! it is positive.
-
-                      if(k == kl) then
-                         do kk=1,mAdv
-                            impl(kk) = bmtk2(i,j,jj,kk+offset)
-                         enddo
-
-                         impl(ii) = max(impl(ii),zero)
-
-                         do kk=1,mAdv
-                            qq(i,j,k,ii,kk) = qq(i,j,k,ii,kk) - uu*impl(kk)
-                         enddo
-                      endif
+                                qq(i, j, k, ii, ii) = qq(i, j, k, ii, ii) - uu
+
+                                ! For boundary cells k == kl, the implicit treatment must
+                                ! be taken into account. Note that the implicit part
+                                ! is only based on the 1st order discretization.
+                                ! To improve stability the diagonal term is only taken
+                                ! into account when it improves stability, i.e. when
+                                ! it is positive.
+
+                                if (k == kl) then
+                                    do kk = 1, mAdv
+                                        impl(kk) = bmtk2(i, j, jj, kk + offset)
+                                    end do
+
+                                    impl(ii) = max(impl(ii), zero)
+
+                                    do kk = 1, mAdv
+                                        qq(i, j, k, ii, kk) = qq(i, j, k, ii, kk) - uu * impl(kk)
+                                    end do
+                                end if
 #endif
-                   enddo
+                            end do
 
-                endif velKdir
+                        end if velKdir
 #ifdef TAPENADE_REVERSE
-             end do
+                    end do
 #else
-          enddo
-       enddo
-    enddo
+                end do
+            end do
+        end do
 #endif
-    continue
-    !$AD CHECKPOINT-END
-    !
-    !       Upwind discretization of the convective term in j (eta)
-    !       direction. Either the 1st order upwind or the second order
-    !       fully upwind interpolation scheme, kappa = -1, is used in
-    !       combination with the minmod limiter.
-    !       The possible grid velocity must be taken into account.
-    !
-    continue
-    !$AD CHECKPOINT-START
-    qs = zero
+        continue
+        !$AD CHECKPOINT-END
+        !
+        !       Upwind discretization of the convective term in j (eta)
+        !       direction. Either the 1st order upwind or the second order
+        !       fully upwind interpolation scheme, kappa = -1, is used in
+        !       combination with the minmod limiter.
+        !       The possible grid velocity must be taken into account.
+        !
+        continue
+        !$AD CHECKPOINT-START
+        qs = zero
 #ifdef TAPENADE_REVERSE
-    !$AD II-LOOP
-    do iii=0,nx*ny*nz-1
-       i = mod(iii, nx) + 2
-       j = mod(iii/nx, ny) + 2
-       k = iii/(nx*ny) + 2
+        !$AD II-LOOP
+        do iii = 0, nx * ny * nz - 1
+            i = mod(iii, nx) + 2
+            j = mod(iii / nx, ny) + 2
+            k = iii / (nx * ny) + 2
 #else
-       do k=2, kl
-          do j=2, jl
-             do i=2, il
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
 #endif
 
-                ! Compute the grid velocity if present.
-                ! It is taken as the average of j and j-1,
+                        ! Compute the grid velocity if present.
+                        ! It is taken as the average of j and j-1,
 
-                voli = half/vol(i,j,k)
-                if( addGridVelocities ) &
-                     qs = (sFaceJ(i,j,k) + sFaceJ(i,j-1,k))*voli
+                        voli = half / vol(i, j, k)
+                        if (addGridVelocities) &
+                            qs = (sFaceJ(i, j, k) + sFaceJ(i, j - 1, k)) * voli
 
-                ! Compute the normal velocity, where the normal direction
-                ! is taken as the average of faces j and j-1.
+                        ! Compute the normal velocity, where the normal direction
+                        ! is taken as the average of faces j and j-1.
 
-                xa = (sj(i,j,k,1) + sj(i,j-1,k,1))*voli
-                ya = (sj(i,j,k,2) + sj(i,j-1,k,2))*voli
-                za = (sj(i,j,k,3) + sj(i,j-1,k,3))*voli
+                        xa = (sj(i, j, k, 1) + sj(i, j - 1, k, 1)) * voli
+                        ya = (sj(i, j, k, 2) + sj(i, j - 1, k, 2)) * voli
+                        za = (sj(i, j, k, 3) + sj(i, j - 1, k, 3)) * voli
 
-                uu = xa*w(i,j,k,ivx) + ya*w(i,j,k,ivy) + za*w(i,j,k,ivz) - qs
+                        uu = xa * w(i, j, k, ivx) + ya * w(i, j, k, ivy) + za * w(i, j, k, ivz) - qs
 
-                ! Determine the situation we are having here, i.e. positive
-                ! or negative normal velocity.
+                        ! Determine the situation we are having here, i.e. positive
+                        ! or negative normal velocity.
 
-                velJdir: if(uu > zero) then
+                        velJdir: if (uu > zero) then
 
-                   ! Velocity has a component in positive j-direction.
-                   ! Loop over the number of advection equations.
-                   !$AD II-LOOP
-                   do ii=1,nAdv
+                            ! Velocity has a component in positive j-direction.
+                            ! Loop over the number of advection equations.
+                            !$AD II-LOOP
+                            do ii = 1, nAdv
 
-                      ! Set the value of jj such that it corresponds to the
-                      ! turbulent entry in w.
+                                ! Set the value of jj such that it corresponds to the
+                                ! turbulent entry in w.
 
-                      jj = ii + offset
+                                jj = ii + offset
 
-                      ! Check whether a first or a second order discretization
-                      ! must be used.
+                                ! Check whether a first or a second order discretization
+                                ! must be used.
 
-                      if( secondOrd ) then
+                                if (secondOrd) then
 
-                         ! Second order; store the three differences for the
-                         ! discretization of the derivative in j-direction.
+                                    ! Second order; store the three differences for the
+                                    ! discretization of the derivative in j-direction.
 
-                         dwtm1 = w(i,j-1,k,jj) - w(i,j-2,k,jj)
-                         dwt   = w(i,j,  k,jj) - w(i,j-1,k,jj)
-                         dwtp1 = w(i,j+1,k,jj) - w(i,j,  k,jj)
+                                    dwtm1 = w(i, j - 1, k, jj) - w(i, j - 2, k, jj)
+                                    dwt = w(i, j, k, jj) - w(i, j - 1, k, jj)
+                                    dwtp1 = w(i, j + 1, k, jj) - w(i, j, k, jj)
 
-                         ! Construct the derivative in this cell center. This is
-                         ! the first order upwind derivative with two nonlinear
-                         ! corrections.
+                                    ! Construct the derivative in this cell center. This is
+                                    ! the first order upwind derivative with two nonlinear
+                                    ! corrections.
 
-                         dwtj = dwt
+                                    dwtj = dwt
 
-                         if(dwt*dwtp1 > zero) then
-                            if(abs(dwt) < abs(dwtp1)) then
-                               dwtj = dwtj + half*dwt
-                            else
-                               dwtj = dwtj + half*dwtp1
-                            endif
-                         endif
+                                    if (dwt * dwtp1 > zero) then
+                                        if (abs(dwt) < abs(dwtp1)) then
+                                            dwtj = dwtj + half * dwt
+                                        else
+                                            dwtj = dwtj + half * dwtp1
+                                        end if
+                                    end if
 
-                         if(dwt*dwtm1 > zero) then
-                            if(abs(dwt) < abs(dwtm1)) then
-                               dwtj = dwtj - half*dwt
-                            else
-                               dwtj = dwtj - half*dwtm1
-                            endif
-                         endif
+                                    if (dwt * dwtm1 > zero) then
+                                        if (abs(dwt) < abs(dwtm1)) then
+                                            dwtj = dwtj - half * dwt
+                                        else
+                                            dwtj = dwtj - half * dwtm1
+                                        end if
+                                    end if
 
-                      else
+                                else
 
-                         ! 1st order upwind scheme.
+                                    ! 1st order upwind scheme.
 
-                         dwtj = w(i,j,k,jj) - w(i,j-1,k,jj)
+                                    dwtj = w(i, j, k, jj) - w(i, j - 1, k, jj)
 
-                      endif
+                                end if
 
-                      ! Update the residual. The convective term must be
-                      ! substracted, because it appears on the other side of
-                      ! the equation as the source and viscous terms.
+                                ! Update the residual. The convective term must be
+                                ! substracted, because it appears on the other side of
+                                ! the equation as the source and viscous terms.
 
-                      scratch(i,j,k,idvt+ii-1) = scratch(i,j,k,idvt+ii-1) - uu*dwtj
+                                scratch(i, j, k, idvt + ii - 1) = scratch(i, j, k, idvt + ii - 1) - uu * dwtj
 
-                      ! Update the central jacobian. First the term which is
-                      ! always present, i.e. uu.
+                                ! Update the central jacobian. First the term which is
+                                ! always present, i.e. uu.
 #ifndef USE_TAPENADE
-                      qq(i,j,k,ii,ii) = qq(i,j,k,ii,ii) + uu
-
-                      ! For boundary cells j == 2, the implicit treatment must
-                      ! be taken into account. Note that the implicit part
-                      ! is only based on the 1st order discretization.
-                      ! To improve stability the diagonal term is only taken
-                      ! into account when it improves stability, i.e. when
-                      ! it is positive.
-
-                      if(j == 2) then
-                         do kk=1,mAdv
-                            impl(kk) = bmtj1(i,k,jj,kk+offset)
-                         enddo
-
-                         impl(ii) = max(impl(ii),zero)
-
-                         do kk=1,mAdv
-                            qq(i,j,k,ii,kk) = qq(i,j,k,ii,kk) + uu*impl(kk)
-                         enddo
-                      endif
+                                qq(i, j, k, ii, ii) = qq(i, j, k, ii, ii) + uu
+
+                                ! For boundary cells j == 2, the implicit treatment must
+                                ! be taken into account. Note that the implicit part
+                                ! is only based on the 1st order discretization.
+                                ! To improve stability the diagonal term is only taken
+                                ! into account when it improves stability, i.e. when
+                                ! it is positive.
+
+                                if (j == 2) then
+                                    do kk = 1, mAdv
+                                        impl(kk) = bmtj1(i, k, jj, kk + offset)
+                                    end do
+
+                                    impl(ii) = max(impl(ii), zero)
+
+                                    do kk = 1, mAdv
+                                        qq(i, j, k, ii, kk) = qq(i, j, k, ii, kk) + uu * impl(kk)
+                                    end do
+                                end if
 #endif
-                   enddo
+                            end do
 
-                else velJdir
+                            else velJdir
 
-                   ! Velocity has a component in negative j-direction.
-                   ! Loop over the number of advection equations.
-                   !$AD II-LOOP
-                   do ii=1,nAdv
+                            ! Velocity has a component in negative j-direction.
+                            ! Loop over the number of advection equations.
+                            !$AD II-LOOP
+                            do ii = 1, nAdv
 
-                      ! Set the value of jj such that it corresponds to the
-                      ! turbulent entry in w.
+                                ! Set the value of jj such that it corresponds to the
+                                ! turbulent entry in w.
 
-                      jj = ii + offset
+                                jj = ii + offset
 
-                      ! Check whether a first or a second order discretization
-                      ! must be used.
+                                ! Check whether a first or a second order discretization
+                                ! must be used.
 
-                      if( secondOrd ) then
+                                if (secondOrd) then
 
-                         ! Store the three differences for the discretization of
-                         ! the derivative in j-direction.
+                                    ! Store the three differences for the discretization of
+                                    ! the derivative in j-direction.
 
-                         dwtm1 = w(i,j,  k,jj) - w(i,j-1,k,jj)
-                         dwt   = w(i,j+1,k,jj) - w(i,j,  k,jj)
-                         dwtp1 = w(i,j+2,k,jj) - w(i,j+1,k,jj)
+                                    dwtm1 = w(i, j, k, jj) - w(i, j - 1, k, jj)
+                                    dwt = w(i, j + 1, k, jj) - w(i, j, k, jj)
+                                    dwtp1 = w(i, j + 2, k, jj) - w(i, j + 1, k, jj)
 
-                         ! Construct the derivative in this cell center. This is
-                         ! the first order upwind derivative with two nonlinear
-                         ! corrections.
+                                    ! Construct the derivative in this cell center. This is
+                                    ! the first order upwind derivative with two nonlinear
+                                    ! corrections.
 
-                         dwtj = dwt
+                                    dwtj = dwt
 
-                         if(dwt*dwtp1 > zero) then
-                            if(abs(dwt) < abs(dwtp1)) then
-                               dwtj = dwtj - half*dwt
-                            else
-                               dwtj = dwtj - half*dwtp1
-                            endif
-                         endif
+                                    if (dwt * dwtp1 > zero) then
+                                        if (abs(dwt) < abs(dwtp1)) then
+                                            dwtj = dwtj - half * dwt
+                                        else
+                                            dwtj = dwtj - half * dwtp1
+                                        end if
+                                    end if
 
-                         if(dwt*dwtm1 > zero) then
-                            if(abs(dwt) < abs(dwtm1)) then
-                               dwtj = dwtj + half*dwt
-                            else
-                               dwtj = dwtj + half*dwtm1
-                            endif
-                         endif
+                                    if (dwt * dwtm1 > zero) then
+                                        if (abs(dwt) < abs(dwtm1)) then
+                                            dwtj = dwtj + half * dwt
+                                        else
+                                            dwtj = dwtj + half * dwtm1
+                                        end if
+                                    end if
 
-                      else
+                                else
 
-                         ! 1st order upwind scheme.
+                                    ! 1st order upwind scheme.
 
-                         dwtj = w(i,j+1,k,jj) - w(i,j,k,jj)
+                                    dwtj = w(i, j + 1, k, jj) - w(i, j, k, jj)
 
-                      endif
+                                end if
 
-                      ! Update the residual. The convective term must be
-                      ! substracted, because it appears on the other side
-                      ! of the equation as the source and viscous terms.
+                                ! Update the residual. The convective term must be
+                                ! substracted, because it appears on the other side
+                                ! of the equation as the source and viscous terms.
 
-                      scratch(i,j,k,idvt+ii-1) = scratch(i,j,k,idvt+ii-1) - uu*dwtj
+                                scratch(i, j, k, idvt + ii - 1) = scratch(i, j, k, idvt + ii - 1) - uu * dwtj
 
-                      ! Update the central jacobian. First the term which is
-                      ! always present, i.e. -uu.
+                                ! Update the central jacobian. First the term which is
+                                ! always present, i.e. -uu.
 #ifndef USE_TAPENADE
-                      qq(i,j,k,ii,ii) = qq(i,j,k,ii,ii) - uu
-
-                      ! For boundary cells j == jl, the implicit treatment must
-                      ! be taken into account. Note that the implicit part
-                      ! is only based on the 1st order discretization.
-                      ! To improve stability the diagonal term is only taken
-                      ! into account when it improves stability, i.e. when
-                      ! it is positive.
-
-                      if(j == jl) then
-                         do kk=1,mAdv
-                            impl(kk) = bmtj2(i,k,jj,kk+offset)
-                         enddo
-
-                         impl(ii) = max(impl(ii),zero)
-
-                         do kk=1,mAdv
-                            qq(i,j,k,ii,kk) = qq(i,j,k,ii,kk) - uu*impl(kk)
-                         enddo
-                      endif
+                                qq(i, j, k, ii, ii) = qq(i, j, k, ii, ii) - uu
+
+                                ! For boundary cells j == jl, the implicit treatment must
+                                ! be taken into account. Note that the implicit part
+                                ! is only based on the 1st order discretization.
+                                ! To improve stability the diagonal term is only taken
+                                ! into account when it improves stability, i.e. when
+                                ! it is positive.
+
+                                if (j == jl) then
+                                    do kk = 1, mAdv
+                                        impl(kk) = bmtj2(i, k, jj, kk + offset)
+                                    end do
+
+                                    impl(ii) = max(impl(ii), zero)
+
+                                    do kk = 1, mAdv
+                                        qq(i, j, k, ii, kk) = qq(i, j, k, ii, kk) - uu * impl(kk)
+                                    end do
+                                end if
 #endif
-                   enddo
+                            end do
 
-                endif velJdir
+                        end if velJdir
 #ifdef TAPENADE_REVERSE
-             end do
+                    end do
 #else
-          enddo
-       enddo
-    enddo
+                end do
+            end do
+        end do
 #endif
-    continue
-    !$AD CHECKPOINT-END
-    !
-    !       Upwind discretization of the convective term in i (xi)
-    !       direction. Either the 1st order upwind or the second order
-    !       fully upwind interpolation scheme, kappa = -1, is used in
-    !       combination with the minmod limiter.
-    !       The possible grid velocity must be taken into account.
-    !
-    continue
-    !$AD CHECKPOINT-START
-    qs = zero
+        continue
+        !$AD CHECKPOINT-END
+        !
+        !       Upwind discretization of the convective term in i (xi)
+        !       direction. Either the 1st order upwind or the second order
+        !       fully upwind interpolation scheme, kappa = -1, is used in
+        !       combination with the minmod limiter.
+        !       The possible grid velocity must be taken into account.
+        !
+        continue
+        !$AD CHECKPOINT-START
+        qs = zero
 #ifdef TAPENADE_REVERSE
-    !$AD II-LOOP
-    do iii=0,nx*ny*nz-1
-       i = mod(iii, nx) + 2
-       j = mod(iii/nx, ny) + 2
-       k = iii/(nx*ny) + 2
+        !$AD II-LOOP
+        do iii = 0, nx * ny * nz - 1
+            i = mod(iii, nx) + 2
+            j = mod(iii / nx, ny) + 2
+            k = iii / (nx * ny) + 2
 #else
-       do k=2, kl
-          do j=2, jl
-             do i=2, il
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
 #endif
 
+                        ! Compute the grid velocity if present.
+                        ! It is taken as the average of i and i-1,
 
-                ! Compute the grid velocity if present.
-                ! It is taken as the average of i and i-1,
+                        voli = half / vol(i, j, k)
+                        if (addGridVelocities) &
+                            qs = (sFaceI(i, j, k) + sFaceI(i - 1, j, k)) * voli
 
-                voli = half/vol(i,j,k)
-                if( addGridVelocities ) &
-                     qs = (sFaceI(i,j,k) + sFaceI(i-1,j,k))*voli
+                        ! Compute the normal velocity, where the normal direction
+                        ! is taken as the average of faces i and i-1.
 
-                ! Compute the normal velocity, where the normal direction
-                ! is taken as the average of faces i and i-1.
+                        xa = (si(i, j, k, 1) + si(i - 1, j, k, 1)) * voli
+                        ya = (si(i, j, k, 2) + si(i - 1, j, k, 2)) * voli
+                        za = (si(i, j, k, 3) + si(i - 1, j, k, 3)) * voli
 
-                xa = (si(i,j,k,1) + si(i-1,j,k,1))*voli
-                ya = (si(i,j,k,2) + si(i-1,j,k,2))*voli
-                za = (si(i,j,k,3) + si(i-1,j,k,3))*voli
+                        uu = xa * w(i, j, k, ivx) + ya * w(i, j, k, ivy) + za * w(i, j, k, ivz) - qs
 
-                uu = xa*w(i,j,k,ivx) + ya*w(i,j,k,ivy) + za*w(i,j,k,ivz) - qs
+                        ! Determine the situation we are having here, i.e. positive
+                        ! or negative normal velocity.
 
-                ! Determine the situation we are having here, i.e. positive
-                ! or negative normal velocity.
+                        velIdir: if (uu > zero) then
 
-                velIdir: if(uu > zero) then
+                            ! Velocity has a component in positive i-direction.
+                            ! Loop over the number of advection equations.
+                            !$AD II-LOOP
+                            do ii = 1, nAdv
 
-                   ! Velocity has a component in positive i-direction.
-                   ! Loop over the number of advection equations.
-                   !$AD II-LOOP
-                   do ii=1,nAdv
+                                ! Set the value of jj such that it corresponds to the
+                                ! turbulent entry in w.
 
-                      ! Set the value of jj such that it corresponds to the
-                      ! turbulent entry in w.
+                                jj = ii + offset
 
-                      jj = ii + offset
+                                ! Check whether a first or a second order discretization
+                                ! must be used.
 
-                      ! Check whether a first or a second order discretization
-                      ! must be used.
+                                if (secondOrd) then
 
-                      if( secondOrd ) then
+                                    ! Second order; store the three differences for the
+                                    ! discretization of the derivative in i-direction.
 
-                         ! Second order; store the three differences for the
-                         ! discretization of the derivative in i-direction.
+                                    dwtm1 = w(i - 1, j, k, jj) - w(i - 2, j, k, jj)
+                                    dwt = w(i, j, k, jj) - w(i - 1, j, k, jj)
+                                    dwtp1 = w(i + 1, j, k, jj) - w(i, j, k, jj)
 
-                         dwtm1 = w(i-1,j,k,jj) - w(i-2,j,k,jj)
-                         dwt   = w(i,  j,k,jj) - w(i-1,j,k,jj)
-                         dwtp1 = w(i+1,j,k,jj) - w(i,  j,k,jj)
+                                    ! Construct the derivative in this cell center. This is
+                                    ! the first order upwind derivative with two nonlinear
+                                    ! corrections.
 
-                         ! Construct the derivative in this cell center. This is
-                         ! the first order upwind derivative with two nonlinear
-                         ! corrections.
+                                    dwti = dwt
 
-                         dwti = dwt
+                                    if (dwt * dwtp1 > zero) then
+                                        if (abs(dwt) < abs(dwtp1)) then
+                                            dwti = dwti + half * dwt
+                                        else
+                                            dwti = dwti + half * dwtp1
+                                        end if
+                                    end if
 
-                         if(dwt*dwtp1 > zero) then
-                            if(abs(dwt) < abs(dwtp1)) then
-                               dwti = dwti + half*dwt
-                            else
-                               dwti = dwti + half*dwtp1
-                            endif
-                         endif
+                                    if (dwt * dwtm1 > zero) then
+                                        if (abs(dwt) < abs(dwtm1)) then
+                                            dwti = dwti - half * dwt
+                                        else
+                                            dwti = dwti - half * dwtm1
+                                        end if
+                                    end if
 
-                         if(dwt*dwtm1 > zero) then
-                            if(abs(dwt) < abs(dwtm1)) then
-                               dwti = dwti - half*dwt
-                            else
-                               dwti = dwti - half*dwtm1
-                            endif
-                         endif
+                                else
 
-                      else
+                                    ! 1st order upwind scheme.
 
-                         ! 1st order upwind scheme.
+                                    dwti = w(i, j, k, jj) - w(i - 1, j, k, jj)
 
-                         dwti = w(i,j,k,jj) - w(i-1,j,k,jj)
+                                end if
 
-                      endif
+                                ! Update the residual. The convective term must be
+                                ! substracted, because it appears on the other side of
+                                ! the equation as the source and viscous terms.
 
-                      ! Update the residual. The convective term must be
-                      ! substracted, because it appears on the other side of
-                      ! the equation as the source and viscous terms.
+                                scratch(i, j, k, idvt + ii - 1) = scratch(i, j, k, idvt + ii - 1) - uu * dwti
 
-                      scratch(i,j,k,idvt+ii-1) = scratch(i,j,k,idvt+ii-1) - uu*dwti
-
-                      ! Update the central jacobian. First the term which is
-                      ! always present, i.e. uu.
+                                ! Update the central jacobian. First the term which is
+                                ! always present, i.e. uu.
 #ifndef USE_TAPENADE
-                      qq(i,j,k,ii,ii) = qq(i,j,k,ii,ii) + uu
-
-                      ! For boundary cells i == 2, the implicit treatment must
-                      ! be taken into account. Note that the implicit part
-                      ! is only based on the 1st order discretization.
-                      ! To improve stability the diagonal term is only taken
-                      ! into account when it improves stability, i.e. when
-                      ! it is positive.
-
-                      if(i == 2) then
-                         do kk=1,mAdv
-                            impl(kk) = bmti1(j,k,jj,kk+offset)
-                         enddo
-
-                         impl(ii) = max(impl(ii),zero)
-
-                         do kk=1,mAdv
-                            qq(i,j,k,ii,kk) = qq(i,j,k,ii,kk) + uu*impl(kk)
-                         enddo
-                      endif
+                                qq(i, j, k, ii, ii) = qq(i, j, k, ii, ii) + uu
+
+                                ! For boundary cells i == 2, the implicit treatment must
+                                ! be taken into account. Note that the implicit part
+                                ! is only based on the 1st order discretization.
+                                ! To improve stability the diagonal term is only taken
+                                ! into account when it improves stability, i.e. when
+                                ! it is positive.
+
+                                if (i == 2) then
+                                    do kk = 1, mAdv
+                                        impl(kk) = bmti1(j, k, jj, kk + offset)
+                                    end do
+
+                                    impl(ii) = max(impl(ii), zero)
+
+                                    do kk = 1, mAdv
+                                        qq(i, j, k, ii, kk) = qq(i, j, k, ii, kk) + uu * impl(kk)
+                                    end do
+                                end if
 #endif
-                   enddo
+                            end do
 
-                else velIdir
+                            else velIdir
 
-                   ! Velocity has a component in negative i-direction.
-                   ! Loop over the number of advection equations.
-                   !$AD II-LOOP
-                   do ii=1,nAdv
+                            ! Velocity has a component in negative i-direction.
+                            ! Loop over the number of advection equations.
+                            !$AD II-LOOP
+                            do ii = 1, nAdv
 
-                      ! Set the value of jj such that it corresponds to the
-                      ! turbulent entry in w.
+                                ! Set the value of jj such that it corresponds to the
+                                ! turbulent entry in w.
 
-                      jj = ii + offset
+                                jj = ii + offset
 
-                      ! Check whether a first or a second order discretization
-                      ! must be used.
+                                ! Check whether a first or a second order discretization
+                                ! must be used.
 
-                      if( secondOrd ) then
+                                if (secondOrd) then
 
-                         ! Second order; store the three differences for the
-                         ! discretization of the derivative in i-direction.
+                                    ! Second order; store the three differences for the
+                                    ! discretization of the derivative in i-direction.
 
-                         dwtm1 = w(i,  j,k,jj) - w(i-1,j,k,jj)
-                         dwt   = w(i+1,j,k,jj) - w(i,  j,k,jj)
-                         dwtp1 = w(i+2,j,k,jj) - w(i+1,j,k,jj)
+                                    dwtm1 = w(i, j, k, jj) - w(i - 1, j, k, jj)
+                                    dwt = w(i + 1, j, k, jj) - w(i, j, k, jj)
+                                    dwtp1 = w(i + 2, j, k, jj) - w(i + 1, j, k, jj)
 
-                         ! Construct the derivative in this cell center. This is
-                         ! the first order upwind derivative with two nonlinear
-                         ! corrections.
+                                    ! Construct the derivative in this cell center. This is
+                                    ! the first order upwind derivative with two nonlinear
+                                    ! corrections.
 
-                         dwti = dwt
+                                    dwti = dwt
 
-                         if(dwt*dwtp1 > zero) then
-                            if(abs(dwt) < abs(dwtp1)) then
-                               dwti = dwti - half*dwt
-                            else
-                               dwti = dwti - half*dwtp1
-                            endif
-                         endif
+                                    if (dwt * dwtp1 > zero) then
+                                        if (abs(dwt) < abs(dwtp1)) then
+                                            dwti = dwti - half * dwt
+                                        else
+                                            dwti = dwti - half * dwtp1
+                                        end if
+                                    end if
 
-                         if(dwt*dwtm1 > zero) then
-                            if(abs(dwt) < abs(dwtm1)) then
-                               dwti = dwti + half*dwt
-                            else
-                               dwti = dwti + half*dwtm1
-                            endif
-                         endif
+                                    if (dwt * dwtm1 > zero) then
+                                        if (abs(dwt) < abs(dwtm1)) then
+                                            dwti = dwti + half * dwt
+                                        else
+                                            dwti = dwti + half * dwtm1
+                                        end if
+                                    end if
 
-                      else
+                                else
 
-                         ! 1st order upwind scheme.
+                                    ! 1st order upwind scheme.
 
-                         dwti = w(i+1,j,k,jj) - w(i,j,k,jj)
+                                    dwti = w(i + 1, j, k, jj) - w(i, j, k, jj)
 
-                      endif
+                                end if
 
-                      ! Update the residual. The convective term must be
-                      ! substracted, because it appears on the other side
-                      ! of the equation as the source and viscous terms.
+                                ! Update the residual. The convective term must be
+                                ! substracted, because it appears on the other side
+                                ! of the equation as the source and viscous terms.
 
-                      scratch(i,j,k,idvt+ii-1) = scratch(i,j,k,idvt+ii-1) - uu*dwti
+                                scratch(i, j, k, idvt + ii - 1) = scratch(i, j, k, idvt + ii - 1) - uu * dwti
 
-                      ! Update the central jacobian. First the term which is
-                      ! always present, i.e. -uu.
+                                ! Update the central jacobian. First the term which is
+                                ! always present, i.e. -uu.
 #ifndef USE_TAPENADE
-                      qq(i,j,k,ii,ii) = qq(i,j,k,ii,ii) - uu
-
-                      ! For boundary cells i == il, the implicit treatment must
-                      ! be taken into account. Note that the implicit part
-                      ! is only based on the 1st order discretization.
-                      ! To improve stability the diagonal term is only taken
-                      ! into account when it improves stability, i.e. when
-                      ! it is positive.
-
-                      if(i == il) then
-                         do kk=1,mAdv
-                            impl(kk) = bmti2(j,k,jj,kk+offset)
-                         enddo
-
-                         impl(ii) = max(impl(ii),zero)
-
-                         do kk=1,mAdv
-                            qq(i,j,k,ii,kk) = qq(i,j,k,ii,kk) - uu*impl(kk)
-                         enddo
-                      endif
+                                qq(i, j, k, ii, ii) = qq(i, j, k, ii, ii) - uu
+
+                                ! For boundary cells i == il, the implicit treatment must
+                                ! be taken into account. Note that the implicit part
+                                ! is only based on the 1st order discretization.
+                                ! To improve stability the diagonal term is only taken
+                                ! into account when it improves stability, i.e. when
+                                ! it is positive.
+
+                                if (i == il) then
+                                    do kk = 1, mAdv
+                                        impl(kk) = bmti2(j, k, jj, kk + offset)
+                                    end do
+
+                                    impl(ii) = max(impl(ii), zero)
+
+                                    do kk = 1, mAdv
+                                        qq(i, j, k, ii, kk) = qq(i, j, k, ii, kk) - uu * impl(kk)
+                                    end do
+                                end if
 #endif
-                   enddo
+                            end do
 
-                endif velIdir
+                        end if velIdir
 #ifdef TAPENADE_REVERSE
-             end do
+                    end do
 #else
-          enddo
-       enddo
-    enddo
+                end do
+            end do
+        end do
 #endif
 
-    !$AD CHECKPOINT-END
-    continue
-  end subroutine turbAdvection
-
+        !$AD CHECKPOINT-END
+        continue
+    end subroutine turbAdvection
 
-  ! ----------------------------------------------------------------------
-  !                                                                      |
-  !                    No Tapenade Routine below this line               |
-  !                                                                      |
-  ! ----------------------------------------------------------------------
+    ! ----------------------------------------------------------------------
+    !                                                                      |
+    !                    No Tapenade Routine below this line               |
+    !                                                                      |
+    ! ----------------------------------------------------------------------
 
 #ifndef USE_TAPENADE
 
-  subroutine tdia3(nb, ne, l, c, u, r)
-    !
-    !       tdia3 solves the tridiagonal linear system (l+c+u) v = r,
-    !       where l is the lower, c the central and u the upper diagonal.
-    !       Every entry in the matrix is a 2x2 block matrix, i.e.
-    !                    x x  x 0  0 0  ........        = c(nb) u(nb)
-    !                    x x  0 x  0 0  ........
-    !                    x 0  x x  x 0  ........        = l(i) c(i) u(i)
-    !                    0 x  x x  0 x  ........
-    !                         ........  x 0  x x        = l(ne) c(ne)
-    !                         ........  0 x  x x
-    !               With c = x x     u,l = x 0
-    !                        x x           0 x
-    !
-    use constants
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: nb, ne
-
-    real(kind=realType), dimension(2,nb:ne), intent(inout) :: l, u, r
-    real(kind=realType), dimension(2,2,nb:ne), intent(inout) :: c
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: n
-    real(kind=realType)   :: deti, f11, f12, f21, f22, r1
-
-    ! Perform the backward sweep to eliMinate the upper diagonal uu.
-    ! f     = u(n)*c^-1(n+1),
-    ! c'(n) = c(n) - f*l(n+1)
-    ! r'(n) = r(n) - f*r(n+1)
-
-    do n=ne-1,nb,-1
-
-       deti =  one/(c(1,1,n+1)*c(2,2,n+1) - c(1,2,n+1)*c(2,1,n+1))
-       f11  =  u(1,n)*c(2,2,n+1)*deti
-       f12  = -u(1,n)*c(1,2,n+1)*deti
-       f21  = -u(2,n)*c(2,1,n+1)*deti
-       f22  =  u(2,n)*c(1,1,n+1)*deti
-
-       c(1,1,n) = c(1,1,n) - f11*l(1,n+1)
-       c(1,2,n) = c(1,2,n) - f12*l(2,n+1)
-       c(2,1,n) = c(2,1,n) - f21*l(1,n+1)
-       c(2,2,n) = c(2,2,n) - f22*l(2,n+1)
-
-       r(1,n)  = r(1,n) - f11*r(1,n+1) - f12*r(2,n+1)
-       r(2,n)  = r(2,n) - f21*r(1,n+1) - f22*r(2,n+1)
-
-    enddo
-
-    ! The matrix is now in low block bi-diagonal form and can thus
-    ! be solved be a forward sweep. The solution is stored in r.
-
-    deti    =  one/(c(1,1,nb)*c(2,2,nb) - c(1,2,nb)*c(2,1,nb))
-    r1      =  r(1,nb)
-    r(1,nb) =  deti*(c(2,2,nb)*r1 - c(1,2,nb)*r(2,nb))
-    r(2,nb) = -deti*(c(2,1,nb)*r1 - c(1,1,nb)*r(2,nb))
-
-    do n=nb+1,ne
-
-       r(1,n) = r(1,n) - l(1,n)*r(1,n-1)
-       r(2,n) = r(2,n) - l(2,n)*r(2,n-1)
-
-       deti   =  one/(c(1,1,n)*c(2,2,n) - c(1,2,n)*c(2,1,n))
-       r1     =  r(1,n)
-       r(1,n) =  deti*(c(2,2,n)*r1 - c(1,2,n)*r(2,n))
-       r(2,n) = -deti*(c(2,1,n)*r1 - c(1,1,n)*r(2,n))
-
-    enddo
-
-  end subroutine tdia3
-
-  subroutine vfEddyViscosity(iBeg, iEnd, jBeg, jEnd, kBeg, kEnd)
-    !
-    !       vfEddyViscosity computes the eddy-viscosity according to the
-    !       v2f turbulence model for the block given in blockPointers.
-    !       This routine is for both the n=1 and n=6 version.
-    !
-    use constants
-    use blockPointers
-    use constants
-    use paramTurb
-    use turbMod
-    use inputPhysics
-    use turbCurveFits, only : curvetupyp
-    implicit none
-    ! Input variables
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, nn, ii, iSize, jSize, kSize
-    real(kind=realType)   :: tke, tep, tkea, tepa, tepl, tv2, tv2a
-    real(kind=realType)   :: yp, utau
-
-    real(kind=realType), dimension(itu1:itu5) :: tup
-    real(kind=realType), dimension(:,:,:), pointer :: ww
-    real(kind=realType), dimension(:,:),   pointer :: rrlv, rrev
-    real(kind=realType), dimension(:,:),   pointer :: dd2Wall
-
-
-    ! Compute time and length scale
-
-    call vfScale
-
-    ! Loop over the cells of this block and compute the eddy viscosity.
-    ! Do not include halo's.
+    subroutine tdia3(nb, ne, l, c, u, r)
+        !
+        !       tdia3 solves the tridiagonal linear system (l+c+u) v = r,
+        !       where l is the lower, c the central and u the upper diagonal.
+        !       Every entry in the matrix is a 2x2 block matrix, i.e.
+        !                    x x  x 0  0 0  ........        = c(nb) u(nb)
+        !                    x x  0 x  0 0  ........
+        !                    x 0  x x  x 0  ........        = l(i) c(i) u(i)
+        !                    0 x  x x  0 x  ........
+        !                         ........  x 0  x x        = l(ne) c(ne)
+        !                         ........  0 x  x x
+        !               With c = x x     u,l = x 0
+        !                        x x           0 x
+        !
+        use constants
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: nb, ne
+
+        real(kind=realType), dimension(2, nb:ne), intent(inout) :: l, u, r
+        real(kind=realType), dimension(2, 2, nb:ne), intent(inout) :: c
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: n
+        real(kind=realType) :: deti, f11, f12, f21, f22, r1
+
+        ! Perform the backward sweep to eliMinate the upper diagonal uu.
+        ! f     = u(n)*c^-1(n+1),
+        ! c'(n) = c(n) - f*l(n+1)
+        ! r'(n) = r(n) - f*r(n+1)
+
+        do n = ne - 1, nb, -1
+
+            deti = one / (c(1, 1, n + 1) * c(2, 2, n + 1) - c(1, 2, n + 1) * c(2, 1, n + 1))
+            f11 = u(1, n) * c(2, 2, n + 1) * deti
+            f12 = -u(1, n) * c(1, 2, n + 1) * deti
+            f21 = -u(2, n) * c(2, 1, n + 1) * deti
+            f22 = u(2, n) * c(1, 1, n + 1) * deti
+
+            c(1, 1, n) = c(1, 1, n) - f11 * l(1, n + 1)
+            c(1, 2, n) = c(1, 2, n) - f12 * l(2, n + 1)
+            c(2, 1, n) = c(2, 1, n) - f21 * l(1, n + 1)
+            c(2, 2, n) = c(2, 2, n) - f22 * l(2, n + 1)
+
+            r(1, n) = r(1, n) - f11 * r(1, n + 1) - f12 * r(2, n + 1)
+            r(2, n) = r(2, n) - f21 * r(1, n + 1) - f22 * r(2, n + 1)
+
+        end do
+
+        ! The matrix is now in low block bi-diagonal form and can thus
+        ! be solved be a forward sweep. The solution is stored in r.
+
+        deti = one / (c(1, 1, nb) * c(2, 2, nb) - c(1, 2, nb) * c(2, 1, nb))
+        r1 = r(1, nb)
+        r(1, nb) = deti * (c(2, 2, nb) * r1 - c(1, 2, nb) * r(2, nb))
+        r(2, nb) = -deti * (c(2, 1, nb) * r1 - c(1, 1, nb) * r(2, nb))
+
+        do n = nb + 1, ne
+
+            r(1, n) = r(1, n) - l(1, n) * r(1, n - 1)
+            r(2, n) = r(2, n) - l(2, n) * r(2, n - 1)
+
+            deti = one / (c(1, 1, n) * c(2, 2, n) - c(1, 2, n) * c(2, 1, n))
+            r1 = r(1, n)
+            r(1, n) = deti * (c(2, 2, n) * r1 - c(1, 2, n) * r(2, n))
+            r(2, n) = -deti * (c(2, 1, n) * r1 - c(1, 1, n) * r(2, n))
+
+        end do
+
+    end subroutine tdia3
+
+    subroutine vfEddyViscosity(iBeg, iEnd, jBeg, jEnd, kBeg, kEnd)
+        !
+        !       vfEddyViscosity computes the eddy-viscosity according to the
+        !       v2f turbulence model for the block given in blockPointers.
+        !       This routine is for both the n=1 and n=6 version.
+        !
+        use constants
+        use blockPointers
+        use constants
+        use paramTurb
+        use turbMod
+        use inputPhysics
+        use turbCurveFits, only: curvetupyp
+        implicit none
+        ! Input variables
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, nn, ii, iSize, jSize, kSize
+        real(kind=realType) :: tke, tep, tkea, tepa, tepl, tv2, tv2a
+        real(kind=realType) :: yp, utau
+
+        real(kind=realType), dimension(itu1:itu5) :: tup
+        real(kind=realType), dimension(:, :, :), pointer :: ww
+        real(kind=realType), dimension(:, :), pointer :: rrlv, rrev
+        real(kind=realType), dimension(:, :), pointer :: dd2Wall
+
+        ! Compute time and length scale
+
+        call vfScale
+
+        ! Loop over the cells of this block and compute the eddy viscosity.
+        ! Do not include halo's.
 #ifdef TAPENADE_REVERSE
-    iSize = (iEnd-iBeg)+1
-    jSize = (jEnd-jBeg)+1
-    kSize = (kEnd-kBeg)+1
-
-    !$AD II-LOOP
-    do ii=0, iSize*jSize*kSize-1
-       i = mod(ii, iSize) + iBeg
-       j = mod(ii/iSize, jSize) + jBeg
-       k = ii/((iSize*jSize)) + kBeg
+        iSize = (iEnd - iBeg) + 1
+        jSize = (jEnd - jBeg) + 1
+        kSize = (kEnd - kBeg) + 1
+
+        !$AD II-LOOP
+        do ii = 0, iSize * jSize * kSize - 1
+            i = mod(ii, iSize) + iBeg
+            j = mod(ii / iSize, jSize) + jBeg
+            k = ii / ((iSize * jSize)) + kBeg
 #else
-       do k=kBeg, kEnd
-          do j=jBeg, jEnd
-             do i=iBeg, iEnd
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
 #endif
 
-                tke   = w(i,j,k,itu1)
-                tep   = w(i,j,k,itu2)
-                tv2   = w(i,j,k,itu3)
-                tkea  = abs(tke)
-                tepa  = abs(tep)
-                tv2a  = abs(tv2)
-                tepl  = max(tepa,rvfLimitE)
+                        tke = w(i, j, k, itu1)
+                        tep = w(i, j, k, itu2)
+                        tv2 = w(i, j, k, itu3)
+                        tkea = abs(tke)
+                        tepa = abs(tep)
+                        tv2a = abs(tv2)
+                        tepl = max(tepa, rvfLimitE)
 
-                rev(i,j,k) = rvfCmu*w(i,j,k,irho)*tv2a/tepl*sct(i,j,k)
+                        rev(i, j, k) = rvfCmu * w(i, j, k, irho) * tv2a / tepl * sct(i, j, k)
 #ifdef TAPENADE_REVERSE
-             end do
+                    end do
 #else
-          enddo
-       enddo
-    enddo
+                end do
+            end do
+        end do
 #endif
 
-    ! Modify the rhs of the 1st internal cell, if wall functions
-    ! are used; their value is determined by the table.
-
-    testWallFunctions: if( wallFunctions ) then
-
-       bocos: do nn=1,nViscBocos
-
-          ! Determine the block face on which the subface is located
-          ! and set some variables. As flag points to the entire array
-          ! flagI2, etc., its starting indices are the starting indices
-          ! of its target and not 1.
-
-          select case (bcFaceid(nn))
-          case (iMin)
-             ww      => w(2,1:,1:,1:);   rrlv => rlv(2,1:,1:)
-             dd2Wall => d2Wall(2,:,:);   rrev => rev(2,1:,1:)
-
-          case (iMax)
-             ww      => w(il,1:,1:,1:);   rrlv => rlv(il,1:,1:)
-             dd2Wall => d2Wall(il,:,:);   rrev => rev(il,1:,1:)
-
-          case (jMin)
-             ww      => w(1:,2,1:,1:);   rrlv => rlv(1:,2,1:)
-             dd2Wall => d2Wall(:,2,:);   rrev => rev(1:,2,1:)
-
-          case (jMax)
-             ww      => w(1:,jl,1:,1:);   rrlv => rlv(1:,jl,1:)
-             dd2Wall => d2Wall(:,jl,:);   rrev => rev(1:,jl,1:)
-
-          case (kMin)
-             ww      => w(1:,1:,2,1:);   rrlv => rlv(1:,1:,2)
-             dd2Wall => d2Wall(:,:,2);   rrev => rev(1:,1:,2)
-
-          case (kMax)
-             ww      => w(1:,1:,kl,1:);   rrlv => rlv(1:,1:,kl)
-             dd2Wall => d2Wall(:,:,kl);   rrev => rev(1:,1:,kl)
-
-          end select
-
-          ! Loop over the owned faces of this subface. Therefore the
-          ! nodal range of bcData must be used. The offset of +1 is
-          ! present, because the starting index of the cell range is
-          ! 1 larger than the starting index of the nodal range.
-
-          do j=(BCData(nn)%jnBeg+1),BCData(nn)%jnEnd
-             do i=(BCData(nn)%inBeg+1),BCData(nn)%inEnd
-
-                ! Enforce k and epsilon in the 1st internal cell from
-                ! the wall function table. There is an offset of -1 in
-                ! the wall distance. Note that the offset compared to
-                ! the current value must be stored. Also note that the
-                ! curve fits contain the non-dimensional values.
-
-                utau = viscSubface(nn)%utau(i,j)
-                yp = ww(i,j,irho)*dd2Wall(i-1,j-1)*utau/rrlv(i,j)
-
-                call curveTupYp(tup(itu5:itu5), yp, itu5, itu5)
-                rrev(i,j) = tup(itu5)*rrlv(i,j)
-             enddo
-          enddo
-
-       enddo bocos
-    endif testWallFunctions
-
-  end subroutine vfEddyViscosity
-  subroutine vfScale
-    !
-    !       time and length scale definition for v2f turbulence model. The
-    !       upper bound can be switched on by setting rvfB to .true.
-    !       The strain squared is defined as: strain2 = 2 sij sij
-    !
-    use constants
-    use blockPointers
-    use inputPhysics
-    use paramTurb
-    use turbMod
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k
-
-    real(kind=realType) :: sqrt3
-    real(kind=realType) :: tkea, tepa, tv2a, supi, rn2
-    real(kind=realType) :: rsct, rscl2, rnu, rstrain
-
-    ! Some constants in the model.
-
-    sqrt3 = sqrt(three)
-
-    ! Set the pointer for dvt in dw, such that the code is more
-    ! readable. Also set the pointers for the production term,
-    ! vorticity, strain and the time and lenght scale of v2f.
-
-    dvt     => scratch(1:,1:,1:,idvt:)
-    prod    => scratch(1:,1:,1:,iprod)
-    sct     => scratch(1:,1:,1:,isct)
-    scl2    => scratch(1:,1:,1:,iscl2)
-    vort    => prod
-    strain2 => prod
-    !
-    !       Production term.
-    !
-    select case (turbProd)
-    case (strain)
-       call prodSmag2
-
-    case (vorticity)
-       call prodWmag2
-
-    case (katoLaunder)
-       call prodKatoLaunder
-
-    end select
-    !
-    !       Compute the length and time scale for all internal cells.
-    !
-    if( rvfB ) then
-
-       do k=2,kl
-          do j=2,jl
-             do i=2,il
-
-                ! Compute the time and length scale with upper bound
-
-                rstrain     = sqrt(strain2(i,j,k))
-                rnu         = rlv(i,j,k)/w(i,j,k,irho)
-                tkea        = abs(w(i,j,k,itu1))
-                tepa        = abs(w(i,j,k,itu2))
-                tv2a        = abs(w(i,j,k,itu3))
-                supi        = tepa*tkea/max(sqrt3*tv2a*rvfCmu*rstrain,eps)
-                rn2         = rvfCn**2*(rnu*tepa)**1.5_realType
-
-                rsct        = max(tkea,six*sqrt(rnu*tepa))
-                sct(i,j,k)  = min(rsct,0.6_realType*supi)
-                rscl2       = tkea*min(tkea**2,supi**2)
-                scl2(i,j,k) = rvfCl**2*max(rscl2,rn2)
-
-             enddo
-          enddo
-       enddo
-
-    else
-
-       do k=2,kl
-          do j=2,jl
-             do i=2,il
-
-                ! Compute the time and length scale without upper bound
-
-                rnu         = rlv(i,j,k)/w(i,j,k,irho)
-                tkea        = abs(w(i,j,k,itu1))
-                tepa        = abs(w(i,j,k,itu2))
-                rn2         = rvfCn**2*(rnu*tepa)**1.5_realType
-
-                rsct        = max(tkea,six*sqrt(rnu*tepa))
-                sct(i,j,k)  = rsct
-                rscl2       = tkea**3
-                scl2(i,j,k) = rvfCl**2*max(rscl2,rn2)
-             enddo
-          enddo
-       enddo
-    endif
-
-  end subroutine vfScale
-
-  subroutine ktEddyViscosity(iBeg, iEnd, jBeg, jEnd, kBeg, kEnd)
-    !
-    !       ktEddyViscosity computes the eddy viscosity according to the
-    !       k-tau turbulence model for the block given in blockPointers.
-    !
-    use blockPointers
-    use constants
-    implicit none
-    ! Input variables
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, ii, iSize, jSize, kSize
-
-    ! Loop over the cells of this block and compute the eddy viscosity.
-    ! Do not include halo's.
+        ! Modify the rhs of the 1st internal cell, if wall functions
+        ! are used; their value is determined by the table.
+
+        testWallFunctions: if (wallFunctions) then
+
+            bocos: do nn = 1, nViscBocos
+
+                ! Determine the block face on which the subface is located
+                ! and set some variables. As flag points to the entire array
+                ! flagI2, etc., its starting indices are the starting indices
+                ! of its target and not 1.
+
+                select case (bcFaceid(nn))
+                case (iMin)
+                    ww => w(2, 1:, 1:, 1:); rrlv => rlv(2, 1:, 1:)
+                    dd2Wall => d2Wall(2, :, :); rrev => rev(2, 1:, 1:)
+
+                case (iMax)
+                    ww => w(il, 1:, 1:, 1:); rrlv => rlv(il, 1:, 1:)
+                    dd2Wall => d2Wall(il, :, :); rrev => rev(il, 1:, 1:)
+
+                case (jMin)
+                    ww => w(1:, 2, 1:, 1:); rrlv => rlv(1:, 2, 1:)
+                    dd2Wall => d2Wall(:, 2, :); rrev => rev(1:, 2, 1:)
+
+                case (jMax)
+                    ww => w(1:, jl, 1:, 1:); rrlv => rlv(1:, jl, 1:)
+                    dd2Wall => d2Wall(:, jl, :); rrev => rev(1:, jl, 1:)
+
+                case (kMin)
+                    ww => w(1:, 1:, 2, 1:); rrlv => rlv(1:, 1:, 2)
+                    dd2Wall => d2Wall(:, :, 2); rrev => rev(1:, 1:, 2)
+
+                case (kMax)
+                    ww => w(1:, 1:, kl, 1:); rrlv => rlv(1:, 1:, kl)
+                    dd2Wall => d2Wall(:, :, kl); rrev => rev(1:, 1:, kl)
+
+                end select
+
+                ! Loop over the owned faces of this subface. Therefore the
+                ! nodal range of bcData must be used. The offset of +1 is
+                ! present, because the starting index of the cell range is
+                ! 1 larger than the starting index of the nodal range.
+
+                do j = (BCData(nn)%jnBeg + 1), BCData(nn)%jnEnd
+                    do i = (BCData(nn)%inBeg + 1), BCData(nn)%inEnd
+
+                        ! Enforce k and epsilon in the 1st internal cell from
+                        ! the wall function table. There is an offset of -1 in
+                        ! the wall distance. Note that the offset compared to
+                        ! the current value must be stored. Also note that the
+                        ! curve fits contain the non-dimensional values.
+
+                        utau = viscSubface(nn)%utau(i, j)
+                        yp = ww(i, j, irho) * dd2Wall(i - 1, j - 1) * utau / rrlv(i, j)
+
+                        call curveTupYp(tup(itu5:itu5), yp, itu5, itu5)
+                        rrev(i, j) = tup(itu5) * rrlv(i, j)
+                    end do
+                end do
+
+            end do bocos
+        end if testWallFunctions
+
+    end subroutine vfEddyViscosity
+    subroutine vfScale
+        !
+        !       time and length scale definition for v2f turbulence model. The
+        !       upper bound can be switched on by setting rvfB to .true.
+        !       The strain squared is defined as: strain2 = 2 sij sij
+        !
+        use constants
+        use blockPointers
+        use inputPhysics
+        use paramTurb
+        use turbMod
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k
+
+        real(kind=realType) :: sqrt3
+        real(kind=realType) :: tkea, tepa, tv2a, supi, rn2
+        real(kind=realType) :: rsct, rscl2, rnu, rstrain
+
+        ! Some constants in the model.
+
+        sqrt3 = sqrt(three)
+
+        ! Set the pointer for dvt in dw, such that the code is more
+        ! readable. Also set the pointers for the production term,
+        ! vorticity, strain and the time and lenght scale of v2f.
+
+        dvt => scratch(1:, 1:, 1:, idvt:)
+        prod => scratch(1:, 1:, 1:, iprod)
+        sct => scratch(1:, 1:, 1:, isct)
+        scl2 => scratch(1:, 1:, 1:, iscl2)
+        vort => prod
+        strain2 => prod
+        !
+        !       Production term.
+        !
+        select case (turbProd)
+        case (strain)
+            call prodSmag2
+
+        case (vorticity)
+            call prodWmag2
+
+        case (katoLaunder)
+            call prodKatoLaunder
+
+        end select
+        !
+        !       Compute the length and time scale for all internal cells.
+        !
+        if (rvfB) then
+
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+
+                        ! Compute the time and length scale with upper bound
+
+                        rstrain = sqrt(strain2(i, j, k))
+                        rnu = rlv(i, j, k) / w(i, j, k, irho)
+                        tkea = abs(w(i, j, k, itu1))
+                        tepa = abs(w(i, j, k, itu2))
+                        tv2a = abs(w(i, j, k, itu3))
+                        supi = tepa * tkea / max(sqrt3 * tv2a * rvfCmu * rstrain, eps)
+                        rn2 = rvfCn**2 * (rnu * tepa)**1.5_realType
+
+                        rsct = max(tkea, six * sqrt(rnu * tepa))
+                        sct(i, j, k) = min(rsct, 0.6_realType * supi)
+                        rscl2 = tkea * min(tkea**2, supi**2)
+                        scl2(i, j, k) = rvfCl**2 * max(rscl2, rn2)
+
+                    end do
+                end do
+            end do
+
+        else
+
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+
+                        ! Compute the time and length scale without upper bound
+
+                        rnu = rlv(i, j, k) / w(i, j, k, irho)
+                        tkea = abs(w(i, j, k, itu1))
+                        tepa = abs(w(i, j, k, itu2))
+                        rn2 = rvfCn**2 * (rnu * tepa)**1.5_realType
+
+                        rsct = max(tkea, six * sqrt(rnu * tepa))
+                        sct(i, j, k) = rsct
+                        rscl2 = tkea**3
+                        scl2(i, j, k) = rvfCl**2 * max(rscl2, rn2)
+                    end do
+                end do
+            end do
+        end if
+
+    end subroutine vfScale
+
+    subroutine ktEddyViscosity(iBeg, iEnd, jBeg, jEnd, kBeg, kEnd)
+        !
+        !       ktEddyViscosity computes the eddy viscosity according to the
+        !       k-tau turbulence model for the block given in blockPointers.
+        !
+        use blockPointers
+        use constants
+        implicit none
+        ! Input variables
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, ii, iSize, jSize, kSize
+
+        ! Loop over the cells of this block and compute the eddy viscosity.
+        ! Do not include halo's.
 #ifdef TAPENADE_REVERSE
-    iSize = (iEnd-iBeg)+1
-    jSize = (jEnd-jBeg)+1
-    kSize = (kEnd-kBeg)+1
-
-    !$AD II-LOOP
-    do ii=0, iSize*jSize*kSize-1
-       i = mod(ii, iSize) + iBeg
-       j = mod(ii/iSize, jSize) + jBeg
-       k = ii/((iSize*jSize)) + kBeg
+        iSize = (iEnd - iBeg) + 1
+        jSize = (jEnd - jBeg) + 1
+        kSize = (kEnd - kBeg) + 1
+
+        !$AD II-LOOP
+        do ii = 0, iSize * jSize * kSize - 1
+            i = mod(ii, iSize) + iBeg
+            j = mod(ii / iSize, jSize) + jBeg
+            k = ii / ((iSize * jSize)) + kBeg
 #else
-       do k=kBeg, kEnd
-          do j=jBeg, jEnd
-             do i=iBeg, iEnd
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
 #endif
-                rev(i,j,k) = abs(w(i,j,k,irho)*w(i,j,k,itu1)*w(i,j,k,itu2))
+                        rev(i, j, k) = abs(w(i, j, k, irho) * w(i, j, k, itu1) * w(i, j, k, itu2))
 #ifdef TAPENADE_REVERSE
-             end do
+                    end do
 #else
-          enddo
-       enddo
-    enddo
+                end do
+            end do
+        end do
 #endif
-  end subroutine ktEddyViscosity
-
+    end subroutine ktEddyViscosity
 
 #ifndef USE_TAPENADE
-  subroutine unsteadyTurbSpectral(ntu1, ntu2)
-    use constants
-    use blockPointers
-    use inputPhysics
-    use inputTimeSpectral
-    use iteration
-    use utils, only : setPointers
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: ntu1, ntu2
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn, sps
-
-    ! Return immediately if not the time spectral equations are to
-    ! be solved.
-
-    if(equationMode /= timeSpectral) return
-
-    ! Loop over the number of spectral modes and local blocks.
-
-    spectralLoop: do sps=1,nTimeIntervalsSpectral
-       domains: do nn=1,nDom
-
-          ! Set the pointers for this block.
-
-          call setPointers(nn, currentLevel, sps)
-
-          call unsteadyTurbSpectral_block(ntu1, ntu2, nn, sps)
-       end do domains
-    end do spectralLoop
-  end subroutine unsteadyTurbSpectral
+    subroutine unsteadyTurbSpectral(ntu1, ntu2)
+        use constants
+        use blockPointers
+        use inputPhysics
+        use inputTimeSpectral
+        use iteration
+        use utils, only: setPointers
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: ntu1, ntu2
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn, sps
+
+        ! Return immediately if not the time spectral equations are to
+        ! be solved.
+
+        if (equationMode /= timeSpectral) return
+
+        ! Loop over the number of spectral modes and local blocks.
+
+        spectralLoop: do sps = 1, nTimeIntervalsSpectral
+            domains: do nn = 1, nDom
+
+                ! Set the pointers for this block.
+
+                call setPointers(nn, currentLevel, sps)
+
+                call unsteadyTurbSpectral_block(ntu1, ntu2, nn, sps)
+            end do domains
+        end do spectralLoop
+    end subroutine unsteadyTurbSpectral
 #endif
 
-  subroutine unsteadyTurbSpectral_block(ntu1, ntu2, nn, sps)
-    !
-    !       unsteadyTurbSpectral determines the spectral time derivative
-    !       for all owned cells. This routine is called before the actual
-    !       solve routines, such that the treatment is identical for all
-    !       spectral solutions. The results is stored in the corresponding
-    !       entry in dw.
-    !
-    use constants
-    use blockPointers
-    use inputPhysics
-    use inputTimeSpectral
-    use iteration
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: ntu1, ntu2, nn, sps
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: ii, mm, i, j, k
-
-    ! Return immediately if not the time spectral equations are to
-    ! be solved.
-
-    if(equationMode /= timeSpectral) return
-
-    ! Loop over the number of turbulent transport equations.
-
-    nAdvLoop: do ii=ntu1, ntu2
-
-       ! Initialize the time derivative to zero for the owned
-       ! cell centers.
-
-       do k=2,kl
-          do j=2,jl
-             do i=2,il
-                dw(i,j,k,ii) = zero
-             enddo
-          enddo
-       enddo
-
-       ! Loop over the number of terms which contribute to the
-       ! time derivative.
-
-       do mm=1,nTimeIntervalsSpectral
-
-          ! Add the contribution to the time derivative for
-          ! all owned cells.
-
-          do k=2,kl
-             do j=2,jl
-                do i=2,il
-                   dw(i,j,k,ii) = dw(i,j,k,ii)              &
-                        + dscalar(sectionID,sps,mm) &
-                        * flowDoms(nn,currentLevel,mm)%w(i,j,k,ii)
-                enddo
-             enddo
-          enddo
-
-       enddo
-
-    enddo nAdvLoop
-  end subroutine unsteadyTurbSpectral_block
-
-  subroutine initKOmega(pOffset)
-    !
-    !       initKOmega initializes the values of k and omega a bit more
-    !       intelligently than just free-stream values.
-    !       It is assumed that the pointers in blockPointers already
-    !       point to the correct block on the correct level.
-    !       The argument pOffset is present such that in case of restart
-    !       a possible pointer offset is taken into account. For more
-    !       details see the corresponding routines in initFlow.
-    !
-    use blockPointers
-    use constants
-    use flowVarRefState
-    use paramTurb
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: pOffset
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, ip, jp, kp
-    real(kind=realType)   :: rhoi, tmp
-
-    ! Loop over the owned cells of the block.
-
-    do k=2,kl
-       kp = k + pOffset
-       do j=2,jl
-          jp = j + pOffset
-          do i=2,il
-             ip = i + pOffset
-
-             ! Compute a value of omega based on the wall distance.
-
-             rhoi = one/w(ip,jp,kp,irho)
-             tmp  = six*rhoi*rlv(i,j,k)/(rkwBeta1*(d2Wall(i,j,k)**2))
-
-             ! Initialize omega using the value just computed; make sure
-             ! that the free stream value is the lowest possible value.
-             ! After that initialize k using the current value of omega
-             ! and the eddy viscosity. Again the free-stream value is
-             ! the lower bound.
-
-             w(ip,jp,kp,itu2) = max(tmp,wInf(itu2))
-             tmp              = rhoi*rev(i,j,k)*w(ip,jp,kp,itu2)
-             w(ip,jp,kp,itu1) = max(tmp,wInf(itu1))
-
-          enddo
-       enddo
-    enddo
-
-  end subroutine initKOmega
-
-  subroutine kwCDterm
-    !
-    !       kwCDterm computes the cross-diffusion term in the omega-eqn
-    !       for the SST version as well as the modified k-omega turbulence
-    !       model. It is assumed that the pointers in blockPointers and
-    !       turbMod are already set.
-    !
-    use constants
-    use blockPointers
-    use turbMod
-    implicit none
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k
-    real(kind=realType)   :: kx, ky, kz, wwx, wwy, wwz
-    real(kind=realType)   :: lnwip1, lnwim1, lnwjp1, lnwjm1
-    real(kind=realType)   :: lnwkp1, lnwkm1
-
-    ! Loop over the cell centers of the given block. It may be more
-    ! efficient to loop over the faces and to scatter the gradient,
-    ! but in that case the gradients for k and omega must be stored.
-    ! In the current approach no extra memory is needed.
-
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-
-             ! Compute the gradient of k in the cell center. Use is made
-             ! of the fact that the surrounding normals sum up to zero,
-             ! such that the cell i,j,k does not give a contribution.
-             ! The gradient is scaled by a factor 1/2vol.
-
-             kx = w(i+1,j,k,itu1)*si(i,j,k,1) - w(i-1,j,k,itu1)*si(i-1,j,k,1) &
-                  + w(i,j+1,k,itu1)*sj(i,j,k,1) - w(i,j-1,k,itu1)*sj(i,j-1,k,1) &
-                  + w(i,j,k+1,itu1)*sk(i,j,k,1) - w(i,j,k-1,itu1)*sk(i,j,k-1,1)
-             ky = w(i+1,j,k,itu1)*si(i,j,k,2) - w(i-1,j,k,itu1)*si(i-1,j,k,2) &
-                  + w(i,j+1,k,itu1)*sj(i,j,k,2) - w(i,j-1,k,itu1)*sj(i,j-1,k,2) &
-                  + w(i,j,k+1,itu1)*sk(i,j,k,2) - w(i,j,k-1,itu1)*sk(i,j,k-1,2)
-             kz = w(i+1,j,k,itu1)*si(i,j,k,3) - w(i-1,j,k,itu1)*si(i-1,j,k,3) &
-                  + w(i,j+1,k,itu1)*sj(i,j,k,3) - w(i,j-1,k,itu1)*sj(i,j-1,k,3) &
-                  + w(i,j,k+1,itu1)*sk(i,j,k,3) - w(i,j,k-1,itu1)*sk(i,j,k-1,3)
-
-             ! Compute the logarithm of omega in the points that
-             ! contribute to the gradient in this cell.
-
-             lnwip1 = log(abs(w(i+1,j,k,itu2)))
-             lnwim1 = log(abs(w(i-1,j,k,itu2)))
-             lnwjp1 = log(abs(w(i,j+1,k,itu2)))
-             lnwjm1 = log(abs(w(i,j-1,k,itu2)))
-             lnwkp1 = log(abs(w(i,j,k+1,itu2)))
-             lnwkm1 = log(abs(w(i,j,k-1,itu2)))
-
-             ! Compute the scaled gradient of ln omega.
-
-             wwx = lnwip1*si(i,j,k,1) - lnwim1*si(i-1,j,k,1) &
-                  + lnwjp1*sj(i,j,k,1) - lnwjm1*sj(i,j-1,k,1) &
-                  + lnwkp1*sk(i,j,k,1) - lnwkm1*sk(i,j,k-1,1)
-             wwy = lnwip1*si(i,j,k,2) - lnwim1*si(i-1,j,k,2) &
-                  + lnwjp1*sj(i,j,k,2) - lnwjm1*sj(i,j-1,k,2) &
-                  + lnwkp1*sk(i,j,k,2) - lnwkm1*sk(i,j,k-1,2)
-             wwz = lnwip1*si(i,j,k,3) - lnwim1*si(i-1,j,k,3) &
-                  + lnwjp1*sj(i,j,k,3) - lnwjm1*sj(i,j-1,k,3) &
-                  + lnwkp1*sk(i,j,k,3) - lnwkm1*sk(i,j,k-1,3)
-
-             ! Compute the dot product grad k grad ln omega.
-             ! Multiply it by the correct scaling factor and store it.
-
-             kwCD(i,j,k) = fourth*(kx*wwx + ky*wwy + kz*wwz)/(vol(i,j,k)**2)
-
-          enddo
-       enddo
-    enddo
-
-  end subroutine kwCDterm
+    subroutine unsteadyTurbSpectral_block(ntu1, ntu2, nn, sps)
+        !
+        !       unsteadyTurbSpectral determines the spectral time derivative
+        !       for all owned cells. This routine is called before the actual
+        !       solve routines, such that the treatment is identical for all
+        !       spectral solutions. The results is stored in the corresponding
+        !       entry in dw.
+        !
+        use constants
+        use blockPointers
+        use inputPhysics
+        use inputTimeSpectral
+        use iteration
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: ntu1, ntu2, nn, sps
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: ii, mm, i, j, k
+
+        ! Return immediately if not the time spectral equations are to
+        ! be solved.
+
+        if (equationMode /= timeSpectral) return
+
+        ! Loop over the number of turbulent transport equations.
+
+        nAdvLoop: do ii = ntu1, ntu2
+
+            ! Initialize the time derivative to zero for the owned
+            ! cell centers.
+
+            do k = 2, kl
+                do j = 2, jl
+                    do i = 2, il
+                        dw(i, j, k, ii) = zero
+                    end do
+                end do
+            end do
+
+            ! Loop over the number of terms which contribute to the
+            ! time derivative.
+
+            do mm = 1, nTimeIntervalsSpectral
+
+                ! Add the contribution to the time derivative for
+                ! all owned cells.
+
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+                            dw(i, j, k, ii) = dw(i, j, k, ii) &
+                                              + dscalar(sectionID, sps, mm) &
+                                              * flowDoms(nn, currentLevel, mm)%w(i, j, k, ii)
+                        end do
+                    end do
+                end do
+
+            end do
+
+        end do nAdvLoop
+    end subroutine unsteadyTurbSpectral_block
+
+    subroutine initKOmega(pOffset)
+        !
+        !       initKOmega initializes the values of k and omega a bit more
+        !       intelligently than just free-stream values.
+        !       It is assumed that the pointers in blockPointers already
+        !       point to the correct block on the correct level.
+        !       The argument pOffset is present such that in case of restart
+        !       a possible pointer offset is taken into account. For more
+        !       details see the corresponding routines in initFlow.
+        !
+        use blockPointers
+        use constants
+        use flowVarRefState
+        use paramTurb
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: pOffset
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, ip, jp, kp
+        real(kind=realType) :: rhoi, tmp
+
+        ! Loop over the owned cells of the block.
+
+        do k = 2, kl
+            kp = k + pOffset
+            do j = 2, jl
+                jp = j + pOffset
+                do i = 2, il
+                    ip = i + pOffset
+
+                    ! Compute a value of omega based on the wall distance.
+
+                    rhoi = one / w(ip, jp, kp, irho)
+                    tmp = six * rhoi * rlv(i, j, k) / (rkwBeta1 * (d2Wall(i, j, k)**2))
+
+                    ! Initialize omega using the value just computed; make sure
+                    ! that the free stream value is the lowest possible value.
+                    ! After that initialize k using the current value of omega
+                    ! and the eddy viscosity. Again the free-stream value is
+                    ! the lower bound.
+
+                    w(ip, jp, kp, itu2) = max(tmp, wInf(itu2))
+                    tmp = rhoi * rev(i, j, k) * w(ip, jp, kp, itu2)
+                    w(ip, jp, kp, itu1) = max(tmp, wInf(itu1))
+
+                end do
+            end do
+        end do
+
+    end subroutine initKOmega
+
+    subroutine kwCDterm
+        !
+        !       kwCDterm computes the cross-diffusion term in the omega-eqn
+        !       for the SST version as well as the modified k-omega turbulence
+        !       model. It is assumed that the pointers in blockPointers and
+        !       turbMod are already set.
+        !
+        use constants
+        use blockPointers
+        use turbMod
+        implicit none
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k
+        real(kind=realType) :: kx, ky, kz, wwx, wwy, wwz
+        real(kind=realType) :: lnwip1, lnwim1, lnwjp1, lnwjm1
+        real(kind=realType) :: lnwkp1, lnwkm1
+
+        ! Loop over the cell centers of the given block. It may be more
+        ! efficient to loop over the faces and to scatter the gradient,
+        ! but in that case the gradients for k and omega must be stored.
+        ! In the current approach no extra memory is needed.
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Compute the gradient of k in the cell center. Use is made
+                    ! of the fact that the surrounding normals sum up to zero,
+                    ! such that the cell i,j,k does not give a contribution.
+                    ! The gradient is scaled by a factor 1/2vol.
+
+                    kx = w(i + 1, j, k, itu1) * si(i, j, k, 1) - w(i - 1, j, k, itu1) * si(i - 1, j, k, 1) &
+                         + w(i, j + 1, k, itu1) * sj(i, j, k, 1) - w(i, j - 1, k, itu1) * sj(i, j - 1, k, 1) &
+                         + w(i, j, k + 1, itu1) * sk(i, j, k, 1) - w(i, j, k - 1, itu1) * sk(i, j, k - 1, 1)
+                    ky = w(i + 1, j, k, itu1) * si(i, j, k, 2) - w(i - 1, j, k, itu1) * si(i - 1, j, k, 2) &
+                         + w(i, j + 1, k, itu1) * sj(i, j, k, 2) - w(i, j - 1, k, itu1) * sj(i, j - 1, k, 2) &
+                         + w(i, j, k + 1, itu1) * sk(i, j, k, 2) - w(i, j, k - 1, itu1) * sk(i, j, k - 1, 2)
+                    kz = w(i + 1, j, k, itu1) * si(i, j, k, 3) - w(i - 1, j, k, itu1) * si(i - 1, j, k, 3) &
+                         + w(i, j + 1, k, itu1) * sj(i, j, k, 3) - w(i, j - 1, k, itu1) * sj(i, j - 1, k, 3) &
+                         + w(i, j, k + 1, itu1) * sk(i, j, k, 3) - w(i, j, k - 1, itu1) * sk(i, j, k - 1, 3)
+
+                    ! Compute the logarithm of omega in the points that
+                    ! contribute to the gradient in this cell.
+
+                    lnwip1 = log(abs(w(i + 1, j, k, itu2)))
+                    lnwim1 = log(abs(w(i - 1, j, k, itu2)))
+                    lnwjp1 = log(abs(w(i, j + 1, k, itu2)))
+                    lnwjm1 = log(abs(w(i, j - 1, k, itu2)))
+                    lnwkp1 = log(abs(w(i, j, k + 1, itu2)))
+                    lnwkm1 = log(abs(w(i, j, k - 1, itu2)))
+
+                    ! Compute the scaled gradient of ln omega.
+
+                    wwx = lnwip1 * si(i, j, k, 1) - lnwim1 * si(i - 1, j, k, 1) &
+                          + lnwjp1 * sj(i, j, k, 1) - lnwjm1 * sj(i, j - 1, k, 1) &
+                          + lnwkp1 * sk(i, j, k, 1) - lnwkm1 * sk(i, j, k - 1, 1)
+                    wwy = lnwip1 * si(i, j, k, 2) - lnwim1 * si(i - 1, j, k, 2) &
+                          + lnwjp1 * sj(i, j, k, 2) - lnwjm1 * sj(i, j - 1, k, 2) &
+                          + lnwkp1 * sk(i, j, k, 2) - lnwkm1 * sk(i, j, k - 1, 2)
+                    wwz = lnwip1 * si(i, j, k, 3) - lnwim1 * si(i - 1, j, k, 3) &
+                          + lnwjp1 * sj(i, j, k, 3) - lnwjm1 * sj(i, j - 1, k, 3) &
+                          + lnwkp1 * sk(i, j, k, 3) - lnwkm1 * sk(i, j, k - 1, 3)
+
+                    ! Compute the dot product grad k grad ln omega.
+                    ! Multiply it by the correct scaling factor and store it.
+
+                    kwCD(i, j, k) = fourth * (kx * wwx + ky * wwy + kz * wwz) / (vol(i, j, k)**2)
+
+                end do
+            end do
+        end do
+
+    end subroutine kwCDterm
 #endif
 end module turbUtils
diff --git a/src/turbulence/vf.F90 b/src/turbulence/vf.F90
index 51f29e7ef..5add3d5b4 100644
--- a/src/turbulence/vf.F90
+++ b/src/turbulence/vf.F90
@@ -1,2036 +1,2034 @@
 module vf
 contains
 
-  subroutine vf_block(resOnly)
-    !
-    !       vf solves the transport equations for the v2-f model
-    !       in a coupled manner using a diagonal dominant ADI-scheme.
-    !
-    use constants
-    use blockPointers, only : il, jl, kl
-    use inputTimeSpectral
-    use iteration
-    use utils, only : setPointers
-    use turbUtils, only :  vfEddyViscosity, vfScale, unsteadyTurbTerm
-    use turbBCRoutines, only : bcTurbTreatment, applyAllTurbBCThisBlock
-
-    implicit none
-    !
-    !      Subroutine argument.
-    !
-    logical, intent(in) :: resOnly
-
-
-    ! Set the arrays for the boundary condition treatment.
-
-    call bcTurbTreatment
-
-    ! Compute time and length scale
-
-    call vfScale
-
-    ! Solve the transport equations for k and epsilon.
-
-    call keSolve(resOnly)
-
-    ! Solve the transport equation for v2 and the elliptic
-    ! equation for f.
-
-    call vfSolve(resOnly)
-
-    ! The eddy viscosity and the boundary conditions are only
-    ! applied if actual updates have been computed in keSolve
-    ! and vfSolve.
-
-    if(.not. resOnly ) then
-
-       ! Compute the corresponding eddy viscosity.
-
-       call vfEddyViscosity(2, il, 2, jl, 2, kl)
-
-       ! Set the halo values for the turbulent variables.
-       ! We are on the finest mesh, so the second layer of halo
-       ! cells must be computed as well.
-
-       call applyAllTurbBCThisBlock(.true.)
-
-    endif
-
-  end subroutine vf_block
-
-
-  subroutine vfSolve(resOnly)
-    !
-    !       vfSolve solves the v2 transport equation and the
-    !       f elliptic relaxation equation of the v2-f model
-    !       in a coupled manner using a diagonal dominant ADI-scheme.
-    !
-    use blockPointers
-    use constants
-    use flowVarRefState
-    use inputIteration
-    use inputPhysics
-    use paramTurb
-    use turbMod, only : prod, dvt, sig1, sig2, sct, scl2
-    use turbUtils, only : turbAdvection, unsteadyTurbTerm, tdia3
-    use turbCurveFits, only :curveTupYp
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    logical, intent(in) :: resOnly
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, nn
-
-    real(kind=realType) :: rhoi, ss, spk, ff1, ff2, ff3, ss1, ss2, ss3
-    real(kind=realType) :: voli, volmi, volpi
-    real(kind=realType) :: xm, ym, zm, xp, yp, zp, xa, ya, za
-    real(kind=realType) :: ttm, ttp, mulm, mulp, muem, muep
-    real(kind=realType) :: c1m, c1p, c10, c2m, c2p, c20
-    real(kind=realType) :: b1, b2, c1, c2, d1, d2
-    real(kind=realType) :: qs, uu, um, up, utau
-    real(kind=realType) :: tke, tep, tv2, tf2, tkea, tepa, tv2a
-    real(kind=realType) :: tkel, tv2l, sle2i, stei
-    real(kind=realType) :: rsct, rnu
-    real(kind=realType) :: tu12, tu22, tu32, tu42, tu52
-    real(kind=realType) :: rnu23, dtu23, rblank
-
-    real(kind=realType), dimension(itu1:itu5) :: tup
-
-    real(kind=realType), dimension(2:il,2:jl,2:kl,2,2)  :: qq
-    real(kind=realType), dimension(2,2:max(il,jl,kl))   :: bb, dd, ff
-    real(kind=realType), dimension(2,2,2:max(il,jl,kl)) :: cc
-
-    real(kind=realType), dimension(:,:,:), pointer :: dw2, dvt2, w2, w3
-    real(kind=realType), dimension(:,:),   pointer :: rlv2, rlv3
-    real(kind=realType), dimension(:,:),   pointer :: rev2, rev3
-    real(kind=realType), dimension(:,:),   pointer :: d2Wall2, d2Wall3
-
-    logical, dimension(2:jl,2:kl), target :: flagI2, flagIl
-    logical, dimension(2:il,2:kl), target :: flagJ2, flagJl
-    logical, dimension(2:il,2:jl), target :: flagK2, flagKl
-
-    logical, dimension(:,:), pointer :: flag
-
-    ! Set model constants
-
-    sig1 = rvfSigv1
-    sig2 = one
-    !
-    !       Source terms.
-    !       Determine the source term and its derivative w.r.t. v2 and f
-    !       for all internal cells of the block.
-    !
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-
-             ! Compute the source terms for both the k and the epsilon
-             ! equation. Note that dw(i,j,k,iprod) contains the unscaled
-             ! production term.
-
-             rhoi = one/w(i,j,k,irho)
-             tke  = w(i,j,k,itu1)
-             tep  = w(i,j,k,itu2)
-             tv2  = w(i,j,k,itu3)
-             tf2  = w(i,j,k,itu4)
-             tkea = abs(tke)
-             tepa = abs(tep)
-             tv2a = abs(tv2)
-             tkel = max(tkea,rvfLimitK)
-             tv2l = max(tv2a,rvfLimitK*0.666666666_realType)
-             stei = tepa/sct(i,j,k)
-             sle2i= tepa**2/scl2(i,j,k)
-
-             if(rvfN == 6) then
-                rnu  = rlv(i,j,k)*rhoi
-                rsct = max(tkea,6.*sqrt(rnu*tepa))
-                stei = tepa/rsct
-
-                ! rn2  = rvfCn**2*(rnu*tepa)**1.5
-                ! sle2i= rvfCl**2*max(tkea**3,rn2)
-             endif
-
-             ss   = prod(i,j,k)
-             spk  = rev(i,j,k)*ss*rhoi
-             spk  = min(spk, pklim*tepa)
-
-             ff1  =-tepa/tkel
-             ff2  = tkel
-             ff3  = zero
-
-             ss1  =-(rvfC1-1.)/tkel*stei*sle2i
-             ss2  =-sle2i
-             ss3  = (rvfC1-1.)*2./3.*stei*sle2i + rvfC2*spk/tkel*sle2i
-
-             if(rvfN == 6) then
-                ff1 = ff1 - 5.0*tepa/tkel
-                ss1 = ss1 + 5.0/tkel*stei*sle2i
-             endif
-
-             dvt(i,j,k,1) = ff1*tv2 + ff2*tf2 + ff3
-             dvt(i,j,k,2) = ss1*tv2 + ss2*tf2 + ss3
-
-             ! Compute the source term jacobian. Note that only the
-             ! destruction terms are linearized to increase the diagonal
-             ! dominance of the matrix. Furthermore minus the source
-             ! term jacobian is stored.
-
-             qq(i,j,k,1,1) = -ff1
-             qq(i,j,k,1,2) = -ff2
-             qq(i,j,k,2,1) = -ss1
-             qq(i,j,k,2,2) = -ss2
-
-          enddo
-       enddo
-    enddo
-    !
-    !       Advection and unsteady terms.
-    !
-    nn = itu3 - 1
-    call turbAdvection(2_intType, 1_intType, nn, qq)
-
-    call unsteadyTurbTerm(2_intType, 1_intType, nn, qq)
-    !
-    !       Viscous terms in k-direction.
-    !
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-
-             ! Compute the metrics in zeta-direction, i.e. along the
-             ! line k = constant.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i,j,k-1))
-             volpi = two/(vol(i,j,k) + vol(i,j,k+1))
-
-             xm = sk(i,j,k-1,1)*volmi
-             ym = sk(i,j,k-1,2)*volmi
-             zm = sk(i,j,k-1,3)*volmi
-             xp = sk(i,j,k,  1)*volpi
-             yp = sk(i,j,k,  2)*volpi
-             zp = sk(i,j,k,  3)*volpi
-
-             xa  = half*(sk(i,j,k,1) + sk(i,j,k-1,1))*voli
-             ya  = half*(sk(i,j,k,2) + sk(i,j,k-1,2))*voli
-             za  = half*(sk(i,j,k,3) + sk(i,j,k-1,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             ! Computation of the viscous terms in zeta-direction; note
-             ! that cross-derivatives are neglected, i.e. the mesh is
-             ! assumed to be orthogonal.
-             ! The second derivative in zeta-direction is constructed as
-             ! the central difference of the first order derivatives, i.e.
-             ! d^2/dzeta^2 = d/dzeta (d/dzeta k+1/2 - d/dzeta k-1/2).
-             ! In this way the metric as well as the varying viscosity
-             ! can be taken into account; the latter appears inside the
-             ! d/dzeta derivative. The whole term is divided by rho to
-             ! obtain the diffusion term for v2 and f.
-
-             rhoi = one/w(i,j,k,irho)
-             mulm = half*(rlv(i,j,k-1) + rlv(i,j,k))
-             mulp = half*(rlv(i,j,k+1) + rlv(i,j,k))
-             muem = half*(rev(i,j,k-1) + rev(i,j,k))
-             muep = half*(rev(i,j,k+1) + rev(i,j,k))
-
-             c1m = ttm*(mulm + sig1*muem)*rhoi
-             c1p = ttp*(mulp + sig1*muep)*rhoi
-             c10 = c1m + c1p
-
-             c2m = ttm
-             c2p = ttp
-             c20 = c2m + c2p
-
-             ! Update the residual for this cell and store the possible
-             ! coefficients for the matrix in b1, b2, c1, c2, d1 and d2.
-
-             dvt(i,j,k,1) = dvt(i,j,k,1)      + c1m*w(i,j,k-1,itu3) &
-                  - c10*w(i,j,k,itu3) + c1p*w(i,j,k+1,itu3)
-             dvt(i,j,k,2) = dvt(i,j,k,2)      + c2m*w(i,j,k-1,itu4) &
-                  - c20*w(i,j,k,itu4) + c2p*w(i,j,k+1,itu4)
-
-             b1 = -c1m
-             c1 =  c10
-             d1 = -c1p
-
-             b2 = -c2m
-             c2 =  c20
-             d2 = -c2p
-
-             ! Update the central jacobian. For nonboundary cells this
-             ! is simply c1 and c2. For boundary cells this is slightly
-             ! more complicated, because the boundary conditions are
-             ! treated implicitly and the off-diagonal terms b1, b2 and
-             ! d1, d2 must be taken into account.
-             ! The boundary conditions are only treated implicitly if
-             ! the diagonal dominance of the matrix is increased.
-
-             if(k == 2) then
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1 &
-                     - b1*max(bmtk1(i,j,itu3,itu3),zero)
-                qq(i,j,k,1,2) = qq(i,j,k,1,2) - b1*bmtk1(i,j,itu3,itu4)
-                qq(i,j,k,2,1) = qq(i,j,k,2,1) - b2*bmtk1(i,j,itu4,itu3)
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2 &
-                     - b2*max(bmtk1(i,j,itu4,itu4),zero)
-             else if(k == kl) then
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1 &
-                     - d1*max(bmtk2(i,j,itu3,itu3),zero)
-                qq(i,j,k,1,2) = qq(i,j,k,1,2) - d1*bmtk2(i,j,itu3,itu4)
-                qq(i,j,k,2,1) = qq(i,j,k,2,1) - d2*bmtk2(i,j,itu4,itu3)
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2 &
-                     - d2*max(bmtk2(i,j,itu4,itu4),zero)
-             else
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2
-             endif
-
-          enddo
-       enddo
-    enddo
-    !
-    !       Viscous terms in j-direction.
-    !
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-
-             ! Compute the metrics in eta-direction, i.e. along the
-             ! line j = constant.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i,j-1,k))
-             volpi = two/(vol(i,j,k) + vol(i,j+1,k))
-
-             xm = sj(i,j-1,k,1)*volmi
-             ym = sj(i,j-1,k,2)*volmi
-             zm = sj(i,j-1,k,3)*volmi
-             xp = sj(i,j,  k,1)*volpi
-             yp = sj(i,j,  k,2)*volpi
-             zp = sj(i,j,  k,3)*volpi
-
-             xa  = half*(sj(i,j,k,1) + sj(i,j-1,k,1))*voli
-             ya  = half*(sj(i,j,k,2) + sj(i,j-1,k,2))*voli
-             za  = half*(sj(i,j,k,3) + sj(i,j-1,k,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             ! Computation of the viscous terms in eta-direction; note
-             ! that cross-derivatives are neglected, i.e. the mesh is
-             ! assumed to be orthogonal.
-             ! The second derivative in eta-direction is constructed as
-             ! the central difference of the first order derivatives, i.e.
-             ! d^2/deta^2 = d/deta (d/deta j+1/2 - d/deta j-1/2).
-             ! In this way the metric as well as the varying viscosity
-             ! can be taken into account; the latter appears inside the
-             ! d/deta derivative. The whole term is divided by rho to
-             ! obtain the diffusion term for v2 and f.
-
-             rhoi = one/w(i,j,k,irho)
-             mulm = half*(rlv(i,j-1,k) + rlv(i,j,k))
-             mulp = half*(rlv(i,j+1,k) + rlv(i,j,k))
-             muem = half*(rev(i,j-1,k) + rev(i,j,k))
-             muep = half*(rev(i,j+1,k) + rev(i,j,k))
-
-             c1m = ttm*(mulm + sig1*muem)*rhoi
-             c1p = ttp*(mulp + sig1*muep)*rhoi
-             c10 = c1m + c1p
-
-             c2m = ttm
-             c2p = ttp
-             c20 = c2m + c2p
-
-             ! Update the residual for this cell and store the possible
-             ! coefficients for the matrix in b1, b2, c1, c2, d1 and d2.
-
-             dvt(i,j,k,1) = dvt(i,j,k,1)      + c1m*w(i,j-1,k,itu3) &
-                  - c10*w(i,j,k,itu3) + c1p*w(i,j+1,k,itu3)
-             dvt(i,j,k,2) = dvt(i,j,k,2)      + c2m*w(i,j-1,k,itu4) &
-                  - c20*w(i,j,k,itu4) + c2p*w(i,j+1,k,itu4)
-
-             b1 = -c1m
-             c1 =  c10
-             d1 = -c1p
-
-             b2 = -c2m
-             c2 =  c20
-             d2 = -c2p
-
-             ! Update the central jacobian. For nonboundary cells this
-             ! is simply c1 and c2. For boundary cells this is slightly
-             ! more complicated, because the boundary conditions are
-             ! treated implicitly and the off-diagonal terms b1, b2 and
-             ! d1, d2 must be taken into account.
-             ! The boundary conditions are only treated implicitly if
-             ! the diagonal dominance of the matrix is increased.
-
-             if(j == 2) then
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1 &
-                     - b1*max(bmtj1(i,k,itu3,itu3),zero)
-                qq(i,j,k,1,2) = qq(i,j,k,1,2) - b1*bmtj1(i,k,itu3,itu4)
-                qq(i,j,k,2,1) = qq(i,j,k,2,1) - b2*bmtj1(i,k,itu4,itu3)
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2 &
-                     - b2*max(bmtj1(i,k,itu4,itu4),zero)
-             else if(j == jl) then
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1 &
-                     - d1*max(bmtj2(i,k,itu3,itu3),zero)
-                qq(i,j,k,1,2) = qq(i,j,k,1,2) - d1*bmtj2(i,k,itu3,itu4)
-                qq(i,j,k,2,1) = qq(i,j,k,2,1) - d2*bmtj2(i,k,itu4,itu3)
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2 &
-                     - d2*max(bmtj2(i,k,itu4,itu4),zero)
-             else
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2
-             endif
-
-          enddo
-       enddo
-    enddo
-    !
-    !       Viscous terms in i-direction.
-    !
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-
-             ! Compute the metrics in xi-direction, i.e. along the
-             ! line i = constant.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i-1,j,k))
-             volpi = two/(vol(i,j,k) + vol(i+1,j,k))
-
-             xm = si(i-1,j,k,1)*volmi
-             ym = si(i-1,j,k,2)*volmi
-             zm = si(i-1,j,k,3)*volmi
-             xp = si(i,  j,k,1)*volpi
-             yp = si(i,  j,k,2)*volpi
-             zp = si(i,  j,k,3)*volpi
-
-             xa  = half*(si(i,j,k,1) + si(i-1,j,k,1))*voli
-             ya  = half*(si(i,j,k,2) + si(i-1,j,k,2))*voli
-             za  = half*(si(i,j,k,3) + si(i-1,j,k,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             ! Computation of the viscous terms in xi-direction; note
-             ! that cross-derivatives are neglected, i.e. the mesh is
-             ! assumed to be orthogonal.
-             ! The second derivative in xi-direction is constructed as
-             ! the central difference of the first order derivatives, i.e.
-             ! d^2/dxi^2 = d/dxi (d/dxi i+1/2 - d/dxi i-1/2).
-             ! In this way the metric as well as the varying viscosity
-             ! can be taken into account; the latter appears inside the
-             ! d/dxi derivative. The whole term is divided by rho to
-             ! obtain the diffusion term for v2 and f.
-
-             rhoi = one/w(i,j,k,irho)
-             mulm = half*(rlv(i-1,j,k) + rlv(i,j,k))
-             mulp = half*(rlv(i+1,j,k) + rlv(i,j,k))
-             muem = half*(rev(i-1,j,k) + rev(i,j,k))
-             muep = half*(rev(i+1,j,k) + rev(i,j,k))
-
-             c1m = ttm*(mulm + sig1*muem)*rhoi
-             c1p = ttp*(mulp + sig1*muep)*rhoi
-             c10 = c1m + c1p
-
-             c2m = ttm
-             c2p = ttp
-             c20 = c2m + c2p
-
-             ! Update the residual for this cell and store the possible
-             ! coefficients for the matrix in b1, b2, c1, c2, d1 and d2.
-
-             dvt(i,j,k,1) = dvt(i,j,k,1)      + c1m*w(i-1,j,k,itu3) &
-                  - c10*w(i,j,k,itu3) + c1p*w(i+1,j,k,itu3)
-             dvt(i,j,k,2) = dvt(i,j,k,2)      + c2m*w(i-1,j,k,itu4) &
-                  - c20*w(i,j,k,itu4) + c2p*w(i+1,j,k,itu4)
-
-             b1 = -c1m
-             c1 =  c10
-             d1 = -c1p
-
-             b2 = -c2m
-             c2 =  c20
-             d2 = -c2p
-
-             ! Update the central jacobian. For nonboundary cells this
-             ! is simply c1 and c2. For boundary cells this is slightly
-             ! more complicated, because the boundary conditions are
-             ! treated implicitly and the off-diagonal terms b1, b2 and
-             ! d1, d2 must be taken into account.
-             ! The boundary conditions are only treated implicitly if
-             ! the diagonal dominance of the matrix is increased.
-
-             if(i == 2) then
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1 &
-                     - b1*max(bmti1(j,k,itu3,itu3),zero)
-                qq(i,j,k,1,2) = qq(i,j,k,1,2) - b1*bmti1(j,k,itu3,itu4)
-                qq(i,j,k,2,1) = qq(i,j,k,2,1) - b2*bmti1(j,k,itu4,itu3)
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2 &
-                     - b2*max(bmti1(j,k,itu4,itu4),zero)
-             else if(i == il) then
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1 &
-                     - d1*max(bmti2(j,k,itu3,itu3),zero)
-                qq(i,j,k,1,2) = qq(i,j,k,1,2) - d1*bmti2(j,k,itu3,itu4)
-                qq(i,j,k,2,1) = qq(i,j,k,2,1) - d2*bmti2(j,k,itu4,itu3)
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2 &
-                     - d2*max(bmti2(j,k,itu4,itu4),zero)
-             else
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2
-             endif
-
-          enddo
-       enddo
-    enddo
-
-    ! Multiply the residual by the volume and store this in dw; this
-    ! is done for monitoring reasons only. The multiplication with the
-    ! volume is present to be consistent with the flow residuals; also
-    ! the negative value is taken, again to be consistent with the
-    ! flow equations. Also multiply by iblank so that no updates occur
-    ! in holes or the overset boundary.
-
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-             rblank = real(iblank(i,j,k), realType)
-             dw(i,j,k,itu3) = -volRef(i,j,k)*dvt(i,j,k,1)*rblank
-             dw(i,j,k,itu4) = -volRef(i,j,k)*dvt(i,j,k,2)*rblank
-          enddo
-       enddo
-    enddo
-
-    ! Initialize the wall function flags to .false.
-
-    flagI2 = .false.
-    flagIl = .false.
-    flagJ2 = .false.
-    flagJl = .false.
-    flagK2 = .false.
-    flagKl = .false.
-
-    ! Modify the rhs of the 1st internal cell, if wall functions
-    ! are used; their value is determined by the table.
-
-    testWallFunctions: if( wallFunctions ) then
-
-       bocos: do nn=1,nViscBocos
-
-          ! Determine the block face on which the subface is located
-          ! and set some variables. As flag points to the entire array
-          ! flagI2, etc., its starting indices are the starting indices
-          ! of its target and not 1.
-
-          select case (BCFaceID(nn))
-          case (iMin)
-             flag    => flagI2
-             dw2    => dw(2,1:,1:,1:);    dvt2 => dvt(2,1:,1:,1:)
-             w2     => w (2,1:,1:,1:);    rlv2 => rlv(2,1:,1:)
-             w3     => w (3,1:,1:,1:);    rlv3 => rlv(3,1:,1:)
-             d2Wall2=> d2Wall(2,:,:);     rev2 => rev(2,1:,1:)
-             d2Wall3=> d2Wall(3,:,:);     rev3 => rev(3,1:,1:)
-
-          case (iMax)
-             flag    => flagIl
-             dw2    => dw(il  ,1:,1:,1:); dvt2 => dvt(il  ,1:,1:,1:)
-             w2     => w (il  ,1:,1:,1:); rlv2 => rlv(il  ,1:,1:)
-             w3     => w (il-1,1:,1:,1:); rlv3 => rlv(il-1,1:,1:)
-             d2Wall2=> d2Wall(il  ,:,:);  rev2 => rev(il  ,1:,1:)
-             d2Wall3=> d2Wall(il-1,:,:);  rev3 => rev(il-1,1:,1:)
-
-          case (jMin)
-             flag    => flagJ2
-             dw2    => dw(1:,2,1:,1:);    dvt2 => dvt(1:,2,1:,1:)
-             w2     => w (1:,2,1:,1:);    rlv2 => rlv(1:,2,1:)
-             w3     => w (1:,3,1:,1:);    rlv3 => rlv(1:,3,1:)
-             d2Wall2=> d2Wall(:,2,:);     rev2 => rev(1:,2,1:)
-             d2Wall3=> d2Wall(:,3,:);     rev3 => rev(1:,3,1:)
-
-          case (jMax)
-             flag    => flagJl
-             dw2    => dw(1:,jl  ,1:,1:); dvt2 => dvt(1:,jl  ,1:,1:)
-             w2     => w (1:,jl  ,1:,1:); rlv2 => rlv(1:,jl  ,1:)
-             w3     => w (1:,jl-1,1:,1:); rlv3 => rlv(1:,jl-1,1:)
-             d2Wall2=> d2Wall(:,jl  ,:);  rev2 => rev(1:,jl  ,1:)
-             d2Wall3=> d2Wall(:,jl-1,:);  rev3 => rev(1:,jl-1,1:)
-
-          case (kMin)
-             flag    => flagK2
-             dw2    => dw(1:,1:,2,1:);    dvt2 => dvt(1:,1:,2,1:)
-             w2     => w (1:,1:,2,1:);    rlv2 => rlv(1:,1:,2)
-             w3     => w (1:,1:,3,1:);    rlv3 => rlv(1:,1:,3)
-             d2Wall2=> d2Wall(:,:,2);     rev2 => rev(1:,1:,2)
-             d2Wall3=> d2Wall(:,:,3);     rev3 => rev(1:,1:,3)
-
-          case (kMax)
-             flag    => flagKl
-             dw2    => dw(1:,1:,kl  ,1:); dvt2 => dvt(1:,1:,kl,1:)
-             w2     => w (1:,1:,kl  ,1:); rlv2 => rlv(1:,1:,kl)
-             w3     => w (1:,1:,kl-1,1:); rlv3 => rlv(1:,1:,kl-1)
-             d2Wall2=> d2Wall(:,:,kl  );  rev2 => rev(1:,1:,kl)
-             d2Wall3=> d2Wall(:,:,kl-1);  rev3 => rev(1:,1:,kl-1)
-
-          end select
-
-          ! Loop over the owned faces of this subface. Therefore the
-          ! nodal range of BCData must be used. The offset of +1 is
-          ! present, because the starting index of the cell range is
-          ! 1 larger than the starting index of the nodal range.
-
-          do j=(BCData(nn)%jnBeg+1),BCData(nn)%jnEnd
-             do i=(BCData(nn)%inBeg+1),BCData(nn)%inEnd
-
-                ! Enforce v and f in the 1st internal cell from
-                ! the wall function table. There is an offset of -1 in
-                ! the wall distance. Note that the offset compared to
-                ! the current value must be stored. Also note that the
-                ! curve fits contain the non-dimensional values.
-
-                utau = viscSubface(nn)%utau(i,j)
-                yp = w2(i,j,irho)*d2Wall2(i-1,j-1)*utau/rlv2(i,j)
-
-                ! Set dw2 to zero for proper monitoring of the
-                ! convergence.
-
-                dw2(i,j,itu3) = zero
-                dw2(i,j,itu4) = zero
-
-                ! Get table values
-
-                call curveTupYp(tup(itu3:itu4), yp, itu3, itu4)
-                tu32  = tup(itu3)*utau**2
-                tu42  = tup(itu4)*utau**2/rlv2(i,j)*w2(i,j,irho)
-
-                ! Compute f from balance
-
-                if(rvfN .eq. 1) then
-                   call curveTupYp(tup(itu1:itu2), yp, itu1, itu2)
-                   tu12  = tup(itu1)*utau**2
-                   tu22  = tup(itu2)*utau**4/rlv2(i,j)*w2(i,j,irho)
-                   call curveTupYp(tup(itu5:itu5), yp, itu5, itu5)
-                   tu52  = tup(itu5)*rlv2(i,j)
-                   dtu23 = (w3(i,j,itu3)-tu32) &
-                        / (d2Wall3(i-1,j-1)-d2Wall2(i-1,j-1))
-                   rnu23 = half*( (tu52     +rlv2(i,j))/w2(i,j,irho) + &
-                        (rev3(i,j)+rlv3(i,j))/w3(i,j,irho) )
-                   tu42  = (tu22*tu32/tu12-rnu23*dtu23 &
-                        / (two*d2Wall2(i-1,j-1)))/tu12
-                endif
-
-                ! Set rhs to turbulence variables divide by betaTurb
-                ! because the update is scaled by betaTurb.
-                ! (see end of routine)
-
-                dvt2(i,j,1) = (tu32 - w2(i,j,itu3))/betaTurb
-                dvt2(i,j,2) = (tu42 - w2(i,j,itu4))/betaTurb
-                if(rvfN .eq. 1) dvt2(i,j,2) = (tu42 - w2(i,j,itu4))*0.01
-
-                ! Set the wall flag to .true.
-
-                flag(i,j) = .true.
-
-             enddo
-          enddo
-
-       enddo bocos
-    endif testWallFunctions
-
-    ! Return if only the residual must be computed.
-
-    if( resOnly ) return
-
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-
-             ! Set qq to 1 if the value is determined by the table.
-
-             if((i == 2  .and. flagI2(j,k)) .or. &
-                  (i == il .and. flagIl(j,k)) .or. &
-                  (j == 2  .and. flagJ2(i,k)) .or. &
-                  (j == jl .and. flagJl(i,k)) .or. &
-                  (k == 2  .and. flagK2(i,j)) .or. &
-                  (k == kl .and. flagKl(i,j))) then
-                qq(i,j,k,1,1) = one
-                qq(i,j,k,1,2) = zero
-                qq(i,j,k,2,1) = zero
-                qq(i,j,k,2,2) = one
-             endif
-          enddo
-       enddo
-    enddo
-
-    ! Initialize the grid velocity to zero. This value will be used
-    ! if the block is not moving.
-
-    qs = zero
-    !
-    !       dd-ADI step in j-direction. There is no particular reason to
-    !       start in j-direction, it just happened to be so. As we solve
-    !       in j-direction, the j-loop is the innermost loop.
-    !
-    do k=2,kl
-       do i=2,il
-          do j=2,jl
-
-             ! More or less the same code is executed here as above when
-             ! the residual was built. However, now the off-diagonal
-             ! terms for the dd-ADI must be built and stored. This could
-             ! have been done earlier, but then all the coefficients had
-             ! to be stored. To save memory, they are recomputed.
-             ! Consequently, see the j-loop to build the residual for
-             ! the comments.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i,j-1,k))
-             volpi = two/(vol(i,j,k) + vol(i,j+1,k))
-
-             xm = sj(i,j-1,k,1)*volmi
-             ym = sj(i,j-1,k,2)*volmi
-             zm = sj(i,j-1,k,3)*volmi
-             xp = sj(i,j,  k,1)*volpi
-             yp = sj(i,j,  k,2)*volpi
-             zp = sj(i,j,  k,3)*volpi
-
-             xa  = half*(sj(i,j,k,1) + sj(i,j-1,k,1))*voli
-             ya  = half*(sj(i,j,k,2) + sj(i,j-1,k,2))*voli
-             za  = half*(sj(i,j,k,3) + sj(i,j-1,k,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             ! Off-diagonal terms due to the diffusion terms
-             ! in j-direction.
-
-             rhoi = one/w(i,j,k,irho)
-             mulm = half*(rlv(i,j-1,k) + rlv(i,j,k))
-             mulp = half*(rlv(i,j+1,k) + rlv(i,j,k))
-             muem = half*(rev(i,j-1,k) + rev(i,j,k))
-             muep = half*(rev(i,j+1,k) + rev(i,j,k))
-
-             c1m = ttm*(mulm + sig1*muem)*rhoi
-             c1p = ttp*(mulp + sig1*muep)*rhoi
-
-             c2m = ttm
-             c2p = ttp
-
-             bb(1,j) = -c1m
-             dd(1,j) = -c1p
-             bb(2,j) = -c2m
-             dd(2,j) = -c2p
-
-             ! Compute the grid velocity if present.
-             ! It is taken as the average of j and j-1,
-
-             if( addGridVelocities ) &
-                  qs = half*(sFaceJ(i,j,k) + sFaceJ(i,j-1,k))*voli
-
-             ! Off-diagonal terms due to the advection term in
-             ! j-direction. First order approximation.
-
-             uu = xa*w(i,j,k,ivx) + ya*w(i,j,k,ivy) + za*w(i,j,k,ivz) - qs
-             um = zero
-             up = zero
-             if(uu < zero) um = uu
-             if(uu > zero) up = uu
-
-             bb(1,j) = bb(1,j) - up
-             dd(1,j) = dd(1,j) + um
-             bb(2,j) = bb(2,j)
-             dd(2,j) = dd(2,j)
-
-             ! Store the central jacobian and rhs in cc and ff.
-             ! Multiply the off-diagonal terms and rhs by the iblank
-             ! value so the update determined for iblank = 0 is zero.
-
-             rblank = real(iblank(i,j,k), realType)
-
-             cc(1,1,j) = qq(i,j,k,1,1)
-             cc(1,2,j) = qq(i,j,k,1,2)*rblank
-             cc(2,1,j) = qq(i,j,k,2,1)*rblank
-             cc(2,2,j) = qq(i,j,k,2,2)
-
-             ff(1,j) = dvt(i,j,k,1)*rblank
-             ff(2,j) = dvt(i,j,k,2)*rblank
-
-             bb(:,j) = bb(:,j)*rblank
-             dd(:,j) = dd(:,j)*rblank
-
-             ! Set off diagonal terms to zero if wall function are used.
-
-             if((i == 2  .and. flagI2(j,k)) .or. &
-                  (i == il .and. flagIl(j,k)) .or. &
-                  (j == 2  .and. flagJ2(i,k)) .or. &
-                  (j == jl .and. flagJl(i,k)) .or. &
-                  (k == 2  .and. flagK2(i,j)) .or. &
-                  (k == kl .and. flagKl(i,j))) then
-                bb(1,j) = zero
-                dd(1,j) = zero
-                bb(2,j) = zero
-                dd(2,j) = zero
-             endif
-
-          enddo
-
-          ! Solve the tri-diagonal system in j-direction.
-
-          call tdia3(2_intType, jl, bb, cc, dd, ff)
-
-          ! Determine the new rhs for the next direction.
-
-          do j=2,jl
-             dvt(i,j,k,1) = qq(i,j,k,1,1)*ff(1,j) + qq(i,j,k,1,2)*ff(2,j)
-             dvt(i,j,k,2) = qq(i,j,k,2,1)*ff(1,j) + qq(i,j,k,2,2)*ff(2,j)
-          enddo
-
-       enddo
-    enddo
-    !
-    !       dd-ADI step in i-direction. As we solve in i-direction, the
-    !       i-loop is the innermost loop.
-    !
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-
-             ! More or less the same code is executed here as above when
-             ! the residual was built. However, now the off-diagonal
-             ! terms for the dd-ADI must be built and stored. This could
-             ! have been done earlier, but then all the coefficients had
-             ! to be stored. To save memory, they are recomputed.
-             ! Consequently, see the i-loop to build the residual for
-             ! the comments.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i-1,j,k))
-             volpi = two/(vol(i,j,k) + vol(i+1,j,k))
-
-             xm = si(i-1,j,k,1)*volmi
-             ym = si(i-1,j,k,2)*volmi
-             zm = si(i-1,j,k,3)*volmi
-             xp = si(i,  j,k,1)*volpi
-             yp = si(i,  j,k,2)*volpi
-             zp = si(i,  j,k,3)*volpi
-
-             xa  = half*(si(i,j,k,1) + si(i-1,j,k,1))*voli
-             ya  = half*(si(i,j,k,2) + si(i-1,j,k,2))*voli
-             za  = half*(si(i,j,k,3) + si(i-1,j,k,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             ! Off-diagonal terms due to the diffusion terms
-             ! in i-direction.
-
-             rhoi = one/w(i,j,k,irho)
-             mulm = half*(rlv(i-1,j,k) + rlv(i,j,k))
-             mulp = half*(rlv(i+1,j,k) + rlv(i,j,k))
-             muem = half*(rev(i-1,j,k) + rev(i,j,k))
-             muep = half*(rev(i+1,j,k) + rev(i,j,k))
-
-             c1m = ttm*(mulm + sig1*muem)*rhoi
-             c1p = ttp*(mulp + sig1*muep)*rhoi
-
-             c2m = ttm
-             c2p = ttp
-
-             bb(1,i) = -c1m
-             dd(1,i) = -c1p
-             bb(2,i) = -c2m
-             dd(2,i) = -c2p
-
-             ! Compute the grid velocity if present.
-             ! It is taken as the average of i and i-1,
-
-             if( addGridVelocities ) &
-                  qs = half*(sFaceI(i,j,k) + sFaceI(i-1,j,k))*voli
-
-             ! Off-diagonal terms due to the advection term in
-             ! i-direction. First order approximation.
-
-             uu = xa*w(i,j,k,ivx) + ya*w(i,j,k,ivy) + za*w(i,j,k,ivz) - qs
-             um = zero
-             up = zero
-             if(uu < zero) um = uu
-             if(uu > zero) up = uu
-
-             bb(1,i) = bb(1,i) - up
-             dd(1,i) = dd(1,i) + um
-             bb(2,i) = bb(2,i)
-             dd(2,i) = dd(2,i)
-
-             ! Store the central jacobian and rhs in cc and ff.
-             ! Multiply the off-diagonal terms and rhs by the iblank
-             ! value so the update determined for iblank = 0 is zero.
-
-             rblank = real(iblank(i,j,k), realType)
-
-             cc(1,1,i) = qq(i,j,k,1,1)
-             cc(1,2,i) = qq(i,j,k,1,2)*rblank
-             cc(2,1,i) = qq(i,j,k,2,1)*rblank
-             cc(2,2,i) = qq(i,j,k,2,2)
-
-             ff(1,i) = dvt(i,j,k,1)*rblank
-             ff(2,i) = dvt(i,j,k,2)*rblank
-
-             bb(:,i) = bb(:,i)*rblank
-             dd(:,i) = dd(:,i)*rblank
-
-             ! Set off diagonal terms to zero if wall function are used.
-
-             if((i == 2  .and. flagI2(j,k)) .or. &
-                  (i == il .and. flagIl(j,k)) .or. &
-                  (j == 2  .and. flagJ2(i,k)) .or. &
-                  (j == jl .and. flagJl(i,k)) .or. &
-                  (k == 2  .and. flagK2(i,j)) .or. &
-                  (k == kl .and. flagKl(i,j))) then
-                bb(1,i) = zero
-                dd(1,i) = zero
-                bb(2,i) = zero
-                dd(2,i) = zero
-             endif
-
-          enddo
-
-          ! Solve the tri-diagonal system in i-direction.
-
-          call tdia3(2_intType, il, bb, cc, dd, ff)
-
-          ! Determine the new rhs for the next direction.
-
-          do i=2,il
-             dvt(i,j,k,1) = qq(i,j,k,1,1)*ff(1,i) + qq(i,j,k,1,2)*ff(2,i)
-             dvt(i,j,k,2) = qq(i,j,k,2,1)*ff(1,i) + qq(i,j,k,2,2)*ff(2,i)
-          enddo
-
-       enddo
-    enddo
-    !
-    !       dd-ADI step in k-direction. As we solve in k-direction, the
-    !       k-loop is the innermost loop.
-    !
-    do j=2,jl
-       do i=2,il
-          do k=2,kl
-
-             ! More or less the same code is executed here as above when
-             ! the residual was built. However, now the off-diagonal
-             ! terms for the dd-ADI must be built and stored. This could
-             ! have been done earlier, but then all the coefficients had
-             ! to be stored. To save memory, they are recomputed.
-             ! Consequently, see the k-loop to build the residual for
-             ! the comments.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i,j,k-1))
-             volpi = two/(vol(i,j,k) + vol(i,j,k+1))
-
-             xm = sk(i,j,k-1,1)*volmi
-             ym = sk(i,j,k-1,2)*volmi
-             zm = sk(i,j,k-1,3)*volmi
-             xp = sk(i,j,k,  1)*volpi
-             yp = sk(i,j,k,  2)*volpi
-             zp = sk(i,j,k,  3)*volpi
-
-             xa  = half*(sk(i,j,k,1) + sk(i,j,k-1,1))*voli
-             ya  = half*(sk(i,j,k,2) + sk(i,j,k-1,2))*voli
-             za  = half*(sk(i,j,k,3) + sk(i,j,k-1,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             ! Off-diagonal terms due to the diffusion terms
-             ! in k-direction.
-
-             rhoi = one/w(i,j,k,irho)
-             mulm = half*(rlv(i,j,k-1) + rlv(i,j,k))
-             mulp = half*(rlv(i,j,k+1) + rlv(i,j,k))
-             muem = half*(rev(i,j,k-1) + rev(i,j,k))
-             muep = half*(rev(i,j,k+1) + rev(i,j,k))
-
-             c1m = ttm*(mulm + sig1*muem)*rhoi
-             c1p = ttp*(mulp + sig1*muep)*rhoi
-
-             c2m = ttm
-             c2p = ttp
-
-             bb(1,k) = -c1m
-             dd(1,k) = -c1p
-             bb(2,k) = -c2m
-             dd(2,k) = -c2p
-
-             ! Compute the grid velocity if present.
-             ! It is taken as the average of k and k-1,
-
-             if( addGridVelocities ) &
-                  qs = half*(sFaceK(i,j,k) + sFaceK(i,j,k-1))*voli
-
-             ! Off-diagonal terms due to the advection term in
-             ! k-direction. First order approximation.
-
-             uu = xa*w(i,j,k,ivx) + ya*w(i,j,k,ivy) + za*w(i,j,k,ivz) - qs
-             um = zero
-             up = zero
-             if(uu < zero) um = uu
-             if(uu > zero) up = uu
-
-             bb(1,k) = bb(1,k) - up
-             dd(1,k) = dd(1,k) + um
-             bb(2,k) = bb(2,k)
-             dd(2,k) = dd(2,k)
-
-             ! Store the central jacobian and rhs in cc and ff.
-             ! Multiply the off-diagonal terms and rhs by the iblank
-             ! value so the update determined for iblank = 0 is zero.
-
-             rblank = real(iblank(i,j,k), realType)
-
-             cc(1,1,k) = qq(i,j,k,1,1)
-             cc(1,2,k) = qq(i,j,k,1,2)*rblank
-             cc(2,1,k) = qq(i,j,k,2,1)*rblank
-             cc(2,2,k) = qq(i,j,k,2,2)
-
-             ff(1,k) = dvt(i,j,k,1)*rblank
-             ff(2,k) = dvt(i,j,k,2)*rblank
-
-             bb(:,k) = bb(:,k)*rblank
-             dd(:,k) = dd(:,k)*rblank
-
-             ! Set off diagonal terms to zero if wall function are used.
-
-             if((i == 2  .and. flagI2(j,k)) .or. &
-                  (i == il .and. flagIl(j,k)) .or. &
-                  (j == 2  .and. flagJ2(i,k)) .or. &
-                  (j == jl .and. flagJl(i,k)) .or. &
-                  (k == 2  .and. flagK2(i,j)) .or. &
-                  (k == kl .and. flagKl(i,j))) then
-                bb(1,k) = zero
-                dd(1,k) = zero
-                bb(2,k) = zero
-                dd(2,k) = zero
-             endif
-
-          enddo
-
-          ! Solve the tri-diagonal system in k-direction.
-
-          call tdia3(2_intType, kl, bb, cc, dd, ff)
-
-          ! Store the update in dvt.
-
-          do k=2,kl
-             dvt(i,j,k,1) = ff(1,k)
-             dvt(i,j,k,2) = ff(2,k)
-          enddo
-
-       enddo
-    enddo
-    !
-    !       Update the turbulent variables.
-    !
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-             w(i,j,k,itu3) = w(i,j,k,itu3) + betaTurb*dvt(i,j,k,1)
-             w(i,j,k,itu4) = w(i,j,k,itu4) + betaTurb*dvt(i,j,k,2)
-          enddo
-       enddo
-    enddo
-
-  end subroutine vfSolve
-  subroutine keSolve(resOnly)
-    !
-    !       keSolve solves the k-eps transport equations of the v2-f model
-    !       in a coupled manner using a diagonal dominant ADI-scheme.
-    !
-    use blockPointers
-    use constants
-    use flowVarRefState
-    use inputIteration
-    use inputPhysics
-    use paramTurb
-    use turbMod, only : prod, dvt, sct, scl2, sig1, sig2
-    use turbUtils, only : turbAdvection, unsteadyTurbTerm, tdia3
-    use turbCurveFits, only : curveTupYp
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    logical, intent(in) :: resOnly
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, nn
-
-    real(kind=realType) :: alp
-    real(kind=realType) :: rhoi, ss, spk, ff1, ff2, ff3
-    real(kind=realType) :: ss1, ss2, ss3
-    real(kind=realType) :: voli, volmi, volpi
-    real(kind=realType) :: xm, ym, zm, xp, yp, zp, xa, ya, za
-    real(kind=realType) :: ttm, ttp, mulm, mulp, muem, muep
-    real(kind=realType) :: c1m, c1p, c10, c2m, c2p, c20
-    real(kind=realType) :: b1, b2, c1, c2, d1, d2
-    real(kind=realType) :: qs, uu, um, up, utau
-    real(kind=realType) :: tke, tep, tv2, tkea, tepa, tv2a
-    real(kind=realType) :: tv2l, stei, sle2i
-    real(kind=realType) :: rnu23, tu12, tu22, tu52, prod2, dtu23
-    real(kind=realType) :: factor, rblank
-
-    real(kind=realType), dimension(itu1:itu5) :: tup
-
-    real(kind=realType), dimension(2:il,2:jl,2:kl,2,2)  :: qq
-    real(kind=realType), dimension(2,2:max(il,jl,kl))   :: bb, dd, ff
-    real(kind=realType), dimension(2,2,2:max(il,jl,kl)) :: cc
-
-    real(kind=realType), dimension(:,:,:), pointer :: dw2, dvt2, w2, w3
-    real(kind=realType), dimension(:,:),   pointer :: rlv2, rlv3
-    real(kind=realType), dimension(:,:),   pointer :: rev2, rev3
-    real(kind=realType), dimension(:,:),   pointer :: d2Wall2, d2Wall3
-
-    logical, dimension(2:jl,2:kl), target :: flagI2, flagIl
-    logical, dimension(2:il,2:kl), target :: flagJ2, flagJl
-    logical, dimension(2:il,2:jl), target :: flagK2, flagKl
-
-    logical, dimension(:,:), pointer :: flag
-
-    ! Set model constants
-
-    sig1 = rvfSigk1
-    sig2 = rvfSige1
-
-    ! Set the pointer dvt to the correct entries in dw.
-
-    dvt => scratch(1:,1:,1:,idvt:)
-    sct => scratch(1:,1:,1:,isct)
-    scl2=> scratch(1:,1:,1:,iscl2)
-    !
-    !       Source terms.
-    !       Determine the source term and its derivative w.r.t. k and
-    !       epsilon for all internal cells of the block.
-    !
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-
-             ! Compute the source terms for both the k and the epsilon
-             ! equation.
-
-             rhoi = one/w(i,j,k,irho)
-             tke  = w(i,j,k,itu1)
-             tep  = w(i,j,k,itu2)
-             tv2  = w(i,j,k,itu3)
-             tkea = abs(tke)
-             tepa = abs(tep)
-             tv2a = abs(tv2)
-             tv2l = max(tv2a,rvfLimitK*0.66666_realType)
-             stei = tepa/sct(i,j,k)
-             sle2i= tepa**2/scl2(i,j,k)
-             ss   = prod(i,j,k)
-             spk  = rev(i,j,k)*ss*rhoi
-             spk  = min(spk, pklim*tepa)
-
-             ! alp = rvfN1A+rvfN1B &
-             !     / (1.+(d2Wall(i,j,k)*.5*rvfCl)**2*sle2i)**4
-             alp = rvfN1A+rvfN1B &
-                  / (1.+d2Wall(i,j,k)**2*.25*rvfCl**2*tepa**2/scl2(i,j,k))**4
-             if(rvfN == 6) alp = rvfN6A*(1.+rvfN6B*sqrt(tkea/tv2l))
-
-             ff1 = zero
-             ff2 =-one
-             ff3 = spk
-
-             ss1 = zero
-             ss2 =-rvfBeta*stei
-             ss3 = alp*spk*stei
-
-             dvt(i,j,k,1) = ff1*tke + ff2*tep + ff3
-             dvt(i,j,k,2) = ss1*tke + ss2*tep + ss3
-
-             ! Compute the source term jacobian. Note that only the
-             ! destruction terms are linearized to increase the diagonal
-             ! dominance of the matrix. Furthermore minus the source
-             ! term jacobian is stored.
-
-             qq(i,j,k,1,1) = -ff1
-             qq(i,j,k,1,2) = -ff2
-             qq(i,j,k,2,1) = -ss1
-             qq(i,j,k,2,2) = -ss2
-
-          enddo
-       enddo
-    enddo
-    !
-    !       Advection and unsteady terms.
-    !
-    nn = itu1 - 1
-    call turbAdvection(2_intType, 2_intType, nn, qq)
-
-    call unsteadyTurbTerm(2_intType, 2_intType, nn, qq)
-    !
-    !       Viscous terms in k-direction.
-    !
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-
-             ! Compute the metrics in zeta-direction, i.e. along the
-             ! line k = constant.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i,j,k-1))
-             volpi = two/(vol(i,j,k) + vol(i,j,k+1))
-
-             xm = sk(i,j,k-1,1)*volmi
-             ym = sk(i,j,k-1,2)*volmi
-             zm = sk(i,j,k-1,3)*volmi
-             xp = sk(i,j,k,  1)*volpi
-             yp = sk(i,j,k,  2)*volpi
-             zp = sk(i,j,k,  3)*volpi
-
-             xa  = half*(sk(i,j,k,1) + sk(i,j,k-1,1))*voli
-             ya  = half*(sk(i,j,k,2) + sk(i,j,k-1,2))*voli
-             za  = half*(sk(i,j,k,3) + sk(i,j,k-1,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             ! Computation of the viscous terms in zeta-direction; note
-             ! that cross-derivatives are neglected, i.e. the mesh is
-             ! assumed to be orthogonal.
-             ! The second derivative in zeta-direction is constructed as
-             ! the central difference of the first order derivatives, i.e.
-             ! d^2/dzeta^2 = d/dzeta (d/dzeta k+1/2 - d/dzeta k-1/2).
-             ! In this way the metric as well as the varying viscosity
-             ! can be taken into account; the latter appears inside the
-             ! d/dzeta derivative. The whole term is divided by rho to
-             ! obtain the diffusion term for k and epsilon.
-
-             rhoi = one/w(i,j,k,irho)
-             mulm = half*(rlv(i,j,k-1) + rlv(i,j,k))
-             mulp = half*(rlv(i,j,k+1) + rlv(i,j,k))
-             muem = half*(rev(i,j,k-1) + rev(i,j,k))
-             muep = half*(rev(i,j,k+1) + rev(i,j,k))
-
-             c1m = ttm*(mulm + sig1*muem)*rhoi
-             c1p = ttp*(mulp + sig1*muep)*rhoi
-             c10 = c1m + c1p
-
-             c2m = ttm*(mulm + sig2*muem)*rhoi
-             c2p = ttp*(mulp + sig2*muep)*rhoi
-             c20 = c2m + c2p
-
-             ! Update the residual for this cell and store the possible
-             ! coefficients for the matrix in b1, b2, c1, c2, d1 and d2.
-
-             dvt(i,j,k,1) = dvt(i,j,k,1)      + c1m*w(i,j,k-1,itu1) &
-                  - c10*w(i,j,k,itu1) + c1p*w(i,j,k+1,itu1)
-             dvt(i,j,k,2) = dvt(i,j,k,2)      + c2m*w(i,j,k-1,itu2) &
-                  - c20*w(i,j,k,itu2) + c2p*w(i,j,k+1,itu2)
-
-             b1 = -c1m
-             c1 =  c10
-             d1 = -c1p
-
-             b2 = -c2m
-             c2 =  c20
-             d2 = -c2p
-
-             ! Update the central jacobian. For nonboundary cells this
-             ! is simply c1 and c2. For boundary cells this is slightly
-             ! more complicated, because the boundary conditions are
-             ! treated implicitly and the off-diagonal terms b1, b2 and
-             ! d1, d2 must be taken into account.
-             ! The boundary conditions are only treated implicitly if
-             ! the diagonal dominance of the matrix is increased.
-
-             if(k == 2) then
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1 &
-                     - b1*max(bmtk1(i,j,itu1,itu1),zero)
-                qq(i,j,k,1,2) = qq(i,j,k,1,2) - b1*bmtk1(i,j,itu1,itu2)
-                qq(i,j,k,2,1) = qq(i,j,k,2,1) - b2*bmtk1(i,j,itu2,itu1)
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2 &
-                     - b2*max(bmtk1(i,j,itu2,itu2),zero)
-             else if(k == kl) then
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1 &
-                     - d1*max(bmtk2(i,j,itu1,itu1),zero)
-                qq(i,j,k,1,2) = qq(i,j,k,1,2) - d1*bmtk2(i,j,itu1,itu2)
-                qq(i,j,k,2,1) = qq(i,j,k,2,1) - d2*bmtk2(i,j,itu2,itu1)
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2 &
-                     - d2*max(bmtk2(i,j,itu2,itu2),zero)
-             else
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2
-             endif
-
-          enddo
-       enddo
-    enddo
-    !
-    !       Viscous terms in j-direction.
-    !
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-
-             ! Compute the metrics in eta-direction, i.e. along the
-             ! line j = constant.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i,j-1,k))
-             volpi = two/(vol(i,j,k) + vol(i,j+1,k))
-
-             xm = sj(i,j-1,k,1)*volmi
-             ym = sj(i,j-1,k,2)*volmi
-             zm = sj(i,j-1,k,3)*volmi
-             xp = sj(i,j,  k,1)*volpi
-             yp = sj(i,j,  k,2)*volpi
-             zp = sj(i,j,  k,3)*volpi
-
-             xa  = half*(sj(i,j,k,1) + sj(i,j-1,k,1))*voli
-             ya  = half*(sj(i,j,k,2) + sj(i,j-1,k,2))*voli
-             za  = half*(sj(i,j,k,3) + sj(i,j-1,k,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             ! Computation of the viscous terms in eta-direction; note
-             ! that cross-derivatives are neglected, i.e. the mesh is
-             ! assumed to be orthogonal.
-             ! The second derivative in eta-direction is constructed as
-             ! the central difference of the first order derivatives, i.e.
-             ! d^2/deta^2 = d/deta (d/deta j+1/2 - d/deta j-1/2).
-             ! In this way the metric as well as the varying viscosity
-             ! can be taken into account; the latter appears inside the
-             ! d/deta derivative. The whole term is divided by rho to
-             ! obtain the diffusion term for k and epsilon.
-
-             rhoi = one/w(i,j,k,irho)
-             mulm = half*(rlv(i,j-1,k) + rlv(i,j,k))
-             mulp = half*(rlv(i,j+1,k) + rlv(i,j,k))
-             muem = half*(rev(i,j-1,k) + rev(i,j,k))
-             muep = half*(rev(i,j+1,k) + rev(i,j,k))
-
-             c1m = ttm*(mulm + sig1*muem)*rhoi
-             c1p = ttp*(mulp + sig1*muep)*rhoi
-             c10 = c1m + c1p
-
-             c2m = ttm*(mulm + sig2*muem)*rhoi
-             c2p = ttp*(mulp + sig2*muep)*rhoi
-             c20 = c2m + c2p
-
-             ! Update the residual for this cell and store the possible
-             ! coefficients for the matrix in b1, b2, c1, c2, d1 and d2.
-
-             dvt(i,j,k,1) = dvt(i,j,k,1)      + c1m*w(i,j-1,k,itu1) &
-                  - c10*w(i,j,k,itu1) + c1p*w(i,j+1,k,itu1)
-             dvt(i,j,k,2) = dvt(i,j,k,2)      + c2m*w(i,j-1,k,itu2) &
-                  - c20*w(i,j,k,itu2) + c2p*w(i,j+1,k,itu2)
-
-             b1 = -c1m
-             c1 =  c10
-             d1 = -c1p
-
-             b2 = -c2m
-             c2 =  c20
-             d2 = -c2p
-
-             ! Update the central jacobian. For nonboundary cells this
-             ! is simply c1 and c2. For boundary cells this is slightly
-             ! more complicated, because the boundary conditions are
-             ! treated implicitly and the off-diagonal terms b1, b2 and
-             ! d1, d2 must be taken into account.
-             ! The boundary conditions are only treated implicitly if
-             ! the diagonal dominance of the matrix is increased.
-
-             if(j == 2) then
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1 &
-                     - b1*max(bmtj1(i,k,itu1,itu1),zero)
-                qq(i,j,k,1,2) = qq(i,j,k,1,2) - b1*bmtj1(i,k,itu1,itu2)
-                qq(i,j,k,2,1) = qq(i,j,k,2,1) - b2*bmtj1(i,k,itu2,itu1)
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2 &
-                     - b2*max(bmtj1(i,k,itu2,itu2),zero)
-             else if(j == jl) then
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1 &
-                     - d1*max(bmtj2(i,k,itu1,itu1),zero)
-                qq(i,j,k,1,2) = qq(i,j,k,1,2) - d1*bmtj2(i,k,itu1,itu2)
-                qq(i,j,k,2,1) = qq(i,j,k,2,1) - d2*bmtj2(i,k,itu2,itu1)
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2 &
-                     - d2*max(bmtj2(i,k,itu2,itu2),zero)
-             else
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2
-             endif
-
-          enddo
-       enddo
-    enddo
-    !
-    !       Viscous terms in i-direction.
-    !
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-
-             ! Compute the metrics in xi-direction, i.e. along the
-             ! line i = constant.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i-1,j,k))
-             volpi = two/(vol(i,j,k) + vol(i+1,j,k))
-
-             xm = si(i-1,j,k,1)*volmi
-             ym = si(i-1,j,k,2)*volmi
-             zm = si(i-1,j,k,3)*volmi
-             xp = si(i,  j,k,1)*volpi
-             yp = si(i,  j,k,2)*volpi
-             zp = si(i,  j,k,3)*volpi
-
-             xa  = half*(si(i,j,k,1) + si(i-1,j,k,1))*voli
-             ya  = half*(si(i,j,k,2) + si(i-1,j,k,2))*voli
-             za  = half*(si(i,j,k,3) + si(i-1,j,k,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             ! Computation of the viscous terms in xi-direction; note
-             ! that cross-derivatives are neglected, i.e. the mesh is
-             ! assumed to be orthogonal.
-             ! The second derivative in xi-direction is constructed as
-             ! the central difference of the first order derivatives, i.e.
-             ! d^2/dxi^2 = d/dxi (d/dxi i+1/2 - d/dxi i-1/2).
-             ! In this way the metric as well as the varying viscosity
-             ! can be taken into account; the latter appears inside the
-             ! d/dxi derivative. The whole term is divided by rho to
-             ! obtain the diffusion term for k and epsilon.
-
-             rhoi = one/w(i,j,k,irho)
-             mulm = half*(rlv(i-1,j,k) + rlv(i,j,k))
-             mulp = half*(rlv(i+1,j,k) + rlv(i,j,k))
-             muem = half*(rev(i-1,j,k) + rev(i,j,k))
-             muep = half*(rev(i+1,j,k) + rev(i,j,k))
-
-             c1m = ttm*(mulm + sig1*muem)*rhoi
-             c1p = ttp*(mulp + sig1*muep)*rhoi
-             c10 = c1m + c1p
-
-             c2m = ttm*(mulm + sig2*muem)*rhoi
-             c2p = ttp*(mulp + sig2*muep)*rhoi
-             c20 = c2m + c2p
-
-             ! Update the residual for this cell and store the possible
-             ! coefficients for the matrix in b1, b2, c1, c2, d1 and d2.
-
-             dvt(i,j,k,1) = dvt(i,j,k,1)      + c1m*w(i-1,j,k,itu1) &
-                  - c10*w(i,j,k,itu1) + c1p*w(i+1,j,k,itu1)
-             dvt(i,j,k,2) = dvt(i,j,k,2)      + c2m*w(i-1,j,k,itu2) &
-                  - c20*w(i,j,k,itu2) + c2p*w(i+1,j,k,itu2)
-
-             b1 = -c1m
-             c1 =  c10
-             d1 = -c1p
-
-             b2 = -c2m
-             c2 =  c20
-             d2 = -c2p
-
-             ! Update the central jacobian. For nonboundary cells this
-             ! is simply c1 and c2. For boundary cells this is slightly
-             ! more complicated, because the boundary conditions are
-             ! treated implicitly and the off-diagonal terms b1, b2 and
-             ! d1, d2 must be taken into account.
-             ! The boundary conditions are only treated implicitly if
-             ! the diagonal dominance of the matrix is increased.
-
-             if(i == 2) then
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1 &
-                     - b1*max(bmti1(j,k,itu1,itu1),zero)
-                qq(i,j,k,1,2) = qq(i,j,k,1,2) - b1*bmti1(j,k,itu1,itu2)
-                qq(i,j,k,2,1) = qq(i,j,k,2,1) - b2*bmti1(j,k,itu2,itu1)
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2 &
-                     - b2*max(bmti1(j,k,itu2,itu2),zero)
-             else if(i == il) then
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1 &
-                     - d1*max(bmti2(j,k,itu1,itu1),zero)
-                qq(i,j,k,1,2) = qq(i,j,k,1,2) - d1*bmti2(j,k,itu1,itu2)
-                qq(i,j,k,2,1) = qq(i,j,k,2,1) - d2*bmti2(j,k,itu2,itu1)
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2 &
-                     - d2*max(bmti2(j,k,itu2,itu2),zero)
-             else
-                qq(i,j,k,1,1) = qq(i,j,k,1,1) + c1
-                qq(i,j,k,2,2) = qq(i,j,k,2,2) + c2
-             endif
-
-          enddo
-       enddo
-    enddo
-
-    ! Multiply the residual by the volume and store this in dw; this
-    ! is done for monitoring reasons only. The multiplication with the
-    ! volume is present to be consistent with the flow residuals; also
-    ! the negative value is taken, again to be consistent with the
-    ! flow equations. Also multiply by iblank so that no updates occur
-    ! in holes or the overset boundary.
-
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-             rblank = real(iblank(i,j,k), realType)
-             dw(i,j,k,itu1) = -volRef(i,j,k)*dvt(i,j,k,1)*rblank
-             dw(i,j,k,itu2) = -volRef(i,j,k)*dvt(i,j,k,2)*rblank
-          enddo
-       enddo
-    enddo
-
-    ! Initialize the wall function flags to .false.
-
-    flagI2 = .false.
-    flagIl = .false.
-    flagJ2 = .false.
-    flagJl = .false.
-    flagK2 = .false.
-    flagKl = .false.
-
-    ! Modify the rhs of the 1st internal cell, if wall functions
-    ! are used; their value is determined by the table.
-
-    testWallFunctions: if( wallFunctions ) then
-
-       bocos: do nn=1,nViscBocos
-
-          ! Determine the block face on which the subface is located
-          ! and set some variables. As flag points to the entire array
-          ! flagI2, etc., its starting indices are the starting indices
-          ! of its target and not 1.
-
-          select case (BCFaceID(nn))
-          case (iMin)
-             flag   => flagI2
-             dw2    => dw(2,1:,1:,1:);    dvt2 => dvt(2,1:,1:,1:)
-             w2     => w (2,1:,1:,1:);    rlv2 => rlv(2,1:,1:)
-             w3     => w (3,1:,1:,1:);    rlv3 => rlv(3,1:,1:)
-             d2Wall2=> d2Wall(2,:,:);     rev2 => rev(2,1:,1:)
-             d2Wall3=> d2Wall(3,:,:);     rev3 => rev(3,1:,1:)
-
-          case (iMax)
-             flag   => flagIl
-             dw2    => dw(il  ,1:,1:,1:); dvt2 => dvt(il  ,1:,1:,1:)
-             w2     => w (il  ,1:,1:,1:); rlv2 => rlv(il  ,1:,1:)
-             w3     => w (il-1,1:,1:,1:); rlv3 => rlv(il-1,1:,1:)
-             d2Wall2=> d2Wall(il  ,:,:);  rev2 => rev(il  ,1:,1:)
-             d2Wall3=> d2Wall(il-1,:,:);  rev3 => rev(il-1,1:,1:)
-
-          case (jMin)
-             flag   => flagJ2
-             dw2    => dw(1:,2,1:,1:);    dvt2 => dvt(1:,2,1:,1:)
-             w2     => w (1:,2,1:,1:);    rlv2 => rlv(1:,2,1:)
-             w3     => w (1:,3,1:,1:);    rlv3 => rlv(1:,3,1:)
-             d2Wall2=> d2Wall(:,2,:);     rev2 => rev(1:,2,1:)
-             d2Wall3=> d2Wall(:,3,:);     rev3 => rev(1:,3,1:)
-
-          case (jMax)
-             flag   => flagJl
-             dw2    => dw(1:,jl  ,1:,1:); dvt2 => dvt(1:,jl  ,1:,1:)
-             w2     => w (1:,jl  ,1:,1:); rlv2 => rlv(1:,jl  ,1:)
-             w3     => w (1:,jl-1,1:,1:); rlv3 => rlv(1:,jl-1,1:)
-             d2Wall2=> d2Wall(:,jl  ,:);  rev2 => rev(1:,jl  ,1:)
-             d2Wall3=> d2Wall(:,jl-1,:);  rev3 => rev(1:,jl-1,1:)
-
-          case (kMin)
-             flag   => flagK2
-             dw2    => dw(1:,1:,2,1:);    dvt2 => dvt(1:,1:,2,1:)
-             w2     => w (1:,1:,2,1:);    rlv2 => rlv(1:,1:,2)
-             w3     => w (1:,1:,3,1:);    rlv3 => rlv(1:,1:,3)
-             d2Wall2=> d2Wall(:,:,2);     rev2 => rev(1:,1:,2)
-             d2Wall3=> d2Wall(:,:,3);     rev3 => rev(1:,1:,3)
-
-          case (kMax)
-             flag   => flagKl
-             dw2    => dw(1:,1:,kl  ,1:); dvt2 => dvt(1:,1:,kl,1:)
-             w2     => w (1:,1:,kl  ,1:); rlv2 => rlv(1:,1:,kl)
-             w3     => w (1:,1:,kl-1,1:); rlv3 => rlv(1:,1:,kl-1)
-             d2Wall2=> d2Wall(:,:,kl  );  rev2 => rev(1:,1:,kl)
-             d2Wall3=> d2Wall(:,:,kl-1);  rev3 => rev(1:,1:,kl-1)
-
-          end select
-
-          ! Loop over the owned faces of this subface. Therefore the
-          ! nodal range of BCData must be used. The offset of +1 is
-          ! present, because the starting index of the cell range is
-          ! 1 larger than the starting index of the nodal range.
-
-          do j=(BCData(nn)%jnBeg+1),BCData(nn)%jnEnd
-             do i=(BCData(nn)%inBeg+1),BCData(nn)%inEnd
-
-                ! Enforce k and epsilon in the 1st internal cell from
-                ! the wall function table. There is an offset of -1 in
-                ! the wall distance. Note that the offset compared to
-                ! the current value must be stored. Also note that the
-                ! curve fits contain the non-dimensional values.
-
-                utau = viscSubface(nn)%utau(i,j)
-                yp = w2(i,j,irho)*d2Wall2(i-1,j-1)*utau/rlv2(i,j)
-
-                ! Set dw2 to zero for proper monitoring of the
-                ! convergence.
-
-                dw2(i,j,itu1) = zero
-                dw2(i,j,itu2) = zero
-
-                ! Get table values
-
-                call curveTupYp(tup(itu1:itu2), yp, itu1, itu2)
-                tu12  = tup(itu1)*utau**2
-                tu22  = tup(itu2)*utau**4/rlv2(i,j)*w2(i,j,irho)
-
-                ! Compute epsilon from balance
-
-                call curveTupYp(tup(itu5:itu5), yp, itu5, itu5)
-                tu52  = tup(itu5)*rlv2(i,j)
-                dtu23 = (w3(i,j,itu1)-tu12) &
-                     / (d2Wall3(i-1,j-1)-d2Wall2(i-1,j-1))
-                rnu23 = half*( (tu52     +rlv2(i,j))/w2(i,j,irho) + &
-                     (rev3(i,j)+rlv3(i,j))/w3(i,j,irho) )
-                prod2 = tu52/w2(i,j,irho)*(utau**2*w2(i,j,irho) &
-                     / (rlv2(i,j)+tu52))**2
-                tu22  = prod2 + rnu23*dtu23/(two*d2Wall2(i-1,j-1))
-
-                ! Set rhs to turbulence variables
-
-                ! dvt2(i,j,1) = (tu12 - w2(i,j,itu1))/alfaTurb
-                ! dvt2(i,j,2) = (tu22 - w2(i,j,itu2))/alfaTurb
-
-                dvt2(i,j,1) = (tu12 - w2(i,j,itu1))
-                dvt2(i,j,2) = (tu22 - w2(i,j,itu2))*0.01
-
-                ! Set the wall flag to .true.
-
-                flag(i,j) = .true.
-
-             enddo
-          enddo
-
-       enddo bocos
-    endif testWallFunctions
-
-    ! Return if only the residual must be computed.
-
-    if( resOnly ) return
-
-    ! Take the local time step into account. Use characteristic
-    ! time stepping, i.e. a matrix time step is used, where
-    ! dt is the inverse of the central jacobian times the cfl
-    ! number. To avoid a matrix inversion the following system
-    ! is solved. (I/dt + cc + bb + dd)*dw = rhs. Due to the
-    ! matrix time stepping I/dt = cc/cfl. As in the rest of the
-    ! algorithm only the modified central jacobian is used,
-    ! stored it now.
-
-    ! Compute the factor multiplying the central jacobian, which
-    ! is 1 + 1/cfl.
-
-    factor = one
-    if( wallFunctions ) factor = one/alfaTurb
-
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-             qq(i,j,k,1,1) = factor*qq(i,j,k,1,1)
-             qq(i,j,k,1,2) = factor*qq(i,j,k,1,2)
-             qq(i,j,k,2,1) = factor*qq(i,j,k,2,1)
-             qq(i,j,k,2,2) = factor*qq(i,j,k,2,2)
-
-             ! Set qq to 1 if the value is determined by the table.
-
-             if((i == 2  .and. flagI2(j,k)) .or. &
-                  (i == il .and. flagIl(j,k)) .or. &
-                  (j == 2  .and. flagJ2(i,k)) .or. &
-                  (j == jl .and. flagJl(i,k)) .or. &
-                  (k == 2  .and. flagK2(i,j)) .or. &
-                  (k == kl .and. flagKl(i,j))) then
-                qq(i,j,k,1,1) = one
-                qq(i,j,k,1,2) = zero
-                qq(i,j,k,2,1) = zero
-                qq(i,j,k,2,2) = one
-             endif
-          enddo
-       enddo
-    enddo
-
-    ! Initialize the grid velocity to zero. This value will be used
-    ! if the block is not moving.
-
-    qs = zero
-    !
-    !       dd-ADI step in j-direction. There is no particular reason to
-    !       start in j-direction, it just happened to be so. As we solve
-    !       in j-direction, the j-loop is the innermost loop.
-    !
-    do k=2,kl
-       do i=2,il
-          do j=2,jl
-
-             ! More or less the same code is executed here as above when
-             ! the residual was built. However, now the off-diagonal
-             ! terms for the dd-ADI must be built and stored. This could
-             ! have been done earlier, but then all the coefficients had
-             ! to be stored. To save memory, they are recomputed.
-             ! Consequently, see the j-loop to build the residual for
-             ! the comments.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i,j-1,k))
-             volpi = two/(vol(i,j,k) + vol(i,j+1,k))
-
-             xm = sj(i,j-1,k,1)*volmi
-             ym = sj(i,j-1,k,2)*volmi
-             zm = sj(i,j-1,k,3)*volmi
-             xp = sj(i,j,  k,1)*volpi
-             yp = sj(i,j,  k,2)*volpi
-             zp = sj(i,j,  k,3)*volpi
-
-             xa  = half*(sj(i,j,k,1) + sj(i,j-1,k,1))*voli
-             ya  = half*(sj(i,j,k,2) + sj(i,j-1,k,2))*voli
-             za  = half*(sj(i,j,k,3) + sj(i,j-1,k,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             ! Off-diagonal terms due to the diffusion terms
-             ! in j-direction.
-
-             rhoi = one/w(i,j,k,irho)
-             mulm = half*(rlv(i,j-1,k) + rlv(i,j,k))
-             mulp = half*(rlv(i,j+1,k) + rlv(i,j,k))
-             muem = half*(rev(i,j-1,k) + rev(i,j,k))
-             muep = half*(rev(i,j+1,k) + rev(i,j,k))
-
-             c1m = ttm*(mulm + sig1*muem)*rhoi
-             c1p = ttp*(mulp + sig1*muep)*rhoi
-
-             c2m = ttm*(mulm + sig2*muem)*rhoi
-             c2p = ttp*(mulp + sig2*muep)*rhoi
-
-             bb(1,j) = -c1m
-             dd(1,j) = -c1p
-             bb(2,j) = -c2m
-             dd(2,j) = -c2p
-
-             ! Compute the grid velocity if present.
-             ! It is taken as the average of j and j-1,
-
-             if( addGridVelocities ) &
-                  qs = half*(sFaceJ(i,j,k) + sFaceJ(i,j-1,k))*voli
-
-             ! Off-diagonal terms due to the advection term in
-             ! j-direction. First order approximation.
-
-             uu = xa*w(i,j,k,ivx) + ya*w(i,j,k,ivy) + za*w(i,j,k,ivz) - qs
-             um = zero
-             up = zero
-             if(uu < zero) um = uu
-             if(uu > zero) up = uu
-
-             bb(1,j) = bb(1,j) - up
-             dd(1,j) = dd(1,j) + um
-             bb(2,j) = bb(2,j) - up
-             dd(2,j) = dd(2,j) + um
-
-             ! Store the central jacobian and rhs in cc and ff.
-             ! Multiply the off-diagonal terms and rhs by the iblank
-             ! value so the update determined for iblank = 0 is zero.
-
-             rblank = real(iblank(i,j,k), realType)
-
-             cc(1,1,j) = qq(i,j,k,1,1)
-             cc(1,2,j) = qq(i,j,k,1,2)*rblank
-             cc(2,1,j) = qq(i,j,k,2,1)*rblank
-             cc(2,2,j) = qq(i,j,k,2,2)
-
-             ff(1,j) = dvt(i,j,k,1)*rblank
-             ff(2,j) = dvt(i,j,k,2)*rblank
-
-             bb(:,j) = bb(:,j)*rblank
-             dd(:,j) = dd(:,j)*rblank
-
-             ! Set off diagonal terms to zero if wall function are used.
-
-             if((i == 2  .and. flagI2(j,k)) .or. &
-                  (i == il .and. flagIl(j,k)) .or. &
-                  (j == 2  .and. flagJ2(i,k)) .or. &
-                  (j == jl .and. flagJl(i,k)) .or. &
-                  (k == 2  .and. flagK2(i,j)) .or. &
-                  (k == kl .and. flagKl(i,j))) then
-                bb(1,j) = zero
-                dd(1,j) = zero
-                bb(2,j) = zero
-                dd(2,j) = zero
-             endif
-
-          enddo
-
-          ! Solve the tri-diagonal system in j-direction.
-
-          call tdia3(2_intType, jl, bb, cc, dd, ff)
-
-          ! Determine the new rhs for the next direction.
-
-          do j=2,jl
-             dvt(i,j,k,1) = qq(i,j,k,1,1)*ff(1,j) + qq(i,j,k,1,2)*ff(2,j)
-             dvt(i,j,k,2) = qq(i,j,k,2,1)*ff(1,j) + qq(i,j,k,2,2)*ff(2,j)
-          enddo
-
-       enddo
-    enddo
-    !
-    !       dd-ADI step in i-direction. As we solve in i-direction, the
-    !       i-loop is the innermost loop.
-    !
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-
-             ! More or less the same code is executed here as above when
-             ! the residual was built. However, now the off-diagonal
-             ! terms for the dd-ADI must be built and stored. This could
-             ! have been done earlier, but then all the coefficients had
-             ! to be stored. To save memory, they are recomputed.
-             ! Consequently, see the i-loop to build the residual for
-             ! the comments.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i-1,j,k))
-             volpi = two/(vol(i,j,k) + vol(i+1,j,k))
-
-             xm = si(i-1,j,k,1)*volmi
-             ym = si(i-1,j,k,2)*volmi
-             zm = si(i-1,j,k,3)*volmi
-             xp = si(i,  j,k,1)*volpi
-             yp = si(i,  j,k,2)*volpi
-             zp = si(i,  j,k,3)*volpi
-
-             xa  = half*(si(i,j,k,1) + si(i-1,j,k,1))*voli
-             ya  = half*(si(i,j,k,2) + si(i-1,j,k,2))*voli
-             za  = half*(si(i,j,k,3) + si(i-1,j,k,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             ! Off-diagonal terms due to the diffusion terms
-             ! in i-direction.
-
-             rhoi = one/w(i,j,k,irho)
-             mulm = half*(rlv(i-1,j,k) + rlv(i,j,k))
-             mulp = half*(rlv(i+1,j,k) + rlv(i,j,k))
-             muem = half*(rev(i-1,j,k) + rev(i,j,k))
-             muep = half*(rev(i+1,j,k) + rev(i,j,k))
-
-             c1m = ttm*(mulm + sig1*muem)*rhoi
-             c1p = ttp*(mulp + sig1*muep)*rhoi
-
-             c2m = ttm*(mulm + sig2*muem)*rhoi
-             c2p = ttp*(mulp + sig2*muep)*rhoi
-
-             bb(1,i) = -c1m
-             dd(1,i) = -c1p
-             bb(2,i) = -c2m
-             dd(2,i) = -c2p
-
-             ! Compute the grid velocity if present.
-             ! It is taken as the average of i and i-1,
-
-             if( addGridVelocities ) &
-                  qs = half*(sFaceI(i,j,k) + sFaceI(i-1,j,k))*voli
-
-             ! Off-diagonal terms due to the advection term in
-             ! i-direction. First order approximation.
-
-             uu = xa*w(i,j,k,ivx) + ya*w(i,j,k,ivy) + za*w(i,j,k,ivz) - qs
-             um = zero
-             up = zero
-             if(uu < zero) um = uu
-             if(uu > zero) up = uu
-
-             bb(1,i) = bb(1,i) - up
-             dd(1,i) = dd(1,i) + um
-             bb(2,i) = bb(2,i) - up
-             dd(2,i) = dd(2,i) + um
-
-             ! Store the central jacobian and rhs in cc and ff.
-             ! Multiply the off-diagonal terms and rhs by the iblank
-             ! value so the update determined for iblank = 0 is zero.
-
-             rblank = real(iblank(i,j,k), realType)
-
-             cc(1,1,i) = qq(i,j,k,1,1)
-             cc(1,2,i) = qq(i,j,k,1,2)*rblank
-             cc(2,1,i) = qq(i,j,k,2,1)*rblank
-             cc(2,2,i) = qq(i,j,k,2,2)
-
-             ff(1,i) = dvt(i,j,k,1)*rblank
-             ff(2,i) = dvt(i,j,k,2)*rblank
-
-             bb(:,i) = bb(:,i)*rblank
-             dd(:,i) = dd(:,i)*rblank
-
-             ! Set off diagonal terms to zero if wall function are used.
-
-             if((i == 2  .and. flagI2(j,k)) .or. &
-                  (i == il .and. flagIl(j,k)) .or. &
-                  (j == 2  .and. flagJ2(i,k)) .or. &
-                  (j == jl .and. flagJl(i,k)) .or. &
-                  (k == 2  .and. flagK2(i,j)) .or. &
-                  (k == kl .and. flagKl(i,j))) then
-                bb(1,i) = zero
-                dd(1,i) = zero
-                bb(2,i) = zero
-                dd(2,i) = zero
-             endif
-
-          enddo
-
-          ! Solve the tri-diagonal system in i-direction.
-
-          call tdia3(2_intType, il, bb, cc, dd, ff)
-
-          ! Determine the new rhs for the next direction.
-
-          do i=2,il
-             dvt(i,j,k,1) = qq(i,j,k,1,1)*ff(1,i) + qq(i,j,k,1,2)*ff(2,i)
-             dvt(i,j,k,2) = qq(i,j,k,2,1)*ff(1,i) + qq(i,j,k,2,2)*ff(2,i)
-          enddo
-
-       enddo
-    enddo
-    !
-    !       dd-ADI step in k-direction. As we solve in k-direction, the
-    !       k-loop is the innermost loop.
-    !
-    do j=2,jl
-       do i=2,il
-          do k=2,kl
-
-             ! More or less the same code is executed here as above when
-             ! the residual was built. However, now the off-diagonal
-             ! terms for the dd-ADI must be built and stored. This could
-             ! have been done earlier, but then all the coefficients had
-             ! to be stored. To save memory, they are recomputed.
-             ! Consequently, see the k-loop to build the residual for
-             ! the comments.
-
-             voli  = one/vol(i,j,k)
-             volmi = two/(vol(i,j,k) + vol(i,j,k-1))
-             volpi = two/(vol(i,j,k) + vol(i,j,k+1))
-
-             xm = sk(i,j,k-1,1)*volmi
-             ym = sk(i,j,k-1,2)*volmi
-             zm = sk(i,j,k-1,3)*volmi
-             xp = sk(i,j,k,  1)*volpi
-             yp = sk(i,j,k,  2)*volpi
-             zp = sk(i,j,k,  3)*volpi
-
-             xa  = half*(sk(i,j,k,1) + sk(i,j,k-1,1))*voli
-             ya  = half*(sk(i,j,k,2) + sk(i,j,k-1,2))*voli
-             za  = half*(sk(i,j,k,3) + sk(i,j,k-1,3))*voli
-             ttm = xm*xa + ym*ya + zm*za
-             ttp = xp*xa + yp*ya + zp*za
-
-             ! Off-diagonal terms due to the diffusion terms
-             ! in k-direction.
-
-             rhoi = one/w(i,j,k,irho)
-             mulm = half*(rlv(i,j,k-1) + rlv(i,j,k))
-             mulp = half*(rlv(i,j,k+1) + rlv(i,j,k))
-             muem = half*(rev(i,j,k-1) + rev(i,j,k))
-             muep = half*(rev(i,j,k+1) + rev(i,j,k))
-
-             c1m = ttm*(mulm + sig1*muem)*rhoi
-             c1p = ttp*(mulp + sig1*muep)*rhoi
-
-             c2m = ttm*(mulm + sig2*muem)*rhoi
-             c2p = ttp*(mulp + sig2*muep)*rhoi
-
-             bb(1,k) = -c1m
-             dd(1,k) = -c1p
-             bb(2,k) = -c2m
-             dd(2,k) = -c2p
-
-             ! Compute the grid velocity if present.
-             ! It is taken as the average of k and k-1,
-
-             if( addGridVelocities ) &
-                  qs = half*(sFaceK(i,j,k) + sFaceK(i,j,k-1))*voli
-
-             ! Off-diagonal terms due to the advection term in
-             ! k-direction. First order approximation.
-
-             uu = xa*w(i,j,k,ivx) + ya*w(i,j,k,ivy) + za*w(i,j,k,ivz) - qs
-             um = zero
-             up = zero
-             if(uu < zero) um = uu
-             if(uu > zero) up = uu
-
-             bb(1,k) = bb(1,k) - up
-             dd(1,k) = dd(1,k) + um
-             bb(2,k) = bb(2,k) - up
-             dd(2,k) = dd(2,k) + um
-
-             ! Store the central jacobian and rhs in cc and ff.
-             ! Multiply the off-diagonal terms and rhs by the iblank
-             ! value so the update determined for iblank = 0 is zero.
-
-             rblank = real(iblank(i,j,k), realType)
-
-             cc(1,1,k) = qq(i,j,k,1,1)
-             cc(1,2,k) = qq(i,j,k,1,2)*rblank
-             cc(2,1,k) = qq(i,j,k,2,1)*rblank
-             cc(2,2,k) = qq(i,j,k,2,2)
-
-             ff(1,k) = dvt(i,j,k,1)*rblank
-             ff(2,k) = dvt(i,j,k,2)*rblank
-
-             bb(:,k) = bb(:,k)*rblank
-             dd(:,k) = dd(:,k)*rblank
-
-             ! Set off diagonal terms to zero if wall function are used.
-
-             if((i == 2  .and. flagI2(j,k)) .or. &
-                  (i == il .and. flagIl(j,k)) .or. &
-                  (j == 2  .and. flagJ2(i,k)) .or. &
-                  (j == jl .and. flagJl(i,k)) .or. &
-                  (k == 2  .and. flagK2(i,j)) .or. &
-                  (k == kl .and. flagKl(i,j))) then
-                bb(1,k) = zero
-                dd(1,k) = zero
-                bb(2,k) = zero
-                dd(2,k) = zero
-             endif
-
-          enddo
-
-          ! Solve the tri-diagonal system in k-direction.
-
-          call tdia3(2_intType, kl, bb, cc, dd, ff)
-
-          ! Store the update in dvt.
-
-          do k=2,kl
-             dvt(i,j,k,1) = ff(1,k)
-             dvt(i,j,k,2) = ff(2,k)
-          enddo
-
-       enddo
-    enddo
-    !
-    !       Update the turbulent variables.
-    !
-    factor = alfaTurb
-    if( wallFunctions ) factor = one
-    do k=2,kl
-       do j=2,jl
-          do i=2,il
-             w(i,j,k,itu1) = w(i,j,k,itu1) + factor*dvt(i,j,k,1)
-             w(i,j,k,itu2) = w(i,j,k,itu2) + factor*dvt(i,j,k,2)
-          enddo
-       enddo
-    enddo
-
-  end subroutine keSolve
+    subroutine vf_block(resOnly)
+        !
+        !       vf solves the transport equations for the v2-f model
+        !       in a coupled manner using a diagonal dominant ADI-scheme.
+        !
+        use constants
+        use blockPointers, only: il, jl, kl
+        use inputTimeSpectral
+        use iteration
+        use utils, only: setPointers
+        use turbUtils, only: vfEddyViscosity, vfScale, unsteadyTurbTerm
+        use turbBCRoutines, only: bcTurbTreatment, applyAllTurbBCThisBlock
+
+        implicit none
+        !
+        !      Subroutine argument.
+        !
+        logical, intent(in) :: resOnly
+
+        ! Set the arrays for the boundary condition treatment.
+
+        call bcTurbTreatment
+
+        ! Compute time and length scale
+
+        call vfScale
+
+        ! Solve the transport equations for k and epsilon.
+
+        call keSolve(resOnly)
+
+        ! Solve the transport equation for v2 and the elliptic
+        ! equation for f.
+
+        call vfSolve(resOnly)
+
+        ! The eddy viscosity and the boundary conditions are only
+        ! applied if actual updates have been computed in keSolve
+        ! and vfSolve.
+
+        if (.not. resOnly) then
+
+            ! Compute the corresponding eddy viscosity.
+
+            call vfEddyViscosity(2, il, 2, jl, 2, kl)
+
+            ! Set the halo values for the turbulent variables.
+            ! We are on the finest mesh, so the second layer of halo
+            ! cells must be computed as well.
+
+            call applyAllTurbBCThisBlock(.true.)
+
+        end if
+
+    end subroutine vf_block
+
+    subroutine vfSolve(resOnly)
+        !
+        !       vfSolve solves the v2 transport equation and the
+        !       f elliptic relaxation equation of the v2-f model
+        !       in a coupled manner using a diagonal dominant ADI-scheme.
+        !
+        use blockPointers
+        use constants
+        use flowVarRefState
+        use inputIteration
+        use inputPhysics
+        use paramTurb
+        use turbMod, only: prod, dvt, sig1, sig2, sct, scl2
+        use turbUtils, only: turbAdvection, unsteadyTurbTerm, tdia3
+        use turbCurveFits, only: curveTupYp
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        logical, intent(in) :: resOnly
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, nn
+
+        real(kind=realType) :: rhoi, ss, spk, ff1, ff2, ff3, ss1, ss2, ss3
+        real(kind=realType) :: voli, volmi, volpi
+        real(kind=realType) :: xm, ym, zm, xp, yp, zp, xa, ya, za
+        real(kind=realType) :: ttm, ttp, mulm, mulp, muem, muep
+        real(kind=realType) :: c1m, c1p, c10, c2m, c2p, c20
+        real(kind=realType) :: b1, b2, c1, c2, d1, d2
+        real(kind=realType) :: qs, uu, um, up, utau
+        real(kind=realType) :: tke, tep, tv2, tf2, tkea, tepa, tv2a
+        real(kind=realType) :: tkel, tv2l, sle2i, stei
+        real(kind=realType) :: rsct, rnu
+        real(kind=realType) :: tu12, tu22, tu32, tu42, tu52
+        real(kind=realType) :: rnu23, dtu23, rblank
+
+        real(kind=realType), dimension(itu1:itu5) :: tup
+
+        real(kind=realType), dimension(2:il, 2:jl, 2:kl, 2, 2) :: qq
+        real(kind=realType), dimension(2, 2:max(il, jl, kl)) :: bb, dd, ff
+        real(kind=realType), dimension(2, 2, 2:max(il, jl, kl)) :: cc
+
+        real(kind=realType), dimension(:, :, :), pointer :: dw2, dvt2, w2, w3
+        real(kind=realType), dimension(:, :), pointer :: rlv2, rlv3
+        real(kind=realType), dimension(:, :), pointer :: rev2, rev3
+        real(kind=realType), dimension(:, :), pointer :: d2Wall2, d2Wall3
+
+        logical, dimension(2:jl, 2:kl), target :: flagI2, flagIl
+        logical, dimension(2:il, 2:kl), target :: flagJ2, flagJl
+        logical, dimension(2:il, 2:jl), target :: flagK2, flagKl
+
+        logical, dimension(:, :), pointer :: flag
+
+        ! Set model constants
+
+        sig1 = rvfSigv1
+        sig2 = one
+        !
+        !       Source terms.
+        !       Determine the source term and its derivative w.r.t. v2 and f
+        !       for all internal cells of the block.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Compute the source terms for both the k and the epsilon
+                    ! equation. Note that dw(i,j,k,iprod) contains the unscaled
+                    ! production term.
+
+                    rhoi = one / w(i, j, k, irho)
+                    tke = w(i, j, k, itu1)
+                    tep = w(i, j, k, itu2)
+                    tv2 = w(i, j, k, itu3)
+                    tf2 = w(i, j, k, itu4)
+                    tkea = abs(tke)
+                    tepa = abs(tep)
+                    tv2a = abs(tv2)
+                    tkel = max(tkea, rvfLimitK)
+                    tv2l = max(tv2a, rvfLimitK * 0.666666666_realType)
+                    stei = tepa / sct(i, j, k)
+                    sle2i = tepa**2 / scl2(i, j, k)
+
+                    if (rvfN == 6) then
+                        rnu = rlv(i, j, k) * rhoi
+                        rsct = max(tkea, 6.*sqrt(rnu * tepa))
+                        stei = tepa / rsct
+
+                        ! rn2  = rvfCn**2*(rnu*tepa)**1.5
+                        ! sle2i= rvfCl**2*max(tkea**3,rn2)
+                    end if
+
+                    ss = prod(i, j, k)
+                    spk = rev(i, j, k) * ss * rhoi
+                    spk = min(spk, pklim * tepa)
+
+                    ff1 = -tepa / tkel
+                    ff2 = tkel
+                    ff3 = zero
+
+                    ss1 = -(rvfC1 - 1.) / tkel * stei * sle2i
+                    ss2 = -sle2i
+                    ss3 = (rvfC1 - 1.) * 2./3.*stei * sle2i + rvfC2 * spk / tkel * sle2i
+
+                    if (rvfN == 6) then
+                        ff1 = ff1 - 5.0 * tepa / tkel
+                        ss1 = ss1 + 5.0 / tkel * stei * sle2i
+                    end if
+
+                    dvt(i, j, k, 1) = ff1 * tv2 + ff2 * tf2 + ff3
+                    dvt(i, j, k, 2) = ss1 * tv2 + ss2 * tf2 + ss3
+
+                    ! Compute the source term jacobian. Note that only the
+                    ! destruction terms are linearized to increase the diagonal
+                    ! dominance of the matrix. Furthermore minus the source
+                    ! term jacobian is stored.
+
+                    qq(i, j, k, 1, 1) = -ff1
+                    qq(i, j, k, 1, 2) = -ff2
+                    qq(i, j, k, 2, 1) = -ss1
+                    qq(i, j, k, 2, 2) = -ss2
+
+                end do
+            end do
+        end do
+        !
+        !       Advection and unsteady terms.
+        !
+        nn = itu3 - 1
+        call turbAdvection(2_intType, 1_intType, nn, qq)
+
+        call unsteadyTurbTerm(2_intType, 1_intType, nn, qq)
+        !
+        !       Viscous terms in k-direction.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Compute the metrics in zeta-direction, i.e. along the
+                    ! line k = constant.
+
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i, j, k - 1))
+                    volpi = two / (vol(i, j, k) + vol(i, j, k + 1))
+
+                    xm = sk(i, j, k - 1, 1) * volmi
+                    ym = sk(i, j, k - 1, 2) * volmi
+                    zm = sk(i, j, k - 1, 3) * volmi
+                    xp = sk(i, j, k, 1) * volpi
+                    yp = sk(i, j, k, 2) * volpi
+                    zp = sk(i, j, k, 3) * volpi
+
+                    xa = half * (sk(i, j, k, 1) + sk(i, j, k - 1, 1)) * voli
+                    ya = half * (sk(i, j, k, 2) + sk(i, j, k - 1, 2)) * voli
+                    za = half * (sk(i, j, k, 3) + sk(i, j, k - 1, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
+
+                    ! Computation of the viscous terms in zeta-direction; note
+                    ! that cross-derivatives are neglected, i.e. the mesh is
+                    ! assumed to be orthogonal.
+                    ! The second derivative in zeta-direction is constructed as
+                    ! the central difference of the first order derivatives, i.e.
+                    ! d^2/dzeta^2 = d/dzeta (d/dzeta k+1/2 - d/dzeta k-1/2).
+                    ! In this way the metric as well as the varying viscosity
+                    ! can be taken into account; the latter appears inside the
+                    ! d/dzeta derivative. The whole term is divided by rho to
+                    ! obtain the diffusion term for v2 and f.
+
+                    rhoi = one / w(i, j, k, irho)
+                    mulm = half * (rlv(i, j, k - 1) + rlv(i, j, k))
+                    mulp = half * (rlv(i, j, k + 1) + rlv(i, j, k))
+                    muem = half * (rev(i, j, k - 1) + rev(i, j, k))
+                    muep = half * (rev(i, j, k + 1) + rev(i, j, k))
+
+                    c1m = ttm * (mulm + sig1 * muem) * rhoi
+                    c1p = ttp * (mulp + sig1 * muep) * rhoi
+                    c10 = c1m + c1p
+
+                    c2m = ttm
+                    c2p = ttp
+                    c20 = c2m + c2p
+
+                    ! Update the residual for this cell and store the possible
+                    ! coefficients for the matrix in b1, b2, c1, c2, d1 and d2.
+
+                    dvt(i, j, k, 1) = dvt(i, j, k, 1) + c1m * w(i, j, k - 1, itu3) &
+                                      - c10 * w(i, j, k, itu3) + c1p * w(i, j, k + 1, itu3)
+                    dvt(i, j, k, 2) = dvt(i, j, k, 2) + c2m * w(i, j, k - 1, itu4) &
+                                      - c20 * w(i, j, k, itu4) + c2p * w(i, j, k + 1, itu4)
+
+                    b1 = -c1m
+                    c1 = c10
+                    d1 = -c1p
+
+                    b2 = -c2m
+                    c2 = c20
+                    d2 = -c2p
+
+                    ! Update the central jacobian. For nonboundary cells this
+                    ! is simply c1 and c2. For boundary cells this is slightly
+                    ! more complicated, because the boundary conditions are
+                    ! treated implicitly and the off-diagonal terms b1, b2 and
+                    ! d1, d2 must be taken into account.
+                    ! The boundary conditions are only treated implicitly if
+                    ! the diagonal dominance of the matrix is increased.
+
+                    if (k == 2) then
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1 &
+                                            - b1 * max(bmtk1(i, j, itu3, itu3), zero)
+                        qq(i, j, k, 1, 2) = qq(i, j, k, 1, 2) - b1 * bmtk1(i, j, itu3, itu4)
+                        qq(i, j, k, 2, 1) = qq(i, j, k, 2, 1) - b2 * bmtk1(i, j, itu4, itu3)
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2 &
+                                            - b2 * max(bmtk1(i, j, itu4, itu4), zero)
+                    else if (k == kl) then
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1 &
+                                            - d1 * max(bmtk2(i, j, itu3, itu3), zero)
+                        qq(i, j, k, 1, 2) = qq(i, j, k, 1, 2) - d1 * bmtk2(i, j, itu3, itu4)
+                        qq(i, j, k, 2, 1) = qq(i, j, k, 2, 1) - d2 * bmtk2(i, j, itu4, itu3)
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2 &
+                                            - d2 * max(bmtk2(i, j, itu4, itu4), zero)
+                    else
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2
+                    end if
+
+                end do
+            end do
+        end do
+        !
+        !       Viscous terms in j-direction.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Compute the metrics in eta-direction, i.e. along the
+                    ! line j = constant.
+
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i, j - 1, k))
+                    volpi = two / (vol(i, j, k) + vol(i, j + 1, k))
+
+                    xm = sj(i, j - 1, k, 1) * volmi
+                    ym = sj(i, j - 1, k, 2) * volmi
+                    zm = sj(i, j - 1, k, 3) * volmi
+                    xp = sj(i, j, k, 1) * volpi
+                    yp = sj(i, j, k, 2) * volpi
+                    zp = sj(i, j, k, 3) * volpi
+
+                    xa = half * (sj(i, j, k, 1) + sj(i, j - 1, k, 1)) * voli
+                    ya = half * (sj(i, j, k, 2) + sj(i, j - 1, k, 2)) * voli
+                    za = half * (sj(i, j, k, 3) + sj(i, j - 1, k, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
+
+                    ! Computation of the viscous terms in eta-direction; note
+                    ! that cross-derivatives are neglected, i.e. the mesh is
+                    ! assumed to be orthogonal.
+                    ! The second derivative in eta-direction is constructed as
+                    ! the central difference of the first order derivatives, i.e.
+                    ! d^2/deta^2 = d/deta (d/deta j+1/2 - d/deta j-1/2).
+                    ! In this way the metric as well as the varying viscosity
+                    ! can be taken into account; the latter appears inside the
+                    ! d/deta derivative. The whole term is divided by rho to
+                    ! obtain the diffusion term for v2 and f.
+
+                    rhoi = one / w(i, j, k, irho)
+                    mulm = half * (rlv(i, j - 1, k) + rlv(i, j, k))
+                    mulp = half * (rlv(i, j + 1, k) + rlv(i, j, k))
+                    muem = half * (rev(i, j - 1, k) + rev(i, j, k))
+                    muep = half * (rev(i, j + 1, k) + rev(i, j, k))
+
+                    c1m = ttm * (mulm + sig1 * muem) * rhoi
+                    c1p = ttp * (mulp + sig1 * muep) * rhoi
+                    c10 = c1m + c1p
+
+                    c2m = ttm
+                    c2p = ttp
+                    c20 = c2m + c2p
+
+                    ! Update the residual for this cell and store the possible
+                    ! coefficients for the matrix in b1, b2, c1, c2, d1 and d2.
+
+                    dvt(i, j, k, 1) = dvt(i, j, k, 1) + c1m * w(i, j - 1, k, itu3) &
+                                      - c10 * w(i, j, k, itu3) + c1p * w(i, j + 1, k, itu3)
+                    dvt(i, j, k, 2) = dvt(i, j, k, 2) + c2m * w(i, j - 1, k, itu4) &
+                                      - c20 * w(i, j, k, itu4) + c2p * w(i, j + 1, k, itu4)
+
+                    b1 = -c1m
+                    c1 = c10
+                    d1 = -c1p
+
+                    b2 = -c2m
+                    c2 = c20
+                    d2 = -c2p
+
+                    ! Update the central jacobian. For nonboundary cells this
+                    ! is simply c1 and c2. For boundary cells this is slightly
+                    ! more complicated, because the boundary conditions are
+                    ! treated implicitly and the off-diagonal terms b1, b2 and
+                    ! d1, d2 must be taken into account.
+                    ! The boundary conditions are only treated implicitly if
+                    ! the diagonal dominance of the matrix is increased.
+
+                    if (j == 2) then
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1 &
+                                            - b1 * max(bmtj1(i, k, itu3, itu3), zero)
+                        qq(i, j, k, 1, 2) = qq(i, j, k, 1, 2) - b1 * bmtj1(i, k, itu3, itu4)
+                        qq(i, j, k, 2, 1) = qq(i, j, k, 2, 1) - b2 * bmtj1(i, k, itu4, itu3)
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2 &
+                                            - b2 * max(bmtj1(i, k, itu4, itu4), zero)
+                    else if (j == jl) then
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1 &
+                                            - d1 * max(bmtj2(i, k, itu3, itu3), zero)
+                        qq(i, j, k, 1, 2) = qq(i, j, k, 1, 2) - d1 * bmtj2(i, k, itu3, itu4)
+                        qq(i, j, k, 2, 1) = qq(i, j, k, 2, 1) - d2 * bmtj2(i, k, itu4, itu3)
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2 &
+                                            - d2 * max(bmtj2(i, k, itu4, itu4), zero)
+                    else
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2
+                    end if
+
+                end do
+            end do
+        end do
+        !
+        !       Viscous terms in i-direction.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Compute the metrics in xi-direction, i.e. along the
+                    ! line i = constant.
+
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i - 1, j, k))
+                    volpi = two / (vol(i, j, k) + vol(i + 1, j, k))
+
+                    xm = si(i - 1, j, k, 1) * volmi
+                    ym = si(i - 1, j, k, 2) * volmi
+                    zm = si(i - 1, j, k, 3) * volmi
+                    xp = si(i, j, k, 1) * volpi
+                    yp = si(i, j, k, 2) * volpi
+                    zp = si(i, j, k, 3) * volpi
+
+                    xa = half * (si(i, j, k, 1) + si(i - 1, j, k, 1)) * voli
+                    ya = half * (si(i, j, k, 2) + si(i - 1, j, k, 2)) * voli
+                    za = half * (si(i, j, k, 3) + si(i - 1, j, k, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
+
+                    ! Computation of the viscous terms in xi-direction; note
+                    ! that cross-derivatives are neglected, i.e. the mesh is
+                    ! assumed to be orthogonal.
+                    ! The second derivative in xi-direction is constructed as
+                    ! the central difference of the first order derivatives, i.e.
+                    ! d^2/dxi^2 = d/dxi (d/dxi i+1/2 - d/dxi i-1/2).
+                    ! In this way the metric as well as the varying viscosity
+                    ! can be taken into account; the latter appears inside the
+                    ! d/dxi derivative. The whole term is divided by rho to
+                    ! obtain the diffusion term for v2 and f.
+
+                    rhoi = one / w(i, j, k, irho)
+                    mulm = half * (rlv(i - 1, j, k) + rlv(i, j, k))
+                    mulp = half * (rlv(i + 1, j, k) + rlv(i, j, k))
+                    muem = half * (rev(i - 1, j, k) + rev(i, j, k))
+                    muep = half * (rev(i + 1, j, k) + rev(i, j, k))
+
+                    c1m = ttm * (mulm + sig1 * muem) * rhoi
+                    c1p = ttp * (mulp + sig1 * muep) * rhoi
+                    c10 = c1m + c1p
+
+                    c2m = ttm
+                    c2p = ttp
+                    c20 = c2m + c2p
+
+                    ! Update the residual for this cell and store the possible
+                    ! coefficients for the matrix in b1, b2, c1, c2, d1 and d2.
+
+                    dvt(i, j, k, 1) = dvt(i, j, k, 1) + c1m * w(i - 1, j, k, itu3) &
+                                      - c10 * w(i, j, k, itu3) + c1p * w(i + 1, j, k, itu3)
+                    dvt(i, j, k, 2) = dvt(i, j, k, 2) + c2m * w(i - 1, j, k, itu4) &
+                                      - c20 * w(i, j, k, itu4) + c2p * w(i + 1, j, k, itu4)
+
+                    b1 = -c1m
+                    c1 = c10
+                    d1 = -c1p
+
+                    b2 = -c2m
+                    c2 = c20
+                    d2 = -c2p
+
+                    ! Update the central jacobian. For nonboundary cells this
+                    ! is simply c1 and c2. For boundary cells this is slightly
+                    ! more complicated, because the boundary conditions are
+                    ! treated implicitly and the off-diagonal terms b1, b2 and
+                    ! d1, d2 must be taken into account.
+                    ! The boundary conditions are only treated implicitly if
+                    ! the diagonal dominance of the matrix is increased.
+
+                    if (i == 2) then
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1 &
+                                            - b1 * max(bmti1(j, k, itu3, itu3), zero)
+                        qq(i, j, k, 1, 2) = qq(i, j, k, 1, 2) - b1 * bmti1(j, k, itu3, itu4)
+                        qq(i, j, k, 2, 1) = qq(i, j, k, 2, 1) - b2 * bmti1(j, k, itu4, itu3)
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2 &
+                                            - b2 * max(bmti1(j, k, itu4, itu4), zero)
+                    else if (i == il) then
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1 &
+                                            - d1 * max(bmti2(j, k, itu3, itu3), zero)
+                        qq(i, j, k, 1, 2) = qq(i, j, k, 1, 2) - d1 * bmti2(j, k, itu3, itu4)
+                        qq(i, j, k, 2, 1) = qq(i, j, k, 2, 1) - d2 * bmti2(j, k, itu4, itu3)
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2 &
+                                            - d2 * max(bmti2(j, k, itu4, itu4), zero)
+                    else
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2
+                    end if
+
+                end do
+            end do
+        end do
+
+        ! Multiply the residual by the volume and store this in dw; this
+        ! is done for monitoring reasons only. The multiplication with the
+        ! volume is present to be consistent with the flow residuals; also
+        ! the negative value is taken, again to be consistent with the
+        ! flow equations. Also multiply by iblank so that no updates occur
+        ! in holes or the overset boundary.
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    rblank = real(iblank(i, j, k), realType)
+                    dw(i, j, k, itu3) = -volRef(i, j, k) * dvt(i, j, k, 1) * rblank
+                    dw(i, j, k, itu4) = -volRef(i, j, k) * dvt(i, j, k, 2) * rblank
+                end do
+            end do
+        end do
+
+        ! Initialize the wall function flags to .false.
+
+        flagI2 = .false.
+        flagIl = .false.
+        flagJ2 = .false.
+        flagJl = .false.
+        flagK2 = .false.
+        flagKl = .false.
+
+        ! Modify the rhs of the 1st internal cell, if wall functions
+        ! are used; their value is determined by the table.
+
+        testWallFunctions: if (wallFunctions) then
+
+            bocos: do nn = 1, nViscBocos
+
+                ! Determine the block face on which the subface is located
+                ! and set some variables. As flag points to the entire array
+                ! flagI2, etc., its starting indices are the starting indices
+                ! of its target and not 1.
+
+                select case (BCFaceID(nn))
+                case (iMin)
+                    flag => flagI2
+                    dw2 => dw(2, 1:, 1:, 1:); dvt2 => dvt(2, 1:, 1:, 1:)
+                    w2 => w(2, 1:, 1:, 1:); rlv2 => rlv(2, 1:, 1:)
+                    w3 => w(3, 1:, 1:, 1:); rlv3 => rlv(3, 1:, 1:)
+                    d2Wall2 => d2Wall(2, :, :); rev2 => rev(2, 1:, 1:)
+                    d2Wall3 => d2Wall(3, :, :); rev3 => rev(3, 1:, 1:)
+
+                case (iMax)
+                    flag => flagIl
+                    dw2 => dw(il, 1:, 1:, 1:); dvt2 => dvt(il, 1:, 1:, 1:)
+                    w2 => w(il, 1:, 1:, 1:); rlv2 => rlv(il, 1:, 1:)
+                    w3 => w(il - 1, 1:, 1:, 1:); rlv3 => rlv(il - 1, 1:, 1:)
+                    d2Wall2 => d2Wall(il, :, :); rev2 => rev(il, 1:, 1:)
+                    d2Wall3 => d2Wall(il - 1, :, :); rev3 => rev(il - 1, 1:, 1:)
+
+                case (jMin)
+                    flag => flagJ2
+                    dw2 => dw(1:, 2, 1:, 1:); dvt2 => dvt(1:, 2, 1:, 1:)
+                    w2 => w(1:, 2, 1:, 1:); rlv2 => rlv(1:, 2, 1:)
+                    w3 => w(1:, 3, 1:, 1:); rlv3 => rlv(1:, 3, 1:)
+                    d2Wall2 => d2Wall(:, 2, :); rev2 => rev(1:, 2, 1:)
+                    d2Wall3 => d2Wall(:, 3, :); rev3 => rev(1:, 3, 1:)
+
+                case (jMax)
+                    flag => flagJl
+                    dw2 => dw(1:, jl, 1:, 1:); dvt2 => dvt(1:, jl, 1:, 1:)
+                    w2 => w(1:, jl, 1:, 1:); rlv2 => rlv(1:, jl, 1:)
+                    w3 => w(1:, jl - 1, 1:, 1:); rlv3 => rlv(1:, jl - 1, 1:)
+                    d2Wall2 => d2Wall(:, jl, :); rev2 => rev(1:, jl, 1:)
+                    d2Wall3 => d2Wall(:, jl - 1, :); rev3 => rev(1:, jl - 1, 1:)
+
+                case (kMin)
+                    flag => flagK2
+                    dw2 => dw(1:, 1:, 2, 1:); dvt2 => dvt(1:, 1:, 2, 1:)
+                    w2 => w(1:, 1:, 2, 1:); rlv2 => rlv(1:, 1:, 2)
+                    w3 => w(1:, 1:, 3, 1:); rlv3 => rlv(1:, 1:, 3)
+                    d2Wall2 => d2Wall(:, :, 2); rev2 => rev(1:, 1:, 2)
+                    d2Wall3 => d2Wall(:, :, 3); rev3 => rev(1:, 1:, 3)
+
+                case (kMax)
+                    flag => flagKl
+                    dw2 => dw(1:, 1:, kl, 1:); dvt2 => dvt(1:, 1:, kl, 1:)
+                    w2 => w(1:, 1:, kl, 1:); rlv2 => rlv(1:, 1:, kl)
+                    w3 => w(1:, 1:, kl - 1, 1:); rlv3 => rlv(1:, 1:, kl - 1)
+                    d2Wall2 => d2Wall(:, :, kl); rev2 => rev(1:, 1:, kl)
+                    d2Wall3 => d2Wall(:, :, kl - 1); rev3 => rev(1:, 1:, kl - 1)
+
+                end select
+
+                ! Loop over the owned faces of this subface. Therefore the
+                ! nodal range of BCData must be used. The offset of +1 is
+                ! present, because the starting index of the cell range is
+                ! 1 larger than the starting index of the nodal range.
+
+                do j = (BCData(nn)%jnBeg + 1), BCData(nn)%jnEnd
+                    do i = (BCData(nn)%inBeg + 1), BCData(nn)%inEnd
+
+                        ! Enforce v and f in the 1st internal cell from
+                        ! the wall function table. There is an offset of -1 in
+                        ! the wall distance. Note that the offset compared to
+                        ! the current value must be stored. Also note that the
+                        ! curve fits contain the non-dimensional values.
+
+                        utau = viscSubface(nn)%utau(i, j)
+                        yp = w2(i, j, irho) * d2Wall2(i - 1, j - 1) * utau / rlv2(i, j)
+
+                        ! Set dw2 to zero for proper monitoring of the
+                        ! convergence.
+
+                        dw2(i, j, itu3) = zero
+                        dw2(i, j, itu4) = zero
+
+                        ! Get table values
+
+                        call curveTupYp(tup(itu3:itu4), yp, itu3, itu4)
+                        tu32 = tup(itu3) * utau**2
+                        tu42 = tup(itu4) * utau**2 / rlv2(i, j) * w2(i, j, irho)
+
+                        ! Compute f from balance
+
+                        if (rvfN .eq. 1) then
+                            call curveTupYp(tup(itu1:itu2), yp, itu1, itu2)
+                            tu12 = tup(itu1) * utau**2
+                            tu22 = tup(itu2) * utau**4 / rlv2(i, j) * w2(i, j, irho)
+                            call curveTupYp(tup(itu5:itu5), yp, itu5, itu5)
+                            tu52 = tup(itu5) * rlv2(i, j)
+                            dtu23 = (w3(i, j, itu3) - tu32) &
+                                    / (d2Wall3(i - 1, j - 1) - d2Wall2(i - 1, j - 1))
+                            rnu23 = half * ((tu52 + rlv2(i, j)) / w2(i, j, irho) + &
+                                            (rev3(i, j) + rlv3(i, j)) / w3(i, j, irho))
+                            tu42 = (tu22 * tu32 / tu12 - rnu23 * dtu23 &
+                                    / (two * d2Wall2(i - 1, j - 1))) / tu12
+                        end if
+
+                        ! Set rhs to turbulence variables divide by betaTurb
+                        ! because the update is scaled by betaTurb.
+                        ! (see end of routine)
+
+                        dvt2(i, j, 1) = (tu32 - w2(i, j, itu3)) / betaTurb
+                        dvt2(i, j, 2) = (tu42 - w2(i, j, itu4)) / betaTurb
+                        if (rvfN .eq. 1) dvt2(i, j, 2) = (tu42 - w2(i, j, itu4)) * 0.01
+
+                        ! Set the wall flag to .true.
+
+                        flag(i, j) = .true.
+
+                    end do
+                end do
+
+            end do bocos
+        end if testWallFunctions
+
+        ! Return if only the residual must be computed.
+
+        if (resOnly) return
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Set qq to 1 if the value is determined by the table.
+
+                    if ((i == 2 .and. flagI2(j, k)) .or. &
+                        (i == il .and. flagIl(j, k)) .or. &
+                        (j == 2 .and. flagJ2(i, k)) .or. &
+                        (j == jl .and. flagJl(i, k)) .or. &
+                        (k == 2 .and. flagK2(i, j)) .or. &
+                        (k == kl .and. flagKl(i, j))) then
+                        qq(i, j, k, 1, 1) = one
+                        qq(i, j, k, 1, 2) = zero
+                        qq(i, j, k, 2, 1) = zero
+                        qq(i, j, k, 2, 2) = one
+                    end if
+                end do
+            end do
+        end do
+
+        ! Initialize the grid velocity to zero. This value will be used
+        ! if the block is not moving.
+
+        qs = zero
+        !
+        !       dd-ADI step in j-direction. There is no particular reason to
+        !       start in j-direction, it just happened to be so. As we solve
+        !       in j-direction, the j-loop is the innermost loop.
+        !
+        do k = 2, kl
+            do i = 2, il
+                do j = 2, jl
+
+                    ! More or less the same code is executed here as above when
+                    ! the residual was built. However, now the off-diagonal
+                    ! terms for the dd-ADI must be built and stored. This could
+                    ! have been done earlier, but then all the coefficients had
+                    ! to be stored. To save memory, they are recomputed.
+                    ! Consequently, see the j-loop to build the residual for
+                    ! the comments.
+
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i, j - 1, k))
+                    volpi = two / (vol(i, j, k) + vol(i, j + 1, k))
+
+                    xm = sj(i, j - 1, k, 1) * volmi
+                    ym = sj(i, j - 1, k, 2) * volmi
+                    zm = sj(i, j - 1, k, 3) * volmi
+                    xp = sj(i, j, k, 1) * volpi
+                    yp = sj(i, j, k, 2) * volpi
+                    zp = sj(i, j, k, 3) * volpi
+
+                    xa = half * (sj(i, j, k, 1) + sj(i, j - 1, k, 1)) * voli
+                    ya = half * (sj(i, j, k, 2) + sj(i, j - 1, k, 2)) * voli
+                    za = half * (sj(i, j, k, 3) + sj(i, j - 1, k, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
+
+                    ! Off-diagonal terms due to the diffusion terms
+                    ! in j-direction.
+
+                    rhoi = one / w(i, j, k, irho)
+                    mulm = half * (rlv(i, j - 1, k) + rlv(i, j, k))
+                    mulp = half * (rlv(i, j + 1, k) + rlv(i, j, k))
+                    muem = half * (rev(i, j - 1, k) + rev(i, j, k))
+                    muep = half * (rev(i, j + 1, k) + rev(i, j, k))
+
+                    c1m = ttm * (mulm + sig1 * muem) * rhoi
+                    c1p = ttp * (mulp + sig1 * muep) * rhoi
+
+                    c2m = ttm
+                    c2p = ttp
+
+                    bb(1, j) = -c1m
+                    dd(1, j) = -c1p
+                    bb(2, j) = -c2m
+                    dd(2, j) = -c2p
+
+                    ! Compute the grid velocity if present.
+                    ! It is taken as the average of j and j-1,
+
+                    if (addGridVelocities) &
+                        qs = half * (sFaceJ(i, j, k) + sFaceJ(i, j - 1, k)) * voli
+
+                    ! Off-diagonal terms due to the advection term in
+                    ! j-direction. First order approximation.
+
+                    uu = xa * w(i, j, k, ivx) + ya * w(i, j, k, ivy) + za * w(i, j, k, ivz) - qs
+                    um = zero
+                    up = zero
+                    if (uu < zero) um = uu
+                    if (uu > zero) up = uu
+
+                    bb(1, j) = bb(1, j) - up
+                    dd(1, j) = dd(1, j) + um
+                    bb(2, j) = bb(2, j)
+                    dd(2, j) = dd(2, j)
+
+                    ! Store the central jacobian and rhs in cc and ff.
+                    ! Multiply the off-diagonal terms and rhs by the iblank
+                    ! value so the update determined for iblank = 0 is zero.
+
+                    rblank = real(iblank(i, j, k), realType)
+
+                    cc(1, 1, j) = qq(i, j, k, 1, 1)
+                    cc(1, 2, j) = qq(i, j, k, 1, 2) * rblank
+                    cc(2, 1, j) = qq(i, j, k, 2, 1) * rblank
+                    cc(2, 2, j) = qq(i, j, k, 2, 2)
+
+                    ff(1, j) = dvt(i, j, k, 1) * rblank
+                    ff(2, j) = dvt(i, j, k, 2) * rblank
+
+                    bb(:, j) = bb(:, j) * rblank
+                    dd(:, j) = dd(:, j) * rblank
+
+                    ! Set off diagonal terms to zero if wall function are used.
+
+                    if ((i == 2 .and. flagI2(j, k)) .or. &
+                        (i == il .and. flagIl(j, k)) .or. &
+                        (j == 2 .and. flagJ2(i, k)) .or. &
+                        (j == jl .and. flagJl(i, k)) .or. &
+                        (k == 2 .and. flagK2(i, j)) .or. &
+                        (k == kl .and. flagKl(i, j))) then
+                        bb(1, j) = zero
+                        dd(1, j) = zero
+                        bb(2, j) = zero
+                        dd(2, j) = zero
+                    end if
+
+                end do
+
+                ! Solve the tri-diagonal system in j-direction.
+
+                call tdia3(2_intType, jl, bb, cc, dd, ff)
+
+                ! Determine the new rhs for the next direction.
+
+                do j = 2, jl
+                    dvt(i, j, k, 1) = qq(i, j, k, 1, 1) * ff(1, j) + qq(i, j, k, 1, 2) * ff(2, j)
+                    dvt(i, j, k, 2) = qq(i, j, k, 2, 1) * ff(1, j) + qq(i, j, k, 2, 2) * ff(2, j)
+                end do
+
+            end do
+        end do
+        !
+        !       dd-ADI step in i-direction. As we solve in i-direction, the
+        !       i-loop is the innermost loop.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! More or less the same code is executed here as above when
+                    ! the residual was built. However, now the off-diagonal
+                    ! terms for the dd-ADI must be built and stored. This could
+                    ! have been done earlier, but then all the coefficients had
+                    ! to be stored. To save memory, they are recomputed.
+                    ! Consequently, see the i-loop to build the residual for
+                    ! the comments.
+
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i - 1, j, k))
+                    volpi = two / (vol(i, j, k) + vol(i + 1, j, k))
+
+                    xm = si(i - 1, j, k, 1) * volmi
+                    ym = si(i - 1, j, k, 2) * volmi
+                    zm = si(i - 1, j, k, 3) * volmi
+                    xp = si(i, j, k, 1) * volpi
+                    yp = si(i, j, k, 2) * volpi
+                    zp = si(i, j, k, 3) * volpi
+
+                    xa = half * (si(i, j, k, 1) + si(i - 1, j, k, 1)) * voli
+                    ya = half * (si(i, j, k, 2) + si(i - 1, j, k, 2)) * voli
+                    za = half * (si(i, j, k, 3) + si(i - 1, j, k, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
+
+                    ! Off-diagonal terms due to the diffusion terms
+                    ! in i-direction.
+
+                    rhoi = one / w(i, j, k, irho)
+                    mulm = half * (rlv(i - 1, j, k) + rlv(i, j, k))
+                    mulp = half * (rlv(i + 1, j, k) + rlv(i, j, k))
+                    muem = half * (rev(i - 1, j, k) + rev(i, j, k))
+                    muep = half * (rev(i + 1, j, k) + rev(i, j, k))
+
+                    c1m = ttm * (mulm + sig1 * muem) * rhoi
+                    c1p = ttp * (mulp + sig1 * muep) * rhoi
+
+                    c2m = ttm
+                    c2p = ttp
+
+                    bb(1, i) = -c1m
+                    dd(1, i) = -c1p
+                    bb(2, i) = -c2m
+                    dd(2, i) = -c2p
+
+                    ! Compute the grid velocity if present.
+                    ! It is taken as the average of i and i-1,
+
+                    if (addGridVelocities) &
+                        qs = half * (sFaceI(i, j, k) + sFaceI(i - 1, j, k)) * voli
+
+                    ! Off-diagonal terms due to the advection term in
+                    ! i-direction. First order approximation.
+
+                    uu = xa * w(i, j, k, ivx) + ya * w(i, j, k, ivy) + za * w(i, j, k, ivz) - qs
+                    um = zero
+                    up = zero
+                    if (uu < zero) um = uu
+                    if (uu > zero) up = uu
+
+                    bb(1, i) = bb(1, i) - up
+                    dd(1, i) = dd(1, i) + um
+                    bb(2, i) = bb(2, i)
+                    dd(2, i) = dd(2, i)
+
+                    ! Store the central jacobian and rhs in cc and ff.
+                    ! Multiply the off-diagonal terms and rhs by the iblank
+                    ! value so the update determined for iblank = 0 is zero.
+
+                    rblank = real(iblank(i, j, k), realType)
+
+                    cc(1, 1, i) = qq(i, j, k, 1, 1)
+                    cc(1, 2, i) = qq(i, j, k, 1, 2) * rblank
+                    cc(2, 1, i) = qq(i, j, k, 2, 1) * rblank
+                    cc(2, 2, i) = qq(i, j, k, 2, 2)
+
+                    ff(1, i) = dvt(i, j, k, 1) * rblank
+                    ff(2, i) = dvt(i, j, k, 2) * rblank
+
+                    bb(:, i) = bb(:, i) * rblank
+                    dd(:, i) = dd(:, i) * rblank
+
+                    ! Set off diagonal terms to zero if wall function are used.
+
+                    if ((i == 2 .and. flagI2(j, k)) .or. &
+                        (i == il .and. flagIl(j, k)) .or. &
+                        (j == 2 .and. flagJ2(i, k)) .or. &
+                        (j == jl .and. flagJl(i, k)) .or. &
+                        (k == 2 .and. flagK2(i, j)) .or. &
+                        (k == kl .and. flagKl(i, j))) then
+                        bb(1, i) = zero
+                        dd(1, i) = zero
+                        bb(2, i) = zero
+                        dd(2, i) = zero
+                    end if
+
+                end do
+
+                ! Solve the tri-diagonal system in i-direction.
+
+                call tdia3(2_intType, il, bb, cc, dd, ff)
+
+                ! Determine the new rhs for the next direction.
+
+                do i = 2, il
+                    dvt(i, j, k, 1) = qq(i, j, k, 1, 1) * ff(1, i) + qq(i, j, k, 1, 2) * ff(2, i)
+                    dvt(i, j, k, 2) = qq(i, j, k, 2, 1) * ff(1, i) + qq(i, j, k, 2, 2) * ff(2, i)
+                end do
+
+            end do
+        end do
+        !
+        !       dd-ADI step in k-direction. As we solve in k-direction, the
+        !       k-loop is the innermost loop.
+        !
+        do j = 2, jl
+            do i = 2, il
+                do k = 2, kl
+
+                    ! More or less the same code is executed here as above when
+                    ! the residual was built. However, now the off-diagonal
+                    ! terms for the dd-ADI must be built and stored. This could
+                    ! have been done earlier, but then all the coefficients had
+                    ! to be stored. To save memory, they are recomputed.
+                    ! Consequently, see the k-loop to build the residual for
+                    ! the comments.
+
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i, j, k - 1))
+                    volpi = two / (vol(i, j, k) + vol(i, j, k + 1))
+
+                    xm = sk(i, j, k - 1, 1) * volmi
+                    ym = sk(i, j, k - 1, 2) * volmi
+                    zm = sk(i, j, k - 1, 3) * volmi
+                    xp = sk(i, j, k, 1) * volpi
+                    yp = sk(i, j, k, 2) * volpi
+                    zp = sk(i, j, k, 3) * volpi
+
+                    xa = half * (sk(i, j, k, 1) + sk(i, j, k - 1, 1)) * voli
+                    ya = half * (sk(i, j, k, 2) + sk(i, j, k - 1, 2)) * voli
+                    za = half * (sk(i, j, k, 3) + sk(i, j, k - 1, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
+
+                    ! Off-diagonal terms due to the diffusion terms
+                    ! in k-direction.
+
+                    rhoi = one / w(i, j, k, irho)
+                    mulm = half * (rlv(i, j, k - 1) + rlv(i, j, k))
+                    mulp = half * (rlv(i, j, k + 1) + rlv(i, j, k))
+                    muem = half * (rev(i, j, k - 1) + rev(i, j, k))
+                    muep = half * (rev(i, j, k + 1) + rev(i, j, k))
+
+                    c1m = ttm * (mulm + sig1 * muem) * rhoi
+                    c1p = ttp * (mulp + sig1 * muep) * rhoi
+
+                    c2m = ttm
+                    c2p = ttp
+
+                    bb(1, k) = -c1m
+                    dd(1, k) = -c1p
+                    bb(2, k) = -c2m
+                    dd(2, k) = -c2p
+
+                    ! Compute the grid velocity if present.
+                    ! It is taken as the average of k and k-1,
+
+                    if (addGridVelocities) &
+                        qs = half * (sFaceK(i, j, k) + sFaceK(i, j, k - 1)) * voli
+
+                    ! Off-diagonal terms due to the advection term in
+                    ! k-direction. First order approximation.
+
+                    uu = xa * w(i, j, k, ivx) + ya * w(i, j, k, ivy) + za * w(i, j, k, ivz) - qs
+                    um = zero
+                    up = zero
+                    if (uu < zero) um = uu
+                    if (uu > zero) up = uu
+
+                    bb(1, k) = bb(1, k) - up
+                    dd(1, k) = dd(1, k) + um
+                    bb(2, k) = bb(2, k)
+                    dd(2, k) = dd(2, k)
+
+                    ! Store the central jacobian and rhs in cc and ff.
+                    ! Multiply the off-diagonal terms and rhs by the iblank
+                    ! value so the update determined for iblank = 0 is zero.
+
+                    rblank = real(iblank(i, j, k), realType)
+
+                    cc(1, 1, k) = qq(i, j, k, 1, 1)
+                    cc(1, 2, k) = qq(i, j, k, 1, 2) * rblank
+                    cc(2, 1, k) = qq(i, j, k, 2, 1) * rblank
+                    cc(2, 2, k) = qq(i, j, k, 2, 2)
+
+                    ff(1, k) = dvt(i, j, k, 1) * rblank
+                    ff(2, k) = dvt(i, j, k, 2) * rblank
+
+                    bb(:, k) = bb(:, k) * rblank
+                    dd(:, k) = dd(:, k) * rblank
+
+                    ! Set off diagonal terms to zero if wall function are used.
+
+                    if ((i == 2 .and. flagI2(j, k)) .or. &
+                        (i == il .and. flagIl(j, k)) .or. &
+                        (j == 2 .and. flagJ2(i, k)) .or. &
+                        (j == jl .and. flagJl(i, k)) .or. &
+                        (k == 2 .and. flagK2(i, j)) .or. &
+                        (k == kl .and. flagKl(i, j))) then
+                        bb(1, k) = zero
+                        dd(1, k) = zero
+                        bb(2, k) = zero
+                        dd(2, k) = zero
+                    end if
+
+                end do
+
+                ! Solve the tri-diagonal system in k-direction.
+
+                call tdia3(2_intType, kl, bb, cc, dd, ff)
+
+                ! Store the update in dvt.
+
+                do k = 2, kl
+                    dvt(i, j, k, 1) = ff(1, k)
+                    dvt(i, j, k, 2) = ff(2, k)
+                end do
+
+            end do
+        end do
+        !
+        !       Update the turbulent variables.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    w(i, j, k, itu3) = w(i, j, k, itu3) + betaTurb * dvt(i, j, k, 1)
+                    w(i, j, k, itu4) = w(i, j, k, itu4) + betaTurb * dvt(i, j, k, 2)
+                end do
+            end do
+        end do
+
+    end subroutine vfSolve
+    subroutine keSolve(resOnly)
+        !
+        !       keSolve solves the k-eps transport equations of the v2-f model
+        !       in a coupled manner using a diagonal dominant ADI-scheme.
+        !
+        use blockPointers
+        use constants
+        use flowVarRefState
+        use inputIteration
+        use inputPhysics
+        use paramTurb
+        use turbMod, only: prod, dvt, sct, scl2, sig1, sig2
+        use turbUtils, only: turbAdvection, unsteadyTurbTerm, tdia3
+        use turbCurveFits, only: curveTupYp
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        logical, intent(in) :: resOnly
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, nn
+
+        real(kind=realType) :: alp
+        real(kind=realType) :: rhoi, ss, spk, ff1, ff2, ff3
+        real(kind=realType) :: ss1, ss2, ss3
+        real(kind=realType) :: voli, volmi, volpi
+        real(kind=realType) :: xm, ym, zm, xp, yp, zp, xa, ya, za
+        real(kind=realType) :: ttm, ttp, mulm, mulp, muem, muep
+        real(kind=realType) :: c1m, c1p, c10, c2m, c2p, c20
+        real(kind=realType) :: b1, b2, c1, c2, d1, d2
+        real(kind=realType) :: qs, uu, um, up, utau
+        real(kind=realType) :: tke, tep, tv2, tkea, tepa, tv2a
+        real(kind=realType) :: tv2l, stei, sle2i
+        real(kind=realType) :: rnu23, tu12, tu22, tu52, prod2, dtu23
+        real(kind=realType) :: factor, rblank
+
+        real(kind=realType), dimension(itu1:itu5) :: tup
+
+        real(kind=realType), dimension(2:il, 2:jl, 2:kl, 2, 2) :: qq
+        real(kind=realType), dimension(2, 2:max(il, jl, kl)) :: bb, dd, ff
+        real(kind=realType), dimension(2, 2, 2:max(il, jl, kl)) :: cc
+
+        real(kind=realType), dimension(:, :, :), pointer :: dw2, dvt2, w2, w3
+        real(kind=realType), dimension(:, :), pointer :: rlv2, rlv3
+        real(kind=realType), dimension(:, :), pointer :: rev2, rev3
+        real(kind=realType), dimension(:, :), pointer :: d2Wall2, d2Wall3
+
+        logical, dimension(2:jl, 2:kl), target :: flagI2, flagIl
+        logical, dimension(2:il, 2:kl), target :: flagJ2, flagJl
+        logical, dimension(2:il, 2:jl), target :: flagK2, flagKl
+
+        logical, dimension(:, :), pointer :: flag
+
+        ! Set model constants
+
+        sig1 = rvfSigk1
+        sig2 = rvfSige1
+
+        ! Set the pointer dvt to the correct entries in dw.
+
+        dvt => scratch(1:, 1:, 1:, idvt:)
+        sct => scratch(1:, 1:, 1:, isct)
+        scl2 => scratch(1:, 1:, 1:, iscl2)
+        !
+        !       Source terms.
+        !       Determine the source term and its derivative w.r.t. k and
+        !       epsilon for all internal cells of the block.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Compute the source terms for both the k and the epsilon
+                    ! equation.
+
+                    rhoi = one / w(i, j, k, irho)
+                    tke = w(i, j, k, itu1)
+                    tep = w(i, j, k, itu2)
+                    tv2 = w(i, j, k, itu3)
+                    tkea = abs(tke)
+                    tepa = abs(tep)
+                    tv2a = abs(tv2)
+                    tv2l = max(tv2a, rvfLimitK * 0.66666_realType)
+                    stei = tepa / sct(i, j, k)
+                    sle2i = tepa**2 / scl2(i, j, k)
+                    ss = prod(i, j, k)
+                    spk = rev(i, j, k) * ss * rhoi
+                    spk = min(spk, pklim * tepa)
+
+                    ! alp = rvfN1A+rvfN1B &
+                    !     / (1.+(d2Wall(i,j,k)*.5*rvfCl)**2*sle2i)**4
+                    alp = rvfN1A + rvfN1B &
+                          / (1.+d2Wall(i, j, k)**2*.25 * rvfCl**2 * tepa**2 / scl2(i, j, k))**4
+                    if (rvfN == 6) alp = rvfN6A * (1.+rvfN6B * sqrt(tkea / tv2l))
+
+                    ff1 = zero
+                    ff2 = -one
+                    ff3 = spk
+
+                    ss1 = zero
+                    ss2 = -rvfBeta * stei
+                    ss3 = alp * spk * stei
+
+                    dvt(i, j, k, 1) = ff1 * tke + ff2 * tep + ff3
+                    dvt(i, j, k, 2) = ss1 * tke + ss2 * tep + ss3
+
+                    ! Compute the source term jacobian. Note that only the
+                    ! destruction terms are linearized to increase the diagonal
+                    ! dominance of the matrix. Furthermore minus the source
+                    ! term jacobian is stored.
+
+                    qq(i, j, k, 1, 1) = -ff1
+                    qq(i, j, k, 1, 2) = -ff2
+                    qq(i, j, k, 2, 1) = -ss1
+                    qq(i, j, k, 2, 2) = -ss2
+
+                end do
+            end do
+        end do
+        !
+        !       Advection and unsteady terms.
+        !
+        nn = itu1 - 1
+        call turbAdvection(2_intType, 2_intType, nn, qq)
+
+        call unsteadyTurbTerm(2_intType, 2_intType, nn, qq)
+        !
+        !       Viscous terms in k-direction.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Compute the metrics in zeta-direction, i.e. along the
+                    ! line k = constant.
+
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i, j, k - 1))
+                    volpi = two / (vol(i, j, k) + vol(i, j, k + 1))
+
+                    xm = sk(i, j, k - 1, 1) * volmi
+                    ym = sk(i, j, k - 1, 2) * volmi
+                    zm = sk(i, j, k - 1, 3) * volmi
+                    xp = sk(i, j, k, 1) * volpi
+                    yp = sk(i, j, k, 2) * volpi
+                    zp = sk(i, j, k, 3) * volpi
+
+                    xa = half * (sk(i, j, k, 1) + sk(i, j, k - 1, 1)) * voli
+                    ya = half * (sk(i, j, k, 2) + sk(i, j, k - 1, 2)) * voli
+                    za = half * (sk(i, j, k, 3) + sk(i, j, k - 1, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
+
+                    ! Computation of the viscous terms in zeta-direction; note
+                    ! that cross-derivatives are neglected, i.e. the mesh is
+                    ! assumed to be orthogonal.
+                    ! The second derivative in zeta-direction is constructed as
+                    ! the central difference of the first order derivatives, i.e.
+                    ! d^2/dzeta^2 = d/dzeta (d/dzeta k+1/2 - d/dzeta k-1/2).
+                    ! In this way the metric as well as the varying viscosity
+                    ! can be taken into account; the latter appears inside the
+                    ! d/dzeta derivative. The whole term is divided by rho to
+                    ! obtain the diffusion term for k and epsilon.
+
+                    rhoi = one / w(i, j, k, irho)
+                    mulm = half * (rlv(i, j, k - 1) + rlv(i, j, k))
+                    mulp = half * (rlv(i, j, k + 1) + rlv(i, j, k))
+                    muem = half * (rev(i, j, k - 1) + rev(i, j, k))
+                    muep = half * (rev(i, j, k + 1) + rev(i, j, k))
+
+                    c1m = ttm * (mulm + sig1 * muem) * rhoi
+                    c1p = ttp * (mulp + sig1 * muep) * rhoi
+                    c10 = c1m + c1p
+
+                    c2m = ttm * (mulm + sig2 * muem) * rhoi
+                    c2p = ttp * (mulp + sig2 * muep) * rhoi
+                    c20 = c2m + c2p
+
+                    ! Update the residual for this cell and store the possible
+                    ! coefficients for the matrix in b1, b2, c1, c2, d1 and d2.
+
+                    dvt(i, j, k, 1) = dvt(i, j, k, 1) + c1m * w(i, j, k - 1, itu1) &
+                                      - c10 * w(i, j, k, itu1) + c1p * w(i, j, k + 1, itu1)
+                    dvt(i, j, k, 2) = dvt(i, j, k, 2) + c2m * w(i, j, k - 1, itu2) &
+                                      - c20 * w(i, j, k, itu2) + c2p * w(i, j, k + 1, itu2)
+
+                    b1 = -c1m
+                    c1 = c10
+                    d1 = -c1p
+
+                    b2 = -c2m
+                    c2 = c20
+                    d2 = -c2p
+
+                    ! Update the central jacobian. For nonboundary cells this
+                    ! is simply c1 and c2. For boundary cells this is slightly
+                    ! more complicated, because the boundary conditions are
+                    ! treated implicitly and the off-diagonal terms b1, b2 and
+                    ! d1, d2 must be taken into account.
+                    ! The boundary conditions are only treated implicitly if
+                    ! the diagonal dominance of the matrix is increased.
+
+                    if (k == 2) then
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1 &
+                                            - b1 * max(bmtk1(i, j, itu1, itu1), zero)
+                        qq(i, j, k, 1, 2) = qq(i, j, k, 1, 2) - b1 * bmtk1(i, j, itu1, itu2)
+                        qq(i, j, k, 2, 1) = qq(i, j, k, 2, 1) - b2 * bmtk1(i, j, itu2, itu1)
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2 &
+                                            - b2 * max(bmtk1(i, j, itu2, itu2), zero)
+                    else if (k == kl) then
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1 &
+                                            - d1 * max(bmtk2(i, j, itu1, itu1), zero)
+                        qq(i, j, k, 1, 2) = qq(i, j, k, 1, 2) - d1 * bmtk2(i, j, itu1, itu2)
+                        qq(i, j, k, 2, 1) = qq(i, j, k, 2, 1) - d2 * bmtk2(i, j, itu2, itu1)
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2 &
+                                            - d2 * max(bmtk2(i, j, itu2, itu2), zero)
+                    else
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2
+                    end if
+
+                end do
+            end do
+        end do
+        !
+        !       Viscous terms in j-direction.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Compute the metrics in eta-direction, i.e. along the
+                    ! line j = constant.
+
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i, j - 1, k))
+                    volpi = two / (vol(i, j, k) + vol(i, j + 1, k))
+
+                    xm = sj(i, j - 1, k, 1) * volmi
+                    ym = sj(i, j - 1, k, 2) * volmi
+                    zm = sj(i, j - 1, k, 3) * volmi
+                    xp = sj(i, j, k, 1) * volpi
+                    yp = sj(i, j, k, 2) * volpi
+                    zp = sj(i, j, k, 3) * volpi
+
+                    xa = half * (sj(i, j, k, 1) + sj(i, j - 1, k, 1)) * voli
+                    ya = half * (sj(i, j, k, 2) + sj(i, j - 1, k, 2)) * voli
+                    za = half * (sj(i, j, k, 3) + sj(i, j - 1, k, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
+
+                    ! Computation of the viscous terms in eta-direction; note
+                    ! that cross-derivatives are neglected, i.e. the mesh is
+                    ! assumed to be orthogonal.
+                    ! The second derivative in eta-direction is constructed as
+                    ! the central difference of the first order derivatives, i.e.
+                    ! d^2/deta^2 = d/deta (d/deta j+1/2 - d/deta j-1/2).
+                    ! In this way the metric as well as the varying viscosity
+                    ! can be taken into account; the latter appears inside the
+                    ! d/deta derivative. The whole term is divided by rho to
+                    ! obtain the diffusion term for k and epsilon.
+
+                    rhoi = one / w(i, j, k, irho)
+                    mulm = half * (rlv(i, j - 1, k) + rlv(i, j, k))
+                    mulp = half * (rlv(i, j + 1, k) + rlv(i, j, k))
+                    muem = half * (rev(i, j - 1, k) + rev(i, j, k))
+                    muep = half * (rev(i, j + 1, k) + rev(i, j, k))
+
+                    c1m = ttm * (mulm + sig1 * muem) * rhoi
+                    c1p = ttp * (mulp + sig1 * muep) * rhoi
+                    c10 = c1m + c1p
+
+                    c2m = ttm * (mulm + sig2 * muem) * rhoi
+                    c2p = ttp * (mulp + sig2 * muep) * rhoi
+                    c20 = c2m + c2p
+
+                    ! Update the residual for this cell and store the possible
+                    ! coefficients for the matrix in b1, b2, c1, c2, d1 and d2.
+
+                    dvt(i, j, k, 1) = dvt(i, j, k, 1) + c1m * w(i, j - 1, k, itu1) &
+                                      - c10 * w(i, j, k, itu1) + c1p * w(i, j + 1, k, itu1)
+                    dvt(i, j, k, 2) = dvt(i, j, k, 2) + c2m * w(i, j - 1, k, itu2) &
+                                      - c20 * w(i, j, k, itu2) + c2p * w(i, j + 1, k, itu2)
+
+                    b1 = -c1m
+                    c1 = c10
+                    d1 = -c1p
+
+                    b2 = -c2m
+                    c2 = c20
+                    d2 = -c2p
+
+                    ! Update the central jacobian. For nonboundary cells this
+                    ! is simply c1 and c2. For boundary cells this is slightly
+                    ! more complicated, because the boundary conditions are
+                    ! treated implicitly and the off-diagonal terms b1, b2 and
+                    ! d1, d2 must be taken into account.
+                    ! The boundary conditions are only treated implicitly if
+                    ! the diagonal dominance of the matrix is increased.
+
+                    if (j == 2) then
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1 &
+                                            - b1 * max(bmtj1(i, k, itu1, itu1), zero)
+                        qq(i, j, k, 1, 2) = qq(i, j, k, 1, 2) - b1 * bmtj1(i, k, itu1, itu2)
+                        qq(i, j, k, 2, 1) = qq(i, j, k, 2, 1) - b2 * bmtj1(i, k, itu2, itu1)
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2 &
+                                            - b2 * max(bmtj1(i, k, itu2, itu2), zero)
+                    else if (j == jl) then
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1 &
+                                            - d1 * max(bmtj2(i, k, itu1, itu1), zero)
+                        qq(i, j, k, 1, 2) = qq(i, j, k, 1, 2) - d1 * bmtj2(i, k, itu1, itu2)
+                        qq(i, j, k, 2, 1) = qq(i, j, k, 2, 1) - d2 * bmtj2(i, k, itu2, itu1)
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2 &
+                                            - d2 * max(bmtj2(i, k, itu2, itu2), zero)
+                    else
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2
+                    end if
+
+                end do
+            end do
+        end do
+        !
+        !       Viscous terms in i-direction.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! Compute the metrics in xi-direction, i.e. along the
+                    ! line i = constant.
+
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i - 1, j, k))
+                    volpi = two / (vol(i, j, k) + vol(i + 1, j, k))
+
+                    xm = si(i - 1, j, k, 1) * volmi
+                    ym = si(i - 1, j, k, 2) * volmi
+                    zm = si(i - 1, j, k, 3) * volmi
+                    xp = si(i, j, k, 1) * volpi
+                    yp = si(i, j, k, 2) * volpi
+                    zp = si(i, j, k, 3) * volpi
+
+                    xa = half * (si(i, j, k, 1) + si(i - 1, j, k, 1)) * voli
+                    ya = half * (si(i, j, k, 2) + si(i - 1, j, k, 2)) * voli
+                    za = half * (si(i, j, k, 3) + si(i - 1, j, k, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
+
+                    ! Computation of the viscous terms in xi-direction; note
+                    ! that cross-derivatives are neglected, i.e. the mesh is
+                    ! assumed to be orthogonal.
+                    ! The second derivative in xi-direction is constructed as
+                    ! the central difference of the first order derivatives, i.e.
+                    ! d^2/dxi^2 = d/dxi (d/dxi i+1/2 - d/dxi i-1/2).
+                    ! In this way the metric as well as the varying viscosity
+                    ! can be taken into account; the latter appears inside the
+                    ! d/dxi derivative. The whole term is divided by rho to
+                    ! obtain the diffusion term for k and epsilon.
+
+                    rhoi = one / w(i, j, k, irho)
+                    mulm = half * (rlv(i - 1, j, k) + rlv(i, j, k))
+                    mulp = half * (rlv(i + 1, j, k) + rlv(i, j, k))
+                    muem = half * (rev(i - 1, j, k) + rev(i, j, k))
+                    muep = half * (rev(i + 1, j, k) + rev(i, j, k))
+
+                    c1m = ttm * (mulm + sig1 * muem) * rhoi
+                    c1p = ttp * (mulp + sig1 * muep) * rhoi
+                    c10 = c1m + c1p
+
+                    c2m = ttm * (mulm + sig2 * muem) * rhoi
+                    c2p = ttp * (mulp + sig2 * muep) * rhoi
+                    c20 = c2m + c2p
+
+                    ! Update the residual for this cell and store the possible
+                    ! coefficients for the matrix in b1, b2, c1, c2, d1 and d2.
+
+                    dvt(i, j, k, 1) = dvt(i, j, k, 1) + c1m * w(i - 1, j, k, itu1) &
+                                      - c10 * w(i, j, k, itu1) + c1p * w(i + 1, j, k, itu1)
+                    dvt(i, j, k, 2) = dvt(i, j, k, 2) + c2m * w(i - 1, j, k, itu2) &
+                                      - c20 * w(i, j, k, itu2) + c2p * w(i + 1, j, k, itu2)
+
+                    b1 = -c1m
+                    c1 = c10
+                    d1 = -c1p
+
+                    b2 = -c2m
+                    c2 = c20
+                    d2 = -c2p
+
+                    ! Update the central jacobian. For nonboundary cells this
+                    ! is simply c1 and c2. For boundary cells this is slightly
+                    ! more complicated, because the boundary conditions are
+                    ! treated implicitly and the off-diagonal terms b1, b2 and
+                    ! d1, d2 must be taken into account.
+                    ! The boundary conditions are only treated implicitly if
+                    ! the diagonal dominance of the matrix is increased.
+
+                    if (i == 2) then
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1 &
+                                            - b1 * max(bmti1(j, k, itu1, itu1), zero)
+                        qq(i, j, k, 1, 2) = qq(i, j, k, 1, 2) - b1 * bmti1(j, k, itu1, itu2)
+                        qq(i, j, k, 2, 1) = qq(i, j, k, 2, 1) - b2 * bmti1(j, k, itu2, itu1)
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2 &
+                                            - b2 * max(bmti1(j, k, itu2, itu2), zero)
+                    else if (i == il) then
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1 &
+                                            - d1 * max(bmti2(j, k, itu1, itu1), zero)
+                        qq(i, j, k, 1, 2) = qq(i, j, k, 1, 2) - d1 * bmti2(j, k, itu1, itu2)
+                        qq(i, j, k, 2, 1) = qq(i, j, k, 2, 1) - d2 * bmti2(j, k, itu2, itu1)
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2 &
+                                            - d2 * max(bmti2(j, k, itu2, itu2), zero)
+                    else
+                        qq(i, j, k, 1, 1) = qq(i, j, k, 1, 1) + c1
+                        qq(i, j, k, 2, 2) = qq(i, j, k, 2, 2) + c2
+                    end if
+
+                end do
+            end do
+        end do
+
+        ! Multiply the residual by the volume and store this in dw; this
+        ! is done for monitoring reasons only. The multiplication with the
+        ! volume is present to be consistent with the flow residuals; also
+        ! the negative value is taken, again to be consistent with the
+        ! flow equations. Also multiply by iblank so that no updates occur
+        ! in holes or the overset boundary.
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    rblank = real(iblank(i, j, k), realType)
+                    dw(i, j, k, itu1) = -volRef(i, j, k) * dvt(i, j, k, 1) * rblank
+                    dw(i, j, k, itu2) = -volRef(i, j, k) * dvt(i, j, k, 2) * rblank
+                end do
+            end do
+        end do
+
+        ! Initialize the wall function flags to .false.
+
+        flagI2 = .false.
+        flagIl = .false.
+        flagJ2 = .false.
+        flagJl = .false.
+        flagK2 = .false.
+        flagKl = .false.
+
+        ! Modify the rhs of the 1st internal cell, if wall functions
+        ! are used; their value is determined by the table.
+
+        testWallFunctions: if (wallFunctions) then
+
+            bocos: do nn = 1, nViscBocos
+
+                ! Determine the block face on which the subface is located
+                ! and set some variables. As flag points to the entire array
+                ! flagI2, etc., its starting indices are the starting indices
+                ! of its target and not 1.
+
+                select case (BCFaceID(nn))
+                case (iMin)
+                    flag => flagI2
+                    dw2 => dw(2, 1:, 1:, 1:); dvt2 => dvt(2, 1:, 1:, 1:)
+                    w2 => w(2, 1:, 1:, 1:); rlv2 => rlv(2, 1:, 1:)
+                    w3 => w(3, 1:, 1:, 1:); rlv3 => rlv(3, 1:, 1:)
+                    d2Wall2 => d2Wall(2, :, :); rev2 => rev(2, 1:, 1:)
+                    d2Wall3 => d2Wall(3, :, :); rev3 => rev(3, 1:, 1:)
+
+                case (iMax)
+                    flag => flagIl
+                    dw2 => dw(il, 1:, 1:, 1:); dvt2 => dvt(il, 1:, 1:, 1:)
+                    w2 => w(il, 1:, 1:, 1:); rlv2 => rlv(il, 1:, 1:)
+                    w3 => w(il - 1, 1:, 1:, 1:); rlv3 => rlv(il - 1, 1:, 1:)
+                    d2Wall2 => d2Wall(il, :, :); rev2 => rev(il, 1:, 1:)
+                    d2Wall3 => d2Wall(il - 1, :, :); rev3 => rev(il - 1, 1:, 1:)
+
+                case (jMin)
+                    flag => flagJ2
+                    dw2 => dw(1:, 2, 1:, 1:); dvt2 => dvt(1:, 2, 1:, 1:)
+                    w2 => w(1:, 2, 1:, 1:); rlv2 => rlv(1:, 2, 1:)
+                    w3 => w(1:, 3, 1:, 1:); rlv3 => rlv(1:, 3, 1:)
+                    d2Wall2 => d2Wall(:, 2, :); rev2 => rev(1:, 2, 1:)
+                    d2Wall3 => d2Wall(:, 3, :); rev3 => rev(1:, 3, 1:)
+
+                case (jMax)
+                    flag => flagJl
+                    dw2 => dw(1:, jl, 1:, 1:); dvt2 => dvt(1:, jl, 1:, 1:)
+                    w2 => w(1:, jl, 1:, 1:); rlv2 => rlv(1:, jl, 1:)
+                    w3 => w(1:, jl - 1, 1:, 1:); rlv3 => rlv(1:, jl - 1, 1:)
+                    d2Wall2 => d2Wall(:, jl, :); rev2 => rev(1:, jl, 1:)
+                    d2Wall3 => d2Wall(:, jl - 1, :); rev3 => rev(1:, jl - 1, 1:)
+
+                case (kMin)
+                    flag => flagK2
+                    dw2 => dw(1:, 1:, 2, 1:); dvt2 => dvt(1:, 1:, 2, 1:)
+                    w2 => w(1:, 1:, 2, 1:); rlv2 => rlv(1:, 1:, 2)
+                    w3 => w(1:, 1:, 3, 1:); rlv3 => rlv(1:, 1:, 3)
+                    d2Wall2 => d2Wall(:, :, 2); rev2 => rev(1:, 1:, 2)
+                    d2Wall3 => d2Wall(:, :, 3); rev3 => rev(1:, 1:, 3)
+
+                case (kMax)
+                    flag => flagKl
+                    dw2 => dw(1:, 1:, kl, 1:); dvt2 => dvt(1:, 1:, kl, 1:)
+                    w2 => w(1:, 1:, kl, 1:); rlv2 => rlv(1:, 1:, kl)
+                    w3 => w(1:, 1:, kl - 1, 1:); rlv3 => rlv(1:, 1:, kl - 1)
+                    d2Wall2 => d2Wall(:, :, kl); rev2 => rev(1:, 1:, kl)
+                    d2Wall3 => d2Wall(:, :, kl - 1); rev3 => rev(1:, 1:, kl - 1)
+
+                end select
+
+                ! Loop over the owned faces of this subface. Therefore the
+                ! nodal range of BCData must be used. The offset of +1 is
+                ! present, because the starting index of the cell range is
+                ! 1 larger than the starting index of the nodal range.
+
+                do j = (BCData(nn)%jnBeg + 1), BCData(nn)%jnEnd
+                    do i = (BCData(nn)%inBeg + 1), BCData(nn)%inEnd
+
+                        ! Enforce k and epsilon in the 1st internal cell from
+                        ! the wall function table. There is an offset of -1 in
+                        ! the wall distance. Note that the offset compared to
+                        ! the current value must be stored. Also note that the
+                        ! curve fits contain the non-dimensional values.
+
+                        utau = viscSubface(nn)%utau(i, j)
+                        yp = w2(i, j, irho) * d2Wall2(i - 1, j - 1) * utau / rlv2(i, j)
+
+                        ! Set dw2 to zero for proper monitoring of the
+                        ! convergence.
+
+                        dw2(i, j, itu1) = zero
+                        dw2(i, j, itu2) = zero
+
+                        ! Get table values
+
+                        call curveTupYp(tup(itu1:itu2), yp, itu1, itu2)
+                        tu12 = tup(itu1) * utau**2
+                        tu22 = tup(itu2) * utau**4 / rlv2(i, j) * w2(i, j, irho)
+
+                        ! Compute epsilon from balance
+
+                        call curveTupYp(tup(itu5:itu5), yp, itu5, itu5)
+                        tu52 = tup(itu5) * rlv2(i, j)
+                        dtu23 = (w3(i, j, itu1) - tu12) &
+                                / (d2Wall3(i - 1, j - 1) - d2Wall2(i - 1, j - 1))
+                        rnu23 = half * ((tu52 + rlv2(i, j)) / w2(i, j, irho) + &
+                                        (rev3(i, j) + rlv3(i, j)) / w3(i, j, irho))
+                        prod2 = tu52 / w2(i, j, irho) * (utau**2 * w2(i, j, irho) &
+                                                         / (rlv2(i, j) + tu52))**2
+                        tu22 = prod2 + rnu23 * dtu23 / (two * d2Wall2(i - 1, j - 1))
+
+                        ! Set rhs to turbulence variables
+
+                        ! dvt2(i,j,1) = (tu12 - w2(i,j,itu1))/alfaTurb
+                        ! dvt2(i,j,2) = (tu22 - w2(i,j,itu2))/alfaTurb
+
+                        dvt2(i, j, 1) = (tu12 - w2(i, j, itu1))
+                        dvt2(i, j, 2) = (tu22 - w2(i, j, itu2)) * 0.01
+
+                        ! Set the wall flag to .true.
+
+                        flag(i, j) = .true.
+
+                    end do
+                end do
+
+            end do bocos
+        end if testWallFunctions
+
+        ! Return if only the residual must be computed.
+
+        if (resOnly) return
+
+        ! Take the local time step into account. Use characteristic
+        ! time stepping, i.e. a matrix time step is used, where
+        ! dt is the inverse of the central jacobian times the cfl
+        ! number. To avoid a matrix inversion the following system
+        ! is solved. (I/dt + cc + bb + dd)*dw = rhs. Due to the
+        ! matrix time stepping I/dt = cc/cfl. As in the rest of the
+        ! algorithm only the modified central jacobian is used,
+        ! stored it now.
+
+        ! Compute the factor multiplying the central jacobian, which
+        ! is 1 + 1/cfl.
+
+        factor = one
+        if (wallFunctions) factor = one / alfaTurb
+
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    qq(i, j, k, 1, 1) = factor * qq(i, j, k, 1, 1)
+                    qq(i, j, k, 1, 2) = factor * qq(i, j, k, 1, 2)
+                    qq(i, j, k, 2, 1) = factor * qq(i, j, k, 2, 1)
+                    qq(i, j, k, 2, 2) = factor * qq(i, j, k, 2, 2)
+
+                    ! Set qq to 1 if the value is determined by the table.
+
+                    if ((i == 2 .and. flagI2(j, k)) .or. &
+                        (i == il .and. flagIl(j, k)) .or. &
+                        (j == 2 .and. flagJ2(i, k)) .or. &
+                        (j == jl .and. flagJl(i, k)) .or. &
+                        (k == 2 .and. flagK2(i, j)) .or. &
+                        (k == kl .and. flagKl(i, j))) then
+                        qq(i, j, k, 1, 1) = one
+                        qq(i, j, k, 1, 2) = zero
+                        qq(i, j, k, 2, 1) = zero
+                        qq(i, j, k, 2, 2) = one
+                    end if
+                end do
+            end do
+        end do
+
+        ! Initialize the grid velocity to zero. This value will be used
+        ! if the block is not moving.
+
+        qs = zero
+        !
+        !       dd-ADI step in j-direction. There is no particular reason to
+        !       start in j-direction, it just happened to be so. As we solve
+        !       in j-direction, the j-loop is the innermost loop.
+        !
+        do k = 2, kl
+            do i = 2, il
+                do j = 2, jl
+
+                    ! More or less the same code is executed here as above when
+                    ! the residual was built. However, now the off-diagonal
+                    ! terms for the dd-ADI must be built and stored. This could
+                    ! have been done earlier, but then all the coefficients had
+                    ! to be stored. To save memory, they are recomputed.
+                    ! Consequently, see the j-loop to build the residual for
+                    ! the comments.
+
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i, j - 1, k))
+                    volpi = two / (vol(i, j, k) + vol(i, j + 1, k))
+
+                    xm = sj(i, j - 1, k, 1) * volmi
+                    ym = sj(i, j - 1, k, 2) * volmi
+                    zm = sj(i, j - 1, k, 3) * volmi
+                    xp = sj(i, j, k, 1) * volpi
+                    yp = sj(i, j, k, 2) * volpi
+                    zp = sj(i, j, k, 3) * volpi
+
+                    xa = half * (sj(i, j, k, 1) + sj(i, j - 1, k, 1)) * voli
+                    ya = half * (sj(i, j, k, 2) + sj(i, j - 1, k, 2)) * voli
+                    za = half * (sj(i, j, k, 3) + sj(i, j - 1, k, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
+
+                    ! Off-diagonal terms due to the diffusion terms
+                    ! in j-direction.
+
+                    rhoi = one / w(i, j, k, irho)
+                    mulm = half * (rlv(i, j - 1, k) + rlv(i, j, k))
+                    mulp = half * (rlv(i, j + 1, k) + rlv(i, j, k))
+                    muem = half * (rev(i, j - 1, k) + rev(i, j, k))
+                    muep = half * (rev(i, j + 1, k) + rev(i, j, k))
+
+                    c1m = ttm * (mulm + sig1 * muem) * rhoi
+                    c1p = ttp * (mulp + sig1 * muep) * rhoi
+
+                    c2m = ttm * (mulm + sig2 * muem) * rhoi
+                    c2p = ttp * (mulp + sig2 * muep) * rhoi
+
+                    bb(1, j) = -c1m
+                    dd(1, j) = -c1p
+                    bb(2, j) = -c2m
+                    dd(2, j) = -c2p
+
+                    ! Compute the grid velocity if present.
+                    ! It is taken as the average of j and j-1,
+
+                    if (addGridVelocities) &
+                        qs = half * (sFaceJ(i, j, k) + sFaceJ(i, j - 1, k)) * voli
+
+                    ! Off-diagonal terms due to the advection term in
+                    ! j-direction. First order approximation.
+
+                    uu = xa * w(i, j, k, ivx) + ya * w(i, j, k, ivy) + za * w(i, j, k, ivz) - qs
+                    um = zero
+                    up = zero
+                    if (uu < zero) um = uu
+                    if (uu > zero) up = uu
+
+                    bb(1, j) = bb(1, j) - up
+                    dd(1, j) = dd(1, j) + um
+                    bb(2, j) = bb(2, j) - up
+                    dd(2, j) = dd(2, j) + um
+
+                    ! Store the central jacobian and rhs in cc and ff.
+                    ! Multiply the off-diagonal terms and rhs by the iblank
+                    ! value so the update determined for iblank = 0 is zero.
+
+                    rblank = real(iblank(i, j, k), realType)
+
+                    cc(1, 1, j) = qq(i, j, k, 1, 1)
+                    cc(1, 2, j) = qq(i, j, k, 1, 2) * rblank
+                    cc(2, 1, j) = qq(i, j, k, 2, 1) * rblank
+                    cc(2, 2, j) = qq(i, j, k, 2, 2)
+
+                    ff(1, j) = dvt(i, j, k, 1) * rblank
+                    ff(2, j) = dvt(i, j, k, 2) * rblank
+
+                    bb(:, j) = bb(:, j) * rblank
+                    dd(:, j) = dd(:, j) * rblank
+
+                    ! Set off diagonal terms to zero if wall function are used.
+
+                    if ((i == 2 .and. flagI2(j, k)) .or. &
+                        (i == il .and. flagIl(j, k)) .or. &
+                        (j == 2 .and. flagJ2(i, k)) .or. &
+                        (j == jl .and. flagJl(i, k)) .or. &
+                        (k == 2 .and. flagK2(i, j)) .or. &
+                        (k == kl .and. flagKl(i, j))) then
+                        bb(1, j) = zero
+                        dd(1, j) = zero
+                        bb(2, j) = zero
+                        dd(2, j) = zero
+                    end if
+
+                end do
+
+                ! Solve the tri-diagonal system in j-direction.
+
+                call tdia3(2_intType, jl, bb, cc, dd, ff)
+
+                ! Determine the new rhs for the next direction.
+
+                do j = 2, jl
+                    dvt(i, j, k, 1) = qq(i, j, k, 1, 1) * ff(1, j) + qq(i, j, k, 1, 2) * ff(2, j)
+                    dvt(i, j, k, 2) = qq(i, j, k, 2, 1) * ff(1, j) + qq(i, j, k, 2, 2) * ff(2, j)
+                end do
+
+            end do
+        end do
+        !
+        !       dd-ADI step in i-direction. As we solve in i-direction, the
+        !       i-loop is the innermost loop.
+        !
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+
+                    ! More or less the same code is executed here as above when
+                    ! the residual was built. However, now the off-diagonal
+                    ! terms for the dd-ADI must be built and stored. This could
+                    ! have been done earlier, but then all the coefficients had
+                    ! to be stored. To save memory, they are recomputed.
+                    ! Consequently, see the i-loop to build the residual for
+                    ! the comments.
+
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i - 1, j, k))
+                    volpi = two / (vol(i, j, k) + vol(i + 1, j, k))
+
+                    xm = si(i - 1, j, k, 1) * volmi
+                    ym = si(i - 1, j, k, 2) * volmi
+                    zm = si(i - 1, j, k, 3) * volmi
+                    xp = si(i, j, k, 1) * volpi
+                    yp = si(i, j, k, 2) * volpi
+                    zp = si(i, j, k, 3) * volpi
+
+                    xa = half * (si(i, j, k, 1) + si(i - 1, j, k, 1)) * voli
+                    ya = half * (si(i, j, k, 2) + si(i - 1, j, k, 2)) * voli
+                    za = half * (si(i, j, k, 3) + si(i - 1, j, k, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
+
+                    ! Off-diagonal terms due to the diffusion terms
+                    ! in i-direction.
+
+                    rhoi = one / w(i, j, k, irho)
+                    mulm = half * (rlv(i - 1, j, k) + rlv(i, j, k))
+                    mulp = half * (rlv(i + 1, j, k) + rlv(i, j, k))
+                    muem = half * (rev(i - 1, j, k) + rev(i, j, k))
+                    muep = half * (rev(i + 1, j, k) + rev(i, j, k))
+
+                    c1m = ttm * (mulm + sig1 * muem) * rhoi
+                    c1p = ttp * (mulp + sig1 * muep) * rhoi
+
+                    c2m = ttm * (mulm + sig2 * muem) * rhoi
+                    c2p = ttp * (mulp + sig2 * muep) * rhoi
+
+                    bb(1, i) = -c1m
+                    dd(1, i) = -c1p
+                    bb(2, i) = -c2m
+                    dd(2, i) = -c2p
+
+                    ! Compute the grid velocity if present.
+                    ! It is taken as the average of i and i-1,
+
+                    if (addGridVelocities) &
+                        qs = half * (sFaceI(i, j, k) + sFaceI(i - 1, j, k)) * voli
+
+                    ! Off-diagonal terms due to the advection term in
+                    ! i-direction. First order approximation.
+
+                    uu = xa * w(i, j, k, ivx) + ya * w(i, j, k, ivy) + za * w(i, j, k, ivz) - qs
+                    um = zero
+                    up = zero
+                    if (uu < zero) um = uu
+                    if (uu > zero) up = uu
+
+                    bb(1, i) = bb(1, i) - up
+                    dd(1, i) = dd(1, i) + um
+                    bb(2, i) = bb(2, i) - up
+                    dd(2, i) = dd(2, i) + um
+
+                    ! Store the central jacobian and rhs in cc and ff.
+                    ! Multiply the off-diagonal terms and rhs by the iblank
+                    ! value so the update determined for iblank = 0 is zero.
+
+                    rblank = real(iblank(i, j, k), realType)
+
+                    cc(1, 1, i) = qq(i, j, k, 1, 1)
+                    cc(1, 2, i) = qq(i, j, k, 1, 2) * rblank
+                    cc(2, 1, i) = qq(i, j, k, 2, 1) * rblank
+                    cc(2, 2, i) = qq(i, j, k, 2, 2)
+
+                    ff(1, i) = dvt(i, j, k, 1) * rblank
+                    ff(2, i) = dvt(i, j, k, 2) * rblank
+
+                    bb(:, i) = bb(:, i) * rblank
+                    dd(:, i) = dd(:, i) * rblank
+
+                    ! Set off diagonal terms to zero if wall function are used.
+
+                    if ((i == 2 .and. flagI2(j, k)) .or. &
+                        (i == il .and. flagIl(j, k)) .or. &
+                        (j == 2 .and. flagJ2(i, k)) .or. &
+                        (j == jl .and. flagJl(i, k)) .or. &
+                        (k == 2 .and. flagK2(i, j)) .or. &
+                        (k == kl .and. flagKl(i, j))) then
+                        bb(1, i) = zero
+                        dd(1, i) = zero
+                        bb(2, i) = zero
+                        dd(2, i) = zero
+                    end if
+
+                end do
+
+                ! Solve the tri-diagonal system in i-direction.
+
+                call tdia3(2_intType, il, bb, cc, dd, ff)
+
+                ! Determine the new rhs for the next direction.
+
+                do i = 2, il
+                    dvt(i, j, k, 1) = qq(i, j, k, 1, 1) * ff(1, i) + qq(i, j, k, 1, 2) * ff(2, i)
+                    dvt(i, j, k, 2) = qq(i, j, k, 2, 1) * ff(1, i) + qq(i, j, k, 2, 2) * ff(2, i)
+                end do
+
+            end do
+        end do
+        !
+        !       dd-ADI step in k-direction. As we solve in k-direction, the
+        !       k-loop is the innermost loop.
+        !
+        do j = 2, jl
+            do i = 2, il
+                do k = 2, kl
+
+                    ! More or less the same code is executed here as above when
+                    ! the residual was built. However, now the off-diagonal
+                    ! terms for the dd-ADI must be built and stored. This could
+                    ! have been done earlier, but then all the coefficients had
+                    ! to be stored. To save memory, they are recomputed.
+                    ! Consequently, see the k-loop to build the residual for
+                    ! the comments.
+
+                    voli = one / vol(i, j, k)
+                    volmi = two / (vol(i, j, k) + vol(i, j, k - 1))
+                    volpi = two / (vol(i, j, k) + vol(i, j, k + 1))
+
+                    xm = sk(i, j, k - 1, 1) * volmi
+                    ym = sk(i, j, k - 1, 2) * volmi
+                    zm = sk(i, j, k - 1, 3) * volmi
+                    xp = sk(i, j, k, 1) * volpi
+                    yp = sk(i, j, k, 2) * volpi
+                    zp = sk(i, j, k, 3) * volpi
+
+                    xa = half * (sk(i, j, k, 1) + sk(i, j, k - 1, 1)) * voli
+                    ya = half * (sk(i, j, k, 2) + sk(i, j, k - 1, 2)) * voli
+                    za = half * (sk(i, j, k, 3) + sk(i, j, k - 1, 3)) * voli
+                    ttm = xm * xa + ym * ya + zm * za
+                    ttp = xp * xa + yp * ya + zp * za
+
+                    ! Off-diagonal terms due to the diffusion terms
+                    ! in k-direction.
+
+                    rhoi = one / w(i, j, k, irho)
+                    mulm = half * (rlv(i, j, k - 1) + rlv(i, j, k))
+                    mulp = half * (rlv(i, j, k + 1) + rlv(i, j, k))
+                    muem = half * (rev(i, j, k - 1) + rev(i, j, k))
+                    muep = half * (rev(i, j, k + 1) + rev(i, j, k))
+
+                    c1m = ttm * (mulm + sig1 * muem) * rhoi
+                    c1p = ttp * (mulp + sig1 * muep) * rhoi
+
+                    c2m = ttm * (mulm + sig2 * muem) * rhoi
+                    c2p = ttp * (mulp + sig2 * muep) * rhoi
+
+                    bb(1, k) = -c1m
+                    dd(1, k) = -c1p
+                    bb(2, k) = -c2m
+                    dd(2, k) = -c2p
+
+                    ! Compute the grid velocity if present.
+                    ! It is taken as the average of k and k-1,
+
+                    if (addGridVelocities) &
+                        qs = half * (sFaceK(i, j, k) + sFaceK(i, j, k - 1)) * voli
+
+                    ! Off-diagonal terms due to the advection term in
+                    ! k-direction. First order approximation.
+
+                    uu = xa * w(i, j, k, ivx) + ya * w(i, j, k, ivy) + za * w(i, j, k, ivz) - qs
+                    um = zero
+                    up = zero
+                    if (uu < zero) um = uu
+                    if (uu > zero) up = uu
+
+                    bb(1, k) = bb(1, k) - up
+                    dd(1, k) = dd(1, k) + um
+                    bb(2, k) = bb(2, k) - up
+                    dd(2, k) = dd(2, k) + um
+
+                    ! Store the central jacobian and rhs in cc and ff.
+                    ! Multiply the off-diagonal terms and rhs by the iblank
+                    ! value so the update determined for iblank = 0 is zero.
+
+                    rblank = real(iblank(i, j, k), realType)
+
+                    cc(1, 1, k) = qq(i, j, k, 1, 1)
+                    cc(1, 2, k) = qq(i, j, k, 1, 2) * rblank
+                    cc(2, 1, k) = qq(i, j, k, 2, 1) * rblank
+                    cc(2, 2, k) = qq(i, j, k, 2, 2)
+
+                    ff(1, k) = dvt(i, j, k, 1) * rblank
+                    ff(2, k) = dvt(i, j, k, 2) * rblank
+
+                    bb(:, k) = bb(:, k) * rblank
+                    dd(:, k) = dd(:, k) * rblank
+
+                    ! Set off diagonal terms to zero if wall function are used.
+
+                    if ((i == 2 .and. flagI2(j, k)) .or. &
+                        (i == il .and. flagIl(j, k)) .or. &
+                        (j == 2 .and. flagJ2(i, k)) .or. &
+                        (j == jl .and. flagJl(i, k)) .or. &
+                        (k == 2 .and. flagK2(i, j)) .or. &
+                        (k == kl .and. flagKl(i, j))) then
+                        bb(1, k) = zero
+                        dd(1, k) = zero
+                        bb(2, k) = zero
+                        dd(2, k) = zero
+                    end if
+
+                end do
+
+                ! Solve the tri-diagonal system in k-direction.
+
+                call tdia3(2_intType, kl, bb, cc, dd, ff)
+
+                ! Store the update in dvt.
+
+                do k = 2, kl
+                    dvt(i, j, k, 1) = ff(1, k)
+                    dvt(i, j, k, 2) = ff(2, k)
+                end do
+
+            end do
+        end do
+        !
+        !       Update the turbulent variables.
+        !
+        factor = alfaTurb
+        if (wallFunctions) factor = one
+        do k = 2, kl
+            do j = 2, jl
+                do i = 2, il
+                    w(i, j, k, itu1) = w(i, j, k, itu1) + factor * dvt(i, j, k, 1)
+                    w(i, j, k, itu2) = w(i, j, k, itu2) + factor * dvt(i, j, k, 2)
+                end do
+            end do
+        end do
+
+    end subroutine keSolve
 
 end module vf
diff --git a/src/utils/flowUtils.F90 b/src/utils/flowUtils.F90
index a8db1ee02..d50a08c4b 100644
--- a/src/utils/flowUtils.F90
+++ b/src/utils/flowUtils.F90
@@ -1,1374 +1,1370 @@
 module flowUtils
 contains
 
-  subroutine computeTtot(rho, u, v, w, p, Ttot)
-    !
-    !       computeTtot computes the total temperature for the given
-    !       pressures, densities and velocities.
-    !
-    use constants
-    use inputPhysics, only : cpModel
-    use flowVarRefState, only : RGas, gammaInf
-    use utils, only : terminate
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    real(kind=realType), intent(in)  :: rho, p, u, v, w
-    real(kind=realType), intent(out) :: Ttot
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i
-
-    real(kind=realType) :: govgm1, T, kin
-
-    ! Determine the cp model used.
-
-    select case (cpModel)
-
-    case (cpConstant)
-
-       ! Constant cp and thus constant gamma. The well-known
-       ! formula is valid.
-
-       govgm1 = gammainf/(gammainf-one)
-       T       = p/(rho*RGas)
-       kin     = half*(u*u + v*v + w*w)
-       Ttot = T*(one + rho*kin/(govgm1*p))
-
-       !===============================================================
-
-    case (cpTempCurveFits)
-
-       ! Cp is a function of the temperature. The formula used for
-       ! constant cp is not valid anymore and a more complicated
-       ! procedure must be followed.
-
-       call terminate("computeTtot", &
-            "Variable cp formulation not implemented yet")
-
-    end select
-
-  end subroutine computeTtot
-
-  subroutine computeGamma(T, gamma, mm)
-    !
-    !       computeGamma computes the corresponding values of gamma for
-    !       the given dimensional temperatures.
-    !
-    use constants
-    use cpCurveFits
-    use inputPhysics, only : cpModel, gammaConstant
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in)               :: mm
-    real(kind=realType), dimension(mm), intent(in)  :: T
-    real(kind=realType), dimension(mm), intent(out) :: gamma
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, ii, nn, start
-    real(kind=realType)   :: cp, T2
-
-    ! Determine the cp model used in the computation.
-
-    select case (cpModel)
-
-    case (cpConstant)
-
-       ! Constant cp and thus constant gamma. Set the values.
-
-       do i=1,mm
-          gamma(i) = gammaConstant
-       enddo
-
-       !        ================================================================
+    subroutine computeTtot(rho, u, v, w, p, Ttot)
+        !
+        !       computeTtot computes the total temperature for the given
+        !       pressures, densities and velocities.
+        !
+        use constants
+        use inputPhysics, only: cpModel
+        use flowVarRefState, only: RGas, gammaInf
+        use utils, only: terminate
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        real(kind=realType), intent(in) :: rho, p, u, v, w
+        real(kind=realType), intent(out) :: Ttot
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i
+
+        real(kind=realType) :: govgm1, T, kin
+
+        ! Determine the cp model used.
+
+        select case (cpModel)
+
+        case (cpConstant)
+
+            ! Constant cp and thus constant gamma. The well-known
+            ! formula is valid.
+
+            govgm1 = gammainf / (gammainf - one)
+            T = p / (rho * RGas)
+            kin = half * (u * u + v * v + w * w)
+            Ttot = T * (one + rho * kin / (govgm1 * p))
+
+            !===============================================================
+
+        case (cpTempCurveFits)
+
+            ! Cp is a function of the temperature. The formula used for
+            ! constant cp is not valid anymore and a more complicated
+            ! procedure must be followed.
+
+            call terminate("computeTtot", &
+                           "Variable cp formulation not implemented yet")
+
+        end select
+
+    end subroutine computeTtot
+
+    subroutine computeGamma(T, gamma, mm)
+        !
+        !       computeGamma computes the corresponding values of gamma for
+        !       the given dimensional temperatures.
+        !
+        use constants
+        use cpCurveFits
+        use inputPhysics, only: cpModel, gammaConstant
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: mm
+        real(kind=realType), dimension(mm), intent(in) :: T
+        real(kind=realType), dimension(mm), intent(out) :: gamma
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, ii, nn, start
+        real(kind=realType) :: cp, T2
+
+        ! Determine the cp model used in the computation.
+
+        select case (cpModel)
+
+        case (cpConstant)
+
+            ! Constant cp and thus constant gamma. Set the values.
+
+            do i = 1, mm
+                gamma(i) = gammaConstant
+            end do
+
+            !        ================================================================
 #ifndef USE_TAPENADE
-    case (cpTempCurveFits)
+        case (cpTempCurveFits)
 
-       ! Cp as function of the temperature is given via curve fits.
+            ! Cp as function of the temperature is given via curve fits.
 
-       do i=1,mm
+            do i = 1, mm
 
-          ! Determine the case we are having here.
+                ! Determine the case we are having here.
 
-          if(T(i) <= cpTrange(0)) then
+                if (T(i) <= cpTrange(0)) then
 
-             ! Temperature is less than the smallest temperature
-             ! in the curve fits. Use cv0 to compute gamma.
+                    ! Temperature is less than the smallest temperature
+                    ! in the curve fits. Use cv0 to compute gamma.
 
-             gamma(i) = (cv0 + one)/cv0
+                    gamma(i) = (cv0 + one) / cv0
 
-          else if(T(i) >= cpTrange(cpNparts)) then
+                else if (T(i) >= cpTrange(cpNparts)) then
 
-             ! Temperature is larger than the largest temperature
-             ! in the curve fits. Use cvn to compute gamma.
+                    ! Temperature is larger than the largest temperature
+                    ! in the curve fits. Use cvn to compute gamma.
 
-             gamma(i) = (cvn + one)/cvn
+                    gamma(i) = (cvn + one) / cvn
 
-          else
+                else
 
-             ! Temperature is in the curve fit range.
-             ! First find the valid range.
+                    ! Temperature is in the curve fit range.
+                    ! First find the valid range.
 
-             ii    = cpNparts
-             start = 1
-             interval: do
+                    ii = cpNparts
+                    start = 1
+                    interval: do
 
-                ! Next guess for the interval.
+                        ! Next guess for the interval.
 
-                nn = start + ii/2
+                        nn = start + ii / 2
 
-                ! Determine the situation we are having here.
+                        ! Determine the situation we are having here.
 
-                if(T(i) > cpTrange(nn)) then
+                        if (T(i) > cpTrange(nn)) then
 
-                   ! Temperature is larger than the upper boundary of
-                   ! the current interval. Update the lower boundary.
+                            ! Temperature is larger than the upper boundary of
+                            ! the current interval. Update the lower boundary.
 
-                   start = nn + 1
-                   ii    = ii - 1
+                            start = nn + 1
+                            ii = ii - 1
 
-                else if(T(i) >= cpTrange(nn-1)) then
+                        else if (T(i) >= cpTrange(nn - 1)) then
 
-                   ! This is the correct range. Exit the do-loop.
+                            ! This is the correct range. Exit the do-loop.
 
-                   exit
+                            exit
 
-                endif
+                        end if
 
-                ! Modify ii for the next branch to search.
+                        ! Modify ii for the next branch to search.
 
-                ii = ii/2
+                        ii = ii / 2
 
-             enddo interval
+                    end do interval
 
-             ! Nn contains the correct curve fit interval.
-             ! Compute the value of cp.
+                    ! Nn contains the correct curve fit interval.
+                    ! Compute the value of cp.
 
-             cp = zero
-             do ii=1,cpTempFit(nn)%nterm
-                T2 = T(i)**(cpTempFit(nn)%exponents(ii))
-                cp = cp + cpTempFit(nn)%constants(ii)*T2
-             enddo
+                    cp = zero
+                    do ii = 1, cpTempFit(nn)%nterm
+                        T2 = T(i)**(cpTempFit(nn)%exponents(ii))
+                        cp = cp + cpTempFit(nn)%constants(ii) * T2
+                    end do
 
-             ! Compute the corresponding value of gamma.
+                    ! Compute the corresponding value of gamma.
 
-             gamma(i) = cp/(cp-one)
+                    gamma(i) = cp / (cp - one)
 
-          endif
+                end if
 
-       enddo
+            end do
 #endif
-    end select
-
-  end subroutine computeGamma
-
-  subroutine computePtot(rho, u, v, w, p, ptot)
-    !
-    !       ComputePtot computes the total pressure for the given
-    !       pressures, densities and velocities.
-    !
-    use constants
-    use cpCurveFits
-    use flowVarRefState, only : tref, RGas, gammaInf
-    use inputPhysics, only : cpModel
-    implicit none
-
-    real(kind=realType), intent(in)  :: rho, p, u, v, w
-    real(kind=realType), intent(out) :: ptot
-    !
-    !      Local parameters.
-    !
-    real(kind=realType), parameter :: dtStop  = 0.01_realType
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, ii, mm, nn, nnt, start
-
-    real(kind=realType) :: govgm1, kin
-    real(kind=realType) :: T, T2, tt, dt, h, htot, cp, scale, alp
-    real(kind=realType) :: intCport, intCportT, intCportTt
-    !
-    ! Determine the cp model used.
-
-    select case (cpModel)
-
-    case (cpConstant)
-
-       ! Constant cp and thus constant gamma. The well-known
-       ! formula is valid.
-
-       govgm1 = gammaInf/(gammaInf-one)
-
-       kin     = half*(u*u + v*v + w*w)
-       ptot = p*((one + rho*kin/(govgm1*p))**govgm1)
-
-       !===============================================================
+        end select
+
+    end subroutine computeGamma
+
+    subroutine computePtot(rho, u, v, w, p, ptot)
+        !
+        !       ComputePtot computes the total pressure for the given
+        !       pressures, densities and velocities.
+        !
+        use constants
+        use cpCurveFits
+        use flowVarRefState, only: tref, RGas, gammaInf
+        use inputPhysics, only: cpModel
+        implicit none
+
+        real(kind=realType), intent(in) :: rho, p, u, v, w
+        real(kind=realType), intent(out) :: ptot
+        !
+        !      Local parameters.
+        !
+        real(kind=realType), parameter :: dtStop = 0.01_realType
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, ii, mm, nn, nnt, start
+
+        real(kind=realType) :: govgm1, kin
+        real(kind=realType) :: T, T2, tt, dt, h, htot, cp, scale, alp
+        real(kind=realType) :: intCport, intCportT, intCportTt
+        !
+        ! Determine the cp model used.
+
+        select case (cpModel)
+
+        case (cpConstant)
+
+            ! Constant cp and thus constant gamma. The well-known
+            ! formula is valid.
+
+            govgm1 = gammaInf / (gammaInf - one)
+
+            kin = half * (u * u + v * v + w * w)
+            ptot = p * ((one + rho * kin / (govgm1 * p))**govgm1)
+
+            !===============================================================
 #ifndef USE_TAPENADE
-    case (cpTempCurveFits)
+        case (cpTempCurveFits)
 
-       ! Cp is a function of the temperature. The formula used for
-       ! constant cp is not valid anymore and a more complicated
-       ! procedure must be followed.
+            ! Cp is a function of the temperature. The formula used for
+            ! constant cp is not valid anymore and a more complicated
+            ! procedure must be followed.
 
+            ! Compute the dimensional temperature and the scale
+            ! factor to convert the integral of cp to the correct
+            ! nonDimensional value.
 
-       ! Compute the dimensional temperature and the scale
-       ! factor to convert the integral of cp to the correct
-       ! nonDimensional value.
+            T = Tref * p / (RGas * rho)
+            scale = RGas / Tref
 
-       T     = Tref*p/(RGas*rho)
-       scale = RGas/Tref
+            ! Compute the enthalpy and the integrand of cp/(r*t) at the
+            ! given temperature. Take care of the exceptional situations.
 
-       ! Compute the enthalpy and the integrand of cp/(r*t) at the
-       ! given temperature. Take care of the exceptional situations.
+            if (T <= cpTrange(0)) then
 
-       if(T <= cpTrange(0)) then
+                ! Temperature is smaller than the smallest temperature in
+                ! the curve fit range. Use extrapolation using constant cp.
 
-          ! Temperature is smaller than the smallest temperature in
-          ! the curve fit range. Use extrapolation using constant cp.
+                nn = 0
+                cp = cv0 + one
+                h = scale * (cpHint(0) + cp * (T - cpTrange(0)))
 
-          nn = 0
-          cp = cv0+one
-          h  = scale*(cpHint(0) + cp*(T-cpTrange(0)))
+                intCportT = cp * log(T)
 
-          intCportT = cp*log(T)
+            else if (T >= cpTrange(cpNparts)) then
 
-       else if(T >= cpTrange(cpNparts)) then
+                ! Temperature is larger than the largest temperature in the
+                ! curve fit range. Use extrapolation using constant cp.
 
-          ! Temperature is larger than the largest temperature in the
-          ! curve fit range. Use extrapolation using constant cp.
+                nn = cpNparts + 1
+                cp = cvn + one
+                h = scale * (cpHint(cpNparts) + cp * (T - cpTrange(cpNparts)))
 
-          nn = cpNparts + 1
-          cp = cvn+one
-          h  = scale*(cpHint(cpNparts) + cp*(T-cpTrange(cpNparts)))
+                intCportT = cp * log(T)
 
-          intCportT = cp*log(T)
+            else
 
-       else
+                ! Temperature lies in the curve fit range. Find the correct
+                ! interval.
 
-          ! Temperature lies in the curve fit range. Find the correct
-          ! interval.
+                ii = cpNparts
+                start = 1
+                interval: do
 
-          ii    = cpNparts
-          start = 1
-          interval: do
+                    ! Next guess for the interval.
 
-             ! Next guess for the interval.
+                    nn = start + ii / 2
 
-             nn = start + ii/2
+                    ! Determine the situation we are having here.
 
-             ! Determine the situation we are having here.
+                    if (T > cpTrange(nn)) then
 
-             if(T > cpTrange(nn)) then
+                        ! Temperature is larger than the upper boundary of
+                        ! the current interval. Update the lower boundary.
 
-                ! Temperature is larger than the upper boundary of
-                ! the current interval. Update the lower boundary.
+                        start = nn + 1
+                        ii = ii - 1
 
-                start = nn + 1
-                ii    = ii - 1
+                    else if (T >= cpTrange(nn - 1)) then
 
-             else if(T >= cpTrange(nn-1)) then
+                        ! This is the correct range. Exit the do-loop.
 
-                ! This is the correct range. Exit the do-loop.
+                        exit
 
-                exit
+                    end if
 
-             endif
+                    ! Modify ii for the next branch to search.
 
-             ! Modify ii for the next branch to search.
+                    ii = ii / 2
 
-             ii = ii/2
+                end do interval
 
-          enddo interval
+                ! nn contains the correct curve fit interval.
+                ! Integrate cp to compute h and the integrand of cp/(r*t)
 
-          ! nn contains the correct curve fit interval.
-          ! Integrate cp to compute h and the integrand of cp/(r*t)
+                h = cpTempFit(nn)%eint0
+                intCportT = zero
 
-          h           = cpTempFit(nn)%eint0
-          intCportT = zero
+                do ii = 1, cpTempFit(nn)%nterm
+                    if (cpTempFit(nn)%exponents(ii) == -1_intType) then
+                        h = h + cpTempFit(nn)%constants(ii) * log(T)
+                    else
+                        mm = cpTempFit(nn)%exponents(ii) + 1
+                        T2 = T**mm
+                        h = h + cpTempFit(nn)%constants(ii) * T2 / mm
+                    end if
 
-          do ii=1,cpTempFit(nn)%nterm
-             if(cpTempFit(nn)%exponents(ii) == -1_intType) then
-                h = h + cpTempFit(nn)%constants(ii)*log(T)
-             else
-                mm = cpTempFit(nn)%exponents(ii) + 1
-                T2 = T**mm
-                h  = h + cpTempFit(nn)%constants(ii)*T2/mm
-             endif
+                    if (cpTempFit(nn)%exponents(ii) == 0_intType) then
+                        intCportT = intCportT &
+                                    + cpTempFit(nn)%constants(ii) * log(T)
+                    else
+                        mm = cpTempFit(nn)%exponents(ii)
+                        T2 = T**mm
+                        intCportT = intCportT &
+                                    + cpTempFit(nn)%constants(ii) * T2 / mm
+                    end if
+                end do
 
-             if(cpTempFit(nn)%exponents(ii) == 0_intType) then
-                intCportT = intCportT &
-                     + cpTempFit(nn)%constants(ii)*log(T)
-             else
-                mm = cpTempFit(nn)%exponents(ii)
-                T2 = T**mm
-                intCportT = intCportT &
-                     + cpTempFit(nn)%constants(ii)*T2/mm
-             endif
-          enddo
+                h = scale * h
 
-          h = scale*h
+            end if
 
-       endif
+            ! Compute the total enthalpy. Divide by scale to get the same
+            ! dimensions as for the integral of cp/r.
 
-       ! Compute the total enthalpy. Divide by scale to get the same
-       ! dimensions as for the integral of cp/r.
+            htot = (h + half * (u * u + v * v + w * w)) / scale
 
-       htot = (h + half*(u*u + v*v + w*w))/scale
+            ! Compute the corresponding total temperature. First determine
+            ! the situation we are having here.
 
-       ! Compute the corresponding total temperature. First determine
-       ! the situation we are having here.
+            if (htot <= cpHint(0)) then
 
-       if(htot <= cpHint(0)) then
+                ! Total enthalpy is smaller than the lowest value of the
+                ! curve fit. Use extrapolation using constant cp.
 
-          ! Total enthalpy is smaller than the lowest value of the
-          ! curve fit. Use extrapolation using constant cp.
+                nnt = 0
+                Tt = cpTrange(0) + (htot - cpHint(0)) / (cv0 + one)
 
-          nnt = 0
-          Tt  = cpTrange(0) + (htot - cpHint(0))/(cv0+one)
+            else if (htot >= cpHint(cpNparts)) then
 
-       else if(htot >= cpHint(cpNparts)) then
+                ! Total enthalpy is larger than the largest value of the
+                ! curve fit. Use extrapolation using constant cp.
 
-          ! Total enthalpy is larger than the largest value of the
-          ! curve fit. Use extrapolation using constant cp.
+                nnt = cpNparts + 1
+                Tt = cpTrange(cpNparts) &
+                     + (htot - cpHint(cpNparts)) / (cvn + one)
 
-          nnt = cpNparts + 1
-          Tt  = cpTrange(cpNparts) &
-               + (htot - cpHint(cpNparts))/(cvn+one)
+            else
 
-       else
+                ! Total temperature is in the range of the curve fits.
+                ! Use a newton algorithm to find the correct temperature.
+                ! First find the correct interval.
 
-          ! Total temperature is in the range of the curve fits.
-          ! Use a newton algorithm to find the correct temperature.
-          ! First find the correct interval.
+                ii = cpNparts
+                start = 1
+                intervalTt: do
 
-          ii    = cpNparts
-          start = 1
-          intervalTt: do
+                    ! Next guess for the interval.
 
-             ! Next guess for the interval.
+                    nnt = start + ii / 2
 
-             nnt = start + ii/2
+                    ! Determine the situation we are having here.
 
-             ! Determine the situation we are having here.
+                    if (htot > cpHint(nnt)) then
 
-             if(htot > cpHint(nnt)) then
+                        ! Enthalpy is larger than the upper boundary of
+                        ! the current interval. Update the lower boundary.
 
-                ! Enthalpy is larger than the upper boundary of
-                ! the current interval. Update the lower boundary.
+                        start = nnt + 1
+                        ii = ii - 1
 
-                start = nnt + 1
-                ii    = ii - 1
+                    else if (htot >= cpHint(nnt - 1)) then
 
-             else if(htot >= cpHint(nnt-1)) then
+                        ! This is the correct range. Exit the do-loop.
 
-                ! This is the correct range. Exit the do-loop.
+                        exit
 
-                exit
+                    end if
 
-             endif
+                    ! Modify ii for the next branch to search.
 
-             ! Modify ii for the next branch to search.
+                    ii = ii / 2
 
-             ii = ii/2
+                end do intervalTt
 
-          enddo intervalTt
+                ! Nnt contains the range in which the newton algorithm must
+                ! be applied. Initial guess of the total temperature.
 
-          ! Nnt contains the range in which the newton algorithm must
-          ! be applied. Initial guess of the total temperature.
+                alp = (cpHint(nnt) - htot) / (cpHint(nnt) - cpHint(nnt - 1))
+                Tt = alp * cpTrange(nnt - 1) + (one - alp) * cpTrange(nnt)
 
-          alp = (cpHint(nnt) - htot)/(cpHint(nnt) - cpHint(nnt-1))
-          Tt  = alp*cpTrange(nnt-1) + (one-alp)*cpTrange(nnt)
+                ! The actual newton algorithm to compute the total
+                ! temperature.
 
-          ! The actual newton algorithm to compute the total
-          ! temperature.
+                newton: do
 
-          newton: do
+                    ! Compute the energy as well as the value of cv/r for the
+                    ! given temperature.
 
-             ! Compute the energy as well as the value of cv/r for the
-             ! given temperature.
+                    cp = zero
+                    h = cpTempFit(nnt)%eint0
 
-             cp = zero
-             h  = cpTempFit(nnt)%eint0
+                    do ii = 1, cpTempFit(nnt)%nterm
 
-             do ii=1,cpTempFit(nnt)%nterm
+                        ! Update cp.
 
-                ! Update cp.
+                        T2 = Tt**(cpTempFit(nnt)%exponents(ii))
+                        cp = cp + cpTempFit(nnt)%constants(ii) * t2
 
-                T2 = Tt**(cpTempFit(nnt)%exponents(ii))
-                cp = cp + cpTempFit(nnt)%constants(ii)*t2
+                        ! Update h, for which this contribution must be
+                        ! integrated. Take the exceptional case that the
+                        ! exponent == -1 into account.
 
-                ! Update h, for which this contribution must be
-                ! integrated. Take the exceptional case that the
-                ! exponent == -1 into account.
+                        if (cpTempFit(nnt)%exponents(ii) == -1_intType) then
+                            h = h + cpTempFit(nnt)%constants(ii) * log(Tt)
+                        else
+                            h = h + cpTempFit(nnt)%constants(ii) * t2 * Tt &
+                                / (cpTempFit(nnt)%exponents(ii) + 1)
+                        end if
 
-                if(cpTempFit(nnt)%exponents(ii) == -1_intType) then
-                   h = h + cpTempFit(nnt)%constants(ii)*log(Tt)
-                else
-                   h = h + cpTempFit(nnt)%constants(ii)*t2*Tt &
-                        / (cpTempFit(nnt)%exponents(ii) + 1)
-                endif
+                    end do
 
-             enddo
+                    ! Compute the update and the new total temperature.
 
-             ! Compute the update and the new total temperature.
+                    dT = (htot - h) / cp
+                    Tt = Tt + dT
 
-             dT = (htot - h)/cp
-             Tt = Tt + dT
+                    ! Exit the newton loop if the update is smaller than the
+                    ! threshold value.
 
-             ! Exit the newton loop if the update is smaller than the
-             ! threshold value.
+                    if (abs(dT) < dTStop) exit
 
-             if(abs(dT) < dTStop) exit
+                end do newton
 
-          enddo newton
+            end if
 
-       endif
+            ! To compute the total pressure, the integral of cp/(r*T)
+            ! must be computed from T = T to T = Tt. Compute the integrand
+            ! at T = Tt; take care of the exceptional situations.
 
-       ! To compute the total pressure, the integral of cp/(r*T)
-       ! must be computed from T = T to T = Tt. Compute the integrand
-       ! at T = Tt; take care of the exceptional situations.
+            if (Tt <= cpTrange(0)) then
+                intCportTt = (cv0 + one) * log(Tt)
+            else if (Tt >= cpTrange(cpNparts)) then
+                intCportTt = (cvn + one) * log(Tt)
+            else
 
-       if(Tt <= cpTrange(0)) then
-          intCportTt = (cv0+one)*log(Tt)
-       else if( Tt >= cpTrange(cpNparts)) then
-          intCportTt = (cvn+one)*log(Tt)
-       else
-
-          intCportTt = zero
-          do ii=1,cpTempFit(nnt)%nterm
-             if(cpTempFit(nnt)%exponents(ii) == 0_intType) then
-                intCportTt = intCportTt &
-                     + cpTempFit(nnt)%constants(ii)*log(Tt)
-             else
-                mm = cpTempFit(nnt)%exponents(ii)
-                T2 = Tt**mm
-                intCportTt = intCportTt &
-                     + cpTempFit(nnt)%constants(ii)*T2/mm
-             endif
-          enddo
+                intCportTt = zero
+                do ii = 1, cpTempFit(nnt)%nterm
+                    if (cpTempFit(nnt)%exponents(ii) == 0_intType) then
+                        intCportTt = intCportTt &
+                                     + cpTempFit(nnt)%constants(ii) * log(Tt)
+                    else
+                        mm = cpTempFit(nnt)%exponents(ii)
+                        T2 = Tt**mm
+                        intCportTt = intCportTt &
+                                     + cpTempFit(nnt)%constants(ii) * T2 / mm
+                    end if
+                end do
 
-       endif
+            end if
 
-       ! Compute the integral of cp/(r*T) from T to Tt. First
-       ! substract the lower boundary from the upper boundary.
+            ! Compute the integral of cp/(r*T) from T to Tt. First
+            ! substract the lower boundary from the upper boundary.
 
-       intCport = intCportTt - intCportT
+            intCport = intCportTt - intCportT
 
-       ! Add the contributions from the possible internal curve fit
-       ! boundaries if Tt and T are in different curve fit intervals.
+            ! Add the contributions from the possible internal curve fit
+            ! boundaries if Tt and T are in different curve fit intervals.
 
-       do mm=(nn+1),nnt
-          ii = mm - 1
+            do mm = (nn + 1), nnt
+                ii = mm - 1
 
-          if(ii == 0_intType) then
-             intCport = intCport + (cv0+one)*log(cpTrange(0))
-          else
-             intCport = intCport + cpTempFit(ii)%intCpovrt_2
-          endif
+                if (ii == 0_intType) then
+                    intCport = intCport + (cv0 + one) * log(cpTrange(0))
+                else
+                    intCport = intCport + cpTempFit(ii)%intCpovrt_2
+                end if
 
-          if(mm > cpNparts) then
-             intCport = intCport - (cvn+one)*log(cpTrange(cpNparts))
-          else
-             intCport = intCport - cpTempFit(mm)%intCpovrt_1
-          endif
-       enddo
+                if (mm > cpNparts) then
+                    intCport = intCport - (cvn + one) * log(cpTrange(cpNparts))
+                else
+                    intCport = intCport - cpTempFit(mm)%intCpovrt_1
+                end if
+            end do
 
-       ! And finally, compute the total pressure.
+            ! And finally, compute the total pressure.
 
-       ptot = p*exp(intCport)
+            ptot = p * exp(intCport)
 #endif
-    end select
-
-  end subroutine computePtot
-
-  subroutine computeSpeedOfSoundSquared
-
-    !
-    !       computeSpeedOfSoundSquared does what it says.
-    !
-    use constants
-    use blockPointers, only : ie, je, ke, w, p, aa, gamma
-    use utils, only : getCorrectForK
-    implicit none
-    !
-    !      Local variables.
-    !
-    real(kind=realType), PARAMETER :: twothird=two*third
-    integer(kind=intType) :: i, j, k, ii
-    real(kind=realType) :: pp
-    logical :: correctForK
-
-    ! Determine if we need to correct for K
-    correctForK = getCorrectForK()
-
-    if (correctForK) then
+        end select
+
+    end subroutine computePtot
+
+    subroutine computeSpeedOfSoundSquared
+
+        !
+        !       computeSpeedOfSoundSquared does what it says.
+        !
+        use constants
+        use blockPointers, only: ie, je, ke, w, p, aa, gamma
+        use utils, only: getCorrectForK
+        implicit none
+        !
+        !      Local variables.
+        !
+        real(kind=realType), PARAMETER :: twothird = two * third
+        integer(kind=intType) :: i, j, k, ii
+        real(kind=realType) :: pp
+        logical :: correctForK
+
+        ! Determine if we need to correct for K
+        correctForK = getCorrectForK()
+
+        if (correctForK) then
 #ifdef TAPENADE_REVERSE
-       !$AD II-LOOP
-       do ii=0,ie*je*ke - 1
-          i = mod(ii, ie) + 1
-          j = mod(ii/ie, je) + 1
-          k = ii/(ie*je) + 1
+            !$AD II-LOOP
+            do ii = 0, ie * je * ke - 1
+                i = mod(ii, ie) + 1
+                j = mod(ii / ie, je) + 1
+                k = ii / (ie * je) + 1
 #else
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
 #endif
-                   pp = p(i,j,k) - twoThird*w(i,j,k,irho)*w(i,j,k,itu1)
-                   aa(i,j,k) = gamma(i,j,k)*pp/w(i,j,k,irho)
+                            pp = p(i, j, k) - twoThird * w(i, j, k, irho) * w(i, j, k, itu1)
+                            aa(i, j, k) = gamma(i, j, k) * pp / w(i, j, k, irho)
 #ifdef TAPENADE_REVERSE
-                end do
+                        end do
 #else
-             enddo
-          enddo
-       enddo
+                    end do
+                end do
+            end do
 #endif
-    else
+        else
 #ifdef TAPENADE_REVERSE
-       !$AD II-LOOP
-       do ii=0,ie*je*ke - 1
-          i = mod(ii, ie) + 1
-          j = mod(ii/ie, je) + 1
-          k = ii/(ie*je) + 1
+            !$AD II-LOOP
+            do ii = 0, ie * je * ke - 1
+                i = mod(ii, ie) + 1
+                j = mod(ii / ie, je) + 1
+                k = ii / (ie * je) + 1
 #else
-          do k=1,ke
-             do j=1,je
-                do i=1,ie
+                do k = 1, ke
+                    do j = 1, je
+                        do i = 1, ie
 #endif
-                   aa(i,j,k) = gamma(i,j,k)*p(i,j,k)/w(i,j,k,irho)
+                            aa(i, j, k) = gamma(i, j, k) * p(i, j, k) / w(i, j, k, irho)
 #ifdef TAPENADE_REVERSE
-                end do
+                        end do
 #else
-             enddo
-          enddo
-       enddo
+                    end do
+                end do
+            end do
 #endif
-    end if
-  end subroutine computeSpeedOfSoundSquared
- subroutine computeEtotBlock(iStart,iEnd, jStart,jEnd, kStart, kEnd, &
-       correctForK)
-    !
-    !       ComputeEtot computes the total energy from the given density,
-    !       velocity and presssure. For a calorically and thermally
-    !       perfect gas the well-known expression is used; for only a
-    !       thermally perfect gas, cp is a function of temperature, curve
-    !       fits are used and a more complex expression is obtained.
-    !       It is assumed that the pointers in blockPointers already
-    !       point to the correct block.
-    !
-    use constants
-    use blockPointers, only : w, p
-    use flowVarRefState, only : RGas, Tref
-    use inputPhysics, only : cpModel, gammaConstant
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: iStart,iEnd, jStart,jEnd
-    integer(kind=intType), intent(in) :: kStart, kEnd
-    logical,               intent(in) :: correctForK
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, ii, iSize, jSize, kSize
-    real(kind=realType)   :: ovgm1, factK, scale
-    !
-    ! Determine the cp model used in the computation.
-
-    select case (cpModel)
-
-    case (cpConstant)
-
-       ! Constant cp and thus constant gamma.
-       ! Abbreviate 1/(gamma -1) a bit easier.
-
-       ovgm1 = one/(gammaConstant - one)
-
-       ! Loop over the given range of the block and compute the first
-       ! step of the energy.
-
-    iSize = (iEnd-iStart)+1
-    jSize = (jEnd-jStart)+1
-    kSize = (kEnd-kStart)+1
-    factK = ovgm1*(five*third - gammaConstant)
-
-if (correctForK) then
+        end if
+    end subroutine computeSpeedOfSoundSquared
+    subroutine computeEtotBlock(iStart, iEnd, jStart, jEnd, kStart, kEnd, &
+                                correctForK)
+        !
+        !       ComputeEtot computes the total energy from the given density,
+        !       velocity and presssure. For a calorically and thermally
+        !       perfect gas the well-known expression is used; for only a
+        !       thermally perfect gas, cp is a function of temperature, curve
+        !       fits are used and a more complex expression is obtained.
+        !       It is assumed that the pointers in blockPointers already
+        !       point to the correct block.
+        !
+        use constants
+        use blockPointers, only: w, p
+        use flowVarRefState, only: RGas, Tref
+        use inputPhysics, only: cpModel, gammaConstant
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: iStart, iEnd, jStart, jEnd
+        integer(kind=intType), intent(in) :: kStart, kEnd
+        logical, intent(in) :: correctForK
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, ii, iSize, jSize, kSize
+        real(kind=realType) :: ovgm1, factK, scale
+        !
+        ! Determine the cp model used in the computation.
+
+        select case (cpModel)
+
+        case (cpConstant)
+
+            ! Constant cp and thus constant gamma.
+            ! Abbreviate 1/(gamma -1) a bit easier.
+
+            ovgm1 = one / (gammaConstant - one)
+
+            ! Loop over the given range of the block and compute the first
+            ! step of the energy.
+
+            iSize = (iEnd - iStart) + 1
+            jSize = (jEnd - jStart) + 1
+            kSize = (kEnd - kStart) + 1
+            factK = ovgm1 * (five * third - gammaConstant)
+
+            if (correctForK) then
 #ifdef TAPENADE_REVERSE
-    !$AD II-LOOP
-    do ii=0, iSize*jSize*kSize-1
-       i = mod(ii, iSize) + iStart
-       j = mod(ii/(iSize), jSize) + jStart
-       k = ii/((iSize*jSize)) + kStart
+                !$AD II-LOOP
+                do ii = 0, iSize * jSize * kSize - 1
+                    i = mod(ii, iSize) + iStart
+                    j = mod(ii / (iSize), jSize) + jStart
+                    k = ii / ((iSize * jSize)) + kStart
 #else
-       do k=kStart, kEnd
-          do j=jStart, jEnd
-             do i=iStart, iEnd
+                    do k = kStart, kEnd
+                        do j = jStart, jEnd
+                            do i = iStart, iEnd
 #endif
-                w(i,j,k,irhoE) = ovgm1*p(i,j,k) &
-                     + half*w(i,j,k,irho)*(w(i,j,k,ivx)**2 &
-                     +                     w(i,j,k,ivy)**2 &
-                     +                     w(i,j,k,ivz)**2) - &
-                     factK*w(i,j,k,irho)*w(i,j,k,itu1)
+                                w(i, j, k, irhoE) = ovgm1 * p(i, j, k) &
+                                                    + half * w(i, j, k, irho) * (w(i, j, k, ivx)**2 &
+                                                                                 + w(i, j, k, ivy)**2 &
+                                                                                 + w(i, j, k, ivz)**2) - &
+                                                    factK * w(i, j, k, irho) * w(i, j, k, itu1)
 
 #ifdef TAPENADE_REVERSE
-             end do
+                            end do
 #else
-          end do
-       end do
-    end do
+                        end do
+                    end do
+                end do
 #endif
 
- else
+            else
 #ifdef TAPENADE_REVERSE
-    !$AD II-LOOP
-    do ii=0, iSize*jSize*kSize-1
-       i = mod(ii, iSize) + iStart
-       j = mod(ii/(iSize), jSize) + jStart
-       k = ii/((iSize*jSize)) + kStart
+                !$AD II-LOOP
+                do ii = 0, iSize * jSize * kSize - 1
+                    i = mod(ii, iSize) + iStart
+                    j = mod(ii / (iSize), jSize) + jStart
+                    k = ii / ((iSize * jSize)) + kStart
 #else
-       do k=kStart, kEnd
-          do j=jStart, jEnd
-             do i=iStart, iEnd
+                    do k = kStart, kEnd
+                        do j = jStart, jEnd
+                            do i = iStart, iEnd
 #endif
-                w(i,j,k,irhoE) = ovgm1*p(i,j,k) &
-                     + half*w(i,j,k,irho)*(w(i,j,k,ivx)**2 &
-                     +                     w(i,j,k,ivy)**2 &
-                     +                     w(i,j,k,ivz)**2)
+                                w(i, j, k, irhoE) = ovgm1 * p(i, j, k) &
+                                                    + half * w(i, j, k, irho) * (w(i, j, k, ivx)**2 &
+                                                                                 + w(i, j, k, ivy)**2 &
+                                                                                 + w(i, j, k, ivz)**2)
 #ifdef TAPENADE_REVERSE
-             end do
+                            end do
 #else
-          end do
-       end do
-    end do
+                        end do
+                    end do
+                end do
 #endif
- end if
-
+            end if
 
 #ifndef USE_TAPENADE
-    case (cpTempCurveFits)
+        case (cpTempCurveFits)
 
-       ! Cp as function of the temperature is given via curve fits.
+            ! Cp as function of the temperature is given via curve fits.
 
-       ! Store a scale factor to compute the nonDimensional
-       ! internal energy.
+            ! Store a scale factor to compute the nonDimensional
+            ! internal energy.
 
-       scale = RGas/Tref
+            scale = RGas / Tref
 
-       ! Loop over the given range of the block.
+            ! Loop over the given range of the block.
 
-       do k=kStart,kEnd
-          do j=jStart,jEnd
-             do i=iStart,iEnd
-                call computeEtotCellCpfit(i, j, k, scale, &
-                     correctForK)
-             enddo
-          enddo
-       enddo
+            do k = kStart, kEnd
+                do j = jStart, jEnd
+                    do i = iStart, iEnd
+                        call computeEtotCellCpfit(i, j, k, scale, &
+                                                  correctForK)
+                    end do
+                end do
+            end do
 #endif
 
-    end select
-  end subroutine computeEtotBlock
-
-  subroutine etot(rho, u, v, w, p, k, etotal, correctForK)
-    !
-    !       EtotArray computes the total energy from the given density,
-    !       velocity and presssure.
-    !       First the internal energy per unit mass is computed and after
-    !       that the kinetic energy is added as well the conversion to
-    !       energy per unit volume.
-    !
-    use constants
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    real(kind=realType), intent(in)  :: rho, p, k
-    real(kind=realType), intent(in)  :: u, v, w
-    real(kind=realType), intent(out) :: etotal
-    logical, intent(in) :: correctForK
-
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i
-
-    ! Compute the internal energy for unit mass.
-
-    call eint(rho, p, k, etotal, correctForK)
-    etotal = rho*(etotal &
-         +         half*(u*u + v*v + w*w))
-
-  end subroutine etot
-
-  !      ==================================================================
-
-  subroutine eint(rho, p, k, einternal, correctForK)
-    !
-    !       EintArray computes the internal energy per unit mass from the
-    !       given density and pressure (and possibly turbulent energy)
-    !       For a calorically and thermally perfect gas the well-known
-    !       expression is used; for only a thermally perfect gas, cp is a
-    !       function of temperature, curve fits are used and a more
-    !       complex expression is obtained.
-    !
-    use constants
-    use cpCurveFits
-    use flowVarRefState, only : RGas, Tref
-    use inputPhysics, only : cpModel, gammaConstant
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    real(kind=realType), intent(in)  :: rho, p, k
-    real(kind=realType), intent(out) :: eInternal
-    logical, intent(in)                             :: correctForK
-    !
-    !      Local parameter.
-    !
-    real(kind=realType), parameter :: twoThird = two*third
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, nn, mm, ii, start
-
-    real(kind=realType) :: ovgm1, factK, pp, t, t2, scale
-
-    ! Determine the cp model used in the computation.
-
-    select case (cpModel)
-
-    case (cpConstant)
-
-       ! Abbreviate 1/(gamma -1) a bit easier.
-
-       ovgm1 = one/(gammaConstant - one)
-
-       ! Loop over the number of elements of the array and compute
-       ! the total energy.
-
-       eInternal = ovgm1*p/rho
-
-       ! Second step. Correct the energy in case a turbulent kinetic
-       ! energy is present.
-
-       if( correctForK ) then
-
-          factK = ovgm1*(five*third - gammaConstant)
-
-          eInternal = eInternal - factK*k
-
-       endif
+        end select
+    end subroutine computeEtotBlock
+
+    subroutine etot(rho, u, v, w, p, k, etotal, correctForK)
+        !
+        !       EtotArray computes the total energy from the given density,
+        !       velocity and presssure.
+        !       First the internal energy per unit mass is computed and after
+        !       that the kinetic energy is added as well the conversion to
+        !       energy per unit volume.
+        !
+        use constants
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        real(kind=realType), intent(in) :: rho, p, k
+        real(kind=realType), intent(in) :: u, v, w
+        real(kind=realType), intent(out) :: etotal
+        logical, intent(in) :: correctForK
+
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i
+
+        ! Compute the internal energy for unit mass.
+
+        call eint(rho, p, k, etotal, correctForK)
+        etotal = rho * (etotal &
+                        + half * (u * u + v * v + w * w))
+
+    end subroutine etot
+
+    !      ==================================================================
+
+    subroutine eint(rho, p, k, einternal, correctForK)
+        !
+        !       EintArray computes the internal energy per unit mass from the
+        !       given density and pressure (and possibly turbulent energy)
+        !       For a calorically and thermally perfect gas the well-known
+        !       expression is used; for only a thermally perfect gas, cp is a
+        !       function of temperature, curve fits are used and a more
+        !       complex expression is obtained.
+        !
+        use constants
+        use cpCurveFits
+        use flowVarRefState, only: RGas, Tref
+        use inputPhysics, only: cpModel, gammaConstant
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        real(kind=realType), intent(in) :: rho, p, k
+        real(kind=realType), intent(out) :: eInternal
+        logical, intent(in) :: correctForK
+        !
+        !      Local parameter.
+        !
+        real(kind=realType), parameter :: twoThird = two * third
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, nn, mm, ii, start
+
+        real(kind=realType) :: ovgm1, factK, pp, t, t2, scale
+
+        ! Determine the cp model used in the computation.
+
+        select case (cpModel)
+
+        case (cpConstant)
+
+            ! Abbreviate 1/(gamma -1) a bit easier.
+
+            ovgm1 = one / (gammaConstant - one)
+
+            ! Loop over the number of elements of the array and compute
+            ! the total energy.
+
+            eInternal = ovgm1 * p / rho
+
+            ! Second step. Correct the energy in case a turbulent kinetic
+            ! energy is present.
+
+            if (correctForK) then
+
+                factK = ovgm1 * (five * third - gammaConstant)
+
+                eInternal = eInternal - factK * k
+
+            end if
 
 #ifndef USE_TAPENADE
-    case (cpTempCurveFits)
-
-       ! Cp as function of the temperature is given via curve fits.
+        case (cpTempCurveFits)
 
-       ! Store a scale factor to compute the nonDimensional
-       ! internal energy.
+            ! Cp as function of the temperature is given via curve fits.
 
-       scale = RGas/Tref
+            ! Store a scale factor to compute the nonDimensional
+            ! internal energy.
 
-       ! Loop over the number of elements of the array
+            scale = RGas / Tref
 
+            ! Loop over the number of elements of the array
 
-       ! Compute the dimensional temperature.
+            ! Compute the dimensional temperature.
 
-       pp = p
-       if( correctForK ) pp = pp - twoThird*rho*k
-       t = Tref*pp/(RGas*rho)
+            pp = p
+            if (correctForK) pp = pp - twoThird * rho * k
+            t = Tref * pp / (RGas * rho)
 
-       ! Determine the case we are having here.
+            ! Determine the case we are having here.
 
-       if(t <= cpTrange(0)) then
+            if (t <= cpTrange(0)) then
 
-          ! Temperature is less than the smallest temperature
-          ! in the curve fits. Use extrapolation using
-          ! constant cv.
+                ! Temperature is less than the smallest temperature
+                ! in the curve fits. Use extrapolation using
+                ! constant cv.
 
-          eInternal = scale*(cpEint(0) + cv0*(t - cpTrange(0)))
+                eInternal = scale * (cpEint(0) + cv0 * (t - cpTrange(0)))
 
-       else if(t >= cpTrange(cpNparts)) then
+            else if (t >= cpTrange(cpNparts)) then
 
-          ! Temperature is larger than the largest temperature
-          ! in the curve fits. Use extrapolation using
-          ! constant cv.
+                ! Temperature is larger than the largest temperature
+                ! in the curve fits. Use extrapolation using
+                ! constant cv.
 
-          eInternal = scale*(cpEint(cpNparts) &
-               +        cvn*(t - cpTrange(cpNparts)))
+                eInternal = scale * (cpEint(cpNparts) &
+                                     + cvn * (t - cpTrange(cpNparts)))
 
-       else
+            else
 
-          ! Temperature is in the curve fit range.
-          ! First find the valid range.
+                ! Temperature is in the curve fit range.
+                ! First find the valid range.
 
-          ii    = cpNparts
-          start = 1
-          interval: do
+                ii = cpNparts
+                start = 1
+                interval: do
 
-             ! Next guess for the interval.
+                    ! Next guess for the interval.
 
-             nn = start + ii/2
+                    nn = start + ii / 2
 
-             ! Determine the situation we are having here.
+                    ! Determine the situation we are having here.
 
-             if(t > cpTrange(nn)) then
+                    if (t > cpTrange(nn)) then
 
-                ! Temperature is larger than the upper boundary of
-                ! the current interval. Update the lower boundary.
+                        ! Temperature is larger than the upper boundary of
+                        ! the current interval. Update the lower boundary.
 
-                start = nn + 1
-                ii    = ii - 1
+                        start = nn + 1
+                        ii = ii - 1
 
-             else if(t >= cpTrange(nn-1)) then
+                    else if (t >= cpTrange(nn - 1)) then
 
-                ! This is the correct range. Exit the do-loop.
+                        ! This is the correct range. Exit the do-loop.
 
-                exit
+                        exit
 
-             endif
+                    end if
 
-             ! Modify ii for the next branch to search.
+                    ! Modify ii for the next branch to search.
 
-             ii = ii/2
+                    ii = ii / 2
 
-          enddo interval
+                end do interval
 
-          ! Nn contains the correct curve fit interval.
-          ! Integrate cv to compute eInternal.
+                ! Nn contains the correct curve fit interval.
+                ! Integrate cv to compute eInternal.
 
-          eInternal = cpTempFit(nn)%eint0 - t
-          do ii=1,cpTempFit(nn)%nterm
-             if(cpTempFit(nn)%exponents(ii) == -1) then
-                eInternal = eInternal &
-                     + cpTempFit(nn)%constants(ii)*log(t)
-             else
-                mm   = cpTempFit(nn)%exponents(ii) + 1
-                t2   = t**mm
-                eInternal = eInternal &
-                     + cpTempFit(nn)%constants(ii)*t2/mm
-             endif
-          enddo
+                eInternal = cpTempFit(nn)%eint0 - t
+                do ii = 1, cpTempFit(nn)%nterm
+                    if (cpTempFit(nn)%exponents(ii) == -1) then
+                        eInternal = eInternal &
+                                    + cpTempFit(nn)%constants(ii) * log(t)
+                    else
+                        mm = cpTempFit(nn)%exponents(ii) + 1
+                        t2 = t**mm
+                        eInternal = eInternal &
+                                    + cpTempFit(nn)%constants(ii) * t2 / mm
+                    end if
+                end do
 
-          eInternal = scale*eInternal
+                eInternal = scale * eInternal
 
-       endif
+            end if
 
-       ! Add the turbulent energy if needed.
+            ! Add the turbulent energy if needed.
 
-       if( correctForK ) eInternal = eInternal + k
+            if (correctForK) eInternal = eInternal + k
 
 #endif
-    end select
-
-  end subroutine eint
-
-  subroutine computePressureSimple(includeHalos)
-
-    ! Compute the pressure on a block with the pointers already set. This
-    ! routine is used by the forward mode AD code only.
-
-    use constants
-    use blockPointers
-    use flowVarRefState
-    use inputPhysics
-    implicit none
-
-    ! Input parameter
-    logical, intent(in) :: includeHalos
-
-    ! Local Variables
-    integer(kind=intType) :: i, j, k, ii
-    real(kind=realType) :: gm1, v2
-    integer(kind=intType) :: iBeg, iEnd, iSize, jBeg, jEnd, jSize, kBeg, kEnd, kSize
-    ! Compute the pressures
-    gm1 = gammaConstant - one
-
-    if (includeHalos) then
-       iBeg = 0
-       jBeg = 0
-       kBeg = 0
-       iEnd = ib
-       jEnd = jb
-       kEnd = kb
-    else
-       iBeg = 2
-       jBeg = 2
-       kBeg = 2
-       iEnd = il
-       jEnd = jl
-       kEnd = kl
-    end if
+        end select
+
+    end subroutine eint
+
+    subroutine computePressureSimple(includeHalos)
+
+        ! Compute the pressure on a block with the pointers already set. This
+        ! routine is used by the forward mode AD code only.
+
+        use constants
+        use blockPointers
+        use flowVarRefState
+        use inputPhysics
+        implicit none
+
+        ! Input parameter
+        logical, intent(in) :: includeHalos
+
+        ! Local Variables
+        integer(kind=intType) :: i, j, k, ii
+        real(kind=realType) :: gm1, v2
+        integer(kind=intType) :: iBeg, iEnd, iSize, jBeg, jEnd, jSize, kBeg, kEnd, kSize
+        ! Compute the pressures
+        gm1 = gammaConstant - one
+
+        if (includeHalos) then
+            iBeg = 0
+            jBeg = 0
+            kBeg = 0
+            iEnd = ib
+            jEnd = jb
+            kEnd = kb
+        else
+            iBeg = 2
+            jBeg = 2
+            kBeg = 2
+            iEnd = il
+            jEnd = jl
+            kEnd = kl
+        end if
 
 #ifdef TAPENADE_REVERSE
-    iSize = (iEnd-iBeg)+1
-    jSize = (jEnd-jBeg)+1
-    kSize = (kEnd-kBeg)+1
-
-    !$AD II-LOOP
-    do ii=0, iSize*jSize*kSize-1
-       i = mod(ii, iSize) + iBeg
-       j = mod(ii/(iSize), jSize) + jBeg
-       k = ii/((iSize*jSize)) + kBeg
+        iSize = (iEnd - iBeg) + 1
+        jSize = (jEnd - jBeg) + 1
+        kSize = (kEnd - kBeg) + 1
+
+        !$AD II-LOOP
+        do ii = 0, iSize * jSize * kSize - 1
+            i = mod(ii, iSize) + iBeg
+            j = mod(ii / (iSize), jSize) + jBeg
+            k = ii / ((iSize * jSize)) + kBeg
 #else
-       do k=kBeg, kEnd
-          do j=jBeg, jEnd
-             do i=iBeg, iEnd
+            do k = kBeg, kEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
 #endif
-                v2 = w(i, j, k, ivx)**2 + w(i, j, k, ivy)**2 + w(i, j, k, ivz)**2
-                p(i, j, k) = gm1*(w(i, j, k, irhoE) - half*w( i, j, k, irho)*v2)
-                p(i, j, k) = max(p(i, j, k), 1.e-4_realType*pInfCorr)
+                        v2 = w(i, j, k, ivx)**2 + w(i, j, k, ivy)**2 + w(i, j, k, ivz)**2
+                        p(i, j, k) = gm1 * (w(i, j, k, irhoE) - half * w(i, j, k, irho) * v2)
+                        p(i, j, k) = max(p(i, j, k), 1.e-4_realType * pInfCorr)
 
 #ifdef TAPENADE_REVERSE
-             end do
+                    end do
 #else
-          end do
-       end do
-    end do
+                end do
+            end do
+        end do
 #endif
-  end subroutine computePressureSimple
-
-  subroutine computePressure(iBeg, iEnd, jBeg, jEnd, kBeg, kEnd, &
-       pointerOffset)
-    !
-    !       computePressure computes the pressure from the total energy,
-    !       density and velocities in the given cell range of the block to
-    !       which the pointers in blockPointers currently point.
-    !       It is possible to specify a possible pointer offset, because
-    !       this routine is also used when reading a restart file.
-    !
-    use constants
-    use inputPhysics, only : cpModel, gammaConstant
-    use blockPointers, only : w, p
-    use flowVarRefState, only : kPresent, Pinf, RGas, TRef
-    use cpCurveFits
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: iBeg,iEnd,jBeg,jEnd,kBeg,kEnd
-    integer(kind=intType), intent(in) :: pointerOffset
-    !
-    !      Local parameters.
-    !
-    real(kind=realType), parameter :: dTStop  = 0.01_realType
-    real(kind=realType), parameter :: twothird = two*third
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: i, j, k, ip, jp, kp, nn, ii, start
-
-    real(kind=realType) :: gm1, factK, v2, scale, e0, e
-    real(kind=realType) :: TRefInv, T, dT, T2, alp, cv
-
-    ! Determine the cp model used in the computation.
-
-    select case (cpModel)
-
-    case (cpConstant)
-
-       ! Constant cp and thus constant gamma. The relation
-       ! eint = cv*T can be used and consequently the standard
-       ! relation between pressure and internal energy is valid.
-
-       ! Abbreviate some constants that occur in the pressure
-       ! computation.
-
-       gm1   = gammaConstant - one
-       factK = five*third - gammaConstant
-
-       ! Loop over the cells. Take the possible pointer
-       ! offset into account and store the pressure in the
-       ! position w(:,:,:, irhoE).
-
-       do k=kBeg,kEnd
-          kp = k + pointerOffset
-          do j=jBeg,jEnd
-             jp = j + pointerOffset
-             do i=iBeg,iEnd
-                ip = i + pointerOffset
-
-                v2 = w(ip,jp,kp,ivx)**2 + w(ip,jp,kp,ivy)**2 &
-                     + w(ip,jp,kp,ivz)**2
-                w(i,j,k,irhoE) = gm1*(w(ip,jp,kp,irhoE) &
-                     - half*w(ip,jp,kp,irho)*v2)
-                w(i,j,k,irhoE) = max(w(i,j,k,irhoE), &
-                     1.e-5_realType*pInf)
-             enddo
-          enddo
-       enddo
-
-       ! Correct p if a k-equation is present.
-
-       if( kPresent ) then
-          do k=kBeg,kEnd
-             kp = k + pointerOffset
-             do j=jBeg,jEnd
-                jp = j + pointerOffset
-                do i=iBeg,iEnd
-                   ip = i + pointerOffset
-
-                   w(i,j,k,irhoE) = w(i,j,k,irhoE) &
-                        + factK*w(ip,jp,kp,irho) &
-                        * w(ip,jp,kp,itu1)
-                enddo
-             enddo
-          enddo
-       endif
-
-       !        ================================================================
-
-    case (cpTempCurveFits)
-
-       ! Cp as function of the temperature is given via curve fits.
-
-       ! Store a scale factor when converting the nonDimensional
-       ! energy to the units of cpEint
-
-       TRefInv = one/TRef
-       scale   = TRef/RGas
-
-       ! Loop over the cells to compute the internal energy per
-       ! unit mass. This is stored in w(:,:,:,irhoE) for the moment.
-
-       do k=kBeg,kEnd
-          kp = k + pointerOffset
-          do j=jBeg,jEnd
-             jp = j + pointerOffset
-             do i=iBeg,iEnd
-                ip = i + pointerOffset
-
-                w(i,j,k,irhoE) = w(ip,jp,kp,irhoE)/w(ip,jp,kp,irho)
-                if( kPresent ) &
-                     w(i,j,k,irhoE) = w(i,j,k,irhoE) - w(ip,jp,kp,itu1)
-
-                v2 = w(ip,jp,kp,ivx)**2 + w(ip,jp,kp,ivy)**2 &
-                     + w(ip,jp,kp,ivz)**2
-                w(i,j,k,irhoE) = w(i,j,k,irhoE) - half*v2
-             enddo
-          enddo
-       enddo
+    end subroutine computePressureSimple
+
+    subroutine computePressure(iBeg, iEnd, jBeg, jEnd, kBeg, kEnd, &
+                               pointerOffset)
+        !
+        !       computePressure computes the pressure from the total energy,
+        !       density and velocities in the given cell range of the block to
+        !       which the pointers in blockPointers currently point.
+        !       It is possible to specify a possible pointer offset, because
+        !       this routine is also used when reading a restart file.
+        !
+        use constants
+        use inputPhysics, only: cpModel, gammaConstant
+        use blockPointers, only: w, p
+        use flowVarRefState, only: kPresent, Pinf, RGas, TRef
+        use cpCurveFits
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
+        integer(kind=intType), intent(in) :: pointerOffset
+        !
+        !      Local parameters.
+        !
+        real(kind=realType), parameter :: dTStop = 0.01_realType
+        real(kind=realType), parameter :: twothird = two * third
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, ip, jp, kp, nn, ii, start
+
+        real(kind=realType) :: gm1, factK, v2, scale, e0, e
+        real(kind=realType) :: TRefInv, T, dT, T2, alp, cv
+
+        ! Determine the cp model used in the computation.
+
+        select case (cpModel)
+
+        case (cpConstant)
+
+            ! Constant cp and thus constant gamma. The relation
+            ! eint = cv*T can be used and consequently the standard
+            ! relation between pressure and internal energy is valid.
+
+            ! Abbreviate some constants that occur in the pressure
+            ! computation.
+
+            gm1 = gammaConstant - one
+            factK = five * third - gammaConstant
+
+            ! Loop over the cells. Take the possible pointer
+            ! offset into account and store the pressure in the
+            ! position w(:,:,:, irhoE).
+
+            do k = kBeg, kEnd
+                kp = k + pointerOffset
+                do j = jBeg, jEnd
+                    jp = j + pointerOffset
+                    do i = iBeg, iEnd
+                        ip = i + pointerOffset
+
+                        v2 = w(ip, jp, kp, ivx)**2 + w(ip, jp, kp, ivy)**2 &
+                             + w(ip, jp, kp, ivz)**2
+                        w(i, j, k, irhoE) = gm1 * (w(ip, jp, kp, irhoE) &
+                                                   - half * w(ip, jp, kp, irho) * v2)
+                        w(i, j, k, irhoE) = max(w(i, j, k, irhoE), &
+                                                1.e-5_realType * pInf)
+                    end do
+                end do
+            end do
+
+            ! Correct p if a k-equation is present.
+
+            if (kPresent) then
+                do k = kBeg, kEnd
+                    kp = k + pointerOffset
+                    do j = jBeg, jEnd
+                        jp = j + pointerOffset
+                        do i = iBeg, iEnd
+                            ip = i + pointerOffset
+
+                            w(i, j, k, irhoE) = w(i, j, k, irhoE) &
+                                                + factK * w(ip, jp, kp, irho) &
+                                                * w(ip, jp, kp, itu1)
+                        end do
+                    end do
+                end do
+            end if
 
-       ! Newton algorithm to compute the temperature from the known
-       ! value of the internal energy.
+            !        ================================================================
 
-       do k=kBeg,kEnd
-          kp = k + pointerOffset
-          do j=jBeg,jEnd
-             jp = j + pointerOffset
-             do i=iBeg,iEnd
-                ip = i + pointerOffset
+        case (cpTempCurveFits)
 
-                ! Store the internal energy in the same dimensional
-                ! units as cpEint.
+            ! Cp as function of the temperature is given via curve fits.
 
-                e0 = scale*w(i,j,k,irhoE)
+            ! Store a scale factor when converting the nonDimensional
+            ! energy to the units of cpEint
 
-                ! Take care of the exceptional cases.
+            TRefInv = one / TRef
+            scale = TRef / RGas
 
-                if(e0 <= cpEint(0)) then
+            ! Loop over the cells to compute the internal energy per
+            ! unit mass. This is stored in w(:,:,:,irhoE) for the moment.
 
-                   ! Energy smaller than the lowest value of the curve
-                   ! fit. Use extrapolation using constant cv.
+            do k = kBeg, kEnd
+                kp = k + pointerOffset
+                do j = jBeg, jEnd
+                    jp = j + pointerOffset
+                    do i = iBeg, iEnd
+                        ip = i + pointerOffset
 
-                   T = TRefInv*(cpTrange(0) + (e0 - cpEint(0))/cv0)
+                        w(i, j, k, irhoE) = w(ip, jp, kp, irhoE) / w(ip, jp, kp, irho)
+                        if (kPresent) &
+                            w(i, j, k, irhoE) = w(i, j, k, irhoE) - w(ip, jp, kp, itu1)
 
-                else if(e0 >= cpEint(cpNparts)) then
+                        v2 = w(ip, jp, kp, ivx)**2 + w(ip, jp, kp, ivy)**2 &
+                             + w(ip, jp, kp, ivz)**2
+                        w(i, j, k, irhoE) = w(i, j, k, irhoE) - half * v2
+                    end do
+                end do
+            end do
 
-                   ! Energy larger than the largest value of the curve
-                   ! fit. Use extrapolation using constant cv.
+            ! Newton algorithm to compute the temperature from the known
+            ! value of the internal energy.
 
-                   T = TRefInv*(cpTrange(cpNparts) &
-                        +         (e0 - cpEint(cpNparts))/cvn)
+            do k = kBeg, kEnd
+                kp = k + pointerOffset
+                do j = jBeg, jEnd
+                    jp = j + pointerOffset
+                    do i = iBeg, iEnd
+                        ip = i + pointerOffset
 
-                else
+                        ! Store the internal energy in the same dimensional
+                        ! units as cpEint.
+
+                        e0 = scale * w(i, j, k, irhoE)
+
+                        ! Take care of the exceptional cases.
+
+                        if (e0 <= cpEint(0)) then
 
-                   ! The value is in the range of the curve fits.
-                   ! A Newton algorithm is used to find the temperature.
+                            ! Energy smaller than the lowest value of the curve
+                            ! fit. Use extrapolation using constant cv.
 
-                   ! First find the curve fit interval to be searched.
+                            T = TRefInv * (cpTrange(0) + (e0 - cpEint(0)) / cv0)
 
-                   ii    = cpNparts
-                   start = 1
-                   interval: do
+                        else if (e0 >= cpEint(cpNparts)) then
 
-                      ! Next guess for the interval.
+                            ! Energy larger than the largest value of the curve
+                            ! fit. Use extrapolation using constant cv.
 
-                      nn = start + ii/2
+                            T = TRefInv * (cpTrange(cpNparts) &
+                                           + (e0 - cpEint(cpNparts)) / cvn)
 
-                      ! Determine the situation we are having here.
+                        else
 
-                      if(e0 > cpEint(nn)) then
+                            ! The value is in the range of the curve fits.
+                            ! A Newton algorithm is used to find the temperature.
 
-                         ! Energy is larger than the upper boundary of
-                         ! the current interval. Update the lower
-                         ! boundary.
+                            ! First find the curve fit interval to be searched.
 
-                         start = nn + 1
-                         ii    = ii - 1
+                            ii = cpNparts
+                            start = 1
+                            interval: do
 
-                      else if(e0 >= cpEint(nn-1)) then
+                                ! Next guess for the interval.
 
-                         ! This is the correct range. Exit the do-loop.
+                                nn = start + ii / 2
 
-                         exit
+                                ! Determine the situation we are having here.
 
-                      endif
+                                if (e0 > cpEint(nn)) then
 
-                      ! Modify ii for the next branch to search.
+                                    ! Energy is larger than the upper boundary of
+                                    ! the current interval. Update the lower
+                                    ! boundary.
 
-                      ii = ii/2
+                                    start = nn + 1
+                                    ii = ii - 1
 
-                   enddo interval
+                                else if (e0 >= cpEint(nn - 1)) then
 
-                   ! nn contains the range in which the Newton algorithm
-                   ! must be applied.
+                                    ! This is the correct range. Exit the do-loop.
 
-                   ! Initial guess of the dimensional temperature.
+                                    exit
 
-                   alp = (cpEint(nn) - e0)/(cpEint(nn) - cpEint(nn-1))
-                   T   = alp*cpTrange(nn-1) + (one-alp)*cpTrange(nn)
+                                end if
 
-                   ! The actual Newton algorithm to compute the
-                   ! temperature.
+                                ! Modify ii for the next branch to search.
 
-                   Newton: do
+                                ii = ii / 2
 
-                      ! Compute the internal energy as well as the
-                      ! value of cv/r for the given temperature.
+                            end do interval
 
-                      cv = -one                      ! cv/r = cp/r - 1.0
-                      e  =  cpTempFit(nn)%eint0 - T  ! e = integral of cv,
-                      ! Not of cp.
-                      do ii=1,cpTempFit(nn)%nterm
+                            ! nn contains the range in which the Newton algorithm
+                            ! must be applied.
 
-                         ! Update cv.
+                            ! Initial guess of the dimensional temperature.
 
-                         T2 = T**(cpTempFit(nn)%exponents(ii))
-                         cv = cv + cpTempFit(nn)%constants(ii)*T2
+                            alp = (cpEint(nn) - e0) / (cpEint(nn) - cpEint(nn - 1))
+                            T = alp * cpTrange(nn - 1) + (one - alp) * cpTrange(nn)
 
-                         ! Update e, for which this contribution must be
-                         ! integrated. Take the exceptional case that the
-                         ! exponent == -1 into account.
+                            ! The actual Newton algorithm to compute the
+                            ! temperature.
 
-                         if(cpTempFit(nn)%exponents(ii) == -1_intType) then
-                            e = e + cpTempFit(nn)%constants(ii)*log(T)
-                         else
-                            e = e + cpTempFit(nn)%constants(ii)*T2*T &
-                                 / (cpTempFit(nn)%exponents(ii) + 1)
-                         endif
+                            Newton: do
 
-                      enddo
+                                ! Compute the internal energy as well as the
+                                ! value of cv/r for the given temperature.
 
-                      ! Compute the update and the new temperature.
+                                cv = -one                      ! cv/r = cp/r - 1.0
+                                e = cpTempFit(nn)%eint0 - T  ! e = integral of cv,
+                                ! Not of cp.
+                                do ii = 1, cpTempFit(nn)%nterm
 
-                      dT = (e0 - e)/cv
-                      T  = T + dT
+                                    ! Update cv.
 
-                      ! Exit the Newton loop if the update is smaller
-                      ! than the threshold value.
+                                    T2 = T**(cpTempFit(nn)%exponents(ii))
+                                    cv = cv + cpTempFit(nn)%constants(ii) * T2
 
-                      if(abs(dT) < dTStop) exit
+                                    ! Update e, for which this contribution must be
+                                    ! integrated. Take the exceptional case that the
+                                    ! exponent == -1 into account.
 
-                   enddo Newton
+                                    if (cpTempFit(nn)%exponents(ii) == -1_intType) then
+                                        e = e + cpTempFit(nn)%constants(ii) * log(T)
+                                    else
+                                        e = e + cpTempFit(nn)%constants(ii) * T2 * T &
+                                            / (cpTempFit(nn)%exponents(ii) + 1)
+                                    end if
 
-                   ! Create the nonDimensional temperature.
+                                end do
 
-                   T = T*TRefInv
+                                ! Compute the update and the new temperature.
 
-                endif
+                                dT = (e0 - e) / cv
+                                T = T + dT
 
-                ! Compute the pressure from the known temperature
-                ! and density. Include the correction if a k-equation
-                ! is present.
+                                ! Exit the Newton loop if the update is smaller
+                                ! than the threshold value.
 
-                w(i,j,k,irhoE) = w(ip,jp,kp,irho)*RGas*T
-                w(i,j,k,irhoE) = max(w(i,j,k,irhoE), &
-                     1.e-5_realType*pInf)
-                if( kPresent ) &
-                     w(i,j,k,irhoE) = w(i,j,k,irhoE) &
-                     + twothird*w(ip,jp,kp,irho) &
-                     * w(ip,jp,kp,itu1)
-             enddo
-          enddo
-       enddo
+                                if (abs(dT) < dTStop) exit
 
-    end select
+                            end do Newton
 
-  end subroutine computePressure
+                            ! Create the nonDimensional temperature.
 
-subroutine computeLamViscosity(includeHalos)
-  !
-  !       computeLamViscosity computes the laminar viscosity ratio in
-  !       the owned cell centers of the given block. Sutherland's law is
-  !       used. It is assumed that the pointes already point to the
-  !       correct block before entering this subroutine.
-  !
-  use blockPointers
-  use constants
-  use flowVarRefState
-  use inputPhysics
-  use iteration
-  use utils, only : getCorrectForK
-  implicit none
+                            T = T * TRefInv
 
-  ! input variables
-  logical, intent(in) :: includeHalos
-  !
-  !      Local parameter.
-  !
-  real(kind=realType), parameter :: twoThird = two*third
-  !
-  !      Local variables.
-  !
-  integer(kind=intType) :: i, j, k, ii
-  real(kind=realType)   :: muSuth, TSuth, SSuth, T, pp
-  logical               :: correctForK
-  integer(kind=intType) :: iBeg, iEnd, iSize, jBeg, jEnd, jSize, kBeg, kEnd, kSize
-
-  ! Return immediately if no laminar viscosity needs to be computed.
-
-  if(.not. viscous ) return
-
-  ! Determine whether or not the pressure must be corrected
-  ! for the presence of the turbulent kinetic energy.
-
-  correctForK = getCorrectForK()
-
-  ! Compute the nonDimensional constants in sutherland's law.
-
-  muSuth = muSuthDim/muRef
-  TSuth  = TSuthDim/Tref
-  SSuth  = SSuthDim/Tref
-
-    if (includeHalos) then
-       iBeg = 1
-       jBeg = 1
-       kBeg = 1
-       iEnd = ie
-       jEnd = je
-       kEnd = ke
-    else
-       iBeg = 2
-       jBeg = 2
-       kBeg = 2
-       iEnd = il
-       jEnd = jl
-       kEnd = kl
-    end if
-
-  ! Substract 2/3 rho k, which is a part of the normal turbulent
-  ! stresses, in case the pressure must be corrected.
-
-  if( correctForK ) then
+                        end if
+
+                        ! Compute the pressure from the known temperature
+                        ! and density. Include the correction if a k-equation
+                        ! is present.
+
+                        w(i, j, k, irhoE) = w(ip, jp, kp, irho) * RGas * T
+                        w(i, j, k, irhoE) = max(w(i, j, k, irhoE), &
+                                                1.e-5_realType * pInf)
+                        if (kPresent) &
+                            w(i, j, k, irhoE) = w(i, j, k, irhoE) &
+                                                + twothird * w(ip, jp, kp, irho) &
+                                                * w(ip, jp, kp, itu1)
+                    end do
+                end do
+            end do
+
+        end select
+
+    end subroutine computePressure
+
+    subroutine computeLamViscosity(includeHalos)
+        !
+        !       computeLamViscosity computes the laminar viscosity ratio in
+        !       the owned cell centers of the given block. Sutherland's law is
+        !       used. It is assumed that the pointes already point to the
+        !       correct block before entering this subroutine.
+        !
+        use blockPointers
+        use constants
+        use flowVarRefState
+        use inputPhysics
+        use iteration
+        use utils, only: getCorrectForK
+        implicit none
+
+        ! input variables
+        logical, intent(in) :: includeHalos
+        !
+        !      Local parameter.
+        !
+        real(kind=realType), parameter :: twoThird = two * third
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: i, j, k, ii
+        real(kind=realType) :: muSuth, TSuth, SSuth, T, pp
+        logical :: correctForK
+        integer(kind=intType) :: iBeg, iEnd, iSize, jBeg, jEnd, jSize, kBeg, kEnd, kSize
+
+        ! Return immediately if no laminar viscosity needs to be computed.
+
+        if (.not. viscous) return
+
+        ! Determine whether or not the pressure must be corrected
+        ! for the presence of the turbulent kinetic energy.
+
+        correctForK = getCorrectForK()
+
+        ! Compute the nonDimensional constants in sutherland's law.
+
+        muSuth = muSuthDim / muRef
+        TSuth = TSuthDim / Tref
+        SSuth = SSuthDim / Tref
+
+        if (includeHalos) then
+            iBeg = 1
+            jBeg = 1
+            kBeg = 1
+            iEnd = ie
+            jEnd = je
+            kEnd = ke
+        else
+            iBeg = 2
+            jBeg = 2
+            kBeg = 2
+            iEnd = il
+            jEnd = jl
+            kEnd = kl
+        end if
+
+        ! Substract 2/3 rho k, which is a part of the normal turbulent
+        ! stresses, in case the pressure must be corrected.
+
+        if (correctForK) then
 #ifdef TAPENADE_REVERSE
-     iSize = (iEnd-iBeg)+1
-     jSize = (jEnd-jBeg)+1
-     kSize = (kEnd-kBeg)+1
-
-     !$AD II-LOOP
-     do ii=0, iSize*jSize*kSize-1
-        i = mod(ii, iSize) + iBeg
-        j = mod(ii/(iSize), jSize) + jBeg
-        k = ii/((iSize*jSize)) + kBeg
+            iSize = (iEnd - iBeg) + 1
+            jSize = (jEnd - jBeg) + 1
+            kSize = (kEnd - kBeg) + 1
+
+            !$AD II-LOOP
+            do ii = 0, iSize * jSize * kSize - 1
+                i = mod(ii, iSize) + iBeg
+                j = mod(ii / (iSize), jSize) + jBeg
+                k = ii / ((iSize * jSize)) + kBeg
 #else
-        do k=kBeg, kEnd
-           do j=jBeg, jEnd
-              do i=iBeg, iEnd
+                do k = kBeg, kEnd
+                    do j = jBeg, jEnd
+                        do i = iBeg, iEnd
 #endif
-                 pp = p(i,j,k) - twoThird*w(i,j,k,irho)*w(i,j,k,itu1)
-                 T = pp/(RGas*w(i,j,k,irho))
-                 rlv(i,j,k) = muSuth*((TSuth + SSuth)/(T + SSuth)) &
-                      * ((T/TSuth)**1.5_realType)
+                            pp = p(i, j, k) - twoThird * w(i, j, k, irho) * w(i, j, k, itu1)
+                            T = pp / (RGas * w(i, j, k, irho))
+                            rlv(i, j, k) = muSuth * ((TSuth + SSuth) / (T + SSuth)) &
+                                           * ((T / TSuth)**1.5_realType)
 #ifdef TAPENADE_REVERSE
-              end do
+                        end do
 #else
-           enddo
-        enddo
-     enddo
+                    end do
+                end do
+            end do
 #endif
-  else
-     ! Loop over the owned cells *AND* first level halos of this
-     ! block and compute the laminar viscosity ratio.
+        else
+            ! Loop over the owned cells *AND* first level halos of this
+            ! block and compute the laminar viscosity ratio.
 #ifdef TAPENADE_REVERSE
-     iSize = (iEnd-iBeg)+1
-     jSize = (jEnd-jBeg)+1
-     kSize = (kEnd-kBeg)+1
-
-     !$AD II-LOOP
-     do ii=0, iSize*jSize*kSize-1
-        i = mod(ii, iSize) + iBeg
-        j = mod(ii/(iSize), jSize) + jBeg
-        k = ii/((iSize*jSize)) + kBeg
+            iSize = (iEnd - iBeg) + 1
+            jSize = (jEnd - jBeg) + 1
+            kSize = (kEnd - kBeg) + 1
+
+            !$AD II-LOOP
+            do ii = 0, iSize * jSize * kSize - 1
+                i = mod(ii, iSize) + iBeg
+                j = mod(ii / (iSize), jSize) + jBeg
+                k = ii / ((iSize * jSize)) + kBeg
 #else
-        do k=kBeg, kEnd
-           do j=jBeg, jEnd
-              do i=iBeg, iEnd
+                do k = kBeg, kEnd
+                    do j = jBeg, jEnd
+                        do i = iBeg, iEnd
 #endif
 
-                 ! Compute the nonDimensional temperature and the
-                 ! nonDimensional laminar viscosity.
-                 T = p(i,j,k)/(RGas*w(i,j,k,irho))
-                 rlv(i,j,k) = muSuth*((TSuth + SSuth)/(T + SSuth)) &
-                      * ((T/TSuth)**1.5_realType)
+                            ! Compute the nonDimensional temperature and the
+                            ! nonDimensional laminar viscosity.
+                            T = p(i, j, k) / (RGas * w(i, j, k, irho))
+                            rlv(i, j, k) = muSuth * ((TSuth + SSuth) / (T + SSuth)) &
+                                           * ((T / TSuth)**1.5_realType)
 #ifdef TAPENADE_REVERSE
-              end do
+                        end do
 #else
-           enddo
-        enddo
-     enddo
+                    end do
+                end do
+            end do
 #endif
-  end if
-end subroutine computeLamViscosity
+        end if
+    end subroutine computeLamViscosity
 
-subroutine adjustInflowAngle()
+    subroutine adjustInflowAngle()
 
-  use constants
-  use inputPhysics, only : alpha, beta, liftIndex, velDirFreeStream, &
-       liftDirection, dragDirection
+        use constants
+        use inputPhysics, only: alpha, beta, liftIndex, velDirFreeStream, &
+                                liftDirection, dragDirection
 
-  implicit none
+        implicit none
 
-  !Local Vars
-  real(kind=realType), dimension(3) :: refDir1, refDir2
+        !Local Vars
+        real(kind=realType), dimension(3) :: refDir1, refDir2
 
-  ! Velocity direction given by the rotation of a unit vector
-  ! initially aligned along the positive x-direction (1,0,0)
-  ! 1) rotate alpha radians cw about y or z-axis
-  ! 2) rotate beta radians ccw about z or y-axis
+        ! Velocity direction given by the rotation of a unit vector
+        ! initially aligned along the positive x-direction (1,0,0)
+        ! 1) rotate alpha radians cw about y or z-axis
+        ! 2) rotate beta radians ccw about z or y-axis
 
-  refDir1(:) = zero
-  refDir1(1) = one
-  call getDirVector(refDir1, alpha, beta, velDirFreestream,&
-       liftIndex)
+        refDir1(:) = zero
+        refDir1(1) = one
+        call getDirVector(refDir1, alpha, beta, velDirFreestream, &
+                          liftIndex)
 
-  ! Drag direction given by the rotation of a unit vector
-  ! initially aligned along the positive x-direction (1,0,0)
-  ! 1) rotate alpha radians cw about y or z-axis
-  ! 2) rotate beta radians ccw about z or y-axis
+        ! Drag direction given by the rotation of a unit vector
+        ! initially aligned along the positive x-direction (1,0,0)
+        ! 1) rotate alpha radians cw about y or z-axis
+        ! 2) rotate beta radians ccw about z or y-axis
 
-  call getDirVector(refDir1, alpha, beta, dragDirection, &
-       liftIndex)
+        call getDirVector(refDir1, alpha, beta, dragDirection, &
+                          liftIndex)
 
-  ! Lift direction given by the rotation of a unit vector
-  ! initially aligned along the positive z-direction (0,0,1)
-  ! 1) rotate alpha radians cw about y or z-axis
-  ! 2) rotate beta radians ccw about z or y-axis
+        ! Lift direction given by the rotation of a unit vector
+        ! initially aligned along the positive z-direction (0,0,1)
+        ! 1) rotate alpha radians cw about y or z-axis
+        ! 2) rotate beta radians ccw about z or y-axis
 
-  refDir2(:) = zero
-  refDir2(liftIndex) = one
+        refDir2(:) = zero
+        refDir2(liftIndex) = one
 
-  call getDirVector(refDir2, alpha, beta,liftDirection, liftIndex)
+        call getDirVector(refDir2, alpha, beta, liftDirection, liftIndex)
 
-end subroutine adjustInflowAngle
+    end subroutine adjustInflowAngle
 
-
-       subroutine derivativeRotMatrixRigid(rotationMatrix, &
-                                           rotationPoint, t)
+    subroutine derivativeRotMatrixRigid(rotationMatrix, &
+                                        rotationPoint, t)
 !
 !       derivativeRotMatrixRigid determines the derivative of the
 !       rotation matrix at the given time for the rigid body rotation,
@@ -1377,164 +1373,163 @@ subroutine derivativeRotMatrixRigid(rotationMatrix, &
 !       determined. This value can change due to translation of the
 !       entire grid.
 !
-       use constants
-       use flowVarRefState
-       use inputMotion
-       use monitor
-       use utils, only : rigidRotAngle, derivativeRigidRotAngle
-       implicit none
+        use constants
+        use flowVarRefState
+        use inputMotion
+        use monitor
+        use utils, only: rigidRotAngle, derivativeRigidRotAngle
+        implicit none
 !
 !      Subroutine arguments.
 !
-       real(kind=realType), intent(in) :: t
+        real(kind=realType), intent(in) :: t
 
-       real(kind=realType), dimension(3),   intent(out) :: rotationPoint
-       real(kind=realType), dimension(3,3), intent(out) :: rotationMatrix
+        real(kind=realType), dimension(3), intent(out) :: rotationPoint
+        real(kind=realType), dimension(3, 3), intent(out) :: rotationMatrix
 !
 !      Local variables.
 !
-       integer(kind=intType) :: i, j
-
-       real(kind=realType) :: phi, dphiX, dphiY, dphiZ
-       real(kind=realType) :: cosX, cosY, cosZ, sinX, sinY, sinZ
-
-       real(kind=realType), dimension(3,3) :: dm, m
-
-       ! Determine the rotation angle around the x-axis for the new
-       ! time level and the corresponding values of the sine and cosine.
-
-       phi = rigidRotAngle(degreePolXRot,   coefPolXRot,      &
-                           degreeFourXRot,  omegaFourXRot,    &
-                           cosCoefFourXRot, sinCoefFourXRot, t)
-       sinX = sin(phi)
-       cosX = cos(phi)
-       ! Idem for the y-axis.
-
-       phi = rigidRotAngle(degreePolYRot,   coefPolYRot,      &
-                           degreeFourYRot,  omegaFourYRot,    &
-                           cosCoefFourYRot, sinCoefFourYRot, t)
-       sinY = sin(phi)
-       cosY = cos(phi)
-       ! Idem for the z-axis.
-
-       phi = rigidRotAngle(degreePolZRot,   coefPolZRot,      &
-                           degreeFourZRot,  omegaFourZRot,    &
-                           cosCoefFourZRot, sinCoefFourZRot, t)
-       sinZ = sin(phi)
-       cosZ = cos(phi)
-       ! Compute the time derivative of the rotation angles around the
-       ! x-axis, y-axis and z-axis.
-
-       dphiX = derivativeRigidRotAngle(degreePolXRot,   &
-                                       coefPolXRot,     &
-                                       degreeFourXRot,  &
-                                       omegaFourXRot,   &
-                                       cosCoefFourXRot, &
-                                       sinCoefFourXRot, t)
-
-       dphiY = derivativeRigidRotAngle(degreePolYRot,   &
-                                       coefPolYRot,     &
-                                       degreeFourYRot,  &
-                                       omegaFourYRot,   &
-                                       cosCoefFourYRot, &
-                                       sinCoefFourYRot, t)
-
-       dphiZ = derivativeRigidRotAngle(degreePolZRot,   &
-                                       coefPolZRot,     &
-                                       degreeFourZRot,  &
-                                       omegaFourZRot,   &
-                                       cosCoefFourZRot, &
-                                       sinCoefFourZRot, t)
-
-       ! Compute the time derivative of the rotation matrix applied to
-       ! the coordinates at t == 0.
-
-       ! Part 1. Derivative of the z-rotation matrix multiplied by the
-       ! x and y rotation matrix, i.e. dmz * my * mx
-
-       dm(1,1) = -cosY*sinZ*dphiZ
-       dm(1,2) = (-cosX*cosZ - sinX*sinY*sinZ)*dphiZ
-       dm(1,3) = ( sinX*cosZ - cosX*sinY*sinZ)*dphiZ
-
-       dm(2,1) = cosY*cosZ*dphiZ
-       dm(2,2) = (sinX*sinY*cosZ - cosX*sinZ)*dphiZ
-       dm(2,3) = (cosX*sinY*cosZ + sinX*sinZ)*dphiZ
+        integer(kind=intType) :: i, j
 
-       dm(3,1) = zero
-       dm(3,2) = zero
-       dm(3,3) = zero
+        real(kind=realType) :: phi, dphiX, dphiY, dphiZ
+        real(kind=realType) :: cosX, cosY, cosZ, sinX, sinY, sinZ
+
+        real(kind=realType), dimension(3, 3) :: dm, m
 
-       ! Part 2: mz * dmy * mx.
+        ! Determine the rotation angle around the x-axis for the new
+        ! time level and the corresponding values of the sine and cosine.
 
-       dm(1,1) = dm(1,1) - sinY*cosZ*dphiY
-       dm(1,2) = dm(1,2) + sinX*cosY*cosZ*dphiY
-       dm(1,3) = dm(1,3) + cosX*cosY*cosZ*dphiY
+        phi = rigidRotAngle(degreePolXRot, coefPolXRot, &
+                            degreeFourXRot, omegaFourXRot, &
+                            cosCoefFourXRot, sinCoefFourXRot, t)
+        sinX = sin(phi)
+        cosX = cos(phi)
+        ! Idem for the y-axis.
+
+        phi = rigidRotAngle(degreePolYRot, coefPolYRot, &
+                            degreeFourYRot, omegaFourYRot, &
+                            cosCoefFourYRot, sinCoefFourYRot, t)
+        sinY = sin(phi)
+        cosY = cos(phi)
+        ! Idem for the z-axis.
+
+        phi = rigidRotAngle(degreePolZRot, coefPolZRot, &
+                            degreeFourZRot, omegaFourZRot, &
+                            cosCoefFourZRot, sinCoefFourZRot, t)
+        sinZ = sin(phi)
+        cosZ = cos(phi)
+        ! Compute the time derivative of the rotation angles around the
+        ! x-axis, y-axis and z-axis.
+
+        dphiX = derivativeRigidRotAngle(degreePolXRot, &
+                                        coefPolXRot, &
+                                        degreeFourXRot, &
+                                        omegaFourXRot, &
+                                        cosCoefFourXRot, &
+                                        sinCoefFourXRot, t)
 
-       dm(2,1) = dm(2,1) - sinY*sinZ*dphiY
-       dm(2,2) = dm(2,2) + sinX*cosY*sinZ*dphiY
-       dm(2,3) = dm(2,3) + cosX*cosY*sinZ*dphiY
+        dphiY = derivativeRigidRotAngle(degreePolYRot, &
+                                        coefPolYRot, &
+                                        degreeFourYRot, &
+                                        omegaFourYRot, &
+                                        cosCoefFourYRot, &
+                                        sinCoefFourYRot, t)
+
+        dphiZ = derivativeRigidRotAngle(degreePolZRot, &
+                                        coefPolZRot, &
+                                        degreeFourZRot, &
+                                        omegaFourZRot, &
+                                        cosCoefFourZRot, &
+                                        sinCoefFourZRot, t)
+
+        ! Compute the time derivative of the rotation matrix applied to
+        ! the coordinates at t == 0.
+
+        ! Part 1. Derivative of the z-rotation matrix multiplied by the
+        ! x and y rotation matrix, i.e. dmz * my * mx
+
+        dm(1, 1) = -cosY * sinZ * dphiZ
+        dm(1, 2) = (-cosX * cosZ - sinX * sinY * sinZ) * dphiZ
+        dm(1, 3) = (sinX * cosZ - cosX * sinY * sinZ) * dphiZ
+
+        dm(2, 1) = cosY * cosZ * dphiZ
+        dm(2, 2) = (sinX * sinY * cosZ - cosX * sinZ) * dphiZ
+        dm(2, 3) = (cosX * sinY * cosZ + sinX * sinZ) * dphiZ
+
+        dm(3, 1) = zero
+        dm(3, 2) = zero
+        dm(3, 3) = zero
+
+        ! Part 2: mz * dmy * mx.
+
+        dm(1, 1) = dm(1, 1) - sinY * cosZ * dphiY
+        dm(1, 2) = dm(1, 2) + sinX * cosY * cosZ * dphiY
+        dm(1, 3) = dm(1, 3) + cosX * cosY * cosZ * dphiY
+
+        dm(2, 1) = dm(2, 1) - sinY * sinZ * dphiY
+        dm(2, 2) = dm(2, 2) + sinX * cosY * sinZ * dphiY
+        dm(2, 3) = dm(2, 3) + cosX * cosY * sinZ * dphiY
+
+        dm(3, 1) = dm(3, 1) - cosY * dphiY
+        dm(3, 2) = dm(3, 2) - sinX * sinY * dphiY
+        dm(3, 3) = dm(3, 3) - cosX * sinY * dphiY
+
+        ! Part 3: mz * my * dmx
+
+        dm(1, 2) = dm(1, 2) + (sinX * sinZ + cosX * sinY * cosZ) * dphiX
+        dm(1, 3) = dm(1, 3) + (cosX * sinZ - sinX * sinY * cosZ) * dphiX
+
+        dm(2, 2) = dm(2, 2) + (cosX * sinY * sinZ - sinX * cosZ) * dphiX
+        dm(2, 3) = dm(2, 3) - (sinX * sinY * sinZ + cosX * cosZ) * dphiX
+
+        dm(3, 2) = dm(3, 2) + cosX * cosY * dphiX
+        dm(3, 3) = dm(3, 3) - sinX * cosY * dphiX
+
+        ! Determine the rotation matrix at t == t.
+
+        m(1, 1) = cosY * cosZ
+        m(2, 1) = cosY * sinZ
+        m(3, 1) = -sinY
+
+        m(1, 2) = sinX * sinY * cosZ - cosX * sinZ
+        m(2, 2) = sinX * sinY * sinZ + cosX * cosZ
+        m(3, 2) = sinX * cosY
+
+        m(1, 3) = cosX * sinY * cosZ + sinX * sinZ
+        m(2, 3) = cosX * sinY * sinZ - sinX * cosZ
+        m(3, 3) = cosX * cosY
+
+        ! Determine the matrix product dm * inverse(m), which will give
+        ! the derivative of the rotation matrix when applied to the
+        ! current coordinates. Note that inverse(m) == transpose(m).
+
+        do j = 1, 3
+            do i = 1, 3
+                rotationMatrix(i, j) = dm(i, 1) * m(j, 1) + dm(i, 2) * m(j, 2) &
+                                       + dm(i, 3) * m(j, 3)
+            end do
+        end do
+
+        ! Determine the rotation point at the new time level; it is
+        ! possible that this value changes due to translation of the grid.
 
-       dm(3,1) = dm(3,1) - cosY*dphiY
-       dm(3,2) = dm(3,2) - sinX*sinY*dphiY
-       dm(3,3) = dm(3,3) - cosX*sinY*dphiY
+        !  aInf = sqrt(gammaInf*pInf/rhoInf)
+
+        !  RotationPoint(1) = LRef*rotPoint(1) &
+        !                    + MachGrid(1)*aInf*t/timeRef
+        !  rotationPoint(2) = LRef*rotPoint(2) &
+        !                    + MachGrid(2)*aInf*t/timeRef
+        !  rotationPoint(3) = LRef*rotPoint(3) &
+        !                    + MachGrid(3)*aInf*t/timeRef
 
-       ! Part 3: mz * my * dmx
+        rotationPoint(1) = LRef * rotPoint(1)
+        rotationPoint(2) = LRef * rotPoint(2)
+        rotationPoint(3) = LRef * rotPoint(3)
 
-       dm(1,2) = dm(1,2) + (sinX*sinZ + cosX*sinY*cosZ)*dphiX
-       dm(1,3) = dm(1,3) + (cosX*sinZ - sinX*sinY*cosZ)*dphiX
+    end subroutine derivativeRotMatrixRigid
 
-       dm(2,2) = dm(2,2) + (cosX*sinY*sinZ - sinX*cosZ)*dphiX
-       dm(2,3) = dm(2,3) - (sinX*sinY*sinZ + cosX*cosZ)*dphiX
-
-       dm(3,2) = dm(3,2) + cosX*cosY*dphiX
-       dm(3,3) = dm(3,3) - sinX*cosY*dphiX
-
-       ! Determine the rotation matrix at t == t.
-
-       m(1,1) =  cosY*cosZ
-       m(2,1) =  cosY*sinZ
-       m(3,1) = -sinY
-
-       m(1,2) = sinX*sinY*cosZ - cosX*sinZ
-       m(2,2) = sinX*sinY*sinZ + cosX*cosZ
-       m(3,2) = sinX*cosY
-
-       m(1,3) = cosX*sinY*cosZ + sinX*sinZ
-       m(2,3) = cosX*sinY*sinZ - sinX*cosZ
-       m(3,3) = cosX*cosY
-
-       ! Determine the matrix product dm * inverse(m), which will give
-       ! the derivative of the rotation matrix when applied to the
-       ! current coordinates. Note that inverse(m) == transpose(m).
-
-       do j=1,3
-         do i=1,3
-           rotationMatrix(i,j) = dm(i,1)*m(j,1) + dm(i,2)*m(j,2) &
-                               + dm(i,3)*m(j,3)
-         enddo
-       enddo
-
-       ! Determine the rotation point at the new time level; it is
-       ! possible that this value changes due to translation of the grid.
-
-    !  aInf = sqrt(gammaInf*pInf/rhoInf)
-
-    !  RotationPoint(1) = LRef*rotPoint(1) &
-    !                    + MachGrid(1)*aInf*t/timeRef
-    !  rotationPoint(2) = LRef*rotPoint(2) &
-    !                    + MachGrid(2)*aInf*t/timeRef
-    !  rotationPoint(3) = LRef*rotPoint(3) &
-    !                    + MachGrid(3)*aInf*t/timeRef
-
-       rotationPoint(1) = LRef*rotPoint(1)
-       rotationPoint(2) = LRef*rotPoint(2)
-       rotationPoint(3) = LRef*rotPoint(3)
-
-       end subroutine derivativeRotMatrixRigid
-
-
-      subroutine getDirVector(refDirection, alpha, beta,&
-           windDirection,liftIndex)
+    subroutine getDirVector(refDirection, alpha, beta, &
+                            windDirection, liftIndex)
         !(xb,yb,zb,alpha,beta,xw,yw,zw)
 !
 !      Convert the angle of attack and side slip angle to wind axes.
@@ -1553,28 +1548,28 @@ subroutine getDirVector(refDirection, alpha, beta,&
 !         output arguments:
 !            windDirection = unit wind vector in body axes
 !
-      use constants
-      use utils, only : terminate
-      implicit none
+        use constants
+        use utils, only: terminate
+        implicit none
 !
 !     Subroutine arguments.
 !
-      real(kind=realType),dimension(3), intent(in)  :: refDirection
-      real(kind=realType)  :: alpha, beta
-      real(kind=realType),dimension (3), intent(out) :: windDirection
-      integer(kind=intType)::liftIndex
+        real(kind=realType), dimension(3), intent(in) :: refDirection
+        real(kind=realType) :: alpha, beta
+        real(kind=realType), dimension(3), intent(out) :: windDirection
+        integer(kind=intType) :: liftIndex
 !
 !     Local variables.
 !
-      real(kind=realType) :: rnorm,x1,y1,z1,xbn,ybn,zbn,xw,yw,zw
-      real(kind=realType) :: tmp
+        real(kind=realType) :: rnorm, x1, y1, z1, xbn, ybn, zbn, xw, yw, zw
+        real(kind=realType) :: tmp
 
-      ! Normalize the input vector.
+        ! Normalize the input vector.
 
-      rnorm = sqrt( refDirection(1)**2 + refDirection(2)**2 + refDirection(3)**2 )
-      xbn = refDirection(1) / rnorm
-      ybn = refDirection(2) / rnorm
-      zbn = refDirection(3) / rnorm
+        rnorm = sqrt(refDirection(1)**2 + refDirection(2)**2 + refDirection(3)**2)
+        xbn = refDirection(1) / rnorm
+        ybn = refDirection(2) / rnorm
+        zbn = refDirection(3) / rnorm
 
 !!$      ! Compute the wind direction vector.
 !!$
@@ -1588,45 +1583,44 @@ subroutine getDirVector(refDirection, alpha, beta,&
 !!$
 !!$      call vectorRotation(xw, yw, zw, 3, -beta, x1, y1, z1)
 
-      if (liftIndex==2)then
-         ! Compute the wind direction vector.Aerosurf axes different!!
+        if (liftIndex == 2) then
+            ! Compute the wind direction vector.Aerosurf axes different!!
 
-         ! 1) rotate alpha radians cw about z-axis
-         !    ( <=> rotate z-axis alpha radians ccw)
+            ! 1) rotate alpha radians cw about z-axis
+            !    ( <=> rotate z-axis alpha radians ccw)
 
-         tmp = -alpha
-         call vectorRotation(x1, y1, z1, 3, tmp, xbn, ybn, zbn)
+            tmp = -alpha
+            call vectorRotation(x1, y1, z1, 3, tmp, xbn, ybn, zbn)
 
-         ! 2) rotate beta radians ccw about y-axis
-         !    ( <=> rotate z-axis -beta radians ccw)
-         tmp = -beta
-         call vectorRotation(xw, yw, zw, 2, tmp, x1, y1, z1)
+            ! 2) rotate beta radians ccw about y-axis
+            !    ( <=> rotate z-axis -beta radians ccw)
+            tmp = -beta
+            call vectorRotation(xw, yw, zw, 2, tmp, x1, y1, z1)
 
-      elseif(liftIndex==3)then
-         ! Compute the wind direction vector.Aerosurf axes different!!
+        elseif (liftIndex == 3) then
+            ! Compute the wind direction vector.Aerosurf axes different!!
 
-         ! 1) rotate alpha radians cw about z-axis
-         !    ( <=> rotate z-axis alpha radians ccw)
+            ! 1) rotate alpha radians cw about z-axis
+            !    ( <=> rotate z-axis alpha radians ccw)
 
-         call vectorRotation(x1, y1, z1, 2, alpha, xbn, ybn, zbn)
+            call vectorRotation(x1, y1, z1, 2, alpha, xbn, ybn, zbn)
 
-         ! 2) rotate beta radians ccw about y-axis
-         !    ( <=> rotate z-axis -beta radians ccw)
+            ! 2) rotate beta radians ccw about y-axis
+            !    ( <=> rotate z-axis -beta radians ccw)
 
-         call vectorRotation(xw, yw, zw, 3, beta, x1, y1, z1)
+            call vectorRotation(xw, yw, zw, 3, beta, x1, y1, z1)
 
+        else
+            call terminate('getDirVector', 'Invalid Lift Direction')
 
-      else
-         call terminate('getDirVector', 'Invalid Lift Direction')
+        end if
 
-      endif
+        windDirection(1) = xw
+        windDirection(2) = yw
+        windDirection(3) = zw
 
-      windDirection(1) = xw
-      windDirection(2) = yw
-      windDirection(3) = zw
-
-      end subroutine getDirVector
-      subroutine vectorRotation(xp, yp, zp, iaxis, angle, x, y, z)
+    end subroutine getDirVector
+    subroutine vectorRotation(xp, yp, zp, iaxis, angle, x, y, z)
 !
 !      vectorRotation rotates a given vector with respect to a
 !      specified axis by a given angle.
@@ -1637,614 +1631,609 @@ subroutine vectorRotation(xp, yp, zp, iaxis, angle, x, y, z)
 !         output arguments:
 !            xp, yp, zp = coordinates in rotated system
 !
-      use precision
-      implicit none
+        use precision
+        implicit none
 !
 !     Subroutine arguments.
 !
-      integer(kind=intType), intent(in) :: iaxis
-      real(kind=realType), intent(in)   :: angle, x, y, z
-      real(kind=realType), intent(out)  :: xp, yp, zp
+        integer(kind=intType), intent(in) :: iaxis
+        real(kind=realType), intent(in) :: angle, x, y, z
+        real(kind=realType), intent(out) :: xp, yp, zp
 
-      ! rotation about specified axis by specified angle
+        ! rotation about specified axis by specified angle
 
-      select case(iaxis)
+        select case (iaxis)
 
-        ! rotation about the x-axis
+            ! rotation about the x-axis
 
-        case(1)
-          xp =        1.    * x +     0.     * y +     0.     * z
-          yp =        0.    * x + cos(angle) * y + sin(angle) * z
-          zp =        0.    * x - sin(angle) * y + cos(angle) * z
+        case (1)
+            xp = 1.*x + 0.*y + 0.*z
+            yp = 0.*x + cos(angle) * y + sin(angle) * z
+            zp = 0.*x - sin(angle) * y + cos(angle) * z
 
-        ! rotation about the y-axis
+            ! rotation about the y-axis
 
-        case(2)
-          xp =   cos(angle) * x +     0.     * y - sin(angle) * z
-          yp =        0.    * x +     1.     * y +     0.     * z
-          zp =   sin(angle) * x +     0.     * y + cos(angle) * z
+        case (2)
+            xp = cos(angle) * x + 0.*y - sin(angle) * z
+            yp = 0.*x + 1.*y + 0.*z
+            zp = sin(angle) * x + 0.*y + cos(angle) * z
 
-        ! rotation about the z-axis
+            ! rotation about the z-axis
 
-        case(3)
-          xp =   cos(angle) * x + sin(angle) * y +     0.     * z
-          yp = - sin(angle) * x + cos(angle) * y +     0.     * z
-          zp =       0.     * x +     0.     * y +     1.     * z
+        case (3)
+            xp = cos(angle) * x + sin(angle) * y + 0.*z
+            yp = -sin(angle) * x + cos(angle) * y + 0.*z
+            zp = 0.*x + 0.*y + 1.*z
 
         case default
-          write(*,*) "vectorRotation called with invalid arguments"
-          stop
-
-      end select
-
-      end subroutine vectorRotation
-
-subroutine allNodalGradients
-  !
-  !         nodalGradients computes the nodal velocity gradients and
-  !         minus the gradient of the speed of sound squared. The minus
-  !         sign is present, because this is the definition of the heat
-  !         flux. These gradients are computed for all nodes.
-  !
-  use constants
-  use blockPointers
-  implicit none
-  !        Local variables.
-  integer(kind=intType) :: i, j, k
-  integer(kind=intType) :: k1, kk
-  integer(kind=intType) :: istart, iend, isize, ii
-  integer(kind=intType) :: jstart, jend, jsize
-  integer(kind=intType) :: kstart, kend, ksize
-
-  real(kind=realType) :: oneOverV, ubar, vbar, wbar, a2
-  real(kind=realType) :: sx, sx1, sy, sy1, sz, sz1
-
-
-  ! Zero all nodeal gradients:
-  ux = zero; uy = zero; uz = zero;
-  vx = zero; vy = zero; vz = zero;
-  wx = zero; wy = zero; wz = zero;
-  qx = zero; qy = zero; qz = zero;
-
-  ! First part. Contribution in the k-direction.
-  ! The contribution is scattered to both the left and right node
-  ! in k-direction.
+            write (*, *) "vectorRotation called with invalid arguments"
+            stop
+
+        end select
+
+    end subroutine vectorRotation
+
+    subroutine allNodalGradients
+        !
+        !         nodalGradients computes the nodal velocity gradients and
+        !         minus the gradient of the speed of sound squared. The minus
+        !         sign is present, because this is the definition of the heat
+        !         flux. These gradients are computed for all nodes.
+        !
+        use constants
+        use blockPointers
+        implicit none
+        !        Local variables.
+        integer(kind=intType) :: i, j, k
+        integer(kind=intType) :: k1, kk
+        integer(kind=intType) :: istart, iend, isize, ii
+        integer(kind=intType) :: jstart, jend, jsize
+        integer(kind=intType) :: kstart, kend, ksize
+
+        real(kind=realType) :: oneOverV, ubar, vbar, wbar, a2
+        real(kind=realType) :: sx, sx1, sy, sy1, sz, sz1
+
+        ! Zero all nodeal gradients:
+        ux = zero; uy = zero; uz = zero; 
+        vx = zero; vy = zero; vz = zero; 
+        wx = zero; wy = zero; wz = zero; 
+        qx = zero; qy = zero; qz = zero; 
+        ! First part. Contribution in the k-direction.
+        ! The contribution is scattered to both the left and right node
+        ! in k-direction.
 
 #ifdef TAPENADE_REVERSE
-  !$AD II-LOOP
-  do ii=0,il*jl*ke-1
-     i = mod(ii, il) + 1
-     j = mod(ii/il, jl) + 1
-     k = ii/(il*jl) + 1
+        !$AD II-LOOP
+        do ii = 0, il * jl * ke - 1
+            i = mod(ii, il) + 1
+            j = mod(ii / il, jl) + 1
+            k = ii / (il * jl) + 1
 #else
-     do k=1, ke
-        do j=1, jl
-           do i=1, il
+            do k = 1, ke
+                do j = 1, jl
+                    do i = 1, il
 #endif
-              ! Compute 8 times the average normal for this part of
-              ! the control volume. The factor 8 is taken care of later
-              ! on when the division by the volume takes place.
-
-              sx = sk(i,j,k-1,  1) + sk(i+1,j,k-1,  1) &
-                   + sk(i,j+1,k-1,1) + sk(i+1,j+1,k-1,1) &
-                   + sk(i,j,  k,  1) + sk(i+1,j,  k,  1) &
-                   + sk(i,j+1,k  ,1) + sk(i+1,j+1,k  ,1)
-              sy = sk(i,j,k-1,  2) + sk(i+1,j,k-1,  2) &
-                   + sk(i,j+1,k-1,2) + sk(i+1,j+1,k-1,2) &
-                   + sk(i,j,  k,  2) + sk(i+1,j,  k,  2) &
-                   + sk(i,j+1,k  ,2) + sk(i+1,j+1,k  ,2)
-              sz = sk(i,j,k-1,  3) + sk(i+1,j,k-1,  3) &
-                   + sk(i,j+1,k-1,3) + sk(i+1,j+1,k-1,3) &
-                   + sk(i,j,  k,  3) + sk(i+1,j,  k,  3) &
-                   + sk(i,j+1,k  ,3) + sk(i+1,j+1,k  ,3)
-
-              ! Compute the average velocities and speed of sound squared
-              ! for this integration point. Node that these variables are
-              ! stored in w(ivx), w(ivy), w(ivz) and p.
-
-              ubar = fourth*(w(i,j,  k,ivx) + w(i+1,j,  k,ivx) &
-                   +         w(i,j+1,k,ivx) + w(i+1,j+1,k,ivx))
-              vbar = fourth*(w(i,j,  k,ivy) + w(i+1,j,  k,ivy) &
-                   +         w(i,j+1,k,ivy) + w(i+1,j+1,k,ivy))
-              wbar = fourth*(w(i,j,  k,ivz) + w(i+1,j,  k,ivz) &
-                   +         w(i,j+1,k,ivz) + w(i+1,j+1,k,ivz))
-
-              a2 = fourth*(aa(i,j,k) + aa(i+1,j,k) + aa(i,j+1,k) + aa(i+1,j+1,k))
-
-
-              ! Add the contributions to the surface integral to the node
-              ! j-1 and substract it from the node j. For the heat flux it
-              ! is reversed, because the negative of the gradient of the
-              ! speed of sound must be computed.
-
-              if(k > 1) then
-                 ux(i,j,k-1) = ux(i,j,k-1) + ubar*sx
-                 uy(i,j,k-1) = uy(i,j,k-1) + ubar*sy
-                 uz(i,j,k-1) = uz(i,j,k-1) + ubar*sz
-
-                 vx(i,j,k-1) = vx(i,j,k-1) + vbar*sx
-                 vy(i,j,k-1) = vy(i,j,k-1) + vbar*sy
-                 vz(i,j,k-1) = vz(i,j,k-1) + vbar*sz
-
-                 wx(i,j,k-1) = wx(i,j,k-1) + wbar*sx
-                 wy(i,j,k-1) = wy(i,j,k-1) + wbar*sy
-                 wz(i,j,k-1) = wz(i,j,k-1) + wbar*sz
-
-                 qx(i,j,k-1) = qx(i,j,k-1) - a2*sx
-                 qy(i,j,k-1) = qy(i,j,k-1) - a2*sy
-                 qz(i,j,k-1) = qz(i,j,k-1) - a2*sz
-              endif
-
-              if(k < ke) then
-                 ux(i,j,k) = ux(i,j,k) - ubar*sx
-                 uy(i,j,k) = uy(i,j,k) - ubar*sy
-                 uz(i,j,k) = uz(i,j,k) - ubar*sz
-
-                 vx(i,j,k) = vx(i,j,k) - vbar*sx
-                 vy(i,j,k) = vy(i,j,k) - vbar*sy
-                 vz(i,j,k) = vz(i,j,k) - vbar*sz
-
-                 wx(i,j,k) = wx(i,j,k) - wbar*sx
-                 wy(i,j,k) = wy(i,j,k) - wbar*sy
-                 wz(i,j,k) = wz(i,j,k) - wbar*sz
-
-                 qx(i,j,k) = qx(i,j,k) + a2*sx
-                 qy(i,j,k) = qy(i,j,k) + a2*sy
-                 qz(i,j,k) = qz(i,j,k) + a2*sz
-              endif
+                        ! Compute 8 times the average normal for this part of
+                        ! the control volume. The factor 8 is taken care of later
+                        ! on when the division by the volume takes place.
+
+                        sx = sk(i, j, k - 1, 1) + sk(i + 1, j, k - 1, 1) &
+                             + sk(i, j + 1, k - 1, 1) + sk(i + 1, j + 1, k - 1, 1) &
+                             + sk(i, j, k, 1) + sk(i + 1, j, k, 1) &
+                             + sk(i, j + 1, k, 1) + sk(i + 1, j + 1, k, 1)
+                        sy = sk(i, j, k - 1, 2) + sk(i + 1, j, k - 1, 2) &
+                             + sk(i, j + 1, k - 1, 2) + sk(i + 1, j + 1, k - 1, 2) &
+                             + sk(i, j, k, 2) + sk(i + 1, j, k, 2) &
+                             + sk(i, j + 1, k, 2) + sk(i + 1, j + 1, k, 2)
+                        sz = sk(i, j, k - 1, 3) + sk(i + 1, j, k - 1, 3) &
+                             + sk(i, j + 1, k - 1, 3) + sk(i + 1, j + 1, k - 1, 3) &
+                             + sk(i, j, k, 3) + sk(i + 1, j, k, 3) &
+                             + sk(i, j + 1, k, 3) + sk(i + 1, j + 1, k, 3)
+
+                        ! Compute the average velocities and speed of sound squared
+                        ! for this integration point. Node that these variables are
+                        ! stored in w(ivx), w(ivy), w(ivz) and p.
+
+                        ubar = fourth * (w(i, j, k, ivx) + w(i + 1, j, k, ivx) &
+                                         + w(i, j + 1, k, ivx) + w(i + 1, j + 1, k, ivx))
+                        vbar = fourth * (w(i, j, k, ivy) + w(i + 1, j, k, ivy) &
+                                         + w(i, j + 1, k, ivy) + w(i + 1, j + 1, k, ivy))
+                        wbar = fourth * (w(i, j, k, ivz) + w(i + 1, j, k, ivz) &
+                                         + w(i, j + 1, k, ivz) + w(i + 1, j + 1, k, ivz))
+
+                        a2 = fourth * (aa(i, j, k) + aa(i + 1, j, k) + aa(i, j + 1, k) + aa(i + 1, j + 1, k))
+
+                        ! Add the contributions to the surface integral to the node
+                        ! j-1 and substract it from the node j. For the heat flux it
+                        ! is reversed, because the negative of the gradient of the
+                        ! speed of sound must be computed.
+
+                        if (k > 1) then
+                            ux(i, j, k - 1) = ux(i, j, k - 1) + ubar * sx
+                            uy(i, j, k - 1) = uy(i, j, k - 1) + ubar * sy
+                            uz(i, j, k - 1) = uz(i, j, k - 1) + ubar * sz
+
+                            vx(i, j, k - 1) = vx(i, j, k - 1) + vbar * sx
+                            vy(i, j, k - 1) = vy(i, j, k - 1) + vbar * sy
+                            vz(i, j, k - 1) = vz(i, j, k - 1) + vbar * sz
+
+                            wx(i, j, k - 1) = wx(i, j, k - 1) + wbar * sx
+                            wy(i, j, k - 1) = wy(i, j, k - 1) + wbar * sy
+                            wz(i, j, k - 1) = wz(i, j, k - 1) + wbar * sz
+
+                            qx(i, j, k - 1) = qx(i, j, k - 1) - a2 * sx
+                            qy(i, j, k - 1) = qy(i, j, k - 1) - a2 * sy
+                            qz(i, j, k - 1) = qz(i, j, k - 1) - a2 * sz
+                        end if
+
+                        if (k < ke) then
+                            ux(i, j, k) = ux(i, j, k) - ubar * sx
+                            uy(i, j, k) = uy(i, j, k) - ubar * sy
+                            uz(i, j, k) = uz(i, j, k) - ubar * sz
+
+                            vx(i, j, k) = vx(i, j, k) - vbar * sx
+                            vy(i, j, k) = vy(i, j, k) - vbar * sy
+                            vz(i, j, k) = vz(i, j, k) - vbar * sz
+
+                            wx(i, j, k) = wx(i, j, k) - wbar * sx
+                            wy(i, j, k) = wy(i, j, k) - wbar * sy
+                            wz(i, j, k) = wz(i, j, k) - wbar * sz
+
+                            qx(i, j, k) = qx(i, j, k) + a2 * sx
+                            qy(i, j, k) = qy(i, j, k) + a2 * sy
+                            qz(i, j, k) = qz(i, j, k) + a2 * sz
+                        end if
 #ifdef TAPENADE_REVERSE
-           end do
+                    end do
 #else
-        enddo
-     enddo
-  enddo
+                end do
+            end do
+        end do
 #endif
 
-  ! Second part. Contribution in the j-direction.
-  ! The contribution is scattered to both the left and right node
-  ! in j-direction.
+        ! Second part. Contribution in the j-direction.
+        ! The contribution is scattered to both the left and right node
+        ! in j-direction.
 
 #ifdef TAPENADE_REVERSE
-  !$AD II-LOOP
-  do ii=0,il*je*kl-1
-     i = mod(ii, il) + 1
-     j = mod(ii/il, je) + 1
-     k = ii/(il*je) + 1
+        !$AD II-LOOP
+        do ii = 0, il * je * kl - 1
+            i = mod(ii, il) + 1
+            j = mod(ii / il, je) + 1
+            k = ii / (il * je) + 1
 #else
-     do k=1, kl
-        do j=1, je
-           do i=1, il
+            do k = 1, kl
+                do j = 1, je
+                    do i = 1, il
 #endif
 
-              ! Compute 8 times the average normal for this part of
-              ! the control volume. The factor 8 is taken care of later
-              ! on when the division by the volume takes place.
-
-              sx = sj(i,j-1,k,  1) + sj(i+1,j-1,k,  1) &
-                   + sj(i,j-1,k+1,1) + sj(i+1,j-1,k+1,1) &
-                   + sj(i,j,  k,  1) + sj(i+1,j,  k,  1) &
-                   + sj(i,j,  k+1,1) + sj(i+1,j,  k+1,1)
-              sy = sj(i,j-1,k,  2) + sj(i+1,j-1,k,  2) &
-                   + sj(i,j-1,k+1,2) + sj(i+1,j-1,k+1,2) &
-                   + sj(i,j,  k,  2) + sj(i+1,j,  k,  2) &
-                   + sj(i,j,  k+1,2) + sj(i+1,j,  k+1,2)
-              sz = sj(i,j-1,k,  3) + sj(i+1,j-1,k,  3) &
-                   + sj(i,j-1,k+1,3) + sj(i+1,j-1,k+1,3) &
-                   + sj(i,j,  k,  3) + sj(i+1,j,  k,  3) &
-                   + sj(i,j,  k+1,3) + sj(i+1,j,  k+1,3)
-
-              ! Compute the average velocities and speed of sound squared
-              ! for this integration point. Node that these variables are
-              ! stored in w(ivx), w(ivy), w(ivz) and p.
-
-              ubar = fourth*(w(i,j,k,  ivx) + w(i+1,j,k,  ivx) &
-                   +         w(i,j,k+1,ivx) + w(i+1,j,k+1,ivx))
-              vbar = fourth*(w(i,j,k,  ivy) + w(i+1,j,k,  ivy) &
-                   +         w(i,j,k+1,ivy) + w(i+1,j,k+1,ivy))
-              wbar = fourth*(w(i,j,k,  ivz) + w(i+1,j,k,  ivz) &
-                   +         w(i,j,k+1,ivz) + w(i+1,j,k+1,ivz))
-
-              a2 = fourth*(aa(i,j,k) + aa(i+1,j,k) + aa(i,j,k+1) + aa(i+1,j,k+1))
-
-              ! Add the contributions to the surface integral to the node
-              ! j-1 and substract it from the node j. For the heat flux it
-              ! is reversed, because the negative of the gradient of the
-              ! speed of sound must be computed.
-
-              if(j > 1) then
-                 ux(i,j-1,k) = ux(i,j-1,k) + ubar*sx
-                 uy(i,j-1,k) = uy(i,j-1,k) + ubar*sy
-                 uz(i,j-1,k) = uz(i,j-1,k) + ubar*sz
-
-                 vx(i,j-1,k) = vx(i,j-1,k) + vbar*sx
-                 vy(i,j-1,k) = vy(i,j-1,k) + vbar*sy
-                 vz(i,j-1,k) = vz(i,j-1,k) + vbar*sz
-
-                 wx(i,j-1,k) = wx(i,j-1,k) + wbar*sx
-                 wy(i,j-1,k) = wy(i,j-1,k) + wbar*sy
-                 wz(i,j-1,k) = wz(i,j-1,k) + wbar*sz
-
-                 qx(i,j-1,k) = qx(i,j-1,k) - a2*sx
-                 qy(i,j-1,k) = qy(i,j-1,k) - a2*sy
-                 qz(i,j-1,k) = qz(i,j-1,k) - a2*sz
-              endif
-
-              if(j < je) then
-                 ux(i,j,k) = ux(i,j,k) - ubar*sx
-                 uy(i,j,k) = uy(i,j,k) - ubar*sy
-                 uz(i,j,k) = uz(i,j,k) - ubar*sz
-
-                 vx(i,j,k) = vx(i,j,k) - vbar*sx
-                 vy(i,j,k) = vy(i,j,k) - vbar*sy
-                 vz(i,j,k) = vz(i,j,k) - vbar*sz
-
-                 wx(i,j,k) = wx(i,j,k) - wbar*sx
-                 wy(i,j,k) = wy(i,j,k) - wbar*sy
-                 wz(i,j,k) = wz(i,j,k) - wbar*sz
-
-                 qx(i,j,k) = qx(i,j,k) + a2*sx
-                 qy(i,j,k) = qy(i,j,k) + a2*sy
-                 qz(i,j,k) = qz(i,j,k) + a2*sz
-              endif
+                        ! Compute 8 times the average normal for this part of
+                        ! the control volume. The factor 8 is taken care of later
+                        ! on when the division by the volume takes place.
+
+                        sx = sj(i, j - 1, k, 1) + sj(i + 1, j - 1, k, 1) &
+                             + sj(i, j - 1, k + 1, 1) + sj(i + 1, j - 1, k + 1, 1) &
+                             + sj(i, j, k, 1) + sj(i + 1, j, k, 1) &
+                             + sj(i, j, k + 1, 1) + sj(i + 1, j, k + 1, 1)
+                        sy = sj(i, j - 1, k, 2) + sj(i + 1, j - 1, k, 2) &
+                             + sj(i, j - 1, k + 1, 2) + sj(i + 1, j - 1, k + 1, 2) &
+                             + sj(i, j, k, 2) + sj(i + 1, j, k, 2) &
+                             + sj(i, j, k + 1, 2) + sj(i + 1, j, k + 1, 2)
+                        sz = sj(i, j - 1, k, 3) + sj(i + 1, j - 1, k, 3) &
+                             + sj(i, j - 1, k + 1, 3) + sj(i + 1, j - 1, k + 1, 3) &
+                             + sj(i, j, k, 3) + sj(i + 1, j, k, 3) &
+                             + sj(i, j, k + 1, 3) + sj(i + 1, j, k + 1, 3)
+
+                        ! Compute the average velocities and speed of sound squared
+                        ! for this integration point. Node that these variables are
+                        ! stored in w(ivx), w(ivy), w(ivz) and p.
+
+                        ubar = fourth * (w(i, j, k, ivx) + w(i + 1, j, k, ivx) &
+                                         + w(i, j, k + 1, ivx) + w(i + 1, j, k + 1, ivx))
+                        vbar = fourth * (w(i, j, k, ivy) + w(i + 1, j, k, ivy) &
+                                         + w(i, j, k + 1, ivy) + w(i + 1, j, k + 1, ivy))
+                        wbar = fourth * (w(i, j, k, ivz) + w(i + 1, j, k, ivz) &
+                                         + w(i, j, k + 1, ivz) + w(i + 1, j, k + 1, ivz))
+
+                        a2 = fourth * (aa(i, j, k) + aa(i + 1, j, k) + aa(i, j, k + 1) + aa(i + 1, j, k + 1))
+
+                        ! Add the contributions to the surface integral to the node
+                        ! j-1 and substract it from the node j. For the heat flux it
+                        ! is reversed, because the negative of the gradient of the
+                        ! speed of sound must be computed.
+
+                        if (j > 1) then
+                            ux(i, j - 1, k) = ux(i, j - 1, k) + ubar * sx
+                            uy(i, j - 1, k) = uy(i, j - 1, k) + ubar * sy
+                            uz(i, j - 1, k) = uz(i, j - 1, k) + ubar * sz
+
+                            vx(i, j - 1, k) = vx(i, j - 1, k) + vbar * sx
+                            vy(i, j - 1, k) = vy(i, j - 1, k) + vbar * sy
+                            vz(i, j - 1, k) = vz(i, j - 1, k) + vbar * sz
+
+                            wx(i, j - 1, k) = wx(i, j - 1, k) + wbar * sx
+                            wy(i, j - 1, k) = wy(i, j - 1, k) + wbar * sy
+                            wz(i, j - 1, k) = wz(i, j - 1, k) + wbar * sz
+
+                            qx(i, j - 1, k) = qx(i, j - 1, k) - a2 * sx
+                            qy(i, j - 1, k) = qy(i, j - 1, k) - a2 * sy
+                            qz(i, j - 1, k) = qz(i, j - 1, k) - a2 * sz
+                        end if
+
+                        if (j < je) then
+                            ux(i, j, k) = ux(i, j, k) - ubar * sx
+                            uy(i, j, k) = uy(i, j, k) - ubar * sy
+                            uz(i, j, k) = uz(i, j, k) - ubar * sz
+
+                            vx(i, j, k) = vx(i, j, k) - vbar * sx
+                            vy(i, j, k) = vy(i, j, k) - vbar * sy
+                            vz(i, j, k) = vz(i, j, k) - vbar * sz
+
+                            wx(i, j, k) = wx(i, j, k) - wbar * sx
+                            wy(i, j, k) = wy(i, j, k) - wbar * sy
+                            wz(i, j, k) = wz(i, j, k) - wbar * sz
+
+                            qx(i, j, k) = qx(i, j, k) + a2 * sx
+                            qy(i, j, k) = qy(i, j, k) + a2 * sy
+                            qz(i, j, k) = qz(i, j, k) + a2 * sz
+                        end if
 #ifdef TAPENADE_REVERSE
-           end do
+                    end do
 #else
-        enddo
-     enddo
-  enddo
+                end do
+            end do
+        end do
 #endif
 
-  ! Third part. Contribution in the i-direction.
-  ! The contribution is scattered to both the left and right node
-  ! in i-direction.
+        ! Third part. Contribution in the i-direction.
+        ! The contribution is scattered to both the left and right node
+        ! in i-direction.
 
 #ifdef TAPENADE_REVERSE
-  !$AD II-LOOP
-  do ii=0,ie*jl*kl-1
-     i = mod(ii, ie) + 1
-     j = mod(ii/ie, jl) + 1
-     k = ii/(ie*jl) + 1
+        !$AD II-LOOP
+        do ii = 0, ie * jl * kl - 1
+            i = mod(ii, ie) + 1
+            j = mod(ii / ie, jl) + 1
+            k = ii / (ie * jl) + 1
 #else
-     do k=1,kl
-        do j=1,jl
-           do i=1,ie
+            do k = 1, kl
+                do j = 1, jl
+                    do i = 1, ie
 #endif
 
-              ! Compute 8 times the average normal for this part of
-              ! the control volume. The factor 8 is taken care of later
-              ! on when the division by the volume takes place.
-
-              sx = si(i-1,j,k,  1) + si(i-1,j+1,k,  1) &
-                   + si(i-1,j,k+1,1) + si(i-1,j+1,k+1,1) &
-                   + si(i,  j,k,  1) + si(i,  j+1,k,  1) &
-                   + si(i,  j,k+1,1) + si(i,  j+1,k+1,1)
-              sy = si(i-1,j,k,  2) + si(i-1,j+1,k,  2) &
-                   + si(i-1,j,k+1,2) + si(i-1,j+1,k+1,2) &
-                   + si(i,  j,k,  2) + si(i,  j+1,k,  2) &
-                   + si(i,  j,k+1,2) + si(i,  j+1,k+1,2)
-              sz = si(i-1,j,k,  3) + si(i-1,j+1,k,  3) &
-                   + si(i-1,j,k+1,3) + si(i-1,j+1,k+1,3) &
-                   + si(i,  j,k,  3) + si(i,  j+1,k,  3) &
-                   + si(i,  j,k+1,3) + si(i,  j+1,k+1,3)
-
-              ! Compute the average velocities and speed of sound squared
-              ! for this integration point. Node that these variables are
-              ! stored in w(ivx), w(ivy), w(ivz) and p.
-
-              ubar = fourth*(w(i,j,k,  ivx) + w(i,j+1,k,  ivx) &
-                   +         w(i,j,k+1,ivx) + w(i,j+1,k+1,ivx))
-              vbar = fourth*(w(i,j,k,  ivy) + w(i,j+1,k,  ivy) &
-                   +         w(i,j,k+1,ivy) + w(i,j+1,k+1,ivy))
-              wbar = fourth*(w(i,j,k,  ivz) + w(i,j+1,k,  ivz) &
-                   +         w(i,j,k+1,ivz) + w(i,j+1,k+1,ivz))
-
-              a2 = fourth*(aa(i,j,k) + aa(i,j+1,k) + aa(i,j,k+1) + aa(i,j+1,k+1))
-
-              ! Add the contributions to the surface integral to the node
-              ! j-1 and substract it from the node j. For the heat flux it
-              ! is reversed, because the negative of the gradient of the
-              ! speed of sound must be computed.
-
-              if(i > 1) then
-                 ux(i-1,j,k) = ux(i-1,j,k) + ubar*sx
-                 uy(i-1,j,k) = uy(i-1,j,k) + ubar*sy
-                 uz(i-1,j,k) = uz(i-1,j,k) + ubar*sz
-
-                 vx(i-1,j,k) = vx(i-1,j,k) + vbar*sx
-                 vy(i-1,j,k) = vy(i-1,j,k) + vbar*sy
-                 vz(i-1,j,k) = vz(i-1,j,k) + vbar*sz
-
-                 wx(i-1,j,k) = wx(i-1,j,k) + wbar*sx
-                 wy(i-1,j,k) = wy(i-1,j,k) + wbar*sy
-                 wz(i-1,j,k) = wz(i-1,j,k) + wbar*sz
-
-                 qx(i-1,j,k) = qx(i-1,j,k) - a2*sx
-                 qy(i-1,j,k) = qy(i-1,j,k) - a2*sy
-                 qz(i-1,j,k) = qz(i-1,j,k) - a2*sz
-              endif
-
-              if(i < ie) then
-                 ux(i,j,k) = ux(i,j,k) - ubar*sx
-                 uy(i,j,k) = uy(i,j,k) - ubar*sy
-                 uz(i,j,k) = uz(i,j,k) - ubar*sz
-
-                 vx(i,j,k) = vx(i,j,k) - vbar*sx
-                 vy(i,j,k) = vy(i,j,k) - vbar*sy
-                 vz(i,j,k) = vz(i,j,k) - vbar*sz
-
-                 wx(i,j,k) = wx(i,j,k) - wbar*sx
-                 wy(i,j,k) = wy(i,j,k) - wbar*sy
-                 wz(i,j,k) = wz(i,j,k) - wbar*sz
-
-                 qx(i,j,k) = qx(i,j,k) + a2*sx
-                 qy(i,j,k) = qy(i,j,k) + a2*sy
-                 qz(i,j,k) = qz(i,j,k) + a2*sz
-              endif
+                        ! Compute 8 times the average normal for this part of
+                        ! the control volume. The factor 8 is taken care of later
+                        ! on when the division by the volume takes place.
+
+                        sx = si(i - 1, j, k, 1) + si(i - 1, j + 1, k, 1) &
+                             + si(i - 1, j, k + 1, 1) + si(i - 1, j + 1, k + 1, 1) &
+                             + si(i, j, k, 1) + si(i, j + 1, k, 1) &
+                             + si(i, j, k + 1, 1) + si(i, j + 1, k + 1, 1)
+                        sy = si(i - 1, j, k, 2) + si(i - 1, j + 1, k, 2) &
+                             + si(i - 1, j, k + 1, 2) + si(i - 1, j + 1, k + 1, 2) &
+                             + si(i, j, k, 2) + si(i, j + 1, k, 2) &
+                             + si(i, j, k + 1, 2) + si(i, j + 1, k + 1, 2)
+                        sz = si(i - 1, j, k, 3) + si(i - 1, j + 1, k, 3) &
+                             + si(i - 1, j, k + 1, 3) + si(i - 1, j + 1, k + 1, 3) &
+                             + si(i, j, k, 3) + si(i, j + 1, k, 3) &
+                             + si(i, j, k + 1, 3) + si(i, j + 1, k + 1, 3)
+
+                        ! Compute the average velocities and speed of sound squared
+                        ! for this integration point. Node that these variables are
+                        ! stored in w(ivx), w(ivy), w(ivz) and p.
+
+                        ubar = fourth * (w(i, j, k, ivx) + w(i, j + 1, k, ivx) &
+                                         + w(i, j, k + 1, ivx) + w(i, j + 1, k + 1, ivx))
+                        vbar = fourth * (w(i, j, k, ivy) + w(i, j + 1, k, ivy) &
+                                         + w(i, j, k + 1, ivy) + w(i, j + 1, k + 1, ivy))
+                        wbar = fourth * (w(i, j, k, ivz) + w(i, j + 1, k, ivz) &
+                                         + w(i, j, k + 1, ivz) + w(i, j + 1, k + 1, ivz))
+
+                        a2 = fourth * (aa(i, j, k) + aa(i, j + 1, k) + aa(i, j, k + 1) + aa(i, j + 1, k + 1))
+
+                        ! Add the contributions to the surface integral to the node
+                        ! j-1 and substract it from the node j. For the heat flux it
+                        ! is reversed, because the negative of the gradient of the
+                        ! speed of sound must be computed.
+
+                        if (i > 1) then
+                            ux(i - 1, j, k) = ux(i - 1, j, k) + ubar * sx
+                            uy(i - 1, j, k) = uy(i - 1, j, k) + ubar * sy
+                            uz(i - 1, j, k) = uz(i - 1, j, k) + ubar * sz
+
+                            vx(i - 1, j, k) = vx(i - 1, j, k) + vbar * sx
+                            vy(i - 1, j, k) = vy(i - 1, j, k) + vbar * sy
+                            vz(i - 1, j, k) = vz(i - 1, j, k) + vbar * sz
+
+                            wx(i - 1, j, k) = wx(i - 1, j, k) + wbar * sx
+                            wy(i - 1, j, k) = wy(i - 1, j, k) + wbar * sy
+                            wz(i - 1, j, k) = wz(i - 1, j, k) + wbar * sz
+
+                            qx(i - 1, j, k) = qx(i - 1, j, k) - a2 * sx
+                            qy(i - 1, j, k) = qy(i - 1, j, k) - a2 * sy
+                            qz(i - 1, j, k) = qz(i - 1, j, k) - a2 * sz
+                        end if
+
+                        if (i < ie) then
+                            ux(i, j, k) = ux(i, j, k) - ubar * sx
+                            uy(i, j, k) = uy(i, j, k) - ubar * sy
+                            uz(i, j, k) = uz(i, j, k) - ubar * sz
+
+                            vx(i, j, k) = vx(i, j, k) - vbar * sx
+                            vy(i, j, k) = vy(i, j, k) - vbar * sy
+                            vz(i, j, k) = vz(i, j, k) - vbar * sz
+
+                            wx(i, j, k) = wx(i, j, k) - wbar * sx
+                            wy(i, j, k) = wy(i, j, k) - wbar * sy
+                            wz(i, j, k) = wz(i, j, k) - wbar * sz
+
+                            qx(i, j, k) = qx(i, j, k) + a2 * sx
+                            qy(i, j, k) = qy(i, j, k) + a2 * sy
+                            qz(i, j, k) = qz(i, j, k) + a2 * sz
+                        end if
 #ifdef TAPENADE_REVERSE
-           end do
+                    end do
 #else
-        enddo
-     enddo
-  enddo
+                end do
+            end do
+        end do
 #endif
-  ! Divide by 8 times the volume to obtain the correct gradients.
+        ! Divide by 8 times the volume to obtain the correct gradients.
 
 #ifdef TAPENADE_REVERSE
-  !$AD II-LOOP
-  do ii=0,il*jl*kl-1
-     i = mod(ii, il) + 1
-     j = mod(ii/il, jl) + 1
-     k = ii/(il*jl) + 1
+        !$AD II-LOOP
+        do ii = 0, il * jl * kl - 1
+            i = mod(ii, il) + 1
+            j = mod(ii / il, jl) + 1
+            k = ii / (il * jl) + 1
 #else
-     do k=1,kl
-        do j=1,jl
-           do i=1,il
+            do k = 1, kl
+                do j = 1, jl
+                    do i = 1, il
 #endif
-              ! Compute the inverse of 8 times the volume for this node.
+                        ! Compute the inverse of 8 times the volume for this node.
 
-              oneOverV = one/(vol(i,  j,  k) + vol(i,  j,  k+1) &
-                   +      vol(i+1,j,  k) + vol(i+1,j,  k+1) &
-                   +      vol(i,  j+1,k) + vol(i,  j+1,k+1) &
-                   +      vol(i+1,j+1,k) + vol(i+1,j+1,k+1))
+                        oneOverV = one / (vol(i, j, k) + vol(i, j, k + 1) &
+                                          + vol(i + 1, j, k) + vol(i + 1, j, k + 1) &
+                                          + vol(i, j + 1, k) + vol(i, j + 1, k + 1) &
+                                          + vol(i + 1, j + 1, k) + vol(i + 1, j + 1, k + 1))
 
-              ! Compute the correct velocity gradients and "unit" heat
-              ! fluxes. The velocity gradients are stored in ux, etc.
+                        ! Compute the correct velocity gradients and "unit" heat
+                        ! fluxes. The velocity gradients are stored in ux, etc.
 
-              ux(i,j,k) = ux(i,j,k)*oneOverV
-              uy(i,j,k) = uy(i,j,k)*oneOverV
-              uz(i,j,k) = uz(i,j,k)*oneOverV
+                        ux(i, j, k) = ux(i, j, k) * oneOverV
+                        uy(i, j, k) = uy(i, j, k) * oneOverV
+                        uz(i, j, k) = uz(i, j, k) * oneOverV
 
-              vx(i,j,k) = vx(i,j,k)*oneOverV
-              vy(i,j,k) = vy(i,j,k)*oneOverV
-              vz(i,j,k) = vz(i,j,k)*oneOverV
+                        vx(i, j, k) = vx(i, j, k) * oneOverV
+                        vy(i, j, k) = vy(i, j, k) * oneOverV
+                        vz(i, j, k) = vz(i, j, k) * oneOverV
 
-              wx(i,j,k) = wx(i,j,k)*oneOverV
-              wy(i,j,k) = wy(i,j,k)*oneOverV
-              wz(i,j,k) = wz(i,j,k)*oneOverV
+                        wx(i, j, k) = wx(i, j, k) * oneOverV
+                        wy(i, j, k) = wy(i, j, k) * oneOverV
+                        wz(i, j, k) = wz(i, j, k) * oneOverV
 
-              qx(i,j,k) = qx(i,j,k)*oneOverV
-              qy(i,j,k) = qy(i,j,k)*oneOverV
-              qz(i,j,k) = qz(i,j,k)*oneOverV
+                        qx(i, j, k) = qx(i, j, k) * oneOverV
+                        qy(i, j, k) = qy(i, j, k) * oneOverV
+                        qz(i, j, k) = qz(i, j, k) * oneOverV
 #ifdef TAPENADE_REVERSE
-           end do
+                    end do
 #else
-        enddo
-     enddo
-  enddo
+                end do
+            end do
+        end do
 #endif
-end subroutine allNodalGradients
+    end subroutine allNodalGradients
 
-  ! ----------------------------------------------------------------------
-  !                                                                      |
-  !                    No Tapenade Routine below this line               |
-  !                                                                      |
-  ! ----------------------------------------------------------------------
+    ! ----------------------------------------------------------------------
+    !                                                                      |
+    !                    No Tapenade Routine below this line               |
+    !                                                                      |
+    ! ----------------------------------------------------------------------
 
 #ifndef  USE_TAPENADE
 
-  subroutine fixAllNodalGradientsFromAD
-
-    use constants
-    use blockPointers, only : il, jl, kl, vol, ux, uy, uz, vx, vy, vz, &
-         wx, wy, wz, qx, qy, qz, vol
-    implicit none
-
-    ! WorkingVariables
-    integer(kind=intType) :: i, j, k
-    real(kind=realType) :: oneOverV
-
-    ! So the all nodal gradients doesnt' perform the final
-    ! scalaing by the volume since it isn't necessary for the
-    ! derivative. We need to fix that here:
-
-    ! Divide by 8 times the volume to obtain the correct gradients.
-    do k=1,kl
-       do j=1,jl
-          do i=1,il
-
-             ! Compute the inverse of 8 times the volume for this node.
-
-             oneOverV = one/(vol(i,  j,  k) + vol(i,  j,  k+1) &
-                  +      vol(i+1,j,  k) + vol(i+1,j,  k+1) &
-                  +      vol(i,  j+1,k) + vol(i,  j+1,k+1) &
-                  +      vol(i+1,j+1,k) + vol(i+1,j+1,k+1))
-
-             ! Compute the correct velocity gradients and "unit" heat
-             ! fluxes. The velocity gradients are stored in ux, etc.
-
-             ux(i,j,k) = ux(i,j,k)*oneOverV
-             uy(i,j,k) = uy(i,j,k)*oneOverV
-             uz(i,j,k) = uz(i,j,k)*oneOverV
-
-             vx(i,j,k) = vx(i,j,k)*oneOverV
-             vy(i,j,k) = vy(i,j,k)*oneOverV
-             vz(i,j,k) = vz(i,j,k)*oneOverV
-
-             wx(i,j,k) = wx(i,j,k)*oneOverV
-             wy(i,j,k) = wy(i,j,k)*oneOverV
-             wz(i,j,k) = wz(i,j,k)*oneOverV
+    subroutine fixAllNodalGradientsFromAD
 
-             qx(i,j,k) = qx(i,j,k)*oneOverV
-             qy(i,j,k) = qy(i,j,k)*oneOverV
-             qz(i,j,k) = qz(i,j,k)*oneOverV
-          enddo
-       enddo
-    enddo
-  end subroutine fixAllNodalGradientsFromAD
+        use constants
+        use blockPointers, only: il, jl, kl, vol, ux, uy, uz, vx, vy, vz, &
+                                 wx, wy, wz, qx, qy, qz, vol
+        implicit none
 
+        ! WorkingVariables
+        integer(kind=intType) :: i, j, k
+        real(kind=realType) :: oneOverV
 
-  subroutine computeEtotCellCpfit(i, j, k, scale, correctForK)
-    !
-    !       ComputeEtotCellCpfit will compute the total energy for the
-    !       given cell of the block given by the current pointers with the
-    !       cp temperature curve fit model.
-    !
-    use constants
-    use cpCurveFits
-    use blockPointers, only : gamma, w, P
-    use flowVarRefState, only : Rgas, TRef
-    implicit none
-    !
-    !      Local parameter.
-    !
-    real(kind=realType), parameter :: twoThird = two*third
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: i, j, k
-    real(kind=realType),   intent(in) :: scale
-    logical,                intent(in) :: correctForK
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn, mm, ii, start
+        ! So the all nodal gradients doesnt' perform the final
+        ! scalaing by the volume since it isn't necessary for the
+        ! derivative. We need to fix that here:
 
-    real(kind=realType) :: pp, t, t2, cv, eint
+        ! Divide by 8 times the volume to obtain the correct gradients.
+        do k = 1, kl
+            do j = 1, jl
+                do i = 1, il
 
-    ! Compute the dimensional temperature.
+                    ! Compute the inverse of 8 times the volume for this node.
 
-    pp = p(i,j,k)
-    if( correctForK ) pp = pp - twoThird &
-         * w(i,j,k,irho)*w(i,j,k,itu1)
-    t = Tref*pp/(RGas*w(i,j,k,irho))
+                    oneOverV = one / (vol(i, j, k) + vol(i, j, k + 1) &
+                                      + vol(i + 1, j, k) + vol(i + 1, j, k + 1) &
+                                      + vol(i, j + 1, k) + vol(i, j + 1, k + 1) &
+                                      + vol(i + 1, j + 1, k) + vol(i + 1, j + 1, k + 1))
 
-    ! Determine the case we are having here.
+                    ! Compute the correct velocity gradients and "unit" heat
+                    ! fluxes. The velocity gradients are stored in ux, etc.
 
-    if(t <= cpTrange(0)) then
+                    ux(i, j, k) = ux(i, j, k) * oneOverV
+                    uy(i, j, k) = uy(i, j, k) * oneOverV
+                    uz(i, j, k) = uz(i, j, k) * oneOverV
 
-       ! Temperature is less than the smallest temperature
-       ! in the curve fits. Use extrapolation using
-       ! constant cv.
+                    vx(i, j, k) = vx(i, j, k) * oneOverV
+                    vy(i, j, k) = vy(i, j, k) * oneOverV
+                    vz(i, j, k) = vz(i, j, k) * oneOverV
 
-       eint = scale*(cpEint(0) + cv0*(t - cpTrange(0)))
-       gamma(i,j,k) = (cv0 + one)/cv0
+                    wx(i, j, k) = wx(i, j, k) * oneOverV
+                    wy(i, j, k) = wy(i, j, k) * oneOverV
+                    wz(i, j, k) = wz(i, j, k) * oneOverV
 
-    else if(t >= cpTrange(cpNparts)) then
-
-       ! Temperature is larger than the largest temperature
-       ! in the curve fits. Use extrapolation using
-       ! constant cv.
-
-       eint = scale*(cpEint(cpNparts) &
-            +        cvn*(t - cpTrange(cpNparts)))
-
-       gamma(i,j,k) = (cvn + one)/cvn
-
-    else
-
-       ! Temperature is in the curve fit range.
-       ! First find the valid range.
-
-       ii    = cpNparts
-       start = 1
-       interval: do
-
-          ! Next guess for the interval.
-
-          nn = start + ii/2
-
-          ! Determine the situation we are having here.
-
-          if(t > cpTrange(nn)) then
-
-             ! Temperature is larger than the upper boundary of
-             ! the current interval. Update the lower boundary.
-
-             start = nn + 1
-             ii    = ii - 1
-
-          else if(t >= cpTrange(nn-1)) then
-
-             ! This is the correct range. Exit the do-loop.
-
-             exit
-
-          endif
+                    qx(i, j, k) = qx(i, j, k) * oneOverV
+                    qy(i, j, k) = qy(i, j, k) * oneOverV
+                    qz(i, j, k) = qz(i, j, k) * oneOverV
+                end do
+            end do
+        end do
+    end subroutine fixAllNodalGradientsFromAD
+
+    subroutine computeEtotCellCpfit(i, j, k, scale, correctForK)
+        !
+        !       ComputeEtotCellCpfit will compute the total energy for the
+        !       given cell of the block given by the current pointers with the
+        !       cp temperature curve fit model.
+        !
+        use constants
+        use cpCurveFits
+        use blockPointers, only: gamma, w, P
+        use flowVarRefState, only: Rgas, TRef
+        implicit none
+        !
+        !      Local parameter.
+        !
+        real(kind=realType), parameter :: twoThird = two * third
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: i, j, k
+        real(kind=realType), intent(in) :: scale
+        logical, intent(in) :: correctForK
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn, mm, ii, start
+
+        real(kind=realType) :: pp, t, t2, cv, eint
+
+        ! Compute the dimensional temperature.
+
+        pp = p(i, j, k)
+        if (correctForK) pp = pp - twoThird &
+                              * w(i, j, k, irho) * w(i, j, k, itu1)
+        t = Tref * pp / (RGas * w(i, j, k, irho))
+
+        ! Determine the case we are having here.
+
+        if (t <= cpTrange(0)) then
+
+            ! Temperature is less than the smallest temperature
+            ! in the curve fits. Use extrapolation using
+            ! constant cv.
+
+            eint = scale * (cpEint(0) + cv0 * (t - cpTrange(0)))
+            gamma(i, j, k) = (cv0 + one) / cv0
+
+        else if (t >= cpTrange(cpNparts)) then
+
+            ! Temperature is larger than the largest temperature
+            ! in the curve fits. Use extrapolation using
+            ! constant cv.
+
+            eint = scale * (cpEint(cpNparts) &
+                            + cvn * (t - cpTrange(cpNparts)))
+
+            gamma(i, j, k) = (cvn + one) / cvn
+
+        else
+
+            ! Temperature is in the curve fit range.
+            ! First find the valid range.
+
+            ii = cpNparts
+            start = 1
+            interval: do
 
-          ! Modify ii for the next branch to search.
+                ! Next guess for the interval.
 
-          ii = ii/2
+                nn = start + ii / 2
 
-       enddo interval
+                ! Determine the situation we are having here.
 
-       ! Nn contains the correct curve fit interval.
-       ! Integrate cv to compute eint.
+                if (t > cpTrange(nn)) then
 
-       eint = cpTempFit(nn)%eint0 - t
-       cv   = -one
+                    ! Temperature is larger than the upper boundary of
+                    ! the current interval. Update the lower boundary.
 
-       do ii=1,cpTempFit(nn)%nterm
-          t2 = t**cpTempFit(nn)%exponents(ii)
-          cv = cv + cpTempFit(nn)%constants(ii)*t2
+                    start = nn + 1
+                    ii = ii - 1
 
-          if(cpTempFit(nn)%exponents(ii) == -1) then
-             eint = eint + cpTempFit(nn)%constants(ii)*log(t)
-          else
-             mm   = cpTempFit(nn)%exponents(ii) + 1
-             t2   = t*t2
-             eint = eint + cpTempFit(nn)%constants(ii)*t2/mm
-          endif
-       enddo
+                else if (t >= cpTrange(nn - 1)) then
 
-       eint = scale*eint
-       gamma(i,j,k) = (cv + one)/cv
+                    ! This is the correct range. Exit the do-loop.
 
-    endif
+                    exit
 
-    ! Compute the total energy per unit volume.
+                end if
 
-    w(i,j,k,irhoE) = w(i,j,k,irho)*(eint &
-         +                half*(w(i,j,k,ivx)**2 &
-         +                      w(i,j,k,ivy)**2 &
-         +                      w(i,j,k,ivz)**2))
+                ! Modify ii for the next branch to search.
 
-    if( correctForK )                 &
-         w(i,j,k,irhoE) = w(i,j,k,irhoE) &
-         + w(i,j,k,irho)*w(i,j,k,itu1)
+                ii = ii / 2
 
-  end subroutine computeEtotCellCpfit
+            end do interval
 
+            ! Nn contains the correct curve fit interval.
+            ! Integrate cv to compute eint.
 
-  subroutine updateGamma
-    !
-    !       This is a utility routine to update the gamma variable from
-    !       from gammaConstant if gammaConstant has changed.
-    !
-    use constants
-    use blockPointers, only : nDom, gamma
-    use inputtimespectral, only : nTimeIntervalsSpectral
-    use inputPhysics, only : gammaConstant
-    use utils, only : setPointers
-    implicit none
+            eint = cpTempFit(nn)%eint0 - t
+            cv = -one
 
-    integer :: nn, sps
+            do ii = 1, cpTempFit(nn)%nterm
+                t2 = t**cpTempFit(nn)%exponents(ii)
+                cv = cv + cpTempFit(nn)%constants(ii) * t2
 
-    do sps=1,nTimeIntervalsSpectral
-       do nn=1,nDom
-          call setPointers(nn, 1_intType, sps)
-          gamma = gammaConstant
-       end do
-    end do
-  end subroutine updateGamma
+                if (cpTempFit(nn)%exponents(ii) == -1) then
+                    eint = eint + cpTempFit(nn)%constants(ii) * log(t)
+                else
+                    mm = cpTempFit(nn)%exponents(ii) + 1
+                    t2 = t * t2
+                    eint = eint + cpTempFit(nn)%constants(ii) * t2 / mm
+                end if
+            end do
+
+            eint = scale * eint
+            gamma(i, j, k) = (cv + one) / cv
+
+        end if
+
+        ! Compute the total energy per unit volume.
+
+        w(i, j, k, irhoE) = w(i, j, k, irho) * (eint &
+                                                + half * (w(i, j, k, ivx)**2 &
+                                                          + w(i, j, k, ivy)**2 &
+                                                          + w(i, j, k, ivz)**2))
+
+        if (correctForK) &
+            w(i, j, k, irhoE) = w(i, j, k, irhoE) &
+                                + w(i, j, k, irho) * w(i, j, k, itu1)
+
+    end subroutine computeEtotCellCpfit
+
+    subroutine updateGamma
+        !
+        !       This is a utility routine to update the gamma variable from
+        !       from gammaConstant if gammaConstant has changed.
+        !
+        use constants
+        use blockPointers, only: nDom, gamma
+        use inputtimespectral, only: nTimeIntervalsSpectral
+        use inputPhysics, only: gammaConstant
+        use utils, only: setPointers
+        implicit none
+
+        integer :: nn, sps
+
+        do sps = 1, nTimeIntervalsSpectral
+            do nn = 1, nDom
+                call setPointers(nn, 1_intType, sps)
+                gamma = gammaConstant
+            end do
+        end do
+    end subroutine updateGamma
 #endif
 end module flowUtils
diff --git a/src/utils/genericISNAN.F90 b/src/utils/genericISNAN.F90
index 91ff3e384..2c31a561e 100644
--- a/src/utils/genericISNAN.F90
+++ b/src/utils/genericISNAN.F90
@@ -1,47 +1,47 @@
 module genericISNAN
     implicit none
     interface myIsNAN
-     module procedure myIsNAN_r
-     module procedure myIsNAN_c
+        module procedure myIsNAN_r
+        module procedure myIsNAN_c
     end interface
 
-    contains
+contains
 
     logical function myIsNAN_r(val)
-    !
-    !       myIsNAN_r determines whether or not the given real value is a NAN or INF and
-    !       returns the according logical.
-    !
-    use precision
-    use, intrinsic :: ieee_arithmetic, only : ieee_is_nan, ieee_is_finite
-    implicit none
-    !
-    !      Function arguments.
-    !
-    real(kind=alwaysRealType), intent(in) :: val
-
-    ! Check if NAN or INF
-    myIsNAN_r = ieee_is_nan(val) .or. .not. ieee_is_finite(val)
+        !
+        !       myIsNAN_r determines whether or not the given real value is a NAN or INF and
+        !       returns the according logical.
+        !
+        use precision
+        use, intrinsic :: ieee_arithmetic, only: ieee_is_nan, ieee_is_finite
+        implicit none
+        !
+        !      Function arguments.
+        !
+        real(kind=alwaysRealType), intent(in) :: val
+
+        ! Check if NAN or INF
+        myIsNAN_r = ieee_is_nan(val) .or. .not. ieee_is_finite(val)
 
     end function myIsNAN_r
 
     logical function myIsNAN_c(val)
-    !
-    !       myIsNAN_c determines whether or not the given complex value contains NAN of INF and
-    !       returns the according logical.
-    !
-    use precision
-    use, intrinsic :: ieee_arithmetic, only : ieee_is_nan, ieee_is_finite
-    implicit none
-    !
-    !      Function arguments.
-    !
-    complex(kind=realType), intent(in) :: val
-
-    ! Check if either real or imag part is NAN or INF
-    myIsNAN_c = ieee_is_nan(real(val)) .or. .not. ieee_is_finite(real(val))
-    myIsNAN_c = myIsNAN_c .or. ieee_is_nan(aimag(val)) .or. .not. ieee_is_finite(aimag(val))
+        !
+        !       myIsNAN_c determines whether or not the given complex value contains NAN of INF and
+        !       returns the according logical.
+        !
+        use precision
+        use, intrinsic :: ieee_arithmetic, only: ieee_is_nan, ieee_is_finite
+        implicit none
+        !
+        !      Function arguments.
+        !
+        complex(kind=realType), intent(in) :: val
+
+        ! Check if either real or imag part is NAN or INF
+        myIsNAN_c = ieee_is_nan(real(val)) .or. .not. ieee_is_finite(real(val))
+        myIsNAN_c = myIsNAN_c .or. ieee_is_nan(aimag(val)) .or. .not. ieee_is_finite(aimag(val))
 
     end function myIsNAN_c
 
-end module genericISNAN
\ No newline at end of file
+end module genericISNAN
diff --git a/src/utils/haloExchange.F90 b/src/utils/haloExchange.F90
index 05803422f..275260815 100644
--- a/src/utils/haloExchange.F90
+++ b/src/utils/haloExchange.F90
@@ -2,3767 +2,3757 @@ module haloExchange
 
 contains
 
-  subroutine whalo1(level, start, end, commPressure, commGamma, &
-       commViscous)
-    !
-    !       whalo1 exchanges all the 1st level internal halo's for the
-    !       cell centered variables.
-    !
-    use constants
-    use blockPointers
-    use communication
-    use flowVarRefState, only : viscous, eddyModel
-    use inputPhysics
-    use inputTimeSpectral, only : ntimeIntervalsSpectral
-    use iteration
-    use utils, only : setPointers, getCorrectForK
-    use flowUtils, only : computeEtotBlock
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level, start, end
-    logical, intent(in) :: commPressure, commGamma, commViscous
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn, mm, ll
-
-    logical :: correctForK, commLamVis, commEddyVis, commVarGamma
-
-    ! Set the logicals whether or not to communicate the viscosities.
-
-    commLamVis = .false.
-    if(viscous .and. commViscous) commLamVis = .true.
-
-    commEddyVis = .false.
-    if(eddyModel .and. commViscous) commEddyVis = .true.
-
-    ! Set the logical whether or not to communicate gamma.
-
-    commVarGamma = .false.
-    if(commGamma .and. (cpModel == cpTempCurveFits)) &
-         commVarGamma = .true.
-
-    ! Exchange the 1 to 1 matching 1st level cell halo's.
-
-    call whalo1to1(level, start, end, commPressure, commVarGamma, &
-         commLamVis, commEddyVis, commPatternCell_1st,  &
-         internalCell_1st)
-
-    ! Exchange the overset cells
-    call wOverset(level, start, end, commPressure, commVarGamma, &
-         commLamVis, commEddyVis, commPatternOverset, internalOverset)
-
-    ! Average any overset orphans.
-
-    do ll=1,nTimeIntervalsSpectral
-       do nn=1,nDom
-          call setPointers(nn,level,ll)
-          call orphanAverage(start, end, commPressure, commGamma, &
-               commLamVis, commEddyVis)
-       end do
-    end do
-
-    ! If both the pressure and the total energy has been communicated
-    ! compute the energy again. The reason is that both values are
-    ! interpolated and consequently the values are not consistent.
-    ! The energy depends quadratically on the velocity.
-
-    bothPAndE: if(commPressure .and. start <= irhoE .and. &
-         end >= irhoE) then
-
-       ! First determine whether or not the total energy must be
-       ! corrected for the presence of the turbulent kinetic energy.
-       correctForK = getCorrectForK()
-
-       ! Loop over the blocks to find the sliding mesh subfaces.
-       ! Use is made of the fact the boundary conditions are identical
-       ! for all spectral solutions. So that loop can be inside the
-       ! test for the sliding mesh subface.
-
-       domains: do nn=1,nDom
-          do mm=1,flowDoms(nn,level,1)%nBocos
-             if(flowDoms(nn,level,1)%BCType(mm) == slidingInterface) then
-
-                ! Loop over the number of spectral solutions.
-
-                do ll=1,nTimeIntervalsSpectral
-
-                   ! Set the pointers for this block and compute the energy
-                   ! for the halo cells of this sliding interface subface.
-
-                   call setPointers(nn,level,ll)
-                   call computeEtotBlock(icBeg(mm), icEnd(mm), &
-                        jcBeg(mm), jcEnd(mm), &
-                        kcBeg(mm), kcEnd(mm), correctForK)
-                enddo
-             endif
-          enddo
-
-       enddo domains
-
-    endif bothPAndE
-
-  end subroutine whalo1
-
-  subroutine whalo2(level, start, end, commPressure, commGamma, &
-       commViscous)
-    !
-    !       whalo2 exchanges all the 2nd level internal halo's for the
-    !       cell centered variables.
-    !
-    use constants
-    use blockPointers
-    use communication
-    use flowVarRefState
-    use inputPhysics
-    use inputTimeSpectral
-    use iteration
-    use utils, only : setPointers, getCorrectForK
-    use flowUtils, only : computeEtotBlock
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level, start, end
-    logical, intent(in) :: commPressure, commGamma, commViscous
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn, ll
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
-
-    logical :: correctForK, commLamVis, commEddyVis, commVarGamma
-
-    ! Set the logicals whether or not to communicate the viscosities.
-
-    commLamVis = .false.
-    if(viscous .and. commViscous) commLamVis = .true.
+    subroutine whalo1(level, start, end, commPressure, commGamma, &
+                      commViscous)
+        !
+        !       whalo1 exchanges all the 1st level internal halo's for the
+        !       cell centered variables.
+        !
+        use constants
+        use blockPointers
+        use communication
+        use flowVarRefState, only: viscous, eddyModel
+        use inputPhysics
+        use inputTimeSpectral, only: ntimeIntervalsSpectral
+        use iteration
+        use utils, only: setPointers, getCorrectForK
+        use flowUtils, only: computeEtotBlock
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level, start, end
+        logical, intent(in) :: commPressure, commGamma, commViscous
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn, mm, ll
+
+        logical :: correctForK, commLamVis, commEddyVis, commVarGamma
+
+        ! Set the logicals whether or not to communicate the viscosities.
+
+        commLamVis = .false.
+        if (viscous .and. commViscous) commLamVis = .true.
+
+        commEddyVis = .false.
+        if (eddyModel .and. commViscous) commEddyVis = .true.
+
+        ! Set the logical whether or not to communicate gamma.
+
+        commVarGamma = .false.
+        if (commGamma .and. (cpModel == cpTempCurveFits)) &
+            commVarGamma = .true.
+
+        ! Exchange the 1 to 1 matching 1st level cell halo's.
+
+        call whalo1to1(level, start, end, commPressure, commVarGamma, &
+                       commLamVis, commEddyVis, commPatternCell_1st, &
+                       internalCell_1st)
+
+        ! Exchange the overset cells
+        call wOverset(level, start, end, commPressure, commVarGamma, &
+                      commLamVis, commEddyVis, commPatternOverset, internalOverset)
+
+        ! Average any overset orphans.
+
+        do ll = 1, nTimeIntervalsSpectral
+            do nn = 1, nDom
+                call setPointers(nn, level, ll)
+                call orphanAverage(start, end, commPressure, commGamma, &
+                                   commLamVis, commEddyVis)
+            end do
+        end do
+
+        ! If both the pressure and the total energy has been communicated
+        ! compute the energy again. The reason is that both values are
+        ! interpolated and consequently the values are not consistent.
+        ! The energy depends quadratically on the velocity.
+
+        bothPAndE: if (commPressure .and. start <= irhoE .and. &
+                       end >= irhoE) then
+
+            ! First determine whether or not the total energy must be
+            ! corrected for the presence of the turbulent kinetic energy.
+            correctForK = getCorrectForK()
+
+            ! Loop over the blocks to find the sliding mesh subfaces.
+            ! Use is made of the fact the boundary conditions are identical
+            ! for all spectral solutions. So that loop can be inside the
+            ! test for the sliding mesh subface.
+
+            domains: do nn = 1, nDom
+                do mm = 1, flowDoms(nn, level, 1)%nBocos
+                    if (flowDoms(nn, level, 1)%BCType(mm) == slidingInterface) then
+
+                        ! Loop over the number of spectral solutions.
+
+                        do ll = 1, nTimeIntervalsSpectral
+
+                            ! Set the pointers for this block and compute the energy
+                            ! for the halo cells of this sliding interface subface.
+
+                            call setPointers(nn, level, ll)
+                            call computeEtotBlock(icBeg(mm), icEnd(mm), &
+                                                  jcBeg(mm), jcEnd(mm), &
+                                                  kcBeg(mm), kcEnd(mm), correctForK)
+                        end do
+                    end if
+                end do
 
-    commEddyVis = .false.
-    if(eddyModel .and. commViscous) commEddyVis = .true.
+            end do domains
+
+        end if bothPAndE
+
+    end subroutine whalo1
+
+    subroutine whalo2(level, start, end, commPressure, commGamma, &
+                      commViscous)
+        !
+        !       whalo2 exchanges all the 2nd level internal halo's for the
+        !       cell centered variables.
+        !
+        use constants
+        use blockPointers
+        use communication
+        use flowVarRefState
+        use inputPhysics
+        use inputTimeSpectral
+        use iteration
+        use utils, only: setPointers, getCorrectForK
+        use flowUtils, only: computeEtotBlock
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level, start, end
+        logical, intent(in) :: commPressure, commGamma, commViscous
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn, ll
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
+
+        logical :: correctForK, commLamVis, commEddyVis, commVarGamma
+
+        ! Set the logicals whether or not to communicate the viscosities.
+
+        commLamVis = .false.
+        if (viscous .and. commViscous) commLamVis = .true.
+
+        commEddyVis = .false.
+        if (eddyModel .and. commViscous) commEddyVis = .true.
+
+        ! Set the logical whether or not to communicate gamma.
+
+        commVarGamma = .false.
+        if (commGamma .and. (cpModel == cpTempCurveFits)) &
+            commVarGamma = .true.
+
+        ! Exchange the 1 to 1 matching 2nd level cell halo's.
+
+        call whalo1to1(level, start, end, commPressure, commVarGamma, &
+                       commLamVis, commEddyVis, commPatternCell_2nd, &
+                       internalCell_2nd)
+
+        ! Exchange the overset cells
+        call wOverset(level, start, end, commPressure, commVarGamma, &
+                      commLamVis, commEddyVis, commPatternOverset, internalOverset)
+
+        ! Average any overset orphans.
+
+        do ll = 1, nTimeIntervalsSpectral
+            do nn = 1, nDom
+                call setPointers(nn, level, ll)
+                call orphanAverage(start, end, commPressure, commGamma, &
+                                   commLamVis, commEddyVis)
+            end do
+        end do
+
+        ! If both the pressure and the total energy has been communicated
+        ! compute the energy again. The reason is that both values are
+        ! interpolated and consequently the values are not consistent.
+        ! The energy depends quadratically on the velocity.
+
+        bothPAndE: if (commPressure .and. start <= irhoE .and. &
+                       end >= irhoE) then
+
+            ! First determine whether or not the total energy must be
+            ! corrected for the presence of the turbulent kinetic energy.
+
+            correctForK = getCorrectForK()
+
+            domains: do nn = 1, nDom
+
+                ! Treat the overset blocks. Since we don't have the logic
+                ! setup here correctly to only update the overset cells,
+                ! just do the whole block, for every block
+                do ll = 1, nTimeIntervalsSpectral
+                    call setPointers(nn, level, ll)
+                    call computeETotBlock(2, il, 2, jl, 2, kl, correctForK)
+                end do
 
-    ! Set the logical whether or not to communicate gamma.
-
-    commVarGamma = .false.
-    if(commGamma .and. (cpModel == cpTempCurveFits)) &
-         commVarGamma = .true.
+            end do domains
 
-    ! Exchange the 1 to 1 matching 2nd level cell halo's.
-
-    call whalo1to1(level, start, end, commPressure, commVarGamma, &
-         commLamVis, commEddyVis, commPatternCell_2nd,  &
-         internalCell_2nd)
-
-    ! Exchange the overset cells
-    call wOverset(level, start, end, commPressure, commVarGamma, &
-         commLamVis, commEddyVis, commPatternOverset, internalOverset)
-
-    ! Average any overset orphans.
+        end if bothPAndE
 
-    do ll=1,nTimeIntervalsSpectral
-       do nn=1,nDom
-          call setPointers(nn,level,ll)
-          call orphanAverage(start, end, commPressure, commGamma, &
-               commLamVis, commEddyVis)
-       end do
-    end do
+    end subroutine whalo2
 
-    ! If both the pressure and the total energy has been communicated
-    ! compute the energy again. The reason is that both values are
-    ! interpolated and consequently the values are not consistent.
-    ! The energy depends quadratically on the velocity.
+    subroutine orphanAverage(wstart, wend, calcPressure, calcGamma, &
+                             calcLamVis, calcEddyVis)
+        !
+        !       orphanAverage uses the neighboring cells of an overset orphan
+        !       to set the flow state for the orphan cell by a simple average.
+        !       This routine operates on the block given by the block pointers
+        !       so it is assumed they are set.
+        !
+        use constants
+        use blockPointers
+        use flowVarRefState
+        use inputPhysics
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: wstart, wend
 
-    bothPAndE: if(commPressure .and. start <= irhoE .and. &
-         end >= irhoE) then
+        logical, intent(in) :: calcPressure, calcGamma, calcLamVis
+        logical, intent(in) :: calcEddyVis
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: oi, oj, ok, ni, nj, nk, i, l, m, n, nAvg
 
-       ! First determine whether or not the total energy must be
-       ! corrected for the presence of the turbulent kinetic energy.
+        integer(kind=intType), dimension(3) :: del
 
-       correctForK = getCorrectForK()
+        real(kind=realType) :: nAvgReal
 
-       domains: do nn=1,nDom
+        ! Return immediately if there are no orphans for this block.
 
-          ! Treat the overset blocks. Since we don't have the logic
-          ! setup here correctly to only update the overset cells,
-          ! just do the whole block, for every block
-          do ll=1, nTimeIntervalsSpectral
-             call setPointers(nn, level, ll)
-             call computeETotBlock(2, il, 2, jl, 2, kl, correctForK)
-          end do
+        if (nOrphans == 0) return
 
+        ! Loop over the number of orphans.
 
-       enddo domains
+        orphanLoop: do n = 1, nOrphans
 
-    endif bothPAndE
+            ! Store the orphan indices easier.
 
-  end subroutine whalo2
+            oi = orphans(1, n)
+            oj = orphans(2, n)
+            ok = orphans(3, n)
 
-  subroutine orphanAverage(wstart, wend, calcPressure, calcGamma, &
-       calcLamVis, calcEddyVis)
-    !
-    !       orphanAverage uses the neighboring cells of an overset orphan
-    !       to set the flow state for the orphan cell by a simple average.
-    !       This routine operates on the block given by the block pointers
-    !       so it is assumed they are set.
-    !
-    use constants
-    use blockPointers
-    use flowVarRefState
-    use inputPhysics
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: wstart, wend
+            ! Initialize the number of neighbors used to 0 and also set
+            ! the flow variables to zero such that an average can be
+            ! accumulated below.
 
-    logical, intent(in) :: calcPressure, calcGamma, calcLamVis
-    logical, intent(in) :: calcEddyVis
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: oi, oj, ok, ni, nj, nk, i, l, m, n, nAvg
+            nAvg = 0
 
-    integer(kind=intType), dimension(3) :: del
+            do l = wstart, wend
+                w(oi, oj, ok, l) = zero
+            end do
 
-    real(kind=realType) :: nAvgReal
+            if (calcPressure) p(oi, oj, ok) = zero
+            if (calcGamma) gamma(oi, oj, ok) = zero
+            if (calcLamVis) rlv(oi, oj, ok) = zero
+            if (calcEddyVis) rev(oi, oj, ok) = zero
 
-    ! Return immediately if there are no orphans for this block.
+            ! Loop over the 3 coordinate directions, and for both the
+            ! positive and negative direction set the delta vector to be a
+            ! unit vector in that direction.
 
-    if (nOrphans == 0) return
+            directionLoop: do m = 1, 3
+                plusMinusLoop: do i = -1, 1, 2
 
-    ! Loop over the number of orphans.
+                    del = 0
+                    del(m) = i
 
-    orphanLoop: do n = 1,nOrphans
+                    ! Compute the neighbor indices and skip if it is outside the
+                    ! boundaries of the block.
 
-       ! Store the orphan indices easier.
+                    ni = oi + del(1)
+                    nj = oj + del(2)
+                    nk = ok + del(3)
 
-       oi = orphans(1, n)
-       oj = orphans(2, n)
-       ok = orphans(3, n)
+                    if (ni < 0 .or. ni > ib .or. &
+                        nj < 0 .or. nj > jb .or. &
+                        nk < 0 .or. nk > kb) cycle
 
-       ! Initialize the number of neighbors used to 0 and also set
-       ! the flow variables to zero such that an average can be
-       ! accumulated below.
+                    ! If the neighboring iblank value indicates a cell that is
+                    ! part of either the field, fringe, or boundary condition,
+                    ! then use its flow state in the average.
 
-       nAvg = 0
+                    if (iblank(ni, nj, nk) == 1) then
 
-       do l=wstart, wend
-          w(oi,oj,ok,l) = zero
-       end do
+                        ! Update the number of neighbors used in the average and
+                        ! compute the flow variables for the given range.
 
-       if (calcPressure) p(oi,oj,ok)     = zero
-       if (calcGamma)    gamma(oi,oj,ok) = zero
-       if (calcLamVis)   rlv(oi,oj,ok)   = zero
-       if (calcEddyVis)  rev(oi,oj,ok)   = zero
+                        nAvg = nAvg + 1
 
-       ! Loop over the 3 coordinate directions, and for both the
-       ! positive and negative direction set the delta vector to be a
-       ! unit vector in that direction.
+                        do l = wstart, wend
+                            w(oi, oj, ok, l) = w(oi, oj, ok, l) + w(ni, nj, nk, l)
+                        end do
 
-       directionLoop: do m = 1,3
-          plusMinusLoop: do i = -1,1,2
+                        ! Check if the pressure, specific heat ratio, laminar
+                        ! viscosity, and/or eddy viscosity needs to be computed.
 
-             del    = 0
-             del(m) = i
+                        if (calcPressure) &
+                            p(oi, oj, ok) = p(oi, oj, ok) + p(ni, nj, nk)
+                        if (calcGamma) &
+                            gamma(oi, oj, ok) = gamma(oi, oj, ok) + gamma(ni, nj, nk)
+                        if (calcLamVis) &
+                            rlv(oi, oj, ok) = rlv(oi, oj, ok) + rlv(ni, nj, nk)
+                        if (calcEddyVis) &
+                            rev(oi, oj, ok) = rev(oi, oj, ok) + rev(ni, nj, nk)
 
-             ! Compute the neighbor indices and skip if it is outside the
-             ! boundaries of the block.
+                    end if
 
-             ni = oi + del(1)
-             nj = oj + del(2)
-             nk = ok + del(3)
+                end do plusMinusLoop
+            end do directionLoop
 
-             if (ni < 0 .or. ni > ib .or. &
-                  nj < 0 .or. nj > jb .or. &
-                  nk < 0 .or. nk > kb) cycle
+            ! Check to make sure that at least 1 suitable neighbeor was
+            ! found to use in the average.
 
-             ! If the neighboring iblank value indicates a cell that is
-             ! part of either the field, fringe, or boundary condition,
-             ! then use its flow state in the average.
+            checkNoNeighbors: if (nAvg > 0) then
 
-             if (iblank(ni,nj,nk) == 1) then
+                ! Divide each of the variables being computed by the number
+                ! of neighbors used in the average.
 
-                ! Update the number of neighbors used in the average and
-                ! compute the flow variables for the given range.
+                nAvgReal = real(nAvg, realType)
 
-                nAvg = nAvg + 1
+                ! Average the flow variables for the given range.
 
-                do l=wstart,wend
-                   w(oi,oj,ok,l) = w(oi,oj,ok,l) + w(ni,nj,nk,l)
+                do l = wstart, wend
+                    w(oi, oj, ok, l) = w(oi, oj, ok, l) / nAvgReal
                 end do
 
                 ! Check if the pressure, specific heat ratio, laminar
-                ! viscosity, and/or eddy viscosity needs to be computed.
-
-                if (calcPressure) &
-                     p(oi,oj,ok) = p(oi,oj,ok) + p(ni,nj,nk)
-                if (calcGamma) &
-                     gamma(oi,oj,ok) = gamma(oi,oj,ok) + gamma(ni,nj,nk)
-                if (calcLamVis) &
-                     rlv(oi,oj,ok) = rlv(oi,oj,ok) + rlv(ni,nj,nk)
-                if (calcEddyVis) &
-                     rev(oi,oj,ok) = rev(oi,oj,ok) + rev(ni,nj,nk)
-
-             end if
-
-          end do plusMinusLoop
-       end do directionLoop
-
-       ! Check to make sure that at least 1 suitable neighbeor was
-       ! found to use in the average.
+                ! viscosity, and/or eddy viscosity needs to be averaged.
 
-       checkNoNeighbors: if (nAvg > 0) then
+                if (calcPressure) p(oi, oj, ok) = p(oi, oj, ok) / nAvgReal
+                if (calcGamma) gamma(oi, oj, ok) = gamma(oi, oj, ok) / nAvgReal
+                if (calcLamVis) rlv(oi, oj, ok) = rlv(oi, oj, ok) / nAvgReal
+                if (calcEddyVis) rev(oi, oj, ok) = rev(oi, oj, ok) / nAvgReal
 
-          ! Divide each of the variables being computed by the number
-          ! of neighbors used in the average.
+                else checkNoNeighbors
 
-          nAvgReal = real(nAvg, realType)
+                ! No suitable neighbors were found in order to compute an
+                ! average. Set the variables back to the the freestream.
 
-          ! Average the flow variables for the given range.
-
-          do l=wstart,wend
-             w(oi,oj,ok,l) = w(oi,oj,ok,l)/nAvgReal
-          end do
+                do l = wstart, wend
+                    w(oi, oj, ok, l) = wInf(l)
+                end do
 
-          ! Check if the pressure, specific heat ratio, laminar
-          ! viscosity, and/or eddy viscosity needs to be averaged.
+                if (calcPressure) p(oi, oj, ok) = pInfCorr
+                if (calcGamma) gamma(oi, oj, ok) = gammaInf
+                if (calcLamVis) rlv(oi, oj, ok) = muInf
+                if (calcEddyVis) rev(oi, oj, ok) = eddyVisInfRatio * muInf
+
+            end if checkNoNeighbors
+
+        end do orphanLoop
+
+    end subroutine orphanAverage
+
+    subroutine setCommPointers(start, end, commPressure, commVarGamma, commLamVis, &
+                               commEddyVis, level, sps, derivPointers, nVar, varOffset)
+
+        ! Generic routine for setting pointers to the communication
+        ! variables. Can also set pointers to derivatve values if derivPts is True.
+
+        use constants
+        use block, only: fLowDoms, blockType, flowDomsd, nDom
+        implicit none
+
+        ! Input
+        integer(kind=intType), intent(in) :: start, end, level, sps
+        logical, intent(in) :: commPressure, commVarGamma, commLamVis, commEddyVis
+        logical, intent(in) :: derivPointers
+        integer(kind=intType), intent(in) :: varOffset
+
+        ! Output
+        integer(kind=intType), intent(out) :: nVar
+
+        ! Working:
+        integer(kind=intType) :: nn, k
+        type(blockType), pointer :: blk, blk1, blkLevel
+
+        ! Set the pointers for the required variables
+        domainLoop: do nn = 1, nDom
+            nVar = varOffset
+            blk => flowDoms(nn, level, sps)
+
+            if (derivPointers) then
+                blkLevel => flowDomsd(nn, level, sps)
+                blk1 => flowDomsd(nn, 1, sps)
+            else
+                blkLevel => flowDoms(nn, level, sps)
+                blk1 => flowDoms(nn, 1, sps)
+            end if
+
+            do k = start, end
+                nVar = nVar + 1
+                blk%realCommVars(nVar)%var => blkLevel%w(:, :, :, k)
+            end do
+
+            if (commPressure) then
+                nVar = nVar + 1
+                blk%realCommVars(nVar)%var => blkLevel%P(:, :, :)
+            end if
+
+            if (commVarGamma) then
+                nVar = nVar + 1
+                blk%realCommVars(nVar)%var => blk1%gamma(:, :, :)
+            end if
+
+            if (commLamVis) then
+                nVar = nVar + 1
+                blk%realCommvars(nVar)%var => blk1%rlv(:, :, :)
+            end if
+
+            if (commEddyVis) then
+                nVar = nVar + 1
+                blk%realCommVars(nVar)%var => blkLevel%rev(:, :, :)
+            end if
+
+        end do domainLoop
+        nVar = nVar - varOffset
+    end subroutine setCommPointers
+
+    subroutine whalo1to1(level, start, end, commPressure, &
+                         commVarGamma, commLamVis, commEddyVis, &
+                         commPattern, internal)
+        !
+        !       whalo1to1 exchanges the 1 to 1 internal halo's for the cell
+        !       centered variables for the given communication pattern. It
+        !       is possible to send a range of variables and not the entire
+        !       set, e.g. only the flow variables or only the turbulent
+        !       variables. This is controlled by the arguments start, end,
+        !       commPressure and commViscous. The exchange takes place for
+        !       the given grid level.
+        !
+        use constants
+        use block
+        use communication
+        use inputTimeSpectral
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level, start, end
+        logical, intent(in) :: commPressure, commVarGamma
+        logical, intent(in) :: commLamVis, commEddyVis
+
+        type(commType), dimension(*), intent(in) :: commPattern
+        type(internalCommType), dimension(*), intent(in) :: internal
+
+        integer(kind=intType) :: nVar, nn, k, sps
+
+        logical :: correctPeriodic
+
+        ! Set the logical correctPeriodic. Only if a momentum variable
+        ! is communicated it is needed to apply the periodic
+        ! transformations.
+
+        correctPeriodic = .false.
+        if (start <= ivx .and. end >= ivz) correctPeriodic = .true.
+
+        spectralModes: do sps = 1, nTimeIntervalsSpectral
+
+            call setCommPointers(start, end, commPressure, commVarGamma, &
+                                 commLamVis, commEddyVis, level, sps, .False., nVar, 0)
+
+            if (nVar == 0) then
+                return
+            end if
+
+            ! Run the generic exchange
+            call wHalo1to1RealGeneric(nVar, level, sps, commPattern, internal)
+
+            if (correctPeriodic) then
+                if (internal(level)%nPeriodic > 0) then
+                    call correctPeriodicVelocity(level, sps, &
+                                                 internal(level)%nPeriodic, internal(level)%periodicData)
+                end if
+
+                if (commPattern(level)%nPeriodic > 0) then
+                    call correctPeriodicVelocity(level, sps, &
+                                                 commPattern(level)%nPeriodic, commPattern(level)%periodicData)
+                end if
+            end if
+
+        end do spectralModes
+
+    end subroutine whalo1to1
+
+    subroutine correctPeriodicVelocity(level, sp, nPeriodic, &
+                                       periodicData)
+        !
+        !       correctPeriodicVelocity applies the periodic transformation
+        !       to the velocity of the cell halo's in periodicData.
+        !
+        use constants
+        use block
+        use communication
+        use constants
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level, sp, nPeriodic
+        type(periodicDataType), dimension(:), pointer :: periodicData
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn, mm, ii, i, j, k
+        real(kind=realType) :: vx, vy, vz
+
+        real(kind=realType), dimension(3, 3) :: rotMatrix
+
+        ! Loop over the number of periodic transformations.
+
+        do nn = 1, nPeriodic
+
+            ! Store the rotation matrix a bit easier.
+
+            rotMatrix = periodicData(nn)%rotMatrix
+
+            ! Loop over the number of halo cells for this transformation.
+            !DIR$ NOVECTOR
+            do ii = 1, periodicData(nn)%nhalos
+
+                ! Store the block and the indices a bit easier.
+
+                mm = periodicData(nn)%block(ii)
+                i = periodicData(nn)%indices(ii, 1)
+                j = periodicData(nn)%indices(ii, 2)
+                k = periodicData(nn)%indices(ii, 3)
+
+                ! Store the original velocities in vx, vy, vz.
+
+                vx = flowDoms(mm, level, sp)%w(i, j, k, ivx)
+                vy = flowDoms(mm, level, sp)%w(i, j, k, ivy)
+                vz = flowDoms(mm, level, sp)%w(i, j, k, ivz)
+
+                ! Compute the new velocity vector.
+
+                flowDoms(mm, level, sp)%w(i, j, k, ivx) = rotMatrix(1, 1) * vx &
+                                                          + rotMatrix(1, 2) * vy &
+                                                          + rotMatrix(1, 3) * vz
+                flowDoms(mm, level, sp)%w(i, j, k, ivy) = rotMatrix(2, 1) * vx &
+                                                          + rotMatrix(2, 2) * vy &
+                                                          + rotMatrix(2, 3) * vz
+                flowDoms(mm, level, sp)%w(i, j, k, ivz) = rotMatrix(3, 1) * vx &
+                                                          + rotMatrix(3, 2) * vy &
+                                                          + rotMatrix(3, 3) * vz
+            end do
+
+        end do
+
+    end subroutine correctPeriodicVelocity
+
+    subroutine whalo1to1RealGeneric(nVar, level, sps, commPattern, internal)
+        !
+        !       whalo1to1 exchanges the 1 to 1 internal halo's for the cell
+        !       centered variables for the given communication pattern.
+        !       Pointers must be set for var1, var2...varN
+        !
+        use constants
+        use block
+        use communication
+        use inputTimeSpectral
+        use utils, only: EChk
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level, sps
+
+        type(commType), dimension(*), intent(in) :: commPattern
+        type(internalCommType), dimension(*), intent(in) :: internal
+        !
+        !      Local variables.
+        !
+
+        integer :: size, procID, ierr, index
+        integer, dimension(mpi_status_size) :: mpiStatus
+
+        integer(kind=intType) :: nVar, mm
+        integer(kind=intType) :: i, j, k, ii, jj
+        integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2
+
+        ! Send the variables. The data is first copied into
+        ! the send buffer after which the buffer is sent asap.
+
+        ii = 1
+        sends: do i = 1, commPattern(level)%nProcSend
+
+            ! Store the processor id and the size of the message
+            ! a bit easier.
+
+            procID = commPattern(level)%sendProc(i)
+            size = nVar * commPattern(level)%nsend(i)
+
+            ! Copy the data in the correct part of the send buffer.
+
+            jj = ii
+            !DIR$ NOVECTOR
+            do j = 1, commPattern(level)%nsend(i)
 
-          if (calcPressure) p(oi,oj,ok)     = p(oi,oj,ok)/nAvgReal
-          if (calcGamma)    gamma(oi,oj,ok) = gamma(oi,oj,ok)/nAvgReal
-          if (calcLamVis)   rlv(oi,oj,ok)   = rlv(oi,oj,ok)/nAvgReal
-          if (calcEddyVis)  rev(oi,oj,ok)   = rev(oi,oj,ok)/nAvgReal
-
-       else checkNoNeighbors
-
-          ! No suitable neighbors were found in order to compute an
-          ! average. Set the variables back to the the freestream.
-
-          do l=wstart,wend
-             w(oi,oj,ok,l) = wInf(l)
-          end do
-
-          if (calcPressure) p(oi,oj,ok)     = pInfCorr
-          if (calcGamma)    gamma(oi,oj,ok) = gammaInf
-          if (calcLamVis)   rlv(oi,oj,ok)   = muInf
-          if (calcEddyVis)  rev(oi,oj,ok)   = eddyVisInfRatio*muInf
-
-       end if checkNoNeighbors
-
-    end do orphanLoop
-
-  end subroutine orphanAverage
-
-  subroutine setCommPointers(start, end, commPressure, commVarGamma, commLamVis, &
-       commEddyVis, level, sps, derivPointers, nVar, varOffset)
-
-    ! Generic routine for setting pointers to the communication
-    ! variables. Can also set pointers to derivatve values if derivPts is True.
-
-    use constants
-    use block, only : fLowDoms, blockType, flowDomsd, nDom
-    implicit none
-
-    ! Input
-    integer(kind=intType), intent(in) :: start, end, level, sps
-    logical, intent(in) :: commPressure, commVarGamma, commLamVis, commEddyVis
-    logical, intent(in) :: derivPointers
-    integer(kind=intType), intent(in) :: varOffset
-
-    ! Output
-    integer(kind=intType), intent(out) :: nVar
-
-    ! Working:
-    integer(kind=intType) :: nn, k
-    type(blockType) , pointer :: blk, blk1, blkLevel
-
-    ! Set the pointers for the required variables
-    domainLoop:do nn=1, nDom
-       nVar = varOffset
-       blk => flowDoms(nn, level, sps)
-
-       if (derivPointers) then
-          blkLevel => flowDomsd(nn, level, sps)
-          blk1     => flowDomsd(nn, 1    , sps)
-       else
-          blkLevel => flowDoms(nn, level, sps)
-          blk1     => flowDoms(nn, 1    , sps)
-       end if
-
-       do k=start, end
-          nVar = nVar + 1
-          blk%realCommVars(nVar)%var => blkLevel%w(:, :, :, k)
-       end do
-
-       if( commPressure )  then
-          nVar = nVar + 1
-          blk%realCommVars(nVar)%var => blkLevel%P(:, :, :)
-       end if
-
-       if( commVarGamma ) then
-          nVar = nVar + 1
-          blk%realCommVars(nVar)%var => blk1%gamma(:, :, :)
-       end if
-
-       if( commLamVis ) then
-          nVar = nVar + 1
-          blk%realCommvars(nVar)%var => blk1%rlv(:, :, :)
-       end if
-
-       if( commEddyVis ) then
-          nVar = nVar + 1
-          blk%realCommVars(nVar)%var => blkLevel%rev(:, :, :)
-       end if
-
-    end do domainLoop
-    nVar = nVar - varOffset
-  end subroutine setCommPointers
-
-  subroutine whalo1to1(level, start, end, commPressure,       &
-       commVarGamma, commLamVis, commEddyVis, &
-       commPattern, internal)
-    !
-    !       whalo1to1 exchanges the 1 to 1 internal halo's for the cell
-    !       centered variables for the given communication pattern. It
-    !       is possible to send a range of variables and not the entire
-    !       set, e.g. only the flow variables or only the turbulent
-    !       variables. This is controlled by the arguments start, end,
-    !       commPressure and commViscous. The exchange takes place for
-    !       the given grid level.
-    !
-    use constants
-    use block
-    use communication
-    use inputTimeSpectral
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level, start, end
-    logical, intent(in) :: commPressure, commVarGamma
-    logical, intent(in) :: commLamVis, commEddyVis
-
-    type(commType), dimension(*), intent(in)         :: commPattern
-    type(internalCommType), dimension(*), intent(in) :: internal
-
-    integer(kind=intType) :: nVar, nn, k, sps
-
-    logical :: correctPeriodic
-
-    ! Set the logical correctPeriodic. Only if a momentum variable
-    ! is communicated it is needed to apply the periodic
-    ! transformations.
-
-    correctPeriodic = .false.
-    if(start <= ivx .and. end >= ivz) correctPeriodic = .true.
-
-    spectralModes: do sps=1,nTimeIntervalsSpectral
-
-       call setCommPointers(start, end, commPressure, commVarGamma, &
-            commLamVis, commEddyVis, level, sps, .False., nVar, 0)
-
-       if (nVar == 0) then
-          return
-       end if
-
-       ! Run the generic exchange
-       call wHalo1to1RealGeneric(nVar, level, sps, commPattern, internal)
-
-       if (correctPeriodic) then
-          if ( internal(level)%nPeriodic > 0 )   then
-             call correctPeriodicVelocity(level, sps,             &
-                  internal(level)%nPeriodic, internal(level)%periodicData)
-          end if
-
-          if ( commPattern(level)%nPeriodic > 0 )   then
-             call correctPeriodicVelocity(level, sps,             &
-                  commPattern(level)%nPeriodic, commPattern(level)%periodicData)
-          end if
-       end if
-
-    end do spectralModes
-
-  end subroutine whalo1to1
-
-  subroutine correctPeriodicVelocity(level, sp, nPeriodic, &
-       periodicData)
-    !
-    !       correctPeriodicVelocity applies the periodic transformation
-    !       to the velocity of the cell halo's in periodicData.
-    !
-    use constants
-    use block
-    use communication
-    use constants
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level, sp, nPeriodic
-    type(periodicDataType), dimension(:), pointer :: periodicData
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn, mm, ii, i, j, k
-    real(kind=realType)   :: vx, vy, vz
-
-    real(kind=realType), dimension(3,3) :: rotMatrix
-
-    ! Loop over the number of periodic transformations.
-
-    do nn=1,nPeriodic
-
-       ! Store the rotation matrix a bit easier.
-
-       rotMatrix = periodicData(nn)%rotMatrix
-
-       ! Loop over the number of halo cells for this transformation.
-       !DIR$ NOVECTOR
-       do ii=1,periodicData(nn)%nhalos
-
-          ! Store the block and the indices a bit easier.
-
-          mm = periodicData(nn)%block(ii)
-          i  = periodicData(nn)%indices(ii,1)
-          j  = periodicData(nn)%indices(ii,2)
-          k  = periodicData(nn)%indices(ii,3)
-
-          ! Store the original velocities in vx, vy, vz.
-
-          vx = flowDoms(mm,level,sp)%w(i,j,k,ivx)
-          vy = flowDoms(mm,level,sp)%w(i,j,k,ivy)
-          vz = flowDoms(mm,level,sp)%w(i,j,k,ivz)
-
-          ! Compute the new velocity vector.
-
-          flowDoms(mm,level,sp)%w(i,j,k,ivx) = rotMatrix(1,1)*vx &
-               + rotMatrix(1,2)*vy &
-               + rotMatrix(1,3)*vz
-          flowDoms(mm,level,sp)%w(i,j,k,ivy) = rotMatrix(2,1)*vx &
-               + rotMatrix(2,2)*vy &
-               + rotMatrix(2,3)*vz
-          flowDoms(mm,level,sp)%w(i,j,k,ivz) = rotMatrix(3,1)*vx &
-               + rotMatrix(3,2)*vy &
-               + rotMatrix(3,3)*vz
-       enddo
+                ! Store the block id and the indices of the donor
+                ! a bit easier.
 
-    enddo
+                d1 = commPattern(level)%sendList(i)%block(j)
+                i1 = commPattern(level)%sendList(i)%indices(j, 1) + 1
+                j1 = commPattern(level)%sendList(i)%indices(j, 2) + 1
+                k1 = commPattern(level)%sendList(i)%indices(j, 3) + 1
 
-  end subroutine correctPeriodicVelocity
+                ! Copy the given range of the working variables for
+                ! this cell in the buffer. Update the counter jj.
+                !DIR$ NOVECTOR
+                do k = 1, nvar
+                    sendBuffer(jj) = flowDoms(d1, level, sps)%realCommVars(k)%var(i1, j1, k1)
+                    jj = jj + 1
+                end do
+            end do
 
-  subroutine whalo1to1RealGeneric(nVar, level, sps, commPattern, internal)
-    !
-    !       whalo1to1 exchanges the 1 to 1 internal halo's for the cell
-    !       centered variables for the given communication pattern.
-    !       Pointers must be set for var1, var2...varN
-    !
-    use constants
-    use block
-    use communication
-    use inputTimeSpectral
-    use utils, only : EChk
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level, sps
+            ! Send the data.
 
-    type(commType), dimension(*), intent(in)         :: commPattern
-    type(internalCommType), dimension(*), intent(in) :: internal
-    !
-    !      Local variables.
-    !
+            call mpi_isend(sendBuffer(ii), size, adflow_real, procID, &
+                           procID, ADflow_comm_world, sendRequests(i), &
+                           ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-    integer :: size, procID, ierr, index
-    integer, dimension(mpi_status_size) :: mpiStatus
+            ! Set ii to jj for the next processor.
 
-    integer(kind=intType) :: nVar, mm
-    integer(kind=intType) :: i, j, k, ii, jj
-    integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2
+            ii = jj
 
-    ! Send the variables. The data is first copied into
-    ! the send buffer after which the buffer is sent asap.
+        end do sends
 
-    ii = 1
-    sends: do i=1,commPattern(level)%nProcSend
+        ! Post the nonblocking receives.
 
-       ! Store the processor id and the size of the message
-       ! a bit easier.
+        ii = 1
+        receives: do i = 1, commPattern(level)%nProcRecv
 
-       procID = commPattern(level)%sendProc(i)
-       size    = nVar*commPattern(level)%nsend(i)
+            ! Store the processor id and the size of the message
+            ! a bit easier.
 
-       ! Copy the data in the correct part of the send buffer.
+            procID = commPattern(level)%recvProc(i)
+            size = nVar * commPattern(level)%nrecv(i)
 
-       jj = ii
-       !DIR$ NOVECTOR
-       do j=1,commPattern(level)%nsend(i)
+            ! Post the receive.
 
-          ! Store the block id and the indices of the donor
-          ! a bit easier.
+            call mpi_irecv(recvBuffer(ii), size, adflow_real, procID, &
+                           myID, ADflow_comm_world, recvRequests(i), ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-          d1 = commPattern(level)%sendList(i)%block(j)
-          i1 = commPattern(level)%sendList(i)%indices(j,1)+1
-          j1 = commPattern(level)%sendList(i)%indices(j,2)+1
-          k1 = commPattern(level)%sendList(i)%indices(j,3)+1
+            ! And update ii.
 
-          ! Copy the given range of the working variables for
-          ! this cell in the buffer. Update the counter jj.
-          !DIR$ NOVECTOR
-          do k=1, nvar
-             sendBuffer(jj) = flowDoms(d1, level, sps)%realCommVars(k)%var(i1, j1, k1)
-             jj = jj + 1
-          end do
-       end do
+            ii = ii + size
 
-       ! Send the data.
+        end do receives
 
-       call mpi_isend(sendBuffer(ii), size, adflow_real, procID,  &
-            procID, ADflow_comm_world, sendRequests(i), &
-            ierr)
-       call EChk(ierr,__FILE__,__LINE__)
+        ! Copy the local data.
 
-       ! Set ii to jj for the next processor.
+        !DIR$ NOVECTOR
+        localCopy: do i = 1, internal(level)%ncopy
 
-       ii = jj
+            ! Store the block and the indices of the donor a bit easier.
 
-    enddo sends
+            d1 = internal(level)%donorBlock(i)
+            i1 = internal(level)%donorIndices(i, 1) + 1
+            j1 = internal(level)%donorIndices(i, 2) + 1
+            k1 = internal(level)%donorIndices(i, 3) + 1
 
-    ! Post the nonblocking receives.
+            ! Idem for the halo's.
 
-    ii = 1
-    receives: do i=1,commPattern(level)%nProcRecv
+            d2 = internal(level)%haloBlock(i)
+            i2 = internal(level)%haloIndices(i, 1) + 1
+            j2 = internal(level)%haloIndices(i, 2) + 1
+            k2 = internal(level)%haloIndices(i, 3) + 1
 
-       ! Store the processor id and the size of the message
-       ! a bit easier.
+            do k = 1, nVar
+                flowDoms(d2, level, sps)%realCommVars(k)%var(i2, j2, k2) = &
+                    flowDoms(d1, level, sps)%realCommVars(k)%var(i1, j1, k1)
+            end do
 
-       procID = commPattern(level)%recvProc(i)
-       size    = nVar*commPattern(level)%nrecv(i)
+        end do localCopy
 
-       ! Post the receive.
+        ! Complete the nonblocking receives in an arbitrary sequence and
+        ! copy the variables from the buffer into the halo's.
 
-       call mpi_irecv(recvBuffer(ii), size, adflow_real, procID, &
-            myID, ADflow_comm_world, recvRequests(i), ierr)
-       call EChk(ierr,__FILE__,__LINE__)
+        size = commPattern(level)%nProcRecv
+        completeRecvs: do i = 1, commPattern(level)%nProcRecv
 
-       ! And update ii.
+            ! Complete any of the requests.
 
-       ii = ii + size
+            call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-    enddo receives
+            ! Copy the data just arrived in the halo's.
 
-    ! Copy the local data.
+            ii = index
+            jj = nVar * commPattern(level)%nrecvCum(ii - 1)
+            !DIR$ NOVECTOR
+            do j = 1, commPattern(level)%nrecv(ii)
 
+                ! Store the block and the indices of the halo a bit easier.
 
-    !DIR$ NOVECTOR
-    localCopy: do i=1,internal(level)%ncopy
+                d2 = commPattern(level)%recvList(ii)%block(j)
+                i2 = commPattern(level)%recvList(ii)%indices(j, 1) + 1
+                j2 = commPattern(level)%recvList(ii)%indices(j, 2) + 1
+                k2 = commPattern(level)%recvList(ii)%indices(j, 3) + 1
 
-       ! Store the block and the indices of the donor a bit easier.
+                do k = 1, nVar
+                    jj = jj + 1
+                    flowDoms(d2, level, sps)%realCommVars(k)%var(i2, j2, k2) = recvBuffer(jj)
+                end do
+            end do
+        end do completeRecvs
+
+        ! Complete the nonblocking sends.
+
+        size = commPattern(level)%nProcSend
+        do i = 1, commPattern(level)%nProcSend
+            call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
+        end do
+
+    end subroutine whalo1to1RealGeneric
+
+    subroutine whalo1to1RealGeneric_b(nVar, level, sps, commPattern, internal)
+        !
+        !       whalo1to1RealGeneric_b is a generic implementation
+        !       of the reverse mode of whalo1to1RealGeneric.
+        !
+        use constants
+        use block
+        use communication
+        use inputTimeSpectral
+        use utils, only: EChk
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level, sps
+
+        type(commType), dimension(*), intent(in) :: commPattern
+        type(internalCommType), dimension(*), intent(in) :: internal
+        !
+        !      Local variables.
+        !
+
+        integer :: size, procID, ierr, index
+        integer, dimension(mpi_status_size) :: mpiStatus
+
+        integer(kind=intType) :: nVar, mm
+        integer(kind=intType) :: i, j, k, ii, jj
+        integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2
+
+        ! Gather up the seeds into the *recv* buffer. Note we loop
+        ! over nProcRECV here! After the buffer is assembled it is
+        ! sent off.
+
+        jj = 1
+        ii = 1
+        recvs: do i = 1, commPattern(level)%nProcRecv
+
+            ! Store the processor id and the size of the message
+            ! a bit easier.
+
+            procID = commPattern(level)%recvProc(i)
+            size = nVar * commPattern(level)%nrecv(i)
+
+            ! Copy the data into the buffer
+            !DIR$ NOVECTOR
+            do j = 1, commPattern(level)%nrecv(i)
+
+                ! Store the block and the indices of the halo a bit easier.
+
+                d2 = commPattern(level)%recvList(i)%block(j)
+                i2 = commPattern(level)%recvList(i)%indices(j, 1) + 1
+                j2 = commPattern(level)%recvList(i)%indices(j, 2) + 1
+                k2 = commPattern(level)%recvList(i)%indices(j, 3) + 1
+
+                do k = 1, nVar
+                    recvBuffer(jj) = flowDoms(d2, level, sps)%realCommVars(k)%var(i2, j2, k2)
+                    flowDoms(d2, level, sps)%realCommVars(k)%var(i2, j2, k2) = zero
+                    jj = jj + 1
+                end do
+            end do
 
-       d1 = internal(level)%donorBlock(i)
-       i1 = internal(level)%donorIndices(i,1)+1
-       j1 = internal(level)%donorIndices(i,2)+1
-       k1 = internal(level)%donorIndices(i,3)+1
+            ! Send the data.
+            call mpi_isend(recvBuffer(ii), size, adflow_real, procID, &
+                           procID, ADflow_comm_world, recvRequests(i), &
+                           ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       ! Idem for the halo's.
+            ! Set ii to jj for the next processor.
 
-       d2 = internal(level)%haloBlock(i)
-       i2 = internal(level)%haloIndices(i,1)+1
-       j2 = internal(level)%haloIndices(i,2)+1
-       k2 = internal(level)%haloIndices(i,3)+1
+            ii = jj
 
-       do k=1, nVar
-          flowDoms(d2, level, sps)%realCommVars(k)%var(i2, j2, k2) = &
-               flowDoms(d1, level, sps)%realCommVars(k)%var(i1, j1, k1)
-       end do
+        end do recvs
 
-    enddo localCopy
+        ! Post the nonblocking receives.
 
-    ! Complete the nonblocking receives in an arbitrary sequence and
-    ! copy the variables from the buffer into the halo's.
+        ii = 1
+        sends: do i = 1, commPattern(level)%nProcSend
 
-    size = commPattern(level)%nProcRecv
-    completeRecvs: do i=1,commPattern(level)%nProcRecv
+            ! Store the processor id and the size of the message
+            ! a bit easier.
 
-       ! Complete any of the requests.
+            procID = commPattern(level)%sendProc(i)
+            size = nVar * commPattern(level)%nsend(i)
 
-       call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
+            ! Post the receive.
 
-       ! Copy the data just arrived in the halo's.
+            call mpi_irecv(sendBuffer(ii), size, adflow_real, procID, &
+                           myID, ADflow_comm_world, sendRequests(i), ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       ii = index
-       jj = nVar*commPattern(level)%nrecvCum(ii-1)
-       !DIR$ NOVECTOR
-       do j=1,commPattern(level)%nrecv(ii)
+            ! And update ii.
 
-          ! Store the block and the indices of the halo a bit easier.
+            ii = ii + size
 
-          d2 = commPattern(level)%recvList(ii)%block(j)
-          i2 = commPattern(level)%recvList(ii)%indices(j,1)+1
-          j2 = commPattern(level)%recvList(ii)%indices(j,2)+1
-          k2 = commPattern(level)%recvList(ii)%indices(j,3)+1
+        end do sends
 
-          do k=1, nVar
-             jj = jj + 1
-             flowDoms(d2,level,sps)%realCommVars(k)%var(i2, j2, k2) = recvBuffer(jj)
-          end do
-       end do
-    enddo completeRecvs
+        ! Copy the local data.
+        !DIR$ NOVECTOR
+        localCopy: do i = 1, internal(level)%ncopy
 
-    ! Complete the nonblocking sends.
+            ! Store the block and the indices of the donor a bit easier.
 
-    size = commPattern(level)%nProcSend
-    do i=1,commPattern(level)%nProcSend
-       call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
-    enddo
+            d1 = internal(level)%donorBlock(i)
+            i1 = internal(level)%donorIndices(i, 1) + 1
+            j1 = internal(level)%donorIndices(i, 2) + 1
+            k1 = internal(level)%donorIndices(i, 3) + 1
 
-  end subroutine whalo1to1RealGeneric
+            ! Idem for the halo's.
 
-  subroutine whalo1to1RealGeneric_b(nVar, level, sps, commPattern, internal)
-    !
-    !       whalo1to1RealGeneric_b is a generic implementation
-    !       of the reverse mode of whalo1to1RealGeneric.
-    !
-    use constants
-    use block
-    use communication
-    use inputTimeSpectral
-    use utils, only : EChk
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level, sps
+            d2 = internal(level)%haloBlock(i)
+            i2 = internal(level)%haloIndices(i, 1) + 1
+            j2 = internal(level)%haloIndices(i, 2) + 1
+            k2 = internal(level)%haloIndices(i, 3) + 1
 
-    type(commType), dimension(*), intent(in)         :: commPattern
-    type(internalCommType), dimension(*), intent(in) :: internal
-    !
-    !      Local variables.
-    !
+            ! Sum into the '1' values from the '2' values (halos).
+            do k = 1, nVar
+                flowDoms(d1, level, sps)%realCommVars(k)%var(i1, j1, k1) = &
+                    flowDoms(d1, level, sps)%realCommVars(k)%var(i1, j1, k1) + &
+                    flowDoms(d2, level, sps)%realCommVars(k)%var(i2, j2, k2)
+                flowDoms(d2, level, sps)%realCommVars(k)%var(i2, j2, k2) = zero
+            end do
 
-    integer :: size, procID, ierr, index
-    integer, dimension(mpi_status_size) :: mpiStatus
+        end do localCopy
 
-    integer(kind=intType) :: nVar, mm
-    integer(kind=intType) :: i, j, k, ii, jj
-    integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2
+        ! Complete the nonblocking receives in an arbitrary sequence and
+        ! copy the variables from the buffer into the halo's.
 
-    ! Gather up the seeds into the *recv* buffer. Note we loop
-    ! over nProcRECV here! After the buffer is assembled it is
-    ! sent off.
+        size = commPattern(level)%nProcSend
+        completeSends: do i = 1, commPattern(level)%nProcSend
 
-    jj = 1
-    ii = 1
-    recvs: do i=1,commPattern(level)%nProcRecv
+            ! Complete any of the requests.
 
-       ! Store the processor id and the size of the message
-       ! a bit easier.
+            call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       procID = commPattern(level)%recvProc(i)
-       size    = nVar*commPattern(level)%nrecv(i)
+            ! ! Copy the data just arrived in the halo's.
 
-       ! Copy the data into the buffer
-       !DIR$ NOVECTOR
-       do j=1,commPattern(level)%nrecv(i)
+            ii = index
+            jj = nVar * commPattern(level)%nsendCum(ii - 1)
+            !DIR$ NOVECTOR
+            do j = 1, commPattern(level)%nsend(ii)
 
-          ! Store the block and the indices of the halo a bit easier.
+                ! Store the block and the indices of the halo a bit easier.
 
-          d2 = commPattern(level)%recvList(i)%block(j)
-          i2 = commPattern(level)%recvList(i)%indices(j,1)+1
-          j2 = commPattern(level)%recvList(i)%indices(j,2)+1
-          k2 = commPattern(level)%recvList(i)%indices(j,3)+1
+                d2 = commPattern(level)%sendList(ii)%block(j)
+                i2 = commPattern(level)%sendList(ii)%indices(j, 1) + 1
+                j2 = commPattern(level)%sendList(ii)%indices(j, 2) + 1
+                k2 = commPattern(level)%sendList(ii)%indices(j, 3) + 1
 
-          do k=1, nVar
-             recvBuffer(jj) = flowDoms(d2,level,sps)%realCommVars(k)%var(i2,j2,k2)
-             flowDoms(d2,level,sps)%realCommVars(k)%var(i2, j2, k2) = zero
-             jj = jj + 1
-          enddo
-       end do
+                ! Copy the conservative variables.
 
-       ! Send the data.
-       call mpi_isend(recvBuffer(ii), size, adflow_real, procID,  &
-            procID, ADflow_comm_world, recvRequests(i), &
-            ierr)
-       call EChk(ierr,__FILE__,__LINE__)
+                do k = 1, nVar
+                    jj = jj + 1
+                    flowDoms(d2, level, sps)%realCommVars(k)%var(i2, j2, k2) = &
+                        flowDoms(d2, level, sps)%realCommVars(k)%var(i2, j2, k2) + sendBuffer(jj)
+                end do
+            end do
 
-       ! Set ii to jj for the next processor.
+        end do completeSends
 
-       ii = jj
+        ! Complete the nonblocking sends.
 
-    enddo recvs
+        size = commPattern(level)%nProcRecv
+        do i = 1, commPattern(level)%nProcRecv
+            call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+        end do
 
-    ! Post the nonblocking receives.
+    end subroutine whalo1to1RealGeneric_b
 
-    ii = 1
-    sends: do i=1,commPattern(level)%nProcSend
+    subroutine whalo1to1IntGeneric(nVar, level, sps, commPattern, internal)
+        !
+        !       whalo1to1 exchanges the 1 to 1 internal halo's for the cell
+        !       centered variables for the given communication pattern.
+        !       Pointers must be set for var1, var2...varN
+        !
+        use constants
+        use block
+        use communication
+        use inputTimeSpectral
+        use utils, only: EChk
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level, sps
 
-       ! Store the processor id and the size of the message
-       ! a bit easier.
+        type(commType), dimension(*), intent(in) :: commPattern
+        type(internalCommType), dimension(*), intent(in) :: internal
+        !
+        !      Local variables.
+        !
 
-       procID = commPattern(level)%sendProc(i)
-       size    = nVar*commPattern(level)%nsend(i)
+        integer :: size, procID, ierr, index
+        integer, dimension(mpi_status_size) :: mpiStatus
 
-       ! Post the receive.
+        integer(kind=intType) :: nVar, mm
+        integer(kind=intType) :: i, j, k, ii, jj
+        integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2
 
-       call mpi_irecv(sendBuffer(ii), size, adflow_real, procID, &
-            myID, ADflow_comm_world, sendRequests(i), ierr)
-       call EChk(ierr,__FILE__,__LINE__)
+        integer(kind=intType), dimension(:), allocatable :: sendBufInt
+        integer(kind=intType), dimension(:), allocatable :: recvBufInt
 
-       ! And update ii.
+        ii = commPattern(level)%nProcSend
+        ii = commPattern(level)%nsendCum(ii)
+        jj = commPattern(level)%nProcRecv
+        jj = commPattern(level)%nrecvCum(jj)
 
-       ii = ii + size
+        allocate (sendBufInt(ii * nVar), recvBufInt(jj * nVar), stat=ierr)
 
-    enddo sends
+        ! Send the variables. The data is first copied into
+        ! the send buffer after which the buffer is sent asap.
 
-    ! Copy the local data.
-    !DIR$ NOVECTOR
-    localCopy: do i=1,internal(level)%ncopy
+        ii = 1
+        sends: do i = 1, commPattern(level)%nProcSend
 
-       ! Store the block and the indices of the donor a bit easier.
+            ! Store the processor id and the size of the message
+            ! a bit easier.
 
-       d1 = internal(level)%donorBlock(i)
-       i1 = internal(level)%donorIndices(i,1)+1
-       j1 = internal(level)%donorIndices(i,2)+1
-       k1 = internal(level)%donorIndices(i,3)+1
+            procID = commPattern(level)%sendProc(i)
+            size = nVar * commPattern(level)%nsend(i)
 
-       ! Idem for the halo's.
+            ! Copy the data in the correct part of the send buffer.
 
-       d2 = internal(level)%haloBlock(i)
-       i2 = internal(level)%haloIndices(i,1)+1
-       j2 = internal(level)%haloIndices(i,2)+1
-       k2 = internal(level)%haloIndices(i,3)+1
+            jj = ii
+            !DIR$ NOVECTOR
+            do j = 1, commPattern(level)%nsend(i)
 
-       ! Sum into the '1' values from the '2' values (halos).
-       do k=1, nVar
-          flowDoms(d1, level, sps)%realCommVars(k)%var(i1, j1, k1) = &
-               flowDoms(d1, level, sps)%realCommVars(k)%var(i1, j1, k1) + &
-               flowDoms(d2, level, sps)%realCommVars(k)%var(i2, j2, k2)
-          flowDoms(d2, level, sps)%realCommVars(k)%var(i2, j2, k2) = zero
-       enddo
+                ! Store the block id and the indices of the donor
+                ! a bit easier.
 
-    enddo localCopy
+                d1 = commPattern(level)%sendList(i)%block(j)
+                i1 = commPattern(level)%sendList(i)%indices(j, 1) + 1
+                j1 = commPattern(level)%sendList(i)%indices(j, 2) + 1
+                k1 = commPattern(level)%sendList(i)%indices(j, 3) + 1
 
-    ! Complete the nonblocking receives in an arbitrary sequence and
-    ! copy the variables from the buffer into the halo's.
+                ! Copy the given range of the working variables for
+                ! this cell in the buffer. Update the counter jj.
+                !DIR$ NOVECTOR
+                do k = 1, nvar
+                    sendBufInt(jj) = flowDoms(d1, level, sps)%intCommVars(k)%var(i1, j1, k1)
+                    jj = jj + 1
+                end do
+            end do
 
-    size = commPattern(level)%nProcSend
-    completeSends: do i=1,commPattern(level)%nProcSend
+            ! Send the data.
 
-       ! Complete any of the requests.
+            call mpi_isend(sendBufInt(ii), size, adflow_integer, procID, &
+                           procID, ADflow_comm_world, sendRequests(i), &
+                           ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
+            ! Set ii to jj for the next processor.
 
-       ! ! Copy the data just arrived in the halo's.
+            ii = jj
 
-       ii = index
-       jj = nVar*commPattern(level)%nsendCum(ii-1)
-       !DIR$ NOVECTOR
-       do j=1,commPattern(level)%nsend(ii)
+        end do sends
 
-          ! Store the block and the indices of the halo a bit easier.
+        ! Post the nonblocking receives.
 
-          d2 = commPattern(level)%sendList(ii)%block(j)
-          i2 = commPattern(level)%sendList(ii)%indices(j,1)+1
-          j2 = commPattern(level)%sendList(ii)%indices(j,2)+1
-          k2 = commPattern(level)%sendList(ii)%indices(j,3)+1
+        ii = 1
+        receives: do i = 1, commPattern(level)%nProcRecv
 
-          ! Copy the conservative variables.
+            ! Store the processor id and the size of the message
+            ! a bit easier.
 
-          do k=1, nVar
-             jj = jj + 1
-             flowDoms(d2, level, sps)%realCommVars(k)%var(i2, j2, k2) = &
-                  flowDoms(d2, level, sps)%realCommVars(k)%var(i2, j2, k2) + sendBuffer(jj)
-          enddo
-       enddo
+            procID = commPattern(level)%recvProc(i)
+            size = nVar * commPattern(level)%nrecv(i)
 
-    enddo completeSends
+            ! Post the receive.
 
-    ! Complete the nonblocking sends.
+            call mpi_irecv(recvBufInt(ii), size, adflow_integer, procID, &
+                           myID, ADflow_comm_world, recvRequests(i), ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-    size = commPattern(level)%nProcRecv
-    do i=1,commPattern(level)%nProcRecv
-       call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-    enddo
+            ! And update ii.
 
-  end subroutine whalo1to1RealGeneric_b
+            ii = ii + size
 
-  subroutine whalo1to1IntGeneric(nVar, level, sps, commPattern, internal)
-    !
-    !       whalo1to1 exchanges the 1 to 1 internal halo's for the cell
-    !       centered variables for the given communication pattern.
-    !       Pointers must be set for var1, var2...varN
-    !
-    use constants
-    use block
-    use communication
-    use inputTimeSpectral
-    use utils, only : EChk
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level, sps
+        end do receives
 
-    type(commType), dimension(*), intent(in)         :: commPattern
-    type(internalCommType), dimension(*), intent(in) :: internal
-    !
-    !      Local variables.
-    !
+        ! Copy the local data.
+        !DIR$ NOVECTOR
+        localCopy: do i = 1, internal(level)%ncopy
 
-    integer :: size, procID, ierr, index
-    integer, dimension(mpi_status_size) :: mpiStatus
+            ! Store the block and the indices of the donor a bit easier.
 
-    integer(kind=intType) :: nVar, mm
-    integer(kind=intType) :: i, j, k, ii, jj
-    integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2
+            d1 = internal(level)%donorBlock(i)
+            i1 = internal(level)%donorIndices(i, 1) + 1
+            j1 = internal(level)%donorIndices(i, 2) + 1
+            k1 = internal(level)%donorIndices(i, 3) + 1
 
-    integer(kind=intType), dimension(:), allocatable :: sendBufInt
-    integer(kind=intType), dimension(:), allocatable :: recvBufInt
+            ! Idem for the halo's.
 
-    ii = commPattern(level)%nProcSend
-    ii = commPattern(level)%nsendCum(ii)
-    jj = commPattern(level)%nProcRecv
-    jj = commPattern(level)%nrecvCum(jj)
+            d2 = internal(level)%haloBlock(i)
+            i2 = internal(level)%haloIndices(i, 1) + 1
+            j2 = internal(level)%haloIndices(i, 2) + 1
+            k2 = internal(level)%haloIndices(i, 3) + 1
 
-    allocate(sendBufInt(ii*nVar), recvBufInt(jj*nVar), stat=ierr)
+            do k = 1, nVar
+                flowDoms(d2, level, sps)%intCommVars(k)%var(i2, j2, k2) = &
+                    flowDoms(d1, level, sps)%intCommVars(k)%var(i1, j1, k1)
+            end do
 
-    ! Send the variables. The data is first copied into
-    ! the send buffer after which the buffer is sent asap.
+        end do localCopy
 
-    ii = 1
-    sends: do i=1,commPattern(level)%nProcSend
+        ! Complete the nonblocking receives in an arbitrary sequence and
+        ! copy the variables from the buffer into the halo's.
 
-       ! Store the processor id and the size of the message
-       ! a bit easier.
+        size = commPattern(level)%nProcRecv
+        completeRecvs: do i = 1, commPattern(level)%nProcRecv
 
-       procID = commPattern(level)%sendProc(i)
-       size    = nVar*commPattern(level)%nsend(i)
+            ! Complete any of the requests.
 
-       ! Copy the data in the correct part of the send buffer.
+            call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       jj = ii
-       !DIR$ NOVECTOR
-       do j=1,commPattern(level)%nsend(i)
+            ! Copy the data just arrived in the halo's.
 
-          ! Store the block id and the indices of the donor
-          ! a bit easier.
+            ii = index
+            jj = nVar * commPattern(level)%nrecvCum(ii - 1)
+            !DIR$ NOVECTOR
+            do j = 1, commPattern(level)%nrecv(ii)
 
-          d1 = commPattern(level)%sendList(i)%block(j)
-          i1 = commPattern(level)%sendList(i)%indices(j,1)+1
-          j1 = commPattern(level)%sendList(i)%indices(j,2)+1
-          k1 = commPattern(level)%sendList(i)%indices(j,3)+1
+                ! Store the block and the indices of the halo a bit easier.
 
-          ! Copy the given range of the working variables for
-          ! this cell in the buffer. Update the counter jj.
-          !DIR$ NOVECTOR
-          do k=1, nvar
-             sendBufInt(jj) = flowDoms(d1, level, sps)%intCommVars(k)%var(i1, j1, k1)
-             jj = jj + 1
-          end do
-       end do
+                d2 = commPattern(level)%recvList(ii)%block(j)
+                i2 = commPattern(level)%recvList(ii)%indices(j, 1) + 1
+                j2 = commPattern(level)%recvList(ii)%indices(j, 2) + 1
+                k2 = commPattern(level)%recvList(ii)%indices(j, 3) + 1
 
-       ! Send the data.
+                do k = 1, nVar
+                    jj = jj + 1
+                    flowDoms(d2, level, sps)%intCommVars(k)%var(i2, j2, k2) = recvBufInt(jj)
+                end do
+            end do
+        end do completeRecvs
+
+        ! Complete the nonblocking sends.
+
+        size = commPattern(level)%nProcSend
+        do i = 1, commPattern(level)%nProcSend
+            call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+        end do
+
+        deallocate (recvBufInt, sendBufInt)
+
+    end subroutine whalo1to1IntGeneric
+
+    subroutine whalo1to1IntGeneric_b(nVar, level, sps, commPattern, internal)
+        !
+        !       whalo1to1IntGeneric_b is a generic implementation of the
+        !       reverse mode of whalo1to1IntGeneric. Integers are summed
+        !       together in reverse.
+        !
+        use constants
+        use block
+        use communication
+        use inputTimeSpectral
+        use utils, only: EChk
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level, sps
+
+        type(commType), dimension(*), intent(in) :: commPattern
+        type(internalCommType), dimension(*), intent(in) :: internal
+        !
+        !      Local variables.
+        !
+
+        integer :: size, procID, ierr, index
+        integer, dimension(mpi_status_size) :: mpiStatus
+
+        integer(kind=intType) :: nVar, mm
+        integer(kind=intType) :: i, j, k, ii, jj
+        integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2
+        integer(kind=intType), dimension(:), allocatable :: sendBufInt
+        integer(kind=intType), dimension(:), allocatable :: recvBufInt
+
+        ii = commPattern(level)%nProcSend
+        ii = commPattern(level)%nsendCum(ii)
+        jj = commPattern(level)%nProcRecv
+        jj = commPattern(level)%nrecvCum(jj)
+
+        allocate (sendBufInt(ii * nVar), recvBufInt(jj * nVar), stat=ierr)
+
+        ! Gather up the seeds into the *recv* buffer. Note we loop
+        ! over nProcRECV here! After the buffer is assembled it is
+        ! sent off.
+
+        jj = 1
+        ii = 1
+        recvs: do i = 1, commPattern(level)%nProcRecv
+
+            ! Store the processor id and the size of the message
+            ! a bit easier.
+
+            procID = commPattern(level)%recvProc(i)
+            size = nVar * commPattern(level)%nrecv(i)
+
+            ! Copy the data into the buffer
+            !DIR$ NOVECTOR
+            do j = 1, commPattern(level)%nrecv(i)
+
+                ! Store the block and the indices of the halo a bit easier.
+
+                d2 = commPattern(level)%recvList(i)%block(j)
+                i2 = commPattern(level)%recvList(i)%indices(j, 1) + 1
+                j2 = commPattern(level)%recvList(i)%indices(j, 2) + 1
+                k2 = commPattern(level)%recvList(i)%indices(j, 3) + 1
+                !DIR$ NOVECTOR
+                do k = 1, nVar
+                    recvBufInt(jj) = flowDoms(d2, level, sps)%intCommVars(k)%var(i2, j2, k2)
+                    flowDoms(d2, level, sps)%intCommVars(k)%var(i2, j2, k2) = 0
+                    jj = jj + 1
+                end do
+            end do
 
-       call mpi_isend(sendBufInt(ii), size, adflow_integer, procID,  &
-            procID, ADflow_comm_world, sendRequests(i), &
-            ierr)
-       call EChk(ierr,__FILE__,__LINE__)
+            ! Send the data.
+            call mpi_isend(recvBufInt(ii), size, adflow_integer, procID, &
+                           procID, ADflow_comm_world, recvRequests(i), &
+                           ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       ! Set ii to jj for the next processor.
+            ! Set ii to jj for the next processor.
 
-       ii = jj
+            ii = jj
 
-    enddo sends
+        end do recvs
 
-    ! Post the nonblocking receives.
+        ! Post the nonblocking receives.
 
-    ii = 1
-    receives: do i=1,commPattern(level)%nProcRecv
+        ii = 1
+        sends: do i = 1, commPattern(level)%nProcSend
 
-       ! Store the processor id and the size of the message
-       ! a bit easier.
+            ! Store the processor id and the size of the message
+            ! a bit easier.
 
-       procID = commPattern(level)%recvProc(i)
-       size    = nVar*commPattern(level)%nrecv(i)
+            procID = commPattern(level)%sendProc(i)
+            size = nVar * commPattern(level)%nsend(i)
 
-       ! Post the receive.
+            ! Post the receive.
 
-       call mpi_irecv(recvBufInt(ii), size, adflow_integer, procID, &
-            myID, ADflow_comm_world, recvRequests(i), ierr)
-       call EChk(ierr,__FILE__,__LINE__)
+            call mpi_irecv(sendBufInt(ii), size, adflow_integer, procID, &
+                           myID, ADflow_comm_world, sendRequests(i), ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       ! And update ii.
+            ! And update ii.
 
-       ii = ii + size
+            ii = ii + size
 
-    enddo receives
+        end do sends
 
-    ! Copy the local data.
-    !DIR$ NOVECTOR
-    localCopy: do i=1,internal(level)%ncopy
+        ! Copy the local data.
+        !DIR$ NOVECTOR
+        localCopy: do i = 1, internal(level)%ncopy
 
-       ! Store the block and the indices of the donor a bit easier.
+            ! Store the block and the indices of the donor a bit easier.
 
-       d1 = internal(level)%donorBlock(i)
-       i1 = internal(level)%donorIndices(i,1)+1
-       j1 = internal(level)%donorIndices(i,2)+1
-       k1 = internal(level)%donorIndices(i,3)+1
+            d1 = internal(level)%donorBlock(i)
+            i1 = internal(level)%donorIndices(i, 1) + 1
+            j1 = internal(level)%donorIndices(i, 2) + 1
+            k1 = internal(level)%donorIndices(i, 3) + 1
 
-       ! Idem for the halo's.
+            ! Idem for the halo's.
 
-       d2 = internal(level)%haloBlock(i)
-       i2 = internal(level)%haloIndices(i,1)+1
-       j2 = internal(level)%haloIndices(i,2)+1
-       k2 = internal(level)%haloIndices(i,3)+1
+            d2 = internal(level)%haloBlock(i)
+            i2 = internal(level)%haloIndices(i, 1) + 1
+            j2 = internal(level)%haloIndices(i, 2) + 1
+            k2 = internal(level)%haloIndices(i, 3) + 1
 
-       do k=1, nVar
-          flowDoms(d2, level, sps)%intCommVars(k)%var(i2, j2, k2) = &
-               flowDoms(d1, level, sps)%intCommVars(k)%var(i1, j1, k1)
-       end do
+            ! Sum into the '1' values from the '2' values (halos).
+            !DIR$ NOVECTOR
+            do k = 1, nVar
+                flowDoms(d1, level, sps)%intCommVars(k)%var(i1, j1, k1) = &
+                    flowDoms(d1, level, sps)%intCommVars(k)%var(i1, j1, k1) + &
+                    flowDoms(d2, level, sps)%intCommVars(k)%var(i2, j2, k2)
+                flowDoms(d2, level, sps)%intCommVars(k)%var(i2, j2, k2) = 0
+            end do
 
-    enddo localCopy
+        end do localCopy
 
-    ! Complete the nonblocking receives in an arbitrary sequence and
-    ! copy the variables from the buffer into the halo's.
+        ! Complete the nonblocking receives in an arbitrary sequence and
+        ! copy the variables from the buffer into the halo's.
 
-    size = commPattern(level)%nProcRecv
-    completeRecvs: do i=1,commPattern(level)%nProcRecv
+        size = commPattern(level)%nProcSend
+        completeSends: do i = 1, commPattern(level)%nProcSend
 
-       ! Complete any of the requests.
+            ! Complete any of the requests.
 
-       call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
+            call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       ! Copy the data just arrived in the halo's.
+            ! ! Copy the data just arrived in the halo's.
 
-       ii = index
-       jj = nVar*commPattern(level)%nrecvCum(ii-1)
-       !DIR$ NOVECTOR
-       do j=1,commPattern(level)%nrecv(ii)
+            ii = index
+            jj = nVar * commPattern(level)%nsendCum(ii - 1)
+            !DIR$ NOVECTOR
+            do j = 1, commPattern(level)%nsend(ii)
 
-          ! Store the block and the indices of the halo a bit easier.
+                ! Store the block and the indices of the halo a bit easier.
 
-          d2 = commPattern(level)%recvList(ii)%block(j)
-          i2 = commPattern(level)%recvList(ii)%indices(j,1)+1
-          j2 = commPattern(level)%recvList(ii)%indices(j,2)+1
-          k2 = commPattern(level)%recvList(ii)%indices(j,3)+1
+                d2 = commPattern(level)%sendList(ii)%block(j)
+                i2 = commPattern(level)%sendList(ii)%indices(j, 1) + 1
+                j2 = commPattern(level)%sendList(ii)%indices(j, 2) + 1
+                k2 = commPattern(level)%sendList(ii)%indices(j, 3) + 1
 
-          do k=1, nVar
-             jj = jj + 1
-             flowDoms(d2,level,sps)%intCommVars(k)%var(i2, j2, k2) = recvBufInt(jj)
-          end do
-       end do
-    enddo completeRecvs
+                ! Copy the conservative variables.
+                !DIR$ NOVECTOR
+                do k = 1, nVar
+                    jj = jj + 1
+                    flowDoms(d2, level, sps)%intCommVars(k)%var(i2, j2, k2) = &
+                        flowDoms(d2, level, sps)%intCommVars(k)%var(i2, j2, k2) + sendBufInt(jj)
+                end do
+            end do
+
+        end do completeSends
+
+        ! Complete the nonblocking sends.
+
+        size = commPattern(level)%nProcRecv
+        do i = 1, commPattern(level)%nProcRecv
+            call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+        end do
+
+        deallocate (recvBufInt, sendBufInt)
+
+    end subroutine whalo1to1IntGeneric_b
+
+    subroutine whalo1to1_d(level, start, end, commPressure, &
+                           commVarGamma, commLamVis, commEddyVis, &
+                           commPattern, internal)
+        !
+        !       whalo1to1 exchanges the 1 to 1 internal halo's derivatives
+        !
+        use constants
+        use communication, only: commType, internalCommType
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level, start, end
+        logical, intent(in) :: commPressure, commVarGamma
+        logical, intent(in) :: commLamVis, commEddyVis
+
+        type(commType), dimension(*), intent(in) :: commPattern
+        type(internalCommType), dimension(*), intent(in) :: internal
+
+        integer(kind=intType) :: nVar, nn, k, sps
+
+        spectralModes: do sps = 1, nTimeIntervalsSpectral
+
+            call setCommPointers(start, end, commPressure, commVarGamma, &
+                                 commLamVis, commEddyVis, level, sps, .True., nVar, 0)
+
+            if (nVar == 0) then
+                return
+            end if
+
+            ! Run the generic exchange
+            call wHalo1to1RealGeneric(nVar, level, sps, commPattern, internal)
+
+        end do spectralModes
+
+    end subroutine whalo1to1_d
+
+    subroutine whalo1to1_b(level, start, end, commPressure, &
+                           commVarGamma, commLamVis, commEddyVis, &
+                           commPattern, internal)
+        !
+        !       whalo1to1 exchanges the 1 to 1 internal halo's derivatives
+        !
+        use constants
+        use communication, only: commType, internalCommType
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level, start, end
+        logical, intent(in) :: commPressure, commVarGamma
+        logical, intent(in) :: commLamVis, commEddyVis
+
+        type(commType), dimension(*), intent(in) :: commPattern
+        type(internalCommType), dimension(*), intent(in) :: internal
+
+        integer(kind=intType) :: nVar, nn, k, sps
+
+        spectralModes: do sps = 1, nTimeIntervalsSpectral
+
+            call setCommPointers(start, end, commPressure, commVarGamma, &
+                                 commLamVis, commEddyVis, level, sps, .True., nVar, 0)
+
+            if (nVar == 0) then
+                return
+            end if
+
+            ! Run the generic exchange
+            call wHalo1to1RealGeneric_b(nVar, level, sps, commPattern, internal)
+
+        end do spectralModes
+
+    end subroutine whalo1to1_b
+
+    subroutine wOverset(level, start, end, commPressure, &
+                        commVarGamma, commLamVis, commEddyVis, &
+                        commPattern, internal)
+        !
+        !       wOverset controls the communication between overset halos
+        !       for the cell-centered variables by interpolating the solution
+        !       from other blocks consistent with the chimera approach. A tri-
+        !       linear interpolation is used as per the input from cgns. It
+        !       is possible to send a range of variables and not the entire
+        !       set, e.g. only the flow variables or only the turbulent
+        !       variables. This is controlled by the arguments start, end,
+        !       commPressure and commViscous. The exchange takes place for
+        !       the given grid level.
+        !
+        use constants
+        use communication, only: commType, internalCommType
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level, start, end
+        logical, intent(in) :: commPressure, commVarGamma
+        logical, intent(in) :: commLamVis, commEddyVis
+
+        type(commType), dimension(:, :), intent(in) :: commPattern
+        type(internalCommType), dimension(:, :), intent(in) :: internal
+
+        !      Local variables.
+        integer(kind=intType) :: nVar, sps
+
+        spectralModes: do sps = 1, nTimeIntervalsSpectral
+
+            call setCommPointers(start, end, commPressure, commVarGamma, &
+                                 commLamVis, commEddyVis, level, sps, .False., nVar, 0)
+
+            if (nVar == 0) then
+                return
+            end if
+
+            ! Run the generic exchange
+            call wOversetGeneric(nVar, level, sps, commPattern, internal)
+        end do spectralModes
+
+    end subroutine wOverset
+
+    subroutine wOverset_d(level, start, end, commPressure, &
+                          commVarGamma, commLamVis, commEddyVis, &
+                          commPattern, internal)
+        !
+        !       wOverset controls the communication between overset halos
+        !       for the cell-centered variables by interpolating the solution
+        !       from other blocks consistent with the chimera approach. A tri-
+        !       linear interpolation is used as per the input from cgns. It
+        !       is possible to send a range of variables and not the entire
+        !       set, e.g. only the flow variables or only the turbulent
+        !       variables. This is controlled by the arguments start, end,
+        !       commPressure and commViscous. The exchange takes place for
+        !       the given grid level.
+        !
+        use constants
+        use communication, only: commType, internalCommType
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level, start, end
+        logical, intent(in) :: commPressure, commVarGamma
+        logical, intent(in) :: commLamVis, commEddyVis
+
+        type(commType), dimension(:, :), intent(in) :: commPattern
+        type(internalCommType), dimension(:, :), intent(in) :: internal
+        integer(kind=intType) :: nVar, sps, offset
+
+        spectralModes: do sps = 1, nTimeIntervalsSpectral
+
+            ! this one is tricker: We have to set BOTH the real values and
+            ! the the derivative values. Set the derivative values first:
+            call setCommPointers(start, end, commPressure, commVarGamma, &
+                                 commLamVis, commEddyVis, level, sps, .True., nVar, 0)
+
+            ! And then the original real values
+            offset = nVar
+            call setCommPointers(start, end, commPressure, commVarGamma, &
+                                 commLamVis, commEddyVis, level, sps, .False., nVar, offset)
+
+            if (nVar == 0) then
+                return
+            end if
+
+            ! Run the generic exchange
+            call wOversetGeneric_d(nVar, level, sps, commPattern, internal)
+        end do spectralModes
+    end subroutine wOverset_d
+
+    subroutine wOverset_b(level, start, end, commPressure, &
+                          commVarGamma, commLamVis, commEddyVis, &
+                          commPattern, internal)
+        !
+        !       wOverset_b performs the *TRANSPOSE* operation of wOverset
+        !       It is used for adjoint/reverse mode residual evaluations.
+        !      * See wOverset  for more information.
+        !
+        use constants
+        use communication, only: commType, internalCommType
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level, start, end
+        logical, intent(in) :: commPressure, commVarGamma
+        logical, intent(in) :: commLamVis, commEddyVis
+
+        type(commType), dimension(:, :), intent(in) :: commPattern
+        type(internalCommType), dimension(:, :), intent(in) :: internal
+        integer(kind=intType) :: nVar, sps, offset
+
+        spectralModes: do sps = 1, nTimeIntervalsSpectral
+
+            ! this one is tricker: We have to set BOTH the real values and
+            ! the the derivative values. Set the derivative values first:
+            call setCommPointers(start, end, commPressure, commVarGamma, &
+                                 commLamVis, commEddyVis, level, sps, .True., nVar, 0)
+
+            ! And then the original real values
+            offset = nVar
+            call setCommPointers(start, end, commPressure, commVarGamma, &
+                                 commLamVis, commEddyVis, level, sps, .False., nVar, offset)
+
+            if (nVar == 0) then
+                return
+            end if
+
+            ! Run the generic exchange
+            call wOversetGeneric_b(nVar, level, sps, commPattern, internal)
+        end do spectralModes
+
+    end subroutine wOverset_b
+
+    subroutine wOversetGeneric(nVar, level, sps, commPattern, Internal)
+        !
+        !       wOverset is the generic halo exhcnage code for the
+        !       overset halos.
+        !
+        use constants
+        use block, only: flowDoms
+        use communication
+        use utils, only: EChk
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level, sps
+
+        type(commType), dimension(:, :), intent(in) :: commPattern
+        type(internalCommType), dimension(:, :), intent(in) :: internal
+        !
+        !      Local variables.
+        !
+        integer :: size, procId, ierr, index
+        integer, dimension(mpi_status_size) :: mpiStatus
+
+        integer(kind=intType) :: nVar
+        integer(kind=intType) :: i, j, k, ii, jj
+        integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2
+        real(kind=realType), dimension(:), pointer :: weight
+
+        ! Send the variables. The data is first copied into
+        ! the send buffer after which the buffer is sent asap.
+
+        ii = 1
+        sends: do i = 1, commPattern(level, sps)%nProcSend
+
+            ! Store the processor id and the size of the message
+            ! a bit easier.
+
+            procID = commPattern(level, sps)%sendProc(i)
+            size = nVar * commPattern(level, sps)%nsend(i)
+
+            ! Copy the data in the correct part of the send buffer.
+
+            jj = ii
+            !DIR$ NOVECTOR
+            do j = 1, commPattern(level, sps)%nsend(i)
+
+                ! Store the block id and the indices of the donor
+                ! a bit easier.
+
+                d1 = commPattern(level, sps)%sendList(i)%block(j)
+                i1 = commPattern(level, sps)%sendList(i)%indices(j, 1) + 1
+                j1 = commPattern(level, sps)%sendList(i)%indices(j, 2) + 1
+                k1 = commPattern(level, sps)%sendList(i)%indices(j, 3) + 1
+                weight => commPattern(level, sps)%sendList(i)%interp(j, :)
+
+                ! Copy the given range of the working variables for
+                ! this cell in the buffer. Update the counter jj.
+                !DIR$ NOVECTOR
+                do k = 1, nvar
+                    sendBuffer(jj) = &
+                        weight(1) * flowDoms(d1, level, sps)%realCommVars(k)%var(i1, j1, k1) + &
+                        weight(2) * flowDoms(d1, level, sps)%realCommVars(k)%var(i1 + 1, j1, k1) + &
+                        weight(3) * flowDoms(d1, level, sps)%realCommVars(k)%var(i1, j1 + 1, k1) + &
+                        weight(4) * flowDoms(d1, level, sps)%realCommVars(k)%var(i1 + 1, j1 + 1, k1) + &
+                        weight(5) * flowDoms(d1, level, sps)%realCommVars(k)%var(i1, j1, k1 + 1) + &
+                        weight(6) * flowDoms(d1, level, sps)%realCommVars(k)%var(i1 + 1, j1, k1 + 1) + &
+                        weight(7) * flowDoms(d1, level, sps)%realCommVars(k)%var(i1, j1 + 1, k1 + 1) + &
+                        weight(8) * flowDoms(d1, level, sps)%realCommVars(k)%var(i1 + 1, j1 + 1, k1 + 1)
+                    jj = jj + 1
+                end do
+            end do
 
-    ! Complete the nonblocking sends.
+            ! Send the data.
 
-    size = commPattern(level)%nProcSend
-    do i=1,commPattern(level)%nProcSend
-       call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-    enddo
+            call mpi_isend(sendBuffer(ii), size, adflow_real, procId, &
+                           procId, ADflow_comm_world, sendRequests(i), &
+                           ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-    deallocate(recvBufInt, sendBufInt)
+            ! Set ii to jj for the next processor.
 
-  end subroutine whalo1to1IntGeneric
+            ii = jj
 
-  subroutine whalo1to1IntGeneric_b(nVar, level, sps, commPattern, internal)
-    !
-    !       whalo1to1IntGeneric_b is a generic implementation of the
-    !       reverse mode of whalo1to1IntGeneric. Integers are summed
-    !       together in reverse.
-    !
-    use constants
-    use block
-    use communication
-    use inputTimeSpectral
-    use utils, only : EChk
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level, sps
+        end do sends
 
-    type(commType), dimension(*), intent(in)         :: commPattern
-    type(internalCommType), dimension(*), intent(in) :: internal
-    !
-    !      Local variables.
-    !
+        ! Post the nonblocking receives.
 
-    integer :: size, procID, ierr, index
-    integer, dimension(mpi_status_size) :: mpiStatus
+        ii = 1
+        receives: do i = 1, commPattern(level, sps)%nProcRecv
 
-    integer(kind=intType) :: nVar, mm
-    integer(kind=intType) :: i, j, k, ii, jj
-    integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2
-    integer(kind=intType), dimension(:), allocatable :: sendBufInt
-    integer(kind=intType), dimension(:), allocatable :: recvBufInt
+            ! Store the processor id and the size of the message
+            ! a bit easier.
 
-    ii = commPattern(level)%nProcSend
-    ii = commPattern(level)%nsendCum(ii)
-    jj = commPattern(level)%nProcRecv
-    jj = commPattern(level)%nrecvCum(jj)
+            procID = commPattern(level, sps)%recvProc(i)
+            size = nVar * commPattern(level, sps)%nrecv(i)
 
-    allocate(sendBufInt(ii*nVar), recvBufInt(jj*nVar), stat=ierr)
+            ! Post the receive.
 
-    ! Gather up the seeds into the *recv* buffer. Note we loop
-    ! over nProcRECV here! After the buffer is assembled it is
-    ! sent off.
+            call mpi_irecv(recvBuffer(ii), size, adflow_real, procId, &
+                           myId, ADflow_comm_world, recvRequests(i), ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-    jj = 1
-    ii = 1
-    recvs: do i=1,commPattern(level)%nProcRecv
+            ! And update ii.
 
-       ! Store the processor id and the size of the message
-       ! a bit easier.
+            ii = ii + size
 
-       procID = commPattern(level)%recvProc(i)
-       size    = nVar*commPattern(level)%nrecv(i)
+        end do receives
 
-       ! Copy the data into the buffer
-       !DIR$ NOVECTOR
-       do j=1,commPattern(level)%nrecv(i)
+        ! Do the local interpolation.
+        !DIR$ NOVECTOR
+        localInterp: do i = 1, internal(level, sps)%ncopy
 
-          ! Store the block and the indices of the halo a bit easier.
+            ! Store the block and the indices of the donor a bit easier.
 
-          d2 = commPattern(level)%recvList(i)%block(j)
-          i2 = commPattern(level)%recvList(i)%indices(j,1)+1
-          j2 = commPattern(level)%recvList(i)%indices(j,2)+1
-          k2 = commPattern(level)%recvList(i)%indices(j,3)+1
-          !DIR$ NOVECTOR
-          do k=1, nVar
-             recvBufInt(jj) = flowDoms(d2,level,sps)%intCommVars(k)%var(i2,j2,k2)
-             flowDoms(d2,level,sps)%intCommVars(k)%var(i2, j2, k2) = 0
-             jj = jj + 1
-          enddo
-       end do
+            d1 = internal(level, sps)%donorBlock(i)
+            i1 = internal(level, sps)%donorIndices(i, 1) + 1
+            j1 = internal(level, sps)%donorIndices(i, 2) + 1
+            k1 = internal(level, sps)%donorIndices(i, 3) + 1
 
-       ! Send the data.
-       call mpi_isend(recvBufInt(ii), size, adflow_integer, procID,  &
-            procID, ADflow_comm_world, recvRequests(i), &
-            ierr)
-       call EChk(ierr,__FILE__,__LINE__)
+            weight => internal(level, sps)%donorInterp(i, :)
 
-       ! Set ii to jj for the next processor.
+            ! Idem for the halo's.
 
-       ii = jj
+            d2 = internal(level, sps)%haloBlock(i)
+            i2 = internal(level, sps)%haloIndices(i, 1) + 1
+            j2 = internal(level, sps)%haloIndices(i, 2) + 1
+            k2 = internal(level, sps)%haloIndices(i, 3) + 1
 
-    enddo recvs
+            ! Copy the given range of working variables.
+            !DIR$ NOVECTOR
+            do k = 1, nVar
+                flowDoms(d2, level, sps)%realCommVars(k)%var(i2, j2, k2) = &
+                    weight(1) * flowDoms(d1, level, sps)%realCommVars(k)%var(i1, j1, k1) + &
+                    weight(2) * flowDoms(d1, level, sps)%realCommVars(k)%var(i1 + 1, j1, k1) + &
+                    weight(3) * flowDoms(d1, level, sps)%realCommVars(k)%var(i1, j1 + 1, k1) + &
+                    weight(4) * flowDoms(d1, level, sps)%realCommVars(k)%var(i1 + 1, j1 + 1, k1) + &
+                    weight(5) * flowDoms(d1, level, sps)%realCommVars(k)%var(i1, j1, k1 + 1) + &
+                    weight(6) * flowDoms(d1, level, sps)%realCommVars(k)%var(i1 + 1, j1, k1 + 1) + &
+                    weight(7) * flowDoms(d1, level, sps)%realCommVars(k)%var(i1, j1 + 1, k1 + 1) + &
+                    weight(8) * flowDoms(d1, level, sps)%realCommVars(k)%var(i1 + 1, j1 + 1, k1 + 1)
+            end do
+        end do localInterp
 
-    ! Post the nonblocking receives.
+        ! Complete the nonblocking receives in an arbitrary sequence and
+        ! copy the variables from the buffer into the halo's.
 
-    ii = 1
-    sends: do i=1,commPattern(level)%nProcSend
+        size = commPattern(level, sps)%nProcRecv
+        completeRecvs: do i = 1, commPattern(level, sps)%nProcRecv
 
-       ! Store the processor id and the size of the message
-       ! a bit easier.
+            ! Complete any of the requests.
 
-       procID = commPattern(level)%sendProc(i)
-       size    = nVar*commPattern(level)%nsend(i)
+            call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       ! Post the receive.
+            ! Copy the data just arrived in the halo's.
 
-       call mpi_irecv(sendBufInt(ii), size, adflow_integer, procID, &
-            myID, ADflow_comm_world, sendRequests(i), ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ! And update ii.
-
-       ii = ii + size
-
-    enddo sends
+            ii = index
+            jj = nVar * commPattern(level, sps)%nrecvCum(ii - 1)
+            !DIR$ NOVECTOR
+            do j = 1, commPattern(level, sps)%nrecv(ii)
 
-    ! Copy the local data.
-    !DIR$ NOVECTOR
-    localCopy: do i=1,internal(level)%ncopy
+                ! Store the block and the indices of the halo a bit easier.
 
-       ! Store the block and the indices of the donor a bit easier.
+                d2 = commPattern(level, sps)%recvList(ii)%block(j)
+                i2 = commPattern(level, sps)%recvList(ii)%indices(j, 1) + 1
+                j2 = commPattern(level, sps)%recvList(ii)%indices(j, 2) + 1
+                k2 = commPattern(level, sps)%recvList(ii)%indices(j, 3) + 1
+                !DIR$ NOVECTOR
+                do k = 1, nVar
+                    jj = jj + 1
+                    flowDoms(d2, level, sps)%realCommVars(k)%var(i2, j2, k2) = recvBuffer(jj)
+                end do
+            end do
+        end do completeRecvs
+
+        ! Complete the nonblocking sends.
+
+        size = commPattern(level, sps)%nProcSend
+        do i = 1, commPattern(level, sps)%nProcSend
+            call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+        end do
+
+    end subroutine wOversetGeneric
+
+    subroutine wOversetGeneric_d(nVar, level, sps, commPattern, Internal)
+        !
+        !       wOverset_d is the generic halo forward mode linearized
+        !       code for overset halos.
+        !
+        use constants
+        use block, only: flowDoms
+        use communication
+        use utils, only: EChk
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level, sps
+
+        type(commType), dimension(:, :), intent(in) :: commPattern
+        type(internalCommType), dimension(:, :), intent(in) :: internal
+        !
+        !      Local variables.
+        !
+        integer :: size, procId, ierr, index
+        integer, dimension(mpi_status_size) :: mpiStatus
+
+        integer(kind=intType) :: nVar
+        integer(kind=intType) :: i, j, k, ii, jj
+        integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2
+        real(kind=realType), dimension(:), pointer :: weight, weightd
+
+        ! Send the variables. The data is first copied into
+        ! the send buffer after which the buffer is sent asap.
+
+        ii = 1
+        sends: do i = 1, commPattern(level, sps)%nProcSend
+
+            ! Store the processor id and the size of the message
+            ! a bit easier.
+
+            procID = commPattern(level, sps)%sendProc(i)
+            size = nVar * commPattern(level, sps)%nsend(i)
+
+            ! Copy the data in the correct part of the send buffer.
+
+            jj = ii
+            !DIR$ NOVECTOR
+            do j = 1, commPattern(level, sps)%nsend(i)
+
+                ! Store the block id and the indices of the donor
+                ! a bit easier.
+
+                d1 = commPattern(level, sps)%sendList(i)%block(j)
+                i1 = commPattern(level, sps)%sendList(i)%indices(j, 1) + 1
+                j1 = commPattern(level, sps)%sendList(i)%indices(j, 2) + 1
+                k1 = commPattern(level, sps)%sendList(i)%indices(j, 3) + 1
+                weight => commPattern(level, sps)%sendList(i)%interp(j, :)
+                weightd => commPattern(level, sps)%sendList(i)%interpd(j, :)
+
+                ! Copy the given range of the working variables for
+                ! this cell in the buffer. Update the counter jj.
+                !DIR$ NOVECTOR
+                do k = 1, nvar
+                    sendBuffer(jj) = &
+                        weight(1) * flowDoms(d1, level, sps)%realCommVars(k)%var(i1, j1, k1) + &
+                        weight(2) * flowDoms(d1, level, sps)%realCommVars(k)%var(i1 + 1, j1, k1) + &
+                        weight(3) * flowDoms(d1, level, sps)%realCommVars(k)%var(i1, j1 + 1, k1) + &
+                        weight(4) * flowDoms(d1, level, sps)%realCommVars(k)%var(i1 + 1, j1 + 1, k1) + &
+                        weight(5) * flowDoms(d1, level, sps)%realCommVars(k)%var(i1, j1, k1 + 1) + &
+                        weight(6) * flowDoms(d1, level, sps)%realCommVars(k)%var(i1 + 1, j1, k1 + 1) + &
+                        weight(7) * flowDoms(d1, level, sps)%realCommVars(k)%var(i1, j1 + 1, k1 + 1) + &
+                        weight(8) * flowDoms(d1, level, sps)%realCommVars(k)%var(i1 + 1, j1 + 1, k1 + 1) + &
+                        weightd(1) * flowDoms(d1, level, sps)%realCommVars(k + nVar)%var(i1, j1, k1) + &
+                        weightd(2) * flowDoms(d1, level, sps)%realCommVars(k + nVar)%var(i1 + 1, j1, k1) + &
+                        weightd(3) * flowDoms(d1, level, sps)%realCommVars(k + nVar)%var(i1, j1 + 1, k1) + &
+                        weightd(4) * flowDoms(d1, level, sps)%realCommVars(k + nVar)%var(i1 + 1, j1 + 1, k1) + &
+                        weightd(5) * flowDoms(d1, level, sps)%realCommVars(k + nVar)%var(i1, j1, k1 + 1) + &
+                        weightd(6) * flowDoms(d1, level, sps)%realCommVars(k + nVar)%var(i1 + 1, j1, k1 + 1) + &
+                        weightd(7) * flowDoms(d1, level, sps)%realCommVars(k + nVar)%var(i1, j1 + 1, k1 + 1) + &
+                        weightd(8) * flowDoms(d1, level, sps)%realCommVars(k + nVar)%var(i1 + 1, j1 + 1, k1 + 1)
+
+                    jj = jj + 1
+                end do
+            end do
 
-       d1 = internal(level)%donorBlock(i)
-       i1 = internal(level)%donorIndices(i,1)+1
-       j1 = internal(level)%donorIndices(i,2)+1
-       k1 = internal(level)%donorIndices(i,3)+1
+            ! Send the data.
 
-       ! Idem for the halo's.
+            call mpi_isend(sendBuffer(ii), size, adflow_real, procId, &
+                           procId, ADflow_comm_world, sendRequests(i), &
+                           ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       d2 = internal(level)%haloBlock(i)
-       i2 = internal(level)%haloIndices(i,1)+1
-       j2 = internal(level)%haloIndices(i,2)+1
-       k2 = internal(level)%haloIndices(i,3)+1
+            ! Set ii to jj for the next processor.
 
-       ! Sum into the '1' values from the '2' values (halos).
-       !DIR$ NOVECTOR
-       do k=1, nVar
-          flowDoms(d1, level, sps)%intCommVars(k)%var(i1, j1, k1) = &
-               flowDoms(d1, level, sps)%intCommVars(k)%var(i1, j1, k1) + &
-               flowDoms(d2, level, sps)%intCommVars(k)%var(i2, j2, k2)
-          flowDoms(d2, level, sps)%intCommVars(k)%var(i2, j2, k2) = 0
-       enddo
+            ii = jj
 
-    enddo localCopy
+        end do sends
 
-    ! Complete the nonblocking receives in an arbitrary sequence and
-    ! copy the variables from the buffer into the halo's.
+        ! Post the nonblocking receives.
 
-    size = commPattern(level)%nProcSend
-    completeSends: do i=1,commPattern(level)%nProcSend
+        ii = 1
+        receives: do i = 1, commPattern(level, sps)%nProcRecv
 
-       ! Complete any of the requests.
+            ! Store the processor id and the size of the message
+            ! a bit easier.
 
-       call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
+            procID = commPattern(level, sps)%recvProc(i)
+            size = nVar * commPattern(level, sps)%nrecv(i)
 
-       ! ! Copy the data just arrived in the halo's.
+            ! Post the receive.
 
-       ii = index
-       jj = nVar*commPattern(level)%nsendCum(ii-1)
-       !DIR$ NOVECTOR
-       do j=1,commPattern(level)%nsend(ii)
+            call mpi_irecv(recvBuffer(ii), size, adflow_real, procId, &
+                           myId, ADflow_comm_world, recvRequests(i), ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-          ! Store the block and the indices of the halo a bit easier.
+            ! And update ii.
 
-          d2 = commPattern(level)%sendList(ii)%block(j)
-          i2 = commPattern(level)%sendList(ii)%indices(j,1)+1
-          j2 = commPattern(level)%sendList(ii)%indices(j,2)+1
-          k2 = commPattern(level)%sendList(ii)%indices(j,3)+1
+            ii = ii + size
 
-          ! Copy the conservative variables.
-          !DIR$ NOVECTOR
-          do k=1, nVar
-             jj = jj + 1
-             flowDoms(d2, level, sps)%intCommVars(k)%var(i2, j2, k2) = &
-                  flowDoms(d2, level, sps)%intCommVars(k)%var(i2, j2, k2) + sendBufInt(jj)
-          enddo
-       enddo
+        end do receives
 
-    enddo completeSends
+        ! Do the local interpolation.
+        !DIR$ NOVECTOR
+        localInterp: do i = 1, internal(level, sps)%ncopy
 
-    ! Complete the nonblocking sends.
+            ! Store the block and the indices of the donor a bit easier.
 
-    size = commPattern(level)%nProcRecv
-    do i=1,commPattern(level)%nProcRecv
-       call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-    enddo
+            d1 = internal(level, sps)%donorBlock(i)
+            i1 = internal(level, sps)%donorIndices(i, 1) + 1
+            j1 = internal(level, sps)%donorIndices(i, 2) + 1
+            k1 = internal(level, sps)%donorIndices(i, 3) + 1
 
-    deallocate(recvBufInt, sendBufInt)
+            weight => internal(level, sps)%donorInterp(i, :)
+            weightd => internal(level, sps)%donorInterpd(i, :)
 
-  end subroutine whalo1to1IntGeneric_b
-
-  subroutine whalo1to1_d(level, start, end, commPressure,       &
-       commVarGamma, commLamVis, commEddyVis, &
-       commPattern, internal)
-    !
-    !       whalo1to1 exchanges the 1 to 1 internal halo's derivatives
-    !
-    use constants
-    use communication, only : commType, internalCommType
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level, start, end
-    logical, intent(in) :: commPressure, commVarGamma
-    logical, intent(in) :: commLamVis, commEddyVis
-
-    type(commType), dimension(*), intent(in)         :: commPattern
-    type(internalCommType), dimension(*), intent(in) :: internal
-
-    integer(kind=intType) :: nVar, nn, k, sps
-
-    spectralModes: do sps=1,nTimeIntervalsSpectral
-
-       call setCommPointers(start, end, commPressure, commVarGamma, &
-            commLamVis, commEddyVis, level, sps, .True., nVar, 0)
-
-       if (nVar == 0) then
-          return
-       end if
-
-       ! Run the generic exchange
-       call wHalo1to1RealGeneric(nVar, level, sps, commPattern, internal)
-
-    end do spectralModes
-
-  end subroutine whalo1to1_d
-
-  subroutine whalo1to1_b(level, start, end, commPressure,       &
-       commVarGamma, commLamVis, commEddyVis, &
-       commPattern, internal)
-    !
-    !       whalo1to1 exchanges the 1 to 1 internal halo's derivatives
-    !
-    use constants
-    use communication, only : commType, internalCommType
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level, start, end
-    logical, intent(in) :: commPressure, commVarGamma
-    logical, intent(in) :: commLamVis, commEddyVis
-
-    type(commType), dimension(*), intent(in)         :: commPattern
-    type(internalCommType), dimension(*), intent(in) :: internal
-
-    integer(kind=intType) :: nVar, nn, k, sps
-
-    spectralModes: do sps=1,nTimeIntervalsSpectral
-
-       call setCommPointers(start, end, commPressure, commVarGamma, &
-            commLamVis, commEddyVis, level, sps, .True., nVar, 0)
-
-       if (nVar == 0) then
-          return
-       end if
-
-       ! Run the generic exchange
-       call wHalo1to1RealGeneric_b(nVar, level, sps, commPattern, internal)
-
-    end do spectralModes
-
-  end subroutine whalo1to1_b
-
-  subroutine wOverset(level, start, end, commPressure,       &
-       commVarGamma, commLamVis, commEddyVis, &
-       commPattern, internal)
-    !
-    !       wOverset controls the communication between overset halos
-    !       for the cell-centered variables by interpolating the solution
-    !       from other blocks consistent with the chimera approach. A tri-
-    !       linear interpolation is used as per the input from cgns. It
-    !       is possible to send a range of variables and not the entire
-    !       set, e.g. only the flow variables or only the turbulent
-    !       variables. This is controlled by the arguments start, end,
-    !       commPressure and commViscous. The exchange takes place for
-    !       the given grid level.
-    !
-    use constants
-    use communication, only : commType, internalCommType
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level, start, end
-    logical, intent(in) :: commPressure, commVarGamma
-    logical, intent(in) :: commLamVis, commEddyVis
-
-    type(commType), dimension(:, :), intent(in) :: commPattern
-    type(internalCommType), dimension(:, :), intent(in) :: internal
-
-    !      Local variables.
-    integer(kind=intType) :: nVar, sps
-
-    spectralModes: do sps=1,nTimeIntervalsSpectral
-
-       call setCommPointers(start, end, commPressure, commVarGamma, &
-            commLamVis, commEddyVis, level, sps, .False., nVar, 0)
-
-       if (nVar == 0) then
-          return
-       end if
-
-       ! Run the generic exchange
-       call wOversetGeneric(nVar, level, sps, commPattern, internal)
-    end do spectralModes
-
-  end subroutine wOverset
-
-  subroutine wOverset_d(level, start, end, commPressure,       &
-       commVarGamma, commLamVis, commEddyVis, &
-       commPattern, internal)
-    !
-    !       wOverset controls the communication between overset halos
-    !       for the cell-centered variables by interpolating the solution
-    !       from other blocks consistent with the chimera approach. A tri-
-    !       linear interpolation is used as per the input from cgns. It
-    !       is possible to send a range of variables and not the entire
-    !       set, e.g. only the flow variables or only the turbulent
-    !       variables. This is controlled by the arguments start, end,
-    !       commPressure and commViscous. The exchange takes place for
-    !       the given grid level.
-    !
-    use constants
-    use communication, only : commType, internalCommType
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level, start, end
-    logical, intent(in) :: commPressure, commVarGamma
-    logical, intent(in) :: commLamVis, commEddyVis
-
-    type(commType), dimension(:, :), intent(in) :: commPattern
-    type(internalCommType), dimension(:, :), intent(in) :: internal
-    integer(kind=intType) :: nVar, sps, offset
-
-    spectralModes: do sps=1,nTimeIntervalsSpectral
-
-       ! this one is tricker: We have to set BOTH the real values and
-       ! the the derivative values. Set the derivative values first:
-       call setCommPointers(start, end, commPressure, commVarGamma, &
-            commLamVis, commEddyVis, level, sps, .True., nVar, 0)
-
-       ! And then the original real values
-       offset = nVar
-       call setCommPointers(start, end, commPressure, commVarGamma, &
-            commLamVis, commEddyVis, level, sps, .False., nVar, offset)
-
-       if (nVar == 0) then
-          return
-       end if
-
-       ! Run the generic exchange
-       call wOversetGeneric_d(nVar, level, sps, commPattern, internal)
-    end do spectralModes
-  end subroutine wOverset_d
-
-  subroutine wOverset_b(level, start, end, commPressure,       &
-       commVarGamma, commLamVis, commEddyVis, &
-       commPattern, internal)
-    !
-    !       wOverset_b performs the *TRANSPOSE* operation of wOverset
-    !       It is used for adjoint/reverse mode residual evaluations.
-    !      * See wOverset  for more information.
-    !
-    use constants
-    use communication, only : commType, internalCommType
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level, start, end
-    logical, intent(in) :: commPressure, commVarGamma
-    logical, intent(in) :: commLamVis, commEddyVis
-
-    type(commType), dimension(:, :), intent(in) :: commPattern
-    type(internalCommType), dimension(:, :), intent(in) :: internal
-    integer(kind=intType) :: nVar, sps, offset
-
-    spectralModes: do sps=1,nTimeIntervalsSpectral
-
-       ! this one is tricker: We have to set BOTH the real values and
-       ! the the derivative values. Set the derivative values first:
-       call setCommPointers(start, end, commPressure, commVarGamma, &
-            commLamVis, commEddyVis, level, sps, .True., nVar, 0)
-
-       ! And then the original real values
-       offset = nVar
-       call setCommPointers(start, end, commPressure, commVarGamma, &
-            commLamVis, commEddyVis, level, sps, .False., nVar, offset)
-
-       if (nVar == 0) then
-          return
-       end if
-
-       ! Run the generic exchange
-       call wOversetGeneric_b(nVar, level, sps, commPattern, internal)
-    end do spectralModes
-
-  end subroutine wOverset_b
-
-  subroutine wOversetGeneric(nVar, level, sps, commPattern, Internal)
-    !
-    !       wOverset is the generic halo exhcnage code for the
-    !       overset halos.
-    !
-    use constants
-    use block, only : flowDoms
-    use communication
-    use utils, only : EChk
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level, sps
-
-    type(commType), dimension(:, :), intent(in)         :: commPattern
-    type(internalCommType), dimension(:, :), intent(in) :: internal
-    !
-    !      Local variables.
-    !
-    integer :: size, procId, ierr, index
-    integer, dimension(mpi_status_size) :: mpiStatus
-
-    integer(kind=intType) :: nVar
-    integer(kind=intType) :: i, j, k, ii, jj
-    integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2
-    real(kind=realType), dimension(:), pointer :: weight
-
-    ! Send the variables. The data is first copied into
-    ! the send buffer after which the buffer is sent asap.
-
-    ii = 1
-    sends: do i=1,commPattern(level, sps)%nProcSend
-
-       ! Store the processor id and the size of the message
-       ! a bit easier.
-
-       procID = commPattern(level, sps)%sendProc(i)
-       size    = nVar*commPattern(level, sps)%nsend(i)
-
-       ! Copy the data in the correct part of the send buffer.
-
-       jj = ii
-       !DIR$ NOVECTOR
-       do j=1,commPattern(level, sps)%nsend(i)
-
-          ! Store the block id and the indices of the donor
-          ! a bit easier.
-
-          d1 = commPattern(level, sps)%sendList(i)%block(j)
-          i1 = commPattern(level, sps)%sendList(i)%indices(j,1)+1
-          j1 = commPattern(level, sps)%sendList(i)%indices(j,2)+1
-          k1 = commPattern(level, sps)%sendList(i)%indices(j,3)+1
-          weight => commPattern(level, sps)%sendList(i)%interp(j, :)
-
-          ! Copy the given range of the working variables for
-          ! this cell in the buffer. Update the counter jj.
-          !DIR$ NOVECTOR
-          do k=1, nvar
-             sendBuffer(jj) = &
-                  weight(1)*flowDoms(d1,level,sps)%realCommVars(k)%var(i1  , j1,   k1  ) + &
-                  weight(2)*flowDoms(d1,level,sps)%realCommVars(k)%var(i1+1, j1,   k1  ) + &
-                  weight(3)*flowDoms(d1,level,sps)%realCommVars(k)%var(i1,   j1+1, k1  ) + &
-                  weight(4)*flowDoms(d1,level,sps)%realCommVars(k)%var(i1+1, j1+1, k1  ) + &
-                  weight(5)*flowDoms(d1,level,sps)%realCommVars(k)%var(i1  , j1,   k1+1) + &
-                  weight(6)*flowDoms(d1,level,sps)%realCommVars(k)%var(i1+1, j1,   k1+1) + &
-                  weight(7)*flowDoms(d1,level,sps)%realCommVars(k)%var(i1,   j1+1, k1+1) + &
-                  weight(8)*flowDoms(d1,level,sps)%realCommVars(k)%var(i1+1, j1+1, k1+1)
-             jj = jj + 1
-          end do
-       enddo
-
-       ! Send the data.
-
-       call mpi_isend(sendBuffer(ii), size, adflow_real, procId,  &
-            procId, ADflow_comm_world, sendRequests(i), &
-            ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ! Set ii to jj for the next processor.
-
-       ii = jj
-
-    enddo sends
-
-    ! Post the nonblocking receives.
-
-    ii = 1
-    receives: do i=1,commPattern(level, sps)%nProcRecv
-
-       ! Store the processor id and the size of the message
-       ! a bit easier.
-
-       procID = commPattern(level,sps)%recvProc(i)
-       size    = nVar*commPattern(level,sps)%nrecv(i)
-
-       ! Post the receive.
-
-       call mpi_irecv(recvBuffer(ii), size, adflow_real, procId, &
-            myId, ADflow_comm_world, recvRequests(i), ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ! And update ii.
-
-       ii = ii + size
-
-    enddo receives
-
-    ! Do the local interpolation.
-    !DIR$ NOVECTOR
-    localInterp: do i=1,internal(level, sps)%ncopy
-
-       ! Store the block and the indices of the donor a bit easier.
-
-       d1 = internal(level,sps)%donorBlock(i)
-       i1 = internal(level,sps)%donorIndices(i, 1)+1
-       j1 = internal(level,sps)%donorIndices(i, 2)+1
-       k1 = internal(level,sps)%donorIndices(i, 3)+1
-
-       weight => internal(level,sps)%donorInterp(i, :)
-
-       ! Idem for the halo's.
-
-       d2 = internal(level,sps)%haloBlock(i)
-       i2 = internal(level,sps)%haloIndices(i, 1)+1
-       j2 = internal(level,sps)%haloIndices(i, 2)+1
-       k2 = internal(level,sps)%haloIndices(i, 3)+1
+            ! Idem for the halo's.
 
-       ! Copy the given range of working variables.
-       !DIR$ NOVECTOR
-       do k=1, nVar
-          flowDoms(d2, level, sps)%realCommVars(k)%var(i2, j2, k2) = &
-               weight(1)*flowDoms(d1, level, sps)%realCommVars(k)%var(i1,   j1,   k1  ) + &
-               weight(2)*flowDoms(d1, level, sps)%realCommVars(k)%var(i1+1, j1,   k1  ) + &
-               weight(3)*flowDoms(d1, level, sps)%realCommVars(k)%var(i1,   j1+1, k1  ) + &
-               weight(4)*flowDoms(d1, level, sps)%realCommVars(k)%var(i1+1, j1+1, k1  ) + &
-               weight(5)*flowDoms(d1, level, sps)%realCommVars(k)%var(i1,   j1,   k1+1) + &
-               weight(6)*flowDoms(d1, level, sps)%realCommVars(k)%var(i1+1, j1,   k1+1) + &
-               weight(7)*flowDoms(d1, level, sps)%realCommVars(k)%var(i1,   j1+1, k1+1) + &
-               weight(8)*flowDoms(d1, level, sps)%realCommVars(k)%var(i1+1, j1+1, k1+1)
-       end do
-    enddo localInterp
-
-    ! Complete the nonblocking receives in an arbitrary sequence and
-    ! copy the variables from the buffer into the halo's.
-
-    size = commPattern(level, sps)%nProcRecv
-    completeRecvs: do i=1,commPattern(level, sps)%nProcRecv
-
-       ! Complete any of the requests.
-
-       call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ! Copy the data just arrived in the halo's.
-
-       ii = index
-       jj = nVar*commPattern(level,sps)%nrecvCum(ii-1)
-       !DIR$ NOVECTOR
-       do j=1,commPattern(level,sps)%nrecv(ii)
-
-          ! Store the block and the indices of the halo a bit easier.
-
-          d2 = commPattern(level,sps)%recvList(ii)%block(j)
-          i2 = commPattern(level,sps)%recvList(ii)%indices(j,1)+1
-          j2 = commPattern(level,sps)%recvList(ii)%indices(j,2)+1
-          k2 = commPattern(level,sps)%recvList(ii)%indices(j,3)+1
-          !DIR$ NOVECTOR
-          do k=1, nVar
-             jj = jj + 1
-             flowDoms(d2,level,sps)%realCommVars(k)%var(i2,j2,k2) = recvBuffer(jj)
-          enddo
-       enddo
-    end do completeRecvs
-
-    ! Complete the nonblocking sends.
-
-    size = commPattern(level,sps)%nProcSend
-    do i=1,commPattern(level,sps)%nProcSend
-       call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-    enddo
-
-  end subroutine wOversetGeneric
-
-  subroutine wOversetGeneric_d(nVar, level, sps, commPattern, Internal)
-    !
-    !       wOverset_d is the generic halo forward mode linearized
-    !       code for overset halos.
-    !
-    use constants
-    use block, only : flowDoms
-    use communication
-    use utils, only : EChk
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level, sps
-
-    type(commType), dimension(:, :), intent(in)         :: commPattern
-    type(internalCommType), dimension(:, :), intent(in) :: internal
-    !
-    !      Local variables.
-    !
-    integer :: size, procId, ierr, index
-    integer, dimension(mpi_status_size) :: mpiStatus
-
-    integer(kind=intType) :: nVar
-    integer(kind=intType) :: i, j, k, ii, jj
-    integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2
-    real(kind=realType), dimension(:), pointer :: weight, weightd
-
-    ! Send the variables. The data is first copied into
-    ! the send buffer after which the buffer is sent asap.
-
-    ii = 1
-    sends: do i=1,commPattern(level, sps)%nProcSend
-
-       ! Store the processor id and the size of the message
-       ! a bit easier.
-
-       procID = commPattern(level, sps)%sendProc(i)
-       size    = nVar*commPattern(level, sps)%nsend(i)
-
-       ! Copy the data in the correct part of the send buffer.
-
-       jj = ii
-       !DIR$ NOVECTOR
-       do j=1,commPattern(level, sps)%nsend(i)
-
-          ! Store the block id and the indices of the donor
-          ! a bit easier.
-
-          d1 = commPattern(level, sps)%sendList(i)%block(j)
-          i1 = commPattern(level, sps)%sendList(i)%indices(j,1)+1
-          j1 = commPattern(level, sps)%sendList(i)%indices(j,2)+1
-          k1 = commPattern(level, sps)%sendList(i)%indices(j,3)+1
-          weight => commPattern(level, sps)%sendList(i)%interp(j, :)
-          weightd => commPattern(level, sps)%sendList(i)%interpd(j, :)
-
-          ! Copy the given range of the working variables for
-          ! this cell in the buffer. Update the counter jj.
-          !DIR$ NOVECTOR
-          do k=1, nvar
-             sendBuffer(jj) = &
-                  weight(1)*flowDoms(d1,level,sps)%realCommVars(k)%var(i1  , j1,   k1  ) + &
-                  weight(2)*flowDoms(d1,level,sps)%realCommVars(k)%var(i1+1, j1,   k1  ) + &
-                  weight(3)*flowDoms(d1,level,sps)%realCommVars(k)%var(i1,   j1+1, k1  ) + &
-                  weight(4)*flowDoms(d1,level,sps)%realCommVars(k)%var(i1+1, j1+1, k1  ) + &
-                  weight(5)*flowDoms(d1,level,sps)%realCommVars(k)%var(i1  , j1,   k1+1) + &
-                  weight(6)*flowDoms(d1,level,sps)%realCommVars(k)%var(i1+1, j1,   k1+1) + &
-                  weight(7)*flowDoms(d1,level,sps)%realCommVars(k)%var(i1,   j1+1, k1+1) + &
-                  weight(8)*flowDoms(d1,level,sps)%realCommVars(k)%var(i1+1, j1+1, k1+1) + &
-                  weightd(1)*flowDoms(d1,level,sps)%realCommVars(k+nVar)%var(i1  , j1,   k1  ) + &
-                  weightd(2)*flowDoms(d1,level,sps)%realCommVars(k+nVar)%var(i1+1, j1,   k1  ) + &
-                  weightd(3)*flowDoms(d1,level,sps)%realCommVars(k+nVar)%var(i1,   j1+1, k1  ) + &
-                  weightd(4)*flowDoms(d1,level,sps)%realCommVars(k+nVar)%var(i1+1, j1+1, k1  ) + &
-                  weightd(5)*flowDoms(d1,level,sps)%realCommVars(k+nVar)%var(i1  , j1,   k1+1) + &
-                  weightd(6)*flowDoms(d1,level,sps)%realCommVars(k+nVar)%var(i1+1, j1,   k1+1) + &
-                  weightd(7)*flowDoms(d1,level,sps)%realCommVars(k+nVar)%var(i1,   j1+1, k1+1) + &
-                  weightd(8)*flowDoms(d1,level,sps)%realCommVars(k+nVar)%var(i1+1, j1+1, k1+1)
-
-
-             jj = jj + 1
-          end do
-       enddo
-
-       ! Send the data.
-
-       call mpi_isend(sendBuffer(ii), size, adflow_real, procId,  &
-            procId, ADflow_comm_world, sendRequests(i), &
-            ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ! Set ii to jj for the next processor.
-
-       ii = jj
-
-    enddo sends
-
-    ! Post the nonblocking receives.
-
-    ii = 1
-    receives: do i=1,commPattern(level, sps)%nProcRecv
-
-       ! Store the processor id and the size of the message
-       ! a bit easier.
-
-       procID = commPattern(level,sps)%recvProc(i)
-       size    = nVar*commPattern(level,sps)%nrecv(i)
-
-       ! Post the receive.
-
-       call mpi_irecv(recvBuffer(ii), size, adflow_real, procId, &
-            myId, ADflow_comm_world, recvRequests(i), ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ! And update ii.
-
-       ii = ii + size
-
-    enddo receives
-
-    ! Do the local interpolation.
-    !DIR$ NOVECTOR
-    localInterp: do i=1,internal(level, sps)%ncopy
-
-       ! Store the block and the indices of the donor a bit easier.
-
-       d1 = internal(level,sps)%donorBlock(i)
-       i1 = internal(level,sps)%donorIndices(i, 1)+1
-       j1 = internal(level,sps)%donorIndices(i, 2)+1
-       k1 = internal(level,sps)%donorIndices(i, 3)+1
-
-       weight => internal(level,sps)%donorInterp(i, :)
-       weightd => internal(level, sps)%donorInterpd(i, :)
-
-       ! Idem for the halo's.
-
-       d2 = internal(level,sps)%haloBlock(i)
-       i2 = internal(level,sps)%haloIndices(i, 1)+1
-       j2 = internal(level,sps)%haloIndices(i, 2)+1
-       k2 = internal(level,sps)%haloIndices(i, 3)+1
-
-       ! Copy the given range of working variables.
-       !DIR$ NOVECTOR
-       do k=1, nVar
-          flowDoms(d2, level, sps)%realCommVars(k)%var(i2, j2, k2) = &
-               weight(1)*flowDoms(d1,level,sps)%realCommVars(k)%var(i1  , j1,   k1  ) + &
-               weight(2)*flowDoms(d1,level,sps)%realCommVars(k)%var(i1+1, j1,   k1  ) + &
-               weight(3)*flowDoms(d1,level,sps)%realCommVars(k)%var(i1,   j1+1, k1  ) + &
-               weight(4)*flowDoms(d1,level,sps)%realCommVars(k)%var(i1+1, j1+1, k1  ) + &
-               weight(5)*flowDoms(d1,level,sps)%realCommVars(k)%var(i1  , j1,   k1+1) + &
-               weight(6)*flowDoms(d1,level,sps)%realCommVars(k)%var(i1+1, j1,   k1+1) + &
-               weight(7)*flowDoms(d1,level,sps)%realCommVars(k)%var(i1,   j1+1, k1+1) + &
-               weight(8)*flowDoms(d1,level,sps)%realCommVars(k)%var(i1+1, j1+1, k1+1) + &
-               weightd(1)*flowDoms(d1,level,sps)%realCommVars(k+nVar)%var(i1  , j1,   k1  ) + &
-               weightd(2)*flowDoms(d1,level,sps)%realCommVars(k+nVar)%var(i1+1, j1,   k1  ) + &
-               weightd(3)*flowDoms(d1,level,sps)%realCommVars(k+nVar)%var(i1,   j1+1, k1  ) + &
-               weightd(4)*flowDoms(d1,level,sps)%realCommVars(k+nVar)%var(i1+1, j1+1, k1  ) + &
-               weightd(5)*flowDoms(d1,level,sps)%realCommVars(k+nVar)%var(i1  , j1,   k1+1) + &
-               weightd(6)*flowDoms(d1,level,sps)%realCommVars(k+nVar)%var(i1+1, j1,   k1+1) + &
-               weightd(7)*flowDoms(d1,level,sps)%realCommVars(k+nVar)%var(i1,   j1+1, k1+1) + &
-               weightd(8)*flowDoms(d1,level,sps)%realCommVars(k+nVar)%var(i1+1, j1+1, k1+1)
-
-       end do
-    enddo localInterp
-
-    ! Complete the nonblocking receives in an arbitrary sequence and
-    ! copy the variables from the buffer into the halo's.
-
-    size = commPattern(level, sps)%nProcRecv
-    completeRecvs: do i=1,commPattern(level, sps)%nProcRecv
-
-       ! Complete any of the requests.
-
-       call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ! Copy the data just arrived in the halo's.
+            d2 = internal(level, sps)%haloBlock(i)
+            i2 = internal(level, sps)%haloIndices(i, 1) + 1
+            j2 = internal(level, sps)%haloIndices(i, 2) + 1
+            k2 = internal(level, sps)%haloIndices(i, 3) + 1
 
-       ii = index
-       jj = nVar*commPattern(level,sps)%nrecvCum(ii-1)
-       !DIR$ NOVECTOR
-       do j=1,commPattern(level,sps)%nrecv(ii)
+            ! Copy the given range of working variables.
+            !DIR$ NOVECTOR
+            do k = 1, nVar
+                flowDoms(d2, level, sps)%realCommVars(k)%var(i2, j2, k2) = &
+                    weight(1) * flowDoms(d1, level, sps)%realCommVars(k)%var(i1, j1, k1) + &
+                    weight(2) * flowDoms(d1, level, sps)%realCommVars(k)%var(i1 + 1, j1, k1) + &
+                    weight(3) * flowDoms(d1, level, sps)%realCommVars(k)%var(i1, j1 + 1, k1) + &
+                    weight(4) * flowDoms(d1, level, sps)%realCommVars(k)%var(i1 + 1, j1 + 1, k1) + &
+                    weight(5) * flowDoms(d1, level, sps)%realCommVars(k)%var(i1, j1, k1 + 1) + &
+                    weight(6) * flowDoms(d1, level, sps)%realCommVars(k)%var(i1 + 1, j1, k1 + 1) + &
+                    weight(7) * flowDoms(d1, level, sps)%realCommVars(k)%var(i1, j1 + 1, k1 + 1) + &
+                    weight(8) * flowDoms(d1, level, sps)%realCommVars(k)%var(i1 + 1, j1 + 1, k1 + 1) + &
+                    weightd(1) * flowDoms(d1, level, sps)%realCommVars(k + nVar)%var(i1, j1, k1) + &
+                    weightd(2) * flowDoms(d1, level, sps)%realCommVars(k + nVar)%var(i1 + 1, j1, k1) + &
+                    weightd(3) * flowDoms(d1, level, sps)%realCommVars(k + nVar)%var(i1, j1 + 1, k1) + &
+                    weightd(4) * flowDoms(d1, level, sps)%realCommVars(k + nVar)%var(i1 + 1, j1 + 1, k1) + &
+                    weightd(5) * flowDoms(d1, level, sps)%realCommVars(k + nVar)%var(i1, j1, k1 + 1) + &
+                    weightd(6) * flowDoms(d1, level, sps)%realCommVars(k + nVar)%var(i1 + 1, j1, k1 + 1) + &
+                    weightd(7) * flowDoms(d1, level, sps)%realCommVars(k + nVar)%var(i1, j1 + 1, k1 + 1) + &
+                    weightd(8) * flowDoms(d1, level, sps)%realCommVars(k + nVar)%var(i1 + 1, j1 + 1, k1 + 1)
 
-          ! Store the block and the indices of the halo a bit easier.
-
-          d2 = commPattern(level,sps)%recvList(ii)%block(j)
-          i2 = commPattern(level,sps)%recvList(ii)%indices(j,1)+1
-          j2 = commPattern(level,sps)%recvList(ii)%indices(j,2)+1
-          k2 = commPattern(level,sps)%recvList(ii)%indices(j,3)+1
+            end do
+        end do localInterp
 
-          do k=1, nVar
-             jj = jj + 1
-             flowDoms(d2,level,sps)%realCommVars(k)%var(i2,j2,k2) = recvBuffer(jj)
-          enddo
-       enddo
-    end do completeRecvs
+        ! Complete the nonblocking receives in an arbitrary sequence and
+        ! copy the variables from the buffer into the halo's.
 
-    ! Complete the nonblocking sends.
+        size = commPattern(level, sps)%nProcRecv
+        completeRecvs: do i = 1, commPattern(level, sps)%nProcRecv
 
-    size = commPattern(level,sps)%nProcSend
-    do i=1,commPattern(level,sps)%nProcSend
-       call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-   enddo
+            ! Complete any of the requests.
 
-  end subroutine wOversetGeneric_d
+            call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-  subroutine wOversetGeneric_b(nVar, level, sps, commPattern, Internal)
-    !
-    !       wOversetGeneric_b is the generic reverse mode linearized
-    !       code for overset halos.
-    !
-    use constants
-    use block, only : flowDoms
-    use communication
-    use utils, only : EChk
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level, sps
-
-    type(commType), dimension(:, :), intent(in)         :: commPattern
-    type(internalCommType), dimension(:, :), intent(in) :: internal
-    !
-    !      Local variables.
-    !
-    integer :: size, procId, ierr, index
-    integer, dimension(mpi_status_size) :: mpiStatus
-
-    integer(kind=intType) :: nVar
-    integer(kind=intType) :: i, j, k, ii, jj, kk
-    integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2, iii, jjj, kkk
-    real(kind=realType), dimension(:), pointer :: weight, weightd
-    real(kind=realType) :: vard
-    ! Gather up the seeds into the *recv* buffer. Note we loop over
-    ! nProcRECV here! After the buffer is assembled it is send off.
-
-    jj = 1
-    ii = 1
-    recvs: do i=1, commPattern(level, sps)%nProcRecv
-
-       ! Store the processor id and the size of the message
-       ! a bit easier.
-
-       procID = commPattern(level, sps)%recvProc(i)
-       size    = nVar*commPattern(level, sps)%nrecv(i)
-
-       ! Copy the data into the buffer
-       !DIR$ NOVECTOR
-       do j=1,commPattern(level, sps)%nrecv(i)
-
-          ! Store the block and the indices to make code a bit easier to read
-
-          d2 = commPattern(level, sps)%recvList(i)%block(j)
-          i2 = commPattern(level, sps)%recvList(i)%indices(j,1)+1
-          j2 = commPattern(level, sps)%recvList(i)%indices(j,2)+1
-          k2 = commPattern(level, sps)%recvList(i)%indices(j,3)+1
-          !DIR$ NOVECTOR
-          do k=1, nVar
-             recvBuffer(jj) = flowDoms(d2, level, sps)%realCommVars(k)%var(i2, j2, k2)
-             jj = jj + 1
-             flowDoms(d2, level, sps)%realCommVars(k)%var(i2, j2, k2) = zero
-          enddo
-       enddo
-
-       ! Send the data.
-       call mpi_isend(recvBuffer(ii), size, adflow_real, procID,  &
-            procID, ADflow_comm_world, recvRequests(i), &
-            ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ! Set ii to jj for the next processor.
-
-       ii = jj
-
-    end do recvs
-
-    ! Post the nonblocking receives.
-
-    ii = 1
-    sends: do i=1,commPattern(level, sps)%nProcSend
-
-       ! Store the processor id and the size of the message
-       ! a bit easier.
-
-       procID = commPattern(level, sps)%sendProc(i)
-       size    = nVar*commPattern(level, sps)%nsend(i)
-
-       ! Post the receive.
-
-       call mpi_irecv(sendBuffer(ii), size, adflow_real, procId, &
-            myId, ADflow_comm_world, sendRequests(i), ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ! And update ii.
-
-       ii = ii + size
-
-    enddo sends
-
-    ! Do the local interpolation.
-    !DIR$ NOVECTOR
-    localInterp: do i=1,internal(level, sps)%ncopy
-
-       ! Store the block and the indices of the donor a bit easier.
-
-       d1 = internal(level, sps)%donorBlock(i)
-       i1 = internal(level, sps)%donorIndices(i, 1)+1
-       j1 = internal(level, sps)%donorIndices(i, 2)+1
-       k1 = internal(level, sps)%donorIndices(i, 3)+1
-
-       weight => internal(level, sps)%donorInterp(i, :)
-       weightd => internal(level, sps)%donorInterpd(i, :)
-
-       ! Idem for the halo's.
+            ! Copy the data just arrived in the halo's.
 
-       d2 = internal(level, sps)%haloBlock(i)
-       i2 = internal(level, sps)%haloIndices(i, 1)+1
-       j2 = internal(level, sps)%haloIndices(i, 2)+1
-       k2 = internal(level, sps)%haloIndices(i, 3)+1
+            ii = index
+            jj = nVar * commPattern(level, sps)%nrecvCum(ii - 1)
+            !DIR$ NOVECTOR
+            do j = 1, commPattern(level, sps)%nrecv(ii)
 
-       ! Sum into the '1' values from the '2' values accouting for the weights
-       !DIR$ NOVECTOR
-       do k=1, nVar
-          vard = flowDoms(d2, level, sps)%realCommVars(k)%var(i2, j2, k2)
-          kk = 0
-          do kkk=k1,k1+1
-             do jjj=j1,j1+1
-                do iii=i1,i1+1
-                   kk = kk + 1
-                   flowDoms(d1, level, sps)%realCommVars(k)%var(iii, jjj, kkk) = &
-                        flowDoms(d1, level, sps)%realCommVars(k)%var(iii, jjj, kkk) + &
-                        weight(kk)*vard
+                ! Store the block and the indices of the halo a bit easier.
 
-                   weightd(kk) = weightd(kk) + &
-                        flowDoms(d1, level, sps)%realCommVars(k+nVar)%var(iii,jjj,kkk)*vard
+                d2 = commPattern(level, sps)%recvList(ii)%block(j)
+                i2 = commPattern(level, sps)%recvList(ii)%indices(j, 1) + 1
+                j2 = commPattern(level, sps)%recvList(ii)%indices(j, 2) + 1
+                k2 = commPattern(level, sps)%recvList(ii)%indices(j, 3) + 1
 
+                do k = 1, nVar
+                    jj = jj + 1
+                    flowDoms(d2, level, sps)%realCommVars(k)%var(i2, j2, k2) = recvBuffer(jj)
                 end do
-             end do
-          end do
-          flowDoms(d2, level, sps)%realCommVars(k)%var(i2, j2, k2) = zero
-       end do
-    enddo localInterp
-
-    ! Complete the nonblocking receives in an arbitrary sequence and
-    ! copy the variables from the buffer into the halo's.
-
-    size = commPattern(level, sps)%nProcSend
-    completeSends: do i=1,commPattern(level, sps)%nProcSend
-
-       ! Complete any of the requests.
-
-       call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ! Copy the data just arrived in the halo's.
-
-       ii = index
-
-       jj = nVar*commPattern(level, sps)%nsendCum(ii-1)
-       !DIR$ NOVECTOR
-       do j=1,commPattern(level, sps)%nsend(ii)
-
-          ! Store the block and the indices of the halo a bit easier.
-
-          d2 = commPattern(level, sps)%sendList(ii)%block(j)
-          i2 = commPattern(level, sps)%sendList(ii)%indices(j,1)+1
-          j2 = commPattern(level, sps)%sendList(ii)%indices(j,2)+1
-          k2 = commPattern(level, sps)%sendList(ii)%indices(j,3)+1
-
-          weight => commPattern(level, sps)%sendList(ii)%interp(j, :)
-          weightd => commPattern(level, sps)%sendList(ii)%interpd(j, :)
-          !DIR$ NOVECTOR
-          do k=1, nVar
-             jj =jj + 1
-             vard = sendBuffer(jj)
-             kk = 0
-             do kkk=k2,k2+1
-                do jjj=j2,j2+1
-                   do iii=i2,i2+1
-
-                      kk = kk + 1
-                      flowDoms(d2, level, sps)%realCommVars(k)%var(iii, jjj, kkk) = &
-                           flowDoms(d2, level, sps)%realCommVars(k)%var(iii, jjj, kkk) + &
-                           weight(kk)*vard
-
-                      weightd(kk) = weightd(kk) + &
-                           flowDoms(d2, level, sps)%realCommVars(k+nVar)%var(iii,jjj,kkk)*vard
-
-                   end do
+            end do
+        end do completeRecvs
+
+        ! Complete the nonblocking sends.
+
+        size = commPattern(level, sps)%nProcSend
+        do i = 1, commPattern(level, sps)%nProcSend
+            call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+        end do
+
+    end subroutine wOversetGeneric_d
+
+    subroutine wOversetGeneric_b(nVar, level, sps, commPattern, Internal)
+        !
+        !       wOversetGeneric_b is the generic reverse mode linearized
+        !       code for overset halos.
+        !
+        use constants
+        use block, only: flowDoms
+        use communication
+        use utils, only: EChk
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level, sps
+
+        type(commType), dimension(:, :), intent(in) :: commPattern
+        type(internalCommType), dimension(:, :), intent(in) :: internal
+        !
+        !      Local variables.
+        !
+        integer :: size, procId, ierr, index
+        integer, dimension(mpi_status_size) :: mpiStatus
+
+        integer(kind=intType) :: nVar
+        integer(kind=intType) :: i, j, k, ii, jj, kk
+        integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2, iii, jjj, kkk
+        real(kind=realType), dimension(:), pointer :: weight, weightd
+        real(kind=realType) :: vard
+        ! Gather up the seeds into the *recv* buffer. Note we loop over
+        ! nProcRECV here! After the buffer is assembled it is send off.
+
+        jj = 1
+        ii = 1
+        recvs: do i = 1, commPattern(level, sps)%nProcRecv
+
+            ! Store the processor id and the size of the message
+            ! a bit easier.
+
+            procID = commPattern(level, sps)%recvProc(i)
+            size = nVar * commPattern(level, sps)%nrecv(i)
+
+            ! Copy the data into the buffer
+            !DIR$ NOVECTOR
+            do j = 1, commPattern(level, sps)%nrecv(i)
+
+                ! Store the block and the indices to make code a bit easier to read
+
+                d2 = commPattern(level, sps)%recvList(i)%block(j)
+                i2 = commPattern(level, sps)%recvList(i)%indices(j, 1) + 1
+                j2 = commPattern(level, sps)%recvList(i)%indices(j, 2) + 1
+                k2 = commPattern(level, sps)%recvList(i)%indices(j, 3) + 1
+                !DIR$ NOVECTOR
+                do k = 1, nVar
+                    recvBuffer(jj) = flowDoms(d2, level, sps)%realCommVars(k)%var(i2, j2, k2)
+                    jj = jj + 1
+                    flowDoms(d2, level, sps)%realCommVars(k)%var(i2, j2, k2) = zero
                 end do
-             end do
-          end do
-       enddo
-
-    enddo completeSends
+            end do
 
-    ! Complete the nonblocking sends.
+            ! Send the data.
+            call mpi_isend(recvBuffer(ii), size, adflow_real, procID, &
+                           procID, ADflow_comm_world, recvRequests(i), &
+                           ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-    size = commPattern(level, sps)%nProcRecv
-    do i=1,commPattern(level, sps)%nProcRecv
-       call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-    enddo
+            ! Set ii to jj for the next processor.
 
-  end subroutine wOversetGeneric_b
+            ii = jj
 
+        end do recvs
 
-#ifndef USE_COMPLEX
-  subroutine whalo2_b(level, start, end, commPressure, commGamma, &
-       commViscous)
-    !
-    !       whalo2_b exchanges all the 2nd level internal halo's for the
-    !       cell centered variables IN REVERSE MODE
-    !
-    use constants
-    use blockPointers
-    use communication
-    use flowVarRefState
-    use inputPhysics
-    use inputTimeSpectral
-    use iteration
-    use flowUtils_b, only : computeETotBlock_b
-    use utils, only : setPointers_b, getCorrectForK
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level, start, end
-    logical, intent(in) :: commPressure, commGamma, commViscous
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn, mm, ll
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
-
-    logical :: correctForK, commLamVis, commEddyVis, commVarGamma
-
-    ! Set the logicals whether or not to communicate the viscosities.
-
-    commLamVis = .false.
-    if(viscous .and. commViscous) commLamVis = .true.
-
-    commEddyVis = .false.
-    if(eddyModel .and. commViscous) commEddyVis = .true.
-
-    ! Set the logical whether or not to communicate gamma.
-
-    commVarGamma = .false.
-    if(commGamma .and. (cpModel == cpTempCurveFits)) &
-         commVarGamma = .true.
-
-    bothPAndE: if(commPressure .and. start <= irhoE .and. &
-         end >= irhoE) then
-
-       ! First determine whether or not the total energy must be
-       ! corrected for the presence of the turbulent kinetic energy.
-
-       correctForK = getCorrectForK()
-
-       domains: do nn=1,nDom
-
-          ! Treat the overset blocks. Since we don't have the logic
-          ! setup here correctly to only update the overset cells,
-          ! just do the whole block, for every block
-          do ll=1, nTimeIntervalsSpectral
-             call setPointers_b(nn, level, ll)
-             call computeETotBlock_b(2, il, 2, jl, 2, kl, correctForK)
-          end do
-       enddo domains
-    endif bothPAndE
-
-    call wOverset_b(level, start, end, commPressure, commVarGamma, &
-         commLamVis, commEddyVis, commPatternOverset, internalOverset)
-
-    ! Exchange the 1 to 1 matching 2nd level cell halo's.
-    call whalo1to1_b(level, start, end, commPressure, commVarGamma, &
-         commLamVis, commEddyVis, commPatternCell_2nd,  &
-         internalCell_2nd)
-
-    ! NOTE: Only the 1to1 halo exchange and overset is done. whalosliding,
-    ! whalomixing, orphanAverage and PandE corrections
-    ! calculation are NOT implementent.
-
-  end subroutine whalo2_b
-
-  subroutine whalo2_d(level, start, end, commPressure, commGamma, &
-       commViscous)
-    !
-    !       whalo2_b exchanges all the 2nd level internal halo's for the
-    !       cell centered variables IN FORWARD MODE
-    !
-    use constants
-    use blockPointers
-    use communication
-    use flowVarRefState
-    use inputPhysics
-    use inputTimeSpectral
-    use iteration
-    use flowUtils_d, only : computeETotBlock_d
-    use utils, only : setPointers_d, getCorrectForK
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level, start, end
-    logical, intent(in) :: commPressure, commGamma, commViscous
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn, mm, ll
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
-
-    logical :: correctForK, commLamVis, commEddyVis, commVarGamma
-
-    ! Set the logicals whether or not to communicate the viscosities.
-
-    commLamVis = .false.
-    if(viscous .and. commViscous) commLamVis = .true.
-
-    commEddyVis = .false.
-    if(eddyModel .and. commViscous) commEddyVis = .true.
-
-    ! Set the logical whether or not to communicate gamma.
-
-    commVarGamma = .false.
-    if(commGamma .and. (cpModel == cpTempCurveFits)) &
-         commVarGamma = .true.
-
-    ! Exchange the 1 to 1 matching 2nd level cell halo's.
-
-    call whalo1to1_d(level, start, end, commPressure, commVarGamma, &
-         commLamVis, commEddyVis, commPatternCell_2nd,  &
-         internalCell_2nd)
+        ! Post the nonblocking receives.
 
-    ! Exchange the overset cells
-    call wOverset_d(level, start, end, commPressure, commVarGamma, &
-         commLamVis, commEddyVis, commPatternOverset, internalOverset)
-
-    ! NOTE: Only the 1to1 halo and wOverset exchange is done. whalosliding,
-    ! whalomixing, orphanAverage and PandE corrections
-    ! calculation are NOT implementent.
-
-    ! If both the pressure and the total energy has been communicated
-    ! compute the energy again. The reason is that both values are
-    ! interpolated and consequently the values are not consistent.
-    ! The energy depends quadratically on the velocity.
-
-    bothPAndE: if(commPressure .and. start <= irhoE .and. &
-         end >= irhoE) then
-
-       ! First determine whether or not the total energy must be
-       ! corrected for the presence of the turbulent kinetic energy.
-
-       correctForK = getCorrectForK()
-
-       domains: do nn=1,nDom
-
-          ! Treat the overset blocks. Since we don't have the logic
-          ! setup here correctly to only update the overset cells,
-          ! just do the whole block, for every block
-          do ll=1, nTimeIntervalsSpectral
-             call setPointers_d(nn, level, ll)
-             call computeETotBlock_d(2, il, 2, jl, 2, kl, correctForK)
-          end do
-       enddo domains
-
-    endif bothPAndE
-  end subroutine whalo2_d
-#endif
+        ii = 1
+        sends: do i = 1, commPattern(level, sps)%nProcSend
 
+            ! Store the processor id and the size of the message
+            ! a bit easier.
 
-  subroutine resHalo1(level, start, end)
-    !
-    !       resHalo1 determines the residuals in the 1st layer of halo
-    !       cells by applying both the boundary conditions and the
-    !       exchange. The halo values are needed for post processing
-    !       reasons.
-    !
-    use constants
-    use blockPointers
-    use communication
-    use inputTimeSpectral
-    use utils, only : setPointers, EChk
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType) :: level, start, end
-    !
-    !      Local variables.
-    !
-    integer :: size, procID, ierr, index
-    integer, dimension(mpi_status_size) :: mpiStatus
-
-    integer(kind=intType) :: nVar, sps
-    integer(kind=intType) :: ii, jj, mm, nn, i, j, k, l
-    integer(kind=intType) :: dd1, ii1, jj1, kk1, dd2, ii2, jj2, kk2
-
-    real(kind=realType), pointer, dimension(:,:,:) :: ddw1, ddw2
-
-    ! Determine the number of variables per cell to be sent.
-
-    nVar = max(0_intType,(end - start + 1))
-    if(nVar == 0) return
-
-    ! Loop over the spectral solutions and local blocks to apply
-    ! the boundary conditions for the residual.
-
-    spectralLoop: do sps=1,nTimeIntervalsSpectral
-       domains: do mm=1,nDom
-
-          ! Set the pointers for this block.
-
-          call setPointers(mm, level, sps)
-
-          ! Loop over the boundary condition subfaces of this block
-          ! and apply a neumann boundary condition. I know that this is
-          ! not entirely correct for some boundary conditions, symmetry,
-          ! solid wall, but this is not so important.
-
-          bocos: do nn=1,nBocos
-
-             ! Set the pointer for ddw1 and ddw2, depending on the block
-             ! face on which the subface is located.
-
-             select case (BCFaceID(nn))
-             case (iMin)
-                ddw1 => dw(1, 1:,1:,:); ddw2 => dw(2, 1:,1:,:)
-             case (iMax)
-                ddw1 => dw(ie,1:,1:,:); ddw2 => dw(il,1:,1:,:)
-             case (jMin)
-                ddw1 => dw(1:,1, 1:,:); ddw2 => dw(1:,2, 1:,:)
-             case (jMax)
-                ddw1 => dw(1:,je,1:,:); ddw2 => dw(1:,jl,1:,:)
-             case (kMin)
-                ddw1 => dw(1:,1:,1, :); ddw2 => dw(1:,1:,2, :)
-             case (kMax)
-                ddw1 => dw(1:,1:,ke,:); ddw2 => dw(1:,1:,kl,:)
-             end select
-
-             ! Loop over the cell range of the subface.
-             !DIR$ NOVECTOR
-             do j=BCData(nn)%jcBeg, BCData(nn)%jcEnd
-                !DIR$ NOVECTOR
-                do i=BCData(nn)%icBeg, BCData(nn)%icEnd
-                   !DIR$ NOVECTOR
-                   do l=start,end
-                      ddw1(i,j,l) = ddw2(i,j,l)
-                   enddo
-                enddo
-             enddo
+            procID = commPattern(level, sps)%sendProc(i)
+            size = nVar * commPattern(level, sps)%nsend(i)
 
-          enddo bocos
-       enddo domains
+            ! Post the receive.
 
-       ! Send the variables. The data is first copied into
-       ! the send buffer after which the buffer is sent asap.
+            call mpi_irecv(sendBuffer(ii), size, adflow_real, procId, &
+                           myId, ADflow_comm_world, sendRequests(i), ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       ii = 1
-       sends: do i=1,commPatternCell_1st(level)%nProcSend
+            ! And update ii.
 
-          ! Store the processor id and the size of the message
-          ! a bit easier.
+            ii = ii + size
 
-          procID = commPatternCell_1st(level)%sendProc(i)
-          size    = nVar*commPatternCell_1st(level)%nsend(i)
+        end do sends
 
-          ! Copy the data in the correct part of the send buffer.
+        ! Do the local interpolation.
+        !DIR$ NOVECTOR
+        localInterp: do i = 1, internal(level, sps)%ncopy
 
-          jj = ii
-          !DIR$ NOVECTOR
-          do j=1,commPatternCell_1st(level)%nsend(i)
+            ! Store the block and the indices of the donor a bit easier.
 
-             ! Store the block id and the indices of the donor
-             ! a bit easier.
+            d1 = internal(level, sps)%donorBlock(i)
+            i1 = internal(level, sps)%donorIndices(i, 1) + 1
+            j1 = internal(level, sps)%donorIndices(i, 2) + 1
+            k1 = internal(level, sps)%donorIndices(i, 3) + 1
 
-             dd1 = commPatternCell_1st(level)%sendList(i)%block(j)
-             ii1 = commPatternCell_1st(level)%sendList(i)%indices(j,1)
-             jj1 = commPatternCell_1st(level)%sendList(i)%indices(j,2)
-             kk1 = commPatternCell_1st(level)%sendList(i)%indices(j,3)
+            weight => internal(level, sps)%donorInterp(i, :)
+            weightd => internal(level, sps)%donorInterpd(i, :)
 
-             ! Copy the given range of the residuals for this cell
-             ! in the buffer. Update the counter jj accordingly.
-             !DIR$ NOVECTOR
-             do k=start,end
-                sendBuffer(jj) = flowDoms(dd1,level,sps)%dw(ii1,jj1,kk1,k)
-                jj = jj + 1
-             enddo
+            ! Idem for the halo's.
 
-          enddo
+            d2 = internal(level, sps)%haloBlock(i)
+            i2 = internal(level, sps)%haloIndices(i, 1) + 1
+            j2 = internal(level, sps)%haloIndices(i, 2) + 1
+            k2 = internal(level, sps)%haloIndices(i, 3) + 1
 
-          ! Send the data.
+            ! Sum into the '1' values from the '2' values accouting for the weights
+            !DIR$ NOVECTOR
+            do k = 1, nVar
+                vard = flowDoms(d2, level, sps)%realCommVars(k)%var(i2, j2, k2)
+                kk = 0
+                do kkk = k1, k1 + 1
+                    do jjj = j1, j1 + 1
+                        do iii = i1, i1 + 1
+                            kk = kk + 1
+                            flowDoms(d1, level, sps)%realCommVars(k)%var(iii, jjj, kkk) = &
+                                flowDoms(d1, level, sps)%realCommVars(k)%var(iii, jjj, kkk) + &
+                                weight(kk) * vard
 
-          call mpi_isend(sendBuffer(ii), size, adflow_real, procID,  &
-               procID, ADflow_comm_world, sendRequests(i), &
-               ierr)
-          call EChk(ierr,__FILE__,__LINE__)
+                            weightd(kk) = weightd(kk) + &
+                                          flowDoms(d1, level, sps)%realCommVars(k + nVar)%var(iii, jjj, kkk) * vard
 
-          ! Set ii to jj for the next processor.
+                        end do
+                    end do
+                end do
+                flowDoms(d2, level, sps)%realCommVars(k)%var(i2, j2, k2) = zero
+            end do
+        end do localInterp
 
-          ii = jj
+        ! Complete the nonblocking receives in an arbitrary sequence and
+        ! copy the variables from the buffer into the halo's.
 
-       enddo sends
+        size = commPattern(level, sps)%nProcSend
+        completeSends: do i = 1, commPattern(level, sps)%nProcSend
 
-       ! Post the nonblocking receives.
+            ! Complete any of the requests.
 
-       ii = 1
-       receives: do i=1,commPatternCell_1st(level)%nProcRecv
+            call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-          ! Store the processor id and the size of the message
-          ! a bit easier.
+            ! Copy the data just arrived in the halo's.
 
-          procID = commPatternCell_1st(level)%recvProc(i)
-          size    = nVar*commPatternCell_1st(level)%nrecv(i)
+            ii = index
 
-          ! Post the receive.
+            jj = nVar * commPattern(level, sps)%nsendCum(ii - 1)
+            !DIR$ NOVECTOR
+            do j = 1, commPattern(level, sps)%nsend(ii)
 
-          call mpi_irecv(recvBuffer(ii), size, adflow_real, procID, &
-               myID, ADflow_comm_world, recvRequests(i), ierr)
-          call EChk(ierr,__FILE__,__LINE__)
+                ! Store the block and the indices of the halo a bit easier.
 
-          ! And update ii.
+                d2 = commPattern(level, sps)%sendList(ii)%block(j)
+                i2 = commPattern(level, sps)%sendList(ii)%indices(j, 1) + 1
+                j2 = commPattern(level, sps)%sendList(ii)%indices(j, 2) + 1
+                k2 = commPattern(level, sps)%sendList(ii)%indices(j, 3) + 1
 
-          ii = ii + size
+                weight => commPattern(level, sps)%sendList(ii)%interp(j, :)
+                weightd => commPattern(level, sps)%sendList(ii)%interpd(j, :)
+                !DIR$ NOVECTOR
+                do k = 1, nVar
+                    jj = jj + 1
+                    vard = sendBuffer(jj)
+                    kk = 0
+                    do kkk = k2, k2 + 1
+                        do jjj = j2, j2 + 1
+                            do iii = i2, i2 + 1
+
+                                kk = kk + 1
+                                flowDoms(d2, level, sps)%realCommVars(k)%var(iii, jjj, kkk) = &
+                                    flowDoms(d2, level, sps)%realCommVars(k)%var(iii, jjj, kkk) + &
+                                    weight(kk) * vard
+
+                                weightd(kk) = weightd(kk) + &
+                                              flowDoms(d2, level, sps)%realCommVars(k + nVar)%var(iii, jjj, kkk) * vard
+
+                            end do
+                        end do
+                    end do
+                end do
+            end do
 
-       enddo receives
+        end do completeSends
 
-       ! Copy the local data.
-       !DIR$ NOVECTOR
-       localCopy: do i=1,internalCell_1st(level)%ncopy
+        ! Complete the nonblocking sends.
 
-          ! Store the block and the indices of the donor a bit easier.
+        size = commPattern(level, sps)%nProcRecv
+        do i = 1, commPattern(level, sps)%nProcRecv
+            call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+        end do
 
-          dd1 = internalCell_1st(level)%donorBlock(i)
-          ii1 = internalCell_1st(level)%donorIndices(i,1)
-          jj1 = internalCell_1st(level)%donorIndices(i,2)
-          kk1 = internalCell_1st(level)%donorIndices(i,3)
+    end subroutine wOversetGeneric_b
 
-          ! Idem for the halo's.
+#ifndef USE_COMPLEX
+    subroutine whalo2_b(level, start, end, commPressure, commGamma, &
+                        commViscous)
+        !
+        !       whalo2_b exchanges all the 2nd level internal halo's for the
+        !       cell centered variables IN REVERSE MODE
+        !
+        use constants
+        use blockPointers
+        use communication
+        use flowVarRefState
+        use inputPhysics
+        use inputTimeSpectral
+        use iteration
+        use flowUtils_b, only: computeETotBlock_b
+        use utils, only: setPointers_b, getCorrectForK
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level, start, end
+        logical, intent(in) :: commPressure, commGamma, commViscous
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn, mm, ll
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
+
+        logical :: correctForK, commLamVis, commEddyVis, commVarGamma
+
+        ! Set the logicals whether or not to communicate the viscosities.
+
+        commLamVis = .false.
+        if (viscous .and. commViscous) commLamVis = .true.
+
+        commEddyVis = .false.
+        if (eddyModel .and. commViscous) commEddyVis = .true.
+
+        ! Set the logical whether or not to communicate gamma.
+
+        commVarGamma = .false.
+        if (commGamma .and. (cpModel == cpTempCurveFits)) &
+            commVarGamma = .true.
+
+        bothPAndE: if (commPressure .and. start <= irhoE .and. &
+                       end >= irhoE) then
+
+            ! First determine whether or not the total energy must be
+            ! corrected for the presence of the turbulent kinetic energy.
+
+            correctForK = getCorrectForK()
+
+            domains: do nn = 1, nDom
+
+                ! Treat the overset blocks. Since we don't have the logic
+                ! setup here correctly to only update the overset cells,
+                ! just do the whole block, for every block
+                do ll = 1, nTimeIntervalsSpectral
+                    call setPointers_b(nn, level, ll)
+                    call computeETotBlock_b(2, il, 2, jl, 2, kl, correctForK)
+                end do
+            end do domains
+        end if bothPAndE
+
+        call wOverset_b(level, start, end, commPressure, commVarGamma, &
+                        commLamVis, commEddyVis, commPatternOverset, internalOverset)
+
+        ! Exchange the 1 to 1 matching 2nd level cell halo's.
+        call whalo1to1_b(level, start, end, commPressure, commVarGamma, &
+                         commLamVis, commEddyVis, commPatternCell_2nd, &
+                         internalCell_2nd)
+
+        ! NOTE: Only the 1to1 halo exchange and overset is done. whalosliding,
+        ! whalomixing, orphanAverage and PandE corrections
+        ! calculation are NOT implementent.
+
+    end subroutine whalo2_b
+
+    subroutine whalo2_d(level, start, end, commPressure, commGamma, &
+                        commViscous)
+        !
+        !       whalo2_b exchanges all the 2nd level internal halo's for the
+        !       cell centered variables IN FORWARD MODE
+        !
+        use constants
+        use blockPointers
+        use communication
+        use flowVarRefState
+        use inputPhysics
+        use inputTimeSpectral
+        use iteration
+        use flowUtils_d, only: computeETotBlock_d
+        use utils, only: setPointers_d, getCorrectForK
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level, start, end
+        logical, intent(in) :: commPressure, commGamma, commViscous
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn, mm, ll
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, kBeg, kEnd
+
+        logical :: correctForK, commLamVis, commEddyVis, commVarGamma
+
+        ! Set the logicals whether or not to communicate the viscosities.
+
+        commLamVis = .false.
+        if (viscous .and. commViscous) commLamVis = .true.
+
+        commEddyVis = .false.
+        if (eddyModel .and. commViscous) commEddyVis = .true.
+
+        ! Set the logical whether or not to communicate gamma.
+
+        commVarGamma = .false.
+        if (commGamma .and. (cpModel == cpTempCurveFits)) &
+            commVarGamma = .true.
+
+        ! Exchange the 1 to 1 matching 2nd level cell halo's.
+
+        call whalo1to1_d(level, start, end, commPressure, commVarGamma, &
+                         commLamVis, commEddyVis, commPatternCell_2nd, &
+                         internalCell_2nd)
+
+        ! Exchange the overset cells
+        call wOverset_d(level, start, end, commPressure, commVarGamma, &
+                        commLamVis, commEddyVis, commPatternOverset, internalOverset)
+
+        ! NOTE: Only the 1to1 halo and wOverset exchange is done. whalosliding,
+        ! whalomixing, orphanAverage and PandE corrections
+        ! calculation are NOT implementent.
+
+        ! If both the pressure and the total energy has been communicated
+        ! compute the energy again. The reason is that both values are
+        ! interpolated and consequently the values are not consistent.
+        ! The energy depends quadratically on the velocity.
+
+        bothPAndE: if (commPressure .and. start <= irhoE .and. &
+                       end >= irhoE) then
+
+            ! First determine whether or not the total energy must be
+            ! corrected for the presence of the turbulent kinetic energy.
+
+            correctForK = getCorrectForK()
+
+            domains: do nn = 1, nDom
+
+                ! Treat the overset blocks. Since we don't have the logic
+                ! setup here correctly to only update the overset cells,
+                ! just do the whole block, for every block
+                do ll = 1, nTimeIntervalsSpectral
+                    call setPointers_d(nn, level, ll)
+                    call computeETotBlock_d(2, il, 2, jl, 2, kl, correctForK)
+                end do
+            end do domains
 
-          dd2 = internalCell_1st(level)%haloBlock(i)
-          ii2 = internalCell_1st(level)%haloIndices(i,1)
-          jj2 = internalCell_1st(level)%haloIndices(i,2)
-          kk2 = internalCell_1st(level)%haloIndices(i,3)
+        end if bothPAndE
+    end subroutine whalo2_d
+#endif
 
-          ! Copy the given range of residuals.
-          !DIR$ NOVECTOR
-          do k=start,end
-             flowDoms(dd2,level,sps)%dw(ii2,jj2,kk2,k) = &
-                  flowDoms(dd1,level,sps)%dw(ii1,jj1,kk1,k)
-          enddo
+    subroutine resHalo1(level, start, end)
+        !
+        !       resHalo1 determines the residuals in the 1st layer of halo
+        !       cells by applying both the boundary conditions and the
+        !       exchange. The halo values are needed for post processing
+        !       reasons.
+        !
+        use constants
+        use blockPointers
+        use communication
+        use inputTimeSpectral
+        use utils, only: setPointers, EChk
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType) :: level, start, end
+        !
+        !      Local variables.
+        !
+        integer :: size, procID, ierr, index
+        integer, dimension(mpi_status_size) :: mpiStatus
+
+        integer(kind=intType) :: nVar, sps
+        integer(kind=intType) :: ii, jj, mm, nn, i, j, k, l
+        integer(kind=intType) :: dd1, ii1, jj1, kk1, dd2, ii2, jj2, kk2
+
+        real(kind=realType), pointer, dimension(:, :, :) :: ddw1, ddw2
+
+        ! Determine the number of variables per cell to be sent.
+
+        nVar = max(0_intType, (end - start + 1))
+        if (nVar == 0) return
+
+        ! Loop over the spectral solutions and local blocks to apply
+        ! the boundary conditions for the residual.
+
+        spectralLoop: do sps = 1, nTimeIntervalsSpectral
+            domains: do mm = 1, nDom
+
+                ! Set the pointers for this block.
+
+                call setPointers(mm, level, sps)
+
+                ! Loop over the boundary condition subfaces of this block
+                ! and apply a neumann boundary condition. I know that this is
+                ! not entirely correct for some boundary conditions, symmetry,
+                ! solid wall, but this is not so important.
+
+                bocos: do nn = 1, nBocos
+
+                    ! Set the pointer for ddw1 and ddw2, depending on the block
+                    ! face on which the subface is located.
+
+                    select case (BCFaceID(nn))
+                    case (iMin)
+                        ddw1 => dw(1, 1:, 1:, :); ddw2 => dw(2, 1:, 1:, :)
+                    case (iMax)
+                        ddw1 => dw(ie, 1:, 1:, :); ddw2 => dw(il, 1:, 1:, :)
+                    case (jMin)
+                        ddw1 => dw(1:, 1, 1:, :); ddw2 => dw(1:, 2, 1:, :)
+                    case (jMax)
+                        ddw1 => dw(1:, je, 1:, :); ddw2 => dw(1:, jl, 1:, :)
+                    case (kMin)
+                        ddw1 => dw(1:, 1:, 1, :); ddw2 => dw(1:, 1:, 2, :)
+                    case (kMax)
+                        ddw1 => dw(1:, 1:, ke, :); ddw2 => dw(1:, 1:, kl, :)
+                    end select
+
+                    ! Loop over the cell range of the subface.
+                    !DIR$ NOVECTOR
+                    do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
+                        !DIR$ NOVECTOR
+                        do i = BCData(nn)%icBeg, BCData(nn)%icEnd
+                            !DIR$ NOVECTOR
+                            do l = start, end
+                                ddw1(i, j, l) = ddw2(i, j, l)
+                            end do
+                        end do
+                    end do
+
+                end do bocos
+            end do domains
+
+            ! Send the variables. The data is first copied into
+            ! the send buffer after which the buffer is sent asap.
+
+            ii = 1
+            sends: do i = 1, commPatternCell_1st(level)%nProcSend
+
+                ! Store the processor id and the size of the message
+                ! a bit easier.
+
+                procID = commPatternCell_1st(level)%sendProc(i)
+                size = nVar * commPatternCell_1st(level)%nsend(i)
+
+                ! Copy the data in the correct part of the send buffer.
+
+                jj = ii
+                !DIR$ NOVECTOR
+                do j = 1, commPatternCell_1st(level)%nsend(i)
 
-       enddo localCopy
+                    ! Store the block id and the indices of the donor
+                    ! a bit easier.
 
-       ! Complete the nonblocking receives in an arbitrary sequence and
-       ! copy the variables from the buffer into the halo's.
+                    dd1 = commPatternCell_1st(level)%sendList(i)%block(j)
+                    ii1 = commPatternCell_1st(level)%sendList(i)%indices(j, 1)
+                    jj1 = commPatternCell_1st(level)%sendList(i)%indices(j, 2)
+                    kk1 = commPatternCell_1st(level)%sendList(i)%indices(j, 3)
 
-       size = commPatternCell_1st(level)%nProcRecv
-       completeRecvs: do i=1,commPatternCell_1st(level)%nProcRecv
+                    ! Copy the given range of the residuals for this cell
+                    ! in the buffer. Update the counter jj accordingly.
+                    !DIR$ NOVECTOR
+                    do k = start, end
+                        sendBuffer(jj) = flowDoms(dd1, level, sps)%dw(ii1, jj1, kk1, k)
+                        jj = jj + 1
+                    end do
 
-          ! Complete any of the requests.
+                end do
 
-          call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
+                ! Send the data.
 
-          ! Copy the data just arrived in the halo's.
+                call mpi_isend(sendBuffer(ii), size, adflow_real, procID, &
+                               procID, ADflow_comm_world, sendRequests(i), &
+                               ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-          ii = index
-          jj = nVar*commPatternCell_1st(level)%nrecvCum(ii-1) +1
-          !DIR$ NOVECTOR
-          do j=1,commPatternCell_1st(level)%nrecv(ii)
+                ! Set ii to jj for the next processor.
 
-             ! Store the block and the indices of the halo a bit easier.
+                ii = jj
 
-             dd2 = commPatternCell_1st(level)%recvList(ii)%block(j)
-             ii2 = commPatternCell_1st(level)%recvList(ii)%indices(j,1)
-             jj2 = commPatternCell_1st(level)%recvList(ii)%indices(j,2)
-             kk2 = commPatternCell_1st(level)%recvList(ii)%indices(j,3)
+            end do sends
 
-             ! Copy the residuals.
-             !DIR$ NOVECTOR
-             do k=start,end
-                flowDoms(dd2,level,sps)%dw(ii2,jj2,kk2,k) = recvBuffer(jj)
-                jj = jj + 1
-             enddo
+            ! Post the nonblocking receives.
 
-          enddo
+            ii = 1
+            receives: do i = 1, commPatternCell_1st(level)%nProcRecv
 
-       enddo completeRecvs
+                ! Store the processor id and the size of the message
+                ! a bit easier.
 
-       ! Complete the nonblocking sends.
+                procID = commPatternCell_1st(level)%recvProc(i)
+                size = nVar * commPatternCell_1st(level)%nrecv(i)
 
-       size = commPatternCell_1st(level)%nProcSend
-       do i=1,commPatternCell_1st(level)%nProcSend
-          call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-    enddo
+                ! Post the receive.
 
-    enddo spectralLoop
+                call mpi_irecv(recvBuffer(ii), size, adflow_real, procID, &
+                               myID, ADflow_comm_world, recvRequests(i), ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-  end subroutine resHalo1
+                ! And update ii.
 
-  subroutine exchangeCoor(level)
-    !
-    !       ExchangeCoor exchanges the coordinates of the given grid
-    !       level.
-    !
-    use block
-    use communication
-    use inputTimeSpectral
-    use utils, only : EChk
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level
-    !
-    !      Local variables.
-    !
-    integer :: size, procID, ierr, index
-    integer, dimension(mpi_status_size) :: mpiStatus
+                ii = ii + size
 
-    integer(kind=intType) :: i, j, ii, jj, mm
-    integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2
+            end do receives
 
-    ! Loop over the number of spectral solutions.
+            ! Copy the local data.
+            !DIR$ NOVECTOR
+            localCopy: do i = 1, internalCell_1st(level)%ncopy
 
-    spectralLoop: do mm=1,nTimeIntervalsSpectral
+                ! Store the block and the indices of the donor a bit easier.
 
-       ! Send the coordinates i have to send. The data is first copied
-       ! into the send buffer and this buffer is sent.
+                dd1 = internalCell_1st(level)%donorBlock(i)
+                ii1 = internalCell_1st(level)%donorIndices(i, 1)
+                jj1 = internalCell_1st(level)%donorIndices(i, 2)
+                kk1 = internalCell_1st(level)%donorIndices(i, 3)
 
-       ii = 1
-       sends: do i=1,commPatternNode_1st(level)%nProcSend
+                ! Idem for the halo's.
 
-          ! Store the processor id and the size of the message
-          ! a bit easier.
+                dd2 = internalCell_1st(level)%haloBlock(i)
+                ii2 = internalCell_1st(level)%haloIndices(i, 1)
+                jj2 = internalCell_1st(level)%haloIndices(i, 2)
+                kk2 = internalCell_1st(level)%haloIndices(i, 3)
 
-          procID = commPatternNode_1st(level)%sendProc(i)
-          size   = 3*commPatternNode_1st(level)%nSend(i)
+                ! Copy the given range of residuals.
+                !DIR$ NOVECTOR
+                do k = start, end
+                    flowDoms(dd2, level, sps)%dw(ii2, jj2, kk2, k) = &
+                        flowDoms(dd1, level, sps)%dw(ii1, jj1, kk1, k)
+                end do
 
-          ! Copy the data in the correct part of the send buffer.
+            end do localCopy
 
-          jj = ii
-          !DIR$ NOVECTOR
-          do j=1,commPatternNode_1st(level)%nSend(i)
+            ! Complete the nonblocking receives in an arbitrary sequence and
+            ! copy the variables from the buffer into the halo's.
 
-             ! Store the block id and the indices of the donor
-             ! a bit easier.
+            size = commPatternCell_1st(level)%nProcRecv
+            completeRecvs: do i = 1, commPatternCell_1st(level)%nProcRecv
 
-             d1 = commPatternNode_1st(level)%sendList(i)%block(j)
-             i1 = commPatternNode_1st(level)%sendList(i)%indices(j,1)
-             j1 = commPatternNode_1st(level)%sendList(i)%indices(j,2)
-             k1 = commPatternNode_1st(level)%sendList(i)%indices(j,3)
+                ! Complete any of the requests.
 
-             ! Copy the coordinates of this point in the buffer.
-             ! Update the counter jj accordingly.
+                call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-             sendBuffer(jj)   = flowDoms(d1,level,mm)%x(i1,j1,k1,1)
-             sendBuffer(jj+1) = flowDoms(d1,level,mm)%x(i1,j1,k1,2)
-             sendBuffer(jj+2) = flowDoms(d1,level,mm)%x(i1,j1,k1,3)
-             jj = jj + 3
+                ! Copy the data just arrived in the halo's.
 
-          enddo
+                ii = index
+                jj = nVar * commPatternCell_1st(level)%nrecvCum(ii - 1) + 1
+                !DIR$ NOVECTOR
+                do j = 1, commPatternCell_1st(level)%nrecv(ii)
 
-          ! Send the data.
+                    ! Store the block and the indices of the halo a bit easier.
 
-          call mpi_isend(sendBuffer(ii), size, adflow_real, procID,    &
-               procID, ADflow_comm_world, sendRequests(i), &
-               ierr)
-          call EChk(ierr,__FILE__,__LINE__)
+                    dd2 = commPatternCell_1st(level)%recvList(ii)%block(j)
+                    ii2 = commPatternCell_1st(level)%recvList(ii)%indices(j, 1)
+                    jj2 = commPatternCell_1st(level)%recvList(ii)%indices(j, 2)
+                    kk2 = commPatternCell_1st(level)%recvList(ii)%indices(j, 3)
 
-          ! Set ii to jj for the next processor.
+                    ! Copy the residuals.
+                    !DIR$ NOVECTOR
+                    do k = start, end
+                        flowDoms(dd2, level, sps)%dw(ii2, jj2, kk2, k) = recvBuffer(jj)
+                        jj = jj + 1
+                    end do
 
-          ii = jj
+                end do
 
-       enddo sends
+            end do completeRecvs
 
-       ! Post the nonblocking receives.
+            ! Complete the nonblocking sends.
 
-       ii = 1
-       receives: do i=1,commPatternNode_1st(level)%nProcRecv
+            size = commPatternCell_1st(level)%nProcSend
+            do i = 1, commPatternCell_1st(level)%nProcSend
+                call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+            end do
 
-          ! Store the processor id and the size of the message
-          ! a bit easier.
+        end do spectralLoop
 
-          procID = commPatternNode_1st(level)%recvProc(i)
-          size   = 3*commPatternNode_1st(level)%nRecv(i)
+    end subroutine resHalo1
 
-          ! Post the receive.
+    subroutine exchangeCoor(level)
+        !
+        !       ExchangeCoor exchanges the coordinates of the given grid
+        !       level.
+        !
+        use block
+        use communication
+        use inputTimeSpectral
+        use utils, only: EChk
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level
+        !
+        !      Local variables.
+        !
+        integer :: size, procID, ierr, index
+        integer, dimension(mpi_status_size) :: mpiStatus
 
-          call mpi_irecv(recvBuffer(ii), size, adflow_real, procID, &
-               myID, ADflow_comm_world, recvRequests(i), ierr)
-          call EChk(ierr,__FILE__,__LINE__)
+        integer(kind=intType) :: i, j, ii, jj, mm
+        integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2
 
-          ! And update ii.
+        ! Loop over the number of spectral solutions.
 
-          ii = ii + size
+        spectralLoop: do mm = 1, nTimeIntervalsSpectral
 
-       enddo receives
+            ! Send the coordinates i have to send. The data is first copied
+            ! into the send buffer and this buffer is sent.
 
-       ! Copy the local data.
-       !DIR$ NOVECTOR
-       localCopy: do i=1,internalNode_1st(level)%nCopy
+            ii = 1
+            sends: do i = 1, commPatternNode_1st(level)%nProcSend
 
-          ! Store the block and the indices of the donor a bit easier.
+                ! Store the processor id and the size of the message
+                ! a bit easier.
 
-          d1 = internalNode_1st(level)%donorBlock(i)
-          i1 = internalNode_1st(level)%donorIndices(i,1)
-          j1 = internalNode_1st(level)%donorIndices(i,2)
-          k1 = internalNode_1st(level)%donorIndices(i,3)
-          ! Idem for the halo's.
+                procID = commPatternNode_1st(level)%sendProc(i)
+                size = 3 * commPatternNode_1st(level)%nSend(i)
 
-          d2 = internalNode_1st(level)%haloBlock(i)
-          i2 = internalNode_1st(level)%haloIndices(i,1)
-          j2 = internalNode_1st(level)%haloIndices(i,2)
-          k2 = internalNode_1st(level)%haloIndices(i,3)
-          ! Copy the coordinates.
-          flowDoms(d2,level,mm)%x(i2,j2,k2,1) = &
-               flowDoms(d1,level,mm)%x(i1,j1,k1,1)
-          flowDoms(d2,level,mm)%x(i2,j2,k2,2) = &
-               flowDoms(d1,level,mm)%x(i1,j1,k1,2)
-          flowDoms(d2,level,mm)%x(i2,j2,k2,3) = &
-               flowDoms(d1,level,mm)%x(i1,j1,k1,3)
+                ! Copy the data in the correct part of the send buffer.
 
-       enddo localCopy
+                jj = ii
+                !DIR$ NOVECTOR
+                do j = 1, commPatternNode_1st(level)%nSend(i)
 
-       ! Correct the periodic halos of the internal communication
-       ! pattern
+                    ! Store the block id and the indices of the donor
+                    ! a bit easier.
 
-       call correctPeriodicCoor(level, mm,                          &
-            internalNode_1st(level)%nPeriodic,  &
-            internalNode_1st(level)%periodicData)
+                    d1 = commPatternNode_1st(level)%sendList(i)%block(j)
+                    i1 = commPatternNode_1st(level)%sendList(i)%indices(j, 1)
+                    j1 = commPatternNode_1st(level)%sendList(i)%indices(j, 2)
+                    k1 = commPatternNode_1st(level)%sendList(i)%indices(j, 3)
 
-       ! Complete the nonblocking receives in an arbitrary sequence and
-       ! copy the coordinates from the buffer into the halo's.
+                    ! Copy the coordinates of this point in the buffer.
+                    ! Update the counter jj accordingly.
 
-       size = commPatternNode_1st(level)%nProcRecv
-       completeRecvs: do i=1,commPatternNode_1st(level)%nProcRecv
+                    sendBuffer(jj) = flowDoms(d1, level, mm)%x(i1, j1, k1, 1)
+                    sendBuffer(jj + 1) = flowDoms(d1, level, mm)%x(i1, j1, k1, 2)
+                    sendBuffer(jj + 2) = flowDoms(d1, level, mm)%x(i1, j1, k1, 3)
+                    jj = jj + 3
 
-          ! Complete any of the requests.
+                end do
 
-          call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
+                ! Send the data.
 
-          ! Copy the data just arrived in the halo's.
+                call mpi_isend(sendBuffer(ii), size, adflow_real, procID, &
+                               procID, ADflow_comm_world, sendRequests(i), &
+                               ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-          ii = index
-          jj = 3*commPatternNode_1st(level)%nRecvCum(ii-1) +1
-          !DIR$ NOVECTOR
-          do j=1,commPatternNode_1st(level)%nRecv(ii)
+                ! Set ii to jj for the next processor.
 
-             ! Store the block and the indices of the halo a bit easier.
+                ii = jj
 
-             d2 = commPatternNode_1st(level)%recvList(ii)%block(j)
-             i2 = commPatternNode_1st(level)%recvList(ii)%indices(j,1)
-             j2 = commPatternNode_1st(level)%recvList(ii)%indices(j,2)
-             k2 = commPatternNode_1st(level)%recvList(ii)%indices(j,3)
+            end do sends
 
-             ! Copy the data.
+            ! Post the nonblocking receives.
 
-             flowDoms(d2,level,mm)%x(i2,j2,k2,1) = recvBuffer(jj)
-             flowDoms(d2,level,mm)%x(i2,j2,k2,2) = recvBuffer(jj+1)
-             flowDoms(d2,level,mm)%x(i2,j2,k2,3) = recvBuffer(jj+2)
-             jj = jj + 3
+            ii = 1
+            receives: do i = 1, commPatternNode_1st(level)%nProcRecv
 
-          enddo
+                ! Store the processor id and the size of the message
+                ! a bit easier.
 
-       enddo completeRecvs
+                procID = commPatternNode_1st(level)%recvProc(i)
+                size = 3 * commPatternNode_1st(level)%nRecv(i)
 
-       ! Correct the periodic halos of the external communication
-       ! pattern.
+                ! Post the receive.
 
-       call correctPeriodicCoor(level, mm,                            &
-            commPatternNode_1st(level)%nPeriodic, &
-            commPatternNode_1st(level)%periodicData)
-
-       ! Complete the nonblocking sends.
-
-       size = commPatternNode_1st(level)%nProcSend
-       do i=1,commPatternNode_1st(level)%nProcSend
-          call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-    enddo
-
-    enddo spectralLoop
-
-  end subroutine exchangeCoor
+                call mpi_irecv(recvBuffer(ii), size, adflow_real, procID, &
+                               myID, ADflow_comm_world, recvRequests(i), ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-  !      ==================================================================
-
-  subroutine correctPeriodicCoor(level, sp, nPeriodic, periodicData)
-    !
-    !       correctPeriodicCoor applies the periodic transformation to
-    !       the coordinates of the nodal halo's in periodicData.
-    !
-    use block
-    use communication
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    integer(kind=intType), intent(in) :: level, sp, nPeriodic
-    type(periodicDataType), dimension(:), pointer :: periodicData
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: nn, mm, ii, i, j, k
-    real(kind=realType)   :: dx, dy, dz
-
-    real(kind=realType), dimension(3,3) :: rotMatrix
-    real(kind=realType), dimension(3)   :: rotCenter, translation
-
-    ! Loop over the number of periodic transformations.
-
-    do nn=1,nPeriodic
-
-       ! Store the rotation matrix, rotation center and translation
-       ! vector a bit easier.
-
-       rotMatrix   = periodicData(nn)%rotMatrix
-       rotCenter   = periodicData(nn)%rotCenter
-       translation = periodicData(nn)%translation + rotCenter
+                ! And update ii.
 
-       ! Loop over the number of halo nodes for this transformation.
-       !DIR$ NOVECTOR
-       do ii=1,periodicData(nn)%nHalos
-
-          ! Store the block and the indices a bit easier.
-
-          mm = periodicData(nn)%block(ii)
-          i  = periodicData(nn)%indices(ii,1)
-          j  = periodicData(nn)%indices(ii,2)
-          k  = periodicData(nn)%indices(ii,3)
+                ii = ii + size
 
-          ! Determine the vector from the center of rotation to the
-          ! uncorrected halo value.
+            end do receives
 
-          dx = flowDoms(mm,level,sp)%x(i,j,k,1) - rotCenter(1)
-          dy = flowDoms(mm,level,sp)%x(i,j,k,2) - rotCenter(2)
-          dz = flowDoms(mm,level,sp)%x(i,j,k,3) - rotCenter(3)
+            ! Copy the local data.
+            !DIR$ NOVECTOR
+            localCopy: do i = 1, internalNode_1st(level)%nCopy
 
-          ! Compute the corrected coordinates.
+                ! Store the block and the indices of the donor a bit easier.
 
-          flowDoms(mm,level,sp)%x(i,j,k,1) = rotMatrix(1,1)*dx &
-               + rotMatrix(1,2)*dy &
-               + rotMatrix(1,3)*dz &
-               + translation(1)
-          flowDoms(mm,level,sp)%x(i,j,k,2) = rotMatrix(2,1)*dx &
-               + rotMatrix(2,2)*dy &
-               + rotMatrix(2,3)*dz &
-               + translation(2)
-          flowDoms(mm,level,sp)%x(i,j,k,3) = rotMatrix(3,1)*dx &
-               + rotMatrix(3,2)*dy &
-               + rotMatrix(3,3)*dz &
-               + translation(3)
-       enddo
-    enddo
-
-  end subroutine correctPeriodicCoor
-  subroutine exchangeCoor_b(level)
-    !
-    !       ExchangeCoor_b exchanges the *derivatives* of the given grid
-    !       level IN REVERSE MODE.
-    !
-    use constants
-    use block
-    use communication
-    use inputTimeSpectral
-    use utils, only : EChk
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level
-    !
-    !      Local variables.
-    !
-    integer :: size, procID, ierr, index
-    integer, dimension(mpi_status_size) :: mpiStatus
-
-    integer(kind=intType) :: i, j, ii, jj, mm, idim
-    integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2
+                d1 = internalNode_1st(level)%donorBlock(i)
+                i1 = internalNode_1st(level)%donorIndices(i, 1)
+                j1 = internalNode_1st(level)%donorIndices(i, 2)
+                k1 = internalNode_1st(level)%donorIndices(i, 3)
+                ! Idem for the halo's.
 
+                d2 = internalNode_1st(level)%haloBlock(i)
+                i2 = internalNode_1st(level)%haloIndices(i, 1)
+                j2 = internalNode_1st(level)%haloIndices(i, 2)
+                k2 = internalNode_1st(level)%haloIndices(i, 3)
+                ! Copy the coordinates.
+                flowDoms(d2, level, mm)%x(i2, j2, k2, 1) = &
+                    flowDoms(d1, level, mm)%x(i1, j1, k1, 1)
+                flowDoms(d2, level, mm)%x(i2, j2, k2, 2) = &
+                    flowDoms(d1, level, mm)%x(i1, j1, k1, 2)
+                flowDoms(d2, level, mm)%x(i2, j2, k2, 3) = &
+                    flowDoms(d1, level, mm)%x(i1, j1, k1, 3)
 
-    ! Loop over the number of spectral solutions.
-
-    spectralLoop: do mm=1,nTimeIntervalsSpectral
-
-       ! Send the coordinates i have to send. The data is first copied
-       ! into the send buffer and this buffer is sent.
-
-       ii = 1
-       jj = 1
-       recvs: do i=1,commPatternNode_1st(level)%nProcRecv
-
-          ! Store the processor id and the size of the message
-          ! a bit easier.
+            end do localCopy
 
-          procID = commPatternNode_1st(level)%recvProc(i)
-          size   = 3*commPatternNode_1st(level)%nRecv(i)
+            ! Correct the periodic halos of the internal communication
+            ! pattern
 
-          ! Copy the data in the correct part of the send buffer.
-          !DIR$ NOVECTOR
-          do j=1,commPatternNode_1st(level)%nRecv(i)
+            call correctPeriodicCoor(level, mm, &
+                                     internalNode_1st(level)%nPeriodic, &
+                                     internalNode_1st(level)%periodicData)
 
-             ! Store the block id and the indices of the donor
-             ! a bit easier.
+            ! Complete the nonblocking receives in an arbitrary sequence and
+            ! copy the coordinates from the buffer into the halo's.
 
-             d1 = commPatternNode_1st(level)%recvList(i)%block(j)
-             i1 = commPatternNode_1st(level)%recvList(i)%indices(j,1)
-             j1 = commPatternNode_1st(level)%recvList(i)%indices(j,2)
-             k1 = commPatternNode_1st(level)%recvList(i)%indices(j,3)
+            size = commPatternNode_1st(level)%nProcRecv
+            completeRecvs: do i = 1, commPatternNode_1st(level)%nProcRecv
 
-             ! Copy the coordinates of this point in the buffer.
-             ! Update the counter jj accordingly.
+                ! Complete any of the requests.
 
-             recvBuffer(jj)   = flowDomsd(d1,level,mm)%x(i1,j1,k1,1)
-             recvBuffer(jj+1) = flowDomsd(d1,level,mm)%x(i1,j1,k1,2)
-             recvBuffer(jj+2) = flowDomsd(d1,level,mm)%x(i1,j1,k1,3)
-             jj = jj + 3
-             flowDomsd(d1, level, mm)%x(i1,j1,k1,:) = zero
-          enddo
+                call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-          ! Send the data.
+                ! Copy the data just arrived in the halo's.
 
-          call mpi_isend(recvBuffer(ii), size, adflow_real, procID,    &
-               procID, ADflow_comm_world, recvRequests(i), &
-               ierr)
-          call EChk(ierr,__FILE__,__LINE__)
+                ii = index
+                jj = 3 * commPatternNode_1st(level)%nRecvCum(ii - 1) + 1
+                !DIR$ NOVECTOR
+                do j = 1, commPatternNode_1st(level)%nRecv(ii)
 
-          ! Set ii to jj for the next processor.
+                    ! Store the block and the indices of the halo a bit easier.
 
-          ii = jj
+                    d2 = commPatternNode_1st(level)%recvList(ii)%block(j)
+                    i2 = commPatternNode_1st(level)%recvList(ii)%indices(j, 1)
+                    j2 = commPatternNode_1st(level)%recvList(ii)%indices(j, 2)
+                    k2 = commPatternNode_1st(level)%recvList(ii)%indices(j, 3)
 
-       enddo recvs
+                    ! Copy the data.
 
-       ! Post the nonblocking receives.
+                    flowDoms(d2, level, mm)%x(i2, j2, k2, 1) = recvBuffer(jj)
+                    flowDoms(d2, level, mm)%x(i2, j2, k2, 2) = recvBuffer(jj + 1)
+                    flowDoms(d2, level, mm)%x(i2, j2, k2, 3) = recvBuffer(jj + 2)
+                    jj = jj + 3
 
-       ii = 1
-       send: do i=1,commPatternNode_1st(level)%nProcSend
+                end do
 
-          ! Store the processor id and the size of the message
-          ! a bit easier.
+            end do completeRecvs
+
+            ! Correct the periodic halos of the external communication
+            ! pattern.
+
+            call correctPeriodicCoor(level, mm, &
+                                     commPatternNode_1st(level)%nPeriodic, &
+                                     commPatternNode_1st(level)%periodicData)
+
+            ! Complete the nonblocking sends.
+
+            size = commPatternNode_1st(level)%nProcSend
+            do i = 1, commPatternNode_1st(level)%nProcSend
+                call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+            end do
+
+        end do spectralLoop
+
+    end subroutine exchangeCoor
+
+    !      ==================================================================
+
+    subroutine correctPeriodicCoor(level, sp, nPeriodic, periodicData)
+        !
+        !       correctPeriodicCoor applies the periodic transformation to
+        !       the coordinates of the nodal halo's in periodicData.
+        !
+        use block
+        use communication
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        integer(kind=intType), intent(in) :: level, sp, nPeriodic
+        type(periodicDataType), dimension(:), pointer :: periodicData
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: nn, mm, ii, i, j, k
+        real(kind=realType) :: dx, dy, dz
+
+        real(kind=realType), dimension(3, 3) :: rotMatrix
+        real(kind=realType), dimension(3) :: rotCenter, translation
+
+        ! Loop over the number of periodic transformations.
+
+        do nn = 1, nPeriodic
+
+            ! Store the rotation matrix, rotation center and translation
+            ! vector a bit easier.
+
+            rotMatrix = periodicData(nn)%rotMatrix
+            rotCenter = periodicData(nn)%rotCenter
+            translation = periodicData(nn)%translation + rotCenter
+
+            ! Loop over the number of halo nodes for this transformation.
+            !DIR$ NOVECTOR
+            do ii = 1, periodicData(nn)%nHalos
+
+                ! Store the block and the indices a bit easier.
+
+                mm = periodicData(nn)%block(ii)
+                i = periodicData(nn)%indices(ii, 1)
+                j = periodicData(nn)%indices(ii, 2)
+                k = periodicData(nn)%indices(ii, 3)
+
+                ! Determine the vector from the center of rotation to the
+                ! uncorrected halo value.
+
+                dx = flowDoms(mm, level, sp)%x(i, j, k, 1) - rotCenter(1)
+                dy = flowDoms(mm, level, sp)%x(i, j, k, 2) - rotCenter(2)
+                dz = flowDoms(mm, level, sp)%x(i, j, k, 3) - rotCenter(3)
+
+                ! Compute the corrected coordinates.
+
+                flowDoms(mm, level, sp)%x(i, j, k, 1) = rotMatrix(1, 1) * dx &
+                                                        + rotMatrix(1, 2) * dy &
+                                                        + rotMatrix(1, 3) * dz &
+                                                        + translation(1)
+                flowDoms(mm, level, sp)%x(i, j, k, 2) = rotMatrix(2, 1) * dx &
+                                                        + rotMatrix(2, 2) * dy &
+                                                        + rotMatrix(2, 3) * dz &
+                                                        + translation(2)
+                flowDoms(mm, level, sp)%x(i, j, k, 3) = rotMatrix(3, 1) * dx &
+                                                        + rotMatrix(3, 2) * dy &
+                                                        + rotMatrix(3, 3) * dz &
+                                                        + translation(3)
+            end do
+        end do
+
+    end subroutine correctPeriodicCoor
+    subroutine exchangeCoor_b(level)
+        !
+        !       ExchangeCoor_b exchanges the *derivatives* of the given grid
+        !       level IN REVERSE MODE.
+        !
+        use constants
+        use block
+        use communication
+        use inputTimeSpectral
+        use utils, only: EChk
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level
+        !
+        !      Local variables.
+        !
+        integer :: size, procID, ierr, index
+        integer, dimension(mpi_status_size) :: mpiStatus
+
+        integer(kind=intType) :: i, j, ii, jj, mm, idim
+        integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2
+
+        ! Loop over the number of spectral solutions.
+
+        spectralLoop: do mm = 1, nTimeIntervalsSpectral
+
+            ! Send the coordinates i have to send. The data is first copied
+            ! into the send buffer and this buffer is sent.
+
+            ii = 1
+            jj = 1
+            recvs: do i = 1, commPatternNode_1st(level)%nProcRecv
+
+                ! Store the processor id and the size of the message
+                ! a bit easier.
+
+                procID = commPatternNode_1st(level)%recvProc(i)
+                size = 3 * commPatternNode_1st(level)%nRecv(i)
+
+                ! Copy the data in the correct part of the send buffer.
+                !DIR$ NOVECTOR
+                do j = 1, commPatternNode_1st(level)%nRecv(i)
 
-          procID = commPatternNode_1st(level)%sendProc(i)
-          size   = 3*commPatternNode_1st(level)%nSend(i)
+                    ! Store the block id and the indices of the donor
+                    ! a bit easier.
 
-          ! Post the receive.
+                    d1 = commPatternNode_1st(level)%recvList(i)%block(j)
+                    i1 = commPatternNode_1st(level)%recvList(i)%indices(j, 1)
+                    j1 = commPatternNode_1st(level)%recvList(i)%indices(j, 2)
+                    k1 = commPatternNode_1st(level)%recvList(i)%indices(j, 3)
 
-          call mpi_irecv(sendBuffer(ii), size, adflow_real, procID, &
-               myID, ADflow_comm_world, sendRequests(i), ierr)
-          call EChk(ierr,__FILE__,__LINE__)
+                    ! Copy the coordinates of this point in the buffer.
+                    ! Update the counter jj accordingly.
 
-          ! And update ii.
+                    recvBuffer(jj) = flowDomsd(d1, level, mm)%x(i1, j1, k1, 1)
+                    recvBuffer(jj + 1) = flowDomsd(d1, level, mm)%x(i1, j1, k1, 2)
+                    recvBuffer(jj + 2) = flowDomsd(d1, level, mm)%x(i1, j1, k1, 3)
+                    jj = jj + 3
+                    flowDomsd(d1, level, mm)%x(i1, j1, k1, :) = zero
+                end do
 
-          ii = ii + size
+                ! Send the data.
 
-       enddo send
+                call mpi_isend(recvBuffer(ii), size, adflow_real, procID, &
+                               procID, ADflow_comm_world, recvRequests(i), &
+                               ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-       ! Copy the local data.
-       !DIR$ NOVECTOR
-       localCopy: do i=1,internalNode_1st(level)%nCopy
+                ! Set ii to jj for the next processor.
 
-          ! Store the block and the indices of the donor a bit easier.
+                ii = jj
 
-          d1 = internalNode_1st(level)%donorBlock(i)
-          i1 = internalNode_1st(level)%donorIndices(i,1)
-          j1 = internalNode_1st(level)%donorIndices(i,2)
-          k1 = internalNode_1st(level)%donorIndices(i,3)
-          ! Idem for the halo's.
+            end do recvs
 
-          d2 = internalNode_1st(level)%haloBlock(i)
-          i2 = internalNode_1st(level)%haloIndices(i,1)
-          j2 = internalNode_1st(level)%haloIndices(i,2)
-          k2 = internalNode_1st(level)%haloIndices(i,3)
+            ! Post the nonblocking receives.
 
-          ! Sum into the '1' values fro the '2' values
-          do idim=1,3
-             flowDomsd(d1,level,mm)%x(i1,j1,k1,idim) = flowDomsd(d1,level,mm)%x(i1,j1,k1,idim) + &
-                  flowDomsd(d2,level,mm)%x(i2,j2,k2,idim)
-             flowDomsd(d2, level, mm)%x(i2,j2,k2,idim) = zero
-          end do
-       enddo localCopy
+            ii = 1
+            send: do i = 1, commPatternNode_1st(level)%nProcSend
 
-       ! Correct the periodic halos of the internal communication
-       ! pattern
+                ! Store the processor id and the size of the message
+                ! a bit easier.
 
-       ! NOT IMPLEMENTED
-       ! call correctPeriodicCoor(level, mm,                          &
-       !      internalNode_1st(level)%nPeriodic,  &
-       !      internalNode_1st(level)%periodicData)
+                procID = commPatternNode_1st(level)%sendProc(i)
+                size = 3 * commPatternNode_1st(level)%nSend(i)
 
-       ! Complete the nonblocking receives in an arbitrary sequence and
-       ! copy the coordinates from the buffer into the halo's.
+                ! Post the receive.
 
-       size = commPatternNode_1st(level)%nProcSend
-       completeSends: do i=1,commPatternNode_1st(level)%nProcSend
+                call mpi_irecv(sendBuffer(ii), size, adflow_real, procID, &
+                               myID, ADflow_comm_world, sendRequests(i), ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-          ! Complete any of the requests.
+                ! And update ii.
 
-          call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
+                ii = ii + size
 
-          ! Copy the data just arrived in the halo's.
+            end do send
 
-          ii = index
-          jj = 3*commPatternNode_1st(level)%nSendCum(ii-1)
-          !DIR$ NOVECTOR
-          do j=1,commPatternNode_1st(level)%nSend(ii)
+            ! Copy the local data.
+            !DIR$ NOVECTOR
+            localCopy: do i = 1, internalNode_1st(level)%nCopy
 
-             ! Store the block and the indices of the halo a bit easier.
+                ! Store the block and the indices of the donor a bit easier.
 
-             d2 = commPatternNode_1st(level)%sendList(ii)%block(j)
-             i2 = commPatternNode_1st(level)%sendList(ii)%indices(j,1)
-             j2 = commPatternNode_1st(level)%sendList(ii)%indices(j,2)
-             k2 = commPatternNode_1st(level)%sendList(ii)%indices(j,3)
+                d1 = internalNode_1st(level)%donorBlock(i)
+                i1 = internalNode_1st(level)%donorIndices(i, 1)
+                j1 = internalNode_1st(level)%donorIndices(i, 2)
+                k1 = internalNode_1st(level)%donorIndices(i, 3)
+                ! Idem for the halo's.
 
-             ! Sum into the '2' values from the recv buffer
-             do idim=1,3
-                flowDomsd(d2,level,mm)%x(i2,j2,k2,idim) = flowDomsd(d2,level,mm)%x(i2,j2,k2,idim) + &
-                     sendBuffer(jj + idim )
-             end do
-             jj = jj + 3
+                d2 = internalNode_1st(level)%haloBlock(i)
+                i2 = internalNode_1st(level)%haloIndices(i, 1)
+                j2 = internalNode_1st(level)%haloIndices(i, 2)
+                k2 = internalNode_1st(level)%haloIndices(i, 3)
 
-          enddo
+                ! Sum into the '1' values fro the '2' values
+                do idim = 1, 3
+                    flowDomsd(d1, level, mm)%x(i1, j1, k1, idim) = flowDomsd(d1, level, mm)%x(i1, j1, k1, idim) + &
+                                                                   flowDomsd(d2, level, mm)%x(i2, j2, k2, idim)
+                    flowDomsd(d2, level, mm)%x(i2, j2, k2, idim) = zero
+                end do
+            end do localCopy
 
-       enddo completeSends
+            ! Correct the periodic halos of the internal communication
+            ! pattern
 
-       ! Correct the periodic halos of the external communication
-       ! pattern.
-       ! NOT IMLEMENTED
-       ! call correctPeriodicCoor(level, mm,                            &
-       !      commPatternNode_1st(level)%nPeriodic, &
-       !      commPatternNode_1st(level)%periodicData)
+            ! NOT IMPLEMENTED
+            ! call correctPeriodicCoor(level, mm,                          &
+            !      internalNode_1st(level)%nPeriodic,  &
+            !      internalNode_1st(level)%periodicData)
 
-       ! Complete the nonblocking sends.
+            ! Complete the nonblocking receives in an arbitrary sequence and
+            ! copy the coordinates from the buffer into the halo's.
 
-       size = commPatternNode_1st(level)%nProcRecv
-       do i=1,commPatternNode_1st(level)%nProcRecv
-          call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-       enddo
+            size = commPatternNode_1st(level)%nProcSend
+            completeSends: do i = 1, commPatternNode_1st(level)%nProcSend
 
-    enddo spectralLoop
+                ! Complete any of the requests.
 
-  end subroutine exchangeCoor_b
-  subroutine exchangeCoor_d(level)
-    !
-    !       ExchangeCoor_d exchanges the *derivatives* of the given grid
-    !       level.
-    !
-    use block
-    use communication
-    use inputTimeSpectral
-    use utils, only : EChk
-    implicit none
-    !
-    !      Subroutine arguments.
-    !
-    integer(kind=intType), intent(in) :: level
-    !
-    !      Local variables.
-    !
-    integer :: size, procID, ierr, index
-    integer, dimension(mpi_status_size) :: mpiStatus
+                call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-    integer(kind=intType) :: i, j, ii, jj, mm
-    integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2
+                ! Copy the data just arrived in the halo's.
 
+                ii = index
+                jj = 3 * commPatternNode_1st(level)%nSendCum(ii - 1)
+                !DIR$ NOVECTOR
+                do j = 1, commPatternNode_1st(level)%nSend(ii)
 
-    ! Loop over the number of spectral solutions.
+                    ! Store the block and the indices of the halo a bit easier.
 
-    spectralLoop: do mm=1,nTimeIntervalsSpectral
+                    d2 = commPatternNode_1st(level)%sendList(ii)%block(j)
+                    i2 = commPatternNode_1st(level)%sendList(ii)%indices(j, 1)
+                    j2 = commPatternNode_1st(level)%sendList(ii)%indices(j, 2)
+                    k2 = commPatternNode_1st(level)%sendList(ii)%indices(j, 3)
 
-       ! Send the coordinates i have to send. The data is first copied
-       ! into the send buffer and this buffer is sent.
+                    ! Sum into the '2' values from the recv buffer
+                    do idim = 1, 3
+                        flowDomsd(d2, level, mm)%x(i2, j2, k2, idim) = flowDomsd(d2, level, mm)%x(i2, j2, k2, idim) + &
+                                                                       sendBuffer(jj + idim)
+                    end do
+                    jj = jj + 3
 
-       ii = 1
-       sends: do i=1,commPatternNode_1st(level)%nProcSend
+                end do
 
-          ! Store the processor id and the size of the message
-          ! a bit easier.
+            end do completeSends
 
-          procID = commPatternNode_1st(level)%sendProc(i)
-          size   = 3*commPatternNode_1st(level)%nSend(i)
+            ! Correct the periodic halos of the external communication
+            ! pattern.
+            ! NOT IMLEMENTED
+            ! call correctPeriodicCoor(level, mm,                            &
+            !      commPatternNode_1st(level)%nPeriodic, &
+            !      commPatternNode_1st(level)%periodicData)
 
-          ! Copy the data in the correct part of the send buffer.
+            ! Complete the nonblocking sends.
 
-          jj = ii
-          !DIR$ NOVECTOR
-          do j=1,commPatternNode_1st(level)%nSend(i)
+            size = commPatternNode_1st(level)%nProcRecv
+            do i = 1, commPatternNode_1st(level)%nProcRecv
+                call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+            end do
 
-             ! Store the block id and the indices of the donor
-             ! a bit easier.
+        end do spectralLoop
 
-             d1 = commPatternNode_1st(level)%sendList(i)%block(j)
-             i1 = commPatternNode_1st(level)%sendList(i)%indices(j,1)
-             j1 = commPatternNode_1st(level)%sendList(i)%indices(j,2)
-             k1 = commPatternNode_1st(level)%sendList(i)%indices(j,3)
+    end subroutine exchangeCoor_b
+    subroutine exchangeCoor_d(level)
+        !
+        !       ExchangeCoor_d exchanges the *derivatives* of the given grid
+        !       level.
+        !
+        use block
+        use communication
+        use inputTimeSpectral
+        use utils, only: EChk
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level
+        !
+        !      Local variables.
+        !
+        integer :: size, procID, ierr, index
+        integer, dimension(mpi_status_size) :: mpiStatus
 
-             ! Copy the coordinates of this point in the buffer.
-             ! Update the counter jj accordingly.
+        integer(kind=intType) :: i, j, ii, jj, mm
+        integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2
 
-             sendBuffer(jj)   = flowDomsd(d1,level,mm)%x(i1,j1,k1,1)
-             sendBuffer(jj+1) = flowDomsd(d1,level,mm)%x(i1,j1,k1,2)
-             sendBuffer(jj+2) = flowDomsd(d1,level,mm)%x(i1,j1,k1,3)
-             jj = jj + 3
+        ! Loop over the number of spectral solutions.
 
-          enddo
+        spectralLoop: do mm = 1, nTimeIntervalsSpectral
 
-          ! Send the data.
+            ! Send the coordinates i have to send. The data is first copied
+            ! into the send buffer and this buffer is sent.
 
-          call mpi_isend(sendBuffer(ii), size, adflow_real, procID,    &
-               procID, ADflow_comm_world, sendRequests(i), &
-               ierr)
-          call EChk(ierr,__FILE__,__LINE__)
+            ii = 1
+            sends: do i = 1, commPatternNode_1st(level)%nProcSend
 
-          ! Set ii to jj for the next processor.
+                ! Store the processor id and the size of the message
+                ! a bit easier.
 
-          ii = jj
+                procID = commPatternNode_1st(level)%sendProc(i)
+                size = 3 * commPatternNode_1st(level)%nSend(i)
 
-       enddo sends
+                ! Copy the data in the correct part of the send buffer.
 
-       ! Post the nonblocking receives.
+                jj = ii
+                !DIR$ NOVECTOR
+                do j = 1, commPatternNode_1st(level)%nSend(i)
 
-       ii = 1
-       receives: do i=1,commPatternNode_1st(level)%nProcRecv
+                    ! Store the block id and the indices of the donor
+                    ! a bit easier.
 
-          ! Store the processor id and the size of the message
-          ! a bit easier.
+                    d1 = commPatternNode_1st(level)%sendList(i)%block(j)
+                    i1 = commPatternNode_1st(level)%sendList(i)%indices(j, 1)
+                    j1 = commPatternNode_1st(level)%sendList(i)%indices(j, 2)
+                    k1 = commPatternNode_1st(level)%sendList(i)%indices(j, 3)
 
-          procID = commPatternNode_1st(level)%recvProc(i)
-          size   = 3*commPatternNode_1st(level)%nRecv(i)
+                    ! Copy the coordinates of this point in the buffer.
+                    ! Update the counter jj accordingly.
 
-          ! Post the receive.
+                    sendBuffer(jj) = flowDomsd(d1, level, mm)%x(i1, j1, k1, 1)
+                    sendBuffer(jj + 1) = flowDomsd(d1, level, mm)%x(i1, j1, k1, 2)
+                    sendBuffer(jj + 2) = flowDomsd(d1, level, mm)%x(i1, j1, k1, 3)
+                    jj = jj + 3
 
-          call mpi_irecv(recvBuffer(ii), size, adflow_real, procID, &
-               myID, ADflow_comm_world, recvRequests(i), ierr)
-          call EChk(ierr,__FILE__,__LINE__)
+                end do
 
-          ! And update ii.
+                ! Send the data.
 
-          ii = ii + size
+                call mpi_isend(sendBuffer(ii), size, adflow_real, procID, &
+                               procID, ADflow_comm_world, sendRequests(i), &
+                               ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-       enddo receives
+                ! Set ii to jj for the next processor.
 
-       ! Copy the local data.
-       !DIR$ NOVECTOR
-       localCopy: do i=1,internalNode_1st(level)%nCopy
+                ii = jj
 
-          ! Store the block and the indices of the donor a bit easier.
+            end do sends
 
-          d1 = internalNode_1st(level)%donorBlock(i)
-          i1 = internalNode_1st(level)%donorIndices(i,1)
-          j1 = internalNode_1st(level)%donorIndices(i,2)
-          k1 = internalNode_1st(level)%donorIndices(i,3)
-          ! Idem for the halo's.
+            ! Post the nonblocking receives.
 
-          d2 = internalNode_1st(level)%haloBlock(i)
-          i2 = internalNode_1st(level)%haloIndices(i,1)
-          j2 = internalNode_1st(level)%haloIndices(i,2)
-          k2 = internalNode_1st(level)%haloIndices(i,3)
-          ! Copy the coordinates.
-          flowDomsd(d2,level,mm)%x(i2,j2,k2,1) = &
-               flowDomsd(d1,level,mm)%x(i1,j1,k1,1)
-          flowDomsd(d2,level,mm)%x(i2,j2,k2,2) = &
-               flowDomsd(d1,level,mm)%x(i1,j1,k1,2)
-          flowDomsd(d2,level,mm)%x(i2,j2,k2,3) = &
-               flowDomsd(d1,level,mm)%x(i1,j1,k1,3)
+            ii = 1
+            receives: do i = 1, commPatternNode_1st(level)%nProcRecv
 
-       enddo localCopy
+                ! Store the processor id and the size of the message
+                ! a bit easier.
 
-       ! Correct the periodic halos of the internal communication
-       ! pattern
+                procID = commPatternNode_1st(level)%recvProc(i)
+                size = 3 * commPatternNode_1st(level)%nRecv(i)
 
-       ! NOT IMPLEMENTED
-       ! call correctPeriodicCoor(level, mm,                          &
-       !      internalNode_1st(level)%nPeriodic,  &
-       !      internalNode_1st(level)%periodicData)
+                ! Post the receive.
 
-       ! Complete the nonblocking receives in an arbitrary sequence and
-       ! copy the coordinates from the buffer into the halo's.
+                call mpi_irecv(recvBuffer(ii), size, adflow_real, procID, &
+                               myID, ADflow_comm_world, recvRequests(i), ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-       size = commPatternNode_1st(level)%nProcRecv
-       completeRecvs: do i=1,commPatternNode_1st(level)%nProcRecv
+                ! And update ii.
 
-          ! Complete any of the requests.
+                ii = ii + size
 
-          call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
+            end do receives
 
-          ! Copy the data just arrived in the halo's.
+            ! Copy the local data.
+            !DIR$ NOVECTOR
+            localCopy: do i = 1, internalNode_1st(level)%nCopy
 
-          ii = index
-          jj = 3*commPatternNode_1st(level)%nRecvCum(ii-1) +1
-          !DIR$ NOVECTOR
-          do j=1,commPatternNode_1st(level)%nRecv(ii)
+                ! Store the block and the indices of the donor a bit easier.
 
-             ! Store the block and the indices of the halo a bit easier.
+                d1 = internalNode_1st(level)%donorBlock(i)
+                i1 = internalNode_1st(level)%donorIndices(i, 1)
+                j1 = internalNode_1st(level)%donorIndices(i, 2)
+                k1 = internalNode_1st(level)%donorIndices(i, 3)
+                ! Idem for the halo's.
 
-             d2 = commPatternNode_1st(level)%recvList(ii)%block(j)
-             i2 = commPatternNode_1st(level)%recvList(ii)%indices(j,1)
-             j2 = commPatternNode_1st(level)%recvList(ii)%indices(j,2)
-             k2 = commPatternNode_1st(level)%recvList(ii)%indices(j,3)
+                d2 = internalNode_1st(level)%haloBlock(i)
+                i2 = internalNode_1st(level)%haloIndices(i, 1)
+                j2 = internalNode_1st(level)%haloIndices(i, 2)
+                k2 = internalNode_1st(level)%haloIndices(i, 3)
+                ! Copy the coordinates.
+                flowDomsd(d2, level, mm)%x(i2, j2, k2, 1) = &
+                    flowDomsd(d1, level, mm)%x(i1, j1, k1, 1)
+                flowDomsd(d2, level, mm)%x(i2, j2, k2, 2) = &
+                    flowDomsd(d1, level, mm)%x(i1, j1, k1, 2)
+                flowDomsd(d2, level, mm)%x(i2, j2, k2, 3) = &
+                    flowDomsd(d1, level, mm)%x(i1, j1, k1, 3)
 
-             ! Copy the data.
+            end do localCopy
 
-             flowDomsd(d2,level,mm)%x(i2,j2,k2,1) = recvBuffer(jj)
-             flowDomsd(d2,level,mm)%x(i2,j2,k2,2) = recvBuffer(jj+1)
-             flowDomsd(d2,level,mm)%x(i2,j2,k2,3) = recvBuffer(jj+2)
-             jj = jj + 3
+            ! Correct the periodic halos of the internal communication
+            ! pattern
 
-          enddo
+            ! NOT IMPLEMENTED
+            ! call correctPeriodicCoor(level, mm,                          &
+            !      internalNode_1st(level)%nPeriodic,  &
+            !      internalNode_1st(level)%periodicData)
 
-       enddo completeRecvs
+            ! Complete the nonblocking receives in an arbitrary sequence and
+            ! copy the coordinates from the buffer into the halo's.
 
-       ! Correct the periodic halos of the external communication
-       ! pattern.
-       ! NOT IMLEMENTED
-       ! call correctPeriodicCoor(level, mm,                            &
-       !      commPatternNode_1st(level)%nPeriodic, &
-       !      commPatternNode_1st(level)%periodicData)
+            size = commPatternNode_1st(level)%nProcRecv
+            completeRecvs: do i = 1, commPatternNode_1st(level)%nProcRecv
 
-       ! Complete the nonblocking sends.
+                ! Complete any of the requests.
 
-       size = commPatternNode_1st(level)%nProcSend
-       do i=1,commPatternNode_1st(level)%nProcSend
-          call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-    enddo
+                call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
 
-    enddo spectralLoop
+                ! Copy the data just arrived in the halo's.
 
-  end subroutine exchangeCoor_d
+                ii = index
+                jj = 3 * commPatternNode_1st(level)%nRecvCum(ii - 1) + 1
+                !DIR$ NOVECTOR
+                do j = 1, commPatternNode_1st(level)%nRecv(ii)
 
-  ! -----------------------------------------------------------------
-  !              Comm routines for zippers
-  ! -----------------------------------------------------------------
+                    ! Store the block and the indices of the halo a bit easier.
 
-  subroutine flowIntegrationZipperComm(isInflow, vars, sps)
+                    d2 = commPatternNode_1st(level)%recvList(ii)%block(j)
+                    i2 = commPatternNode_1st(level)%recvList(ii)%indices(j, 1)
+                    j2 = commPatternNode_1st(level)%recvList(ii)%indices(j, 2)
+                    k2 = commPatternNode_1st(level)%recvList(ii)%indices(j, 3)
 
-    ! This routine could technically be inside of the
-    ! flowIntegrationZipper subroutine but we split it out becuase it
-    ! has petsc comm stuff that will differentiate manually that
-    ! tapenade doesn't need to see.
+                    ! Copy the data.
 
-    use constants
-    use blockPointers, only : BCFaceID, BCData, addGridVelocities, nDom, nBocos, BCType
-    use BCPointers, only : sFace, ww1, ww2, pp1, pp2, gamma1, gamma2, xx
-    use oversetData, only : zipperMeshes, zipperMesh
-    use surfaceFamilies, only : familyExchange, BCFamExchange
-    use utils, only : setPointers, setBCPointers, EChk
+                    flowDomsd(d2, level, mm)%x(i2, j2, k2, 1) = recvBuffer(jj)
+                    flowDomsd(d2, level, mm)%x(i2, j2, k2, 2) = recvBuffer(jj + 1)
+                    flowDomsd(d2, level, mm)%x(i2, j2, k2, 3) = recvBuffer(jj + 2)
+                    jj = jj + 3
 
-#include <petsc/finclude/petsc.h>
-    use petsc
-    implicit none
-
-    ! Input variables
-    logical, intent(in) :: isInflow
-    real(kind=realType), dimension(:, :) :: vars
-    integer(kind=intType) :: sps
-
-    ! Working variables
-    integer(kind=intType) :: ii, iVar, i,j, iBeg, iEnd, jBeg, jEnd, ierr, nn, mm
-    real(kind=realType), dimension(:), pointer :: localPtr
-    type(zipperMesh), pointer :: zipper
-    type(familyExchange), pointer :: exch
-
-    ! Set the zipper pointer to the zipper for inflow/outflow conditions
-    if (isInflow) then
-        zipper => zipperMeshes(iBCGroupInflow)
-        exch => BCFamExchange(iBCGroupInflow, sps)
-    else
-        zipper => zipperMeshes(iBCGroupOutFlow)
-        exch => BCFamExchange(iBCGroupOutflow, sps)
-    end if
-
-    ! Note that we can generate all the nodal values we need locally
-    ! using simple arithematic averaging since they are all flow
-    ! properties. There is no need to use the cellToNodeScatter stuff
-    ! here like for the forces.
-    varLoop: do iVar=1, nZIppFlowComm
-       call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ii = 0
-       domainLoop: do nn=1, nDom
-          call setPointers(nn, 1, sps)
-          bocoLoop: do mm=1, nBocos
-
-             if (((BCType(mm) == SubsonicInflow .or. &
-                  BCType(mm) == SupersonicInflow) .and. (isInflow)) &
-                  .or. &
-                  (BCType(mm) == SubsonicOutflow .or. &
-                  BCType(mm) == SupersonicOutflow) .and. (.not. isInflow)) then
-
-                call setBCPointers(mm, .True.)
-                iBeg = BCdata(mm)%inBeg; iEnd=BCData(mm)%inEnd
-                jBeg = BCdata(mm)%jnBeg; jEnd=BCData(mm)%jnEnd
-                do j=jBeg, jEnd
-                   do i=iBeg, iEnd
-                      ii = ii + 1
-                      select case(iVar)
-                      case (iRho, iVx, iVy, iVz)
-                         localPtr(ii) = eighth*(&
-                              ww1(i, j,   iVar) + ww1(i+1, j,   iVar) + &
-                              ww1(i, j+1, ivar) + ww1(i+1, j+1, iVar) + &
-                              ww2(i, j,   iVar) + ww2(i+1, j,   iVar) + &
-                              ww2(i, j+1, ivar) + ww2(i+1, j+1, iVar))
-                      case (iZippFlowP)
-                         localPtr(ii) = eighth*(&
-                              pp1(i, j  ) + pp1(i+1, j  ) + &
-                              pp1(i, j+1) + pp1(i+1, j+1) + &
-                              pp2(i, j  ) + pp2(i+1, j  ) + &
-                              pp2(i, j+1) + pp2(i+1, j+1))
-                      case (iZippFlowGamma)
-                         localPtr(ii) = eighth*(&
-                              gamma1(i, j  ) + gamma1(i+1, j  ) + &
-                              gamma1(i, j+1) + gamma1(i+1, j+1) + &
-                              gamma2(i, j  ) + gamma2(i+1, j  ) + &
-                              gamma2(i, j+1) + gamma2(i+1, j+1))
-                      case (iZippFlowSface)
-                         if (addGridVelocities) then
-                            localPtr(ii) = fourth*(&
-                                 sface(i, j  ) + sface(i+1, j  ) + &
-                                 sface(i, j+1) + sface(i+1, j+1))
-                         else
-                            localPtr(ii) = zero
-                         end if
-                      case (iZippFlowX, iZippFlowY, iZippFlowZ)
-                         localPtr(ii) = xx(i+1, j+1, iVar-iZippFlowX+1)
-                      end select
-                   end do
                 end do
-             end if
-          end do bocoLoop
-       end do domainLoop
 
-       ! Return pointer to nodeValLocal
-       call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
+            end do completeRecvs
 
-       call VecPlaceArray(zipper%localVal, vars(:, iVar), ierr)
-       call EChk(ierr,__FILE__,__LINE__)
+            ! Correct the periodic halos of the external communication
+            ! pattern.
+            ! NOT IMLEMENTED
+            ! call correctPeriodicCoor(level, mm,                            &
+            !      commPatternNode_1st(level)%nPeriodic, &
+            !      commPatternNode_1st(level)%periodicData)
 
-       ! Send these values to the root using the zipper scatter.
-       call VecScatterBegin(zipper%scatter, exch%nodeValLocal,&
-            zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
+            ! Complete the nonblocking sends.
 
-       call VecScatterEnd(zipper%scatter, exch%nodeValLocal,&
-            zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
+            size = commPatternNode_1st(level)%nProcSend
+            do i = 1, commPatternNode_1st(level)%nProcSend
+                call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+            end do
 
-       ! Reset the original petsc vector.
-       call vecResetArray(zipper%localVal, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-    end do varLoop
+        end do spectralLoop
 
-  end subroutine flowIntegrationZipperComm
+    end subroutine exchangeCoor_d
 
-  subroutine flowIntegrationZipperComm_d(isInflow, vars, varsd, sps)
+    ! -----------------------------------------------------------------
+    !              Comm routines for zippers
+    ! -----------------------------------------------------------------
 
-    ! Forward mode linearization of flowIntegratoinZipperComm
+    subroutine flowIntegrationZipperComm(isInflow, vars, sps)
 
-    use constants
-    use blockPointers, only : BCFaceID, BCData, addGridVelocities, nDom, nBocos, BCType
-    use BCPointers, only : sFaced, ww1d, ww2d, pp1d, pp2d, xxd
-    use oversetData, only : zipperMeshes, zipperMesh
-    use surfaceFamilies, only : familyExchange, BCFamExchange
-    use utils, only : setPointers_d, setBCPointers_d, EChk
-#include <petsc/finclude/petsc.h>
-    use petsc
-    implicit none
-    ! Input variables
-    logical, intent(in) :: isInflow
-    real(kind=realType), dimension(:, :) :: vars, varsd
-    integer(kind=intType) :: sps
-
-    ! Working variables
-    integer(kind=intType) :: ii, iVar, i,j, iBeg, iEnd, jBeg, jEnd, ierr, nn, mm
-    real(kind=realType), dimension(:), pointer :: localPtr
-    type(zipperMesh), pointer :: zipper
-    type(familyExchange), pointer :: exch
-
-
-
-    ! Set the zipper pointer to the zipper for inflow/outflow conditions
-    if (isInflow) then
-        ! Need to generate the vars themselves.
-        call flowIntegrationZipperComm(.true., vars, sps)
-        zipper => zipperMeshes(iBCGroupInflow)
-        exch => BCFamExchange(iBCGroupInflow, sps)
-    else
-        ! Need to generate the vars themselves.
-        call flowIntegrationZipperComm(.false., vars, sps)
-        zipper => zipperMeshes(iBCGroupOutFlow)
-        exch => BCFamExchange(iBCGroupOutflow, sps)
-    end if
-
-    ! Note that we can generate all the nodal values we need locally
-    ! using simple arithematic averaging since they are all flow
-    ! properties. There is no need to use the cellToNodeScatter stuff
-    ! here like for the forces.
-    varLoop: do iVar=1, nZIppFlowComm
-       call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ii = 0
-       domainLoop: do nn=1, nDom
-          call setPointers_d(nn, 1, sps)
-          bocoLoop: do mm=1, nBocos
-             if (((BCType(mm) == SubsonicInflow .or. &
-                  BCType(mm) == SupersonicInflow) .and. isInflow) &
-                  .or. &
-                  (BCType(mm) == SubsonicOutflow .or. &
-                  BCType(mm) == SupersonicOutflow) .and. (.not. isInflow)) then
-
-                call setBCPointers_d(mm, .True.)
-                iBeg = BCdata(mm)%inBeg; iEnd=BCData(mm)%inEnd
-                jBeg = BCdata(mm)%jnBeg; jEnd=BCData(mm)%jnEnd
-                do j=jBeg, jEnd
-                   do i=iBeg, iEnd
-                      ii = ii + 1
-                      select case(iVar)
-                      case (iRho, iVx, iVy, iVz)
-                         localPtr(ii) = eighth*(&
-                              ww1d(i, j,   iVar) + ww1d(i+1, j,   iVar) + &
-                              ww1d(i, j+1, ivar) + ww1d(i+1, j+1, iVar) + &
-                              ww2d(i, j,   iVar) + ww2d(i+1, j,   iVar) + &
-                              ww2d(i, j+1, ivar) + ww2d(i+1, j+1, iVar))
-                      case (iZippFlowP)
-                         localPtr(ii) = eighth*(&
-                              pp1d(i, j  ) + pp1d(i+1, j  ) + &
-                              pp1d(i, j+1) + pp1d(i+1, j+1) + &
-                              pp2d(i, j  ) + pp2d(i+1, j  ) + &
-                              pp2d(i, j+1) + pp2d(i+1, j+1))
-                      case (iZippFlowGamma)
-                         ! Gamma is not currently an active variable
-                         localPtr(ii) = zero
-                      case (iZippFlowSface)
-                         if (addGridVelocities) then
-                            localPtr(ii) = fourth*(&
-                                 sfaced(i, j  ) + sfaced(i+1, j  ) + &
-                                 sfaced(i, j+1) + sfaced(i+1, j+1))
-                         else
-                            localPtr(ii) = zero
-                         end if
-                      case (iZippFlowX, iZippFlowY, iZippFlowZ)
-                         localPtr(ii) = xxd(i+1, j+1, iVar-iZippFlowX+1)
-                      end select
-                   end do
-                end do
-             end if
-          end do bocoLoop
-       end do domainLoop
-
-       ! Return pointer to nodeValLocal
-       call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       call VecPlaceArray(zipper%localVal, varsd(:, iVar), ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ! Send these values to the root using the zipper scatter.
-       call VecScatterBegin(zipper%scatter, exch%nodeValLocal,&
-            zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       call VecScatterEnd(zipper%scatter, exch%nodeValLocal,&
-            zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ! Reset the original petsc vector.
-       call vecResetArray(zipper%localVal, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-    end do varLoop
+        ! This routine could technically be inside of the
+        ! flowIntegrationZipper subroutine but we split it out becuase it
+        ! has petsc comm stuff that will differentiate manually that
+        ! tapenade doesn't need to see.
 
-  end subroutine flowIntegrationZipperComm_d
+        use constants
+        use blockPointers, only: BCFaceID, BCData, addGridVelocities, nDom, nBocos, BCType
+        use BCPointers, only: sFace, ww1, ww2, pp1, pp2, gamma1, gamma2, xx
+        use oversetData, only: zipperMeshes, zipperMesh
+        use surfaceFamilies, only: familyExchange, BCFamExchange
+        use utils, only: setPointers, setBCPointers, EChk
 
-  subroutine flowIntegrationZipperComm_b(isInflow, vars, varsd, sps)
-
-    ! Reverse mode linearization of the flowIntegrationZipperComm routine
-
-    use constants
-    use blockPointers, only : BCFaceID, BCData, addGridVelocities, nDom, nBocos, BCType
-    use BCPointers, only : sFaced, ww1d, ww2d, pp1d, pp2d, xxd
-    use oversetData, only : zipperMeshes, zipperMesh
-    use surfaceFamilies, only : familyExchange, BCFamExchange
-    use utils, only : setPointers_b, setBCPointers_d, EChk
 #include <petsc/finclude/petsc.h>
-    use petsc
-    implicit none
-
-    ! Input variables
-    logical, intent(in) :: isInflow
-    real(kind=realType), dimension(:, :) :: vars, varsd
-    integer(kind=intType) :: sps
-
-    ! Working variables
-    integer(kind=intType) :: ii, iVar, i,j, iBeg, iEnd, jBeg, jEnd, ierr, nn, mm
-    real(kind=realType), dimension(:), pointer :: localPtr
-    real(kind=realType) ::tmp
-    type(zipperMesh), pointer :: zipper
-    type(familyExchange), pointer :: exch
-
-    ! Set the zipper pointer to the zipper for inflow/outflow conditions
-    if (isInflow) then
-        zipper => zipperMeshes(iBCGroupInflow)
-        exch => BCFamExchange(iBCGroupInflow, sps)
-    else
-        zipper => zipperMeshes(iBCGroupOutFlow)
-        exch => BCFamExchange(iBCGroupOutflow, sps)
-    end if
-
-    ! Run the var exchange loop backwards:
-    varLoop: do iVar=1, nZIppFlowComm
-
-       ! Zero the vector we are scatting into:
-       call VecSet(exch%nodeValLocal, zero, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       call VecPlaceArray(zipper%localVal, varsd(:, iVar), ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ! Send these values to the root using the zipper scatter.
-       call VecScatterBegin(zipper%scatter, zipper%localVal, &
-            exch%nodeValLocal, ADD_VALUES, SCATTER_REVERSE, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       call VecScatterEnd(zipper%scatter, zipper%localVal, &
-            exch%nodeValLocal, ADD_VALUES, SCATTER_REVERSE, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ! Reset the original petsc vector.
-       call vecResetArray(zipper%localVal, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ! To be consistent, varsd must be zeroed since it is "used"
-       varsd(:, iVar) = zero
-
-       ! Now finish the scatting back to the acutual BCs pointers (and
-       ! thus the state variables).
-
-       call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ii = 0
-       domainLoop: do nn=1, nDom
-          call setPointers_b(nn, 1, sps)
-          bocoLoop: do mm=1, nBocos
-             if (((BCType(mm) == SubsonicInflow .or. &
-                  BCType(mm) == SupersonicInflow) .and. isInflow) &
-                  .or. &
-                  (BCType(mm) == SubsonicOutflow .or. &
-                  BCType(mm) == SupersonicOutflow) .and. (.not. isInflow)) then
-
-                call setBCPointers_d(mm, .True.)
-                iBeg = BCdata(mm)%inBeg; iEnd=BCData(mm)%inEnd
-                jBeg = BCdata(mm)%jnBeg; jEnd=BCData(mm)%jnEnd
-                do j=jBeg, jEnd
-                   do i=iBeg, iEnd
-                      ii = ii + 1
-                      select case(iVar)
-                      case (iRho, iVx, iVy, iVz)
-                         tmp = eighth * localPtr(ii)
-                         ww1d(i  , j  , iVar) = ww1d(i  , j  , iVar) + tmp
-                         ww1d(i+1, j  , iVar) = ww1d(i+1, j  , iVar) + tmp
-                         ww1d(i  , j+1, iVar) = ww1d(i  , j+1, iVar) + tmp
-                         ww1d(i+1, j+1, iVar) = ww1d(i+1, j+1, iVar) + tmp
-
-                         ww2d(i  , j  , iVar) = ww2d(i  , j  , iVar) + tmp
-                         ww2d(i+1, j  , iVar) = ww2d(i+1, j  , iVar) + tmp
-                         ww2d(i  , j+1, iVar) = ww2d(i  , j+1, iVar) + tmp
-                         ww2d(i+1, j+1, iVar) = ww2d(i+1, j+1, iVar) + tmp
-                      case (iZippFlowP)
-                         tmp = eighth * localPtr(ii)
-                         pp1d(i  , j  ) = pp1d(i  , j  ) + tmp
-                         pp1d(i+1, j  ) = pp1d(i+1, j  ) + tmp
-                         pp1d(i  , j+1) = pp1d(i  , j+1) + tmp
-                         pp1d(i+1, j+1) = pp1d(i+1, j+1) + tmp
-
-                         pp2d(i  , j  ) = pp2d(i  , j  ) + tmp
-                         pp2d(i+1, j  ) = pp2d(i+1, j  ) + tmp
-                         pp2d(i  , j+1) = pp2d(i  , j+1) + tmp
-                         pp2d(i+1, j+1) = pp2d(i+1, j+1) + tmp
-
-                      case (iZippFlowGamma)
-                         ! gamma is not currently active
-
-
-                      case (iZippFlowSFace)
-                         if (addGridVelocities) then
-                            tmp = fourth*localPtr(ii)
-                            sfaced(i  , j  ) = sfaced(i  , j  ) + tmp
-                            sfaced(i+1, j  ) = sfaced(i+1, j  ) + tmp
-                            sfaced(i  , j+1) = sfaced(i  , j+1) + tmp
-                            sfaced(i+1, j+1) = sfaced(i+1, j+1) + tmp
-                         else
-                            localPtr(ii) = zero
-                         end if
-                      case (iZippFlowX, iZippFlowY, iZippFlowZ)
-                         xxd(i+1, j+1, iVar-iZippFlowX+1) = xxd(i+1, j+1, iVar-iZippFlowX+1) + localPtr(ii)
-                      end select
-                   end do
-                end do
-             end if
-          end do bocoLoop
-       end do domainLoop
-
-       ! Return pointer to nodeValLocal
-       call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-    end do varLoop
-
-  end subroutine flowIntegrationZipperComm_b
-
-  subroutine wallIntegrationZipperComm(vars, sps)
-
-    ! This routine could technically be inside of the
-    ! flowIntegrationZipper subroutine but we split it out becuase it
-    ! has petsc comm stuff that will differentiate manually that
-    ! tapenade doesn't need to see.
-
-    use constants
-    use blockPointers, only : BCData, nDom, BCType, nBocos
-    use BCPointers, only : xx
-    use oversetData, only : zipperMeshes, zipperMesh
-    use surfaceFamilies, only : familyExchange, BCFamExchange
-    use utils, only : setPointers, setBCPointers, EChk, isWallType
+        use petsc
+        implicit none
+
+        ! Input variables
+        logical, intent(in) :: isInflow
+        real(kind=realType), dimension(:, :) :: vars
+        integer(kind=intType) :: sps
+
+        ! Working variables
+        integer(kind=intType) :: ii, iVar, i, j, iBeg, iEnd, jBeg, jEnd, ierr, nn, mm
+        real(kind=realType), dimension(:), pointer :: localPtr
+        type(zipperMesh), pointer :: zipper
+        type(familyExchange), pointer :: exch
+
+        ! Set the zipper pointer to the zipper for inflow/outflow conditions
+        if (isInflow) then
+            zipper => zipperMeshes(iBCGroupInflow)
+            exch => BCFamExchange(iBCGroupInflow, sps)
+        else
+            zipper => zipperMeshes(iBCGroupOutFlow)
+            exch => BCFamExchange(iBCGroupOutflow, sps)
+        end if
+
+        ! Note that we can generate all the nodal values we need locally
+        ! using simple arithematic averaging since they are all flow
+        ! properties. There is no need to use the cellToNodeScatter stuff
+        ! here like for the forces.
+        varLoop: do iVar = 1, nZIppFlowComm
+            call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ii = 0
+            domainLoop: do nn = 1, nDom
+                call setPointers(nn, 1, sps)
+                bocoLoop: do mm = 1, nBocos
+
+                    if (((BCType(mm) == SubsonicInflow .or. &
+                          BCType(mm) == SupersonicInflow) .and. (isInflow)) &
+                        .or. &
+                        (BCType(mm) == SubsonicOutflow .or. &
+                         BCType(mm) == SupersonicOutflow) .and. (.not. isInflow)) then
+
+                        call setBCPointers(mm, .True.)
+                        iBeg = BCdata(mm)%inBeg; iEnd = BCData(mm)%inEnd
+                        jBeg = BCdata(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+                        do j = jBeg, jEnd
+                            do i = iBeg, iEnd
+                                ii = ii + 1
+                                select case (iVar)
+                                case (iRho, iVx, iVy, iVz)
+                                    localPtr(ii) = eighth * ( &
+                                                   ww1(i, j, iVar) + ww1(i + 1, j, iVar) + &
+                                                   ww1(i, j + 1, ivar) + ww1(i + 1, j + 1, iVar) + &
+                                                   ww2(i, j, iVar) + ww2(i + 1, j, iVar) + &
+                                                   ww2(i, j + 1, ivar) + ww2(i + 1, j + 1, iVar))
+                                case (iZippFlowP)
+                                    localPtr(ii) = eighth * ( &
+                                                   pp1(i, j) + pp1(i + 1, j) + &
+                                                   pp1(i, j + 1) + pp1(i + 1, j + 1) + &
+                                                   pp2(i, j) + pp2(i + 1, j) + &
+                                                   pp2(i, j + 1) + pp2(i + 1, j + 1))
+                                case (iZippFlowGamma)
+                                    localPtr(ii) = eighth * ( &
+                                                   gamma1(i, j) + gamma1(i + 1, j) + &
+                                                   gamma1(i, j + 1) + gamma1(i + 1, j + 1) + &
+                                                   gamma2(i, j) + gamma2(i + 1, j) + &
+                                                   gamma2(i, j + 1) + gamma2(i + 1, j + 1))
+                                case (iZippFlowSface)
+                                    if (addGridVelocities) then
+                                        localPtr(ii) = fourth * ( &
+                                                       sface(i, j) + sface(i + 1, j) + &
+                                                       sface(i, j + 1) + sface(i + 1, j + 1))
+                                    else
+                                        localPtr(ii) = zero
+                                    end if
+                                case (iZippFlowX, iZippFlowY, iZippFlowZ)
+                                    localPtr(ii) = xx(i + 1, j + 1, iVar - iZippFlowX + 1)
+                                end select
+                            end do
+                        end do
+                    end if
+                end do bocoLoop
+            end do domainLoop
+
+            ! Return pointer to nodeValLocal
+            call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            call VecPlaceArray(zipper%localVal, vars(:, iVar), ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Send these values to the root using the zipper scatter.
+            call VecScatterBegin(zipper%scatter, exch%nodeValLocal, &
+                                 zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            call VecScatterEnd(zipper%scatter, exch%nodeValLocal, &
+                               zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Reset the original petsc vector.
+            call vecResetArray(zipper%localVal, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+        end do varLoop
+
+    end subroutine flowIntegrationZipperComm
+
+    subroutine flowIntegrationZipperComm_d(isInflow, vars, varsd, sps)
+
+        ! Forward mode linearization of flowIntegratoinZipperComm
+
+        use constants
+        use blockPointers, only: BCFaceID, BCData, addGridVelocities, nDom, nBocos, BCType
+        use BCPointers, only: sFaced, ww1d, ww2d, pp1d, pp2d, xxd
+        use oversetData, only: zipperMeshes, zipperMesh
+        use surfaceFamilies, only: familyExchange, BCFamExchange
+        use utils, only: setPointers_d, setBCPointers_d, EChk
 #include <petsc/finclude/petsc.h>
-    use petsc
-    implicit none
-    ! Input variables
-    real(kind=realType), dimension(:, :) :: vars
-    integer(kind=intType) :: sps
-
-    ! Working variables
-    integer(kind=intType) :: ii, iVar, i,j, iBeg, iEnd, jBeg, jEnd, ierr, nn, mm
-    real(kind=realType), dimension(:), pointer :: localPtr
-    type(zipperMesh), pointer :: zipper
-    type(familyExchange), pointer :: exch
-
-    ! Set the zipper pointer to the zipper for inflow/outflow conditions
-    zipper => zipperMeshes(iBCGroupWalls)
-    exch => BCFamExchange(iBCGroupWalls, sps)
-
-    ! Make sure the nodal tractions are computed
-    call computeNodalTractions(sps)
-
-    varLoop: do iVar=1, nZippWallComm
-       call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ii = 0
-       domainLoop: do nn=1, nDom
-          call setPointers(nn, 1, sps)
-          bocoLoop: do mm=1, nBocos
-             if (isWallType(BCType(mm))) then
-                call setBCPointers(mm, .True.)
-                iBeg = BCdata(mm)%inBeg; iEnd=BCData(mm)%inEnd
-                jBeg = BCdata(mm)%jnBeg; jEnd=BCData(mm)%jnEnd
-                do j=jBeg, jEnd
-                   do i=iBeg, iEnd
-                      ii = ii + 1
-                      select case(iVar)
-
-                      case (iZippWallTpx, iZippWallTpy, iZippWallTpz)
-
-                         localPtr(ii) = BCData(mm)%Tp(i, j, iVar)
-
-                      case (iZippWallTvx, iZippWallTvy, iZippWallTvz)
-
-                         localPtr(ii) = BCData(mm)%Tv(i, j, iVar-iZippWallTvx+1)
-
-                      case (iZippWallX, iZippWallY, iZippWallZ)
-
-                         ! The +1 is due to pointer offset
-                         localPtr(ii) = xx(i+1, j+1, iVar-iZippWallX+1)
-
-                      end select
-                   end do
-                end do
-             end if
-          end do bocoLoop
-       end do domainLoop
-
-       ! Return pointer to nodeValLocal
-       call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       call VecPlaceArray(zipper%localVal, vars(:, iVar), ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ! Send these values to the root using the zipper scatter.
-       call VecScatterBegin(zipper%scatter, exch%nodeValLocal,&
-            zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       call VecScatterEnd(zipper%scatter, exch%nodeValLocal,&
-            zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-
-       ! Reset the original petsc vector.
-       call vecResetArray(zipper%localVal, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-    end do varLoop
-
-  end subroutine wallIntegrationZipperComm
-
-  subroutine wallIntegrationZipperComm_d(vars, varsd, sps)
-
-    ! Forward mode linearization of the wallIntegrationZipperComm
-
-    use constants
-    use blockPointers, only : BCDatad, BCData, nBocos, nDom, BCType
-    use BCPointers, only : xxd
-    use oversetData, only : zipperMeshes, zipperMesh
-    use surfaceFamilies, only : familyExchange, BCFamExchange
-    use utils, only : setPointers_d, setBCPointers_d, EChk, isWallType
+        use petsc
+        implicit none
+        ! Input variables
+        logical, intent(in) :: isInflow
+        real(kind=realType), dimension(:, :) :: vars, varsd
+        integer(kind=intType) :: sps
+
+        ! Working variables
+        integer(kind=intType) :: ii, iVar, i, j, iBeg, iEnd, jBeg, jEnd, ierr, nn, mm
+        real(kind=realType), dimension(:), pointer :: localPtr
+        type(zipperMesh), pointer :: zipper
+        type(familyExchange), pointer :: exch
+
+        ! Set the zipper pointer to the zipper for inflow/outflow conditions
+        if (isInflow) then
+            ! Need to generate the vars themselves.
+            call flowIntegrationZipperComm(.true., vars, sps)
+            zipper => zipperMeshes(iBCGroupInflow)
+            exch => BCFamExchange(iBCGroupInflow, sps)
+        else
+            ! Need to generate the vars themselves.
+            call flowIntegrationZipperComm(.false., vars, sps)
+            zipper => zipperMeshes(iBCGroupOutFlow)
+            exch => BCFamExchange(iBCGroupOutflow, sps)
+        end if
+
+        ! Note that we can generate all the nodal values we need locally
+        ! using simple arithematic averaging since they are all flow
+        ! properties. There is no need to use the cellToNodeScatter stuff
+        ! here like for the forces.
+        varLoop: do iVar = 1, nZIppFlowComm
+            call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ii = 0
+            domainLoop: do nn = 1, nDom
+                call setPointers_d(nn, 1, sps)
+                bocoLoop: do mm = 1, nBocos
+                    if (((BCType(mm) == SubsonicInflow .or. &
+                          BCType(mm) == SupersonicInflow) .and. isInflow) &
+                        .or. &
+                        (BCType(mm) == SubsonicOutflow .or. &
+                         BCType(mm) == SupersonicOutflow) .and. (.not. isInflow)) then
+
+                        call setBCPointers_d(mm, .True.)
+                        iBeg = BCdata(mm)%inBeg; iEnd = BCData(mm)%inEnd
+                        jBeg = BCdata(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+                        do j = jBeg, jEnd
+                            do i = iBeg, iEnd
+                                ii = ii + 1
+                                select case (iVar)
+                                case (iRho, iVx, iVy, iVz)
+                                    localPtr(ii) = eighth * ( &
+                                                   ww1d(i, j, iVar) + ww1d(i + 1, j, iVar) + &
+                                                   ww1d(i, j + 1, ivar) + ww1d(i + 1, j + 1, iVar) + &
+                                                   ww2d(i, j, iVar) + ww2d(i + 1, j, iVar) + &
+                                                   ww2d(i, j + 1, ivar) + ww2d(i + 1, j + 1, iVar))
+                                case (iZippFlowP)
+                                    localPtr(ii) = eighth * ( &
+                                                   pp1d(i, j) + pp1d(i + 1, j) + &
+                                                   pp1d(i, j + 1) + pp1d(i + 1, j + 1) + &
+                                                   pp2d(i, j) + pp2d(i + 1, j) + &
+                                                   pp2d(i, j + 1) + pp2d(i + 1, j + 1))
+                                case (iZippFlowGamma)
+                                    ! Gamma is not currently an active variable
+                                    localPtr(ii) = zero
+                                case (iZippFlowSface)
+                                    if (addGridVelocities) then
+                                        localPtr(ii) = fourth * ( &
+                                                       sfaced(i, j) + sfaced(i + 1, j) + &
+                                                       sfaced(i, j + 1) + sfaced(i + 1, j + 1))
+                                    else
+                                        localPtr(ii) = zero
+                                    end if
+                                case (iZippFlowX, iZippFlowY, iZippFlowZ)
+                                    localPtr(ii) = xxd(i + 1, j + 1, iVar - iZippFlowX + 1)
+                                end select
+                            end do
+                        end do
+                    end if
+                end do bocoLoop
+            end do domainLoop
+
+            ! Return pointer to nodeValLocal
+            call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            call VecPlaceArray(zipper%localVal, varsd(:, iVar), ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Send these values to the root using the zipper scatter.
+            call VecScatterBegin(zipper%scatter, exch%nodeValLocal, &
+                                 zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            call VecScatterEnd(zipper%scatter, exch%nodeValLocal, &
+                               zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Reset the original petsc vector.
+            call vecResetArray(zipper%localVal, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+        end do varLoop
+
+    end subroutine flowIntegrationZipperComm_d
+
+    subroutine flowIntegrationZipperComm_b(isInflow, vars, varsd, sps)
+
+        ! Reverse mode linearization of the flowIntegrationZipperComm routine
+
+        use constants
+        use blockPointers, only: BCFaceID, BCData, addGridVelocities, nDom, nBocos, BCType
+        use BCPointers, only: sFaced, ww1d, ww2d, pp1d, pp2d, xxd
+        use oversetData, only: zipperMeshes, zipperMesh
+        use surfaceFamilies, only: familyExchange, BCFamExchange
+        use utils, only: setPointers_b, setBCPointers_d, EChk
+#include <petsc/finclude/petsc.h>
+        use petsc
+        implicit none
+
+        ! Input variables
+        logical, intent(in) :: isInflow
+        real(kind=realType), dimension(:, :) :: vars, varsd
+        integer(kind=intType) :: sps
+
+        ! Working variables
+        integer(kind=intType) :: ii, iVar, i, j, iBeg, iEnd, jBeg, jEnd, ierr, nn, mm
+        real(kind=realType), dimension(:), pointer :: localPtr
+        real(kind=realType) :: tmp
+        type(zipperMesh), pointer :: zipper
+        type(familyExchange), pointer :: exch
+
+        ! Set the zipper pointer to the zipper for inflow/outflow conditions
+        if (isInflow) then
+            zipper => zipperMeshes(iBCGroupInflow)
+            exch => BCFamExchange(iBCGroupInflow, sps)
+        else
+            zipper => zipperMeshes(iBCGroupOutFlow)
+            exch => BCFamExchange(iBCGroupOutflow, sps)
+        end if
+
+        ! Run the var exchange loop backwards:
+        varLoop: do iVar = 1, nZIppFlowComm
+
+            ! Zero the vector we are scatting into:
+            call VecSet(exch%nodeValLocal, zero, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            call VecPlaceArray(zipper%localVal, varsd(:, iVar), ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Send these values to the root using the zipper scatter.
+            call VecScatterBegin(zipper%scatter, zipper%localVal, &
+                                 exch%nodeValLocal, ADD_VALUES, SCATTER_REVERSE, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            call VecScatterEnd(zipper%scatter, zipper%localVal, &
+                               exch%nodeValLocal, ADD_VALUES, SCATTER_REVERSE, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Reset the original petsc vector.
+            call vecResetArray(zipper%localVal, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! To be consistent, varsd must be zeroed since it is "used"
+            varsd(:, iVar) = zero
+
+            ! Now finish the scatting back to the acutual BCs pointers (and
+            ! thus the state variables).
+
+            call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ii = 0
+            domainLoop: do nn = 1, nDom
+                call setPointers_b(nn, 1, sps)
+                bocoLoop: do mm = 1, nBocos
+                    if (((BCType(mm) == SubsonicInflow .or. &
+                          BCType(mm) == SupersonicInflow) .and. isInflow) &
+                        .or. &
+                        (BCType(mm) == SubsonicOutflow .or. &
+                         BCType(mm) == SupersonicOutflow) .and. (.not. isInflow)) then
+
+                        call setBCPointers_d(mm, .True.)
+                        iBeg = BCdata(mm)%inBeg; iEnd = BCData(mm)%inEnd
+                        jBeg = BCdata(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+                        do j = jBeg, jEnd
+                            do i = iBeg, iEnd
+                                ii = ii + 1
+                                select case (iVar)
+                                case (iRho, iVx, iVy, iVz)
+                                    tmp = eighth * localPtr(ii)
+                                    ww1d(i, j, iVar) = ww1d(i, j, iVar) + tmp
+                                    ww1d(i + 1, j, iVar) = ww1d(i + 1, j, iVar) + tmp
+                                    ww1d(i, j + 1, iVar) = ww1d(i, j + 1, iVar) + tmp
+                                    ww1d(i + 1, j + 1, iVar) = ww1d(i + 1, j + 1, iVar) + tmp
+
+                                    ww2d(i, j, iVar) = ww2d(i, j, iVar) + tmp
+                                    ww2d(i + 1, j, iVar) = ww2d(i + 1, j, iVar) + tmp
+                                    ww2d(i, j + 1, iVar) = ww2d(i, j + 1, iVar) + tmp
+                                    ww2d(i + 1, j + 1, iVar) = ww2d(i + 1, j + 1, iVar) + tmp
+                                case (iZippFlowP)
+                                    tmp = eighth * localPtr(ii)
+                                    pp1d(i, j) = pp1d(i, j) + tmp
+                                    pp1d(i + 1, j) = pp1d(i + 1, j) + tmp
+                                    pp1d(i, j + 1) = pp1d(i, j + 1) + tmp
+                                    pp1d(i + 1, j + 1) = pp1d(i + 1, j + 1) + tmp
+
+                                    pp2d(i, j) = pp2d(i, j) + tmp
+                                    pp2d(i + 1, j) = pp2d(i + 1, j) + tmp
+                                    pp2d(i, j + 1) = pp2d(i, j + 1) + tmp
+                                    pp2d(i + 1, j + 1) = pp2d(i + 1, j + 1) + tmp
+
+                                case (iZippFlowGamma)
+                                    ! gamma is not currently active
+
+                                case (iZippFlowSFace)
+                                    if (addGridVelocities) then
+                                        tmp = fourth * localPtr(ii)
+                                        sfaced(i, j) = sfaced(i, j) + tmp
+                                        sfaced(i + 1, j) = sfaced(i + 1, j) + tmp
+                                        sfaced(i, j + 1) = sfaced(i, j + 1) + tmp
+                                        sfaced(i + 1, j + 1) = sfaced(i + 1, j + 1) + tmp
+                                    else
+                                        localPtr(ii) = zero
+                                    end if
+                                case (iZippFlowX, iZippFlowY, iZippFlowZ)
+                      xxd(i + 1, j + 1, iVar - iZippFlowX + 1) = xxd(i + 1, j + 1, iVar - iZippFlowX + 1) + localPtr(ii)
+                                end select
+                            end do
+                        end do
+                    end if
+                end do bocoLoop
+            end do domainLoop
+
+            ! Return pointer to nodeValLocal
+            call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+        end do varLoop
+
+    end subroutine flowIntegrationZipperComm_b
+
+    subroutine wallIntegrationZipperComm(vars, sps)
+
+        ! This routine could technically be inside of the
+        ! flowIntegrationZipper subroutine but we split it out becuase it
+        ! has petsc comm stuff that will differentiate manually that
+        ! tapenade doesn't need to see.
+
+        use constants
+        use blockPointers, only: BCData, nDom, BCType, nBocos
+        use BCPointers, only: xx
+        use oversetData, only: zipperMeshes, zipperMesh
+        use surfaceFamilies, only: familyExchange, BCFamExchange
+        use utils, only: setPointers, setBCPointers, EChk, isWallType
 #include <petsc/finclude/petsc.h>
-    use petsc
-    implicit none
+        use petsc
+        implicit none
+        ! Input variables
+        real(kind=realType), dimension(:, :) :: vars
+        integer(kind=intType) :: sps
+
+        ! Working variables
+        integer(kind=intType) :: ii, iVar, i, j, iBeg, iEnd, jBeg, jEnd, ierr, nn, mm
+        real(kind=realType), dimension(:), pointer :: localPtr
+        type(zipperMesh), pointer :: zipper
+        type(familyExchange), pointer :: exch
+
+        ! Set the zipper pointer to the zipper for inflow/outflow conditions
+        zipper => zipperMeshes(iBCGroupWalls)
+        exch => BCFamExchange(iBCGroupWalls, sps)
+
+        ! Make sure the nodal tractions are computed
+        call computeNodalTractions(sps)
+
+        varLoop: do iVar = 1, nZippWallComm
+            call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ii = 0
+            domainLoop: do nn = 1, nDom
+                call setPointers(nn, 1, sps)
+                bocoLoop: do mm = 1, nBocos
+                    if (isWallType(BCType(mm))) then
+                        call setBCPointers(mm, .True.)
+                        iBeg = BCdata(mm)%inBeg; iEnd = BCData(mm)%inEnd
+                        jBeg = BCdata(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+                        do j = jBeg, jEnd
+                            do i = iBeg, iEnd
+                                ii = ii + 1
+                                select case (iVar)
+
+                                case (iZippWallTpx, iZippWallTpy, iZippWallTpz)
+
+                                    localPtr(ii) = BCData(mm)%Tp(i, j, iVar)
+
+                                case (iZippWallTvx, iZippWallTvy, iZippWallTvz)
+
+                                    localPtr(ii) = BCData(mm)%Tv(i, j, iVar - iZippWallTvx + 1)
+
+                                case (iZippWallX, iZippWallY, iZippWallZ)
+
+                                    ! The +1 is due to pointer offset
+                                    localPtr(ii) = xx(i + 1, j + 1, iVar - iZippWallX + 1)
+
+                                end select
+                            end do
+                        end do
+                    end if
+                end do bocoLoop
+            end do domainLoop
+
+            ! Return pointer to nodeValLocal
+            call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            call VecPlaceArray(zipper%localVal, vars(:, iVar), ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Send these values to the root using the zipper scatter.
+            call VecScatterBegin(zipper%scatter, exch%nodeValLocal, &
+                                 zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            call VecScatterEnd(zipper%scatter, exch%nodeValLocal, &
+                               zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Reset the original petsc vector.
+            call vecResetArray(zipper%localVal, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+        end do varLoop
+
+    end subroutine wallIntegrationZipperComm
+
+    subroutine wallIntegrationZipperComm_d(vars, varsd, sps)
+
+        ! Forward mode linearization of the wallIntegrationZipperComm
+
+        use constants
+        use blockPointers, only: BCDatad, BCData, nBocos, nDom, BCType
+        use BCPointers, only: xxd
+        use oversetData, only: zipperMeshes, zipperMesh
+        use surfaceFamilies, only: familyExchange, BCFamExchange
+        use utils, only: setPointers_d, setBCPointers_d, EChk, isWallType
+#include <petsc/finclude/petsc.h>
+        use petsc
+        implicit none
 
-    ! Input variables
-    real(kind=realType), dimension(:, :):: vars, varsd
-    integer(kind=intType) :: sps
+        ! Input variables
+        real(kind=realType), dimension(:, :) :: vars, varsd
+        integer(kind=intType) :: sps
 
-    ! Working variables
-    integer(kind=intType) :: ii, iVar, i,j, iBeg, iEnd, jBeg, jEnd, ierr, nn, mm
-    real(kind=realType), dimension(:), pointer :: localPtr
-    type(zipperMesh), pointer :: zipper
-    type(familyExchange), pointer :: exch
+        ! Working variables
+        integer(kind=intType) :: ii, iVar, i, j, iBeg, iEnd, jBeg, jEnd, ierr, nn, mm
+        real(kind=realType), dimension(:), pointer :: localPtr
+        type(zipperMesh), pointer :: zipper
+        type(familyExchange), pointer :: exch
 
-    ! Need to set the actual variables first.
-    call wallIntegrationZIpperCOmm(vars, sps)
+        ! Need to set the actual variables first.
+        call wallIntegrationZIpperCOmm(vars, sps)
 
-    ! Compute the derivative of the nodal tractions
-    call computeNodalTractions_d(sps)
+        ! Compute the derivative of the nodal tractions
+        call computeNodalTractions_d(sps)
 
-    ! Set the zipper pointer to the zipper for inflow/outflow conditions
-    zipper => zipperMeshes(iBCGroupWalls)
-    exch => BCFamExchange(iBCGroupWalls, sps)
+        ! Set the zipper pointer to the zipper for inflow/outflow conditions
+        zipper => zipperMeshes(iBCGroupWalls)
+        exch => BCFamExchange(iBCGroupWalls, sps)
 
-    varLoop: do iVar=1, nZippWallComm
-       call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
+        varLoop: do iVar = 1, nZippWallComm
+            call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       ii = 0
-       domainLoop: do nn=1, nDom
-          call setPointers_d(nn, 1, sps)
-          bocoLoop: do mm=1, nBocos
-             if (isWallType(BCType(mm))) then
-                call setBCPointers_d(mm, .True.)
-                iBeg = BCdata(mm)%inBeg; iEnd=BCData(mm)%inEnd
-                jBeg = BCdata(mm)%jnBeg; jEnd=BCData(mm)%jnEnd
-                do j=jBeg, jEnd
-                   do i=iBeg, iEnd
-                      ii = ii + 1
-                      select case(iVar)
+            ii = 0
+            domainLoop: do nn = 1, nDom
+                call setPointers_d(nn, 1, sps)
+                bocoLoop: do mm = 1, nBocos
+                    if (isWallType(BCType(mm))) then
+                        call setBCPointers_d(mm, .True.)
+                        iBeg = BCdata(mm)%inBeg; iEnd = BCData(mm)%inEnd
+                        jBeg = BCdata(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+                        do j = jBeg, jEnd
+                            do i = iBeg, iEnd
+                                ii = ii + 1
+                                select case (iVar)
 
-                      case (iZippWallTpx, iZippWallTpy, iZippWallTpz)
+                                case (iZippWallTpx, iZippWallTpy, iZippWallTpz)
 
-                         localPtr(ii) = BCDatad(mm)%Tp(i, j, iVar)
+                                    localPtr(ii) = BCDatad(mm)%Tp(i, j, iVar)
 
-                      case (iZippWallTvx, iZippWallTvy, iZippWallTvz)
+                                case (iZippWallTvx, iZippWallTvy, iZippWallTvz)
 
-                         localPtr(ii) = BCDatad(mm)%Tv(i, j, iVar-iZippWallTvx+1)
+                                    localPtr(ii) = BCDatad(mm)%Tv(i, j, iVar - iZippWallTvx + 1)
 
-                      case (iZippWallX, iZippWallY, iZippWallZ)
+                                case (iZippWallX, iZippWallY, iZippWallZ)
 
-                         ! The +1 is due to pointer offset
-                         localPtr(ii) = xxd(i+1, j+1, iVar-iZippWallX+1)
+                                    ! The +1 is due to pointer offset
+                                    localPtr(ii) = xxd(i + 1, j + 1, iVar - iZippWallX + 1)
 
-                      end select
-                   end do
-                end do
-             end if
-          end do bocoLoop
-       end do domainLoop
+                                end select
+                            end do
+                        end do
+                    end if
+                end do bocoLoop
+            end do domainLoop
 
-       ! Return pointer to nodeValLocal
-       call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
+            ! Return pointer to nodeValLocal
+            call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call VecPlaceArray(zipper%localVal, varsd(:, iVar), ierr)
-       call EChk(ierr,__FILE__,__LINE__)
+            call VecPlaceArray(zipper%localVal, varsd(:, iVar), ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       ! Send these values to the root using the zipper scatter.
-       call VecScatterBegin(zipper%scatter, exch%nodeValLocal,&
-            zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
+            ! Send these values to the root using the zipper scatter.
+            call VecScatterBegin(zipper%scatter, exch%nodeValLocal, &
+                                 zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call VecScatterEnd(zipper%scatter, exch%nodeValLocal,&
-            zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
+            call VecScatterEnd(zipper%scatter, exch%nodeValLocal, &
+                               zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       ! Reset the original petsc vector.
-       call vecResetArray(zipper%localVal, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
-    end do varLoop
+            ! Reset the original petsc vector.
+            call vecResetArray(zipper%localVal, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+        end do varLoop
 
-  end subroutine wallIntegrationZipperComm_d
+    end subroutine wallIntegrationZipperComm_d
 
-  subroutine wallIntegrationZipperComm_b(vars, varsd, sps)
+    subroutine wallIntegrationZipperComm_b(vars, varsd, sps)
 
-    ! Reverse mode linearization of the wallIntegrationZipperComm
+        ! Reverse mode linearization of the wallIntegrationZipperComm
 
-    use constants
-    use blockPointers, only : BCDatad, BCData, nBocos, nDom, BCType
-    use BCPointers, only : xxd
-    use oversetData, only : zipperMeshes, zipperMesh
-    use surfaceFamilies, only : familyExchange, BCFamExchange
-    use utils, only : setPointers_b, setBCPointers_d, EChk, isWallType
+        use constants
+        use blockPointers, only: BCDatad, BCData, nBocos, nDom, BCType
+        use BCPointers, only: xxd
+        use oversetData, only: zipperMeshes, zipperMesh
+        use surfaceFamilies, only: familyExchange, BCFamExchange
+        use utils, only: setPointers_b, setBCPointers_d, EChk, isWallType
 #include <petsc/finclude/petsc.h>
-    use petsc
-    implicit none
+        use petsc
+        implicit none
 
-    ! Input variables
-    real(kind=realType), dimension(:, :) :: vars, varsd
-    integer(kind=intType) :: sps
+        ! Input variables
+        real(kind=realType), dimension(:, :) :: vars, varsd
+        integer(kind=intType) :: sps
 
-    ! Working variables
-    integer(kind=intType) :: ii, iVar, i,j, iBeg, iEnd, jBeg, jEnd, ierr, nn, mm
-    real(kind=realType), dimension(:), pointer :: localPtr
-    type(zipperMesh), pointer :: zipper
-    type(familyExchange), pointer :: exch
+        ! Working variables
+        integer(kind=intType) :: ii, iVar, i, j, iBeg, iEnd, jBeg, jEnd, ierr, nn, mm
+        real(kind=realType), dimension(:), pointer :: localPtr
+        type(zipperMesh), pointer :: zipper
+        type(familyExchange), pointer :: exch
 
-    ! Set the zipper pointer to the zipper for inflow/outflow conditions
-    zipper => zipperMeshes(iBCGroupWalls)
-    exch => BCFamExchange(iBCGroupWalls, sps)
+        ! Set the zipper pointer to the zipper for inflow/outflow conditions
+        zipper => zipperMeshes(iBCGroupWalls)
+        exch => BCFamExchange(iBCGroupWalls, sps)
 
-    ! Run the var exchange loop backwards:
-    varLoop: do iVar=1, nZippWallComm
+        ! Run the var exchange loop backwards:
+        varLoop: do iVar = 1, nZippWallComm
 
-       ! Zero the vector we are scatting into:
-       call VecSet(exch%nodeValLocal, zero, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
+            ! Zero the vector we are scatting into:
+            call VecSet(exch%nodeValLocal, zero, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call VecPlaceArray(zipper%localVal, varsd(:, iVar), ierr)
-       call EChk(ierr,__FILE__,__LINE__)
+            call VecPlaceArray(zipper%localVal, varsd(:, iVar), ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       ! Send these values to the root using the zipper scatter.
-       call VecScatterBegin(zipper%scatter, zipper%localVal, &
-            exch%nodeValLocal, ADD_VALUES, SCATTER_REVERSE, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
+            ! Send these values to the root using the zipper scatter.
+            call VecScatterBegin(zipper%scatter, zipper%localVal, &
+                                 exch%nodeValLocal, ADD_VALUES, SCATTER_REVERSE, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       call VecScatterEnd(zipper%scatter, zipper%localVal, &
-            exch%nodeValLocal, ADD_VALUES, SCATTER_REVERSE, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
+            call VecScatterEnd(zipper%scatter, zipper%localVal, &
+                               exch%nodeValLocal, ADD_VALUES, SCATTER_REVERSE, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       ! Reset the original petsc vector.
-       call vecResetArray(zipper%localVal, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
+            ! Reset the original petsc vector.
+            call vecResetArray(zipper%localVal, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       ! To be consistent, varsd must be zeroed since it is "used"
-       varsd(:, iVar) = zero
+            ! To be consistent, varsd must be zeroed since it is "used"
+            varsd(:, iVar) = zero
 
-       ! Now finish the scatting back to the acutual BCs pointers (and
-       ! thus the state variables).
+            ! Now finish the scatting back to the acutual BCs pointers (and
+            ! thus the state variables).
 
-       call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
+            call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-       ii = 0
-       domainLoop: do nn=1, nDom
-          call setPointers_b(nn, 1, sps)
-          bocoLoop: do mm=1, nBocos
-             if (isWallType(BCType(mm))) then
-                call setBCPointers_d(mm, .True.)
-                iBeg = BCdata(mm)%inBeg; iEnd=BCData(mm)%inEnd
-                jBeg = BCdata(mm)%jnBeg; jEnd=BCData(mm)%jnEnd
-                do j=jBeg, jEnd
-                   do i=iBeg, iEnd
-                      ii = ii + 1
-                      select case(iVar)
-                      case (iZippWallTpx, iZippWallTpy, iZippWallTpz)
+            ii = 0
+            domainLoop: do nn = 1, nDom
+                call setPointers_b(nn, 1, sps)
+                bocoLoop: do mm = 1, nBocos
+                    if (isWallType(BCType(mm))) then
+                        call setBCPointers_d(mm, .True.)
+                        iBeg = BCdata(mm)%inBeg; iEnd = BCData(mm)%inEnd
+                        jBeg = BCdata(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+                        do j = jBeg, jEnd
+                            do i = iBeg, iEnd
+                                ii = ii + 1
+                                select case (iVar)
+                                case (iZippWallTpx, iZippWallTpy, iZippWallTpz)
 
-                         BCDatad(mm)%Tp(i, j, iVar) = localPtr(ii)
+                                    BCDatad(mm)%Tp(i, j, iVar) = localPtr(ii)
 
-                      case (iZippWallTvx, iZippWallTvy, iZippWallTvz)
+                                case (iZippWallTvx, iZippWallTvy, iZippWallTvz)
 
-                         BCDatad(mm)%Tv(i, j, iVar-iZippWallTvx+1) = localPtr(ii)
+                                    BCDatad(mm)%Tv(i, j, iVar - iZippWallTvx + 1) = localPtr(ii)
 
-                      case (iZippWallX, iZippWallY, iZippWallZ)
+                                case (iZippWallX, iZippWallY, iZippWallZ)
 
-                         ! The +1 is due to pointer offset
-                         xxd(i+1, j+1, iVar-iZippWallX+1) = xxd(i+1, j+1, iVar-iZippWallX+1) + localPtr(ii)
+                                    ! The +1 is due to pointer offset
+                      xxd(i + 1, j + 1, iVar - iZippWallX + 1) = xxd(i + 1, j + 1, iVar - iZippWallX + 1) + localPtr(ii)
 
-                      end select
-                   end do
-                end do
-             end if
-          end do bocoLoop
-       end do domainLoop
+                                end select
+                            end do
+                        end do
+                    end if
+                end do bocoLoop
+            end do domainLoop
 
-       ! Return pointer to nodeValLocal
-       call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
-       call EChk(ierr,__FILE__,__LINE__)
+            ! Return pointer to nodeValLocal
+            call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
 
-    end do varLoop
+        end do varLoop
 
-    ! Compute the derivatives of the nodal tractions. The will
-    !accumulate the seeds onto bcDatad%Fv, bcDatad%Fv and bcDatad%area
+        ! Compute the derivatives of the nodal tractions. The will
+        !accumulate the seeds onto bcDatad%Fv, bcDatad%Fv and bcDatad%area
 
-    call computeNodalTractions_b(sps)
+        call computeNodalTractions_b(sps)
 
-  end subroutine wallIntegrationZipperComm_b
+    end subroutine wallIntegrationZipperComm_b
 end module haloExchange
diff --git a/src/utils/signals.F90 b/src/utils/signals.F90
index 84fb3ae35..4b13fae33 100644
--- a/src/utils/signals.F90
+++ b/src/utils/signals.F90
@@ -1,87 +1,87 @@
   subroutine set_signal_write
-    !
-    !       set_signal_write sets the localSignal to signalWrite. On the
-    !       finest mesh this means that after the current iteration a
-    !       solution is written. On the coarser grids this signal will be
-    !       ignored.
-    !       This routine is only compiled when signalling is supported.
-    !
+      !
+      !       set_signal_write sets the localSignal to signalWrite. On the
+      !       finest mesh this means that after the current iteration a
+      !       solution is written. On the coarser grids this signal will be
+      !       ignored.
+      !       This routine is only compiled when signalling is supported.
+      !
 #ifndef USE_NO_SIGNALS
 
-    use constants
-    use communication, only : myID
-    use inputPhysics, only : equationMode
-    use killSignals, only : localSignal, noSignal, signalWrite
-    use iteration, only : groundLevel
-    use commonFormats, only : strings
-    implicit none
-    !
-    !      Local variables.
-    !
-    character(len=7) :: integerString
+      use constants
+      use communication, only: myID
+      use inputPhysics, only: equationMode
+      use killSignals, only: localSignal, noSignal, signalWrite
+      use iteration, only: groundLevel
+      use commonFormats, only: strings
+      implicit none
+      !
+      !      Local variables.
+      !
+      character(len=7) :: integerString
 
-    ! The user signals must be reconnected again, because the
-    ! connection is lost after a signal has been given.
+      ! The user signals must be reconnected again, because the
+      ! connection is lost after a signal has been given.
 
-    call connect_signals
+      call connect_signals
 
-    ! Parallel executable.  Write a general message that this
-    ! processor has received a write signal.
+      ! Parallel executable.  Write a general message that this
+      ! processor has received a write signal.
 
-    write(integerString,'(i7)') myID
-    integerString = adjustl(integerString)
-    integerString = trim(integerString)
-    print "(a)", "#"
-    print strings, "# Processor ", integerString(:len_trim(integerString)),": Received write signal."
+      write (integerString, '(i7)') myID
+      integerString = adjustl(integerString)
+      integerString = trim(integerString)
+      print "(a)", "#"
+      print strings, "# Processor ", integerString(:len_trim(integerString)), ": Received write signal."
 
-    ! Check if a signal was set previously.
+      ! Check if a signal was set previously.
 
-    testPrevious: if(localSignal /= noSignal) then
+      testPrevious: if (localSignal /= noSignal) then
 
-       ! A signal was set before. Set it back to noSignal
-       ! and write a message that this has been done.
+          ! A signal was set before. Set it back to noSignal
+          ! and write a message that this has been done.
 
-       localSignal = noSignal
+          localSignal = noSignal
 
-       print "(a)", "# Signal was set previously and will now be overwritten to no signal"
+          print "(a)", "# Signal was set previously and will now be overwritten to no signal"
 
-    else testPrevious
+          else testPrevious
 
-       ! No signal was set yet.
-       ! Determine the situation and act accordingly.
+          ! No signal was set yet.
+          ! Determine the situation and act accordingly.
 
-       if(groundLevel == 1) then
+          if (groundLevel == 1) then
 
-          ! Finest grid. A solution file will be written after either
-          ! this multigrid cycle or this time step. This depends whether
-          ! this is a steady or an unsteady computation.
-          ! In both cases localSignal must be set to signalWrite.
+              ! Finest grid. A solution file will be written after either
+              ! this multigrid cycle or this time step. This depends whether
+              ! this is a steady or an unsteady computation.
+              ! In both cases localSignal must be set to signalWrite.
 
-          localSignal = signalWrite
+              localSignal = signalWrite
 
-          select case (equationMode)
+              select case (equationMode)
 
-          case (steady, timeSpectral)
-             print "(a)", "# Solution will be written after this iteration"
+              case (steady, timeSpectral)
+                  print "(a)", "# Solution will be written after this iteration"
 
-          case (unsteady)
-             print "(a)", "# Solution will be written after this time step"
-          end select
+              case (unsteady)
+                  print "(a)", "# Solution will be written after this time step"
+              end select
 
-       else
+          else
 
-          ! Coarser grid. Signal information will be ignored.
+              ! Coarser grid. Signal information will be ignored.
 
-          print "(a)", "# Solver is still on a coarse grid and therefore the signal is ignored."
-          print "(a)", "# Use kill -USR2 if you want to go to the next finer grid level."
+              print "(a)", "# Solver is still on a coarse grid and therefore the signal is ignored."
+              print "(a)", "# Use kill -USR2 if you want to go to the next finer grid level."
 
-       endif
+          end if
 
-    endif testPrevious
+      end if testPrevious
 
-    ! Write a blank line, such that the message is pretty clear.
+      ! Write a blank line, such that the message is pretty clear.
 
-    print "(a)", "#"
+      print "(a)", "#"
 
 #endif /* USE_NO_SIGNALS */
 
@@ -90,103 +90,102 @@ end subroutine set_signal_write
   !      ==================================================================
 
   subroutine set_signal_write_quit
-    !
-    !       set_signal_write_quit sets the localSignal to
-    !       signalWriteQuit. On the finest mesh this means that after
-    !       the current iteration the solution is written and the
-    !       computation is stopped. On the coarser grids the solution is
-    !       transferred to the next finer level and the computation is
-    !       continued there.
-    !       This routine is only compiled when signalling is supported.
-    !
+      !
+      !       set_signal_write_quit sets the localSignal to
+      !       signalWriteQuit. On the finest mesh this means that after
+      !       the current iteration the solution is written and the
+      !       computation is stopped. On the coarser grids the solution is
+      !       transferred to the next finer level and the computation is
+      !       continued there.
+      !       This routine is only compiled when signalling is supported.
+      !
 #ifndef USE_NO_SIGNALS
 
+      use constants
+      use communication, only: myID
+      use inputPhysics, only: equationMode
+      use killSignals, only: localSignal, noSignal, signalWriteQuit
+      use iteration, only: groundLevel
+      use commonFormats, only: strings
+      implicit none
+      !
+      !      Local variables.
+      !
+      character(len=7) :: integerString
 
-    use constants
-    use communication, only : myID
-    use inputPhysics, only : equationMode
-    use killSignals, only : localSignal, noSignal, signalWriteQuit
-    use iteration, only : groundLevel
-    use commonFormats, only : strings
-    implicit none
-    !
-    !      Local variables.
-    !
-    character(len=7) :: integerString
+      ! The user signals must be reconnected again, because the
+      ! connection is lost after a signal has been given.
+      ! Only when signalling is wanted.
 
-    ! The user signals must be reconnected again, because the
-    ! connection is lost after a signal has been given.
-    ! Only when signalling is wanted.
+      call connect_signals
 
-    call connect_signals
+      ! Parallel executable. Write a general message that this
+      ! processor has received a write and quit signal.
 
-    ! Parallel executable. Write a general message that this
-    ! processor has received a write and quit signal.
+      write (integerString, '(i7)') myID
+      integerString = adjustl(integerString)
+      integerString = trim(integerString)
+      print "(a)", "#"
+      print strings, "# Processor ", integerString(:len_trim(integerString)), ": Received write and quit signal."
 
-    write(integerString,'(i7)') myID
-    integerString = adjustl(integerString)
-    integerString = trim(integerString)
-    print "(a)", "#"
-    print strings, "# Processor ", integerString(:len_trim(integerString)),": Received write and quit signal."
+      ! Check if a signal was set previously.
 
-    ! Check if a signal was set previously.
+      testPrevious: if (localSignal /= noSignal) then
 
-    testPrevious: if(localSignal /= noSignal) then
+          ! A signal was set before. Set it back to noSignal
+          ! and write a message that this has been done.
 
-       ! A signal was set before. Set it back to noSignal
-       ! and write a message that this has been done.
+          localSignal = noSignal
 
-       localSignal = noSignal
+          print "(a)", "# Signal was set previously and will now be overwritten to no signal"
 
-       print "(a)", "# Signal was set previously and will now be overwritten to no signal"
+          else testPrevious
 
-    else testPrevious
+          ! Set localSignal to signalWriteQuit.
 
-       ! Set localSignal to signalWriteQuit.
+          localSignal = signalWriteQuit
 
-       localSignal = signalWriteQuit
+          ! Determine the situation and act accordingly.
 
-       ! Determine the situation and act accordingly.
+          if (groundLevel == 1) then
 
-       if(groundLevel == 1) then
+              ! Finest grid. A solution file will be written and the
+              ! computation stopped after either this multigrid cycle or
+              ! this time step. This depends whether this is a steady or an
+              ! unsteady computation.
 
-          ! Finest grid. A solution file will be written and the
-          ! computation stopped after either this multigrid cycle or
-          ! this time step. This depends whether this is a steady or an
-          ! unsteady computation.
+              select case (equationMode)
+              case (steady, timeSpectral)
+                  print "(a)", "# Solution will be written and computation stopped after this multigrid cycle"
 
-          select case (equationMode)
-          case (steady, timeSpectral)
-             print "(a)", "# Solution will be written and computation stopped after this multigrid cycle"
+              case (unsteady)
+                  print "(a)", "# Solution will be written and computation stopped after this time step"
+              end select
 
-          case (unsteady)
-             print "(a)", "# Solution will be written and computation stopped after this time step"
-          end select
+          else
 
-       else
+              ! Coarser grid. The solution is transferred to the next finer
+              ! grid level. This happens either after this multigrid cycle
+              ! or after this time step depending on whether this is a
+              ! steady or an unsteady computation.
 
-          ! Coarser grid. The solution is transferred to the next finer
-          ! grid level. This happens either after this multigrid cycle
-          ! or after this time step depending on whether this is a
-          ! steady or an unsteady computation.
+              print "(a)", "# Solver is still on the coarse grid."
 
-          print "(a)", "# Solver is still on the coarse grid."
+              select case (equationMode)
+              case (steady, timeSpectral)
+                  print "(a)", "# Solution will be transferred to the next finer grid after this multigrid cycle."
 
-          select case (equationMode)
-          case (steady, timeSpectral)
-             print "(a)", "# Solution will be transferred to the next finer grid after this multigrid cycle."
+              case (unsteady)
+                  print "(a)", "# Solution will be transferred to the next finer grid after this time step."
+              end select
 
-          case (unsteady)
-             print "(a)", "# Solution will be transferred to the next finer grid after this time step."
-          end select
+          end if
 
-       endif
+      end if testPrevious
 
-    endif testPrevious
+      ! Write a blank line, such that the message is pretty clear.
 
-    ! Write a blank line, such that the message is pretty clear.
-
-    print "(a)", "#"
+      print "(a)", "#"
 
 #endif /* USE_NO_SIGNALS */
 
diff --git a/src/utils/sorting.F90 b/src/utils/sorting.F90
index dea4e59c1..48381034d 100644
--- a/src/utils/sorting.F90
+++ b/src/utils/sorting.F90
@@ -1,1463 +1,1458 @@
 module sorting
 
-  use utils, only : terminate
+    use utils, only: terminate
 contains
 
-  function famInList(famID, famList)
-    use constants
-    implicit none
-     integer(kind=intType), intent(in) :: famID, famList(:)
-     logical :: famInList
-     famInLIst = .False.
-     if (bsearchIntegers(famID, famList) > 0) then
-        famInList = .True.
-     end if
-   end function famInList
-
-
-  function bsearchIntegers(key, base)
-    !
-    !       bsearchIntegers returns the index in base where key is stored.
-    !       A binary search algorithm is used here, so it is assumed that
-    !       base is sorted in increasing order. In case key appears more
-    !       than once in base, the result is arbitrary. If key is not
-    !       found, a zero is returned.
-    !
-    use precision
-    implicit none
-    !
-    !      Function type
-    !
-    integer(kind=intType) :: bsearchIntegers
-    !
-    !      Function arguments.
-    !
-    integer(kind=intType), intent(in)               :: key
-    integer(kind=intType), dimension(:), intent(in) :: base
-    integer(kind=intType)                           :: nn
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: ii, pos, start
-    logical               :: entryFound
-
-    ! Initialize some values.
-
-    start       = 1
-    ii          = size(base)
-    entryFound = .false.
-
-    ! Binary search to find key.
-
-    do
-       ! Condition for breaking the loop
-
-       if(ii == 0) exit
-
-       ! Determine the position in the array to compare.
-
-       pos = start + ii/2
-
-       ! In case this is the entry, break the search loop.
-
-       if(base(pos) == key) then
-          entryFound = .true.
-          exit
-       endif
-
-       ! In case the search key is larger than the current position,
-       ! only parts to the right must be searched. Remember that base
-       ! is sorted in increasing order. Nothing needs to be done if the
-       ! key is smaller than the current element.
-
-       if(key > base(pos)) then
-          start = pos +1
-          ii    = ii -1
-       endif
-
-       ! Modify ii for the next branch to search.
-
-       ii = ii/2
-    enddo
-
-    ! Set bsearchIntegers. This depends whether the key was found.
-
-    if( entryFound ) then
-       bsearchIntegers = pos
-    else
-       bsearchIntegers = 0
-    endif
-
-  end function bsearchIntegers
-
-  ! ----------------------------------------------------------------------
-  !                                                                      |
-  !                    No Tapenade Routine below this line               |
-  !                                                                      |
-  ! ----------------------------------------------------------------------
+    function famInList(famID, famList)
+        use constants
+        implicit none
+        integer(kind=intType), intent(in) :: famID, famList(:)
+        logical :: famInList
+        famInLIst = .False.
+        if (bsearchIntegers(famID, famList) > 0) then
+            famInList = .True.
+        end if
+    end function famInList
+
+    function bsearchIntegers(key, base)
+        !
+        !       bsearchIntegers returns the index in base where key is stored.
+        !       A binary search algorithm is used here, so it is assumed that
+        !       base is sorted in increasing order. In case key appears more
+        !       than once in base, the result is arbitrary. If key is not
+        !       found, a zero is returned.
+        !
+        use precision
+        implicit none
+        !
+        !      Function type
+        !
+        integer(kind=intType) :: bsearchIntegers
+        !
+        !      Function arguments.
+        !
+        integer(kind=intType), intent(in) :: key
+        integer(kind=intType), dimension(:), intent(in) :: base
+        integer(kind=intType) :: nn
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: ii, pos, start
+        logical :: entryFound
+
+        ! Initialize some values.
+
+        start = 1
+        ii = size(base)
+        entryFound = .false.
+
+        ! Binary search to find key.
+
+        do
+            ! Condition for breaking the loop
+
+            if (ii == 0) exit
+
+            ! Determine the position in the array to compare.
+
+            pos = start + ii / 2
+
+            ! In case this is the entry, break the search loop.
+
+            if (base(pos) == key) then
+                entryFound = .true.
+                exit
+            end if
+
+            ! In case the search key is larger than the current position,
+            ! only parts to the right must be searched. Remember that base
+            ! is sorted in increasing order. Nothing needs to be done if the
+            ! key is smaller than the current element.
+
+            if (key > base(pos)) then
+                start = pos + 1
+                ii = ii - 1
+            end if
+
+            ! Modify ii for the next branch to search.
+
+            ii = ii / 2
+        end do
+
+        ! Set bsearchIntegers. This depends whether the key was found.
+
+        if (entryFound) then
+            bsearchIntegers = pos
+        else
+            bsearchIntegers = 0
+        end if
+
+    end function bsearchIntegers
+
+    ! ----------------------------------------------------------------------
+    !                                                                      |
+    !                    No Tapenade Routine below this line               |
+    !                                                                      |
+    ! ----------------------------------------------------------------------
 
 #ifndef  USE_TAPENADE
 
+    subroutine qsortIntegers(arr, nn)
+        !
+        !       qsortIntegers sorts the given number of integers in
+        !       increasing order.
+        !
+        use precision
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        integer(kind=intType), dimension(*), intent(inout) :: arr
+        integer(kind=intType), intent(in) :: nn
+        !
+        !      Local variables
+        !
+        integer(kind=intType), parameter :: m = 7
 
-  subroutine qsortIntegers(arr, nn)
-    !
-    !       qsortIntegers sorts the given number of integers in
-    !       increasing order.
-    !
-    use precision
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    integer(kind=intType), dimension(*), intent(inout) :: arr
-    integer(kind=intType), intent(in)                  :: nn
-    !
-    !      Local variables
-    !
-    integer(kind=intType), parameter :: m = 7
+        integer(kind=intType) :: nStack
+        integer(kind=intType) :: i, j, k, r, l, jStack, ii
 
-    integer(kind=intType) :: nStack
-    integer(kind=intType) :: i, j, k, r, l, jStack, ii
+        integer :: ierr
 
-    integer :: ierr
+        integer(kind=intType) :: a, tmp
 
-    integer(kind=intType) :: a, tmp
+        integer(kind=intType), allocatable, dimension(:) :: stack
+        integer(kind=intType), allocatable, dimension(:) :: tmpStack
 
-    integer(kind=intType), allocatable, dimension(:) :: stack
-    integer(kind=intType), allocatable, dimension(:) :: tmpStack
+        ! Allocate the memory for stack.
 
-    ! Allocate the memory for stack.
+        nStack = 100
+        allocate (stack(nStack), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("qsortIntegers", &
+                           "Memory allocation failure for stack")
 
-    nStack = 100
-    allocate(stack(nStack), stat=ierr)
-    if(ierr /= 0)                     &
-         call terminate("qsortIntegers", &
-         "Memory allocation failure for stack")
+        ! Initialize the variables that control the sorting.
 
-    ! Initialize the variables that control the sorting.
+        jStack = 0
+        l = 1
+        r = nn
 
-    jStack = 0
-    l      = 1
-    r      = nn
+        ! Start of the algorithm
 
-    ! Start of the algorithm
+        do
 
-    do
+            ! Check for the size of the subarray.
 
-       ! Check for the size of the subarray.
+            if ((r - l) < m) then
 
-       if((r-l) < m) then
+                ! Perform insertion sort
 
-          ! Perform insertion sort
+                do j = l + 1, r
+                    a = arr(j)
+                    do i = (j - 1), l, -1
+                        if (arr(i) <= a) exit
+                        arr(i + 1) = arr(i)
+                    end do
+                    arr(i + 1) = a
+                end do
 
-          do j=l+1,r
-             a = arr(j)
-             do i=(j-1),l,-1
-                if(arr(i) <= a) exit
-                arr(i+1) = arr(i)
-             enddo
-             arr(i+1) = a
-          enddo
+                ! In case there are no more elements on the stack, exit from
+                ! the outermost do-loop. Algorithm has finished.
 
-          ! In case there are no more elements on the stack, exit from
-          ! the outermost do-loop. Algorithm has finished.
+                if (jStack == 0) exit
 
-          if(jStack == 0) exit
+                ! Pop stack and begin a new round of partitioning.
 
-          ! Pop stack and begin a new round of partitioning.
+                r = stack(jStack)
+                l = stack(jStack - 1)
+                jStack = jStack - 2
 
-          r = stack(jStack)
-          l = stack(jStack-1)
-          jStack = jStack - 2
+            else
 
-       else
+                ! Subarray is larger than the threshold for a linear sort.
+                ! Choose median of left, center and right elements as partitioning
+                ! element a. Also rearrange so that (l) <= (l+1) <= (r).
 
-          ! Subarray is larger than the threshold for a linear sort.
-          ! Choose median of left, center and right elements as partitioning
-          ! element a. Also rearrange so that (l) <= (l+1) <= (r).
+                k = (l + r) / 2
+                tmp = arr(k)      ! Wwap the elements
+                arr(k) = arr(l + 1)    ! k and l+1.
+                arr(l + 1) = tmp
 
-          k = (l+r)/2
-          tmp      = arr(k)      ! Wwap the elements
-          arr(k)   = arr(l+1)    ! k and l+1.
-          arr(l+1) = tmp
+                if (arr(r) < arr(l)) then
+                    tmp = arr(l)             ! Swap the elements
+                    arr(l) = arr(r)             ! r and l.
+                    arr(r) = tmp
+                end if
 
-          if(arr(r) < arr(l)) then
-             tmp    = arr(l)             ! Swap the elements
-             arr(l) = arr(r)             ! r and l.
-             arr(r) = tmp
-          endif
+                if (arr(r) < arr(l + 1)) then
+                    tmp = arr(l + 1)         ! Swap the elements
+                    arr(l + 1) = arr(r)           ! r and l+1.
+                    arr(r) = tmp
+                end if
 
-          if(arr(r) < arr(l+1)) then
-             tmp      = arr(l+1)         ! Swap the elements
-             arr(l+1) = arr(r)           ! r and l+1.
-             arr(r)   = tmp
-          endif
+                if (arr(l + 1) < arr(l)) then
+                    tmp = arr(l + 1)         ! Swap the elements
+                    arr(l + 1) = arr(l)           ! l and l+1.
+                    arr(l) = tmp
+                end if
 
-          if(arr(l+1) < arr(l)) then
-             tmp      = arr(l+1)         ! Swap the elements
-             arr(l+1) = arr(l)           ! l and l+1.
-             arr(l)   = tmp
-          endif
+                ! Initialize the pointers for partitioning.
 
-          ! Initialize the pointers for partitioning.
+                i = l + 1
+                j = r
+                a = arr(l + 1)
 
-          i = l+1
-          j = r
-          a = arr(l+1)
+                ! The innermost loop
 
-          ! The innermost loop
+                do
 
-          do
+                    ! Scan up to find element >= a.
+                    do
+                        i = i + 1
+                        if (a <= arr(i)) exit
+                    end do
 
-             ! Scan up to find element >= a.
-             do
-                i = i+1
-                if(a <= arr(i)) exit
-             enddo
+                    ! Scan down to find element <= a.
+                    do
+                        j = j - 1
+                        if (arr(j) <= a) exit
+                    end do
 
-             ! Scan down to find element <= a.
-             do
-                j = j-1
-                if(arr(j) <= a) exit
-             enddo
+                    ! Exit the loop in case the pointers i and j crossed.
 
-             ! Exit the loop in case the pointers i and j crossed.
+                    if (j < i) exit
 
-             if(j < i) exit
+                    ! Swap the element i and j.
+
+                    tmp = arr(i)
+                    arr(i) = arr(j)
+                    arr(j) = tmp
+                end do
 
-             ! Swap the element i and j.
+                ! Swap the entries j and l+1. Remember that a equals
+                ! arr(l+1).
 
-             tmp    = arr(i)
-             arr(i) = arr(j)
-             arr(j) = tmp
-          enddo
+                arr(l + 1) = arr(j)
+                arr(j) = a
 
-          ! Swap the entries j and l+1. Remember that a equals
-          ! arr(l+1).
+                ! Push pointers to larger subarray on stack,
+                ! process smaller subarray immediately.
 
-          arr(l+1) = arr(j)
-          arr(j)   = a
+                jStack = jStack + 2
+                if (jStack > nStack) then
 
-          ! Push pointers to larger subarray on stack,
-          ! process smaller subarray immediately.
+                    ! Storage of the stack is too small. Reallocate.
 
-          jStack = jStack + 2
-          if(jStack > nStack) then
+                    allocate (tmpStack(nStack), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortIntegers", &
+                                       "Memory allocation error for tmpStack")
+                    tmpStack = stack
+
+                    ! Free the memory of stack, store the old value of nStack
+                    ! in tmp and increase nStack.
 
-             ! Storage of the stack is too small. Reallocate.
+                    deallocate (stack, stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortIntegers", &
+                                       "Deallocation error for stack")
+                    ii = nStack
+                    nStack = nStack + 100
 
-             allocate(tmpStack(nStack), stat=ierr)
-             if(ierr /= 0)                     &
-                  call terminate("qsortIntegers", &
-                  "Memory allocation error for tmpStack")
-             tmpStack = stack
+                    ! Allocate the memory for stack and copy the old values
+                    ! from tmpStack.
 
-             ! Free the memory of stack, store the old value of nStack
-             ! in tmp and increase nStack.
+                    allocate (stack(nStack), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortIntegers", &
+                                       "Memory reallocation error for stack")
+                    stack(1:ii) = tmpStack(1:ii)
 
-             deallocate(stack, stat=ierr)
-             if(ierr /= 0)                     &
-                  call terminate("qsortIntegers", &
-                  "Deallocation error for stack")
-             ii = nStack
-             nStack = nStack + 100
+                    ! And finally release the memory of tmpStack.
+
+                    deallocate (tmpStack, stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortIntegers", &
+                                       "Deallocation error for tmpStack")
+                end if
 
-             ! Allocate the memory for stack and copy the old values
-             ! from tmpStack.
+                if ((r - i + 1) >= (j - l)) then
+                    stack(jStack) = r
+                    r = j - 1
+                    stack(jStack - 1) = j
+                else
+                    stack(jStack) = j - 1
+                    stack(jStack - 1) = l
+                    l = j
+                end if
 
-             allocate(stack(nStack), stat=ierr)
-             if(ierr /= 0)                     &
-                  call terminate("qsortIntegers", &
-                  "Memory reallocation error for stack")
-             stack(1:ii) = tmpStack(1:ii)
+            end if
+        end do
 
-             ! And finally release the memory of tmpStack.
-
-             deallocate(tmpStack, stat=ierr)
-             if(ierr /= 0)                     &
-                  call terminate("qsortIntegers", &
-                  "Deallocation error for tmpStack")
-          endif
+        ! Release the memory of stack.
 
-          if((r-i+1) >= (j-l)) then
-             stack(jStack)   = r
-             r               = j-1
-             stack(jStack-1) = j
-          else
-             stack(jStack)   = j-1
-             stack(jStack-1) = l
-             l               = j
-          endif
+        deallocate (stack, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("qsortIntegers", &
+                           "Deallocation error for stack")
 
-       endif
-    enddo
+        ! Check in debug mode whether the array is really sorted.
 
-    ! Release the memory of stack.
+        if (debug) then
+            do i = 1, (nn - 1)
+                if (arr(i + 1) < arr(i)) &
+                    call terminate("qsortIntegers", &
+                                   "Array is not sorted correctly")
+            end do
+        end if
 
-    deallocate(stack, stat=ierr)
-    if(ierr /= 0)                      &
-         call terminate("qsortIntegers", &
-         "Deallocation error for stack")
+    end subroutine qsortIntegers
 
-    ! Check in debug mode whether the array is really sorted.
+    subroutine qsortReals(arr, nn)
+        !
+        !       qsortReals sorts the given number of reals in increasing
+        !       order.
+        !
+        use constants
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        real(kind=realType), dimension(*), intent(inout) :: arr
+        integer(kind=intType), intent(in) :: nn
+        !
+        !      Local variables
+        !
+        integer(kind=intType), parameter :: m = 7
 
-    if( debug ) then
-       do i=1,(nn-1)
-          if(arr(i+1) < arr(i))              &
-               call terminate("qsortIntegers", &
-               "Array is not sorted correctly")
-       enddo
-    endif
+        integer(kind=intType) :: nStack
+        integer(kind=intType) :: i, j, k, r, l, jStack, ii
 
-  end subroutine qsortIntegers
+        integer :: ierr
 
-  subroutine qsortReals(arr, nn)
-    !
-    !       qsortReals sorts the given number of reals in increasing
-    !       order.
-    !
-    use constants
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    real(kind=realType), dimension(*), intent(inout) :: arr
-    integer(kind=intType), intent(in)                :: nn
-    !
-    !      Local variables
-    !
-    integer(kind=intType), parameter :: m = 7
+        real(kind=realType) :: a, tmp
 
-    integer(kind=intType) :: nStack
-    integer(kind=intType) :: i, j, k, r, l, jStack, ii
+        integer(kind=intType), allocatable, dimension(:) :: stack
+        integer(kind=intType), allocatable, dimension(:) :: tmpStack
 
-    integer :: ierr
+        ! Allocate the memory for stack.
 
-    real(kind=realType) :: a, tmp
+        nStack = 100
+        allocate (stack(nStack), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("qsortReals", &
+                           "Memory allocation failure for stack")
 
-    integer(kind=intType), allocatable, dimension(:) :: stack
-    integer(kind=intType), allocatable, dimension(:) :: tmpStack
+        ! Initialize the variables that control the sorting.
 
-    ! Allocate the memory for stack.
+        jStack = 0
+        l = 1
+        r = nn
 
-    nStack = 100
-    allocate(stack(nStack), stat=ierr)
-    if(ierr /= 0)                  &
-         call terminate("qsortReals", &
-         "Memory allocation failure for stack")
+        ! Start of the algorithm
 
-    ! Initialize the variables that control the sorting.
+        do
 
-    jStack = 0
-    l      = 1
-    r      = nn
+            ! Check for the size of the subarray.
 
-    ! Start of the algorithm
+            if ((r - l) < m) then
 
-    do
+                ! Perform insertion sort
 
-       ! Check for the size of the subarray.
+                do j = l + 1, r
+                    a = arr(j)
+                    do i = (j - 1), l, -1
+                        if (arr(i) <= a) exit
+                        arr(i + 1) = arr(i)
+                    end do
+                    arr(i + 1) = a
+                end do
 
-       if((r-l) < m) then
+                ! In case there are no more elements on the stack, exit from
+                ! the outermost do-loop. Algorithm has finished.
 
-          ! Perform insertion sort
+                if (jStack == 0) exit
 
-          do j=l+1,r
-             a = arr(j)
-             do i=(j-1),l,-1
-                if(arr(i) <= a) exit
-                arr(i+1) = arr(i)
-             enddo
-             arr(i+1) = a
-          enddo
+                ! Pop stack and begin a new round of partitioning.
 
-          ! In case there are no more elements on the stack, exit from
-          ! the outermost do-loop. Algorithm has finished.
+                r = stack(jStack)
+                l = stack(jStack - 1)
+                jStack = jStack - 2
 
-          if(jStack == 0) exit
+            else
 
-          ! Pop stack and begin a new round of partitioning.
+                ! Subarray is larger than the threshold for a linear sort.
+                ! Choose median of left, center and right elements as partitioning
+                ! element a. Also rearrange so that (l) <= (l+1) <= (r).
 
-          r = stack(jStack)
-          l = stack(jStack-1)
-          jStack = jStack - 2
+                k = (l + r) / 2
+                tmp = arr(k)      ! Swap the elements
+                arr(k) = arr(l + 1)    ! k and l+1.
+                arr(l + 1) = tmp
 
-       else
+                if (arr(r) < arr(l)) then
+                    tmp = arr(l)             ! Swap the elements
+                    arr(l) = arr(r)             ! r and l.
+                    arr(r) = tmp
+                end if
 
-          ! Subarray is larger than the threshold for a linear sort.
-          ! Choose median of left, center and right elements as partitioning
-          ! element a. Also rearrange so that (l) <= (l+1) <= (r).
+                if (arr(r) < arr(l + 1)) then
+                    tmp = arr(l + 1)         ! Swap the elements
+                    arr(l + 1) = arr(r)           ! r and l+1.
+                    arr(r) = tmp
+                end if
 
-          k = (l+r)/2
-          tmp      = arr(k)      ! Swap the elements
-          arr(k)   = arr(l+1)    ! k and l+1.
-          arr(l+1) = tmp
+                if (arr(l + 1) < arr(l)) then
+                    tmp = arr(l + 1)         ! Swap the elements
+                    arr(l + 1) = arr(l)           ! l and l+1.
+                    arr(l) = tmp
+                end if
 
-          if(arr(r) < arr(l)) then
-             tmp    = arr(l)             ! Swap the elements
-             arr(l) = arr(r)             ! r and l.
-             arr(r) = tmp
-          endif
+                ! Initialize the pointers for partitioning.
 
-          if(arr(r) < arr(l+1)) then
-             tmp      = arr(l+1)         ! Swap the elements
-             arr(l+1) = arr(r)           ! r and l+1.
-             arr(r)   = tmp
-          endif
+                i = l + 1
+                j = r
+                a = arr(l + 1)
 
-          if(arr(l+1) < arr(l)) then
-             tmp      = arr(l+1)         ! Swap the elements
-             arr(l+1) = arr(l)           ! l and l+1.
-             arr(l)   = tmp
-          endif
+                ! The innermost loop
 
-          ! Initialize the pointers for partitioning.
+                do
 
-          i = l+1
-          j = r
-          a = arr(l+1)
+                    ! Scan up to find element >= a.
+                    do
+                        i = i + 1
+                        if (a <= arr(i)) exit
+                    end do
 
-          ! The innermost loop
+                    ! Scan down to find element <= a.
+                    do
+                        j = j - 1
+                        if (arr(j) <= a) exit
+                    end do
 
-          do
+                    ! Exit the loop in case the pointers i and j crossed.
 
-             ! Scan up to find element >= a.
-             do
-                i = i+1
-                if(a <= arr(i)) exit
-             enddo
+                    if (j < i) exit
 
-             ! Scan down to find element <= a.
-             do
-                j = j-1
-                if(arr(j) <= a) exit
-             enddo
+                    ! Swap the element i and j.
+
+                    tmp = arr(i)
+                    arr(i) = arr(j)
+                    arr(j) = tmp
+                end do
 
-             ! Exit the loop in case the pointers i and j crossed.
+                ! Swap the entries j and l+1. Remember that a equals
+                ! arr(l+1).
 
-             if(j < i) exit
+                arr(l + 1) = arr(j)
+                arr(j) = a
 
-             ! Swap the element i and j.
+                ! Push pointers to larger subarray on stack,
+                ! process smaller subarray immediately.
 
-             tmp    = arr(i)
-             arr(i) = arr(j)
-             arr(j) = tmp
-          enddo
+                jStack = jStack + 2
+                if (jStack > nStack) then
 
-          ! Swap the entries j and l+1. Remember that a equals
-          ! arr(l+1).
+                    ! Storage of the stack is too small. Reallocate.
 
-          arr(l+1) = arr(j)
-          arr(j)   = a
+                    allocate (tmpStack(nStack), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortReals", &
+                                       "Memory allocation error for tmpStack")
+                    tmpStack = stack
+
+                    ! Free the memory of stack, store the old value of nStack
+                    ! in tmp and increase nStack.
 
-          ! Push pointers to larger subarray on stack,
-          ! process smaller subarray immediately.
+                    deallocate (stack, stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortReals", &
+                                       "Deallocation error for stack")
+                    ii = nStack
+                    nStack = nStack + 100
 
-          jStack = jStack + 2
-          if(jStack > nStack) then
+                    ! Allocate the memory for stack and copy the old values
+                    ! from tmpStack.
 
-             ! Storage of the stack is too small. Reallocate.
+                    allocate (stack(nStack), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortReals", &
+                                       "Memory reallocation error for stack")
+                    stack(1:ii) = tmpStack(1:ii)
 
-             allocate(tmpStack(nStack), stat=ierr)
-             if(ierr /= 0)                  &
-                  call terminate("qsortReals", &
-                  "Memory allocation error for tmpStack")
-             tmpStack = stack
+                    ! And finally release the memory of tmpStack.
+
+                    deallocate (tmpStack, stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortReals", &
+                                       "Deallocation error for tmpStack")
+                end if
 
-             ! Free the memory of stack, store the old value of nStack
-             ! in tmp and increase nStack.
+                if ((r - i + 1) >= (j - l)) then
+                    stack(jStack) = r
+                    r = j - 1
+                    stack(jStack - 1) = j
+                else
+                    stack(jStack) = j - 1
+                    stack(jStack - 1) = l
+                    l = j
+                end if
 
-             deallocate(stack, stat=ierr)
-             if(ierr /= 0)                  &
-                  call terminate("qsortReals", &
-                  "Deallocation error for stack")
-             ii = nStack
-             nStack = nStack + 100
+            end if
+        end do
 
-             ! Allocate the memory for stack and copy the old values
-             ! from tmpStack.
+        ! Release the memory of stack.
 
-             allocate(stack(nStack), stat=ierr)
-             if(ierr /= 0)                  &
-                  call terminate("qsortReals", &
-                  "Memory reallocation error for stack")
-             stack(1:ii) = tmpStack(1:ii)
+        deallocate (stack, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("qsortReals", &
+                           "Deallocation error for stack")
 
-             ! And finally release the memory of tmpStack.
-
-             deallocate(tmpStack, stat=ierr)
-             if(ierr /= 0)                  &
-                  call terminate("qsortReals", &
-                  "Deallocation error for tmpStack")
-          endif
+        ! Check in debug mode whether the array is really sorted.
 
-          if((r-i+1) >= (j-l)) then
-             stack(jStack)   = r
-             r               = j-1
-             stack(jStack-1) = j
-          else
-             stack(jStack)   = j-1
-             stack(jStack-1) = l
-             l               = j
-          endif
+        if (debug) then
+            do i = 1, (nn - 1)
+                if (arr(i + 1) < arr(i)) &
+                    call terminate("qsortReals", &
+                                   "Array is not sorted correctly")
+            end do
+        end if
 
-       endif
-    enddo
+    end subroutine qsortReals
 
-    ! Release the memory of stack.
+    subroutine qsortStrings(arr, nn)
+        !
+        !       qsortStrings sorts the given number of strings in increasing
+        !       order.
+        !
+        use constants
+        implicit none
+        !
+        !      Subroutine arguments
+        !
+        character(len=*), dimension(*), intent(inout) :: arr
+        integer(kind=intType), intent(in) :: nn
+        !
+        !      Local variables
+        !
+        integer(kind=intType), parameter :: m = 7
 
-    deallocate(stack, stat=ierr)
-    if(ierr /= 0)                  &
-         call terminate("qsortReals", &
-         "Deallocation error for stack")
+        integer(kind=intType) :: nStack
+        integer(kind=intType) :: i, j, k, r, l, jStack, ii
 
-    ! Check in debug mode whether the array is really sorted.
+        integer :: ierr
 
-    if( debug ) then
-       do i=1,(nn-1)
-          if(arr(i+1) < arr(i))          &
-               call terminate("qsortReals", &
-               "Array is not sorted correctly")
-       enddo
-    endif
+        character(len=maxStringLen) :: a, tmp
 
-  end subroutine qsortReals
+        integer(kind=intType), allocatable, dimension(:) :: stack
+        integer(kind=intType), allocatable, dimension(:) :: tmpStack
 
-  subroutine qsortStrings(arr, nn)
-    !
-    !       qsortStrings sorts the given number of strings in increasing
-    !       order.
-    !
-    use constants
-    implicit none
-    !
-    !      Subroutine arguments
-    !
-    character(len=*), dimension(*), intent(inout) :: arr
-    integer(kind=intType), intent(in)             :: nn
-    !
-    !      Local variables
-    !
-    integer(kind=intType), parameter :: m = 7
-
-    integer(kind=intType) :: nStack
-    integer(kind=intType) :: i, j, k, r, l, jStack, ii
-
-    integer :: ierr
-
-    character(len=maxStringLen) :: a, tmp
-
-    integer(kind=intType), allocatable, dimension(:) :: stack
-    integer(kind=intType), allocatable, dimension(:) :: tmpStack
-
-    ! Allocate the memory for stack.
-
-    nStack = 100
-    allocate(stack(nStack), stat=ierr)
-    if(ierr /= 0)                    &
-         call terminate("qsortStrings", &
-         "Memory allocation failure for stack")
-
-    ! Initialize the variables that control the sorting.
-
-    jStack = 0
-    l      = 1
-    r      = nn
-
-    ! Start of the algorithm
-
-    do
-
-       ! Check for the size of the subarray.
-
-       if((r-l) < m) then
-
-          ! Perform insertion sort
-
-          do j=l+1,r
-             a = arr(j)
-             do i=(j-1),l,-1
-                if(arr(i) <= a) exit
-                arr(i+1) = arr(i)
-             enddo
-             arr(i+1) = a
-          enddo
-
-          ! In case there are no more elements on the stack, exit from
-          ! the outermost do-loop. Algorithm has finished.
-
-          if(jStack == 0) exit
+        ! Allocate the memory for stack.
 
-          ! Pop stack and begin a new round of partitioning.
+        nStack = 100
+        allocate (stack(nStack), stat=ierr)
+        if (ierr /= 0) &
+            call terminate("qsortStrings", &
+                           "Memory allocation failure for stack")
 
-          r = stack(jStack)
-          l = stack(jStack-1)
-          jStack = jStack - 2
+        ! Initialize the variables that control the sorting.
 
-       else
+        jStack = 0
+        l = 1
+        r = nn
 
-          ! Subarray is larger than the threshold for a linear sort.
-          ! Choose median of left, center and right elements as partitioning
-          ! element a. Also rearrange so that (l) <= (l+1) <= (r).
+        ! Start of the algorithm
 
-          k = (l+r)/2
-          tmp      = arr(k)      ! Swap the elements
-          arr(k)   = arr(l+1)    ! k and l+1.
-          arr(l+1) = tmp
+        do
 
-          if(arr(r) < arr(l)) then
-             tmp    = arr(l)             ! Swap the elements
-             arr(l) = arr(r)             ! r and l.
-             arr(r) = tmp
-          endif
+            ! Check for the size of the subarray.
 
-          if(arr(r) < arr(l+1)) then
-             tmp      = arr(l+1)         ! Swap the elements
-             arr(l+1) = arr(r)           ! r and l+1.
-             arr(r)   = tmp
-          endif
+            if ((r - l) < m) then
 
-          if(arr(l+1) < arr(l)) then
-             tmp      = arr(l+1)         ! Swap the elements
-             arr(l+1) = arr(l)           ! l and l+1.
-             arr(l)   = tmp
-          endif
+                ! Perform insertion sort
 
-          ! Initialize the pointers for partitioning.
+                do j = l + 1, r
+                    a = arr(j)
+                    do i = (j - 1), l, -1
+                        if (arr(i) <= a) exit
+                        arr(i + 1) = arr(i)
+                    end do
+                    arr(i + 1) = a
+                end do
 
-          i = l+1
-          j = r
-          a = arr(l+1)
+                ! In case there are no more elements on the stack, exit from
+                ! the outermost do-loop. Algorithm has finished.
 
-          ! The innermost loop
+                if (jStack == 0) exit
 
-          do
+                ! Pop stack and begin a new round of partitioning.
 
-             ! Scan up to find element >= a.
-             do
-                i = i+1
-                if(a <= arr(i)) exit
-             enddo
+                r = stack(jStack)
+                l = stack(jStack - 1)
+                jStack = jStack - 2
 
-             ! Scan down to find element <= a.
-             do
-                j = j-1
-                if(arr(j) <= a) exit
-             enddo
+            else
 
-             ! Exit the loop in case the pointers i and j crossed.
+                ! Subarray is larger than the threshold for a linear sort.
+                ! Choose median of left, center and right elements as partitioning
+                ! element a. Also rearrange so that (l) <= (l+1) <= (r).
 
-             if(j < i) exit
+                k = (l + r) / 2
+                tmp = arr(k)      ! Swap the elements
+                arr(k) = arr(l + 1)    ! k and l+1.
+                arr(l + 1) = tmp
 
-             ! Swap the element i and j.
+                if (arr(r) < arr(l)) then
+                    tmp = arr(l)             ! Swap the elements
+                    arr(l) = arr(r)             ! r and l.
+                    arr(r) = tmp
+                end if
 
-             tmp    = arr(i)
-             arr(i) = arr(j)
-             arr(j) = tmp
-          enddo
+                if (arr(r) < arr(l + 1)) then
+                    tmp = arr(l + 1)         ! Swap the elements
+                    arr(l + 1) = arr(r)           ! r and l+1.
+                    arr(r) = tmp
+                end if
 
-          ! Swap the entries j and l+1. Remember that a equals
-          ! arr(l+1).
+                if (arr(l + 1) < arr(l)) then
+                    tmp = arr(l + 1)         ! Swap the elements
+                    arr(l + 1) = arr(l)           ! l and l+1.
+                    arr(l) = tmp
+                end if
 
-          arr(l+1) = arr(j)
-          arr(j)   = a
+                ! Initialize the pointers for partitioning.
 
-          ! Push pointers to larger subarray on stack,
-          ! process smaller subarray immediately.
+                i = l + 1
+                j = r
+                a = arr(l + 1)
 
-          jStack = jStack + 2
-          if(jStack > nStack) then
+                ! The innermost loop
 
-             ! Storage of the stack is too small. Reallocate.
+                do
 
-             allocate(tmpStack(nStack), stat=ierr)
-             if(ierr /= 0)                    &
-                  call terminate("qsortStrings", &
-                  "Memory allocation error for tmpStack")
-             tmpStack = stack
+                    ! Scan up to find element >= a.
+                    do
+                        i = i + 1
+                        if (a <= arr(i)) exit
+                    end do
 
-             ! Free the memory of stack, store the old value of nStack
-             ! in tmp and increase nStack.
+                    ! Scan down to find element <= a.
+                    do
+                        j = j - 1
+                        if (arr(j) <= a) exit
+                    end do
 
-             deallocate(stack, stat=ierr)
-             if(ierr /= 0)                    &
-                  call terminate("qsortStrings", &
-                  "Deallocation error for stack")
-             ii = nStack
-             nStack = nStack + 100
+                    ! Exit the loop in case the pointers i and j crossed.
 
-             ! Allocate the memory for stack and copy the old values
-             ! from tmpStack.
-
-             allocate(stack(nStack), stat=ierr)
-             if(ierr /= 0)                    &
-                  call terminate("qsortStrings", &
-                  "Memory reallocation error for stack")
-             stack(1:ii) = tmpStack(1:ii)
-
-             ! And finally release the memory of tmpStack.
-
-             deallocate(tmpStack, stat=ierr)
-             if(ierr /= 0)                    &
-                  call terminate("qsortStrings", &
-                  "Deallocation error for tmpStack")
-          endif
-
-          if((r-i+1) >= (j-l)) then
-             stack(jStack)   = r
-             r               = j-1
-             stack(jStack-1) = j
-          else
-             stack(jStack)   = j-1
-             stack(jStack-1) = l
-             l               = j
-          endif
-
-       endif
-    enddo
-
-    ! Release the memory of stack.
-
-    deallocate(stack, stat=ierr)
-    if(ierr /= 0)                    &
-         call terminate("qsortStrings", &
-         "Deallocation error for stack")
-
-    ! Check in debug mode whether the array is really sorted.
-
-    if( debug ) then
-       do i=1,(nn-1)
-          if(arr(i+1) < arr(i))            &
-               call terminate("qsortStrings", &
-               "Array is not sorted correctly")
-       enddo
-    endif
-
-  end subroutine qsortStrings
-
-
-  function bsearchReals(key, base)
-    !
-    !       bsearchReals returns the index in base where key is stored.
-    !       A binary search algorithm is used here, so it is assumed that
-    !       base is sorted in increasing order. In case key appears more
-    !       than once in base, the result is arbitrary. If key is not
-    !       found, a zero is returned.
-    !
-    use precision
-    implicit none
-    !
-    !      Function type
-    !
-    integer(kind=intType) :: bsearchReals
-    !
-    !      Function arguments.
-    !
-    real(kind=realType), intent(in)               :: key
-    real(kind=realType), dimension(:), intent(in) :: base
-    integer(kind=intType)                         :: nn
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: ii, pos, start
-    logical               :: entryFound
+                    if (j < i) exit
 
-    ! Initialize some values.
-
-    start       = 1
-    ii          = size(base)
-    entryFound = .false.
-
-    ! Binary search to find key.
-
-    do
-       ! Condition for breaking the loop
-
-       if(ii == 0) exit
-
-       ! Determine the position in the array to compare.
-
-       pos = start + ii/2
-
-       ! In case this is the entry, break the search loop.
-
-       if(base(pos) == key) then
-          entryFound = .true.
-          exit
-       endif
-
-       ! In case the search key is larger than the current position,
-       ! only parts to the right must be searched. Remember that base
-       ! is sorted in increasing order. Nothing needs to be done if the
-       ! key is smaller than the current element.
-
-       if(key > base(pos)) then
-          start = pos +1
-          ii    = ii -1
-       endif
-
-       ! Modify ii for the next branch to search.
-
-       ii = ii/2
-    enddo
-
-    ! Set bsearchReals. This depends whether the key was found.
-
-    if( entryFound ) then
-       bsearchReals = pos
-    else
-       bsearchReals = 0
-    endif
-
-  end function bsearchReals
-
-  function bsearchStrings(key, base)
-    !
-    !       bsearchStrings returns the index in base where key is stored.
-    !       A binary search algorithm is used here, so it is assumed that
-    !       base is sorted in increasing order. In case key appears more
-    !       than once in base, the result is arbitrary. If key is not
-    !       found, a zero is returned.
-    !
-    use precision
-    implicit none
-    !
-    !      Function type
-    !
-    integer(kind=intType) :: bsearchStrings
-    !
-    !      Function arguments.
-    !
-    character(len=*), intent(in)               :: key
-    character(len=*), dimension(:), intent(in) :: base
-    integer(kind=intType)                      :: nn
-    !
-    !      Local variables.
-    !
-    integer(kind=intType) :: ii, pos, start
-    logical               :: entryFound
-
-    ! Initialize some values.
-
-    start       = 1
-    ii          = size(base)
-    entryFound = .false.
-
-    ! Binary search to find key.
-
-    do
-       ! Condition for breaking the loop
-
-       if(ii == 0) exit
-
-       ! Determine the position in the array to compare.
-
-       pos = start + ii/2
-
-       ! In case this is the entry, break the search loop.
-
-       if(base(pos) == key) then
-          entryFound = .true.
-          exit
-       endif
-
-       ! In case the search key is larger than the current position,
-       ! only parts to the right must be searched. Remember that base
-       ! is sorted in increasing order. Nothing needs to be done if the
-       ! key is smaller than the current element.
-
-       if(key > base(pos)) then
-          start = pos +1
-          ii    = ii -1
-       endif
-
-       ! Modify ii for the next branch to search.
-
-       ii = ii/2
-    enddo
-
-    ! Set bsearchStrings. This depends whether the key was found.
-
-    if( entryFound ) then
-       bsearchStrings = pos
-    else
-       bsearchStrings = 0
-    endif
-
-  end function bsearchStrings
-  ! -----------------------------------------------------------------
-  !
-  ! This file contains two functions that are used to find the unique
-  ! values from a list of integers as well as the inverse mapping. These
-  ! routines have been generously borrowed from
-  !
-  ! Michel Olagnon
-  ! http://www.fortran-2000.com/rank/
-  !
-  ! Slight modifications of I_unirnk and I_uniinv for use with ADflow by
-  ! Gaetan Kenway. The 'unique' subroutine was added that combines the
-  ! functionality of both routines in a single call.
-
-  subroutine unique(arr, nn, n_unique, inverse)
-    use precision
-    implicit none
-    ! Input:
-    ! arr: Array of integers to be unique-sorted. Overwirtten on output
-    !      with unique values
-    ! nn : Number of input values
-    ! n_unique: Number of unique output values in arr. Only the first n_unique
-    !           values of arr on output are meaningful
-    ! inverse: size(nn): For each origianl entry in arr, this gives the index
-    !          into to unique, sorted array.
-
-    ! Input Arguments
-    integer(kind=intType), intent(in) :: nn
-    integer(kind=intType), intent(inout), dimension(nn) :: arr
-
-    ! Output Arguments
-    integer(kind=intType), intent(out) :: n_unique
-    integer(kind=intType), intent(out), dimension(nn) :: inverse
-
-    ! Local Arguments
-    integer(kind=intType), dimension(:), allocatable :: temp_arr, irngt
-    integer(kind=intType) :: i
-
-    allocate(temp_arr(nn), irngt(nn))
-    ! Copy arr to temp array:
-    temp_arr(:) = arr(:)
-
-    call i_uniinv(arr, nn, inverse)
-    call i_unirnk(arr, nn, irngt, n_unique)
-
-    ! Since unirank is an arg sort, fill in sorted values into arr_unique
-
-    do i=1,n_unique
-       arr(i) = temp_arr(irngt(i))
-    end do
-
-    ! Fill remaining values of array with zeros since these have no
-    ! meaning
-    do i=n_unique+1,nn
-       arr(i) = 0_intType
-    end do
-    deallocate(temp_arr, irngt)
-  end subroutine unique
-
-
-  Subroutine I_unirnk (XVALT, NVAL, IRNGT, NUNI)
-    use precision
-    implicit none
-
-    ! __________________________________________________________
-    !   UNIRNK = Merge-sort ranking of an array, with removal of
-    !   duplicate entries.
-    !   The routine is similar to pure merge-sort ranking, but on
-    !   the last pass, it discards indices that correspond to
-    !   duplicate entries.
-    !   For performance reasons, the first 2 passes are taken
-    !   out of the standard loop, and use dedicated coding.
-    ! __________________________________________________________
-    ! __________________________________________________________
-    integer(kind=intType), intent(in) :: NVAL
-    integer(kind=intType), Dimension (NVAL), Intent (In) :: XVALT
-
-    integer(kind=intType), Dimension (NVAL), Intent (Out) :: IRNGT
-    integer(kind=intType), Intent (Out) :: NUNI
-    ! __________________________________________________________
-    integer(kind=intType), Dimension (:), allocatable :: JWRKT
-    integer(kind=intType) :: LMTNA, LMTNC, IRNG, IRNG1, IRNG2
-    integer(kind=intType) :: IIND, IWRKD, IWRK, IWRKF, JINDA, IINDA, IINDB
-    integer(kind=intType) :: XTST, XVALA, XVALB
-
-
-    NUNI = NVAL
-    !
-    Select Case (NVAL)
-    Case (:0)
-       Return
-    Case (1)
-       IRNGT (1) = 1
-       Return
-    Case Default
-       Continue
-    End Select
-    allocate(JWRKT(NVAL))
-    !
-    !  Fill-in the index array, creating ordered couples
-    !
-    Do IIND = 2, NVAL, 2
-       If (XVALT(IIND-1) < XVALT(IIND)) Then
-          IRNGT (IIND-1) = IIND - 1
-          IRNGT (IIND) = IIND
-       Else
-          IRNGT (IIND-1) = IIND
-          IRNGT (IIND) = IIND - 1
-       End If
-    End Do
-    If (Modulo(NVAL, 2) /= 0) Then
-       IRNGT (NVAL) = NVAL
-    End If
-    !
-    !  We will now have ordered subsets A - B - A - B - ...
-    !  and merge A and B couples into     C   -   C   - ...
-    !
-    LMTNA = 2
-    LMTNC = 4
-    !
-    !  First iteration. The length of the ordered subsets goes from 2 to 4
-    !
-    Do
-       If (NVAL <= 4) Exit
-       !
-       !   Loop on merges of A and B into C
-       !
-       Do IWRKD = 0, NVAL - 1, 4
-          If ((IWRKD+4) > NVAL) Then
-             If ((IWRKD+2) >= NVAL) Exit
-             !
-             !   1 2 3
-             !
-             If (XVALT(IRNGT(IWRKD+2)) <= XVALT(IRNGT(IWRKD+3))) Exit
-             !
-             !   1 3 2
-             !
-             If (XVALT(IRNGT(IWRKD+1)) <= XVALT(IRNGT(IWRKD+3))) Then
-                IRNG2 = IRNGT (IWRKD+2)
-                IRNGT (IWRKD+2) = IRNGT (IWRKD+3)
-                IRNGT (IWRKD+3) = IRNG2
+                    ! Swap the element i and j.
+
+                    tmp = arr(i)
+                    arr(i) = arr(j)
+                    arr(j) = tmp
+                end do
+
+                ! Swap the entries j and l+1. Remember that a equals
+                ! arr(l+1).
+
+                arr(l + 1) = arr(j)
+                arr(j) = a
+
+                ! Push pointers to larger subarray on stack,
+                ! process smaller subarray immediately.
+
+                jStack = jStack + 2
+                if (jStack > nStack) then
+
+                    ! Storage of the stack is too small. Reallocate.
+
+                    allocate (tmpStack(nStack), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortStrings", &
+                                       "Memory allocation error for tmpStack")
+                    tmpStack = stack
+
+                    ! Free the memory of stack, store the old value of nStack
+                    ! in tmp and increase nStack.
+
+                    deallocate (stack, stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortStrings", &
+                                       "Deallocation error for stack")
+                    ii = nStack
+                    nStack = nStack + 100
+
+                    ! Allocate the memory for stack and copy the old values
+                    ! from tmpStack.
+
+                    allocate (stack(nStack), stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortStrings", &
+                                       "Memory reallocation error for stack")
+                    stack(1:ii) = tmpStack(1:ii)
+
+                    ! And finally release the memory of tmpStack.
+
+                    deallocate (tmpStack, stat=ierr)
+                    if (ierr /= 0) &
+                        call terminate("qsortStrings", &
+                                       "Deallocation error for tmpStack")
+                end if
+
+                if ((r - i + 1) >= (j - l)) then
+                    stack(jStack) = r
+                    r = j - 1
+                    stack(jStack - 1) = j
+                else
+                    stack(jStack) = j - 1
+                    stack(jStack - 1) = l
+                    l = j
+                end if
+
+            end if
+        end do
+
+        ! Release the memory of stack.
+
+        deallocate (stack, stat=ierr)
+        if (ierr /= 0) &
+            call terminate("qsortStrings", &
+                           "Deallocation error for stack")
+
+        ! Check in debug mode whether the array is really sorted.
+
+        if (debug) then
+            do i = 1, (nn - 1)
+                if (arr(i + 1) < arr(i)) &
+                    call terminate("qsortStrings", &
+                                   "Array is not sorted correctly")
+            end do
+        end if
+
+    end subroutine qsortStrings
+
+    function bsearchReals(key, base)
+        !
+        !       bsearchReals returns the index in base where key is stored.
+        !       A binary search algorithm is used here, so it is assumed that
+        !       base is sorted in increasing order. In case key appears more
+        !       than once in base, the result is arbitrary. If key is not
+        !       found, a zero is returned.
+        !
+        use precision
+        implicit none
+        !
+        !      Function type
+        !
+        integer(kind=intType) :: bsearchReals
+        !
+        !      Function arguments.
+        !
+        real(kind=realType), intent(in) :: key
+        real(kind=realType), dimension(:), intent(in) :: base
+        integer(kind=intType) :: nn
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: ii, pos, start
+        logical :: entryFound
+
+        ! Initialize some values.
+
+        start = 1
+        ii = size(base)
+        entryFound = .false.
+
+        ! Binary search to find key.
+
+        do
+            ! Condition for breaking the loop
+
+            if (ii == 0) exit
+
+            ! Determine the position in the array to compare.
+
+            pos = start + ii / 2
+
+            ! In case this is the entry, break the search loop.
+
+            if (base(pos) == key) then
+                entryFound = .true.
+                exit
+            end if
+
+            ! In case the search key is larger than the current position,
+            ! only parts to the right must be searched. Remember that base
+            ! is sorted in increasing order. Nothing needs to be done if the
+            ! key is smaller than the current element.
+
+            if (key > base(pos)) then
+                start = pos + 1
+                ii = ii - 1
+            end if
+
+            ! Modify ii for the next branch to search.
+
+            ii = ii / 2
+        end do
+
+        ! Set bsearchReals. This depends whether the key was found.
+
+        if (entryFound) then
+            bsearchReals = pos
+        else
+            bsearchReals = 0
+        end if
+
+    end function bsearchReals
+
+    function bsearchStrings(key, base)
+        !
+        !       bsearchStrings returns the index in base where key is stored.
+        !       A binary search algorithm is used here, so it is assumed that
+        !       base is sorted in increasing order. In case key appears more
+        !       than once in base, the result is arbitrary. If key is not
+        !       found, a zero is returned.
+        !
+        use precision
+        implicit none
+        !
+        !      Function type
+        !
+        integer(kind=intType) :: bsearchStrings
+        !
+        !      Function arguments.
+        !
+        character(len=*), intent(in) :: key
+        character(len=*), dimension(:), intent(in) :: base
+        integer(kind=intType) :: nn
+        !
+        !      Local variables.
+        !
+        integer(kind=intType) :: ii, pos, start
+        logical :: entryFound
+
+        ! Initialize some values.
+
+        start = 1
+        ii = size(base)
+        entryFound = .false.
+
+        ! Binary search to find key.
+
+        do
+            ! Condition for breaking the loop
+
+            if (ii == 0) exit
+
+            ! Determine the position in the array to compare.
+
+            pos = start + ii / 2
+
+            ! In case this is the entry, break the search loop.
+
+            if (base(pos) == key) then
+                entryFound = .true.
+                exit
+            end if
+
+            ! In case the search key is larger than the current position,
+            ! only parts to the right must be searched. Remember that base
+            ! is sorted in increasing order. Nothing needs to be done if the
+            ! key is smaller than the current element.
+
+            if (key > base(pos)) then
+                start = pos + 1
+                ii = ii - 1
+            end if
+
+            ! Modify ii for the next branch to search.
+
+            ii = ii / 2
+        end do
+
+        ! Set bsearchStrings. This depends whether the key was found.
+
+        if (entryFound) then
+            bsearchStrings = pos
+        else
+            bsearchStrings = 0
+        end if
+
+    end function bsearchStrings
+    ! -----------------------------------------------------------------
+    !
+    ! This file contains two functions that are used to find the unique
+    ! values from a list of integers as well as the inverse mapping. These
+    ! routines have been generously borrowed from
+    !
+    ! Michel Olagnon
+    ! http://www.fortran-2000.com/rank/
+    !
+    ! Slight modifications of I_unirnk and I_uniinv for use with ADflow by
+    ! Gaetan Kenway. The 'unique' subroutine was added that combines the
+    ! functionality of both routines in a single call.
+
+    subroutine unique(arr, nn, n_unique, inverse)
+        use precision
+        implicit none
+        ! Input:
+        ! arr: Array of integers to be unique-sorted. Overwirtten on output
+        !      with unique values
+        ! nn : Number of input values
+        ! n_unique: Number of unique output values in arr. Only the first n_unique
+        !           values of arr on output are meaningful
+        ! inverse: size(nn): For each origianl entry in arr, this gives the index
+        !          into to unique, sorted array.
+
+        ! Input Arguments
+        integer(kind=intType), intent(in) :: nn
+        integer(kind=intType), intent(inout), dimension(nn) :: arr
+
+        ! Output Arguments
+        integer(kind=intType), intent(out) :: n_unique
+        integer(kind=intType), intent(out), dimension(nn) :: inverse
+
+        ! Local Arguments
+        integer(kind=intType), dimension(:), allocatable :: temp_arr, irngt
+        integer(kind=intType) :: i
+
+        allocate (temp_arr(nn), irngt(nn))
+        ! Copy arr to temp array:
+        temp_arr(:) = arr(:)
+
+        call i_uniinv(arr, nn, inverse)
+        call i_unirnk(arr, nn, irngt, n_unique)
+
+        ! Since unirank is an arg sort, fill in sorted values into arr_unique
+
+        do i = 1, n_unique
+            arr(i) = temp_arr(irngt(i))
+        end do
+
+        ! Fill remaining values of array with zeros since these have no
+        ! meaning
+        do i = n_unique + 1, nn
+            arr(i) = 0_intType
+        end do
+        deallocate (temp_arr, irngt)
+    end subroutine unique
+
+    Subroutine I_unirnk(XVALT, NVAL, IRNGT, NUNI)
+        use precision
+        implicit none
+
+        ! __________________________________________________________
+        !   UNIRNK = Merge-sort ranking of an array, with removal of
+        !   duplicate entries.
+        !   The routine is similar to pure merge-sort ranking, but on
+        !   the last pass, it discards indices that correspond to
+        !   duplicate entries.
+        !   For performance reasons, the first 2 passes are taken
+        !   out of the standard loop, and use dedicated coding.
+        ! __________________________________________________________
+        ! __________________________________________________________
+        integer(kind=intType), intent(in) :: NVAL
+        integer(kind=intType), Dimension(NVAL), Intent(In) :: XVALT
+
+        integer(kind=intType), Dimension(NVAL), Intent(Out) :: IRNGT
+        integer(kind=intType), Intent(Out) :: NUNI
+        ! __________________________________________________________
+        integer(kind=intType), Dimension(:), allocatable :: JWRKT
+        integer(kind=intType) :: LMTNA, LMTNC, IRNG, IRNG1, IRNG2
+        integer(kind=intType) :: IIND, IWRKD, IWRK, IWRKF, JINDA, IINDA, IINDB
+        integer(kind=intType) :: XTST, XVALA, XVALB
+
+        NUNI = NVAL
+        !
+        Select Case (NVAL)
+        Case (:0)
+            Return
+        Case (1)
+            IRNGT(1) = 1
+            Return
+        Case Default
+            Continue
+        End Select
+        allocate (JWRKT(NVAL))
+        !
+        !  Fill-in the index array, creating ordered couples
+        !
+        Do IIND = 2, NVAL, 2
+            If (XVALT(IIND - 1) < XVALT(IIND)) Then
+                IRNGT(IIND - 1) = IIND - 1
+                IRNGT(IIND) = IIND
+            Else
+                IRNGT(IIND - 1) = IIND
+                IRNGT(IIND) = IIND - 1
+            End If
+        End Do
+        If (Modulo(NVAL, 2) /= 0) Then
+            IRNGT(NVAL) = NVAL
+        End If
+        !
+        !  We will now have ordered subsets A - B - A - B - ...
+        !  and merge A and B couples into     C   -   C   - ...
+        !
+        LMTNA = 2
+        LMTNC = 4
+        !
+        !  First iteration. The length of the ordered subsets goes from 2 to 4
+        !
+        Do
+            If (NVAL <= 4) Exit
+            !
+            !   Loop on merges of A and B into C
+            !
+            Do IWRKD = 0, NVAL - 1, 4
+                If ((IWRKD + 4) > NVAL) Then
+                    If ((IWRKD + 2) >= NVAL) Exit
+                    !
+                    !   1 2 3
+                    !
+                    If (XVALT(IRNGT(IWRKD + 2)) <= XVALT(IRNGT(IWRKD + 3))) Exit
+                    !
+                    !   1 3 2
+                    !
+                    If (XVALT(IRNGT(IWRKD + 1)) <= XVALT(IRNGT(IWRKD + 3))) Then
+                        IRNG2 = IRNGT(IWRKD + 2)
+                        IRNGT(IWRKD + 2) = IRNGT(IWRKD + 3)
+                        IRNGT(IWRKD + 3) = IRNG2
+                        !
+                        !   3 1 2
+                        !
+                    Else
+                        IRNG1 = IRNGT(IWRKD + 1)
+                        IRNGT(IWRKD + 1) = IRNGT(IWRKD + 3)
+                        IRNGT(IWRKD + 3) = IRNGT(IWRKD + 2)
+                        IRNGT(IWRKD + 2) = IRNG1
+                    End If
+                    Exit
+                End If
                 !
-                !   3 1 2
+                !   1 2 3 4
                 !
-             Else
-                IRNG1 = IRNGT (IWRKD+1)
-                IRNGT (IWRKD+1) = IRNGT (IWRKD+3)
-                IRNGT (IWRKD+3) = IRNGT (IWRKD+2)
-                IRNGT (IWRKD+2) = IRNG1
-             End If
-             Exit
-          End If
-          !
-          !   1 2 3 4
-          !
-          If (XVALT(IRNGT(IWRKD+2)) <= XVALT(IRNGT(IWRKD+3))) Cycle
-          !
-          !   1 3 x x
-          !
-          If (XVALT(IRNGT(IWRKD+1)) <= XVALT(IRNGT(IWRKD+3))) Then
-             IRNG2 = IRNGT (IWRKD+2)
-             IRNGT (IWRKD+2) = IRNGT (IWRKD+3)
-             If (XVALT(IRNG2) <= XVALT(IRNGT(IWRKD+4))) Then
-                !   1 3 2 4
-                IRNGT (IWRKD+3) = IRNG2
-             Else
-                !   1 3 4 2
-                IRNGT (IWRKD+3) = IRNGT (IWRKD+4)
-                IRNGT (IWRKD+4) = IRNG2
-             End If
-             !
-             !   3 x x x
-             !
-          Else
-             IRNG1 = IRNGT (IWRKD+1)
-             IRNG2 = IRNGT (IWRKD+2)
-             IRNGT (IWRKD+1) = IRNGT (IWRKD+3)
-             If (XVALT(IRNG1) <= XVALT(IRNGT(IWRKD+4))) Then
-                IRNGT (IWRKD+2) = IRNG1
-                If (XVALT(IRNG2) <= XVALT(IRNGT(IWRKD+4))) Then
-                   !   3 1 2 4
-                   IRNGT (IWRKD+3) = IRNG2
+                If (XVALT(IRNGT(IWRKD + 2)) <= XVALT(IRNGT(IWRKD + 3))) Cycle
+                !
+                !   1 3 x x
+                !
+                If (XVALT(IRNGT(IWRKD + 1)) <= XVALT(IRNGT(IWRKD + 3))) Then
+                    IRNG2 = IRNGT(IWRKD + 2)
+                    IRNGT(IWRKD + 2) = IRNGT(IWRKD + 3)
+                    If (XVALT(IRNG2) <= XVALT(IRNGT(IWRKD + 4))) Then
+                        !   1 3 2 4
+                        IRNGT(IWRKD + 3) = IRNG2
+                    Else
+                        !   1 3 4 2
+                        IRNGT(IWRKD + 3) = IRNGT(IWRKD + 4)
+                        IRNGT(IWRKD + 4) = IRNG2
+                    End If
+                    !
+                    !   3 x x x
+                    !
                 Else
-                   !   3 1 4 2
-                   IRNGT (IWRKD+3) = IRNGT (IWRKD+4)
-                   IRNGT (IWRKD+4) = IRNG2
+                    IRNG1 = IRNGT(IWRKD + 1)
+                    IRNG2 = IRNGT(IWRKD + 2)
+                    IRNGT(IWRKD + 1) = IRNGT(IWRKD + 3)
+                    If (XVALT(IRNG1) <= XVALT(IRNGT(IWRKD + 4))) Then
+                        IRNGT(IWRKD + 2) = IRNG1
+                        If (XVALT(IRNG2) <= XVALT(IRNGT(IWRKD + 4))) Then
+                            !   3 1 2 4
+                            IRNGT(IWRKD + 3) = IRNG2
+                        Else
+                            !   3 1 4 2
+                            IRNGT(IWRKD + 3) = IRNGT(IWRKD + 4)
+                            IRNGT(IWRKD + 4) = IRNG2
+                        End If
+                    Else
+                        !   3 4 1 2
+                        IRNGT(IWRKD + 2) = IRNGT(IWRKD + 4)
+                        IRNGT(IWRKD + 3) = IRNG1
+                        IRNGT(IWRKD + 4) = IRNG2
+                    End If
                 End If
-             Else
-                !   3 4 1 2
-                IRNGT (IWRKD+2) = IRNGT (IWRKD+4)
-                IRNGT (IWRKD+3) = IRNG1
-                IRNGT (IWRKD+4) = IRNG2
-             End If
-          End If
-       End Do
-       !
-       !  The Cs become As and Bs
-       !
-       LMTNA = 4
-       Exit
-    End Do
-    !
-    !  Iteration loop. Each time, the length of the ordered subsets
-    !  is doubled.
-    !
-    Do
-       If (2*LMTNA >= NVAL) Exit
-       IWRKF = 0
-       LMTNC = 2 * LMTNC
-       !
-       !   Loop on merges of A and B into C
-       !
-       Do
-          IWRK = IWRKF
-          IWRKD = IWRKF + 1
-          JINDA = IWRKF + LMTNA
-          IWRKF = IWRKF + LMTNC
-          If (IWRKF >= NVAL) Then
-             If (JINDA >= NVAL) Exit
-             IWRKF = NVAL
-          End If
-          IINDA = 1
-          IINDB = JINDA + 1
-          !
-          !  One steps in the C subset, that we create in the final rank array
-          !
-          !  Make a copy of the rank array for the iteration
-          !
-          JWRKT (1:LMTNA) = IRNGT (IWRKD:JINDA)
-          XVALA = XVALT (JWRKT(IINDA))
-          XVALB = XVALT (IRNGT(IINDB))
-          !
-          Do
-             IWRK = IWRK + 1
-             !
-             !  We still have unprocessed values in both A and B
-             !
-             If (XVALA > XVALB) Then
-                IRNGT (IWRK) = IRNGT (IINDB)
-                IINDB = IINDB + 1
-                If (IINDB > IWRKF) Then
-                   !  Only A still with unprocessed values
-                   IRNGT (IWRK+1:IWRKF) = JWRKT (IINDA:LMTNA)
-                   Exit
+            End Do
+            !
+            !  The Cs become As and Bs
+            !
+            LMTNA = 4
+            Exit
+        End Do
+        !
+        !  Iteration loop. Each time, the length of the ordered subsets
+        !  is doubled.
+        !
+        Do
+            If (2 * LMTNA >= NVAL) Exit
+            IWRKF = 0
+            LMTNC = 2 * LMTNC
+            !
+            !   Loop on merges of A and B into C
+            !
+            Do
+                IWRK = IWRKF
+                IWRKD = IWRKF + 1
+                JINDA = IWRKF + LMTNA
+                IWRKF = IWRKF + LMTNC
+                If (IWRKF >= NVAL) Then
+                    If (JINDA >= NVAL) Exit
+                    IWRKF = NVAL
                 End If
-                XVALB = XVALT (IRNGT(IINDB))
-             Else
-                IRNGT (IWRK) = JWRKT (IINDA)
-                IINDA = IINDA + 1
-                If (IINDA > LMTNA) Exit! Only B still with unprocessed values
-                XVALA = XVALT (JWRKT(IINDA))
-             End If
-             !
-          End Do
-       End Do
-       !
-       !  The Cs become As and Bs
-       !
-       LMTNA = 2 * LMTNA
-    End Do
-    !
-    !   Last merge of A and B into C, with removal of duplicates.
-    !
-    IINDA = 1
-    IINDB = LMTNA + 1
-    NUNI = 0
-    !
-    !  One steps in the C subset, that we create in the final rank array
-    !
-    JWRKT (1:LMTNA) = IRNGT (1:LMTNA)
-    If (IINDB <= NVAL) Then
-       XTST = I_NEARLESS (Min(XVALT(JWRKT(1)), XVALT(IRNGT(IINDB))))
-    Else
-       XTST = I_NEARLESS (XVALT(JWRKT(1)))
-    Endif
-    Do IWRK = 1, NVAL
-       !
-       !  We still have unprocessed values in both A and B
-       !
-       If (IINDA <= LMTNA) Then
-          If (IINDB <= NVAL) Then
-             If (XVALT(JWRKT(IINDA)) > XVALT(IRNGT(IINDB))) Then
-                IRNG = IRNGT (IINDB)
-                IINDB = IINDB + 1
-             Else
-                IRNG = JWRKT (IINDA)
-                IINDA = IINDA + 1
-             End If
-          Else
-             !
-             !  Only A still with unprocessed values
-             !
-             IRNG = JWRKT (IINDA)
-             IINDA = IINDA + 1
-          End If
-       Else
-          !
-          !  Only B still with unprocessed values
-          !
-          IRNG = IRNGT (IWRK)
-       End If
-       If (XVALT(IRNG) > XTST) Then
-          XTST = XVALT (IRNG)
-          NUNI = NUNI + 1
-          IRNGT (NUNI) = IRNG
-       End If
-       !
-    End Do
-    deallocate(JWRKT)
-    Return
-    !
-  End Subroutine I_unirnk
-
-  Subroutine I_uniinv (XDONT, NVAL, IGOEST)
-    use precision
-    implicit none
-    ! __________________________________________________________
-    !   UNIINV = Merge-sort inverse ranking of an array, with removal of
-    !   duplicate entries.
-    !   The routine is similar to pure merge-sort ranking, but on
-    !   the last pass, it sets indices in IGOEST to the rank
-    !   of the value in the ordered set with duplicates removed.
-    !   For performance reasons, the first 2 passes are taken
-    !   out of the standard loop, and use dedicated coding.
-    ! __________________________________________________________
-    ! __________________________________________________________
-    Integer(kind=IntType), intent(in) :: NVAL
-    Integer(kind=IntType), Dimension (NVAL), Intent (In)  :: XDONT
-    Integer(kind=IntType), Dimension (NVAL), Intent (Out) :: IGOEST
-    ! __________________________________________________________
-    Integer(kind=IntType) :: XTST, XDONA, XDONB
-    !
-    ! __________________________________________________________
-    Integer(kind=IntType), allocatable, Dimension (:) :: JWRKT, IRNGT
-    Integer(kind=IntType) :: LMTNA, LMTNC, IRNG, IRNG1, IRNG2, NUNI
-    Integer(kind=IntType) :: IIND, IWRKD, IWRK, IWRKF, JINDA, IINDA, IINDB
-
-    !
-    Select Case (NVAL)
-    Case (:0)
-       Return
-    Case (1)
-       IGOEST (1) = 1
-       Return
-    Case Default
-       Continue
-    End Select
-    allocate(JWRKT(NVAL), IRNGT(NVAL))
-    !
-    !  Fill-in the index array, creating ordered couples
-    !
-    Do IIND = 2, NVAL, 2
-       If (XDONT(IIND-1) < XDONT(IIND)) Then
-          IRNGT (IIND-1) = IIND - 1
-          IRNGT (IIND) = IIND
-       Else
-          IRNGT (IIND-1) = IIND
-          IRNGT (IIND) = IIND - 1
-       End If
-    End Do
-    If (Modulo (NVAL, 2) /= 0) Then
-       IRNGT (NVAL) = NVAL
-    End If
-    !
-    !  We will now have ordered subsets A - B - A - B - ...
-    !  and merge A and B couples into     C   -   C   - ...
-    !
-    LMTNA = 2
-    LMTNC = 4
-    !
-    !  First iteration. The length of the ordered subsets goes from 2 to 4
-    !
-    Do
-       If (NVAL <= 4) Exit
-       !
-       !   Loop on merges of A and B into C
-       !
-       Do IWRKD = 0, NVAL - 1, 4
-          If ((IWRKD+4) > NVAL) Then
-             If ((IWRKD+2) >= NVAL) Exit
-             !
-             !   1 2 3
-             !
-             If (XDONT(IRNGT(IWRKD+2)) <= XDONT(IRNGT(IWRKD+3))) Exit
-             !
-             !   1 3 2
-             !
-             If (XDONT(IRNGT(IWRKD+1)) <= XDONT(IRNGT(IWRKD+3))) Then
-                IRNG2 = IRNGT (IWRKD+2)
-                IRNGT (IWRKD+2) = IRNGT (IWRKD+3)
-                IRNGT (IWRKD+3) = IRNG2
+                IINDA = 1
+                IINDB = JINDA + 1
+                !
+                !  One steps in the C subset, that we create in the final rank array
+                !
+                !  Make a copy of the rank array for the iteration
                 !
-                !   3 1 2
+                JWRKT(1:LMTNA) = IRNGT(IWRKD:JINDA)
+                XVALA = XVALT(JWRKT(IINDA))
+                XVALB = XVALT(IRNGT(IINDB))
                 !
-             Else
-                IRNG1 = IRNGT (IWRKD+1)
-                IRNGT (IWRKD+1) = IRNGT (IWRKD+3)
-                IRNGT (IWRKD+3) = IRNGT (IWRKD+2)
-                IRNGT (IWRKD+2) = IRNG1
-             End If
-             Exit
-          End If
-          !
-          !   1 2 3 4
-          !
-          If (XDONT(IRNGT(IWRKD+2)) <= XDONT(IRNGT(IWRKD+3))) Cycle
-          !
-          !   1 3 x x
-          !
-          If (XDONT(IRNGT(IWRKD+1)) <= XDONT(IRNGT(IWRKD+3))) Then
-             IRNG2 = IRNGT (IWRKD+2)
-             IRNGT (IWRKD+2) = IRNGT (IWRKD+3)
-             If (XDONT(IRNG2) <= XDONT(IRNGT(IWRKD+4))) Then
-                !   1 3 2 4
-                IRNGT (IWRKD+3) = IRNG2
-             Else
-                !   1 3 4 2
-                IRNGT (IWRKD+3) = IRNGT (IWRKD+4)
-                IRNGT (IWRKD+4) = IRNG2
-             End If
-             !
-             !   3 x x x
-             !
-          Else
-             IRNG1 = IRNGT (IWRKD+1)
-             IRNG2 = IRNGT (IWRKD+2)
-             IRNGT (IWRKD+1) = IRNGT (IWRKD+3)
-             If (XDONT(IRNG1) <= XDONT(IRNGT(IWRKD+4))) Then
-                IRNGT (IWRKD+2) = IRNG1
-                If (XDONT(IRNG2) <= XDONT(IRNGT(IWRKD+4))) Then
-                   !   3 1 2 4
-                   IRNGT (IWRKD+3) = IRNG2
+                Do
+                    IWRK = IWRK + 1
+                    !
+                    !  We still have unprocessed values in both A and B
+                    !
+                    If (XVALA > XVALB) Then
+                        IRNGT(IWRK) = IRNGT(IINDB)
+                        IINDB = IINDB + 1
+                        If (IINDB > IWRKF) Then
+                            !  Only A still with unprocessed values
+                            IRNGT(IWRK + 1:IWRKF) = JWRKT(IINDA:LMTNA)
+                            Exit
+                        End If
+                        XVALB = XVALT(IRNGT(IINDB))
+                    Else
+                        IRNGT(IWRK) = JWRKT(IINDA)
+                        IINDA = IINDA + 1
+                        If (IINDA > LMTNA) Exit! Only B still with unprocessed values
+                        XVALA = XVALT(JWRKT(IINDA))
+                    End If
+                    !
+                End Do
+            End Do
+            !
+            !  The Cs become As and Bs
+            !
+            LMTNA = 2 * LMTNA
+        End Do
+        !
+        !   Last merge of A and B into C, with removal of duplicates.
+        !
+        IINDA = 1
+        IINDB = LMTNA + 1
+        NUNI = 0
+        !
+        !  One steps in the C subset, that we create in the final rank array
+        !
+        JWRKT(1:LMTNA) = IRNGT(1:LMTNA)
+        If (IINDB <= NVAL) Then
+            XTST = I_NEARLESS(Min(XVALT(JWRKT(1)), XVALT(IRNGT(IINDB))))
+        Else
+            XTST = I_NEARLESS(XVALT(JWRKT(1)))
+        End if
+        Do IWRK = 1, NVAL
+            !
+            !  We still have unprocessed values in both A and B
+            !
+            If (IINDA <= LMTNA) Then
+                If (IINDB <= NVAL) Then
+                    If (XVALT(JWRKT(IINDA)) > XVALT(IRNGT(IINDB))) Then
+                        IRNG = IRNGT(IINDB)
+                        IINDB = IINDB + 1
+                    Else
+                        IRNG = JWRKT(IINDA)
+                        IINDA = IINDA + 1
+                    End If
                 Else
-                   !   3 1 4 2
-                   IRNGT (IWRKD+3) = IRNGT (IWRKD+4)
-                   IRNGT (IWRKD+4) = IRNG2
+                    !
+                    !  Only A still with unprocessed values
+                    !
+                    IRNG = JWRKT(IINDA)
+                    IINDA = IINDA + 1
                 End If
-             Else
-                !   3 4 1 2
-                IRNGT (IWRKD+2) = IRNGT (IWRKD+4)
-                IRNGT (IWRKD+3) = IRNG1
-                IRNGT (IWRKD+4) = IRNG2
-             End If
-          End If
-       End Do
-       !
-       !  The Cs become As and Bs
-       !
-       LMTNA = 4
-       Exit
-    End Do
-    !
-    !  Iteration loop. Each time, the length of the ordered subsets
-    !  is doubled.
-    !
-    Do
-       If (2*LMTNA >= NVAL) Exit
-       IWRKF = 0
-       LMTNC = 2 * LMTNC
-       !
-       !   Loop on merges of A and B into C
-       !
-       Do
-          IWRK = IWRKF
-          IWRKD = IWRKF + 1
-          JINDA = IWRKF + LMTNA
-          IWRKF = IWRKF + LMTNC
-          If (IWRKF >= NVAL) Then
-             If (JINDA >= NVAL) Exit
-             IWRKF = NVAL
-          End If
-          IINDA = 1
-          IINDB = JINDA + 1
-          !
-          !  One steps in the C subset, that we create in the final rank array
-          !
-          !  Make a copy of the rank array for the iteration
-          !
-          JWRKT (1:LMTNA) = IRNGT (IWRKD:JINDA)
-          XDONA = XDONT (JWRKT(IINDA))
-          XDONB = XDONT (IRNGT(IINDB))
-          !
-          Do
-             IWRK = IWRK + 1
-             !
-             !  We still have unprocessed values in both A and B
-             !
-             If (XDONA > XDONB) Then
-                IRNGT (IWRK) = IRNGT (IINDB)
-                IINDB = IINDB + 1
-                If (IINDB > IWRKF) Then
-                   !  Only A still with unprocessed values
-                   IRNGT (IWRK+1:IWRKF) = JWRKT (IINDA:LMTNA)
-                   Exit
+            Else
+                !
+                !  Only B still with unprocessed values
+                !
+                IRNG = IRNGT(IWRK)
+            End If
+            If (XVALT(IRNG) > XTST) Then
+                XTST = XVALT(IRNG)
+                NUNI = NUNI + 1
+                IRNGT(NUNI) = IRNG
+            End If
+            !
+        End Do
+        deallocate (JWRKT)
+        Return
+        !
+    End Subroutine I_unirnk
+
+    Subroutine I_uniinv(XDONT, NVAL, IGOEST)
+        use precision
+        implicit none
+        ! __________________________________________________________
+        !   UNIINV = Merge-sort inverse ranking of an array, with removal of
+        !   duplicate entries.
+        !   The routine is similar to pure merge-sort ranking, but on
+        !   the last pass, it sets indices in IGOEST to the rank
+        !   of the value in the ordered set with duplicates removed.
+        !   For performance reasons, the first 2 passes are taken
+        !   out of the standard loop, and use dedicated coding.
+        ! __________________________________________________________
+        ! __________________________________________________________
+        Integer(kind=IntType), intent(in) :: NVAL
+        Integer(kind=IntType), Dimension(NVAL), Intent(In) :: XDONT
+        Integer(kind=IntType), Dimension(NVAL), Intent(Out) :: IGOEST
+        ! __________________________________________________________
+        Integer(kind=IntType) :: XTST, XDONA, XDONB
+        !
+        ! __________________________________________________________
+        Integer(kind=IntType), allocatable, Dimension(:) :: JWRKT, IRNGT
+        Integer(kind=IntType) :: LMTNA, LMTNC, IRNG, IRNG1, IRNG2, NUNI
+        Integer(kind=IntType) :: IIND, IWRKD, IWRK, IWRKF, JINDA, IINDA, IINDB
+
+        !
+        Select Case (NVAL)
+        Case (:0)
+            Return
+        Case (1)
+            IGOEST(1) = 1
+            Return
+        Case Default
+            Continue
+        End Select
+        allocate (JWRKT(NVAL), IRNGT(NVAL))
+        !
+        !  Fill-in the index array, creating ordered couples
+        !
+        Do IIND = 2, NVAL, 2
+            If (XDONT(IIND - 1) < XDONT(IIND)) Then
+                IRNGT(IIND - 1) = IIND - 1
+                IRNGT(IIND) = IIND
+            Else
+                IRNGT(IIND - 1) = IIND
+                IRNGT(IIND) = IIND - 1
+            End If
+        End Do
+        If (Modulo(NVAL, 2) /= 0) Then
+            IRNGT(NVAL) = NVAL
+        End If
+        !
+        !  We will now have ordered subsets A - B - A - B - ...
+        !  and merge A and B couples into     C   -   C   - ...
+        !
+        LMTNA = 2
+        LMTNC = 4
+        !
+        !  First iteration. The length of the ordered subsets goes from 2 to 4
+        !
+        Do
+            If (NVAL <= 4) Exit
+            !
+            !   Loop on merges of A and B into C
+            !
+            Do IWRKD = 0, NVAL - 1, 4
+                If ((IWRKD + 4) > NVAL) Then
+                    If ((IWRKD + 2) >= NVAL) Exit
+                    !
+                    !   1 2 3
+                    !
+                    If (XDONT(IRNGT(IWRKD + 2)) <= XDONT(IRNGT(IWRKD + 3))) Exit
+                    !
+                    !   1 3 2
+                    !
+                    If (XDONT(IRNGT(IWRKD + 1)) <= XDONT(IRNGT(IWRKD + 3))) Then
+                        IRNG2 = IRNGT(IWRKD + 2)
+                        IRNGT(IWRKD + 2) = IRNGT(IWRKD + 3)
+                        IRNGT(IWRKD + 3) = IRNG2
+                        !
+                        !   3 1 2
+                        !
+                    Else
+                        IRNG1 = IRNGT(IWRKD + 1)
+                        IRNGT(IWRKD + 1) = IRNGT(IWRKD + 3)
+                        IRNGT(IWRKD + 3) = IRNGT(IWRKD + 2)
+                        IRNGT(IWRKD + 2) = IRNG1
+                    End If
+                    Exit
                 End If
-                XDONB = XDONT (IRNGT(IINDB))
-             Else
-                IRNGT (IWRK) = JWRKT (IINDA)
-                IINDA = IINDA + 1
-                If (IINDA > LMTNA) Exit! Only B still with unprocessed values
-                XDONA = XDONT (JWRKT(IINDA))
-             End If
-             !
-          End Do
-       End Do
-       !
-       !  The Cs become As and Bs
-       !
-       LMTNA = 2 * LMTNA
-    End Do
-    !
-    !   Last merge of A and B into C, with removal of duplicates.
-    !
-    IINDA = 1
-    IINDB = LMTNA + 1
-    NUNI = 0
-    !
-    !  One steps in the C subset, that we create in the final rank array
-    !
-    JWRKT (1:LMTNA) = IRNGT (1:LMTNA)
-    If (IINDB <= NVAL) Then
-       XTST = I_NEARLESS (Min(XDONT(JWRKT(1)), XDONT(IRNGT(IINDB))))
-    Else
-       XTST = I_NEARLESS (XDONT(JWRKT(1)))
-    Endif
-    Do IWRK = 1, NVAL
-       !
-       !  We still have unprocessed values in both A and B
-       !
-       If (IINDA <= LMTNA) Then
-          If (IINDB <= NVAL) Then
-             If (XDONT(JWRKT(IINDA)) > XDONT(IRNGT(IINDB))) Then
-                IRNG = IRNGT (IINDB)
-                IINDB = IINDB + 1
-             Else
-                IRNG = JWRKT (IINDA)
-                IINDA = IINDA + 1
-             End If
-          Else
-             !
-             !  Only A still with unprocessed values
-             !
-             IRNG = JWRKT (IINDA)
-             IINDA = IINDA + 1
-          End If
-       Else
-          !
-          !  Only B still with unprocessed values
-          !
-          IRNG = IRNGT (IWRK)
-       End If
-       If (XDONT(IRNG) > XTST) Then
-          XTST = XDONT (IRNG)
-          NUNI = NUNI + 1
-       End If
-       IGOEST (IRNG) = NUNI
-       !
-    End Do
-    deallocate(JWRKT, IRNGT)
-    Return
-    !
-  End Subroutine I_uniinv
-
-  Function I_nearless (XVAL) result (I_nl)
-    use precision
-    implicit none
-    !  Nearest value less than given value
-    ! __________________________________________________________
-    Integer(kind=intType), Intent (In) :: XVAL
-    Integer(kind=intType) :: I_nl
-    ! __________________________________________________________
-    I_nl = XVAL - 1
-    return
-    !
-  End Function I_nearless
+                !
+                !   1 2 3 4
+                !
+                If (XDONT(IRNGT(IWRKD + 2)) <= XDONT(IRNGT(IWRKD + 3))) Cycle
+                !
+                !   1 3 x x
+                !
+                If (XDONT(IRNGT(IWRKD + 1)) <= XDONT(IRNGT(IWRKD + 3))) Then
+                    IRNG2 = IRNGT(IWRKD + 2)
+                    IRNGT(IWRKD + 2) = IRNGT(IWRKD + 3)
+                    If (XDONT(IRNG2) <= XDONT(IRNGT(IWRKD + 4))) Then
+                        !   1 3 2 4
+                        IRNGT(IWRKD + 3) = IRNG2
+                    Else
+                        !   1 3 4 2
+                        IRNGT(IWRKD + 3) = IRNGT(IWRKD + 4)
+                        IRNGT(IWRKD + 4) = IRNG2
+                    End If
+                    !
+                    !   3 x x x
+                    !
+                Else
+                    IRNG1 = IRNGT(IWRKD + 1)
+                    IRNG2 = IRNGT(IWRKD + 2)
+                    IRNGT(IWRKD + 1) = IRNGT(IWRKD + 3)
+                    If (XDONT(IRNG1) <= XDONT(IRNGT(IWRKD + 4))) Then
+                        IRNGT(IWRKD + 2) = IRNG1
+                        If (XDONT(IRNG2) <= XDONT(IRNGT(IWRKD + 4))) Then
+                            !   3 1 2 4
+                            IRNGT(IWRKD + 3) = IRNG2
+                        Else
+                            !   3 1 4 2
+                            IRNGT(IWRKD + 3) = IRNGT(IWRKD + 4)
+                            IRNGT(IWRKD + 4) = IRNG2
+                        End If
+                    Else
+                        !   3 4 1 2
+                        IRNGT(IWRKD + 2) = IRNGT(IWRKD + 4)
+                        IRNGT(IWRKD + 3) = IRNG1
+                        IRNGT(IWRKD + 4) = IRNG2
+                    End If
+                End If
+            End Do
+            !
+            !  The Cs become As and Bs
+            !
+            LMTNA = 4
+            Exit
+        End Do
+        !
+        !  Iteration loop. Each time, the length of the ordered subsets
+        !  is doubled.
+        !
+        Do
+            If (2 * LMTNA >= NVAL) Exit
+            IWRKF = 0
+            LMTNC = 2 * LMTNC
+            !
+            !   Loop on merges of A and B into C
+            !
+            Do
+                IWRK = IWRKF
+                IWRKD = IWRKF + 1
+                JINDA = IWRKF + LMTNA
+                IWRKF = IWRKF + LMTNC
+                If (IWRKF >= NVAL) Then
+                    If (JINDA >= NVAL) Exit
+                    IWRKF = NVAL
+                End If
+                IINDA = 1
+                IINDB = JINDA + 1
+                !
+                !  One steps in the C subset, that we create in the final rank array
+                !
+                !  Make a copy of the rank array for the iteration
+                !
+                JWRKT(1:LMTNA) = IRNGT(IWRKD:JINDA)
+                XDONA = XDONT(JWRKT(IINDA))
+                XDONB = XDONT(IRNGT(IINDB))
+                !
+                Do
+                    IWRK = IWRK + 1
+                    !
+                    !  We still have unprocessed values in both A and B
+                    !
+                    If (XDONA > XDONB) Then
+                        IRNGT(IWRK) = IRNGT(IINDB)
+                        IINDB = IINDB + 1
+                        If (IINDB > IWRKF) Then
+                            !  Only A still with unprocessed values
+                            IRNGT(IWRK + 1:IWRKF) = JWRKT(IINDA:LMTNA)
+                            Exit
+                        End If
+                        XDONB = XDONT(IRNGT(IINDB))
+                    Else
+                        IRNGT(IWRK) = JWRKT(IINDA)
+                        IINDA = IINDA + 1
+                        If (IINDA > LMTNA) Exit! Only B still with unprocessed values
+                        XDONA = XDONT(JWRKT(IINDA))
+                    End If
+                    !
+                End Do
+            End Do
+            !
+            !  The Cs become As and Bs
+            !
+            LMTNA = 2 * LMTNA
+        End Do
+        !
+        !   Last merge of A and B into C, with removal of duplicates.
+        !
+        IINDA = 1
+        IINDB = LMTNA + 1
+        NUNI = 0
+        !
+        !  One steps in the C subset, that we create in the final rank array
+        !
+        JWRKT(1:LMTNA) = IRNGT(1:LMTNA)
+        If (IINDB <= NVAL) Then
+            XTST = I_NEARLESS(Min(XDONT(JWRKT(1)), XDONT(IRNGT(IINDB))))
+        Else
+            XTST = I_NEARLESS(XDONT(JWRKT(1)))
+        End if
+        Do IWRK = 1, NVAL
+            !
+            !  We still have unprocessed values in both A and B
+            !
+            If (IINDA <= LMTNA) Then
+                If (IINDB <= NVAL) Then
+                    If (XDONT(JWRKT(IINDA)) > XDONT(IRNGT(IINDB))) Then
+                        IRNG = IRNGT(IINDB)
+                        IINDB = IINDB + 1
+                    Else
+                        IRNG = JWRKT(IINDA)
+                        IINDA = IINDA + 1
+                    End If
+                Else
+                    !
+                    !  Only A still with unprocessed values
+                    !
+                    IRNG = JWRKT(IINDA)
+                    IINDA = IINDA + 1
+                End If
+            Else
+                !
+                !  Only B still with unprocessed values
+                !
+                IRNG = IRNGT(IWRK)
+            End If
+            If (XDONT(IRNG) > XTST) Then
+                XTST = XDONT(IRNG)
+                NUNI = NUNI + 1
+            End If
+            IGOEST(IRNG) = NUNI
+            !
+        End Do
+        deallocate (JWRKT, IRNGT)
+        Return
+        !
+    End Subroutine I_uniinv
+
+    Function I_nearless(XVAL) result(I_nl)
+        use precision
+        implicit none
+        !  Nearest value less than given value
+        ! __________________________________________________________
+        Integer(kind=intType), Intent(In) :: XVAL
+        Integer(kind=intType) :: I_nl
+        ! __________________________________________________________
+        I_nl = XVAL - 1
+        return
+        !
+    End Function I_nearless
 #endif
 end module sorting
diff --git a/src/utils/surfaceUtils.F90 b/src/utils/surfaceUtils.F90
index 28e23bfb4..2e377afd6 100644
--- a/src/utils/surfaceUtils.F90
+++ b/src/utils/surfaceUtils.F90
@@ -2,665 +2,665 @@ module surfaceUtils
 
 contains
 
-  subroutine getSurfaceSize(sizeNode, sizeCell, famList, n, includeZipper)
-    ! Compute the number of points that will be returned from getForces
-    ! or getForcePoints
-    use constants
-    use blockPointers, only : BCData, nDom, nBocos
-    use utils, only : setPointers
-    use sorting, only : famInList
-    use surfaceFamilies, only  : BCFamGroups
-    use oversetData, only : zipperMeshes, zipperMesh, oversetPresent
-
-    implicit none
-
-    integer(kind=intType),intent(out) :: sizeNode, sizeCell
-    logical, intent(in) :: includeZipper
-    integer(kind=intType) :: nn, mm, i, j,iimax, shp(1)
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, iBCGroup
-    integer(kind=intType), intent(in) :: n, famList(n)
-    type(zipperMesh), pointer :: zipper
-    logical :: BCGroupNeeded
-    sizeNode = 0_intType
-    sizeCell = 0_intType
-
-    domains: do nn=1,nDom
-       call setPointers(nn,1_intType,1_intType)
-       bocos: do mm=1,nBocos
-          ! Check if this surface should be included or not:
-          famInclude: if (famInList(BCData(mm)%famID, famList)) then
-
-             jBeg = BCData(mm)%jnBeg ; jEnd = BCData(mm)%jnEnd
-             iBeg = BCData(mm)%inBeg ; iEnd = BCData(mm)%inEnd
-             sizeNode = sizeNode + (iEnd - iBeg + 1)*(jEnd - jBeg + 1)
-
-             ! If we don't care about blanking, it's easy:
-             blanking:if (.not. includeZipper) then
-                sizeCell = sizeCell + (iEnd - iBeg)*(jEnd - jBeg)
-             else
-                ! Otherwise we have to consider the iBlank
-                do j=jBeg+1, jEnd
-                   do i=iBeg+1, iEnd
-                      if (BCData(mm)%iBlank(i,j) == 1) then
-                         sizeCell = sizeCell + 1
-                      end if
-                   end do
-                end do
-             end if blanking
-          end if famInclude
-       end do bocos
-    end do domains
-
-    ! We know must consider additional nodes that are required by the
-    ! zipper mesh triangles on the root proc.
-
-    ! No overset or we don't want to include the zipper, return immediately
-    if (.not. oversetPresent .or. .not. includeZipper) then
-       return
-    end if
-
-    ! If there are zipper meshes, we must include the nodes that the
-    ! zipper triangles will use.
-    do iBCGroup=1, nfamExchange
-       BCGroupNeeded = .False.
-       BCGroupFamLoop: do j=1, size(BCFamGroups(iBCGroup)%famList)
-          if (famInList(BCFamGroups(iBCGroup)%famList(j), famList)) then
-             BCGroupNeeded = .True.
-             exit BCGroupFamLoop
-          end if
-       end do BCGroupFamLoop
-
-       if (.not. BCGroupNeeded) then
-          cycle
-       end if
-
-       ! Pointer for easier reading.
-       zipper => zipperMeshes(iBCGroup)
-
-       ! If we don't have a zipper for this BCGroup, just keep going.
-       if (.not. zipper%allocated) then
-          cycle
-       end if
-
-       ! Include the total extra number of nodes. Not necessairly all
-       ! nodes are needed, but they will be returned anyway.
-       sizeNode = sizeNode + size(zipper%indices)
-
-       ! Include the extra number of cells. Not necessairly all cells
-       ! are needed, but here we have to check indvidually.
-
-       do i=1,size(zipper%fam)
-          if (famInList(zipper%fam(i), famList)) then
-             sizeCell = sizeCell + 1
-          end if
-       end do
-    end do
-
-  end subroutine getSurfaceSize
-
-  subroutine getSurfaceConnectivity(conn, cgnsBlockID, ncell, famList, nFamList, includeZipper)
-    ! Return the connectivity list for the each of the patches
-    ! cgnsBlockID is the domain number of the CGNS file that each face patch corresponds to.
-    ! the zipper mesh patches will have cgnsBlockID = -1
-    use constants
-    use blockPointers, only : nDom, nBocos, BCData, BCFaceID, rightHanded, nbkGlobal
-    use utils, only : setPointers
-    use sorting, only : famInList
-    use surfaceFamilies, only : BCFamGroups
-    use oversetData, only : zipperMeshes, zipperMesh, oversetPresent
-
-    implicit none
-
-    ! Input/Output
-    integer(kind=intType), intent(in) :: ncell
-    integer(kind=intType), intent(inout) :: conn(4*ncell), cgnsBlockID(ncell)
-    integer(kind=intType), intent(in) :: nFamList, famList(nFamList)
-    logical, intent(in) :: includeZipper
-
-    ! Working
-    integer(kind=intType) :: nn, mm, cellCount, nodeCount, ni, nj, i, j
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, iBCGroup
-    logical regularOrdering, BCGroupNeeded
-    type(zipperMesh), pointer :: zipper
-    cellCount = 0
-    nodeCount = 0
-
-    ! Initialize all cgns IDs to -1. This will take care of the zipper meshes IDs, which should
-    ! be -1 since they do not belong to the CGNS file.
-    cgnsBlockID = -1
-
-    domains: do nn=1,nDom
-       call setPointers(nn, 1_intType, 1_intType)
-       bocos: do mm=1,nBocos
-          famInclude: if (famInList(BCData(mm)%famID, famList)) then
-
-             jBeg = BCData(mm)%jnBeg ; jEnd = BCData(mm)%jnEnd
-             iBeg = BCData(mm)%inBeg ; iEnd = BCData(mm)%inEnd
-
-             ni = iEnd - iBeg + 1
-             nj = jEnd - jBeg + 1
-
-             ! We want to ensure that all the normals of the faces are
-             ! consistent. To ensure this, we enforce that all normals
-             ! are "into" the domain. Therefore we must treat difference
-             ! faces of a block differently. For example for an iLow
-             ! face, when looping over j-k in the regular way, results
-             ! in in a domain inward pointing normal for iLow but
-             ! outward pointing normal for iHigh. The same is true for
-             ! kMin and kMax. However, it is reverse for the J-faces:
-             ! This is becuase the way the pointers are extracted i then
-             ! k is the reverse of what "should" be for consistency. The
-             ! other two, the pointers are cyclic consistent: i,j->k,
-             ! j,k (wrap) ->i, but for the j-direction is is i,k->j when
-             ! to be consistent with the others it should be
-             ! k,i->j. Hope that made sense.
-
-             select case(BCFaceID(mm))
-             case(iMin, jMax, kMin)
-                regularOrdering = .True.
-             case default
-                regularOrdering = .False.
-             end select
-
-             ! Now this can be reversed *again* if we have a block that
-             ! is left handed.
-             if (.not. rightHanded) then
-                regularOrdering = .not. (regularOrdering)
-             end if
-
-             if (regularOrdering) then
-                ! Do regular ordering.
-
-                ! Loop over generic face size...Note we are doing zero
-                ! based ordering!
-
-                ! This cartoon of a generic cell might help:
-                !
-                ! i, j+1 +-----+ i+1, j+1
-                !   n4   |     | n3
-                !        +-----+
-                !       i,j    i+1, j
-                !       n1     n2
-                !
-
-                do j=0,nj-2
-                   do i=0,ni-2
-                      if (.not. includeZipper .or. BCData(mm)%iBlank(i+iBeg+1, j+jBeg+1) ==1 ) then
-                         conn(4*cellCount+1) = nodeCount + (j  )*ni + i       + 1! n1
-                         conn(4*cellCount+2) = nodeCount + (j  )*ni + i + 1   + 1! n2
-                         conn(4*cellCount+3) = nodeCount + (j+1)*ni + i + 1   + 1! n3
-                         conn(4*cellCount+4) = nodeCount + (j+1)*ni + i       + 1! n4
-
-                         ! Assign the corresponding block IDs.
-                         ! Remember that nbkGlobal is the CGNS ID of the current domain,
-                         ! and this is set when we call setPointers.
-                         cgnsBlockID(cellCount+1) = nbkGlobal
-
-                         cellCount = cellCount + 1
-                      end if
-                   end do
-                end do
-             else
-                ! Do reverse ordering:
-                do j=0,nj-2
-                   do i=0,ni-2
-                      if (.not. includeZipper .or. BCData(mm)%iBlank(i+iBeg+1, j+JBeg+1) ==1 ) then
-                         conn(4*cellCount+1) = nodeCount + (j  )*ni + i        + 1! n1
-                         conn(4*cellCount+2) = nodeCount + (j+1)*ni + i        + 1! n4
-                         conn(4*cellCount+3) = nodeCount + (j+1)*ni + i + 1    + 1! n3
-                         conn(4*cellCount+4) = nodeCount + (j  )*ni + i + 1    + 1! n2
-
-                         ! Assign the corresponding block IDs.
-                         ! Remember that nbkGlobal is the CGNS ID of the current domain,
-                         ! and this is set when we call setPointers.
-                         cgnsBlockID(cellCount+1) = nbkGlobal
-
-                         cellCount = cellCount + 1
-                      end if
-                   end do
-                end do
-             end if
-             nodeCount = nodeCount + ni*nj
-          end if famInclude
-       end do bocos
-    end do domains
-
-    ! We know must consider additional connectivity required by the
-    ! zipper mesh triangles on the root proc
-
-    ! No overset or don't want zipper return immediately
-    if (.not. oversetPresent .or. .not. includeZipper) then
-       return
-    end if
-
-    ! If there are zipper meshes, we must include the nodes that the
-    ! zipper triangles will use.
-    BCGroupLoop: do iBCGroup=1, nFamExchange
-
-       BCGroupNeeded = .False.
-       BCGroupFamLoop: do i=1, size(BCFamGroups(iBCGroup)%famList)
-          if (famInList(BCFamGroups(iBCGroup)%famList(i), famList)) then
-             BCGroupNeeded = .True.
-             exit BCGroupFamLoop
-          end if
-       end do BCGroupFamLoop
-
-       if (.not. BCGroupNeeded) then
-          cycle
-       end if
-
-       ! Pointer for easier reading.
-       zipper => zipperMeshes(iBCGroup)
-
-       ! If the zipper isn't done yet, don't do anything
-       if (.not. zipper%allocated) then
-          cycle
-       end if
-
-       ! Include the extra number of cells. Not necessairly all cells
-       ! are needed, but ehre we have to check indvidually.
-
-       do i=1,size(zipper%fam)
-          if (famInList(zipper%fam(i), famList)) then
-             ! This triangle should be included. Note that we use
-             ! degenerate quads for the triangles.
-             conn(4*cellCount+1) = nodeCount + zipper%conn(1, i)
-             conn(4*cellCount+2) = nodeCount + zipper%conn(2, i)
-             conn(4*cellCount+3) = nodeCount + zipper%conn(3, i)
-             conn(4*cellCount+4) = nodeCount + zipper%conn(3, i)
-             cellCount = cellCount + 1
-          end if
-       end do
-    end do BCGroupLoop
-
-  end subroutine getSurfaceConnectivity
-
-  subroutine getSurfaceFamily(elemFam, ncell, famList, nFamList, includeZipper)
-
-    use constants
-    use blockPointers, only : nDom, nBocos, BCData
-    use utils, only : setPointers
-    use sorting, only : famInList
-    use surfaceFamilies, only : BCFamGroups
-    use oversetData, only : zipperMeshes, zipperMesh, oversetPresent
-    implicit none
-
-    ! Input/Output
-    integer(kind=intType), intent(in) :: ncell
-    integer(kind=intType), intent(inout) :: elemFam(nCell)
-    integer(kind=intType), intent(in) :: nFamList, famList(nFamList)
-    logical, intent(in) :: includeZipper
-
-    ! Working
-    integer(kind=intType) :: nn, mm, cellCount, nodeCount, ni, nj, i, j
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, iBCGroup
-    logical BCGroupNeeded
-    type(zipperMesh), pointer :: zipper
-
-    cellCount = 0
-    nodeCount = 0
-
-    domains: do nn=1,nDom
-       call setPointers(nn, 1_intType, 1_intType)
-       bocos: do mm=1,nBocos
-          famInclude: if (famInList(BCData(mm)%famID, famList)) then
-
-             jBeg = BCData(mm)%jnBeg ; jEnd = BCData(mm)%jnEnd
-             iBeg = BCData(mm)%inBeg ; iEnd = BCData(mm)%inEnd
-
-             ni = iEnd - iBeg + 1
-             nj = jEnd - jBeg + 1
-             do j=0,nj-2
-                do i=0,ni-2
-                   if (.not. includeZipper .or. BCData(mm)%iBlank(i+iBeg+1, j+JBeg+1)==1 ) then
-                      cellCount = cellCount + 1
-                      elemFam(cellCount) = BCdata(mm)%famID
-                   end if
-                end do
-             end do
-             nodeCount = nodeCount + ni*nj
-          end if famInclude
-       end do bocos
-    end do domains
-
-    ! We know must consider additional elements quired by the zipper
-    ! mesh triangles on the root proc
-
-    ! No overset or don't want zipper
-    if (.not. oversetPresent .or. .not. includeZipper) then
-       return
-    end if
-
-    ! If there are zipper meshes, we must include the nodes that the
-    ! zipper triangles will use.
-    BCGroupLoop: do iBCGroup=1, nFamExchange
-
-       BCGroupNeeded = .False.
-       BCGroupFamLoop: do i=1, size(BCFamGroups(iBCGroup)%famList)
-          if (famInList(BCFamGroups(iBCGroup)%famList(i), famList)) then
-             BCGroupNeeded = .True.
-             exit BCGroupFamLoop
-          end if
-       end do BCGroupFamLoop
-
-       if (.not. BCGroupNeeded) then
-          cycle
-       end if
-
-       ! Pointer for easier reading.
-       zipper => zipperMeshes(iBCGroup)
-
-       ! If the zipper isn't done yet, don't do anything
-       if (.not. zipper%allocated) then
-          cycle
-       end if
-
-       ! Include the extra number of cells. Not necessairly all cells
-       ! are needed, but ehre we have to check indvidually.
-
-       do i=1, size(zipper%fam)
-          if (famInList(zipper%fam(i), famList)) then
-             ! This triangle should be included. Note that we use
-             ! degenerate quads for the triangles.
-             cellCount = cellCount + 1
-             elemFam(cellCount) = zipper%fam(i)
-          end if
-       end do
-    end do BCGroupLoop
-  end subroutine getSurfaceFamily
-
-  subroutine getSurfacePoints(points, npts, sps_in, famList, nFamList, includeZipper)
-    use constants
-    use communication, only : myid
-    use blockPointers, only : nDom, BCData, nBocos, x, BCFaceID, il, jl, kl
-    use BCPointers, only : xx
-    use surfaceFamilies, only : BCFamGroups, familyExchange, BCFamExchange
-    use oversetData, only : zipperMeshes, zipperMesh, oversetPresent
-    use sorting, only : famInList
-    use utils, only : setPointers, EChk, setBCPointers
+    subroutine getSurfaceSize(sizeNode, sizeCell, famList, n, includeZipper)
+        ! Compute the number of points that will be returned from getForces
+        ! or getForcePoints
+        use constants
+        use blockPointers, only: BCData, nDom, nBocos
+        use utils, only: setPointers
+        use sorting, only: famInList
+        use surfaceFamilies, only: BCFamGroups
+        use oversetData, only: zipperMeshes, zipperMesh, oversetPresent
+
+        implicit none
+
+        integer(kind=intType), intent(out) :: sizeNode, sizeCell
+        logical, intent(in) :: includeZipper
+        integer(kind=intType) :: nn, mm, i, j, iimax, shp(1)
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, iBCGroup
+        integer(kind=intType), intent(in) :: n, famList(n)
+        type(zipperMesh), pointer :: zipper
+        logical :: BCGroupNeeded
+        sizeNode = 0_intType
+        sizeCell = 0_intType
+
+        domains: do nn = 1, nDom
+            call setPointers(nn, 1_intType, 1_intType)
+            bocos: do mm = 1, nBocos
+                ! Check if this surface should be included or not:
+                famInclude: if (famInList(BCData(mm)%famID, famList)) then
+
+                    jBeg = BCData(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+                    iBeg = BCData(mm)%inBeg; iEnd = BCData(mm)%inEnd
+                    sizeNode = sizeNode + (iEnd - iBeg + 1) * (jEnd - jBeg + 1)
+
+                    ! If we don't care about blanking, it's easy:
+                    blanking: if (.not. includeZipper) then
+                        sizeCell = sizeCell + (iEnd - iBeg) * (jEnd - jBeg)
+                    else
+                        ! Otherwise we have to consider the iBlank
+                        do j = jBeg + 1, jEnd
+                            do i = iBeg + 1, iEnd
+                                if (BCData(mm)%iBlank(i, j) == 1) then
+                                    sizeCell = sizeCell + 1
+                                end if
+                            end do
+                        end do
+                    end if blanking
+                end if famInclude
+            end do bocos
+        end do domains
+
+        ! We know must consider additional nodes that are required by the
+        ! zipper mesh triangles on the root proc.
+
+        ! No overset or we don't want to include the zipper, return immediately
+        if (.not. oversetPresent .or. .not. includeZipper) then
+            return
+        end if
+
+        ! If there are zipper meshes, we must include the nodes that the
+        ! zipper triangles will use.
+        do iBCGroup = 1, nfamExchange
+            BCGroupNeeded = .False.
+            BCGroupFamLoop: do j = 1, size(BCFamGroups(iBCGroup)%famList)
+                if (famInList(BCFamGroups(iBCGroup)%famList(j), famList)) then
+                    BCGroupNeeded = .True.
+                    exit BCGroupFamLoop
+                end if
+            end do BCGroupFamLoop
+
+            if (.not. BCGroupNeeded) then
+                cycle
+            end if
+
+            ! Pointer for easier reading.
+            zipper => zipperMeshes(iBCGroup)
+
+            ! If we don't have a zipper for this BCGroup, just keep going.
+            if (.not. zipper%allocated) then
+                cycle
+            end if
+
+            ! Include the total extra number of nodes. Not necessairly all
+            ! nodes are needed, but they will be returned anyway.
+            sizeNode = sizeNode + size(zipper%indices)
+
+            ! Include the extra number of cells. Not necessairly all cells
+            ! are needed, but here we have to check indvidually.
+
+            do i = 1, size(zipper%fam)
+                if (famInList(zipper%fam(i), famList)) then
+                    sizeCell = sizeCell + 1
+                end if
+            end do
+        end do
+
+    end subroutine getSurfaceSize
+
+    subroutine getSurfaceConnectivity(conn, cgnsBlockID, ncell, famList, nFamList, includeZipper)
+        ! Return the connectivity list for the each of the patches
+        ! cgnsBlockID is the domain number of the CGNS file that each face patch corresponds to.
+        ! the zipper mesh patches will have cgnsBlockID = -1
+        use constants
+        use blockPointers, only: nDom, nBocos, BCData, BCFaceID, rightHanded, nbkGlobal
+        use utils, only: setPointers
+        use sorting, only: famInList
+        use surfaceFamilies, only: BCFamGroups
+        use oversetData, only: zipperMeshes, zipperMesh, oversetPresent
+
+        implicit none
+
+        ! Input/Output
+        integer(kind=intType), intent(in) :: ncell
+        integer(kind=intType), intent(inout) :: conn(4 * ncell), cgnsBlockID(ncell)
+        integer(kind=intType), intent(in) :: nFamList, famList(nFamList)
+        logical, intent(in) :: includeZipper
+
+        ! Working
+        integer(kind=intType) :: nn, mm, cellCount, nodeCount, ni, nj, i, j
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, iBCGroup
+        logical regularOrdering, BCGroupNeeded
+        type(zipperMesh), pointer :: zipper
+        cellCount = 0
+        nodeCount = 0
+
+        ! Initialize all cgns IDs to -1. This will take care of the zipper meshes IDs, which should
+        ! be -1 since they do not belong to the CGNS file.
+        cgnsBlockID = -1
+
+        domains: do nn = 1, nDom
+            call setPointers(nn, 1_intType, 1_intType)
+            bocos: do mm = 1, nBocos
+                famInclude: if (famInList(BCData(mm)%famID, famList)) then
+
+                    jBeg = BCData(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+                    iBeg = BCData(mm)%inBeg; iEnd = BCData(mm)%inEnd
+
+                    ni = iEnd - iBeg + 1
+                    nj = jEnd - jBeg + 1
+
+                    ! We want to ensure that all the normals of the faces are
+                    ! consistent. To ensure this, we enforce that all normals
+                    ! are "into" the domain. Therefore we must treat difference
+                    ! faces of a block differently. For example for an iLow
+                    ! face, when looping over j-k in the regular way, results
+                    ! in in a domain inward pointing normal for iLow but
+                    ! outward pointing normal for iHigh. The same is true for
+                    ! kMin and kMax. However, it is reverse for the J-faces:
+                    ! This is becuase the way the pointers are extracted i then
+                    ! k is the reverse of what "should" be for consistency. The
+                    ! other two, the pointers are cyclic consistent: i,j->k,
+                    ! j,k (wrap) ->i, but for the j-direction is is i,k->j when
+                    ! to be consistent with the others it should be
+                    ! k,i->j. Hope that made sense.
+
+                    select case (BCFaceID(mm))
+                    case (iMin, jMax, kMin)
+                        regularOrdering = .True.
+                    case default
+                        regularOrdering = .False.
+                    end select
+
+                    ! Now this can be reversed *again* if we have a block that
+                    ! is left handed.
+                    if (.not. rightHanded) then
+                        regularOrdering = .not. (regularOrdering)
+                    end if
+
+                    if (regularOrdering) then
+                        ! Do regular ordering.
+
+                        ! Loop over generic face size...Note we are doing zero
+                        ! based ordering!
+
+                        ! This cartoon of a generic cell might help:
+                        !
+                        ! i, j+1 +-----+ i+1, j+1
+                        !   n4   |     | n3
+                        !        +-----+
+                        !       i,j    i+1, j
+                        !       n1     n2
+                        !
+
+                        do j = 0, nj - 2
+                            do i = 0, ni - 2
+                                if (.not. includeZipper .or. BCData(mm)%iBlank(i + iBeg + 1, j + jBeg + 1) == 1) then
+                                    conn(4 * cellCount + 1) = nodeCount + (j) * ni + i + 1! n1
+                                    conn(4 * cellCount + 2) = nodeCount + (j) * ni + i + 1 + 1! n2
+                                    conn(4 * cellCount + 3) = nodeCount + (j + 1) * ni + i + 1 + 1! n3
+                                    conn(4 * cellCount + 4) = nodeCount + (j + 1) * ni + i + 1! n4
+
+                                    ! Assign the corresponding block IDs.
+                                    ! Remember that nbkGlobal is the CGNS ID of the current domain,
+                                    ! and this is set when we call setPointers.
+                                    cgnsBlockID(cellCount + 1) = nbkGlobal
+
+                                    cellCount = cellCount + 1
+                                end if
+                            end do
+                        end do
+                    else
+                        ! Do reverse ordering:
+                        do j = 0, nj - 2
+                            do i = 0, ni - 2
+                                if (.not. includeZipper .or. BCData(mm)%iBlank(i + iBeg + 1, j + JBeg + 1) == 1) then
+                                    conn(4 * cellCount + 1) = nodeCount + (j) * ni + i + 1! n1
+                                    conn(4 * cellCount + 2) = nodeCount + (j + 1) * ni + i + 1! n4
+                                    conn(4 * cellCount + 3) = nodeCount + (j + 1) * ni + i + 1 + 1! n3
+                                    conn(4 * cellCount + 4) = nodeCount + (j) * ni + i + 1 + 1! n2
+
+                                    ! Assign the corresponding block IDs.
+                                    ! Remember that nbkGlobal is the CGNS ID of the current domain,
+                                    ! and this is set when we call setPointers.
+                                    cgnsBlockID(cellCount + 1) = nbkGlobal
+
+                                    cellCount = cellCount + 1
+                                end if
+                            end do
+                        end do
+                    end if
+                    nodeCount = nodeCount + ni * nj
+                end if famInclude
+            end do bocos
+        end do domains
+
+        ! We know must consider additional connectivity required by the
+        ! zipper mesh triangles on the root proc
+
+        ! No overset or don't want zipper return immediately
+        if (.not. oversetPresent .or. .not. includeZipper) then
+            return
+        end if
+
+        ! If there are zipper meshes, we must include the nodes that the
+        ! zipper triangles will use.
+        BCGroupLoop: do iBCGroup = 1, nFamExchange
+
+            BCGroupNeeded = .False.
+            BCGroupFamLoop: do i = 1, size(BCFamGroups(iBCGroup)%famList)
+                if (famInList(BCFamGroups(iBCGroup)%famList(i), famList)) then
+                    BCGroupNeeded = .True.
+                    exit BCGroupFamLoop
+                end if
+            end do BCGroupFamLoop
+
+            if (.not. BCGroupNeeded) then
+                cycle
+            end if
+
+            ! Pointer for easier reading.
+            zipper => zipperMeshes(iBCGroup)
+
+            ! If the zipper isn't done yet, don't do anything
+            if (.not. zipper%allocated) then
+                cycle
+            end if
+
+            ! Include the extra number of cells. Not necessairly all cells
+            ! are needed, but ehre we have to check indvidually.
+
+            do i = 1, size(zipper%fam)
+                if (famInList(zipper%fam(i), famList)) then
+                    ! This triangle should be included. Note that we use
+                    ! degenerate quads for the triangles.
+                    conn(4 * cellCount + 1) = nodeCount + zipper%conn(1, i)
+                    conn(4 * cellCount + 2) = nodeCount + zipper%conn(2, i)
+                    conn(4 * cellCount + 3) = nodeCount + zipper%conn(3, i)
+                    conn(4 * cellCount + 4) = nodeCount + zipper%conn(3, i)
+                    cellCount = cellCount + 1
+                end if
+            end do
+        end do BCGroupLoop
+
+    end subroutine getSurfaceConnectivity
+
+    subroutine getSurfaceFamily(elemFam, ncell, famList, nFamList, includeZipper)
+
+        use constants
+        use blockPointers, only: nDom, nBocos, BCData
+        use utils, only: setPointers
+        use sorting, only: famInList
+        use surfaceFamilies, only: BCFamGroups
+        use oversetData, only: zipperMeshes, zipperMesh, oversetPresent
+        implicit none
+
+        ! Input/Output
+        integer(kind=intType), intent(in) :: ncell
+        integer(kind=intType), intent(inout) :: elemFam(nCell)
+        integer(kind=intType), intent(in) :: nFamList, famList(nFamList)
+        logical, intent(in) :: includeZipper
+
+        ! Working
+        integer(kind=intType) :: nn, mm, cellCount, nodeCount, ni, nj, i, j
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, iBCGroup
+        logical BCGroupNeeded
+        type(zipperMesh), pointer :: zipper
+
+        cellCount = 0
+        nodeCount = 0
+
+        domains: do nn = 1, nDom
+            call setPointers(nn, 1_intType, 1_intType)
+            bocos: do mm = 1, nBocos
+                famInclude: if (famInList(BCData(mm)%famID, famList)) then
+
+                    jBeg = BCData(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+                    iBeg = BCData(mm)%inBeg; iEnd = BCData(mm)%inEnd
+
+                    ni = iEnd - iBeg + 1
+                    nj = jEnd - jBeg + 1
+                    do j = 0, nj - 2
+                        do i = 0, ni - 2
+                            if (.not. includeZipper .or. BCData(mm)%iBlank(i + iBeg + 1, j + JBeg + 1) == 1) then
+                                cellCount = cellCount + 1
+                                elemFam(cellCount) = BCdata(mm)%famID
+                            end if
+                        end do
+                    end do
+                    nodeCount = nodeCount + ni * nj
+                end if famInclude
+            end do bocos
+        end do domains
+
+        ! We know must consider additional elements quired by the zipper
+        ! mesh triangles on the root proc
+
+        ! No overset or don't want zipper
+        if (.not. oversetPresent .or. .not. includeZipper) then
+            return
+        end if
+
+        ! If there are zipper meshes, we must include the nodes that the
+        ! zipper triangles will use.
+        BCGroupLoop: do iBCGroup = 1, nFamExchange
+
+            BCGroupNeeded = .False.
+            BCGroupFamLoop: do i = 1, size(BCFamGroups(iBCGroup)%famList)
+                if (famInList(BCFamGroups(iBCGroup)%famList(i), famList)) then
+                    BCGroupNeeded = .True.
+                    exit BCGroupFamLoop
+                end if
+            end do BCGroupFamLoop
+
+            if (.not. BCGroupNeeded) then
+                cycle
+            end if
+
+            ! Pointer for easier reading.
+            zipper => zipperMeshes(iBCGroup)
+
+            ! If the zipper isn't done yet, don't do anything
+            if (.not. zipper%allocated) then
+                cycle
+            end if
+
+            ! Include the extra number of cells. Not necessairly all cells
+            ! are needed, but ehre we have to check indvidually.
+
+            do i = 1, size(zipper%fam)
+                if (famInList(zipper%fam(i), famList)) then
+                    ! This triangle should be included. Note that we use
+                    ! degenerate quads for the triangles.
+                    cellCount = cellCount + 1
+                    elemFam(cellCount) = zipper%fam(i)
+                end if
+            end do
+        end do BCGroupLoop
+    end subroutine getSurfaceFamily
+
+    subroutine getSurfacePoints(points, npts, sps_in, famList, nFamList, includeZipper)
+        use constants
+        use communication, only: myid
+        use blockPointers, only: nDom, BCData, nBocos, x, BCFaceID, il, jl, kl
+        use BCPointers, only: xx
+        use surfaceFamilies, only: BCFamGroups, familyExchange, BCFamExchange
+        use oversetData, only: zipperMeshes, zipperMesh, oversetPresent
+        use sorting, only: famInList
+        use utils, only: setPointers, EChk, setBCPointers
 #include <petsc/finclude/petsc.h>
-    use petsc
-    implicit none
-
-    !
-    !      Local variables.
-    !
-    integer(kind=intType), intent(in) :: npts,sps_in
-    real(kind=realType), intent(inout) :: points(3,npts)
-    integer(kind=intType), intent(in) :: nFamList, famList(nFamList)
-    logical, intent(in) :: includeZipper
-
-    integer(kind=intType) :: mm, nn, i, j, ii,sps, iDim, jj, ierr
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, iBCGroup
-    type(zipperMesh), pointer :: zipper
-    type(familyexchange), pointer :: exch
-    logical :: BCGroupNeeded
-    real(kind=realType), dimension(:), pointer :: localPtr
-    sps = sps_in
-
-    ii = 0
-    domains: do nn=1,nDom
-       call setPointers(nn, 1_intType, sps)
-
-       ! Loop over the number of boundary subfaces of this block.
-       bocos: do mm=1,nBocos
-
-          famInclude: if (famInList(BCData(mm)%famID, famList)) then
-
-             ! NODE Based
-             jBeg = BCData(mm)%jnBeg ; jEnd = BCData(mm)%jnEnd
-             iBeg = BCData(mm)%inBeg ; iEnd = BCData(mm)%inEnd
-
-             do j=jBeg, jEnd ! This is a node loop
-                do i=iBeg, iEnd ! This is a node loop
-                   ii = ii +1
-                   select case(BCFaceID(mm))
-
-                   case(imin)
-                      points(:,ii) = x(1,i,j,:)
-                   case(imax)
-                      points(:,ii) = x(il,i,j,:)
-                   case(jmin)
-                      points(:,ii) = x(i,1,j,:)
-                   case(jmax)
-                      points(:,ii) = x(i,jl,j,:)
-                   case(kmin)
-                      points(:,ii) = x(i,j,1,:)
-                   case(kmax)
-                      points(:,ii) = x(i,j,kl,:)
-                   end select
+        use petsc
+        implicit none
+
+        !
+        !      Local variables.
+        !
+        integer(kind=intType), intent(in) :: npts, sps_in
+        real(kind=realType), intent(inout) :: points(3, npts)
+        integer(kind=intType), intent(in) :: nFamList, famList(nFamList)
+        logical, intent(in) :: includeZipper
+
+        integer(kind=intType) :: mm, nn, i, j, ii, sps, iDim, jj, ierr
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, iBCGroup
+        type(zipperMesh), pointer :: zipper
+        type(familyexchange), pointer :: exch
+        logical :: BCGroupNeeded
+        real(kind=realType), dimension(:), pointer :: localPtr
+        sps = sps_in
+
+        ii = 0
+        domains: do nn = 1, nDom
+            call setPointers(nn, 1_intType, sps)
+
+            ! Loop over the number of boundary subfaces of this block.
+            bocos: do mm = 1, nBocos
+
+                famInclude: if (famInList(BCData(mm)%famID, famList)) then
+
+                    ! NODE Based
+                    jBeg = BCData(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+                    iBeg = BCData(mm)%inBeg; iEnd = BCData(mm)%inEnd
+
+                    do j = jBeg, jEnd ! This is a node loop
+                        do i = iBeg, iEnd ! This is a node loop
+                            ii = ii + 1
+                            select case (BCFaceID(mm))
+
+                            case (imin)
+                                points(:, ii) = x(1, i, j, :)
+                            case (imax)
+                                points(:, ii) = x(il, i, j, :)
+                            case (jmin)
+                                points(:, ii) = x(i, 1, j, :)
+                            case (jmax)
+                                points(:, ii) = x(i, jl, j, :)
+                            case (kmin)
+                                points(:, ii) = x(i, j, 1, :)
+                            case (kmax)
+                                points(:, ii) = x(i, j, kl, :)
+                            end select
+                        end do
+                    end do
+                end if famInclude
+            end do bocos
+        end do domains
+
+        ! No overset or not zipper, return
+        if (.not. oversetPresent .or. .not. includeZipper) then
+            return
+        end if
+
+        ! If there are zipper meshes, we must include the nodes that the
+        ! zipper triangles will use.
+        do iBCGroup = 1, nFamExchange
+
+            zipper => zipperMeshes(iBCGroup)
+
+            if (.not. zipper%allocated) then
+                cycle
+            end if
+
+            exch => BCFamExchange(iBCGroup, sps)
+            BCGroupNeeded = .False.
+            BCGroupFamLoop: do i = 1, size(BCFamGroups(iBCGroup)%famList)
+                if (famInList(BCFamGroups(iBCGroup)%famList(i), famList)) then
+                    BCGroupNeeded = .True.
+                    exit BCGroupFamLoop
+                end if
+            end do BCGroupFamLoop
+
+            if (.not. BCGroupNeeded) then
+                cycle
+            end if
+
+            ! Now we know we *actually* need something from this BCGroup.
+
+            ! Loop over each dimension individually since we have a scalar
+            ! scatter.
+            dimLoop: do iDim = 1, 3
+
+                call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+                localPtr = zero
+
+                ! jj is the running counter through the pointer array.
+                jj = 0
+                do nn = 1, nDom
+                    call setPointers(nn, 1_intType, sps)
+                    do mm = 1, nBocos
+                        famInclude2: if (famInList(BCData(mm)%famID, exch%famList)) then
+                            iBeg = BCdata(mm)%inBeg; iEnd = BCData(mm)%inEnd
+                            jBeg = BCdata(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+                            call setBCPointers(mm, .True.)
+                            do j = jBeg, jEnd
+                                do i = iBeg, iEnd
+                                    jj = jj + 1
+                                    localPtr(jj) = xx(i + 1, j + 1, iDim)
+                                end do
+                            end do
+                        end if famInclude2
+                    end do
                 end do
-             end do
-          end if famInclude
-       end do bocos
-    end do domains
-
-    ! No overset or not zipper, return
-    if (.not. oversetPresent .or. .not. includeZipper) then
-       return
-    end if
-
-    ! If there are zipper meshes, we must include the nodes that the
-    ! zipper triangles will use.
-    do iBCGroup=1, nFamExchange
-
-       zipper => zipperMeshes(iBCGroup)
-
-       if (.not. zipper%allocated) then
-          cycle
-       end if
-
-       exch => BCFamExchange(iBCGroup, sps)
-       BCGroupNeeded = .False.
-       BCGroupFamLoop: do i=1, size(BCFamGroups(iBCGroup)%famList)
-          if (famInList(BCFamGroups(iBCGroup)%famList(i), famList)) then
-             BCGroupNeeded = .True.
-             exit BCGroupFamLoop
-          end if
-       end do BCGroupFamLoop
-
-       if (.not. BCGroupNeeded) then
-          cycle
-       end if
-
-       ! Now we know we *actually* need something from this BCGroup.
-
-       ! Loop over each dimension individually since we have a scalar
-       ! scatter.
-       dimLoop: do iDim=1,3
-
-          call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-          localPtr = zero
-
-          ! jj is the running counter through the pointer array.
-          jj = 0
-          do nn=1, nDom
-             call setPointers(nn, 1_intType, sps)
-             do mm=1, nBocos
-                famInclude2: if (famInList(BCData(mm)%famID, exch%famList)) then
-                   iBeg = BCdata(mm)%inBeg; iEnd=BCData(mm)%inEnd
-                   jBeg = BCdata(mm)%jnBeg; jEnd=BCData(mm)%jnEnd
-                   call setBCPointers(mm, .True.)
-                   do j=jBeg, jEnd
-                      do i=iBeg, iEnd
-                         jj = jj+ 1
-                         localPtr(jj) = xx(i+1, j+1, iDim)
-                      end do
-                   end do
-                end if famInclude2
-             end do
-          end do
-
-          ! Restore the pointer
-          call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          ! Now scatter this to the zipper
-          call VecScatterBegin(zipper%scatter, exch%nodeValLocal,&
-               zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          call VecScatterEnd(zipper%scatter, exch%nodeValLocal,&
-               zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          ! The values we need are precisely what is in zipper%localVal
-          call vecGetArrayF90(zipper%localVal, localPtr, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          ! Just copy the received data into the points array. Only root proc.
-          if (myid == 0) then
-             points(iDim, ii+1:ii+size(localPtr)) = localPtr
-          end if
-          ! The values we need are precisely what is in zipper%localVal
-          call vecRestoreArrayF90(zipper%localVal, localPtr, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-       end do dimLoop
-
-       ! Increcment the running ii counter.
-       ii = ii + size(localPtr)
-
-    end do
-  end subroutine getSurfacePoints
-
-  subroutine mapVector(vec1, n1, famList1, nf1, vec2, n2, famList2, nf2, includeZipper,nDim)
-
-    ! Map one vector, vec1 of size (3,n1) defined on family list 'famList1' onto
-    ! vector, vec2, of size (3, n2) defined on family list 'famList2'
-
-    ! This operation is actually pretty fast since it just requires a
-    ! single copy of surface-based data.
-    use constants
-    use blockPointers, onlY :nDom, flowDoms
-    use sorting, only : famInList
-    use surfaceFamilies, only : BCFamGroups
-    use oversetData, only : zipperMeshes, zipperMesh, oversetPresent
-
-    implicit none
-
-    ! Input/Output
-    integer(kind=intType) :: n1, n2, nf1, nf2,nDim
-    integer(kind=intType), intent(in) :: famList1(nf1), famList2(nf2)
-    real(kind=realType), intent(in) :: vec1(nDim, n1)
-    real(kind=realType), intent(inout) :: vec2(nDim, n2)
-    logical, intent(in) :: includeZipper
-
-    ! Working
-    integer(kind=intType) :: i, k, ii, jj, nn, mm, iSize
-    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, famID, iBCGroup
-    logical :: fam1Included, fam2Included
-    type(zipperMesh), pointer :: zipper
-    logical :: BCGroupNeeed
-    ii = 0
-    jj = 0
-    domains: do nn=1,nDom
-       ! Don't set pointers for speed
-
-       ! Loop over the number of boundary subfaces of this block.
-       bocos: do mm=1,flowDoms(nn, 1, 1)%nBocos
-          famId = flowDoms(nn, 1, 1)%BCdata(mm)%famID
-
-          fam1Included = famInList(famID, famList1)
-          fam2Included = famInList(famid, famList2)
-
-          jBeg = flowDoms(nn, 1, 1)%bcData(mm)%jnBeg
-          jEnd = flowDoms(nn, 1, 1)%bcData(mm)%jnEnd
-
-          iBeg = flowDoms(nn, 1, 1)%bcData(mm)%inBeg
-          iEnd = flowDoms(nn, 1, 1)%bcData(mm)%inEnd
-          iSize = (iEnd-iBeg+1)*(jEnd-jBeg+1)
-
-          if (fam1Included .and. fam2Included) then
-             ! The two lists overlap so copy:
-             do k=1, iSize
-                vec2(:, k+jj) = vec1(:, k+ii)
-             end do
-          end if
-
-          ! Finally increment the counters if the face had been inclded
-          if (fam1Included) then
-             ii = ii + iSize
-          end if
-
-          if (fam2Included) then
-             jj =jj + iSize
-          end if
-
-       end do bocos
-    end do domains
-
-    ! As with the rest of the code we have to account for the zipper
-    ! mesh on the root proc.
-
-    ! We know must consider additional nodes that are required by the
-    ! zipper mesh triangles on the root proc.
-
-    ! No overset or don't want to include zipper, return immediately
-    if (.not. oversetPresent .or. .not. includeZipper) then
-       return
-    end if
-
-    ! If there are zipper meshes, we must include the nodes that the
-    ! zipper triangles will use.
-    BCGroupLoop: do iBCGroup=1, nFamExchange
-
-       zipper => zipperMeshes(iBCGroup)
-
-       fam1Included = .False.
-       fam2Included = .False.
-       BCGroupFamLoop: do i=1, size(BCFamGroups(iBCGroup)%famList)
-          if (famInList(BCFamGroups(iBCGroup)%famList(i), famList1)) then
-             fam1Included = .True.
-          end if
-          if (famInList(BCFamGroups(iBCGroup)%famList(i), famList2)) then
-             fam2Included = .True.
-          end if
-       end do BCGroupFamLoop
-
-       ! This is the total number of nodes that this BCGroup has. It
-       ! is not further broken down by family group.
-       iSize = size(zipper%indices)
-
-       if (fam1Included .and. fam2Included) then
-          ! The two lists overlap so copy:
-          do k=1, iSize
-             if (k+ii <= n1) then
-                vec2(:, k+jj) = vec1(:, k+ii)
-             end if
-          end do
-       end if
-
-       ! Finally increment the counters if this BCGroup had been included.
-       if (fam1Included) then
-          ii = ii + iSize
-       end if
-
-       if (fam2Included) then
-          jj = jj + iSize
-       end if
-    end do BCGroupLoop
-
-  end subroutine mapVector
-
-  subroutine getWallList(wallList, nWallList, nFamTotal)
-
-    ! Python wrapped utility function to return the list of families
-    ! that are walls to Python since we need that information in
-    ! Python for a few default values.
-
-    use constants
-    use surfaceFamilies, only :BCFamGroups
-    implicit none
-
-    integer(kind=intType), intent(in) :: nFamtotal
-    integer(kind=intType), dimension(nFamTotal), intent(out) :: wallList
-    integer(kind=intType), intent(out) :: nWallList
-
-    nWallList = size(BCFamGroups(iBCGroupWalls)%famList)
-    wallList(1:nWallList) = BCfamGroups(iBCGroupWalls)%famList
-
-  end subroutine getWallList
+
+                ! Restore the pointer
+                call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                ! Now scatter this to the zipper
+                call VecScatterBegin(zipper%scatter, exch%nodeValLocal, &
+                                     zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                call VecScatterEnd(zipper%scatter, exch%nodeValLocal, &
+                                   zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                ! The values we need are precisely what is in zipper%localVal
+                call vecGetArrayF90(zipper%localVal, localPtr, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                ! Just copy the received data into the points array. Only root proc.
+                if (myid == 0) then
+                    points(iDim, ii + 1:ii + size(localPtr)) = localPtr
+                end if
+                ! The values we need are precisely what is in zipper%localVal
+                call vecRestoreArrayF90(zipper%localVal, localPtr, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+            end do dimLoop
+
+            ! Increcment the running ii counter.
+            ii = ii + size(localPtr)
+
+        end do
+    end subroutine getSurfacePoints
+
+    subroutine mapVector(vec1, n1, famList1, nf1, vec2, n2, famList2, nf2, includeZipper, nDim)
+
+        ! Map one vector, vec1 of size (3,n1) defined on family list 'famList1' onto
+        ! vector, vec2, of size (3, n2) defined on family list 'famList2'
+
+        ! This operation is actually pretty fast since it just requires a
+        ! single copy of surface-based data.
+        use constants
+        use blockPointers, onlY: nDom, flowDoms
+        use sorting, only: famInList
+        use surfaceFamilies, only: BCFamGroups
+        use oversetData, only: zipperMeshes, zipperMesh, oversetPresent
+
+        implicit none
+
+        ! Input/Output
+        integer(kind=intType) :: n1, n2, nf1, nf2, nDim
+        integer(kind=intType), intent(in) :: famList1(nf1), famList2(nf2)
+        real(kind=realType), intent(in) :: vec1(nDim, n1)
+        real(kind=realType), intent(inout) :: vec2(nDim, n2)
+        logical, intent(in) :: includeZipper
+
+        ! Working
+        integer(kind=intType) :: i, k, ii, jj, nn, mm, iSize
+        integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, famID, iBCGroup
+        logical :: fam1Included, fam2Included
+        type(zipperMesh), pointer :: zipper
+        logical :: BCGroupNeeed
+        ii = 0
+        jj = 0
+        domains: do nn = 1, nDom
+            ! Don't set pointers for speed
+
+            ! Loop over the number of boundary subfaces of this block.
+            bocos: do mm = 1, flowDoms(nn, 1, 1)%nBocos
+                famId = flowDoms(nn, 1, 1)%BCdata(mm)%famID
+
+                fam1Included = famInList(famID, famList1)
+                fam2Included = famInList(famid, famList2)
+
+                jBeg = flowDoms(nn, 1, 1)%bcData(mm)%jnBeg
+                jEnd = flowDoms(nn, 1, 1)%bcData(mm)%jnEnd
+
+                iBeg = flowDoms(nn, 1, 1)%bcData(mm)%inBeg
+                iEnd = flowDoms(nn, 1, 1)%bcData(mm)%inEnd
+                iSize = (iEnd - iBeg + 1) * (jEnd - jBeg + 1)
+
+                if (fam1Included .and. fam2Included) then
+                    ! The two lists overlap so copy:
+                    do k = 1, iSize
+                        vec2(:, k + jj) = vec1(:, k + ii)
+                    end do
+                end if
+
+                ! Finally increment the counters if the face had been inclded
+                if (fam1Included) then
+                    ii = ii + iSize
+                end if
+
+                if (fam2Included) then
+                    jj = jj + iSize
+                end if
+
+            end do bocos
+        end do domains
+
+        ! As with the rest of the code we have to account for the zipper
+        ! mesh on the root proc.
+
+        ! We know must consider additional nodes that are required by the
+        ! zipper mesh triangles on the root proc.
+
+        ! No overset or don't want to include zipper, return immediately
+        if (.not. oversetPresent .or. .not. includeZipper) then
+            return
+        end if
+
+        ! If there are zipper meshes, we must include the nodes that the
+        ! zipper triangles will use.
+        BCGroupLoop: do iBCGroup = 1, nFamExchange
+
+            zipper => zipperMeshes(iBCGroup)
+
+            fam1Included = .False.
+            fam2Included = .False.
+            BCGroupFamLoop: do i = 1, size(BCFamGroups(iBCGroup)%famList)
+                if (famInList(BCFamGroups(iBCGroup)%famList(i), famList1)) then
+                    fam1Included = .True.
+                end if
+                if (famInList(BCFamGroups(iBCGroup)%famList(i), famList2)) then
+                    fam2Included = .True.
+                end if
+            end do BCGroupFamLoop
+
+            ! This is the total number of nodes that this BCGroup has. It
+            ! is not further broken down by family group.
+            iSize = size(zipper%indices)
+
+            if (fam1Included .and. fam2Included) then
+                ! The two lists overlap so copy:
+                do k = 1, iSize
+                    if (k + ii <= n1) then
+                        vec2(:, k + jj) = vec1(:, k + ii)
+                    end if
+                end do
+            end if
+
+            ! Finally increment the counters if this BCGroup had been included.
+            if (fam1Included) then
+                ii = ii + iSize
+            end if
+
+            if (fam2Included) then
+                jj = jj + iSize
+            end if
+        end do BCGroupLoop
+
+    end subroutine mapVector
+
+    subroutine getWallList(wallList, nWallList, nFamTotal)
+
+        ! Python wrapped utility function to return the list of families
+        ! that are walls to Python since we need that information in
+        ! Python for a few default values.
+
+        use constants
+        use surfaceFamilies, only: BCFamGroups
+        implicit none
+
+        integer(kind=intType), intent(in) :: nFamtotal
+        integer(kind=intType), dimension(nFamTotal), intent(out) :: wallList
+        integer(kind=intType), intent(out) :: nWallList
+
+        nWallList = size(BCFamGroups(iBCGroupWalls)%famList)
+        wallList(1:nWallList) = BCfamGroups(iBCGroupWalls)%famList
+
+    end subroutine getWallList
 
 end module surfaceUtils
diff --git a/src/utils/utils.F90 b/src/utils/utils.F90
index 0e20fe694..ad471d4c6 100644
--- a/src/utils/utils.F90
+++ b/src/utils/utils.F90
@@ -1195,13 +1195,13 @@ subroutine computeRootBendingMoment(cf, cm, bendingMoment)
         bendingMoment = zero
         if (liftIndex == 2) then
             !z out wing sum momentx,momentz
-  elasticMomentx = cm(1) + cf(2)*(pointRefEC(3) - pointRef(3))/lengthref - cf(3)*(pointRefEC(2) - pointRef(2))/lengthref
-  elasticMomentz = cm(3) - cf(2)*(pointRefEC(1) - pointref(1))/lengthref + cf(1)*(pointRefEC(2) - pointRef(2))/lengthref
+        elasticMomentx = cm(1) + cf(2)*(pointRefEC(3)-pointRef(3))/lengthref-cf(3)*(pointRefEC(2)-pointRef(2))/lengthref
+        elasticMomentz = cm(3) - cf(2)*(pointRefEC(1)-pointref(1))/lengthref+cf(1)*(pointRefEC(2)-pointRef(2))/lengthref
             bendingMoment = sqrt(elasticMomentx**2 + elasticMomentz**2)
         elseif (liftIndex == 3) then
             !y out wing sum momentx,momenty
-  elasticMomentx = cm(1) + cf(3)*(pointrefEC(2) - pointRef(2))/lengthref + cf(3)*(pointrefEC(3) - pointref(3))/lengthref
-  elasticMomenty = cm(2) + cf(3)*(pointRefEC(1) - pointRef(1))/lengthref + cf(1)*(pointrefEC(3) - pointRef(3))/lengthref
+        elasticMomentx = cm(1) + cf(3)*(pointrefEC(2)-pointRef(2))/lengthref+cf(3)*(pointrefEC(3)-pointref(3))/lengthref
+        elasticMomenty = cm(2) + cf(3)*(pointRefEC(1)-pointRef(1))/lengthref+cf(1)*(pointrefEC(3)-pointRef(3))/lengthref
             bendingMoment = sqrt(elasticMomentx**2 + elasticMomenty**2)
         end if
 
diff --git a/src/wallDistance/wallDistance.F90 b/src/wallDistance/wallDistance.F90
index 0ec6baebf..c0d2672fc 100644
--- a/src/wallDistance/wallDistance.F90
+++ b/src/wallDistance/wallDistance.F90
@@ -119,10 +119,17 @@ subroutine updateWallDistancesQuickly(nn, level, sps)
 
     end subroutine updateWallDistancesQuickly
 
-    subroutine updateWallRoughness()
+
+    ! ----------------------------------------------------------------------
+    !                                                                      |
+    !                    No Tapenade Routine below this line               |
+    !                                                                      |
+    ! ----------------------------------------------------------------------
 
 #ifndef USE_TAPENADE
 
+    subroutine updateWallRoughness()
+
         ! Sets the roughness-value (ks) of the nearest wall-cell in the volume cells.
         !
         ! At first, it creates two lists: (1) ks values on the surface; (2) global
@@ -319,17 +326,9 @@ subroutine updateWallRoughness()
                 deallocate (ksGlobal, cellIdGlobal)
             end do
         end do
-
-#endif
     end subroutine updateWallRoughness
 
-    ! ----------------------------------------------------------------------
-    !                                                                      |
-    !                    No Tapenade Routine below this line               |
-    !                                                                      |
-    ! ----------------------------------------------------------------------
 
-#ifndef USE_TAPENADE
     subroutine computeWallDistance(level, allocMem)
         !
         !       wallDistance computes the distances of the cell centers to
@@ -2128,7 +2127,6 @@ subroutine determineWallAssociation(level, sps)
                             if (useRoughSA) then
                                 flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k) = -1
                             end if
-
                         end if
                     end do
                 end do
diff --git a/src/warping/getAreas.f90 b/src/warping/getAreas.f90
index 67c04af25..398779472 100644
--- a/src/warping/getAreas.f90
+++ b/src/warping/getAreas.f90
@@ -1,256 +1,256 @@
 subroutine getAreas(areas, pts, npts, sps_in, axis)
-  use constants
-  use blockPointers
-  use flowVarRefState
-  use inputTimeSpectral
-  use communication
-  use inputPhysics
-  use utils, only : setPointers
-  implicit none
-  !
-  !      Local variables.
-  !
-  integer(kind=intType), intent(in) :: npts, sps_in
-  real(kind=realType), intent(in)  :: pts(3,npts), axis(3)
-  real(kind=realType), intent(out) :: areas(3,npts)
-
-  integer(kind=intType) :: mm, nn, i, j, ii, sps
-  integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
-  integer(kind=intType) :: lower_left,lower_right,upper_left,upper_right
-  real(kind=realType) :: da, fact, fact2
-
-  areas = zero
-  sps = sps_in
-
-  ! Compute the local forces (or tractions). Take the scaling
-  ! factor into account to obtain the forces in SI-units,
-  ! i.e. Newton.
-  ii = 0
-  domains: do nn=1,nDom
-     call setPointers(nn,1_intType,sps)
-     if (flowDoms(nn,1_intType,sps)%rightHanded) then
-        fact2 = one
-     else
-        fact2 = -one
-     end if
-
-     ! Loop over the number of boundary subfaces of this block.
-     bocos: do mm=1,nBocos
-
-        if(BCType(mm) == EulerWall.or.BCType(mm) == NSWallAdiabatic .or. &
-             BCType(mm) == NSWallIsothermal) then
-
-           select case (BCFaceID(mm))
-
-              ! NOTE: The 'fact' here are NOT the same as you will
-              ! find in ForcesAndMoment.f90. The reason is that, we
-              ! are not using points to si, sj, sk. Those have teh
-              ! normals pointing in the direction of increasing
-              ! {i,j,k}. Here we are evaluating the normal from
-              ! directly from the coordinates on the faces. As it
-              ! happens, the normals for the jMin and jMax faces are
-              ! flipped.
-           case (iMin)
-              fact = one
-           case (iMax)
-              fact = -one
-           case (jMin)
-              fact = -one
-           case (jMax)
-              fact = one
-           case (kMin)
-              fact = one
-           case (kMax)
-              fact = -one
-           end select
-
-           ! Store the cell range of the subfaces a bit easier.
-           ! As only owned faces must be considered the nodal range
-           ! in BCData must be used to obtain this data.
-
-           jBeg = BCData(mm)%jnBeg + 1; jEnd = BCData(mm)%jnEnd
-           iBeg = BCData(mm)%inBeg + 1; iEnd = BCData(mm)%inEnd
-
-           ! Compute the dual area at each node. Just store in first dof
-           do j=jBeg, jEnd ! This is a face loop
-              do i=iBeg, iEnd ! This is a face loop
-
-                 ! Compute Normal
-
-                 lower_left  = ii + (j-jBeg)*(iEnd-iBeg+2) + i-iBeg + 1
-                 lower_right = lower_left + 1
-                 upper_left  = lower_right + iend - ibeg + 1
-                 upper_right = upper_left + 1
-
-                 call quad_area(&
-                      pts(:, lower_left), pts(:, lower_right), &
-                      pts(:, upper_left), pts(:, upper_right), &
-                      axis, da)
-                 da = fourth *da * fact * fact2
-
-                 if (da > zero) then
-                    ! Scatter to nodes
-                    areas(1,lower_left)  = areas(1,lower_left)  + da
-                    areas(1,lower_right) = areas(1,lower_right) + da
-                    areas(1,upper_left)  = areas(1,upper_left)  + da
-                    areas(1,upper_right) = areas(1,upper_right) + da
-                 end if
-              end do
-           end do
-
-           ! Note how iBeg,iBeg is defined above... it is one MORE
-           ! then the starting node (used for looping over faces, not
-           ! nodes)
-           ii = ii + (jEnd-jBeg+2)*(iEnd-iBeg+2)
-
+    use constants
+    use blockPointers
+    use flowVarRefState
+    use inputTimeSpectral
+    use communication
+    use inputPhysics
+    use utils, only: setPointers
+    implicit none
+    !
+    !      Local variables.
+    !
+    integer(kind=intType), intent(in) :: npts, sps_in
+    real(kind=realType), intent(in) :: pts(3, npts), axis(3)
+    real(kind=realType), intent(out) :: areas(3, npts)
+
+    integer(kind=intType) :: mm, nn, i, j, ii, sps
+    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
+    integer(kind=intType) :: lower_left, lower_right, upper_left, upper_right
+    real(kind=realType) :: da, fact, fact2
+
+    areas = zero
+    sps = sps_in
+
+    ! Compute the local forces (or tractions). Take the scaling
+    ! factor into account to obtain the forces in SI-units,
+    ! i.e. Newton.
+    ii = 0
+    domains: do nn = 1, nDom
+        call setPointers(nn, 1_intType, sps)
+        if (flowDoms(nn, 1_intType, sps)%rightHanded) then
+            fact2 = one
+        else
+            fact2 = -one
         end if
-     end do bocos
-  end do domains
+
+        ! Loop over the number of boundary subfaces of this block.
+        bocos: do mm = 1, nBocos
+
+            if (BCType(mm) == EulerWall .or. BCType(mm) == NSWallAdiabatic .or. &
+                BCType(mm) == NSWallIsothermal) then
+
+                select case (BCFaceID(mm))
+
+                    ! NOTE: The 'fact' here are NOT the same as you will
+                    ! find in ForcesAndMoment.f90. The reason is that, we
+                    ! are not using points to si, sj, sk. Those have teh
+                    ! normals pointing in the direction of increasing
+                    ! {i,j,k}. Here we are evaluating the normal from
+                    ! directly from the coordinates on the faces. As it
+                    ! happens, the normals for the jMin and jMax faces are
+                    ! flipped.
+                case (iMin)
+                    fact = one
+                case (iMax)
+                    fact = -one
+                case (jMin)
+                    fact = -one
+                case (jMax)
+                    fact = one
+                case (kMin)
+                    fact = one
+                case (kMax)
+                    fact = -one
+                end select
+
+                ! Store the cell range of the subfaces a bit easier.
+                ! As only owned faces must be considered the nodal range
+                ! in BCData must be used to obtain this data.
+
+                jBeg = BCData(mm)%jnBeg + 1; jEnd = BCData(mm)%jnEnd
+                iBeg = BCData(mm)%inBeg + 1; iEnd = BCData(mm)%inEnd
+
+                ! Compute the dual area at each node. Just store in first dof
+                do j = jBeg, jEnd ! This is a face loop
+                    do i = iBeg, iEnd ! This is a face loop
+
+                        ! Compute Normal
+
+                        lower_left = ii + (j - jBeg) * (iEnd - iBeg + 2) + i - iBeg + 1
+                        lower_right = lower_left + 1
+                        upper_left = lower_right + iend - ibeg + 1
+                        upper_right = upper_left + 1
+
+                        call quad_area( &
+                            pts(:, lower_left), pts(:, lower_right), &
+                            pts(:, upper_left), pts(:, upper_right), &
+                            axis, da)
+                        da = fourth * da * fact * fact2
+
+                        if (da > zero) then
+                            ! Scatter to nodes
+                            areas(1, lower_left) = areas(1, lower_left) + da
+                            areas(1, lower_right) = areas(1, lower_right) + da
+                            areas(1, upper_left) = areas(1, upper_left) + da
+                            areas(1, upper_right) = areas(1, upper_right) + da
+                        end if
+                    end do
+                end do
+
+                ! Note how iBeg,iBeg is defined above... it is one MORE
+                ! then the starting node (used for looping over faces, not
+                ! nodes)
+                ii = ii + (jEnd - jBeg + 2) * (iEnd - iBeg + 2)
+
+            end if
+        end do bocos
+    end do domains
 end subroutine getAreas
 
 subroutine getAreaSensitivity(darea, pts, npts, sps_in, axis)
-  use constants
-  use blockPointers
-  use flowVarRefState
-  use inputTimeSpectral
-  use communication
-  use inputPhysics
-  use utils, only : setPointers
-  implicit none
-  !
-  !      Local variables.
-  !
-  integer(kind=intType), intent(in) :: npts, sps_in
-  real(kind=realType), intent(in)  :: pts(3,npts), axis(3)
-  real(kind=realType), intent(out) :: darea(3,npts)
-
-  integer(kind=intType) :: mm, nn, i, j, ii, sps
-  integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
-  integer(kind=intType) :: lower_left,lower_right,upper_left,upper_right
-  real(kind=realType) :: area, areab, pt1b(3), pt2b(3), pt3b(3), pt4b(3)
-  real(kind=realType) :: fact, fact2, da
-
-  darea = zero
-  sps = sps_in
-
-  ! Compute the local forces (or tractions). Take the scaling
-  ! factor into account to obtain the forces in SI-units,
-  ! i.e. Newton.
-  ii = 0
-  domains: do nn=1,nDom
-     call setPointers(nn,1_intType,sps)
-     if (flowDoms(nn,1_intType,sps)%rightHanded) then
-        fact2 = one
-     else
-        fact2 = -one
-     end if
-
-     ! Loop over the number of boundary subfaces of this block.
-     bocos: do mm=1,nBocos
-
-        if(BCType(mm) == EulerWall.or.BCType(mm) == NSWallAdiabatic .or. &
-             BCType(mm) == NSWallIsothermal) then
-
-           select case (BCFaceID(mm))
-
-              ! NOTE: The 'fact' here are NOT the same as you will
-              ! find in ForcesAndMoment.f90. The reason is that, we
-              ! are not using points to si, sj, sk. Those have teh
-              ! normals pointing in the direction of increasing
-              ! {i,j,k}. Here we are evaluating the normal from
-              ! directly from the coordinates on the faces. As it
-              ! happens, the normals for the jMin and jMax faces are
-              ! flipped.
-           case (iMin)
-              fact = one
-           case (iMax)
-              fact = -one
-           case (jMin)
-              fact = -one
-           case (jMax)
-              fact = one
-           case (kMin)
-              fact = one
-           case (kMax)
-              fact = -one
-           end select
-
-           ! Store the cell range of the subfaces a bit easier.
-           ! As only owned faces must be considered the nodal range
-           ! in BCData must be used to obtain this data.
-
-           jBeg = BCData(mm)%jnBeg + 1; jEnd = BCData(mm)%jnEnd
-           iBeg = BCData(mm)%inBeg + 1; iEnd = BCData(mm)%inEnd
-
-           do j=jBeg, jEnd ! This is a face loop
-              do i=iBeg, iEnd ! This is a face loop
-
-                 ! Extract 4 corner points
-                 lower_left  = ii + (j-jBeg)*(iEnd-iBeg+2) + i-iBeg + 1
-                 lower_right = lower_left + 1
-                 upper_left  = lower_right + iend - ibeg + 1
-                 upper_right = upper_left + 1
-
-                 ! Compute actual area since we need to know to
-                 ! include or not. The reverse mode calc does NOT
-                 ! compute area
-                 call quad_area(&
-                      pts(:, lower_left), pts(:, lower_right), &
-                      pts(:, upper_left), pts(:, upper_right), &
-                      axis, da)
-
-                 da = fourth *da * fact * fact2
-                 if (da > zero) then
-
-                    areab = one
-                    call quad_area_b(&
-                         pts(:, lower_left) , pt1b, &
-                         pts(:, lower_right), pt2b, &
-                         pts(:, upper_left) , pt3b, &
-                         pts(:, upper_right), pt4b, &
-                         axis, area, areab)
-
-                    darea(:,lower_left)  = darea(:, lower_left)  + pt1b
-                    darea(:,lower_right) = darea(:, lower_right) + pt2b
-                    darea(:,upper_left)  = darea(:, upper_left)  + pt3b
-                    darea(:,upper_right) = darea(:, upper_right) + pt4b
-                 end if
-              end do
-           end do
-
-           ! Note how iBeg,iBeg is defined above... it is one MORE
-           ! then the starting node (used for looping over faces, not
-           ! nodes)
-           ii = ii + (jEnd-jBeg+2)*(iEnd-iBeg+2)
-
+    use constants
+    use blockPointers
+    use flowVarRefState
+    use inputTimeSpectral
+    use communication
+    use inputPhysics
+    use utils, only: setPointers
+    implicit none
+    !
+    !      Local variables.
+    !
+    integer(kind=intType), intent(in) :: npts, sps_in
+    real(kind=realType), intent(in) :: pts(3, npts), axis(3)
+    real(kind=realType), intent(out) :: darea(3, npts)
+
+    integer(kind=intType) :: mm, nn, i, j, ii, sps
+    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
+    integer(kind=intType) :: lower_left, lower_right, upper_left, upper_right
+    real(kind=realType) :: area, areab, pt1b(3), pt2b(3), pt3b(3), pt4b(3)
+    real(kind=realType) :: fact, fact2, da
+
+    darea = zero
+    sps = sps_in
+
+    ! Compute the local forces (or tractions). Take the scaling
+    ! factor into account to obtain the forces in SI-units,
+    ! i.e. Newton.
+    ii = 0
+    domains: do nn = 1, nDom
+        call setPointers(nn, 1_intType, sps)
+        if (flowDoms(nn, 1_intType, sps)%rightHanded) then
+            fact2 = one
+        else
+            fact2 = -one
         end if
-     end do bocos
-  end do domains
+
+        ! Loop over the number of boundary subfaces of this block.
+        bocos: do mm = 1, nBocos
+
+            if (BCType(mm) == EulerWall .or. BCType(mm) == NSWallAdiabatic .or. &
+                BCType(mm) == NSWallIsothermal) then
+
+                select case (BCFaceID(mm))
+
+                    ! NOTE: The 'fact' here are NOT the same as you will
+                    ! find in ForcesAndMoment.f90. The reason is that, we
+                    ! are not using points to si, sj, sk. Those have teh
+                    ! normals pointing in the direction of increasing
+                    ! {i,j,k}. Here we are evaluating the normal from
+                    ! directly from the coordinates on the faces. As it
+                    ! happens, the normals for the jMin and jMax faces are
+                    ! flipped.
+                case (iMin)
+                    fact = one
+                case (iMax)
+                    fact = -one
+                case (jMin)
+                    fact = -one
+                case (jMax)
+                    fact = one
+                case (kMin)
+                    fact = one
+                case (kMax)
+                    fact = -one
+                end select
+
+                ! Store the cell range of the subfaces a bit easier.
+                ! As only owned faces must be considered the nodal range
+                ! in BCData must be used to obtain this data.
+
+                jBeg = BCData(mm)%jnBeg + 1; jEnd = BCData(mm)%jnEnd
+                iBeg = BCData(mm)%inBeg + 1; iEnd = BCData(mm)%inEnd
+
+                do j = jBeg, jEnd ! This is a face loop
+                    do i = iBeg, iEnd ! This is a face loop
+
+                        ! Extract 4 corner points
+                        lower_left = ii + (j - jBeg) * (iEnd - iBeg + 2) + i - iBeg + 1
+                        lower_right = lower_left + 1
+                        upper_left = lower_right + iend - ibeg + 1
+                        upper_right = upper_left + 1
+
+                        ! Compute actual area since we need to know to
+                        ! include or not. The reverse mode calc does NOT
+                        ! compute area
+                        call quad_area( &
+                            pts(:, lower_left), pts(:, lower_right), &
+                            pts(:, upper_left), pts(:, upper_right), &
+                            axis, da)
+
+                        da = fourth * da * fact * fact2
+                        if (da > zero) then
+
+                            areab = one
+                            call quad_area_b( &
+                                pts(:, lower_left), pt1b, &
+                                pts(:, lower_right), pt2b, &
+                                pts(:, upper_left), pt3b, &
+                                pts(:, upper_right), pt4b, &
+                                axis, area, areab)
+
+                            darea(:, lower_left) = darea(:, lower_left) + pt1b
+                            darea(:, lower_right) = darea(:, lower_right) + pt2b
+                            darea(:, upper_left) = darea(:, upper_left) + pt3b
+                            darea(:, upper_right) = darea(:, upper_right) + pt4b
+                        end if
+                    end do
+                end do
+
+                ! Note how iBeg,iBeg is defined above... it is one MORE
+                ! then the starting node (used for looping over faces, not
+                ! nodes)
+                ii = ii + (jEnd - jBeg + 2) * (iEnd - iBeg + 2)
+
+            end if
+        end do bocos
+    end do domains
 end subroutine getAreaSensitivity
 
 subroutine quad_area(p1, p2, p3, p4, axis, area)
-  ! Kernel-level function to get area of quad defined by 4 points
-  ! projected onto plane defined by axis. Only +ve areas are computed.
-  use constants
-  implicit none
+    ! Kernel-level function to get area of quad defined by 4 points
+    ! projected onto plane defined by axis. Only +ve areas are computed.
+    use constants
+    implicit none
 
-  ! I/O
-  real(kind=realType), intent(in) :: p1(3), p2(3), p3(3), p4(3), axis(3)
-  real(kind=realType), intent(out) :: area
+    ! I/O
+    real(kind=realType), intent(in) :: p1(3), p2(3), p3(3), p4(3), axis(3)
+    real(kind=realType), intent(out) :: area
 
-  ! Working
-  real(kind=realType) :: v1(3), v2(3), sss(3)
-  ! Vectors for Cross Product
+    ! Working
+    real(kind=realType) :: v1(3), v2(3), sss(3)
+    ! Vectors for Cross Product
 
-  v1(:) = p4 - p1
-  v2(:) = p3 - p2
+    v1(:) = p4 - p1
+    v2(:) = p3 - p2
 
-  ! Cross Product
-  sss(1) = half*(v1(2)*v2(3) - v1(3)*v2(2))
-  sss(2) = half*(v1(3)*v2(1) - v1(1)*v2(3))
-  sss(3) = half*(v1(1)*v2(2) - v1(2)*v2(1))
+    ! Cross Product
+    sss(1) = half * (v1(2) * v2(3) - v1(3) * v2(2))
+    sss(2) = half * (v1(3) * v2(1) - v1(1) * v2(3))
+    sss(3) = half * (v1(1) * v2(2) - v1(2) * v2(1))
 
-  area = sss(1)*axis(1)+ sss(2)*axis(2)+ sss(3)*axis(3)
+    area = sss(1) * axis(1) + sss(2) * axis(2) + sss(3) * axis(3)
 
 end subroutine quad_area
 
@@ -264,64 +264,64 @@ end subroutine quad_area
 !                p4:out
 SUBROUTINE QUAD_AREA_B(p1, p1b, p2, p2b, p3, p3b, p4, p4b, axis, area, &
 &  areab)
-  use constants
-  IMPLICIT NONE
+    use constants
+    IMPLICIT NONE
 ! Kernel-level function to get area of quad defined by 4 points
 ! projected onto plane defined by axis. Only +ve areas are computed.
 ! I/O
-  REAL(kind=realtype), INTENT(IN) :: p1(3), p2(3), p3(3), p4(3), axis(3)
-  REAL(kind=realtype) :: p1b(3), p2b(3), p3b(3), p4b(3)
-  REAL(kind=realtype) :: area
-  REAL(kind=realtype) :: areab
+    REAL(kind=realtype), INTENT(IN) :: p1(3), p2(3), p3(3), p4(3), axis(3)
+    REAL(kind=realtype) :: p1b(3), p2b(3), p3b(3), p4b(3)
+    REAL(kind=realtype) :: area
+    REAL(kind=realtype) :: areab
 ! Working
-  REAL(kind=realtype) :: v1(3), v2(3), sss(3)
-  REAL(kind=realtype) :: v1b(3), v2b(3), sssb(3)
-  REAL(kind=realtype) :: tempb1
-  REAL(kind=realtype) :: tempb0
-  INTRINSIC ABS
-  REAL(kind=realtype) :: tempb
+    REAL(kind=realtype) :: v1(3), v2(3), sss(3)
+    REAL(kind=realtype) :: v1b(3), v2b(3), sssb(3)
+    REAL(kind=realtype) :: tempb1
+    REAL(kind=realtype) :: tempb0
+    INTRINSIC ABS
+    REAL(kind=realtype) :: tempb
 ! Vectors for Cross Product
-  v1(:) = p4 - p1
-  v2(:) = p3 - p2
+    v1(:) = p4 - p1
+    v2(:) = p3 - p2
 ! Cross Product
-  sss(1) = half*(v1(2)*v2(3)-v1(3)*v2(2))
-  sss(2) = half*(v1(3)*v2(1)-v1(1)*v2(3))
-  sss(3) = half*(v1(1)*v2(2)-v1(2)*v2(1))
-  IF (sss(1)*axis(1) + sss(2)*axis(2) + sss(3)*axis(3) .GE. 0.) THEN
-    sssb = 0.0
-    sssb(1) = axis(1)*areab
-    sssb(2) = axis(2)*areab
-    sssb(3) = axis(3)*areab
-  ELSE
-    sssb = 0.0
-    sssb(1) = -(axis(1)*areab)
-    sssb(2) = -(axis(2)*areab)
-    sssb(3) = -(axis(3)*areab)
-  END IF
-  v1b = 0.0
-  v2b = 0.0
-  tempb = half*sssb(3)
-  v1b(1) = v2(2)*tempb
-  v2b(2) = v1(1)*tempb
-  v1b(2) = -(v2(1)*tempb)
-  sssb(3) = 0.0
-  tempb0 = half*sssb(2)
-  v2b(1) = v1(3)*tempb0 - v1(2)*tempb
-  v1b(3) = v1b(3) + v2(1)*tempb0
-  v1b(1) = v1b(1) - v2(3)*tempb0
-  sssb(2) = 0.0
-  tempb1 = half*sssb(1)
-  v2b(3) = v2b(3) + v1(2)*tempb1 - v1(1)*tempb0
-  v1b(2) = v1b(2) + v2(3)*tempb1
-  v1b(3) = v1b(3) - v2(2)*tempb1
-  v2b(2) = v2b(2) - v1(3)*tempb1
-  p2b = 0.0
-  p3b = 0.0
-  p3b = v2b(:)
-  p2b = -v2b(:)
-  p1b = 0.0
-  p4b = 0.0
-  p4b = v1b(:)
-  p1b = -v1b(:)
-  areab = 0.0
+    sss(1) = half * (v1(2) * v2(3) - v1(3) * v2(2))
+    sss(2) = half * (v1(3) * v2(1) - v1(1) * v2(3))
+    sss(3) = half * (v1(1) * v2(2) - v1(2) * v2(1))
+    IF (sss(1) * axis(1) + sss(2) * axis(2) + sss(3) * axis(3) .GE. 0.) THEN
+        sssb = 0.0
+        sssb(1) = axis(1) * areab
+        sssb(2) = axis(2) * areab
+        sssb(3) = axis(3) * areab
+    ELSE
+        sssb = 0.0
+        sssb(1) = -(axis(1) * areab)
+        sssb(2) = -(axis(2) * areab)
+        sssb(3) = -(axis(3) * areab)
+    END IF
+    v1b = 0.0
+    v2b = 0.0
+    tempb = half * sssb(3)
+    v1b(1) = v2(2) * tempb
+    v2b(2) = v1(1) * tempb
+    v1b(2) = -(v2(1) * tempb)
+    sssb(3) = 0.0
+    tempb0 = half * sssb(2)
+    v2b(1) = v1(3) * tempb0 - v1(2) * tempb
+    v1b(3) = v1b(3) + v2(1) * tempb0
+    v1b(1) = v1b(1) - v2(3) * tempb0
+    sssb(2) = 0.0
+    tempb1 = half * sssb(1)
+    v2b(3) = v2b(3) + v1(2) * tempb1 - v1(1) * tempb0
+    v1b(2) = v1b(2) + v2(3) * tempb1
+    v1b(3) = v1b(3) - v2(2) * tempb1
+    v2b(2) = v2b(2) - v1(3) * tempb1
+    p2b = 0.0
+    p3b = 0.0
+    p3b = v2b(:)
+    p2b = -v2b(:)
+    p1b = 0.0
+    p4b = 0.0
+    p4b = v1b(:)
+    p1b = -v1b(:)
+    areab = 0.0
 END SUBROUTINE QUAD_AREA_B
diff --git a/src/warping/getForces.F90 b/src/warping/getForces.F90
index a2ba31482..04106510f 100644
--- a/src/warping/getForces.F90
+++ b/src/warping/getForces.F90
@@ -1,1667 +1,1661 @@
 
 subroutine getForces(forces, npts, sps)
-  use constants
-  use communication, only : myid
-  use blockPointers, only : BCData, nDom, nBocos, BCType
-  use inputPhysics, only : forcesAsTractions
-  use utils, only : setPointers, terminate, EChk
-  use surfaceIntegrations, only : integrateSurfaces
-  use surfaceFamilies, only : fullfamList
-  use oversetData, only : zipperMeshes, zipperMesh, oversetPresent
-  use surfaceFamilies, only : familyExchange, BCFamExchange
+    use constants
+    use communication, only: myid
+    use blockPointers, only: BCData, nDom, nBocos, BCType
+    use inputPhysics, only: forcesAsTractions
+    use utils, only: setPointers, terminate, EChk
+    use surfaceIntegrations, only: integrateSurfaces
+    use surfaceFamilies, only: fullfamList
+    use oversetData, only: zipperMeshes, zipperMesh, oversetPresent
+    use surfaceFamilies, only: familyExchange, BCFamExchange
 #include <petsc/finclude/petsc.h>
-  use petsc
-  implicit none
-  integer(kind=intType), intent(in) :: npts, sps
-  real(kind=realType), intent(inout) :: forces(3,npts)
-
-  integer(kind=intType) :: mm, nn, i, j, ii, jj, iDim, ierr
-  integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
-  real(kind=realType) :: sss(3),v2(3),v1(3), qa, sepSensor, Cavitation
-  real(kind=realType) :: sepSensorAvg(3)
-  real(kind=realType) :: Fp(3), Fv(3), Mp(3), Mv(3), yplusmax, qf(3)
-  real(kind=realType) :: localValues(nLocalValues)
-  type(zipperMesh), pointer :: zipper
-  type(familyexchange), pointer :: exch
-  real(kind=realType), dimension(:), pointer :: localPtr
-  real(kind=realType), dimension(nCostFunction) :: funcValues
-  ! Make sure *all* forces are computed. Sectioning will be done
-  ! else-where.
-
-  domains: do nn=1,nDom
-     call setPointers(nn, 1_intType, sps)
-     localValues = zero
-     call integrateSurfaces(localValues, fullFamList)
-  end do domains
-
-  if (forcesAsTractions) then
-     ! Compute tractions if necessary
-     call computeNodalTractions(sps)
-  else
-     call computeNodalForces(sps)
-  end if
-
-  ii = 0
-  domains2: do nn=1,nDom
-     call setPointers(nn, 1_intType, sps)
-
-     ! Loop over the number of boundary subfaces of this block.
-     bocos: do mm=1, nBocos
-        if(BCType(mm) == EulerWall.or.BCType(mm) == NSWallAdiabatic .or. &
-             BCType(mm) == NSWallIsothermal) then
-
-           ! This is easy, just copy out F or T in continuous ordering.
-           do j=BCData(mm)%jnBeg, BCData(mm)%jnEnd
-              do i=BCData(mm)%inBeg, BCData(mm)%inEnd
-                 ii = ii + 1
-                 if (forcesAsTractions) then
-                    Forces(:, ii) = bcData(mm)%Tp(i, j, :) + bcData(mm)%Tv(i, j, :)
-                 else
-                    Forces(:, ii) = bcData(mm)%F(i, j, :)
-                 end if
-              end do
-           end do
-        end if
-     end do bocos
-  end do domains2
-
-  ! We know must consider additional forces that are required by the
-  ! zipper mesh triangles on the root proc.
-
-  ! Pointer for easier reading.
-  zipper => zipperMeshes(iBCGroupWalls)
-  exch => BCFamExchange(iBCGroupWalls, sps)
-  ! No overset present or the zipper isn't allocated nothing to do:
-  if (.not. oversetPresent .or. .not. zipper%allocated) then
-     return
-  end if
-
-  if (.not. forcesAsTractions) then
-     ! We have a zipper and regular forces are requested. This is not yet supported.
-     call terminate('getForces', 'getForces() is not implmented for zipper meshes and '&
-          &'forcesAsTractions=False')
-  end if
-
-  ! Loop over each dimension individually since we have a scalar
-  ! scatter.
-  dimLoop: do iDim=1,3
-
-     call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
-
-     ! Copy in the values we already have to the exchange.
-     ii = size(LocalPtr)
-     localPtr = forces(iDim, 1:ii)
-
-     ! Restore the pointer
-     call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
-
-     ! Now scatter this to the zipper
-     call VecScatterBegin(zipper%scatter, exch%nodeValLocal,&
-          zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
-
-     call VecScatterEnd(zipper%scatter, exch%nodeValLocal,&
-          zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
-
-     ! The values we need are precisely what is in zipper%localVal
-     call vecGetArrayF90(zipper%localVal, localPtr, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
-
-     ! Just copy the received data into the forces array. Just on root proc:
-     if (myid == 0) then
-        forces(iDim, ii+1:ii+size(localPtr)) = localPtr
-     end if
-
-     call vecGetArrayF90(zipper%localVal, localPtr, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
-  end do dimLoop
+    use petsc
+    implicit none
+    integer(kind=intType), intent(in) :: npts, sps
+    real(kind=realType), intent(inout) :: forces(3, npts)
+
+    integer(kind=intType) :: mm, nn, i, j, ii, jj, iDim, ierr
+    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
+    real(kind=realType) :: sss(3), v2(3), v1(3), qa, sepSensor, Cavitation
+    real(kind=realType) :: sepSensorAvg(3)
+    real(kind=realType) :: Fp(3), Fv(3), Mp(3), Mv(3), yplusmax, qf(3)
+    real(kind=realType) :: localValues(nLocalValues)
+    type(zipperMesh), pointer :: zipper
+    type(familyexchange), pointer :: exch
+    real(kind=realType), dimension(:), pointer :: localPtr
+    real(kind=realType), dimension(nCostFunction) :: funcValues
+    ! Make sure *all* forces are computed. Sectioning will be done
+    ! else-where.
+
+    domains: do nn = 1, nDom
+        call setPointers(nn, 1_intType, sps)
+        localValues = zero
+        call integrateSurfaces(localValues, fullFamList)
+    end do domains
+
+    if (forcesAsTractions) then
+        ! Compute tractions if necessary
+        call computeNodalTractions(sps)
+    else
+        call computeNodalForces(sps)
+    end if
+
+    ii = 0
+    domains2: do nn = 1, nDom
+        call setPointers(nn, 1_intType, sps)
 
-end subroutine getForces
+        ! Loop over the number of boundary subfaces of this block.
+        bocos: do mm = 1, nBocos
+            if (BCType(mm) == EulerWall .or. BCType(mm) == NSWallAdiabatic .or. &
+                BCType(mm) == NSWallIsothermal) then
 
-subroutine getForces_d(forces, forcesd, npts, sps)
+                ! This is easy, just copy out F or T in continuous ordering.
+                do j = BCData(mm)%jnBeg, BCData(mm)%jnEnd
+                    do i = BCData(mm)%inBeg, BCData(mm)%inEnd
+                        ii = ii + 1
+                        if (forcesAsTractions) then
+                            Forces(:, ii) = bcData(mm)%Tp(i, j, :) + bcData(mm)%Tv(i, j, :)
+                        else
+                            Forces(:, ii) = bcData(mm)%F(i, j, :)
+                        end if
+                    end do
+                end do
+            end if
+        end do bocos
+    end do domains2
 
-  ! This routine performs the forward mode linearization getForces. It
-  ! takes in perturbations defined on bcData(mm)%Fp, bcData(mm)%Fv and
-  ! bcData(mm)%area and computes either the nodal forces or nodal
-  ! tractions.
-  use constants
-  use communication, only : myid
-  use blockPointers, only : nDom, nBocos, BCData, BCType, nBocos, BCDatad
-  use inputPhysics, only : forcesAsTractions
-  use surfaceFamilies, only: BCFamExchange, familyExchange
-  use utils, only : setPointers, setPointers_d, EChk, terminate
-  use oversetData, only : zipperMeshes, zipperMesh, oversetPresent
-  use surfaceFamilies, only : familyExchange, BCFamExchange
-#include <petsc/finclude/petsc.h>
-  use petsc
-  implicit none
-  integer(kind=intType), intent(in) :: npts, sps
-  real(kind=realType), intent(out), dimension(3, npts) :: forces, forcesd
-  integer(kind=intType) :: mm, nn, i, j, ii, jj, iDim, ierr
-  integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, ind(4), ni, nj
-  real(kind=realType) :: qa, qad, qf, qfd
-  real(kind=realType), dimension(:), pointer :: localPtr, localPtrd
-  type(zipperMesh), pointer :: zipper
-  type(familyexchange), pointer :: exch
-
-  if (forcesAsTractions) then
-     call computeNodalTractions_d(sps)
-  else
-     call computeNodalForces_d(sps)
-  end if
-
-  ! Extract the values out into the output derivative array
-  ii = 0
-  domains2: do nn=1,nDom
-     call setPointers_d(nn, 1_intType, sps)
-
-     ! Loop over the number of boundary subfaces of this block.
-     bocos: do mm=1, nBocos
-        if(BCType(mm) == EulerWall.or.BCType(mm) == NSWallAdiabatic .or. &
-             BCType(mm) == NSWallIsothermal) then
-
-           ! This is easy, just copy out F or T in continuous ordering.
-           do j=BCData(mm)%jnBeg, BCData(mm)%jnEnd
-              do i=BCData(mm)%inBeg, BCData(mm)%inEnd
-                 ii = ii + 1
-                 if (forcesAsTractions) then
-                    Forcesd(:, ii) = bcDatad(mm)%Tp(i, j, :) + bcDatad(mm)%Tv(i, j, :)
-                 else
-                    Forcesd(:, ii) = bcDatad(mm)%F(i, j, :)
-                 end if
-              end do
-           end do
-        end if
-     end do bocos
-  end do domains2
-
-  ! We know must consider additional forces that are required by the
-  ! zipper mesh triangles on the root proc.
-
-  ! Pointer for easier reading.
-  zipper => zipperMeshes(iBCGroupWalls)
-  exch => BCFamExchange(iBCGroupWalls, sps)
-  ! No overset present or the zipper isn't allocated nothing to do:
-  if (.not. oversetPresent .or. .not. zipper%allocated) then
-     return
-  end if
-
-  if (.not. forcesAsTractions) then
-     ! We have a zipper and regular forces are requested. This is not yet supported.
-     call terminate('getForces', 'getForces() is not implmented for zipper meshes and '&
-          &'forcesAsTractions=False')
-  end if
-
-  ! Loop over each dimension individually since we have a scalar
-  ! scatter.
-  dimLoop: do iDim=1,3
-
-     call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
-
-     ! Copy in the values we already have to the exchange.
-     ii = size(LocalPtr)
-     localPtr = forcesd(iDim, 1:ii)
-
-     ! Restore the pointer
-     call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
-
-     ! Now scatter this to the zipper
-     call VecScatterBegin(zipper%scatter, exch%nodeValLocal,&
-          zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
-
-     call VecScatterEnd(zipper%scatter, exch%nodeValLocal,&
-          zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
-
-     ! The values we need are precisely what is in zipper%localVal
-     call vecGetArrayF90(zipper%localVal, localPtr, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
-
-     ! Just copy the received data into the forces array. Just on root proc:
-     if (myid == 0) then
-        forcesd(iDim, ii+1:ii+size(localPtr)) = localPtr
-     end if
-
-     call vecGetArrayF90(zipper%localVal, localPtr, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
-  end do dimLoop
+    ! We know must consider additional forces that are required by the
+    ! zipper mesh triangles on the root proc.
 
-end subroutine getForces_d
+    ! Pointer for easier reading.
+    zipper => zipperMeshes(iBCGroupWalls)
+    exch => BCFamExchange(iBCGroupWalls, sps)
+    ! No overset present or the zipper isn't allocated nothing to do:
+    if (.not. oversetPresent .or. .not. zipper%allocated) then
+        return
+    end if
 
-subroutine getForces_b(forcesd, npts, sps)
+    if (.not. forcesAsTractions) then
+        ! We have a zipper and regular forces are requested. This is not yet supported.
+        call terminate('getForces', 'getForces() is not implmented for zipper meshes and '&
+             &'forcesAsTractions=False')
+    end if
 
-  ! This routine performs the reverse of getForces. It takes in
-  ! forces_b and perfroms the reverse of the nodal averaging procedure
-  ! in getForces to compute bcDatad(mm)%Fp, bcDatad(mm)%Fv and
-  ! bcDatad(mm)%area.
-  use constants
-  use communication, only : myid
-  use blockPointers, only : nDom, nBocos, BCData, BCType, nBocos, BCDatad
-  use inputPhysics, only : forcesAsTractions
-  use surfaceFamilies, only: BCFamExchange, familyExchange
-  use utils, only : EChk, setPointers, setPointers_d
-  use oversetData, only : zipperMeshes, zipperMesh, oversetPresent
-  use surfaceFamilies, only : familyExchange, BCFamExchange
-#include <petsc/finclude/petsc.h>
-  use petsc
-  implicit none
-  integer(kind=intType), intent(in) :: npts, sps
-  real(kind=realType), intent(inout) :: forcesd(3, npts)
-  integer(kind=intType) :: mm, nn, i, j, ii, iDim, ierr
-  integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
-  type(zipperMesh), pointer :: zipper
-  type(familyexchange), pointer :: exch
-  real(kind=realType), dimension(:), pointer :: localPtr
-  real(kind=realType), dimension(3, npts) :: forces
-
-  ! Run nonlinear code to make sure that all intermediate values are
-  ! updated.
-  call getForces(forces, npts, sps)
-
-  ! We know must consider additional forces that are required by the
-  ! zipper mesh triangles on the root proc.
-
-  ! Pointer for easier reading.
-  zipper => zipperMeshes(iBCGroupWalls)
-  exch => BCFamExchange(iBCGroupWalls, sps)
-  ! No overset present or the zipper isn't allocated nothing to do:
-  zipperReverse: if (oversetPresent .and. zipper%allocated) then
-
-     ! Loop over each dimension individually since we have a scalar
-     ! scatter.
-     dimLoop: do iDim=1,3
+    ! Loop over each dimension individually since we have a scalar
+    ! scatter.
+    dimLoop: do iDim = 1, 3
+
+        call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Copy in the values we already have to the exchange.
+        ii = size(LocalPtr)
+        localPtr = forces(iDim, 1:ii)
+
+        ! Restore the pointer
+        call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
+        ! Now scatter this to the zipper
+        call VecScatterBegin(zipper%scatter, exch%nodeValLocal, &
+                             zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecScatterEnd(zipper%scatter, exch%nodeValLocal, &
+                           zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! The values we need are precisely what is in zipper%localVal
         call vecGetArrayF90(zipper%localVal, localPtr, ierr)
-        call EChk(ierr,__FILE__,__LINE__)
+        call EChk(ierr, __FILE__, __LINE__)
 
-        ii = exch%nNodes
         ! Just copy the received data into the forces array. Just on root proc:
         if (myid == 0) then
-           do i=1, size(localPtr)
-              localPtr(i) = forcesd(iDim, ii+i)
-              forcesd(iDim, ii+i) = zero
-           end do
+            forces(iDim, ii + 1:ii + size(localPtr)) = localPtr
         end if
 
         call vecGetArrayF90(zipper%localVal, localPtr, ierr)
-        call EChk(ierr,__FILE__,__LINE__)
+        call EChk(ierr, __FILE__, __LINE__)
+    end do dimLoop
+
+end subroutine getForces
+
+subroutine getForces_d(forces, forcesd, npts, sps)
 
-        ! Zero the vector we are scatting into:
-        call VecSet(exch%nodeValLocal, zero, ierr)
-        call EChk(ierr,__FILE__,__LINE__)
+    ! This routine performs the forward mode linearization getForces. It
+    ! takes in perturbations defined on bcData(mm)%Fp, bcData(mm)%Fv and
+    ! bcData(mm)%area and computes either the nodal forces or nodal
+    ! tractions.
+    use constants
+    use communication, only: myid
+    use blockPointers, only: nDom, nBocos, BCData, BCType, nBocos, BCDatad
+    use inputPhysics, only: forcesAsTractions
+    use surfaceFamilies, only: BCFamExchange, familyExchange
+    use utils, only: setPointers, setPointers_d, EChk, terminate
+    use oversetData, only: zipperMeshes, zipperMesh, oversetPresent
+    use surfaceFamilies, only: familyExchange, BCFamExchange
+#include <petsc/finclude/petsc.h>
+    use petsc
+    implicit none
+    integer(kind=intType), intent(in) :: npts, sps
+    real(kind=realType), intent(out), dimension(3, npts) :: forces, forcesd
+    integer(kind=intType) :: mm, nn, i, j, ii, jj, iDim, ierr
+    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, ind(4), ni, nj
+    real(kind=realType) :: qa, qad, qf, qfd
+    real(kind=realType), dimension(:), pointer :: localPtr, localPtrd
+    type(zipperMesh), pointer :: zipper
+    type(familyexchange), pointer :: exch
+
+    if (forcesAsTractions) then
+        call computeNodalTractions_d(sps)
+    else
+        call computeNodalForces_d(sps)
+    end if
+
+    ! Extract the values out into the output derivative array
+    ii = 0
+    domains2: do nn = 1, nDom
+        call setPointers_d(nn, 1_intType, sps)
 
-        ! Scatter values from the root using the zipper scatter.
-        call VecScatterBegin(zipper%scatter, zipper%localVal, &
-             exch%nodeValLocal, ADD_VALUES, SCATTER_REVERSE, ierr)
-        call EChk(ierr,__FILE__,__LINE__)
+        ! Loop over the number of boundary subfaces of this block.
+        bocos: do mm = 1, nBocos
+            if (BCType(mm) == EulerWall .or. BCType(mm) == NSWallAdiabatic .or. &
+                BCType(mm) == NSWallIsothermal) then
+
+                ! This is easy, just copy out F or T in continuous ordering.
+                do j = BCData(mm)%jnBeg, BCData(mm)%jnEnd
+                    do i = BCData(mm)%inBeg, BCData(mm)%inEnd
+                        ii = ii + 1
+                        if (forcesAsTractions) then
+                            Forcesd(:, ii) = bcDatad(mm)%Tp(i, j, :) + bcDatad(mm)%Tv(i, j, :)
+                        else
+                            Forcesd(:, ii) = bcDatad(mm)%F(i, j, :)
+                        end if
+                    end do
+                end do
+            end if
+        end do bocos
+    end do domains2
 
-        call VecScatterEnd(zipper%scatter, zipper%localVal, &
-             exch%nodeValLocal, ADD_VALUES, SCATTER_REVERSE, ierr)
-        call EChk(ierr,__FILE__,__LINE__)
+    ! We know must consider additional forces that are required by the
+    ! zipper mesh triangles on the root proc.
+
+    ! Pointer for easier reading.
+    zipper => zipperMeshes(iBCGroupWalls)
+    exch => BCFamExchange(iBCGroupWalls, sps)
+    ! No overset present or the zipper isn't allocated nothing to do:
+    if (.not. oversetPresent .or. .not. zipper%allocated) then
+        return
+    end if
+
+    if (.not. forcesAsTractions) then
+        ! We have a zipper and regular forces are requested. This is not yet supported.
+        call terminate('getForces', 'getForces() is not implmented for zipper meshes and '&
+             &'forcesAsTractions=False')
+    end if
+
+    ! Loop over each dimension individually since we have a scalar
+    ! scatter.
+    dimLoop: do iDim = 1, 3
 
         call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
-        call EChk(ierr,__FILE__,__LINE__)
+        call EChk(ierr, __FILE__, __LINE__)
 
-        ! Accumulate the scatted values onto forcesd
-        ii = size(localPtr)
-        forcesd(iDim, 1:ii) = forcesd(iDim, 1:ii) + localPtr
+        ! Copy in the values we already have to the exchange.
+        ii = size(LocalPtr)
+        localPtr = forcesd(iDim, 1:ii)
 
         ! Restore the pointer
         call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
-        call EChk(ierr,__FILE__,__LINE__)
-
-     end do dimLoop
-  end if zipperReverse
-
-  ! Set the incoming derivative values
-  ii = 0
-  domains2: do nn=1,nDom
-     call setPointers_d(nn, 1_intType, sps)
-
-     ! Loop over the number of boundary subfaces of this block.
-     bocos: do mm=1, nBocos
-        if(BCType(mm) == EulerWall.or.BCType(mm) == NSWallAdiabatic .or. &
-             BCType(mm) == NSWallIsothermal) then
-           ! This is easy, just copy out F or T in continuous ordering.
-           do j=BCData(mm)%jnBeg, BCData(mm)%jnEnd
-              do i=BCData(mm)%inBeg, BCData(mm)%inEnd
-                 ii = ii + 1
-                 if (forcesAsTractions) then
-                    bcDatad(mm)%Tp(i, j, :) = forcesd(:, ii)
-                    bcDatad(mm)%Tv(i, j, :) = forcesd(:, ii)
-                 else
-                    bcDatad(mm)%F(i, j, :) = forcesd(:, ii)
-                 end if
-              end do
-           end do
-        end if
-     end do bocos
-  end do domains2
+        call EChk(ierr, __FILE__, __LINE__)
 
-  if (.not. forcesAsTractions) then
-     ! For forces, we can accumulate the nodal seeds on the Fp and Fv
-     ! values. The area seed is zeroed.
-     call computeNodalForces_b(sps)
-  else
-     call computeNodalTractions_b(sps)
-  end if
+        ! Now scatter this to the zipper
+        call VecScatterBegin(zipper%scatter, exch%nodeValLocal, &
+                             zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-end subroutine getForces_b
+        call VecScatterEnd(zipper%scatter, exch%nodeValLocal, &
+                           zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
+        ! The values we need are precisely what is in zipper%localVal
+        call vecGetArrayF90(zipper%localVal, localPtr, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-subroutine surfaceCellCenterToNode(exch)
+        ! Just copy the received data into the forces array. Just on root proc:
+        if (myid == 0) then
+            forcesd(iDim, ii + 1:ii + size(localPtr)) = localPtr
+        end if
 
-  use constants
-  use blockPointers, only : BCData, nDom, nBocos, BCType
-  use surfaceFamilies, only : familyExchange
-  use utils, only : setPointers, EChk
-  use sorting, only : famInList
-#include <petsc/finclude/petsc.h>
-  use petsc
-  implicit none
-
-  type(familyExchange) :: exch
-  integer(kind=intType) ::  sps
-  integer(kind=intType) :: mm, nn, i, j, ii, jj, iDim, ierr
-  integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, ind(4), ni, nj
-  real(kind=realType) :: qv
-  real(kind=realType), dimension(:), pointer :: localPtr
-
-  ! We assume that normalization factor is already computed
-  sps = exch%sps
-  call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-  localPtr = zero
-
-  ! ii is the running counter through the pointer array.
-  ii = 0
-  do nn=1, nDom
-     call setPointers(nn, 1_intType, sps)
-     do mm=1, nBocos
-        famInclude: if (famInList(BCData(mm)%famID, exch%famList)) then
-           iBeg = BCdata(mm)%inBeg; iEnd=BCData(mm)%inEnd
-           jBeg = BCdata(mm)%jnBeg; jEnd=BCData(mm)%jnEnd
-           ni = iEnd - iBeg + 1
-           nj = jEnd - jBeg + 1
-           do j=0,nj-2
-              do i=0,ni-2
-                 ! Note: No +iBeg, and +jBeg becuase cellVal is a pointer
-                 ! and always starts at one
-                 qv = fourth * BCData(mm)%cellVal(i+1, j+1)
-                 ind(1) = ii + (j  )*ni + i + 1
-                 ind(2) = ii + (j  )*ni + i + 2
-                 ind(3) = ii + (j+1)*ni + i + 2
-                 ind(4) = ii + (j+1)*ni + i + 1
-                 do jj=1,4
-                    localPtr(ind(jj)) = localPtr(ind(jj)) + qv
-                 end do
-              end do
-           end do
-           ii = ii + ni*nj
-        end if famInclude
-     end do
-  end do
-
-  call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  ! Globalize the current face based value
-  call vecSet(exch%nodeValGlobal, zero, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  call VecScatterBegin(exch%scatter, exch%nodeValLocal, &
-       exch%nodeValGlobal, ADD_VALUES, SCATTER_FORWARD, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  call VecScatterEnd(exch%scatter, exch%nodeValLocal, &
-       exch%nodeValGlobal, ADD_VALUES, SCATTER_FORWARD, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  ! Now divide by the weighting. We can do this with a vecpointwisemult
-  call vecPointwiseMult(exch%nodeValGlobal, exch%nodeValGlobal, &
-       exch%sumGlobal, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  ! Push back to the local values
-  call VecScatterBegin(exch%scatter, exch%nodeValGlobal, &
-       exch%nodeValLocal, INSERT_VALUES, SCATTER_REVERSE, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  call VecScatterEnd(exch%scatter, exch%nodeValGlobal, &
-       exch%nodeValLocal, INSERT_VALUES, SCATTER_REVERSE, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  ii = 0
-  do nn=1, nDom
-     call setPointers(nn, 1_intType, sps)
-     do mm=1, nBocos
-        famInclude2: if (famInList(BCData(mm)%famID, exch%famList)) then
-           iBeg = BCdata(mm)%inBeg; iEnd=BCData(mm)%inEnd
-           jBeg = BCdata(mm)%jnBeg; jEnd=BCData(mm)%jnEnd
-
-           ni = iEnd - iBeg + 1
-           nj = jEnd - jBeg + 1
-           do j=1,nj
-              do i=1,ni
-                 ! Note: No +iBeg, and +jBeg becuase cellVal is a pointer
-                 ! and always starts at one
-                 ii = ii + 1
-                 BCData(mm)%nodeVal(i, j) = localPtr(ii)
-              end do
-           end do
-        end if famInclude2
-     end do
-  end do
-
-  call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
+        call vecGetArrayF90(zipper%localVal, localPtr, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+    end do dimLoop
 
-end subroutine surfaceCellCenterToNode
+end subroutine getForces_d
 
-subroutine computeWeighting(exch)
+subroutine getForces_b(forcesd, npts, sps)
 
-  use constants
-  use blockPointers, only : BCData, nDom, nBocos, BCType
-  use surfaceFamilies, only : familyExchange
-  use utils, only : setPointers, EChk
-  use sorting, only : famInList
+    ! This routine performs the reverse of getForces. It takes in
+    ! forces_b and perfroms the reverse of the nodal averaging procedure
+    ! in getForces to compute bcDatad(mm)%Fp, bcDatad(mm)%Fv and
+    ! bcDatad(mm)%area.
+    use constants
+    use communication, only: myid
+    use blockPointers, only: nDom, nBocos, BCData, BCType, nBocos, BCDatad
+    use inputPhysics, only: forcesAsTractions
+    use surfaceFamilies, only: BCFamExchange, familyExchange
+    use utils, only: EChk, setPointers, setPointers_d
+    use oversetData, only: zipperMeshes, zipperMesh, oversetPresent
+    use surfaceFamilies, only: familyExchange, BCFamExchange
 #include <petsc/finclude/petsc.h>
-  use petsc
-  implicit none
-  type(familyExchange) :: exch
-  integer(kind=intType) ::  sps
-  integer(kind=intType) :: mm, nn, i, j, ii, jj, iDim, ierr
-  integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, ind(4), ni, nj
-  real(kind=realType) :: qf, qa
-  real(kind=realType), dimension(:), pointer :: localPtr
-
-  sps = exch%sps
-
-  call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  localPtr = zero
-  ! ii is the running counter through the pointer array.
-  ii = 0
-  do nn=1, nDom
-     call setPointers(nn, 1_intType, sps)
-     do mm=1, nBocos
-        famInclude: if (famInList(BCData(mm)%famID, exch%famList)) then
-           iBeg = BCdata(mm)%inBeg; iEnd=BCData(mm)%inEnd
-           jBeg = BCdata(mm)%jnBeg; jEnd=BCData(mm)%jnEnd
-           ni = iEnd - iBeg + 1
-           nj = jEnd - jBeg + 1
-           do j=0,nj-2
-              do i=0,ni-2
-
-                 ! Scatter a quarter of the face value to each node:
-                 ! Note: No +iBeg, and +jBeg becuase cellVal is a pointer
-                 ! and always starts at one
-                 qa = fourth*BCData(mm)%cellVal(i+1, j+1)
-                 ind(1) = ii + (j  )*ni + i + 1
-                 ind(2) = ii + (j  )*ni + i + 2
-                 ind(3) = ii + (j+1)*ni + i + 2
-                 ind(4) = ii + (j+1)*ni + i + 1
-                 do jj=1,4
-                    localPtr(ind(jj)) = localPtr(ind(jj)) + qa
-                 end do
-              end do
-           end do
-           ii = ii + ni*nj
-        end if famInclude
-     end do
-  end do
-
-  call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  ! Globalize the face value
-  call vecSet(exch%sumGlobal, zero, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  call VecScatterBegin(exch%scatter, exch%nodeValLocal, &
-       exch%sumGlobal, ADD_VALUES, SCATTER_FORWARD, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  call VecScatterEnd(exch%scatter, exch%nodeValLocal, &
-       exch%sumGlobal, ADD_VALUES, SCATTER_FORWARD, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  ! Now compute the inverse of the weighting so that we can multiply
-  ! instead of dividing. Note that we check dividing by zero and just
-  ! set those to zero.
-
-  call vecGetArrayF90(exch%sumGlobal, localPtr, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-  do i=1, size(localPtr)
-     if (localPtr(i) == zero) then
-        localPtr(i) = zero
-     else
-        localPtr(i) = one/localPtr(i)
-     end if
-  end do
-
-  call vecRestoreArrayF90(exch%sumGlobal, localPtr, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
+    use petsc
+    implicit none
+    integer(kind=intType), intent(in) :: npts, sps
+    real(kind=realType), intent(inout) :: forcesd(3, npts)
+    integer(kind=intType) :: mm, nn, i, j, ii, iDim, ierr
+    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
+    type(zipperMesh), pointer :: zipper
+    type(familyexchange), pointer :: exch
+    real(kind=realType), dimension(:), pointer :: localPtr
+    real(kind=realType), dimension(3, npts) :: forces
+
+    ! Run nonlinear code to make sure that all intermediate values are
+    ! updated.
+    call getForces(forces, npts, sps)
+
+    ! We know must consider additional forces that are required by the
+    ! zipper mesh triangles on the root proc.
+
+    ! Pointer for easier reading.
+    zipper => zipperMeshes(iBCGroupWalls)
+    exch => BCFamExchange(iBCGroupWalls, sps)
+    ! No overset present or the zipper isn't allocated nothing to do:
+    zipperReverse: if (oversetPresent .and. zipper%allocated) then
+
+        ! Loop over each dimension individually since we have a scalar
+        ! scatter.
+        dimLoop: do iDim = 1, 3
+
+            call vecGetArrayF90(zipper%localVal, localPtr, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ii = exch%nNodes
+            ! Just copy the received data into the forces array. Just on root proc:
+            if (myid == 0) then
+                do i = 1, size(localPtr)
+                    localPtr(i) = forcesd(iDim, ii + i)
+                    forcesd(iDim, ii + i) = zero
+                end do
+            end if
+
+            call vecGetArrayF90(zipper%localVal, localPtr, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Zero the vector we are scatting into:
+            call VecSet(exch%nodeValLocal, zero, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Scatter values from the root using the zipper scatter.
+            call VecScatterBegin(zipper%scatter, zipper%localVal, &
+                                 exch%nodeValLocal, ADD_VALUES, SCATTER_REVERSE, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            call VecScatterEnd(zipper%scatter, zipper%localVal, &
+                               exch%nodeValLocal, ADD_VALUES, SCATTER_REVERSE, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+            ! Accumulate the scatted values onto forcesd
+            ii = size(localPtr)
+            forcesd(iDim, 1:ii) = forcesd(iDim, 1:ii) + localPtr
+
+            ! Restore the pointer
+            call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
+            call EChk(ierr, __FILE__, __LINE__)
+
+        end do dimLoop
+    end if zipperReverse
+
+    ! Set the incoming derivative values
+    ii = 0
+    domains2: do nn = 1, nDom
+        call setPointers_d(nn, 1_intType, sps)
 
-end subroutine computeWeighting
+        ! Loop over the number of boundary subfaces of this block.
+        bocos: do mm = 1, nBocos
+            if (BCType(mm) == EulerWall .or. BCType(mm) == NSWallAdiabatic .or. &
+                BCType(mm) == NSWallIsothermal) then
+                ! This is easy, just copy out F or T in continuous ordering.
+                do j = BCData(mm)%jnBeg, BCData(mm)%jnEnd
+                    do i = BCData(mm)%inBeg, BCData(mm)%inEnd
+                        ii = ii + 1
+                        if (forcesAsTractions) then
+                            bcDatad(mm)%Tp(i, j, :) = forcesd(:, ii)
+                            bcDatad(mm)%Tv(i, j, :) = forcesd(:, ii)
+                        else
+                            bcDatad(mm)%F(i, j, :) = forcesd(:, ii)
+                        end if
+                    end do
+                end do
+            end if
+        end do bocos
+    end do domains2
 
-subroutine computeNodalTractions(sps)
-  use constants
-  use blockPointers, only : BCData, nDom, nBocos, BCType
-  use surfaceFamilies, only : BCFamExchange, familyExchange
-  use utils, only : setPointers, EChk, isWallType
-  implicit none
-
-  integer(kind=intType), intent(in) ::  sps
-  integer(kind=intType) :: mm, nn, i, j, ii, jj, iDim, ierr
-  integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, ind(4), ni, nj
-  real(kind=realType) :: qf, qa
-  real(kind=realType), dimension(:), pointer :: localPtr
-  type(familyExchange), pointer :: exch
-
-  ! Set the pointer to the wall exchange:
-  exch => BCfamExchange(iBCGroupWalls, sps)
-
-  ! Set the weighting factors. In this case, area
-  ii = 0
-  do nn=1, nDom
-     call setPointers(nn, 1_intType, sps)
-     do mm=1, nBocos
-        iBeg = BCdata(mm)%inBeg; iEnd=BCData(mm)%inEnd
-        jBeg = BCdata(mm)%jnBeg; jEnd=BCData(mm)%jnEnd
-
-        bocoType1: if(isWallType(BCType(mm))) then
-           BCData(mm)%cellVal => BCData(mm)%area(:, :)
-        end if bocoType1
-     end do
-  end do
-  call computeWeighting(exch)
-
-  FpFvLoop: do iDim=1, 6
-     ! ii is the running counter through the pointer array.
-     ii = 0
-     do nn=1, nDom
+    if (.not. forcesAsTractions) then
+        ! For forces, we can accumulate the nodal seeds on the Fp and Fv
+        ! values. The area seed is zeroed.
+        call computeNodalForces_b(sps)
+    else
+        call computeNodalTractions_b(sps)
+    end if
+
+end subroutine getForces_b
+
+subroutine surfaceCellCenterToNode(exch)
+
+    use constants
+    use blockPointers, only: BCData, nDom, nBocos, BCType
+    use surfaceFamilies, only: familyExchange
+    use utils, only: setPointers, EChk
+    use sorting, only: famInList
+#include <petsc/finclude/petsc.h>
+    use petsc
+    implicit none
+
+    type(familyExchange) :: exch
+    integer(kind=intType) :: sps
+    integer(kind=intType) :: mm, nn, i, j, ii, jj, iDim, ierr
+    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, ind(4), ni, nj
+    real(kind=realType) :: qv
+    real(kind=realType), dimension(:), pointer :: localPtr
+
+    ! We assume that normalization factor is already computed
+    sps = exch%sps
+    call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
+    localPtr = zero
+
+    ! ii is the running counter through the pointer array.
+    ii = 0
+    do nn = 1, nDom
         call setPointers(nn, 1_intType, sps)
-        do mm=1, nBocos
-           bocoType2: if(isWallType(BCType(mm))) then
-              if (iDim <= 3) then
-                 BCData(mm)%cellVal => BCData(mm)%Fp(:, :, iDim)
-                 BCData(mm)%nodeVal => BCData(mm)%Tp(:, :, iDim)
-              else
-                 BCData(mm)%cellVal => BCData(mm)%Fv(:, :, iDim-3)
-                 BCData(mm)%nodeVal => BCData(mm)%Tv(:, :, iDim-3)
-              end if
-           end if bocoType2
+        do mm = 1, nBocos
+            famInclude: if (famInList(BCData(mm)%famID, exch%famList)) then
+                iBeg = BCdata(mm)%inBeg; iEnd = BCData(mm)%inEnd
+                jBeg = BCdata(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+                ni = iEnd - iBeg + 1
+                nj = jEnd - jBeg + 1
+                do j = 0, nj - 2
+                    do i = 0, ni - 2
+                        ! Note: No +iBeg, and +jBeg becuase cellVal is a pointer
+                        ! and always starts at one
+                        qv = fourth * BCData(mm)%cellVal(i + 1, j + 1)
+                        ind(1) = ii + (j) * ni + i + 1
+                        ind(2) = ii + (j) * ni + i + 2
+                        ind(3) = ii + (j + 1) * ni + i + 2
+                        ind(4) = ii + (j + 1) * ni + i + 1
+                        do jj = 1, 4
+                            localPtr(ind(jj)) = localPtr(ind(jj)) + qv
+                        end do
+                    end do
+                end do
+                ii = ii + ni * nj
+            end if famInclude
         end do
-     end do
+    end do
 
-     call surfaceCellCenterToNode(exch)
+    call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-  end do FpFVLoop
+    ! Globalize the current face based value
+    call vecSet(exch%nodeValGlobal, zero, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-end subroutine computeNodalTractions
+    call VecScatterBegin(exch%scatter, exch%nodeValLocal, &
+                         exch%nodeValGlobal, ADD_VALUES, SCATTER_FORWARD, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-subroutine computeNodalTractions_d(sps)
+    call VecScatterEnd(exch%scatter, exch%nodeValLocal, &
+                       exch%nodeValGlobal, ADD_VALUES, SCATTER_FORWARD, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-  ! Forward mode lineariation of nodal tractions
+    ! Now divide by the weighting. We can do this with a vecpointwisemult
+    call vecPointwiseMult(exch%nodeValGlobal, exch%nodeValGlobal, &
+                          exch%sumGlobal, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-  use constants
-  use blockPointers, only : nDom, nBocos, BCData, BCType, nBocos, BCDatad
-  use inputPhysics, only : forcesAsTractions
-  use surfaceFamilies, only: BCFamExchange, familyExchange
-  use utils, only : setPointers, setPointers_d, EChk
+    ! Push back to the local values
+    call VecScatterBegin(exch%scatter, exch%nodeValGlobal, &
+                         exch%nodeValLocal, INSERT_VALUES, SCATTER_REVERSE, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
+
+    call VecScatterEnd(exch%scatter, exch%nodeValGlobal, &
+                       exch%nodeValLocal, INSERT_VALUES, SCATTER_REVERSE, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
+
+    call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
+
+    ii = 0
+    do nn = 1, nDom
+        call setPointers(nn, 1_intType, sps)
+        do mm = 1, nBocos
+            famInclude2: if (famInList(BCData(mm)%famID, exch%famList)) then
+                iBeg = BCdata(mm)%inBeg; iEnd = BCData(mm)%inEnd
+                jBeg = BCdata(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+
+                ni = iEnd - iBeg + 1
+                nj = jEnd - jBeg + 1
+                do j = 1, nj
+                    do i = 1, ni
+                        ! Note: No +iBeg, and +jBeg becuase cellVal is a pointer
+                        ! and always starts at one
+                        ii = ii + 1
+                        BCData(mm)%nodeVal(i, j) = localPtr(ii)
+                    end do
+                end do
+            end if famInclude2
+        end do
+    end do
+
+    call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
+
+end subroutine surfaceCellCenterToNode
+
+subroutine computeWeighting(exch)
+
+    use constants
+    use blockPointers, only: BCData, nDom, nBocos, BCType
+    use surfaceFamilies, only: familyExchange
+    use utils, only: setPointers, EChk
+    use sorting, only: famInList
 #include <petsc/finclude/petsc.h>
-  use petsc
-  implicit none
-  integer(kind=intType), intent(in) :: sps
-  integer(kind=intType) :: mm, nn, i, j, ii, jj, iDim, ierr
-  integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, ind(4), ni, nj
-  real(kind=realType) :: qa, qad, qf, qfd
-  real(kind=realType), dimension(:), pointer :: localPtr, localPtrd
-  type(familyExchange), pointer :: exch
-  Vec nodeValLocald, nodeValGlobald, sumGlobald, tmp
-
-  exch => BCFamExchange(iBCGroupWalls, sps)
-
-  call VecDuplicate(exch%nodeValLocal, nodeValLocald, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  call VecDuplicate(exch%nodeValGlobal, nodeValGlobald, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  call VecDuplicate(exch%sumGlobal, sumGlobald, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  call VecDuplicate(exch%sumGlobal, tmp, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  call vecGetArrayF90(nodeValLocald, localPtrd, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  localPtrd = zero
-  localPtr = zero
-  ! ii is the running counter through the pointer array.
-  ii = 0
-  do nn=1, nDom
-     call setPointers_d(nn, 1_intType, sps)
-     do mm=1, nBocos
-        iBeg = BCdata(mm)%inBeg; iEnd=BCData(mm)%inEnd
-        jBeg = BCdata(mm)%jnBeg; jEnd=BCData(mm)%jnEnd
-        ni = iEnd - iBeg + 1
-        nj = jEnd - jBeg + 1
-
-        if(BCType(mm) == EulerWall .or. &
-             BCType(mm) == NSWallAdiabatic .or. &
-             BCType(mm) == NSWallIsothermal) then
-
-           do j=0,nj-2
-              do i=0,ni-2
-
-                 ! Scatter a quarter of the area to each node:
-                 qa = fourth*BCData(mm)%area(i+iBeg+1, j+jBeg+1)
-                 qad = fourth*BCDatad(mm)%area(i+iBeg+1, j+jBeg+1)
-                 ind(1) = ii + (j  )*ni + i + 1
-                 ind(2) = ii + (j  )*ni + i + 2
-                 ind(3) = ii + (j+1)*ni + i + 2
-                 ind(4) = ii + (j+1)*ni + i + 1
-                 do jj=1,4
-                    localPtrd(ind(jj)) = localPtrd(ind(jj)) + qad
-                    localPtr(ind(jj)) = localPtr(ind(jj)) + qa
-                 end do
-              end do
-           end do
-           ii = ii + ni*nj
+    use petsc
+    implicit none
+    type(familyExchange) :: exch
+    integer(kind=intType) :: sps
+    integer(kind=intType) :: mm, nn, i, j, ii, jj, iDim, ierr
+    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, ind(4), ni, nj
+    real(kind=realType) :: qf, qa
+    real(kind=realType), dimension(:), pointer :: localPtr
+
+    sps = exch%sps
+
+    call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
+
+    localPtr = zero
+    ! ii is the running counter through the pointer array.
+    ii = 0
+    do nn = 1, nDom
+        call setPointers(nn, 1_intType, sps)
+        do mm = 1, nBocos
+            famInclude: if (famInList(BCData(mm)%famID, exch%famList)) then
+                iBeg = BCdata(mm)%inBeg; iEnd = BCData(mm)%inEnd
+                jBeg = BCdata(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+                ni = iEnd - iBeg + 1
+                nj = jEnd - jBeg + 1
+                do j = 0, nj - 2
+                    do i = 0, ni - 2
+
+                        ! Scatter a quarter of the face value to each node:
+                        ! Note: No +iBeg, and +jBeg becuase cellVal is a pointer
+                        ! and always starts at one
+                        qa = fourth * BCData(mm)%cellVal(i + 1, j + 1)
+                        ind(1) = ii + (j) * ni + i + 1
+                        ind(2) = ii + (j) * ni + i + 2
+                        ind(3) = ii + (j + 1) * ni + i + 2
+                        ind(4) = ii + (j + 1) * ni + i + 1
+                        do jj = 1, 4
+                            localPtr(ind(jj)) = localPtr(ind(jj)) + qa
+                        end do
+                    end do
+                end do
+                ii = ii + ni * nj
+            end if famInclude
+        end do
+    end do
+
+    call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
+
+    ! Globalize the face value
+    call vecSet(exch%sumGlobal, zero, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
+
+    call VecScatterBegin(exch%scatter, exch%nodeValLocal, &
+                         exch%sumGlobal, ADD_VALUES, SCATTER_FORWARD, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
+
+    call VecScatterEnd(exch%scatter, exch%nodeValLocal, &
+                       exch%sumGlobal, ADD_VALUES, SCATTER_FORWARD, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
+
+    ! Now compute the inverse of the weighting so that we can multiply
+    ! instead of dividing. Note that we check dividing by zero and just
+    ! set those to zero.
+
+    call vecGetArrayF90(exch%sumGlobal, localPtr, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
+    do i = 1, size(localPtr)
+        if (localPtr(i) == zero) then
+            localPtr(i) = zero
+        else
+            localPtr(i) = one / localPtr(i)
         end if
-     end do
-  end do
-  call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
+    end do
 
-  call vecRestoreArrayF90(nodeValLocald, localPtrd, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
+    call vecRestoreArrayF90(exch%sumGlobal, localPtr, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-  ! Globalize the area
-  call vecSet(exch%sumGlobal, zero, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
+end subroutine computeWeighting
 
-  call VecScatterBegin(exch%scatter, exch%nodeValLocal, &
-       exch%sumGlobal, ADD_VALUES, SCATTER_FORWARD, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
+subroutine computeNodalTractions(sps)
+    use constants
+    use blockPointers, only: BCData, nDom, nBocos, BCType
+    use surfaceFamilies, only: BCFamExchange, familyExchange
+    use utils, only: setPointers, EChk, isWallType
+    implicit none
+
+    integer(kind=intType), intent(in) :: sps
+    integer(kind=intType) :: mm, nn, i, j, ii, jj, iDim, ierr
+    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, ind(4), ni, nj
+    real(kind=realType) :: qf, qa
+    real(kind=realType), dimension(:), pointer :: localPtr
+    type(familyExchange), pointer :: exch
+
+    ! Set the pointer to the wall exchange:
+    exch => BCfamExchange(iBCGroupWalls, sps)
+
+    ! Set the weighting factors. In this case, area
+    ii = 0
+    do nn = 1, nDom
+        call setPointers(nn, 1_intType, sps)
+        do mm = 1, nBocos
+            iBeg = BCdata(mm)%inBeg; iEnd = BCData(mm)%inEnd
+            jBeg = BCdata(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
 
-  call VecScatterEnd(exch%scatter, exch%nodeValLocal, &
-       exch%sumGlobal, ADD_VALUES, SCATTER_FORWARD, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
+            bocoType1: if (isWallType(BCType(mm))) then
+                BCData(mm)%cellVal => BCData(mm)%area(:, :)
+            end if bocoType1
+        end do
+    end do
+    call computeWeighting(exch)
+
+    FpFvLoop: do iDim = 1, 6
+        ! ii is the running counter through the pointer array.
+        ii = 0
+        do nn = 1, nDom
+            call setPointers(nn, 1_intType, sps)
+            do mm = 1, nBocos
+                bocoType2: if (isWallType(BCType(mm))) then
+                    if (iDim <= 3) then
+                        BCData(mm)%cellVal => BCData(mm)%Fp(:, :, iDim)
+                        BCData(mm)%nodeVal => BCData(mm)%Tp(:, :, iDim)
+                    else
+                        BCData(mm)%cellVal => BCData(mm)%Fv(:, :, iDim - 3)
+                        BCData(mm)%nodeVal => BCData(mm)%Tv(:, :, iDim - 3)
+                    end if
+                end if bocoType2
+            end do
+        end do
 
-  ! Globalize the area derivative
-  call vecSet(sumGlobald, zero, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
+        call surfaceCellCenterToNode(exch)
 
-  call VecScatterBegin(exch%scatter, nodeValLocald, &
-       sumGlobald, ADD_VALUES, SCATTER_FORWARD, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
+    end do FpFVLoop
 
-  call VecScatterEnd(exch%scatter, nodeValLocald, &
-       sumGlobald, ADD_VALUES, SCATTER_FORWARD, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
+end subroutine computeNodalTractions
 
-  ! Now compute the inverse of the weighting so that we can multiply
-  ! instead of dividing. Here we need the original value too:
+subroutine computeNodalTractions_d(sps)
 
-  call vecGetArrayF90(exch%sumGlobal, localPtr, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
+    ! Forward mode lineariation of nodal tractions
 
-  call vecGetArrayF90(sumGlobald, localPtrd, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
+    use constants
+    use blockPointers, only: nDom, nBocos, BCData, BCType, nBocos, BCDatad
+    use inputPhysics, only: forcesAsTractions
+    use surfaceFamilies, only: BCFamExchange, familyExchange
+    use utils, only: setPointers, setPointers_d, EChk
+#include <petsc/finclude/petsc.h>
+    use petsc
+    implicit none
+    integer(kind=intType), intent(in) :: sps
+    integer(kind=intType) :: mm, nn, i, j, ii, jj, iDim, ierr
+    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, ind(4), ni, nj
+    real(kind=realType) :: qa, qad, qf, qfd
+    real(kind=realType), dimension(:), pointer :: localPtr, localPtrd
+    type(familyExchange), pointer :: exch
+    Vec nodeValLocald, nodeValGlobald, sumGlobald, tmp
 
-  localPtrd = -(localPtrd/localPtr**2)
-  localPtr = one/localPtr
+    exch => BCFamExchange(iBCGroupWalls, sps)
 
-  call vecGetArrayF90(exch%sumGlobal, localPtr, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
+    call VecDuplicate(exch%nodeValLocal, nodeValLocald, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-  call vecRestoreArrayF90(sumGlobald, localPtrd, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
+    call VecDuplicate(exch%nodeValGlobal, nodeValGlobald, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-  ! Now do each of the three dimensions for the pressure and viscous forces
-  dimLoop: do iDim=1, 6
+    call VecDuplicate(exch%sumGlobal, sumGlobald, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-     call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
+    call VecDuplicate(exch%sumGlobal, tmp, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-     call vecGetArrayF90(nodeValLocald, localPtrd, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
+    call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-     localPtr  = zero
-     localPtrd = zero
+    call vecGetArrayF90(nodeValLocald, localPtrd, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-     ! ii is the running counter through the pointer array.
-     ii = 0
-     do nn=1, nDom
+    localPtrd = zero
+    localPtr = zero
+    ! ii is the running counter through the pointer array.
+    ii = 0
+    do nn = 1, nDom
         call setPointers_d(nn, 1_intType, sps)
-        do mm=1, nBocos
-           iBeg = BCdata(mm)%inBeg; iEnd=BCData(mm)%inEnd
-           jBeg = BCdata(mm)%jnBeg; jEnd=BCData(mm)%jnEnd
-           ni = iEnd - iBeg + 1
-           nj = jEnd - jBeg + 1
-           if(BCType(mm) == EulerWall .or. &
+        do mm = 1, nBocos
+            iBeg = BCdata(mm)%inBeg; iEnd = BCData(mm)%inEnd
+            jBeg = BCdata(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+            ni = iEnd - iBeg + 1
+            nj = jEnd - jBeg + 1
+
+            if (BCType(mm) == EulerWall .or. &
                 BCType(mm) == NSWallAdiabatic .or. &
                 BCType(mm) == NSWallIsothermal) then
-              do j=0,nj-2
-                 do i=0,ni-2
-                    if (iDim <= 3) then
-                       qf  = fourth*BCData (mm)%Fp(i+iBeg+1, j+jBeg+1, iDim)
-                       qfd = fourth*BCDatad(mm)%Fp(i+iBeg+1, j+jBeg+1, iDim)
-                    else
-                       qf  = fourth*BCData (mm)%Fv(i+iBeg+1, j+jBeg+1, iDim-3)
-                       qfd = fourth*BCDatad(mm)%Fv(i+iBeg+1, j+jBeg+1, iDim-3)
-                    end if
 
-                    ind(1) = ii + (j  )*ni + i + 1
-                    ind(2) = ii + (j  )*ni + i + 2
-                    ind(3) = ii + (j+1)*ni + i + 2
-                    ind(4) = ii + (j+1)*ni + i + 1
-                    do jj=1,4
-                       localPtr (ind(jj)) = localPtr (ind(jj)) + qf
-                       localPtrd(ind(jj)) = localPtrd(ind(jj)) + qfd
+                do j = 0, nj - 2
+                    do i = 0, ni - 2
+
+                        ! Scatter a quarter of the area to each node:
+                        qa = fourth * BCData(mm)%area(i + iBeg + 1, j + jBeg + 1)
+                        qad = fourth * BCDatad(mm)%area(i + iBeg + 1, j + jBeg + 1)
+                        ind(1) = ii + (j) * ni + i + 1
+                        ind(2) = ii + (j) * ni + i + 2
+                        ind(3) = ii + (j + 1) * ni + i + 2
+                        ind(4) = ii + (j + 1) * ni + i + 1
+                        do jj = 1, 4
+                            localPtrd(ind(jj)) = localPtrd(ind(jj)) + qad
+                            localPtr(ind(jj)) = localPtr(ind(jj)) + qa
+                        end do
                     end do
-                 end do
-              end do
-              ii = ii + ni*nj
-           end if
+                end do
+                ii = ii + ni * nj
+            end if
         end do
-     end do
+    end do
+    call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-     call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
+    call vecRestoreArrayF90(nodeValLocald, localPtrd, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-     call vecRestoreArrayF90(nodeValLocald, localPtrd, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
+    ! Globalize the area
+    call vecSet(exch%sumGlobal, zero, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-     ! Globalize the current force
-     call vecSet(exch%nodeValGlobal, zero, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
+    call VecScatterBegin(exch%scatter, exch%nodeValLocal, &
+                         exch%sumGlobal, ADD_VALUES, SCATTER_FORWARD, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-     call VecScatterBegin(exch%scatter, exch%nodeValLocal, &
-          exch%nodeValGlobal, ADD_VALUES, SCATTER_FORWARD, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
+    call VecScatterEnd(exch%scatter, exch%nodeValLocal, &
+                       exch%sumGlobal, ADD_VALUES, SCATTER_FORWARD, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-     call VecScatterEnd(exch%scatter, exch%nodeValLocal, &
-          exch%nodeValGlobal, ADD_VALUES, SCATTER_FORWARD, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
+    ! Globalize the area derivative
+    call vecSet(sumGlobald, zero, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-     ! Globalize the current force derivative
-     call vecSet(nodeValGlobald, zero, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
+    call VecScatterBegin(exch%scatter, nodeValLocald, &
+                         sumGlobald, ADD_VALUES, SCATTER_FORWARD, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-     call VecScatterBegin(exch%scatter, nodeValLocald, &
-          nodeValGlobald, ADD_VALUES, SCATTER_FORWARD, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
+    call VecScatterEnd(exch%scatter, nodeValLocald, &
+                       sumGlobald, ADD_VALUES, SCATTER_FORWARD, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-     call VecScatterEnd(exch%scatter, nodeValLocald, &
-          nodeValGlobald, ADD_VALUES, SCATTER_FORWARD, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
+    ! Now compute the inverse of the weighting so that we can multiply
+    ! instead of dividing. Here we need the original value too:
 
-     ! The product rule here: (since we are multiplying)
-     ! nodeValGlobal = nodeValGlobal * invArea
-     ! nodeValGlobald = nodeValGlobald*invArea + nodeValGlobal*invAread
+    call vecGetArrayF90(exch%sumGlobal, localPtr, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-     ! First term:  nodeValGlobald = nodeValGlobald*invArea
-     call vecPointwiseMult(nodeValGlobald, nodeValGlobald, &
-          exch%sumGlobal, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
+    call vecGetArrayF90(sumGlobald, localPtrd, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-     ! Second term:, tmp = nodeValGlobal*invAread
-     call vecPointwiseMult(tmp, exch%nodeValGlobal, sumGlobald, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
+    localPtrd = -(localPtrd / localPtr**2)
+    localPtr = one / localPtr
 
-     ! Sum the second term into the first
-     call VecAXPY(nodeValGlobald, one, tmp, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
+    call vecGetArrayF90(exch%sumGlobal, localPtr, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-     ! Push back to the local values
-     call VecScatterBegin(exch%scatter, nodeValGlobald, &
-          nodeValLocald, INSERT_VALUES, SCATTER_REVERSE, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
+    call vecRestoreArrayF90(sumGlobald, localPtrd, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-     call VecScatterEnd(exch%scatter, nodeValGlobald, &
-          nodeValLocald, INSERT_VALUES, SCATTER_REVERSE, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
+    ! Now do each of the three dimensions for the pressure and viscous forces
+    dimLoop: do iDim = 1, 6
 
-     call vecGetArrayF90(nodeValLocald, localPtrd, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
-
-     ii = 0
-     do nn=1, nDom
-        call setPointers_d(nn, 1_intType, sps)
-        do mm=1, nBocos
-           iBeg = BCdata(mm)%inBeg; iEnd=BCData(mm)%inEnd
-           jBeg = BCdata(mm)%jnBeg; jEnd=BCData(mm)%jnEnd
+        call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call vecGetArrayF90(nodeValLocald, localPtrd, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        localPtr = zero
+        localPtrd = zero
+
+        ! ii is the running counter through the pointer array.
+        ii = 0
+        do nn = 1, nDom
+            call setPointers_d(nn, 1_intType, sps)
+            do mm = 1, nBocos
+                iBeg = BCdata(mm)%inBeg; iEnd = BCData(mm)%inEnd
+                jBeg = BCdata(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+                ni = iEnd - iBeg + 1
+                nj = jEnd - jBeg + 1
+                if (BCType(mm) == EulerWall .or. &
+                    BCType(mm) == NSWallAdiabatic .or. &
+                    BCType(mm) == NSWallIsothermal) then
+                    do j = 0, nj - 2
+                        do i = 0, ni - 2
+                            if (iDim <= 3) then
+                                qf = fourth * BCData(mm)%Fp(i + iBeg + 1, j + jBeg + 1, iDim)
+                                qfd = fourth * BCDatad(mm)%Fp(i + iBeg + 1, j + jBeg + 1, iDim)
+                            else
+                                qf = fourth * BCData(mm)%Fv(i + iBeg + 1, j + jBeg + 1, iDim - 3)
+                                qfd = fourth * BCDatad(mm)%Fv(i + iBeg + 1, j + jBeg + 1, iDim - 3)
+                            end if
+
+                            ind(1) = ii + (j) * ni + i + 1
+                            ind(2) = ii + (j) * ni + i + 2
+                            ind(3) = ii + (j + 1) * ni + i + 2
+                            ind(4) = ii + (j + 1) * ni + i + 1
+                            do jj = 1, 4
+                                localPtr(ind(jj)) = localPtr(ind(jj)) + qf
+                                localPtrd(ind(jj)) = localPtrd(ind(jj)) + qfd
+                            end do
+                        end do
+                    end do
+                    ii = ii + ni * nj
+                end if
+            end do
+        end do
 
-           if(BCType(mm) == EulerWall .or. &
-                BCType(mm) == NSWallAdiabatic .or. &
-                BCType(mm) == NSWallIsothermal) then
-              do j=jBeg, jEnd
-                 do i=iBeg, iEnd
-                    ii = ii + 1
-                    if (iDim <= 3) then
-                       bcDatad(mm)%Tp(i, j, iDim) = localPtrd(ii)
-                    else
-                       bcDatad(mm)%Tv(i, j, iDim-3) = localPtrd(ii)
-                    end if
-                 end do
-              end do
-           end if
+        call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call vecRestoreArrayF90(nodeValLocald, localPtrd, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Globalize the current force
+        call vecSet(exch%nodeValGlobal, zero, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecScatterBegin(exch%scatter, exch%nodeValLocal, &
+                             exch%nodeValGlobal, ADD_VALUES, SCATTER_FORWARD, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecScatterEnd(exch%scatter, exch%nodeValLocal, &
+                           exch%nodeValGlobal, ADD_VALUES, SCATTER_FORWARD, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Globalize the current force derivative
+        call vecSet(nodeValGlobald, zero, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecScatterBegin(exch%scatter, nodeValLocald, &
+                             nodeValGlobald, ADD_VALUES, SCATTER_FORWARD, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecScatterEnd(exch%scatter, nodeValLocald, &
+                           nodeValGlobald, ADD_VALUES, SCATTER_FORWARD, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! The product rule here: (since we are multiplying)
+        ! nodeValGlobal = nodeValGlobal * invArea
+        ! nodeValGlobald = nodeValGlobald*invArea + nodeValGlobal*invAread
+
+        ! First term:  nodeValGlobald = nodeValGlobald*invArea
+        call vecPointwiseMult(nodeValGlobald, nodeValGlobald, &
+                              exch%sumGlobal, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Second term:, tmp = nodeValGlobal*invAread
+        call vecPointwiseMult(tmp, exch%nodeValGlobal, sumGlobald, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Sum the second term into the first
+        call VecAXPY(nodeValGlobald, one, tmp, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Push back to the local values
+        call VecScatterBegin(exch%scatter, nodeValGlobald, &
+                             nodeValLocald, INSERT_VALUES, SCATTER_REVERSE, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecScatterEnd(exch%scatter, nodeValGlobald, &
+                           nodeValLocald, INSERT_VALUES, SCATTER_REVERSE, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call vecGetArrayF90(nodeValLocald, localPtrd, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ii = 0
+        do nn = 1, nDom
+            call setPointers_d(nn, 1_intType, sps)
+            do mm = 1, nBocos
+                iBeg = BCdata(mm)%inBeg; iEnd = BCData(mm)%inEnd
+                jBeg = BCdata(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+
+                if (BCType(mm) == EulerWall .or. &
+                    BCType(mm) == NSWallAdiabatic .or. &
+                    BCType(mm) == NSWallIsothermal) then
+                    do j = jBeg, jEnd
+                        do i = iBeg, iEnd
+                            ii = ii + 1
+                            if (iDim <= 3) then
+                                bcDatad(mm)%Tp(i, j, iDim) = localPtrd(ii)
+                            else
+                                bcDatad(mm)%Tv(i, j, iDim - 3) = localPtrd(ii)
+                            end if
+                        end do
+                    end do
+                end if
+            end do
         end do
-     end do
 
-     call vecRestoreArrayF90(nodeValLocald, localPtrd, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
+        call vecRestoreArrayF90(nodeValLocald, localPtrd, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-  end do dimLoop
+    end do dimLoop
 
-  call VecDestroy(nodeValLocald, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
+    call VecDestroy(nodeValLocald, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-  call VecDestroy(nodeValGlobald, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
+    call VecDestroy(nodeValGlobald, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-  call VecDestroy(sumGlobald, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
+    call VecDestroy(sumGlobald, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-  call VecDestroy(tmp, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
+    call VecDestroy(tmp, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
 end subroutine computeNodalTractions_d
 
 subroutine computeNodalTractions_b(sps)
 
-  ! This routine performs the reverse of computeNodalTractions. Tt
-  ! takes in bcDatad%Tv and bcDatad%Tp and perfroms the reverse of the
-  ! nodal averaging procedure in getForces to compute bcDatad(mm)%Fp,
-  ! bcDatad(mm)%Fv and bcDatad(mm)%area.
-
-  use constants
-  use blockPointers, only : nDom, nBocos, BCData, BCType, nBocos, BCDatad
-  use inputPhysics, only : forcesAsTractions
-  use surfaceFamilies, only: BCFamExchange, familyExchange
-  use communication
-  use utils, only : EChk, setPointers, setPointers_d
+    ! This routine performs the reverse of computeNodalTractions. Tt
+    ! takes in bcDatad%Tv and bcDatad%Tp and perfroms the reverse of the
+    ! nodal averaging procedure in getForces to compute bcDatad(mm)%Fp,
+    ! bcDatad(mm)%Fv and bcDatad(mm)%area.
+
+    use constants
+    use blockPointers, only: nDom, nBocos, BCData, BCType, nBocos, BCDatad
+    use inputPhysics, only: forcesAsTractions
+    use surfaceFamilies, only: BCFamExchange, familyExchange
+    use communication
+    use utils, only: EChk, setPointers, setPointers_d
 #include <petsc/finclude/petsc.h>
-  use petsc
-  implicit none
-
-  integer(kind=intType), intent(in) :: sps
-  integer(kind=intType) :: mm, nn, i, j, ii, jj, iDim, ierr
-  integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, ind(4), ni, nj
-  real(kind=realType) :: qf_b, qf, qa, qa_b
-  real(kind=realType), dimension(:), pointer :: localPtr, localPtr_b
-  type(familyExchange), pointer :: exch
-  Vec nodeValLocal_b, nodeValGlobal_b, sumGlobal_b, tmp, tmp_b, T_b
-
-  ! For better readibility
-  exch => BCFamExchange(iBCGroupWalls, sps)
-
-  call VecDuplicate(exch%nodeValLocal, nodeValLocal_b, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  call VecDuplicate(exch%nodeValGlobal, nodeValGlobal_b, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  call VecDuplicate(exch%sumGlobal, sumGlobal_b, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  call VecDuplicate(exch%sumGlobal, tmp, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  call VecDuplicate(tmp, T_b, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  ! For tractions it's (a lot) more difficult becuase we have to do
-  ! the scatter/gather operation.
-
-  ! ==================================
-  !  Recompute the dual area
-  ! ==================================
-
-  call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  localPtr = zero
-  ! ii is the running counter through the pointer array.
-  ii = 0
-  do nn=1, nDom
-     call setPointers(nn, 1_intType, sps)
-     do mm=1, nBocos
-        iBeg = BCdata(mm)%inBeg; iEnd=BCData(mm)%inEnd
-        jBeg = BCdata(mm)%jnBeg; jEnd=BCData(mm)%jnEnd
-        ni = iEnd - iBeg + 1
-        nj = jEnd - jBeg + 1
-
-        if(BCType(mm) == EulerWall .or. &
-             BCType(mm) == NSWallAdiabatic .or. &
-             BCType(mm) == NSWallIsothermal) then
-           do j=0,nj-2
-              do i=0,ni-2
-
-                 ! Scatter a quarter of the area to each node:
-                 qa = fourth*BCData(mm)%area(i+iBeg+1, j+jBeg+1)
-                 ind(1) = ii + (j  )*ni + i + 1
-                 ind(2) = ii + (j  )*ni + i + 2
-                 ind(3) = ii + (j+1)*ni + i + 2
-                 ind(4) = ii + (j+1)*ni + i + 1
-                 do jj=1,4
-                    localPtr(ind(jj)) = localPtr(ind(jj)) + qa
-                 end do
-              end do
-           end do
-           ii = ii + ni*nj
-        end if
-     end do
-  end do
+    use petsc
+    implicit none
 
-  call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
+    integer(kind=intType), intent(in) :: sps
+    integer(kind=intType) :: mm, nn, i, j, ii, jj, iDim, ierr
+    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd, ind(4), ni, nj
+    real(kind=realType) :: qf_b, qf, qa, qa_b
+    real(kind=realType), dimension(:), pointer :: localPtr, localPtr_b
+    type(familyExchange), pointer :: exch
+    Vec nodeValLocal_b, nodeValGlobal_b, sumGlobal_b, tmp, tmp_b, T_b
 
-  ! Globalize the area
-  call vecSet(exch%sumGlobal, zero, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
+    ! For better readibility
+    exch => BCFamExchange(iBCGroupWalls, sps)
 
-  call VecScatterBegin(exch%scatter, exch%nodeValLocal, &
-       exch%sumGlobal, ADD_VALUES, SCATTER_FORWARD, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
+    call VecDuplicate(exch%nodeValLocal, nodeValLocal_b, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-  call VecScatterEnd(exch%scatter, exch%nodeValLocal, &
-       exch%sumGlobal, ADD_VALUES, SCATTER_FORWARD, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
+    call VecDuplicate(exch%nodeValGlobal, nodeValGlobal_b, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-  ! Now compute the inverse of the weighting so that we can multiply
-  ! instead of dividing.
+    call VecDuplicate(exch%sumGlobal, sumGlobal_b, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-  call vecGetArrayF90(exch%sumGlobal, localPtr, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
+    call VecDuplicate(exch%sumGlobal, tmp, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-  localPtr = one/localPtr
+    call VecDuplicate(tmp, T_b, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-  call vecRestoreArrayF90(exch%sumGlobal, localPtr, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
+    ! For tractions it's (a lot) more difficult becuase we have to do
+    ! the scatter/gather operation.
 
-  ! ==================================
-  ! Now trace through the computeNodalTractions() routine
-  ! backwards. All the scatters flip direction and INSERT_VALUES
-  ! becomes ADD_VALUES and vice-versa
-  ! ==================================
-  dimLoop: do iDim=1, 6
+    ! ==================================
+    !  Recompute the dual area
+    ! ==================================
 
-     ! ====================
-     ! Do the forward pass:
-     ! ====================
-     call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
+    call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-     localPtr = zero
-     ! ii is the running counter through the pointer array.
-     ii = 0
-     do nn=1, nDom
+    localPtr = zero
+    ! ii is the running counter through the pointer array.
+    ii = 0
+    do nn = 1, nDom
         call setPointers(nn, 1_intType, sps)
-        do mm=1, nBocos
-           iBeg = BCdata(mm)%inBeg; iEnd=BCData(mm)%inEnd
-           jBeg = BCdata(mm)%jnBeg; jEnd=BCData(mm)%jnEnd
-           ni = iEnd - iBeg + 1
-           nj = jEnd - jBeg + 1
-           if(BCType(mm) == EulerWall .or. &
+        do mm = 1, nBocos
+            iBeg = BCdata(mm)%inBeg; iEnd = BCData(mm)%inEnd
+            jBeg = BCdata(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+            ni = iEnd - iBeg + 1
+            nj = jEnd - jBeg + 1
+
+            if (BCType(mm) == EulerWall .or. &
                 BCType(mm) == NSWallAdiabatic .or. &
                 BCType(mm) == NSWallIsothermal) then
-              do j=0,nj-2
-                 do i=0,ni-2
-                    if (iDim <= 3) then
-                       qf = fourth*BCData(mm)%Fp(i+iBeg+1, j+jBeg+1, iDim)
-                    else
-                       qf = fourth*BCData(mm)%Fv(i+iBeg+1, j+jBeg+1, iDim-3)
-                    end if
-
-                    ind(1) = ii + (j  )*ni + i + 1
-                    ind(2) = ii + (j  )*ni + i + 2
-                    ind(3) = ii + (j+1)*ni + i + 2
-                    ind(4) = ii + (j+1)*ni + i + 1
-                    do jj=1,4
-                       localPtr(ind(jj)) = localPtr(ind(jj)) + qf
+                do j = 0, nj - 2
+                    do i = 0, ni - 2
+
+                        ! Scatter a quarter of the area to each node:
+                        qa = fourth * BCData(mm)%area(i + iBeg + 1, j + jBeg + 1)
+                        ind(1) = ii + (j) * ni + i + 1
+                        ind(2) = ii + (j) * ni + i + 2
+                        ind(3) = ii + (j + 1) * ni + i + 2
+                        ind(4) = ii + (j + 1) * ni + i + 1
+                        do jj = 1, 4
+                            localPtr(ind(jj)) = localPtr(ind(jj)) + qa
+                        end do
                     end do
-                 end do
-              end do
-              ii = ii + ni*nj
-           end if
+                end do
+                ii = ii + ni * nj
+            end if
         end do
-     end do
+    end do
 
-     call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
+    call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
+    ! Globalize the area
+    call vecSet(exch%sumGlobal, zero, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-     ! Globalize the current force
-     call vecSet(exch%nodeValGlobal, zero, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
+    call VecScatterBegin(exch%scatter, exch%nodeValLocal, &
+                         exch%sumGlobal, ADD_VALUES, SCATTER_FORWARD, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-     call VecScatterBegin(exch%scatter, exch%nodeValLocal, &
-          exch%nodeValGlobal, ADD_VALUES, SCATTER_FORWARD, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
+    call VecScatterEnd(exch%scatter, exch%nodeValLocal, &
+                       exch%sumGlobal, ADD_VALUES, SCATTER_FORWARD, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-     call VecScatterEnd(exch%scatter, exch%nodeValLocal, &
-          exch%nodeValGlobal, ADD_VALUES, SCATTER_FORWARD, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
+    ! Now compute the inverse of the weighting so that we can multiply
+    ! instead of dividing.
 
-     ! ====================
-     ! Do the reverse pass:
-     ! ====================
+    call vecGetArrayF90(exch%sumGlobal, localPtr, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-     ! Copy the reverse seed into the local values
-     call vecGetArrayF90(nodeValLocal_b, localPtr_b, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
+    localPtr = one / localPtr
 
-     ii = 0
-     domains: do nn=1,nDom
-        call setPointers_d(nn, 1_intType, sps)
+    call vecRestoreArrayF90(exch%sumGlobal, localPtr, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-        ! Loop over the number of boundary subfaces of this block.
-        bocos: do mm=1, nBocos
-           if(BCType(mm) == EulerWall.or.BCType(mm) == NSWallAdiabatic .or. &
-                BCType(mm) == NSWallIsothermal) then
+    ! ==================================
+    ! Now trace through the computeNodalTractions() routine
+    ! backwards. All the scatters flip direction and INSERT_VALUES
+    ! becomes ADD_VALUES and vice-versa
+    ! ==================================
+    dimLoop: do iDim = 1, 6
 
-              ! This is easy, just copy out F or T in continuous ordering.
-              do j=BCData(mm)%jnBeg, BCData(mm)%jnEnd
-                 do i=BCData(mm)%inBeg, BCData(mm)%inEnd
-                    ii = ii + 1
-                    if (iDim <= 3) then
-                       localPtr_b(ii) = BCDatad(mm)%Tp(i, j, iDim)
-                    else
-                       localPtr_b(ii) = BCDatad(mm)%Tv(i, j, iDim-3)
-                    end if
-                 end do
-              end do
-           end if
-        end do bocos
-     end do domains
+        ! ====================
+        ! Do the forward pass:
+        ! ====================
+        call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        localPtr = zero
+        ! ii is the running counter through the pointer array.
+        ii = 0
+        do nn = 1, nDom
+            call setPointers(nn, 1_intType, sps)
+            do mm = 1, nBocos
+                iBeg = BCdata(mm)%inBeg; iEnd = BCData(mm)%inEnd
+                jBeg = BCdata(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+                ni = iEnd - iBeg + 1
+                nj = jEnd - jBeg + 1
+                if (BCType(mm) == EulerWall .or. &
+                    BCType(mm) == NSWallAdiabatic .or. &
+                    BCType(mm) == NSWallIsothermal) then
+                    do j = 0, nj - 2
+                        do i = 0, ni - 2
+                            if (iDim <= 3) then
+                                qf = fourth * BCData(mm)%Fp(i + iBeg + 1, j + jBeg + 1, iDim)
+                            else
+                                qf = fourth * BCData(mm)%Fv(i + iBeg + 1, j + jBeg + 1, iDim - 3)
+                            end if
+
+                            ind(1) = ii + (j) * ni + i + 1
+                            ind(2) = ii + (j) * ni + i + 2
+                            ind(3) = ii + (j + 1) * ni + i + 2
+                            ind(4) = ii + (j + 1) * ni + i + 1
+                            do jj = 1, 4
+                                localPtr(ind(jj)) = localPtr(ind(jj)) + qf
+                            end do
+                        end do
+                    end do
+                    ii = ii + ni * nj
+                end if
+            end do
+        end do
 
-     call vecRestoreArrayF90(nodeValLocal_b, localPtr_b, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
+        call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Globalize the current force
+        call vecSet(exch%nodeValGlobal, zero, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecScatterBegin(exch%scatter, exch%nodeValLocal, &
+                             exch%nodeValGlobal, ADD_VALUES, SCATTER_FORWARD, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecScatterEnd(exch%scatter, exch%nodeValLocal, &
+                           exch%nodeValGlobal, ADD_VALUES, SCATTER_FORWARD, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! ====================
+        ! Do the reverse pass:
+        ! ====================
+
+        ! Copy the reverse seed into the local values
+        call vecGetArrayF90(nodeValLocal_b, localPtr_b, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ii = 0
+        domains: do nn = 1, nDom
+            call setPointers_d(nn, 1_intType, sps)
+
+            ! Loop over the number of boundary subfaces of this block.
+            bocos: do mm = 1, nBocos
+                if (BCType(mm) == EulerWall .or. BCType(mm) == NSWallAdiabatic .or. &
+                    BCType(mm) == NSWallIsothermal) then
+
+                    ! This is easy, just copy out F or T in continuous ordering.
+                    do j = BCData(mm)%jnBeg, BCData(mm)%jnEnd
+                        do i = BCData(mm)%inBeg, BCData(mm)%inEnd
+                            ii = ii + 1
+                            if (iDim <= 3) then
+                                localPtr_b(ii) = BCDatad(mm)%Tp(i, j, iDim)
+                            else
+                                localPtr_b(ii) = BCDatad(mm)%Tv(i, j, iDim - 3)
+                            end if
+                        end do
+                    end do
+                end if
+            end do bocos
+        end do domains
+
+        call vecRestoreArrayF90(nodeValLocal_b, localPtr_b, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call vecSet(T_b, zero, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+        ! Push up to the global values
+        call VecScatterBegin(exch%scatter, nodeValLocal_b, &
+                             T_b, ADD_VALUES, SCATTER_FORWARD, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecScatterEnd(exch%scatter, nodeValLocal_b, &
+                           T_b, ADD_VALUES, SCATTER_FORWARD, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! this is particularly nasty. This is why you don't do
+        ! derivatives by hand, kids.
+        ! exch%nodeValGlobal = F
+        ! nodeValGlobal_b = F_b
+        ! T_b = reverse seed for tractions
+        ! sumGlobal_b =  inverseDualarea_b
+        ! exch%sumGlobal = invDualArea
+
+        ! Basically what we have to compute here is:
+        ! Fb = invDualArea * T_b
+        ! invDualAreab = invDualAreab + F*T_b
+
+        call vecPointwiseMult(nodeValGlobal_b, exch%sumGlobal, T_b, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call vecPointwiseMult(tmp, exch%nodeValGlobal, T_b, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Accumulate seed on adflowGlobal_b
+        call vecAXPY(sumGlobal_b, one, tmp, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! Now communicate F_b back to the local patches
+
+        call VecScatterBegin(exch%scatter, nodeValGlobal_b, &
+                             nodeValLocal_b, INSERT_VALUES, SCATTER_REVERSE, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        call VecScatterEnd(exch%scatter, nodeValGlobal_b, &
+                           nodeValLocal_b, INSERT_VALUES, SCATTER_REVERSE, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! ============================
+        ! Copy the values into patches
+        ! ============================
+
+        call vecGetArrayF90(nodeValLocal_b, localPtr_b, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
+
+        ! ii is the running counter through the pointer array.
+        ii = 0
+        do nn = 1, nDom
+            call setPointers_d(nn, 1_intType, sps)
+            do mm = 1, nBocos
+                iBeg = BCdata(mm)%inBeg; iEnd = BCData(mm)%inEnd
+                jBeg = BCdata(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+                ni = iEnd - iBeg + 1
+                nj = jEnd - jBeg + 1
+                if (BCType(mm) == EulerWall .or. &
+                    BCType(mm) == NSWallAdiabatic .or. &
+                    BCType(mm) == NSWallIsothermal) then
+
+                    do j = 0, nj - 2
+                        do i = 0, ni - 2
+
+                            ind(1) = ii + (j) * ni + i + 1
+                            ind(2) = ii + (j) * ni + i + 2
+                            ind(3) = ii + (j + 1) * ni + i + 2
+                            ind(4) = ii + (j + 1) * ni + i + 1
+                            qf_b = zero
+                            do jj = 1, 4
+                                qf_b = qf_b + localPtr_b(ind(jj))
+                            end do
+                            qf_b = qf_b * fourth
+
+                            if (iDim <= 3) then
+                                BCDatad(mm)%Fp(i + iBeg + 1, j + jBeg + 1, iDim) = &
+                                    BCDatad(mm)%Fp(i + iBeg + 1, j + jBeg + 1, iDim) + qf_b
+                            else
+                                BCDatad(mm)%Fv(i + iBeg + 1, j + jBeg + 1, iDim - 3) = &
+                                    BCDatad(mm)%Fv(i + iBeg + 1, j + jBeg + 1, iDim - 3) + qf_b
+                            end if
+
+                        end do
+                    end do
+                    ii = ii + ni * nj
+                end if
+            end do
+        end do
 
-     call vecSet(T_b, zero, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
-     ! Push up to the global values
-     call VecScatterBegin(exch%scatter, nodeValLocal_b, &
-          T_b, ADD_VALUES, SCATTER_FORWARD, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
+        call vecRestoreArrayF90(nodeValLocal_b, localPtr_b, ierr)
+        call EChk(ierr, __FILE__, __LINE__)
 
-     call VecScatterEnd(exch%scatter, nodeValLocal_b, &
-          T_b, ADD_VALUES, SCATTER_FORWARD, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
+    end do dimLoop
 
-     ! this is particularly nasty. This is why you don't do
-     ! derivatives by hand, kids.
-     ! exch%nodeValGlobal = F
-     ! nodeValGlobal_b = F_b
-     ! T_b = reverse seed for tractions
-     ! sumGlobal_b =  inverseDualarea_b
-     ! exch%sumGlobal = invDualArea
+    ! ============================
+    ! Finish the dual area sensitivity.
+    ! ============================
 
-     ! Basically what we have to compute here is:
-     ! Fb = invDualArea * T_b
-     ! invDualAreab = invDualAreab + F*T_b
+    ! On the forward pass we computed:
+    ! sumGlobal = one/sumGlobal
+    ! So on the reverse pass we need:
+    ! sumGlobalb = -(sumGlobalb/sumGlobal**2)
 
-     call vecPointwiseMult(nodeValGlobal_b, exch%sumGlobal, T_b, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
+    ! We will do this by getting pointers
 
-     call vecPointwiseMult(tmp, exch%nodeValGlobal, T_b, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
+    call vecGetArrayF90(sumGlobal_b, localPtr_b, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-     ! Accumulate seed on adflowGlobal_b
-     call vecAXPY(sumGlobal_b, one, tmp, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
+    call vecGetArrayF90(exch%sumGlobal, localPtr, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-     ! Now communicate F_b back to the local patches
+    ! Keep in mind localPtr points to sumGlobal which already has
+    ! been inversed so we just multiply.
+    localPtr_b = -localPtr_b * localPtr**2
 
-     call VecScatterBegin(exch%scatter, nodeValGlobal_b, &
-          nodeValLocal_b, INSERT_VALUES, SCATTER_REVERSE, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
+    call vecRestoreArrayF90(sumGlobal_b, localPtr_b, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-     call VecScatterEnd(exch%scatter, nodeValGlobal_b, &
-          nodeValLocal_b, INSERT_VALUES, SCATTER_REVERSE, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
+    call vecRestoreArrayF90(exch%sumGlobal, localPtr, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-     ! ============================
-     ! Copy the values into patches
-     ! ============================
+    ! Push back to the local patches
+    call VecScatterBegin(exch%scatter, sumGlobal_b, &
+                         nodeValLocal_b, INSERT_VALUES, SCATTER_REVERSE, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-     call vecGetArrayF90(nodeValLocal_b, localPtr_b, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
+    call VecScatterEnd(exch%scatter, sumGlobal_b, &
+                       nodeValLocal_b, INSERT_VALUES, SCATTER_REVERSE, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-     ! ii is the running counter through the pointer array.
-     ii = 0
-     do nn=1, nDom
+    call vecGetArrayF90(nodeValLocal_b, localPtr_b, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
+
+    ! ii is the running counter through the pointer array.
+    ii = 0
+    do nn = 1, nDom
         call setPointers_d(nn, 1_intType, sps)
-        do mm=1, nBocos
-           iBeg = BCdata(mm)%inBeg; iEnd=BCData(mm)%inEnd
-           jBeg = BCdata(mm)%jnBeg; jEnd=BCData(mm)%jnEnd
-           ni = iEnd - iBeg + 1
-           nj = jEnd - jBeg + 1
-           if(BCType(mm) == EulerWall .or. &
+        do mm = 1, nBocos
+            iBeg = BCdata(mm)%inBeg; iEnd = BCData(mm)%inEnd
+            jBeg = BCdata(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+            ni = iEnd - iBeg + 1
+            nj = jEnd - jBeg + 1
+
+            if (BCType(mm) == EulerWall .or. &
                 BCType(mm) == NSWallAdiabatic .or. &
                 BCType(mm) == NSWallIsothermal) then
-
-              do j=0,nj-2
-                 do i=0,ni-2
-
-                    ind(1) = ii + (j  )*ni + i + 1
-                    ind(2) = ii + (j  )*ni + i + 2
-                    ind(3) = ii + (j+1)*ni + i + 2
-                    ind(4) = ii + (j+1)*ni + i + 1
-                    qf_b = zero
-                    do jj=1,4
-                       qf_b = qf_b + localPtr_b(ind(jj))
+                do j = 0, nj - 2
+                    do i = 0, ni - 2
+
+                        ind(1) = ii + (j) * ni + i + 1
+                        ind(2) = ii + (j) * ni + i + 2
+                        ind(3) = ii + (j + 1) * ni + i + 2
+                        ind(4) = ii + (j + 1) * ni + i + 1
+                        qa_b = zero
+                        do jj = 1, 4
+                            qa_b = qa_b + localPtr_b(ind(jj))
+                        end do
+                        qa_b = fourth * qa_b
+                        BCDatad(mm)%area(i + iBeg + 1, j + jBeg + 1) = &
+                            BCDatad(mm)%area(i + iBeg + 1, j + jBeg + 1) + qa_b
                     end do
-                    qf_b = qf_b*fourth
-
-                    if (iDim <= 3) then
-                       BCDatad(mm)%Fp(i+iBeg+1, j+jBeg+1, iDim) = &
-                            BCDatad(mm)%Fp(i+iBeg+1, j+jBeg+1, iDim) + qf_b
-                    else
-                       BCDatad(mm)%Fv(i+iBeg+1, j+jBeg+1, iDim-3) = &
-                            BCDatad(mm)%Fv(i+iBeg+1, j+jBeg+1, iDim-3) + qf_b
-                    end if
-
-                 end do
-              end do
-              ii = ii + ni*nj
-           end if
+                end do
+                ii = ii + ni * nj
+            end if
         end do
-     end do
-
-     call vecRestoreArrayF90(nodeValLocal_b, localPtr_b, ierr)
-     call EChk(ierr,__FILE__,__LINE__)
-
-  end do dimLoop
-
-  ! ============================
-  ! Finish the dual area sensitivity.
-  ! ============================
-
-  ! On the forward pass we computed:
-  ! sumGlobal = one/sumGlobal
-  ! So on the reverse pass we need:
-  ! sumGlobalb = -(sumGlobalb/sumGlobal**2)
-
-  ! We will do this by getting pointers
-
-  call vecGetArrayF90(sumGlobal_b, localPtr_b, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  call vecGetArrayF90(exch%sumGlobal, localPtr, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  ! Keep in mind localPtr points to sumGlobal which already has
-  ! been inversed so we just multiply.
-  localPtr_b = -localPtr_b*localPtr**2
-
-  call vecRestoreArrayF90(sumGlobal_b, localPtr_b, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  call vecRestoreArrayF90(exch%sumGlobal, localPtr, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  ! Push back to the local patches
-  call VecScatterBegin(exch%scatter, sumGlobal_b, &
-       nodeValLocal_b, INSERT_VALUES, SCATTER_REVERSE, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  call VecScatterEnd(exch%scatter, sumGlobal_b, &
-       nodeValLocal_b, INSERT_VALUES, SCATTER_REVERSE, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  call vecGetArrayF90(nodeValLocal_b, localPtr_b, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
-
-  ! ii is the running counter through the pointer array.
-  ii = 0
-  do nn=1, nDom
-     call setPointers_d(nn, 1_intType, sps)
-     do mm=1, nBocos
-        iBeg = BCdata(mm)%inBeg; iEnd=BCData(mm)%inEnd
-        jBeg = BCdata(mm)%jnBeg; jEnd=BCData(mm)%jnEnd
-        ni = iEnd - iBeg + 1
-        nj = jEnd - jBeg + 1
-
-        if(BCType(mm) == EulerWall .or. &
-             BCType(mm) == NSWallAdiabatic .or. &
-             BCType(mm) == NSWallIsothermal) then
-           do j=0,nj-2
-              do i=0,ni-2
-
-                 ind(1) = ii + (j  )*ni + i + 1
-                 ind(2) = ii + (j  )*ni + i + 2
-                 ind(3) = ii + (j+1)*ni + i + 2
-                 ind(4) = ii + (j+1)*ni + i + 1
-                 qa_b = zero
-                 do jj=1,4
-                    qa_b = qa_b + localPtr_b(ind(jj))
-                 end do
-                 qa_b = fourth*qa_b
-                 BCDatad(mm)%area(i+iBeg+1, j+jBeg+1) = &
-                      BCDatad(mm)%area(i+iBeg+1, j+jBeg+1) + qa_b
-              end do
-           end do
-           ii = ii + ni*nj
-        end if
-     end do
-  end do
+    end do
 
-  call vecRestoreArrayF90(nodeValLocal_b, localPtr_b, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
+    call vecRestoreArrayF90(nodeValLocal_b, localPtr_b, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-  ! Remove temporary petsc vecs
-  call VecDestroy(nodeValLocal_b, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
+    ! Remove temporary petsc vecs
+    call VecDestroy(nodeValLocal_b, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-  call VecDestroy(nodeValGlobal_b, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
+    call VecDestroy(nodeValGlobal_b, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-  call VecDestroy(sumGlobal_b, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
+    call VecDestroy(sumGlobal_b, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-  call VecDestroy(tmp, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
+    call VecDestroy(tmp, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
-  call VecDestroy(T_b, ierr)
-  call EChk(ierr,__FILE__,__LINE__)
+    call VecDestroy(T_b, ierr)
+    call EChk(ierr, __FILE__, __LINE__)
 
+end subroutine computeNodalTractions_b
 
+subroutine computeNodalForces(sps)
 
+    ! This subroutine averages the cell based forces and tractions to
+    ! node based values. There is no need for communication since we are
+    ! simplying summing a quarter of each value to each corner.
 
+    use constants
+    use blockPointers, only: nDom, nBocos, BCType, BCData
+    use utils, only: setPointers
+    implicit none
 
-end subroutine computeNodalTractions_b
+    integer(kind=intType), intent(in) :: sps
 
-subroutine computeNodalForces(sps)
+    integer(kind=intType) :: mm, nn, i, j
+    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
+    real(kind=realType) :: qf(3)
 
-  ! This subroutine averages the cell based forces and tractions to
-  ! node based values. There is no need for communication since we are
-  ! simplying summing a quarter of each value to each corner.
-
-  use constants
-  use blockPointers, only : nDom, nBocos, BCType, BCData
-  use utils, only : setPointers
-  implicit none
-
-  integer(kind=intType), intent(in) ::  sps
-
-  integer(kind=intType) :: mm, nn, i, j
-  integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
-  real(kind=realType) :: qf(3)
-
-  do nn=1, nDom
-     call setPointers(nn, 1_intType, sps)
-     do mm=1, nBocos
-        iBeg = BCdata(mm)%inBeg+1; iEnd=BCData(mm)%inEnd
-        jBeg = BCdata(mm)%jnBeg+1; jEnd=BCData(mm)%jnEnd
-
-        if(BCType(mm) == EulerWall.or.BCType(mm) == NSWallAdiabatic .or. &
-             BCType(mm) == NSWallIsothermal) then
-           BCData(mm)%F = zero
-           do j=jBeg, jEnd
-              do i=iBeg, iEnd
-                 qf = fourth*(BCData(mm)%Fp(i,j,:) + BCData(mm)%Fv(i,j,:))
-                 BCData(mm)%F(i  , j,   :) = BCData(mm)%F(i  , j,   :) + qf
-                 BCData(mm)%F(i-1, j,   :) = BCData(mm)%F(i-1, j  , :) + qf
-                 BCData(mm)%F(i  , j-1, :) = BCData(mm)%F(i  , j-1, :) + qf
-                 BCData(mm)%F(i-1, j-1, :) = BCData(mm)%F(i-1, j-1, :) + qf
-              end do
-           end do
-        end if
-     end do
-  end do
+    do nn = 1, nDom
+        call setPointers(nn, 1_intType, sps)
+        do mm = 1, nBocos
+            iBeg = BCdata(mm)%inBeg + 1; iEnd = BCData(mm)%inEnd
+            jBeg = BCdata(mm)%jnBeg + 1; jEnd = BCData(mm)%jnEnd
+
+            if (BCType(mm) == EulerWall .or. BCType(mm) == NSWallAdiabatic .or. &
+                BCType(mm) == NSWallIsothermal) then
+                BCData(mm)%F = zero
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        qf = fourth * (BCData(mm)%Fp(i, j, :) + BCData(mm)%Fv(i, j, :))
+                        BCData(mm)%F(i, j, :) = BCData(mm)%F(i, j, :) + qf
+                        BCData(mm)%F(i - 1, j, :) = BCData(mm)%F(i - 1, j, :) + qf
+                        BCData(mm)%F(i, j - 1, :) = BCData(mm)%F(i, j - 1, :) + qf
+                        BCData(mm)%F(i - 1, j - 1, :) = BCData(mm)%F(i - 1, j - 1, :) + qf
+                    end do
+                end do
+            end if
+        end do
+    end do
 end subroutine computeNodalForces
 
 subroutine computeNodalForces_d(sps)
 
-  ! Forward mode linearization of nodalForces
-
-  use constants
-  use blockPointers, only : nDom, nBocos, BCType, BCData, BCDatad
-  use utils, only : setPointers
-  implicit none
-
-  integer(kind=intType), intent(in) ::  sps
-
-  integer(kind=intType) :: mm, nn, i, j
-  integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
-  real(kind=realType) :: qfd(3)
-
-  do nn=1, nDom
-     call setPointers(nn, 1_intType, sps)
-     do mm=1, nBocos
-        iBeg = BCdata(mm)%inBeg+1; iEnd=BCData(mm)%inEnd
-        jBeg = BCdata(mm)%jnBeg+1; jEnd=BCData(mm)%jnEnd
-
-        if(BCType(mm) == EulerWall.or.BCType(mm) == NSWallAdiabatic .or. &
-             BCType(mm) == NSWallIsothermal) then
-           BCDatad(mm)%F = zero
-           do j=jBeg, jEnd
-              do i=iBeg, iEnd
-                 qfd = fourth*(BCDatad(mm)%Fp(i,j,:) + BCDatad(mm)%Fv(i,j,:))
-                 BCDatad(mm)%F(i  , j,   :) = BCDatad(mm)%F(i  , j,   :) + qfd
-                 BCDatad(mm)%F(i-1, j,   :) = BCDatad(mm)%F(i-1, j  , :) + qfd
-                 BCDatad(mm)%F(i  , j-1, :) = BCDatad(mm)%F(i  , j-1, :) + qfd
-                 BCDatad(mm)%F(i-1, j-1, :) = BCDatad(mm)%F(i-1, j-1, :) + qfd
-              end do
-           end do
-        end if
-     end do
-  end do
+    ! Forward mode linearization of nodalForces
+
+    use constants
+    use blockPointers, only: nDom, nBocos, BCType, BCData, BCDatad
+    use utils, only: setPointers
+    implicit none
+
+    integer(kind=intType), intent(in) :: sps
+
+    integer(kind=intType) :: mm, nn, i, j
+    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
+    real(kind=realType) :: qfd(3)
+
+    do nn = 1, nDom
+        call setPointers(nn, 1_intType, sps)
+        do mm = 1, nBocos
+            iBeg = BCdata(mm)%inBeg + 1; iEnd = BCData(mm)%inEnd
+            jBeg = BCdata(mm)%jnBeg + 1; jEnd = BCData(mm)%jnEnd
+
+            if (BCType(mm) == EulerWall .or. BCType(mm) == NSWallAdiabatic .or. &
+                BCType(mm) == NSWallIsothermal) then
+                BCDatad(mm)%F = zero
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        qfd = fourth * (BCDatad(mm)%Fp(i, j, :) + BCDatad(mm)%Fv(i, j, :))
+                        BCDatad(mm)%F(i, j, :) = BCDatad(mm)%F(i, j, :) + qfd
+                        BCDatad(mm)%F(i - 1, j, :) = BCDatad(mm)%F(i - 1, j, :) + qfd
+                        BCDatad(mm)%F(i, j - 1, :) = BCDatad(mm)%F(i, j - 1, :) + qfd
+                        BCDatad(mm)%F(i - 1, j - 1, :) = BCDatad(mm)%F(i - 1, j - 1, :) + qfd
+                    end do
+                end do
+            end if
+        end do
+    end do
 end subroutine computeNodalForces_d
 
 subroutine computeNodalForces_b(sps)
 
-  ! Reverse mode linearization of nodalForces
-
-  use constants
-  use blockPointers, only : nDom, nBocos, BCType, BCData, BCDatad
-  use utils, only : setPointers_d
-  implicit none
-
-  integer(kind=intType), intent(in) ::  sps
-
-  integer(kind=intType) :: mm, nn, i, j
-  integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
-  real(kind=realType) :: qf_b(3)
-
-  domains: do nn=1,nDom
-     call setPointers_d(nn, 1_intType, sps)
-     do mm=1, nBocos
-        iBeg = BCdata(mm)%inBeg+1; iEnd=BCData(mm)%inEnd
-        jBeg = BCdata(mm)%jnBeg+1; jEnd=BCData(mm)%jnEnd
-        if(BCType(mm) == EulerWall.or.BCType(mm) == NSWallAdiabatic .or. &
-             BCType(mm) == NSWallIsothermal) then
-           do j=jBeg, jEnd
-              do i=iBeg, iEnd
-                 qf_b = fourth*(BCDatad(mm)%F(i, j, :) + BCdatad(mm)%F(i-1, j, :) + &
-                      BCDatad(mm)%F(i, j-1, :) + BCDatad(mm)%F(i-1, j-1, :))
-
-                 ! Fp and Fv are face-based values
-                 BCDatad(mm)%Fp(i, j, :) = BCDatad(mm)%Fp(i, j, :) + qf_b
-                 BCDatad(mm)%Fv(i, j, :) = BCDatad(mm)%Fv(i, j, :) + qf_b
-              end do
-           end do
-           ! this needs to be after the update to be the reverse of the forward mode.
-           BCDatad(mm)%F = zero
-        end if
-     end do
-  end do domains
+    ! Reverse mode linearization of nodalForces
+
+    use constants
+    use blockPointers, only: nDom, nBocos, BCType, BCData, BCDatad
+    use utils, only: setPointers_d
+    implicit none
+
+    integer(kind=intType), intent(in) :: sps
+
+    integer(kind=intType) :: mm, nn, i, j
+    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
+    real(kind=realType) :: qf_b(3)
+
+    domains: do nn = 1, nDom
+        call setPointers_d(nn, 1_intType, sps)
+        do mm = 1, nBocos
+            iBeg = BCdata(mm)%inBeg + 1; iEnd = BCData(mm)%inEnd
+            jBeg = BCdata(mm)%jnBeg + 1; jEnd = BCData(mm)%jnEnd
+            if (BCType(mm) == EulerWall .or. BCType(mm) == NSWallAdiabatic .or. &
+                BCType(mm) == NSWallIsothermal) then
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        qf_b = fourth * (BCDatad(mm)%F(i, j, :) + BCdatad(mm)%F(i - 1, j, :) + &
+                                         BCDatad(mm)%F(i, j - 1, :) + BCDatad(mm)%F(i - 1, j - 1, :))
+
+                        ! Fp and Fv are face-based values
+                        BCDatad(mm)%Fp(i, j, :) = BCDatad(mm)%Fp(i, j, :) + qf_b
+                        BCDatad(mm)%Fv(i, j, :) = BCDatad(mm)%Fv(i, j, :) + qf_b
+                    end do
+                end do
+                ! this needs to be after the update to be the reverse of the forward mode.
+                BCDatad(mm)%F = zero
+            end if
+        end do
+    end do domains
 end subroutine computeNodalForces_b
 
 subroutine getHeatFlux(hflux, npts, sps)
-  use constants
-  use blockPointers, only : nDom, nBocos, BCType, BCData
-  use surfaceFamilies, only : BCFamExchange, familyExchange, &
-       zeroCellVal, zeroNodeVal
-  use utils, only : setPointers
-  implicit none
-  !
-  !      Local variables.
-  !
-  integer(kind=intType), intent(in) :: npts, sps
-  real(kind=realType), intent(out) :: hflux(npts)
-
-  integer(kind=intType) :: mm, nn, i, j, ii
-  integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
-  type(familyExchange), pointer :: exch
-
-  exch => BCFamExchange(iBCGroupWalls, sps)
-  do nn=1, nDom
-     call setPointers(nn, 1_intType, sps)
-     call heatFluxes()
-
-     do mm=1, nBocos
-        iBeg = BCdata(mm)%inBeg; iEnd=BCData(mm)%inEnd
-        jBeg = BCdata(mm)%jnBeg; jEnd=BCData(mm)%jnEnd
-
-        bocoType1: if (BCType(mm) == NSWallIsoThermal) then
-           BCData(mm)%cellVal => BCData(mm)%area(:, :)
-        else if (BCType(mm) == EulerWall .or. BCType(mm) == NSWallAdiabatic) then
-           BCData(mm)%cellVal => zeroCellVal
-           BCData(mm)%nodeVal => zeroNodeVal
-        end if bocoType1
-     end do
-  end do
-
-  call computeWeighting(exch)
-
-  do nn=1, nDom
-     call setPointers(nn, 1_intType, sps)
-     do mm=1, nBocos
-        bocoType2: if (BCType(mm) == NSWallIsoThermal) then
-           BCData(mm)%cellVal => BCData(mm)%cellHeatFlux(:, :)
-           BCData(mm)%nodeVal => BCData(mm)%nodeHeatFlux(:, :)
-        end if bocoType2
-     end do
-  end do
-
-  call surfaceCellCenterToNode(exch)
-
-  ! Now extract into the flat array:
-  ii = 0
-  do nn=1,nDom
-     call setPointers(nn,1_intType,sps)
-
-     ! Loop over the number of viscous boundary subfaces of this block.
-     ! According to preprocessing/viscSubfaceInfo, visc bocos are numbered
-     ! before other bocos. Therefore, mm_nViscBocos == mm_nBocos
-     do mm=1,nBocos
-        bocoType3: if (BCType(mm) == NSWallIsoThermal) then
-           do j=BCData(mm)%jnBeg,BCData(mm)%jnEnd
-              do i=BCData(mm)%inBeg,BCData(mm)%inEnd
-                 ii = ii + 1
-                 hflux(ii) = BCData(mm)%nodeHeatFlux(i, j)
-              end do
-           end do
-           ! Simply put in zeros for the other wall BCs
-        else if (BCType(mm) == NSWallAdiabatic .or. BCType(mm) == EulerWall) then
-           do j=BCData(mm)%jnBeg,BCData(mm)%jnEnd
-              do i=BCData(mm)%inBeg,BCData(mm)%inEnd
-                 ii = ii + 1
-                 hflux(ii) = zero
-              end do
-           end do
-        end if bocoType3
-     end do
-  end do
+    use constants
+    use blockPointers, only: nDom, nBocos, BCType, BCData
+    use surfaceFamilies, only: BCFamExchange, familyExchange, &
+                               zeroCellVal, zeroNodeVal
+    use utils, only: setPointers
+    implicit none
+    !
+    !      Local variables.
+    !
+    integer(kind=intType), intent(in) :: npts, sps
+    real(kind=realType), intent(out) :: hflux(npts)
+
+    integer(kind=intType) :: mm, nn, i, j, ii
+    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
+    type(familyExchange), pointer :: exch
+
+    exch => BCFamExchange(iBCGroupWalls, sps)
+    do nn = 1, nDom
+        call setPointers(nn, 1_intType, sps)
+        call heatFluxes()
+
+        do mm = 1, nBocos
+            iBeg = BCdata(mm)%inBeg; iEnd = BCData(mm)%inEnd
+            jBeg = BCdata(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+
+            bocoType1: if (BCType(mm) == NSWallIsoThermal) then
+                BCData(mm)%cellVal => BCData(mm)%area(:, :)
+            else if (BCType(mm) == EulerWall .or. BCType(mm) == NSWallAdiabatic) then
+                BCData(mm)%cellVal => zeroCellVal
+                BCData(mm)%nodeVal => zeroNodeVal
+            end if bocoType1
+        end do
+    end do
+
+    call computeWeighting(exch)
+
+    do nn = 1, nDom
+        call setPointers(nn, 1_intType, sps)
+        do mm = 1, nBocos
+            bocoType2: if (BCType(mm) == NSWallIsoThermal) then
+                BCData(mm)%cellVal => BCData(mm)%cellHeatFlux(:, :)
+                BCData(mm)%nodeVal => BCData(mm)%nodeHeatFlux(:, :)
+            end if bocoType2
+        end do
+    end do
+
+    call surfaceCellCenterToNode(exch)
+
+    ! Now extract into the flat array:
+    ii = 0
+    do nn = 1, nDom
+        call setPointers(nn, 1_intType, sps)
+
+        ! Loop over the number of viscous boundary subfaces of this block.
+        ! According to preprocessing/viscSubfaceInfo, visc bocos are numbered
+        ! before other bocos. Therefore, mm_nViscBocos == mm_nBocos
+        do mm = 1, nBocos
+            bocoType3: if (BCType(mm) == NSWallIsoThermal) then
+                do j = BCData(mm)%jnBeg, BCData(mm)%jnEnd
+                    do i = BCData(mm)%inBeg, BCData(mm)%inEnd
+                        ii = ii + 1
+                        hflux(ii) = BCData(mm)%nodeHeatFlux(i, j)
+                    end do
+                end do
+                ! Simply put in zeros for the other wall BCs
+            else if (BCType(mm) == NSWallAdiabatic .or. BCType(mm) == EulerWall) then
+                do j = BCData(mm)%jnBeg, BCData(mm)%jnEnd
+                    do i = BCData(mm)%inBeg, BCData(mm)%inEnd
+                        ii = ii + 1
+                        hflux(ii) = zero
+                    end do
+                end do
+            end if bocoType3
+        end do
+    end do
 end subroutine getHeatFlux
 
 subroutine heatFluxes
-  use constants
-  use blockPointers, only : BCData, nDom, nBocos, BCType, BCFaceID, viscSubFace
-  use BCPointers, only : ssi
-  use flowVarRefState, only : pRef, rhoRef
-  use utils, only : setPointers, setBCPointers
-  implicit none
-  !
-  !      Local variables.
-  !
-  integer(kind=intType) :: i, j, ii, mm
-  real(kind=realType) :: fact, scaleDim
-  real(kind=realType) :: qw, qA
-  logical :: heatedSubface
-
-  ! Set the actual scaling factor such that ACTUAL heat flux is computed
-  ! The factor is determined from stanton number
-  scaleDim = pRef*sqrt(pRef/rhoRef)
-
-  ! Loop over the boundary subfaces of this block.
-  bocos: do mm=1, nBocos
-
-     ! Only do this on isoThermalWalls
-     if (BCType(mm) == NSWallIsoThermal) then
-
-        ! Set a bunch of pointers depending on the face id to make
-        ! a generic treatment possible. The routine setBcPointers
-        ! is not used, because quite a few other ones are needed.
-        call setBCPointers(mm, .True.)
-
-        select case (BCFaceID(mm))
-        case (iMin, jMin, kMin)
-           fact = -one
-        case (iMax, jMax, kMax)
-           fact = one
-        end select
-
-        ! Loop over the quadrilateral faces of the subface. Note that
-        ! the nodal range of BCData must be used and not the cell
-        ! range, because the latter may include the halo's in i and
-        ! j-direction. The offset +1 is there, because inBeg and jnBeg
-        ! refer to nodal ranges and not to cell ranges.
-        !
-        do j=(BCData(mm)%jnBeg+1), BCData(mm)%jnEnd
-           do i=(BCData(mm)%inBeg+1), BCData(mm)%inEnd
-
-              ! Compute the normal heat flux on the face. Inward positive.
-              BCData(mm)%cellHeatFlux(i,j) = -fact*scaleDim* &
-                   sqrt(ssi(i,j,1)**2 + ssi(i,j,2)**2 + ssi(i,j,3)**2) * &
-                   ( viscSubface(mm)%q(i,j,1)*BCData(mm)%norm(i,j,1) &
-                   + viscSubface(mm)%q(i,j,2)*BCData(mm)%norm(i,j,2) &
-                   + viscSubface(mm)%q(i,j,3)*BCData(mm)%norm(i,j,3))
-           enddo
-        end do
-     end if
-  enddo bocos
+    use constants
+    use blockPointers, only: BCData, nDom, nBocos, BCType, BCFaceID, viscSubFace
+    use BCPointers, only: ssi
+    use flowVarRefState, only: pRef, rhoRef
+    use utils, only: setPointers, setBCPointers
+    implicit none
+    !
+    !      Local variables.
+    !
+    integer(kind=intType) :: i, j, ii, mm
+    real(kind=realType) :: fact, scaleDim
+    real(kind=realType) :: qw, qA
+    logical :: heatedSubface
+
+    ! Set the actual scaling factor such that ACTUAL heat flux is computed
+    ! The factor is determined from stanton number
+    scaleDim = pRef * sqrt(pRef / rhoRef)
+
+    ! Loop over the boundary subfaces of this block.
+    bocos: do mm = 1, nBocos
+
+        ! Only do this on isoThermalWalls
+        if (BCType(mm) == NSWallIsoThermal) then
+
+            ! Set a bunch of pointers depending on the face id to make
+            ! a generic treatment possible. The routine setBcPointers
+            ! is not used, because quite a few other ones are needed.
+            call setBCPointers(mm, .True.)
+
+            select case (BCFaceID(mm))
+            case (iMin, jMin, kMin)
+                fact = -one
+            case (iMax, jMax, kMax)
+                fact = one
+            end select
+
+            ! Loop over the quadrilateral faces of the subface. Note that
+            ! the nodal range of BCData must be used and not the cell
+            ! range, because the latter may include the halo's in i and
+            ! j-direction. The offset +1 is there, because inBeg and jnBeg
+            ! refer to nodal ranges and not to cell ranges.
+            !
+            do j = (BCData(mm)%jnBeg + 1), BCData(mm)%jnEnd
+                do i = (BCData(mm)%inBeg + 1), BCData(mm)%inEnd
+
+                    ! Compute the normal heat flux on the face. Inward positive.
+                    BCData(mm)%cellHeatFlux(i, j) = -fact * scaleDim * &
+                                                    sqrt(ssi(i, j, 1)**2 + ssi(i, j, 2)**2 + ssi(i, j, 3)**2) * &
+                                                    (viscSubface(mm)%q(i, j, 1) * BCData(mm)%norm(i, j, 1) &
+                                                     + viscSubface(mm)%q(i, j, 2) * BCData(mm)%norm(i, j, 2) &
+                                                     + viscSubface(mm)%q(i, j, 3) * BCData(mm)%norm(i, j, 3))
+                end do
+            end do
+        end if
+    end do bocos
 end subroutine heatFluxes
 
 subroutine setTNSWall(tnsw, npts, sps)
 
-  use constants
-  use blockPointers, only : nDom, nBocos, BCData, BCType
-  use flowVarRefState, only : TRef
-  use utils, only : setPointers
-  implicit none
-
-  ! Input Variables
-  integer(kind=intType), intent(in) :: npts, sps
-  real(kind=realType), intent(in) :: tnsw(npts)
-
-  ! Local Variables
-  integer(kind=intType) :: mm, nn, i, j, ii
-  integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
-
-  ii = 0
-  domains: do nn=1,nDom
-     call setPointers(nn, 1_intType, sps)
-     ! Loop over the number of viscous boundary subfaces of this block.
-     bocos: do mm=1,nBocos
-        isoWall: if (BCType(mm) == NSWallIsoThermal) then
-           jBeg = BCdata(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
-           iBeg = BCData(mm)%inBeg; iEnd = BCData(mm)%inEnd
-           do j=jBeg,jEnd
-              do i=iBeg, iEnd
-                 ii = ii + 1
-                 BCData(mm)%TNS_Wall(i,j) = tnsw(ii)/TRef
-              end do
-           end do
-        end if isoWall
-     end do bocos
-  end do domains
-  ! TODO: The temperature must be interpolated to the coarse meshes.
-  !
-  ! The following lines are extracted from BCData/setBCDataCoarseGrid
-  ! By the design of the subroutine, TNSWall shall be interpolated during this process.
-  ! Yet, the subroutine requires an internal subroutine interpolateBcData.
-  ! It remains a question whether interpolateBcData shall become a normal subroutine.
-  !
-  ! use blockPointers, only : flowDoms
-  ! use inputTimeSpectral, only : nTimeIntervalsSpectral
-  ! use iteration, only : groundLevel
-  ! implicit none
-  ! !
-  ! !      Local variables.
-  ! !
-  ! integer(kind=intType) :: nLevels, level, levm1
-
-  ! ! Determine the number of grid levels.
-
-  ! nLevels = ubound(flowDoms,2)
-
-  ! ! Loop over the coarser grid levels. It is assumed that the
-  ! ! bc data of groundLevel is set correctly.
-
-  ! coarseLevelLoop: do level=(groundLevel+1),nLevels
-
-  !    ! Store the fine grid level a bit easier.
-
-  !    levm1 = level - 1
-
-  !    ! Loop over the number of spectral solutions and local blocks.
-
-  !    spectralLoop: do sps=1,nTimeIntervalsSpectral
-  !       domainsLoop: do i=1,nDom
-
-  !          ! Set the pointers to the coarse block.
-
-  !          call setPointers(i, level, sps)
-
-  !          ! Loop over the boundary subfaces and interpolate the
-  !          ! prescribed boundary data for this grid level.
-
-  !          bocoLoop: do j=1,nBocos
-
-  !             ! Interpolate the data for the possible prescribed boundary
-  !             ! data.
-
-  !             call interpolateBcData(BCData(j)%TNS_Wall, &
-  !                  flowDoms(i,levm1,sps)%BCData(j)%TNS_Wall)
-
-  !          enddo bocoLoop
-  !       enddo domainsLoop
-  !    enddo spectralLoop
-  ! enddo coarseLevelLoop
+    use constants
+    use blockPointers, only: nDom, nBocos, BCData, BCType
+    use flowVarRefState, only: TRef
+    use utils, only: setPointers
+    implicit none
+
+    ! Input Variables
+    integer(kind=intType), intent(in) :: npts, sps
+    real(kind=realType), intent(in) :: tnsw(npts)
+
+    ! Local Variables
+    integer(kind=intType) :: mm, nn, i, j, ii
+    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
+
+    ii = 0
+    domains: do nn = 1, nDom
+        call setPointers(nn, 1_intType, sps)
+        ! Loop over the number of viscous boundary subfaces of this block.
+        bocos: do mm = 1, nBocos
+            isoWall: if (BCType(mm) == NSWallIsoThermal) then
+                jBeg = BCdata(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+                iBeg = BCData(mm)%inBeg; iEnd = BCData(mm)%inEnd
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        ii = ii + 1
+                        BCData(mm)%TNS_Wall(i, j) = tnsw(ii) / TRef
+                    end do
+                end do
+            end if isoWall
+        end do bocos
+    end do domains
+    ! TODO: The temperature must be interpolated to the coarse meshes.
+    !
+    ! The following lines are extracted from BCData/setBCDataCoarseGrid
+    ! By the design of the subroutine, TNSWall shall be interpolated during this process.
+    ! Yet, the subroutine requires an internal subroutine interpolateBcData.
+    ! It remains a question whether interpolateBcData shall become a normal subroutine.
+    !
+    ! use blockPointers, only : flowDoms
+    ! use inputTimeSpectral, only : nTimeIntervalsSpectral
+    ! use iteration, only : groundLevel
+    ! implicit none
+    ! !
+    ! !      Local variables.
+    ! !
+    ! integer(kind=intType) :: nLevels, level, levm1
+
+    ! ! Determine the number of grid levels.
+
+    ! nLevels = ubound(flowDoms,2)
+
+    ! ! Loop over the coarser grid levels. It is assumed that the
+    ! ! bc data of groundLevel is set correctly.
+
+    ! coarseLevelLoop: do level=(groundLevel+1),nLevels
+
+    !    ! Store the fine grid level a bit easier.
+
+    !    levm1 = level - 1
+
+    !    ! Loop over the number of spectral solutions and local blocks.
+
+    !    spectralLoop: do sps=1,nTimeIntervalsSpectral
+    !       domainsLoop: do i=1,nDom
+
+    !          ! Set the pointers to the coarse block.
+
+    !          call setPointers(i, level, sps)
+
+    !          ! Loop over the boundary subfaces and interpolate the
+    !          ! prescribed boundary data for this grid level.
+
+    !          bocoLoop: do j=1,nBocos
+
+    !             ! Interpolate the data for the possible prescribed boundary
+    !             ! data.
+
+    !             call interpolateBcData(BCData(j)%TNS_Wall, &
+    !                  flowDoms(i,levm1,sps)%BCData(j)%TNS_Wall)
+
+    !          enddo bocoLoop
+    !       enddo domainsLoop
+    !    enddo spectralLoop
+    ! enddo coarseLevelLoop
 
 end subroutine setTNSWall
 
 subroutine getTNSWall(tnsw, npts, sps)
 
-  use constants
-  use blockPointers, only : nDom, nBocos, BCData, BCType
-  use flowVarRefState, only : TRef
-  use utils, only : setPointers
-  implicit none
-
-  ! Input Variables
-  integer(kind=intType), intent(in) :: npts, sps
-  real(kind=realType), intent(out) :: tnsw(npts)
-
-  ! Local Variables
-  integer(kind=intType) :: mm, nn, i, j, ii
-  integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
-
-  ii = 0
-  domains: do nn=1,nDom
-     call setPointers(nn, 1_intType, sps)
-     ! Loop over the number of viscous boundary subfaces of this block.
-     bocos: do mm=1,nBocos
-        isoWall: if (BCType(mm) == NSWallIsoThermal) then
-           do j=jBeg,jEnd
-              do i=iBeg, iEnd
-                 ii = ii + 1
-                 tnsw(ii) = BCData(mm)%TNS_Wall(i,j)*Tref
-              end do
-           end do
-        end if isoWall
-     end do bocos
-  end do domains
+    use constants
+    use blockPointers, only: nDom, nBocos, BCData, BCType
+    use flowVarRefState, only: TRef
+    use utils, only: setPointers
+    implicit none
+
+    ! Input Variables
+    integer(kind=intType), intent(in) :: npts, sps
+    real(kind=realType), intent(out) :: tnsw(npts)
+
+    ! Local Variables
+    integer(kind=intType) :: mm, nn, i, j, ii
+    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
+
+    ii = 0
+    domains: do nn = 1, nDom
+        call setPointers(nn, 1_intType, sps)
+        ! Loop over the number of viscous boundary subfaces of this block.
+        bocos: do mm = 1, nBocos
+            isoWall: if (BCType(mm) == NSWallIsoThermal) then
+                do j = jBeg, jEnd
+                    do i = iBeg, iEnd
+                        ii = ii + 1
+                        tnsw(ii) = BCData(mm)%TNS_Wall(i, j) * Tref
+                    end do
+                end do
+            end if isoWall
+        end do bocos
+    end do domains
 end subroutine getTNSWall
diff --git a/src/warping/setCpTarget.f90 b/src/warping/setCpTarget.f90
index 429b8016b..7a54c2563 100644
--- a/src/warping/setCpTarget.f90
+++ b/src/warping/setCpTarget.f90
@@ -1,81 +1,78 @@
 subroutine setCpTargets(cptarget, npts, sps_in)
-  use constants
-  use blockPointers
-  use flowVarRefState
-  use inputTimeSpectral
-  use communication
-  use inputPhysics
-  use utils, only : setPointers
-  implicit none
-  !
-  !      Arguments.
-  !
-  integer(kind=intType), intent(in) :: npts, sps_in
-  real(kind=realType), intent(in) :: cptarget(npts)
-  !
-  !      Local variables.
-  !
-  integer(kind=intType) :: mm, nn, i, j, ii, sps
-  !integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
+    use constants
+    use blockPointers
+    use flowVarRefState
+    use inputTimeSpectral
+    use communication
+    use inputPhysics
+    use utils, only: setPointers
+    implicit none
+    !
+    !      Arguments.
+    !
+    integer(kind=intType), intent(in) :: npts, sps_in
+    real(kind=realType), intent(in) :: cptarget(npts)
+    !
+    !      Local variables.
+    !
+    integer(kind=intType) :: mm, nn, i, j, ii, sps
+    !integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
 
-  sps = sps_in
+    sps = sps_in
 
-  ii = 0
-  domains: do nn=1,nDom
-     call setPointers(nn,1_intType,sps)
+    ii = 0
+    domains: do nn = 1, nDom
+        call setPointers(nn, 1_intType, sps)
 
-     ! Loop over the number of viscous boundary subfaces of this block.
-     ! According to preprocessing/viscSubfaceInfo, visc bocos are numbered
-     ! before other bocos. Therefore, mm_nViscBocos == mm_nBocos
-     bocos: do mm=1,nBocos
-           if(BCType(mm) == EulerWall.or.BCType(mm) == NSWallAdiabatic .or. &
+        ! Loop over the number of viscous boundary subfaces of this block.
+        ! According to preprocessing/viscSubfaceInfo, visc bocos are numbered
+        ! before other bocos. Therefore, mm_nViscBocos == mm_nBocos
+        bocos: do mm = 1, nBocos
+            if (BCType(mm) == EulerWall .or. BCType(mm) == NSWallAdiabatic .or. &
                 BCType(mm) == NSWallIsothermal) then
-              do j=BCData(mm)%jnBeg,BCData(mm)%jnEnd
-                 do i=BCData(mm)%inBeg,BCData(mm)%inEnd
-                    ii = ii + 1
-                    BCData(mm)%CpTarget(i,j) = cptarget(ii)
-                 end do
-              end do
-           end if
+                do j = BCData(mm)%jnBeg, BCData(mm)%jnEnd
+                    do i = BCData(mm)%inBeg, BCData(mm)%inEnd
+                        ii = ii + 1
+                        BCData(mm)%CpTarget(i, j) = cptarget(ii)
+                    end do
+                end do
+            end if
 
+        end do bocos
+    end do domains
 
-     end do bocos
-  end do domains
-
-  
 end subroutine setCpTargets
 
-
 subroutine getCpTargets(cptarget, npts, sps)
 
-  use constants
-  use blockPointers, only : nDom, nBocos, BCData, BCType
+    use constants
+    use blockPointers, only: nDom, nBocos, BCData, BCType
 !  use flowVarRefState, only : TRef
-  use utils, only : setPointers
-  implicit none
+    use utils, only: setPointers
+    implicit none
 
-  ! Input Variables
-  integer(kind=intType), intent(in) :: npts, sps
-  real(kind=realType), intent(out) :: cptarget(npts)
+    ! Input Variables
+    integer(kind=intType), intent(in) :: npts, sps
+    real(kind=realType), intent(out) :: cptarget(npts)
 
-  ! Local Variables
-  integer(kind=intType) :: mm, nn, i, j, ii
-  integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
+    ! Local Variables
+    integer(kind=intType) :: mm, nn, i, j, ii
+    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
 
-  ii = 0
-  domains: do nn=1,nDom
-     call setPointers(nn, 1_intType, sps)
-     ! Loop over the number of viscous boundary subfaces of this block.
-     bocos: do mm=1,nBocos
-        wall: if (BCType(mm) == EulerWall.or.BCType(mm) == NSWallAdiabatic .or. &
-             BCType(mm) == NSWallIsoThermal) then
-           do j=BCData(mm)%jnBeg,BCData(mm)%jnEnd
-              do i=BCData(mm)%inBeg,BCData(mm)%inEnd
-                 ii = ii + 1
-                 CpTarget(ii) = BCData(mm)%CpTarget(i,j)
-              end do
-           end do
-        end if wall
-     end do bocos
-  end do domains
+    ii = 0
+    domains: do nn = 1, nDom
+        call setPointers(nn, 1_intType, sps)
+        ! Loop over the number of viscous boundary subfaces of this block.
+        bocos: do mm = 1, nBocos
+            wall: if (BCType(mm) == EulerWall .or. BCType(mm) == NSWallAdiabatic .or. &
+                      BCType(mm) == NSWallIsoThermal) then
+                do j = BCData(mm)%jnBeg, BCData(mm)%jnEnd
+                    do i = BCData(mm)%inBeg, BCData(mm)%inEnd
+                        ii = ii + 1
+                        CpTarget(ii) = BCData(mm)%CpTarget(i, j)
+                    end do
+                end do
+            end if wall
+        end do bocos
+    end do domains
 end subroutine getCpTargets
diff --git a/src/warping/setTNSWall.f90 b/src/warping/setTNSWall.f90
index ab9cb35db..51546239d 100644
--- a/src/warping/setTNSWall.f90
+++ b/src/warping/setTNSWall.f90
@@ -1,51 +1,48 @@
 subroutine setTNSWall(tnsw, npts, sps_in)
-  use constants
-  use blockPointers
-  use flowVarRefState
-  use inputTimeSpectral
-  use communication
-  use inputPhysics
-  use utils, only : setPointers
-  implicit none
-  !
-  !      Arguments.
-  !
-  integer(kind=intType), intent(in) :: npts, sps_in
-  real(kind=realType), intent(in) :: tnsw(npts)
-  !
-  !      Local variables.
-  !
-  integer(kind=intType) :: mm, nn, i, j, ii, sps
-  integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
-
-
-
-  sps = sps_in
-
-  ii = 0
-  domains: do nn=1,nDom
-     call setPointers(nn,1_intType,sps)
-
-     ! Loop over the number of viscous boundary subfaces of this block.
-     ! According to preprocessing/viscSubfaceInfo, visc bocos are numbered
-     ! before other bocos. Therefore, mm_nViscBocos == mm_nBocos
-     bocos: do mm=1,nBocos
-
-           if(BCType(mm) == EulerWall.or.BCType(mm) == NSWallAdiabatic .or. &
+    use constants
+    use blockPointers
+    use flowVarRefState
+    use inputTimeSpectral
+    use communication
+    use inputPhysics
+    use utils, only: setPointers
+    implicit none
+    !
+    !      Arguments.
+    !
+    integer(kind=intType), intent(in) :: npts, sps_in
+    real(kind=realType), intent(in) :: tnsw(npts)
+    !
+    !      Local variables.
+    !
+    integer(kind=intType) :: mm, nn, i, j, ii, sps
+    integer(kind=intType) :: iBeg, iEnd, jBeg, jEnd
+
+    sps = sps_in
+
+    ii = 0
+    domains: do nn = 1, nDom
+        call setPointers(nn, 1_intType, sps)
+
+        ! Loop over the number of viscous boundary subfaces of this block.
+        ! According to preprocessing/viscSubfaceInfo, visc bocos are numbered
+        ! before other bocos. Therefore, mm_nViscBocos == mm_nBocos
+        bocos: do mm = 1, nBocos
+
+            if (BCType(mm) == EulerWall .or. BCType(mm) == NSWallAdiabatic .or. &
                 BCType(mm) == NSWallIsothermal) then
-              do j=BCData(mm)%jnBeg,BCData(mm)%jnEnd
-                 do i=BCData(mm)%inBeg,BCData(mm)%inEnd
-                    ii = ii + 1
-                    BCData(mm)%TNS_Wall(i,j) = tnsw(ii)/TRef
-                 end do
-              end do
-           end if
-
-
-     end do bocos
-  end do domains
-
-  ! Interpolate TNS_Wall to coarse grids
-  ! call setBCDataCoarseGrid
+                do j = BCData(mm)%jnBeg, BCData(mm)%jnEnd
+                    do i = BCData(mm)%inBeg, BCData(mm)%inEnd
+                        ii = ii + 1
+                        BCData(mm)%TNS_Wall(i, j) = tnsw(ii) / TRef
+                    end do
+                end do
+            end if
+
+        end do bocos
+    end do domains
+
+    ! Interpolate TNS_Wall to coarse grids
+    ! call setBCDataCoarseGrid
 
 end subroutine setTNSWall
diff --git a/src/warping/warping.F90 b/src/warping/warping.F90
index 6c5f890da..51683c133 100644
--- a/src/warping/warping.F90
+++ b/src/warping/warping.F90
@@ -1,463 +1,460 @@
 module warping
 
-  ! This module cotains the required inferface functions for using an
-  ! external mesh warping utility with ADflow
+    ! This module cotains the required inferface functions for using an
+    ! external mesh warping utility with ADflow
 
 contains
 
-  subroutine getCGNSMeshIndices(ndof,indices)
-
-    use constants
-    use blockPointers, only : nDom, nBKGlobal, il, jl, kl, iBegOr, jBegOr, kBegOr
-    use cgnsGrid, only : cgnsDoms, cgnsnDom
-    use utils, only : setPointers
-    implicit none
-
-    ! subroutine arguments
-    integer(kind=intType), intent(in) :: ndof
-    integer(kind=intType), intent(out):: indices(ndof)
-
-    ! Local Variables
-    integer(kind=intType) :: nn,i,j,k,ii,indx,indy,indz,il_cg,jl_cg,kl_cg
-    integer(kind=intType) ,allocatable,dimension(:) :: dof_offset
-
-    allocate(dof_offset(cgnsNDom))
-    dof_offset(1) = 0
-    do nn=2,cgnsNDom
-       dof_offset(nn) = dof_offset(nn-1) + &
-            cgnsDoms(nn-1)%il*cgnsDoms(nn-1)%jl*cgnsDoms(nn-1)%kl*3
-    end do
-
-    ii = 0
-    do nn=1,nDom
-       call setPointers(nn,1_intType,1_intType)
-
-       do k=1,kl
-          do j=1,jl
-             do i=1,il
-                il_cg = cgnsDoms(nbkGlobal)%il
-                jl_cg = cgnsDoms(nbkGlobal)%jl
-                kl_cg = cgnsDoms(nbkGlobal)%kl
-
-                ii = ii + 1
-
-                indx = iBegOr + i - 1
-                indy = jBegOr + j - 1
-                indz = kBegOr + k - 1
-
-                indices(ii*3-2) = dof_offset(nbkGlobal)  + &
-                     (indz-1)*jl_cg*il_cg*3 + &
-                     (indy-1)*il_cg*3       + &
-                     (indx-1)*3
-
-                indices(ii*3-1) = dof_offset(nbkGlobal)  + &
-                     (indz-1)*jl_cg*il_cg*3 + &
-                     (indy-1)*il_cg*3       + &
-                     (indx-1)*3 + 1
-
-                indices(ii*3 ) = dof_offset(nbkGlobal)   + &
-                     (indz-1)*jl_cg*il_cg*3 + &
-                     (indy-1)*il_cg*3       + &
-                     (indx-1)*3 + 2
-
-             end do ! i loop
-          end do ! j loop
-       end do ! k loop
-    end do ! domain loop
-    deallocate(dof_offset)
-  end subroutine getCGNSMeshIndices
-
-  subroutine setGrid(grid,ndof)
-
-    ! The purpose of this routine is to set the grid dof as returned by
-    ! the external warping. This function takes the "Base" grid at the
-    ! first time instance and does rotation/translation operations to
-    ! get the grid at subsequent time instances
-    use constants
-    use blockPointers, only : nDom, il, jl, kl, x
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use section, only : sections, nSections
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use monitor, only : timeUnsteadyRestart, timeUnsteady
-    use inputPhysics, only : equationMode
-    use utils, only : setPointers, rotMatrixRigidBody
-    use preprocessingAPI, only : xhalo
-    implicit none
-
-    integer(kind=intType),intent(in) :: ndof
-    real(kind=realType) ,intent(in) :: grid(ndof)
-
-    ! Local Variables
-
-    integer(kind=intType) :: nn,i,j,k,counter,sps
-    real(kind=realType) :: t(nSections),dt(nSections)
-    real(kind=realType) :: displ(3)
-    real(kind=realType) :: tOld,tNew
-
-    real(kind=realType), dimension(3)   :: rotationPoint,r
-    real(kind=realType), dimension(3,3) :: rotationMatrix
-
-
-    if (equationMode == steady .or. equationMode == TimeSpectral) then
-       timeUnsteady = zero
-
-       ! This is very straight forward...loop over all domains and set all elements
-       do nn=1,nSections
-          dt(nn) = sections(nn)%timePeriod &
-               / real(nTimeIntervalsSpectral,realType)
-       enddo
-
-       do sps = 1,nTimeIntervalsSpectral
-          do nn=1,nSections
-             t(nn) = (sps-1)*dt(nn)
-          enddo
-
-          ! Compute the displacements due to the rigid motion of the mesh.
-
-          displ(:) = zero
-
-          tNew = timeUnsteady + timeUnsteadyRestart
-          tOld = tNew - t(1)
-
-          call rotMatrixRigidBody(tNew, tOld, rotationMatrix, rotationPoint)
-          counter = 0
-          do nn=1,nDom
-             call setPointers(nn,1_intType,sps)
-             do k=1,kl
-                do j=1,jl
-                   do i=1,il
-                      ! r is distance from grid point to rotationPoint
-                      r = grid(3*counter+1:3*counter+3) - rotationPoint
-
-                      X(i,j,k,:) = rotationPoint + matmul(rotationMatrix,r) + displ
-                      counter = counter + 1
-
-                   end do
+    subroutine getCGNSMeshIndices(ndof, indices)
+
+        use constants
+        use blockPointers, only: nDom, nBKGlobal, il, jl, kl, iBegOr, jBegOr, kBegOr
+        use cgnsGrid, only: cgnsDoms, cgnsnDom
+        use utils, only: setPointers
+        implicit none
+
+        ! subroutine arguments
+        integer(kind=intType), intent(in) :: ndof
+        integer(kind=intType), intent(out) :: indices(ndof)
+
+        ! Local Variables
+        integer(kind=intType) :: nn, i, j, k, ii, indx, indy, indz, il_cg, jl_cg, kl_cg
+        integer(kind=intType), allocatable, dimension(:) :: dof_offset
+
+        allocate (dof_offset(cgnsNDom))
+        dof_offset(1) = 0
+        do nn = 2, cgnsNDom
+            dof_offset(nn) = dof_offset(nn - 1) + &
+                             cgnsDoms(nn - 1)%il * cgnsDoms(nn - 1)%jl * cgnsDoms(nn - 1)%kl * 3
+        end do
+
+        ii = 0
+        do nn = 1, nDom
+            call setPointers(nn, 1_intType, 1_intType)
+
+            do k = 1, kl
+                do j = 1, jl
+                    do i = 1, il
+                        il_cg = cgnsDoms(nbkGlobal)%il
+                        jl_cg = cgnsDoms(nbkGlobal)%jl
+                        kl_cg = cgnsDoms(nbkGlobal)%kl
+
+                        ii = ii + 1
+
+                        indx = iBegOr + i - 1
+                        indy = jBegOr + j - 1
+                        indz = kBegOr + k - 1
+
+                        indices(ii * 3 - 2) = dof_offset(nbkGlobal) + &
+                                              (indz - 1) * jl_cg * il_cg * 3 + &
+                                              (indy - 1) * il_cg * 3 + &
+                                              (indx - 1) * 3
+
+                        indices(ii * 3 - 1) = dof_offset(nbkGlobal) + &
+                                              (indz - 1) * jl_cg * il_cg * 3 + &
+                                              (indy - 1) * il_cg * 3 + &
+                                              (indx - 1) * 3 + 1
+
+                        indices(ii * 3) = dof_offset(nbkGlobal) + &
+                                          (indz - 1) * jl_cg * il_cg * 3 + &
+                                          (indy - 1) * il_cg * 3 + &
+                                          (indx - 1) * 3 + 2
+
+                    end do ! i loop
+                end do ! j loop
+            end do ! k loop
+        end do ! domain loop
+        deallocate (dof_offset)
+    end subroutine getCGNSMeshIndices
+
+    subroutine setGrid(grid, ndof)
+
+        ! The purpose of this routine is to set the grid dof as returned by
+        ! the external warping. This function takes the "Base" grid at the
+        ! first time instance and does rotation/translation operations to
+        ! get the grid at subsequent time instances
+        use constants
+        use blockPointers, only: nDom, il, jl, kl, x
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use section, only: sections, nSections
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use monitor, only: timeUnsteadyRestart, timeUnsteady
+        use inputPhysics, only: equationMode
+        use utils, only: setPointers, rotMatrixRigidBody
+        use preprocessingAPI, only: xhalo
+        implicit none
+
+        integer(kind=intType), intent(in) :: ndof
+        real(kind=realType), intent(in) :: grid(ndof)
+
+        ! Local Variables
+
+        integer(kind=intType) :: nn, i, j, k, counter, sps
+        real(kind=realType) :: t(nSections), dt(nSections)
+        real(kind=realType) :: displ(3)
+        real(kind=realType) :: tOld, tNew
+
+        real(kind=realType), dimension(3) :: rotationPoint, r
+        real(kind=realType), dimension(3, 3) :: rotationMatrix
+
+        if (equationMode == steady .or. equationMode == TimeSpectral) then
+            timeUnsteady = zero
+
+            ! This is very straight forward...loop over all domains and set all elements
+            do nn = 1, nSections
+                dt(nn) = sections(nn)%timePeriod &
+                         / real(nTimeIntervalsSpectral, realType)
+            end do
+
+            do sps = 1, nTimeIntervalsSpectral
+                do nn = 1, nSections
+                    t(nn) = (sps - 1) * dt(nn)
                 end do
-             end do
-          end do
-          call xhalo(1_intType)
-       end do
-    else
-       counter = 0
-       sps = 1
-       do nn=1,nDom
-          call setPointers(nn,1_intType,sps)
-          do k=1,kl
-             do j=1,jl
-                do i=1,il
-                   X(i,j,k,:) = grid(3*counter+1:3*counter+3)
-                   counter = counter + 1
+
+                ! Compute the displacements due to the rigid motion of the mesh.
+
+                displ(:) = zero
+
+                tNew = timeUnsteady + timeUnsteadyRestart
+                tOld = tNew - t(1)
+
+                call rotMatrixRigidBody(tNew, tOld, rotationMatrix, rotationPoint)
+                counter = 0
+                do nn = 1, nDom
+                    call setPointers(nn, 1_intType, sps)
+                    do k = 1, kl
+                        do j = 1, jl
+                            do i = 1, il
+                                ! r is distance from grid point to rotationPoint
+                                r = grid(3 * counter + 1:3 * counter + 3) - rotationPoint
+
+                                X(i, j, k, :) = rotationPoint + matmul(rotationMatrix, r) + displ
+                                counter = counter + 1
+
+                            end do
+                        end do
+                    end do
                 end do
-             end do
-          end do
-       end do
-       call xhalo(1_intType)
-    end if
-
-  end subroutine setGrid
-
-  subroutine setGridForOneInstance(grid,sps)
-
-    ! The purpose of this routine is to set the grid dof as returned by
-    ! the external warping. This routine will take in the deformed mesh
-    ! and set it to "sps"th time instance
-    use constants
-    use blockPointers, only : nDom, il, jl, kl, x
-    use section, only : sections, nSections
-    use inputPhysics, only : equationMode
-    use utils, only : setPointers
-    use preprocessingAPI, only : xhalo
-    implicit none
-
-    real(kind=realType) ,dimension(:), intent(in) :: grid
-    integer, intent(in) :: sps
-
-    ! Local Variables
-
-    integer(kind=intType) :: nn,i,j,k,counter
-
-    counter = 0
-    do nn=1,nDom
-       call setPointers(nn,1_intType,sps)
-       do k=1,kl
-          do j=1,jl
-             do i=1,il
-                X(i,j,k,:) = grid(3*counter+1:3*counter+3)
-                counter = counter + 1
-             end do
-          end do
-       end do
-    end do
-    call xhalo(1_intType)
-
-  end subroutine setGridForOneInstance
-
-
-  subroutine getGrid(grid,ndof)
-
-    ! Opposite of setGrid. This is ONLY a debugging function. NOT used
-    ! in regular usage. Really only useful for direct mesh manipulation
-    ! on single block and a single processor. s
-
-    use constants
-    use blockPointers, only : nDom, il, jl, kl, x
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use utils, only : setPointers
-    implicit none
-    integer(kind=intType),intent(in) :: ndof
-    real(kind=realType) ,intent(out) :: grid(ndof)
-
-    ! Local Variables
-    integer(kind=intType) :: nn,i,j,k,l,counter,sps
-
-    ! This is very straight forward...loop over all domains and copy out
-    counter = 1
-    do sps = 1,nTimeIntervalsSpectral
-       do nn=1,nDom
-          call setPointers(nn,1_intType,sps)
-          do k=1,kl
-             do j=1,jl
-                do i=1,il
-                   do l=1,3
-                      grid(counter) = X(i,j,k,l)
-                      counter = counter + 1
-                   end do
+                call xhalo(1_intType)
+            end do
+        else
+            counter = 0
+            sps = 1
+            do nn = 1, nDom
+                call setPointers(nn, 1_intType, sps)
+                do k = 1, kl
+                    do j = 1, jl
+                        do i = 1, il
+                            X(i, j, k, :) = grid(3 * counter + 1:3 * counter + 3)
+                            counter = counter + 1
+                        end do
+                    end do
                 end do
-             end do
-          end do
-       end do
-    end do
-
-  end subroutine getGrid
-
-  subroutine getStatePerturbation(randVec, nRand, randState, nRandState)
-
-    use constants
-    use cgnsGrid, only : cgnsDoms, cgnsNDom
-    use blockPointers, only : nDom, il, jl, kl, nx, ny, nz, x, nbkglobal, iBegOr, jBegOr, kBegOr
-    use communication, only : adflow_comm_world, myid
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use adjointVars, only : nCellsLocal
-    use flowVarRefState, only : nw
-    use utils, only : setPointers, EChk
-    implicit none
-
-    ! Input Parameters
-    integer(kind=intType), intent(in) :: nRand, nRandState
-    real(kind=realType), intent(in), dimension(nRand) :: randVec
-
-    ! Ouput Parameters
-    real(kind=realType), intent(out), dimension(nRandState) :: randState
-
-    ! Working parameters
-    integer(kind=intType) :: i, j, k, ierr, l, nx_cg, ny_cg, nz_cg
-    integer(kind=intType) :: sps, ii, indx, indy, indz, nn, cgnsInd
-    integer(kind=intType) :: dofCGNSPerInstance
-    integer(kind=intType) ,allocatable, dimension(:) :: dof_offset
-
-    allocate(dof_offset(cgnsNDom))
-    dof_offset(1) = 0
-    do nn=2,cgnsNDom
-       dof_offset(nn) = dof_offset(nn-1) + &
-            cgnsDoms(nn-1)%nx*cgnsDoms(nn-1)%ny*cgnsDoms(nn-1)%nz*nw
-    end do
-
-    dofCGNSPerInstance = nRand/nTimeIntervalsSpectral
-
-    ii = 0
-    do nn=1, nDom
-       do sps=1, nTimeIntervalsSpectral
-          call setPointers(nn, 1, sps)
-          do k=2, kl
-             do j=2, jl
-                do i=2, il
-
-                   nx_cg = cgnsDoms(nbkGlobal)%nx
-                   ny_cg = cgnsDoms(nbkGlobal)%ny
-                   nz_cg = cgnsDoms(nbkGlobal)%nz
-
-                   indx = iBegOr + i - 2
-                   indy = jBegOr + j - 2
-                   indz = kBegOr + k - 2
-
-                   do l=1, nw
-                      cgnsInd =  (sps-1)*dofCGNSPerInstance + &
-                           dof_offset(nbkGlobal)  + &
-                           (indz-1)*ny_cg*nx_cg*nw + &
-                           (indy-1)*nx_cg*nw       + &
-                           (indx-1)*nw + l
-                      randState(nw*ii + l) = randVec(cgnsInd)
-                   end do
-
-                   ii = ii + 1
-
+            end do
+            call xhalo(1_intType)
+        end if
+
+    end subroutine setGrid
+
+    subroutine setGridForOneInstance(grid, sps)
+
+        ! The purpose of this routine is to set the grid dof as returned by
+        ! the external warping. This routine will take in the deformed mesh
+        ! and set it to "sps"th time instance
+        use constants
+        use blockPointers, only: nDom, il, jl, kl, x
+        use section, only: sections, nSections
+        use inputPhysics, only: equationMode
+        use utils, only: setPointers
+        use preprocessingAPI, only: xhalo
+        implicit none
+
+        real(kind=realType), dimension(:), intent(in) :: grid
+        integer, intent(in) :: sps
+
+        ! Local Variables
+
+        integer(kind=intType) :: nn, i, j, k, counter
+
+        counter = 0
+        do nn = 1, nDom
+            call setPointers(nn, 1_intType, sps)
+            do k = 1, kl
+                do j = 1, jl
+                    do i = 1, il
+                        X(i, j, k, :) = grid(3 * counter + 1:3 * counter + 3)
+                        counter = counter + 1
+                    end do
+                end do
+            end do
+        end do
+        call xhalo(1_intType)
+
+    end subroutine setGridForOneInstance
+
+    subroutine getGrid(grid, ndof)
+
+        ! Opposite of setGrid. This is ONLY a debugging function. NOT used
+        ! in regular usage. Really only useful for direct mesh manipulation
+        ! on single block and a single processor. s
+
+        use constants
+        use blockPointers, only: nDom, il, jl, kl, x
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use utils, only: setPointers
+        implicit none
+        integer(kind=intType), intent(in) :: ndof
+        real(kind=realType), intent(out) :: grid(ndof)
+
+        ! Local Variables
+        integer(kind=intType) :: nn, i, j, k, l, counter, sps
+
+        ! This is very straight forward...loop over all domains and copy out
+        counter = 1
+        do sps = 1, nTimeIntervalsSpectral
+            do nn = 1, nDom
+                call setPointers(nn, 1_intType, sps)
+                do k = 1, kl
+                    do j = 1, jl
+                        do i = 1, il
+                            do l = 1, 3
+                                grid(counter) = X(i, j, k, l)
+                                counter = counter + 1
+                            end do
+                        end do
+                    end do
+                end do
+            end do
+        end do
+
+    end subroutine getGrid
+
+    subroutine getStatePerturbation(randVec, nRand, randState, nRandState)
+
+        use constants
+        use cgnsGrid, only: cgnsDoms, cgnsNDom
+        use blockPointers, only: nDom, il, jl, kl, nx, ny, nz, x, nbkglobal, iBegOr, jBegOr, kBegOr
+        use communication, only: adflow_comm_world, myid
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use adjointVars, only: nCellsLocal
+        use flowVarRefState, only: nw
+        use utils, only: setPointers, EChk
+        implicit none
+
+        ! Input Parameters
+        integer(kind=intType), intent(in) :: nRand, nRandState
+        real(kind=realType), intent(in), dimension(nRand) :: randVec
+
+        ! Ouput Parameters
+        real(kind=realType), intent(out), dimension(nRandState) :: randState
+
+        ! Working parameters
+        integer(kind=intType) :: i, j, k, ierr, l, nx_cg, ny_cg, nz_cg
+        integer(kind=intType) :: sps, ii, indx, indy, indz, nn, cgnsInd
+        integer(kind=intType) :: dofCGNSPerInstance
+        integer(kind=intType), allocatable, dimension(:) :: dof_offset
+
+        allocate (dof_offset(cgnsNDom))
+        dof_offset(1) = 0
+        do nn = 2, cgnsNDom
+            dof_offset(nn) = dof_offset(nn - 1) + &
+                             cgnsDoms(nn - 1)%nx * cgnsDoms(nn - 1)%ny * cgnsDoms(nn - 1)%nz * nw
+        end do
+
+        dofCGNSPerInstance = nRand / nTimeIntervalsSpectral
+
+        ii = 0
+        do nn = 1, nDom
+            do sps = 1, nTimeIntervalsSpectral
+                call setPointers(nn, 1, sps)
+                do k = 2, kl
+                    do j = 2, jl
+                        do i = 2, il
+
+                            nx_cg = cgnsDoms(nbkGlobal)%nx
+                            ny_cg = cgnsDoms(nbkGlobal)%ny
+                            nz_cg = cgnsDoms(nbkGlobal)%nz
+
+                            indx = iBegOr + i - 2
+                            indy = jBegOr + j - 2
+                            indz = kBegOr + k - 2
+
+                            do l = 1, nw
+                                cgnsInd = (sps - 1) * dofCGNSPerInstance + &
+                                          dof_offset(nbkGlobal) + &
+                                          (indz - 1) * ny_cg * nx_cg * nw + &
+                                          (indy - 1) * nx_cg * nw + &
+                                          (indx - 1) * nw + l
+                                randState(nw * ii + l) = randVec(cgnsInd)
+                            end do
+
+                            ii = ii + 1
+
+                        end do
+                    end do
                 end do
-             end do
-          end do
-       end do
-    end do
-  end subroutine getStatePerturbation
-
-  subroutine getSurfacePerturbation(xRand, nRand, randSurface, nRandSurface, famList, nFamList, sps)
-
-    use constants
-    use blockPointers, only : nDom, BCData, nBocos, BCFaceID, il, jl ,kl
-    use communication, only : adflow_comm_world, myid
-    use inputTimeSpectral, only : nTimeIntervalsSpectral
-    use utils, only : setPointers, EChk
-    use sorting, only : famInList
-    use oversetData, only : zipperMeshes, zipperMesh, oversetPresent
-    use surfaceFamilies, only : BCFamGroups, familyExchange, BCFamExchange
+            end do
+        end do
+    end subroutine getStatePerturbation
+
+    subroutine getSurfacePerturbation(xRand, nRand, randSurface, nRandSurface, famList, nFamList, sps)
+
+        use constants
+        use blockPointers, only: nDom, BCData, nBocos, BCFaceID, il, jl, kl
+        use communication, only: adflow_comm_world, myid
+        use inputTimeSpectral, only: nTimeIntervalsSpectral
+        use utils, only: setPointers, EChk
+        use sorting, only: famInList
+        use oversetData, only: zipperMeshes, zipperMesh, oversetPresent
+        use surfaceFamilies, only: BCFamGroups, familyExchange, BCFamExchange
 #include <petsc/finclude/petsc.h>
-    use petsc
-    implicit none
-
-    ! Input Parameters
-    integer(kind=intType), intent(in) :: nRand, nRandSurface
-    real(kind=realType), intent(in), dimension(nRand) :: xRand
-    integer(kind=intType), intent(in) :: nFamList, famList(nFamList), sps
-    ! Ouput Parameters
-    real(kind=realType), intent(inout), dimension(3*nRandSurface) :: randSurface
-
-    ! Working parameters
-    integer(kind=intType) :: i, j, k, ierr, iDim, iBeg, iEnd, jBeg, jEnd, nn, mm
-    integer(kind=intType) :: ii, jj, indI, indJ, indK, jjInd, iBCGroup
-    type(zipperMesh), pointer :: zipper
-    type(familyexchange), pointer :: exch
-    logical :: BCGroupNeeded
-    real(kind=realType), dimension(:), pointer :: localPtr
-
-    ii = 0
-    jj = 0
-    domains: do nn=1,nDom
-       call setPointers(nn, 1_intType, sps)
-
-       ! Loop over the number of boundary subfaces of this block.
-       bocos: do mm=1,nBocos
-
-          ! NODE Based
-          jBeg = BCData(mm)%jnBeg ; jEnd = BCData(mm)%jnEnd
-          iBeg = BCData(mm)%inBeg ; iEnd = BCData(mm)%inEnd
-
-          famInclude: if (famInList(BCdata(mm)%famID, famList)) then
-
-             do j=jBeg, jEnd ! This is a node loop
-                do i=iBeg, iEnd ! This is a node loop
-                   select case(BCFaceID(mm))
-                   case(imin)
-                      indI = 1
-                      indJ = i
-                      indK = j
-                   case(imax)
-                      indI = il
-                      indJ = i
-                      indK = j
-                   case(jmin)
-                      indI = i
-                      indJ = 1
-                      indK = j
-                   case(jmax)
-                      indI = i
-                      indJ = jl
-                      indK = j
-                   case(kmin)
-                      indI = i
-                      indJ = j
-                      indK = 1
-                   case(kmax)
-                      indI = i
-                      indJ = j
-                      indK = kl
-                   end select
-
-                   do iDim=1,3
-                      jjInd = jj + (indK-1)*il*jl + (indJ-1)*il + (indI-1) + iDim
-                      randSurface(3*ii+iDim) = xRand(jjInd)
-                   end do
-                   ii = ii +1
+        use petsc
+        implicit none
+
+        ! Input Parameters
+        integer(kind=intType), intent(in) :: nRand, nRandSurface
+        real(kind=realType), intent(in), dimension(nRand) :: xRand
+        integer(kind=intType), intent(in) :: nFamList, famList(nFamList), sps
+        ! Ouput Parameters
+        real(kind=realType), intent(inout), dimension(3*nRandSurface) :: randSurface
+
+        ! Working parameters
+        integer(kind=intType) :: i, j, k, ierr, iDim, iBeg, iEnd, jBeg, jEnd, nn, mm
+        integer(kind=intType) :: ii, jj, indI, indJ, indK, jjInd, iBCGroup
+        type(zipperMesh), pointer :: zipper
+        type(familyexchange), pointer :: exch
+        logical :: BCGroupNeeded
+        real(kind=realType), dimension(:), pointer :: localPtr
+
+        ii = 0
+        jj = 0
+        domains: do nn = 1, nDom
+            call setPointers(nn, 1_intType, sps)
+
+            ! Loop over the number of boundary subfaces of this block.
+            bocos: do mm = 1, nBocos
+
+                ! NODE Based
+                jBeg = BCData(mm)%jnBeg; jEnd = BCData(mm)%jnEnd
+                iBeg = BCData(mm)%inBeg; iEnd = BCData(mm)%inEnd
+
+                famInclude: if (famInList(BCdata(mm)%famID, famList)) then
+
+                    do j = jBeg, jEnd ! This is a node loop
+                        do i = iBeg, iEnd ! This is a node loop
+                            select case (BCFaceID(mm))
+                            case (imin)
+                                indI = 1
+                                indJ = i
+                                indK = j
+                            case (imax)
+                                indI = il
+                                indJ = i
+                                indK = j
+                            case (jmin)
+                                indI = i
+                                indJ = 1
+                                indK = j
+                            case (jmax)
+                                indI = i
+                                indJ = jl
+                                indK = j
+                            case (kmin)
+                                indI = i
+                                indJ = j
+                                indK = 1
+                            case (kmax)
+                                indI = i
+                                indJ = j
+                                indK = kl
+                            end select
+
+                            do iDim = 1, 3
+                                jjInd = jj + (indK - 1) * il * jl + (indJ - 1) * il + (indI - 1) + iDim
+                                randSurface(3 * ii + iDim) = xRand(jjInd)
+                            end do
+                            ii = ii + 1
+                        end do
+                    end do
+                end if famInclude
+            end do bocos
+
+            ! jj is the counter through xRand. Increment it by the full
+            ! block.
+            jj = jj + il * jl * kl * 3
+        end do domains
+
+        ! No overset or not zipper, return
+        if (.not. oversetPresent) then ! .or. .not. includeZipper) then
+            return
+        end if
+
+        ! If there are zipper meshes, we must include the nodes that the
+        ! zipper triangles will use.
+        do iBCGroup = 1, nFamExchange
+
+            zipper => zipperMeshes(iBCGroup)
+
+            if (.not. zipper%allocated) then
+                cycle
+            end if
+
+            exch => BCFamExchange(iBCGroup, sps)
+            BCGroupNeeded = .False.
+            BCGroupFamLoop: do i = 1, size(BCFamGroups(iBCGroup)%famList)
+                if (famInList(BCFamGroups(iBCGroup)%famList(i), famList)) then
+                    BCGroupNeeded = .True.
+                    exit BCGroupFamLoop
+                end if
+            end do BCGroupFamLoop
+
+            if (.not. BCGroupNeeded) then
+                cycle
+            end if
+
+            ! Now we know we *actually* need something from this BCGroup.
+
+            ! Loop over each dimension individually since we have a scalar
+            ! scatter.
+            dimLoop: do iDim = 1, 3
+
+                call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                ! The local Pointer is just the localRandSurface we've set
+                ! above.
+                do j = 1, size(localPtr)
+                    localPtr(i) = randSurface(3 * (j - 1) + iDim)
                 end do
-             end do
-          end if famInclude
-       end do bocos
-
-       ! jj is the counter through xRand. Increment it by the full
-       ! block.
-       jj = jj + il*jl*kl*3
-    end do domains
-
-    ! No overset or not zipper, return
-    if (.not. oversetPresent) then ! .or. .not. includeZipper) then
-       return
-    end if
-
-
-    ! If there are zipper meshes, we must include the nodes that the
-    ! zipper triangles will use.
-    do iBCGroup=1, nFamExchange
-
-       zipper => zipperMeshes(iBCGroup)
-
-       if (.not. zipper%allocated) then
-          cycle
-       end if
-
-       exch => BCFamExchange(iBCGroup, sps)
-       BCGroupNeeded = .False.
-       BCGroupFamLoop: do i=1, size(BCFamGroups(iBCGroup)%famList)
-          if (famInList(BCFamGroups(iBCGroup)%famList(i), famList)) then
-             BCGroupNeeded = .True.
-             exit BCGroupFamLoop
-          end if
-       end do BCGroupFamLoop
-
-       if (.not. BCGroupNeeded) then
-          cycle
-       end if
-
-       ! Now we know we *actually* need something from this BCGroup.
-
-       ! Loop over each dimension individually since we have a scalar
-       ! scatter.
-       dimLoop: do iDim=1,3
-
-          call vecGetArrayF90(exch%nodeValLocal, localPtr, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          ! The local Pointer is just the localRandSurface we've set
-          ! above.
-          do j=1, size(localPtr)
-             localPtr(i) = randSurface(3*(j-1) + iDim)
-          end do
-
-          ! Restore the pointer
-          call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          ! Now scatter this to the zipper
-          call VecScatterBegin(zipper%scatter, exch%nodeValLocal,&
-               zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          call VecScatterEnd(zipper%scatter, exch%nodeValLocal,&
-               zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          ! The values we need are precisely what is in zipper%localVal
-          call vecGetArrayF90(zipper%localVal, localPtr, ierr)
-          call EChk(ierr,__FILE__,__LINE__)
-
-          ! Just copy the received seeds into the random aray
-          do j=1, size(localPtr)
-             ! Careful here becuase we have to interlate the dim
-             randSurface(3*ii + 3*(j-1) + iDim) = localPtr(j)
-          end do
-
-       end do dimLoop
-
-       ! Increcment the running ii counter.
-       ii = ii + size(localPtr)
-    end do
-  end subroutine getSurfacePerturbation
+
+                ! Restore the pointer
+                call vecRestoreArrayF90(exch%nodeValLocal, localPtr, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                ! Now scatter this to the zipper
+                call VecScatterBegin(zipper%scatter, exch%nodeValLocal, &
+                                     zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                call VecScatterEnd(zipper%scatter, exch%nodeValLocal, &
+                                   zipper%localVal, INSERT_VALUES, SCATTER_FORWARD, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                ! The values we need are precisely what is in zipper%localVal
+                call vecGetArrayF90(zipper%localVal, localPtr, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                ! Just copy the received seeds into the random aray
+                do j = 1, size(localPtr)
+                    ! Careful here becuase we have to interlate the dim
+                    randSurface(3 * ii + 3 * (j - 1) + iDim) = localPtr(j)
+                end do
+
+            end do dimLoop
+
+            ! Increcment the running ii counter.
+            ii = ii + size(localPtr)
+        end do
+    end subroutine getSurfacePerturbation
 
 end module warping
diff --git a/src_cs/modules/complexify.f90 b/src_cs/modules/complexify.f90
index ff1269baa..4d115944c 100644
--- a/src_cs/modules/complexify.f90
+++ b/src_cs/modules/complexify.f90
@@ -1,5 +1,5 @@
-!	Written for 'complexify.py 1.3'
-!	J.R.R.A.Martins 1999
+!        Written for 'complexify.py 1.3'
+!        J.R.R.A.Martins 1999
 !       21-Apr-00  Fixed tan, sinh, cosh
 !                  sign now returns complex
 !                  added log10 and nint
@@ -24,13 +24,12 @@
 
 module complexify
 
-
-  implicit none
+    implicit none
 
 ! ABS
-  interface abs
-     module procedure abs_c
-  end interface
+    interface abs
+        module procedure abs_c
+    end interface
 
 ! COSD
 !  interface cosd
@@ -38,9 +37,9 @@ module complexify
 !  end interface
 
 ! ACOS
-  interface acos
-     module procedure acos_c
-  end interface
+    interface acos
+        module procedure acos_c
+    end interface
 
 ! SIND
 !  interface sind
@@ -48,126 +47,126 @@ module complexify
 !  end interface
 
 ! ASIN
-  interface asin
-     module procedure asin_c
-  end interface
+    interface asin
+        module procedure asin_c
+    end interface
 
 ! ATAN
-  interface atan
-     module procedure atan_c
-  end interface
+    interface atan
+        module procedure atan_c
+    end interface
 
 ! ATAN2
-  interface atan2
-     module procedure atan2_cc
-  end interface
+    interface atan2
+        module procedure atan2_cc
+    end interface
 
 ! COSH
-  interface cosh
-     module procedure cosh_c
-  end interface
+    interface cosh
+        module procedure cosh_c
+    end interface
 
 ! MAX (limited to 2-4 complex args, 2 mixed args)
-  interface max
-     module procedure max_cc
-     module procedure max_cr
-     module procedure max_rc
-     module procedure max_ccc     ! added because of DFLUX.f
-     module procedure max_cccc     ! added because of DFLUX.f
-  end interface
+    interface max
+        module procedure max_cc
+        module procedure max_cr
+        module procedure max_rc
+        module procedure max_ccc     ! added because of DFLUX.f
+        module procedure max_cccc     ! added because of DFLUX.f
+    end interface
 
 ! MIN (limited to 2-4 complex args, 2 mixed args)
-  interface min
-     module procedure min_cc
-     module procedure min_cr
-     module procedure min_rc
-     module procedure min_ccc
-     module procedure min_cccc
-  end interface
+    interface min
+        module procedure min_cc
+        module procedure min_cr
+        module procedure min_rc
+        module procedure min_ccc
+        module procedure min_cccc
+    end interface
 
 ! MINVAL
-  interface minval
-      module procedure minval_c
-  end interface minval
+    interface minval
+        module procedure minval_c
+    end interface minval
 
 ! MAXVAL
-  interface maxval
-      module procedure maxval_c
-  end interface maxval
+    interface maxval
+        module procedure maxval_c
+    end interface maxval
 
 ! SIGN
-  interface sign
-     module procedure sign_cc
-     module procedure sign_cca
-     module procedure sign_cr
-     module procedure sign_rc
-  end interface
+    interface sign
+        module procedure sign_cc
+        module procedure sign_cca
+        module procedure sign_cr
+        module procedure sign_rc
+    end interface
 
 ! DIM
-  interface dim
-     module procedure dim_cc
-     module procedure dim_cr
-     module procedure dim_rc
-  end interface
+    interface dim
+        module procedure dim_cc
+        module procedure dim_cr
+        module procedure dim_rc
+    end interface
 
 ! SINH
-  interface sinh
-     module procedure sinh_c
-  end interface
+    interface sinh
+        module procedure sinh_c
+    end interface
 
 ! TAN
-  interface tan
-     module procedure tan_c
-  end interface
+    interface tan
+        module procedure tan_c
+    end interface
 
 ! TANH
-  interface tanh
-     module procedure tanh_c
-  end interface
+    interface tanh
+        module procedure tanh_c
+    end interface
 
 ! LOG10
-  interface log10
-     module procedure log10_c
-  end interface
+    interface log10
+        module procedure log10_c
+    end interface
 
 ! NINT
-  interface nint
-     module procedure nint_c
-  end interface
+    interface nint
+        module procedure nint_c
+    end interface
 
 ! EPSILON
-  interface epsilon
-     module procedure epsilon_c
-  end interface
+    interface epsilon
+        module procedure epsilon_c
+    end interface
 
 ! <
-  interface operator (<)
-     module procedure lt_cc
-     module procedure lt_cr
-     module procedure lt_rc
-     module procedure lt_ci
-     module procedure lt_ic
-  end interface
+    interface operator(<)
+        module procedure lt_cc
+        module procedure lt_cr
+        module procedure lt_rc
+        module procedure lt_ci
+        module procedure lt_ic
+    end interface
 
 ! <=
-  interface operator (<=)
-     module procedure le_cc
-     module procedure le_cr
-     module procedure le_rc
-     module procedure le_ci
-     module procedure le_ic
-  end interface
+    interface operator(<=)
+        module procedure le_cc
+        module procedure le_cr
+        module procedure le_rc
+        module procedure le_ci
+        module procedure le_ic
+    end interface
 
 ! >
-  interface operator (>)
-     module procedure gt_cc
-     module procedure gt_cr
-     module procedure gt_rc
-     module procedure gt_ci
-     module procedure gt_ic
-     module procedure gt_cac
-     module procedure gt_car
-  end interface
+    interface operator(>)
+        module procedure gt_cc
+        module procedure gt_cr
+        module procedure gt_rc
+        module procedure gt_ci
+        module procedure gt_ic
+        module procedure gt_cac
+        module procedure gt_car
+    end interface
 
 !! MIPSpro Compilers: Version 7.30 won't take .ge. and .eq..
 !! But pgf90 on Linux doesn't complain, go figure.
@@ -180,13 +179,13 @@ module complexify
 !! your compiler
 !!
 ! >=
-  interface operator (>=)
-     module procedure ge_cc
-     module procedure ge_cr
-     module procedure ge_rc
-     module procedure ge_ci
-     module procedure ge_ic
-  end interface
+    interface operator(>=)
+        module procedure ge_cc
+        module procedure ge_cr
+        module procedure ge_rc
+        module procedure ge_ci
+        module procedure ge_ic
+    end interface
 ! interface operator (.cge.)
 !    module procedure ge_cc
 !    module procedure ge_rr
@@ -208,22 +207,22 @@ module complexify
 !     module procedure eq_ci
 !     module procedure eq_ic
 !  end interface
-  interface operator (.ceq.)
-     module procedure eq_cc
-     module procedure eq_rr
-     module procedure eq_ii
-     module procedure eq_iai
-     module procedure eq_iaia
-     module procedure eq_i8i8
-     module procedure eq_i1i1
-     module procedure eq_aa
-     module procedure eq_cr
-     module procedure eq_rc
-     module procedure eq_ci
-     module procedure eq_ic
-     module procedure eq_ir
-     module procedure eq_ri
-  end interface
+    interface operator(.ceq.)
+        module procedure eq_cc
+        module procedure eq_rr
+        module procedure eq_ii
+        module procedure eq_iai
+        module procedure eq_iaia
+        module procedure eq_i8i8
+        module procedure eq_i1i1
+        module procedure eq_aa
+        module procedure eq_cr
+        module procedure eq_rc
+        module procedure eq_ci
+        module procedure eq_ic
+        module procedure eq_ir
+        module procedure eq_ri
+    end interface
 
 ! /=
 !  interface operator (/=)
@@ -233,23 +232,23 @@ module complexify
 !     module procedure ne_ci
 !     module procedure ne_ic
 !  end interface
-  interface operator (.cne.)
-     module procedure ne_cc
-     module procedure ne_rr
-     module procedure ne_ii
-     module procedure ne_aa
-     module procedure ne_cr
-     module procedure ne_rc
-     module procedure ne_ci
-     module procedure ne_ic
-     module procedure ne_ir
-     module procedure ne_ri
-  end interface
+    interface operator(.cne.)
+        module procedure ne_cc
+        module procedure ne_rr
+        module procedure ne_ii
+        module procedure ne_aa
+        module procedure ne_cr
+        module procedure ne_rc
+        module procedure ne_ci
+        module procedure ne_ic
+        module procedure ne_ir
+        module procedure ne_ri
+    end interface
 
 ! floor
-  interface floor
-     module procedure floor_c
-  end interface
+    interface floor
+        module procedure floor_c
+    end interface
 
 contains
 
@@ -258,12 +257,12 @@ module complexify
 !
 
 ! ABS, intrinsic
-  complex(kind=8) function abs_c(val)
-    complex(kind=8), intent(in) :: val
-    abs_c = val
-    if (real(val) < 0) abs_c = cmplx(-real(val),-aimag(val))
-    return
-  end function abs_c
+    complex(kind=8) function abs_c(val)
+        complex(kind=8), intent(in) :: val
+        abs_c = val
+        if (real(val) < 0) abs_c = cmplx(-real(val), -aimag(val))
+        return
+    end function abs_c
 
 ! COSD
 !  complex(kind=8) function cosd_c(z)
@@ -278,26 +277,26 @@ end function abs_c
 !  end function sind_c
 
 ! ACOS
-  complex(kind=8) function acos_c(z)
-    complex(kind=8), intent(in) :: z
+    complex(kind=8) function acos_c(z)
+        complex(kind=8), intent(in) :: z
 !   acos_c = - cmplx(0., 1.)*log(z+sqrt(z**2-1.))
 !   not general complex valued formula:
-    acos_c = cmplx(acos(real(z)),-aimag(z)/sqrt(1.-real(z)**2))
-    return
-  end function acos_c
+        acos_c = cmplx(acos(real(z)), -aimag(z) / sqrt(1.-real(z)**2))
+        return
+    end function acos_c
 
 ! ASIN
-  complex(kind=8) function asin_c(z)
-    complex(kind=8), intent(in) :: z
+    complex(kind=8) function asin_c(z)
+        complex(kind=8), intent(in) :: z
 !   asin_c = - cmplx(0., 1.)*log(cmplx(0.,1.)*z+sqrt(1.-z**2))
 !   not general complex valued formula:
-    asin_c = cmplx(asin(real(z)),aimag(z)/sqrt(1.-real(z)**2))
-    return
-  end function asin_c
+        asin_c = cmplx(asin(real(z)), aimag(z) / sqrt(1.-real(z)**2))
+        return
+    end function asin_c
 
 ! ATAN
-  complex(kind=8) function atan_c(z)
-    complex(kind=8), intent(in) :: z
+    complex(kind=8) function atan_c(z)
+        complex(kind=8), intent(in) :: z
 !   complex(kind=8) z2
 !   real(kind=8) pi2, xans, yans, r, r2, x, y
 !   pi2 = 2.0*atan(1.0)
@@ -309,13 +308,13 @@ complex(kind=8) function atan_c(z)
 !   yans   = 0.25*log((r2+2.0*y+1.0)/(r2-2.0*y+1.0))
 !   atan_c = cmplx (xans, yans)
 !   not general complex valued formula:
-    atan_c = cmplx(atan(real(z)),aimag(z)/(1.+real(z)**2))
-    return
-  end function atan_c
+        atan_c = cmplx(atan(real(z)), aimag(z) / (1.+real(z)**2))
+        return
+    end function atan_c
 
 ! ATAN2
-  complex(kind=8) function atan2_cc(csn, ccs)
-    complex(kind=8), intent(in) :: csn, ccs
+    complex(kind=8) function atan2_cc(csn, ccs)
+        complex(kind=8), intent(in) :: csn, ccs
 !   real(kind=8) pi
 !   pi = 4.0*atan(1.0)
 !   if (sqrt(real(ccs)**2 + aimag(ccs)**2).eq.0.) then  ! abs orig
@@ -330,54 +329,54 @@ complex(kind=8) function atan2_cc(csn, ccs)
 !     if (real(atan2_cc).gt.pi) atan2_cc = atan2_cc - 2.0*pi
 !   end if
 !   not general complex valued formula:
-    real(kind=8) a,b,c,d
-    a=real(csn)
-    b=aimag(csn)
-    c=real(ccs)
-    d=aimag(ccs)
-    atan2_cc=cmplx(atan2(a,c),(c*b-a*d)/(a**2+c**2))
-    return
-  end function atan2_cc
+        real(kind=8) a, b, c, d
+        a = real(csn)
+        b = aimag(csn)
+        c = real(ccs)
+        d = aimag(ccs)
+        atan2_cc = cmplx(atan2(a, c), (c * b - a * d) / (a**2 + c**2))
+        return
+    end function atan2_cc
 
 ! COSH
-  complex(kind=8) function cosh_c(z)
-    complex(kind=8), intent(in) :: z
+    complex(kind=8) function cosh_c(z)
+        complex(kind=8), intent(in) :: z
 !   complex(kind=8) eplus, eminus
 !   eplus = exp(z)
 !   eminus = exp(z)
 !   cosh_c = (eplus + eminus)/2.
 !   not general complex valued formula:
-    cosh_c=cmplx(cosh(real(z)),aimag(z)*sinh(real(z)))
-    return
-  end function cosh_c
+        cosh_c = cmplx(cosh(real(z)), aimag(z) * sinh(real(z)))
+        return
+    end function cosh_c
 
 ! SINH
-  complex(kind=8) function sinh_c(z)
-    complex(kind=8), intent(in) :: z
+    complex(kind=8) function sinh_c(z)
+        complex(kind=8), intent(in) :: z
 !   complex(kind=8) eplus, eminus
 !   eplus = exp(z)
 !   eminus = exp(z)
 !   sinh_c = (eplus - eminus)/2.
 !   not general complex valued formula:
-    sinh_c=cmplx(sinh(real(z)),aimag(z)*cosh(real(z)))
-    return
-  end function sinh_c
+        sinh_c = cmplx(sinh(real(z)), aimag(z) * cosh(real(z)))
+        return
+    end function sinh_c
 
 ! TAN
-  complex(kind=8) function tan_c(z)
-    complex(kind=8), intent(in) :: z
+    complex(kind=8) function tan_c(z)
+        complex(kind=8), intent(in) :: z
 !   complex(kind=8) eiplus, eiminus
 !   eiplus = exp(cmplx(0.,1.)*z)
 !   eiminus = exp(-cmplx(0.,1.)*z)
 !   tan_c = cmplx(0.,1.)*(eiminus - eiplus)/(eiplus + eiminus)
 !   not general complex valued formula:
-    tan_c=cmplx(tan(real(z)),aimag(z)/cos(real(z))**2)
-    return
-  end function tan_c
+        tan_c = cmplx(tan(real(z)), aimag(z) / cos(real(z))**2)
+        return
+    end function tan_c
 
 ! TANH
-  complex(kind=8) function tanh_c(a)
-    complex(kind=8), intent(in) :: a
+    complex(kind=8) function tanh_c(a)
+        complex(kind=8), intent(in) :: a
 !   complex(kind=8) eplus, eminus
 !   if(real(a) > 50)then
 !     tanh_c = 1.
@@ -387,153 +386,153 @@ complex(kind=8) function tanh_c(a)
 !     tanh_c = (eplus - eminus)/(eplus + eminus)
 !   end if
 !   not general complex valued formula:
-    tanh_c=cmplx(tanh(real(a)),aimag(a)/cosh(real(a))**2)
-    return
-  end function tanh_c
+        tanh_c = cmplx(tanh(real(a)), aimag(a) / cosh(real(a))**2)
+        return
+    end function tanh_c
 ! MAX, intrinsic
 ! the logical statements here are chosen to match fwd AD code from tapenade
 ! this way they are consistent even when the real parts are equal
-  complex(kind=8) function max_cc(val1, val2)
-    complex(kind=8), intent(in) :: val1, val2
-    if (real(val1) < real(val2)) then
-      max_cc = val2
-    else
-      max_cc = val1
-    endif
-    return
-  end function max_cc
-  complex(kind=8) function max_cr(val1, val2)
-    complex(kind=8), intent(in) :: val1
-    real(kind=8), intent(in) :: val2
-    if (real(val1) < val2) then
-      max_cr = cmplx(val2, 0.)
-    else
-      max_cr = val1
-    endif
-    return
-  end function max_cr
-  complex(kind=8) function max_rc(val1, val2)
-    real(kind=8), intent(in) :: val1
-    complex(kind=8), intent(in) :: val2
-    if (val1 < real(val2)) then
-      max_rc = val2
-    else
-      max_rc = cmplx(val1, 0.)
-    endif
-    return
-  end function max_rc
-  complex(kind=8) function max_ccc(val1, val2, val3)
-    complex(kind=8), intent(in) :: val1, val2, val3
-    if (real(val1) < real(val2)) then
-      max_ccc = val2
-    else
-      max_ccc = val1
-    endif
-
-    if (real(max_ccc) < real(val3)) then
-      max_ccc = val3
-    endif
-    return
-  end function max_ccc
-  function max_cccc(val1, val2, val3, val4)
-    complex(kind=8), intent(in) :: val1, val2, val3, val4
-    complex(kind=8) max_cccc
-    complex(kind=8) max_cccc2
-    if (real(val1) < real(val2)) then
-      max_cccc = val2
-    else
-      max_cccc = val1
-    endif
-    if (real(val3) < real(val4)) then
-      max_cccc2 = val4
-    else
-      max_cccc2 = val3
-    endif
-    if ( real(max_cccc) < real(max_cccc2)) then
-      max_cccc = max_cccc2
-    endif
-    return
-  end function max_cccc
+    complex(kind=8) function max_cc(val1, val2)
+        complex(kind=8), intent(in) :: val1, val2
+        if (real(val1) < real(val2)) then
+            max_cc = val2
+        else
+            max_cc = val1
+        end if
+        return
+    end function max_cc
+    complex(kind=8) function max_cr(val1, val2)
+        complex(kind=8), intent(in) :: val1
+        real(kind=8), intent(in) :: val2
+        if (real(val1) < val2) then
+            max_cr = cmplx(val2, 0.)
+        else
+            max_cr = val1
+        end if
+        return
+    end function max_cr
+    complex(kind=8) function max_rc(val1, val2)
+        real(kind=8), intent(in) :: val1
+        complex(kind=8), intent(in) :: val2
+        if (val1 < real(val2)) then
+            max_rc = val2
+        else
+            max_rc = cmplx(val1, 0.)
+        end if
+        return
+    end function max_rc
+    complex(kind=8) function max_ccc(val1, val2, val3)
+        complex(kind=8), intent(in) :: val1, val2, val3
+        if (real(val1) < real(val2)) then
+            max_ccc = val2
+        else
+            max_ccc = val1
+        end if
+
+        if (real(max_ccc) < real(val3)) then
+            max_ccc = val3
+        end if
+        return
+    end function max_ccc
+    function max_cccc(val1, val2, val3, val4)
+        complex(kind=8), intent(in) :: val1, val2, val3, val4
+        complex(kind=8) max_cccc
+        complex(kind=8) max_cccc2
+        if (real(val1) < real(val2)) then
+            max_cccc = val2
+        else
+            max_cccc = val1
+        end if
+        if (real(val3) < real(val4)) then
+            max_cccc2 = val4
+        else
+            max_cccc2 = val3
+        end if
+        if (real(max_cccc) < real(max_cccc2)) then
+            max_cccc = max_cccc2
+        end if
+        return
+    end function max_cccc
 
 ! MIN, intrinsic
 ! the logical statements here are chosen to match fwd AD code from tapenade
 ! this way they are consistent even when the real parts are equal
-  complex(kind=8) function min_cc(val1, val2)
-    complex(kind=8), intent(in) :: val1, val2
-    if (real(val1) > real(val2)) then
-      min_cc = val2
-    else
-      min_cc = val1
-    endif
-    return
-  end function min_cc
-  complex(kind=8) function min_cr(val1, val2)
-    complex(kind=8), intent(in) :: val1
-    real(kind=8), intent(in) :: val2
-    if (real(val1) > val2) then
-      min_cr = cmplx(val2, 0.)
-    else
-      min_cr = val1
-    endif
-    return
-  end function min_cr
-  complex(kind=8) function min_rc(val1, val2)
-    real(kind=8), intent(in) :: val1
-    complex(kind=8), intent(in) :: val2
-    if (val1 > real(val2)) then
-      min_rc = val2
-    else
-      min_rc = cmplx(val1, 0.)
-    endif
-    return
-  end function min_rc
-  complex(kind=8) function min_ccc(val1, val2, val3)
-    complex(kind=8), intent(in) :: val1, val2, val3
-    if (real(val1) > real(val2)) then
-      min_ccc = val2
-    else
-      min_ccc = val1
-    endif
-
-    if (real(min_ccc) > real(val3)) then
-      min_ccc = val3
-    endif
-    return
-  end function min_ccc
-  function min_cccc(val1, val2, val3, val4)
-    complex(kind=8), intent(in) :: val1, val2, val3, val4
-    complex(kind=8) min_cccc
-    complex(kind=8) min_cccc2
-    if (real(val1) > real(val2)) then
-      min_cccc = val2
-    else
-      min_cccc = val1
-    endif
-
-    if (real(val3) > real(val4)) then
-      min_cccc2 = val4
-    else
-      min_cccc2 = val3
-    endif
-    if (real(min_cccc) > real(min_cccc2)) then
-      min_cccc = min_cccc2
-    endif
-    return
-  end function min_cccc
+    complex(kind=8) function min_cc(val1, val2)
+        complex(kind=8), intent(in) :: val1, val2
+        if (real(val1) > real(val2)) then
+            min_cc = val2
+        else
+            min_cc = val1
+        end if
+        return
+    end function min_cc
+    complex(kind=8) function min_cr(val1, val2)
+        complex(kind=8), intent(in) :: val1
+        real(kind=8), intent(in) :: val2
+        if (real(val1) > val2) then
+            min_cr = cmplx(val2, 0.)
+        else
+            min_cr = val1
+        end if
+        return
+    end function min_cr
+    complex(kind=8) function min_rc(val1, val2)
+        real(kind=8), intent(in) :: val1
+        complex(kind=8), intent(in) :: val2
+        if (val1 > real(val2)) then
+            min_rc = val2
+        else
+            min_rc = cmplx(val1, 0.)
+        end if
+        return
+    end function min_rc
+    complex(kind=8) function min_ccc(val1, val2, val3)
+        complex(kind=8), intent(in) :: val1, val2, val3
+        if (real(val1) > real(val2)) then
+            min_ccc = val2
+        else
+            min_ccc = val1
+        end if
+
+        if (real(min_ccc) > real(val3)) then
+            min_ccc = val3
+        end if
+        return
+    end function min_ccc
+    function min_cccc(val1, val2, val3, val4)
+        complex(kind=8), intent(in) :: val1, val2, val3, val4
+        complex(kind=8) min_cccc
+        complex(kind=8) min_cccc2
+        if (real(val1) > real(val2)) then
+            min_cccc = val2
+        else
+            min_cccc = val1
+        end if
+
+        if (real(val3) > real(val4)) then
+            min_cccc2 = val4
+        else
+            min_cccc2 = val3
+        end if
+        if (real(min_cccc) > real(min_cccc2)) then
+            min_cccc = min_cccc2
+        end if
+        return
+    end function min_cccc
 
 ! MINVAL: minimum of an array
 ! Assumes a 1D array!
-  complex(kind=8) function minval_c(z)
-    complex(kind=8), intent(in) :: z(:)
-    minval_c = cmplx(minval(real(z)), aimag(z(minloc(real(z),dim=1))))
-  end function minval_c
+    complex(kind=8) function minval_c(z)
+        complex(kind=8), intent(in) :: z(:)
+        minval_c = cmplx(minval(real(z)), aimag(z(minloc(real(z), dim=1))))
+    end function minval_c
 
 ! MAXVAL: maximum of an array
 ! Assumes a 1D array!
-  complex(kind=8) function maxval_c(z)
-    complex(kind=8), intent(in) :: z(:)
-    maxval_c = cmplx(maxval(real(z)), aimag(z(maxloc(real(z),dim=1))))
-  end function maxval_c
+    complex(kind=8) function maxval_c(z)
+        complex(kind=8), intent(in) :: z(:)
+        maxval_c = cmplx(maxval(real(z)), aimag(z(maxloc(real(z), dim=1))))
+    end function maxval_c
 
 !! MINLOC: location of minimum in an array
 !  complex(kind=8) function minloc_c(z)
@@ -543,188 +542,187 @@ end function maxval_c
 !    minloc_c = minloc(real(z))
 !  end function minval_c
 
-
 ! SIGN, intrinsic, assume that val1 is always a complex(kind=8)
 !                  in reality could be int
-  complex(kind=8) function sign_cc(val1, val2)
-    complex(kind=8), intent(in) :: val1, val2
-    real(kind=8)  sign
-    if (real(val2) < 0.) then
-      sign = -1.
-    else
-      sign = 1.
-    endif
-    sign_cc = sign * val1
-    return
-  end function sign_cc
-  function sign_cca(val1, val2) ! NEW, not verified
-    complex(kind=8), intent(in) :: val1
-    complex(kind=8), intent(in) :: val2(:)
-    complex(kind=8) sign_cca(size(val2))
-    real(kind=8) sign
-    integer i, n
-    n = size(val2)
-    do i = 1, n
-       if (real(val2(i)) < 0.) then
-          sign = -1.
-       else
-          sign = 1.
-       endif
-       sign_cca(i) = sign * val1
-    enddo
-    return
-  end function sign_cca
-  complex(kind=8) function sign_cr(val1, val2)
-    complex(kind=8), intent(in) :: val1
-    real(kind=8), intent(in) :: val2
-    real(kind=8) sign
-    if (real(val2) < 0.) then
-      sign = -1.
-    else
-      sign = 1.
-    endif
-    sign_cr = sign * val1
-    return
-  end function sign_cr
-  complex(kind=8) function sign_rc(val1, val2)
-    real(kind=8), intent(in) :: val1
-    complex(kind=8), intent(in) :: val2
-    real(kind=8) sign
-    if (real(val2) < 0.) then
-      sign = -1.
-    else
-      sign = 1.
-    endif
-    sign_rc = sign * val1
-    return
-  end function sign_rc
+    complex(kind=8) function sign_cc(val1, val2)
+        complex(kind=8), intent(in) :: val1, val2
+        real(kind=8) sign
+        if (real(val2) < 0.) then
+            sign = -1.
+        else
+            sign = 1.
+        end if
+        sign_cc = sign * val1
+        return
+    end function sign_cc
+    function sign_cca(val1, val2) ! NEW, not verified
+        complex(kind=8), intent(in) :: val1
+        complex(kind=8), intent(in) :: val2(:)
+        complex(kind=8) sign_cca(size(val2))
+        real(kind=8) sign
+        integer i, n
+        n = size(val2)
+        do i = 1, n
+            if (real(val2(i)) < 0.) then
+                sign = -1.
+            else
+                sign = 1.
+            end if
+            sign_cca(i) = sign * val1
+        end do
+        return
+    end function sign_cca
+    complex(kind=8) function sign_cr(val1, val2)
+        complex(kind=8), intent(in) :: val1
+        real(kind=8), intent(in) :: val2
+        real(kind=8) sign
+        if (real(val2) < 0.) then
+            sign = -1.
+        else
+            sign = 1.
+        end if
+        sign_cr = sign * val1
+        return
+    end function sign_cr
+    complex(kind=8) function sign_rc(val1, val2)
+        real(kind=8), intent(in) :: val1
+        complex(kind=8), intent(in) :: val2
+        real(kind=8) sign
+        if (real(val2) < 0.) then
+            sign = -1.
+        else
+            sign = 1.
+        end if
+        sign_rc = sign * val1
+        return
+    end function sign_rc
 
 ! DIM, intrinsic
-  complex(kind=8) function dim_cc(val1, val2)
-    complex(kind=8), intent(in) :: val1, val2
-    if (val1 > val2) then
-      dim_cc = val1 - val2
-    else
-      dim_cc = cmplx(0., 0.)
-    endif
-    return
-  end function dim_cc
-  complex(kind=8) function dim_cr(val1, val2)
-    complex(kind=8), intent(in) :: val1
-    real(kind=8), intent(in) :: val2
-    if (val1 > val2) then
-      dim_cr = val1 - cmplx(val2, 0.)
-    else
-      dim_cr = cmplx(0., 0.)
-    endif
-    return
-  end function dim_cr
-  complex(kind=8) function dim_rc(val1, val2)
-    real(kind=8), intent(in) :: val1
-    complex(kind=8), intent(in) :: val2
-    if (val1 > val2) then
-      dim_rc = cmplx(val1, 0.) - val2
-    else
-      dim_rc = cmplx(0., 0.)
-    endif
-    return
-  end function dim_rc
+    complex(kind=8) function dim_cc(val1, val2)
+        complex(kind=8), intent(in) :: val1, val2
+        if (val1 > val2) then
+            dim_cc = val1 - val2
+        else
+            dim_cc = cmplx(0., 0.)
+        end if
+        return
+    end function dim_cc
+    complex(kind=8) function dim_cr(val1, val2)
+        complex(kind=8), intent(in) :: val1
+        real(kind=8), intent(in) :: val2
+        if (val1 > val2) then
+            dim_cr = val1 - cmplx(val2, 0.)
+        else
+            dim_cr = cmplx(0., 0.)
+        end if
+        return
+    end function dim_cr
+    complex(kind=8) function dim_rc(val1, val2)
+        real(kind=8), intent(in) :: val1
+        complex(kind=8), intent(in) :: val2
+        if (val1 > val2) then
+            dim_rc = cmplx(val1, 0.) - val2
+        else
+            dim_rc = cmplx(0., 0.)
+        end if
+        return
+    end function dim_rc
 
 ! LOG10
-  complex(kind=8) function log10_c(z)
-    complex(kind=8), intent(in) :: z
-    log10_c=log(z)/log((10.0,0.0))
-  end function log10_c
+    complex(kind=8) function log10_c(z)
+        complex(kind=8), intent(in) :: z
+        log10_c = log(z) / log((10.0, 0.0))
+    end function log10_c
 
 ! NINT
-  integer function nint_c(z)
-    complex(kind=8), intent(in) :: z
-    nint_c = nint(real(z))
-  end function nint_c
+    integer function nint_c(z)
+        complex(kind=8), intent(in) :: z
+        nint_c = nint(real(z))
+    end function nint_c
 
 ! EPSILON !! bad news ulness compiled with -r8
-  complex(kind=8) function epsilon_c(z)
-    complex(kind=8), intent(in) :: z
-    epsilon_c=epsilon(real(z))
-  end function epsilon_c
+    complex(kind=8) function epsilon_c(z)
+        complex(kind=8), intent(in) :: z
+        epsilon_c = epsilon(real(z))
+    end function epsilon_c
 
 ! <, .lt.
-  logical function lt_cc(lhs, rhs)
-    complex(kind=8), intent(in) :: lhs, rhs
-    lt_cc = real(lhs) < real(rhs)
-  end function lt_cc
-  logical function lt_cr(lhs, rhs)
-    complex(kind=8), intent(in) :: lhs
-    real(kind=8), intent(in) :: rhs
-    lt_cr = real(lhs) < rhs
-  end function lt_cr
-  logical function lt_rc(lhs, rhs)
-    real(kind=8), intent(in) :: lhs
-    complex(kind=8), intent(in) :: rhs
-    lt_rc = lhs < real(rhs)
-  end function lt_rc
-  logical function lt_ci(lhs, rhs)
-    complex(kind=8), intent(in) :: lhs
-    integer, intent(in) :: rhs
-    lt_ci = real(lhs) < rhs
-  end function lt_ci
-  logical function lt_ic(lhs, rhs)
-    integer, intent(in) :: lhs
-    complex(kind=8), intent(in) :: rhs
-    lt_ic = lhs < real(rhs)
-  end function lt_ic
+    logical function lt_cc(lhs, rhs)
+        complex(kind=8), intent(in) :: lhs, rhs
+        lt_cc = real(lhs) < real(rhs)
+    end function lt_cc
+    logical function lt_cr(lhs, rhs)
+        complex(kind=8), intent(in) :: lhs
+        real(kind=8), intent(in) :: rhs
+        lt_cr = real(lhs) < rhs
+    end function lt_cr
+    logical function lt_rc(lhs, rhs)
+        real(kind=8), intent(in) :: lhs
+        complex(kind=8), intent(in) :: rhs
+        lt_rc = lhs < real(rhs)
+    end function lt_rc
+    logical function lt_ci(lhs, rhs)
+        complex(kind=8), intent(in) :: lhs
+        integer, intent(in) :: rhs
+        lt_ci = real(lhs) < rhs
+    end function lt_ci
+    logical function lt_ic(lhs, rhs)
+        integer, intent(in) :: lhs
+        complex(kind=8), intent(in) :: rhs
+        lt_ic = lhs < real(rhs)
+    end function lt_ic
 
 ! <=, .le.
-  logical function le_cc(lhs, rhs)
-    complex(kind=8), intent(in) :: lhs, rhs
-    le_cc = real(lhs) <= real(rhs)
-  end function le_cc
-  logical function le_cr(lhs, rhs)
-    complex(kind=8), intent(in) :: lhs
-    real(kind=8), intent(in) :: rhs
-    le_cr = real(lhs) <= rhs
-  end function le_cr
-  logical function le_rc(lhs, rhs)
-    real(kind=8), intent(in) :: lhs
-    complex(kind=8), intent(in) :: rhs
-    le_rc = lhs <= real(rhs)
-  end function le_rc
-  logical function le_ci(lhs, rhs)
-    complex(kind=8), intent(in) :: lhs
-    integer, intent(in) :: rhs
-    le_ci = real(lhs) <= rhs
-  end function le_ci
-  logical function le_ic(lhs, rhs)
-    integer, intent(in) :: lhs
-    complex(kind=8), intent(in) :: rhs
-    le_ic = lhs <= real(rhs)
-  end function le_ic
+    logical function le_cc(lhs, rhs)
+        complex(kind=8), intent(in) :: lhs, rhs
+        le_cc = real(lhs) <= real(rhs)
+    end function le_cc
+    logical function le_cr(lhs, rhs)
+        complex(kind=8), intent(in) :: lhs
+        real(kind=8), intent(in) :: rhs
+        le_cr = real(lhs) <= rhs
+    end function le_cr
+    logical function le_rc(lhs, rhs)
+        real(kind=8), intent(in) :: lhs
+        complex(kind=8), intent(in) :: rhs
+        le_rc = lhs <= real(rhs)
+    end function le_rc
+    logical function le_ci(lhs, rhs)
+        complex(kind=8), intent(in) :: lhs
+        integer, intent(in) :: rhs
+        le_ci = real(lhs) <= rhs
+    end function le_ci
+    logical function le_ic(lhs, rhs)
+        integer, intent(in) :: lhs
+        complex(kind=8), intent(in) :: rhs
+        le_ic = lhs <= real(rhs)
+    end function le_ic
 
 ! >, .gt.
-  logical function gt_cc(lhs, rhs)
-    complex(kind=8), intent(in) :: lhs, rhs
-    gt_cc = real(lhs) > real(rhs)
-  end function gt_cc
-  logical function gt_cr(lhs, rhs)
-    complex(kind=8), intent(in) :: lhs
-    real(kind=8), intent(in) :: rhs
-    gt_cr = real(lhs) > rhs
-  end function gt_cr
-  logical function gt_rc(lhs, rhs)
-    real(kind=8), intent(in) :: lhs
-    complex(kind=8), intent(in) :: rhs
-    gt_rc = lhs > real(rhs)
-  end function gt_rc
-  logical function gt_ci(lhs, rhs)
-    complex(kind=8), intent(in) :: lhs
-    integer, intent(in) :: rhs
-    gt_ci = real(lhs) > rhs
-  end function gt_ci
-  logical function gt_ic(lhs, rhs)
-    integer, intent(in) :: lhs
-    complex(kind=8), intent(in) :: rhs
-    gt_ic = lhs > real(rhs)
-  end function gt_ic
+    logical function gt_cc(lhs, rhs)
+        complex(kind=8), intent(in) :: lhs, rhs
+        gt_cc = real(lhs) > real(rhs)
+    end function gt_cc
+    logical function gt_cr(lhs, rhs)
+        complex(kind=8), intent(in) :: lhs
+        real(kind=8), intent(in) :: rhs
+        gt_cr = real(lhs) > rhs
+    end function gt_cr
+    logical function gt_rc(lhs, rhs)
+        real(kind=8), intent(in) :: lhs
+        complex(kind=8), intent(in) :: rhs
+        gt_rc = lhs > real(rhs)
+    end function gt_rc
+    logical function gt_ci(lhs, rhs)
+        complex(kind=8), intent(in) :: lhs
+        integer, intent(in) :: rhs
+        gt_ci = real(lhs) > rhs
+    end function gt_ci
+    logical function gt_ic(lhs, rhs)
+        integer, intent(in) :: lhs
+        complex(kind=8), intent(in) :: rhs
+        gt_ic = lhs > real(rhs)
+    end function gt_ic
 !  function gt_caca(lhs, rhs) ! Arrays
 !    complex(kind=8), intent(in) :: lhs(:), rhs(:)
 !    logical gt_caca(size(lhs))
@@ -732,197 +730,197 @@ end function gt_ic
 !    n = size(lhs)
 !    gt_caca = real(lhs) > real(rhs)
 !  end function gt_caca
-  function gt_cac(lhs, rhs) ! Arrays
-    complex(kind=8), intent(in) :: lhs(:)
-    complex(kind=8), intent(in) :: rhs
-    logical gt_cac(size(lhs))
-    integer n
-    n = size(lhs)
-    gt_cac = real(lhs) > real(rhs)
-  end function gt_cac
-  function gt_car(lhs, rhs) ! Arrays
-    complex(kind=8), intent(in) :: lhs(:)
-    real(kind=8), intent(in) :: rhs
-    logical gt_car(size(lhs))
-    integer n
-    n = size(lhs)
-    gt_car = real(lhs) > rhs
-  end function gt_car
+    function gt_cac(lhs, rhs) ! Arrays
+        complex(kind=8), intent(in) :: lhs(:)
+        complex(kind=8), intent(in) :: rhs
+        logical gt_cac(size(lhs))
+        integer n
+        n = size(lhs)
+        gt_cac = real(lhs) > real(rhs)
+    end function gt_cac
+    function gt_car(lhs, rhs) ! Arrays
+        complex(kind=8), intent(in) :: lhs(:)
+        real(kind=8), intent(in) :: rhs
+        logical gt_car(size(lhs))
+        integer n
+        n = size(lhs)
+        gt_car = real(lhs) > rhs
+    end function gt_car
 
 !! here are the redefined ones:
 ! >=, .ge.
-  logical function ge_cc(lhs, rhs)
-    complex(kind=8), intent(in) :: lhs, rhs
-    ge_cc = real(lhs) >= real(rhs)
-  end function ge_cc
-  logical function ge_rr(lhs, rhs)
-    real(kind=8), intent(in) :: lhs, rhs
-    ge_rr = lhs >= rhs
-  end function ge_rr
-  logical function ge_ii(lhs, rhs)
-    integer, intent(in) :: lhs, rhs
-    ge_ii = lhs >= rhs
-  end function ge_ii
-  logical function ge_aa(lhs, rhs)
-    character(len=*), intent(in) :: lhs, rhs
-    ge_aa = lhs >= rhs
-  end function ge_aa
-  logical function ge_cr(lhs, rhs)
-    complex(kind=8), intent(in) :: lhs
-    real(kind=8), intent(in) :: rhs
-    ge_cr = real(lhs) >= rhs
-  end function ge_cr
-  logical function ge_rc(lhs, rhs)
-    real(kind=8), intent(in) :: lhs
-    complex(kind=8), intent(in) :: rhs
-    ge_rc = lhs >= real(rhs)
-  end function ge_rc
-  logical function ge_ci(lhs, rhs)
-    complex(kind=8), intent(in) :: lhs
-    integer, intent(in) :: rhs
-    ge_ci = real(lhs) >= rhs
-  end function ge_ci
-  logical function ge_ic(lhs, rhs)
-    integer, intent(in) :: lhs
-    complex(kind=8), intent(in) :: rhs
-    ge_ic = lhs >= real(rhs)
-  end function ge_ic
-  logical function ge_ir(lhs, rhs)
-    integer, intent(in) :: lhs
-    real(kind=8), intent(in) :: rhs
-    ge_ir = lhs >= rhs
-  end function ge_ir
-  logical function ge_ri(lhs, rhs)
-    real(kind=8), intent(in) :: lhs
-    integer, intent(in) :: rhs
-    ge_ri = lhs >= rhs
-  end function ge_ri
+    logical function ge_cc(lhs, rhs)
+        complex(kind=8), intent(in) :: lhs, rhs
+        ge_cc = real(lhs) >= real(rhs)
+    end function ge_cc
+    logical function ge_rr(lhs, rhs)
+        real(kind=8), intent(in) :: lhs, rhs
+        ge_rr = lhs >= rhs
+    end function ge_rr
+    logical function ge_ii(lhs, rhs)
+        integer, intent(in) :: lhs, rhs
+        ge_ii = lhs >= rhs
+    end function ge_ii
+    logical function ge_aa(lhs, rhs)
+        character(len=*), intent(in) :: lhs, rhs
+        ge_aa = lhs >= rhs
+    end function ge_aa
+    logical function ge_cr(lhs, rhs)
+        complex(kind=8), intent(in) :: lhs
+        real(kind=8), intent(in) :: rhs
+        ge_cr = real(lhs) >= rhs
+    end function ge_cr
+    logical function ge_rc(lhs, rhs)
+        real(kind=8), intent(in) :: lhs
+        complex(kind=8), intent(in) :: rhs
+        ge_rc = lhs >= real(rhs)
+    end function ge_rc
+    logical function ge_ci(lhs, rhs)
+        complex(kind=8), intent(in) :: lhs
+        integer, intent(in) :: rhs
+        ge_ci = real(lhs) >= rhs
+    end function ge_ci
+    logical function ge_ic(lhs, rhs)
+        integer, intent(in) :: lhs
+        complex(kind=8), intent(in) :: rhs
+        ge_ic = lhs >= real(rhs)
+    end function ge_ic
+    logical function ge_ir(lhs, rhs)
+        integer, intent(in) :: lhs
+        real(kind=8), intent(in) :: rhs
+        ge_ir = lhs >= rhs
+    end function ge_ir
+    logical function ge_ri(lhs, rhs)
+        real(kind=8), intent(in) :: lhs
+        integer, intent(in) :: rhs
+        ge_ri = lhs >= rhs
+    end function ge_ri
 
 ! ==, .eq.
-  logical function eq_cc(lhs, rhs)
-    complex(kind=8), intent(in) :: lhs, rhs
-    eq_cc = real(lhs) == real(rhs)
-  end function eq_cc
-  logical function eq_rr(lhs, rhs)
-    real(kind=8), intent(in) :: lhs, rhs
-    eq_rr = lhs == rhs
-  end function eq_rr
-  logical function eq_ii(lhs, rhs)
-    integer, intent(in) :: lhs, rhs
-    eq_ii = lhs == rhs
-  end function eq_ii
-  ! lhs and rhs are rank 1 integer arrays
-  function eq_iaia(lhs, rhs)
-    integer, intent(in) :: lhs(:), rhs(:)
-    logical eq_iaia(size(lhs))
-    eq_iaia = lhs == rhs
-  end function eq_iaia
-  ! lhs is a rank 3 integer array
-  function eq_iai(lhs, rhs)
-    integer, intent(in) :: lhs(:,:,:)
-    integer, intent(in) :: rhs
-    logical eq_iai(size(lhs,1), size(lhs,2), size(lhs,3))
-    eq_iai = lhs == rhs
-  end function eq_iai
-  logical function eq_i8i8(lhs, rhs)
-    integer(kind = 8), intent(in) :: lhs, rhs
-    eq_i8i8 = lhs == rhs
-  end function eq_i8i8
-  logical function eq_i1i1(lhs, rhs)
-    integer(kind = 1), intent(in) :: lhs, rhs
-    eq_i1i1 = lhs == rhs
-  end function eq_i1i1
-  logical function eq_aa(lhs, rhs)
-    character(len=*), intent(in) :: lhs, rhs
-    eq_aa = lhs == rhs
-  end function eq_aa
-  logical function eq_cr(lhs, rhs)
-    complex(kind=8), intent(in) :: lhs
-    real(kind=8), intent(in) :: rhs
-    eq_cr = real(lhs) == rhs
-  end function eq_cr
-  logical function eq_rc(lhs, rhs)
-    real(kind=8), intent(in) :: lhs
-    complex(kind=8), intent(in) :: rhs
-    eq_rc = lhs == real(rhs)
-  end function eq_rc
-  logical function eq_ci(lhs, rhs)
-    complex(kind=8), intent(in) :: lhs
-    integer, intent(in) :: rhs
-    eq_ci = real(lhs) == rhs
-  end function eq_ci
-  logical function eq_ic(lhs, rhs)
-    integer, intent(in) :: lhs
-    complex(kind=8), intent(in) :: rhs
-    eq_ic = lhs == real(rhs)
-  end function eq_ic
-  logical function eq_ir(lhs, rhs)
-    integer, intent(in) :: lhs
-    real(kind=8), intent(in) :: rhs
-    eq_ir = lhs == rhs
-  end function eq_ir
-  logical function eq_ri(lhs, rhs)
-    real(kind=8), intent(in) :: lhs
-    integer, intent(in) :: rhs
-    eq_ri = lhs == rhs
-  end function eq_ri
+    logical function eq_cc(lhs, rhs)
+        complex(kind=8), intent(in) :: lhs, rhs
+        eq_cc = real(lhs) == real(rhs)
+    end function eq_cc
+    logical function eq_rr(lhs, rhs)
+        real(kind=8), intent(in) :: lhs, rhs
+        eq_rr = lhs == rhs
+    end function eq_rr
+    logical function eq_ii(lhs, rhs)
+        integer, intent(in) :: lhs, rhs
+        eq_ii = lhs == rhs
+    end function eq_ii
+    ! lhs and rhs are rank 1 integer arrays
+    function eq_iaia(lhs, rhs)
+        integer, intent(in) :: lhs(:), rhs(:)
+        logical eq_iaia(size(lhs))
+        eq_iaia = lhs == rhs
+    end function eq_iaia
+    ! lhs is a rank 3 integer array
+    function eq_iai(lhs, rhs)
+        integer, intent(in) :: lhs(:, :, :)
+        integer, intent(in) :: rhs
+        logical eq_iai(size(lhs, 1), size(lhs, 2), size(lhs, 3))
+        eq_iai = lhs == rhs
+    end function eq_iai
+    logical function eq_i8i8(lhs, rhs)
+        integer(kind=8), intent(in) :: lhs, rhs
+        eq_i8i8 = lhs == rhs
+    end function eq_i8i8
+    logical function eq_i1i1(lhs, rhs)
+        integer(kind=1), intent(in) :: lhs, rhs
+        eq_i1i1 = lhs == rhs
+    end function eq_i1i1
+    logical function eq_aa(lhs, rhs)
+        character(len=*), intent(in) :: lhs, rhs
+        eq_aa = lhs == rhs
+    end function eq_aa
+    logical function eq_cr(lhs, rhs)
+        complex(kind=8), intent(in) :: lhs
+        real(kind=8), intent(in) :: rhs
+        eq_cr = real(lhs) == rhs
+    end function eq_cr
+    logical function eq_rc(lhs, rhs)
+        real(kind=8), intent(in) :: lhs
+        complex(kind=8), intent(in) :: rhs
+        eq_rc = lhs == real(rhs)
+    end function eq_rc
+    logical function eq_ci(lhs, rhs)
+        complex(kind=8), intent(in) :: lhs
+        integer, intent(in) :: rhs
+        eq_ci = real(lhs) == rhs
+    end function eq_ci
+    logical function eq_ic(lhs, rhs)
+        integer, intent(in) :: lhs
+        complex(kind=8), intent(in) :: rhs
+        eq_ic = lhs == real(rhs)
+    end function eq_ic
+    logical function eq_ir(lhs, rhs)
+        integer, intent(in) :: lhs
+        real(kind=8), intent(in) :: rhs
+        eq_ir = lhs == rhs
+    end function eq_ir
+    logical function eq_ri(lhs, rhs)
+        real(kind=8), intent(in) :: lhs
+        integer, intent(in) :: rhs
+        eq_ri = lhs == rhs
+    end function eq_ri
 
 ! /=, .ne.
-  logical function ne_cc(lhs, rhs)
-    complex(kind=8), intent(in) :: lhs, rhs
-    ne_cc = real(lhs) /= real(rhs)
-  end function ne_cc
-  logical function ne_rr(lhs, rhs)
-    real(kind=8), intent(in) :: lhs, rhs
-    ne_rr = lhs /= rhs
-  end function ne_rr
-  logical function ne_ii(lhs, rhs)
-    integer, intent(in) :: lhs, rhs
-    ne_ii = lhs /= rhs
-  end function ne_ii
-  logical function ne_aa(lhs, rhs)
-    character(len=*), intent(in) :: lhs, rhs
-    ne_aa = lhs /= rhs
-  end function ne_aa
-  logical function ne_cr(lhs, rhs)
-    complex(kind=8), intent(in) :: lhs
-    real(kind=8), intent(in) :: rhs
-    ne_cr = real(lhs) /= rhs
-  end function ne_cr
-  logical function ne_rc(lhs, rhs)
-    real(kind=8), intent(in) :: lhs
-    complex(kind=8), intent(in) :: rhs
-    ne_rc = lhs /= real(rhs)
-  end function ne_rc
-  logical function ne_ci(lhs, rhs)
-    complex(kind=8), intent(in) :: lhs
-    integer, intent(in) :: rhs
-    ne_ci = real(lhs) /= rhs
-  end function ne_ci
-  logical function ne_ic(lhs, rhs)
-    integer, intent(in) :: lhs
-    complex(kind=8), intent(in) :: rhs
-    ne_ic = lhs /= real(rhs)
-  end function ne_ic
-  logical function ne_ir(lhs, rhs)
-    integer, intent(in) :: lhs
-    real(kind=8), intent(in) :: rhs
-    ne_ir = lhs /= rhs
-  end function ne_ir
-  logical function ne_ri(lhs, rhs)
-    real(kind=8), intent(in) :: lhs
-    integer, intent(in) :: rhs
-    ne_ri = lhs /= rhs
-  end function ne_ri
+    logical function ne_cc(lhs, rhs)
+        complex(kind=8), intent(in) :: lhs, rhs
+        ne_cc = real(lhs) /= real(rhs)
+    end function ne_cc
+    logical function ne_rr(lhs, rhs)
+        real(kind=8), intent(in) :: lhs, rhs
+        ne_rr = lhs /= rhs
+    end function ne_rr
+    logical function ne_ii(lhs, rhs)
+        integer, intent(in) :: lhs, rhs
+        ne_ii = lhs /= rhs
+    end function ne_ii
+    logical function ne_aa(lhs, rhs)
+        character(len=*), intent(in) :: lhs, rhs
+        ne_aa = lhs /= rhs
+    end function ne_aa
+    logical function ne_cr(lhs, rhs)
+        complex(kind=8), intent(in) :: lhs
+        real(kind=8), intent(in) :: rhs
+        ne_cr = real(lhs) /= rhs
+    end function ne_cr
+    logical function ne_rc(lhs, rhs)
+        real(kind=8), intent(in) :: lhs
+        complex(kind=8), intent(in) :: rhs
+        ne_rc = lhs /= real(rhs)
+    end function ne_rc
+    logical function ne_ci(lhs, rhs)
+        complex(kind=8), intent(in) :: lhs
+        integer, intent(in) :: rhs
+        ne_ci = real(lhs) /= rhs
+    end function ne_ci
+    logical function ne_ic(lhs, rhs)
+        integer, intent(in) :: lhs
+        complex(kind=8), intent(in) :: rhs
+        ne_ic = lhs /= real(rhs)
+    end function ne_ic
+    logical function ne_ir(lhs, rhs)
+        integer, intent(in) :: lhs
+        real(kind=8), intent(in) :: rhs
+        ne_ir = lhs /= rhs
+    end function ne_ir
+    logical function ne_ri(lhs, rhs)
+        real(kind=8), intent(in) :: lhs
+        integer, intent(in) :: rhs
+        ne_ri = lhs /= rhs
+    end function ne_ri
 
 ! floor: the largest integer less than or equal to the argument
-  function floor_c(z)
-    complex(kind=8), intent(in) :: z(:)
-    complex(kind=8) floor_c(size(z))
-    integer n
-    n = size(z)
-    floor_c = floor(real(z(1:n)))
-  end function floor_c
+    function floor_c(z)
+        complex(kind=8), intent(in) :: z(:)
+        complex(kind=8) floor_c(size(z))
+        integer n
+        n = size(z)
+        floor_c = floor(real(z(1:n)))
+    end function floor_c
 
 end module complexify
 
diff --git a/src_cs/modules/precision.F90 b/src_cs/modules/precision.F90
index ee947f6af..e56ce8275 100644
--- a/src_cs/modules/precision.F90
+++ b/src_cs/modules/precision.F90
@@ -1,130 +1,130 @@
 module precision
-  !
-  !       Definition of the kinds used for the integer and real types.
-  !       Due to MPI, it is a bit messy to use the compiler options -r8
-  !       and -r4 and therefore the kind construction is used here,
-  !       where the precision is set using compiler flags of -d type.
-  !       This is the only file of the code that should be changed when
-  !       a user wants single precision instead of double precision. All
-  !       other routines use the definitions in this file whenever
-  !       possible. If other definitions are used, there is a good
-  !       reason to do so, e.g. when calling the cgns or MPI functions.
-  !       The actual types used are determined by compiler flags like
-  !       -DUSE_LONG_INT and -DUSE_SINGLE_PRECISION. If these are
-  !       omitted the default integer and double precision are used.
-  !
-  !
-
-  use complexify
-  use mpi
-  implicit none
-  save
-
-  !
-  !       Definition of the integer type used in the entire code. There
-  !       might be a more elegant solution to do this, but be sure that
-  !       compatability with MPI must be guaranteed. Note that dummyInt
-  !       is a private variable, only used for the definition of the
-  !       integer type. Note furthermore that the parameters defining
-  !       the MPI types are integers. This is because of the definition
-  !       in MPI.
-  !
+    !
+    !       Definition of the kinds used for the integer and real types.
+    !       Due to MPI, it is a bit messy to use the compiler options -r8
+    !       and -r4 and therefore the kind construction is used here,
+    !       where the precision is set using compiler flags of -d type.
+    !       This is the only file of the code that should be changed when
+    !       a user wants single precision instead of double precision. All
+    !       other routines use the definitions in this file whenever
+    !       possible. If other definitions are used, there is a good
+    !       reason to do so, e.g. when calling the cgns or MPI functions.
+    !       The actual types used are determined by compiler flags like
+    !       -DUSE_LONG_INT and -DUSE_SINGLE_PRECISION. If these are
+    !       omitted the default integer and double precision are used.
+    !
+    !
+
+    use complexify
+    use mpi
+    implicit none
+    save
+
+    !
+    !       Definition of the integer type used in the entire code. There
+    !       might be a more elegant solution to do this, but be sure that
+    !       compatability with MPI must be guaranteed. Note that dummyInt
+    !       is a private variable, only used for the definition of the
+    !       integer type. Note furthermore that the parameters defining
+    !       the MPI types are integers. This is because of the definition
+    !       in MPI.
+    !
 
 #ifdef USE_LONG_INT
 
-  ! Long, i.e. 8 byte, integers are used as default integers
+    ! Long, i.e. 8 byte, integers are used as default integers
 
-  integer(kind=8), private :: dummyInt
-  integer, parameter       :: adflow_integer  = mpi_integer8
-  integer, parameter       :: sizeOfInteger = 8
+    integer(kind=8), private :: dummyInt
+    integer, parameter :: adflow_integer = mpi_integer8
+    integer, parameter :: sizeOfInteger = 8
 #else
 
-  ! Standard 4 byte integer types are used as default integers.
+    ! Standard 4 byte integer types are used as default integers.
 
-  integer(kind=4), private :: dummyInt
-  integer, parameter       :: adflow_integer  = mpi_integer4
-  integer, parameter       :: sizeOfInteger = 4
+    integer(kind=4), private :: dummyInt
+    integer, parameter :: adflow_integer = mpi_integer4
+    integer, parameter :: sizeOfInteger = 4
 #endif
 
-  !
-  !       Definition of the float type used in the entire code. The
-  !       remarks mentioned before the integer type definition also
-  !       apply here.
-  !
+    !
+    !       Definition of the float type used in the entire code. The
+    !       remarks mentioned before the integer type definition also
+    !       apply here.
+    !
 
 #ifdef USE_SINGLE_PRECISION
 
-  ! Single precision reals are used as default real types.
+    ! Single precision reals are used as default real types.
 
-  complex(kind=4), private :: dummyReal
-  integer, parameter    :: adflow_real  = MPI_DOUBLE_COMPLEX
-  integer, parameter    :: sizeOfReal = 4
-  real(kind=4), private :: dummyCGNSReal
+    complex(kind=4), private :: dummyReal
+    integer, parameter :: adflow_real = MPI_DOUBLE_COMPLEX
+    integer, parameter :: sizeOfReal = 4
+    real(kind=4), private :: dummyCGNSReal
 
 #elif USE_QUADRUPLE_PRECISION
 
-  ! Quadrupole precision reals are used as default real types.
-  ! This may not be supported on all platforms.
-  ! As cgns does not support quadrupole precision, double
-  ! precision is used instead.
+    ! Quadrupole precision reals are used as default real types.
+    ! This may not be supported on all platforms.
+    ! As cgns does not support quadrupole precision, double
+    ! precision is used instead.
 
-  complex(kind=16), private :: dummyReal
-  integer, parameter     :: adflow_real  = mpi_DOUBLE_COMPLE16
-  integer, parameter     :: sizeOfReal = 16
-  real(kind=8), private :: dummyCGNSReal
+    complex(kind=16), private :: dummyReal
+    integer, parameter :: adflow_real = mpi_DOUBLE_COMPLE16
+    integer, parameter :: sizeOfReal = 16
+    real(kind=8), private :: dummyCGNSReal
 
 #else
 
-  ! Double precision reals are used as default real types.
+    ! Double precision reals are used as default real types.
 
-  complex(kind=8), private :: dummyReal
-  integer, parameter    :: adflow_real   = MPI_DOUBLE_COMPLEX
-  integer, parameter    :: sizeOfReal = 8
-  real(kind=8), private :: dummyCGNSReal
+    complex(kind=8), private :: dummyReal
+    integer, parameter :: adflow_real = MPI_DOUBLE_COMPLEX
+    integer, parameter :: sizeOfReal = 8
+    real(kind=8), private :: dummyCGNSReal
 
 #endif
 
- ! Dummy single and double types
-  complex(kind=4) :: dummySingle
-  complex(kind=8) :: dummyDouble
-
-  !
-  !       Definition of the porosity type. As this is only a flag to
-  !       indicate whether or not fluxes must be computed, an integer1
-  !       is perfectly okay.
-  !
-  integer(kind=1), private :: dummyPor
-
-  !      Definition of the integer type for the element types. As only
-  !      a limited number element types are present, a 1 byte integer
-  !      is enough.
-  !
-  integer(kind=1), private :: adtDummyElementInt
-
-  !       Definition of the cgns periodic type.
-  !
-  real(kind=4), private :: dummyCGNSPer
-  !
-  !       Definition of the kind parameters for the integer and real
-  !       types.
-  !
-  integer, parameter :: intType      = kind(dummyInt)
-  integer, parameter :: porType      = kind(dummyPor)
-  integer, parameter :: realType     = kind(dummyReal)
-  integer, parameter :: adtElementType = kind(adtDummyElementInt)
-  integer, parameter :: cgnsRealType = kind(dummyCGNSReal)
-  integer, parameter :: cgnsPerType  = kind(dummyCGNSPer)
-  integer, parameter :: alwaysRealType = kind(dummyReal)
-  integer, parameter :: singleType   = kind(dummySingle)
-  integer, parameter :: doubleType   = kind(dummyDouble)
-
-  !
-  !       Set the parameter debug, depending on the compiler option.
-  !
+    ! Dummy single and double types
+    complex(kind=4) :: dummySingle
+    complex(kind=8) :: dummyDouble
+
+    !
+    !       Definition of the porosity type. As this is only a flag to
+    !       indicate whether or not fluxes must be computed, an integer1
+    !       is perfectly okay.
+    !
+    integer(kind=1), private :: dummyPor
+
+    !      Definition of the integer type for the element types. As only
+    !      a limited number element types are present, a 1 byte integer
+    !      is enough.
+    !
+    integer(kind=1), private :: adtDummyElementInt
+
+    !       Definition of the cgns periodic type.
+    !
+    real(kind=4), private :: dummyCGNSPer
+    !
+    !       Definition of the kind parameters for the integer and real
+    !       types.
+    !
+    integer, parameter :: intType = kind(dummyInt)
+    integer, parameter :: porType = kind(dummyPor)
+    integer, parameter :: realType = kind(dummyReal)
+    integer, parameter :: adtElementType = kind(adtDummyElementInt)
+    integer, parameter :: cgnsRealType = kind(dummyCGNSReal)
+    integer, parameter :: cgnsPerType = kind(dummyCGNSPer)
+    integer, parameter :: alwaysRealType = kind(dummyReal)
+    integer, parameter :: singleType = kind(dummySingle)
+    integer, parameter :: doubleType = kind(dummyDouble)
+
+    !
+    !       Set the parameter debug, depending on the compiler option.
+    !
 #ifdef DEBUG_MODE
-  logical, parameter :: debug = .true.
+    logical, parameter :: debug = .true.
 #else
-  logical, parameter :: debug = .false.
+    logical, parameter :: debug = .false.
 #endif
 
 end module precision

From 86ab6d3ab2da8cd3609043f47e619890e2cf1fad Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Tue, 21 Feb 2023 14:07:45 +0100
Subject: [PATCH 49/60] rerun fprettify + tapenade

---
 src/adjoint/outputForward/walldistance_d.f90          | 3 ---
 src/adjoint/outputReverse/walldistance_b.f90          | 3 ---
 src/adjoint/outputReverseFast/walldistance_fast_b.f90 | 3 ---
 src/wallDistance/wallDistance.F90                     | 2 --
 4 files changed, 11 deletions(-)

diff --git a/src/adjoint/outputForward/walldistance_d.f90 b/src/adjoint/outputForward/walldistance_d.f90
index 8b9323668..f4c433ed2 100644
--- a/src/adjoint/outputForward/walldistance_d.f90
+++ b/src/adjoint/outputForward/walldistance_d.f90
@@ -158,7 +158,4 @@ subroutine updatewalldistancesquickly(nn, level, sps)
       end do
     end do
   end subroutine updatewalldistancesquickly
-  subroutine updatewallroughness()
-    implicit none
-  end subroutine updatewallroughness
 end module walldistance_d
diff --git a/src/adjoint/outputReverse/walldistance_b.f90 b/src/adjoint/outputReverse/walldistance_b.f90
index ebc81b847..39c8f1c4d 100644
--- a/src/adjoint/outputReverse/walldistance_b.f90
+++ b/src/adjoint/outputReverse/walldistance_b.f90
@@ -189,7 +189,4 @@ subroutine updatewalldistancesquickly(nn, level, sps)
       end if
     end do
   end subroutine updatewalldistancesquickly
-  subroutine updatewallroughness()
-    implicit none
-  end subroutine updatewallroughness
 end module walldistance_b
diff --git a/src/adjoint/outputReverseFast/walldistance_fast_b.f90 b/src/adjoint/outputReverseFast/walldistance_fast_b.f90
index dc7ff4896..4a82e6539 100644
--- a/src/adjoint/outputReverseFast/walldistance_fast_b.f90
+++ b/src/adjoint/outputReverseFast/walldistance_fast_b.f90
@@ -68,7 +68,4 @@ subroutine updatewalldistancesquickly(nn, level, sps)
       end if
     end do
   end subroutine updatewalldistancesquickly
-  subroutine updatewallroughness()
-    implicit none
-  end subroutine updatewallroughness
 end module walldistance_fast_b
diff --git a/src/wallDistance/wallDistance.F90 b/src/wallDistance/wallDistance.F90
index c0d2672fc..82861c4dd 100644
--- a/src/wallDistance/wallDistance.F90
+++ b/src/wallDistance/wallDistance.F90
@@ -119,7 +119,6 @@ subroutine updateWallDistancesQuickly(nn, level, sps)
 
     end subroutine updateWallDistancesQuickly
 
-
     ! ----------------------------------------------------------------------
     !                                                                      |
     !                    No Tapenade Routine below this line               |
@@ -328,7 +327,6 @@ subroutine updateWallRoughness()
         end do
     end subroutine updateWallRoughness
 
-
     subroutine computeWallDistance(level, allocMem)
         !
         !       wallDistance computes the distances of the cell centers to

From e154b81f14a5fd370d4574ad17145aabb2de8607 Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Tue, 21 Feb 2023 14:09:39 +0100
Subject: [PATCH 50/60] rerun black

---
 tests/reg_tests/test_adjoint.py     | 12 +++---------
 tests/reg_tests/test_functionals.py | 13 +++++--------
 2 files changed, 8 insertions(+), 17 deletions(-)

diff --git a/tests/reg_tests/test_adjoint.py b/tests/reg_tests/test_adjoint.py
index a69006dd7..d422e405a 100644
--- a/tests/reg_tests/test_adjoint.py
+++ b/tests/reg_tests/test_adjoint.py
@@ -214,10 +214,9 @@ def span(val, geo):
         "options": {
             "gridFile": os.path.join(baseDir, "../../input_files/mdo_tutorial_rough.cgns"),
             "restartFile": os.path.join(baseDir, "../../input_files/mdo_tutorial_rough.cgns"),
-            'equationType': 'RANS',
-            'useBlockettes': False,
-            'useRoughSA': True,
-
+            "equationType": "RANS",
+            "useBlockettes": False,
+            "useRoughSA": True,
             "MGCycle": "2w",
             "equationType": "RANS",
             "smoother": "DADI",
@@ -236,14 +235,10 @@ def span(val, geo):
             "NKSwitchTol": 1e-5,
             "NKjacobianlag": 2,
             "L2Convergence": 1e-15,
-
             "adjointL2Convergence": 1e-16,
-
             # to get slightly better complex convergence
             "NKUseEW": False,
             "NKLinearSolveTol": 1e-6,
-
-
         },
         "ref_file": "adjoint_rans_rough_sa.json",
         "aero_prob": ap_tutorial_wing,
@@ -251,7 +246,6 @@ def span(val, geo):
         # "evalFuncs": ["cd"],
         "N_PROCS": 2,
     },
-
 ]
 
 
diff --git a/tests/reg_tests/test_functionals.py b/tests/reg_tests/test_functionals.py
index 6b0c58896..07bd73b3c 100644
--- a/tests/reg_tests/test_functionals.py
+++ b/tests/reg_tests/test_functionals.py
@@ -208,10 +208,9 @@
             "options": {
                 "gridFile": os.path.join(baseDir, "../../input_files/mdo_tutorial_rough.cgns"),
                 "restartFile": os.path.join(baseDir, "../../input_files/mdo_tutorial_rough.cgns"),
-                'equationType': 'RANS',
-                'useBlockettes': False,
-                'useRoughSA': True,
-
+                "equationType": "RANS",
+                "useBlockettes": False,
+                "useRoughSA": True,
                 "MGCycle": "2w",
                 "equationType": "RANS",
                 "smoother": "DADI",
@@ -266,11 +265,9 @@
             },
             "ref_file": "funcs_rans_tut_wing.json",
             "aero_prob": ap_tutorial_wing,
-            "no_train": True,       # This test should not be able to over-write
-                                    # the training file as it is coming from a different test
+            "no_train": True,  # This test should not be able to over-write
+            # the training file as it is coming from a different test
         },
-
-
     ]
 )
 class TestFunctionals(reg_test_classes.RegTest):

From 2ae3fe634954681b344d0ff4ac57f6d3e6eff615 Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Tue, 21 Feb 2023 14:34:50 +0100
Subject: [PATCH 51/60] retrain SA-rough test

---
 tests/reg_tests/refs/funcs_rans_rough_sa.json | 557 ++++++++++++------
 1 file changed, 382 insertions(+), 175 deletions(-)

diff --git a/tests/reg_tests/refs/funcs_rans_rough_sa.json b/tests/reg_tests/refs/funcs_rans_rough_sa.json
index ccf3b126e..2bce0b7da 100644
--- a/tests/reg_tests/refs/funcs_rans_rough_sa.json
+++ b/tests/reg_tests/refs/funcs_rans_rough_sa.json
@@ -1,39 +1,48 @@
 {
     "Dot product test for (w, xV) -> (dw, F)": 25052510162.91482,
-    "Dot product test for Xv -> R": -7422557.015466748,
-    "Dot product test for w -> F": 48678.90552195662,
+    "Dot product test for Xv -> R": -7422557.015466751,
+    "Dot product test for w -> F": 48678.90552195639,
     "Dot product test for w -> R": 25061252292.45091,
-    "Dot product test for xV -> F": -1368251.4261434954,
+    "Dot product test for xV -> F": -1368251.4261434958,
     "Eval Functions:": {
-        "mdo_tutorial_cd": 0.024566400785789793,
-        "mdo_tutorial_cfx": 0.011668173003364507,
-        "mdo_tutorial_cfy": 0.4108141827884017,
-        "mdo_tutorial_cfz": 0.0075654034410337,
-        "mdo_tutorial_cl": 0.41024496461720367,
-        "mdo_tutorial_cmx": -0.7922014856117325,
+        "mdo_tutorial_cd": 0.0245664007857898,
+        "mdo_tutorial_cfx": 0.01166817300336451,
+        "mdo_tutorial_cfy": 0.4108141827884018,
+        "mdo_tutorial_cfz": 0.007565403441033702,
+        "mdo_tutorial_cl": 0.4102449646172038,
+        "mdo_tutorial_cmx": -0.7922014856117326,
         "mdo_tutorial_cmy": -0.008981003314113318,
         "mdo_tutorial_cmz": 0.6295255266022379,
-        "mdo_tutorial_drag": 10015.230272350784,
-        "mdo_tutorial_fx": 4756.880770011643,
-        "mdo_tutorial_fy": 167480.72603917564,
-        "mdo_tutorial_fz": 3084.2636748406194,
-        "mdo_tutorial_lift": 167248.66717514166,
-        "mdo_tutorial_mx": -1049635.2803761212,
-        "mdo_tutorial_my": -11899.470151067582,
-        "mdo_tutorial_mz": 834096.1417269013,
+        "mdo_tutorial_cofxx": 3.3323101324392925,
+        "mdo_tutorial_cofxy": -0.1348493497186727,
+        "mdo_tutorial_cofxz": 1.6012560760241181,
+        "mdo_tutorial_cofyx": 4.97642146149654,
+        "mdo_tutorial_cofyy": 0.2601267269394276,
+        "mdo_tutorial_cofyz": 6.271918158078769,
+        "mdo_tutorial_cofzx": 6.327751594707445,
+        "mdo_tutorial_cofzy": 0.25617991212816227,
+        "mdo_tutorial_cofzz": 10.432780012944532,
+        "mdo_tutorial_drag": 10015.230272350786,
+        "mdo_tutorial_fx": 4756.880770011644,
+        "mdo_tutorial_fy": 167480.72603917567,
+        "mdo_tutorial_fz": 3084.26367484062,
+        "mdo_tutorial_lift": 167248.6671751417,
+        "mdo_tutorial_mx": -1049635.2803761214,
+        "mdo_tutorial_my": -11899.470151067584,
+        "mdo_tutorial_mz": 834096.1417269012,
         "mdo_tutorial_sepsensor": 0.016301194542570726,
         "mdo_tutorial_sepsensoravgx": 0.09540342694812158,
         "mdo_tutorial_sepsensoravgy": 9.994583497283502e-05,
-        "mdo_tutorial_sepsensoravgz": 0.05019620000103363
+        "mdo_tutorial_sepsensoravgz": 0.05019620000103362
     },
-    "Norm of residual": 2.287162877608185e-16,
-    "Norm of state vector": 472.577787711008,
-    "Sum of Forces x": 4756.880770011647,
+    "Norm of residual": 2.287162877608165e-16,
+    "Norm of state vector": 472.57778771100806,
+    "Sum of Forces x": 4756.880770011646,
     "Sum of Forces y": 167480.72603917564,
-    "Sum of Forces z": 3084.263674840622,
-    "Sum of Tractions x": 336771.10695490666,
+    "Sum of Forces z": 3084.263674840623,
+    "Sum of Tractions x": 336771.1069549067,
     "Sum of Tractions y": 1650799.5101746526,
-    "Sum of Tractions z": 279940.140789498,
+    "Sum of Tractions z": 279940.1407894979,
     "Total number of adjoint state DOF": {
         "__ndarray__": 145152,
         "dtype": "int64",
@@ -50,18 +59,27 @@
         "shape": []
     },
     "dFuncs/dP": {
-        "cd": 1.0255863078366303e-07,
-        "cfx": 1.0254457111038081e-07,
-        "cfy": 2.0585057746076828e-09,
-        "cfz": 3.0809570252248843e-09,
-        "cl": -1.163512791647045e-09,
-        "cmx": -5.0258036886875666e-09,
-        "cmy": 1.8366346879812804e-07,
-        "cmz": 2.0700897670852983e-09,
+        "cd": 1.0255863078366282e-07,
+        "cfx": 1.025445711103807e-07,
+        "cfy": 2.0585057746041135e-09,
+        "cfz": 3.0809570252249344e-09,
+        "cl": -1.1635127916506094e-09,
+        "cmx": -5.02580368867878e-09,
+        "cmy": 1.836634687981281e-07,
+        "cmz": 2.0700897670896914e-09,
+        "cofxx": 1.0375012939594107e-05,
+        "cofxy": 1.4128496198936042e-06,
+        "cofxz": 3.848381803761703e-05,
+        "cofyx": -2.0907153334620347e-09,
+        "cofyy": 4.510954938890027e-12,
+        "cofyz": 7.602063203330032e-09,
+        "cofzx": -4.1846282243704764e-07,
+        "cofzy": -1.4398784783818334e-07,
+        "cofzz": -2.2729288082730607e-06,
         "drag": 0.542572616215423,
         "fx": 0.27964940925086224,
         "fy": 8.374875513592976,
-        "fz": 0.15546922830207469,
+        "fz": 0.1554692283020747,
         "lift": 8.361959017862185,
         "mx": -52.48842300766143,
         "my": -0.3516267579346115,
@@ -72,63 +90,90 @@
         "sepsensoravgz": 0.0
     },
     "dFuncs/dT": {
-        "cd": -6.434671811482668e-21,
-        "cfx": -6.433182828244117e-21,
+        "cd": -6.4346718114826674e-21,
+        "cfx": -6.433182828244116e-21,
         "cfy": -1.4846411730779536e-22,
-        "cfz": -1.846656102488088e-22,
-        "cl": 5.368029669547891e-23,
+        "cfz": -1.8466561024880875e-22,
+        "cl": 5.368029669547889e-23,
         "cmx": 3.5502025762938107e-22,
-        "cmy": -1.2030208599706458e-20,
-        "cmz": -1.3651932049147963e-22,
-        "drag": -2.6232870041052545e-15,
-        "fx": -2.6226799754185616e-15,
+        "cmy": -1.2030208599706455e-20,
+        "cmz": -1.3651932049147965e-22,
+        "cofxx": -7.112142957869289e-19,
+        "cofxy": -9.293132415071333e-20,
+        "cofxz": -2.5449799394819826e-18,
+        "cofyx": 1.9060681131220013e-22,
+        "cofyy": 1.393419545416948e-23,
+        "cofyz": -4.828727908497158e-22,
+        "cofzx": 3.572271393378157e-20,
+        "cofzy": 9.463827478678352e-21,
+        "cofzz": 1.5205639797937836e-19,
+        "drag": -2.623287004105254e-15,
+        "fx": -2.622679975418561e-15,
         "fy": -6.052585134404201e-17,
-        "fz": -7.528447598623437e-17,
-        "lift": 2.1884383356812845e-17,
+        "fz": -7.528447598623434e-17,
+        "lift": 2.1884383356812833e-17,
         "mx": 4.703876405486247e-16,
-        "my": -1.5939545186267067e-14,
-        "mz": -1.8088263887839085e-16,
+        "my": -1.5939545186267064e-14,
+        "mz": -1.8088263887839087e-16,
         "sepsensor": 0.0,
         "sepsensoravgx": 0.0,
         "sepsensoravgy": 0.0,
         "sepsensoravgz": 0.0
     },
     "dFuncs/dXv * xVDot": {
-        "cd": 4.252968273207523,
-        "cfx": 4.249035399663728,
+        "cd": 4.2529682732075225,
+        "cfx": 4.249035399663727,
         "cfy": 0.19195702983176227,
-        "cfz": 0.3506482601505134,
-        "cl": 0.058396883371890924,
-        "cmx": -1.0927832721306252,
+        "cfz": 0.35064826015051337,
+        "cl": 0.05839688337189095,
+        "cmx": -1.092783272130625,
         "cmy": -1.1999451606004616,
-        "cmz": 1.3595113636997629,
-        "drag": 1733850.105621243,
-        "fx": 1732246.7517349084,
+        "cmz": 1.359511363699763,
+        "cofxx": -488.09187989891836,
+        "cofxy": -76.51059615419035,
+        "cofxz": -739.7568018827054,
+        "cofyx": 4.862135728695633,
+        "cofyy": 0.37025124340482296,
+        "cofyz": 5.697393456600477,
+        "cofzx": -19.40543030804781,
+        "cofzy": -12.803792657294439,
+        "cofzz": -140.92641161911112,
+        "drag": 1733850.1056212427,
+        "fx": 1732246.7517349082,
         "fy": 78257.04192181284,
-        "fz": 142952.28269816132,
-        "lift": 23807.241413052492,
-        "mx": -1447894.124242193,
-        "my": -1589879.3399891877,
-        "mz": 1801298.1764476378,
+        "fz": 142952.2826981613,
+        "lift": 23807.2414130525,
+        "mx": -1447894.1242421928,
+        "my": -1589879.3399891874,
+        "mz": 1801298.176447638,
         "sepsensor": -0.11694957310130713,
         "sepsensoravgx": -0.6029863600829567,
-        "sepsensoravgy": 0.007262037328436887,
-        "sepsensoravgz": -0.09612647922522867
+        "sepsensoravgy": 0.007262037328436888,
+        "sepsensoravgz": -0.09612647922522868
     },
     "dFuncs/dalpha": {
-        "cd": 0.007160125372297845,
+        "cd": 0.007160125372297846,
         "cfx": 0.0,
         "cfy": 0.0,
         "cfz": 0.0,
-        "cl": -0.0004287645790765541,
+        "cl": -0.0004287645790765542,
         "cmx": 0.0,
         "cmy": 0.0,
         "cmz": 0.0,
-        "drag": 2919.0399117783854,
+        "cofxx": 0.0,
+        "cofxy": 0.0,
+        "cofxz": 0.0,
+        "cofyx": 0.0,
+        "cofyy": 0.0,
+        "cofyz": 0.0,
+        "cofzx": 0.0,
+        "cofzy": 0.0,
+        "cofzz": 0.0,
+        "drag": 2919.039911778386,
         "fx": 0.0,
         "fy": 0.0,
         "fz": 0.0,
-        "lift": -174.79874359792956,
+        "lift": -174.79874359792962,
         "mx": 0.0,
         "my": 0.0,
         "mz": 0.0,
@@ -138,36 +183,54 @@
         "sepsensoravgz": 0.006150040814983931
     },
     "dFuncs/dbeta": {
-        "cd": -0.007561670371126665,
+        "cd": -0.007561670371126666,
         "cfx": 0.0,
         "cfy": 0.0,
         "cfz": 0.0,
-        "cl": 0.00023763506481515236,
+        "cl": 0.0002376350648151524,
         "cmx": 0.0,
         "cmy": 0.0,
         "cmz": 0.0,
-        "drag": -3082.741776900919,
+        "cofxx": 0.0,
+        "cofxy": 0.0,
+        "cofxz": 0.0,
+        "cofyx": 0.0,
+        "cofyy": 0.0,
+        "cofyz": 0.0,
+        "cofzx": 0.0,
+        "cofzy": 0.0,
+        "cofzz": 0.0,
+        "drag": -3082.7417769009194,
         "fx": 0.0,
         "fy": 0.0,
         "fz": 0.0,
-        "lift": 96.87906322384131,
+        "lift": 96.87906322384133,
         "mx": 0.0,
         "my": 0.0,
         "mz": 0.0,
         "sepsensor": 0.18136049395962298,
-        "sepsensoravgx": 1.106075491578891,
+        "sepsensoravgx": 1.1060754915788906,
         "sepsensoravgy": 0.0011646306998538993,
-        "sepsensoravgz": 0.7192972767550524
+        "sepsensoravgz": 0.7192972767550525
     },
     "dFuncs/dmach": {
-        "cd": -0.06141600196447448,
-        "cfx": -0.02917043250841127,
-        "cfy": -1.0270354569710043,
-        "cfz": -0.018913508602584252,
-        "cl": -1.0256124115430092,
-        "cmx": 1.9805037140293313,
-        "cmy": 0.022452508285283296,
-        "cmz": -1.573813816505595,
+        "cd": -0.06141600196447449,
+        "cfx": -0.029170432508411278,
+        "cfy": -1.0270354569710045,
+        "cfz": -0.018913508602584256,
+        "cl": -1.0256124115430094,
+        "cmx": 1.9805037140293318,
+        "cmy": 0.0224525082852833,
+        "cmz": -1.5738138165055948,
+        "cofxx": 0.0,
+        "cofxy": 0.0,
+        "cofxz": 0.0,
+        "cofyx": 0.0,
+        "cofyy": 0.0,
+        "cofyz": 0.0,
+        "cofzx": 0.0,
+        "cofzy": 0.0,
+        "cofzz": 0.0,
         "drag": 0.0,
         "fx": 0.0,
         "fy": 0.0,
@@ -184,23 +247,32 @@
     "dFuncs/dw * wDot": {
         "cd": 0.031101767260431203,
         "cfx": 0.028800395325415784,
-        "cfy": 0.07371942797663707,
-        "cfz": 0.021716000987844,
-        "cl": 0.0727784096101363,
-        "cmx": -0.15842697054164168,
+        "cfy": 0.07371942797663704,
+        "cfz": 0.02171600098784401,
+        "cl": 0.07277840961013628,
+        "cmx": -0.15842697054164165,
         "cmy": 0.005024771647054986,
-        "cmz": 0.12014382033187082,
-        "drag": 12679.568476732593,
+        "cmz": 0.12014382033187078,
+        "cofxx": -5.883673815216705,
+        "cofxy": 0.4009727991396246,
+        "cofxz": 10.115850378144957,
+        "cofyx": 0.05940247196308512,
+        "cofyy": -0.3350265708291385,
+        "cofyz": 0.12791459575704778,
+        "cofzx": 1.3755158785313164,
+        "cofzy": -0.7322158660616289,
+        "cofzz": -15.884837396918174,
+        "drag": 12679.568476732591,
         "fx": 11741.345166265506,
-        "fy": 30053.936397515397,
-        "fz": 8853.179282724243,
-        "lift": 29670.30202986037,
-        "mx": -209909.39888885355,
+        "fy": 30053.936397515386,
+        "fz": 8853.179282724246,
+        "lift": 29670.30202986036,
+        "mx": -209909.39888885352,
         "my": 6657.621441481974,
-        "mz": 159185.75618691556,
+        "mz": 159185.7561869155,
         "sepsensor": -12.044419253289895,
-        "sepsensoravgx": -68.10234813562754,
-        "sepsensoravgy": -0.08084780944322403,
+        "sepsensoravgx": -68.10234813562751,
+        "sepsensoravgy": -0.08084780944322405,
         "sepsensoravgz": -28.399498948465343
     },
     "dFuncs/dxRef": {
@@ -210,16 +282,25 @@
         "cfz": 0.0,
         "cl": 0.0,
         "cmx": 0.0,
-        "cmy": 0.002327816443394985,
+        "cmy": 0.0023278164433949856,
         "cmz": -0.12640436393489288,
+        "cofxx": 0.0,
+        "cofxy": 0.0,
+        "cofxz": 0.0,
+        "cofyx": 0.0,
+        "cofyy": 0.0,
+        "cofyz": 0.0,
+        "cofzx": 0.0,
+        "cofzy": 0.0,
+        "cofzz": 0.0,
         "drag": 0.0,
         "fx": 0.0,
         "fy": 0.0,
         "fz": 0.0,
         "lift": 0.0,
         "mx": 0.0,
-        "my": 3084.2636748406194,
-        "mz": -167480.72603917564,
+        "my": 3084.26367484062,
+        "mz": -167480.72603917567,
         "sepsensor": 0.0,
         "sepsensoravgx": 0.0,
         "sepsensoravgy": 0.0,
@@ -231,17 +312,26 @@
         "cfy": 0.0,
         "cfz": 0.0,
         "cl": 0.0,
-        "cmx": -0.002327816443394985,
+        "cmx": -0.0023278164433949856,
         "cmy": 0.0,
-        "cmz": 0.0035902070779583106,
+        "cmz": 0.0035902070779583115,
+        "cofxx": 0.0,
+        "cofxy": 0.0,
+        "cofxz": 0.0,
+        "cofyx": 0.0,
+        "cofyy": 0.0,
+        "cofyz": 0.0,
+        "cofzx": 0.0,
+        "cofzy": 0.0,
+        "cofzz": 0.0,
         "drag": 0.0,
         "fx": 0.0,
         "fy": 0.0,
         "fz": 0.0,
         "lift": 0.0,
-        "mx": -3084.2636748406194,
+        "mx": -3084.26367484062,
         "my": 0.0,
-        "mz": 4756.880770011643,
+        "mz": 4756.880770011644,
         "sepsensor": 0.0,
         "sepsensoravgx": 0.0,
         "sepsensoravgy": 0.0,
@@ -254,15 +344,24 @@
         "cfz": 0.0,
         "cl": 0.0,
         "cmx": 0.12640436393489288,
-        "cmy": -0.0035902070779583106,
+        "cmy": -0.0035902070779583115,
         "cmz": 0.0,
+        "cofxx": 0.0,
+        "cofxy": 0.0,
+        "cofxz": 0.0,
+        "cofyx": 0.0,
+        "cofyy": 0.0,
+        "cofyz": 0.0,
+        "cofzx": 0.0,
+        "cofzy": 0.0,
+        "cofzz": 0.0,
         "drag": 0.0,
         "fx": 0.0,
         "fy": 0.0,
         "fz": 0.0,
         "lift": 0.0,
-        "mx": 167480.72603917564,
-        "my": -4756.880770011643,
+        "mx": 167480.72603917567,
+        "my": -4756.880770011644,
         "mz": 0.0,
         "sepsensor": 0.0,
         "sepsensoravgx": 0.0,
@@ -504,8 +603,8 @@
     "||FBar^T * dF/dXv||": 47500739.33793808,
     "||FBar^T * dF/dw||": 159520.82502357475,
     "||FBar^T * dF/xDv||": {
-        "P_mdo_tutorial": 57.286099642092886,
-        "T_mdo_tutorial": 9.947598300641403e-14,
+        "P_mdo_tutorial": 57.28609964209285,
+        "T_mdo_tutorial": 2.842170943040401e-14,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
         "mach_mdo_tutorial": 0.0,
@@ -519,35 +618,35 @@
     "||dF/dalpha||": 0.0,
     "||dF/dbeta||": 0.0,
     "||dF/dmach||": 0.0,
-    "||dF/dw * wDot||": 131479.675920046,
+    "||dF/dw * wDot||": 131479.67592004602,
     "||dF/dxRef||": 0.0,
     "||dF/dyRef||": 0.0,
     "||dF/dzRef||": 0.0,
     "||dR/dP||": 0.07356605182246512,
     "||dR/dT||": 9.22063005758926e-07,
     "||dR/dXv * xVDot||": 26736790.15992888,
-    "||dR/dalpha||": 0.010271084828244875,
-    "||dR/dbeta||": 0.6070879776942454,
+    "||dR/dalpha||": 0.010271084828244873,
+    "||dR/dbeta||": 0.6070879776942455,
     "||dR/dmach||": 0.9283860217641269,
     "||dR/dw * wDot||": 2695502679.7876015,
     "||dR/dxRef||": 0.0,
     "||dR/dyRef||": 0.0,
     "||dR/dzRef||": 0.0,
     "||dcd/dXdv||": {
-        "P_mdo_tutorial": 1.0255863078366315e-07,
-        "T_mdo_tutorial": 6.776263578034403e-21,
-        "alpha_mdo_tutorial": 0.007160125372297843,
-        "beta_mdo_tutorial": -0.00013197604492666193,
+        "P_mdo_tutorial": 1.0255863078366487e-07,
+        "T_mdo_tutorial": -3.3881317890172014e-21,
+        "alpha_mdo_tutorial": 0.007160125372297845,
+        "beta_mdo_tutorial": -0.00013197604492666196,
         "mach_mdo_tutorial": -0.06141600196447446,
         "xRef_mdo_tutorial": 0.0,
         "yRef_mdo_tutorial": 0.0,
         "zRef_mdo_tutorial": 0.0
     },
-    "||dcd/dXv||": 27.91337076794424,
+    "||dcd/dXv||": 27.913370767944244,
     "||dcd/dw||": 0.014161257652762545,
     "||dcfx/dXdv||": {
-        "P_mdo_tutorial": 1.0254457111038001e-07,
-        "T_mdo_tutorial": 1.0164395367051604e-20,
+        "P_mdo_tutorial": 1.0254457111038414e-07,
+        "T_mdo_tutorial": 1.8634724839594607e-20,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
         "mach_mdo_tutorial": -0.02917043250841126,
@@ -556,13 +655,13 @@
         "zRef_mdo_tutorial": 0.0
     },
     "||dcfx/dXv||": 27.912716195114633,
-    "||dcfx/dw||": 0.014090362780761697,
+    "||dcfx/dw||": 0.014090362780761695,
     "||dcfy/dXdv||": {
-        "P_mdo_tutorial": 2.058505774664427e-09,
-        "T_mdo_tutorial": 1.376428539288238e-21,
+        "P_mdo_tutorial": 2.058505774630354e-09,
+        "T_mdo_tutorial": 1.138200522872966e-21,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
-        "mach_mdo_tutorial": -1.0270354569710038,
+        "mach_mdo_tutorial": -1.027035456971004,
         "xRef_mdo_tutorial": 0.0,
         "yRef_mdo_tutorial": 0.0,
         "zRef_mdo_tutorial": 0.0
@@ -570,11 +669,11 @@
     "||dcfy/dXv||": 2.7931029821306077,
     "||dcfy/dw||": 0.11354907277670545,
     "||dcfz/dXdv||": {
-        "P_mdo_tutorial": 3.0809570252252938e-09,
-        "T_mdo_tutorial": 6.352747104407253e-22,
+        "P_mdo_tutorial": 3.080957025225676e-09,
+        "T_mdo_tutorial": -1.0587911840678754e-22,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
-        "mach_mdo_tutorial": -0.018913508602584256,
+        "mach_mdo_tutorial": -0.01891350860258426,
         "xRef_mdo_tutorial": 0.0,
         "yRef_mdo_tutorial": 0.0,
         "zRef_mdo_tutorial": 0.0
@@ -582,10 +681,10 @@
     "||dcfz/dXv||": 2.519605387991788,
     "||dcfz/dw||": 0.007655345743081828,
     "||dcl/dXdv||": {
-        "P_mdo_tutorial": -1.1635127915569029e-09,
-        "T_mdo_tutorial": 1.5881867761018131e-22,
-        "alpha_mdo_tutorial": -0.000428764579076554,
-        "beta_mdo_tutorial": 4.147514299214541e-06,
+        "P_mdo_tutorial": -1.163512791603402e-09,
+        "T_mdo_tutorial": 6.352747104407253e-22,
+        "alpha_mdo_tutorial": -0.0004287645790765541,
+        "beta_mdo_tutorial": 4.1475142992145416e-06,
         "mach_mdo_tutorial": -1.0256124115430092,
         "xRef_mdo_tutorial": 0.0,
         "yRef_mdo_tutorial": 0.0,
@@ -594,46 +693,154 @@
     "||dcl/dXv||": 2.786553790809974,
     "||dcl/dw||": 0.11354025292131026,
     "||dcmx/dXdv||": {
-        "P_mdo_tutorial": -5.02580368859844e-09,
-        "T_mdo_tutorial": 3.1763735522036263e-21,
+        "P_mdo_tutorial": -5.025803688610319e-09,
+        "T_mdo_tutorial": -2.6469779601696886e-22,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
-        "mach_mdo_tutorial": 1.9805037140293307,
+        "mach_mdo_tutorial": 1.9805037140293311,
         "xRef_mdo_tutorial": 0.0,
-        "yRef_mdo_tutorial": -0.002327816443394987,
+        "yRef_mdo_tutorial": -0.0023278164433949873,
         "zRef_mdo_tutorial": 0.12640436393489274
     },
     "||dcmx/dXv||": 5.614216937125317,
     "||dcmx/dw||": 0.21272941416694607,
     "||dcmy/dXdv||": {
-        "P_mdo_tutorial": 1.8366346879813387e-07,
-        "T_mdo_tutorial": -2.0328790734103208e-20,
+        "P_mdo_tutorial": 1.8366346879813508e-07,
+        "T_mdo_tutorial": -5.759824041329242e-20,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
-        "mach_mdo_tutorial": 0.022452508285283296,
-        "xRef_mdo_tutorial": 0.002327816443394987,
+        "mach_mdo_tutorial": 0.022452508285283303,
+        "xRef_mdo_tutorial": 0.0023278164433949873,
         "yRef_mdo_tutorial": 0.0,
-        "zRef_mdo_tutorial": -0.0035902070779583175
+        "zRef_mdo_tutorial": -0.003590207077958318
     },
     "||dcmy/dXv||": 54.53814156559552,
     "||dcmy/dw||": 0.033839731077361684,
     "||dcmz/dXdv||": {
-        "P_mdo_tutorial": 2.070089767240118e-09,
-        "T_mdo_tutorial": 2.117582368135751e-22,
+        "P_mdo_tutorial": 2.0700897671846038e-09,
+        "T_mdo_tutorial": 4.450231695535289e-22,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
-        "mach_mdo_tutorial": -1.5738138165055944,
+        "mach_mdo_tutorial": -1.5738138165055942,
         "xRef_mdo_tutorial": -0.12640436393489274,
-        "yRef_mdo_tutorial": 0.0035902070779583175,
+        "yRef_mdo_tutorial": 0.003590207077958318,
         "zRef_mdo_tutorial": 0.0
     },
     "||dcmz/dXv||": 3.669722758336583,
     "||dcmz/dw||": 0.1701587174303123,
+    "||dcofxx/dXdv||": {
+        "P_mdo_tutorial": 1.0375012939594005e-05,
+        "T_mdo_tutorial": 0.0,
+        "alpha_mdo_tutorial": 0.0,
+        "beta_mdo_tutorial": 0.0,
+        "mach_mdo_tutorial": 0.0,
+        "xRef_mdo_tutorial": 0.0,
+        "yRef_mdo_tutorial": 0.0,
+        "zRef_mdo_tutorial": 0.0
+    },
+    "||dcofxx/dXv||": 3932.154034943831,
+    "||dcofxx/dw||": 2.443894963885114,
+    "||dcofxy/dXdv||": {
+        "P_mdo_tutorial": 1.4128496198935706e-06,
+        "T_mdo_tutorial": 9.486769009248164e-20,
+        "alpha_mdo_tutorial": 0.0,
+        "beta_mdo_tutorial": 0.0,
+        "mach_mdo_tutorial": 0.0,
+        "xRef_mdo_tutorial": 0.0,
+        "yRef_mdo_tutorial": 0.0,
+        "zRef_mdo_tutorial": 0.0
+    },
+    "||dcofxy/dXv||": 623.1593133276403,
+    "||dcofxy/dw||": 0.23025338834961062,
+    "||dcofxz/dXdv||": {
+        "P_mdo_tutorial": 3.848381803761737e-05,
+        "T_mdo_tutorial": -5.204170427930421e-18,
+        "alpha_mdo_tutorial": 0.0,
+        "beta_mdo_tutorial": 0.0,
+        "mach_mdo_tutorial": 0.0,
+        "xRef_mdo_tutorial": 0.0,
+        "yRef_mdo_tutorial": 0.0,
+        "zRef_mdo_tutorial": 0.0
+    },
+    "||dcofxz/dXv||": 11552.298850230323,
+    "||dcofxz/dw||": 5.698262110691421,
+    "||dcofyx/dXdv||": {
+        "P_mdo_tutorial": -2.0907153332361424e-09,
+        "T_mdo_tutorial": 2.541098841762901e-21,
+        "alpha_mdo_tutorial": 0.0,
+        "beta_mdo_tutorial": 0.0,
+        "mach_mdo_tutorial": 0.0,
+        "xRef_mdo_tutorial": 0.0,
+        "yRef_mdo_tutorial": 0.0,
+        "zRef_mdo_tutorial": 0.0
+    },
+    "||dcofyx/dXv||": 12.79568795413275,
+    "||dcofyx/dw||": 0.4192809957752443,
+    "||dcofyy/dXdv||": {
+        "P_mdo_tutorial": 4.510954947116929e-12,
+        "T_mdo_tutorial": 3.1763735522036263e-22,
+        "alpha_mdo_tutorial": 0.0,
+        "beta_mdo_tutorial": 0.0,
+        "mach_mdo_tutorial": 0.0,
+        "xRef_mdo_tutorial": 0.0,
+        "yRef_mdo_tutorial": 0.0,
+        "zRef_mdo_tutorial": 0.0
+    },
+    "||dcofyy/dXv||": 1.9032269476611579,
+    "||dcofyy/dw||": 0.0825146038902352,
+    "||dcofyz/dXdv||": {
+        "P_mdo_tutorial": 7.602063203684432e-09,
+        "T_mdo_tutorial": 2.8587361969832636e-21,
+        "alpha_mdo_tutorial": 0.0,
+        "beta_mdo_tutorial": 0.0,
+        "mach_mdo_tutorial": 0.0,
+        "xRef_mdo_tutorial": 0.0,
+        "yRef_mdo_tutorial": 0.0,
+        "zRef_mdo_tutorial": 0.0
+    },
+    "||dcofyz/dXv||": 20.03084467833954,
+    "||dcofyz/dw||": 0.8509322500970162,
+    "||dcofzx/dXdv||": {
+        "P_mdo_tutorial": -4.184628224365662e-07,
+        "T_mdo_tutorial": 2.1006417091906648e-19,
+        "alpha_mdo_tutorial": 0.0,
+        "beta_mdo_tutorial": 0.0,
+        "mach_mdo_tutorial": 0.0,
+        "xRef_mdo_tutorial": 0.0,
+        "yRef_mdo_tutorial": 0.0,
+        "zRef_mdo_tutorial": 0.0
+    },
+    "||dcofzx/dXv||": 827.0135715836628,
+    "||dcofzx/dw||": 2.862119939722488,
+    "||dcofzy/dXdv||": {
+        "P_mdo_tutorial": -1.4398784783814194e-07,
+        "T_mdo_tutorial": 1.6093625997831706e-20,
+        "alpha_mdo_tutorial": 0.0,
+        "beta_mdo_tutorial": 0.0,
+        "mach_mdo_tutorial": 0.0,
+        "xRef_mdo_tutorial": 0.0,
+        "yRef_mdo_tutorial": 0.0,
+        "zRef_mdo_tutorial": 0.0
+    },
+    "||dcofzy/dXv||": 83.78696877492817,
+    "||dcofzy/dw||": 0.2947788448865528,
+    "||dcofzz/dXdv||": {
+        "P_mdo_tutorial": -2.2729288082728007e-06,
+        "T_mdo_tutorial": -9.486769009248164e-20,
+        "alpha_mdo_tutorial": 0.0,
+        "beta_mdo_tutorial": 0.0,
+        "mach_mdo_tutorial": 0.0,
+        "xRef_mdo_tutorial": 0.0,
+        "yRef_mdo_tutorial": 0.0,
+        "zRef_mdo_tutorial": 0.0
+    },
+    "||dcofzz/dXv||": 1682.371794945297,
+    "||dcofzz/dw||": 5.947434235310889,
     "||ddrag/dXdv||": {
-        "P_mdo_tutorial": 0.5425726162154224,
-        "T_mdo_tutorial": 1.7763568394002505e-15,
-        "alpha_mdo_tutorial": 2919.039911778386,
-        "beta_mdo_tutorial": -53.803993995701546,
+        "P_mdo_tutorial": 0.5425726162154232,
+        "T_mdo_tutorial": -2.6645352591003757e-15,
+        "alpha_mdo_tutorial": 2919.0399117783863,
+        "beta_mdo_tutorial": -53.80399399570155,
         "mach_mdo_tutorial": 0.0,
         "xRef_mdo_tutorial": 0.0,
         "yRef_mdo_tutorial": 0.0,
@@ -642,8 +849,8 @@
     "||ddrag/dXv||": 11379722.994675511,
     "||ddrag/dw||": 5773.261519878235,
     "||dfx/dXdv||": {
-        "P_mdo_tutorial": 0.279649409250863,
-        "T_mdo_tutorial": -1.0658141036401503e-14,
+        "P_mdo_tutorial": 0.27964940925086246,
+        "T_mdo_tutorial": -2.6645352591003757e-15,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
         "mach_mdo_tutorial": 0.0,
@@ -654,8 +861,8 @@
     "||dfx/dXv||": 11379456.138424335,
     "||dfx/dw||": 5744.359098460929,
     "||dfy/dXdv||": {
-        "P_mdo_tutorial": 8.374875513592972,
-        "T_mdo_tutorial": 8.326672684688674e-17,
+        "P_mdo_tutorial": 8.374875513592984,
+        "T_mdo_tutorial": 9.71445146547012e-17,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
         "mach_mdo_tutorial": 0.0,
@@ -666,7 +873,7 @@
     "||dfy/dXv||": 1138692.2237550062,
     "||dfy/dw||": 46291.685989607286,
     "||dfz/dXdv||": {
-        "P_mdo_tutorial": 0.1554692283020747,
+        "P_mdo_tutorial": 0.15546922830207502,
         "T_mdo_tutorial": -5.551115123125783e-17,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
@@ -676,12 +883,12 @@
         "zRef_mdo_tutorial": 0.0
     },
     "||dfz/dXv||": 1027192.7245764921,
-    "||dfz/dw||": 3120.931352539599,
+    "||dfz/dw||": 3120.9313525395996,
     "||dlift/dXdv||": {
-        "P_mdo_tutorial": 8.361959017862194,
-        "T_mdo_tutorial": -2.914335439641036e-16,
-        "alpha_mdo_tutorial": -174.7987435979296,
-        "beta_mdo_tutorial": 1.6908586295037842,
+        "P_mdo_tutorial": 8.361959017862176,
+        "T_mdo_tutorial": -1.6653345369377348e-16,
+        "alpha_mdo_tutorial": -174.79874359792962,
+        "beta_mdo_tutorial": 1.6908586295037844,
         "mach_mdo_tutorial": 0.0,
         "xRef_mdo_tutorial": 0.0,
         "yRef_mdo_tutorial": 0.0,
@@ -690,8 +897,8 @@
     "||dlift/dXv||": 1136022.2494374104,
     "||dlift/dw||": 46288.09031095977,
     "||dmx/dXdv||": {
-        "P_mdo_tutorial": -52.48842300766153,
-        "T_mdo_tutorial": -3.9968028886505635e-15,
+        "P_mdo_tutorial": -52.48842300766147,
+        "T_mdo_tutorial": -4.6074255521944e-15,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
         "mach_mdo_tutorial": 0.0,
@@ -699,28 +906,28 @@
         "yRef_mdo_tutorial": -3084.263674840622,
         "zRef_mdo_tutorial": 167480.72603917567
     },
-    "||dmx/dXv||": 7438612.873013561,
+    "||dmx/dXv||": 7438612.87301356,
     "||dmx/dw||": 281857.96459463687,
     "||dmy/dXdv||": {
-        "P_mdo_tutorial": -0.35162675793460774,
-        "T_mdo_tutorial": 8.526512829121202e-14,
+        "P_mdo_tutorial": -0.35162675793461595,
+        "T_mdo_tutorial": 6.750155989720952e-14,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
         "mach_mdo_tutorial": 0.0,
         "xRef_mdo_tutorial": 3084.263674840622,
         "yRef_mdo_tutorial": 0.0,
-        "zRef_mdo_tutorial": -4756.880770011648
+        "zRef_mdo_tutorial": -4756.880770011646
     },
     "||dmy/dXv||": 72260856.04875143,
     "||dmy/dw||": 44836.290088261136,
     "||dmz/dXdv||": {
         "P_mdo_tutorial": 41.70754987248287,
-        "T_mdo_tutorial": -6.661338147750939e-16,
+        "T_mdo_tutorial": -7.042977312465837e-16,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
         "mach_mdo_tutorial": 0.0,
         "xRef_mdo_tutorial": -167480.72603917567,
-        "yRef_mdo_tutorial": 4756.880770011648,
+        "yRef_mdo_tutorial": 4756.880770011646,
         "zRef_mdo_tutorial": 0.0
     },
     "||dmz/dXv||": 4862235.8658856405,
@@ -728,8 +935,8 @@
     "||dsepsensor/dXdv||": {
         "P_mdo_tutorial": 0.0,
         "T_mdo_tutorial": 0.0,
-        "alpha_mdo_tutorial": 0.0008890133278288007,
-        "beta_mdo_tutorial": 0.003165337752638709,
+        "alpha_mdo_tutorial": 0.0008890133278288006,
+        "beta_mdo_tutorial": 0.00316533775263871,
         "mach_mdo_tutorial": 0.0,
         "xRef_mdo_tutorial": 0.0,
         "yRef_mdo_tutorial": 0.0,
@@ -741,31 +948,31 @@
         "P_mdo_tutorial": 0.0,
         "T_mdo_tutorial": 0.0,
         "alpha_mdo_tutorial": 0.0062118492074274565,
-        "beta_mdo_tutorial": 0.01930465910366647,
+        "beta_mdo_tutorial": 0.01930465910366646,
         "mach_mdo_tutorial": 0.0,
         "xRef_mdo_tutorial": 0.0,
         "yRef_mdo_tutorial": 0.0,
         "zRef_mdo_tutorial": 0.0
     },
-    "||dsepsensoravgx/dXv||": 3.355503366983782,
+    "||dsepsensoravgx/dXv||": 3.3555033669837817,
     "||dsepsensoravgx/dw||": 42.297878810572435,
     "||dsepsensoravgy/dXdv||": {
         "P_mdo_tutorial": 0.0,
         "T_mdo_tutorial": 0.0,
-        "alpha_mdo_tutorial": 1.1629154598405565e-06,
-        "beta_mdo_tutorial": 2.032664028225635e-05,
+        "alpha_mdo_tutorial": 1.1629154598405582e-06,
+        "beta_mdo_tutorial": 2.0326640282256375e-05,
         "mach_mdo_tutorial": 0.0,
         "xRef_mdo_tutorial": 0.0,
         "yRef_mdo_tutorial": 0.0,
         "zRef_mdo_tutorial": 0.0
     },
-    "||dsepsensoravgy/dXv||": 0.0037955898018984654,
-    "||dsepsensoravgy/dw||": 0.07333646978426119,
+    "||dsepsensoravgy/dXv||": 0.003795589801898465,
+    "||dsepsensoravgy/dw||": 0.07333646978426117,
     "||dsepsensoravgz/dXdv||": {
         "P_mdo_tutorial": 0.0,
         "T_mdo_tutorial": 0.0,
-        "alpha_mdo_tutorial": 0.006150040814983931,
-        "beta_mdo_tutorial": 0.012554105780004536,
+        "alpha_mdo_tutorial": 0.006150040814983934,
+        "beta_mdo_tutorial": 0.01255410578000454,
         "mach_mdo_tutorial": 0.0,
         "xRef_mdo_tutorial": 0.0,
         "yRef_mdo_tutorial": 0.0,
@@ -773,14 +980,14 @@
     },
     "||dsepsensoravgz/dXv||": 3.3049236735056238,
     "||dsepsensoravgz/dw||": 10.598482329462021,
-    "||dwBar^T * dR/dXv||": 111601096.04341303,
-    "||dwBar^T * dR/dw||": 2217397593.2762733,
+    "||dwBar^T * dR/dXv||": 111601096.043413,
+    "||dwBar^T * dR/dw||": 2217397593.2762737,
     "||dwBar^T * dR/xDv||": {
-        "P_mdo_tutorial": 0.8893628179225441,
-        "T_mdo_tutorial": -7.760276361068463e-06,
+        "P_mdo_tutorial": 0.8893628179225485,
+        "T_mdo_tutorial": -7.76027680160496e-06,
         "alpha_mdo_tutorial": -0.0020004837586182936,
-        "beta_mdo_tutorial": -0.025156969826717016,
-        "mach_mdo_tutorial": -3.2833710241496035,
+        "beta_mdo_tutorial": -0.02515696982671701,
+        "mach_mdo_tutorial": -3.283371024149604,
         "xRef_mdo_tutorial": 0.0,
         "yRef_mdo_tutorial": 0.0,
         "zRef_mdo_tutorial": 0.0

From 7ca276b953ed8404587f58fdf20df00a3db4f3d6 Mon Sep 17 00:00:00 2001
From: andv <andv@zhaw.ch>
Date: Tue, 21 Feb 2023 16:35:02 +0100
Subject: [PATCH 52/60] rerun black with version 23.1.0

---
 tests/reg_tests/test_adjoint.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/reg_tests/test_adjoint.py b/tests/reg_tests/test_adjoint.py
index d422e405a..47ff0d4ae 100644
--- a/tests/reg_tests/test_adjoint.py
+++ b/tests/reg_tests/test_adjoint.py
@@ -424,7 +424,6 @@ def cmplx_test_geom_dvs(self):
         atol = 5e-9
 
         for dv in ["span", "twist", "shape"]:
-
             xRef[dv][0] += self.h * 1j
 
             self.CFDSolver.resetFlow(self.ap)

From eb85158aca99a407f38166bdde9964d708fdbbaf Mon Sep 17 00:00:00 2001
From: David Anderegg <andv@zhaw.ch>
Date: Fri, 20 Oct 2023 10:48:13 -0400
Subject: [PATCH 53/60] update rought test and retrain

---
 .../reg_tests/refs/adjoint_rans_rough_sa.json | 873 +++++++++---------
 tests/reg_tests/test_functionals.py           |   2 +
 2 files changed, 449 insertions(+), 426 deletions(-)

diff --git a/tests/reg_tests/refs/adjoint_rans_rough_sa.json b/tests/reg_tests/refs/adjoint_rans_rough_sa.json
index e0b3421db..b7af623a2 100644
--- a/tests/reg_tests/refs/adjoint_rans_rough_sa.json
+++ b/tests/reg_tests/refs/adjoint_rans_rough_sa.json
@@ -1,86 +1,86 @@
 {
     "Eval Functions Sens:": {
         "mdo_tutorial_cd": {
-            "P_mdo_tutorial": 1.1906967920306818e-08,
-            "T_mdo_tutorial": 5.703210137725844e-08,
-            "alpha_mdo_tutorial": 0.005064588210334571,
-            "beta_mdo_tutorial": -0.0008845056168120849,
-            "mach_mdo_tutorial": 0.09103183719324323,
+            "P_mdo_tutorial": 1.1906967920303641e-08,
+            "T_mdo_tutorial": 5.7032101376918994e-08,
+            "alpha_mdo_tutorial": 0.005064588210334737,
+            "beta_mdo_tutorial": -0.0008845056168120727,
+            "mach_mdo_tutorial": 0.09103183719324591,
             "shape": {
                 "__ndarray__": [
                     [
-                        0.0014588467683968598,
-                        -0.00014951327884760212,
-                        -0.015675496092974146,
-                        -0.014075985970051375,
-                        0.0013474930848888166,
-                        0.0018275829889606954,
-                        -0.012563965621770756,
-                        -0.010466958623448562,
-                        0.0013093995527863652,
-                        -0.0032557348500170855,
-                        0.002277825768411675,
-                        0.006688700916670462,
-                        -0.0018114377012302114,
-                        0.005977863740562657,
-                        0.009835326794037596,
-                        0.007136308037146596,
-                        0.00051486740314912,
-                        -0.001967845348703995,
-                        0.002182197087400649,
-                        0.004428495984520651,
-                        0.00434797688639819,
-                        0.003642262509593754,
-                        0.003191110798437138,
-                        0.004015682626633113,
-                        0.0020419681575339715,
-                        0.002166621981060197,
-                        0.0019894668966113013,
-                        0.0016303554139183803,
-                        -0.019657990220678254,
-                        -0.020829878357254417,
-                        -0.020784582903082136,
-                        -0.01888479845407782,
-                        -0.00036947254958874,
-                        -0.00031304699767892383,
-                        -0.0003057421005384802,
-                        0.0002688066991534009,
-                        -0.017918305571589913,
-                        -0.018865736825431162,
-                        -0.01859183680327999,
-                        -0.0164887342965249,
-                        0.00091265077701028,
-                        0.0008121102434689316,
-                        0.0008901049453480803,
-                        0.0010405908745272647,
-                        -0.004064652115740351,
-                        -0.0037963135756424967,
-                        -0.003204166967230152,
-                        -0.0026293301388228283,
-                        0.003333173380565299,
-                        0.0034921446557946326,
-                        0.0033575207726116423,
-                        0.0030107476981468774,
-                        0.0074402621899147,
-                        0.007319652738596415,
-                        0.006915763888469176,
-                        0.006162240091788922,
-                        0.00042023051048211465,
-                        0.00313156137607408,
-                        0.005383636958473503,
-                        0.006824954530041592,
-                        0.010375635834801573,
-                        0.01198796332413367,
-                        0.011950339779730068,
-                        0.009432494720749723,
-                        0.011250799431237779,
-                        0.009854867862598673,
-                        0.008009279266530044,
-                        0.005465377456483178,
-                        0.006177656586328,
-                        0.005047038528601074,
-                        0.004344575288306905,
-                        0.004048991557116562
+                        0.0014588467683956273,
+                        -0.00014951327884643356,
+                        -0.015675496092977064,
+                        -0.01407598597005118,
+                        0.0013474930848856718,
+                        0.0018275829889551137,
+                        -0.012563965621769828,
+                        -0.010466958623447776,
+                        0.001309399552783985,
+                        -0.0032557348500175582,
+                        0.002277825768411138,
+                        0.006688700916671499,
+                        -0.001811437701229527,
+                        0.005977863740563417,
+                        0.009835326794039072,
+                        0.0071363080371478,
+                        0.0005148674031452701,
+                        -0.00196784534869978,
+                        0.0021821970874052266,
+                        0.0044284959845190385,
+                        0.004347976886409987,
+                        0.0036422625095888,
+                        0.00319111079843394,
+                        0.004015682626633313,
+                        0.0020419681575322636,
+                        0.0021666219810591615,
+                        0.0019894668966112822,
+                        0.0016303554139134088,
+                        -0.01965799022068064,
+                        -0.020829878357256484,
+                        -0.020784582903082428,
+                        -0.018884798454076847,
+                        -0.00036947254958799624,
+                        -0.0003130469976779008,
+                        -0.00030574210053769873,
+                        0.00026880669914821536,
+                        -0.01791830557159001,
+                        -0.018865736825431564,
+                        -0.01859183680328005,
+                        -0.01648873429652418,
+                        0.0009126507770085303,
+                        0.0008121102434698606,
+                        0.0008901049453491454,
+                        0.0010405908745254582,
+                        -0.004064652115740942,
+                        -0.0037963135756442002,
+                        -0.0032041669672322024,
+                        -0.0026293301388217154,
+                        0.0033331733805644533,
+                        0.003492144655793166,
+                        0.0033575207726121146,
+                        0.003010747698152013,
+                        0.007440262189915593,
+                        0.007319652738596754,
+                        0.006915763888467916,
+                        0.006162240091787263,
+                        0.00042023051048307514,
+                        0.0031315613760729832,
+                        0.005383636958473412,
+                        0.006824954530055509,
+                        0.010375635834802659,
+                        0.011987963324134878,
+                        0.011950339779730942,
+                        0.009432494720746398,
+                        0.011250799431240419,
+                        0.00985486786260258,
+                        0.008009279266531904,
+                        0.005465377456478378,
+                        0.006177656586328975,
+                        0.0050470385286012194,
+                        0.004344575288306137,
+                        0.004048991557116653
                     ]
                 ],
                 "dtype": "float64",
@@ -89,16 +89,16 @@
                     72
                 ]
             },
-            "span": 0.0022966478428800424,
+            "span": 0.002296647842880653,
             "twist": {
                 "__ndarray__": [
                     [
-                        -0.0009085026908665598,
-                        -0.0011352775427239743,
-                        -0.0011130263520413698,
-                        -0.0009513309718359052,
-                        -0.000686800358123642,
-                        -0.00031510515859730894
+                        -0.0009085026908666076,
+                        -0.001135277542723973,
+                        -0.0011130263520414156,
+                        -0.0009513309718359675,
+                        -0.0006868003581235842,
+                        -0.00031510515859724665
                     ]
                 ],
                 "dtype": "float64",
@@ -112,86 +112,86 @@
             "zRef_mdo_tutorial": 0.0
         },
         "mdo_tutorial_cl": {
-            "P_mdo_tutorial": -2.2527598084618665e-07,
-            "T_mdo_tutorial": -1.3233798184444104e-07,
-            "alpha_mdo_tutorial": 0.11733097471393325,
-            "beta_mdo_tutorial": -0.005417244776871979,
-            "mach_mdo_tutorial": 0.4461752831328847,
+            "P_mdo_tutorial": -2.2527598084617437e-07,
+            "T_mdo_tutorial": -1.3233798184113253e-07,
+            "alpha_mdo_tutorial": 0.11733097471392283,
+            "beta_mdo_tutorial": -0.005417244776871343,
+            "mach_mdo_tutorial": 0.44617528313282584,
             "shape": {
                 "__ndarray__": [
                     [
-                        0.000664512668947459,
-                        -0.009764051446158363,
-                        -0.38014815803026536,
-                        -0.3170998471501844,
-                        0.01262978825010657,
-                        0.023153961286768835,
-                        -0.19944001682277823,
-                        -0.169191221601281,
-                        0.035149811661999326,
-                        0.06625711164122611,
-                        0.09120798114669897,
-                        0.1595909431983622,
-                        0.0784037050728186,
-                        0.06061704069028219,
-                        0.04711837791039026,
-                        0.16186094246740526,
-                        0.027871468602502482,
-                        0.032536469116873794,
-                        0.041772976243304574,
-                        0.058567756425349775,
-                        0.03130211475024963,
-                        0.02349065555604541,
-                        0.04666963738532008,
-                        0.07533987952044943,
-                        0.008047492664585834,
-                        0.01353940603382281,
-                        0.01504377805258511,
-                        0.01571595124130868,
-                        -0.4638501532002112,
-                        -0.47399094658617624,
-                        -0.44823619262467873,
-                        -0.3688999595192084,
-                        0.008718247472931123,
-                        0.029550002917121544,
-                        0.043126333367589316,
-                        0.04434156612727114,
-                        -0.3956395006967429,
-                        -0.4079460890282394,
-                        -0.38620462938522176,
-                        -0.3163656383524239,
-                        0.06398782335322896,
-                        0.07479424398972617,
-                        0.07286141978007313,
-                        0.05846132788211428,
-                        0.08736644102793703,
-                        0.08697238548655578,
-                        0.08008641195454812,
-                        0.06426734524155492,
-                        0.10936456518484935,
-                        0.10829983178785824,
-                        0.10157510763622173,
-                        0.0825523738580159,
-                        0.1743947516759498,
-                        0.1692317944353174,
-                        0.15458475447925518,
-                        0.12034837358470284,
-                        0.11961118306482939,
-                        0.12724594458071078,
-                        0.11338927137762066,
-                        0.07770667985489071,
-                        0.056709733905460566,
-                        0.03836441806462418,
-                        0.024469816046477105,
-                        0.019976154473583273,
-                        0.04934741117499243,
-                        0.051117751963949146,
-                        0.0563070196687687,
-                        0.06050160881794041,
-                        0.18336405571947617,
-                        0.1839640798107625,
-                        0.17295069839871852,
-                        0.1403137146906817
+                        0.0006645126688766545,
+                        -0.009764051446174316,
+                        -0.38014815803031066,
+                        -0.3170998471501314,
+                        0.012629788250227033,
+                        0.023153961286857292,
+                        -0.19944001682278154,
+                        -0.16919122160128627,
+                        0.035149811662053616,
+                        0.06625711164122668,
+                        0.09120798114669953,
+                        0.1595909431983964,
+                        0.0784037050728588,
+                        0.06061704069025734,
+                        0.047118377910311315,
+                        0.16186094246745536,
+                        0.027871468602449247,
+                        0.03253646911690112,
+                        0.04177297624327168,
+                        0.05856775642534094,
+                        0.03130211475000047,
+                        0.023490655556126543,
+                        0.04666963738539817,
+                        0.07533987952041057,
+                        0.00804749266451707,
+                        0.01353940603375206,
+                        0.015043778052562018,
+                        0.015715951241480397,
+                        -0.46385015320022327,
+                        -0.47399094658618945,
+                        -0.4482361926246763,
+                        -0.36889995951921006,
+                        0.008718247472936355,
+                        0.029550002917111684,
+                        0.04312633336755671,
+                        0.04434156612737814,
+                        -0.3956395006966927,
+                        -0.40794608902821317,
+                        -0.38620462938522016,
+                        -0.3163656383524376,
+                        0.06398782335327348,
+                        0.07479424398976865,
+                        0.07286141978007882,
+                        0.058461327882023476,
+                        0.08736644102793592,
+                        0.08697238548657082,
+                        0.08008641195458757,
+                        0.06426734524160192,
+                        0.10936456518485274,
+                        0.10829983178787791,
+                        0.10157510763623859,
+                        0.08255237385799354,
+                        0.17439475167596127,
+                        0.1692317944353169,
+                        0.15458475447923542,
+                        0.12034837358468198,
+                        0.1196111830648549,
+                        0.1272459445807717,
+                        0.11338927137762347,
+                        0.07770667985456997,
+                        0.05670973390543475,
+                        0.03836441806456059,
+                        0.024469816046396933,
+                        0.01997615447360168,
+                        0.049347411174940806,
+                        0.05111775196390402,
+                        0.056307019668804616,
+                        0.06050160881807444,
+                        0.18336405571950376,
+                        0.18396407981080587,
+                        0.172950698398754,
+                        0.1403137146906595
                     ]
                 ],
                 "dtype": "float64",
@@ -200,16 +200,16 @@
                     72
                 ]
             },
-            "span": 0.043457680475263105,
+            "span": 0.04345768047526699,
             "twist": {
                 "__ndarray__": [
                     [
-                        -0.024654343292180775,
-                        -0.02875083974451341,
-                        -0.025904215715540878,
-                        -0.02015846260061425,
-                        -0.01272474899214707,
-                        -0.004816129705518564
+                        -0.02465434329217739,
+                        -0.028750839744510325,
+                        -0.02590421571553876,
+                        -0.020158462600612555,
+                        -0.012724748992147882,
+                        -0.00481612970551939
                     ]
                 ],
                 "dtype": "float64",
@@ -223,86 +223,86 @@
             "zRef_mdo_tutorial": 0.0
         },
         "mdo_tutorial_cmz": {
-            "P_mdo_tutorial": -3.6562060386387263e-07,
-            "T_mdo_tutorial": -2.3220688985930304e-07,
-            "alpha_mdo_tutorial": 0.15587229388606796,
-            "beta_mdo_tutorial": -0.00960134679308337,
-            "mach_mdo_tutorial": 0.8704444540175647,
+            "P_mdo_tutorial": -3.656206038639332e-07,
+            "T_mdo_tutorial": -2.3220688985427506e-07,
+            "alpha_mdo_tutorial": 0.15587229388605317,
+            "beta_mdo_tutorial": -0.009601346793082532,
+            "mach_mdo_tutorial": 0.8704444540175915,
             "shape": {
                 "__ndarray__": [
                     [
-                        -0.02904712834133137,
-                        -0.04708047818363903,
-                        -0.49032482175390585,
-                        -0.401316554603264,
-                        0.009545464221693022,
-                        0.02672530112488119,
-                        -0.44647159525464397,
-                        -0.37531454266609365,
-                        0.017827410830409764,
-                        0.08962439141045295,
-                        0.13469818406324752,
-                        0.23088231518746674,
-                        0.06582830189607722,
-                        0.07763781029298673,
-                        0.09150772425797266,
-                        0.2507770031456443,
-                        0.051190462308772576,
-                        0.07611282984010259,
-                        0.10139818246299213,
-                        0.14288689493950352,
-                        0.05984752177317515,
-                        0.06087119675464407,
-                        0.11793996845148985,
-                        0.18543384750328865,
-                        -0.035792580712111305,
-                        -0.029063640559724546,
-                        -0.01801377651314351,
-                        -0.0024105812920647285,
-                        -0.6512115102017961,
-                        -0.7548563235266409,
-                        -0.8101129239208131,
-                        -0.7525332782418399,
-                        -0.04452557149988305,
-                        -0.01709954630398286,
-                        0.016810542144914467,
-                        0.041434039606613654,
-                        -0.5458096626601696,
-                        -0.6405871067374069,
-                        -0.6906119609438893,
-                        -0.6398074456856482,
-                        0.05479140879876329,
-                        0.08331778931396966,
-                        0.09984183171688307,
-                        0.09540804372093747,
-                        0.13061487068539895,
-                        0.1467662689081624,
-                        0.15217931951376726,
-                        0.13677491289715185,
-                        0.17617158652365011,
-                        0.19557374496959673,
-                        0.20395944940757538,
-                        0.18339268089443714,
-                        0.27865713047217044,
-                        0.3033889798196525,
-                        0.30782550157857647,
-                        0.26624801836986134,
-                        0.12544590383563262,
-                        0.16080952345936206,
-                        0.16870887439203017,
-                        0.1338404820039562,
-                        0.08919775817735803,
-                        0.07838533829431213,
-                        0.06617921442228045,
-                        0.061226194962847716,
-                        0.1122797518483396,
-                        0.12801585006675376,
-                        0.14524155735561856,
-                        0.15534721746744484,
-                        0.31101156418400344,
-                        0.34532436046246506,
-                        0.3561771297416096,
-                        0.3173191720433378
+                        -0.029047128341428835,
+                        -0.047080478183655516,
+                        -0.4903248217539634,
+                        -0.4013165546031968,
+                        0.00954546422194532,
+                        0.026725301125078336,
+                        -0.4464715952546496,
+                        -0.3753145426661034,
+                        0.0178274108304839,
+                        0.08962439141045284,
+                        0.13469818406324813,
+                        0.23088231518750404,
+                        0.06582830189612904,
+                        0.07763781029295294,
+                        0.09150772425787565,
+                        0.25077700314569784,
+                        0.05119046230867218,
+                        0.07611282984016528,
+                        0.10139818246290813,
+                        0.14288689493948598,
+                        0.0598475217726408,
+                        0.06087119675482909,
+                        0.11793996845165507,
+                        0.18543384750320266,
+                        -0.03579258071221712,
+                        -0.029063640559842896,
+                        -0.0180137765131618,
+                        -0.0024105812916838942,
+                        -0.6512115102018172,
+                        -0.7548563235266618,
+                        -0.8101129239208076,
+                        -0.7525332782418444,
+                        -0.04452557149987375,
+                        -0.017099546303999347,
+                        0.01681054214487729,
+                        0.04143403960686618,
+                        -0.5458096626601037,
+                        -0.6405871067373664,
+                        -0.6906119609438919,
+                        -0.6398074456856779,
+                        0.0547914087988338,
+                        0.08331778931403921,
+                        0.09984183171688447,
+                        0.09540804372074849,
+                        0.13061487068540045,
+                        0.14676626890819855,
+                        0.152179319513855,
+                        0.13677491289725968,
+                        0.17617158652365938,
+                        0.19557374496963953,
+                        0.20395944940760774,
+                        0.18339268089437616,
+                        0.2786571304721808,
+                        0.3033889798196467,
+                        0.30782550157853655,
+                        0.2662480183698179,
+                        0.12544590383567195,
+                        0.16080952345946356,
+                        0.16870887439199655,
+                        0.1338404820032475,
+                        0.08919775817731485,
+                        0.07838533829420004,
+                        0.06617921442213574,
+                        0.061226194962906565,
+                        0.11227975184827244,
+                        0.1280158500666968,
+                        0.14524155735570896,
+                        0.1553472174677229,
+                        0.31101156418403875,
+                        0.3453243604625382,
+                        0.35617712974166693,
+                        0.3173191720432818
                     ]
                 ],
                 "dtype": "float64",
@@ -311,16 +311,16 @@
                     72
                 ]
             },
-            "span": 0.06413599860398977,
+            "span": 0.06413599860398511,
             "twist": {
                 "__ndarray__": [
                     [
-                        -0.024967200640317398,
-                        -0.03216020114952957,
-                        -0.03408738804686446,
-                        -0.03126988598549719,
-                        -0.022967856493619272,
-                        -0.009658516327858274
+                        -0.024967200640313977,
+                        -0.032160201149526224,
+                        -0.03408738804686076,
+                        -0.031269885985493504,
+                        -0.022967856493621857,
+                        -0.009658516327860061
                     ]
                 ],
                 "dtype": "float64",
@@ -329,91 +329,91 @@
                     6
                 ]
             },
-            "xRef_mdo_tutorial": -0.12640436393489507,
-            "yRef_mdo_tutorial": 0.003590207077957915,
+            "xRef_mdo_tutorial": -0.12640436393489274,
+            "yRef_mdo_tutorial": 0.0035902070779581328,
             "zRef_mdo_tutorial": 0.0
         },
         "mdo_tutorial_drag": {
-            "P_mdo_tutorial": 0.505615746299267,
-            "T_mdo_tutorial": 0.02325084708947911,
-            "alpha_mdo_tutorial": 2064.7313215891973,
-            "beta_mdo_tutorial": -360.59524986195044,
-            "mach_mdo_tutorial": 62149.93506781718,
+            "P_mdo_tutorial": 0.5056157462992771,
+            "T_mdo_tutorial": 0.023250847089342996,
+            "alpha_mdo_tutorial": 2064.731321589265,
+            "beta_mdo_tutorial": -360.59524986194486,
+            "mach_mdo_tutorial": 62149.935067818835,
             "shape": {
                 "__ndarray__": [
                     [
-                        594.7426505400322,
-                        -60.95357352058238,
-                        -6390.58624718373,
-                        -5738.497960270588,
-                        549.345980847465,
-                        745.0690329394852,
-                        -5122.077504683481,
-                        -4267.169691607504,
-                        533.8160096799684,
-                        -1327.2979836548932,
-                        928.6240092660958,
-                        2726.849589708214,
-                        -738.4869220374837,
-                        2437.055489752636,
-                        4009.6660273932557,
-                        2909.330060583923,
-                        209.90114291584837,
-                        -802.2511917596289,
-                        889.6381085914871,
-                        1805.409242969377,
-                        1772.583217046823,
-                        1484.8775799111868,
-                        1300.9520503068206,
-                        1637.1134932257708,
-                        832.4695784634872,
-                        883.2884492386564,
-                        811.0658644105258,
-                        664.6632951462427,
-                        -8014.169453166152,
-                        -8491.924808685506,
-                        -8473.45875792854,
-                        -7698.954633758449,
-                        -150.62656901632556,
-                        -127.62300001372478,
-                        -124.64493954750833,
-                        109.5871151108405,
-                        -7304.9348154258005,
-                        -7691.183588991789,
-                        -7579.520027961214,
-                        -6722.127198007299,
-                        372.06946877170367,
-                        331.081104057561,
-                        362.87798411953634,
-                        424.22808772726233,
-                        -1657.0773745448485,
-                        -1547.6811185177953,
-                        -1306.2747892003838,
-                        -1071.9253109952701,
-                        1358.86812378893,
-                        1423.6775332743846,
-                        1368.7940685782762,
-                        1227.4216215805127,
-                        3033.246089584422,
-                        2984.076028470986,
-                        2819.418622051113,
-                        2512.2220406205115,
-                        171.3195745134269,
-                        1276.6749417978572,
-                        2194.8011152304007,
-                        2782.397462807346,
-                        4229.939217131979,
-                        4887.252887982736,
-                        4871.914521400283,
-                        3845.4394477552814,
-                        4586.725912127057,
-                        4017.6325302242367,
-                        3265.2229713789347,
-                        2228.12508145904,
-                        2518.507037114204,
-                        2057.576667340112,
-                        1771.196453536957,
-                        1650.6928780052563
+                        594.7426505395426,
+                        -60.95357352009637,
+                        -6390.5862471848795,
+                        -5738.497960270486,
+                        549.3459808461944,
+                        745.0690329372169,
+                        -5122.077504683097,
+                        -4267.16969160718,
+                        533.8160096790439,
+                        -1327.2979836550487,
+                        928.6240092658941,
+                        2726.849589708642,
+                        -738.4869220372068,
+                        2437.0554897528896,
+                        4009.6660273938514,
+                        2909.330060584425,
+                        209.9011429142525,
+                        -802.2511917579388,
+                        889.6381085933683,
+                        1805.409242968729,
+                        1772.5832170516571,
+                        1484.8775799091954,
+                        1300.9520503055649,
+                        1637.1134932258697,
+                        832.4695784627652,
+                        883.2884492382128,
+                        811.0658644104851,
+                        664.6632951442172,
+                        -8014.169453167122,
+                        -8491.924808686392,
+                        -8473.458757928694,
+                        -7698.954633758089,
+                        -150.62656901599314,
+                        -127.62300001332085,
+                        -124.644939547219,
+                        109.58711510874411,
+                        -7304.934815425823,
+                        -7691.1835889919685,
+                        -7579.520027961285,
+                        -6722.127198006985,
+                        372.06946877091474,
+                        331.0811040579845,
+                        362.87798412001905,
+                        424.22808772649057,
+                        -1657.0773745451215,
+                        -1547.6811185183667,
+                        -1306.2747892010625,
+                        -1071.9253109948243,
+                        1358.8681237885708,
+                        1423.67753327383,
+                        1368.7940685785716,
+                        1227.4216215826539,
+                        3033.246089584791,
+                        2984.076028471124,
+                        2819.4186220506044,
+                        2512.2220406198417,
+                        171.3195745137724,
+                        1276.6749417974427,
+                        2194.801115230459,
+                        2782.397462813064,
+                        4229.9392171322925,
+                        4887.252887983364,
+                        4871.914521400789,
+                        3845.439447753965,
+                        4586.725912128072,
+                        4017.6325302258406,
+                        3265.222971379758,
+                        2228.1250814571545,
+                        2518.507037114592,
+                        2057.5766673401545,
+                        1771.1964535366642,
+                        1650.6928780053324
                     ]
                 ],
                 "dtype": "float64",
@@ -422,16 +422,16 @@
                     72
                 ]
             },
-            "span": 936.2973925853767,
+            "span": 936.2973925855713,
             "twist": {
                 "__ndarray__": [
                     [
-                        -370.37837701247804,
-                        -462.82994861770794,
-                        -453.7585832002331,
-                        -387.8386105980703,
-                        -279.9947699998464,
-                        -128.46207105695308
+                        -370.3783770124885,
+                        -462.82994861771,
+                        -453.75858320025156,
+                        -387.83861059809044,
+                        -279.994769999822,
+                        -128.46207105692523
                     ]
                 ],
                 "dtype": "float64",
@@ -445,86 +445,86 @@
             "zRef_mdo_tutorial": 0.0
         },
         "mdo_tutorial_lift": {
-            "P_mdo_tutorial": 8.270592846885847,
-            "T_mdo_tutorial": -0.05395154843834504,
-            "alpha_mdo_tutorial": 47833.49177137628,
-            "beta_mdo_tutorial": -2208.502350635163,
-            "mach_mdo_tutorial": 600018.4073654747,
+            "P_mdo_tutorial": 8.270592846885688,
+            "T_mdo_tutorial": -0.053951548436925734,
+            "alpha_mdo_tutorial": 47833.491771372086,
+            "beta_mdo_tutorial": -2208.5023506349153,
+            "mach_mdo_tutorial": 600018.4073654441,
             "shape": {
                 "__ndarray__": [
                     [
-                        270.908524876344,
-                        -3980.608493570493,
-                        -154978.80106577938,
-                        -129275.2656861883,
-                        5148.912073803472,
-                        9439.40693739,
-                        -81307.7060583101,
-                        -68975.87722240997,
-                        14329.875218364088,
-                        27011.699273895196,
-                        37183.66975388621,
-                        65062.03572310845,
-                        31963.622484085907,
-                        24712.35514861459,
-                        19209.22030650783,
-                        65987.46902511171,
-                        11362.640319868198,
-                        13264.467729567019,
-                        17030.006954870347,
-                        23876.902939486547,
-                        12761.246141381704,
-                        9576.670457088181,
-                        19026.277769246855,
-                        30714.562082896737,
-                        3280.801809498298,
-                        5519.745051868973,
-                        6133.04743647798,
-                        6407.079002056791,
-                        -189102.43045666313,
-                        -193236.62910425203,
-                        -182736.93100922933,
-                        -150393.13549679087,
-                        3554.2551297638493,
-                        12046.945189252045,
-                        17581.743587298828,
-                        18077.169678765833,
-                        -161294.31164404895,
-                        -166311.46157503212,
-                        -157447.90330776715,
-                        -128975.94344351538,
-                        26086.555824644354,
-                        30492.117389731626,
-                        29704.143615940033,
-                        23833.51415098023,
-                        35617.55067826964,
-                        35456.90211515942,
-                        32649.62842562994,
-                        26200.51130807692,
-                        44585.74593455944,
-                        44151.67542327428,
-                        41410.13988113458,
-                        33654.951774435576,
-                        71097.25236325164,
-                        68992.41795539077,
-                        63021.112706102766,
-                        49063.62494301142,
-                        48763.08711186904,
-                        51875.626686665215,
-                        46226.53815522764,
-                        31679.45924324079,
-                        23119.42431857829,
-                        15640.40595658914,
-                        9975.854605827382,
-                        8143.878655788661,
-                        20117.952587818672,
-                        20839.685120662565,
-                        22955.24577856176,
-                        24665.295882896382,
-                        74753.85823571598,
-                        74998.47605725183,
-                        70508.5407231889,
-                        57203.095205096804
+                        270.9085248480369,
+                        -3980.608493575877,
+                        -154978.8010657982,
+                        -129275.2656861672,
+                        5148.912073852452,
+                        9439.406937425761,
+                        -81307.7060583119,
+                        -68975.87722241263,
+                        14329.875218386798,
+                        27011.699273895967,
+                        37183.66975388664,
+                        65062.035723122026,
+                        31963.622484102856,
+                        24712.35514860382,
+                        19209.220306474916,
+                        65987.46902513114,
+                        11362.640319846467,
+                        13264.467729578304,
+                        17030.006954857097,
+                        23876.902939482974,
+                        12761.246141280051,
+                        9576.670457121938,
+                        19026.277769279437,
+                        30714.56208288091,
+                        3280.801809470795,
+                        5519.745051840175,
+                        6133.047436468623,
+                        6407.079002126651,
+                        -189102.4304566693,
+                        -193236.62910425806,
+                        -182736.93100922904,
+                        -150393.13549679253,
+                        3554.255129766774,
+                        12046.945189247395,
+                        17581.743587285422,
+                        18077.169678809474,
+                        -161294.31164402986,
+                        -166311.46157502144,
+                        -157447.90330776767,
+                        -128975.94344352269,
+                        26086.555824663454,
+                        30492.11738974947,
+                        29704.14361594276,
+                        23833.514150943432,
+                        35617.55067826984,
+                        35456.902115165794,
+                        32649.62842564629,
+                        26200.511308096204,
+                        44585.74593456105,
+                        44151.67542328246,
+                        41410.13988114162,
+                        33654.95177442676,
+                        71097.25236325563,
+                        68992.41795539009,
+                        63021.1127060946,
+                        49063.624943003015,
+                        48763.08711187819,
+                        51875.62668668677,
+                        46226.538155227645,
+                        31679.459243110636,
+                        23119.424318565656,
+                        15640.405956559225,
+                        9975.85460579268,
+                        8143.8786557970925,
+                        20117.95258779807,
+                        20839.68512064548,
+                        22955.245778577548,
+                        24665.29588295175,
+                        74753.85823572593,
+                        74998.4760572695,
+                        70508.54072320399,
+                        57203.09520508784
                     ]
                 ],
                 "dtype": "float64",
@@ -533,16 +533,16 @@
                     72
                 ]
             },
-            "span": 17716.82717615593,
+            "span": 17716.82717615395,
             "twist": {
                 "__ndarray__": [
                     [
-                        -10051.082673356312,
-                        -11721.142347043358,
-                        -10560.630662911663,
-                        -8218.202033018604,
-                        -5187.625669118539,
-                        -1963.4397583458262
+                        -10051.082673355177,
+                        -11721.14234704246,
+                        -10560.630662910786,
+                        -8218.202033017795,
+                        -5187.62566911897,
+                        -1963.439758346148
                     ]
                 ],
                 "dtype": "float64",
@@ -556,12 +556,12 @@
             "zRef_mdo_tutorial": 0.0
         }
     },
-    "Norm of residual": 7.43989104883568e-13,
+    "Norm of residual": 3.237191827215383e-14,
     "metadata": {
         "ADPC": false,
-        "AGMGLevels": 1,
-        "AGMGNSmooth": 3,
         "ANKADPC": false,
+        "ANKAMGLevels": 2,
+        "ANKAMGNSmooth": 1,
         "ANKASMOverlap": 1,
         "ANKCFL0": 5.0,
         "ANKCFLCutback": 0.5,
@@ -569,18 +569,23 @@
         "ANKCFLFactor": 10.0,
         "ANKCFLLimit": 100000.0,
         "ANKCFLMin": 1.0,
+        "ANKCFLReset": true,
+        "ANKCharTimeStepType": "None",
         "ANKConstCFLStep": 0.4,
         "ANKCoupledSwitchTol": 1e-16,
+        "ANKGlobalPreconditioner": "additive Schwarz",
         "ANKInnerPreconIts": 1,
         "ANKJacobianLag": 10,
         "ANKLinResMax": 0.1,
+        "ANKLinearSolveBuffer": 0.01,
         "ANKLinearSolveTol": 0.05,
         "ANKMaxIter": 40,
         "ANKNSubiterTurb": 1,
         "ANKOuterPreconIts": 1,
         "ANKPCILUFill": 2,
-        "ANKPCUpdateCutoff": 1e-06,
+        "ANKPCUpdateCutoff": 1e-16,
         "ANKPCUpdateTol": 0.5,
+        "ANKPCUpdateTolAfterCutoff": 0.0001,
         "ANKPhysicalLSTol": 0.2,
         "ANKPhysicalLSTolTurb": 0.99,
         "ANKSecondOrdSwitchTol": 0.01,
@@ -607,8 +612,11 @@
         "MGCycle": "2w",
         "MGStartLevel": -1,
         "NKADPC": false,
+        "NKAMGLevels": 2,
+        "NKAMGNSmooth": 1,
         "NKASMOverlap": 1,
         "NKFixedStep": 0.25,
+        "NKGlobalPreconditioner": "additive Schwarz",
         "NKInnerPreconIts": 1,
         "NKJacobianLag": 2,
         "NKLS": "cubic",
@@ -621,6 +629,9 @@
         "NKViscPC": false,
         "RKReset": false,
         "TSStability": false,
+        "acousticScaleFactor": 1.0,
+        "adjointAMGLevels": 2,
+        "adjointAMGNSmooth": 1,
         "adjointDivTol": 100000.0,
         "adjointL2Convergence": 1e-16,
         "adjointL2ConvergenceAbs": 1e-16,
@@ -653,6 +664,9 @@
         "deltaT": 0.01,
         "designSurfaceFamily": null,
         "discretization": "central plus scalar dissipation",
+        "dissContMagnitude": 1.0,
+        "dissContMidpoint": 3.0,
+        "dissContSharpness": 3.0,
         "dissipationLumpingParameter": 6.0,
         "dissipationScalingExponent": 0.67,
         "eddyVisInfRatio": 0.009,
@@ -669,6 +683,8 @@
         "gridPrecision": "double",
         "gridPrecisionSurface": "single",
         "infChangeCorrection": true,
+        "infChangeCorrectionTol": 1e-12,
+        "infChangeCorrectionType": "offset",
         "innerPreconIts": 1,
         "isoVariables": [],
         "isosurface": {},
@@ -681,6 +697,7 @@
         "machMode": false,
         "matrixOrdering": "RCM",
         "maxL2DeviationFactor": 1.0,
+        "meshMaxSkewness": 1.0,
         "meshSurfaceFamily": null,
         "monitorVariables": [
             "resrho",
@@ -714,8 +731,10 @@
         "partitionOnly": false,
         "preconditionerSide": "right",
         "printAllOptions": true,
+        "printBadlySkewedCells": false,
         "printIntro": true,
         "printIterations": true,
+        "printNegativeVolumes": false,
         "printTiming": true,
         "printWarnings": true,
         "qMode": false,
@@ -755,6 +774,7 @@
         "useApproxWallDistance": true,
         "useBlockettes": false,
         "useDiagTSPC": true,
+        "useDissContinuation": false,
         "useExternalDynamicMesh": false,
         "useGridMotion": false,
         "useLinResMonitor": false,
@@ -764,6 +784,7 @@
         "useQCR": false,
         "useRotationSA": false,
         "useRoughSA": true,
+        "useSkewnessCheck": false,
         "useTSInterpolatedGridVelocity": false,
         "useWallFunctions": false,
         "useZipperMesh": true,
diff --git a/tests/reg_tests/test_functionals.py b/tests/reg_tests/test_functionals.py
index 81911232f..2767fa999 100644
--- a/tests/reg_tests/test_functionals.py
+++ b/tests/reg_tests/test_functionals.py
@@ -243,6 +243,7 @@
             },
             "ref_file": "funcs_rans_rough_sa.json",
             "aero_prob": ap_tutorial_wing,
+            "dot_prod_tol": 1e-10,
         },
         # Rough Tutorial wing RANS
         # This test makes sure a roughness value of 0 equals the standard SA model
@@ -276,6 +277,7 @@
             "aero_prob": ap_tutorial_wing,
             "no_train": True,  # This test should not be able to over-write
             # the training file as it is coming from a different test
+            "dot_prod_tol": 1e-10,
         },
     ]
 )

From a784190efd284f9c71dad3672dd8b1650d85fbdd Mon Sep 17 00:00:00 2001
From: David Anderegg <andv@zhaw.ch>
Date: Fri, 20 Oct 2023 11:15:47 -0400
Subject: [PATCH 54/60] get rid of 'fprettify' warning

---
 src/wallDistance/wallDistance.F90 | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/wallDistance/wallDistance.F90 b/src/wallDistance/wallDistance.F90
index 924d7b9d4..b11b93295 100644
--- a/src/wallDistance/wallDistance.F90
+++ b/src/wallDistance/wallDistance.F90
@@ -266,8 +266,9 @@ subroutine updateWallRoughness()
                                     k = kl
                                 end select
 
-                cellIdLocal(iCell) = nCellBLockOffset(level, nn) * nTimeIntervalsSpectral + nx * ny * nz * (sps - 1) + &
-                                                     (i - 2) + (j - 2) * nx + (k - 2) * nx * ny
+                                cellIdLocal(iCell) = nCellBLockOffset(level, nn) * nTimeIntervalsSpectral + &
+                                                     nx * ny * nz * (sps - 1) + (i - 2) + (j - 2) * nx + &
+                                                     (k - 2) * nx * ny
                             end do
                         end do
                     end do
@@ -305,7 +306,10 @@ subroutine updateWallRoughness()
                                 end if
 
                                 ! find the index of the surface cell (Requires gfortran > 9.0 )
-                              iCell = findloc(cellIdGlobal, flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k), DIM=1)
+                                iCell = findloc( &
+                                cellIdGlobal, &
+                                    flowDoms(nn, level, sps)%nearestWallCellInd(i, j, k), &
+                                    DIM = 1)
 
                                 if (iCell == 0) then
                                     write (errorMessage, 100) &

From ee2b30209ebaa3d6ae5ee05ff41ce21a2b41dd40 Mon Sep 17 00:00:00 2001
From: DavidAnderegg <andv@zhaw.ch>
Date: Tue, 1 Oct 2024 09:50:47 +0200
Subject: [PATCH 55/60] Retrain 'test_functionals' and fix typo in setup

---
 tests/reg_tests/refs/funcs_rans_rough_sa.json | 590 +++++++++++-------
 tests/reg_tests/test_functionals.py           |   3 +-
 2 files changed, 349 insertions(+), 244 deletions(-)

diff --git a/tests/reg_tests/refs/funcs_rans_rough_sa.json b/tests/reg_tests/refs/funcs_rans_rough_sa.json
index 2bce0b7da..7338a4e74 100644
--- a/tests/reg_tests/refs/funcs_rans_rough_sa.json
+++ b/tests/reg_tests/refs/funcs_rans_rough_sa.json
@@ -1,45 +1,48 @@
 {
-    "Dot product test for (w, xV) -> (dw, F)": 25052510162.91482,
-    "Dot product test for Xv -> R": -7422557.015466751,
+    "Dot product test for (w, xV) -> (dw, F)": 25052510162.914825,
+    "Dot product test for Xv -> R": -7422557.015466749,
     "Dot product test for w -> F": 48678.90552195639,
-    "Dot product test for w -> R": 25061252292.45091,
-    "Dot product test for xV -> F": -1368251.4261434958,
+    "Dot product test for w -> R": 25061252292.450912,
+    "Dot product test for xV -> F": -1368251.4261434944,
     "Eval Functions:": {
-        "mdo_tutorial_cd": 0.0245664007857898,
-        "mdo_tutorial_cfx": 0.01166817300336451,
+        "mdo_tutorial_cd": 0.024566400785789796,
+        "mdo_tutorial_cfx": 0.011668173003364507,
         "mdo_tutorial_cfy": 0.4108141827884018,
-        "mdo_tutorial_cfz": 0.007565403441033702,
+        "mdo_tutorial_cfz": 0.0075654034410337035,
         "mdo_tutorial_cl": 0.4102449646172038,
-        "mdo_tutorial_cmx": -0.7922014856117326,
-        "mdo_tutorial_cmy": -0.008981003314113318,
+        "mdo_tutorial_cmx": -0.7922014856117325,
+        "mdo_tutorial_cmy": -0.008981003314113325,
         "mdo_tutorial_cmz": 0.6295255266022379,
-        "mdo_tutorial_cofxx": 3.3323101324392925,
+        "mdo_tutorial_cofxx": 3.332310132439291,
         "mdo_tutorial_cofxy": -0.1348493497186727,
-        "mdo_tutorial_cofxz": 1.6012560760241181,
+        "mdo_tutorial_cofxz": 1.6012560760241168,
         "mdo_tutorial_cofyx": 4.97642146149654,
         "mdo_tutorial_cofyy": 0.2601267269394276,
         "mdo_tutorial_cofyz": 6.271918158078769,
-        "mdo_tutorial_cofzx": 6.327751594707445,
-        "mdo_tutorial_cofzy": 0.25617991212816227,
-        "mdo_tutorial_cofzz": 10.432780012944532,
+        "mdo_tutorial_cofzx": 6.327751594707444,
+        "mdo_tutorial_cofzy": 0.25617991212816216,
+        "mdo_tutorial_cofzz": 10.43278001294453,
+        "mdo_tutorial_colx": 4.977890283785735,
+        "mdo_tutorial_coly": 0.2604795921346493,
+        "mdo_tutorial_colz": 6.276090851527301,
         "mdo_tutorial_drag": 10015.230272350786,
-        "mdo_tutorial_fx": 4756.880770011644,
+        "mdo_tutorial_fx": 4756.880770011643,
         "mdo_tutorial_fy": 167480.72603917567,
-        "mdo_tutorial_fz": 3084.26367484062,
+        "mdo_tutorial_fz": 3084.2636748406208,
         "mdo_tutorial_lift": 167248.6671751417,
-        "mdo_tutorial_mx": -1049635.2803761214,
-        "mdo_tutorial_my": -11899.470151067584,
+        "mdo_tutorial_mx": -1049635.2803761212,
+        "mdo_tutorial_my": -11899.470151067593,
         "mdo_tutorial_mz": 834096.1417269012,
         "mdo_tutorial_sepsensor": 0.016301194542570726,
         "mdo_tutorial_sepsensoravgx": 0.09540342694812158,
         "mdo_tutorial_sepsensoravgy": 9.994583497283502e-05,
         "mdo_tutorial_sepsensoravgz": 0.05019620000103362
     },
-    "Norm of residual": 2.287162877608165e-16,
+    "Norm of residual": 2.2925508532857043e-16,
     "Norm of state vector": 472.57778771100806,
     "Sum of Forces x": 4756.880770011646,
     "Sum of Forces y": 167480.72603917564,
-    "Sum of Forces z": 3084.263674840623,
+    "Sum of Forces z": 3084.263674840624,
     "Sum of Tractions x": 336771.1069549067,
     "Sum of Tractions y": 1650799.5101746526,
     "Sum of Tractions z": 279940.1407894979,
@@ -59,30 +62,33 @@
         "shape": []
     },
     "dFuncs/dP": {
-        "cd": 1.0255863078366282e-07,
-        "cfx": 1.025445711103807e-07,
-        "cfy": 2.0585057746041135e-09,
-        "cfz": 3.0809570252249344e-09,
-        "cl": -1.1635127916506094e-09,
-        "cmx": -5.02580368867878e-09,
-        "cmy": 1.836634687981281e-07,
-        "cmz": 2.0700897670896914e-09,
-        "cofxx": 1.0375012939594107e-05,
-        "cofxy": 1.4128496198936042e-06,
-        "cofxz": 3.848381803761703e-05,
-        "cofyx": -2.0907153334620347e-09,
-        "cofyy": 4.510954938890027e-12,
-        "cofyz": 7.602063203330032e-09,
-        "cofzx": -4.1846282243704764e-07,
-        "cofzy": -1.4398784783818334e-07,
-        "cofzz": -2.2729288082730607e-06,
-        "drag": 0.542572616215423,
-        "fx": 0.27964940925086224,
-        "fy": 8.374875513592976,
-        "fz": 0.1554692283020747,
-        "lift": 8.361959017862185,
+        "cd": 1.025586307836628e-07,
+        "cfx": 1.0254457111038074e-07,
+        "cfy": 2.0585057746026076e-09,
+        "cfz": 3.080957025224556e-09,
+        "cl": -1.1635127916521157e-09,
+        "cmx": -5.02580368870114e-09,
+        "cmy": 1.836634687981284e-07,
+        "cmz": 2.0700897671058624e-09,
+        "cofxx": 1.037501293959403e-05,
+        "cofxy": 1.4128496198936057e-06,
+        "cofxz": 3.848381803761694e-05,
+        "cofyx": -2.090715333502599e-09,
+        "cofyy": 4.5109549389085536e-12,
+        "cofyz": 7.602063203362935e-09,
+        "cofzx": -4.184628224366399e-07,
+        "cofzy": -1.4398784783816656e-07,
+        "cofzz": -2.272928808272515e-06,
+        "colx": 1.551312768394871e-09,
+        "coly": 1.8444193827696344e-09,
+        "colz": 9.911171655450564e-09,
+        "drag": 0.5425726162154229,
+        "fx": 0.2796494092508621,
+        "fy": 8.374875513592974,
+        "fz": 0.1554692283020746,
+        "lift": 8.361959017862183,
         "mx": -52.48842300766143,
-        "my": -0.3516267579346115,
+        "my": -0.35162675793461173,
         "mz": 41.70754987248287,
         "sepsensor": 0.0,
         "sepsensoravgx": 0.0,
@@ -90,31 +96,34 @@
         "sepsensoravgz": 0.0
     },
     "dFuncs/dT": {
-        "cd": -6.4346718114826674e-21,
-        "cfx": -6.433182828244116e-21,
-        "cfy": -1.4846411730779536e-22,
-        "cfz": -1.8466561024880875e-22,
-        "cl": 5.368029669547889e-23,
-        "cmx": 3.5502025762938107e-22,
-        "cmy": -1.2030208599706455e-20,
-        "cmz": -1.3651932049147965e-22,
-        "cofxx": -7.112142957869289e-19,
-        "cofxy": -9.293132415071333e-20,
-        "cofxz": -2.5449799394819826e-18,
-        "cofyx": 1.9060681131220013e-22,
-        "cofyy": 1.393419545416948e-23,
-        "cofyz": -4.828727908497158e-22,
-        "cofzx": 3.572271393378157e-20,
-        "cofzy": 9.463827478678352e-21,
-        "cofzz": 1.5205639797937836e-19,
-        "drag": -2.623287004105254e-15,
-        "fx": -2.622679975418561e-15,
-        "fy": -6.052585134404201e-17,
-        "fz": -7.528447598623434e-17,
-        "lift": 2.1884383356812833e-17,
-        "mx": 4.703876405486247e-16,
-        "my": -1.5939545186267064e-14,
-        "mz": -1.8088263887839087e-16,
+        "cd": -7.97407560708349e-21,
+        "cfx": -7.972667277561772e-21,
+        "cfy": -1.7008056169588572e-22,
+        "cfz": -2.2482267665849947e-22,
+        "cl": 8.043089385984346e-23,
+        "cmx": 4.087206383658274e-22,
+        "cmy": -1.4671395918848986e-20,
+        "cmz": -1.4513050628161639e-22,
+        "cofxx": -8.46577777879809e-19,
+        "cofxy": -1.1621669191703017e-19,
+        "cofxz": -3.0893539917499644e-18,
+        "cofyx": 2.2830502610902396e-22,
+        "cofyy": 4.3709209984172894e-24,
+        "cofyz": -5.698972668736165e-22,
+        "cofzx": 3.849894121230114e-20,
+        "cofzy": 1.1246443854871941e-20,
+        "cofzz": 1.811270017266413e-19,
+        "colx": -1.9081860443013808e-23,
+        "coly": -1.329751358440995e-22,
+        "colz": -6.623775130485751e-22,
+        "drag": -3.250871143495798e-15,
+        "fx": -3.250296995716384e-15,
+        "fy": -6.93384433921787e-17,
+        "fz": -9.165570882013707e-17,
+        "lift": 3.2790066808780993e-17,
+        "mx": 5.415384970091867e-16,
+        "my": -1.9439012736638154e-14,
+        "mz": -1.9229211560289045e-16,
         "sepsensor": 0.0,
         "sepsensoravgx": 0.0,
         "sepsensoravgy": 0.0,
@@ -123,40 +132,43 @@
     "dFuncs/dXv * xVDot": {
         "cd": 4.2529682732075225,
         "cfx": 4.249035399663727,
-        "cfy": 0.19195702983176227,
-        "cfz": 0.35064826015051337,
-        "cl": 0.05839688337189095,
-        "cmx": -1.092783272130625,
-        "cmy": -1.1999451606004616,
-        "cmz": 1.359511363699763,
-        "cofxx": -488.09187989891836,
-        "cofxy": -76.51059615419035,
-        "cofxz": -739.7568018827054,
-        "cofyx": 4.862135728695633,
-        "cofyy": 0.37025124340482296,
-        "cofyz": 5.697393456600477,
-        "cofzx": -19.40543030804781,
-        "cofzy": -12.803792657294439,
-        "cofzz": -140.92641161911112,
-        "drag": 1733850.1056212427,
-        "fx": 1732246.7517349082,
-        "fy": 78257.04192181284,
+        "cfy": 0.19195702983176197,
+        "cfz": 0.3506482601505133,
+        "cl": 0.05839688337189065,
+        "cmx": -1.0927832721306239,
+        "cmy": -1.1999451606004583,
+        "cmz": 1.3595113636997622,
+        "cofxx": -488.09187989891825,
+        "cofxy": -76.51059615419034,
+        "cofxz": -739.7568018827052,
+        "cofyx": 4.862135728695632,
+        "cofyy": 0.37025124340482335,
+        "cofyz": 5.697393456600473,
+        "cofzx": -19.405430308047734,
+        "cofzy": -12.803792657294428,
+        "cofzz": -140.92641161911095,
+        "colx": 5.837204211267569,
+        "coly": 0.567383095306283,
+        "colz": 7.882287552102762,
+        "drag": 1733850.1056212431,
+        "fx": 1732246.7517349087,
+        "fy": 78257.04192181274,
         "fz": 142952.2826981613,
-        "lift": 23807.2414130525,
-        "mx": -1447894.1242421928,
-        "my": -1589879.3399891874,
-        "mz": 1801298.176447638,
+        "lift": 23807.241413052383,
+        "mx": -1447894.1242421914,
+        "my": -1589879.3399891835,
+        "mz": 1801298.1764476371,
         "sepsensor": -0.11694957310130713,
         "sepsensoravgx": -0.6029863600829567,
         "sepsensoravgy": 0.007262037328436888,
         "sepsensoravgz": -0.09612647922522868
     },
     "dFuncs/dalpha": {
-        "cd": 0.007160125372297846,
+        "cd": 0.007160125372297845,
         "cfx": 0.0,
         "cfy": 0.0,
         "cfz": 0.0,
-        "cl": -0.0004287645790765542,
+        "cl": -0.00042876457907655406,
         "cmx": 0.0,
         "cmy": 0.0,
         "cmz": 0.0,
@@ -169,25 +181,28 @@
         "cofzx": 0.0,
         "cofzy": 0.0,
         "cofzz": 0.0,
+        "colx": 0.0008172790369756601,
+        "coly": 0.0001963405165176115,
+        "colz": 0.0023217613923074195,
         "drag": 2919.039911778386,
         "fx": 0.0,
         "fy": 0.0,
         "fz": 0.0,
-        "lift": -174.79874359792962,
+        "lift": -174.79874359792956,
         "mx": 0.0,
         "my": 0.0,
         "mz": 0.0,
-        "sepsensor": 0.000889013327828802,
+        "sepsensor": 0.0008890133278288019,
         "sepsensoravgx": 0.006211849207427457,
         "sepsensoravgy": 1.1629154598405563e-06,
         "sepsensoravgz": 0.006150040814983931
     },
     "dFuncs/dbeta": {
-        "cd": -0.007561670371126666,
+        "cd": -0.007561670371126668,
         "cfx": 0.0,
         "cfy": 0.0,
         "cfz": 0.0,
-        "cl": 0.0002376350648151524,
+        "cl": 0.00023763506481515244,
         "cmx": 0.0,
         "cmy": 0.0,
         "cmz": 0.0,
@@ -200,28 +215,31 @@
         "cofzx": 0.0,
         "cofzy": 0.0,
         "cofzz": 0.0,
-        "drag": -3082.7417769009194,
+        "colx": 0.0007819093658142975,
+        "coly": -2.490596656023271e-06,
+        "colz": 0.0024077689758154846,
+        "drag": -3082.7417769009207,
         "fx": 0.0,
         "fy": 0.0,
         "fz": 0.0,
-        "lift": 96.87906322384133,
+        "lift": 96.87906322384137,
         "mx": 0.0,
         "my": 0.0,
         "mz": 0.0,
-        "sepsensor": 0.18136049395962298,
+        "sepsensor": 0.181360493959623,
         "sepsensoravgx": 1.1060754915788906,
-        "sepsensoravgy": 0.0011646306998538993,
-        "sepsensoravgz": 0.7192972767550525
+        "sepsensoravgy": 0.001164630699853899,
+        "sepsensoravgz": 0.7192972767550523
     },
     "dFuncs/dmach": {
-        "cd": -0.06141600196447449,
-        "cfx": -0.029170432508411278,
-        "cfy": -1.0270354569710045,
-        "cfz": -0.018913508602584256,
-        "cl": -1.0256124115430094,
-        "cmx": 1.9805037140293318,
-        "cmy": 0.0224525082852833,
-        "cmz": -1.5738138165055948,
+        "cd": -0.06141600196447446,
+        "cfx": -0.029170432508411257,
+        "cfy": -1.0270354569710043,
+        "cfz": -0.01891350860258426,
+        "cl": -1.0256124115430092,
+        "cmx": 1.9805037140293307,
+        "cmy": 0.02245250828528332,
+        "cmz": -1.5738138165055942,
         "cofxx": 0.0,
         "cofxy": 0.0,
         "cofxz": 0.0,
@@ -231,6 +249,9 @@
         "cofzx": 0.0,
         "cofzy": 0.0,
         "cofzz": 0.0,
+        "colx": 0.0,
+        "coly": 0.0,
+        "colz": 0.0,
         "drag": 0.0,
         "fx": 0.0,
         "fy": 0.0,
@@ -245,35 +266,38 @@
         "sepsensoravgz": 0.0
     },
     "dFuncs/dw * wDot": {
-        "cd": 0.031101767260431203,
-        "cfx": 0.028800395325415784,
-        "cfy": 0.07371942797663704,
-        "cfz": 0.02171600098784401,
-        "cl": 0.07277840961013628,
+        "cd": 0.031101767260431196,
+        "cfx": 0.028800395325415777,
+        "cfy": 0.07371942797663703,
+        "cfz": 0.021716000987843997,
+        "cl": 0.07277840961013626,
         "cmx": -0.15842697054164165,
-        "cmy": 0.005024771647054986,
-        "cmz": 0.12014382033187078,
-        "cofxx": -5.883673815216705,
-        "cofxy": 0.4009727991396246,
-        "cofxz": 10.115850378144957,
-        "cofyx": 0.05940247196308512,
+        "cmy": 0.0050247716470549885,
+        "cmz": 0.12014382033187077,
+        "cofxx": -5.883673815216708,
+        "cofxy": 0.4009727991396247,
+        "cofxz": 10.11585037814496,
+        "cofyx": 0.05940247196308444,
         "cofyy": -0.3350265708291385,
-        "cofyz": 0.12791459575704778,
-        "cofzx": 1.3755158785313164,
-        "cofzy": -0.7322158660616289,
-        "cofzz": -15.884837396918174,
+        "cofyz": 0.127914595757047,
+        "cofzx": 1.3755158785313275,
+        "cofzy": -0.7322158660616279,
+        "cofzz": -15.884837396918154,
+        "colx": 0.06807682146337765,
+        "coly": -0.33487572734605553,
+        "colz": 0.12855069447564793,
         "drag": 12679.568476732591,
         "fx": 11741.345166265506,
         "fy": 30053.936397515386,
-        "fz": 8853.179282724246,
+        "fz": 8853.179282724243,
         "lift": 29670.30202986036,
         "mx": -209909.39888885352,
-        "my": 6657.621441481974,
+        "my": 6657.621441481978,
         "mz": 159185.7561869155,
-        "sepsensor": -12.044419253289895,
+        "sepsensor": -12.044419253289893,
         "sepsensoravgx": -68.10234813562751,
         "sepsensoravgy": -0.08084780944322405,
-        "sepsensoravgz": -28.399498948465343
+        "sepsensoravgz": -28.399498948465336
     },
     "dFuncs/dxRef": {
         "cd": 0.0,
@@ -282,7 +306,7 @@
         "cfz": 0.0,
         "cl": 0.0,
         "cmx": 0.0,
-        "cmy": 0.0023278164433949856,
+        "cmy": 0.0023278164433949864,
         "cmz": -0.12640436393489288,
         "cofxx": 0.0,
         "cofxy": 0.0,
@@ -293,13 +317,16 @@
         "cofzx": 0.0,
         "cofzy": 0.0,
         "cofzz": 0.0,
+        "colx": 0.0,
+        "coly": 0.0,
+        "colz": 0.0,
         "drag": 0.0,
         "fx": 0.0,
         "fy": 0.0,
         "fz": 0.0,
         "lift": 0.0,
         "mx": 0.0,
-        "my": 3084.26367484062,
+        "my": 3084.2636748406217,
         "mz": -167480.72603917567,
         "sepsensor": 0.0,
         "sepsensoravgx": 0.0,
@@ -312,9 +339,9 @@
         "cfy": 0.0,
         "cfz": 0.0,
         "cl": 0.0,
-        "cmx": -0.0023278164433949856,
+        "cmx": -0.0023278164433949864,
         "cmy": 0.0,
-        "cmz": 0.0035902070779583115,
+        "cmz": 0.003590207077958309,
         "cofxx": 0.0,
         "cofxy": 0.0,
         "cofxz": 0.0,
@@ -324,14 +351,17 @@
         "cofzx": 0.0,
         "cofzy": 0.0,
         "cofzz": 0.0,
+        "colx": 0.0,
+        "coly": 0.0,
+        "colz": 0.0,
         "drag": 0.0,
         "fx": 0.0,
         "fy": 0.0,
         "fz": 0.0,
         "lift": 0.0,
-        "mx": -3084.26367484062,
+        "mx": -3084.2636748406217,
         "my": 0.0,
-        "mz": 4756.880770011644,
+        "mz": 4756.880770011641,
         "sepsensor": 0.0,
         "sepsensoravgx": 0.0,
         "sepsensoravgy": 0.0,
@@ -343,8 +373,8 @@
         "cfy": 0.0,
         "cfz": 0.0,
         "cl": 0.0,
-        "cmx": 0.12640436393489288,
-        "cmy": -0.0035902070779583115,
+        "cmx": 0.12640436393489285,
+        "cmy": -0.003590207077958309,
         "cmz": 0.0,
         "cofxx": 0.0,
         "cofxy": 0.0,
@@ -355,13 +385,16 @@
         "cofzx": 0.0,
         "cofzy": 0.0,
         "cofzz": 0.0,
+        "colx": 0.0,
+        "coly": 0.0,
+        "colz": 0.0,
         "drag": 0.0,
         "fx": 0.0,
         "fy": 0.0,
         "fz": 0.0,
         "lift": 0.0,
-        "mx": 167480.72603917567,
-        "my": -4756.880770011644,
+        "mx": 167480.72603917564,
+        "my": -4756.880770011641,
         "mz": 0.0,
         "sepsensor": 0.0,
         "sepsensoravgx": 0.0,
@@ -370,28 +403,36 @@
     },
     "metadata": {
         "ADPC": false,
-        "AGMGLevels": 1,
-        "AGMGNSmooth": 3,
         "ANKADPC": false,
+        "ANKAMGLevels": 2,
+        "ANKAMGNSmooth": 1,
         "ANKASMOverlap": 1,
+        "ANKASMOverlapCoarse": 0,
         "ANKCFL0": 5.0,
         "ANKCFLCutback": 0.5,
         "ANKCFLExponent": 0.5,
         "ANKCFLFactor": 10.0,
         "ANKCFLLimit": 100000.0,
         "ANKCFLMin": 1.0,
+        "ANKCFLReset": true,
+        "ANKCharTimeStepType": "None",
         "ANKConstCFLStep": 0.4,
         "ANKCoupledSwitchTol": 1e-16,
+        "ANKGlobalPreconditioner": "additive Schwarz",
         "ANKInnerPreconIts": 1,
+        "ANKInnerPreconItsCoarse": 1,
         "ANKJacobianLag": 10,
         "ANKLinResMax": 0.1,
+        "ANKLinearSolveBuffer": 0.01,
         "ANKLinearSolveTol": 0.05,
         "ANKMaxIter": 40,
         "ANKNSubiterTurb": 1,
         "ANKOuterPreconIts": 1,
         "ANKPCILUFill": 2,
-        "ANKPCUpdateCutoff": 1e-06,
+        "ANKPCILUFillCoarse": 0,
+        "ANKPCUpdateCutoff": 1e-16,
         "ANKPCUpdateTol": 0.5,
+        "ANKPCUpdateTolAfterCutoff": 0.0001,
         "ANKPhysicalLSTol": 0.2,
         "ANKPhysicalLSTolTurb": 0.99,
         "ANKSecondOrdSwitchTol": 0.01,
@@ -407,31 +448,42 @@
         "ANKUseMatrixFree": true,
         "ANKUseTurbDADI": true,
         "ASMOverlap": 1,
+        "ASMOverlapCoarse": 0,
         "CFL": 1.5,
         "CFLCoarse": 1.25,
         "CFLLimit": 1.5,
         "GMRESOrthogonalizationType": "modified Gram-Schmidt",
         "ILUFill": 2,
+        "ILUFillCoarse": 0,
         "L2Convergence": 1e-15,
         "L2ConvergenceCoarse": 0.01,
         "L2ConvergenceRel": 1e-16,
         "MGCycle": "2w",
         "MGStartLevel": -1,
         "NKADPC": false,
+        "NKAMGLevels": 2,
+        "NKAMGNSmooth": 1,
         "NKASMOverlap": 1,
+        "NKASMOverlapCoarse": 0,
         "NKFixedStep": 0.25,
+        "NKGlobalPreconditioner": "additive Schwarz",
         "NKInnerPreconIts": 1,
+        "NKInnerPreconItsCoarse": 1,
         "NKJacobianLag": 2,
         "NKLS": "cubic",
         "NKLinearSolveTol": 0.3,
         "NKOuterPreconIts": 1,
         "NKPCILUFill": 2,
+        "NKPCILUFillCoarse": 0,
         "NKSubspaceSize": 60,
         "NKSwitchTol": 1e-05,
         "NKUseEW": true,
         "NKViscPC": false,
         "RKReset": false,
         "TSStability": false,
+        "acousticScaleFactor": 1.0,
+        "adjointAMGLevels": 2,
+        "adjointAMGNSmooth": 1,
         "adjointDivTol": 100000.0,
         "adjointL2Convergence": 1e-16,
         "adjointL2ConvergenceAbs": 1e-16,
@@ -464,6 +516,9 @@
         "deltaT": 0.01,
         "designSurfaceFamily": null,
         "discretization": "central plus scalar dissipation",
+        "dissContMagnitude": 1.0,
+        "dissContMidpoint": 3.0,
+        "dissContSharpness": 3.0,
         "dissipationLumpingParameter": 6.0,
         "dissipationScalingExponent": 0.67,
         "eddyVisInfRatio": 0.009,
@@ -480,7 +535,10 @@
         "gridPrecision": "double",
         "gridPrecisionSurface": "single",
         "infChangeCorrection": true,
+        "infChangeCorrectionTol": 1e-12,
+        "infChangeCorrectionType": "offset",
         "innerPreconIts": 1,
+        "innerPreconItsCoarse": 1,
         "isoVariables": [],
         "isosurface": {},
         "liftIndex": 2,
@@ -492,12 +550,17 @@
         "machMode": false,
         "matrixOrdering": "RCM",
         "maxL2DeviationFactor": 1.0,
+        "meshMaxSkewness": 1.0,
         "meshSurfaceFamily": null,
         "monitorVariables": [
-            "resrho",
+            "cpu",
             "resrho",
             "resturb",
-            "cd"
+            "cl",
+            "cd",
+            "cmz",
+            "yplus",
+            "totalr"
         ],
         "nCycles": 1000,
         "nCyclesCoarse": 100,
@@ -526,12 +589,15 @@
         "partitionOnly": false,
         "preconditionerSide": "right",
         "printAllOptions": true,
+        "printBadlySkewedCells": false,
         "printIntro": true,
         "printIterations": true,
+        "printNegativeVolumes": false,
         "printTiming": true,
         "printWarnings": true,
         "qMode": false,
         "rMode": false,
+        "recomputeOverlapMatrix": true,
         "resAveraging": "never",
         "restartAdjoint": true,
         "restartFile": "input_files/mdo_tutorial_rough.cgns",
@@ -562,11 +628,13 @@
         "turbulenceModel": "SA",
         "turbulenceOrder": "first order",
         "turbulenceProduction": "strain",
+        "updateWallAssociations": false,
         "useALE": true,
         "useANKSolver": true,
         "useApproxWallDistance": true,
         "useBlockettes": false,
         "useDiagTSPC": true,
+        "useDissContinuation": false,
         "useExternalDynamicMesh": false,
         "useGridMotion": false,
         "useLinResMonitor": false,
@@ -576,6 +644,7 @@
         "useQCR": false,
         "useRotationSA": false,
         "useRoughSA": true,
+        "useSkewnessCheck": false,
         "useTSInterpolatedGridVelocity": false,
         "useWallFunctions": false,
         "useZipperMesh": true,
@@ -594,17 +663,18 @@
         ],
         "wallDistCutoff": 1e+20,
         "windAxis": false,
+        "writeSolutionDigits": 3,
         "writeSolutionEachIter": false,
         "writeSurfaceSolution": true,
         "writeTecplotSurfaceSolution": false,
         "writeVolumeSolution": true,
         "zipperSurfaceFamily": null
     },
-    "||FBar^T * dF/dXv||": 47500739.33793808,
+    "||FBar^T * dF/dXv||": 47500739.33793807,
     "||FBar^T * dF/dw||": 159520.82502357475,
     "||FBar^T * dF/xDv||": {
-        "P_mdo_tutorial": 57.28609964209285,
-        "T_mdo_tutorial": 2.842170943040401e-14,
+        "P_mdo_tutorial": 57.28609964209288,
+        "T_mdo_tutorial": 3.552713678800501e-14,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
         "mach_mdo_tutorial": 0.0,
@@ -612,9 +682,9 @@
         "yRef_mdo_tutorial": 0.0,
         "zRef_mdo_tutorial": 0.0
     },
-    "||dF/dP||": 5.592228638945923,
-    "||dF/dT||": 1.259326633802106e-15,
-    "||dF/dXv * xVDot||": 7581135.361357855,
+    "||dF/dP||": 5.5922286389459215,
+    "||dF/dT||": 1.4357072986878935e-15,
+    "||dF/dXv * xVDot||": 7581135.361357854,
     "||dF/dalpha||": 0.0,
     "||dF/dbeta||": 0.0,
     "||dF/dmach||": 0.0,
@@ -622,43 +692,43 @@
     "||dF/dxRef||": 0.0,
     "||dF/dyRef||": 0.0,
     "||dF/dzRef||": 0.0,
-    "||dR/dP||": 0.07356605182246512,
-    "||dR/dT||": 9.22063005758926e-07,
+    "||dR/dP||": 0.07356605182246514,
+    "||dR/dT||": 9.220630057589264e-07,
     "||dR/dXv * xVDot||": 26736790.15992888,
-    "||dR/dalpha||": 0.010271084828244873,
+    "||dR/dalpha||": 0.010271084828244875,
     "||dR/dbeta||": 0.6070879776942455,
     "||dR/dmach||": 0.9283860217641269,
-    "||dR/dw * wDot||": 2695502679.7876015,
+    "||dR/dw * wDot||": 2695502679.787602,
     "||dR/dxRef||": 0.0,
     "||dR/dyRef||": 0.0,
     "||dR/dzRef||": 0.0,
     "||dcd/dXdv||": {
         "P_mdo_tutorial": 1.0255863078366487e-07,
-        "T_mdo_tutorial": -3.3881317890172014e-21,
+        "T_mdo_tutorial": -1.6940658945086007e-21,
         "alpha_mdo_tutorial": 0.007160125372297845,
-        "beta_mdo_tutorial": -0.00013197604492666196,
+        "beta_mdo_tutorial": -0.00013197604492666198,
         "mach_mdo_tutorial": -0.06141600196447446,
         "xRef_mdo_tutorial": 0.0,
         "yRef_mdo_tutorial": 0.0,
         "zRef_mdo_tutorial": 0.0
     },
-    "||dcd/dXv||": 27.913370767944244,
-    "||dcd/dw||": 0.014161257652762545,
+    "||dcd/dXv||": 27.91337076794424,
+    "||dcd/dw||": 0.014161257652762544,
     "||dcfx/dXdv||": {
-        "P_mdo_tutorial": 1.0254457111038414e-07,
+        "P_mdo_tutorial": 1.0254457111038457e-07,
         "T_mdo_tutorial": 1.8634724839594607e-20,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
-        "mach_mdo_tutorial": -0.02917043250841126,
+        "mach_mdo_tutorial": -0.029170432508411247,
         "xRef_mdo_tutorial": 0.0,
         "yRef_mdo_tutorial": 0.0,
         "zRef_mdo_tutorial": 0.0
     },
-    "||dcfx/dXv||": 27.912716195114633,
+    "||dcfx/dXv||": 27.91271619511463,
     "||dcfx/dw||": 0.014090362780761695,
     "||dcfy/dXdv||": {
-        "P_mdo_tutorial": 2.058505774630354e-09,
-        "T_mdo_tutorial": 1.138200522872966e-21,
+        "P_mdo_tutorial": 2.058505774629507e-09,
+        "T_mdo_tutorial": 1.164670302474663e-21,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
         "mach_mdo_tutorial": -1.027035456971004,
@@ -666,71 +736,71 @@
         "yRef_mdo_tutorial": 0.0,
         "zRef_mdo_tutorial": 0.0
     },
-    "||dcfy/dXv||": 2.7931029821306077,
+    "||dcfy/dXv||": 2.7931029821306073,
     "||dcfy/dw||": 0.11354907277670545,
     "||dcfz/dXdv||": {
-        "P_mdo_tutorial": 3.080957025225676e-09,
-        "T_mdo_tutorial": -1.0587911840678754e-22,
+        "P_mdo_tutorial": 3.08095702522557e-09,
+        "T_mdo_tutorial": 1.0587911840678754e-22,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
-        "mach_mdo_tutorial": -0.01891350860258426,
+        "mach_mdo_tutorial": -0.018913508602584266,
         "xRef_mdo_tutorial": 0.0,
         "yRef_mdo_tutorial": 0.0,
         "zRef_mdo_tutorial": 0.0
     },
     "||dcfz/dXv||": 2.519605387991788,
-    "||dcfz/dw||": 0.007655345743081828,
+    "||dcfz/dw||": 0.007655345743081827,
     "||dcl/dXdv||": {
-        "P_mdo_tutorial": -1.163512791603402e-09,
-        "T_mdo_tutorial": 6.352747104407253e-22,
-        "alpha_mdo_tutorial": -0.0004287645790765541,
-        "beta_mdo_tutorial": 4.1475142992145416e-06,
+        "P_mdo_tutorial": -1.163512791604249e-09,
+        "T_mdo_tutorial": 5.293955920339377e-22,
+        "alpha_mdo_tutorial": -0.000428764579076554,
+        "beta_mdo_tutorial": 4.147514299214542e-06,
         "mach_mdo_tutorial": -1.0256124115430092,
         "xRef_mdo_tutorial": 0.0,
         "yRef_mdo_tutorial": 0.0,
         "zRef_mdo_tutorial": 0.0
     },
-    "||dcl/dXv||": 2.786553790809974,
-    "||dcl/dw||": 0.11354025292131026,
+    "||dcl/dXv||": 2.7865537908099736,
+    "||dcl/dw||": 0.11354025292131027,
     "||dcmx/dXdv||": {
-        "P_mdo_tutorial": -5.025803688610319e-09,
-        "T_mdo_tutorial": -2.6469779601696886e-22,
+        "P_mdo_tutorial": -5.025803688770831e-09,
+        "T_mdo_tutorial": 5.293955920339377e-22,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
-        "mach_mdo_tutorial": 1.9805037140293311,
+        "mach_mdo_tutorial": 1.9805037140293307,
         "xRef_mdo_tutorial": 0.0,
-        "yRef_mdo_tutorial": -0.0023278164433949873,
+        "yRef_mdo_tutorial": -0.0023278164433949877,
         "zRef_mdo_tutorial": 0.12640436393489274
     },
     "||dcmx/dXv||": 5.614216937125317,
     "||dcmx/dw||": 0.21272941416694607,
     "||dcmy/dXdv||": {
         "P_mdo_tutorial": 1.8366346879813508e-07,
-        "T_mdo_tutorial": -5.759824041329242e-20,
+        "T_mdo_tutorial": -6.437450399132683e-20,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
-        "mach_mdo_tutorial": 0.022452508285283303,
-        "xRef_mdo_tutorial": 0.0023278164433949873,
+        "mach_mdo_tutorial": 0.022452508285283317,
+        "xRef_mdo_tutorial": 0.0023278164433949877,
         "yRef_mdo_tutorial": 0.0,
-        "zRef_mdo_tutorial": -0.003590207077958318
+        "zRef_mdo_tutorial": -0.003590207077958317
     },
-    "||dcmy/dXv||": 54.53814156559552,
+    "||dcmy/dXv||": 54.53814156559551,
     "||dcmy/dw||": 0.033839731077361684,
     "||dcmz/dXdv||": {
-        "P_mdo_tutorial": 2.0700897671846038e-09,
-        "T_mdo_tutorial": 4.450231695535289e-22,
+        "P_mdo_tutorial": 2.0700897671831215e-09,
+        "T_mdo_tutorial": 2.4980854499101436e-22,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
         "mach_mdo_tutorial": -1.5738138165055942,
         "xRef_mdo_tutorial": -0.12640436393489274,
-        "yRef_mdo_tutorial": 0.003590207077958318,
+        "yRef_mdo_tutorial": 0.003590207077958317,
         "zRef_mdo_tutorial": 0.0
     },
-    "||dcmz/dXv||": 3.669722758336583,
+    "||dcmz/dXv||": 3.6697227583365826,
     "||dcmz/dw||": 0.1701587174303123,
     "||dcofxx/dXdv||": {
-        "P_mdo_tutorial": 1.0375012939594005e-05,
-        "T_mdo_tutorial": 0.0,
+        "P_mdo_tutorial": 1.0375012939594107e-05,
+        "T_mdo_tutorial": -2.168404344971009e-19,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
         "mach_mdo_tutorial": 0.0,
@@ -738,11 +808,11 @@
         "yRef_mdo_tutorial": 0.0,
         "zRef_mdo_tutorial": 0.0
     },
-    "||dcofxx/dXv||": 3932.154034943831,
-    "||dcofxx/dw||": 2.443894963885114,
+    "||dcofxx/dXv||": 3932.1540349438337,
+    "||dcofxx/dw||": 2.443894963885116,
     "||dcofxy/dXdv||": {
-        "P_mdo_tutorial": 1.4128496198935706e-06,
-        "T_mdo_tutorial": 9.486769009248164e-20,
+        "P_mdo_tutorial": 1.4128496198935765e-06,
+        "T_mdo_tutorial": 9.063252535621014e-20,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
         "mach_mdo_tutorial": 0.0,
@@ -750,11 +820,11 @@
         "yRef_mdo_tutorial": 0.0,
         "zRef_mdo_tutorial": 0.0
     },
-    "||dcofxy/dXv||": 623.1593133276403,
-    "||dcofxy/dw||": 0.23025338834961062,
+    "||dcofxy/dXv||": 623.1593133276406,
+    "||dcofxy/dw||": 0.23025338834961076,
     "||dcofxz/dXdv||": {
-        "P_mdo_tutorial": 3.848381803761737e-05,
-        "T_mdo_tutorial": -5.204170427930421e-18,
+        "P_mdo_tutorial": 3.848381803761748e-05,
+        "T_mdo_tutorial": -4.336808689942018e-18,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
         "mach_mdo_tutorial": 0.0,
@@ -762,11 +832,11 @@
         "yRef_mdo_tutorial": 0.0,
         "zRef_mdo_tutorial": 0.0
     },
-    "||dcofxz/dXv||": 11552.298850230323,
-    "||dcofxz/dw||": 5.698262110691421,
+    "||dcofxz/dXv||": 11552.29885023033,
+    "||dcofxz/dw||": 5.698262110691424,
     "||dcofyx/dXdv||": {
-        "P_mdo_tutorial": -2.0907153332361424e-09,
-        "T_mdo_tutorial": 2.541098841762901e-21,
+        "P_mdo_tutorial": -2.0907153332327543e-09,
+        "T_mdo_tutorial": 3.8116482626443515e-21,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
         "mach_mdo_tutorial": 0.0,
@@ -777,8 +847,8 @@
     "||dcofyx/dXv||": 12.79568795413275,
     "||dcofyx/dw||": 0.4192809957752443,
     "||dcofyy/dXdv||": {
-        "P_mdo_tutorial": 4.510954947116929e-12,
-        "T_mdo_tutorial": 3.1763735522036263e-22,
+        "P_mdo_tutorial": 4.510954947963962e-12,
+        "T_mdo_tutorial": 3.705769144237564e-22,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
         "mach_mdo_tutorial": 0.0,
@@ -786,11 +856,11 @@
         "yRef_mdo_tutorial": 0.0,
         "zRef_mdo_tutorial": 0.0
     },
-    "||dcofyy/dXv||": 1.9032269476611579,
+    "||dcofyy/dXv||": 1.9032269476611576,
     "||dcofyy/dw||": 0.0825146038902352,
     "||dcofyz/dXdv||": {
-        "P_mdo_tutorial": 7.602063203684432e-09,
-        "T_mdo_tutorial": 2.8587361969832636e-21,
+        "P_mdo_tutorial": 7.602063203682738e-09,
+        "T_mdo_tutorial": 1.5881867761018131e-21,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
         "mach_mdo_tutorial": 0.0,
@@ -798,11 +868,11 @@
         "yRef_mdo_tutorial": 0.0,
         "zRef_mdo_tutorial": 0.0
     },
-    "||dcofyz/dXv||": 20.03084467833954,
+    "||dcofyz/dXv||": 20.030844678339538,
     "||dcofyz/dw||": 0.8509322500970162,
     "||dcofzx/dXdv||": {
         "P_mdo_tutorial": -4.184628224365662e-07,
-        "T_mdo_tutorial": 2.1006417091906648e-19,
+        "T_mdo_tutorial": 1.2197274440461925e-19,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
         "mach_mdo_tutorial": 0.0,
@@ -810,11 +880,11 @@
         "yRef_mdo_tutorial": 0.0,
         "zRef_mdo_tutorial": 0.0
     },
-    "||dcofzx/dXv||": 827.0135715836628,
-    "||dcofzx/dw||": 2.862119939722488,
+    "||dcofzx/dXv||": 827.0135715836623,
+    "||dcofzx/dw||": 2.862119939722487,
     "||dcofzy/dXdv||": {
-        "P_mdo_tutorial": -1.4398784783814194e-07,
-        "T_mdo_tutorial": 1.6093625997831706e-20,
+        "P_mdo_tutorial": -1.4398784783813178e-07,
+        "T_mdo_tutorial": 8.470329472543003e-22,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
         "mach_mdo_tutorial": 0.0,
@@ -822,10 +892,10 @@
         "yRef_mdo_tutorial": 0.0,
         "zRef_mdo_tutorial": 0.0
     },
-    "||dcofzy/dXv||": 83.78696877492817,
-    "||dcofzy/dw||": 0.2947788448865528,
+    "||dcofzy/dXv||": 83.78696877492811,
+    "||dcofzy/dw||": 0.29477884488655265,
     "||dcofzz/dXdv||": {
-        "P_mdo_tutorial": -2.2729288082728007e-06,
+        "P_mdo_tutorial": -2.272928808272855e-06,
         "T_mdo_tutorial": -9.486769009248164e-20,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
@@ -834,22 +904,58 @@
         "yRef_mdo_tutorial": 0.0,
         "zRef_mdo_tutorial": 0.0
     },
-    "||dcofzz/dXv||": 1682.371794945297,
+    "||dcofzz/dXv||": 1682.3717949452966,
     "||dcofzz/dw||": 5.947434235310889,
+    "||dcolx/dXdv||": {
+        "P_mdo_tutorial": 1.5513127686803932e-09,
+        "T_mdo_tutorial": -8.470329472543003e-22,
+        "alpha_mdo_tutorial": 0.0008172790369756611,
+        "beta_mdo_tutorial": 1.3646892885640295e-05,
+        "mach_mdo_tutorial": 0.0,
+        "xRef_mdo_tutorial": 0.0,
+        "yRef_mdo_tutorial": 0.0,
+        "zRef_mdo_tutorial": 0.0
+    },
+    "||dcolx/dXv||": 12.775631153550293,
+    "||dcolx/dw||": 0.41986360397311606,
+    "||dcoly/dXdv||": {
+        "P_mdo_tutorial": 1.844419382777588e-09,
+        "T_mdo_tutorial": -3.970466940254533e-22,
+        "alpha_mdo_tutorial": 0.00019634051651761149,
+        "beta_mdo_tutorial": -4.346911198676798e-08,
+        "mach_mdo_tutorial": 0.0,
+        "xRef_mdo_tutorial": 0.0,
+        "yRef_mdo_tutorial": 0.0,
+        "zRef_mdo_tutorial": 0.0
+    },
+    "||dcoly/dXv||": 2.121373186987298,
+    "||dcoly/dw||": 0.08274333483032113,
+    "||dcolz/dXdv||": {
+        "P_mdo_tutorial": 9.911171655798822e-09,
+        "T_mdo_tutorial": 4.235164736271502e-22,
+        "alpha_mdo_tutorial": 0.0023217613923074195,
+        "beta_mdo_tutorial": 4.202349625535192e-05,
+        "mach_mdo_tutorial": 0.0,
+        "xRef_mdo_tutorial": 0.0,
+        "yRef_mdo_tutorial": 0.0,
+        "zRef_mdo_tutorial": 0.0
+    },
+    "||dcolz/dXv||": 20.13173905846084,
+    "||dcolz/dw||": 0.852397192595701,
     "||ddrag/dXdv||": {
         "P_mdo_tutorial": 0.5425726162154232,
-        "T_mdo_tutorial": -2.6645352591003757e-15,
+        "T_mdo_tutorial": -3.552713678800501e-15,
         "alpha_mdo_tutorial": 2919.0399117783863,
-        "beta_mdo_tutorial": -53.80399399570155,
+        "beta_mdo_tutorial": -53.80399399570157,
         "mach_mdo_tutorial": 0.0,
         "xRef_mdo_tutorial": 0.0,
         "yRef_mdo_tutorial": 0.0,
         "zRef_mdo_tutorial": 0.0
     },
-    "||ddrag/dXv||": 11379722.994675511,
+    "||ddrag/dXv||": 11379722.99467551,
     "||ddrag/dw||": 5773.261519878235,
     "||dfx/dXdv||": {
-        "P_mdo_tutorial": 0.27964940925086246,
+        "P_mdo_tutorial": 0.2796494092508623,
         "T_mdo_tutorial": -2.6645352591003757e-15,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
@@ -858,11 +964,11 @@
         "yRef_mdo_tutorial": 0.0,
         "zRef_mdo_tutorial": 0.0
     },
-    "||dfx/dXv||": 11379456.138424335,
+    "||dfx/dXv||": 11379456.138424333,
     "||dfx/dw||": 5744.359098460929,
     "||dfy/dXdv||": {
         "P_mdo_tutorial": 8.374875513592984,
-        "T_mdo_tutorial": 9.71445146547012e-17,
+        "T_mdo_tutorial": 1.249000902703301e-16,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
         "mach_mdo_tutorial": 0.0,
@@ -870,11 +976,11 @@
         "yRef_mdo_tutorial": 0.0,
         "zRef_mdo_tutorial": 0.0
     },
-    "||dfy/dXv||": 1138692.2237550062,
+    "||dfy/dXv||": 1138692.223755006,
     "||dfy/dw||": 46291.685989607286,
     "||dfz/dXdv||": {
-        "P_mdo_tutorial": 0.15546922830207502,
-        "T_mdo_tutorial": -5.551115123125783e-17,
+        "P_mdo_tutorial": 0.15546922830207505,
+        "T_mdo_tutorial": -8.326672684688674e-17,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
         "mach_mdo_tutorial": 0.0,
@@ -885,10 +991,10 @@
     "||dfz/dXv||": 1027192.7245764921,
     "||dfz/dw||": 3120.9313525395996,
     "||dlift/dXdv||": {
-        "P_mdo_tutorial": 8.361959017862176,
-        "T_mdo_tutorial": -1.6653345369377348e-16,
-        "alpha_mdo_tutorial": -174.79874359792962,
-        "beta_mdo_tutorial": 1.6908586295037844,
+        "P_mdo_tutorial": 8.361959017862175,
+        "T_mdo_tutorial": -8.326672684688674e-17,
+        "alpha_mdo_tutorial": -174.7987435979296,
+        "beta_mdo_tutorial": 1.6908586295037848,
         "mach_mdo_tutorial": 0.0,
         "xRef_mdo_tutorial": 0.0,
         "yRef_mdo_tutorial": 0.0,
@@ -897,32 +1003,32 @@
     "||dlift/dXv||": 1136022.2494374104,
     "||dlift/dw||": 46288.09031095977,
     "||dmx/dXdv||": {
-        "P_mdo_tutorial": -52.48842300766147,
-        "T_mdo_tutorial": -4.6074255521944e-15,
+        "P_mdo_tutorial": -52.48842300766146,
+        "T_mdo_tutorial": -4.163336342344337e-15,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
         "mach_mdo_tutorial": 0.0,
         "xRef_mdo_tutorial": 0.0,
-        "yRef_mdo_tutorial": -3084.263674840622,
+        "yRef_mdo_tutorial": -3084.263674840623,
         "zRef_mdo_tutorial": 167480.72603917567
     },
     "||dmx/dXv||": 7438612.87301356,
     "||dmx/dw||": 281857.96459463687,
     "||dmy/dXdv||": {
-        "P_mdo_tutorial": -0.35162675793461595,
+        "P_mdo_tutorial": -0.3516267579346164,
         "T_mdo_tutorial": 6.750155989720952e-14,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
         "mach_mdo_tutorial": 0.0,
-        "xRef_mdo_tutorial": 3084.263674840622,
+        "xRef_mdo_tutorial": 3084.263674840623,
         "yRef_mdo_tutorial": 0.0,
         "zRef_mdo_tutorial": -4756.880770011646
     },
     "||dmy/dXv||": 72260856.04875143,
     "||dmy/dw||": 44836.290088261136,
     "||dmz/dXdv||": {
-        "P_mdo_tutorial": 41.70754987248287,
-        "T_mdo_tutorial": -7.042977312465837e-16,
+        "P_mdo_tutorial": 41.70754987248288,
+        "T_mdo_tutorial": -7.181755190543981e-16,
         "alpha_mdo_tutorial": 0.0,
         "beta_mdo_tutorial": 0.0,
         "mach_mdo_tutorial": 0.0,
@@ -930,7 +1036,7 @@
         "yRef_mdo_tutorial": 4756.880770011646,
         "zRef_mdo_tutorial": 0.0
     },
-    "||dmz/dXv||": 4862235.8658856405,
+    "||dmz/dXv||": 4862235.86588564,
     "||dmz/dw||": 225453.4942464666,
     "||dsepsensor/dXdv||": {
         "P_mdo_tutorial": 0.0,
@@ -980,14 +1086,14 @@
     },
     "||dsepsensoravgz/dXv||": 3.3049236735056238,
     "||dsepsensoravgz/dw||": 10.598482329462021,
-    "||dwBar^T * dR/dXv||": 111601096.043413,
+    "||dwBar^T * dR/dXv||": 111601096.04341298,
     "||dwBar^T * dR/dw||": 2217397593.2762737,
     "||dwBar^T * dR/xDv||": {
-        "P_mdo_tutorial": 0.8893628179225485,
-        "T_mdo_tutorial": -7.76027680160496e-06,
-        "alpha_mdo_tutorial": -0.0020004837586182936,
-        "beta_mdo_tutorial": -0.02515696982671701,
-        "mach_mdo_tutorial": -3.283371024149604,
+        "P_mdo_tutorial": 0.8893628179225483,
+        "T_mdo_tutorial": -7.760276830026669e-06,
+        "alpha_mdo_tutorial": -0.002000483758618282,
+        "beta_mdo_tutorial": -0.025156969826716998,
+        "mach_mdo_tutorial": -3.283371024149603,
         "xRef_mdo_tutorial": 0.0,
         "yRef_mdo_tutorial": 0.0,
         "zRef_mdo_tutorial": 0.0
diff --git a/tests/reg_tests/test_functionals.py b/tests/reg_tests/test_functionals.py
index 2767fa999..d18a6e26b 100644
--- a/tests/reg_tests/test_functionals.py
+++ b/tests/reg_tests/test_functionals.py
@@ -230,8 +230,7 @@
                 "nSubiterTurb": 3,
                 "nCyclesCoarse": 100,
                 "nCycles": 1000,
-                "monitorVariables": ["resrho", "resrho", "resturb", "cd"],
-                "volumeVariables": ["resrho"],
+                "monitorVariables": ["cpu", "resrho", "resturb", "cl", "cd", "cmz", "yplus", "totalr"],
                 "useNKsolver": True,
                 "ANKSwitchTol": 1e-2,
                 "ANKSecondordSwitchTol": 1e-2,

From 30c13be745365b7f6887ce71af89d9ad750d06b1 Mon Sep 17 00:00:00 2001
From: DavidAnderegg <andv@zhaw.ch>
Date: Wed, 9 Oct 2024 17:38:27 +0200
Subject: [PATCH 56/60] adjust saFact so it is differentiable by tapenade and
 add test for jacvecpwd

---
 src/adjoint/outputForward/BCData_d.f90        |   2 +-
 src/adjoint/outputForward/sa_d.f90            |   2 +-
 .../outputForward/turbBCRoutines_d.f90        | 135 +++++++++---------
 src/adjoint/outputReverse/BCData_b.f90        |   2 +-
 src/adjoint/outputReverse/sa_b.f90            |   2 +-
 .../outputReverse/turbBCRoutines_b.f90        | 123 ++++++++--------
 src/adjoint/outputReverseFast/sa_fast_b.f90   |   2 +-
 .../turbBCRoutines_fast_b.f90                 |  70 +++++----
 src/turbulence/turbBCRoutines.F90             |  55 ++++---
 tests/reg_tests/test_jacVecProdBWDFast.py     |  34 +++++
 tests/reg_tests/test_jacVecProdFWD.py         |  35 +++++
 11 files changed, 267 insertions(+), 195 deletions(-)

diff --git a/src/adjoint/outputForward/BCData_d.f90 b/src/adjoint/outputForward/BCData_d.f90
index 09a9973a1..9593cac1f 100644
--- a/src/adjoint/outputForward/BCData_d.f90
+++ b/src/adjoint/outputForward/BCData_d.f90
@@ -1,5 +1,5 @@
 !        generated by tapenade     (inria, ecuador team)
-!  tapenade 3.16 (develop) - 13 sep 2023 12:36
+!  tapenade 3.16 (develop) - 22 aug 2023 15:51
 !
 module bcdata_d
   use constants
diff --git a/src/adjoint/outputForward/sa_d.f90 b/src/adjoint/outputForward/sa_d.f90
index 24cf20b07..c97a02d0e 100644
--- a/src/adjoint/outputForward/sa_d.f90
+++ b/src/adjoint/outputForward/sa_d.f90
@@ -1,5 +1,5 @@
 !        generated by tapenade     (inria, ecuador team)
-!  tapenade 3.16 (develop) - 13 sep 2023 12:36
+!  tapenade 3.16 (develop) - 22 aug 2023 15:51
 !
 ! this module contains the source code related to the sa turbulence
 ! model. it is slightly more modularized than the original which makes
diff --git a/src/adjoint/outputForward/turbBCRoutines_d.f90 b/src/adjoint/outputForward/turbBCRoutines_d.f90
index 1260818f4..da1446f55 100644
--- a/src/adjoint/outputForward/turbBCRoutines_d.f90
+++ b/src/adjoint/outputForward/turbBCRoutines_d.f90
@@ -1,5 +1,5 @@
 !        generated by tapenade     (inria, ecuador team)
-!  tapenade 3.16 (develop) - 13 sep 2023 12:36
+!  tapenade 3.16 (develop) - 22 aug 2023 15:51
 !
 module turbbcroutines_d
   implicit none
@@ -427,7 +427,7 @@ subroutine bceddywall_d(nn)
 !      local variables.
 !
     integer(kind=inttype) :: i, j
-    real(kind=realtype) :: result1
+    real(kind=realtype) :: fact
 ! determine the face id on which the subface is located and
 ! loop over the faces of the subface and set the eddy viscosity
 ! in the halo cells.
@@ -435,49 +435,49 @@ subroutine bceddywall_d(nn)
     case (imin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = saroughfact(2, i, j)
-          revd(1, i, j) = result1*revd(2, i, j)
-          rev(1, i, j) = result1*rev(2, i, j)
+          call saroughfact(2, i, j, fact)
+          revd(1, i, j) = fact*revd(2, i, j)
+          rev(1, i, j) = fact*rev(2, i, j)
         end do
       end do
     case (imax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = saroughfact(il, i, j)
-          revd(ie, i, j) = result1*revd(il, i, j)
-          rev(ie, i, j) = result1*rev(il, i, j)
+          call saroughfact(il, i, j, fact)
+          revd(ie, i, j) = fact*revd(il, i, j)
+          rev(ie, i, j) = fact*rev(il, i, j)
         end do
       end do
     case (jmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = saroughfact(i, 2, j)
-          revd(i, 1, j) = result1*revd(i, 2, j)
-          rev(i, 1, j) = result1*rev(i, 2, j)
+          call saroughfact(i, 2, j, fact)
+          revd(i, 1, j) = fact*revd(i, 2, j)
+          rev(i, 1, j) = fact*rev(i, 2, j)
         end do
       end do
     case (jmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = saroughfact(i, jl, j)
-          revd(i, je, j) = result1*revd(i, jl, j)
-          rev(i, je, j) = result1*rev(i, jl, j)
+          call saroughfact(i, jl, j, fact)
+          revd(i, je, j) = fact*revd(i, jl, j)
+          rev(i, je, j) = fact*rev(i, jl, j)
         end do
       end do
     case (kmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = saroughfact(i, j, 2)
-          revd(i, j, 1) = result1*revd(i, j, 2)
-          rev(i, j, 1) = result1*rev(i, j, 2)
+          call saroughfact(i, j, 2, fact)
+          revd(i, j, 1) = fact*revd(i, j, 2)
+          rev(i, j, 1) = fact*rev(i, j, 2)
         end do
       end do
     case (kmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = saroughfact(i, j, kl)
-          revd(i, j, ke) = result1*revd(i, j, kl)
-          rev(i, j, ke) = result1*rev(i, j, kl)
+          call saroughfact(i, j, kl, fact)
+          revd(i, j, ke) = fact*revd(i, j, kl)
+          rev(i, j, ke) = fact*rev(i, j, kl)
         end do
       end do
     end select
@@ -501,7 +501,7 @@ subroutine bceddywall(nn)
 !      local variables.
 !
     integer(kind=inttype) :: i, j
-    real(kind=realtype) :: result1
+    real(kind=realtype) :: fact
 ! determine the face id on which the subface is located and
 ! loop over the faces of the subface and set the eddy viscosity
 ! in the halo cells.
@@ -509,43 +509,43 @@ subroutine bceddywall(nn)
     case (imin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = saroughfact(2, i, j)
-          rev(1, i, j) = result1*rev(2, i, j)
+          call saroughfact(2, i, j, fact)
+          rev(1, i, j) = fact*rev(2, i, j)
         end do
       end do
     case (imax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = saroughfact(il, i, j)
-          rev(ie, i, j) = result1*rev(il, i, j)
+          call saroughfact(il, i, j, fact)
+          rev(ie, i, j) = fact*rev(il, i, j)
         end do
       end do
     case (jmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = saroughfact(i, 2, j)
-          rev(i, 1, j) = result1*rev(i, 2, j)
+          call saroughfact(i, 2, j, fact)
+          rev(i, 1, j) = fact*rev(i, 2, j)
         end do
       end do
     case (jmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = saroughfact(i, jl, j)
-          rev(i, je, j) = result1*rev(i, jl, j)
+          call saroughfact(i, jl, j, fact)
+          rev(i, je, j) = fact*rev(i, jl, j)
         end do
       end do
     case (kmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = saroughfact(i, j, 2)
-          rev(i, j, 1) = result1*rev(i, j, 2)
+          call saroughfact(i, j, 2, fact)
+          rev(i, j, 1) = fact*rev(i, j, 2)
         end do
       end do
     case (kmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = saroughfact(i, j, kl)
-          rev(i, j, ke) = result1*rev(i, j, kl)
+          call saroughfact(i, j, kl, fact)
+          rev(i, j, ke) = fact*rev(i, j, kl)
         end do
       end do
     end select
@@ -1202,7 +1202,7 @@ subroutine bcturbwall_d(nn)
 !      local variables.
 !
     integer(kind=inttype) :: i, j, ii, jj, iimax, jjmax
-    real(kind=realtype) :: tmpd, tmpe, tmpf, nu
+    real(kind=realtype) :: tmpd, tmpe, tmpf, nu, fact
     real(kind=realtype) :: tmpdd, nud
     real(kind=realtype), dimension(:, :, :, :), pointer :: bmt
     real(kind=realtype), dimension(:, :, :), pointer :: bvt, ww2
@@ -1221,7 +1221,6 @@ subroutine bcturbwall_d(nn)
     integer(kind=inttype) :: y10
     integer(kind=inttype) :: y11
     integer(kind=inttype) :: y12
-    real(kind=realtype) :: result1
     real(kind=realtype) :: temp
     real(kind=realtype) :: temp0
 ! determine the turbulence model used and loop over the faces
@@ -1235,43 +1234,43 @@ subroutine bcturbwall_d(nn)
       case (imin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = saroughfact(2, i, j)
-            bmti1(i, j, itu1, itu1) = -result1
+            call saroughfact(2, i, j, fact)
+            bmti1(i, j, itu1, itu1) = -fact
           end do
         end do
       case (imax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = saroughfact(il, i, j)
-            bmti2(i, j, itu1, itu1) = -result1
+            call saroughfact(il, i, j, fact)
+            bmti2(i, j, itu1, itu1) = -fact
           end do
         end do
       case (jmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = saroughfact(i, 2, j)
-            bmtj1(i, j, itu1, itu1) = -result1
+            call saroughfact(i, 2, j, fact)
+            bmtj1(i, j, itu1, itu1) = -fact
           end do
         end do
       case (jmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = saroughfact(i, jl, j)
-            bmtj2(i, j, itu1, itu1) = -result1
+            call saroughfact(i, jl, j, fact)
+            bmtj2(i, j, itu1, itu1) = -fact
           end do
         end do
       case (kmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = saroughfact(i, j, 2)
-            bmtk1(i, j, itu1, itu1) = -result1
+            call saroughfact(i, j, 2, fact)
+            bmtk1(i, j, itu1, itu1) = -fact
           end do
         end do
       case (kmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = saroughfact(i, j, kl)
-            bmtk2(i, j, itu1, itu1) = -result1
+            call saroughfact(i, j, kl, fact)
+            bmtk2(i, j, itu1, itu1) = -fact
           end do
         end do
       end select
@@ -1594,7 +1593,7 @@ subroutine bcturbwall(nn)
 !      local variables.
 !
     integer(kind=inttype) :: i, j, ii, jj, iimax, jjmax
-    real(kind=realtype) :: tmpd, tmpe, tmpf, nu
+    real(kind=realtype) :: tmpd, tmpe, tmpf, nu, fact
     real(kind=realtype), dimension(:, :, :, :), pointer :: bmt
     real(kind=realtype), dimension(:, :, :), pointer :: bvt, ww2
     real(kind=realtype), dimension(:, :), pointer :: rlv2, dd2wall
@@ -1612,7 +1611,6 @@ subroutine bcturbwall(nn)
     integer(kind=inttype) :: y10
     integer(kind=inttype) :: y11
     integer(kind=inttype) :: y12
-    real(kind=realtype) :: result1
 ! determine the turbulence model used and loop over the faces
 ! of the subface and set the values of bmt and bvt for an
 ! implicit treatment.
@@ -1624,43 +1622,43 @@ subroutine bcturbwall(nn)
       case (imin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = saroughfact(2, i, j)
-            bmti1(i, j, itu1, itu1) = -result1
+            call saroughfact(2, i, j, fact)
+            bmti1(i, j, itu1, itu1) = -fact
           end do
         end do
       case (imax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = saroughfact(il, i, j)
-            bmti2(i, j, itu1, itu1) = -result1
+            call saroughfact(il, i, j, fact)
+            bmti2(i, j, itu1, itu1) = -fact
           end do
         end do
       case (jmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = saroughfact(i, 2, j)
-            bmtj1(i, j, itu1, itu1) = -result1
+            call saroughfact(i, 2, j, fact)
+            bmtj1(i, j, itu1, itu1) = -fact
           end do
         end do
       case (jmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = saroughfact(i, jl, j)
-            bmtj2(i, j, itu1, itu1) = -result1
+            call saroughfact(i, jl, j, fact)
+            bmtj2(i, j, itu1, itu1) = -fact
           end do
         end do
       case (kmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = saroughfact(i, j, 2)
-            bmtk1(i, j, itu1, itu1) = -result1
+            call saroughfact(i, j, 2, fact)
+            bmtk1(i, j, itu1, itu1) = -fact
           end do
         end do
       case (kmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = saroughfact(i, j, kl)
-            bmtk2(i, j, itu1, itu1) = -result1
+            call saroughfact(i, j, kl, fact)
+            bmtk2(i, j, itu1, itu1) = -fact
           end do
         end do
       end select
@@ -2252,25 +2250,24 @@ subroutine turbbcnswall(secondhalo)
     end do bocos
   end subroutine turbbcnswall
 
-  function saroughfact(i, j, k)
+  subroutine saroughfact(i, j, k, fact)
 ! returns either the regular sa-boundary condition
 ! or the modified roughness-boundary condition
     use constants
     use inputphysics, only : useroughsa
     use blockpointers, only : ks, d2wall
     implicit none
-! dummy arguments
-    real(kind=realtype) :: saroughfact
 ! local variablse
-    integer(kind=inttype) :: i, j, k
+    integer(kind=inttype), intent(in) :: i, j, k
+    real(kind=realtype), intent(out) :: fact
     if (.not.useroughsa) then
-      saroughfact = -one
+      fact = -one
       return
     else
-      saroughfact = (ks(i, j, k)-d2wall(i, j, k)/0.03_realtype)/(ks(i, j&
-&       , k)+d2wall(i, j, k)/0.03_realtype)
+      fact = (ks(i, j, k)-d2wall(i, j, k)/0.03_realtype)/(ks(i, j, k)+&
+&       d2wall(i, j, k)/0.03_realtype)
     end if
-  end function saroughfact
+  end subroutine saroughfact
 
 end module turbbcroutines_d
 
diff --git a/src/adjoint/outputReverse/BCData_b.f90 b/src/adjoint/outputReverse/BCData_b.f90
index 95ad0854f..4d7ef5267 100644
--- a/src/adjoint/outputReverse/BCData_b.f90
+++ b/src/adjoint/outputReverse/BCData_b.f90
@@ -1,5 +1,5 @@
 !        generated by tapenade     (inria, ecuador team)
-!  tapenade 3.16 (develop) - 13 sep 2023 12:36
+!  tapenade 3.16 (develop) - 22 aug 2023 15:51
 !
 module bcdata_b
   use constants
diff --git a/src/adjoint/outputReverse/sa_b.f90 b/src/adjoint/outputReverse/sa_b.f90
index e8265f722..83b5c3b31 100644
--- a/src/adjoint/outputReverse/sa_b.f90
+++ b/src/adjoint/outputReverse/sa_b.f90
@@ -1,5 +1,5 @@
 !        generated by tapenade     (inria, ecuador team)
-!  tapenade 3.16 (develop) - 13 sep 2023 12:36
+!  tapenade 3.16 (develop) - 22 aug 2023 15:51
 !
 ! this module contains the source code related to the sa turbulence
 ! model. it is slightly more modularized than the original which makes
diff --git a/src/adjoint/outputReverse/turbBCRoutines_b.f90 b/src/adjoint/outputReverse/turbBCRoutines_b.f90
index 43a51aa5f..aa87f25bb 100644
--- a/src/adjoint/outputReverse/turbBCRoutines_b.f90
+++ b/src/adjoint/outputReverse/turbBCRoutines_b.f90
@@ -1,5 +1,5 @@
 !        generated by tapenade     (inria, ecuador team)
-!  tapenade 3.16 (develop) - 13 sep 2023 12:36
+!  tapenade 3.16 (develop) - 22 aug 2023 15:51
 !
 module turbbcroutines_b
   implicit none
@@ -583,7 +583,7 @@ subroutine bceddywall_b(nn)
 !      local variables.
 !
     integer(kind=inttype) :: i, j
-    real(kind=realtype) :: result1
+    real(kind=realtype) :: fact
     real(kind=realtype) :: tmp
     real(kind=realtype) :: tmpd
     real(kind=realtype) :: tmp0
@@ -597,88 +597,88 @@ subroutine bceddywall_b(nn)
     case (imin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          call pushreal8(result1)
-          result1 = saroughfact(2, i, j)
+          call pushreal8(fact)
+          call saroughfact(2, i, j, fact)
         end do
       end do
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
         do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
-          revd(2, i, j) = revd(2, i, j) + result1*revd(1, i, j)
+          revd(2, i, j) = revd(2, i, j) + fact*revd(1, i, j)
           revd(1, i, j) = 0.0_8
-          call popreal8(result1)
+          call popreal8(fact)
         end do
       end do
     case (imax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          call pushreal8(result1)
-          result1 = saroughfact(il, i, j)
+          call pushreal8(fact)
+          call saroughfact(il, i, j, fact)
         end do
       end do
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
         do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
           tmpd = revd(ie, i, j)
           revd(ie, i, j) = 0.0_8
-          revd(il, i, j) = revd(il, i, j) + result1*tmpd
-          call popreal8(result1)
+          revd(il, i, j) = revd(il, i, j) + fact*tmpd
+          call popreal8(fact)
         end do
       end do
     case (jmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          call pushreal8(result1)
-          result1 = saroughfact(i, 2, j)
+          call pushreal8(fact)
+          call saroughfact(i, 2, j, fact)
         end do
       end do
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
         do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
-          revd(i, 2, j) = revd(i, 2, j) + result1*revd(i, 1, j)
+          revd(i, 2, j) = revd(i, 2, j) + fact*revd(i, 1, j)
           revd(i, 1, j) = 0.0_8
-          call popreal8(result1)
+          call popreal8(fact)
         end do
       end do
     case (jmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          call pushreal8(result1)
-          result1 = saroughfact(i, jl, j)
+          call pushreal8(fact)
+          call saroughfact(i, jl, j, fact)
         end do
       end do
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
         do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
           tmpd0 = revd(i, je, j)
           revd(i, je, j) = 0.0_8
-          revd(i, jl, j) = revd(i, jl, j) + result1*tmpd0
-          call popreal8(result1)
+          revd(i, jl, j) = revd(i, jl, j) + fact*tmpd0
+          call popreal8(fact)
         end do
       end do
     case (kmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          call pushreal8(result1)
-          result1 = saroughfact(i, j, 2)
+          call pushreal8(fact)
+          call saroughfact(i, j, 2, fact)
         end do
       end do
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
         do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
-          revd(i, j, 2) = revd(i, j, 2) + result1*revd(i, j, 1)
+          revd(i, j, 2) = revd(i, j, 2) + fact*revd(i, j, 1)
           revd(i, j, 1) = 0.0_8
-          call popreal8(result1)
+          call popreal8(fact)
         end do
       end do
     case (kmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          call pushreal8(result1)
-          result1 = saroughfact(i, j, kl)
+          call pushreal8(fact)
+          call saroughfact(i, j, kl, fact)
         end do
       end do
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
         do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
           tmpd1 = revd(i, j, ke)
           revd(i, j, ke) = 0.0_8
-          revd(i, j, kl) = revd(i, j, kl) + result1*tmpd1
-          call popreal8(result1)
+          revd(i, j, kl) = revd(i, j, kl) + fact*tmpd1
+          call popreal8(fact)
         end do
       end do
     end select
@@ -702,7 +702,7 @@ subroutine bceddywall(nn)
 !      local variables.
 !
     integer(kind=inttype) :: i, j
-    real(kind=realtype) :: result1
+    real(kind=realtype) :: fact
 ! determine the face id on which the subface is located and
 ! loop over the faces of the subface and set the eddy viscosity
 ! in the halo cells.
@@ -710,43 +710,43 @@ subroutine bceddywall(nn)
     case (imin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = saroughfact(2, i, j)
-          rev(1, i, j) = result1*rev(2, i, j)
+          call saroughfact(2, i, j, fact)
+          rev(1, i, j) = fact*rev(2, i, j)
         end do
       end do
     case (imax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = saroughfact(il, i, j)
-          rev(ie, i, j) = result1*rev(il, i, j)
+          call saroughfact(il, i, j, fact)
+          rev(ie, i, j) = fact*rev(il, i, j)
         end do
       end do
     case (jmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = saroughfact(i, 2, j)
-          rev(i, 1, j) = result1*rev(i, 2, j)
+          call saroughfact(i, 2, j, fact)
+          rev(i, 1, j) = fact*rev(i, 2, j)
         end do
       end do
     case (jmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = saroughfact(i, jl, j)
-          rev(i, je, j) = result1*rev(i, jl, j)
+          call saroughfact(i, jl, j, fact)
+          rev(i, je, j) = fact*rev(i, jl, j)
         end do
       end do
     case (kmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = saroughfact(i, j, 2)
-          rev(i, j, 1) = result1*rev(i, j, 2)
+          call saroughfact(i, j, 2, fact)
+          rev(i, j, 1) = fact*rev(i, j, 2)
         end do
       end do
     case (kmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = saroughfact(i, j, kl)
-          rev(i, j, ke) = result1*rev(i, j, kl)
+          call saroughfact(i, j, kl, fact)
+          rev(i, j, ke) = fact*rev(i, j, kl)
         end do
       end do
     end select
@@ -1416,7 +1416,7 @@ subroutine bcturbwall_b(nn)
 !      local variables.
 !
     integer(kind=inttype) :: i, j, ii, jj, iimax, jjmax
-    real(kind=realtype) :: tmpd, tmpe, tmpf, nu
+    real(kind=realtype) :: tmpd, tmpe, tmpf, nu, fact
     real(kind=realtype) :: tmpdd, nud
     real(kind=realtype), dimension(:, :, :, :), pointer :: bmt
     real(kind=realtype), dimension(:, :, :), pointer :: bvt, ww2
@@ -1435,7 +1435,6 @@ subroutine bcturbwall_b(nn)
     integer(kind=inttype) :: y10
     integer(kind=inttype) :: y11
     integer(kind=inttype) :: y12
-    real(kind=realtype) :: result1
     real(kind=realtype) :: temp
     real(kind=realtype) :: tempd
     integer :: branch
@@ -1868,7 +1867,7 @@ subroutine bcturbwall(nn)
 !      local variables.
 !
     integer(kind=inttype) :: i, j, ii, jj, iimax, jjmax
-    real(kind=realtype) :: tmpd, tmpe, tmpf, nu
+    real(kind=realtype) :: tmpd, tmpe, tmpf, nu, fact
     real(kind=realtype), dimension(:, :, :, :), pointer :: bmt
     real(kind=realtype), dimension(:, :, :), pointer :: bvt, ww2
     real(kind=realtype), dimension(:, :), pointer :: rlv2, dd2wall
@@ -1886,7 +1885,6 @@ subroutine bcturbwall(nn)
     integer(kind=inttype) :: y10
     integer(kind=inttype) :: y11
     integer(kind=inttype) :: y12
-    real(kind=realtype) :: result1
 ! determine the turbulence model used and loop over the faces
 ! of the subface and set the values of bmt and bvt for an
 ! implicit treatment.
@@ -1898,43 +1896,43 @@ subroutine bcturbwall(nn)
       case (imin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = saroughfact(2, i, j)
-            bmti1(i, j, itu1, itu1) = -result1
+            call saroughfact(2, i, j, fact)
+            bmti1(i, j, itu1, itu1) = -fact
           end do
         end do
       case (imax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = saroughfact(il, i, j)
-            bmti2(i, j, itu1, itu1) = -result1
+            call saroughfact(il, i, j, fact)
+            bmti2(i, j, itu1, itu1) = -fact
           end do
         end do
       case (jmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = saroughfact(i, 2, j)
-            bmtj1(i, j, itu1, itu1) = -result1
+            call saroughfact(i, 2, j, fact)
+            bmtj1(i, j, itu1, itu1) = -fact
           end do
         end do
       case (jmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = saroughfact(i, jl, j)
-            bmtj2(i, j, itu1, itu1) = -result1
+            call saroughfact(i, jl, j, fact)
+            bmtj2(i, j, itu1, itu1) = -fact
           end do
         end do
       case (kmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = saroughfact(i, j, 2)
-            bmtk1(i, j, itu1, itu1) = -result1
+            call saroughfact(i, j, 2, fact)
+            bmtk1(i, j, itu1, itu1) = -fact
           end do
         end do
       case (kmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = saroughfact(i, j, kl)
-            bmtk2(i, j, itu1, itu1) = -result1
+            call saroughfact(i, j, kl, fact)
+            bmtk2(i, j, itu1, itu1) = -fact
           end do
         end do
       end select
@@ -2605,25 +2603,24 @@ subroutine turbbcnswall(secondhalo)
     end do bocos
   end subroutine turbbcnswall
 
-  function saroughfact(i, j, k)
+  subroutine saroughfact(i, j, k, fact)
 ! returns either the regular sa-boundary condition
 ! or the modified roughness-boundary condition
     use constants
     use inputphysics, only : useroughsa
     use blockpointers, only : ks, d2wall
     implicit none
-! dummy arguments
-    real(kind=realtype) :: saroughfact
 ! local variablse
-    integer(kind=inttype) :: i, j, k
+    integer(kind=inttype), intent(in) :: i, j, k
+    real(kind=realtype), intent(out) :: fact
     if (.not.useroughsa) then
-      saroughfact = -one
+      fact = -one
       return
     else
-      saroughfact = (ks(i, j, k)-d2wall(i, j, k)/0.03_realtype)/(ks(i, j&
-&       , k)+d2wall(i, j, k)/0.03_realtype)
+      fact = (ks(i, j, k)-d2wall(i, j, k)/0.03_realtype)/(ks(i, j, k)+&
+&       d2wall(i, j, k)/0.03_realtype)
     end if
-  end function saroughfact
+  end subroutine saroughfact
 
 end module turbbcroutines_b
 
diff --git a/src/adjoint/outputReverseFast/sa_fast_b.f90 b/src/adjoint/outputReverseFast/sa_fast_b.f90
index 4f3d30945..2e320c0a3 100644
--- a/src/adjoint/outputReverseFast/sa_fast_b.f90
+++ b/src/adjoint/outputReverseFast/sa_fast_b.f90
@@ -1,5 +1,5 @@
 !        generated by tapenade     (inria, ecuador team)
-!  tapenade 3.16 (develop) - 13 sep 2023 12:36
+!  tapenade 3.16 (develop) - 22 aug 2023 15:51
 !
 ! this module contains the source code related to the sa turbulence
 ! model. it is slightly more modularized than the original which makes
diff --git a/src/adjoint/outputReverseFast/turbBCRoutines_fast_b.f90 b/src/adjoint/outputReverseFast/turbBCRoutines_fast_b.f90
index 13341d88f..d535813cc 100644
--- a/src/adjoint/outputReverseFast/turbBCRoutines_fast_b.f90
+++ b/src/adjoint/outputReverseFast/turbBCRoutines_fast_b.f90
@@ -1,5 +1,5 @@
 !        generated by tapenade     (inria, ecuador team)
-!  tapenade 3.16 (develop) - 13 sep 2023 12:36
+!  tapenade 3.16 (develop) - 22 aug 2023 15:51
 !
 module turbbcroutines_fast_b
   implicit none
@@ -206,7 +206,7 @@ subroutine bceddywall(nn)
 !      local variables.
 !
     integer(kind=inttype) :: i, j
-    real(kind=realtype) :: result1
+    real(kind=realtype) :: fact
 ! determine the face id on which the subface is located and
 ! loop over the faces of the subface and set the eddy viscosity
 ! in the halo cells.
@@ -214,43 +214,43 @@ subroutine bceddywall(nn)
     case (imin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = saroughfact(2, i, j)
-          rev(1, i, j) = result1*rev(2, i, j)
+          call saroughfact(2, i, j, fact)
+          rev(1, i, j) = fact*rev(2, i, j)
         end do
       end do
     case (imax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = saroughfact(il, i, j)
-          rev(ie, i, j) = result1*rev(il, i, j)
+          call saroughfact(il, i, j, fact)
+          rev(ie, i, j) = fact*rev(il, i, j)
         end do
       end do
     case (jmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = saroughfact(i, 2, j)
-          rev(i, 1, j) = result1*rev(i, 2, j)
+          call saroughfact(i, 2, j, fact)
+          rev(i, 1, j) = fact*rev(i, 2, j)
         end do
       end do
     case (jmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = saroughfact(i, jl, j)
-          rev(i, je, j) = result1*rev(i, jl, j)
+          call saroughfact(i, jl, j, fact)
+          rev(i, je, j) = fact*rev(i, jl, j)
         end do
       end do
     case (kmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = saroughfact(i, j, 2)
-          rev(i, j, 1) = result1*rev(i, j, 2)
+          call saroughfact(i, j, 2, fact)
+          rev(i, j, 1) = fact*rev(i, j, 2)
         end do
       end do
     case (kmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          result1 = saroughfact(i, j, kl)
-          rev(i, j, ke) = result1*rev(i, j, kl)
+          call saroughfact(i, j, kl, fact)
+          rev(i, j, ke) = fact*rev(i, j, kl)
         end do
       end do
     end select
@@ -639,7 +639,7 @@ subroutine bcturbwall(nn)
 !      local variables.
 !
     integer(kind=inttype) :: i, j, ii, jj, iimax, jjmax
-    real(kind=realtype) :: tmpd, tmpe, tmpf, nu
+    real(kind=realtype) :: tmpd, tmpe, tmpf, nu, fact
     real(kind=realtype), dimension(:, :, :, :), pointer :: bmt
     real(kind=realtype), dimension(:, :, :), pointer :: bvt, ww2
     real(kind=realtype), dimension(:, :), pointer :: rlv2, dd2wall
@@ -657,7 +657,6 @@ subroutine bcturbwall(nn)
     integer(kind=inttype) :: y10
     integer(kind=inttype) :: y11
     integer(kind=inttype) :: y12
-    real(kind=realtype) :: result1
 ! determine the turbulence model used and loop over the faces
 ! of the subface and set the values of bmt and bvt for an
 ! implicit treatment.
@@ -669,43 +668,43 @@ subroutine bcturbwall(nn)
       case (imin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = saroughfact(2, i, j)
-            bmti1(i, j, itu1, itu1) = -result1
+            call saroughfact(2, i, j, fact)
+            bmti1(i, j, itu1, itu1) = -fact
           end do
         end do
       case (imax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = saroughfact(il, i, j)
-            bmti2(i, j, itu1, itu1) = -result1
+            call saroughfact(il, i, j, fact)
+            bmti2(i, j, itu1, itu1) = -fact
           end do
         end do
       case (jmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = saroughfact(i, 2, j)
-            bmtj1(i, j, itu1, itu1) = -result1
+            call saroughfact(i, 2, j, fact)
+            bmtj1(i, j, itu1, itu1) = -fact
           end do
         end do
       case (jmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = saroughfact(i, jl, j)
-            bmtj2(i, j, itu1, itu1) = -result1
+            call saroughfact(i, jl, j, fact)
+            bmtj2(i, j, itu1, itu1) = -fact
           end do
         end do
       case (kmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = saroughfact(i, j, 2)
-            bmtk1(i, j, itu1, itu1) = -result1
+            call saroughfact(i, j, 2, fact)
+            bmtk1(i, j, itu1, itu1) = -fact
           end do
         end do
       case (kmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            result1 = saroughfact(i, j, kl)
-            bmtk2(i, j, itu1, itu1) = -result1
+            call saroughfact(i, j, kl, fact)
+            bmtk2(i, j, itu1, itu1) = -fact
           end do
         end do
       end select
@@ -1184,25 +1183,24 @@ subroutine turbbcnswall(secondhalo)
     end do bocos
   end subroutine turbbcnswall
 
-  function saroughfact(i, j, k)
+  subroutine saroughfact(i, j, k, fact)
 ! returns either the regular sa-boundary condition
 ! or the modified roughness-boundary condition
     use constants
     use inputphysics, only : useroughsa
     use blockpointers, only : ks, d2wall
     implicit none
-! dummy arguments
-    real(kind=realtype) :: saroughfact
 ! local variablse
-    integer(kind=inttype) :: i, j, k
+    integer(kind=inttype), intent(in) :: i, j, k
+    real(kind=realtype), intent(out) :: fact
     if (.not.useroughsa) then
-      saroughfact = -one
+      fact = -one
       return
     else
-      saroughfact = (ks(i, j, k)-d2wall(i, j, k)/0.03_realtype)/(ks(i, j&
-&       , k)+d2wall(i, j, k)/0.03_realtype)
+      fact = (ks(i, j, k)-d2wall(i, j, k)/0.03_realtype)/(ks(i, j, k)+&
+&       d2wall(i, j, k)/0.03_realtype)
     end if
-  end function saroughfact
+  end subroutine saroughfact
 
 end module turbbcroutines_fast_b
 
diff --git a/src/turbulence/turbBCRoutines.F90 b/src/turbulence/turbBCRoutines.F90
index fe431b14d..f0f2777f1 100644
--- a/src/turbulence/turbBCRoutines.F90
+++ b/src/turbulence/turbBCRoutines.F90
@@ -320,6 +320,7 @@ subroutine bcEddyWall(nn)
         !      Local variables.
         !
         integer(kind=intType) :: i, j
+        real(kind=realType) :: fact
 
         ! Determine the face id on which the subface is located and
         ! loop over the faces of the subface and set the eddy viscosity
@@ -329,42 +330,48 @@ subroutine bcEddyWall(nn)
         case (iMin)
             do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
                 do i = BCData(nn)%icBeg, BCData(nn)%icEnd
-                    rev(1, i, j) = saRoughFact(2, i, j) * rev(2, i, j)
+                    call saRoughFact(2, i, j, fact)
+                    rev(1, i, j) = fact * rev(2, i, j)
                 end do
             end do
 
         case (iMax)
             do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
                 do i = BCData(nn)%icBeg, BCData(nn)%icEnd
-                    rev(ie, i, j) = saRoughFact(il, i, j) * rev(il, i, j)
+                    call saRoughFact(il, i, j, fact)
+                    rev(ie, i, j) = fact * rev(il, i, j)
                 end do
             end do
 
         case (jMin)
             do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
                 do i = BCData(nn)%icBeg, BCData(nn)%icEnd
-                    rev(i, 1, j) = saRoughFact(i, 2, j) * rev(i, 2, j)
+                    call saRoughFact(i, 2, j, fact)
+                    rev(i, 1, j) = fact * rev(i, 2, j)
                 end do
             end do
 
         case (jMax)
             do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
                 do i = BCData(nn)%icBeg, BCData(nn)%icEnd
-                    rev(i, je, j) = saRoughFact(i, jl, j) * rev(i, jl, j)
+                    call saRoughFact(i, jl, j, fact)
+                    rev(i, je, j) = fact * rev(i, jl, j)
                 end do
             end do
 
         case (kMin)
             do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
                 do i = BCData(nn)%icBeg, BCData(nn)%icEnd
-                    rev(i, j, 1) = saRoughFact(i, j, 2) * rev(i, j, 2)
+                    call saRoughFact(i, j, 2, fact)
+                    rev(i, j, 1) = fact * rev(i, j, 2)
                 end do
             end do
 
         case (kMax)
             do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
                 do i = BCData(nn)%icBeg, BCData(nn)%icEnd
-                    rev(i, j, ke) = saRoughFact(i, j, kl) * rev(i, j, kl)
+                    call saRoughFact(i, j, kl, fact)
+                    rev(i, j, ke) = fact * rev(i, j, kl)
                 end do
             end do
         end select
@@ -819,7 +826,7 @@ subroutine bcTurbWall(nn)
         !
         integer(kind=intType) :: i, j, ii, jj, iiMax, jjMax
 
-        real(kind=realType) :: tmpd, tmpe, tmpf, nu
+        real(kind=realType) :: tmpd, tmpe, tmpf, nu, fact
 
         real(kind=realType), dimension(:, :, :, :), pointer :: bmt
         real(kind=realType), dimension(:, :, :), pointer :: bvt, ww2
@@ -839,39 +846,45 @@ subroutine bcTurbWall(nn)
             case (iMin)
                 do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
                     do i = BCData(nn)%icBeg, BCData(nn)%icEnd
-                        bmti1(i, j, itu1, itu1) = -saRoughFact(2, i, j)
+                        call saRoughFact(2, i, j, fact)
+                        bmti1(i, j, itu1, itu1) = -fact
                     end do
                 end do
             case (iMax)
                 do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
                     do i = BCData(nn)%icBeg, BCData(nn)%icEnd
-                        bmti2(i, j, itu1, itu1) = -saRoughFact(il, i, j)
+                        call saRoughFact(il, i, j, fact)
+                        bmti2(i, j, itu1, itu1) = -fact
                     end do
                 end do
             case (jMin)
                 do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
                     do i = BCData(nn)%icBeg, BCData(nn)%icEnd
-                        bmtj1(i, j, itu1, itu1) = -saRoughFact(i, 2, j)
+                        call saRoughFact(i, 2, j, fact)
+                        bmtj1(i, j, itu1, itu1) = -fact
                     end do
                 end do
             case (jMax)
                 do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
                     do i = BCData(nn)%icBeg, BCData(nn)%icEnd
-                        bmtj2(i, j, itu1, itu1) = -saRoughFact(i, jl, j)
+                        call saRoughFact(i, jl, j, fact)
+                        bmtj2(i, j, itu1, itu1) = -fact
                     end do
                 end do
 
             case (kMin)
                 do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
                     do i = BCData(nn)%icBeg, BCData(nn)%icEnd
-                        bmtk1(i, j, itu1, itu1) = -saRoughFact(i, j, 2)
+                        call saRoughFact(i, j, 2, fact)
+                        bmtk1(i, j, itu1, itu1) = -fact
                     end do
                 end do
 
             case (kMax)
                 do j = BCData(nn)%jcBeg, BCData(nn)%jcEnd
                     do i = BCData(nn)%icBeg, BCData(nn)%icEnd
-                        bmtk2(i, j, itu1, itu1) = -saRoughFact(i, j, kl)
+                        call saRoughFact(i, j, kl, fact)
+                        bmtk2(i, j, itu1, itu1) = -fact
                     end do
                 end do
             end select
@@ -1385,7 +1398,7 @@ subroutine turbBCNSWall(secondHalo)
         end do bocos
     end subroutine turbBCNSWall
 
-    function saRoughFact(i, j, k)
+    subroutine saRoughFact(i, j, k, fact)
 
         ! returns either the regular SA-boundary condition
         ! or the modified Roughness-boundary condition
@@ -1395,20 +1408,18 @@ function saRoughFact(i, j, k)
         use BlockPointers, only: ks, d2wall
         implicit none
 
-        ! dummy arguments
-        real(kind=realType) :: saRoughFact
-
         ! local variablse
-        integer(kind=intType) :: i, j, k
+        integer(kind=intType), intent(in) :: i, j, k
+        real(kind=realType), intent(out) :: fact
 
         if (.not. useRoughSA) then
-            saRoughFact = -one
+            fact = -one
             return
         end if
 
-        saRoughFact = (ks(i, j, k) - d2wall(i, j, k) / 0.03_realType) / &
-                      (ks(i, j, k) + d2wall(i, j, k) / 0.03_realType)
+        fact = (ks(i, j, k) - d2wall(i, j, k) / 0.03_realType) / &
+               (ks(i, j, k) + d2wall(i, j, k) / 0.03_realType)
 
-    end function saRoughFact
+    end subroutine saRoughFact
 
 end module turbBCRoutines
diff --git a/tests/reg_tests/test_jacVecProdBWDFast.py b/tests/reg_tests/test_jacVecProdBWDFast.py
index 67d4c4a71..f56091c57 100644
--- a/tests/reg_tests/test_jacVecProdBWDFast.py
+++ b/tests/reg_tests/test_jacVecProdBWDFast.py
@@ -33,6 +33,40 @@
         "aero_prob": copy.deepcopy(ap_tutorial_wing),
         "N_PROCS": 1,
     },
+
+    {
+        "name": "rough_sa_tut_wing_1core",
+        "options": {
+            "gridFile": os.path.join(baseDir, "../../input_files/mdo_tutorial_rough.cgns"),
+            "restartFile": os.path.join(baseDir, "../../input_files/mdo_tutorial_rough.cgns"),
+                "equationType": "RANS",
+                "useBlockettes": False,
+                "useRoughSA": True,
+                "MGCycle": "2w",
+                "equationType": "RANS",
+                "smoother": "DADI",
+                "CFL": 1.5,
+                "CFLCoarse": 1.25,
+                "resAveraging": "never",
+                "nSubiter": 3,
+                "nSubiterTurb": 3,
+                "nCyclesCoarse": 100,
+                "nCycles": 1000,
+                "monitorVariables": ["cpu", "resrho", "resturb", "cl", "cd", "cmz", "yplus", "totalr"],
+                "useNKsolver": True,
+                "ANKSwitchTol": 1e-2,
+                "ANKSecondordSwitchTol": 1e-2,
+                "L2Convergence": 1e-15,
+                "NKSwitchTol": 1e-5,
+                "adjointL2Convergence": 1e-16,
+                "blockSplitting": True,
+                "NKjacobianlag": 2,
+
+        },
+        "ref_file": "funcs_rans_rough_sa.json",
+        "aero_prob": copy.deepcopy(ap_tutorial_wing),
+        "N_PROCS": 1,
+    },
 ]
 
 
diff --git a/tests/reg_tests/test_jacVecProdFWD.py b/tests/reg_tests/test_jacVecProdFWD.py
index 5b8ac34a8..bf5b1b9e4 100644
--- a/tests/reg_tests/test_jacVecProdFWD.py
+++ b/tests/reg_tests/test_jacVecProdFWD.py
@@ -35,6 +35,41 @@
         "aero_prob": copy.deepcopy(ap_tutorial_wing),
         "N_PROCS": 1,
     },
+
+    {
+        "name": "rough_sa_tut_wing_1core",
+        "options": {
+            "gridFile": os.path.join(baseDir, "../../input_files/mdo_tutorial_rough.cgns"),
+            "restartFile": os.path.join(baseDir, "../../input_files/mdo_tutorial_rough.cgns"),
+                "equationType": "RANS",
+                "useBlockettes": False,
+                "useRoughSA": True,
+                "MGCycle": "2w",
+                "equationType": "RANS",
+                "smoother": "DADI",
+                "CFL": 1.5,
+                "CFLCoarse": 1.25,
+                "resAveraging": "never",
+                "nSubiter": 3,
+                "nSubiterTurb": 3,
+                "nCyclesCoarse": 100,
+                "nCycles": 1000,
+                "monitorVariables": ["cpu", "resrho", "resturb", "cl", "cd", "cmz", "yplus", "totalr"],
+                "useNKsolver": True,
+                "ANKSwitchTol": 1e-2,
+                "ANKSecondordSwitchTol": 1e-2,
+                "L2Convergence": 1e-15,
+                "NKSwitchTol": 1e-5,
+                "adjointL2Convergence": 1e-16,
+                "blockSplitting": True,
+                "NKjacobianlag": 2,
+
+        },
+        "ref_file": "funcs_rans_rough_sa.json",
+        "aero_prob": copy.deepcopy(ap_tutorial_wing),
+        "N_PROCS": 1,
+    },
+
 ]
 
 

From 1c76920e198059ba6bd78eca5bf6ba22ec734cfa Mon Sep 17 00:00:00 2001
From: DavidAnderegg <andv@zhaw.ch>
Date: Thu, 10 Oct 2024 08:27:34 +0200
Subject: [PATCH 57/60] partials are wrong, but not crashing

---
 src/adjoint/Makefile_tapenade                 |   2 +-
 .../outputForward/turbBCRoutines_d.f90        |  96 +++++++++---
 src/adjoint/outputForward/turbUtils_d.f90     |   5 +-
 .../outputReverse/turbBCRoutines_b.f90        | 140 ++++++++++++++++--
 src/adjoint/outputReverse/turbUtils_b.f90     |   5 +-
 5 files changed, 211 insertions(+), 37 deletions(-)

diff --git a/src/adjoint/Makefile_tapenade b/src/adjoint/Makefile_tapenade
index d0077a2ea..a53c42709 100644
--- a/src/adjoint/Makefile_tapenade
+++ b/src/adjoint/Makefile_tapenade
@@ -162,7 +162,7 @@ BCRoutines%bcSubsonicInflow(ww0, ww1, ww2, pp0, pp1, pp2, rlv0, rlv1, rlv2, rev0
 BCRoutines%bcSubsonicOutflow(ww0, ww1, ww2, pp0, pp1, pp2, rlv0, rlv1, rlv2, rev0, rev1, rev2, bcData%norm, bcData%Ps) > \
                             (ww0, ww1, ww2, pp0, pp1, pp2, rlv0, rlv1, rlv2, rev0, rev1, rev2, bcData%norm, bcData%Ps) \
 \
-turbBCRoutines%applyAllTurbBCThisBlock(rev, w, bvtj1, bvtj2, bvtk1, bvtk2, bvti1, bvti2) > \
+turbBCRoutines%applyAllTurbBCThisBlock(rev, w, bvtj1, bvtj2, bvtk1, bvtk2, bvti1, bvti2, bmti1, bmti2, bmtj1, bmtj2, bmtk1, bmtk2) > \
                                       (rev, w) \
 \
 turbBCRoutines%bcTurbTreatment(w, rlv, d2wall, winf) > \
diff --git a/src/adjoint/outputForward/turbBCRoutines_d.f90 b/src/adjoint/outputForward/turbBCRoutines_d.f90
index da1446f55..b6d5d2869 100644
--- a/src/adjoint/outputForward/turbBCRoutines_d.f90
+++ b/src/adjoint/outputForward/turbBCRoutines_d.f90
@@ -7,12 +7,16 @@ module turbbcroutines_d
 contains
 !  differentiation of applyallturbbcthisblock in forward (tangent) mode (with options i4 dr8 r8):
 !   variations   of useful results: *rev *w
-!   with respect to varying inputs: *rev *bvtj1 *bvtj2 *w *bvtk1
-!                *bvtk2 *bvti1 *bvti2
+!   with respect to varying inputs: *rev *bvtj1 *bvtj2 *w *bmtk1
+!                *bmtk2 *bvtk1 *bvtk2 *bmti1 *bmti2 *bvti1 *bvti2
+!                *bmtj1 *bmtj2
 !   rw status of diff variables: *rev:in-out *bvtj1:in *bvtj2:in
-!                *w:in-out *bvtk1:in *bvtk2:in *bvti1:in *bvti2:in
+!                *w:in-out *bmtk1:in *bmtk2:in *bvtk1:in *bvtk2:in
+!                *bmti1:in *bmti2:in *bvti1:in *bvti2:in *bmtj1:in
+!                *bmtj2:in
 !   plus diff mem management of: rev:in bvtj1:in bvtj2:in w:in
-!                bvtk1:in bvtk2:in bvti1:in bvti2:in
+!                bmtk1:in bmtk2:in bvtk1:in bvtk2:in bmti1:in bmti2:in
+!                bvti1:in bvti2:in bmtj1:in bmtj2:in
 !      ==================================================================
   subroutine applyallturbbcthisblock_d(secondhalo)
 !
@@ -36,6 +40,7 @@ subroutine applyallturbbcthisblock_d(secondhalo)
     real(kind=realtype), dimension(:, :, :, :), pointer :: bmt
     real(kind=realtype), dimension(:, :, :), pointer :: bvt, ww1, ww2
     real(kind=realtype) :: temp
+    real(kind=realtype) :: temp0
 ! loop over the boundary condition subfaces of this block.
 bocos:do nn=1,nbocos
 ! loop over the faces and set the state in
@@ -49,9 +54,11 @@ subroutine applyallturbbcthisblock_d(secondhalo)
                 wd(1, i, j, l) = bvti1d(i, j, l)
                 w(1, i, j, l) = bvti1(i, j, l)
                 do m=nt1,nt2
-                  temp = bmti1(i, j, l, m)
-                  wd(1, i, j, l) = wd(1, i, j, l) - temp*wd(2, i, j, m)
-                  w(1, i, j, l) = w(1, i, j, l) - temp*w(2, i, j, m)
+                  temp = w(2, i, j, m)
+                  temp0 = bmti1(i, j, l, m)
+                  wd(1, i, j, l) = wd(1, i, j, l) - temp*bmti1d(i, j, l&
+&                   , m) - temp0*wd(2, i, j, m)
+                  w(1, i, j, l) = w(1, i, j, l) - temp0*temp
                 end do
               end do
             end do
@@ -63,10 +70,11 @@ subroutine applyallturbbcthisblock_d(secondhalo)
                 wd(ie, i, j, l) = bvti2d(i, j, l)
                 w(ie, i, j, l) = bvti2(i, j, l)
                 do m=nt1,nt2
+                  temp0 = w(il, i, j, m)
                   temp = bmti2(i, j, l, m)
-                  wd(ie, i, j, l) = wd(ie, i, j, l) - temp*wd(il, i, j, &
-&                   m)
-                  w(ie, i, j, l) = w(ie, i, j, l) - temp*w(il, i, j, m)
+                  wd(ie, i, j, l) = wd(ie, i, j, l) - temp0*bmti2d(i, j&
+&                   , l, m) - temp*wd(il, i, j, m)
+                  w(ie, i, j, l) = w(ie, i, j, l) - temp*temp0
                 end do
               end do
             end do
@@ -78,9 +86,11 @@ subroutine applyallturbbcthisblock_d(secondhalo)
                 wd(i, 1, j, l) = bvtj1d(i, j, l)
                 w(i, 1, j, l) = bvtj1(i, j, l)
                 do m=nt1,nt2
+                  temp0 = w(i, 2, j, m)
                   temp = bmtj1(i, j, l, m)
-                  wd(i, 1, j, l) = wd(i, 1, j, l) - temp*wd(i, 2, j, m)
-                  w(i, 1, j, l) = w(i, 1, j, l) - temp*w(i, 2, j, m)
+                  wd(i, 1, j, l) = wd(i, 1, j, l) - temp0*bmtj1d(i, j, l&
+&                   , m) - temp*wd(i, 2, j, m)
+                  w(i, 1, j, l) = w(i, 1, j, l) - temp*temp0
                 end do
               end do
             end do
@@ -92,10 +102,11 @@ subroutine applyallturbbcthisblock_d(secondhalo)
                 wd(i, je, j, l) = bvtj2d(i, j, l)
                 w(i, je, j, l) = bvtj2(i, j, l)
                 do m=nt1,nt2
+                  temp0 = w(i, jl, j, m)
                   temp = bmtj2(i, j, l, m)
-                  wd(i, je, j, l) = wd(i, je, j, l) - temp*wd(i, jl, j, &
-&                   m)
-                  w(i, je, j, l) = w(i, je, j, l) - temp*w(i, jl, j, m)
+                  wd(i, je, j, l) = wd(i, je, j, l) - temp0*bmtj2d(i, j&
+&                   , l, m) - temp*wd(i, jl, j, m)
+                  w(i, je, j, l) = w(i, je, j, l) - temp*temp0
                 end do
               end do
             end do
@@ -107,9 +118,11 @@ subroutine applyallturbbcthisblock_d(secondhalo)
                 wd(i, j, 1, l) = bvtk1d(i, j, l)
                 w(i, j, 1, l) = bvtk1(i, j, l)
                 do m=nt1,nt2
+                  temp0 = w(i, j, 2, m)
                   temp = bmtk1(i, j, l, m)
-                  wd(i, j, 1, l) = wd(i, j, 1, l) - temp*wd(i, j, 2, m)
-                  w(i, j, 1, l) = w(i, j, 1, l) - temp*w(i, j, 2, m)
+                  wd(i, j, 1, l) = wd(i, j, 1, l) - temp0*bmtk1d(i, j, l&
+&                   , m) - temp*wd(i, j, 2, m)
+                  w(i, j, 1, l) = w(i, j, 1, l) - temp*temp0
                 end do
               end do
             end do
@@ -121,10 +134,11 @@ subroutine applyallturbbcthisblock_d(secondhalo)
                 wd(i, j, ke, l) = bvtk2d(i, j, l)
                 w(i, j, ke, l) = bvtk2(i, j, l)
                 do m=nt1,nt2
+                  temp0 = w(i, j, kl, m)
                   temp = bmtk2(i, j, l, m)
-                  wd(i, j, ke, l) = wd(i, j, ke, l) - temp*wd(i, j, kl, &
-&                   m)
-                  w(i, j, ke, l) = w(i, j, ke, l) - temp*w(i, j, kl, m)
+                  wd(i, j, ke, l) = wd(i, j, ke, l) - temp0*bmtk2d(i, j&
+&                   , l, m) - temp*wd(i, j, kl, m)
+                  w(i, j, ke, l) = w(i, j, ke, l) - temp*temp0
                 end do
               end do
             end do
@@ -598,16 +612,22 @@ subroutine bcturbfarfield_d(nn)
           do l=nt1,nt2
             select case  (bcfaceid(nn)) 
             case (imin) 
+              bmti1d(i, j, l, l) = 0.0_8
               bmti1(i, j, l, l) = -one
             case (imax) 
+              bmti2d(i, j, l, l) = 0.0_8
               bmti2(i, j, l, l) = -one
             case (jmin) 
+              bmtj1d(i, j, l, l) = 0.0_8
               bmtj1(i, j, l, l) = -one
             case (jmax) 
+              bmtj2d(i, j, l, l) = 0.0_8
               bmtj2(i, j, l, l) = -one
             case (kmin) 
+              bmtk1d(i, j, l, l) = 0.0_8
               bmtk1(i, j, l, l) = -one
             case (kmax) 
+              bmtk2d(i, j, l, l) = 0.0_8
               bmtk2(i, j, l, l) = -one
             end select
           end do
@@ -1005,7 +1025,9 @@ subroutine bcturbtreatment_d()
       do j=1,je
         do l=nt1,nt2
           do m=nt1,nt2
+            bmti1d(j, k, l, m) = 0.0_8
             bmti1(j, k, l, m) = zero
+            bmti2d(j, k, l, m) = 0.0_8
             bmti2(j, k, l, m) = zero
           end do
           bvti1d(j, k, l) = 0.0_8
@@ -1019,7 +1041,9 @@ subroutine bcturbtreatment_d()
       do i=1,ie
         do l=nt1,nt2
           do m=nt1,nt2
+            bmtj1d(i, k, l, m) = 0.0_8
             bmtj1(i, k, l, m) = zero
+            bmtj2d(i, k, l, m) = 0.0_8
             bmtj2(i, k, l, m) = zero
           end do
           bvtj1d(i, k, l) = 0.0_8
@@ -1033,7 +1057,9 @@ subroutine bcturbtreatment_d()
       do i=1,ie
         do l=nt1,nt2
           do m=nt1,nt2
+            bmtk1d(i, j, l, m) = 0.0_8
             bmtk1(i, j, l, m) = zero
+            bmtk2d(i, j, l, m) = 0.0_8
             bmtk2(i, j, l, m) = zero
           end do
           bvtk1d(i, j, l) = 0.0_8
@@ -1235,6 +1261,7 @@ subroutine bcturbwall_d(nn)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             call saroughfact(2, i, j, fact)
+            bmti1d(i, j, itu1, itu1) = 0.0_8
             bmti1(i, j, itu1, itu1) = -fact
           end do
         end do
@@ -1242,6 +1269,7 @@ subroutine bcturbwall_d(nn)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             call saroughfact(il, i, j, fact)
+            bmti2d(i, j, itu1, itu1) = 0.0_8
             bmti2(i, j, itu1, itu1) = -fact
           end do
         end do
@@ -1249,6 +1277,7 @@ subroutine bcturbwall_d(nn)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             call saroughfact(i, 2, j, fact)
+            bmtj1d(i, j, itu1, itu1) = 0.0_8
             bmtj1(i, j, itu1, itu1) = -fact
           end do
         end do
@@ -1256,6 +1285,7 @@ subroutine bcturbwall_d(nn)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             call saroughfact(i, jl, j, fact)
+            bmtj2d(i, j, itu1, itu1) = 0.0_8
             bmtj2(i, j, itu1, itu1) = -fact
           end do
         end do
@@ -1263,6 +1293,7 @@ subroutine bcturbwall_d(nn)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             call saroughfact(i, j, 2, fact)
+            bmtk1d(i, j, itu1, itu1) = 0.0_8
             bmtk1(i, j, itu1, itu1) = -fact
           end do
         end do
@@ -1270,6 +1301,7 @@ subroutine bcturbwall_d(nn)
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
             call saroughfact(i, j, kl, fact)
+            bmtk2d(i, j, itu1, itu1) = 0.0_8
             bmtk2(i, j, itu1, itu1) = -fact
           end do
         end do
@@ -1319,7 +1351,9 @@ subroutine bcturbwall_d(nn)
             temp0 = one/(rkwbeta1*(d2wall(2, ii, jj)*d2wall(2, ii, jj)))
             tmpdd = -(temp0*2*d2walld(2, ii, jj)/d2wall(2, ii, jj))
             tmpd = temp0
+            bmti1d(i, j, itu1, itu1) = 0.0_8
             bmti1(i, j, itu1, itu1) = one
+            bmti1d(i, j, itu2, itu2) = 0.0_8
             bmti1(i, j, itu2, itu2) = one
             bvti1d(i, j, itu2) = two*60.0_realtype*(tmpd*nud+nu*tmpdd)
             bvti1(i, j, itu2) = two*60.0_realtype*nu*tmpd
@@ -1358,7 +1392,9 @@ subroutine bcturbwall_d(nn)
 &             ))
             tmpdd = -(temp0*2*d2walld(il, ii, jj)/d2wall(il, ii, jj))
             tmpd = temp0
+            bmti2d(i, j, itu1, itu1) = 0.0_8
             bmti2(i, j, itu1, itu1) = one
+            bmti2d(i, j, itu2, itu2) = 0.0_8
             bmti2(i, j, itu2, itu2) = one
             bvti2d(i, j, itu2) = two*60.0_realtype*(tmpd*nud+nu*tmpdd)
             bvti2(i, j, itu2) = two*60.0_realtype*nu*tmpd
@@ -1396,7 +1432,9 @@ subroutine bcturbwall_d(nn)
             temp0 = one/(rkwbeta1*(d2wall(ii, 2, jj)*d2wall(ii, 2, jj)))
             tmpdd = -(temp0*2*d2walld(ii, 2, jj)/d2wall(ii, 2, jj))
             tmpd = temp0
+            bmtj1d(i, j, itu1, itu1) = 0.0_8
             bmtj1(i, j, itu1, itu1) = one
+            bmtj1d(i, j, itu2, itu2) = 0.0_8
             bmtj1(i, j, itu2, itu2) = one
             bvtj1d(i, j, itu2) = two*60.0_realtype*(tmpd*nud+nu*tmpdd)
             bvtj1(i, j, itu2) = two*60.0_realtype*nu*tmpd
@@ -1435,7 +1473,9 @@ subroutine bcturbwall_d(nn)
 &             ))
             tmpdd = -(temp0*2*d2walld(ii, jl, jj)/d2wall(ii, jl, jj))
             tmpd = temp0
+            bmtj2d(i, j, itu1, itu1) = 0.0_8
             bmtj2(i, j, itu1, itu1) = one
+            bmtj2d(i, j, itu2, itu2) = 0.0_8
             bmtj2(i, j, itu2, itu2) = one
             bvtj2d(i, j, itu2) = two*60.0_realtype*(tmpd*nud+nu*tmpdd)
             bvtj2(i, j, itu2) = two*60.0_realtype*nu*tmpd
@@ -1473,7 +1513,9 @@ subroutine bcturbwall_d(nn)
             temp0 = one/(rkwbeta1*(d2wall(ii, jj, 2)*d2wall(ii, jj, 2)))
             tmpdd = -(temp0*2*d2walld(ii, jj, 2)/d2wall(ii, jj, 2))
             tmpd = temp0
+            bmtk1d(i, j, itu1, itu1) = 0.0_8
             bmtk1(i, j, itu1, itu1) = one
+            bmtk1d(i, j, itu2, itu2) = 0.0_8
             bmtk1(i, j, itu2, itu2) = one
             bvtk1d(i, j, itu2) = two*60.0_realtype*(tmpd*nud+nu*tmpdd)
             bvtk1(i, j, itu2) = two*60.0_realtype*nu*tmpd
@@ -1512,7 +1554,9 @@ subroutine bcturbwall_d(nn)
 &             ))
             tmpdd = -(temp0*2*d2walld(ii, jj, kl)/d2wall(ii, jj, kl))
             tmpd = temp0
+            bmtk2d(i, j, itu1, itu1) = 0.0_8
             bmtk2(i, j, itu1, itu1) = one
+            bmtk2d(i, j, itu2, itu2) = 0.0_8
             bmtk2(i, j, itu2, itu2) = one
             bvtk2d(i, j, itu2) = two*60.0_realtype*(tmpd*nud+nu*tmpdd)
             bvtk2(i, j, itu2) = two*60.0_realtype*nu*tmpd
@@ -1527,42 +1571,54 @@ subroutine bcturbwall_d(nn)
       case (imin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
+            bmti1d(i, j, itu1, itu1) = 0.0_8
             bmti1(i, j, itu1, itu1) = one
+            bmti1d(i, j, itu2, itu2) = 0.0_8
             bmti1(i, j, itu2, itu2) = one
           end do
         end do
       case (imax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
+            bmti2d(i, j, itu1, itu1) = 0.0_8
             bmti2(i, j, itu1, itu1) = one
+            bmti2d(i, j, itu2, itu2) = 0.0_8
             bmti2(i, j, itu2, itu2) = one
           end do
         end do
       case (jmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
+            bmtj1d(i, j, itu1, itu1) = 0.0_8
             bmtj1(i, j, itu1, itu1) = one
+            bmtj1d(i, j, itu2, itu2) = 0.0_8
             bmtj1(i, j, itu2, itu2) = one
           end do
         end do
       case (jmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
+            bmtj2d(i, j, itu1, itu1) = 0.0_8
             bmtj2(i, j, itu1, itu1) = one
+            bmtj2d(i, j, itu2, itu2) = 0.0_8
             bmtj2(i, j, itu2, itu2) = one
           end do
         end do
       case (kmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
+            bmtk1d(i, j, itu1, itu1) = 0.0_8
             bmtk1(i, j, itu1, itu1) = one
+            bmtk1d(i, j, itu2, itu2) = 0.0_8
             bmtk1(i, j, itu2, itu2) = one
           end do
         end do
       case (kmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
+            bmtk2d(i, j, itu1, itu1) = 0.0_8
             bmtk2(i, j, itu1, itu1) = one
+            bmtk2d(i, j, itu2, itu2) = 0.0_8
             bmtk2(i, j, itu2, itu2) = one
           end do
         end do
diff --git a/src/adjoint/outputForward/turbUtils_d.f90 b/src/adjoint/outputForward/turbUtils_d.f90
index 274215a60..d2cb16708 100644
--- a/src/adjoint/outputForward/turbUtils_d.f90
+++ b/src/adjoint/outputForward/turbUtils_d.f90
@@ -884,8 +884,9 @@ subroutine turbadvection_d(madv, nadv, offset, qq)
     use constants
     use blockpointers, only : nx, ny, nz, il, jl, kl, vol, vold, &
 &   sfacei, sfaceid, sfacej, sfacejd, sfacek, sfacekd, w, wd, si, sid, &
-&   sj, sjd, sk, skd, addgridvelocities, bmti1, bmti2, bmtj1, bmtj2, &
-&   bmtk1, bmtk2, scratch, scratchd
+&   sj, sjd, sk, skd, addgridvelocities, bmti1, bmti1d, bmti2, bmti2d, &
+&   bmtj1, bmtj1d, bmtj2, bmtj2d, bmtk1, bmtk1d, bmtk2, bmtk2d, scratch,&
+&   scratchd
     use inputdiscretization, only : orderturb
     use iteration, only : groundlevel
     use turbmod, only : secondord
diff --git a/src/adjoint/outputReverse/turbBCRoutines_b.f90 b/src/adjoint/outputReverse/turbBCRoutines_b.f90
index aa87f25bb..83aa7aea6 100644
--- a/src/adjoint/outputReverse/turbBCRoutines_b.f90
+++ b/src/adjoint/outputReverse/turbBCRoutines_b.f90
@@ -7,12 +7,16 @@ module turbbcroutines_b
 contains
 !  differentiation of applyallturbbcthisblock in reverse (adjoint) mode (with options noisize i4 dr8 r8):
 !   gradient     of useful results: *rev *w
-!   with respect to varying inputs: *rev *bvtj1 *bvtj2 *w *bvtk1
-!                *bvtk2 *bvti1 *bvti2
+!   with respect to varying inputs: *rev *bvtj1 *bvtj2 *w *bmtk1
+!                *bmtk2 *bvtk1 *bvtk2 *bmti1 *bmti2 *bvti1 *bvti2
+!                *bmtj1 *bmtj2
 !   rw status of diff variables: *rev:in-out *bvtj1:out *bvtj2:out
-!                *w:in-out *bvtk1:out *bvtk2:out *bvti1:out *bvti2:out
+!                *w:in-out *bmtk1:out *bmtk2:out *bvtk1:out *bvtk2:out
+!                *bmti1:out *bmti2:out *bvti1:out *bvti2:out *bmtj1:out
+!                *bmtj2:out
 !   plus diff mem management of: rev:in bvtj1:in bvtj2:in w:in
-!                bvtk1:in bvtk2:in bvti1:in bvti2:in
+!                bmtk1:in bmtk2:in bvtk1:in bvtk2:in bmti1:in bmti2:in
+!                bvti1:in bvti2:in bmtj1:in bmtj2:in
 !      ==================================================================
   subroutine applyallturbbcthisblock_b(secondhalo)
 !
@@ -41,6 +45,7 @@ subroutine applyallturbbcthisblock_b(secondhalo)
     real(kind=realtype) :: tmpd0
     real(kind=realtype) :: tmp1
     real(kind=realtype) :: tmpd1
+    integer :: branch
     integer :: ad_from
     integer :: ad_to
     integer :: ad_from0
@@ -65,7 +70,6 @@ subroutine applyallturbbcthisblock_b(secondhalo)
     integer :: ad_to9
     integer :: ad_from10
     integer :: ad_to10
-    integer :: branch
 ! loop over the boundary condition subfaces of this block.
 bocos:do nn=1,nbocos
 ! loop over the faces and set the state in
@@ -78,7 +82,17 @@ subroutine applyallturbbcthisblock_b(secondhalo)
           ad_from0 = bcdata(nn)%jcbeg
           do j=ad_from0,bcdata(nn)%jcend
             ad_from = bcdata(nn)%icbeg
-            i = bcdata(nn)%icend + 1
+            do i=ad_from,bcdata(nn)%icend
+              do l=nt1,nt2
+                call pushreal8(w(1, i, j, l))
+                w(1, i, j, l) = bvti1(i, j, l)
+                do m=nt1,nt2
+                  call pushreal8(w(1, i, j, l))
+                  w(1, i, j, l) = w(1, i, j, l) - bmti1(i, j, l, m)*w(2&
+&                   , i, j, m)
+                end do
+              end do
+            end do
             call pushinteger4(i - 1)
             call pushinteger4(ad_from)
           end do
@@ -89,7 +103,18 @@ subroutine applyallturbbcthisblock_b(secondhalo)
           ad_from2 = bcdata(nn)%jcbeg
           do j=ad_from2,bcdata(nn)%jcend
             ad_from1 = bcdata(nn)%icbeg
-            i = bcdata(nn)%icend + 1
+            do i=ad_from1,bcdata(nn)%icend
+              do l=nt1,nt2
+                call pushreal8(w(ie, i, j, l))
+                w(ie, i, j, l) = bvti2(i, j, l)
+                do m=nt1,nt2
+                  tmp = w(ie, i, j, l) - bmti2(i, j, l, m)*w(il, i, j, m&
+&                   )
+                  call pushreal8(w(ie, i, j, l))
+                  w(ie, i, j, l) = tmp
+                end do
+              end do
+            end do
             call pushinteger4(i - 1)
             call pushinteger4(ad_from1)
           end do
@@ -100,7 +125,17 @@ subroutine applyallturbbcthisblock_b(secondhalo)
           ad_from4 = bcdata(nn)%jcbeg
           do j=ad_from4,bcdata(nn)%jcend
             ad_from3 = bcdata(nn)%icbeg
-            i = bcdata(nn)%icend + 1
+            do i=ad_from3,bcdata(nn)%icend
+              do l=nt1,nt2
+                call pushreal8(w(i, 1, j, l))
+                w(i, 1, j, l) = bvtj1(i, j, l)
+                do m=nt1,nt2
+                  call pushreal8(w(i, 1, j, l))
+                  w(i, 1, j, l) = w(i, 1, j, l) - bmtj1(i, j, l, m)*w(i&
+&                   , 2, j, m)
+                end do
+              end do
+            end do
             call pushinteger4(i - 1)
             call pushinteger4(ad_from3)
           end do
@@ -111,7 +146,18 @@ subroutine applyallturbbcthisblock_b(secondhalo)
           ad_from6 = bcdata(nn)%jcbeg
           do j=ad_from6,bcdata(nn)%jcend
             ad_from5 = bcdata(nn)%icbeg
-            i = bcdata(nn)%icend + 1
+            do i=ad_from5,bcdata(nn)%icend
+              do l=nt1,nt2
+                call pushreal8(w(i, je, j, l))
+                w(i, je, j, l) = bvtj2(i, j, l)
+                do m=nt1,nt2
+                  tmp0 = w(i, je, j, l) - bmtj2(i, j, l, m)*w(i, jl, j, &
+&                   m)
+                  call pushreal8(w(i, je, j, l))
+                  w(i, je, j, l) = tmp0
+                end do
+              end do
+            end do
             call pushinteger4(i - 1)
             call pushinteger4(ad_from5)
           end do
@@ -122,7 +168,17 @@ subroutine applyallturbbcthisblock_b(secondhalo)
           ad_from8 = bcdata(nn)%jcbeg
           do j=ad_from8,bcdata(nn)%jcend
             ad_from7 = bcdata(nn)%icbeg
-            i = bcdata(nn)%icend + 1
+            do i=ad_from7,bcdata(nn)%icend
+              do l=nt1,nt2
+                call pushreal8(w(i, j, 1, l))
+                w(i, j, 1, l) = bvtk1(i, j, l)
+                do m=nt1,nt2
+                  call pushreal8(w(i, j, 1, l))
+                  w(i, j, 1, l) = w(i, j, 1, l) - bmtk1(i, j, l, m)*w(i&
+&                   , j, 2, m)
+                end do
+              end do
+            end do
             call pushinteger4(i - 1)
             call pushinteger4(ad_from7)
           end do
@@ -133,7 +189,18 @@ subroutine applyallturbbcthisblock_b(secondhalo)
           ad_from10 = bcdata(nn)%jcbeg
           do j=ad_from10,bcdata(nn)%jcend
             ad_from9 = bcdata(nn)%icbeg
-            i = bcdata(nn)%icend + 1
+            do i=ad_from9,bcdata(nn)%icend
+              do l=nt1,nt2
+                call pushreal8(w(i, j, ke, l))
+                w(i, j, ke, l) = bvtk2(i, j, l)
+                do m=nt1,nt2
+                  tmp1 = w(i, j, ke, l) - bmtk2(i, j, l, m)*w(i, j, kl, &
+&                   m)
+                  call pushreal8(w(i, j, ke, l))
+                  w(i, j, ke, l) = tmp1
+                end do
+              end do
+            end do
             call pushinteger4(i - 1)
             call pushinteger4(ad_from9)
           end do
@@ -150,8 +217,14 @@ subroutine applyallturbbcthisblock_b(secondhalo)
       if (eddymodel) then
         if (bctype(nn) .eq. nswalladiabatic .or. bctype(nn) .eq. &
 &           nswallisothermal) then
+! viscous wall boundary condition. eddy viscosity is
+! zero at the wall.
+          call bceddywall(nn)
           call pushcontrol2b(0)
         else
+! any boundary condition but viscous wall. a homogeneous
+! neumann condition is applied to the eddy viscosity.
+          call bceddynowall(nn)
           call pushcontrol2b(1)
         end if
       else
@@ -160,6 +233,14 @@ subroutine applyallturbbcthisblock_b(secondhalo)
 ! extrapolate the turbulent variables in case a second halo
 ! is needed.
       if (secondhalo) then
+        if (associated(w)) then
+          call pushreal8array(w, size(w, 1)*size(w, 2)*size(w, 3)*size(w&
+&                       , 4))
+          call pushcontrol1b(1)
+        else
+          call pushcontrol1b(0)
+        end if
+        call turb2ndhalo(nn)
         call pushcontrol1b(1)
       else
         call pushcontrol1b(0)
@@ -167,13 +248,24 @@ subroutine applyallturbbcthisblock_b(secondhalo)
     end do bocos
     if (associated(bvtj1d)) bvtj1d = 0.0_8
     if (associated(bvtj2d)) bvtj2d = 0.0_8
+    if (associated(bmtk1d)) bmtk1d = 0.0_8
+    if (associated(bmtk2d)) bmtk2d = 0.0_8
     if (associated(bvtk1d)) bvtk1d = 0.0_8
     if (associated(bvtk2d)) bvtk2d = 0.0_8
+    if (associated(bmti1d)) bmti1d = 0.0_8
+    if (associated(bmti2d)) bmti2d = 0.0_8
     if (associated(bvti1d)) bvti1d = 0.0_8
     if (associated(bvti2d)) bvti2d = 0.0_8
+    if (associated(bmtj1d)) bmtj1d = 0.0_8
+    if (associated(bmtj2d)) bmtj2d = 0.0_8
     do nn=nbocos,1,-1
       call popcontrol1b(branch)
-      if (branch .ne. 0) call turb2ndhalo_b(nn)
+      if (branch .ne. 0) then
+        call popcontrol1b(branch)
+        if (branch .eq. 1) call popreal8array(w, size(w, 1)*size(w, 2)*&
+&                                       size(w, 3)*size(w, 4))
+        call turb2ndhalo_b(nn)
+      end if
       call popcontrol2b(branch)
       if (branch .eq. 0) then
         call bceddywall_b(nn)
@@ -192,9 +284,13 @@ subroutine applyallturbbcthisblock_b(secondhalo)
               do i=ad_to,ad_from,-1
                 do l=nt2,nt1,-1
                   do m=nt2,nt1,-1
+                    call popreal8(w(1, i, j, l))
+                    bmti1d(i, j, l, m) = bmti1d(i, j, l, m) - w(2, i, j&
+&                     , m)*wd(1, i, j, l)
                     wd(2, i, j, m) = wd(2, i, j, m) - bmti1(i, j, l, m)*&
 &                     wd(1, i, j, l)
                   end do
+                  call popreal8(w(1, i, j, l))
                   bvti1d(i, j, l) = bvti1d(i, j, l) + wd(1, i, j, l)
                   wd(1, i, j, l) = 0.0_8
                 end do
@@ -209,11 +305,15 @@ subroutine applyallturbbcthisblock_b(secondhalo)
               do i=ad_to1,ad_from1,-1
                 do l=nt2,nt1,-1
                   do m=nt2,nt1,-1
+                    call popreal8(w(ie, i, j, l))
                     tmpd = wd(ie, i, j, l)
                     wd(ie, i, j, l) = tmpd
+                    bmti2d(i, j, l, m) = bmti2d(i, j, l, m) - w(il, i, j&
+&                     , m)*tmpd
                     wd(il, i, j, m) = wd(il, i, j, m) - bmti2(i, j, l, m&
 &                     )*tmpd
                   end do
+                  call popreal8(w(ie, i, j, l))
                   bvti2d(i, j, l) = bvti2d(i, j, l) + wd(ie, i, j, l)
                   wd(ie, i, j, l) = 0.0_8
                 end do
@@ -231,9 +331,13 @@ subroutine applyallturbbcthisblock_b(secondhalo)
             do i=ad_to3,ad_from3,-1
               do l=nt2,nt1,-1
                 do m=nt2,nt1,-1
+                  call popreal8(w(i, 1, j, l))
+                  bmtj1d(i, j, l, m) = bmtj1d(i, j, l, m) - w(i, 2, j, m&
+&                   )*wd(i, 1, j, l)
                   wd(i, 2, j, m) = wd(i, 2, j, m) - bmtj1(i, j, l, m)*wd&
 &                   (i, 1, j, l)
                 end do
+                call popreal8(w(i, 1, j, l))
                 bvtj1d(i, j, l) = bvtj1d(i, j, l) + wd(i, 1, j, l)
                 wd(i, 1, j, l) = 0.0_8
               end do
@@ -248,11 +352,15 @@ subroutine applyallturbbcthisblock_b(secondhalo)
             do i=ad_to5,ad_from5,-1
               do l=nt2,nt1,-1
                 do m=nt2,nt1,-1
+                  call popreal8(w(i, je, j, l))
                   tmpd0 = wd(i, je, j, l)
                   wd(i, je, j, l) = tmpd0
+                  bmtj2d(i, j, l, m) = bmtj2d(i, j, l, m) - w(i, jl, j, &
+&                   m)*tmpd0
                   wd(i, jl, j, m) = wd(i, jl, j, m) - bmtj2(i, j, l, m)*&
 &                   tmpd0
                 end do
+                call popreal8(w(i, je, j, l))
                 bvtj2d(i, j, l) = bvtj2d(i, j, l) + wd(i, je, j, l)
                 wd(i, je, j, l) = 0.0_8
               end do
@@ -268,9 +376,13 @@ subroutine applyallturbbcthisblock_b(secondhalo)
           do i=ad_to7,ad_from7,-1
             do l=nt2,nt1,-1
               do m=nt2,nt1,-1
+                call popreal8(w(i, j, 1, l))
+                bmtk1d(i, j, l, m) = bmtk1d(i, j, l, m) - w(i, j, 2, m)*&
+&                 wd(i, j, 1, l)
                 wd(i, j, 2, m) = wd(i, j, 2, m) - bmtk1(i, j, l, m)*wd(i&
 &                 , j, 1, l)
               end do
+              call popreal8(w(i, j, 1, l))
               bvtk1d(i, j, l) = bvtk1d(i, j, l) + wd(i, j, 1, l)
               wd(i, j, 1, l) = 0.0_8
             end do
@@ -285,11 +397,15 @@ subroutine applyallturbbcthisblock_b(secondhalo)
           do i=ad_to9,ad_from9,-1
             do l=nt2,nt1,-1
               do m=nt2,nt1,-1
+                call popreal8(w(i, j, ke, l))
                 tmpd1 = wd(i, j, ke, l)
                 wd(i, j, ke, l) = tmpd1
+                bmtk2d(i, j, l, m) = bmtk2d(i, j, l, m) - w(i, j, kl, m)&
+&                 *tmpd1
                 wd(i, j, kl, m) = wd(i, j, kl, m) - bmtk2(i, j, l, m)*&
 &                 tmpd1
               end do
+              call popreal8(w(i, j, ke, l))
               bvtk2d(i, j, l) = bvtk2d(i, j, l) + wd(i, j, ke, l)
               wd(i, j, ke, l) = 0.0_8
             end do
diff --git a/src/adjoint/outputReverse/turbUtils_b.f90 b/src/adjoint/outputReverse/turbUtils_b.f90
index 50ebc309c..d42719519 100644
--- a/src/adjoint/outputReverse/turbUtils_b.f90
+++ b/src/adjoint/outputReverse/turbUtils_b.f90
@@ -906,8 +906,9 @@ subroutine turbadvection_b(madv, nadv, offset, qq)
     use constants
     use blockpointers, only : nx, ny, nz, il, jl, kl, vol, vold, &
 &   sfacei, sfaceid, sfacej, sfacejd, sfacek, sfacekd, w, wd, si, sid, &
-&   sj, sjd, sk, skd, addgridvelocities, bmti1, bmti2, bmtj1, bmtj2, &
-&   bmtk1, bmtk2, scratch, scratchd
+&   sj, sjd, sk, skd, addgridvelocities, bmti1, bmti1d, bmti2, bmti2d, &
+&   bmtj1, bmtj1d, bmtj2, bmtj2d, bmtk1, bmtk1d, bmtk2, bmtk2d, scratch,&
+&   scratchd
     use inputdiscretization, only : orderturb
     use iteration, only : groundlevel
     use turbmod, only : secondord

From bd4e3aee284e829fb81cb67b590ec75d12facffb Mon Sep 17 00:00:00 2001
From: DavidAnderegg <andv@zhaw.ch>
Date: Thu, 10 Oct 2024 10:31:43 +0200
Subject: [PATCH 58/60] partials even more wrong, but not crashing

---
 src/adjoint/Makefile_tapenade                 |   2 +-
 .../outputForward/turbBCRoutines_d.f90        | 189 ++++++++--
 .../outputReverse/turbBCRoutines_b.f90        | 357 ++++++++++++++++--
 .../turbBCRoutines_fast_b.f90                 |   8 +-
 src/turbulence/turbBCRoutines.F90             |  12 +-
 5 files changed, 497 insertions(+), 71 deletions(-)

diff --git a/src/adjoint/Makefile_tapenade b/src/adjoint/Makefile_tapenade
index a53c42709..b11030e67 100644
--- a/src/adjoint/Makefile_tapenade
+++ b/src/adjoint/Makefile_tapenade
@@ -166,7 +166,7 @@ turbBCRoutines%applyAllTurbBCThisBlock(rev, w, bvtj1, bvtj2, bvtk1, bvtk2, bvti1
                                       (rev, w) \
 \
 turbBCRoutines%bcTurbTreatment(w, rlv, d2wall, winf) > \
-                              (w, rlv, d2wall, winf, bvtj1, bvtj2, bvtk1, bvtk2, bvti1, bvti2) \
+                              (w, rlv, d2wall, winf, bvtj1, bvtj2, bvtk1, bvtk2, bvti1, bvti2, bmti1, bmti2, bmtj1, bmtj2, bmtk1, bmtk2) \
 \
 solverUtils%timeStep_block(w, pInfCorr, rhoInf, si, sj, sk, sFaceI, sFaceJ, sFaceK, p, radi, radj, radk, dtl, rlv, rev, vol) > \
               (w, pInfCorr, rhoInf, si, sj, sk, sFaceI, sFaceJ, sFaceK, p, radi, radj, radk, dtl, rlv, rev, vol) \
diff --git a/src/adjoint/outputForward/turbBCRoutines_d.f90 b/src/adjoint/outputForward/turbBCRoutines_d.f90
index b6d5d2869..a139ac136 100644
--- a/src/adjoint/outputForward/turbBCRoutines_d.f90
+++ b/src/adjoint/outputForward/turbBCRoutines_d.f90
@@ -15,8 +15,8 @@ module turbbcroutines_d
 !                *bmti1:in *bmti2:in *bvti1:in *bvti2:in *bmtj1:in
 !                *bmtj2:in
 !   plus diff mem management of: rev:in bvtj1:in bvtj2:in w:in
-!                bmtk1:in bmtk2:in bvtk1:in bvtk2:in bmti1:in bmti2:in
-!                bvti1:in bvti2:in bmtj1:in bmtj2:in
+!                bmtk1:in bmtk2:in bvtk1:in bvtk2:in d2wall:in
+!                bmti1:in bmti2:in bvti1:in bvti2:in bmtj1:in bmtj2:in
 !      ==================================================================
   subroutine applyallturbbcthisblock_d(secondhalo)
 !
@@ -422,7 +422,7 @@ end subroutine bceddynowall
 !  differentiation of bceddywall in forward (tangent) mode (with options i4 dr8 r8):
 !   variations   of useful results: *rev
 !   with respect to varying inputs: *rev
-!   plus diff mem management of: rev:in
+!   plus diff mem management of: rev:in d2wall:in
   subroutine bceddywall_d(nn)
 !
 !       bceddywall sets the eddy viscosity in the halo cells of
@@ -566,12 +566,15 @@ subroutine bceddywall(nn)
   end subroutine bceddywall
 
 !  differentiation of bcturbfarfield in forward (tangent) mode (with options i4 dr8 r8):
-!   variations   of useful results: *bvtj1 *bvtj2 *bvtk1 *bvtk2
-!                *bvti1 *bvti2
-!   with respect to varying inputs: winf *bvtj1 *bvtj2 *bvtk1 *bvtk2
-!                *bvti1 *bvti2
-!   plus diff mem management of: bvtj1:in bvtj2:in bvtk1:in bvtk2:in
-!                bvti1:in bvti2:in
+!   variations   of useful results: *bvtj1 *bvtj2 *bmtk1 *bmtk2
+!                *bvtk1 *bvtk2 *bmti1 *bmti2 *bvti1 *bvti2 *bmtj1
+!                *bmtj2
+!   with respect to varying inputs: winf *bvtj1 *bvtj2 *bmtk1 *bmtk2
+!                *bvtk1 *bvtk2 *bmti1 *bmti2 *bvti1 *bvti2 *bmtj1
+!                *bmtj2
+!   plus diff mem management of: bvtj1:in bvtj2:in bmtk1:in bmtk2:in
+!                bvtk1:in bvtk2:in bmti1:in bmti2:in bvti1:in bvti2:in
+!                bmtj1:in bmtj2:in
   subroutine bcturbfarfield_d(nn)
 !
 !       bcturbfarfield applies the implicit treatment of the
@@ -942,6 +945,65 @@ subroutine bcturboutflow(nn)
     end do
   end subroutine bcturboutflow
 
+!  differentiation of bcturbsymm in forward (tangent) mode (with options i4 dr8 r8):
+!   variations   of useful results: *bmtk1 *bmtk2 *bmti1 *bmti2
+!                *bmtj1 *bmtj2
+!   with respect to varying inputs: *bmtk1 *bmtk2 *bmti1 *bmti2
+!                *bmtj1 *bmtj2
+!   plus diff mem management of: bmtk1:in bmtk2:in bmti1:in bmti2:in
+!                bmtj1:in bmtj2:in
+  subroutine bcturbsymm_d(nn)
+!
+!       bcturbsymm applies the implicit treatment of the symmetry
+!       boundary condition (or inviscid wall) to subface nn. as the
+!       symmetry boundary condition is independent of the turbulence
+!       model, this routine is valid for all models. it is assumed
+!       that the pointers in blockpointers are already set to the
+!       correct block on the correct grid level.
+!
+    use constants
+    use blockpointers
+    use flowvarrefstate
+    implicit none
+!
+!      subroutine arguments.
+!
+    integer(kind=inttype), intent(in) :: nn
+!
+!      local variables.
+!
+    integer(kind=inttype) :: i, j, l
+! loop over the faces of the subfaces and set the values of bmt
+! for an implicit treatment. for a symmetry face this means
+! that the halo value is set to the internal value.
+    do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
+      do i=bcdata(nn)%icbeg,bcdata(nn)%icend
+        do l=nt1,nt2
+          select case  (bcfaceid(nn)) 
+          case (imin) 
+            bmti1d(i, j, l, l) = 0.0_8
+            bmti1(i, j, l, l) = -one
+          case (imax) 
+            bmti2d(i, j, l, l) = 0.0_8
+            bmti2(i, j, l, l) = -one
+          case (jmin) 
+            bmtj1d(i, j, l, l) = 0.0_8
+            bmtj1(i, j, l, l) = -one
+          case (jmax) 
+            bmtj2d(i, j, l, l) = 0.0_8
+            bmtj2(i, j, l, l) = -one
+          case (kmin) 
+            bmtk1d(i, j, l, l) = 0.0_8
+            bmtk1(i, j, l, l) = -one
+          case (kmax) 
+            bmtk2d(i, j, l, l) = 0.0_8
+            bmtk2(i, j, l, l) = -one
+          end select
+        end do
+      end do
+    end do
+  end subroutine bcturbsymm_d
+
   subroutine bcturbsymm(nn)
 !
 !       bcturbsymm applies the implicit treatment of the symmetry
@@ -989,14 +1051,17 @@ subroutine bcturbsymm(nn)
   end subroutine bcturbsymm
 
 !  differentiation of bcturbtreatment in forward (tangent) mode (with options i4 dr8 r8):
-!   variations   of useful results: *bvtj1 *bvtj2 *bvtk1 *bvtk2
-!                *bvti1 *bvti2
+!   variations   of useful results: *bvtj1 *bvtj2 *bmtk1 *bmtk2
+!                *bvtk1 *bvtk2 *bmti1 *bmti2 *bvti1 *bvti2 *bmtj1
+!                *bmtj2
 !   with respect to varying inputs: winf *w *rlv *d2wall
 !   rw status of diff variables: winf:in *bvtj1:out *bvtj2:out
-!                *w:in *rlv:in *bvtk1:out *bvtk2:out *d2wall:in
-!                *bvti1:out *bvti2:out
-!   plus diff mem management of: bvtj1:in bvtj2:in w:in rlv:in
-!                bvtk1:in bvtk2:in d2wall:in bvti1:in bvti2:in
+!                *w:in *bmtk1:out *rlv:in *bmtk2:out *bvtk1:out
+!                *bvtk2:out *d2wall:in *bmti1:out *bmti2:out *bvti1:out
+!                *bvti2:out *bmtj1:out *bmtj2:out
+!   plus diff mem management of: bvtj1:in bvtj2:in w:in bmtk1:in
+!                rlv:in bmtk2:in bvtk1:in bvtk2:in d2wall:in bmti1:in
+!                bmti2:in bvti1:in bvti2:in bmtj1:in bmtj2:in
   subroutine bcturbtreatment_d()
 !
 !       bcturbtreatment sets the arrays bmti1, bvti1, etc, such that
@@ -1071,10 +1136,16 @@ subroutine bcturbtreatment_d()
     end do
     if (associated(bvtj1d)) bvtj1d = 0.0_8
     if (associated(bvtj2d)) bvtj2d = 0.0_8
+    if (associated(bmtk1d)) bmtk1d = 0.0_8
+    if (associated(bmtk2d)) bmtk2d = 0.0_8
     if (associated(bvtk1d)) bvtk1d = 0.0_8
     if (associated(bvtk2d)) bvtk2d = 0.0_8
+    if (associated(bmti1d)) bmti1d = 0.0_8
+    if (associated(bmti2d)) bmti2d = 0.0_8
     if (associated(bvti1d)) bvti1d = 0.0_8
     if (associated(bvti2d)) bvti2d = 0.0_8
+    if (associated(bmtj1d)) bmtj1d = 0.0_8
+    if (associated(bmtj2d)) bmtj2d = 0.0_8
 ! loop over the boundary condition subfaces of this block.
 bocos:do nn=1,nbocos
 ! determine the kind of boundary condition for this subface.
@@ -1089,7 +1160,7 @@ subroutine bcturbtreatment_d()
       case (symm, symmpolar, eulerwall) 
 ! symmetry, polar symmetry or inviscid wall. treatment of
 ! the turbulent equations is identical.
-        call bcturbsymm(nn)
+        call bcturbsymm_d(nn)
 !=============================================================
       case (farfield) 
 ! farfield. the kind of boundary condition to be applied,
@@ -1200,12 +1271,15 @@ subroutine bcturbtreatment()
   end subroutine bcturbtreatment
 
 !  differentiation of bcturbwall in forward (tangent) mode (with options i4 dr8 r8):
-!   variations   of useful results: *bvtj1 *bvtj2 *bvtk1 *bvtk2
-!                *bvti1 *bvti2
-!   with respect to varying inputs: *bvtj1 *bvtj2 *w *rlv *bvtk1
-!                *bvtk2 *d2wall *bvti1 *bvti2
-!   plus diff mem management of: bvtj1:in bvtj2:in w:in rlv:in
-!                bvtk1:in bvtk2:in d2wall:in bvti1:in bvti2:in
+!   variations   of useful results: *bvtj1 *bvtj2 *bmtk1 *bmtk2
+!                *bvtk1 *bvtk2 *bmti1 *bmti2 *bvti1 *bvti2 *bmtj1
+!                *bmtj2
+!   with respect to varying inputs: *bvtj1 *bvtj2 *w *bmtk1 *rlv
+!                *bmtk2 *bvtk1 *bvtk2 *d2wall *bmti1 *bmti2 *bvti1
+!                *bvti2 *bmtj1 *bmtj2
+!   plus diff mem management of: bvtj1:in bvtj2:in w:in bmtk1:in
+!                rlv:in bmtk2:in bvtk1:in bvtk2:in d2wall:in bmti1:in
+!                bmti2:in bvti1:in bvti2:in bmtj1:in bmtj2:in
   subroutine bcturbwall_d(nn)
 !
 !       bcturbwall applies the implicit treatment of the viscous
@@ -1229,7 +1303,7 @@ subroutine bcturbwall_d(nn)
 !
     integer(kind=inttype) :: i, j, ii, jj, iimax, jjmax
     real(kind=realtype) :: tmpd, tmpe, tmpf, nu, fact
-    real(kind=realtype) :: tmpdd, nud
+    real(kind=realtype) :: tmpdd, nud, factd
     real(kind=realtype), dimension(:, :, :, :), pointer :: bmt
     real(kind=realtype), dimension(:, :, :), pointer :: bvt, ww2
     real(kind=realtype), dimension(:, :), pointer :: rlv2, dd2wall
@@ -1260,48 +1334,48 @@ subroutine bcturbwall_d(nn)
       case (imin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            call saroughfact(2, i, j, fact)
-            bmti1d(i, j, itu1, itu1) = 0.0_8
+            call saroughfact_d(2, i, j, fact, factd)
+            bmti1d(i, j, itu1, itu1) = -factd
             bmti1(i, j, itu1, itu1) = -fact
           end do
         end do
       case (imax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            call saroughfact(il, i, j, fact)
-            bmti2d(i, j, itu1, itu1) = 0.0_8
+            call saroughfact_d(il, i, j, fact, factd)
+            bmti2d(i, j, itu1, itu1) = -factd
             bmti2(i, j, itu1, itu1) = -fact
           end do
         end do
       case (jmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            call saroughfact(i, 2, j, fact)
-            bmtj1d(i, j, itu1, itu1) = 0.0_8
+            call saroughfact_d(i, 2, j, fact, factd)
+            bmtj1d(i, j, itu1, itu1) = -factd
             bmtj1(i, j, itu1, itu1) = -fact
           end do
         end do
       case (jmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            call saroughfact(i, jl, j, fact)
-            bmtj2d(i, j, itu1, itu1) = 0.0_8
+            call saroughfact_d(i, jl, j, fact, factd)
+            bmtj2d(i, j, itu1, itu1) = -factd
             bmtj2(i, j, itu1, itu1) = -fact
           end do
         end do
       case (kmin) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            call saroughfact(i, j, 2, fact)
-            bmtk1d(i, j, itu1, itu1) = 0.0_8
+            call saroughfact_d(i, j, 2, fact, factd)
+            bmtk1d(i, j, itu1, itu1) = -factd
             bmtk1(i, j, itu1, itu1) = -fact
           end do
         end do
       case (kmax) 
         do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
           do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-            call saroughfact(i, j, kl, fact)
-            bmtk2d(i, j, itu1, itu1) = 0.0_8
+            call saroughfact_d(i, j, kl, fact, factd)
+            bmtk2d(i, j, itu1, itu1) = -factd
             bmtk2(i, j, itu1, itu1) = -fact
           end do
         end do
@@ -2306,12 +2380,49 @@ subroutine turbbcnswall(secondhalo)
     end do bocos
   end subroutine turbbcnswall
 
+!  differentiation of saroughfact in forward (tangent) mode (with options i4 dr8 r8):
+!   variations   of useful results: fact
+!   with respect to varying inputs: *d2wall
+!   plus diff mem management of: d2wall:in
+  subroutine saroughfact_d(i, j, k, fact, factd)
+! returns either the regular sa-boundary condition
+! or the modified roughness-boundary condition
+    use constants
+    use inputphysics, only : useroughsa
+    use blockpointers, only : ks, d2wall, d2walld, il, jl, kl
+    implicit none
+! local variablse
+    integer(kind=inttype), intent(in) :: i, j, k
+    real(kind=realtype), intent(out) :: fact
+    real(kind=realtype), intent(out) :: factd
+    real(kind=realtype) :: temp
+    real(kind=realtype) :: temp0
+    if (.not.useroughsa) then
+      fact = -one
+      factd = 0.0_8
+      return
+    else if (((((i .lt. 2 .or. i .gt. il) .or. j .lt. 2) .or. j .gt. jl)&
+&       .or. k .lt. 2) .or. k .gt. kl) then
+! we need the distance to the wall, but this is not available for halo-cells, thus we simply return 
+! the regular sa-boundary condition
+      fact = -one
+      factd = 0.0_8
+      return
+    else
+      temp = ks(i, j, k) + d2wall(i, j, k)/0.03_realtype
+      temp0 = (ks(i, j, k)-d2wall(i, j, k)/0.03_realtype)/temp
+      factd = -((1.0/0.03_realtype+temp0/0.03_realtype)*d2walld(i, j, k)&
+&       /temp)
+      fact = temp0
+    end if
+  end subroutine saroughfact_d
+
   subroutine saroughfact(i, j, k, fact)
 ! returns either the regular sa-boundary condition
 ! or the modified roughness-boundary condition
     use constants
     use inputphysics, only : useroughsa
-    use blockpointers, only : ks, d2wall
+    use blockpointers, only : ks, d2wall, il, jl, kl
     implicit none
 ! local variablse
     integer(kind=inttype), intent(in) :: i, j, k
@@ -2319,6 +2430,12 @@ subroutine saroughfact(i, j, k, fact)
     if (.not.useroughsa) then
       fact = -one
       return
+    else if (((((i .lt. 2 .or. i .gt. il) .or. j .lt. 2) .or. j .gt. jl)&
+&       .or. k .lt. 2) .or. k .gt. kl) then
+! we need the distance to the wall, but this is not available for halo-cells, thus we simply return 
+! the regular sa-boundary condition
+      fact = -one
+      return
     else
       fact = (ks(i, j, k)-d2wall(i, j, k)/0.03_realtype)/(ks(i, j, k)+&
 &       d2wall(i, j, k)/0.03_realtype)
diff --git a/src/adjoint/outputReverse/turbBCRoutines_b.f90 b/src/adjoint/outputReverse/turbBCRoutines_b.f90
index 83aa7aea6..51c8d152b 100644
--- a/src/adjoint/outputReverse/turbBCRoutines_b.f90
+++ b/src/adjoint/outputReverse/turbBCRoutines_b.f90
@@ -15,8 +15,8 @@ module turbbcroutines_b
 !                *bmti1:out *bmti2:out *bvti1:out *bvti2:out *bmtj1:out
 !                *bmtj2:out
 !   plus diff mem management of: rev:in bvtj1:in bvtj2:in w:in
-!                bmtk1:in bmtk2:in bvtk1:in bvtk2:in bmti1:in bmti2:in
-!                bvti1:in bvti2:in bmtj1:in bmtj2:in
+!                bmtk1:in bmtk2:in bvtk1:in bvtk2:in d2wall:in
+!                bmti1:in bmti2:in bvti1:in bvti2:in bmtj1:in bmtj2:in
 !      ==================================================================
   subroutine applyallturbbcthisblock_b(secondhalo)
 !
@@ -680,7 +680,7 @@ end subroutine bceddynowall
 !  differentiation of bceddywall in reverse (adjoint) mode (with options noisize i4 dr8 r8):
 !   gradient     of useful results: *rev
 !   with respect to varying inputs: *rev
-!   plus diff mem management of: rev:in
+!   plus diff mem management of: rev:in d2wall:in
   subroutine bceddywall_b(nn)
 !
 !       bceddywall sets the eddy viscosity in the halo cells of
@@ -869,12 +869,15 @@ subroutine bceddywall(nn)
   end subroutine bceddywall
 
 !  differentiation of bcturbfarfield in reverse (adjoint) mode (with options noisize i4 dr8 r8):
-!   gradient     of useful results: winf *bvtj1 *bvtj2 *bvtk1 *bvtk2
-!                *bvti1 *bvti2
-!   with respect to varying inputs: winf *bvtj1 *bvtj2 *bvtk1 *bvtk2
-!                *bvti1 *bvti2
-!   plus diff mem management of: bvtj1:in bvtj2:in bvtk1:in bvtk2:in
-!                bvti1:in bvti2:in
+!   gradient     of useful results: winf *bvtj1 *bvtj2 *bmtk1 *bmtk2
+!                *bvtk1 *bvtk2 *bmti1 *bmti2 *bvti1 *bvti2 *bmtj1
+!                *bmtj2
+!   with respect to varying inputs: winf *bvtj1 *bvtj2 *bmtk1 *bmtk2
+!                *bvtk1 *bvtk2 *bmti1 *bmti2 *bvti1 *bvti2 *bmtj1
+!                *bmtj2
+!   plus diff mem management of: bvtj1:in bvtj2:in bmtk1:in bmtk2:in
+!                bvtk1:in bvtk2:in bmti1:in bmti2:in bvti1:in bvti2:in
+!                bmtj1:in bmtj2:in
   subroutine bcturbfarfield_b(nn)
 !
 !       bcturbfarfield applies the implicit treatment of the
@@ -911,6 +914,26 @@ subroutine bcturbfarfield_b(nn)
 ! determine whether we are dealing with an inflow or
 ! outflow boundary here.
         if (dot .gt. zero) then
+! outflow. simply extrapolation or zero neumann bc
+! of the turbulent variables.
+          do l=nt1,nt2
+            select case  (bcfaceid(nn)) 
+            case (imin) 
+              call pushcontrol3b(5)
+            case (imax) 
+              call pushcontrol3b(4)
+            case (jmin) 
+              call pushcontrol3b(3)
+            case (jmax) 
+              call pushcontrol3b(2)
+            case (kmin) 
+              call pushcontrol3b(1)
+            case (kmax) 
+              call pushcontrol3b(0)
+            case default
+              call pushcontrol3b(6)
+            end select
+          end do
           call pushcontrol1b(1)
         else
 ! inflow. turbulent variables are prescribed.
@@ -966,6 +989,27 @@ subroutine bcturbfarfield_b(nn)
               bvti1d(i, j, l) = 0.0_8
             end if
           end do
+        else
+          do l=nt2,nt1,-1
+            call popcontrol3b(branch)
+            if (branch .lt. 3) then
+              if (branch .eq. 0) then
+                bmtk2d(i, j, l, l) = 0.0_8
+              else if (branch .eq. 1) then
+                bmtk1d(i, j, l, l) = 0.0_8
+              else
+                bmtj2d(i, j, l, l) = 0.0_8
+              end if
+            else if (branch .lt. 5) then
+              if (branch .eq. 3) then
+                bmtj1d(i, j, l, l) = 0.0_8
+              else
+                bmti2d(i, j, l, l) = 0.0_8
+              end if
+            else if (branch .eq. 5) then
+              bmti1d(i, j, l, l) = 0.0_8
+            end if
+          end do
         end if
       end do
     end do
@@ -1277,6 +1321,86 @@ subroutine bcturboutflow(nn)
     end do
   end subroutine bcturboutflow
 
+!  differentiation of bcturbsymm in reverse (adjoint) mode (with options noisize i4 dr8 r8):
+!   gradient     of useful results: *bmtk1 *bmtk2 *bmti1 *bmti2
+!                *bmtj1 *bmtj2
+!   with respect to varying inputs: *bmtk1 *bmtk2 *bmti1 *bmti2
+!                *bmtj1 *bmtj2
+!   plus diff mem management of: bmtk1:in bmtk2:in bmti1:in bmti2:in
+!                bmtj1:in bmtj2:in
+  subroutine bcturbsymm_b(nn)
+!
+!       bcturbsymm applies the implicit treatment of the symmetry
+!       boundary condition (or inviscid wall) to subface nn. as the
+!       symmetry boundary condition is independent of the turbulence
+!       model, this routine is valid for all models. it is assumed
+!       that the pointers in blockpointers are already set to the
+!       correct block on the correct grid level.
+!
+    use constants
+    use blockpointers
+    use flowvarrefstate
+    implicit none
+!
+!      subroutine arguments.
+!
+    integer(kind=inttype), intent(in) :: nn
+!
+!      local variables.
+!
+    integer(kind=inttype) :: i, j, l
+    integer :: branch
+! loop over the faces of the subfaces and set the values of bmt
+! for an implicit treatment. for a symmetry face this means
+! that the halo value is set to the internal value.
+    do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
+      do i=bcdata(nn)%icbeg,bcdata(nn)%icend
+        do l=nt1,nt2
+          select case  (bcfaceid(nn)) 
+          case (imin) 
+            call pushcontrol3b(5)
+          case (imax) 
+            call pushcontrol3b(4)
+          case (jmin) 
+            call pushcontrol3b(3)
+          case (jmax) 
+            call pushcontrol3b(2)
+          case (kmin) 
+            call pushcontrol3b(1)
+          case (kmax) 
+            call pushcontrol3b(0)
+          case default
+            call pushcontrol3b(6)
+          end select
+        end do
+      end do
+    end do
+    do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
+      do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
+        do l=nt2,nt1,-1
+          call popcontrol3b(branch)
+          if (branch .lt. 3) then
+            if (branch .eq. 0) then
+              bmtk2d(i, j, l, l) = 0.0_8
+            else if (branch .eq. 1) then
+              bmtk1d(i, j, l, l) = 0.0_8
+            else
+              bmtj2d(i, j, l, l) = 0.0_8
+            end if
+          else if (branch .lt. 5) then
+            if (branch .eq. 3) then
+              bmtj1d(i, j, l, l) = 0.0_8
+            else
+              bmti2d(i, j, l, l) = 0.0_8
+            end if
+          else if (branch .eq. 5) then
+            bmti1d(i, j, l, l) = 0.0_8
+          end if
+        end do
+      end do
+    end do
+  end subroutine bcturbsymm_b
+
   subroutine bcturbsymm(nn)
 !
 !       bcturbsymm applies the implicit treatment of the symmetry
@@ -1324,15 +1448,20 @@ subroutine bcturbsymm(nn)
   end subroutine bcturbsymm
 
 !  differentiation of bcturbtreatment in reverse (adjoint) mode (with options noisize i4 dr8 r8):
-!   gradient     of useful results: winf *bvtj1 *bvtj2 *w *rlv
-!                *bvtk1 *bvtk2 *d2wall *bvti1 *bvti2
-!   with respect to varying inputs: winf *bvtj1 *bvtj2 *w *rlv
-!                *bvtk1 *bvtk2 *d2wall *bvti1 *bvti2
+!   gradient     of useful results: winf *bvtj1 *bvtj2 *w *bmtk1
+!                *rlv *bmtk2 *bvtk1 *bvtk2 *d2wall *bmti1 *bmti2
+!                *bvti1 *bvti2 *bmtj1 *bmtj2
+!   with respect to varying inputs: winf *bvtj1 *bvtj2 *w *bmtk1
+!                *rlv *bmtk2 *bvtk1 *bvtk2 *d2wall *bmti1 *bmti2
+!                *bvti1 *bvti2 *bmtj1 *bmtj2
 !   rw status of diff variables: winf:incr *bvtj1:in-out *bvtj2:in-out
-!                *w:incr *rlv:incr *bvtk1:in-out *bvtk2:in-out
-!                *d2wall:incr *bvti1:in-out *bvti2:in-out
-!   plus diff mem management of: bvtj1:in bvtj2:in w:in rlv:in
-!                bvtk1:in bvtk2:in d2wall:in bvti1:in bvti2:in
+!                *w:incr *bmtk1:in-out *rlv:incr *bmtk2:in-out
+!                *bvtk1:in-out *bvtk2:in-out *d2wall:incr *bmti1:in-out
+!                *bmti2:in-out *bvti1:in-out *bvti2:in-out *bmtj1:in-out
+!                *bmtj2:in-out
+!   plus diff mem management of: bvtj1:in bvtj2:in w:in bmtk1:in
+!                rlv:in bmtk2:in bvtk1:in bvtk2:in d2wall:in bmti1:in
+!                bmti2:in bvti1:in bvti2:in bmtj1:in bmtj2:in
   subroutine bcturbtreatment_b()
 !
 !       bcturbtreatment sets the arrays bmti1, bvti1, etc, such that
@@ -1360,21 +1489,21 @@ subroutine bcturbtreatment_b()
 ! determine the kind of boundary condition for this subface.
       select case  (bctype(nn)) 
       case (nswalladiabatic, nswallisothermal) 
-        call pushcontrol2b(2)
+        call pushcontrol3b(3)
       case (symm, symmpolar, eulerwall) 
-        call pushcontrol2b(3)
+        call pushcontrol3b(2)
       case (farfield) 
-        call pushcontrol2b(1)
+        call pushcontrol3b(1)
       case (slidinginterface, oversetouterbound, domaininterfaceall, &
 &     domaininterfacerhouvw, domaininterfacep, domaininterfacerho, &
 &     domaininterfacetotal) 
-        call pushcontrol2b(0)
+        call pushcontrol3b(0)
       case default
-        call pushcontrol2b(3)
+        call pushcontrol3b(4)
       end select
     end do bocos
     do nn=nbocos,1,-1
-      call popcontrol2b(branch)
+      call popcontrol3b(branch)
       if (branch .lt. 2) then
         if (branch .eq. 0) then
           call bcturbinterface_b(nn)
@@ -1382,6 +1511,8 @@ subroutine bcturbtreatment_b()
           call bcturbfarfield_b(nn)
         end if
       else if (branch .eq. 2) then
+        call bcturbsymm_b(nn)
+      else if (branch .eq. 3) then
         call bcturbwall_b(nn)
       end if
     end do
@@ -1390,6 +1521,10 @@ subroutine bcturbtreatment_b()
         do l=nt2,nt1,-1
           bvtk2d(i, j, l) = 0.0_8
           bvtk1d(i, j, l) = 0.0_8
+          do m=nt2,nt1,-1
+            bmtk2d(i, j, l, m) = 0.0_8
+            bmtk1d(i, j, l, m) = 0.0_8
+          end do
         end do
       end do
     end do
@@ -1398,6 +1533,10 @@ subroutine bcturbtreatment_b()
         do l=nt2,nt1,-1
           bvtj2d(i, k, l) = 0.0_8
           bvtj1d(i, k, l) = 0.0_8
+          do m=nt2,nt1,-1
+            bmtj2d(i, k, l, m) = 0.0_8
+            bmtj1d(i, k, l, m) = 0.0_8
+          end do
         end do
       end do
     end do
@@ -1406,6 +1545,10 @@ subroutine bcturbtreatment_b()
         do l=nt2,nt1,-1
           bvti2d(j, k, l) = 0.0_8
           bvti1d(j, k, l) = 0.0_8
+          do m=nt2,nt1,-1
+            bmti2d(j, k, l, m) = 0.0_8
+            bmti1d(j, k, l, m) = 0.0_8
+          end do
         end do
       end do
     end do
@@ -1504,12 +1647,15 @@ subroutine bcturbtreatment()
   end subroutine bcturbtreatment
 
 !  differentiation of bcturbwall in reverse (adjoint) mode (with options noisize i4 dr8 r8):
-!   gradient     of useful results: *bvtj1 *bvtj2 *w *rlv *bvtk1
-!                *bvtk2 *d2wall *bvti1 *bvti2
-!   with respect to varying inputs: *bvtj1 *bvtj2 *w *rlv *bvtk1
-!                *bvtk2 *d2wall *bvti1 *bvti2
-!   plus diff mem management of: bvtj1:in bvtj2:in w:in rlv:in
-!                bvtk1:in bvtk2:in d2wall:in bvti1:in bvti2:in
+!   gradient     of useful results: *bvtj1 *bvtj2 *w *bmtk1 *rlv
+!                *bmtk2 *bvtk1 *bvtk2 *d2wall *bmti1 *bmti2 *bvti1
+!                *bvti2 *bmtj1 *bmtj2
+!   with respect to varying inputs: *bvtj1 *bvtj2 *w *bmtk1 *rlv
+!                *bmtk2 *bvtk1 *bvtk2 *d2wall *bmti1 *bmti2 *bvti1
+!                *bvti2 *bmtj1 *bmtj2
+!   plus diff mem management of: bvtj1:in bvtj2:in w:in bmtk1:in
+!                rlv:in bmtk2:in bvtk1:in bvtk2:in d2wall:in bmti1:in
+!                bmti2:in bvti1:in bvti2:in bmtj1:in bmtj2:in
   subroutine bcturbwall_b(nn)
 !
 !       bcturbwall applies the implicit treatment of the viscous
@@ -1533,7 +1679,7 @@ subroutine bcturbwall_b(nn)
 !
     integer(kind=inttype) :: i, j, ii, jj, iimax, jjmax
     real(kind=realtype) :: tmpd, tmpe, tmpf, nu, fact
-    real(kind=realtype) :: tmpdd, nud
+    real(kind=realtype) :: tmpdd, nud, factd
     real(kind=realtype), dimension(:, :, :, :), pointer :: bmt
     real(kind=realtype), dimension(:, :, :), pointer :: bvt, ww2
     real(kind=realtype), dimension(:, :), pointer :: rlv2, dd2wall
@@ -1559,7 +1705,59 @@ subroutine bcturbwall_b(nn)
 ! implicit treatment.
     select case  (turbmodel) 
     case (spalartallmaras, spalartallmarasedwards) 
-
+! spalart-allmaras type of model. value at the wall is zero,
+! so simply negate the internal value.
+      select case  (bcfaceid(nn)) 
+      case (imin) 
+        do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
+          do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
+            factd = -bmti1d(i, j, itu1, itu1)
+            bmti1d(i, j, itu1, itu1) = 0.0_8
+            call saroughfact_b(2, i, j, fact, factd)
+          end do
+        end do
+      case (imax) 
+        do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
+          do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
+            factd = -bmti2d(i, j, itu1, itu1)
+            bmti2d(i, j, itu1, itu1) = 0.0_8
+            call saroughfact_b(il, i, j, fact, factd)
+          end do
+        end do
+      case (jmin) 
+        do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
+          do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
+            factd = -bmtj1d(i, j, itu1, itu1)
+            bmtj1d(i, j, itu1, itu1) = 0.0_8
+            call saroughfact_b(i, 2, j, fact, factd)
+          end do
+        end do
+      case (jmax) 
+        do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
+          do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
+            factd = -bmtj2d(i, j, itu1, itu1)
+            bmtj2d(i, j, itu1, itu1) = 0.0_8
+            call saroughfact_b(i, jl, j, fact, factd)
+          end do
+        end do
+      case (kmin) 
+        do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
+          do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
+            factd = -bmtk1d(i, j, itu1, itu1)
+            bmtk1d(i, j, itu1, itu1) = 0.0_8
+            call saroughfact_b(i, j, 2, fact, factd)
+          end do
+        end do
+      case (kmax) 
+        do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
+          do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
+            factd = -bmtk2d(i, j, itu1, itu1)
+            bmtk2d(i, j, itu1, itu1) = 0.0_8
+            call saroughfact_b(i, j, kl, fact, factd)
+          end do
+        end do
+      end select
+!        ================================================================
     case (komegawilcox, komegamodified, mentersst) 
 ! k-omega type of models. k is zero on the wall and thus the
 ! halo value is the negative of the first internal cell.
@@ -1615,6 +1813,8 @@ subroutine bcturbwall_b(nn)
             bvti1d(i, j, itu2) = 0.0_8
             nud = tmpd*tempd
             tmpdd = nu*tempd
+            bmti1d(i, j, itu2, itu2) = 0.0_8
+            bmti1d(i, j, itu1, itu1) = 0.0_8
             temp = rkwbeta1*(d2wall(2, ii, jj)*d2wall(2, ii, jj))
             d2walld(2, ii, jj) = d2walld(2, ii, jj) - 2*d2wall(2, ii, jj&
 &             )*rkwbeta1*one*tmpdd/temp**2
@@ -1679,6 +1879,8 @@ subroutine bcturbwall_b(nn)
             bvti2d(i, j, itu2) = 0.0_8
             nud = tmpd*tempd
             tmpdd = nu*tempd
+            bmti2d(i, j, itu2, itu2) = 0.0_8
+            bmti2d(i, j, itu1, itu1) = 0.0_8
             temp = rkwbeta1*(d2wall(il, ii, jj)*d2wall(il, ii, jj))
             d2walld(il, ii, jj) = d2walld(il, ii, jj) - 2*d2wall(il, ii&
 &             , jj)*rkwbeta1*one*tmpdd/temp**2
@@ -1743,6 +1945,8 @@ subroutine bcturbwall_b(nn)
             bvtj1d(i, j, itu2) = 0.0_8
             nud = tmpd*tempd
             tmpdd = nu*tempd
+            bmtj1d(i, j, itu2, itu2) = 0.0_8
+            bmtj1d(i, j, itu1, itu1) = 0.0_8
             temp = rkwbeta1*(d2wall(ii, 2, jj)*d2wall(ii, 2, jj))
             d2walld(ii, 2, jj) = d2walld(ii, 2, jj) - 2*d2wall(ii, 2, jj&
 &             )*rkwbeta1*one*tmpdd/temp**2
@@ -1807,6 +2011,8 @@ subroutine bcturbwall_b(nn)
             bvtj2d(i, j, itu2) = 0.0_8
             nud = tmpd*tempd
             tmpdd = nu*tempd
+            bmtj2d(i, j, itu2, itu2) = 0.0_8
+            bmtj2d(i, j, itu1, itu1) = 0.0_8
             temp = rkwbeta1*(d2wall(ii, jl, jj)*d2wall(ii, jl, jj))
             d2walld(ii, jl, jj) = d2walld(ii, jl, jj) - 2*d2wall(ii, jl&
 &             , jj)*rkwbeta1*one*tmpdd/temp**2
@@ -1871,6 +2077,8 @@ subroutine bcturbwall_b(nn)
             bvtk1d(i, j, itu2) = 0.0_8
             nud = tmpd*tempd
             tmpdd = nu*tempd
+            bmtk1d(i, j, itu2, itu2) = 0.0_8
+            bmtk1d(i, j, itu1, itu1) = 0.0_8
             temp = rkwbeta1*(d2wall(ii, jj, 2)*d2wall(ii, jj, 2))
             d2walld(ii, jj, 2) = d2walld(ii, jj, 2) - 2*d2wall(ii, jj, 2&
 &             )*rkwbeta1*one*tmpdd/temp**2
@@ -1935,6 +2143,8 @@ subroutine bcturbwall_b(nn)
             bvtk2d(i, j, itu2) = 0.0_8
             nud = tmpd*tempd
             tmpdd = nu*tempd
+            bmtk2d(i, j, itu2, itu2) = 0.0_8
+            bmtk2d(i, j, itu1, itu1) = 0.0_8
             temp = rkwbeta1*(d2wall(ii, jj, kl)*d2wall(ii, jj, kl))
             d2walld(ii, jj, kl) = d2walld(ii, jj, kl) - 2*d2wall(ii, jj&
 &             , kl)*rkwbeta1*one*tmpdd/temp**2
@@ -1957,6 +2167,54 @@ subroutine bcturbwall_b(nn)
           end if
         end do
       end select
+!        ================================================================
+    case (ktau) 
+! k-tau model. both k and tau are zero at the wall, so the
+! negative value of the internal cell is taken for the halo.
+      select case  (bcfaceid(nn)) 
+      case (imin) 
+        do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
+          do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
+            bmti1d(i, j, itu2, itu2) = 0.0_8
+            bmti1d(i, j, itu1, itu1) = 0.0_8
+          end do
+        end do
+      case (imax) 
+        do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
+          do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
+            bmti2d(i, j, itu2, itu2) = 0.0_8
+            bmti2d(i, j, itu1, itu1) = 0.0_8
+          end do
+        end do
+      case (jmin) 
+        do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
+          do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
+            bmtj1d(i, j, itu2, itu2) = 0.0_8
+            bmtj1d(i, j, itu1, itu1) = 0.0_8
+          end do
+        end do
+      case (jmax) 
+        do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
+          do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
+            bmtj2d(i, j, itu2, itu2) = 0.0_8
+            bmtj2d(i, j, itu1, itu1) = 0.0_8
+          end do
+        end do
+      case (kmin) 
+        do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
+          do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
+            bmtk1d(i, j, itu2, itu2) = 0.0_8
+            bmtk1d(i, j, itu1, itu1) = 0.0_8
+          end do
+        end do
+      case (kmax) 
+        do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
+          do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
+            bmtk2d(i, j, itu2, itu2) = 0.0_8
+            bmtk2d(i, j, itu1, itu1) = 0.0_8
+          end do
+        end do
+      end select
 !        ================================================================
     end select
   end subroutine bcturbwall_b
@@ -2719,12 +2977,41 @@ subroutine turbbcnswall(secondhalo)
     end do bocos
   end subroutine turbbcnswall
 
+!  differentiation of saroughfact in reverse (adjoint) mode (with options noisize i4 dr8 r8):
+!   gradient     of useful results: *d2wall fact
+!   with respect to varying inputs: *d2wall
+!   plus diff mem management of: d2wall:in
+  subroutine saroughfact_b(i, j, k, fact, factd)
+! returns either the regular sa-boundary condition
+! or the modified roughness-boundary condition
+    use constants
+    use inputphysics, only : useroughsa
+    use blockpointers, only : ks, d2wall, d2walld, il, jl, kl
+    implicit none
+! local variablse
+    integer(kind=inttype), intent(in) :: i, j, k
+    real(kind=realtype) :: fact
+    real(kind=realtype) :: factd
+    real(kind=realtype) :: temp
+    if (useroughsa) then
+! we need the distance to the wall, but this is not available for halo-cells, thus we simply return 
+! the regular sa-boundary condition
+      if (.not.(((((i .lt. 2 .or. i .gt. il) .or. j .lt. 2) .or. j .gt. &
+&         jl) .or. k .lt. 2) .or. k .gt. kl)) then
+        temp = ks(i, j, k) + d2wall(i, j, k)/0.03_realtype
+        d2walld(i, j, k) = d2walld(i, j, k) - (1.0/(0.03_realtype*temp)+&
+&         (ks(i, j, k)-d2wall(i, j, k)/0.03_realtype)/(0.03_realtype*&
+&         temp**2))*factd
+      end if
+    end if
+  end subroutine saroughfact_b
+
   subroutine saroughfact(i, j, k, fact)
 ! returns either the regular sa-boundary condition
 ! or the modified roughness-boundary condition
     use constants
     use inputphysics, only : useroughsa
-    use blockpointers, only : ks, d2wall
+    use blockpointers, only : ks, d2wall, il, jl, kl
     implicit none
 ! local variablse
     integer(kind=inttype), intent(in) :: i, j, k
@@ -2732,6 +3019,12 @@ subroutine saroughfact(i, j, k, fact)
     if (.not.useroughsa) then
       fact = -one
       return
+    else if (((((i .lt. 2 .or. i .gt. il) .or. j .lt. 2) .or. j .gt. jl)&
+&       .or. k .lt. 2) .or. k .gt. kl) then
+! we need the distance to the wall, but this is not available for halo-cells, thus we simply return 
+! the regular sa-boundary condition
+      fact = -one
+      return
     else
       fact = (ks(i, j, k)-d2wall(i, j, k)/0.03_realtype)/(ks(i, j, k)+&
 &       d2wall(i, j, k)/0.03_realtype)
diff --git a/src/adjoint/outputReverseFast/turbBCRoutines_fast_b.f90 b/src/adjoint/outputReverseFast/turbBCRoutines_fast_b.f90
index d535813cc..ec5103154 100644
--- a/src/adjoint/outputReverseFast/turbBCRoutines_fast_b.f90
+++ b/src/adjoint/outputReverseFast/turbBCRoutines_fast_b.f90
@@ -1188,7 +1188,7 @@ subroutine saroughfact(i, j, k, fact)
 ! or the modified roughness-boundary condition
     use constants
     use inputphysics, only : useroughsa
-    use blockpointers, only : ks, d2wall
+    use blockpointers, only : ks, d2wall, il, jl, kl
     implicit none
 ! local variablse
     integer(kind=inttype), intent(in) :: i, j, k
@@ -1196,6 +1196,12 @@ subroutine saroughfact(i, j, k, fact)
     if (.not.useroughsa) then
       fact = -one
       return
+    else if (((((i .lt. 2 .or. i .gt. il) .or. j .lt. 2) .or. j .gt. jl)&
+&       .or. k .lt. 2) .or. k .gt. kl) then
+! we need the distance to the wall, but this is not available for halo-cells, thus we simply return 
+! the regular sa-boundary condition
+      fact = -one
+      return
     else
       fact = (ks(i, j, k)-d2wall(i, j, k)/0.03_realtype)/(ks(i, j, k)+&
 &       d2wall(i, j, k)/0.03_realtype)
diff --git a/src/turbulence/turbBCRoutines.F90 b/src/turbulence/turbBCRoutines.F90
index f0f2777f1..0f33c488a 100644
--- a/src/turbulence/turbBCRoutines.F90
+++ b/src/turbulence/turbBCRoutines.F90
@@ -1405,7 +1405,7 @@ subroutine saRoughFact(i, j, k, fact)
 
         use constants
         use inputPhysics, only: useRoughSA
-        use BlockPointers, only: ks, d2wall
+        use BlockPointers, only: ks, d2wall, il, jl, kl
         implicit none
 
         ! local variablse
@@ -1417,6 +1417,16 @@ subroutine saRoughFact(i, j, k, fact)
             return
         end if
 
+        ! We need the distance to the wall, but this is not available for halo-cells, thus we simply return 
+        ! the regular SA-boundary condition
+        if (i .lt. 2 .or. i .gt. il .or. &
+            j .lt. 2 .or. j .gt. jl .or. &
+            k .lt. 2 .or. k .gt. kl) then
+            fact = -one
+            return
+        end if
+
+
         fact = (ks(i, j, k) - d2wall(i, j, k) / 0.03_realType) / &
                (ks(i, j, k) + d2wall(i, j, k) / 0.03_realType)
 

From 8ec0693d14e7658ee2863f1adedfd3226fbd69d2 Mon Sep 17 00:00:00 2001
From: DavidAnderegg <andv@zhaw.ch>
Date: Fri, 11 Oct 2024 11:05:55 +0200
Subject: [PATCH 59/60] Extend d2wall to include halos and fix sa-rough
 gradients

---
 src/NKSolver/blockette.F90                    |  10 +-
 src/adjoint/Makefile_tapenade                 |  22 +-
 src/adjoint/adjointUtils.F90                  |   6 +-
 src/adjoint/outputForward/BCRoutines_d.f90    | 132 ++-
 .../outputForward/turbBCRoutines_d.f90        |  86 +-
 src/adjoint/outputForward/utils_d.f90         |  67 +-
 src/adjoint/outputReverse/BCRoutines_b.f90    | 125 +--
 .../outputReverse/turbBCRoutines_b.f90        | 102 +-
 src/adjoint/outputReverse/utils_b.f90         |  67 +-
 .../outputReverseFast/BCRoutines_fast_b.f90   |  22 +-
 .../turbBCRoutines_fast_b.f90                 |  13 +-
 .../outputReverseFast/utils_fast_b.f90        |  67 +-
 src/modules/BCPointers.F90                    |   3 +
 src/modules/block.F90                         |   7 +-
 src/preprocessing/preprocessingAPI.F90        |   4 +
 src/solver/BCRoutines.F90                     |  21 +-
 src/turbulence/turbBCRoutines.F90             |  10 -
 src/utils/haloExchange.F90                    | 884 ++++++++++++++++--
 src/utils/utils.F90                           | 172 +++-
 src/wallDistance/wallDistance.F90             |  26 +-
 .../reg_tests/refs/adjoint_rans_rough_sa.json | 802 ++++++++--------
 tests/reg_tests/refs/funcs_rans_rough_sa.json |   9 +-
 22 files changed, 1927 insertions(+), 730 deletions(-)

diff --git a/src/NKSolver/blockette.F90 b/src/NKSolver/blockette.F90
index 6ce106c2e..3f34686f0 100644
--- a/src/NKSolver/blockette.F90
+++ b/src/NKSolver/blockette.F90
@@ -86,7 +86,7 @@ subroutine blocketteRes(useDissApprox, useViscApprox, useUpdateIntermed, useFlow
         use initializeFlow, only: referenceState
         use section, only: sections, nSections
         use iteration, only: rFil, currentLevel
-        use haloExchange, only: exchangeCoor, whalo2
+        use haloExchange, only: exchangeCoor, whalo2, exchanged2Wall
         use wallDistance, only: updateWallDistancesQuickly
         use utils, only: setPointers, EChk
         use turbUtils, only: computeEddyViscosity
@@ -181,12 +181,20 @@ subroutine blocketteRes(useDissApprox, useViscApprox, useUpdateIntermed, useFlow
                 do nn = 1, nDom
                     call setPointers(nn, currentLevel, sps)
                     call xhalo_block()
+
+                    if (equations == RANSEquations .and. useApproxWallDistance) then
+                        call updateWallDistancesQuickly(nn, 1, sps)
+                    end if
                 end do
             end do
 
             ! Now exchange the coordinates (fine level only)
             call exchangecoor(1)
 
+            if (equations == RANSEquations .and. useApproxWallDistance) then
+                call exchanged2Wall(1)
+            end if
+
             do sps = 1, nTimeIntervalsSpectral
                 ! Update overset connectivity if necessary
                 if (oversetPresent .and. oversetUpdateMode == updateFast) then
diff --git a/src/adjoint/Makefile_tapenade b/src/adjoint/Makefile_tapenade
index b11030e67..fc2686635 100644
--- a/src/adjoint/Makefile_tapenade
+++ b/src/adjoint/Makefile_tapenade
@@ -132,17 +132,17 @@ flowUtils%computeEtotBlock(w, p) > \
 \
 turbUtils%computeEddyViscosity(w,rlv)>(w,rlv,rev) \
 \
-BCRoutines%bcSymm1stHalo(ww1, ww2, pp1, pp2, rlv1, rlv2, rev1, rev2, bcData%norm) > \
-                        (ww1, ww2, pp1, pp2, rlv1, rlv2, rev1, rev2, bcData%norm) \
+BCRoutines%bcSymm1stHalo(ww1, ww2, pp1, pp2, rlv1, rlv2, rev1, rev2, d2wall1, d2wall2, bcData%norm) > \
+                        (ww1, ww2, pp1, pp2, rlv1, rlv2, rev1, rev2, d2wall1, d2wall2, bcData%norm) \
 \
-BCRoutines%bcSymm2ndHalo(ww0, ww3, pp0, pp3, rlv0, rlv3, rev0, rev3, bcData%norm) > \
-                        (ww0, ww3, pp0, pp3, rlv0, rlv3, rev0, rev3, bcData%norm) \
+BCRoutines%bcSymm2ndHalo(ww0, ww3, pp0, pp3, rlv0, rlv3, rev0, rev3, bd2wall0, d2wall3, cData%norm) > \
+                        (ww0, ww3, pp0, pp3, rlv0, rlv3, rev0, rev3, bd2wall0, d2wall3, bcData%norm) \
 \
-BCRoutines%bcSymmPolar1stHalo(xx, ww1, ww2, pp1, pp2, rlv1, rlv2, rev1, rev2) > \
-                             (xx, ww1, ww2, pp1, pp2, rlv1, rlv2, rev1, rev2) \
+BCRoutines%bcSymmPolar1stHalo(xx, ww1, ww2, pp1, pp2, rlv1, rlv2, rev1, rev2, d2wall1, d2wall2) > \
+                             (xx, ww1, ww2, pp1, pp2, rlv1, rlv2, rev1, rev2, d2wall1, d2wall2) \
 \
-BCRoutines%bcSymmPolar2ndHalo(xx, ww0, ww3, pp0, pp3, rlv0, rlv3, rev0, rev3) > \
-                             (xx, ww0, ww3, pp0, pp3, rlv0, rlv3, rev0, rev3) \
+BCRoutines%bcSymmPolar2ndHalo(xx, ww0, ww3, pp0, pp3, rlv0, rlv3, rev0, rev3, d2wall0, d2wall3) > \
+                             (xx, ww0, ww3, pp0, pp3, rlv0, rlv3, rev0, rev3, d2wall0, d2wall3) \
 \
 BCRoutines%bcNSWallAdiabatic(ww0, ww1, ww2, pp0, pp1, pp2, pp3, rlv0, rlv1, rlv2, rev0, rev1, rev2, bcData%uSlip) > \
                             (ww0, ww1, ww2, pp0, pp1, pp2, pp3, rlv0, rlv1, rlv2, rev0, rev1, rev2, bcData%uSlip) \
@@ -162,10 +162,10 @@ BCRoutines%bcSubsonicInflow(ww0, ww1, ww2, pp0, pp1, pp2, rlv0, rlv1, rlv2, rev0
 BCRoutines%bcSubsonicOutflow(ww0, ww1, ww2, pp0, pp1, pp2, rlv0, rlv1, rlv2, rev0, rev1, rev2, bcData%norm, bcData%Ps) > \
                             (ww0, ww1, ww2, pp0, pp1, pp2, rlv0, rlv1, rlv2, rev0, rev1, rev2, bcData%norm, bcData%Ps) \
 \
-turbBCRoutines%applyAllTurbBCThisBlock(rev, w, bvtj1, bvtj2, bvtk1, bvtk2, bvti1, bvti2, bmti1, bmti2, bmtj1, bmtj2, bmtk1, bmtk2) > \
-                                      (rev, w) \
+turbBCRoutines%applyAllTurbBCThisBlock(rev, w, bvtj1, bvtj2, bvtk1, bvtk2, bvti1, bvti2, bmti1, bmti2, bmtj1, bmtj2, bmtk1, bmtk2, d2wall) > \
+                                      (rev, w, bvtj1, bvtj2, bvtk1, bvtk2, bvti1, bvti2, bmti1, bmti2, bmtj1, bmtj2, bmtk1, bmtk2, d2wall) \
 \
-turbBCRoutines%bcTurbTreatment(w, rlv, d2wall, winf) > \
+turbBCRoutines%bcTurbTreatment(w, rlv, d2wall, winf, bvtj1, bvtj2, bvtk1, bvtk2, bvti1, bvti2, bmti1, bmti2, bmtj1, bmtj2, bmtk1, bmtk2) > \
                               (w, rlv, d2wall, winf, bvtj1, bvtj2, bvtk1, bvtk2, bvti1, bvti2, bmti1, bmti2, bmtj1, bmtj2, bmtk1, bmtk2) \
 \
 solverUtils%timeStep_block(w, pInfCorr, rhoInf, si, sj, sk, sFaceI, sFaceJ, sFaceK, p, radi, radj, radk, dtl, rlv, rev, vol) > \
diff --git a/src/adjoint/adjointUtils.F90 b/src/adjoint/adjointUtils.F90
index 14641f51f..b2a5a85a2 100644
--- a/src/adjoint/adjointUtils.F90
+++ b/src/adjoint/adjointUtils.F90
@@ -772,8 +772,10 @@ subroutine allocDerivativeValues(level)
 
                 ! Allocate d2wall if not already done so
                 if (.not. associated(flowDoms(nn, 1, sps)%d2wall)) then
-                    allocate (flowDoms(nn, 1, sps)%d2wall(2:il, 2:jl, 2:kl))
+                    allocate (flowDoms(nn, 1, sps)%d2wall(0:ib, 0:jb, 0:kb))
                     call EChk(ierr, __FILE__, __LINE__)
+
+                    flowDoms(nn, level, sps)%d2Wall = 0.01
                 end if
 
                 ! Now allocate all valus that have a differentiable
@@ -829,7 +831,7 @@ subroutine allocDerivativeValues(level)
                     flowDomsd(nn, level, sps)%bvtj2(ie, ke, nt1:nt2), &
                     flowDomsd(nn, level, sps)%bvtk1(ie, je, nt1:nt2), &
                     flowDomsd(nn, level, sps)%bvtk2(ie, je, nt1:nt2), &
-                    flowDomsd(nn, level, sps)%d2Wall(2:il, 2:jl, 2:kl), &
+                    flowDomsd(nn, level, sps)%d2Wall(0:ib, 0:jb, 0:kb), &
                     stat=ierr)
                 call EChk(ierr, __FILE__, __LINE__)
 
diff --git a/src/adjoint/outputForward/BCRoutines_d.f90 b/src/adjoint/outputForward/BCRoutines_d.f90
index 3d28d0bbf..16967bf6e 100644
--- a/src/adjoint/outputForward/BCRoutines_d.f90
+++ b/src/adjoint/outputForward/BCRoutines_d.f90
@@ -156,14 +156,15 @@ subroutine applyallbc_block(secondhalo)
   end subroutine applyallbc_block
 
 !  differentiation of bcsymm1sthalo in forward (tangent) mode (with options i4 dr8 r8):
-!   variations   of useful results: *rev1 *pp1 *rlv1 *ww1
-!   with respect to varying inputs: *rev1 *rev2 *pp1 *pp2 *rlv1
-!                *rlv2 *ww1 *ww2 *(*bcdata.norm)
-!   rw status of diff variables: *rev1:in-out *rev2:in *pp1:in-out
-!                *pp2:in *rlv1:in-out *rlv2:in *ww1:in-out *ww2:in
-!                *(*bcdata.norm):in
-!   plus diff mem management of: rev1:in rev2:in pp1:in pp2:in
-!                rlv1:in rlv2:in ww1:in ww2:in bcdata:in *bcdata.norm:in
+!   variations   of useful results: *rev1 *d2wall1 *pp1 *rlv1 *ww1
+!   with respect to varying inputs: *rev1 *rev2 *d2wall1 *d2wall2
+!                *pp1 *pp2 *rlv1 *rlv2 *ww1 *ww2 *(*bcdata.norm)
+!   rw status of diff variables: *rev1:in-out *rev2:in *d2wall1:in-out
+!                *d2wall2:in *pp1:in-out *pp2:in *rlv1:in-out *rlv2:in
+!                *ww1:in-out *ww2:in *(*bcdata.norm):in
+!   plus diff mem management of: rev1:in rev2:in d2wall1:in d2wall2:in
+!                pp1:in pp2:in rlv1:in rlv2:in ww1:in ww2:in bcdata:in
+!                *bcdata.norm:in
 ! ===================================================================
 !   actual implementation of each of the boundary condition routines
 ! ===================================================================
@@ -179,9 +180,11 @@ subroutine bcsymm1sthalo_d(nn)
     use constants
     use blockpointers, only : bcdata, bcdatad
     use flowvarrefstate, only : viscous, eddymodel
+    use inputphysics, only : useroughsa
     use bcpointers_d, only : gamma1, gamma2, ww1, ww1d, ww2, ww2d, pp1, &
 &   pp1d, pp2, pp2d, rlv1, rlv1d, rlv2, rlv2d, istart, jstart, isize, &
-&   jsize, rev1, rev1d, rev2, rev2d
+&   jsize, rev1, rev1d, rev2, rev2d, d2wall1, d2wall1d, d2wall2, &
+&   d2wall2d, ks1, ks2
     implicit none
 ! subroutine arguments.
     integer(kind=inttype), intent(in) :: nn
@@ -232,6 +235,9 @@ subroutine bcsymm1sthalo_d(nn)
       gamma1(i, j) = gamma2(i, j)
       pp1d(i, j) = pp2d(i, j)
       pp1(i, j) = pp2(i, j)
+      d2wall1d(i, j) = d2wall2d(i, j)
+      d2wall1(i, j) = d2wall2(i, j)
+      if (useroughsa) ks1(i, j) = ks2(i, j)
       if (viscous) then
         rlv1d(i, j) = rlv2d(i, j)
         rlv1(i, j) = rlv2(i, j)
@@ -258,8 +264,10 @@ subroutine bcsymm1sthalo(nn)
     use constants
     use blockpointers, only : bcdata
     use flowvarrefstate, only : viscous, eddymodel
+    use inputphysics, only : useroughsa
     use bcpointers_d, only : gamma1, gamma2, ww1, ww2, pp1, pp2, rlv1, &
-&   rlv2, istart, jstart, isize, jsize, rev1, rev2
+&   rlv2, istart, jstart, isize, jsize, rev1, rev2, d2wall1, d2wall2, &
+&   ks1, ks2
     implicit none
 ! subroutine arguments.
     integer(kind=inttype), intent(in) :: nn
@@ -289,6 +297,8 @@ subroutine bcsymm1sthalo(nn)
 ! laminar and eddy viscosity in the halo.
       gamma1(i, j) = gamma2(i, j)
       pp1(i, j) = pp2(i, j)
+      d2wall1(i, j) = d2wall2(i, j)
+      if (useroughsa) ks1(i, j) = ks2(i, j)
       if (viscous) rlv1(i, j) = rlv2(i, j)
       if (eddymodel) rev1(i, j) = rev2(i, j)
     end do
@@ -297,12 +307,11 @@ end subroutine bcsymm1sthalo
 !  differentiation of bcsymm2ndhalo in forward (tangent) mode (with options i4 dr8 r8):
 !   variations   of useful results: *rev0 *pp0 *rlv0 *ww0
 !   with respect to varying inputs: *rev0 *rev3 *pp0 *pp3 *rlv0
-!                *rlv3 *ww0 *ww3 *(*bcdata.norm)
+!                *rlv3 *ww0 *ww3
 !   rw status of diff variables: *rev0:in-out *rev3:in *pp0:in-out
 !                *pp3:in *rlv0:in-out *rlv3:in *ww0:in-out *ww3:in
-!                *(*bcdata.norm):in
 !   plus diff mem management of: rev0:in rev3:in pp0:in pp3:in
-!                rlv0:in rlv3:in ww0:in ww3:in bcdata:in *bcdata.norm:in
+!                rlv0:in rlv3:in ww0:in ww3:in
   subroutine bcsymm2ndhalo_d(nn)
 !  bcsymm2ndhalo applies the symmetry boundary conditions to a
 !  block for the 2nd halo. this routine is separate as it makes
@@ -310,9 +319,11 @@ subroutine bcsymm2ndhalo_d(nn)
     use constants
     use blockpointers, only : bcdata, bcdatad
     use flowvarrefstate, only : viscous, eddymodel
+    use inputphysics, only : useroughsa
     use bcpointers_d, only : gamma0, gamma3, ww0, ww0d, ww3, ww3d, pp0, &
-&   pp0d, pp3, pp3d, rlv0, rlv0d, rlv3, rlv3d, rev0, rev0d, rev3, rev3d,&
-&   istart, jstart, isize, jsize
+&   pp0d, pp3, pp3d, rlv0, rlv0d, rlv3, rlv3d, d2wall0, d2wall0d, &
+&   d2wall3, d2wall3d, ks0, ks3, rev0, rev0d, rev3, rev3d, istart, &
+&   jstart, isize, jsize
     implicit none
 ! subroutine arguments.
     integer(kind=inttype), intent(in) :: nn
@@ -332,27 +343,22 @@ subroutine bcsymm2ndhalo_d(nn)
       temp = bcdata(nn)%norm(i, j, 1)
       temp0 = bcdata(nn)%norm(i, j, 2)
       temp1 = bcdata(nn)%norm(i, j, 3)
-      vnd = two*(temp*ww3d(i, j, ivx)+ww3(i, j, ivx)*bcdatad(nn)%norm(i&
-&       , j, 1)+temp0*ww3d(i, j, ivy)+ww3(i, j, ivy)*bcdatad(nn)%norm(i&
-&       , j, 2)+temp1*ww3d(i, j, ivz)+ww3(i, j, ivz)*bcdatad(nn)%norm(i&
-&       , j, 3))
-      vn = two*(ww3(i, j, ivx)*temp+ww3(i, j, ivy)*temp0+ww3(i, j, ivz)*&
-&       temp1)
+      vnd = two*(temp*ww3d(i, j, ivx)+temp0*ww3d(i, j, ivy)+temp1*ww3d(i&
+&       , j, ivz))
+      vn = two*(temp*ww3(i, j, ivx)+temp0*ww3(i, j, ivy)+temp1*ww3(i, j&
+&       , ivz))
 ! determine the flow variables in the halo cell.
       ww0d(i, j, irho) = ww3d(i, j, irho)
       ww0(i, j, irho) = ww3(i, j, irho)
       temp1 = bcdata(nn)%norm(i, j, 1)
-      ww0d(i, j, ivx) = ww3d(i, j, ivx) - temp1*vnd - vn*bcdatad(nn)%&
-&       norm(i, j, 1)
-      ww0(i, j, ivx) = ww3(i, j, ivx) - vn*temp1
+      ww0d(i, j, ivx) = ww3d(i, j, ivx) - temp1*vnd
+      ww0(i, j, ivx) = ww3(i, j, ivx) - temp1*vn
       temp1 = bcdata(nn)%norm(i, j, 2)
-      ww0d(i, j, ivy) = ww3d(i, j, ivy) - temp1*vnd - vn*bcdatad(nn)%&
-&       norm(i, j, 2)
-      ww0(i, j, ivy) = ww3(i, j, ivy) - vn*temp1
+      ww0d(i, j, ivy) = ww3d(i, j, ivy) - temp1*vnd
+      ww0(i, j, ivy) = ww3(i, j, ivy) - temp1*vn
       temp1 = bcdata(nn)%norm(i, j, 3)
-      ww0d(i, j, ivz) = ww3d(i, j, ivz) - temp1*vnd - vn*bcdatad(nn)%&
-&       norm(i, j, 3)
-      ww0(i, j, ivz) = ww3(i, j, ivz) - vn*temp1
+      ww0d(i, j, ivz) = ww3d(i, j, ivz) - temp1*vnd
+      ww0(i, j, ivz) = ww3(i, j, ivz) - temp1*vn
       ww0d(i, j, irhoe) = ww3d(i, j, irhoe)
       ww0(i, j, irhoe) = ww3(i, j, irhoe)
 ! set the pressure and gamma and possibly the
@@ -360,6 +366,9 @@ subroutine bcsymm2ndhalo_d(nn)
       gamma0(i, j) = gamma3(i, j)
       pp0d(i, j) = pp3d(i, j)
       pp0(i, j) = pp3(i, j)
+      d2wall0d(i, j) = 0.0_8
+      d2wall0(i, j) = d2wall3(i, j)
+      if (useroughsa) ks0(i, j) = ks3(i, j)
       if (viscous) then
         rlv0d(i, j) = rlv3d(i, j)
         rlv0(i, j) = rlv3(i, j)
@@ -378,8 +387,10 @@ subroutine bcsymm2ndhalo(nn)
     use constants
     use blockpointers, only : bcdata
     use flowvarrefstate, only : viscous, eddymodel
+    use inputphysics, only : useroughsa
     use bcpointers_d, only : gamma0, gamma3, ww0, ww3, pp0, pp3, rlv0, &
-&   rlv3, rev0, rev3, istart, jstart, isize, jsize
+&   rlv3, d2wall0, d2wall3, ks0, ks3, rev0, rev3, istart, jstart, isize,&
+&   jsize
     implicit none
 ! subroutine arguments.
     integer(kind=inttype), intent(in) :: nn
@@ -406,19 +417,23 @@ subroutine bcsymm2ndhalo(nn)
 ! laminar and eddy viscosity in the halo.
       gamma0(i, j) = gamma3(i, j)
       pp0(i, j) = pp3(i, j)
+      d2wall0(i, j) = d2wall3(i, j)
+      if (useroughsa) ks0(i, j) = ks3(i, j)
       if (viscous) rlv0(i, j) = rlv3(i, j)
       if (eddymodel) rev0(i, j) = rev3(i, j)
     end do
   end subroutine bcsymm2ndhalo
 
 !  differentiation of bcsymmpolar1sthalo in forward (tangent) mode (with options i4 dr8 r8):
-!   variations   of useful results: *rev1 *pp1 *rlv1 *ww1
-!   with respect to varying inputs: *xx *rev1 *rev2 *pp1 *pp2 *rlv1
-!                *rlv2 *ww1 *ww2
-!   rw status of diff variables: *xx:in *rev1:in-out *rev2:in *pp1:in-out
-!                *pp2:in *rlv1:in-out *rlv2:in *ww1:in-out *ww2:in
-!   plus diff mem management of: xx:in rev1:in rev2:in pp1:in pp2:in
-!                rlv1:in rlv2:in ww1:in ww2:in
+!   variations   of useful results: *rev1 *d2wall1 *pp1 *rlv1 *ww1
+!   with respect to varying inputs: *xx *rev1 *rev2 *d2wall1 *d2wall2
+!                *pp1 *pp2 *rlv1 *rlv2 *ww1 *ww2
+!   rw status of diff variables: *xx:in *rev1:in-out *rev2:in *d2wall1:in-out
+!                *d2wall2:in *pp1:in-out *pp2:in *rlv1:in-out *rlv2:in
+!                *ww1:in-out *ww2:in
+!   plus diff mem management of: xx:in rev1:in rev2:in d2wall1:in
+!                d2wall2:in pp1:in pp2:in rlv1:in rlv2:in ww1:in
+!                ww2:in
   subroutine bcsymmpolar1sthalo_d(nn)
 ! bcsymmpolar applies the polar symmetry boundary conditions to a
 ! singular line of a block. it is assumed that the pointers in
@@ -427,9 +442,10 @@ subroutine bcsymmpolar1sthalo_d(nn)
 ! case of a degenerate line, as this line is the axi-symmetric
 ! centerline. this routine does just the 1st level halo.
     use constants
+    use inputphysics, only : useroughsa
     use bcpointers_d, only : ww1, ww1d, ww2, ww2d, pp1, pp1d, pp2, pp2d,&
 &   rlv1, rlv1d, rlv2, rlv2d, rev1, rev1d, rev2, rev2d, xx, xxd, istart,&
-&   jstart, isize, jsize
+&   jstart, isize, jsize, d2wall1, d2wall1d, d2wall2, d2wall2d, ks1, ks2
     use flowvarrefstate, only : viscous, eddymodel
     implicit none
 ! subroutine arguments.
@@ -514,6 +530,9 @@ subroutine bcsymmpolar1sthalo_d(nn)
 ! eddy viscosity in the halo.
       pp1d(i, j) = pp2d(i, j)
       pp1(i, j) = pp2(i, j)
+      d2wall1d(i, j) = d2wall2d(i, j)
+      d2wall1(i, j) = d2wall2(i, j)
+      if (useroughsa) ks1(i, j) = ks2(i, j)
       if (viscous) then
         rlv1d(i, j) = rlv2d(i, j)
         rlv1(i, j) = rlv2(i, j)
@@ -533,8 +552,9 @@ subroutine bcsymmpolar1sthalo(nn)
 ! case of a degenerate line, as this line is the axi-symmetric
 ! centerline. this routine does just the 1st level halo.
     use constants
+    use inputphysics, only : useroughsa
     use bcpointers_d, only : ww1, ww2, pp1, pp2, rlv1, rlv2, rev1, rev2,&
-&   xx, istart, jstart, isize, jsize
+&   xx, istart, jstart, isize, jsize, d2wall1, d2wall2, ks1, ks2
     use flowvarrefstate, only : viscous, eddymodel
     implicit none
 ! subroutine arguments.
@@ -590,19 +610,23 @@ subroutine bcsymmpolar1sthalo(nn)
 ! set the pressure and possibly the laminar and
 ! eddy viscosity in the halo.
       pp1(i, j) = pp2(i, j)
+      d2wall1(i, j) = d2wall2(i, j)
+      if (useroughsa) ks1(i, j) = ks2(i, j)
       if (viscous) rlv1(i, j) = rlv2(i, j)
       if (eddymodel) rev1(i, j) = rev2(i, j)
     end do
   end subroutine bcsymmpolar1sthalo
 
 !  differentiation of bcsymmpolar2ndhalo in forward (tangent) mode (with options i4 dr8 r8):
-!   variations   of useful results: *rev0 *pp0 *rlv0 *ww0
-!   with respect to varying inputs: *xx *rev0 *rev3 *pp0 *pp3 *rlv0
-!                *rlv3 *ww0 *ww3
-!   rw status of diff variables: *xx:in *rev0:in-out *rev3:in *pp0:in-out
-!                *pp3:in *rlv0:in-out *rlv3:in *ww0:in-out *ww3:in
-!   plus diff mem management of: xx:in rev0:in rev3:in pp0:in pp3:in
-!                rlv0:in rlv3:in ww0:in ww3:in
+!   variations   of useful results: *rev0 *d2wall0 *pp0 *rlv0 *ww0
+!   with respect to varying inputs: *xx *rev0 *d2wall0 *rev3 *d2wall3
+!                *pp0 *pp3 *rlv0 *rlv3 *ww0 *ww3
+!   rw status of diff variables: *xx:in *rev0:in-out *d2wall0:in-out
+!                *rev3:in *d2wall3:in *pp0:in-out *pp3:in *rlv0:in-out
+!                *rlv3:in *ww0:in-out *ww3:in
+!   plus diff mem management of: xx:in rev0:in d2wall0:in rev3:in
+!                d2wall3:in pp0:in pp3:in rlv0:in rlv3:in ww0:in
+!                ww3:in
   subroutine bcsymmpolar2ndhalo_d(nn)
 ! bcsymmpolar applies the polar symmetry boundary conditions to a
 ! singular line of a block. it is assumed that the pointers in
@@ -611,9 +635,11 @@ subroutine bcsymmpolar2ndhalo_d(nn)
 ! case of a degenerate line, as this line is the axi-symmetric
 ! centerline. this routine does just the 2nd level halo.
     use constants
+    use inputphysics, only : useroughsa
     use bcpointers_d, only : ww0, ww0d, ww3, ww3d, pp0, pp0d, pp3, pp3d,&
-&   rlv0, rlv0d, rlv3, rlv3d, rev0, rev0d, rev3, rev3d, xx, xxd, istart,&
-&   jstart, isize, jsize
+&   rlv0, rlv0d, rlv3, rlv3d, rev0, rev0d, rev3, rev3d, d2wall0, &
+&   d2wall0d, d2wall3, d2wall3d, ks0, ks3, xx, xxd, istart, jstart, &
+&   isize, jsize
     use flowvarrefstate, only : viscous, eddymodel
     implicit none
 ! subroutine arguments.
@@ -696,6 +722,9 @@ subroutine bcsymmpolar2ndhalo_d(nn)
 ! eddy viscosity in the halo.
       pp0d(i, j) = pp3d(i, j)
       pp0(i, j) = pp3(i, j)
+      d2wall0d(i, j) = d2wall3d(i, j)
+      d2wall0(i, j) = d2wall3(i, j)
+      if (useroughsa) ks0(i, j) = ks3(i, j)
       if (viscous) then
         rlv0d(i, j) = rlv3d(i, j)
         rlv0(i, j) = rlv3(i, j)
@@ -715,8 +744,9 @@ subroutine bcsymmpolar2ndhalo(nn)
 ! case of a degenerate line, as this line is the axi-symmetric
 ! centerline. this routine does just the 2nd level halo.
     use constants
+    use inputphysics, only : useroughsa
     use bcpointers_d, only : ww0, ww3, pp0, pp3, rlv0, rlv3, rev0, rev3,&
-&   xx, istart, jstart, isize, jsize
+&   d2wall0, d2wall3, ks0, ks3, xx, istart, jstart, isize, jsize
     use flowvarrefstate, only : viscous, eddymodel
     implicit none
 ! subroutine arguments.
@@ -770,6 +800,8 @@ subroutine bcsymmpolar2ndhalo(nn)
 ! set the pressure and possibly the laminar and
 ! eddy viscosity in the halo.
       pp0(i, j) = pp3(i, j)
+      d2wall0(i, j) = d2wall3(i, j)
+      if (useroughsa) ks0(i, j) = ks3(i, j)
       if (viscous) rlv0(i, j) = rlv3(i, j)
       if (eddymodel) rev0(i, j) = rev3(i, j)
     end do
diff --git a/src/adjoint/outputForward/turbBCRoutines_d.f90 b/src/adjoint/outputForward/turbBCRoutines_d.f90
index a139ac136..758cddfd9 100644
--- a/src/adjoint/outputForward/turbBCRoutines_d.f90
+++ b/src/adjoint/outputForward/turbBCRoutines_d.f90
@@ -8,12 +8,12 @@ module turbbcroutines_d
 !  differentiation of applyallturbbcthisblock in forward (tangent) mode (with options i4 dr8 r8):
 !   variations   of useful results: *rev *w
 !   with respect to varying inputs: *rev *bvtj1 *bvtj2 *w *bmtk1
-!                *bmtk2 *bvtk1 *bvtk2 *bmti1 *bmti2 *bvti1 *bvti2
-!                *bmtj1 *bmtj2
+!                *bmtk2 *bvtk1 *bvtk2 *d2wall *bmti1 *bmti2 *bvti1
+!                *bvti2 *bmtj1 *bmtj2
 !   rw status of diff variables: *rev:in-out *bvtj1:in *bvtj2:in
 !                *w:in-out *bmtk1:in *bmtk2:in *bvtk1:in *bvtk2:in
-!                *bmti1:in *bmti2:in *bvti1:in *bvti2:in *bmtj1:in
-!                *bmtj2:in
+!                *d2wall:in *bmti1:in *bmti2:in *bvti1:in *bvti2:in
+!                *bmtj1:in *bmtj2:in
 !   plus diff mem management of: rev:in bvtj1:in bvtj2:in w:in
 !                bmtk1:in bmtk2:in bvtk1:in bvtk2:in d2wall:in
 !                bmti1:in bmti2:in bvti1:in bvti2:in bmtj1:in bmtj2:in
@@ -421,7 +421,7 @@ end subroutine bceddynowall
 
 !  differentiation of bceddywall in forward (tangent) mode (with options i4 dr8 r8):
 !   variations   of useful results: *rev
-!   with respect to varying inputs: *rev
+!   with respect to varying inputs: *rev *d2wall
 !   plus diff mem management of: rev:in d2wall:in
   subroutine bceddywall_d(nn)
 !
@@ -442,6 +442,7 @@ subroutine bceddywall_d(nn)
 !
     integer(kind=inttype) :: i, j
     real(kind=realtype) :: fact
+    real(kind=realtype) :: factd
 ! determine the face id on which the subface is located and
 ! loop over the faces of the subface and set the eddy viscosity
 ! in the halo cells.
@@ -449,48 +450,48 @@ subroutine bceddywall_d(nn)
     case (imin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          call saroughfact(2, i, j, fact)
-          revd(1, i, j) = fact*revd(2, i, j)
+          call saroughfact_d(2, i, j, fact, factd)
+          revd(1, i, j) = rev(2, i, j)*factd + fact*revd(2, i, j)
           rev(1, i, j) = fact*rev(2, i, j)
         end do
       end do
     case (imax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          call saroughfact(il, i, j, fact)
-          revd(ie, i, j) = fact*revd(il, i, j)
+          call saroughfact_d(il, i, j, fact, factd)
+          revd(ie, i, j) = rev(il, i, j)*factd + fact*revd(il, i, j)
           rev(ie, i, j) = fact*rev(il, i, j)
         end do
       end do
     case (jmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          call saroughfact(i, 2, j, fact)
-          revd(i, 1, j) = fact*revd(i, 2, j)
+          call saroughfact_d(i, 2, j, fact, factd)
+          revd(i, 1, j) = rev(i, 2, j)*factd + fact*revd(i, 2, j)
           rev(i, 1, j) = fact*rev(i, 2, j)
         end do
       end do
     case (jmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          call saroughfact(i, jl, j, fact)
-          revd(i, je, j) = fact*revd(i, jl, j)
+          call saroughfact_d(i, jl, j, fact, factd)
+          revd(i, je, j) = rev(i, jl, j)*factd + fact*revd(i, jl, j)
           rev(i, je, j) = fact*rev(i, jl, j)
         end do
       end do
     case (kmin) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          call saroughfact(i, j, 2, fact)
-          revd(i, j, 1) = fact*revd(i, j, 2)
+          call saroughfact_d(i, j, 2, fact, factd)
+          revd(i, j, 1) = rev(i, j, 2)*factd + fact*revd(i, j, 2)
           rev(i, j, 1) = fact*rev(i, j, 2)
         end do
       end do
     case (kmax) 
       do j=bcdata(nn)%jcbeg,bcdata(nn)%jcend
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
-          call saroughfact(i, j, kl, fact)
-          revd(i, j, ke) = fact*revd(i, j, kl)
+          call saroughfact_d(i, j, kl, fact, factd)
+          revd(i, j, ke) = rev(i, j, kl)*factd + fact*revd(i, j, kl)
           rev(i, j, ke) = fact*rev(i, j, kl)
         end do
       end do
@@ -1054,11 +1055,13 @@ end subroutine bcturbsymm
 !   variations   of useful results: *bvtj1 *bvtj2 *bmtk1 *bmtk2
 !                *bvtk1 *bvtk2 *bmti1 *bmti2 *bvti1 *bvti2 *bmtj1
 !                *bmtj2
-!   with respect to varying inputs: winf *w *rlv *d2wall
-!   rw status of diff variables: winf:in *bvtj1:out *bvtj2:out
-!                *w:in *bmtk1:out *rlv:in *bmtk2:out *bvtk1:out
-!                *bvtk2:out *d2wall:in *bmti1:out *bmti2:out *bvti1:out
-!                *bvti2:out *bmtj1:out *bmtj2:out
+!   with respect to varying inputs: winf *bvtj1 *bvtj2 *w *bmtk1
+!                *rlv *bmtk2 *bvtk1 *bvtk2 *d2wall *bmti1 *bmti2
+!                *bvti1 *bvti2 *bmtj1 *bmtj2
+!   rw status of diff variables: winf:in *bvtj1:in-out *bvtj2:in-out
+!                *w:in *bmtk1:in-out *rlv:in *bmtk2:in-out *bvtk1:in-out
+!                *bvtk2:in-out *d2wall:in *bmti1:in-out *bmti2:in-out
+!                *bvti1:in-out *bvti2:in-out *bmtj1:in-out *bmtj2:in-out
 !   plus diff mem management of: bvtj1:in bvtj2:in w:in bmtk1:in
 !                rlv:in bmtk2:in bvtk1:in bvtk2:in d2wall:in bmti1:in
 !                bmti2:in bvti1:in bvti2:in bmtj1:in bmtj2:in
@@ -1134,18 +1137,6 @@ subroutine bcturbtreatment_d()
         end do
       end do
     end do
-    if (associated(bvtj1d)) bvtj1d = 0.0_8
-    if (associated(bvtj2d)) bvtj2d = 0.0_8
-    if (associated(bmtk1d)) bmtk1d = 0.0_8
-    if (associated(bmtk2d)) bmtk2d = 0.0_8
-    if (associated(bvtk1d)) bvtk1d = 0.0_8
-    if (associated(bvtk2d)) bvtk2d = 0.0_8
-    if (associated(bmti1d)) bmti1d = 0.0_8
-    if (associated(bmti2d)) bmti2d = 0.0_8
-    if (associated(bvti1d)) bvti1d = 0.0_8
-    if (associated(bvti2d)) bvti2d = 0.0_8
-    if (associated(bmtj1d)) bmtj1d = 0.0_8
-    if (associated(bmtj2d)) bmtj2d = 0.0_8
 ! loop over the boundary condition subfaces of this block.
 bocos:do nn=1,nbocos
 ! determine the kind of boundary condition for this subface.
@@ -2401,19 +2392,19 @@ subroutine saroughfact_d(i, j, k, fact, factd)
       fact = -one
       factd = 0.0_8
       return
-    else if (((((i .lt. 2 .or. i .gt. il) .or. j .lt. 2) .or. j .gt. jl)&
-&       .or. k .lt. 2) .or. k .gt. kl) then
-! we need the distance to the wall, but this is not available for halo-cells, thus we simply return 
-! the regular sa-boundary condition
-      fact = -one
-      factd = 0.0_8
-      return
     else
       temp = ks(i, j, k) + d2wall(i, j, k)/0.03_realtype
       temp0 = (ks(i, j, k)-d2wall(i, j, k)/0.03_realtype)/temp
       factd = -((1.0/0.03_realtype+temp0/0.03_realtype)*d2walld(i, j, k)&
 &       /temp)
       fact = temp0
+      if (ks(i, j, k) .eq. 0.01 .or. d2wall(i, j, k) .eq. 0.01) print*, &
+&                                                               i, j, k&
+&                                                               , fact, &
+&                                                               d2wall(i&
+&                                                               , j, k)&
+&                                                               , ks(i, &
+&                                                               j, k)
     end if
   end subroutine saroughfact_d
 
@@ -2430,15 +2421,16 @@ subroutine saroughfact(i, j, k, fact)
     if (.not.useroughsa) then
       fact = -one
       return
-    else if (((((i .lt. 2 .or. i .gt. il) .or. j .lt. 2) .or. j .gt. jl)&
-&       .or. k .lt. 2) .or. k .gt. kl) then
-! we need the distance to the wall, but this is not available for halo-cells, thus we simply return 
-! the regular sa-boundary condition
-      fact = -one
-      return
     else
       fact = (ks(i, j, k)-d2wall(i, j, k)/0.03_realtype)/(ks(i, j, k)+&
 &       d2wall(i, j, k)/0.03_realtype)
+      if (ks(i, j, k) .eq. 0.01 .or. d2wall(i, j, k) .eq. 0.01) print*, &
+&                                                               i, j, k&
+&                                                               , fact, &
+&                                                               d2wall(i&
+&                                                               , j, k)&
+&                                                               , ks(i, &
+&                                                               j, k)
     end if
   end subroutine saroughfact
 
diff --git a/src/adjoint/outputForward/utils_d.f90 b/src/adjoint/outputForward/utils_d.f90
index ac9f6f0e4..09821228d 100644
--- a/src/adjoint/outputForward/utils_d.f90
+++ b/src/adjoint/outputForward/utils_d.f90
@@ -732,12 +732,13 @@ subroutine setbcpointers(nn, spatialpointers)
     use blockpointers, only : w, p, rlv, rev, gamma, x, d2wall, si, sj&
 &   , sk, s, globalcell, bcdata, nx, il, ie, ib, ny, jl, je, jb, nz, kl,&
 &   ke, kb, bcfaceid, addgridvelocities, sfacei, sfacej, sfacek, &
-&   addgridvelocities
+&   addgridvelocities, d2wall, ks
     use bcpointers_d, only : ww0, ww1, ww2, ww3, pp0, pp1, pp2, pp3, &
 &   rlv0, rlv1, rlv2, rlv3, rev0, rev1, rev2, rev3, gamma0, gamma1, &
 &   gamma2, gamma3, gcp, xx, ss, ssi, ssj, ssk, dd2wall, sface, istart, &
-&   iend, jstart, jend, isize, jsize
-    use inputphysics, only : cpmodel, equations
+&   iend, jstart, jend, isize, jsize, d2wall0, d2wall1, d2wall2, d2wall3&
+&   , ks0, ks1, ks2, ks3
+    use inputphysics, only : cpmodel, equations, useroughsa
     implicit none
 ! subroutine arguments.
     integer(kind=inttype), intent(in) :: nn
@@ -772,6 +773,16 @@ subroutine setbcpointers(nn, spatialpointers)
       rev2 => rev(2, 1:, 1:)
       rev1 => rev(1, 1:, 1:)
       rev0 => rev(0, 1:, 1:)
+      d2wall3 => d2wall(3, 1:, 1:)
+      d2wall2 => d2wall(2, 1:, 1:)
+      d2wall1 => d2wall(1, 1:, 1:)
+      d2wall0 => d2wall(0, 1:, 1:)
+      if (useroughsa) then
+        ks3 => ks(3, 1:, 1:)
+        ks2 => ks(2, 1:, 1:)
+        ks1 => ks(1, 1:, 1:)
+        ks0 => ks(0, 1:, 1:)
+      end if
       gamma3 => gamma(3, 1:, 1:)
       gamma2 => gamma(2, 1:, 1:)
       gamma1 => gamma(1, 1:, 1:)
@@ -795,6 +806,16 @@ subroutine setbcpointers(nn, spatialpointers)
       rev2 => rev(il, 1:, 1:)
       rev1 => rev(ie, 1:, 1:)
       rev0 => rev(ib, 1:, 1:)
+      d2wall3 => d2wall(nx, 1:, 1:)
+      d2wall2 => d2wall(il, 1:, 1:)
+      d2wall1 => d2wall(ie, 1:, 1:)
+      d2wall0 => d2wall(ib, 1:, 1:)
+      if (useroughsa) then
+        ks3 => ks(nx, 1:, 1:)
+        ks2 => ks(il, 1:, 1:)
+        ks1 => ks(ie, 1:, 1:)
+        ks0 => ks(ib, 1:, 1:)
+      end if
       gamma3 => gamma(nx, 1:, 1:)
       gamma2 => gamma(il, 1:, 1:)
       gamma1 => gamma(ie, 1:, 1:)
@@ -818,6 +839,16 @@ subroutine setbcpointers(nn, spatialpointers)
       rev2 => rev(1:, 2, 1:)
       rev1 => rev(1:, 1, 1:)
       rev0 => rev(1:, 0, 1:)
+      d2wall3 => d2wall(1:, 3, 1:)
+      d2wall2 => d2wall(1:, 2, 1:)
+      d2wall1 => d2wall(1:, 1, 1:)
+      d2wall0 => d2wall(1:, 0, 1:)
+      if (useroughsa) then
+        ks3 => ks(1:, 3, 1:)
+        ks2 => ks(1:, 2, 1:)
+        ks1 => ks(1:, 1, 1:)
+        ks0 => ks(1:, 0, 1:)
+      end if
       gamma3 => gamma(1:, 3, 1:)
       gamma2 => gamma(1:, 2, 1:)
       gamma1 => gamma(1:, 1, 1:)
@@ -841,6 +872,16 @@ subroutine setbcpointers(nn, spatialpointers)
       rev2 => rev(1:, jl, 1:)
       rev1 => rev(1:, je, 1:)
       rev0 => rev(1:, jb, 1:)
+      d2wall3 => d2wall(1:, ny, 1:)
+      d2wall2 => d2wall(1:, jl, 1:)
+      d2wall1 => d2wall(1:, je, 1:)
+      d2wall0 => d2wall(1:, jb, 1:)
+      if (useroughsa) then
+        ks3 => ks(1:, ny, 1:)
+        ks2 => ks(1:, jl, 1:)
+        ks1 => ks(1:, je, 1:)
+        ks0 => ks(1:, jb, 1:)
+      end if
       gamma3 => gamma(1:, ny, 1:)
       gamma2 => gamma(1:, jl, 1:)
       gamma1 => gamma(1:, je, 1:)
@@ -864,6 +905,16 @@ subroutine setbcpointers(nn, spatialpointers)
       rev2 => rev(1:, 1:, 2)
       rev1 => rev(1:, 1:, 1)
       rev0 => rev(1:, 1:, 0)
+      d2wall3 => d2wall(1:, 1:, 3)
+      d2wall2 => d2wall(1:, 1:, 2)
+      d2wall1 => d2wall(1:, 1:, 1)
+      d2wall0 => d2wall(1:, 1:, 0)
+      if (useroughsa) then
+        ks3 => ks(1:, 1:, 3)
+        ks2 => ks(1:, 1:, 2)
+        ks1 => ks(1:, 1:, 1)
+        ks0 => ks(1:, 1:, 0)
+      end if
       gamma3 => gamma(1:, 1:, 3)
       gamma2 => gamma(1:, 1:, 2)
       gamma1 => gamma(1:, 1:, 1)
@@ -887,6 +938,16 @@ subroutine setbcpointers(nn, spatialpointers)
       rev2 => rev(1:, 1:, kl)
       rev1 => rev(1:, 1:, ke)
       rev0 => rev(1:, 1:, kb)
+      d2wall3 => d2wall(1:, 1:, nz)
+      d2wall2 => d2wall(1:, 1:, kl)
+      d2wall1 => d2wall(1:, 1:, ke)
+      d2wall0 => d2wall(1:, 1:, kb)
+      if (useroughsa) then
+        ks3 => ks(1:, 1:, nz)
+        ks2 => ks(1:, 1:, kl)
+        ks1 => ks(1:, 1:, ke)
+        ks0 => ks(1:, 1:, kb)
+      end if
       gamma3 => gamma(1:, 1:, nz)
       gamma2 => gamma(1:, 1:, kl)
       gamma1 => gamma(1:, 1:, ke)
diff --git a/src/adjoint/outputReverse/BCRoutines_b.f90 b/src/adjoint/outputReverse/BCRoutines_b.f90
index cdbf2ebf7..e8ed35f16 100644
--- a/src/adjoint/outputReverse/BCRoutines_b.f90
+++ b/src/adjoint/outputReverse/BCRoutines_b.f90
@@ -156,15 +156,16 @@ subroutine applyallbc_block(secondhalo)
   end subroutine applyallbc_block
 
 !  differentiation of bcsymm1sthalo in reverse (adjoint) mode (with options noisize i4 dr8 r8):
-!   gradient     of useful results: *rev1 *rev2 *pp1 *pp2 *rlv1
-!                *rlv2 *ww1 *ww2 *(*bcdata.norm)
-!   with respect to varying inputs: *rev1 *rev2 *pp1 *pp2 *rlv1
-!                *rlv2 *ww1 *ww2 *(*bcdata.norm)
-!   rw status of diff variables: *rev1:in-out *rev2:incr *pp1:in-out
-!                *pp2:incr *rlv1:in-out *rlv2:incr *ww1:in-out
-!                *ww2:incr *(*bcdata.norm):incr
-!   plus diff mem management of: rev1:in rev2:in pp1:in pp2:in
-!                rlv1:in rlv2:in ww1:in ww2:in bcdata:in *bcdata.norm:in
+!   gradient     of useful results: *rev1 *rev2 *d2wall1 *d2wall2
+!                *pp1 *pp2 *rlv1 *rlv2 *ww1 *ww2 *(*bcdata.norm)
+!   with respect to varying inputs: *rev1 *rev2 *d2wall1 *d2wall2
+!                *pp1 *pp2 *rlv1 *rlv2 *ww1 *ww2 *(*bcdata.norm)
+!   rw status of diff variables: *rev1:in-out *rev2:incr *d2wall1:in-out
+!                *d2wall2:incr *pp1:in-out *pp2:incr *rlv1:in-out
+!                *rlv2:incr *ww1:in-out *ww2:incr *(*bcdata.norm):incr
+!   plus diff mem management of: rev1:in rev2:in d2wall1:in d2wall2:in
+!                pp1:in pp2:in rlv1:in rlv2:in ww1:in ww2:in bcdata:in
+!                *bcdata.norm:in
 ! ===================================================================
 !   actual implementation of each of the boundary condition routines
 ! ===================================================================
@@ -180,9 +181,11 @@ subroutine bcsymm1sthalo_b(nn)
     use constants
     use blockpointers, only : bcdata, bcdatad
     use flowvarrefstate, only : viscous, eddymodel
+    use inputphysics, only : useroughsa
     use bcpointers_b, only : gamma1, gamma2, ww1, ww1d, ww2, ww2d, pp1, &
 &   pp1d, pp2, pp2d, rlv1, rlv1d, rlv2, rlv2d, istart, jstart, isize, &
-&   jsize, rev1, rev1d, rev2, rev2d
+&   jsize, rev1, rev1d, rev2, rev2d, d2wall1, d2wall1d, d2wall2, &
+&   d2wall2d, ks1, ks2
     implicit none
 ! subroutine arguments.
     integer(kind=inttype), intent(in) :: nn
@@ -220,6 +223,8 @@ subroutine bcsymm1sthalo_b(nn)
         rlv2d(i, j) = rlv2d(i, j) + rlv1d(i, j)
         rlv1d(i, j) = 0.0_8
       end if
+      d2wall2d(i, j) = d2wall2d(i, j) + d2wall1d(i, j)
+      d2wall1d(i, j) = 0.0_8
       pp2d(i, j) = pp2d(i, j) + pp1d(i, j)
       pp1d(i, j) = 0.0_8
       ww2d(i, j, irhoe) = ww2d(i, j, irhoe) + ww1d(i, j, irhoe)
@@ -267,8 +272,10 @@ subroutine bcsymm1sthalo(nn)
     use constants
     use blockpointers, only : bcdata
     use flowvarrefstate, only : viscous, eddymodel
+    use inputphysics, only : useroughsa
     use bcpointers_b, only : gamma1, gamma2, ww1, ww2, pp1, pp2, rlv1, &
-&   rlv2, istart, jstart, isize, jsize, rev1, rev2
+&   rlv2, istart, jstart, isize, jsize, rev1, rev2, d2wall1, d2wall2, &
+&   ks1, ks2
     implicit none
 ! subroutine arguments.
     integer(kind=inttype), intent(in) :: nn
@@ -298,6 +305,8 @@ subroutine bcsymm1sthalo(nn)
 ! laminar and eddy viscosity in the halo.
       gamma1(i, j) = gamma2(i, j)
       pp1(i, j) = pp2(i, j)
+      d2wall1(i, j) = d2wall2(i, j)
+      if (useroughsa) ks1(i, j) = ks2(i, j)
       if (viscous) rlv1(i, j) = rlv2(i, j)
       if (eddymodel) rev1(i, j) = rev2(i, j)
     end do
@@ -305,14 +314,14 @@ end subroutine bcsymm1sthalo
 
 !  differentiation of bcsymm2ndhalo in reverse (adjoint) mode (with options noisize i4 dr8 r8):
 !   gradient     of useful results: *rev0 *rev3 *pp0 *pp3 *rlv0
-!                *rlv3 *ww0 *ww3 *(*bcdata.norm)
+!                *rlv3 *ww0 *ww3
 !   with respect to varying inputs: *rev0 *rev3 *pp0 *pp3 *rlv0
-!                *rlv3 *ww0 *ww3 *(*bcdata.norm)
+!                *rlv3 *ww0 *ww3
 !   rw status of diff variables: *rev0:in-out *rev3:incr *pp0:in-out
 !                *pp3:incr *rlv0:in-out *rlv3:incr *ww0:in-out
-!                *ww3:incr *(*bcdata.norm):incr
+!                *ww3:incr
 !   plus diff mem management of: rev0:in rev3:in pp0:in pp3:in
-!                rlv0:in rlv3:in ww0:in ww3:in bcdata:in *bcdata.norm:in
+!                rlv0:in rlv3:in ww0:in ww3:in
   subroutine bcsymm2ndhalo_b(nn)
 !  bcsymm2ndhalo applies the symmetry boundary conditions to a
 !  block for the 2nd halo. this routine is separate as it makes
@@ -320,9 +329,11 @@ subroutine bcsymm2ndhalo_b(nn)
     use constants
     use blockpointers, only : bcdata, bcdatad
     use flowvarrefstate, only : viscous, eddymodel
+    use inputphysics, only : useroughsa
     use bcpointers_b, only : gamma0, gamma3, ww0, ww0d, ww3, ww3d, pp0, &
-&   pp0d, pp3, pp3d, rlv0, rlv0d, rlv3, rlv3d, rev0, rev0d, rev3, rev3d,&
-&   istart, jstart, isize, jsize
+&   pp0d, pp3, pp3d, rlv0, rlv0d, rlv3, rlv3d, d2wall0, d2wall0d, &
+&   d2wall3, d2wall3d, ks0, ks3, rev0, rev0d, rev3, rev3d, istart, &
+&   jstart, isize, jsize
     implicit none
 ! subroutine arguments.
     integer(kind=inttype), intent(in) :: nn
@@ -337,9 +348,6 @@ subroutine bcsymm2ndhalo_b(nn)
     do ii=0,isize*jsize-1
       i = mod(ii, isize) + istart
       j = ii/isize + jstart
-      vn = two*(ww3(i, j, ivx)*bcdata(nn)%norm(i, j, 1)+ww3(i, j, ivy)*&
-&       bcdata(nn)%norm(i, j, 2)+ww3(i, j, ivz)*bcdata(nn)%norm(i, j, 3)&
-&       )
 ! determine the flow variables in the halo cell.
 ! set the pressure and gamma and possibly the
 ! laminar and eddy viscosity in the halo.
@@ -363,29 +371,19 @@ subroutine bcsymm2ndhalo_b(nn)
       ww0d(i, j, irhoe) = 0.0_8
       ww3d(i, j, ivz) = ww3d(i, j, ivz) + ww0d(i, j, ivz)
       vnd = -(bcdata(nn)%norm(i, j, 3)*ww0d(i, j, ivz))
-      bcdatad(nn)%norm(i, j, 3) = bcdatad(nn)%norm(i, j, 3) - vn*ww0d(i&
-&       , j, ivz)
       ww0d(i, j, ivz) = 0.0_8
       ww3d(i, j, ivy) = ww3d(i, j, ivy) + ww0d(i, j, ivy)
       vnd = vnd - bcdata(nn)%norm(i, j, 2)*ww0d(i, j, ivy)
-      bcdatad(nn)%norm(i, j, 2) = bcdatad(nn)%norm(i, j, 2) - vn*ww0d(i&
-&       , j, ivy)
       ww0d(i, j, ivy) = 0.0_8
       ww3d(i, j, ivx) = ww3d(i, j, ivx) + ww0d(i, j, ivx)
       vnd = vnd - bcdata(nn)%norm(i, j, 1)*ww0d(i, j, ivx)
-      tempd = two*vnd
-      bcdatad(nn)%norm(i, j, 1) = bcdatad(nn)%norm(i, j, 1) + ww3(i, j, &
-&       ivx)*tempd - vn*ww0d(i, j, ivx)
       ww0d(i, j, ivx) = 0.0_8
       ww3d(i, j, irho) = ww3d(i, j, irho) + ww0d(i, j, irho)
       ww0d(i, j, irho) = 0.0_8
+      tempd = two*vnd
       ww3d(i, j, ivx) = ww3d(i, j, ivx) + bcdata(nn)%norm(i, j, 1)*tempd
       ww3d(i, j, ivy) = ww3d(i, j, ivy) + bcdata(nn)%norm(i, j, 2)*tempd
-      bcdatad(nn)%norm(i, j, 2) = bcdatad(nn)%norm(i, j, 2) + ww3(i, j, &
-&       ivy)*tempd
       ww3d(i, j, ivz) = ww3d(i, j, ivz) + bcdata(nn)%norm(i, j, 3)*tempd
-      bcdatad(nn)%norm(i, j, 3) = bcdatad(nn)%norm(i, j, 3) + ww3(i, j, &
-&       ivz)*tempd
     end do
   end subroutine bcsymm2ndhalo_b
 
@@ -396,8 +394,10 @@ subroutine bcsymm2ndhalo(nn)
     use constants
     use blockpointers, only : bcdata
     use flowvarrefstate, only : viscous, eddymodel
+    use inputphysics, only : useroughsa
     use bcpointers_b, only : gamma0, gamma3, ww0, ww3, pp0, pp3, rlv0, &
-&   rlv3, rev0, rev3, istart, jstart, isize, jsize
+&   rlv3, d2wall0, d2wall3, ks0, ks3, rev0, rev3, istart, jstart, isize,&
+&   jsize
     implicit none
 ! subroutine arguments.
     integer(kind=inttype), intent(in) :: nn
@@ -424,21 +424,24 @@ subroutine bcsymm2ndhalo(nn)
 ! laminar and eddy viscosity in the halo.
       gamma0(i, j) = gamma3(i, j)
       pp0(i, j) = pp3(i, j)
+      d2wall0(i, j) = d2wall3(i, j)
+      if (useroughsa) ks0(i, j) = ks3(i, j)
       if (viscous) rlv0(i, j) = rlv3(i, j)
       if (eddymodel) rev0(i, j) = rev3(i, j)
     end do
   end subroutine bcsymm2ndhalo
 
 !  differentiation of bcsymmpolar1sthalo in reverse (adjoint) mode (with options noisize i4 dr8 r8):
-!   gradient     of useful results: *xx *rev1 *rev2 *pp1 *pp2 *rlv1
-!                *rlv2 *ww1 *ww2
-!   with respect to varying inputs: *xx *rev1 *rev2 *pp1 *pp2 *rlv1
-!                *rlv2 *ww1 *ww2
+!   gradient     of useful results: *xx *rev1 *rev2 *d2wall1 *d2wall2
+!                *pp1 *pp2 *rlv1 *rlv2 *ww1 *ww2
+!   with respect to varying inputs: *xx *rev1 *rev2 *d2wall1 *d2wall2
+!                *pp1 *pp2 *rlv1 *rlv2 *ww1 *ww2
 !   rw status of diff variables: *xx:incr *rev1:in-out *rev2:incr
-!                *pp1:in-out *pp2:incr *rlv1:in-out *rlv2:incr
-!                *ww1:in-out *ww2:incr
-!   plus diff mem management of: xx:in rev1:in rev2:in pp1:in pp2:in
-!                rlv1:in rlv2:in ww1:in ww2:in
+!                *d2wall1:in-out *d2wall2:incr *pp1:in-out *pp2:incr
+!                *rlv1:in-out *rlv2:incr *ww1:in-out *ww2:incr
+!   plus diff mem management of: xx:in rev1:in rev2:in d2wall1:in
+!                d2wall2:in pp1:in pp2:in rlv1:in rlv2:in ww1:in
+!                ww2:in
   subroutine bcsymmpolar1sthalo_b(nn)
 ! bcsymmpolar applies the polar symmetry boundary conditions to a
 ! singular line of a block. it is assumed that the pointers in
@@ -447,9 +450,10 @@ subroutine bcsymmpolar1sthalo_b(nn)
 ! case of a degenerate line, as this line is the axi-symmetric
 ! centerline. this routine does just the 1st level halo.
     use constants
+    use inputphysics, only : useroughsa
     use bcpointers_b, only : ww1, ww1d, ww2, ww2d, pp1, pp1d, pp2, pp2d,&
 &   rlv1, rlv1d, rlv2, rlv2d, rev1, rev1d, rev2, rev2d, xx, xxd, istart,&
-&   jstart, isize, jsize
+&   jstart, isize, jsize, d2wall1, d2wall1d, d2wall2, d2wall2d, ks1, ks2
     use flowvarrefstate, only : viscous, eddymodel
     implicit none
 ! subroutine arguments.
@@ -514,6 +518,8 @@ subroutine bcsymmpolar1sthalo_b(nn)
         rlv2d(i, j) = rlv2d(i, j) + rlv1d(i, j)
         rlv1d(i, j) = 0.0_8
       end if
+      d2wall2d(i, j) = d2wall2d(i, j) + d2wall1d(i, j)
+      d2wall1d(i, j) = 0.0_8
       pp2d(i, j) = pp2d(i, j) + pp1d(i, j)
       pp1d(i, j) = 0.0_8
       ww2d(i, j, irhoe) = ww2d(i, j, irhoe) + ww1d(i, j, irhoe)
@@ -569,8 +575,9 @@ subroutine bcsymmpolar1sthalo(nn)
 ! case of a degenerate line, as this line is the axi-symmetric
 ! centerline. this routine does just the 1st level halo.
     use constants
+    use inputphysics, only : useroughsa
     use bcpointers_b, only : ww1, ww2, pp1, pp2, rlv1, rlv2, rev1, rev2,&
-&   xx, istart, jstart, isize, jsize
+&   xx, istart, jstart, isize, jsize, d2wall1, d2wall2, ks1, ks2
     use flowvarrefstate, only : viscous, eddymodel
     implicit none
 ! subroutine arguments.
@@ -622,21 +629,24 @@ subroutine bcsymmpolar1sthalo(nn)
 ! set the pressure and possibly the laminar and
 ! eddy viscosity in the halo.
       pp1(i, j) = pp2(i, j)
+      d2wall1(i, j) = d2wall2(i, j)
+      if (useroughsa) ks1(i, j) = ks2(i, j)
       if (viscous) rlv1(i, j) = rlv2(i, j)
       if (eddymodel) rev1(i, j) = rev2(i, j)
     end do
   end subroutine bcsymmpolar1sthalo
 
 !  differentiation of bcsymmpolar2ndhalo in reverse (adjoint) mode (with options noisize i4 dr8 r8):
-!   gradient     of useful results: *xx *rev0 *rev3 *pp0 *pp3 *rlv0
-!                *rlv3 *ww0 *ww3
-!   with respect to varying inputs: *xx *rev0 *rev3 *pp0 *pp3 *rlv0
-!                *rlv3 *ww0 *ww3
-!   rw status of diff variables: *xx:incr *rev0:in-out *rev3:incr
-!                *pp0:in-out *pp3:incr *rlv0:in-out *rlv3:incr
-!                *ww0:in-out *ww3:incr
-!   plus diff mem management of: xx:in rev0:in rev3:in pp0:in pp3:in
-!                rlv0:in rlv3:in ww0:in ww3:in
+!   gradient     of useful results: *xx *rev0 *d2wall0 *rev3 *d2wall3
+!                *pp0 *pp3 *rlv0 *rlv3 *ww0 *ww3
+!   with respect to varying inputs: *xx *rev0 *d2wall0 *rev3 *d2wall3
+!                *pp0 *pp3 *rlv0 *rlv3 *ww0 *ww3
+!   rw status of diff variables: *xx:incr *rev0:in-out *d2wall0:in-out
+!                *rev3:incr *d2wall3:incr *pp0:in-out *pp3:incr
+!                *rlv0:in-out *rlv3:incr *ww0:in-out *ww3:incr
+!   plus diff mem management of: xx:in rev0:in d2wall0:in rev3:in
+!                d2wall3:in pp0:in pp3:in rlv0:in rlv3:in ww0:in
+!                ww3:in
   subroutine bcsymmpolar2ndhalo_b(nn)
 ! bcsymmpolar applies the polar symmetry boundary conditions to a
 ! singular line of a block. it is assumed that the pointers in
@@ -645,9 +655,11 @@ subroutine bcsymmpolar2ndhalo_b(nn)
 ! case of a degenerate line, as this line is the axi-symmetric
 ! centerline. this routine does just the 2nd level halo.
     use constants
+    use inputphysics, only : useroughsa
     use bcpointers_b, only : ww0, ww0d, ww3, ww3d, pp0, pp0d, pp3, pp3d,&
-&   rlv0, rlv0d, rlv3, rlv3d, rev0, rev0d, rev3, rev3d, xx, xxd, istart,&
-&   jstart, isize, jsize
+&   rlv0, rlv0d, rlv3, rlv3d, rev0, rev0d, rev3, rev3d, d2wall0, &
+&   d2wall0d, d2wall3, d2wall3d, ks0, ks3, xx, xxd, istart, jstart, &
+&   isize, jsize
     use flowvarrefstate, only : viscous, eddymodel
     implicit none
 ! subroutine arguments.
@@ -712,6 +724,8 @@ subroutine bcsymmpolar2ndhalo_b(nn)
         rlv3d(i, j) = rlv3d(i, j) + rlv0d(i, j)
         rlv0d(i, j) = 0.0_8
       end if
+      d2wall3d(i, j) = d2wall3d(i, j) + d2wall0d(i, j)
+      d2wall0d(i, j) = 0.0_8
       pp3d(i, j) = pp3d(i, j) + pp0d(i, j)
       pp0d(i, j) = 0.0_8
       ww3d(i, j, irhoe) = ww3d(i, j, irhoe) + ww0d(i, j, irhoe)
@@ -767,8 +781,9 @@ subroutine bcsymmpolar2ndhalo(nn)
 ! case of a degenerate line, as this line is the axi-symmetric
 ! centerline. this routine does just the 2nd level halo.
     use constants
+    use inputphysics, only : useroughsa
     use bcpointers_b, only : ww0, ww3, pp0, pp3, rlv0, rlv3, rev0, rev3,&
-&   xx, istart, jstart, isize, jsize
+&   d2wall0, d2wall3, ks0, ks3, xx, istart, jstart, isize, jsize
     use flowvarrefstate, only : viscous, eddymodel
     implicit none
 ! subroutine arguments.
@@ -818,6 +833,8 @@ subroutine bcsymmpolar2ndhalo(nn)
 ! set the pressure and possibly the laminar and
 ! eddy viscosity in the halo.
       pp0(i, j) = pp3(i, j)
+      d2wall0(i, j) = d2wall3(i, j)
+      if (useroughsa) ks0(i, j) = ks3(i, j)
       if (viscous) rlv0(i, j) = rlv3(i, j)
       if (eddymodel) rev0(i, j) = rev3(i, j)
     end do
diff --git a/src/adjoint/outputReverse/turbBCRoutines_b.f90 b/src/adjoint/outputReverse/turbBCRoutines_b.f90
index 51c8d152b..c16bc2266 100644
--- a/src/adjoint/outputReverse/turbBCRoutines_b.f90
+++ b/src/adjoint/outputReverse/turbBCRoutines_b.f90
@@ -6,14 +6,16 @@ module turbbcroutines_b
 
 contains
 !  differentiation of applyallturbbcthisblock in reverse (adjoint) mode (with options noisize i4 dr8 r8):
-!   gradient     of useful results: *rev *w
+!   gradient     of useful results: *rev *bvtj1 *bvtj2 *w *bmtk1
+!                *bmtk2 *bvtk1 *bvtk2 *d2wall *bmti1 *bmti2 *bvti1
+!                *bvti2 *bmtj1 *bmtj2
 !   with respect to varying inputs: *rev *bvtj1 *bvtj2 *w *bmtk1
-!                *bmtk2 *bvtk1 *bvtk2 *bmti1 *bmti2 *bvti1 *bvti2
-!                *bmtj1 *bmtj2
-!   rw status of diff variables: *rev:in-out *bvtj1:out *bvtj2:out
-!                *w:in-out *bmtk1:out *bmtk2:out *bvtk1:out *bvtk2:out
-!                *bmti1:out *bmti2:out *bvti1:out *bvti2:out *bmtj1:out
-!                *bmtj2:out
+!                *bmtk2 *bvtk1 *bvtk2 *d2wall *bmti1 *bmti2 *bvti1
+!                *bvti2 *bmtj1 *bmtj2
+!   rw status of diff variables: *rev:in-out *bvtj1:incr *bvtj2:incr
+!                *w:in-out *bmtk1:incr *bmtk2:incr *bvtk1:incr
+!                *bvtk2:incr *d2wall:incr *bmti1:incr *bmti2:incr
+!                *bvti1:incr *bvti2:incr *bmtj1:incr *bmtj2:incr
 !   plus diff mem management of: rev:in bvtj1:in bvtj2:in w:in
 !                bmtk1:in bmtk2:in bvtk1:in bvtk2:in d2wall:in
 !                bmti1:in bmti2:in bvti1:in bvti2:in bmtj1:in bmtj2:in
@@ -219,6 +221,13 @@ subroutine applyallturbbcthisblock_b(secondhalo)
 &           nswallisothermal) then
 ! viscous wall boundary condition. eddy viscosity is
 ! zero at the wall.
+          if (associated(rev)) then
+            call pushreal8array(rev, size(rev, 1)*size(rev, 2)*size(rev&
+&                         , 3))
+            call pushcontrol1b(1)
+          else
+            call pushcontrol1b(0)
+          end if
           call bceddywall(nn)
           call pushcontrol2b(0)
         else
@@ -246,18 +255,6 @@ subroutine applyallturbbcthisblock_b(secondhalo)
         call pushcontrol1b(0)
       end if
     end do bocos
-    if (associated(bvtj1d)) bvtj1d = 0.0_8
-    if (associated(bvtj2d)) bvtj2d = 0.0_8
-    if (associated(bmtk1d)) bmtk1d = 0.0_8
-    if (associated(bmtk2d)) bmtk2d = 0.0_8
-    if (associated(bvtk1d)) bvtk1d = 0.0_8
-    if (associated(bvtk2d)) bvtk2d = 0.0_8
-    if (associated(bmti1d)) bmti1d = 0.0_8
-    if (associated(bmti2d)) bmti2d = 0.0_8
-    if (associated(bvti1d)) bvti1d = 0.0_8
-    if (associated(bvti2d)) bvti2d = 0.0_8
-    if (associated(bmtj1d)) bmtj1d = 0.0_8
-    if (associated(bmtj2d)) bmtj2d = 0.0_8
     do nn=nbocos,1,-1
       call popcontrol1b(branch)
       if (branch .ne. 0) then
@@ -268,6 +265,9 @@ subroutine applyallturbbcthisblock_b(secondhalo)
       end if
       call popcontrol2b(branch)
       if (branch .eq. 0) then
+        call popcontrol1b(branch)
+        if (branch .eq. 1) call popreal8array(rev, size(rev, 1)*size(rev&
+&                                       , 2)*size(rev, 3))
         call bceddywall_b(nn)
       else if (branch .eq. 1) then
         call bceddynowall_b(nn)
@@ -678,8 +678,8 @@ subroutine bceddynowall(nn)
   end subroutine bceddynowall
 
 !  differentiation of bceddywall in reverse (adjoint) mode (with options noisize i4 dr8 r8):
-!   gradient     of useful results: *rev
-!   with respect to varying inputs: *rev
+!   gradient     of useful results: *rev *d2wall
+!   with respect to varying inputs: *rev *d2wall
 !   plus diff mem management of: rev:in d2wall:in
   subroutine bceddywall_b(nn)
 !
@@ -700,6 +700,7 @@ subroutine bceddywall_b(nn)
 !
     integer(kind=inttype) :: i, j
     real(kind=realtype) :: fact
+    real(kind=realtype) :: factd
     real(kind=realtype) :: tmp
     real(kind=realtype) :: tmpd
     real(kind=realtype) :: tmp0
@@ -715,13 +716,18 @@ subroutine bceddywall_b(nn)
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           call pushreal8(fact)
           call saroughfact(2, i, j, fact)
+          call pushreal8(rev(1, i, j))
+          rev(1, i, j) = fact*rev(2, i, j)
         end do
       end do
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
         do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
+          call popreal8(rev(1, i, j))
+          factd = rev(2, i, j)*revd(1, i, j)
           revd(2, i, j) = revd(2, i, j) + fact*revd(1, i, j)
           revd(1, i, j) = 0.0_8
           call popreal8(fact)
+          call saroughfact_b(2, i, j, fact, factd)
         end do
       end do
     case (imax) 
@@ -729,14 +735,20 @@ subroutine bceddywall_b(nn)
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           call pushreal8(fact)
           call saroughfact(il, i, j, fact)
+          tmp = fact*rev(il, i, j)
+          call pushreal8(rev(ie, i, j))
+          rev(ie, i, j) = tmp
         end do
       end do
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
         do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
+          call popreal8(rev(ie, i, j))
           tmpd = revd(ie, i, j)
           revd(ie, i, j) = 0.0_8
+          factd = rev(il, i, j)*tmpd
           revd(il, i, j) = revd(il, i, j) + fact*tmpd
           call popreal8(fact)
+          call saroughfact_b(il, i, j, fact, factd)
         end do
       end do
     case (jmin) 
@@ -744,13 +756,18 @@ subroutine bceddywall_b(nn)
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           call pushreal8(fact)
           call saroughfact(i, 2, j, fact)
+          call pushreal8(rev(i, 1, j))
+          rev(i, 1, j) = fact*rev(i, 2, j)
         end do
       end do
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
         do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
+          call popreal8(rev(i, 1, j))
+          factd = rev(i, 2, j)*revd(i, 1, j)
           revd(i, 2, j) = revd(i, 2, j) + fact*revd(i, 1, j)
           revd(i, 1, j) = 0.0_8
           call popreal8(fact)
+          call saroughfact_b(i, 2, j, fact, factd)
         end do
       end do
     case (jmax) 
@@ -758,14 +775,20 @@ subroutine bceddywall_b(nn)
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           call pushreal8(fact)
           call saroughfact(i, jl, j, fact)
+          tmp0 = fact*rev(i, jl, j)
+          call pushreal8(rev(i, je, j))
+          rev(i, je, j) = tmp0
         end do
       end do
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
         do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
+          call popreal8(rev(i, je, j))
           tmpd0 = revd(i, je, j)
           revd(i, je, j) = 0.0_8
+          factd = rev(i, jl, j)*tmpd0
           revd(i, jl, j) = revd(i, jl, j) + fact*tmpd0
           call popreal8(fact)
+          call saroughfact_b(i, jl, j, fact, factd)
         end do
       end do
     case (kmin) 
@@ -773,13 +796,18 @@ subroutine bceddywall_b(nn)
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           call pushreal8(fact)
           call saroughfact(i, j, 2, fact)
+          call pushreal8(rev(i, j, 1))
+          rev(i, j, 1) = fact*rev(i, j, 2)
         end do
       end do
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
         do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
+          call popreal8(rev(i, j, 1))
+          factd = rev(i, j, 2)*revd(i, j, 1)
           revd(i, j, 2) = revd(i, j, 2) + fact*revd(i, j, 1)
           revd(i, j, 1) = 0.0_8
           call popreal8(fact)
+          call saroughfact_b(i, j, 2, fact, factd)
         end do
       end do
     case (kmax) 
@@ -787,14 +815,20 @@ subroutine bceddywall_b(nn)
         do i=bcdata(nn)%icbeg,bcdata(nn)%icend
           call pushreal8(fact)
           call saroughfact(i, j, kl, fact)
+          tmp1 = fact*rev(i, j, kl)
+          call pushreal8(rev(i, j, ke))
+          rev(i, j, ke) = tmp1
         end do
       end do
       do j=bcdata(nn)%jcend,bcdata(nn)%jcbeg,-1
         do i=bcdata(nn)%icend,bcdata(nn)%icbeg,-1
+          call popreal8(rev(i, j, ke))
           tmpd1 = revd(i, j, ke)
           revd(i, j, ke) = 0.0_8
+          factd = rev(i, j, kl)*tmpd1
           revd(i, j, kl) = revd(i, j, kl) + fact*tmpd1
           call popreal8(fact)
+          call saroughfact_b(i, j, kl, fact, factd)
         end do
       end do
     end select
@@ -2994,15 +3028,10 @@ subroutine saroughfact_b(i, j, k, fact, factd)
     real(kind=realtype) :: factd
     real(kind=realtype) :: temp
     if (useroughsa) then
-! we need the distance to the wall, but this is not available for halo-cells, thus we simply return 
-! the regular sa-boundary condition
-      if (.not.(((((i .lt. 2 .or. i .gt. il) .or. j .lt. 2) .or. j .gt. &
-&         jl) .or. k .lt. 2) .or. k .gt. kl)) then
-        temp = ks(i, j, k) + d2wall(i, j, k)/0.03_realtype
-        d2walld(i, j, k) = d2walld(i, j, k) - (1.0/(0.03_realtype*temp)+&
-&         (ks(i, j, k)-d2wall(i, j, k)/0.03_realtype)/(0.03_realtype*&
-&         temp**2))*factd
-      end if
+      temp = ks(i, j, k) + d2wall(i, j, k)/0.03_realtype
+      d2walld(i, j, k) = d2walld(i, j, k) - (1.0/(0.03_realtype*temp)+(&
+&       ks(i, j, k)-d2wall(i, j, k)/0.03_realtype)/(0.03_realtype*temp**&
+&       2))*factd
     end if
   end subroutine saroughfact_b
 
@@ -3019,15 +3048,16 @@ subroutine saroughfact(i, j, k, fact)
     if (.not.useroughsa) then
       fact = -one
       return
-    else if (((((i .lt. 2 .or. i .gt. il) .or. j .lt. 2) .or. j .gt. jl)&
-&       .or. k .lt. 2) .or. k .gt. kl) then
-! we need the distance to the wall, but this is not available for halo-cells, thus we simply return 
-! the regular sa-boundary condition
-      fact = -one
-      return
     else
       fact = (ks(i, j, k)-d2wall(i, j, k)/0.03_realtype)/(ks(i, j, k)+&
 &       d2wall(i, j, k)/0.03_realtype)
+      if (ks(i, j, k) .eq. 0.01 .or. d2wall(i, j, k) .eq. 0.01) print*, &
+&                                                               i, j, k&
+&                                                               , fact, &
+&                                                               d2wall(i&
+&                                                               , j, k)&
+&                                                               , ks(i, &
+&                                                               j, k)
     end if
   end subroutine saroughfact
 
diff --git a/src/adjoint/outputReverse/utils_b.f90 b/src/adjoint/outputReverse/utils_b.f90
index 3ad24e4bb..9d3f1d1f8 100644
--- a/src/adjoint/outputReverse/utils_b.f90
+++ b/src/adjoint/outputReverse/utils_b.f90
@@ -725,12 +725,13 @@ subroutine setbcpointers(nn, spatialpointers)
     use blockpointers, only : w, p, rlv, rev, gamma, x, d2wall, si, sj&
 &   , sk, s, globalcell, bcdata, nx, il, ie, ib, ny, jl, je, jb, nz, kl,&
 &   ke, kb, bcfaceid, addgridvelocities, sfacei, sfacej, sfacek, &
-&   addgridvelocities
+&   addgridvelocities, d2wall, ks
     use bcpointers_b, only : ww0, ww1, ww2, ww3, pp0, pp1, pp2, pp3, &
 &   rlv0, rlv1, rlv2, rlv3, rev0, rev1, rev2, rev3, gamma0, gamma1, &
 &   gamma2, gamma3, gcp, xx, ss, ssi, ssj, ssk, dd2wall, sface, istart, &
-&   iend, jstart, jend, isize, jsize
-    use inputphysics, only : cpmodel, equations
+&   iend, jstart, jend, isize, jsize, d2wall0, d2wall1, d2wall2, d2wall3&
+&   , ks0, ks1, ks2, ks3
+    use inputphysics, only : cpmodel, equations, useroughsa
     implicit none
 ! subroutine arguments.
     integer(kind=inttype), intent(in) :: nn
@@ -765,6 +766,16 @@ subroutine setbcpointers(nn, spatialpointers)
       rev2 => rev(2, 1:, 1:)
       rev1 => rev(1, 1:, 1:)
       rev0 => rev(0, 1:, 1:)
+      d2wall3 => d2wall(3, 1:, 1:)
+      d2wall2 => d2wall(2, 1:, 1:)
+      d2wall1 => d2wall(1, 1:, 1:)
+      d2wall0 => d2wall(0, 1:, 1:)
+      if (useroughsa) then
+        ks3 => ks(3, 1:, 1:)
+        ks2 => ks(2, 1:, 1:)
+        ks1 => ks(1, 1:, 1:)
+        ks0 => ks(0, 1:, 1:)
+      end if
       gamma3 => gamma(3, 1:, 1:)
       gamma2 => gamma(2, 1:, 1:)
       gamma1 => gamma(1, 1:, 1:)
@@ -788,6 +799,16 @@ subroutine setbcpointers(nn, spatialpointers)
       rev2 => rev(il, 1:, 1:)
       rev1 => rev(ie, 1:, 1:)
       rev0 => rev(ib, 1:, 1:)
+      d2wall3 => d2wall(nx, 1:, 1:)
+      d2wall2 => d2wall(il, 1:, 1:)
+      d2wall1 => d2wall(ie, 1:, 1:)
+      d2wall0 => d2wall(ib, 1:, 1:)
+      if (useroughsa) then
+        ks3 => ks(nx, 1:, 1:)
+        ks2 => ks(il, 1:, 1:)
+        ks1 => ks(ie, 1:, 1:)
+        ks0 => ks(ib, 1:, 1:)
+      end if
       gamma3 => gamma(nx, 1:, 1:)
       gamma2 => gamma(il, 1:, 1:)
       gamma1 => gamma(ie, 1:, 1:)
@@ -811,6 +832,16 @@ subroutine setbcpointers(nn, spatialpointers)
       rev2 => rev(1:, 2, 1:)
       rev1 => rev(1:, 1, 1:)
       rev0 => rev(1:, 0, 1:)
+      d2wall3 => d2wall(1:, 3, 1:)
+      d2wall2 => d2wall(1:, 2, 1:)
+      d2wall1 => d2wall(1:, 1, 1:)
+      d2wall0 => d2wall(1:, 0, 1:)
+      if (useroughsa) then
+        ks3 => ks(1:, 3, 1:)
+        ks2 => ks(1:, 2, 1:)
+        ks1 => ks(1:, 1, 1:)
+        ks0 => ks(1:, 0, 1:)
+      end if
       gamma3 => gamma(1:, 3, 1:)
       gamma2 => gamma(1:, 2, 1:)
       gamma1 => gamma(1:, 1, 1:)
@@ -834,6 +865,16 @@ subroutine setbcpointers(nn, spatialpointers)
       rev2 => rev(1:, jl, 1:)
       rev1 => rev(1:, je, 1:)
       rev0 => rev(1:, jb, 1:)
+      d2wall3 => d2wall(1:, ny, 1:)
+      d2wall2 => d2wall(1:, jl, 1:)
+      d2wall1 => d2wall(1:, je, 1:)
+      d2wall0 => d2wall(1:, jb, 1:)
+      if (useroughsa) then
+        ks3 => ks(1:, ny, 1:)
+        ks2 => ks(1:, jl, 1:)
+        ks1 => ks(1:, je, 1:)
+        ks0 => ks(1:, jb, 1:)
+      end if
       gamma3 => gamma(1:, ny, 1:)
       gamma2 => gamma(1:, jl, 1:)
       gamma1 => gamma(1:, je, 1:)
@@ -857,6 +898,16 @@ subroutine setbcpointers(nn, spatialpointers)
       rev2 => rev(1:, 1:, 2)
       rev1 => rev(1:, 1:, 1)
       rev0 => rev(1:, 1:, 0)
+      d2wall3 => d2wall(1:, 1:, 3)
+      d2wall2 => d2wall(1:, 1:, 2)
+      d2wall1 => d2wall(1:, 1:, 1)
+      d2wall0 => d2wall(1:, 1:, 0)
+      if (useroughsa) then
+        ks3 => ks(1:, 1:, 3)
+        ks2 => ks(1:, 1:, 2)
+        ks1 => ks(1:, 1:, 1)
+        ks0 => ks(1:, 1:, 0)
+      end if
       gamma3 => gamma(1:, 1:, 3)
       gamma2 => gamma(1:, 1:, 2)
       gamma1 => gamma(1:, 1:, 1)
@@ -880,6 +931,16 @@ subroutine setbcpointers(nn, spatialpointers)
       rev2 => rev(1:, 1:, kl)
       rev1 => rev(1:, 1:, ke)
       rev0 => rev(1:, 1:, kb)
+      d2wall3 => d2wall(1:, 1:, nz)
+      d2wall2 => d2wall(1:, 1:, kl)
+      d2wall1 => d2wall(1:, 1:, ke)
+      d2wall0 => d2wall(1:, 1:, kb)
+      if (useroughsa) then
+        ks3 => ks(1:, 1:, nz)
+        ks2 => ks(1:, 1:, kl)
+        ks1 => ks(1:, 1:, ke)
+        ks0 => ks(1:, 1:, kb)
+      end if
       gamma3 => gamma(1:, 1:, nz)
       gamma2 => gamma(1:, 1:, kl)
       gamma1 => gamma(1:, 1:, ke)
diff --git a/src/adjoint/outputReverseFast/BCRoutines_fast_b.f90 b/src/adjoint/outputReverseFast/BCRoutines_fast_b.f90
index 37f0e540f..cd24ca801 100644
--- a/src/adjoint/outputReverseFast/BCRoutines_fast_b.f90
+++ b/src/adjoint/outputReverseFast/BCRoutines_fast_b.f90
@@ -170,8 +170,10 @@ subroutine bcsymm1sthalo(nn)
     use constants
     use blockpointers, only : bcdata
     use flowvarrefstate, only : viscous, eddymodel
+    use inputphysics, only : useroughsa
     use bcpointers, only : gamma1, gamma2, ww1, ww2, pp1, pp2, rlv1, &
-&   rlv2, istart, jstart, isize, jsize, rev1, rev2
+&   rlv2, istart, jstart, isize, jsize, rev1, rev2, d2wall1, d2wall2, &
+&   ks1, ks2
     implicit none
 ! subroutine arguments.
     integer(kind=inttype), intent(in) :: nn
@@ -201,6 +203,8 @@ subroutine bcsymm1sthalo(nn)
 ! laminar and eddy viscosity in the halo.
       gamma1(i, j) = gamma2(i, j)
       pp1(i, j) = pp2(i, j)
+      d2wall1(i, j) = d2wall2(i, j)
+      if (useroughsa) ks1(i, j) = ks2(i, j)
       if (viscous) rlv1(i, j) = rlv2(i, j)
       if (eddymodel) rev1(i, j) = rev2(i, j)
     end do
@@ -213,8 +217,10 @@ subroutine bcsymm2ndhalo(nn)
     use constants
     use blockpointers, only : bcdata
     use flowvarrefstate, only : viscous, eddymodel
+    use inputphysics, only : useroughsa
     use bcpointers, only : gamma0, gamma3, ww0, ww3, pp0, pp3, rlv0, &
-&   rlv3, rev0, rev3, istart, jstart, isize, jsize
+&   rlv3, d2wall0, d2wall3, ks0, ks3, rev0, rev3, istart, jstart, isize,&
+&   jsize
     implicit none
 ! subroutine arguments.
     integer(kind=inttype), intent(in) :: nn
@@ -241,6 +247,8 @@ subroutine bcsymm2ndhalo(nn)
 ! laminar and eddy viscosity in the halo.
       gamma0(i, j) = gamma3(i, j)
       pp0(i, j) = pp3(i, j)
+      d2wall0(i, j) = d2wall3(i, j)
+      if (useroughsa) ks0(i, j) = ks3(i, j)
       if (viscous) rlv0(i, j) = rlv3(i, j)
       if (eddymodel) rev0(i, j) = rev3(i, j)
     end do
@@ -254,8 +262,9 @@ subroutine bcsymmpolar1sthalo(nn)
 ! case of a degenerate line, as this line is the axi-symmetric
 ! centerline. this routine does just the 1st level halo.
     use constants
+    use inputphysics, only : useroughsa
     use bcpointers, only : ww1, ww2, pp1, pp2, rlv1, rlv2, rev1, rev2, &
-&   xx, istart, jstart, isize, jsize
+&   xx, istart, jstart, isize, jsize, d2wall1, d2wall2, ks1, ks2
     use flowvarrefstate, only : viscous, eddymodel
     implicit none
 ! subroutine arguments.
@@ -307,6 +316,8 @@ subroutine bcsymmpolar1sthalo(nn)
 ! set the pressure and possibly the laminar and
 ! eddy viscosity in the halo.
       pp1(i, j) = pp2(i, j)
+      d2wall1(i, j) = d2wall2(i, j)
+      if (useroughsa) ks1(i, j) = ks2(i, j)
       if (viscous) rlv1(i, j) = rlv2(i, j)
       if (eddymodel) rev1(i, j) = rev2(i, j)
     end do
@@ -320,8 +331,9 @@ subroutine bcsymmpolar2ndhalo(nn)
 ! case of a degenerate line, as this line is the axi-symmetric
 ! centerline. this routine does just the 2nd level halo.
     use constants
+    use inputphysics, only : useroughsa
     use bcpointers, only : ww0, ww3, pp0, pp3, rlv0, rlv3, rev0, rev3, &
-&   xx, istart, jstart, isize, jsize
+&   d2wall0, d2wall3, ks0, ks3, xx, istart, jstart, isize, jsize
     use flowvarrefstate, only : viscous, eddymodel
     implicit none
 ! subroutine arguments.
@@ -371,6 +383,8 @@ subroutine bcsymmpolar2ndhalo(nn)
 ! set the pressure and possibly the laminar and
 ! eddy viscosity in the halo.
       pp0(i, j) = pp3(i, j)
+      d2wall0(i, j) = d2wall3(i, j)
+      if (useroughsa) ks0(i, j) = ks3(i, j)
       if (viscous) rlv0(i, j) = rlv3(i, j)
       if (eddymodel) rev0(i, j) = rev3(i, j)
     end do
diff --git a/src/adjoint/outputReverseFast/turbBCRoutines_fast_b.f90 b/src/adjoint/outputReverseFast/turbBCRoutines_fast_b.f90
index ec5103154..16d4465f5 100644
--- a/src/adjoint/outputReverseFast/turbBCRoutines_fast_b.f90
+++ b/src/adjoint/outputReverseFast/turbBCRoutines_fast_b.f90
@@ -1196,15 +1196,16 @@ subroutine saroughfact(i, j, k, fact)
     if (.not.useroughsa) then
       fact = -one
       return
-    else if (((((i .lt. 2 .or. i .gt. il) .or. j .lt. 2) .or. j .gt. jl)&
-&       .or. k .lt. 2) .or. k .gt. kl) then
-! we need the distance to the wall, but this is not available for halo-cells, thus we simply return 
-! the regular sa-boundary condition
-      fact = -one
-      return
     else
       fact = (ks(i, j, k)-d2wall(i, j, k)/0.03_realtype)/(ks(i, j, k)+&
 &       d2wall(i, j, k)/0.03_realtype)
+      if (ks(i, j, k) .eq. 0.01 .or. d2wall(i, j, k) .eq. 0.01) print*, &
+&                                                               i, j, k&
+&                                                               , fact, &
+&                                                               d2wall(i&
+&                                                               , j, k)&
+&                                                               , ks(i, &
+&                                                               j, k)
     end if
   end subroutine saroughfact
 
diff --git a/src/adjoint/outputReverseFast/utils_fast_b.f90 b/src/adjoint/outputReverseFast/utils_fast_b.f90
index 09ee3bfa9..744c9bb53 100644
--- a/src/adjoint/outputReverseFast/utils_fast_b.f90
+++ b/src/adjoint/outputReverseFast/utils_fast_b.f90
@@ -671,12 +671,13 @@ subroutine setbcpointers(nn, spatialpointers)
     use blockpointers, only : w, p, rlv, rev, gamma, x, d2wall, &
 &   si, sj, sk, s, globalcell, bcdata, nx, il, ie, ib, ny, jl, je, jb, &
 &   nz, kl, ke, kb, bcfaceid, addgridvelocities, sfacei, sfacej, sfacek,&
-&   addgridvelocities
+&   addgridvelocities, d2wall, ks
     use bcpointers, only : ww0, ww1, ww2, ww3, pp0, pp1, pp2, pp3, rlv0,&
 &   rlv1, rlv2, rlv3, rev0, rev1, rev2, rev3, gamma0, gamma1, gamma2, &
 &   gamma3, gcp, xx, ss, ssi, ssj, ssk, dd2wall, sface, istart, iend, &
-&   jstart, jend, isize, jsize
-    use inputphysics, only : cpmodel, equations
+&   jstart, jend, isize, jsize, d2wall0, d2wall1, d2wall2, d2wall3, ks0,&
+&   ks1, ks2, ks3
+    use inputphysics, only : cpmodel, equations, useroughsa
     implicit none
 ! subroutine arguments.
     integer(kind=inttype), intent(in) :: nn
@@ -711,6 +712,16 @@ subroutine setbcpointers(nn, spatialpointers)
       rev2 => rev(2, 1:, 1:)
       rev1 => rev(1, 1:, 1:)
       rev0 => rev(0, 1:, 1:)
+      d2wall3 => d2wall(3, 1:, 1:)
+      d2wall2 => d2wall(2, 1:, 1:)
+      d2wall1 => d2wall(1, 1:, 1:)
+      d2wall0 => d2wall(0, 1:, 1:)
+      if (useroughsa) then
+        ks3 => ks(3, 1:, 1:)
+        ks2 => ks(2, 1:, 1:)
+        ks1 => ks(1, 1:, 1:)
+        ks0 => ks(0, 1:, 1:)
+      end if
       gamma3 => gamma(3, 1:, 1:)
       gamma2 => gamma(2, 1:, 1:)
       gamma1 => gamma(1, 1:, 1:)
@@ -734,6 +745,16 @@ subroutine setbcpointers(nn, spatialpointers)
       rev2 => rev(il, 1:, 1:)
       rev1 => rev(ie, 1:, 1:)
       rev0 => rev(ib, 1:, 1:)
+      d2wall3 => d2wall(nx, 1:, 1:)
+      d2wall2 => d2wall(il, 1:, 1:)
+      d2wall1 => d2wall(ie, 1:, 1:)
+      d2wall0 => d2wall(ib, 1:, 1:)
+      if (useroughsa) then
+        ks3 => ks(nx, 1:, 1:)
+        ks2 => ks(il, 1:, 1:)
+        ks1 => ks(ie, 1:, 1:)
+        ks0 => ks(ib, 1:, 1:)
+      end if
       gamma3 => gamma(nx, 1:, 1:)
       gamma2 => gamma(il, 1:, 1:)
       gamma1 => gamma(ie, 1:, 1:)
@@ -757,6 +778,16 @@ subroutine setbcpointers(nn, spatialpointers)
       rev2 => rev(1:, 2, 1:)
       rev1 => rev(1:, 1, 1:)
       rev0 => rev(1:, 0, 1:)
+      d2wall3 => d2wall(1:, 3, 1:)
+      d2wall2 => d2wall(1:, 2, 1:)
+      d2wall1 => d2wall(1:, 1, 1:)
+      d2wall0 => d2wall(1:, 0, 1:)
+      if (useroughsa) then
+        ks3 => ks(1:, 3, 1:)
+        ks2 => ks(1:, 2, 1:)
+        ks1 => ks(1:, 1, 1:)
+        ks0 => ks(1:, 0, 1:)
+      end if
       gamma3 => gamma(1:, 3, 1:)
       gamma2 => gamma(1:, 2, 1:)
       gamma1 => gamma(1:, 1, 1:)
@@ -780,6 +811,16 @@ subroutine setbcpointers(nn, spatialpointers)
       rev2 => rev(1:, jl, 1:)
       rev1 => rev(1:, je, 1:)
       rev0 => rev(1:, jb, 1:)
+      d2wall3 => d2wall(1:, ny, 1:)
+      d2wall2 => d2wall(1:, jl, 1:)
+      d2wall1 => d2wall(1:, je, 1:)
+      d2wall0 => d2wall(1:, jb, 1:)
+      if (useroughsa) then
+        ks3 => ks(1:, ny, 1:)
+        ks2 => ks(1:, jl, 1:)
+        ks1 => ks(1:, je, 1:)
+        ks0 => ks(1:, jb, 1:)
+      end if
       gamma3 => gamma(1:, ny, 1:)
       gamma2 => gamma(1:, jl, 1:)
       gamma1 => gamma(1:, je, 1:)
@@ -803,6 +844,16 @@ subroutine setbcpointers(nn, spatialpointers)
       rev2 => rev(1:, 1:, 2)
       rev1 => rev(1:, 1:, 1)
       rev0 => rev(1:, 1:, 0)
+      d2wall3 => d2wall(1:, 1:, 3)
+      d2wall2 => d2wall(1:, 1:, 2)
+      d2wall1 => d2wall(1:, 1:, 1)
+      d2wall0 => d2wall(1:, 1:, 0)
+      if (useroughsa) then
+        ks3 => ks(1:, 1:, 3)
+        ks2 => ks(1:, 1:, 2)
+        ks1 => ks(1:, 1:, 1)
+        ks0 => ks(1:, 1:, 0)
+      end if
       gamma3 => gamma(1:, 1:, 3)
       gamma2 => gamma(1:, 1:, 2)
       gamma1 => gamma(1:, 1:, 1)
@@ -826,6 +877,16 @@ subroutine setbcpointers(nn, spatialpointers)
       rev2 => rev(1:, 1:, kl)
       rev1 => rev(1:, 1:, ke)
       rev0 => rev(1:, 1:, kb)
+      d2wall3 => d2wall(1:, 1:, nz)
+      d2wall2 => d2wall(1:, 1:, kl)
+      d2wall1 => d2wall(1:, 1:, ke)
+      d2wall0 => d2wall(1:, 1:, kb)
+      if (useroughsa) then
+        ks3 => ks(1:, 1:, nz)
+        ks2 => ks(1:, 1:, kl)
+        ks1 => ks(1:, 1:, ke)
+        ks0 => ks(1:, 1:, kb)
+      end if
       gamma3 => gamma(1:, 1:, nz)
       gamma2 => gamma(1:, 1:, kl)
       gamma1 => gamma(1:, 1:, ke)
diff --git a/src/modules/BCPointers.F90 b/src/modules/BCPointers.F90
index 39b8c6245..f52eda2e8 100644
--- a/src/modules/BCPointers.F90
+++ b/src/modules/BCPointers.F90
@@ -10,6 +10,8 @@ module BCPointers
     real(kind=realType), dimension(:, :, :), pointer :: ww0, ww1, ww2, ww3
     real(kind=realType), dimension(:, :), pointer :: pp0, pp1, pp2, pp3
     real(kind=realType), dimension(:, :), pointer :: rlv0, rlv1, rlv2, rlv3
+    real(kind=realType), dimension(:, :), pointer :: d2wall0, d2wall1, d2wall2, d2wall3
+    real(kind=realType), dimension(:, :), pointer :: ks0, ks1, ks2, ks3
     real(kind=realType), dimension(:, :), pointer :: rev0, rev1, rev2, rev3
     real(kind=realType), dimension(:, :), pointer :: gamma0, gamma1, gamma2, gamma3
     real(kind=realType), dimension(:, :, :), pointer :: ssi, ssj, ssk
@@ -25,6 +27,7 @@ module BCPointers
     real(kind=realType), dimension(:, :), pointer :: pp0d, pp1d, pp2d, pp3d
     real(kind=realType), dimension(:, :), pointer :: rlv0d, rlv1d, rlv2d, rlv3d
     real(kind=realType), dimension(:, :), pointer :: rev0d, rev1d, rev2d, rev3d
+    real(kind=realType), dimension(:, :), pointer :: d2wall0d, d2wall1d, d2wall2d, d2wall3d
     real(kind=realType), dimension(:, :), pointer :: gamma0d, gamma1d, gamma2d, gamma3d
     real(kind=realType), dimension(:, :, :), pointer :: ssid, ssjd, sskd, xxd
     real(kind=realType), dimension(:, :, :), pointer :: ssd
diff --git a/src/modules/block.F90 b/src/modules/block.F90
index a5f5884a4..ab3b66078 100644
--- a/src/modules/block.F90
+++ b/src/modules/block.F90
@@ -658,13 +658,14 @@ module block
         !
         !         Turbulence model variables.
         !
-        ! d2Wall(2:il,2:jl,2:kl) - Distance from the center of the cell
-        !                          to the nearest viscous wall.
+        ! d2Wall(0:ib, 0:jb, 0:kb) - Distance from the center of the cell
+        !                          to the nearest viscous wall. It is exchanged for non-BC halo cells and extrapolated 
+        !                          for symmetry type BCs. For other BCs, this value is initialize to a large number
         ! intermittency( )       - Function defining the transition location
         !
         ! The next two variables are only initialized if roughness is requested (useRoughSA = True)
         ! nearestWallCellInd(2:il,2:jl,2:kl)   - global cell ID for the nearest wall cell; is needed for rougness
-        ! ks(2:il,2:jl,2:kl)     - Roughness value of the nearest wall
+        ! ks(0:ib, 0:jb, 0:kb)     - Roughness value of the nearest wall - see d2wall description for the values set
 
         real(kind=realType), dimension(:, :, :), pointer :: d2Wall, filterDES
         real(kind=realType), dimension(:, :, :), pointer :: intermittency
diff --git a/src/preprocessing/preprocessingAPI.F90 b/src/preprocessing/preprocessingAPI.F90
index 5285b7f4e..c6f4098a6 100644
--- a/src/preprocessing/preprocessingAPI.F90
+++ b/src/preprocessing/preprocessingAPI.F90
@@ -19,6 +19,7 @@ subroutine preprocessing
                                  recvBufferSize_1to1, sendBufferSize_1to1, sendBufferSIzeOver, &
                                  recvBufferSizeOver, commPatternOverset, internalOverset, sendBuffer, &
                                  recvBuffer, sendBufferSize, recvBufferSize
+        use haloExchange, only: exchanged2Wall
         use inputPhysics
         use inputTimeSpectral
         use section
@@ -239,6 +240,9 @@ subroutine preprocessing
         ! be done and overset connectivity computed
         do level = 1, nLevels
             call computeWallDistance(level, .True.)
+            if (wallDistanceNeeded) then
+                call exchanged2Wall(level)
+            end if
         end do
         call preprocessingADjoint
 
diff --git a/src/solver/BCRoutines.F90 b/src/solver/BCRoutines.F90
index 9ce054802..4c48da790 100644
--- a/src/solver/BCRoutines.F90
+++ b/src/solver/BCRoutines.F90
@@ -234,8 +234,9 @@ subroutine bcSymm1stHalo(nn)
         use constants
         use blockPointers, only: BCdata
         use flowVarRefState, only: viscous, eddyModel
+        use inputPhysics, only: useRoughSA
         use BCPointers, only: gamma1, gamma2, ww1, ww2, pp1, pp2, rlv1, rlv2, &
-                              iStart, jStart, iSize, jSize, rev1, rev2
+                              iStart, jStart, iSize, jSize, rev1, rev2, d2wall1, d2wall2, ks1, ks2
         implicit none
 
         ! Subroutine arguments.
@@ -274,6 +275,8 @@ subroutine bcSymm1stHalo(nn)
 
             gamma1(i, j) = gamma2(i, j)
             pp1(i, j) = pp2(i, j)
+            d2wall1(i, j) = d2wall2(i, j)
+            if (useRoughSA) ks1(i, j) = ks2(i, j)
             if (viscous) rlv1(i, j) = rlv2(i, j)
             if (eddyModel) rev1(i, j) = rev2(i, j)
         end do
@@ -287,7 +290,8 @@ subroutine bcSymm2ndHalo(nn)
         use constants
         use blockPointers, only: BCdata
         use flowVarRefState, only: viscous, eddyModel
-        use BCPointers, only: gamma0, gamma3, ww0, ww3, pp0, pp3, rlv0, rlv3, &
+        use inputPhysics, only: useRoughSA
+        use BCPointers, only: gamma0, gamma3, ww0, ww3, pp0, pp3, rlv0, rlv3, d2wall0, d2wall3, ks0, ks3, &
                               rev0, rev3, iStart, jStart, iSize, jSize
         implicit none
 
@@ -323,6 +327,8 @@ subroutine bcSymm2ndHalo(nn)
 
             gamma0(i, j) = gamma3(i, j)
             pp0(i, j) = pp3(i, j)
+            d2wall0(i, j) = d2wall3(i, j)
+            if (useRoughSA) ks0(i, j) = ks3(i, j)
             if (viscous) rlv0(i, j) = rlv3(i, j)
             if (eddyModel) rev0(i, j) = rev3(i, j)
         end do
@@ -339,8 +345,10 @@ subroutine bcSymmPolar1stHalo(nn)
         ! centerline. This routine does just the 1st level halo.
 
         use constants
+        use inputPhysics, only: useRoughSA
         use BCPointers, only: ww1, ww2, pp1, pp2, rlv1, rlv2, rev1, rev2, &
-                              xx, iStart, jStart, iSize, jSize
+                              xx, iStart, jStart, iSize, jSize, &
+                              d2wall1, d2wall2, ks1, ks2
         use flowVarRefState, only: viscous, eddyModel
         implicit none
 
@@ -403,6 +411,8 @@ subroutine bcSymmPolar1stHalo(nn)
             ! eddy viscosity in the halo.
 
             pp1(i, j) = pp2(i, j)
+            d2wall1(i, j) = d2wall2(i, j)
+            if (useRoughSA) ks1(i, j) = ks2(i, j)
             if (viscous) rlv1(i, j) = rlv2(i, j)
             if (eddyModel) rev1(i, j) = rev2(i, j)
         end do
@@ -418,7 +428,8 @@ subroutine bcSymmPolar2ndHalo(nn)
         ! centerline. This routine does just the 2nd level halo.
 
         use constants
-        use BCPointers, only: ww0, ww3, pp0, pp3, rlv0, rlv3, rev0, rev3, &
+        use inputPhysics, only: useRoughSA
+        use BCPointers, only: ww0, ww3, pp0, pp3, rlv0, rlv3, rev0, rev3, d2wall0, d2wall3, ks0, ks3, &
                               xx, iStart, jStart, iSize, jSize
         use flowVarRefState, only: viscous, eddyModel
         implicit none
@@ -480,6 +491,8 @@ subroutine bcSymmPolar2ndHalo(nn)
             ! eddy viscosity in the halo.
 
             pp0(i, j) = pp3(i, j)
+            d2wall0(i, j) = d2wall3(i, j)
+            if (useRoughSA) ks0(i, j) = ks3(i, j)
             if (viscous) rlv0(i, j) = rlv3(i, j)
             if (eddyModel) rev0(i, j) = rev3(i, j)
         end do
diff --git a/src/turbulence/turbBCRoutines.F90 b/src/turbulence/turbBCRoutines.F90
index 0f33c488a..4824f4134 100644
--- a/src/turbulence/turbBCRoutines.F90
+++ b/src/turbulence/turbBCRoutines.F90
@@ -1417,16 +1417,6 @@ subroutine saRoughFact(i, j, k, fact)
             return
         end if
 
-        ! We need the distance to the wall, but this is not available for halo-cells, thus we simply return 
-        ! the regular SA-boundary condition
-        if (i .lt. 2 .or. i .gt. il .or. &
-            j .lt. 2 .or. j .gt. jl .or. &
-            k .lt. 2 .or. k .gt. kl) then
-            fact = -one
-            return
-        end if
-
-
         fact = (ks(i, j, k) - d2wall(i, j, k) / 0.03_realType) / &
                (ks(i, j, k) + d2wall(i, j, k) / 0.03_realType)
 
diff --git a/src/utils/haloExchange.F90 b/src/utils/haloExchange.F90
index 11d9c2665..61a5890ec 100644
--- a/src/utils/haloExchange.F90
+++ b/src/utils/haloExchange.F90
@@ -2639,6 +2639,367 @@ subroutine exchangeCoor(level)
 
     end subroutine exchangeCoor
 
+    subroutine exchanged2Wall(level)
+        !
+        !       ExchangeCoor exchanges the d2wall of the given grid
+        !       level.
+        !
+        use block
+        use communication
+        use inputTimeSpectral
+        use utils, only: EChk
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level
+        !
+        !      Local variables.
+        !
+        integer :: size, procID, ierr, index
+        integer, dimension(mpi_status_size) :: mpiStatus
+
+        integer(kind=intType) :: i, j, ii, jj, mm
+        integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2
+
+        ! Loop over the number of spectral solutions.
+
+        spectralLoop: do mm = 1, nTimeIntervalsSpectral
+
+            ! Send the coordinates i have to send. The data is first copied
+            ! into the send buffer and this buffer is sent.
+
+            ii = 1
+            sends: do i = 1, commPatternCell_2nd(level)%nProcSend
+
+                ! Store the processor id and the size of the message
+                ! a bit easier.
+
+                procID = commPatternCell_2nd(level)%sendProc(i)
+                size = commPatternCell_2nd(level)%nSend(i)
+
+                ! Copy the data in the correct part of the send buffer.
+
+                jj = ii
+                !DIR$ NOVECTOR
+                do j = 1, commPatternCell_2nd(level)%nSend(i)
+
+                    ! Store the block id and the indices of the donor
+                    ! a bit easier.
+
+                    d1 = commPatternCell_2nd(level)%sendList(i)%block(j)
+                    i1 = commPatternCell_2nd(level)%sendList(i)%indices(j, 1)
+                    j1 = commPatternCell_2nd(level)%sendList(i)%indices(j, 2)
+                    k1 = commPatternCell_2nd(level)%sendList(i)%indices(j, 3)
+
+                    ! Copy the coordinates of this point in the buffer.
+                    ! Update the counter jj accordingly.
+
+                    sendBuffer(jj) = flowDoms(d1, level, mm)%d2Wall(i1, j1, k1)
+                    jj = jj + 1
+
+                end do
+
+                ! Send the data.
+
+                call mpi_isend(sendBuffer(ii), size, adflow_real, procID, &
+                               procID, ADflow_comm_world, sendRequests(i), &
+                               ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                ! Set ii to jj for the next processor.
+
+                ii = jj
+
+            end do sends
+
+            ! Post the nonblocking receives.
+
+            ii = 1
+            receives: do i = 1, commPatternCell_2nd(level)%nProcRecv
+
+                ! Store the processor id and the size of the message
+                ! a bit easier.
+
+                procID = commPatternCell_2nd(level)%recvProc(i)
+                size = commPatternCell_2nd(level)%nRecv(i)
+
+                ! Post the receive.
+
+                call mpi_irecv(recvBuffer(ii), size, adflow_real, procID, &
+                               myID, ADflow_comm_world, recvRequests(i), ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                ! And update ii.
+
+                ii = ii + size
+
+            end do receives
+
+            ! Copy the local data.
+            !DIR$ NOVECTOR
+            localCopy: do i = 1, internalCell_2nd(level)%nCopy
+
+                ! Store the block and the indices of the donor a bit easier.
+
+                d1 = internalCell_2nd(level)%donorBlock(i)
+                i1 = internalCell_2nd(level)%donorIndices(i, 1)
+                j1 = internalCell_2nd(level)%donorIndices(i, 2)
+                k1 = internalCell_2nd(level)%donorIndices(i, 3)
+                ! Idem for the halo's.
+
+                d2 = internalCell_2nd(level)%haloBlock(i)
+                i2 = internalCell_2nd(level)%haloIndices(i, 1)
+                j2 = internalCell_2nd(level)%haloIndices(i, 2)
+                k2 = internalCell_2nd(level)%haloIndices(i, 3)
+                ! Copy the coordinates.
+                flowDoms(d2, level, mm)%d2Wall(i2, j2, k2) = &
+                    flowDoms(d1, level, mm)%d2Wall(i1, j1, k1)
+
+            end do localCopy
+
+            ! Correct the periodic halos of the internal communication
+            ! pattern
+
+            ! call correctPeriodicCoor(level, mm, &
+            !                          internalCell_2nd(level)%nPeriodic, &
+            !                          internalCell_2nd(level)%periodicData)
+
+            ! Complete the nonblocking receives in an arbitrary sequence and
+            ! copy the coordinates from the buffer into the halo's.
+
+            size = commPatternCell_2nd(level)%nProcRecv
+            completeRecvs: do i = 1, commPatternCell_2nd(level)%nProcRecv
+
+                ! Complete any of the requests.
+
+                call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                ! Copy the data just arrived in the halo's.
+
+                ii = index
+                jj = commPatternCell_2nd(level)%nRecvCum(ii - 1) + 1
+                !DIR$ NOVECTOR
+                do j = 1, commPatternCell_2nd(level)%nRecv(ii)
+
+                    ! Store the block and the indices of the halo a bit easier.
+
+                    d2 = commPatternCell_2nd(level)%recvList(ii)%block(j)
+                    i2 = commPatternCell_2nd(level)%recvList(ii)%indices(j, 1)
+                    j2 = commPatternCell_2nd(level)%recvList(ii)%indices(j, 2)
+                    k2 = commPatternCell_2nd(level)%recvList(ii)%indices(j, 3)
+
+                    ! Copy the data.
+
+                    flowDoms(d2, level, mm)%d2Wall(i2, j2, k2) = recvBuffer(jj)
+                    jj = jj + 1
+
+                end do
+
+            end do completeRecvs
+
+            ! Correct the periodic halos of the external communication
+            ! pattern.
+
+            ! call correctPeriodicCoor(level, mm, &
+            !                          commPatternCell_2nd(level)%nPeriodic, &
+            !                          commPatternCell_2nd(level)%periodicData)
+
+            ! Complete the nonblocking sends.
+
+            size = commPatternCell_2nd(level)%nProcSend
+            do i = 1, commPatternCell_2nd(level)%nProcSend
+                call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+            end do
+
+        end do spectralLoop
+
+    end subroutine exchanged2Wall
+
+
+    subroutine exchangeKs(level)
+        !
+        !       ExchangeCoor exchanges the wall roughness of the nearest wall of the given grid
+        !       level.
+        !
+        use block
+        use communication
+        use inputTimeSpectral
+        use utils, only: EChk
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level
+        !
+        !      Local variables.
+        !
+        integer :: size, procID, ierr, index
+        integer, dimension(mpi_status_size) :: mpiStatus
+
+        integer(kind=intType) :: i, j, ii, jj, mm
+        integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2
+
+        ! Loop over the number of spectral solutions.
+
+        spectralLoop: do mm = 1, nTimeIntervalsSpectral
+
+            ! Send the coordinates i have to send. The data is first copied
+            ! into the send buffer and this buffer is sent.
+
+            ii = 1
+            sends: do i = 1, commPatternCell_2nd(level)%nProcSend
+
+                ! Store the processor id and the size of the message
+                ! a bit easier.
+
+                procID = commPatternCell_2nd(level)%sendProc(i)
+                size = commPatternCell_2nd(level)%nSend(i)
+
+                ! Copy the data in the correct part of the send buffer.
+
+                jj = ii
+                !DIR$ NOVECTOR
+                do j = 1, commPatternCell_2nd(level)%nSend(i)
+
+                    ! Store the block id and the indices of the donor
+                    ! a bit easier.
+
+                    d1 = commPatternCell_2nd(level)%sendList(i)%block(j)
+                    i1 = commPatternCell_2nd(level)%sendList(i)%indices(j, 1)
+                    j1 = commPatternCell_2nd(level)%sendList(i)%indices(j, 2)
+                    k1 = commPatternCell_2nd(level)%sendList(i)%indices(j, 3)
+
+                    ! Copy the coordinates of this point in the buffer.
+                    ! Update the counter jj accordingly.
+
+                    sendBuffer(jj) = flowDoms(d1, level, mm)%ks(i1, j1, k1)
+                    jj = jj + 1
+
+                end do
+
+                ! Send the data.
+
+                call mpi_isend(sendBuffer(ii), size, adflow_real, procID, &
+                               procID, ADflow_comm_world, sendRequests(i), &
+                               ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                ! Set ii to jj for the next processor.
+
+                ii = jj
+
+            end do sends
+
+            ! Post the nonblocking receives.
+
+            ii = 1
+            receives: do i = 1, commPatternCell_2nd(level)%nProcRecv
+
+                ! Store the processor id and the size of the message
+                ! a bit easier.
+
+                procID = commPatternCell_2nd(level)%recvProc(i)
+                size = commPatternCell_2nd(level)%nRecv(i)
+
+                ! Post the receive.
+
+                call mpi_irecv(recvBuffer(ii), size, adflow_real, procID, &
+                               myID, ADflow_comm_world, recvRequests(i), ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                ! And update ii.
+
+                ii = ii + size
+
+            end do receives
+
+            ! Copy the local data.
+            !DIR$ NOVECTOR
+            localCopy: do i = 1, internalCell_2nd(level)%nCopy
+
+                ! Store the block and the indices of the donor a bit easier.
+
+                d1 = internalCell_2nd(level)%donorBlock(i)
+                i1 = internalCell_2nd(level)%donorIndices(i, 1)
+                j1 = internalCell_2nd(level)%donorIndices(i, 2)
+                k1 = internalCell_2nd(level)%donorIndices(i, 3)
+                ! Idem for the halo's.
+
+                d2 = internalCell_2nd(level)%haloBlock(i)
+                i2 = internalCell_2nd(level)%haloIndices(i, 1)
+                j2 = internalCell_2nd(level)%haloIndices(i, 2)
+                k2 = internalCell_2nd(level)%haloIndices(i, 3)
+                ! Copy the coordinates.
+                flowDoms(d2, level, mm)%ks(i2, j2, k2) = &
+                    flowDoms(d1, level, mm)%ks(i1, j1, k1)
+
+            end do localCopy
+
+            ! Correct the periodic halos of the internal communication
+            ! pattern
+
+            ! call correctPeriodicCoor(level, mm, &
+            !                          internalCell_2nd(level)%nPeriodic, &
+            !                          internalCell_2nd(level)%periodicData)
+
+            ! Complete the nonblocking receives in an arbitrary sequence and
+            ! copy the coordinates from the buffer into the halo's.
+
+            size = commPatternCell_2nd(level)%nProcRecv
+            completeRecvs: do i = 1, commPatternCell_2nd(level)%nProcRecv
+
+                ! Complete any of the requests.
+
+                call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                ! Copy the data just arrived in the halo's.
+
+                ii = index
+                jj = commPatternCell_2nd(level)%nRecvCum(ii - 1) + 1
+                !DIR$ NOVECTOR
+                do j = 1, commPatternCell_2nd(level)%nRecv(ii)
+
+                    ! Store the block and the indices of the halo a bit easier.
+
+                    d2 = commPatternCell_2nd(level)%recvList(ii)%block(j)
+                    i2 = commPatternCell_2nd(level)%recvList(ii)%indices(j, 1)
+                    j2 = commPatternCell_2nd(level)%recvList(ii)%indices(j, 2)
+                    k2 = commPatternCell_2nd(level)%recvList(ii)%indices(j, 3)
+
+                    ! Copy the data.
+
+                    flowDoms(d2, level, mm)%ks(i2, j2, k2) = recvBuffer(jj)
+                    jj = jj + 1
+
+                end do
+
+            end do completeRecvs
+
+            ! Correct the periodic halos of the external communication
+            ! pattern.
+
+            ! call correctPeriodicCoor(level, mm, &
+            !                          commPatternCell_2nd(level)%nPeriodic, &
+            !                          commPatternCell_2nd(level)%periodicData)
+
+            ! Complete the nonblocking sends.
+
+            size = commPatternCell_2nd(level)%nProcSend
+            do i = 1, commPatternCell_2nd(level)%nProcSend
+                call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+            end do
+
+        end do spectralLoop
+
+
+    end subroutine exchangeKs
+
+
     !      ==================================================================
 
     subroutine correctPeriodicCoor(level, sp, nPeriodic, periodicData)
@@ -2665,52 +3026,238 @@ subroutine correctPeriodicCoor(level, sp, nPeriodic, periodicData)
 
         ! Loop over the number of periodic transformations.
 
-        do nn = 1, nPeriodic
+        do nn = 1, nPeriodic
+
+            ! Store the rotation matrix, rotation center and translation
+            ! vector a bit easier.
+
+            rotMatrix = periodicData(nn)%rotMatrix
+            rotCenter = periodicData(nn)%rotCenter
+            translation = periodicData(nn)%translation + rotCenter
+
+            ! Loop over the number of halo nodes for this transformation.
+            !DIR$ NOVECTOR
+            do ii = 1, periodicData(nn)%nHalos
+
+                ! Store the block and the indices a bit easier.
+
+                mm = periodicData(nn)%block(ii)
+                i = periodicData(nn)%indices(ii, 1)
+                j = periodicData(nn)%indices(ii, 2)
+                k = periodicData(nn)%indices(ii, 3)
+
+                ! Determine the vector from the center of rotation to the
+                ! uncorrected halo value.
+
+                dx = flowDoms(mm, level, sp)%x(i, j, k, 1) - rotCenter(1)
+                dy = flowDoms(mm, level, sp)%x(i, j, k, 2) - rotCenter(2)
+                dz = flowDoms(mm, level, sp)%x(i, j, k, 3) - rotCenter(3)
+
+                ! Compute the corrected coordinates.
+
+                flowDoms(mm, level, sp)%x(i, j, k, 1) = rotMatrix(1, 1) * dx &
+                                                        + rotMatrix(1, 2) * dy &
+                                                        + rotMatrix(1, 3) * dz &
+                                                        + translation(1)
+                flowDoms(mm, level, sp)%x(i, j, k, 2) = rotMatrix(2, 1) * dx &
+                                                        + rotMatrix(2, 2) * dy &
+                                                        + rotMatrix(2, 3) * dz &
+                                                        + translation(2)
+                flowDoms(mm, level, sp)%x(i, j, k, 3) = rotMatrix(3, 1) * dx &
+                                                        + rotMatrix(3, 2) * dy &
+                                                        + rotMatrix(3, 3) * dz &
+                                                        + translation(3)
+            end do
+        end do
+
+    end subroutine correctPeriodicCoor
+    subroutine exchangeCoor_b(level)
+        !
+        !       ExchangeCoor_b exchanges the *derivatives* of the given grid
+        !       level IN REVERSE MODE.
+        !
+        use constants
+        use block
+        use communication
+        use inputTimeSpectral
+        use utils, only: EChk
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level
+        !
+        !      Local variables.
+        !
+        integer :: size, procID, ierr, index
+        integer, dimension(mpi_status_size) :: mpiStatus
+
+        integer(kind=intType) :: i, j, ii, jj, mm, idim
+        integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2
+
+        ! Loop over the number of spectral solutions.
+
+        spectralLoop: do mm = 1, nTimeIntervalsSpectral
+
+            ! Send the coordinates i have to send. The data is first copied
+            ! into the send buffer and this buffer is sent.
+
+            ii = 1
+            jj = 1
+            recvs: do i = 1, commPatternNode_1st(level)%nProcRecv
+
+                ! Store the processor id and the size of the message
+                ! a bit easier.
+
+                procID = commPatternNode_1st(level)%recvProc(i)
+                size = 3 * commPatternNode_1st(level)%nRecv(i)
+
+                ! Copy the data in the correct part of the send buffer.
+                !DIR$ NOVECTOR
+                do j = 1, commPatternNode_1st(level)%nRecv(i)
+
+                    ! Store the block id and the indices of the donor
+                    ! a bit easier.
+
+                    d1 = commPatternNode_1st(level)%recvList(i)%block(j)
+                    i1 = commPatternNode_1st(level)%recvList(i)%indices(j, 1)
+                    j1 = commPatternNode_1st(level)%recvList(i)%indices(j, 2)
+                    k1 = commPatternNode_1st(level)%recvList(i)%indices(j, 3)
+
+                    ! Copy the coordinates of this point in the buffer.
+                    ! Update the counter jj accordingly.
+
+                    recvBuffer(jj) = flowDomsd(d1, level, mm)%x(i1, j1, k1, 1)
+                    recvBuffer(jj + 1) = flowDomsd(d1, level, mm)%x(i1, j1, k1, 2)
+                    recvBuffer(jj + 2) = flowDomsd(d1, level, mm)%x(i1, j1, k1, 3)
+                    jj = jj + 3
+                    flowDomsd(d1, level, mm)%x(i1, j1, k1, :) = zero
+                end do
+
+                ! Send the data.
+
+                call mpi_isend(recvBuffer(ii), size, adflow_real, procID, &
+                               procID, ADflow_comm_world, recvRequests(i), &
+                               ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                ! Set ii to jj for the next processor.
+
+                ii = jj
+
+            end do recvs
+
+            ! Post the nonblocking receives.
+
+            ii = 1
+            send: do i = 1, commPatternNode_1st(level)%nProcSend
+
+                ! Store the processor id and the size of the message
+                ! a bit easier.
+
+                procID = commPatternNode_1st(level)%sendProc(i)
+                size = 3 * commPatternNode_1st(level)%nSend(i)
+
+                ! Post the receive.
+
+                call mpi_irecv(sendBuffer(ii), size, adflow_real, procID, &
+                               myID, ADflow_comm_world, sendRequests(i), ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                ! And update ii.
+
+                ii = ii + size
+
+            end do send
+
+            ! Copy the local data.
+            !DIR$ NOVECTOR
+            localCopy: do i = 1, internalNode_1st(level)%nCopy
+
+                ! Store the block and the indices of the donor a bit easier.
+
+                d1 = internalNode_1st(level)%donorBlock(i)
+                i1 = internalNode_1st(level)%donorIndices(i, 1)
+                j1 = internalNode_1st(level)%donorIndices(i, 2)
+                k1 = internalNode_1st(level)%donorIndices(i, 3)
+                ! Idem for the halo's.
+
+                d2 = internalNode_1st(level)%haloBlock(i)
+                i2 = internalNode_1st(level)%haloIndices(i, 1)
+                j2 = internalNode_1st(level)%haloIndices(i, 2)
+                k2 = internalNode_1st(level)%haloIndices(i, 3)
+
+                ! Sum into the '1' values fro the '2' values
+                do idim = 1, 3
+                    flowDomsd(d1, level, mm)%x(i1, j1, k1, idim) = flowDomsd(d1, level, mm)%x(i1, j1, k1, idim) + &
+                                                                   flowDomsd(d2, level, mm)%x(i2, j2, k2, idim)
+                    flowDomsd(d2, level, mm)%x(i2, j2, k2, idim) = zero
+                end do
+            end do localCopy
+
+            ! Correct the periodic halos of the internal communication
+            ! pattern
+
+            ! NOT IMPLEMENTED
+            ! call correctPeriodicCoor(level, mm,                          &
+            !      internalNode_1st(level)%nPeriodic,  &
+            !      internalNode_1st(level)%periodicData)
+
+            ! Complete the nonblocking receives in an arbitrary sequence and
+            ! copy the coordinates from the buffer into the halo's.
+
+            size = commPatternNode_1st(level)%nProcSend
+            completeSends: do i = 1, commPatternNode_1st(level)%nProcSend
+
+                ! Complete any of the requests.
+
+                call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                ! Copy the data just arrived in the halo's.
 
-            ! Store the rotation matrix, rotation center and translation
-            ! vector a bit easier.
+                ii = index
+                jj = 3 * commPatternNode_1st(level)%nSendCum(ii - 1)
+                !DIR$ NOVECTOR
+                do j = 1, commPatternNode_1st(level)%nSend(ii)
 
-            rotMatrix = periodicData(nn)%rotMatrix
-            rotCenter = periodicData(nn)%rotCenter
-            translation = periodicData(nn)%translation + rotCenter
+                    ! Store the block and the indices of the halo a bit easier.
 
-            ! Loop over the number of halo nodes for this transformation.
-            !DIR$ NOVECTOR
-            do ii = 1, periodicData(nn)%nHalos
+                    d2 = commPatternNode_1st(level)%sendList(ii)%block(j)
+                    i2 = commPatternNode_1st(level)%sendList(ii)%indices(j, 1)
+                    j2 = commPatternNode_1st(level)%sendList(ii)%indices(j, 2)
+                    k2 = commPatternNode_1st(level)%sendList(ii)%indices(j, 3)
 
-                ! Store the block and the indices a bit easier.
+                    ! Sum into the '2' values from the recv buffer
+                    do idim = 1, 3
+                        flowDomsd(d2, level, mm)%x(i2, j2, k2, idim) = flowDomsd(d2, level, mm)%x(i2, j2, k2, idim) + &
+                                                                       sendBuffer(jj + idim)
+                    end do
+                    jj = jj + 3
 
-                mm = periodicData(nn)%block(ii)
-                i = periodicData(nn)%indices(ii, 1)
-                j = periodicData(nn)%indices(ii, 2)
-                k = periodicData(nn)%indices(ii, 3)
+                end do
 
-                ! Determine the vector from the center of rotation to the
-                ! uncorrected halo value.
+            end do completeSends
 
-                dx = flowDoms(mm, level, sp)%x(i, j, k, 1) - rotCenter(1)
-                dy = flowDoms(mm, level, sp)%x(i, j, k, 2) - rotCenter(2)
-                dz = flowDoms(mm, level, sp)%x(i, j, k, 3) - rotCenter(3)
+            ! Correct the periodic halos of the external communication
+            ! pattern.
+            ! NOT IMLEMENTED
+            ! call correctPeriodicCoor(level, mm,                            &
+            !      commPatternNode_1st(level)%nPeriodic, &
+            !      commPatternNode_1st(level)%periodicData)
 
-                ! Compute the corrected coordinates.
+            ! Complete the nonblocking sends.
 
-                flowDoms(mm, level, sp)%x(i, j, k, 1) = rotMatrix(1, 1) * dx &
-                                                        + rotMatrix(1, 2) * dy &
-                                                        + rotMatrix(1, 3) * dz &
-                                                        + translation(1)
-                flowDoms(mm, level, sp)%x(i, j, k, 2) = rotMatrix(2, 1) * dx &
-                                                        + rotMatrix(2, 2) * dy &
-                                                        + rotMatrix(2, 3) * dz &
-                                                        + translation(2)
-                flowDoms(mm, level, sp)%x(i, j, k, 3) = rotMatrix(3, 1) * dx &
-                                                        + rotMatrix(3, 2) * dy &
-                                                        + rotMatrix(3, 3) * dz &
-                                                        + translation(3)
+            size = commPatternNode_1st(level)%nProcRecv
+            do i = 1, commPatternNode_1st(level)%nProcRecv
+                call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
             end do
-        end do
 
-    end subroutine correctPeriodicCoor
-    subroutine exchangeCoor_b(level)
+        end do spectralLoop
+
+    end subroutine exchangeCoor_b
+    subroutine exchanged2Wall_b(level)
         !
         !       ExchangeCoor_b exchanges the *derivatives* of the given grid
         !       level IN REVERSE MODE.
@@ -2743,34 +3290,32 @@ subroutine exchangeCoor_b(level)
 
             ii = 1
             jj = 1
-            recvs: do i = 1, commPatternNode_1st(level)%nProcRecv
+            recvs: do i = 1, commPatternCell_2nd(level)%nProcRecv
 
                 ! Store the processor id and the size of the message
                 ! a bit easier.
 
-                procID = commPatternNode_1st(level)%recvProc(i)
-                size = 3 * commPatternNode_1st(level)%nRecv(i)
+                procID = commPatternCell_2nd(level)%recvProc(i)
+                size = commPatternCell_2nd(level)%nRecv(i)
 
                 ! Copy the data in the correct part of the send buffer.
                 !DIR$ NOVECTOR
-                do j = 1, commPatternNode_1st(level)%nRecv(i)
+                do j = 1, commPatternCell_2nd(level)%nRecv(i)
 
                     ! Store the block id and the indices of the donor
                     ! a bit easier.
 
-                    d1 = commPatternNode_1st(level)%recvList(i)%block(j)
-                    i1 = commPatternNode_1st(level)%recvList(i)%indices(j, 1)
-                    j1 = commPatternNode_1st(level)%recvList(i)%indices(j, 2)
-                    k1 = commPatternNode_1st(level)%recvList(i)%indices(j, 3)
+                    d2 = commPatternCell_2nd(level)%recvList(i)%block(j)
+                    i2 = commPatternCell_2nd(level)%recvList(i)%indices(j, 1)
+                    j2 = commPatternCell_2nd(level)%recvList(i)%indices(j, 2)
+                    k2 = commPatternCell_2nd(level)%recvList(i)%indices(j, 3)
 
                     ! Copy the coordinates of this point in the buffer.
                     ! Update the counter jj accordingly.
 
-                    recvBuffer(jj) = flowDomsd(d1, level, mm)%x(i1, j1, k1, 1)
-                    recvBuffer(jj + 1) = flowDomsd(d1, level, mm)%x(i1, j1, k1, 2)
-                    recvBuffer(jj + 2) = flowDomsd(d1, level, mm)%x(i1, j1, k1, 3)
-                    jj = jj + 3
-                    flowDomsd(d1, level, mm)%x(i1, j1, k1, :) = zero
+                    recvBuffer(jj) = flowDomsd(d2, level, mm)%d2Wall(i2, j2, k2)
+                    flowDomsd(d2, level, mm)%d2Wall(i2, j2, k2) = zero
+                    jj = jj + 1
                 end do
 
                 ! Send the data.
@@ -2789,13 +3334,13 @@ subroutine exchangeCoor_b(level)
             ! Post the nonblocking receives.
 
             ii = 1
-            send: do i = 1, commPatternNode_1st(level)%nProcSend
+            send: do i = 1, commPatternCell_2nd(level)%nProcSend
 
                 ! Store the processor id and the size of the message
                 ! a bit easier.
 
-                procID = commPatternNode_1st(level)%sendProc(i)
-                size = 3 * commPatternNode_1st(level)%nSend(i)
+                procID = commPatternCell_2nd(level)%sendProc(i)
+                size = commPatternCell_2nd(level)%nSend(i)
 
                 ! Post the receive.
 
@@ -2811,27 +3356,27 @@ subroutine exchangeCoor_b(level)
 
             ! Copy the local data.
             !DIR$ NOVECTOR
-            localCopy: do i = 1, internalNode_1st(level)%nCopy
+            localCopy: do i = 1, internalCell_2nd(level)%nCopy
 
                 ! Store the block and the indices of the donor a bit easier.
 
-                d1 = internalNode_1st(level)%donorBlock(i)
-                i1 = internalNode_1st(level)%donorIndices(i, 1)
-                j1 = internalNode_1st(level)%donorIndices(i, 2)
-                k1 = internalNode_1st(level)%donorIndices(i, 3)
+                d1 = internalCell_2nd(level)%donorBlock(i)
+                i1 = internalCell_2nd(level)%donorIndices(i, 1)
+                j1 = internalCell_2nd(level)%donorIndices(i, 2)
+                k1 = internalCell_2nd(level)%donorIndices(i, 3)
+
                 ! Idem for the halo's.
 
-                d2 = internalNode_1st(level)%haloBlock(i)
-                i2 = internalNode_1st(level)%haloIndices(i, 1)
-                j2 = internalNode_1st(level)%haloIndices(i, 2)
-                k2 = internalNode_1st(level)%haloIndices(i, 3)
+                d2 = internalCell_2nd(level)%haloBlock(i)
+                i2 = internalCell_2nd(level)%haloIndices(i, 1)
+                j2 = internalCell_2nd(level)%haloIndices(i, 2)
+                k2 = internalCell_2nd(level)%haloIndices(i, 3)
 
                 ! Sum into the '1' values fro the '2' values
-                do idim = 1, 3
-                    flowDomsd(d1, level, mm)%x(i1, j1, k1, idim) = flowDomsd(d1, level, mm)%x(i1, j1, k1, idim) + &
-                                                                   flowDomsd(d2, level, mm)%x(i2, j2, k2, idim)
-                    flowDomsd(d2, level, mm)%x(i2, j2, k2, idim) = zero
-                end do
+                flowDomsd(d1, level, mm)%d2Wall(i1, j1, k1) = flowDomsd(d1, level, mm)%d2Wall(i1, j1, k1) + &
+                                                              flowDomsd(d2, level, mm)%d2Wall(i2, j2, k2)
+                flowDomsd(d2, level, mm)%d2Wall(i2, j2, k2) = zero
+
             end do localCopy
 
             ! Correct the periodic halos of the internal communication
@@ -2845,8 +3390,8 @@ subroutine exchangeCoor_b(level)
             ! Complete the nonblocking receives in an arbitrary sequence and
             ! copy the coordinates from the buffer into the halo's.
 
-            size = commPatternNode_1st(level)%nProcSend
-            completeSends: do i = 1, commPatternNode_1st(level)%nProcSend
+            size = commPatternCell_2nd(level)%nProcSend
+            completeSends: do i = 1, commPatternCell_2nd(level)%nProcSend
 
                 ! Complete any of the requests.
 
@@ -2856,23 +3401,21 @@ subroutine exchangeCoor_b(level)
                 ! Copy the data just arrived in the halo's.
 
                 ii = index
-                jj = 3 * commPatternNode_1st(level)%nSendCum(ii - 1)
+                jj = commPatternCell_2nd(level)%nSendCum(ii - 1)
                 !DIR$ NOVECTOR
-                do j = 1, commPatternNode_1st(level)%nSend(ii)
+                do j = 1, commPatternCell_2nd(level)%nSend(ii)
 
                     ! Store the block and the indices of the halo a bit easier.
 
-                    d2 = commPatternNode_1st(level)%sendList(ii)%block(j)
-                    i2 = commPatternNode_1st(level)%sendList(ii)%indices(j, 1)
-                    j2 = commPatternNode_1st(level)%sendList(ii)%indices(j, 2)
-                    k2 = commPatternNode_1st(level)%sendList(ii)%indices(j, 3)
+                    d2 = commPatternCell_2nd(level)%sendList(ii)%block(j)
+                    i2 = commPatternCell_2nd(level)%sendList(ii)%indices(j, 1)
+                    j2 = commPatternCell_2nd(level)%sendList(ii)%indices(j, 2)
+                    k2 = commPatternCell_2nd(level)%sendList(ii)%indices(j, 3)
 
                     ! Sum into the '2' values from the recv buffer
-                    do idim = 1, 3
-                        flowDomsd(d2, level, mm)%x(i2, j2, k2, idim) = flowDomsd(d2, level, mm)%x(i2, j2, k2, idim) + &
-                                                                       sendBuffer(jj + idim)
-                    end do
-                    jj = jj + 3
+                    jj = jj + 1
+                    flowDomsd(d2, level, mm)%d2Wall(i2, j2, k2) = flowDomsd(d2, level, mm)%d2Wall(i2, j2, k2) + &
+                                                                  sendBuffer(jj)
 
                 end do
 
@@ -2887,15 +3430,15 @@ subroutine exchangeCoor_b(level)
 
             ! Complete the nonblocking sends.
 
-            size = commPatternNode_1st(level)%nProcRecv
-            do i = 1, commPatternNode_1st(level)%nProcRecv
+            size = commPatternCell_2nd(level)%nProcRecv
+            do i = 1, commPatternCell_2nd(level)%nProcRecv
                 call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
                 call EChk(ierr, __FILE__, __LINE__)
             end do
 
         end do spectralLoop
 
-    end subroutine exchangeCoor_b
+    end subroutine exchanged2Wall_b
     subroutine exchangeCoor_d(level)
         !
         !       ExchangeCoor_d exchanges the *derivatives* of the given grid
@@ -3083,6 +3626,185 @@ subroutine exchangeCoor_d(level)
         end do spectralLoop
 
     end subroutine exchangeCoor_d
+    subroutine exchanged2Wall_d(level)
+        !
+        !       ExchangeCoor_d exchanges the *derivatives* of the given grid
+        !       level.
+        !
+        use block
+        use communication
+        use inputTimeSpectral
+        use utils, only: EChk
+        implicit none
+        !
+        !      Subroutine arguments.
+        !
+        integer(kind=intType), intent(in) :: level
+        !
+        !      Local variables.
+        !
+        integer :: size, procID, ierr, index
+        integer, dimension(mpi_status_size) :: mpiStatus
+
+        integer(kind=intType) :: i, j, ii, jj, mm
+        integer(kind=intType) :: d1, i1, j1, k1, d2, i2, j2, k2
+
+        ! Loop over the number of spectral solutions.
+
+        spectralLoop: do mm = 1, nTimeIntervalsSpectral
+
+            ! Send the coordinates i have to send. The data is first copied
+            ! into the send buffer and this buffer is sent.
+
+            ii = 1
+            sends: do i = 1, commPatternCell_2nd(level)%nProcSend
+
+                ! Store the processor id and the size of the message
+                ! a bit easier.
+
+                procID = commPatternCell_2nd(level)%sendProc(i)
+                size = commPatternCell_2nd(level)%nSend(i)
+
+                ! Copy the data in the correct part of the send buffer.
+
+                jj = ii
+                !DIR$ NOVECTOR
+                do j = 1, commPatternCell_2nd(level)%nSend(i)
+
+                    ! Store the block id and the indices of the donor
+                    ! a bit easier.
+
+                    d1 = commPatternCell_2nd(level)%sendList(i)%block(j)
+                    i1 = commPatternCell_2nd(level)%sendList(i)%indices(j, 1)
+                    j1 = commPatternCell_2nd(level)%sendList(i)%indices(j, 2)
+                    k1 = commPatternCell_2nd(level)%sendList(i)%indices(j, 3)
+
+                    ! Copy the coordinates of this point in the buffer.
+                    ! Update the counter jj accordingly.
+
+                    sendBuffer(jj) = flowDomsd(d1, level, mm)%d2Wall(i1, j1, k1)
+                    jj = jj + 1
+
+                end do
+
+                ! Send the data.
+
+                call mpi_isend(sendBuffer(ii), size, adflow_real, procID, &
+                               procID, ADflow_comm_world, sendRequests(i), &
+                               ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                ! Set ii to jj for the next processor.
+
+                ii = jj
+
+            end do sends
+
+            ! Post the nonblocking receives.
+
+            ii = 1
+            receives: do i = 1, commPatternCell_2nd(level)%nProcRecv
+
+                ! Store the processor id and the size of the message
+                ! a bit easier.
+
+                procID = commPatternCell_2nd(level)%recvProc(i)
+                size = commPatternCell_2nd(level)%nRecv(i)
+
+                ! Post the receive.
+
+                call mpi_irecv(recvBuffer(ii), size, adflow_real, procID, &
+                               myID, ADflow_comm_world, recvRequests(i), ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                ! And update ii.
+
+                ii = ii + size
+
+            end do receives
+
+            ! Copy the local data.
+            !DIR$ NOVECTOR
+            localCopy: do i = 1, internalCell_2nd(level)%nCopy
+
+                ! Store the block and the indices of the donor a bit easier.
+
+                d1 = internalCell_2nd(level)%donorBlock(i)
+                i1 = internalCell_2nd(level)%donorIndices(i, 1)
+                j1 = internalCell_2nd(level)%donorIndices(i, 2)
+                k1 = internalCell_2nd(level)%donorIndices(i, 3)
+                ! Idem for the halo's.
+
+                d2 = internalCell_2nd(level)%haloBlock(i)
+                i2 = internalCell_2nd(level)%haloIndices(i, 1)
+                j2 = internalCell_2nd(level)%haloIndices(i, 2)
+                k2 = internalCell_2nd(level)%haloIndices(i, 3)
+                ! Copy the coordinates.
+                flowDomsd(d2, level, mm)%d2Wall(i2, j2, k2) = &
+                    flowDomsd(d1, level, mm)%d2Wall(i1, j1, k1)
+
+            end do localCopy
+
+            ! Correct the periodic halos of the internal communication
+            ! pattern
+
+            ! NOT IMPLEMENTED
+            ! call correctPeriodicCoor(level, mm,                          &
+            !      internalCell_2nd(level)%nPeriodic,  &
+            !      internalCell_2nd(level)%periodicData)
+
+            ! Complete the nonblocking receives in an arbitrary sequence and
+            ! copy the coordinates from the buffer into the halo's.
+
+            size = commPatternCell_2nd(level)%nProcRecv
+            completeRecvs: do i = 1, commPatternCell_2nd(level)%nProcRecv
+
+                ! Complete any of the requests.
+
+                call mpi_waitany(size, recvRequests, index, mpiStatus, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+
+                ! Copy the data just arrived in the halo's.
+
+                ii = index
+                jj = commPatternCell_2nd(level)%nRecvCum(ii - 1) + 1
+                !DIR$ NOVECTOR
+                do j = 1, commPatternCell_2nd(level)%nRecv(ii)
+
+                    ! Store the block and the indices of the halo a bit easier.
+
+                    d2 = commPatternCell_2nd(level)%recvList(ii)%block(j)
+                    i2 = commPatternCell_2nd(level)%recvList(ii)%indices(j, 1)
+                    j2 = commPatternCell_2nd(level)%recvList(ii)%indices(j, 2)
+                    k2 = commPatternCell_2nd(level)%recvList(ii)%indices(j, 3)
+
+                    ! Copy the data.
+
+                    flowDomsd(d2, level, mm)%d2Wall(i2, j2, k2) = recvBuffer(jj)
+                    jj = jj + 1
+
+                end do
+
+            end do completeRecvs
+
+            ! Correct the periodic halos of the external communication
+            ! pattern.
+            ! NOT IMLEMENTED
+            ! call correctPeriodicCoor(level, mm,                            &
+            !      commPatternCell_2nd(level)%nPeriodic, &
+            !      commPatternCell_2nd(level)%periodicData)
+
+            ! Complete the nonblocking sends.
+
+            size = commPatternCell_2nd(level)%nProcSend
+            do i = 1, commPatternCell_2nd(level)%nProcSend
+                call mpi_waitany(size, sendRequests, index, mpiStatus, ierr)
+                call EChk(ierr, __FILE__, __LINE__)
+            end do
+
+        end do spectralLoop
+
+    end subroutine exchanged2Wall_d
 
     ! -----------------------------------------------------------------
     !              Comm routines for zippers
diff --git a/src/utils/utils.F90 b/src/utils/utils.F90
index 0d245a2fe..a9a8b5a78 100644
--- a/src/utils/utils.F90
+++ b/src/utils/utils.F90
@@ -888,12 +888,14 @@ subroutine setBCPointers(nn, spatialPointers)
         use blockPointers, only: w, p, rlv, rev, gamma, x, d2wall, &
                                  si, sj, sk, s, globalCell, BCData, nx, il, ie, ib, &
                                  ny, jl, je, jb, nz, kl, ke, kb, BCFaceID, &
-                                 addgridvelocities, sFaceI, sFaceJ, sFaceK, addGridVelocities
+                                 addgridvelocities, sFaceI, sFaceJ, sFaceK, addGridVelocities, &
+                                 d2wall, ks
         use BCPointers, only: ww0, ww1, ww2, ww3, pp0, pp1, pp2, pp3, &
                               rlv0, rlv1, rlv2, rlv3, rev0, rev1, rev2, rev3, &
                               gamma0, gamma1, gamma2, gamma3, gcp, xx, ss, ssi, ssj, ssk, dd2wall, &
-                              sFace, iStart, iEnd, jStart, jEnd, iSize, jSize
-        use inputPhysics, only: cpModel, equations
+                              sFace, iStart, iEnd, jStart, jEnd, iSize, jSize, &
+                              d2wall0, d2wall1, d2wall2, d2wall3, ks0, ks1, ks2, ks3
+        use inputPhysics, only: cpModel, equations, useRoughSA
         implicit none
 
         ! Subroutine arguments.
@@ -939,6 +941,18 @@ subroutine setBCPointers(nn, spatialPointers)
             rev1 => rev(1, 1:, 1:)
             rev0 => rev(0, 1:, 1:)
 
+            d2wall3 => d2wall(3, 1:, 1:)
+            d2wall2 => d2wall(2, 1:, 1:)
+            d2wall1 => d2wall(1, 1:, 1:)
+            d2wall0 => d2wall(0, 1:, 1:)
+
+            if (useRoughSA) then
+                ks3 => ks(3, 1:, 1:)
+                ks2 => ks(2, 1:, 1:)
+                ks1 => ks(1, 1:, 1:)
+                ks0 => ks(0, 1:, 1:)
+            end if
+
             gamma3 => gamma(3, 1:, 1:)
             gamma2 => gamma(2, 1:, 1:)
             gamma1 => gamma(1, 1:, 1:)
@@ -969,6 +983,18 @@ subroutine setBCPointers(nn, spatialPointers)
             rev1 => rev(ie, 1:, 1:)
             rev0 => rev(ib, 1:, 1:)
 
+            d2wall3 => d2wall(nx, 1:, 1:)
+            d2wall2 => d2wall(il, 1:, 1:)
+            d2wall1 => d2wall(ie, 1:, 1:)
+            d2wall0 => d2wall(ib, 1:, 1:)
+
+            if (useRoughSA) then
+                ks3 => ks(nx, 1:, 1:)
+                ks2 => ks(il, 1:, 1:)
+                ks1 => ks(ie, 1:, 1:)
+                ks0 => ks(ib, 1:, 1:)
+            end if
+
             gamma3 => gamma(nx, 1:, 1:)
             gamma2 => gamma(il, 1:, 1:)
             gamma1 => gamma(ie, 1:, 1:)
@@ -999,6 +1025,18 @@ subroutine setBCPointers(nn, spatialPointers)
             rev1 => rev(1:, 1, 1:)
             rev0 => rev(1:, 0, 1:)
 
+            d2wall3 => d2wall(1:, 3, 1:)
+            d2wall2 => d2wall(1:, 2, 1:)
+            d2wall1 => d2wall(1:, 1, 1:)
+            d2wall0 => d2wall(1:, 0, 1:)
+
+            if (useRoughSA) then
+                ks3 => ks(1:, 3, 1:)
+                ks2 => ks(1:, 2, 1:)
+                ks1 => ks(1:, 1, 1:)
+                ks0 => ks(1:, 0, 1:)
+            end if
+
             gamma3 => gamma(1:, 3, 1:)
             gamma2 => gamma(1:, 2, 1:)
             gamma1 => gamma(1:, 1, 1:)
@@ -1029,6 +1067,18 @@ subroutine setBCPointers(nn, spatialPointers)
             rev1 => rev(1:, je, 1:)
             rev0 => rev(1:, jb, 1:)
 
+            d2wall3 => d2wall(1:, ny, 1:)
+            d2wall2 => d2wall(1:, jl, 1:)
+            d2wall1 => d2wall(1:, je, 1:)
+            d2wall0 => d2wall(1:, jb, 1:)
+
+            if (useRoughSA) then
+                ks3 => ks(1:, ny, 1:)
+                ks2 => ks(1:, jl, 1:)
+                ks1 => ks(1:, je, 1:)
+                ks0 => ks(1:, jb, 1:)
+            end if
+
             gamma3 => gamma(1:, ny, 1:)
             gamma2 => gamma(1:, jl, 1:)
             gamma1 => gamma(1:, je, 1:)
@@ -1059,6 +1109,18 @@ subroutine setBCPointers(nn, spatialPointers)
             rev1 => rev(1:, 1:, 1)
             rev0 => rev(1:, 1:, 0)
 
+            d2wall3 => d2wall(1:, 1:, 3)
+            d2wall2 => d2wall(1:, 1:, 2)
+            d2wall1 => d2wall(1:, 1:, 1)
+            d2wall0 => d2wall(1:, 1:, 0)
+
+            if (useRoughSA) then
+                ks3 => ks(1:, 1:, 3)
+                ks2 => ks(1:, 1:, 2)
+                ks1 => ks(1:, 1:, 1)
+                ks0 => ks(1:, 1:, 0)
+            end if
+
             gamma3 => gamma(1:, 1:, 3)
             gamma2 => gamma(1:, 1:, 2)
             gamma1 => gamma(1:, 1:, 1)
@@ -1089,6 +1151,18 @@ subroutine setBCPointers(nn, spatialPointers)
             rev1 => rev(1:, 1:, ke)
             rev0 => rev(1:, 1:, kb)
 
+            d2wall3 => d2wall(1:, 1:, nz)
+            d2wall2 => d2wall(1:, 1:, kl)
+            d2wall1 => d2wall(1:, 1:, ke)
+            d2wall0 => d2wall(1:, 1:, kb)
+
+            if (useRoughSA) then
+                ks3 => ks(1:, 1:, nz)
+                ks2 => ks(1:, 1:, kl)
+                ks1 => ks(1:, 1:, ke)
+                ks0 => ks(1:, 1:, kb)
+            end if
+
             gamma3 => gamma(1:, 1:, nz)
             gamma2 => gamma(1:, 1:, kl)
             gamma1 => gamma(1:, 1:, ke)
@@ -2056,14 +2130,16 @@ subroutine setbcpointers_d(nn, spatialpointers)
     &   gamma, x, xd, d2wall, d2walld, si, sid, sj, sjd, sk, skd, s, sd, &
     &   globalcell, bcdata, bcdatad, nx, il, ie, ib, ny, jl, je, jb, nz, kl,&
     &   ke, kb, bcfaceid, addgridvelocities, sfacei, sfaceid, sfacej, &
-    &   sfacejd, sfacek, sfacekd, addgridvelocities
+    &   sfacejd, sfacek, sfacekd, addgridvelocities, d2wall, d2walld, ks
         use bcpointers_d, only: ww0, ww0d, ww1, ww1d, ww2, ww2d, ww3, ww3d,&
     &   pp0, pp0d, pp1, pp1d, pp2, pp2d, pp3, pp3d, rlv0, rlv0d, rlv1, rlv1d&
     &   , rlv2, rlv2d, rlv3, rlv3d, rev0, rev0d, rev1, rev1d, rev2, rev2d, &
     &   rev3, rev3d, gamma0, gamma1, gamma2, gamma3, gcp, xx, xxd, ss, ssd, &
     &   ssi, ssid, ssj, ssjd, ssk, sskd, dd2wall, sface, istart, iend, &
-    &   jstart, jend, isize, jsize
-        use inputphysics, only: cpmodel, equations
+    &   jstart, jend, isize, jsize, &
+        d2wall0, d2wall0d, d2wall1, d2wall1d, d2wall2, d2wall2d, d2wall3, d2wall3d, &
+        ks0, ks1, ks2, ks3
+        use inputphysics, only: cpmodel, equations, useRoughSA
         implicit none
 ! subroutine arguments.
         integer(kind=inttype), intent(in) :: nn
@@ -2114,11 +2190,25 @@ subroutine setbcpointers_d(nn, spatialpointers)
             rev1 => rev(1, 1:, 1:)
             rev0d => revd(0, 1:, 1:)
             rev0 => rev(0, 1:, 1:)
+            d2wall3d => d2walld(3, 1:, 1:)
+            d2wall3 =>  d2wall(3, 1:, 1:)
+            d2wall2d => d2walld(2, 1:, 1:)
+            d2wall2 =>  d2wall(2, 1:, 1:)
+            d2wall1d => d2walld(1, 1:, 1:)
+            d2wall1 =>  d2wall(1, 1:, 1:)
+            d2wall0d => d2walld(0, 1:, 1:)
+            d2wall0 =>  d2wall(0, 1:, 1:)
             gamma3 => gamma(3, 1:, 1:)
             gamma2 => gamma(2, 1:, 1:)
             gamma1 => gamma(1, 1:, 1:)
             gamma0 => gamma(0, 1:, 1:)
             gcp => globalcell(2, 1:, 1:)
+            if (useRoughSA) then
+                ks3 => ks(3, 1:, 1:)
+                ks2 => ks(2, 1:, 1:)
+                ks1 => ks(1, 1:, 1:)
+                ks0 => ks(0, 1:, 1:)
+            end if
         case (imax)
 !---------------------------------------------------------------------------
             ww3d => wd(nx, 1:, 1:, :)
@@ -2153,11 +2243,25 @@ subroutine setbcpointers_d(nn, spatialpointers)
             rev1 => rev(ie, 1:, 1:)
             rev0d => revd(ib, 1:, 1:)
             rev0 => rev(ib, 1:, 1:)
+            d2wall3d => d2walld(nx, 1:, 1:)
+            d2wall3 =>  d2wall(nx, 1:, 1:)
+            d2wall2d => d2walld(il, 1:, 1:)
+            d2wall2 =>  d2wall(il, 1:, 1:)
+            d2wall1d => d2walld(ie, 1:, 1:)
+            d2wall1 =>  d2wall(ie, 1:, 1:)
+            d2wall0d => d2walld(ib, 1:, 1:)
+            d2wall0 =>  d2wall(ib, 1:, 1:)
             gamma3 => gamma(nx, 1:, 1:)
             gamma2 => gamma(il, 1:, 1:)
             gamma1 => gamma(ie, 1:, 1:)
             gamma0 => gamma(ib, 1:, 1:)
             gcp => globalcell(il, 1:, 1:)
+            if (useRoughSA) then
+                ks3 => ks(nx, 1:, 1:)
+                ks2 => ks(il, 1:, 1:)
+                ks1 => ks(ie, 1:, 1:)
+                ks0 => ks(ib, 1:, 1:)
+            end if
         case (jmin)
 !---------------------------------------------------------------------------
             ww3d => wd(1:, 3, 1:, :)
@@ -2192,11 +2296,25 @@ subroutine setbcpointers_d(nn, spatialpointers)
             rev1 => rev(1:, 1, 1:)
             rev0d => revd(1:, 0, 1:)
             rev0 => rev(1:, 0, 1:)
+            d2wall3d => d2walld(1:, 3, 1:)
+            d2wall3 =>  d2wall(1:, 3, 1:)
+            d2wall2d => d2walld(1:, 2, 1:)
+            d2wall2 =>  d2wall(1:, 2, 1:)
+            d2wall1d => d2walld(1:, 1, 1:)
+            d2wall1 =>  d2wall(1:, 1, 1:)
+            d2wall0d => d2walld(1:, 0, 1:)
+            d2wall0 =>  d2wall(1:, 0, 1:)
             gamma3 => gamma(1:, 3, 1:)
             gamma2 => gamma(1:, 2, 1:)
             gamma1 => gamma(1:, 1, 1:)
             gamma0 => gamma(1:, 0, 1:)
             gcp => globalcell(1:, 2, 1:)
+            if (useRoughSA) then
+                ks3 => ks(1:, 3, 1:)
+                ks2 => ks(1:, 2, 1:)
+                ks1 => ks(1:, 1, 1:)
+                ks0 => ks(1:, 0, 1:)
+            end if
         case (jmax)
 !---------------------------------------------------------------------------
             ww3d => wd(1:, ny, 1:, :)
@@ -2231,11 +2349,25 @@ subroutine setbcpointers_d(nn, spatialpointers)
             rev1 => rev(1:, je, 1:)
             rev0d => revd(1:, jb, 1:)
             rev0 => rev(1:, jb, 1:)
+            d2wall3d => d2walld(1:, ny, 1:)
+            d2wall3 =>  d2wall(1:, ny, 1:)
+            d2wall2d => d2walld(1:, jl, 1:)
+            d2wall2 =>  d2wall(1:, jl, 1:)
+            d2wall1d => d2walld(1:, je, 1:)
+            d2wall1 =>  d2wall(1:, je, 1:)
+            d2wall0d => d2walld(1:, jb, 1:)
+            d2wall0 =>  d2wall(1:, jb, 1:)
             gamma3 => gamma(1:, ny, 1:)
             gamma2 => gamma(1:, jl, 1:)
             gamma1 => gamma(1:, je, 1:)
             gamma0 => gamma(1:, jb, 1:)
             gcp => globalcell(1:, jl, 1:)
+            if (useRoughSA) then
+                ks3 => ks(1:, ny, 1:)
+                ks2 => ks(1:, jl, 1:)
+                ks1 => ks(1:, je, 1:)
+                ks0 => ks(1:, jb, 1:)
+            end if
         case (kmin)
 !---------------------------------------------------------------------------
             ww3d => wd(1:, 1:, 3, :)
@@ -2270,11 +2402,25 @@ subroutine setbcpointers_d(nn, spatialpointers)
             rev1 => rev(1:, 1:, 1)
             rev0d => revd(1:, 1:, 0)
             rev0 => rev(1:, 1:, 0)
+            d2wall3d => d2walld(1:, 1:, 3)
+            d2wall3 =>  d2wall(1:, 1:, 3)
+            d2wall2d => d2walld(1:, 1:, 2)
+            d2wall2 =>  d2wall(1:, 1:, 2)
+            d2wall1d => d2walld(1:, 1:, 1)
+            d2wall1 =>  d2wall(1:, 1:, 1)
+            d2wall0d => d2walld(1:, 1:, 0)
+            d2wall0 =>  d2wall(1:, 1:, 0)
             gamma3 => gamma(1:, 1:, 3)
             gamma2 => gamma(1:, 1:, 2)
             gamma1 => gamma(1:, 1:, 1)
             gamma0 => gamma(1:, 1:, 0)
             gcp => globalcell(1:, 1:, 2)
+            if (useRoughSA) then
+                ks3 => ks(1:, 1:, 3)
+                ks2 => ks(1:, 1:, 2)
+                ks1 => ks(1:, 1:, 1)
+                ks0 => ks(1:, 1:, 0)
+            end if
         case (kmax)
 !---------------------------------------------------------------------------
             ww3d => wd(1:, 1:, nz, :)
@@ -2309,11 +2455,25 @@ subroutine setbcpointers_d(nn, spatialpointers)
             rev1 => rev(1:, 1:, ke)
             rev0d => revd(1:, 1:, kb)
             rev0 => rev(1:, 1:, kb)
+            d2wall3d => d2walld(1:, 1:, nz)
+            d2wall3 =>  d2wall(1:, 1:, nz)
+            d2wall2d => d2walld(1:, 1:, kl)
+            d2wall2 =>  d2wall(1:, 1:, kl)
+            d2wall1d => d2walld(1:, 1:, ke)
+            d2wall1 =>  d2wall(1:, 1:, ke)
+            d2wall0d => d2walld(1:, 1:, kb)
+            d2wall0 =>  d2wall(1:, 1:, kb)
             gamma3 => gamma(1:, 1:, nz)
             gamma2 => gamma(1:, 1:, kl)
             gamma1 => gamma(1:, 1:, ke)
             gamma0 => gamma(1:, 1:, kb)
             gcp => globalcell(1:, 1:, kl)
+            if (useRoughSA) then
+                ks3 => ks(1:, 1:, nz)
+                ks2 => ks(1:, 1:, kl)
+                ks1 => ks(1:, 1:, ke)
+                ks0 => ks(1:, 1:, kb)
+            end if
         end select
         if (spatialpointers) then
             select case (bcfaceid(nn))
diff --git a/src/wallDistance/wallDistance.F90 b/src/wallDistance/wallDistance.F90
index 06dd17dd7..1a9845a4d 100644
--- a/src/wallDistance/wallDistance.F90
+++ b/src/wallDistance/wallDistance.F90
@@ -155,6 +155,7 @@ subroutine updateWallRoughness()
         use communication, only: adflow_comm_world, nProc, myID
         use sorting, only: famInList
         use wallDistanceData, only: nCellBlockOffset
+        use haloExchange, only : exchangeKs
         implicit none
 
         ! Local Variables
@@ -329,6 +330,10 @@ subroutine updateWallRoughness()
                 deallocate (ksGlobal, cellIdGlobal)
             end do
         end do
+
+
+        call exchangeKs(1)
+
     end subroutine updateWallRoughness
 
     subroutine computeWallDistance(level, allocMem)
@@ -692,7 +697,7 @@ subroutine initWallDistance(level, sps, allocMem)
         !
         integer :: ierr
 
-        integer(kind=intType) :: nn, il, jl, kl
+        integer(kind=intType) :: nn, ib, jb, kb
 
         ! Loop over the domains.
 
@@ -702,21 +707,25 @@ subroutine initWallDistance(level, sps, allocMem)
 
             if (allocMem) then
 
-                il = flowDoms(nn, level, sps)%il
-                jl = flowDoms(nn, level, sps)%jl
-                kl = flowDoms(nn, level, sps)%kl
+                ib = flowDoms(nn, level, sps)%ib
+                jb = flowDoms(nn, level, sps)%jb
+                kb = flowDoms(nn, level, sps)%kb
 
-                allocate (flowDoms(nn, level, sps)%d2Wall(2:il, 2:jl, 2:kl), &
+                allocate (flowDoms(nn, level, sps)%d2Wall(0:ib, 0:jb, 0:kb), &
                           stat=ierr)
                 if (ierr /= 0) &
                     call terminate("initWallDistance", &
                                    "Memory allocation failure for d2Wall")
+
                 if (useRoughSA) then
-                    allocate (flowDoms(nn, level, sps)%ks(2:il, 2:jl, 2:kl), &
+                    allocate (flowDoms(nn, level, sps)%ks(0:ib, 0:jb, 0:kb), &
                               stat=ierr)
                     if (ierr /= 0) &
                         call terminate("initWallDistance", &
                                        "Memory allocation failure for ks")
+
+                    ! initalize to zero
+                    flowDoms(nn, level, sps)%ks = zero
                 end if
             end if
 
@@ -1744,6 +1753,8 @@ subroutine updateWallDistanceAllLevels
         use block, only: flowDoms
         use inputPhysics, only: equations
         use iteration, only: groundLevel
+        use haloExchange, only: exchanged2Wall
+        use inputPhysics, only: wallDistanceNeeded
         implicit none
         !
         !      Local variables.
@@ -1759,6 +1770,9 @@ subroutine updateWallDistanceAllLevels
         nLevels = ubound(flowDoms, 2)
         do nn = groundLevel, nLevels
             call computeWallDistance(nn, .false.)
+            if (wallDistanceNeeded) then
+                call exchanged2Wall(nn)
+            end if
         end do
 
     end subroutine updateWallDistanceAllLevels
diff --git a/tests/reg_tests/refs/adjoint_rans_rough_sa.json b/tests/reg_tests/refs/adjoint_rans_rough_sa.json
index b7af623a2..11a894b86 100644
--- a/tests/reg_tests/refs/adjoint_rans_rough_sa.json
+++ b/tests/reg_tests/refs/adjoint_rans_rough_sa.json
@@ -9,78 +9,78 @@
             "shape": {
                 "__ndarray__": [
                     [
-                        0.0014588467683956273,
-                        -0.00014951327884643356,
-                        -0.015675496092977064,
-                        -0.01407598597005118,
-                        0.0013474930848856718,
-                        0.0018275829889551137,
-                        -0.012563965621769828,
-                        -0.010466958623447776,
-                        0.001309399552783985,
-                        -0.0032557348500175582,
-                        0.002277825768411138,
-                        0.006688700916671499,
-                        -0.001811437701229527,
-                        0.005977863740563417,
-                        0.009835326794039072,
-                        0.0071363080371478,
-                        0.0005148674031452701,
-                        -0.00196784534869978,
-                        0.0021821970874052266,
-                        0.0044284959845190385,
-                        0.004347976886409987,
-                        0.0036422625095888,
-                        0.00319111079843394,
-                        0.004015682626633313,
-                        0.0020419681575322636,
-                        0.0021666219810591615,
-                        0.0019894668966112822,
-                        0.0016303554139134088,
-                        -0.01965799022068064,
-                        -0.020829878357256484,
-                        -0.020784582903082428,
-                        -0.018884798454076847,
-                        -0.00036947254958799624,
-                        -0.0003130469976779008,
-                        -0.00030574210053769873,
-                        0.00026880669914821536,
-                        -0.01791830557159001,
-                        -0.018865736825431564,
-                        -0.01859183680328005,
-                        -0.01648873429652418,
-                        0.0009126507770085303,
-                        0.0008121102434698606,
-                        0.0008901049453491454,
-                        0.0010405908745254582,
-                        -0.004064652115740942,
-                        -0.0037963135756442002,
-                        -0.0032041669672322024,
-                        -0.0026293301388217154,
-                        0.0033331733805644533,
-                        0.003492144655793166,
-                        0.0033575207726121146,
-                        0.003010747698152013,
-                        0.007440262189915593,
-                        0.007319652738596754,
-                        0.006915763888467916,
-                        0.006162240091787263,
-                        0.00042023051048307514,
-                        0.0031315613760729832,
-                        0.005383636958473412,
-                        0.006824954530055509,
-                        0.010375635834802659,
-                        0.011987963324134878,
-                        0.011950339779730942,
-                        0.009432494720746398,
-                        0.011250799431240419,
-                        0.00985486786260258,
-                        0.008009279266531904,
-                        0.005465377456478378,
-                        0.006177656586328975,
-                        0.0050470385286012194,
-                        0.004344575288306137,
-                        0.004048991557116653
+                        0.0014588561633413414,
+                        -0.00014952202413208286,
+                        -0.015675497912023488,
+                        -0.014075987294571204,
+                        0.0013475090668360465,
+                        0.0018275783162532001,
+                        -0.012563970825243631,
+                        -0.010466961719321863,
+                        0.001309403504721567,
+                        -0.003255740658843033,
+                        0.0022778281149183153,
+                        0.006688705333327987,
+                        -0.0018114416783712132,
+                        0.005977863590813565,
+                        0.00983532858473589,
+                        0.007136309610539732,
+                        0.0005148684493146728,
+                        -0.0019678518803612076,
+                        0.002182198480909613,
+                        0.004428502895915052,
+                        0.004347966506713412,
+                        0.0036422607708835367,
+                        0.003191112901777175,
+                        0.004015687155590336,
+                        0.002041987254186593,
+                        0.00216664730487107,
+                        0.0019894942777849856,
+                        0.0016303801990091183,
+                        -0.019657992669769538,
+                        -0.020829881820670266,
+                        -0.02078458749231964,
+                        -0.018884803905128002,
+                        -0.0003694888783061555,
+                        -0.0003130644455712657,
+                        -0.0003057568816089407,
+                        0.0002687965388743136,
+                        -0.017918307322412758,
+                        -0.018865739227695775,
+                        -0.018591839838973466,
+                        -0.016488737753057772,
+                        0.0009126579204761291,
+                        0.0008121165240817337,
+                        0.0008901093226245515,
+                        0.0010405934847586217,
+                        -0.004064662192245191,
+                        -0.0037963248994502177,
+                        -0.0032041778422513857,
+                        -0.002629339527700363,
+                        0.0033331749562904543,
+                        0.0034921467579424295,
+                        0.003357522945849655,
+                        0.0030107497004535763,
+                        0.007440267908310526,
+                        0.007319659848397562,
+                        0.006915771850659987,
+                        0.00616224817217152,
+                        0.0004202215889943551,
+                        0.003131548800067598,
+                        0.005383622398366069,
+                        0.0068249401797487366,
+                        0.01037563525198086,
+                        0.011987962339168481,
+                        0.011950338215280503,
+                        0.009432492888063287,
+                        0.011250802433469834,
+                        0.009854871187475862,
+                        0.008009282452942326,
+                        0.005465380393532938,
+                        0.00617765891204428,
+                        0.005047041663475567,
+                        0.004344579308030662,
+                        0.004048996264051067
                     ]
                 ],
                 "dtype": "float64",
@@ -89,16 +89,16 @@
                     72
                 ]
             },
-            "span": 0.002296647842880653,
+            "span": 0.00229664892833175,
             "twist": {
                 "__ndarray__": [
                     [
-                        -0.0009085026908666076,
-                        -0.001135277542723973,
-                        -0.0011130263520414156,
-                        -0.0009513309718359675,
-                        -0.0006868003581235842,
-                        -0.00031510515859724665
+                        -0.0009085025652585955,
+                        -0.0011352774844096654,
+                        -0.001113026381374366,
+                        -0.0009513310772895932,
+                        -0.0006868004819044367,
+                        -0.0003151052366628259
                     ]
                 ],
                 "dtype": "float64",
@@ -120,78 +120,78 @@
             "shape": {
                 "__ndarray__": [
                     [
-                        0.0006645126688766545,
-                        -0.009764051446174316,
-                        -0.38014815803031066,
-                        -0.3170998471501314,
-                        0.012629788250227033,
-                        0.023153961286857292,
-                        -0.19944001682278154,
-                        -0.16919122160128627,
-                        0.035149811662053616,
-                        0.06625711164122668,
-                        0.09120798114669953,
-                        0.1595909431983964,
-                        0.0784037050728588,
-                        0.06061704069025734,
-                        0.047118377910311315,
-                        0.16186094246745536,
-                        0.027871468602449247,
-                        0.03253646911690112,
-                        0.04177297624327168,
-                        0.05856775642534094,
-                        0.03130211475000047,
-                        0.023490655556126543,
-                        0.04666963738539817,
-                        0.07533987952041057,
-                        0.00804749266451707,
-                        0.01353940603375206,
-                        0.015043778052562018,
-                        0.015715951241480397,
-                        -0.46385015320022327,
-                        -0.47399094658618945,
-                        -0.4482361926246763,
-                        -0.36889995951921006,
-                        0.008718247472936355,
-                        0.029550002917111684,
-                        0.04312633336755671,
-                        0.04434156612737814,
-                        -0.3956395006966927,
-                        -0.40794608902821317,
-                        -0.38620462938522016,
-                        -0.3163656383524376,
-                        0.06398782335327348,
-                        0.07479424398976865,
-                        0.07286141978007882,
-                        0.058461327882023476,
-                        0.08736644102793592,
-                        0.08697238548657082,
-                        0.08008641195458757,
-                        0.06426734524160192,
-                        0.10936456518485274,
-                        0.10829983178787791,
-                        0.10157510763623859,
-                        0.08255237385799354,
-                        0.17439475167596127,
-                        0.1692317944353169,
-                        0.15458475447923542,
-                        0.12034837358468198,
-                        0.1196111830648549,
-                        0.1272459445807717,
-                        0.11338927137762347,
-                        0.07770667985456997,
-                        0.05670973390543475,
-                        0.03836441806456059,
-                        0.024469816046396933,
-                        0.01997615447360168,
-                        0.049347411174940806,
-                        0.05111775196390402,
-                        0.056307019668804616,
-                        0.06050160881807444,
-                        0.18336405571950376,
-                        0.18396407981080587,
-                        0.172950698398754,
-                        0.1403137146906595
+                        0.0006642837325210593,
+                        -0.009763975859965732,
+                        -0.3801481999887656,
+                        -0.3170998426235122,
+                        0.012629573615866977,
+                        0.02315397958563808,
+                        -0.19944009087270742,
+                        -0.1691912552790567,
+                        0.0351498387083226,
+                        0.06625716321778724,
+                        0.09120797369196265,
+                        0.15959099100025592,
+                        0.07840389483505791,
+                        0.06061702563900386,
+                        0.0471183163334587,
+                        0.1618610610277068,
+                        0.027871493172952767,
+                        0.03253649812508121,
+                        0.04177297503503383,
+                        0.05856781186588396,
+                        0.0313022523470325,
+                        0.02349066959756217,
+                        0.04666961628569403,
+                        0.07533994488661211,
+                        0.008047029629268385,
+                        0.013538816921931512,
+                        0.01504317535187119,
+                        0.01571547469209155,
+                        -0.46385021365505386,
+                        -0.4739910307711922,
+                        -0.44823629888730077,
+                        -0.3689000683369753,
+                        0.008718298792151594,
+                        0.029550012094386215,
+                        0.043126338752077574,
+                        0.04434158317192526,
+                        -0.3956395141009607,
+                        -0.4079461176825463,
+                        -0.3862046757086361,
+                        -0.3163656910038524,
+                        0.06398787247549687,
+                        0.07479431304247744,
+                        0.07286149409348636,
+                        0.05846138573422667,
+                        0.08736651118290667,
+                        0.08697245700465966,
+                        0.0800864819250691,
+                        0.06426740266179096,
+                        0.10936456656828654,
+                        0.10829983086081271,
+                        0.10157510658835196,
+                        0.08255237277265481,
+                        0.174394811331467,
+                        0.16923186958607747,
+                        0.15458484011444382,
+                        0.1203484559615018,
+                        0.11961152090585189,
+                        0.12724633074407918,
+                        0.11338965533117872,
+                        0.07770697893018691,
+                        0.05670971831375479,
+                        0.03836441180009568,
+                        0.024469827398488818,
+                        0.019976171681793435,
+                        0.04934729534821406,
+                        0.05111766067208243,
+                        0.056306951989697436,
+                        0.060501561915658614,
+                        0.18336415728743627,
+                        0.1839642086404485,
+                        0.172950832038159,
+                        0.14031382776067444
                     ]
                 ],
                 "dtype": "float64",
@@ -200,16 +200,16 @@
                     72
                 ]
             },
-            "span": 0.04345768047526699,
+            "span": 0.04345766409999452,
             "twist": {
                 "__ndarray__": [
                     [
-                        -0.02465434329217739,
-                        -0.028750839744510325,
-                        -0.02590421571553876,
-                        -0.020158462600612555,
-                        -0.012724748992147882,
-                        -0.00481612970551939
+                        -0.024654336084479405,
+                        -0.028750831788918046,
+                        -0.025904206045517835,
+                        -0.020158455062478375,
+                        -0.012724745076232755,
+                        -0.004816128687393934
                     ]
                 ],
                 "dtype": "float64",
@@ -231,78 +231,78 @@
             "shape": {
                 "__ndarray__": [
                     [
-                        -0.029047128341428835,
-                        -0.047080478183655516,
-                        -0.4903248217539634,
-                        -0.4013165546031968,
-                        0.00954546422194532,
-                        0.026725301125078336,
-                        -0.4464715952546496,
-                        -0.3753145426661034,
-                        0.0178274108304839,
-                        0.08962439141045284,
-                        0.13469818406324813,
-                        0.23088231518750404,
-                        0.06582830189612904,
-                        0.07763781029295294,
-                        0.09150772425787565,
-                        0.25077700314569784,
-                        0.05119046230867218,
-                        0.07611282984016528,
-                        0.10139818246290813,
-                        0.14288689493948598,
-                        0.0598475217726408,
-                        0.06087119675482909,
-                        0.11793996845165507,
-                        0.18543384750320266,
-                        -0.03579258071221712,
-                        -0.029063640559842896,
-                        -0.0180137765131618,
-                        -0.0024105812916838942,
-                        -0.6512115102018172,
-                        -0.7548563235266618,
-                        -0.8101129239208076,
-                        -0.7525332782418444,
-                        -0.04452557149987375,
-                        -0.017099546303999347,
-                        0.01681054214487729,
-                        0.04143403960686618,
-                        -0.5458096626601037,
-                        -0.6405871067373664,
-                        -0.6906119609438919,
-                        -0.6398074456856779,
-                        0.0547914087988338,
-                        0.08331778931403921,
-                        0.09984183171688447,
-                        0.09540804372074849,
-                        0.13061487068540045,
-                        0.14676626890819855,
-                        0.152179319513855,
-                        0.13677491289725968,
-                        0.17617158652365938,
-                        0.19557374496963953,
-                        0.20395944940760774,
-                        0.18339268089437616,
-                        0.2786571304721808,
-                        0.3033889798196467,
-                        0.30782550157853655,
-                        0.2662480183698179,
-                        0.12544590383567195,
-                        0.16080952345946356,
-                        0.16870887439199655,
-                        0.1338404820032475,
-                        0.08919775817731485,
-                        0.07838533829420004,
-                        0.06617921442213574,
-                        0.061226194962906565,
-                        0.11227975184827244,
-                        0.1280158500666968,
-                        0.14524155735570896,
-                        0.1553472174677229,
-                        0.31101156418403875,
-                        0.3453243604625382,
-                        0.35617712974166693,
-                        0.3173191720432818
+                        -0.029047420922817188,
+                        -0.04708038357322772,
+                        -0.49032488213844316,
+                        -0.4013165545103022,
+                        0.009544996586770077,
+                        0.026725344461077258,
+                        -0.44647176923194504,
+                        -0.3753146224937229,
+                        0.01782744019989366,
+                        0.08962447709312292,
+                        0.13469816620993494,
+                        0.2308823656803091,
+                        0.06582854869951546,
+                        0.07763779529085339,
+                        0.09150764410109205,
+                        0.2507771488956023,
+                        0.05119051469137706,
+                        0.07611290577460351,
+                        0.10139817895149289,
+                        0.14288701914486024,
+                        0.05984782406821292,
+                        0.060871228049129186,
+                        0.11793992001578524,
+                        0.18543399479046915,
+                        -0.0357932204896516,
+                        -0.02906455119109569,
+                        -0.01801481334783786,
+                        -0.0024114995232683567,
+                        -0.6512116044666574,
+                        -0.7548564706156609,
+                        -0.8101131292991757,
+                        -0.7525335106763098,
+                        -0.044525501572274084,
+                        -0.017099525064875748,
+                        0.01681056318887561,
+                        0.041434083335101235,
+                        -0.545809688007461,
+                        -0.640587162239928,
+                        -0.6906120551468395,
+                        -0.6398075609220193,
+                        0.054791469450099516,
+                        0.08331789087620665,
+                        0.09984195609069375,
+                        0.09540815288288124,
+                        0.1306150031496364,
+                        0.14676641870076226,
+                        0.15217947592334216,
+                        0.13677505061708325,
+                        0.17617158370280409,
+                        0.1955737376811594,
+                        0.20395944236578603,
+                        0.18339267529484207,
+                        0.2786572043773582,
+                        0.3033890890341621,
+                        0.30782564735585616,
+                        0.26624818061430483,
+                        0.12544638422745383,
+                        0.1608101384189547,
+                        0.1687095486591529,
+                        0.13384106597714177,
+                        0.08919774441517907,
+                        0.07838533748537196,
+                        0.06617923921964464,
+                        0.06122622884183163,
+                        0.11227959483655524,
+                        0.1280157086416137,
+                        0.1452414371823498,
+                        0.15534712133864104,
+                        0.3110117072275599,
+                        0.34532456199659967,
+                        0.3561773650682832,
+                        0.31731939747434174
                     ]
                 ],
                 "dtype": "float64",
@@ -311,16 +311,16 @@
                     72
                 ]
             },
-            "span": 0.06413599860398511,
+            "span": 0.06413597231156332,
             "twist": {
                 "__ndarray__": [
                     [
-                        -0.024967200640313977,
-                        -0.032160201149526224,
-                        -0.03408738804686076,
-                        -0.031269885985493504,
-                        -0.022967856493621857,
-                        -0.009658516327860061
+                        -0.02496719304046418,
+                        -0.03216019106103158,
+                        -0.03408737451797081,
+                        -0.03126987441866044,
+                        -0.02296784989696189,
+                        -0.009658514402793021
                     ]
                 ],
                 "dtype": "float64",
@@ -342,78 +342,78 @@
             "shape": {
                 "__ndarray__": [
                     [
-                        594.7426505395426,
-                        -60.95357352009637,
-                        -6390.5862471848795,
-                        -5738.497960270486,
-                        549.3459808461944,
-                        745.0690329372169,
-                        -5122.077504683097,
-                        -4267.16969160718,
-                        533.8160096790439,
-                        -1327.2979836550487,
-                        928.6240092658941,
-                        2726.849589708642,
-                        -738.4869220372068,
-                        2437.0554897528896,
-                        4009.6660273938514,
-                        2909.330060584425,
-                        209.9011429142525,
-                        -802.2511917579388,
-                        889.6381085933683,
-                        1805.409242968729,
-                        1772.5832170516571,
-                        1484.8775799091954,
-                        1300.9520503055649,
-                        1637.1134932258697,
-                        832.4695784627652,
-                        883.2884492382128,
-                        811.0658644104851,
-                        664.6632951442172,
-                        -8014.169453167122,
-                        -8491.924808686392,
-                        -8473.458757928694,
-                        -7698.954633758089,
-                        -150.62656901599314,
-                        -127.62300001332085,
-                        -124.644939547219,
-                        109.58711510874411,
-                        -7304.934815425823,
-                        -7691.1835889919685,
-                        -7579.520027961285,
-                        -6722.127198006985,
-                        372.06946877091474,
-                        331.0811040579845,
-                        362.87798412001905,
-                        424.22808772649057,
-                        -1657.0773745451215,
-                        -1547.6811185183667,
-                        -1306.2747892010625,
-                        -1071.9253109948243,
-                        1358.8681237885708,
-                        1423.67753327383,
-                        1368.7940685785716,
-                        1227.4216215826539,
-                        3033.246089584791,
-                        2984.076028471124,
-                        2819.4186220506044,
-                        2512.2220406198417,
-                        171.3195745137724,
-                        1276.6749417974427,
-                        2194.801115230459,
-                        2782.397462813064,
-                        4229.9392171322925,
-                        4887.252887983364,
-                        4871.914521400789,
-                        3845.439447753965,
-                        4586.725912128072,
-                        4017.6325302258406,
-                        3265.222971379758,
-                        2228.1250814571545,
-                        2518.507037114592,
-                        2057.5766673401545,
-                        1771.1964535366642,
-                        1650.6928780053324
+                        594.7464806710151,
+                        -60.95713879814559,
+                        -6390.586988773744,
+                        -5738.498500250781,
+                        549.3524963677182,
+                        745.067127970098,
+                        -5122.079626035299,
+                        -4267.170953733126,
+                        533.8176208049622,
+                        -1327.3003517970392,
+                        928.624965889924,
+                        2726.851390291158,
+                        -738.4885434383391,
+                        2437.055428702888,
+                        4009.6667574251287,
+                        2909.330702024834,
+                        209.90156941660013,
+                        -802.2538545856731,
+                        889.6386766972295,
+                        1805.4120606066551,
+                        1772.5789854569516,
+                        1484.8768710738282,
+                        1300.952907796529,
+                        1637.1153395910706,
+                        832.4773637868072,
+                        883.298773249864,
+                        811.077027167412,
+                        664.6733995320438,
+                        -8014.1704516117015,
+                        -8491.92622065088,
+                        -8473.460628868917,
+                        -7698.956856042612,
+                        -150.63322590781638,
+                        -127.63011317048972,
+                        -124.65096549433201,
+                        109.58297296828539,
+                        -7304.935529201246,
+                        -7691.184568347064,
+                        -7579.521265552777,
+                        -6722.1286071665745,
+                        372.07238101978254,
+                        331.0836645378183,
+                        362.8797686476965,
+                        424.22915186639136,
+                        -1657.0814825344248,
+                        -1547.6857350076039,
+                        -1306.279222728841,
+                        -1071.9291386528516,
+                        1358.8687661805395,
+                        1423.6783902780644,
+                        1368.7949545640595,
+                        1227.4224378809442,
+                        3033.248420860041,
+                        2984.0789269947195,
+                        2819.421868077051,
+                        2512.225334830884,
+                        171.315937401259,
+                        1276.6698148115597,
+                        2194.795179365885,
+                        2782.3916124800135,
+                        4229.93897952753,
+                        4887.252486432245,
+                        4871.91388360555,
+                        3845.438700605667,
+                        4586.72713607695,
+                        4017.633885710158,
+                        3265.2242704154896,
+                        2228.1262788355048,
+                        2518.507985262193,
+                        2057.5779453657105,
+                        1771.1980922979335,
+                        1650.6947969283333
                     ]
                 ],
                 "dtype": "float64",
@@ -422,16 +422,16 @@
                     72
                 ]
             },
-            "span": 936.2973925855713,
+            "span": 936.2978351023139,
             "twist": {
                 "__ndarray__": [
                     [
-                        -370.3783770124885,
-                        -462.82994861771,
-                        -453.75858320025156,
-                        -387.83861059809044,
-                        -279.994769999822,
-                        -128.46207105692523
+                        -370.378325804621,
+                        -462.8299248441359,
+                        -453.758595158707,
+                        -387.83865358942626,
+                        -279.9948204628006,
+                        -128.4621028827006
                     ]
                 ],
                 "dtype": "float64",
@@ -453,78 +453,78 @@
             "shape": {
                 "__ndarray__": [
                     [
-                        270.9085248480369,
-                        -3980.608493575877,
-                        -154978.8010657982,
-                        -129275.2656861672,
-                        5148.912073852452,
-                        9439.406937425761,
-                        -81307.7060583119,
-                        -68975.87722241263,
-                        14329.875218386798,
-                        27011.699273895967,
-                        37183.66975388664,
-                        65062.035723122026,
-                        31963.622484102856,
-                        24712.35514860382,
-                        19209.220306474916,
-                        65987.46902513114,
-                        11362.640319846467,
-                        13264.467729578304,
-                        17030.006954857097,
-                        23876.902939482974,
-                        12761.246141280051,
-                        9576.670457121938,
-                        19026.277769279437,
-                        30714.56208288091,
-                        3280.801809470795,
-                        5519.745051840175,
-                        6133.047436468623,
-                        6407.079002126651,
-                        -189102.4304566693,
-                        -193236.62910425806,
-                        -182736.93100922904,
-                        -150393.13549679253,
-                        3554.255129766774,
-                        12046.945189247395,
-                        17581.743587285422,
-                        18077.169678809474,
-                        -161294.31164402986,
-                        -166311.46157502144,
-                        -157447.90330776767,
-                        -128975.94344352269,
-                        26086.555824663454,
-                        30492.11738974947,
-                        29704.14361594276,
-                        23833.514150943432,
-                        35617.55067826984,
-                        35456.902115165794,
-                        32649.62842564629,
-                        26200.511308096204,
-                        44585.74593456105,
-                        44151.67542328246,
-                        41410.13988114162,
-                        33654.95177442676,
-                        71097.25236325563,
-                        68992.41795539009,
-                        63021.1127060946,
-                        49063.624943003015,
-                        48763.08711187819,
-                        51875.62668668677,
-                        46226.538155227645,
-                        31679.459243110636,
-                        23119.424318565656,
-                        15640.405956559225,
-                        9975.85460579268,
-                        8143.8786557970925,
-                        20117.95258779807,
-                        20839.68512064548,
-                        22955.245778577548,
-                        24665.29588295175,
-                        74753.85823572593,
-                        74998.4760572695,
-                        70508.54072320399,
-                        57203.09520508784
+                        270.81519207461315,
+                        -3980.5776785900707,
+                        -154978.81817142142,
+                        -129275.2638407544,
+                        5148.8245717165655,
+                        9439.414397472698,
+                        -81307.73624698544,
+                        -68975.89095216605,
+                        14329.886244609828,
+                        27011.72030062839,
+                        37183.66671473962,
+                        65062.05521098424,
+                        31963.699846357027,
+                        24712.349012509727,
+                        19209.195202824365,
+                        65987.51735977479,
+                        11362.650336749326,
+                        13264.479555633185,
+                        17030.006462282712,
+                        23876.925541483586,
+                        12761.302236837797,
+                        9576.676181534183,
+                        19026.26916735209,
+                        30714.588731374057,
+                        3280.6130392607665,
+                        5519.504882733236,
+                        6132.801727450802,
+                        6406.884722471794,
+                        -189102.45510289547,
+                        -193236.6634248001,
+                        -182736.9743303752,
+                        -150393.17985961973,
+                        3554.2760515851987,
+                        12046.94893063933,
+                        17581.74578244701,
+                        18077.176627530247,
+                        -161294.31710868227,
+                        -166311.47325681994,
+                        -157447.92219289733,
+                        -128975.96490845073,
+                        26086.575850811285,
+                        30492.145541157413,
+                        29704.17391203255,
+                        23833.53773612961,
+                        35617.57927904792,
+                        35456.931271659865,
+                        32649.656951211997,
+                        26200.53471715887,
+                        44585.74649855906,
+                        44151.67504533622,
+                        41410.1394539392,
+                        33654.95133195592,
+                        71097.27668361232,
+                        68992.44859285219,
+                        63021.147617856484,
+                        49063.65852638495,
+                        48763.224842896896,
+                        51875.78411774403,
+                        46226.694685412665,
+                        31679.581170257843,
+                        23119.417962151383,
+                        15640.40340266184,
+                        9975.859233811978,
+                        8143.885671231952,
+                        20117.905367559186,
+                        20839.647902794906,
+                        22955.2181871585,
+                        24665.276761774807,
+                        74753.89964294108,
+                        74998.52857853827,
+                        70508.59520531689,
+                        57203.14130147164
                     ]
                 ],
                 "dtype": "float64",
@@ -533,16 +533,16 @@
                     72
                 ]
             },
-            "span": 17716.82717615395,
+            "span": 17716.820500284084,
             "twist": {
                 "__ndarray__": [
                     [
-                        -10051.082673355177,
-                        -11721.14234704246,
-                        -10560.630662910786,
-                        -8218.202033017795,
-                        -5187.62566911897,
-                        -1963.439758346148
+                        -10051.079734920826,
+                        -11721.139103706326,
+                        -10560.626720636898,
+                        -8218.19895987124,
+                        -5187.624072678598,
+                        -1963.439343276776
                     ]
                 ],
                 "dtype": "float64",
@@ -563,6 +563,7 @@
         "ANKAMGLevels": 2,
         "ANKAMGNSmooth": 1,
         "ANKASMOverlap": 1,
+        "ANKASMOverlapCoarse": 0,
         "ANKCFL0": 5.0,
         "ANKCFLCutback": 0.5,
         "ANKCFLExponent": 0.5,
@@ -575,6 +576,7 @@
         "ANKCoupledSwitchTol": 1e-16,
         "ANKGlobalPreconditioner": "additive Schwarz",
         "ANKInnerPreconIts": 1,
+        "ANKInnerPreconItsCoarse": 1,
         "ANKJacobianLag": 10,
         "ANKLinResMax": 0.1,
         "ANKLinearSolveBuffer": 0.01,
@@ -583,6 +585,7 @@
         "ANKNSubiterTurb": 1,
         "ANKOuterPreconIts": 1,
         "ANKPCILUFill": 2,
+        "ANKPCILUFillCoarse": 0,
         "ANKPCUpdateCutoff": 1e-16,
         "ANKPCUpdateTol": 0.5,
         "ANKPCUpdateTolAfterCutoff": 0.0001,
@@ -601,11 +604,13 @@
         "ANKUseMatrixFree": true,
         "ANKUseTurbDADI": true,
         "ASMOverlap": 1,
+        "ASMOverlapCoarse": 0,
         "CFL": 1.5,
         "CFLCoarse": 1.25,
         "CFLLimit": 1.5,
         "GMRESOrthogonalizationType": "modified Gram-Schmidt",
         "ILUFill": 2,
+        "ILUFillCoarse": 0,
         "L2Convergence": 1e-15,
         "L2ConvergenceCoarse": 0.01,
         "L2ConvergenceRel": 1e-16,
@@ -615,14 +620,17 @@
         "NKAMGLevels": 2,
         "NKAMGNSmooth": 1,
         "NKASMOverlap": 1,
+        "NKASMOverlapCoarse": 0,
         "NKFixedStep": 0.25,
         "NKGlobalPreconditioner": "additive Schwarz",
         "NKInnerPreconIts": 1,
+        "NKInnerPreconItsCoarse": 1,
         "NKJacobianLag": 2,
         "NKLS": "cubic",
         "NKLinearSolveTol": 1e-06,
         "NKOuterPreconIts": 1,
         "NKPCILUFill": 2,
+        "NKPCILUFillCoarse": 0,
         "NKSubspaceSize": 60,
         "NKSwitchTol": 1e-05,
         "NKUseEW": false,
@@ -686,6 +694,7 @@
         "infChangeCorrectionTol": 1e-12,
         "infChangeCorrectionType": "offset",
         "innerPreconIts": 1,
+        "innerPreconItsCoarse": 1,
         "isoVariables": [],
         "isosurface": {},
         "liftIndex": 2,
@@ -739,6 +748,7 @@
         "printWarnings": true,
         "qMode": false,
         "rMode": false,
+        "recomputeOverlapMatrix": true,
         "resAveraging": "never",
         "restartAdjoint": true,
         "restartFile": "input_files/mdo_tutorial_rough.cgns",
@@ -769,6 +779,7 @@
         "turbulenceModel": "SA",
         "turbulenceOrder": "first order",
         "turbulenceProduction": "strain",
+        "updateWallAssociations": false,
         "useALE": true,
         "useANKSolver": true,
         "useApproxWallDistance": true,
@@ -803,6 +814,7 @@
         ],
         "wallDistCutoff": 1e+20,
         "windAxis": false,
+        "writeSolutionDigits": 3,
         "writeSolutionEachIter": false,
         "writeSurfaceSolution": true,
         "writeTecplotSurfaceSolution": false,
diff --git a/tests/reg_tests/refs/funcs_rans_rough_sa.json b/tests/reg_tests/refs/funcs_rans_rough_sa.json
index 7338a4e74..2ab63b260 100644
--- a/tests/reg_tests/refs/funcs_rans_rough_sa.json
+++ b/tests/reg_tests/refs/funcs_rans_rough_sa.json
@@ -1,7 +1,6 @@
 {
-    "Dot product test for (w, xV) -> (dw, F)": 25052510162.914825,
-    "Dot product test for Xv -> R": -7422557.015466749,
-    "Dot product test for w -> F": 48678.90552195639,
+    "Dot product test for (w, xV) -> (dw, F)": 25051748893.868687,
+    "Dot product test for Xv -> R": -8183826.0616009245,
     "Dot product test for w -> R": 25061252292.450912,
     "Dot product test for xV -> F": -1368251.4261434944,
     "Eval Functions:": {
@@ -694,7 +693,7 @@
     "||dF/dzRef||": 0.0,
     "||dR/dP||": 0.07356605182246514,
     "||dR/dT||": 9.220630057589264e-07,
-    "||dR/dXv * xVDot||": 26736790.15992888,
+    "||dR/dXv * xVDot||": 26726959.34145183,
     "||dR/dalpha||": 0.010271084828244875,
     "||dR/dbeta||": 0.6070879776942455,
     "||dR/dmach||": 0.9283860217641269,
@@ -1086,7 +1085,7 @@
     },
     "||dsepsensoravgz/dXv||": 3.3049236735056238,
     "||dsepsensoravgz/dw||": 10.598482329462021,
-    "||dwBar^T * dR/dXv||": 111601096.04341298,
+    "||dwBar^T * dR/dXv||": 108834126.81700659,
     "||dwBar^T * dR/dw||": 2217397593.2762737,
     "||dwBar^T * dR/xDv||": {
         "P_mdo_tutorial": 0.8893628179225483,

From 0aa174198708797b13f30a8237b2b58701d0d375 Mon Sep 17 00:00:00 2001
From: DavidAnderegg <andv@zhaw.ch>
Date: Fri, 11 Oct 2024 11:34:58 +0200
Subject: [PATCH 60/60] Also train dot product test w -> F

---
 tests/reg_tests/refs/funcs_rans_rough_sa.json | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/reg_tests/refs/funcs_rans_rough_sa.json b/tests/reg_tests/refs/funcs_rans_rough_sa.json
index 2ab63b260..895d67e8b 100644
--- a/tests/reg_tests/refs/funcs_rans_rough_sa.json
+++ b/tests/reg_tests/refs/funcs_rans_rough_sa.json
@@ -1,6 +1,7 @@
 {
     "Dot product test for (w, xV) -> (dw, F)": 25051748893.868687,
     "Dot product test for Xv -> R": -8183826.0616009245,
+    "Dot product test for w -> F": 48678.90552195639,
     "Dot product test for w -> R": 25061252292.450912,
     "Dot product test for xV -> F": -1368251.4261434944,
     "Eval Functions:": {